1 /* handling of writes to regular files and writing back to the server 2 * 3 * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved. 4 * Written by David Howells (dhowells@redhat.com) 5 * 6 * This program is free software; you can redistribute it and/or 7 * modify it under the terms of the GNU General Public License 8 * as published by the Free Software Foundation; either version 9 * 2 of the License, or (at your option) any later version. 10 */ 11 #include <linux/backing-dev.h> 12 #include <linux/slab.h> 13 #include <linux/fs.h> 14 #include <linux/pagemap.h> 15 #include <linux/writeback.h> 16 #include <linux/pagevec.h> 17 #include "internal.h" 18 19 static int afs_write_back_from_locked_page(struct afs_writeback *wb, 20 struct page *page); 21 22 /* 23 * mark a page as having been made dirty and thus needing writeback 24 */ 25 int afs_set_page_dirty(struct page *page) 26 { 27 _enter(""); 28 return __set_page_dirty_nobuffers(page); 29 } 30 31 /* 32 * unlink a writeback record because its usage has reached zero 33 * - must be called with the wb->vnode->writeback_lock held 34 */ 35 static void afs_unlink_writeback(struct afs_writeback *wb) 36 { 37 struct afs_writeback *front; 38 struct afs_vnode *vnode = wb->vnode; 39 40 list_del_init(&wb->link); 41 if (!list_empty(&vnode->writebacks)) { 42 /* if an fsync rises to the front of the queue then wake it 43 * up */ 44 front = list_entry(vnode->writebacks.next, 45 struct afs_writeback, link); 46 if (front->state == AFS_WBACK_SYNCING) { 47 _debug("wake up sync"); 48 front->state = AFS_WBACK_COMPLETE; 49 wake_up(&front->waitq); 50 } 51 } 52 } 53 54 /* 55 * free a writeback record 56 */ 57 static void afs_free_writeback(struct afs_writeback *wb) 58 { 59 _enter(""); 60 key_put(wb->key); 61 kfree(wb); 62 } 63 64 /* 65 * dispose of a reference to a writeback record 66 */ 67 void afs_put_writeback(struct afs_writeback *wb) 68 { 69 struct afs_vnode *vnode = wb->vnode; 70 71 _enter("{%d}", wb->usage); 72 73 spin_lock(&vnode->writeback_lock); 74 if (--wb->usage == 0) 75 afs_unlink_writeback(wb); 76 else 77 wb = NULL; 78 spin_unlock(&vnode->writeback_lock); 79 if (wb) 80 afs_free_writeback(wb); 81 } 82 83 /* 84 * partly or wholly fill a page that's under preparation for writing 85 */ 86 static int afs_fill_page(struct afs_vnode *vnode, struct key *key, 87 loff_t pos, struct page *page) 88 { 89 struct afs_read *req; 90 loff_t i_size; 91 int ret; 92 93 _enter(",,%llu", (unsigned long long)pos); 94 95 req = kzalloc(sizeof(struct afs_read) + sizeof(struct page *), 96 GFP_KERNEL); 97 if (!req) 98 return -ENOMEM; 99 100 atomic_set(&req->usage, 1); 101 req->pos = pos; 102 req->nr_pages = 1; 103 req->pages[0] = page; 104 get_page(page); 105 106 i_size = i_size_read(&vnode->vfs_inode); 107 if (pos + PAGE_SIZE > i_size) 108 req->len = i_size - pos; 109 else 110 req->len = PAGE_SIZE; 111 112 ret = afs_vnode_fetch_data(vnode, key, req); 113 afs_put_read(req); 114 if (ret < 0) { 115 if (ret == -ENOENT) { 116 _debug("got NOENT from server" 117 " - marking file deleted and stale"); 118 set_bit(AFS_VNODE_DELETED, &vnode->flags); 119 ret = -ESTALE; 120 } 121 } 122 123 _leave(" = %d", ret); 124 return ret; 125 } 126 127 /* 128 * prepare to perform part of a write to a page 129 */ 130 int afs_write_begin(struct file *file, struct address_space *mapping, 131 loff_t pos, unsigned len, unsigned flags, 132 struct page **pagep, void **fsdata) 133 { 134 struct afs_writeback *candidate, *wb; 135 struct afs_vnode *vnode = AFS_FS_I(file_inode(file)); 136 struct page *page; 137 struct key *key = file->private_data; 138 unsigned from = pos & (PAGE_SIZE - 1); 139 unsigned to = from + len; 140 pgoff_t index = pos >> PAGE_SHIFT; 141 int ret; 142 143 _enter("{%x:%u},{%lx},%u,%u", 144 vnode->fid.vid, vnode->fid.vnode, index, from, to); 145 146 candidate = kzalloc(sizeof(*candidate), GFP_KERNEL); 147 if (!candidate) 148 return -ENOMEM; 149 candidate->vnode = vnode; 150 candidate->first = candidate->last = index; 151 candidate->offset_first = from; 152 candidate->to_last = to; 153 INIT_LIST_HEAD(&candidate->link); 154 candidate->usage = 1; 155 candidate->state = AFS_WBACK_PENDING; 156 init_waitqueue_head(&candidate->waitq); 157 158 page = grab_cache_page_write_begin(mapping, index, flags); 159 if (!page) { 160 kfree(candidate); 161 return -ENOMEM; 162 } 163 *pagep = page; 164 /* page won't leak in error case: it eventually gets cleaned off LRU */ 165 166 if (!PageUptodate(page) && len != PAGE_SIZE) { 167 ret = afs_fill_page(vnode, key, index << PAGE_SHIFT, page); 168 if (ret < 0) { 169 kfree(candidate); 170 _leave(" = %d [prep]", ret); 171 return ret; 172 } 173 SetPageUptodate(page); 174 } 175 176 try_again: 177 spin_lock(&vnode->writeback_lock); 178 179 /* see if this page is already pending a writeback under a suitable key 180 * - if so we can just join onto that one */ 181 wb = (struct afs_writeback *) page_private(page); 182 if (wb) { 183 if (wb->key == key && wb->state == AFS_WBACK_PENDING) 184 goto subsume_in_current_wb; 185 goto flush_conflicting_wb; 186 } 187 188 if (index > 0) { 189 /* see if we can find an already pending writeback that we can 190 * append this page to */ 191 list_for_each_entry(wb, &vnode->writebacks, link) { 192 if (wb->last == index - 1 && wb->key == key && 193 wb->state == AFS_WBACK_PENDING) 194 goto append_to_previous_wb; 195 } 196 } 197 198 list_add_tail(&candidate->link, &vnode->writebacks); 199 candidate->key = key_get(key); 200 spin_unlock(&vnode->writeback_lock); 201 SetPagePrivate(page); 202 set_page_private(page, (unsigned long) candidate); 203 _leave(" = 0 [new]"); 204 return 0; 205 206 subsume_in_current_wb: 207 _debug("subsume"); 208 ASSERTRANGE(wb->first, <=, index, <=, wb->last); 209 if (index == wb->first && from < wb->offset_first) 210 wb->offset_first = from; 211 if (index == wb->last && to > wb->to_last) 212 wb->to_last = to; 213 spin_unlock(&vnode->writeback_lock); 214 kfree(candidate); 215 _leave(" = 0 [sub]"); 216 return 0; 217 218 append_to_previous_wb: 219 _debug("append into %lx-%lx", wb->first, wb->last); 220 wb->usage++; 221 wb->last++; 222 wb->to_last = to; 223 spin_unlock(&vnode->writeback_lock); 224 SetPagePrivate(page); 225 set_page_private(page, (unsigned long) wb); 226 kfree(candidate); 227 _leave(" = 0 [app]"); 228 return 0; 229 230 /* the page is currently bound to another context, so if it's dirty we 231 * need to flush it before we can use the new context */ 232 flush_conflicting_wb: 233 _debug("flush conflict"); 234 if (wb->state == AFS_WBACK_PENDING) 235 wb->state = AFS_WBACK_CONFLICTING; 236 spin_unlock(&vnode->writeback_lock); 237 if (PageDirty(page)) { 238 ret = afs_write_back_from_locked_page(wb, page); 239 if (ret < 0) { 240 afs_put_writeback(candidate); 241 _leave(" = %d", ret); 242 return ret; 243 } 244 } 245 246 /* the page holds a ref on the writeback record */ 247 afs_put_writeback(wb); 248 set_page_private(page, 0); 249 ClearPagePrivate(page); 250 goto try_again; 251 } 252 253 /* 254 * finalise part of a write to a page 255 */ 256 int afs_write_end(struct file *file, struct address_space *mapping, 257 loff_t pos, unsigned len, unsigned copied, 258 struct page *page, void *fsdata) 259 { 260 struct afs_vnode *vnode = AFS_FS_I(file_inode(file)); 261 loff_t i_size, maybe_i_size; 262 263 _enter("{%x:%u},{%lx}", 264 vnode->fid.vid, vnode->fid.vnode, page->index); 265 266 maybe_i_size = pos + copied; 267 268 i_size = i_size_read(&vnode->vfs_inode); 269 if (maybe_i_size > i_size) { 270 spin_lock(&vnode->writeback_lock); 271 i_size = i_size_read(&vnode->vfs_inode); 272 if (maybe_i_size > i_size) 273 i_size_write(&vnode->vfs_inode, maybe_i_size); 274 spin_unlock(&vnode->writeback_lock); 275 } 276 277 set_page_dirty(page); 278 if (PageDirty(page)) 279 _debug("dirtied"); 280 unlock_page(page); 281 put_page(page); 282 283 return copied; 284 } 285 286 /* 287 * kill all the pages in the given range 288 */ 289 static void afs_kill_pages(struct afs_vnode *vnode, bool error, 290 pgoff_t first, pgoff_t last) 291 { 292 struct pagevec pv; 293 unsigned count, loop; 294 295 _enter("{%x:%u},%lx-%lx", 296 vnode->fid.vid, vnode->fid.vnode, first, last); 297 298 pagevec_init(&pv, 0); 299 300 do { 301 _debug("kill %lx-%lx", first, last); 302 303 count = last - first + 1; 304 if (count > PAGEVEC_SIZE) 305 count = PAGEVEC_SIZE; 306 pv.nr = find_get_pages_contig(vnode->vfs_inode.i_mapping, 307 first, count, pv.pages); 308 ASSERTCMP(pv.nr, ==, count); 309 310 for (loop = 0; loop < count; loop++) { 311 ClearPageUptodate(pv.pages[loop]); 312 if (error) 313 SetPageError(pv.pages[loop]); 314 end_page_writeback(pv.pages[loop]); 315 } 316 317 __pagevec_release(&pv); 318 } while (first < last); 319 320 _leave(""); 321 } 322 323 /* 324 * synchronously write back the locked page and any subsequent non-locked dirty 325 * pages also covered by the same writeback record 326 */ 327 static int afs_write_back_from_locked_page(struct afs_writeback *wb, 328 struct page *primary_page) 329 { 330 struct page *pages[8], *page; 331 unsigned long count; 332 unsigned n, offset, to; 333 pgoff_t start, first, last; 334 int loop, ret; 335 336 _enter(",%lx", primary_page->index); 337 338 count = 1; 339 if (!clear_page_dirty_for_io(primary_page)) 340 BUG(); 341 if (test_set_page_writeback(primary_page)) 342 BUG(); 343 344 /* find all consecutive lockable dirty pages, stopping when we find a 345 * page that is not immediately lockable, is not dirty or is missing, 346 * or we reach the end of the range */ 347 start = primary_page->index; 348 if (start >= wb->last) 349 goto no_more; 350 start++; 351 do { 352 _debug("more %lx [%lx]", start, count); 353 n = wb->last - start + 1; 354 if (n > ARRAY_SIZE(pages)) 355 n = ARRAY_SIZE(pages); 356 n = find_get_pages_contig(wb->vnode->vfs_inode.i_mapping, 357 start, n, pages); 358 _debug("fgpc %u", n); 359 if (n == 0) 360 goto no_more; 361 if (pages[0]->index != start) { 362 do { 363 put_page(pages[--n]); 364 } while (n > 0); 365 goto no_more; 366 } 367 368 for (loop = 0; loop < n; loop++) { 369 page = pages[loop]; 370 if (page->index > wb->last) 371 break; 372 if (!trylock_page(page)) 373 break; 374 if (!PageDirty(page) || 375 page_private(page) != (unsigned long) wb) { 376 unlock_page(page); 377 break; 378 } 379 if (!clear_page_dirty_for_io(page)) 380 BUG(); 381 if (test_set_page_writeback(page)) 382 BUG(); 383 unlock_page(page); 384 put_page(page); 385 } 386 count += loop; 387 if (loop < n) { 388 for (; loop < n; loop++) 389 put_page(pages[loop]); 390 goto no_more; 391 } 392 393 start += loop; 394 } while (start <= wb->last && count < 65536); 395 396 no_more: 397 /* we now have a contiguous set of dirty pages, each with writeback set 398 * and the dirty mark cleared; the first page is locked and must remain 399 * so, all the rest are unlocked */ 400 first = primary_page->index; 401 last = first + count - 1; 402 403 offset = (first == wb->first) ? wb->offset_first : 0; 404 to = (last == wb->last) ? wb->to_last : PAGE_SIZE; 405 406 _debug("write back %lx[%u..] to %lx[..%u]", first, offset, last, to); 407 408 ret = afs_vnode_store_data(wb, first, last, offset, to); 409 if (ret < 0) { 410 switch (ret) { 411 case -EDQUOT: 412 case -ENOSPC: 413 mapping_set_error(wb->vnode->vfs_inode.i_mapping, -ENOSPC); 414 break; 415 case -EROFS: 416 case -EIO: 417 case -EREMOTEIO: 418 case -EFBIG: 419 case -ENOENT: 420 case -ENOMEDIUM: 421 case -ENXIO: 422 afs_kill_pages(wb->vnode, true, first, last); 423 mapping_set_error(wb->vnode->vfs_inode.i_mapping, -EIO); 424 break; 425 case -EACCES: 426 case -EPERM: 427 case -ENOKEY: 428 case -EKEYEXPIRED: 429 case -EKEYREJECTED: 430 case -EKEYREVOKED: 431 afs_kill_pages(wb->vnode, false, first, last); 432 break; 433 default: 434 break; 435 } 436 } else { 437 ret = count; 438 } 439 440 _leave(" = %d", ret); 441 return ret; 442 } 443 444 /* 445 * write a page back to the server 446 * - the caller locked the page for us 447 */ 448 int afs_writepage(struct page *page, struct writeback_control *wbc) 449 { 450 struct afs_writeback *wb; 451 int ret; 452 453 _enter("{%lx},", page->index); 454 455 wb = (struct afs_writeback *) page_private(page); 456 ASSERT(wb != NULL); 457 458 ret = afs_write_back_from_locked_page(wb, page); 459 unlock_page(page); 460 if (ret < 0) { 461 _leave(" = %d", ret); 462 return 0; 463 } 464 465 wbc->nr_to_write -= ret; 466 467 _leave(" = 0"); 468 return 0; 469 } 470 471 /* 472 * write a region of pages back to the server 473 */ 474 static int afs_writepages_region(struct address_space *mapping, 475 struct writeback_control *wbc, 476 pgoff_t index, pgoff_t end, pgoff_t *_next) 477 { 478 struct afs_writeback *wb; 479 struct page *page; 480 int ret, n; 481 482 _enter(",,%lx,%lx,", index, end); 483 484 do { 485 n = find_get_pages_tag(mapping, &index, PAGECACHE_TAG_DIRTY, 486 1, &page); 487 if (!n) 488 break; 489 490 _debug("wback %lx", page->index); 491 492 if (page->index > end) { 493 *_next = index; 494 put_page(page); 495 _leave(" = 0 [%lx]", *_next); 496 return 0; 497 } 498 499 /* at this point we hold neither mapping->tree_lock nor lock on 500 * the page itself: the page may be truncated or invalidated 501 * (changing page->mapping to NULL), or even swizzled back from 502 * swapper_space to tmpfs file mapping 503 */ 504 lock_page(page); 505 506 if (page->mapping != mapping) { 507 unlock_page(page); 508 put_page(page); 509 continue; 510 } 511 512 if (wbc->sync_mode != WB_SYNC_NONE) 513 wait_on_page_writeback(page); 514 515 if (PageWriteback(page) || !PageDirty(page)) { 516 unlock_page(page); 517 put_page(page); 518 continue; 519 } 520 521 wb = (struct afs_writeback *) page_private(page); 522 ASSERT(wb != NULL); 523 524 spin_lock(&wb->vnode->writeback_lock); 525 wb->state = AFS_WBACK_WRITING; 526 spin_unlock(&wb->vnode->writeback_lock); 527 528 ret = afs_write_back_from_locked_page(wb, page); 529 unlock_page(page); 530 put_page(page); 531 if (ret < 0) { 532 _leave(" = %d", ret); 533 return ret; 534 } 535 536 wbc->nr_to_write -= ret; 537 538 cond_resched(); 539 } while (index < end && wbc->nr_to_write > 0); 540 541 *_next = index; 542 _leave(" = 0 [%lx]", *_next); 543 return 0; 544 } 545 546 /* 547 * write some of the pending data back to the server 548 */ 549 int afs_writepages(struct address_space *mapping, 550 struct writeback_control *wbc) 551 { 552 pgoff_t start, end, next; 553 int ret; 554 555 _enter(""); 556 557 if (wbc->range_cyclic) { 558 start = mapping->writeback_index; 559 end = -1; 560 ret = afs_writepages_region(mapping, wbc, start, end, &next); 561 if (start > 0 && wbc->nr_to_write > 0 && ret == 0) 562 ret = afs_writepages_region(mapping, wbc, 0, start, 563 &next); 564 mapping->writeback_index = next; 565 } else if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX) { 566 end = (pgoff_t)(LLONG_MAX >> PAGE_SHIFT); 567 ret = afs_writepages_region(mapping, wbc, 0, end, &next); 568 if (wbc->nr_to_write > 0) 569 mapping->writeback_index = next; 570 } else { 571 start = wbc->range_start >> PAGE_SHIFT; 572 end = wbc->range_end >> PAGE_SHIFT; 573 ret = afs_writepages_region(mapping, wbc, start, end, &next); 574 } 575 576 _leave(" = %d", ret); 577 return ret; 578 } 579 580 /* 581 * completion of write to server 582 */ 583 void afs_pages_written_back(struct afs_vnode *vnode, struct afs_call *call) 584 { 585 struct afs_writeback *wb = call->wb; 586 struct pagevec pv; 587 unsigned count, loop; 588 pgoff_t first = call->first, last = call->last; 589 bool free_wb; 590 591 _enter("{%x:%u},{%lx-%lx}", 592 vnode->fid.vid, vnode->fid.vnode, first, last); 593 594 ASSERT(wb != NULL); 595 596 pagevec_init(&pv, 0); 597 598 do { 599 _debug("done %lx-%lx", first, last); 600 601 count = last - first + 1; 602 if (count > PAGEVEC_SIZE) 603 count = PAGEVEC_SIZE; 604 pv.nr = find_get_pages_contig(call->mapping, first, count, 605 pv.pages); 606 ASSERTCMP(pv.nr, ==, count); 607 608 spin_lock(&vnode->writeback_lock); 609 for (loop = 0; loop < count; loop++) { 610 struct page *page = pv.pages[loop]; 611 end_page_writeback(page); 612 if (page_private(page) == (unsigned long) wb) { 613 set_page_private(page, 0); 614 ClearPagePrivate(page); 615 wb->usage--; 616 } 617 } 618 free_wb = false; 619 if (wb->usage == 0) { 620 afs_unlink_writeback(wb); 621 free_wb = true; 622 } 623 spin_unlock(&vnode->writeback_lock); 624 first += count; 625 if (free_wb) { 626 afs_free_writeback(wb); 627 wb = NULL; 628 } 629 630 __pagevec_release(&pv); 631 } while (first <= last); 632 633 _leave(""); 634 } 635 636 /* 637 * write to an AFS file 638 */ 639 ssize_t afs_file_write(struct kiocb *iocb, struct iov_iter *from) 640 { 641 struct afs_vnode *vnode = AFS_FS_I(file_inode(iocb->ki_filp)); 642 ssize_t result; 643 size_t count = iov_iter_count(from); 644 645 _enter("{%x.%u},{%zu},", 646 vnode->fid.vid, vnode->fid.vnode, count); 647 648 if (IS_SWAPFILE(&vnode->vfs_inode)) { 649 printk(KERN_INFO 650 "AFS: Attempt to write to active swap file!\n"); 651 return -EBUSY; 652 } 653 654 if (!count) 655 return 0; 656 657 result = generic_file_write_iter(iocb, from); 658 659 _leave(" = %zd", result); 660 return result; 661 } 662 663 /* 664 * flush the vnode to the fileserver 665 */ 666 int afs_writeback_all(struct afs_vnode *vnode) 667 { 668 struct address_space *mapping = vnode->vfs_inode.i_mapping; 669 struct writeback_control wbc = { 670 .sync_mode = WB_SYNC_ALL, 671 .nr_to_write = LONG_MAX, 672 .range_cyclic = 1, 673 }; 674 int ret; 675 676 _enter(""); 677 678 ret = mapping->a_ops->writepages(mapping, &wbc); 679 __mark_inode_dirty(mapping->host, I_DIRTY_PAGES); 680 681 _leave(" = %d", ret); 682 return ret; 683 } 684 685 /* 686 * flush any dirty pages for this process, and check for write errors. 687 * - the return status from this call provides a reliable indication of 688 * whether any write errors occurred for this process. 689 */ 690 int afs_fsync(struct file *file, loff_t start, loff_t end, int datasync) 691 { 692 struct inode *inode = file_inode(file); 693 struct afs_writeback *wb, *xwb; 694 struct afs_vnode *vnode = AFS_FS_I(inode); 695 int ret; 696 697 _enter("{%x:%u},{n=%pD},%d", 698 vnode->fid.vid, vnode->fid.vnode, file, 699 datasync); 700 701 ret = filemap_write_and_wait_range(inode->i_mapping, start, end); 702 if (ret) 703 return ret; 704 inode_lock(inode); 705 706 /* use a writeback record as a marker in the queue - when this reaches 707 * the front of the queue, all the outstanding writes are either 708 * completed or rejected */ 709 wb = kzalloc(sizeof(*wb), GFP_KERNEL); 710 if (!wb) { 711 ret = -ENOMEM; 712 goto out; 713 } 714 wb->vnode = vnode; 715 wb->first = 0; 716 wb->last = -1; 717 wb->offset_first = 0; 718 wb->to_last = PAGE_SIZE; 719 wb->usage = 1; 720 wb->state = AFS_WBACK_SYNCING; 721 init_waitqueue_head(&wb->waitq); 722 723 spin_lock(&vnode->writeback_lock); 724 list_for_each_entry(xwb, &vnode->writebacks, link) { 725 if (xwb->state == AFS_WBACK_PENDING) 726 xwb->state = AFS_WBACK_CONFLICTING; 727 } 728 list_add_tail(&wb->link, &vnode->writebacks); 729 spin_unlock(&vnode->writeback_lock); 730 731 /* push all the outstanding writebacks to the server */ 732 ret = afs_writeback_all(vnode); 733 if (ret < 0) { 734 afs_put_writeback(wb); 735 _leave(" = %d [wb]", ret); 736 goto out; 737 } 738 739 /* wait for the preceding writes to actually complete */ 740 ret = wait_event_interruptible(wb->waitq, 741 wb->state == AFS_WBACK_COMPLETE || 742 vnode->writebacks.next == &wb->link); 743 afs_put_writeback(wb); 744 _leave(" = %d", ret); 745 out: 746 inode_unlock(inode); 747 return ret; 748 } 749 750 /* 751 * notification that a previously read-only page is about to become writable 752 * - if it returns an error, the caller will deliver a bus error signal 753 */ 754 int afs_page_mkwrite(struct vm_area_struct *vma, struct page *page) 755 { 756 struct afs_vnode *vnode = AFS_FS_I(vma->vm_file->f_mapping->host); 757 758 _enter("{{%x:%u}},{%lx}", 759 vnode->fid.vid, vnode->fid.vnode, page->index); 760 761 /* wait for the page to be written to the cache before we allow it to 762 * be modified */ 763 #ifdef CONFIG_AFS_FSCACHE 764 fscache_wait_on_page_write(vnode->cache, page); 765 #endif 766 767 _leave(" = 0"); 768 return 0; 769 } 770