/*
 * linux/fs/nfs/write.c
 *
 * Write file data over NFS.
 *
 * Copyright (C) 1996, 1997, Olaf Kirch <okir@monad.swb.de>
 */

#include <linux/types.h>
#include <linux/slab.h>
#include <linux/mm.h>
#include <linux/pagemap.h>
#include <linux/file.h>
#include <linux/writeback.h>
#include <linux/swap.h>
#include <linux/migrate.h>

#include <linux/sunrpc/clnt.h>
#include <linux/nfs_fs.h>
#include <linux/nfs_mount.h>
#include <linux/nfs_page.h>
#include <linux/backing-dev.h>
#include <linux/export.h>

#include <asm/uaccess.h>

#include "delegation.h"
#include "internal.h"
#include "iostat.h"
#include "nfs4_fs.h"
#include "fscache.h"
#include "pnfs.h"

#define NFSDBG_FACILITY		NFSDBG_PAGECACHE

#define MIN_POOL_WRITE		(32)
#define MIN_POOL_COMMIT		(4)

/*
 * Local function declarations
 */
static void nfs_pageio_init_write(struct nfs_pageio_descriptor *desc,
				  struct inode *inode, int ioflags);
static void nfs_redirty_request(struct nfs_page *req);
static const struct rpc_call_ops nfs_write_partial_ops;
static const struct rpc_call_ops nfs_write_full_ops;
static const struct rpc_call_ops nfs_commit_ops;

static struct kmem_cache *nfs_wdata_cachep;
static mempool_t *nfs_wdata_mempool;
static mempool_t *nfs_commit_mempool;

struct nfs_write_data *nfs_commitdata_alloc(void)
{
	struct nfs_write_data *p = mempool_alloc(nfs_commit_mempool, GFP_NOFS);

	if (p) {
		memset(p, 0, sizeof(*p));
		INIT_LIST_HEAD(&p->pages);
	}
	return p;
}
EXPORT_SYMBOL_GPL(nfs_commitdata_alloc);

void nfs_commit_free(struct nfs_write_data *p)
{
	if (p && (p->pagevec != &p->page_array[0]))
		kfree(p->pagevec);
	mempool_free(p, nfs_commit_mempool);
}
EXPORT_SYMBOL_GPL(nfs_commit_free);

struct nfs_write_data *nfs_writedata_alloc(unsigned int pagecount)
{
	struct nfs_write_data *p = mempool_alloc(nfs_wdata_mempool, GFP_NOFS);

	if (p) {
		memset(p, 0, sizeof(*p));
		INIT_LIST_HEAD(&p->pages);
		p->npages = pagecount;
		if (pagecount <= ARRAY_SIZE(p->page_array))
			p->pagevec = p->page_array;
		else {
			p->pagevec = kcalloc(pagecount, sizeof(struct page *), GFP_NOFS);
			if (!p->pagevec) {
				mempool_free(p, nfs_wdata_mempool);
				p = NULL;
			}
		}
	}
	return p;
}

void nfs_writedata_free(struct nfs_write_data *p)
{
	if (p && (p->pagevec != &p->page_array[0]))
		kfree(p->pagevec);
	mempool_free(p, nfs_wdata_mempool);
}

void nfs_writedata_release(struct nfs_write_data *wdata)
{
	put_nfs_open_context(wdata->args.context);
	nfs_writedata_free(wdata);
}

static void nfs_context_set_write_error(struct nfs_open_context *ctx, int error)
{
	ctx->error = error;
	smp_wmb();
	set_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags);
}

static struct nfs_page *nfs_page_find_request_locked(struct page *page)
{
	struct nfs_page *req = NULL;

	if (PagePrivate(page)) {
		req = (struct nfs_page *)page_private(page);
		if (req != NULL)
			kref_get(&req->wb_kref);
	}
	return req;
}

static struct nfs_page *nfs_page_find_request(struct page *page)
{
	struct inode *inode = page->mapping->host;
	struct nfs_page *req = NULL;

	spin_lock(&inode->i_lock);
	req = nfs_page_find_request_locked(page);
	spin_unlock(&inode->i_lock);
	return req;
}
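/*
 * Note on the page <-> request association (summarising the helpers
 * above): a page with an outstanding write request has PG_private set
 * and page_private(page) pointing at the struct nfs_page.  Both the
 * lookup and any update of that association are serialised by the
 * owning inode's i_lock, which is why the _locked variant must only be
 * called with that lock held.
 */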
/* Adjust the file length if we're writing beyond the end */
static void nfs_grow_file(struct page *page, unsigned int offset, unsigned int count)
{
	struct inode *inode = page->mapping->host;
	loff_t end, i_size;
	pgoff_t end_index;

	spin_lock(&inode->i_lock);
	i_size = i_size_read(inode);
	end_index = (i_size - 1) >> PAGE_CACHE_SHIFT;
	if (i_size > 0 && page->index < end_index)
		goto out;
	end = ((loff_t)page->index << PAGE_CACHE_SHIFT) + ((loff_t)offset+count);
	if (i_size >= end)
		goto out;
	i_size_write(inode, end);
	nfs_inc_stats(inode, NFSIOS_EXTENDWRITE);
out:
	spin_unlock(&inode->i_lock);
}

/* A writeback failed: mark the page as bad, and invalidate the page cache */
static void nfs_set_pageerror(struct page *page)
{
	SetPageError(page);
	nfs_zap_mapping(page->mapping->host, page->mapping);
}

/* We can set the PG_uptodate flag if we see that a write request
 * covers the full page.
 */
static void nfs_mark_uptodate(struct page *page, unsigned int base, unsigned int count)
{
	if (PageUptodate(page))
		return;
	if (base != 0)
		return;
	if (count != nfs_page_length(page))
		return;
	SetPageUptodate(page);
}

static int wb_priority(struct writeback_control *wbc)
{
	if (wbc->for_reclaim)
		return FLUSH_HIGHPRI | FLUSH_STABLE;
	if (wbc->for_kupdate || wbc->for_background)
		return FLUSH_LOWPRI | FLUSH_COND_STABLE;
	return FLUSH_COND_STABLE;
}

/*
 * NFS congestion control
 */

int nfs_congestion_kb;

#define NFS_CONGESTION_ON_THRESH	(nfs_congestion_kb >> (PAGE_SHIFT-10))
#define NFS_CONGESTION_OFF_THRESH	\
	(NFS_CONGESTION_ON_THRESH - (NFS_CONGESTION_ON_THRESH >> 2))

static int nfs_set_page_writeback(struct page *page)
{
	int ret = test_set_page_writeback(page);

	if (!ret) {
		struct inode *inode = page->mapping->host;
		struct nfs_server *nfss = NFS_SERVER(inode);

		page_cache_get(page);
		if (atomic_long_inc_return(&nfss->writeback) >
				NFS_CONGESTION_ON_THRESH) {
			set_bdi_congested(&nfss->backing_dev_info,
						BLK_RW_ASYNC);
		}
	}
	return ret;
}

static void nfs_end_page_writeback(struct page *page)
{
	struct inode *inode = page->mapping->host;
	struct nfs_server *nfss = NFS_SERVER(inode);

	end_page_writeback(page);
	page_cache_release(page);
	if (atomic_long_dec_return(&nfss->writeback) < NFS_CONGESTION_OFF_THRESH)
		clear_bdi_congested(&nfss->backing_dev_info, BLK_RW_ASYNC);
}
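/*
 * Worked example of the thresholds above (illustrative figures only,
 * assuming 4 KB pages, i.e. PAGE_SHIFT == 12): with nfs_congestion_kb
 * set to 65536 (the default on a 4 GB machine; see the sizing table in
 * nfs_init_writepagecache below), the on threshold is
 * 65536 >> 2 = 16384 pages in flight, and the off threshold is
 * 16384 - 16384/4 = 12288 pages.  The gap provides hysteresis so the
 * bdi does not flap between congested and uncongested.
 */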
static struct nfs_page *nfs_find_and_lock_request(struct page *page, bool nonblock)
{
	struct inode *inode = page->mapping->host;
	struct nfs_page *req;
	int ret;

	spin_lock(&inode->i_lock);
	for (;;) {
		req = nfs_page_find_request_locked(page);
		if (req == NULL)
			break;
		if (nfs_lock_request_dontget(req))
			break;
		/* Note: If we hold the page lock, as is the case in nfs_writepage,
		 *	 then the call to nfs_lock_request_dontget() will always
		 *	 succeed provided that someone hasn't already marked the
		 *	 request as dirty (in which case we don't care).
		 */
		spin_unlock(&inode->i_lock);
		if (!nonblock)
			ret = nfs_wait_on_request(req);
		else
			ret = -EAGAIN;
		nfs_release_request(req);
		if (ret != 0)
			return ERR_PTR(ret);
		spin_lock(&inode->i_lock);
	}
	spin_unlock(&inode->i_lock);
	return req;
}

/*
 * Find an associated nfs write request, and prepare to flush it out
 * May return an error if the user signalled nfs_wait_on_request().
 */
static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio,
				struct page *page, bool nonblock)
{
	struct nfs_page *req;
	int ret = 0;

	req = nfs_find_and_lock_request(page, nonblock);
	if (!req)
		goto out;
	ret = PTR_ERR(req);
	if (IS_ERR(req))
		goto out;

	ret = nfs_set_page_writeback(page);
	BUG_ON(ret != 0);
	BUG_ON(test_bit(PG_CLEAN, &req->wb_flags));

	if (!nfs_pageio_add_request(pgio, req)) {
		nfs_redirty_request(req);
		ret = pgio->pg_error;
	}
out:
	return ret;
}

static int nfs_do_writepage(struct page *page, struct writeback_control *wbc, struct nfs_pageio_descriptor *pgio)
{
	struct inode *inode = page->mapping->host;
	int ret;

	nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGE);
	nfs_add_stats(inode, NFSIOS_WRITEPAGES, 1);

	nfs_pageio_cond_complete(pgio, page->index);
	ret = nfs_page_async_flush(pgio, page, wbc->sync_mode == WB_SYNC_NONE);
	if (ret == -EAGAIN) {
		redirty_page_for_writepage(wbc, page);
		ret = 0;
	}
	return ret;
}

/*
 * Write an mmapped page to the server.
 */
static int nfs_writepage_locked(struct page *page, struct writeback_control *wbc)
{
	struct nfs_pageio_descriptor pgio;
	int err;

	nfs_pageio_init_write(&pgio, page->mapping->host, wb_priority(wbc));
	err = nfs_do_writepage(page, wbc, &pgio);
	nfs_pageio_complete(&pgio);
	if (err < 0)
		return err;
	if (pgio.pg_error < 0)
		return pgio.pg_error;
	return 0;
}

int nfs_writepage(struct page *page, struct writeback_control *wbc)
{
	int ret;

	ret = nfs_writepage_locked(page, wbc);
	unlock_page(page);
	return ret;
}

static int nfs_writepages_callback(struct page *page, struct writeback_control *wbc, void *data)
{
	int ret;

	ret = nfs_do_writepage(page, wbc, data);
	unlock_page(page);
	return ret;
}

int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc)
{
	struct inode *inode = mapping->host;
	unsigned long *bitlock = &NFS_I(inode)->flags;
	struct nfs_pageio_descriptor pgio;
	int err;

	/* Stop dirtying of new pages while we sync */
	err = wait_on_bit_lock(bitlock, NFS_INO_FLUSHING,
			nfs_wait_bit_killable, TASK_KILLABLE);
	if (err)
		goto out_err;

	nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGES);

	nfs_pageio_init_write(&pgio, inode, wb_priority(wbc));
	err = write_cache_pages(mapping, wbc, nfs_writepages_callback, &pgio);
	nfs_pageio_complete(&pgio);

	clear_bit_unlock(NFS_INO_FLUSHING, bitlock);
	smp_mb__after_clear_bit();
	wake_up_bit(bitlock, NFS_INO_FLUSHING);

	if (err < 0)
		goto out_err;
	err = pgio.pg_error;
	if (err < 0)
		goto out_err;
	return 0;
out_err:
	return err;
}
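/*
 * Summary of the writeback call chain above: ->writepages() takes the
 * per-inode NFS_INO_FLUSHING bit, then lets write_cache_pages() feed
 * each dirty, locked page through nfs_do_writepage() ->
 * nfs_page_async_flush(), which looks up the page's nfs_page request
 * and queues it on the nfs_pageio_descriptor.  The descriptor batches
 * contiguous requests up to the server's wsize and issues them via its
 * pg_doio operation, with nfs_pageio_complete() flushing the final
 * batch.
 */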
/*
 * Insert a write request into an inode
 */
static void nfs_inode_add_request(struct inode *inode, struct nfs_page *req)
{
	struct nfs_inode *nfsi = NFS_I(inode);

	/* Lock the request! */
	nfs_lock_request_dontget(req);

	spin_lock(&inode->i_lock);
	if (!nfsi->npages && nfs_have_delegation(inode, FMODE_WRITE))
		inode->i_version++;
	set_bit(PG_MAPPED, &req->wb_flags);
	SetPagePrivate(req->wb_page);
	set_page_private(req->wb_page, (unsigned long)req);
	nfsi->npages++;
	kref_get(&req->wb_kref);
	spin_unlock(&inode->i_lock);
}

/*
 * Remove a write request from an inode
 */
static void nfs_inode_remove_request(struct nfs_page *req)
{
	struct inode *inode = req->wb_context->dentry->d_inode;
	struct nfs_inode *nfsi = NFS_I(inode);

	BUG_ON(!NFS_WBACK_BUSY(req));

	spin_lock(&inode->i_lock);
	set_page_private(req->wb_page, 0);
	ClearPagePrivate(req->wb_page);
	clear_bit(PG_MAPPED, &req->wb_flags);
	nfsi->npages--;
	spin_unlock(&inode->i_lock);
	nfs_release_request(req);
}

static void
nfs_mark_request_dirty(struct nfs_page *req)
{
	__set_page_dirty_nobuffers(req->wb_page);
}

#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
/**
 * nfs_request_add_commit_list - add request to a commit list
 * @req: pointer to a struct nfs_page
 * @head: commit list head
 *
 * This sets the PG_CLEAN bit, updates the inode global count of
 * number of outstanding requests requiring a commit as well as
 * the MM page stats.
 *
 * The caller must _not_ hold the inode->i_lock, but must be
 * holding the nfs_page lock.
 */
void
nfs_request_add_commit_list(struct nfs_page *req, struct list_head *head)
{
	struct inode *inode = req->wb_context->dentry->d_inode;

	set_bit(PG_CLEAN, &(req)->wb_flags);
	spin_lock(&inode->i_lock);
	nfs_list_add_request(req, head);
	NFS_I(inode)->ncommit++;
	spin_unlock(&inode->i_lock);
	inc_zone_page_state(req->wb_page, NR_UNSTABLE_NFS);
	inc_bdi_stat(req->wb_page->mapping->backing_dev_info, BDI_RECLAIMABLE);
	__mark_inode_dirty(inode, I_DIRTY_DATASYNC);
}
EXPORT_SYMBOL_GPL(nfs_request_add_commit_list);

/**
 * nfs_request_remove_commit_list - Remove request from a commit list
 * @req: pointer to a nfs_page
 *
 * This clears the PG_CLEAN bit, and updates the inode global count of
 * number of outstanding requests requiring a commit
 * It does not update the MM page stats.
 *
 * The caller _must_ hold the inode->i_lock and the nfs_page lock.
 */
void
nfs_request_remove_commit_list(struct nfs_page *req)
{
	struct inode *inode = req->wb_context->dentry->d_inode;

	if (!test_and_clear_bit(PG_CLEAN, &(req)->wb_flags))
		return;
	nfs_list_remove_request(req);
	NFS_I(inode)->ncommit--;
}
EXPORT_SYMBOL_GPL(nfs_request_remove_commit_list);
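/*
 * Note the asymmetry between the two helpers above: the add side bumps
 * NR_UNSTABLE_NFS and BDI_RECLAIMABLE, but the remove side deliberately
 * leaves the MM page stats alone.  Every removal therefore has to be
 * paired with nfs_clear_page_commit() (below) once the request is off
 * the commit list, as nfs_clear_request_commit() and
 * nfs_commit_release_pages() both do.
 */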
/*
 * Add a request to the inode's commit list.
 */
static void
nfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg)
{
	struct inode *inode = req->wb_context->dentry->d_inode;

	if (pnfs_mark_request_commit(req, lseg))
		return;
	nfs_request_add_commit_list(req, &NFS_I(inode)->commit_list);
}

static void
nfs_clear_page_commit(struct page *page)
{
	dec_zone_page_state(page, NR_UNSTABLE_NFS);
	dec_bdi_stat(page->mapping->backing_dev_info, BDI_RECLAIMABLE);
}

static void
nfs_clear_request_commit(struct nfs_page *req)
{
	if (test_bit(PG_CLEAN, &req->wb_flags)) {
		struct inode *inode = req->wb_context->dentry->d_inode;

		if (!pnfs_clear_request_commit(req)) {
			spin_lock(&inode->i_lock);
			nfs_request_remove_commit_list(req);
			spin_unlock(&inode->i_lock);
		}
		nfs_clear_page_commit(req->wb_page);
	}
}

static inline
int nfs_write_need_commit(struct nfs_write_data *data)
{
	if (data->verf.committed == NFS_DATA_SYNC)
		return data->lseg == NULL;
	else
		return data->verf.committed != NFS_FILE_SYNC;
}

static inline
int nfs_reschedule_unstable_write(struct nfs_page *req,
				  struct nfs_write_data *data)
{
	if (test_and_clear_bit(PG_NEED_COMMIT, &req->wb_flags)) {
		nfs_mark_request_commit(req, data->lseg);
		return 1;
	}
	if (test_and_clear_bit(PG_NEED_RESCHED, &req->wb_flags)) {
		nfs_mark_request_dirty(req);
		return 1;
	}
	return 0;
}
#else
static void
nfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg)
{
}

static void
nfs_clear_request_commit(struct nfs_page *req)
{
}

static inline
int nfs_write_need_commit(struct nfs_write_data *data)
{
	return 0;
}

static inline
int nfs_reschedule_unstable_write(struct nfs_page *req,
				  struct nfs_write_data *data)
{
	return 0;
}
#endif

#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
static int
nfs_need_commit(struct nfs_inode *nfsi)
{
	return nfsi->ncommit > 0;
}

/* i_lock held by caller */
static int
nfs_scan_commit_list(struct list_head *src, struct list_head *dst, int max,
		spinlock_t *lock)
{
	struct nfs_page *req, *tmp;
	int ret = 0;

	list_for_each_entry_safe(req, tmp, src, wb_list) {
		if (!nfs_lock_request(req))
			continue;
		if (cond_resched_lock(lock))
			list_safe_reset_next(req, tmp, wb_list);
		nfs_request_remove_commit_list(req);
		nfs_list_add_request(req, dst);
		ret++;
		if (ret == max)
			break;
	}
	return ret;
}
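/*
 * Why list_safe_reset_next() is needed above: cond_resched_lock() may
 * drop and retake the i_lock to allow a reschedule.  While the lock is
 * dropped, another thread can remove the entry that tmp points at, so
 * the cached next pointer must be refetched from req (which remains
 * safe to dereference because we hold its request lock) before the
 * walk continues.
 */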
/*
 * nfs_scan_commit - Scan an inode for commit requests
 * @inode: NFS inode to scan
 * @dst: destination list
 *
 * Moves requests from the inode's 'commit' request list.
 * The requests are *not* checked to ensure that they form a contiguous set.
 */
static int
nfs_scan_commit(struct inode *inode, struct list_head *dst)
{
	struct nfs_inode *nfsi = NFS_I(inode);
	int ret = 0;

	spin_lock(&inode->i_lock);
	if (nfsi->ncommit > 0) {
		const int max = INT_MAX;

		ret = nfs_scan_commit_list(&nfsi->commit_list, dst, max,
				&inode->i_lock);
		ret += pnfs_scan_commit_lists(inode, max - ret,
				&inode->i_lock);
	}
	spin_unlock(&inode->i_lock);
	return ret;
}

#else
static inline int nfs_need_commit(struct nfs_inode *nfsi)
{
	return 0;
}

static inline int nfs_scan_commit(struct inode *inode, struct list_head *dst)
{
	return 0;
}
#endif

/*
 * Search for an existing write request, and attempt to update
 * it to reflect a new dirty region on a given page.
 *
 * If the attempt fails, then the existing request is flushed out
 * to disk.
 */
static struct nfs_page *nfs_try_to_update_request(struct inode *inode,
		struct page *page,
		unsigned int offset,
		unsigned int bytes)
{
	struct nfs_page *req;
	unsigned int rqend;
	unsigned int end;
	int error;

	if (!PagePrivate(page))
		return NULL;

	end = offset + bytes;
	spin_lock(&inode->i_lock);

	for (;;) {
		req = nfs_page_find_request_locked(page);
		if (req == NULL)
			goto out_unlock;

		rqend = req->wb_offset + req->wb_bytes;
		/*
		 * Tell the caller to flush out the request if
		 * the offsets are non-contiguous.
		 * Note: nfs_flush_incompatible() will already
		 * have flushed out requests having wrong owners.
		 */
		if (offset > rqend
		    || end < req->wb_offset)
			goto out_flushme;

		if (nfs_lock_request_dontget(req))
			break;

		/* The request is locked, so wait and then retry */
		spin_unlock(&inode->i_lock);
		error = nfs_wait_on_request(req);
		nfs_release_request(req);
		if (error != 0)
			goto out_err;
		spin_lock(&inode->i_lock);
	}

	/* Okay, the request matches. Update the region */
	if (offset < req->wb_offset) {
		req->wb_offset = offset;
		req->wb_pgbase = offset;
	}
	if (end > rqend)
		req->wb_bytes = end - req->wb_offset;
	else
		req->wb_bytes = rqend - req->wb_offset;
out_unlock:
	spin_unlock(&inode->i_lock);
	if (req)
		nfs_clear_request_commit(req);
	return req;
out_flushme:
	spin_unlock(&inode->i_lock);
	nfs_release_request(req);
	error = nfs_wb_page(inode, page);
out_err:
	return ERR_PTR(error);
}
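/*
 * A worked example of the region update above (hypothetical numbers):
 * suppose an existing request covers bytes [0, 512) of the page
 * (wb_offset = 0, wb_bytes = 512) and a new write dirties [256, 1024).
 * offset (256) is not past rqend (512) and end (1024) is not before
 * wb_offset (0), so the ranges are contiguous and the request is
 * extended in place: wb_offset stays 0, and since end > rqend,
 * wb_bytes becomes 1024 - 0 = 1024.  A write at, say, [2048, 2560)
 * would instead take the out_flushme path.
 */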
/*
 * Try to update an existing write request, or create one if there is none.
 *
 * Note: Should always be called with the Page Lock held to prevent races
 * if we have to add a new request. Also assumes that the caller has
 * already called nfs_flush_incompatible() if necessary.
 */
static struct nfs_page *nfs_setup_write_request(struct nfs_open_context *ctx,
		struct page *page, unsigned int offset, unsigned int bytes)
{
	struct inode *inode = page->mapping->host;
	struct nfs_page	*req;

	req = nfs_try_to_update_request(inode, page, offset, bytes);
	if (req != NULL)
		goto out;
	req = nfs_create_request(ctx, inode, page, offset, bytes);
	if (IS_ERR(req))
		goto out;
	nfs_inode_add_request(inode, req);
out:
	return req;
}

static int nfs_writepage_setup(struct nfs_open_context *ctx, struct page *page,
		unsigned int offset, unsigned int count)
{
	struct nfs_page	*req;

	req = nfs_setup_write_request(ctx, page, offset, count);
	if (IS_ERR(req))
		return PTR_ERR(req);
	/* Update file length */
	nfs_grow_file(page, offset, count);
	nfs_mark_uptodate(page, req->wb_pgbase, req->wb_bytes);
	nfs_mark_request_dirty(req);
	nfs_unlock_request(req);
	return 0;
}

int nfs_flush_incompatible(struct file *file, struct page *page)
{
	struct nfs_open_context *ctx = nfs_file_open_context(file);
	struct nfs_page	*req;
	int do_flush, status;
	/*
	 * Look for a request corresponding to this page. If there
	 * is one, and it belongs to another file, we flush it out
	 * before we try to copy anything into the page. Do this
	 * due to the lack of an ACCESS-type call in NFSv2.
	 * Also do the same if we find a request from an existing
	 * dropped page.
	 */
	do {
		req = nfs_page_find_request(page);
		if (req == NULL)
			return 0;
		do_flush = req->wb_page != page || req->wb_context != ctx ||
			req->wb_lock_context->lockowner != current->files ||
			req->wb_lock_context->pid != current->tgid;
		nfs_release_request(req);
		if (!do_flush)
			return 0;
		status = nfs_wb_page(page->mapping->host, page);
	} while (status == 0);
	return status;
}

/*
 * If the page cache is marked as unsafe or invalid, then we can't rely on
 * the PageUptodate() flag. In this case, we will need to turn off
 * write optimisations that depend on the page contents being correct.
 */
static int nfs_write_pageuptodate(struct page *page, struct inode *inode)
{
	return PageUptodate(page) &&
		!(NFS_I(inode)->cache_validity & (NFS_INO_REVAL_PAGECACHE|NFS_INO_INVALID_DATA));
}
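/*
 * Example of the write-extension optimisation performed in
 * nfs_updatepage() below (hypothetical numbers, assuming the page lies
 * well inside the file): with an up-to-date page, no posix locks on
 * the inode and no O_DSYNC, a 100-byte write at offset 200 within the
 * page is widened to offset = 0 and count = nfs_page_length(page),
 * i.e. the whole valid part of the page, so that one large
 * on-the-wire WRITE replaces several small, fragmented ones.
 */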
/*
 * Update and possibly write a cached page of an NFS file.
 *
 * XXX: Keep an eye on generic_file_read to make sure it doesn't do bad
 * things with a page scheduled for an RPC call (e.g. invalidate it).
 */
int nfs_updatepage(struct file *file, struct page *page,
		unsigned int offset, unsigned int count)
{
	struct nfs_open_context *ctx = nfs_file_open_context(file);
	struct inode *inode = page->mapping->host;
	int status = 0;

	nfs_inc_stats(inode, NFSIOS_VFSUPDATEPAGE);

	dprintk("NFS: nfs_updatepage(%s/%s %d@%lld)\n",
		file->f_path.dentry->d_parent->d_name.name,
		file->f_path.dentry->d_name.name, count,
		(long long)(page_offset(page) + offset));

	/* If we're not using byte range locks, and we know the page
	 * is up to date, it may be more efficient to extend the write
	 * to cover the entire page in order to avoid fragmentation
	 * inefficiencies.
	 */
	if (nfs_write_pageuptodate(page, inode) &&
			inode->i_flock == NULL &&
			!(file->f_flags & O_DSYNC)) {
		count = max(count + offset, nfs_page_length(page));
		offset = 0;
	}

	status = nfs_writepage_setup(ctx, page, offset, count);
	if (status < 0)
		nfs_set_pageerror(page);
	else
		__set_page_dirty_nobuffers(page);

	dprintk("NFS: nfs_updatepage returns %d (isize %lld)\n",
			status, (long long)i_size_read(inode));
	return status;
}

static void nfs_writepage_release(struct nfs_page *req,
				  struct nfs_write_data *data)
{
	struct page *page = req->wb_page;

	if (PageError(req->wb_page) || !nfs_reschedule_unstable_write(req, data))
		nfs_inode_remove_request(req);
	nfs_unlock_request(req);
	nfs_end_page_writeback(page);
}

static int flush_task_priority(int how)
{
	switch (how & (FLUSH_HIGHPRI|FLUSH_LOWPRI)) {
	case FLUSH_HIGHPRI:
		return RPC_PRIORITY_HIGH;
	case FLUSH_LOWPRI:
		return RPC_PRIORITY_LOW;
	}
	return RPC_PRIORITY_NORMAL;
}

int nfs_initiate_write(struct nfs_write_data *data,
		       struct rpc_clnt *clnt,
		       const struct rpc_call_ops *call_ops,
		       int how)
{
	struct inode *inode = data->inode;
	int priority = flush_task_priority(how);
	struct rpc_task *task;
	struct rpc_message msg = {
		.rpc_argp = &data->args,
		.rpc_resp = &data->res,
		.rpc_cred = data->cred,
	};
	struct rpc_task_setup task_setup_data = {
		.rpc_client = clnt,
		.task = &data->task,
		.rpc_message = &msg,
		.callback_ops = call_ops,
		.callback_data = data,
		.workqueue = nfsiod_workqueue,
		.flags = RPC_TASK_ASYNC,
		.priority = priority,
	};
	int ret = 0;

	/* Set up the initial task struct. */
	NFS_PROTO(inode)->write_setup(data, &msg);

	dprintk("NFS: %5u initiated write call "
		"(req %s/%lld, %u bytes @ offset %llu)\n",
		data->task.tk_pid,
		inode->i_sb->s_id,
		(long long)NFS_FILEID(inode),
		data->args.count,
		(unsigned long long)data->args.offset);

	task = rpc_run_task(&task_setup_data);
	if (IS_ERR(task)) {
		ret = PTR_ERR(task);
		goto out;
	}
	if (how & FLUSH_SYNC) {
		ret = rpc_wait_for_completion_task(task);
		if (ret == 0)
			ret = task->tk_status;
	}
	rpc_put_task(task);
out:
	return ret;
}
EXPORT_SYMBOL_GPL(nfs_initiate_write);
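/*
 * How the flush flags map onto the NFS "stable" argument set up in
 * nfs_write_rpcsetup() below: the default is NFS_UNSTABLE (the server
 * may cache the data, and we follow up with a COMMIT); FLUSH_STABLE
 * forces NFS_FILE_SYNC; FLUSH_COND_STABLE asks for NFS_FILE_SYNC only
 * when no other request on the inode is awaiting a commit.  If a
 * COMMIT is already unavoidable, the write is sent unstable and rides
 * along with that commit.
 */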
/*
 * Set up the argument/result storage required for the RPC call.
 */
static void nfs_write_rpcsetup(struct nfs_page *req,
		struct nfs_write_data *data,
		unsigned int count, unsigned int offset,
		int how)
{
	struct inode *inode = req->wb_context->dentry->d_inode;

	/* Set up the RPC argument and reply structs
	 * NB: take care not to mess about with data->commit et al. */

	data->req = req;
	data->inode = inode = req->wb_context->dentry->d_inode;
	data->cred = req->wb_context->cred;

	data->args.fh = NFS_FH(inode);
	data->args.offset = req_offset(req) + offset;
	/* pnfs_set_layoutcommit needs this */
	data->mds_offset = data->args.offset;
	data->args.pgbase = req->wb_pgbase + offset;
	data->args.pages = data->pagevec;
	data->args.count = count;
	data->args.context = get_nfs_open_context(req->wb_context);
	data->args.lock_context = req->wb_lock_context;
	data->args.stable = NFS_UNSTABLE;
	switch (how & (FLUSH_STABLE | FLUSH_COND_STABLE)) {
	case 0:
		break;
	case FLUSH_COND_STABLE:
		if (nfs_need_commit(NFS_I(inode)))
			break;
	default:
		data->args.stable = NFS_FILE_SYNC;
	}

	data->res.fattr = &data->fattr;
	data->res.count = count;
	data->res.verf = &data->verf;
	nfs_fattr_init(&data->fattr);
}

static int nfs_do_write(struct nfs_write_data *data,
		const struct rpc_call_ops *call_ops,
		int how)
{
	struct inode *inode = data->args.context->dentry->d_inode;

	return nfs_initiate_write(data, NFS_CLIENT(inode), call_ops, how);
}

static int nfs_do_multiple_writes(struct list_head *head,
		const struct rpc_call_ops *call_ops,
		int how)
{
	struct nfs_write_data *data;
	int ret = 0;

	while (!list_empty(head)) {
		int ret2;

		data = list_entry(head->next, struct nfs_write_data, list);
		list_del_init(&data->list);

		ret2 = nfs_do_write(data, call_ops, how);
		if (ret == 0)
			ret = ret2;
	}
	return ret;
}

/* If a nfs_flush_* function fails, it should remove reqs from @head and
 * call this on each, which will prepare them to be retried on next
 * writeback using standard nfs.
 */
static void nfs_redirty_request(struct nfs_page *req)
{
	struct page *page = req->wb_page;

	nfs_mark_request_dirty(req);
	nfs_unlock_request(req);
	nfs_end_page_writeback(page);
}

/*
 * Generate multiple small requests to write out a single
 * contiguous dirty area on one page.
 */
static int nfs_flush_multi(struct nfs_pageio_descriptor *desc, struct list_head *res)
{
	struct nfs_page *req = nfs_list_entry(desc->pg_list.next);
	struct page *page = req->wb_page;
	struct nfs_write_data *data;
	size_t wsize = desc->pg_bsize, nbytes;
	unsigned int offset;
	int requests = 0;
	int ret = 0;

	nfs_list_remove_request(req);

	if ((desc->pg_ioflags & FLUSH_COND_STABLE) &&
	    (desc->pg_moreio || NFS_I(desc->pg_inode)->ncommit ||
	     desc->pg_count > wsize))
		desc->pg_ioflags &= ~FLUSH_COND_STABLE;

	offset = 0;
	nbytes = desc->pg_count;
	do {
		size_t len = min(nbytes, wsize);

		data = nfs_writedata_alloc(1);
		if (!data)
			goto out_bad;
		data->pagevec[0] = page;
		nfs_write_rpcsetup(req, data, len, offset, desc->pg_ioflags);
		list_add(&data->list, res);
		requests++;
		nbytes -= len;
		offset += len;
	} while (nbytes != 0);
	atomic_set(&req->wb_complete, requests);
	desc->pg_rpc_callops = &nfs_write_partial_ops;
	return ret;

out_bad:
	while (!list_empty(res)) {
		data = list_entry(res->next, struct nfs_write_data, list);
		list_del(&data->list);
		nfs_writedata_release(data);
	}
	nfs_redirty_request(req);
	return -ENOMEM;
}
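/*
 * Splitting example for nfs_flush_multi() (illustrative numbers):
 * with a 4096-byte dirty range and a server wsize of 1024
 * (pg_bsize < PAGE_CACHE_SIZE is what routes us here, see
 * nfs_generic_flush below), the loop above builds four nfs_write_data
 * chunks of 1024 bytes at offsets 0, 1024, 2048 and 3072, all
 * referencing the same page, and wb_complete is set to 4 so the page
 * is only unlocked after the last partial write completes (see
 * nfs_writeback_release_partial).
 */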
/*
 * Create an RPC task for the given write request and kick it.
 * The page must have been locked by the caller.
 *
 * It may happen that the page we're passed is not marked dirty.
 * This is the case if nfs_updatepage detects a conflicting request
 * that has been written but not committed.
 */
static int nfs_flush_one(struct nfs_pageio_descriptor *desc, struct list_head *res)
{
	struct nfs_page	*req;
	struct page	**pages;
	struct nfs_write_data *data;
	struct list_head *head = &desc->pg_list;
	int ret = 0;

	data = nfs_writedata_alloc(nfs_page_array_len(desc->pg_base,
						      desc->pg_count));
	if (!data) {
		while (!list_empty(head)) {
			req = nfs_list_entry(head->next);
			nfs_list_remove_request(req);
			nfs_redirty_request(req);
		}
		ret = -ENOMEM;
		goto out;
	}
	pages = data->pagevec;
	while (!list_empty(head)) {
		req = nfs_list_entry(head->next);
		nfs_list_remove_request(req);
		nfs_list_add_request(req, &data->pages);
		*pages++ = req->wb_page;
	}
	req = nfs_list_entry(data->pages.next);

	if ((desc->pg_ioflags & FLUSH_COND_STABLE) &&
	    (desc->pg_moreio || NFS_I(desc->pg_inode)->ncommit))
		desc->pg_ioflags &= ~FLUSH_COND_STABLE;

	/* Set up the argument struct */
	nfs_write_rpcsetup(req, data, desc->pg_count, 0, desc->pg_ioflags);
	list_add(&data->list, res);
	desc->pg_rpc_callops = &nfs_write_full_ops;
out:
	return ret;
}

int nfs_generic_flush(struct nfs_pageio_descriptor *desc, struct list_head *head)
{
	if (desc->pg_bsize < PAGE_CACHE_SIZE)
		return nfs_flush_multi(desc, head);
	return nfs_flush_one(desc, head);
}

static int nfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc)
{
	LIST_HEAD(head);
	int ret;

	ret = nfs_generic_flush(desc, &head);
	if (ret == 0)
		ret = nfs_do_multiple_writes(&head, desc->pg_rpc_callops,
				desc->pg_ioflags);
	return ret;
}

static const struct nfs_pageio_ops nfs_pageio_write_ops = {
	.pg_test = nfs_generic_pg_test,
	.pg_doio = nfs_generic_pg_writepages,
};

void nfs_pageio_init_write_mds(struct nfs_pageio_descriptor *pgio,
			       struct inode *inode, int ioflags)
{
	nfs_pageio_init(pgio, inode, &nfs_pageio_write_ops,
			NFS_SERVER(inode)->wsize, ioflags);
}

void nfs_pageio_reset_write_mds(struct nfs_pageio_descriptor *pgio)
{
	pgio->pg_ops = &nfs_pageio_write_ops;
	pgio->pg_bsize = NFS_SERVER(pgio->pg_inode)->wsize;
}
EXPORT_SYMBOL_GPL(nfs_pageio_reset_write_mds);

static void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio,
				  struct inode *inode, int ioflags)
{
	if (!pnfs_pageio_init_write(pgio, inode, ioflags))
		nfs_pageio_init_write_mds(pgio, inode, ioflags);
}
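/*
 * Path selection above: nfs_pageio_init_write() first offers the
 * descriptor to pNFS; if no layout driver claims it,
 * pnfs_pageio_init_write() returns false and I/O falls back to the
 * plain MDS write path with the server's negotiated wsize as the
 * batch limit.  nfs_pageio_reset_write_mds() is used to redirect an
 * already-initialised descriptor back through the MDS.
 */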
/*
 * Handle a write reply that flushed part of a page.
 */
static void nfs_writeback_done_partial(struct rpc_task *task, void *calldata)
{
	struct nfs_write_data *data = calldata;

	dprintk("NFS: %5u write(%s/%lld %d@%lld)",
		task->tk_pid,
		data->req->wb_context->dentry->d_inode->i_sb->s_id,
		(long long)
		  NFS_FILEID(data->req->wb_context->dentry->d_inode),
		data->req->wb_bytes, (long long)req_offset(data->req));

	nfs_writeback_done(task, data);
}

static void nfs_writeback_release_partial(void *calldata)
{
	struct nfs_write_data *data = calldata;
	struct nfs_page	*req = data->req;
	struct page *page = req->wb_page;
	int status = data->task.tk_status;

	if (status < 0) {
		nfs_set_pageerror(page);
		nfs_context_set_write_error(req->wb_context, status);
		dprintk(", error = %d\n", status);
		goto out;
	}

	if (nfs_write_need_commit(data)) {
		struct inode *inode = page->mapping->host;

		spin_lock(&inode->i_lock);
		if (test_bit(PG_NEED_RESCHED, &req->wb_flags)) {
			/* Do nothing we need to resend the writes */
		} else if (!test_and_set_bit(PG_NEED_COMMIT, &req->wb_flags)) {
			memcpy(&req->wb_verf, &data->verf, sizeof(req->wb_verf));
			dprintk(" defer commit\n");
		} else if (memcmp(&req->wb_verf, &data->verf, sizeof(req->wb_verf))) {
			set_bit(PG_NEED_RESCHED, &req->wb_flags);
			clear_bit(PG_NEED_COMMIT, &req->wb_flags);
			dprintk(" server reboot detected\n");
		}
		spin_unlock(&inode->i_lock);
	} else
		dprintk(" OK\n");

out:
	if (atomic_dec_and_test(&req->wb_complete))
		nfs_writepage_release(req, data);
	nfs_writedata_release(calldata);
}

void nfs_write_prepare(struct rpc_task *task, void *calldata)
{
	struct nfs_write_data *data = calldata;
	NFS_PROTO(data->inode)->write_rpc_prepare(task, data);
}

static const struct rpc_call_ops nfs_write_partial_ops = {
	.rpc_call_prepare = nfs_write_prepare,
	.rpc_call_done = nfs_writeback_done_partial,
	.rpc_release = nfs_writeback_release_partial,
};

/*
 * Handle a write reply that flushes a whole page.
 *
 * FIXME: There is an inherent race with invalidate_inode_pages and
 *	  writebacks since the page->count is kept > 1 for as long
 *	  as the page has a write request pending.
 */
static void nfs_writeback_done_full(struct rpc_task *task, void *calldata)
{
	struct nfs_write_data *data = calldata;

	nfs_writeback_done(task, data);
}
static void nfs_writeback_release_full(void *calldata)
{
	struct nfs_write_data *data = calldata;
	int status = data->task.tk_status;

	/* Update attributes as result of writeback. */
	while (!list_empty(&data->pages)) {
		struct nfs_page *req = nfs_list_entry(data->pages.next);
		struct page *page = req->wb_page;

		nfs_list_remove_request(req);

		dprintk("NFS: %5u write (%s/%lld %d@%lld)",
			data->task.tk_pid,
			req->wb_context->dentry->d_inode->i_sb->s_id,
			(long long)NFS_FILEID(req->wb_context->dentry->d_inode),
			req->wb_bytes,
			(long long)req_offset(req));

		if (status < 0) {
			nfs_set_pageerror(page);
			nfs_context_set_write_error(req->wb_context, status);
			dprintk(", error = %d\n", status);
			goto remove_request;
		}

		if (nfs_write_need_commit(data)) {
			memcpy(&req->wb_verf, &data->verf, sizeof(req->wb_verf));
			nfs_mark_request_commit(req, data->lseg);
			dprintk(" marked for commit\n");
			goto next;
		}
		dprintk(" OK\n");
remove_request:
		nfs_inode_remove_request(req);
next:
		nfs_unlock_request(req);
		nfs_end_page_writeback(page);
	}
	nfs_writedata_release(calldata);
}

static const struct rpc_call_ops nfs_write_full_ops = {
	.rpc_call_prepare = nfs_write_prepare,
	.rpc_call_done = nfs_writeback_done_full,
	.rpc_release = nfs_writeback_release_full,
};
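/*
 * Short-write arithmetic in nfs_writeback_done() below, with
 * hypothetical numbers: if we asked for 8192 bytes and the server
 * reports resp->count == 4096, then for a stable write the argument
 * struct is advanced by 4096 (offset, pgbase and mds_offset grow,
 * count shrinks to 4096) and the RPC is restarted so the remainder is
 * resent from where the server left off.  A short unstable write is
 * instead resent in full as NFS_FILE_SYNC to avoid verifier headaches
 * after a server crash.  A reply of zero bytes is treated as a fatal
 * -EIO after a rate-limited warning.
 */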
/*
 * This function is called when the WRITE call is complete.
 */
void nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data)
{
	struct nfs_writeargs *argp = &data->args;
	struct nfs_writeres *resp = &data->res;
	int status;

	dprintk("NFS: %5u nfs_writeback_done (status %d)\n",
		task->tk_pid, task->tk_status);

	/*
	 * ->write_done will attempt to use post-op attributes to detect
	 * conflicting writes by other clients. A strict interpretation
	 * of close-to-open would allow us to continue caching even if
	 * another writer had changed the file, but some applications
	 * depend on tighter cache coherency when writing.
	 */
	status = NFS_PROTO(data->inode)->write_done(task, data);
	if (status != 0)
		return;
	nfs_add_stats(data->inode, NFSIOS_SERVERWRITTENBYTES, resp->count);

#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
	if (resp->verf->committed < argp->stable && task->tk_status >= 0) {
		/* We tried a write call, but the server did not
		 * commit data to stable storage even though we
		 * requested it.
		 * Note: There is a known bug in Tru64 < 5.0 in which
		 *	 the server reports NFS_DATA_SYNC, but performs
		 *	 NFS_FILE_SYNC. We therefore implement this checking
		 *	 as a dprintk() in order to avoid filling syslog.
		 */
		static unsigned long complain;

		/* Note this will print the MDS for a DS write */
		if (time_before(complain, jiffies)) {
			dprintk("NFS: faulty NFS server %s:"
				" (committed = %d) != (stable = %d)\n",
				NFS_SERVER(data->inode)->nfs_client->cl_hostname,
				resp->verf->committed, argp->stable);
			complain = jiffies + 300 * HZ;
		}
	}
#endif
	/* Is this a short write? */
	if (task->tk_status >= 0 && resp->count < argp->count) {
		static unsigned long complain;

		nfs_inc_stats(data->inode, NFSIOS_SHORTWRITE);

		/* Has the server at least made some progress? */
		if (resp->count != 0) {
			/* Was this an NFSv2 write or an NFSv3 stable write? */
			if (resp->verf->committed != NFS_UNSTABLE) {
				/* Resend from where the server left off */
				data->mds_offset += resp->count;
				argp->offset += resp->count;
				argp->pgbase += resp->count;
				argp->count -= resp->count;
			} else {
				/* Resend as a stable write in order to avoid
				 * headaches in the case of a server crash.
				 */
				argp->stable = NFS_FILE_SYNC;
			}
			rpc_restart_call_prepare(task);
			return;
		}
		if (time_before(complain, jiffies)) {
			printk(KERN_WARNING
			       "NFS: Server wrote zero bytes, expected %u.\n",
					argp->count);
			complain = jiffies + 300 * HZ;
		}
		/* Can't do anything about it except throw an error. */
		task->tk_status = -EIO;
	}
	return;
}


#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
static int nfs_commit_set_lock(struct nfs_inode *nfsi, int may_wait)
{
	int ret;

	if (!test_and_set_bit(NFS_INO_COMMIT, &nfsi->flags))
		return 1;
	if (!may_wait)
		return 0;
	ret = out_of_line_wait_on_bit_lock(&nfsi->flags,
				NFS_INO_COMMIT,
				nfs_wait_bit_killable,
				TASK_KILLABLE);
	return (ret < 0) ? ret : 1;
}

void nfs_commit_clear_lock(struct nfs_inode *nfsi)
{
	clear_bit(NFS_INO_COMMIT, &nfsi->flags);
	smp_mb__after_clear_bit();
	wake_up_bit(&nfsi->flags, NFS_INO_COMMIT);
}
EXPORT_SYMBOL_GPL(nfs_commit_clear_lock);

void nfs_commitdata_release(void *data)
{
	struct nfs_write_data *wdata = data;

	put_nfs_open_context(wdata->args.context);
	nfs_commit_free(wdata);
}
EXPORT_SYMBOL_GPL(nfs_commitdata_release);

int nfs_initiate_commit(struct nfs_write_data *data, struct rpc_clnt *clnt,
			const struct rpc_call_ops *call_ops,
			int how)
{
	struct rpc_task *task;
	int priority = flush_task_priority(how);
	struct rpc_message msg = {
		.rpc_argp = &data->args,
		.rpc_resp = &data->res,
		.rpc_cred = data->cred,
	};
	struct rpc_task_setup task_setup_data = {
		.task = &data->task,
		.rpc_client = clnt,
		.rpc_message = &msg,
		.callback_ops = call_ops,
		.callback_data = data,
		.workqueue = nfsiod_workqueue,
		.flags = RPC_TASK_ASYNC,
		.priority = priority,
	};
	/* Set up the initial task struct. */
	NFS_PROTO(data->inode)->commit_setup(data, &msg);

	dprintk("NFS: %5u initiated commit call\n", data->task.tk_pid);

	task = rpc_run_task(&task_setup_data);
	if (IS_ERR(task))
		return PTR_ERR(task);
	if (how & FLUSH_SYNC)
		rpc_wait_for_completion_task(task);
	rpc_put_task(task);
	return 0;
}
EXPORT_SYMBOL_GPL(nfs_initiate_commit);
/*
 * Set up the argument/result storage required for the RPC call.
 */
void nfs_init_commit(struct nfs_write_data *data,
			    struct list_head *head,
			    struct pnfs_layout_segment *lseg)
{
	struct nfs_page *first = nfs_list_entry(head->next);
	struct inode *inode = first->wb_context->dentry->d_inode;

	/* Set up the RPC argument and reply structs
	 * NB: take care not to mess about with data->commit et al. */

	list_splice_init(head, &data->pages);

	data->inode = inode;
	data->cred = first->wb_context->cred;
	data->lseg = lseg; /* reference transferred */
	data->mds_ops = &nfs_commit_ops;

	data->args.fh = NFS_FH(data->inode);
	/* Note: we always request a commit of the entire inode */
	data->args.offset = 0;
	data->args.count = 0;
	data->args.context = get_nfs_open_context(first->wb_context);
	data->res.count = 0;
	data->res.fattr = &data->fattr;
	data->res.verf = &data->verf;
	nfs_fattr_init(&data->fattr);
}
EXPORT_SYMBOL_GPL(nfs_init_commit);

void nfs_retry_commit(struct list_head *page_list,
		      struct pnfs_layout_segment *lseg)
{
	struct nfs_page *req;

	while (!list_empty(page_list)) {
		req = nfs_list_entry(page_list->next);
		nfs_list_remove_request(req);
		nfs_mark_request_commit(req, lseg);
		dec_zone_page_state(req->wb_page, NR_UNSTABLE_NFS);
		dec_bdi_stat(req->wb_page->mapping->backing_dev_info,
			     BDI_RECLAIMABLE);
		nfs_unlock_request(req);
	}
}
EXPORT_SYMBOL_GPL(nfs_retry_commit);

/*
 * Commit dirty pages
 */
static int
nfs_commit_list(struct inode *inode, struct list_head *head, int how)
{
	struct nfs_write_data *data;

	data = nfs_commitdata_alloc();

	if (!data)
		goto out_bad;

	/* Set up the argument struct */
	nfs_init_commit(data, head, NULL);
	return nfs_initiate_commit(data, NFS_CLIENT(inode), data->mds_ops, how);
 out_bad:
	nfs_retry_commit(head, NULL);
	nfs_commit_clear_lock(NFS_I(inode));
	return -ENOMEM;
}

/*
 * COMMIT call returned
 */
static void nfs_commit_done(struct rpc_task *task, void *calldata)
{
	struct nfs_write_data *data = calldata;

	dprintk("NFS: %5u nfs_commit_done (status %d)\n",
		task->tk_pid, task->tk_status);

	/* Call the NFS version-specific code */
	NFS_PROTO(data->inode)->commit_done(task, data);
}
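/*
 * Verifier matching in nfs_commit_release_pages() below: every
 * unstable WRITE reply carries an opaque write verifier, which the
 * server changes when it reboots and loses its cache.  Each request
 * remembers the verifier of the WRITE that carried it (wb_verf); if
 * the verifier returned by the COMMIT matches, the data is known to
 * be on stable storage and the request can be retired.  A mismatch
 * means the server may have lost the data, so the page is simply
 * redirtied and written again.
 */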
void nfs_commit_release_pages(struct nfs_write_data *data)
{
	struct nfs_page	*req;
	int status = data->task.tk_status;

	while (!list_empty(&data->pages)) {
		req = nfs_list_entry(data->pages.next);
		nfs_list_remove_request(req);
		nfs_clear_page_commit(req->wb_page);

		dprintk("NFS: commit (%s/%lld %d@%lld)",
			req->wb_context->dentry->d_sb->s_id,
			(long long)NFS_FILEID(req->wb_context->dentry->d_inode),
			req->wb_bytes,
			(long long)req_offset(req));
		if (status < 0) {
			nfs_context_set_write_error(req->wb_context, status);
			nfs_inode_remove_request(req);
			dprintk(", error = %d\n", status);
			goto next;
		}

		/* Okay, COMMIT succeeded, apparently. Check the verifier
		 * returned by the server against all stored verfs. */
		if (!memcmp(req->wb_verf.verifier, data->verf.verifier, sizeof(data->verf.verifier))) {
			/* We have a match */
			nfs_inode_remove_request(req);
			dprintk(" OK\n");
			goto next;
		}
		/* We have a mismatch. Write the page again */
		dprintk(" mismatch\n");
		nfs_mark_request_dirty(req);
	next:
		nfs_unlock_request(req);
	}
}
EXPORT_SYMBOL_GPL(nfs_commit_release_pages);

static void nfs_commit_release(void *calldata)
{
	struct nfs_write_data *data = calldata;

	nfs_commit_release_pages(data);
	nfs_commit_clear_lock(NFS_I(data->inode));
	nfs_commitdata_release(calldata);
}

static const struct rpc_call_ops nfs_commit_ops = {
	.rpc_call_prepare = nfs_write_prepare,
	.rpc_call_done = nfs_commit_done,
	.rpc_release = nfs_commit_release,
};

int nfs_commit_inode(struct inode *inode, int how)
{
	LIST_HEAD(head);
	int may_wait = how & FLUSH_SYNC;
	int res;

	res = nfs_commit_set_lock(NFS_I(inode), may_wait);
	if (res <= 0)
		goto out_mark_dirty;
	res = nfs_scan_commit(inode, &head);
	if (res) {
		int error;

		error = pnfs_commit_list(inode, &head, how);
		if (error == PNFS_NOT_ATTEMPTED)
			error = nfs_commit_list(inode, &head, how);
		if (error < 0)
			return error;
		if (!may_wait)
			goto out_mark_dirty;
		error = wait_on_bit(&NFS_I(inode)->flags,
				NFS_INO_COMMIT,
				nfs_wait_bit_killable,
				TASK_KILLABLE);
		if (error < 0)
			return error;
	} else
		nfs_commit_clear_lock(NFS_I(inode));
	return res;
	/* Note: If we exit without ensuring that the commit is complete,
	 *	 we must mark the inode as dirty. Otherwise, future calls to
	 *	 sync_inode() with the WB_SYNC_ALL flag set will fail to ensure
	 *	 that the data is on the disk.
	 */
out_mark_dirty:
	__mark_inode_dirty(inode, I_DIRTY_DATASYNC);
	return res;
}

static int nfs_commit_unstable_pages(struct inode *inode, struct writeback_control *wbc)
{
	struct nfs_inode *nfsi = NFS_I(inode);
	int flags = FLUSH_SYNC;
	int ret = 0;

	/* no commits means nothing needs to be done */
	if (!nfsi->ncommit)
		return ret;

	if (wbc->sync_mode == WB_SYNC_NONE) {
		/* Don't commit yet if this is a non-blocking flush and there
		 * are a lot of outstanding writes for this mapping.
		 */
		if (nfsi->ncommit <= (nfsi->npages >> 1))
			goto out_mark_dirty;

		/* don't wait for the COMMIT response */
		flags = 0;
	}

	ret = nfs_commit_inode(inode, flags);
	if (ret >= 0) {
		if (wbc->sync_mode == WB_SYNC_NONE) {
			if (ret < wbc->nr_to_write)
				wbc->nr_to_write -= ret;
			else
				wbc->nr_to_write = 0;
		}
		return 0;
	}
out_mark_dirty:
	__mark_inode_dirty(inode, I_DIRTY_DATASYNC);
	return ret;
}
#else
static int nfs_commit_unstable_pages(struct inode *inode, struct writeback_control *wbc)
{
	return 0;
}
#endif

int nfs_write_inode(struct inode *inode, struct writeback_control *wbc)
{
	int ret;

	ret = nfs_commit_unstable_pages(inode, wbc);
	if (ret >= 0 && test_bit(NFS_INO_LAYOUTCOMMIT, &NFS_I(inode)->flags)) {
		int status;
		bool sync = true;

		if (wbc->sync_mode == WB_SYNC_NONE)
			sync = false;

		status = pnfs_layoutcommit_inode(inode, sync);
		if (status < 0)
			return status;
	}
	return ret;
}
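/*
 * The batching heuristic in nfs_commit_unstable_pages() above, with
 * hypothetical numbers: on a non-blocking (WB_SYNC_NONE) flush of an
 * inode with npages == 100 outstanding requests of which
 * ncommit == 40 await a commit, 40 <= 100/2 holds, so the COMMIT is
 * deferred and the inode is merely re-marked dirty.  Once more than
 * half of the outstanding requests are commit candidates (say
 * ncommit == 60), a COMMIT is sent without waiting for the reply.
 */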
/*
 * flush the inode to disk.
 */
int nfs_wb_all(struct inode *inode)
{
	struct writeback_control wbc = {
		.sync_mode = WB_SYNC_ALL,
		.nr_to_write = LONG_MAX,
		.range_start = 0,
		.range_end = LLONG_MAX,
	};

	return sync_inode(inode, &wbc);
}

int nfs_wb_page_cancel(struct inode *inode, struct page *page)
{
	struct nfs_page *req;
	int ret = 0;

	BUG_ON(!PageLocked(page));
	for (;;) {
		wait_on_page_writeback(page);
		req = nfs_page_find_request(page);
		if (req == NULL)
			break;
		if (nfs_lock_request_dontget(req)) {
			nfs_clear_request_commit(req);
			nfs_inode_remove_request(req);
			/*
			 * In case nfs_inode_remove_request has marked the
			 * page as being dirty
			 */
			cancel_dirty_page(page, PAGE_CACHE_SIZE);
			nfs_unlock_request(req);
			break;
		}
		ret = nfs_wait_on_request(req);
		nfs_release_request(req);
		if (ret < 0)
			break;
	}
	return ret;
}

/*
 * Write back all requests on one page - we do this before reading it.
 */
int nfs_wb_page(struct inode *inode, struct page *page)
{
	loff_t range_start = page_offset(page);
	loff_t range_end = range_start + (loff_t)(PAGE_CACHE_SIZE - 1);
	struct writeback_control wbc = {
		.sync_mode = WB_SYNC_ALL,
		.nr_to_write = 0,
		.range_start = range_start,
		.range_end = range_end,
	};
	int ret;

	for (;;) {
		wait_on_page_writeback(page);
		if (clear_page_dirty_for_io(page)) {
			ret = nfs_writepage_locked(page, &wbc);
			if (ret < 0)
				goto out_error;
			continue;
		}
		if (!PagePrivate(page))
			break;
		ret = nfs_commit_inode(inode, FLUSH_SYNC);
		if (ret < 0)
			goto out_error;
	}
	return 0;
out_error:
	return ret;
}

#ifdef CONFIG_MIGRATION
int nfs_migrate_page(struct address_space *mapping, struct page *newpage,
		struct page *page, enum migrate_mode mode)
{
	/*
	 * If PagePrivate is set, then the page is currently associated with
	 * an in-progress read or write request. Don't try to migrate it.
	 *
	 * FIXME: we could do this in principle, but we'll need a way to ensure
	 *        that we can safely release the inode reference while holding
	 *        the page lock.
	 */
	if (PagePrivate(page))
		return -EBUSY;

	nfs_fscache_release_page(page, GFP_KERNEL);

	return migrate_page(mapping, newpage, page, mode);
}
#endif
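/*
 * Sanity check of the congestion sizing formula used below,
 * nfs_congestion_kb = (16 * int_sqrt(totalram_pages)) << (PAGE_SHIFT-10),
 * against the table in nfs_init_writepagecache (assuming 4 KB pages):
 * a 1 GB machine has totalram_pages = 262144, int_sqrt gives 512,
 * 16 * 512 = 8192, and 8192 << 2 = 32768k, matching the "1GB: 32768k"
 * row.  The 256*1024 (262144k = 256 MB) cap keeps very large machines
 * from dedicating an unbounded amount of memory to in-flight NFS
 * writeback.
 */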
int __init nfs_init_writepagecache(void)
{
	nfs_wdata_cachep = kmem_cache_create("nfs_write_data",
					     sizeof(struct nfs_write_data),
					     0, SLAB_HWCACHE_ALIGN,
					     NULL);
	if (nfs_wdata_cachep == NULL)
		return -ENOMEM;

	nfs_wdata_mempool = mempool_create_slab_pool(MIN_POOL_WRITE,
						     nfs_wdata_cachep);
	if (nfs_wdata_mempool == NULL)
		return -ENOMEM;

	nfs_commit_mempool = mempool_create_slab_pool(MIN_POOL_COMMIT,
						      nfs_wdata_cachep);
	if (nfs_commit_mempool == NULL)
		return -ENOMEM;

	/*
	 * NFS congestion size, scale with available memory.
	 *
	 *  64MB:    8192k
	 * 128MB:   11585k
	 * 256MB:   16384k
	 * 512MB:   23170k
	 *   1GB:   32768k
	 *   2GB:   46340k
	 *   4GB:   65536k
	 *   8GB:   92681k
	 *  16GB:  131072k
	 *
	 * This allows larger machines to have larger/more transfers.
	 * Limit the default to 256M
	 */
	nfs_congestion_kb = (16*int_sqrt(totalram_pages)) << (PAGE_SHIFT-10);
	if (nfs_congestion_kb > 256*1024)
		nfs_congestion_kb = 256*1024;

	return 0;
}

void nfs_destroy_writepagecache(void)
{
	mempool_destroy(nfs_commit_mempool);
	mempool_destroy(nfs_wdata_mempool);
	kmem_cache_destroy(nfs_wdata_cachep);
}