/*
 * linux/fs/nfs/write.c
 *
 * Write file data over NFS.
 *
 * Copyright (C) 1996, 1997, Olaf Kirch <okir@monad.swb.de>
 */

#include <linux/types.h>
#include <linux/slab.h>
#include <linux/mm.h>
#include <linux/pagemap.h>
#include <linux/file.h>
#include <linux/writeback.h>
#include <linux/swap.h>
#include <linux/migrate.h>

#include <linux/sunrpc/clnt.h>
#include <linux/nfs_fs.h>
#include <linux/nfs_mount.h>
#include <linux/nfs_page.h>
#include <linux/backing-dev.h>
#include <linux/export.h>

#include <asm/uaccess.h>

#include "delegation.h"
#include "internal.h"
#include "iostat.h"
#include "nfs4_fs.h"
#include "fscache.h"
#include "pnfs.h"

#define NFSDBG_FACILITY		NFSDBG_PAGECACHE

#define MIN_POOL_WRITE		(32)
#define MIN_POOL_COMMIT		(4)

/*
 * Local function declarations
 */
static void nfs_redirty_request(struct nfs_page *req);
static const struct rpc_call_ops nfs_write_common_ops;
static const struct rpc_call_ops nfs_commit_ops;
static const struct nfs_pgio_completion_ops nfs_async_write_completion_ops;
static const struct nfs_commit_completion_ops nfs_commit_completion_ops;

static struct kmem_cache *nfs_wdata_cachep;
static mempool_t *nfs_wdata_mempool;
static struct kmem_cache *nfs_cdata_cachep;
static mempool_t *nfs_commit_mempool;

struct nfs_commit_data *nfs_commitdata_alloc(void)
{
	struct nfs_commit_data *p = mempool_alloc(nfs_commit_mempool, GFP_NOFS);

	if (p) {
		memset(p, 0, sizeof(*p));
		INIT_LIST_HEAD(&p->pages);
	}
	return p;
}
EXPORT_SYMBOL_GPL(nfs_commitdata_alloc);

void nfs_commit_free(struct nfs_commit_data *p)
{
	mempool_free(p, nfs_commit_mempool);
}
EXPORT_SYMBOL_GPL(nfs_commit_free);

struct nfs_write_header *nfs_writehdr_alloc(void)
{
	struct nfs_write_header *p = mempool_alloc(nfs_wdata_mempool, GFP_NOFS);

	if (p) {
		struct nfs_pgio_header *hdr = &p->header;

		memset(p, 0, sizeof(*p));
		INIT_LIST_HEAD(&hdr->pages);
		INIT_LIST_HEAD(&hdr->rpc_list);
		spin_lock_init(&hdr->lock);
		atomic_set(&hdr->refcnt, 0);
	}
	return p;
}

static struct nfs_write_data *nfs_writedata_alloc(struct nfs_pgio_header *hdr,
						  unsigned int pagecount)
{
	struct nfs_write_data *data, *prealloc;

	prealloc = &container_of(hdr, struct nfs_write_header, header)->rpc_data;
	if (prealloc->header == NULL)
		data = prealloc;
	else
		data = kzalloc(sizeof(*data), GFP_KERNEL);
	if (!data)
		goto out;

	if (nfs_pgarray_set(&data->pages, pagecount)) {
		data->header = hdr;
		atomic_inc(&hdr->refcnt);
	} else {
		if (data != prealloc)
			kfree(data);
		data = NULL;
	}
out:
	return data;
}

void nfs_writehdr_free(struct nfs_pgio_header *hdr)
{
	struct nfs_write_header *whdr = container_of(hdr, struct nfs_write_header, header);
	mempool_free(whdr, nfs_wdata_mempool);
}

void nfs_writedata_release(struct nfs_write_data *wdata)
{
	struct nfs_pgio_header *hdr = wdata->header;
	struct nfs_write_header *write_header = container_of(hdr, struct nfs_write_header, header);

	put_nfs_open_context(wdata->args.context);
	if (wdata->pages.pagevec != wdata->pages.page_array)
		kfree(wdata->pages.pagevec);
	if (wdata != &write_header->rpc_data)
		kfree(wdata);
	else
		wdata->header = NULL;
	if (atomic_dec_and_test(&hdr->refcnt))
		hdr->completion_ops->completion(hdr);
}

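/*
 * Record a write error in the open context. The smp_wmb() makes sure the
 * error value is visible before NFS_CONTEXT_ERROR_WRITE is set, so whoever
 * tests the flag later also sees the error.
 */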
static void nfs_context_set_write_error(struct nfs_open_context *ctx, int error)
{
	ctx->error = error;
	smp_wmb();
	set_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags);
}

static struct nfs_page *nfs_page_find_request_locked(struct page *page)
{
	struct nfs_page *req = NULL;

	if (PagePrivate(page)) {
		req = (struct nfs_page *)page_private(page);
		if (req != NULL)
			kref_get(&req->wb_kref);
	}
	return req;
}

static struct nfs_page *nfs_page_find_request(struct page *page)
{
	struct inode *inode = page->mapping->host;
	struct nfs_page *req = NULL;

	spin_lock(&inode->i_lock);
	req = nfs_page_find_request_locked(page);
	spin_unlock(&inode->i_lock);
	return req;
}

/* Adjust the file length if we're writing beyond the end */
static void nfs_grow_file(struct page *page, unsigned int offset, unsigned int count)
{
	struct inode *inode = page->mapping->host;
	loff_t end, i_size;
	pgoff_t end_index;

	spin_lock(&inode->i_lock);
	i_size = i_size_read(inode);
	end_index = (i_size - 1) >> PAGE_CACHE_SHIFT;
	if (i_size > 0 && page->index < end_index)
		goto out;
	end = ((loff_t)page->index << PAGE_CACHE_SHIFT) + ((loff_t)offset+count);
	if (i_size >= end)
		goto out;
	i_size_write(inode, end);
	nfs_inc_stats(inode, NFSIOS_EXTENDWRITE);
out:
	spin_unlock(&inode->i_lock);
}

/* A writeback failed: mark the page as bad, and invalidate the page cache */
static void nfs_set_pageerror(struct page *page)
{
	SetPageError(page);
	nfs_zap_mapping(page->mapping->host, page->mapping);
}

/* We can set the PG_uptodate flag if we see that a write request
 * covers the full page.
 */
static void nfs_mark_uptodate(struct page *page, unsigned int base, unsigned int count)
{
	if (PageUptodate(page))
		return;
	if (base != 0)
		return;
	if (count != nfs_page_length(page))
		return;
	SetPageUptodate(page);
}

static int wb_priority(struct writeback_control *wbc)
{
	if (wbc->for_reclaim)
		return FLUSH_HIGHPRI | FLUSH_STABLE;
	if (wbc->for_kupdate || wbc->for_background)
		return FLUSH_LOWPRI | FLUSH_COND_STABLE;
	return FLUSH_COND_STABLE;
}

/*
 * NFS congestion control
 */

int nfs_congestion_kb;

#define NFS_CONGESTION_ON_THRESH	(nfs_congestion_kb >> (PAGE_SHIFT-10))
#define NFS_CONGESTION_OFF_THRESH	\
	(NFS_CONGESTION_ON_THRESH - (NFS_CONGESTION_ON_THRESH >> 2))

static int nfs_set_page_writeback(struct page *page)
{
	int ret = test_set_page_writeback(page);

	if (!ret) {
		struct inode *inode = page->mapping->host;
		struct nfs_server *nfss = NFS_SERVER(inode);

		if (atomic_long_inc_return(&nfss->writeback) >
				NFS_CONGESTION_ON_THRESH) {
			set_bdi_congested(&nfss->backing_dev_info,
						BLK_RW_ASYNC);
		}
	}
	return ret;
}

static void nfs_end_page_writeback(struct page *page)
{
	struct inode *inode = page->mapping->host;
	struct nfs_server *nfss = NFS_SERVER(inode);

	end_page_writeback(page);
	if (atomic_long_dec_return(&nfss->writeback) < NFS_CONGESTION_OFF_THRESH)
		clear_bdi_congested(&nfss->backing_dev_info, BLK_RW_ASYNC);
}

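/*
 * Look up the nfs_page attached to @page and lock it for writeback. If the
 * request is already locked, wait for it to be unlocked, or fail with
 * -EAGAIN when @nonblock is set. Returns NULL when the page has no request,
 * or an ERR_PTR on failure.
 */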
static struct nfs_page *nfs_find_and_lock_request(struct page *page, bool nonblock)
{
	struct inode *inode = page->mapping->host;
	struct nfs_page *req;
	int ret;

	spin_lock(&inode->i_lock);
	for (;;) {
		req = nfs_page_find_request_locked(page);
		if (req == NULL)
			break;
		if (nfs_lock_request(req))
			break;
		/* Note: If we hold the page lock, as is the case in nfs_writepage,
		 *	 then the call to nfs_lock_request() will always
		 *	 succeed provided that someone hasn't already marked the
		 *	 request as dirty (in which case we don't care).
		 */
		spin_unlock(&inode->i_lock);
		if (!nonblock)
			ret = nfs_wait_on_request(req);
		else
			ret = -EAGAIN;
		nfs_release_request(req);
		if (ret != 0)
			return ERR_PTR(ret);
		spin_lock(&inode->i_lock);
	}
	spin_unlock(&inode->i_lock);
	return req;
}

/*
 * Find an associated nfs write request, and prepare to flush it out
 * May return an error if the user signalled nfs_wait_on_request().
 */
static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio,
				struct page *page, bool nonblock)
{
	struct nfs_page *req;
	int ret = 0;

	req = nfs_find_and_lock_request(page, nonblock);
	if (!req)
		goto out;
	ret = PTR_ERR(req);
	if (IS_ERR(req))
		goto out;

	ret = nfs_set_page_writeback(page);
	BUG_ON(ret != 0);
	BUG_ON(test_bit(PG_CLEAN, &req->wb_flags));

	if (!nfs_pageio_add_request(pgio, req)) {
		nfs_redirty_request(req);
		ret = pgio->pg_error;
	}
out:
	return ret;
}

static int nfs_do_writepage(struct page *page, struct writeback_control *wbc, struct nfs_pageio_descriptor *pgio)
{
	struct inode *inode = page->mapping->host;
	int ret;

	nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGE);
	nfs_add_stats(inode, NFSIOS_WRITEPAGES, 1);

	nfs_pageio_cond_complete(pgio, page->index);
	ret = nfs_page_async_flush(pgio, page, wbc->sync_mode == WB_SYNC_NONE);
	if (ret == -EAGAIN) {
		redirty_page_for_writepage(wbc, page);
		ret = 0;
	}
	return ret;
}
/*
 * Write an mmapped page to the server.
 */
static int nfs_writepage_locked(struct page *page, struct writeback_control *wbc)
{
	struct nfs_pageio_descriptor pgio;
	int err;

	nfs_pageio_init_write(&pgio, page->mapping->host, wb_priority(wbc),
			      &nfs_async_write_completion_ops);
	err = nfs_do_writepage(page, wbc, &pgio);
	nfs_pageio_complete(&pgio);
	if (err < 0)
		return err;
	if (pgio.pg_error < 0)
		return pgio.pg_error;
	return 0;
}

int nfs_writepage(struct page *page, struct writeback_control *wbc)
{
	int ret;

	ret = nfs_writepage_locked(page, wbc);
	unlock_page(page);
	return ret;
}

static int nfs_writepages_callback(struct page *page, struct writeback_control *wbc, void *data)
{
	int ret;

	ret = nfs_do_writepage(page, wbc, data);
	unlock_page(page);
	return ret;
}

int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc)
{
	struct inode *inode = mapping->host;
	unsigned long *bitlock = &NFS_I(inode)->flags;
	struct nfs_pageio_descriptor pgio;
	int err;

	/* Stop dirtying of new pages while we sync */
	err = wait_on_bit_lock(bitlock, NFS_INO_FLUSHING,
			nfs_wait_bit_killable, TASK_KILLABLE);
	if (err)
		goto out_err;

	nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGES);

	nfs_pageio_init_write(&pgio, inode, wb_priority(wbc),
			      &nfs_async_write_completion_ops);
	err = write_cache_pages(mapping, wbc, nfs_writepages_callback, &pgio);
	nfs_pageio_complete(&pgio);

	clear_bit_unlock(NFS_INO_FLUSHING, bitlock);
	smp_mb__after_clear_bit();
	wake_up_bit(bitlock, NFS_INO_FLUSHING);

	if (err < 0)
		goto out_err;
	err = pgio.pg_error;
	if (err < 0)
		goto out_err;
	return 0;
out_err:
	return err;
}

/*
 * Insert a write request into an inode
 */
static void nfs_inode_add_request(struct inode *inode, struct nfs_page *req)
{
	struct nfs_inode *nfsi = NFS_I(inode);

	/* Lock the request! */
	nfs_lock_request(req);

	spin_lock(&inode->i_lock);
	if (!nfsi->npages && nfs_have_delegation(inode, FMODE_WRITE))
		inode->i_version++;
	set_bit(PG_MAPPED, &req->wb_flags);
	SetPagePrivate(req->wb_page);
	set_page_private(req->wb_page, (unsigned long)req);
	nfsi->npages++;
	kref_get(&req->wb_kref);
	spin_unlock(&inode->i_lock);
}

/*
 * Remove a write request from an inode
 */
static void nfs_inode_remove_request(struct nfs_page *req)
{
	struct inode *inode = req->wb_context->dentry->d_inode;
	struct nfs_inode *nfsi = NFS_I(inode);

	BUG_ON (!NFS_WBACK_BUSY(req));

	spin_lock(&inode->i_lock);
	set_page_private(req->wb_page, 0);
	ClearPagePrivate(req->wb_page);
	clear_bit(PG_MAPPED, &req->wb_flags);
	nfsi->npages--;
	spin_unlock(&inode->i_lock);
	nfs_release_request(req);
}

static void
nfs_mark_request_dirty(struct nfs_page *req)
{
	__set_page_dirty_nobuffers(req->wb_page);
}

#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
/**
 * nfs_request_add_commit_list - add request to a commit list
 * @req: pointer to a struct nfs_page
 * @dst: commit list head
 * @cinfo: holds list lock and accounting info
 *
 * This sets the PG_CLEAN bit, updates the cinfo count of
 * number of outstanding requests requiring a commit as well as
 * the MM page stats.
 *
 * The caller must _not_ hold the cinfo->lock, but must be
 * holding the nfs_page lock.
 */
void
nfs_request_add_commit_list(struct nfs_page *req, struct list_head *dst,
			    struct nfs_commit_info *cinfo)
{
	set_bit(PG_CLEAN, &(req)->wb_flags);
	spin_lock(cinfo->lock);
	nfs_list_add_request(req, dst);
	cinfo->mds->ncommit++;
	spin_unlock(cinfo->lock);
	if (!cinfo->dreq) {
		inc_zone_page_state(req->wb_page, NR_UNSTABLE_NFS);
		inc_bdi_stat(req->wb_page->mapping->backing_dev_info,
			     BDI_RECLAIMABLE);
		__mark_inode_dirty(req->wb_context->dentry->d_inode,
				   I_DIRTY_DATASYNC);
	}
}
EXPORT_SYMBOL_GPL(nfs_request_add_commit_list);

/**
 * nfs_request_remove_commit_list - Remove request from a commit list
 * @req: pointer to a nfs_page
 * @cinfo: holds list lock and accounting info
 *
 * This clears the PG_CLEAN bit, and updates the cinfo's count of
 * number of outstanding requests requiring a commit
 * It does not update the MM page stats.
 *
 * The caller _must_ hold the cinfo->lock and the nfs_page lock.
 */
void
nfs_request_remove_commit_list(struct nfs_page *req,
			       struct nfs_commit_info *cinfo)
{
	if (!test_and_clear_bit(PG_CLEAN, &(req)->wb_flags))
		return;
	nfs_list_remove_request(req);
	cinfo->mds->ncommit--;
}
EXPORT_SYMBOL_GPL(nfs_request_remove_commit_list);

static void nfs_init_cinfo_from_inode(struct nfs_commit_info *cinfo,
				      struct inode *inode)
{
	cinfo->lock = &inode->i_lock;
	cinfo->mds = &NFS_I(inode)->commit_info;
	cinfo->ds = pnfs_get_ds_info(inode);
	cinfo->dreq = NULL;
	cinfo->completion_ops = &nfs_commit_completion_ops;
}

void nfs_init_cinfo(struct nfs_commit_info *cinfo,
		    struct inode *inode,
		    struct nfs_direct_req *dreq)
{
	if (dreq)
		nfs_init_cinfo_from_dreq(cinfo, dreq);
	else
		nfs_init_cinfo_from_inode(cinfo, inode);
}
EXPORT_SYMBOL_GPL(nfs_init_cinfo);
/*
 * Add a request to the inode's commit list.
 */
void
nfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg,
			struct nfs_commit_info *cinfo)
{
	if (pnfs_mark_request_commit(req, lseg, cinfo))
		return;
	nfs_request_add_commit_list(req, &cinfo->mds->list, cinfo);
}

static void
nfs_clear_page_commit(struct page *page)
{
	dec_zone_page_state(page, NR_UNSTABLE_NFS);
	dec_bdi_stat(page->mapping->backing_dev_info, BDI_RECLAIMABLE);
}

static void
nfs_clear_request_commit(struct nfs_page *req)
{
	if (test_bit(PG_CLEAN, &req->wb_flags)) {
		struct inode *inode = req->wb_context->dentry->d_inode;
		struct nfs_commit_info cinfo;

		nfs_init_cinfo_from_inode(&cinfo, inode);
		if (!pnfs_clear_request_commit(req, &cinfo)) {
			spin_lock(cinfo.lock);
			nfs_request_remove_commit_list(req, &cinfo);
			spin_unlock(cinfo.lock);
		}
		nfs_clear_page_commit(req->wb_page);
	}
}

static inline
int nfs_write_need_commit(struct nfs_write_data *data)
{
	if (data->verf.committed == NFS_DATA_SYNC)
		return data->header->lseg == NULL;
	return data->verf.committed != NFS_FILE_SYNC;
}

#else
static void nfs_init_cinfo_from_inode(struct nfs_commit_info *cinfo,
				      struct inode *inode)
{
}

void nfs_init_cinfo(struct nfs_commit_info *cinfo,
		    struct inode *inode,
		    struct nfs_direct_req *dreq)
{
}

void
nfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg,
			struct nfs_commit_info *cinfo)
{
}

static void
nfs_clear_request_commit(struct nfs_page *req)
{
}

static inline
int nfs_write_need_commit(struct nfs_write_data *data)
{
	return 0;
}

#endif

static void nfs_write_completion(struct nfs_pgio_header *hdr)
{
	struct nfs_commit_info cinfo;
	unsigned long bytes = 0;

	if (test_bit(NFS_IOHDR_REDO, &hdr->flags))
		goto out;
	nfs_init_cinfo_from_inode(&cinfo, hdr->inode);
	while (!list_empty(&hdr->pages)) {
		struct nfs_page *req = nfs_list_entry(hdr->pages.next);

		bytes += req->wb_bytes;
		nfs_list_remove_request(req);
		if (test_bit(NFS_IOHDR_ERROR, &hdr->flags) &&
		    (hdr->good_bytes < bytes)) {
			nfs_set_pageerror(req->wb_page);
			nfs_context_set_write_error(req->wb_context, hdr->error);
			goto remove_req;
		}
		if (test_bit(NFS_IOHDR_NEED_RESCHED, &hdr->flags)) {
			nfs_mark_request_dirty(req);
			goto next;
		}
		if (test_bit(NFS_IOHDR_NEED_COMMIT, &hdr->flags)) {
			nfs_mark_request_commit(req, hdr->lseg, &cinfo);
			goto next;
		}
remove_req:
		nfs_inode_remove_request(req);
next:
		nfs_unlock_request(req);
		nfs_end_page_writeback(req->wb_page);
		nfs_release_request(req);
	}
out:
	hdr->release(hdr);
}

#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
static unsigned long
nfs_reqs_to_commit(struct nfs_commit_info *cinfo)
{
	return cinfo->mds->ncommit;
}

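/*
 * Move requests from @src to @dst, locking each request and taking an extra
 * reference so it survives until the commit completes. At most @max requests
 * are moved unless this is a direct-I/O commit. Returns the number moved.
 */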
/* cinfo->lock held by caller */
int
nfs_scan_commit_list(struct list_head *src, struct list_head *dst,
		     struct nfs_commit_info *cinfo, int max)
{
	struct nfs_page *req, *tmp;
	int ret = 0;

	list_for_each_entry_safe(req, tmp, src, wb_list) {
		if (!nfs_lock_request(req))
			continue;
		kref_get(&req->wb_kref);
		if (cond_resched_lock(cinfo->lock))
			list_safe_reset_next(req, tmp, wb_list);
		nfs_request_remove_commit_list(req, cinfo);
		nfs_list_add_request(req, dst);
		ret++;
		if ((ret == max) && !cinfo->dreq)
			break;
	}
	return ret;
}

/*
 * nfs_scan_commit - Scan an inode for commit requests
 * @inode: NFS inode to scan
 * @dst: mds destination list
 * @cinfo: mds and ds lists of reqs ready to commit
 *
 * Moves requests from the inode's 'commit' request list.
 * The requests are *not* checked to ensure that they form a contiguous set.
 */
int
nfs_scan_commit(struct inode *inode, struct list_head *dst,
		struct nfs_commit_info *cinfo)
{
	int ret = 0;

	spin_lock(cinfo->lock);
	if (cinfo->mds->ncommit > 0) {
		const int max = INT_MAX;

		ret = nfs_scan_commit_list(&cinfo->mds->list, dst,
					   cinfo, max);
		ret += pnfs_scan_commit_lists(inode, cinfo, max - ret);
	}
	spin_unlock(cinfo->lock);
	return ret;
}

#else
static unsigned long nfs_reqs_to_commit(struct nfs_commit_info *cinfo)
{
	return 0;
}

int nfs_scan_commit(struct inode *inode, struct list_head *dst,
		    struct nfs_commit_info *cinfo)
{
	return 0;
}
#endif

/*
 * Search for an existing write request, and attempt to update
 * it to reflect a new dirty region on a given page.
 *
 * If the attempt fails, then the existing request is flushed out
 * to disk.
 */
static struct nfs_page *nfs_try_to_update_request(struct inode *inode,
		struct page *page,
		unsigned int offset,
		unsigned int bytes)
{
	struct nfs_page *req;
	unsigned int rqend;
	unsigned int end;
	int error;

	if (!PagePrivate(page))
		return NULL;

	end = offset + bytes;
	spin_lock(&inode->i_lock);

	for (;;) {
		req = nfs_page_find_request_locked(page);
		if (req == NULL)
			goto out_unlock;

		rqend = req->wb_offset + req->wb_bytes;
		/*
		 * Tell the caller to flush out the request if
		 * the offsets are non-contiguous.
		 * Note: nfs_flush_incompatible() will already
		 * have flushed out requests having wrong owners.
		 */
		if (offset > rqend
		    || end < req->wb_offset)
			goto out_flushme;

		if (nfs_lock_request(req))
			break;

		/* The request is locked, so wait and then retry */
		spin_unlock(&inode->i_lock);
		error = nfs_wait_on_request(req);
		nfs_release_request(req);
		if (error != 0)
			goto out_err;
		spin_lock(&inode->i_lock);
	}

	/* Okay, the request matches. Update the region */
	if (offset < req->wb_offset) {
		req->wb_offset = offset;
		req->wb_pgbase = offset;
	}
	if (end > rqend)
		req->wb_bytes = end - req->wb_offset;
	else
		req->wb_bytes = rqend - req->wb_offset;
out_unlock:
	spin_unlock(&inode->i_lock);
	if (req)
		nfs_clear_request_commit(req);
	return req;
out_flushme:
	spin_unlock(&inode->i_lock);
	nfs_release_request(req);
	error = nfs_wb_page(inode, page);
out_err:
	return ERR_PTR(error);
}
/*
 * Try to update an existing write request, or create one if there is none.
 *
 * Note: Should always be called with the Page Lock held to prevent races
 * if we have to add a new request. Also assumes that the caller has
 * already called nfs_flush_incompatible() if necessary.
 */
static struct nfs_page * nfs_setup_write_request(struct nfs_open_context* ctx,
		struct page *page, unsigned int offset, unsigned int bytes)
{
	struct inode *inode = page->mapping->host;
	struct nfs_page *req;

	req = nfs_try_to_update_request(inode, page, offset, bytes);
	if (req != NULL)
		goto out;
	req = nfs_create_request(ctx, inode, page, offset, bytes);
	if (IS_ERR(req))
		goto out;
	nfs_inode_add_request(inode, req);
out:
	return req;
}

static int nfs_writepage_setup(struct nfs_open_context *ctx, struct page *page,
		unsigned int offset, unsigned int count)
{
	struct nfs_page *req;

	req = nfs_setup_write_request(ctx, page, offset, count);
	if (IS_ERR(req))
		return PTR_ERR(req);
	/* Update file length */
	nfs_grow_file(page, offset, count);
	nfs_mark_uptodate(page, req->wb_pgbase, req->wb_bytes);
	nfs_mark_request_dirty(req);
	nfs_unlock_and_release_request(req);
	return 0;
}

int nfs_flush_incompatible(struct file *file, struct page *page)
{
	struct nfs_open_context *ctx = nfs_file_open_context(file);
	struct nfs_page *req;
	int do_flush, status;
	/*
	 * Look for a request corresponding to this page. If there
	 * is one, and it belongs to another file, we flush it out
	 * before we try to copy anything into the page. Do this
	 * due to the lack of an ACCESS-type call in NFSv2.
	 * Also do the same if we find a request from an existing
	 * dropped page.
	 */
	do {
		req = nfs_page_find_request(page);
		if (req == NULL)
			return 0;
		do_flush = req->wb_page != page || req->wb_context != ctx ||
			req->wb_lock_context->lockowner != current->files ||
			req->wb_lock_context->pid != current->tgid;
		nfs_release_request(req);
		if (!do_flush)
			return 0;
		status = nfs_wb_page(page->mapping->host, page);
	} while (status == 0);
	return status;
}

/*
 * If the page cache is marked as unsafe or invalid, then we can't rely on
 * the PageUptodate() flag. In this case, we will need to turn off
 * write optimisations that depend on the page contents being correct.
 */
static bool nfs_write_pageuptodate(struct page *page, struct inode *inode)
{
	if (nfs_have_delegated_attributes(inode))
		goto out;
	if (NFS_I(inode)->cache_validity & NFS_INO_REVAL_PAGECACHE)
		return false;
out:
	return PageUptodate(page) != 0;
}

/*
 * Update and possibly write a cached page of an NFS file.
 *
 * XXX: Keep an eye on generic_file_read to make sure it doesn't do bad
 * things with a page scheduled for an RPC call (e.g. invalidate it).
 */
int nfs_updatepage(struct file *file, struct page *page,
		unsigned int offset, unsigned int count)
{
	struct nfs_open_context *ctx = nfs_file_open_context(file);
	struct inode *inode = page->mapping->host;
	int status = 0;

	nfs_inc_stats(inode, NFSIOS_VFSUPDATEPAGE);

	dprintk("NFS: nfs_updatepage(%s/%s %d@%lld)\n",
		file->f_path.dentry->d_parent->d_name.name,
		file->f_path.dentry->d_name.name, count,
		(long long)(page_offset(page) + offset));

	/* If we're not using byte range locks, and we know the page
	 * is up to date, it may be more efficient to extend the write
	 * to cover the entire page in order to avoid fragmentation
	 * inefficiencies.
	 */
	if (nfs_write_pageuptodate(page, inode) &&
			inode->i_flock == NULL &&
			!(file->f_flags & O_DSYNC)) {
		count = max(count + offset, nfs_page_length(page));
		offset = 0;
	}

	status = nfs_writepage_setup(ctx, page, offset, count);
	if (status < 0)
		nfs_set_pageerror(page);
	else
		__set_page_dirty_nobuffers(page);

	dprintk("NFS: nfs_updatepage returns %d (isize %lld)\n",
			status, (long long)i_size_read(inode));
	return status;
}

static int flush_task_priority(int how)
{
	switch (how & (FLUSH_HIGHPRI|FLUSH_LOWPRI)) {
		case FLUSH_HIGHPRI:
			return RPC_PRIORITY_HIGH;
		case FLUSH_LOWPRI:
			return RPC_PRIORITY_LOW;
	}
	return RPC_PRIORITY_NORMAL;
}

int nfs_initiate_write(struct rpc_clnt *clnt,
		       struct nfs_write_data *data,
		       const struct rpc_call_ops *call_ops,
		       int how, int flags)
{
	struct inode *inode = data->header->inode;
	int priority = flush_task_priority(how);
	struct rpc_task *task;
	struct rpc_message msg = {
		.rpc_argp = &data->args,
		.rpc_resp = &data->res,
		.rpc_cred = data->header->cred,
	};
	struct rpc_task_setup task_setup_data = {
		.rpc_client = clnt,
		.task = &data->task,
		.rpc_message = &msg,
		.callback_ops = call_ops,
		.callback_data = data,
		.workqueue = nfsiod_workqueue,
		.flags = RPC_TASK_ASYNC | flags,
		.priority = priority,
	};
	int ret = 0;

	/* Set up the initial task struct. */
	NFS_PROTO(inode)->write_setup(data, &msg);

	dprintk("NFS: %5u initiated write call "
		"(req %s/%lld, %u bytes @ offset %llu)\n",
		data->task.tk_pid,
		inode->i_sb->s_id,
		(long long)NFS_FILEID(inode),
		data->args.count,
		(unsigned long long)data->args.offset);

	task = rpc_run_task(&task_setup_data);
	if (IS_ERR(task)) {
		ret = PTR_ERR(task);
		goto out;
	}
	if (how & FLUSH_SYNC) {
		ret = rpc_wait_for_completion_task(task);
		if (ret == 0)
			ret = task->tk_status;
	}
	rpc_put_task(task);
out:
	return ret;
}
EXPORT_SYMBOL_GPL(nfs_initiate_write);

/*
 * Set up the argument/result storage required for the RPC call.
 */
static void nfs_write_rpcsetup(struct nfs_write_data *data,
		unsigned int count, unsigned int offset,
		int how, struct nfs_commit_info *cinfo)
{
	struct nfs_page *req = data->header->req;

	/* Set up the RPC argument and reply structs
	 * NB: take care not to mess about with data->commit et al. */

	data->args.fh = NFS_FH(data->header->inode);
	data->args.offset = req_offset(req) + offset;
	/* pnfs_set_layoutcommit needs this */
	data->mds_offset = data->args.offset;
	data->args.pgbase = req->wb_pgbase + offset;
	data->args.pages = data->pages.pagevec;
	data->args.count = count;
	data->args.context = get_nfs_open_context(req->wb_context);
	data->args.lock_context = req->wb_lock_context;
	data->args.stable = NFS_UNSTABLE;
	switch (how & (FLUSH_STABLE | FLUSH_COND_STABLE)) {
	case 0:
		break;
	case FLUSH_COND_STABLE:
		if (nfs_reqs_to_commit(cinfo))
			break;
	default:
		data->args.stable = NFS_FILE_SYNC;
	}

	data->res.fattr = &data->fattr;
	data->res.count = count;
	data->res.verf = &data->verf;
	nfs_fattr_init(&data->fattr);
}

static int nfs_do_write(struct nfs_write_data *data,
		const struct rpc_call_ops *call_ops,
		int how)
{
	struct inode *inode = data->header->inode;

	return nfs_initiate_write(NFS_CLIENT(inode), data, call_ops, how, 0);
}

static int nfs_do_multiple_writes(struct list_head *head,
		const struct rpc_call_ops *call_ops,
		int how)
{
	struct nfs_write_data *data;
	int ret = 0;

	while (!list_empty(head)) {
		int ret2;

		data = list_first_entry(head, struct nfs_write_data, list);
		list_del_init(&data->list);

		ret2 = nfs_do_write(data, call_ops, how);
		if (ret == 0)
			ret = ret2;
	}
	return ret;
}

/* If a nfs_flush_* function fails, it should remove reqs from @head and
 * call this on each, which will prepare them to be retried on next
 * writeback using standard nfs.
 */
static void nfs_redirty_request(struct nfs_page *req)
{
	nfs_mark_request_dirty(req);
	nfs_unlock_request(req);
	nfs_end_page_writeback(req->wb_page);
	nfs_release_request(req);
}

static void nfs_async_write_error(struct list_head *head)
{
	struct nfs_page *req;

	while (!list_empty(head)) {
		req = nfs_list_entry(head->next);
		nfs_list_remove_request(req);
		nfs_redirty_request(req);
	}
}

static const struct nfs_pgio_completion_ops nfs_async_write_completion_ops = {
	.error_cleanup = nfs_async_write_error,
	.completion = nfs_write_completion,
};

static void nfs_flush_error(struct nfs_pageio_descriptor *desc,
		struct nfs_pgio_header *hdr)
{
	set_bit(NFS_IOHDR_REDO, &hdr->flags);
	while (!list_empty(&hdr->rpc_list)) {
		struct nfs_write_data *data = list_first_entry(&hdr->rpc_list,
				struct nfs_write_data, list);
		list_del(&data->list);
		nfs_writedata_release(data);
	}
	desc->pg_completion_ops->error_cleanup(&desc->pg_list);
}

/*
 * Generate multiple small requests to write out a single
 * contiguous dirty area on one page.
 */
static int nfs_flush_multi(struct nfs_pageio_descriptor *desc,
			   struct nfs_pgio_header *hdr)
{
	struct nfs_page *req = hdr->req;
	struct page *page = req->wb_page;
	struct nfs_write_data *data;
	size_t wsize = desc->pg_bsize, nbytes;
	unsigned int offset;
	int requests = 0;
	struct nfs_commit_info cinfo;

	nfs_init_cinfo(&cinfo, desc->pg_inode, desc->pg_dreq);

	if ((desc->pg_ioflags & FLUSH_COND_STABLE) &&
	    (desc->pg_moreio || nfs_reqs_to_commit(&cinfo) ||
	     desc->pg_count > wsize))
		desc->pg_ioflags &= ~FLUSH_COND_STABLE;


	offset = 0;
	nbytes = desc->pg_count;
	do {
		size_t len = min(nbytes, wsize);

		data = nfs_writedata_alloc(hdr, 1);
		if (!data) {
			nfs_flush_error(desc, hdr);
			return -ENOMEM;
		}
		data->pages.pagevec[0] = page;
		nfs_write_rpcsetup(data, len, offset, desc->pg_ioflags, &cinfo);
		list_add(&data->list, &hdr->rpc_list);
		requests++;
		nbytes -= len;
		offset += len;
	} while (nbytes != 0);
	nfs_list_remove_request(req);
	nfs_list_add_request(req, &hdr->pages);
	desc->pg_rpc_callops = &nfs_write_common_ops;
	return 0;
}

/*
 * Create an RPC task for the given write request and kick it.
 * The page must have been locked by the caller.
 *
 * It may happen that the page we're passed is not marked dirty.
 * This is the case if nfs_updatepage detects a conflicting request
 * that has been written but not committed.
 */
static int nfs_flush_one(struct nfs_pageio_descriptor *desc,
			 struct nfs_pgio_header *hdr)
{
	struct nfs_page *req;
	struct page **pages;
	struct nfs_write_data *data;
	struct list_head *head = &desc->pg_list;
	struct nfs_commit_info cinfo;

	data = nfs_writedata_alloc(hdr, nfs_page_array_len(desc->pg_base,
							   desc->pg_count));
	if (!data) {
		nfs_flush_error(desc, hdr);
		return -ENOMEM;
	}

	nfs_init_cinfo(&cinfo, desc->pg_inode, desc->pg_dreq);
	pages = data->pages.pagevec;
	while (!list_empty(head)) {
		req = nfs_list_entry(head->next);
		nfs_list_remove_request(req);
		nfs_list_add_request(req, &hdr->pages);
		*pages++ = req->wb_page;
	}

	if ((desc->pg_ioflags & FLUSH_COND_STABLE) &&
	    (desc->pg_moreio || nfs_reqs_to_commit(&cinfo)))
		desc->pg_ioflags &= ~FLUSH_COND_STABLE;

	/* Set up the argument struct */
	nfs_write_rpcsetup(data, desc->pg_count, 0, desc->pg_ioflags, &cinfo);
	list_add(&data->list, &hdr->rpc_list);
	desc->pg_rpc_callops = &nfs_write_common_ops;
	return 0;
}

int nfs_generic_flush(struct nfs_pageio_descriptor *desc,
		      struct nfs_pgio_header *hdr)
{
	if (desc->pg_bsize < PAGE_CACHE_SIZE)
		return nfs_flush_multi(desc, hdr);
	return nfs_flush_one(desc, hdr);
}

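/*
 * Allocate a write header, turn the queued requests into one or more RPC
 * write calls via nfs_generic_flush(), and send them. The header refcount
 * tracks the outstanding nfs_write_data so that the completion callback
 * runs once the last call has finished.
 */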
static int nfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc)
{
	struct nfs_write_header *whdr;
	struct nfs_pgio_header *hdr;
	int ret;

	whdr = nfs_writehdr_alloc();
	if (!whdr) {
		desc->pg_completion_ops->error_cleanup(&desc->pg_list);
		return -ENOMEM;
	}
	hdr = &whdr->header;
	nfs_pgheader_init(desc, hdr, nfs_writehdr_free);
	atomic_inc(&hdr->refcnt);
	ret = nfs_generic_flush(desc, hdr);
	if (ret == 0)
		ret = nfs_do_multiple_writes(&hdr->rpc_list,
					     desc->pg_rpc_callops,
					     desc->pg_ioflags);
	if (atomic_dec_and_test(&hdr->refcnt))
		hdr->completion_ops->completion(hdr);
	return ret;
}

static const struct nfs_pageio_ops nfs_pageio_write_ops = {
	.pg_test = nfs_generic_pg_test,
	.pg_doio = nfs_generic_pg_writepages,
};

void nfs_pageio_init_write_mds(struct nfs_pageio_descriptor *pgio,
			       struct inode *inode, int ioflags,
			       const struct nfs_pgio_completion_ops *compl_ops)
{
	nfs_pageio_init(pgio, inode, &nfs_pageio_write_ops, compl_ops,
			NFS_SERVER(inode)->wsize, ioflags);
}

void nfs_pageio_reset_write_mds(struct nfs_pageio_descriptor *pgio)
{
	pgio->pg_ops = &nfs_pageio_write_ops;
	pgio->pg_bsize = NFS_SERVER(pgio->pg_inode)->wsize;
}
EXPORT_SYMBOL_GPL(nfs_pageio_reset_write_mds);

void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio,
			   struct inode *inode, int ioflags,
			   const struct nfs_pgio_completion_ops *compl_ops)
{
	if (!pnfs_pageio_init_write(pgio, inode, ioflags, compl_ops))
		nfs_pageio_init_write_mds(pgio, inode, ioflags, compl_ops);
}

void nfs_write_prepare(struct rpc_task *task, void *calldata)
{
	struct nfs_write_data *data = calldata;
	NFS_PROTO(data->header->inode)->write_rpc_prepare(task, data);
}

void nfs_commit_prepare(struct rpc_task *task, void *calldata)
{
	struct nfs_commit_data *data = calldata;

	NFS_PROTO(data->inode)->commit_rpc_prepare(task, data);
}

/*
 * Handle a write reply that flushes a whole page.
 *
 * FIXME: There is an inherent race with invalidate_inode_pages and
 *	  writebacks since the page->count is kept > 1 for as long
 *	  as the page has a write request pending.
 */
static void nfs_writeback_done_common(struct rpc_task *task, void *calldata)
{
	struct nfs_write_data *data = calldata;

	nfs_writeback_done(task, data);
}

static void nfs_writeback_release_common(void *calldata)
{
	struct nfs_write_data *data = calldata;
	struct nfs_pgio_header *hdr = data->header;
	int status = data->task.tk_status;
	struct nfs_page *req = hdr->req;

	if ((status >= 0) && nfs_write_need_commit(data)) {
		spin_lock(&hdr->lock);
		if (test_bit(NFS_IOHDR_NEED_RESCHED, &hdr->flags))
			; /* Do nothing */
		else if (!test_and_set_bit(NFS_IOHDR_NEED_COMMIT, &hdr->flags))
			memcpy(&req->wb_verf, &data->verf, sizeof(req->wb_verf));
		else if (memcmp(&req->wb_verf, &data->verf, sizeof(req->wb_verf)))
			set_bit(NFS_IOHDR_NEED_RESCHED, &hdr->flags);
		spin_unlock(&hdr->lock);
	}
	nfs_writedata_release(data);
}

static const struct rpc_call_ops nfs_write_common_ops = {
	.rpc_call_prepare = nfs_write_prepare,
	.rpc_call_done = nfs_writeback_done_common,
	.rpc_release = nfs_writeback_release_common,
};


/*
 * This function is called when the WRITE call is complete.
 */
void nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data)
{
	struct nfs_writeargs *argp = &data->args;
	struct nfs_writeres *resp = &data->res;
	struct inode *inode = data->header->inode;
	int status;

	dprintk("NFS: %5u nfs_writeback_done (status %d)\n",
		task->tk_pid, task->tk_status);

	/*
	 * ->write_done will attempt to use post-op attributes to detect
	 * conflicting writes by other clients. A strict interpretation
	 * of close-to-open would allow us to continue caching even if
	 * another writer had changed the file, but some applications
	 * depend on tighter cache coherency when writing.
	 */
	status = NFS_PROTO(inode)->write_done(task, data);
	if (status != 0)
		return;
	nfs_add_stats(inode, NFSIOS_SERVERWRITTENBYTES, resp->count);

#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
	if (resp->verf->committed < argp->stable && task->tk_status >= 0) {
		/* We tried a write call, but the server did not
		 * commit data to stable storage even though we
		 * requested it.
		 * Note: There is a known bug in Tru64 < 5.0 in which
		 *	 the server reports NFS_DATA_SYNC, but performs
		 *	 NFS_FILE_SYNC. We therefore implement this checking
		 *	 as a dprintk() in order to avoid filling syslog.
		 */
		static unsigned long complain;

		/* Note this will print the MDS for a DS write */
		if (time_before(complain, jiffies)) {
			dprintk("NFS: faulty NFS server %s:"
				" (committed = %d) != (stable = %d)\n",
				NFS_SERVER(inode)->nfs_client->cl_hostname,
				resp->verf->committed, argp->stable);
			complain = jiffies + 300 * HZ;
		}
	}
#endif
	if (task->tk_status < 0)
		nfs_set_pgio_error(data->header, task->tk_status, argp->offset);
	else if (resp->count < argp->count) {
		static unsigned long complain;

		/* This is a short write! */
		nfs_inc_stats(inode, NFSIOS_SHORTWRITE);

		/* Has the server at least made some progress? */
		if (resp->count == 0) {
			if (time_before(complain, jiffies)) {
				printk(KERN_WARNING
				       "NFS: Server wrote zero bytes, expected %u.\n",
				       argp->count);
				complain = jiffies + 300 * HZ;
			}
			nfs_set_pgio_error(data->header, -EIO, argp->offset);
			task->tk_status = -EIO;
			return;
		}
		/* Was this an NFSv2 write or an NFSv3 stable write? */
		if (resp->verf->committed != NFS_UNSTABLE) {
			/* Resend from where the server left off */
			data->mds_offset += resp->count;
			argp->offset += resp->count;
			argp->pgbase += resp->count;
			argp->count -= resp->count;
		} else {
			/* Resend as a stable write in order to avoid
			 * headaches in the case of a server crash.
			 */
			argp->stable = NFS_FILE_SYNC;
		}
		rpc_restart_call_prepare(task);
	}
}


#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
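/*
 * Take the per-inode NFS_INO_COMMIT flag bit. Returns 1 once the lock is
 * held, 0 if it is busy and @may_wait is not set, or a negative errno if
 * the wait was interrupted by a fatal signal.
 */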
static int nfs_commit_set_lock(struct nfs_inode *nfsi, int may_wait)
{
	int ret;

	if (!test_and_set_bit(NFS_INO_COMMIT, &nfsi->flags))
		return 1;
	if (!may_wait)
		return 0;
	ret = out_of_line_wait_on_bit_lock(&nfsi->flags,
				NFS_INO_COMMIT,
				nfs_wait_bit_killable,
				TASK_KILLABLE);
	return (ret < 0) ? ret : 1;
}

static void nfs_commit_clear_lock(struct nfs_inode *nfsi)
{
	clear_bit(NFS_INO_COMMIT, &nfsi->flags);
	smp_mb__after_clear_bit();
	wake_up_bit(&nfsi->flags, NFS_INO_COMMIT);
}

void nfs_commitdata_release(struct nfs_commit_data *data)
{
	put_nfs_open_context(data->context);
	nfs_commit_free(data);
}
EXPORT_SYMBOL_GPL(nfs_commitdata_release);

int nfs_initiate_commit(struct rpc_clnt *clnt, struct nfs_commit_data *data,
			const struct rpc_call_ops *call_ops,
			int how, int flags)
{
	struct rpc_task *task;
	int priority = flush_task_priority(how);
	struct rpc_message msg = {
		.rpc_argp = &data->args,
		.rpc_resp = &data->res,
		.rpc_cred = data->cred,
	};
	struct rpc_task_setup task_setup_data = {
		.task = &data->task,
		.rpc_client = clnt,
		.rpc_message = &msg,
		.callback_ops = call_ops,
		.callback_data = data,
		.workqueue = nfsiod_workqueue,
		.flags = RPC_TASK_ASYNC | flags,
		.priority = priority,
	};
	/* Set up the initial task struct. */
	NFS_PROTO(data->inode)->commit_setup(data, &msg);

	dprintk("NFS: %5u initiated commit call\n", data->task.tk_pid);

	task = rpc_run_task(&task_setup_data);
	if (IS_ERR(task))
		return PTR_ERR(task);
	if (how & FLUSH_SYNC)
		rpc_wait_for_completion_task(task);
	rpc_put_task(task);
	return 0;
}
EXPORT_SYMBOL_GPL(nfs_initiate_commit);

/*
 * Set up the argument/result storage required for the RPC call.
 */
void nfs_init_commit(struct nfs_commit_data *data,
		     struct list_head *head,
		     struct pnfs_layout_segment *lseg,
		     struct nfs_commit_info *cinfo)
{
	struct nfs_page *first = nfs_list_entry(head->next);
	struct inode *inode = first->wb_context->dentry->d_inode;

	/* Set up the RPC argument and reply structs
	 * NB: take care not to mess about with data->commit et al. */

	list_splice_init(head, &data->pages);

	data->inode = inode;
	data->cred = first->wb_context->cred;
	data->lseg = lseg; /* reference transferred */
	data->mds_ops = &nfs_commit_ops;
	data->completion_ops = cinfo->completion_ops;
	data->dreq = cinfo->dreq;

	data->args.fh = NFS_FH(data->inode);
	/* Note: we always request a commit of the entire inode */
	data->args.offset = 0;
	data->args.count = 0;
	data->context = get_nfs_open_context(first->wb_context);
	data->res.fattr = &data->fattr;
	data->res.verf = &data->verf;
	nfs_fattr_init(&data->fattr);
}
EXPORT_SYMBOL_GPL(nfs_init_commit);

void nfs_retry_commit(struct list_head *page_list,
		      struct pnfs_layout_segment *lseg,
		      struct nfs_commit_info *cinfo)
{
	struct nfs_page *req;

	while (!list_empty(page_list)) {
		req = nfs_list_entry(page_list->next);
		nfs_list_remove_request(req);
		nfs_mark_request_commit(req, lseg, cinfo);
		if (!cinfo->dreq) {
			dec_zone_page_state(req->wb_page, NR_UNSTABLE_NFS);
			dec_bdi_stat(req->wb_page->mapping->backing_dev_info,
				     BDI_RECLAIMABLE);
		}
		nfs_unlock_and_release_request(req);
	}
}
EXPORT_SYMBOL_GPL(nfs_retry_commit);

/*
 * Commit dirty pages
 */
static int
nfs_commit_list(struct inode *inode, struct list_head *head, int how,
		struct nfs_commit_info *cinfo)
{
	struct nfs_commit_data *data;

	data = nfs_commitdata_alloc();

	if (!data)
		goto out_bad;

	/* Set up the argument struct */
	nfs_init_commit(data, head, NULL, cinfo);
	atomic_inc(&cinfo->mds->rpcs_out);
	return nfs_initiate_commit(NFS_CLIENT(inode), data, data->mds_ops,
				   how, 0);
out_bad:
	nfs_retry_commit(head, NULL, cinfo);
	cinfo->completion_ops->error_cleanup(NFS_I(inode));
	return -ENOMEM;
}

/*
 * COMMIT call returned
 */
static void nfs_commit_done(struct rpc_task *task, void *calldata)
{
	struct nfs_commit_data *data = calldata;

	dprintk("NFS: %5u nfs_commit_done (status %d)\n",
		task->tk_pid, task->tk_status);

	/* Call the NFS version-specific code */
	NFS_PROTO(data->inode)->commit_done(task, data);
}

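/*
 * Process the requests covered by a completed COMMIT: drop requests whose
 * stored write verifier matches the one the server returned, and redirty
 * any page whose verifier does not match so that it is written again.
 */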
static void nfs_commit_release_pages(struct nfs_commit_data *data)
{
	struct nfs_page *req;
	int status = data->task.tk_status;
	struct nfs_commit_info cinfo;

	while (!list_empty(&data->pages)) {
		req = nfs_list_entry(data->pages.next);
		nfs_list_remove_request(req);
		nfs_clear_page_commit(req->wb_page);

		dprintk("NFS: commit (%s/%lld %d@%lld)",
			req->wb_context->dentry->d_sb->s_id,
			(long long)NFS_FILEID(req->wb_context->dentry->d_inode),
			req->wb_bytes,
			(long long)req_offset(req));
		if (status < 0) {
			nfs_context_set_write_error(req->wb_context, status);
			nfs_inode_remove_request(req);
			dprintk(", error = %d\n", status);
			goto next;
		}

		/* Okay, COMMIT succeeded, apparently. Check the verifier
		 * returned by the server against all stored verfs. */
		if (!memcmp(req->wb_verf.verifier, data->verf.verifier, sizeof(data->verf.verifier))) {
			/* We have a match */
			nfs_inode_remove_request(req);
			dprintk(" OK\n");
			goto next;
		}
		/* We have a mismatch. Write the page again */
		dprintk(" mismatch\n");
		nfs_mark_request_dirty(req);
	next:
		nfs_unlock_and_release_request(req);
	}
	nfs_init_cinfo(&cinfo, data->inode, data->dreq);
	if (atomic_dec_and_test(&cinfo.mds->rpcs_out))
		nfs_commit_clear_lock(NFS_I(data->inode));
}

static void nfs_commit_release(void *calldata)
{
	struct nfs_commit_data *data = calldata;

	data->completion_ops->completion(data);
	nfs_commitdata_release(calldata);
}

static const struct rpc_call_ops nfs_commit_ops = {
	.rpc_call_prepare = nfs_commit_prepare,
	.rpc_call_done = nfs_commit_done,
	.rpc_release = nfs_commit_release,
};

static const struct nfs_commit_completion_ops nfs_commit_completion_ops = {
	.completion = nfs_commit_release_pages,
	.error_cleanup = nfs_commit_clear_lock,
};

int nfs_generic_commit_list(struct inode *inode, struct list_head *head,
			    int how, struct nfs_commit_info *cinfo)
{
	int status;

	status = pnfs_commit_list(inode, head, how, cinfo);
	if (status == PNFS_NOT_ATTEMPTED)
		status = nfs_commit_list(inode, head, how, cinfo);
	return status;
}

int nfs_commit_inode(struct inode *inode, int how)
{
	LIST_HEAD(head);
	struct nfs_commit_info cinfo;
	int may_wait = how & FLUSH_SYNC;
	int res;

	res = nfs_commit_set_lock(NFS_I(inode), may_wait);
	if (res <= 0)
		goto out_mark_dirty;
	nfs_init_cinfo_from_inode(&cinfo, inode);
	res = nfs_scan_commit(inode, &head, &cinfo);
	if (res) {
		int error;

		error = nfs_generic_commit_list(inode, &head, how, &cinfo);
		if (error < 0)
			return error;
		if (!may_wait)
			goto out_mark_dirty;
		error = wait_on_bit(&NFS_I(inode)->flags,
				NFS_INO_COMMIT,
				nfs_wait_bit_killable,
				TASK_KILLABLE);
		if (error < 0)
			return error;
	} else
		nfs_commit_clear_lock(NFS_I(inode));
	return res;
	/* Note: If we exit without ensuring that the commit is complete,
	 *	 we must mark the inode as dirty. Otherwise, future calls to
	 *	 sync_inode() with the WB_SYNC_ALL flag set will fail to ensure
	 *	 that the data is on the disk.
	 */
out_mark_dirty:
	__mark_inode_dirty(inode, I_DIRTY_DATASYNC);
	return res;
}

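/*
 * Called from nfs_write_inode(): commit requests that are awaiting a COMMIT.
 * For a non-blocking (WB_SYNC_NONE) flush, the commit is skipped unless a
 * large proportion of the inode's pages are waiting, and the COMMIT reply
 * is not waited for.
 */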
static int nfs_commit_unstable_pages(struct inode *inode, struct writeback_control *wbc)
{
	struct nfs_inode *nfsi = NFS_I(inode);
	int flags = FLUSH_SYNC;
	int ret = 0;

	/* no commits means nothing needs to be done */
	if (!nfsi->commit_info.ncommit)
		return ret;

	if (wbc->sync_mode == WB_SYNC_NONE) {
		/* Don't commit yet if this is a non-blocking flush and there
		 * are a lot of outstanding writes for this mapping.
		 */
		if (nfsi->commit_info.ncommit <= (nfsi->npages >> 1))
			goto out_mark_dirty;

		/* don't wait for the COMMIT response */
		flags = 0;
	}

	ret = nfs_commit_inode(inode, flags);
	if (ret >= 0) {
		if (wbc->sync_mode == WB_SYNC_NONE) {
			if (ret < wbc->nr_to_write)
				wbc->nr_to_write -= ret;
			else
				wbc->nr_to_write = 0;
		}
		return 0;
	}
out_mark_dirty:
	__mark_inode_dirty(inode, I_DIRTY_DATASYNC);
	return ret;
}
#else
static int nfs_commit_unstable_pages(struct inode *inode, struct writeback_control *wbc)
{
	return 0;
}
#endif

int nfs_write_inode(struct inode *inode, struct writeback_control *wbc)
{
	int ret;

	ret = nfs_commit_unstable_pages(inode, wbc);
	if (ret >= 0 && test_bit(NFS_INO_LAYOUTCOMMIT, &NFS_I(inode)->flags)) {
		int status;
		bool sync = true;

		if (wbc->sync_mode == WB_SYNC_NONE)
			sync = false;

		status = pnfs_layoutcommit_inode(inode, sync);
		if (status < 0)
			return status;
	}
	return ret;
}

/*
 * flush the inode to disk.
 */
int nfs_wb_all(struct inode *inode)
{
	struct writeback_control wbc = {
		.sync_mode = WB_SYNC_ALL,
		.nr_to_write = LONG_MAX,
		.range_start = 0,
		.range_end = LLONG_MAX,
	};

	return sync_inode(inode, &wbc);
}

int nfs_wb_page_cancel(struct inode *inode, struct page *page)
{
	struct nfs_page *req;
	int ret = 0;

	BUG_ON(!PageLocked(page));
	for (;;) {
		wait_on_page_writeback(page);
		req = nfs_page_find_request(page);
		if (req == NULL)
			break;
		if (nfs_lock_request(req)) {
			nfs_clear_request_commit(req);
			nfs_inode_remove_request(req);
			/*
			 * In case nfs_inode_remove_request has marked the
			 * page as being dirty
			 */
			cancel_dirty_page(page, PAGE_CACHE_SIZE);
			nfs_unlock_and_release_request(req);
			break;
		}
		ret = nfs_wait_on_request(req);
		nfs_release_request(req);
		if (ret < 0)
			break;
	}
	return ret;
}

/*
 * Write back all requests on one page - we do this before reading it.
 */
int nfs_wb_page(struct inode *inode, struct page *page)
{
	loff_t range_start = page_offset(page);
	loff_t range_end = range_start + (loff_t)(PAGE_CACHE_SIZE - 1);
	struct writeback_control wbc = {
		.sync_mode = WB_SYNC_ALL,
		.nr_to_write = 0,
		.range_start = range_start,
		.range_end = range_end,
	};
	int ret;

	for (;;) {
		wait_on_page_writeback(page);
		if (clear_page_dirty_for_io(page)) {
			ret = nfs_writepage_locked(page, &wbc);
			if (ret < 0)
				goto out_error;
			continue;
		}
		if (!PagePrivate(page))
			break;
		ret = nfs_commit_inode(inode, FLUSH_SYNC);
		if (ret < 0)
			goto out_error;
	}
	return 0;
out_error:
	return ret;
}

#ifdef CONFIG_MIGRATION
int nfs_migrate_page(struct address_space *mapping, struct page *newpage,
		struct page *page, enum migrate_mode mode)
{
	/*
	 * If PagePrivate is set, then the page is currently associated with
	 * an in-progress read or write request. Don't try to migrate it.
	 *
	 * FIXME: we could do this in principle, but we'll need a way to ensure
	 *        that we can safely release the inode reference while holding
	 *        the page lock.
	 */
	if (PagePrivate(page))
		return -EBUSY;

	nfs_fscache_release_page(page, GFP_KERNEL);

	return migrate_page(mapping, newpage, page, mode);
}
#endif

int __init nfs_init_writepagecache(void)
{
	nfs_wdata_cachep = kmem_cache_create("nfs_write_data",
					     sizeof(struct nfs_write_header),
					     0, SLAB_HWCACHE_ALIGN,
					     NULL);
	if (nfs_wdata_cachep == NULL)
		return -ENOMEM;

	nfs_wdata_mempool = mempool_create_slab_pool(MIN_POOL_WRITE,
						     nfs_wdata_cachep);
	if (nfs_wdata_mempool == NULL)
		return -ENOMEM;

	nfs_cdata_cachep = kmem_cache_create("nfs_commit_data",
					     sizeof(struct nfs_commit_data),
					     0, SLAB_HWCACHE_ALIGN,
					     NULL);
	if (nfs_cdata_cachep == NULL)
		return -ENOMEM;

	nfs_commit_mempool = mempool_create_slab_pool(MIN_POOL_COMMIT,
						      nfs_wdata_cachep);
	if (nfs_commit_mempool == NULL)
		return -ENOMEM;

	/*
	 * NFS congestion size, scale with available memory.
	 *
	 *  64MB:    8192k
	 * 128MB:   11585k
	 * 256MB:   16384k
	 * 512MB:   23170k
	 *   1GB:   32768k
	 *   2GB:   46340k
	 *   4GB:   65536k
	 *   8GB:   92681k
	 *  16GB:  131072k
	 *
	 * This allows larger machines to have larger/more transfers.
	 * Limit the default to 256M
	 */
	nfs_congestion_kb = (16*int_sqrt(totalram_pages)) << (PAGE_SHIFT-10);
	if (nfs_congestion_kb > 256*1024)
		nfs_congestion_kb = 256*1024;

	return 0;
}

void nfs_destroy_writepagecache(void)
{
	mempool_destroy(nfs_commit_mempool);
	mempool_destroy(nfs_wdata_mempool);
	kmem_cache_destroy(nfs_wdata_cachep);
}