/*
 * linux/fs/nfs/write.c
 *
 * Write file data over NFS.
 *
 * Copyright (C) 1996, 1997, Olaf Kirch <okir@monad.swb.de>
 */

#include <linux/types.h>
#include <linux/slab.h>
#include <linux/mm.h>
#include <linux/pagemap.h>
#include <linux/file.h>
#include <linux/writeback.h>
#include <linux/swap.h>

#include <linux/sunrpc/clnt.h>
#include <linux/nfs_fs.h>
#include <linux/nfs_mount.h>
#include <linux/nfs_page.h>
#include <linux/backing-dev.h>

#include <asm/uaccess.h>

#include "delegation.h"
#include "internal.h"
#include "iostat.h"

#define NFSDBG_FACILITY		NFSDBG_PAGECACHE

#define MIN_POOL_WRITE		(32)
#define MIN_POOL_COMMIT		(4)

/*
 * Local function declarations
 */
static void nfs_pageio_init_write(struct nfs_pageio_descriptor *desc,
				  struct inode *inode, int ioflags);
static void nfs_redirty_request(struct nfs_page *req);
static const struct rpc_call_ops nfs_write_partial_ops;
static const struct rpc_call_ops nfs_write_full_ops;
static const struct rpc_call_ops nfs_commit_ops;

static struct kmem_cache *nfs_wdata_cachep;
static mempool_t *nfs_wdata_mempool;
static mempool_t *nfs_commit_mempool;

struct nfs_write_data *nfs_commitdata_alloc(void)
{
	struct nfs_write_data *p = mempool_alloc(nfs_commit_mempool, GFP_NOFS);

	if (p) {
		memset(p, 0, sizeof(*p));
		INIT_LIST_HEAD(&p->pages);
	}
	return p;
}

void nfs_commit_free(struct nfs_write_data *p)
{
	if (p && (p->pagevec != &p->page_array[0]))
		kfree(p->pagevec);
	mempool_free(p, nfs_commit_mempool);
}

struct nfs_write_data *nfs_writedata_alloc(unsigned int pagecount)
{
	struct nfs_write_data *p = mempool_alloc(nfs_wdata_mempool, GFP_NOFS);

	if (p) {
		memset(p, 0, sizeof(*p));
		INIT_LIST_HEAD(&p->pages);
		p->npages = pagecount;
		if (pagecount <= ARRAY_SIZE(p->page_array))
			p->pagevec = p->page_array;
		else {
			p->pagevec = kcalloc(pagecount, sizeof(struct page *), GFP_NOFS);
			if (!p->pagevec) {
				mempool_free(p, nfs_wdata_mempool);
				p = NULL;
			}
		}
	}
	return p;
}

static void nfs_writedata_free(struct nfs_write_data *p)
{
	if (p && (p->pagevec != &p->page_array[0]))
		kfree(p->pagevec);
	mempool_free(p, nfs_wdata_mempool);
}

void nfs_writedata_release(void *data)
{
	struct nfs_write_data *wdata = data;

	put_nfs_open_context(wdata->args.context);
	nfs_writedata_free(wdata);
}

static void nfs_context_set_write_error(struct nfs_open_context *ctx, int error)
{
	ctx->error = error;
	smp_wmb();
	set_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags);
}

static struct nfs_page *nfs_page_find_request_locked(struct page *page)
{
	struct nfs_page *req = NULL;

	if (PagePrivate(page)) {
		req = (struct nfs_page *)page_private(page);
		if (req != NULL)
			kref_get(&req->wb_kref);
	}
	return req;
}

static struct nfs_page *nfs_page_find_request(struct page *page)
{
	struct inode *inode = page->mapping->host;
	struct nfs_page *req = NULL;

	spin_lock(&inode->i_lock);
	req = nfs_page_find_request_locked(page);
	spin_unlock(&inode->i_lock);
	return req;
}

/* Adjust the file length if we're writing beyond the end */
static void nfs_grow_file(struct page *page, unsigned int offset, unsigned int count)
{
	struct inode *inode = page->mapping->host;
	loff_t end, i_size;
	pgoff_t end_index;

	spin_lock(&inode->i_lock);
	i_size = i_size_read(inode);
	end_index = (i_size - 1) >> PAGE_CACHE_SHIFT;
	if (i_size > 0 && page->index < end_index)
		goto out;
	end = ((loff_t)page->index << PAGE_CACHE_SHIFT) + ((loff_t)offset+count);
	if (i_size >= end)
		goto out;
	i_size_write(inode, end);
	nfs_inc_stats(inode, NFSIOS_EXTENDWRITE);
out:
	spin_unlock(&inode->i_lock);
}

/* A writeback failed: mark the page as bad, and invalidate the page cache */
static void nfs_set_pageerror(struct page *page)
{
	SetPageError(page);
	nfs_zap_mapping(page->mapping->host, page->mapping);
}

/* We can set the PG_uptodate flag if we see that a write request
 * covers the full page.
 */
static void nfs_mark_uptodate(struct page *page, unsigned int base, unsigned int count)
{
	if (PageUptodate(page))
		return;
	if (base != 0)
		return;
	if (count != nfs_page_length(page))
		return;
	SetPageUptodate(page);
}

static int wb_priority(struct writeback_control *wbc)
{
	if (wbc->for_reclaim)
		return FLUSH_HIGHPRI | FLUSH_STABLE;
	if (wbc->for_kupdate)
		return FLUSH_LOWPRI;
	return 0;
}

/*
 * NFS congestion control
 */

int nfs_congestion_kb;

#define NFS_CONGESTION_ON_THRESH	(nfs_congestion_kb >> (PAGE_SHIFT-10))
#define NFS_CONGESTION_OFF_THRESH	\
	(NFS_CONGESTION_ON_THRESH - (NFS_CONGESTION_ON_THRESH >> 2))

static int nfs_set_page_writeback(struct page *page)
{
	int ret = test_set_page_writeback(page);

	if (!ret) {
		struct inode *inode = page->mapping->host;
		struct nfs_server *nfss = NFS_SERVER(inode);

		if (atomic_long_inc_return(&nfss->writeback) >
				NFS_CONGESTION_ON_THRESH)
			set_bdi_congested(&nfss->backing_dev_info, WRITE);
	}
	return ret;
}

static void nfs_end_page_writeback(struct page *page)
{
	struct inode *inode = page->mapping->host;
	struct nfs_server *nfss = NFS_SERVER(inode);

	end_page_writeback(page);
	if (atomic_long_dec_return(&nfss->writeback) < NFS_CONGESTION_OFF_THRESH)
		clear_bdi_congested(&nfss->backing_dev_info, WRITE);
}

/*
 * Find an associated nfs write request, and prepare to flush it out
 * May return an error if the user signalled nfs_wait_on_request().
 */
static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio,
				struct page *page)
{
	struct inode *inode = page->mapping->host;
	struct nfs_page *req;
	int ret;

	spin_lock(&inode->i_lock);
	for (;;) {
		req = nfs_page_find_request_locked(page);
		if (req == NULL) {
			spin_unlock(&inode->i_lock);
			return 0;
		}
		if (nfs_set_page_tag_locked(req))
			break;
		/* Note: If we hold the page lock, as is the case in nfs_writepage,
		 *	 then the call to nfs_set_page_tag_locked() will always
		 *	 succeed provided that someone hasn't already marked the
		 *	 request as dirty (in which case we don't care).
		 */
		spin_unlock(&inode->i_lock);
		ret = nfs_wait_on_request(req);
		nfs_release_request(req);
		if (ret != 0)
			return ret;
		spin_lock(&inode->i_lock);
	}
	if (test_bit(PG_CLEAN, &req->wb_flags)) {
		spin_unlock(&inode->i_lock);
		BUG();
	}
	if (nfs_set_page_writeback(page) != 0) {
		spin_unlock(&inode->i_lock);
		BUG();
	}
	spin_unlock(&inode->i_lock);
	if (!nfs_pageio_add_request(pgio, req)) {
		nfs_redirty_request(req);
		return pgio->pg_error;
	}
	return 0;
}

static int nfs_do_writepage(struct page *page, struct writeback_control *wbc, struct nfs_pageio_descriptor *pgio)
{
	struct inode *inode = page->mapping->host;

	nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGE);
	nfs_add_stats(inode, NFSIOS_WRITEPAGES, 1);

	nfs_pageio_cond_complete(pgio, page->index);
	return nfs_page_async_flush(pgio, page);
}

/*
 * Write an mmapped page to the server.
 */
static int nfs_writepage_locked(struct page *page, struct writeback_control *wbc)
{
	struct nfs_pageio_descriptor pgio;
	int err;

	nfs_pageio_init_write(&pgio, page->mapping->host, wb_priority(wbc));
	err = nfs_do_writepage(page, wbc, &pgio);
	nfs_pageio_complete(&pgio);
	if (err < 0)
		return err;
	if (pgio.pg_error < 0)
		return pgio.pg_error;
	return 0;
}

int nfs_writepage(struct page *page, struct writeback_control *wbc)
{
	int ret;

	ret = nfs_writepage_locked(page, wbc);
	unlock_page(page);
	return ret;
}

static int nfs_writepages_callback(struct page *page, struct writeback_control *wbc, void *data)
{
	int ret;

	ret = nfs_do_writepage(page, wbc, data);
	unlock_page(page);
	return ret;
}

int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc)
{
	struct inode *inode = mapping->host;
	unsigned long *bitlock = &NFS_I(inode)->flags;
	struct nfs_pageio_descriptor pgio;
	int err;

	/* Stop dirtying of new pages while we sync */
	err = wait_on_bit_lock(bitlock, NFS_INO_FLUSHING,
			nfs_wait_bit_killable, TASK_KILLABLE);
	if (err)
		goto out_err;

	nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGES);

	nfs_pageio_init_write(&pgio, inode, wb_priority(wbc));
	err = write_cache_pages(mapping, wbc, nfs_writepages_callback, &pgio);
	nfs_pageio_complete(&pgio);

	clear_bit_unlock(NFS_INO_FLUSHING, bitlock);
	smp_mb__after_clear_bit();
	wake_up_bit(bitlock, NFS_INO_FLUSHING);

	if (err < 0)
		goto out_err;
	err = pgio.pg_error;
	if (err < 0)
		goto out_err;
	return 0;
out_err:
	return err;
}

/*
 * Insert a write request into an inode
 */
static int nfs_inode_add_request(struct inode *inode, struct nfs_page *req)
{
	struct nfs_inode *nfsi = NFS_I(inode);
	int error;

	error = radix_tree_preload(GFP_NOFS);
	if (error != 0)
		goto out;

	/* Lock the request! */
	nfs_lock_request_dontget(req);

	spin_lock(&inode->i_lock);
	error = radix_tree_insert(&nfsi->nfs_page_tree, req->wb_index, req);
	BUG_ON(error);
	if (!nfsi->npages) {
		igrab(inode);
		if (nfs_have_delegation(inode, FMODE_WRITE))
			nfsi->change_attr++;
	}
	SetPagePrivate(req->wb_page);
	set_page_private(req->wb_page, (unsigned long)req);
	nfsi->npages++;
	kref_get(&req->wb_kref);
	radix_tree_tag_set(&nfsi->nfs_page_tree, req->wb_index,
				NFS_PAGE_TAG_LOCKED);
	spin_unlock(&inode->i_lock);
	radix_tree_preload_end();
out:
	return error;
}

/*
 * Remove a write request from an inode
 */
static void nfs_inode_remove_request(struct nfs_page *req)
{
	struct inode *inode = req->wb_context->path.dentry->d_inode;
	struct nfs_inode *nfsi = NFS_I(inode);

	BUG_ON(!NFS_WBACK_BUSY(req));

	spin_lock(&inode->i_lock);
	set_page_private(req->wb_page, 0);
	ClearPagePrivate(req->wb_page);
	radix_tree_delete(&nfsi->nfs_page_tree, req->wb_index);
	nfsi->npages--;
	if (!nfsi->npages) {
		spin_unlock(&inode->i_lock);
		iput(inode);
	} else
		spin_unlock(&inode->i_lock);
	nfs_clear_request(req);
	nfs_release_request(req);
}

static void
nfs_mark_request_dirty(struct nfs_page *req)
{
	__set_page_dirty_nobuffers(req->wb_page);
}

#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
/*
 * Add a request to the inode's commit list.
 */
static void
nfs_mark_request_commit(struct nfs_page *req)
{
	struct inode *inode = req->wb_context->path.dentry->d_inode;
	struct nfs_inode *nfsi = NFS_I(inode);

	spin_lock(&inode->i_lock);
	set_bit(PG_CLEAN, &(req)->wb_flags);
	radix_tree_tag_set(&nfsi->nfs_page_tree,
			req->wb_index,
			NFS_PAGE_TAG_COMMIT);
	spin_unlock(&inode->i_lock);
	inc_zone_page_state(req->wb_page, NR_UNSTABLE_NFS);
	inc_bdi_stat(req->wb_page->mapping->backing_dev_info, BDI_RECLAIMABLE);
	__mark_inode_dirty(inode, I_DIRTY_DATASYNC);
}

static int
nfs_clear_request_commit(struct nfs_page *req)
{
	struct page *page = req->wb_page;

	if (test_and_clear_bit(PG_CLEAN, &(req)->wb_flags)) {
		dec_zone_page_state(page, NR_UNSTABLE_NFS);
		dec_bdi_stat(page->mapping->backing_dev_info, BDI_RECLAIMABLE);
		return 1;
	}
	return 0;
}

static inline
int nfs_write_need_commit(struct nfs_write_data *data)
{
	return data->verf.committed != NFS_FILE_SYNC;
}

static inline
int nfs_reschedule_unstable_write(struct nfs_page *req)
{
	if (test_and_clear_bit(PG_NEED_COMMIT, &req->wb_flags)) {
		nfs_mark_request_commit(req);
		return 1;
	}
	if (test_and_clear_bit(PG_NEED_RESCHED, &req->wb_flags)) {
		nfs_mark_request_dirty(req);
		return 1;
	}
	return 0;
}
#else
static inline void
nfs_mark_request_commit(struct nfs_page *req)
{
}

static inline int
nfs_clear_request_commit(struct nfs_page *req)
{
	return 0;
}

static inline
int nfs_write_need_commit(struct nfs_write_data *data)
{
	return 0;
}

static inline
int nfs_reschedule_unstable_write(struct nfs_page *req)
{
	return 0;
}
#endif

/*
 * Wait for a request to complete.
 *
 * Interruptible by fatal signals only.
 */
static int nfs_wait_on_requests_locked(struct inode *inode, pgoff_t idx_start, unsigned int npages)
{
	struct nfs_inode *nfsi = NFS_I(inode);
	struct nfs_page *req;
	pgoff_t idx_end, next;
	unsigned int res = 0;
	int error;

	if (npages == 0)
		idx_end = ~0;
	else
		idx_end = idx_start + npages - 1;

	next = idx_start;
	while (radix_tree_gang_lookup_tag(&nfsi->nfs_page_tree, (void **)&req, next, 1, NFS_PAGE_TAG_LOCKED)) {
		if (req->wb_index > idx_end)
			break;

		next = req->wb_index + 1;
		BUG_ON(!NFS_WBACK_BUSY(req));

		kref_get(&req->wb_kref);
		spin_unlock(&inode->i_lock);
		error = nfs_wait_on_request(req);
		nfs_release_request(req);
		spin_lock(&inode->i_lock);
		if (error < 0)
			return error;
		res++;
	}
	return res;
}

static void nfs_cancel_commit_list(struct list_head *head)
{
	struct nfs_page *req;

	while (!list_empty(head)) {
		req = nfs_list_entry(head->next);
		nfs_list_remove_request(req);
		nfs_clear_request_commit(req);
		nfs_inode_remove_request(req);
		nfs_unlock_request(req);
	}
}

#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
static int
nfs_need_commit(struct nfs_inode *nfsi)
{
	return radix_tree_tagged(&nfsi->nfs_page_tree, NFS_PAGE_TAG_COMMIT);
}

/*
 * nfs_scan_commit - Scan an inode for commit requests
 * @inode: NFS inode to scan
 * @dst: destination list
 * @idx_start: lower bound of page->index to scan.
 * @npages: idx_start + npages sets the upper bound to scan.
 *
 * Moves requests from the inode's 'commit' request list.
 * The requests are *not* checked to ensure that they form a contiguous set.
 */
static int
nfs_scan_commit(struct inode *inode, struct list_head *dst, pgoff_t idx_start, unsigned int npages)
{
	struct nfs_inode *nfsi = NFS_I(inode);

	if (!nfs_need_commit(nfsi))
		return 0;

	return nfs_scan_list(nfsi, dst, idx_start, npages, NFS_PAGE_TAG_COMMIT);
}
#else
static inline int nfs_need_commit(struct nfs_inode *nfsi)
{
	return 0;
}

static inline int nfs_scan_commit(struct inode *inode, struct list_head *dst, pgoff_t idx_start, unsigned int npages)
{
	return 0;
}
#endif

/*
 * Search for an existing write request, and attempt to update
 * it to reflect a new dirty region on a given page.
 *
 * If the attempt fails, then the existing request is flushed out
 * to disk.
 */
static struct nfs_page *nfs_try_to_update_request(struct inode *inode,
		struct page *page,
		unsigned int offset,
		unsigned int bytes)
{
	struct nfs_page *req;
	unsigned int rqend;
	unsigned int end;
	int error;

	if (!PagePrivate(page))
		return NULL;

	end = offset + bytes;
	spin_lock(&inode->i_lock);

	for (;;) {
		req = nfs_page_find_request_locked(page);
		if (req == NULL)
			goto out_unlock;

		rqend = req->wb_offset + req->wb_bytes;
		/*
		 * Tell the caller to flush out the request if
		 * the offsets are non-contiguous.
		 * Note: nfs_flush_incompatible() will already
		 * have flushed out requests having wrong owners.
		 */
		if (offset > rqend
		    || end < req->wb_offset)
			goto out_flushme;

		if (nfs_set_page_tag_locked(req))
			break;

		/* The request is locked, so wait and then retry */
		spin_unlock(&inode->i_lock);
		error = nfs_wait_on_request(req);
		nfs_release_request(req);
		if (error != 0)
			goto out_err;
		spin_lock(&inode->i_lock);
	}

	if (nfs_clear_request_commit(req))
		radix_tree_tag_clear(&NFS_I(inode)->nfs_page_tree,
				req->wb_index, NFS_PAGE_TAG_COMMIT);

	/* Okay, the request matches. Update the region */
	if (offset < req->wb_offset) {
		req->wb_offset = offset;
		req->wb_pgbase = offset;
	}
	if (end > rqend)
		req->wb_bytes = end - req->wb_offset;
	else
		req->wb_bytes = rqend - req->wb_offset;
out_unlock:
	spin_unlock(&inode->i_lock);
	return req;
out_flushme:
	spin_unlock(&inode->i_lock);
	nfs_release_request(req);
	error = nfs_wb_page(inode, page);
out_err:
	return ERR_PTR(error);
}

/*
 * Try to update an existing write request, or create one if there is none.
 *
 * Note: Should always be called with the Page Lock held to prevent races
 * if we have to add a new request. Also assumes that the caller has
 * already called nfs_flush_incompatible() if necessary.
 */
static struct nfs_page *nfs_setup_write_request(struct nfs_open_context *ctx,
		struct page *page, unsigned int offset, unsigned int bytes)
{
	struct inode *inode = page->mapping->host;
	struct nfs_page *req;
	int error;

	req = nfs_try_to_update_request(inode, page, offset, bytes);
	if (req != NULL)
		goto out;
	req = nfs_create_request(ctx, inode, page, offset, bytes);
	if (IS_ERR(req))
		goto out;
	error = nfs_inode_add_request(inode, req);
	if (error != 0) {
		nfs_release_request(req);
		req = ERR_PTR(error);
	}
out:
	return req;
}

static int nfs_writepage_setup(struct nfs_open_context *ctx, struct page *page,
		unsigned int offset, unsigned int count)
{
	struct nfs_page *req;

	req = nfs_setup_write_request(ctx, page, offset, count);
	if (IS_ERR(req))
		return PTR_ERR(req);
	/* Update file length */
	nfs_grow_file(page, offset, count);
	nfs_mark_uptodate(page, req->wb_pgbase, req->wb_bytes);
	nfs_clear_page_tag_locked(req);
	return 0;
}

int nfs_flush_incompatible(struct file *file, struct page *page)
{
	struct nfs_open_context *ctx = nfs_file_open_context(file);
	struct nfs_page *req;
	int do_flush, status;
	/*
	 * Look for a request corresponding to this page. If there
	 * is one, and it belongs to another file, we flush it out
	 * before we try to copy anything into the page. Do this
	 * due to the lack of an ACCESS-type call in NFSv2.
	 * Also do the same if we find a request from an existing
	 * dropped page.
	 */
	do {
		req = nfs_page_find_request(page);
		if (req == NULL)
			return 0;
		do_flush = req->wb_page != page || req->wb_context != ctx;
		nfs_release_request(req);
		if (!do_flush)
			return 0;
		status = nfs_wb_page(page->mapping->host, page);
	} while (status == 0);
	return status;
}

/*
 * If the page cache is marked as unsafe or invalid, then we can't rely on
 * the PageUptodate() flag. In this case, we will need to turn off
 * write optimisations that depend on the page contents being correct.
 */
static int nfs_write_pageuptodate(struct page *page, struct inode *inode)
{
	return PageUptodate(page) &&
		!(NFS_I(inode)->cache_validity & (NFS_INO_REVAL_PAGECACHE|NFS_INO_INVALID_DATA));
}

/*
 * Update and possibly write a cached page of an NFS file.
 *
 * XXX: Keep an eye on generic_file_read to make sure it doesn't do bad
 * things with a page scheduled for an RPC call (e.g. invalidate it).
 */
int nfs_updatepage(struct file *file, struct page *page,
		unsigned int offset, unsigned int count)
{
	struct nfs_open_context *ctx = nfs_file_open_context(file);
	struct inode *inode = page->mapping->host;
	int status = 0;

	nfs_inc_stats(inode, NFSIOS_VFSUPDATEPAGE);

	dprintk("NFS: nfs_updatepage(%s/%s %d@%lld)\n",
		file->f_path.dentry->d_parent->d_name.name,
		file->f_path.dentry->d_name.name, count,
		(long long)(page_offset(page) + offset));

	/* If we're not using byte range locks, and we know the page
	 * is up to date, it may be more efficient to extend the write
	 * to cover the entire page in order to avoid fragmentation
	 * inefficiencies.
	 */
	if (nfs_write_pageuptodate(page, inode) &&
			inode->i_flock == NULL &&
			!(file->f_flags & O_SYNC)) {
		count = max(count + offset, nfs_page_length(page));
		offset = 0;
	}

	status = nfs_writepage_setup(ctx, page, offset, count);
	if (status < 0)
		nfs_set_pageerror(page);
	else
		__set_page_dirty_nobuffers(page);

	dprintk("NFS: nfs_updatepage returns %d (isize %lld)\n",
		status, (long long)i_size_read(inode));
	return status;
}

static void nfs_writepage_release(struct nfs_page *req)
{
	if (PageError(req->wb_page) || !nfs_reschedule_unstable_write(req)) {
		nfs_end_page_writeback(req->wb_page);
		nfs_inode_remove_request(req);
	} else
		nfs_end_page_writeback(req->wb_page);
	nfs_clear_page_tag_locked(req);
}

static int flush_task_priority(int how)
{
	switch (how & (FLUSH_HIGHPRI|FLUSH_LOWPRI)) {
		case FLUSH_HIGHPRI:
			return RPC_PRIORITY_HIGH;
		case FLUSH_LOWPRI:
			return RPC_PRIORITY_LOW;
	}
	return RPC_PRIORITY_NORMAL;
}

/*
 * Set up the argument/result storage required for the RPC call.
 */
static int nfs_write_rpcsetup(struct nfs_page *req,
		struct nfs_write_data *data,
		const struct rpc_call_ops *call_ops,
		unsigned int count, unsigned int offset,
		int how)
{
	struct inode *inode = req->wb_context->path.dentry->d_inode;
	int flags = (how & FLUSH_SYNC) ? 0 : RPC_TASK_ASYNC;
	int priority = flush_task_priority(how);
	struct rpc_task *task;
	struct rpc_message msg = {
		.rpc_argp = &data->args,
		.rpc_resp = &data->res,
		.rpc_cred = req->wb_context->cred,
	};
	struct rpc_task_setup task_setup_data = {
		.rpc_client = NFS_CLIENT(inode),
		.task = &data->task,
		.rpc_message = &msg,
		.callback_ops = call_ops,
		.callback_data = data,
		.workqueue = nfsiod_workqueue,
		.flags = flags,
		.priority = priority,
	};

	/* Set up the RPC argument and reply structs
	 * NB: take care not to mess about with data->commit et al. */

	data->req = req;
	data->inode = inode = req->wb_context->path.dentry->d_inode;
	data->cred = msg.rpc_cred;

	data->args.fh = NFS_FH(inode);
	data->args.offset = req_offset(req) + offset;
	data->args.pgbase = req->wb_pgbase + offset;
	data->args.pages = data->pagevec;
	data->args.count = count;
	data->args.context = get_nfs_open_context(req->wb_context);
	data->args.stable = NFS_UNSTABLE;
	if (how & FLUSH_STABLE) {
		data->args.stable = NFS_DATA_SYNC;
		if (!nfs_need_commit(NFS_I(inode)))
			data->args.stable = NFS_FILE_SYNC;
	}

	data->res.fattr = &data->fattr;
	data->res.count = count;
	data->res.verf = &data->verf;
	nfs_fattr_init(&data->fattr);

	/* Set up the initial task struct. */
	NFS_PROTO(inode)->write_setup(data, &msg);

	dprintk("NFS: %5u initiated write call "
		"(req %s/%lld, %u bytes @ offset %llu)\n",
		data->task.tk_pid,
		inode->i_sb->s_id,
		(long long)NFS_FILEID(inode),
		count,
		(unsigned long long)data->args.offset);

	task = rpc_run_task(&task_setup_data);
	if (IS_ERR(task))
		return PTR_ERR(task);
	rpc_put_task(task);
	return 0;
}

/* If a nfs_flush_* function fails, it should remove reqs from @head and
 * call this on each, which will prepare them to be retried on next
 * writeback using standard nfs.
 */
static void nfs_redirty_request(struct nfs_page *req)
{
	nfs_mark_request_dirty(req);
	nfs_end_page_writeback(req->wb_page);
	nfs_clear_page_tag_locked(req);
}

/*
 * Generate multiple small requests to write out a single
 * contiguous dirty area on one page.
 */
static int nfs_flush_multi(struct inode *inode, struct list_head *head, unsigned int npages, size_t count, int how)
{
	struct nfs_page *req = nfs_list_entry(head->next);
	struct page *page = req->wb_page;
	struct nfs_write_data *data;
	size_t wsize = NFS_SERVER(inode)->wsize, nbytes;
	unsigned int offset;
	int requests = 0;
	int ret = 0;
	LIST_HEAD(list);

	nfs_list_remove_request(req);

	nbytes = count;
	do {
		size_t len = min(nbytes, wsize);

		data = nfs_writedata_alloc(1);
		if (!data)
			goto out_bad;
		list_add(&data->pages, &list);
		requests++;
		nbytes -= len;
	} while (nbytes != 0);
	atomic_set(&req->wb_complete, requests);

	ClearPageError(page);
	offset = 0;
	nbytes = count;
	do {
		int ret2;

		data = list_entry(list.next, struct nfs_write_data, pages);
		list_del_init(&data->pages);

		data->pagevec[0] = page;

		if (nbytes < wsize)
			wsize = nbytes;
		ret2 = nfs_write_rpcsetup(req, data, &nfs_write_partial_ops,
				wsize, offset, how);
		if (ret == 0)
			ret = ret2;
		offset += wsize;
		nbytes -= wsize;
	} while (nbytes != 0);

	return ret;

out_bad:
	while (!list_empty(&list)) {
		data = list_entry(list.next, struct nfs_write_data, pages);
		list_del(&data->pages);
		nfs_writedata_release(data);
	}
	nfs_redirty_request(req);
	return -ENOMEM;
}

/*
 * Create an RPC task for the given write request and kick it.
 * The page must have been locked by the caller.
 *
 * It may happen that the page we're passed is not marked dirty.
 * This is the case if nfs_updatepage detects a conflicting request
 * that has been written but not committed.
 */
static int nfs_flush_one(struct inode *inode, struct list_head *head, unsigned int npages, size_t count, int how)
{
	struct nfs_page *req;
	struct page **pages;
	struct nfs_write_data *data;

	data = nfs_writedata_alloc(npages);
	if (!data)
		goto out_bad;

	pages = data->pagevec;
	while (!list_empty(head)) {
		req = nfs_list_entry(head->next);
		nfs_list_remove_request(req);
		nfs_list_add_request(req, &data->pages);
		ClearPageError(req->wb_page);
		*pages++ = req->wb_page;
	}
	req = nfs_list_entry(data->pages.next);

	/* Set up the argument struct */
	return nfs_write_rpcsetup(req, data, &nfs_write_full_ops, count, 0, how);
out_bad:
	while (!list_empty(head)) {
		req = nfs_list_entry(head->next);
		nfs_list_remove_request(req);
		nfs_redirty_request(req);
	}
	return -ENOMEM;
}

static void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio,
				  struct inode *inode, int ioflags)
{
	size_t wsize = NFS_SERVER(inode)->wsize;

	if (wsize < PAGE_CACHE_SIZE)
		nfs_pageio_init(pgio, inode, nfs_flush_multi, wsize, ioflags);
	else
		nfs_pageio_init(pgio, inode, nfs_flush_one, wsize, ioflags);
}

/*
 * Handle a write reply that flushed part of a page.
 */
static void nfs_writeback_done_partial(struct rpc_task *task, void *calldata)
{
	struct nfs_write_data *data = calldata;

	dprintk("NFS: %5u write(%s/%lld %d@%lld)",
		task->tk_pid,
		data->req->wb_context->path.dentry->d_inode->i_sb->s_id,
		(long long)
		NFS_FILEID(data->req->wb_context->path.dentry->d_inode),
		data->req->wb_bytes, (long long)req_offset(data->req));

	nfs_writeback_done(task, data);
}

static void nfs_writeback_release_partial(void *calldata)
{
	struct nfs_write_data *data = calldata;
	struct nfs_page *req = data->req;
	struct page *page = req->wb_page;
	int status = data->task.tk_status;

	if (status < 0) {
		nfs_set_pageerror(page);
		nfs_context_set_write_error(req->wb_context, status);
		dprintk(", error = %d\n", status);
		goto out;
	}

	if (nfs_write_need_commit(data)) {
		struct inode *inode = page->mapping->host;

		spin_lock(&inode->i_lock);
		if (test_bit(PG_NEED_RESCHED, &req->wb_flags)) {
			/* Do nothing; we need to resend the writes */
		} else if (!test_and_set_bit(PG_NEED_COMMIT, &req->wb_flags)) {
			memcpy(&req->wb_verf, &data->verf, sizeof(req->wb_verf));
			dprintk(" defer commit\n");
		} else if (memcmp(&req->wb_verf, &data->verf, sizeof(req->wb_verf))) {
			set_bit(PG_NEED_RESCHED, &req->wb_flags);
			clear_bit(PG_NEED_COMMIT, &req->wb_flags);
			dprintk(" server reboot detected\n");
		}
		spin_unlock(&inode->i_lock);
	} else
		dprintk(" OK\n");

out:
	if (atomic_dec_and_test(&req->wb_complete))
		nfs_writepage_release(req);
	nfs_writedata_release(calldata);
}

static const struct rpc_call_ops nfs_write_partial_ops = {
	.rpc_call_done = nfs_writeback_done_partial,
	.rpc_release = nfs_writeback_release_partial,
};

/*
 * Handle a write reply that flushes a whole page.
 *
 * FIXME: There is an inherent race with invalidate_inode_pages and
 *	  writebacks since the page->count is kept > 1 for as long
 *	  as the page has a write request pending.
 */
static void nfs_writeback_done_full(struct rpc_task *task, void *calldata)
{
	struct nfs_write_data *data = calldata;

	nfs_writeback_done(task, data);
}

static void nfs_writeback_release_full(void *calldata)
{
	struct nfs_write_data *data = calldata;
	int status = data->task.tk_status;

	/* Update attributes as result of writeback. */
	while (!list_empty(&data->pages)) {
		struct nfs_page *req = nfs_list_entry(data->pages.next);
		struct page *page = req->wb_page;

		nfs_list_remove_request(req);

		dprintk("NFS: %5u write (%s/%lld %d@%lld)",
			data->task.tk_pid,
			req->wb_context->path.dentry->d_inode->i_sb->s_id,
			(long long)NFS_FILEID(req->wb_context->path.dentry->d_inode),
			req->wb_bytes,
			(long long)req_offset(req));

		if (status < 0) {
			nfs_set_pageerror(page);
			nfs_context_set_write_error(req->wb_context, status);
			dprintk(", error = %d\n", status);
			goto remove_request;
		}

		if (nfs_write_need_commit(data)) {
			memcpy(&req->wb_verf, &data->verf, sizeof(req->wb_verf));
			nfs_mark_request_commit(req);
			nfs_end_page_writeback(page);
			dprintk(" marked for commit\n");
			goto next;
		}
		dprintk(" OK\n");
remove_request:
		nfs_end_page_writeback(page);
		nfs_inode_remove_request(req);
next:
		nfs_clear_page_tag_locked(req);
	}
	nfs_writedata_release(calldata);
}

static const struct rpc_call_ops nfs_write_full_ops = {
	.rpc_call_done = nfs_writeback_done_full,
	.rpc_release = nfs_writeback_release_full,
};


/*
 * This function is called when the WRITE call is complete.
 */
int nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data)
{
	struct nfs_writeargs *argp = &data->args;
	struct nfs_writeres *resp = &data->res;
	int status;

	dprintk("NFS: %5u nfs_writeback_done (status %d)\n",
		task->tk_pid, task->tk_status);

	/*
	 * ->write_done will attempt to use post-op attributes to detect
	 * conflicting writes by other clients. A strict interpretation
	 * of close-to-open would allow us to continue caching even if
	 * another writer had changed the file, but some applications
	 * depend on tighter cache coherency when writing.
	 */
	status = NFS_PROTO(data->inode)->write_done(task, data);
	if (status != 0)
		return status;
	nfs_add_stats(data->inode, NFSIOS_SERVERWRITTENBYTES, resp->count);

#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
	if (resp->verf->committed < argp->stable && task->tk_status >= 0) {
		/* We tried a write call, but the server did not
		 * commit data to stable storage even though we
		 * requested it.
		 * Note: There is a known bug in Tru64 < 5.0 in which
		 *	 the server reports NFS_DATA_SYNC, but performs
		 *	 NFS_FILE_SYNC. We therefore implement this checking
		 *	 as a dprintk() in order to avoid filling syslog.
		 */
		static unsigned long complain;

		if (time_before(complain, jiffies)) {
			dprintk("NFS: faulty NFS server %s:"
				" (committed = %d) != (stable = %d)\n",
				NFS_SERVER(data->inode)->nfs_client->cl_hostname,
				resp->verf->committed, argp->stable);
			complain = jiffies + 300 * HZ;
		}
	}
#endif
	/* Is this a short write? */
	if (task->tk_status >= 0 && resp->count < argp->count) {
		static unsigned long complain;

		nfs_inc_stats(data->inode, NFSIOS_SHORTWRITE);

		/* Has the server at least made some progress? */
		if (resp->count != 0) {
			/* Was this an NFSv2 write or an NFSv3 stable write? */
			if (resp->verf->committed != NFS_UNSTABLE) {
				/* Resend from where the server left off */
				argp->offset += resp->count;
				argp->pgbase += resp->count;
				argp->count -= resp->count;
			} else {
				/* Resend as a stable write in order to avoid
				 * headaches in the case of a server crash.
				 */
				argp->stable = NFS_FILE_SYNC;
			}
			rpc_restart_call(task);
			return -EAGAIN;
		}
		if (time_before(complain, jiffies)) {
			printk(KERN_WARNING
			       "NFS: Server wrote zero bytes, expected %u.\n",
					argp->count);
			complain = jiffies + 300 * HZ;
		}
		/* Can't do anything about it except throw an error. */
		task->tk_status = -EIO;
	}
	return 0;
}


#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
void nfs_commitdata_release(void *data)
{
	struct nfs_write_data *wdata = data;

	put_nfs_open_context(wdata->args.context);
	nfs_commit_free(wdata);
}

/*
 * Set up the argument/result storage required for the RPC call.
 */
static int nfs_commit_rpcsetup(struct list_head *head,
		struct nfs_write_data *data,
		int how)
{
	struct nfs_page *first = nfs_list_entry(head->next);
	struct inode *inode = first->wb_context->path.dentry->d_inode;
	int flags = (how & FLUSH_SYNC) ? 0 : RPC_TASK_ASYNC;
	int priority = flush_task_priority(how);
	struct rpc_task *task;
	struct rpc_message msg = {
		.rpc_argp = &data->args,
		.rpc_resp = &data->res,
		.rpc_cred = first->wb_context->cred,
	};
	struct rpc_task_setup task_setup_data = {
		.task = &data->task,
		.rpc_client = NFS_CLIENT(inode),
		.rpc_message = &msg,
		.callback_ops = &nfs_commit_ops,
		.callback_data = data,
		.workqueue = nfsiod_workqueue,
		.flags = flags,
		.priority = priority,
	};

	/* Set up the RPC argument and reply structs
	 * NB: take care not to mess about with data->commit et al. */

	list_splice_init(head, &data->pages);

	data->inode = inode;
	data->cred = msg.rpc_cred;

	data->args.fh = NFS_FH(data->inode);
	/* Note: we always request a commit of the entire inode */
	data->args.offset = 0;
	data->args.count = 0;
	data->args.context = get_nfs_open_context(first->wb_context);
	data->res.count = 0;
	data->res.fattr = &data->fattr;
	data->res.verf = &data->verf;
	nfs_fattr_init(&data->fattr);

	/* Set up the initial task struct. */
	NFS_PROTO(inode)->commit_setup(data, &msg);

	dprintk("NFS: %5u initiated commit call\n", data->task.tk_pid);

	task = rpc_run_task(&task_setup_data);
	if (IS_ERR(task))
		return PTR_ERR(task);
	rpc_put_task(task);
	return 0;
}

/*
 * Commit dirty pages
 */
static int
nfs_commit_list(struct inode *inode, struct list_head *head, int how)
{
	struct nfs_write_data *data;
	struct nfs_page *req;

	data = nfs_commitdata_alloc();

	if (!data)
		goto out_bad;

	/* Set up the argument struct */
	return nfs_commit_rpcsetup(head, data, how);
out_bad:
	while (!list_empty(head)) {
		req = nfs_list_entry(head->next);
		nfs_list_remove_request(req);
		nfs_mark_request_commit(req);
		dec_zone_page_state(req->wb_page, NR_UNSTABLE_NFS);
		dec_bdi_stat(req->wb_page->mapping->backing_dev_info,
				BDI_RECLAIMABLE);
		nfs_clear_page_tag_locked(req);
	}
	return -ENOMEM;
}

/*
 * COMMIT call returned
 */
static void nfs_commit_done(struct rpc_task *task, void *calldata)
{
	struct nfs_write_data *data = calldata;

	dprintk("NFS: %5u nfs_commit_done (status %d)\n",
		task->tk_pid, task->tk_status);

	/* Call the NFS version-specific code */
	if (NFS_PROTO(data->inode)->commit_done(task, data) != 0)
		return;
}

static void nfs_commit_release(void *calldata)
{
	struct nfs_write_data *data = calldata;
	struct nfs_page *req;
	int status = data->task.tk_status;

	while (!list_empty(&data->pages)) {
		req = nfs_list_entry(data->pages.next);
		nfs_list_remove_request(req);
		nfs_clear_request_commit(req);

		dprintk("NFS: commit (%s/%lld %d@%lld)",
			req->wb_context->path.dentry->d_inode->i_sb->s_id,
			(long long)NFS_FILEID(req->wb_context->path.dentry->d_inode),
			req->wb_bytes,
			(long long)req_offset(req));
		if (status < 0) {
			nfs_context_set_write_error(req->wb_context, status);
			nfs_inode_remove_request(req);
			dprintk(", error = %d\n", status);
			goto next;
		}

		/* Okay, COMMIT succeeded, apparently. Check the verifier
		 * returned by the server against all stored verfs. */
		if (!memcmp(req->wb_verf.verifier, data->verf.verifier, sizeof(data->verf.verifier))) {
			/* We have a match */
			nfs_inode_remove_request(req);
			dprintk(" OK\n");
			goto next;
		}
		/* We have a mismatch. Write the page again */
		dprintk(" mismatch\n");
		nfs_mark_request_dirty(req);
next:
		nfs_clear_page_tag_locked(req);
	}
	nfs_commitdata_release(calldata);
}

static const struct rpc_call_ops nfs_commit_ops = {
	.rpc_call_done = nfs_commit_done,
	.rpc_release = nfs_commit_release,
};

int nfs_commit_inode(struct inode *inode, int how)
{
	LIST_HEAD(head);
	int res;

	spin_lock(&inode->i_lock);
	res = nfs_scan_commit(inode, &head, 0, 0);
	spin_unlock(&inode->i_lock);
	if (res) {
		int error = nfs_commit_list(inode, &head, how);
		if (error < 0)
			return error;
	}
	return res;
}
#else
static inline int nfs_commit_list(struct inode *inode, struct list_head *head, int how)
{
	return 0;
}
#endif

long nfs_sync_mapping_wait(struct address_space *mapping, struct writeback_control *wbc, int how)
{
	struct inode *inode = mapping->host;
	pgoff_t idx_start, idx_end;
	unsigned int npages = 0;
	LIST_HEAD(head);
	int nocommit = how & FLUSH_NOCOMMIT;
	long pages, ret;

	/* FIXME */
	if (wbc->range_cyclic)
		idx_start = 0;
	else {
		idx_start = wbc->range_start >> PAGE_CACHE_SHIFT;
		idx_end = wbc->range_end >> PAGE_CACHE_SHIFT;
		if (idx_end > idx_start) {
			pgoff_t l_npages = 1 + idx_end - idx_start;
			npages = l_npages;
			if (sizeof(npages) != sizeof(l_npages) &&
					(pgoff_t)npages != l_npages)
				npages = 0;
		}
	}
	how &= ~FLUSH_NOCOMMIT;
	spin_lock(&inode->i_lock);
	do {
		ret = nfs_wait_on_requests_locked(inode, idx_start, npages);
		if (ret != 0)
			continue;
		if (nocommit)
			break;
		pages = nfs_scan_commit(inode, &head, idx_start, npages);
		if (pages == 0)
			break;
		if (how & FLUSH_INVALIDATE) {
			spin_unlock(&inode->i_lock);
			nfs_cancel_commit_list(&head);
			ret = pages;
			spin_lock(&inode->i_lock);
			continue;
		}
		pages += nfs_scan_commit(inode, &head, 0, 0);
		spin_unlock(&inode->i_lock);
		ret = nfs_commit_list(inode, &head, how);
		spin_lock(&inode->i_lock);

	} while (ret >= 0);
	spin_unlock(&inode->i_lock);
	return ret;
}

static int __nfs_write_mapping(struct address_space *mapping, struct writeback_control *wbc, int how)
{
	int ret;

	ret = nfs_writepages(mapping, wbc);
	if (ret < 0)
		goto out;
	ret = nfs_sync_mapping_wait(mapping, wbc, how);
	if (ret < 0)
		goto out;
	return 0;
out:
	__mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
	return ret;
}

/* Two pass sync: first using WB_SYNC_NONE, then WB_SYNC_ALL */
static int nfs_write_mapping(struct address_space *mapping, int how)
{
	struct writeback_control wbc = {
		.bdi = mapping->backing_dev_info,
		.sync_mode = WB_SYNC_ALL,
		.nr_to_write = LONG_MAX,
		.range_start = 0,
		.range_end = LLONG_MAX,
		.for_writepages = 1,
	};

	return __nfs_write_mapping(mapping, &wbc, how);
}

/*
 * flush the inode to disk.
 */
int nfs_wb_all(struct inode *inode)
{
	return nfs_write_mapping(inode->i_mapping, 0);
}

int nfs_wb_nocommit(struct inode *inode)
{
	return nfs_write_mapping(inode->i_mapping, FLUSH_NOCOMMIT);
}

int nfs_wb_page_cancel(struct inode *inode, struct page *page)
{
	struct nfs_page *req;
	loff_t range_start = page_offset(page);
	loff_t range_end = range_start + (loff_t)(PAGE_CACHE_SIZE - 1);
	struct writeback_control wbc = {
		.bdi = page->mapping->backing_dev_info,
		.sync_mode = WB_SYNC_ALL,
		.nr_to_write = LONG_MAX,
		.range_start = range_start,
		.range_end = range_end,
	};
	int ret = 0;

	BUG_ON(!PageLocked(page));
	for (;;) {
		req = nfs_page_find_request(page);
		if (req == NULL)
			goto out;
		if (test_bit(PG_CLEAN, &req->wb_flags)) {
			nfs_release_request(req);
			break;
		}
		if (nfs_lock_request_dontget(req)) {
			nfs_inode_remove_request(req);
			/*
			 * In case nfs_inode_remove_request has marked the
			 * page as being dirty
			 */
			cancel_dirty_page(page, PAGE_CACHE_SIZE);
			nfs_unlock_request(req);
			break;
		}
		ret = nfs_wait_on_request(req);
		if (ret < 0)
			goto out;
	}
	if (!PagePrivate(page))
		return 0;
	ret = nfs_sync_mapping_wait(page->mapping, &wbc, FLUSH_INVALIDATE);
out:
	return ret;
}

static int nfs_wb_page_priority(struct inode *inode, struct page *page,
				int how)
{
	loff_t range_start = page_offset(page);
	loff_t range_end = range_start + (loff_t)(PAGE_CACHE_SIZE - 1);
	struct writeback_control wbc = {
		.bdi = page->mapping->backing_dev_info,
		.sync_mode = WB_SYNC_ALL,
		.nr_to_write = LONG_MAX,
		.range_start = range_start,
		.range_end = range_end,
	};
	int ret;

	do {
		if (clear_page_dirty_for_io(page)) {
			ret = nfs_writepage_locked(page, &wbc);
			if (ret < 0)
				goto out_error;
		} else if (!PagePrivate(page))
			break;
		ret = nfs_sync_mapping_wait(page->mapping, &wbc, how);
		if (ret < 0)
			goto out_error;
	} while (PagePrivate(page));
	return 0;
out_error:
	__mark_inode_dirty(inode, I_DIRTY_PAGES);
	return ret;
}

/*
 * Write back all requests on one page - we do this before reading it.
 */
int nfs_wb_page(struct inode *inode, struct page *page)
{
	return nfs_wb_page_priority(inode, page, FLUSH_STABLE);
}

int __init nfs_init_writepagecache(void)
{
	nfs_wdata_cachep = kmem_cache_create("nfs_write_data",
					     sizeof(struct nfs_write_data),
					     0, SLAB_HWCACHE_ALIGN,
					     NULL);
	if (nfs_wdata_cachep == NULL)
		return -ENOMEM;

	nfs_wdata_mempool = mempool_create_slab_pool(MIN_POOL_WRITE,
						     nfs_wdata_cachep);
	if (nfs_wdata_mempool == NULL)
		return -ENOMEM;

	nfs_commit_mempool = mempool_create_slab_pool(MIN_POOL_COMMIT,
						      nfs_wdata_cachep);
	if (nfs_commit_mempool == NULL)
		return -ENOMEM;

	/*
	 * NFS congestion size, scale with available memory.
	 *
	 *  64MB:    8192k
	 * 128MB:   11585k
	 * 256MB:   16384k
	 * 512MB:   23170k
	 *   1GB:   32768k
	 *   2GB:   46340k
	 *   4GB:   65536k
	 *   8GB:   92681k
	 *  16GB:  131072k
	 *
	 * This allows larger machines to have larger/more transfers.
	 * Limit the default to 256M
	 */
	nfs_congestion_kb = (16*int_sqrt(totalram_pages)) << (PAGE_SHIFT-10);
	if (nfs_congestion_kb > 256*1024)
		nfs_congestion_kb = 256*1024;

	return 0;
}

void nfs_destroy_writepagecache(void)
{
	mempool_destroy(nfs_commit_mempool);
	mempool_destroy(nfs_wdata_mempool);
	kmem_cache_destroy(nfs_wdata_cachep);
}