// SPDX-License-Identifier: GPL-2.0-only
/*
 * linux/fs/nfs/direct.c
 *
 * Copyright (C) 2003 by Chuck Lever <cel@netapp.com>
 *
 * High-performance uncached I/O for the Linux NFS client
 *
 * There are important applications whose performance or correctness
 * depends on uncached access to file data. Database clusters
 * (multiple copies of the same instance running on separate hosts)
 * implement their own cache coherency protocol that subsumes file
 * system cache protocols. Applications that process datasets
 * considerably larger than the client's memory do not always benefit
 * from a local cache. A streaming video server, for instance, has no
 * need to cache the contents of a file.
 *
 * When an application requests uncached I/O, all read and write requests
 * are made directly to the server; data stored or fetched via these
 * requests is not cached in the Linux page cache. The client does not
 * correct unaligned requests from applications. All requested bytes are
 * held on permanent storage before a direct write system call returns to
 * an application.
 *
 * Solaris implements an uncached I/O facility called directio() that
 * is used for backups and sequential I/O to very large files. Solaris
 * also supports uncaching whole NFS partitions with "-o forcedirectio,"
 * an undocumented mount option.
 *
 * Designed by Jeff Kimmel, Chuck Lever, and Trond Myklebust, with
 * help from Andrew Morton.
 *
 * 18 Dec 2001 Initial implementation for 2.4 --cel
 * 08 Jul 2002 Version for 2.4.19, with bug fixes --trondmy
 * 08 Jun 2003 Port to 2.5 APIs --cel
 * 31 Mar 2004 Handle direct I/O without VFS support --cel
 * 15 Sep 2004 Parallel async reads --cel
 * 04 May 2005 support O_DIRECT with aio --cel
 *
 */

#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/file.h>
#include <linux/pagemap.h>
#include <linux/kref.h>
#include <linux/slab.h>
#include <linux/task_io_accounting_ops.h>
#include <linux/module.h>

#include <linux/nfs_fs.h>
#include <linux/nfs_page.h>
#include <linux/sunrpc/clnt.h>

#include <linux/uaccess.h>
#include <linux/atomic.h>

#include "internal.h"
#include "iostat.h"
#include "pnfs.h"
#include "fscache.h"

#define NFSDBG_FACILITY NFSDBG_VFS

Torvalds 66e18b890bSChristoph Lameter static struct kmem_cache *nfs_direct_cachep; 671da177e4SLinus Torvalds 681da177e4SLinus Torvalds struct nfs_direct_req { 691da177e4SLinus Torvalds struct kref kref; /* release manager */ 7015ce4a0cSChuck Lever 7115ce4a0cSChuck Lever /* I/O parameters */ 72a8881f5aSTrond Myklebust struct nfs_open_context *ctx; /* file open context info */ 73f11ac8dbSTrond Myklebust struct nfs_lock_context *l_ctx; /* Lock context info */ 7499514f8fSChuck Lever struct kiocb * iocb; /* controlling i/o request */ 7588467055SChuck Lever struct inode * inode; /* target file of i/o */ 7615ce4a0cSChuck Lever 7715ce4a0cSChuck Lever /* completion state */ 78607f31e8STrond Myklebust atomic_t io_count; /* i/os we're waiting for */ 7915ce4a0cSChuck Lever spinlock_t lock; /* protect completion state */ 800a00b77bSWeston Andros Adamson 81d9ee6553STrond Myklebust loff_t io_start; /* Start offset for I/O */ 8215ce4a0cSChuck Lever ssize_t count, /* bytes actually processed */ 83ed3743a6SWeston Andros Adamson max_count, /* max expected count */ 8435754bc0SPeng Tao bytes_left, /* bytes left to be sent */ 851da177e4SLinus Torvalds error; /* any reported error */ 86d72b7a6bSTrond Myklebust struct completion completion; /* wait for i/o completion */ 87fad61490STrond Myklebust 88fad61490STrond Myklebust /* commit state */ 891763da12SFred Isaman struct nfs_mds_commit_info mds_cinfo; /* Storage for cinfo */ 901763da12SFred Isaman struct pnfs_ds_commit_info ds_cinfo; /* Storage for cinfo */ 911763da12SFred Isaman struct work_struct work; 92fad61490STrond Myklebust int flags; 93ad3cba22SDave Kleikamp /* for write */ 94fad61490STrond Myklebust #define NFS_ODIRECT_DO_COMMIT (1) /* an unstable reply was received */ 95fad61490STrond Myklebust #define NFS_ODIRECT_RESCHED_WRITES (2) /* write verification failed */ 96ad3cba22SDave Kleikamp /* for read */ 97ad3cba22SDave Kleikamp #define NFS_ODIRECT_SHOULD_DIRTY (3) /* dirty user-space page after read */ 98fb5f7f20STrond Myklebust 
#define NFS_ODIRECT_DONE INT_MAX /* write verification failed */ 991da177e4SLinus Torvalds }; 1001da177e4SLinus Torvalds 1011763da12SFred Isaman static const struct nfs_pgio_completion_ops nfs_direct_write_completion_ops; 1021763da12SFred Isaman static const struct nfs_commit_completion_ops nfs_direct_commit_completion_ops; 1034d3b55d3SAnna Schumaker static void nfs_direct_write_complete(struct nfs_direct_req *dreq); 1041763da12SFred Isaman static void nfs_direct_write_schedule_work(struct work_struct *work); 105607f31e8STrond Myklebust 106607f31e8STrond Myklebust static inline void get_dreq(struct nfs_direct_req *dreq) 107607f31e8STrond Myklebust { 108607f31e8STrond Myklebust atomic_inc(&dreq->io_count); 109607f31e8STrond Myklebust } 110607f31e8STrond Myklebust 111607f31e8STrond Myklebust static inline int put_dreq(struct nfs_direct_req *dreq) 112607f31e8STrond Myklebust { 113607f31e8STrond Myklebust return atomic_dec_and_test(&dreq->io_count); 114607f31e8STrond Myklebust } 115607f31e8STrond Myklebust 1160a00b77bSWeston Andros Adamson static void 117031d73edSTrond Myklebust nfs_direct_handle_truncated(struct nfs_direct_req *dreq, 118031d73edSTrond Myklebust const struct nfs_pgio_header *hdr, 119031d73edSTrond Myklebust ssize_t dreq_len) 1200a00b77bSWeston Andros Adamson { 121031d73edSTrond Myklebust if (!(test_bit(NFS_IOHDR_ERROR, &hdr->flags) || 122031d73edSTrond Myklebust test_bit(NFS_IOHDR_EOF, &hdr->flags))) 123031d73edSTrond Myklebust return; 124031d73edSTrond Myklebust if (dreq->max_count >= dreq_len) { 125031d73edSTrond Myklebust dreq->max_count = dreq_len; 126031d73edSTrond Myklebust if (dreq->count > dreq_len) 127031d73edSTrond Myklebust dreq->count = dreq_len; 128ed3743a6SWeston Andros Adamson 129031d73edSTrond Myklebust if (test_bit(NFS_IOHDR_ERROR, &hdr->flags)) 130031d73edSTrond Myklebust dreq->error = hdr->error; 131031d73edSTrond Myklebust else /* Clear outstanding error if this is EOF */ 132031d73edSTrond Myklebust dreq->error = 0; 1335fadeb47SPeng 
Tao } 1340a00b77bSWeston Andros Adamson } 135031d73edSTrond Myklebust 136031d73edSTrond Myklebust static void 137031d73edSTrond Myklebust nfs_direct_count_bytes(struct nfs_direct_req *dreq, 138031d73edSTrond Myklebust const struct nfs_pgio_header *hdr) 139031d73edSTrond Myklebust { 140031d73edSTrond Myklebust loff_t hdr_end = hdr->io_start + hdr->good_bytes; 141031d73edSTrond Myklebust ssize_t dreq_len = 0; 142031d73edSTrond Myklebust 143031d73edSTrond Myklebust if (hdr_end > dreq->io_start) 144031d73edSTrond Myklebust dreq_len = hdr_end - dreq->io_start; 145031d73edSTrond Myklebust 146031d73edSTrond Myklebust nfs_direct_handle_truncated(dreq, hdr, dreq_len); 147031d73edSTrond Myklebust 148031d73edSTrond Myklebust if (dreq_len > dreq->max_count) 149031d73edSTrond Myklebust dreq_len = dreq->max_count; 150031d73edSTrond Myklebust 151031d73edSTrond Myklebust if (dreq->count < dreq_len) 152031d73edSTrond Myklebust dreq->count = dreq_len; 1531ccbad9fSPeng Tao } 1540a00b77bSWeston Andros Adamson 1551da177e4SLinus Torvalds /** 156b8a32e2bSChuck Lever * nfs_direct_IO - NFS address space operation for direct I/O 157b8a32e2bSChuck Lever * @iocb: target I/O control block 15890090ae6SAl Viro * @iter: I/O buffer 159b8a32e2bSChuck Lever * 160b8a32e2bSChuck Lever * The presence of this routine in the address space ops vector means 161a564b8f0SMel Gorman * the NFS client supports direct I/O. However, for most direct IO, we 162a564b8f0SMel Gorman * shunt off direct read and write requests before the VFS gets them, 163a564b8f0SMel Gorman * so this method is only ever called for swap. 
1641da177e4SLinus Torvalds */ 165c8b8e32dSChristoph Hellwig ssize_t nfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter) 166b8a32e2bSChuck Lever { 167ee8a1a8bSPeng Tao struct inode *inode = iocb->ki_filp->f_mapping->host; 168ee8a1a8bSPeng Tao 169ee8a1a8bSPeng Tao /* we only support swap file calling nfs_direct_IO */ 170ee8a1a8bSPeng Tao if (!IS_SWAPFILE(inode)) 171ee8a1a8bSPeng Tao return 0; 172ee8a1a8bSPeng Tao 17366ee59afSChristoph Hellwig VM_BUG_ON(iov_iter_count(iter) != PAGE_SIZE); 174a564b8f0SMel Gorman 1756f673763SOmar Sandoval if (iov_iter_rw(iter) == READ) 176*64158668SNeilBrown return nfs_file_direct_read(iocb, iter, true); 177*64158668SNeilBrown return nfs_file_direct_write(iocb, iter, true); 178b8a32e2bSChuck Lever } 179b8a32e2bSChuck Lever 180749e146eSChuck Lever static void nfs_direct_release_pages(struct page **pages, unsigned int npages) 1819c93ab7dSChuck Lever { 182749e146eSChuck Lever unsigned int i; 183607f31e8STrond Myklebust for (i = 0; i < npages; i++) 18409cbfeafSKirill A. 
Shutemov put_page(pages[i]); 1856b45d858STrond Myklebust } 1866b45d858STrond Myklebust 1871763da12SFred Isaman void nfs_init_cinfo_from_dreq(struct nfs_commit_info *cinfo, 1881763da12SFred Isaman struct nfs_direct_req *dreq) 1891763da12SFred Isaman { 190fe238e60SDave Wysochanski cinfo->inode = dreq->inode; 1911763da12SFred Isaman cinfo->mds = &dreq->mds_cinfo; 1921763da12SFred Isaman cinfo->ds = &dreq->ds_cinfo; 1931763da12SFred Isaman cinfo->dreq = dreq; 1941763da12SFred Isaman cinfo->completion_ops = &nfs_direct_commit_completion_ops; 1951763da12SFred Isaman } 1961763da12SFred Isaman 19793619e59SChuck Lever static inline struct nfs_direct_req *nfs_direct_req_alloc(void) 1981da177e4SLinus Torvalds { 1991da177e4SLinus Torvalds struct nfs_direct_req *dreq; 2001da177e4SLinus Torvalds 201292f3eeeSTrond Myklebust dreq = kmem_cache_zalloc(nfs_direct_cachep, GFP_KERNEL); 2021da177e4SLinus Torvalds if (!dreq) 2031da177e4SLinus Torvalds return NULL; 2041da177e4SLinus Torvalds 2051da177e4SLinus Torvalds kref_init(&dreq->kref); 206607f31e8STrond Myklebust kref_get(&dreq->kref); 207d72b7a6bSTrond Myklebust init_completion(&dreq->completion); 2081763da12SFred Isaman INIT_LIST_HEAD(&dreq->mds_cinfo.list); 209c21e7168STrond Myklebust pnfs_init_ds_commit_info(&dreq->ds_cinfo); 2101763da12SFred Isaman INIT_WORK(&dreq->work, nfs_direct_write_schedule_work); 21115ce4a0cSChuck Lever spin_lock_init(&dreq->lock); 21293619e59SChuck Lever 21393619e59SChuck Lever return dreq; 21493619e59SChuck Lever } 21593619e59SChuck Lever 216b4946ffbSTrond Myklebust static void nfs_direct_req_free(struct kref *kref) 2171da177e4SLinus Torvalds { 2181da177e4SLinus Torvalds struct nfs_direct_req *dreq = container_of(kref, struct nfs_direct_req, kref); 219a8881f5aSTrond Myklebust 22018f41296STrond Myklebust pnfs_release_ds_info(&dreq->ds_cinfo, dreq->inode); 221f11ac8dbSTrond Myklebust if (dreq->l_ctx != NULL) 222f11ac8dbSTrond Myklebust nfs_put_lock_context(dreq->l_ctx); 223a8881f5aSTrond Myklebust if 
(dreq->ctx != NULL) 224a8881f5aSTrond Myklebust put_nfs_open_context(dreq->ctx); 2251da177e4SLinus Torvalds kmem_cache_free(nfs_direct_cachep, dreq); 2261da177e4SLinus Torvalds } 2271da177e4SLinus Torvalds 228b4946ffbSTrond Myklebust static void nfs_direct_req_release(struct nfs_direct_req *dreq) 229b4946ffbSTrond Myklebust { 230b4946ffbSTrond Myklebust kref_put(&dreq->kref, nfs_direct_req_free); 231b4946ffbSTrond Myklebust } 232b4946ffbSTrond Myklebust 2336296556fSPeng Tao ssize_t nfs_dreq_bytes_left(struct nfs_direct_req *dreq) 2346296556fSPeng Tao { 2356296556fSPeng Tao return dreq->bytes_left; 2366296556fSPeng Tao } 2376296556fSPeng Tao EXPORT_SYMBOL_GPL(nfs_dreq_bytes_left); 2386296556fSPeng Tao 239d4cc948bSChuck Lever /* 240bc0fb201SChuck Lever * Collects and returns the final error value/byte-count. 241bc0fb201SChuck Lever */ 242bc0fb201SChuck Lever static ssize_t nfs_direct_wait(struct nfs_direct_req *dreq) 243bc0fb201SChuck Lever { 24415ce4a0cSChuck Lever ssize_t result = -EIOCBQUEUED; 245bc0fb201SChuck Lever 246bc0fb201SChuck Lever /* Async requests don't wait here */ 247bc0fb201SChuck Lever if (dreq->iocb) 248bc0fb201SChuck Lever goto out; 249bc0fb201SChuck Lever 250150030b7SMatthew Wilcox result = wait_for_completion_killable(&dreq->completion); 251bc0fb201SChuck Lever 252d2a7de0bSTrond Myklebust if (!result) { 253d2a7de0bSTrond Myklebust result = dreq->count; 254d2a7de0bSTrond Myklebust WARN_ON_ONCE(dreq->count < 0); 255d2a7de0bSTrond Myklebust } 256bc0fb201SChuck Lever if (!result) 25715ce4a0cSChuck Lever result = dreq->error; 258bc0fb201SChuck Lever 259bc0fb201SChuck Lever out: 260bc0fb201SChuck Lever return (ssize_t) result; 261bc0fb201SChuck Lever } 262bc0fb201SChuck Lever 263bc0fb201SChuck Lever /* 264607f31e8STrond Myklebust * Synchronous I/O uses a stack-allocated iocb. Thus we can't trust 265607f31e8STrond Myklebust * the iocb is still valid here if this is a synchronous request. 
26663ab46abSChuck Lever */ 267f7b5c340STrond Myklebust static void nfs_direct_complete(struct nfs_direct_req *dreq) 26863ab46abSChuck Lever { 2699811cd57SChristoph Hellwig struct inode *inode = dreq->inode; 2709811cd57SChristoph Hellwig 27165caafd0SOlga Kornievskaia inode_dio_end(inode); 27265caafd0SOlga Kornievskaia 2732a009ec9SChristoph Hellwig if (dreq->iocb) { 2742a009ec9SChristoph Hellwig long res = (long) dreq->error; 275d2a7de0bSTrond Myklebust if (dreq->count != 0) { 2762a009ec9SChristoph Hellwig res = (long) dreq->count; 277d2a7de0bSTrond Myklebust WARN_ON_ONCE(dreq->count < 0); 278d2a7de0bSTrond Myklebust } 2796b19b766SJens Axboe dreq->iocb->ki_complete(dreq->iocb, res); 280d72b7a6bSTrond Myklebust } 2812a009ec9SChristoph Hellwig 282024de8f1SDaniel Wagner complete(&dreq->completion); 28363ab46abSChuck Lever 284b4946ffbSTrond Myklebust nfs_direct_req_release(dreq); 28563ab46abSChuck Lever } 28663ab46abSChuck Lever 287584aa810SFred Isaman static void nfs_direct_read_completion(struct nfs_pgio_header *hdr) 288fdd1e74cSTrond Myklebust { 289584aa810SFred Isaman unsigned long bytes = 0; 290584aa810SFred Isaman struct nfs_direct_req *dreq = hdr->dreq; 291fdd1e74cSTrond Myklebust 29215ce4a0cSChuck Lever spin_lock(&dreq->lock); 293eb2c50daSTrond Myklebust if (test_bit(NFS_IOHDR_REDO, &hdr->flags)) { 294eb2c50daSTrond Myklebust spin_unlock(&dreq->lock); 295eb2c50daSTrond Myklebust goto out_put; 296eb2c50daSTrond Myklebust } 297eb2c50daSTrond Myklebust 298031d73edSTrond Myklebust nfs_direct_count_bytes(dreq, hdr); 29915ce4a0cSChuck Lever spin_unlock(&dreq->lock); 3001da177e4SLinus Torvalds 301584aa810SFred Isaman while (!list_empty(&hdr->pages)) { 302584aa810SFred Isaman struct nfs_page *req = nfs_list_entry(hdr->pages.next); 303584aa810SFred Isaman struct page *page = req->wb_page; 304584aa810SFred Isaman 305ad3cba22SDave Kleikamp if (!PageCompound(page) && bytes < hdr->good_bytes && 306ad3cba22SDave Kleikamp (dreq->flags == NFS_ODIRECT_SHOULD_DIRTY)) 
3074bd8b010STrond Myklebust set_page_dirty(page); 308584aa810SFred Isaman bytes += req->wb_bytes; 309584aa810SFred Isaman nfs_list_remove_request(req); 310beeb5338SAnna Schumaker nfs_release_request(req); 311584aa810SFred Isaman } 312584aa810SFred Isaman out_put: 313607f31e8STrond Myklebust if (put_dreq(dreq)) 314f7b5c340STrond Myklebust nfs_direct_complete(dreq); 315584aa810SFred Isaman hdr->release(hdr); 3161da177e4SLinus Torvalds } 3171da177e4SLinus Torvalds 318df3accb8STrond Myklebust static void nfs_read_sync_pgio_error(struct list_head *head, int error) 319cd841605SFred Isaman { 320584aa810SFred Isaman struct nfs_page *req; 321cd841605SFred Isaman 322584aa810SFred Isaman while (!list_empty(head)) { 323584aa810SFred Isaman req = nfs_list_entry(head->next); 324584aa810SFred Isaman nfs_list_remove_request(req); 325584aa810SFred Isaman nfs_release_request(req); 326cd841605SFred Isaman } 327584aa810SFred Isaman } 328584aa810SFred Isaman 329584aa810SFred Isaman static void nfs_direct_pgio_init(struct nfs_pgio_header *hdr) 330584aa810SFred Isaman { 331584aa810SFred Isaman get_dreq(hdr->dreq); 332584aa810SFred Isaman } 333584aa810SFred Isaman 334584aa810SFred Isaman static const struct nfs_pgio_completion_ops nfs_direct_read_completion_ops = { 3353e9e0ca3STrond Myklebust .error_cleanup = nfs_read_sync_pgio_error, 336584aa810SFred Isaman .init_hdr = nfs_direct_pgio_init, 337584aa810SFred Isaman .completion = nfs_direct_read_completion, 338584aa810SFred Isaman }; 339cd841605SFred Isaman 340d4cc948bSChuck Lever /* 341607f31e8STrond Myklebust * For each rsize'd chunk of the user's buffer, dispatch an NFS READ 342607f31e8STrond Myklebust * operation. If nfs_readdata_alloc() or get_user_pages() fails, 343607f31e8STrond Myklebust * bail and stop sending more reads. Read length accounting is 344607f31e8STrond Myklebust * handled automatically by nfs_direct_read_result(). Otherwise, if 345607f31e8STrond Myklebust * no requests have been sent, just return an error. 
3461da177e4SLinus Torvalds */ 34791f79c43SAl Viro 34891f79c43SAl Viro static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq, 34991f79c43SAl Viro struct iov_iter *iter, 35091f79c43SAl Viro loff_t pos) 3511da177e4SLinus Torvalds { 35291f79c43SAl Viro struct nfs_pageio_descriptor desc; 35391f79c43SAl Viro struct inode *inode = dreq->inode; 35491f79c43SAl Viro ssize_t result = -EINVAL; 35591f79c43SAl Viro size_t requested_bytes = 0; 35691f79c43SAl Viro size_t rsize = max_t(size_t, NFS_SERVER(inode)->rsize, PAGE_SIZE); 35782b145c5SChuck Lever 35816b90578SLinus Torvalds nfs_pageio_init_read(&desc, dreq->inode, false, 35991f79c43SAl Viro &nfs_direct_read_completion_ops); 36091f79c43SAl Viro get_dreq(dreq); 36191f79c43SAl Viro desc.pg_dreq = dreq; 362fe0f07d0SJens Axboe inode_dio_begin(inode); 36391f79c43SAl Viro 36491f79c43SAl Viro while (iov_iter_count(iter)) { 36591f79c43SAl Viro struct page **pagevec; 3665dd602f2SChuck Lever size_t bytes; 36791f79c43SAl Viro size_t pgbase; 36891f79c43SAl Viro unsigned npages, i; 3691da177e4SLinus Torvalds 37091f79c43SAl Viro result = iov_iter_get_pages_alloc(iter, &pagevec, 37191f79c43SAl Viro rsize, &pgbase); 372584aa810SFred Isaman if (result < 0) 373749e146eSChuck Lever break; 374a564b8f0SMel Gorman 37591f79c43SAl Viro bytes = result; 37691f79c43SAl Viro iov_iter_advance(iter, bytes); 37791f79c43SAl Viro npages = (result + pgbase + PAGE_SIZE - 1) / PAGE_SIZE; 378584aa810SFred Isaman for (i = 0; i < npages; i++) { 379584aa810SFred Isaman struct nfs_page *req; 380bf5fc402STrond Myklebust unsigned int req_len = min_t(size_t, bytes, PAGE_SIZE - pgbase); 381584aa810SFred Isaman /* XXX do we need to do the eof zeroing found in async_filler? 
*/ 38228b1d3f5STrond Myklebust req = nfs_create_request(dreq->ctx, pagevec[i], 383584aa810SFred Isaman pgbase, req_len); 384584aa810SFred Isaman if (IS_ERR(req)) { 385584aa810SFred Isaman result = PTR_ERR(req); 386dbae4c73STrond Myklebust break; 387584aa810SFred Isaman } 388584aa810SFred Isaman req->wb_index = pos >> PAGE_SHIFT; 389584aa810SFred Isaman req->wb_offset = pos & ~PAGE_MASK; 39091f79c43SAl Viro if (!nfs_pageio_add_request(&desc, req)) { 39191f79c43SAl Viro result = desc.pg_error; 392584aa810SFred Isaman nfs_release_request(req); 393584aa810SFred Isaman break; 394584aa810SFred Isaman } 395584aa810SFred Isaman pgbase = 0; 396584aa810SFred Isaman bytes -= req_len; 39791f79c43SAl Viro requested_bytes += req_len; 398584aa810SFred Isaman pos += req_len; 39935754bc0SPeng Tao dreq->bytes_left -= req_len; 400584aa810SFred Isaman } 4016d74743bSTrond Myklebust nfs_direct_release_pages(pagevec, npages); 40291f79c43SAl Viro kvfree(pagevec); 40319f73787SChuck Lever if (result < 0) 40419f73787SChuck Lever break; 40519f73787SChuck Lever } 40619f73787SChuck Lever 407584aa810SFred Isaman nfs_pageio_complete(&desc); 408584aa810SFred Isaman 409839f7ad6SChuck Lever /* 410839f7ad6SChuck Lever * If no bytes were started, return the error, and let the 411839f7ad6SChuck Lever * generic layer handle the completion. 412839f7ad6SChuck Lever */ 413839f7ad6SChuck Lever if (requested_bytes == 0) { 414d03727b2SOlga Kornievskaia inode_dio_end(inode); 41565caafd0SOlga Kornievskaia nfs_direct_req_release(dreq); 416839f7ad6SChuck Lever return result < 0 ? 
result : -EIO; 417839f7ad6SChuck Lever } 418839f7ad6SChuck Lever 41919f73787SChuck Lever if (put_dreq(dreq)) 420f7b5c340STrond Myklebust nfs_direct_complete(dreq); 42185128b2bSAl Viro return requested_bytes; 42219f73787SChuck Lever } 42319f73787SChuck Lever 42414a3ec79SChristoph Hellwig /** 42514a3ec79SChristoph Hellwig * nfs_file_direct_read - file direct read operation for NFS files 42614a3ec79SChristoph Hellwig * @iocb: target I/O control block 427619d30b4SAl Viro * @iter: vector of user buffers into which to read data 428*64158668SNeilBrown * @swap: flag indicating this is swap IO, not O_DIRECT IO 42914a3ec79SChristoph Hellwig * 43014a3ec79SChristoph Hellwig * We use this function for direct reads instead of calling 43114a3ec79SChristoph Hellwig * generic_file_aio_read() in order to avoid gfar's check to see if 43214a3ec79SChristoph Hellwig * the request starts before the end of the file. For that check 43314a3ec79SChristoph Hellwig * to work, we must generate a GETATTR before each direct read, and 43414a3ec79SChristoph Hellwig * even then there is a window between the GETATTR and the subsequent 43514a3ec79SChristoph Hellwig * READ where the file size could change. Our preference is simply 43614a3ec79SChristoph Hellwig * to do all reads the application wants, and the server will take 43714a3ec79SChristoph Hellwig * care of managing the end of file boundary. 43814a3ec79SChristoph Hellwig * 43914a3ec79SChristoph Hellwig * This function also eliminates unnecessarily updating the file's 44014a3ec79SChristoph Hellwig * atime locally, as the NFS server sets the file's atime, and this 44114a3ec79SChristoph Hellwig * client must read the updated atime from the server back into its 44214a3ec79SChristoph Hellwig * cache. 
44314a3ec79SChristoph Hellwig */ 444*64158668SNeilBrown ssize_t nfs_file_direct_read(struct kiocb *iocb, struct iov_iter *iter, 445*64158668SNeilBrown bool swap) 4461da177e4SLinus Torvalds { 44714a3ec79SChristoph Hellwig struct file *file = iocb->ki_filp; 44814a3ec79SChristoph Hellwig struct address_space *mapping = file->f_mapping; 44914a3ec79SChristoph Hellwig struct inode *inode = mapping->host; 4501da177e4SLinus Torvalds struct nfs_direct_req *dreq; 451b3c54de6STrond Myklebust struct nfs_lock_context *l_ctx; 45286b93667SColin Ian King ssize_t result, requested; 453a6cbcd4aSAl Viro size_t count = iov_iter_count(iter); 45414a3ec79SChristoph Hellwig nfs_add_stats(mapping->host, NFSIOS_DIRECTREADBYTES, count); 45514a3ec79SChristoph Hellwig 45614a3ec79SChristoph Hellwig dfprintk(FILE, "NFS: direct read(%pD2, %zd@%Ld)\n", 457c8b8e32dSChristoph Hellwig file, count, (long long) iocb->ki_pos); 45814a3ec79SChristoph Hellwig 45914a3ec79SChristoph Hellwig result = 0; 46014a3ec79SChristoph Hellwig if (!count) 46114a3ec79SChristoph Hellwig goto out; 46214a3ec79SChristoph Hellwig 46314a3ec79SChristoph Hellwig task_io_account_read(count); 46414a3ec79SChristoph Hellwig 46514a3ec79SChristoph Hellwig result = -ENOMEM; 466607f31e8STrond Myklebust dreq = nfs_direct_req_alloc(); 467f11ac8dbSTrond Myklebust if (dreq == NULL) 468a5864c99STrond Myklebust goto out; 4691da177e4SLinus Torvalds 47091d5b470SChuck Lever dreq->inode = inode; 471ed3743a6SWeston Andros Adamson dreq->bytes_left = dreq->max_count = count; 472c8b8e32dSChristoph Hellwig dreq->io_start = iocb->ki_pos; 473cd3758e3STrond Myklebust dreq->ctx = get_nfs_open_context(nfs_file_open_context(iocb->ki_filp)); 474b3c54de6STrond Myklebust l_ctx = nfs_get_lock_context(dreq->ctx); 475b3c54de6STrond Myklebust if (IS_ERR(l_ctx)) { 476b3c54de6STrond Myklebust result = PTR_ERR(l_ctx); 4778605cf0eSMisono Tomohiro nfs_direct_req_release(dreq); 478f11ac8dbSTrond Myklebust goto out_release; 479b3c54de6STrond Myklebust } 480b3c54de6STrond 
Myklebust dreq->l_ctx = l_ctx; 481487b8372SChuck Lever if (!is_sync_kiocb(iocb)) 482487b8372SChuck Lever dreq->iocb = iocb; 4831da177e4SLinus Torvalds 484ad3cba22SDave Kleikamp if (iter_is_iovec(iter)) 485ad3cba22SDave Kleikamp dreq->flags = NFS_ODIRECT_SHOULD_DIRTY; 486ad3cba22SDave Kleikamp 487*64158668SNeilBrown if (!swap) 488a5864c99STrond Myklebust nfs_start_io_direct(inode); 489a5864c99STrond Myklebust 490619d30b4SAl Viro NFS_I(inode)->read_io += count; 49185128b2bSAl Viro requested = nfs_direct_read_schedule_iovec(dreq, iter, iocb->ki_pos); 492d0b9875dSChristoph Hellwig 493*64158668SNeilBrown if (!swap) 494a5864c99STrond Myklebust nfs_end_io_direct(inode); 495d0b9875dSChristoph Hellwig 49685128b2bSAl Viro if (requested > 0) { 497bc0fb201SChuck Lever result = nfs_direct_wait(dreq); 49885128b2bSAl Viro if (result > 0) { 49985128b2bSAl Viro requested -= result; 500c8b8e32dSChristoph Hellwig iocb->ki_pos += result; 50114a3ec79SChristoph Hellwig } 50285128b2bSAl Viro iov_iter_revert(iter, requested); 50385128b2bSAl Viro } else { 50485128b2bSAl Viro result = requested; 50585128b2bSAl Viro } 506d0b9875dSChristoph Hellwig 507f11ac8dbSTrond Myklebust out_release: 508b4946ffbSTrond Myklebust nfs_direct_req_release(dreq); 509f11ac8dbSTrond Myklebust out: 5101da177e4SLinus Torvalds return result; 5111da177e4SLinus Torvalds } 5121da177e4SLinus Torvalds 513085d1e33STom Haynes static void 514ed5d588fSTrond Myklebust nfs_direct_join_group(struct list_head *list, struct inode *inode) 515ed5d588fSTrond Myklebust { 516ed5d588fSTrond Myklebust struct nfs_page *req, *next; 517ed5d588fSTrond Myklebust 518ed5d588fSTrond Myklebust list_for_each_entry(req, list, wb_list) { 519ed5d588fSTrond Myklebust if (req->wb_head != req || req->wb_this_page == req) 520ed5d588fSTrond Myklebust continue; 521ed5d588fSTrond Myklebust for (next = req->wb_this_page; 522ed5d588fSTrond Myklebust next != req->wb_head; 523ed5d588fSTrond Myklebust next = next->wb_this_page) { 524ed5d588fSTrond Myklebust 
nfs_list_remove_request(next); 525ed5d588fSTrond Myklebust nfs_release_request(next); 526ed5d588fSTrond Myklebust } 527ed5d588fSTrond Myklebust nfs_join_page_group(req, inode); 528ed5d588fSTrond Myklebust } 529ed5d588fSTrond Myklebust } 530ed5d588fSTrond Myklebust 531ed5d588fSTrond Myklebust static void 532085d1e33STom Haynes nfs_direct_write_scan_commit_list(struct inode *inode, 533085d1e33STom Haynes struct list_head *list, 534085d1e33STom Haynes struct nfs_commit_info *cinfo) 535085d1e33STom Haynes { 536e824f99aSTrond Myklebust mutex_lock(&NFS_I(cinfo->inode)->commit_mutex); 5379c455a8cSTrond Myklebust pnfs_recover_commit_reqs(list, cinfo); 538085d1e33STom Haynes nfs_scan_commit_list(&cinfo->mds->list, list, cinfo, 0); 539e824f99aSTrond Myklebust mutex_unlock(&NFS_I(cinfo->inode)->commit_mutex); 540085d1e33STom Haynes } 541085d1e33STom Haynes 542fad61490STrond Myklebust static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq) 5431da177e4SLinus Torvalds { 5441763da12SFred Isaman struct nfs_pageio_descriptor desc; 5451763da12SFred Isaman struct nfs_page *req, *tmp; 5461763da12SFred Isaman LIST_HEAD(reqs); 5471763da12SFred Isaman struct nfs_commit_info cinfo; 5481763da12SFred Isaman LIST_HEAD(failed); 5491763da12SFred Isaman 5501763da12SFred Isaman nfs_init_cinfo_from_dreq(&cinfo, dreq); 551085d1e33STom Haynes nfs_direct_write_scan_commit_list(dreq->inode, &reqs, &cinfo); 5521da177e4SLinus Torvalds 553ed5d588fSTrond Myklebust nfs_direct_join_group(&reqs, dreq->inode); 554ed5d588fSTrond Myklebust 555fad61490STrond Myklebust dreq->count = 0; 556031d73edSTrond Myklebust dreq->max_count = 0; 557031d73edSTrond Myklebust list_for_each_entry(req, &reqs, wb_list) 558031d73edSTrond Myklebust dreq->max_count += req->wb_bytes; 559a5314a74STrond Myklebust nfs_clear_pnfs_ds_commit_verifiers(&dreq->ds_cinfo); 560607f31e8STrond Myklebust get_dreq(dreq); 5611da177e4SLinus Torvalds 562a20c93e3SChristoph Hellwig nfs_pageio_init_write(&desc, dreq->inode, FLUSH_STABLE, 
false, 5631763da12SFred Isaman &nfs_direct_write_completion_ops); 5641763da12SFred Isaman desc.pg_dreq = dreq; 565607f31e8STrond Myklebust 5661763da12SFred Isaman list_for_each_entry_safe(req, tmp, &reqs, wb_list) { 56733344e0fSTrond Myklebust /* Bump the transmission count */ 56833344e0fSTrond Myklebust req->wb_nio++; 5691763da12SFred Isaman if (!nfs_pageio_add_request(&desc, req)) { 570078b5fd9STrond Myklebust nfs_list_move_request(req, &failed); 571fe238e60SDave Wysochanski spin_lock(&cinfo.inode->i_lock); 5721763da12SFred Isaman dreq->flags = 0; 573d600ad1fSPeng Tao if (desc.pg_error < 0) 574d600ad1fSPeng Tao dreq->error = desc.pg_error; 575d600ad1fSPeng Tao else 5761763da12SFred Isaman dreq->error = -EIO; 577fe238e60SDave Wysochanski spin_unlock(&cinfo.inode->i_lock); 5781763da12SFred Isaman } 5795a695da2STrond Myklebust nfs_release_request(req); 5801763da12SFred Isaman } 5811763da12SFred Isaman nfs_pageio_complete(&desc); 582607f31e8STrond Myklebust 5834035c248STrond Myklebust while (!list_empty(&failed)) { 5844035c248STrond Myklebust req = nfs_list_entry(failed.next); 5854035c248STrond Myklebust nfs_list_remove_request(req); 5861d1afcbcSTrond Myklebust nfs_unlock_and_release_request(req); 5874035c248STrond Myklebust } 588607f31e8STrond Myklebust 589607f31e8STrond Myklebust if (put_dreq(dreq)) 5904d3b55d3SAnna Schumaker nfs_direct_write_complete(dreq); 591fad61490STrond Myklebust } 5921da177e4SLinus Torvalds /* nfs_direct_commit_complete - commit completion callback (installed as .completion in nfs_direct_commit_completion_ops). @data: finished commit; its task status, write verifier, owning dreq and page list are read here. A negative task status is stored in dreq->error, zeroes dreq->count and dreq->max_count and sets the flags to NFS_ODIRECT_DONE; if the dreq was already NFS_ODIRECT_DONE, its recorded error is used as the status instead. Every request is then taken off data->pages: on a non-negative status with a verifier mismatch it is re-marked for commit and the dreq is flagged NFS_ODIRECT_RESCHED_WRITES, otherwise nfs_release_request() is called on it; in both cases it is unlocked and released. nfs_direct_write_complete() runs once nfs_commit_end() returns true. */ 5931763da12SFred Isaman static void nfs_direct_commit_complete(struct nfs_commit_data *data) 594fad61490STrond Myklebust { 5951f28476dSTrond Myklebust const struct nfs_writeverf *verf = data->res.verf; 5960b7c0153SFred Isaman struct nfs_direct_req *dreq = data->dreq; 5971763da12SFred Isaman struct nfs_commit_info cinfo; 5981763da12SFred Isaman struct nfs_page *req; 599c9d8f89dSTrond Myklebust int status = data->task.tk_status; 600c9d8f89dSTrond Myklebust 601fb5f7f20STrond Myklebust if (status < 0) { 602fb5f7f20STrond Myklebust /* Errors in commit are
fatal */ 603fb5f7f20STrond Myklebust dreq->error = status; 604fb5f7f20STrond Myklebust dreq->max_count = 0; 605fb5f7f20STrond Myklebust dreq->count = 0; 606fb5f7f20STrond Myklebust dreq->flags = NFS_ODIRECT_DONE; 607fb5f7f20STrond Myklebust } else if (dreq->flags == NFS_ODIRECT_DONE) 608fb5f7f20STrond Myklebust status = dreq->error; 609fb5f7f20STrond Myklebust 6101763da12SFred Isaman nfs_init_cinfo_from_dreq(&cinfo, dreq); 611fad61490STrond Myklebust 6121763da12SFred Isaman while (!list_empty(&data->pages)) { 6131763da12SFred Isaman req = nfs_list_entry(data->pages.next); 6141763da12SFred Isaman nfs_list_remove_request(req); 6151f28476dSTrond Myklebust if (status >= 0 && !nfs_write_match_verf(verf, req)) { 6161f28476dSTrond Myklebust dreq->flags = NFS_ODIRECT_RESCHED_WRITES; 61733344e0fSTrond Myklebust /* 61833344e0fSTrond Myklebust * Despite the reboot, the write was successful, 61933344e0fSTrond Myklebust * so reset wb_nio. 62033344e0fSTrond Myklebust */ 62133344e0fSTrond Myklebust req->wb_nio = 0; 622b57ff130SWeston Andros Adamson nfs_mark_request_commit(req, NULL, &cinfo, 0); 6231f28476dSTrond Myklebust } else /* Error or match */ 624906369e4SFred Isaman nfs_release_request(req); 6251d1afcbcSTrond Myklebust nfs_unlock_and_release_request(req); 626fad61490STrond Myklebust } 627fad61490STrond Myklebust 628133a48abSTrond Myklebust if (nfs_commit_end(cinfo.mds)) 6294d3b55d3SAnna Schumaker nfs_direct_write_complete(dreq); 6301763da12SFred Isaman } 6311763da12SFred Isaman /* nfs_direct_resched_write - .resched_write hook of nfs_direct_commit_completion_ops. @cinfo: commit info whose dreq is updated; @req: request to re-mark. Under dreq->lock the flags become NFS_ODIRECT_RESCHED_WRITES unless they are already NFS_ODIRECT_DONE; the request is then marked for commit again outside the lock. */ 632b20135d0STrond Myklebust static void nfs_direct_resched_write(struct nfs_commit_info *cinfo, 633b20135d0STrond Myklebust struct nfs_page *req) 6341763da12SFred Isaman { 635b20135d0STrond Myklebust struct nfs_direct_req *dreq = cinfo->dreq; 636b20135d0STrond Myklebust 637b20135d0STrond Myklebust spin_lock(&dreq->lock); 638fb5f7f20STrond Myklebust if (dreq->flags != NFS_ODIRECT_DONE) 639b20135d0STrond Myklebust dreq->flags = NFS_ODIRECT_RESCHED_WRITES; 640b20135d0STrond Myklebust
spin_unlock(&dreq->lock); 641b20135d0STrond Myklebust nfs_mark_request_commit(req, NULL, cinfo, 0); 6421763da12SFred Isaman } 6431763da12SFred Isaman /* Commit completion hooks used for direct I/O: binds the two callbacks defined in this file to the .completion and .resched_write slots. */ 6441763da12SFred Isaman static const struct nfs_commit_completion_ops nfs_direct_commit_completion_ops = { 6451763da12SFred Isaman .completion = nfs_direct_commit_complete, 646b20135d0STrond Myklebust .resched_write = nfs_direct_resched_write, 647fad61490STrond Myklebust }; 648fad61490STrond Myklebust /* nfs_direct_commit_schedule - gather the requests awaiting commit for @dreq with nfs_scan_commit() and hand them to nfs_generic_commit_list(); if that returns a negative value the writes are rescheduled through nfs_direct_write_reschedule() instead. */ 649fad61490STrond Myklebust static void nfs_direct_commit_schedule(struct nfs_direct_req *dreq) 650fad61490STrond Myklebust { 6511763da12SFred Isaman int res; 6521763da12SFred Isaman struct nfs_commit_info cinfo; 6531763da12SFred Isaman LIST_HEAD(mds_list); 654fad61490STrond Myklebust 6551763da12SFred Isaman nfs_init_cinfo_from_dreq(&cinfo, dreq); 6561763da12SFred Isaman nfs_scan_commit(dreq->inode, &mds_list, &cinfo); 6571763da12SFred Isaman res = nfs_generic_commit_list(dreq->inode, &mds_list, 0, &cinfo); 6581763da12SFred Isaman if (res < 0) /* res == -ENOMEM */ 6591763da12SFred Isaman nfs_direct_write_reschedule(dreq); 6601da177e4SLinus Torvalds } 6611da177e4SLinus Torvalds /* nfs_direct_write_clear_reqs - collect the requests found by nfs_direct_write_scan_commit_list() for @dreq onto a local list and drop each one: remove it from the list, call nfs_release_request(), then unlock and release it. NOTE(review): the two release calls presumably drop two separate references held on each request - confirm against the code that takes them. */ 662fb5f7f20STrond Myklebust static void nfs_direct_write_clear_reqs(struct nfs_direct_req *dreq) 663fb5f7f20STrond Myklebust { 664fb5f7f20STrond Myklebust struct nfs_commit_info cinfo; 665fb5f7f20STrond Myklebust struct nfs_page *req; 666fb5f7f20STrond Myklebust LIST_HEAD(reqs); 667fb5f7f20STrond Myklebust 668fb5f7f20STrond Myklebust nfs_init_cinfo_from_dreq(&cinfo, dreq); 669fb5f7f20STrond Myklebust nfs_direct_write_scan_commit_list(dreq->inode, &reqs, &cinfo); 670fb5f7f20STrond Myklebust 671fb5f7f20STrond Myklebust while (!list_empty(&reqs)) { 672fb5f7f20STrond Myklebust req = nfs_list_entry(reqs.next); 673fb5f7f20STrond Myklebust nfs_list_remove_request(req); 674f02cec9dSTrond Myklebust nfs_release_request(req); 675fb5f7f20STrond Myklebust nfs_unlock_and_release_request(req); 676fb5f7f20STrond Myklebust } 677fb5f7f20STrond Myklebust }
678fb5f7f20STrond Myklebust 6791763da12SFred Isaman static void nfs_direct_write_schedule_work(struct work_struct *work) 6801da177e4SLinus Torvalds { 6811763da12SFred Isaman struct nfs_direct_req *dreq = container_of(work, struct nfs_direct_req, work); 682fad61490STrond Myklebust int flags = dreq->flags; 6831da177e4SLinus Torvalds 684fad61490STrond Myklebust dreq->flags = 0; 685fad61490STrond Myklebust switch (flags) { 686fad61490STrond Myklebust case NFS_ODIRECT_DO_COMMIT: 687fad61490STrond Myklebust nfs_direct_commit_schedule(dreq); 6881da177e4SLinus Torvalds break; 689fad61490STrond Myklebust case NFS_ODIRECT_RESCHED_WRITES: 690fad61490STrond Myklebust nfs_direct_write_reschedule(dreq); 6911da177e4SLinus Torvalds break; 6921da177e4SLinus Torvalds default: 693fb5f7f20STrond Myklebust nfs_direct_write_clear_reqs(dreq); 694f7b5c340STrond Myklebust nfs_zap_mapping(dreq->inode, dreq->inode->i_mapping); 695f7b5c340STrond Myklebust nfs_direct_complete(dreq); 6961da177e4SLinus Torvalds } 697fad61490STrond Myklebust } 698fad61490STrond Myklebust 6994d3b55d3SAnna Schumaker static void nfs_direct_write_complete(struct nfs_direct_req *dreq) 700fad61490STrond Myklebust { 70146483c2eSNeilBrown queue_work(nfsiod_workqueue, &dreq->work); /* Calls nfs_direct_write_schedule_work */ 702fad61490STrond Myklebust } 7031763da12SFred Isaman 7041763da12SFred Isaman static void nfs_direct_write_completion(struct nfs_pgio_header *hdr) 7051763da12SFred Isaman { 7061763da12SFred Isaman struct nfs_direct_req *dreq = hdr->dreq; 7071763da12SFred Isaman struct nfs_commit_info cinfo; 7081763da12SFred Isaman struct nfs_page *req = nfs_list_entry(hdr->pages.next); 7093731d44bSTrond Myklebust int flags = NFS_ODIRECT_DONE; 7101763da12SFred Isaman 7111763da12SFred Isaman nfs_init_cinfo_from_dreq(&cinfo, dreq); 7121763da12SFred Isaman 7131763da12SFred Isaman spin_lock(&dreq->lock); 714eb2c50daSTrond Myklebust if (test_bit(NFS_IOHDR_REDO, &hdr->flags)) { 715eb2c50daSTrond Myklebust 
spin_unlock(&dreq->lock); 716eb2c50daSTrond Myklebust goto out_put; 717eb2c50daSTrond Myklebust } 718eb2c50daSTrond Myklebust 719031d73edSTrond Myklebust nfs_direct_count_bytes(dreq, hdr); 7201f28476dSTrond Myklebust if (hdr->good_bytes != 0 && nfs_write_need_commit(hdr)) { 7213731d44bSTrond Myklebust if (!dreq->flags) 7221763da12SFred Isaman dreq->flags = NFS_ODIRECT_DO_COMMIT; 7233731d44bSTrond Myklebust flags = dreq->flags; 7241763da12SFred Isaman } 7251763da12SFred Isaman spin_unlock(&dreq->lock); 7261763da12SFred Isaman 7271763da12SFred Isaman while (!list_empty(&hdr->pages)) { 7282bfc6e56SWeston Andros Adamson 7291763da12SFred Isaman req = nfs_list_entry(hdr->pages.next); 7301763da12SFred Isaman nfs_list_remove_request(req); 7313731d44bSTrond Myklebust if (flags == NFS_ODIRECT_DO_COMMIT) { 73204277086STrond Myklebust kref_get(&req->wb_kref); 733ba838a75SChuck Lever memcpy(&req->wb_verf, &hdr->verf.verifier, 734ba838a75SChuck Lever sizeof(req->wb_verf)); 735b57ff130SWeston Andros Adamson nfs_mark_request_commit(req, hdr->lseg, &cinfo, 736b57ff130SWeston Andros Adamson hdr->ds_commit_idx); 7373731d44bSTrond Myklebust } else if (flags == NFS_ODIRECT_RESCHED_WRITES) { 7383731d44bSTrond Myklebust kref_get(&req->wb_kref); 7393731d44bSTrond Myklebust nfs_mark_request_commit(req, NULL, &cinfo, 0); 7401763da12SFred Isaman } 7411d1afcbcSTrond Myklebust nfs_unlock_and_release_request(req); 7421763da12SFred Isaman } 7431763da12SFred Isaman 7441763da12SFred Isaman out_put: 7451763da12SFred Isaman if (put_dreq(dreq)) 7464d3b55d3SAnna Schumaker nfs_direct_write_complete(dreq); 7471763da12SFred Isaman hdr->release(hdr); 7481763da12SFred Isaman } 7491763da12SFred Isaman 750df3accb8STrond Myklebust static void nfs_write_sync_pgio_error(struct list_head *head, int error) 7513e9e0ca3STrond Myklebust { 7523e9e0ca3STrond Myklebust struct nfs_page *req; 7533e9e0ca3STrond Myklebust 7543e9e0ca3STrond Myklebust while (!list_empty(head)) { 7553e9e0ca3STrond Myklebust req = 
nfs_list_entry(head->next); 7563e9e0ca3STrond Myklebust nfs_list_remove_request(req); 7571d1afcbcSTrond Myklebust nfs_unlock_and_release_request(req); 7583e9e0ca3STrond Myklebust } 7593e9e0ca3STrond Myklebust } 7603e9e0ca3STrond Myklebust 761dc602dd7STrond Myklebust static void nfs_direct_write_reschedule_io(struct nfs_pgio_header *hdr) 762dc602dd7STrond Myklebust { 763dc602dd7STrond Myklebust struct nfs_direct_req *dreq = hdr->dreq; 764dc602dd7STrond Myklebust 765dc602dd7STrond Myklebust spin_lock(&dreq->lock); 766dc602dd7STrond Myklebust if (dreq->error == 0) { 767dc602dd7STrond Myklebust dreq->flags = NFS_ODIRECT_RESCHED_WRITES; 768dc602dd7STrond Myklebust /* fake unstable write to let common nfs resend pages */ 769dc602dd7STrond Myklebust hdr->verf.committed = NFS_UNSTABLE; 7704daaeba9STrond Myklebust hdr->good_bytes = hdr->args.offset + hdr->args.count - 7714daaeba9STrond Myklebust hdr->io_start; 772dc602dd7STrond Myklebust } 773dc602dd7STrond Myklebust spin_unlock(&dreq->lock); 774dc602dd7STrond Myklebust } 775dc602dd7STrond Myklebust 7761763da12SFred Isaman static const struct nfs_pgio_completion_ops nfs_direct_write_completion_ops = { 7773e9e0ca3STrond Myklebust .error_cleanup = nfs_write_sync_pgio_error, 7781763da12SFred Isaman .init_hdr = nfs_direct_pgio_init, 7791763da12SFred Isaman .completion = nfs_direct_write_completion, 780dc602dd7STrond Myklebust .reschedule_io = nfs_direct_write_reschedule_io, 7811763da12SFred Isaman }; 7821763da12SFred Isaman 78391f79c43SAl Viro 78491f79c43SAl Viro /* 78591f79c43SAl Viro * NB: Return the value of the first error return code. Subsequent 78691f79c43SAl Viro * errors after the first one are ignored. 78791f79c43SAl Viro */ 78891f79c43SAl Viro /* 78991f79c43SAl Viro * For each wsize'd chunk of the user's buffer, dispatch an NFS WRITE 79091f79c43SAl Viro * operation. If nfs_writedata_alloc() or get_user_pages() fails, 79191f79c43SAl Viro * bail and stop sending more writes. 
Write length accounting is 79291f79c43SAl Viro * handled automatically by nfs_direct_write_result(). Otherwise, if 79391f79c43SAl Viro * no requests have been sent, just return an error. 79491f79c43SAl Viro */ 79519f73787SChuck Lever static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq, 796619d30b4SAl Viro struct iov_iter *iter, 79791f79c43SAl Viro loff_t pos) 79819f73787SChuck Lever { 7991763da12SFred Isaman struct nfs_pageio_descriptor desc; 8001d59d61fSTrond Myklebust struct inode *inode = dreq->inode; 80119f73787SChuck Lever ssize_t result = 0; 80219f73787SChuck Lever size_t requested_bytes = 0; 80391f79c43SAl Viro size_t wsize = max_t(size_t, NFS_SERVER(inode)->wsize, PAGE_SIZE); 80419f73787SChuck Lever 805a20c93e3SChristoph Hellwig nfs_pageio_init_write(&desc, inode, FLUSH_COND_STABLE, false, 8061763da12SFred Isaman &nfs_direct_write_completion_ops); 8071763da12SFred Isaman desc.pg_dreq = dreq; 80819f73787SChuck Lever get_dreq(dreq); 809fe0f07d0SJens Axboe inode_dio_begin(inode); 81019f73787SChuck Lever 81191f79c43SAl Viro NFS_I(inode)->write_io += iov_iter_count(iter); 81291f79c43SAl Viro while (iov_iter_count(iter)) { 81391f79c43SAl Viro struct page **pagevec; 81491f79c43SAl Viro size_t bytes; 81591f79c43SAl Viro size_t pgbase; 81691f79c43SAl Viro unsigned npages, i; 81791f79c43SAl Viro 81891f79c43SAl Viro result = iov_iter_get_pages_alloc(iter, &pagevec, 81991f79c43SAl Viro wsize, &pgbase); 82019f73787SChuck Lever if (result < 0) 82119f73787SChuck Lever break; 82291f79c43SAl Viro 82391f79c43SAl Viro bytes = result; 82491f79c43SAl Viro iov_iter_advance(iter, bytes); 82591f79c43SAl Viro npages = (result + pgbase + PAGE_SIZE - 1) / PAGE_SIZE; 82691f79c43SAl Viro for (i = 0; i < npages; i++) { 82791f79c43SAl Viro struct nfs_page *req; 82891f79c43SAl Viro unsigned int req_len = min_t(size_t, bytes, PAGE_SIZE - pgbase); 82991f79c43SAl Viro 83028b1d3f5STrond Myklebust req = nfs_create_request(dreq->ctx, pagevec[i], 83191f79c43SAl Viro 
pgbase, req_len); 83291f79c43SAl Viro if (IS_ERR(req)) { 83391f79c43SAl Viro result = PTR_ERR(req); 83419f73787SChuck Lever break; 83591f79c43SAl Viro } 8360a00b77bSWeston Andros Adamson 837d600ad1fSPeng Tao if (desc.pg_error < 0) { 838d600ad1fSPeng Tao nfs_free_request(req); 839d600ad1fSPeng Tao result = desc.pg_error; 840d600ad1fSPeng Tao break; 841d600ad1fSPeng Tao } 8420a00b77bSWeston Andros Adamson 84391f79c43SAl Viro nfs_lock_request(req); 84491f79c43SAl Viro req->wb_index = pos >> PAGE_SHIFT; 84591f79c43SAl Viro req->wb_offset = pos & ~PAGE_MASK; 84691f79c43SAl Viro if (!nfs_pageio_add_request(&desc, req)) { 84791f79c43SAl Viro result = desc.pg_error; 84891f79c43SAl Viro nfs_unlock_and_release_request(req); 84991f79c43SAl Viro break; 85091f79c43SAl Viro } 85191f79c43SAl Viro pgbase = 0; 85291f79c43SAl Viro bytes -= req_len; 85391f79c43SAl Viro requested_bytes += req_len; 85491f79c43SAl Viro pos += req_len; 85591f79c43SAl Viro dreq->bytes_left -= req_len; 85691f79c43SAl Viro } 85791f79c43SAl Viro nfs_direct_release_pages(pagevec, npages); 85891f79c43SAl Viro kvfree(pagevec); 85991f79c43SAl Viro if (result < 0) 86091f79c43SAl Viro break; 86119f73787SChuck Lever } 8621763da12SFred Isaman nfs_pageio_complete(&desc); 86319f73787SChuck Lever 864839f7ad6SChuck Lever /* 865839f7ad6SChuck Lever * If no bytes were started, return the error, and let the 866839f7ad6SChuck Lever * generic layer handle the completion. 867839f7ad6SChuck Lever */ 868839f7ad6SChuck Lever if (requested_bytes == 0) { 869d03727b2SOlga Kornievskaia inode_dio_end(inode); 87065caafd0SOlga Kornievskaia nfs_direct_req_release(dreq); 871839f7ad6SChuck Lever return result < 0 ? 
result : -EIO; 872839f7ad6SChuck Lever } 873839f7ad6SChuck Lever 87419f73787SChuck Lever if (put_dreq(dreq)) 8754d3b55d3SAnna Schumaker nfs_direct_write_complete(dreq); 87685128b2bSAl Viro return requested_bytes; 87719f73787SChuck Lever } 87819f73787SChuck Lever 8791da177e4SLinus Torvalds /** 8801da177e4SLinus Torvalds * nfs_file_direct_write - file direct write operation for NFS files 8811da177e4SLinus Torvalds * @iocb: target I/O control block 882619d30b4SAl Viro * @iter: vector of user buffers from which to write data 883*64158668SNeilBrown * @swap: flag indicating this is swap IO, not O_DIRECT IO 8841da177e4SLinus Torvalds * 8851da177e4SLinus Torvalds * We use this function for direct writes instead of calling 8861da177e4SLinus Torvalds * generic_file_aio_write() in order to avoid taking the inode 8871da177e4SLinus Torvalds * semaphore and updating the i_size. The NFS server will set 8881da177e4SLinus Torvalds * the new i_size and this client must read the updated size 8891da177e4SLinus Torvalds * back into its cache. We let the server do generic write 8901da177e4SLinus Torvalds * parameter checking and report problems. 8911da177e4SLinus Torvalds * 8921da177e4SLinus Torvalds * We eliminate local atime updates, see direct read above. 8931da177e4SLinus Torvalds * 8941da177e4SLinus Torvalds * We avoid unnecessary page cache invalidations for normal cached 8951da177e4SLinus Torvalds * readers of this file. 8961da177e4SLinus Torvalds * 8971da177e4SLinus Torvalds * Note that O_APPEND is not supported for NFS direct writes, as there 8981da177e4SLinus Torvalds * is no atomic O_APPEND write facility in the NFS protocol. 
8991da177e4SLinus Torvalds */ 900*64158668SNeilBrown ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter, 901*64158668SNeilBrown bool swap) 9021da177e4SLinus Torvalds { 9039a74a2b8SColin Ian King ssize_t result, requested; 90489698b24STrond Myklebust size_t count; 9051da177e4SLinus Torvalds struct file *file = iocb->ki_filp; 9061da177e4SLinus Torvalds struct address_space *mapping = file->f_mapping; 90722cd1bf1SChristoph Hellwig struct inode *inode = mapping->host; 90822cd1bf1SChristoph Hellwig struct nfs_direct_req *dreq; 90922cd1bf1SChristoph Hellwig struct nfs_lock_context *l_ctx; 91065a4a1caSAl Viro loff_t pos, end; 911c216fd70SChuck Lever 9126de1472fSAl Viro dfprintk(FILE, "NFS: direct write(%pD2, %zd@%Ld)\n", 9133309dd04SAl Viro file, iov_iter_count(iter), (long long) iocb->ki_pos); 914027445c3SBadari Pulavarty 915*64158668SNeilBrown if (swap) 916*64158668SNeilBrown /* bypass generic checks */ 917*64158668SNeilBrown result = iov_iter_count(iter); 918*64158668SNeilBrown else 91989698b24STrond Myklebust result = generic_write_checks(iocb, iter); 92089698b24STrond Myklebust if (result <= 0) 92189698b24STrond Myklebust return result; 92289698b24STrond Myklebust count = result; 92389698b24STrond Myklebust nfs_add_stats(mapping->host, NFSIOS_DIRECTWRITTENBYTES, count); 9243309dd04SAl Viro 9253309dd04SAl Viro pos = iocb->ki_pos; 92609cbfeafSKirill A. 
Shutemov end = (pos + iov_iter_count(iter) - 1) >> PAGE_SHIFT; 927ce1a8e67SChuck Lever 92889698b24STrond Myklebust task_io_account_write(count); 9297ec10f26SKonstantin Khlebnikov 93022cd1bf1SChristoph Hellwig result = -ENOMEM; 93122cd1bf1SChristoph Hellwig dreq = nfs_direct_req_alloc(); 93222cd1bf1SChristoph Hellwig if (!dreq) 933a5864c99STrond Myklebust goto out; 93422cd1bf1SChristoph Hellwig 93522cd1bf1SChristoph Hellwig dreq->inode = inode; 93689698b24STrond Myklebust dreq->bytes_left = dreq->max_count = count; 9375fadeb47SPeng Tao dreq->io_start = pos; 93822cd1bf1SChristoph Hellwig dreq->ctx = get_nfs_open_context(nfs_file_open_context(iocb->ki_filp)); 93922cd1bf1SChristoph Hellwig l_ctx = nfs_get_lock_context(dreq->ctx); 94022cd1bf1SChristoph Hellwig if (IS_ERR(l_ctx)) { 94122cd1bf1SChristoph Hellwig result = PTR_ERR(l_ctx); 9428605cf0eSMisono Tomohiro nfs_direct_req_release(dreq); 94322cd1bf1SChristoph Hellwig goto out_release; 94422cd1bf1SChristoph Hellwig } 94522cd1bf1SChristoph Hellwig dreq->l_ctx = l_ctx; 94622cd1bf1SChristoph Hellwig if (!is_sync_kiocb(iocb)) 94722cd1bf1SChristoph Hellwig dreq->iocb = iocb; 9489c455a8cSTrond Myklebust pnfs_init_ds_commit_info_ops(&dreq->ds_cinfo, inode); 94922cd1bf1SChristoph Hellwig 950*64158668SNeilBrown if (swap) { 951*64158668SNeilBrown requested = nfs_direct_write_schedule_iovec(dreq, iter, pos); 952*64158668SNeilBrown } else { 953a5864c99STrond Myklebust nfs_start_io_direct(inode); 954a5864c99STrond Myklebust 95585128b2bSAl Viro requested = nfs_direct_write_schedule_iovec(dreq, iter, pos); 956a9ab5e84SChristoph Hellwig 957a9ab5e84SChristoph Hellwig if (mapping->nrpages) { 958a9ab5e84SChristoph Hellwig invalidate_inode_pages2_range(mapping, 95909cbfeafSKirill A. 
Shutemov pos >> PAGE_SHIFT, end); 960a9ab5e84SChristoph Hellwig } 961a9ab5e84SChristoph Hellwig 962a5864c99STrond Myklebust nfs_end_io_direct(inode); 963*64158668SNeilBrown } 964a9ab5e84SChristoph Hellwig 96585128b2bSAl Viro if (requested > 0) { 96622cd1bf1SChristoph Hellwig result = nfs_direct_wait(dreq); 96722cd1bf1SChristoph Hellwig if (result > 0) { 96885128b2bSAl Viro requested -= result; 96922cd1bf1SChristoph Hellwig iocb->ki_pos = pos + result; 970e2592217SChristoph Hellwig /* XXX: should check the generic_write_sync retval */ 971e2592217SChristoph Hellwig generic_write_sync(iocb, result); 9721763da12SFred Isaman } 97385128b2bSAl Viro iov_iter_revert(iter, requested); 97485128b2bSAl Viro } else { 97585128b2bSAl Viro result = requested; 97622cd1bf1SChristoph Hellwig } 977a6b5a28eSDave Wysochanski nfs_fscache_invalidate(inode, FSCACHE_INVAL_DIO_WRITE); 97822cd1bf1SChristoph Hellwig out_release: 97922cd1bf1SChristoph Hellwig nfs_direct_req_release(dreq); 980a5864c99STrond Myklebust out: 98122cd1bf1SChristoph Hellwig return result; 9821da177e4SLinus Torvalds } 9831da177e4SLinus Torvalds 98488467055SChuck Lever /** 98588467055SChuck Lever * nfs_init_directcache - create a slab cache for nfs_direct_req structures 98688467055SChuck Lever * 98788467055SChuck Lever */ 988f7b422b1SDavid Howells int __init nfs_init_directcache(void) 9891da177e4SLinus Torvalds { 9901da177e4SLinus Torvalds nfs_direct_cachep = kmem_cache_create("nfs_direct_cache", 9911da177e4SLinus Torvalds sizeof(struct nfs_direct_req), 992fffb60f9SPaul Jackson 0, (SLAB_RECLAIM_ACCOUNT| 993fffb60f9SPaul Jackson SLAB_MEM_SPREAD), 99420c2df83SPaul Mundt NULL); 9951da177e4SLinus Torvalds if (nfs_direct_cachep == NULL) 9961da177e4SLinus Torvalds return -ENOMEM; 9971da177e4SLinus Torvalds 9981da177e4SLinus Torvalds return 0; 9991da177e4SLinus Torvalds } 10001da177e4SLinus Torvalds 100188467055SChuck Lever /** 1002f7b422b1SDavid Howells * nfs_destroy_directcache - destroy the slab cache for nfs_direct_req 
structures 100388467055SChuck Lever * 100488467055SChuck Lever */ 1005266bee88SDavid Brownell void nfs_destroy_directcache(void) 10061da177e4SLinus Torvalds { 10071a1d92c1SAlexey Dobriyan kmem_cache_destroy(nfs_direct_cachep); 10081da177e4SLinus Torvalds } 1009