1457c8996SThomas Gleixner // SPDX-License-Identifier: GPL-2.0-only 21da177e4SLinus Torvalds /* 31da177e4SLinus Torvalds * linux/fs/nfs/direct.c 41da177e4SLinus Torvalds * 51da177e4SLinus Torvalds * Copyright (C) 2003 by Chuck Lever <cel@netapp.com> 61da177e4SLinus Torvalds * 71da177e4SLinus Torvalds * High-performance uncached I/O for the Linux NFS client 81da177e4SLinus Torvalds * 91da177e4SLinus Torvalds * There are important applications whose performance or correctness 101da177e4SLinus Torvalds * depends on uncached access to file data. Database clusters 111da177e4SLinus Torvalds * (multiple copies of the same instance running on separate hosts) 121da177e4SLinus Torvalds * implement their own cache coherency protocol that subsumes file 131da177e4SLinus Torvalds * system cache protocols. Applications that process datasets 141da177e4SLinus Torvalds * considerably larger than the client's memory do not always benefit 151da177e4SLinus Torvalds * from a local cache. A streaming video server, for instance, has no 161da177e4SLinus Torvalds * need to cache the contents of a file. 171da177e4SLinus Torvalds * 181da177e4SLinus Torvalds * When an application requests uncached I/O, all read and write requests 191da177e4SLinus Torvalds * are made directly to the server; data stored or fetched via these 201da177e4SLinus Torvalds * requests is not cached in the Linux page cache. The client does not 211da177e4SLinus Torvalds * correct unaligned requests from applications. All requested bytes are 221da177e4SLinus Torvalds * held on permanent storage before a direct write system call returns to 231da177e4SLinus Torvalds * an application. 241da177e4SLinus Torvalds * 251da177e4SLinus Torvalds * Solaris implements an uncached I/O facility called directio() that 261da177e4SLinus Torvalds * is used for backups and sequential I/O to very large files. Solaris 271da177e4SLinus Torvalds * also supports uncaching whole NFS partitions with "-o forcedirectio," 281da177e4SLinus Torvalds * an undocumented mount option. 291da177e4SLinus Torvalds * 301da177e4SLinus Torvalds * Designed by Jeff Kimmel, Chuck Lever, and Trond Myklebust, with 311da177e4SLinus Torvalds * help from Andrew Morton. 321da177e4SLinus Torvalds * 331da177e4SLinus Torvalds * 18 Dec 2001 Initial implementation for 2.4 --cel 341da177e4SLinus Torvalds * 08 Jul 2002 Version for 2.4.19, with bug fixes --trondmy 351da177e4SLinus Torvalds * 08 Jun 2003 Port to 2.5 APIs --cel 361da177e4SLinus Torvalds * 31 Mar 2004 Handle direct I/O without VFS support --cel 371da177e4SLinus Torvalds * 15 Sep 2004 Parallel async reads --cel 3888467055SChuck Lever * 04 May 2005 support O_DIRECT with aio --cel 391da177e4SLinus Torvalds * 401da177e4SLinus Torvalds */ 411da177e4SLinus Torvalds 421da177e4SLinus Torvalds #include <linux/errno.h> 431da177e4SLinus Torvalds #include <linux/sched.h> 441da177e4SLinus Torvalds #include <linux/kernel.h> 451da177e4SLinus Torvalds #include <linux/file.h> 461da177e4SLinus Torvalds #include <linux/pagemap.h> 471da177e4SLinus Torvalds #include <linux/kref.h> 485a0e3ad6STejun Heo #include <linux/slab.h> 497ec10f26SKonstantin Khlebnikov #include <linux/task_io_accounting_ops.h> 506296556fSPeng Tao #include <linux/module.h> 511da177e4SLinus Torvalds 521da177e4SLinus Torvalds #include <linux/nfs_fs.h> 531da177e4SLinus Torvalds #include <linux/nfs_page.h> 541da177e4SLinus Torvalds #include <linux/sunrpc/clnt.h> 551da177e4SLinus Torvalds 567c0f6ba6SLinus Torvalds #include <linux/uaccess.h> 5760063497SArun Sharma #include <linux/atomic.h> 581da177e4SLinus Torvalds 598d5658c9STrond Myklebust #include "internal.h" 6091d5b470SChuck Lever #include "iostat.h" 611763da12SFred Isaman #include "pnfs.h" 62a6b5a28eSDave Wysochanski #include "fscache.h" 638efc4bbeSJeff Layton #include "nfstrace.h" 641da177e4SLinus Torvalds 651da177e4SLinus Torvalds #define NFSDBG_FACILITY NFSDBG_VFS 661da177e4SLinus Torvalds 67e18b890bSChristoph Lameter static struct kmem_cache *nfs_direct_cachep; 681da177e4SLinus Torvalds 691763da12SFred Isaman static const struct nfs_pgio_completion_ops nfs_direct_write_completion_ops; 701763da12SFred Isaman static const struct nfs_commit_completion_ops nfs_direct_commit_completion_ops; 714d3b55d3SAnna Schumaker static void nfs_direct_write_complete(struct nfs_direct_req *dreq); 721763da12SFred Isaman static void nfs_direct_write_schedule_work(struct work_struct *work); 73607f31e8STrond Myklebust 74607f31e8STrond Myklebust static inline void get_dreq(struct nfs_direct_req *dreq) 75607f31e8STrond Myklebust { 76607f31e8STrond Myklebust atomic_inc(&dreq->io_count); 77607f31e8STrond Myklebust } 78607f31e8STrond Myklebust 79607f31e8STrond Myklebust static inline int put_dreq(struct nfs_direct_req *dreq) 80607f31e8STrond Myklebust { 81607f31e8STrond Myklebust return atomic_dec_and_test(&dreq->io_count); 82607f31e8STrond Myklebust } 83607f31e8STrond Myklebust 840a00b77bSWeston Andros Adamson static void 85031d73edSTrond Myklebust nfs_direct_handle_truncated(struct nfs_direct_req *dreq, 86031d73edSTrond Myklebust const struct nfs_pgio_header *hdr, 87031d73edSTrond Myklebust ssize_t dreq_len) 880a00b77bSWeston Andros Adamson { 89031d73edSTrond Myklebust if (!(test_bit(NFS_IOHDR_ERROR, &hdr->flags) || 90031d73edSTrond Myklebust test_bit(NFS_IOHDR_EOF, &hdr->flags))) 91031d73edSTrond Myklebust return; 92031d73edSTrond Myklebust if (dreq->max_count >= dreq_len) { 93031d73edSTrond Myklebust dreq->max_count = dreq_len; 94031d73edSTrond Myklebust if (dreq->count > dreq_len) 95031d73edSTrond Myklebust dreq->count = dreq_len; 965fadeb47SPeng Tao } 978982f7afSTrond Myklebust 988982f7afSTrond Myklebust if (test_bit(NFS_IOHDR_ERROR, &hdr->flags) && !dreq->error) 998982f7afSTrond Myklebust dreq->error = hdr->error; 1000a00b77bSWeston Andros Adamson } 101031d73edSTrond Myklebust 102031d73edSTrond Myklebust static void 103031d73edSTrond Myklebust nfs_direct_count_bytes(struct nfs_direct_req *dreq, 104031d73edSTrond Myklebust const struct nfs_pgio_header *hdr) 105031d73edSTrond Myklebust { 106031d73edSTrond Myklebust loff_t hdr_end = hdr->io_start + hdr->good_bytes; 107031d73edSTrond Myklebust ssize_t dreq_len = 0; 108031d73edSTrond Myklebust 109031d73edSTrond Myklebust if (hdr_end > dreq->io_start) 110031d73edSTrond Myklebust dreq_len = hdr_end - dreq->io_start; 111031d73edSTrond Myklebust 112031d73edSTrond Myklebust nfs_direct_handle_truncated(dreq, hdr, dreq_len); 113031d73edSTrond Myklebust 114031d73edSTrond Myklebust if (dreq_len > dreq->max_count) 115031d73edSTrond Myklebust dreq_len = dreq->max_count; 116031d73edSTrond Myklebust 117031d73edSTrond Myklebust if (dreq->count < dreq_len) 118031d73edSTrond Myklebust dreq->count = dreq_len; 1191ccbad9fSPeng Tao } 1200a00b77bSWeston Andros Adamson 1218982f7afSTrond Myklebust static void nfs_direct_truncate_request(struct nfs_direct_req *dreq, 1228982f7afSTrond Myklebust struct nfs_page *req) 1238982f7afSTrond Myklebust { 1248982f7afSTrond Myklebust loff_t offs = req_offset(req); 1258982f7afSTrond Myklebust size_t req_start = (size_t)(offs - dreq->io_start); 1268982f7afSTrond Myklebust 1278982f7afSTrond Myklebust if (req_start < dreq->max_count) 1288982f7afSTrond Myklebust dreq->max_count = req_start; 1298982f7afSTrond Myklebust if (req_start < dreq->count) 1308982f7afSTrond Myklebust dreq->count = req_start; 1318982f7afSTrond Myklebust } 1328982f7afSTrond Myklebust 1331da177e4SLinus Torvalds /** 134eb79f3afSNeilBrown * nfs_swap_rw - NFS address space operation for swap I/O 135b8a32e2bSChuck Lever * @iocb: target I/O control block 13690090ae6SAl Viro * @iter: I/O buffer 137b8a32e2bSChuck Lever * 138eb79f3afSNeilBrown * Perform IO to the swap-file. This is much like direct IO. 1391da177e4SLinus Torvalds */ 140eb79f3afSNeilBrown int nfs_swap_rw(struct kiocb *iocb, struct iov_iter *iter) 141b8a32e2bSChuck Lever { 142eb79f3afSNeilBrown ssize_t ret; 143ee8a1a8bSPeng Tao 1446f673763SOmar Sandoval if (iov_iter_rw(iter) == READ) 145eb79f3afSNeilBrown ret = nfs_file_direct_read(iocb, iter, true); 146eb79f3afSNeilBrown else 147eb79f3afSNeilBrown ret = nfs_file_direct_write(iocb, iter, true); 148eb79f3afSNeilBrown if (ret < 0) 149eb79f3afSNeilBrown return ret; 150eb79f3afSNeilBrown return 0; 151b8a32e2bSChuck Lever } 152b8a32e2bSChuck Lever 153749e146eSChuck Lever static void nfs_direct_release_pages(struct page **pages, unsigned int npages) 1549c93ab7dSChuck Lever { 155749e146eSChuck Lever unsigned int i; 156607f31e8STrond Myklebust for (i = 0; i < npages; i++) 15709cbfeafSKirill A. Shutemov put_page(pages[i]); 1586b45d858STrond Myklebust } 1596b45d858STrond Myklebust 1601763da12SFred Isaman void nfs_init_cinfo_from_dreq(struct nfs_commit_info *cinfo, 1611763da12SFred Isaman struct nfs_direct_req *dreq) 1621763da12SFred Isaman { 163fe238e60SDave Wysochanski cinfo->inode = dreq->inode; 1641763da12SFred Isaman cinfo->mds = &dreq->mds_cinfo; 1651763da12SFred Isaman cinfo->ds = &dreq->ds_cinfo; 1661763da12SFred Isaman cinfo->dreq = dreq; 1671763da12SFred Isaman cinfo->completion_ops = &nfs_direct_commit_completion_ops; 1681763da12SFred Isaman } 1691763da12SFred Isaman 17093619e59SChuck Lever static inline struct nfs_direct_req *nfs_direct_req_alloc(void) 1711da177e4SLinus Torvalds { 1721da177e4SLinus Torvalds struct nfs_direct_req *dreq; 1731da177e4SLinus Torvalds 174292f3eeeSTrond Myklebust dreq = kmem_cache_zalloc(nfs_direct_cachep, GFP_KERNEL); 1751da177e4SLinus Torvalds if (!dreq) 1761da177e4SLinus Torvalds return NULL; 1771da177e4SLinus Torvalds 1781da177e4SLinus Torvalds kref_init(&dreq->kref); 179607f31e8STrond Myklebust kref_get(&dreq->kref); 180d72b7a6bSTrond Myklebust init_completion(&dreq->completion); 1811763da12SFred Isaman INIT_LIST_HEAD(&dreq->mds_cinfo.list); 182c21e7168STrond Myklebust pnfs_init_ds_commit_info(&dreq->ds_cinfo); 1831763da12SFred Isaman INIT_WORK(&dreq->work, nfs_direct_write_schedule_work); 18415ce4a0cSChuck Lever spin_lock_init(&dreq->lock); 18593619e59SChuck Lever 18693619e59SChuck Lever return dreq; 18793619e59SChuck Lever } 18893619e59SChuck Lever 189b4946ffbSTrond Myklebust static void nfs_direct_req_free(struct kref *kref) 1901da177e4SLinus Torvalds { 1911da177e4SLinus Torvalds struct nfs_direct_req *dreq = container_of(kref, struct nfs_direct_req, kref); 192a8881f5aSTrond Myklebust 19318f41296STrond Myklebust pnfs_release_ds_info(&dreq->ds_cinfo, dreq->inode); 194f11ac8dbSTrond Myklebust if (dreq->l_ctx != NULL) 195f11ac8dbSTrond Myklebust nfs_put_lock_context(dreq->l_ctx); 196a8881f5aSTrond Myklebust if (dreq->ctx != NULL) 197a8881f5aSTrond Myklebust put_nfs_open_context(dreq->ctx); 1981da177e4SLinus Torvalds kmem_cache_free(nfs_direct_cachep, dreq); 1991da177e4SLinus Torvalds } 2001da177e4SLinus Torvalds 201b4946ffbSTrond Myklebust static void nfs_direct_req_release(struct nfs_direct_req *dreq) 202b4946ffbSTrond Myklebust { 203b4946ffbSTrond Myklebust kref_put(&dreq->kref, nfs_direct_req_free); 204b4946ffbSTrond Myklebust } 205b4946ffbSTrond Myklebust 20675aa038dSTrond Myklebust ssize_t nfs_dreq_bytes_left(struct nfs_direct_req *dreq, loff_t offset) 2076296556fSPeng Tao { 20875aa038dSTrond Myklebust loff_t start = offset - dreq->io_start; 20975aa038dSTrond Myklebust return dreq->max_count - start; 2106296556fSPeng Tao } 2116296556fSPeng Tao EXPORT_SYMBOL_GPL(nfs_dreq_bytes_left); 2126296556fSPeng Tao 213d4cc948bSChuck Lever /* 214bc0fb201SChuck Lever * Collects and returns the final error value/byte-count. 215bc0fb201SChuck Lever */ 216bc0fb201SChuck Lever static ssize_t nfs_direct_wait(struct nfs_direct_req *dreq) 217bc0fb201SChuck Lever { 21815ce4a0cSChuck Lever ssize_t result = -EIOCBQUEUED; 219bc0fb201SChuck Lever 220bc0fb201SChuck Lever /* Async requests don't wait here */ 221bc0fb201SChuck Lever if (dreq->iocb) 222bc0fb201SChuck Lever goto out; 223bc0fb201SChuck Lever 224150030b7SMatthew Wilcox result = wait_for_completion_killable(&dreq->completion); 225bc0fb201SChuck Lever 226d2a7de0bSTrond Myklebust if (!result) { 227d2a7de0bSTrond Myklebust result = dreq->count; 228d2a7de0bSTrond Myklebust WARN_ON_ONCE(dreq->count < 0); 229d2a7de0bSTrond Myklebust } 230bc0fb201SChuck Lever if (!result) 23115ce4a0cSChuck Lever result = dreq->error; 232bc0fb201SChuck Lever 233bc0fb201SChuck Lever out: 234bc0fb201SChuck Lever return (ssize_t) result; 235bc0fb201SChuck Lever } 236bc0fb201SChuck Lever 237bc0fb201SChuck Lever /* 238607f31e8STrond Myklebust * Synchronous I/O uses a stack-allocated iocb. Thus we can't trust 239607f31e8STrond Myklebust * the iocb is still valid here if this is a synchronous request. 24063ab46abSChuck Lever */ 241f7b5c340STrond Myklebust static void nfs_direct_complete(struct nfs_direct_req *dreq) 24263ab46abSChuck Lever { 2439811cd57SChristoph Hellwig struct inode *inode = dreq->inode; 2449811cd57SChristoph Hellwig 24565caafd0SOlga Kornievskaia inode_dio_end(inode); 24665caafd0SOlga Kornievskaia 2472a009ec9SChristoph Hellwig if (dreq->iocb) { 2482a009ec9SChristoph Hellwig long res = (long) dreq->error; 249d2a7de0bSTrond Myklebust if (dreq->count != 0) { 2502a009ec9SChristoph Hellwig res = (long) dreq->count; 251d2a7de0bSTrond Myklebust WARN_ON_ONCE(dreq->count < 0); 252d2a7de0bSTrond Myklebust } 2536b19b766SJens Axboe dreq->iocb->ki_complete(dreq->iocb, res); 254d72b7a6bSTrond Myklebust } 2552a009ec9SChristoph Hellwig 256024de8f1SDaniel Wagner complete(&dreq->completion); 25763ab46abSChuck Lever 258b4946ffbSTrond Myklebust nfs_direct_req_release(dreq); 25963ab46abSChuck Lever } 26063ab46abSChuck Lever 261584aa810SFred Isaman static void nfs_direct_read_completion(struct nfs_pgio_header *hdr) 262fdd1e74cSTrond Myklebust { 263584aa810SFred Isaman unsigned long bytes = 0; 264584aa810SFred Isaman struct nfs_direct_req *dreq = hdr->dreq; 265fdd1e74cSTrond Myklebust 26615ce4a0cSChuck Lever spin_lock(&dreq->lock); 267eb2c50daSTrond Myklebust if (test_bit(NFS_IOHDR_REDO, &hdr->flags)) { 268eb2c50daSTrond Myklebust spin_unlock(&dreq->lock); 269eb2c50daSTrond Myklebust goto out_put; 270eb2c50daSTrond Myklebust } 271eb2c50daSTrond Myklebust 272031d73edSTrond Myklebust nfs_direct_count_bytes(dreq, hdr); 27315ce4a0cSChuck Lever spin_unlock(&dreq->lock); 2741da177e4SLinus Torvalds 275584aa810SFred Isaman while (!list_empty(&hdr->pages)) { 276584aa810SFred Isaman struct nfs_page *req = nfs_list_entry(hdr->pages.next); 277584aa810SFred Isaman struct page *page = req->wb_page; 278584aa810SFred Isaman 279ad3cba22SDave Kleikamp if (!PageCompound(page) && bytes < hdr->good_bytes && 280ad3cba22SDave Kleikamp (dreq->flags == NFS_ODIRECT_SHOULD_DIRTY)) 2814bd8b010STrond Myklebust set_page_dirty(page); 282584aa810SFred Isaman bytes += req->wb_bytes; 283584aa810SFred Isaman nfs_list_remove_request(req); 284beeb5338SAnna Schumaker nfs_release_request(req); 285584aa810SFred Isaman } 286584aa810SFred Isaman out_put: 287607f31e8STrond Myklebust if (put_dreq(dreq)) 288f7b5c340STrond Myklebust nfs_direct_complete(dreq); 289584aa810SFred Isaman hdr->release(hdr); 2901da177e4SLinus Torvalds } 2911da177e4SLinus Torvalds 292df3accb8STrond Myklebust static void nfs_read_sync_pgio_error(struct list_head *head, int error) 293cd841605SFred Isaman { 294584aa810SFred Isaman struct nfs_page *req; 295cd841605SFred Isaman 296584aa810SFred Isaman while (!list_empty(head)) { 297584aa810SFred Isaman req = nfs_list_entry(head->next); 298584aa810SFred Isaman nfs_list_remove_request(req); 299584aa810SFred Isaman nfs_release_request(req); 300cd841605SFred Isaman } 301584aa810SFred Isaman } 302584aa810SFred Isaman 303584aa810SFred Isaman static void nfs_direct_pgio_init(struct nfs_pgio_header *hdr) 304584aa810SFred Isaman { 305584aa810SFred Isaman get_dreq(hdr->dreq); 306584aa810SFred Isaman } 307584aa810SFred Isaman 308584aa810SFred Isaman static const struct nfs_pgio_completion_ops nfs_direct_read_completion_ops = { 3093e9e0ca3STrond Myklebust .error_cleanup = nfs_read_sync_pgio_error, 310584aa810SFred Isaman .init_hdr = nfs_direct_pgio_init, 311584aa810SFred Isaman .completion = nfs_direct_read_completion, 312584aa810SFred Isaman }; 313cd841605SFred Isaman 314d4cc948bSChuck Lever /* 315607f31e8STrond Myklebust * For each rsize'd chunk of the user's buffer, dispatch an NFS READ 316607f31e8STrond Myklebust * operation. If nfs_readdata_alloc() or get_user_pages() fails, 317607f31e8STrond Myklebust * bail and stop sending more reads. Read length accounting is 318607f31e8STrond Myklebust * handled automatically by nfs_direct_read_result(). Otherwise, if 319607f31e8STrond Myklebust * no requests have been sent, just return an error. 3201da177e4SLinus Torvalds */ 32191f79c43SAl Viro 32291f79c43SAl Viro static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq, 32391f79c43SAl Viro struct iov_iter *iter, 32491f79c43SAl Viro loff_t pos) 3251da177e4SLinus Torvalds { 32691f79c43SAl Viro struct nfs_pageio_descriptor desc; 32791f79c43SAl Viro struct inode *inode = dreq->inode; 32891f79c43SAl Viro ssize_t result = -EINVAL; 32991f79c43SAl Viro size_t requested_bytes = 0; 33091f79c43SAl Viro size_t rsize = max_t(size_t, NFS_SERVER(inode)->rsize, PAGE_SIZE); 33182b145c5SChuck Lever 33216b90578SLinus Torvalds nfs_pageio_init_read(&desc, dreq->inode, false, 33391f79c43SAl Viro &nfs_direct_read_completion_ops); 33491f79c43SAl Viro get_dreq(dreq); 33591f79c43SAl Viro desc.pg_dreq = dreq; 336fe0f07d0SJens Axboe inode_dio_begin(inode); 33791f79c43SAl Viro 33891f79c43SAl Viro while (iov_iter_count(iter)) { 33991f79c43SAl Viro struct page **pagevec; 3405dd602f2SChuck Lever size_t bytes; 34191f79c43SAl Viro size_t pgbase; 34291f79c43SAl Viro unsigned npages, i; 3431da177e4SLinus Torvalds 3441ef255e2SAl Viro result = iov_iter_get_pages_alloc2(iter, &pagevec, 34591f79c43SAl Viro rsize, &pgbase); 346584aa810SFred Isaman if (result < 0) 347749e146eSChuck Lever break; 348a564b8f0SMel Gorman 34991f79c43SAl Viro bytes = result; 35091f79c43SAl Viro npages = (result + pgbase + PAGE_SIZE - 1) / PAGE_SIZE; 351584aa810SFred Isaman for (i = 0; i < npages; i++) { 352584aa810SFred Isaman struct nfs_page *req; 353bf5fc402STrond Myklebust unsigned int req_len = min_t(size_t, bytes, PAGE_SIZE - pgbase); 354584aa810SFred Isaman /* XXX do we need to do the eof zeroing found in async_filler? */ 35570e9db69STrond Myklebust req = nfs_page_create_from_page(dreq->ctx, pagevec[i], 35670e9db69STrond Myklebust pgbase, pos, req_len); 357584aa810SFred Isaman if (IS_ERR(req)) { 358584aa810SFred Isaman result = PTR_ERR(req); 359dbae4c73STrond Myklebust break; 360584aa810SFred Isaman } 36191f79c43SAl Viro if (!nfs_pageio_add_request(&desc, req)) { 36291f79c43SAl Viro result = desc.pg_error; 363584aa810SFred Isaman nfs_release_request(req); 364584aa810SFred Isaman break; 365584aa810SFred Isaman } 366584aa810SFred Isaman pgbase = 0; 367584aa810SFred Isaman bytes -= req_len; 36891f79c43SAl Viro requested_bytes += req_len; 369584aa810SFred Isaman pos += req_len; 37035754bc0SPeng Tao dreq->bytes_left -= req_len; 371584aa810SFred Isaman } 3726d74743bSTrond Myklebust nfs_direct_release_pages(pagevec, npages); 37391f79c43SAl Viro kvfree(pagevec); 37419f73787SChuck Lever if (result < 0) 37519f73787SChuck Lever break; 37619f73787SChuck Lever } 37719f73787SChuck Lever 378584aa810SFred Isaman nfs_pageio_complete(&desc); 379584aa810SFred Isaman 380839f7ad6SChuck Lever /* 381839f7ad6SChuck Lever * If no bytes were started, return the error, and let the 382839f7ad6SChuck Lever * generic layer handle the completion. 383839f7ad6SChuck Lever */ 384839f7ad6SChuck Lever if (requested_bytes == 0) { 385d03727b2SOlga Kornievskaia inode_dio_end(inode); 38665caafd0SOlga Kornievskaia nfs_direct_req_release(dreq); 387839f7ad6SChuck Lever return result < 0 ? result : -EIO; 388839f7ad6SChuck Lever } 389839f7ad6SChuck Lever 39019f73787SChuck Lever if (put_dreq(dreq)) 391f7b5c340STrond Myklebust nfs_direct_complete(dreq); 39285128b2bSAl Viro return requested_bytes; 39319f73787SChuck Lever } 39419f73787SChuck Lever 39514a3ec79SChristoph Hellwig /** 39614a3ec79SChristoph Hellwig * nfs_file_direct_read - file direct read operation for NFS files 39714a3ec79SChristoph Hellwig * @iocb: target I/O control block 398619d30b4SAl Viro * @iter: vector of user buffers into which to read data 39964158668SNeilBrown * @swap: flag indicating this is swap IO, not O_DIRECT IO 40014a3ec79SChristoph Hellwig * 40114a3ec79SChristoph Hellwig * We use this function for direct reads instead of calling 40214a3ec79SChristoph Hellwig * generic_file_aio_read() in order to avoid gfar's check to see if 40314a3ec79SChristoph Hellwig * the request starts before the end of the file. For that check 40414a3ec79SChristoph Hellwig * to work, we must generate a GETATTR before each direct read, and 40514a3ec79SChristoph Hellwig * even then there is a window between the GETATTR and the subsequent 40614a3ec79SChristoph Hellwig * READ where the file size could change. Our preference is simply 40714a3ec79SChristoph Hellwig * to do all reads the application wants, and the server will take 40814a3ec79SChristoph Hellwig * care of managing the end of file boundary. 40914a3ec79SChristoph Hellwig * 41014a3ec79SChristoph Hellwig * This function also eliminates unnecessarily updating the file's 41114a3ec79SChristoph Hellwig * atime locally, as the NFS server sets the file's atime, and this 41214a3ec79SChristoph Hellwig * client must read the updated atime from the server back into its 41314a3ec79SChristoph Hellwig * cache. 41414a3ec79SChristoph Hellwig */ 41564158668SNeilBrown ssize_t nfs_file_direct_read(struct kiocb *iocb, struct iov_iter *iter, 41664158668SNeilBrown bool swap) 4171da177e4SLinus Torvalds { 41814a3ec79SChristoph Hellwig struct file *file = iocb->ki_filp; 41914a3ec79SChristoph Hellwig struct address_space *mapping = file->f_mapping; 42014a3ec79SChristoph Hellwig struct inode *inode = mapping->host; 4211da177e4SLinus Torvalds struct nfs_direct_req *dreq; 422b3c54de6STrond Myklebust struct nfs_lock_context *l_ctx; 42386b93667SColin Ian King ssize_t result, requested; 424a6cbcd4aSAl Viro size_t count = iov_iter_count(iter); 42514a3ec79SChristoph Hellwig nfs_add_stats(mapping->host, NFSIOS_DIRECTREADBYTES, count); 42614a3ec79SChristoph Hellwig 42714a3ec79SChristoph Hellwig dfprintk(FILE, "NFS: direct read(%pD2, %zd@%Ld)\n", 428c8b8e32dSChristoph Hellwig file, count, (long long) iocb->ki_pos); 42914a3ec79SChristoph Hellwig 43014a3ec79SChristoph Hellwig result = 0; 43114a3ec79SChristoph Hellwig if (!count) 43214a3ec79SChristoph Hellwig goto out; 43314a3ec79SChristoph Hellwig 43414a3ec79SChristoph Hellwig task_io_account_read(count); 43514a3ec79SChristoph Hellwig 43614a3ec79SChristoph Hellwig result = -ENOMEM; 437607f31e8STrond Myklebust dreq = nfs_direct_req_alloc(); 438f11ac8dbSTrond Myklebust if (dreq == NULL) 439a5864c99STrond Myklebust goto out; 4401da177e4SLinus Torvalds 44191d5b470SChuck Lever dreq->inode = inode; 442ed3743a6SWeston Andros Adamson dreq->bytes_left = dreq->max_count = count; 443c8b8e32dSChristoph Hellwig dreq->io_start = iocb->ki_pos; 444cd3758e3STrond Myklebust dreq->ctx = get_nfs_open_context(nfs_file_open_context(iocb->ki_filp)); 445b3c54de6STrond Myklebust l_ctx = nfs_get_lock_context(dreq->ctx); 446b3c54de6STrond Myklebust if (IS_ERR(l_ctx)) { 447b3c54de6STrond Myklebust result = PTR_ERR(l_ctx); 4488605cf0eSMisono Tomohiro nfs_direct_req_release(dreq); 449f11ac8dbSTrond Myklebust goto out_release; 450b3c54de6STrond Myklebust } 451b3c54de6STrond Myklebust dreq->l_ctx = l_ctx; 452487b8372SChuck Lever if (!is_sync_kiocb(iocb)) 453487b8372SChuck Lever dreq->iocb = iocb; 4541da177e4SLinus Torvalds 455fcb14cb1SAl Viro if (user_backed_iter(iter)) 456ad3cba22SDave Kleikamp dreq->flags = NFS_ODIRECT_SHOULD_DIRTY; 457ad3cba22SDave Kleikamp 45864158668SNeilBrown if (!swap) 459a5864c99STrond Myklebust nfs_start_io_direct(inode); 460a5864c99STrond Myklebust 461619d30b4SAl Viro NFS_I(inode)->read_io += count; 46285128b2bSAl Viro requested = nfs_direct_read_schedule_iovec(dreq, iter, iocb->ki_pos); 463d0b9875dSChristoph Hellwig 46464158668SNeilBrown if (!swap) 465a5864c99STrond Myklebust nfs_end_io_direct(inode); 466d0b9875dSChristoph Hellwig 46785128b2bSAl Viro if (requested > 0) { 468bc0fb201SChuck Lever result = nfs_direct_wait(dreq); 46985128b2bSAl Viro if (result > 0) { 47085128b2bSAl Viro requested -= result; 471c8b8e32dSChristoph Hellwig iocb->ki_pos += result; 47214a3ec79SChristoph Hellwig } 47385128b2bSAl Viro iov_iter_revert(iter, requested); 47485128b2bSAl Viro } else { 47585128b2bSAl Viro result = requested; 47685128b2bSAl Viro } 477d0b9875dSChristoph Hellwig 478f11ac8dbSTrond Myklebust out_release: 479b4946ffbSTrond Myklebust nfs_direct_req_release(dreq); 480f11ac8dbSTrond Myklebust out: 4811da177e4SLinus Torvalds return result; 4821da177e4SLinus Torvalds } 4831da177e4SLinus Torvalds 48488975a55STrond Myklebust static void nfs_direct_add_page_head(struct list_head *list, 48588975a55STrond Myklebust struct nfs_page *req) 48688975a55STrond Myklebust { 48788975a55STrond Myklebust struct nfs_page *head = req->wb_head; 48888975a55STrond Myklebust 48988975a55STrond Myklebust if (!list_empty(&head->wb_list) || !nfs_lock_request(head)) 49088975a55STrond Myklebust return; 49188975a55STrond Myklebust if (!list_empty(&head->wb_list)) { 49288975a55STrond Myklebust nfs_unlock_request(head); 49388975a55STrond Myklebust return; 49488975a55STrond Myklebust } 49588975a55STrond Myklebust list_add(&head->wb_list, list); 49688975a55STrond Myklebust kref_get(&head->wb_kref); 49788975a55STrond Myklebust kref_get(&head->wb_kref); 49888975a55STrond Myklebust } 49988975a55STrond Myklebust 500b193a78dSTrond Myklebust static void nfs_direct_join_group(struct list_head *list, 501b193a78dSTrond Myklebust struct nfs_commit_info *cinfo, 502b193a78dSTrond Myklebust struct inode *inode) 503ed5d588fSTrond Myklebust { 504be2fd156STrond Myklebust struct nfs_page *req, *subreq; 505ed5d588fSTrond Myklebust 506ed5d588fSTrond Myklebust list_for_each_entry(req, list, wb_list) { 50788975a55STrond Myklebust if (req->wb_head != req) { 50888975a55STrond Myklebust nfs_direct_add_page_head(&req->wb_list, req); 509ed5d588fSTrond Myklebust continue; 51088975a55STrond Myklebust } 511be2fd156STrond Myklebust subreq = req->wb_this_page; 512be2fd156STrond Myklebust if (subreq == req) 513be2fd156STrond Myklebust continue; 514be2fd156STrond Myklebust do { 515be2fd156STrond Myklebust /* 516be2fd156STrond Myklebust * Remove subrequests from this list before freeing 517be2fd156STrond Myklebust * them in the call to nfs_join_page_group(). 518be2fd156STrond Myklebust */ 519be2fd156STrond Myklebust if (!list_empty(&subreq->wb_list)) { 520be2fd156STrond Myklebust nfs_list_remove_request(subreq); 521be2fd156STrond Myklebust nfs_release_request(subreq); 522ed5d588fSTrond Myklebust } 523be2fd156STrond Myklebust } while ((subreq = subreq->wb_this_page) != req); 524b193a78dSTrond Myklebust nfs_join_page_group(req, cinfo, inode); 525ed5d588fSTrond Myklebust } 526ed5d588fSTrond Myklebust } 527ed5d588fSTrond Myklebust 528ed5d588fSTrond Myklebust static void 529085d1e33STom Haynes nfs_direct_write_scan_commit_list(struct inode *inode, 530085d1e33STom Haynes struct list_head *list, 531085d1e33STom Haynes struct nfs_commit_info *cinfo) 532085d1e33STom Haynes { 533e824f99aSTrond Myklebust mutex_lock(&NFS_I(cinfo->inode)->commit_mutex); 5349c455a8cSTrond Myklebust pnfs_recover_commit_reqs(list, cinfo); 535085d1e33STom Haynes nfs_scan_commit_list(&cinfo->mds->list, list, cinfo, 0); 536e824f99aSTrond Myklebust mutex_unlock(&NFS_I(cinfo->inode)->commit_mutex); 537085d1e33STom Haynes } 538085d1e33STom Haynes 539fad61490STrond Myklebust static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq) 5401da177e4SLinus Torvalds { 5411763da12SFred Isaman struct nfs_pageio_descriptor desc; 542954998b6STrond Myklebust struct nfs_page *req; 5431763da12SFred Isaman LIST_HEAD(reqs); 5441763da12SFred Isaman struct nfs_commit_info cinfo; 5451763da12SFred Isaman 5461763da12SFred Isaman nfs_init_cinfo_from_dreq(&cinfo, dreq); 547085d1e33STom Haynes nfs_direct_write_scan_commit_list(dreq->inode, &reqs, &cinfo); 5481da177e4SLinus Torvalds 549b193a78dSTrond Myklebust nfs_direct_join_group(&reqs, &cinfo, dreq->inode); 550ed5d588fSTrond Myklebust 551a5314a74STrond Myklebust nfs_clear_pnfs_ds_commit_verifiers(&dreq->ds_cinfo); 552607f31e8STrond Myklebust get_dreq(dreq); 5531da177e4SLinus Torvalds 554a20c93e3SChristoph Hellwig nfs_pageio_init_write(&desc, dreq->inode, FLUSH_STABLE, false, 5551763da12SFred Isaman &nfs_direct_write_completion_ops); 5561763da12SFred Isaman desc.pg_dreq = dreq; 557607f31e8STrond Myklebust 558954998b6STrond Myklebust while (!list_empty(&reqs)) { 559954998b6STrond Myklebust req = nfs_list_entry(reqs.next); 56033344e0fSTrond Myklebust /* Bump the transmission count */ 56133344e0fSTrond Myklebust req->wb_nio++; 5621763da12SFred Isaman if (!nfs_pageio_add_request(&desc, req)) { 5637c633932STrond Myklebust spin_lock(&dreq->lock); 564954998b6STrond Myklebust if (dreq->error < 0) { 565954998b6STrond Myklebust desc.pg_error = dreq->error; 566954998b6STrond Myklebust } else if (desc.pg_error != -EAGAIN) { 5671763da12SFred Isaman dreq->flags = 0; 568954998b6STrond Myklebust if (!desc.pg_error) 569954998b6STrond Myklebust desc.pg_error = -EIO; 570d600ad1fSPeng Tao dreq->error = desc.pg_error; 571954998b6STrond Myklebust } else 572954998b6STrond Myklebust dreq->flags = NFS_ODIRECT_RESCHED_WRITES; 5737c633932STrond Myklebust spin_unlock(&dreq->lock); 574954998b6STrond Myklebust break; 5751763da12SFred Isaman } 5765a695da2STrond Myklebust nfs_release_request(req); 5771763da12SFred Isaman } 5781763da12SFred Isaman nfs_pageio_complete(&desc); 579607f31e8STrond Myklebust 580954998b6STrond Myklebust while (!list_empty(&reqs)) { 581954998b6STrond Myklebust req = nfs_list_entry(reqs.next); 5824035c248STrond Myklebust nfs_list_remove_request(req); 5831d1afcbcSTrond Myklebust nfs_unlock_and_release_request(req); 5848982f7afSTrond Myklebust if (desc.pg_error == -EAGAIN) { 585954998b6STrond Myklebust nfs_mark_request_commit(req, NULL, &cinfo, 0); 5868982f7afSTrond Myklebust } else { 5878982f7afSTrond Myklebust spin_lock(&dreq->lock); 5888982f7afSTrond Myklebust nfs_direct_truncate_request(dreq, req); 5898982f7afSTrond Myklebust spin_unlock(&dreq->lock); 590954998b6STrond Myklebust nfs_release_request(req); 5914035c248STrond Myklebust } 5928982f7afSTrond Myklebust } 593607f31e8STrond Myklebust 594607f31e8STrond Myklebust if (put_dreq(dreq)) 5954d3b55d3SAnna Schumaker nfs_direct_write_complete(dreq); 596fad61490STrond Myklebust } 5971da177e4SLinus Torvalds 5981763da12SFred Isaman static void nfs_direct_commit_complete(struct nfs_commit_data *data) 599fad61490STrond Myklebust { 6001f28476dSTrond Myklebust const struct nfs_writeverf *verf = data->res.verf; 6010b7c0153SFred Isaman struct nfs_direct_req *dreq = data->dreq; 6021763da12SFred Isaman struct nfs_commit_info cinfo; 6031763da12SFred Isaman struct nfs_page *req; 604c9d8f89dSTrond Myklebust int status = data->task.tk_status; 605c9d8f89dSTrond Myklebust 6068efc4bbeSJeff Layton trace_nfs_direct_commit_complete(dreq); 6078efc4bbeSJeff Layton 608fb5f7f20STrond Myklebust if (status < 0) { 609fb5f7f20STrond Myklebust /* Errors in commit are fatal */ 610fb5f7f20STrond Myklebust dreq->error = status; 611fb5f7f20STrond Myklebust dreq->flags = NFS_ODIRECT_DONE; 61255051c0cSJeff Layton } else { 613fb5f7f20STrond Myklebust status = dreq->error; 61455051c0cSJeff Layton } 615fb5f7f20STrond Myklebust 6161763da12SFred Isaman nfs_init_cinfo_from_dreq(&cinfo, dreq); 617fad61490STrond Myklebust 6181763da12SFred Isaman while (!list_empty(&data->pages)) { 6191763da12SFred Isaman req = nfs_list_entry(data->pages.next); 6201763da12SFred Isaman nfs_list_remove_request(req); 6218982f7afSTrond Myklebust if (status < 0) { 6228982f7afSTrond Myklebust spin_lock(&dreq->lock); 6238982f7afSTrond Myklebust nfs_direct_truncate_request(dreq, req); 6248982f7afSTrond Myklebust spin_unlock(&dreq->lock); 6258982f7afSTrond Myklebust nfs_release_request(req); 6268982f7afSTrond Myklebust } else if (!nfs_write_match_verf(verf, req)) { 6271f28476dSTrond Myklebust dreq->flags = NFS_ODIRECT_RESCHED_WRITES; 62833344e0fSTrond Myklebust /* 62933344e0fSTrond Myklebust * Despite the reboot, the write was successful, 63033344e0fSTrond Myklebust * so reset wb_nio. 63133344e0fSTrond Myklebust */ 63233344e0fSTrond Myklebust req->wb_nio = 0; 633b57ff130SWeston Andros Adamson nfs_mark_request_commit(req, NULL, &cinfo, 0); 6348982f7afSTrond Myklebust } else 635906369e4SFred Isaman nfs_release_request(req); 6361d1afcbcSTrond Myklebust nfs_unlock_and_release_request(req); 637fad61490STrond Myklebust } 638fad61490STrond Myklebust 639133a48abSTrond Myklebust if (nfs_commit_end(cinfo.mds)) 6404d3b55d3SAnna Schumaker nfs_direct_write_complete(dreq); 6411763da12SFred Isaman } 6421763da12SFred Isaman 643b20135d0STrond Myklebust static void nfs_direct_resched_write(struct nfs_commit_info *cinfo, 644b20135d0STrond Myklebust struct nfs_page *req) 6451763da12SFred Isaman { 646b20135d0STrond Myklebust struct nfs_direct_req *dreq = cinfo->dreq; 647b20135d0STrond Myklebust 6488efc4bbeSJeff Layton trace_nfs_direct_resched_write(dreq); 6498efc4bbeSJeff Layton 650b20135d0STrond Myklebust spin_lock(&dreq->lock); 651fb5f7f20STrond Myklebust if (dreq->flags != NFS_ODIRECT_DONE) 652b20135d0STrond Myklebust dreq->flags = NFS_ODIRECT_RESCHED_WRITES; 653b20135d0STrond Myklebust spin_unlock(&dreq->lock); 654b20135d0STrond Myklebust nfs_mark_request_commit(req, NULL, cinfo, 0); 6551763da12SFred Isaman } 6561763da12SFred Isaman 6571763da12SFred Isaman static const struct nfs_commit_completion_ops nfs_direct_commit_completion_ops = { 6581763da12SFred Isaman .completion = nfs_direct_commit_complete, 659b20135d0STrond Myklebust .resched_write = nfs_direct_resched_write, 660fad61490STrond Myklebust }; 661fad61490STrond Myklebust 662fad61490STrond Myklebust static void nfs_direct_commit_schedule(struct nfs_direct_req *dreq) 663fad61490STrond Myklebust { 6641763da12SFred Isaman int res; 6651763da12SFred Isaman struct nfs_commit_info cinfo; 6661763da12SFred Isaman LIST_HEAD(mds_list); 667fad61490STrond Myklebust 6681763da12SFred Isaman nfs_init_cinfo_from_dreq(&cinfo, dreq); 669*e25447c3SJosef Bacik nfs_commit_begin(cinfo.mds); 6701763da12SFred Isaman nfs_scan_commit(dreq->inode, &mds_list, &cinfo); 6711763da12SFred Isaman res = nfs_generic_commit_list(dreq->inode, &mds_list, 0, &cinfo); 672*e25447c3SJosef Bacik if (res < 0) { /* res == -ENOMEM */ 673*e25447c3SJosef Bacik spin_lock(&dreq->lock); 674*e25447c3SJosef Bacik if (dreq->flags == 0) 675*e25447c3SJosef Bacik dreq->flags = NFS_ODIRECT_RESCHED_WRITES; 676*e25447c3SJosef Bacik spin_unlock(&dreq->lock); 677*e25447c3SJosef Bacik } 678*e25447c3SJosef Bacik if (nfs_commit_end(cinfo.mds)) 679*e25447c3SJosef Bacik nfs_direct_write_complete(dreq); 6801da177e4SLinus Torvalds } 6811da177e4SLinus Torvalds 682fb5f7f20STrond Myklebust static void nfs_direct_write_clear_reqs(struct nfs_direct_req *dreq) 683fb5f7f20STrond Myklebust { 684fb5f7f20STrond Myklebust struct nfs_commit_info cinfo; 685fb5f7f20STrond Myklebust struct nfs_page *req; 686fb5f7f20STrond Myklebust LIST_HEAD(reqs); 687fb5f7f20STrond Myklebust 688fb5f7f20STrond Myklebust nfs_init_cinfo_from_dreq(&cinfo, dreq); 689fb5f7f20STrond Myklebust nfs_direct_write_scan_commit_list(dreq->inode, &reqs, &cinfo); 690fb5f7f20STrond Myklebust 691fb5f7f20STrond Myklebust while (!list_empty(&reqs)) { 692fb5f7f20STrond Myklebust req = nfs_list_entry(reqs.next); 693fb5f7f20STrond Myklebust nfs_list_remove_request(req); 6948982f7afSTrond Myklebust nfs_direct_truncate_request(dreq, req); 695f02cec9dSTrond Myklebust nfs_release_request(req); 696fb5f7f20STrond Myklebust nfs_unlock_and_release_request(req); 697fb5f7f20STrond Myklebust } 698fb5f7f20STrond Myklebust } 699fb5f7f20STrond Myklebust 7001763da12SFred Isaman static void nfs_direct_write_schedule_work(struct work_struct *work) 7011da177e4SLinus Torvalds { 7021763da12SFred Isaman struct nfs_direct_req *dreq = container_of(work, struct nfs_direct_req, work); 703fad61490STrond Myklebust int flags = dreq->flags; 7041da177e4SLinus Torvalds 705fad61490STrond Myklebust dreq->flags = 0; 706fad61490STrond Myklebust switch (flags) { 707fad61490STrond Myklebust case NFS_ODIRECT_DO_COMMIT: 708fad61490STrond Myklebust nfs_direct_commit_schedule(dreq); 7091da177e4SLinus Torvalds break; 710fad61490STrond Myklebust case NFS_ODIRECT_RESCHED_WRITES: 711fad61490STrond Myklebust nfs_direct_write_reschedule(dreq); 7121da177e4SLinus Torvalds break; 7131da177e4SLinus Torvalds default: 714fb5f7f20STrond Myklebust nfs_direct_write_clear_reqs(dreq); 715f7b5c340STrond Myklebust nfs_zap_mapping(dreq->inode, dreq->inode->i_mapping); 716f7b5c340STrond Myklebust nfs_direct_complete(dreq); 7171da177e4SLinus Torvalds } 718fad61490STrond Myklebust } 719fad61490STrond Myklebust 7204d3b55d3SAnna Schumaker static void nfs_direct_write_complete(struct nfs_direct_req *dreq) 721fad61490STrond Myklebust { 7228efc4bbeSJeff Layton trace_nfs_direct_write_complete(dreq); 72346483c2eSNeilBrown queue_work(nfsiod_workqueue, &dreq->work); /* Calls nfs_direct_write_schedule_work */ 724fad61490STrond Myklebust } 7251763da12SFred Isaman 7261763da12SFred Isaman static void nfs_direct_write_completion(struct nfs_pgio_header *hdr) 7271763da12SFred Isaman { 7281763da12SFred Isaman struct nfs_direct_req *dreq = hdr->dreq; 7291763da12SFred Isaman struct nfs_commit_info cinfo; 7301763da12SFred Isaman struct nfs_page *req = nfs_list_entry(hdr->pages.next); 7313731d44bSTrond Myklebust int flags = NFS_ODIRECT_DONE; 7321763da12SFred Isaman 7338efc4bbeSJeff Layton trace_nfs_direct_write_completion(dreq); 7348efc4bbeSJeff Layton 7351763da12SFred Isaman nfs_init_cinfo_from_dreq(&cinfo, dreq); 7361763da12SFred Isaman 7371763da12SFred Isaman spin_lock(&dreq->lock); 738eb2c50daSTrond Myklebust if (test_bit(NFS_IOHDR_REDO, &hdr->flags)) { 739eb2c50daSTrond Myklebust spin_unlock(&dreq->lock); 740eb2c50daSTrond Myklebust goto out_put; 741eb2c50daSTrond Myklebust } 742eb2c50daSTrond Myklebust 743031d73edSTrond Myklebust nfs_direct_count_bytes(dreq, hdr); 7448982f7afSTrond Myklebust if (test_bit(NFS_IOHDR_UNSTABLE_WRITES, &hdr->flags) && 7458982f7afSTrond Myklebust !test_bit(NFS_IOHDR_ERROR, &hdr->flags)) { 7463731d44bSTrond Myklebust if (!dreq->flags) 7471763da12SFred Isaman dreq->flags = NFS_ODIRECT_DO_COMMIT; 7483731d44bSTrond Myklebust flags = dreq->flags; 7491763da12SFred Isaman } 7501763da12SFred Isaman spin_unlock(&dreq->lock); 7511763da12SFred Isaman 7521763da12SFred Isaman while (!list_empty(&hdr->pages)) { 7532bfc6e56SWeston Andros Adamson 7541763da12SFred Isaman req = nfs_list_entry(hdr->pages.next); 7551763da12SFred Isaman nfs_list_remove_request(req); 7563731d44bSTrond Myklebust if (flags == NFS_ODIRECT_DO_COMMIT) { 75704277086STrond Myklebust kref_get(&req->wb_kref); 758ba838a75SChuck Lever memcpy(&req->wb_verf, &hdr->verf.verifier, 759ba838a75SChuck Lever sizeof(req->wb_verf)); 760b57ff130SWeston Andros Adamson nfs_mark_request_commit(req, hdr->lseg, &cinfo, 761b57ff130SWeston Andros Adamson hdr->ds_commit_idx); 7623731d44bSTrond Myklebust } else if (flags == NFS_ODIRECT_RESCHED_WRITES) { 7633731d44bSTrond Myklebust kref_get(&req->wb_kref); 7643731d44bSTrond Myklebust nfs_mark_request_commit(req, NULL, &cinfo, 0); 7651763da12SFred Isaman } 7661d1afcbcSTrond Myklebust nfs_unlock_and_release_request(req); 7671763da12SFred Isaman } 7681763da12SFred Isaman 7691763da12SFred Isaman out_put: 7701763da12SFred Isaman if (put_dreq(dreq)) 7714d3b55d3SAnna Schumaker nfs_direct_write_complete(dreq); 7721763da12SFred Isaman hdr->release(hdr); 7731763da12SFred Isaman } 7741763da12SFred Isaman 775df3accb8STrond Myklebust static void nfs_write_sync_pgio_error(struct list_head *head, int error) 7763e9e0ca3STrond Myklebust { 7773e9e0ca3STrond Myklebust struct nfs_page *req; 7783e9e0ca3STrond Myklebust 7793e9e0ca3STrond Myklebust while (!list_empty(head)) { 7803e9e0ca3STrond Myklebust req = nfs_list_entry(head->next); 7813e9e0ca3STrond Myklebust nfs_list_remove_request(req); 7821d1afcbcSTrond Myklebust nfs_unlock_and_release_request(req); 7833e9e0ca3STrond Myklebust } 7843e9e0ca3STrond Myklebust } 7853e9e0ca3STrond Myklebust 786dc602dd7STrond Myklebust static void nfs_direct_write_reschedule_io(struct nfs_pgio_header *hdr) 787dc602dd7STrond Myklebust { 788dc602dd7STrond Myklebust struct nfs_direct_req *dreq = hdr->dreq; 789b11243f7STrond Myklebust struct nfs_page *req; 790b11243f7STrond Myklebust struct nfs_commit_info cinfo; 791dc602dd7STrond Myklebust 7928efc4bbeSJeff Layton trace_nfs_direct_write_reschedule_io(dreq); 7938efc4bbeSJeff Layton 794b11243f7STrond Myklebust nfs_init_cinfo_from_dreq(&cinfo, dreq); 795dc602dd7STrond Myklebust spin_lock(&dreq->lock); 796b11243f7STrond Myklebust if (dreq->error == 0) 797dc602dd7STrond Myklebust dreq->flags = NFS_ODIRECT_RESCHED_WRITES; 798b11243f7STrond Myklebust set_bit(NFS_IOHDR_REDO, &hdr->flags); 799dc602dd7STrond Myklebust spin_unlock(&dreq->lock); 800b11243f7STrond Myklebust while (!list_empty(&hdr->pages)) { 801b11243f7STrond Myklebust req = nfs_list_entry(hdr->pages.next); 802b11243f7STrond Myklebust nfs_list_remove_request(req); 803b11243f7STrond Myklebust nfs_unlock_request(req); 804b11243f7STrond Myklebust nfs_mark_request_commit(req, NULL, &cinfo, 0); 805b11243f7STrond Myklebust } 806dc602dd7STrond Myklebust } 807dc602dd7STrond Myklebust 8081763da12SFred Isaman static const struct nfs_pgio_completion_ops nfs_direct_write_completion_ops = { 8093e9e0ca3STrond Myklebust .error_cleanup = nfs_write_sync_pgio_error, 8101763da12SFred Isaman .init_hdr = nfs_direct_pgio_init, 8111763da12SFred Isaman .completion = nfs_direct_write_completion, 812dc602dd7STrond Myklebust .reschedule_io = nfs_direct_write_reschedule_io, 8131763da12SFred Isaman }; 8141763da12SFred Isaman 81591f79c43SAl Viro 81691f79c43SAl Viro /* 81791f79c43SAl Viro * NB: Return the value of the first error return code. Subsequent 81891f79c43SAl Viro * errors after the first one are ignored. 81991f79c43SAl Viro */ 82091f79c43SAl Viro /* 82191f79c43SAl Viro * For each wsize'd chunk of the user's buffer, dispatch an NFS WRITE 82291f79c43SAl Viro * operation. If nfs_writedata_alloc() or get_user_pages() fails, 82391f79c43SAl Viro * bail and stop sending more writes. Write length accounting is 82491f79c43SAl Viro * handled automatically by nfs_direct_write_result(). Otherwise, if 82591f79c43SAl Viro * no requests have been sent, just return an error. 82691f79c43SAl Viro */ 82719f73787SChuck Lever static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq, 828619d30b4SAl Viro struct iov_iter *iter, 829c265de25SNeilBrown loff_t pos, int ioflags) 83019f73787SChuck Lever { 8311763da12SFred Isaman struct nfs_pageio_descriptor desc; 8321d59d61fSTrond Myklebust struct inode *inode = dreq->inode; 833954998b6STrond Myklebust struct nfs_commit_info cinfo; 83419f73787SChuck Lever ssize_t result = 0; 83519f73787SChuck Lever size_t requested_bytes = 0; 83691f79c43SAl Viro size_t wsize = max_t(size_t, NFS_SERVER(inode)->wsize, PAGE_SIZE); 837954998b6STrond Myklebust bool defer = false; 83819f73787SChuck Lever 8398efc4bbeSJeff Layton trace_nfs_direct_write_schedule_iovec(dreq); 8408efc4bbeSJeff Layton 841c265de25SNeilBrown nfs_pageio_init_write(&desc, inode, ioflags, false, 8421763da12SFred Isaman &nfs_direct_write_completion_ops); 8431763da12SFred Isaman desc.pg_dreq = dreq; 84419f73787SChuck Lever get_dreq(dreq); 845fe0f07d0SJens Axboe inode_dio_begin(inode); 84619f73787SChuck Lever 84791f79c43SAl Viro NFS_I(inode)->write_io += iov_iter_count(iter); 84891f79c43SAl Viro while (iov_iter_count(iter)) { 84991f79c43SAl Viro struct page **pagevec; 85091f79c43SAl Viro size_t bytes; 85191f79c43SAl Viro size_t pgbase; 85291f79c43SAl Viro unsigned npages, i; 85391f79c43SAl Viro 8541ef255e2SAl Viro result = iov_iter_get_pages_alloc2(iter, &pagevec, 85591f79c43SAl Viro wsize, &pgbase); 85619f73787SChuck Lever if (result < 0) 85719f73787SChuck Lever break; 85891f79c43SAl Viro 85991f79c43SAl Viro bytes = result; 86091f79c43SAl Viro npages = (result + pgbase + PAGE_SIZE - 1) / PAGE_SIZE; 86191f79c43SAl Viro for (i = 0; i < npages; i++) { 86291f79c43SAl Viro struct nfs_page *req; 86391f79c43SAl Viro unsigned int req_len = min_t(size_t, bytes, PAGE_SIZE - pgbase); 86491f79c43SAl Viro 86570e9db69STrond Myklebust req = nfs_page_create_from_page(dreq->ctx, pagevec[i], 86670e9db69STrond Myklebust pgbase, pos, req_len); 86791f79c43SAl Viro if (IS_ERR(req)) { 86891f79c43SAl Viro result = PTR_ERR(req); 86919f73787SChuck Lever break; 87091f79c43SAl Viro } 8710a00b77bSWeston Andros Adamson 872d600ad1fSPeng Tao if (desc.pg_error < 0) { 873d600ad1fSPeng Tao nfs_free_request(req); 874d600ad1fSPeng Tao result = desc.pg_error; 875d600ad1fSPeng Tao break; 876d600ad1fSPeng Tao } 8770a00b77bSWeston Andros Adamson 87891f79c43SAl Viro pgbase = 0; 87991f79c43SAl Viro bytes -= req_len; 88091f79c43SAl Viro requested_bytes += req_len; 88191f79c43SAl Viro pos += req_len; 88291f79c43SAl Viro dreq->bytes_left -= req_len; 883954998b6STrond Myklebust 884954998b6STrond Myklebust if (defer) { 885954998b6STrond Myklebust nfs_mark_request_commit(req, NULL, &cinfo, 0); 886954998b6STrond Myklebust continue; 887954998b6STrond Myklebust } 888954998b6STrond Myklebust 889954998b6STrond Myklebust nfs_lock_request(req); 890954998b6STrond Myklebust if (nfs_pageio_add_request(&desc, req)) 891954998b6STrond Myklebust continue; 892954998b6STrond Myklebust 893954998b6STrond Myklebust /* Exit on hard errors */ 894954998b6STrond Myklebust if (desc.pg_error < 0 && desc.pg_error != -EAGAIN) { 895954998b6STrond Myklebust result = desc.pg_error; 896954998b6STrond Myklebust nfs_unlock_and_release_request(req); 897954998b6STrond Myklebust break; 898954998b6STrond Myklebust } 899954998b6STrond Myklebust 900954998b6STrond Myklebust /* If the error is soft, defer remaining requests */ 901954998b6STrond Myklebust nfs_init_cinfo_from_dreq(&cinfo, dreq); 9027c633932STrond Myklebust spin_lock(&dreq->lock); 903954998b6STrond Myklebust dreq->flags = NFS_ODIRECT_RESCHED_WRITES; 9047c633932STrond Myklebust spin_unlock(&dreq->lock); 905954998b6STrond Myklebust nfs_unlock_request(req); 906954998b6STrond Myklebust nfs_mark_request_commit(req, NULL, &cinfo, 0); 907954998b6STrond Myklebust desc.pg_error = 0; 908954998b6STrond Myklebust defer = true; 90991f79c43SAl Viro } 91091f79c43SAl Viro nfs_direct_release_pages(pagevec, npages); 91191f79c43SAl Viro kvfree(pagevec); 91291f79c43SAl Viro if (result < 0) 91391f79c43SAl Viro break; 91419f73787SChuck Lever } 9151763da12SFred Isaman nfs_pageio_complete(&desc); 91619f73787SChuck Lever 917839f7ad6SChuck Lever /* 918839f7ad6SChuck Lever * If no bytes were started, return the error, and let the 919839f7ad6SChuck Lever * generic layer handle the completion. 920839f7ad6SChuck Lever */ 921839f7ad6SChuck Lever if (requested_bytes == 0) { 922d03727b2SOlga Kornievskaia inode_dio_end(inode); 92365caafd0SOlga Kornievskaia nfs_direct_req_release(dreq); 924839f7ad6SChuck Lever return result < 0 ? result : -EIO; 925839f7ad6SChuck Lever } 926839f7ad6SChuck Lever 92719f73787SChuck Lever if (put_dreq(dreq)) 9284d3b55d3SAnna Schumaker nfs_direct_write_complete(dreq); 92985128b2bSAl Viro return requested_bytes; 93019f73787SChuck Lever } 93119f73787SChuck Lever 9321da177e4SLinus Torvalds /** 9331da177e4SLinus Torvalds * nfs_file_direct_write - file direct write operation for NFS files 9341da177e4SLinus Torvalds * @iocb: target I/O control block 935619d30b4SAl Viro * @iter: vector of user buffers from which to write data 93664158668SNeilBrown * @swap: flag indicating this is swap IO, not O_DIRECT IO 9371da177e4SLinus Torvalds * 9381da177e4SLinus Torvalds * We use this function for direct writes instead of calling 9391da177e4SLinus Torvalds * generic_file_aio_write() in order to avoid taking the inode 9401da177e4SLinus Torvalds * semaphore and updating the i_size. The NFS server will set 9411da177e4SLinus Torvalds * the new i_size and this client must read the updated size 9421da177e4SLinus Torvalds * back into its cache. We let the server do generic write 9431da177e4SLinus Torvalds * parameter checking and report problems. 9441da177e4SLinus Torvalds * 9451da177e4SLinus Torvalds * We eliminate local atime updates, see direct read above. 9461da177e4SLinus Torvalds * 9471da177e4SLinus Torvalds * We avoid unnecessary page cache invalidations for normal cached 9481da177e4SLinus Torvalds * readers of this file. 9491da177e4SLinus Torvalds * 9501da177e4SLinus Torvalds * Note that O_APPEND is not supported for NFS direct writes, as there 9511da177e4SLinus Torvalds * is no atomic O_APPEND write facility in the NFS protocol. 9521da177e4SLinus Torvalds */ 95364158668SNeilBrown ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter, 95464158668SNeilBrown bool swap) 9551da177e4SLinus Torvalds { 9569a74a2b8SColin Ian King ssize_t result, requested; 95789698b24STrond Myklebust size_t count; 9581da177e4SLinus Torvalds struct file *file = iocb->ki_filp; 9591da177e4SLinus Torvalds struct address_space *mapping = file->f_mapping; 96022cd1bf1SChristoph Hellwig struct inode *inode = mapping->host; 96122cd1bf1SChristoph Hellwig struct nfs_direct_req *dreq; 96222cd1bf1SChristoph Hellwig struct nfs_lock_context *l_ctx; 96365a4a1caSAl Viro loff_t pos, end; 964c216fd70SChuck Lever 9656de1472fSAl Viro dfprintk(FILE, "NFS: direct write(%pD2, %zd@%Ld)\n", 9663309dd04SAl Viro file, iov_iter_count(iter), (long long) iocb->ki_pos); 967027445c3SBadari Pulavarty 96864158668SNeilBrown if (swap) 96964158668SNeilBrown /* bypass generic checks */ 97064158668SNeilBrown result = iov_iter_count(iter); 97164158668SNeilBrown else 97289698b24STrond Myklebust result = generic_write_checks(iocb, iter); 97389698b24STrond Myklebust if (result <= 0) 97489698b24STrond Myklebust return result; 97589698b24STrond Myklebust count = result; 97689698b24STrond Myklebust nfs_add_stats(mapping->host, NFSIOS_DIRECTWRITTENBYTES, count); 9773309dd04SAl Viro 9783309dd04SAl Viro pos = iocb->ki_pos; 97909cbfeafSKirill A. Shutemov end = (pos + iov_iter_count(iter) - 1) >> PAGE_SHIFT; 980ce1a8e67SChuck Lever 98189698b24STrond Myklebust task_io_account_write(count); 9827ec10f26SKonstantin Khlebnikov 98322cd1bf1SChristoph Hellwig result = -ENOMEM; 98422cd1bf1SChristoph Hellwig dreq = nfs_direct_req_alloc(); 98522cd1bf1SChristoph Hellwig if (!dreq) 986a5864c99STrond Myklebust goto out; 98722cd1bf1SChristoph Hellwig 98822cd1bf1SChristoph Hellwig dreq->inode = inode; 98989698b24STrond Myklebust dreq->bytes_left = dreq->max_count = count; 9905fadeb47SPeng Tao dreq->io_start = pos; 99122cd1bf1SChristoph Hellwig dreq->ctx = get_nfs_open_context(nfs_file_open_context(iocb->ki_filp)); 99222cd1bf1SChristoph Hellwig l_ctx = nfs_get_lock_context(dreq->ctx); 99322cd1bf1SChristoph Hellwig if (IS_ERR(l_ctx)) { 99422cd1bf1SChristoph Hellwig result = PTR_ERR(l_ctx); 9958605cf0eSMisono Tomohiro nfs_direct_req_release(dreq); 99622cd1bf1SChristoph Hellwig goto out_release; 99722cd1bf1SChristoph Hellwig } 99822cd1bf1SChristoph Hellwig dreq->l_ctx = l_ctx; 99922cd1bf1SChristoph Hellwig if (!is_sync_kiocb(iocb)) 100022cd1bf1SChristoph Hellwig dreq->iocb = iocb; 10019c455a8cSTrond Myklebust pnfs_init_ds_commit_info_ops(&dreq->ds_cinfo, inode); 100222cd1bf1SChristoph Hellwig 100364158668SNeilBrown if (swap) { 1004c265de25SNeilBrown requested = nfs_direct_write_schedule_iovec(dreq, iter, pos, 1005c265de25SNeilBrown FLUSH_STABLE); 100664158668SNeilBrown } else { 1007a5864c99STrond Myklebust nfs_start_io_direct(inode); 1008a5864c99STrond Myklebust 1009c265de25SNeilBrown requested = nfs_direct_write_schedule_iovec(dreq, iter, pos, 1010c265de25SNeilBrown FLUSH_COND_STABLE); 1011a9ab5e84SChristoph Hellwig 1012a9ab5e84SChristoph Hellwig if (mapping->nrpages) { 1013a9ab5e84SChristoph Hellwig invalidate_inode_pages2_range(mapping, 101409cbfeafSKirill A. Shutemov pos >> PAGE_SHIFT, end); 1015a9ab5e84SChristoph Hellwig } 1016a9ab5e84SChristoph Hellwig 1017a5864c99STrond Myklebust nfs_end_io_direct(inode); 101864158668SNeilBrown } 1019a9ab5e84SChristoph Hellwig 102085128b2bSAl Viro if (requested > 0) { 102122cd1bf1SChristoph Hellwig result = nfs_direct_wait(dreq); 102222cd1bf1SChristoph Hellwig if (result > 0) { 102385128b2bSAl Viro requested -= result; 102422cd1bf1SChristoph Hellwig iocb->ki_pos = pos + result; 1025e2592217SChristoph Hellwig /* XXX: should check the generic_write_sync retval */ 1026e2592217SChristoph Hellwig generic_write_sync(iocb, result); 10271763da12SFred Isaman } 102885128b2bSAl Viro iov_iter_revert(iter, requested); 102985128b2bSAl Viro } else { 103085128b2bSAl Viro result = requested; 103122cd1bf1SChristoph Hellwig } 1032a6b5a28eSDave Wysochanski nfs_fscache_invalidate(inode, FSCACHE_INVAL_DIO_WRITE); 103322cd1bf1SChristoph Hellwig out_release: 103422cd1bf1SChristoph Hellwig nfs_direct_req_release(dreq); 1035a5864c99STrond Myklebust out: 103622cd1bf1SChristoph Hellwig return result; 10371da177e4SLinus Torvalds } 10381da177e4SLinus Torvalds 103988467055SChuck Lever /** 104088467055SChuck Lever * nfs_init_directcache - create a slab cache for nfs_direct_req structures 104188467055SChuck Lever * 104288467055SChuck Lever */ 1043f7b422b1SDavid Howells int __init nfs_init_directcache(void) 10441da177e4SLinus Torvalds { 10451da177e4SLinus Torvalds nfs_direct_cachep = kmem_cache_create("nfs_direct_cache", 10461da177e4SLinus Torvalds sizeof(struct nfs_direct_req), 1047fffb60f9SPaul Jackson 0, (SLAB_RECLAIM_ACCOUNT| 1048fffb60f9SPaul Jackson SLAB_MEM_SPREAD), 104920c2df83SPaul Mundt NULL); 10501da177e4SLinus Torvalds if (nfs_direct_cachep == NULL) 10511da177e4SLinus Torvalds return -ENOMEM; 10521da177e4SLinus Torvalds 10531da177e4SLinus Torvalds return 0; 10541da177e4SLinus Torvalds } 10551da177e4SLinus Torvalds 105688467055SChuck Lever /** 1057f7b422b1SDavid Howells * nfs_destroy_directcache - destroy the slab cache for nfs_direct_req structures 105888467055SChuck Lever * 105988467055SChuck Lever */ 1060266bee88SDavid Brownell void nfs_destroy_directcache(void) 10611da177e4SLinus Torvalds { 10621a1d92c1SAlexey Dobriyan kmem_cache_destroy(nfs_direct_cachep); 10631da177e4SLinus Torvalds } 1064