// SPDX-License-Identifier: GPL-2.0-only
/*
 * linux/fs/nfs/direct.c
 *
 * Copyright (C) 2003 by Chuck Lever <cel@netapp.com>
 *
 * High-performance uncached I/O for the Linux NFS client
 *
 * There are important applications whose performance or correctness
 * depends on uncached access to file data.  Database clusters
 * (multiple copies of the same instance running on separate hosts)
 * implement their own cache coherency protocol that subsumes file
 * system cache protocols.  Applications that process datasets
 * considerably larger than the client's memory do not always benefit
 * from a local cache.  A streaming video server, for instance, has no
 * need to cache the contents of a file.
 *
 * When an application requests uncached I/O, all read and write requests
 * are made directly to the server; data stored or fetched via these
 * requests is not cached in the Linux page cache.  The client does not
 * correct unaligned requests from applications.  All requested bytes are
 * held on permanent storage before a direct write system call returns to
 * an application.
 *
 * Solaris implements an uncached I/O facility called directio() that
 * is used for backups and sequential I/O to very large files.  Solaris
 * also supports uncaching whole NFS partitions with "-o forcedirectio,"
 * an undocumented mount option.
 *
 * Designed by Jeff Kimmel, Chuck Lever, and Trond Myklebust, with
 * help from Andrew Morton.
 *
 * 18 Dec 2001	Initial implementation for 2.4  --cel
 * 08 Jul 2002	Version for 2.4.19, with bug fixes --trondmy
 * 08 Jun 2003	Port to 2.5 APIs  --cel
 * 31 Mar 2004	Handle direct I/O without VFS support  --cel
 * 15 Sep 2004	Parallel async reads  --cel
 * 04 May 2005	support O_DIRECT with aio  --cel
 *
 */
411da177e4SLinus Torvalds
421da177e4SLinus Torvalds #include <linux/errno.h>
431da177e4SLinus Torvalds #include <linux/sched.h>
441da177e4SLinus Torvalds #include <linux/kernel.h>
451da177e4SLinus Torvalds #include <linux/file.h>
461da177e4SLinus Torvalds #include <linux/pagemap.h>
471da177e4SLinus Torvalds #include <linux/kref.h>
485a0e3ad6STejun Heo #include <linux/slab.h>
497ec10f26SKonstantin Khlebnikov #include <linux/task_io_accounting_ops.h>
506296556fSPeng Tao #include <linux/module.h>
511da177e4SLinus Torvalds
521da177e4SLinus Torvalds #include <linux/nfs_fs.h>
531da177e4SLinus Torvalds #include <linux/nfs_page.h>
541da177e4SLinus Torvalds #include <linux/sunrpc/clnt.h>
551da177e4SLinus Torvalds
567c0f6ba6SLinus Torvalds #include <linux/uaccess.h>
5760063497SArun Sharma #include <linux/atomic.h>
581da177e4SLinus Torvalds
598d5658c9STrond Myklebust #include "internal.h"
6091d5b470SChuck Lever #include "iostat.h"
611763da12SFred Isaman #include "pnfs.h"
62a6b5a28eSDave Wysochanski #include "fscache.h"
638efc4bbeSJeff Layton #include "nfstrace.h"
641da177e4SLinus Torvalds
651da177e4SLinus Torvalds #define NFSDBG_FACILITY NFSDBG_VFS
661da177e4SLinus Torvalds
67e18b890bSChristoph Lameter static struct kmem_cache *nfs_direct_cachep;
681da177e4SLinus Torvalds
691763da12SFred Isaman static const struct nfs_pgio_completion_ops nfs_direct_write_completion_ops;
701763da12SFred Isaman static const struct nfs_commit_completion_ops nfs_direct_commit_completion_ops;
714d3b55d3SAnna Schumaker static void nfs_direct_write_complete(struct nfs_direct_req *dreq);
721763da12SFred Isaman static void nfs_direct_write_schedule_work(struct work_struct *work);
73607f31e8STrond Myklebust
get_dreq(struct nfs_direct_req * dreq)74607f31e8STrond Myklebust static inline void get_dreq(struct nfs_direct_req *dreq)
75607f31e8STrond Myklebust {
76607f31e8STrond Myklebust atomic_inc(&dreq->io_count);
77607f31e8STrond Myklebust }
78607f31e8STrond Myklebust
put_dreq(struct nfs_direct_req * dreq)79607f31e8STrond Myklebust static inline int put_dreq(struct nfs_direct_req *dreq)
80607f31e8STrond Myklebust {
81607f31e8STrond Myklebust return atomic_dec_and_test(&dreq->io_count);
82607f31e8STrond Myklebust }
83607f31e8STrond Myklebust
840a00b77bSWeston Andros Adamson static void
nfs_direct_handle_truncated(struct nfs_direct_req * dreq,const struct nfs_pgio_header * hdr,ssize_t dreq_len)85031d73edSTrond Myklebust nfs_direct_handle_truncated(struct nfs_direct_req *dreq,
86031d73edSTrond Myklebust const struct nfs_pgio_header *hdr,
87031d73edSTrond Myklebust ssize_t dreq_len)
880a00b77bSWeston Andros Adamson {
89031d73edSTrond Myklebust if (!(test_bit(NFS_IOHDR_ERROR, &hdr->flags) ||
90031d73edSTrond Myklebust test_bit(NFS_IOHDR_EOF, &hdr->flags)))
91031d73edSTrond Myklebust return;
92031d73edSTrond Myklebust if (dreq->max_count >= dreq_len) {
93031d73edSTrond Myklebust dreq->max_count = dreq_len;
94031d73edSTrond Myklebust if (dreq->count > dreq_len)
95031d73edSTrond Myklebust dreq->count = dreq_len;
965fadeb47SPeng Tao }
978982f7afSTrond Myklebust
988982f7afSTrond Myklebust if (test_bit(NFS_IOHDR_ERROR, &hdr->flags) && !dreq->error)
998982f7afSTrond Myklebust dreq->error = hdr->error;
1000a00b77bSWeston Andros Adamson }
101031d73edSTrond Myklebust
102031d73edSTrond Myklebust static void
nfs_direct_count_bytes(struct nfs_direct_req * dreq,const struct nfs_pgio_header * hdr)103031d73edSTrond Myklebust nfs_direct_count_bytes(struct nfs_direct_req *dreq,
104031d73edSTrond Myklebust const struct nfs_pgio_header *hdr)
105031d73edSTrond Myklebust {
106031d73edSTrond Myklebust loff_t hdr_end = hdr->io_start + hdr->good_bytes;
107031d73edSTrond Myklebust ssize_t dreq_len = 0;
108031d73edSTrond Myklebust
109031d73edSTrond Myklebust if (hdr_end > dreq->io_start)
110031d73edSTrond Myklebust dreq_len = hdr_end - dreq->io_start;
111031d73edSTrond Myklebust
112031d73edSTrond Myklebust nfs_direct_handle_truncated(dreq, hdr, dreq_len);
113031d73edSTrond Myklebust
114031d73edSTrond Myklebust if (dreq_len > dreq->max_count)
115031d73edSTrond Myklebust dreq_len = dreq->max_count;
116031d73edSTrond Myklebust
117031d73edSTrond Myklebust if (dreq->count < dreq_len)
118031d73edSTrond Myklebust dreq->count = dreq_len;
1191ccbad9fSPeng Tao }
1200a00b77bSWeston Andros Adamson
nfs_direct_truncate_request(struct nfs_direct_req * dreq,struct nfs_page * req)1218982f7afSTrond Myklebust static void nfs_direct_truncate_request(struct nfs_direct_req *dreq,
1228982f7afSTrond Myklebust struct nfs_page *req)
1238982f7afSTrond Myklebust {
1248982f7afSTrond Myklebust loff_t offs = req_offset(req);
1258982f7afSTrond Myklebust size_t req_start = (size_t)(offs - dreq->io_start);
1268982f7afSTrond Myklebust
1278982f7afSTrond Myklebust if (req_start < dreq->max_count)
1288982f7afSTrond Myklebust dreq->max_count = req_start;
1298982f7afSTrond Myklebust if (req_start < dreq->count)
1308982f7afSTrond Myklebust dreq->count = req_start;
1318982f7afSTrond Myklebust }
1328982f7afSTrond Myklebust
1331da177e4SLinus Torvalds /**
134eb79f3afSNeilBrown * nfs_swap_rw - NFS address space operation for swap I/O
135b8a32e2bSChuck Lever * @iocb: target I/O control block
13690090ae6SAl Viro * @iter: I/O buffer
137b8a32e2bSChuck Lever *
138eb79f3afSNeilBrown * Perform IO to the swap-file. This is much like direct IO.
1391da177e4SLinus Torvalds */
nfs_swap_rw(struct kiocb * iocb,struct iov_iter * iter)140eb79f3afSNeilBrown int nfs_swap_rw(struct kiocb *iocb, struct iov_iter *iter)
141b8a32e2bSChuck Lever {
142eb79f3afSNeilBrown ssize_t ret;
143ee8a1a8bSPeng Tao
1446f673763SOmar Sandoval if (iov_iter_rw(iter) == READ)
145eb79f3afSNeilBrown ret = nfs_file_direct_read(iocb, iter, true);
146eb79f3afSNeilBrown else
147eb79f3afSNeilBrown ret = nfs_file_direct_write(iocb, iter, true);
148eb79f3afSNeilBrown if (ret < 0)
149eb79f3afSNeilBrown return ret;
150eb79f3afSNeilBrown return 0;
151b8a32e2bSChuck Lever }
152b8a32e2bSChuck Lever
/* Drop one reference on each of the first @npages entries of @pages. */
static void nfs_direct_release_pages(struct page **pages, unsigned int npages)
{
	unsigned int idx = 0;

	while (idx < npages) {
		put_page(pages[idx]);
		idx++;
	}
}
1596b45d858STrond Myklebust
nfs_init_cinfo_from_dreq(struct nfs_commit_info * cinfo,struct nfs_direct_req * dreq)1601763da12SFred Isaman void nfs_init_cinfo_from_dreq(struct nfs_commit_info *cinfo,
1611763da12SFred Isaman struct nfs_direct_req *dreq)
1621763da12SFred Isaman {
163fe238e60SDave Wysochanski cinfo->inode = dreq->inode;
1641763da12SFred Isaman cinfo->mds = &dreq->mds_cinfo;
1651763da12SFred Isaman cinfo->ds = &dreq->ds_cinfo;
1661763da12SFred Isaman cinfo->dreq = dreq;
1671763da12SFred Isaman cinfo->completion_ops = &nfs_direct_commit_completion_ops;
1681763da12SFred Isaman }
1691763da12SFred Isaman
nfs_direct_req_alloc(void)17093619e59SChuck Lever static inline struct nfs_direct_req *nfs_direct_req_alloc(void)
1711da177e4SLinus Torvalds {
1721da177e4SLinus Torvalds struct nfs_direct_req *dreq;
1731da177e4SLinus Torvalds
174292f3eeeSTrond Myklebust dreq = kmem_cache_zalloc(nfs_direct_cachep, GFP_KERNEL);
1751da177e4SLinus Torvalds if (!dreq)
1761da177e4SLinus Torvalds return NULL;
1771da177e4SLinus Torvalds
1781da177e4SLinus Torvalds kref_init(&dreq->kref);
179607f31e8STrond Myklebust kref_get(&dreq->kref);
180d72b7a6bSTrond Myklebust init_completion(&dreq->completion);
1811763da12SFred Isaman INIT_LIST_HEAD(&dreq->mds_cinfo.list);
182c21e7168STrond Myklebust pnfs_init_ds_commit_info(&dreq->ds_cinfo);
1831763da12SFred Isaman INIT_WORK(&dreq->work, nfs_direct_write_schedule_work);
18415ce4a0cSChuck Lever spin_lock_init(&dreq->lock);
18593619e59SChuck Lever
18693619e59SChuck Lever return dreq;
18793619e59SChuck Lever }
18893619e59SChuck Lever
nfs_direct_req_free(struct kref * kref)189b4946ffbSTrond Myklebust static void nfs_direct_req_free(struct kref *kref)
1901da177e4SLinus Torvalds {
1911da177e4SLinus Torvalds struct nfs_direct_req *dreq = container_of(kref, struct nfs_direct_req, kref);
192a8881f5aSTrond Myklebust
19318f41296STrond Myklebust pnfs_release_ds_info(&dreq->ds_cinfo, dreq->inode);
194f11ac8dbSTrond Myklebust if (dreq->l_ctx != NULL)
195f11ac8dbSTrond Myklebust nfs_put_lock_context(dreq->l_ctx);
196a8881f5aSTrond Myklebust if (dreq->ctx != NULL)
197a8881f5aSTrond Myklebust put_nfs_open_context(dreq->ctx);
1981da177e4SLinus Torvalds kmem_cache_free(nfs_direct_cachep, dreq);
1991da177e4SLinus Torvalds }
2001da177e4SLinus Torvalds
nfs_direct_req_release(struct nfs_direct_req * dreq)201b4946ffbSTrond Myklebust static void nfs_direct_req_release(struct nfs_direct_req *dreq)
202b4946ffbSTrond Myklebust {
203b4946ffbSTrond Myklebust kref_put(&dreq->kref, nfs_direct_req_free);
204b4946ffbSTrond Myklebust }
205b4946ffbSTrond Myklebust
nfs_dreq_bytes_left(struct nfs_direct_req * dreq,loff_t offset)20675aa038dSTrond Myklebust ssize_t nfs_dreq_bytes_left(struct nfs_direct_req *dreq, loff_t offset)
2076296556fSPeng Tao {
20875aa038dSTrond Myklebust loff_t start = offset - dreq->io_start;
20975aa038dSTrond Myklebust return dreq->max_count - start;
2106296556fSPeng Tao }
2116296556fSPeng Tao EXPORT_SYMBOL_GPL(nfs_dreq_bytes_left);
2126296556fSPeng Tao
213d4cc948bSChuck Lever /*
214bc0fb201SChuck Lever * Collects and returns the final error value/byte-count.
215bc0fb201SChuck Lever */
nfs_direct_wait(struct nfs_direct_req * dreq)216bc0fb201SChuck Lever static ssize_t nfs_direct_wait(struct nfs_direct_req *dreq)
217bc0fb201SChuck Lever {
21815ce4a0cSChuck Lever ssize_t result = -EIOCBQUEUED;
219bc0fb201SChuck Lever
220bc0fb201SChuck Lever /* Async requests don't wait here */
221bc0fb201SChuck Lever if (dreq->iocb)
222bc0fb201SChuck Lever goto out;
223bc0fb201SChuck Lever
224150030b7SMatthew Wilcox result = wait_for_completion_killable(&dreq->completion);
225bc0fb201SChuck Lever
226d2a7de0bSTrond Myklebust if (!result) {
227d2a7de0bSTrond Myklebust result = dreq->count;
228d2a7de0bSTrond Myklebust WARN_ON_ONCE(dreq->count < 0);
229d2a7de0bSTrond Myklebust }
230bc0fb201SChuck Lever if (!result)
23115ce4a0cSChuck Lever result = dreq->error;
232bc0fb201SChuck Lever
233bc0fb201SChuck Lever out:
234bc0fb201SChuck Lever return (ssize_t) result;
235bc0fb201SChuck Lever }
236bc0fb201SChuck Lever
237bc0fb201SChuck Lever /*
238607f31e8STrond Myklebust * Synchronous I/O uses a stack-allocated iocb. Thus we can't trust
239607f31e8STrond Myklebust * the iocb is still valid here if this is a synchronous request.
24063ab46abSChuck Lever */
nfs_direct_complete(struct nfs_direct_req * dreq)241f7b5c340STrond Myklebust static void nfs_direct_complete(struct nfs_direct_req *dreq)
24263ab46abSChuck Lever {
2439811cd57SChristoph Hellwig struct inode *inode = dreq->inode;
2449811cd57SChristoph Hellwig
24565caafd0SOlga Kornievskaia inode_dio_end(inode);
24665caafd0SOlga Kornievskaia
2472a009ec9SChristoph Hellwig if (dreq->iocb) {
2482a009ec9SChristoph Hellwig long res = (long) dreq->error;
249d2a7de0bSTrond Myklebust if (dreq->count != 0) {
2502a009ec9SChristoph Hellwig res = (long) dreq->count;
251d2a7de0bSTrond Myklebust WARN_ON_ONCE(dreq->count < 0);
252d2a7de0bSTrond Myklebust }
2536b19b766SJens Axboe dreq->iocb->ki_complete(dreq->iocb, res);
254d72b7a6bSTrond Myklebust }
2552a009ec9SChristoph Hellwig
256024de8f1SDaniel Wagner complete(&dreq->completion);
25763ab46abSChuck Lever
258b4946ffbSTrond Myklebust nfs_direct_req_release(dreq);
25963ab46abSChuck Lever }
26063ab46abSChuck Lever
nfs_direct_read_completion(struct nfs_pgio_header * hdr)261584aa810SFred Isaman static void nfs_direct_read_completion(struct nfs_pgio_header *hdr)
262fdd1e74cSTrond Myklebust {
263584aa810SFred Isaman unsigned long bytes = 0;
264584aa810SFred Isaman struct nfs_direct_req *dreq = hdr->dreq;
265fdd1e74cSTrond Myklebust
26615ce4a0cSChuck Lever spin_lock(&dreq->lock);
267eb2c50daSTrond Myklebust if (test_bit(NFS_IOHDR_REDO, &hdr->flags)) {
268eb2c50daSTrond Myklebust spin_unlock(&dreq->lock);
269eb2c50daSTrond Myklebust goto out_put;
270eb2c50daSTrond Myklebust }
271eb2c50daSTrond Myklebust
272031d73edSTrond Myklebust nfs_direct_count_bytes(dreq, hdr);
27315ce4a0cSChuck Lever spin_unlock(&dreq->lock);
2741da177e4SLinus Torvalds
275584aa810SFred Isaman while (!list_empty(&hdr->pages)) {
276584aa810SFred Isaman struct nfs_page *req = nfs_list_entry(hdr->pages.next);
277584aa810SFred Isaman struct page *page = req->wb_page;
278584aa810SFred Isaman
279ad3cba22SDave Kleikamp if (!PageCompound(page) && bytes < hdr->good_bytes &&
280ad3cba22SDave Kleikamp (dreq->flags == NFS_ODIRECT_SHOULD_DIRTY))
2814bd8b010STrond Myklebust set_page_dirty(page);
282584aa810SFred Isaman bytes += req->wb_bytes;
283584aa810SFred Isaman nfs_list_remove_request(req);
284beeb5338SAnna Schumaker nfs_release_request(req);
285584aa810SFred Isaman }
286584aa810SFred Isaman out_put:
287607f31e8STrond Myklebust if (put_dreq(dreq))
288f7b5c340STrond Myklebust nfs_direct_complete(dreq);
289584aa810SFred Isaman hdr->release(hdr);
2901da177e4SLinus Torvalds }
2911da177e4SLinus Torvalds
nfs_read_sync_pgio_error(struct list_head * head,int error)292df3accb8STrond Myklebust static void nfs_read_sync_pgio_error(struct list_head *head, int error)
293cd841605SFred Isaman {
294584aa810SFred Isaman struct nfs_page *req;
295cd841605SFred Isaman
296584aa810SFred Isaman while (!list_empty(head)) {
297584aa810SFred Isaman req = nfs_list_entry(head->next);
298584aa810SFred Isaman nfs_list_remove_request(req);
299584aa810SFred Isaman nfs_release_request(req);
300cd841605SFred Isaman }
301584aa810SFred Isaman }
302584aa810SFred Isaman
nfs_direct_pgio_init(struct nfs_pgio_header * hdr)303584aa810SFred Isaman static void nfs_direct_pgio_init(struct nfs_pgio_header *hdr)
304584aa810SFred Isaman {
305584aa810SFred Isaman get_dreq(hdr->dreq);
306584aa810SFred Isaman }
307584aa810SFred Isaman
308584aa810SFred Isaman static const struct nfs_pgio_completion_ops nfs_direct_read_completion_ops = {
3093e9e0ca3STrond Myklebust .error_cleanup = nfs_read_sync_pgio_error,
310584aa810SFred Isaman .init_hdr = nfs_direct_pgio_init,
311584aa810SFred Isaman .completion = nfs_direct_read_completion,
312584aa810SFred Isaman };
313cd841605SFred Isaman
314d4cc948bSChuck Lever /*
315607f31e8STrond Myklebust * For each rsize'd chunk of the user's buffer, dispatch an NFS READ
316607f31e8STrond Myklebust * operation. If nfs_readdata_alloc() or get_user_pages() fails,
317607f31e8STrond Myklebust * bail and stop sending more reads. Read length accounting is
318607f31e8STrond Myklebust * handled automatically by nfs_direct_read_result(). Otherwise, if
319607f31e8STrond Myklebust * no requests have been sent, just return an error.
3201da177e4SLinus Torvalds */
32191f79c43SAl Viro
nfs_direct_read_schedule_iovec(struct nfs_direct_req * dreq,struct iov_iter * iter,loff_t pos)32291f79c43SAl Viro static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq,
32391f79c43SAl Viro struct iov_iter *iter,
32491f79c43SAl Viro loff_t pos)
3251da177e4SLinus Torvalds {
32691f79c43SAl Viro struct nfs_pageio_descriptor desc;
32791f79c43SAl Viro struct inode *inode = dreq->inode;
32891f79c43SAl Viro ssize_t result = -EINVAL;
32991f79c43SAl Viro size_t requested_bytes = 0;
33091f79c43SAl Viro size_t rsize = max_t(size_t, NFS_SERVER(inode)->rsize, PAGE_SIZE);
33182b145c5SChuck Lever
33216b90578SLinus Torvalds nfs_pageio_init_read(&desc, dreq->inode, false,
33391f79c43SAl Viro &nfs_direct_read_completion_ops);
33491f79c43SAl Viro get_dreq(dreq);
33591f79c43SAl Viro desc.pg_dreq = dreq;
336fe0f07d0SJens Axboe inode_dio_begin(inode);
33791f79c43SAl Viro
33891f79c43SAl Viro while (iov_iter_count(iter)) {
33991f79c43SAl Viro struct page **pagevec;
3405dd602f2SChuck Lever size_t bytes;
34191f79c43SAl Viro size_t pgbase;
34291f79c43SAl Viro unsigned npages, i;
3431da177e4SLinus Torvalds
3441ef255e2SAl Viro result = iov_iter_get_pages_alloc2(iter, &pagevec,
34591f79c43SAl Viro rsize, &pgbase);
346584aa810SFred Isaman if (result < 0)
347749e146eSChuck Lever break;
348a564b8f0SMel Gorman
34991f79c43SAl Viro bytes = result;
35091f79c43SAl Viro npages = (result + pgbase + PAGE_SIZE - 1) / PAGE_SIZE;
351584aa810SFred Isaman for (i = 0; i < npages; i++) {
352584aa810SFred Isaman struct nfs_page *req;
353bf5fc402STrond Myklebust unsigned int req_len = min_t(size_t, bytes, PAGE_SIZE - pgbase);
354584aa810SFred Isaman /* XXX do we need to do the eof zeroing found in async_filler? */
35570e9db69STrond Myklebust req = nfs_page_create_from_page(dreq->ctx, pagevec[i],
35670e9db69STrond Myklebust pgbase, pos, req_len);
357584aa810SFred Isaman if (IS_ERR(req)) {
358584aa810SFred Isaman result = PTR_ERR(req);
359dbae4c73STrond Myklebust break;
360584aa810SFred Isaman }
36191f79c43SAl Viro if (!nfs_pageio_add_request(&desc, req)) {
36291f79c43SAl Viro result = desc.pg_error;
363584aa810SFred Isaman nfs_release_request(req);
364584aa810SFred Isaman break;
365584aa810SFred Isaman }
366584aa810SFred Isaman pgbase = 0;
367584aa810SFred Isaman bytes -= req_len;
36891f79c43SAl Viro requested_bytes += req_len;
369584aa810SFred Isaman pos += req_len;
37035754bc0SPeng Tao dreq->bytes_left -= req_len;
371584aa810SFred Isaman }
3726d74743bSTrond Myklebust nfs_direct_release_pages(pagevec, npages);
37391f79c43SAl Viro kvfree(pagevec);
37419f73787SChuck Lever if (result < 0)
37519f73787SChuck Lever break;
37619f73787SChuck Lever }
37719f73787SChuck Lever
378584aa810SFred Isaman nfs_pageio_complete(&desc);
379584aa810SFred Isaman
380839f7ad6SChuck Lever /*
381839f7ad6SChuck Lever * If no bytes were started, return the error, and let the
382839f7ad6SChuck Lever * generic layer handle the completion.
383839f7ad6SChuck Lever */
384839f7ad6SChuck Lever if (requested_bytes == 0) {
385d03727b2SOlga Kornievskaia inode_dio_end(inode);
38665caafd0SOlga Kornievskaia nfs_direct_req_release(dreq);
387839f7ad6SChuck Lever return result < 0 ? result : -EIO;
388839f7ad6SChuck Lever }
389839f7ad6SChuck Lever
39019f73787SChuck Lever if (put_dreq(dreq))
391f7b5c340STrond Myklebust nfs_direct_complete(dreq);
39285128b2bSAl Viro return requested_bytes;
39319f73787SChuck Lever }
39419f73787SChuck Lever
39514a3ec79SChristoph Hellwig /**
39614a3ec79SChristoph Hellwig * nfs_file_direct_read - file direct read operation for NFS files
39714a3ec79SChristoph Hellwig * @iocb: target I/O control block
398619d30b4SAl Viro * @iter: vector of user buffers into which to read data
39964158668SNeilBrown * @swap: flag indicating this is swap IO, not O_DIRECT IO
40014a3ec79SChristoph Hellwig *
40114a3ec79SChristoph Hellwig * We use this function for direct reads instead of calling
40214a3ec79SChristoph Hellwig * generic_file_aio_read() in order to avoid gfar's check to see if
40314a3ec79SChristoph Hellwig * the request starts before the end of the file. For that check
40414a3ec79SChristoph Hellwig * to work, we must generate a GETATTR before each direct read, and
40514a3ec79SChristoph Hellwig * even then there is a window between the GETATTR and the subsequent
40614a3ec79SChristoph Hellwig * READ where the file size could change. Our preference is simply
40714a3ec79SChristoph Hellwig * to do all reads the application wants, and the server will take
40814a3ec79SChristoph Hellwig * care of managing the end of file boundary.
40914a3ec79SChristoph Hellwig *
41014a3ec79SChristoph Hellwig * This function also eliminates unnecessarily updating the file's
41114a3ec79SChristoph Hellwig * atime locally, as the NFS server sets the file's atime, and this
41214a3ec79SChristoph Hellwig * client must read the updated atime from the server back into its
41314a3ec79SChristoph Hellwig * cache.
41414a3ec79SChristoph Hellwig */
nfs_file_direct_read(struct kiocb * iocb,struct iov_iter * iter,bool swap)41564158668SNeilBrown ssize_t nfs_file_direct_read(struct kiocb *iocb, struct iov_iter *iter,
41664158668SNeilBrown bool swap)
4171da177e4SLinus Torvalds {
41814a3ec79SChristoph Hellwig struct file *file = iocb->ki_filp;
41914a3ec79SChristoph Hellwig struct address_space *mapping = file->f_mapping;
42014a3ec79SChristoph Hellwig struct inode *inode = mapping->host;
4211da177e4SLinus Torvalds struct nfs_direct_req *dreq;
422b3c54de6STrond Myklebust struct nfs_lock_context *l_ctx;
42386b93667SColin Ian King ssize_t result, requested;
424a6cbcd4aSAl Viro size_t count = iov_iter_count(iter);
42514a3ec79SChristoph Hellwig nfs_add_stats(mapping->host, NFSIOS_DIRECTREADBYTES, count);
42614a3ec79SChristoph Hellwig
42714a3ec79SChristoph Hellwig dfprintk(FILE, "NFS: direct read(%pD2, %zd@%Ld)\n",
428c8b8e32dSChristoph Hellwig file, count, (long long) iocb->ki_pos);
42914a3ec79SChristoph Hellwig
43014a3ec79SChristoph Hellwig result = 0;
43114a3ec79SChristoph Hellwig if (!count)
43214a3ec79SChristoph Hellwig goto out;
43314a3ec79SChristoph Hellwig
43414a3ec79SChristoph Hellwig task_io_account_read(count);
43514a3ec79SChristoph Hellwig
43614a3ec79SChristoph Hellwig result = -ENOMEM;
437607f31e8STrond Myklebust dreq = nfs_direct_req_alloc();
438f11ac8dbSTrond Myklebust if (dreq == NULL)
439a5864c99STrond Myklebust goto out;
4401da177e4SLinus Torvalds
44191d5b470SChuck Lever dreq->inode = inode;
442ed3743a6SWeston Andros Adamson dreq->bytes_left = dreq->max_count = count;
443c8b8e32dSChristoph Hellwig dreq->io_start = iocb->ki_pos;
444cd3758e3STrond Myklebust dreq->ctx = get_nfs_open_context(nfs_file_open_context(iocb->ki_filp));
445b3c54de6STrond Myklebust l_ctx = nfs_get_lock_context(dreq->ctx);
446b3c54de6STrond Myklebust if (IS_ERR(l_ctx)) {
447b3c54de6STrond Myklebust result = PTR_ERR(l_ctx);
4488605cf0eSMisono Tomohiro nfs_direct_req_release(dreq);
449f11ac8dbSTrond Myklebust goto out_release;
450b3c54de6STrond Myklebust }
451b3c54de6STrond Myklebust dreq->l_ctx = l_ctx;
452487b8372SChuck Lever if (!is_sync_kiocb(iocb))
453487b8372SChuck Lever dreq->iocb = iocb;
4541da177e4SLinus Torvalds
455fcb14cb1SAl Viro if (user_backed_iter(iter))
456ad3cba22SDave Kleikamp dreq->flags = NFS_ODIRECT_SHOULD_DIRTY;
457ad3cba22SDave Kleikamp
45864158668SNeilBrown if (!swap)
459a5864c99STrond Myklebust nfs_start_io_direct(inode);
460a5864c99STrond Myklebust
461619d30b4SAl Viro NFS_I(inode)->read_io += count;
46285128b2bSAl Viro requested = nfs_direct_read_schedule_iovec(dreq, iter, iocb->ki_pos);
463d0b9875dSChristoph Hellwig
46464158668SNeilBrown if (!swap)
465a5864c99STrond Myklebust nfs_end_io_direct(inode);
466d0b9875dSChristoph Hellwig
46785128b2bSAl Viro if (requested > 0) {
468bc0fb201SChuck Lever result = nfs_direct_wait(dreq);
46985128b2bSAl Viro if (result > 0) {
47085128b2bSAl Viro requested -= result;
471c8b8e32dSChristoph Hellwig iocb->ki_pos += result;
47214a3ec79SChristoph Hellwig }
47385128b2bSAl Viro iov_iter_revert(iter, requested);
47485128b2bSAl Viro } else {
47585128b2bSAl Viro result = requested;
47685128b2bSAl Viro }
477d0b9875dSChristoph Hellwig
478f11ac8dbSTrond Myklebust out_release:
479b4946ffbSTrond Myklebust nfs_direct_req_release(dreq);
480f11ac8dbSTrond Myklebust out:
4811da177e4SLinus Torvalds return result;
4821da177e4SLinus Torvalds }
4831da177e4SLinus Torvalds
nfs_direct_add_page_head(struct list_head * list,struct nfs_page * req)48488975a55STrond Myklebust static void nfs_direct_add_page_head(struct list_head *list,
48588975a55STrond Myklebust struct nfs_page *req)
48688975a55STrond Myklebust {
48788975a55STrond Myklebust struct nfs_page *head = req->wb_head;
48888975a55STrond Myklebust
48988975a55STrond Myklebust if (!list_empty(&head->wb_list) || !nfs_lock_request(head))
49088975a55STrond Myklebust return;
49188975a55STrond Myklebust if (!list_empty(&head->wb_list)) {
49288975a55STrond Myklebust nfs_unlock_request(head);
49388975a55STrond Myklebust return;
49488975a55STrond Myklebust }
49588975a55STrond Myklebust list_add(&head->wb_list, list);
49688975a55STrond Myklebust kref_get(&head->wb_kref);
49788975a55STrond Myklebust kref_get(&head->wb_kref);
49888975a55STrond Myklebust }
49988975a55STrond Myklebust
nfs_direct_join_group(struct list_head * list,struct nfs_commit_info * cinfo,struct inode * inode)500b193a78dSTrond Myklebust static void nfs_direct_join_group(struct list_head *list,
501b193a78dSTrond Myklebust struct nfs_commit_info *cinfo,
502b193a78dSTrond Myklebust struct inode *inode)
503ed5d588fSTrond Myklebust {
504be2fd156STrond Myklebust struct nfs_page *req, *subreq;
505ed5d588fSTrond Myklebust
506ed5d588fSTrond Myklebust list_for_each_entry(req, list, wb_list) {
50788975a55STrond Myklebust if (req->wb_head != req) {
50888975a55STrond Myklebust nfs_direct_add_page_head(&req->wb_list, req);
509ed5d588fSTrond Myklebust continue;
51088975a55STrond Myklebust }
511be2fd156STrond Myklebust subreq = req->wb_this_page;
512be2fd156STrond Myklebust if (subreq == req)
513be2fd156STrond Myklebust continue;
514be2fd156STrond Myklebust do {
515be2fd156STrond Myklebust /*
516be2fd156STrond Myklebust * Remove subrequests from this list before freeing
517be2fd156STrond Myklebust * them in the call to nfs_join_page_group().
518be2fd156STrond Myklebust */
519be2fd156STrond Myklebust if (!list_empty(&subreq->wb_list)) {
520be2fd156STrond Myklebust nfs_list_remove_request(subreq);
521be2fd156STrond Myklebust nfs_release_request(subreq);
522ed5d588fSTrond Myklebust }
523be2fd156STrond Myklebust } while ((subreq = subreq->wb_this_page) != req);
524b193a78dSTrond Myklebust nfs_join_page_group(req, cinfo, inode);
525ed5d588fSTrond Myklebust }
526ed5d588fSTrond Myklebust }
527ed5d588fSTrond Myklebust
528ed5d588fSTrond Myklebust static void
nfs_direct_write_scan_commit_list(struct inode * inode,struct list_head * list,struct nfs_commit_info * cinfo)529085d1e33STom Haynes nfs_direct_write_scan_commit_list(struct inode *inode,
530085d1e33STom Haynes struct list_head *list,
531085d1e33STom Haynes struct nfs_commit_info *cinfo)
532085d1e33STom Haynes {
533e824f99aSTrond Myklebust mutex_lock(&NFS_I(cinfo->inode)->commit_mutex);
5349c455a8cSTrond Myklebust pnfs_recover_commit_reqs(list, cinfo);
535085d1e33STom Haynes nfs_scan_commit_list(&cinfo->mds->list, list, cinfo, 0);
536e824f99aSTrond Myklebust mutex_unlock(&NFS_I(cinfo->inode)->commit_mutex);
537085d1e33STom Haynes }
538085d1e33STom Haynes
nfs_direct_write_reschedule(struct nfs_direct_req * dreq)539fad61490STrond Myklebust static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq)
5401da177e4SLinus Torvalds {
5411763da12SFred Isaman struct nfs_pageio_descriptor desc;
542954998b6STrond Myklebust struct nfs_page *req;
5431763da12SFred Isaman LIST_HEAD(reqs);
5441763da12SFred Isaman struct nfs_commit_info cinfo;
5451763da12SFred Isaman
5461763da12SFred Isaman nfs_init_cinfo_from_dreq(&cinfo, dreq);
547085d1e33STom Haynes nfs_direct_write_scan_commit_list(dreq->inode, &reqs, &cinfo);
5481da177e4SLinus Torvalds
549b193a78dSTrond Myklebust nfs_direct_join_group(&reqs, &cinfo, dreq->inode);
550ed5d588fSTrond Myklebust
551a5314a74STrond Myklebust nfs_clear_pnfs_ds_commit_verifiers(&dreq->ds_cinfo);
552607f31e8STrond Myklebust get_dreq(dreq);
5531da177e4SLinus Torvalds
554a20c93e3SChristoph Hellwig nfs_pageio_init_write(&desc, dreq->inode, FLUSH_STABLE, false,
5551763da12SFred Isaman &nfs_direct_write_completion_ops);
5561763da12SFred Isaman desc.pg_dreq = dreq;
557607f31e8STrond Myklebust
558954998b6STrond Myklebust while (!list_empty(&reqs)) {
559954998b6STrond Myklebust req = nfs_list_entry(reqs.next);
56033344e0fSTrond Myklebust /* Bump the transmission count */
56133344e0fSTrond Myklebust req->wb_nio++;
5621763da12SFred Isaman if (!nfs_pageio_add_request(&desc, req)) {
5637c633932STrond Myklebust spin_lock(&dreq->lock);
564954998b6STrond Myklebust if (dreq->error < 0) {
565954998b6STrond Myklebust desc.pg_error = dreq->error;
566954998b6STrond Myklebust } else if (desc.pg_error != -EAGAIN) {
5671763da12SFred Isaman dreq->flags = 0;
568954998b6STrond Myklebust if (!desc.pg_error)
569954998b6STrond Myklebust desc.pg_error = -EIO;
570d600ad1fSPeng Tao dreq->error = desc.pg_error;
571954998b6STrond Myklebust } else
572954998b6STrond Myklebust dreq->flags = NFS_ODIRECT_RESCHED_WRITES;
5737c633932STrond Myklebust spin_unlock(&dreq->lock);
574954998b6STrond Myklebust break;
5751763da12SFred Isaman }
5765a695da2STrond Myklebust nfs_release_request(req);
5771763da12SFred Isaman }
5781763da12SFred Isaman nfs_pageio_complete(&desc);
579607f31e8STrond Myklebust
580954998b6STrond Myklebust while (!list_empty(&reqs)) {
581954998b6STrond Myklebust req = nfs_list_entry(reqs.next);
5824035c248STrond Myklebust nfs_list_remove_request(req);
5831d1afcbcSTrond Myklebust nfs_unlock_and_release_request(req);
5848982f7afSTrond Myklebust if (desc.pg_error == -EAGAIN) {
585954998b6STrond Myklebust nfs_mark_request_commit(req, NULL, &cinfo, 0);
5868982f7afSTrond Myklebust } else {
5878982f7afSTrond Myklebust spin_lock(&dreq->lock);
5888982f7afSTrond Myklebust nfs_direct_truncate_request(dreq, req);
5898982f7afSTrond Myklebust spin_unlock(&dreq->lock);
590954998b6STrond Myklebust nfs_release_request(req);
5914035c248STrond Myklebust }
5928982f7afSTrond Myklebust }
593607f31e8STrond Myklebust
594607f31e8STrond Myklebust if (put_dreq(dreq))
5954d3b55d3SAnna Schumaker nfs_direct_write_complete(dreq);
596fad61490STrond Myklebust }
5971da177e4SLinus Torvalds
nfs_direct_commit_complete(struct nfs_commit_data * data)5981763da12SFred Isaman static void nfs_direct_commit_complete(struct nfs_commit_data *data)
599fad61490STrond Myklebust {
6001f28476dSTrond Myklebust const struct nfs_writeverf *verf = data->res.verf;
6010b7c0153SFred Isaman struct nfs_direct_req *dreq = data->dreq;
6021763da12SFred Isaman struct nfs_commit_info cinfo;
6031763da12SFred Isaman struct nfs_page *req;
604c9d8f89dSTrond Myklebust int status = data->task.tk_status;
605c9d8f89dSTrond Myklebust
6068efc4bbeSJeff Layton trace_nfs_direct_commit_complete(dreq);
6078efc4bbeSJeff Layton
608fb5f7f20STrond Myklebust if (status < 0) {
609fb5f7f20STrond Myklebust /* Errors in commit are fatal */
610fb5f7f20STrond Myklebust dreq->error = status;
611fb5f7f20STrond Myklebust dreq->flags = NFS_ODIRECT_DONE;
61255051c0cSJeff Layton } else {
613fb5f7f20STrond Myklebust status = dreq->error;
61455051c0cSJeff Layton }
615fb5f7f20STrond Myklebust
6161763da12SFred Isaman nfs_init_cinfo_from_dreq(&cinfo, dreq);
617fad61490STrond Myklebust
6181763da12SFred Isaman while (!list_empty(&data->pages)) {
6191763da12SFred Isaman req = nfs_list_entry(data->pages.next);
6201763da12SFred Isaman nfs_list_remove_request(req);
6218982f7afSTrond Myklebust if (status < 0) {
6228982f7afSTrond Myklebust spin_lock(&dreq->lock);
6238982f7afSTrond Myklebust nfs_direct_truncate_request(dreq, req);
6248982f7afSTrond Myklebust spin_unlock(&dreq->lock);
6258982f7afSTrond Myklebust nfs_release_request(req);
6268982f7afSTrond Myklebust } else if (!nfs_write_match_verf(verf, req)) {
6271f28476dSTrond Myklebust dreq->flags = NFS_ODIRECT_RESCHED_WRITES;
62833344e0fSTrond Myklebust /*
62933344e0fSTrond Myklebust * Despite the reboot, the write was successful,
63033344e0fSTrond Myklebust * so reset wb_nio.
63133344e0fSTrond Myklebust */
63233344e0fSTrond Myklebust req->wb_nio = 0;
633b57ff130SWeston Andros Adamson nfs_mark_request_commit(req, NULL, &cinfo, 0);
6348982f7afSTrond Myklebust } else
635906369e4SFred Isaman nfs_release_request(req);
6361d1afcbcSTrond Myklebust nfs_unlock_and_release_request(req);
637fad61490STrond Myklebust }
638fad61490STrond Myklebust
639133a48abSTrond Myklebust if (nfs_commit_end(cinfo.mds))
6404d3b55d3SAnna Schumaker nfs_direct_write_complete(dreq);
6411763da12SFred Isaman }
6421763da12SFred Isaman
nfs_direct_resched_write(struct nfs_commit_info * cinfo,struct nfs_page * req)643b20135d0STrond Myklebust static void nfs_direct_resched_write(struct nfs_commit_info *cinfo,
644b20135d0STrond Myklebust struct nfs_page *req)
6451763da12SFred Isaman {
646b20135d0STrond Myklebust struct nfs_direct_req *dreq = cinfo->dreq;
647b20135d0STrond Myklebust
6488efc4bbeSJeff Layton trace_nfs_direct_resched_write(dreq);
6498efc4bbeSJeff Layton
650b20135d0STrond Myklebust spin_lock(&dreq->lock);
651fb5f7f20STrond Myklebust if (dreq->flags != NFS_ODIRECT_DONE)
652b20135d0STrond Myklebust dreq->flags = NFS_ODIRECT_RESCHED_WRITES;
653b20135d0STrond Myklebust spin_unlock(&dreq->lock);
654b20135d0STrond Myklebust nfs_mark_request_commit(req, NULL, cinfo, 0);
6551763da12SFred Isaman }
6561763da12SFred Isaman
6571763da12SFred Isaman static const struct nfs_commit_completion_ops nfs_direct_commit_completion_ops = {
6581763da12SFred Isaman .completion = nfs_direct_commit_complete,
659b20135d0STrond Myklebust .resched_write = nfs_direct_resched_write,
660fad61490STrond Myklebust };
661fad61490STrond Myklebust
nfs_direct_commit_schedule(struct nfs_direct_req * dreq)662fad61490STrond Myklebust static void nfs_direct_commit_schedule(struct nfs_direct_req *dreq)
663fad61490STrond Myklebust {
6641763da12SFred Isaman int res;
6651763da12SFred Isaman struct nfs_commit_info cinfo;
6661763da12SFred Isaman LIST_HEAD(mds_list);
667fad61490STrond Myklebust
6681763da12SFred Isaman nfs_init_cinfo_from_dreq(&cinfo, dreq);
669*e25447c3SJosef Bacik nfs_commit_begin(cinfo.mds);
6701763da12SFred Isaman nfs_scan_commit(dreq->inode, &mds_list, &cinfo);
6711763da12SFred Isaman res = nfs_generic_commit_list(dreq->inode, &mds_list, 0, &cinfo);
672*e25447c3SJosef Bacik if (res < 0) { /* res == -ENOMEM */
673*e25447c3SJosef Bacik spin_lock(&dreq->lock);
674*e25447c3SJosef Bacik if (dreq->flags == 0)
675*e25447c3SJosef Bacik dreq->flags = NFS_ODIRECT_RESCHED_WRITES;
676*e25447c3SJosef Bacik spin_unlock(&dreq->lock);
677*e25447c3SJosef Bacik }
678*e25447c3SJosef Bacik if (nfs_commit_end(cinfo.mds))
679*e25447c3SJosef Bacik nfs_direct_write_complete(dreq);
6801da177e4SLinus Torvalds }
6811da177e4SLinus Torvalds
nfs_direct_write_clear_reqs(struct nfs_direct_req * dreq)682fb5f7f20STrond Myklebust static void nfs_direct_write_clear_reqs(struct nfs_direct_req *dreq)
683fb5f7f20STrond Myklebust {
684fb5f7f20STrond Myklebust struct nfs_commit_info cinfo;
685fb5f7f20STrond Myklebust struct nfs_page *req;
686fb5f7f20STrond Myklebust LIST_HEAD(reqs);
687fb5f7f20STrond Myklebust
688fb5f7f20STrond Myklebust nfs_init_cinfo_from_dreq(&cinfo, dreq);
689fb5f7f20STrond Myklebust nfs_direct_write_scan_commit_list(dreq->inode, &reqs, &cinfo);
690fb5f7f20STrond Myklebust
691fb5f7f20STrond Myklebust while (!list_empty(&reqs)) {
692fb5f7f20STrond Myklebust req = nfs_list_entry(reqs.next);
693fb5f7f20STrond Myklebust nfs_list_remove_request(req);
6948982f7afSTrond Myklebust nfs_direct_truncate_request(dreq, req);
695f02cec9dSTrond Myklebust nfs_release_request(req);
696fb5f7f20STrond Myklebust nfs_unlock_and_release_request(req);
697fb5f7f20STrond Myklebust }
698fb5f7f20STrond Myklebust }
699fb5f7f20STrond Myklebust
nfs_direct_write_schedule_work(struct work_struct * work)7001763da12SFred Isaman static void nfs_direct_write_schedule_work(struct work_struct *work)
7011da177e4SLinus Torvalds {
7021763da12SFred Isaman struct nfs_direct_req *dreq = container_of(work, struct nfs_direct_req, work);
703fad61490STrond Myklebust int flags = dreq->flags;
7041da177e4SLinus Torvalds
705fad61490STrond Myklebust dreq->flags = 0;
706fad61490STrond Myklebust switch (flags) {
707fad61490STrond Myklebust case NFS_ODIRECT_DO_COMMIT:
708fad61490STrond Myklebust nfs_direct_commit_schedule(dreq);
7091da177e4SLinus Torvalds break;
710fad61490STrond Myklebust case NFS_ODIRECT_RESCHED_WRITES:
711fad61490STrond Myklebust nfs_direct_write_reschedule(dreq);
7121da177e4SLinus Torvalds break;
7131da177e4SLinus Torvalds default:
714fb5f7f20STrond Myklebust nfs_direct_write_clear_reqs(dreq);
715f7b5c340STrond Myklebust nfs_zap_mapping(dreq->inode, dreq->inode->i_mapping);
716f7b5c340STrond Myklebust nfs_direct_complete(dreq);
7171da177e4SLinus Torvalds }
718fad61490STrond Myklebust }
719fad61490STrond Myklebust
nfs_direct_write_complete(struct nfs_direct_req * dreq)7204d3b55d3SAnna Schumaker static void nfs_direct_write_complete(struct nfs_direct_req *dreq)
721fad61490STrond Myklebust {
7228efc4bbeSJeff Layton trace_nfs_direct_write_complete(dreq);
72346483c2eSNeilBrown queue_work(nfsiod_workqueue, &dreq->work); /* Calls nfs_direct_write_schedule_work */
724fad61490STrond Myklebust }
7251763da12SFred Isaman
nfs_direct_write_completion(struct nfs_pgio_header * hdr)7261763da12SFred Isaman static void nfs_direct_write_completion(struct nfs_pgio_header *hdr)
7271763da12SFred Isaman {
7281763da12SFred Isaman struct nfs_direct_req *dreq = hdr->dreq;
7291763da12SFred Isaman struct nfs_commit_info cinfo;
7301763da12SFred Isaman struct nfs_page *req = nfs_list_entry(hdr->pages.next);
7313731d44bSTrond Myklebust int flags = NFS_ODIRECT_DONE;
7321763da12SFred Isaman
7338efc4bbeSJeff Layton trace_nfs_direct_write_completion(dreq);
7348efc4bbeSJeff Layton
7351763da12SFred Isaman nfs_init_cinfo_from_dreq(&cinfo, dreq);
7361763da12SFred Isaman
7371763da12SFred Isaman spin_lock(&dreq->lock);
738eb2c50daSTrond Myklebust if (test_bit(NFS_IOHDR_REDO, &hdr->flags)) {
739eb2c50daSTrond Myklebust spin_unlock(&dreq->lock);
740eb2c50daSTrond Myklebust goto out_put;
741eb2c50daSTrond Myklebust }
742eb2c50daSTrond Myklebust
743031d73edSTrond Myklebust nfs_direct_count_bytes(dreq, hdr);
7448982f7afSTrond Myklebust if (test_bit(NFS_IOHDR_UNSTABLE_WRITES, &hdr->flags) &&
7458982f7afSTrond Myklebust !test_bit(NFS_IOHDR_ERROR, &hdr->flags)) {
7463731d44bSTrond Myklebust if (!dreq->flags)
7471763da12SFred Isaman dreq->flags = NFS_ODIRECT_DO_COMMIT;
7483731d44bSTrond Myklebust flags = dreq->flags;
7491763da12SFred Isaman }
7501763da12SFred Isaman spin_unlock(&dreq->lock);
7511763da12SFred Isaman
7521763da12SFred Isaman while (!list_empty(&hdr->pages)) {
7532bfc6e56SWeston Andros Adamson
7541763da12SFred Isaman req = nfs_list_entry(hdr->pages.next);
7551763da12SFred Isaman nfs_list_remove_request(req);
7563731d44bSTrond Myklebust if (flags == NFS_ODIRECT_DO_COMMIT) {
75704277086STrond Myklebust kref_get(&req->wb_kref);
758ba838a75SChuck Lever memcpy(&req->wb_verf, &hdr->verf.verifier,
759ba838a75SChuck Lever sizeof(req->wb_verf));
760b57ff130SWeston Andros Adamson nfs_mark_request_commit(req, hdr->lseg, &cinfo,
761b57ff130SWeston Andros Adamson hdr->ds_commit_idx);
7623731d44bSTrond Myklebust } else if (flags == NFS_ODIRECT_RESCHED_WRITES) {
7633731d44bSTrond Myklebust kref_get(&req->wb_kref);
7643731d44bSTrond Myklebust nfs_mark_request_commit(req, NULL, &cinfo, 0);
7651763da12SFred Isaman }
7661d1afcbcSTrond Myklebust nfs_unlock_and_release_request(req);
7671763da12SFred Isaman }
7681763da12SFred Isaman
7691763da12SFred Isaman out_put:
7701763da12SFred Isaman if (put_dreq(dreq))
7714d3b55d3SAnna Schumaker nfs_direct_write_complete(dreq);
7721763da12SFred Isaman hdr->release(hdr);
7731763da12SFred Isaman }
7741763da12SFred Isaman
nfs_write_sync_pgio_error(struct list_head * head,int error)775df3accb8STrond Myklebust static void nfs_write_sync_pgio_error(struct list_head *head, int error)
7763e9e0ca3STrond Myklebust {
7773e9e0ca3STrond Myklebust struct nfs_page *req;
7783e9e0ca3STrond Myklebust
7793e9e0ca3STrond Myklebust while (!list_empty(head)) {
7803e9e0ca3STrond Myklebust req = nfs_list_entry(head->next);
7813e9e0ca3STrond Myklebust nfs_list_remove_request(req);
7821d1afcbcSTrond Myklebust nfs_unlock_and_release_request(req);
7833e9e0ca3STrond Myklebust }
7843e9e0ca3STrond Myklebust }
7853e9e0ca3STrond Myklebust
nfs_direct_write_reschedule_io(struct nfs_pgio_header * hdr)786dc602dd7STrond Myklebust static void nfs_direct_write_reschedule_io(struct nfs_pgio_header *hdr)
787dc602dd7STrond Myklebust {
788dc602dd7STrond Myklebust struct nfs_direct_req *dreq = hdr->dreq;
789b11243f7STrond Myklebust struct nfs_page *req;
790b11243f7STrond Myklebust struct nfs_commit_info cinfo;
791dc602dd7STrond Myklebust
7928efc4bbeSJeff Layton trace_nfs_direct_write_reschedule_io(dreq);
7938efc4bbeSJeff Layton
794b11243f7STrond Myklebust nfs_init_cinfo_from_dreq(&cinfo, dreq);
795dc602dd7STrond Myklebust spin_lock(&dreq->lock);
796b11243f7STrond Myklebust if (dreq->error == 0)
797dc602dd7STrond Myklebust dreq->flags = NFS_ODIRECT_RESCHED_WRITES;
798b11243f7STrond Myklebust set_bit(NFS_IOHDR_REDO, &hdr->flags);
799dc602dd7STrond Myklebust spin_unlock(&dreq->lock);
800b11243f7STrond Myklebust while (!list_empty(&hdr->pages)) {
801b11243f7STrond Myklebust req = nfs_list_entry(hdr->pages.next);
802b11243f7STrond Myklebust nfs_list_remove_request(req);
803b11243f7STrond Myklebust nfs_unlock_request(req);
804b11243f7STrond Myklebust nfs_mark_request_commit(req, NULL, &cinfo, 0);
805b11243f7STrond Myklebust }
806dc602dd7STrond Myklebust }
807dc602dd7STrond Myklebust
8081763da12SFred Isaman static const struct nfs_pgio_completion_ops nfs_direct_write_completion_ops = {
8093e9e0ca3STrond Myklebust .error_cleanup = nfs_write_sync_pgio_error,
8101763da12SFred Isaman .init_hdr = nfs_direct_pgio_init,
8111763da12SFred Isaman .completion = nfs_direct_write_completion,
812dc602dd7STrond Myklebust .reschedule_io = nfs_direct_write_reschedule_io,
8131763da12SFred Isaman };
8141763da12SFred Isaman
81591f79c43SAl Viro
81691f79c43SAl Viro /*
81791f79c43SAl Viro * NB: Return the value of the first error return code. Subsequent
81891f79c43SAl Viro * errors after the first one are ignored.
81991f79c43SAl Viro */
82091f79c43SAl Viro /*
82191f79c43SAl Viro * For each wsize'd chunk of the user's buffer, dispatch an NFS WRITE
82291f79c43SAl Viro * operation. If nfs_writedata_alloc() or get_user_pages() fails,
82391f79c43SAl Viro * bail and stop sending more writes. Write length accounting is
82491f79c43SAl Viro * handled automatically by nfs_direct_write_result(). Otherwise, if
82591f79c43SAl Viro * no requests have been sent, just return an error.
82691f79c43SAl Viro */
nfs_direct_write_schedule_iovec(struct nfs_direct_req * dreq,struct iov_iter * iter,loff_t pos,int ioflags)82719f73787SChuck Lever static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq,
828619d30b4SAl Viro struct iov_iter *iter,
829c265de25SNeilBrown loff_t pos, int ioflags)
83019f73787SChuck Lever {
8311763da12SFred Isaman struct nfs_pageio_descriptor desc;
8321d59d61fSTrond Myklebust struct inode *inode = dreq->inode;
833954998b6STrond Myklebust struct nfs_commit_info cinfo;
83419f73787SChuck Lever ssize_t result = 0;
83519f73787SChuck Lever size_t requested_bytes = 0;
83691f79c43SAl Viro size_t wsize = max_t(size_t, NFS_SERVER(inode)->wsize, PAGE_SIZE);
837954998b6STrond Myklebust bool defer = false;
83819f73787SChuck Lever
8398efc4bbeSJeff Layton trace_nfs_direct_write_schedule_iovec(dreq);
8408efc4bbeSJeff Layton
841c265de25SNeilBrown nfs_pageio_init_write(&desc, inode, ioflags, false,
8421763da12SFred Isaman &nfs_direct_write_completion_ops);
8431763da12SFred Isaman desc.pg_dreq = dreq;
84419f73787SChuck Lever get_dreq(dreq);
845fe0f07d0SJens Axboe inode_dio_begin(inode);
84619f73787SChuck Lever
84791f79c43SAl Viro NFS_I(inode)->write_io += iov_iter_count(iter);
84891f79c43SAl Viro while (iov_iter_count(iter)) {
84991f79c43SAl Viro struct page **pagevec;
85091f79c43SAl Viro size_t bytes;
85191f79c43SAl Viro size_t pgbase;
85291f79c43SAl Viro unsigned npages, i;
85391f79c43SAl Viro
8541ef255e2SAl Viro result = iov_iter_get_pages_alloc2(iter, &pagevec,
85591f79c43SAl Viro wsize, &pgbase);
85619f73787SChuck Lever if (result < 0)
85719f73787SChuck Lever break;
85891f79c43SAl Viro
85991f79c43SAl Viro bytes = result;
86091f79c43SAl Viro npages = (result + pgbase + PAGE_SIZE - 1) / PAGE_SIZE;
86191f79c43SAl Viro for (i = 0; i < npages; i++) {
86291f79c43SAl Viro struct nfs_page *req;
86391f79c43SAl Viro unsigned int req_len = min_t(size_t, bytes, PAGE_SIZE - pgbase);
86491f79c43SAl Viro
86570e9db69STrond Myklebust req = nfs_page_create_from_page(dreq->ctx, pagevec[i],
86670e9db69STrond Myklebust pgbase, pos, req_len);
86791f79c43SAl Viro if (IS_ERR(req)) {
86891f79c43SAl Viro result = PTR_ERR(req);
86919f73787SChuck Lever break;
87091f79c43SAl Viro }
8710a00b77bSWeston Andros Adamson
872d600ad1fSPeng Tao if (desc.pg_error < 0) {
873d600ad1fSPeng Tao nfs_free_request(req);
874d600ad1fSPeng Tao result = desc.pg_error;
875d600ad1fSPeng Tao break;
876d600ad1fSPeng Tao }
8770a00b77bSWeston Andros Adamson
87891f79c43SAl Viro pgbase = 0;
87991f79c43SAl Viro bytes -= req_len;
88091f79c43SAl Viro requested_bytes += req_len;
88191f79c43SAl Viro pos += req_len;
88291f79c43SAl Viro dreq->bytes_left -= req_len;
883954998b6STrond Myklebust
884954998b6STrond Myklebust if (defer) {
885954998b6STrond Myklebust nfs_mark_request_commit(req, NULL, &cinfo, 0);
886954998b6STrond Myklebust continue;
887954998b6STrond Myklebust }
888954998b6STrond Myklebust
889954998b6STrond Myklebust nfs_lock_request(req);
890954998b6STrond Myklebust if (nfs_pageio_add_request(&desc, req))
891954998b6STrond Myklebust continue;
892954998b6STrond Myklebust
893954998b6STrond Myklebust /* Exit on hard errors */
894954998b6STrond Myklebust if (desc.pg_error < 0 && desc.pg_error != -EAGAIN) {
895954998b6STrond Myklebust result = desc.pg_error;
896954998b6STrond Myklebust nfs_unlock_and_release_request(req);
897954998b6STrond Myklebust break;
898954998b6STrond Myklebust }
899954998b6STrond Myklebust
900954998b6STrond Myklebust /* If the error is soft, defer remaining requests */
901954998b6STrond Myklebust nfs_init_cinfo_from_dreq(&cinfo, dreq);
9027c633932STrond Myklebust spin_lock(&dreq->lock);
903954998b6STrond Myklebust dreq->flags = NFS_ODIRECT_RESCHED_WRITES;
9047c633932STrond Myklebust spin_unlock(&dreq->lock);
905954998b6STrond Myklebust nfs_unlock_request(req);
906954998b6STrond Myklebust nfs_mark_request_commit(req, NULL, &cinfo, 0);
907954998b6STrond Myklebust desc.pg_error = 0;
908954998b6STrond Myklebust defer = true;
90991f79c43SAl Viro }
91091f79c43SAl Viro nfs_direct_release_pages(pagevec, npages);
91191f79c43SAl Viro kvfree(pagevec);
91291f79c43SAl Viro if (result < 0)
91391f79c43SAl Viro break;
91419f73787SChuck Lever }
9151763da12SFred Isaman nfs_pageio_complete(&desc);
91619f73787SChuck Lever
917839f7ad6SChuck Lever /*
918839f7ad6SChuck Lever * If no bytes were started, return the error, and let the
919839f7ad6SChuck Lever * generic layer handle the completion.
920839f7ad6SChuck Lever */
921839f7ad6SChuck Lever if (requested_bytes == 0) {
922d03727b2SOlga Kornievskaia inode_dio_end(inode);
92365caafd0SOlga Kornievskaia nfs_direct_req_release(dreq);
924839f7ad6SChuck Lever return result < 0 ? result : -EIO;
925839f7ad6SChuck Lever }
926839f7ad6SChuck Lever
92719f73787SChuck Lever if (put_dreq(dreq))
9284d3b55d3SAnna Schumaker nfs_direct_write_complete(dreq);
92985128b2bSAl Viro return requested_bytes;
93019f73787SChuck Lever }
93119f73787SChuck Lever
9321da177e4SLinus Torvalds /**
9331da177e4SLinus Torvalds * nfs_file_direct_write - file direct write operation for NFS files
9341da177e4SLinus Torvalds * @iocb: target I/O control block
935619d30b4SAl Viro * @iter: vector of user buffers from which to write data
93664158668SNeilBrown * @swap: flag indicating this is swap IO, not O_DIRECT IO
9371da177e4SLinus Torvalds *
9381da177e4SLinus Torvalds * We use this function for direct writes instead of calling
9391da177e4SLinus Torvalds * generic_file_aio_write() in order to avoid taking the inode
9401da177e4SLinus Torvalds * semaphore and updating the i_size. The NFS server will set
9411da177e4SLinus Torvalds * the new i_size and this client must read the updated size
9421da177e4SLinus Torvalds * back into its cache. We let the server do generic write
9431da177e4SLinus Torvalds * parameter checking and report problems.
9441da177e4SLinus Torvalds *
9451da177e4SLinus Torvalds * We eliminate local atime updates, see direct read above.
9461da177e4SLinus Torvalds *
9471da177e4SLinus Torvalds * We avoid unnecessary page cache invalidations for normal cached
9481da177e4SLinus Torvalds * readers of this file.
9491da177e4SLinus Torvalds *
9501da177e4SLinus Torvalds * Note that O_APPEND is not supported for NFS direct writes, as there
9511da177e4SLinus Torvalds * is no atomic O_APPEND write facility in the NFS protocol.
9521da177e4SLinus Torvalds */
nfs_file_direct_write(struct kiocb * iocb,struct iov_iter * iter,bool swap)95364158668SNeilBrown ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter,
95464158668SNeilBrown bool swap)
9551da177e4SLinus Torvalds {
9569a74a2b8SColin Ian King ssize_t result, requested;
95789698b24STrond Myklebust size_t count;
9581da177e4SLinus Torvalds struct file *file = iocb->ki_filp;
9591da177e4SLinus Torvalds struct address_space *mapping = file->f_mapping;
96022cd1bf1SChristoph Hellwig struct inode *inode = mapping->host;
96122cd1bf1SChristoph Hellwig struct nfs_direct_req *dreq;
96222cd1bf1SChristoph Hellwig struct nfs_lock_context *l_ctx;
96365a4a1caSAl Viro loff_t pos, end;
964c216fd70SChuck Lever
9656de1472fSAl Viro dfprintk(FILE, "NFS: direct write(%pD2, %zd@%Ld)\n",
9663309dd04SAl Viro file, iov_iter_count(iter), (long long) iocb->ki_pos);
967027445c3SBadari Pulavarty
96864158668SNeilBrown if (swap)
96964158668SNeilBrown /* bypass generic checks */
97064158668SNeilBrown result = iov_iter_count(iter);
97164158668SNeilBrown else
97289698b24STrond Myklebust result = generic_write_checks(iocb, iter);
97389698b24STrond Myklebust if (result <= 0)
97489698b24STrond Myklebust return result;
97589698b24STrond Myklebust count = result;
97689698b24STrond Myklebust nfs_add_stats(mapping->host, NFSIOS_DIRECTWRITTENBYTES, count);
9773309dd04SAl Viro
9783309dd04SAl Viro pos = iocb->ki_pos;
97909cbfeafSKirill A. Shutemov end = (pos + iov_iter_count(iter) - 1) >> PAGE_SHIFT;
980ce1a8e67SChuck Lever
98189698b24STrond Myklebust task_io_account_write(count);
9827ec10f26SKonstantin Khlebnikov
98322cd1bf1SChristoph Hellwig result = -ENOMEM;
98422cd1bf1SChristoph Hellwig dreq = nfs_direct_req_alloc();
98522cd1bf1SChristoph Hellwig if (!dreq)
986a5864c99STrond Myklebust goto out;
98722cd1bf1SChristoph Hellwig
98822cd1bf1SChristoph Hellwig dreq->inode = inode;
98989698b24STrond Myklebust dreq->bytes_left = dreq->max_count = count;
9905fadeb47SPeng Tao dreq->io_start = pos;
99122cd1bf1SChristoph Hellwig dreq->ctx = get_nfs_open_context(nfs_file_open_context(iocb->ki_filp));
99222cd1bf1SChristoph Hellwig l_ctx = nfs_get_lock_context(dreq->ctx);
99322cd1bf1SChristoph Hellwig if (IS_ERR(l_ctx)) {
99422cd1bf1SChristoph Hellwig result = PTR_ERR(l_ctx);
9958605cf0eSMisono Tomohiro nfs_direct_req_release(dreq);
99622cd1bf1SChristoph Hellwig goto out_release;
99722cd1bf1SChristoph Hellwig }
99822cd1bf1SChristoph Hellwig dreq->l_ctx = l_ctx;
99922cd1bf1SChristoph Hellwig if (!is_sync_kiocb(iocb))
100022cd1bf1SChristoph Hellwig dreq->iocb = iocb;
10019c455a8cSTrond Myklebust pnfs_init_ds_commit_info_ops(&dreq->ds_cinfo, inode);
100222cd1bf1SChristoph Hellwig
100364158668SNeilBrown if (swap) {
1004c265de25SNeilBrown requested = nfs_direct_write_schedule_iovec(dreq, iter, pos,
1005c265de25SNeilBrown FLUSH_STABLE);
100664158668SNeilBrown } else {
1007a5864c99STrond Myklebust nfs_start_io_direct(inode);
1008a5864c99STrond Myklebust
1009c265de25SNeilBrown requested = nfs_direct_write_schedule_iovec(dreq, iter, pos,
1010c265de25SNeilBrown FLUSH_COND_STABLE);
1011a9ab5e84SChristoph Hellwig
1012a9ab5e84SChristoph Hellwig if (mapping->nrpages) {
1013a9ab5e84SChristoph Hellwig invalidate_inode_pages2_range(mapping,
101409cbfeafSKirill A. Shutemov pos >> PAGE_SHIFT, end);
1015a9ab5e84SChristoph Hellwig }
1016a9ab5e84SChristoph Hellwig
1017a5864c99STrond Myklebust nfs_end_io_direct(inode);
101864158668SNeilBrown }
1019a9ab5e84SChristoph Hellwig
102085128b2bSAl Viro if (requested > 0) {
102122cd1bf1SChristoph Hellwig result = nfs_direct_wait(dreq);
102222cd1bf1SChristoph Hellwig if (result > 0) {
102385128b2bSAl Viro requested -= result;
102422cd1bf1SChristoph Hellwig iocb->ki_pos = pos + result;
1025e2592217SChristoph Hellwig /* XXX: should check the generic_write_sync retval */
1026e2592217SChristoph Hellwig generic_write_sync(iocb, result);
10271763da12SFred Isaman }
102885128b2bSAl Viro iov_iter_revert(iter, requested);
102985128b2bSAl Viro } else {
103085128b2bSAl Viro result = requested;
103122cd1bf1SChristoph Hellwig }
1032a6b5a28eSDave Wysochanski nfs_fscache_invalidate(inode, FSCACHE_INVAL_DIO_WRITE);
103322cd1bf1SChristoph Hellwig out_release:
103422cd1bf1SChristoph Hellwig nfs_direct_req_release(dreq);
1035a5864c99STrond Myklebust out:
103622cd1bf1SChristoph Hellwig return result;
10371da177e4SLinus Torvalds }
10381da177e4SLinus Torvalds
103988467055SChuck Lever /**
104088467055SChuck Lever * nfs_init_directcache - create a slab cache for nfs_direct_req structures
104188467055SChuck Lever *
104288467055SChuck Lever */
nfs_init_directcache(void)1043f7b422b1SDavid Howells int __init nfs_init_directcache(void)
10441da177e4SLinus Torvalds {
10451da177e4SLinus Torvalds nfs_direct_cachep = kmem_cache_create("nfs_direct_cache",
10461da177e4SLinus Torvalds sizeof(struct nfs_direct_req),
1047fffb60f9SPaul Jackson 0, (SLAB_RECLAIM_ACCOUNT|
1048fffb60f9SPaul Jackson SLAB_MEM_SPREAD),
104920c2df83SPaul Mundt NULL);
10501da177e4SLinus Torvalds if (nfs_direct_cachep == NULL)
10511da177e4SLinus Torvalds return -ENOMEM;
10521da177e4SLinus Torvalds
10531da177e4SLinus Torvalds return 0;
10541da177e4SLinus Torvalds }
10551da177e4SLinus Torvalds
105688467055SChuck Lever /**
1057f7b422b1SDavid Howells * nfs_destroy_directcache - destroy the slab cache for nfs_direct_req structures
105888467055SChuck Lever *
105988467055SChuck Lever */
nfs_destroy_directcache(void)1060266bee88SDavid Brownell void nfs_destroy_directcache(void)
10611da177e4SLinus Torvalds {
10621a1d92c1SAlexey Dobriyan kmem_cache_destroy(nfs_direct_cachep);
10631da177e4SLinus Torvalds }
1064