xref: /openbmc/linux/fs/nfs/direct.c (revision b193a78ddb5ee7dba074d3f28dc050069ba083c0)
// SPDX-License-Identifier: GPL-2.0-only
/*
 * linux/fs/nfs/direct.c
 *
 * Copyright (C) 2003 by Chuck Lever <cel@netapp.com>
 *
 * High-performance uncached I/O for the Linux NFS client
 *
 * There are important applications whose performance or correctness
 * depends on uncached access to file data.  Database clusters
 * (multiple copies of the same instance running on separate hosts)
 * implement their own cache coherency protocol that subsumes file
 * system cache protocols.  Applications that process datasets
 * considerably larger than the client's memory do not always benefit
 * from a local cache.  A streaming video server, for instance, has no
 * need to cache the contents of a file.
 *
 * When an application requests uncached I/O, all read and write requests
 * are made directly to the server; data stored or fetched via these
 * requests is not cached in the Linux page cache.  The client does not
 * correct unaligned requests from applications.  All requested bytes are
 * held on permanent storage before a direct write system call returns to
 * an application.
 *
 * Solaris implements an uncached I/O facility called directio() that
 * is used for backups and sequential I/O to very large files.  Solaris
 * also supports uncaching whole NFS partitions with "-o forcedirectio,"
 * an undocumented mount option.
 *
 * Designed by Jeff Kimmel, Chuck Lever, and Trond Myklebust, with
 * help from Andrew Morton.
 *
 * 18 Dec 2001	Initial implementation for 2.4  --cel
 * 08 Jul 2002	Version for 2.4.19, with bug fixes --trondmy
 * 08 Jun 2003	Port to 2.5 APIs  --cel
 * 31 Mar 2004	Handle direct I/O without VFS support  --cel
 * 15 Sep 2004	Parallel async reads  --cel
 * 04 May 2005	support O_DIRECT with aio  --cel
 *
 */
411da177e4SLinus Torvalds 
421da177e4SLinus Torvalds #include <linux/errno.h>
431da177e4SLinus Torvalds #include <linux/sched.h>
441da177e4SLinus Torvalds #include <linux/kernel.h>
451da177e4SLinus Torvalds #include <linux/file.h>
461da177e4SLinus Torvalds #include <linux/pagemap.h>
471da177e4SLinus Torvalds #include <linux/kref.h>
485a0e3ad6STejun Heo #include <linux/slab.h>
497ec10f26SKonstantin Khlebnikov #include <linux/task_io_accounting_ops.h>
506296556fSPeng Tao #include <linux/module.h>
511da177e4SLinus Torvalds 
521da177e4SLinus Torvalds #include <linux/nfs_fs.h>
531da177e4SLinus Torvalds #include <linux/nfs_page.h>
541da177e4SLinus Torvalds #include <linux/sunrpc/clnt.h>
551da177e4SLinus Torvalds 
567c0f6ba6SLinus Torvalds #include <linux/uaccess.h>
5760063497SArun Sharma #include <linux/atomic.h>
581da177e4SLinus Torvalds 
598d5658c9STrond Myklebust #include "internal.h"
6091d5b470SChuck Lever #include "iostat.h"
611763da12SFred Isaman #include "pnfs.h"
62a6b5a28eSDave Wysochanski #include "fscache.h"
638efc4bbeSJeff Layton #include "nfstrace.h"
641da177e4SLinus Torvalds 
651da177e4SLinus Torvalds #define NFSDBG_FACILITY		NFSDBG_VFS
661da177e4SLinus Torvalds 
67e18b890bSChristoph Lameter static struct kmem_cache *nfs_direct_cachep;
681da177e4SLinus Torvalds 
691763da12SFred Isaman static const struct nfs_pgio_completion_ops nfs_direct_write_completion_ops;
701763da12SFred Isaman static const struct nfs_commit_completion_ops nfs_direct_commit_completion_ops;
714d3b55d3SAnna Schumaker static void nfs_direct_write_complete(struct nfs_direct_req *dreq);
721763da12SFred Isaman static void nfs_direct_write_schedule_work(struct work_struct *work);
73607f31e8STrond Myklebust 
74607f31e8STrond Myklebust static inline void get_dreq(struct nfs_direct_req *dreq)
75607f31e8STrond Myklebust {
76607f31e8STrond Myklebust 	atomic_inc(&dreq->io_count);
77607f31e8STrond Myklebust }
78607f31e8STrond Myklebust 
79607f31e8STrond Myklebust static inline int put_dreq(struct nfs_direct_req *dreq)
80607f31e8STrond Myklebust {
81607f31e8STrond Myklebust 	return atomic_dec_and_test(&dreq->io_count);
82607f31e8STrond Myklebust }
83607f31e8STrond Myklebust 
840a00b77bSWeston Andros Adamson static void
85031d73edSTrond Myklebust nfs_direct_handle_truncated(struct nfs_direct_req *dreq,
86031d73edSTrond Myklebust 			    const struct nfs_pgio_header *hdr,
87031d73edSTrond Myklebust 			    ssize_t dreq_len)
880a00b77bSWeston Andros Adamson {
89031d73edSTrond Myklebust 	if (!(test_bit(NFS_IOHDR_ERROR, &hdr->flags) ||
90031d73edSTrond Myklebust 	      test_bit(NFS_IOHDR_EOF, &hdr->flags)))
91031d73edSTrond Myklebust 		return;
92031d73edSTrond Myklebust 	if (dreq->max_count >= dreq_len) {
93031d73edSTrond Myklebust 		dreq->max_count = dreq_len;
94031d73edSTrond Myklebust 		if (dreq->count > dreq_len)
95031d73edSTrond Myklebust 			dreq->count = dreq_len;
965fadeb47SPeng Tao 	}
978982f7afSTrond Myklebust 
988982f7afSTrond Myklebust 	if (test_bit(NFS_IOHDR_ERROR, &hdr->flags) && !dreq->error)
998982f7afSTrond Myklebust 		dreq->error = hdr->error;
1000a00b77bSWeston Andros Adamson }
101031d73edSTrond Myklebust 
102031d73edSTrond Myklebust static void
103031d73edSTrond Myklebust nfs_direct_count_bytes(struct nfs_direct_req *dreq,
104031d73edSTrond Myklebust 		       const struct nfs_pgio_header *hdr)
105031d73edSTrond Myklebust {
106031d73edSTrond Myklebust 	loff_t hdr_end = hdr->io_start + hdr->good_bytes;
107031d73edSTrond Myklebust 	ssize_t dreq_len = 0;
108031d73edSTrond Myklebust 
109031d73edSTrond Myklebust 	if (hdr_end > dreq->io_start)
110031d73edSTrond Myklebust 		dreq_len = hdr_end - dreq->io_start;
111031d73edSTrond Myklebust 
112031d73edSTrond Myklebust 	nfs_direct_handle_truncated(dreq, hdr, dreq_len);
113031d73edSTrond Myklebust 
114031d73edSTrond Myklebust 	if (dreq_len > dreq->max_count)
115031d73edSTrond Myklebust 		dreq_len = dreq->max_count;
116031d73edSTrond Myklebust 
117031d73edSTrond Myklebust 	if (dreq->count < dreq_len)
118031d73edSTrond Myklebust 		dreq->count = dreq_len;
1191ccbad9fSPeng Tao }
1200a00b77bSWeston Andros Adamson 
1218982f7afSTrond Myklebust static void nfs_direct_truncate_request(struct nfs_direct_req *dreq,
1228982f7afSTrond Myklebust 					struct nfs_page *req)
1238982f7afSTrond Myklebust {
1248982f7afSTrond Myklebust 	loff_t offs = req_offset(req);
1258982f7afSTrond Myklebust 	size_t req_start = (size_t)(offs - dreq->io_start);
1268982f7afSTrond Myklebust 
1278982f7afSTrond Myklebust 	if (req_start < dreq->max_count)
1288982f7afSTrond Myklebust 		dreq->max_count = req_start;
1298982f7afSTrond Myklebust 	if (req_start < dreq->count)
1308982f7afSTrond Myklebust 		dreq->count = req_start;
1318982f7afSTrond Myklebust }
1328982f7afSTrond Myklebust 
1331da177e4SLinus Torvalds /**
134eb79f3afSNeilBrown  * nfs_swap_rw - NFS address space operation for swap I/O
135b8a32e2bSChuck Lever  * @iocb: target I/O control block
13690090ae6SAl Viro  * @iter: I/O buffer
137b8a32e2bSChuck Lever  *
138eb79f3afSNeilBrown  * Perform IO to the swap-file.  This is much like direct IO.
1391da177e4SLinus Torvalds  */
140eb79f3afSNeilBrown int nfs_swap_rw(struct kiocb *iocb, struct iov_iter *iter)
141b8a32e2bSChuck Lever {
142eb79f3afSNeilBrown 	ssize_t ret;
143ee8a1a8bSPeng Tao 
14466ee59afSChristoph Hellwig 	VM_BUG_ON(iov_iter_count(iter) != PAGE_SIZE);
145a564b8f0SMel Gorman 
1466f673763SOmar Sandoval 	if (iov_iter_rw(iter) == READ)
147eb79f3afSNeilBrown 		ret = nfs_file_direct_read(iocb, iter, true);
148eb79f3afSNeilBrown 	else
149eb79f3afSNeilBrown 		ret = nfs_file_direct_write(iocb, iter, true);
150eb79f3afSNeilBrown 	if (ret < 0)
151eb79f3afSNeilBrown 		return ret;
152eb79f3afSNeilBrown 	return 0;
153b8a32e2bSChuck Lever }
154b8a32e2bSChuck Lever 
/*
 * Call put_page() on each of the first @npages entries of @pages.
 * The array itself is not freed.
 */
static void nfs_direct_release_pages(struct page **pages, unsigned int npages)
{
	unsigned int i;

	for (i = 0; i < npages; i++)
		put_page(pages[i]);
}
1616b45d858STrond Myklebust 
1621763da12SFred Isaman void nfs_init_cinfo_from_dreq(struct nfs_commit_info *cinfo,
1631763da12SFred Isaman 			      struct nfs_direct_req *dreq)
1641763da12SFred Isaman {
165fe238e60SDave Wysochanski 	cinfo->inode = dreq->inode;
1661763da12SFred Isaman 	cinfo->mds = &dreq->mds_cinfo;
1671763da12SFred Isaman 	cinfo->ds = &dreq->ds_cinfo;
1681763da12SFred Isaman 	cinfo->dreq = dreq;
1691763da12SFred Isaman 	cinfo->completion_ops = &nfs_direct_commit_completion_ops;
1701763da12SFred Isaman }
1711763da12SFred Isaman 
17293619e59SChuck Lever static inline struct nfs_direct_req *nfs_direct_req_alloc(void)
1731da177e4SLinus Torvalds {
1741da177e4SLinus Torvalds 	struct nfs_direct_req *dreq;
1751da177e4SLinus Torvalds 
176292f3eeeSTrond Myklebust 	dreq = kmem_cache_zalloc(nfs_direct_cachep, GFP_KERNEL);
1771da177e4SLinus Torvalds 	if (!dreq)
1781da177e4SLinus Torvalds 		return NULL;
1791da177e4SLinus Torvalds 
1801da177e4SLinus Torvalds 	kref_init(&dreq->kref);
181607f31e8STrond Myklebust 	kref_get(&dreq->kref);
182d72b7a6bSTrond Myklebust 	init_completion(&dreq->completion);
1831763da12SFred Isaman 	INIT_LIST_HEAD(&dreq->mds_cinfo.list);
184c21e7168STrond Myklebust 	pnfs_init_ds_commit_info(&dreq->ds_cinfo);
1851763da12SFred Isaman 	INIT_WORK(&dreq->work, nfs_direct_write_schedule_work);
18615ce4a0cSChuck Lever 	spin_lock_init(&dreq->lock);
18793619e59SChuck Lever 
18893619e59SChuck Lever 	return dreq;
18993619e59SChuck Lever }
19093619e59SChuck Lever 
191b4946ffbSTrond Myklebust static void nfs_direct_req_free(struct kref *kref)
1921da177e4SLinus Torvalds {
1931da177e4SLinus Torvalds 	struct nfs_direct_req *dreq = container_of(kref, struct nfs_direct_req, kref);
194a8881f5aSTrond Myklebust 
19518f41296STrond Myklebust 	pnfs_release_ds_info(&dreq->ds_cinfo, dreq->inode);
196f11ac8dbSTrond Myklebust 	if (dreq->l_ctx != NULL)
197f11ac8dbSTrond Myklebust 		nfs_put_lock_context(dreq->l_ctx);
198a8881f5aSTrond Myklebust 	if (dreq->ctx != NULL)
199a8881f5aSTrond Myklebust 		put_nfs_open_context(dreq->ctx);
2001da177e4SLinus Torvalds 	kmem_cache_free(nfs_direct_cachep, dreq);
2011da177e4SLinus Torvalds }
2021da177e4SLinus Torvalds 
203b4946ffbSTrond Myklebust static void nfs_direct_req_release(struct nfs_direct_req *dreq)
204b4946ffbSTrond Myklebust {
205b4946ffbSTrond Myklebust 	kref_put(&dreq->kref, nfs_direct_req_free);
206b4946ffbSTrond Myklebust }
207b4946ffbSTrond Myklebust 
2086296556fSPeng Tao ssize_t nfs_dreq_bytes_left(struct nfs_direct_req *dreq)
2096296556fSPeng Tao {
2106296556fSPeng Tao 	return dreq->bytes_left;
2116296556fSPeng Tao }
2126296556fSPeng Tao EXPORT_SYMBOL_GPL(nfs_dreq_bytes_left);
2136296556fSPeng Tao 
214d4cc948bSChuck Lever /*
215bc0fb201SChuck Lever  * Collects and returns the final error value/byte-count.
216bc0fb201SChuck Lever  */
217bc0fb201SChuck Lever static ssize_t nfs_direct_wait(struct nfs_direct_req *dreq)
218bc0fb201SChuck Lever {
21915ce4a0cSChuck Lever 	ssize_t result = -EIOCBQUEUED;
220bc0fb201SChuck Lever 
221bc0fb201SChuck Lever 	/* Async requests don't wait here */
222bc0fb201SChuck Lever 	if (dreq->iocb)
223bc0fb201SChuck Lever 		goto out;
224bc0fb201SChuck Lever 
225150030b7SMatthew Wilcox 	result = wait_for_completion_killable(&dreq->completion);
226bc0fb201SChuck Lever 
227d2a7de0bSTrond Myklebust 	if (!result) {
228d2a7de0bSTrond Myklebust 		result = dreq->count;
229d2a7de0bSTrond Myklebust 		WARN_ON_ONCE(dreq->count < 0);
230d2a7de0bSTrond Myklebust 	}
231bc0fb201SChuck Lever 	if (!result)
23215ce4a0cSChuck Lever 		result = dreq->error;
233bc0fb201SChuck Lever 
234bc0fb201SChuck Lever out:
235bc0fb201SChuck Lever 	return (ssize_t) result;
236bc0fb201SChuck Lever }
237bc0fb201SChuck Lever 
238bc0fb201SChuck Lever /*
239607f31e8STrond Myklebust  * Synchronous I/O uses a stack-allocated iocb.  Thus we can't trust
240607f31e8STrond Myklebust  * the iocb is still valid here if this is a synchronous request.
24163ab46abSChuck Lever  */
242f7b5c340STrond Myklebust static void nfs_direct_complete(struct nfs_direct_req *dreq)
24363ab46abSChuck Lever {
2449811cd57SChristoph Hellwig 	struct inode *inode = dreq->inode;
2459811cd57SChristoph Hellwig 
24665caafd0SOlga Kornievskaia 	inode_dio_end(inode);
24765caafd0SOlga Kornievskaia 
2482a009ec9SChristoph Hellwig 	if (dreq->iocb) {
2492a009ec9SChristoph Hellwig 		long res = (long) dreq->error;
250d2a7de0bSTrond Myklebust 		if (dreq->count != 0) {
2512a009ec9SChristoph Hellwig 			res = (long) dreq->count;
252d2a7de0bSTrond Myklebust 			WARN_ON_ONCE(dreq->count < 0);
253d2a7de0bSTrond Myklebust 		}
2546b19b766SJens Axboe 		dreq->iocb->ki_complete(dreq->iocb, res);
255d72b7a6bSTrond Myklebust 	}
2562a009ec9SChristoph Hellwig 
257024de8f1SDaniel Wagner 	complete(&dreq->completion);
25863ab46abSChuck Lever 
259b4946ffbSTrond Myklebust 	nfs_direct_req_release(dreq);
26063ab46abSChuck Lever }
26163ab46abSChuck Lever 
262584aa810SFred Isaman static void nfs_direct_read_completion(struct nfs_pgio_header *hdr)
263fdd1e74cSTrond Myklebust {
264584aa810SFred Isaman 	unsigned long bytes = 0;
265584aa810SFred Isaman 	struct nfs_direct_req *dreq = hdr->dreq;
266fdd1e74cSTrond Myklebust 
26715ce4a0cSChuck Lever 	spin_lock(&dreq->lock);
268eb2c50daSTrond Myklebust 	if (test_bit(NFS_IOHDR_REDO, &hdr->flags)) {
269eb2c50daSTrond Myklebust 		spin_unlock(&dreq->lock);
270eb2c50daSTrond Myklebust 		goto out_put;
271eb2c50daSTrond Myklebust 	}
272eb2c50daSTrond Myklebust 
273031d73edSTrond Myklebust 	nfs_direct_count_bytes(dreq, hdr);
27415ce4a0cSChuck Lever 	spin_unlock(&dreq->lock);
2751da177e4SLinus Torvalds 
276584aa810SFred Isaman 	while (!list_empty(&hdr->pages)) {
277584aa810SFred Isaman 		struct nfs_page *req = nfs_list_entry(hdr->pages.next);
278584aa810SFred Isaman 		struct page *page = req->wb_page;
279584aa810SFred Isaman 
280ad3cba22SDave Kleikamp 		if (!PageCompound(page) && bytes < hdr->good_bytes &&
281ad3cba22SDave Kleikamp 		    (dreq->flags == NFS_ODIRECT_SHOULD_DIRTY))
2824bd8b010STrond Myklebust 			set_page_dirty(page);
283584aa810SFred Isaman 		bytes += req->wb_bytes;
284584aa810SFred Isaman 		nfs_list_remove_request(req);
285beeb5338SAnna Schumaker 		nfs_release_request(req);
286584aa810SFred Isaman 	}
287584aa810SFred Isaman out_put:
288607f31e8STrond Myklebust 	if (put_dreq(dreq))
289f7b5c340STrond Myklebust 		nfs_direct_complete(dreq);
290584aa810SFred Isaman 	hdr->release(hdr);
2911da177e4SLinus Torvalds }
2921da177e4SLinus Torvalds 
293df3accb8STrond Myklebust static void nfs_read_sync_pgio_error(struct list_head *head, int error)
294cd841605SFred Isaman {
295584aa810SFred Isaman 	struct nfs_page *req;
296cd841605SFred Isaman 
297584aa810SFred Isaman 	while (!list_empty(head)) {
298584aa810SFred Isaman 		req = nfs_list_entry(head->next);
299584aa810SFred Isaman 		nfs_list_remove_request(req);
300584aa810SFred Isaman 		nfs_release_request(req);
301cd841605SFred Isaman 	}
302584aa810SFred Isaman }
303584aa810SFred Isaman 
304584aa810SFred Isaman static void nfs_direct_pgio_init(struct nfs_pgio_header *hdr)
305584aa810SFred Isaman {
306584aa810SFred Isaman 	get_dreq(hdr->dreq);
307584aa810SFred Isaman }
308584aa810SFred Isaman 
309584aa810SFred Isaman static const struct nfs_pgio_completion_ops nfs_direct_read_completion_ops = {
3103e9e0ca3STrond Myklebust 	.error_cleanup = nfs_read_sync_pgio_error,
311584aa810SFred Isaman 	.init_hdr = nfs_direct_pgio_init,
312584aa810SFred Isaman 	.completion = nfs_direct_read_completion,
313584aa810SFred Isaman };
314cd841605SFred Isaman 
315d4cc948bSChuck Lever /*
316607f31e8STrond Myklebust  * For each rsize'd chunk of the user's buffer, dispatch an NFS READ
317607f31e8STrond Myklebust  * operation.  If nfs_readdata_alloc() or get_user_pages() fails,
318607f31e8STrond Myklebust  * bail and stop sending more reads.  Read length accounting is
319607f31e8STrond Myklebust  * handled automatically by nfs_direct_read_result().  Otherwise, if
320607f31e8STrond Myklebust  * no requests have been sent, just return an error.
3211da177e4SLinus Torvalds  */
32291f79c43SAl Viro 
32391f79c43SAl Viro static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq,
32491f79c43SAl Viro 					      struct iov_iter *iter,
32591f79c43SAl Viro 					      loff_t pos)
3261da177e4SLinus Torvalds {
32791f79c43SAl Viro 	struct nfs_pageio_descriptor desc;
32891f79c43SAl Viro 	struct inode *inode = dreq->inode;
32991f79c43SAl Viro 	ssize_t result = -EINVAL;
33091f79c43SAl Viro 	size_t requested_bytes = 0;
33191f79c43SAl Viro 	size_t rsize = max_t(size_t, NFS_SERVER(inode)->rsize, PAGE_SIZE);
33282b145c5SChuck Lever 
33316b90578SLinus Torvalds 	nfs_pageio_init_read(&desc, dreq->inode, false,
33491f79c43SAl Viro 			     &nfs_direct_read_completion_ops);
33591f79c43SAl Viro 	get_dreq(dreq);
33691f79c43SAl Viro 	desc.pg_dreq = dreq;
337fe0f07d0SJens Axboe 	inode_dio_begin(inode);
33891f79c43SAl Viro 
33991f79c43SAl Viro 	while (iov_iter_count(iter)) {
34091f79c43SAl Viro 		struct page **pagevec;
3415dd602f2SChuck Lever 		size_t bytes;
34291f79c43SAl Viro 		size_t pgbase;
34391f79c43SAl Viro 		unsigned npages, i;
3441da177e4SLinus Torvalds 
3451ef255e2SAl Viro 		result = iov_iter_get_pages_alloc2(iter, &pagevec,
34691f79c43SAl Viro 						  rsize, &pgbase);
347584aa810SFred Isaman 		if (result < 0)
348749e146eSChuck Lever 			break;
349a564b8f0SMel Gorman 
35091f79c43SAl Viro 		bytes = result;
35191f79c43SAl Viro 		npages = (result + pgbase + PAGE_SIZE - 1) / PAGE_SIZE;
352584aa810SFred Isaman 		for (i = 0; i < npages; i++) {
353584aa810SFred Isaman 			struct nfs_page *req;
354bf5fc402STrond Myklebust 			unsigned int req_len = min_t(size_t, bytes, PAGE_SIZE - pgbase);
355584aa810SFred Isaman 			/* XXX do we need to do the eof zeroing found in async_filler? */
35670e9db69STrond Myklebust 			req = nfs_page_create_from_page(dreq->ctx, pagevec[i],
35770e9db69STrond Myklebust 							pgbase, pos, req_len);
358584aa810SFred Isaman 			if (IS_ERR(req)) {
359584aa810SFred Isaman 				result = PTR_ERR(req);
360dbae4c73STrond Myklebust 				break;
361584aa810SFred Isaman 			}
36291f79c43SAl Viro 			if (!nfs_pageio_add_request(&desc, req)) {
36391f79c43SAl Viro 				result = desc.pg_error;
364584aa810SFred Isaman 				nfs_release_request(req);
365584aa810SFred Isaman 				break;
366584aa810SFred Isaman 			}
367584aa810SFred Isaman 			pgbase = 0;
368584aa810SFred Isaman 			bytes -= req_len;
36991f79c43SAl Viro 			requested_bytes += req_len;
370584aa810SFred Isaman 			pos += req_len;
37135754bc0SPeng Tao 			dreq->bytes_left -= req_len;
372584aa810SFred Isaman 		}
3736d74743bSTrond Myklebust 		nfs_direct_release_pages(pagevec, npages);
37491f79c43SAl Viro 		kvfree(pagevec);
37519f73787SChuck Lever 		if (result < 0)
37619f73787SChuck Lever 			break;
37719f73787SChuck Lever 	}
37819f73787SChuck Lever 
379584aa810SFred Isaman 	nfs_pageio_complete(&desc);
380584aa810SFred Isaman 
381839f7ad6SChuck Lever 	/*
382839f7ad6SChuck Lever 	 * If no bytes were started, return the error, and let the
383839f7ad6SChuck Lever 	 * generic layer handle the completion.
384839f7ad6SChuck Lever 	 */
385839f7ad6SChuck Lever 	if (requested_bytes == 0) {
386d03727b2SOlga Kornievskaia 		inode_dio_end(inode);
38765caafd0SOlga Kornievskaia 		nfs_direct_req_release(dreq);
388839f7ad6SChuck Lever 		return result < 0 ? result : -EIO;
389839f7ad6SChuck Lever 	}
390839f7ad6SChuck Lever 
39119f73787SChuck Lever 	if (put_dreq(dreq))
392f7b5c340STrond Myklebust 		nfs_direct_complete(dreq);
39385128b2bSAl Viro 	return requested_bytes;
39419f73787SChuck Lever }
39519f73787SChuck Lever 
39614a3ec79SChristoph Hellwig /**
39714a3ec79SChristoph Hellwig  * nfs_file_direct_read - file direct read operation for NFS files
39814a3ec79SChristoph Hellwig  * @iocb: target I/O control block
399619d30b4SAl Viro  * @iter: vector of user buffers into which to read data
40064158668SNeilBrown  * @swap: flag indicating this is swap IO, not O_DIRECT IO
40114a3ec79SChristoph Hellwig  *
40214a3ec79SChristoph Hellwig  * We use this function for direct reads instead of calling
40314a3ec79SChristoph Hellwig  * generic_file_aio_read() in order to avoid gfar's check to see if
40414a3ec79SChristoph Hellwig  * the request starts before the end of the file.  For that check
40514a3ec79SChristoph Hellwig  * to work, we must generate a GETATTR before each direct read, and
40614a3ec79SChristoph Hellwig  * even then there is a window between the GETATTR and the subsequent
40714a3ec79SChristoph Hellwig  * READ where the file size could change.  Our preference is simply
40814a3ec79SChristoph Hellwig  * to do all reads the application wants, and the server will take
40914a3ec79SChristoph Hellwig  * care of managing the end of file boundary.
41014a3ec79SChristoph Hellwig  *
41114a3ec79SChristoph Hellwig  * This function also eliminates unnecessarily updating the file's
41214a3ec79SChristoph Hellwig  * atime locally, as the NFS server sets the file's atime, and this
41314a3ec79SChristoph Hellwig  * client must read the updated atime from the server back into its
41414a3ec79SChristoph Hellwig  * cache.
41514a3ec79SChristoph Hellwig  */
41664158668SNeilBrown ssize_t nfs_file_direct_read(struct kiocb *iocb, struct iov_iter *iter,
41764158668SNeilBrown 			     bool swap)
4181da177e4SLinus Torvalds {
41914a3ec79SChristoph Hellwig 	struct file *file = iocb->ki_filp;
42014a3ec79SChristoph Hellwig 	struct address_space *mapping = file->f_mapping;
42114a3ec79SChristoph Hellwig 	struct inode *inode = mapping->host;
4221da177e4SLinus Torvalds 	struct nfs_direct_req *dreq;
423b3c54de6STrond Myklebust 	struct nfs_lock_context *l_ctx;
42486b93667SColin Ian King 	ssize_t result, requested;
425a6cbcd4aSAl Viro 	size_t count = iov_iter_count(iter);
42614a3ec79SChristoph Hellwig 	nfs_add_stats(mapping->host, NFSIOS_DIRECTREADBYTES, count);
42714a3ec79SChristoph Hellwig 
42814a3ec79SChristoph Hellwig 	dfprintk(FILE, "NFS: direct read(%pD2, %zd@%Ld)\n",
429c8b8e32dSChristoph Hellwig 		file, count, (long long) iocb->ki_pos);
43014a3ec79SChristoph Hellwig 
43114a3ec79SChristoph Hellwig 	result = 0;
43214a3ec79SChristoph Hellwig 	if (!count)
43314a3ec79SChristoph Hellwig 		goto out;
43414a3ec79SChristoph Hellwig 
43514a3ec79SChristoph Hellwig 	task_io_account_read(count);
43614a3ec79SChristoph Hellwig 
43714a3ec79SChristoph Hellwig 	result = -ENOMEM;
438607f31e8STrond Myklebust 	dreq = nfs_direct_req_alloc();
439f11ac8dbSTrond Myklebust 	if (dreq == NULL)
440a5864c99STrond Myklebust 		goto out;
4411da177e4SLinus Torvalds 
44291d5b470SChuck Lever 	dreq->inode = inode;
443ed3743a6SWeston Andros Adamson 	dreq->bytes_left = dreq->max_count = count;
444c8b8e32dSChristoph Hellwig 	dreq->io_start = iocb->ki_pos;
445cd3758e3STrond Myklebust 	dreq->ctx = get_nfs_open_context(nfs_file_open_context(iocb->ki_filp));
446b3c54de6STrond Myklebust 	l_ctx = nfs_get_lock_context(dreq->ctx);
447b3c54de6STrond Myklebust 	if (IS_ERR(l_ctx)) {
448b3c54de6STrond Myklebust 		result = PTR_ERR(l_ctx);
4498605cf0eSMisono Tomohiro 		nfs_direct_req_release(dreq);
450f11ac8dbSTrond Myklebust 		goto out_release;
451b3c54de6STrond Myklebust 	}
452b3c54de6STrond Myklebust 	dreq->l_ctx = l_ctx;
453487b8372SChuck Lever 	if (!is_sync_kiocb(iocb))
454487b8372SChuck Lever 		dreq->iocb = iocb;
4551da177e4SLinus Torvalds 
456fcb14cb1SAl Viro 	if (user_backed_iter(iter))
457ad3cba22SDave Kleikamp 		dreq->flags = NFS_ODIRECT_SHOULD_DIRTY;
458ad3cba22SDave Kleikamp 
45964158668SNeilBrown 	if (!swap)
460a5864c99STrond Myklebust 		nfs_start_io_direct(inode);
461a5864c99STrond Myklebust 
462619d30b4SAl Viro 	NFS_I(inode)->read_io += count;
46385128b2bSAl Viro 	requested = nfs_direct_read_schedule_iovec(dreq, iter, iocb->ki_pos);
464d0b9875dSChristoph Hellwig 
46564158668SNeilBrown 	if (!swap)
466a5864c99STrond Myklebust 		nfs_end_io_direct(inode);
467d0b9875dSChristoph Hellwig 
46885128b2bSAl Viro 	if (requested > 0) {
469bc0fb201SChuck Lever 		result = nfs_direct_wait(dreq);
47085128b2bSAl Viro 		if (result > 0) {
47185128b2bSAl Viro 			requested -= result;
472c8b8e32dSChristoph Hellwig 			iocb->ki_pos += result;
47314a3ec79SChristoph Hellwig 		}
47485128b2bSAl Viro 		iov_iter_revert(iter, requested);
47585128b2bSAl Viro 	} else {
47685128b2bSAl Viro 		result = requested;
47785128b2bSAl Viro 	}
478d0b9875dSChristoph Hellwig 
479f11ac8dbSTrond Myklebust out_release:
480b4946ffbSTrond Myklebust 	nfs_direct_req_release(dreq);
481f11ac8dbSTrond Myklebust out:
4821da177e4SLinus Torvalds 	return result;
4831da177e4SLinus Torvalds }
4841da177e4SLinus Torvalds 
48588975a55STrond Myklebust static void nfs_direct_add_page_head(struct list_head *list,
48688975a55STrond Myklebust 				     struct nfs_page *req)
48788975a55STrond Myklebust {
48888975a55STrond Myklebust 	struct nfs_page *head = req->wb_head;
48988975a55STrond Myklebust 
49088975a55STrond Myklebust 	if (!list_empty(&head->wb_list) || !nfs_lock_request(head))
49188975a55STrond Myklebust 		return;
49288975a55STrond Myklebust 	if (!list_empty(&head->wb_list)) {
49388975a55STrond Myklebust 		nfs_unlock_request(head);
49488975a55STrond Myklebust 		return;
49588975a55STrond Myklebust 	}
49688975a55STrond Myklebust 	list_add(&head->wb_list, list);
49788975a55STrond Myklebust 	kref_get(&head->wb_kref);
49888975a55STrond Myklebust 	kref_get(&head->wb_kref);
49988975a55STrond Myklebust }
50088975a55STrond Myklebust 
501*b193a78dSTrond Myklebust static void nfs_direct_join_group(struct list_head *list,
502*b193a78dSTrond Myklebust 				  struct nfs_commit_info *cinfo,
503*b193a78dSTrond Myklebust 				  struct inode *inode)
504ed5d588fSTrond Myklebust {
505be2fd156STrond Myklebust 	struct nfs_page *req, *subreq;
506ed5d588fSTrond Myklebust 
507ed5d588fSTrond Myklebust 	list_for_each_entry(req, list, wb_list) {
50888975a55STrond Myklebust 		if (req->wb_head != req) {
50988975a55STrond Myklebust 			nfs_direct_add_page_head(&req->wb_list, req);
510ed5d588fSTrond Myklebust 			continue;
51188975a55STrond Myklebust 		}
512be2fd156STrond Myklebust 		subreq = req->wb_this_page;
513be2fd156STrond Myklebust 		if (subreq == req)
514be2fd156STrond Myklebust 			continue;
515be2fd156STrond Myklebust 		do {
516be2fd156STrond Myklebust 			/*
517be2fd156STrond Myklebust 			 * Remove subrequests from this list before freeing
518be2fd156STrond Myklebust 			 * them in the call to nfs_join_page_group().
519be2fd156STrond Myklebust 			 */
520be2fd156STrond Myklebust 			if (!list_empty(&subreq->wb_list)) {
521be2fd156STrond Myklebust 				nfs_list_remove_request(subreq);
522be2fd156STrond Myklebust 				nfs_release_request(subreq);
523ed5d588fSTrond Myklebust 			}
524be2fd156STrond Myklebust 		} while ((subreq = subreq->wb_this_page) != req);
525*b193a78dSTrond Myklebust 		nfs_join_page_group(req, cinfo, inode);
526ed5d588fSTrond Myklebust 	}
527ed5d588fSTrond Myklebust }
528ed5d588fSTrond Myklebust 
529ed5d588fSTrond Myklebust static void
530085d1e33STom Haynes nfs_direct_write_scan_commit_list(struct inode *inode,
531085d1e33STom Haynes 				  struct list_head *list,
532085d1e33STom Haynes 				  struct nfs_commit_info *cinfo)
533085d1e33STom Haynes {
534e824f99aSTrond Myklebust 	mutex_lock(&NFS_I(cinfo->inode)->commit_mutex);
5359c455a8cSTrond Myklebust 	pnfs_recover_commit_reqs(list, cinfo);
536085d1e33STom Haynes 	nfs_scan_commit_list(&cinfo->mds->list, list, cinfo, 0);
537e824f99aSTrond Myklebust 	mutex_unlock(&NFS_I(cinfo->inode)->commit_mutex);
538085d1e33STom Haynes }
539085d1e33STom Haynes 
540fad61490STrond Myklebust static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq)
5411da177e4SLinus Torvalds {
5421763da12SFred Isaman 	struct nfs_pageio_descriptor desc;
543954998b6STrond Myklebust 	struct nfs_page *req;
5441763da12SFred Isaman 	LIST_HEAD(reqs);
5451763da12SFred Isaman 	struct nfs_commit_info cinfo;
5461763da12SFred Isaman 
5471763da12SFred Isaman 	nfs_init_cinfo_from_dreq(&cinfo, dreq);
548085d1e33STom Haynes 	nfs_direct_write_scan_commit_list(dreq->inode, &reqs, &cinfo);
5491da177e4SLinus Torvalds 
550*b193a78dSTrond Myklebust 	nfs_direct_join_group(&reqs, &cinfo, dreq->inode);
551ed5d588fSTrond Myklebust 
552a5314a74STrond Myklebust 	nfs_clear_pnfs_ds_commit_verifiers(&dreq->ds_cinfo);
553607f31e8STrond Myklebust 	get_dreq(dreq);
5541da177e4SLinus Torvalds 
555a20c93e3SChristoph Hellwig 	nfs_pageio_init_write(&desc, dreq->inode, FLUSH_STABLE, false,
5561763da12SFred Isaman 			      &nfs_direct_write_completion_ops);
5571763da12SFred Isaman 	desc.pg_dreq = dreq;
558607f31e8STrond Myklebust 
559954998b6STrond Myklebust 	while (!list_empty(&reqs)) {
560954998b6STrond Myklebust 		req = nfs_list_entry(reqs.next);
56133344e0fSTrond Myklebust 		/* Bump the transmission count */
56233344e0fSTrond Myklebust 		req->wb_nio++;
5631763da12SFred Isaman 		if (!nfs_pageio_add_request(&desc, req)) {
5647c633932STrond Myklebust 			spin_lock(&dreq->lock);
565954998b6STrond Myklebust 			if (dreq->error < 0) {
566954998b6STrond Myklebust 				desc.pg_error = dreq->error;
567954998b6STrond Myklebust 			} else if (desc.pg_error != -EAGAIN) {
5681763da12SFred Isaman 				dreq->flags = 0;
569954998b6STrond Myklebust 				if (!desc.pg_error)
570954998b6STrond Myklebust 					desc.pg_error = -EIO;
571d600ad1fSPeng Tao 				dreq->error = desc.pg_error;
572954998b6STrond Myklebust 			} else
573954998b6STrond Myklebust 				dreq->flags = NFS_ODIRECT_RESCHED_WRITES;
5747c633932STrond Myklebust 			spin_unlock(&dreq->lock);
575954998b6STrond Myklebust 			break;
5761763da12SFred Isaman 		}
5775a695da2STrond Myklebust 		nfs_release_request(req);
5781763da12SFred Isaman 	}
5791763da12SFred Isaman 	nfs_pageio_complete(&desc);
580607f31e8STrond Myklebust 
581954998b6STrond Myklebust 	while (!list_empty(&reqs)) {
582954998b6STrond Myklebust 		req = nfs_list_entry(reqs.next);
5834035c248STrond Myklebust 		nfs_list_remove_request(req);
5841d1afcbcSTrond Myklebust 		nfs_unlock_and_release_request(req);
5858982f7afSTrond Myklebust 		if (desc.pg_error == -EAGAIN) {
586954998b6STrond Myklebust 			nfs_mark_request_commit(req, NULL, &cinfo, 0);
5878982f7afSTrond Myklebust 		} else {
5888982f7afSTrond Myklebust 			spin_lock(&dreq->lock);
5898982f7afSTrond Myklebust 			nfs_direct_truncate_request(dreq, req);
5908982f7afSTrond Myklebust 			spin_unlock(&dreq->lock);
591954998b6STrond Myklebust 			nfs_release_request(req);
5924035c248STrond Myklebust 		}
5938982f7afSTrond Myklebust 	}
594607f31e8STrond Myklebust 
595607f31e8STrond Myklebust 	if (put_dreq(dreq))
5964d3b55d3SAnna Schumaker 		nfs_direct_write_complete(dreq);
597fad61490STrond Myklebust }
5981da177e4SLinus Torvalds 
5991763da12SFred Isaman static void nfs_direct_commit_complete(struct nfs_commit_data *data)
600fad61490STrond Myklebust {
6011f28476dSTrond Myklebust 	const struct nfs_writeverf *verf = data->res.verf;
6020b7c0153SFred Isaman 	struct nfs_direct_req *dreq = data->dreq;
6031763da12SFred Isaman 	struct nfs_commit_info cinfo;
6041763da12SFred Isaman 	struct nfs_page *req;
605c9d8f89dSTrond Myklebust 	int status = data->task.tk_status;
606c9d8f89dSTrond Myklebust 
6078efc4bbeSJeff Layton 	trace_nfs_direct_commit_complete(dreq);
6088efc4bbeSJeff Layton 
609fb5f7f20STrond Myklebust 	if (status < 0) {
610fb5f7f20STrond Myklebust 		/* Errors in commit are fatal */
611fb5f7f20STrond Myklebust 		dreq->error = status;
612fb5f7f20STrond Myklebust 		dreq->flags = NFS_ODIRECT_DONE;
61355051c0cSJeff Layton 	} else {
614fb5f7f20STrond Myklebust 		status = dreq->error;
61555051c0cSJeff Layton 	}
616fb5f7f20STrond Myklebust 
6171763da12SFred Isaman 	nfs_init_cinfo_from_dreq(&cinfo, dreq);
618fad61490STrond Myklebust 
6191763da12SFred Isaman 	while (!list_empty(&data->pages)) {
6201763da12SFred Isaman 		req = nfs_list_entry(data->pages.next);
6211763da12SFred Isaman 		nfs_list_remove_request(req);
6228982f7afSTrond Myklebust 		if (status < 0) {
6238982f7afSTrond Myklebust 			spin_lock(&dreq->lock);
6248982f7afSTrond Myklebust 			nfs_direct_truncate_request(dreq, req);
6258982f7afSTrond Myklebust 			spin_unlock(&dreq->lock);
6268982f7afSTrond Myklebust 			nfs_release_request(req);
6278982f7afSTrond Myklebust 		} else if (!nfs_write_match_verf(verf, req)) {
6281f28476dSTrond Myklebust 			dreq->flags = NFS_ODIRECT_RESCHED_WRITES;
62933344e0fSTrond Myklebust 			/*
63033344e0fSTrond Myklebust 			 * Despite the reboot, the write was successful,
63133344e0fSTrond Myklebust 			 * so reset wb_nio.
63233344e0fSTrond Myklebust 			 */
63333344e0fSTrond Myklebust 			req->wb_nio = 0;
634b57ff130SWeston Andros Adamson 			nfs_mark_request_commit(req, NULL, &cinfo, 0);
6358982f7afSTrond Myklebust 		} else
636906369e4SFred Isaman 			nfs_release_request(req);
6371d1afcbcSTrond Myklebust 		nfs_unlock_and_release_request(req);
638fad61490STrond Myklebust 	}
639fad61490STrond Myklebust 
640133a48abSTrond Myklebust 	if (nfs_commit_end(cinfo.mds))
6414d3b55d3SAnna Schumaker 		nfs_direct_write_complete(dreq);
6421763da12SFred Isaman }
6431763da12SFred Isaman 
644b20135d0STrond Myklebust static void nfs_direct_resched_write(struct nfs_commit_info *cinfo,
645b20135d0STrond Myklebust 		struct nfs_page *req)
6461763da12SFred Isaman {
647b20135d0STrond Myklebust 	struct nfs_direct_req *dreq = cinfo->dreq;
648b20135d0STrond Myklebust 
6498efc4bbeSJeff Layton 	trace_nfs_direct_resched_write(dreq);
6508efc4bbeSJeff Layton 
651b20135d0STrond Myklebust 	spin_lock(&dreq->lock);
652fb5f7f20STrond Myklebust 	if (dreq->flags != NFS_ODIRECT_DONE)
653b20135d0STrond Myklebust 		dreq->flags = NFS_ODIRECT_RESCHED_WRITES;
654b20135d0STrond Myklebust 	spin_unlock(&dreq->lock);
655b20135d0STrond Myklebust 	nfs_mark_request_commit(req, NULL, cinfo, 0);
6561763da12SFred Isaman }
6571763da12SFred Isaman 
6581763da12SFred Isaman static const struct nfs_commit_completion_ops nfs_direct_commit_completion_ops = {
6591763da12SFred Isaman 	.completion = nfs_direct_commit_complete,
660b20135d0STrond Myklebust 	.resched_write = nfs_direct_resched_write,
661fad61490STrond Myklebust };
662fad61490STrond Myklebust 
663fad61490STrond Myklebust static void nfs_direct_commit_schedule(struct nfs_direct_req *dreq)
664fad61490STrond Myklebust {
6651763da12SFred Isaman 	int res;
6661763da12SFred Isaman 	struct nfs_commit_info cinfo;
6671763da12SFred Isaman 	LIST_HEAD(mds_list);
668fad61490STrond Myklebust 
6691763da12SFred Isaman 	nfs_init_cinfo_from_dreq(&cinfo, dreq);
6701763da12SFred Isaman 	nfs_scan_commit(dreq->inode, &mds_list, &cinfo);
6711763da12SFred Isaman 	res = nfs_generic_commit_list(dreq->inode, &mds_list, 0, &cinfo);
6721763da12SFred Isaman 	if (res < 0) /* res == -ENOMEM */
6731763da12SFred Isaman 		nfs_direct_write_reschedule(dreq);
6741da177e4SLinus Torvalds }
6751da177e4SLinus Torvalds 
676fb5f7f20STrond Myklebust static void nfs_direct_write_clear_reqs(struct nfs_direct_req *dreq)
677fb5f7f20STrond Myklebust {
678fb5f7f20STrond Myklebust 	struct nfs_commit_info cinfo;
679fb5f7f20STrond Myklebust 	struct nfs_page *req;
680fb5f7f20STrond Myklebust 	LIST_HEAD(reqs);
681fb5f7f20STrond Myklebust 
682fb5f7f20STrond Myklebust 	nfs_init_cinfo_from_dreq(&cinfo, dreq);
683fb5f7f20STrond Myklebust 	nfs_direct_write_scan_commit_list(dreq->inode, &reqs, &cinfo);
684fb5f7f20STrond Myklebust 
685fb5f7f20STrond Myklebust 	while (!list_empty(&reqs)) {
686fb5f7f20STrond Myklebust 		req = nfs_list_entry(reqs.next);
687fb5f7f20STrond Myklebust 		nfs_list_remove_request(req);
6888982f7afSTrond Myklebust 		nfs_direct_truncate_request(dreq, req);
689f02cec9dSTrond Myklebust 		nfs_release_request(req);
690fb5f7f20STrond Myklebust 		nfs_unlock_and_release_request(req);
691fb5f7f20STrond Myklebust 	}
692fb5f7f20STrond Myklebust }
693fb5f7f20STrond Myklebust 
6941763da12SFred Isaman static void nfs_direct_write_schedule_work(struct work_struct *work)
6951da177e4SLinus Torvalds {
6961763da12SFred Isaman 	struct nfs_direct_req *dreq = container_of(work, struct nfs_direct_req, work);
697fad61490STrond Myklebust 	int flags = dreq->flags;
6981da177e4SLinus Torvalds 
699fad61490STrond Myklebust 	dreq->flags = 0;
700fad61490STrond Myklebust 	switch (flags) {
701fad61490STrond Myklebust 		case NFS_ODIRECT_DO_COMMIT:
702fad61490STrond Myklebust 			nfs_direct_commit_schedule(dreq);
7031da177e4SLinus Torvalds 			break;
704fad61490STrond Myklebust 		case NFS_ODIRECT_RESCHED_WRITES:
705fad61490STrond Myklebust 			nfs_direct_write_reschedule(dreq);
7061da177e4SLinus Torvalds 			break;
7071da177e4SLinus Torvalds 		default:
708fb5f7f20STrond Myklebust 			nfs_direct_write_clear_reqs(dreq);
709f7b5c340STrond Myklebust 			nfs_zap_mapping(dreq->inode, dreq->inode->i_mapping);
710f7b5c340STrond Myklebust 			nfs_direct_complete(dreq);
7111da177e4SLinus Torvalds 	}
712fad61490STrond Myklebust }
713fad61490STrond Myklebust 
7144d3b55d3SAnna Schumaker static void nfs_direct_write_complete(struct nfs_direct_req *dreq)
715fad61490STrond Myklebust {
7168efc4bbeSJeff Layton 	trace_nfs_direct_write_complete(dreq);
71746483c2eSNeilBrown 	queue_work(nfsiod_workqueue, &dreq->work); /* Calls nfs_direct_write_schedule_work */
718fad61490STrond Myklebust }
7191763da12SFred Isaman 
7201763da12SFred Isaman static void nfs_direct_write_completion(struct nfs_pgio_header *hdr)
7211763da12SFred Isaman {
7221763da12SFred Isaman 	struct nfs_direct_req *dreq = hdr->dreq;
7231763da12SFred Isaman 	struct nfs_commit_info cinfo;
7241763da12SFred Isaman 	struct nfs_page *req = nfs_list_entry(hdr->pages.next);
7253731d44bSTrond Myklebust 	int flags = NFS_ODIRECT_DONE;
7261763da12SFred Isaman 
7278efc4bbeSJeff Layton 	trace_nfs_direct_write_completion(dreq);
7288efc4bbeSJeff Layton 
7291763da12SFred Isaman 	nfs_init_cinfo_from_dreq(&cinfo, dreq);
7301763da12SFred Isaman 
7311763da12SFred Isaman 	spin_lock(&dreq->lock);
732eb2c50daSTrond Myklebust 	if (test_bit(NFS_IOHDR_REDO, &hdr->flags)) {
733eb2c50daSTrond Myklebust 		spin_unlock(&dreq->lock);
734eb2c50daSTrond Myklebust 		goto out_put;
735eb2c50daSTrond Myklebust 	}
736eb2c50daSTrond Myklebust 
737031d73edSTrond Myklebust 	nfs_direct_count_bytes(dreq, hdr);
7388982f7afSTrond Myklebust 	if (test_bit(NFS_IOHDR_UNSTABLE_WRITES, &hdr->flags) &&
7398982f7afSTrond Myklebust 	    !test_bit(NFS_IOHDR_ERROR, &hdr->flags)) {
7403731d44bSTrond Myklebust 		if (!dreq->flags)
7411763da12SFred Isaman 			dreq->flags = NFS_ODIRECT_DO_COMMIT;
7423731d44bSTrond Myklebust 		flags = dreq->flags;
7431763da12SFred Isaman 	}
7441763da12SFred Isaman 	spin_unlock(&dreq->lock);
7451763da12SFred Isaman 
7461763da12SFred Isaman 	while (!list_empty(&hdr->pages)) {
7472bfc6e56SWeston Andros Adamson 
7481763da12SFred Isaman 		req = nfs_list_entry(hdr->pages.next);
7491763da12SFred Isaman 		nfs_list_remove_request(req);
7503731d44bSTrond Myklebust 		if (flags == NFS_ODIRECT_DO_COMMIT) {
75104277086STrond Myklebust 			kref_get(&req->wb_kref);
752ba838a75SChuck Lever 			memcpy(&req->wb_verf, &hdr->verf.verifier,
753ba838a75SChuck Lever 			       sizeof(req->wb_verf));
754b57ff130SWeston Andros Adamson 			nfs_mark_request_commit(req, hdr->lseg, &cinfo,
755b57ff130SWeston Andros Adamson 				hdr->ds_commit_idx);
7563731d44bSTrond Myklebust 		} else if (flags == NFS_ODIRECT_RESCHED_WRITES) {
7573731d44bSTrond Myklebust 			kref_get(&req->wb_kref);
7583731d44bSTrond Myklebust 			nfs_mark_request_commit(req, NULL, &cinfo, 0);
7591763da12SFred Isaman 		}
7601d1afcbcSTrond Myklebust 		nfs_unlock_and_release_request(req);
7611763da12SFred Isaman 	}
7621763da12SFred Isaman 
7631763da12SFred Isaman out_put:
7641763da12SFred Isaman 	if (put_dreq(dreq))
7654d3b55d3SAnna Schumaker 		nfs_direct_write_complete(dreq);
7661763da12SFred Isaman 	hdr->release(hdr);
7671763da12SFred Isaman }
7681763da12SFred Isaman 
769df3accb8STrond Myklebust static void nfs_write_sync_pgio_error(struct list_head *head, int error)
7703e9e0ca3STrond Myklebust {
7713e9e0ca3STrond Myklebust 	struct nfs_page *req;
7723e9e0ca3STrond Myklebust 
7733e9e0ca3STrond Myklebust 	while (!list_empty(head)) {
7743e9e0ca3STrond Myklebust 		req = nfs_list_entry(head->next);
7753e9e0ca3STrond Myklebust 		nfs_list_remove_request(req);
7761d1afcbcSTrond Myklebust 		nfs_unlock_and_release_request(req);
7773e9e0ca3STrond Myklebust 	}
7783e9e0ca3STrond Myklebust }
7793e9e0ca3STrond Myklebust 
780dc602dd7STrond Myklebust static void nfs_direct_write_reschedule_io(struct nfs_pgio_header *hdr)
781dc602dd7STrond Myklebust {
782dc602dd7STrond Myklebust 	struct nfs_direct_req *dreq = hdr->dreq;
783dc602dd7STrond Myklebust 
7848efc4bbeSJeff Layton 	trace_nfs_direct_write_reschedule_io(dreq);
7858efc4bbeSJeff Layton 
786dc602dd7STrond Myklebust 	spin_lock(&dreq->lock);
787dc602dd7STrond Myklebust 	if (dreq->error == 0) {
788dc602dd7STrond Myklebust 		dreq->flags = NFS_ODIRECT_RESCHED_WRITES;
789dc602dd7STrond Myklebust 		/* fake unstable write to let common nfs resend pages */
790dc602dd7STrond Myklebust 		hdr->verf.committed = NFS_UNSTABLE;
7914daaeba9STrond Myklebust 		hdr->good_bytes = hdr->args.offset + hdr->args.count -
7924daaeba9STrond Myklebust 			hdr->io_start;
793dc602dd7STrond Myklebust 	}
794dc602dd7STrond Myklebust 	spin_unlock(&dreq->lock);
795dc602dd7STrond Myklebust }
796dc602dd7STrond Myklebust 
7971763da12SFred Isaman static const struct nfs_pgio_completion_ops nfs_direct_write_completion_ops = {
7983e9e0ca3STrond Myklebust 	.error_cleanup = nfs_write_sync_pgio_error,
7991763da12SFred Isaman 	.init_hdr = nfs_direct_pgio_init,
8001763da12SFred Isaman 	.completion = nfs_direct_write_completion,
801dc602dd7STrond Myklebust 	.reschedule_io = nfs_direct_write_reschedule_io,
8021763da12SFred Isaman };
8031763da12SFred Isaman 
80491f79c43SAl Viro 
80591f79c43SAl Viro /*
80691f79c43SAl Viro  * NB: Return the value of the first error return code.  Subsequent
80791f79c43SAl Viro  *     errors after the first one are ignored.
80891f79c43SAl Viro  */
80991f79c43SAl Viro /*
81091f79c43SAl Viro  * For each wsize'd chunk of the user's buffer, dispatch an NFS WRITE
81191f79c43SAl Viro  * operation.  If nfs_writedata_alloc() or get_user_pages() fails,
81291f79c43SAl Viro  * bail and stop sending more writes.  Write length accounting is
81391f79c43SAl Viro  * handled automatically by nfs_direct_write_result().  Otherwise, if
81491f79c43SAl Viro  * no requests have been sent, just return an error.
81591f79c43SAl Viro  */
81619f73787SChuck Lever static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq,
817619d30b4SAl Viro 					       struct iov_iter *iter,
818c265de25SNeilBrown 					       loff_t pos, int ioflags)
81919f73787SChuck Lever {
8201763da12SFred Isaman 	struct nfs_pageio_descriptor desc;
8211d59d61fSTrond Myklebust 	struct inode *inode = dreq->inode;
822954998b6STrond Myklebust 	struct nfs_commit_info cinfo;
82319f73787SChuck Lever 	ssize_t result = 0;
82419f73787SChuck Lever 	size_t requested_bytes = 0;
82591f79c43SAl Viro 	size_t wsize = max_t(size_t, NFS_SERVER(inode)->wsize, PAGE_SIZE);
826954998b6STrond Myklebust 	bool defer = false;
82719f73787SChuck Lever 
8288efc4bbeSJeff Layton 	trace_nfs_direct_write_schedule_iovec(dreq);
8298efc4bbeSJeff Layton 
830c265de25SNeilBrown 	nfs_pageio_init_write(&desc, inode, ioflags, false,
8311763da12SFred Isaman 			      &nfs_direct_write_completion_ops);
8321763da12SFred Isaman 	desc.pg_dreq = dreq;
83319f73787SChuck Lever 	get_dreq(dreq);
834fe0f07d0SJens Axboe 	inode_dio_begin(inode);
83519f73787SChuck Lever 
83691f79c43SAl Viro 	NFS_I(inode)->write_io += iov_iter_count(iter);
83791f79c43SAl Viro 	while (iov_iter_count(iter)) {
83891f79c43SAl Viro 		struct page **pagevec;
83991f79c43SAl Viro 		size_t bytes;
84091f79c43SAl Viro 		size_t pgbase;
84191f79c43SAl Viro 		unsigned npages, i;
84291f79c43SAl Viro 
8431ef255e2SAl Viro 		result = iov_iter_get_pages_alloc2(iter, &pagevec,
84491f79c43SAl Viro 						  wsize, &pgbase);
84519f73787SChuck Lever 		if (result < 0)
84619f73787SChuck Lever 			break;
84791f79c43SAl Viro 
84891f79c43SAl Viro 		bytes = result;
84991f79c43SAl Viro 		npages = (result + pgbase + PAGE_SIZE - 1) / PAGE_SIZE;
85091f79c43SAl Viro 		for (i = 0; i < npages; i++) {
85191f79c43SAl Viro 			struct nfs_page *req;
85291f79c43SAl Viro 			unsigned int req_len = min_t(size_t, bytes, PAGE_SIZE - pgbase);
85391f79c43SAl Viro 
85470e9db69STrond Myklebust 			req = nfs_page_create_from_page(dreq->ctx, pagevec[i],
85570e9db69STrond Myklebust 							pgbase, pos, req_len);
85691f79c43SAl Viro 			if (IS_ERR(req)) {
85791f79c43SAl Viro 				result = PTR_ERR(req);
85819f73787SChuck Lever 				break;
85991f79c43SAl Viro 			}
8600a00b77bSWeston Andros Adamson 
861d600ad1fSPeng Tao 			if (desc.pg_error < 0) {
862d600ad1fSPeng Tao 				nfs_free_request(req);
863d600ad1fSPeng Tao 				result = desc.pg_error;
864d600ad1fSPeng Tao 				break;
865d600ad1fSPeng Tao 			}
8660a00b77bSWeston Andros Adamson 
86791f79c43SAl Viro 			pgbase = 0;
86891f79c43SAl Viro 			bytes -= req_len;
86991f79c43SAl Viro 			requested_bytes += req_len;
87091f79c43SAl Viro 			pos += req_len;
87191f79c43SAl Viro 			dreq->bytes_left -= req_len;
872954998b6STrond Myklebust 
873954998b6STrond Myklebust 			if (defer) {
874954998b6STrond Myklebust 				nfs_mark_request_commit(req, NULL, &cinfo, 0);
875954998b6STrond Myklebust 				continue;
876954998b6STrond Myklebust 			}
877954998b6STrond Myklebust 
878954998b6STrond Myklebust 			nfs_lock_request(req);
879954998b6STrond Myklebust 			if (nfs_pageio_add_request(&desc, req))
880954998b6STrond Myklebust 				continue;
881954998b6STrond Myklebust 
882954998b6STrond Myklebust 			/* Exit on hard errors */
883954998b6STrond Myklebust 			if (desc.pg_error < 0 && desc.pg_error != -EAGAIN) {
884954998b6STrond Myklebust 				result = desc.pg_error;
885954998b6STrond Myklebust 				nfs_unlock_and_release_request(req);
886954998b6STrond Myklebust 				break;
887954998b6STrond Myklebust 			}
888954998b6STrond Myklebust 
889954998b6STrond Myklebust 			/* If the error is soft, defer remaining requests */
890954998b6STrond Myklebust 			nfs_init_cinfo_from_dreq(&cinfo, dreq);
8917c633932STrond Myklebust 			spin_lock(&dreq->lock);
892954998b6STrond Myklebust 			dreq->flags = NFS_ODIRECT_RESCHED_WRITES;
8937c633932STrond Myklebust 			spin_unlock(&dreq->lock);
894954998b6STrond Myklebust 			nfs_unlock_request(req);
895954998b6STrond Myklebust 			nfs_mark_request_commit(req, NULL, &cinfo, 0);
896954998b6STrond Myklebust 			desc.pg_error = 0;
897954998b6STrond Myklebust 			defer = true;
89891f79c43SAl Viro 		}
89991f79c43SAl Viro 		nfs_direct_release_pages(pagevec, npages);
90091f79c43SAl Viro 		kvfree(pagevec);
90191f79c43SAl Viro 		if (result < 0)
90291f79c43SAl Viro 			break;
90319f73787SChuck Lever 	}
9041763da12SFred Isaman 	nfs_pageio_complete(&desc);
90519f73787SChuck Lever 
906839f7ad6SChuck Lever 	/*
907839f7ad6SChuck Lever 	 * If no bytes were started, return the error, and let the
908839f7ad6SChuck Lever 	 * generic layer handle the completion.
909839f7ad6SChuck Lever 	 */
910839f7ad6SChuck Lever 	if (requested_bytes == 0) {
911d03727b2SOlga Kornievskaia 		inode_dio_end(inode);
91265caafd0SOlga Kornievskaia 		nfs_direct_req_release(dreq);
913839f7ad6SChuck Lever 		return result < 0 ? result : -EIO;
914839f7ad6SChuck Lever 	}
915839f7ad6SChuck Lever 
91619f73787SChuck Lever 	if (put_dreq(dreq))
9174d3b55d3SAnna Schumaker 		nfs_direct_write_complete(dreq);
91885128b2bSAl Viro 	return requested_bytes;
91919f73787SChuck Lever }
92019f73787SChuck Lever 
9211da177e4SLinus Torvalds /**
9221da177e4SLinus Torvalds  * nfs_file_direct_write - file direct write operation for NFS files
9231da177e4SLinus Torvalds  * @iocb: target I/O control block
924619d30b4SAl Viro  * @iter: vector of user buffers from which to write data
92564158668SNeilBrown  * @swap: flag indicating this is swap IO, not O_DIRECT IO
9261da177e4SLinus Torvalds  *
9271da177e4SLinus Torvalds  * We use this function for direct writes instead of calling
9281da177e4SLinus Torvalds  * generic_file_aio_write() in order to avoid taking the inode
9291da177e4SLinus Torvalds  * semaphore and updating the i_size.  The NFS server will set
9301da177e4SLinus Torvalds  * the new i_size and this client must read the updated size
9311da177e4SLinus Torvalds  * back into its cache.  We let the server do generic write
9321da177e4SLinus Torvalds  * parameter checking and report problems.
9331da177e4SLinus Torvalds  *
9341da177e4SLinus Torvalds  * We eliminate local atime updates, see direct read above.
9351da177e4SLinus Torvalds  *
9361da177e4SLinus Torvalds  * We avoid unnecessary page cache invalidations for normal cached
9371da177e4SLinus Torvalds  * readers of this file.
9381da177e4SLinus Torvalds  *
9391da177e4SLinus Torvalds  * Note that O_APPEND is not supported for NFS direct writes, as there
9401da177e4SLinus Torvalds  * is no atomic O_APPEND write facility in the NFS protocol.
9411da177e4SLinus Torvalds  */
94264158668SNeilBrown ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter,
94364158668SNeilBrown 			      bool swap)
9441da177e4SLinus Torvalds {
9459a74a2b8SColin Ian King 	ssize_t result, requested;
94689698b24STrond Myklebust 	size_t count;
9471da177e4SLinus Torvalds 	struct file *file = iocb->ki_filp;
9481da177e4SLinus Torvalds 	struct address_space *mapping = file->f_mapping;
94922cd1bf1SChristoph Hellwig 	struct inode *inode = mapping->host;
95022cd1bf1SChristoph Hellwig 	struct nfs_direct_req *dreq;
95122cd1bf1SChristoph Hellwig 	struct nfs_lock_context *l_ctx;
95265a4a1caSAl Viro 	loff_t pos, end;
953c216fd70SChuck Lever 
9546de1472fSAl Viro 	dfprintk(FILE, "NFS: direct write(%pD2, %zd@%Ld)\n",
9553309dd04SAl Viro 		file, iov_iter_count(iter), (long long) iocb->ki_pos);
956027445c3SBadari Pulavarty 
95764158668SNeilBrown 	if (swap)
95864158668SNeilBrown 		/* bypass generic checks */
95964158668SNeilBrown 		result =  iov_iter_count(iter);
96064158668SNeilBrown 	else
96189698b24STrond Myklebust 		result = generic_write_checks(iocb, iter);
96289698b24STrond Myklebust 	if (result <= 0)
96389698b24STrond Myklebust 		return result;
96489698b24STrond Myklebust 	count = result;
96589698b24STrond Myklebust 	nfs_add_stats(mapping->host, NFSIOS_DIRECTWRITTENBYTES, count);
9663309dd04SAl Viro 
9673309dd04SAl Viro 	pos = iocb->ki_pos;
96809cbfeafSKirill A. Shutemov 	end = (pos + iov_iter_count(iter) - 1) >> PAGE_SHIFT;
969ce1a8e67SChuck Lever 
97089698b24STrond Myklebust 	task_io_account_write(count);
9717ec10f26SKonstantin Khlebnikov 
97222cd1bf1SChristoph Hellwig 	result = -ENOMEM;
97322cd1bf1SChristoph Hellwig 	dreq = nfs_direct_req_alloc();
97422cd1bf1SChristoph Hellwig 	if (!dreq)
975a5864c99STrond Myklebust 		goto out;
97622cd1bf1SChristoph Hellwig 
97722cd1bf1SChristoph Hellwig 	dreq->inode = inode;
97889698b24STrond Myklebust 	dreq->bytes_left = dreq->max_count = count;
9795fadeb47SPeng Tao 	dreq->io_start = pos;
98022cd1bf1SChristoph Hellwig 	dreq->ctx = get_nfs_open_context(nfs_file_open_context(iocb->ki_filp));
98122cd1bf1SChristoph Hellwig 	l_ctx = nfs_get_lock_context(dreq->ctx);
98222cd1bf1SChristoph Hellwig 	if (IS_ERR(l_ctx)) {
98322cd1bf1SChristoph Hellwig 		result = PTR_ERR(l_ctx);
9848605cf0eSMisono Tomohiro 		nfs_direct_req_release(dreq);
98522cd1bf1SChristoph Hellwig 		goto out_release;
98622cd1bf1SChristoph Hellwig 	}
98722cd1bf1SChristoph Hellwig 	dreq->l_ctx = l_ctx;
98822cd1bf1SChristoph Hellwig 	if (!is_sync_kiocb(iocb))
98922cd1bf1SChristoph Hellwig 		dreq->iocb = iocb;
9909c455a8cSTrond Myklebust 	pnfs_init_ds_commit_info_ops(&dreq->ds_cinfo, inode);
99122cd1bf1SChristoph Hellwig 
99264158668SNeilBrown 	if (swap) {
993c265de25SNeilBrown 		requested = nfs_direct_write_schedule_iovec(dreq, iter, pos,
994c265de25SNeilBrown 							    FLUSH_STABLE);
99564158668SNeilBrown 	} else {
996a5864c99STrond Myklebust 		nfs_start_io_direct(inode);
997a5864c99STrond Myklebust 
998c265de25SNeilBrown 		requested = nfs_direct_write_schedule_iovec(dreq, iter, pos,
999c265de25SNeilBrown 							    FLUSH_COND_STABLE);
1000a9ab5e84SChristoph Hellwig 
1001a9ab5e84SChristoph Hellwig 		if (mapping->nrpages) {
1002a9ab5e84SChristoph Hellwig 			invalidate_inode_pages2_range(mapping,
100309cbfeafSKirill A. Shutemov 						      pos >> PAGE_SHIFT, end);
1004a9ab5e84SChristoph Hellwig 		}
1005a9ab5e84SChristoph Hellwig 
1006a5864c99STrond Myklebust 		nfs_end_io_direct(inode);
100764158668SNeilBrown 	}
1008a9ab5e84SChristoph Hellwig 
100985128b2bSAl Viro 	if (requested > 0) {
101022cd1bf1SChristoph Hellwig 		result = nfs_direct_wait(dreq);
101122cd1bf1SChristoph Hellwig 		if (result > 0) {
101285128b2bSAl Viro 			requested -= result;
101322cd1bf1SChristoph Hellwig 			iocb->ki_pos = pos + result;
1014e2592217SChristoph Hellwig 			/* XXX: should check the generic_write_sync retval */
1015e2592217SChristoph Hellwig 			generic_write_sync(iocb, result);
10161763da12SFred Isaman 		}
101785128b2bSAl Viro 		iov_iter_revert(iter, requested);
101885128b2bSAl Viro 	} else {
101985128b2bSAl Viro 		result = requested;
102022cd1bf1SChristoph Hellwig 	}
1021a6b5a28eSDave Wysochanski 	nfs_fscache_invalidate(inode, FSCACHE_INVAL_DIO_WRITE);
102222cd1bf1SChristoph Hellwig out_release:
102322cd1bf1SChristoph Hellwig 	nfs_direct_req_release(dreq);
1024a5864c99STrond Myklebust out:
102522cd1bf1SChristoph Hellwig 	return result;
10261da177e4SLinus Torvalds }
10271da177e4SLinus Torvalds 
102888467055SChuck Lever /**
102988467055SChuck Lever  * nfs_init_directcache - create a slab cache for nfs_direct_req structures
103088467055SChuck Lever  *
103188467055SChuck Lever  */
1032f7b422b1SDavid Howells int __init nfs_init_directcache(void)
10331da177e4SLinus Torvalds {
10341da177e4SLinus Torvalds 	nfs_direct_cachep = kmem_cache_create("nfs_direct_cache",
10351da177e4SLinus Torvalds 						sizeof(struct nfs_direct_req),
1036fffb60f9SPaul Jackson 						0, (SLAB_RECLAIM_ACCOUNT|
1037fffb60f9SPaul Jackson 							SLAB_MEM_SPREAD),
103820c2df83SPaul Mundt 						NULL);
10391da177e4SLinus Torvalds 	if (nfs_direct_cachep == NULL)
10401da177e4SLinus Torvalds 		return -ENOMEM;
10411da177e4SLinus Torvalds 
10421da177e4SLinus Torvalds 	return 0;
10431da177e4SLinus Torvalds }
10441da177e4SLinus Torvalds 
104588467055SChuck Lever /**
1046f7b422b1SDavid Howells  * nfs_destroy_directcache - destroy the slab cache for nfs_direct_req structures
104788467055SChuck Lever  *
104888467055SChuck Lever  */
1049266bee88SDavid Brownell void nfs_destroy_directcache(void)
10501da177e4SLinus Torvalds {
10511a1d92c1SAlexey Dobriyan 	kmem_cache_destroy(nfs_direct_cachep);
10521da177e4SLinus Torvalds }
1053