xref: /openbmc/linux/fs/nfs/read.c (revision 3feb2d49394b7874348a6e43c076b780c1d222c5)
11da177e4SLinus Torvalds /*
21da177e4SLinus Torvalds  * linux/fs/nfs/read.c
31da177e4SLinus Torvalds  *
41da177e4SLinus Torvalds  * Block I/O for NFS
51da177e4SLinus Torvalds  *
61da177e4SLinus Torvalds  * Partial copy of Linus' read cache modifications to fs/nfs/file.c
71da177e4SLinus Torvalds  * modified for async RPC by okir@monad.swb.de
81da177e4SLinus Torvalds  *
91da177e4SLinus Torvalds  * We do an ugly hack here in order to return proper error codes to the
101da177e4SLinus Torvalds  * user program when a read request failed: since generic_file_read
111da177e4SLinus Torvalds  * only checks the return value of inode->i_op->readpage() which is always 0
121da177e4SLinus Torvalds  * for async RPC, we set the error bit of the page to 1 when an error occurs,
131da177e4SLinus Torvalds  * and make nfs_readpage transmit requests synchronously when encountering this.
141da177e4SLinus Torvalds  * This is only a small problem, though, since we now retry all operations
151da177e4SLinus Torvalds  * within the RPC code when root squashing is suspected.
161da177e4SLinus Torvalds  */
171da177e4SLinus Torvalds 
181da177e4SLinus Torvalds #include <linux/config.h>
191da177e4SLinus Torvalds #include <linux/time.h>
201da177e4SLinus Torvalds #include <linux/kernel.h>
211da177e4SLinus Torvalds #include <linux/errno.h>
221da177e4SLinus Torvalds #include <linux/fcntl.h>
231da177e4SLinus Torvalds #include <linux/stat.h>
241da177e4SLinus Torvalds #include <linux/mm.h>
251da177e4SLinus Torvalds #include <linux/slab.h>
261da177e4SLinus Torvalds #include <linux/pagemap.h>
271da177e4SLinus Torvalds #include <linux/sunrpc/clnt.h>
281da177e4SLinus Torvalds #include <linux/nfs_fs.h>
291da177e4SLinus Torvalds #include <linux/nfs_page.h>
301da177e4SLinus Torvalds #include <linux/smp_lock.h>
311da177e4SLinus Torvalds 
321da177e4SLinus Torvalds #include <asm/system.h>
331da177e4SLinus Torvalds 
3491d5b470SChuck Lever #include "iostat.h"
3591d5b470SChuck Lever 
361da177e4SLinus Torvalds #define NFSDBG_FACILITY		NFSDBG_PAGECACHE
371da177e4SLinus Torvalds 
381da177e4SLinus Torvalds static int nfs_pagein_one(struct list_head *, struct inode *);
39ec06c096STrond Myklebust static const struct rpc_call_ops nfs_read_partial_ops;
40ec06c096STrond Myklebust static const struct rpc_call_ops nfs_read_full_ops;
411da177e4SLinus Torvalds 
421da177e4SLinus Torvalds static kmem_cache_t *nfs_rdata_cachep;
43*3feb2d49STrond Myklebust static mempool_t *nfs_rdata_mempool;
441da177e4SLinus Torvalds 
451da177e4SLinus Torvalds #define MIN_POOL_READ	(32)
461da177e4SLinus Torvalds 
47*3feb2d49STrond Myklebust struct nfs_read_data *nfs_readdata_alloc(unsigned int pagecount)
48*3feb2d49STrond Myklebust {
49*3feb2d49STrond Myklebust 	struct nfs_read_data *p = mempool_alloc(nfs_rdata_mempool, SLAB_NOFS);
50*3feb2d49STrond Myklebust 
51*3feb2d49STrond Myklebust 	if (p) {
52*3feb2d49STrond Myklebust 		memset(p, 0, sizeof(*p));
53*3feb2d49STrond Myklebust 		INIT_LIST_HEAD(&p->pages);
54*3feb2d49STrond Myklebust 		if (pagecount < NFS_PAGEVEC_SIZE)
55*3feb2d49STrond Myklebust 			p->pagevec = &p->page_array[0];
56*3feb2d49STrond Myklebust 		else {
57*3feb2d49STrond Myklebust 			size_t size = ++pagecount * sizeof(struct page *);
58*3feb2d49STrond Myklebust 			p->pagevec = kmalloc(size, GFP_NOFS);
59*3feb2d49STrond Myklebust 			if (p->pagevec) {
60*3feb2d49STrond Myklebust 				memset(p->pagevec, 0, size);
61*3feb2d49STrond Myklebust 			} else {
62*3feb2d49STrond Myklebust 				mempool_free(p, nfs_rdata_mempool);
63*3feb2d49STrond Myklebust 				p = NULL;
64*3feb2d49STrond Myklebust 			}
65*3feb2d49STrond Myklebust 		}
66*3feb2d49STrond Myklebust 	}
67*3feb2d49STrond Myklebust 	return p;
68*3feb2d49STrond Myklebust }
69*3feb2d49STrond Myklebust 
70*3feb2d49STrond Myklebust void nfs_readdata_free(struct nfs_read_data *p)
71*3feb2d49STrond Myklebust {
72*3feb2d49STrond Myklebust 	if (p && (p->pagevec != &p->page_array[0]))
73*3feb2d49STrond Myklebust 		kfree(p->pagevec);
74*3feb2d49STrond Myklebust 	mempool_free(p, nfs_rdata_mempool);
75*3feb2d49STrond Myklebust }
76*3feb2d49STrond Myklebust 
/*
 * void-pointer adapter around nfs_readdata_free(); installed as the
 * .rpc_release hook of the read rpc_call_ops tables in this file.
 */
void nfs_readdata_release(void *data)
{
	struct nfs_read_data *rdata = data;

	nfs_readdata_free(rdata);
}
811da177e4SLinus Torvalds 
821da177e4SLinus Torvalds static
831da177e4SLinus Torvalds unsigned int nfs_page_length(struct inode *inode, struct page *page)
841da177e4SLinus Torvalds {
851da177e4SLinus Torvalds 	loff_t i_size = i_size_read(inode);
861da177e4SLinus Torvalds 	unsigned long idx;
871da177e4SLinus Torvalds 
881da177e4SLinus Torvalds 	if (i_size <= 0)
891da177e4SLinus Torvalds 		return 0;
901da177e4SLinus Torvalds 	idx = (i_size - 1) >> PAGE_CACHE_SHIFT;
911da177e4SLinus Torvalds 	if (page->index > idx)
921da177e4SLinus Torvalds 		return 0;
931da177e4SLinus Torvalds 	if (page->index != idx)
941da177e4SLinus Torvalds 		return PAGE_CACHE_SIZE;
951da177e4SLinus Torvalds 	return 1 + ((i_size - 1) & (PAGE_CACHE_SIZE - 1));
961da177e4SLinus Torvalds }
971da177e4SLinus Torvalds 
981da177e4SLinus Torvalds static
991da177e4SLinus Torvalds int nfs_return_empty_page(struct page *page)
1001da177e4SLinus Torvalds {
1011da177e4SLinus Torvalds 	memclear_highpage_flush(page, 0, PAGE_CACHE_SIZE);
1021da177e4SLinus Torvalds 	SetPageUptodate(page);
1031da177e4SLinus Torvalds 	unlock_page(page);
1041da177e4SLinus Torvalds 	return 0;
1051da177e4SLinus Torvalds }
1061da177e4SLinus Torvalds 
1071da177e4SLinus Torvalds /*
1081da177e4SLinus Torvalds  * Read a page synchronously.
1091da177e4SLinus Torvalds  */
1101da177e4SLinus Torvalds static int nfs_readpage_sync(struct nfs_open_context *ctx, struct inode *inode,
1111da177e4SLinus Torvalds 		struct page *page)
1121da177e4SLinus Torvalds {
1131da177e4SLinus Torvalds 	unsigned int	rsize = NFS_SERVER(inode)->rsize;
1141da177e4SLinus Torvalds 	unsigned int	count = PAGE_CACHE_SIZE;
1151da177e4SLinus Torvalds 	int		result;
1161da177e4SLinus Torvalds 	struct nfs_read_data *rdata;
1171da177e4SLinus Torvalds 
11840859d7eSChuck Lever 	rdata = nfs_readdata_alloc(1);
1191da177e4SLinus Torvalds 	if (!rdata)
1201da177e4SLinus Torvalds 		return -ENOMEM;
1211da177e4SLinus Torvalds 
1221da177e4SLinus Torvalds 	memset(rdata, 0, sizeof(*rdata));
1231da177e4SLinus Torvalds 	rdata->flags = (IS_SWAPFILE(inode)? NFS_RPC_SWAPFLAGS : 0);
1241da177e4SLinus Torvalds 	rdata->cred = ctx->cred;
1251da177e4SLinus Torvalds 	rdata->inode = inode;
1261da177e4SLinus Torvalds 	INIT_LIST_HEAD(&rdata->pages);
1271da177e4SLinus Torvalds 	rdata->args.fh = NFS_FH(inode);
1281da177e4SLinus Torvalds 	rdata->args.context = ctx;
1291da177e4SLinus Torvalds 	rdata->args.pages = &page;
1301da177e4SLinus Torvalds 	rdata->args.pgbase = 0UL;
1311da177e4SLinus Torvalds 	rdata->args.count = rsize;
1321da177e4SLinus Torvalds 	rdata->res.fattr = &rdata->fattr;
1331da177e4SLinus Torvalds 
1341da177e4SLinus Torvalds 	dprintk("NFS: nfs_readpage_sync(%p)\n", page);
1351da177e4SLinus Torvalds 
1361da177e4SLinus Torvalds 	/*
1371da177e4SLinus Torvalds 	 * This works now because the socket layer never tries to DMA
1381da177e4SLinus Torvalds 	 * into this buffer directly.
1391da177e4SLinus Torvalds 	 */
1401da177e4SLinus Torvalds 	do {
1411da177e4SLinus Torvalds 		if (count < rsize)
1421da177e4SLinus Torvalds 			rdata->args.count = count;
1431da177e4SLinus Torvalds 		rdata->res.count = rdata->args.count;
1441da177e4SLinus Torvalds 		rdata->args.offset = page_offset(page) + rdata->args.pgbase;
1451da177e4SLinus Torvalds 
1461da177e4SLinus Torvalds 		dprintk("NFS: nfs_proc_read(%s, (%s/%Ld), %Lu, %u)\n",
1471da177e4SLinus Torvalds 			NFS_SERVER(inode)->hostname,
1481da177e4SLinus Torvalds 			inode->i_sb->s_id,
1491da177e4SLinus Torvalds 			(long long)NFS_FILEID(inode),
1501da177e4SLinus Torvalds 			(unsigned long long)rdata->args.pgbase,
1511da177e4SLinus Torvalds 			rdata->args.count);
1521da177e4SLinus Torvalds 
1531da177e4SLinus Torvalds 		lock_kernel();
1541da177e4SLinus Torvalds 		result = NFS_PROTO(inode)->read(rdata);
1551da177e4SLinus Torvalds 		unlock_kernel();
1561da177e4SLinus Torvalds 
1571da177e4SLinus Torvalds 		/*
1581da177e4SLinus Torvalds 		 * Even if we had a partial success we can't mark the page
1591da177e4SLinus Torvalds 		 * cache valid.
1601da177e4SLinus Torvalds 		 */
1611da177e4SLinus Torvalds 		if (result < 0) {
1621da177e4SLinus Torvalds 			if (result == -EISDIR)
1631da177e4SLinus Torvalds 				result = -EINVAL;
1641da177e4SLinus Torvalds 			goto io_error;
1651da177e4SLinus Torvalds 		}
1661da177e4SLinus Torvalds 		count -= result;
1671da177e4SLinus Torvalds 		rdata->args.pgbase += result;
16891d5b470SChuck Lever 		nfs_add_stats(inode, NFSIOS_SERVERREADBYTES, result);
16991d5b470SChuck Lever 
1701da177e4SLinus Torvalds 		/* Note: result == 0 should only happen if we're caching
1711da177e4SLinus Torvalds 		 * a write that extends the file and punches a hole.
1721da177e4SLinus Torvalds 		 */
1731da177e4SLinus Torvalds 		if (rdata->res.eof != 0 || result == 0)
1741da177e4SLinus Torvalds 			break;
1751da177e4SLinus Torvalds 	} while (count);
176dc59250cSChuck Lever 	spin_lock(&inode->i_lock);
17755296809SChuck Lever 	NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ATIME;
178dc59250cSChuck Lever 	spin_unlock(&inode->i_lock);
1791da177e4SLinus Torvalds 
1801da177e4SLinus Torvalds 	if (count)
1811da177e4SLinus Torvalds 		memclear_highpage_flush(page, rdata->args.pgbase, count);
1821da177e4SLinus Torvalds 	SetPageUptodate(page);
1831da177e4SLinus Torvalds 	if (PageError(page))
1841da177e4SLinus Torvalds 		ClearPageError(page);
1851da177e4SLinus Torvalds 	result = 0;
1861da177e4SLinus Torvalds 
1871da177e4SLinus Torvalds io_error:
1881da177e4SLinus Torvalds 	unlock_page(page);
1891da177e4SLinus Torvalds 	nfs_readdata_free(rdata);
1901da177e4SLinus Torvalds 	return result;
1911da177e4SLinus Torvalds }
1921da177e4SLinus Torvalds 
1931da177e4SLinus Torvalds static int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode,
1941da177e4SLinus Torvalds 		struct page *page)
1951da177e4SLinus Torvalds {
1961da177e4SLinus Torvalds 	LIST_HEAD(one_request);
1971da177e4SLinus Torvalds 	struct nfs_page	*new;
1981da177e4SLinus Torvalds 	unsigned int len;
1991da177e4SLinus Torvalds 
2001da177e4SLinus Torvalds 	len = nfs_page_length(inode, page);
2011da177e4SLinus Torvalds 	if (len == 0)
2021da177e4SLinus Torvalds 		return nfs_return_empty_page(page);
2031da177e4SLinus Torvalds 	new = nfs_create_request(ctx, inode, page, 0, len);
2041da177e4SLinus Torvalds 	if (IS_ERR(new)) {
2051da177e4SLinus Torvalds 		unlock_page(page);
2061da177e4SLinus Torvalds 		return PTR_ERR(new);
2071da177e4SLinus Torvalds 	}
2081da177e4SLinus Torvalds 	if (len < PAGE_CACHE_SIZE)
2091da177e4SLinus Torvalds 		memclear_highpage_flush(page, len, PAGE_CACHE_SIZE - len);
2101da177e4SLinus Torvalds 
2111da177e4SLinus Torvalds 	nfs_list_add_request(new, &one_request);
2121da177e4SLinus Torvalds 	nfs_pagein_one(&one_request, inode);
2131da177e4SLinus Torvalds 	return 0;
2141da177e4SLinus Torvalds }
2151da177e4SLinus Torvalds 
2161da177e4SLinus Torvalds static void nfs_readpage_release(struct nfs_page *req)
2171da177e4SLinus Torvalds {
2181da177e4SLinus Torvalds 	unlock_page(req->wb_page);
2191da177e4SLinus Torvalds 
2201da177e4SLinus Torvalds 	dprintk("NFS: read done (%s/%Ld %d@%Ld)\n",
2211da177e4SLinus Torvalds 			req->wb_context->dentry->d_inode->i_sb->s_id,
2221da177e4SLinus Torvalds 			(long long)NFS_FILEID(req->wb_context->dentry->d_inode),
2231da177e4SLinus Torvalds 			req->wb_bytes,
2241da177e4SLinus Torvalds 			(long long)req_offset(req));
22510d2c46fSNick Wilson 	nfs_clear_request(req);
22610d2c46fSNick Wilson 	nfs_release_request(req);
2271da177e4SLinus Torvalds }
2281da177e4SLinus Torvalds 
2291da177e4SLinus Torvalds /*
2301da177e4SLinus Torvalds  * Set up the NFS read request struct
2311da177e4SLinus Torvalds  */
2321da177e4SLinus Torvalds static void nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data,
233ec06c096STrond Myklebust 		const struct rpc_call_ops *call_ops,
2341da177e4SLinus Torvalds 		unsigned int count, unsigned int offset)
2351da177e4SLinus Torvalds {
2361da177e4SLinus Torvalds 	struct inode		*inode;
237ec06c096STrond Myklebust 	int flags;
2381da177e4SLinus Torvalds 
2391da177e4SLinus Torvalds 	data->req	  = req;
2401da177e4SLinus Torvalds 	data->inode	  = inode = req->wb_context->dentry->d_inode;
2411da177e4SLinus Torvalds 	data->cred	  = req->wb_context->cred;
2421da177e4SLinus Torvalds 
2431da177e4SLinus Torvalds 	data->args.fh     = NFS_FH(inode);
2441da177e4SLinus Torvalds 	data->args.offset = req_offset(req) + offset;
2451da177e4SLinus Torvalds 	data->args.pgbase = req->wb_pgbase + offset;
2461da177e4SLinus Torvalds 	data->args.pages  = data->pagevec;
2471da177e4SLinus Torvalds 	data->args.count  = count;
2481da177e4SLinus Torvalds 	data->args.context = req->wb_context;
2491da177e4SLinus Torvalds 
2501da177e4SLinus Torvalds 	data->res.fattr   = &data->fattr;
2511da177e4SLinus Torvalds 	data->res.count   = count;
2521da177e4SLinus Torvalds 	data->res.eof     = 0;
2530e574af1STrond Myklebust 	nfs_fattr_init(&data->fattr);
2541da177e4SLinus Torvalds 
255ec06c096STrond Myklebust 	/* Set up the initial task struct. */
256ec06c096STrond Myklebust 	flags = RPC_TASK_ASYNC | (IS_SWAPFILE(inode)? NFS_RPC_SWAPFLAGS : 0);
257ec06c096STrond Myklebust 	rpc_init_task(&data->task, NFS_CLIENT(inode), flags, call_ops, data);
2581da177e4SLinus Torvalds 	NFS_PROTO(inode)->read_setup(data);
2591da177e4SLinus Torvalds 
2601da177e4SLinus Torvalds 	data->task.tk_cookie = (unsigned long)inode;
2611da177e4SLinus Torvalds 
2621da177e4SLinus Torvalds 	dprintk("NFS: %4d initiated read call (req %s/%Ld, %u bytes @ offset %Lu)\n",
2631da177e4SLinus Torvalds 			data->task.tk_pid,
2641da177e4SLinus Torvalds 			inode->i_sb->s_id,
2651da177e4SLinus Torvalds 			(long long)NFS_FILEID(inode),
2661da177e4SLinus Torvalds 			count,
2671da177e4SLinus Torvalds 			(unsigned long long)data->args.offset);
2681da177e4SLinus Torvalds }
2691da177e4SLinus Torvalds 
2701da177e4SLinus Torvalds static void
2711da177e4SLinus Torvalds nfs_async_read_error(struct list_head *head)
2721da177e4SLinus Torvalds {
2731da177e4SLinus Torvalds 	struct nfs_page	*req;
2741da177e4SLinus Torvalds 
2751da177e4SLinus Torvalds 	while (!list_empty(head)) {
2761da177e4SLinus Torvalds 		req = nfs_list_entry(head->next);
2771da177e4SLinus Torvalds 		nfs_list_remove_request(req);
2781da177e4SLinus Torvalds 		SetPageError(req->wb_page);
2791da177e4SLinus Torvalds 		nfs_readpage_release(req);
2801da177e4SLinus Torvalds 	}
2811da177e4SLinus Torvalds }
2821da177e4SLinus Torvalds 
2831da177e4SLinus Torvalds /*
2841da177e4SLinus Torvalds  * Start an async read operation
2851da177e4SLinus Torvalds  */
2861da177e4SLinus Torvalds static void nfs_execute_read(struct nfs_read_data *data)
2871da177e4SLinus Torvalds {
2881da177e4SLinus Torvalds 	struct rpc_clnt *clnt = NFS_CLIENT(data->inode);
2891da177e4SLinus Torvalds 	sigset_t oldset;
2901da177e4SLinus Torvalds 
2911da177e4SLinus Torvalds 	rpc_clnt_sigmask(clnt, &oldset);
2921da177e4SLinus Torvalds 	lock_kernel();
2931da177e4SLinus Torvalds 	rpc_execute(&data->task);
2941da177e4SLinus Torvalds 	unlock_kernel();
2951da177e4SLinus Torvalds 	rpc_clnt_sigunmask(clnt, &oldset);
2961da177e4SLinus Torvalds }
2971da177e4SLinus Torvalds 
2981da177e4SLinus Torvalds /*
2991da177e4SLinus Torvalds  * Generate multiple requests to fill a single page.
3001da177e4SLinus Torvalds  *
3011da177e4SLinus Torvalds  * We optimize to reduce the number of read operations on the wire.  If we
3021da177e4SLinus Torvalds  * detect that we're reading a page, or an area of a page, that is past the
3031da177e4SLinus Torvalds  * end of file, we do not generate NFS read operations but just clear the
3041da177e4SLinus Torvalds  * parts of the page that would have come back zero from the server anyway.
3051da177e4SLinus Torvalds  *
3061da177e4SLinus Torvalds  * We rely on the cached value of i_size to make this determination; another
3071da177e4SLinus Torvalds  * client can fill pages on the server past our cached end-of-file, but we
3081da177e4SLinus Torvalds  * won't see the new data until our attribute cache is updated.  This is more
3091da177e4SLinus Torvalds  * or less conventional NFS client behavior.
3101da177e4SLinus Torvalds  */
3111da177e4SLinus Torvalds static int nfs_pagein_multi(struct list_head *head, struct inode *inode)
3121da177e4SLinus Torvalds {
3131da177e4SLinus Torvalds 	struct nfs_page *req = nfs_list_entry(head->next);
3141da177e4SLinus Torvalds 	struct page *page = req->wb_page;
3151da177e4SLinus Torvalds 	struct nfs_read_data *data;
3161da177e4SLinus Torvalds 	unsigned int rsize = NFS_SERVER(inode)->rsize;
3171da177e4SLinus Torvalds 	unsigned int nbytes, offset;
3181da177e4SLinus Torvalds 	int requests = 0;
3191da177e4SLinus Torvalds 	LIST_HEAD(list);
3201da177e4SLinus Torvalds 
3211da177e4SLinus Torvalds 	nfs_list_remove_request(req);
3221da177e4SLinus Torvalds 
3231da177e4SLinus Torvalds 	nbytes = req->wb_bytes;
3241da177e4SLinus Torvalds 	for(;;) {
32540859d7eSChuck Lever 		data = nfs_readdata_alloc(1);
3261da177e4SLinus Torvalds 		if (!data)
3271da177e4SLinus Torvalds 			goto out_bad;
3281da177e4SLinus Torvalds 		INIT_LIST_HEAD(&data->pages);
3291da177e4SLinus Torvalds 		list_add(&data->pages, &list);
3301da177e4SLinus Torvalds 		requests++;
3311da177e4SLinus Torvalds 		if (nbytes <= rsize)
3321da177e4SLinus Torvalds 			break;
3331da177e4SLinus Torvalds 		nbytes -= rsize;
3341da177e4SLinus Torvalds 	}
3351da177e4SLinus Torvalds 	atomic_set(&req->wb_complete, requests);
3361da177e4SLinus Torvalds 
3371da177e4SLinus Torvalds 	ClearPageError(page);
3381da177e4SLinus Torvalds 	offset = 0;
3391da177e4SLinus Torvalds 	nbytes = req->wb_bytes;
3401da177e4SLinus Torvalds 	do {
3411da177e4SLinus Torvalds 		data = list_entry(list.next, struct nfs_read_data, pages);
3421da177e4SLinus Torvalds 		list_del_init(&data->pages);
3431da177e4SLinus Torvalds 
3441da177e4SLinus Torvalds 		data->pagevec[0] = page;
3451da177e4SLinus Torvalds 
3461da177e4SLinus Torvalds 		if (nbytes > rsize) {
347ec06c096STrond Myklebust 			nfs_read_rpcsetup(req, data, &nfs_read_partial_ops,
348ec06c096STrond Myklebust 					rsize, offset);
3491da177e4SLinus Torvalds 			offset += rsize;
3501da177e4SLinus Torvalds 			nbytes -= rsize;
3511da177e4SLinus Torvalds 		} else {
352ec06c096STrond Myklebust 			nfs_read_rpcsetup(req, data, &nfs_read_partial_ops,
353ec06c096STrond Myklebust 					nbytes, offset);
3541da177e4SLinus Torvalds 			nbytes = 0;
3551da177e4SLinus Torvalds 		}
3561da177e4SLinus Torvalds 		nfs_execute_read(data);
3571da177e4SLinus Torvalds 	} while (nbytes != 0);
3581da177e4SLinus Torvalds 
3591da177e4SLinus Torvalds 	return 0;
3601da177e4SLinus Torvalds 
3611da177e4SLinus Torvalds out_bad:
3621da177e4SLinus Torvalds 	while (!list_empty(&list)) {
3631da177e4SLinus Torvalds 		data = list_entry(list.next, struct nfs_read_data, pages);
3641da177e4SLinus Torvalds 		list_del(&data->pages);
3651da177e4SLinus Torvalds 		nfs_readdata_free(data);
3661da177e4SLinus Torvalds 	}
3671da177e4SLinus Torvalds 	SetPageError(page);
3681da177e4SLinus Torvalds 	nfs_readpage_release(req);
3691da177e4SLinus Torvalds 	return -ENOMEM;
3701da177e4SLinus Torvalds }
3711da177e4SLinus Torvalds 
3721da177e4SLinus Torvalds static int nfs_pagein_one(struct list_head *head, struct inode *inode)
3731da177e4SLinus Torvalds {
3741da177e4SLinus Torvalds 	struct nfs_page		*req;
3751da177e4SLinus Torvalds 	struct page		**pages;
3761da177e4SLinus Torvalds 	struct nfs_read_data	*data;
3771da177e4SLinus Torvalds 	unsigned int		count;
3781da177e4SLinus Torvalds 
3791da177e4SLinus Torvalds 	if (NFS_SERVER(inode)->rsize < PAGE_CACHE_SIZE)
3801da177e4SLinus Torvalds 		return nfs_pagein_multi(head, inode);
3811da177e4SLinus Torvalds 
38240859d7eSChuck Lever 	data = nfs_readdata_alloc(NFS_SERVER(inode)->rpages);
3831da177e4SLinus Torvalds 	if (!data)
3841da177e4SLinus Torvalds 		goto out_bad;
3851da177e4SLinus Torvalds 
3861da177e4SLinus Torvalds 	INIT_LIST_HEAD(&data->pages);
3871da177e4SLinus Torvalds 	pages = data->pagevec;
3881da177e4SLinus Torvalds 	count = 0;
3891da177e4SLinus Torvalds 	while (!list_empty(head)) {
3901da177e4SLinus Torvalds 		req = nfs_list_entry(head->next);
3911da177e4SLinus Torvalds 		nfs_list_remove_request(req);
3921da177e4SLinus Torvalds 		nfs_list_add_request(req, &data->pages);
3931da177e4SLinus Torvalds 		ClearPageError(req->wb_page);
3941da177e4SLinus Torvalds 		*pages++ = req->wb_page;
3951da177e4SLinus Torvalds 		count += req->wb_bytes;
3961da177e4SLinus Torvalds 	}
3971da177e4SLinus Torvalds 	req = nfs_list_entry(data->pages.next);
3981da177e4SLinus Torvalds 
399ec06c096STrond Myklebust 	nfs_read_rpcsetup(req, data, &nfs_read_full_ops, count, 0);
4001da177e4SLinus Torvalds 
4011da177e4SLinus Torvalds 	nfs_execute_read(data);
4021da177e4SLinus Torvalds 	return 0;
4031da177e4SLinus Torvalds out_bad:
4041da177e4SLinus Torvalds 	nfs_async_read_error(head);
4051da177e4SLinus Torvalds 	return -ENOMEM;
4061da177e4SLinus Torvalds }
4071da177e4SLinus Torvalds 
4081da177e4SLinus Torvalds static int
4091da177e4SLinus Torvalds nfs_pagein_list(struct list_head *head, int rpages)
4101da177e4SLinus Torvalds {
4111da177e4SLinus Torvalds 	LIST_HEAD(one_request);
4121da177e4SLinus Torvalds 	struct nfs_page		*req;
4131da177e4SLinus Torvalds 	int			error = 0;
4141da177e4SLinus Torvalds 	unsigned int		pages = 0;
4151da177e4SLinus Torvalds 
4161da177e4SLinus Torvalds 	while (!list_empty(head)) {
4171da177e4SLinus Torvalds 		pages += nfs_coalesce_requests(head, &one_request, rpages);
4181da177e4SLinus Torvalds 		req = nfs_list_entry(one_request.next);
4191da177e4SLinus Torvalds 		error = nfs_pagein_one(&one_request, req->wb_context->dentry->d_inode);
4201da177e4SLinus Torvalds 		if (error < 0)
4211da177e4SLinus Torvalds 			break;
4221da177e4SLinus Torvalds 	}
4231da177e4SLinus Torvalds 	if (error >= 0)
4241da177e4SLinus Torvalds 		return pages;
4251da177e4SLinus Torvalds 
4261da177e4SLinus Torvalds 	nfs_async_read_error(head);
4271da177e4SLinus Torvalds 	return error;
4281da177e4SLinus Torvalds }
4291da177e4SLinus Torvalds 
4301da177e4SLinus Torvalds /*
4311da177e4SLinus Torvalds  * Handle a read reply that fills part of a page.
4321da177e4SLinus Torvalds  */
433ec06c096STrond Myklebust static void nfs_readpage_result_partial(struct rpc_task *task, void *calldata)
4341da177e4SLinus Torvalds {
435ec06c096STrond Myklebust 	struct nfs_read_data *data = calldata;
4361da177e4SLinus Torvalds 	struct nfs_page *req = data->req;
4371da177e4SLinus Torvalds 	struct page *page = req->wb_page;
4381da177e4SLinus Torvalds 
439ec06c096STrond Myklebust 	if (nfs_readpage_result(task, data) != 0)
440ec06c096STrond Myklebust 		return;
441ec06c096STrond Myklebust 	if (task->tk_status >= 0) {
4421da177e4SLinus Torvalds 		unsigned int request = data->args.count;
4431da177e4SLinus Torvalds 		unsigned int result = data->res.count;
4441da177e4SLinus Torvalds 
4451da177e4SLinus Torvalds 		if (result < request) {
4461da177e4SLinus Torvalds 			memclear_highpage_flush(page,
4471da177e4SLinus Torvalds 						data->args.pgbase + result,
4481da177e4SLinus Torvalds 						request - result);
4491da177e4SLinus Torvalds 		}
4501da177e4SLinus Torvalds 	} else
4511da177e4SLinus Torvalds 		SetPageError(page);
4521da177e4SLinus Torvalds 
4531da177e4SLinus Torvalds 	if (atomic_dec_and_test(&req->wb_complete)) {
4541da177e4SLinus Torvalds 		if (!PageError(page))
4551da177e4SLinus Torvalds 			SetPageUptodate(page);
4561da177e4SLinus Torvalds 		nfs_readpage_release(req);
4571da177e4SLinus Torvalds 	}
4581da177e4SLinus Torvalds }
4591da177e4SLinus Torvalds 
460ec06c096STrond Myklebust static const struct rpc_call_ops nfs_read_partial_ops = {
461ec06c096STrond Myklebust 	.rpc_call_done = nfs_readpage_result_partial,
462ec06c096STrond Myklebust 	.rpc_release = nfs_readdata_release,
463ec06c096STrond Myklebust };
464ec06c096STrond Myklebust 
4651da177e4SLinus Torvalds /*
4661da177e4SLinus Torvalds  * This is the callback from RPC telling us whether a reply was
4671da177e4SLinus Torvalds  * received or some error occurred (timeout or socket shutdown).
4681da177e4SLinus Torvalds  */
469ec06c096STrond Myklebust static void nfs_readpage_result_full(struct rpc_task *task, void *calldata)
4701da177e4SLinus Torvalds {
471ec06c096STrond Myklebust 	struct nfs_read_data *data = calldata;
4721da177e4SLinus Torvalds 	unsigned int count = data->res.count;
4731da177e4SLinus Torvalds 
474ec06c096STrond Myklebust 	if (nfs_readpage_result(task, data) != 0)
475ec06c096STrond Myklebust 		return;
4761da177e4SLinus Torvalds 	while (!list_empty(&data->pages)) {
4771da177e4SLinus Torvalds 		struct nfs_page *req = nfs_list_entry(data->pages.next);
4781da177e4SLinus Torvalds 		struct page *page = req->wb_page;
4791da177e4SLinus Torvalds 		nfs_list_remove_request(req);
4801da177e4SLinus Torvalds 
481ec06c096STrond Myklebust 		if (task->tk_status >= 0) {
4821da177e4SLinus Torvalds 			if (count < PAGE_CACHE_SIZE) {
4831da177e4SLinus Torvalds 				if (count < req->wb_bytes)
4841da177e4SLinus Torvalds 					memclear_highpage_flush(page,
4851da177e4SLinus Torvalds 							req->wb_pgbase + count,
4861da177e4SLinus Torvalds 							req->wb_bytes - count);
4871da177e4SLinus Torvalds 				count = 0;
4881da177e4SLinus Torvalds 			} else
4891da177e4SLinus Torvalds 				count -= PAGE_CACHE_SIZE;
4901da177e4SLinus Torvalds 			SetPageUptodate(page);
4911da177e4SLinus Torvalds 		} else
4921da177e4SLinus Torvalds 			SetPageError(page);
4931da177e4SLinus Torvalds 		nfs_readpage_release(req);
4941da177e4SLinus Torvalds 	}
4951da177e4SLinus Torvalds }
4961da177e4SLinus Torvalds 
497ec06c096STrond Myklebust static const struct rpc_call_ops nfs_read_full_ops = {
498ec06c096STrond Myklebust 	.rpc_call_done = nfs_readpage_result_full,
499ec06c096STrond Myklebust 	.rpc_release = nfs_readdata_release,
500ec06c096STrond Myklebust };
501ec06c096STrond Myklebust 
5021da177e4SLinus Torvalds /*
5031da177e4SLinus Torvalds  * This is the callback from RPC telling us whether a reply was
5041da177e4SLinus Torvalds  * received or some error occurred (timeout or socket shutdown).
5051da177e4SLinus Torvalds  */
506ec06c096STrond Myklebust int nfs_readpage_result(struct rpc_task *task, struct nfs_read_data *data)
5071da177e4SLinus Torvalds {
5081da177e4SLinus Torvalds 	struct nfs_readargs *argp = &data->args;
5091da177e4SLinus Torvalds 	struct nfs_readres *resp = &data->res;
510ec06c096STrond Myklebust 	int status;
5111da177e4SLinus Torvalds 
5121da177e4SLinus Torvalds 	dprintk("NFS: %4d nfs_readpage_result, (status %d)\n",
513ec06c096STrond Myklebust 		task->tk_pid, task->tk_status);
514ec06c096STrond Myklebust 
515ec06c096STrond Myklebust 	status = NFS_PROTO(data->inode)->read_done(task, data);
516ec06c096STrond Myklebust 	if (status != 0)
517ec06c096STrond Myklebust 		return status;
5181da177e4SLinus Torvalds 
51991d5b470SChuck Lever 	nfs_add_stats(data->inode, NFSIOS_SERVERREADBYTES, resp->count);
52091d5b470SChuck Lever 
5211da177e4SLinus Torvalds 	/* Is this a short read? */
5221da177e4SLinus Torvalds 	if (task->tk_status >= 0 && resp->count < argp->count && !resp->eof) {
52391d5b470SChuck Lever 		nfs_inc_stats(data->inode, NFSIOS_SHORTREAD);
5241da177e4SLinus Torvalds 		/* Has the server at least made some progress? */
5251da177e4SLinus Torvalds 		if (resp->count != 0) {
5261da177e4SLinus Torvalds 			/* Yes, so retry the read at the end of the data */
5271da177e4SLinus Torvalds 			argp->offset += resp->count;
5281da177e4SLinus Torvalds 			argp->pgbase += resp->count;
5291da177e4SLinus Torvalds 			argp->count -= resp->count;
5301da177e4SLinus Torvalds 			rpc_restart_call(task);
531ec06c096STrond Myklebust 			return -EAGAIN;
5321da177e4SLinus Torvalds 		}
5331da177e4SLinus Torvalds 		task->tk_status = -EIO;
5341da177e4SLinus Torvalds 	}
535dc59250cSChuck Lever 	spin_lock(&data->inode->i_lock);
53655296809SChuck Lever 	NFS_I(data->inode)->cache_validity |= NFS_INO_INVALID_ATIME;
537dc59250cSChuck Lever 	spin_unlock(&data->inode->i_lock);
538ec06c096STrond Myklebust 	return 0;
5391da177e4SLinus Torvalds }
5401da177e4SLinus Torvalds 
5411da177e4SLinus Torvalds /*
5421da177e4SLinus Torvalds  * Read a page over NFS.
5431da177e4SLinus Torvalds  * We read the page synchronously in the following case:
5441da177e4SLinus Torvalds  *  -	The error flag is set for this page. This happens only when a
5451da177e4SLinus Torvalds  *	previous async read operation failed.
5461da177e4SLinus Torvalds  */
5471da177e4SLinus Torvalds int nfs_readpage(struct file *file, struct page *page)
5481da177e4SLinus Torvalds {
5491da177e4SLinus Torvalds 	struct nfs_open_context *ctx;
5501da177e4SLinus Torvalds 	struct inode *inode = page->mapping->host;
5511da177e4SLinus Torvalds 	int		error;
5521da177e4SLinus Torvalds 
5531da177e4SLinus Torvalds 	dprintk("NFS: nfs_readpage (%p %ld@%lu)\n",
5541da177e4SLinus Torvalds 		page, PAGE_CACHE_SIZE, page->index);
55591d5b470SChuck Lever 	nfs_inc_stats(inode, NFSIOS_VFSREADPAGE);
55691d5b470SChuck Lever 	nfs_add_stats(inode, NFSIOS_READPAGES, 1);
55791d5b470SChuck Lever 
5581da177e4SLinus Torvalds 	/*
5591da177e4SLinus Torvalds 	 * Try to flush any pending writes to the file..
5601da177e4SLinus Torvalds 	 *
5611da177e4SLinus Torvalds 	 * NOTE! Because we own the page lock, there cannot
5621da177e4SLinus Torvalds 	 * be any new pending writes generated at this point
5631da177e4SLinus Torvalds 	 * for this page (other pages can be written to).
5641da177e4SLinus Torvalds 	 */
5651da177e4SLinus Torvalds 	error = nfs_wb_page(inode, page);
5661da177e4SLinus Torvalds 	if (error)
5671da177e4SLinus Torvalds 		goto out_error;
5681da177e4SLinus Torvalds 
5691da177e4SLinus Torvalds 	if (file == NULL) {
570d530838bSTrond Myklebust 		ctx = nfs_find_open_context(inode, NULL, FMODE_READ);
5711da177e4SLinus Torvalds 		if (ctx == NULL)
5721da177e4SLinus Torvalds 			return -EBADF;
5731da177e4SLinus Torvalds 	} else
5741da177e4SLinus Torvalds 		ctx = get_nfs_open_context((struct nfs_open_context *)
5751da177e4SLinus Torvalds 				file->private_data);
5761da177e4SLinus Torvalds 	if (!IS_SYNC(inode)) {
5771da177e4SLinus Torvalds 		error = nfs_readpage_async(ctx, inode, page);
5781da177e4SLinus Torvalds 		goto out;
5791da177e4SLinus Torvalds 	}
5801da177e4SLinus Torvalds 
5811da177e4SLinus Torvalds 	error = nfs_readpage_sync(ctx, inode, page);
5821da177e4SLinus Torvalds 	if (error < 0 && IS_SWAPFILE(inode))
5831da177e4SLinus Torvalds 		printk("Aiee.. nfs swap-in of page failed!\n");
5841da177e4SLinus Torvalds out:
5851da177e4SLinus Torvalds 	put_nfs_open_context(ctx);
5861da177e4SLinus Torvalds 	return error;
5871da177e4SLinus Torvalds 
5881da177e4SLinus Torvalds out_error:
5891da177e4SLinus Torvalds 	unlock_page(page);
5901da177e4SLinus Torvalds 	return error;
5911da177e4SLinus Torvalds }
5921da177e4SLinus Torvalds 
/*
 * State handed from nfs_readpages() to readpage_async_filler() through
 * the opaque argument of read_cache_pages().
 */
struct nfs_readdesc {
	struct list_head *head;		/* list collecting the new requests */
	struct nfs_open_context *ctx;	/* open context for each request */
};
5971da177e4SLinus Torvalds 
5981da177e4SLinus Torvalds static int
5991da177e4SLinus Torvalds readpage_async_filler(void *data, struct page *page)
6001da177e4SLinus Torvalds {
6011da177e4SLinus Torvalds 	struct nfs_readdesc *desc = (struct nfs_readdesc *)data;
6021da177e4SLinus Torvalds 	struct inode *inode = page->mapping->host;
6031da177e4SLinus Torvalds 	struct nfs_page *new;
6041da177e4SLinus Torvalds 	unsigned int len;
6051da177e4SLinus Torvalds 
6061da177e4SLinus Torvalds 	nfs_wb_page(inode, page);
6071da177e4SLinus Torvalds 	len = nfs_page_length(inode, page);
6081da177e4SLinus Torvalds 	if (len == 0)
6091da177e4SLinus Torvalds 		return nfs_return_empty_page(page);
6101da177e4SLinus Torvalds 	new = nfs_create_request(desc->ctx, inode, page, 0, len);
6111da177e4SLinus Torvalds 	if (IS_ERR(new)) {
6121da177e4SLinus Torvalds 			SetPageError(page);
6131da177e4SLinus Torvalds 			unlock_page(page);
6141da177e4SLinus Torvalds 			return PTR_ERR(new);
6151da177e4SLinus Torvalds 	}
6161da177e4SLinus Torvalds 	if (len < PAGE_CACHE_SIZE)
6171da177e4SLinus Torvalds 		memclear_highpage_flush(page, len, PAGE_CACHE_SIZE - len);
6181da177e4SLinus Torvalds 	nfs_list_add_request(new, desc->head);
6191da177e4SLinus Torvalds 	return 0;
6201da177e4SLinus Torvalds }
6211da177e4SLinus Torvalds 
6221da177e4SLinus Torvalds int nfs_readpages(struct file *filp, struct address_space *mapping,
6231da177e4SLinus Torvalds 		struct list_head *pages, unsigned nr_pages)
6241da177e4SLinus Torvalds {
6251da177e4SLinus Torvalds 	LIST_HEAD(head);
6261da177e4SLinus Torvalds 	struct nfs_readdesc desc = {
6271da177e4SLinus Torvalds 		.head		= &head,
6281da177e4SLinus Torvalds 	};
6291da177e4SLinus Torvalds 	struct inode *inode = mapping->host;
6301da177e4SLinus Torvalds 	struct nfs_server *server = NFS_SERVER(inode);
6311da177e4SLinus Torvalds 	int ret;
6321da177e4SLinus Torvalds 
6331da177e4SLinus Torvalds 	dprintk("NFS: nfs_readpages (%s/%Ld %d)\n",
6341da177e4SLinus Torvalds 			inode->i_sb->s_id,
6351da177e4SLinus Torvalds 			(long long)NFS_FILEID(inode),
6361da177e4SLinus Torvalds 			nr_pages);
63791d5b470SChuck Lever 	nfs_inc_stats(inode, NFSIOS_VFSREADPAGES);
6381da177e4SLinus Torvalds 
6391da177e4SLinus Torvalds 	if (filp == NULL) {
640d530838bSTrond Myklebust 		desc.ctx = nfs_find_open_context(inode, NULL, FMODE_READ);
6411da177e4SLinus Torvalds 		if (desc.ctx == NULL)
6421da177e4SLinus Torvalds 			return -EBADF;
6431da177e4SLinus Torvalds 	} else
6441da177e4SLinus Torvalds 		desc.ctx = get_nfs_open_context((struct nfs_open_context *)
6451da177e4SLinus Torvalds 				filp->private_data);
6461da177e4SLinus Torvalds 	ret = read_cache_pages(mapping, pages, readpage_async_filler, &desc);
6471da177e4SLinus Torvalds 	if (!list_empty(&head)) {
6481da177e4SLinus Torvalds 		int err = nfs_pagein_list(&head, server->rpages);
6491da177e4SLinus Torvalds 		if (!ret)
65091d5b470SChuck Lever 			nfs_add_stats(inode, NFSIOS_READPAGES, err);
6511da177e4SLinus Torvalds 			ret = err;
6521da177e4SLinus Torvalds 	}
6531da177e4SLinus Torvalds 	put_nfs_open_context(desc.ctx);
6541da177e4SLinus Torvalds 	return ret;
6551da177e4SLinus Torvalds }
6561da177e4SLinus Torvalds 
6571da177e4SLinus Torvalds int nfs_init_readpagecache(void)
6581da177e4SLinus Torvalds {
6591da177e4SLinus Torvalds 	nfs_rdata_cachep = kmem_cache_create("nfs_read_data",
6601da177e4SLinus Torvalds 					     sizeof(struct nfs_read_data),
6611da177e4SLinus Torvalds 					     0, SLAB_HWCACHE_ALIGN,
6621da177e4SLinus Torvalds 					     NULL, NULL);
6631da177e4SLinus Torvalds 	if (nfs_rdata_cachep == NULL)
6641da177e4SLinus Torvalds 		return -ENOMEM;
6651da177e4SLinus Torvalds 
6661da177e4SLinus Torvalds 	nfs_rdata_mempool = mempool_create(MIN_POOL_READ,
6671da177e4SLinus Torvalds 					   mempool_alloc_slab,
6681da177e4SLinus Torvalds 					   mempool_free_slab,
6691da177e4SLinus Torvalds 					   nfs_rdata_cachep);
6701da177e4SLinus Torvalds 	if (nfs_rdata_mempool == NULL)
6711da177e4SLinus Torvalds 		return -ENOMEM;
6721da177e4SLinus Torvalds 
6731da177e4SLinus Torvalds 	return 0;
6741da177e4SLinus Torvalds }
6751da177e4SLinus Torvalds 
6761da177e4SLinus Torvalds void nfs_destroy_readpagecache(void)
6771da177e4SLinus Torvalds {
6781da177e4SLinus Torvalds 	mempool_destroy(nfs_rdata_mempool);
6791da177e4SLinus Torvalds 	if (kmem_cache_destroy(nfs_rdata_cachep))
6801da177e4SLinus Torvalds 		printk(KERN_INFO "nfs_read_data: not all structures were freed\n");
6811da177e4SLinus Torvalds }
682