xref: /openbmc/linux/fs/nfs/read.c (revision 1da177e4c3f41524e886b7f1b8a0c1fc7321cac2)
1*1da177e4SLinus Torvalds /*
2*1da177e4SLinus Torvalds  * linux/fs/nfs/read.c
3*1da177e4SLinus Torvalds  *
4*1da177e4SLinus Torvalds  * Block I/O for NFS
5*1da177e4SLinus Torvalds  *
6*1da177e4SLinus Torvalds  * Partial copy of Linus' read cache modifications to fs/nfs/file.c
7*1da177e4SLinus Torvalds  * modified for async RPC by okir@monad.swb.de
8*1da177e4SLinus Torvalds  *
9*1da177e4SLinus Torvalds  * We do an ugly hack here in order to return proper error codes to the
10*1da177e4SLinus Torvalds  * user program when a read request failed: since generic_file_read
11*1da177e4SLinus Torvalds  * only checks the return value of inode->i_op->readpage() which is always 0
12*1da177e4SLinus Torvalds  * for async RPC, we set the error bit of the page to 1 when an error occurs,
13*1da177e4SLinus Torvalds  * and make nfs_readpage transmit requests synchronously when encountering this.
14*1da177e4SLinus Torvalds  * This is only a small problem, though, since we now retry all operations
15*1da177e4SLinus Torvalds  * within the RPC code when root squashing is suspected.
16*1da177e4SLinus Torvalds  */
17*1da177e4SLinus Torvalds 
18*1da177e4SLinus Torvalds #include <linux/config.h>
19*1da177e4SLinus Torvalds #include <linux/time.h>
20*1da177e4SLinus Torvalds #include <linux/kernel.h>
21*1da177e4SLinus Torvalds #include <linux/errno.h>
22*1da177e4SLinus Torvalds #include <linux/fcntl.h>
23*1da177e4SLinus Torvalds #include <linux/stat.h>
24*1da177e4SLinus Torvalds #include <linux/mm.h>
25*1da177e4SLinus Torvalds #include <linux/slab.h>
26*1da177e4SLinus Torvalds #include <linux/pagemap.h>
27*1da177e4SLinus Torvalds #include <linux/sunrpc/clnt.h>
28*1da177e4SLinus Torvalds #include <linux/nfs_fs.h>
29*1da177e4SLinus Torvalds #include <linux/nfs_page.h>
30*1da177e4SLinus Torvalds #include <linux/smp_lock.h>
31*1da177e4SLinus Torvalds 
32*1da177e4SLinus Torvalds #include <asm/system.h>
33*1da177e4SLinus Torvalds 
/* Debug facility selected for this file (presumably consumed by dprintk()). */
#define NFSDBG_FACILITY		NFSDBG_PAGECACHE

/* Forward declarations for helpers that are defined further down. */
static int nfs_pagein_one(struct list_head *, struct inode *);
static void nfs_readpage_result_partial(struct nfs_read_data *, int);
static void nfs_readpage_result_full(struct nfs_read_data *, int);

/*
 * Slab cache and mempool backing struct nfs_read_data; both are created in
 * nfs_init_readpagecache() and torn down in nfs_destroy_readpagecache().
 */
static kmem_cache_t *nfs_rdata_cachep;
mempool_t *nfs_rdata_mempool;

/* First argument handed to mempool_create() for the read-data pool. */
#define MIN_POOL_READ	(32)
44*1da177e4SLinus Torvalds 
45*1da177e4SLinus Torvalds void nfs_readdata_release(struct rpc_task *task)
46*1da177e4SLinus Torvalds {
47*1da177e4SLinus Torvalds         struct nfs_read_data   *data = (struct nfs_read_data *)task->tk_calldata;
48*1da177e4SLinus Torvalds         nfs_readdata_free(data);
49*1da177e4SLinus Torvalds }
50*1da177e4SLinus Torvalds 
51*1da177e4SLinus Torvalds static
52*1da177e4SLinus Torvalds unsigned int nfs_page_length(struct inode *inode, struct page *page)
53*1da177e4SLinus Torvalds {
54*1da177e4SLinus Torvalds 	loff_t i_size = i_size_read(inode);
55*1da177e4SLinus Torvalds 	unsigned long idx;
56*1da177e4SLinus Torvalds 
57*1da177e4SLinus Torvalds 	if (i_size <= 0)
58*1da177e4SLinus Torvalds 		return 0;
59*1da177e4SLinus Torvalds 	idx = (i_size - 1) >> PAGE_CACHE_SHIFT;
60*1da177e4SLinus Torvalds 	if (page->index > idx)
61*1da177e4SLinus Torvalds 		return 0;
62*1da177e4SLinus Torvalds 	if (page->index != idx)
63*1da177e4SLinus Torvalds 		return PAGE_CACHE_SIZE;
64*1da177e4SLinus Torvalds 	return 1 + ((i_size - 1) & (PAGE_CACHE_SIZE - 1));
65*1da177e4SLinus Torvalds }
66*1da177e4SLinus Torvalds 
67*1da177e4SLinus Torvalds static
68*1da177e4SLinus Torvalds int nfs_return_empty_page(struct page *page)
69*1da177e4SLinus Torvalds {
70*1da177e4SLinus Torvalds 	memclear_highpage_flush(page, 0, PAGE_CACHE_SIZE);
71*1da177e4SLinus Torvalds 	SetPageUptodate(page);
72*1da177e4SLinus Torvalds 	unlock_page(page);
73*1da177e4SLinus Torvalds 	return 0;
74*1da177e4SLinus Torvalds }
75*1da177e4SLinus Torvalds 
76*1da177e4SLinus Torvalds /*
77*1da177e4SLinus Torvalds  * Read a page synchronously.
78*1da177e4SLinus Torvalds  */
79*1da177e4SLinus Torvalds static int nfs_readpage_sync(struct nfs_open_context *ctx, struct inode *inode,
80*1da177e4SLinus Torvalds 		struct page *page)
81*1da177e4SLinus Torvalds {
82*1da177e4SLinus Torvalds 	unsigned int	rsize = NFS_SERVER(inode)->rsize;
83*1da177e4SLinus Torvalds 	unsigned int	count = PAGE_CACHE_SIZE;
84*1da177e4SLinus Torvalds 	int		result;
85*1da177e4SLinus Torvalds 	struct nfs_read_data *rdata;
86*1da177e4SLinus Torvalds 
87*1da177e4SLinus Torvalds 	rdata = nfs_readdata_alloc();
88*1da177e4SLinus Torvalds 	if (!rdata)
89*1da177e4SLinus Torvalds 		return -ENOMEM;
90*1da177e4SLinus Torvalds 
91*1da177e4SLinus Torvalds 	memset(rdata, 0, sizeof(*rdata));
92*1da177e4SLinus Torvalds 	rdata->flags = (IS_SWAPFILE(inode)? NFS_RPC_SWAPFLAGS : 0);
93*1da177e4SLinus Torvalds 	rdata->cred = ctx->cred;
94*1da177e4SLinus Torvalds 	rdata->inode = inode;
95*1da177e4SLinus Torvalds 	INIT_LIST_HEAD(&rdata->pages);
96*1da177e4SLinus Torvalds 	rdata->args.fh = NFS_FH(inode);
97*1da177e4SLinus Torvalds 	rdata->args.context = ctx;
98*1da177e4SLinus Torvalds 	rdata->args.pages = &page;
99*1da177e4SLinus Torvalds 	rdata->args.pgbase = 0UL;
100*1da177e4SLinus Torvalds 	rdata->args.count = rsize;
101*1da177e4SLinus Torvalds 	rdata->res.fattr = &rdata->fattr;
102*1da177e4SLinus Torvalds 
103*1da177e4SLinus Torvalds 	dprintk("NFS: nfs_readpage_sync(%p)\n", page);
104*1da177e4SLinus Torvalds 
105*1da177e4SLinus Torvalds 	/*
106*1da177e4SLinus Torvalds 	 * This works now because the socket layer never tries to DMA
107*1da177e4SLinus Torvalds 	 * into this buffer directly.
108*1da177e4SLinus Torvalds 	 */
109*1da177e4SLinus Torvalds 	do {
110*1da177e4SLinus Torvalds 		if (count < rsize)
111*1da177e4SLinus Torvalds 			rdata->args.count = count;
112*1da177e4SLinus Torvalds 		rdata->res.count = rdata->args.count;
113*1da177e4SLinus Torvalds 		rdata->args.offset = page_offset(page) + rdata->args.pgbase;
114*1da177e4SLinus Torvalds 
115*1da177e4SLinus Torvalds 		dprintk("NFS: nfs_proc_read(%s, (%s/%Ld), %Lu, %u)\n",
116*1da177e4SLinus Torvalds 			NFS_SERVER(inode)->hostname,
117*1da177e4SLinus Torvalds 			inode->i_sb->s_id,
118*1da177e4SLinus Torvalds 			(long long)NFS_FILEID(inode),
119*1da177e4SLinus Torvalds 			(unsigned long long)rdata->args.pgbase,
120*1da177e4SLinus Torvalds 			rdata->args.count);
121*1da177e4SLinus Torvalds 
122*1da177e4SLinus Torvalds 		lock_kernel();
123*1da177e4SLinus Torvalds 		result = NFS_PROTO(inode)->read(rdata);
124*1da177e4SLinus Torvalds 		unlock_kernel();
125*1da177e4SLinus Torvalds 
126*1da177e4SLinus Torvalds 		/*
127*1da177e4SLinus Torvalds 		 * Even if we had a partial success we can't mark the page
128*1da177e4SLinus Torvalds 		 * cache valid.
129*1da177e4SLinus Torvalds 		 */
130*1da177e4SLinus Torvalds 		if (result < 0) {
131*1da177e4SLinus Torvalds 			if (result == -EISDIR)
132*1da177e4SLinus Torvalds 				result = -EINVAL;
133*1da177e4SLinus Torvalds 			goto io_error;
134*1da177e4SLinus Torvalds 		}
135*1da177e4SLinus Torvalds 		count -= result;
136*1da177e4SLinus Torvalds 		rdata->args.pgbase += result;
137*1da177e4SLinus Torvalds 		/* Note: result == 0 should only happen if we're caching
138*1da177e4SLinus Torvalds 		 * a write that extends the file and punches a hole.
139*1da177e4SLinus Torvalds 		 */
140*1da177e4SLinus Torvalds 		if (rdata->res.eof != 0 || result == 0)
141*1da177e4SLinus Torvalds 			break;
142*1da177e4SLinus Torvalds 	} while (count);
143*1da177e4SLinus Torvalds 	NFS_FLAGS(inode) |= NFS_INO_INVALID_ATIME;
144*1da177e4SLinus Torvalds 
145*1da177e4SLinus Torvalds 	if (count)
146*1da177e4SLinus Torvalds 		memclear_highpage_flush(page, rdata->args.pgbase, count);
147*1da177e4SLinus Torvalds 	SetPageUptodate(page);
148*1da177e4SLinus Torvalds 	if (PageError(page))
149*1da177e4SLinus Torvalds 		ClearPageError(page);
150*1da177e4SLinus Torvalds 	result = 0;
151*1da177e4SLinus Torvalds 
152*1da177e4SLinus Torvalds io_error:
153*1da177e4SLinus Torvalds 	unlock_page(page);
154*1da177e4SLinus Torvalds 	nfs_readdata_free(rdata);
155*1da177e4SLinus Torvalds 	return result;
156*1da177e4SLinus Torvalds }
157*1da177e4SLinus Torvalds 
158*1da177e4SLinus Torvalds static int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode,
159*1da177e4SLinus Torvalds 		struct page *page)
160*1da177e4SLinus Torvalds {
161*1da177e4SLinus Torvalds 	LIST_HEAD(one_request);
162*1da177e4SLinus Torvalds 	struct nfs_page	*new;
163*1da177e4SLinus Torvalds 	unsigned int len;
164*1da177e4SLinus Torvalds 
165*1da177e4SLinus Torvalds 	len = nfs_page_length(inode, page);
166*1da177e4SLinus Torvalds 	if (len == 0)
167*1da177e4SLinus Torvalds 		return nfs_return_empty_page(page);
168*1da177e4SLinus Torvalds 	new = nfs_create_request(ctx, inode, page, 0, len);
169*1da177e4SLinus Torvalds 	if (IS_ERR(new)) {
170*1da177e4SLinus Torvalds 		unlock_page(page);
171*1da177e4SLinus Torvalds 		return PTR_ERR(new);
172*1da177e4SLinus Torvalds 	}
173*1da177e4SLinus Torvalds 	if (len < PAGE_CACHE_SIZE)
174*1da177e4SLinus Torvalds 		memclear_highpage_flush(page, len, PAGE_CACHE_SIZE - len);
175*1da177e4SLinus Torvalds 
176*1da177e4SLinus Torvalds 	nfs_lock_request(new);
177*1da177e4SLinus Torvalds 	nfs_list_add_request(new, &one_request);
178*1da177e4SLinus Torvalds 	nfs_pagein_one(&one_request, inode);
179*1da177e4SLinus Torvalds 	return 0;
180*1da177e4SLinus Torvalds }
181*1da177e4SLinus Torvalds 
182*1da177e4SLinus Torvalds static void nfs_readpage_release(struct nfs_page *req)
183*1da177e4SLinus Torvalds {
184*1da177e4SLinus Torvalds 	unlock_page(req->wb_page);
185*1da177e4SLinus Torvalds 
186*1da177e4SLinus Torvalds 	nfs_clear_request(req);
187*1da177e4SLinus Torvalds 	nfs_release_request(req);
188*1da177e4SLinus Torvalds 	nfs_unlock_request(req);
189*1da177e4SLinus Torvalds 
190*1da177e4SLinus Torvalds 	dprintk("NFS: read done (%s/%Ld %d@%Ld)\n",
191*1da177e4SLinus Torvalds 			req->wb_context->dentry->d_inode->i_sb->s_id,
192*1da177e4SLinus Torvalds 			(long long)NFS_FILEID(req->wb_context->dentry->d_inode),
193*1da177e4SLinus Torvalds 			req->wb_bytes,
194*1da177e4SLinus Torvalds 			(long long)req_offset(req));
195*1da177e4SLinus Torvalds }
196*1da177e4SLinus Torvalds 
197*1da177e4SLinus Torvalds /*
198*1da177e4SLinus Torvalds  * Set up the NFS read request struct
199*1da177e4SLinus Torvalds  */
200*1da177e4SLinus Torvalds static void nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data,
201*1da177e4SLinus Torvalds 		unsigned int count, unsigned int offset)
202*1da177e4SLinus Torvalds {
203*1da177e4SLinus Torvalds 	struct inode		*inode;
204*1da177e4SLinus Torvalds 
205*1da177e4SLinus Torvalds 	data->req	  = req;
206*1da177e4SLinus Torvalds 	data->inode	  = inode = req->wb_context->dentry->d_inode;
207*1da177e4SLinus Torvalds 	data->cred	  = req->wb_context->cred;
208*1da177e4SLinus Torvalds 
209*1da177e4SLinus Torvalds 	data->args.fh     = NFS_FH(inode);
210*1da177e4SLinus Torvalds 	data->args.offset = req_offset(req) + offset;
211*1da177e4SLinus Torvalds 	data->args.pgbase = req->wb_pgbase + offset;
212*1da177e4SLinus Torvalds 	data->args.pages  = data->pagevec;
213*1da177e4SLinus Torvalds 	data->args.count  = count;
214*1da177e4SLinus Torvalds 	data->args.context = req->wb_context;
215*1da177e4SLinus Torvalds 
216*1da177e4SLinus Torvalds 	data->res.fattr   = &data->fattr;
217*1da177e4SLinus Torvalds 	data->res.count   = count;
218*1da177e4SLinus Torvalds 	data->res.eof     = 0;
219*1da177e4SLinus Torvalds 
220*1da177e4SLinus Torvalds 	NFS_PROTO(inode)->read_setup(data);
221*1da177e4SLinus Torvalds 
222*1da177e4SLinus Torvalds 	data->task.tk_cookie = (unsigned long)inode;
223*1da177e4SLinus Torvalds 	data->task.tk_calldata = data;
224*1da177e4SLinus Torvalds 	/* Release requests */
225*1da177e4SLinus Torvalds 	data->task.tk_release = nfs_readdata_release;
226*1da177e4SLinus Torvalds 
227*1da177e4SLinus Torvalds 	dprintk("NFS: %4d initiated read call (req %s/%Ld, %u bytes @ offset %Lu)\n",
228*1da177e4SLinus Torvalds 			data->task.tk_pid,
229*1da177e4SLinus Torvalds 			inode->i_sb->s_id,
230*1da177e4SLinus Torvalds 			(long long)NFS_FILEID(inode),
231*1da177e4SLinus Torvalds 			count,
232*1da177e4SLinus Torvalds 			(unsigned long long)data->args.offset);
233*1da177e4SLinus Torvalds }
234*1da177e4SLinus Torvalds 
235*1da177e4SLinus Torvalds static void
236*1da177e4SLinus Torvalds nfs_async_read_error(struct list_head *head)
237*1da177e4SLinus Torvalds {
238*1da177e4SLinus Torvalds 	struct nfs_page	*req;
239*1da177e4SLinus Torvalds 
240*1da177e4SLinus Torvalds 	while (!list_empty(head)) {
241*1da177e4SLinus Torvalds 		req = nfs_list_entry(head->next);
242*1da177e4SLinus Torvalds 		nfs_list_remove_request(req);
243*1da177e4SLinus Torvalds 		SetPageError(req->wb_page);
244*1da177e4SLinus Torvalds 		nfs_readpage_release(req);
245*1da177e4SLinus Torvalds 	}
246*1da177e4SLinus Torvalds }
247*1da177e4SLinus Torvalds 
248*1da177e4SLinus Torvalds /*
249*1da177e4SLinus Torvalds  * Start an async read operation
250*1da177e4SLinus Torvalds  */
251*1da177e4SLinus Torvalds static void nfs_execute_read(struct nfs_read_data *data)
252*1da177e4SLinus Torvalds {
253*1da177e4SLinus Torvalds 	struct rpc_clnt *clnt = NFS_CLIENT(data->inode);
254*1da177e4SLinus Torvalds 	sigset_t oldset;
255*1da177e4SLinus Torvalds 
256*1da177e4SLinus Torvalds 	rpc_clnt_sigmask(clnt, &oldset);
257*1da177e4SLinus Torvalds 	lock_kernel();
258*1da177e4SLinus Torvalds 	rpc_execute(&data->task);
259*1da177e4SLinus Torvalds 	unlock_kernel();
260*1da177e4SLinus Torvalds 	rpc_clnt_sigunmask(clnt, &oldset);
261*1da177e4SLinus Torvalds }
262*1da177e4SLinus Torvalds 
263*1da177e4SLinus Torvalds /*
264*1da177e4SLinus Torvalds  * Generate multiple requests to fill a single page.
265*1da177e4SLinus Torvalds  *
266*1da177e4SLinus Torvalds  * We optimize to reduce the number of read operations on the wire.  If we
267*1da177e4SLinus Torvalds  * detect that we're reading a page, or an area of a page, that is past the
268*1da177e4SLinus Torvalds  * end of file, we do not generate NFS read operations but just clear the
269*1da177e4SLinus Torvalds  * parts of the page that would have come back zero from the server anyway.
270*1da177e4SLinus Torvalds  *
271*1da177e4SLinus Torvalds  * We rely on the cached value of i_size to make this determination; another
272*1da177e4SLinus Torvalds  * client can fill pages on the server past our cached end-of-file, but we
273*1da177e4SLinus Torvalds  * won't see the new data until our attribute cache is updated.  This is more
274*1da177e4SLinus Torvalds  * or less conventional NFS client behavior.
275*1da177e4SLinus Torvalds  */
276*1da177e4SLinus Torvalds static int nfs_pagein_multi(struct list_head *head, struct inode *inode)
277*1da177e4SLinus Torvalds {
278*1da177e4SLinus Torvalds 	struct nfs_page *req = nfs_list_entry(head->next);
279*1da177e4SLinus Torvalds 	struct page *page = req->wb_page;
280*1da177e4SLinus Torvalds 	struct nfs_read_data *data;
281*1da177e4SLinus Torvalds 	unsigned int rsize = NFS_SERVER(inode)->rsize;
282*1da177e4SLinus Torvalds 	unsigned int nbytes, offset;
283*1da177e4SLinus Torvalds 	int requests = 0;
284*1da177e4SLinus Torvalds 	LIST_HEAD(list);
285*1da177e4SLinus Torvalds 
286*1da177e4SLinus Torvalds 	nfs_list_remove_request(req);
287*1da177e4SLinus Torvalds 
288*1da177e4SLinus Torvalds 	nbytes = req->wb_bytes;
289*1da177e4SLinus Torvalds 	for(;;) {
290*1da177e4SLinus Torvalds 		data = nfs_readdata_alloc();
291*1da177e4SLinus Torvalds 		if (!data)
292*1da177e4SLinus Torvalds 			goto out_bad;
293*1da177e4SLinus Torvalds 		INIT_LIST_HEAD(&data->pages);
294*1da177e4SLinus Torvalds 		list_add(&data->pages, &list);
295*1da177e4SLinus Torvalds 		requests++;
296*1da177e4SLinus Torvalds 		if (nbytes <= rsize)
297*1da177e4SLinus Torvalds 			break;
298*1da177e4SLinus Torvalds 		nbytes -= rsize;
299*1da177e4SLinus Torvalds 	}
300*1da177e4SLinus Torvalds 	atomic_set(&req->wb_complete, requests);
301*1da177e4SLinus Torvalds 
302*1da177e4SLinus Torvalds 	ClearPageError(page);
303*1da177e4SLinus Torvalds 	offset = 0;
304*1da177e4SLinus Torvalds 	nbytes = req->wb_bytes;
305*1da177e4SLinus Torvalds 	do {
306*1da177e4SLinus Torvalds 		data = list_entry(list.next, struct nfs_read_data, pages);
307*1da177e4SLinus Torvalds 		list_del_init(&data->pages);
308*1da177e4SLinus Torvalds 
309*1da177e4SLinus Torvalds 		data->pagevec[0] = page;
310*1da177e4SLinus Torvalds 		data->complete = nfs_readpage_result_partial;
311*1da177e4SLinus Torvalds 
312*1da177e4SLinus Torvalds 		if (nbytes > rsize) {
313*1da177e4SLinus Torvalds 			nfs_read_rpcsetup(req, data, rsize, offset);
314*1da177e4SLinus Torvalds 			offset += rsize;
315*1da177e4SLinus Torvalds 			nbytes -= rsize;
316*1da177e4SLinus Torvalds 		} else {
317*1da177e4SLinus Torvalds 			nfs_read_rpcsetup(req, data, nbytes, offset);
318*1da177e4SLinus Torvalds 			nbytes = 0;
319*1da177e4SLinus Torvalds 		}
320*1da177e4SLinus Torvalds 		nfs_execute_read(data);
321*1da177e4SLinus Torvalds 	} while (nbytes != 0);
322*1da177e4SLinus Torvalds 
323*1da177e4SLinus Torvalds 	return 0;
324*1da177e4SLinus Torvalds 
325*1da177e4SLinus Torvalds out_bad:
326*1da177e4SLinus Torvalds 	while (!list_empty(&list)) {
327*1da177e4SLinus Torvalds 		data = list_entry(list.next, struct nfs_read_data, pages);
328*1da177e4SLinus Torvalds 		list_del(&data->pages);
329*1da177e4SLinus Torvalds 		nfs_readdata_free(data);
330*1da177e4SLinus Torvalds 	}
331*1da177e4SLinus Torvalds 	SetPageError(page);
332*1da177e4SLinus Torvalds 	nfs_readpage_release(req);
333*1da177e4SLinus Torvalds 	return -ENOMEM;
334*1da177e4SLinus Torvalds }
335*1da177e4SLinus Torvalds 
336*1da177e4SLinus Torvalds static int nfs_pagein_one(struct list_head *head, struct inode *inode)
337*1da177e4SLinus Torvalds {
338*1da177e4SLinus Torvalds 	struct nfs_page		*req;
339*1da177e4SLinus Torvalds 	struct page		**pages;
340*1da177e4SLinus Torvalds 	struct nfs_read_data	*data;
341*1da177e4SLinus Torvalds 	unsigned int		count;
342*1da177e4SLinus Torvalds 
343*1da177e4SLinus Torvalds 	if (NFS_SERVER(inode)->rsize < PAGE_CACHE_SIZE)
344*1da177e4SLinus Torvalds 		return nfs_pagein_multi(head, inode);
345*1da177e4SLinus Torvalds 
346*1da177e4SLinus Torvalds 	data = nfs_readdata_alloc();
347*1da177e4SLinus Torvalds 	if (!data)
348*1da177e4SLinus Torvalds 		goto out_bad;
349*1da177e4SLinus Torvalds 
350*1da177e4SLinus Torvalds 	INIT_LIST_HEAD(&data->pages);
351*1da177e4SLinus Torvalds 	pages = data->pagevec;
352*1da177e4SLinus Torvalds 	count = 0;
353*1da177e4SLinus Torvalds 	while (!list_empty(head)) {
354*1da177e4SLinus Torvalds 		req = nfs_list_entry(head->next);
355*1da177e4SLinus Torvalds 		nfs_list_remove_request(req);
356*1da177e4SLinus Torvalds 		nfs_list_add_request(req, &data->pages);
357*1da177e4SLinus Torvalds 		ClearPageError(req->wb_page);
358*1da177e4SLinus Torvalds 		*pages++ = req->wb_page;
359*1da177e4SLinus Torvalds 		count += req->wb_bytes;
360*1da177e4SLinus Torvalds 	}
361*1da177e4SLinus Torvalds 	req = nfs_list_entry(data->pages.next);
362*1da177e4SLinus Torvalds 
363*1da177e4SLinus Torvalds 	data->complete = nfs_readpage_result_full;
364*1da177e4SLinus Torvalds 	nfs_read_rpcsetup(req, data, count, 0);
365*1da177e4SLinus Torvalds 
366*1da177e4SLinus Torvalds 	nfs_execute_read(data);
367*1da177e4SLinus Torvalds 	return 0;
368*1da177e4SLinus Torvalds out_bad:
369*1da177e4SLinus Torvalds 	nfs_async_read_error(head);
370*1da177e4SLinus Torvalds 	return -ENOMEM;
371*1da177e4SLinus Torvalds }
372*1da177e4SLinus Torvalds 
373*1da177e4SLinus Torvalds static int
374*1da177e4SLinus Torvalds nfs_pagein_list(struct list_head *head, int rpages)
375*1da177e4SLinus Torvalds {
376*1da177e4SLinus Torvalds 	LIST_HEAD(one_request);
377*1da177e4SLinus Torvalds 	struct nfs_page		*req;
378*1da177e4SLinus Torvalds 	int			error = 0;
379*1da177e4SLinus Torvalds 	unsigned int		pages = 0;
380*1da177e4SLinus Torvalds 
381*1da177e4SLinus Torvalds 	while (!list_empty(head)) {
382*1da177e4SLinus Torvalds 		pages += nfs_coalesce_requests(head, &one_request, rpages);
383*1da177e4SLinus Torvalds 		req = nfs_list_entry(one_request.next);
384*1da177e4SLinus Torvalds 		error = nfs_pagein_one(&one_request, req->wb_context->dentry->d_inode);
385*1da177e4SLinus Torvalds 		if (error < 0)
386*1da177e4SLinus Torvalds 			break;
387*1da177e4SLinus Torvalds 	}
388*1da177e4SLinus Torvalds 	if (error >= 0)
389*1da177e4SLinus Torvalds 		return pages;
390*1da177e4SLinus Torvalds 
391*1da177e4SLinus Torvalds 	nfs_async_read_error(head);
392*1da177e4SLinus Torvalds 	return error;
393*1da177e4SLinus Torvalds }
394*1da177e4SLinus Torvalds 
395*1da177e4SLinus Torvalds /*
396*1da177e4SLinus Torvalds  * Handle a read reply that fills part of a page.
397*1da177e4SLinus Torvalds  */
398*1da177e4SLinus Torvalds static void nfs_readpage_result_partial(struct nfs_read_data *data, int status)
399*1da177e4SLinus Torvalds {
400*1da177e4SLinus Torvalds 	struct nfs_page *req = data->req;
401*1da177e4SLinus Torvalds 	struct page *page = req->wb_page;
402*1da177e4SLinus Torvalds 
403*1da177e4SLinus Torvalds 	if (status >= 0) {
404*1da177e4SLinus Torvalds 		unsigned int request = data->args.count;
405*1da177e4SLinus Torvalds 		unsigned int result = data->res.count;
406*1da177e4SLinus Torvalds 
407*1da177e4SLinus Torvalds 		if (result < request) {
408*1da177e4SLinus Torvalds 			memclear_highpage_flush(page,
409*1da177e4SLinus Torvalds 						data->args.pgbase + result,
410*1da177e4SLinus Torvalds 						request - result);
411*1da177e4SLinus Torvalds 		}
412*1da177e4SLinus Torvalds 	} else
413*1da177e4SLinus Torvalds 		SetPageError(page);
414*1da177e4SLinus Torvalds 
415*1da177e4SLinus Torvalds 	if (atomic_dec_and_test(&req->wb_complete)) {
416*1da177e4SLinus Torvalds 		if (!PageError(page))
417*1da177e4SLinus Torvalds 			SetPageUptodate(page);
418*1da177e4SLinus Torvalds 		nfs_readpage_release(req);
419*1da177e4SLinus Torvalds 	}
420*1da177e4SLinus Torvalds }
421*1da177e4SLinus Torvalds 
422*1da177e4SLinus Torvalds /*
423*1da177e4SLinus Torvalds  * This is the callback from RPC telling us whether a reply was
424*1da177e4SLinus Torvalds  * received or some error occurred (timeout or socket shutdown).
425*1da177e4SLinus Torvalds  */
426*1da177e4SLinus Torvalds static void nfs_readpage_result_full(struct nfs_read_data *data, int status)
427*1da177e4SLinus Torvalds {
428*1da177e4SLinus Torvalds 	unsigned int count = data->res.count;
429*1da177e4SLinus Torvalds 
430*1da177e4SLinus Torvalds 	while (!list_empty(&data->pages)) {
431*1da177e4SLinus Torvalds 		struct nfs_page *req = nfs_list_entry(data->pages.next);
432*1da177e4SLinus Torvalds 		struct page *page = req->wb_page;
433*1da177e4SLinus Torvalds 		nfs_list_remove_request(req);
434*1da177e4SLinus Torvalds 
435*1da177e4SLinus Torvalds 		if (status >= 0) {
436*1da177e4SLinus Torvalds 			if (count < PAGE_CACHE_SIZE) {
437*1da177e4SLinus Torvalds 				if (count < req->wb_bytes)
438*1da177e4SLinus Torvalds 					memclear_highpage_flush(page,
439*1da177e4SLinus Torvalds 							req->wb_pgbase + count,
440*1da177e4SLinus Torvalds 							req->wb_bytes - count);
441*1da177e4SLinus Torvalds 				count = 0;
442*1da177e4SLinus Torvalds 			} else
443*1da177e4SLinus Torvalds 				count -= PAGE_CACHE_SIZE;
444*1da177e4SLinus Torvalds 			SetPageUptodate(page);
445*1da177e4SLinus Torvalds 		} else
446*1da177e4SLinus Torvalds 			SetPageError(page);
447*1da177e4SLinus Torvalds 		nfs_readpage_release(req);
448*1da177e4SLinus Torvalds 	}
449*1da177e4SLinus Torvalds }
450*1da177e4SLinus Torvalds 
451*1da177e4SLinus Torvalds /*
452*1da177e4SLinus Torvalds  * This is the callback from RPC telling us whether a reply was
453*1da177e4SLinus Torvalds  * received or some error occurred (timeout or socket shutdown).
454*1da177e4SLinus Torvalds  */
455*1da177e4SLinus Torvalds void nfs_readpage_result(struct rpc_task *task)
456*1da177e4SLinus Torvalds {
457*1da177e4SLinus Torvalds 	struct nfs_read_data *data = (struct nfs_read_data *)task->tk_calldata;
458*1da177e4SLinus Torvalds 	struct nfs_readargs *argp = &data->args;
459*1da177e4SLinus Torvalds 	struct nfs_readres *resp = &data->res;
460*1da177e4SLinus Torvalds 	int status = task->tk_status;
461*1da177e4SLinus Torvalds 
462*1da177e4SLinus Torvalds 	dprintk("NFS: %4d nfs_readpage_result, (status %d)\n",
463*1da177e4SLinus Torvalds 		task->tk_pid, status);
464*1da177e4SLinus Torvalds 
465*1da177e4SLinus Torvalds 	/* Is this a short read? */
466*1da177e4SLinus Torvalds 	if (task->tk_status >= 0 && resp->count < argp->count && !resp->eof) {
467*1da177e4SLinus Torvalds 		/* Has the server at least made some progress? */
468*1da177e4SLinus Torvalds 		if (resp->count != 0) {
469*1da177e4SLinus Torvalds 			/* Yes, so retry the read at the end of the data */
470*1da177e4SLinus Torvalds 			argp->offset += resp->count;
471*1da177e4SLinus Torvalds 			argp->pgbase += resp->count;
472*1da177e4SLinus Torvalds 			argp->count -= resp->count;
473*1da177e4SLinus Torvalds 			rpc_restart_call(task);
474*1da177e4SLinus Torvalds 			return;
475*1da177e4SLinus Torvalds 		}
476*1da177e4SLinus Torvalds 		task->tk_status = -EIO;
477*1da177e4SLinus Torvalds 	}
478*1da177e4SLinus Torvalds 	NFS_FLAGS(data->inode) |= NFS_INO_INVALID_ATIME;
479*1da177e4SLinus Torvalds 	data->complete(data, status);
480*1da177e4SLinus Torvalds }
481*1da177e4SLinus Torvalds 
482*1da177e4SLinus Torvalds /*
483*1da177e4SLinus Torvalds  * Read a page over NFS.
484*1da177e4SLinus Torvalds  * We read the page synchronously in the following case:
485*1da177e4SLinus Torvalds  *  -	The error flag is set for this page. This happens only when a
486*1da177e4SLinus Torvalds  *	previous async read operation failed.
487*1da177e4SLinus Torvalds  */
488*1da177e4SLinus Torvalds int nfs_readpage(struct file *file, struct page *page)
489*1da177e4SLinus Torvalds {
490*1da177e4SLinus Torvalds 	struct nfs_open_context *ctx;
491*1da177e4SLinus Torvalds 	struct inode *inode = page->mapping->host;
492*1da177e4SLinus Torvalds 	int		error;
493*1da177e4SLinus Torvalds 
494*1da177e4SLinus Torvalds 	dprintk("NFS: nfs_readpage (%p %ld@%lu)\n",
495*1da177e4SLinus Torvalds 		page, PAGE_CACHE_SIZE, page->index);
496*1da177e4SLinus Torvalds 	/*
497*1da177e4SLinus Torvalds 	 * Try to flush any pending writes to the file..
498*1da177e4SLinus Torvalds 	 *
499*1da177e4SLinus Torvalds 	 * NOTE! Because we own the page lock, there cannot
500*1da177e4SLinus Torvalds 	 * be any new pending writes generated at this point
501*1da177e4SLinus Torvalds 	 * for this page (other pages can be written to).
502*1da177e4SLinus Torvalds 	 */
503*1da177e4SLinus Torvalds 	error = nfs_wb_page(inode, page);
504*1da177e4SLinus Torvalds 	if (error)
505*1da177e4SLinus Torvalds 		goto out_error;
506*1da177e4SLinus Torvalds 
507*1da177e4SLinus Torvalds 	if (file == NULL) {
508*1da177e4SLinus Torvalds 		ctx = nfs_find_open_context(inode, FMODE_READ);
509*1da177e4SLinus Torvalds 		if (ctx == NULL)
510*1da177e4SLinus Torvalds 			return -EBADF;
511*1da177e4SLinus Torvalds 	} else
512*1da177e4SLinus Torvalds 		ctx = get_nfs_open_context((struct nfs_open_context *)
513*1da177e4SLinus Torvalds 				file->private_data);
514*1da177e4SLinus Torvalds 	if (!IS_SYNC(inode)) {
515*1da177e4SLinus Torvalds 		error = nfs_readpage_async(ctx, inode, page);
516*1da177e4SLinus Torvalds 		goto out;
517*1da177e4SLinus Torvalds 	}
518*1da177e4SLinus Torvalds 
519*1da177e4SLinus Torvalds 	error = nfs_readpage_sync(ctx, inode, page);
520*1da177e4SLinus Torvalds 	if (error < 0 && IS_SWAPFILE(inode))
521*1da177e4SLinus Torvalds 		printk("Aiee.. nfs swap-in of page failed!\n");
522*1da177e4SLinus Torvalds out:
523*1da177e4SLinus Torvalds 	put_nfs_open_context(ctx);
524*1da177e4SLinus Torvalds 	return error;
525*1da177e4SLinus Torvalds 
526*1da177e4SLinus Torvalds out_error:
527*1da177e4SLinus Torvalds 	unlock_page(page);
528*1da177e4SLinus Torvalds 	return error;
529*1da177e4SLinus Torvalds }
530*1da177e4SLinus Torvalds 
/*
 * State handed to readpage_async_filler() through read_cache_pages().
 */
struct nfs_readdesc {
	struct list_head *head;		/* list that new read requests are added to */
	struct nfs_open_context *ctx;	/* open context used to create each request */
};
535*1da177e4SLinus Torvalds 
536*1da177e4SLinus Torvalds static int
537*1da177e4SLinus Torvalds readpage_async_filler(void *data, struct page *page)
538*1da177e4SLinus Torvalds {
539*1da177e4SLinus Torvalds 	struct nfs_readdesc *desc = (struct nfs_readdesc *)data;
540*1da177e4SLinus Torvalds 	struct inode *inode = page->mapping->host;
541*1da177e4SLinus Torvalds 	struct nfs_page *new;
542*1da177e4SLinus Torvalds 	unsigned int len;
543*1da177e4SLinus Torvalds 
544*1da177e4SLinus Torvalds 	nfs_wb_page(inode, page);
545*1da177e4SLinus Torvalds 	len = nfs_page_length(inode, page);
546*1da177e4SLinus Torvalds 	if (len == 0)
547*1da177e4SLinus Torvalds 		return nfs_return_empty_page(page);
548*1da177e4SLinus Torvalds 	new = nfs_create_request(desc->ctx, inode, page, 0, len);
549*1da177e4SLinus Torvalds 	if (IS_ERR(new)) {
550*1da177e4SLinus Torvalds 			SetPageError(page);
551*1da177e4SLinus Torvalds 			unlock_page(page);
552*1da177e4SLinus Torvalds 			return PTR_ERR(new);
553*1da177e4SLinus Torvalds 	}
554*1da177e4SLinus Torvalds 	if (len < PAGE_CACHE_SIZE)
555*1da177e4SLinus Torvalds 		memclear_highpage_flush(page, len, PAGE_CACHE_SIZE - len);
556*1da177e4SLinus Torvalds 	nfs_lock_request(new);
557*1da177e4SLinus Torvalds 	nfs_list_add_request(new, desc->head);
558*1da177e4SLinus Torvalds 	return 0;
559*1da177e4SLinus Torvalds }
560*1da177e4SLinus Torvalds 
561*1da177e4SLinus Torvalds int nfs_readpages(struct file *filp, struct address_space *mapping,
562*1da177e4SLinus Torvalds 		struct list_head *pages, unsigned nr_pages)
563*1da177e4SLinus Torvalds {
564*1da177e4SLinus Torvalds 	LIST_HEAD(head);
565*1da177e4SLinus Torvalds 	struct nfs_readdesc desc = {
566*1da177e4SLinus Torvalds 		.head		= &head,
567*1da177e4SLinus Torvalds 	};
568*1da177e4SLinus Torvalds 	struct inode *inode = mapping->host;
569*1da177e4SLinus Torvalds 	struct nfs_server *server = NFS_SERVER(inode);
570*1da177e4SLinus Torvalds 	int ret;
571*1da177e4SLinus Torvalds 
572*1da177e4SLinus Torvalds 	dprintk("NFS: nfs_readpages (%s/%Ld %d)\n",
573*1da177e4SLinus Torvalds 			inode->i_sb->s_id,
574*1da177e4SLinus Torvalds 			(long long)NFS_FILEID(inode),
575*1da177e4SLinus Torvalds 			nr_pages);
576*1da177e4SLinus Torvalds 
577*1da177e4SLinus Torvalds 	if (filp == NULL) {
578*1da177e4SLinus Torvalds 		desc.ctx = nfs_find_open_context(inode, FMODE_READ);
579*1da177e4SLinus Torvalds 		if (desc.ctx == NULL)
580*1da177e4SLinus Torvalds 			return -EBADF;
581*1da177e4SLinus Torvalds 	} else
582*1da177e4SLinus Torvalds 		desc.ctx = get_nfs_open_context((struct nfs_open_context *)
583*1da177e4SLinus Torvalds 				filp->private_data);
584*1da177e4SLinus Torvalds 	ret = read_cache_pages(mapping, pages, readpage_async_filler, &desc);
585*1da177e4SLinus Torvalds 	if (!list_empty(&head)) {
586*1da177e4SLinus Torvalds 		int err = nfs_pagein_list(&head, server->rpages);
587*1da177e4SLinus Torvalds 		if (!ret)
588*1da177e4SLinus Torvalds 			ret = err;
589*1da177e4SLinus Torvalds 	}
590*1da177e4SLinus Torvalds 	put_nfs_open_context(desc.ctx);
591*1da177e4SLinus Torvalds 	return ret;
592*1da177e4SLinus Torvalds }
593*1da177e4SLinus Torvalds 
594*1da177e4SLinus Torvalds int nfs_init_readpagecache(void)
595*1da177e4SLinus Torvalds {
596*1da177e4SLinus Torvalds 	nfs_rdata_cachep = kmem_cache_create("nfs_read_data",
597*1da177e4SLinus Torvalds 					     sizeof(struct nfs_read_data),
598*1da177e4SLinus Torvalds 					     0, SLAB_HWCACHE_ALIGN,
599*1da177e4SLinus Torvalds 					     NULL, NULL);
600*1da177e4SLinus Torvalds 	if (nfs_rdata_cachep == NULL)
601*1da177e4SLinus Torvalds 		return -ENOMEM;
602*1da177e4SLinus Torvalds 
603*1da177e4SLinus Torvalds 	nfs_rdata_mempool = mempool_create(MIN_POOL_READ,
604*1da177e4SLinus Torvalds 					   mempool_alloc_slab,
605*1da177e4SLinus Torvalds 					   mempool_free_slab,
606*1da177e4SLinus Torvalds 					   nfs_rdata_cachep);
607*1da177e4SLinus Torvalds 	if (nfs_rdata_mempool == NULL)
608*1da177e4SLinus Torvalds 		return -ENOMEM;
609*1da177e4SLinus Torvalds 
610*1da177e4SLinus Torvalds 	return 0;
611*1da177e4SLinus Torvalds }
612*1da177e4SLinus Torvalds 
613*1da177e4SLinus Torvalds void nfs_destroy_readpagecache(void)
614*1da177e4SLinus Torvalds {
615*1da177e4SLinus Torvalds 	mempool_destroy(nfs_rdata_mempool);
616*1da177e4SLinus Torvalds 	if (kmem_cache_destroy(nfs_rdata_cachep))
617*1da177e4SLinus Torvalds 		printk(KERN_INFO "nfs_read_data: not all structures were freed\n");
618*1da177e4SLinus Torvalds }
619