/*
 * linux/fs/nfs/read.c
 *
 * Block I/O for NFS
 *
 * Partial copy of Linus' read cache modifications to fs/nfs/file.c
 * modified for async RPC by okir@monad.swb.de
 */

#include <linux/time.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/fcntl.h>
#include <linux/stat.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/pagemap.h>
#include <linux/sunrpc/clnt.h>
#include <linux/nfs_fs.h>
#include <linux/nfs_page.h>
#include <linux/smp_lock.h>

#include <asm/system.h>

#include "internal.h"
#include "iostat.h"

#define NFSDBG_FACILITY		NFSDBG_PAGECACHE

static int nfs_pagein_multi(struct inode *, struct list_head *, unsigned int, size_t, int);
static int nfs_pagein_one(struct inode *, struct list_head *, unsigned int, size_t, int);
static const struct rpc_call_ops nfs_read_partial_ops;
static const struct rpc_call_ops nfs_read_full_ops;

static struct kmem_cache *nfs_rdata_cachep;
static mempool_t *nfs_rdata_mempool;

#define MIN_POOL_READ	(32)

struct nfs_read_data *nfs_readdata_alloc(unsigned int pagecount)
{
	struct nfs_read_data *p = mempool_alloc(nfs_rdata_mempool, GFP_NOFS);

	if (p) {
		memset(p, 0, sizeof(*p));
		INIT_LIST_HEAD(&p->pages);
		p->npages = pagecount;
		if (pagecount <= ARRAY_SIZE(p->page_array))
			p->pagevec = p->page_array;
		else {
			p->pagevec = kcalloc(pagecount, sizeof(struct page *), GFP_NOFS);
			if (!p->pagevec) {
				mempool_free(p, nfs_rdata_mempool);
				p = NULL;
			}
		}
	}
	return p;
}

static void nfs_readdata_rcu_free(struct rcu_head *head)
{
	struct nfs_read_data *p = container_of(head, struct nfs_read_data, task.u.tk_rcu);
	if (p && (p->pagevec != &p->page_array[0]))
		kfree(p->pagevec);
	mempool_free(p, nfs_rdata_mempool);
}

static void nfs_readdata_free(struct nfs_read_data *rdata)
{
	call_rcu_bh(&rdata->task.u.tk_rcu, nfs_readdata_rcu_free);
}

void nfs_readdata_release(void *data)
{
	nfs_readdata_free(data);
}

static
int nfs_return_empty_page(struct page *page)
{
	memclear_highpage_flush(page, 0, PAGE_CACHE_SIZE);
	SetPageUptodate(page);
	unlock_page(page);
	return 0;
}

static void nfs_readpage_truncate_uninitialised_page(struct nfs_read_data *data)
{
	unsigned int remainder = data->args.count - data->res.count;
	unsigned int base = data->args.pgbase + data->res.count;
	unsigned int pglen;
	struct page **pages;

	if (data->res.eof == 0 || remainder == 0)
		return;
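	/*
	 * Illustration (assuming 4KiB pages): with pgbase 0, args.count
	 * 16384 and res.count 6144 at eof, remainder is 10240 and base
	 * is 6144, so the loop below starts clearing at offset 2048 in
	 * pages[1] and zeroes every byte through the end of pages[3].
	 */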
	/*
	 * Note: "remainder" can never be negative, since we check for
	 * this in the XDR code.
	 */
	pages = &data->args.pages[base >> PAGE_CACHE_SHIFT];
	base &= ~PAGE_CACHE_MASK;
	pglen = PAGE_CACHE_SIZE - base;
	for (;;) {
		if (remainder <= pglen) {
			memclear_highpage_flush(*pages, base, remainder);
			break;
		}
		memclear_highpage_flush(*pages, base, pglen);
		pages++;
		remainder -= pglen;
		pglen = PAGE_CACHE_SIZE;
		base = 0;
	}
}

static int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode,
		struct page *page)
{
	LIST_HEAD(one_request);
	struct nfs_page *new;
	unsigned int len;

	len = nfs_page_length(page);
	if (len == 0)
		return nfs_return_empty_page(page);
	new = nfs_create_request(ctx, inode, page, 0, len);
	if (IS_ERR(new)) {
		unlock_page(page);
		return PTR_ERR(new);
	}
	if (len < PAGE_CACHE_SIZE)
		memclear_highpage_flush(page, len, PAGE_CACHE_SIZE - len);

	nfs_list_add_request(new, &one_request);
	if (NFS_SERVER(inode)->rsize < PAGE_CACHE_SIZE)
		nfs_pagein_multi(inode, &one_request, 1, len, 0);
	else
		nfs_pagein_one(inode, &one_request, 1, len, 0);
	return 0;
}

static void nfs_readpage_release(struct nfs_page *req)
{
	unlock_page(req->wb_page);

	dprintk("NFS: read done (%s/%Ld %d@%Ld)\n",
			req->wb_context->dentry->d_inode->i_sb->s_id,
			(long long)NFS_FILEID(req->wb_context->dentry->d_inode),
			req->wb_bytes,
			(long long)req_offset(req));
	nfs_clear_request(req);
	nfs_release_request(req);
}

/*
 * Set up the NFS read request struct
 */
static void nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data,
		const struct rpc_call_ops *call_ops,
		unsigned int count, unsigned int offset)
{
	struct inode *inode;
	int flags;

	data->req = req;
	data->inode = inode = req->wb_context->dentry->d_inode;
	data->cred = req->wb_context->cred;

	data->args.fh = NFS_FH(inode);
	data->args.offset = req_offset(req) + offset;
	data->args.pgbase = req->wb_pgbase + offset;
	data->args.pages = data->pagevec;
	data->args.count = count;
	data->args.context = req->wb_context;

	data->res.fattr = &data->fattr;
	data->res.count = count;
	data->res.eof = 0;
	nfs_fattr_init(&data->fattr);

	/* Set up the initial task struct. */
	flags = RPC_TASK_ASYNC | (IS_SWAPFILE(inode) ? NFS_RPC_SWAPFLAGS : 0);
	rpc_init_task(&data->task, NFS_CLIENT(inode), flags, call_ops, data);
	NFS_PROTO(inode)->read_setup(data);

	data->task.tk_cookie = (unsigned long)inode;

	dprintk("NFS: %5u initiated read call (req %s/%Ld, %u bytes @ offset %Lu)\n",
			data->task.tk_pid,
			inode->i_sb->s_id,
			(long long)NFS_FILEID(inode),
			count,
			(unsigned long long)data->args.offset);
}

static void
nfs_async_read_error(struct list_head *head)
{
	struct nfs_page *req;

	while (!list_empty(head)) {
		req = nfs_list_entry(head->next);
		nfs_list_remove_request(req);
		SetPageError(req->wb_page);
		nfs_readpage_release(req);
	}
}

/*
 * Start an async read operation
 */
static void nfs_execute_read(struct nfs_read_data *data)
{
	struct rpc_clnt *clnt = NFS_CLIENT(data->inode);
	sigset_t oldset;

	rpc_clnt_sigmask(clnt, &oldset);
	rpc_execute(&data->task);
	rpc_clnt_sigunmask(clnt, &oldset);
}
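/*
 * Two strategies follow: nfs_pagein_multi() splits a single page into
 * several wire READs when the server's rsize is smaller than a page,
 * while nfs_pagein_one() coalesces a whole list of pages into one READ.
 */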
/*
 * Generate multiple requests to fill a single page.
 *
 * We optimize to reduce the number of read operations on the wire.  If we
 * detect that we're reading a page, or an area of a page, that is past the
 * end of file, we do not generate NFS read operations but just clear the
 * parts of the page that would have come back zero from the server anyway.
 *
 * We rely on the cached value of i_size to make this determination; another
 * client can fill pages on the server past our cached end-of-file, but we
 * won't see the new data until our attribute cache is updated.  This is more
 * or less conventional NFS client behavior.
 */
static int nfs_pagein_multi(struct inode *inode, struct list_head *head, unsigned int npages, size_t count, int flags)
{
	struct nfs_page *req = nfs_list_entry(head->next);
	struct page *page = req->wb_page;
	struct nfs_read_data *data;
	size_t rsize = NFS_SERVER(inode)->rsize, nbytes;
	unsigned int offset;
	int requests = 0;
	LIST_HEAD(list);

	nfs_list_remove_request(req);

	nbytes = count;
	do {
		size_t len = min(nbytes, rsize);

		data = nfs_readdata_alloc(1);
		if (!data)
			goto out_bad;
		INIT_LIST_HEAD(&data->pages);
		list_add(&data->pages, &list);
		requests++;
		nbytes -= len;
	} while (nbytes != 0);
	atomic_set(&req->wb_complete, requests);

	ClearPageError(page);
	offset = 0;
	nbytes = count;
	do {
		data = list_entry(list.next, struct nfs_read_data, pages);
		list_del_init(&data->pages);

		data->pagevec[0] = page;

		if (nbytes < rsize)
			rsize = nbytes;
		nfs_read_rpcsetup(req, data, &nfs_read_partial_ops,
				rsize, offset);
		offset += rsize;
		nbytes -= rsize;
		nfs_execute_read(data);
	} while (nbytes != 0);

	return 0;

out_bad:
	while (!list_empty(&list)) {
		data = list_entry(list.next, struct nfs_read_data, pages);
		list_del(&data->pages);
		nfs_readdata_free(data);
	}
	SetPageError(page);
	nfs_readpage_release(req);
	return -ENOMEM;
}

static int nfs_pagein_one(struct inode *inode, struct list_head *head, unsigned int npages, size_t count, int flags)
{
	struct nfs_page *req;
	struct page **pages;
	struct nfs_read_data *data;

	data = nfs_readdata_alloc(npages);
	if (!data)
		goto out_bad;

	INIT_LIST_HEAD(&data->pages);
	pages = data->pagevec;
	while (!list_empty(head)) {
		req = nfs_list_entry(head->next);
		nfs_list_remove_request(req);
		nfs_list_add_request(req, &data->pages);
		ClearPageError(req->wb_page);
		*pages++ = req->wb_page;
	}
	req = nfs_list_entry(data->pages.next);

	nfs_read_rpcsetup(req, data, &nfs_read_full_ops, count, 0);

	nfs_execute_read(data);
	return 0;
out_bad:
	nfs_async_read_error(head);
	return -ENOMEM;
}
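/*
 * Example of the split performed by nfs_pagein_multi() (illustrative
 * figures): with 4KiB pages and rsize = 1024, a full-page read becomes
 * four READs of 1024 bytes at offsets 0, 1024, 2048 and 3072 into the
 * same page, and req->wb_complete is set to 4 so the page is unlocked
 * only once the last of the four partial replies has been handled.
 */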
/*
 * This is the callback from RPC telling us whether a reply was
 * received or some error occurred (timeout or socket shutdown).
 */
int nfs_readpage_result(struct rpc_task *task, struct nfs_read_data *data)
{
	int status;

	dprintk("NFS: %s: %5u, (status %d)\n", __FUNCTION__, task->tk_pid,
			task->tk_status);

	status = NFS_PROTO(data->inode)->read_done(task, data);
	if (status != 0)
		return status;

	nfs_add_stats(data->inode, NFSIOS_SERVERREADBYTES, data->res.count);

	if (task->tk_status == -ESTALE) {
		set_bit(NFS_INO_STALE, &NFS_FLAGS(data->inode));
		nfs_mark_for_revalidate(data->inode);
	}
	spin_lock(&data->inode->i_lock);
	NFS_I(data->inode)->cache_validity |= NFS_INO_INVALID_ATIME;
	spin_unlock(&data->inode->i_lock);
	return 0;
}

static int nfs_readpage_retry(struct rpc_task *task, struct nfs_read_data *data)
{
	struct nfs_readargs *argp = &data->args;
	struct nfs_readres *resp = &data->res;

	if (resp->eof || resp->count == argp->count)
		return 0;

	/* This is a short read! */
	nfs_inc_stats(data->inode, NFSIOS_SHORTREAD);
	/* Has the server at least made some progress? */
	if (resp->count == 0)
		return 0;

	/* Yes, so retry the read at the end of the data */
	argp->offset += resp->count;
	argp->pgbase += resp->count;
	argp->count -= resp->count;
	rpc_restart_call(task);
	return -EAGAIN;
}

/*
 * Handle a read reply that fills part of a page.
 */
static void nfs_readpage_result_partial(struct rpc_task *task, void *calldata)
{
	struct nfs_read_data *data = calldata;
	struct nfs_page *req = data->req;
	struct page *page = req->wb_page;

	if (nfs_readpage_result(task, data) != 0)
		return;

	if (likely(task->tk_status >= 0)) {
		nfs_readpage_truncate_uninitialised_page(data);
		if (nfs_readpage_retry(task, data) != 0)
			return;
	}
	if (unlikely(task->tk_status < 0))
		SetPageError(page);
	if (atomic_dec_and_test(&req->wb_complete)) {
		if (!PageError(page))
			SetPageUptodate(page);
		nfs_readpage_release(req);
	}
}

static const struct rpc_call_ops nfs_read_partial_ops = {
	.rpc_call_done = nfs_readpage_result_partial,
	.rpc_release = nfs_readdata_release,
};

static void nfs_readpage_set_pages_uptodate(struct nfs_read_data *data)
{
	unsigned int count = data->res.count;
	unsigned int base = data->args.pgbase;
	struct page **pages;

	if (data->res.eof)
		count = data->args.count;
	if (unlikely(count == 0))
		return;
	pages = &data->args.pages[base >> PAGE_CACHE_SHIFT];
	base &= ~PAGE_CACHE_MASK;
	count += base;
	for (; count >= PAGE_CACHE_SIZE; count -= PAGE_CACHE_SIZE, pages++)
		SetPageUptodate(*pages);
	if (count == 0)
		return;
	/* Was this a short read? */
	if (data->res.eof || data->res.count == data->args.count)
		SetPageUptodate(*pages);
}
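/*
 * Worked example for the marking above (illustrative figures): a 16KiB
 * read (four 4KiB pages, pgbase 0) that comes back as 9000 bytes short
 * of eof marks only the first two pages uptodate; the 808 bytes that
 * spill into the third page leave it !uptodate, and nfs_readpage_retry()
 * will re-issue the READ for the remaining 7384 bytes.
 */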
/*
 * This is the callback from RPC telling us whether a reply was
 * received or some error occurred (timeout or socket shutdown).
 */
static void nfs_readpage_result_full(struct rpc_task *task, void *calldata)
{
	struct nfs_read_data *data = calldata;

	if (nfs_readpage_result(task, data) != 0)
		return;
	/*
	 * Note: nfs_readpage_retry may change the values of
	 * data->args. In the multi-page case, we therefore need
	 * to ensure that we call nfs_readpage_set_pages_uptodate()
	 * first.
	 */
	if (likely(task->tk_status >= 0)) {
		nfs_readpage_truncate_uninitialised_page(data);
		nfs_readpage_set_pages_uptodate(data);
		if (nfs_readpage_retry(task, data) != 0)
			return;
	}
	while (!list_empty(&data->pages)) {
		struct nfs_page *req = nfs_list_entry(data->pages.next);

		nfs_list_remove_request(req);
		nfs_readpage_release(req);
	}
}

static const struct rpc_call_ops nfs_read_full_ops = {
	.rpc_call_done = nfs_readpage_result_full,
	.rpc_release = nfs_readdata_release,
};

/*
 * Read a page over NFS.
 * The read is always issued asynchronously; if a previous async read
 * of this page failed, its error flag will have been set and the read
 * is simply retried here.
 */
int nfs_readpage(struct file *file, struct page *page)
{
	struct nfs_open_context *ctx;
	struct inode *inode = page->mapping->host;
	int error;

	dprintk("NFS: nfs_readpage (%p %ld@%lu)\n",
		page, PAGE_CACHE_SIZE, page->index);
	nfs_inc_stats(inode, NFSIOS_VFSREADPAGE);
	nfs_add_stats(inode, NFSIOS_READPAGES, 1);

	/*
	 * Try to flush any pending writes to the file.
	 *
	 * NOTE! Because we own the page lock, there cannot
	 * be any new pending writes generated at this point
	 * for this page (other pages can be written to).
	 */
	error = nfs_wb_page(inode, page);
	if (error)
		goto out_error;

	error = -ESTALE;
	if (NFS_STALE(inode))
		goto out_error;

	if (file == NULL) {
		error = -EBADF;
		ctx = nfs_find_open_context(inode, NULL, FMODE_READ);
		if (ctx == NULL)
			goto out_error;
	} else
		ctx = get_nfs_open_context((struct nfs_open_context *)
				file->private_data);

	error = nfs_readpage_async(ctx, inode, page);

	put_nfs_open_context(ctx);
	return error;

out_error:
	unlock_page(page);
	return error;
}

struct nfs_readdesc {
	struct nfs_pageio_descriptor *pgio;
	struct nfs_open_context *ctx;
};

static int
readpage_async_filler(void *data, struct page *page)
{
	struct nfs_readdesc *desc = (struct nfs_readdesc *)data;
	struct inode *inode = page->mapping->host;
	struct nfs_page *new;
	unsigned int len;

	nfs_wb_page(inode, page);
	len = nfs_page_length(page);
	if (len == 0)
		return nfs_return_empty_page(page);
	new = nfs_create_request(desc->ctx, inode, page, 0, len);
	if (IS_ERR(new)) {
		SetPageError(page);
		unlock_page(page);
		return PTR_ERR(new);
	}
	if (len < PAGE_CACHE_SIZE)
		memclear_highpage_flush(page, len, PAGE_CACHE_SIZE - len);
	nfs_pageio_add_request(desc->pgio, new);
	return 0;
}
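/*
 * readpage_async_filler() is invoked once per page by read_cache_pages()
 * below; each request it queues on the pageio descriptor is coalesced
 * with contiguous neighbours up to rsize, at which point the descriptor
 * fires off a single READ covering the whole run of pages.
 */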
int nfs_readpages(struct file *filp, struct address_space *mapping,
		struct list_head *pages, unsigned nr_pages)
{
	struct nfs_pageio_descriptor pgio;
	struct nfs_readdesc desc = {
		.pgio = &pgio,
	};
	struct inode *inode = mapping->host;
	struct nfs_server *server = NFS_SERVER(inode);
	size_t rsize = server->rsize;
	unsigned long npages;
	int ret = -ESTALE;

	dprintk("NFS: nfs_readpages (%s/%Ld %d)\n",
			inode->i_sb->s_id,
			(long long)NFS_FILEID(inode),
			nr_pages);
	nfs_inc_stats(inode, NFSIOS_VFSREADPAGES);

	if (NFS_STALE(inode))
		goto out;

	if (filp == NULL) {
		desc.ctx = nfs_find_open_context(inode, NULL, FMODE_READ);
		if (desc.ctx == NULL)
			return -EBADF;
	} else
		desc.ctx = get_nfs_open_context((struct nfs_open_context *)
				filp->private_data);
	if (rsize < PAGE_CACHE_SIZE)
		nfs_pageio_init(&pgio, inode, nfs_pagein_multi, rsize, 0);
	else
		nfs_pageio_init(&pgio, inode, nfs_pagein_one, rsize, 0);

	ret = read_cache_pages(mapping, pages, readpage_async_filler, &desc);

	nfs_pageio_complete(&pgio);
	npages = (pgio.pg_bytes_written + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
	nfs_add_stats(inode, NFSIOS_READPAGES, npages);
	put_nfs_open_context(desc.ctx);
out:
	return ret;
}

int __init nfs_init_readpagecache(void)
{
	nfs_rdata_cachep = kmem_cache_create("nfs_read_data",
					     sizeof(struct nfs_read_data),
					     0, SLAB_HWCACHE_ALIGN,
					     NULL, NULL);
	if (nfs_rdata_cachep == NULL)
		return -ENOMEM;

	nfs_rdata_mempool = mempool_create_slab_pool(MIN_POOL_READ,
						     nfs_rdata_cachep);
	if (nfs_rdata_mempool == NULL)
		return -ENOMEM;

	return 0;
}

void nfs_destroy_readpagecache(void)
{
	mempool_destroy(nfs_rdata_mempool);
	kmem_cache_destroy(nfs_rdata_cachep);
}
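/*
 * Design note: nfs_readdata_alloc() draws from nfs_rdata_mempool with
 * GFP_NOFS, so the MIN_POOL_READ (32) pre-allocated elements set up
 * above let reads make forward progress under memory pressure, when
 * the allocator cannot recurse into the filesystem to reclaim memory.
 */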