1 /* 2 * linux/fs/nfs/read.c 3 * 4 * Block I/O for NFS 5 * 6 * Partial copy of Linus' read cache modifications to fs/nfs/file.c 7 * modified for async RPC by okir@monad.swb.de 8 */ 9 10 #include <linux/time.h> 11 #include <linux/kernel.h> 12 #include <linux/errno.h> 13 #include <linux/fcntl.h> 14 #include <linux/stat.h> 15 #include <linux/mm.h> 16 #include <linux/slab.h> 17 #include <linux/pagemap.h> 18 #include <linux/sunrpc/clnt.h> 19 #include <linux/nfs_fs.h> 20 #include <linux/nfs_page.h> 21 #include <linux/module.h> 22 23 #include <asm/system.h> 24 #include "pnfs.h" 25 26 #include "nfs4_fs.h" 27 #include "internal.h" 28 #include "iostat.h" 29 #include "fscache.h" 30 31 #define NFSDBG_FACILITY NFSDBG_PAGECACHE 32 33 static int nfs_pagein_multi(struct nfs_pageio_descriptor *desc); 34 static int nfs_pagein_one(struct nfs_pageio_descriptor *desc); 35 static const struct rpc_call_ops nfs_read_partial_ops; 36 static const struct rpc_call_ops nfs_read_full_ops; 37 38 static struct kmem_cache *nfs_rdata_cachep; 39 static mempool_t *nfs_rdata_mempool; 40 41 #define MIN_POOL_READ (32) 42 43 struct nfs_read_data *nfs_readdata_alloc(unsigned int pagecount) 44 { 45 struct nfs_read_data *p = mempool_alloc(nfs_rdata_mempool, GFP_KERNEL); 46 47 if (p) { 48 memset(p, 0, sizeof(*p)); 49 INIT_LIST_HEAD(&p->pages); 50 p->npages = pagecount; 51 if (pagecount <= ARRAY_SIZE(p->page_array)) 52 p->pagevec = p->page_array; 53 else { 54 p->pagevec = kcalloc(pagecount, sizeof(struct page *), GFP_KERNEL); 55 if (!p->pagevec) { 56 mempool_free(p, nfs_rdata_mempool); 57 p = NULL; 58 } 59 } 60 } 61 return p; 62 } 63 64 void nfs_readdata_free(struct nfs_read_data *p) 65 { 66 if (p && (p->pagevec != &p->page_array[0])) 67 kfree(p->pagevec); 68 mempool_free(p, nfs_rdata_mempool); 69 } 70 71 static void nfs_readdata_release(struct nfs_read_data *rdata) 72 { 73 put_lseg(rdata->lseg); 74 put_nfs_open_context(rdata->args.context); 75 nfs_readdata_free(rdata); 76 } 77 78 static 79 int nfs_return_empty_page(struct page *page) 80 { 81 zero_user(page, 0, PAGE_CACHE_SIZE); 82 SetPageUptodate(page); 83 unlock_page(page); 84 return 0; 85 } 86 87 static void nfs_readpage_truncate_uninitialised_page(struct nfs_read_data *data) 88 { 89 unsigned int remainder = data->args.count - data->res.count; 90 unsigned int base = data->args.pgbase + data->res.count; 91 unsigned int pglen; 92 struct page **pages; 93 94 if (data->res.eof == 0 || remainder == 0) 95 return; 96 /* 97 * Note: "remainder" can never be negative, since we check for 98 * this in the XDR code. 99 */ 100 pages = &data->args.pages[base >> PAGE_CACHE_SHIFT]; 101 base &= ~PAGE_CACHE_MASK; 102 pglen = PAGE_CACHE_SIZE - base; 103 for (;;) { 104 if (remainder <= pglen) { 105 zero_user(*pages, base, remainder); 106 break; 107 } 108 zero_user(*pages, base, pglen); 109 pages++; 110 remainder -= pglen; 111 pglen = PAGE_CACHE_SIZE; 112 base = 0; 113 } 114 } 115 116 int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode, 117 struct page *page) 118 { 119 struct nfs_page *new; 120 unsigned int len; 121 struct nfs_pageio_descriptor pgio; 122 123 len = nfs_page_length(page); 124 if (len == 0) 125 return nfs_return_empty_page(page); 126 new = nfs_create_request(ctx, inode, page, 0, len); 127 if (IS_ERR(new)) { 128 unlock_page(page); 129 return PTR_ERR(new); 130 } 131 if (len < PAGE_CACHE_SIZE) 132 zero_user_segment(page, len, PAGE_CACHE_SIZE); 133 134 nfs_pageio_init(&pgio, inode, NULL, 0, 0); 135 nfs_list_add_request(new, &pgio.pg_list); 136 pgio.pg_count = len; 137 138 if (NFS_SERVER(inode)->rsize < PAGE_CACHE_SIZE) 139 nfs_pagein_multi(&pgio); 140 else 141 nfs_pagein_one(&pgio); 142 return 0; 143 } 144 145 static void nfs_readpage_release(struct nfs_page *req) 146 { 147 struct inode *d_inode = req->wb_context->path.dentry->d_inode; 148 149 if (PageUptodate(req->wb_page)) 150 nfs_readpage_to_fscache(d_inode, req->wb_page, 0); 151 152 unlock_page(req->wb_page); 153 154 dprintk("NFS: read done (%s/%Ld %d@%Ld)\n", 155 req->wb_context->path.dentry->d_inode->i_sb->s_id, 156 (long long)NFS_FILEID(req->wb_context->path.dentry->d_inode), 157 req->wb_bytes, 158 (long long)req_offset(req)); 159 nfs_release_request(req); 160 } 161 162 int nfs_initiate_read(struct nfs_read_data *data, struct rpc_clnt *clnt, 163 const struct rpc_call_ops *call_ops) 164 { 165 struct inode *inode = data->inode; 166 int swap_flags = IS_SWAPFILE(inode) ? NFS_RPC_SWAPFLAGS : 0; 167 struct rpc_task *task; 168 struct rpc_message msg = { 169 .rpc_argp = &data->args, 170 .rpc_resp = &data->res, 171 .rpc_cred = data->cred, 172 }; 173 struct rpc_task_setup task_setup_data = { 174 .task = &data->task, 175 .rpc_client = clnt, 176 .rpc_message = &msg, 177 .callback_ops = call_ops, 178 .callback_data = data, 179 .workqueue = nfsiod_workqueue, 180 .flags = RPC_TASK_ASYNC | swap_flags, 181 }; 182 183 /* Set up the initial task struct. */ 184 NFS_PROTO(inode)->read_setup(data, &msg); 185 186 dprintk("NFS: %5u initiated read call (req %s/%lld, %u bytes @ " 187 "offset %llu)\n", 188 data->task.tk_pid, 189 inode->i_sb->s_id, 190 (long long)NFS_FILEID(inode), 191 data->args.count, 192 (unsigned long long)data->args.offset); 193 194 task = rpc_run_task(&task_setup_data); 195 if (IS_ERR(task)) 196 return PTR_ERR(task); 197 rpc_put_task(task); 198 return 0; 199 } 200 EXPORT_SYMBOL_GPL(nfs_initiate_read); 201 202 /* 203 * Set up the NFS read request struct 204 */ 205 static int nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data, 206 const struct rpc_call_ops *call_ops, 207 unsigned int count, unsigned int offset, 208 struct pnfs_layout_segment *lseg) 209 { 210 struct inode *inode = req->wb_context->path.dentry->d_inode; 211 212 data->req = req; 213 data->inode = inode; 214 data->cred = req->wb_context->cred; 215 data->lseg = get_lseg(lseg); 216 217 data->args.fh = NFS_FH(inode); 218 data->args.offset = req_offset(req) + offset; 219 data->args.pgbase = req->wb_pgbase + offset; 220 data->args.pages = data->pagevec; 221 data->args.count = count; 222 data->args.context = get_nfs_open_context(req->wb_context); 223 data->args.lock_context = req->wb_lock_context; 224 225 data->res.fattr = &data->fattr; 226 data->res.count = count; 227 data->res.eof = 0; 228 nfs_fattr_init(&data->fattr); 229 230 if (data->lseg && 231 (pnfs_try_to_read_data(data, call_ops) == PNFS_ATTEMPTED)) 232 return 0; 233 234 return nfs_initiate_read(data, NFS_CLIENT(inode), call_ops); 235 } 236 237 static void 238 nfs_async_read_error(struct list_head *head) 239 { 240 struct nfs_page *req; 241 242 while (!list_empty(head)) { 243 req = nfs_list_entry(head->next); 244 nfs_list_remove_request(req); 245 SetPageError(req->wb_page); 246 nfs_readpage_release(req); 247 } 248 } 249 250 /* 251 * Generate multiple requests to fill a single page. 252 * 253 * We optimize to reduce the number of read operations on the wire. If we 254 * detect that we're reading a page, or an area of a page, that is past the 255 * end of file, we do not generate NFS read operations but just clear the 256 * parts of the page that would have come back zero from the server anyway. 257 * 258 * We rely on the cached value of i_size to make this determination; another 259 * client can fill pages on the server past our cached end-of-file, but we 260 * won't see the new data until our attribute cache is updated. This is more 261 * or less conventional NFS client behavior. 262 */ 263 static int nfs_pagein_multi(struct nfs_pageio_descriptor *desc) 264 { 265 struct nfs_page *req = nfs_list_entry(desc->pg_list.next); 266 struct page *page = req->wb_page; 267 struct nfs_read_data *data; 268 size_t rsize = NFS_SERVER(desc->pg_inode)->rsize, nbytes; 269 unsigned int offset; 270 int requests = 0; 271 int ret = 0; 272 struct pnfs_layout_segment *lseg; 273 LIST_HEAD(list); 274 275 nfs_list_remove_request(req); 276 277 nbytes = desc->pg_count; 278 do { 279 size_t len = min(nbytes,rsize); 280 281 data = nfs_readdata_alloc(1); 282 if (!data) 283 goto out_bad; 284 list_add(&data->pages, &list); 285 requests++; 286 nbytes -= len; 287 } while(nbytes != 0); 288 atomic_set(&req->wb_complete, requests); 289 290 BUG_ON(desc->pg_lseg != NULL); 291 lseg = pnfs_update_layout(desc->pg_inode, req->wb_context, IOMODE_READ); 292 ClearPageError(page); 293 offset = 0; 294 nbytes = desc->pg_count; 295 do { 296 int ret2; 297 298 data = list_entry(list.next, struct nfs_read_data, pages); 299 list_del_init(&data->pages); 300 301 data->pagevec[0] = page; 302 303 if (nbytes < rsize) 304 rsize = nbytes; 305 ret2 = nfs_read_rpcsetup(req, data, &nfs_read_partial_ops, 306 rsize, offset, lseg); 307 if (ret == 0) 308 ret = ret2; 309 offset += rsize; 310 nbytes -= rsize; 311 } while (nbytes != 0); 312 put_lseg(lseg); 313 desc->pg_lseg = NULL; 314 315 return ret; 316 317 out_bad: 318 while (!list_empty(&list)) { 319 data = list_entry(list.next, struct nfs_read_data, pages); 320 list_del(&data->pages); 321 nfs_readdata_free(data); 322 } 323 SetPageError(page); 324 nfs_readpage_release(req); 325 return -ENOMEM; 326 } 327 328 static int nfs_pagein_one(struct nfs_pageio_descriptor *desc) 329 { 330 struct nfs_page *req; 331 struct page **pages; 332 struct nfs_read_data *data; 333 struct list_head *head = &desc->pg_list; 334 struct pnfs_layout_segment *lseg = desc->pg_lseg; 335 int ret = -ENOMEM; 336 337 data = nfs_readdata_alloc(nfs_page_array_len(desc->pg_base, 338 desc->pg_count)); 339 if (!data) { 340 nfs_async_read_error(head); 341 goto out; 342 } 343 344 pages = data->pagevec; 345 while (!list_empty(head)) { 346 req = nfs_list_entry(head->next); 347 nfs_list_remove_request(req); 348 nfs_list_add_request(req, &data->pages); 349 ClearPageError(req->wb_page); 350 *pages++ = req->wb_page; 351 } 352 req = nfs_list_entry(data->pages.next); 353 if ((!lseg) && list_is_singular(&data->pages)) 354 lseg = pnfs_update_layout(desc->pg_inode, req->wb_context, IOMODE_READ); 355 356 ret = nfs_read_rpcsetup(req, data, &nfs_read_full_ops, desc->pg_count, 357 0, lseg); 358 out: 359 put_lseg(lseg); 360 desc->pg_lseg = NULL; 361 return ret; 362 } 363 364 /* 365 * This is the callback from RPC telling us whether a reply was 366 * received or some error occurred (timeout or socket shutdown). 367 */ 368 int nfs_readpage_result(struct rpc_task *task, struct nfs_read_data *data) 369 { 370 int status; 371 372 dprintk("NFS: %s: %5u, (status %d)\n", __func__, task->tk_pid, 373 task->tk_status); 374 375 status = NFS_PROTO(data->inode)->read_done(task, data); 376 if (status != 0) 377 return status; 378 379 nfs_add_stats(data->inode, NFSIOS_SERVERREADBYTES, data->res.count); 380 381 if (task->tk_status == -ESTALE) { 382 set_bit(NFS_INO_STALE, &NFS_I(data->inode)->flags); 383 nfs_mark_for_revalidate(data->inode); 384 } 385 return 0; 386 } 387 388 static void nfs_readpage_retry(struct rpc_task *task, struct nfs_read_data *data) 389 { 390 struct nfs_readargs *argp = &data->args; 391 struct nfs_readres *resp = &data->res; 392 393 if (resp->eof || resp->count == argp->count) 394 return; 395 396 /* This is a short read! */ 397 nfs_inc_stats(data->inode, NFSIOS_SHORTREAD); 398 /* Has the server at least made some progress? */ 399 if (resp->count == 0) 400 return; 401 402 /* Yes, so retry the read at the end of the data */ 403 data->mds_offset += resp->count; 404 argp->offset += resp->count; 405 argp->pgbase += resp->count; 406 argp->count -= resp->count; 407 nfs_restart_rpc(task, NFS_SERVER(data->inode)->nfs_client); 408 } 409 410 /* 411 * Handle a read reply that fills part of a page. 412 */ 413 static void nfs_readpage_result_partial(struct rpc_task *task, void *calldata) 414 { 415 struct nfs_read_data *data = calldata; 416 417 if (nfs_readpage_result(task, data) != 0) 418 return; 419 if (task->tk_status < 0) 420 return; 421 422 nfs_readpage_truncate_uninitialised_page(data); 423 nfs_readpage_retry(task, data); 424 } 425 426 static void nfs_readpage_release_partial(void *calldata) 427 { 428 struct nfs_read_data *data = calldata; 429 struct nfs_page *req = data->req; 430 struct page *page = req->wb_page; 431 int status = data->task.tk_status; 432 433 if (status < 0) 434 SetPageError(page); 435 436 if (atomic_dec_and_test(&req->wb_complete)) { 437 if (!PageError(page)) 438 SetPageUptodate(page); 439 nfs_readpage_release(req); 440 } 441 nfs_readdata_release(calldata); 442 } 443 444 #if defined(CONFIG_NFS_V4_1) 445 void nfs_read_prepare(struct rpc_task *task, void *calldata) 446 { 447 struct nfs_read_data *data = calldata; 448 449 if (nfs4_setup_sequence(NFS_SERVER(data->inode), 450 &data->args.seq_args, &data->res.seq_res, 451 0, task)) 452 return; 453 rpc_call_start(task); 454 } 455 #endif /* CONFIG_NFS_V4_1 */ 456 457 static const struct rpc_call_ops nfs_read_partial_ops = { 458 #if defined(CONFIG_NFS_V4_1) 459 .rpc_call_prepare = nfs_read_prepare, 460 #endif /* CONFIG_NFS_V4_1 */ 461 .rpc_call_done = nfs_readpage_result_partial, 462 .rpc_release = nfs_readpage_release_partial, 463 }; 464 465 static void nfs_readpage_set_pages_uptodate(struct nfs_read_data *data) 466 { 467 unsigned int count = data->res.count; 468 unsigned int base = data->args.pgbase; 469 struct page **pages; 470 471 if (data->res.eof) 472 count = data->args.count; 473 if (unlikely(count == 0)) 474 return; 475 pages = &data->args.pages[base >> PAGE_CACHE_SHIFT]; 476 base &= ~PAGE_CACHE_MASK; 477 count += base; 478 for (;count >= PAGE_CACHE_SIZE; count -= PAGE_CACHE_SIZE, pages++) 479 SetPageUptodate(*pages); 480 if (count == 0) 481 return; 482 /* Was this a short read? */ 483 if (data->res.eof || data->res.count == data->args.count) 484 SetPageUptodate(*pages); 485 } 486 487 /* 488 * This is the callback from RPC telling us whether a reply was 489 * received or some error occurred (timeout or socket shutdown). 490 */ 491 static void nfs_readpage_result_full(struct rpc_task *task, void *calldata) 492 { 493 struct nfs_read_data *data = calldata; 494 495 if (nfs_readpage_result(task, data) != 0) 496 return; 497 if (task->tk_status < 0) 498 return; 499 /* 500 * Note: nfs_readpage_retry may change the values of 501 * data->args. In the multi-page case, we therefore need 502 * to ensure that we call nfs_readpage_set_pages_uptodate() 503 * first. 504 */ 505 nfs_readpage_truncate_uninitialised_page(data); 506 nfs_readpage_set_pages_uptodate(data); 507 nfs_readpage_retry(task, data); 508 } 509 510 static void nfs_readpage_release_full(void *calldata) 511 { 512 struct nfs_read_data *data = calldata; 513 514 while (!list_empty(&data->pages)) { 515 struct nfs_page *req = nfs_list_entry(data->pages.next); 516 517 nfs_list_remove_request(req); 518 nfs_readpage_release(req); 519 } 520 nfs_readdata_release(calldata); 521 } 522 523 static const struct rpc_call_ops nfs_read_full_ops = { 524 #if defined(CONFIG_NFS_V4_1) 525 .rpc_call_prepare = nfs_read_prepare, 526 #endif /* CONFIG_NFS_V4_1 */ 527 .rpc_call_done = nfs_readpage_result_full, 528 .rpc_release = nfs_readpage_release_full, 529 }; 530 531 /* 532 * Read a page over NFS. 533 * We read the page synchronously in the following case: 534 * - The error flag is set for this page. This happens only when a 535 * previous async read operation failed. 536 */ 537 int nfs_readpage(struct file *file, struct page *page) 538 { 539 struct nfs_open_context *ctx; 540 struct inode *inode = page->mapping->host; 541 int error; 542 543 dprintk("NFS: nfs_readpage (%p %ld@%lu)\n", 544 page, PAGE_CACHE_SIZE, page->index); 545 nfs_inc_stats(inode, NFSIOS_VFSREADPAGE); 546 nfs_add_stats(inode, NFSIOS_READPAGES, 1); 547 548 /* 549 * Try to flush any pending writes to the file.. 550 * 551 * NOTE! Because we own the page lock, there cannot 552 * be any new pending writes generated at this point 553 * for this page (other pages can be written to). 554 */ 555 error = nfs_wb_page(inode, page); 556 if (error) 557 goto out_unlock; 558 if (PageUptodate(page)) 559 goto out_unlock; 560 561 error = -ESTALE; 562 if (NFS_STALE(inode)) 563 goto out_unlock; 564 565 if (file == NULL) { 566 error = -EBADF; 567 ctx = nfs_find_open_context(inode, NULL, FMODE_READ); 568 if (ctx == NULL) 569 goto out_unlock; 570 } else 571 ctx = get_nfs_open_context(nfs_file_open_context(file)); 572 573 if (!IS_SYNC(inode)) { 574 error = nfs_readpage_from_fscache(ctx, inode, page); 575 if (error == 0) 576 goto out; 577 } 578 579 error = nfs_readpage_async(ctx, inode, page); 580 581 out: 582 put_nfs_open_context(ctx); 583 return error; 584 out_unlock: 585 unlock_page(page); 586 return error; 587 } 588 589 struct nfs_readdesc { 590 struct nfs_pageio_descriptor *pgio; 591 struct nfs_open_context *ctx; 592 }; 593 594 static int 595 readpage_async_filler(void *data, struct page *page) 596 { 597 struct nfs_readdesc *desc = (struct nfs_readdesc *)data; 598 struct inode *inode = page->mapping->host; 599 struct nfs_page *new; 600 unsigned int len; 601 int error; 602 603 len = nfs_page_length(page); 604 if (len == 0) 605 return nfs_return_empty_page(page); 606 607 new = nfs_create_request(desc->ctx, inode, page, 0, len); 608 if (IS_ERR(new)) 609 goto out_error; 610 611 if (len < PAGE_CACHE_SIZE) 612 zero_user_segment(page, len, PAGE_CACHE_SIZE); 613 if (!nfs_pageio_add_request(desc->pgio, new)) { 614 error = desc->pgio->pg_error; 615 goto out_unlock; 616 } 617 return 0; 618 out_error: 619 error = PTR_ERR(new); 620 SetPageError(page); 621 out_unlock: 622 unlock_page(page); 623 return error; 624 } 625 626 int nfs_readpages(struct file *filp, struct address_space *mapping, 627 struct list_head *pages, unsigned nr_pages) 628 { 629 struct nfs_pageio_descriptor pgio; 630 struct nfs_readdesc desc = { 631 .pgio = &pgio, 632 }; 633 struct inode *inode = mapping->host; 634 struct nfs_server *server = NFS_SERVER(inode); 635 size_t rsize = server->rsize; 636 unsigned long npages; 637 int ret = -ESTALE; 638 639 dprintk("NFS: nfs_readpages (%s/%Ld %d)\n", 640 inode->i_sb->s_id, 641 (long long)NFS_FILEID(inode), 642 nr_pages); 643 nfs_inc_stats(inode, NFSIOS_VFSREADPAGES); 644 645 if (NFS_STALE(inode)) 646 goto out; 647 648 if (filp == NULL) { 649 desc.ctx = nfs_find_open_context(inode, NULL, FMODE_READ); 650 if (desc.ctx == NULL) 651 return -EBADF; 652 } else 653 desc.ctx = get_nfs_open_context(nfs_file_open_context(filp)); 654 655 /* attempt to read as many of the pages as possible from the cache 656 * - this returns -ENOBUFS immediately if the cookie is negative 657 */ 658 ret = nfs_readpages_from_fscache(desc.ctx, inode, mapping, 659 pages, &nr_pages); 660 if (ret == 0) 661 goto read_complete; /* all pages were read */ 662 663 pnfs_pageio_init_read(&pgio, inode); 664 if (rsize < PAGE_CACHE_SIZE) 665 nfs_pageio_init(&pgio, inode, nfs_pagein_multi, rsize, 0); 666 else 667 nfs_pageio_init(&pgio, inode, nfs_pagein_one, rsize, 0); 668 669 ret = read_cache_pages(mapping, pages, readpage_async_filler, &desc); 670 671 nfs_pageio_complete(&pgio); 672 npages = (pgio.pg_bytes_written + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; 673 nfs_add_stats(inode, NFSIOS_READPAGES, npages); 674 read_complete: 675 put_nfs_open_context(desc.ctx); 676 out: 677 return ret; 678 } 679 680 int __init nfs_init_readpagecache(void) 681 { 682 nfs_rdata_cachep = kmem_cache_create("nfs_read_data", 683 sizeof(struct nfs_read_data), 684 0, SLAB_HWCACHE_ALIGN, 685 NULL); 686 if (nfs_rdata_cachep == NULL) 687 return -ENOMEM; 688 689 nfs_rdata_mempool = mempool_create_slab_pool(MIN_POOL_READ, 690 nfs_rdata_cachep); 691 if (nfs_rdata_mempool == NULL) 692 return -ENOMEM; 693 694 return 0; 695 } 696 697 void nfs_destroy_readpagecache(void) 698 { 699 mempool_destroy(nfs_rdata_mempool); 700 kmem_cache_destroy(nfs_rdata_cachep); 701 } 702