/*
 * linux/fs/nfs/read.c
 *
 * Block I/O for NFS
 *
 * Partial copy of Linus' read cache modifications to fs/nfs/file.c
 * modified for async RPC by okir@monad.swb.de
 */

#include <linux/time.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/fcntl.h>
#include <linux/stat.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/pagemap.h>
#include <linux/sunrpc/clnt.h>
#include <linux/nfs_fs.h>
#include <linux/nfs_page.h>
#include <linux/module.h>

#include "nfs4_fs.h"
#include "internal.h"
#include "iostat.h"
#include "fscache.h"

#define NFSDBG_FACILITY		NFSDBG_PAGECACHE

static const struct nfs_pageio_ops nfs_pageio_read_ops;
static const struct rpc_call_ops nfs_read_common_ops;
static const struct nfs_pgio_completion_ops nfs_async_read_completion_ops;

static struct kmem_cache *nfs_rdata_cachep;

struct nfs_read_header *nfs_readhdr_alloc(void)
{
	struct nfs_read_header *rhdr;

	rhdr = kmem_cache_zalloc(nfs_rdata_cachep, GFP_KERNEL);
	if (rhdr) {
		struct nfs_pgio_header *hdr = &rhdr->header;

		INIT_LIST_HEAD(&hdr->pages);
		INIT_LIST_HEAD(&hdr->rpc_list);
		spin_lock_init(&hdr->lock);
		atomic_set(&hdr->refcnt, 0);
	}
	return rhdr;
}
EXPORT_SYMBOL_GPL(nfs_readhdr_alloc);

static struct nfs_read_data *nfs_readdata_alloc(struct nfs_pgio_header *hdr,
						unsigned int pagecount)
{
	struct nfs_read_data *data, *prealloc;

	prealloc = &container_of(hdr, struct nfs_read_header, header)->rpc_data;
	if (prealloc->header == NULL)
		data = prealloc;
	else
		data = kzalloc(sizeof(*data), GFP_KERNEL);
	if (!data)
		goto out;

	if (nfs_pgarray_set(&data->pages, pagecount)) {
		data->header = hdr;
		atomic_inc(&hdr->refcnt);
	} else {
		if (data != prealloc)
			kfree(data);
		data = NULL;
	}
out:
	return data;
}

void nfs_readhdr_free(struct nfs_pgio_header *hdr)
{
	struct nfs_read_header *rhdr = container_of(hdr, struct nfs_read_header, header);

	kmem_cache_free(nfs_rdata_cachep, rhdr);
}
EXPORT_SYMBOL_GPL(nfs_readhdr_free);

void nfs_readdata_release(struct nfs_read_data *rdata)
{
	struct nfs_pgio_header *hdr = rdata->header;
	struct nfs_read_header *read_header = container_of(hdr, struct nfs_read_header, header);

	put_nfs_open_context(rdata->args.context);
	if (rdata->pages.pagevec != rdata->pages.page_array)
		kfree(rdata->pages.pagevec);
	if (rdata != &read_header->rpc_data)
		kfree(rdata);
	else
		rdata->header = NULL;
	if (atomic_dec_and_test(&hdr->refcnt))
		hdr->completion_ops->completion(hdr);
}
EXPORT_SYMBOL_GPL(nfs_readdata_release);

static
int nfs_return_empty_page(struct page *page)
{
	zero_user(page, 0, PAGE_CACHE_SIZE);
	SetPageUptodate(page);
	unlock_page(page);
	return 0;
}

void nfs_pageio_init_read(struct nfs_pageio_descriptor *pgio,
			  struct inode *inode,
			  const struct nfs_pgio_completion_ops *compl_ops)
{
	nfs_pageio_init(pgio, inode, &nfs_pageio_read_ops, compl_ops,
			NFS_SERVER(inode)->rsize, 0);
}
EXPORT_SYMBOL_GPL(nfs_pageio_init_read);

void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio)
{
	pgio->pg_ops = &nfs_pageio_read_ops;
	pgio->pg_bsize = NFS_SERVER(pgio->pg_inode)->rsize;
}
EXPORT_SYMBOL_GPL(nfs_pageio_reset_read_mds);
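
/*
 * Read a single page asynchronously on behalf of nfs_readpage(): a fresh
 * nfs_page request is created for the page, pushed through a temporary
 * pageio descriptor and submitted.  Any part of the page beyond the cached
 * file length is zeroed here rather than requested from the server.
 */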
int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode,
		       struct page *page)
{
	struct nfs_page *new;
	unsigned int len;
	struct nfs_pageio_descriptor pgio;

	len = nfs_page_length(page);
	if (len == 0)
		return nfs_return_empty_page(page);
	new = nfs_create_request(ctx, inode, page, 0, len);
	if (IS_ERR(new)) {
		unlock_page(page);
		return PTR_ERR(new);
	}
	if (len < PAGE_CACHE_SIZE)
		zero_user_segment(page, len, PAGE_CACHE_SIZE);

	NFS_PROTO(inode)->read_pageio_init(&pgio, inode, &nfs_async_read_completion_ops);
	nfs_pageio_add_request(&pgio, new);
	nfs_pageio_complete(&pgio);
	NFS_I(inode)->read_io += pgio.pg_bytes_written;
	return 0;
}

static void nfs_readpage_release(struct nfs_page *req)
{
	struct inode *d_inode = req->wb_context->dentry->d_inode;

	if (PageUptodate(req->wb_page))
		nfs_readpage_to_fscache(d_inode, req->wb_page, 0);

	unlock_page(req->wb_page);

	dprintk("NFS: read done (%s/%Ld %d@%Ld)\n",
			req->wb_context->dentry->d_inode->i_sb->s_id,
			(long long)NFS_FILEID(req->wb_context->dentry->d_inode),
			req->wb_bytes,
			(long long)req_offset(req));
	nfs_release_request(req);
}

/* Note io was page aligned */
static void nfs_read_completion(struct nfs_pgio_header *hdr)
{
	unsigned long bytes = 0;

	if (test_bit(NFS_IOHDR_REDO, &hdr->flags))
		goto out;
	while (!list_empty(&hdr->pages)) {
		struct nfs_page *req = nfs_list_entry(hdr->pages.next);
		struct page *page = req->wb_page;

		if (test_bit(NFS_IOHDR_EOF, &hdr->flags)) {
			if (bytes > hdr->good_bytes)
				zero_user(page, 0, PAGE_SIZE);
			else if (hdr->good_bytes - bytes < PAGE_SIZE)
				zero_user_segment(page,
					hdr->good_bytes & ~PAGE_MASK,
					PAGE_SIZE);
		}
		bytes += req->wb_bytes;
		if (test_bit(NFS_IOHDR_ERROR, &hdr->flags)) {
			if (bytes <= hdr->good_bytes)
				SetPageUptodate(page);
		} else
			SetPageUptodate(page);
		nfs_list_remove_request(req);
		nfs_readpage_release(req);
	}
out:
	hdr->release(hdr);
}
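
/*
 * Kick off an asynchronous READ RPC for @data: fill in the rpc_message,
 * let the protocol-specific ->read_setup() complete the arguments, then
 * hand the task to the RPC layer.  The caller's call_ops run as the
 * request completes.
 */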
int nfs_initiate_read(struct rpc_clnt *clnt,
		      struct nfs_read_data *data,
		      const struct rpc_call_ops *call_ops, int flags)
{
	struct inode *inode = data->header->inode;
	int swap_flags = IS_SWAPFILE(inode) ? NFS_RPC_SWAPFLAGS : 0;
	struct rpc_task *task;
	struct rpc_message msg = {
		.rpc_argp = &data->args,
		.rpc_resp = &data->res,
		.rpc_cred = data->header->cred,
	};
	struct rpc_task_setup task_setup_data = {
		.task = &data->task,
		.rpc_client = clnt,
		.rpc_message = &msg,
		.callback_ops = call_ops,
		.callback_data = data,
		.workqueue = nfsiod_workqueue,
		.flags = RPC_TASK_ASYNC | swap_flags | flags,
	};

	/* Set up the initial task struct. */
	NFS_PROTO(inode)->read_setup(data, &msg);

	dprintk("NFS: %5u initiated read call (req %s/%lld, %u bytes @ "
			"offset %llu)\n",
			data->task.tk_pid,
			inode->i_sb->s_id,
			(long long)NFS_FILEID(inode),
			data->args.count,
			(unsigned long long)data->args.offset);

	task = rpc_run_task(&task_setup_data);
	if (IS_ERR(task))
		return PTR_ERR(task);
	rpc_put_task(task);
	return 0;
}
EXPORT_SYMBOL_GPL(nfs_initiate_read);

/*
 * Set up the NFS read request struct
 */
static void nfs_read_rpcsetup(struct nfs_read_data *data,
		unsigned int count, unsigned int offset)
{
	struct nfs_page *req = data->header->req;

	data->args.fh = NFS_FH(data->header->inode);
	data->args.offset = req_offset(req) + offset;
	data->args.pgbase = req->wb_pgbase + offset;
	data->args.pages = data->pages.pagevec;
	data->args.count = count;
	data->args.context = get_nfs_open_context(req->wb_context);
	data->args.lock_context = req->wb_lock_context;

	data->res.fattr = &data->fattr;
	data->res.count = count;
	data->res.eof = 0;
	nfs_fattr_init(&data->fattr);
}

static int nfs_do_read(struct nfs_read_data *data,
		const struct rpc_call_ops *call_ops)
{
	struct inode *inode = data->header->inode;

	return nfs_initiate_read(NFS_CLIENT(inode), data, call_ops, 0);
}

static int
nfs_do_multiple_reads(struct list_head *head,
		const struct rpc_call_ops *call_ops)
{
	struct nfs_read_data *data;
	int ret = 0;

	while (!list_empty(head)) {
		int ret2;

		data = list_first_entry(head, struct nfs_read_data, list);
		list_del_init(&data->list);

		ret2 = nfs_do_read(data, call_ops);
		if (ret == 0)
			ret = ret2;
	}
	return ret;
}

static void
nfs_async_read_error(struct list_head *head)
{
	struct nfs_page *req;

	while (!list_empty(head)) {
		req = nfs_list_entry(head->next);
		nfs_list_remove_request(req);
		nfs_readpage_release(req);
	}
}

static const struct nfs_pgio_completion_ops nfs_async_read_completion_ops = {
	.error_cleanup = nfs_async_read_error,
	.completion = nfs_read_completion,
};

static void nfs_pagein_error(struct nfs_pageio_descriptor *desc,
		struct nfs_pgio_header *hdr)
{
	set_bit(NFS_IOHDR_REDO, &hdr->flags);
	while (!list_empty(&hdr->rpc_list)) {
		struct nfs_read_data *data = list_first_entry(&hdr->rpc_list,
				struct nfs_read_data, list);
		list_del(&data->list);
		nfs_readdata_release(data);
	}
	desc->pg_completion_ops->error_cleanup(&desc->pg_list);
}

/*
 * Generate multiple requests to fill a single page.
 *
 * We optimize to reduce the number of read operations on the wire. If we
 * detect that we're reading a page, or an area of a page, that is past the
 * end of file, we do not generate NFS read operations but just clear the
 * parts of the page that would have come back zero from the server anyway.
 *
 * We rely on the cached value of i_size to make this determination; another
 * client can fill pages on the server past our cached end-of-file, but we
 * won't see the new data until our attribute cache is updated. This is more
 * or less conventional NFS client behavior.
 */
static int nfs_pagein_multi(struct nfs_pageio_descriptor *desc,
			    struct nfs_pgio_header *hdr)
{
	struct nfs_page *req = hdr->req;
	struct page *page = req->wb_page;
	struct nfs_read_data *data;
	size_t rsize = desc->pg_bsize, nbytes;
	unsigned int offset;

	offset = 0;
	nbytes = desc->pg_count;
	do {
		size_t len = min(nbytes, rsize);

		data = nfs_readdata_alloc(hdr, 1);
		if (!data) {
			nfs_pagein_error(desc, hdr);
			return -ENOMEM;
		}
		data->pages.pagevec[0] = page;
		nfs_read_rpcsetup(data, len, offset);
		list_add(&data->list, &hdr->rpc_list);
		nbytes -= len;
		offset += len;
	} while (nbytes != 0);

	nfs_list_remove_request(req);
	nfs_list_add_request(req, &hdr->pages);
	desc->pg_rpc_callops = &nfs_read_common_ops;
	return 0;
}

static int nfs_pagein_one(struct nfs_pageio_descriptor *desc,
			  struct nfs_pgio_header *hdr)
{
	struct nfs_page *req;
	struct page **pages;
	struct nfs_read_data *data;
	struct list_head *head = &desc->pg_list;

	data = nfs_readdata_alloc(hdr, nfs_page_array_len(desc->pg_base,
							  desc->pg_count));
	if (!data) {
		nfs_pagein_error(desc, hdr);
		return -ENOMEM;
	}

	pages = data->pages.pagevec;
	while (!list_empty(head)) {
		req = nfs_list_entry(head->next);
		nfs_list_remove_request(req);
		nfs_list_add_request(req, &hdr->pages);
		*pages++ = req->wb_page;
	}

	nfs_read_rpcsetup(data, desc->pg_count, 0);
	list_add(&data->list, &hdr->rpc_list);
	desc->pg_rpc_callops = &nfs_read_common_ops;
	return 0;
}

int nfs_generic_pagein(struct nfs_pageio_descriptor *desc,
		       struct nfs_pgio_header *hdr)
{
	if (desc->pg_bsize < PAGE_CACHE_SIZE)
		return nfs_pagein_multi(desc, hdr);
	return nfs_pagein_one(desc, hdr);
}
EXPORT_SYMBOL_GPL(nfs_generic_pagein);

static int nfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc)
{
	struct nfs_read_header *rhdr;
	struct nfs_pgio_header *hdr;
	int ret;

	rhdr = nfs_readhdr_alloc();
	if (!rhdr) {
		desc->pg_completion_ops->error_cleanup(&desc->pg_list);
		return -ENOMEM;
	}
	hdr = &rhdr->header;
	nfs_pgheader_init(desc, hdr, nfs_readhdr_free);
	atomic_inc(&hdr->refcnt);
	ret = nfs_generic_pagein(desc, hdr);
	if (ret == 0)
		ret = nfs_do_multiple_reads(&hdr->rpc_list,
					    desc->pg_rpc_callops);
	if (atomic_dec_and_test(&hdr->refcnt))
		hdr->completion_ops->completion(hdr);
	return ret;
}

static const struct nfs_pageio_ops nfs_pageio_read_ops = {
	.pg_test = nfs_generic_pg_test,
	.pg_doio = nfs_generic_pg_readpages,
};

/*
 * This is the callback from RPC telling us whether a reply was
 * received or some error occurred (timeout or socket shutdown).
 */
437 */ 438 int nfs_readpage_result(struct rpc_task *task, struct nfs_read_data *data) 439 { 440 struct inode *inode = data->header->inode; 441 int status; 442 443 dprintk("NFS: %s: %5u, (status %d)\n", __func__, task->tk_pid, 444 task->tk_status); 445 446 status = NFS_PROTO(inode)->read_done(task, data); 447 if (status != 0) 448 return status; 449 450 nfs_add_stats(inode, NFSIOS_SERVERREADBYTES, data->res.count); 451 452 if (task->tk_status == -ESTALE) { 453 set_bit(NFS_INO_STALE, &NFS_I(inode)->flags); 454 nfs_mark_for_revalidate(inode); 455 } 456 return 0; 457 } 458 459 static void nfs_readpage_retry(struct rpc_task *task, struct nfs_read_data *data) 460 { 461 struct nfs_readargs *argp = &data->args; 462 struct nfs_readres *resp = &data->res; 463 464 /* This is a short read! */ 465 nfs_inc_stats(data->header->inode, NFSIOS_SHORTREAD); 466 /* Has the server at least made some progress? */ 467 if (resp->count == 0) { 468 nfs_set_pgio_error(data->header, -EIO, argp->offset); 469 return; 470 } 471 /* Yes, so retry the read at the end of the data */ 472 data->mds_offset += resp->count; 473 argp->offset += resp->count; 474 argp->pgbase += resp->count; 475 argp->count -= resp->count; 476 rpc_restart_call_prepare(task); 477 } 478 479 static void nfs_readpage_result_common(struct rpc_task *task, void *calldata) 480 { 481 struct nfs_read_data *data = calldata; 482 struct nfs_pgio_header *hdr = data->header; 483 484 /* Note the only returns of nfs_readpage_result are 0 and -EAGAIN */ 485 if (nfs_readpage_result(task, data) != 0) 486 return; 487 if (task->tk_status < 0) 488 nfs_set_pgio_error(hdr, task->tk_status, data->args.offset); 489 else if (data->res.eof) { 490 loff_t bound; 491 492 bound = data->args.offset + data->res.count; 493 spin_lock(&hdr->lock); 494 if (bound < hdr->io_start + hdr->good_bytes) { 495 set_bit(NFS_IOHDR_EOF, &hdr->flags); 496 clear_bit(NFS_IOHDR_ERROR, &hdr->flags); 497 hdr->good_bytes = bound - hdr->io_start; 498 } 499 spin_unlock(&hdr->lock); 500 } else if (data->res.count != data->args.count) 501 nfs_readpage_retry(task, data); 502 } 503 504 static void nfs_readpage_release_common(void *calldata) 505 { 506 nfs_readdata_release(calldata); 507 } 508 509 void nfs_read_prepare(struct rpc_task *task, void *calldata) 510 { 511 struct nfs_read_data *data = calldata; 512 NFS_PROTO(data->header->inode)->read_rpc_prepare(task, data); 513 } 514 515 static const struct rpc_call_ops nfs_read_common_ops = { 516 .rpc_call_prepare = nfs_read_prepare, 517 .rpc_call_done = nfs_readpage_result_common, 518 .rpc_release = nfs_readpage_release_common, 519 }; 520 521 /* 522 * Read a page over NFS. 523 * We read the page synchronously in the following case: 524 * - The error flag is set for this page. This happens only when a 525 * previous async read operation failed. 526 */ 527 int nfs_readpage(struct file *file, struct page *page) 528 { 529 struct nfs_open_context *ctx; 530 struct inode *inode = page_file_mapping(page)->host; 531 int error; 532 533 dprintk("NFS: nfs_readpage (%p %ld@%lu)\n", 534 page, PAGE_CACHE_SIZE, page_file_index(page)); 535 nfs_inc_stats(inode, NFSIOS_VFSREADPAGE); 536 nfs_add_stats(inode, NFSIOS_READPAGES, 1); 537 538 /* 539 * Try to flush any pending writes to the file.. 540 * 541 * NOTE! Because we own the page lock, there cannot 542 * be any new pending writes generated at this point 543 * for this page (other pages can be written to). 
544 */ 545 error = nfs_wb_page(inode, page); 546 if (error) 547 goto out_unlock; 548 if (PageUptodate(page)) 549 goto out_unlock; 550 551 error = -ESTALE; 552 if (NFS_STALE(inode)) 553 goto out_unlock; 554 555 if (file == NULL) { 556 error = -EBADF; 557 ctx = nfs_find_open_context(inode, NULL, FMODE_READ); 558 if (ctx == NULL) 559 goto out_unlock; 560 } else 561 ctx = get_nfs_open_context(nfs_file_open_context(file)); 562 563 if (!IS_SYNC(inode)) { 564 error = nfs_readpage_from_fscache(ctx, inode, page); 565 if (error == 0) 566 goto out; 567 } 568 569 error = nfs_readpage_async(ctx, inode, page); 570 571 out: 572 put_nfs_open_context(ctx); 573 return error; 574 out_unlock: 575 unlock_page(page); 576 return error; 577 } 578 579 struct nfs_readdesc { 580 struct nfs_pageio_descriptor *pgio; 581 struct nfs_open_context *ctx; 582 }; 583 584 static int 585 readpage_async_filler(void *data, struct page *page) 586 { 587 struct nfs_readdesc *desc = (struct nfs_readdesc *)data; 588 struct inode *inode = page_file_mapping(page)->host; 589 struct nfs_page *new; 590 unsigned int len; 591 int error; 592 593 len = nfs_page_length(page); 594 if (len == 0) 595 return nfs_return_empty_page(page); 596 597 new = nfs_create_request(desc->ctx, inode, page, 0, len); 598 if (IS_ERR(new)) 599 goto out_error; 600 601 if (len < PAGE_CACHE_SIZE) 602 zero_user_segment(page, len, PAGE_CACHE_SIZE); 603 if (!nfs_pageio_add_request(desc->pgio, new)) { 604 error = desc->pgio->pg_error; 605 goto out_unlock; 606 } 607 return 0; 608 out_error: 609 error = PTR_ERR(new); 610 out_unlock: 611 unlock_page(page); 612 return error; 613 } 614 615 int nfs_readpages(struct file *filp, struct address_space *mapping, 616 struct list_head *pages, unsigned nr_pages) 617 { 618 struct nfs_pageio_descriptor pgio; 619 struct nfs_readdesc desc = { 620 .pgio = &pgio, 621 }; 622 struct inode *inode = mapping->host; 623 unsigned long npages; 624 int ret = -ESTALE; 625 626 dprintk("NFS: nfs_readpages (%s/%Ld %d)\n", 627 inode->i_sb->s_id, 628 (long long)NFS_FILEID(inode), 629 nr_pages); 630 nfs_inc_stats(inode, NFSIOS_VFSREADPAGES); 631 632 if (NFS_STALE(inode)) 633 goto out; 634 635 if (filp == NULL) { 636 desc.ctx = nfs_find_open_context(inode, NULL, FMODE_READ); 637 if (desc.ctx == NULL) 638 return -EBADF; 639 } else 640 desc.ctx = get_nfs_open_context(nfs_file_open_context(filp)); 641 642 /* attempt to read as many of the pages as possible from the cache 643 * - this returns -ENOBUFS immediately if the cookie is negative 644 */ 645 ret = nfs_readpages_from_fscache(desc.ctx, inode, mapping, 646 pages, &nr_pages); 647 if (ret == 0) 648 goto read_complete; /* all pages were read */ 649 650 NFS_PROTO(inode)->read_pageio_init(&pgio, inode, &nfs_async_read_completion_ops); 651 652 ret = read_cache_pages(mapping, pages, readpage_async_filler, &desc); 653 654 nfs_pageio_complete(&pgio); 655 NFS_I(inode)->read_io += pgio.pg_bytes_written; 656 npages = (pgio.pg_bytes_written + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; 657 nfs_add_stats(inode, NFSIOS_READPAGES, npages); 658 read_complete: 659 put_nfs_open_context(desc.ctx); 660 out: 661 return ret; 662 } 663 664 int __init nfs_init_readpagecache(void) 665 { 666 nfs_rdata_cachep = kmem_cache_create("nfs_read_data", 667 sizeof(struct nfs_read_header), 668 0, SLAB_HWCACHE_ALIGN, 669 NULL); 670 if (nfs_rdata_cachep == NULL) 671 return -ENOMEM; 672 673 return 0; 674 } 675 676 void nfs_destroy_readpagecache(void) 677 { 678 kmem_cache_destroy(nfs_rdata_cachep); 679 } 680