/*
 * linux/fs/nfs/read.c
 *
 * Block I/O for NFS
 *
 * Partial copy of Linus' read cache modifications to fs/nfs/file.c
 * modified for async RPC by okir@monad.swb.de
 */

#include <linux/time.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/fcntl.h>
#include <linux/stat.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/pagemap.h>
#include <linux/sunrpc/clnt.h>
#include <linux/nfs_fs.h>
#include <linux/nfs_page.h>
#include <linux/module.h>

#include "nfs4_fs.h"
#include "internal.h"
#include "iostat.h"
#include "fscache.h"

#define NFSDBG_FACILITY		NFSDBG_PAGECACHE

static const struct nfs_pageio_ops nfs_pageio_read_ops;
static const struct rpc_call_ops nfs_read_common_ops;
static const struct nfs_pgio_completion_ops nfs_async_read_completion_ops;

static struct kmem_cache *nfs_rdata_cachep;

struct nfs_read_header *nfs_readhdr_alloc(void)
{
	struct nfs_read_header *rhdr;

	rhdr = kmem_cache_zalloc(nfs_rdata_cachep, GFP_KERNEL);
	if (rhdr) {
		struct nfs_pgio_header *hdr = &rhdr->header;

		INIT_LIST_HEAD(&hdr->pages);
		INIT_LIST_HEAD(&hdr->rpc_list);
		spin_lock_init(&hdr->lock);
		atomic_set(&hdr->refcnt, 0);
	}
	return rhdr;
}
EXPORT_SYMBOL_GPL(nfs_readhdr_alloc);

static struct nfs_read_data *nfs_readdata_alloc(struct nfs_pgio_header *hdr,
						unsigned int pagecount)
{
	struct nfs_read_data *data, *prealloc;

	prealloc = &container_of(hdr, struct nfs_read_header, header)->rpc_data;
	if (prealloc->header == NULL)
		data = prealloc;
	else
		data = kzalloc(sizeof(*data), GFP_KERNEL);
	if (!data)
		goto out;

	if (nfs_pgarray_set(&data->pages, pagecount)) {
		data->header = hdr;
		atomic_inc(&hdr->refcnt);
	} else {
		if (data != prealloc)
			kfree(data);
		data = NULL;
	}
out:
	return data;
}

void nfs_readhdr_free(struct nfs_pgio_header *hdr)
{
	struct nfs_read_header *rhdr = container_of(hdr, struct nfs_read_header, header);

	kmem_cache_free(nfs_rdata_cachep, rhdr);
}
EXPORT_SYMBOL_GPL(nfs_readhdr_free);

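/*
 * Release one nfs_read_data: drop the open context reference, free a
 * separately allocated page vector, and put our reference on the shared
 * header, running its completion once the last reference is gone.  The
 * preallocated rpc_data embedded in the header is not kfree'd.
 */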
void nfs_readdata_release(struct nfs_read_data *rdata)
{
	struct nfs_pgio_header *hdr = rdata->header;
	struct nfs_read_header *read_header = container_of(hdr, struct nfs_read_header, header);

	put_nfs_open_context(rdata->args.context);
	if (rdata->pages.pagevec != rdata->pages.page_array)
		kfree(rdata->pages.pagevec);
	if (rdata == &read_header->rpc_data) {
		rdata->header = NULL;
		rdata = NULL;
	}
	if (atomic_dec_and_test(&hdr->refcnt))
		hdr->completion_ops->completion(hdr);
	/* Note: we only free the rpc_task after callbacks are done.
	 * See the comment in rpc_free_task() for why
	 */
	kfree(rdata);
}
EXPORT_SYMBOL_GPL(nfs_readdata_release);

static
int nfs_return_empty_page(struct page *page)
{
	zero_user(page, 0, PAGE_CACHE_SIZE);
	SetPageUptodate(page);
	unlock_page(page);
	return 0;
}

void nfs_pageio_init_read(struct nfs_pageio_descriptor *pgio,
			  struct inode *inode,
			  const struct nfs_pgio_completion_ops *compl_ops)
{
	nfs_pageio_init(pgio, inode, &nfs_pageio_read_ops, compl_ops,
			NFS_SERVER(inode)->rsize, 0);
}
EXPORT_SYMBOL_GPL(nfs_pageio_init_read);

void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio)
{
	pgio->pg_ops = &nfs_pageio_read_ops;
	pgio->pg_bsize = NFS_SERVER(pgio->pg_inode)->rsize;
}
EXPORT_SYMBOL_GPL(nfs_pageio_reset_read_mds);

int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode,
		       struct page *page)
{
	struct nfs_page *new;
	unsigned int len;
	struct nfs_pageio_descriptor pgio;

	len = nfs_page_length(page);
	if (len == 0)
		return nfs_return_empty_page(page);
	new = nfs_create_request(ctx, inode, page, 0, len);
	if (IS_ERR(new)) {
		unlock_page(page);
		return PTR_ERR(new);
	}
	if (len < PAGE_CACHE_SIZE)
		zero_user_segment(page, len, PAGE_CACHE_SIZE);

	NFS_PROTO(inode)->read_pageio_init(&pgio, inode, &nfs_async_read_completion_ops);
	nfs_pageio_add_request(&pgio, new);
	nfs_pageio_complete(&pgio);
	NFS_I(inode)->read_io += pgio.pg_bytes_written;
	return 0;
}

static void nfs_readpage_release(struct nfs_page *req)
{
	struct inode *d_inode = req->wb_context->dentry->d_inode;

	if (PageUptodate(req->wb_page))
		nfs_readpage_to_fscache(d_inode, req->wb_page, 0);

	unlock_page(req->wb_page);

	dprintk("NFS: read done (%s/%Lu %d@%Ld)\n",
		req->wb_context->dentry->d_inode->i_sb->s_id,
		(unsigned long long)NFS_FILEID(req->wb_context->dentry->d_inode),
		req->wb_bytes,
		(long long)req_offset(req));
	nfs_release_request(req);
}

/* Note io was page aligned */
static void nfs_read_completion(struct nfs_pgio_header *hdr)
{
	unsigned long bytes = 0;

	if (test_bit(NFS_IOHDR_REDO, &hdr->flags))
		goto out;
	while (!list_empty(&hdr->pages)) {
		struct nfs_page *req = nfs_list_entry(hdr->pages.next);
		struct page *page = req->wb_page;

		if (test_bit(NFS_IOHDR_EOF, &hdr->flags)) {
			if (bytes > hdr->good_bytes)
				zero_user(page, 0, PAGE_SIZE);
			else if (hdr->good_bytes - bytes < PAGE_SIZE)
				zero_user_segment(page,
					hdr->good_bytes & ~PAGE_MASK,
					PAGE_SIZE);
		}
		bytes += req->wb_bytes;
		if (test_bit(NFS_IOHDR_ERROR, &hdr->flags)) {
			if (bytes <= hdr->good_bytes)
				SetPageUptodate(page);
		} else
			SetPageUptodate(page);
		nfs_list_remove_request(req);
		nfs_readpage_release(req);
	}
out:
	hdr->release(hdr);
}

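/*
 * Set up the READ RPC message and fire off an asynchronous rpc_task for
 * @data, adding NFS_RPC_SWAPFLAGS when the inode backs a swapfile.
 */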
int nfs_initiate_read(struct rpc_clnt *clnt,
		      struct nfs_read_data *data,
		      const struct rpc_call_ops *call_ops, int flags)
{
	struct inode *inode = data->header->inode;
	int swap_flags = IS_SWAPFILE(inode) ? NFS_RPC_SWAPFLAGS : 0;
	struct rpc_task *task;
	struct rpc_message msg = {
		.rpc_argp = &data->args,
		.rpc_resp = &data->res,
		.rpc_cred = data->header->cred,
	};
	struct rpc_task_setup task_setup_data = {
		.task = &data->task,
		.rpc_client = clnt,
		.rpc_message = &msg,
		.callback_ops = call_ops,
		.callback_data = data,
		.workqueue = nfsiod_workqueue,
		.flags = RPC_TASK_ASYNC | swap_flags | flags,
	};

	/* Set up the initial task struct. */
	NFS_PROTO(inode)->read_setup(data, &msg);

	dprintk("NFS: %5u initiated read call (req %s/%llu, %u bytes @ "
			"offset %llu)\n",
			data->task.tk_pid,
			inode->i_sb->s_id,
			(unsigned long long)NFS_FILEID(inode),
			data->args.count,
			(unsigned long long)data->args.offset);

	task = rpc_run_task(&task_setup_data);
	if (IS_ERR(task))
		return PTR_ERR(task);
	rpc_put_task(task);
	return 0;
}
EXPORT_SYMBOL_GPL(nfs_initiate_read);

/*
 * Set up the NFS read request struct
 */
static void nfs_read_rpcsetup(struct nfs_read_data *data,
		unsigned int count, unsigned int offset)
{
	struct nfs_page *req = data->header->req;

	data->args.fh = NFS_FH(data->header->inode);
	data->args.offset = req_offset(req) + offset;
	data->args.pgbase = req->wb_pgbase + offset;
	data->args.pages = data->pages.pagevec;
	data->args.count = count;
	data->args.context = get_nfs_open_context(req->wb_context);
	data->args.lock_context = req->wb_lock_context;

	data->res.fattr = &data->fattr;
	data->res.count = count;
	data->res.eof = 0;
	nfs_fattr_init(&data->fattr);
}

static int nfs_do_read(struct nfs_read_data *data,
		const struct rpc_call_ops *call_ops)
{
	struct inode *inode = data->header->inode;

	return nfs_initiate_read(NFS_CLIENT(inode), data, call_ops, 0);
}

static int
nfs_do_multiple_reads(struct list_head *head,
		const struct rpc_call_ops *call_ops)
{
	struct nfs_read_data *data;
	int ret = 0;

	while (!list_empty(head)) {
		int ret2;

		data = list_first_entry(head, struct nfs_read_data, list);
		list_del_init(&data->list);

		ret2 = nfs_do_read(data, call_ops);
		if (ret == 0)
			ret = ret2;
	}
	return ret;
}

static void
nfs_async_read_error(struct list_head *head)
{
	struct nfs_page *req;

	while (!list_empty(head)) {
		req = nfs_list_entry(head->next);
		nfs_list_remove_request(req);
		nfs_readpage_release(req);
	}
}

static const struct nfs_pgio_completion_ops nfs_async_read_completion_ops = {
	.error_cleanup = nfs_async_read_error,
	.completion = nfs_read_completion,
};

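/*
 * Undo a failed attempt to set up read RPCs: mark the header for redo,
 * release any nfs_read_data already queued on the rpc_list, and hand the
 * remaining page requests back to the completion ops for cleanup.
 */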
static void nfs_pagein_error(struct nfs_pageio_descriptor *desc,
		struct nfs_pgio_header *hdr)
{
	set_bit(NFS_IOHDR_REDO, &hdr->flags);
	while (!list_empty(&hdr->rpc_list)) {
		struct nfs_read_data *data = list_first_entry(&hdr->rpc_list,
				struct nfs_read_data, list);
		list_del(&data->list);
		nfs_readdata_release(data);
	}
	desc->pg_completion_ops->error_cleanup(&desc->pg_list);
}

/*
 * Generate multiple requests to fill a single page.
 *
 * We optimize to reduce the number of read operations on the wire.  If we
 * detect that we're reading a page, or an area of a page, that is past the
 * end of file, we do not generate NFS read operations but just clear the
 * parts of the page that would have come back zero from the server anyway.
 *
 * We rely on the cached value of i_size to make this determination; another
 * client can fill pages on the server past our cached end-of-file, but we
 * won't see the new data until our attribute cache is updated.  This is more
 * or less conventional NFS client behavior.
 */
static int nfs_pagein_multi(struct nfs_pageio_descriptor *desc,
			    struct nfs_pgio_header *hdr)
{
	struct nfs_page *req = hdr->req;
	struct page *page = req->wb_page;
	struct nfs_read_data *data;
	size_t rsize = desc->pg_bsize, nbytes;
	unsigned int offset;

	offset = 0;
	nbytes = desc->pg_count;
	do {
		size_t len = min(nbytes, rsize);

		data = nfs_readdata_alloc(hdr, 1);
		if (!data) {
			nfs_pagein_error(desc, hdr);
			return -ENOMEM;
		}
		data->pages.pagevec[0] = page;
		nfs_read_rpcsetup(data, len, offset);
		list_add(&data->list, &hdr->rpc_list);
		nbytes -= len;
		offset += len;
	} while (nbytes != 0);

	nfs_list_remove_request(req);
	nfs_list_add_request(req, &hdr->pages);
	desc->pg_rpc_callops = &nfs_read_common_ops;
	return 0;
}

static int nfs_pagein_one(struct nfs_pageio_descriptor *desc,
			  struct nfs_pgio_header *hdr)
{
	struct nfs_page *req;
	struct page **pages;
	struct nfs_read_data *data;
	struct list_head *head = &desc->pg_list;

	data = nfs_readdata_alloc(hdr, nfs_page_array_len(desc->pg_base,
							  desc->pg_count));
	if (!data) {
		nfs_pagein_error(desc, hdr);
		return -ENOMEM;
	}

	pages = data->pages.pagevec;
	while (!list_empty(head)) {
		req = nfs_list_entry(head->next);
		nfs_list_remove_request(req);
		nfs_list_add_request(req, &hdr->pages);
		*pages++ = req->wb_page;
	}

	nfs_read_rpcsetup(data, desc->pg_count, 0);
	list_add(&data->list, &hdr->rpc_list);
	desc->pg_rpc_callops = &nfs_read_common_ops;
	return 0;
}

int nfs_generic_pagein(struct nfs_pageio_descriptor *desc,
		       struct nfs_pgio_header *hdr)
{
	if (desc->pg_bsize < PAGE_CACHE_SIZE)
		return nfs_pagein_multi(desc, hdr);
	return nfs_pagein_one(desc, hdr);
}
EXPORT_SYMBOL_GPL(nfs_generic_pagein);

static int nfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc)
{
	struct nfs_read_header *rhdr;
	struct nfs_pgio_header *hdr;
	int ret;

	rhdr = nfs_readhdr_alloc();
	if (!rhdr) {
		desc->pg_completion_ops->error_cleanup(&desc->pg_list);
		return -ENOMEM;
	}
	hdr = &rhdr->header;
	nfs_pgheader_init(desc, hdr, nfs_readhdr_free);
	atomic_inc(&hdr->refcnt);
	ret = nfs_generic_pagein(desc, hdr);
	if (ret == 0)
		ret = nfs_do_multiple_reads(&hdr->rpc_list,
					    desc->pg_rpc_callops);
	if (atomic_dec_and_test(&hdr->refcnt))
		hdr->completion_ops->completion(hdr);
	return ret;
}

static const struct nfs_pageio_ops nfs_pageio_read_ops = {
	.pg_test = nfs_generic_pg_test,
	.pg_doio = nfs_generic_pg_readpages,
};

/*
 * This is the callback from RPC telling us whether a reply was
 * received or some error occurred (timeout or socket shutdown).
 */
int nfs_readpage_result(struct rpc_task *task, struct nfs_read_data *data)
{
	struct inode *inode = data->header->inode;
	int status;

	dprintk("NFS: %s: %5u, (status %d)\n", __func__, task->tk_pid,
			task->tk_status);

	status = NFS_PROTO(inode)->read_done(task, data);
	if (status != 0)
		return status;

	nfs_add_stats(inode, NFSIOS_SERVERREADBYTES, data->res.count);

	if (task->tk_status == -ESTALE) {
		set_bit(NFS_INO_STALE, &NFS_I(inode)->flags);
		nfs_mark_for_revalidate(inode);
	}
	return 0;
}

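/*
 * The server returned fewer bytes than requested (a short read).  If it
 * made no progress at all, flag the header with -EIO; otherwise advance
 * the request arguments past the bytes received and restart the RPC.
 */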
static void nfs_readpage_retry(struct rpc_task *task, struct nfs_read_data *data)
{
	struct nfs_readargs *argp = &data->args;
	struct nfs_readres *resp = &data->res;

	/* This is a short read! */
	nfs_inc_stats(data->header->inode, NFSIOS_SHORTREAD);
	/* Has the server at least made some progress? */
	if (resp->count == 0) {
		nfs_set_pgio_error(data->header, -EIO, argp->offset);
		return;
	}
	/* Yes, so retry the read at the end of the data */
	data->mds_offset += resp->count;
	argp->offset += resp->count;
	argp->pgbase += resp->count;
	argp->count -= resp->count;
	rpc_restart_call_prepare(task);
}

static void nfs_readpage_result_common(struct rpc_task *task, void *calldata)
{
	struct nfs_read_data *data = calldata;
	struct nfs_pgio_header *hdr = data->header;

	/* Note the only returns of nfs_readpage_result are 0 and -EAGAIN */
	if (nfs_readpage_result(task, data) != 0)
		return;
	if (task->tk_status < 0)
		nfs_set_pgio_error(hdr, task->tk_status, data->args.offset);
	else if (data->res.eof) {
		loff_t bound;

		bound = data->args.offset + data->res.count;
		spin_lock(&hdr->lock);
		if (bound < hdr->io_start + hdr->good_bytes) {
			set_bit(NFS_IOHDR_EOF, &hdr->flags);
			clear_bit(NFS_IOHDR_ERROR, &hdr->flags);
			hdr->good_bytes = bound - hdr->io_start;
		}
		spin_unlock(&hdr->lock);
	} else if (data->res.count != data->args.count)
		nfs_readpage_retry(task, data);
}

static void nfs_readpage_release_common(void *calldata)
{
	nfs_readdata_release(calldata);
}

void nfs_read_prepare(struct rpc_task *task, void *calldata)
{
	struct nfs_read_data *data = calldata;
	int err;
	err = NFS_PROTO(data->header->inode)->read_rpc_prepare(task, data);
	if (err)
		rpc_exit(task, err);
}

static const struct rpc_call_ops nfs_read_common_ops = {
	.rpc_call_prepare = nfs_read_prepare,
	.rpc_call_done = nfs_readpage_result_common,
	.rpc_release = nfs_readpage_release_common,
};

/*
 * Read a page over NFS.
 * We read the page synchronously in the following case:
 *  -	The error flag is set for this page. This happens only when a
 *	previous async read operation failed.
 */
int nfs_readpage(struct file *file, struct page *page)
{
	struct nfs_open_context *ctx;
	struct inode *inode = page_file_mapping(page)->host;
	int error;

	dprintk("NFS: nfs_readpage (%p %ld@%lu)\n",
		page, PAGE_CACHE_SIZE, page_file_index(page));
	nfs_inc_stats(inode, NFSIOS_VFSREADPAGE);
	nfs_add_stats(inode, NFSIOS_READPAGES, 1);

	/*
	 * Try to flush any pending writes to the file..
	 *
	 * NOTE! Because we own the page lock, there cannot
	 * be any new pending writes generated at this point
	 * for this page (other pages can be written to).
	 */
	error = nfs_wb_page(inode, page);
	if (error)
		goto out_unlock;
	if (PageUptodate(page))
		goto out_unlock;

	error = -ESTALE;
	if (NFS_STALE(inode))
		goto out_unlock;

	if (file == NULL) {
		error = -EBADF;
		ctx = nfs_find_open_context(inode, NULL, FMODE_READ);
		if (ctx == NULL)
			goto out_unlock;
	} else
		ctx = get_nfs_open_context(nfs_file_open_context(file));

	if (!IS_SYNC(inode)) {
		error = nfs_readpage_from_fscache(ctx, inode, page);
		if (error == 0)
			goto out;
	}

	error = nfs_readpage_async(ctx, inode, page);

out:
	put_nfs_open_context(ctx);
	return error;
out_unlock:
	unlock_page(page);
	return error;
}

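/*
 * Per-call state for nfs_readpages(): readpage_async_filler() is invoked
 * by read_cache_pages() for each page and queues a read request on the
 * pageio descriptor carried in this structure.
 */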
struct nfs_readdesc {
	struct nfs_pageio_descriptor *pgio;
	struct nfs_open_context *ctx;
};

static int
readpage_async_filler(void *data, struct page *page)
{
	struct nfs_readdesc *desc = (struct nfs_readdesc *)data;
	struct inode *inode = page_file_mapping(page)->host;
	struct nfs_page *new;
	unsigned int len;
	int error;

	len = nfs_page_length(page);
	if (len == 0)
		return nfs_return_empty_page(page);

	new = nfs_create_request(desc->ctx, inode, page, 0, len);
	if (IS_ERR(new))
		goto out_error;

	if (len < PAGE_CACHE_SIZE)
		zero_user_segment(page, len, PAGE_CACHE_SIZE);
	if (!nfs_pageio_add_request(desc->pgio, new)) {
		error = desc->pgio->pg_error;
		goto out_unlock;
	}
	return 0;
out_error:
	error = PTR_ERR(new);
out_unlock:
	unlock_page(page);
	return error;
}

int nfs_readpages(struct file *filp, struct address_space *mapping,
		struct list_head *pages, unsigned nr_pages)
{
	struct nfs_pageio_descriptor pgio;
	struct nfs_readdesc desc = {
		.pgio = &pgio,
	};
	struct inode *inode = mapping->host;
	unsigned long npages;
	int ret = -ESTALE;

	dprintk("NFS: nfs_readpages (%s/%Lu %d)\n",
			inode->i_sb->s_id,
			(unsigned long long)NFS_FILEID(inode),
			nr_pages);
	nfs_inc_stats(inode, NFSIOS_VFSREADPAGES);

	if (NFS_STALE(inode))
		goto out;

	if (filp == NULL) {
		desc.ctx = nfs_find_open_context(inode, NULL, FMODE_READ);
		if (desc.ctx == NULL)
			return -EBADF;
	} else
		desc.ctx = get_nfs_open_context(nfs_file_open_context(filp));

	/* attempt to read as many of the pages as possible from the cache
	 * - this returns -ENOBUFS immediately if the cookie is negative
	 */
	ret = nfs_readpages_from_fscache(desc.ctx, inode, mapping,
					 pages, &nr_pages);
	if (ret == 0)
		goto read_complete; /* all pages were read */

	NFS_PROTO(inode)->read_pageio_init(&pgio, inode, &nfs_async_read_completion_ops);

	ret = read_cache_pages(mapping, pages, readpage_async_filler, &desc);

	nfs_pageio_complete(&pgio);
	NFS_I(inode)->read_io += pgio.pg_bytes_written;
	npages = (pgio.pg_bytes_written + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
	nfs_add_stats(inode, NFSIOS_READPAGES, npages);
read_complete:
	put_nfs_open_context(desc.ctx);
out:
	return ret;
}

int __init nfs_init_readpagecache(void)
{
	nfs_rdata_cachep = kmem_cache_create("nfs_read_data",
					     sizeof(struct nfs_read_header),
					     0, SLAB_HWCACHE_ALIGN,
					     NULL);
	if (nfs_rdata_cachep == NULL)
		return -ENOMEM;

	return 0;
}

void nfs_destroy_readpagecache(void)
{
	kmem_cache_destroy(nfs_rdata_cachep);
}