/*
 * linux/fs/nfs/read.c
 *
 * Block I/O for NFS
 *
 * Partial copy of Linus' read cache modifications to fs/nfs/file.c
 * modified for async RPC by okir@monad.swb.de
 */

#include <linux/time.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/fcntl.h>
#include <linux/stat.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/pagemap.h>
#include <linux/sunrpc/clnt.h>
#include <linux/nfs_fs.h>
#include <linux/nfs_page.h>

#include <asm/system.h>

#include "nfs4_fs.h"
#include "internal.h"
#include "iostat.h"
#include "fscache.h"

#define NFSDBG_FACILITY		NFSDBG_PAGECACHE

static int nfs_pagein_multi(struct inode *, struct list_head *, unsigned int, size_t, int);
static int nfs_pagein_one(struct inode *, struct list_head *, unsigned int, size_t, int);
static const struct rpc_call_ops nfs_read_partial_ops;
static const struct rpc_call_ops nfs_read_full_ops;

static struct kmem_cache *nfs_rdata_cachep;
static mempool_t *nfs_rdata_mempool;

#define MIN_POOL_READ	(32)

struct nfs_read_data *nfs_readdata_alloc(unsigned int pagecount)
{
	struct nfs_read_data *p = mempool_alloc(nfs_rdata_mempool, GFP_NOFS);

	if (p) {
		memset(p, 0, sizeof(*p));
		INIT_LIST_HEAD(&p->pages);
		p->npages = pagecount;
		p->res.seq_res.sr_slotid = NFS4_MAX_SLOT_TABLE;
		if (pagecount <= ARRAY_SIZE(p->page_array))
			p->pagevec = p->page_array;
		else {
			p->pagevec = kcalloc(pagecount, sizeof(struct page *), GFP_NOFS);
			if (!p->pagevec) {
				mempool_free(p, nfs_rdata_mempool);
				p = NULL;
			}
		}
	}
	return p;
}

void nfs_readdata_free(struct nfs_read_data *p)
{
	if (p && (p->pagevec != &p->page_array[0]))
		kfree(p->pagevec);
	mempool_free(p, nfs_rdata_mempool);
}

static void nfs_readdata_release(struct nfs_read_data *rdata)
{
	put_nfs_open_context(rdata->args.context);
	nfs_readdata_free(rdata);
}

static
int nfs_return_empty_page(struct page *page)
{
	zero_user(page, 0, PAGE_CACHE_SIZE);
	SetPageUptodate(page);
	unlock_page(page);
	return 0;
}

static void nfs_readpage_truncate_uninitialised_page(struct nfs_read_data *data)
{
	unsigned int remainder = data->args.count - data->res.count;
	unsigned int base = data->args.pgbase + data->res.count;
	unsigned int pglen;
	struct page **pages;

	if (data->res.eof == 0 || remainder == 0)
		return;
	/*
	 * Note: "remainder" can never be negative, since we check for
	 * this in the XDR code.
	 */
	pages = &data->args.pages[base >> PAGE_CACHE_SHIFT];
	base &= ~PAGE_CACHE_MASK;
	pglen = PAGE_CACHE_SIZE - base;
	for (;;) {
		if (remainder <= pglen) {
			zero_user(*pages, base, remainder);
			break;
		}
		zero_user(*pages, base, pglen);
		pages++;
		remainder -= pglen;
		pglen = PAGE_CACHE_SIZE;
		base = 0;
	}
}

int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode,
		struct page *page)
{
	LIST_HEAD(one_request);
	struct nfs_page	*new;
	unsigned int len;

	len = nfs_page_length(page);
	if (len == 0)
		return nfs_return_empty_page(page);
	new = nfs_create_request(ctx, inode, page, 0, len);
	if (IS_ERR(new)) {
		unlock_page(page);
		return PTR_ERR(new);
	}
	if (len < PAGE_CACHE_SIZE)
		zero_user_segment(page, len, PAGE_CACHE_SIZE);

	nfs_list_add_request(new, &one_request);
	if (NFS_SERVER(inode)->rsize < PAGE_CACHE_SIZE)
		nfs_pagein_multi(inode, &one_request, 1, len, 0);
	else
		nfs_pagein_one(inode, &one_request, 1, len, 0);
	return 0;
}

static void nfs_readpage_release(struct nfs_page *req)
{
	struct inode *d_inode = req->wb_context->path.dentry->d_inode;

	if (PageUptodate(req->wb_page))
		nfs_readpage_to_fscache(d_inode, req->wb_page, 0);

	unlock_page(req->wb_page);

	dprintk("NFS: read done (%s/%Ld %d@%Ld)\n",
			req->wb_context->path.dentry->d_inode->i_sb->s_id,
			(long long)NFS_FILEID(req->wb_context->path.dentry->d_inode),
			req->wb_bytes,
			(long long)req_offset(req));
	nfs_clear_request(req);
	nfs_release_request(req);
}

/*
 * Set up the NFS read request struct
 */
static int nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data,
		const struct rpc_call_ops *call_ops,
		unsigned int count, unsigned int offset)
{
	struct inode *inode = req->wb_context->path.dentry->d_inode;
	int swap_flags = IS_SWAPFILE(inode) ? NFS_RPC_SWAPFLAGS : 0;
	struct rpc_task *task;
	struct rpc_message msg = {
		.rpc_argp = &data->args,
		.rpc_resp = &data->res,
		.rpc_cred = req->wb_context->cred,
	};
	struct rpc_task_setup task_setup_data = {
		.task = &data->task,
		.rpc_client = NFS_CLIENT(inode),
		.rpc_message = &msg,
		.callback_ops = call_ops,
		.callback_data = data,
		.workqueue = nfsiod_workqueue,
		.flags = RPC_TASK_ASYNC | swap_flags,
	};

	data->req = req;
	data->inode = inode;
	data->cred = msg.rpc_cred;

	data->args.fh = NFS_FH(inode);
	data->args.offset = req_offset(req) + offset;
	data->args.pgbase = req->wb_pgbase + offset;
	data->args.pages = data->pagevec;
	data->args.count = count;
	data->args.context = get_nfs_open_context(req->wb_context);

	data->res.fattr = &data->fattr;
	data->res.count = count;
	data->res.eof = 0;
	nfs_fattr_init(&data->fattr);

	/* Set up the initial task struct. */
	NFS_PROTO(inode)->read_setup(data, &msg);

	dprintk("NFS: %5u initiated read call (req %s/%Ld, %u bytes @ offset %Lu)\n",
			data->task.tk_pid,
			inode->i_sb->s_id,
			(long long)NFS_FILEID(inode),
			count,
			(unsigned long long)data->args.offset);

	task = rpc_run_task(&task_setup_data);
	if (IS_ERR(task))
		return PTR_ERR(task);
	rpc_put_task(task);
	return 0;
}
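
/*
 * Note on the flow above: rpc_run_task() only queues the read, and
 * rpc_put_task() merely drops our reference to the task without
 * waiting for it, so a zero return from nfs_read_rpcsetup() means
 * "successfully submitted", not "data available".  Completion is
 * reported later through the call_ops (nfs_read_partial_ops or
 * nfs_read_full_ops below).
 */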

static void
nfs_async_read_error(struct list_head *head)
{
	struct nfs_page	*req;

	while (!list_empty(head)) {
		req = nfs_list_entry(head->next);
		nfs_list_remove_request(req);
		SetPageError(req->wb_page);
		nfs_readpage_release(req);
	}
}

/*
 * Generate multiple requests to fill a single page.
 *
 * We optimize to reduce the number of read operations on the wire.  If we
 * detect that we're reading a page, or an area of a page, that is past the
 * end of file, we do not generate NFS read operations but just clear the
 * parts of the page that would have come back zero from the server anyway.
 *
 * We rely on the cached value of i_size to make this determination; another
 * client can fill pages on the server past our cached end-of-file, but we
 * won't see the new data until our attribute cache is updated.  This is more
 * or less conventional NFS client behavior.
 */
static int nfs_pagein_multi(struct inode *inode, struct list_head *head, unsigned int npages, size_t count, int flags)
{
	struct nfs_page *req = nfs_list_entry(head->next);
	struct page *page = req->wb_page;
	struct nfs_read_data *data;
	size_t rsize = NFS_SERVER(inode)->rsize, nbytes;
	unsigned int offset;
	int requests = 0;
	int ret = 0;
	LIST_HEAD(list);

	nfs_list_remove_request(req);

	nbytes = count;
	do {
		size_t len = min(nbytes, rsize);

		data = nfs_readdata_alloc(1);
		if (!data)
			goto out_bad;
		list_add(&data->pages, &list);
		requests++;
		nbytes -= len;
	} while (nbytes != 0);
	atomic_set(&req->wb_complete, requests);

	ClearPageError(page);
	offset = 0;
	nbytes = count;
	do {
		int ret2;

		data = list_entry(list.next, struct nfs_read_data, pages);
		list_del_init(&data->pages);

		data->pagevec[0] = page;

		if (nbytes < rsize)
			rsize = nbytes;
		ret2 = nfs_read_rpcsetup(req, data, &nfs_read_partial_ops,
					 rsize, offset);
		if (ret == 0)
			ret = ret2;
		offset += rsize;
		nbytes -= rsize;
	} while (nbytes != 0);

	return ret;

out_bad:
	while (!list_empty(&list)) {
		data = list_entry(list.next, struct nfs_read_data, pages);
		list_del(&data->pages);
		nfs_readdata_free(data);
	}
	SetPageError(page);
	nfs_readpage_release(req);
	return -ENOMEM;
}
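
/*
 * Worked example for nfs_pagein_multi() (illustrative numbers): with
 * rsize = 1024 and count = 4096, the first loop allocates four
 * nfs_read_data structures and sets wb_complete to 4; the second
 * loop issues four 1024-byte reads at page offsets 0, 1024, 2048
 * and 3072.  The page is unlocked and marked up to date only when
 * the last completion drops wb_complete to zero in
 * nfs_readpage_release_partial().
 */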

static int nfs_pagein_one(struct inode *inode, struct list_head *head, unsigned int npages, size_t count, int flags)
{
	struct nfs_page *req;
	struct page **pages;
	struct nfs_read_data *data;
	int ret = -ENOMEM;

	data = nfs_readdata_alloc(npages);
	if (!data)
		goto out_bad;

	pages = data->pagevec;
	while (!list_empty(head)) {
		req = nfs_list_entry(head->next);
		nfs_list_remove_request(req);
		nfs_list_add_request(req, &data->pages);
		ClearPageError(req->wb_page);
		*pages++ = req->wb_page;
	}
	req = nfs_list_entry(data->pages.next);

	return nfs_read_rpcsetup(req, data, &nfs_read_full_ops, count, 0);
out_bad:
	nfs_async_read_error(head);
	return ret;
}

/*
 * This is the callback from RPC telling us whether a reply was
 * received or some error occurred (timeout or socket shutdown).
 */
int nfs_readpage_result(struct rpc_task *task, struct nfs_read_data *data)
{
	int status;

	dprintk("NFS: %s: %5u, (status %d)\n", __func__, task->tk_pid,
			task->tk_status);

	status = NFS_PROTO(data->inode)->read_done(task, data);
	if (status != 0)
		return status;

	nfs_add_stats(data->inode, NFSIOS_SERVERREADBYTES, data->res.count);

	if (task->tk_status == -ESTALE) {
		set_bit(NFS_INO_STALE, &NFS_I(data->inode)->flags);
		nfs_mark_for_revalidate(data->inode);
	}
	return 0;
}

static void nfs_readpage_retry(struct rpc_task *task, struct nfs_read_data *data)
{
	struct nfs_readargs *argp = &data->args;
	struct nfs_readres *resp = &data->res;

	if (resp->eof || resp->count == argp->count)
		return;

	/* This is a short read! */
	nfs_inc_stats(data->inode, NFSIOS_SHORTREAD);
	/* Has the server at least made some progress? */
	if (resp->count == 0)
		return;

	/* Yes, so retry the read at the end of the data */
	argp->offset += resp->count;
	argp->pgbase += resp->count;
	argp->count -= resp->count;
	nfs_restart_rpc(task, NFS_SERVER(data->inode)->nfs_client);
}
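
/*
 * Example of the short-read retry above (illustrative numbers): a
 * 16384-byte request answered with res.count = 4096 and no EOF
 * advances offset, pgbase and count by 4096 and restarts the same
 * RPC task for the remaining 12288 bytes.  This repeats until the
 * request is satisfied in full or the server reports EOF.
 */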

/*
 * Handle a read reply that fills part of a page.
 */
static void nfs_readpage_result_partial(struct rpc_task *task, void *calldata)
{
	struct nfs_read_data *data = calldata;

	if (nfs_readpage_result(task, data) != 0)
		return;
	if (task->tk_status < 0)
		return;

	nfs_readpage_truncate_uninitialised_page(data);
	nfs_readpage_retry(task, data);
}

static void nfs_readpage_release_partial(void *calldata)
{
	struct nfs_read_data *data = calldata;
	struct nfs_page *req = data->req;
	struct page *page = req->wb_page;
	int status = data->task.tk_status;

	if (status < 0)
		SetPageError(page);

	if (atomic_dec_and_test(&req->wb_complete)) {
		if (!PageError(page))
			SetPageUptodate(page);
		nfs_readpage_release(req);
	}
	nfs_readdata_release(calldata);
}

#if defined(CONFIG_NFS_V4_1)
void nfs_read_prepare(struct rpc_task *task, void *calldata)
{
	struct nfs_read_data *data = calldata;

	if (nfs4_setup_sequence(NFS_SERVER(data->inode)->nfs_client,
				&data->args.seq_args, &data->res.seq_res,
				0, task))
		return;
	rpc_call_start(task);
}
#endif /* CONFIG_NFS_V4_1 */

static const struct rpc_call_ops nfs_read_partial_ops = {
#if defined(CONFIG_NFS_V4_1)
	.rpc_call_prepare = nfs_read_prepare,
#endif /* CONFIG_NFS_V4_1 */
	.rpc_call_done = nfs_readpage_result_partial,
	.rpc_release = nfs_readpage_release_partial,
};

/*
 * Mark the pages that were completely filled by the reply (or that fall
 * inside an EOF-terminated reply) up to date.  Example (illustrative
 * numbers): pgbase = 0, args.count = 12288 and res.count = 9000 on
 * 4096-byte pages marks the first two pages; the partially-filled third
 * page is marked only if the reply carried EOF, its tail having been
 * zeroed by nfs_readpage_truncate_uninitialised_page().
 */
static void nfs_readpage_set_pages_uptodate(struct nfs_read_data *data)
{
	unsigned int count = data->res.count;
	unsigned int base = data->args.pgbase;
	struct page **pages;

	if (data->res.eof)
		count = data->args.count;
	if (unlikely(count == 0))
		return;
	pages = &data->args.pages[base >> PAGE_CACHE_SHIFT];
	base &= ~PAGE_CACHE_MASK;
	count += base;
	for (; count >= PAGE_CACHE_SIZE; count -= PAGE_CACHE_SIZE, pages++)
		SetPageUptodate(*pages);
	if (count == 0)
		return;
	/* Was this a short read? */
	if (data->res.eof || data->res.count == data->args.count)
		SetPageUptodate(*pages);
}

/*
 * This is the callback from RPC telling us whether a reply was
 * received or some error occurred (timeout or socket shutdown).
 */
static void nfs_readpage_result_full(struct rpc_task *task, void *calldata)
{
	struct nfs_read_data *data = calldata;

	if (nfs_readpage_result(task, data) != 0)
		return;
	if (task->tk_status < 0)
		return;
	/*
	 * Note: nfs_readpage_retry may change the values of
	 * data->args.  In the multi-page case, we therefore need
	 * to ensure that we call nfs_readpage_set_pages_uptodate()
	 * first.
	 */
	nfs_readpage_truncate_uninitialised_page(data);
	nfs_readpage_set_pages_uptodate(data);
	nfs_readpage_retry(task, data);
}

static void nfs_readpage_release_full(void *calldata)
{
	struct nfs_read_data *data = calldata;

	while (!list_empty(&data->pages)) {
		struct nfs_page *req = nfs_list_entry(data->pages.next);

		nfs_list_remove_request(req);
		nfs_readpage_release(req);
	}
	nfs_readdata_release(calldata);
}

static const struct rpc_call_ops nfs_read_full_ops = {
#if defined(CONFIG_NFS_V4_1)
	.rpc_call_prepare = nfs_read_prepare,
#endif /* CONFIG_NFS_V4_1 */
	.rpc_call_done = nfs_readpage_result_full,
	.rpc_release = nfs_readpage_release_full,
};

/*
 * Read a page over NFS.
 * We read the page synchronously in the following case:
 *  -	The error flag is set for this page.  This happens only when a
 *	previous async read operation failed.
 */
int nfs_readpage(struct file *file, struct page *page)
{
	struct nfs_open_context *ctx;
	struct inode *inode = page->mapping->host;
	int error;

	dprintk("NFS: nfs_readpage (%p %ld@%lu)\n",
		page, PAGE_CACHE_SIZE, page->index);
	nfs_inc_stats(inode, NFSIOS_VFSREADPAGE);
	nfs_add_stats(inode, NFSIOS_READPAGES, 1);

	/*
	 * Try to flush any pending writes to the file.
	 *
	 * NOTE! Because we own the page lock, there cannot
	 * be any new pending writes generated at this point
	 * for this page (other pages can be written to).
	 */
	error = nfs_wb_page(inode, page);
	if (error)
		goto out_unlock;
	if (PageUptodate(page))
		goto out_unlock;

	error = -ESTALE;
	if (NFS_STALE(inode))
		goto out_unlock;

	if (file == NULL) {
		error = -EBADF;
		ctx = nfs_find_open_context(inode, NULL, FMODE_READ);
		if (ctx == NULL)
			goto out_unlock;
	} else
		ctx = get_nfs_open_context(nfs_file_open_context(file));

	if (!IS_SYNC(inode)) {
		error = nfs_readpage_from_fscache(ctx, inode, page);
		if (error == 0)
			goto out;
	}

	error = nfs_readpage_async(ctx, inode, page);

out:
	put_nfs_open_context(ctx);
	return error;
out_unlock:
	unlock_page(page);
	return error;
}

struct nfs_readdesc {
	struct nfs_pageio_descriptor *pgio;
	struct nfs_open_context *ctx;
};

static int
readpage_async_filler(void *data, struct page *page)
{
	struct nfs_readdesc *desc = (struct nfs_readdesc *)data;
	struct inode *inode = page->mapping->host;
	struct nfs_page *new;
	unsigned int len;
	int error;

	len = nfs_page_length(page);
	if (len == 0)
		return nfs_return_empty_page(page);

	new = nfs_create_request(desc->ctx, inode, page, 0, len);
	if (IS_ERR(new))
		goto out_error;

	if (len < PAGE_CACHE_SIZE)
		zero_user_segment(page, len, PAGE_CACHE_SIZE);
	if (!nfs_pageio_add_request(desc->pgio, new)) {
		error = desc->pgio->pg_error;
		goto out_unlock;
	}
	return 0;
out_error:
	error = PTR_ERR(new);
	SetPageError(page);
out_unlock:
	unlock_page(page);
	return error;
}

int nfs_readpages(struct file *filp, struct address_space *mapping,
		struct list_head *pages, unsigned nr_pages)
{
	struct nfs_pageio_descriptor pgio;
	struct nfs_readdesc desc = {
		.pgio = &pgio,
	};
	struct inode *inode = mapping->host;
	struct nfs_server *server = NFS_SERVER(inode);
	size_t rsize = server->rsize;
	unsigned long npages;
	int ret = -ESTALE;

	dprintk("NFS: nfs_readpages (%s/%Ld %d)\n",
			inode->i_sb->s_id,
			(long long)NFS_FILEID(inode),
			nr_pages);
	nfs_inc_stats(inode, NFSIOS_VFSREADPAGES);

	if (NFS_STALE(inode))
		goto out;

	if (filp == NULL) {
		desc.ctx = nfs_find_open_context(inode, NULL, FMODE_READ);
		if (desc.ctx == NULL)
			return -EBADF;
	} else
		desc.ctx = get_nfs_open_context(nfs_file_open_context(filp));

	/* attempt to read as many of the pages as possible from the cache
	 * - this returns -ENOBUFS immediately if the cookie is negative
	 */
	ret = nfs_readpages_from_fscache(desc.ctx, inode, mapping,
					 pages, &nr_pages);
	if (ret == 0)
		goto read_complete; /* all pages were read */

	if (rsize < PAGE_CACHE_SIZE)
		nfs_pageio_init(&pgio, inode, nfs_pagein_multi, rsize, 0);
	else
		nfs_pageio_init(&pgio, inode, nfs_pagein_one, rsize, 0);

	ret = read_cache_pages(mapping, pages, readpage_async_filler, &desc);

	nfs_pageio_complete(&pgio);
	npages = (pgio.pg_bytes_written + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
	nfs_add_stats(inode, NFSIOS_READPAGES, npages);
read_complete:
	put_nfs_open_context(desc.ctx);
out:
	return ret;
}

int __init nfs_init_readpagecache(void)
{
	nfs_rdata_cachep = kmem_cache_create("nfs_read_data",
					     sizeof(struct nfs_read_data),
					     0, SLAB_HWCACHE_ALIGN,
					     NULL);
	if (nfs_rdata_cachep == NULL)
		return -ENOMEM;

	nfs_rdata_mempool = mempool_create_slab_pool(MIN_POOL_READ,
						     nfs_rdata_cachep);
	if (nfs_rdata_mempool == NULL) {
		/* Don't leak the slab cache if the mempool can't be built. */
		kmem_cache_destroy(nfs_rdata_cachep);
		return -ENOMEM;
	}

	return 0;
}

void nfs_destroy_readpagecache(void)
{
	mempool_destroy(nfs_rdata_mempool);
	kmem_cache_destroy(nfs_rdata_cachep);
}
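
/*
 * nfs_init_readpagecache() and nfs_destroy_readpagecache() pair up:
 * teardown must only run once all outstanding reads have completed,
 * since every nfs_read_data is drawn from the mempool and slab cache
 * destroyed here.  Both are invoked from the NFS module init/exit path.
 */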