/*
 * linux/fs/nfs/read.c
 *
 * Block I/O for NFS
 *
 * Partial copy of Linus' read cache modifications to fs/nfs/file.c
 * modified for async RPC by okir@monad.swb.de
 */

#include <linux/time.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/fcntl.h>
#include <linux/stat.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/pagemap.h>
#include <linux/sunrpc/clnt.h>
#include <linux/nfs_fs.h>
#include <linux/nfs_page.h>
#include <linux/smp_lock.h>

#include <asm/system.h>

#include "internal.h"
#include "iostat.h"

#define NFSDBG_FACILITY		NFSDBG_PAGECACHE

static int nfs_pagein_multi(struct inode *, struct list_head *, unsigned int, size_t, int);
static int nfs_pagein_one(struct inode *, struct list_head *, unsigned int, size_t, int);
static const struct rpc_call_ops nfs_read_partial_ops;
static const struct rpc_call_ops nfs_read_full_ops;

static struct kmem_cache *nfs_rdata_cachep;
static mempool_t *nfs_rdata_mempool;

#define MIN_POOL_READ	(32)

struct nfs_read_data *nfs_readdata_alloc(unsigned int pagecount)
{
        struct nfs_read_data *p = mempool_alloc(nfs_rdata_mempool, GFP_NOFS);

        if (p) {
                memset(p, 0, sizeof(*p));
                INIT_LIST_HEAD(&p->pages);
                p->npages = pagecount;
                if (pagecount <= ARRAY_SIZE(p->page_array))
                        p->pagevec = p->page_array;
                else {
                        p->pagevec = kcalloc(pagecount, sizeof(struct page *), GFP_NOFS);
                        if (!p->pagevec) {
                                mempool_free(p, nfs_rdata_mempool);
                                p = NULL;
                        }
                }
        }
        return p;
}

static void nfs_readdata_rcu_free(struct rcu_head *head)
{
        struct nfs_read_data *p = container_of(head, struct nfs_read_data, task.u.tk_rcu);

        if (p && (p->pagevec != &p->page_array[0]))
                kfree(p->pagevec);
        mempool_free(p, nfs_rdata_mempool);
}

static void nfs_readdata_free(struct nfs_read_data *rdata)
{
        call_rcu_bh(&rdata->task.u.tk_rcu, nfs_readdata_rcu_free);
}

void nfs_readdata_release(void *data)
{
        nfs_readdata_free(data);
}

static int nfs_return_empty_page(struct page *page)
{
        zero_user_page(page, 0, PAGE_CACHE_SIZE, KM_USER0);
        SetPageUptodate(page);
        unlock_page(page);
        return 0;
}

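/*
 * The helper below zeroes whatever part of a short read the server never
 * filled in, so stale page contents are not exposed to userspace.
 * Worked example (editor's illustration, assuming 4KiB pages; the numbers
 * are not from this file): args.pgbase = 0, args.count = 16384,
 * res.count = 5000, res.eof = 1.  Then remainder = 11384 and base = 5000,
 * so we start in page 1 at in-page offset 904 (5000 & 4095), zero its
 * last 3192 bytes, and then zero pages 2 and 3 in full.
 */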
static void nfs_readpage_truncate_uninitialised_page(struct nfs_read_data *data)
{
        unsigned int remainder = data->args.count - data->res.count;
        unsigned int base = data->args.pgbase + data->res.count;
        unsigned int pglen;
        struct page **pages;

        if (data->res.eof == 0 || remainder == 0)
                return;
        /*
         * Note: "remainder" can never be negative, since we check for
         * this in the XDR code.
         */
        pages = &data->args.pages[base >> PAGE_CACHE_SHIFT];
        base &= ~PAGE_CACHE_MASK;
        pglen = PAGE_CACHE_SIZE - base;
        for (;;) {
                if (remainder <= pglen) {
                        zero_user_page(*pages, base, remainder, KM_USER0);
                        break;
                }
                zero_user_page(*pages, base, pglen, KM_USER0);
                pages++;
                remainder -= pglen;
                pglen = PAGE_CACHE_SIZE;
                base = 0;
        }
}

static int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode,
                struct page *page)
{
        LIST_HEAD(one_request);
        struct nfs_page *new;
        unsigned int len;

        len = nfs_page_length(page);
        if (len == 0)
                return nfs_return_empty_page(page);
        new = nfs_create_request(ctx, inode, page, 0, len);
        if (IS_ERR(new)) {
                unlock_page(page);
                return PTR_ERR(new);
        }
        if (len < PAGE_CACHE_SIZE)
                zero_user_page(page, len, PAGE_CACHE_SIZE - len, KM_USER0);

        nfs_list_add_request(new, &one_request);
        if (NFS_SERVER(inode)->rsize < PAGE_CACHE_SIZE)
                nfs_pagein_multi(inode, &one_request, 1, len, 0);
        else
                nfs_pagein_one(inode, &one_request, 1, len, 0);
        return 0;
}

static void nfs_readpage_release(struct nfs_page *req)
{
        unlock_page(req->wb_page);

        dprintk("NFS: read done (%s/%Ld %d@%Ld)\n",
                        req->wb_context->path.dentry->d_inode->i_sb->s_id,
                        (long long)NFS_FILEID(req->wb_context->path.dentry->d_inode),
                        req->wb_bytes,
                        (long long)req_offset(req));
        nfs_clear_request(req);
        nfs_release_request(req);
}

/*
 * Set up the NFS read request struct
 */
static void nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data,
                const struct rpc_call_ops *call_ops,
                unsigned int count, unsigned int offset)
{
        struct inode *inode = req->wb_context->path.dentry->d_inode;
        int swap_flags = IS_SWAPFILE(inode) ? NFS_RPC_SWAPFLAGS : 0;
        struct rpc_task *task;
        struct rpc_message msg = {
                .rpc_argp = &data->args,
                .rpc_resp = &data->res,
                .rpc_cred = req->wb_context->cred,
        };
        struct rpc_task_setup task_setup_data = {
                .task = &data->task,
                .rpc_client = NFS_CLIENT(inode),
                .rpc_message = &msg,
                .callback_ops = call_ops,
                .callback_data = data,
                .flags = RPC_TASK_ASYNC | swap_flags,
        };

        data->req = req;
        data->inode = inode;
        data->cred = msg.rpc_cred;

        data->args.fh = NFS_FH(inode);
        data->args.offset = req_offset(req) + offset;
        data->args.pgbase = req->wb_pgbase + offset;
        data->args.pages = data->pagevec;
        data->args.count = count;
        data->args.context = req->wb_context;

        data->res.fattr = &data->fattr;
        data->res.count = count;
        data->res.eof = 0;
        nfs_fattr_init(&data->fattr);

        /* Set up the initial task struct. */
        NFS_PROTO(inode)->read_setup(data, &msg);

        dprintk("NFS: %5u initiated read call (req %s/%Ld, %u bytes @ offset %Lu)\n",
                        data->task.tk_pid,
                        inode->i_sb->s_id,
                        (long long)NFS_FILEID(inode),
                        count,
                        (unsigned long long)data->args.offset);

        task = rpc_run_task(&task_setup_data);
        if (!IS_ERR(task))
                rpc_put_task(task);
}

static void nfs_async_read_error(struct list_head *head)
{
        struct nfs_page *req;

        while (!list_empty(head)) {
                req = nfs_list_entry(head->next);
                nfs_list_remove_request(req);
                SetPageError(req->wb_page);
                nfs_readpage_release(req);
        }
}

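/*
 * Two strategies follow (editor's note).  nfs_pagein_multi() splits one
 * page across several READ calls when the server's rsize is smaller than
 * a page; nfs_pagein_one() packs many pages into a single call otherwise.
 * Illustrative numbers only: with rsize = 1024 and a 4KiB page, the multi
 * case issues four RPCs at in-page offsets 0, 1024, 2048 and 3072, and
 * req->wb_complete counts them so the last reply can unlock the page.
 */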
226 * 227 * We optimize to reduce the number of read operations on the wire. If we 228 * detect that we're reading a page, or an area of a page, that is past the 229 * end of file, we do not generate NFS read operations but just clear the 230 * parts of the page that would have come back zero from the server anyway. 231 * 232 * We rely on the cached value of i_size to make this determination; another 233 * client can fill pages on the server past our cached end-of-file, but we 234 * won't see the new data until our attribute cache is updated. This is more 235 * or less conventional NFS client behavior. 236 */ 237 static int nfs_pagein_multi(struct inode *inode, struct list_head *head, unsigned int npages, size_t count, int flags) 238 { 239 struct nfs_page *req = nfs_list_entry(head->next); 240 struct page *page = req->wb_page; 241 struct nfs_read_data *data; 242 size_t rsize = NFS_SERVER(inode)->rsize, nbytes; 243 unsigned int offset; 244 int requests = 0; 245 LIST_HEAD(list); 246 247 nfs_list_remove_request(req); 248 249 nbytes = count; 250 do { 251 size_t len = min(nbytes,rsize); 252 253 data = nfs_readdata_alloc(1); 254 if (!data) 255 goto out_bad; 256 INIT_LIST_HEAD(&data->pages); 257 list_add(&data->pages, &list); 258 requests++; 259 nbytes -= len; 260 } while(nbytes != 0); 261 atomic_set(&req->wb_complete, requests); 262 263 ClearPageError(page); 264 offset = 0; 265 nbytes = count; 266 do { 267 data = list_entry(list.next, struct nfs_read_data, pages); 268 list_del_init(&data->pages); 269 270 data->pagevec[0] = page; 271 272 if (nbytes < rsize) 273 rsize = nbytes; 274 nfs_read_rpcsetup(req, data, &nfs_read_partial_ops, 275 rsize, offset); 276 offset += rsize; 277 nbytes -= rsize; 278 } while (nbytes != 0); 279 280 return 0; 281 282 out_bad: 283 while (!list_empty(&list)) { 284 data = list_entry(list.next, struct nfs_read_data, pages); 285 list_del(&data->pages); 286 nfs_readdata_free(data); 287 } 288 SetPageError(page); 289 nfs_readpage_release(req); 290 return -ENOMEM; 291 } 292 293 static int nfs_pagein_one(struct inode *inode, struct list_head *head, unsigned int npages, size_t count, int flags) 294 { 295 struct nfs_page *req; 296 struct page **pages; 297 struct nfs_read_data *data; 298 299 data = nfs_readdata_alloc(npages); 300 if (!data) 301 goto out_bad; 302 303 INIT_LIST_HEAD(&data->pages); 304 pages = data->pagevec; 305 while (!list_empty(head)) { 306 req = nfs_list_entry(head->next); 307 nfs_list_remove_request(req); 308 nfs_list_add_request(req, &data->pages); 309 ClearPageError(req->wb_page); 310 *pages++ = req->wb_page; 311 } 312 req = nfs_list_entry(data->pages.next); 313 314 nfs_read_rpcsetup(req, data, &nfs_read_full_ops, count, 0); 315 return 0; 316 out_bad: 317 nfs_async_read_error(head); 318 return -ENOMEM; 319 } 320 321 /* 322 * This is the callback from RPC telling us whether a reply was 323 * received or some error occurred (timeout or socket shutdown). 
324 */ 325 int nfs_readpage_result(struct rpc_task *task, struct nfs_read_data *data) 326 { 327 int status; 328 329 dprintk("NFS: %s: %5u, (status %d)\n", __FUNCTION__, task->tk_pid, 330 task->tk_status); 331 332 status = NFS_PROTO(data->inode)->read_done(task, data); 333 if (status != 0) 334 return status; 335 336 nfs_add_stats(data->inode, NFSIOS_SERVERREADBYTES, data->res.count); 337 338 if (task->tk_status == -ESTALE) { 339 set_bit(NFS_INO_STALE, &NFS_I(data->inode)->flags); 340 nfs_mark_for_revalidate(data->inode); 341 } 342 return 0; 343 } 344 345 static int nfs_readpage_retry(struct rpc_task *task, struct nfs_read_data *data) 346 { 347 struct nfs_readargs *argp = &data->args; 348 struct nfs_readres *resp = &data->res; 349 350 if (resp->eof || resp->count == argp->count) 351 return 0; 352 353 /* This is a short read! */ 354 nfs_inc_stats(data->inode, NFSIOS_SHORTREAD); 355 /* Has the server at least made some progress? */ 356 if (resp->count == 0) 357 return 0; 358 359 /* Yes, so retry the read at the end of the data */ 360 argp->offset += resp->count; 361 argp->pgbase += resp->count; 362 argp->count -= resp->count; 363 rpc_restart_call(task); 364 return -EAGAIN; 365 } 366 367 /* 368 * Handle a read reply that fills part of a page. 369 */ 370 static void nfs_readpage_result_partial(struct rpc_task *task, void *calldata) 371 { 372 struct nfs_read_data *data = calldata; 373 struct nfs_page *req = data->req; 374 struct page *page = req->wb_page; 375 376 if (nfs_readpage_result(task, data) != 0) 377 return; 378 379 if (likely(task->tk_status >= 0)) { 380 nfs_readpage_truncate_uninitialised_page(data); 381 if (nfs_readpage_retry(task, data) != 0) 382 return; 383 } 384 if (unlikely(task->tk_status < 0)) 385 SetPageError(page); 386 if (atomic_dec_and_test(&req->wb_complete)) { 387 if (!PageError(page)) 388 SetPageUptodate(page); 389 nfs_readpage_release(req); 390 } 391 } 392 393 static const struct rpc_call_ops nfs_read_partial_ops = { 394 .rpc_call_done = nfs_readpage_result_partial, 395 .rpc_release = nfs_readdata_release, 396 }; 397 398 static void nfs_readpage_set_pages_uptodate(struct nfs_read_data *data) 399 { 400 unsigned int count = data->res.count; 401 unsigned int base = data->args.pgbase; 402 struct page **pages; 403 404 if (data->res.eof) 405 count = data->args.count; 406 if (unlikely(count == 0)) 407 return; 408 pages = &data->args.pages[base >> PAGE_CACHE_SHIFT]; 409 base &= ~PAGE_CACHE_MASK; 410 count += base; 411 for (;count >= PAGE_CACHE_SIZE; count -= PAGE_CACHE_SIZE, pages++) 412 SetPageUptodate(*pages); 413 if (count == 0) 414 return; 415 /* Was this a short read? */ 416 if (data->res.eof || data->res.count == data->args.count) 417 SetPageUptodate(*pages); 418 } 419 420 /* 421 * This is the callback from RPC telling us whether a reply was 422 * received or some error occurred (timeout or socket shutdown). 423 */ 424 static void nfs_readpage_result_full(struct rpc_task *task, void *calldata) 425 { 426 struct nfs_read_data *data = calldata; 427 428 if (nfs_readpage_result(task, data) != 0) 429 return; 430 /* 431 * Note: nfs_readpage_retry may change the values of 432 * data->args. In the multi-page case, we therefore need 433 * to ensure that we call nfs_readpage_set_pages_uptodate() 434 * first. 
435 */ 436 if (likely(task->tk_status >= 0)) { 437 nfs_readpage_truncate_uninitialised_page(data); 438 nfs_readpage_set_pages_uptodate(data); 439 if (nfs_readpage_retry(task, data) != 0) 440 return; 441 } 442 while (!list_empty(&data->pages)) { 443 struct nfs_page *req = nfs_list_entry(data->pages.next); 444 445 nfs_list_remove_request(req); 446 nfs_readpage_release(req); 447 } 448 } 449 450 static const struct rpc_call_ops nfs_read_full_ops = { 451 .rpc_call_done = nfs_readpage_result_full, 452 .rpc_release = nfs_readdata_release, 453 }; 454 455 /* 456 * Read a page over NFS. 457 * We read the page synchronously in the following case: 458 * - The error flag is set for this page. This happens only when a 459 * previous async read operation failed. 460 */ 461 int nfs_readpage(struct file *file, struct page *page) 462 { 463 struct nfs_open_context *ctx; 464 struct inode *inode = page->mapping->host; 465 int error; 466 467 dprintk("NFS: nfs_readpage (%p %ld@%lu)\n", 468 page, PAGE_CACHE_SIZE, page->index); 469 nfs_inc_stats(inode, NFSIOS_VFSREADPAGE); 470 nfs_add_stats(inode, NFSIOS_READPAGES, 1); 471 472 /* 473 * Try to flush any pending writes to the file.. 474 * 475 * NOTE! Because we own the page lock, there cannot 476 * be any new pending writes generated at this point 477 * for this page (other pages can be written to). 478 */ 479 error = nfs_wb_page(inode, page); 480 if (error) 481 goto out_unlock; 482 if (PageUptodate(page)) 483 goto out_unlock; 484 485 error = -ESTALE; 486 if (NFS_STALE(inode)) 487 goto out_unlock; 488 489 if (file == NULL) { 490 error = -EBADF; 491 ctx = nfs_find_open_context(inode, NULL, FMODE_READ); 492 if (ctx == NULL) 493 goto out_unlock; 494 } else 495 ctx = get_nfs_open_context(nfs_file_open_context(file)); 496 497 error = nfs_readpage_async(ctx, inode, page); 498 499 put_nfs_open_context(ctx); 500 return error; 501 out_unlock: 502 unlock_page(page); 503 return error; 504 } 505 506 struct nfs_readdesc { 507 struct nfs_pageio_descriptor *pgio; 508 struct nfs_open_context *ctx; 509 }; 510 511 static int 512 readpage_async_filler(void *data, struct page *page) 513 { 514 struct nfs_readdesc *desc = (struct nfs_readdesc *)data; 515 struct inode *inode = page->mapping->host; 516 struct nfs_page *new; 517 unsigned int len; 518 int error; 519 520 error = nfs_wb_page(inode, page); 521 if (error) 522 goto out_unlock; 523 if (PageUptodate(page)) 524 goto out_unlock; 525 526 len = nfs_page_length(page); 527 if (len == 0) 528 return nfs_return_empty_page(page); 529 530 new = nfs_create_request(desc->ctx, inode, page, 0, len); 531 if (IS_ERR(new)) 532 goto out_error; 533 534 if (len < PAGE_CACHE_SIZE) 535 zero_user_page(page, len, PAGE_CACHE_SIZE - len, KM_USER0); 536 nfs_pageio_add_request(desc->pgio, new); 537 return 0; 538 out_error: 539 error = PTR_ERR(new); 540 SetPageError(page); 541 out_unlock: 542 unlock_page(page); 543 return error; 544 } 545 546 int nfs_readpages(struct file *filp, struct address_space *mapping, 547 struct list_head *pages, unsigned nr_pages) 548 { 549 struct nfs_pageio_descriptor pgio; 550 struct nfs_readdesc desc = { 551 .pgio = &pgio, 552 }; 553 struct inode *inode = mapping->host; 554 struct nfs_server *server = NFS_SERVER(inode); 555 size_t rsize = server->rsize; 556 unsigned long npages; 557 int ret = -ESTALE; 558 559 dprintk("NFS: nfs_readpages (%s/%Ld %d)\n", 560 inode->i_sb->s_id, 561 (long long)NFS_FILEID(inode), 562 nr_pages); 563 nfs_inc_stats(inode, NFSIOS_VFSREADPAGES); 564 565 if (NFS_STALE(inode)) 566 goto out; 567 568 if (filp == 
int nfs_readpages(struct file *filp, struct address_space *mapping,
                struct list_head *pages, unsigned nr_pages)
{
        struct nfs_pageio_descriptor pgio;
        struct nfs_readdesc desc = {
                .pgio = &pgio,
        };
        struct inode *inode = mapping->host;
        struct nfs_server *server = NFS_SERVER(inode);
        size_t rsize = server->rsize;
        unsigned long npages;
        int ret = -ESTALE;

        dprintk("NFS: nfs_readpages (%s/%Ld %d)\n",
                        inode->i_sb->s_id,
                        (long long)NFS_FILEID(inode),
                        nr_pages);
        nfs_inc_stats(inode, NFSIOS_VFSREADPAGES);

        if (NFS_STALE(inode))
                goto out;

        if (filp == NULL) {
                desc.ctx = nfs_find_open_context(inode, NULL, FMODE_READ);
                if (desc.ctx == NULL)
                        return -EBADF;
        } else
                desc.ctx = get_nfs_open_context(nfs_file_open_context(filp));
        if (rsize < PAGE_CACHE_SIZE)
                nfs_pageio_init(&pgio, inode, nfs_pagein_multi, rsize, 0);
        else
                nfs_pageio_init(&pgio, inode, nfs_pagein_one, rsize, 0);

        ret = read_cache_pages(mapping, pages, readpage_async_filler, &desc);

        nfs_pageio_complete(&pgio);
        npages = (pgio.pg_bytes_written + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
        nfs_add_stats(inode, NFSIOS_READPAGES, npages);
        put_nfs_open_context(desc.ctx);
out:
        return ret;
}

int __init nfs_init_readpagecache(void)
{
        nfs_rdata_cachep = kmem_cache_create("nfs_read_data",
                                             sizeof(struct nfs_read_data),
                                             0, SLAB_HWCACHE_ALIGN,
                                             NULL);
        if (nfs_rdata_cachep == NULL)
                return -ENOMEM;

        nfs_rdata_mempool = mempool_create_slab_pool(MIN_POOL_READ,
                                                     nfs_rdata_cachep);
        if (nfs_rdata_mempool == NULL) {
                /* Don't leak the slab cache if the mempool can't be built. */
                kmem_cache_destroy(nfs_rdata_cachep);
                return -ENOMEM;
        }

        return 0;
}

void nfs_destroy_readpagecache(void)
{
        mempool_destroy(nfs_rdata_mempool);
        kmem_cache_destroy(nfs_rdata_cachep);
}