/*
 * linux/fs/nfs/read.c
 *
 * Block I/O for NFS
 *
 * Partial copy of Linus' read cache modifications to fs/nfs/file.c
 * modified for async RPC by okir@monad.swb.de
 */

#include <linux/time.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/fcntl.h>
#include <linux/stat.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/pagemap.h>
#include <linux/sunrpc/clnt.h>
#include <linux/nfs_fs.h>
#include <linux/nfs_page.h>
#include <linux/smp_lock.h>

#include <asm/system.h>

#include "internal.h"
#include "iostat.h"

#define NFSDBG_FACILITY		NFSDBG_PAGECACHE

static int nfs_pagein_multi(struct inode *, struct list_head *, unsigned int, size_t, int);
static int nfs_pagein_one(struct inode *, struct list_head *, unsigned int, size_t, int);
static const struct rpc_call_ops nfs_read_partial_ops;
static const struct rpc_call_ops nfs_read_full_ops;

static struct kmem_cache *nfs_rdata_cachep;
static mempool_t *nfs_rdata_mempool;

#define MIN_POOL_READ	(32)

struct nfs_read_data *nfs_readdata_alloc(unsigned int pagecount)
{
	struct nfs_read_data *p = mempool_alloc(nfs_rdata_mempool, GFP_NOFS);

	if (p) {
		memset(p, 0, sizeof(*p));
		INIT_LIST_HEAD(&p->pages);
		p->npages = pagecount;
		if (pagecount <= ARRAY_SIZE(p->page_array))
			p->pagevec = p->page_array;
		else {
			p->pagevec = kcalloc(pagecount, sizeof(struct page *), GFP_NOFS);
			if (!p->pagevec) {
				mempool_free(p, nfs_rdata_mempool);
				p = NULL;
			}
		}
	}
	return p;
}

static void nfs_readdata_rcu_free(struct rcu_head *head)
{
	struct nfs_read_data *p = container_of(head, struct nfs_read_data, task.u.tk_rcu);
	if (p && (p->pagevec != &p->page_array[0]))
		kfree(p->pagevec);
	mempool_free(p, nfs_rdata_mempool);
}

static void nfs_readdata_free(struct nfs_read_data *rdata)
{
	call_rcu_bh(&rdata->task.u.tk_rcu, nfs_readdata_rcu_free);
}

void nfs_readdata_release(void *data)
{
	nfs_readdata_free(data);
}

static
int nfs_return_empty_page(struct page *page)
{
	zero_user_page(page, 0, PAGE_CACHE_SIZE, KM_USER0);
	SetPageUptodate(page);
	unlock_page(page);
	return 0;
}

static void nfs_readpage_truncate_uninitialised_page(struct nfs_read_data *data)
{
	unsigned int remainder = data->args.count - data->res.count;
	unsigned int base = data->args.pgbase + data->res.count;
	unsigned int pglen;
	struct page **pages;

	if (data->res.eof == 0 || remainder == 0)
		return;
	/*
	 * Note: "remainder" can never be negative, since we check for
	 * this in the XDR code.
	 */
	pages = &data->args.pages[base >> PAGE_CACHE_SHIFT];
	base &= ~PAGE_CACHE_MASK;
	pglen = PAGE_CACHE_SIZE - base;
	for (;;) {
		if (remainder <= pglen) {
			zero_user_page(*pages, base, remainder, KM_USER0);
			break;
		}
		zero_user_page(*pages, base, pglen, KM_USER0);
		pages++;
		remainder -= pglen;
		pglen = PAGE_CACHE_SIZE;
		base = 0;
	}
}
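/*
 * Worked example for the truncation above (illustrative, not from the
 * original source; assumes a 4096-byte PAGE_CACHE_SIZE): for
 * args.count = 8192, res.count = 5000, args.pgbase = 0 and res.eof set,
 * remainder = 3192 and base = 5000, so we index page 1
 * (5000 >> PAGE_CACHE_SHIFT), take base = 904 within it, and zero
 * bytes 904..4095 of that page in a single pass of the loop.
 */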
static int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode,
		struct page *page)
{
	LIST_HEAD(one_request);
	struct nfs_page	*new;
	unsigned int len;

	len = nfs_page_length(page);
	if (len == 0)
		return nfs_return_empty_page(page);
	new = nfs_create_request(ctx, inode, page, 0, len);
	if (IS_ERR(new)) {
		unlock_page(page);
		return PTR_ERR(new);
	}
	if (len < PAGE_CACHE_SIZE)
		zero_user_page(page, len, PAGE_CACHE_SIZE - len, KM_USER0);

	nfs_list_add_request(new, &one_request);
	if (NFS_SERVER(inode)->rsize < PAGE_CACHE_SIZE)
		nfs_pagein_multi(inode, &one_request, 1, len, 0);
	else
		nfs_pagein_one(inode, &one_request, 1, len, 0);
	return 0;
}

static void nfs_readpage_release(struct nfs_page *req)
{
	unlock_page(req->wb_page);

	dprintk("NFS: read done (%s/%Ld %d@%Ld)\n",
			req->wb_context->path.dentry->d_inode->i_sb->s_id,
			(long long)NFS_FILEID(req->wb_context->path.dentry->d_inode),
			req->wb_bytes,
			(long long)req_offset(req));
	nfs_clear_request(req);
	nfs_release_request(req);
}

/*
 * Set up the NFS read request struct
 */
static void nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data,
		const struct rpc_call_ops *call_ops,
		unsigned int count, unsigned int offset)
{
	struct inode *inode;
	int flags;

	data->req = req;
	data->inode = inode = req->wb_context->path.dentry->d_inode;
	data->cred = req->wb_context->cred;

	data->args.fh = NFS_FH(inode);
	data->args.offset = req_offset(req) + offset;
	data->args.pgbase = req->wb_pgbase + offset;
	data->args.pages = data->pagevec;
	data->args.count = count;
	data->args.context = req->wb_context;

	data->res.fattr = &data->fattr;
	data->res.count = count;
	data->res.eof = 0;
	nfs_fattr_init(&data->fattr);

	/* Set up the initial task struct. */
	flags = RPC_TASK_ASYNC | (IS_SWAPFILE(inode)? NFS_RPC_SWAPFLAGS : 0);
	rpc_init_task(&data->task, NFS_CLIENT(inode), flags, call_ops, data);
	NFS_PROTO(inode)->read_setup(data);

	data->task.tk_cookie = (unsigned long)inode;

	dprintk("NFS: %5u initiated read call (req %s/%Ld, %u bytes @ offset %Lu)\n",
			data->task.tk_pid,
			inode->i_sb->s_id,
			(long long)NFS_FILEID(inode),
			count,
			(unsigned long long)data->args.offset);
}

static void
nfs_async_read_error(struct list_head *head)
{
	struct nfs_page	*req;

	while (!list_empty(head)) {
		req = nfs_list_entry(head->next);
		nfs_list_remove_request(req);
		SetPageError(req->wb_page);
		nfs_readpage_release(req);
	}
}

/*
 * Start an async read operation
 */
static void nfs_execute_read(struct nfs_read_data *data)
{
	struct rpc_clnt *clnt = NFS_CLIENT(data->inode);
	sigset_t oldset;

	rpc_clnt_sigmask(clnt, &oldset);
	rpc_execute(&data->task);
	rpc_clnt_sigunmask(clnt, &oldset);
}

/*
 * Generate multiple requests to fill a single page.
 *
 * We optimize to reduce the number of read operations on the wire. If we
 * detect that we're reading a page, or an area of a page, that is past the
 * end of file, we do not generate NFS read operations but just clear the
 * parts of the page that would have come back zero from the server anyway.
 *
 * We rely on the cached value of i_size to make this determination; another
 * client can fill pages on the server past our cached end-of-file, but we
 * won't see the new data until our attribute cache is updated. This is more
 * or less conventional NFS client behavior.
 */
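/*
 * Illustrative split (not from the original source): with rsize = 1024
 * and a 4096-byte PAGE_CACHE_SIZE, nfs_pagein_multi() below issues four
 * 1024-byte READ RPCs at page offsets 0, 1024, 2048 and 3072, all
 * sharing the same struct page, and sets req->wb_complete to 4 so the
 * page is unlocked only after the last partial reply completes.
 */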
static int nfs_pagein_multi(struct inode *inode, struct list_head *head, unsigned int npages, size_t count, int flags)
{
	struct nfs_page *req = nfs_list_entry(head->next);
	struct page *page = req->wb_page;
	struct nfs_read_data *data;
	size_t rsize = NFS_SERVER(inode)->rsize, nbytes;
	unsigned int offset;
	int requests = 0;
	LIST_HEAD(list);

	nfs_list_remove_request(req);

	nbytes = count;
	do {
		size_t len = min(nbytes, rsize);

		data = nfs_readdata_alloc(1);
		if (!data)
			goto out_bad;
		INIT_LIST_HEAD(&data->pages);
		list_add(&data->pages, &list);
		requests++;
		nbytes -= len;
	} while (nbytes != 0);
	atomic_set(&req->wb_complete, requests);

	ClearPageError(page);
	offset = 0;
	nbytes = count;
	do {
		data = list_entry(list.next, struct nfs_read_data, pages);
		list_del_init(&data->pages);

		data->pagevec[0] = page;

		if (nbytes < rsize)
			rsize = nbytes;
		nfs_read_rpcsetup(req, data, &nfs_read_partial_ops,
				rsize, offset);
		offset += rsize;
		nbytes -= rsize;
		nfs_execute_read(data);
	} while (nbytes != 0);

	return 0;

out_bad:
	while (!list_empty(&list)) {
		data = list_entry(list.next, struct nfs_read_data, pages);
		list_del(&data->pages);
		nfs_readdata_free(data);
	}
	SetPageError(page);
	nfs_readpage_release(req);
	return -ENOMEM;
}

static int nfs_pagein_one(struct inode *inode, struct list_head *head, unsigned int npages, size_t count, int flags)
{
	struct nfs_page *req;
	struct page **pages;
	struct nfs_read_data *data;

	data = nfs_readdata_alloc(npages);
	if (!data)
		goto out_bad;

	INIT_LIST_HEAD(&data->pages);
	pages = data->pagevec;
	while (!list_empty(head)) {
		req = nfs_list_entry(head->next);
		nfs_list_remove_request(req);
		nfs_list_add_request(req, &data->pages);
		ClearPageError(req->wb_page);
		*pages++ = req->wb_page;
	}
	req = nfs_list_entry(data->pages.next);

	nfs_read_rpcsetup(req, data, &nfs_read_full_ops, count, 0);

	nfs_execute_read(data);
	return 0;
out_bad:
	nfs_async_read_error(head);
	return -ENOMEM;
}

/*
 * This is the callback from RPC telling us whether a reply was
 * received or some error occurred (timeout or socket shutdown).
 */
int nfs_readpage_result(struct rpc_task *task, struct nfs_read_data *data)
{
	int status;

	dprintk("NFS: %s: %5u, (status %d)\n", __FUNCTION__, task->tk_pid,
			task->tk_status);

	status = NFS_PROTO(data->inode)->read_done(task, data);
	if (status != 0)
		return status;

	nfs_add_stats(data->inode, NFSIOS_SERVERREADBYTES, data->res.count);

	if (task->tk_status == -ESTALE) {
		set_bit(NFS_INO_STALE, &NFS_FLAGS(data->inode));
		nfs_mark_for_revalidate(data->inode);
	}
	return 0;
}

static int nfs_readpage_retry(struct rpc_task *task, struct nfs_read_data *data)
{
	struct nfs_readargs *argp = &data->args;
	struct nfs_readres *resp = &data->res;

	if (resp->eof || resp->count == argp->count)
		return 0;

	/* This is a short read! */
	nfs_inc_stats(data->inode, NFSIOS_SHORTREAD);
	/* Has the server at least made some progress? */
	if (resp->count == 0)
		return 0;

	/* Yes, so retry the read at the end of the data */
	argp->offset += resp->count;
	argp->pgbase += resp->count;
	argp->count -= resp->count;
	rpc_restart_call(task);
	return -EAGAIN;
}
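/*
 * Illustrative short-read retry (not from the original source): if we
 * asked for args.count = 4096 and the reply carried res.count = 1024
 * without eof, the function above restarts the RPC with offset and
 * pgbase advanced by 1024 and count reduced to 3072, resuming exactly
 * where the server's reply left off.
 */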
/*
 * Handle a read reply that fills part of a page.
 */
static void nfs_readpage_result_partial(struct rpc_task *task, void *calldata)
{
	struct nfs_read_data *data = calldata;
	struct nfs_page *req = data->req;
	struct page *page = req->wb_page;

	if (nfs_readpage_result(task, data) != 0)
		return;

	if (likely(task->tk_status >= 0)) {
		nfs_readpage_truncate_uninitialised_page(data);
		if (nfs_readpage_retry(task, data) != 0)
			return;
	}
	if (unlikely(task->tk_status < 0))
		SetPageError(page);
	if (atomic_dec_and_test(&req->wb_complete)) {
		if (!PageError(page))
			SetPageUptodate(page);
		nfs_readpage_release(req);
	}
}

static const struct rpc_call_ops nfs_read_partial_ops = {
	.rpc_call_done = nfs_readpage_result_partial,
	.rpc_release = nfs_readdata_release,
};

static void nfs_readpage_set_pages_uptodate(struct nfs_read_data *data)
{
	unsigned int count = data->res.count;
	unsigned int base = data->args.pgbase;
	struct page **pages;

	if (data->res.eof)
		count = data->args.count;
	if (unlikely(count == 0))
		return;
	pages = &data->args.pages[base >> PAGE_CACHE_SHIFT];
	base &= ~PAGE_CACHE_MASK;
	count += base;
	for (; count >= PAGE_CACHE_SIZE; count -= PAGE_CACHE_SIZE, pages++)
		SetPageUptodate(*pages);
	if (count == 0)
		return;
	/* Was this a short read? */
	if (data->res.eof || data->res.count == data->args.count)
		SetPageUptodate(*pages);
}
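/*
 * Illustrative walk through the loop above (not from the original
 * source; assumes a 4096-byte PAGE_CACHE_SIZE): with pgbase = 0 and
 * res.count = 10000 out of args.count = 16384 and no eof, the first
 * two pages (8192 bytes) are marked uptodate; the third page holds
 * only 1808 valid bytes, so it is left !uptodate pending the
 * short-read retry.
 */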
/*
 * This is the callback from RPC telling us whether a reply was
 * received or some error occurred (timeout or socket shutdown).
 */
static void nfs_readpage_result_full(struct rpc_task *task, void *calldata)
{
	struct nfs_read_data *data = calldata;

	if (nfs_readpage_result(task, data) != 0)
		return;
	/*
	 * Note: nfs_readpage_retry may change the values of
	 * data->args. In the multi-page case, we therefore need
	 * to ensure that we call nfs_readpage_set_pages_uptodate()
	 * first.
	 */
	if (likely(task->tk_status >= 0)) {
		nfs_readpage_truncate_uninitialised_page(data);
		nfs_readpage_set_pages_uptodate(data);
		if (nfs_readpage_retry(task, data) != 0)
			return;
	}
	while (!list_empty(&data->pages)) {
		struct nfs_page *req = nfs_list_entry(data->pages.next);

		nfs_list_remove_request(req);
		nfs_readpage_release(req);
	}
}

static const struct rpc_call_ops nfs_read_full_ops = {
	.rpc_call_done = nfs_readpage_result_full,
	.rpc_release = nfs_readdata_release,
};

/*
 * Read a page over NFS.
 * We read the page synchronously in the following case:
 *  -	The error flag is set for this page. This happens only when a
 *	previous async read operation failed.
 */
int nfs_readpage(struct file *file, struct page *page)
{
	struct nfs_open_context *ctx;
	struct inode *inode = page->mapping->host;
	int error;

	dprintk("NFS: nfs_readpage (%p %ld@%lu)\n",
		page, PAGE_CACHE_SIZE, page->index);
	nfs_inc_stats(inode, NFSIOS_VFSREADPAGE);
	nfs_add_stats(inode, NFSIOS_READPAGES, 1);

	/*
	 * Try to flush any pending writes to the file.
	 *
	 * NOTE! Because we own the page lock, there cannot
	 * be any new pending writes generated at this point
	 * for this page (other pages can be written to).
	 */
	error = nfs_wb_page(inode, page);
	if (error)
		goto out_unlock;
	if (PageUptodate(page))
		goto out_unlock;

	error = -ESTALE;
	if (NFS_STALE(inode))
		goto out_unlock;

	if (file == NULL) {
		error = -EBADF;
		ctx = nfs_find_open_context(inode, NULL, FMODE_READ);
		if (ctx == NULL)
			goto out_unlock;
	} else
		ctx = get_nfs_open_context(nfs_file_open_context(file));

	error = nfs_readpage_async(ctx, inode, page);

	put_nfs_open_context(ctx);
	return error;
out_unlock:
	unlock_page(page);
	return error;
}

struct nfs_readdesc {
	struct nfs_pageio_descriptor *pgio;
	struct nfs_open_context *ctx;
};

static int
readpage_async_filler(void *data, struct page *page)
{
	struct nfs_readdesc *desc = (struct nfs_readdesc *)data;
	struct inode *inode = page->mapping->host;
	struct nfs_page *new;
	unsigned int len;
	int error;

	error = nfs_wb_page(inode, page);
	if (error)
		goto out_unlock;
	if (PageUptodate(page))
		goto out_unlock;

	len = nfs_page_length(page);
	if (len == 0)
		return nfs_return_empty_page(page);

	new = nfs_create_request(desc->ctx, inode, page, 0, len);
	if (IS_ERR(new))
		goto out_error;

	if (len < PAGE_CACHE_SIZE)
		zero_user_page(page, len, PAGE_CACHE_SIZE - len, KM_USER0);
	nfs_pageio_add_request(desc->pgio, new);
	return 0;
out_error:
	error = PTR_ERR(new);
	SetPageError(page);
out_unlock:
	unlock_page(page);
	return error;
}

int nfs_readpages(struct file *filp, struct address_space *mapping,
		struct list_head *pages, unsigned nr_pages)
{
	struct nfs_pageio_descriptor pgio;
	struct nfs_readdesc desc = {
		.pgio = &pgio,
	};
	struct inode *inode = mapping->host;
	struct nfs_server *server = NFS_SERVER(inode);
	size_t rsize = server->rsize;
	unsigned long npages;
	int ret = -ESTALE;

	dprintk("NFS: nfs_readpages (%s/%Ld %d)\n",
			inode->i_sb->s_id,
			(long long)NFS_FILEID(inode),
			nr_pages);
	nfs_inc_stats(inode, NFSIOS_VFSREADPAGES);

	if (NFS_STALE(inode))
		goto out;

	if (filp == NULL) {
		desc.ctx = nfs_find_open_context(inode, NULL, FMODE_READ);
		if (desc.ctx == NULL)
			return -EBADF;
	} else
		desc.ctx = get_nfs_open_context(nfs_file_open_context(filp));
	if (rsize < PAGE_CACHE_SIZE)
		nfs_pageio_init(&pgio, inode, nfs_pagein_multi, rsize, 0);
	else
		nfs_pageio_init(&pgio, inode, nfs_pagein_one, rsize, 0);

	ret = read_cache_pages(mapping, pages, readpage_async_filler, &desc);

	nfs_pageio_complete(&pgio);
	npages = (pgio.pg_bytes_written + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
	nfs_add_stats(inode, NFSIOS_READPAGES, npages);
	put_nfs_open_context(desc.ctx);
out:
	return ret;
}
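/*
 * Illustrative effect of the descriptor setup above (not from the
 * original source): with rsize = 32768 and eight contiguous 4096-byte
 * pages queued by readpage_async_filler(), nfs_pageio_complete() can
 * hand all eight pages to nfs_pagein_one() as a single 32768-byte READ
 * RPC; with rsize = 1024, each page is instead split into four partial
 * reads by nfs_pagein_multi().
 */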
int __init nfs_init_readpagecache(void)
{
	nfs_rdata_cachep = kmem_cache_create("nfs_read_data",
					     sizeof(struct nfs_read_data),
					     0, SLAB_HWCACHE_ALIGN,
					     NULL);
	if (nfs_rdata_cachep == NULL)
		return -ENOMEM;

	nfs_rdata_mempool = mempool_create_slab_pool(MIN_POOL_READ,
						     nfs_rdata_cachep);
	if (nfs_rdata_mempool == NULL) {
		/* Don't leak the slab cache if mempool creation fails */
		kmem_cache_destroy(nfs_rdata_cachep);
		return -ENOMEM;
	}

	return 0;
}

void nfs_destroy_readpagecache(void)
{
	mempool_destroy(nfs_rdata_mempool);
	kmem_cache_destroy(nfs_rdata_cachep);
}
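/*
 * Note on pool sizing (illustrative): mempool_create_slab_pool()
 * preallocates a reserve of MIN_POOL_READ (32) nfs_read_data
 * structures, so mempool_alloc() in nfs_readdata_alloc() can fall back
 * on that reserve and reads keep making progress even when GFP_NOFS
 * slab allocations fail under memory pressure.
 */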