/*
 * linux/fs/nfs/read.c
 *
 * Block I/O for NFS
 *
 * Partial copy of Linus' read cache modifications to fs/nfs/file.c
 * modified for async RPC by okir@monad.swb.de
 *
 * We do an ugly hack here in order to return proper error codes to the
 * user program when a read request failed: since generic_file_read
 * only checks the return value of inode->i_op->readpage() which is always 0
 * for async RPC, we set the error bit of the page to 1 when an error occurs,
 * and make nfs_readpage transmit requests synchronously when encountering this.
 * This is only a small problem, though, since we now retry all operations
 * within the RPC code when root squashing is suspected.
 */

#include <linux/config.h>
#include <linux/time.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/fcntl.h>
#include <linux/stat.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/pagemap.h>
#include <linux/sunrpc/clnt.h>
#include <linux/nfs_fs.h>
#include <linux/nfs_page.h>
#include <linux/smp_lock.h>

#include <asm/system.h>

#define NFSDBG_FACILITY NFSDBG_PAGECACHE

static int nfs_pagein_one(struct list_head *, struct inode *);
static void nfs_readpage_result_partial(struct nfs_read_data *, int);
static void nfs_readpage_result_full(struct nfs_read_data *, int);

static kmem_cache_t *nfs_rdata_cachep;
mempool_t *nfs_rdata_mempool;

#define MIN_POOL_READ (32)

void nfs_readdata_release(struct rpc_task *task)
{
        struct nfs_read_data *data = (struct nfs_read_data *)task->tk_calldata;
        nfs_readdata_free(data);
}

static
unsigned int nfs_page_length(struct inode *inode, struct page *page)
{
        loff_t i_size = i_size_read(inode);
        unsigned long idx;

        if (i_size <= 0)
                return 0;
        idx = (i_size - 1) >> PAGE_CACHE_SHIFT;
        if (page->index > idx)
                return 0;
        if (page->index != idx)
                return PAGE_CACHE_SIZE;
        return 1 + ((i_size - 1) & (PAGE_CACHE_SIZE - 1));
}

static
int nfs_return_empty_page(struct page *page)
{
        memclear_highpage_flush(page, 0, PAGE_CACHE_SIZE);
        SetPageUptodate(page);
        unlock_page(page);
        return 0;
}

/*
 * Read a page synchronously.
 */
static int nfs_readpage_sync(struct nfs_open_context *ctx, struct inode *inode,
                struct page *page)
{
        unsigned int rsize = NFS_SERVER(inode)->rsize;
        unsigned int count = PAGE_CACHE_SIZE;
        int result;
        struct nfs_read_data *rdata;

        rdata = nfs_readdata_alloc();
        if (!rdata)
                return -ENOMEM;

        memset(rdata, 0, sizeof(*rdata));
        rdata->flags = (IS_SWAPFILE(inode)? NFS_RPC_SWAPFLAGS : 0);
        rdata->cred = ctx->cred;
        rdata->inode = inode;
        INIT_LIST_HEAD(&rdata->pages);
        rdata->args.fh = NFS_FH(inode);
        rdata->args.context = ctx;
        rdata->args.pages = &page;
        rdata->args.pgbase = 0UL;
        rdata->args.count = rsize;
        rdata->res.fattr = &rdata->fattr;

        dprintk("NFS: nfs_readpage_sync(%p)\n", page);

        /*
         * This works now because the socket layer never tries to DMA
         * into this buffer directly.
         */
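        /*
         * Loop issuing reads of at most rsize bytes until the whole
         * page is filled, the server reports EOF, or an error occurs.
         * A short reply advances args.pgbase so the next iteration
         * continues where the previous one left off.
         */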
        do {
                if (count < rsize)
                        rdata->args.count = count;
                rdata->res.count = rdata->args.count;
                rdata->args.offset = page_offset(page) + rdata->args.pgbase;

                dprintk("NFS: nfs_proc_read(%s, (%s/%Ld), %Lu, %u)\n",
                        NFS_SERVER(inode)->hostname,
                        inode->i_sb->s_id,
                        (long long)NFS_FILEID(inode),
                        (unsigned long long)rdata->args.offset,
                        rdata->args.count);

                lock_kernel();
                result = NFS_PROTO(inode)->read(rdata);
                unlock_kernel();

                /*
                 * Even if we had a partial success we can't mark the page
                 * cache valid.
                 */
                if (result < 0) {
                        if (result == -EISDIR)
                                result = -EINVAL;
                        goto io_error;
                }
                count -= result;
                rdata->args.pgbase += result;
                /* Note: result == 0 should only happen if we're caching
                 * a write that extends the file and punches a hole.
                 */
                if (rdata->res.eof != 0 || result == 0)
                        break;
        } while (count);
        spin_lock(&inode->i_lock);
        NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ATIME;
        spin_unlock(&inode->i_lock);

        if (count)
                memclear_highpage_flush(page, rdata->args.pgbase, count);
        SetPageUptodate(page);
        if (PageError(page))
                ClearPageError(page);
        result = 0;

io_error:
        unlock_page(page);
        nfs_readdata_free(rdata);
        return result;
}

static int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode,
                struct page *page)
{
        LIST_HEAD(one_request);
        struct nfs_page *new;
        unsigned int len;

        len = nfs_page_length(inode, page);
        if (len == 0)
                return nfs_return_empty_page(page);
        new = nfs_create_request(ctx, inode, page, 0, len);
        if (IS_ERR(new)) {
                unlock_page(page);
                return PTR_ERR(new);
        }
        if (len < PAGE_CACHE_SIZE)
                memclear_highpage_flush(page, len, PAGE_CACHE_SIZE - len);

        nfs_list_add_request(new, &one_request);
        nfs_pagein_one(&one_request, inode);
        return 0;
}

static void nfs_readpage_release(struct nfs_page *req)
{
        unlock_page(req->wb_page);

        dprintk("NFS: read done (%s/%Ld %d@%Ld)\n",
                        req->wb_context->dentry->d_inode->i_sb->s_id,
                        (long long)NFS_FILEID(req->wb_context->dentry->d_inode),
                        req->wb_bytes,
                        (long long)req_offset(req));
        nfs_clear_request(req);
        nfs_release_request(req);
}

/*
 * Set up the NFS read request struct
 */
static void nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data,
                unsigned int count, unsigned int offset)
{
        struct inode *inode;

        data->req = req;
        data->inode = inode = req->wb_context->dentry->d_inode;
        data->cred = req->wb_context->cred;

        data->args.fh = NFS_FH(inode);
        data->args.offset = req_offset(req) + offset;
        data->args.pgbase = req->wb_pgbase + offset;
        data->args.pages = data->pagevec;
        data->args.count = count;
        data->args.context = req->wb_context;

        data->res.fattr = &data->fattr;
        data->res.count = count;
        data->res.eof = 0;
        nfs_fattr_init(&data->fattr);

        NFS_PROTO(inode)->read_setup(data);

        data->task.tk_cookie = (unsigned long)inode;
        data->task.tk_calldata = data;
        /* Release requests */
        data->task.tk_release = nfs_readdata_release;

        dprintk("NFS: %4d initiated read call (req %s/%Ld, %u bytes @ offset %Lu)\n",
                        data->task.tk_pid,
                        inode->i_sb->s_id,
                        (long long)NFS_FILEID(inode),
                        count,
                        (unsigned long long)data->args.offset);
}
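
/*
 * Error-path helper: mark every page on @head as failed and release
 * the requests so their pages are unlocked again.
 */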
static void
nfs_async_read_error(struct list_head *head)
{
        struct nfs_page *req;

        while (!list_empty(head)) {
                req = nfs_list_entry(head->next);
                nfs_list_remove_request(req);
                SetPageError(req->wb_page);
                nfs_readpage_release(req);
        }
}

/*
 * Start an async read operation
 */
static void nfs_execute_read(struct nfs_read_data *data)
{
        struct rpc_clnt *clnt = NFS_CLIENT(data->inode);
        sigset_t oldset;

        rpc_clnt_sigmask(clnt, &oldset);
        lock_kernel();
        rpc_execute(&data->task);
        unlock_kernel();
        rpc_clnt_sigunmask(clnt, &oldset);
}

/*
 * Generate multiple requests to fill a single page.
 *
 * We optimize to reduce the number of read operations on the wire.  If we
 * detect that we're reading a page, or an area of a page, that is past the
 * end of file, we do not generate NFS read operations but just clear the
 * parts of the page that would have come back zero from the server anyway.
 *
 * We rely on the cached value of i_size to make this determination; another
 * client can fill pages on the server past our cached end-of-file, but we
 * won't see the new data until our attribute cache is updated.  This is more
 * or less conventional NFS client behavior.
 */
static int nfs_pagein_multi(struct list_head *head, struct inode *inode)
{
        struct nfs_page *req = nfs_list_entry(head->next);
        struct page *page = req->wb_page;
        struct nfs_read_data *data;
        unsigned int rsize = NFS_SERVER(inode)->rsize;
        unsigned int nbytes, offset;
        int requests = 0;
        LIST_HEAD(list);

        nfs_list_remove_request(req);

        nbytes = req->wb_bytes;
        for (;;) {
                data = nfs_readdata_alloc();
                if (!data)
                        goto out_bad;
                INIT_LIST_HEAD(&data->pages);
                list_add(&data->pages, &list);
                requests++;
                if (nbytes <= rsize)
                        break;
                nbytes -= rsize;
        }
        /* The page is only unlocked once all sub-requests have completed. */
        atomic_set(&req->wb_complete, requests);

        ClearPageError(page);
        offset = 0;
        nbytes = req->wb_bytes;
        do {
                data = list_entry(list.next, struct nfs_read_data, pages);
                list_del_init(&data->pages);

                data->pagevec[0] = page;
                data->complete = nfs_readpage_result_partial;

                if (nbytes > rsize) {
                        nfs_read_rpcsetup(req, data, rsize, offset);
                        offset += rsize;
                        nbytes -= rsize;
                } else {
                        nfs_read_rpcsetup(req, data, nbytes, offset);
                        nbytes = 0;
                }
                nfs_execute_read(data);
        } while (nbytes != 0);

        return 0;

out_bad:
        while (!list_empty(&list)) {
                data = list_entry(list.next, struct nfs_read_data, pages);
                list_del(&data->pages);
                nfs_readdata_free(data);
        }
        SetPageError(page);
        nfs_readpage_release(req);
        return -ENOMEM;
}

static int nfs_pagein_one(struct list_head *head, struct inode *inode)
{
        struct nfs_page *req;
        struct page **pages;
        struct nfs_read_data *data;
        unsigned int count;

        if (NFS_SERVER(inode)->rsize < PAGE_CACHE_SIZE)
                return nfs_pagein_multi(head, inode);

        data = nfs_readdata_alloc();
        if (!data)
                goto out_bad;

        INIT_LIST_HEAD(&data->pages);
        pages = data->pagevec;
        count = 0;
        while (!list_empty(head)) {
                req = nfs_list_entry(head->next);
                nfs_list_remove_request(req);
                nfs_list_add_request(req, &data->pages);
                ClearPageError(req->wb_page);
                *pages++ = req->wb_page;
                count += req->wb_bytes;
        }
        req = nfs_list_entry(data->pages.next);

        data->complete = nfs_readpage_result_full;
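        /* A single RPC covers the whole coalesced list; the first
         * request supplies the file handle, offset and open context. */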
        nfs_read_rpcsetup(req, data, count, 0);

        nfs_execute_read(data);
        return 0;
out_bad:
        nfs_async_read_error(head);
        return -ENOMEM;
}

static int
nfs_pagein_list(struct list_head *head, int rpages)
{
        LIST_HEAD(one_request);
        struct nfs_page *req;
        int error = 0;
        unsigned int pages = 0;

        while (!list_empty(head)) {
                pages += nfs_coalesce_requests(head, &one_request, rpages);
                req = nfs_list_entry(one_request.next);
                error = nfs_pagein_one(&one_request, req->wb_context->dentry->d_inode);
                if (error < 0)
                        break;
        }
        if (error >= 0)
                return pages;

        nfs_async_read_error(head);
        return error;
}

/*
 * Handle a read reply that fills part of a page.
 */
static void nfs_readpage_result_partial(struct nfs_read_data *data, int status)
{
        struct nfs_page *req = data->req;
        struct page *page = req->wb_page;

        if (status >= 0) {
                unsigned int request = data->args.count;
                unsigned int result = data->res.count;

                if (result < request) {
                        /* Server returned fewer bytes than asked for:
                         * zero the tail of the requested range. */
                        memclear_highpage_flush(page,
                                        data->args.pgbase + result,
                                        request - result);
                }
        } else
                SetPageError(page);

        if (atomic_dec_and_test(&req->wb_complete)) {
                if (!PageError(page))
                        SetPageUptodate(page);
                nfs_readpage_release(req);
        }
}

/*
 * Handle a read reply that covers one or more whole-page requests.
 */
static void nfs_readpage_result_full(struct nfs_read_data *data, int status)
{
        unsigned int count = data->res.count;

        while (!list_empty(&data->pages)) {
                struct nfs_page *req = nfs_list_entry(data->pages.next);
                struct page *page = req->wb_page;
                nfs_list_remove_request(req);

                if (status >= 0) {
                        if (count < PAGE_CACHE_SIZE) {
                                if (count < req->wb_bytes)
                                        memclear_highpage_flush(page,
                                                        req->wb_pgbase + count,
                                                        req->wb_bytes - count);
                                count = 0;
                        } else
                                count -= PAGE_CACHE_SIZE;
                        SetPageUptodate(page);
                } else
                        SetPageError(page);
                nfs_readpage_release(req);
        }
}

/*
 * This is the callback from RPC telling us whether a reply was
 * received or some error occurred (timeout or socket shutdown).
 */
void nfs_readpage_result(struct rpc_task *task)
{
        struct nfs_read_data *data = (struct nfs_read_data *)task->tk_calldata;
        struct nfs_readargs *argp = &data->args;
        struct nfs_readres *resp = &data->res;
        int status = task->tk_status;

        dprintk("NFS: %4d nfs_readpage_result, (status %d)\n",
                task->tk_pid, status);

        /* Is this a short read? */
        if (task->tk_status >= 0 && resp->count < argp->count && !resp->eof) {
                /* Has the server at least made some progress? */
                if (resp->count != 0) {
                        /* Yes, so retry the read at the end of the data */
                        argp->offset += resp->count;
                        argp->pgbase += resp->count;
                        argp->count -= resp->count;
                        rpc_restart_call(task);
                        return;
                }
                task->tk_status = -EIO;
        }
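        /* A successful read updates atime on the server: force the
         * next revalidation to refresh our cached attributes. */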
        spin_lock(&data->inode->i_lock);
        NFS_I(data->inode)->cache_validity |= NFS_INO_INVALID_ATIME;
        spin_unlock(&data->inode->i_lock);
        data->complete(data, status);
}

/*
 * Read a page over NFS.
 * We read the page synchronously when the inode is flagged for
 * synchronous I/O (IS_SYNC); all other reads go through the
 * asynchronous RPC path.
 */
int nfs_readpage(struct file *file, struct page *page)
{
        struct nfs_open_context *ctx;
        struct inode *inode = page->mapping->host;
        int error;

        dprintk("NFS: nfs_readpage (%p %ld@%lu)\n",
                page, PAGE_CACHE_SIZE, page->index);
        /*
         * Try to flush any pending writes to the file..
         *
         * NOTE! Because we own the page lock, there cannot
         * be any new pending writes generated at this point
         * for this page (other pages can be written to).
         */
        error = nfs_wb_page(inode, page);
        if (error)
                goto out_error;

        if (file == NULL) {
                ctx = nfs_find_open_context(inode, NULL, FMODE_READ);
                if (ctx == NULL)
                        return -EBADF;
        } else
                ctx = get_nfs_open_context((struct nfs_open_context *)
                                file->private_data);
        if (!IS_SYNC(inode)) {
                error = nfs_readpage_async(ctx, inode, page);
                goto out;
        }

        error = nfs_readpage_sync(ctx, inode, page);
        if (error < 0 && IS_SWAPFILE(inode))
                printk("Aiee.. nfs swap-in of page failed!\n");
out:
        put_nfs_open_context(ctx);
        return error;

out_error:
        unlock_page(page);
        return error;
}

struct nfs_readdesc {
        struct list_head *head;
        struct nfs_open_context *ctx;
};

/*
 * Called once per page by read_cache_pages(): build an nfs_page
 * request for each page and queue it on desc->head for later
 * coalescing into read RPCs.
 */
static int
readpage_async_filler(void *data, struct page *page)
{
        struct nfs_readdesc *desc = (struct nfs_readdesc *)data;
        struct inode *inode = page->mapping->host;
        struct nfs_page *new;
        unsigned int len;

        nfs_wb_page(inode, page);
        len = nfs_page_length(inode, page);
        if (len == 0)
                return nfs_return_empty_page(page);
        new = nfs_create_request(desc->ctx, inode, page, 0, len);
        if (IS_ERR(new)) {
                SetPageError(page);
                unlock_page(page);
                return PTR_ERR(new);
        }
        if (len < PAGE_CACHE_SIZE)
                memclear_highpage_flush(page, len, PAGE_CACHE_SIZE - len);
        nfs_list_add_request(new, desc->head);
        return 0;
}

int nfs_readpages(struct file *filp, struct address_space *mapping,
                struct list_head *pages, unsigned nr_pages)
{
        LIST_HEAD(head);
        struct nfs_readdesc desc = {
                .head = &head,
        };
        struct inode *inode = mapping->host;
        struct nfs_server *server = NFS_SERVER(inode);
        int ret;

        dprintk("NFS: nfs_readpages (%s/%Ld %d)\n",
                        inode->i_sb->s_id,
                        (long long)NFS_FILEID(inode),
                        nr_pages);

        if (filp == NULL) {
                desc.ctx = nfs_find_open_context(inode, NULL, FMODE_READ);
                if (desc.ctx == NULL)
                        return -EBADF;
        } else
                desc.ctx = get_nfs_open_context((struct nfs_open_context *)
                                filp->private_data);
        ret = read_cache_pages(mapping, pages, readpage_async_filler, &desc);
        if (!list_empty(&head)) {
                int err = nfs_pagein_list(&head, server->rpages);
                if (!ret)
                        ret = err;
        }
        put_nfs_open_context(desc.ctx);
        return ret;
}

int nfs_init_readpagecache(void)
{
        nfs_rdata_cachep = kmem_cache_create("nfs_read_data",
                                             sizeof(struct nfs_read_data),
                                             0, SLAB_HWCACHE_ALIGN,
                                             NULL, NULL);
        if (nfs_rdata_cachep == NULL)
                return -ENOMEM;

        nfs_rdata_mempool = mempool_create(MIN_POOL_READ,
                                           mempool_alloc_slab,
                                           mempool_free_slab,
                                           nfs_rdata_cachep);
        if (nfs_rdata_mempool == NULL)
                return -ENOMEM;

        return 0;
}

void nfs_destroy_readpagecache(void)
{
        mempool_destroy(nfs_rdata_mempool);
        if (kmem_cache_destroy(nfs_rdata_cachep))
                printk(KERN_INFO "nfs_read_data: not all structures were freed\n");
}