/*
 *  linux/fs/nfs/file.c
 *
 *  Copyright (C) 1992  Rick Sladkey
 *
 *  Changes Copyright (C) 1994 by Florian La Roche
 *   - Do not copy data too often around in the kernel.
 *   - In nfs_file_read the return value of kmalloc wasn't checked.
 *   - Put in a better version of read look-ahead buffering. Original idea
 *     and implementation by Wai S Kok elekokws@ee.nus.sg.
 *
 *  Expire cache on write to a file by Wai S Kok (Oct 1994).
 *
 *  Total rewrite of read side for new NFS buffer cache.. Linus.
 *
 *  nfs regular file handling functions
 */

#include <linux/time.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/fcntl.h>
#include <linux/stat.h>
#include <linux/nfs_fs.h>
#include <linux/nfs_mount.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/pagemap.h>
#include <linux/smp_lock.h>
#include <linux/aio.h>

#include <asm/uaccess.h>
#include <asm/system.h>

#include "delegation.h"
#include "internal.h"
#include "iostat.h"

#define NFSDBG_FACILITY		NFSDBG_FILE

static int nfs_file_open(struct inode *, struct file *);
static int nfs_file_release(struct inode *, struct file *);
static loff_t nfs_file_llseek(struct file *file, loff_t offset, int origin);
static int nfs_file_mmap(struct file *, struct vm_area_struct *);
static ssize_t nfs_file_splice_read(struct file *filp, loff_t *ppos,
					struct pipe_inode_info *pipe,
					size_t count, unsigned int flags);
static ssize_t nfs_file_read(struct kiocb *, const struct iovec *iov,
				unsigned long nr_segs, loff_t pos);
static ssize_t nfs_file_write(struct kiocb *, const struct iovec *iov,
				unsigned long nr_segs, loff_t pos);
static int nfs_file_flush(struct file *, fl_owner_t id);
static int nfs_fsync(struct file *, struct dentry *dentry, int datasync);
static int nfs_check_flags(int flags);
static int nfs_lock(struct file *filp, int cmd, struct file_lock *fl);
static int nfs_flock(struct file *filp, int cmd, struct file_lock *fl);
static int nfs_setlease(struct file *file, long arg, struct file_lock **fl);

static struct vm_operations_struct nfs_file_vm_ops;

const struct file_operations nfs_file_operations = {
	.llseek		= nfs_file_llseek,
	.read		= do_sync_read,
	.write		= do_sync_write,
	.aio_read	= nfs_file_read,
	.aio_write	= nfs_file_write,
	.mmap		= nfs_file_mmap,
	.open		= nfs_file_open,
	.flush		= nfs_file_flush,
	.release	= nfs_file_release,
	.fsync		= nfs_fsync,
	.lock		= nfs_lock,
	.flock		= nfs_flock,
	.splice_read	= nfs_file_splice_read,
	.check_flags	= nfs_check_flags,
	.setlease	= nfs_setlease,
};

const struct inode_operations nfs_file_inode_operations = {
	.permission	= nfs_permission,
	.getattr	= nfs_getattr,
	.setattr	= nfs_setattr,
};

#ifdef CONFIG_NFS_V3
const struct inode_operations nfs3_file_inode_operations = {
	.permission	= nfs_permission,
	.getattr	= nfs_getattr,
	.setattr	= nfs_setattr,
	.listxattr	= nfs3_listxattr,
	.getxattr	= nfs3_getxattr,
	.setxattr	= nfs3_setxattr,
	.removexattr	= nfs3_removexattr,
};
#endif  /* CONFIG_NFS_V3 */

/* Hack for future NFS swap support */
#ifndef IS_SWAPFILE
# define IS_SWAPFILE(inode)	(0)
#endif
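/*
 * Reject open flag combinations that NFS cannot handle.  In particular,
 * O_APPEND together with O_DIRECT is refused: once the page cache is
 * bypassed the client has no reliable way to honour atomic append
 * semantics.
 */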
static int nfs_check_flags(int flags)
{
	if ((flags & (O_APPEND | O_DIRECT)) == (O_APPEND | O_DIRECT))
		return -EINVAL;

	return 0;
}

/*
 * Open file
 */
static int
nfs_file_open(struct inode *inode, struct file *filp)
{
	int res;

	res = nfs_check_flags(filp->f_flags);
	if (res)
		return res;

	nfs_inc_stats(inode, NFSIOS_VFSOPEN);
	lock_kernel();
	res = NFS_PROTO(inode)->file_open(inode, filp);
	unlock_kernel();
	return res;
}

static int
nfs_file_release(struct inode *inode, struct file *filp)
{
	/* Ensure that dirty pages are flushed out with the right creds */
	if (filp->f_mode & FMODE_WRITE)
		filemap_fdatawrite(filp->f_mapping);
	nfs_inc_stats(inode, NFSIOS_VFSRELEASE);
	return NFS_PROTO(inode)->file_release(inode, filp);
}

/**
 * nfs_revalidate_file_size - Revalidate the file size
 * @inode - pointer to inode struct
 * @filp - pointer to struct file
 *
 * Revalidates the file length. This is basically a wrapper around
 * nfs_revalidate_inode() that takes into account the fact that we may
 * have cached writes (in which case we don't care about the server's
 * idea of what the file length is), or O_DIRECT (in which case we
 * shouldn't trust the cache).
 */
static int nfs_revalidate_file_size(struct inode *inode, struct file *filp)
{
	struct nfs_server *server = NFS_SERVER(inode);
	struct nfs_inode *nfsi = NFS_I(inode);

	if (server->flags & NFS_MOUNT_NOAC)
		goto force_reval;
	if (filp->f_flags & O_DIRECT)
		goto force_reval;
	if (nfsi->npages != 0)
		return 0;
	if (!(nfsi->cache_validity & NFS_INO_REVAL_PAGECACHE) && !nfs_attribute_timeout(inode))
		return 0;
force_reval:
	return __nfs_revalidate_inode(server, inode);
}

static loff_t nfs_file_llseek(struct file *filp, loff_t offset, int origin)
{
	/* origin == SEEK_END => we must revalidate the cached file length */
	if (origin == SEEK_END) {
		struct inode *inode = filp->f_mapping->host;
		int retval = nfs_revalidate_file_size(inode, filp);
		if (retval < 0)
			return (loff_t)retval;
	}
	return remote_llseek(filp, offset, origin);
}

/*
 * Helper for nfs_file_flush() and nfs_fsync()
 *
 * Notice that it clears the NFS_CONTEXT_ERROR_WRITE before synching to
 * disk, but it retrieves and clears ctx->error after synching, despite
 * the two being set at the same time in nfs_context_set_write_error().
 * This is because the former is used to notify the _next_ call to
 * nfs_file_write() that a write error occurred, and hence cause it to
 * fall back to doing a synchronous write.
 */
static int nfs_do_fsync(struct nfs_open_context *ctx, struct inode *inode)
{
	int have_error, status;
	int ret = 0;

	have_error = test_and_clear_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags);
	status = nfs_wb_all(inode);
	have_error |= test_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags);
	if (have_error)
		ret = xchg(&ctx->error, 0);
	if (!ret)
		ret = status;
	return ret;
}

/*
 * Flush all dirty pages, and check for write errors.
 */
static int
nfs_file_flush(struct file *file, fl_owner_t id)
{
	struct nfs_open_context *ctx = nfs_file_open_context(file);
	struct inode *inode = file->f_path.dentry->d_inode;
	int status;

	dfprintk(VFS, "nfs: flush(%s/%ld)\n", inode->i_sb->s_id, inode->i_ino);

	if ((file->f_mode & FMODE_WRITE) == 0)
		return 0;
	nfs_inc_stats(inode, NFSIOS_VFSFLUSH);

	/* Ensure that data+attribute caches are up to date after close() */
	status = nfs_do_fsync(ctx, inode);
	if (!status)
		nfs_revalidate_inode(NFS_SERVER(inode), inode);
	return status;
}

static ssize_t
nfs_file_read(struct kiocb *iocb, const struct iovec *iov,
		unsigned long nr_segs, loff_t pos)
{
	struct dentry *dentry = iocb->ki_filp->f_path.dentry;
	struct inode *inode = dentry->d_inode;
	ssize_t result;
	size_t count = iov_length(iov, nr_segs);

#ifdef CONFIG_NFS_DIRECTIO
	if (iocb->ki_filp->f_flags & O_DIRECT)
		return nfs_file_direct_read(iocb, iov, nr_segs, pos);
#endif

	dfprintk(VFS, "nfs: read(%s/%s, %lu@%lu)\n",
			dentry->d_parent->d_name.name, dentry->d_name.name,
			(unsigned long) count, (unsigned long) pos);

	result = nfs_revalidate_mapping(inode, iocb->ki_filp->f_mapping);
	nfs_add_stats(inode, NFSIOS_NORMALREADBYTES, count);
	if (!result)
		result = generic_file_aio_read(iocb, iov, nr_segs, pos);
	return result;
}

static ssize_t
nfs_file_splice_read(struct file *filp, loff_t *ppos,
		     struct pipe_inode_info *pipe, size_t count,
		     unsigned int flags)
{
	struct dentry *dentry = filp->f_path.dentry;
	struct inode *inode = dentry->d_inode;
	ssize_t res;

	dfprintk(VFS, "nfs: splice_read(%s/%s, %lu@%Lu)\n",
			dentry->d_parent->d_name.name, dentry->d_name.name,
			(unsigned long) count, (unsigned long long) *ppos);

	res = nfs_revalidate_mapping(inode, filp->f_mapping);
	if (!res)
		res = generic_file_splice_read(filp, ppos, pipe, count, flags);
	return res;
}

static int
nfs_file_mmap(struct file *file, struct vm_area_struct *vma)
{
	struct dentry *dentry = file->f_path.dentry;
	struct inode *inode = dentry->d_inode;
	int status;

	dfprintk(VFS, "nfs: mmap(%s/%s)\n",
			dentry->d_parent->d_name.name, dentry->d_name.name);

	status = nfs_revalidate_mapping(inode, file->f_mapping);
	if (!status) {
		vma->vm_ops = &nfs_file_vm_ops;
		vma->vm_flags |= VM_CAN_NONLINEAR;
		file_accessed(file);
	}
	return status;
}

/*
 * Flush any dirty pages for this process, and check for write errors.
 * The return status from this call provides a reliable indication of
 * whether any write errors occurred for this process.
 */
static int
nfs_fsync(struct file *file, struct dentry *dentry, int datasync)
{
	struct nfs_open_context *ctx = nfs_file_open_context(file);
	struct inode *inode = dentry->d_inode;

	dfprintk(VFS, "nfs: fsync(%s/%ld)\n", inode->i_sb->s_id, inode->i_ino);

	nfs_inc_stats(inode, NFSIOS_VFSFSYNC);
	return nfs_do_fsync(ctx, inode);
}

/*
 * This does the "real" work of the write. We must allocate and lock the
 * page to be sent back to the generic routine, which then copies the
 * data from user space.
 *
 * If the writer ends up delaying the write, the writer needs to
 * increment the page use counts until he is done with the page.
 */
static int nfs_write_begin(struct file *file, struct address_space *mapping,
			loff_t pos, unsigned len, unsigned flags,
			struct page **pagep, void **fsdata)
{
	int ret;
	pgoff_t index;
	struct page *page;
	index = pos >> PAGE_CACHE_SHIFT;

	page = __grab_cache_page(mapping, index);
	if (!page)
		return -ENOMEM;
	*pagep = page;

	ret = nfs_flush_incompatible(file, page);
	if (ret) {
		unlock_page(page);
		page_cache_release(page);
	}
	return ret;
}

static int nfs_write_end(struct file *file, struct address_space *mapping,
			loff_t pos, unsigned len, unsigned copied,
			struct page *page, void *fsdata)
{
	unsigned offset = pos & (PAGE_CACHE_SIZE - 1);
	int status;

	lock_kernel();
	status = nfs_updatepage(file, page, offset, copied);
	unlock_kernel();

	unlock_page(page);
	page_cache_release(page);

	return status < 0 ? status : copied;
}

static void nfs_invalidate_page(struct page *page, unsigned long offset)
{
	if (offset != 0)
		return;
	/* Cancel any unstarted writes on this page */
	nfs_wb_page_cancel(page->mapping->host, page);
}

static int nfs_release_page(struct page *page, gfp_t gfp)
{
	/* If PagePrivate() is set, then the page is not freeable */
	return 0;
}

static int nfs_launder_page(struct page *page)
{
	return nfs_wb_page(page->mapping->host, page);
}

const struct address_space_operations nfs_file_aops = {
	.readpage	= nfs_readpage,
	.readpages	= nfs_readpages,
	.set_page_dirty	= __set_page_dirty_nobuffers,
	.writepage	= nfs_writepage,
	.writepages	= nfs_writepages,
	.write_begin	= nfs_write_begin,
	.write_end	= nfs_write_end,
	.invalidatepage	= nfs_invalidate_page,
	.releasepage	= nfs_release_page,
#ifdef CONFIG_NFS_DIRECTIO
	.direct_IO	= nfs_direct_IO,
#endif
	.launder_page	= nfs_launder_page,
};
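/*
 * Notification that a page in a shared writable mapping is about to be
 * modified.  Verify that the page still belongs to this file's mapping,
 * then run it through nfs_write_begin()/nfs_write_end() so that an NFS
 * write request is set up using the open context of the mmap()ed file.
 */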
static int nfs_vm_page_mkwrite(struct vm_area_struct *vma, struct page *page)
{
	struct file *filp = vma->vm_file;
	unsigned pagelen;
	int ret = -EINVAL;
	void *fsdata;
	struct address_space *mapping;
	loff_t offset;

	lock_page(page);
	mapping = page->mapping;
	if (mapping != vma->vm_file->f_path.dentry->d_inode->i_mapping) {
		unlock_page(page);
		return -EINVAL;
	}
	pagelen = nfs_page_length(page);
	offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
	unlock_page(page);

	/*
	 * we can use mapping after releasing the page lock, because:
	 * we hold mmap_sem on the fault path, which should pin the vma
	 * which should pin the file, which pins the dentry which should
	 * hold a reference on inode.
	 */

	if (pagelen) {
		struct page *page2 = NULL;
		ret = nfs_write_begin(filp, mapping, offset, pagelen,
				0, &page2, &fsdata);
		if (!ret)
			ret = nfs_write_end(filp, mapping, offset, pagelen,
					pagelen, page2, fsdata);
	}
	return ret;
}

static struct vm_operations_struct nfs_file_vm_ops = {
	.fault = filemap_fault,
	.page_mkwrite = nfs_vm_page_mkwrite,
};

static int nfs_need_sync_write(struct file *filp, struct inode *inode)
{
	struct nfs_open_context *ctx;

	if (IS_SYNC(inode) || (filp->f_flags & O_SYNC))
		return 1;
	ctx = nfs_file_open_context(filp);
	if (test_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags))
		return 1;
	return 0;
}

static ssize_t nfs_file_write(struct kiocb *iocb, const struct iovec *iov,
				unsigned long nr_segs, loff_t pos)
{
	struct dentry *dentry = iocb->ki_filp->f_path.dentry;
	struct inode *inode = dentry->d_inode;
	ssize_t result;
	size_t count = iov_length(iov, nr_segs);

#ifdef CONFIG_NFS_DIRECTIO
	if (iocb->ki_filp->f_flags & O_DIRECT)
		return nfs_file_direct_write(iocb, iov, nr_segs, pos);
#endif

	dfprintk(VFS, "nfs: write(%s/%s(%ld), %lu@%Ld)\n",
			dentry->d_parent->d_name.name, dentry->d_name.name,
			inode->i_ino, (unsigned long) count, (long long) pos);

	result = -EBUSY;
	if (IS_SWAPFILE(inode))
		goto out_swapfile;
	/*
	 * O_APPEND implies that we must revalidate the file length.
	 */
	if (iocb->ki_filp->f_flags & O_APPEND) {
		result = nfs_revalidate_file_size(inode, iocb->ki_filp);
		if (result)
			goto out;
	}

	result = count;
	if (!count)
		goto out;

	nfs_add_stats(inode, NFSIOS_NORMALWRITTENBYTES, count);
	result = generic_file_aio_write(iocb, iov, nr_segs, pos);
	/* Return error values for O_SYNC and IS_SYNC() */
	if (result >= 0 && nfs_need_sync_write(iocb->ki_filp, inode)) {
		int err = nfs_do_fsync(nfs_file_open_context(iocb->ki_filp), inode);
		if (err < 0)
			result = err;
	}
out:
	return result;

out_swapfile:
	printk(KERN_INFO "NFS: attempt to write to active swap file!\n");
	goto out;
}

static int do_getlk(struct file *filp, int cmd, struct file_lock *fl)
{
	struct inode *inode = filp->f_mapping->host;
	int status = 0;

	lock_kernel();
	/* Try local locking first */
	posix_test_lock(filp, fl);
	if (fl->fl_type != F_UNLCK) {
		/* found a conflict */
		goto out;
	}

	if (nfs_have_delegation(inode, FMODE_READ))
		goto out_noconflict;

	if (NFS_SERVER(inode)->flags & NFS_MOUNT_NONLM)
		goto out_noconflict;

	status = NFS_PROTO(inode)->lock(filp, cmd, fl);
out:
	unlock_kernel();
	return status;
out_noconflict:
	fl->fl_type = F_UNLCK;
	goto out;
}

static int do_vfs_lock(struct file *file, struct file_lock *fl)
{
	int res = 0;
	switch (fl->fl_flags & (FL_POSIX|FL_FLOCK)) {
		case FL_POSIX:
			res = posix_lock_file_wait(file, fl);
			break;
		case FL_FLOCK:
			res = flock_lock_file_wait(file, fl);
			break;
		default:
			BUG();
	}
	if (res < 0)
		dprintk(KERN_WARNING "%s: VFS is out of sync with lock manager"
			" - error %d!\n",
				__FUNCTION__, res);
	return res;
}
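/*
 * Release a POSIX or flock() lock.  Dirty pages are flushed out first so
 * that data written under the lock reaches the server before the lock is
 * dropped; the unlock itself is then sent to the lock manager, or handled
 * locally when the mount uses "-onolock".
 */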
static int do_unlk(struct file *filp, int cmd, struct file_lock *fl)
{
	struct inode *inode = filp->f_mapping->host;
	int status;

	/*
	 * Flush all pending writes before doing anything
	 * with locks..
	 */
	nfs_sync_mapping(filp->f_mapping);

	/* NOTE: special case
	 * 	If we're signalled while cleaning up locks on process exit, we
	 * 	still need to complete the unlock.
	 */
	lock_kernel();
	/* Use local locking if mounted with "-onolock" */
	if (!(NFS_SERVER(inode)->flags & NFS_MOUNT_NONLM))
		status = NFS_PROTO(inode)->lock(filp, cmd, fl);
	else
		status = do_vfs_lock(filp, fl);
	unlock_kernel();
	return status;
}

static int do_setlk(struct file *filp, int cmd, struct file_lock *fl)
{
	struct inode *inode = filp->f_mapping->host;
	int status;

	/*
	 * Flush all pending writes before doing anything
	 * with locks..
	 */
	status = nfs_sync_mapping(filp->f_mapping);
	if (status != 0)
		goto out;

	lock_kernel();
	/* Use local locking if mounted with "-onolock" */
	if (!(NFS_SERVER(inode)->flags & NFS_MOUNT_NONLM)) {
		status = NFS_PROTO(inode)->lock(filp, cmd, fl);
		/* If we were signalled we still need to ensure that
		 * we clean up any state on the server. We therefore
		 * record the lock call as having succeeded in order to
		 * ensure that locks_remove_posix() cleans it out when
		 * the process exits.
		 */
		if (status == -EINTR || status == -ERESTARTSYS)
			do_vfs_lock(filp, fl);
	} else
		status = do_vfs_lock(filp, fl);
	unlock_kernel();
	if (status < 0)
		goto out;
	/*
	 * Make sure we clear the cache whenever we try to get the lock.
	 * This makes locking act as a cache coherency point.
	 */
	nfs_sync_mapping(filp->f_mapping);
	nfs_zap_caches(inode);
out:
	return status;
}

/*
 * Lock a (portion of) a file
 */
static int nfs_lock(struct file *filp, int cmd, struct file_lock *fl)
{
	struct inode *inode = filp->f_mapping->host;

	dprintk("NFS: nfs_lock(f=%s/%ld, t=%x, fl=%x, r=%Ld:%Ld)\n",
			inode->i_sb->s_id, inode->i_ino,
			fl->fl_type, fl->fl_flags,
			(long long)fl->fl_start, (long long)fl->fl_end);
	nfs_inc_stats(inode, NFSIOS_VFSLOCK);

	/* No mandatory locks over NFS */
	if (__mandatory_lock(inode) && fl->fl_type != F_UNLCK)
		return -ENOLCK;

	if (IS_GETLK(cmd))
		return do_getlk(filp, cmd, fl);
	if (fl->fl_type == F_UNLCK)
		return do_unlk(filp, cmd, fl);
	return do_setlk(filp, cmd, fl);
}

/*
 * Lock a (portion of) a file
 */
static int nfs_flock(struct file *filp, int cmd, struct file_lock *fl)
{
	dprintk("NFS: nfs_flock(f=%s/%ld, t=%x, fl=%x)\n",
			filp->f_path.dentry->d_inode->i_sb->s_id,
			filp->f_path.dentry->d_inode->i_ino,
			fl->fl_type, fl->fl_flags);

	/*
	 * No BSD flocks over NFS allowed.
	 * Note: we could try to fake a POSIX lock request here by
	 * using ((u32) filp | 0x80000000) or some such as the pid.
	 * Not sure whether that would be unique, though, or whether
	 * that would break in other places.
	 */
	if (!(fl->fl_flags & FL_FLOCK))
		return -ENOLCK;

	/* We're simulating flock() locks using posix locks on the server */
	fl->fl_owner = (fl_owner_t)filp;
	fl->fl_start = 0;
	fl->fl_end = OFFSET_MAX;

	if (fl->fl_type == F_UNLCK)
		return do_unlk(filp, cmd, fl);
	return do_setlk(filp, cmd, fl);
}
static int nfs_setlease(struct file *file, long arg, struct file_lock **fl)
{
	/*
	 * There is no protocol support for leases, so we have no way
	 * to implement them correctly in the face of opens by other
	 * clients.
	 */
	return -EINVAL;
}