1 /* 2 * linux/fs/read_write.c 3 * 4 * Copyright (C) 1991, 1992 Linus Torvalds 5 */ 6 7 #include <linux/slab.h> 8 #include <linux/stat.h> 9 #include <linux/fcntl.h> 10 #include <linux/file.h> 11 #include <linux/uio.h> 12 #include <linux/fsnotify.h> 13 #include <linux/security.h> 14 #include <linux/module.h> 15 #include <linux/syscalls.h> 16 #include <linux/pagemap.h> 17 #include <linux/splice.h> 18 #include "read_write.h" 19 20 #include <asm/uaccess.h> 21 #include <asm/unistd.h> 22 23 const struct file_operations generic_ro_fops = { 24 .llseek = generic_file_llseek, 25 .read = do_sync_read, 26 .aio_read = generic_file_aio_read, 27 .mmap = generic_file_readonly_mmap, 28 .splice_read = generic_file_splice_read, 29 }; 30 31 EXPORT_SYMBOL(generic_ro_fops); 32 33 static inline int unsigned_offsets(struct file *file) 34 { 35 return file->f_mode & FMODE_UNSIGNED_OFFSET; 36 } 37 38 /** 39 * generic_file_llseek_unlocked - lockless generic llseek implementation 40 * @file: file structure to seek on 41 * @offset: file offset to seek to 42 * @origin: type of seek 43 * 44 * Updates the file offset to the value specified by @offset and @origin. 45 * Locking must be provided by the caller. 46 */ 47 loff_t 48 generic_file_llseek_unlocked(struct file *file, loff_t offset, int origin) 49 { 50 struct inode *inode = file->f_mapping->host; 51 52 switch (origin) { 53 case SEEK_END: 54 offset += inode->i_size; 55 break; 56 case SEEK_CUR: 57 /* 58 * Here we special-case the lseek(fd, 0, SEEK_CUR) 59 * position-querying operation. Avoid rewriting the "same" 60 * f_pos value back to the file because a concurrent read(), 61 * write() or lseek() might have altered it 62 */ 63 if (offset == 0) 64 return file->f_pos; 65 offset += file->f_pos; 66 break; 67 case SEEK_DATA: 68 /* 69 * In the generic case the entire file is data, so as long as 70 * offset isn't at the end of the file then the offset is data. 71 */ 72 if (offset >= inode->i_size) 73 return -ENXIO; 74 break; 75 case SEEK_HOLE: 76 /* 77 * There is a virtual hole at the end of the file, so as long as 78 * offset isn't i_size or larger, return i_size. 79 */ 80 if (offset >= inode->i_size) 81 return -ENXIO; 82 offset = inode->i_size; 83 break; 84 } 85 86 if (offset < 0 && !unsigned_offsets(file)) 87 return -EINVAL; 88 if (offset > inode->i_sb->s_maxbytes) 89 return -EINVAL; 90 91 /* Special lock needed here? */ 92 if (offset != file->f_pos) { 93 file->f_pos = offset; 94 file->f_version = 0; 95 } 96 97 return offset; 98 } 99 EXPORT_SYMBOL(generic_file_llseek_unlocked); 100 101 /** 102 * generic_file_llseek - generic llseek implementation for regular files 103 * @file: file structure to seek on 104 * @offset: file offset to seek to 105 * @origin: type of seek 106 * 107 * This is a generic implemenation of ->llseek useable for all normal local 108 * filesystems. It just updates the file offset to the value specified by 109 * @offset and @origin under i_mutex. 110 */ 111 loff_t generic_file_llseek(struct file *file, loff_t offset, int origin) 112 { 113 loff_t rval; 114 115 mutex_lock(&file->f_dentry->d_inode->i_mutex); 116 rval = generic_file_llseek_unlocked(file, offset, origin); 117 mutex_unlock(&file->f_dentry->d_inode->i_mutex); 118 119 return rval; 120 } 121 EXPORT_SYMBOL(generic_file_llseek); 122 123 /** 124 * noop_llseek - No Operation Performed llseek implementation 125 * @file: file structure to seek on 126 * @offset: file offset to seek to 127 * @origin: type of seek 128 * 129 * This is an implementation of ->llseek useable for the rare special case when 130 * userspace expects the seek to succeed but the (device) file is actually not 131 * able to perform the seek. In this case you use noop_llseek() instead of 132 * falling back to the default implementation of ->llseek. 133 */ 134 loff_t noop_llseek(struct file *file, loff_t offset, int origin) 135 { 136 return file->f_pos; 137 } 138 EXPORT_SYMBOL(noop_llseek); 139 140 loff_t no_llseek(struct file *file, loff_t offset, int origin) 141 { 142 return -ESPIPE; 143 } 144 EXPORT_SYMBOL(no_llseek); 145 146 loff_t default_llseek(struct file *file, loff_t offset, int origin) 147 { 148 struct inode *inode = file->f_path.dentry->d_inode; 149 loff_t retval; 150 151 mutex_lock(&inode->i_mutex); 152 switch (origin) { 153 case SEEK_END: 154 offset += i_size_read(inode); 155 break; 156 case SEEK_CUR: 157 if (offset == 0) { 158 retval = file->f_pos; 159 goto out; 160 } 161 offset += file->f_pos; 162 break; 163 case SEEK_DATA: 164 /* 165 * In the generic case the entire file is data, so as 166 * long as offset isn't at the end of the file then the 167 * offset is data. 168 */ 169 if (offset >= inode->i_size) { 170 retval = -ENXIO; 171 goto out; 172 } 173 break; 174 case SEEK_HOLE: 175 /* 176 * There is a virtual hole at the end of the file, so 177 * as long as offset isn't i_size or larger, return 178 * i_size. 179 */ 180 if (offset >= inode->i_size) { 181 retval = -ENXIO; 182 goto out; 183 } 184 offset = inode->i_size; 185 break; 186 } 187 retval = -EINVAL; 188 if (offset >= 0 || unsigned_offsets(file)) { 189 if (offset != file->f_pos) { 190 file->f_pos = offset; 191 file->f_version = 0; 192 } 193 retval = offset; 194 } 195 out: 196 mutex_unlock(&inode->i_mutex); 197 return retval; 198 } 199 EXPORT_SYMBOL(default_llseek); 200 201 loff_t vfs_llseek(struct file *file, loff_t offset, int origin) 202 { 203 loff_t (*fn)(struct file *, loff_t, int); 204 205 fn = no_llseek; 206 if (file->f_mode & FMODE_LSEEK) { 207 if (file->f_op && file->f_op->llseek) 208 fn = file->f_op->llseek; 209 } 210 return fn(file, offset, origin); 211 } 212 EXPORT_SYMBOL(vfs_llseek); 213 214 SYSCALL_DEFINE3(lseek, unsigned int, fd, off_t, offset, unsigned int, origin) 215 { 216 off_t retval; 217 struct file * file; 218 int fput_needed; 219 220 retval = -EBADF; 221 file = fget_light(fd, &fput_needed); 222 if (!file) 223 goto bad; 224 225 retval = -EINVAL; 226 if (origin <= SEEK_MAX) { 227 loff_t res = vfs_llseek(file, offset, origin); 228 retval = res; 229 if (res != (loff_t)retval) 230 retval = -EOVERFLOW; /* LFS: should only happen on 32 bit platforms */ 231 } 232 fput_light(file, fput_needed); 233 bad: 234 return retval; 235 } 236 237 #ifdef __ARCH_WANT_SYS_LLSEEK 238 SYSCALL_DEFINE5(llseek, unsigned int, fd, unsigned long, offset_high, 239 unsigned long, offset_low, loff_t __user *, result, 240 unsigned int, origin) 241 { 242 int retval; 243 struct file * file; 244 loff_t offset; 245 int fput_needed; 246 247 retval = -EBADF; 248 file = fget_light(fd, &fput_needed); 249 if (!file) 250 goto bad; 251 252 retval = -EINVAL; 253 if (origin > SEEK_MAX) 254 goto out_putf; 255 256 offset = vfs_llseek(file, ((loff_t) offset_high << 32) | offset_low, 257 origin); 258 259 retval = (int)offset; 260 if (offset >= 0) { 261 retval = -EFAULT; 262 if (!copy_to_user(result, &offset, sizeof(offset))) 263 retval = 0; 264 } 265 out_putf: 266 fput_light(file, fput_needed); 267 bad: 268 return retval; 269 } 270 #endif 271 272 273 /* 274 * rw_verify_area doesn't like huge counts. We limit 275 * them to something that fits in "int" so that others 276 * won't have to do range checks all the time. 277 */ 278 int rw_verify_area(int read_write, struct file *file, loff_t *ppos, size_t count) 279 { 280 struct inode *inode; 281 loff_t pos; 282 int retval = -EINVAL; 283 284 inode = file->f_path.dentry->d_inode; 285 if (unlikely((ssize_t) count < 0)) 286 return retval; 287 pos = *ppos; 288 if (unlikely(pos < 0)) { 289 if (!unsigned_offsets(file)) 290 return retval; 291 if (count >= -pos) /* both values are in 0..LLONG_MAX */ 292 return -EOVERFLOW; 293 } else if (unlikely((loff_t) (pos + count) < 0)) { 294 if (!unsigned_offsets(file)) 295 return retval; 296 } 297 298 if (unlikely(inode->i_flock && mandatory_lock(inode))) { 299 retval = locks_mandatory_area( 300 read_write == READ ? FLOCK_VERIFY_READ : FLOCK_VERIFY_WRITE, 301 inode, file, pos, count); 302 if (retval < 0) 303 return retval; 304 } 305 retval = security_file_permission(file, 306 read_write == READ ? MAY_READ : MAY_WRITE); 307 if (retval) 308 return retval; 309 return count > MAX_RW_COUNT ? MAX_RW_COUNT : count; 310 } 311 312 static void wait_on_retry_sync_kiocb(struct kiocb *iocb) 313 { 314 set_current_state(TASK_UNINTERRUPTIBLE); 315 if (!kiocbIsKicked(iocb)) 316 schedule(); 317 else 318 kiocbClearKicked(iocb); 319 __set_current_state(TASK_RUNNING); 320 } 321 322 ssize_t do_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos) 323 { 324 struct iovec iov = { .iov_base = buf, .iov_len = len }; 325 struct kiocb kiocb; 326 ssize_t ret; 327 328 init_sync_kiocb(&kiocb, filp); 329 kiocb.ki_pos = *ppos; 330 kiocb.ki_left = len; 331 kiocb.ki_nbytes = len; 332 333 for (;;) { 334 ret = filp->f_op->aio_read(&kiocb, &iov, 1, kiocb.ki_pos); 335 if (ret != -EIOCBRETRY) 336 break; 337 wait_on_retry_sync_kiocb(&kiocb); 338 } 339 340 if (-EIOCBQUEUED == ret) 341 ret = wait_on_sync_kiocb(&kiocb); 342 *ppos = kiocb.ki_pos; 343 return ret; 344 } 345 346 EXPORT_SYMBOL(do_sync_read); 347 348 ssize_t vfs_read(struct file *file, char __user *buf, size_t count, loff_t *pos) 349 { 350 ssize_t ret; 351 352 if (!(file->f_mode & FMODE_READ)) 353 return -EBADF; 354 if (!file->f_op || (!file->f_op->read && !file->f_op->aio_read)) 355 return -EINVAL; 356 if (unlikely(!access_ok(VERIFY_WRITE, buf, count))) 357 return -EFAULT; 358 359 ret = rw_verify_area(READ, file, pos, count); 360 if (ret >= 0) { 361 count = ret; 362 if (file->f_op->read) 363 ret = file->f_op->read(file, buf, count, pos); 364 else 365 ret = do_sync_read(file, buf, count, pos); 366 if (ret > 0) { 367 fsnotify_access(file); 368 add_rchar(current, ret); 369 } 370 inc_syscr(current); 371 } 372 373 return ret; 374 } 375 376 EXPORT_SYMBOL(vfs_read); 377 378 ssize_t do_sync_write(struct file *filp, const char __user *buf, size_t len, loff_t *ppos) 379 { 380 struct iovec iov = { .iov_base = (void __user *)buf, .iov_len = len }; 381 struct kiocb kiocb; 382 ssize_t ret; 383 384 init_sync_kiocb(&kiocb, filp); 385 kiocb.ki_pos = *ppos; 386 kiocb.ki_left = len; 387 kiocb.ki_nbytes = len; 388 389 for (;;) { 390 ret = filp->f_op->aio_write(&kiocb, &iov, 1, kiocb.ki_pos); 391 if (ret != -EIOCBRETRY) 392 break; 393 wait_on_retry_sync_kiocb(&kiocb); 394 } 395 396 if (-EIOCBQUEUED == ret) 397 ret = wait_on_sync_kiocb(&kiocb); 398 *ppos = kiocb.ki_pos; 399 return ret; 400 } 401 402 EXPORT_SYMBOL(do_sync_write); 403 404 ssize_t vfs_write(struct file *file, const char __user *buf, size_t count, loff_t *pos) 405 { 406 ssize_t ret; 407 408 if (!(file->f_mode & FMODE_WRITE)) 409 return -EBADF; 410 if (!file->f_op || (!file->f_op->write && !file->f_op->aio_write)) 411 return -EINVAL; 412 if (unlikely(!access_ok(VERIFY_READ, buf, count))) 413 return -EFAULT; 414 415 ret = rw_verify_area(WRITE, file, pos, count); 416 if (ret >= 0) { 417 count = ret; 418 if (file->f_op->write) 419 ret = file->f_op->write(file, buf, count, pos); 420 else 421 ret = do_sync_write(file, buf, count, pos); 422 if (ret > 0) { 423 fsnotify_modify(file); 424 add_wchar(current, ret); 425 } 426 inc_syscw(current); 427 } 428 429 return ret; 430 } 431 432 EXPORT_SYMBOL(vfs_write); 433 434 static inline loff_t file_pos_read(struct file *file) 435 { 436 return file->f_pos; 437 } 438 439 static inline void file_pos_write(struct file *file, loff_t pos) 440 { 441 file->f_pos = pos; 442 } 443 444 SYSCALL_DEFINE3(read, unsigned int, fd, char __user *, buf, size_t, count) 445 { 446 struct file *file; 447 ssize_t ret = -EBADF; 448 int fput_needed; 449 450 file = fget_light(fd, &fput_needed); 451 if (file) { 452 loff_t pos = file_pos_read(file); 453 ret = vfs_read(file, buf, count, &pos); 454 file_pos_write(file, pos); 455 fput_light(file, fput_needed); 456 } 457 458 return ret; 459 } 460 461 SYSCALL_DEFINE3(write, unsigned int, fd, const char __user *, buf, 462 size_t, count) 463 { 464 struct file *file; 465 ssize_t ret = -EBADF; 466 int fput_needed; 467 468 file = fget_light(fd, &fput_needed); 469 if (file) { 470 loff_t pos = file_pos_read(file); 471 ret = vfs_write(file, buf, count, &pos); 472 file_pos_write(file, pos); 473 fput_light(file, fput_needed); 474 } 475 476 return ret; 477 } 478 479 SYSCALL_DEFINE(pread64)(unsigned int fd, char __user *buf, 480 size_t count, loff_t pos) 481 { 482 struct file *file; 483 ssize_t ret = -EBADF; 484 int fput_needed; 485 486 if (pos < 0) 487 return -EINVAL; 488 489 file = fget_light(fd, &fput_needed); 490 if (file) { 491 ret = -ESPIPE; 492 if (file->f_mode & FMODE_PREAD) 493 ret = vfs_read(file, buf, count, &pos); 494 fput_light(file, fput_needed); 495 } 496 497 return ret; 498 } 499 #ifdef CONFIG_HAVE_SYSCALL_WRAPPERS 500 asmlinkage long SyS_pread64(long fd, long buf, long count, loff_t pos) 501 { 502 return SYSC_pread64((unsigned int) fd, (char __user *) buf, 503 (size_t) count, pos); 504 } 505 SYSCALL_ALIAS(sys_pread64, SyS_pread64); 506 #endif 507 508 SYSCALL_DEFINE(pwrite64)(unsigned int fd, const char __user *buf, 509 size_t count, loff_t pos) 510 { 511 struct file *file; 512 ssize_t ret = -EBADF; 513 int fput_needed; 514 515 if (pos < 0) 516 return -EINVAL; 517 518 file = fget_light(fd, &fput_needed); 519 if (file) { 520 ret = -ESPIPE; 521 if (file->f_mode & FMODE_PWRITE) 522 ret = vfs_write(file, buf, count, &pos); 523 fput_light(file, fput_needed); 524 } 525 526 return ret; 527 } 528 #ifdef CONFIG_HAVE_SYSCALL_WRAPPERS 529 asmlinkage long SyS_pwrite64(long fd, long buf, long count, loff_t pos) 530 { 531 return SYSC_pwrite64((unsigned int) fd, (const char __user *) buf, 532 (size_t) count, pos); 533 } 534 SYSCALL_ALIAS(sys_pwrite64, SyS_pwrite64); 535 #endif 536 537 /* 538 * Reduce an iovec's length in-place. Return the resulting number of segments 539 */ 540 unsigned long iov_shorten(struct iovec *iov, unsigned long nr_segs, size_t to) 541 { 542 unsigned long seg = 0; 543 size_t len = 0; 544 545 while (seg < nr_segs) { 546 seg++; 547 if (len + iov->iov_len >= to) { 548 iov->iov_len = to - len; 549 break; 550 } 551 len += iov->iov_len; 552 iov++; 553 } 554 return seg; 555 } 556 EXPORT_SYMBOL(iov_shorten); 557 558 ssize_t do_sync_readv_writev(struct file *filp, const struct iovec *iov, 559 unsigned long nr_segs, size_t len, loff_t *ppos, iov_fn_t fn) 560 { 561 struct kiocb kiocb; 562 ssize_t ret; 563 564 init_sync_kiocb(&kiocb, filp); 565 kiocb.ki_pos = *ppos; 566 kiocb.ki_left = len; 567 kiocb.ki_nbytes = len; 568 569 for (;;) { 570 ret = fn(&kiocb, iov, nr_segs, kiocb.ki_pos); 571 if (ret != -EIOCBRETRY) 572 break; 573 wait_on_retry_sync_kiocb(&kiocb); 574 } 575 576 if (ret == -EIOCBQUEUED) 577 ret = wait_on_sync_kiocb(&kiocb); 578 *ppos = kiocb.ki_pos; 579 return ret; 580 } 581 582 /* Do it by hand, with file-ops */ 583 ssize_t do_loop_readv_writev(struct file *filp, struct iovec *iov, 584 unsigned long nr_segs, loff_t *ppos, io_fn_t fn) 585 { 586 struct iovec *vector = iov; 587 ssize_t ret = 0; 588 589 while (nr_segs > 0) { 590 void __user *base; 591 size_t len; 592 ssize_t nr; 593 594 base = vector->iov_base; 595 len = vector->iov_len; 596 vector++; 597 nr_segs--; 598 599 nr = fn(filp, base, len, ppos); 600 601 if (nr < 0) { 602 if (!ret) 603 ret = nr; 604 break; 605 } 606 ret += nr; 607 if (nr != len) 608 break; 609 } 610 611 return ret; 612 } 613 614 /* A write operation does a read from user space and vice versa */ 615 #define vrfy_dir(type) ((type) == READ ? VERIFY_WRITE : VERIFY_READ) 616 617 ssize_t rw_copy_check_uvector(int type, const struct iovec __user * uvector, 618 unsigned long nr_segs, unsigned long fast_segs, 619 struct iovec *fast_pointer, 620 struct iovec **ret_pointer) 621 { 622 unsigned long seg; 623 ssize_t ret; 624 struct iovec *iov = fast_pointer; 625 626 /* 627 * SuS says "The readv() function *may* fail if the iovcnt argument 628 * was less than or equal to 0, or greater than {IOV_MAX}. Linux has 629 * traditionally returned zero for zero segments, so... 630 */ 631 if (nr_segs == 0) { 632 ret = 0; 633 goto out; 634 } 635 636 /* 637 * First get the "struct iovec" from user memory and 638 * verify all the pointers 639 */ 640 if (nr_segs > UIO_MAXIOV) { 641 ret = -EINVAL; 642 goto out; 643 } 644 if (nr_segs > fast_segs) { 645 iov = kmalloc(nr_segs*sizeof(struct iovec), GFP_KERNEL); 646 if (iov == NULL) { 647 ret = -ENOMEM; 648 goto out; 649 } 650 } 651 if (copy_from_user(iov, uvector, nr_segs*sizeof(*uvector))) { 652 ret = -EFAULT; 653 goto out; 654 } 655 656 /* 657 * According to the Single Unix Specification we should return EINVAL 658 * if an element length is < 0 when cast to ssize_t or if the 659 * total length would overflow the ssize_t return value of the 660 * system call. 661 * 662 * Linux caps all read/write calls to MAX_RW_COUNT, and avoids the 663 * overflow case. 664 */ 665 ret = 0; 666 for (seg = 0; seg < nr_segs; seg++) { 667 void __user *buf = iov[seg].iov_base; 668 ssize_t len = (ssize_t)iov[seg].iov_len; 669 670 /* see if we we're about to use an invalid len or if 671 * it's about to overflow ssize_t */ 672 if (len < 0) { 673 ret = -EINVAL; 674 goto out; 675 } 676 if (unlikely(!access_ok(vrfy_dir(type), buf, len))) { 677 ret = -EFAULT; 678 goto out; 679 } 680 if (len > MAX_RW_COUNT - ret) { 681 len = MAX_RW_COUNT - ret; 682 iov[seg].iov_len = len; 683 } 684 ret += len; 685 } 686 out: 687 *ret_pointer = iov; 688 return ret; 689 } 690 691 static ssize_t do_readv_writev(int type, struct file *file, 692 const struct iovec __user * uvector, 693 unsigned long nr_segs, loff_t *pos) 694 { 695 size_t tot_len; 696 struct iovec iovstack[UIO_FASTIOV]; 697 struct iovec *iov = iovstack; 698 ssize_t ret; 699 io_fn_t fn; 700 iov_fn_t fnv; 701 702 if (!file->f_op) { 703 ret = -EINVAL; 704 goto out; 705 } 706 707 ret = rw_copy_check_uvector(type, uvector, nr_segs, 708 ARRAY_SIZE(iovstack), iovstack, &iov); 709 if (ret <= 0) 710 goto out; 711 712 tot_len = ret; 713 ret = rw_verify_area(type, file, pos, tot_len); 714 if (ret < 0) 715 goto out; 716 717 fnv = NULL; 718 if (type == READ) { 719 fn = file->f_op->read; 720 fnv = file->f_op->aio_read; 721 } else { 722 fn = (io_fn_t)file->f_op->write; 723 fnv = file->f_op->aio_write; 724 } 725 726 if (fnv) 727 ret = do_sync_readv_writev(file, iov, nr_segs, tot_len, 728 pos, fnv); 729 else 730 ret = do_loop_readv_writev(file, iov, nr_segs, pos, fn); 731 732 out: 733 if (iov != iovstack) 734 kfree(iov); 735 if ((ret + (type == READ)) > 0) { 736 if (type == READ) 737 fsnotify_access(file); 738 else 739 fsnotify_modify(file); 740 } 741 return ret; 742 } 743 744 ssize_t vfs_readv(struct file *file, const struct iovec __user *vec, 745 unsigned long vlen, loff_t *pos) 746 { 747 if (!(file->f_mode & FMODE_READ)) 748 return -EBADF; 749 if (!file->f_op || (!file->f_op->aio_read && !file->f_op->read)) 750 return -EINVAL; 751 752 return do_readv_writev(READ, file, vec, vlen, pos); 753 } 754 755 EXPORT_SYMBOL(vfs_readv); 756 757 ssize_t vfs_writev(struct file *file, const struct iovec __user *vec, 758 unsigned long vlen, loff_t *pos) 759 { 760 if (!(file->f_mode & FMODE_WRITE)) 761 return -EBADF; 762 if (!file->f_op || (!file->f_op->aio_write && !file->f_op->write)) 763 return -EINVAL; 764 765 return do_readv_writev(WRITE, file, vec, vlen, pos); 766 } 767 768 EXPORT_SYMBOL(vfs_writev); 769 770 SYSCALL_DEFINE3(readv, unsigned long, fd, const struct iovec __user *, vec, 771 unsigned long, vlen) 772 { 773 struct file *file; 774 ssize_t ret = -EBADF; 775 int fput_needed; 776 777 file = fget_light(fd, &fput_needed); 778 if (file) { 779 loff_t pos = file_pos_read(file); 780 ret = vfs_readv(file, vec, vlen, &pos); 781 file_pos_write(file, pos); 782 fput_light(file, fput_needed); 783 } 784 785 if (ret > 0) 786 add_rchar(current, ret); 787 inc_syscr(current); 788 return ret; 789 } 790 791 SYSCALL_DEFINE3(writev, unsigned long, fd, const struct iovec __user *, vec, 792 unsigned long, vlen) 793 { 794 struct file *file; 795 ssize_t ret = -EBADF; 796 int fput_needed; 797 798 file = fget_light(fd, &fput_needed); 799 if (file) { 800 loff_t pos = file_pos_read(file); 801 ret = vfs_writev(file, vec, vlen, &pos); 802 file_pos_write(file, pos); 803 fput_light(file, fput_needed); 804 } 805 806 if (ret > 0) 807 add_wchar(current, ret); 808 inc_syscw(current); 809 return ret; 810 } 811 812 static inline loff_t pos_from_hilo(unsigned long high, unsigned long low) 813 { 814 #define HALF_LONG_BITS (BITS_PER_LONG / 2) 815 return (((loff_t)high << HALF_LONG_BITS) << HALF_LONG_BITS) | low; 816 } 817 818 SYSCALL_DEFINE5(preadv, unsigned long, fd, const struct iovec __user *, vec, 819 unsigned long, vlen, unsigned long, pos_l, unsigned long, pos_h) 820 { 821 loff_t pos = pos_from_hilo(pos_h, pos_l); 822 struct file *file; 823 ssize_t ret = -EBADF; 824 int fput_needed; 825 826 if (pos < 0) 827 return -EINVAL; 828 829 file = fget_light(fd, &fput_needed); 830 if (file) { 831 ret = -ESPIPE; 832 if (file->f_mode & FMODE_PREAD) 833 ret = vfs_readv(file, vec, vlen, &pos); 834 fput_light(file, fput_needed); 835 } 836 837 if (ret > 0) 838 add_rchar(current, ret); 839 inc_syscr(current); 840 return ret; 841 } 842 843 SYSCALL_DEFINE5(pwritev, unsigned long, fd, const struct iovec __user *, vec, 844 unsigned long, vlen, unsigned long, pos_l, unsigned long, pos_h) 845 { 846 loff_t pos = pos_from_hilo(pos_h, pos_l); 847 struct file *file; 848 ssize_t ret = -EBADF; 849 int fput_needed; 850 851 if (pos < 0) 852 return -EINVAL; 853 854 file = fget_light(fd, &fput_needed); 855 if (file) { 856 ret = -ESPIPE; 857 if (file->f_mode & FMODE_PWRITE) 858 ret = vfs_writev(file, vec, vlen, &pos); 859 fput_light(file, fput_needed); 860 } 861 862 if (ret > 0) 863 add_wchar(current, ret); 864 inc_syscw(current); 865 return ret; 866 } 867 868 static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos, 869 size_t count, loff_t max) 870 { 871 struct file * in_file, * out_file; 872 struct inode * in_inode, * out_inode; 873 loff_t pos; 874 ssize_t retval; 875 int fput_needed_in, fput_needed_out, fl; 876 877 /* 878 * Get input file, and verify that it is ok.. 879 */ 880 retval = -EBADF; 881 in_file = fget_light(in_fd, &fput_needed_in); 882 if (!in_file) 883 goto out; 884 if (!(in_file->f_mode & FMODE_READ)) 885 goto fput_in; 886 retval = -ESPIPE; 887 if (!ppos) 888 ppos = &in_file->f_pos; 889 else 890 if (!(in_file->f_mode & FMODE_PREAD)) 891 goto fput_in; 892 retval = rw_verify_area(READ, in_file, ppos, count); 893 if (retval < 0) 894 goto fput_in; 895 count = retval; 896 897 /* 898 * Get output file, and verify that it is ok.. 899 */ 900 retval = -EBADF; 901 out_file = fget_light(out_fd, &fput_needed_out); 902 if (!out_file) 903 goto fput_in; 904 if (!(out_file->f_mode & FMODE_WRITE)) 905 goto fput_out; 906 retval = -EINVAL; 907 in_inode = in_file->f_path.dentry->d_inode; 908 out_inode = out_file->f_path.dentry->d_inode; 909 retval = rw_verify_area(WRITE, out_file, &out_file->f_pos, count); 910 if (retval < 0) 911 goto fput_out; 912 count = retval; 913 914 if (!max) 915 max = min(in_inode->i_sb->s_maxbytes, out_inode->i_sb->s_maxbytes); 916 917 pos = *ppos; 918 if (unlikely(pos + count > max)) { 919 retval = -EOVERFLOW; 920 if (pos >= max) 921 goto fput_out; 922 count = max - pos; 923 } 924 925 fl = 0; 926 #if 0 927 /* 928 * We need to debate whether we can enable this or not. The 929 * man page documents EAGAIN return for the output at least, 930 * and the application is arguably buggy if it doesn't expect 931 * EAGAIN on a non-blocking file descriptor. 932 */ 933 if (in_file->f_flags & O_NONBLOCK) 934 fl = SPLICE_F_NONBLOCK; 935 #endif 936 retval = do_splice_direct(in_file, ppos, out_file, count, fl); 937 938 if (retval > 0) { 939 add_rchar(current, retval); 940 add_wchar(current, retval); 941 } 942 943 inc_syscr(current); 944 inc_syscw(current); 945 if (*ppos > max) 946 retval = -EOVERFLOW; 947 948 fput_out: 949 fput_light(out_file, fput_needed_out); 950 fput_in: 951 fput_light(in_file, fput_needed_in); 952 out: 953 return retval; 954 } 955 956 SYSCALL_DEFINE4(sendfile, int, out_fd, int, in_fd, off_t __user *, offset, size_t, count) 957 { 958 loff_t pos; 959 off_t off; 960 ssize_t ret; 961 962 if (offset) { 963 if (unlikely(get_user(off, offset))) 964 return -EFAULT; 965 pos = off; 966 ret = do_sendfile(out_fd, in_fd, &pos, count, MAX_NON_LFS); 967 if (unlikely(put_user(pos, offset))) 968 return -EFAULT; 969 return ret; 970 } 971 972 return do_sendfile(out_fd, in_fd, NULL, count, 0); 973 } 974 975 SYSCALL_DEFINE4(sendfile64, int, out_fd, int, in_fd, loff_t __user *, offset, size_t, count) 976 { 977 loff_t pos; 978 ssize_t ret; 979 980 if (offset) { 981 if (unlikely(copy_from_user(&pos, offset, sizeof(loff_t)))) 982 return -EFAULT; 983 ret = do_sendfile(out_fd, in_fd, &pos, count, 0); 984 if (unlikely(put_user(pos, offset))) 985 return -EFAULT; 986 return ret; 987 } 988 989 return do_sendfile(out_fd, in_fd, NULL, count, 0); 990 } 991