1 /* 2 * linux/fs/read_write.c 3 * 4 * Copyright (C) 1991, 1992 Linus Torvalds 5 */ 6 7 #include <linux/slab.h> 8 #include <linux/stat.h> 9 #include <linux/fcntl.h> 10 #include <linux/file.h> 11 #include <linux/uio.h> 12 #include <linux/fsnotify.h> 13 #include <linux/security.h> 14 #include <linux/module.h> 15 #include <linux/syscalls.h> 16 #include <linux/pagemap.h> 17 #include <linux/splice.h> 18 #include "read_write.h" 19 20 #include <asm/uaccess.h> 21 #include <asm/unistd.h> 22 23 const struct file_operations generic_ro_fops = { 24 .llseek = generic_file_llseek, 25 .read = do_sync_read, 26 .aio_read = generic_file_aio_read, 27 .mmap = generic_file_readonly_mmap, 28 .splice_read = generic_file_splice_read, 29 }; 30 31 EXPORT_SYMBOL(generic_ro_fops); 32 33 static inline int unsigned_offsets(struct file *file) 34 { 35 return file->f_mode & FMODE_UNSIGNED_OFFSET; 36 } 37 38 /** 39 * generic_file_llseek_unlocked - lockless generic llseek implementation 40 * @file: file structure to seek on 41 * @offset: file offset to seek to 42 * @origin: type of seek 43 * 44 * Updates the file offset to the value specified by @offset and @origin. 45 * Locking must be provided by the caller. 46 */ 47 loff_t 48 generic_file_llseek_unlocked(struct file *file, loff_t offset, int origin) 49 { 50 struct inode *inode = file->f_mapping->host; 51 52 switch (origin) { 53 case SEEK_END: 54 offset += inode->i_size; 55 break; 56 case SEEK_CUR: 57 /* 58 * Here we special-case the lseek(fd, 0, SEEK_CUR) 59 * position-querying operation. Avoid rewriting the "same" 60 * f_pos value back to the file because a concurrent read(), 61 * write() or lseek() might have altered it 62 */ 63 if (offset == 0) 64 return file->f_pos; 65 offset += file->f_pos; 66 break; 67 case SEEK_DATA: 68 /* 69 * In the generic case the entire file is data, so as long as 70 * offset isn't at the end of the file then the offset is data. 71 */ 72 if (offset >= inode->i_size) 73 return -ENXIO; 74 break; 75 case SEEK_HOLE: 76 /* 77 * There is a virtual hole at the end of the file, so as long as 78 * offset isn't i_size or larger, return i_size. 79 */ 80 if (offset >= inode->i_size) 81 return -ENXIO; 82 offset = inode->i_size; 83 break; 84 } 85 86 if (offset < 0 && !unsigned_offsets(file)) 87 return -EINVAL; 88 if (offset > inode->i_sb->s_maxbytes) 89 return -EINVAL; 90 91 /* Special lock needed here? */ 92 if (offset != file->f_pos) { 93 file->f_pos = offset; 94 file->f_version = 0; 95 } 96 97 return offset; 98 } 99 EXPORT_SYMBOL(generic_file_llseek_unlocked); 100 101 /** 102 * generic_file_llseek - generic llseek implementation for regular files 103 * @file: file structure to seek on 104 * @offset: file offset to seek to 105 * @origin: type of seek 106 * 107 * This is a generic implemenation of ->llseek useable for all normal local 108 * filesystems. It just updates the file offset to the value specified by 109 * @offset and @origin under i_mutex. 110 */ 111 loff_t generic_file_llseek(struct file *file, loff_t offset, int origin) 112 { 113 loff_t rval; 114 115 mutex_lock(&file->f_dentry->d_inode->i_mutex); 116 rval = generic_file_llseek_unlocked(file, offset, origin); 117 mutex_unlock(&file->f_dentry->d_inode->i_mutex); 118 119 return rval; 120 } 121 EXPORT_SYMBOL(generic_file_llseek); 122 123 /** 124 * noop_llseek - No Operation Performed llseek implementation 125 * @file: file structure to seek on 126 * @offset: file offset to seek to 127 * @origin: type of seek 128 * 129 * This is an implementation of ->llseek useable for the rare special case when 130 * userspace expects the seek to succeed but the (device) file is actually not 131 * able to perform the seek. In this case you use noop_llseek() instead of 132 * falling back to the default implementation of ->llseek. 133 */ 134 loff_t noop_llseek(struct file *file, loff_t offset, int origin) 135 { 136 return file->f_pos; 137 } 138 EXPORT_SYMBOL(noop_llseek); 139 140 loff_t no_llseek(struct file *file, loff_t offset, int origin) 141 { 142 return -ESPIPE; 143 } 144 EXPORT_SYMBOL(no_llseek); 145 146 loff_t default_llseek(struct file *file, loff_t offset, int origin) 147 { 148 struct inode *inode = file->f_path.dentry->d_inode; 149 loff_t retval; 150 151 mutex_lock(&inode->i_mutex); 152 switch (origin) { 153 case SEEK_END: 154 offset += i_size_read(inode); 155 break; 156 case SEEK_CUR: 157 if (offset == 0) { 158 retval = file->f_pos; 159 goto out; 160 } 161 offset += file->f_pos; 162 break; 163 case SEEK_DATA: 164 /* 165 * In the generic case the entire file is data, so as 166 * long as offset isn't at the end of the file then the 167 * offset is data. 168 */ 169 if (offset >= inode->i_size) 170 return -ENXIO; 171 break; 172 case SEEK_HOLE: 173 /* 174 * There is a virtual hole at the end of the file, so 175 * as long as offset isn't i_size or larger, return 176 * i_size. 177 */ 178 if (offset >= inode->i_size) 179 return -ENXIO; 180 offset = inode->i_size; 181 break; 182 } 183 retval = -EINVAL; 184 if (offset >= 0 || unsigned_offsets(file)) { 185 if (offset != file->f_pos) { 186 file->f_pos = offset; 187 file->f_version = 0; 188 } 189 retval = offset; 190 } 191 out: 192 mutex_unlock(&inode->i_mutex); 193 return retval; 194 } 195 EXPORT_SYMBOL(default_llseek); 196 197 loff_t vfs_llseek(struct file *file, loff_t offset, int origin) 198 { 199 loff_t (*fn)(struct file *, loff_t, int); 200 201 fn = no_llseek; 202 if (file->f_mode & FMODE_LSEEK) { 203 if (file->f_op && file->f_op->llseek) 204 fn = file->f_op->llseek; 205 } 206 return fn(file, offset, origin); 207 } 208 EXPORT_SYMBOL(vfs_llseek); 209 210 SYSCALL_DEFINE3(lseek, unsigned int, fd, off_t, offset, unsigned int, origin) 211 { 212 off_t retval; 213 struct file * file; 214 int fput_needed; 215 216 retval = -EBADF; 217 file = fget_light(fd, &fput_needed); 218 if (!file) 219 goto bad; 220 221 retval = -EINVAL; 222 if (origin <= SEEK_MAX) { 223 loff_t res = vfs_llseek(file, offset, origin); 224 retval = res; 225 if (res != (loff_t)retval) 226 retval = -EOVERFLOW; /* LFS: should only happen on 32 bit platforms */ 227 } 228 fput_light(file, fput_needed); 229 bad: 230 return retval; 231 } 232 233 #ifdef __ARCH_WANT_SYS_LLSEEK 234 SYSCALL_DEFINE5(llseek, unsigned int, fd, unsigned long, offset_high, 235 unsigned long, offset_low, loff_t __user *, result, 236 unsigned int, origin) 237 { 238 int retval; 239 struct file * file; 240 loff_t offset; 241 int fput_needed; 242 243 retval = -EBADF; 244 file = fget_light(fd, &fput_needed); 245 if (!file) 246 goto bad; 247 248 retval = -EINVAL; 249 if (origin > SEEK_MAX) 250 goto out_putf; 251 252 offset = vfs_llseek(file, ((loff_t) offset_high << 32) | offset_low, 253 origin); 254 255 retval = (int)offset; 256 if (offset >= 0) { 257 retval = -EFAULT; 258 if (!copy_to_user(result, &offset, sizeof(offset))) 259 retval = 0; 260 } 261 out_putf: 262 fput_light(file, fput_needed); 263 bad: 264 return retval; 265 } 266 #endif 267 268 269 /* 270 * rw_verify_area doesn't like huge counts. We limit 271 * them to something that fits in "int" so that others 272 * won't have to do range checks all the time. 273 */ 274 int rw_verify_area(int read_write, struct file *file, loff_t *ppos, size_t count) 275 { 276 struct inode *inode; 277 loff_t pos; 278 int retval = -EINVAL; 279 280 inode = file->f_path.dentry->d_inode; 281 if (unlikely((ssize_t) count < 0)) 282 return retval; 283 pos = *ppos; 284 if (unlikely(pos < 0)) { 285 if (!unsigned_offsets(file)) 286 return retval; 287 if (count >= -pos) /* both values are in 0..LLONG_MAX */ 288 return -EOVERFLOW; 289 } else if (unlikely((loff_t) (pos + count) < 0)) { 290 if (!unsigned_offsets(file)) 291 return retval; 292 } 293 294 if (unlikely(inode->i_flock && mandatory_lock(inode))) { 295 retval = locks_mandatory_area( 296 read_write == READ ? FLOCK_VERIFY_READ : FLOCK_VERIFY_WRITE, 297 inode, file, pos, count); 298 if (retval < 0) 299 return retval; 300 } 301 retval = security_file_permission(file, 302 read_write == READ ? MAY_READ : MAY_WRITE); 303 if (retval) 304 return retval; 305 return count > MAX_RW_COUNT ? MAX_RW_COUNT : count; 306 } 307 308 static void wait_on_retry_sync_kiocb(struct kiocb *iocb) 309 { 310 set_current_state(TASK_UNINTERRUPTIBLE); 311 if (!kiocbIsKicked(iocb)) 312 schedule(); 313 else 314 kiocbClearKicked(iocb); 315 __set_current_state(TASK_RUNNING); 316 } 317 318 ssize_t do_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos) 319 { 320 struct iovec iov = { .iov_base = buf, .iov_len = len }; 321 struct kiocb kiocb; 322 ssize_t ret; 323 324 init_sync_kiocb(&kiocb, filp); 325 kiocb.ki_pos = *ppos; 326 kiocb.ki_left = len; 327 kiocb.ki_nbytes = len; 328 329 for (;;) { 330 ret = filp->f_op->aio_read(&kiocb, &iov, 1, kiocb.ki_pos); 331 if (ret != -EIOCBRETRY) 332 break; 333 wait_on_retry_sync_kiocb(&kiocb); 334 } 335 336 if (-EIOCBQUEUED == ret) 337 ret = wait_on_sync_kiocb(&kiocb); 338 *ppos = kiocb.ki_pos; 339 return ret; 340 } 341 342 EXPORT_SYMBOL(do_sync_read); 343 344 ssize_t vfs_read(struct file *file, char __user *buf, size_t count, loff_t *pos) 345 { 346 ssize_t ret; 347 348 if (!(file->f_mode & FMODE_READ)) 349 return -EBADF; 350 if (!file->f_op || (!file->f_op->read && !file->f_op->aio_read)) 351 return -EINVAL; 352 if (unlikely(!access_ok(VERIFY_WRITE, buf, count))) 353 return -EFAULT; 354 355 ret = rw_verify_area(READ, file, pos, count); 356 if (ret >= 0) { 357 count = ret; 358 if (file->f_op->read) 359 ret = file->f_op->read(file, buf, count, pos); 360 else 361 ret = do_sync_read(file, buf, count, pos); 362 if (ret > 0) { 363 fsnotify_access(file); 364 add_rchar(current, ret); 365 } 366 inc_syscr(current); 367 } 368 369 return ret; 370 } 371 372 EXPORT_SYMBOL(vfs_read); 373 374 ssize_t do_sync_write(struct file *filp, const char __user *buf, size_t len, loff_t *ppos) 375 { 376 struct iovec iov = { .iov_base = (void __user *)buf, .iov_len = len }; 377 struct kiocb kiocb; 378 ssize_t ret; 379 380 init_sync_kiocb(&kiocb, filp); 381 kiocb.ki_pos = *ppos; 382 kiocb.ki_left = len; 383 kiocb.ki_nbytes = len; 384 385 for (;;) { 386 ret = filp->f_op->aio_write(&kiocb, &iov, 1, kiocb.ki_pos); 387 if (ret != -EIOCBRETRY) 388 break; 389 wait_on_retry_sync_kiocb(&kiocb); 390 } 391 392 if (-EIOCBQUEUED == ret) 393 ret = wait_on_sync_kiocb(&kiocb); 394 *ppos = kiocb.ki_pos; 395 return ret; 396 } 397 398 EXPORT_SYMBOL(do_sync_write); 399 400 ssize_t vfs_write(struct file *file, const char __user *buf, size_t count, loff_t *pos) 401 { 402 ssize_t ret; 403 404 if (!(file->f_mode & FMODE_WRITE)) 405 return -EBADF; 406 if (!file->f_op || (!file->f_op->write && !file->f_op->aio_write)) 407 return -EINVAL; 408 if (unlikely(!access_ok(VERIFY_READ, buf, count))) 409 return -EFAULT; 410 411 ret = rw_verify_area(WRITE, file, pos, count); 412 if (ret >= 0) { 413 count = ret; 414 if (file->f_op->write) 415 ret = file->f_op->write(file, buf, count, pos); 416 else 417 ret = do_sync_write(file, buf, count, pos); 418 if (ret > 0) { 419 fsnotify_modify(file); 420 add_wchar(current, ret); 421 } 422 inc_syscw(current); 423 } 424 425 return ret; 426 } 427 428 EXPORT_SYMBOL(vfs_write); 429 430 static inline loff_t file_pos_read(struct file *file) 431 { 432 return file->f_pos; 433 } 434 435 static inline void file_pos_write(struct file *file, loff_t pos) 436 { 437 file->f_pos = pos; 438 } 439 440 SYSCALL_DEFINE3(read, unsigned int, fd, char __user *, buf, size_t, count) 441 { 442 struct file *file; 443 ssize_t ret = -EBADF; 444 int fput_needed; 445 446 file = fget_light(fd, &fput_needed); 447 if (file) { 448 loff_t pos = file_pos_read(file); 449 ret = vfs_read(file, buf, count, &pos); 450 file_pos_write(file, pos); 451 fput_light(file, fput_needed); 452 } 453 454 return ret; 455 } 456 457 SYSCALL_DEFINE3(write, unsigned int, fd, const char __user *, buf, 458 size_t, count) 459 { 460 struct file *file; 461 ssize_t ret = -EBADF; 462 int fput_needed; 463 464 file = fget_light(fd, &fput_needed); 465 if (file) { 466 loff_t pos = file_pos_read(file); 467 ret = vfs_write(file, buf, count, &pos); 468 file_pos_write(file, pos); 469 fput_light(file, fput_needed); 470 } 471 472 return ret; 473 } 474 475 SYSCALL_DEFINE(pread64)(unsigned int fd, char __user *buf, 476 size_t count, loff_t pos) 477 { 478 struct file *file; 479 ssize_t ret = -EBADF; 480 int fput_needed; 481 482 if (pos < 0) 483 return -EINVAL; 484 485 file = fget_light(fd, &fput_needed); 486 if (file) { 487 ret = -ESPIPE; 488 if (file->f_mode & FMODE_PREAD) 489 ret = vfs_read(file, buf, count, &pos); 490 fput_light(file, fput_needed); 491 } 492 493 return ret; 494 } 495 #ifdef CONFIG_HAVE_SYSCALL_WRAPPERS 496 asmlinkage long SyS_pread64(long fd, long buf, long count, loff_t pos) 497 { 498 return SYSC_pread64((unsigned int) fd, (char __user *) buf, 499 (size_t) count, pos); 500 } 501 SYSCALL_ALIAS(sys_pread64, SyS_pread64); 502 #endif 503 504 SYSCALL_DEFINE(pwrite64)(unsigned int fd, const char __user *buf, 505 size_t count, loff_t pos) 506 { 507 struct file *file; 508 ssize_t ret = -EBADF; 509 int fput_needed; 510 511 if (pos < 0) 512 return -EINVAL; 513 514 file = fget_light(fd, &fput_needed); 515 if (file) { 516 ret = -ESPIPE; 517 if (file->f_mode & FMODE_PWRITE) 518 ret = vfs_write(file, buf, count, &pos); 519 fput_light(file, fput_needed); 520 } 521 522 return ret; 523 } 524 #ifdef CONFIG_HAVE_SYSCALL_WRAPPERS 525 asmlinkage long SyS_pwrite64(long fd, long buf, long count, loff_t pos) 526 { 527 return SYSC_pwrite64((unsigned int) fd, (const char __user *) buf, 528 (size_t) count, pos); 529 } 530 SYSCALL_ALIAS(sys_pwrite64, SyS_pwrite64); 531 #endif 532 533 /* 534 * Reduce an iovec's length in-place. Return the resulting number of segments 535 */ 536 unsigned long iov_shorten(struct iovec *iov, unsigned long nr_segs, size_t to) 537 { 538 unsigned long seg = 0; 539 size_t len = 0; 540 541 while (seg < nr_segs) { 542 seg++; 543 if (len + iov->iov_len >= to) { 544 iov->iov_len = to - len; 545 break; 546 } 547 len += iov->iov_len; 548 iov++; 549 } 550 return seg; 551 } 552 EXPORT_SYMBOL(iov_shorten); 553 554 ssize_t do_sync_readv_writev(struct file *filp, const struct iovec *iov, 555 unsigned long nr_segs, size_t len, loff_t *ppos, iov_fn_t fn) 556 { 557 struct kiocb kiocb; 558 ssize_t ret; 559 560 init_sync_kiocb(&kiocb, filp); 561 kiocb.ki_pos = *ppos; 562 kiocb.ki_left = len; 563 kiocb.ki_nbytes = len; 564 565 for (;;) { 566 ret = fn(&kiocb, iov, nr_segs, kiocb.ki_pos); 567 if (ret != -EIOCBRETRY) 568 break; 569 wait_on_retry_sync_kiocb(&kiocb); 570 } 571 572 if (ret == -EIOCBQUEUED) 573 ret = wait_on_sync_kiocb(&kiocb); 574 *ppos = kiocb.ki_pos; 575 return ret; 576 } 577 578 /* Do it by hand, with file-ops */ 579 ssize_t do_loop_readv_writev(struct file *filp, struct iovec *iov, 580 unsigned long nr_segs, loff_t *ppos, io_fn_t fn) 581 { 582 struct iovec *vector = iov; 583 ssize_t ret = 0; 584 585 while (nr_segs > 0) { 586 void __user *base; 587 size_t len; 588 ssize_t nr; 589 590 base = vector->iov_base; 591 len = vector->iov_len; 592 vector++; 593 nr_segs--; 594 595 nr = fn(filp, base, len, ppos); 596 597 if (nr < 0) { 598 if (!ret) 599 ret = nr; 600 break; 601 } 602 ret += nr; 603 if (nr != len) 604 break; 605 } 606 607 return ret; 608 } 609 610 /* A write operation does a read from user space and vice versa */ 611 #define vrfy_dir(type) ((type) == READ ? VERIFY_WRITE : VERIFY_READ) 612 613 ssize_t rw_copy_check_uvector(int type, const struct iovec __user * uvector, 614 unsigned long nr_segs, unsigned long fast_segs, 615 struct iovec *fast_pointer, 616 struct iovec **ret_pointer) 617 { 618 unsigned long seg; 619 ssize_t ret; 620 struct iovec *iov = fast_pointer; 621 622 /* 623 * SuS says "The readv() function *may* fail if the iovcnt argument 624 * was less than or equal to 0, or greater than {IOV_MAX}. Linux has 625 * traditionally returned zero for zero segments, so... 626 */ 627 if (nr_segs == 0) { 628 ret = 0; 629 goto out; 630 } 631 632 /* 633 * First get the "struct iovec" from user memory and 634 * verify all the pointers 635 */ 636 if (nr_segs > UIO_MAXIOV) { 637 ret = -EINVAL; 638 goto out; 639 } 640 if (nr_segs > fast_segs) { 641 iov = kmalloc(nr_segs*sizeof(struct iovec), GFP_KERNEL); 642 if (iov == NULL) { 643 ret = -ENOMEM; 644 goto out; 645 } 646 } 647 if (copy_from_user(iov, uvector, nr_segs*sizeof(*uvector))) { 648 ret = -EFAULT; 649 goto out; 650 } 651 652 /* 653 * According to the Single Unix Specification we should return EINVAL 654 * if an element length is < 0 when cast to ssize_t or if the 655 * total length would overflow the ssize_t return value of the 656 * system call. 657 * 658 * Linux caps all read/write calls to MAX_RW_COUNT, and avoids the 659 * overflow case. 660 */ 661 ret = 0; 662 for (seg = 0; seg < nr_segs; seg++) { 663 void __user *buf = iov[seg].iov_base; 664 ssize_t len = (ssize_t)iov[seg].iov_len; 665 666 /* see if we we're about to use an invalid len or if 667 * it's about to overflow ssize_t */ 668 if (len < 0) { 669 ret = -EINVAL; 670 goto out; 671 } 672 if (unlikely(!access_ok(vrfy_dir(type), buf, len))) { 673 ret = -EFAULT; 674 goto out; 675 } 676 if (len > MAX_RW_COUNT - ret) { 677 len = MAX_RW_COUNT - ret; 678 iov[seg].iov_len = len; 679 } 680 ret += len; 681 } 682 out: 683 *ret_pointer = iov; 684 return ret; 685 } 686 687 static ssize_t do_readv_writev(int type, struct file *file, 688 const struct iovec __user * uvector, 689 unsigned long nr_segs, loff_t *pos) 690 { 691 size_t tot_len; 692 struct iovec iovstack[UIO_FASTIOV]; 693 struct iovec *iov = iovstack; 694 ssize_t ret; 695 io_fn_t fn; 696 iov_fn_t fnv; 697 698 if (!file->f_op) { 699 ret = -EINVAL; 700 goto out; 701 } 702 703 ret = rw_copy_check_uvector(type, uvector, nr_segs, 704 ARRAY_SIZE(iovstack), iovstack, &iov); 705 if (ret <= 0) 706 goto out; 707 708 tot_len = ret; 709 ret = rw_verify_area(type, file, pos, tot_len); 710 if (ret < 0) 711 goto out; 712 713 fnv = NULL; 714 if (type == READ) { 715 fn = file->f_op->read; 716 fnv = file->f_op->aio_read; 717 } else { 718 fn = (io_fn_t)file->f_op->write; 719 fnv = file->f_op->aio_write; 720 } 721 722 if (fnv) 723 ret = do_sync_readv_writev(file, iov, nr_segs, tot_len, 724 pos, fnv); 725 else 726 ret = do_loop_readv_writev(file, iov, nr_segs, pos, fn); 727 728 out: 729 if (iov != iovstack) 730 kfree(iov); 731 if ((ret + (type == READ)) > 0) { 732 if (type == READ) 733 fsnotify_access(file); 734 else 735 fsnotify_modify(file); 736 } 737 return ret; 738 } 739 740 ssize_t vfs_readv(struct file *file, const struct iovec __user *vec, 741 unsigned long vlen, loff_t *pos) 742 { 743 if (!(file->f_mode & FMODE_READ)) 744 return -EBADF; 745 if (!file->f_op || (!file->f_op->aio_read && !file->f_op->read)) 746 return -EINVAL; 747 748 return do_readv_writev(READ, file, vec, vlen, pos); 749 } 750 751 EXPORT_SYMBOL(vfs_readv); 752 753 ssize_t vfs_writev(struct file *file, const struct iovec __user *vec, 754 unsigned long vlen, loff_t *pos) 755 { 756 if (!(file->f_mode & FMODE_WRITE)) 757 return -EBADF; 758 if (!file->f_op || (!file->f_op->aio_write && !file->f_op->write)) 759 return -EINVAL; 760 761 return do_readv_writev(WRITE, file, vec, vlen, pos); 762 } 763 764 EXPORT_SYMBOL(vfs_writev); 765 766 SYSCALL_DEFINE3(readv, unsigned long, fd, const struct iovec __user *, vec, 767 unsigned long, vlen) 768 { 769 struct file *file; 770 ssize_t ret = -EBADF; 771 int fput_needed; 772 773 file = fget_light(fd, &fput_needed); 774 if (file) { 775 loff_t pos = file_pos_read(file); 776 ret = vfs_readv(file, vec, vlen, &pos); 777 file_pos_write(file, pos); 778 fput_light(file, fput_needed); 779 } 780 781 if (ret > 0) 782 add_rchar(current, ret); 783 inc_syscr(current); 784 return ret; 785 } 786 787 SYSCALL_DEFINE3(writev, unsigned long, fd, const struct iovec __user *, vec, 788 unsigned long, vlen) 789 { 790 struct file *file; 791 ssize_t ret = -EBADF; 792 int fput_needed; 793 794 file = fget_light(fd, &fput_needed); 795 if (file) { 796 loff_t pos = file_pos_read(file); 797 ret = vfs_writev(file, vec, vlen, &pos); 798 file_pos_write(file, pos); 799 fput_light(file, fput_needed); 800 } 801 802 if (ret > 0) 803 add_wchar(current, ret); 804 inc_syscw(current); 805 return ret; 806 } 807 808 static inline loff_t pos_from_hilo(unsigned long high, unsigned long low) 809 { 810 #define HALF_LONG_BITS (BITS_PER_LONG / 2) 811 return (((loff_t)high << HALF_LONG_BITS) << HALF_LONG_BITS) | low; 812 } 813 814 SYSCALL_DEFINE5(preadv, unsigned long, fd, const struct iovec __user *, vec, 815 unsigned long, vlen, unsigned long, pos_l, unsigned long, pos_h) 816 { 817 loff_t pos = pos_from_hilo(pos_h, pos_l); 818 struct file *file; 819 ssize_t ret = -EBADF; 820 int fput_needed; 821 822 if (pos < 0) 823 return -EINVAL; 824 825 file = fget_light(fd, &fput_needed); 826 if (file) { 827 ret = -ESPIPE; 828 if (file->f_mode & FMODE_PREAD) 829 ret = vfs_readv(file, vec, vlen, &pos); 830 fput_light(file, fput_needed); 831 } 832 833 if (ret > 0) 834 add_rchar(current, ret); 835 inc_syscr(current); 836 return ret; 837 } 838 839 SYSCALL_DEFINE5(pwritev, unsigned long, fd, const struct iovec __user *, vec, 840 unsigned long, vlen, unsigned long, pos_l, unsigned long, pos_h) 841 { 842 loff_t pos = pos_from_hilo(pos_h, pos_l); 843 struct file *file; 844 ssize_t ret = -EBADF; 845 int fput_needed; 846 847 if (pos < 0) 848 return -EINVAL; 849 850 file = fget_light(fd, &fput_needed); 851 if (file) { 852 ret = -ESPIPE; 853 if (file->f_mode & FMODE_PWRITE) 854 ret = vfs_writev(file, vec, vlen, &pos); 855 fput_light(file, fput_needed); 856 } 857 858 if (ret > 0) 859 add_wchar(current, ret); 860 inc_syscw(current); 861 return ret; 862 } 863 864 static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos, 865 size_t count, loff_t max) 866 { 867 struct file * in_file, * out_file; 868 struct inode * in_inode, * out_inode; 869 loff_t pos; 870 ssize_t retval; 871 int fput_needed_in, fput_needed_out, fl; 872 873 /* 874 * Get input file, and verify that it is ok.. 875 */ 876 retval = -EBADF; 877 in_file = fget_light(in_fd, &fput_needed_in); 878 if (!in_file) 879 goto out; 880 if (!(in_file->f_mode & FMODE_READ)) 881 goto fput_in; 882 retval = -ESPIPE; 883 if (!ppos) 884 ppos = &in_file->f_pos; 885 else 886 if (!(in_file->f_mode & FMODE_PREAD)) 887 goto fput_in; 888 retval = rw_verify_area(READ, in_file, ppos, count); 889 if (retval < 0) 890 goto fput_in; 891 count = retval; 892 893 /* 894 * Get output file, and verify that it is ok.. 895 */ 896 retval = -EBADF; 897 out_file = fget_light(out_fd, &fput_needed_out); 898 if (!out_file) 899 goto fput_in; 900 if (!(out_file->f_mode & FMODE_WRITE)) 901 goto fput_out; 902 retval = -EINVAL; 903 in_inode = in_file->f_path.dentry->d_inode; 904 out_inode = out_file->f_path.dentry->d_inode; 905 retval = rw_verify_area(WRITE, out_file, &out_file->f_pos, count); 906 if (retval < 0) 907 goto fput_out; 908 count = retval; 909 910 if (!max) 911 max = min(in_inode->i_sb->s_maxbytes, out_inode->i_sb->s_maxbytes); 912 913 pos = *ppos; 914 if (unlikely(pos + count > max)) { 915 retval = -EOVERFLOW; 916 if (pos >= max) 917 goto fput_out; 918 count = max - pos; 919 } 920 921 fl = 0; 922 #if 0 923 /* 924 * We need to debate whether we can enable this or not. The 925 * man page documents EAGAIN return for the output at least, 926 * and the application is arguably buggy if it doesn't expect 927 * EAGAIN on a non-blocking file descriptor. 928 */ 929 if (in_file->f_flags & O_NONBLOCK) 930 fl = SPLICE_F_NONBLOCK; 931 #endif 932 retval = do_splice_direct(in_file, ppos, out_file, count, fl); 933 934 if (retval > 0) { 935 add_rchar(current, retval); 936 add_wchar(current, retval); 937 } 938 939 inc_syscr(current); 940 inc_syscw(current); 941 if (*ppos > max) 942 retval = -EOVERFLOW; 943 944 fput_out: 945 fput_light(out_file, fput_needed_out); 946 fput_in: 947 fput_light(in_file, fput_needed_in); 948 out: 949 return retval; 950 } 951 952 SYSCALL_DEFINE4(sendfile, int, out_fd, int, in_fd, off_t __user *, offset, size_t, count) 953 { 954 loff_t pos; 955 off_t off; 956 ssize_t ret; 957 958 if (offset) { 959 if (unlikely(get_user(off, offset))) 960 return -EFAULT; 961 pos = off; 962 ret = do_sendfile(out_fd, in_fd, &pos, count, MAX_NON_LFS); 963 if (unlikely(put_user(pos, offset))) 964 return -EFAULT; 965 return ret; 966 } 967 968 return do_sendfile(out_fd, in_fd, NULL, count, 0); 969 } 970 971 SYSCALL_DEFINE4(sendfile64, int, out_fd, int, in_fd, loff_t __user *, offset, size_t, count) 972 { 973 loff_t pos; 974 ssize_t ret; 975 976 if (offset) { 977 if (unlikely(copy_from_user(&pos, offset, sizeof(loff_t)))) 978 return -EFAULT; 979 ret = do_sendfile(out_fd, in_fd, &pos, count, 0); 980 if (unlikely(put_user(pos, offset))) 981 return -EFAULT; 982 return ret; 983 } 984 985 return do_sendfile(out_fd, in_fd, NULL, count, 0); 986 } 987