1 /* 2 * linux/fs/read_write.c 3 * 4 * Copyright (C) 1991, 1992 Linus Torvalds 5 */ 6 7 #include <linux/slab.h> 8 #include <linux/stat.h> 9 #include <linux/fcntl.h> 10 #include <linux/file.h> 11 #include <linux/uio.h> 12 #include <linux/fsnotify.h> 13 #include <linux/security.h> 14 #include <linux/export.h> 15 #include <linux/syscalls.h> 16 #include <linux/pagemap.h> 17 #include <linux/splice.h> 18 #include "read_write.h" 19 20 #include <asm/uaccess.h> 21 #include <asm/unistd.h> 22 23 const struct file_operations generic_ro_fops = { 24 .llseek = generic_file_llseek, 25 .read = do_sync_read, 26 .aio_read = generic_file_aio_read, 27 .mmap = generic_file_readonly_mmap, 28 .splice_read = generic_file_splice_read, 29 }; 30 31 EXPORT_SYMBOL(generic_ro_fops); 32 33 static inline int unsigned_offsets(struct file *file) 34 { 35 return file->f_mode & FMODE_UNSIGNED_OFFSET; 36 } 37 38 static loff_t lseek_execute(struct file *file, struct inode *inode, 39 loff_t offset, loff_t maxsize) 40 { 41 if (offset < 0 && !unsigned_offsets(file)) 42 return -EINVAL; 43 if (offset > maxsize) 44 return -EINVAL; 45 46 if (offset != file->f_pos) { 47 file->f_pos = offset; 48 file->f_version = 0; 49 } 50 return offset; 51 } 52 53 /** 54 * generic_file_llseek_size - generic llseek implementation for regular files 55 * @file: file structure to seek on 56 * @offset: file offset to seek to 57 * @origin: type of seek 58 * @size: max size of file system 59 * 60 * This is a variant of generic_file_llseek that allows passing in a custom 61 * file size. 62 * 63 * Synchronization: 64 * SEEK_SET and SEEK_END are unsynchronized (but atomic on 64bit platforms) 65 * SEEK_CUR is synchronized against other SEEK_CURs, but not read/writes. 66 * read/writes behave like SEEK_SET against seeks. 67 */ 68 loff_t 69 generic_file_llseek_size(struct file *file, loff_t offset, int origin, 70 loff_t maxsize) 71 { 72 struct inode *inode = file->f_mapping->host; 73 74 switch (origin) { 75 case SEEK_END: 76 offset += i_size_read(inode); 77 break; 78 case SEEK_CUR: 79 /* 80 * Here we special-case the lseek(fd, 0, SEEK_CUR) 81 * position-querying operation. Avoid rewriting the "same" 82 * f_pos value back to the file because a concurrent read(), 83 * write() or lseek() might have altered it 84 */ 85 if (offset == 0) 86 return file->f_pos; 87 /* 88 * f_lock protects against read/modify/write race with other 89 * SEEK_CURs. Note that parallel writes and reads behave 90 * like SEEK_SET. 91 */ 92 spin_lock(&file->f_lock); 93 offset = lseek_execute(file, inode, file->f_pos + offset, 94 maxsize); 95 spin_unlock(&file->f_lock); 96 return offset; 97 case SEEK_DATA: 98 /* 99 * In the generic case the entire file is data, so as long as 100 * offset isn't at the end of the file then the offset is data. 101 */ 102 if (offset >= i_size_read(inode)) 103 return -ENXIO; 104 break; 105 case SEEK_HOLE: 106 /* 107 * There is a virtual hole at the end of the file, so as long as 108 * offset isn't i_size or larger, return i_size. 109 */ 110 if (offset >= i_size_read(inode)) 111 return -ENXIO; 112 offset = i_size_read(inode); 113 break; 114 } 115 116 return lseek_execute(file, inode, offset, maxsize); 117 } 118 EXPORT_SYMBOL(generic_file_llseek_size); 119 120 /** 121 * generic_file_llseek - generic llseek implementation for regular files 122 * @file: file structure to seek on 123 * @offset: file offset to seek to 124 * @origin: type of seek 125 * 126 * This is a generic implemenation of ->llseek useable for all normal local 127 * filesystems. It just updates the file offset to the value specified by 128 * @offset and @origin under i_mutex. 129 */ 130 loff_t generic_file_llseek(struct file *file, loff_t offset, int origin) 131 { 132 struct inode *inode = file->f_mapping->host; 133 134 return generic_file_llseek_size(file, offset, origin, 135 inode->i_sb->s_maxbytes); 136 } 137 EXPORT_SYMBOL(generic_file_llseek); 138 139 /** 140 * noop_llseek - No Operation Performed llseek implementation 141 * @file: file structure to seek on 142 * @offset: file offset to seek to 143 * @origin: type of seek 144 * 145 * This is an implementation of ->llseek useable for the rare special case when 146 * userspace expects the seek to succeed but the (device) file is actually not 147 * able to perform the seek. In this case you use noop_llseek() instead of 148 * falling back to the default implementation of ->llseek. 149 */ 150 loff_t noop_llseek(struct file *file, loff_t offset, int origin) 151 { 152 return file->f_pos; 153 } 154 EXPORT_SYMBOL(noop_llseek); 155 156 loff_t no_llseek(struct file *file, loff_t offset, int origin) 157 { 158 return -ESPIPE; 159 } 160 EXPORT_SYMBOL(no_llseek); 161 162 loff_t default_llseek(struct file *file, loff_t offset, int origin) 163 { 164 struct inode *inode = file->f_path.dentry->d_inode; 165 loff_t retval; 166 167 mutex_lock(&inode->i_mutex); 168 switch (origin) { 169 case SEEK_END: 170 offset += i_size_read(inode); 171 break; 172 case SEEK_CUR: 173 if (offset == 0) { 174 retval = file->f_pos; 175 goto out; 176 } 177 offset += file->f_pos; 178 break; 179 case SEEK_DATA: 180 /* 181 * In the generic case the entire file is data, so as 182 * long as offset isn't at the end of the file then the 183 * offset is data. 184 */ 185 if (offset >= inode->i_size) { 186 retval = -ENXIO; 187 goto out; 188 } 189 break; 190 case SEEK_HOLE: 191 /* 192 * There is a virtual hole at the end of the file, so 193 * as long as offset isn't i_size or larger, return 194 * i_size. 195 */ 196 if (offset >= inode->i_size) { 197 retval = -ENXIO; 198 goto out; 199 } 200 offset = inode->i_size; 201 break; 202 } 203 retval = -EINVAL; 204 if (offset >= 0 || unsigned_offsets(file)) { 205 if (offset != file->f_pos) { 206 file->f_pos = offset; 207 file->f_version = 0; 208 } 209 retval = offset; 210 } 211 out: 212 mutex_unlock(&inode->i_mutex); 213 return retval; 214 } 215 EXPORT_SYMBOL(default_llseek); 216 217 loff_t vfs_llseek(struct file *file, loff_t offset, int origin) 218 { 219 loff_t (*fn)(struct file *, loff_t, int); 220 221 fn = no_llseek; 222 if (file->f_mode & FMODE_LSEEK) { 223 if (file->f_op && file->f_op->llseek) 224 fn = file->f_op->llseek; 225 } 226 return fn(file, offset, origin); 227 } 228 EXPORT_SYMBOL(vfs_llseek); 229 230 SYSCALL_DEFINE3(lseek, unsigned int, fd, off_t, offset, unsigned int, origin) 231 { 232 off_t retval; 233 struct file * file; 234 int fput_needed; 235 236 retval = -EBADF; 237 file = fget_light(fd, &fput_needed); 238 if (!file) 239 goto bad; 240 241 retval = -EINVAL; 242 if (origin <= SEEK_MAX) { 243 loff_t res = vfs_llseek(file, offset, origin); 244 retval = res; 245 if (res != (loff_t)retval) 246 retval = -EOVERFLOW; /* LFS: should only happen on 32 bit platforms */ 247 } 248 fput_light(file, fput_needed); 249 bad: 250 return retval; 251 } 252 253 #ifdef __ARCH_WANT_SYS_LLSEEK 254 SYSCALL_DEFINE5(llseek, unsigned int, fd, unsigned long, offset_high, 255 unsigned long, offset_low, loff_t __user *, result, 256 unsigned int, origin) 257 { 258 int retval; 259 struct file * file; 260 loff_t offset; 261 int fput_needed; 262 263 retval = -EBADF; 264 file = fget_light(fd, &fput_needed); 265 if (!file) 266 goto bad; 267 268 retval = -EINVAL; 269 if (origin > SEEK_MAX) 270 goto out_putf; 271 272 offset = vfs_llseek(file, ((loff_t) offset_high << 32) | offset_low, 273 origin); 274 275 retval = (int)offset; 276 if (offset >= 0) { 277 retval = -EFAULT; 278 if (!copy_to_user(result, &offset, sizeof(offset))) 279 retval = 0; 280 } 281 out_putf: 282 fput_light(file, fput_needed); 283 bad: 284 return retval; 285 } 286 #endif 287 288 289 /* 290 * rw_verify_area doesn't like huge counts. We limit 291 * them to something that fits in "int" so that others 292 * won't have to do range checks all the time. 293 */ 294 int rw_verify_area(int read_write, struct file *file, loff_t *ppos, size_t count) 295 { 296 struct inode *inode; 297 loff_t pos; 298 int retval = -EINVAL; 299 300 inode = file->f_path.dentry->d_inode; 301 if (unlikely((ssize_t) count < 0)) 302 return retval; 303 pos = *ppos; 304 if (unlikely(pos < 0)) { 305 if (!unsigned_offsets(file)) 306 return retval; 307 if (count >= -pos) /* both values are in 0..LLONG_MAX */ 308 return -EOVERFLOW; 309 } else if (unlikely((loff_t) (pos + count) < 0)) { 310 if (!unsigned_offsets(file)) 311 return retval; 312 } 313 314 if (unlikely(inode->i_flock && mandatory_lock(inode))) { 315 retval = locks_mandatory_area( 316 read_write == READ ? FLOCK_VERIFY_READ : FLOCK_VERIFY_WRITE, 317 inode, file, pos, count); 318 if (retval < 0) 319 return retval; 320 } 321 retval = security_file_permission(file, 322 read_write == READ ? MAY_READ : MAY_WRITE); 323 if (retval) 324 return retval; 325 return count > MAX_RW_COUNT ? MAX_RW_COUNT : count; 326 } 327 328 static void wait_on_retry_sync_kiocb(struct kiocb *iocb) 329 { 330 set_current_state(TASK_UNINTERRUPTIBLE); 331 if (!kiocbIsKicked(iocb)) 332 schedule(); 333 else 334 kiocbClearKicked(iocb); 335 __set_current_state(TASK_RUNNING); 336 } 337 338 ssize_t do_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos) 339 { 340 struct iovec iov = { .iov_base = buf, .iov_len = len }; 341 struct kiocb kiocb; 342 ssize_t ret; 343 344 init_sync_kiocb(&kiocb, filp); 345 kiocb.ki_pos = *ppos; 346 kiocb.ki_left = len; 347 kiocb.ki_nbytes = len; 348 349 for (;;) { 350 ret = filp->f_op->aio_read(&kiocb, &iov, 1, kiocb.ki_pos); 351 if (ret != -EIOCBRETRY) 352 break; 353 wait_on_retry_sync_kiocb(&kiocb); 354 } 355 356 if (-EIOCBQUEUED == ret) 357 ret = wait_on_sync_kiocb(&kiocb); 358 *ppos = kiocb.ki_pos; 359 return ret; 360 } 361 362 EXPORT_SYMBOL(do_sync_read); 363 364 ssize_t vfs_read(struct file *file, char __user *buf, size_t count, loff_t *pos) 365 { 366 ssize_t ret; 367 368 if (!(file->f_mode & FMODE_READ)) 369 return -EBADF; 370 if (!file->f_op || (!file->f_op->read && !file->f_op->aio_read)) 371 return -EINVAL; 372 if (unlikely(!access_ok(VERIFY_WRITE, buf, count))) 373 return -EFAULT; 374 375 ret = rw_verify_area(READ, file, pos, count); 376 if (ret >= 0) { 377 count = ret; 378 if (file->f_op->read) 379 ret = file->f_op->read(file, buf, count, pos); 380 else 381 ret = do_sync_read(file, buf, count, pos); 382 if (ret > 0) { 383 fsnotify_access(file); 384 add_rchar(current, ret); 385 } 386 inc_syscr(current); 387 } 388 389 return ret; 390 } 391 392 EXPORT_SYMBOL(vfs_read); 393 394 ssize_t do_sync_write(struct file *filp, const char __user *buf, size_t len, loff_t *ppos) 395 { 396 struct iovec iov = { .iov_base = (void __user *)buf, .iov_len = len }; 397 struct kiocb kiocb; 398 ssize_t ret; 399 400 init_sync_kiocb(&kiocb, filp); 401 kiocb.ki_pos = *ppos; 402 kiocb.ki_left = len; 403 kiocb.ki_nbytes = len; 404 405 for (;;) { 406 ret = filp->f_op->aio_write(&kiocb, &iov, 1, kiocb.ki_pos); 407 if (ret != -EIOCBRETRY) 408 break; 409 wait_on_retry_sync_kiocb(&kiocb); 410 } 411 412 if (-EIOCBQUEUED == ret) 413 ret = wait_on_sync_kiocb(&kiocb); 414 *ppos = kiocb.ki_pos; 415 return ret; 416 } 417 418 EXPORT_SYMBOL(do_sync_write); 419 420 ssize_t vfs_write(struct file *file, const char __user *buf, size_t count, loff_t *pos) 421 { 422 ssize_t ret; 423 424 if (!(file->f_mode & FMODE_WRITE)) 425 return -EBADF; 426 if (!file->f_op || (!file->f_op->write && !file->f_op->aio_write)) 427 return -EINVAL; 428 if (unlikely(!access_ok(VERIFY_READ, buf, count))) 429 return -EFAULT; 430 431 ret = rw_verify_area(WRITE, file, pos, count); 432 if (ret >= 0) { 433 count = ret; 434 if (file->f_op->write) 435 ret = file->f_op->write(file, buf, count, pos); 436 else 437 ret = do_sync_write(file, buf, count, pos); 438 if (ret > 0) { 439 fsnotify_modify(file); 440 add_wchar(current, ret); 441 } 442 inc_syscw(current); 443 } 444 445 return ret; 446 } 447 448 EXPORT_SYMBOL(vfs_write); 449 450 static inline loff_t file_pos_read(struct file *file) 451 { 452 return file->f_pos; 453 } 454 455 static inline void file_pos_write(struct file *file, loff_t pos) 456 { 457 file->f_pos = pos; 458 } 459 460 SYSCALL_DEFINE3(read, unsigned int, fd, char __user *, buf, size_t, count) 461 { 462 struct file *file; 463 ssize_t ret = -EBADF; 464 int fput_needed; 465 466 file = fget_light(fd, &fput_needed); 467 if (file) { 468 loff_t pos = file_pos_read(file); 469 ret = vfs_read(file, buf, count, &pos); 470 file_pos_write(file, pos); 471 fput_light(file, fput_needed); 472 } 473 474 return ret; 475 } 476 477 SYSCALL_DEFINE3(write, unsigned int, fd, const char __user *, buf, 478 size_t, count) 479 { 480 struct file *file; 481 ssize_t ret = -EBADF; 482 int fput_needed; 483 484 file = fget_light(fd, &fput_needed); 485 if (file) { 486 loff_t pos = file_pos_read(file); 487 ret = vfs_write(file, buf, count, &pos); 488 file_pos_write(file, pos); 489 fput_light(file, fput_needed); 490 } 491 492 return ret; 493 } 494 495 SYSCALL_DEFINE(pread64)(unsigned int fd, char __user *buf, 496 size_t count, loff_t pos) 497 { 498 struct file *file; 499 ssize_t ret = -EBADF; 500 int fput_needed; 501 502 if (pos < 0) 503 return -EINVAL; 504 505 file = fget_light(fd, &fput_needed); 506 if (file) { 507 ret = -ESPIPE; 508 if (file->f_mode & FMODE_PREAD) 509 ret = vfs_read(file, buf, count, &pos); 510 fput_light(file, fput_needed); 511 } 512 513 return ret; 514 } 515 #ifdef CONFIG_HAVE_SYSCALL_WRAPPERS 516 asmlinkage long SyS_pread64(long fd, long buf, long count, loff_t pos) 517 { 518 return SYSC_pread64((unsigned int) fd, (char __user *) buf, 519 (size_t) count, pos); 520 } 521 SYSCALL_ALIAS(sys_pread64, SyS_pread64); 522 #endif 523 524 SYSCALL_DEFINE(pwrite64)(unsigned int fd, const char __user *buf, 525 size_t count, loff_t pos) 526 { 527 struct file *file; 528 ssize_t ret = -EBADF; 529 int fput_needed; 530 531 if (pos < 0) 532 return -EINVAL; 533 534 file = fget_light(fd, &fput_needed); 535 if (file) { 536 ret = -ESPIPE; 537 if (file->f_mode & FMODE_PWRITE) 538 ret = vfs_write(file, buf, count, &pos); 539 fput_light(file, fput_needed); 540 } 541 542 return ret; 543 } 544 #ifdef CONFIG_HAVE_SYSCALL_WRAPPERS 545 asmlinkage long SyS_pwrite64(long fd, long buf, long count, loff_t pos) 546 { 547 return SYSC_pwrite64((unsigned int) fd, (const char __user *) buf, 548 (size_t) count, pos); 549 } 550 SYSCALL_ALIAS(sys_pwrite64, SyS_pwrite64); 551 #endif 552 553 /* 554 * Reduce an iovec's length in-place. Return the resulting number of segments 555 */ 556 unsigned long iov_shorten(struct iovec *iov, unsigned long nr_segs, size_t to) 557 { 558 unsigned long seg = 0; 559 size_t len = 0; 560 561 while (seg < nr_segs) { 562 seg++; 563 if (len + iov->iov_len >= to) { 564 iov->iov_len = to - len; 565 break; 566 } 567 len += iov->iov_len; 568 iov++; 569 } 570 return seg; 571 } 572 EXPORT_SYMBOL(iov_shorten); 573 574 ssize_t do_sync_readv_writev(struct file *filp, const struct iovec *iov, 575 unsigned long nr_segs, size_t len, loff_t *ppos, iov_fn_t fn) 576 { 577 struct kiocb kiocb; 578 ssize_t ret; 579 580 init_sync_kiocb(&kiocb, filp); 581 kiocb.ki_pos = *ppos; 582 kiocb.ki_left = len; 583 kiocb.ki_nbytes = len; 584 585 for (;;) { 586 ret = fn(&kiocb, iov, nr_segs, kiocb.ki_pos); 587 if (ret != -EIOCBRETRY) 588 break; 589 wait_on_retry_sync_kiocb(&kiocb); 590 } 591 592 if (ret == -EIOCBQUEUED) 593 ret = wait_on_sync_kiocb(&kiocb); 594 *ppos = kiocb.ki_pos; 595 return ret; 596 } 597 598 /* Do it by hand, with file-ops */ 599 ssize_t do_loop_readv_writev(struct file *filp, struct iovec *iov, 600 unsigned long nr_segs, loff_t *ppos, io_fn_t fn) 601 { 602 struct iovec *vector = iov; 603 ssize_t ret = 0; 604 605 while (nr_segs > 0) { 606 void __user *base; 607 size_t len; 608 ssize_t nr; 609 610 base = vector->iov_base; 611 len = vector->iov_len; 612 vector++; 613 nr_segs--; 614 615 nr = fn(filp, base, len, ppos); 616 617 if (nr < 0) { 618 if (!ret) 619 ret = nr; 620 break; 621 } 622 ret += nr; 623 if (nr != len) 624 break; 625 } 626 627 return ret; 628 } 629 630 /* A write operation does a read from user space and vice versa */ 631 #define vrfy_dir(type) ((type) == READ ? VERIFY_WRITE : VERIFY_READ) 632 633 ssize_t rw_copy_check_uvector(int type, const struct iovec __user * uvector, 634 unsigned long nr_segs, unsigned long fast_segs, 635 struct iovec *fast_pointer, 636 struct iovec **ret_pointer, 637 int check_access) 638 { 639 unsigned long seg; 640 ssize_t ret; 641 struct iovec *iov = fast_pointer; 642 643 /* 644 * SuS says "The readv() function *may* fail if the iovcnt argument 645 * was less than or equal to 0, or greater than {IOV_MAX}. Linux has 646 * traditionally returned zero for zero segments, so... 647 */ 648 if (nr_segs == 0) { 649 ret = 0; 650 goto out; 651 } 652 653 /* 654 * First get the "struct iovec" from user memory and 655 * verify all the pointers 656 */ 657 if (nr_segs > UIO_MAXIOV) { 658 ret = -EINVAL; 659 goto out; 660 } 661 if (nr_segs > fast_segs) { 662 iov = kmalloc(nr_segs*sizeof(struct iovec), GFP_KERNEL); 663 if (iov == NULL) { 664 ret = -ENOMEM; 665 goto out; 666 } 667 } 668 if (copy_from_user(iov, uvector, nr_segs*sizeof(*uvector))) { 669 ret = -EFAULT; 670 goto out; 671 } 672 673 /* 674 * According to the Single Unix Specification we should return EINVAL 675 * if an element length is < 0 when cast to ssize_t or if the 676 * total length would overflow the ssize_t return value of the 677 * system call. 678 * 679 * Linux caps all read/write calls to MAX_RW_COUNT, and avoids the 680 * overflow case. 681 */ 682 ret = 0; 683 for (seg = 0; seg < nr_segs; seg++) { 684 void __user *buf = iov[seg].iov_base; 685 ssize_t len = (ssize_t)iov[seg].iov_len; 686 687 /* see if we we're about to use an invalid len or if 688 * it's about to overflow ssize_t */ 689 if (len < 0) { 690 ret = -EINVAL; 691 goto out; 692 } 693 if (check_access 694 && unlikely(!access_ok(vrfy_dir(type), buf, len))) { 695 ret = -EFAULT; 696 goto out; 697 } 698 if (len > MAX_RW_COUNT - ret) { 699 len = MAX_RW_COUNT - ret; 700 iov[seg].iov_len = len; 701 } 702 ret += len; 703 } 704 out: 705 *ret_pointer = iov; 706 return ret; 707 } 708 709 static ssize_t do_readv_writev(int type, struct file *file, 710 const struct iovec __user * uvector, 711 unsigned long nr_segs, loff_t *pos) 712 { 713 size_t tot_len; 714 struct iovec iovstack[UIO_FASTIOV]; 715 struct iovec *iov = iovstack; 716 ssize_t ret; 717 io_fn_t fn; 718 iov_fn_t fnv; 719 720 if (!file->f_op) { 721 ret = -EINVAL; 722 goto out; 723 } 724 725 ret = rw_copy_check_uvector(type, uvector, nr_segs, 726 ARRAY_SIZE(iovstack), iovstack, &iov, 1); 727 if (ret <= 0) 728 goto out; 729 730 tot_len = ret; 731 ret = rw_verify_area(type, file, pos, tot_len); 732 if (ret < 0) 733 goto out; 734 735 fnv = NULL; 736 if (type == READ) { 737 fn = file->f_op->read; 738 fnv = file->f_op->aio_read; 739 } else { 740 fn = (io_fn_t)file->f_op->write; 741 fnv = file->f_op->aio_write; 742 } 743 744 if (fnv) 745 ret = do_sync_readv_writev(file, iov, nr_segs, tot_len, 746 pos, fnv); 747 else 748 ret = do_loop_readv_writev(file, iov, nr_segs, pos, fn); 749 750 out: 751 if (iov != iovstack) 752 kfree(iov); 753 if ((ret + (type == READ)) > 0) { 754 if (type == READ) 755 fsnotify_access(file); 756 else 757 fsnotify_modify(file); 758 } 759 return ret; 760 } 761 762 ssize_t vfs_readv(struct file *file, const struct iovec __user *vec, 763 unsigned long vlen, loff_t *pos) 764 { 765 if (!(file->f_mode & FMODE_READ)) 766 return -EBADF; 767 if (!file->f_op || (!file->f_op->aio_read && !file->f_op->read)) 768 return -EINVAL; 769 770 return do_readv_writev(READ, file, vec, vlen, pos); 771 } 772 773 EXPORT_SYMBOL(vfs_readv); 774 775 ssize_t vfs_writev(struct file *file, const struct iovec __user *vec, 776 unsigned long vlen, loff_t *pos) 777 { 778 if (!(file->f_mode & FMODE_WRITE)) 779 return -EBADF; 780 if (!file->f_op || (!file->f_op->aio_write && !file->f_op->write)) 781 return -EINVAL; 782 783 return do_readv_writev(WRITE, file, vec, vlen, pos); 784 } 785 786 EXPORT_SYMBOL(vfs_writev); 787 788 SYSCALL_DEFINE3(readv, unsigned long, fd, const struct iovec __user *, vec, 789 unsigned long, vlen) 790 { 791 struct file *file; 792 ssize_t ret = -EBADF; 793 int fput_needed; 794 795 file = fget_light(fd, &fput_needed); 796 if (file) { 797 loff_t pos = file_pos_read(file); 798 ret = vfs_readv(file, vec, vlen, &pos); 799 file_pos_write(file, pos); 800 fput_light(file, fput_needed); 801 } 802 803 if (ret > 0) 804 add_rchar(current, ret); 805 inc_syscr(current); 806 return ret; 807 } 808 809 SYSCALL_DEFINE3(writev, unsigned long, fd, const struct iovec __user *, vec, 810 unsigned long, vlen) 811 { 812 struct file *file; 813 ssize_t ret = -EBADF; 814 int fput_needed; 815 816 file = fget_light(fd, &fput_needed); 817 if (file) { 818 loff_t pos = file_pos_read(file); 819 ret = vfs_writev(file, vec, vlen, &pos); 820 file_pos_write(file, pos); 821 fput_light(file, fput_needed); 822 } 823 824 if (ret > 0) 825 add_wchar(current, ret); 826 inc_syscw(current); 827 return ret; 828 } 829 830 static inline loff_t pos_from_hilo(unsigned long high, unsigned long low) 831 { 832 #define HALF_LONG_BITS (BITS_PER_LONG / 2) 833 return (((loff_t)high << HALF_LONG_BITS) << HALF_LONG_BITS) | low; 834 } 835 836 SYSCALL_DEFINE5(preadv, unsigned long, fd, const struct iovec __user *, vec, 837 unsigned long, vlen, unsigned long, pos_l, unsigned long, pos_h) 838 { 839 loff_t pos = pos_from_hilo(pos_h, pos_l); 840 struct file *file; 841 ssize_t ret = -EBADF; 842 int fput_needed; 843 844 if (pos < 0) 845 return -EINVAL; 846 847 file = fget_light(fd, &fput_needed); 848 if (file) { 849 ret = -ESPIPE; 850 if (file->f_mode & FMODE_PREAD) 851 ret = vfs_readv(file, vec, vlen, &pos); 852 fput_light(file, fput_needed); 853 } 854 855 if (ret > 0) 856 add_rchar(current, ret); 857 inc_syscr(current); 858 return ret; 859 } 860 861 SYSCALL_DEFINE5(pwritev, unsigned long, fd, const struct iovec __user *, vec, 862 unsigned long, vlen, unsigned long, pos_l, unsigned long, pos_h) 863 { 864 loff_t pos = pos_from_hilo(pos_h, pos_l); 865 struct file *file; 866 ssize_t ret = -EBADF; 867 int fput_needed; 868 869 if (pos < 0) 870 return -EINVAL; 871 872 file = fget_light(fd, &fput_needed); 873 if (file) { 874 ret = -ESPIPE; 875 if (file->f_mode & FMODE_PWRITE) 876 ret = vfs_writev(file, vec, vlen, &pos); 877 fput_light(file, fput_needed); 878 } 879 880 if (ret > 0) 881 add_wchar(current, ret); 882 inc_syscw(current); 883 return ret; 884 } 885 886 static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos, 887 size_t count, loff_t max) 888 { 889 struct file * in_file, * out_file; 890 struct inode * in_inode, * out_inode; 891 loff_t pos; 892 ssize_t retval; 893 int fput_needed_in, fput_needed_out, fl; 894 895 /* 896 * Get input file, and verify that it is ok.. 897 */ 898 retval = -EBADF; 899 in_file = fget_light(in_fd, &fput_needed_in); 900 if (!in_file) 901 goto out; 902 if (!(in_file->f_mode & FMODE_READ)) 903 goto fput_in; 904 retval = -ESPIPE; 905 if (!ppos) 906 ppos = &in_file->f_pos; 907 else 908 if (!(in_file->f_mode & FMODE_PREAD)) 909 goto fput_in; 910 retval = rw_verify_area(READ, in_file, ppos, count); 911 if (retval < 0) 912 goto fput_in; 913 count = retval; 914 915 /* 916 * Get output file, and verify that it is ok.. 917 */ 918 retval = -EBADF; 919 out_file = fget_light(out_fd, &fput_needed_out); 920 if (!out_file) 921 goto fput_in; 922 if (!(out_file->f_mode & FMODE_WRITE)) 923 goto fput_out; 924 retval = -EINVAL; 925 in_inode = in_file->f_path.dentry->d_inode; 926 out_inode = out_file->f_path.dentry->d_inode; 927 retval = rw_verify_area(WRITE, out_file, &out_file->f_pos, count); 928 if (retval < 0) 929 goto fput_out; 930 count = retval; 931 932 if (!max) 933 max = min(in_inode->i_sb->s_maxbytes, out_inode->i_sb->s_maxbytes); 934 935 pos = *ppos; 936 if (unlikely(pos + count > max)) { 937 retval = -EOVERFLOW; 938 if (pos >= max) 939 goto fput_out; 940 count = max - pos; 941 } 942 943 fl = 0; 944 #if 0 945 /* 946 * We need to debate whether we can enable this or not. The 947 * man page documents EAGAIN return for the output at least, 948 * and the application is arguably buggy if it doesn't expect 949 * EAGAIN on a non-blocking file descriptor. 950 */ 951 if (in_file->f_flags & O_NONBLOCK) 952 fl = SPLICE_F_NONBLOCK; 953 #endif 954 retval = do_splice_direct(in_file, ppos, out_file, count, fl); 955 956 if (retval > 0) { 957 add_rchar(current, retval); 958 add_wchar(current, retval); 959 } 960 961 inc_syscr(current); 962 inc_syscw(current); 963 if (*ppos > max) 964 retval = -EOVERFLOW; 965 966 fput_out: 967 fput_light(out_file, fput_needed_out); 968 fput_in: 969 fput_light(in_file, fput_needed_in); 970 out: 971 return retval; 972 } 973 974 SYSCALL_DEFINE4(sendfile, int, out_fd, int, in_fd, off_t __user *, offset, size_t, count) 975 { 976 loff_t pos; 977 off_t off; 978 ssize_t ret; 979 980 if (offset) { 981 if (unlikely(get_user(off, offset))) 982 return -EFAULT; 983 pos = off; 984 ret = do_sendfile(out_fd, in_fd, &pos, count, MAX_NON_LFS); 985 if (unlikely(put_user(pos, offset))) 986 return -EFAULT; 987 return ret; 988 } 989 990 return do_sendfile(out_fd, in_fd, NULL, count, 0); 991 } 992 993 SYSCALL_DEFINE4(sendfile64, int, out_fd, int, in_fd, loff_t __user *, offset, size_t, count) 994 { 995 loff_t pos; 996 ssize_t ret; 997 998 if (offset) { 999 if (unlikely(copy_from_user(&pos, offset, sizeof(loff_t)))) 1000 return -EFAULT; 1001 ret = do_sendfile(out_fd, in_fd, &pos, count, 0); 1002 if (unlikely(put_user(pos, offset))) 1003 return -EFAULT; 1004 return ret; 1005 } 1006 1007 return do_sendfile(out_fd, in_fd, NULL, count, 0); 1008 } 1009