1 /* 2 * linux/fs/read_write.c 3 * 4 * Copyright (C) 1991, 1992 Linus Torvalds 5 */ 6 7 #include <linux/slab.h> 8 #include <linux/stat.h> 9 #include <linux/fcntl.h> 10 #include <linux/file.h> 11 #include <linux/uio.h> 12 #include <linux/fsnotify.h> 13 #include <linux/security.h> 14 #include <linux/export.h> 15 #include <linux/syscalls.h> 16 #include <linux/pagemap.h> 17 #include <linux/splice.h> 18 #include "read_write.h" 19 20 #include <asm/uaccess.h> 21 #include <asm/unistd.h> 22 23 const struct file_operations generic_ro_fops = { 24 .llseek = generic_file_llseek, 25 .read = do_sync_read, 26 .aio_read = generic_file_aio_read, 27 .mmap = generic_file_readonly_mmap, 28 .splice_read = generic_file_splice_read, 29 }; 30 31 EXPORT_SYMBOL(generic_ro_fops); 32 33 static inline int unsigned_offsets(struct file *file) 34 { 35 return file->f_mode & FMODE_UNSIGNED_OFFSET; 36 } 37 38 static loff_t lseek_execute(struct file *file, struct inode *inode, 39 loff_t offset, loff_t maxsize) 40 { 41 if (offset < 0 && !unsigned_offsets(file)) 42 return -EINVAL; 43 if (offset > maxsize) 44 return -EINVAL; 45 46 if (offset != file->f_pos) { 47 file->f_pos = offset; 48 file->f_version = 0; 49 } 50 return offset; 51 } 52 53 /** 54 * generic_file_llseek_size - generic llseek implementation for regular files 55 * @file: file structure to seek on 56 * @offset: file offset to seek to 57 * @origin: type of seek 58 * @size: max size of this file in file system 59 * @eof: offset used for SEEK_END position 60 * 61 * This is a variant of generic_file_llseek that allows passing in a custom 62 * maximum file size and a custom EOF position, for e.g. hashed directories 63 * 64 * Synchronization: 65 * SEEK_SET and SEEK_END are unsynchronized (but atomic on 64bit platforms) 66 * SEEK_CUR is synchronized against other SEEK_CURs, but not read/writes. 67 * read/writes behave like SEEK_SET against seeks. 68 */ 69 loff_t 70 generic_file_llseek_size(struct file *file, loff_t offset, int origin, 71 loff_t maxsize, loff_t eof) 72 { 73 struct inode *inode = file->f_mapping->host; 74 75 switch (origin) { 76 case SEEK_END: 77 offset += eof; 78 break; 79 case SEEK_CUR: 80 /* 81 * Here we special-case the lseek(fd, 0, SEEK_CUR) 82 * position-querying operation. Avoid rewriting the "same" 83 * f_pos value back to the file because a concurrent read(), 84 * write() or lseek() might have altered it 85 */ 86 if (offset == 0) 87 return file->f_pos; 88 /* 89 * f_lock protects against read/modify/write race with other 90 * SEEK_CURs. Note that parallel writes and reads behave 91 * like SEEK_SET. 92 */ 93 spin_lock(&file->f_lock); 94 offset = lseek_execute(file, inode, file->f_pos + offset, 95 maxsize); 96 spin_unlock(&file->f_lock); 97 return offset; 98 case SEEK_DATA: 99 /* 100 * In the generic case the entire file is data, so as long as 101 * offset isn't at the end of the file then the offset is data. 102 */ 103 if (offset >= eof) 104 return -ENXIO; 105 break; 106 case SEEK_HOLE: 107 /* 108 * There is a virtual hole at the end of the file, so as long as 109 * offset isn't i_size or larger, return i_size. 110 */ 111 if (offset >= eof) 112 return -ENXIO; 113 offset = eof; 114 break; 115 } 116 117 return lseek_execute(file, inode, offset, maxsize); 118 } 119 EXPORT_SYMBOL(generic_file_llseek_size); 120 121 /** 122 * generic_file_llseek - generic llseek implementation for regular files 123 * @file: file structure to seek on 124 * @offset: file offset to seek to 125 * @origin: type of seek 126 * 127 * This is a generic implemenation of ->llseek useable for all normal local 128 * filesystems. It just updates the file offset to the value specified by 129 * @offset and @origin under i_mutex. 130 */ 131 loff_t generic_file_llseek(struct file *file, loff_t offset, int origin) 132 { 133 struct inode *inode = file->f_mapping->host; 134 135 return generic_file_llseek_size(file, offset, origin, 136 inode->i_sb->s_maxbytes, 137 i_size_read(inode)); 138 } 139 EXPORT_SYMBOL(generic_file_llseek); 140 141 /** 142 * noop_llseek - No Operation Performed llseek implementation 143 * @file: file structure to seek on 144 * @offset: file offset to seek to 145 * @origin: type of seek 146 * 147 * This is an implementation of ->llseek useable for the rare special case when 148 * userspace expects the seek to succeed but the (device) file is actually not 149 * able to perform the seek. In this case you use noop_llseek() instead of 150 * falling back to the default implementation of ->llseek. 151 */ 152 loff_t noop_llseek(struct file *file, loff_t offset, int origin) 153 { 154 return file->f_pos; 155 } 156 EXPORT_SYMBOL(noop_llseek); 157 158 loff_t no_llseek(struct file *file, loff_t offset, int origin) 159 { 160 return -ESPIPE; 161 } 162 EXPORT_SYMBOL(no_llseek); 163 164 loff_t default_llseek(struct file *file, loff_t offset, int origin) 165 { 166 struct inode *inode = file->f_path.dentry->d_inode; 167 loff_t retval; 168 169 mutex_lock(&inode->i_mutex); 170 switch (origin) { 171 case SEEK_END: 172 offset += i_size_read(inode); 173 break; 174 case SEEK_CUR: 175 if (offset == 0) { 176 retval = file->f_pos; 177 goto out; 178 } 179 offset += file->f_pos; 180 break; 181 case SEEK_DATA: 182 /* 183 * In the generic case the entire file is data, so as 184 * long as offset isn't at the end of the file then the 185 * offset is data. 186 */ 187 if (offset >= inode->i_size) { 188 retval = -ENXIO; 189 goto out; 190 } 191 break; 192 case SEEK_HOLE: 193 /* 194 * There is a virtual hole at the end of the file, so 195 * as long as offset isn't i_size or larger, return 196 * i_size. 197 */ 198 if (offset >= inode->i_size) { 199 retval = -ENXIO; 200 goto out; 201 } 202 offset = inode->i_size; 203 break; 204 } 205 retval = -EINVAL; 206 if (offset >= 0 || unsigned_offsets(file)) { 207 if (offset != file->f_pos) { 208 file->f_pos = offset; 209 file->f_version = 0; 210 } 211 retval = offset; 212 } 213 out: 214 mutex_unlock(&inode->i_mutex); 215 return retval; 216 } 217 EXPORT_SYMBOL(default_llseek); 218 219 loff_t vfs_llseek(struct file *file, loff_t offset, int origin) 220 { 221 loff_t (*fn)(struct file *, loff_t, int); 222 223 fn = no_llseek; 224 if (file->f_mode & FMODE_LSEEK) { 225 if (file->f_op && file->f_op->llseek) 226 fn = file->f_op->llseek; 227 } 228 return fn(file, offset, origin); 229 } 230 EXPORT_SYMBOL(vfs_llseek); 231 232 SYSCALL_DEFINE3(lseek, unsigned int, fd, off_t, offset, unsigned int, origin) 233 { 234 off_t retval; 235 struct fd f = fdget(fd); 236 if (!f.file) 237 return -EBADF; 238 239 retval = -EINVAL; 240 if (origin <= SEEK_MAX) { 241 loff_t res = vfs_llseek(f.file, offset, origin); 242 retval = res; 243 if (res != (loff_t)retval) 244 retval = -EOVERFLOW; /* LFS: should only happen on 32 bit platforms */ 245 } 246 fdput(f); 247 return retval; 248 } 249 250 #ifdef __ARCH_WANT_SYS_LLSEEK 251 SYSCALL_DEFINE5(llseek, unsigned int, fd, unsigned long, offset_high, 252 unsigned long, offset_low, loff_t __user *, result, 253 unsigned int, origin) 254 { 255 int retval; 256 struct fd f = fdget(fd); 257 loff_t offset; 258 259 if (!f.file) 260 return -EBADF; 261 262 retval = -EINVAL; 263 if (origin > SEEK_MAX) 264 goto out_putf; 265 266 offset = vfs_llseek(f.file, ((loff_t) offset_high << 32) | offset_low, 267 origin); 268 269 retval = (int)offset; 270 if (offset >= 0) { 271 retval = -EFAULT; 272 if (!copy_to_user(result, &offset, sizeof(offset))) 273 retval = 0; 274 } 275 out_putf: 276 fdput(f); 277 return retval; 278 } 279 #endif 280 281 282 /* 283 * rw_verify_area doesn't like huge counts. We limit 284 * them to something that fits in "int" so that others 285 * won't have to do range checks all the time. 286 */ 287 int rw_verify_area(int read_write, struct file *file, loff_t *ppos, size_t count) 288 { 289 struct inode *inode; 290 loff_t pos; 291 int retval = -EINVAL; 292 293 inode = file->f_path.dentry->d_inode; 294 if (unlikely((ssize_t) count < 0)) 295 return retval; 296 pos = *ppos; 297 if (unlikely(pos < 0)) { 298 if (!unsigned_offsets(file)) 299 return retval; 300 if (count >= -pos) /* both values are in 0..LLONG_MAX */ 301 return -EOVERFLOW; 302 } else if (unlikely((loff_t) (pos + count) < 0)) { 303 if (!unsigned_offsets(file)) 304 return retval; 305 } 306 307 if (unlikely(inode->i_flock && mandatory_lock(inode))) { 308 retval = locks_mandatory_area( 309 read_write == READ ? FLOCK_VERIFY_READ : FLOCK_VERIFY_WRITE, 310 inode, file, pos, count); 311 if (retval < 0) 312 return retval; 313 } 314 retval = security_file_permission(file, 315 read_write == READ ? MAY_READ : MAY_WRITE); 316 if (retval) 317 return retval; 318 return count > MAX_RW_COUNT ? MAX_RW_COUNT : count; 319 } 320 321 static void wait_on_retry_sync_kiocb(struct kiocb *iocb) 322 { 323 set_current_state(TASK_UNINTERRUPTIBLE); 324 if (!kiocbIsKicked(iocb)) 325 schedule(); 326 else 327 kiocbClearKicked(iocb); 328 __set_current_state(TASK_RUNNING); 329 } 330 331 ssize_t do_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos) 332 { 333 struct iovec iov = { .iov_base = buf, .iov_len = len }; 334 struct kiocb kiocb; 335 ssize_t ret; 336 337 init_sync_kiocb(&kiocb, filp); 338 kiocb.ki_pos = *ppos; 339 kiocb.ki_left = len; 340 kiocb.ki_nbytes = len; 341 342 for (;;) { 343 ret = filp->f_op->aio_read(&kiocb, &iov, 1, kiocb.ki_pos); 344 if (ret != -EIOCBRETRY) 345 break; 346 wait_on_retry_sync_kiocb(&kiocb); 347 } 348 349 if (-EIOCBQUEUED == ret) 350 ret = wait_on_sync_kiocb(&kiocb); 351 *ppos = kiocb.ki_pos; 352 return ret; 353 } 354 355 EXPORT_SYMBOL(do_sync_read); 356 357 ssize_t vfs_read(struct file *file, char __user *buf, size_t count, loff_t *pos) 358 { 359 ssize_t ret; 360 361 if (!(file->f_mode & FMODE_READ)) 362 return -EBADF; 363 if (!file->f_op || (!file->f_op->read && !file->f_op->aio_read)) 364 return -EINVAL; 365 if (unlikely(!access_ok(VERIFY_WRITE, buf, count))) 366 return -EFAULT; 367 368 ret = rw_verify_area(READ, file, pos, count); 369 if (ret >= 0) { 370 count = ret; 371 if (file->f_op->read) 372 ret = file->f_op->read(file, buf, count, pos); 373 else 374 ret = do_sync_read(file, buf, count, pos); 375 if (ret > 0) { 376 fsnotify_access(file); 377 add_rchar(current, ret); 378 } 379 inc_syscr(current); 380 } 381 382 return ret; 383 } 384 385 EXPORT_SYMBOL(vfs_read); 386 387 ssize_t do_sync_write(struct file *filp, const char __user *buf, size_t len, loff_t *ppos) 388 { 389 struct iovec iov = { .iov_base = (void __user *)buf, .iov_len = len }; 390 struct kiocb kiocb; 391 ssize_t ret; 392 393 init_sync_kiocb(&kiocb, filp); 394 kiocb.ki_pos = *ppos; 395 kiocb.ki_left = len; 396 kiocb.ki_nbytes = len; 397 398 for (;;) { 399 ret = filp->f_op->aio_write(&kiocb, &iov, 1, kiocb.ki_pos); 400 if (ret != -EIOCBRETRY) 401 break; 402 wait_on_retry_sync_kiocb(&kiocb); 403 } 404 405 if (-EIOCBQUEUED == ret) 406 ret = wait_on_sync_kiocb(&kiocb); 407 *ppos = kiocb.ki_pos; 408 return ret; 409 } 410 411 EXPORT_SYMBOL(do_sync_write); 412 413 ssize_t vfs_write(struct file *file, const char __user *buf, size_t count, loff_t *pos) 414 { 415 ssize_t ret; 416 417 if (!(file->f_mode & FMODE_WRITE)) 418 return -EBADF; 419 if (!file->f_op || (!file->f_op->write && !file->f_op->aio_write)) 420 return -EINVAL; 421 if (unlikely(!access_ok(VERIFY_READ, buf, count))) 422 return -EFAULT; 423 424 ret = rw_verify_area(WRITE, file, pos, count); 425 if (ret >= 0) { 426 count = ret; 427 if (file->f_op->write) 428 ret = file->f_op->write(file, buf, count, pos); 429 else 430 ret = do_sync_write(file, buf, count, pos); 431 if (ret > 0) { 432 fsnotify_modify(file); 433 add_wchar(current, ret); 434 } 435 inc_syscw(current); 436 } 437 438 return ret; 439 } 440 441 EXPORT_SYMBOL(vfs_write); 442 443 static inline loff_t file_pos_read(struct file *file) 444 { 445 return file->f_pos; 446 } 447 448 static inline void file_pos_write(struct file *file, loff_t pos) 449 { 450 file->f_pos = pos; 451 } 452 453 SYSCALL_DEFINE3(read, unsigned int, fd, char __user *, buf, size_t, count) 454 { 455 struct fd f = fdget(fd); 456 ssize_t ret = -EBADF; 457 458 if (f.file) { 459 loff_t pos = file_pos_read(f.file); 460 ret = vfs_read(f.file, buf, count, &pos); 461 file_pos_write(f.file, pos); 462 fdput(f); 463 } 464 return ret; 465 } 466 467 SYSCALL_DEFINE3(write, unsigned int, fd, const char __user *, buf, 468 size_t, count) 469 { 470 struct fd f = fdget(fd); 471 ssize_t ret = -EBADF; 472 473 if (f.file) { 474 loff_t pos = file_pos_read(f.file); 475 ret = vfs_write(f.file, buf, count, &pos); 476 file_pos_write(f.file, pos); 477 fdput(f); 478 } 479 480 return ret; 481 } 482 483 SYSCALL_DEFINE(pread64)(unsigned int fd, char __user *buf, 484 size_t count, loff_t pos) 485 { 486 struct fd f; 487 ssize_t ret = -EBADF; 488 489 if (pos < 0) 490 return -EINVAL; 491 492 f = fdget(fd); 493 if (f.file) { 494 ret = -ESPIPE; 495 if (f.file->f_mode & FMODE_PREAD) 496 ret = vfs_read(f.file, buf, count, &pos); 497 fdput(f); 498 } 499 500 return ret; 501 } 502 #ifdef CONFIG_HAVE_SYSCALL_WRAPPERS 503 asmlinkage long SyS_pread64(long fd, long buf, long count, loff_t pos) 504 { 505 return SYSC_pread64((unsigned int) fd, (char __user *) buf, 506 (size_t) count, pos); 507 } 508 SYSCALL_ALIAS(sys_pread64, SyS_pread64); 509 #endif 510 511 SYSCALL_DEFINE(pwrite64)(unsigned int fd, const char __user *buf, 512 size_t count, loff_t pos) 513 { 514 struct fd f; 515 ssize_t ret = -EBADF; 516 517 if (pos < 0) 518 return -EINVAL; 519 520 f = fdget(fd); 521 if (f.file) { 522 ret = -ESPIPE; 523 if (f.file->f_mode & FMODE_PWRITE) 524 ret = vfs_write(f.file, buf, count, &pos); 525 fdput(f); 526 } 527 528 return ret; 529 } 530 #ifdef CONFIG_HAVE_SYSCALL_WRAPPERS 531 asmlinkage long SyS_pwrite64(long fd, long buf, long count, loff_t pos) 532 { 533 return SYSC_pwrite64((unsigned int) fd, (const char __user *) buf, 534 (size_t) count, pos); 535 } 536 SYSCALL_ALIAS(sys_pwrite64, SyS_pwrite64); 537 #endif 538 539 /* 540 * Reduce an iovec's length in-place. Return the resulting number of segments 541 */ 542 unsigned long iov_shorten(struct iovec *iov, unsigned long nr_segs, size_t to) 543 { 544 unsigned long seg = 0; 545 size_t len = 0; 546 547 while (seg < nr_segs) { 548 seg++; 549 if (len + iov->iov_len >= to) { 550 iov->iov_len = to - len; 551 break; 552 } 553 len += iov->iov_len; 554 iov++; 555 } 556 return seg; 557 } 558 EXPORT_SYMBOL(iov_shorten); 559 560 ssize_t do_sync_readv_writev(struct file *filp, const struct iovec *iov, 561 unsigned long nr_segs, size_t len, loff_t *ppos, iov_fn_t fn) 562 { 563 struct kiocb kiocb; 564 ssize_t ret; 565 566 init_sync_kiocb(&kiocb, filp); 567 kiocb.ki_pos = *ppos; 568 kiocb.ki_left = len; 569 kiocb.ki_nbytes = len; 570 571 for (;;) { 572 ret = fn(&kiocb, iov, nr_segs, kiocb.ki_pos); 573 if (ret != -EIOCBRETRY) 574 break; 575 wait_on_retry_sync_kiocb(&kiocb); 576 } 577 578 if (ret == -EIOCBQUEUED) 579 ret = wait_on_sync_kiocb(&kiocb); 580 *ppos = kiocb.ki_pos; 581 return ret; 582 } 583 584 /* Do it by hand, with file-ops */ 585 ssize_t do_loop_readv_writev(struct file *filp, struct iovec *iov, 586 unsigned long nr_segs, loff_t *ppos, io_fn_t fn) 587 { 588 struct iovec *vector = iov; 589 ssize_t ret = 0; 590 591 while (nr_segs > 0) { 592 void __user *base; 593 size_t len; 594 ssize_t nr; 595 596 base = vector->iov_base; 597 len = vector->iov_len; 598 vector++; 599 nr_segs--; 600 601 nr = fn(filp, base, len, ppos); 602 603 if (nr < 0) { 604 if (!ret) 605 ret = nr; 606 break; 607 } 608 ret += nr; 609 if (nr != len) 610 break; 611 } 612 613 return ret; 614 } 615 616 /* A write operation does a read from user space and vice versa */ 617 #define vrfy_dir(type) ((type) == READ ? VERIFY_WRITE : VERIFY_READ) 618 619 ssize_t rw_copy_check_uvector(int type, const struct iovec __user * uvector, 620 unsigned long nr_segs, unsigned long fast_segs, 621 struct iovec *fast_pointer, 622 struct iovec **ret_pointer) 623 { 624 unsigned long seg; 625 ssize_t ret; 626 struct iovec *iov = fast_pointer; 627 628 /* 629 * SuS says "The readv() function *may* fail if the iovcnt argument 630 * was less than or equal to 0, or greater than {IOV_MAX}. Linux has 631 * traditionally returned zero for zero segments, so... 632 */ 633 if (nr_segs == 0) { 634 ret = 0; 635 goto out; 636 } 637 638 /* 639 * First get the "struct iovec" from user memory and 640 * verify all the pointers 641 */ 642 if (nr_segs > UIO_MAXIOV) { 643 ret = -EINVAL; 644 goto out; 645 } 646 if (nr_segs > fast_segs) { 647 iov = kmalloc(nr_segs*sizeof(struct iovec), GFP_KERNEL); 648 if (iov == NULL) { 649 ret = -ENOMEM; 650 goto out; 651 } 652 } 653 if (copy_from_user(iov, uvector, nr_segs*sizeof(*uvector))) { 654 ret = -EFAULT; 655 goto out; 656 } 657 658 /* 659 * According to the Single Unix Specification we should return EINVAL 660 * if an element length is < 0 when cast to ssize_t or if the 661 * total length would overflow the ssize_t return value of the 662 * system call. 663 * 664 * Linux caps all read/write calls to MAX_RW_COUNT, and avoids the 665 * overflow case. 666 */ 667 ret = 0; 668 for (seg = 0; seg < nr_segs; seg++) { 669 void __user *buf = iov[seg].iov_base; 670 ssize_t len = (ssize_t)iov[seg].iov_len; 671 672 /* see if we we're about to use an invalid len or if 673 * it's about to overflow ssize_t */ 674 if (len < 0) { 675 ret = -EINVAL; 676 goto out; 677 } 678 if (type >= 0 679 && unlikely(!access_ok(vrfy_dir(type), buf, len))) { 680 ret = -EFAULT; 681 goto out; 682 } 683 if (len > MAX_RW_COUNT - ret) { 684 len = MAX_RW_COUNT - ret; 685 iov[seg].iov_len = len; 686 } 687 ret += len; 688 } 689 out: 690 *ret_pointer = iov; 691 return ret; 692 } 693 694 static ssize_t do_readv_writev(int type, struct file *file, 695 const struct iovec __user * uvector, 696 unsigned long nr_segs, loff_t *pos) 697 { 698 size_t tot_len; 699 struct iovec iovstack[UIO_FASTIOV]; 700 struct iovec *iov = iovstack; 701 ssize_t ret; 702 io_fn_t fn; 703 iov_fn_t fnv; 704 705 if (!file->f_op) { 706 ret = -EINVAL; 707 goto out; 708 } 709 710 ret = rw_copy_check_uvector(type, uvector, nr_segs, 711 ARRAY_SIZE(iovstack), iovstack, &iov); 712 if (ret <= 0) 713 goto out; 714 715 tot_len = ret; 716 ret = rw_verify_area(type, file, pos, tot_len); 717 if (ret < 0) 718 goto out; 719 720 fnv = NULL; 721 if (type == READ) { 722 fn = file->f_op->read; 723 fnv = file->f_op->aio_read; 724 } else { 725 fn = (io_fn_t)file->f_op->write; 726 fnv = file->f_op->aio_write; 727 } 728 729 if (fnv) 730 ret = do_sync_readv_writev(file, iov, nr_segs, tot_len, 731 pos, fnv); 732 else 733 ret = do_loop_readv_writev(file, iov, nr_segs, pos, fn); 734 735 out: 736 if (iov != iovstack) 737 kfree(iov); 738 if ((ret + (type == READ)) > 0) { 739 if (type == READ) 740 fsnotify_access(file); 741 else 742 fsnotify_modify(file); 743 } 744 return ret; 745 } 746 747 ssize_t vfs_readv(struct file *file, const struct iovec __user *vec, 748 unsigned long vlen, loff_t *pos) 749 { 750 if (!(file->f_mode & FMODE_READ)) 751 return -EBADF; 752 if (!file->f_op || (!file->f_op->aio_read && !file->f_op->read)) 753 return -EINVAL; 754 755 return do_readv_writev(READ, file, vec, vlen, pos); 756 } 757 758 EXPORT_SYMBOL(vfs_readv); 759 760 ssize_t vfs_writev(struct file *file, const struct iovec __user *vec, 761 unsigned long vlen, loff_t *pos) 762 { 763 if (!(file->f_mode & FMODE_WRITE)) 764 return -EBADF; 765 if (!file->f_op || (!file->f_op->aio_write && !file->f_op->write)) 766 return -EINVAL; 767 768 return do_readv_writev(WRITE, file, vec, vlen, pos); 769 } 770 771 EXPORT_SYMBOL(vfs_writev); 772 773 SYSCALL_DEFINE3(readv, unsigned long, fd, const struct iovec __user *, vec, 774 unsigned long, vlen) 775 { 776 struct fd f = fdget(fd); 777 ssize_t ret = -EBADF; 778 779 if (f.file) { 780 loff_t pos = file_pos_read(f.file); 781 ret = vfs_readv(f.file, vec, vlen, &pos); 782 file_pos_write(f.file, pos); 783 fdput(f); 784 } 785 786 if (ret > 0) 787 add_rchar(current, ret); 788 inc_syscr(current); 789 return ret; 790 } 791 792 SYSCALL_DEFINE3(writev, unsigned long, fd, const struct iovec __user *, vec, 793 unsigned long, vlen) 794 { 795 struct fd f = fdget(fd); 796 ssize_t ret = -EBADF; 797 798 if (f.file) { 799 loff_t pos = file_pos_read(f.file); 800 ret = vfs_writev(f.file, vec, vlen, &pos); 801 file_pos_write(f.file, pos); 802 fdput(f); 803 } 804 805 if (ret > 0) 806 add_wchar(current, ret); 807 inc_syscw(current); 808 return ret; 809 } 810 811 static inline loff_t pos_from_hilo(unsigned long high, unsigned long low) 812 { 813 #define HALF_LONG_BITS (BITS_PER_LONG / 2) 814 return (((loff_t)high << HALF_LONG_BITS) << HALF_LONG_BITS) | low; 815 } 816 817 SYSCALL_DEFINE5(preadv, unsigned long, fd, const struct iovec __user *, vec, 818 unsigned long, vlen, unsigned long, pos_l, unsigned long, pos_h) 819 { 820 loff_t pos = pos_from_hilo(pos_h, pos_l); 821 struct fd f; 822 ssize_t ret = -EBADF; 823 824 if (pos < 0) 825 return -EINVAL; 826 827 f = fdget(fd); 828 if (f.file) { 829 ret = -ESPIPE; 830 if (f.file->f_mode & FMODE_PREAD) 831 ret = vfs_readv(f.file, vec, vlen, &pos); 832 fdput(f); 833 } 834 835 if (ret > 0) 836 add_rchar(current, ret); 837 inc_syscr(current); 838 return ret; 839 } 840 841 SYSCALL_DEFINE5(pwritev, unsigned long, fd, const struct iovec __user *, vec, 842 unsigned long, vlen, unsigned long, pos_l, unsigned long, pos_h) 843 { 844 loff_t pos = pos_from_hilo(pos_h, pos_l); 845 struct fd f; 846 ssize_t ret = -EBADF; 847 848 if (pos < 0) 849 return -EINVAL; 850 851 f = fdget(fd); 852 if (f.file) { 853 ret = -ESPIPE; 854 if (f.file->f_mode & FMODE_PWRITE) 855 ret = vfs_writev(f.file, vec, vlen, &pos); 856 fdput(f); 857 } 858 859 if (ret > 0) 860 add_wchar(current, ret); 861 inc_syscw(current); 862 return ret; 863 } 864 865 ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos, size_t count, 866 loff_t max) 867 { 868 struct fd in, out; 869 struct inode *in_inode, *out_inode; 870 loff_t pos; 871 ssize_t retval; 872 int fl; 873 874 /* 875 * Get input file, and verify that it is ok.. 876 */ 877 retval = -EBADF; 878 in = fdget(in_fd); 879 if (!in.file) 880 goto out; 881 if (!(in.file->f_mode & FMODE_READ)) 882 goto fput_in; 883 retval = -ESPIPE; 884 if (!ppos) 885 ppos = &in.file->f_pos; 886 else 887 if (!(in.file->f_mode & FMODE_PREAD)) 888 goto fput_in; 889 retval = rw_verify_area(READ, in.file, ppos, count); 890 if (retval < 0) 891 goto fput_in; 892 count = retval; 893 894 /* 895 * Get output file, and verify that it is ok.. 896 */ 897 retval = -EBADF; 898 out = fdget(out_fd); 899 if (!out.file) 900 goto fput_in; 901 if (!(out.file->f_mode & FMODE_WRITE)) 902 goto fput_out; 903 retval = -EINVAL; 904 in_inode = in.file->f_path.dentry->d_inode; 905 out_inode = out.file->f_path.dentry->d_inode; 906 retval = rw_verify_area(WRITE, out.file, &out.file->f_pos, count); 907 if (retval < 0) 908 goto fput_out; 909 count = retval; 910 911 if (!max) 912 max = min(in_inode->i_sb->s_maxbytes, out_inode->i_sb->s_maxbytes); 913 914 pos = *ppos; 915 if (unlikely(pos + count > max)) { 916 retval = -EOVERFLOW; 917 if (pos >= max) 918 goto fput_out; 919 count = max - pos; 920 } 921 922 fl = 0; 923 #if 0 924 /* 925 * We need to debate whether we can enable this or not. The 926 * man page documents EAGAIN return for the output at least, 927 * and the application is arguably buggy if it doesn't expect 928 * EAGAIN on a non-blocking file descriptor. 929 */ 930 if (in.file->f_flags & O_NONBLOCK) 931 fl = SPLICE_F_NONBLOCK; 932 #endif 933 retval = do_splice_direct(in.file, ppos, out.file, count, fl); 934 935 if (retval > 0) { 936 add_rchar(current, retval); 937 add_wchar(current, retval); 938 } 939 940 inc_syscr(current); 941 inc_syscw(current); 942 if (*ppos > max) 943 retval = -EOVERFLOW; 944 945 fput_out: 946 fdput(out); 947 fput_in: 948 fdput(in); 949 out: 950 return retval; 951 } 952 953 SYSCALL_DEFINE4(sendfile, int, out_fd, int, in_fd, off_t __user *, offset, size_t, count) 954 { 955 loff_t pos; 956 off_t off; 957 ssize_t ret; 958 959 if (offset) { 960 if (unlikely(get_user(off, offset))) 961 return -EFAULT; 962 pos = off; 963 ret = do_sendfile(out_fd, in_fd, &pos, count, MAX_NON_LFS); 964 if (unlikely(put_user(pos, offset))) 965 return -EFAULT; 966 return ret; 967 } 968 969 return do_sendfile(out_fd, in_fd, NULL, count, 0); 970 } 971 972 SYSCALL_DEFINE4(sendfile64, int, out_fd, int, in_fd, loff_t __user *, offset, size_t, count) 973 { 974 loff_t pos; 975 ssize_t ret; 976 977 if (offset) { 978 if (unlikely(copy_from_user(&pos, offset, sizeof(loff_t)))) 979 return -EFAULT; 980 ret = do_sendfile(out_fd, in_fd, &pos, count, 0); 981 if (unlikely(put_user(pos, offset))) 982 return -EFAULT; 983 return ret; 984 } 985 986 return do_sendfile(out_fd, in_fd, NULL, count, 0); 987 } 988