1 /* 2 * linux/fs/read_write.c 3 * 4 * Copyright (C) 1991, 1992 Linus Torvalds 5 */ 6 7 #include <linux/slab.h> 8 #include <linux/stat.h> 9 #include <linux/fcntl.h> 10 #include <linux/file.h> 11 #include <linux/uio.h> 12 #include <linux/aio.h> 13 #include <linux/fsnotify.h> 14 #include <linux/security.h> 15 #include <linux/export.h> 16 #include <linux/syscalls.h> 17 #include <linux/pagemap.h> 18 #include <linux/splice.h> 19 #include <linux/compat.h> 20 #include "internal.h" 21 22 #include <asm/uaccess.h> 23 #include <asm/unistd.h> 24 25 typedef ssize_t (*io_fn_t)(struct file *, char __user *, size_t, loff_t *); 26 typedef ssize_t (*iov_fn_t)(struct kiocb *, const struct iovec *, 27 unsigned long, loff_t); 28 29 const struct file_operations generic_ro_fops = { 30 .llseek = generic_file_llseek, 31 .read = do_sync_read, 32 .aio_read = generic_file_aio_read, 33 .mmap = generic_file_readonly_mmap, 34 .splice_read = generic_file_splice_read, 35 }; 36 37 EXPORT_SYMBOL(generic_ro_fops); 38 39 static inline int unsigned_offsets(struct file *file) 40 { 41 return file->f_mode & FMODE_UNSIGNED_OFFSET; 42 } 43 44 /** 45 * vfs_setpos - update the file offset for lseek 46 * @file: file structure in question 47 * @offset: file offset to seek to 48 * @maxsize: maximum file size 49 * 50 * This is a low-level filesystem helper for updating the file offset to 51 * the value specified by @offset if the given offset is valid and it is 52 * not equal to the current file offset. 53 * 54 * Return the specified offset on success and -EINVAL on invalid offset. 55 */ 56 loff_t vfs_setpos(struct file *file, loff_t offset, loff_t maxsize) 57 { 58 if (offset < 0 && !unsigned_offsets(file)) 59 return -EINVAL; 60 if (offset > maxsize) 61 return -EINVAL; 62 63 if (offset != file->f_pos) { 64 file->f_pos = offset; 65 file->f_version = 0; 66 } 67 return offset; 68 } 69 EXPORT_SYMBOL(vfs_setpos); 70 71 /** 72 * generic_file_llseek_size - generic llseek implementation for regular files 73 * @file: file structure to seek on 74 * @offset: file offset to seek to 75 * @whence: type of seek 76 * @size: max size of this file in file system 77 * @eof: offset used for SEEK_END position 78 * 79 * This is a variant of generic_file_llseek that allows passing in a custom 80 * maximum file size and a custom EOF position, for e.g. hashed directories 81 * 82 * Synchronization: 83 * SEEK_SET and SEEK_END are unsynchronized (but atomic on 64bit platforms) 84 * SEEK_CUR is synchronized against other SEEK_CURs, but not read/writes. 85 * read/writes behave like SEEK_SET against seeks. 86 */ 87 loff_t 88 generic_file_llseek_size(struct file *file, loff_t offset, int whence, 89 loff_t maxsize, loff_t eof) 90 { 91 switch (whence) { 92 case SEEK_END: 93 offset += eof; 94 break; 95 case SEEK_CUR: 96 /* 97 * Here we special-case the lseek(fd, 0, SEEK_CUR) 98 * position-querying operation. Avoid rewriting the "same" 99 * f_pos value back to the file because a concurrent read(), 100 * write() or lseek() might have altered it 101 */ 102 if (offset == 0) 103 return file->f_pos; 104 /* 105 * f_lock protects against read/modify/write race with other 106 * SEEK_CURs. Note that parallel writes and reads behave 107 * like SEEK_SET. 108 */ 109 spin_lock(&file->f_lock); 110 offset = vfs_setpos(file, file->f_pos + offset, maxsize); 111 spin_unlock(&file->f_lock); 112 return offset; 113 case SEEK_DATA: 114 /* 115 * In the generic case the entire file is data, so as long as 116 * offset isn't at the end of the file then the offset is data. 117 */ 118 if (offset >= eof) 119 return -ENXIO; 120 break; 121 case SEEK_HOLE: 122 /* 123 * There is a virtual hole at the end of the file, so as long as 124 * offset isn't i_size or larger, return i_size. 125 */ 126 if (offset >= eof) 127 return -ENXIO; 128 offset = eof; 129 break; 130 } 131 132 return vfs_setpos(file, offset, maxsize); 133 } 134 EXPORT_SYMBOL(generic_file_llseek_size); 135 136 /** 137 * generic_file_llseek - generic llseek implementation for regular files 138 * @file: file structure to seek on 139 * @offset: file offset to seek to 140 * @whence: type of seek 141 * 142 * This is a generic implemenation of ->llseek useable for all normal local 143 * filesystems. It just updates the file offset to the value specified by 144 * @offset and @whence. 145 */ 146 loff_t generic_file_llseek(struct file *file, loff_t offset, int whence) 147 { 148 struct inode *inode = file->f_mapping->host; 149 150 return generic_file_llseek_size(file, offset, whence, 151 inode->i_sb->s_maxbytes, 152 i_size_read(inode)); 153 } 154 EXPORT_SYMBOL(generic_file_llseek); 155 156 /** 157 * fixed_size_llseek - llseek implementation for fixed-sized devices 158 * @file: file structure to seek on 159 * @offset: file offset to seek to 160 * @whence: type of seek 161 * @size: size of the file 162 * 163 */ 164 loff_t fixed_size_llseek(struct file *file, loff_t offset, int whence, loff_t size) 165 { 166 switch (whence) { 167 case SEEK_SET: case SEEK_CUR: case SEEK_END: 168 return generic_file_llseek_size(file, offset, whence, 169 size, size); 170 default: 171 return -EINVAL; 172 } 173 } 174 EXPORT_SYMBOL(fixed_size_llseek); 175 176 /** 177 * noop_llseek - No Operation Performed llseek implementation 178 * @file: file structure to seek on 179 * @offset: file offset to seek to 180 * @whence: type of seek 181 * 182 * This is an implementation of ->llseek useable for the rare special case when 183 * userspace expects the seek to succeed but the (device) file is actually not 184 * able to perform the seek. In this case you use noop_llseek() instead of 185 * falling back to the default implementation of ->llseek. 186 */ 187 loff_t noop_llseek(struct file *file, loff_t offset, int whence) 188 { 189 return file->f_pos; 190 } 191 EXPORT_SYMBOL(noop_llseek); 192 193 loff_t no_llseek(struct file *file, loff_t offset, int whence) 194 { 195 return -ESPIPE; 196 } 197 EXPORT_SYMBOL(no_llseek); 198 199 loff_t default_llseek(struct file *file, loff_t offset, int whence) 200 { 201 struct inode *inode = file_inode(file); 202 loff_t retval; 203 204 mutex_lock(&inode->i_mutex); 205 switch (whence) { 206 case SEEK_END: 207 offset += i_size_read(inode); 208 break; 209 case SEEK_CUR: 210 if (offset == 0) { 211 retval = file->f_pos; 212 goto out; 213 } 214 offset += file->f_pos; 215 break; 216 case SEEK_DATA: 217 /* 218 * In the generic case the entire file is data, so as 219 * long as offset isn't at the end of the file then the 220 * offset is data. 221 */ 222 if (offset >= inode->i_size) { 223 retval = -ENXIO; 224 goto out; 225 } 226 break; 227 case SEEK_HOLE: 228 /* 229 * There is a virtual hole at the end of the file, so 230 * as long as offset isn't i_size or larger, return 231 * i_size. 232 */ 233 if (offset >= inode->i_size) { 234 retval = -ENXIO; 235 goto out; 236 } 237 offset = inode->i_size; 238 break; 239 } 240 retval = -EINVAL; 241 if (offset >= 0 || unsigned_offsets(file)) { 242 if (offset != file->f_pos) { 243 file->f_pos = offset; 244 file->f_version = 0; 245 } 246 retval = offset; 247 } 248 out: 249 mutex_unlock(&inode->i_mutex); 250 return retval; 251 } 252 EXPORT_SYMBOL(default_llseek); 253 254 loff_t vfs_llseek(struct file *file, loff_t offset, int whence) 255 { 256 loff_t (*fn)(struct file *, loff_t, int); 257 258 fn = no_llseek; 259 if (file->f_mode & FMODE_LSEEK) { 260 if (file->f_op && file->f_op->llseek) 261 fn = file->f_op->llseek; 262 } 263 return fn(file, offset, whence); 264 } 265 EXPORT_SYMBOL(vfs_llseek); 266 267 SYSCALL_DEFINE3(lseek, unsigned int, fd, off_t, offset, unsigned int, whence) 268 { 269 off_t retval; 270 struct fd f = fdget(fd); 271 if (!f.file) 272 return -EBADF; 273 274 retval = -EINVAL; 275 if (whence <= SEEK_MAX) { 276 loff_t res = vfs_llseek(f.file, offset, whence); 277 retval = res; 278 if (res != (loff_t)retval) 279 retval = -EOVERFLOW; /* LFS: should only happen on 32 bit platforms */ 280 } 281 fdput(f); 282 return retval; 283 } 284 285 #ifdef CONFIG_COMPAT 286 COMPAT_SYSCALL_DEFINE3(lseek, unsigned int, fd, compat_off_t, offset, unsigned int, whence) 287 { 288 return sys_lseek(fd, offset, whence); 289 } 290 #endif 291 292 #ifdef __ARCH_WANT_SYS_LLSEEK 293 SYSCALL_DEFINE5(llseek, unsigned int, fd, unsigned long, offset_high, 294 unsigned long, offset_low, loff_t __user *, result, 295 unsigned int, whence) 296 { 297 int retval; 298 struct fd f = fdget(fd); 299 loff_t offset; 300 301 if (!f.file) 302 return -EBADF; 303 304 retval = -EINVAL; 305 if (whence > SEEK_MAX) 306 goto out_putf; 307 308 offset = vfs_llseek(f.file, ((loff_t) offset_high << 32) | offset_low, 309 whence); 310 311 retval = (int)offset; 312 if (offset >= 0) { 313 retval = -EFAULT; 314 if (!copy_to_user(result, &offset, sizeof(offset))) 315 retval = 0; 316 } 317 out_putf: 318 fdput(f); 319 return retval; 320 } 321 #endif 322 323 /* 324 * rw_verify_area doesn't like huge counts. We limit 325 * them to something that fits in "int" so that others 326 * won't have to do range checks all the time. 327 */ 328 int rw_verify_area(int read_write, struct file *file, const loff_t *ppos, size_t count) 329 { 330 struct inode *inode; 331 loff_t pos; 332 int retval = -EINVAL; 333 334 inode = file_inode(file); 335 if (unlikely((ssize_t) count < 0)) 336 return retval; 337 pos = *ppos; 338 if (unlikely(pos < 0)) { 339 if (!unsigned_offsets(file)) 340 return retval; 341 if (count >= -pos) /* both values are in 0..LLONG_MAX */ 342 return -EOVERFLOW; 343 } else if (unlikely((loff_t) (pos + count) < 0)) { 344 if (!unsigned_offsets(file)) 345 return retval; 346 } 347 348 if (unlikely(inode->i_flock && mandatory_lock(inode))) { 349 retval = locks_mandatory_area( 350 read_write == READ ? FLOCK_VERIFY_READ : FLOCK_VERIFY_WRITE, 351 inode, file, pos, count); 352 if (retval < 0) 353 return retval; 354 } 355 retval = security_file_permission(file, 356 read_write == READ ? MAY_READ : MAY_WRITE); 357 if (retval) 358 return retval; 359 return count > MAX_RW_COUNT ? MAX_RW_COUNT : count; 360 } 361 362 ssize_t do_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos) 363 { 364 struct iovec iov = { .iov_base = buf, .iov_len = len }; 365 struct kiocb kiocb; 366 ssize_t ret; 367 368 init_sync_kiocb(&kiocb, filp); 369 kiocb.ki_pos = *ppos; 370 kiocb.ki_nbytes = len; 371 372 ret = filp->f_op->aio_read(&kiocb, &iov, 1, kiocb.ki_pos); 373 if (-EIOCBQUEUED == ret) 374 ret = wait_on_sync_kiocb(&kiocb); 375 *ppos = kiocb.ki_pos; 376 return ret; 377 } 378 379 EXPORT_SYMBOL(do_sync_read); 380 381 ssize_t vfs_read(struct file *file, char __user *buf, size_t count, loff_t *pos) 382 { 383 ssize_t ret; 384 385 if (!(file->f_mode & FMODE_READ)) 386 return -EBADF; 387 if (!file->f_op || (!file->f_op->read && !file->f_op->aio_read)) 388 return -EINVAL; 389 if (unlikely(!access_ok(VERIFY_WRITE, buf, count))) 390 return -EFAULT; 391 392 ret = rw_verify_area(READ, file, pos, count); 393 if (ret >= 0) { 394 count = ret; 395 if (file->f_op->read) 396 ret = file->f_op->read(file, buf, count, pos); 397 else 398 ret = do_sync_read(file, buf, count, pos); 399 if (ret > 0) { 400 fsnotify_access(file); 401 add_rchar(current, ret); 402 } 403 inc_syscr(current); 404 } 405 406 return ret; 407 } 408 409 EXPORT_SYMBOL(vfs_read); 410 411 ssize_t do_sync_write(struct file *filp, const char __user *buf, size_t len, loff_t *ppos) 412 { 413 struct iovec iov = { .iov_base = (void __user *)buf, .iov_len = len }; 414 struct kiocb kiocb; 415 ssize_t ret; 416 417 init_sync_kiocb(&kiocb, filp); 418 kiocb.ki_pos = *ppos; 419 kiocb.ki_nbytes = len; 420 421 ret = filp->f_op->aio_write(&kiocb, &iov, 1, kiocb.ki_pos); 422 if (-EIOCBQUEUED == ret) 423 ret = wait_on_sync_kiocb(&kiocb); 424 *ppos = kiocb.ki_pos; 425 return ret; 426 } 427 428 EXPORT_SYMBOL(do_sync_write); 429 430 ssize_t __kernel_write(struct file *file, const char *buf, size_t count, loff_t *pos) 431 { 432 mm_segment_t old_fs; 433 const char __user *p; 434 ssize_t ret; 435 436 if (!file->f_op || (!file->f_op->write && !file->f_op->aio_write)) 437 return -EINVAL; 438 439 old_fs = get_fs(); 440 set_fs(get_ds()); 441 p = (__force const char __user *)buf; 442 if (count > MAX_RW_COUNT) 443 count = MAX_RW_COUNT; 444 if (file->f_op->write) 445 ret = file->f_op->write(file, p, count, pos); 446 else 447 ret = do_sync_write(file, p, count, pos); 448 set_fs(old_fs); 449 if (ret > 0) { 450 fsnotify_modify(file); 451 add_wchar(current, ret); 452 } 453 inc_syscw(current); 454 return ret; 455 } 456 457 ssize_t vfs_write(struct file *file, const char __user *buf, size_t count, loff_t *pos) 458 { 459 ssize_t ret; 460 461 if (!(file->f_mode & FMODE_WRITE)) 462 return -EBADF; 463 if (!file->f_op || (!file->f_op->write && !file->f_op->aio_write)) 464 return -EINVAL; 465 if (unlikely(!access_ok(VERIFY_READ, buf, count))) 466 return -EFAULT; 467 468 ret = rw_verify_area(WRITE, file, pos, count); 469 if (ret >= 0) { 470 count = ret; 471 file_start_write(file); 472 if (file->f_op->write) 473 ret = file->f_op->write(file, buf, count, pos); 474 else 475 ret = do_sync_write(file, buf, count, pos); 476 if (ret > 0) { 477 fsnotify_modify(file); 478 add_wchar(current, ret); 479 } 480 inc_syscw(current); 481 file_end_write(file); 482 } 483 484 return ret; 485 } 486 487 EXPORT_SYMBOL(vfs_write); 488 489 static inline loff_t file_pos_read(struct file *file) 490 { 491 return file->f_pos; 492 } 493 494 static inline void file_pos_write(struct file *file, loff_t pos) 495 { 496 file->f_pos = pos; 497 } 498 499 SYSCALL_DEFINE3(read, unsigned int, fd, char __user *, buf, size_t, count) 500 { 501 struct fd f = fdget(fd); 502 ssize_t ret = -EBADF; 503 504 if (f.file) { 505 loff_t pos = file_pos_read(f.file); 506 ret = vfs_read(f.file, buf, count, &pos); 507 if (ret >= 0) 508 file_pos_write(f.file, pos); 509 fdput(f); 510 } 511 return ret; 512 } 513 514 SYSCALL_DEFINE3(write, unsigned int, fd, const char __user *, buf, 515 size_t, count) 516 { 517 struct fd f = fdget(fd); 518 ssize_t ret = -EBADF; 519 520 if (f.file) { 521 loff_t pos = file_pos_read(f.file); 522 ret = vfs_write(f.file, buf, count, &pos); 523 if (ret >= 0) 524 file_pos_write(f.file, pos); 525 fdput(f); 526 } 527 528 return ret; 529 } 530 531 SYSCALL_DEFINE4(pread64, unsigned int, fd, char __user *, buf, 532 size_t, count, loff_t, pos) 533 { 534 struct fd f; 535 ssize_t ret = -EBADF; 536 537 if (pos < 0) 538 return -EINVAL; 539 540 f = fdget(fd); 541 if (f.file) { 542 ret = -ESPIPE; 543 if (f.file->f_mode & FMODE_PREAD) 544 ret = vfs_read(f.file, buf, count, &pos); 545 fdput(f); 546 } 547 548 return ret; 549 } 550 551 SYSCALL_DEFINE4(pwrite64, unsigned int, fd, const char __user *, buf, 552 size_t, count, loff_t, pos) 553 { 554 struct fd f; 555 ssize_t ret = -EBADF; 556 557 if (pos < 0) 558 return -EINVAL; 559 560 f = fdget(fd); 561 if (f.file) { 562 ret = -ESPIPE; 563 if (f.file->f_mode & FMODE_PWRITE) 564 ret = vfs_write(f.file, buf, count, &pos); 565 fdput(f); 566 } 567 568 return ret; 569 } 570 571 /* 572 * Reduce an iovec's length in-place. Return the resulting number of segments 573 */ 574 unsigned long iov_shorten(struct iovec *iov, unsigned long nr_segs, size_t to) 575 { 576 unsigned long seg = 0; 577 size_t len = 0; 578 579 while (seg < nr_segs) { 580 seg++; 581 if (len + iov->iov_len >= to) { 582 iov->iov_len = to - len; 583 break; 584 } 585 len += iov->iov_len; 586 iov++; 587 } 588 return seg; 589 } 590 EXPORT_SYMBOL(iov_shorten); 591 592 static ssize_t do_sync_readv_writev(struct file *filp, const struct iovec *iov, 593 unsigned long nr_segs, size_t len, loff_t *ppos, iov_fn_t fn) 594 { 595 struct kiocb kiocb; 596 ssize_t ret; 597 598 init_sync_kiocb(&kiocb, filp); 599 kiocb.ki_pos = *ppos; 600 kiocb.ki_nbytes = len; 601 602 ret = fn(&kiocb, iov, nr_segs, kiocb.ki_pos); 603 if (ret == -EIOCBQUEUED) 604 ret = wait_on_sync_kiocb(&kiocb); 605 *ppos = kiocb.ki_pos; 606 return ret; 607 } 608 609 /* Do it by hand, with file-ops */ 610 static ssize_t do_loop_readv_writev(struct file *filp, struct iovec *iov, 611 unsigned long nr_segs, loff_t *ppos, io_fn_t fn) 612 { 613 struct iovec *vector = iov; 614 ssize_t ret = 0; 615 616 while (nr_segs > 0) { 617 void __user *base; 618 size_t len; 619 ssize_t nr; 620 621 base = vector->iov_base; 622 len = vector->iov_len; 623 vector++; 624 nr_segs--; 625 626 nr = fn(filp, base, len, ppos); 627 628 if (nr < 0) { 629 if (!ret) 630 ret = nr; 631 break; 632 } 633 ret += nr; 634 if (nr != len) 635 break; 636 } 637 638 return ret; 639 } 640 641 /* A write operation does a read from user space and vice versa */ 642 #define vrfy_dir(type) ((type) == READ ? VERIFY_WRITE : VERIFY_READ) 643 644 ssize_t rw_copy_check_uvector(int type, const struct iovec __user * uvector, 645 unsigned long nr_segs, unsigned long fast_segs, 646 struct iovec *fast_pointer, 647 struct iovec **ret_pointer) 648 { 649 unsigned long seg; 650 ssize_t ret; 651 struct iovec *iov = fast_pointer; 652 653 /* 654 * SuS says "The readv() function *may* fail if the iovcnt argument 655 * was less than or equal to 0, or greater than {IOV_MAX}. Linux has 656 * traditionally returned zero for zero segments, so... 657 */ 658 if (nr_segs == 0) { 659 ret = 0; 660 goto out; 661 } 662 663 /* 664 * First get the "struct iovec" from user memory and 665 * verify all the pointers 666 */ 667 if (nr_segs > UIO_MAXIOV) { 668 ret = -EINVAL; 669 goto out; 670 } 671 if (nr_segs > fast_segs) { 672 iov = kmalloc(nr_segs*sizeof(struct iovec), GFP_KERNEL); 673 if (iov == NULL) { 674 ret = -ENOMEM; 675 goto out; 676 } 677 } 678 if (copy_from_user(iov, uvector, nr_segs*sizeof(*uvector))) { 679 ret = -EFAULT; 680 goto out; 681 } 682 683 /* 684 * According to the Single Unix Specification we should return EINVAL 685 * if an element length is < 0 when cast to ssize_t or if the 686 * total length would overflow the ssize_t return value of the 687 * system call. 688 * 689 * Linux caps all read/write calls to MAX_RW_COUNT, and avoids the 690 * overflow case. 691 */ 692 ret = 0; 693 for (seg = 0; seg < nr_segs; seg++) { 694 void __user *buf = iov[seg].iov_base; 695 ssize_t len = (ssize_t)iov[seg].iov_len; 696 697 /* see if we we're about to use an invalid len or if 698 * it's about to overflow ssize_t */ 699 if (len < 0) { 700 ret = -EINVAL; 701 goto out; 702 } 703 if (type >= 0 704 && unlikely(!access_ok(vrfy_dir(type), buf, len))) { 705 ret = -EFAULT; 706 goto out; 707 } 708 if (len > MAX_RW_COUNT - ret) { 709 len = MAX_RW_COUNT - ret; 710 iov[seg].iov_len = len; 711 } 712 ret += len; 713 } 714 out: 715 *ret_pointer = iov; 716 return ret; 717 } 718 719 static ssize_t do_readv_writev(int type, struct file *file, 720 const struct iovec __user * uvector, 721 unsigned long nr_segs, loff_t *pos) 722 { 723 size_t tot_len; 724 struct iovec iovstack[UIO_FASTIOV]; 725 struct iovec *iov = iovstack; 726 ssize_t ret; 727 io_fn_t fn; 728 iov_fn_t fnv; 729 730 if (!file->f_op) { 731 ret = -EINVAL; 732 goto out; 733 } 734 735 ret = rw_copy_check_uvector(type, uvector, nr_segs, 736 ARRAY_SIZE(iovstack), iovstack, &iov); 737 if (ret <= 0) 738 goto out; 739 740 tot_len = ret; 741 ret = rw_verify_area(type, file, pos, tot_len); 742 if (ret < 0) 743 goto out; 744 745 fnv = NULL; 746 if (type == READ) { 747 fn = file->f_op->read; 748 fnv = file->f_op->aio_read; 749 } else { 750 fn = (io_fn_t)file->f_op->write; 751 fnv = file->f_op->aio_write; 752 file_start_write(file); 753 } 754 755 if (fnv) 756 ret = do_sync_readv_writev(file, iov, nr_segs, tot_len, 757 pos, fnv); 758 else 759 ret = do_loop_readv_writev(file, iov, nr_segs, pos, fn); 760 761 if (type != READ) 762 file_end_write(file); 763 764 out: 765 if (iov != iovstack) 766 kfree(iov); 767 if ((ret + (type == READ)) > 0) { 768 if (type == READ) 769 fsnotify_access(file); 770 else 771 fsnotify_modify(file); 772 } 773 return ret; 774 } 775 776 ssize_t vfs_readv(struct file *file, const struct iovec __user *vec, 777 unsigned long vlen, loff_t *pos) 778 { 779 if (!(file->f_mode & FMODE_READ)) 780 return -EBADF; 781 if (!file->f_op || (!file->f_op->aio_read && !file->f_op->read)) 782 return -EINVAL; 783 784 return do_readv_writev(READ, file, vec, vlen, pos); 785 } 786 787 EXPORT_SYMBOL(vfs_readv); 788 789 ssize_t vfs_writev(struct file *file, const struct iovec __user *vec, 790 unsigned long vlen, loff_t *pos) 791 { 792 if (!(file->f_mode & FMODE_WRITE)) 793 return -EBADF; 794 if (!file->f_op || (!file->f_op->aio_write && !file->f_op->write)) 795 return -EINVAL; 796 797 return do_readv_writev(WRITE, file, vec, vlen, pos); 798 } 799 800 EXPORT_SYMBOL(vfs_writev); 801 802 SYSCALL_DEFINE3(readv, unsigned long, fd, const struct iovec __user *, vec, 803 unsigned long, vlen) 804 { 805 struct fd f = fdget(fd); 806 ssize_t ret = -EBADF; 807 808 if (f.file) { 809 loff_t pos = file_pos_read(f.file); 810 ret = vfs_readv(f.file, vec, vlen, &pos); 811 if (ret >= 0) 812 file_pos_write(f.file, pos); 813 fdput(f); 814 } 815 816 if (ret > 0) 817 add_rchar(current, ret); 818 inc_syscr(current); 819 return ret; 820 } 821 822 SYSCALL_DEFINE3(writev, unsigned long, fd, const struct iovec __user *, vec, 823 unsigned long, vlen) 824 { 825 struct fd f = fdget(fd); 826 ssize_t ret = -EBADF; 827 828 if (f.file) { 829 loff_t pos = file_pos_read(f.file); 830 ret = vfs_writev(f.file, vec, vlen, &pos); 831 if (ret >= 0) 832 file_pos_write(f.file, pos); 833 fdput(f); 834 } 835 836 if (ret > 0) 837 add_wchar(current, ret); 838 inc_syscw(current); 839 return ret; 840 } 841 842 static inline loff_t pos_from_hilo(unsigned long high, unsigned long low) 843 { 844 #define HALF_LONG_BITS (BITS_PER_LONG / 2) 845 return (((loff_t)high << HALF_LONG_BITS) << HALF_LONG_BITS) | low; 846 } 847 848 SYSCALL_DEFINE5(preadv, unsigned long, fd, const struct iovec __user *, vec, 849 unsigned long, vlen, unsigned long, pos_l, unsigned long, pos_h) 850 { 851 loff_t pos = pos_from_hilo(pos_h, pos_l); 852 struct fd f; 853 ssize_t ret = -EBADF; 854 855 if (pos < 0) 856 return -EINVAL; 857 858 f = fdget(fd); 859 if (f.file) { 860 ret = -ESPIPE; 861 if (f.file->f_mode & FMODE_PREAD) 862 ret = vfs_readv(f.file, vec, vlen, &pos); 863 fdput(f); 864 } 865 866 if (ret > 0) 867 add_rchar(current, ret); 868 inc_syscr(current); 869 return ret; 870 } 871 872 SYSCALL_DEFINE5(pwritev, unsigned long, fd, const struct iovec __user *, vec, 873 unsigned long, vlen, unsigned long, pos_l, unsigned long, pos_h) 874 { 875 loff_t pos = pos_from_hilo(pos_h, pos_l); 876 struct fd f; 877 ssize_t ret = -EBADF; 878 879 if (pos < 0) 880 return -EINVAL; 881 882 f = fdget(fd); 883 if (f.file) { 884 ret = -ESPIPE; 885 if (f.file->f_mode & FMODE_PWRITE) 886 ret = vfs_writev(f.file, vec, vlen, &pos); 887 fdput(f); 888 } 889 890 if (ret > 0) 891 add_wchar(current, ret); 892 inc_syscw(current); 893 return ret; 894 } 895 896 #ifdef CONFIG_COMPAT 897 898 static ssize_t compat_do_readv_writev(int type, struct file *file, 899 const struct compat_iovec __user *uvector, 900 unsigned long nr_segs, loff_t *pos) 901 { 902 compat_ssize_t tot_len; 903 struct iovec iovstack[UIO_FASTIOV]; 904 struct iovec *iov = iovstack; 905 ssize_t ret; 906 io_fn_t fn; 907 iov_fn_t fnv; 908 909 ret = -EINVAL; 910 if (!file->f_op) 911 goto out; 912 913 ret = -EFAULT; 914 if (!access_ok(VERIFY_READ, uvector, nr_segs*sizeof(*uvector))) 915 goto out; 916 917 ret = compat_rw_copy_check_uvector(type, uvector, nr_segs, 918 UIO_FASTIOV, iovstack, &iov); 919 if (ret <= 0) 920 goto out; 921 922 tot_len = ret; 923 ret = rw_verify_area(type, file, pos, tot_len); 924 if (ret < 0) 925 goto out; 926 927 fnv = NULL; 928 if (type == READ) { 929 fn = file->f_op->read; 930 fnv = file->f_op->aio_read; 931 } else { 932 fn = (io_fn_t)file->f_op->write; 933 fnv = file->f_op->aio_write; 934 file_start_write(file); 935 } 936 937 if (fnv) 938 ret = do_sync_readv_writev(file, iov, nr_segs, tot_len, 939 pos, fnv); 940 else 941 ret = do_loop_readv_writev(file, iov, nr_segs, pos, fn); 942 943 if (type != READ) 944 file_end_write(file); 945 946 out: 947 if (iov != iovstack) 948 kfree(iov); 949 if ((ret + (type == READ)) > 0) { 950 if (type == READ) 951 fsnotify_access(file); 952 else 953 fsnotify_modify(file); 954 } 955 return ret; 956 } 957 958 static size_t compat_readv(struct file *file, 959 const struct compat_iovec __user *vec, 960 unsigned long vlen, loff_t *pos) 961 { 962 ssize_t ret = -EBADF; 963 964 if (!(file->f_mode & FMODE_READ)) 965 goto out; 966 967 ret = -EINVAL; 968 if (!file->f_op || (!file->f_op->aio_read && !file->f_op->read)) 969 goto out; 970 971 ret = compat_do_readv_writev(READ, file, vec, vlen, pos); 972 973 out: 974 if (ret > 0) 975 add_rchar(current, ret); 976 inc_syscr(current); 977 return ret; 978 } 979 980 COMPAT_SYSCALL_DEFINE3(readv, unsigned long, fd, 981 const struct compat_iovec __user *,vec, 982 unsigned long, vlen) 983 { 984 struct fd f = fdget(fd); 985 ssize_t ret; 986 loff_t pos; 987 988 if (!f.file) 989 return -EBADF; 990 pos = f.file->f_pos; 991 ret = compat_readv(f.file, vec, vlen, &pos); 992 if (ret >= 0) 993 f.file->f_pos = pos; 994 fdput(f); 995 return ret; 996 } 997 998 COMPAT_SYSCALL_DEFINE4(preadv64, unsigned long, fd, 999 const struct compat_iovec __user *,vec, 1000 unsigned long, vlen, loff_t, pos) 1001 { 1002 struct fd f; 1003 ssize_t ret; 1004 1005 if (pos < 0) 1006 return -EINVAL; 1007 f = fdget(fd); 1008 if (!f.file) 1009 return -EBADF; 1010 ret = -ESPIPE; 1011 if (f.file->f_mode & FMODE_PREAD) 1012 ret = compat_readv(f.file, vec, vlen, &pos); 1013 fdput(f); 1014 return ret; 1015 } 1016 1017 COMPAT_SYSCALL_DEFINE5(preadv, unsigned long, fd, 1018 const struct compat_iovec __user *,vec, 1019 unsigned long, vlen, u32, pos_low, u32, pos_high) 1020 { 1021 loff_t pos = ((loff_t)pos_high << 32) | pos_low; 1022 return compat_sys_preadv64(fd, vec, vlen, pos); 1023 } 1024 1025 static size_t compat_writev(struct file *file, 1026 const struct compat_iovec __user *vec, 1027 unsigned long vlen, loff_t *pos) 1028 { 1029 ssize_t ret = -EBADF; 1030 1031 if (!(file->f_mode & FMODE_WRITE)) 1032 goto out; 1033 1034 ret = -EINVAL; 1035 if (!file->f_op || (!file->f_op->aio_write && !file->f_op->write)) 1036 goto out; 1037 1038 ret = compat_do_readv_writev(WRITE, file, vec, vlen, pos); 1039 1040 out: 1041 if (ret > 0) 1042 add_wchar(current, ret); 1043 inc_syscw(current); 1044 return ret; 1045 } 1046 1047 COMPAT_SYSCALL_DEFINE3(writev, unsigned long, fd, 1048 const struct compat_iovec __user *, vec, 1049 unsigned long, vlen) 1050 { 1051 struct fd f = fdget(fd); 1052 ssize_t ret; 1053 loff_t pos; 1054 1055 if (!f.file) 1056 return -EBADF; 1057 pos = f.file->f_pos; 1058 ret = compat_writev(f.file, vec, vlen, &pos); 1059 if (ret >= 0) 1060 f.file->f_pos = pos; 1061 fdput(f); 1062 return ret; 1063 } 1064 1065 COMPAT_SYSCALL_DEFINE4(pwritev64, unsigned long, fd, 1066 const struct compat_iovec __user *,vec, 1067 unsigned long, vlen, loff_t, pos) 1068 { 1069 struct fd f; 1070 ssize_t ret; 1071 1072 if (pos < 0) 1073 return -EINVAL; 1074 f = fdget(fd); 1075 if (!f.file) 1076 return -EBADF; 1077 ret = -ESPIPE; 1078 if (f.file->f_mode & FMODE_PWRITE) 1079 ret = compat_writev(f.file, vec, vlen, &pos); 1080 fdput(f); 1081 return ret; 1082 } 1083 1084 COMPAT_SYSCALL_DEFINE5(pwritev, unsigned long, fd, 1085 const struct compat_iovec __user *,vec, 1086 unsigned long, vlen, u32, pos_low, u32, pos_high) 1087 { 1088 loff_t pos = ((loff_t)pos_high << 32) | pos_low; 1089 return compat_sys_pwritev64(fd, vec, vlen, pos); 1090 } 1091 #endif 1092 1093 static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos, 1094 size_t count, loff_t max) 1095 { 1096 struct fd in, out; 1097 struct inode *in_inode, *out_inode; 1098 loff_t pos; 1099 loff_t out_pos; 1100 ssize_t retval; 1101 int fl; 1102 1103 /* 1104 * Get input file, and verify that it is ok.. 1105 */ 1106 retval = -EBADF; 1107 in = fdget(in_fd); 1108 if (!in.file) 1109 goto out; 1110 if (!(in.file->f_mode & FMODE_READ)) 1111 goto fput_in; 1112 retval = -ESPIPE; 1113 if (!ppos) { 1114 pos = in.file->f_pos; 1115 } else { 1116 pos = *ppos; 1117 if (!(in.file->f_mode & FMODE_PREAD)) 1118 goto fput_in; 1119 } 1120 retval = rw_verify_area(READ, in.file, &pos, count); 1121 if (retval < 0) 1122 goto fput_in; 1123 count = retval; 1124 1125 /* 1126 * Get output file, and verify that it is ok.. 1127 */ 1128 retval = -EBADF; 1129 out = fdget(out_fd); 1130 if (!out.file) 1131 goto fput_in; 1132 if (!(out.file->f_mode & FMODE_WRITE)) 1133 goto fput_out; 1134 retval = -EINVAL; 1135 in_inode = file_inode(in.file); 1136 out_inode = file_inode(out.file); 1137 out_pos = out.file->f_pos; 1138 retval = rw_verify_area(WRITE, out.file, &out_pos, count); 1139 if (retval < 0) 1140 goto fput_out; 1141 count = retval; 1142 1143 if (!max) 1144 max = min(in_inode->i_sb->s_maxbytes, out_inode->i_sb->s_maxbytes); 1145 1146 if (unlikely(pos + count > max)) { 1147 retval = -EOVERFLOW; 1148 if (pos >= max) 1149 goto fput_out; 1150 count = max - pos; 1151 } 1152 1153 fl = 0; 1154 #if 0 1155 /* 1156 * We need to debate whether we can enable this or not. The 1157 * man page documents EAGAIN return for the output at least, 1158 * and the application is arguably buggy if it doesn't expect 1159 * EAGAIN on a non-blocking file descriptor. 1160 */ 1161 if (in.file->f_flags & O_NONBLOCK) 1162 fl = SPLICE_F_NONBLOCK; 1163 #endif 1164 file_start_write(out.file); 1165 retval = do_splice_direct(in.file, &pos, out.file, &out_pos, count, fl); 1166 file_end_write(out.file); 1167 1168 if (retval > 0) { 1169 add_rchar(current, retval); 1170 add_wchar(current, retval); 1171 fsnotify_access(in.file); 1172 fsnotify_modify(out.file); 1173 out.file->f_pos = out_pos; 1174 if (ppos) 1175 *ppos = pos; 1176 else 1177 in.file->f_pos = pos; 1178 } 1179 1180 inc_syscr(current); 1181 inc_syscw(current); 1182 if (pos > max) 1183 retval = -EOVERFLOW; 1184 1185 fput_out: 1186 fdput(out); 1187 fput_in: 1188 fdput(in); 1189 out: 1190 return retval; 1191 } 1192 1193 SYSCALL_DEFINE4(sendfile, int, out_fd, int, in_fd, off_t __user *, offset, size_t, count) 1194 { 1195 loff_t pos; 1196 off_t off; 1197 ssize_t ret; 1198 1199 if (offset) { 1200 if (unlikely(get_user(off, offset))) 1201 return -EFAULT; 1202 pos = off; 1203 ret = do_sendfile(out_fd, in_fd, &pos, count, MAX_NON_LFS); 1204 if (unlikely(put_user(pos, offset))) 1205 return -EFAULT; 1206 return ret; 1207 } 1208 1209 return do_sendfile(out_fd, in_fd, NULL, count, 0); 1210 } 1211 1212 SYSCALL_DEFINE4(sendfile64, int, out_fd, int, in_fd, loff_t __user *, offset, size_t, count) 1213 { 1214 loff_t pos; 1215 ssize_t ret; 1216 1217 if (offset) { 1218 if (unlikely(copy_from_user(&pos, offset, sizeof(loff_t)))) 1219 return -EFAULT; 1220 ret = do_sendfile(out_fd, in_fd, &pos, count, 0); 1221 if (unlikely(put_user(pos, offset))) 1222 return -EFAULT; 1223 return ret; 1224 } 1225 1226 return do_sendfile(out_fd, in_fd, NULL, count, 0); 1227 } 1228 1229 #ifdef CONFIG_COMPAT 1230 COMPAT_SYSCALL_DEFINE4(sendfile, int, out_fd, int, in_fd, 1231 compat_off_t __user *, offset, compat_size_t, count) 1232 { 1233 loff_t pos; 1234 off_t off; 1235 ssize_t ret; 1236 1237 if (offset) { 1238 if (unlikely(get_user(off, offset))) 1239 return -EFAULT; 1240 pos = off; 1241 ret = do_sendfile(out_fd, in_fd, &pos, count, MAX_NON_LFS); 1242 if (unlikely(put_user(pos, offset))) 1243 return -EFAULT; 1244 return ret; 1245 } 1246 1247 return do_sendfile(out_fd, in_fd, NULL, count, 0); 1248 } 1249 1250 COMPAT_SYSCALL_DEFINE4(sendfile64, int, out_fd, int, in_fd, 1251 compat_loff_t __user *, offset, compat_size_t, count) 1252 { 1253 loff_t pos; 1254 ssize_t ret; 1255 1256 if (offset) { 1257 if (unlikely(copy_from_user(&pos, offset, sizeof(loff_t)))) 1258 return -EFAULT; 1259 ret = do_sendfile(out_fd, in_fd, &pos, count, 0); 1260 if (unlikely(put_user(pos, offset))) 1261 return -EFAULT; 1262 return ret; 1263 } 1264 1265 return do_sendfile(out_fd, in_fd, NULL, count, 0); 1266 } 1267 #endif 1268