1 /* 2 * linux/fs/read_write.c 3 * 4 * Copyright (C) 1991, 1992 Linus Torvalds 5 */ 6 7 #include <linux/slab.h> 8 #include <linux/stat.h> 9 #include <linux/fcntl.h> 10 #include <linux/file.h> 11 #include <linux/uio.h> 12 #include <linux/aio.h> 13 #include <linux/fsnotify.h> 14 #include <linux/security.h> 15 #include <linux/export.h> 16 #include <linux/syscalls.h> 17 #include <linux/pagemap.h> 18 #include <linux/splice.h> 19 #include <linux/compat.h> 20 #include "internal.h" 21 22 #include <asm/uaccess.h> 23 #include <asm/unistd.h> 24 25 typedef ssize_t (*io_fn_t)(struct file *, char __user *, size_t, loff_t *); 26 typedef ssize_t (*iov_fn_t)(struct kiocb *, const struct iovec *, 27 unsigned long, loff_t); 28 29 const struct file_operations generic_ro_fops = { 30 .llseek = generic_file_llseek, 31 .read = do_sync_read, 32 .aio_read = generic_file_aio_read, 33 .mmap = generic_file_readonly_mmap, 34 .splice_read = generic_file_splice_read, 35 }; 36 37 EXPORT_SYMBOL(generic_ro_fops); 38 39 static inline int unsigned_offsets(struct file *file) 40 { 41 return file->f_mode & FMODE_UNSIGNED_OFFSET; 42 } 43 44 /** 45 * vfs_setpos - update the file offset for lseek 46 * @file: file structure in question 47 * @offset: file offset to seek to 48 * @maxsize: maximum file size 49 * 50 * This is a low-level filesystem helper for updating the file offset to 51 * the value specified by @offset if the given offset is valid and it is 52 * not equal to the current file offset. 53 * 54 * Return the specified offset on success and -EINVAL on invalid offset. 55 */ 56 loff_t vfs_setpos(struct file *file, loff_t offset, loff_t maxsize) 57 { 58 if (offset < 0 && !unsigned_offsets(file)) 59 return -EINVAL; 60 if (offset > maxsize) 61 return -EINVAL; 62 63 if (offset != file->f_pos) { 64 file->f_pos = offset; 65 file->f_version = 0; 66 } 67 return offset; 68 } 69 EXPORT_SYMBOL(vfs_setpos); 70 71 /** 72 * generic_file_llseek_size - generic llseek implementation for regular files 73 * @file: file structure to seek on 74 * @offset: file offset to seek to 75 * @whence: type of seek 76 * @size: max size of this file in file system 77 * @eof: offset used for SEEK_END position 78 * 79 * This is a variant of generic_file_llseek that allows passing in a custom 80 * maximum file size and a custom EOF position, for e.g. hashed directories 81 * 82 * Synchronization: 83 * SEEK_SET and SEEK_END are unsynchronized (but atomic on 64bit platforms) 84 * SEEK_CUR is synchronized against other SEEK_CURs, but not read/writes. 85 * read/writes behave like SEEK_SET against seeks. 86 */ 87 loff_t 88 generic_file_llseek_size(struct file *file, loff_t offset, int whence, 89 loff_t maxsize, loff_t eof) 90 { 91 switch (whence) { 92 case SEEK_END: 93 offset += eof; 94 break; 95 case SEEK_CUR: 96 /* 97 * Here we special-case the lseek(fd, 0, SEEK_CUR) 98 * position-querying operation. Avoid rewriting the "same" 99 * f_pos value back to the file because a concurrent read(), 100 * write() or lseek() might have altered it 101 */ 102 if (offset == 0) 103 return file->f_pos; 104 /* 105 * f_lock protects against read/modify/write race with other 106 * SEEK_CURs. Note that parallel writes and reads behave 107 * like SEEK_SET. 108 */ 109 spin_lock(&file->f_lock); 110 offset = vfs_setpos(file, file->f_pos + offset, maxsize); 111 spin_unlock(&file->f_lock); 112 return offset; 113 case SEEK_DATA: 114 /* 115 * In the generic case the entire file is data, so as long as 116 * offset isn't at the end of the file then the offset is data. 117 */ 118 if (offset >= eof) 119 return -ENXIO; 120 break; 121 case SEEK_HOLE: 122 /* 123 * There is a virtual hole at the end of the file, so as long as 124 * offset isn't i_size or larger, return i_size. 125 */ 126 if (offset >= eof) 127 return -ENXIO; 128 offset = eof; 129 break; 130 } 131 132 return vfs_setpos(file, offset, maxsize); 133 } 134 EXPORT_SYMBOL(generic_file_llseek_size); 135 136 /** 137 * generic_file_llseek - generic llseek implementation for regular files 138 * @file: file structure to seek on 139 * @offset: file offset to seek to 140 * @whence: type of seek 141 * 142 * This is a generic implemenation of ->llseek useable for all normal local 143 * filesystems. It just updates the file offset to the value specified by 144 * @offset and @whence. 145 */ 146 loff_t generic_file_llseek(struct file *file, loff_t offset, int whence) 147 { 148 struct inode *inode = file->f_mapping->host; 149 150 return generic_file_llseek_size(file, offset, whence, 151 inode->i_sb->s_maxbytes, 152 i_size_read(inode)); 153 } 154 EXPORT_SYMBOL(generic_file_llseek); 155 156 /** 157 * fixed_size_llseek - llseek implementation for fixed-sized devices 158 * @file: file structure to seek on 159 * @offset: file offset to seek to 160 * @whence: type of seek 161 * @size: size of the file 162 * 163 */ 164 loff_t fixed_size_llseek(struct file *file, loff_t offset, int whence, loff_t size) 165 { 166 switch (whence) { 167 case SEEK_SET: case SEEK_CUR: case SEEK_END: 168 return generic_file_llseek_size(file, offset, whence, 169 size, size); 170 default: 171 return -EINVAL; 172 } 173 } 174 EXPORT_SYMBOL(fixed_size_llseek); 175 176 /** 177 * noop_llseek - No Operation Performed llseek implementation 178 * @file: file structure to seek on 179 * @offset: file offset to seek to 180 * @whence: type of seek 181 * 182 * This is an implementation of ->llseek useable for the rare special case when 183 * userspace expects the seek to succeed but the (device) file is actually not 184 * able to perform the seek. In this case you use noop_llseek() instead of 185 * falling back to the default implementation of ->llseek. 186 */ 187 loff_t noop_llseek(struct file *file, loff_t offset, int whence) 188 { 189 return file->f_pos; 190 } 191 EXPORT_SYMBOL(noop_llseek); 192 193 loff_t no_llseek(struct file *file, loff_t offset, int whence) 194 { 195 return -ESPIPE; 196 } 197 EXPORT_SYMBOL(no_llseek); 198 199 loff_t default_llseek(struct file *file, loff_t offset, int whence) 200 { 201 struct inode *inode = file_inode(file); 202 loff_t retval; 203 204 mutex_lock(&inode->i_mutex); 205 switch (whence) { 206 case SEEK_END: 207 offset += i_size_read(inode); 208 break; 209 case SEEK_CUR: 210 if (offset == 0) { 211 retval = file->f_pos; 212 goto out; 213 } 214 offset += file->f_pos; 215 break; 216 case SEEK_DATA: 217 /* 218 * In the generic case the entire file is data, so as 219 * long as offset isn't at the end of the file then the 220 * offset is data. 221 */ 222 if (offset >= inode->i_size) { 223 retval = -ENXIO; 224 goto out; 225 } 226 break; 227 case SEEK_HOLE: 228 /* 229 * There is a virtual hole at the end of the file, so 230 * as long as offset isn't i_size or larger, return 231 * i_size. 232 */ 233 if (offset >= inode->i_size) { 234 retval = -ENXIO; 235 goto out; 236 } 237 offset = inode->i_size; 238 break; 239 } 240 retval = -EINVAL; 241 if (offset >= 0 || unsigned_offsets(file)) { 242 if (offset != file->f_pos) { 243 file->f_pos = offset; 244 file->f_version = 0; 245 } 246 retval = offset; 247 } 248 out: 249 mutex_unlock(&inode->i_mutex); 250 return retval; 251 } 252 EXPORT_SYMBOL(default_llseek); 253 254 loff_t vfs_llseek(struct file *file, loff_t offset, int whence) 255 { 256 loff_t (*fn)(struct file *, loff_t, int); 257 258 fn = no_llseek; 259 if (file->f_mode & FMODE_LSEEK) { 260 if (file->f_op->llseek) 261 fn = file->f_op->llseek; 262 } 263 return fn(file, offset, whence); 264 } 265 EXPORT_SYMBOL(vfs_llseek); 266 267 SYSCALL_DEFINE3(lseek, unsigned int, fd, off_t, offset, unsigned int, whence) 268 { 269 off_t retval; 270 struct fd f = fdget(fd); 271 if (!f.file) 272 return -EBADF; 273 274 retval = -EINVAL; 275 if (whence <= SEEK_MAX) { 276 loff_t res = vfs_llseek(f.file, offset, whence); 277 retval = res; 278 if (res != (loff_t)retval) 279 retval = -EOVERFLOW; /* LFS: should only happen on 32 bit platforms */ 280 } 281 fdput(f); 282 return retval; 283 } 284 285 #ifdef CONFIG_COMPAT 286 COMPAT_SYSCALL_DEFINE3(lseek, unsigned int, fd, compat_off_t, offset, unsigned int, whence) 287 { 288 return sys_lseek(fd, offset, whence); 289 } 290 #endif 291 292 #ifdef __ARCH_WANT_SYS_LLSEEK 293 SYSCALL_DEFINE5(llseek, unsigned int, fd, unsigned long, offset_high, 294 unsigned long, offset_low, loff_t __user *, result, 295 unsigned int, whence) 296 { 297 int retval; 298 struct fd f = fdget(fd); 299 loff_t offset; 300 301 if (!f.file) 302 return -EBADF; 303 304 retval = -EINVAL; 305 if (whence > SEEK_MAX) 306 goto out_putf; 307 308 offset = vfs_llseek(f.file, ((loff_t) offset_high << 32) | offset_low, 309 whence); 310 311 retval = (int)offset; 312 if (offset >= 0) { 313 retval = -EFAULT; 314 if (!copy_to_user(result, &offset, sizeof(offset))) 315 retval = 0; 316 } 317 out_putf: 318 fdput(f); 319 return retval; 320 } 321 #endif 322 323 /* 324 * rw_verify_area doesn't like huge counts. We limit 325 * them to something that fits in "int" so that others 326 * won't have to do range checks all the time. 327 */ 328 int rw_verify_area(int read_write, struct file *file, const loff_t *ppos, size_t count) 329 { 330 struct inode *inode; 331 loff_t pos; 332 int retval = -EINVAL; 333 334 inode = file_inode(file); 335 if (unlikely((ssize_t) count < 0)) 336 return retval; 337 pos = *ppos; 338 if (unlikely(pos < 0)) { 339 if (!unsigned_offsets(file)) 340 return retval; 341 if (count >= -pos) /* both values are in 0..LLONG_MAX */ 342 return -EOVERFLOW; 343 } else if (unlikely((loff_t) (pos + count) < 0)) { 344 if (!unsigned_offsets(file)) 345 return retval; 346 } 347 348 if (unlikely(inode->i_flock && mandatory_lock(inode))) { 349 retval = locks_mandatory_area( 350 read_write == READ ? FLOCK_VERIFY_READ : FLOCK_VERIFY_WRITE, 351 inode, file, pos, count); 352 if (retval < 0) 353 return retval; 354 } 355 retval = security_file_permission(file, 356 read_write == READ ? MAY_READ : MAY_WRITE); 357 if (retval) 358 return retval; 359 return count > MAX_RW_COUNT ? MAX_RW_COUNT : count; 360 } 361 362 ssize_t do_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos) 363 { 364 struct iovec iov = { .iov_base = buf, .iov_len = len }; 365 struct kiocb kiocb; 366 ssize_t ret; 367 368 init_sync_kiocb(&kiocb, filp); 369 kiocb.ki_pos = *ppos; 370 kiocb.ki_nbytes = len; 371 372 ret = filp->f_op->aio_read(&kiocb, &iov, 1, kiocb.ki_pos); 373 if (-EIOCBQUEUED == ret) 374 ret = wait_on_sync_kiocb(&kiocb); 375 *ppos = kiocb.ki_pos; 376 return ret; 377 } 378 379 EXPORT_SYMBOL(do_sync_read); 380 381 ssize_t vfs_read(struct file *file, char __user *buf, size_t count, loff_t *pos) 382 { 383 ssize_t ret; 384 385 if (!(file->f_mode & FMODE_READ)) 386 return -EBADF; 387 if (!file->f_op->read && !file->f_op->aio_read) 388 return -EINVAL; 389 if (unlikely(!access_ok(VERIFY_WRITE, buf, count))) 390 return -EFAULT; 391 392 ret = rw_verify_area(READ, file, pos, count); 393 if (ret >= 0) { 394 count = ret; 395 if (file->f_op->read) 396 ret = file->f_op->read(file, buf, count, pos); 397 else 398 ret = do_sync_read(file, buf, count, pos); 399 if (ret > 0) { 400 fsnotify_access(file); 401 add_rchar(current, ret); 402 } 403 inc_syscr(current); 404 } 405 406 return ret; 407 } 408 409 EXPORT_SYMBOL(vfs_read); 410 411 ssize_t do_sync_write(struct file *filp, const char __user *buf, size_t len, loff_t *ppos) 412 { 413 struct iovec iov = { .iov_base = (void __user *)buf, .iov_len = len }; 414 struct kiocb kiocb; 415 ssize_t ret; 416 417 init_sync_kiocb(&kiocb, filp); 418 kiocb.ki_pos = *ppos; 419 kiocb.ki_nbytes = len; 420 421 ret = filp->f_op->aio_write(&kiocb, &iov, 1, kiocb.ki_pos); 422 if (-EIOCBQUEUED == ret) 423 ret = wait_on_sync_kiocb(&kiocb); 424 *ppos = kiocb.ki_pos; 425 return ret; 426 } 427 428 EXPORT_SYMBOL(do_sync_write); 429 430 ssize_t __kernel_write(struct file *file, const char *buf, size_t count, loff_t *pos) 431 { 432 mm_segment_t old_fs; 433 const char __user *p; 434 ssize_t ret; 435 436 if (!file->f_op->write && !file->f_op->aio_write) 437 return -EINVAL; 438 439 old_fs = get_fs(); 440 set_fs(get_ds()); 441 p = (__force const char __user *)buf; 442 if (count > MAX_RW_COUNT) 443 count = MAX_RW_COUNT; 444 if (file->f_op->write) 445 ret = file->f_op->write(file, p, count, pos); 446 else 447 ret = do_sync_write(file, p, count, pos); 448 set_fs(old_fs); 449 if (ret > 0) { 450 fsnotify_modify(file); 451 add_wchar(current, ret); 452 } 453 inc_syscw(current); 454 return ret; 455 } 456 457 ssize_t vfs_write(struct file *file, const char __user *buf, size_t count, loff_t *pos) 458 { 459 ssize_t ret; 460 461 if (!(file->f_mode & FMODE_WRITE)) 462 return -EBADF; 463 if (!file->f_op->write && !file->f_op->aio_write) 464 return -EINVAL; 465 if (unlikely(!access_ok(VERIFY_READ, buf, count))) 466 return -EFAULT; 467 468 ret = rw_verify_area(WRITE, file, pos, count); 469 if (ret >= 0) { 470 count = ret; 471 file_start_write(file); 472 if (file->f_op->write) 473 ret = file->f_op->write(file, buf, count, pos); 474 else 475 ret = do_sync_write(file, buf, count, pos); 476 if (ret > 0) { 477 fsnotify_modify(file); 478 add_wchar(current, ret); 479 } 480 inc_syscw(current); 481 file_end_write(file); 482 } 483 484 return ret; 485 } 486 487 EXPORT_SYMBOL(vfs_write); 488 489 static inline loff_t file_pos_read(struct file *file) 490 { 491 return file->f_pos; 492 } 493 494 static inline void file_pos_write(struct file *file, loff_t pos) 495 { 496 file->f_pos = pos; 497 } 498 499 SYSCALL_DEFINE3(read, unsigned int, fd, char __user *, buf, size_t, count) 500 { 501 struct fd f = fdget(fd); 502 ssize_t ret = -EBADF; 503 504 if (f.file) { 505 loff_t pos = file_pos_read(f.file); 506 ret = vfs_read(f.file, buf, count, &pos); 507 if (ret >= 0) 508 file_pos_write(f.file, pos); 509 fdput(f); 510 } 511 return ret; 512 } 513 514 SYSCALL_DEFINE3(write, unsigned int, fd, const char __user *, buf, 515 size_t, count) 516 { 517 struct fd f = fdget(fd); 518 ssize_t ret = -EBADF; 519 520 if (f.file) { 521 loff_t pos = file_pos_read(f.file); 522 ret = vfs_write(f.file, buf, count, &pos); 523 if (ret >= 0) 524 file_pos_write(f.file, pos); 525 fdput(f); 526 } 527 528 return ret; 529 } 530 531 SYSCALL_DEFINE4(pread64, unsigned int, fd, char __user *, buf, 532 size_t, count, loff_t, pos) 533 { 534 struct fd f; 535 ssize_t ret = -EBADF; 536 537 if (pos < 0) 538 return -EINVAL; 539 540 f = fdget(fd); 541 if (f.file) { 542 ret = -ESPIPE; 543 if (f.file->f_mode & FMODE_PREAD) 544 ret = vfs_read(f.file, buf, count, &pos); 545 fdput(f); 546 } 547 548 return ret; 549 } 550 551 SYSCALL_DEFINE4(pwrite64, unsigned int, fd, const char __user *, buf, 552 size_t, count, loff_t, pos) 553 { 554 struct fd f; 555 ssize_t ret = -EBADF; 556 557 if (pos < 0) 558 return -EINVAL; 559 560 f = fdget(fd); 561 if (f.file) { 562 ret = -ESPIPE; 563 if (f.file->f_mode & FMODE_PWRITE) 564 ret = vfs_write(f.file, buf, count, &pos); 565 fdput(f); 566 } 567 568 return ret; 569 } 570 571 /* 572 * Reduce an iovec's length in-place. Return the resulting number of segments 573 */ 574 unsigned long iov_shorten(struct iovec *iov, unsigned long nr_segs, size_t to) 575 { 576 unsigned long seg = 0; 577 size_t len = 0; 578 579 while (seg < nr_segs) { 580 seg++; 581 if (len + iov->iov_len >= to) { 582 iov->iov_len = to - len; 583 break; 584 } 585 len += iov->iov_len; 586 iov++; 587 } 588 return seg; 589 } 590 EXPORT_SYMBOL(iov_shorten); 591 592 static ssize_t do_sync_readv_writev(struct file *filp, const struct iovec *iov, 593 unsigned long nr_segs, size_t len, loff_t *ppos, iov_fn_t fn) 594 { 595 struct kiocb kiocb; 596 ssize_t ret; 597 598 init_sync_kiocb(&kiocb, filp); 599 kiocb.ki_pos = *ppos; 600 kiocb.ki_nbytes = len; 601 602 ret = fn(&kiocb, iov, nr_segs, kiocb.ki_pos); 603 if (ret == -EIOCBQUEUED) 604 ret = wait_on_sync_kiocb(&kiocb); 605 *ppos = kiocb.ki_pos; 606 return ret; 607 } 608 609 /* Do it by hand, with file-ops */ 610 static ssize_t do_loop_readv_writev(struct file *filp, struct iovec *iov, 611 unsigned long nr_segs, loff_t *ppos, io_fn_t fn) 612 { 613 struct iovec *vector = iov; 614 ssize_t ret = 0; 615 616 while (nr_segs > 0) { 617 void __user *base; 618 size_t len; 619 ssize_t nr; 620 621 base = vector->iov_base; 622 len = vector->iov_len; 623 vector++; 624 nr_segs--; 625 626 nr = fn(filp, base, len, ppos); 627 628 if (nr < 0) { 629 if (!ret) 630 ret = nr; 631 break; 632 } 633 ret += nr; 634 if (nr != len) 635 break; 636 } 637 638 return ret; 639 } 640 641 /* A write operation does a read from user space and vice versa */ 642 #define vrfy_dir(type) ((type) == READ ? VERIFY_WRITE : VERIFY_READ) 643 644 ssize_t rw_copy_check_uvector(int type, const struct iovec __user * uvector, 645 unsigned long nr_segs, unsigned long fast_segs, 646 struct iovec *fast_pointer, 647 struct iovec **ret_pointer) 648 { 649 unsigned long seg; 650 ssize_t ret; 651 struct iovec *iov = fast_pointer; 652 653 /* 654 * SuS says "The readv() function *may* fail if the iovcnt argument 655 * was less than or equal to 0, or greater than {IOV_MAX}. Linux has 656 * traditionally returned zero for zero segments, so... 657 */ 658 if (nr_segs == 0) { 659 ret = 0; 660 goto out; 661 } 662 663 /* 664 * First get the "struct iovec" from user memory and 665 * verify all the pointers 666 */ 667 if (nr_segs > UIO_MAXIOV) { 668 ret = -EINVAL; 669 goto out; 670 } 671 if (nr_segs > fast_segs) { 672 iov = kmalloc(nr_segs*sizeof(struct iovec), GFP_KERNEL); 673 if (iov == NULL) { 674 ret = -ENOMEM; 675 goto out; 676 } 677 } 678 if (copy_from_user(iov, uvector, nr_segs*sizeof(*uvector))) { 679 ret = -EFAULT; 680 goto out; 681 } 682 683 /* 684 * According to the Single Unix Specification we should return EINVAL 685 * if an element length is < 0 when cast to ssize_t or if the 686 * total length would overflow the ssize_t return value of the 687 * system call. 688 * 689 * Linux caps all read/write calls to MAX_RW_COUNT, and avoids the 690 * overflow case. 691 */ 692 ret = 0; 693 for (seg = 0; seg < nr_segs; seg++) { 694 void __user *buf = iov[seg].iov_base; 695 ssize_t len = (ssize_t)iov[seg].iov_len; 696 697 /* see if we we're about to use an invalid len or if 698 * it's about to overflow ssize_t */ 699 if (len < 0) { 700 ret = -EINVAL; 701 goto out; 702 } 703 if (type >= 0 704 && unlikely(!access_ok(vrfy_dir(type), buf, len))) { 705 ret = -EFAULT; 706 goto out; 707 } 708 if (len > MAX_RW_COUNT - ret) { 709 len = MAX_RW_COUNT - ret; 710 iov[seg].iov_len = len; 711 } 712 ret += len; 713 } 714 out: 715 *ret_pointer = iov; 716 return ret; 717 } 718 719 static ssize_t do_readv_writev(int type, struct file *file, 720 const struct iovec __user * uvector, 721 unsigned long nr_segs, loff_t *pos) 722 { 723 size_t tot_len; 724 struct iovec iovstack[UIO_FASTIOV]; 725 struct iovec *iov = iovstack; 726 ssize_t ret; 727 io_fn_t fn; 728 iov_fn_t fnv; 729 730 ret = rw_copy_check_uvector(type, uvector, nr_segs, 731 ARRAY_SIZE(iovstack), iovstack, &iov); 732 if (ret <= 0) 733 goto out; 734 735 tot_len = ret; 736 ret = rw_verify_area(type, file, pos, tot_len); 737 if (ret < 0) 738 goto out; 739 740 fnv = NULL; 741 if (type == READ) { 742 fn = file->f_op->read; 743 fnv = file->f_op->aio_read; 744 } else { 745 fn = (io_fn_t)file->f_op->write; 746 fnv = file->f_op->aio_write; 747 file_start_write(file); 748 } 749 750 if (fnv) 751 ret = do_sync_readv_writev(file, iov, nr_segs, tot_len, 752 pos, fnv); 753 else 754 ret = do_loop_readv_writev(file, iov, nr_segs, pos, fn); 755 756 if (type != READ) 757 file_end_write(file); 758 759 out: 760 if (iov != iovstack) 761 kfree(iov); 762 if ((ret + (type == READ)) > 0) { 763 if (type == READ) 764 fsnotify_access(file); 765 else 766 fsnotify_modify(file); 767 } 768 return ret; 769 } 770 771 ssize_t vfs_readv(struct file *file, const struct iovec __user *vec, 772 unsigned long vlen, loff_t *pos) 773 { 774 if (!(file->f_mode & FMODE_READ)) 775 return -EBADF; 776 if (!file->f_op->aio_read && !file->f_op->read) 777 return -EINVAL; 778 779 return do_readv_writev(READ, file, vec, vlen, pos); 780 } 781 782 EXPORT_SYMBOL(vfs_readv); 783 784 ssize_t vfs_writev(struct file *file, const struct iovec __user *vec, 785 unsigned long vlen, loff_t *pos) 786 { 787 if (!(file->f_mode & FMODE_WRITE)) 788 return -EBADF; 789 if (!file->f_op->aio_write && !file->f_op->write) 790 return -EINVAL; 791 792 return do_readv_writev(WRITE, file, vec, vlen, pos); 793 } 794 795 EXPORT_SYMBOL(vfs_writev); 796 797 SYSCALL_DEFINE3(readv, unsigned long, fd, const struct iovec __user *, vec, 798 unsigned long, vlen) 799 { 800 struct fd f = fdget(fd); 801 ssize_t ret = -EBADF; 802 803 if (f.file) { 804 loff_t pos = file_pos_read(f.file); 805 ret = vfs_readv(f.file, vec, vlen, &pos); 806 if (ret >= 0) 807 file_pos_write(f.file, pos); 808 fdput(f); 809 } 810 811 if (ret > 0) 812 add_rchar(current, ret); 813 inc_syscr(current); 814 return ret; 815 } 816 817 SYSCALL_DEFINE3(writev, unsigned long, fd, const struct iovec __user *, vec, 818 unsigned long, vlen) 819 { 820 struct fd f = fdget(fd); 821 ssize_t ret = -EBADF; 822 823 if (f.file) { 824 loff_t pos = file_pos_read(f.file); 825 ret = vfs_writev(f.file, vec, vlen, &pos); 826 if (ret >= 0) 827 file_pos_write(f.file, pos); 828 fdput(f); 829 } 830 831 if (ret > 0) 832 add_wchar(current, ret); 833 inc_syscw(current); 834 return ret; 835 } 836 837 static inline loff_t pos_from_hilo(unsigned long high, unsigned long low) 838 { 839 #define HALF_LONG_BITS (BITS_PER_LONG / 2) 840 return (((loff_t)high << HALF_LONG_BITS) << HALF_LONG_BITS) | low; 841 } 842 843 SYSCALL_DEFINE5(preadv, unsigned long, fd, const struct iovec __user *, vec, 844 unsigned long, vlen, unsigned long, pos_l, unsigned long, pos_h) 845 { 846 loff_t pos = pos_from_hilo(pos_h, pos_l); 847 struct fd f; 848 ssize_t ret = -EBADF; 849 850 if (pos < 0) 851 return -EINVAL; 852 853 f = fdget(fd); 854 if (f.file) { 855 ret = -ESPIPE; 856 if (f.file->f_mode & FMODE_PREAD) 857 ret = vfs_readv(f.file, vec, vlen, &pos); 858 fdput(f); 859 } 860 861 if (ret > 0) 862 add_rchar(current, ret); 863 inc_syscr(current); 864 return ret; 865 } 866 867 SYSCALL_DEFINE5(pwritev, unsigned long, fd, const struct iovec __user *, vec, 868 unsigned long, vlen, unsigned long, pos_l, unsigned long, pos_h) 869 { 870 loff_t pos = pos_from_hilo(pos_h, pos_l); 871 struct fd f; 872 ssize_t ret = -EBADF; 873 874 if (pos < 0) 875 return -EINVAL; 876 877 f = fdget(fd); 878 if (f.file) { 879 ret = -ESPIPE; 880 if (f.file->f_mode & FMODE_PWRITE) 881 ret = vfs_writev(f.file, vec, vlen, &pos); 882 fdput(f); 883 } 884 885 if (ret > 0) 886 add_wchar(current, ret); 887 inc_syscw(current); 888 return ret; 889 } 890 891 #ifdef CONFIG_COMPAT 892 893 static ssize_t compat_do_readv_writev(int type, struct file *file, 894 const struct compat_iovec __user *uvector, 895 unsigned long nr_segs, loff_t *pos) 896 { 897 compat_ssize_t tot_len; 898 struct iovec iovstack[UIO_FASTIOV]; 899 struct iovec *iov = iovstack; 900 ssize_t ret; 901 io_fn_t fn; 902 iov_fn_t fnv; 903 904 ret = compat_rw_copy_check_uvector(type, uvector, nr_segs, 905 UIO_FASTIOV, iovstack, &iov); 906 if (ret <= 0) 907 goto out; 908 909 tot_len = ret; 910 ret = rw_verify_area(type, file, pos, tot_len); 911 if (ret < 0) 912 goto out; 913 914 fnv = NULL; 915 if (type == READ) { 916 fn = file->f_op->read; 917 fnv = file->f_op->aio_read; 918 } else { 919 fn = (io_fn_t)file->f_op->write; 920 fnv = file->f_op->aio_write; 921 file_start_write(file); 922 } 923 924 if (fnv) 925 ret = do_sync_readv_writev(file, iov, nr_segs, tot_len, 926 pos, fnv); 927 else 928 ret = do_loop_readv_writev(file, iov, nr_segs, pos, fn); 929 930 if (type != READ) 931 file_end_write(file); 932 933 out: 934 if (iov != iovstack) 935 kfree(iov); 936 if ((ret + (type == READ)) > 0) { 937 if (type == READ) 938 fsnotify_access(file); 939 else 940 fsnotify_modify(file); 941 } 942 return ret; 943 } 944 945 static size_t compat_readv(struct file *file, 946 const struct compat_iovec __user *vec, 947 unsigned long vlen, loff_t *pos) 948 { 949 ssize_t ret = -EBADF; 950 951 if (!(file->f_mode & FMODE_READ)) 952 goto out; 953 954 ret = -EINVAL; 955 if (!file->f_op->aio_read && !file->f_op->read) 956 goto out; 957 958 ret = compat_do_readv_writev(READ, file, vec, vlen, pos); 959 960 out: 961 if (ret > 0) 962 add_rchar(current, ret); 963 inc_syscr(current); 964 return ret; 965 } 966 967 COMPAT_SYSCALL_DEFINE3(readv, compat_ulong_t, fd, 968 const struct compat_iovec __user *,vec, 969 compat_ulong_t, vlen) 970 { 971 struct fd f = fdget(fd); 972 ssize_t ret; 973 loff_t pos; 974 975 if (!f.file) 976 return -EBADF; 977 pos = f.file->f_pos; 978 ret = compat_readv(f.file, vec, vlen, &pos); 979 if (ret >= 0) 980 f.file->f_pos = pos; 981 fdput(f); 982 return ret; 983 } 984 985 COMPAT_SYSCALL_DEFINE4(preadv64, unsigned long, fd, 986 const struct compat_iovec __user *,vec, 987 unsigned long, vlen, loff_t, pos) 988 { 989 struct fd f; 990 ssize_t ret; 991 992 if (pos < 0) 993 return -EINVAL; 994 f = fdget(fd); 995 if (!f.file) 996 return -EBADF; 997 ret = -ESPIPE; 998 if (f.file->f_mode & FMODE_PREAD) 999 ret = compat_readv(f.file, vec, vlen, &pos); 1000 fdput(f); 1001 return ret; 1002 } 1003 1004 COMPAT_SYSCALL_DEFINE5(preadv, compat_ulong_t, fd, 1005 const struct compat_iovec __user *,vec, 1006 compat_ulong_t, vlen, u32, pos_low, u32, pos_high) 1007 { 1008 loff_t pos = ((loff_t)pos_high << 32) | pos_low; 1009 return compat_sys_preadv64(fd, vec, vlen, pos); 1010 } 1011 1012 static size_t compat_writev(struct file *file, 1013 const struct compat_iovec __user *vec, 1014 unsigned long vlen, loff_t *pos) 1015 { 1016 ssize_t ret = -EBADF; 1017 1018 if (!(file->f_mode & FMODE_WRITE)) 1019 goto out; 1020 1021 ret = -EINVAL; 1022 if (!file->f_op->aio_write && !file->f_op->write) 1023 goto out; 1024 1025 ret = compat_do_readv_writev(WRITE, file, vec, vlen, pos); 1026 1027 out: 1028 if (ret > 0) 1029 add_wchar(current, ret); 1030 inc_syscw(current); 1031 return ret; 1032 } 1033 1034 COMPAT_SYSCALL_DEFINE3(writev, compat_ulong_t, fd, 1035 const struct compat_iovec __user *, vec, 1036 compat_ulong_t, vlen) 1037 { 1038 struct fd f = fdget(fd); 1039 ssize_t ret; 1040 loff_t pos; 1041 1042 if (!f.file) 1043 return -EBADF; 1044 pos = f.file->f_pos; 1045 ret = compat_writev(f.file, vec, vlen, &pos); 1046 if (ret >= 0) 1047 f.file->f_pos = pos; 1048 fdput(f); 1049 return ret; 1050 } 1051 1052 COMPAT_SYSCALL_DEFINE4(pwritev64, unsigned long, fd, 1053 const struct compat_iovec __user *,vec, 1054 unsigned long, vlen, loff_t, pos) 1055 { 1056 struct fd f; 1057 ssize_t ret; 1058 1059 if (pos < 0) 1060 return -EINVAL; 1061 f = fdget(fd); 1062 if (!f.file) 1063 return -EBADF; 1064 ret = -ESPIPE; 1065 if (f.file->f_mode & FMODE_PWRITE) 1066 ret = compat_writev(f.file, vec, vlen, &pos); 1067 fdput(f); 1068 return ret; 1069 } 1070 1071 COMPAT_SYSCALL_DEFINE5(pwritev, compat_ulong_t, fd, 1072 const struct compat_iovec __user *,vec, 1073 compat_ulong_t, vlen, u32, pos_low, u32, pos_high) 1074 { 1075 loff_t pos = ((loff_t)pos_high << 32) | pos_low; 1076 return compat_sys_pwritev64(fd, vec, vlen, pos); 1077 } 1078 #endif 1079 1080 static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos, 1081 size_t count, loff_t max) 1082 { 1083 struct fd in, out; 1084 struct inode *in_inode, *out_inode; 1085 loff_t pos; 1086 loff_t out_pos; 1087 ssize_t retval; 1088 int fl; 1089 1090 /* 1091 * Get input file, and verify that it is ok.. 1092 */ 1093 retval = -EBADF; 1094 in = fdget(in_fd); 1095 if (!in.file) 1096 goto out; 1097 if (!(in.file->f_mode & FMODE_READ)) 1098 goto fput_in; 1099 retval = -ESPIPE; 1100 if (!ppos) { 1101 pos = in.file->f_pos; 1102 } else { 1103 pos = *ppos; 1104 if (!(in.file->f_mode & FMODE_PREAD)) 1105 goto fput_in; 1106 } 1107 retval = rw_verify_area(READ, in.file, &pos, count); 1108 if (retval < 0) 1109 goto fput_in; 1110 count = retval; 1111 1112 /* 1113 * Get output file, and verify that it is ok.. 1114 */ 1115 retval = -EBADF; 1116 out = fdget(out_fd); 1117 if (!out.file) 1118 goto fput_in; 1119 if (!(out.file->f_mode & FMODE_WRITE)) 1120 goto fput_out; 1121 retval = -EINVAL; 1122 in_inode = file_inode(in.file); 1123 out_inode = file_inode(out.file); 1124 out_pos = out.file->f_pos; 1125 retval = rw_verify_area(WRITE, out.file, &out_pos, count); 1126 if (retval < 0) 1127 goto fput_out; 1128 count = retval; 1129 1130 if (!max) 1131 max = min(in_inode->i_sb->s_maxbytes, out_inode->i_sb->s_maxbytes); 1132 1133 if (unlikely(pos + count > max)) { 1134 retval = -EOVERFLOW; 1135 if (pos >= max) 1136 goto fput_out; 1137 count = max - pos; 1138 } 1139 1140 fl = 0; 1141 #if 0 1142 /* 1143 * We need to debate whether we can enable this or not. The 1144 * man page documents EAGAIN return for the output at least, 1145 * and the application is arguably buggy if it doesn't expect 1146 * EAGAIN on a non-blocking file descriptor. 1147 */ 1148 if (in.file->f_flags & O_NONBLOCK) 1149 fl = SPLICE_F_NONBLOCK; 1150 #endif 1151 file_start_write(out.file); 1152 retval = do_splice_direct(in.file, &pos, out.file, &out_pos, count, fl); 1153 file_end_write(out.file); 1154 1155 if (retval > 0) { 1156 add_rchar(current, retval); 1157 add_wchar(current, retval); 1158 fsnotify_access(in.file); 1159 fsnotify_modify(out.file); 1160 out.file->f_pos = out_pos; 1161 if (ppos) 1162 *ppos = pos; 1163 else 1164 in.file->f_pos = pos; 1165 } 1166 1167 inc_syscr(current); 1168 inc_syscw(current); 1169 if (pos > max) 1170 retval = -EOVERFLOW; 1171 1172 fput_out: 1173 fdput(out); 1174 fput_in: 1175 fdput(in); 1176 out: 1177 return retval; 1178 } 1179 1180 SYSCALL_DEFINE4(sendfile, int, out_fd, int, in_fd, off_t __user *, offset, size_t, count) 1181 { 1182 loff_t pos; 1183 off_t off; 1184 ssize_t ret; 1185 1186 if (offset) { 1187 if (unlikely(get_user(off, offset))) 1188 return -EFAULT; 1189 pos = off; 1190 ret = do_sendfile(out_fd, in_fd, &pos, count, MAX_NON_LFS); 1191 if (unlikely(put_user(pos, offset))) 1192 return -EFAULT; 1193 return ret; 1194 } 1195 1196 return do_sendfile(out_fd, in_fd, NULL, count, 0); 1197 } 1198 1199 SYSCALL_DEFINE4(sendfile64, int, out_fd, int, in_fd, loff_t __user *, offset, size_t, count) 1200 { 1201 loff_t pos; 1202 ssize_t ret; 1203 1204 if (offset) { 1205 if (unlikely(copy_from_user(&pos, offset, sizeof(loff_t)))) 1206 return -EFAULT; 1207 ret = do_sendfile(out_fd, in_fd, &pos, count, 0); 1208 if (unlikely(put_user(pos, offset))) 1209 return -EFAULT; 1210 return ret; 1211 } 1212 1213 return do_sendfile(out_fd, in_fd, NULL, count, 0); 1214 } 1215 1216 #ifdef CONFIG_COMPAT 1217 COMPAT_SYSCALL_DEFINE4(sendfile, int, out_fd, int, in_fd, 1218 compat_off_t __user *, offset, compat_size_t, count) 1219 { 1220 loff_t pos; 1221 off_t off; 1222 ssize_t ret; 1223 1224 if (offset) { 1225 if (unlikely(get_user(off, offset))) 1226 return -EFAULT; 1227 pos = off; 1228 ret = do_sendfile(out_fd, in_fd, &pos, count, MAX_NON_LFS); 1229 if (unlikely(put_user(pos, offset))) 1230 return -EFAULT; 1231 return ret; 1232 } 1233 1234 return do_sendfile(out_fd, in_fd, NULL, count, 0); 1235 } 1236 1237 COMPAT_SYSCALL_DEFINE4(sendfile64, int, out_fd, int, in_fd, 1238 compat_loff_t __user *, offset, compat_size_t, count) 1239 { 1240 loff_t pos; 1241 ssize_t ret; 1242 1243 if (offset) { 1244 if (unlikely(copy_from_user(&pos, offset, sizeof(loff_t)))) 1245 return -EFAULT; 1246 ret = do_sendfile(out_fd, in_fd, &pos, count, 0); 1247 if (unlikely(put_user(pos, offset))) 1248 return -EFAULT; 1249 return ret; 1250 } 1251 1252 return do_sendfile(out_fd, in_fd, NULL, count, 0); 1253 } 1254 #endif 1255