1 /* 2 * linux/fs/read_write.c 3 * 4 * Copyright (C) 1991, 1992 Linus Torvalds 5 */ 6 7 #include <linux/slab.h> 8 #include <linux/stat.h> 9 #include <linux/fcntl.h> 10 #include <linux/file.h> 11 #include <linux/uio.h> 12 #include <linux/smp_lock.h> 13 #include <linux/fsnotify.h> 14 #include <linux/security.h> 15 #include <linux/module.h> 16 #include <linux/syscalls.h> 17 #include <linux/pagemap.h> 18 19 #include <asm/uaccess.h> 20 #include <asm/unistd.h> 21 22 struct file_operations generic_ro_fops = { 23 .llseek = generic_file_llseek, 24 .read = generic_file_read, 25 .mmap = generic_file_readonly_mmap, 26 .sendfile = generic_file_sendfile, 27 }; 28 29 EXPORT_SYMBOL(generic_ro_fops); 30 31 loff_t generic_file_llseek(struct file *file, loff_t offset, int origin) 32 { 33 long long retval; 34 struct inode *inode = file->f_mapping->host; 35 36 mutex_lock(&inode->i_mutex); 37 switch (origin) { 38 case 2: 39 offset += inode->i_size; 40 break; 41 case 1: 42 offset += file->f_pos; 43 } 44 retval = -EINVAL; 45 if (offset>=0 && offset<=inode->i_sb->s_maxbytes) { 46 if (offset != file->f_pos) { 47 file->f_pos = offset; 48 file->f_version = 0; 49 } 50 retval = offset; 51 } 52 mutex_unlock(&inode->i_mutex); 53 return retval; 54 } 55 56 EXPORT_SYMBOL(generic_file_llseek); 57 58 loff_t remote_llseek(struct file *file, loff_t offset, int origin) 59 { 60 long long retval; 61 62 lock_kernel(); 63 switch (origin) { 64 case 2: 65 offset += i_size_read(file->f_dentry->d_inode); 66 break; 67 case 1: 68 offset += file->f_pos; 69 } 70 retval = -EINVAL; 71 if (offset>=0 && offset<=file->f_dentry->d_inode->i_sb->s_maxbytes) { 72 if (offset != file->f_pos) { 73 file->f_pos = offset; 74 file->f_version = 0; 75 } 76 retval = offset; 77 } 78 unlock_kernel(); 79 return retval; 80 } 81 EXPORT_SYMBOL(remote_llseek); 82 83 loff_t no_llseek(struct file *file, loff_t offset, int origin) 84 { 85 return -ESPIPE; 86 } 87 EXPORT_SYMBOL(no_llseek); 88 89 loff_t default_llseek(struct file *file, loff_t offset, int origin) 90 { 91 long long retval; 92 93 lock_kernel(); 94 switch (origin) { 95 case 2: 96 offset += i_size_read(file->f_dentry->d_inode); 97 break; 98 case 1: 99 offset += file->f_pos; 100 } 101 retval = -EINVAL; 102 if (offset >= 0) { 103 if (offset != file->f_pos) { 104 file->f_pos = offset; 105 file->f_version = 0; 106 } 107 retval = offset; 108 } 109 unlock_kernel(); 110 return retval; 111 } 112 EXPORT_SYMBOL(default_llseek); 113 114 loff_t vfs_llseek(struct file *file, loff_t offset, int origin) 115 { 116 loff_t (*fn)(struct file *, loff_t, int); 117 118 fn = no_llseek; 119 if (file->f_mode & FMODE_LSEEK) { 120 fn = default_llseek; 121 if (file->f_op && file->f_op->llseek) 122 fn = file->f_op->llseek; 123 } 124 return fn(file, offset, origin); 125 } 126 EXPORT_SYMBOL(vfs_llseek); 127 128 asmlinkage off_t sys_lseek(unsigned int fd, off_t offset, unsigned int origin) 129 { 130 off_t retval; 131 struct file * file; 132 int fput_needed; 133 134 retval = -EBADF; 135 file = fget_light(fd, &fput_needed); 136 if (!file) 137 goto bad; 138 139 retval = -EINVAL; 140 if (origin <= 2) { 141 loff_t res = vfs_llseek(file, offset, origin); 142 retval = res; 143 if (res != (loff_t)retval) 144 retval = -EOVERFLOW; /* LFS: should only happen on 32 bit platforms */ 145 } 146 fput_light(file, fput_needed); 147 bad: 148 return retval; 149 } 150 151 #ifdef __ARCH_WANT_SYS_LLSEEK 152 asmlinkage long sys_llseek(unsigned int fd, unsigned long offset_high, 153 unsigned long offset_low, loff_t __user * result, 154 unsigned int origin) 155 { 156 int retval; 157 struct file * file; 158 loff_t offset; 159 int fput_needed; 160 161 retval = -EBADF; 162 file = fget_light(fd, &fput_needed); 163 if (!file) 164 goto bad; 165 166 retval = -EINVAL; 167 if (origin > 2) 168 goto out_putf; 169 170 offset = vfs_llseek(file, ((loff_t) offset_high << 32) | offset_low, 171 origin); 172 173 retval = (int)offset; 174 if (offset >= 0) { 175 retval = -EFAULT; 176 if (!copy_to_user(result, &offset, sizeof(offset))) 177 retval = 0; 178 } 179 out_putf: 180 fput_light(file, fput_needed); 181 bad: 182 return retval; 183 } 184 #endif 185 186 /* 187 * rw_verify_area doesn't like huge counts. We limit 188 * them to something that fits in "int" so that others 189 * won't have to do range checks all the time. 190 */ 191 #define MAX_RW_COUNT (INT_MAX & PAGE_CACHE_MASK) 192 193 int rw_verify_area(int read_write, struct file *file, loff_t *ppos, size_t count) 194 { 195 struct inode *inode; 196 loff_t pos; 197 198 if (unlikely((ssize_t) count < 0)) 199 goto Einval; 200 pos = *ppos; 201 if (unlikely((pos < 0) || (loff_t) (pos + count) < 0)) 202 goto Einval; 203 204 inode = file->f_dentry->d_inode; 205 if (inode->i_flock && MANDATORY_LOCK(inode)) { 206 int retval = locks_mandatory_area( 207 read_write == READ ? FLOCK_VERIFY_READ : FLOCK_VERIFY_WRITE, 208 inode, file, pos, count); 209 if (retval < 0) 210 return retval; 211 } 212 return count > MAX_RW_COUNT ? MAX_RW_COUNT : count; 213 214 Einval: 215 return -EINVAL; 216 } 217 218 static void wait_on_retry_sync_kiocb(struct kiocb *iocb) 219 { 220 set_current_state(TASK_UNINTERRUPTIBLE); 221 if (!kiocbIsKicked(iocb)) 222 schedule(); 223 else 224 kiocbClearKicked(iocb); 225 __set_current_state(TASK_RUNNING); 226 } 227 228 ssize_t do_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos) 229 { 230 struct kiocb kiocb; 231 ssize_t ret; 232 233 init_sync_kiocb(&kiocb, filp); 234 kiocb.ki_pos = *ppos; 235 while (-EIOCBRETRY == 236 (ret = filp->f_op->aio_read(&kiocb, buf, len, kiocb.ki_pos))) 237 wait_on_retry_sync_kiocb(&kiocb); 238 239 if (-EIOCBQUEUED == ret) 240 ret = wait_on_sync_kiocb(&kiocb); 241 *ppos = kiocb.ki_pos; 242 return ret; 243 } 244 245 EXPORT_SYMBOL(do_sync_read); 246 247 ssize_t vfs_read(struct file *file, char __user *buf, size_t count, loff_t *pos) 248 { 249 ssize_t ret; 250 251 if (!(file->f_mode & FMODE_READ)) 252 return -EBADF; 253 if (!file->f_op || (!file->f_op->read && !file->f_op->aio_read)) 254 return -EINVAL; 255 if (unlikely(!access_ok(VERIFY_WRITE, buf, count))) 256 return -EFAULT; 257 258 ret = rw_verify_area(READ, file, pos, count); 259 if (ret >= 0) { 260 count = ret; 261 ret = security_file_permission (file, MAY_READ); 262 if (!ret) { 263 if (file->f_op->read) 264 ret = file->f_op->read(file, buf, count, pos); 265 else 266 ret = do_sync_read(file, buf, count, pos); 267 if (ret > 0) { 268 fsnotify_access(file->f_dentry); 269 current->rchar += ret; 270 } 271 current->syscr++; 272 } 273 } 274 275 return ret; 276 } 277 278 EXPORT_SYMBOL(vfs_read); 279 280 ssize_t do_sync_write(struct file *filp, const char __user *buf, size_t len, loff_t *ppos) 281 { 282 struct kiocb kiocb; 283 ssize_t ret; 284 285 init_sync_kiocb(&kiocb, filp); 286 kiocb.ki_pos = *ppos; 287 while (-EIOCBRETRY == 288 (ret = filp->f_op->aio_write(&kiocb, buf, len, kiocb.ki_pos))) 289 wait_on_retry_sync_kiocb(&kiocb); 290 291 if (-EIOCBQUEUED == ret) 292 ret = wait_on_sync_kiocb(&kiocb); 293 *ppos = kiocb.ki_pos; 294 return ret; 295 } 296 297 EXPORT_SYMBOL(do_sync_write); 298 299 ssize_t vfs_write(struct file *file, const char __user *buf, size_t count, loff_t *pos) 300 { 301 ssize_t ret; 302 303 if (!(file->f_mode & FMODE_WRITE)) 304 return -EBADF; 305 if (!file->f_op || (!file->f_op->write && !file->f_op->aio_write)) 306 return -EINVAL; 307 if (unlikely(!access_ok(VERIFY_READ, buf, count))) 308 return -EFAULT; 309 310 ret = rw_verify_area(WRITE, file, pos, count); 311 if (ret >= 0) { 312 count = ret; 313 ret = security_file_permission (file, MAY_WRITE); 314 if (!ret) { 315 if (file->f_op->write) 316 ret = file->f_op->write(file, buf, count, pos); 317 else 318 ret = do_sync_write(file, buf, count, pos); 319 if (ret > 0) { 320 fsnotify_modify(file->f_dentry); 321 current->wchar += ret; 322 } 323 current->syscw++; 324 } 325 } 326 327 return ret; 328 } 329 330 EXPORT_SYMBOL(vfs_write); 331 332 static inline loff_t file_pos_read(struct file *file) 333 { 334 return file->f_pos; 335 } 336 337 static inline void file_pos_write(struct file *file, loff_t pos) 338 { 339 file->f_pos = pos; 340 } 341 342 asmlinkage ssize_t sys_read(unsigned int fd, char __user * buf, size_t count) 343 { 344 struct file *file; 345 ssize_t ret = -EBADF; 346 int fput_needed; 347 348 file = fget_light(fd, &fput_needed); 349 if (file) { 350 loff_t pos = file_pos_read(file); 351 ret = vfs_read(file, buf, count, &pos); 352 file_pos_write(file, pos); 353 fput_light(file, fput_needed); 354 } 355 356 return ret; 357 } 358 EXPORT_SYMBOL_GPL(sys_read); 359 360 asmlinkage ssize_t sys_write(unsigned int fd, const char __user * buf, size_t count) 361 { 362 struct file *file; 363 ssize_t ret = -EBADF; 364 int fput_needed; 365 366 file = fget_light(fd, &fput_needed); 367 if (file) { 368 loff_t pos = file_pos_read(file); 369 ret = vfs_write(file, buf, count, &pos); 370 file_pos_write(file, pos); 371 fput_light(file, fput_needed); 372 } 373 374 return ret; 375 } 376 377 asmlinkage ssize_t sys_pread64(unsigned int fd, char __user *buf, 378 size_t count, loff_t pos) 379 { 380 struct file *file; 381 ssize_t ret = -EBADF; 382 int fput_needed; 383 384 if (pos < 0) 385 return -EINVAL; 386 387 file = fget_light(fd, &fput_needed); 388 if (file) { 389 ret = -ESPIPE; 390 if (file->f_mode & FMODE_PREAD) 391 ret = vfs_read(file, buf, count, &pos); 392 fput_light(file, fput_needed); 393 } 394 395 return ret; 396 } 397 398 asmlinkage ssize_t sys_pwrite64(unsigned int fd, const char __user *buf, 399 size_t count, loff_t pos) 400 { 401 struct file *file; 402 ssize_t ret = -EBADF; 403 int fput_needed; 404 405 if (pos < 0) 406 return -EINVAL; 407 408 file = fget_light(fd, &fput_needed); 409 if (file) { 410 ret = -ESPIPE; 411 if (file->f_mode & FMODE_PWRITE) 412 ret = vfs_write(file, buf, count, &pos); 413 fput_light(file, fput_needed); 414 } 415 416 return ret; 417 } 418 419 /* 420 * Reduce an iovec's length in-place. Return the resulting number of segments 421 */ 422 unsigned long iov_shorten(struct iovec *iov, unsigned long nr_segs, size_t to) 423 { 424 unsigned long seg = 0; 425 size_t len = 0; 426 427 while (seg < nr_segs) { 428 seg++; 429 if (len + iov->iov_len >= to) { 430 iov->iov_len = to - len; 431 break; 432 } 433 len += iov->iov_len; 434 iov++; 435 } 436 return seg; 437 } 438 439 EXPORT_SYMBOL(iov_shorten); 440 441 /* A write operation does a read from user space and vice versa */ 442 #define vrfy_dir(type) ((type) == READ ? VERIFY_WRITE : VERIFY_READ) 443 444 static ssize_t do_readv_writev(int type, struct file *file, 445 const struct iovec __user * uvector, 446 unsigned long nr_segs, loff_t *pos) 447 { 448 typedef ssize_t (*io_fn_t)(struct file *, char __user *, size_t, loff_t *); 449 typedef ssize_t (*iov_fn_t)(struct file *, const struct iovec *, unsigned long, loff_t *); 450 451 size_t tot_len; 452 struct iovec iovstack[UIO_FASTIOV]; 453 struct iovec *iov=iovstack, *vector; 454 ssize_t ret; 455 int seg; 456 io_fn_t fn; 457 iov_fn_t fnv; 458 459 /* 460 * SuS says "The readv() function *may* fail if the iovcnt argument 461 * was less than or equal to 0, or greater than {IOV_MAX}. Linux has 462 * traditionally returned zero for zero segments, so... 463 */ 464 ret = 0; 465 if (nr_segs == 0) 466 goto out; 467 468 /* 469 * First get the "struct iovec" from user memory and 470 * verify all the pointers 471 */ 472 ret = -EINVAL; 473 if ((nr_segs > UIO_MAXIOV) || (nr_segs <= 0)) 474 goto out; 475 if (!file->f_op) 476 goto out; 477 if (nr_segs > UIO_FASTIOV) { 478 ret = -ENOMEM; 479 iov = kmalloc(nr_segs*sizeof(struct iovec), GFP_KERNEL); 480 if (!iov) 481 goto out; 482 } 483 ret = -EFAULT; 484 if (copy_from_user(iov, uvector, nr_segs*sizeof(*uvector))) 485 goto out; 486 487 /* 488 * Single unix specification: 489 * We should -EINVAL if an element length is not >= 0 and fitting an 490 * ssize_t. The total length is fitting an ssize_t 491 * 492 * Be careful here because iov_len is a size_t not an ssize_t 493 */ 494 tot_len = 0; 495 ret = -EINVAL; 496 for (seg = 0; seg < nr_segs; seg++) { 497 void __user *buf = iov[seg].iov_base; 498 ssize_t len = (ssize_t)iov[seg].iov_len; 499 500 if (len < 0) /* size_t not fitting an ssize_t .. */ 501 goto out; 502 if (unlikely(!access_ok(vrfy_dir(type), buf, len))) 503 goto Efault; 504 tot_len += len; 505 if ((ssize_t)tot_len < 0) /* maths overflow on the ssize_t */ 506 goto out; 507 } 508 if (tot_len == 0) { 509 ret = 0; 510 goto out; 511 } 512 513 ret = rw_verify_area(type, file, pos, tot_len); 514 if (ret < 0) 515 goto out; 516 ret = security_file_permission(file, type == READ ? MAY_READ : MAY_WRITE); 517 if (ret) 518 goto out; 519 520 fnv = NULL; 521 if (type == READ) { 522 fn = file->f_op->read; 523 fnv = file->f_op->readv; 524 } else { 525 fn = (io_fn_t)file->f_op->write; 526 fnv = file->f_op->writev; 527 } 528 if (fnv) { 529 ret = fnv(file, iov, nr_segs, pos); 530 goto out; 531 } 532 533 /* Do it by hand, with file-ops */ 534 ret = 0; 535 vector = iov; 536 while (nr_segs > 0) { 537 void __user * base; 538 size_t len; 539 ssize_t nr; 540 541 base = vector->iov_base; 542 len = vector->iov_len; 543 vector++; 544 nr_segs--; 545 546 nr = fn(file, base, len, pos); 547 548 if (nr < 0) { 549 if (!ret) ret = nr; 550 break; 551 } 552 ret += nr; 553 if (nr != len) 554 break; 555 } 556 out: 557 if (iov != iovstack) 558 kfree(iov); 559 if ((ret + (type == READ)) > 0) { 560 if (type == READ) 561 fsnotify_access(file->f_dentry); 562 else 563 fsnotify_modify(file->f_dentry); 564 } 565 return ret; 566 Efault: 567 ret = -EFAULT; 568 goto out; 569 } 570 571 ssize_t vfs_readv(struct file *file, const struct iovec __user *vec, 572 unsigned long vlen, loff_t *pos) 573 { 574 if (!(file->f_mode & FMODE_READ)) 575 return -EBADF; 576 if (!file->f_op || (!file->f_op->readv && !file->f_op->read)) 577 return -EINVAL; 578 579 return do_readv_writev(READ, file, vec, vlen, pos); 580 } 581 582 EXPORT_SYMBOL(vfs_readv); 583 584 ssize_t vfs_writev(struct file *file, const struct iovec __user *vec, 585 unsigned long vlen, loff_t *pos) 586 { 587 if (!(file->f_mode & FMODE_WRITE)) 588 return -EBADF; 589 if (!file->f_op || (!file->f_op->writev && !file->f_op->write)) 590 return -EINVAL; 591 592 return do_readv_writev(WRITE, file, vec, vlen, pos); 593 } 594 595 EXPORT_SYMBOL(vfs_writev); 596 597 asmlinkage ssize_t 598 sys_readv(unsigned long fd, const struct iovec __user *vec, unsigned long vlen) 599 { 600 struct file *file; 601 ssize_t ret = -EBADF; 602 int fput_needed; 603 604 file = fget_light(fd, &fput_needed); 605 if (file) { 606 loff_t pos = file_pos_read(file); 607 ret = vfs_readv(file, vec, vlen, &pos); 608 file_pos_write(file, pos); 609 fput_light(file, fput_needed); 610 } 611 612 if (ret > 0) 613 current->rchar += ret; 614 current->syscr++; 615 return ret; 616 } 617 618 asmlinkage ssize_t 619 sys_writev(unsigned long fd, const struct iovec __user *vec, unsigned long vlen) 620 { 621 struct file *file; 622 ssize_t ret = -EBADF; 623 int fput_needed; 624 625 file = fget_light(fd, &fput_needed); 626 if (file) { 627 loff_t pos = file_pos_read(file); 628 ret = vfs_writev(file, vec, vlen, &pos); 629 file_pos_write(file, pos); 630 fput_light(file, fput_needed); 631 } 632 633 if (ret > 0) 634 current->wchar += ret; 635 current->syscw++; 636 return ret; 637 } 638 639 static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos, 640 size_t count, loff_t max) 641 { 642 struct file * in_file, * out_file; 643 struct inode * in_inode, * out_inode; 644 loff_t pos; 645 ssize_t retval; 646 int fput_needed_in, fput_needed_out; 647 648 /* 649 * Get input file, and verify that it is ok.. 650 */ 651 retval = -EBADF; 652 in_file = fget_light(in_fd, &fput_needed_in); 653 if (!in_file) 654 goto out; 655 if (!(in_file->f_mode & FMODE_READ)) 656 goto fput_in; 657 retval = -EINVAL; 658 in_inode = in_file->f_dentry->d_inode; 659 if (!in_inode) 660 goto fput_in; 661 if (!in_file->f_op || !in_file->f_op->sendfile) 662 goto fput_in; 663 retval = -ESPIPE; 664 if (!ppos) 665 ppos = &in_file->f_pos; 666 else 667 if (!(in_file->f_mode & FMODE_PREAD)) 668 goto fput_in; 669 retval = rw_verify_area(READ, in_file, ppos, count); 670 if (retval < 0) 671 goto fput_in; 672 count = retval; 673 674 retval = security_file_permission (in_file, MAY_READ); 675 if (retval) 676 goto fput_in; 677 678 /* 679 * Get output file, and verify that it is ok.. 680 */ 681 retval = -EBADF; 682 out_file = fget_light(out_fd, &fput_needed_out); 683 if (!out_file) 684 goto fput_in; 685 if (!(out_file->f_mode & FMODE_WRITE)) 686 goto fput_out; 687 retval = -EINVAL; 688 if (!out_file->f_op || !out_file->f_op->sendpage) 689 goto fput_out; 690 out_inode = out_file->f_dentry->d_inode; 691 retval = rw_verify_area(WRITE, out_file, &out_file->f_pos, count); 692 if (retval < 0) 693 goto fput_out; 694 count = retval; 695 696 retval = security_file_permission (out_file, MAY_WRITE); 697 if (retval) 698 goto fput_out; 699 700 if (!max) 701 max = min(in_inode->i_sb->s_maxbytes, out_inode->i_sb->s_maxbytes); 702 703 pos = *ppos; 704 retval = -EINVAL; 705 if (unlikely(pos < 0)) 706 goto fput_out; 707 if (unlikely(pos + count > max)) { 708 retval = -EOVERFLOW; 709 if (pos >= max) 710 goto fput_out; 711 count = max - pos; 712 } 713 714 retval = in_file->f_op->sendfile(in_file, ppos, count, file_send_actor, out_file); 715 716 if (retval > 0) { 717 current->rchar += retval; 718 current->wchar += retval; 719 } 720 current->syscr++; 721 current->syscw++; 722 723 if (*ppos > max) 724 retval = -EOVERFLOW; 725 726 fput_out: 727 fput_light(out_file, fput_needed_out); 728 fput_in: 729 fput_light(in_file, fput_needed_in); 730 out: 731 return retval; 732 } 733 734 asmlinkage ssize_t sys_sendfile(int out_fd, int in_fd, off_t __user *offset, size_t count) 735 { 736 loff_t pos; 737 off_t off; 738 ssize_t ret; 739 740 if (offset) { 741 if (unlikely(get_user(off, offset))) 742 return -EFAULT; 743 pos = off; 744 ret = do_sendfile(out_fd, in_fd, &pos, count, MAX_NON_LFS); 745 if (unlikely(put_user(pos, offset))) 746 return -EFAULT; 747 return ret; 748 } 749 750 return do_sendfile(out_fd, in_fd, NULL, count, 0); 751 } 752 753 asmlinkage ssize_t sys_sendfile64(int out_fd, int in_fd, loff_t __user *offset, size_t count) 754 { 755 loff_t pos; 756 ssize_t ret; 757 758 if (offset) { 759 if (unlikely(copy_from_user(&pos, offset, sizeof(loff_t)))) 760 return -EFAULT; 761 ret = do_sendfile(out_fd, in_fd, &pos, count, 0); 762 if (unlikely(put_user(pos, offset))) 763 return -EFAULT; 764 return ret; 765 } 766 767 return do_sendfile(out_fd, in_fd, NULL, count, 0); 768 } 769