1 /* 2 * linux/fs/read_write.c 3 * 4 * Copyright (C) 1991, 1992 Linus Torvalds 5 */ 6 7 #include <linux/slab.h> 8 #include <linux/stat.h> 9 #include <linux/fcntl.h> 10 #include <linux/file.h> 11 #include <linux/uio.h> 12 #include <linux/smp_lock.h> 13 #include <linux/fsnotify.h> 14 #include <linux/security.h> 15 #include <linux/module.h> 16 #include <linux/syscalls.h> 17 18 #include <asm/uaccess.h> 19 #include <asm/unistd.h> 20 21 struct file_operations generic_ro_fops = { 22 .llseek = generic_file_llseek, 23 .read = generic_file_read, 24 .mmap = generic_file_readonly_mmap, 25 .sendfile = generic_file_sendfile, 26 }; 27 28 EXPORT_SYMBOL(generic_ro_fops); 29 30 loff_t generic_file_llseek(struct file *file, loff_t offset, int origin) 31 { 32 long long retval; 33 struct inode *inode = file->f_mapping->host; 34 35 down(&inode->i_sem); 36 switch (origin) { 37 case 2: 38 offset += inode->i_size; 39 break; 40 case 1: 41 offset += file->f_pos; 42 } 43 retval = -EINVAL; 44 if (offset>=0 && offset<=inode->i_sb->s_maxbytes) { 45 if (offset != file->f_pos) { 46 file->f_pos = offset; 47 file->f_version = 0; 48 } 49 retval = offset; 50 } 51 up(&inode->i_sem); 52 return retval; 53 } 54 55 EXPORT_SYMBOL(generic_file_llseek); 56 57 loff_t remote_llseek(struct file *file, loff_t offset, int origin) 58 { 59 long long retval; 60 61 lock_kernel(); 62 switch (origin) { 63 case 2: 64 offset += i_size_read(file->f_dentry->d_inode); 65 break; 66 case 1: 67 offset += file->f_pos; 68 } 69 retval = -EINVAL; 70 if (offset>=0 && offset<=file->f_dentry->d_inode->i_sb->s_maxbytes) { 71 if (offset != file->f_pos) { 72 file->f_pos = offset; 73 file->f_version = 0; 74 } 75 retval = offset; 76 } 77 unlock_kernel(); 78 return retval; 79 } 80 EXPORT_SYMBOL(remote_llseek); 81 82 loff_t no_llseek(struct file *file, loff_t offset, int origin) 83 { 84 return -ESPIPE; 85 } 86 EXPORT_SYMBOL(no_llseek); 87 88 loff_t default_llseek(struct file *file, loff_t offset, int origin) 89 { 90 long long retval; 91 92 lock_kernel(); 93 switch (origin) { 94 case 2: 95 offset += i_size_read(file->f_dentry->d_inode); 96 break; 97 case 1: 98 offset += file->f_pos; 99 } 100 retval = -EINVAL; 101 if (offset >= 0) { 102 if (offset != file->f_pos) { 103 file->f_pos = offset; 104 file->f_version = 0; 105 } 106 retval = offset; 107 } 108 unlock_kernel(); 109 return retval; 110 } 111 EXPORT_SYMBOL(default_llseek); 112 113 loff_t vfs_llseek(struct file *file, loff_t offset, int origin) 114 { 115 loff_t (*fn)(struct file *, loff_t, int); 116 117 fn = no_llseek; 118 if (file->f_mode & FMODE_LSEEK) { 119 fn = default_llseek; 120 if (file->f_op && file->f_op->llseek) 121 fn = file->f_op->llseek; 122 } 123 return fn(file, offset, origin); 124 } 125 EXPORT_SYMBOL(vfs_llseek); 126 127 asmlinkage off_t sys_lseek(unsigned int fd, off_t offset, unsigned int origin) 128 { 129 off_t retval; 130 struct file * file; 131 int fput_needed; 132 133 retval = -EBADF; 134 file = fget_light(fd, &fput_needed); 135 if (!file) 136 goto bad; 137 138 retval = -EINVAL; 139 if (origin <= 2) { 140 loff_t res = vfs_llseek(file, offset, origin); 141 retval = res; 142 if (res != (loff_t)retval) 143 retval = -EOVERFLOW; /* LFS: should only happen on 32 bit platforms */ 144 } 145 fput_light(file, fput_needed); 146 bad: 147 return retval; 148 } 149 150 #ifdef __ARCH_WANT_SYS_LLSEEK 151 asmlinkage long sys_llseek(unsigned int fd, unsigned long offset_high, 152 unsigned long offset_low, loff_t __user * result, 153 unsigned int origin) 154 { 155 int retval; 156 struct file * file; 157 loff_t offset; 158 int fput_needed; 159 160 retval = -EBADF; 161 file = fget_light(fd, &fput_needed); 162 if (!file) 163 goto bad; 164 165 retval = -EINVAL; 166 if (origin > 2) 167 goto out_putf; 168 169 offset = vfs_llseek(file, ((loff_t) offset_high << 32) | offset_low, 170 origin); 171 172 retval = (int)offset; 173 if (offset >= 0) { 174 retval = -EFAULT; 175 if (!copy_to_user(result, &offset, sizeof(offset))) 176 retval = 0; 177 } 178 out_putf: 179 fput_light(file, fput_needed); 180 bad: 181 return retval; 182 } 183 #endif 184 185 186 int rw_verify_area(int read_write, struct file *file, loff_t *ppos, size_t count) 187 { 188 struct inode *inode; 189 loff_t pos; 190 191 if (unlikely(count > INT_MAX)) 192 goto Einval; 193 pos = *ppos; 194 if (unlikely((pos < 0) || (loff_t) (pos + count) < 0)) 195 goto Einval; 196 197 inode = file->f_dentry->d_inode; 198 if (inode->i_flock && MANDATORY_LOCK(inode)) 199 return locks_mandatory_area(read_write == READ ? FLOCK_VERIFY_READ : FLOCK_VERIFY_WRITE, inode, file, pos, count); 200 return 0; 201 202 Einval: 203 return -EINVAL; 204 } 205 206 static void wait_on_retry_sync_kiocb(struct kiocb *iocb) 207 { 208 set_current_state(TASK_UNINTERRUPTIBLE); 209 if (!kiocbIsKicked(iocb)) 210 schedule(); 211 else 212 kiocbClearKicked(iocb); 213 __set_current_state(TASK_RUNNING); 214 } 215 216 ssize_t do_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos) 217 { 218 struct kiocb kiocb; 219 ssize_t ret; 220 221 init_sync_kiocb(&kiocb, filp); 222 kiocb.ki_pos = *ppos; 223 while (-EIOCBRETRY == 224 (ret = filp->f_op->aio_read(&kiocb, buf, len, kiocb.ki_pos))) 225 wait_on_retry_sync_kiocb(&kiocb); 226 227 if (-EIOCBQUEUED == ret) 228 ret = wait_on_sync_kiocb(&kiocb); 229 *ppos = kiocb.ki_pos; 230 return ret; 231 } 232 233 EXPORT_SYMBOL(do_sync_read); 234 235 ssize_t vfs_read(struct file *file, char __user *buf, size_t count, loff_t *pos) 236 { 237 ssize_t ret; 238 239 if (!(file->f_mode & FMODE_READ)) 240 return -EBADF; 241 if (!file->f_op || (!file->f_op->read && !file->f_op->aio_read)) 242 return -EINVAL; 243 if (unlikely(!access_ok(VERIFY_WRITE, buf, count))) 244 return -EFAULT; 245 246 ret = rw_verify_area(READ, file, pos, count); 247 if (!ret) { 248 ret = security_file_permission (file, MAY_READ); 249 if (!ret) { 250 if (file->f_op->read) 251 ret = file->f_op->read(file, buf, count, pos); 252 else 253 ret = do_sync_read(file, buf, count, pos); 254 if (ret > 0) { 255 fsnotify_access(file->f_dentry); 256 current->rchar += ret; 257 } 258 current->syscr++; 259 } 260 } 261 262 return ret; 263 } 264 265 EXPORT_SYMBOL(vfs_read); 266 267 ssize_t do_sync_write(struct file *filp, const char __user *buf, size_t len, loff_t *ppos) 268 { 269 struct kiocb kiocb; 270 ssize_t ret; 271 272 init_sync_kiocb(&kiocb, filp); 273 kiocb.ki_pos = *ppos; 274 while (-EIOCBRETRY == 275 (ret = filp->f_op->aio_write(&kiocb, buf, len, kiocb.ki_pos))) 276 wait_on_retry_sync_kiocb(&kiocb); 277 278 if (-EIOCBQUEUED == ret) 279 ret = wait_on_sync_kiocb(&kiocb); 280 *ppos = kiocb.ki_pos; 281 return ret; 282 } 283 284 EXPORT_SYMBOL(do_sync_write); 285 286 ssize_t vfs_write(struct file *file, const char __user *buf, size_t count, loff_t *pos) 287 { 288 ssize_t ret; 289 290 if (!(file->f_mode & FMODE_WRITE)) 291 return -EBADF; 292 if (!file->f_op || (!file->f_op->write && !file->f_op->aio_write)) 293 return -EINVAL; 294 if (unlikely(!access_ok(VERIFY_READ, buf, count))) 295 return -EFAULT; 296 297 ret = rw_verify_area(WRITE, file, pos, count); 298 if (!ret) { 299 ret = security_file_permission (file, MAY_WRITE); 300 if (!ret) { 301 if (file->f_op->write) 302 ret = file->f_op->write(file, buf, count, pos); 303 else 304 ret = do_sync_write(file, buf, count, pos); 305 if (ret > 0) { 306 fsnotify_modify(file->f_dentry); 307 current->wchar += ret; 308 } 309 current->syscw++; 310 } 311 } 312 313 return ret; 314 } 315 316 EXPORT_SYMBOL(vfs_write); 317 318 static inline loff_t file_pos_read(struct file *file) 319 { 320 return file->f_pos; 321 } 322 323 static inline void file_pos_write(struct file *file, loff_t pos) 324 { 325 file->f_pos = pos; 326 } 327 328 asmlinkage ssize_t sys_read(unsigned int fd, char __user * buf, size_t count) 329 { 330 struct file *file; 331 ssize_t ret = -EBADF; 332 int fput_needed; 333 334 file = fget_light(fd, &fput_needed); 335 if (file) { 336 loff_t pos = file_pos_read(file); 337 ret = vfs_read(file, buf, count, &pos); 338 file_pos_write(file, pos); 339 fput_light(file, fput_needed); 340 } 341 342 return ret; 343 } 344 EXPORT_SYMBOL_GPL(sys_read); 345 346 asmlinkage ssize_t sys_write(unsigned int fd, const char __user * buf, size_t count) 347 { 348 struct file *file; 349 ssize_t ret = -EBADF; 350 int fput_needed; 351 352 file = fget_light(fd, &fput_needed); 353 if (file) { 354 loff_t pos = file_pos_read(file); 355 ret = vfs_write(file, buf, count, &pos); 356 file_pos_write(file, pos); 357 fput_light(file, fput_needed); 358 } 359 360 return ret; 361 } 362 363 asmlinkage ssize_t sys_pread64(unsigned int fd, char __user *buf, 364 size_t count, loff_t pos) 365 { 366 struct file *file; 367 ssize_t ret = -EBADF; 368 int fput_needed; 369 370 if (pos < 0) 371 return -EINVAL; 372 373 file = fget_light(fd, &fput_needed); 374 if (file) { 375 ret = -ESPIPE; 376 if (file->f_mode & FMODE_PREAD) 377 ret = vfs_read(file, buf, count, &pos); 378 fput_light(file, fput_needed); 379 } 380 381 return ret; 382 } 383 384 asmlinkage ssize_t sys_pwrite64(unsigned int fd, const char __user *buf, 385 size_t count, loff_t pos) 386 { 387 struct file *file; 388 ssize_t ret = -EBADF; 389 int fput_needed; 390 391 if (pos < 0) 392 return -EINVAL; 393 394 file = fget_light(fd, &fput_needed); 395 if (file) { 396 ret = -ESPIPE; 397 if (file->f_mode & FMODE_PWRITE) 398 ret = vfs_write(file, buf, count, &pos); 399 fput_light(file, fput_needed); 400 } 401 402 return ret; 403 } 404 405 /* 406 * Reduce an iovec's length in-place. Return the resulting number of segments 407 */ 408 unsigned long iov_shorten(struct iovec *iov, unsigned long nr_segs, size_t to) 409 { 410 unsigned long seg = 0; 411 size_t len = 0; 412 413 while (seg < nr_segs) { 414 seg++; 415 if (len + iov->iov_len >= to) { 416 iov->iov_len = to - len; 417 break; 418 } 419 len += iov->iov_len; 420 iov++; 421 } 422 return seg; 423 } 424 425 EXPORT_SYMBOL(iov_shorten); 426 427 /* A write operation does a read from user space and vice versa */ 428 #define vrfy_dir(type) ((type) == READ ? VERIFY_WRITE : VERIFY_READ) 429 430 static ssize_t do_readv_writev(int type, struct file *file, 431 const struct iovec __user * uvector, 432 unsigned long nr_segs, loff_t *pos) 433 { 434 typedef ssize_t (*io_fn_t)(struct file *, char __user *, size_t, loff_t *); 435 typedef ssize_t (*iov_fn_t)(struct file *, const struct iovec *, unsigned long, loff_t *); 436 437 size_t tot_len; 438 struct iovec iovstack[UIO_FASTIOV]; 439 struct iovec *iov=iovstack, *vector; 440 ssize_t ret; 441 int seg; 442 io_fn_t fn; 443 iov_fn_t fnv; 444 445 /* 446 * SuS says "The readv() function *may* fail if the iovcnt argument 447 * was less than or equal to 0, or greater than {IOV_MAX}. Linux has 448 * traditionally returned zero for zero segments, so... 449 */ 450 ret = 0; 451 if (nr_segs == 0) 452 goto out; 453 454 /* 455 * First get the "struct iovec" from user memory and 456 * verify all the pointers 457 */ 458 ret = -EINVAL; 459 if ((nr_segs > UIO_MAXIOV) || (nr_segs <= 0)) 460 goto out; 461 if (!file->f_op) 462 goto out; 463 if (nr_segs > UIO_FASTIOV) { 464 ret = -ENOMEM; 465 iov = kmalloc(nr_segs*sizeof(struct iovec), GFP_KERNEL); 466 if (!iov) 467 goto out; 468 } 469 ret = -EFAULT; 470 if (copy_from_user(iov, uvector, nr_segs*sizeof(*uvector))) 471 goto out; 472 473 /* 474 * Single unix specification: 475 * We should -EINVAL if an element length is not >= 0 and fitting an 476 * ssize_t. The total length is fitting an ssize_t 477 * 478 * Be careful here because iov_len is a size_t not an ssize_t 479 */ 480 tot_len = 0; 481 ret = -EINVAL; 482 for (seg = 0; seg < nr_segs; seg++) { 483 void __user *buf = iov[seg].iov_base; 484 ssize_t len = (ssize_t)iov[seg].iov_len; 485 486 if (len < 0) /* size_t not fitting an ssize_t .. */ 487 goto out; 488 if (unlikely(!access_ok(vrfy_dir(type), buf, len))) 489 goto Efault; 490 tot_len += len; 491 if ((ssize_t)tot_len < 0) /* maths overflow on the ssize_t */ 492 goto out; 493 } 494 if (tot_len == 0) { 495 ret = 0; 496 goto out; 497 } 498 499 ret = rw_verify_area(type, file, pos, tot_len); 500 if (ret) 501 goto out; 502 ret = security_file_permission(file, type == READ ? MAY_READ : MAY_WRITE); 503 if (ret) 504 goto out; 505 506 fnv = NULL; 507 if (type == READ) { 508 fn = file->f_op->read; 509 fnv = file->f_op->readv; 510 } else { 511 fn = (io_fn_t)file->f_op->write; 512 fnv = file->f_op->writev; 513 } 514 if (fnv) { 515 ret = fnv(file, iov, nr_segs, pos); 516 goto out; 517 } 518 519 /* Do it by hand, with file-ops */ 520 ret = 0; 521 vector = iov; 522 while (nr_segs > 0) { 523 void __user * base; 524 size_t len; 525 ssize_t nr; 526 527 base = vector->iov_base; 528 len = vector->iov_len; 529 vector++; 530 nr_segs--; 531 532 nr = fn(file, base, len, pos); 533 534 if (nr < 0) { 535 if (!ret) ret = nr; 536 break; 537 } 538 ret += nr; 539 if (nr != len) 540 break; 541 } 542 out: 543 if (iov != iovstack) 544 kfree(iov); 545 if ((ret + (type == READ)) > 0) { 546 if (type == READ) 547 fsnotify_access(file->f_dentry); 548 else 549 fsnotify_modify(file->f_dentry); 550 } 551 return ret; 552 Efault: 553 ret = -EFAULT; 554 goto out; 555 } 556 557 ssize_t vfs_readv(struct file *file, const struct iovec __user *vec, 558 unsigned long vlen, loff_t *pos) 559 { 560 if (!(file->f_mode & FMODE_READ)) 561 return -EBADF; 562 if (!file->f_op || (!file->f_op->readv && !file->f_op->read)) 563 return -EINVAL; 564 565 return do_readv_writev(READ, file, vec, vlen, pos); 566 } 567 568 EXPORT_SYMBOL(vfs_readv); 569 570 ssize_t vfs_writev(struct file *file, const struct iovec __user *vec, 571 unsigned long vlen, loff_t *pos) 572 { 573 if (!(file->f_mode & FMODE_WRITE)) 574 return -EBADF; 575 if (!file->f_op || (!file->f_op->writev && !file->f_op->write)) 576 return -EINVAL; 577 578 return do_readv_writev(WRITE, file, vec, vlen, pos); 579 } 580 581 EXPORT_SYMBOL(vfs_writev); 582 583 asmlinkage ssize_t 584 sys_readv(unsigned long fd, const struct iovec __user *vec, unsigned long vlen) 585 { 586 struct file *file; 587 ssize_t ret = -EBADF; 588 int fput_needed; 589 590 file = fget_light(fd, &fput_needed); 591 if (file) { 592 loff_t pos = file_pos_read(file); 593 ret = vfs_readv(file, vec, vlen, &pos); 594 file_pos_write(file, pos); 595 fput_light(file, fput_needed); 596 } 597 598 if (ret > 0) 599 current->rchar += ret; 600 current->syscr++; 601 return ret; 602 } 603 604 asmlinkage ssize_t 605 sys_writev(unsigned long fd, const struct iovec __user *vec, unsigned long vlen) 606 { 607 struct file *file; 608 ssize_t ret = -EBADF; 609 int fput_needed; 610 611 file = fget_light(fd, &fput_needed); 612 if (file) { 613 loff_t pos = file_pos_read(file); 614 ret = vfs_writev(file, vec, vlen, &pos); 615 file_pos_write(file, pos); 616 fput_light(file, fput_needed); 617 } 618 619 if (ret > 0) 620 current->wchar += ret; 621 current->syscw++; 622 return ret; 623 } 624 625 static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos, 626 size_t count, loff_t max) 627 { 628 struct file * in_file, * out_file; 629 struct inode * in_inode, * out_inode; 630 loff_t pos; 631 ssize_t retval; 632 int fput_needed_in, fput_needed_out; 633 634 /* 635 * Get input file, and verify that it is ok.. 636 */ 637 retval = -EBADF; 638 in_file = fget_light(in_fd, &fput_needed_in); 639 if (!in_file) 640 goto out; 641 if (!(in_file->f_mode & FMODE_READ)) 642 goto fput_in; 643 retval = -EINVAL; 644 in_inode = in_file->f_dentry->d_inode; 645 if (!in_inode) 646 goto fput_in; 647 if (!in_file->f_op || !in_file->f_op->sendfile) 648 goto fput_in; 649 retval = -ESPIPE; 650 if (!ppos) 651 ppos = &in_file->f_pos; 652 else 653 if (!(in_file->f_mode & FMODE_PREAD)) 654 goto fput_in; 655 retval = rw_verify_area(READ, in_file, ppos, count); 656 if (retval) 657 goto fput_in; 658 659 retval = security_file_permission (in_file, MAY_READ); 660 if (retval) 661 goto fput_in; 662 663 /* 664 * Get output file, and verify that it is ok.. 665 */ 666 retval = -EBADF; 667 out_file = fget_light(out_fd, &fput_needed_out); 668 if (!out_file) 669 goto fput_in; 670 if (!(out_file->f_mode & FMODE_WRITE)) 671 goto fput_out; 672 retval = -EINVAL; 673 if (!out_file->f_op || !out_file->f_op->sendpage) 674 goto fput_out; 675 out_inode = out_file->f_dentry->d_inode; 676 retval = rw_verify_area(WRITE, out_file, &out_file->f_pos, count); 677 if (retval) 678 goto fput_out; 679 680 retval = security_file_permission (out_file, MAY_WRITE); 681 if (retval) 682 goto fput_out; 683 684 if (!max) 685 max = min(in_inode->i_sb->s_maxbytes, out_inode->i_sb->s_maxbytes); 686 687 pos = *ppos; 688 retval = -EINVAL; 689 if (unlikely(pos < 0)) 690 goto fput_out; 691 if (unlikely(pos + count > max)) { 692 retval = -EOVERFLOW; 693 if (pos >= max) 694 goto fput_out; 695 count = max - pos; 696 } 697 698 retval = in_file->f_op->sendfile(in_file, ppos, count, file_send_actor, out_file); 699 700 if (retval > 0) { 701 current->rchar += retval; 702 current->wchar += retval; 703 } 704 current->syscr++; 705 current->syscw++; 706 707 if (*ppos > max) 708 retval = -EOVERFLOW; 709 710 fput_out: 711 fput_light(out_file, fput_needed_out); 712 fput_in: 713 fput_light(in_file, fput_needed_in); 714 out: 715 return retval; 716 } 717 718 asmlinkage ssize_t sys_sendfile(int out_fd, int in_fd, off_t __user *offset, size_t count) 719 { 720 loff_t pos; 721 off_t off; 722 ssize_t ret; 723 724 if (offset) { 725 if (unlikely(get_user(off, offset))) 726 return -EFAULT; 727 pos = off; 728 ret = do_sendfile(out_fd, in_fd, &pos, count, MAX_NON_LFS); 729 if (unlikely(put_user(pos, offset))) 730 return -EFAULT; 731 return ret; 732 } 733 734 return do_sendfile(out_fd, in_fd, NULL, count, 0); 735 } 736 737 asmlinkage ssize_t sys_sendfile64(int out_fd, int in_fd, loff_t __user *offset, size_t count) 738 { 739 loff_t pos; 740 ssize_t ret; 741 742 if (offset) { 743 if (unlikely(copy_from_user(&pos, offset, sizeof(loff_t)))) 744 return -EFAULT; 745 ret = do_sendfile(out_fd, in_fd, &pos, count, 0); 746 if (unlikely(put_user(pos, offset))) 747 return -EFAULT; 748 return ret; 749 } 750 751 return do_sendfile(out_fd, in_fd, NULL, count, 0); 752 } 753