/*
 *  linux/fs/read_write.c
 *
 *  Copyright (C) 1991, 1992  Linus Torvalds
 */

#include <linux/slab.h>
#include <linux/stat.h>
#include <linux/fcntl.h>
#include <linux/file.h>
#include <linux/uio.h>
#include <linux/fsnotify.h>
#include <linux/security.h>
#include <linux/export.h>
#include <linux/syscalls.h>
#include <linux/pagemap.h>
#include <linux/splice.h>
#include <linux/compat.h>
#include <linux/mount.h>
#include <linux/fs.h>
#include "internal.h"

#include <asm/uaccess.h>
#include <asm/unistd.h>

typedef ssize_t (*io_fn_t)(struct file *, char __user *, size_t, loff_t *);
typedef ssize_t (*iter_fn_t)(struct kiocb *, struct iov_iter *);

const struct file_operations generic_ro_fops = {
	.llseek		= generic_file_llseek,
	.read_iter	= generic_file_read_iter,
	.mmap		= generic_file_readonly_mmap,
	.splice_read	= generic_file_splice_read,
};

EXPORT_SYMBOL(generic_ro_fops);

static inline int unsigned_offsets(struct file *file)
{
	return file->f_mode & FMODE_UNSIGNED_OFFSET;
}

/**
 * vfs_setpos - update the file offset for lseek
 * @file:	file structure in question
 * @offset:	file offset to seek to
 * @maxsize:	maximum file size
 *
 * This is a low-level filesystem helper for updating the file offset to
 * the value specified by @offset if the given offset is valid and it is
 * not equal to the current file offset.
 *
 * Return the specified offset on success and -EINVAL on invalid offset.
 */
loff_t vfs_setpos(struct file *file, loff_t offset, loff_t maxsize)
{
	if (offset < 0 && !unsigned_offsets(file))
		return -EINVAL;
	if (offset > maxsize)
		return -EINVAL;

	if (offset != file->f_pos) {
		file->f_pos = offset;
		file->f_version = 0;
	}
	return offset;
}
EXPORT_SYMBOL(vfs_setpos);
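
/*
 * Usage sketch (hypothetical filesystem, not part of this file): a custom
 * ->llseek implementation would normally funnel the new position through
 * vfs_setpos() instead of poking f_pos directly, so the sign and maximum
 * checks above are applied consistently:
 *
 *	static loff_t foo_llseek(struct file *file, loff_t offset, int whence)
 *	{
 *		if (whence != SEEK_SET)
 *			return -EINVAL;
 *		return vfs_setpos(file, offset, FOO_MAX_SIZE);
 *	}
 *
 * where FOO_MAX_SIZE stands for whatever limit the filesystem enforces.
 */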

/**
 * generic_file_llseek_size - generic llseek implementation for regular files
 * @file:	file structure to seek on
 * @offset:	file offset to seek to
 * @whence:	type of seek
 * @maxsize:	max size of this file in file system
 * @eof:	offset used for SEEK_END position
 *
 * This is a variant of generic_file_llseek that allows passing in a custom
 * maximum file size and a custom EOF position, for e.g. hashed directories
 *
 * Synchronization:
 * SEEK_SET and SEEK_END are unsynchronized (but atomic on 64bit platforms)
 * SEEK_CUR is synchronized against other SEEK_CURs, but not read/writes.
 * read/writes behave like SEEK_SET against seeks.
 */
loff_t
generic_file_llseek_size(struct file *file, loff_t offset, int whence,
		loff_t maxsize, loff_t eof)
{
	switch (whence) {
	case SEEK_END:
		offset += eof;
		break;
	case SEEK_CUR:
		/*
		 * Here we special-case the lseek(fd, 0, SEEK_CUR)
		 * position-querying operation.  Avoid rewriting the "same"
		 * f_pos value back to the file because a concurrent read(),
		 * write() or lseek() might have altered it
		 */
		if (offset == 0)
			return file->f_pos;
		/*
		 * f_lock protects against read/modify/write race with other
		 * SEEK_CURs. Note that parallel writes and reads behave
		 * like SEEK_SET.
		 */
		spin_lock(&file->f_lock);
		offset = vfs_setpos(file, file->f_pos + offset, maxsize);
		spin_unlock(&file->f_lock);
		return offset;
	case SEEK_DATA:
		/*
		 * In the generic case the entire file is data, so as long as
		 * offset isn't at the end of the file then the offset is data.
		 */
		if (offset >= eof)
			return -ENXIO;
		break;
	case SEEK_HOLE:
		/*
		 * There is a virtual hole at the end of the file, so as long as
		 * offset isn't i_size or larger, return i_size.
		 */
		if (offset >= eof)
			return -ENXIO;
		offset = eof;
		break;
	}

	return vfs_setpos(file, offset, maxsize);
}
EXPORT_SYMBOL(generic_file_llseek_size);

/**
 * generic_file_llseek - generic llseek implementation for regular files
 * @file:	file structure to seek on
 * @offset:	file offset to seek to
 * @whence:	type of seek
 *
 * This is a generic implementation of ->llseek usable for all normal local
 * filesystems.  It just updates the file offset to the value specified by
 * @offset and @whence.
 */
loff_t generic_file_llseek(struct file *file, loff_t offset, int whence)
{
	struct inode *inode = file->f_mapping->host;

	return generic_file_llseek_size(file, offset, whence,
					inode->i_sb->s_maxbytes,
					i_size_read(inode));
}
EXPORT_SYMBOL(generic_file_llseek);

/**
 * fixed_size_llseek - llseek implementation for fixed-sized devices
 * @file:	file structure to seek on
 * @offset:	file offset to seek to
 * @whence:	type of seek
 * @size:	size of the file
 *
 */
loff_t fixed_size_llseek(struct file *file, loff_t offset, int whence, loff_t size)
{
	switch (whence) {
	case SEEK_SET: case SEEK_CUR: case SEEK_END:
		return generic_file_llseek_size(file, offset, whence,
						size, size);
	default:
		return -EINVAL;
	}
}
EXPORT_SYMBOL(fixed_size_llseek);

/**
 * no_seek_end_llseek - llseek implementation for devices that do not support SEEK_END
 * @file:	file structure to seek on
 * @offset:	file offset to seek to
 * @whence:	type of seek
 *
 */
loff_t no_seek_end_llseek(struct file *file, loff_t offset, int whence)
{
	switch (whence) {
	case SEEK_SET: case SEEK_CUR:
		return generic_file_llseek_size(file, offset, whence,
						OFFSET_MAX, 0);
	default:
		return -EINVAL;
	}
}
EXPORT_SYMBOL(no_seek_end_llseek);

/**
 * no_seek_end_llseek_size - llseek implementation for devices that do not support SEEK_END
 * @file:	file structure to seek on
 * @offset:	file offset to seek to
 * @whence:	type of seek
 * @size:	maximal offset allowed
 *
 */
loff_t no_seek_end_llseek_size(struct file *file, loff_t offset, int whence, loff_t size)
{
	switch (whence) {
	case SEEK_SET: case SEEK_CUR:
		return generic_file_llseek_size(file, offset, whence,
						size, 0);
	default:
		return -EINVAL;
	}
}
EXPORT_SYMBOL(no_seek_end_llseek_size);
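
/*
 * Usage sketch (hypothetical driver, not part of this file): a device that
 * exposes a fixed-size region, say a 4 KiB register window, can wire
 * fixed_size_llseek() straight into its file_operations:
 *
 *	static loff_t foo_llseek(struct file *file, loff_t offset, int whence)
 *	{
 *		return fixed_size_llseek(file, offset, whence, 4096);
 *	}
 *
 *	static const struct file_operations foo_fops = {
 *		.llseek	= foo_llseek,
 *		.read	= foo_read,
 *		.write	= foo_write,
 *	};
 *
 * A device without a meaningful end would pick no_seek_end_llseek() instead.
 */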

/**
 * noop_llseek - No Operation Performed llseek implementation
 * @file:	file structure to seek on
 * @offset:	file offset to seek to
 * @whence:	type of seek
 *
 * This is an implementation of ->llseek usable for the rare special case when
 * userspace expects the seek to succeed but the (device) file is actually not
 * able to perform the seek. In this case you use noop_llseek() instead of
 * falling back to the default implementation of ->llseek.
 */
loff_t noop_llseek(struct file *file, loff_t offset, int whence)
{
	return file->f_pos;
}
EXPORT_SYMBOL(noop_llseek);

loff_t no_llseek(struct file *file, loff_t offset, int whence)
{
	return -ESPIPE;
}
EXPORT_SYMBOL(no_llseek);

loff_t default_llseek(struct file *file, loff_t offset, int whence)
{
	struct inode *inode = file_inode(file);
	loff_t retval;

	inode_lock(inode);
	switch (whence) {
		case SEEK_END:
			offset += i_size_read(inode);
			break;
		case SEEK_CUR:
			if (offset == 0) {
				retval = file->f_pos;
				goto out;
			}
			offset += file->f_pos;
			break;
		case SEEK_DATA:
			/*
			 * In the generic case the entire file is data, so as
			 * long as offset isn't at the end of the file then the
			 * offset is data.
			 */
			if (offset >= inode->i_size) {
				retval = -ENXIO;
				goto out;
			}
			break;
		case SEEK_HOLE:
			/*
			 * There is a virtual hole at the end of the file, so
			 * as long as offset isn't i_size or larger, return
			 * i_size.
			 */
			if (offset >= inode->i_size) {
				retval = -ENXIO;
				goto out;
			}
			offset = inode->i_size;
			break;
	}
	retval = -EINVAL;
	if (offset >= 0 || unsigned_offsets(file)) {
		if (offset != file->f_pos) {
			file->f_pos = offset;
			file->f_version = 0;
		}
		retval = offset;
	}
out:
	inode_unlock(inode);
	return retval;
}
EXPORT_SYMBOL(default_llseek);

loff_t vfs_llseek(struct file *file, loff_t offset, int whence)
{
	loff_t (*fn)(struct file *, loff_t, int);

	fn = no_llseek;
	if (file->f_mode & FMODE_LSEEK) {
		if (file->f_op->llseek)
			fn = file->f_op->llseek;
	}
	return fn(file, offset, whence);
}
EXPORT_SYMBOL(vfs_llseek);

static inline struct fd fdget_pos(int fd)
{
	return __to_fd(__fdget_pos(fd));
}

static inline void fdput_pos(struct fd f)
{
	if (f.flags & FDPUT_POS_UNLOCK)
		mutex_unlock(&f.file->f_pos_lock);
	fdput(f);
}

SYSCALL_DEFINE3(lseek, unsigned int, fd, off_t, offset, unsigned int, whence)
{
	off_t retval;
	struct fd f = fdget_pos(fd);
	if (!f.file)
		return -EBADF;

	retval = -EINVAL;
	if (whence <= SEEK_MAX) {
		loff_t res = vfs_llseek(f.file, offset, whence);
		retval = res;
		if (res != (loff_t)retval)
			retval = -EOVERFLOW;	/* LFS: should only happen on 32 bit platforms */
	}
	fdput_pos(f);
	return retval;
}

#ifdef CONFIG_COMPAT
COMPAT_SYSCALL_DEFINE3(lseek, unsigned int, fd, compat_off_t, offset, unsigned int, whence)
{
	return sys_lseek(fd, offset, whence);
}
#endif

#ifdef __ARCH_WANT_SYS_LLSEEK
SYSCALL_DEFINE5(llseek, unsigned int, fd, unsigned long, offset_high,
		unsigned long, offset_low, loff_t __user *, result,
		unsigned int, whence)
{
	int retval;
	struct fd f = fdget_pos(fd);
	loff_t offset;

	if (!f.file)
		return -EBADF;

	retval = -EINVAL;
	if (whence > SEEK_MAX)
		goto out_putf;

	offset = vfs_llseek(f.file, ((loff_t) offset_high << 32) | offset_low,
			whence);

	retval = (int)offset;
	if (offset >= 0) {
		retval = -EFAULT;
		if (!copy_to_user(result, &offset, sizeof(offset)))
			retval = 0;
	}
out_putf:
	fdput_pos(f);
	return retval;
}
#endif

ssize_t vfs_iter_read(struct file *file, struct iov_iter *iter, loff_t *ppos)
{
	struct kiocb kiocb;
	ssize_t ret;

	if (!file->f_op->read_iter)
		return -EINVAL;

	init_sync_kiocb(&kiocb, file);
	kiocb.ki_pos = *ppos;

	iter->type |= READ;
	ret = file->f_op->read_iter(&kiocb, iter);
	BUG_ON(ret == -EIOCBQUEUED);
	if (ret > 0)
		*ppos = kiocb.ki_pos;
	return ret;
}
EXPORT_SYMBOL(vfs_iter_read);

ssize_t vfs_iter_write(struct file *file, struct iov_iter *iter, loff_t *ppos)
{
	struct kiocb kiocb;
	ssize_t ret;

	if (!file->f_op->write_iter)
		return -EINVAL;

	init_sync_kiocb(&kiocb, file);
	kiocb.ki_pos = *ppos;

	iter->type |= WRITE;
	ret = file->f_op->write_iter(&kiocb, iter);
	BUG_ON(ret == -EIOCBQUEUED);
	if (ret > 0)
		*ppos = kiocb.ki_pos;
	return ret;
}
EXPORT_SYMBOL(vfs_iter_write);
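
/*
 * Usage sketch for the two helpers above (the caller and its arguments are
 * illustrative, not part of this file): kernel code that holds a user buffer
 * wraps it in an iov_iter and lets vfs_iter_read() drive ->read_iter and
 * update the position:
 *
 *	static ssize_t foo_read(struct file *filp, char __user *buf,
 *				size_t len, loff_t *ppos)
 *	{
 *		struct iovec iov = { .iov_base = buf, .iov_len = len };
 *		struct iov_iter iter;
 *
 *		iov_iter_init(&iter, READ, &iov, 1, len);
 *		return vfs_iter_read(filp, &iter, ppos);
 *	}
 */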

/*
 * rw_verify_area doesn't like huge counts. We limit
 * them to something that fits in "int" so that others
 * won't have to do range checks all the time.
 */
int rw_verify_area(int read_write, struct file *file, const loff_t *ppos, size_t count)
{
	struct inode *inode;
	loff_t pos;
	int retval = -EINVAL;

	inode = file_inode(file);
	if (unlikely((ssize_t) count < 0))
		return retval;
	pos = *ppos;
	if (unlikely(pos < 0)) {
		if (!unsigned_offsets(file))
			return retval;
		if (count >= -pos) /* both values are in 0..LLONG_MAX */
			return -EOVERFLOW;
	} else if (unlikely((loff_t) (pos + count) < 0)) {
		if (!unsigned_offsets(file))
			return retval;
	}

	if (unlikely(inode->i_flctx && mandatory_lock(inode))) {
		retval = locks_mandatory_area(inode, file, pos, pos + count - 1,
				read_write == READ ? F_RDLCK : F_WRLCK);
		if (retval < 0)
			return retval;
	}
	retval = security_file_permission(file,
				read_write == READ ? MAY_READ : MAY_WRITE);
	if (retval)
		return retval;
	return count > MAX_RW_COUNT ? MAX_RW_COUNT : count;
}

static ssize_t new_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos)
{
	struct iovec iov = { .iov_base = buf, .iov_len = len };
	struct kiocb kiocb;
	struct iov_iter iter;
	ssize_t ret;

	init_sync_kiocb(&kiocb, filp);
	kiocb.ki_pos = *ppos;
	iov_iter_init(&iter, READ, &iov, 1, len);

	ret = filp->f_op->read_iter(&kiocb, &iter);
	BUG_ON(ret == -EIOCBQUEUED);
	*ppos = kiocb.ki_pos;
	return ret;
}

ssize_t __vfs_read(struct file *file, char __user *buf, size_t count,
		   loff_t *pos)
{
	if (file->f_op->read)
		return file->f_op->read(file, buf, count, pos);
	else if (file->f_op->read_iter)
		return new_sync_read(file, buf, count, pos);
	else
		return -EINVAL;
}
EXPORT_SYMBOL(__vfs_read);

ssize_t vfs_read(struct file *file, char __user *buf, size_t count, loff_t *pos)
{
	ssize_t ret;

	if (!(file->f_mode & FMODE_READ))
		return -EBADF;
	if (!(file->f_mode & FMODE_CAN_READ))
		return -EINVAL;
	if (unlikely(!access_ok(VERIFY_WRITE, buf, count)))
		return -EFAULT;

	ret = rw_verify_area(READ, file, pos, count);
	if (ret >= 0) {
		count = ret;
		ret = __vfs_read(file, buf, count, pos);
		if (ret > 0) {
			fsnotify_access(file);
			add_rchar(current, ret);
		}
		inc_syscr(current);
	}

	return ret;
}

EXPORT_SYMBOL(vfs_read);

static ssize_t new_sync_write(struct file *filp, const char __user *buf, size_t len, loff_t *ppos)
{
	struct iovec iov = { .iov_base = (void __user *)buf, .iov_len = len };
	struct kiocb kiocb;
	struct iov_iter iter;
	ssize_t ret;

	init_sync_kiocb(&kiocb, filp);
	kiocb.ki_pos = *ppos;
	iov_iter_init(&iter, WRITE, &iov, 1, len);

	ret = filp->f_op->write_iter(&kiocb, &iter);
	BUG_ON(ret == -EIOCBQUEUED);
	if (ret > 0)
		*ppos = kiocb.ki_pos;
	return ret;
}

ssize_t __vfs_write(struct file *file, const char __user *p, size_t count,
		    loff_t *pos)
{
	if (file->f_op->write)
		return file->f_op->write(file, p, count, pos);
	else if (file->f_op->write_iter)
		return new_sync_write(file, p, count, pos);
	else
		return -EINVAL;
}
EXPORT_SYMBOL(__vfs_write);

ssize_t __kernel_write(struct file *file, const char *buf, size_t count, loff_t *pos)
{
	mm_segment_t old_fs;
	const char __user *p;
	ssize_t ret;

	if (!(file->f_mode & FMODE_CAN_WRITE))
		return -EINVAL;

	old_fs = get_fs();
	set_fs(get_ds());
	p = (__force const char __user *)buf;
	if (count > MAX_RW_COUNT)
		count =  MAX_RW_COUNT;
	ret = __vfs_write(file, p, count, pos);
	set_fs(old_fs);
	if (ret > 0) {
		fsnotify_modify(file);
		add_wchar(current, ret);
	}
	inc_syscw(current);
	return ret;
}

EXPORT_SYMBOL(__kernel_write);

ssize_t vfs_write(struct file *file, const char __user *buf, size_t count, loff_t *pos)
{
	ssize_t ret;

	if (!(file->f_mode & FMODE_WRITE))
		return -EBADF;
	if (!(file->f_mode & FMODE_CAN_WRITE))
		return -EINVAL;
	if (unlikely(!access_ok(VERIFY_READ, buf, count)))
		return -EFAULT;

	ret = rw_verify_area(WRITE, file, pos, count);
	if (ret >= 0) {
		count = ret;
		file_start_write(file);
		ret = __vfs_write(file, buf, count, pos);
		if (ret > 0) {
			fsnotify_modify(file);
			add_wchar(current, ret);
		}
		inc_syscw(current);
		file_end_write(file);
	}

	return ret;
}

EXPORT_SYMBOL(vfs_write);

static inline loff_t file_pos_read(struct file *file)
{
	return file->f_pos;
}

static inline void file_pos_write(struct file *file, loff_t pos)
{
	file->f_pos = pos;
}

SYSCALL_DEFINE3(read, unsigned int, fd, char __user *, buf, size_t, count)
{
	struct fd f = fdget_pos(fd);
	ssize_t ret = -EBADF;

	if (f.file) {
		loff_t pos = file_pos_read(f.file);
		ret = vfs_read(f.file, buf, count, &pos);
		if (ret >= 0)
			file_pos_write(f.file, pos);
		fdput_pos(f);
	}
	return ret;
}

SYSCALL_DEFINE3(write, unsigned int, fd, const char __user *, buf,
		size_t, count)
{
	struct fd f = fdget_pos(fd);
	ssize_t ret = -EBADF;

	if (f.file) {
		loff_t pos = file_pos_read(f.file);
		ret = vfs_write(f.file, buf, count, &pos);
		if (ret >= 0)
			file_pos_write(f.file, pos);
		fdput_pos(f);
	}

	return ret;
}

SYSCALL_DEFINE4(pread64, unsigned int, fd, char __user *, buf,
		size_t, count, loff_t, pos)
{
	struct fd f;
	ssize_t ret = -EBADF;

	if (pos < 0)
		return -EINVAL;

	f = fdget(fd);
	if (f.file) {
		ret = -ESPIPE;
		if (f.file->f_mode & FMODE_PREAD)
			ret = vfs_read(f.file, buf, count, &pos);
		fdput(f);
	}

	return ret;
}

SYSCALL_DEFINE4(pwrite64, unsigned int, fd, const char __user *, buf,
		size_t, count, loff_t, pos)
{
	struct fd f;
	ssize_t ret = -EBADF;

	if (pos < 0)
		return -EINVAL;

	f = fdget(fd);
	if (f.file) {
		ret = -ESPIPE;
		if (f.file->f_mode & FMODE_PWRITE)
			ret = vfs_write(f.file, buf, count, &pos);
		fdput(f);
	}

	return ret;
}

/*
 * Reduce an iovec's length in-place.  Return the resulting number of segments
 */
unsigned long iov_shorten(struct iovec *iov, unsigned long nr_segs, size_t to)
{
	unsigned long seg = 0;
	size_t len = 0;

	while (seg < nr_segs) {
		seg++;
		if (len + iov->iov_len >= to) {
			iov->iov_len = to - len;
			break;
		}
		len += iov->iov_len;
		iov++;
	}
	return seg;
}
EXPORT_SYMBOL(iov_shorten);
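
/*
 * Worked example for iov_shorten() with illustrative values: given three
 * segments of lengths {100, 200, 300} and to == 250, the first segment is
 * kept, the second is truncated in place to 150 bytes and the function
 * returns 2, so the caller submits only the first two segments:
 *
 *	struct iovec v[3] = {
 *		{ .iov_len = 100 }, { .iov_len = 200 }, { .iov_len = 300 },
 *	};
 *	nr = iov_shorten(v, 3, 250);
 *
 * after which nr == 2 and v[1].iov_len == 150.
 */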

static ssize_t do_iter_readv_writev(struct file *filp, struct iov_iter *iter,
		loff_t *ppos, iter_fn_t fn)
{
	struct kiocb kiocb;
	ssize_t ret;

	init_sync_kiocb(&kiocb, filp);
	kiocb.ki_pos = *ppos;

	ret = fn(&kiocb, iter);
	BUG_ON(ret == -EIOCBQUEUED);
	*ppos = kiocb.ki_pos;
	return ret;
}

/* Do it by hand, with file-ops */
static ssize_t do_loop_readv_writev(struct file *filp, struct iov_iter *iter,
		loff_t *ppos, io_fn_t fn)
{
	ssize_t ret = 0;

	while (iov_iter_count(iter)) {
		struct iovec iovec = iov_iter_iovec(iter);
		ssize_t nr;

		nr = fn(filp, iovec.iov_base, iovec.iov_len, ppos);

		if (nr < 0) {
			if (!ret)
				ret = nr;
			break;
		}
		ret += nr;
		if (nr != iovec.iov_len)
			break;
		iov_iter_advance(iter, nr);
	}

	return ret;
}

/* A write operation does a read from user space and vice versa */
#define vrfy_dir(type) ((type) == READ ? VERIFY_WRITE : VERIFY_READ)

ssize_t rw_copy_check_uvector(int type, const struct iovec __user * uvector,
			      unsigned long nr_segs, unsigned long fast_segs,
			      struct iovec *fast_pointer,
			      struct iovec **ret_pointer)
{
	unsigned long seg;
	ssize_t ret;
	struct iovec *iov = fast_pointer;

	/*
	 * SuS says "The readv() function *may* fail if the iovcnt argument
	 * was less than or equal to 0, or greater than {IOV_MAX}.  Linux has
	 * traditionally returned zero for zero segments, so...
	 */
	if (nr_segs == 0) {
		ret = 0;
		goto out;
	}

	/*
	 * First get the "struct iovec" from user memory and
	 * verify all the pointers
	 */
	if (nr_segs > UIO_MAXIOV) {
		ret = -EINVAL;
		goto out;
	}
	if (nr_segs > fast_segs) {
		iov = kmalloc(nr_segs*sizeof(struct iovec), GFP_KERNEL);
		if (iov == NULL) {
			ret = -ENOMEM;
			goto out;
		}
	}
	if (copy_from_user(iov, uvector, nr_segs*sizeof(*uvector))) {
		ret = -EFAULT;
		goto out;
	}

	/*
	 * According to the Single Unix Specification we should return EINVAL
	 * if an element length is < 0 when cast to ssize_t or if the
	 * total length would overflow the ssize_t return value of the
	 * system call.
	 *
	 * Linux caps all read/write calls to MAX_RW_COUNT, and avoids the
	 * overflow case.
	 */
	ret = 0;
	for (seg = 0; seg < nr_segs; seg++) {
		void __user *buf = iov[seg].iov_base;
		ssize_t len = (ssize_t)iov[seg].iov_len;

		/* see if we're about to use an invalid len or if
		 * it's about to overflow ssize_t */
		if (len < 0) {
			ret = -EINVAL;
			goto out;
		}
		if (type >= 0
		    && unlikely(!access_ok(vrfy_dir(type), buf, len))) {
			ret = -EFAULT;
			goto out;
		}
		if (len > MAX_RW_COUNT - ret) {
			len = MAX_RW_COUNT - ret;
			iov[seg].iov_len = len;
		}
		ret += len;
	}
out:
	*ret_pointer = iov;
	return ret;
}

static ssize_t do_readv_writev(int type, struct file *file,
			       const struct iovec __user * uvector,
			       unsigned long nr_segs, loff_t *pos)
{
	size_t tot_len;
	struct iovec iovstack[UIO_FASTIOV];
	struct iovec *iov = iovstack;
	struct iov_iter iter;
	ssize_t ret;
	io_fn_t fn;
	iter_fn_t iter_fn;

	ret = import_iovec(type, uvector, nr_segs,
			   ARRAY_SIZE(iovstack), &iov, &iter);
	if (ret < 0)
		return ret;

	tot_len = iov_iter_count(&iter);
	if (!tot_len)
		goto out;
	ret = rw_verify_area(type, file, pos, tot_len);
	if (ret < 0)
		goto out;

	if (type == READ) {
		fn = file->f_op->read;
		iter_fn = file->f_op->read_iter;
	} else {
		fn = (io_fn_t)file->f_op->write;
		iter_fn = file->f_op->write_iter;
		file_start_write(file);
	}

	if (iter_fn)
		ret = do_iter_readv_writev(file, &iter, pos, iter_fn);
	else
		ret = do_loop_readv_writev(file, &iter, pos, fn);

	if (type != READ)
		file_end_write(file);

out:
	kfree(iov);
	if ((ret + (type == READ)) > 0) {
		if (type == READ)
			fsnotify_access(file);
		else
			fsnotify_modify(file);
	}
	return ret;
}

ssize_t vfs_readv(struct file *file, const struct iovec __user *vec,
		  unsigned long vlen, loff_t *pos)
{
	if (!(file->f_mode & FMODE_READ))
		return -EBADF;
	if (!(file->f_mode & FMODE_CAN_READ))
		return -EINVAL;

	return do_readv_writev(READ, file, vec, vlen, pos);
}

EXPORT_SYMBOL(vfs_readv);
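
/*
 * Caller's-eye view (userspace sketch, illustrative only): the iovec array
 * validated by rw_copy_check_uvector()/import_iovec() above is the one a
 * program hands to readv(2)/writev(2), e.g. a two-segment scatter read:
 *
 *	char hdr[16], body[4096];
 *	struct iovec vec[2] = {
 *		{ .iov_base = hdr,  .iov_len = sizeof(hdr)  },
 *		{ .iov_base = body, .iov_len = sizeof(body) },
 *	};
 *	ssize_t n = readv(fd, vec, 2);
 *
 * Segments are filled in order and the total transfer is capped at
 * MAX_RW_COUNT, as enforced in the checks above.
 */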

ssize_t vfs_writev(struct file *file, const struct iovec __user *vec,
		   unsigned long vlen, loff_t *pos)
{
	if (!(file->f_mode & FMODE_WRITE))
		return -EBADF;
	if (!(file->f_mode & FMODE_CAN_WRITE))
		return -EINVAL;

	return do_readv_writev(WRITE, file, vec, vlen, pos);
}

EXPORT_SYMBOL(vfs_writev);

SYSCALL_DEFINE3(readv, unsigned long, fd, const struct iovec __user *, vec,
		unsigned long, vlen)
{
	struct fd f = fdget_pos(fd);
	ssize_t ret = -EBADF;

	if (f.file) {
		loff_t pos = file_pos_read(f.file);
		ret = vfs_readv(f.file, vec, vlen, &pos);
		if (ret >= 0)
			file_pos_write(f.file, pos);
		fdput_pos(f);
	}

	if (ret > 0)
		add_rchar(current, ret);
	inc_syscr(current);
	return ret;
}

SYSCALL_DEFINE3(writev, unsigned long, fd, const struct iovec __user *, vec,
		unsigned long, vlen)
{
	struct fd f = fdget_pos(fd);
	ssize_t ret = -EBADF;

	if (f.file) {
		loff_t pos = file_pos_read(f.file);
		ret = vfs_writev(f.file, vec, vlen, &pos);
		if (ret >= 0)
			file_pos_write(f.file, pos);
		fdput_pos(f);
	}

	if (ret > 0)
		add_wchar(current, ret);
	inc_syscw(current);
	return ret;
}

static inline loff_t pos_from_hilo(unsigned long high, unsigned long low)
{
#define HALF_LONG_BITS (BITS_PER_LONG / 2)
	return (((loff_t)high << HALF_LONG_BITS) << HALF_LONG_BITS) | low;
}
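
/*
 * Worked example for pos_from_hilo() (illustrative values): on a 32-bit
 * kernel HALF_LONG_BITS is 16, so the two shifts place the high word in
 * bits 63..32, e.g. pos_from_hilo(0x1, 0x2) == 0x100000002.  On a 64-bit
 * kernel the two shifts add up to the full word width, so the offset is
 * effectively taken from pos_l alone; splitting the shift in two avoids an
 * undefined shift by BITS_PER_LONG in that case.
 */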

SYSCALL_DEFINE5(preadv, unsigned long, fd, const struct iovec __user *, vec,
		unsigned long, vlen, unsigned long, pos_l, unsigned long, pos_h)
{
	loff_t pos = pos_from_hilo(pos_h, pos_l);
	struct fd f;
	ssize_t ret = -EBADF;

	if (pos < 0)
		return -EINVAL;

	f = fdget(fd);
	if (f.file) {
		ret = -ESPIPE;
		if (f.file->f_mode & FMODE_PREAD)
			ret = vfs_readv(f.file, vec, vlen, &pos);
		fdput(f);
	}

	if (ret > 0)
		add_rchar(current, ret);
	inc_syscr(current);
	return ret;
}

SYSCALL_DEFINE5(pwritev, unsigned long, fd, const struct iovec __user *, vec,
		unsigned long, vlen, unsigned long, pos_l, unsigned long, pos_h)
{
	loff_t pos = pos_from_hilo(pos_h, pos_l);
	struct fd f;
	ssize_t ret = -EBADF;

	if (pos < 0)
		return -EINVAL;

	f = fdget(fd);
	if (f.file) {
		ret = -ESPIPE;
		if (f.file->f_mode & FMODE_PWRITE)
			ret = vfs_writev(f.file, vec, vlen, &pos);
		fdput(f);
	}

	if (ret > 0)
		add_wchar(current, ret);
	inc_syscw(current);
	return ret;
}

#ifdef CONFIG_COMPAT

static ssize_t compat_do_readv_writev(int type, struct file *file,
			       const struct compat_iovec __user *uvector,
			       unsigned long nr_segs, loff_t *pos)
{
	compat_ssize_t tot_len;
	struct iovec iovstack[UIO_FASTIOV];
	struct iovec *iov = iovstack;
	struct iov_iter iter;
	ssize_t ret;
	io_fn_t fn;
	iter_fn_t iter_fn;

	ret = compat_import_iovec(type, uvector, nr_segs,
				  UIO_FASTIOV, &iov, &iter);
	if (ret < 0)
		return ret;

	tot_len = iov_iter_count(&iter);
	if (!tot_len)
		goto out;
	ret = rw_verify_area(type, file, pos, tot_len);
	if (ret < 0)
		goto out;

	if (type == READ) {
		fn = file->f_op->read;
		iter_fn = file->f_op->read_iter;
	} else {
		fn = (io_fn_t)file->f_op->write;
		iter_fn = file->f_op->write_iter;
		file_start_write(file);
	}

	if (iter_fn)
		ret = do_iter_readv_writev(file, &iter, pos, iter_fn);
	else
		ret = do_loop_readv_writev(file, &iter, pos, fn);

	if (type != READ)
		file_end_write(file);

out:
	kfree(iov);
	if ((ret + (type == READ)) > 0) {
		if (type == READ)
			fsnotify_access(file);
		else
			fsnotify_modify(file);
	}
	return ret;
}

static size_t compat_readv(struct file *file,
			   const struct compat_iovec __user *vec,
			   unsigned long vlen, loff_t *pos)
{
	ssize_t ret = -EBADF;

	if (!(file->f_mode & FMODE_READ))
		goto out;

	ret = -EINVAL;
	if (!(file->f_mode & FMODE_CAN_READ))
		goto out;

	ret = compat_do_readv_writev(READ, file, vec, vlen, pos);

out:
	if (ret > 0)
		add_rchar(current, ret);
	inc_syscr(current);
	return ret;
}

COMPAT_SYSCALL_DEFINE3(readv, compat_ulong_t, fd,
		const struct compat_iovec __user *,vec,
		compat_ulong_t, vlen)
{
	struct fd f = fdget_pos(fd);
	ssize_t ret;
	loff_t pos;

	if (!f.file)
		return -EBADF;
	pos = f.file->f_pos;
	ret = compat_readv(f.file, vec, vlen, &pos);
	if (ret >= 0)
		f.file->f_pos = pos;
	fdput_pos(f);
	return ret;
}

static long __compat_sys_preadv64(unsigned long fd,
				  const struct compat_iovec __user *vec,
				  unsigned long vlen, loff_t pos)
{
	struct fd f;
	ssize_t ret;

	if (pos < 0)
		return -EINVAL;
	f = fdget(fd);
	if (!f.file)
		return -EBADF;
	ret = -ESPIPE;
	if (f.file->f_mode & FMODE_PREAD)
		ret = compat_readv(f.file, vec, vlen, &pos);
	fdput(f);
	return ret;
}

#ifdef __ARCH_WANT_COMPAT_SYS_PREADV64
COMPAT_SYSCALL_DEFINE4(preadv64, unsigned long, fd,
		const struct compat_iovec __user *,vec,
		unsigned long, vlen, loff_t, pos)
{
	return __compat_sys_preadv64(fd, vec, vlen, pos);
}
#endif

COMPAT_SYSCALL_DEFINE5(preadv, compat_ulong_t, fd,
		const struct compat_iovec __user *,vec,
		compat_ulong_t, vlen, u32, pos_low, u32, pos_high)
{
	loff_t pos = ((loff_t)pos_high << 32) | pos_low;

	return __compat_sys_preadv64(fd, vec, vlen, pos);
}

static size_t compat_writev(struct file *file,
			    const struct compat_iovec __user *vec,
			    unsigned long vlen, loff_t *pos)
{
	ssize_t ret = -EBADF;

	if (!(file->f_mode & FMODE_WRITE))
		goto out;

	ret = -EINVAL;
	if (!(file->f_mode & FMODE_CAN_WRITE))
		goto out;

	ret = compat_do_readv_writev(WRITE, file, vec, vlen, pos);

out:
	if (ret > 0)
		add_wchar(current, ret);
	inc_syscw(current);
	return ret;
}

COMPAT_SYSCALL_DEFINE3(writev, compat_ulong_t, fd,
		const struct compat_iovec __user *, vec,
		compat_ulong_t, vlen)
{
	struct fd f = fdget_pos(fd);
	ssize_t ret;
	loff_t pos;

	if (!f.file)
		return -EBADF;
	pos = f.file->f_pos;
	ret = compat_writev(f.file, vec, vlen, &pos);
	if (ret >= 0)
		f.file->f_pos = pos;
	fdput_pos(f);
	return ret;
}

static long __compat_sys_pwritev64(unsigned long fd,
				   const struct compat_iovec __user *vec,
				   unsigned long vlen, loff_t pos)
{
	struct fd f;
	ssize_t ret;

	if (pos < 0)
		return -EINVAL;
	f = fdget(fd);
	if (!f.file)
		return -EBADF;
	ret = -ESPIPE;
	if (f.file->f_mode & FMODE_PWRITE)
		ret = compat_writev(f.file, vec, vlen, &pos);
	fdput(f);
	return ret;
}

#ifdef __ARCH_WANT_COMPAT_SYS_PWRITEV64
COMPAT_SYSCALL_DEFINE4(pwritev64, unsigned long, fd,
		const struct compat_iovec __user *,vec,
		unsigned long, vlen, loff_t, pos)
{
	return __compat_sys_pwritev64(fd, vec, vlen, pos);
}
#endif

COMPAT_SYSCALL_DEFINE5(pwritev, compat_ulong_t, fd,
		const struct compat_iovec __user *,vec,
		compat_ulong_t, vlen, u32, pos_low, u32, pos_high)
{
	loff_t pos = ((loff_t)pos_high << 32) | pos_low;

	return __compat_sys_pwritev64(fd, vec, vlen, pos);
}
#endif

static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos,
			   size_t count, loff_t max)
{
	struct fd in, out;
	struct inode *in_inode, *out_inode;
	loff_t pos;
	loff_t out_pos;
	ssize_t retval;
	int fl;

	/*
	 * Get input file, and verify that it is ok..
	 */
	retval = -EBADF;
	in = fdget(in_fd);
	if (!in.file)
		goto out;
	if (!(in.file->f_mode & FMODE_READ))
		goto fput_in;
	retval = -ESPIPE;
	if (!ppos) {
		pos = in.file->f_pos;
	} else {
		pos = *ppos;
		if (!(in.file->f_mode & FMODE_PREAD))
			goto fput_in;
	}
	retval = rw_verify_area(READ, in.file, &pos, count);
	if (retval < 0)
		goto fput_in;
	count = retval;

	/*
	 * Get output file, and verify that it is ok..
	 */
	retval = -EBADF;
	out = fdget(out_fd);
	if (!out.file)
		goto fput_in;
	if (!(out.file->f_mode & FMODE_WRITE))
		goto fput_out;
	retval = -EINVAL;
	in_inode = file_inode(in.file);
	out_inode = file_inode(out.file);
	out_pos = out.file->f_pos;
	retval = rw_verify_area(WRITE, out.file, &out_pos, count);
	if (retval < 0)
		goto fput_out;
	count = retval;

	if (!max)
		max = min(in_inode->i_sb->s_maxbytes, out_inode->i_sb->s_maxbytes);

	if (unlikely(pos + count > max)) {
		retval = -EOVERFLOW;
		if (pos >= max)
			goto fput_out;
		count = max - pos;
	}

	fl = 0;
#if 0
	/*
	 * We need to debate whether we can enable this or not. The
	 * man page documents EAGAIN return for the output at least,
	 * and the application is arguably buggy if it doesn't expect
	 * EAGAIN on a non-blocking file descriptor.
	 */
	if (in.file->f_flags & O_NONBLOCK)
		fl = SPLICE_F_NONBLOCK;
#endif
	file_start_write(out.file);
	retval = do_splice_direct(in.file, &pos, out.file, &out_pos, count, fl);
	file_end_write(out.file);

	if (retval > 0) {
		add_rchar(current, retval);
		add_wchar(current, retval);
		fsnotify_access(in.file);
		fsnotify_modify(out.file);
		out.file->f_pos = out_pos;
		if (ppos)
			*ppos = pos;
		else
			in.file->f_pos = pos;
	}

	inc_syscr(current);
	inc_syscw(current);
	if (pos > max)
		retval = -EOVERFLOW;

fput_out:
	fdput(out);
fput_in:
	fdput(in);
out:
	return retval;
}

SYSCALL_DEFINE4(sendfile, int, out_fd, int, in_fd, off_t __user *, offset, size_t, count)
{
	loff_t pos;
	off_t off;
	ssize_t ret;

	if (offset) {
		if (unlikely(get_user(off, offset)))
			return -EFAULT;
		pos = off;
		ret = do_sendfile(out_fd, in_fd, &pos, count, MAX_NON_LFS);
		if (unlikely(put_user(pos, offset)))
			return -EFAULT;
		return ret;
	}

	return do_sendfile(out_fd, in_fd, NULL, count, 0);
}

SYSCALL_DEFINE4(sendfile64, int, out_fd, int, in_fd, loff_t __user *, offset, size_t, count)
{
	loff_t pos;
	ssize_t ret;

	if (offset) {
		if (unlikely(copy_from_user(&pos, offset, sizeof(loff_t))))
			return -EFAULT;
		ret = do_sendfile(out_fd, in_fd, &pos, count, 0);
		if (unlikely(put_user(pos, offset)))
			return -EFAULT;
		return ret;
	}

	return do_sendfile(out_fd, in_fd, NULL, count, 0);
}

#ifdef CONFIG_COMPAT
COMPAT_SYSCALL_DEFINE4(sendfile, int, out_fd, int, in_fd,
		compat_off_t __user *, offset, compat_size_t, count)
{
	loff_t pos;
	off_t off;
	ssize_t ret;

	if (offset) {
		if (unlikely(get_user(off, offset)))
			return -EFAULT;
		pos = off;
		ret = do_sendfile(out_fd, in_fd, &pos, count, MAX_NON_LFS);
		if (unlikely(put_user(pos, offset)))
			return -EFAULT;
		return ret;
	}

	return do_sendfile(out_fd, in_fd, NULL, count, 0);
}

COMPAT_SYSCALL_DEFINE4(sendfile64, int, out_fd, int, in_fd,
		compat_loff_t __user *, offset, compat_size_t, count)
{
	loff_t pos;
	ssize_t ret;

	if (offset) {
		if (unlikely(copy_from_user(&pos, offset, sizeof(loff_t))))
			return -EFAULT;
		ret = do_sendfile(out_fd, in_fd, &pos, count, 0);
		if (unlikely(put_user(pos, offset)))
			return -EFAULT;
		return ret;
	}

	return do_sendfile(out_fd, in_fd, NULL, count, 0);
}
#endif
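
/*
 * Caller's-eye view of the ppos handling in do_sendfile() (userspace sketch,
 * illustrative only): with a non-NULL offset pointer, sendfile(2) reads from
 * *offset, writes the updated position back through the pointer and leaves
 * the input file's own position untouched; with a NULL pointer the input
 * file's position is advanced instead:
 *
 *	off_t off = 0;
 *	sendfile(out_fd, in_fd, &off, 4096);	off becomes 4096 on a full copy
 *	sendfile(out_fd, in_fd, NULL, 4096);	in_fd's file position advances
 */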

/*
 * copy_file_range() differs from regular file read and write in that it
 * specifically allows returning partial success.  When it does so is up to
 * the copy_file_range method.
 */
ssize_t vfs_copy_file_range(struct file *file_in, loff_t pos_in,
			    struct file *file_out, loff_t pos_out,
			    size_t len, unsigned int flags)
{
	struct inode *inode_in = file_inode(file_in);
	struct inode *inode_out = file_inode(file_out);
	ssize_t ret;

	if (flags != 0)
		return -EINVAL;

	/* copy_file_range allows full ssize_t len, ignoring MAX_RW_COUNT */
	ret = rw_verify_area(READ, file_in, &pos_in, len);
	if (ret >= 0)
		ret = rw_verify_area(WRITE, file_out, &pos_out, len);
	if (ret < 0)
		return ret;

	if (!(file_in->f_mode & FMODE_READ) ||
	    !(file_out->f_mode & FMODE_WRITE) ||
	    (file_out->f_flags & O_APPEND))
		return -EBADF;

	/* this could be relaxed once a method supports cross-fs copies */
	if (inode_in->i_sb != inode_out->i_sb)
		return -EXDEV;

	if (len == 0)
		return 0;

	ret = mnt_want_write_file(file_out);
	if (ret)
		return ret;

	ret = -EOPNOTSUPP;
	if (file_out->f_op->copy_file_range)
		ret = file_out->f_op->copy_file_range(file_in, pos_in, file_out,
						      pos_out, len, flags);
	if (ret == -EOPNOTSUPP)
		ret = do_splice_direct(file_in, &pos_in, file_out, &pos_out,
				len > MAX_RW_COUNT ? MAX_RW_COUNT : len, 0);

	if (ret > 0) {
		fsnotify_access(file_in);
		add_rchar(current, ret);
		fsnotify_modify(file_out);
		add_wchar(current, ret);
	}
	inc_syscr(current);
	inc_syscw(current);

	mnt_drop_write_file(file_out);

	return ret;
}
EXPORT_SYMBOL(vfs_copy_file_range);
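
/*
 * In-kernel usage sketch (the caller is hypothetical, not part of this
 * file): code that already holds two struct file pointers on the same
 * superblock can request an offloaded copy; vfs_copy_file_range() falls
 * back to do_splice_direct() when the filesystem provides no
 * ->copy_file_range method:
 *
 *	static ssize_t foo_copy(struct file *src, struct file *dst, size_t count)
 *	{
 *		return vfs_copy_file_range(src, 0, dst, 0, count, 0);
 *	}
 */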

SYSCALL_DEFINE6(copy_file_range, int, fd_in, loff_t __user *, off_in,
		int, fd_out, loff_t __user *, off_out,
		size_t, len, unsigned int, flags)
{
	loff_t pos_in;
	loff_t pos_out;
	struct fd f_in;
	struct fd f_out;
	ssize_t ret = -EBADF;

	f_in = fdget(fd_in);
	if (!f_in.file)
		goto out2;

	f_out = fdget(fd_out);
	if (!f_out.file)
		goto out1;

	ret = -EFAULT;
	if (off_in) {
		if (copy_from_user(&pos_in, off_in, sizeof(loff_t)))
			goto out;
	} else {
		pos_in = f_in.file->f_pos;
	}

	if (off_out) {
		if (copy_from_user(&pos_out, off_out, sizeof(loff_t)))
			goto out;
	} else {
		pos_out = f_out.file->f_pos;
	}

	ret = vfs_copy_file_range(f_in.file, pos_in, f_out.file, pos_out, len,
				  flags);
	if (ret > 0) {
		pos_in += ret;
		pos_out += ret;

		if (off_in) {
			if (copy_to_user(off_in, &pos_in, sizeof(loff_t)))
				ret = -EFAULT;
		} else {
			f_in.file->f_pos = pos_in;
		}

		if (off_out) {
			if (copy_to_user(off_out, &pos_out, sizeof(loff_t)))
				ret = -EFAULT;
		} else {
			f_out.file->f_pos = pos_out;
		}
	}

out:
	fdput(f_out);
out1:
	fdput(f_in);
out2:
	return ret;
}

static int clone_verify_area(struct file *file, loff_t pos, u64 len, bool write)
{
	struct inode *inode = file_inode(file);

	if (unlikely(pos < 0))
		return -EINVAL;

	if (unlikely((loff_t) (pos + len) < 0))
		return -EINVAL;

	if (unlikely(inode->i_flctx && mandatory_lock(inode))) {
		loff_t end = len ? pos + len - 1 : OFFSET_MAX;
		int retval;

		retval = locks_mandatory_area(inode, file, pos, end,
				write ? F_WRLCK : F_RDLCK);
		if (retval < 0)
			return retval;
	}

	return security_file_permission(file, write ? MAY_WRITE : MAY_READ);
}

int vfs_clone_file_range(struct file *file_in, loff_t pos_in,
		struct file *file_out, loff_t pos_out, u64 len)
{
	struct inode *inode_in = file_inode(file_in);
	struct inode *inode_out = file_inode(file_out);
	int ret;

	if (inode_in->i_sb != inode_out->i_sb ||
	    file_in->f_path.mnt != file_out->f_path.mnt)
		return -EXDEV;

	if (S_ISDIR(inode_in->i_mode) || S_ISDIR(inode_out->i_mode))
		return -EISDIR;
	if (!S_ISREG(inode_in->i_mode) || !S_ISREG(inode_out->i_mode))
		return -EINVAL;

	if (!(file_in->f_mode & FMODE_READ) ||
	    !(file_out->f_mode & FMODE_WRITE) ||
	    (file_out->f_flags & O_APPEND))
		return -EBADF;

	if (!file_in->f_op->clone_file_range)
		return -EOPNOTSUPP;

	ret = clone_verify_area(file_in, pos_in, len, false);
	if (ret)
		return ret;

	ret = clone_verify_area(file_out, pos_out, len, true);
	if (ret)
		return ret;

	if (pos_in + len > i_size_read(inode_in))
		return -EINVAL;

	ret = mnt_want_write_file(file_out);
	if (ret)
		return ret;

	ret = file_in->f_op->clone_file_range(file_in, pos_in,
			file_out, pos_out, len);
	if (!ret) {
		fsnotify_access(file_in);
		fsnotify_modify(file_out);
	}

	mnt_drop_write_file(file_out);
	return ret;
}
EXPORT_SYMBOL(vfs_clone_file_range);

int vfs_dedupe_file_range(struct file *file, struct file_dedupe_range *same)
{
	struct file_dedupe_range_info *info;
	struct inode *src = file_inode(file);
	u64 off;
	u64 len;
	int i;
	int ret;
	bool is_admin = capable(CAP_SYS_ADMIN);
	u16 count = same->dest_count;
	struct file *dst_file;
	loff_t dst_off;
	ssize_t deduped;

	if (!(file->f_mode & FMODE_READ))
		return -EINVAL;

	if (same->reserved1 || same->reserved2)
		return -EINVAL;

	off = same->src_offset;
	len = same->src_length;

	ret = -EISDIR;
	if (S_ISDIR(src->i_mode))
		goto out;

	ret = -EINVAL;
	if (!S_ISREG(src->i_mode))
		goto out;

	ret = clone_verify_area(file, off, len, false);
	if (ret < 0)
		goto out;
	ret = 0;

	/* pre-format output fields to sane values */
	for (i = 0; i < count; i++) {
		same->info[i].bytes_deduped = 0ULL;
		same->info[i].status = FILE_DEDUPE_RANGE_SAME;
	}

	for (i = 0, info = same->info; i < count; i++, info++) {
		struct inode *dst;
		struct fd dst_fd = fdget(info->dest_fd);

		dst_file = dst_fd.file;
		if (!dst_file) {
			info->status = -EBADF;
			goto next_loop;
		}
		dst = file_inode(dst_file);

		ret = mnt_want_write_file(dst_file);
		if (ret) {
			info->status = ret;
			goto next_loop;
		}

		dst_off = info->dest_offset;
		ret = clone_verify_area(dst_file, dst_off, len, true);
		if (ret < 0) {
			info->status = ret;
			goto next_file;
		}
		ret = 0;

		if (info->reserved) {
			info->status = -EINVAL;
		} else if (!(is_admin || (dst_file->f_mode & FMODE_WRITE))) {
			info->status = -EINVAL;
		} else if (file->f_path.mnt != dst_file->f_path.mnt) {
			info->status = -EXDEV;
		} else if (S_ISDIR(dst->i_mode)) {
			info->status = -EISDIR;
		} else if (dst_file->f_op->dedupe_file_range == NULL) {
			info->status = -EINVAL;
		} else {
			deduped = dst_file->f_op->dedupe_file_range(file, off,
							len, dst_file,
							info->dest_offset);
			if (deduped == -EBADE)
				info->status = FILE_DEDUPE_RANGE_DIFFERS;
			else if (deduped < 0)
				info->status = deduped;
			else
				info->bytes_deduped += deduped;
		}

next_file:
		mnt_drop_write_file(dst_file);
next_loop:
		fdput(dst_fd);

		if (fatal_signal_pending(current))
			goto out;
	}

out:
	return ret;
}
EXPORT_SYMBOL(vfs_dedupe_file_range);