// SPDX-License-Identifier: GPL-2.0
/*
 *  linux/fs/read_write.c
 *
 *  Copyright (C) 1991, 1992  Linus Torvalds
 */

#include <linux/slab.h>
#include <linux/stat.h>
#include <linux/sched/xacct.h>
#include <linux/fcntl.h>
#include <linux/file.h>
#include <linux/uio.h>
#include <linux/fsnotify.h>
#include <linux/security.h>
#include <linux/export.h>
#include <linux/syscalls.h>
#include <linux/pagemap.h>
#include <linux/splice.h>
#include <linux/compat.h>
#include <linux/mount.h>
#include <linux/fs.h>
#include "internal.h"

#include <linux/uaccess.h>
#include <asm/unistd.h>

/*
 * File operations table that wires up the generic llseek, read_iter,
 * read-only mmap and splice_read helpers.  No write method is set.
 */
const struct file_operations generic_ro_fops = {
	.llseek		= generic_file_llseek,
	.read_iter	= generic_file_read_iter,
	.mmap		= generic_file_readonly_mmap,
	.splice_read	= generic_file_splice_read,
};

EXPORT_SYMBOL(generic_ro_fops);

/*
 * Test whether FMODE_UNSIGNED_OFFSET is set in @file->f_mode.  The callers
 * in this file accept negative loff_t positions only when it is set.
 */
static inline bool unsigned_offsets(struct file *file)
{
	return file->f_mode & FMODE_UNSIGNED_OFFSET;
}

/**
 * vfs_setpos - update the file offset for lseek
 * @file:	file structure in question
 * @offset:	file offset to seek to
 * @maxsize:	maximum file size
 *
 * This is a low-level filesystem helper for updating the file offset to
 * the value specified by @offset if the given offset is valid and it is
 * not equal to the current file offset.  When the offset changes,
 * @file->f_version is reset to 0 as well.
 *
 * Return the specified offset on success and -EINVAL on invalid offset
 * (negative without unsigned offsets, or larger than @maxsize).
 */
loff_t vfs_setpos(struct file *file, loff_t offset, loff_t maxsize)
{
	if (offset < 0 && !unsigned_offsets(file))
		return -EINVAL;
	if (offset > maxsize)
		return -EINVAL;

	if (offset != file->f_pos) {
		file->f_pos = offset;
		file->f_version = 0;
	}
	return offset;
}
EXPORT_SYMBOL(vfs_setpos);

/**
 * generic_file_llseek_size - generic llseek implementation for regular files
 * @file:	file structure to seek on
 * @offset:	file offset to seek to
 * @whence:	type of seek
 * @maxsize:	max size of this file in file system
 * @eof:	offset used for SEEK_END position
 *
 * This is a variant of generic_file_llseek that allows passing in a custom
 * maximum file size and a custom EOF position, e.g. for hashed directories.
 *
 * Any @whence without a case below (such as SEEK_SET) uses @offset as given
 * and goes straight to vfs_setpos().
 *
 * Synchronization:
 * SEEK_SET and SEEK_END are unsynchronized (but atomic on 64bit platforms)
 * SEEK_CUR is synchronized against other SEEK_CURs, but not read/writes.
 * read/writes behave like SEEK_SET against seeks.
 *
 * Return: the resulting offset, -EINVAL from vfs_setpos() for an invalid
 * offset, or -ENXIO for SEEK_DATA/SEEK_HOLE at or beyond @eof.
 */
loff_t
generic_file_llseek_size(struct file *file, loff_t offset, int whence,
		loff_t maxsize, loff_t eof)
{
	switch (whence) {
	case SEEK_END:
		offset += eof;
		break;
	case SEEK_CUR:
		/*
		 * Here we special-case the lseek(fd, 0, SEEK_CUR)
		 * position-querying operation.  Avoid rewriting the "same"
		 * f_pos value back to the file because a concurrent read(),
		 * write() or lseek() might have altered it
		 */
		if (offset == 0)
			return file->f_pos;
		/*
		 * f_lock protects against read/modify/write race with other
		 * SEEK_CURs. Note that parallel writes and reads behave
		 * like SEEK_SET.
		 */
		spin_lock(&file->f_lock);
		offset = vfs_setpos(file, file->f_pos + offset, maxsize);
		spin_unlock(&file->f_lock);
		return offset;
	case SEEK_DATA:
		/*
		 * In the generic case the entire file is data, so as long as
		 * offset isn't at the end of the file then the offset is data.
		 */
		if ((unsigned long long)offset >= eof)
			return -ENXIO;
		break;
	case SEEK_HOLE:
		/*
		 * There is a virtual hole at the end of the file, so as long as
		 * offset isn't i_size or larger, return i_size.
		 */
		if ((unsigned long long)offset >= eof)
			return -ENXIO;
		offset = eof;
		break;
	}

	return vfs_setpos(file, offset, maxsize);
}
EXPORT_SYMBOL(generic_file_llseek_size);

/**
 * generic_file_llseek - generic llseek implementation for regular files
 * @file:	file structure to seek on
 * @offset:	file offset to seek to
 * @whence:	type of seek
 *
 * This is a generic implementation of ->llseek usable for all normal local
 * filesystems.  It just updates the file offset to the value specified by
 * @offset and @whence, using the superblock's s_maxbytes as the maximum
 * offset and the inode size of the mapping's host as the EOF position.
 */
loff_t generic_file_llseek(struct file *file, loff_t offset, int whence)
{
	struct inode *inode = file->f_mapping->host;

	return generic_file_llseek_size(file, offset, whence,
					inode->i_sb->s_maxbytes,
					i_size_read(inode));
}
EXPORT_SYMBOL(generic_file_llseek);

/**
 * fixed_size_llseek - llseek implementation for fixed-sized devices
 * @file:	file structure to seek on
 * @offset:	file offset to seek to
 * @whence:	type of seek
 * @size:	size of the file
 *
 * Accepts SEEK_SET, SEEK_CUR and SEEK_END, using @size as both the maximum
 * offset and the EOF position.  Any other @whence returns -EINVAL.
 */
loff_t fixed_size_llseek(struct file *file, loff_t offset, int whence, loff_t size)
{
	switch (whence) {
	case SEEK_SET: case SEEK_CUR: case SEEK_END:
		return generic_file_llseek_size(file, offset, whence,
						size, size);
	default:
		return -EINVAL;
	}
}
EXPORT_SYMBOL(fixed_size_llseek);

/**
 * no_seek_end_llseek - llseek implementation supporting only SEEK_SET/SEEK_CUR
 * @file:	file structure to seek on
 * @offset:	file offset to seek to
 * @whence:	type of seek
 *
 * Offsets up to OFFSET_MAX are allowed.  Any @whence other than SEEK_SET
 * or SEEK_CUR (including SEEK_END) returns -EINVAL.
 */
loff_t no_seek_end_llseek(struct file *file, loff_t offset, int whence)
{
	switch (whence) {
	case SEEK_SET: case SEEK_CUR:
		return generic_file_llseek_size(file, offset, whence,
						OFFSET_MAX, 0);
	default:
		return -EINVAL;
	}
}
EXPORT_SYMBOL(no_seek_end_llseek);

/**
 * no_seek_end_llseek_size - SEEK_SET/SEEK_CUR-only llseek with a size limit
 * @file:	file structure to seek on
 * @offset:	file offset to seek to
 * @whence:	type of seek
 * @size:	maximal offset allowed
 *
 * Same as no_seek_end_llseek(), except that the resulting offset may not
 * exceed @size.
 */
loff_t no_seek_end_llseek_size(struct file *file, loff_t offset, int whence, loff_t size)
{
	switch (whence) {
	case SEEK_SET: case SEEK_CUR:
		return generic_file_llseek_size(file, offset, whence,
						size, 0);
	default:
		return -EINVAL;
	}
}
EXPORT_SYMBOL(no_seek_end_llseek_size);

/**
 * noop_llseek - No Operation Performed llseek implementation
 * @file:	file structure to seek on
 * @offset:	file offset to seek to
 * @whence:	type of seek
 *
 * This is an implementation of ->llseek usable for the rare special case when
 * userspace expects the seek to succeed but the (device) file is actually not
 * able to perform the seek. In this case you use noop_llseek() instead of
 * falling back to the default implementation of ->llseek.
 *
 * Return: the current, unmodified @file->f_pos; @offset and @whence are
 * ignored.
 */
loff_t noop_llseek(struct file *file, loff_t offset, int whence)
{
	return file->f_pos;
}
EXPORT_SYMBOL(noop_llseek);

/* llseek implementation that always fails with -ESPIPE. */
loff_t no_llseek(struct file *file, loff_t offset, int whence)
{
	return -ESPIPE;
}
EXPORT_SYMBOL(no_llseek);

/*
 * llseek implementation that performs the whole seek under the inode lock.
 *
 * Unlike vfs_setpos() there is no upper bound on the resulting offset; it
 * is only rejected (-EINVAL) when negative and the file does not use
 * unsigned offsets.  SEEK_DATA/SEEK_HOLE at or beyond i_size return -ENXIO.
 */
loff_t default_llseek(struct file *file, loff_t offset, int whence)
{
	struct inode *inode = file_inode(file);
	loff_t retval;

	inode_lock(inode);
	switch (whence) {
		case SEEK_END:
			offset += i_size_read(inode);
			break;
		case SEEK_CUR:
			/* Pure position query: report f_pos without rewriting it. */
			if (offset == 0) {
				retval = file->f_pos;
				goto out;
			}
			offset += file->f_pos;
			break;
		case SEEK_DATA:
			/*
			 * In the generic case the entire file is data, so as
			 * long as offset isn't at the end of the file then the
			 * offset is data.
			 */
			if (offset >= inode->i_size) {
				retval = -ENXIO;
				goto out;
			}
			break;
		case SEEK_HOLE:
			/*
			 * There is a virtual hole at the end of the file, so
			 * as long as offset isn't i_size or larger, return
			 * i_size.
			 */
			if (offset >= inode->i_size) {
				retval = -ENXIO;
				goto out;
			}
			offset = inode->i_size;
			break;
	}
	retval = -EINVAL;
	if (offset >= 0 || unsigned_offsets(file)) {
		if (offset != file->f_pos) {
			file->f_pos = offset;
			file->f_version = 0;
		}
		retval = offset;
	}
out:
	inode_unlock(inode);
	return retval;
}
EXPORT_SYMBOL(default_llseek);

/*
 * Dispatch a seek to the file's ->llseek method.  Falls back to no_llseek()
 * (-ESPIPE) when FMODE_LSEEK is not set or the file has no ->llseek.
 */
loff_t vfs_llseek(struct file *file, loff_t offset, int whence)
{
	loff_t (*fn)(struct file *, loff_t, int);

	fn = no_llseek;
	if (file->f_mode & FMODE_LSEEK) {
		if (file->f_op->llseek)
			fn = file->f_op->llseek;
	}
	return fn(file, offset, whence);
}
EXPORT_SYMBOL(vfs_llseek);

/*
 * Common body of the lseek syscalls.
 *
 * Returns -EBADF for an invalid @fd, -EINVAL when @whence exceeds SEEK_MAX,
 * -EOVERFLOW when the loff_t result of vfs_llseek() does not survive the
 * round trip through off_t, otherwise the vfs_llseek() result.
 */
static off_t ksys_lseek(unsigned int fd, off_t offset, unsigned int whence)
{
	off_t retval;
	struct fd f = fdget_pos(fd);
	if (!f.file)
		return -EBADF;

	retval = -EINVAL;
	if (whence <= SEEK_MAX) {
		loff_t res = vfs_llseek(f.file, offset, whence);
		retval = res;
		if (res != (loff_t)retval)
			retval = -EOVERFLOW;	/* LFS: should only happen on 32 bit platforms */
	}
	fdput_pos(f);
	return retval;
}

SYSCALL_DEFINE3(lseek, unsigned int, fd, off_t, offset, unsigned int, whence)
{
	return ksys_lseek(fd, offset, whence);
}

#ifdef CONFIG_COMPAT
COMPAT_SYSCALL_DEFINE3(lseek, unsigned int, fd, compat_off_t, offset, unsigned int, whence)
{
	return ksys_lseek(fd, offset, whence);
}
#endif

#if !defined(CONFIG_64BIT) || defined(CONFIG_COMPAT) || \
	defined(__ARCH_WANT_SYS_LLSEEK)
/*
 * llseek: seek with the 64-bit offset passed as two halves.  On success the
 * new position is copied to @result and 0 is returned; a failed copy gives
 * -EFAULT, and a negative vfs_llseek() result is returned as the error.
 */
SYSCALL_DEFINE5(llseek, unsigned int, fd, unsigned long, offset_high,
		unsigned long, offset_low, loff_t __user *, result,
		unsigned int, whence)
{
	int retval;
	struct fd f = fdget_pos(fd);
	loff_t offset;

	if (!f.file)
		return -EBADF;

	retval = -EINVAL;
	if (whence > SEEK_MAX)
		goto out_putf;

	offset = vfs_llseek(f.file, ((loff_t) offset_high << 32) | offset_low,
			whence);

	/* Only meaningful as a return value when offset is a negative errno. */
	retval = (int)offset;
	if (offset >= 0) {
		retval = -EFAULT;
		if (!copy_to_user(result, &offset, sizeof(offset)))
			retval = 0;
	}
out_putf:
	fdput_pos(f);
	return retval;
}
#endif

/*
 * Validate the byte count and (optional) position of a read or write and
 * ask the security layer for permission.
 *
 * @read_write selects MAY_READ (READ) or MAY_WRITE (anything else) for the
 * security check.  @ppos may be NULL, in which case no position checks are
 * made.
 *
 * Returns -EINVAL when @count does not fit in ssize_t or when the position
 * range is negative/overflowing on a file without unsigned offsets,
 * -EOVERFLOW when a negative (unsigned-offset) position plus @count would
 * wrap, otherwise the result of security_file_permission().
 */
int rw_verify_area(int read_write, struct file *file, const loff_t *ppos, size_t count)
{
	if (unlikely((ssize_t) count < 0))
		return -EINVAL;

	/*
	 * ranged mandatory locking does not apply to streams - it makes sense
	 * only for files where position has a meaning.
	 *
	 * NOTE(review): no locking call is visible in this function any more;
	 * the block below only range-checks the position.  The sentence above
	 * looks like a leftover - confirm.
	 */
	if (ppos) {
		loff_t pos = *ppos;

		if (unlikely(pos < 0)) {
			if (!unsigned_offsets(file))
				return -EINVAL;
			if (count >= -pos) /* both values are in 0..LLONG_MAX */
				return -EOVERFLOW;
		} else if (unlikely((loff_t) (pos + count) < 0)) {
			if (!unsigned_offsets(file))
				return -EINVAL;
		}
	}

	return security_file_permission(file,
				read_write == READ ? MAY_READ : MAY_WRITE);
}

/*
 * Perform a synchronous read of a single user buffer through ->read_iter.
 * The position is taken from *@ppos (0 when @ppos is NULL) and, when @ppos
 * is given, always written back afterwards, whatever the result.
 */
static ssize_t new_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos)
{
	struct iovec iov = { .iov_base = buf, .iov_len = len };
	struct kiocb kiocb;
	struct iov_iter iter;
	ssize_t ret;

	init_sync_kiocb(&kiocb, filp);
	kiocb.ki_pos = (ppos ? *ppos : 0);
	iov_iter_init(&iter, READ, &iov, 1, len);

	ret = call_read_iter(filp, &kiocb, &iter);
	BUG_ON(ret == -EIOCBQUEUED);
	if (ppos)
		*ppos = kiocb.ki_pos;
	return ret;
}

/*
 * Emit a rate-limited warning naming the unsupported in-kernel operation
 * @op, the file and the current task.  Always returns -EINVAL.
 */
static int warn_unsupported(struct file *file, const char *op)
{
	pr_warn_ratelimited(
		"kernel %s not supported for file %pD4 (pid: %d comm: %.20s)\n",
		op, file, current->pid, current->comm);
	return -EINVAL;
}

/*
 * Read into a kernel buffer through ->read_iter without calling
 * rw_verify_area().  @count is clamped to MAX_RW_COUNT.  *@pos is updated
 * and the access is notified/accounted only when bytes were actually read.
 *
 * Returns the ->read_iter result, or -EINVAL when the file is not readable
 * or does not provide exactly ->read_iter (and no ->read).
 */
ssize_t __kernel_read(struct file *file, void *buf, size_t count, loff_t *pos)
{
	struct kvec iov = {
		.iov_base	= buf,
		.iov_len	= min_t(size_t, count, MAX_RW_COUNT),
	};
	struct kiocb kiocb;
	struct iov_iter iter;
	ssize_t ret;

	if (WARN_ON_ONCE(!(file->f_mode & FMODE_READ)))
		return -EINVAL;
	if (!(file->f_mode & FMODE_CAN_READ))
		return -EINVAL;
	/*
	 * Also fail if ->read_iter and ->read are both wired up as that
	 * implies very convoluted semantics.
	 */
	if (unlikely(!file->f_op->read_iter || file->f_op->read))
		return warn_unsupported(file, "read");

	init_sync_kiocb(&kiocb, file);
	kiocb.ki_pos = pos ? *pos : 0;
	iov_iter_kvec(&iter, READ, &iov, 1, iov.iov_len);
	ret = file->f_op->read_iter(&kiocb, &iter);
	if (ret > 0) {
		if (pos)
			*pos = kiocb.ki_pos;
		fsnotify_access(file);
		add_rchar(current, ret);
	}
	inc_syscr(current);
	return ret;
}

/*
 * Read into a kernel buffer: rw_verify_area() followed by __kernel_read().
 * Returns the verification error, if any, otherwise the read result.
 */
ssize_t kernel_read(struct file *file, void *buf, size_t count, loff_t *pos)
{
	ssize_t ret;

	ret = rw_verify_area(READ, file, pos, count);
	if (ret)
		return ret;
	return __kernel_read(file, buf, count, pos);
}
EXPORT_SYMBOL(kernel_read);

/*
 * Read from @file into the user buffer @buf.
 *
 * Checks the file mode, the user buffer and the position range, clamps
 * @count to MAX_RW_COUNT, then prefers ->read over ->read_iter.  Returns
 * -EBADF / -EINVAL / -EFAULT for the respective failed check, -EINVAL when
 * the file has neither method, otherwise the method's result.  A positive
 * result is notified via fsnotify and accounted to the current task.
 */
ssize_t vfs_read(struct file *file, char __user *buf, size_t count, loff_t *pos)
{
	ssize_t ret;

	if (!(file->f_mode & FMODE_READ))
		return -EBADF;
	if (!(file->f_mode & FMODE_CAN_READ))
		return -EINVAL;
	if (unlikely(!access_ok(buf, count)))
		return -EFAULT;

	ret = rw_verify_area(READ, file, pos, count);
	if (ret)
		return ret;
	if (count >  MAX_RW_COUNT)
		count =  MAX_RW_COUNT;

	if (file->f_op->read)
		ret = file->f_op->read(file, buf, count, pos);
	else if (file->f_op->read_iter)
		ret = new_sync_read(file, buf, count, pos);
	else
		ret = -EINVAL;
	if (ret > 0) {
		fsnotify_access(file);
		add_rchar(current, ret);
	}
	inc_syscr(current);
	return ret;
}

/*
 * Perform a synchronous write of a single user buffer through ->write_iter.
 * Unlike new_sync_read(), *@ppos is only written back when bytes were
 * actually written (ret > 0).
 */
static ssize_t new_sync_write(struct file *filp, const char __user *buf, size_t len, loff_t *ppos)
{
	struct iovec iov = { .iov_base = (void __user *)buf, .iov_len = len };
	struct kiocb kiocb;
	struct iov_iter iter;
	ssize_t ret;

	init_sync_kiocb(&kiocb, filp);
	kiocb.ki_pos = (ppos ? *ppos : 0);
	iov_iter_init(&iter, WRITE, &iov, 1, len);

	ret = call_write_iter(filp, &kiocb, &iter);
	BUG_ON(ret == -EIOCBQUEUED);
	if (ret > 0 && ppos)
		*ppos = kiocb.ki_pos;
	return ret;
}

/*
 * Write a kernel buffer through ->write_iter without calling
 * rw_verify_area().  @count is clamped to MAX_RW_COUNT.  *@pos is updated
 * and the modification is notified/accounted only when bytes were written.
 *
 * Returns -EBADF when the file is not open for writing, -EINVAL when it
 * cannot write or does not provide exactly ->write_iter (and no ->write),
 * otherwise the ->write_iter result.
 */
/* caller is responsible for file_start_write/file_end_write */
ssize_t __kernel_write(struct file *file, const void *buf, size_t count, loff_t *pos)
{
	struct kvec iov = {
		.iov_base	= (void *)buf,
		.iov_len	= min_t(size_t, count, MAX_RW_COUNT),
	};
	struct kiocb kiocb;
	struct iov_iter iter;
	ssize_t ret;

	if (WARN_ON_ONCE(!(file->f_mode & FMODE_WRITE)))
		return -EBADF;
	if (!(file->f_mode & FMODE_CAN_WRITE))
		return -EINVAL;
	/*
	 * Also fail if ->write_iter and ->write are both wired up as that
	 * implies very convoluted semantics.
	 */
	if (unlikely(!file->f_op->write_iter || file->f_op->write))
		return warn_unsupported(file, "write");

	init_sync_kiocb(&kiocb, file);
	kiocb.ki_pos = pos ? *pos : 0;
	iov_iter_kvec(&iter, WRITE, &iov, 1, iov.iov_len);
	ret = file->f_op->write_iter(&kiocb, &iter);
	if (ret > 0) {
		if (pos)
			*pos = kiocb.ki_pos;
		fsnotify_modify(file);
		add_wchar(current, ret);
	}
	inc_syscw(current);
	return ret;
}
/*
 * This "EXPORT_SYMBOL_GPL()" is more of a "EXPORT_SYMBOL_DONTUSE()",
 * but autofs is one of the few internal kernel users that actually
 * wants this _and_ can be built as a module. So we need to export
 * this symbol for autofs, even though it really isn't appropriate
 * for any other kernel modules.
 */
EXPORT_SYMBOL_GPL(__kernel_write);

/*
 * Write a kernel buffer: rw_verify_area(), then __kernel_write() bracketed
 * by file_start_write()/file_end_write().
 */
ssize_t kernel_write(struct file *file, const void *buf, size_t count,
			    loff_t *pos)
{
	ssize_t ret;

	ret = rw_verify_area(WRITE, file, pos, count);
	if (ret)
		return ret;

	file_start_write(file);
	ret =  __kernel_write(file, buf, count, pos);
	file_end_write(file);
	return ret;
}
EXPORT_SYMBOL(kernel_write);

/*
 * Write the user buffer @buf to @file.
 *
 * Mirror image of vfs_read(): checks the file mode, the user buffer and the
 * position range, clamps @count to MAX_RW_COUNT, then prefers ->write over
 * ->write_iter, with the call bracketed by file_start_write() and
 * file_end_write().  A positive result is notified and accounted.
 */
ssize_t vfs_write(struct file *file, const char __user *buf, size_t count, loff_t *pos)
{
	ssize_t ret;

	if (!(file->f_mode & FMODE_WRITE))
		return -EBADF;
	if (!(file->f_mode & FMODE_CAN_WRITE))
		return -EINVAL;
	if (unlikely(!access_ok(buf, count)))
		return -EFAULT;

	ret = rw_verify_area(WRITE, file, pos, count);
	if (ret)
		return ret;
	if (count > MAX_RW_COUNT)
		count =  MAX_RW_COUNT;
	file_start_write(file);
	if (file->f_op->write)
		ret = file->f_op->write(file, buf, count, pos);
	else if (file->f_op->write_iter)
		ret = new_sync_write(file, buf, count, pos);
	else
		ret = -EINVAL;
	if (ret > 0) {
		fsnotify_modify(file);
		add_wchar(current, ret);
	}
	inc_syscw(current);
	file_end_write(file);
	return ret;
}

/* file_ppos returns &file->f_pos or NULL if file is stream */
static inline loff_t *file_ppos(struct file *file)
{
	return file->f_mode & FMODE_STREAM ? NULL : &file->f_pos;
}

/*
 * Body of read(2).  For non-stream files the read operates on a local copy
 * of f_pos, which is stored back only when vfs_read() did not fail.
 * Returns -EBADF for an invalid @fd, otherwise the vfs_read() result.
 */
ssize_t ksys_read(unsigned int fd, char __user *buf, size_t count)
{
	struct fd f = fdget_pos(fd);
	ssize_t ret = -EBADF;

	if (f.file) {
		loff_t pos, *ppos = file_ppos(f.file);
		if (ppos) {
			pos = *ppos;
			ppos = &pos;
		}
		ret = vfs_read(f.file, buf, count, ppos);
		if (ret >= 0 && ppos)
			f.file->f_pos = pos;
		fdput_pos(f);
	}
	return ret;
}

SYSCALL_DEFINE3(read, unsigned int, fd, char __user *, buf, size_t, count)
{
	return ksys_read(fd, buf, count);
}

/*
 * Body of write(2).  Same f_pos handling as ksys_read(): work on a local
 * copy and store it back only when vfs_write() did not fail.
 */
ssize_t ksys_write(unsigned int fd, const char __user *buf, size_t count)
{
	struct fd f = fdget_pos(fd);
	ssize_t ret = -EBADF;

	if (f.file) {
		loff_t pos, *ppos = file_ppos(f.file);
		if (ppos) {
			pos = *ppos;
			ppos = &pos;
		}
		ret = vfs_write(f.file, buf, count, ppos);
		if (ret >= 0 && ppos)
			f.file->f_pos = pos;
		fdput_pos(f);
	}

	return ret;
}

SYSCALL_DEFINE3(write, unsigned int, fd, const char __user *, buf,
		size_t, count)
{
	return ksys_write(fd, buf, count);
}

/*
 * Body of pread64(2): read at the explicit position @pos without touching
 * the file's f_pos.  Returns -EINVAL for a negative @pos, -EBADF for an
 * invalid @fd, -ESPIPE when the file lacks FMODE_PREAD, otherwise the
 * vfs_read() result.
 */
ssize_t ksys_pread64(unsigned int fd, char __user *buf, size_t count,
		     loff_t pos)
{
	struct fd f;
	ssize_t ret = -EBADF;

	if (pos < 0)
		return -EINVAL;

	f = fdget(fd);
	if (f.file) {
		ret = -ESPIPE;
		if (f.file->f_mode & FMODE_PREAD)
			ret = vfs_read(f.file, buf, count, &pos);
		fdput(f);
	}

	return ret;
}

SYSCALL_DEFINE4(pread64, unsigned int, fd, char __user *, buf,
			size_t, count, loff_t, pos)
{
	return ksys_pread64(fd, buf, count, pos);
}

/*
 * Body of pwrite64(2): write at the explicit position @pos without touching
 * the file's f_pos.  Error returns mirror ksys_pread64(), with FMODE_PWRITE
 * as the required mode bit.
 */
ssize_t ksys_pwrite64(unsigned int fd, const char __user *buf,
		      size_t count, loff_t pos)
{
	struct fd f;
	ssize_t ret = -EBADF;

	if (pos < 0)
		return -EINVAL;

	f = fdget(fd);
	if (f.file) {
		ret = -ESPIPE;
		if (f.file->f_mode & FMODE_PWRITE)
			ret = vfs_write(f.file, buf, count, &pos);
		fdput(f);
	}

	return ret;
}

SYSCALL_DEFINE4(pwrite64, unsigned int, fd, const char __user *, buf,
			 size_t, count, loff_t, pos)
{
	return ksys_pwrite64(fd, buf, count, pos);
}

/*
 * Run one synchronous ->read_iter / ->write_iter call (selected by @type)
 * over @iter with the per-call @flags applied to the kiocb.  Returns the
 * kiocb_set_rw_flags() error if the flags are rejected.  When @ppos is
 * given it is always updated from the kiocb afterwards.
 */
static ssize_t do_iter_readv_writev(struct file *filp, struct iov_iter *iter,
		loff_t *ppos, int type, rwf_t flags)
{
	struct kiocb kiocb;
	ssize_t ret;

	init_sync_kiocb(&kiocb, filp);
	ret = kiocb_set_rw_flags(&kiocb, flags);
	if (ret)
		return ret;
	kiocb.ki_pos = (ppos ? *ppos : 0);

	if (type == READ)
		ret = call_read_iter(filp, &kiocb, iter);
	else
		ret = call_write_iter(filp, &kiocb, iter);
	BUG_ON(ret == -EIOCBQUEUED);
	if (ppos)
		*ppos = kiocb.ki_pos;
	return ret;
}

/*
 * Fallback for files without ->read_iter / ->write_iter: call ->read or
 * ->write once per iovec segment.  Only RWF_HIPRI is tolerated in @flags
 * (-EOPNOTSUPP otherwise).  Stops at the first error or short transfer; an
 * error is reported only if nothing was transferred before it.
 */
/* Do it by hand, with file-ops */
static ssize_t do_loop_readv_writev(struct file *filp, struct iov_iter *iter,
		loff_t *ppos, int type, rwf_t flags)
{
	ssize_t ret = 0;

	if (flags & ~RWF_HIPRI)
		return -EOPNOTSUPP;

	while (iov_iter_count(iter)) {
		struct iovec iovec = iov_iter_iovec(iter);
		ssize_t nr;

		if (type == READ) {
			nr = filp->f_op->read(filp, iovec.iov_base,
					      iovec.iov_len, ppos);
		} else {
			nr = filp->f_op->write(filp, iovec.iov_base,
					       iovec.iov_len, ppos);
		}

		if (nr < 0) {
			if (!ret)
				ret = nr;
			break;
		}
		ret += nr;
		if (nr != iovec.iov_len)
			break;
		iov_iter_advance(iter, nr);
	}

	return ret;
}

/*
 * Vectored read into @iter.  A zero-length request skips verification and
 * the actual read but still raises fsnotify_access() and returns 0.
 */
static ssize_t do_iter_read(struct file *file, struct iov_iter *iter,
		loff_t *pos, rwf_t flags)
{
	size_t tot_len;
	ssize_t ret = 0;

	if (!(file->f_mode & FMODE_READ))
		return -EBADF;
	if (!(file->f_mode & FMODE_CAN_READ))
		return -EINVAL;

	tot_len = iov_iter_count(iter);
	if (!tot_len)
		goto out;
	ret = rw_verify_area(READ, file, pos, tot_len);
	if (ret < 0)
		return ret;

	if (file->f_op->read_iter)
		ret = do_iter_readv_writev(file, iter, pos, READ, flags);
	else
		ret = do_loop_readv_writev(file, iter, pos, READ, flags);
out:
	if (ret >= 0)
		fsnotify_access(file);
	return ret;
}

/*
 * Read into @iter using a caller-provided @iocb (position taken from
 * iocb->ki_pos).  Requires ->read_iter (-EINVAL otherwise).  Zero-length
 * requests behave as in do_iter_read().
 */
ssize_t vfs_iocb_iter_read(struct file *file, struct kiocb *iocb,
			   struct iov_iter *iter)
{
	size_t tot_len;
	ssize_t ret = 0;

	if (!file->f_op->read_iter)
		return -EINVAL;
	if (!(file->f_mode & FMODE_READ))
		return -EBADF;
	if (!(file->f_mode & FMODE_CAN_READ))
		return -EINVAL;

	tot_len = iov_iter_count(iter);
	if (!tot_len)
		goto out;
	ret = rw_verify_area(READ, file, &iocb->ki_pos, tot_len);
	if (ret < 0)
		return ret;

	ret = call_read_iter(file, iocb, iter);
out:
	if (ret >= 0)
		fsnotify_access(file);
	return ret;
}
EXPORT_SYMBOL(vfs_iocb_iter_read);

/* do_iter_read() restricted to files that provide ->read_iter. */
ssize_t vfs_iter_read(struct file *file, struct iov_iter *iter, loff_t *ppos,
		rwf_t flags)
{
	if (!file->f_op->read_iter)
		return -EINVAL;
	return do_iter_read(file, iter, ppos, flags);
}
EXPORT_SYMBOL(vfs_iter_read);

/*
 * Vectored write from @iter.  A zero-length request returns 0 immediately
 * without any notification.  fsnotify_modify() is raised only when bytes
 * were written.  The caller provides file_start_write()/file_end_write()
 * (see vfs_writev()).
 */
static ssize_t do_iter_write(struct file *file, struct iov_iter *iter,
		loff_t *pos, rwf_t flags)
{
	size_t tot_len;
	ssize_t ret = 0;

	if (!(file->f_mode & FMODE_WRITE))
		return -EBADF;
	if (!(file->f_mode & FMODE_CAN_WRITE))
		return -EINVAL;

	tot_len = iov_iter_count(iter);
	if (!tot_len)
		return 0;
	ret = rw_verify_area(WRITE, file, pos, tot_len);
	if (ret < 0)
		return ret;

	if (file->f_op->write_iter)
		ret = do_iter_readv_writev(file, iter, pos, WRITE, flags);
	else
		ret = do_loop_readv_writev(file, iter, pos, WRITE, flags);
	if (ret > 0)
		fsnotify_modify(file);
	return ret;
}

/*
 * Write from @iter using a caller-provided @iocb (position taken from
 * iocb->ki_pos).  Requires ->write_iter (-EINVAL otherwise).
 */
ssize_t vfs_iocb_iter_write(struct file *file, struct kiocb *iocb,
			    struct iov_iter *iter)
{
	size_t tot_len;
	ssize_t ret = 0;

	if (!file->f_op->write_iter)
		return -EINVAL;
	if (!(file->f_mode & FMODE_WRITE))
		return -EBADF;
	if (!(file->f_mode & FMODE_CAN_WRITE))
		return -EINVAL;

	tot_len = iov_iter_count(iter);
	if (!tot_len)
		return 0;
	ret = rw_verify_area(WRITE, file, &iocb->ki_pos, tot_len);
	if (ret < 0)
		return ret;

	ret = call_write_iter(file, iocb, iter);
	if (ret > 0)
		fsnotify_modify(file);

	return ret;
}
EXPORT_SYMBOL(vfs_iocb_iter_write);

/* do_iter_write() restricted to files that provide ->write_iter. */
ssize_t vfs_iter_write(struct file *file, struct iov_iter *iter, loff_t *ppos,
		rwf_t flags)
{
	if (!file->f_op->write_iter)
		return -EINVAL;
	return do_iter_write(file, iter, ppos, flags);
}
EXPORT_SYMBOL(vfs_iter_write);

/*
 * Import the user iovec array @vec (@vlen entries) and read into it.
 * The iovec pointer handed back by import_iovec() is passed to kfree()
 * once the read is done; an import failure is returned unchanged.
 */
static ssize_t vfs_readv(struct file *file, const struct iovec __user *vec,
		  unsigned long vlen, loff_t *pos, rwf_t flags)
{
	struct iovec iovstack[UIO_FASTIOV];
	struct iovec *iov = iovstack;
	struct iov_iter iter;
	ssize_t ret;

	ret = import_iovec(READ, vec, vlen, ARRAY_SIZE(iovstack), &iov, &iter);
	if (ret >= 0) {
		ret = do_iter_read(file, &iter, pos, flags);
		kfree(iov);
	}

	return ret;
}

/*
 * Import the user iovec array @vec (@vlen entries) and write it out, with
 * the write bracketed by file_start_write()/file_end_write().
 */
static ssize_t vfs_writev(struct file *file, const struct iovec __user *vec,
		   unsigned long vlen, loff_t *pos, rwf_t flags)
{
	struct iovec iovstack[UIO_FASTIOV];
	struct iovec *iov = iovstack;
	struct iov_iter iter;
	ssize_t ret;

	ret = import_iovec(WRITE, vec, vlen, ARRAY_SIZE(iovstack), &iov, &iter);
	if (ret >= 0) {
		file_start_write(file);
		ret = do_iter_write(file, &iter, pos, flags);
		file_end_write(file);
		kfree(iov);
	}
	return ret;
}

/*
 * Body of readv(2)/preadv2(2) with pos == -1: vectored read at the file's
 * own position, using the same local-copy f_pos handling as ksys_read().
 * Read accounting is done here rather than in the vfs helper.
 */
static ssize_t do_readv(unsigned long fd, const struct iovec __user *vec,
			unsigned long vlen, rwf_t flags)
{
	struct fd f = fdget_pos(fd);
	ssize_t ret = -EBADF;

	if (f.file) {
		loff_t pos, *ppos = file_ppos(f.file);
		if (ppos) {
			pos = *ppos;
			ppos = &pos;
		}
		ret = vfs_readv(f.file, vec, vlen, ppos, flags);
		if (ret >= 0 && ppos)
			f.file->f_pos = pos;
		fdput_pos(f);
	}

	if (ret > 0)
		add_rchar(current, ret);
	inc_syscr(current);
	return ret;
}

/*
 * Body of writev(2)/pwritev2(2) with pos == -1: vectored write at the
 * file's own position; counterpart of do_readv().
 */
static ssize_t do_writev(unsigned long fd, const struct iovec __user *vec,
			 unsigned long vlen, rwf_t flags)
{
	struct fd f = fdget_pos(fd);
	ssize_t ret = -EBADF;

	if (f.file) {
		loff_t pos, *ppos = file_ppos(f.file);
		if (ppos) {
			pos = *ppos;
			ppos = &pos;
		}
		ret = vfs_writev(f.file, vec, vlen, ppos, flags);
		if (ret >= 0 && ppos)
			f.file->f_pos = pos;
		fdput_pos(f);
	}

	if (ret > 0)
		add_wchar(current, ret);
	inc_syscw(current);
	return ret;
}

/*
 * Combine the two syscall halves into one loff_t.  @high is shifted left
 * by BITS_PER_LONG in two half-width steps rather than one full-width
 * shift, then OR-ed with @low.
 */
static inline loff_t pos_from_hilo(unsigned long high, unsigned long low)
{
#define HALF_LONG_BITS (BITS_PER_LONG / 2)
	return (((loff_t)high << HALF_LONG_BITS) << HALF_LONG_BITS) | low;
}

/*
 * Body of preadv(2): vectored read at the explicit position @pos.
 * Returns -EINVAL for a negative @pos, -EBADF for an invalid @fd and
 * -ESPIPE when the file lacks FMODE_PREAD.
 */
static ssize_t do_preadv(unsigned long fd, const struct iovec __user *vec,
			 unsigned long vlen, loff_t pos, rwf_t flags)
{
	struct fd f;
	ssize_t ret = -EBADF;

	if (pos < 0)
		return -EINVAL;

	f = fdget(fd);
	if (f.file) {
		ret = -ESPIPE;
		if (f.file->f_mode & FMODE_PREAD)
			ret = vfs_readv(f.file, vec, vlen, &pos, flags);
		fdput(f);
	}

	if (ret > 0)
		add_rchar(current, ret);
	inc_syscr(current);
	return ret;
}

/*
 * Body of pwritev(2): vectored write at the explicit position @pos.
 * Error returns mirror do_preadv(), with FMODE_PWRITE as the required bit.
 */
static ssize_t do_pwritev(unsigned long fd, const struct iovec __user *vec,
			  unsigned long vlen, loff_t pos, rwf_t flags)
{
	struct fd f;
	ssize_t ret = -EBADF;

	if (pos < 0)
		return -EINVAL;

	f = fdget(fd);
	if (f.file) {
		ret = -ESPIPE;
		if (f.file->f_mode & FMODE_PWRITE)
			ret = vfs_writev(f.file, vec, vlen, &pos, flags);
		fdput(f);
	}

	if (ret > 0)
		add_wchar(current, ret);
	inc_syscw(current);
	return ret;
}

SYSCALL_DEFINE3(readv, unsigned long, fd, const struct iovec __user *, vec,
		unsigned long, vlen)
{
	return do_readv(fd, vec, vlen, 0);
}

SYSCALL_DEFINE3(writev, unsigned long, fd, const struct iovec __user *, vec,
		unsigned long, vlen)
{
	return do_writev(fd, vec, vlen, 0);
}

SYSCALL_DEFINE5(preadv, unsigned long, fd, const struct iovec __user *, vec,
		unsigned long, vlen, unsigned long, pos_l, unsigned long, pos_h)
{
	loff_t pos = pos_from_hilo(pos_h, pos_l);

	return do_preadv(fd, vec, vlen, pos, 0);
}

SYSCALL_DEFINE6(preadv2, unsigned long, fd, const struct iovec __user *, vec,
		unsigned long, vlen, unsigned long, pos_l, unsigned long, pos_h,
		rwf_t, flags)
{
	loff_t pos = pos_from_hilo(pos_h, pos_l);

	/* A position of -1 means "use and update the file's own position". */
	if (pos == -1)
		return do_readv(fd, vec, vlen, flags);

	return do_preadv(fd, vec, vlen, pos, flags);
}

SYSCALL_DEFINE5(pwritev, unsigned long, fd, const struct iovec __user *, vec,
		unsigned long, vlen, unsigned long, pos_l, unsigned long, pos_h)
{
	loff_t pos = pos_from_hilo(pos_h, pos_l);

	return do_pwritev(fd, vec, vlen, pos, 0);
}

SYSCALL_DEFINE6(pwritev2, unsigned long, fd, const struct iovec __user *, vec,
		unsigned long, vlen, unsigned long, pos_l, unsigned long, pos_h,
		rwf_t, flags)
{
	loff_t pos = pos_from_hilo(pos_h, pos_l);

	/* A position of -1 means "use and update the file's own position". */
	if (pos == -1)
		return do_writev(fd, vec, vlen, flags);

	return do_pwritev(fd, vec, vlen, pos, flags);
}

/*
 * Various compat syscalls.  Note that they all pretend to take a native
 * iovec - import_iovec will properly treat those as compat_iovecs based on
 * in_compat_syscall().
 */
#ifdef CONFIG_COMPAT
#ifdef __ARCH_WANT_COMPAT_SYS_PREADV64
COMPAT_SYSCALL_DEFINE4(preadv64, unsigned long, fd,
		const struct iovec __user *, vec,
		unsigned long, vlen, loff_t, pos)
{
	return do_preadv(fd, vec, vlen, pos, 0);
}
#endif

COMPAT_SYSCALL_DEFINE5(preadv, compat_ulong_t, fd,
		const struct iovec __user *, vec,
		compat_ulong_t, vlen, u32, pos_low, u32, pos_high)
{
	loff_t pos = ((loff_t)pos_high << 32) | pos_low;

	return do_preadv(fd, vec, vlen, pos, 0);
}

#ifdef __ARCH_WANT_COMPAT_SYS_PREADV64V2
COMPAT_SYSCALL_DEFINE5(preadv64v2, unsigned long, fd,
		const struct iovec __user *, vec,
		unsigned long, vlen, loff_t, pos, rwf_t, flags)
{
	if (pos == -1)
		return do_readv(fd, vec, vlen, flags);
	return do_preadv(fd, vec, vlen, pos, flags);
}
#endif

COMPAT_SYSCALL_DEFINE6(preadv2, compat_ulong_t, fd,
		const struct iovec __user *, vec,
		compat_ulong_t, vlen, u32, pos_low, u32, pos_high,
		rwf_t, flags)
{
	loff_t pos = ((loff_t)pos_high << 32) | pos_low;

	if (pos == -1)
		return do_readv(fd, vec, vlen, flags);
	return do_preadv(fd, vec, vlen, pos, flags);
}

#ifdef __ARCH_WANT_COMPAT_SYS_PWRITEV64
COMPAT_SYSCALL_DEFINE4(pwritev64, unsigned long, fd,
		const struct iovec __user *, vec,
		unsigned long, vlen, loff_t, pos)
{
	return do_pwritev(fd, vec, vlen, pos, 0);
}
#endif

COMPAT_SYSCALL_DEFINE5(pwritev, compat_ulong_t, fd,
		const struct iovec __user *,vec,
		compat_ulong_t, vlen, u32, pos_low, u32, pos_high)
{
	loff_t pos = ((loff_t)pos_high << 32) | pos_low;

	return do_pwritev(fd, vec, vlen, pos, 0);
}

#ifdef __ARCH_WANT_COMPAT_SYS_PWRITEV64V2
COMPAT_SYSCALL_DEFINE5(pwritev64v2, unsigned long, fd,
		const struct iovec __user *, vec,
		unsigned long, vlen, loff_t, pos, rwf_t, flags)
{
	if (pos == -1)
		return do_writev(fd, vec, vlen, flags);
	return do_pwritev(fd, vec, vlen, pos, flags);
}
#endif

COMPAT_SYSCALL_DEFINE6(pwritev2, compat_ulong_t, fd,
		const struct iovec __user *,vec,
		compat_ulong_t, vlen, u32, pos_low, u32, pos_high, rwf_t, flags)
{
	loff_t pos = ((loff_t)pos_high << 32) | pos_low;

	if (pos == -1)
		return do_writev(fd, vec, vlen, flags);
	return do_pwritev(fd, vec, vlen, pos, flags);
}
#endif /* CONFIG_COMPAT */

/*
 * Common body of the sendfile syscalls: copy up to @count bytes from @in_fd
 * to @out_fd.
 *
 * @ppos: explicit input position (requires FMODE_PREAD on the input file),
 *	  or NULL to use and update the input file's own f_pos.
 * @max:  upper bound for the input position; 0 selects the smaller
 *	  s_maxbytes of the two superblocks.
 *
 * @count is clamped to MAX_RW_COUNT and to @max - pos.  Data goes through
 * splice_file_to_pipe() when the output is a pipe and through
 * do_splice_direct() otherwise.  Positions, accounting and fsnotify events
 * are updated only when bytes were transferred.
 *
 * Returns the number of bytes transferred or a negative errno: -EBADF for
 * bad descriptors or modes, -ESPIPE for @ppos without FMODE_PREAD,
 * -EOVERFLOW when the position is at or past @max.
 */
static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos,
			   size_t count, loff_t max)
{
	struct fd in, out;
	struct inode *in_inode, *out_inode;
	struct pipe_inode_info *opipe;
	loff_t pos;
	loff_t out_pos;
	ssize_t retval;
	int fl;

	/*
	 * Get input file, and verify that it is ok..
	 */
	retval = -EBADF;
	in = fdget(in_fd);
	if (!in.file)
		goto out;
	if (!(in.file->f_mode & FMODE_READ))
		goto fput_in;
	retval = -ESPIPE;
	if (!ppos) {
		pos = in.file->f_pos;
	} else {
		pos = *ppos;
		if (!(in.file->f_mode & FMODE_PREAD))
			goto fput_in;
	}
	retval = rw_verify_area(READ, in.file, &pos, count);
	if (retval < 0)
		goto fput_in;
	if (count > MAX_RW_COUNT)
		count =  MAX_RW_COUNT;

	/*
	 * Get output file, and verify that it is ok..
	 */
	retval = -EBADF;
	out = fdget(out_fd);
	if (!out.file)
		goto fput_in;
	if (!(out.file->f_mode & FMODE_WRITE))
		goto fput_out;
	in_inode = file_inode(in.file);
	out_inode = file_inode(out.file);
	out_pos = out.file->f_pos;

	if (!max)
		max = min(in_inode->i_sb->s_maxbytes, out_inode->i_sb->s_maxbytes);

	/* Trim the request so the input position never moves past max. */
	if (unlikely(pos + count > max)) {
		retval = -EOVERFLOW;
		if (pos >= max)
			goto fput_out;
		count = max - pos;
	}

	fl = 0;
#if 0
	/*
	 * We need to debate whether we can enable this or not. The
	 * man page documents EAGAIN return for the output at least,
	 * and the application is arguably buggy if it doesn't expect
	 * EAGAIN on a non-blocking file descriptor.
	 */
	if (in.file->f_flags & O_NONBLOCK)
		fl = SPLICE_F_NONBLOCK;
#endif
	opipe = get_pipe_info(out.file, true);
	if (!opipe) {
		retval = rw_verify_area(WRITE, out.file, &out_pos, count);
		if (retval < 0)
			goto fput_out;
		file_start_write(out.file);
		retval = do_splice_direct(in.file, &pos, out.file, &out_pos,
					  count, fl);
		file_end_write(out.file);
	} else {
		retval = splice_file_to_pipe(in.file, opipe, &pos, count, fl);
	}

	if (retval > 0) {
		add_rchar(current, retval);
		add_wchar(current, retval);
		fsnotify_access(in.file);
		fsnotify_modify(out.file);
		out.file->f_pos = out_pos;
		if (ppos)
			*ppos = pos;
		else
			in.file->f_pos = pos;
	}

	inc_syscr(current);
	inc_syscw(current);
	if (pos > max)
		retval = -EOVERFLOW;

fput_out:
	fdput(out);
fput_in:
	fdput(in);
out:
	return retval;
}

/*
 * sendfile with an off_t offset: positions are limited to MAX_NON_LFS when
 * an offset pointer is supplied.  The (possibly updated) position is
 * written back to user space regardless of the do_sendfile() result; a
 * failed user copy returns -EFAULT.
 */
SYSCALL_DEFINE4(sendfile, int, out_fd, int, in_fd, off_t __user *, offset, size_t, count)
{
	loff_t pos;
	off_t off;
	ssize_t ret;

	if (offset) {
		if (unlikely(get_user(off, offset)))
			return -EFAULT;
		pos = off;
		ret = do_sendfile(out_fd, in_fd, &pos, count, MAX_NON_LFS);
		if (unlikely(put_user(pos, offset)))
			return -EFAULT;
		return ret;
	}

	return do_sendfile(out_fd, in_fd, NULL, count, 0);
}

/*
 * sendfile with a loff_t offset: no MAX_NON_LFS limit (max == 0 lets
 * do_sendfile() use the superblocks' s_maxbytes).
 */
SYSCALL_DEFINE4(sendfile64, int, out_fd, int, in_fd, loff_t __user *, offset, size_t, count)
{
	loff_t pos;
	ssize_t ret;

	if (offset) {
		if (unlikely(copy_from_user(&pos, offset, sizeof(loff_t))))
			return -EFAULT;
		ret = do_sendfile(out_fd, in_fd, &pos, count, 0);
		if (unlikely(put_user(pos, offset)))
			return -EFAULT;
		return ret;
	}

	return do_sendfile(out_fd, in_fd, NULL, count, 0);
}

#ifdef CONFIG_COMPAT
COMPAT_SYSCALL_DEFINE4(sendfile, int, out_fd, int, in_fd,
		compat_off_t __user *, offset, compat_size_t, count)
{
	loff_t pos;
	off_t off;
	ssize_t ret;

	if (offset) {
		if (unlikely(get_user(off, offset)))
			return -EFAULT;
		pos = off;
		ret = do_sendfile(out_fd, in_fd, &pos, count, MAX_NON_LFS);
		if (unlikely(put_user(pos, offset)))
			return -EFAULT;
		return ret;
	}

	return do_sendfile(out_fd, in_fd, NULL, count, 0);
}

COMPAT_SYSCALL_DEFINE4(sendfile64, int, out_fd, int, in_fd,
		compat_loff_t __user *, offset, compat_size_t, count)
{
	loff_t pos;
	ssize_t ret;

	if (offset) {
		if (unlikely(copy_from_user(&pos, offset, sizeof(loff_t))))
			return -EFAULT;
		ret = do_sendfile(out_fd, in_fd, &pos, count, 0);
		if (unlikely(put_user(pos, offset)))
			return -EFAULT;
		return ret;
	}

	return do_sendfile(out_fd, in_fd, NULL, count, 0);
}
#endif

/**
 * generic_copy_file_range - copy data between two files
 * @file_in:	file structure to read from
 * @pos_in:	file offset to read from
 * @file_out:	file structure to write data to
 * @pos_out:	file offset to write data to
 * @len:	amount of data to copy
 * @flags:	copy flags (not used by this helper)
 *
 * This is a generic filesystem helper to copy data from one file to another.
 * It has no constraints on the source or destination file owners - the files
 * can belong to different superblocks and different filesystem types.  Short
 * copies are allowed; at most MAX_RW_COUNT bytes are copied per call.
 *
 * This should be called from the @file_out filesystem, as per the
 * ->copy_file_range() method.
 *
 * Returns the number of bytes copied or a negative error indicating the
 * failure.
 */

ssize_t generic_copy_file_range(struct file *file_in, loff_t pos_in,
				struct file *file_out, loff_t pos_out,
				size_t len, unsigned int flags)
{
	return do_splice_direct(file_in, &pos_in, file_out, &pos_out,
				len > MAX_RW_COUNT ? MAX_RW_COUNT : len, 0);
}
EXPORT_SYMBOL(generic_copy_file_range);

/*
 * Copy via the filesystem's ->copy_file_range() when both files share the
 * same method pointer, otherwise fall back to generic_copy_file_range().
 */
static ssize_t do_copy_file_range(struct file *file_in, loff_t pos_in,
				  struct file *file_out, loff_t pos_out,
				  size_t len, unsigned int flags)
{
	/*
	 * Although we now allow filesystems to handle cross sb copy, passing
	 * a file of the wrong filesystem type to filesystem driver can result
	 * in an attempt to dereference the wrong type of ->private_data, so
	 * avoid doing that until we really have a good reason.  NFS defines
	 * several different file_system_type structures, but they all end up
	 * using the same ->copy_file_range() function pointer.
	 */
	if (file_out->f_op->copy_file_range &&
	    file_out->f_op->copy_file_range == file_in->f_op->copy_file_range)
		return file_out->f_op->copy_file_range(file_in, pos_in,
						       file_out, pos_out,
						       len, flags);

	return generic_copy_file_range(file_in, pos_in, file_out, pos_out, len,
				       flags);
}

/*
 * Performs necessary checks before doing a file copy
 *
 * Can adjust amount of bytes to copy via @req_count argument.
 * Returns appropriate error code that caller should return or
 * zero in case the copy should be allowed.
 *
 * Errors produced here: -EPERM for an immutable destination, -ETXTBSY when
 * either inode is a swapfile, -EOVERFLOW when an offset plus the count
 * wraps, -EINVAL for overlapping ranges within the same inode, plus
 * whatever generic_file_rw_checks() and generic_write_check_limits()
 * return.  @flags is not examined.
 */
static int generic_copy_file_checks(struct file *file_in, loff_t pos_in,
				    struct file *file_out, loff_t pos_out,
				    size_t *req_count, unsigned int flags)
{
	struct inode *inode_in = file_inode(file_in);
	struct inode *inode_out = file_inode(file_out);
	uint64_t count = *req_count;
	loff_t size_in;
	int ret;

	ret = generic_file_rw_checks(file_in, file_out);
	if (ret)
		return ret;

	/* Don't touch certain kinds of inodes */
	if (IS_IMMUTABLE(inode_out))
		return -EPERM;

	if (IS_SWAPFILE(inode_in) || IS_SWAPFILE(inode_out))
		return -ETXTBSY;

	/* Ensure offsets don't wrap. */
	if (pos_in + count < pos_in || pos_out + count < pos_out)
		return -EOVERFLOW;

	/* Shorten the copy to EOF */
	size_in = i_size_read(inode_in);
	if (pos_in >= size_in)
		count = 0;
	else
		count = min(count, size_in - (uint64_t)pos_in);

	ret = generic_write_check_limits(file_out, pos_out, &count);
	if (ret)
		return ret;

	/* Don't allow overlapped copying within the same file. */
	if (inode_in == inode_out &&
	    pos_out + count > pos_in &&
	    pos_out < pos_in + count)
		return -EINVAL;

	*req_count = count;
	return 0;
}

/*
 * copy_file_range() differs from regular file read and write in that it
 * specifically allows return partial success.  When it does so is up to
 * the copy_file_range method.
1466 */ 1467 ssize_t vfs_copy_file_range(struct file *file_in, loff_t pos_in, 1468 struct file *file_out, loff_t pos_out, 1469 size_t len, unsigned int flags) 1470 { 1471 ssize_t ret; 1472 1473 if (flags != 0) 1474 return -EINVAL; 1475 1476 ret = generic_copy_file_checks(file_in, pos_in, file_out, pos_out, &len, 1477 flags); 1478 if (unlikely(ret)) 1479 return ret; 1480 1481 ret = rw_verify_area(READ, file_in, &pos_in, len); 1482 if (unlikely(ret)) 1483 return ret; 1484 1485 ret = rw_verify_area(WRITE, file_out, &pos_out, len); 1486 if (unlikely(ret)) 1487 return ret; 1488 1489 if (len == 0) 1490 return 0; 1491 1492 file_start_write(file_out); 1493 1494 /* 1495 * Try cloning first, this is supported by more file systems, and 1496 * more efficient if both clone and copy are supported (e.g. NFS). 1497 */ 1498 if (file_in->f_op->remap_file_range && 1499 file_inode(file_in)->i_sb == file_inode(file_out)->i_sb) { 1500 loff_t cloned; 1501 1502 cloned = file_in->f_op->remap_file_range(file_in, pos_in, 1503 file_out, pos_out, 1504 min_t(loff_t, MAX_RW_COUNT, len), 1505 REMAP_FILE_CAN_SHORTEN); 1506 if (cloned > 0) { 1507 ret = cloned; 1508 goto done; 1509 } 1510 } 1511 1512 ret = do_copy_file_range(file_in, pos_in, file_out, pos_out, len, 1513 flags); 1514 WARN_ON_ONCE(ret == -EOPNOTSUPP); 1515 done: 1516 if (ret > 0) { 1517 fsnotify_access(file_in); 1518 add_rchar(current, ret); 1519 fsnotify_modify(file_out); 1520 add_wchar(current, ret); 1521 } 1522 1523 inc_syscr(current); 1524 inc_syscw(current); 1525 1526 file_end_write(file_out); 1527 1528 return ret; 1529 } 1530 EXPORT_SYMBOL(vfs_copy_file_range); 1531 1532 SYSCALL_DEFINE6(copy_file_range, int, fd_in, loff_t __user *, off_in, 1533 int, fd_out, loff_t __user *, off_out, 1534 size_t, len, unsigned int, flags) 1535 { 1536 loff_t pos_in; 1537 loff_t pos_out; 1538 struct fd f_in; 1539 struct fd f_out; 1540 ssize_t ret = -EBADF; 1541 1542 f_in = fdget(fd_in); 1543 if (!f_in.file) 1544 goto out2; 1545 1546 f_out = 
fdget(fd_out); 1547 if (!f_out.file) 1548 goto out1; 1549 1550 ret = -EFAULT; 1551 if (off_in) { 1552 if (copy_from_user(&pos_in, off_in, sizeof(loff_t))) 1553 goto out; 1554 } else { 1555 pos_in = f_in.file->f_pos; 1556 } 1557 1558 if (off_out) { 1559 if (copy_from_user(&pos_out, off_out, sizeof(loff_t))) 1560 goto out; 1561 } else { 1562 pos_out = f_out.file->f_pos; 1563 } 1564 1565 ret = vfs_copy_file_range(f_in.file, pos_in, f_out.file, pos_out, len, 1566 flags); 1567 if (ret > 0) { 1568 pos_in += ret; 1569 pos_out += ret; 1570 1571 if (off_in) { 1572 if (copy_to_user(off_in, &pos_in, sizeof(loff_t))) 1573 ret = -EFAULT; 1574 } else { 1575 f_in.file->f_pos = pos_in; 1576 } 1577 1578 if (off_out) { 1579 if (copy_to_user(off_out, &pos_out, sizeof(loff_t))) 1580 ret = -EFAULT; 1581 } else { 1582 f_out.file->f_pos = pos_out; 1583 } 1584 } 1585 1586 out: 1587 fdput(f_out); 1588 out1: 1589 fdput(f_in); 1590 out2: 1591 return ret; 1592 } 1593 1594 /* 1595 * Don't operate on ranges the page cache doesn't support, and don't exceed the 1596 * LFS limits. If pos is under the limit it becomes a short access. If it 1597 * exceeds the limit we return -EFBIG. 1598 */ 1599 int generic_write_check_limits(struct file *file, loff_t pos, loff_t *count) 1600 { 1601 struct inode *inode = file->f_mapping->host; 1602 loff_t max_size = inode->i_sb->s_maxbytes; 1603 loff_t limit = rlimit(RLIMIT_FSIZE); 1604 1605 if (limit != RLIM_INFINITY) { 1606 if (pos >= limit) { 1607 send_sig(SIGXFSZ, current, 0); 1608 return -EFBIG; 1609 } 1610 *count = min(*count, limit - pos); 1611 } 1612 1613 if (!(file->f_flags & O_LARGEFILE)) 1614 max_size = MAX_NON_LFS; 1615 1616 if (unlikely(pos >= max_size)) 1617 return -EFBIG; 1618 1619 *count = min(*count, max_size - pos); 1620 1621 return 0; 1622 } 1623 1624 /* 1625 * Performs necessary checks before doing a write 1626 * 1627 * Can adjust writing position or amount of bytes to write. 
1628 * Returns appropriate error code that caller should return or 1629 * zero in case that write should be allowed. 1630 */ 1631 ssize_t generic_write_checks(struct kiocb *iocb, struct iov_iter *from) 1632 { 1633 struct file *file = iocb->ki_filp; 1634 struct inode *inode = file->f_mapping->host; 1635 loff_t count; 1636 int ret; 1637 1638 if (IS_SWAPFILE(inode)) 1639 return -ETXTBSY; 1640 1641 if (!iov_iter_count(from)) 1642 return 0; 1643 1644 /* FIXME: this is for backwards compatibility with 2.4 */ 1645 if (iocb->ki_flags & IOCB_APPEND) 1646 iocb->ki_pos = i_size_read(inode); 1647 1648 if ((iocb->ki_flags & IOCB_NOWAIT) && !(iocb->ki_flags & IOCB_DIRECT)) 1649 return -EINVAL; 1650 1651 count = iov_iter_count(from); 1652 ret = generic_write_check_limits(file, iocb->ki_pos, &count); 1653 if (ret) 1654 return ret; 1655 1656 iov_iter_truncate(from, count); 1657 return iov_iter_count(from); 1658 } 1659 EXPORT_SYMBOL(generic_write_checks); 1660 1661 /* 1662 * Performs common checks before doing a file copy/clone 1663 * from @file_in to @file_out. 1664 */ 1665 int generic_file_rw_checks(struct file *file_in, struct file *file_out) 1666 { 1667 struct inode *inode_in = file_inode(file_in); 1668 struct inode *inode_out = file_inode(file_out); 1669 1670 /* Don't copy dirs, pipes, sockets... */ 1671 if (S_ISDIR(inode_in->i_mode) || S_ISDIR(inode_out->i_mode)) 1672 return -EISDIR; 1673 if (!S_ISREG(inode_in->i_mode) || !S_ISREG(inode_out->i_mode)) 1674 return -EINVAL; 1675 1676 if (!(file_in->f_mode & FMODE_READ) || 1677 !(file_out->f_mode & FMODE_WRITE) || 1678 (file_out->f_flags & O_APPEND)) 1679 return -EBADF; 1680 1681 return 0; 1682 } 1683