1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * linux/fs/read_write.c 4 * 5 * Copyright (C) 1991, 1992 Linus Torvalds 6 */ 7 8 #include <linux/slab.h> 9 #include <linux/stat.h> 10 #include <linux/sched/xacct.h> 11 #include <linux/fcntl.h> 12 #include <linux/file.h> 13 #include <linux/uio.h> 14 #include <linux/fsnotify.h> 15 #include <linux/security.h> 16 #include <linux/export.h> 17 #include <linux/syscalls.h> 18 #include <linux/pagemap.h> 19 #include <linux/splice.h> 20 #include <linux/compat.h> 21 #include <linux/mount.h> 22 #include <linux/fs.h> 23 #include "internal.h" 24 25 #include <linux/uaccess.h> 26 #include <asm/unistd.h> 27 28 const struct file_operations generic_ro_fops = { 29 .llseek = generic_file_llseek, 30 .read_iter = generic_file_read_iter, 31 .mmap = generic_file_readonly_mmap, 32 .splice_read = generic_file_splice_read, 33 }; 34 35 EXPORT_SYMBOL(generic_ro_fops); 36 37 static inline bool unsigned_offsets(struct file *file) 38 { 39 return file->f_mode & FMODE_UNSIGNED_OFFSET; 40 } 41 42 /** 43 * vfs_setpos - update the file offset for lseek 44 * @file: file structure in question 45 * @offset: file offset to seek to 46 * @maxsize: maximum file size 47 * 48 * This is a low-level filesystem helper for updating the file offset to 49 * the value specified by @offset if the given offset is valid and it is 50 * not equal to the current file offset. 51 * 52 * Return the specified offset on success and -EINVAL on invalid offset. 53 */ 54 loff_t vfs_setpos(struct file *file, loff_t offset, loff_t maxsize) 55 { 56 if (offset < 0 && !unsigned_offsets(file)) 57 return -EINVAL; 58 if (offset > maxsize) 59 return -EINVAL; 60 61 if (offset != file->f_pos) { 62 file->f_pos = offset; 63 file->f_version = 0; 64 } 65 return offset; 66 } 67 EXPORT_SYMBOL(vfs_setpos); 68 69 /** 70 * generic_file_llseek_size - generic llseek implementation for regular files 71 * @file: file structure to seek on 72 * @offset: file offset to seek to 73 * @whence: type of seek 74 * @size: max size of this file in file system 75 * @eof: offset used for SEEK_END position 76 * 77 * This is a variant of generic_file_llseek that allows passing in a custom 78 * maximum file size and a custom EOF position, for e.g. hashed directories 79 * 80 * Synchronization: 81 * SEEK_SET and SEEK_END are unsynchronized (but atomic on 64bit platforms) 82 * SEEK_CUR is synchronized against other SEEK_CURs, but not read/writes. 83 * read/writes behave like SEEK_SET against seeks. 84 */ 85 loff_t 86 generic_file_llseek_size(struct file *file, loff_t offset, int whence, 87 loff_t maxsize, loff_t eof) 88 { 89 switch (whence) { 90 case SEEK_END: 91 offset += eof; 92 break; 93 case SEEK_CUR: 94 /* 95 * Here we special-case the lseek(fd, 0, SEEK_CUR) 96 * position-querying operation. Avoid rewriting the "same" 97 * f_pos value back to the file because a concurrent read(), 98 * write() or lseek() might have altered it 99 */ 100 if (offset == 0) 101 return file->f_pos; 102 /* 103 * f_lock protects against read/modify/write race with other 104 * SEEK_CURs. Note that parallel writes and reads behave 105 * like SEEK_SET. 106 */ 107 spin_lock(&file->f_lock); 108 offset = vfs_setpos(file, file->f_pos + offset, maxsize); 109 spin_unlock(&file->f_lock); 110 return offset; 111 case SEEK_DATA: 112 /* 113 * In the generic case the entire file is data, so as long as 114 * offset isn't at the end of the file then the offset is data. 115 */ 116 if ((unsigned long long)offset >= eof) 117 return -ENXIO; 118 break; 119 case SEEK_HOLE: 120 /* 121 * There is a virtual hole at the end of the file, so as long as 122 * offset isn't i_size or larger, return i_size. 123 */ 124 if ((unsigned long long)offset >= eof) 125 return -ENXIO; 126 offset = eof; 127 break; 128 } 129 130 return vfs_setpos(file, offset, maxsize); 131 } 132 EXPORT_SYMBOL(generic_file_llseek_size); 133 134 /** 135 * generic_file_llseek - generic llseek implementation for regular files 136 * @file: file structure to seek on 137 * @offset: file offset to seek to 138 * @whence: type of seek 139 * 140 * This is a generic implemenation of ->llseek useable for all normal local 141 * filesystems. It just updates the file offset to the value specified by 142 * @offset and @whence. 143 */ 144 loff_t generic_file_llseek(struct file *file, loff_t offset, int whence) 145 { 146 struct inode *inode = file->f_mapping->host; 147 148 return generic_file_llseek_size(file, offset, whence, 149 inode->i_sb->s_maxbytes, 150 i_size_read(inode)); 151 } 152 EXPORT_SYMBOL(generic_file_llseek); 153 154 /** 155 * fixed_size_llseek - llseek implementation for fixed-sized devices 156 * @file: file structure to seek on 157 * @offset: file offset to seek to 158 * @whence: type of seek 159 * @size: size of the file 160 * 161 */ 162 loff_t fixed_size_llseek(struct file *file, loff_t offset, int whence, loff_t size) 163 { 164 switch (whence) { 165 case SEEK_SET: case SEEK_CUR: case SEEK_END: 166 return generic_file_llseek_size(file, offset, whence, 167 size, size); 168 default: 169 return -EINVAL; 170 } 171 } 172 EXPORT_SYMBOL(fixed_size_llseek); 173 174 /** 175 * no_seek_end_llseek - llseek implementation for fixed-sized devices 176 * @file: file structure to seek on 177 * @offset: file offset to seek to 178 * @whence: type of seek 179 * 180 */ 181 loff_t no_seek_end_llseek(struct file *file, loff_t offset, int whence) 182 { 183 switch (whence) { 184 case SEEK_SET: case SEEK_CUR: 185 return generic_file_llseek_size(file, offset, whence, 186 OFFSET_MAX, 0); 187 default: 188 return -EINVAL; 189 } 190 } 191 EXPORT_SYMBOL(no_seek_end_llseek); 192 193 /** 194 * no_seek_end_llseek_size - llseek implementation for fixed-sized devices 195 * @file: file structure to seek on 196 * @offset: file offset to seek to 197 * @whence: type of seek 198 * @size: maximal offset allowed 199 * 200 */ 201 loff_t no_seek_end_llseek_size(struct file *file, loff_t offset, int whence, loff_t size) 202 { 203 switch (whence) { 204 case SEEK_SET: case SEEK_CUR: 205 return generic_file_llseek_size(file, offset, whence, 206 size, 0); 207 default: 208 return -EINVAL; 209 } 210 } 211 EXPORT_SYMBOL(no_seek_end_llseek_size); 212 213 /** 214 * noop_llseek - No Operation Performed llseek implementation 215 * @file: file structure to seek on 216 * @offset: file offset to seek to 217 * @whence: type of seek 218 * 219 * This is an implementation of ->llseek useable for the rare special case when 220 * userspace expects the seek to succeed but the (device) file is actually not 221 * able to perform the seek. In this case you use noop_llseek() instead of 222 * falling back to the default implementation of ->llseek. 223 */ 224 loff_t noop_llseek(struct file *file, loff_t offset, int whence) 225 { 226 return file->f_pos; 227 } 228 EXPORT_SYMBOL(noop_llseek); 229 230 loff_t no_llseek(struct file *file, loff_t offset, int whence) 231 { 232 return -ESPIPE; 233 } 234 EXPORT_SYMBOL(no_llseek); 235 236 loff_t default_llseek(struct file *file, loff_t offset, int whence) 237 { 238 struct inode *inode = file_inode(file); 239 loff_t retval; 240 241 inode_lock(inode); 242 switch (whence) { 243 case SEEK_END: 244 offset += i_size_read(inode); 245 break; 246 case SEEK_CUR: 247 if (offset == 0) { 248 retval = file->f_pos; 249 goto out; 250 } 251 offset += file->f_pos; 252 break; 253 case SEEK_DATA: 254 /* 255 * In the generic case the entire file is data, so as 256 * long as offset isn't at the end of the file then the 257 * offset is data. 258 */ 259 if (offset >= inode->i_size) { 260 retval = -ENXIO; 261 goto out; 262 } 263 break; 264 case SEEK_HOLE: 265 /* 266 * There is a virtual hole at the end of the file, so 267 * as long as offset isn't i_size or larger, return 268 * i_size. 269 */ 270 if (offset >= inode->i_size) { 271 retval = -ENXIO; 272 goto out; 273 } 274 offset = inode->i_size; 275 break; 276 } 277 retval = -EINVAL; 278 if (offset >= 0 || unsigned_offsets(file)) { 279 if (offset != file->f_pos) { 280 file->f_pos = offset; 281 file->f_version = 0; 282 } 283 retval = offset; 284 } 285 out: 286 inode_unlock(inode); 287 return retval; 288 } 289 EXPORT_SYMBOL(default_llseek); 290 291 loff_t vfs_llseek(struct file *file, loff_t offset, int whence) 292 { 293 loff_t (*fn)(struct file *, loff_t, int); 294 295 fn = no_llseek; 296 if (file->f_mode & FMODE_LSEEK) { 297 if (file->f_op->llseek) 298 fn = file->f_op->llseek; 299 } 300 return fn(file, offset, whence); 301 } 302 EXPORT_SYMBOL(vfs_llseek); 303 304 static off_t ksys_lseek(unsigned int fd, off_t offset, unsigned int whence) 305 { 306 off_t retval; 307 struct fd f = fdget_pos(fd); 308 if (!f.file) 309 return -EBADF; 310 311 retval = -EINVAL; 312 if (whence <= SEEK_MAX) { 313 loff_t res = vfs_llseek(f.file, offset, whence); 314 retval = res; 315 if (res != (loff_t)retval) 316 retval = -EOVERFLOW; /* LFS: should only happen on 32 bit platforms */ 317 } 318 fdput_pos(f); 319 return retval; 320 } 321 322 SYSCALL_DEFINE3(lseek, unsigned int, fd, off_t, offset, unsigned int, whence) 323 { 324 return ksys_lseek(fd, offset, whence); 325 } 326 327 #ifdef CONFIG_COMPAT 328 COMPAT_SYSCALL_DEFINE3(lseek, unsigned int, fd, compat_off_t, offset, unsigned int, whence) 329 { 330 return ksys_lseek(fd, offset, whence); 331 } 332 #endif 333 334 #if !defined(CONFIG_64BIT) || defined(CONFIG_COMPAT) || \ 335 defined(__ARCH_WANT_SYS_LLSEEK) 336 SYSCALL_DEFINE5(llseek, unsigned int, fd, unsigned long, offset_high, 337 unsigned long, offset_low, loff_t __user *, result, 338 unsigned int, whence) 339 { 340 int retval; 341 struct fd f = fdget_pos(fd); 342 loff_t offset; 343 344 if (!f.file) 345 return -EBADF; 346 347 retval = -EINVAL; 348 if (whence > SEEK_MAX) 349 goto out_putf; 350 351 offset = vfs_llseek(f.file, ((loff_t) offset_high << 32) | offset_low, 352 whence); 353 354 retval = (int)offset; 355 if (offset >= 0) { 356 retval = -EFAULT; 357 if (!copy_to_user(result, &offset, sizeof(offset))) 358 retval = 0; 359 } 360 out_putf: 361 fdput_pos(f); 362 return retval; 363 } 364 #endif 365 366 int rw_verify_area(int read_write, struct file *file, const loff_t *ppos, size_t count) 367 { 368 if (unlikely((ssize_t) count < 0)) 369 return -EINVAL; 370 371 if (ppos) { 372 loff_t pos = *ppos; 373 374 if (unlikely(pos < 0)) { 375 if (!unsigned_offsets(file)) 376 return -EINVAL; 377 if (count >= -pos) /* both values are in 0..LLONG_MAX */ 378 return -EOVERFLOW; 379 } else if (unlikely((loff_t) (pos + count) < 0)) { 380 if (!unsigned_offsets(file)) 381 return -EINVAL; 382 } 383 } 384 385 return security_file_permission(file, 386 read_write == READ ? MAY_READ : MAY_WRITE); 387 } 388 389 static ssize_t new_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos) 390 { 391 struct iovec iov = { .iov_base = buf, .iov_len = len }; 392 struct kiocb kiocb; 393 struct iov_iter iter; 394 ssize_t ret; 395 396 init_sync_kiocb(&kiocb, filp); 397 kiocb.ki_pos = (ppos ? *ppos : 0); 398 iov_iter_init(&iter, READ, &iov, 1, len); 399 400 ret = call_read_iter(filp, &kiocb, &iter); 401 BUG_ON(ret == -EIOCBQUEUED); 402 if (ppos) 403 *ppos = kiocb.ki_pos; 404 return ret; 405 } 406 407 static int warn_unsupported(struct file *file, const char *op) 408 { 409 pr_warn_ratelimited( 410 "kernel %s not supported for file %pD4 (pid: %d comm: %.20s)\n", 411 op, file, current->pid, current->comm); 412 return -EINVAL; 413 } 414 415 ssize_t __kernel_read(struct file *file, void *buf, size_t count, loff_t *pos) 416 { 417 struct kvec iov = { 418 .iov_base = buf, 419 .iov_len = min_t(size_t, count, MAX_RW_COUNT), 420 }; 421 struct kiocb kiocb; 422 struct iov_iter iter; 423 ssize_t ret; 424 425 if (WARN_ON_ONCE(!(file->f_mode & FMODE_READ))) 426 return -EINVAL; 427 if (!(file->f_mode & FMODE_CAN_READ)) 428 return -EINVAL; 429 /* 430 * Also fail if ->read_iter and ->read are both wired up as that 431 * implies very convoluted semantics. 432 */ 433 if (unlikely(!file->f_op->read_iter || file->f_op->read)) 434 return warn_unsupported(file, "read"); 435 436 init_sync_kiocb(&kiocb, file); 437 kiocb.ki_pos = pos ? *pos : 0; 438 iov_iter_kvec(&iter, READ, &iov, 1, iov.iov_len); 439 ret = file->f_op->read_iter(&kiocb, &iter); 440 if (ret > 0) { 441 if (pos) 442 *pos = kiocb.ki_pos; 443 fsnotify_access(file); 444 add_rchar(current, ret); 445 } 446 inc_syscr(current); 447 return ret; 448 } 449 450 ssize_t kernel_read(struct file *file, void *buf, size_t count, loff_t *pos) 451 { 452 ssize_t ret; 453 454 ret = rw_verify_area(READ, file, pos, count); 455 if (ret) 456 return ret; 457 return __kernel_read(file, buf, count, pos); 458 } 459 EXPORT_SYMBOL(kernel_read); 460 461 ssize_t vfs_read(struct file *file, char __user *buf, size_t count, loff_t *pos) 462 { 463 ssize_t ret; 464 465 if (!(file->f_mode & FMODE_READ)) 466 return -EBADF; 467 if (!(file->f_mode & FMODE_CAN_READ)) 468 return -EINVAL; 469 if (unlikely(!access_ok(buf, count))) 470 return -EFAULT; 471 472 ret = rw_verify_area(READ, file, pos, count); 473 if (ret) 474 return ret; 475 if (count > MAX_RW_COUNT) 476 count = MAX_RW_COUNT; 477 478 if (file->f_op->read) 479 ret = file->f_op->read(file, buf, count, pos); 480 else if (file->f_op->read_iter) 481 ret = new_sync_read(file, buf, count, pos); 482 else 483 ret = -EINVAL; 484 if (ret > 0) { 485 fsnotify_access(file); 486 add_rchar(current, ret); 487 } 488 inc_syscr(current); 489 return ret; 490 } 491 492 static ssize_t new_sync_write(struct file *filp, const char __user *buf, size_t len, loff_t *ppos) 493 { 494 struct iovec iov = { .iov_base = (void __user *)buf, .iov_len = len }; 495 struct kiocb kiocb; 496 struct iov_iter iter; 497 ssize_t ret; 498 499 init_sync_kiocb(&kiocb, filp); 500 kiocb.ki_pos = (ppos ? *ppos : 0); 501 iov_iter_init(&iter, WRITE, &iov, 1, len); 502 503 ret = call_write_iter(filp, &kiocb, &iter); 504 BUG_ON(ret == -EIOCBQUEUED); 505 if (ret > 0 && ppos) 506 *ppos = kiocb.ki_pos; 507 return ret; 508 } 509 510 /* caller is responsible for file_start_write/file_end_write */ 511 ssize_t __kernel_write(struct file *file, const void *buf, size_t count, loff_t *pos) 512 { 513 struct kvec iov = { 514 .iov_base = (void *)buf, 515 .iov_len = min_t(size_t, count, MAX_RW_COUNT), 516 }; 517 struct kiocb kiocb; 518 struct iov_iter iter; 519 ssize_t ret; 520 521 if (WARN_ON_ONCE(!(file->f_mode & FMODE_WRITE))) 522 return -EBADF; 523 if (!(file->f_mode & FMODE_CAN_WRITE)) 524 return -EINVAL; 525 /* 526 * Also fail if ->write_iter and ->write are both wired up as that 527 * implies very convoluted semantics. 528 */ 529 if (unlikely(!file->f_op->write_iter || file->f_op->write)) 530 return warn_unsupported(file, "write"); 531 532 init_sync_kiocb(&kiocb, file); 533 kiocb.ki_pos = pos ? *pos : 0; 534 iov_iter_kvec(&iter, WRITE, &iov, 1, iov.iov_len); 535 ret = file->f_op->write_iter(&kiocb, &iter); 536 if (ret > 0) { 537 if (pos) 538 *pos = kiocb.ki_pos; 539 fsnotify_modify(file); 540 add_wchar(current, ret); 541 } 542 inc_syscw(current); 543 return ret; 544 } 545 /* 546 * This "EXPORT_SYMBOL_GPL()" is more of a "EXPORT_SYMBOL_DONTUSE()", 547 * but autofs is one of the few internal kernel users that actually 548 * wants this _and_ can be built as a module. So we need to export 549 * this symbol for autofs, even though it really isn't appropriate 550 * for any other kernel modules. 551 */ 552 EXPORT_SYMBOL_GPL(__kernel_write); 553 554 ssize_t kernel_write(struct file *file, const void *buf, size_t count, 555 loff_t *pos) 556 { 557 ssize_t ret; 558 559 ret = rw_verify_area(WRITE, file, pos, count); 560 if (ret) 561 return ret; 562 563 file_start_write(file); 564 ret = __kernel_write(file, buf, count, pos); 565 file_end_write(file); 566 return ret; 567 } 568 EXPORT_SYMBOL(kernel_write); 569 570 ssize_t vfs_write(struct file *file, const char __user *buf, size_t count, loff_t *pos) 571 { 572 ssize_t ret; 573 574 if (!(file->f_mode & FMODE_WRITE)) 575 return -EBADF; 576 if (!(file->f_mode & FMODE_CAN_WRITE)) 577 return -EINVAL; 578 if (unlikely(!access_ok(buf, count))) 579 return -EFAULT; 580 581 ret = rw_verify_area(WRITE, file, pos, count); 582 if (ret) 583 return ret; 584 if (count > MAX_RW_COUNT) 585 count = MAX_RW_COUNT; 586 file_start_write(file); 587 if (file->f_op->write) 588 ret = file->f_op->write(file, buf, count, pos); 589 else if (file->f_op->write_iter) 590 ret = new_sync_write(file, buf, count, pos); 591 else 592 ret = -EINVAL; 593 if (ret > 0) { 594 fsnotify_modify(file); 595 add_wchar(current, ret); 596 } 597 inc_syscw(current); 598 file_end_write(file); 599 return ret; 600 } 601 602 /* file_ppos returns &file->f_pos or NULL if file is stream */ 603 static inline loff_t *file_ppos(struct file *file) 604 { 605 return file->f_mode & FMODE_STREAM ? NULL : &file->f_pos; 606 } 607 608 ssize_t ksys_read(unsigned int fd, char __user *buf, size_t count) 609 { 610 struct fd f = fdget_pos(fd); 611 ssize_t ret = -EBADF; 612 613 if (f.file) { 614 loff_t pos, *ppos = file_ppos(f.file); 615 if (ppos) { 616 pos = *ppos; 617 ppos = &pos; 618 } 619 ret = vfs_read(f.file, buf, count, ppos); 620 if (ret >= 0 && ppos) 621 f.file->f_pos = pos; 622 fdput_pos(f); 623 } 624 return ret; 625 } 626 627 SYSCALL_DEFINE3(read, unsigned int, fd, char __user *, buf, size_t, count) 628 { 629 return ksys_read(fd, buf, count); 630 } 631 632 ssize_t ksys_write(unsigned int fd, const char __user *buf, size_t count) 633 { 634 struct fd f = fdget_pos(fd); 635 ssize_t ret = -EBADF; 636 637 if (f.file) { 638 loff_t pos, *ppos = file_ppos(f.file); 639 if (ppos) { 640 pos = *ppos; 641 ppos = &pos; 642 } 643 ret = vfs_write(f.file, buf, count, ppos); 644 if (ret >= 0 && ppos) 645 f.file->f_pos = pos; 646 fdput_pos(f); 647 } 648 649 return ret; 650 } 651 652 SYSCALL_DEFINE3(write, unsigned int, fd, const char __user *, buf, 653 size_t, count) 654 { 655 return ksys_write(fd, buf, count); 656 } 657 658 ssize_t ksys_pread64(unsigned int fd, char __user *buf, size_t count, 659 loff_t pos) 660 { 661 struct fd f; 662 ssize_t ret = -EBADF; 663 664 if (pos < 0) 665 return -EINVAL; 666 667 f = fdget(fd); 668 if (f.file) { 669 ret = -ESPIPE; 670 if (f.file->f_mode & FMODE_PREAD) 671 ret = vfs_read(f.file, buf, count, &pos); 672 fdput(f); 673 } 674 675 return ret; 676 } 677 678 SYSCALL_DEFINE4(pread64, unsigned int, fd, char __user *, buf, 679 size_t, count, loff_t, pos) 680 { 681 return ksys_pread64(fd, buf, count, pos); 682 } 683 684 ssize_t ksys_pwrite64(unsigned int fd, const char __user *buf, 685 size_t count, loff_t pos) 686 { 687 struct fd f; 688 ssize_t ret = -EBADF; 689 690 if (pos < 0) 691 return -EINVAL; 692 693 f = fdget(fd); 694 if (f.file) { 695 ret = -ESPIPE; 696 if (f.file->f_mode & FMODE_PWRITE) 697 ret = vfs_write(f.file, buf, count, &pos); 698 fdput(f); 699 } 700 701 return ret; 702 } 703 704 SYSCALL_DEFINE4(pwrite64, unsigned int, fd, const char __user *, buf, 705 size_t, count, loff_t, pos) 706 { 707 return ksys_pwrite64(fd, buf, count, pos); 708 } 709 710 static ssize_t do_iter_readv_writev(struct file *filp, struct iov_iter *iter, 711 loff_t *ppos, int type, rwf_t flags) 712 { 713 struct kiocb kiocb; 714 ssize_t ret; 715 716 init_sync_kiocb(&kiocb, filp); 717 ret = kiocb_set_rw_flags(&kiocb, flags); 718 if (ret) 719 return ret; 720 kiocb.ki_pos = (ppos ? *ppos : 0); 721 722 if (type == READ) 723 ret = call_read_iter(filp, &kiocb, iter); 724 else 725 ret = call_write_iter(filp, &kiocb, iter); 726 BUG_ON(ret == -EIOCBQUEUED); 727 if (ppos) 728 *ppos = kiocb.ki_pos; 729 return ret; 730 } 731 732 /* Do it by hand, with file-ops */ 733 static ssize_t do_loop_readv_writev(struct file *filp, struct iov_iter *iter, 734 loff_t *ppos, int type, rwf_t flags) 735 { 736 ssize_t ret = 0; 737 738 if (flags & ~RWF_HIPRI) 739 return -EOPNOTSUPP; 740 741 while (iov_iter_count(iter)) { 742 struct iovec iovec = iov_iter_iovec(iter); 743 ssize_t nr; 744 745 if (type == READ) { 746 nr = filp->f_op->read(filp, iovec.iov_base, 747 iovec.iov_len, ppos); 748 } else { 749 nr = filp->f_op->write(filp, iovec.iov_base, 750 iovec.iov_len, ppos); 751 } 752 753 if (nr < 0) { 754 if (!ret) 755 ret = nr; 756 break; 757 } 758 ret += nr; 759 if (nr != iovec.iov_len) 760 break; 761 iov_iter_advance(iter, nr); 762 } 763 764 return ret; 765 } 766 767 static ssize_t do_iter_read(struct file *file, struct iov_iter *iter, 768 loff_t *pos, rwf_t flags) 769 { 770 size_t tot_len; 771 ssize_t ret = 0; 772 773 if (!(file->f_mode & FMODE_READ)) 774 return -EBADF; 775 if (!(file->f_mode & FMODE_CAN_READ)) 776 return -EINVAL; 777 778 tot_len = iov_iter_count(iter); 779 if (!tot_len) 780 goto out; 781 ret = rw_verify_area(READ, file, pos, tot_len); 782 if (ret < 0) 783 return ret; 784 785 if (file->f_op->read_iter) 786 ret = do_iter_readv_writev(file, iter, pos, READ, flags); 787 else 788 ret = do_loop_readv_writev(file, iter, pos, READ, flags); 789 out: 790 if (ret >= 0) 791 fsnotify_access(file); 792 return ret; 793 } 794 795 ssize_t vfs_iocb_iter_read(struct file *file, struct kiocb *iocb, 796 struct iov_iter *iter) 797 { 798 size_t tot_len; 799 ssize_t ret = 0; 800 801 if (!file->f_op->read_iter) 802 return -EINVAL; 803 if (!(file->f_mode & FMODE_READ)) 804 return -EBADF; 805 if (!(file->f_mode & FMODE_CAN_READ)) 806 return -EINVAL; 807 808 tot_len = iov_iter_count(iter); 809 if (!tot_len) 810 goto out; 811 ret = rw_verify_area(READ, file, &iocb->ki_pos, tot_len); 812 if (ret < 0) 813 return ret; 814 815 ret = call_read_iter(file, iocb, iter); 816 out: 817 if (ret >= 0) 818 fsnotify_access(file); 819 return ret; 820 } 821 EXPORT_SYMBOL(vfs_iocb_iter_read); 822 823 ssize_t vfs_iter_read(struct file *file, struct iov_iter *iter, loff_t *ppos, 824 rwf_t flags) 825 { 826 if (!file->f_op->read_iter) 827 return -EINVAL; 828 return do_iter_read(file, iter, ppos, flags); 829 } 830 EXPORT_SYMBOL(vfs_iter_read); 831 832 static ssize_t do_iter_write(struct file *file, struct iov_iter *iter, 833 loff_t *pos, rwf_t flags) 834 { 835 size_t tot_len; 836 ssize_t ret = 0; 837 838 if (!(file->f_mode & FMODE_WRITE)) 839 return -EBADF; 840 if (!(file->f_mode & FMODE_CAN_WRITE)) 841 return -EINVAL; 842 843 tot_len = iov_iter_count(iter); 844 if (!tot_len) 845 return 0; 846 ret = rw_verify_area(WRITE, file, pos, tot_len); 847 if (ret < 0) 848 return ret; 849 850 if (file->f_op->write_iter) 851 ret = do_iter_readv_writev(file, iter, pos, WRITE, flags); 852 else 853 ret = do_loop_readv_writev(file, iter, pos, WRITE, flags); 854 if (ret > 0) 855 fsnotify_modify(file); 856 return ret; 857 } 858 859 ssize_t vfs_iocb_iter_write(struct file *file, struct kiocb *iocb, 860 struct iov_iter *iter) 861 { 862 size_t tot_len; 863 ssize_t ret = 0; 864 865 if (!file->f_op->write_iter) 866 return -EINVAL; 867 if (!(file->f_mode & FMODE_WRITE)) 868 return -EBADF; 869 if (!(file->f_mode & FMODE_CAN_WRITE)) 870 return -EINVAL; 871 872 tot_len = iov_iter_count(iter); 873 if (!tot_len) 874 return 0; 875 ret = rw_verify_area(WRITE, file, &iocb->ki_pos, tot_len); 876 if (ret < 0) 877 return ret; 878 879 ret = call_write_iter(file, iocb, iter); 880 if (ret > 0) 881 fsnotify_modify(file); 882 883 return ret; 884 } 885 EXPORT_SYMBOL(vfs_iocb_iter_write); 886 887 ssize_t vfs_iter_write(struct file *file, struct iov_iter *iter, loff_t *ppos, 888 rwf_t flags) 889 { 890 if (!file->f_op->write_iter) 891 return -EINVAL; 892 return do_iter_write(file, iter, ppos, flags); 893 } 894 EXPORT_SYMBOL(vfs_iter_write); 895 896 static ssize_t vfs_readv(struct file *file, const struct iovec __user *vec, 897 unsigned long vlen, loff_t *pos, rwf_t flags) 898 { 899 struct iovec iovstack[UIO_FASTIOV]; 900 struct iovec *iov = iovstack; 901 struct iov_iter iter; 902 ssize_t ret; 903 904 ret = import_iovec(READ, vec, vlen, ARRAY_SIZE(iovstack), &iov, &iter); 905 if (ret >= 0) { 906 ret = do_iter_read(file, &iter, pos, flags); 907 kfree(iov); 908 } 909 910 return ret; 911 } 912 913 static ssize_t vfs_writev(struct file *file, const struct iovec __user *vec, 914 unsigned long vlen, loff_t *pos, rwf_t flags) 915 { 916 struct iovec iovstack[UIO_FASTIOV]; 917 struct iovec *iov = iovstack; 918 struct iov_iter iter; 919 ssize_t ret; 920 921 ret = import_iovec(WRITE, vec, vlen, ARRAY_SIZE(iovstack), &iov, &iter); 922 if (ret >= 0) { 923 file_start_write(file); 924 ret = do_iter_write(file, &iter, pos, flags); 925 file_end_write(file); 926 kfree(iov); 927 } 928 return ret; 929 } 930 931 static ssize_t do_readv(unsigned long fd, const struct iovec __user *vec, 932 unsigned long vlen, rwf_t flags) 933 { 934 struct fd f = fdget_pos(fd); 935 ssize_t ret = -EBADF; 936 937 if (f.file) { 938 loff_t pos, *ppos = file_ppos(f.file); 939 if (ppos) { 940 pos = *ppos; 941 ppos = &pos; 942 } 943 ret = vfs_readv(f.file, vec, vlen, ppos, flags); 944 if (ret >= 0 && ppos) 945 f.file->f_pos = pos; 946 fdput_pos(f); 947 } 948 949 if (ret > 0) 950 add_rchar(current, ret); 951 inc_syscr(current); 952 return ret; 953 } 954 955 static ssize_t do_writev(unsigned long fd, const struct iovec __user *vec, 956 unsigned long vlen, rwf_t flags) 957 { 958 struct fd f = fdget_pos(fd); 959 ssize_t ret = -EBADF; 960 961 if (f.file) { 962 loff_t pos, *ppos = file_ppos(f.file); 963 if (ppos) { 964 pos = *ppos; 965 ppos = &pos; 966 } 967 ret = vfs_writev(f.file, vec, vlen, ppos, flags); 968 if (ret >= 0 && ppos) 969 f.file->f_pos = pos; 970 fdput_pos(f); 971 } 972 973 if (ret > 0) 974 add_wchar(current, ret); 975 inc_syscw(current); 976 return ret; 977 } 978 979 static inline loff_t pos_from_hilo(unsigned long high, unsigned long low) 980 { 981 #define HALF_LONG_BITS (BITS_PER_LONG / 2) 982 return (((loff_t)high << HALF_LONG_BITS) << HALF_LONG_BITS) | low; 983 } 984 985 static ssize_t do_preadv(unsigned long fd, const struct iovec __user *vec, 986 unsigned long vlen, loff_t pos, rwf_t flags) 987 { 988 struct fd f; 989 ssize_t ret = -EBADF; 990 991 if (pos < 0) 992 return -EINVAL; 993 994 f = fdget(fd); 995 if (f.file) { 996 ret = -ESPIPE; 997 if (f.file->f_mode & FMODE_PREAD) 998 ret = vfs_readv(f.file, vec, vlen, &pos, flags); 999 fdput(f); 1000 } 1001 1002 if (ret > 0) 1003 add_rchar(current, ret); 1004 inc_syscr(current); 1005 return ret; 1006 } 1007 1008 static ssize_t do_pwritev(unsigned long fd, const struct iovec __user *vec, 1009 unsigned long vlen, loff_t pos, rwf_t flags) 1010 { 1011 struct fd f; 1012 ssize_t ret = -EBADF; 1013 1014 if (pos < 0) 1015 return -EINVAL; 1016 1017 f = fdget(fd); 1018 if (f.file) { 1019 ret = -ESPIPE; 1020 if (f.file->f_mode & FMODE_PWRITE) 1021 ret = vfs_writev(f.file, vec, vlen, &pos, flags); 1022 fdput(f); 1023 } 1024 1025 if (ret > 0) 1026 add_wchar(current, ret); 1027 inc_syscw(current); 1028 return ret; 1029 } 1030 1031 SYSCALL_DEFINE3(readv, unsigned long, fd, const struct iovec __user *, vec, 1032 unsigned long, vlen) 1033 { 1034 return do_readv(fd, vec, vlen, 0); 1035 } 1036 1037 SYSCALL_DEFINE3(writev, unsigned long, fd, const struct iovec __user *, vec, 1038 unsigned long, vlen) 1039 { 1040 return do_writev(fd, vec, vlen, 0); 1041 } 1042 1043 SYSCALL_DEFINE5(preadv, unsigned long, fd, const struct iovec __user *, vec, 1044 unsigned long, vlen, unsigned long, pos_l, unsigned long, pos_h) 1045 { 1046 loff_t pos = pos_from_hilo(pos_h, pos_l); 1047 1048 return do_preadv(fd, vec, vlen, pos, 0); 1049 } 1050 1051 SYSCALL_DEFINE6(preadv2, unsigned long, fd, const struct iovec __user *, vec, 1052 unsigned long, vlen, unsigned long, pos_l, unsigned long, pos_h, 1053 rwf_t, flags) 1054 { 1055 loff_t pos = pos_from_hilo(pos_h, pos_l); 1056 1057 if (pos == -1) 1058 return do_readv(fd, vec, vlen, flags); 1059 1060 return do_preadv(fd, vec, vlen, pos, flags); 1061 } 1062 1063 SYSCALL_DEFINE5(pwritev, unsigned long, fd, const struct iovec __user *, vec, 1064 unsigned long, vlen, unsigned long, pos_l, unsigned long, pos_h) 1065 { 1066 loff_t pos = pos_from_hilo(pos_h, pos_l); 1067 1068 return do_pwritev(fd, vec, vlen, pos, 0); 1069 } 1070 1071 SYSCALL_DEFINE6(pwritev2, unsigned long, fd, const struct iovec __user *, vec, 1072 unsigned long, vlen, unsigned long, pos_l, unsigned long, pos_h, 1073 rwf_t, flags) 1074 { 1075 loff_t pos = pos_from_hilo(pos_h, pos_l); 1076 1077 if (pos == -1) 1078 return do_writev(fd, vec, vlen, flags); 1079 1080 return do_pwritev(fd, vec, vlen, pos, flags); 1081 } 1082 1083 /* 1084 * Various compat syscalls. Note that they all pretend to take a native 1085 * iovec - import_iovec will properly treat those as compat_iovecs based on 1086 * in_compat_syscall(). 1087 */ 1088 #ifdef CONFIG_COMPAT 1089 #ifdef __ARCH_WANT_COMPAT_SYS_PREADV64 1090 COMPAT_SYSCALL_DEFINE4(preadv64, unsigned long, fd, 1091 const struct iovec __user *, vec, 1092 unsigned long, vlen, loff_t, pos) 1093 { 1094 return do_preadv(fd, vec, vlen, pos, 0); 1095 } 1096 #endif 1097 1098 COMPAT_SYSCALL_DEFINE5(preadv, compat_ulong_t, fd, 1099 const struct iovec __user *, vec, 1100 compat_ulong_t, vlen, u32, pos_low, u32, pos_high) 1101 { 1102 loff_t pos = ((loff_t)pos_high << 32) | pos_low; 1103 1104 return do_preadv(fd, vec, vlen, pos, 0); 1105 } 1106 1107 #ifdef __ARCH_WANT_COMPAT_SYS_PREADV64V2 1108 COMPAT_SYSCALL_DEFINE5(preadv64v2, unsigned long, fd, 1109 const struct iovec __user *, vec, 1110 unsigned long, vlen, loff_t, pos, rwf_t, flags) 1111 { 1112 if (pos == -1) 1113 return do_readv(fd, vec, vlen, flags); 1114 return do_preadv(fd, vec, vlen, pos, flags); 1115 } 1116 #endif 1117 1118 COMPAT_SYSCALL_DEFINE6(preadv2, compat_ulong_t, fd, 1119 const struct iovec __user *, vec, 1120 compat_ulong_t, vlen, u32, pos_low, u32, pos_high, 1121 rwf_t, flags) 1122 { 1123 loff_t pos = ((loff_t)pos_high << 32) | pos_low; 1124 1125 if (pos == -1) 1126 return do_readv(fd, vec, vlen, flags); 1127 return do_preadv(fd, vec, vlen, pos, flags); 1128 } 1129 1130 #ifdef __ARCH_WANT_COMPAT_SYS_PWRITEV64 1131 COMPAT_SYSCALL_DEFINE4(pwritev64, unsigned long, fd, 1132 const struct iovec __user *, vec, 1133 unsigned long, vlen, loff_t, pos) 1134 { 1135 return do_pwritev(fd, vec, vlen, pos, 0); 1136 } 1137 #endif 1138 1139 COMPAT_SYSCALL_DEFINE5(pwritev, compat_ulong_t, fd, 1140 const struct iovec __user *,vec, 1141 compat_ulong_t, vlen, u32, pos_low, u32, pos_high) 1142 { 1143 loff_t pos = ((loff_t)pos_high << 32) | pos_low; 1144 1145 return do_pwritev(fd, vec, vlen, pos, 0); 1146 } 1147 1148 #ifdef __ARCH_WANT_COMPAT_SYS_PWRITEV64V2 1149 COMPAT_SYSCALL_DEFINE5(pwritev64v2, unsigned long, fd, 1150 const struct iovec __user *, vec, 1151 unsigned long, vlen, loff_t, pos, rwf_t, flags) 1152 { 1153 if (pos == -1) 1154 return do_writev(fd, vec, vlen, flags); 1155 return do_pwritev(fd, vec, vlen, pos, flags); 1156 } 1157 #endif 1158 1159 COMPAT_SYSCALL_DEFINE6(pwritev2, compat_ulong_t, fd, 1160 const struct iovec __user *,vec, 1161 compat_ulong_t, vlen, u32, pos_low, u32, pos_high, rwf_t, flags) 1162 { 1163 loff_t pos = ((loff_t)pos_high << 32) | pos_low; 1164 1165 if (pos == -1) 1166 return do_writev(fd, vec, vlen, flags); 1167 return do_pwritev(fd, vec, vlen, pos, flags); 1168 } 1169 #endif /* CONFIG_COMPAT */ 1170 1171 static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos, 1172 size_t count, loff_t max) 1173 { 1174 struct fd in, out; 1175 struct inode *in_inode, *out_inode; 1176 struct pipe_inode_info *opipe; 1177 loff_t pos; 1178 loff_t out_pos; 1179 ssize_t retval; 1180 int fl; 1181 1182 /* 1183 * Get input file, and verify that it is ok.. 1184 */ 1185 retval = -EBADF; 1186 in = fdget(in_fd); 1187 if (!in.file) 1188 goto out; 1189 if (!(in.file->f_mode & FMODE_READ)) 1190 goto fput_in; 1191 retval = -ESPIPE; 1192 if (!ppos) { 1193 pos = in.file->f_pos; 1194 } else { 1195 pos = *ppos; 1196 if (!(in.file->f_mode & FMODE_PREAD)) 1197 goto fput_in; 1198 } 1199 retval = rw_verify_area(READ, in.file, &pos, count); 1200 if (retval < 0) 1201 goto fput_in; 1202 if (count > MAX_RW_COUNT) 1203 count = MAX_RW_COUNT; 1204 1205 /* 1206 * Get output file, and verify that it is ok.. 1207 */ 1208 retval = -EBADF; 1209 out = fdget(out_fd); 1210 if (!out.file) 1211 goto fput_in; 1212 if (!(out.file->f_mode & FMODE_WRITE)) 1213 goto fput_out; 1214 in_inode = file_inode(in.file); 1215 out_inode = file_inode(out.file); 1216 out_pos = out.file->f_pos; 1217 1218 if (!max) 1219 max = min(in_inode->i_sb->s_maxbytes, out_inode->i_sb->s_maxbytes); 1220 1221 if (unlikely(pos + count > max)) { 1222 retval = -EOVERFLOW; 1223 if (pos >= max) 1224 goto fput_out; 1225 count = max - pos; 1226 } 1227 1228 fl = 0; 1229 #if 0 1230 /* 1231 * We need to debate whether we can enable this or not. The 1232 * man page documents EAGAIN return for the output at least, 1233 * and the application is arguably buggy if it doesn't expect 1234 * EAGAIN on a non-blocking file descriptor. 1235 */ 1236 if (in.file->f_flags & O_NONBLOCK) 1237 fl = SPLICE_F_NONBLOCK; 1238 #endif 1239 opipe = get_pipe_info(out.file, true); 1240 if (!opipe) { 1241 retval = rw_verify_area(WRITE, out.file, &out_pos, count); 1242 if (retval < 0) 1243 goto fput_out; 1244 file_start_write(out.file); 1245 retval = do_splice_direct(in.file, &pos, out.file, &out_pos, 1246 count, fl); 1247 file_end_write(out.file); 1248 } else { 1249 retval = splice_file_to_pipe(in.file, opipe, &pos, count, fl); 1250 } 1251 1252 if (retval > 0) { 1253 add_rchar(current, retval); 1254 add_wchar(current, retval); 1255 fsnotify_access(in.file); 1256 fsnotify_modify(out.file); 1257 out.file->f_pos = out_pos; 1258 if (ppos) 1259 *ppos = pos; 1260 else 1261 in.file->f_pos = pos; 1262 } 1263 1264 inc_syscr(current); 1265 inc_syscw(current); 1266 if (pos > max) 1267 retval = -EOVERFLOW; 1268 1269 fput_out: 1270 fdput(out); 1271 fput_in: 1272 fdput(in); 1273 out: 1274 return retval; 1275 } 1276 1277 SYSCALL_DEFINE4(sendfile, int, out_fd, int, in_fd, off_t __user *, offset, size_t, count) 1278 { 1279 loff_t pos; 1280 off_t off; 1281 ssize_t ret; 1282 1283 if (offset) { 1284 if (unlikely(get_user(off, offset))) 1285 return -EFAULT; 1286 pos = off; 1287 ret = do_sendfile(out_fd, in_fd, &pos, count, MAX_NON_LFS); 1288 if (unlikely(put_user(pos, offset))) 1289 return -EFAULT; 1290 return ret; 1291 } 1292 1293 return do_sendfile(out_fd, in_fd, NULL, count, 0); 1294 } 1295 1296 SYSCALL_DEFINE4(sendfile64, int, out_fd, int, in_fd, loff_t __user *, offset, size_t, count) 1297 { 1298 loff_t pos; 1299 ssize_t ret; 1300 1301 if (offset) { 1302 if (unlikely(copy_from_user(&pos, offset, sizeof(loff_t)))) 1303 return -EFAULT; 1304 ret = do_sendfile(out_fd, in_fd, &pos, count, 0); 1305 if (unlikely(put_user(pos, offset))) 1306 return -EFAULT; 1307 return ret; 1308 } 1309 1310 return do_sendfile(out_fd, in_fd, NULL, count, 0); 1311 } 1312 1313 #ifdef CONFIG_COMPAT 1314 COMPAT_SYSCALL_DEFINE4(sendfile, int, out_fd, int, in_fd, 1315 compat_off_t __user *, offset, compat_size_t, count) 1316 { 1317 loff_t pos; 1318 off_t off; 1319 ssize_t ret; 1320 1321 if (offset) { 1322 if (unlikely(get_user(off, offset))) 1323 return -EFAULT; 1324 pos = off; 1325 ret = do_sendfile(out_fd, in_fd, &pos, count, MAX_NON_LFS); 1326 if (unlikely(put_user(pos, offset))) 1327 return -EFAULT; 1328 return ret; 1329 } 1330 1331 return do_sendfile(out_fd, in_fd, NULL, count, 0); 1332 } 1333 1334 COMPAT_SYSCALL_DEFINE4(sendfile64, int, out_fd, int, in_fd, 1335 compat_loff_t __user *, offset, compat_size_t, count) 1336 { 1337 loff_t pos; 1338 ssize_t ret; 1339 1340 if (offset) { 1341 if (unlikely(copy_from_user(&pos, offset, sizeof(loff_t)))) 1342 return -EFAULT; 1343 ret = do_sendfile(out_fd, in_fd, &pos, count, 0); 1344 if (unlikely(put_user(pos, offset))) 1345 return -EFAULT; 1346 return ret; 1347 } 1348 1349 return do_sendfile(out_fd, in_fd, NULL, count, 0); 1350 } 1351 #endif 1352 1353 /** 1354 * generic_copy_file_range - copy data between two files 1355 * @file_in: file structure to read from 1356 * @pos_in: file offset to read from 1357 * @file_out: file structure to write data to 1358 * @pos_out: file offset to write data to 1359 * @len: amount of data to copy 1360 * @flags: copy flags 1361 * 1362 * This is a generic filesystem helper to copy data from one file to another. 1363 * It has no constraints on the source or destination file owners - the files 1364 * can belong to different superblocks and different filesystem types. Short 1365 * copies are allowed. 1366 * 1367 * This should be called from the @file_out filesystem, as per the 1368 * ->copy_file_range() method. 1369 * 1370 * Returns the number of bytes copied or a negative error indicating the 1371 * failure. 1372 */ 1373 1374 ssize_t generic_copy_file_range(struct file *file_in, loff_t pos_in, 1375 struct file *file_out, loff_t pos_out, 1376 size_t len, unsigned int flags) 1377 { 1378 return do_splice_direct(file_in, &pos_in, file_out, &pos_out, 1379 len > MAX_RW_COUNT ? MAX_RW_COUNT : len, 0); 1380 } 1381 EXPORT_SYMBOL(generic_copy_file_range); 1382 1383 static ssize_t do_copy_file_range(struct file *file_in, loff_t pos_in, 1384 struct file *file_out, loff_t pos_out, 1385 size_t len, unsigned int flags) 1386 { 1387 /* 1388 * Although we now allow filesystems to handle cross sb copy, passing 1389 * a file of the wrong filesystem type to filesystem driver can result 1390 * in an attempt to dereference the wrong type of ->private_data, so 1391 * avoid doing that until we really have a good reason. NFS defines 1392 * several different file_system_type structures, but they all end up 1393 * using the same ->copy_file_range() function pointer. 1394 */ 1395 if (file_out->f_op->copy_file_range && 1396 file_out->f_op->copy_file_range == file_in->f_op->copy_file_range) 1397 return file_out->f_op->copy_file_range(file_in, pos_in, 1398 file_out, pos_out, 1399 len, flags); 1400 1401 return generic_copy_file_range(file_in, pos_in, file_out, pos_out, len, 1402 flags); 1403 } 1404 1405 /* 1406 * Performs necessary checks before doing a file copy 1407 * 1408 * Can adjust amount of bytes to copy via @req_count argument. 1409 * Returns appropriate error code that caller should return or 1410 * zero in case the copy should be allowed. 1411 */ 1412 static int generic_copy_file_checks(struct file *file_in, loff_t pos_in, 1413 struct file *file_out, loff_t pos_out, 1414 size_t *req_count, unsigned int flags) 1415 { 1416 struct inode *inode_in = file_inode(file_in); 1417 struct inode *inode_out = file_inode(file_out); 1418 uint64_t count = *req_count; 1419 loff_t size_in; 1420 int ret; 1421 1422 ret = generic_file_rw_checks(file_in, file_out); 1423 if (ret) 1424 return ret; 1425 1426 /* Don't touch certain kinds of inodes */ 1427 if (IS_IMMUTABLE(inode_out)) 1428 return -EPERM; 1429 1430 if (IS_SWAPFILE(inode_in) || IS_SWAPFILE(inode_out)) 1431 return -ETXTBSY; 1432 1433 /* Ensure offsets don't wrap. */ 1434 if (pos_in + count < pos_in || pos_out + count < pos_out) 1435 return -EOVERFLOW; 1436 1437 /* Shorten the copy to EOF */ 1438 size_in = i_size_read(inode_in); 1439 if (pos_in >= size_in) 1440 count = 0; 1441 else 1442 count = min(count, size_in - (uint64_t)pos_in); 1443 1444 ret = generic_write_check_limits(file_out, pos_out, &count); 1445 if (ret) 1446 return ret; 1447 1448 /* Don't allow overlapped copying within the same file. */ 1449 if (inode_in == inode_out && 1450 pos_out + count > pos_in && 1451 pos_out < pos_in + count) 1452 return -EINVAL; 1453 1454 *req_count = count; 1455 return 0; 1456 } 1457 1458 /* 1459 * copy_file_range() differs from regular file read and write in that it 1460 * specifically allows return partial success. When it does so is up to 1461 * the copy_file_range method. 1462 */ 1463 ssize_t vfs_copy_file_range(struct file *file_in, loff_t pos_in, 1464 struct file *file_out, loff_t pos_out, 1465 size_t len, unsigned int flags) 1466 { 1467 ssize_t ret; 1468 1469 if (flags != 0) 1470 return -EINVAL; 1471 1472 ret = generic_copy_file_checks(file_in, pos_in, file_out, pos_out, &len, 1473 flags); 1474 if (unlikely(ret)) 1475 return ret; 1476 1477 ret = rw_verify_area(READ, file_in, &pos_in, len); 1478 if (unlikely(ret)) 1479 return ret; 1480 1481 ret = rw_verify_area(WRITE, file_out, &pos_out, len); 1482 if (unlikely(ret)) 1483 return ret; 1484 1485 if (len == 0) 1486 return 0; 1487 1488 file_start_write(file_out); 1489 1490 /* 1491 * Try cloning first, this is supported by more file systems, and 1492 * more efficient if both clone and copy are supported (e.g. NFS). 1493 */ 1494 if (file_in->f_op->remap_file_range && 1495 file_inode(file_in)->i_sb == file_inode(file_out)->i_sb) { 1496 loff_t cloned; 1497 1498 cloned = file_in->f_op->remap_file_range(file_in, pos_in, 1499 file_out, pos_out, 1500 min_t(loff_t, MAX_RW_COUNT, len), 1501 REMAP_FILE_CAN_SHORTEN); 1502 if (cloned > 0) { 1503 ret = cloned; 1504 goto done; 1505 } 1506 } 1507 1508 ret = do_copy_file_range(file_in, pos_in, file_out, pos_out, len, 1509 flags); 1510 WARN_ON_ONCE(ret == -EOPNOTSUPP); 1511 done: 1512 if (ret > 0) { 1513 fsnotify_access(file_in); 1514 add_rchar(current, ret); 1515 fsnotify_modify(file_out); 1516 add_wchar(current, ret); 1517 } 1518 1519 inc_syscr(current); 1520 inc_syscw(current); 1521 1522 file_end_write(file_out); 1523 1524 return ret; 1525 } 1526 EXPORT_SYMBOL(vfs_copy_file_range); 1527 1528 SYSCALL_DEFINE6(copy_file_range, int, fd_in, loff_t __user *, off_in, 1529 int, fd_out, loff_t __user *, off_out, 1530 size_t, len, unsigned int, flags) 1531 { 1532 loff_t pos_in; 1533 loff_t pos_out; 1534 struct fd f_in; 1535 struct fd f_out; 1536 ssize_t ret = -EBADF; 1537 1538 f_in = fdget(fd_in); 1539 if (!f_in.file) 1540 goto out2; 1541 1542 f_out = fdget(fd_out); 1543 if (!f_out.file) 1544 goto out1; 1545 1546 ret = -EFAULT; 1547 if (off_in) { 1548 if (copy_from_user(&pos_in, off_in, sizeof(loff_t))) 1549 goto out; 1550 } else { 1551 pos_in = f_in.file->f_pos; 1552 } 1553 1554 if (off_out) { 1555 if (copy_from_user(&pos_out, off_out, sizeof(loff_t))) 1556 goto out; 1557 } else { 1558 pos_out = f_out.file->f_pos; 1559 } 1560 1561 ret = vfs_copy_file_range(f_in.file, pos_in, f_out.file, pos_out, len, 1562 flags); 1563 if (ret > 0) { 1564 pos_in += ret; 1565 pos_out += ret; 1566 1567 if (off_in) { 1568 if (copy_to_user(off_in, &pos_in, sizeof(loff_t))) 1569 ret = -EFAULT; 1570 } else { 1571 f_in.file->f_pos = pos_in; 1572 } 1573 1574 if (off_out) { 1575 if (copy_to_user(off_out, &pos_out, sizeof(loff_t))) 1576 ret = -EFAULT; 1577 } else { 1578 f_out.file->f_pos = pos_out; 1579 } 1580 } 1581 1582 out: 1583 fdput(f_out); 1584 out1: 1585 fdput(f_in); 1586 out2: 1587 return ret; 1588 } 1589 1590 /* 1591 * Don't operate on ranges the page cache doesn't support, and don't exceed the 1592 * LFS limits. If pos is under the limit it becomes a short access. If it 1593 * exceeds the limit we return -EFBIG. 1594 */ 1595 int generic_write_check_limits(struct file *file, loff_t pos, loff_t *count) 1596 { 1597 struct inode *inode = file->f_mapping->host; 1598 loff_t max_size = inode->i_sb->s_maxbytes; 1599 loff_t limit = rlimit(RLIMIT_FSIZE); 1600 1601 if (limit != RLIM_INFINITY) { 1602 if (pos >= limit) { 1603 send_sig(SIGXFSZ, current, 0); 1604 return -EFBIG; 1605 } 1606 *count = min(*count, limit - pos); 1607 } 1608 1609 if (!(file->f_flags & O_LARGEFILE)) 1610 max_size = MAX_NON_LFS; 1611 1612 if (unlikely(pos >= max_size)) 1613 return -EFBIG; 1614 1615 *count = min(*count, max_size - pos); 1616 1617 return 0; 1618 } 1619 1620 /* 1621 * Performs necessary checks before doing a write 1622 * 1623 * Can adjust writing position or amount of bytes to write. 1624 * Returns appropriate error code that caller should return or 1625 * zero in case that write should be allowed. 1626 */ 1627 ssize_t generic_write_checks(struct kiocb *iocb, struct iov_iter *from) 1628 { 1629 struct file *file = iocb->ki_filp; 1630 struct inode *inode = file->f_mapping->host; 1631 loff_t count; 1632 int ret; 1633 1634 if (IS_SWAPFILE(inode)) 1635 return -ETXTBSY; 1636 1637 if (!iov_iter_count(from)) 1638 return 0; 1639 1640 /* FIXME: this is for backwards compatibility with 2.4 */ 1641 if (iocb->ki_flags & IOCB_APPEND) 1642 iocb->ki_pos = i_size_read(inode); 1643 1644 if ((iocb->ki_flags & IOCB_NOWAIT) && !(iocb->ki_flags & IOCB_DIRECT)) 1645 return -EINVAL; 1646 1647 count = iov_iter_count(from); 1648 ret = generic_write_check_limits(file, iocb->ki_pos, &count); 1649 if (ret) 1650 return ret; 1651 1652 iov_iter_truncate(from, count); 1653 return iov_iter_count(from); 1654 } 1655 EXPORT_SYMBOL(generic_write_checks); 1656 1657 /* 1658 * Performs common checks before doing a file copy/clone 1659 * from @file_in to @file_out. 1660 */ 1661 int generic_file_rw_checks(struct file *file_in, struct file *file_out) 1662 { 1663 struct inode *inode_in = file_inode(file_in); 1664 struct inode *inode_out = file_inode(file_out); 1665 1666 /* Don't copy dirs, pipes, sockets... */ 1667 if (S_ISDIR(inode_in->i_mode) || S_ISDIR(inode_out->i_mode)) 1668 return -EISDIR; 1669 if (!S_ISREG(inode_in->i_mode) || !S_ISREG(inode_out->i_mode)) 1670 return -EINVAL; 1671 1672 if (!(file_in->f_mode & FMODE_READ) || 1673 !(file_out->f_mode & FMODE_WRITE) || 1674 (file_out->f_flags & O_APPEND)) 1675 return -EBADF; 1676 1677 return 0; 1678 } 1679