1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * linux/fs/read_write.c 4 * 5 * Copyright (C) 1991, 1992 Linus Torvalds 6 */ 7 8 #include <linux/slab.h> 9 #include <linux/stat.h> 10 #include <linux/sched/xacct.h> 11 #include <linux/fcntl.h> 12 #include <linux/file.h> 13 #include <linux/uio.h> 14 #include <linux/fsnotify.h> 15 #include <linux/security.h> 16 #include <linux/export.h> 17 #include <linux/syscalls.h> 18 #include <linux/pagemap.h> 19 #include <linux/splice.h> 20 #include <linux/compat.h> 21 #include <linux/mount.h> 22 #include <linux/fs.h> 23 #include "internal.h" 24 25 #include <linux/uaccess.h> 26 #include <asm/unistd.h> 27 28 const struct file_operations generic_ro_fops = { 29 .llseek = generic_file_llseek, 30 .read_iter = generic_file_read_iter, 31 .mmap = generic_file_readonly_mmap, 32 .splice_read = generic_file_splice_read, 33 }; 34 35 EXPORT_SYMBOL(generic_ro_fops); 36 37 static inline bool unsigned_offsets(struct file *file) 38 { 39 return file->f_mode & FMODE_UNSIGNED_OFFSET; 40 } 41 42 /** 43 * vfs_setpos - update the file offset for lseek 44 * @file: file structure in question 45 * @offset: file offset to seek to 46 * @maxsize: maximum file size 47 * 48 * This is a low-level filesystem helper for updating the file offset to 49 * the value specified by @offset if the given offset is valid and it is 50 * not equal to the current file offset. 51 * 52 * Return the specified offset on success and -EINVAL on invalid offset. 53 */ 54 loff_t vfs_setpos(struct file *file, loff_t offset, loff_t maxsize) 55 { 56 if (offset < 0 && !unsigned_offsets(file)) 57 return -EINVAL; 58 if (offset > maxsize) 59 return -EINVAL; 60 61 if (offset != file->f_pos) { 62 file->f_pos = offset; 63 file->f_version = 0; 64 } 65 return offset; 66 } 67 EXPORT_SYMBOL(vfs_setpos); 68 69 /** 70 * generic_file_llseek_size - generic llseek implementation for regular files 71 * @file: file structure to seek on 72 * @offset: file offset to seek to 73 * @whence: type of seek 74 * @size: max size of this file in file system 75 * @eof: offset used for SEEK_END position 76 * 77 * This is a variant of generic_file_llseek that allows passing in a custom 78 * maximum file size and a custom EOF position, for e.g. hashed directories 79 * 80 * Synchronization: 81 * SEEK_SET and SEEK_END are unsynchronized (but atomic on 64bit platforms) 82 * SEEK_CUR is synchronized against other SEEK_CURs, but not read/writes. 83 * read/writes behave like SEEK_SET against seeks. 84 */ 85 loff_t 86 generic_file_llseek_size(struct file *file, loff_t offset, int whence, 87 loff_t maxsize, loff_t eof) 88 { 89 switch (whence) { 90 case SEEK_END: 91 offset += eof; 92 break; 93 case SEEK_CUR: 94 /* 95 * Here we special-case the lseek(fd, 0, SEEK_CUR) 96 * position-querying operation. Avoid rewriting the "same" 97 * f_pos value back to the file because a concurrent read(), 98 * write() or lseek() might have altered it 99 */ 100 if (offset == 0) 101 return file->f_pos; 102 /* 103 * f_lock protects against read/modify/write race with other 104 * SEEK_CURs. Note that parallel writes and reads behave 105 * like SEEK_SET. 106 */ 107 spin_lock(&file->f_lock); 108 offset = vfs_setpos(file, file->f_pos + offset, maxsize); 109 spin_unlock(&file->f_lock); 110 return offset; 111 case SEEK_DATA: 112 /* 113 * In the generic case the entire file is data, so as long as 114 * offset isn't at the end of the file then the offset is data. 115 */ 116 if ((unsigned long long)offset >= eof) 117 return -ENXIO; 118 break; 119 case SEEK_HOLE: 120 /* 121 * There is a virtual hole at the end of the file, so as long as 122 * offset isn't i_size or larger, return i_size. 123 */ 124 if ((unsigned long long)offset >= eof) 125 return -ENXIO; 126 offset = eof; 127 break; 128 } 129 130 return vfs_setpos(file, offset, maxsize); 131 } 132 EXPORT_SYMBOL(generic_file_llseek_size); 133 134 /** 135 * generic_file_llseek - generic llseek implementation for regular files 136 * @file: file structure to seek on 137 * @offset: file offset to seek to 138 * @whence: type of seek 139 * 140 * This is a generic implemenation of ->llseek useable for all normal local 141 * filesystems. It just updates the file offset to the value specified by 142 * @offset and @whence. 143 */ 144 loff_t generic_file_llseek(struct file *file, loff_t offset, int whence) 145 { 146 struct inode *inode = file->f_mapping->host; 147 148 return generic_file_llseek_size(file, offset, whence, 149 inode->i_sb->s_maxbytes, 150 i_size_read(inode)); 151 } 152 EXPORT_SYMBOL(generic_file_llseek); 153 154 /** 155 * fixed_size_llseek - llseek implementation for fixed-sized devices 156 * @file: file structure to seek on 157 * @offset: file offset to seek to 158 * @whence: type of seek 159 * @size: size of the file 160 * 161 */ 162 loff_t fixed_size_llseek(struct file *file, loff_t offset, int whence, loff_t size) 163 { 164 switch (whence) { 165 case SEEK_SET: case SEEK_CUR: case SEEK_END: 166 return generic_file_llseek_size(file, offset, whence, 167 size, size); 168 default: 169 return -EINVAL; 170 } 171 } 172 EXPORT_SYMBOL(fixed_size_llseek); 173 174 /** 175 * no_seek_end_llseek - llseek implementation for fixed-sized devices 176 * @file: file structure to seek on 177 * @offset: file offset to seek to 178 * @whence: type of seek 179 * 180 */ 181 loff_t no_seek_end_llseek(struct file *file, loff_t offset, int whence) 182 { 183 switch (whence) { 184 case SEEK_SET: case SEEK_CUR: 185 return generic_file_llseek_size(file, offset, whence, 186 OFFSET_MAX, 0); 187 default: 188 return -EINVAL; 189 } 190 } 191 EXPORT_SYMBOL(no_seek_end_llseek); 192 193 /** 194 * no_seek_end_llseek_size - llseek implementation for fixed-sized devices 195 * @file: file structure to seek on 196 * @offset: file offset to seek to 197 * @whence: type of seek 198 * @size: maximal offset allowed 199 * 200 */ 201 loff_t no_seek_end_llseek_size(struct file *file, loff_t offset, int whence, loff_t size) 202 { 203 switch (whence) { 204 case SEEK_SET: case SEEK_CUR: 205 return generic_file_llseek_size(file, offset, whence, 206 size, 0); 207 default: 208 return -EINVAL; 209 } 210 } 211 EXPORT_SYMBOL(no_seek_end_llseek_size); 212 213 /** 214 * noop_llseek - No Operation Performed llseek implementation 215 * @file: file structure to seek on 216 * @offset: file offset to seek to 217 * @whence: type of seek 218 * 219 * This is an implementation of ->llseek useable for the rare special case when 220 * userspace expects the seek to succeed but the (device) file is actually not 221 * able to perform the seek. In this case you use noop_llseek() instead of 222 * falling back to the default implementation of ->llseek. 223 */ 224 loff_t noop_llseek(struct file *file, loff_t offset, int whence) 225 { 226 return file->f_pos; 227 } 228 EXPORT_SYMBOL(noop_llseek); 229 230 loff_t default_llseek(struct file *file, loff_t offset, int whence) 231 { 232 struct inode *inode = file_inode(file); 233 loff_t retval; 234 235 inode_lock(inode); 236 switch (whence) { 237 case SEEK_END: 238 offset += i_size_read(inode); 239 break; 240 case SEEK_CUR: 241 if (offset == 0) { 242 retval = file->f_pos; 243 goto out; 244 } 245 offset += file->f_pos; 246 break; 247 case SEEK_DATA: 248 /* 249 * In the generic case the entire file is data, so as 250 * long as offset isn't at the end of the file then the 251 * offset is data. 252 */ 253 if (offset >= inode->i_size) { 254 retval = -ENXIO; 255 goto out; 256 } 257 break; 258 case SEEK_HOLE: 259 /* 260 * There is a virtual hole at the end of the file, so 261 * as long as offset isn't i_size or larger, return 262 * i_size. 263 */ 264 if (offset >= inode->i_size) { 265 retval = -ENXIO; 266 goto out; 267 } 268 offset = inode->i_size; 269 break; 270 } 271 retval = -EINVAL; 272 if (offset >= 0 || unsigned_offsets(file)) { 273 if (offset != file->f_pos) { 274 file->f_pos = offset; 275 file->f_version = 0; 276 } 277 retval = offset; 278 } 279 out: 280 inode_unlock(inode); 281 return retval; 282 } 283 EXPORT_SYMBOL(default_llseek); 284 285 loff_t vfs_llseek(struct file *file, loff_t offset, int whence) 286 { 287 if (!(file->f_mode & FMODE_LSEEK)) 288 return -ESPIPE; 289 return file->f_op->llseek(file, offset, whence); 290 } 291 EXPORT_SYMBOL(vfs_llseek); 292 293 static off_t ksys_lseek(unsigned int fd, off_t offset, unsigned int whence) 294 { 295 off_t retval; 296 struct fd f = fdget_pos(fd); 297 if (!f.file) 298 return -EBADF; 299 300 retval = -EINVAL; 301 if (whence <= SEEK_MAX) { 302 loff_t res = vfs_llseek(f.file, offset, whence); 303 retval = res; 304 if (res != (loff_t)retval) 305 retval = -EOVERFLOW; /* LFS: should only happen on 32 bit platforms */ 306 } 307 fdput_pos(f); 308 return retval; 309 } 310 311 SYSCALL_DEFINE3(lseek, unsigned int, fd, off_t, offset, unsigned int, whence) 312 { 313 return ksys_lseek(fd, offset, whence); 314 } 315 316 #ifdef CONFIG_COMPAT 317 COMPAT_SYSCALL_DEFINE3(lseek, unsigned int, fd, compat_off_t, offset, unsigned int, whence) 318 { 319 return ksys_lseek(fd, offset, whence); 320 } 321 #endif 322 323 #if !defined(CONFIG_64BIT) || defined(CONFIG_COMPAT) || \ 324 defined(__ARCH_WANT_SYS_LLSEEK) 325 SYSCALL_DEFINE5(llseek, unsigned int, fd, unsigned long, offset_high, 326 unsigned long, offset_low, loff_t __user *, result, 327 unsigned int, whence) 328 { 329 int retval; 330 struct fd f = fdget_pos(fd); 331 loff_t offset; 332 333 if (!f.file) 334 return -EBADF; 335 336 retval = -EINVAL; 337 if (whence > SEEK_MAX) 338 goto out_putf; 339 340 offset = vfs_llseek(f.file, ((loff_t) offset_high << 32) | offset_low, 341 whence); 342 343 retval = (int)offset; 344 if (offset >= 0) { 345 retval = -EFAULT; 346 if (!copy_to_user(result, &offset, sizeof(offset))) 347 retval = 0; 348 } 349 out_putf: 350 fdput_pos(f); 351 return retval; 352 } 353 #endif 354 355 int rw_verify_area(int read_write, struct file *file, const loff_t *ppos, size_t count) 356 { 357 if (unlikely((ssize_t) count < 0)) 358 return -EINVAL; 359 360 if (ppos) { 361 loff_t pos = *ppos; 362 363 if (unlikely(pos < 0)) { 364 if (!unsigned_offsets(file)) 365 return -EINVAL; 366 if (count >= -pos) /* both values are in 0..LLONG_MAX */ 367 return -EOVERFLOW; 368 } else if (unlikely((loff_t) (pos + count) < 0)) { 369 if (!unsigned_offsets(file)) 370 return -EINVAL; 371 } 372 } 373 374 return security_file_permission(file, 375 read_write == READ ? MAY_READ : MAY_WRITE); 376 } 377 EXPORT_SYMBOL(rw_verify_area); 378 379 static ssize_t new_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos) 380 { 381 struct iovec iov = { .iov_base = buf, .iov_len = len }; 382 struct kiocb kiocb; 383 struct iov_iter iter; 384 ssize_t ret; 385 386 init_sync_kiocb(&kiocb, filp); 387 kiocb.ki_pos = (ppos ? *ppos : 0); 388 iov_iter_init(&iter, READ, &iov, 1, len); 389 390 ret = call_read_iter(filp, &kiocb, &iter); 391 BUG_ON(ret == -EIOCBQUEUED); 392 if (ppos) 393 *ppos = kiocb.ki_pos; 394 return ret; 395 } 396 397 static int warn_unsupported(struct file *file, const char *op) 398 { 399 pr_warn_ratelimited( 400 "kernel %s not supported for file %pD4 (pid: %d comm: %.20s)\n", 401 op, file, current->pid, current->comm); 402 return -EINVAL; 403 } 404 405 ssize_t __kernel_read(struct file *file, void *buf, size_t count, loff_t *pos) 406 { 407 struct kvec iov = { 408 .iov_base = buf, 409 .iov_len = min_t(size_t, count, MAX_RW_COUNT), 410 }; 411 struct kiocb kiocb; 412 struct iov_iter iter; 413 ssize_t ret; 414 415 if (WARN_ON_ONCE(!(file->f_mode & FMODE_READ))) 416 return -EINVAL; 417 if (!(file->f_mode & FMODE_CAN_READ)) 418 return -EINVAL; 419 /* 420 * Also fail if ->read_iter and ->read are both wired up as that 421 * implies very convoluted semantics. 422 */ 423 if (unlikely(!file->f_op->read_iter || file->f_op->read)) 424 return warn_unsupported(file, "read"); 425 426 init_sync_kiocb(&kiocb, file); 427 kiocb.ki_pos = pos ? *pos : 0; 428 iov_iter_kvec(&iter, READ, &iov, 1, iov.iov_len); 429 ret = file->f_op->read_iter(&kiocb, &iter); 430 if (ret > 0) { 431 if (pos) 432 *pos = kiocb.ki_pos; 433 fsnotify_access(file); 434 add_rchar(current, ret); 435 } 436 inc_syscr(current); 437 return ret; 438 } 439 440 ssize_t kernel_read(struct file *file, void *buf, size_t count, loff_t *pos) 441 { 442 ssize_t ret; 443 444 ret = rw_verify_area(READ, file, pos, count); 445 if (ret) 446 return ret; 447 return __kernel_read(file, buf, count, pos); 448 } 449 EXPORT_SYMBOL(kernel_read); 450 451 ssize_t vfs_read(struct file *file, char __user *buf, size_t count, loff_t *pos) 452 { 453 ssize_t ret; 454 455 if (!(file->f_mode & FMODE_READ)) 456 return -EBADF; 457 if (!(file->f_mode & FMODE_CAN_READ)) 458 return -EINVAL; 459 if (unlikely(!access_ok(buf, count))) 460 return -EFAULT; 461 462 ret = rw_verify_area(READ, file, pos, count); 463 if (ret) 464 return ret; 465 if (count > MAX_RW_COUNT) 466 count = MAX_RW_COUNT; 467 468 if (file->f_op->read) 469 ret = file->f_op->read(file, buf, count, pos); 470 else if (file->f_op->read_iter) 471 ret = new_sync_read(file, buf, count, pos); 472 else 473 ret = -EINVAL; 474 if (ret > 0) { 475 fsnotify_access(file); 476 add_rchar(current, ret); 477 } 478 inc_syscr(current); 479 return ret; 480 } 481 482 static ssize_t new_sync_write(struct file *filp, const char __user *buf, size_t len, loff_t *ppos) 483 { 484 struct iovec iov = { .iov_base = (void __user *)buf, .iov_len = len }; 485 struct kiocb kiocb; 486 struct iov_iter iter; 487 ssize_t ret; 488 489 init_sync_kiocb(&kiocb, filp); 490 kiocb.ki_pos = (ppos ? *ppos : 0); 491 iov_iter_init(&iter, WRITE, &iov, 1, len); 492 493 ret = call_write_iter(filp, &kiocb, &iter); 494 BUG_ON(ret == -EIOCBQUEUED); 495 if (ret > 0 && ppos) 496 *ppos = kiocb.ki_pos; 497 return ret; 498 } 499 500 /* caller is responsible for file_start_write/file_end_write */ 501 ssize_t __kernel_write(struct file *file, const void *buf, size_t count, loff_t *pos) 502 { 503 struct kvec iov = { 504 .iov_base = (void *)buf, 505 .iov_len = min_t(size_t, count, MAX_RW_COUNT), 506 }; 507 struct kiocb kiocb; 508 struct iov_iter iter; 509 ssize_t ret; 510 511 if (WARN_ON_ONCE(!(file->f_mode & FMODE_WRITE))) 512 return -EBADF; 513 if (!(file->f_mode & FMODE_CAN_WRITE)) 514 return -EINVAL; 515 /* 516 * Also fail if ->write_iter and ->write are both wired up as that 517 * implies very convoluted semantics. 518 */ 519 if (unlikely(!file->f_op->write_iter || file->f_op->write)) 520 return warn_unsupported(file, "write"); 521 522 init_sync_kiocb(&kiocb, file); 523 kiocb.ki_pos = pos ? *pos : 0; 524 iov_iter_kvec(&iter, WRITE, &iov, 1, iov.iov_len); 525 ret = file->f_op->write_iter(&kiocb, &iter); 526 if (ret > 0) { 527 if (pos) 528 *pos = kiocb.ki_pos; 529 fsnotify_modify(file); 530 add_wchar(current, ret); 531 } 532 inc_syscw(current); 533 return ret; 534 } 535 /* 536 * This "EXPORT_SYMBOL_GPL()" is more of a "EXPORT_SYMBOL_DONTUSE()", 537 * but autofs is one of the few internal kernel users that actually 538 * wants this _and_ can be built as a module. So we need to export 539 * this symbol for autofs, even though it really isn't appropriate 540 * for any other kernel modules. 541 */ 542 EXPORT_SYMBOL_GPL(__kernel_write); 543 544 ssize_t kernel_write(struct file *file, const void *buf, size_t count, 545 loff_t *pos) 546 { 547 ssize_t ret; 548 549 ret = rw_verify_area(WRITE, file, pos, count); 550 if (ret) 551 return ret; 552 553 file_start_write(file); 554 ret = __kernel_write(file, buf, count, pos); 555 file_end_write(file); 556 return ret; 557 } 558 EXPORT_SYMBOL(kernel_write); 559 560 ssize_t vfs_write(struct file *file, const char __user *buf, size_t count, loff_t *pos) 561 { 562 ssize_t ret; 563 564 if (!(file->f_mode & FMODE_WRITE)) 565 return -EBADF; 566 if (!(file->f_mode & FMODE_CAN_WRITE)) 567 return -EINVAL; 568 if (unlikely(!access_ok(buf, count))) 569 return -EFAULT; 570 571 ret = rw_verify_area(WRITE, file, pos, count); 572 if (ret) 573 return ret; 574 if (count > MAX_RW_COUNT) 575 count = MAX_RW_COUNT; 576 file_start_write(file); 577 if (file->f_op->write) 578 ret = file->f_op->write(file, buf, count, pos); 579 else if (file->f_op->write_iter) 580 ret = new_sync_write(file, buf, count, pos); 581 else 582 ret = -EINVAL; 583 if (ret > 0) { 584 fsnotify_modify(file); 585 add_wchar(current, ret); 586 } 587 inc_syscw(current); 588 file_end_write(file); 589 return ret; 590 } 591 592 /* file_ppos returns &file->f_pos or NULL if file is stream */ 593 static inline loff_t *file_ppos(struct file *file) 594 { 595 return file->f_mode & FMODE_STREAM ? NULL : &file->f_pos; 596 } 597 598 ssize_t ksys_read(unsigned int fd, char __user *buf, size_t count) 599 { 600 struct fd f = fdget_pos(fd); 601 ssize_t ret = -EBADF; 602 603 if (f.file) { 604 loff_t pos, *ppos = file_ppos(f.file); 605 if (ppos) { 606 pos = *ppos; 607 ppos = &pos; 608 } 609 ret = vfs_read(f.file, buf, count, ppos); 610 if (ret >= 0 && ppos) 611 f.file->f_pos = pos; 612 fdput_pos(f); 613 } 614 return ret; 615 } 616 617 SYSCALL_DEFINE3(read, unsigned int, fd, char __user *, buf, size_t, count) 618 { 619 return ksys_read(fd, buf, count); 620 } 621 622 ssize_t ksys_write(unsigned int fd, const char __user *buf, size_t count) 623 { 624 struct fd f = fdget_pos(fd); 625 ssize_t ret = -EBADF; 626 627 if (f.file) { 628 loff_t pos, *ppos = file_ppos(f.file); 629 if (ppos) { 630 pos = *ppos; 631 ppos = &pos; 632 } 633 ret = vfs_write(f.file, buf, count, ppos); 634 if (ret >= 0 && ppos) 635 f.file->f_pos = pos; 636 fdput_pos(f); 637 } 638 639 return ret; 640 } 641 642 SYSCALL_DEFINE3(write, unsigned int, fd, const char __user *, buf, 643 size_t, count) 644 { 645 return ksys_write(fd, buf, count); 646 } 647 648 ssize_t ksys_pread64(unsigned int fd, char __user *buf, size_t count, 649 loff_t pos) 650 { 651 struct fd f; 652 ssize_t ret = -EBADF; 653 654 if (pos < 0) 655 return -EINVAL; 656 657 f = fdget(fd); 658 if (f.file) { 659 ret = -ESPIPE; 660 if (f.file->f_mode & FMODE_PREAD) 661 ret = vfs_read(f.file, buf, count, &pos); 662 fdput(f); 663 } 664 665 return ret; 666 } 667 668 SYSCALL_DEFINE4(pread64, unsigned int, fd, char __user *, buf, 669 size_t, count, loff_t, pos) 670 { 671 return ksys_pread64(fd, buf, count, pos); 672 } 673 674 #if defined(CONFIG_COMPAT) && defined(__ARCH_WANT_COMPAT_PREAD64) 675 COMPAT_SYSCALL_DEFINE5(pread64, unsigned int, fd, char __user *, buf, 676 size_t, count, compat_arg_u64_dual(pos)) 677 { 678 return ksys_pread64(fd, buf, count, compat_arg_u64_glue(pos)); 679 } 680 #endif 681 682 ssize_t ksys_pwrite64(unsigned int fd, const char __user *buf, 683 size_t count, loff_t pos) 684 { 685 struct fd f; 686 ssize_t ret = -EBADF; 687 688 if (pos < 0) 689 return -EINVAL; 690 691 f = fdget(fd); 692 if (f.file) { 693 ret = -ESPIPE; 694 if (f.file->f_mode & FMODE_PWRITE) 695 ret = vfs_write(f.file, buf, count, &pos); 696 fdput(f); 697 } 698 699 return ret; 700 } 701 702 SYSCALL_DEFINE4(pwrite64, unsigned int, fd, const char __user *, buf, 703 size_t, count, loff_t, pos) 704 { 705 return ksys_pwrite64(fd, buf, count, pos); 706 } 707 708 #if defined(CONFIG_COMPAT) && defined(__ARCH_WANT_COMPAT_PWRITE64) 709 COMPAT_SYSCALL_DEFINE5(pwrite64, unsigned int, fd, const char __user *, buf, 710 size_t, count, compat_arg_u64_dual(pos)) 711 { 712 return ksys_pwrite64(fd, buf, count, compat_arg_u64_glue(pos)); 713 } 714 #endif 715 716 static ssize_t do_iter_readv_writev(struct file *filp, struct iov_iter *iter, 717 loff_t *ppos, int type, rwf_t flags) 718 { 719 struct kiocb kiocb; 720 ssize_t ret; 721 722 init_sync_kiocb(&kiocb, filp); 723 ret = kiocb_set_rw_flags(&kiocb, flags); 724 if (ret) 725 return ret; 726 kiocb.ki_pos = (ppos ? *ppos : 0); 727 728 if (type == READ) 729 ret = call_read_iter(filp, &kiocb, iter); 730 else 731 ret = call_write_iter(filp, &kiocb, iter); 732 BUG_ON(ret == -EIOCBQUEUED); 733 if (ppos) 734 *ppos = kiocb.ki_pos; 735 return ret; 736 } 737 738 /* Do it by hand, with file-ops */ 739 static ssize_t do_loop_readv_writev(struct file *filp, struct iov_iter *iter, 740 loff_t *ppos, int type, rwf_t flags) 741 { 742 ssize_t ret = 0; 743 744 if (flags & ~RWF_HIPRI) 745 return -EOPNOTSUPP; 746 747 while (iov_iter_count(iter)) { 748 struct iovec iovec = iov_iter_iovec(iter); 749 ssize_t nr; 750 751 if (type == READ) { 752 nr = filp->f_op->read(filp, iovec.iov_base, 753 iovec.iov_len, ppos); 754 } else { 755 nr = filp->f_op->write(filp, iovec.iov_base, 756 iovec.iov_len, ppos); 757 } 758 759 if (nr < 0) { 760 if (!ret) 761 ret = nr; 762 break; 763 } 764 ret += nr; 765 if (nr != iovec.iov_len) 766 break; 767 iov_iter_advance(iter, nr); 768 } 769 770 return ret; 771 } 772 773 static ssize_t do_iter_read(struct file *file, struct iov_iter *iter, 774 loff_t *pos, rwf_t flags) 775 { 776 size_t tot_len; 777 ssize_t ret = 0; 778 779 if (!(file->f_mode & FMODE_READ)) 780 return -EBADF; 781 if (!(file->f_mode & FMODE_CAN_READ)) 782 return -EINVAL; 783 784 tot_len = iov_iter_count(iter); 785 if (!tot_len) 786 goto out; 787 ret = rw_verify_area(READ, file, pos, tot_len); 788 if (ret < 0) 789 return ret; 790 791 if (file->f_op->read_iter) 792 ret = do_iter_readv_writev(file, iter, pos, READ, flags); 793 else 794 ret = do_loop_readv_writev(file, iter, pos, READ, flags); 795 out: 796 if (ret >= 0) 797 fsnotify_access(file); 798 return ret; 799 } 800 801 ssize_t vfs_iocb_iter_read(struct file *file, struct kiocb *iocb, 802 struct iov_iter *iter) 803 { 804 size_t tot_len; 805 ssize_t ret = 0; 806 807 if (!file->f_op->read_iter) 808 return -EINVAL; 809 if (!(file->f_mode & FMODE_READ)) 810 return -EBADF; 811 if (!(file->f_mode & FMODE_CAN_READ)) 812 return -EINVAL; 813 814 tot_len = iov_iter_count(iter); 815 if (!tot_len) 816 goto out; 817 ret = rw_verify_area(READ, file, &iocb->ki_pos, tot_len); 818 if (ret < 0) 819 return ret; 820 821 ret = call_read_iter(file, iocb, iter); 822 out: 823 if (ret >= 0) 824 fsnotify_access(file); 825 return ret; 826 } 827 EXPORT_SYMBOL(vfs_iocb_iter_read); 828 829 ssize_t vfs_iter_read(struct file *file, struct iov_iter *iter, loff_t *ppos, 830 rwf_t flags) 831 { 832 if (!file->f_op->read_iter) 833 return -EINVAL; 834 return do_iter_read(file, iter, ppos, flags); 835 } 836 EXPORT_SYMBOL(vfs_iter_read); 837 838 static ssize_t do_iter_write(struct file *file, struct iov_iter *iter, 839 loff_t *pos, rwf_t flags) 840 { 841 size_t tot_len; 842 ssize_t ret = 0; 843 844 if (!(file->f_mode & FMODE_WRITE)) 845 return -EBADF; 846 if (!(file->f_mode & FMODE_CAN_WRITE)) 847 return -EINVAL; 848 849 tot_len = iov_iter_count(iter); 850 if (!tot_len) 851 return 0; 852 ret = rw_verify_area(WRITE, file, pos, tot_len); 853 if (ret < 0) 854 return ret; 855 856 if (file->f_op->write_iter) 857 ret = do_iter_readv_writev(file, iter, pos, WRITE, flags); 858 else 859 ret = do_loop_readv_writev(file, iter, pos, WRITE, flags); 860 if (ret > 0) 861 fsnotify_modify(file); 862 return ret; 863 } 864 865 ssize_t vfs_iocb_iter_write(struct file *file, struct kiocb *iocb, 866 struct iov_iter *iter) 867 { 868 size_t tot_len; 869 ssize_t ret = 0; 870 871 if (!file->f_op->write_iter) 872 return -EINVAL; 873 if (!(file->f_mode & FMODE_WRITE)) 874 return -EBADF; 875 if (!(file->f_mode & FMODE_CAN_WRITE)) 876 return -EINVAL; 877 878 tot_len = iov_iter_count(iter); 879 if (!tot_len) 880 return 0; 881 ret = rw_verify_area(WRITE, file, &iocb->ki_pos, tot_len); 882 if (ret < 0) 883 return ret; 884 885 ret = call_write_iter(file, iocb, iter); 886 if (ret > 0) 887 fsnotify_modify(file); 888 889 return ret; 890 } 891 EXPORT_SYMBOL(vfs_iocb_iter_write); 892 893 ssize_t vfs_iter_write(struct file *file, struct iov_iter *iter, loff_t *ppos, 894 rwf_t flags) 895 { 896 if (!file->f_op->write_iter) 897 return -EINVAL; 898 return do_iter_write(file, iter, ppos, flags); 899 } 900 EXPORT_SYMBOL(vfs_iter_write); 901 902 static ssize_t vfs_readv(struct file *file, const struct iovec __user *vec, 903 unsigned long vlen, loff_t *pos, rwf_t flags) 904 { 905 struct iovec iovstack[UIO_FASTIOV]; 906 struct iovec *iov = iovstack; 907 struct iov_iter iter; 908 ssize_t ret; 909 910 ret = import_iovec(READ, vec, vlen, ARRAY_SIZE(iovstack), &iov, &iter); 911 if (ret >= 0) { 912 ret = do_iter_read(file, &iter, pos, flags); 913 kfree(iov); 914 } 915 916 return ret; 917 } 918 919 static ssize_t vfs_writev(struct file *file, const struct iovec __user *vec, 920 unsigned long vlen, loff_t *pos, rwf_t flags) 921 { 922 struct iovec iovstack[UIO_FASTIOV]; 923 struct iovec *iov = iovstack; 924 struct iov_iter iter; 925 ssize_t ret; 926 927 ret = import_iovec(WRITE, vec, vlen, ARRAY_SIZE(iovstack), &iov, &iter); 928 if (ret >= 0) { 929 file_start_write(file); 930 ret = do_iter_write(file, &iter, pos, flags); 931 file_end_write(file); 932 kfree(iov); 933 } 934 return ret; 935 } 936 937 static ssize_t do_readv(unsigned long fd, const struct iovec __user *vec, 938 unsigned long vlen, rwf_t flags) 939 { 940 struct fd f = fdget_pos(fd); 941 ssize_t ret = -EBADF; 942 943 if (f.file) { 944 loff_t pos, *ppos = file_ppos(f.file); 945 if (ppos) { 946 pos = *ppos; 947 ppos = &pos; 948 } 949 ret = vfs_readv(f.file, vec, vlen, ppos, flags); 950 if (ret >= 0 && ppos) 951 f.file->f_pos = pos; 952 fdput_pos(f); 953 } 954 955 if (ret > 0) 956 add_rchar(current, ret); 957 inc_syscr(current); 958 return ret; 959 } 960 961 static ssize_t do_writev(unsigned long fd, const struct iovec __user *vec, 962 unsigned long vlen, rwf_t flags) 963 { 964 struct fd f = fdget_pos(fd); 965 ssize_t ret = -EBADF; 966 967 if (f.file) { 968 loff_t pos, *ppos = file_ppos(f.file); 969 if (ppos) { 970 pos = *ppos; 971 ppos = &pos; 972 } 973 ret = vfs_writev(f.file, vec, vlen, ppos, flags); 974 if (ret >= 0 && ppos) 975 f.file->f_pos = pos; 976 fdput_pos(f); 977 } 978 979 if (ret > 0) 980 add_wchar(current, ret); 981 inc_syscw(current); 982 return ret; 983 } 984 985 static inline loff_t pos_from_hilo(unsigned long high, unsigned long low) 986 { 987 #define HALF_LONG_BITS (BITS_PER_LONG / 2) 988 return (((loff_t)high << HALF_LONG_BITS) << HALF_LONG_BITS) | low; 989 } 990 991 static ssize_t do_preadv(unsigned long fd, const struct iovec __user *vec, 992 unsigned long vlen, loff_t pos, rwf_t flags) 993 { 994 struct fd f; 995 ssize_t ret = -EBADF; 996 997 if (pos < 0) 998 return -EINVAL; 999 1000 f = fdget(fd); 1001 if (f.file) { 1002 ret = -ESPIPE; 1003 if (f.file->f_mode & FMODE_PREAD) 1004 ret = vfs_readv(f.file, vec, vlen, &pos, flags); 1005 fdput(f); 1006 } 1007 1008 if (ret > 0) 1009 add_rchar(current, ret); 1010 inc_syscr(current); 1011 return ret; 1012 } 1013 1014 static ssize_t do_pwritev(unsigned long fd, const struct iovec __user *vec, 1015 unsigned long vlen, loff_t pos, rwf_t flags) 1016 { 1017 struct fd f; 1018 ssize_t ret = -EBADF; 1019 1020 if (pos < 0) 1021 return -EINVAL; 1022 1023 f = fdget(fd); 1024 if (f.file) { 1025 ret = -ESPIPE; 1026 if (f.file->f_mode & FMODE_PWRITE) 1027 ret = vfs_writev(f.file, vec, vlen, &pos, flags); 1028 fdput(f); 1029 } 1030 1031 if (ret > 0) 1032 add_wchar(current, ret); 1033 inc_syscw(current); 1034 return ret; 1035 } 1036 1037 SYSCALL_DEFINE3(readv, unsigned long, fd, const struct iovec __user *, vec, 1038 unsigned long, vlen) 1039 { 1040 return do_readv(fd, vec, vlen, 0); 1041 } 1042 1043 SYSCALL_DEFINE3(writev, unsigned long, fd, const struct iovec __user *, vec, 1044 unsigned long, vlen) 1045 { 1046 return do_writev(fd, vec, vlen, 0); 1047 } 1048 1049 SYSCALL_DEFINE5(preadv, unsigned long, fd, const struct iovec __user *, vec, 1050 unsigned long, vlen, unsigned long, pos_l, unsigned long, pos_h) 1051 { 1052 loff_t pos = pos_from_hilo(pos_h, pos_l); 1053 1054 return do_preadv(fd, vec, vlen, pos, 0); 1055 } 1056 1057 SYSCALL_DEFINE6(preadv2, unsigned long, fd, const struct iovec __user *, vec, 1058 unsigned long, vlen, unsigned long, pos_l, unsigned long, pos_h, 1059 rwf_t, flags) 1060 { 1061 loff_t pos = pos_from_hilo(pos_h, pos_l); 1062 1063 if (pos == -1) 1064 return do_readv(fd, vec, vlen, flags); 1065 1066 return do_preadv(fd, vec, vlen, pos, flags); 1067 } 1068 1069 SYSCALL_DEFINE5(pwritev, unsigned long, fd, const struct iovec __user *, vec, 1070 unsigned long, vlen, unsigned long, pos_l, unsigned long, pos_h) 1071 { 1072 loff_t pos = pos_from_hilo(pos_h, pos_l); 1073 1074 return do_pwritev(fd, vec, vlen, pos, 0); 1075 } 1076 1077 SYSCALL_DEFINE6(pwritev2, unsigned long, fd, const struct iovec __user *, vec, 1078 unsigned long, vlen, unsigned long, pos_l, unsigned long, pos_h, 1079 rwf_t, flags) 1080 { 1081 loff_t pos = pos_from_hilo(pos_h, pos_l); 1082 1083 if (pos == -1) 1084 return do_writev(fd, vec, vlen, flags); 1085 1086 return do_pwritev(fd, vec, vlen, pos, flags); 1087 } 1088 1089 /* 1090 * Various compat syscalls. Note that they all pretend to take a native 1091 * iovec - import_iovec will properly treat those as compat_iovecs based on 1092 * in_compat_syscall(). 1093 */ 1094 #ifdef CONFIG_COMPAT 1095 #ifdef __ARCH_WANT_COMPAT_SYS_PREADV64 1096 COMPAT_SYSCALL_DEFINE4(preadv64, unsigned long, fd, 1097 const struct iovec __user *, vec, 1098 unsigned long, vlen, loff_t, pos) 1099 { 1100 return do_preadv(fd, vec, vlen, pos, 0); 1101 } 1102 #endif 1103 1104 COMPAT_SYSCALL_DEFINE5(preadv, compat_ulong_t, fd, 1105 const struct iovec __user *, vec, 1106 compat_ulong_t, vlen, u32, pos_low, u32, pos_high) 1107 { 1108 loff_t pos = ((loff_t)pos_high << 32) | pos_low; 1109 1110 return do_preadv(fd, vec, vlen, pos, 0); 1111 } 1112 1113 #ifdef __ARCH_WANT_COMPAT_SYS_PREADV64V2 1114 COMPAT_SYSCALL_DEFINE5(preadv64v2, unsigned long, fd, 1115 const struct iovec __user *, vec, 1116 unsigned long, vlen, loff_t, pos, rwf_t, flags) 1117 { 1118 if (pos == -1) 1119 return do_readv(fd, vec, vlen, flags); 1120 return do_preadv(fd, vec, vlen, pos, flags); 1121 } 1122 #endif 1123 1124 COMPAT_SYSCALL_DEFINE6(preadv2, compat_ulong_t, fd, 1125 const struct iovec __user *, vec, 1126 compat_ulong_t, vlen, u32, pos_low, u32, pos_high, 1127 rwf_t, flags) 1128 { 1129 loff_t pos = ((loff_t)pos_high << 32) | pos_low; 1130 1131 if (pos == -1) 1132 return do_readv(fd, vec, vlen, flags); 1133 return do_preadv(fd, vec, vlen, pos, flags); 1134 } 1135 1136 #ifdef __ARCH_WANT_COMPAT_SYS_PWRITEV64 1137 COMPAT_SYSCALL_DEFINE4(pwritev64, unsigned long, fd, 1138 const struct iovec __user *, vec, 1139 unsigned long, vlen, loff_t, pos) 1140 { 1141 return do_pwritev(fd, vec, vlen, pos, 0); 1142 } 1143 #endif 1144 1145 COMPAT_SYSCALL_DEFINE5(pwritev, compat_ulong_t, fd, 1146 const struct iovec __user *,vec, 1147 compat_ulong_t, vlen, u32, pos_low, u32, pos_high) 1148 { 1149 loff_t pos = ((loff_t)pos_high << 32) | pos_low; 1150 1151 return do_pwritev(fd, vec, vlen, pos, 0); 1152 } 1153 1154 #ifdef __ARCH_WANT_COMPAT_SYS_PWRITEV64V2 1155 COMPAT_SYSCALL_DEFINE5(pwritev64v2, unsigned long, fd, 1156 const struct iovec __user *, vec, 1157 unsigned long, vlen, loff_t, pos, rwf_t, flags) 1158 { 1159 if (pos == -1) 1160 return do_writev(fd, vec, vlen, flags); 1161 return do_pwritev(fd, vec, vlen, pos, flags); 1162 } 1163 #endif 1164 1165 COMPAT_SYSCALL_DEFINE6(pwritev2, compat_ulong_t, fd, 1166 const struct iovec __user *,vec, 1167 compat_ulong_t, vlen, u32, pos_low, u32, pos_high, rwf_t, flags) 1168 { 1169 loff_t pos = ((loff_t)pos_high << 32) | pos_low; 1170 1171 if (pos == -1) 1172 return do_writev(fd, vec, vlen, flags); 1173 return do_pwritev(fd, vec, vlen, pos, flags); 1174 } 1175 #endif /* CONFIG_COMPAT */ 1176 1177 static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos, 1178 size_t count, loff_t max) 1179 { 1180 struct fd in, out; 1181 struct inode *in_inode, *out_inode; 1182 struct pipe_inode_info *opipe; 1183 loff_t pos; 1184 loff_t out_pos; 1185 ssize_t retval; 1186 int fl; 1187 1188 /* 1189 * Get input file, and verify that it is ok.. 1190 */ 1191 retval = -EBADF; 1192 in = fdget(in_fd); 1193 if (!in.file) 1194 goto out; 1195 if (!(in.file->f_mode & FMODE_READ)) 1196 goto fput_in; 1197 retval = -ESPIPE; 1198 if (!ppos) { 1199 pos = in.file->f_pos; 1200 } else { 1201 pos = *ppos; 1202 if (!(in.file->f_mode & FMODE_PREAD)) 1203 goto fput_in; 1204 } 1205 retval = rw_verify_area(READ, in.file, &pos, count); 1206 if (retval < 0) 1207 goto fput_in; 1208 if (count > MAX_RW_COUNT) 1209 count = MAX_RW_COUNT; 1210 1211 /* 1212 * Get output file, and verify that it is ok.. 1213 */ 1214 retval = -EBADF; 1215 out = fdget(out_fd); 1216 if (!out.file) 1217 goto fput_in; 1218 if (!(out.file->f_mode & FMODE_WRITE)) 1219 goto fput_out; 1220 in_inode = file_inode(in.file); 1221 out_inode = file_inode(out.file); 1222 out_pos = out.file->f_pos; 1223 1224 if (!max) 1225 max = min(in_inode->i_sb->s_maxbytes, out_inode->i_sb->s_maxbytes); 1226 1227 if (unlikely(pos + count > max)) { 1228 retval = -EOVERFLOW; 1229 if (pos >= max) 1230 goto fput_out; 1231 count = max - pos; 1232 } 1233 1234 fl = 0; 1235 #if 0 1236 /* 1237 * We need to debate whether we can enable this or not. The 1238 * man page documents EAGAIN return for the output at least, 1239 * and the application is arguably buggy if it doesn't expect 1240 * EAGAIN on a non-blocking file descriptor. 1241 */ 1242 if (in.file->f_flags & O_NONBLOCK) 1243 fl = SPLICE_F_NONBLOCK; 1244 #endif 1245 opipe = get_pipe_info(out.file, true); 1246 if (!opipe) { 1247 retval = rw_verify_area(WRITE, out.file, &out_pos, count); 1248 if (retval < 0) 1249 goto fput_out; 1250 file_start_write(out.file); 1251 retval = do_splice_direct(in.file, &pos, out.file, &out_pos, 1252 count, fl); 1253 file_end_write(out.file); 1254 } else { 1255 if (out.file->f_flags & O_NONBLOCK) 1256 fl |= SPLICE_F_NONBLOCK; 1257 1258 retval = splice_file_to_pipe(in.file, opipe, &pos, count, fl); 1259 } 1260 1261 if (retval > 0) { 1262 add_rchar(current, retval); 1263 add_wchar(current, retval); 1264 fsnotify_access(in.file); 1265 fsnotify_modify(out.file); 1266 out.file->f_pos = out_pos; 1267 if (ppos) 1268 *ppos = pos; 1269 else 1270 in.file->f_pos = pos; 1271 } 1272 1273 inc_syscr(current); 1274 inc_syscw(current); 1275 if (pos > max) 1276 retval = -EOVERFLOW; 1277 1278 fput_out: 1279 fdput(out); 1280 fput_in: 1281 fdput(in); 1282 out: 1283 return retval; 1284 } 1285 1286 SYSCALL_DEFINE4(sendfile, int, out_fd, int, in_fd, off_t __user *, offset, size_t, count) 1287 { 1288 loff_t pos; 1289 off_t off; 1290 ssize_t ret; 1291 1292 if (offset) { 1293 if (unlikely(get_user(off, offset))) 1294 return -EFAULT; 1295 pos = off; 1296 ret = do_sendfile(out_fd, in_fd, &pos, count, MAX_NON_LFS); 1297 if (unlikely(put_user(pos, offset))) 1298 return -EFAULT; 1299 return ret; 1300 } 1301 1302 return do_sendfile(out_fd, in_fd, NULL, count, 0); 1303 } 1304 1305 SYSCALL_DEFINE4(sendfile64, int, out_fd, int, in_fd, loff_t __user *, offset, size_t, count) 1306 { 1307 loff_t pos; 1308 ssize_t ret; 1309 1310 if (offset) { 1311 if (unlikely(copy_from_user(&pos, offset, sizeof(loff_t)))) 1312 return -EFAULT; 1313 ret = do_sendfile(out_fd, in_fd, &pos, count, 0); 1314 if (unlikely(put_user(pos, offset))) 1315 return -EFAULT; 1316 return ret; 1317 } 1318 1319 return do_sendfile(out_fd, in_fd, NULL, count, 0); 1320 } 1321 1322 #ifdef CONFIG_COMPAT 1323 COMPAT_SYSCALL_DEFINE4(sendfile, int, out_fd, int, in_fd, 1324 compat_off_t __user *, offset, compat_size_t, count) 1325 { 1326 loff_t pos; 1327 off_t off; 1328 ssize_t ret; 1329 1330 if (offset) { 1331 if (unlikely(get_user(off, offset))) 1332 return -EFAULT; 1333 pos = off; 1334 ret = do_sendfile(out_fd, in_fd, &pos, count, MAX_NON_LFS); 1335 if (unlikely(put_user(pos, offset))) 1336 return -EFAULT; 1337 return ret; 1338 } 1339 1340 return do_sendfile(out_fd, in_fd, NULL, count, 0); 1341 } 1342 1343 COMPAT_SYSCALL_DEFINE4(sendfile64, int, out_fd, int, in_fd, 1344 compat_loff_t __user *, offset, compat_size_t, count) 1345 { 1346 loff_t pos; 1347 ssize_t ret; 1348 1349 if (offset) { 1350 if (unlikely(copy_from_user(&pos, offset, sizeof(loff_t)))) 1351 return -EFAULT; 1352 ret = do_sendfile(out_fd, in_fd, &pos, count, 0); 1353 if (unlikely(put_user(pos, offset))) 1354 return -EFAULT; 1355 return ret; 1356 } 1357 1358 return do_sendfile(out_fd, in_fd, NULL, count, 0); 1359 } 1360 #endif 1361 1362 /** 1363 * generic_copy_file_range - copy data between two files 1364 * @file_in: file structure to read from 1365 * @pos_in: file offset to read from 1366 * @file_out: file structure to write data to 1367 * @pos_out: file offset to write data to 1368 * @len: amount of data to copy 1369 * @flags: copy flags 1370 * 1371 * This is a generic filesystem helper to copy data from one file to another. 1372 * It has no constraints on the source or destination file owners - the files 1373 * can belong to different superblocks and different filesystem types. Short 1374 * copies are allowed. 1375 * 1376 * This should be called from the @file_out filesystem, as per the 1377 * ->copy_file_range() method. 1378 * 1379 * Returns the number of bytes copied or a negative error indicating the 1380 * failure. 1381 */ 1382 1383 ssize_t generic_copy_file_range(struct file *file_in, loff_t pos_in, 1384 struct file *file_out, loff_t pos_out, 1385 size_t len, unsigned int flags) 1386 { 1387 return do_splice_direct(file_in, &pos_in, file_out, &pos_out, 1388 len > MAX_RW_COUNT ? MAX_RW_COUNT : len, 0); 1389 } 1390 EXPORT_SYMBOL(generic_copy_file_range); 1391 1392 /* 1393 * Performs necessary checks before doing a file copy 1394 * 1395 * Can adjust amount of bytes to copy via @req_count argument. 1396 * Returns appropriate error code that caller should return or 1397 * zero in case the copy should be allowed. 1398 */ 1399 static int generic_copy_file_checks(struct file *file_in, loff_t pos_in, 1400 struct file *file_out, loff_t pos_out, 1401 size_t *req_count, unsigned int flags) 1402 { 1403 struct inode *inode_in = file_inode(file_in); 1404 struct inode *inode_out = file_inode(file_out); 1405 uint64_t count = *req_count; 1406 loff_t size_in; 1407 int ret; 1408 1409 ret = generic_file_rw_checks(file_in, file_out); 1410 if (ret) 1411 return ret; 1412 1413 /* 1414 * We allow some filesystems to handle cross sb copy, but passing 1415 * a file of the wrong filesystem type to filesystem driver can result 1416 * in an attempt to dereference the wrong type of ->private_data, so 1417 * avoid doing that until we really have a good reason. 1418 * 1419 * nfs and cifs define several different file_system_type structures 1420 * and several different sets of file_operations, but they all end up 1421 * using the same ->copy_file_range() function pointer. 1422 */ 1423 if (file_out->f_op->copy_file_range) { 1424 if (file_in->f_op->copy_file_range != 1425 file_out->f_op->copy_file_range) 1426 return -EXDEV; 1427 } else if (file_inode(file_in)->i_sb != file_inode(file_out)->i_sb) { 1428 return -EXDEV; 1429 } 1430 1431 /* Don't touch certain kinds of inodes */ 1432 if (IS_IMMUTABLE(inode_out)) 1433 return -EPERM; 1434 1435 if (IS_SWAPFILE(inode_in) || IS_SWAPFILE(inode_out)) 1436 return -ETXTBSY; 1437 1438 /* Ensure offsets don't wrap. */ 1439 if (pos_in + count < pos_in || pos_out + count < pos_out) 1440 return -EOVERFLOW; 1441 1442 /* Shorten the copy to EOF */ 1443 size_in = i_size_read(inode_in); 1444 if (pos_in >= size_in) 1445 count = 0; 1446 else 1447 count = min(count, size_in - (uint64_t)pos_in); 1448 1449 ret = generic_write_check_limits(file_out, pos_out, &count); 1450 if (ret) 1451 return ret; 1452 1453 /* Don't allow overlapped copying within the same file. */ 1454 if (inode_in == inode_out && 1455 pos_out + count > pos_in && 1456 pos_out < pos_in + count) 1457 return -EINVAL; 1458 1459 *req_count = count; 1460 return 0; 1461 } 1462 1463 /* 1464 * copy_file_range() differs from regular file read and write in that it 1465 * specifically allows return partial success. When it does so is up to 1466 * the copy_file_range method. 1467 */ 1468 ssize_t vfs_copy_file_range(struct file *file_in, loff_t pos_in, 1469 struct file *file_out, loff_t pos_out, 1470 size_t len, unsigned int flags) 1471 { 1472 ssize_t ret; 1473 1474 if (flags != 0) 1475 return -EINVAL; 1476 1477 ret = generic_copy_file_checks(file_in, pos_in, file_out, pos_out, &len, 1478 flags); 1479 if (unlikely(ret)) 1480 return ret; 1481 1482 ret = rw_verify_area(READ, file_in, &pos_in, len); 1483 if (unlikely(ret)) 1484 return ret; 1485 1486 ret = rw_verify_area(WRITE, file_out, &pos_out, len); 1487 if (unlikely(ret)) 1488 return ret; 1489 1490 if (len == 0) 1491 return 0; 1492 1493 file_start_write(file_out); 1494 1495 /* 1496 * Cloning is supported by more file systems, so we implement copy on 1497 * same sb using clone, but for filesystems where both clone and copy 1498 * are supported (e.g. nfs,cifs), we only call the copy method. 1499 */ 1500 if (file_out->f_op->copy_file_range) { 1501 ret = file_out->f_op->copy_file_range(file_in, pos_in, 1502 file_out, pos_out, 1503 len, flags); 1504 goto done; 1505 } 1506 1507 if (file_in->f_op->remap_file_range && 1508 file_inode(file_in)->i_sb == file_inode(file_out)->i_sb) { 1509 ret = file_in->f_op->remap_file_range(file_in, pos_in, 1510 file_out, pos_out, 1511 min_t(loff_t, MAX_RW_COUNT, len), 1512 REMAP_FILE_CAN_SHORTEN); 1513 if (ret > 0) 1514 goto done; 1515 } 1516 1517 /* 1518 * We can get here for same sb copy of filesystems that do not implement 1519 * ->copy_file_range() in case filesystem does not support clone or in 1520 * case filesystem supports clone but rejected the clone request (e.g. 1521 * because it was not block aligned). 1522 * 1523 * In both cases, fall back to kernel copy so we are able to maintain a 1524 * consistent story about which filesystems support copy_file_range() 1525 * and which filesystems do not, that will allow userspace tools to 1526 * make consistent desicions w.r.t using copy_file_range(). 1527 */ 1528 ret = generic_copy_file_range(file_in, pos_in, file_out, pos_out, len, 1529 flags); 1530 1531 done: 1532 if (ret > 0) { 1533 fsnotify_access(file_in); 1534 add_rchar(current, ret); 1535 fsnotify_modify(file_out); 1536 add_wchar(current, ret); 1537 } 1538 1539 inc_syscr(current); 1540 inc_syscw(current); 1541 1542 file_end_write(file_out); 1543 1544 return ret; 1545 } 1546 EXPORT_SYMBOL(vfs_copy_file_range); 1547 1548 SYSCALL_DEFINE6(copy_file_range, int, fd_in, loff_t __user *, off_in, 1549 int, fd_out, loff_t __user *, off_out, 1550 size_t, len, unsigned int, flags) 1551 { 1552 loff_t pos_in; 1553 loff_t pos_out; 1554 struct fd f_in; 1555 struct fd f_out; 1556 ssize_t ret = -EBADF; 1557 1558 f_in = fdget(fd_in); 1559 if (!f_in.file) 1560 goto out2; 1561 1562 f_out = fdget(fd_out); 1563 if (!f_out.file) 1564 goto out1; 1565 1566 ret = -EFAULT; 1567 if (off_in) { 1568 if (copy_from_user(&pos_in, off_in, sizeof(loff_t))) 1569 goto out; 1570 } else { 1571 pos_in = f_in.file->f_pos; 1572 } 1573 1574 if (off_out) { 1575 if (copy_from_user(&pos_out, off_out, sizeof(loff_t))) 1576 goto out; 1577 } else { 1578 pos_out = f_out.file->f_pos; 1579 } 1580 1581 ret = vfs_copy_file_range(f_in.file, pos_in, f_out.file, pos_out, len, 1582 flags); 1583 if (ret > 0) { 1584 pos_in += ret; 1585 pos_out += ret; 1586 1587 if (off_in) { 1588 if (copy_to_user(off_in, &pos_in, sizeof(loff_t))) 1589 ret = -EFAULT; 1590 } else { 1591 f_in.file->f_pos = pos_in; 1592 } 1593 1594 if (off_out) { 1595 if (copy_to_user(off_out, &pos_out, sizeof(loff_t))) 1596 ret = -EFAULT; 1597 } else { 1598 f_out.file->f_pos = pos_out; 1599 } 1600 } 1601 1602 out: 1603 fdput(f_out); 1604 out1: 1605 fdput(f_in); 1606 out2: 1607 return ret; 1608 } 1609 1610 /* 1611 * Don't operate on ranges the page cache doesn't support, and don't exceed the 1612 * LFS limits. If pos is under the limit it becomes a short access. If it 1613 * exceeds the limit we return -EFBIG. 1614 */ 1615 int generic_write_check_limits(struct file *file, loff_t pos, loff_t *count) 1616 { 1617 struct inode *inode = file->f_mapping->host; 1618 loff_t max_size = inode->i_sb->s_maxbytes; 1619 loff_t limit = rlimit(RLIMIT_FSIZE); 1620 1621 if (limit != RLIM_INFINITY) { 1622 if (pos >= limit) { 1623 send_sig(SIGXFSZ, current, 0); 1624 return -EFBIG; 1625 } 1626 *count = min(*count, limit - pos); 1627 } 1628 1629 if (!(file->f_flags & O_LARGEFILE)) 1630 max_size = MAX_NON_LFS; 1631 1632 if (unlikely(pos >= max_size)) 1633 return -EFBIG; 1634 1635 *count = min(*count, max_size - pos); 1636 1637 return 0; 1638 } 1639 1640 /* Like generic_write_checks(), but takes size of write instead of iter. */ 1641 int generic_write_checks_count(struct kiocb *iocb, loff_t *count) 1642 { 1643 struct file *file = iocb->ki_filp; 1644 struct inode *inode = file->f_mapping->host; 1645 1646 if (IS_SWAPFILE(inode)) 1647 return -ETXTBSY; 1648 1649 if (!*count) 1650 return 0; 1651 1652 if (iocb->ki_flags & IOCB_APPEND) 1653 iocb->ki_pos = i_size_read(inode); 1654 1655 if ((iocb->ki_flags & IOCB_NOWAIT) && 1656 !((iocb->ki_flags & IOCB_DIRECT) || 1657 (file->f_mode & FMODE_BUF_WASYNC))) 1658 return -EINVAL; 1659 1660 return generic_write_check_limits(iocb->ki_filp, iocb->ki_pos, count); 1661 } 1662 EXPORT_SYMBOL(generic_write_checks_count); 1663 1664 /* 1665 * Performs necessary checks before doing a write 1666 * 1667 * Can adjust writing position or amount of bytes to write. 1668 * Returns appropriate error code that caller should return or 1669 * zero in case that write should be allowed. 1670 */ 1671 ssize_t generic_write_checks(struct kiocb *iocb, struct iov_iter *from) 1672 { 1673 loff_t count = iov_iter_count(from); 1674 int ret; 1675 1676 ret = generic_write_checks_count(iocb, &count); 1677 if (ret) 1678 return ret; 1679 1680 iov_iter_truncate(from, count); 1681 return iov_iter_count(from); 1682 } 1683 EXPORT_SYMBOL(generic_write_checks); 1684 1685 /* 1686 * Performs common checks before doing a file copy/clone 1687 * from @file_in to @file_out. 1688 */ 1689 int generic_file_rw_checks(struct file *file_in, struct file *file_out) 1690 { 1691 struct inode *inode_in = file_inode(file_in); 1692 struct inode *inode_out = file_inode(file_out); 1693 1694 /* Don't copy dirs, pipes, sockets... */ 1695 if (S_ISDIR(inode_in->i_mode) || S_ISDIR(inode_out->i_mode)) 1696 return -EISDIR; 1697 if (!S_ISREG(inode_in->i_mode) || !S_ISREG(inode_out->i_mode)) 1698 return -EINVAL; 1699 1700 if (!(file_in->f_mode & FMODE_READ) || 1701 !(file_out->f_mode & FMODE_WRITE) || 1702 (file_out->f_flags & O_APPEND)) 1703 return -EBADF; 1704 1705 return 0; 1706 } 1707