// SPDX-License-Identifier: GPL-2.0-only
#include <crypto/hash.h>
#include <linux/export.h>
#include <linux/bvec.h>
#include <linux/fault-inject-usercopy.h>
#include <linux/uio.h>
#include <linux/pagemap.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/splice.h>
#include <linux/compat.h>
#include <net/checksum.h>
#include <linux/scatterlist.h>
#include <linux/instrumented.h>

#define PIPE_PARANOIA /* for now */

#define iterate_iovec(i, n, __v, __p, skip, STEP) {	\
	size_t left;					\
	size_t wanted = n;				\
	__p = i->iov;					\
	__v.iov_len = min(n, __p->iov_len - skip);	\
	if (likely(__v.iov_len)) {			\
		__v.iov_base = __p->iov_base + skip;	\
		left = (STEP);				\
		__v.iov_len -= left;			\
		skip += __v.iov_len;			\
		n -= __v.iov_len;			\
	} else {					\
		left = 0;				\
	}						\
	while (unlikely(!left && n)) {			\
		__p++;					\
		__v.iov_len = min(n, __p->iov_len);	\
		if (unlikely(!__v.iov_len))		\
			continue;			\
		__v.iov_base = __p->iov_base;		\
		left = (STEP);				\
		__v.iov_len -= left;			\
		skip = __v.iov_len;			\
		n -= __v.iov_len;			\
	}						\
	n = wanted - n;					\
}

#define iterate_kvec(i, n, __v, __p, skip, STEP) {	\
	size_t wanted = n;				\
	__p = i->kvec;					\
	__v.iov_len = min(n, __p->iov_len - skip);	\
	if (likely(__v.iov_len)) {			\
		__v.iov_base = __p->iov_base + skip;	\
		(void)(STEP);				\
		skip += __v.iov_len;			\
		n -= __v.iov_len;			\
	}						\
	while (unlikely(n)) {				\
		__p++;					\
		__v.iov_len = min(n, __p->iov_len);	\
		if (unlikely(!__v.iov_len))		\
			continue;			\
		__v.iov_base = __p->iov_base;		\
		(void)(STEP);				\
		skip = __v.iov_len;			\
		n -= __v.iov_len;			\
	}						\
	n = wanted;					\
}

#define iterate_bvec(i, n, __v, __bi, skip, STEP) {	\
	struct bvec_iter __start;			\
	__start.bi_size = n;				\
	__start.bi_bvec_done = skip;			\
	__start.bi_idx = 0;				\
	for_each_bvec(__v, i->bvec, __bi, __start) {	\
		(void)(STEP);				\
	}						\
}

#define iterate_all_kinds(i, n, v, I, B, K) {			\
	if (likely(n)) {					\
		size_t skip = i->iov_offset;			\
		if (unlikely(i->type & ITER_BVEC)) {		\
			struct bio_vec v;			\
			struct bvec_iter __bi;			\
			iterate_bvec(i, n, v, __bi, skip, (B))	\
		} else if (unlikely(i->type & ITER_KVEC)) {	\
			const struct kvec *kvec;		\
			struct kvec v;				\
			iterate_kvec(i, n, v, kvec, skip, (K))	\
		} else if (unlikely(i->type & ITER_DISCARD)) {	\
		} else {					\
			const struct iovec *iov;		\
			struct iovec v;				\
			iterate_iovec(i, n, v, iov, skip, (I))	\
		}						\
	}							\
}

#define iterate_and_advance(i, n, v, I, B, K) {			\
	if (unlikely(i->count < n))				\
		n = i->count;					\
	if (i->count) {						\
		size_t skip = i->iov_offset;			\
		if (unlikely(i->type & ITER_BVEC)) {		\
			const struct bio_vec *bvec = i->bvec;	\
			struct bio_vec v;			\
			struct bvec_iter __bi;			\
			iterate_bvec(i, n, v, __bi, skip, (B))	\
			i->bvec = __bvec_iter_bvec(i->bvec, __bi);	\
			i->nr_segs -= i->bvec - bvec;		\
			skip = __bi.bi_bvec_done;		\
		} else if (unlikely(i->type & ITER_KVEC)) {	\
			const struct kvec *kvec;		\
			struct kvec v;				\
			iterate_kvec(i, n, v, kvec, skip, (K))	\
			if (skip == kvec->iov_len) {		\
				kvec++;				\
				skip = 0;			\
			}					\
			i->nr_segs -= kvec - i->kvec;		\
			i->kvec = kvec;				\
		} else if (unlikely(i->type & ITER_DISCARD)) {	\
			skip += n;				\
		} else {					\
			const struct iovec *iov;		\
			struct iovec v;				\
			iterate_iovec(i, n, v, iov, skip, (I))	\
			if (skip == iov->iov_len) {		\
				iov++;				\
				skip = 0;			\
			}					\
			i->nr_segs -= iov - i->iov;		\
			i->iov = iov;				\
		}						\
		i->count -= n;					\
		i->iov_offset = skip;				\
	}							\
}

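/*
 * Note on the iterate_*() helpers above (editorial comment, not from the
 * original file): each expands STEP once per contiguous segment, with the
 * current segment described by the local variable v.  For the iovec case
 * STEP must evaluate to the number of bytes *not* processed (so a user-copy
 * helper can report a partial fault), while the kvec and bvec cases ignore
 * the result.  A minimal illustrative expansion, mirroring _copy_from_iter()
 * below and assuming a caller-provided buffer "buf":
 *
 *	char *to = buf;
 *	iterate_and_advance(i, bytes, v,
 *		copyin((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len),
 *		memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page,
 *				 v.bv_offset, v.bv_len),
 *		memcpy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len)
 *	)
 */
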
static int copyout(void __user *to, const void *from, size_t n)
{
	if (should_fail_usercopy())
		return n;
	if (access_ok(to, n)) {
		instrument_copy_to_user(to, from, n);
		n = raw_copy_to_user(to, from, n);
	}
	return n;
}

static int copyin(void *to, const void __user *from, size_t n)
{
	if (should_fail_usercopy())
		return n;
	if (access_ok(from, n)) {
		instrument_copy_from_user(to, from, n);
		n = raw_copy_from_user(to, from, n);
	}
	return n;
}

static size_t copy_page_to_iter_iovec(struct page *page, size_t offset, size_t bytes,
			 struct iov_iter *i)
{
	size_t skip, copy, left, wanted;
	const struct iovec *iov;
	char __user *buf;
	void *kaddr, *from;

	if (unlikely(bytes > i->count))
		bytes = i->count;

	if (unlikely(!bytes))
		return 0;

	might_fault();
	wanted = bytes;
	iov = i->iov;
	skip = i->iov_offset;
	buf = iov->iov_base + skip;
	copy = min(bytes, iov->iov_len - skip);

	if (IS_ENABLED(CONFIG_HIGHMEM) && !fault_in_pages_writeable(buf, copy)) {
		kaddr = kmap_atomic(page);
		from = kaddr + offset;

		/* first chunk, usually the only one */
		left = copyout(buf, from, copy);
		copy -= left;
		skip += copy;
		from += copy;
		bytes -= copy;

		while (unlikely(!left && bytes)) {
			iov++;
			buf = iov->iov_base;
			copy = min(bytes, iov->iov_len);
			left = copyout(buf, from, copy);
			copy -= left;
			skip = copy;
			from += copy;
			bytes -= copy;
		}
		if (likely(!bytes)) {
			kunmap_atomic(kaddr);
			goto done;
		}
		offset = from - kaddr;
		buf += copy;
		kunmap_atomic(kaddr);
		copy = min(bytes, iov->iov_len - skip);
	}
	/* Too bad - revert to non-atomic kmap */

	kaddr = kmap(page);
	from = kaddr + offset;
	left = copyout(buf, from, copy);
	copy -= left;
	skip += copy;
	from += copy;
	bytes -= copy;
	while (unlikely(!left && bytes)) {
		iov++;
		buf = iov->iov_base;
		copy = min(bytes, iov->iov_len);
		left = copyout(buf, from, copy);
		copy -= left;
		skip = copy;
		from += copy;
		bytes -= copy;
	}
	kunmap(page);

done:
	if (skip == iov->iov_len) {
		iov++;
		skip = 0;
	}
	i->count -= wanted - bytes;
	i->nr_segs -= iov - i->iov;
	i->iov = iov;
	i->iov_offset = skip;
	return wanted - bytes;
}

static size_t copy_page_from_iter_iovec(struct page *page, size_t offset, size_t bytes,
			 struct iov_iter *i)
{
	size_t skip, copy, left, wanted;
	const struct iovec *iov;
	char __user *buf;
	void *kaddr, *to;

	if (unlikely(bytes > i->count))
		bytes = i->count;

	if (unlikely(!bytes))
		return 0;

	might_fault();
	wanted = bytes;
	iov = i->iov;
	skip = i->iov_offset;
	buf = iov->iov_base + skip;
	copy = min(bytes, iov->iov_len - skip);

	if (IS_ENABLED(CONFIG_HIGHMEM) && !fault_in_pages_readable(buf, copy)) {
		kaddr = kmap_atomic(page);
		to = kaddr + offset;

		/* first chunk, usually the only one */
		left = copyin(to, buf, copy);
		copy -= left;
		skip += copy;
		to += copy;
		bytes -= copy;

		while (unlikely(!left && bytes)) {
			iov++;
			buf = iov->iov_base;
			copy = min(bytes, iov->iov_len);
			left = copyin(to, buf, copy);
			copy -= left;
			skip = copy;
			to += copy;
			bytes -= copy;
		}
		if (likely(!bytes)) {
			kunmap_atomic(kaddr);
			goto done;
		}
		offset = to - kaddr;
		buf += copy;
		kunmap_atomic(kaddr);
		copy = min(bytes, iov->iov_len - skip);
	}
	/* Too bad - revert to non-atomic kmap */

	kaddr = kmap(page);
	to = kaddr + offset;
	left = copyin(to, buf, copy);
	copy -= left;
	skip += copy;
	to += copy;
	bytes -= copy;
	while (unlikely(!left && bytes)) {
		iov++;
		buf = iov->iov_base;
		copy = min(bytes, iov->iov_len);
		left = copyin(to, buf, copy);
		copy -= left;
		skip = copy;
		to += copy;
		bytes -= copy;
	}
	kunmap(page);

done:
	if (skip == iov->iov_len) {
		iov++;
		skip = 0;
	}
	i->count -= wanted - bytes;
	i->nr_segs -= iov - i->iov;
	i->iov = iov;
	i->iov_offset = skip;
	return wanted - bytes;
}

#ifdef PIPE_PARANOIA
static bool sanity(const struct iov_iter *i)
{
	struct pipe_inode_info *pipe = i->pipe;
	unsigned int p_head = pipe->head;
	unsigned int p_tail = pipe->tail;
	unsigned int p_mask = pipe->ring_size - 1;
	unsigned int p_occupancy = pipe_occupancy(p_head, p_tail);
	unsigned int i_head = i->head;
	unsigned int idx;

	if (i->iov_offset) {
		struct pipe_buffer *p;
		if (unlikely(p_occupancy == 0))
			goto Bad;	// pipe must be non-empty
		if (unlikely(i_head != p_head - 1))
			goto Bad;	// must be at the last buffer...

		p = &pipe->bufs[i_head & p_mask];
		if (unlikely(p->offset + p->len != i->iov_offset))
			goto Bad;	// ... at the end of segment
	} else {
		if (i_head != p_head)
			goto Bad;	// must be right after the last buffer
	}
	return true;
Bad:
	printk(KERN_ERR "idx = %d, offset = %zd\n", i_head, i->iov_offset);
	printk(KERN_ERR "head = %d, tail = %d, buffers = %d\n",
			p_head, p_tail, pipe->ring_size);
	for (idx = 0; idx < pipe->ring_size; idx++)
		printk(KERN_ERR "[%p %p %d %d]\n",
			pipe->bufs[idx].ops,
			pipe->bufs[idx].page,
			pipe->bufs[idx].offset,
			pipe->bufs[idx].len);
	WARN_ON(1);
	return false;
}
#else
#define sanity(i) true
#endif

static size_t copy_page_to_iter_pipe(struct page *page, size_t offset, size_t bytes,
			 struct iov_iter *i)
{
	struct pipe_inode_info *pipe = i->pipe;
	struct pipe_buffer *buf;
	unsigned int p_tail = pipe->tail;
	unsigned int p_mask = pipe->ring_size - 1;
	unsigned int i_head = i->head;
	size_t off;

	if (unlikely(bytes > i->count))
		bytes = i->count;

	if (unlikely(!bytes))
		return 0;

	if (!sanity(i))
		return 0;

	off = i->iov_offset;
	buf = &pipe->bufs[i_head & p_mask];
	if (off) {
		if (offset == off && buf->page == page) {
			/* merge with the last one */
			buf->len += bytes;
			i->iov_offset += bytes;
			goto out;
		}
		i_head++;
		buf = &pipe->bufs[i_head & p_mask];
	}
	if (pipe_full(i_head, p_tail, pipe->max_usage))
		return 0;

	buf->ops = &page_cache_pipe_buf_ops;
	get_page(page);
	buf->page = page;
	buf->offset = offset;
	buf->len = bytes;

	pipe->head = i_head + 1;
	i->iov_offset = offset + bytes;
	i->head = i_head;
out:
	i->count -= bytes;
	return bytes;
}

/*
 * Fault in one or more iovecs of the given iov_iter, to a maximum length of
 * bytes. For each iovec, fault in each page that constitutes the iovec.
 *
 * Return 0 on success, or non-zero if the memory could not be accessed (i.e.
 * because it is an invalid address).
 */
int iov_iter_fault_in_readable(struct iov_iter *i, size_t bytes)
{
	size_t skip = i->iov_offset;
	const struct iovec *iov;
	int err;
	struct iovec v;

	if (!(i->type & (ITER_BVEC|ITER_KVEC))) {
		iterate_iovec(i, bytes, v, iov, skip, ({
			err = fault_in_pages_readable(v.iov_base, v.iov_len);
			if (unlikely(err))
				return err;
		0;}))
	}
	return 0;
}
EXPORT_SYMBOL(iov_iter_fault_in_readable);

void iov_iter_init(struct iov_iter *i, unsigned int direction,
			const struct iovec *iov, unsigned long nr_segs,
			size_t count)
{
	WARN_ON(direction & ~(READ | WRITE));
	direction &= READ | WRITE;

	/* It will get better.  Eventually... */
	if (uaccess_kernel()) {
		i->type = ITER_KVEC | direction;
		i->kvec = (struct kvec *)iov;
	} else {
		i->type = ITER_IOVEC | direction;
		i->iov = iov;
	}
	i->nr_segs = nr_segs;
	i->iov_offset = 0;
	i->count = count;
}
EXPORT_SYMBOL(iov_iter_init);

static void memcpy_from_page(char *to, struct page *page, size_t offset, size_t len)
{
	char *from = kmap_atomic(page);
	memcpy(to, from + offset, len);
	kunmap_atomic(from);
}

static void memcpy_to_page(struct page *page, size_t offset, const char *from, size_t len)
{
	char *to = kmap_atomic(page);
	memcpy(to + offset, from, len);
	kunmap_atomic(to);
}

static void memzero_page(struct page *page, size_t offset, size_t len)
{
	char *addr = kmap_atomic(page);
	memset(addr + offset, 0, len);
	kunmap_atomic(addr);
}

static inline bool allocated(struct pipe_buffer *buf)
{
	return buf->ops == &default_pipe_buf_ops;
}

static inline void data_start(const struct iov_iter *i,
			      unsigned int *iter_headp, size_t *offp)
{
	unsigned int p_mask = i->pipe->ring_size - 1;
	unsigned int iter_head = i->head;
	size_t off = i->iov_offset;

	if (off && (!allocated(&i->pipe->bufs[iter_head & p_mask]) ||
		    off == PAGE_SIZE)) {
		iter_head++;
		off = 0;
	}
	*iter_headp = iter_head;
	*offp = off;
}

static size_t push_pipe(struct iov_iter *i, size_t size,
			int *iter_headp, size_t *offp)
{
	struct pipe_inode_info *pipe = i->pipe;
	unsigned int p_tail = pipe->tail;
	unsigned int p_mask = pipe->ring_size - 1;
	unsigned int iter_head;
	size_t off;
	ssize_t left;

	if (unlikely(size > i->count))
		size = i->count;
	if (unlikely(!size))
		return 0;

	left = size;
	data_start(i, &iter_head, &off);
	*iter_headp = iter_head;
	*offp = off;
	if (off) {
		left -= PAGE_SIZE - off;
		if (left <= 0) {
			pipe->bufs[iter_head & p_mask].len += size;
			return size;
		}
		pipe->bufs[iter_head & p_mask].len = PAGE_SIZE;
		iter_head++;
	}
	while (!pipe_full(iter_head, p_tail, pipe->max_usage)) {
		struct pipe_buffer *buf = &pipe->bufs[iter_head & p_mask];
		struct page *page = alloc_page(GFP_USER);
		if (!page)
			break;

		buf->ops = &default_pipe_buf_ops;
		buf->page = page;
		buf->offset = 0;
		buf->len = min_t(ssize_t, left, PAGE_SIZE);
		left -= buf->len;
		iter_head++;
		pipe->head = iter_head;

		if (left == 0)
			return size;
	}
	return size - left;
}

static size_t copy_pipe_to_iter(const void *addr, size_t bytes,
				struct iov_iter *i)
{
	struct pipe_inode_info *pipe = i->pipe;
	unsigned int p_mask = pipe->ring_size - 1;
	unsigned int i_head;
	size_t n, off;

	if (!sanity(i))
		return 0;

	bytes = n = push_pipe(i, bytes, &i_head, &off);
	if (unlikely(!n))
		return 0;
	do {
		size_t chunk = min_t(size_t, n, PAGE_SIZE - off);
		memcpy_to_page(pipe->bufs[i_head & p_mask].page, off, addr, chunk);
		i->head = i_head;
		i->iov_offset = off + chunk;
		n -= chunk;
		addr += chunk;
		off = 0;
		i_head++;
	} while (n);
	i->count -= bytes;
	return bytes;
}

static __wsum csum_and_memcpy(void *to, const void *from, size_t len,
			      __wsum sum, size_t off)
{
	__wsum next = csum_partial_copy_nocheck(from, to, len);
	return csum_block_add(sum, next, off);
}

static size_t csum_and_copy_to_pipe_iter(const void *addr, size_t bytes,
					 __wsum *csum, struct iov_iter *i)
{
	struct pipe_inode_info *pipe = i->pipe;
	unsigned int p_mask = pipe->ring_size - 1;
	unsigned int i_head;
	size_t n, r;
	size_t off = 0;
	__wsum sum = *csum;

	if (!sanity(i))
		return 0;

	bytes = n = push_pipe(i, bytes, &i_head, &r);
	if (unlikely(!n))
		return 0;
	do {
		size_t chunk = min_t(size_t, n, PAGE_SIZE - r);
		char *p = kmap_atomic(pipe->bufs[i_head & p_mask].page);
		sum = csum_and_memcpy(p + r, addr, chunk, sum, off);
		kunmap_atomic(p);
		i->head = i_head;
		i->iov_offset = r + chunk;
		n -= chunk;
		off += chunk;
		addr += chunk;
		r = 0;
		i_head++;
	} while (n);
	i->count -= bytes;
	*csum = sum;
	return bytes;
}

size_t _copy_to_iter(const void *addr, size_t bytes, struct iov_iter *i)
{
	const char *from = addr;
	if (unlikely(iov_iter_is_pipe(i)))
		return copy_pipe_to_iter(addr, bytes, i);
	if (iter_is_iovec(i))
		might_fault();
	iterate_and_advance(i, bytes, v,
		copyout(v.iov_base, (from += v.iov_len) - v.iov_len, v.iov_len),
		memcpy_to_page(v.bv_page, v.bv_offset,
			       (from += v.bv_len) - v.bv_len, v.bv_len),
		memcpy(v.iov_base, (from += v.iov_len) - v.iov_len, v.iov_len)
	)

	return bytes;
}
EXPORT_SYMBOL(_copy_to_iter);

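/*
 * Illustrative usage sketch (editorial addition, not from the original file):
 * a driver that wants to hand "len" bytes from a kernel buffer to whatever
 * the caller supplied (user iovecs, a pipe for splice(), kernel kvecs, ...)
 * normally goes through copy_to_iter(), the size-checked wrapper around
 * _copy_to_iter() declared in <linux/uio.h>.  "src" and "len" are
 * placeholders:
 *
 *	size_t copied = copy_to_iter(src, len, iter);
 *	if (copied != len)
 *		return -EFAULT;	// short copy: a destination page faulted
 */
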
#ifdef CONFIG_ARCH_HAS_COPY_MC
static int copyout_mc(void __user *to, const void *from, size_t n)
{
	if (access_ok(to, n)) {
		instrument_copy_to_user(to, from, n);
		n = copy_mc_to_user((__force void *) to, from, n);
	}
	return n;
}

static unsigned long copy_mc_to_page(struct page *page, size_t offset,
		const char *from, size_t len)
{
	unsigned long ret;
	char *to;

	to = kmap_atomic(page);
	ret = copy_mc_to_kernel(to + offset, from, len);
	kunmap_atomic(to);

	return ret;
}

static size_t copy_mc_pipe_to_iter(const void *addr, size_t bytes,
				struct iov_iter *i)
{
	struct pipe_inode_info *pipe = i->pipe;
	unsigned int p_mask = pipe->ring_size - 1;
	unsigned int i_head;
	size_t n, off, xfer = 0;

	if (!sanity(i))
		return 0;

	bytes = n = push_pipe(i, bytes, &i_head, &off);
	if (unlikely(!n))
		return 0;
	do {
		size_t chunk = min_t(size_t, n, PAGE_SIZE - off);
		unsigned long rem;

		rem = copy_mc_to_page(pipe->bufs[i_head & p_mask].page,
				      off, addr, chunk);
		i->head = i_head;
		i->iov_offset = off + chunk - rem;
		xfer += chunk - rem;
		if (rem)
			break;
		n -= chunk;
		addr += chunk;
		off = 0;
		i_head++;
	} while (n);
	i->count -= xfer;
	return xfer;
}

/**
 * _copy_mc_to_iter - copy to iter with source memory error exception handling
 * @addr: source kernel address
 * @bytes: total transfer length
 * @i: destination iterator
 *
 * The pmem driver deploys this for the dax operation
 * (dax_copy_to_iter()) for dax reads (bypass page-cache and the
 * block-layer). Upon #MC read(2) aborts and returns EIO or the bytes
 * successfully copied.
 *
 * The main differences between this and typical _copy_to_iter().
 *
 * * Typical tail/residue handling after a fault retries the copy
 *   byte-by-byte until the fault happens again. Re-triggering machine
 *   checks is potentially fatal so the implementation uses source
 *   alignment and poison alignment assumptions to avoid re-triggering
 *   hardware exceptions.
 *
 * * ITER_KVEC, ITER_PIPE, and ITER_BVEC can return short copies.
 *   Compare to copy_to_iter() where only ITER_IOVEC attempts might return
 *   a short copy.
 */
size_t _copy_mc_to_iter(const void *addr, size_t bytes, struct iov_iter *i)
{
	const char *from = addr;
	unsigned long rem, curr_addr, s_addr = (unsigned long) addr;

	if (unlikely(iov_iter_is_pipe(i)))
		return copy_mc_pipe_to_iter(addr, bytes, i);
	if (iter_is_iovec(i))
		might_fault();
	iterate_and_advance(i, bytes, v,
		copyout_mc(v.iov_base, (from += v.iov_len) - v.iov_len,
			   v.iov_len),
		({
		rem = copy_mc_to_page(v.bv_page, v.bv_offset,
				      (from += v.bv_len) - v.bv_len, v.bv_len);
		if (rem) {
			curr_addr = (unsigned long) from;
			bytes = curr_addr - s_addr - rem;
			return bytes;
		}
		}),
		({
		rem = copy_mc_to_kernel(v.iov_base, (from += v.iov_len)
					- v.iov_len, v.iov_len);
		if (rem) {
			curr_addr = (unsigned long) from;
			bytes = curr_addr - s_addr - rem;
			return bytes;
		}
		})
	)

	return bytes;
}
EXPORT_SYMBOL_GPL(_copy_mc_to_iter);
#endif /* CONFIG_ARCH_HAS_COPY_MC */

size_t _copy_from_iter(void *addr, size_t bytes, struct iov_iter *i)
{
	char *to = addr;
	if (unlikely(iov_iter_is_pipe(i))) {
		WARN_ON(1);
		return 0;
	}
	if (iter_is_iovec(i))
		might_fault();
	iterate_and_advance(i, bytes, v,
		copyin((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len),
		memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page,
				 v.bv_offset, v.bv_len),
		memcpy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len)
	)

	return bytes;
}
EXPORT_SYMBOL(_copy_from_iter);

bool _copy_from_iter_full(void *addr, size_t bytes, struct iov_iter *i)
{
	char *to = addr;
	if (unlikely(iov_iter_is_pipe(i))) {
		WARN_ON(1);
		return false;
	}
	if (unlikely(i->count < bytes))
		return false;

	if (iter_is_iovec(i))
		might_fault();
	iterate_all_kinds(i, bytes, v, ({
		if (copyin((to += v.iov_len) - v.iov_len,
				      v.iov_base, v.iov_len))
			return false;
		0;}),
		memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page,
				 v.bv_offset, v.bv_len),
		memcpy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len)
	)

	iov_iter_advance(i, bytes);
	return true;
}
EXPORT_SYMBOL(_copy_from_iter_full);

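/*
 * Illustrative usage sketch (editorial addition, not from the original file):
 * pulling "len" bytes out of a caller-supplied iterator into a kernel buffer,
 * e.g. in a write() or sendmsg() path.  "buf" and "len" are placeholders:
 *
 *	size_t copied = copy_from_iter(buf, len, iter);
 *	if (copied != len)
 *		return -EFAULT;	// source iovec faulted or was too short
 *
 * copy_from_iter() is the size-checked wrapper around _copy_from_iter()
 * declared in <linux/uio.h>; _copy_from_iter_full() is the all-or-nothing
 * variant that does not advance the iterator on failure.
 */
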
size_t _copy_from_iter_nocache(void *addr, size_t bytes, struct iov_iter *i)
{
	char *to = addr;
	if (unlikely(iov_iter_is_pipe(i))) {
		WARN_ON(1);
		return 0;
	}
	iterate_and_advance(i, bytes, v,
		__copy_from_user_inatomic_nocache((to += v.iov_len) - v.iov_len,
					 v.iov_base, v.iov_len),
		memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page,
				 v.bv_offset, v.bv_len),
		memcpy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len)
	)

	return bytes;
}
EXPORT_SYMBOL(_copy_from_iter_nocache);

#ifdef CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE
/**
 * _copy_from_iter_flushcache - write destination through cpu cache
 * @addr: destination kernel address
 * @bytes: total transfer length
 * @i: source iterator
 *
 * The pmem driver arranges for filesystem-dax to use this facility via
 * dax_copy_from_iter() for ensuring that writes to persistent memory
 * are flushed through the CPU cache. It is differentiated from
 * _copy_from_iter_nocache() in that it guarantees all data is flushed for
 * all iterator types. The _copy_from_iter_nocache() only attempts to
 * bypass the cache for the ITER_IOVEC case, and on some archs may use
 * instructions that strand dirty-data in the cache.
 */
size_t _copy_from_iter_flushcache(void *addr, size_t bytes, struct iov_iter *i)
{
	char *to = addr;
	if (unlikely(iov_iter_is_pipe(i))) {
		WARN_ON(1);
		return 0;
	}
	iterate_and_advance(i, bytes, v,
		__copy_from_user_flushcache((to += v.iov_len) - v.iov_len,
					 v.iov_base, v.iov_len),
		memcpy_page_flushcache((to += v.bv_len) - v.bv_len, v.bv_page,
				 v.bv_offset, v.bv_len),
		memcpy_flushcache((to += v.iov_len) - v.iov_len, v.iov_base,
			v.iov_len)
	)

	return bytes;
}
EXPORT_SYMBOL_GPL(_copy_from_iter_flushcache);
#endif

bool _copy_from_iter_full_nocache(void *addr, size_t bytes, struct iov_iter *i)
{
	char *to = addr;
	if (unlikely(iov_iter_is_pipe(i))) {
		WARN_ON(1);
		return false;
	}
	if (unlikely(i->count < bytes))
		return false;
	iterate_all_kinds(i, bytes, v, ({
		if (__copy_from_user_inatomic_nocache((to += v.iov_len) - v.iov_len,
					     v.iov_base, v.iov_len))
			return false;
		0;}),
		memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page,
				 v.bv_offset, v.bv_len),
		memcpy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len)
	)

	iov_iter_advance(i, bytes);
	return true;
}
EXPORT_SYMBOL(_copy_from_iter_full_nocache);

static inline bool page_copy_sane(struct page *page, size_t offset, size_t n)
{
	struct page *head;
	size_t v = n + offset;

	/*
	 * The general case needs to access the page order in order
	 * to compute the page size.
	 * However, we mostly deal with order-0 pages and thus can
	 * avoid a possible cache line miss for requests that fit all
	 * page orders.
	 */
	if (n <= v && v <= PAGE_SIZE)
		return true;

	head = compound_head(page);
	v += (page - head) << PAGE_SHIFT;

	if (likely(n <= v && v <= (page_size(head))))
		return true;
	WARN_ON(1);
	return false;
}

size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes,
			 struct iov_iter *i)
{
	if (unlikely(!page_copy_sane(page, offset, bytes)))
		return 0;
	if (i->type & (ITER_BVEC|ITER_KVEC)) {
		void *kaddr = kmap_atomic(page);
		size_t wanted = copy_to_iter(kaddr + offset, bytes, i);
		kunmap_atomic(kaddr);
		return wanted;
	} else if (unlikely(iov_iter_is_discard(i)))
		return bytes;
	else if (likely(!iov_iter_is_pipe(i)))
		return copy_page_to_iter_iovec(page, offset, bytes, i);
	else
		return copy_page_to_iter_pipe(page, offset, bytes, i);
}
EXPORT_SYMBOL(copy_page_to_iter);

size_t copy_page_from_iter(struct page *page, size_t offset, size_t bytes,
			 struct iov_iter *i)
{
	if (unlikely(!page_copy_sane(page, offset, bytes)))
		return 0;
	if (unlikely(iov_iter_is_pipe(i) || iov_iter_is_discard(i))) {
		WARN_ON(1);
		return 0;
	}
	if (i->type & (ITER_BVEC|ITER_KVEC)) {
		void *kaddr = kmap_atomic(page);
		size_t wanted = _copy_from_iter(kaddr + offset, bytes, i);
		kunmap_atomic(kaddr);
		return wanted;
	} else
		return copy_page_from_iter_iovec(page, offset, bytes, i);
}
EXPORT_SYMBOL(copy_page_from_iter);

static size_t pipe_zero(size_t bytes, struct iov_iter *i)
{
	struct pipe_inode_info *pipe = i->pipe;
	unsigned int p_mask = pipe->ring_size - 1;
	unsigned int i_head;
	size_t n, off;

	if (!sanity(i))
		return 0;

	bytes = n = push_pipe(i, bytes, &i_head, &off);
	if (unlikely(!n))
		return 0;

	do {
		size_t chunk = min_t(size_t, n, PAGE_SIZE - off);
		memzero_page(pipe->bufs[i_head & p_mask].page, off, chunk);
		i->head = i_head;
		i->iov_offset = off + chunk;
		n -= chunk;
		off = 0;
		i_head++;
	} while (n);
	i->count -= bytes;
	return bytes;
}

size_t iov_iter_zero(size_t bytes, struct iov_iter *i)
{
	if (unlikely(iov_iter_is_pipe(i)))
		return pipe_zero(bytes, i);
	iterate_and_advance(i, bytes, v,
		clear_user(v.iov_base, v.iov_len),
		memzero_page(v.bv_page, v.bv_offset, v.bv_len),
		memset(v.iov_base, 0, v.iov_len)
	)

	return bytes;
}
EXPORT_SYMBOL(iov_iter_zero);

size_t iov_iter_copy_from_user_atomic(struct page *page,
		struct iov_iter *i, unsigned long offset, size_t bytes)
{
	char *kaddr = kmap_atomic(page), *p = kaddr + offset;
	if (unlikely(!page_copy_sane(page, offset, bytes))) {
		kunmap_atomic(kaddr);
		return 0;
	}
	if (unlikely(iov_iter_is_pipe(i) || iov_iter_is_discard(i))) {
		kunmap_atomic(kaddr);
		WARN_ON(1);
		return 0;
	}
	iterate_all_kinds(i, bytes, v,
		copyin((p += v.iov_len) - v.iov_len, v.iov_base, v.iov_len),
		memcpy_from_page((p += v.bv_len) - v.bv_len, v.bv_page,
				 v.bv_offset, v.bv_len),
		memcpy((p += v.iov_len) - v.iov_len, v.iov_base, v.iov_len)
	)
	kunmap_atomic(kaddr);
	return bytes;
}
EXPORT_SYMBOL(iov_iter_copy_from_user_atomic);

static inline void pipe_truncate(struct iov_iter *i)
{
	struct pipe_inode_info *pipe = i->pipe;
	unsigned int p_tail = pipe->tail;
	unsigned int p_head = pipe->head;
	unsigned int p_mask = pipe->ring_size - 1;

	if (!pipe_empty(p_head, p_tail)) {
		struct pipe_buffer *buf;
		unsigned int i_head = i->head;
		size_t off = i->iov_offset;

		if (off) {
			buf = &pipe->bufs[i_head & p_mask];
			buf->len = off - buf->offset;
			i_head++;
		}
		while (p_head != i_head) {
			p_head--;
			pipe_buf_release(pipe, &pipe->bufs[p_head & p_mask]);
		}

		pipe->head = p_head;
	}
}

static void pipe_advance(struct iov_iter *i, size_t size)
{
	struct pipe_inode_info *pipe = i->pipe;
	if (unlikely(i->count < size))
		size = i->count;
	if (size) {
		struct pipe_buffer *buf;
		unsigned int p_mask = pipe->ring_size - 1;
		unsigned int i_head = i->head;
		size_t off = i->iov_offset, left = size;

		if (off) /* make it relative to the beginning of buffer */
			left += off - pipe->bufs[i_head & p_mask].offset;
		while (1) {
			buf = &pipe->bufs[i_head & p_mask];
			if (left <= buf->len)
				break;
			left -= buf->len;
			i_head++;
		}
		i->head = i_head;
		i->iov_offset = buf->offset + left;
	}
	i->count -= size;
	/* ... and discard everything past that point */
	pipe_truncate(i);
}

void iov_iter_advance(struct iov_iter *i, size_t size)
{
	if (unlikely(iov_iter_is_pipe(i))) {
		pipe_advance(i, size);
		return;
	}
	if (unlikely(iov_iter_is_discard(i))) {
		i->count -= size;
		return;
	}
	iterate_and_advance(i, size, v, 0, 0, 0)
}
EXPORT_SYMBOL(iov_iter_advance);

void iov_iter_revert(struct iov_iter *i, size_t unroll)
{
	if (!unroll)
		return;
	if (WARN_ON(unroll > MAX_RW_COUNT))
		return;
	i->count += unroll;
	if (unlikely(iov_iter_is_pipe(i))) {
		struct pipe_inode_info *pipe = i->pipe;
		unsigned int p_mask = pipe->ring_size - 1;
		unsigned int i_head = i->head;
		size_t off = i->iov_offset;
		while (1) {
			struct pipe_buffer *b = &pipe->bufs[i_head & p_mask];
			size_t n = off - b->offset;
			if (unroll < n) {
				off -= unroll;
				break;
			}
			unroll -= n;
			if (!unroll && i_head == i->start_head) {
				off = 0;
				break;
			}
			i_head--;
			b = &pipe->bufs[i_head & p_mask];
			off = b->offset + b->len;
		}
		i->iov_offset = off;
		i->head = i_head;
		pipe_truncate(i);
		return;
	}
	if (unlikely(iov_iter_is_discard(i)))
		return;
	if (unroll <= i->iov_offset) {
		i->iov_offset -= unroll;
		return;
	}
	unroll -= i->iov_offset;
	if (iov_iter_is_bvec(i)) {
		const struct bio_vec *bvec = i->bvec;
		while (1) {
			size_t n = (--bvec)->bv_len;
			i->nr_segs++;
			if (unroll <= n) {
				i->bvec = bvec;
				i->iov_offset = n - unroll;
				return;
			}
			unroll -= n;
		}
	} else { /* same logic for iovec and kvec */
		const struct iovec *iov = i->iov;
		while (1) {
			size_t n = (--iov)->iov_len;
			i->nr_segs++;
			if (unroll <= n) {
				i->iov = iov;
				i->iov_offset = n - unroll;
				return;
			}
			unroll -= n;
		}
	}
}
EXPORT_SYMBOL(iov_iter_revert);

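/*
 * Illustrative usage sketch (editorial addition, not from the original file):
 * callers that consume an iterator speculatively pair iov_iter_advance() with
 * iov_iter_revert() to hand back the part that was not actually used.
 * "wanted" and "done" are placeholders:
 *
 *	iov_iter_advance(iter, wanted);
 *	...
 *	if (done < wanted)
 *		iov_iter_revert(iter, wanted - done);
 */
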
/*
 * Return the count of just the current iov_iter segment.
 */
size_t iov_iter_single_seg_count(const struct iov_iter *i)
{
	if (unlikely(iov_iter_is_pipe(i)))
		return i->count;	// it is a silly place, anyway
	if (i->nr_segs == 1)
		return i->count;
	if (unlikely(iov_iter_is_discard(i)))
		return i->count;
	else if (iov_iter_is_bvec(i))
		return min(i->count, i->bvec->bv_len - i->iov_offset);
	else
		return min(i->count, i->iov->iov_len - i->iov_offset);
}
EXPORT_SYMBOL(iov_iter_single_seg_count);

void iov_iter_kvec(struct iov_iter *i, unsigned int direction,
			const struct kvec *kvec, unsigned long nr_segs,
			size_t count)
{
	WARN_ON(direction & ~(READ | WRITE));
	i->type = ITER_KVEC | (direction & (READ | WRITE));
	i->kvec = kvec;
	i->nr_segs = nr_segs;
	i->iov_offset = 0;
	i->count = count;
}
EXPORT_SYMBOL(iov_iter_kvec);

void iov_iter_bvec(struct iov_iter *i, unsigned int direction,
			const struct bio_vec *bvec, unsigned long nr_segs,
			size_t count)
{
	WARN_ON(direction & ~(READ | WRITE));
	i->type = ITER_BVEC | (direction & (READ | WRITE));
	i->bvec = bvec;
	i->nr_segs = nr_segs;
	i->iov_offset = 0;
	i->count = count;
}
EXPORT_SYMBOL(iov_iter_bvec);

void iov_iter_pipe(struct iov_iter *i, unsigned int direction,
			struct pipe_inode_info *pipe,
			size_t count)
{
	BUG_ON(direction != READ);
	WARN_ON(pipe_full(pipe->head, pipe->tail, pipe->ring_size));
	i->type = ITER_PIPE | READ;
	i->pipe = pipe;
	i->head = pipe->head;
	i->iov_offset = 0;
	i->count = count;
	i->start_head = i->head;
}
EXPORT_SYMBOL(iov_iter_pipe);

/**
 * iov_iter_discard - Initialise an I/O iterator that discards data
 * @i: The iterator to initialise.
 * @direction: The direction of the transfer.
 * @count: The size of the I/O buffer in bytes.
 *
 * Set up an I/O iterator that just discards everything that's written to it.
 * It's only available as a READ iterator.
 */
void iov_iter_discard(struct iov_iter *i, unsigned int direction, size_t count)
{
	BUG_ON(direction != READ);
	i->type = ITER_DISCARD | READ;
	i->count = count;
	i->iov_offset = 0;
}
EXPORT_SYMBOL(iov_iter_discard);

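/*
 * Illustrative usage sketch (editorial addition, not from the original file):
 * building an iterator over a single in-kernel buffer and reading into it,
 * roughly as kernel_read()-style helpers do.  "buf", "len", "file" and a
 * locally initialised kiocb are placeholders:
 *
 *	struct kvec kv = { .iov_base = buf, .iov_len = len };
 *	struct iov_iter iter;
 *
 *	iov_iter_kvec(&iter, READ, &kv, 1, len);
 *	ret = call_read_iter(file, &kiocb, &iter);
 *
 * iov_iter_bvec() is the analogous constructor for page-based (bio_vec)
 * buffers; iov_iter_pipe() and iov_iter_discard() cover the splice and
 * "throw it away" cases handled elsewhere in this file.
 */
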
unsigned long iov_iter_alignment(const struct iov_iter *i)
{
	unsigned long res = 0;
	size_t size = i->count;

	if (unlikely(iov_iter_is_pipe(i))) {
		unsigned int p_mask = i->pipe->ring_size - 1;

		if (size && i->iov_offset && allocated(&i->pipe->bufs[i->head & p_mask]))
			return size | i->iov_offset;
		return size;
	}
	iterate_all_kinds(i, size, v,
		(res |= (unsigned long)v.iov_base | v.iov_len, 0),
		res |= v.bv_offset | v.bv_len,
		res |= (unsigned long)v.iov_base | v.iov_len
	)
	return res;
}
EXPORT_SYMBOL(iov_iter_alignment);

unsigned long iov_iter_gap_alignment(const struct iov_iter *i)
{
	unsigned long res = 0;
	size_t size = i->count;

	if (unlikely(iov_iter_is_pipe(i) || iov_iter_is_discard(i))) {
		WARN_ON(1);
		return ~0U;
	}

	iterate_all_kinds(i, size, v,
		(res |= (!res ? 0 : (unsigned long)v.iov_base) |
			(size != v.iov_len ? size : 0), 0),
		(res |= (!res ? 0 : (unsigned long)v.bv_offset) |
			(size != v.bv_len ? size : 0)),
		(res |= (!res ? 0 : (unsigned long)v.iov_base) |
			(size != v.iov_len ? size : 0))
		);
	return res;
}
EXPORT_SYMBOL(iov_iter_gap_alignment);

static inline ssize_t __pipe_get_pages(struct iov_iter *i,
				size_t maxsize,
				struct page **pages,
				int iter_head,
				size_t *start)
{
	struct pipe_inode_info *pipe = i->pipe;
	unsigned int p_mask = pipe->ring_size - 1;
	ssize_t n = push_pipe(i, maxsize, &iter_head, start);
	if (!n)
		return -EFAULT;

	maxsize = n;
	n += *start;
	while (n > 0) {
		get_page(*pages++ = pipe->bufs[iter_head & p_mask].page);
		iter_head++;
		n -= PAGE_SIZE;
	}

	return maxsize;
}

static ssize_t pipe_get_pages(struct iov_iter *i,
		   struct page **pages, size_t maxsize, unsigned maxpages,
		   size_t *start)
{
	unsigned int iter_head, npages;
	size_t capacity;

	if (!maxsize)
		return 0;

	if (!sanity(i))
		return -EFAULT;

	data_start(i, &iter_head, start);
	/* Amount of free space: some of this one + all after this one */
	npages = pipe_space_for_user(iter_head, i->pipe->tail, i->pipe);
	capacity = min(npages, maxpages) * PAGE_SIZE - *start;

	return __pipe_get_pages(i, min(maxsize, capacity), pages, iter_head, start);
}

ssize_t iov_iter_get_pages(struct iov_iter *i,
		   struct page **pages, size_t maxsize, unsigned maxpages,
		   size_t *start)
{
	if (maxsize > i->count)
		maxsize = i->count;

	if (unlikely(iov_iter_is_pipe(i)))
		return pipe_get_pages(i, pages, maxsize, maxpages, start);
	if (unlikely(iov_iter_is_discard(i)))
		return -EFAULT;

	iterate_all_kinds(i, maxsize, v, ({
		unsigned long addr = (unsigned long)v.iov_base;
		size_t len = v.iov_len + (*start = addr & (PAGE_SIZE - 1));
		int n;
		int res;

		if (len > maxpages * PAGE_SIZE)
			len = maxpages * PAGE_SIZE;
		addr &= ~(PAGE_SIZE - 1);
		n = DIV_ROUND_UP(len, PAGE_SIZE);
		res = get_user_pages_fast(addr, n,
				iov_iter_rw(i) != WRITE ?  FOLL_WRITE : 0,
				pages);
		if (unlikely(res < 0))
			return res;
		return (res == n ? len : res * PAGE_SIZE) - *start;
	0;}),({
		/* can't be more than PAGE_SIZE */
		*start = v.bv_offset;
		get_page(*pages = v.bv_page);
		return v.bv_len;
	}),({
		return -EFAULT;
	})
	)
	return 0;
}
EXPORT_SYMBOL(iov_iter_get_pages);

static struct page **get_pages_array(size_t n)
{
	return kvmalloc_array(n, sizeof(struct page *), GFP_KERNEL);
}

static ssize_t pipe_get_pages_alloc(struct iov_iter *i,
		   struct page ***pages, size_t maxsize,
		   size_t *start)
{
	struct page **p;
	unsigned int iter_head, npages;
	ssize_t n;

	if (!maxsize)
		return 0;

	if (!sanity(i))
		return -EFAULT;

	data_start(i, &iter_head, start);
	/* Amount of free space: some of this one + all after this one */
	npages = pipe_space_for_user(iter_head, i->pipe->tail, i->pipe);
	n = npages * PAGE_SIZE - *start;
	if (maxsize > n)
		maxsize = n;
	else
		npages = DIV_ROUND_UP(maxsize + *start, PAGE_SIZE);
	p = get_pages_array(npages);
	if (!p)
		return -ENOMEM;
	n = __pipe_get_pages(i, maxsize, p, iter_head, start);
	if (n > 0)
		*pages = p;
	else
		kvfree(p);
	return n;
}

ssize_t iov_iter_get_pages_alloc(struct iov_iter *i,
		   struct page ***pages, size_t maxsize,
		   size_t *start)
{
	struct page **p;

	if (maxsize > i->count)
		maxsize = i->count;

	if (unlikely(iov_iter_is_pipe(i)))
		return pipe_get_pages_alloc(i, pages, maxsize, start);
	if (unlikely(iov_iter_is_discard(i)))
		return -EFAULT;

	iterate_all_kinds(i, maxsize, v, ({
		unsigned long addr = (unsigned long)v.iov_base;
		size_t len = v.iov_len + (*start = addr & (PAGE_SIZE - 1));
		int n;
		int res;

		addr &= ~(PAGE_SIZE - 1);
		n = DIV_ROUND_UP(len, PAGE_SIZE);
		p = get_pages_array(n);
		if (!p)
			return -ENOMEM;
		res = get_user_pages_fast(addr, n,
				iov_iter_rw(i) != WRITE ?  FOLL_WRITE : 0, p);
		if (unlikely(res < 0)) {
			kvfree(p);
			return res;
		}
		*pages = p;
		return (res == n ? len : res * PAGE_SIZE) - *start;
	0;}),({
		/* can't be more than PAGE_SIZE */
		*start = v.bv_offset;
		*pages = p = get_pages_array(1);
		if (!p)
			return -ENOMEM;
		get_page(*p = v.bv_page);
		return v.bv_len;
	}),({
		return -EFAULT;
	})
	)
	return 0;
}
EXPORT_SYMBOL(iov_iter_get_pages_alloc);

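/*
 * Illustrative usage sketch (editorial addition, not from the original file):
 * a direct-I/O style caller pinning the user pages behind the next chunk of
 * an iterator.  "pages" (an array with room for "maxpages" entries) and
 * "maxpages" are placeholders:
 *
 *	size_t offset;
 *	ssize_t got = iov_iter_get_pages(iter, pages, SIZE_MAX, maxpages,
 *					 &offset);
 *	if (got < 0)
 *		return got;
 *	// "got" bytes start at byte "offset" within pages[0]; the iterator
 *	// itself is not advanced, so consume it with iov_iter_advance(iter,
 *	// got) and drop the page references with put_page() after the I/O.
 */
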
size_t csum_and_copy_from_iter(void *addr, size_t bytes, __wsum *csum,
			       struct iov_iter *i)
{
	char *to = addr;
	__wsum sum, next;
	size_t off = 0;
	sum = *csum;
	if (unlikely(iov_iter_is_pipe(i) || iov_iter_is_discard(i))) {
		WARN_ON(1);
		return 0;
	}
	iterate_and_advance(i, bytes, v, ({
		next = csum_and_copy_from_user(v.iov_base,
					       (to += v.iov_len) - v.iov_len,
					       v.iov_len);
		if (next) {
			sum = csum_block_add(sum, next, off);
			off += v.iov_len;
		}
		next ? 0 : v.iov_len;
	}), ({
		char *p = kmap_atomic(v.bv_page);
		sum = csum_and_memcpy((to += v.bv_len) - v.bv_len,
				      p + v.bv_offset, v.bv_len,
				      sum, off);
		kunmap_atomic(p);
		off += v.bv_len;
	}),({
		sum = csum_and_memcpy((to += v.iov_len) - v.iov_len,
				      v.iov_base, v.iov_len,
				      sum, off);
		off += v.iov_len;
	})
	)
	*csum = sum;
	return bytes;
}
EXPORT_SYMBOL(csum_and_copy_from_iter);

bool csum_and_copy_from_iter_full(void *addr, size_t bytes, __wsum *csum,
				  struct iov_iter *i)
{
	char *to = addr;
	__wsum sum, next;
	size_t off = 0;
	sum = *csum;
	if (unlikely(iov_iter_is_pipe(i) || iov_iter_is_discard(i))) {
		WARN_ON(1);
		return false;
	}
	if (unlikely(i->count < bytes))
		return false;
	iterate_all_kinds(i, bytes, v, ({
		next = csum_and_copy_from_user(v.iov_base,
					       (to += v.iov_len) - v.iov_len,
					       v.iov_len);
		if (!next)
			return false;
		sum = csum_block_add(sum, next, off);
		off += v.iov_len;
		0;
	}), ({
		char *p = kmap_atomic(v.bv_page);
		sum = csum_and_memcpy((to += v.bv_len) - v.bv_len,
				      p + v.bv_offset, v.bv_len,
				      sum, off);
		kunmap_atomic(p);
		off += v.bv_len;
	}),({
		sum = csum_and_memcpy((to += v.iov_len) - v.iov_len,
				      v.iov_base, v.iov_len,
				      sum, off);
		off += v.iov_len;
	})
	)
	*csum = sum;
	iov_iter_advance(i, bytes);
	return true;
}
EXPORT_SYMBOL(csum_and_copy_from_iter_full);

size_t csum_and_copy_to_iter(const void *addr, size_t bytes, void *csump,
			     struct iov_iter *i)
{
	const char *from = addr;
	__wsum *csum = csump;
	__wsum sum, next;
	size_t off = 0;

	if (unlikely(iov_iter_is_pipe(i)))
		return csum_and_copy_to_pipe_iter(addr, bytes, csum, i);

	sum = *csum;
	if (unlikely(iov_iter_is_discard(i))) {
		WARN_ON(1);	/* for now */
		return 0;
	}
	iterate_and_advance(i, bytes, v, ({
		next = csum_and_copy_to_user((from += v.iov_len) - v.iov_len,
					     v.iov_base,
					     v.iov_len);
		if (next) {
			sum = csum_block_add(sum, next, off);
			off += v.iov_len;
		}
		next ? 0 : v.iov_len;
	}), ({
		char *p = kmap_atomic(v.bv_page);
		sum = csum_and_memcpy(p + v.bv_offset,
				      (from += v.bv_len) - v.bv_len,
				      v.bv_len, sum, off);
		kunmap_atomic(p);
		off += v.bv_len;
	}),({
		sum = csum_and_memcpy(v.iov_base,
				      (from += v.iov_len) - v.iov_len,
				      v.iov_len, sum, off);
		off += v.iov_len;
	})
	)
	*csum = sum;
	return bytes;
}
EXPORT_SYMBOL(csum_and_copy_to_iter);

size_t hash_and_copy_to_iter(const void *addr, size_t bytes, void *hashp,
		struct iov_iter *i)
{
#ifdef CONFIG_CRYPTO_HASH
	struct ahash_request *hash = hashp;
	struct scatterlist sg;
	size_t copied;

	copied = copy_to_iter(addr, bytes, i);
	sg_init_one(&sg, addr, copied);
	ahash_request_set_crypt(hash, &sg, NULL, copied);
	crypto_ahash_update(hash);
	return copied;
#else
	return 0;
#endif
}
EXPORT_SYMBOL(hash_and_copy_to_iter);

int iov_iter_npages(const struct iov_iter *i, int maxpages)
{
	size_t size = i->count;
	int npages = 0;

	if (!size)
		return 0;
	if (unlikely(iov_iter_is_discard(i)))
		return 0;

	if (unlikely(iov_iter_is_pipe(i))) {
		struct pipe_inode_info *pipe = i->pipe;
		unsigned int iter_head;
		size_t off;

		if (!sanity(i))
			return 0;

		data_start(i, &iter_head, &off);
		/* some of this one + all after this one */
		npages = pipe_space_for_user(iter_head, pipe->tail, pipe);
		if (npages >= maxpages)
			return maxpages;
	} else iterate_all_kinds(i, size, v, ({
		unsigned long p = (unsigned long)v.iov_base;
		npages += DIV_ROUND_UP(p + v.iov_len, PAGE_SIZE)
			- p / PAGE_SIZE;
		if (npages >= maxpages)
			return maxpages;
	0;}),({
		npages++;
		if (npages >= maxpages)
			return maxpages;
	}),({
		unsigned long p = (unsigned long)v.iov_base;
		npages += DIV_ROUND_UP(p + v.iov_len, PAGE_SIZE)
			- p / PAGE_SIZE;
		if (npages >= maxpages)
			return maxpages;
	})
	)
	return npages;
}
EXPORT_SYMBOL(iov_iter_npages);

const void *dup_iter(struct iov_iter *new, struct iov_iter *old, gfp_t flags)
{
	*new = *old;
	if (unlikely(iov_iter_is_pipe(new))) {
		WARN_ON(1);
		return NULL;
	}
	if (unlikely(iov_iter_is_discard(new)))
		return NULL;
	if (iov_iter_is_bvec(new))
		return new->bvec = kmemdup(new->bvec,
				    new->nr_segs * sizeof(struct bio_vec),
				    flags);
	else
		/* iovec and kvec have identical layout */
		return new->iov = kmemdup(new->iov,
				   new->nr_segs * sizeof(struct iovec),
				   flags);
}
EXPORT_SYMBOL(dup_iter);

static int copy_compat_iovec_from_user(struct iovec *iov,
		const struct iovec __user *uvec, unsigned long nr_segs)
{
	const struct compat_iovec __user *uiov =
		(const struct compat_iovec __user *)uvec;
	int ret = -EFAULT, i;

	if (!user_access_begin(uiov, nr_segs * sizeof(*uiov)))
		return -EFAULT;

	for (i = 0; i < nr_segs; i++) {
		compat_uptr_t buf;
		compat_ssize_t len;

		unsafe_get_user(len, &uiov[i].iov_len, uaccess_end);
		unsafe_get_user(buf, &uiov[i].iov_base, uaccess_end);

		/* check for compat_size_t not fitting in compat_ssize_t .. */
		if (len < 0) {
			ret = -EINVAL;
			goto uaccess_end;
		}
		iov[i].iov_base = compat_ptr(buf);
		iov[i].iov_len = len;
	}

	ret = 0;
uaccess_end:
	user_access_end();
	return ret;
}

static int copy_iovec_from_user(struct iovec *iov,
		const struct iovec __user *uvec, unsigned long nr_segs)
{
	unsigned long seg;

	if (copy_from_user(iov, uvec, nr_segs * sizeof(*uvec)))
		return -EFAULT;
	for (seg = 0; seg < nr_segs; seg++) {
		if ((ssize_t)iov[seg].iov_len < 0)
			return -EINVAL;
	}

	return 0;
}

struct iovec *iovec_from_user(const struct iovec __user *uvec,
		unsigned long nr_segs, unsigned long fast_segs,
		struct iovec *fast_iov, bool compat)
{
	struct iovec *iov = fast_iov;
	int ret;

	/*
	 * SuS says "The readv() function *may* fail if the iovcnt argument was
	 * less than or equal to 0, or greater than {IOV_MAX}.  Linux has
	 * traditionally returned zero for zero segments, so...
	 */
	if (nr_segs == 0)
		return iov;
	if (nr_segs > UIO_MAXIOV)
		return ERR_PTR(-EINVAL);
	if (nr_segs > fast_segs) {
		iov = kmalloc_array(nr_segs, sizeof(struct iovec), GFP_KERNEL);
		if (!iov)
			return ERR_PTR(-ENOMEM);
	}

	if (compat)
		ret = copy_compat_iovec_from_user(iov, uvec, nr_segs);
	else
		ret = copy_iovec_from_user(iov, uvec, nr_segs);
	if (ret) {
		if (iov != fast_iov)
			kfree(iov);
		return ERR_PTR(ret);
	}

	return iov;
}

ssize_t __import_iovec(int type, const struct iovec __user *uvec,
		 unsigned nr_segs, unsigned fast_segs, struct iovec **iovp,
		 struct iov_iter *i, bool compat)
{
	ssize_t total_len = 0;
	unsigned long seg;
	struct iovec *iov;

	iov = iovec_from_user(uvec, nr_segs, fast_segs, *iovp, compat);
	if (IS_ERR(iov)) {
		*iovp = NULL;
		return PTR_ERR(iov);
	}

	/*
	 * According to the Single Unix Specification we should return EINVAL if
	 * an element length is < 0 when cast to ssize_t or if the total length
	 * would overflow the ssize_t return value of the system call.
	 *
	 * Linux caps all read/write calls to MAX_RW_COUNT, and avoids the
	 * overflow case.
	 */
	for (seg = 0; seg < nr_segs; seg++) {
		ssize_t len = (ssize_t)iov[seg].iov_len;

		if (!access_ok(iov[seg].iov_base, len)) {
			if (iov != *iovp)
				kfree(iov);
			*iovp = NULL;
			return -EFAULT;
		}

		if (len > MAX_RW_COUNT - total_len) {
			len = MAX_RW_COUNT - total_len;
			iov[seg].iov_len = len;
		}
		total_len += len;
	}

	iov_iter_init(i, type, iov, nr_segs, total_len);
	if (iov == *iovp)
		*iovp = NULL;
	else
		*iovp = iov;
	return total_len;
}

/**
 * import_iovec() - Copy an array of &struct iovec from userspace
 *     into the kernel, check that it is valid, and initialize a new
 *     &struct iov_iter iterator to access it.
 *
 * @type: One of %READ or %WRITE.
 * @uvec: Pointer to the userspace array.
 * @nr_segs: Number of elements in userspace array.
 * @fast_segs: Number of elements in the caller-supplied array at *@iovp.
 * @iovp: (input and output parameter) Pointer to pointer to (usually small
 *     on-stack) kernel array.
 * @i: Pointer to iterator that will be initialized on success.
 *
 * If the array pointed to by *@iovp is large enough to hold all @nr_segs,
 * then this function places %NULL in *@iovp on return. Otherwise, a new
 * array will be allocated and the result placed in *@iovp. This means that
 * the caller may call kfree() on *@iovp regardless of whether the small
 * on-stack array was used or not (and regardless of whether this function
 * returns an error or not).
 *
 * Return: Negative error code on error, bytes imported on success
 */
ssize_t import_iovec(int type, const struct iovec __user *uvec,
		 unsigned nr_segs, unsigned fast_segs,
		 struct iovec **iovp, struct iov_iter *i)
{
	return __import_iovec(type, uvec, nr_segs, fast_segs, iovp, i,
			      in_compat_syscall());
}
EXPORT_SYMBOL(import_iovec);

int import_single_range(int rw, void __user *buf, size_t len,
		 struct iovec *iov, struct iov_iter *i)
{
	if (len > MAX_RW_COUNT)
		len = MAX_RW_COUNT;
	if (unlikely(!access_ok(buf, len)))
		return -EFAULT;

	iov->iov_base = buf;
	iov->iov_len = len;
	iov_iter_init(i, rw, iov, 1, len);
	return 0;
}
EXPORT_SYMBOL(import_single_range);

int iov_iter_for_each_range(struct iov_iter *i, size_t bytes,
			    int (*f)(struct kvec *vec, void *context),
			    void *context)
{
	struct kvec w;
	int err = -EINVAL;
	if (!bytes)
		return 0;

	iterate_all_kinds(i, bytes, v, -EINVAL, ({
		w.iov_base = kmap(v.bv_page) + v.bv_offset;
		w.iov_len = v.bv_len;
		err = f(&w, context);
		kunmap(v.bv_page);
		err;}), ({
		w = v;
		err = f(&w, context);})
	)
	return err;
}
EXPORT_SYMBOL(iov_iter_for_each_range);

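/*
 * Illustrative usage sketch (editorial addition, not from the original file):
 * the typical readv()/writev()-style pattern around import_iovec().
 * "iovstack" is the usual small on-stack array; "uvec" and "nr_segs" come
 * from the syscall arguments, and do_the_io() is a hypothetical consumer of
 * the iterator:
 *
 *	struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
 *	struct iov_iter iter;
 *	ssize_t ret;
 *
 *	ret = import_iovec(READ, uvec, nr_segs, ARRAY_SIZE(iovstack),
 *			   &iov, &iter);
 *	if (ret < 0)
 *		return ret;
 *	ret = do_the_io(&iter);
 *	kfree(iov);		// safe whether or not the stack array was used
 *	return ret;
 */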