// SPDX-License-Identifier: GPL-2.0-only
#include <crypto/hash.h>
#include <linux/export.h>
#include <linux/bvec.h>
#include <linux/fault-inject-usercopy.h>
#include <linux/uio.h>
#include <linux/pagemap.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/splice.h>
#include <linux/compat.h>
#include <net/checksum.h>
#include <linux/scatterlist.h>
#include <linux/instrumented.h>

#define PIPE_PARANOIA /* for now */

#define iterate_iovec(i, n, __v, __p, skip, STEP) {	\
	size_t left;					\
	size_t wanted = n;				\
	__p = i->iov;					\
	__v.iov_len = min(n, __p->iov_len - skip);	\
	if (likely(__v.iov_len)) {			\
		__v.iov_base = __p->iov_base + skip;	\
		left = (STEP);				\
		__v.iov_len -= left;			\
		skip += __v.iov_len;			\
		n -= __v.iov_len;			\
	} else {					\
		left = 0;				\
	}						\
	while (unlikely(!left && n)) {			\
		__p++;					\
		__v.iov_len = min(n, __p->iov_len);	\
		if (unlikely(!__v.iov_len))		\
			continue;			\
		__v.iov_base = __p->iov_base;		\
		left = (STEP);				\
		__v.iov_len -= left;			\
		skip = __v.iov_len;			\
		n -= __v.iov_len;			\
	}						\
	n = wanted - n;					\
}

#define iterate_kvec(i, n, __v, __p, skip, STEP) {	\
	size_t wanted = n;				\
	__p = i->kvec;					\
	__v.iov_len = min(n, __p->iov_len - skip);	\
	if (likely(__v.iov_len)) {			\
		__v.iov_base = __p->iov_base + skip;	\
		(void)(STEP);				\
		skip += __v.iov_len;			\
		n -= __v.iov_len;			\
	}						\
	while (unlikely(n)) {				\
		__p++;					\
		__v.iov_len = min(n, __p->iov_len);	\
		if (unlikely(!__v.iov_len))		\
			continue;			\
		__v.iov_base = __p->iov_base;		\
		(void)(STEP);				\
		skip = __v.iov_len;			\
		n -= __v.iov_len;			\
	}						\
	n = wanted;					\
}

#define iterate_bvec(i, n, __v, __bi, skip, STEP) {	\
	struct bvec_iter __start;			\
	__start.bi_size = n;				\
	__start.bi_bvec_done = skip;			\
	__start.bi_idx = 0;				\
	for_each_bvec(__v, i->bvec, __bi, __start) {	\
		(void)(STEP);				\
	}						\
}

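/*
 * iterate_all_kinds() walks the next n bytes of the iterator and invokes the
 * I/B/K step expression that matches the iterator flavour (iovec, bvec or
 * kvec); ITER_DISCARD is a no-op.  iterate_and_advance() does the same but
 * also advances the iterator past the bytes that were stepped over, updating
 * count, nr_segs and iov_offset.
 */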
#define iterate_all_kinds(i, n, v, I, B, K) {			\
	if (likely(n)) {					\
		size_t skip = i->iov_offset;			\
		if (unlikely(i->type & ITER_BVEC)) {		\
			struct bio_vec v;			\
			struct bvec_iter __bi;			\
			iterate_bvec(i, n, v, __bi, skip, (B))	\
		} else if (unlikely(i->type & ITER_KVEC)) {	\
			const struct kvec *kvec;		\
			struct kvec v;				\
			iterate_kvec(i, n, v, kvec, skip, (K))	\
		} else if (unlikely(i->type & ITER_DISCARD)) {	\
		} else {					\
			const struct iovec *iov;		\
			struct iovec v;				\
			iterate_iovec(i, n, v, iov, skip, (I))	\
		}						\
	}							\
}

#define iterate_and_advance(i, n, v, I, B, K) {			\
	if (unlikely(i->count < n))				\
		n = i->count;					\
	if (i->count) {						\
		size_t skip = i->iov_offset;			\
		if (unlikely(i->type & ITER_BVEC)) {		\
			const struct bio_vec *bvec = i->bvec;	\
			struct bio_vec v;			\
			struct bvec_iter __bi;			\
			iterate_bvec(i, n, v, __bi, skip, (B))	\
			i->bvec = __bvec_iter_bvec(i->bvec, __bi);	\
			i->nr_segs -= i->bvec - bvec;		\
			skip = __bi.bi_bvec_done;		\
		} else if (unlikely(i->type & ITER_KVEC)) {	\
			const struct kvec *kvec;		\
			struct kvec v;				\
			iterate_kvec(i, n, v, kvec, skip, (K))	\
			if (skip == kvec->iov_len) {		\
				kvec++;				\
				skip = 0;			\
			}					\
			i->nr_segs -= kvec - i->kvec;		\
			i->kvec = kvec;				\
		} else if (unlikely(i->type & ITER_DISCARD)) {	\
			skip += n;				\
		} else {					\
			const struct iovec *iov;		\
			struct iovec v;				\
			iterate_iovec(i, n, v, iov, skip, (I))	\
			if (skip == iov->iov_len) {		\
				iov++;				\
				skip = 0;			\
			}					\
			i->nr_segs -= iov - i->iov;		\
			i->iov = iov;				\
		}						\
		i->count -= n;					\
		i->iov_offset = skip;				\
	}							\
}

static int copyout(void __user *to, const void *from, size_t n)
{
	if (should_fail_usercopy())
		return n;
	if (access_ok(to, n)) {
		instrument_copy_to_user(to, from, n);
		n = raw_copy_to_user(to, from, n);
	}
	return n;
}

static int copyin(void *to, const void __user *from, size_t n)
{
	if (should_fail_usercopy())
		return n;
	if (access_ok(from, n)) {
		instrument_copy_from_user(to, from, n);
		n = raw_copy_from_user(to, from, n);
	}
	return n;
}

static size_t copy_page_to_iter_iovec(struct page *page, size_t offset, size_t bytes,
			 struct iov_iter *i)
{
	size_t skip, copy, left, wanted;
	const struct iovec *iov;
	char __user *buf;
	void *kaddr, *from;

	if (unlikely(bytes > i->count))
		bytes = i->count;

	if (unlikely(!bytes))
		return 0;

	might_fault();
	wanted = bytes;
	iov = i->iov;
	skip = i->iov_offset;
	buf = iov->iov_base + skip;
	copy = min(bytes, iov->iov_len - skip);

	if (IS_ENABLED(CONFIG_HIGHMEM) && !fault_in_pages_writeable(buf, copy)) {
		kaddr = kmap_atomic(page);
		from = kaddr + offset;

		/* first chunk, usually the only one */
		left = copyout(buf, from, copy);
		copy -= left;
		skip += copy;
		from += copy;
		bytes -= copy;

		while (unlikely(!left && bytes)) {
			iov++;
			buf = iov->iov_base;
			copy = min(bytes, iov->iov_len);
			left = copyout(buf, from, copy);
			copy -= left;
			skip = copy;
			from += copy;
			bytes -= copy;
		}
		if (likely(!bytes)) {
			kunmap_atomic(kaddr);
			goto done;
		}
		offset = from - kaddr;
		buf += copy;
		kunmap_atomic(kaddr);
		copy = min(bytes, iov->iov_len - skip);
	}
	/* Too bad - revert to non-atomic kmap */

	kaddr = kmap(page);
	from = kaddr + offset;
	left = copyout(buf, from, copy);
	copy -= left;
	skip += copy;
	from += copy;
	bytes -= copy;
	while (unlikely(!left && bytes)) {
		iov++;
		buf = iov->iov_base;
		copy = min(bytes, iov->iov_len);
		left = copyout(buf, from, copy);
		copy -= left;
		skip = copy;
		from += copy;
		bytes -= copy;
	}
	kunmap(page);

done:
	if (skip == iov->iov_len) {
		iov++;
		skip = 0;
	}
	i->count -= wanted - bytes;
	i->nr_segs -= iov - i->iov;
	i->iov = iov;
	i->iov_offset = skip;
	return wanted - bytes;
}

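/*
 * Mirror image of copy_page_to_iter_iovec(): fill @page from the user
 * buffers described by an iovec-backed iterator, taking the kmap_atomic()
 * fast path when the user pages are already faulted in and falling back to
 * kmap() otherwise.
 */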
static size_t copy_page_from_iter_iovec(struct page *page, size_t offset, size_t bytes,
			 struct iov_iter *i)
{
	size_t skip, copy, left, wanted;
	const struct iovec *iov;
	char __user *buf;
	void *kaddr, *to;

	if (unlikely(bytes > i->count))
		bytes = i->count;

	if (unlikely(!bytes))
		return 0;

	might_fault();
	wanted = bytes;
	iov = i->iov;
	skip = i->iov_offset;
	buf = iov->iov_base + skip;
	copy = min(bytes, iov->iov_len - skip);

	if (IS_ENABLED(CONFIG_HIGHMEM) && !fault_in_pages_readable(buf, copy)) {
		kaddr = kmap_atomic(page);
		to = kaddr + offset;

		/* first chunk, usually the only one */
		left = copyin(to, buf, copy);
		copy -= left;
		skip += copy;
		to += copy;
		bytes -= copy;

		while (unlikely(!left && bytes)) {
			iov++;
			buf = iov->iov_base;
			copy = min(bytes, iov->iov_len);
			left = copyin(to, buf, copy);
			copy -= left;
			skip = copy;
			to += copy;
			bytes -= copy;
		}
		if (likely(!bytes)) {
			kunmap_atomic(kaddr);
			goto done;
		}
		offset = to - kaddr;
		buf += copy;
		kunmap_atomic(kaddr);
		copy = min(bytes, iov->iov_len - skip);
	}
	/* Too bad - revert to non-atomic kmap */

	kaddr = kmap(page);
	to = kaddr + offset;
	left = copyin(to, buf, copy);
	copy -= left;
	skip += copy;
	to += copy;
	bytes -= copy;
	while (unlikely(!left && bytes)) {
		iov++;
		buf = iov->iov_base;
		copy = min(bytes, iov->iov_len);
		left = copyin(to, buf, copy);
		copy -= left;
		skip = copy;
		to += copy;
		bytes -= copy;
	}
	kunmap(page);

done:
	if (skip == iov->iov_len) {
		iov++;
		skip = 0;
	}
	i->count -= wanted - bytes;
	i->nr_segs -= iov - i->iov;
	i->iov = iov;
	i->iov_offset = skip;
	return wanted - bytes;
}

#ifdef PIPE_PARANOIA
static bool sanity(const struct iov_iter *i)
{
	struct pipe_inode_info *pipe = i->pipe;
	unsigned int p_head = pipe->head;
	unsigned int p_tail = pipe->tail;
	unsigned int p_mask = pipe->ring_size - 1;
	unsigned int p_occupancy = pipe_occupancy(p_head, p_tail);
	unsigned int i_head = i->head;
	unsigned int idx;

	if (i->iov_offset) {
		struct pipe_buffer *p;
		if (unlikely(p_occupancy == 0))
			goto Bad;	// pipe must be non-empty
		if (unlikely(i_head != p_head - 1))
			goto Bad;	// must be at the last buffer...

		p = &pipe->bufs[i_head & p_mask];
		if (unlikely(p->offset + p->len != i->iov_offset))
			goto Bad;	// ... at the end of segment
	} else {
		if (i_head != p_head)
			goto Bad;	// must be right after the last buffer
	}
	return true;
Bad:
	printk(KERN_ERR "idx = %d, offset = %zd\n", i_head, i->iov_offset);
	printk(KERN_ERR "head = %d, tail = %d, buffers = %d\n",
			p_head, p_tail, pipe->ring_size);
	for (idx = 0; idx < pipe->ring_size; idx++)
		printk(KERN_ERR "[%p %p %d %d]\n",
			pipe->bufs[idx].ops,
			pipe->bufs[idx].page,
			pipe->bufs[idx].offset,
			pipe->bufs[idx].len);
	WARN_ON(1);
	return false;
}
#else
#define sanity(i) true
#endif

static size_t copy_page_to_iter_pipe(struct page *page, size_t offset, size_t bytes,
			 struct iov_iter *i)
{
	struct pipe_inode_info *pipe = i->pipe;
	struct pipe_buffer *buf;
	unsigned int p_tail = pipe->tail;
	unsigned int p_mask = pipe->ring_size - 1;
	unsigned int i_head = i->head;
	size_t off;

	if (unlikely(bytes > i->count))
		bytes = i->count;

	if (unlikely(!bytes))
		return 0;

	if (!sanity(i))
		return 0;

	off = i->iov_offset;
	buf = &pipe->bufs[i_head & p_mask];
	if (off) {
		if (offset == off && buf->page == page) {
			/* merge with the last one */
			buf->len += bytes;
			i->iov_offset += bytes;
			goto out;
		}
		i_head++;
		buf = &pipe->bufs[i_head & p_mask];
	}
	if (pipe_full(i_head, p_tail, pipe->max_usage))
		return 0;

	buf->ops = &page_cache_pipe_buf_ops;
	get_page(page);
	buf->page = page;
	buf->offset = offset;
	buf->len = bytes;

	pipe->head = i_head + 1;
	i->iov_offset = offset + bytes;
	i->head = i_head;
out:
	i->count -= bytes;
	return bytes;
}

/*
 * Fault in one or more iovecs of the given iov_iter, to a maximum length of
 * bytes. For each iovec, fault in each page that constitutes the iovec.
 *
 * Return 0 on success, or non-zero if the memory could not be accessed (i.e.
 * because it is an invalid address).
 */
int iov_iter_fault_in_readable(struct iov_iter *i, size_t bytes)
{
	size_t skip = i->iov_offset;
	const struct iovec *iov;
	int err;
	struct iovec v;

	if (!(i->type & (ITER_BVEC|ITER_KVEC))) {
		iterate_iovec(i, bytes, v, iov, skip, ({
			err = fault_in_pages_readable(v.iov_base, v.iov_len);
			if (unlikely(err))
				return err;
		0;}))
	}
	return 0;
}
EXPORT_SYMBOL(iov_iter_fault_in_readable);

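/*
 * A sketch of the usual write-path pairing (in the style of
 * generic_perform_write(), not copied from it): fault the user pages in
 * while no page is locked, then copy with page faults disabled and retry
 * on a short copy.
 *
 *	if (unlikely(iov_iter_fault_in_readable(i, bytes)))
 *		return -EFAULT;
 *	...lock/prepare the pagecache page...
 *	copied = iov_iter_copy_from_user_atomic(page, i, offset, bytes);
 *	...unlock the page, then iov_iter_advance(i, copied) and retry
 *	with a smaller chunk if copied == 0...
 */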
void iov_iter_init(struct iov_iter *i, unsigned int direction,
			const struct iovec *iov, unsigned long nr_segs,
			size_t count)
{
	WARN_ON(direction & ~(READ | WRITE));
	direction &= READ | WRITE;

	/* It will get better.  Eventually... */
	if (uaccess_kernel()) {
		i->type = ITER_KVEC | direction;
		i->kvec = (struct kvec *)iov;
	} else {
		i->type = ITER_IOVEC | direction;
		i->iov = iov;
	}
	i->nr_segs = nr_segs;
	i->iov_offset = 0;
	i->count = count;
}
EXPORT_SYMBOL(iov_iter_init);

static void memzero_page(struct page *page, size_t offset, size_t len)
{
	char *addr = kmap_atomic(page);
	memset(addr + offset, 0, len);
	kunmap_atomic(addr);
}

static inline bool allocated(struct pipe_buffer *buf)
{
	return buf->ops == &default_pipe_buf_ops;
}

static inline void data_start(const struct iov_iter *i,
			      unsigned int *iter_headp, size_t *offp)
{
	unsigned int p_mask = i->pipe->ring_size - 1;
	unsigned int iter_head = i->head;
	size_t off = i->iov_offset;

	if (off && (!allocated(&i->pipe->bufs[iter_head & p_mask]) ||
		    off == PAGE_SIZE)) {
		iter_head++;
		off = 0;
	}
	*iter_headp = iter_head;
	*offp = off;
}

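/*
 * Make room in the pipe for up to @size bytes: top up the partially filled
 * buffer the iterator points at (if any), then append freshly allocated
 * pages until either @size is covered or the pipe is full.  Returns how many
 * bytes of room were actually obtained.
 */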
static size_t push_pipe(struct iov_iter *i, size_t size,
			int *iter_headp, size_t *offp)
{
	struct pipe_inode_info *pipe = i->pipe;
	unsigned int p_tail = pipe->tail;
	unsigned int p_mask = pipe->ring_size - 1;
	unsigned int iter_head;
	size_t off;
	ssize_t left;

	if (unlikely(size > i->count))
		size = i->count;
	if (unlikely(!size))
		return 0;

	left = size;
	data_start(i, &iter_head, &off);
	*iter_headp = iter_head;
	*offp = off;
	if (off) {
		left -= PAGE_SIZE - off;
		if (left <= 0) {
			pipe->bufs[iter_head & p_mask].len += size;
			return size;
		}
		pipe->bufs[iter_head & p_mask].len = PAGE_SIZE;
		iter_head++;
	}
	while (!pipe_full(iter_head, p_tail, pipe->max_usage)) {
		struct pipe_buffer *buf = &pipe->bufs[iter_head & p_mask];
		struct page *page = alloc_page(GFP_USER);
		if (!page)
			break;

		buf->ops = &default_pipe_buf_ops;
		buf->page = page;
		buf->offset = 0;
		buf->len = min_t(ssize_t, left, PAGE_SIZE);
		left -= buf->len;
		iter_head++;
		pipe->head = iter_head;

		if (left == 0)
			return size;
	}
	return size - left;
}

static size_t copy_pipe_to_iter(const void *addr, size_t bytes,
				struct iov_iter *i)
{
	struct pipe_inode_info *pipe = i->pipe;
	unsigned int p_mask = pipe->ring_size - 1;
	unsigned int i_head;
	size_t n, off;

	if (!sanity(i))
		return 0;

	bytes = n = push_pipe(i, bytes, &i_head, &off);
	if (unlikely(!n))
		return 0;
	do {
		size_t chunk = min_t(size_t, n, PAGE_SIZE - off);
		memcpy_to_page(pipe->bufs[i_head & p_mask].page, off, addr, chunk);
		i->head = i_head;
		i->iov_offset = off + chunk;
		n -= chunk;
		addr += chunk;
		off = 0;
		i_head++;
	} while (n);
	i->count -= bytes;
	return bytes;
}

static __wsum csum_and_memcpy(void *to, const void *from, size_t len,
			      __wsum sum, size_t off)
{
	__wsum next = csum_partial_copy_nocheck(from, to, len);
	return csum_block_add(sum, next, off);
}

static size_t csum_and_copy_to_pipe_iter(const void *addr, size_t bytes,
					 struct csum_state *csstate,
					 struct iov_iter *i)
{
	struct pipe_inode_info *pipe = i->pipe;
	unsigned int p_mask = pipe->ring_size - 1;
	__wsum sum = csstate->csum;
	size_t off = csstate->off;
	unsigned int i_head;
	size_t n, r;

	if (!sanity(i))
		return 0;

	bytes = n = push_pipe(i, bytes, &i_head, &r);
	if (unlikely(!n))
		return 0;
	do {
		size_t chunk = min_t(size_t, n, PAGE_SIZE - r);
		char *p = kmap_atomic(pipe->bufs[i_head & p_mask].page);
		sum = csum_and_memcpy(p + r, addr, chunk, sum, off);
		kunmap_atomic(p);
		i->head = i_head;
		i->iov_offset = r + chunk;
		n -= chunk;
		off += chunk;
		addr += chunk;
		r = 0;
		i_head++;
	} while (n);
	i->count -= bytes;
	csstate->csum = sum;
	csstate->off = off;
	return bytes;
}

size_t _copy_to_iter(const void *addr, size_t bytes, struct iov_iter *i)
{
	const char *from = addr;
	if (unlikely(iov_iter_is_pipe(i)))
		return copy_pipe_to_iter(addr, bytes, i);
	if (iter_is_iovec(i))
		might_fault();
	iterate_and_advance(i, bytes, v,
		copyout(v.iov_base, (from += v.iov_len) - v.iov_len, v.iov_len),
		memcpy_to_page(v.bv_page, v.bv_offset,
			       (from += v.bv_len) - v.bv_len, v.bv_len),
		memcpy(v.iov_base, (from += v.iov_len) - v.iov_len, v.iov_len)
	)

	return bytes;
}
EXPORT_SYMBOL(_copy_to_iter);

#ifdef CONFIG_ARCH_HAS_COPY_MC
static int copyout_mc(void __user *to, const void *from, size_t n)
{
	if (access_ok(to, n)) {
		instrument_copy_to_user(to, from, n);
		n = copy_mc_to_user((__force void *) to, from, n);
	}
	return n;
}

static unsigned long copy_mc_to_page(struct page *page, size_t offset,
		const char *from, size_t len)
{
	unsigned long ret;
	char *to;

	to = kmap_atomic(page);
	ret = copy_mc_to_kernel(to + offset, from, len);
	kunmap_atomic(to);

	return ret;
}

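/*
 * Pipe flavour of _copy_mc_to_iter(): stops at the first chunk that hits a
 * machine-check (poisoned) source cacheline and returns the number of bytes
 * actually transferred into the pipe.
 */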
static size_t copy_mc_pipe_to_iter(const void *addr, size_t bytes,
				struct iov_iter *i)
{
	struct pipe_inode_info *pipe = i->pipe;
	unsigned int p_mask = pipe->ring_size - 1;
	unsigned int i_head;
	size_t n, off, xfer = 0;

	if (!sanity(i))
		return 0;

	bytes = n = push_pipe(i, bytes, &i_head, &off);
	if (unlikely(!n))
		return 0;
	do {
		size_t chunk = min_t(size_t, n, PAGE_SIZE - off);
		unsigned long rem;

		rem = copy_mc_to_page(pipe->bufs[i_head & p_mask].page,
				      off, addr, chunk);
		i->head = i_head;
		i->iov_offset = off + chunk - rem;
		xfer += chunk - rem;
		if (rem)
			break;
		n -= chunk;
		addr += chunk;
		off = 0;
		i_head++;
	} while (n);
	i->count -= xfer;
	return xfer;
}

/**
 * _copy_mc_to_iter - copy to iter with source memory error exception handling
 * @addr: source kernel address
 * @bytes: total transfer length
 * @i: destination iterator
 *
 * The pmem driver deploys this for the dax operation
 * (dax_copy_to_iter()) for dax reads (bypass page-cache and the
 * block-layer). Upon #MC, read(2) aborts and returns EIO or the bytes
 * successfully copied.
 *
 * The main differences between this and typical _copy_to_iter() are:
 *
 * * Typical tail/residue handling after a fault retries the copy
 *   byte-by-byte until the fault happens again. Re-triggering machine
 *   checks is potentially fatal so the implementation uses source
 *   alignment and poison alignment assumptions to avoid re-triggering
 *   hardware exceptions.
 *
 * * ITER_KVEC, ITER_PIPE, and ITER_BVEC can return short copies.
 *   Compare to copy_to_iter() where only ITER_IOVEC attempts might return
 *   a short copy.
 */
size_t _copy_mc_to_iter(const void *addr, size_t bytes, struct iov_iter *i)
{
	const char *from = addr;
	unsigned long rem, curr_addr, s_addr = (unsigned long) addr;

	if (unlikely(iov_iter_is_pipe(i)))
		return copy_mc_pipe_to_iter(addr, bytes, i);
	if (iter_is_iovec(i))
		might_fault();
	iterate_and_advance(i, bytes, v,
		copyout_mc(v.iov_base, (from += v.iov_len) - v.iov_len,
			   v.iov_len),
		({
		rem = copy_mc_to_page(v.bv_page, v.bv_offset,
				      (from += v.bv_len) - v.bv_len, v.bv_len);
		if (rem) {
			curr_addr = (unsigned long) from;
			bytes = curr_addr - s_addr - rem;
			return bytes;
		}
		}),
		({
		rem = copy_mc_to_kernel(v.iov_base, (from += v.iov_len)
					- v.iov_len, v.iov_len);
		if (rem) {
			curr_addr = (unsigned long) from;
			bytes = curr_addr - s_addr - rem;
			return bytes;
		}
		})
	)

	return bytes;
}
EXPORT_SYMBOL_GPL(_copy_mc_to_iter);
#endif /* CONFIG_ARCH_HAS_COPY_MC */

size_t _copy_from_iter(void *addr, size_t bytes, struct iov_iter *i)
{
	char *to = addr;
	if (unlikely(iov_iter_is_pipe(i))) {
		WARN_ON(1);
		return 0;
	}
	if (iter_is_iovec(i))
		might_fault();
	iterate_and_advance(i, bytes, v,
		copyin((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len),
		memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page,
				 v.bv_offset, v.bv_len),
		memcpy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len)
	)

	return bytes;
}
EXPORT_SYMBOL(_copy_from_iter);

bool _copy_from_iter_full(void *addr, size_t bytes, struct iov_iter *i)
{
	char *to = addr;
	if (unlikely(iov_iter_is_pipe(i))) {
		WARN_ON(1);
		return false;
	}
	if (unlikely(i->count < bytes))
		return false;

	if (iter_is_iovec(i))
		might_fault();
	iterate_all_kinds(i, bytes, v, ({
		if (copyin((to += v.iov_len) - v.iov_len,
			   v.iov_base, v.iov_len))
			return false;
		0;}),
		memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page,
				 v.bv_offset, v.bv_len),
		memcpy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len)
	)

	iov_iter_advance(i, bytes);
	return true;
}
EXPORT_SYMBOL(_copy_from_iter_full);

size_t _copy_from_iter_nocache(void *addr, size_t bytes, struct iov_iter *i)
{
	char *to = addr;
	if (unlikely(iov_iter_is_pipe(i))) {
		WARN_ON(1);
		return 0;
	}
	iterate_and_advance(i, bytes, v,
		__copy_from_user_inatomic_nocache((to += v.iov_len) - v.iov_len,
						  v.iov_base, v.iov_len),
		memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page,
				 v.bv_offset, v.bv_len),
		memcpy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len)
	)

	return bytes;
}
EXPORT_SYMBOL(_copy_from_iter_nocache);

#ifdef CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE
/**
 * _copy_from_iter_flushcache - write destination through cpu cache
 * @addr: destination kernel address
 * @bytes: total transfer length
 * @i: source iterator
 *
 * The pmem driver arranges for filesystem-dax to use this facility via
 * dax_copy_from_iter() for ensuring that writes to persistent memory
 * are flushed through the CPU cache. It is differentiated from
 * _copy_from_iter_nocache() in that it guarantees all data is flushed for
 * all iterator types. _copy_from_iter_nocache() only attempts to
 * bypass the cache for the ITER_IOVEC case, and on some archs may use
 * instructions that strand dirty-data in the cache.
 */
size_t _copy_from_iter_flushcache(void *addr, size_t bytes, struct iov_iter *i)
{
	char *to = addr;
	if (unlikely(iov_iter_is_pipe(i))) {
		WARN_ON(1);
		return 0;
	}
	iterate_and_advance(i, bytes, v,
		__copy_from_user_flushcache((to += v.iov_len) - v.iov_len,
					    v.iov_base, v.iov_len),
		memcpy_page_flushcache((to += v.bv_len) - v.bv_len, v.bv_page,
				       v.bv_offset, v.bv_len),
		memcpy_flushcache((to += v.iov_len) - v.iov_len, v.iov_base,
				  v.iov_len)
	)

	return bytes;
}
EXPORT_SYMBOL_GPL(_copy_from_iter_flushcache);
#endif

bool _copy_from_iter_full_nocache(void *addr, size_t bytes, struct iov_iter *i)
{
	char *to = addr;
	if (unlikely(iov_iter_is_pipe(i))) {
		WARN_ON(1);
		return false;
	}
	if (unlikely(i->count < bytes))
		return false;
	iterate_all_kinds(i, bytes, v, ({
		if (__copy_from_user_inatomic_nocache((to += v.iov_len) - v.iov_len,
						      v.iov_base, v.iov_len))
			return false;
		0;}),
		memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page,
				 v.bv_offset, v.bv_len),
		memcpy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len)
	)

	iov_iter_advance(i, bytes);
	return true;
}
EXPORT_SYMBOL(_copy_from_iter_full_nocache);

static inline bool page_copy_sane(struct page *page, size_t offset, size_t n)
{
	struct page *head;
	size_t v = n + offset;

	/*
	 * The general case needs to access the page order in order
	 * to compute the page size.
	 * However, we mostly deal with order-0 pages and thus can
	 * avoid a possible cache line miss for requests that fit all
	 * page orders.
	 */
	if (n <= v && v <= PAGE_SIZE)
		return true;

	head = compound_head(page);
	v += (page - head) << PAGE_SHIFT;

	if (likely(n <= v && v <= (page_size(head))))
		return true;
	WARN_ON(1);
	return false;
}

size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes,
			 struct iov_iter *i)
{
	if (unlikely(!page_copy_sane(page, offset, bytes)))
		return 0;
	if (i->type & (ITER_BVEC|ITER_KVEC)) {
		void *kaddr = kmap_atomic(page);
		size_t wanted = copy_to_iter(kaddr + offset, bytes, i);
		kunmap_atomic(kaddr);
		return wanted;
	} else if (unlikely(iov_iter_is_discard(i)))
		return bytes;
	else if (likely(!iov_iter_is_pipe(i)))
		return copy_page_to_iter_iovec(page, offset, bytes, i);
	else
		return copy_page_to_iter_pipe(page, offset, bytes, i);
}
EXPORT_SYMBOL(copy_page_to_iter);

size_t copy_page_from_iter(struct page *page, size_t offset, size_t bytes,
			 struct iov_iter *i)
{
	if (unlikely(!page_copy_sane(page, offset, bytes)))
		return 0;
	if (unlikely(iov_iter_is_pipe(i) || iov_iter_is_discard(i))) {
		WARN_ON(1);
		return 0;
	}
	if (i->type & (ITER_BVEC|ITER_KVEC)) {
		void *kaddr = kmap_atomic(page);
		size_t wanted = _copy_from_iter(kaddr + offset, bytes, i);
		kunmap_atomic(kaddr);
		return wanted;
	} else
		return copy_page_from_iter_iovec(page, offset, bytes, i);
}
EXPORT_SYMBOL(copy_page_from_iter);

static size_t pipe_zero(size_t bytes, struct iov_iter *i)
{
	struct pipe_inode_info *pipe = i->pipe;
	unsigned int p_mask = pipe->ring_size - 1;
	unsigned int i_head;
	size_t n, off;

	if (!sanity(i))
		return 0;

	bytes = n = push_pipe(i, bytes, &i_head, &off);
	if (unlikely(!n))
		return 0;

	do {
		size_t chunk = min_t(size_t, n, PAGE_SIZE - off);
		memzero_page(pipe->bufs[i_head & p_mask].page, off, chunk);
		i->head = i_head;
		i->iov_offset = off + chunk;
		n -= chunk;
		off = 0;
		i_head++;
	} while (n);
	i->count -= bytes;
	return bytes;
}

size_t iov_iter_zero(size_t bytes, struct iov_iter *i)
{
	if (unlikely(iov_iter_is_pipe(i)))
		return pipe_zero(bytes, i);
	iterate_and_advance(i, bytes, v,
		clear_user(v.iov_base, v.iov_len),
		memzero_page(v.bv_page, v.bv_offset, v.bv_len),
		memset(v.iov_base, 0, v.iov_len)
	)

	return bytes;
}
EXPORT_SYMBOL(iov_iter_zero);

size_t iov_iter_copy_from_user_atomic(struct page *page,
		struct iov_iter *i, unsigned long offset, size_t bytes)
{
	char *kaddr = kmap_atomic(page), *p = kaddr + offset;
	if (unlikely(!page_copy_sane(page, offset, bytes))) {
		kunmap_atomic(kaddr);
		return 0;
	}
	if (unlikely(iov_iter_is_pipe(i) || iov_iter_is_discard(i))) {
		kunmap_atomic(kaddr);
		WARN_ON(1);
		return 0;
	}
	iterate_all_kinds(i, bytes, v,
		copyin((p += v.iov_len) - v.iov_len, v.iov_base, v.iov_len),
		memcpy_from_page((p += v.bv_len) - v.bv_len, v.bv_page,
				 v.bv_offset, v.bv_len),
		memcpy((p += v.iov_len) - v.iov_len, v.iov_base, v.iov_len)
	)
	kunmap_atomic(kaddr);
	return bytes;
}
EXPORT_SYMBOL(iov_iter_copy_from_user_atomic);

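/*
 * Release every pipe buffer past the point the iterator has reached and trim
 * the buffer it stopped in, pulling pipe->head back accordingly.
 */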
static inline void pipe_truncate(struct iov_iter *i)
{
	struct pipe_inode_info *pipe = i->pipe;
	unsigned int p_tail = pipe->tail;
	unsigned int p_head = pipe->head;
	unsigned int p_mask = pipe->ring_size - 1;

	if (!pipe_empty(p_head, p_tail)) {
		struct pipe_buffer *buf;
		unsigned int i_head = i->head;
		size_t off = i->iov_offset;

		if (off) {
			buf = &pipe->bufs[i_head & p_mask];
			buf->len = off - buf->offset;
			i_head++;
		}
		while (p_head != i_head) {
			p_head--;
			pipe_buf_release(pipe, &pipe->bufs[p_head & p_mask]);
		}

		pipe->head = p_head;
	}
}

static void pipe_advance(struct iov_iter *i, size_t size)
{
	struct pipe_inode_info *pipe = i->pipe;
	if (unlikely(i->count < size))
		size = i->count;
	if (size) {
		struct pipe_buffer *buf;
		unsigned int p_mask = pipe->ring_size - 1;
		unsigned int i_head = i->head;
		size_t off = i->iov_offset, left = size;

		if (off) /* make it relative to the beginning of buffer */
			left += off - pipe->bufs[i_head & p_mask].offset;
		while (1) {
			buf = &pipe->bufs[i_head & p_mask];
			if (left <= buf->len)
				break;
			left -= buf->len;
			i_head++;
		}
		i->head = i_head;
		i->iov_offset = buf->offset + left;
	}
	i->count -= size;
	/* ... and discard everything past that point */
	pipe_truncate(i);
}

static void iov_iter_bvec_advance(struct iov_iter *i, size_t size)
{
	struct bvec_iter bi;

	bi.bi_size = i->count;
	bi.bi_bvec_done = i->iov_offset;
	bi.bi_idx = 0;
	bvec_iter_advance(i->bvec, &bi, size);

	i->bvec += bi.bi_idx;
	i->nr_segs -= bi.bi_idx;
	i->count = bi.bi_size;
	i->iov_offset = bi.bi_bvec_done;
}

void iov_iter_advance(struct iov_iter *i, size_t size)
{
	if (unlikely(iov_iter_is_pipe(i))) {
		pipe_advance(i, size);
		return;
	}
	if (unlikely(iov_iter_is_discard(i))) {
		i->count -= size;
		return;
	}
	if (iov_iter_is_bvec(i)) {
		iov_iter_bvec_advance(i, size);
		return;
	}
	iterate_and_advance(i, size, v, 0, 0, 0)
}
EXPORT_SYMBOL(iov_iter_advance);

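/*
 * Undo the effect of an earlier iov_iter_advance() (or of a copy that
 * consumed the iterator) by walking at most @unroll bytes backwards.
 */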
void iov_iter_revert(struct iov_iter *i, size_t unroll)
{
	if (!unroll)
		return;
	if (WARN_ON(unroll > MAX_RW_COUNT))
		return;
	i->count += unroll;
	if (unlikely(iov_iter_is_pipe(i))) {
		struct pipe_inode_info *pipe = i->pipe;
		unsigned int p_mask = pipe->ring_size - 1;
		unsigned int i_head = i->head;
		size_t off = i->iov_offset;
		while (1) {
			struct pipe_buffer *b = &pipe->bufs[i_head & p_mask];
			size_t n = off - b->offset;
			if (unroll < n) {
				off -= unroll;
				break;
			}
			unroll -= n;
			if (!unroll && i_head == i->start_head) {
				off = 0;
				break;
			}
			i_head--;
			b = &pipe->bufs[i_head & p_mask];
			off = b->offset + b->len;
		}
		i->iov_offset = off;
		i->head = i_head;
		pipe_truncate(i);
		return;
	}
	if (unlikely(iov_iter_is_discard(i)))
		return;
	if (unroll <= i->iov_offset) {
		i->iov_offset -= unroll;
		return;
	}
	unroll -= i->iov_offset;
	if (iov_iter_is_bvec(i)) {
		const struct bio_vec *bvec = i->bvec;
		while (1) {
			size_t n = (--bvec)->bv_len;
			i->nr_segs++;
			if (unroll <= n) {
				i->bvec = bvec;
				i->iov_offset = n - unroll;
				return;
			}
			unroll -= n;
		}
	} else { /* same logics for iovec and kvec */
		const struct iovec *iov = i->iov;
		while (1) {
			size_t n = (--iov)->iov_len;
			i->nr_segs++;
			if (unroll <= n) {
				i->iov = iov;
				i->iov_offset = n - unroll;
				return;
			}
			unroll -= n;
		}
	}
}
EXPORT_SYMBOL(iov_iter_revert);

/*
 * Return the count of just the current iov_iter segment.
 */
size_t iov_iter_single_seg_count(const struct iov_iter *i)
{
	if (unlikely(iov_iter_is_pipe(i)))
		return i->count;	// it is a silly place, anyway
	if (i->nr_segs == 1)
		return i->count;
	if (unlikely(iov_iter_is_discard(i)))
		return i->count;
	else if (iov_iter_is_bvec(i))
		return min(i->count, i->bvec->bv_len - i->iov_offset);
	else
		return min(i->count, i->iov->iov_len - i->iov_offset);
}
EXPORT_SYMBOL(iov_iter_single_seg_count);

void iov_iter_kvec(struct iov_iter *i, unsigned int direction,
			const struct kvec *kvec, unsigned long nr_segs,
			size_t count)
{
	WARN_ON(direction & ~(READ | WRITE));
	i->type = ITER_KVEC | (direction & (READ | WRITE));
	i->kvec = kvec;
	i->nr_segs = nr_segs;
	i->iov_offset = 0;
	i->count = count;
}
EXPORT_SYMBOL(iov_iter_kvec);

void iov_iter_bvec(struct iov_iter *i, unsigned int direction,
			const struct bio_vec *bvec, unsigned long nr_segs,
			size_t count)
{
	WARN_ON(direction & ~(READ | WRITE));
	i->type = ITER_BVEC | (direction & (READ | WRITE));
	i->bvec = bvec;
	i->nr_segs = nr_segs;
	i->iov_offset = 0;
	i->count = count;
}
EXPORT_SYMBOL(iov_iter_bvec);

void iov_iter_pipe(struct iov_iter *i, unsigned int direction,
			struct pipe_inode_info *pipe,
			size_t count)
{
	BUG_ON(direction != READ);
	WARN_ON(pipe_full(pipe->head, pipe->tail, pipe->ring_size));
	i->type = ITER_PIPE | READ;
	i->pipe = pipe;
	i->head = pipe->head;
	i->iov_offset = 0;
	i->count = count;
	i->start_head = i->head;
}
EXPORT_SYMBOL(iov_iter_pipe);

/**
 * iov_iter_discard - Initialise an I/O iterator that discards data
 * @i: The iterator to initialise.
 * @direction: The direction of the transfer.
 * @count: The size of the I/O buffer in bytes.
 *
 * Set up an I/O iterator that just discards everything that's written to it.
 * It's only available as a READ iterator.
 */
void iov_iter_discard(struct iov_iter *i, unsigned int direction, size_t count)
{
	BUG_ON(direction != READ);
	i->type = ITER_DISCARD | READ;
	i->count = count;
	i->iov_offset = 0;
}
EXPORT_SYMBOL(iov_iter_discard);

unsigned long iov_iter_alignment(const struct iov_iter *i)
{
	unsigned long res = 0;
	size_t size = i->count;

	if (unlikely(iov_iter_is_pipe(i))) {
		unsigned int p_mask = i->pipe->ring_size - 1;

		if (size && i->iov_offset && allocated(&i->pipe->bufs[i->head & p_mask]))
			return size | i->iov_offset;
		return size;
	}
	iterate_all_kinds(i, size, v,
		(res |= (unsigned long)v.iov_base | v.iov_len, 0),
		res |= v.bv_offset | v.bv_len,
		res |= (unsigned long)v.iov_base | v.iov_len
	)
	return res;
}
EXPORT_SYMBOL(iov_iter_alignment);

unsigned long iov_iter_gap_alignment(const struct iov_iter *i)
{
	unsigned long res = 0;
	size_t size = i->count;

	if (unlikely(iov_iter_is_pipe(i) || iov_iter_is_discard(i))) {
		WARN_ON(1);
		return ~0U;
	}

	iterate_all_kinds(i, size, v,
		(res |= (!res ? 0 : (unsigned long)v.iov_base) |
			(size != v.iov_len ? size : 0), 0),
		(res |= (!res ? 0 : (unsigned long)v.bv_offset) |
			(size != v.bv_len ? size : 0)),
		(res |= (!res ? 0 : (unsigned long)v.iov_base) |
			(size != v.iov_len ? size : 0))
		);
	return res;
}
EXPORT_SYMBOL(iov_iter_gap_alignment);

static inline ssize_t __pipe_get_pages(struct iov_iter *i,
				size_t maxsize,
				struct page **pages,
				int iter_head,
				size_t *start)
{
	struct pipe_inode_info *pipe = i->pipe;
	unsigned int p_mask = pipe->ring_size - 1;
	ssize_t n = push_pipe(i, maxsize, &iter_head, start);
	if (!n)
		return -EFAULT;

	maxsize = n;
	n += *start;
	while (n > 0) {
		get_page(*pages++ = pipe->bufs[iter_head & p_mask].page);
		iter_head++;
		n -= PAGE_SIZE;
	}

	return maxsize;
}

static ssize_t pipe_get_pages(struct iov_iter *i,
		   struct page **pages, size_t maxsize, unsigned maxpages,
		   size_t *start)
{
	unsigned int iter_head, npages;
	size_t capacity;

	if (!maxsize)
		return 0;

	if (!sanity(i))
		return -EFAULT;

	data_start(i, &iter_head, start);
	/* Amount of free space: some of this one + all after this one */
	npages = pipe_space_for_user(iter_head, i->pipe->tail, i->pipe);
	capacity = min(npages, maxpages) * PAGE_SIZE - *start;

	return __pipe_get_pages(i, min(maxsize, capacity), pages, iter_head, start);
}

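/*
 * Grab references on up to @maxpages pages covering the next chunk of the
 * iterator, without advancing it.  *@start receives the offset into the
 * first page; the return value is the number of bytes covered, 0 at the end
 * of the iterator, or a negative error.
 */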
ssize_t iov_iter_get_pages(struct iov_iter *i,
		   struct page **pages, size_t maxsize, unsigned maxpages,
		   size_t *start)
{
	if (maxsize > i->count)
		maxsize = i->count;

	if (unlikely(iov_iter_is_pipe(i)))
		return pipe_get_pages(i, pages, maxsize, maxpages, start);
	if (unlikely(iov_iter_is_discard(i)))
		return -EFAULT;

	iterate_all_kinds(i, maxsize, v, ({
		unsigned long addr = (unsigned long)v.iov_base;
		size_t len = v.iov_len + (*start = addr & (PAGE_SIZE - 1));
		int n;
		int res;

		if (len > maxpages * PAGE_SIZE)
			len = maxpages * PAGE_SIZE;
		addr &= ~(PAGE_SIZE - 1);
		n = DIV_ROUND_UP(len, PAGE_SIZE);
		res = get_user_pages_fast(addr, n,
				iov_iter_rw(i) != WRITE ? FOLL_WRITE : 0,
				pages);
		if (unlikely(res < 0))
			return res;
		return (res == n ? len : res * PAGE_SIZE) - *start;
	0;}),({
		/* can't be more than PAGE_SIZE */
		*start = v.bv_offset;
		get_page(*pages = v.bv_page);
		return v.bv_len;
	}),({
		return -EFAULT;
	})
	)
	return 0;
}
EXPORT_SYMBOL(iov_iter_get_pages);

static struct page **get_pages_array(size_t n)
{
	return kvmalloc_array(n, sizeof(struct page *), GFP_KERNEL);
}

static ssize_t pipe_get_pages_alloc(struct iov_iter *i,
		   struct page ***pages, size_t maxsize,
		   size_t *start)
{
	struct page **p;
	unsigned int iter_head, npages;
	ssize_t n;

	if (!maxsize)
		return 0;

	if (!sanity(i))
		return -EFAULT;

	data_start(i, &iter_head, start);
	/* Amount of free space: some of this one + all after this one */
	npages = pipe_space_for_user(iter_head, i->pipe->tail, i->pipe);
	n = npages * PAGE_SIZE - *start;
	if (maxsize > n)
		maxsize = n;
	else
		npages = DIV_ROUND_UP(maxsize + *start, PAGE_SIZE);
	p = get_pages_array(npages);
	if (!p)
		return -ENOMEM;
	n = __pipe_get_pages(i, maxsize, p, iter_head, start);
	if (n > 0)
		*pages = p;
	else
		kvfree(p);
	return n;
}

ssize_t iov_iter_get_pages_alloc(struct iov_iter *i,
		   struct page ***pages, size_t maxsize,
		   size_t *start)
{
	struct page **p;

	if (maxsize > i->count)
		maxsize = i->count;

	if (unlikely(iov_iter_is_pipe(i)))
		return pipe_get_pages_alloc(i, pages, maxsize, start);
	if (unlikely(iov_iter_is_discard(i)))
		return -EFAULT;

	iterate_all_kinds(i, maxsize, v, ({
		unsigned long addr = (unsigned long)v.iov_base;
		size_t len = v.iov_len + (*start = addr & (PAGE_SIZE - 1));
		int n;
		int res;

		addr &= ~(PAGE_SIZE - 1);
		n = DIV_ROUND_UP(len, PAGE_SIZE);
		p = get_pages_array(n);
		if (!p)
			return -ENOMEM;
		res = get_user_pages_fast(addr, n,
				iov_iter_rw(i) != WRITE ? FOLL_WRITE : 0, p);
		if (unlikely(res < 0)) {
			kvfree(p);
			return res;
		}
		*pages = p;
		return (res == n ? len : res * PAGE_SIZE) - *start;
	0;}),({
		/* can't be more than PAGE_SIZE */
		*start = v.bv_offset;
		*pages = p = get_pages_array(1);
		if (!p)
			return -ENOMEM;
		get_page(*p = v.bv_page);
		return v.bv_len;
	}),({
		return -EFAULT;
	})
	)
	return 0;
}
EXPORT_SYMBOL(iov_iter_get_pages_alloc);

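/*
 * Copy from the iterator while folding the copied data into the Internet
 * checksum at *@csum.  A usercopy fault ends the copy early; the returned
 * byte count and the checksum only cover what was actually transferred.
 */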
size_t csum_and_copy_from_iter(void *addr, size_t bytes, __wsum *csum,
			       struct iov_iter *i)
{
	char *to = addr;
	__wsum sum, next;
	size_t off = 0;
	sum = *csum;
	if (unlikely(iov_iter_is_pipe(i) || iov_iter_is_discard(i))) {
		WARN_ON(1);
		return 0;
	}
	iterate_and_advance(i, bytes, v, ({
		next = csum_and_copy_from_user(v.iov_base,
					       (to += v.iov_len) - v.iov_len,
					       v.iov_len);
		if (next) {
			sum = csum_block_add(sum, next, off);
			off += v.iov_len;
		}
		next ? 0 : v.iov_len;
	}), ({
		char *p = kmap_atomic(v.bv_page);
		sum = csum_and_memcpy((to += v.bv_len) - v.bv_len,
				      p + v.bv_offset, v.bv_len,
				      sum, off);
		kunmap_atomic(p);
		off += v.bv_len;
	}),({
		sum = csum_and_memcpy((to += v.iov_len) - v.iov_len,
				      v.iov_base, v.iov_len,
				      sum, off);
		off += v.iov_len;
	})
	)
	*csum = sum;
	return bytes;
}
EXPORT_SYMBOL(csum_and_copy_from_iter);

bool csum_and_copy_from_iter_full(void *addr, size_t bytes, __wsum *csum,
				  struct iov_iter *i)
{
	char *to = addr;
	__wsum sum, next;
	size_t off = 0;
	sum = *csum;
	if (unlikely(iov_iter_is_pipe(i) || iov_iter_is_discard(i))) {
		WARN_ON(1);
		return false;
	}
	if (unlikely(i->count < bytes))
		return false;
	iterate_all_kinds(i, bytes, v, ({
		next = csum_and_copy_from_user(v.iov_base,
					       (to += v.iov_len) - v.iov_len,
					       v.iov_len);
		if (!next)
			return false;
		sum = csum_block_add(sum, next, off);
		off += v.iov_len;
		0;
	}), ({
		char *p = kmap_atomic(v.bv_page);
		sum = csum_and_memcpy((to += v.bv_len) - v.bv_len,
				      p + v.bv_offset, v.bv_len,
				      sum, off);
		kunmap_atomic(p);
		off += v.bv_len;
	}),({
		sum = csum_and_memcpy((to += v.iov_len) - v.iov_len,
				      v.iov_base, v.iov_len,
				      sum, off);
		off += v.iov_len;
	})
	)
	*csum = sum;
	iov_iter_advance(i, bytes);
	return true;
}
EXPORT_SYMBOL(csum_and_copy_from_iter_full);

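/*
 * Checksumming counterpart of _copy_to_iter(): the running checksum and the
 * offset it has reached so far are carried in the struct csum_state passed
 * via @_csstate, so the helper can be called repeatedly for one stream.
 */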
size_t csum_and_copy_to_iter(const void *addr, size_t bytes, void *_csstate,
			     struct iov_iter *i)
{
	struct csum_state *csstate = _csstate;
	const char *from = addr;
	__wsum sum, next;
	size_t off;

	if (unlikely(iov_iter_is_pipe(i)))
		return csum_and_copy_to_pipe_iter(addr, bytes, _csstate, i);

	sum = csstate->csum;
	off = csstate->off;
	if (unlikely(iov_iter_is_discard(i))) {
		WARN_ON(1);	/* for now */
		return 0;
	}
	iterate_and_advance(i, bytes, v, ({
		next = csum_and_copy_to_user((from += v.iov_len) - v.iov_len,
					     v.iov_base,
					     v.iov_len);
		if (next) {
			sum = csum_block_add(sum, next, off);
			off += v.iov_len;
		}
		next ? 0 : v.iov_len;
	}), ({
		char *p = kmap_atomic(v.bv_page);
		sum = csum_and_memcpy(p + v.bv_offset,
				      (from += v.bv_len) - v.bv_len,
				      v.bv_len, sum, off);
		kunmap_atomic(p);
		off += v.bv_len;
	}),({
		sum = csum_and_memcpy(v.iov_base,
				      (from += v.iov_len) - v.iov_len,
				      v.iov_len, sum, off);
		off += v.iov_len;
	})
	)
	csstate->csum = sum;
	csstate->off = off;
	return bytes;
}
EXPORT_SYMBOL(csum_and_copy_to_iter);

size_t hash_and_copy_to_iter(const void *addr, size_t bytes, void *hashp,
		struct iov_iter *i)
{
#ifdef CONFIG_CRYPTO_HASH
	struct ahash_request *hash = hashp;
	struct scatterlist sg;
	size_t copied;

	copied = copy_to_iter(addr, bytes, i);
	sg_init_one(&sg, addr, copied);
	ahash_request_set_crypt(hash, &sg, NULL, copied);
	crypto_ahash_update(hash);
	return copied;
#else
	return 0;
#endif
}
EXPORT_SYMBOL(hash_and_copy_to_iter);

int iov_iter_npages(const struct iov_iter *i, int maxpages)
{
	size_t size = i->count;
	int npages = 0;

	if (!size)
		return 0;
	if (unlikely(iov_iter_is_discard(i)))
		return 0;

	if (unlikely(iov_iter_is_pipe(i))) {
		struct pipe_inode_info *pipe = i->pipe;
		unsigned int iter_head;
		size_t off;

		if (!sanity(i))
			return 0;

		data_start(i, &iter_head, &off);
		/* some of this one + all after this one */
		npages = pipe_space_for_user(iter_head, pipe->tail, pipe);
		if (npages >= maxpages)
			return maxpages;
	} else iterate_all_kinds(i, size, v, ({
		unsigned long p = (unsigned long)v.iov_base;
		npages += DIV_ROUND_UP(p + v.iov_len, PAGE_SIZE)
			- p / PAGE_SIZE;
		if (npages >= maxpages)
			return maxpages;
	0;}),({
		npages++;
		if (npages >= maxpages)
			return maxpages;
	}),({
		unsigned long p = (unsigned long)v.iov_base;
		npages += DIV_ROUND_UP(p + v.iov_len, PAGE_SIZE)
			- p / PAGE_SIZE;
		if (npages >= maxpages)
			return maxpages;
	})
	)
	return npages;
}
EXPORT_SYMBOL(iov_iter_npages);

const void *dup_iter(struct iov_iter *new, struct iov_iter *old, gfp_t flags)
{
	*new = *old;
	if (unlikely(iov_iter_is_pipe(new))) {
		WARN_ON(1);
		return NULL;
	}
	if (unlikely(iov_iter_is_discard(new)))
		return NULL;
	if (iov_iter_is_bvec(new))
		return new->bvec = kmemdup(new->bvec,
				    new->nr_segs * sizeof(struct bio_vec),
				    flags);
	else
		/* iovec and kvec have identical layout */
		return new->iov = kmemdup(new->iov,
				   new->nr_segs * sizeof(struct iovec),
				   flags);
}
EXPORT_SYMBOL(dup_iter);

static int copy_compat_iovec_from_user(struct iovec *iov,
		const struct iovec __user *uvec, unsigned long nr_segs)
{
	const struct compat_iovec __user *uiov =
		(const struct compat_iovec __user *)uvec;
	int ret = -EFAULT, i;

	if (!user_access_begin(uiov, nr_segs * sizeof(*uiov)))
		return -EFAULT;

	for (i = 0; i < nr_segs; i++) {
		compat_uptr_t buf;
		compat_ssize_t len;

		unsafe_get_user(len, &uiov[i].iov_len, uaccess_end);
		unsafe_get_user(buf, &uiov[i].iov_base, uaccess_end);

		/* check for compat_size_t not fitting in compat_ssize_t .. */
		if (len < 0) {
			ret = -EINVAL;
			goto uaccess_end;
		}
		iov[i].iov_base = compat_ptr(buf);
		iov[i].iov_len = len;
	}

	ret = 0;
uaccess_end:
	user_access_end();
	return ret;
}

static int copy_iovec_from_user(struct iovec *iov,
		const struct iovec __user *uvec, unsigned long nr_segs)
{
	unsigned long seg;

	if (copy_from_user(iov, uvec, nr_segs * sizeof(*uvec)))
		return -EFAULT;
	for (seg = 0; seg < nr_segs; seg++) {
		if ((ssize_t)iov[seg].iov_len < 0)
			return -EINVAL;
	}

	return 0;
}

struct iovec *iovec_from_user(const struct iovec __user *uvec,
		unsigned long nr_segs, unsigned long fast_segs,
		struct iovec *fast_iov, bool compat)
{
	struct iovec *iov = fast_iov;
	int ret;

	/*
	 * SuS says "The readv() function *may* fail if the iovcnt argument was
	 * less than or equal to 0, or greater than {IOV_MAX}.  Linux has
	 * traditionally returned zero for zero segments, so...
	 */
	if (nr_segs == 0)
		return iov;
	if (nr_segs > UIO_MAXIOV)
		return ERR_PTR(-EINVAL);
	if (nr_segs > fast_segs) {
		iov = kmalloc_array(nr_segs, sizeof(struct iovec), GFP_KERNEL);
		if (!iov)
			return ERR_PTR(-ENOMEM);
	}

	if (compat)
		ret = copy_compat_iovec_from_user(iov, uvec, nr_segs);
	else
		ret = copy_iovec_from_user(iov, uvec, nr_segs);
	if (ret) {
		if (iov != fast_iov)
			kfree(iov);
		return ERR_PTR(ret);
	}

	return iov;
}

ssize_t __import_iovec(int type, const struct iovec __user *uvec,
		 unsigned nr_segs, unsigned fast_segs, struct iovec **iovp,
		 struct iov_iter *i, bool compat)
{
	ssize_t total_len = 0;
	unsigned long seg;
	struct iovec *iov;

	iov = iovec_from_user(uvec, nr_segs, fast_segs, *iovp, compat);
	if (IS_ERR(iov)) {
		*iovp = NULL;
		return PTR_ERR(iov);
	}

	/*
	 * According to the Single Unix Specification we should return EINVAL if
	 * an element length is < 0 when cast to ssize_t or if the total length
	 * would overflow the ssize_t return value of the system call.
	 *
	 * Linux caps all read/write calls to MAX_RW_COUNT, and avoids the
	 * overflow case.
	 */
	for (seg = 0; seg < nr_segs; seg++) {
		ssize_t len = (ssize_t)iov[seg].iov_len;

		if (!access_ok(iov[seg].iov_base, len)) {
			if (iov != *iovp)
				kfree(iov);
			*iovp = NULL;
			return -EFAULT;
		}

		if (len > MAX_RW_COUNT - total_len) {
			len = MAX_RW_COUNT - total_len;
			iov[seg].iov_len = len;
		}
		total_len += len;
	}

	iov_iter_init(i, type, iov, nr_segs, total_len);
	if (iov == *iovp)
		*iovp = NULL;
	else
		*iovp = iov;
	return total_len;
}

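/*
 * A minimal caller sketch for import_iovec() (documented below); the
 * surrounding syscall plumbing is assumed, not taken from this file:
 *
 *	struct iovec iovstack[UIO_FASTIOV];
 *	struct iovec *iov = iovstack;
 *	struct iov_iter iter;
 *	ssize_t ret;
 *
 *	ret = import_iovec(READ, uvec, nr_segs, ARRAY_SIZE(iovstack),
 *			   &iov, &iter);
 *	if (ret < 0)
 *		return ret;
 *	ret = consume(&iter);	// hypothetical consumer of the iterator
 *	kfree(iov);		// safe whether or not the stack array was used
 *	return ret;
 */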
/**
 * import_iovec() - Copy an array of &struct iovec from userspace
 *     into the kernel, check that it is valid, and initialize a new
 *     &struct iov_iter iterator to access it.
 *
 * @type: One of %READ or %WRITE.
 * @uvec: Pointer to the userspace array.
 * @nr_segs: Number of elements in userspace array.
 * @fast_segs: Number of elements in *@iovp.
 * @iovp: (input and output parameter) Pointer to pointer to (usually small
 *     on-stack) kernel array.
 * @i: Pointer to iterator that will be initialized on success.
 *
 * If the array pointed to by *@iovp is large enough to hold all @nr_segs,
 * then this function places %NULL in *@iovp on return. Otherwise, a new
 * array will be allocated and the result placed in *@iovp. This means that
 * the caller may call kfree() on *@iovp regardless of whether the small
 * on-stack array was used or not (and regardless of whether this function
 * returns an error or not).
 *
 * Return: Negative error code on error, bytes imported on success
 */
ssize_t import_iovec(int type, const struct iovec __user *uvec,
		 unsigned nr_segs, unsigned fast_segs,
		 struct iovec **iovp, struct iov_iter *i)
{
	return __import_iovec(type, uvec, nr_segs, fast_segs, iovp, i,
			      in_compat_syscall());
}
EXPORT_SYMBOL(import_iovec);

int import_single_range(int rw, void __user *buf, size_t len,
		 struct iovec *iov, struct iov_iter *i)
{
	if (len > MAX_RW_COUNT)
		len = MAX_RW_COUNT;
	if (unlikely(!access_ok(buf, len)))
		return -EFAULT;

	iov->iov_base = buf;
	iov->iov_len = len;
	iov_iter_init(i, rw, iov, 1, len);
	return 0;
}
EXPORT_SYMBOL(import_single_range);

int iov_iter_for_each_range(struct iov_iter *i, size_t bytes,
			    int (*f)(struct kvec *vec, void *context),
			    void *context)
{
	struct kvec w;
	int err = -EINVAL;
	if (!bytes)
		return 0;

	iterate_all_kinds(i, bytes, v, -EINVAL, ({
		w.iov_base = kmap(v.bv_page) + v.bv_offset;
		w.iov_len = v.bv_len;
		err = f(&w, context);
		kunmap(v.bv_page);
		err;}), ({
		w = v;
		err = f(&w, context);})
	)
	return err;
}
EXPORT_SYMBOL(iov_iter_for_each_range);