// SPDX-License-Identifier: GPL-2.0-only
#include <crypto/hash.h>
#include <linux/export.h>
#include <linux/bvec.h>
#include <linux/fault-inject-usercopy.h>
#include <linux/uio.h>
#include <linux/pagemap.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/splice.h>
#include <linux/compat.h>
#include <net/checksum.h>
#include <linux/scatterlist.h>
#include <linux/instrumented.h>

#define PIPE_PARANOIA /* for now */

#define iterate_iovec(i, n, __v, __p, skip, STEP) {	\
	size_t left;					\
	size_t wanted = n;				\
	__p = i->iov;					\
	__v.iov_len = min(n, __p->iov_len - skip);	\
	if (likely(__v.iov_len)) {			\
		__v.iov_base = __p->iov_base + skip;	\
		left = (STEP);				\
		__v.iov_len -= left;			\
		skip += __v.iov_len;			\
		n -= __v.iov_len;			\
	} else {					\
		left = 0;				\
	}						\
	while (unlikely(!left && n)) {			\
		__p++;					\
		__v.iov_len = min(n, __p->iov_len);	\
		if (unlikely(!__v.iov_len))		\
			continue;			\
		__v.iov_base = __p->iov_base;		\
		left = (STEP);				\
		__v.iov_len -= left;			\
		skip = __v.iov_len;			\
		n -= __v.iov_len;			\
	}						\
	n = wanted - n;					\
}

#define iterate_kvec(i, n, __v, __p, skip, STEP) {	\
	size_t wanted = n;				\
	__p = i->kvec;					\
	__v.iov_len = min(n, __p->iov_len - skip);	\
	if (likely(__v.iov_len)) {			\
		__v.iov_base = __p->iov_base + skip;	\
		(void)(STEP);				\
		skip += __v.iov_len;			\
		n -= __v.iov_len;			\
	}						\
	while (unlikely(n)) {				\
		__p++;					\
		__v.iov_len = min(n, __p->iov_len);	\
		if (unlikely(!__v.iov_len))		\
			continue;			\
		__v.iov_base = __p->iov_base;		\
		(void)(STEP);				\
		skip = __v.iov_len;			\
		n -= __v.iov_len;			\
	}						\
	n = wanted;					\
}

#define iterate_bvec(i, n, __v, __bi, skip, STEP) {	\
	struct bvec_iter __start;			\
	__start.bi_size = n;				\
	__start.bi_bvec_done = skip;			\
	__start.bi_idx = 0;				\
	for_each_bvec(__v, i->bvec, __bi, __start) {	\
		if (!__v.bv_len)			\
			continue;			\
		(void)(STEP);				\
	}						\
}

#define iterate_all_kinds(i, n, v, I, B, K) {			\
	if (likely(n)) {					\
		size_t skip = i->iov_offset;			\
		if (unlikely(i->type & ITER_BVEC)) {		\
			struct bio_vec v;			\
			struct bvec_iter __bi;			\
			iterate_bvec(i, n, v, __bi, skip, (B))	\
		} else if (unlikely(i->type & ITER_KVEC)) {	\
			const struct kvec *kvec;		\
			struct kvec v;				\
			iterate_kvec(i, n, v, kvec, skip, (K))	\
		} else if (unlikely(i->type & ITER_DISCARD)) {	\
		} else {					\
			const struct iovec *iov;		\
			struct iovec v;				\
			iterate_iovec(i, n, v, iov, skip, (I))	\
		}						\
	}							\
}

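/*
 * Descriptive note (editorial addition): iterate_and_advance() below walks
 * the same three segment types as iterate_all_kinds() above (iovec, bvec,
 * kvec; ITER_DISCARD is a no-op), but additionally consumes the iterator:
 * it trims i->count, updates i->iov_offset and drops fully-used segments
 * from i->nr_segs.
 */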
#define iterate_and_advance(i, n, v, I, B, K) {			\
	if (unlikely(i->count < n))				\
		n = i->count;					\
	if (i->count) {						\
		size_t skip = i->iov_offset;			\
		if (unlikely(i->type & ITER_BVEC)) {		\
			const struct bio_vec *bvec = i->bvec;	\
			struct bio_vec v;			\
			struct bvec_iter __bi;			\
			iterate_bvec(i, n, v, __bi, skip, (B))	\
			i->bvec = __bvec_iter_bvec(i->bvec, __bi);	\
			i->nr_segs -= i->bvec - bvec;		\
			skip = __bi.bi_bvec_done;		\
		} else if (unlikely(i->type & ITER_KVEC)) {	\
			const struct kvec *kvec;		\
			struct kvec v;				\
			iterate_kvec(i, n, v, kvec, skip, (K))	\
			if (skip == kvec->iov_len) {		\
				kvec++;				\
				skip = 0;			\
			}					\
			i->nr_segs -= kvec - i->kvec;		\
			i->kvec = kvec;				\
		} else if (unlikely(i->type & ITER_DISCARD)) {	\
			skip += n;				\
		} else {					\
			const struct iovec *iov;		\
			struct iovec v;				\
			iterate_iovec(i, n, v, iov, skip, (I))	\
			if (skip == iov->iov_len) {		\
				iov++;				\
				skip = 0;			\
			}					\
			i->nr_segs -= iov - i->iov;		\
			i->iov = iov;				\
		}						\
		i->count -= n;					\
		i->iov_offset = skip;				\
	}							\
}

static int copyout(void __user *to, const void *from, size_t n)
{
	if (should_fail_usercopy())
		return n;
	if (access_ok(to, n)) {
		instrument_copy_to_user(to, from, n);
		n = raw_copy_to_user(to, from, n);
	}
	return n;
}

static int copyin(void *to, const void __user *from, size_t n)
{
	if (should_fail_usercopy())
		return n;
	if (access_ok(from, n)) {
		instrument_copy_from_user(to, from, n);
		n = raw_copy_from_user(to, from, n);
	}
	return n;
}

static size_t copy_page_to_iter_iovec(struct page *page, size_t offset, size_t bytes,
			 struct iov_iter *i)
{
	size_t skip, copy, left, wanted;
	const struct iovec *iov;
	char __user *buf;
	void *kaddr, *from;

	if (unlikely(bytes > i->count))
		bytes = i->count;

	if (unlikely(!bytes))
		return 0;

	might_fault();
	wanted = bytes;
	iov = i->iov;
	skip = i->iov_offset;
	buf = iov->iov_base + skip;
	copy = min(bytes, iov->iov_len - skip);

	if (IS_ENABLED(CONFIG_HIGHMEM) && !fault_in_pages_writeable(buf, copy)) {
		kaddr = kmap_atomic(page);
		from = kaddr + offset;

		/* first chunk, usually the only one */
		left = copyout(buf, from, copy);
		copy -= left;
		skip += copy;
		from += copy;
		bytes -= copy;

		while (unlikely(!left && bytes)) {
			iov++;
			buf = iov->iov_base;
			copy = min(bytes, iov->iov_len);
			left = copyout(buf, from, copy);
			copy -= left;
			skip = copy;
			from += copy;
			bytes -= copy;
		}
		if (likely(!bytes)) {
			kunmap_atomic(kaddr);
			goto done;
		}
		offset = from - kaddr;
		buf += copy;
		kunmap_atomic(kaddr);
		copy = min(bytes, iov->iov_len - skip);
	}
	/* Too bad - revert to non-atomic kmap */

	kaddr = kmap(page);
	from = kaddr + offset;
	left = copyout(buf, from, copy);
	copy -= left;
	skip += copy;
	from += copy;
	bytes -= copy;
	while (unlikely(!left && bytes)) {
		iov++;
		buf = iov->iov_base;
		copy = min(bytes, iov->iov_len);
		left = copyout(buf, from, copy);
		copy -= left;
		skip = copy;
		from += copy;
		bytes -= copy;
	}
	kunmap(page);

done:
	if (skip == iov->iov_len) {
		iov++;
		skip = 0;
	}
	i->count -= wanted - bytes;
	i->nr_segs -= iov - i->iov;
	i->iov = iov;
	i->iov_offset = skip;
	return wanted - bytes;
}

static size_t copy_page_from_iter_iovec(struct page *page, size_t offset, size_t bytes,
			 struct iov_iter *i)
{
	size_t skip, copy, left, wanted;
	const struct iovec *iov;
	char __user *buf;
	void *kaddr, *to;

	if (unlikely(bytes > i->count))
		bytes = i->count;

	if (unlikely(!bytes))
		return 0;

	might_fault();
	wanted = bytes;
	iov = i->iov;
	skip = i->iov_offset;
	buf = iov->iov_base + skip;
	copy = min(bytes, iov->iov_len - skip);

	if (IS_ENABLED(CONFIG_HIGHMEM) && !fault_in_pages_readable(buf, copy)) {
		kaddr = kmap_atomic(page);
		to = kaddr + offset;

		/* first chunk, usually the only one */
		left = copyin(to, buf, copy);
		copy -= left;
		skip += copy;
		to += copy;
		bytes -= copy;

		while (unlikely(!left && bytes)) {
			iov++;
			buf = iov->iov_base;
			copy = min(bytes, iov->iov_len);
			left = copyin(to, buf, copy);
			copy -= left;
			skip = copy;
			to += copy;
			bytes -= copy;
		}
		if (likely(!bytes)) {
			kunmap_atomic(kaddr);
			goto done;
		}
		offset = to - kaddr;
		buf += copy;
		kunmap_atomic(kaddr);
		copy = min(bytes, iov->iov_len - skip);
	}
	/* Too bad - revert to non-atomic kmap */

	kaddr = kmap(page);
	to = kaddr + offset;
	left = copyin(to, buf, copy);
	copy -= left;
	skip += copy;
	to += copy;
	bytes -= copy;
	while (unlikely(!left && bytes)) {
		iov++;
		buf = iov->iov_base;
		copy = min(bytes, iov->iov_len);
		left = copyin(to, buf, copy);
		copy -= left;
		skip = copy;
		to += copy;
		bytes -= copy;
	}
	kunmap(page);

done:
	if (skip == iov->iov_len) {
		iov++;
		skip = 0;
	}
	i->count -= wanted - bytes;
	i->nr_segs -= iov - i->iov;
	i->iov = iov;
	i->iov_offset = skip;
	return wanted - bytes;
}

#ifdef PIPE_PARANOIA
static bool sanity(const struct iov_iter *i)
{
	struct pipe_inode_info *pipe = i->pipe;
	unsigned int p_head = pipe->head;
	unsigned int p_tail = pipe->tail;
	unsigned int p_mask = pipe->ring_size - 1;
	unsigned int p_occupancy = pipe_occupancy(p_head, p_tail);
	unsigned int i_head = i->head;
	unsigned int idx;

	if (i->iov_offset) {
		struct pipe_buffer *p;
		if (unlikely(p_occupancy == 0))
			goto Bad;	// pipe must be non-empty
		if (unlikely(i_head != p_head - 1))
			goto Bad;	// must be at the last buffer...

		p = &pipe->bufs[i_head & p_mask];
		if (unlikely(p->offset + p->len != i->iov_offset))
			goto Bad;	// ... at the end of segment
	} else {
		if (i_head != p_head)
			goto Bad;	// must be right after the last buffer
	}
	return true;
Bad:
	printk(KERN_ERR "idx = %d, offset = %zd\n", i_head, i->iov_offset);
	printk(KERN_ERR "head = %d, tail = %d, buffers = %d\n",
			p_head, p_tail, pipe->ring_size);
	for (idx = 0; idx < pipe->ring_size; idx++)
		printk(KERN_ERR "[%p %p %d %d]\n",
			pipe->bufs[idx].ops,
			pipe->bufs[idx].page,
			pipe->bufs[idx].offset,
			pipe->bufs[idx].len);
	WARN_ON(1);
	return false;
}
#else
#define sanity(i) true
#endif

static size_t copy_page_to_iter_pipe(struct page *page, size_t offset, size_t bytes,
			 struct iov_iter *i)
{
	struct pipe_inode_info *pipe = i->pipe;
	struct pipe_buffer *buf;
	unsigned int p_tail = pipe->tail;
	unsigned int p_mask = pipe->ring_size - 1;
	unsigned int i_head = i->head;
	size_t off;

	if (unlikely(bytes > i->count))
		bytes = i->count;

	if (unlikely(!bytes))
		return 0;

	if (!sanity(i))
		return 0;

	off = i->iov_offset;
	buf = &pipe->bufs[i_head & p_mask];
	if (off) {
		if (offset == off && buf->page == page) {
			/* merge with the last one */
			buf->len += bytes;
			i->iov_offset += bytes;
			goto out;
		}
		i_head++;
		buf = &pipe->bufs[i_head & p_mask];
	}
	if (pipe_full(i_head, p_tail, pipe->max_usage))
		return 0;

	buf->ops = &page_cache_pipe_buf_ops;
	get_page(page);
	buf->page = page;
	buf->offset = offset;
	buf->len = bytes;

	pipe->head = i_head + 1;
	i->iov_offset = offset + bytes;
	i->head = i_head;
out:
	i->count -= bytes;
	return bytes;
}

/*
 * Fault in one or more iovecs of the given iov_iter, to a maximum length of
 * bytes.  For each iovec, fault in each page that constitutes the iovec.
 *
 * Return 0 on success, or non-zero if the memory could not be accessed (i.e.
 * because it is an invalid address).
 */
int iov_iter_fault_in_readable(struct iov_iter *i, size_t bytes)
{
	size_t skip = i->iov_offset;
	const struct iovec *iov;
	int err;
	struct iovec v;

	if (!(i->type & (ITER_BVEC|ITER_KVEC))) {
		iterate_iovec(i, bytes, v, iov, skip, ({
			err = fault_in_pages_readable(v.iov_base, v.iov_len);
			if (unlikely(err))
				return err;
		0;}))
	}
	return 0;
}
EXPORT_SYMBOL(iov_iter_fault_in_readable);

void iov_iter_init(struct iov_iter *i, unsigned int direction,
			const struct iovec *iov, unsigned long nr_segs,
			size_t count)
{
	WARN_ON(direction & ~(READ | WRITE));
	direction &= READ | WRITE;

	/* It will get better.  Eventually... */
	if (uaccess_kernel()) {
		i->type = ITER_KVEC | direction;
		i->kvec = (struct kvec *)iov;
	} else {
		i->type = ITER_IOVEC | direction;
		i->iov = iov;
	}
	i->nr_segs = nr_segs;
	i->iov_offset = 0;
	i->count = count;
}
EXPORT_SYMBOL(iov_iter_init);

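/*
 * Hedged usage sketch (editorial addition, not part of the original file):
 * a caller that already validated a userspace iovec array can wrap it in an
 * iov_iter and drain it with copy_to_iter().  The function name
 * example_send_to_user() and its arguments are hypothetical.
 *
 *	static ssize_t example_send_to_user(const struct iovec *uv,
 *					    unsigned long nr, const void *src,
 *					    size_t len)
 *	{
 *		struct iov_iter iter;
 *
 *		iov_iter_init(&iter, READ, uv, nr, len);
 *		return copy_to_iter(src, len, &iter);	// bytes actually copied
 *	}
 */
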
static void memcpy_from_page(char *to, struct page *page, size_t offset, size_t len)
{
	char *from = kmap_atomic(page);
	memcpy(to, from + offset, len);
	kunmap_atomic(from);
}

static void memcpy_to_page(struct page *page, size_t offset, const char *from, size_t len)
{
	char *to = kmap_atomic(page);
	memcpy(to + offset, from, len);
	kunmap_atomic(to);
}

static void memzero_page(struct page *page, size_t offset, size_t len)
{
	char *addr = kmap_atomic(page);
	memset(addr + offset, 0, len);
	kunmap_atomic(addr);
}

static inline bool allocated(struct pipe_buffer *buf)
{
	return buf->ops == &default_pipe_buf_ops;
}

static inline void data_start(const struct iov_iter *i,
			      unsigned int *iter_headp, size_t *offp)
{
	unsigned int p_mask = i->pipe->ring_size - 1;
	unsigned int iter_head = i->head;
	size_t off = i->iov_offset;

	if (off && (!allocated(&i->pipe->bufs[iter_head & p_mask]) ||
		    off == PAGE_SIZE)) {
		iter_head++;
		off = 0;
	}
	*iter_headp = iter_head;
	*offp = off;
}

static size_t push_pipe(struct iov_iter *i, size_t size,
			int *iter_headp, size_t *offp)
{
	struct pipe_inode_info *pipe = i->pipe;
	unsigned int p_tail = pipe->tail;
	unsigned int p_mask = pipe->ring_size - 1;
	unsigned int iter_head;
	size_t off;
	ssize_t left;

	if (unlikely(size > i->count))
		size = i->count;
	if (unlikely(!size))
		return 0;

	left = size;
	data_start(i, &iter_head, &off);
	*iter_headp = iter_head;
	*offp = off;
	if (off) {
		left -= PAGE_SIZE - off;
		if (left <= 0) {
			pipe->bufs[iter_head & p_mask].len += size;
			return size;
		}
		pipe->bufs[iter_head & p_mask].len = PAGE_SIZE;
		iter_head++;
	}
	while (!pipe_full(iter_head, p_tail, pipe->max_usage)) {
		struct pipe_buffer *buf = &pipe->bufs[iter_head & p_mask];
		struct page *page = alloc_page(GFP_USER);
		if (!page)
			break;

		buf->ops = &default_pipe_buf_ops;
		buf->page = page;
		buf->offset = 0;
		buf->len = min_t(ssize_t, left, PAGE_SIZE);
		left -= buf->len;
		iter_head++;
		pipe->head = iter_head;

		if (left == 0)
			return size;
	}
	return size - left;
}

static size_t copy_pipe_to_iter(const void *addr, size_t bytes,
				struct iov_iter *i)
{
	struct pipe_inode_info *pipe = i->pipe;
	unsigned int p_mask = pipe->ring_size - 1;
	unsigned int i_head;
	size_t n, off;

	if (!sanity(i))
		return 0;

	bytes = n = push_pipe(i, bytes, &i_head, &off);
	if (unlikely(!n))
		return 0;
	do {
		size_t chunk = min_t(size_t, n, PAGE_SIZE - off);
		memcpy_to_page(pipe->bufs[i_head & p_mask].page, off, addr, chunk);
		i->head = i_head;
		i->iov_offset = off + chunk;
		n -= chunk;
		addr += chunk;
		off = 0;
		i_head++;
	} while (n);
	i->count -= bytes;
	return bytes;
}

static __wsum csum_and_memcpy(void *to, const void *from, size_t len,
			      __wsum sum, size_t off)
{
	__wsum next = csum_partial_copy_nocheck(from, to, len);
	return csum_block_add(sum, next, off);
}

static size_t csum_and_copy_to_pipe_iter(const void *addr, size_t bytes,
					 __wsum *csum, struct iov_iter *i)
{
	struct pipe_inode_info *pipe = i->pipe;
	unsigned int p_mask = pipe->ring_size - 1;
	unsigned int i_head;
	size_t n, r;
	size_t off = 0;
	__wsum sum = *csum;

	if (!sanity(i))
		return 0;

	bytes = n = push_pipe(i, bytes, &i_head, &r);
	if (unlikely(!n))
		return 0;
	do {
		size_t chunk = min_t(size_t, n, PAGE_SIZE - r);
		char *p = kmap_atomic(pipe->bufs[i_head & p_mask].page);
		sum = csum_and_memcpy(p + r, addr, chunk, sum, off);
		kunmap_atomic(p);
		i->head = i_head;
		i->iov_offset = r + chunk;
		n -= chunk;
		off += chunk;
		addr += chunk;
		r = 0;
		i_head++;
	} while (n);
	i->count -= bytes;
	*csum = sum;
	return bytes;
}

size_t _copy_to_iter(const void *addr, size_t bytes, struct iov_iter *i)
{
	const char *from = addr;
	if (unlikely(iov_iter_is_pipe(i)))
		return copy_pipe_to_iter(addr, bytes, i);
	if (iter_is_iovec(i))
		might_fault();
	iterate_and_advance(i, bytes, v,
		copyout(v.iov_base, (from += v.iov_len) - v.iov_len, v.iov_len),
		memcpy_to_page(v.bv_page, v.bv_offset,
			       (from += v.bv_len) - v.bv_len, v.bv_len),
		memcpy(v.iov_base, (from += v.iov_len) - v.iov_len, v.iov_len)
	)

	return bytes;
}
EXPORT_SYMBOL(_copy_to_iter);

#ifdef CONFIG_ARCH_HAS_COPY_MC
static int copyout_mc(void __user *to, const void *from, size_t n)
{
	if (access_ok(to, n)) {
		instrument_copy_to_user(to, from, n);
		n = copy_mc_to_user((__force void *) to, from, n);
	}
	return n;
}

static unsigned long copy_mc_to_page(struct page *page, size_t offset,
				     const char *from, size_t len)
{
	unsigned long ret;
	char *to;

	to = kmap_atomic(page);
	ret = copy_mc_to_kernel(to + offset, from, len);
	kunmap_atomic(to);

	return ret;
}

static size_t copy_mc_pipe_to_iter(const void *addr, size_t bytes,
				   struct iov_iter *i)
{
	struct pipe_inode_info *pipe = i->pipe;
	unsigned int p_mask = pipe->ring_size - 1;
	unsigned int i_head;
	size_t n, off, xfer = 0;

	if (!sanity(i))
		return 0;

	bytes = n = push_pipe(i, bytes, &i_head, &off);
	if (unlikely(!n))
		return 0;
	do {
		size_t chunk = min_t(size_t, n, PAGE_SIZE - off);
		unsigned long rem;

		rem = copy_mc_to_page(pipe->bufs[i_head & p_mask].page,
				      off, addr, chunk);
		i->head = i_head;
		i->iov_offset = off + chunk - rem;
		xfer += chunk - rem;
		if (rem)
			break;
		n -= chunk;
		addr += chunk;
		off = 0;
		i_head++;
	} while (n);
	i->count -= xfer;
	return xfer;
}

/**
 * _copy_mc_to_iter - copy to iter with source memory error exception handling
 * @addr: source kernel address
 * @bytes: total transfer length
 * @i: destination iterator
 *
 * The pmem driver deploys this for the dax operation
 * (dax_copy_to_iter()) for dax reads (bypass page-cache and the
 * block-layer). Upon #MC read(2) aborts and returns EIO or the bytes
 * successfully copied.
 *
 * The main differences between this and typical _copy_to_iter() are:
 *
 * * Typical tail/residue handling after a fault retries the copy
 *   byte-by-byte until the fault happens again. Re-triggering machine
 *   checks is potentially fatal so the implementation uses source
 *   alignment and poison alignment assumptions to avoid re-triggering
 *   hardware exceptions.
 *
 * * ITER_KVEC, ITER_PIPE, and ITER_BVEC can return short copies.
 *   Compare to copy_to_iter() where only ITER_IOVEC attempts might return
 *   a short copy.
 */
size_t _copy_mc_to_iter(const void *addr, size_t bytes, struct iov_iter *i)
{
	const char *from = addr;
	unsigned long rem, curr_addr, s_addr = (unsigned long) addr;

	if (unlikely(iov_iter_is_pipe(i)))
		return copy_mc_pipe_to_iter(addr, bytes, i);
	if (iter_is_iovec(i))
		might_fault();
	iterate_and_advance(i, bytes, v,
		copyout_mc(v.iov_base, (from += v.iov_len) - v.iov_len,
			   v.iov_len),
		({
		rem = copy_mc_to_page(v.bv_page, v.bv_offset,
				      (from += v.bv_len) - v.bv_len, v.bv_len);
		if (rem) {
			curr_addr = (unsigned long) from;
			bytes = curr_addr - s_addr - rem;
			return bytes;
		}
		}),
		({
		rem = copy_mc_to_kernel(v.iov_base, (from += v.iov_len)
					- v.iov_len, v.iov_len);
		if (rem) {
			curr_addr = (unsigned long) from;
			bytes = curr_addr - s_addr - rem;
			return bytes;
		}
		})
	)

	return bytes;
}
EXPORT_SYMBOL_GPL(_copy_mc_to_iter);
#endif /* CONFIG_ARCH_HAS_COPY_MC */

size_t _copy_from_iter(void *addr, size_t bytes, struct iov_iter *i)
{
	char *to = addr;
	if (unlikely(iov_iter_is_pipe(i))) {
		WARN_ON(1);
		return 0;
	}
	if (iter_is_iovec(i))
		might_fault();
	iterate_and_advance(i, bytes, v,
		copyin((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len),
		memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page,
				 v.bv_offset, v.bv_len),
		memcpy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len)
	)

	return bytes;
}
EXPORT_SYMBOL(_copy_from_iter);

bool _copy_from_iter_full(void *addr, size_t bytes, struct iov_iter *i)
{
	char *to = addr;
	if (unlikely(iov_iter_is_pipe(i))) {
		WARN_ON(1);
		return false;
	}
	if (unlikely(i->count < bytes))
		return false;

	if (iter_is_iovec(i))
		might_fault();
	iterate_all_kinds(i, bytes, v, ({
		if (copyin((to += v.iov_len) - v.iov_len,
			   v.iov_base, v.iov_len))
			return false;
		0;}),
		memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page,
				 v.bv_offset, v.bv_len),
		memcpy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len)
	)

	iov_iter_advance(i, bytes);
	return true;
}
EXPORT_SYMBOL(_copy_from_iter_full);

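/*
 * Hedged usage sketch (editorial addition, not part of the original file):
 * a typical ->write_iter() style consumer pulls data out of a source
 * iterator into a kernel buffer and treats a short copy as -EFAULT.  The
 * function name example_gather() is hypothetical.
 *
 *	static int example_gather(void *kbuf, size_t len, struct iov_iter *from)
 *	{
 *		if (copy_from_iter(kbuf, len, from) != len)
 *			return -EFAULT;		// short copy: bad user address
 *		return 0;
 *	}
 */
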
size_t _copy_from_iter_nocache(void *addr, size_t bytes, struct iov_iter *i)
{
	char *to = addr;
	if (unlikely(iov_iter_is_pipe(i))) {
		WARN_ON(1);
		return 0;
	}
	iterate_and_advance(i, bytes, v,
		__copy_from_user_inatomic_nocache((to += v.iov_len) - v.iov_len,
					 v.iov_base, v.iov_len),
		memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page,
				 v.bv_offset, v.bv_len),
		memcpy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len)
	)

	return bytes;
}
EXPORT_SYMBOL(_copy_from_iter_nocache);

#ifdef CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE
/**
 * _copy_from_iter_flushcache - write destination through cpu cache
 * @addr: destination kernel address
 * @bytes: total transfer length
 * @i: source iterator
 *
 * The pmem driver arranges for filesystem-dax to use this facility via
 * dax_copy_from_iter() for ensuring that writes to persistent memory
 * are flushed through the CPU cache. It is differentiated from
 * _copy_from_iter_nocache() in that it guarantees all data is flushed for
 * all iterator types. The _copy_from_iter_nocache() only attempts to
 * bypass the cache for the ITER_IOVEC case, and on some archs may use
 * instructions that strand dirty-data in the cache.
 */
size_t _copy_from_iter_flushcache(void *addr, size_t bytes, struct iov_iter *i)
{
	char *to = addr;
	if (unlikely(iov_iter_is_pipe(i))) {
		WARN_ON(1);
		return 0;
	}
	iterate_and_advance(i, bytes, v,
		__copy_from_user_flushcache((to += v.iov_len) - v.iov_len,
					    v.iov_base, v.iov_len),
		memcpy_page_flushcache((to += v.bv_len) - v.bv_len, v.bv_page,
				       v.bv_offset, v.bv_len),
		memcpy_flushcache((to += v.iov_len) - v.iov_len, v.iov_base,
				  v.iov_len)
	)

	return bytes;
}
EXPORT_SYMBOL_GPL(_copy_from_iter_flushcache);
#endif

bool _copy_from_iter_full_nocache(void *addr, size_t bytes, struct iov_iter *i)
{
	char *to = addr;
	if (unlikely(iov_iter_is_pipe(i))) {
		WARN_ON(1);
		return false;
	}
	if (unlikely(i->count < bytes))
		return false;
	iterate_all_kinds(i, bytes, v, ({
		if (__copy_from_user_inatomic_nocache((to += v.iov_len) - v.iov_len,
						      v.iov_base, v.iov_len))
			return false;
		0;}),
		memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page,
				 v.bv_offset, v.bv_len),
		memcpy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len)
	)

	iov_iter_advance(i, bytes);
	return true;
}
EXPORT_SYMBOL(_copy_from_iter_full_nocache);

static inline bool page_copy_sane(struct page *page, size_t offset, size_t n)
{
	struct page *head;
	size_t v = n + offset;

	/*
	 * The general case needs to access the page order in order
	 * to compute the page size.
	 * However, we mostly deal with order-0 pages and thus can
	 * avoid a possible cache line miss for requests that fit all
	 * page orders.
	 */
	if (n <= v && v <= PAGE_SIZE)
		return true;

	head = compound_head(page);
	v += (page - head) << PAGE_SHIFT;

	if (likely(n <= v && v <= (page_size(head))))
		return true;
	WARN_ON(1);
	return false;
}

size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes,
			 struct iov_iter *i)
{
	if (unlikely(!page_copy_sane(page, offset, bytes)))
		return 0;
	if (i->type & (ITER_BVEC|ITER_KVEC)) {
		void *kaddr = kmap_atomic(page);
		size_t wanted = copy_to_iter(kaddr + offset, bytes, i);
		kunmap_atomic(kaddr);
		return wanted;
	} else if (unlikely(iov_iter_is_discard(i)))
		return bytes;
	else if (likely(!iov_iter_is_pipe(i)))
		return copy_page_to_iter_iovec(page, offset, bytes, i);
	else
		return copy_page_to_iter_pipe(page, offset, bytes, i);
}
EXPORT_SYMBOL(copy_page_to_iter);

size_t copy_page_from_iter(struct page *page, size_t offset, size_t bytes,
			 struct iov_iter *i)
{
	if (unlikely(!page_copy_sane(page, offset, bytes)))
		return 0;
	if (unlikely(iov_iter_is_pipe(i) || iov_iter_is_discard(i))) {
		WARN_ON(1);
		return 0;
	}
	if (i->type & (ITER_BVEC|ITER_KVEC)) {
		void *kaddr = kmap_atomic(page);
		size_t wanted = _copy_from_iter(kaddr + offset, bytes, i);
		kunmap_atomic(kaddr);
		return wanted;
	} else
		return copy_page_from_iter_iovec(page, offset, bytes, i);
}
EXPORT_SYMBOL(copy_page_from_iter);

static size_t pipe_zero(size_t bytes, struct iov_iter *i)
{
	struct pipe_inode_info *pipe = i->pipe;
	unsigned int p_mask = pipe->ring_size - 1;
	unsigned int i_head;
	size_t n, off;

	if (!sanity(i))
		return 0;

	bytes = n = push_pipe(i, bytes, &i_head, &off);
	if (unlikely(!n))
		return 0;

	do {
		size_t chunk = min_t(size_t, n, PAGE_SIZE - off);
		memzero_page(pipe->bufs[i_head & p_mask].page, off, chunk);
		i->head = i_head;
		i->iov_offset = off + chunk;
		n -= chunk;
		off = 0;
		i_head++;
	} while (n);
	i->count -= bytes;
	return bytes;
}

size_t iov_iter_zero(size_t bytes, struct iov_iter *i)
{
	if (unlikely(iov_iter_is_pipe(i)))
		return pipe_zero(bytes, i);
	iterate_and_advance(i, bytes, v,
		clear_user(v.iov_base, v.iov_len),
		memzero_page(v.bv_page, v.bv_offset, v.bv_len),
		memset(v.iov_base, 0, v.iov_len)
	)

	return bytes;
}
EXPORT_SYMBOL(iov_iter_zero);

size_t iov_iter_copy_from_user_atomic(struct page *page,
		struct iov_iter *i, unsigned long offset, size_t bytes)
{
	char *kaddr = kmap_atomic(page), *p = kaddr + offset;
	if (unlikely(!page_copy_sane(page, offset, bytes))) {
		kunmap_atomic(kaddr);
		return 0;
	}
	if (unlikely(iov_iter_is_pipe(i) || iov_iter_is_discard(i))) {
		kunmap_atomic(kaddr);
		WARN_ON(1);
		return 0;
	}
	iterate_all_kinds(i, bytes, v,
		copyin((p += v.iov_len) - v.iov_len, v.iov_base, v.iov_len),
		memcpy_from_page((p += v.bv_len) - v.bv_len, v.bv_page,
				 v.bv_offset, v.bv_len),
		memcpy((p += v.iov_len) - v.iov_len, v.iov_base, v.iov_len)
	)
	kunmap_atomic(kaddr);
	return bytes;
}
EXPORT_SYMBOL(iov_iter_copy_from_user_atomic);

static inline void pipe_truncate(struct iov_iter *i)
{
	struct pipe_inode_info *pipe = i->pipe;
	unsigned int p_tail = pipe->tail;
	unsigned int p_head = pipe->head;
	unsigned int p_mask = pipe->ring_size - 1;

	if (!pipe_empty(p_head, p_tail)) {
		struct pipe_buffer *buf;
		unsigned int i_head = i->head;
		size_t off = i->iov_offset;

		if (off) {
			buf = &pipe->bufs[i_head & p_mask];
			buf->len = off - buf->offset;
			i_head++;
		}
		while (p_head != i_head) {
			p_head--;
			pipe_buf_release(pipe, &pipe->bufs[p_head & p_mask]);
		}

		pipe->head = p_head;
	}
}

static void pipe_advance(struct iov_iter *i, size_t size)
{
	struct pipe_inode_info *pipe = i->pipe;
	if (unlikely(i->count < size))
		size = i->count;
	if (size) {
		struct pipe_buffer *buf;
		unsigned int p_mask = pipe->ring_size - 1;
		unsigned int i_head = i->head;
		size_t off = i->iov_offset, left = size;

		if (off) /* make it relative to the beginning of buffer */
			left += off - pipe->bufs[i_head & p_mask].offset;
		while (1) {
			buf = &pipe->bufs[i_head & p_mask];
			if (left <= buf->len)
				break;
			left -= buf->len;
			i_head++;
		}
		i->head = i_head;
		i->iov_offset = buf->offset + left;
	}
	i->count -= size;
	/* ... and discard everything past that point */
	pipe_truncate(i);
}

void iov_iter_advance(struct iov_iter *i, size_t size)
{
	if (unlikely(iov_iter_is_pipe(i))) {
		pipe_advance(i, size);
		return;
	}
	if (unlikely(iov_iter_is_discard(i))) {
		i->count -= size;
		return;
	}
	iterate_and_advance(i, size, v, 0, 0, 0)
}
EXPORT_SYMBOL(iov_iter_advance);

void iov_iter_revert(struct iov_iter *i, size_t unroll)
{
	if (!unroll)
		return;
	if (WARN_ON(unroll > MAX_RW_COUNT))
		return;
	i->count += unroll;
	if (unlikely(iov_iter_is_pipe(i))) {
		struct pipe_inode_info *pipe = i->pipe;
		unsigned int p_mask = pipe->ring_size - 1;
		unsigned int i_head = i->head;
		size_t off = i->iov_offset;
		while (1) {
			struct pipe_buffer *b = &pipe->bufs[i_head & p_mask];
			size_t n = off - b->offset;
			if (unroll < n) {
				off -= unroll;
				break;
			}
			unroll -= n;
			if (!unroll && i_head == i->start_head) {
				off = 0;
				break;
			}
			i_head--;
			b = &pipe->bufs[i_head & p_mask];
			off = b->offset + b->len;
		}
		i->iov_offset = off;
		i->head = i_head;
		pipe_truncate(i);
		return;
	}
	if (unlikely(iov_iter_is_discard(i)))
		return;
	if (unroll <= i->iov_offset) {
		i->iov_offset -= unroll;
		return;
	}
	unroll -= i->iov_offset;
	if (iov_iter_is_bvec(i)) {
		const struct bio_vec *bvec = i->bvec;
		while (1) {
			size_t n = (--bvec)->bv_len;
			i->nr_segs++;
			if (unroll <= n) {
				i->bvec = bvec;
				i->iov_offset = n - unroll;
				return;
			}
			unroll -= n;
		}
	} else { /* same logics for iovec and kvec */
		const struct iovec *iov = i->iov;
		while (1) {
			size_t n = (--iov)->iov_len;
			i->nr_segs++;
			if (unroll <= n) {
				i->iov = iov;
				i->iov_offset = n - unroll;
				return;
			}
			unroll -= n;
		}
	}
}
EXPORT_SYMBOL(iov_iter_revert);

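/*
 * Hedged usage sketch (editorial addition, not part of the original file):
 * a caller that consumed part of an iterator speculatively can wind it back
 * with iov_iter_revert().  The helper name example_try_copy() is
 * hypothetical.
 *
 *	static size_t example_try_copy(void *kbuf, size_t len, struct iov_iter *from)
 *	{
 *		size_t copied = copy_from_iter(kbuf, len, from);
 *
 *		if (copied != len) {
 *			iov_iter_revert(from, copied);	// undo the partial advance
 *			return 0;
 *		}
 *		return copied;
 *	}
 */
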
/*
 * Return the count of just the current iov_iter segment.
 */
size_t iov_iter_single_seg_count(const struct iov_iter *i)
{
	if (unlikely(iov_iter_is_pipe(i)))
		return i->count;	// it is a silly place, anyway
	if (i->nr_segs == 1)
		return i->count;
	if (unlikely(iov_iter_is_discard(i)))
		return i->count;
	else if (iov_iter_is_bvec(i))
		return min(i->count, i->bvec->bv_len - i->iov_offset);
	else
		return min(i->count, i->iov->iov_len - i->iov_offset);
}
EXPORT_SYMBOL(iov_iter_single_seg_count);

void iov_iter_kvec(struct iov_iter *i, unsigned int direction,
			const struct kvec *kvec, unsigned long nr_segs,
			size_t count)
{
	WARN_ON(direction & ~(READ | WRITE));
	i->type = ITER_KVEC | (direction & (READ | WRITE));
	i->kvec = kvec;
	i->nr_segs = nr_segs;
	i->iov_offset = 0;
	i->count = count;
}
EXPORT_SYMBOL(iov_iter_kvec);

void iov_iter_bvec(struct iov_iter *i, unsigned int direction,
			const struct bio_vec *bvec, unsigned long nr_segs,
			size_t count)
{
	WARN_ON(direction & ~(READ | WRITE));
	i->type = ITER_BVEC | (direction & (READ | WRITE));
	i->bvec = bvec;
	i->nr_segs = nr_segs;
	i->iov_offset = 0;
	i->count = count;
}
EXPORT_SYMBOL(iov_iter_bvec);

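/*
 * Hedged usage sketch (editorial addition, not part of the original file):
 * kernel_read()-style callers wrap a kernel buffer in an ITER_KVEC so the
 * same ->read_iter()/->write_iter() paths can serve kernel and user memory.
 * The function name example_kernel_sink() is hypothetical.
 *
 *	static size_t example_kernel_sink(void *kbuf, size_t len, const void *src)
 *	{
 *		struct kvec kv = { .iov_base = kbuf, .iov_len = len };
 *		struct iov_iter iter;
 *
 *		iov_iter_kvec(&iter, READ, &kv, 1, len);
 *		return copy_to_iter(src, len, &iter);
 *	}
 */
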
void iov_iter_pipe(struct iov_iter *i, unsigned int direction,
			struct pipe_inode_info *pipe,
			size_t count)
{
	BUG_ON(direction != READ);
	WARN_ON(pipe_full(pipe->head, pipe->tail, pipe->ring_size));
	i->type = ITER_PIPE | READ;
	i->pipe = pipe;
	i->head = pipe->head;
	i->iov_offset = 0;
	i->count = count;
	i->start_head = i->head;
}
EXPORT_SYMBOL(iov_iter_pipe);

/**
 * iov_iter_discard - Initialise an I/O iterator that discards data
 * @i: The iterator to initialise.
 * @direction: The direction of the transfer.
 * @count: The size of the I/O buffer in bytes.
 *
 * Set up an I/O iterator that just discards everything that's written to it.
 * It's only available as a READ iterator.
 */
void iov_iter_discard(struct iov_iter *i, unsigned int direction, size_t count)
{
	BUG_ON(direction != READ);
	i->type = ITER_DISCARD | READ;
	i->count = count;
	i->iov_offset = 0;
}
EXPORT_SYMBOL(iov_iter_discard);

unsigned long iov_iter_alignment(const struct iov_iter *i)
{
	unsigned long res = 0;
	size_t size = i->count;

	if (unlikely(iov_iter_is_pipe(i))) {
		unsigned int p_mask = i->pipe->ring_size - 1;

		if (size && i->iov_offset && allocated(&i->pipe->bufs[i->head & p_mask]))
			return size | i->iov_offset;
		return size;
	}
	iterate_all_kinds(i, size, v,
		(res |= (unsigned long)v.iov_base | v.iov_len, 0),
		res |= v.bv_offset | v.bv_len,
		res |= (unsigned long)v.iov_base | v.iov_len
	)
	return res;
}
EXPORT_SYMBOL(iov_iter_alignment);

unsigned long iov_iter_gap_alignment(const struct iov_iter *i)
{
	unsigned long res = 0;
	size_t size = i->count;

	if (unlikely(iov_iter_is_pipe(i) || iov_iter_is_discard(i))) {
		WARN_ON(1);
		return ~0U;
	}

	iterate_all_kinds(i, size, v,
		(res |= (!res ? 0 : (unsigned long)v.iov_base) |
			(size != v.iov_len ? size : 0), 0),
		(res |= (!res ? 0 : (unsigned long)v.bv_offset) |
			(size != v.bv_len ? size : 0)),
		(res |= (!res ? 0 : (unsigned long)v.iov_base) |
			(size != v.iov_len ? size : 0))
		);
	return res;
}
EXPORT_SYMBOL(iov_iter_gap_alignment);

static inline ssize_t __pipe_get_pages(struct iov_iter *i,
				size_t maxsize,
				struct page **pages,
				int iter_head,
				size_t *start)
{
	struct pipe_inode_info *pipe = i->pipe;
	unsigned int p_mask = pipe->ring_size - 1;
	ssize_t n = push_pipe(i, maxsize, &iter_head, start);
	if (!n)
		return -EFAULT;

	maxsize = n;
	n += *start;
	while (n > 0) {
		get_page(*pages++ = pipe->bufs[iter_head & p_mask].page);
		iter_head++;
		n -= PAGE_SIZE;
	}

	return maxsize;
}

static ssize_t pipe_get_pages(struct iov_iter *i,
		   struct page **pages, size_t maxsize, unsigned maxpages,
		   size_t *start)
{
	unsigned int iter_head, npages;
	size_t capacity;

	if (!maxsize)
		return 0;

	if (!sanity(i))
		return -EFAULT;

	data_start(i, &iter_head, start);
	/* Amount of free space: some of this one + all after this one */
	npages = pipe_space_for_user(iter_head, i->pipe->tail, i->pipe);
	capacity = min(npages, maxpages) * PAGE_SIZE - *start;

	return __pipe_get_pages(i, min(maxsize, capacity), pages, iter_head, start);
}

ssize_t iov_iter_get_pages(struct iov_iter *i,
		   struct page **pages, size_t maxsize, unsigned maxpages,
		   size_t *start)
{
	if (maxsize > i->count)
		maxsize = i->count;

	if (unlikely(iov_iter_is_pipe(i)))
		return pipe_get_pages(i, pages, maxsize, maxpages, start);
	if (unlikely(iov_iter_is_discard(i)))
		return -EFAULT;

	iterate_all_kinds(i, maxsize, v, ({
		unsigned long addr = (unsigned long)v.iov_base;
		size_t len = v.iov_len + (*start = addr & (PAGE_SIZE - 1));
		int n;
		int res;

		if (len > maxpages * PAGE_SIZE)
			len = maxpages * PAGE_SIZE;
		addr &= ~(PAGE_SIZE - 1);
		n = DIV_ROUND_UP(len, PAGE_SIZE);
		res = get_user_pages_fast(addr, n,
				iov_iter_rw(i) != WRITE ?  FOLL_WRITE : 0,
				pages);
		if (unlikely(res < 0))
			return res;
		return (res == n ? len : res * PAGE_SIZE) - *start;
	0;}),({
		/* can't be more than PAGE_SIZE */
		*start = v.bv_offset;
		get_page(*pages = v.bv_page);
		return v.bv_len;
	}),({
		return -EFAULT;
	})
	)
	return 0;
}
EXPORT_SYMBOL(iov_iter_get_pages);

static struct page **get_pages_array(size_t n)
{
	return kvmalloc_array(n, sizeof(struct page *), GFP_KERNEL);
}

static ssize_t pipe_get_pages_alloc(struct iov_iter *i,
		   struct page ***pages, size_t maxsize,
		   size_t *start)
{
	struct page **p;
	unsigned int iter_head, npages;
	ssize_t n;

	if (!maxsize)
		return 0;

	if (!sanity(i))
		return -EFAULT;

	data_start(i, &iter_head, start);
	/* Amount of free space: some of this one + all after this one */
	npages = pipe_space_for_user(iter_head, i->pipe->tail, i->pipe);
	n = npages * PAGE_SIZE - *start;
	if (maxsize > n)
		maxsize = n;
	else
		npages = DIV_ROUND_UP(maxsize + *start, PAGE_SIZE);
	p = get_pages_array(npages);
	if (!p)
		return -ENOMEM;
	n = __pipe_get_pages(i, maxsize, p, iter_head, start);
	if (n > 0)
		*pages = p;
	else
		kvfree(p);
	return n;
}

ssize_t iov_iter_get_pages_alloc(struct iov_iter *i,
		   struct page ***pages, size_t maxsize,
		   size_t *start)
{
	struct page **p;

	if (maxsize > i->count)
		maxsize = i->count;

	if (unlikely(iov_iter_is_pipe(i)))
		return pipe_get_pages_alloc(i, pages, maxsize, start);
	if (unlikely(iov_iter_is_discard(i)))
		return -EFAULT;

	iterate_all_kinds(i, maxsize, v, ({
		unsigned long addr = (unsigned long)v.iov_base;
		size_t len = v.iov_len + (*start = addr & (PAGE_SIZE - 1));
		int n;
		int res;

		addr &= ~(PAGE_SIZE - 1);
		n = DIV_ROUND_UP(len, PAGE_SIZE);
		p = get_pages_array(n);
		if (!p)
			return -ENOMEM;
		res = get_user_pages_fast(addr, n,
				iov_iter_rw(i) != WRITE ?  FOLL_WRITE : 0, p);
		if (unlikely(res < 0)) {
			kvfree(p);
			return res;
		}
		*pages = p;
		return (res == n ? len : res * PAGE_SIZE) - *start;
	0;}),({
		/* can't be more than PAGE_SIZE */
		*start = v.bv_offset;
		*pages = p = get_pages_array(1);
		if (!p)
			return -ENOMEM;
		get_page(*p = v.bv_page);
		return v.bv_len;
	}),({
		return -EFAULT;
	})
	)
	return 0;
}
EXPORT_SYMBOL(iov_iter_get_pages_alloc);

size_t csum_and_copy_from_iter(void *addr, size_t bytes, __wsum *csum,
			       struct iov_iter *i)
{
	char *to = addr;
	__wsum sum, next;
	size_t off = 0;
	sum = *csum;
	if (unlikely(iov_iter_is_pipe(i) || iov_iter_is_discard(i))) {
		WARN_ON(1);
		return 0;
	}
	iterate_and_advance(i, bytes, v, ({
		next = csum_and_copy_from_user(v.iov_base,
					       (to += v.iov_len) - v.iov_len,
					       v.iov_len);
		if (next) {
			sum = csum_block_add(sum, next, off);
			off += v.iov_len;
		}
		next ? 0 : v.iov_len;
	}), ({
		char *p = kmap_atomic(v.bv_page);
		sum = csum_and_memcpy((to += v.bv_len) - v.bv_len,
				      p + v.bv_offset, v.bv_len,
				      sum, off);
		kunmap_atomic(p);
		off += v.bv_len;
	}),({
		sum = csum_and_memcpy((to += v.iov_len) - v.iov_len,
				      v.iov_base, v.iov_len,
				      sum, off);
		off += v.iov_len;
	})
	)
	*csum = sum;
	return bytes;
}
EXPORT_SYMBOL(csum_and_copy_from_iter);

bool csum_and_copy_from_iter_full(void *addr, size_t bytes, __wsum *csum,
				  struct iov_iter *i)
{
	char *to = addr;
	__wsum sum, next;
	size_t off = 0;
	sum = *csum;
	if (unlikely(iov_iter_is_pipe(i) || iov_iter_is_discard(i))) {
		WARN_ON(1);
		return false;
	}
	if (unlikely(i->count < bytes))
		return false;
	iterate_all_kinds(i, bytes, v, ({
		next = csum_and_copy_from_user(v.iov_base,
					       (to += v.iov_len) - v.iov_len,
					       v.iov_len);
		if (!next)
			return false;
		sum = csum_block_add(sum, next, off);
		off += v.iov_len;
		0;
	}), ({
		char *p = kmap_atomic(v.bv_page);
		sum = csum_and_memcpy((to += v.bv_len) - v.bv_len,
				      p + v.bv_offset, v.bv_len,
				      sum, off);
		kunmap_atomic(p);
		off += v.bv_len;
	}),({
		sum = csum_and_memcpy((to += v.iov_len) - v.iov_len,
				      v.iov_base, v.iov_len,
				      sum, off);
		off += v.iov_len;
	})
	)
	*csum = sum;
	iov_iter_advance(i, bytes);
	return true;
}
EXPORT_SYMBOL(csum_and_copy_from_iter_full);

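/*
 * Hedged usage sketch (editorial addition, not part of the original file):
 * networking code typically folds the copy and the Internet checksum into a
 * single pass over the user data.  The helper name example_csum_pull() is
 * hypothetical.
 *
 *	static int example_csum_pull(void *kbuf, size_t len, struct iov_iter *from,
 *				     __wsum *csump)
 *	{
 *		*csump = 0;		// running checksum, folded by the caller
 *		if (!csum_and_copy_from_iter_full(kbuf, len, csump, from))
 *			return -EFAULT;	// short copy; iterator left unadvanced
 *		return 0;
 *	}
 */
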
size_t csum_and_copy_to_iter(const void *addr, size_t bytes, void *csump,
			     struct iov_iter *i)
{
	const char *from = addr;
	__wsum *csum = csump;
	__wsum sum, next;
	size_t off = 0;

	if (unlikely(iov_iter_is_pipe(i)))
		return csum_and_copy_to_pipe_iter(addr, bytes, csum, i);

	sum = *csum;
	if (unlikely(iov_iter_is_discard(i))) {
		WARN_ON(1);	/* for now */
		return 0;
	}
	iterate_and_advance(i, bytes, v, ({
		next = csum_and_copy_to_user((from += v.iov_len) - v.iov_len,
					     v.iov_base,
					     v.iov_len);
		if (next) {
			sum = csum_block_add(sum, next, off);
			off += v.iov_len;
		}
		next ? 0 : v.iov_len;
	}), ({
		char *p = kmap_atomic(v.bv_page);
		sum = csum_and_memcpy(p + v.bv_offset,
				      (from += v.bv_len) - v.bv_len,
				      v.bv_len, sum, off);
		kunmap_atomic(p);
		off += v.bv_len;
	}),({
		sum = csum_and_memcpy(v.iov_base,
				      (from += v.iov_len) - v.iov_len,
				      v.iov_len, sum, off);
		off += v.iov_len;
	})
	)
	*csum = sum;
	return bytes;
}
EXPORT_SYMBOL(csum_and_copy_to_iter);

size_t hash_and_copy_to_iter(const void *addr, size_t bytes, void *hashp,
		struct iov_iter *i)
{
#ifdef CONFIG_CRYPTO_HASH
	struct ahash_request *hash = hashp;
	struct scatterlist sg;
	size_t copied;

	copied = copy_to_iter(addr, bytes, i);
	sg_init_one(&sg, addr, copied);
	ahash_request_set_crypt(hash, &sg, NULL, copied);
	crypto_ahash_update(hash);
	return copied;
#else
	return 0;
#endif
}
EXPORT_SYMBOL(hash_and_copy_to_iter);

int iov_iter_npages(const struct iov_iter *i, int maxpages)
{
	size_t size = i->count;
	int npages = 0;

	if (!size)
		return 0;
	if (unlikely(iov_iter_is_discard(i)))
		return 0;

	if (unlikely(iov_iter_is_pipe(i))) {
		struct pipe_inode_info *pipe = i->pipe;
		unsigned int iter_head;
		size_t off;

		if (!sanity(i))
			return 0;

		data_start(i, &iter_head, &off);
		/* some of this one + all after this one */
		npages = pipe_space_for_user(iter_head, pipe->tail, pipe);
		if (npages >= maxpages)
			return maxpages;
	} else iterate_all_kinds(i, size, v, ({
		unsigned long p = (unsigned long)v.iov_base;
		npages += DIV_ROUND_UP(p + v.iov_len, PAGE_SIZE)
			- p / PAGE_SIZE;
		if (npages >= maxpages)
			return maxpages;
	0;}),({
		npages++;
		if (npages >= maxpages)
			return maxpages;
	}),({
		unsigned long p = (unsigned long)v.iov_base;
		npages += DIV_ROUND_UP(p + v.iov_len, PAGE_SIZE)
			- p / PAGE_SIZE;
		if (npages >= maxpages)
			return maxpages;
	})
	)
	return npages;
}
EXPORT_SYMBOL(iov_iter_npages);

const void *dup_iter(struct iov_iter *new, struct iov_iter *old, gfp_t flags)
{
	*new = *old;
	if (unlikely(iov_iter_is_pipe(new))) {
		WARN_ON(1);
		return NULL;
	}
	if (unlikely(iov_iter_is_discard(new)))
		return NULL;
	if (iov_iter_is_bvec(new))
		return new->bvec = kmemdup(new->bvec,
				    new->nr_segs * sizeof(struct bio_vec),
				    flags);
	else
		/* iovec and kvec have identical layout */
		return new->iov = kmemdup(new->iov,
				   new->nr_segs * sizeof(struct iovec),
				   flags);
}
EXPORT_SYMBOL(dup_iter);

static int copy_compat_iovec_from_user(struct iovec *iov,
		const struct iovec __user *uvec, unsigned long nr_segs)
{
	const struct compat_iovec __user *uiov =
		(const struct compat_iovec __user *)uvec;
	int ret = -EFAULT, i;

	if (!user_access_begin(uvec, nr_segs * sizeof(*uvec)))
		return -EFAULT;

	for (i = 0; i < nr_segs; i++) {
		compat_uptr_t buf;
		compat_ssize_t len;

		unsafe_get_user(len, &uiov[i].iov_len, uaccess_end);
		unsafe_get_user(buf, &uiov[i].iov_base, uaccess_end);

		/* check for compat_size_t not fitting in compat_ssize_t .. */
		if (len < 0) {
			ret = -EINVAL;
			goto uaccess_end;
		}
		iov[i].iov_base = compat_ptr(buf);
		iov[i].iov_len = len;
	}

	ret = 0;
uaccess_end:
	user_access_end();
	return ret;
}

static int copy_iovec_from_user(struct iovec *iov,
		const struct iovec __user *uvec, unsigned long nr_segs)
{
	unsigned long seg;

	if (copy_from_user(iov, uvec, nr_segs * sizeof(*uvec)))
		return -EFAULT;
	for (seg = 0; seg < nr_segs; seg++) {
		if ((ssize_t)iov[seg].iov_len < 0)
			return -EINVAL;
	}

	return 0;
}

struct iovec *iovec_from_user(const struct iovec __user *uvec,
		unsigned long nr_segs, unsigned long fast_segs,
		struct iovec *fast_iov, bool compat)
{
	struct iovec *iov = fast_iov;
	int ret;

	/*
	 * SuS says "The readv() function *may* fail if the iovcnt argument was
	 * less than or equal to 0, or greater than {IOV_MAX}.  Linux has
	 * traditionally returned zero for zero segments, so...
	 */
	if (nr_segs == 0)
		return iov;
	if (nr_segs > UIO_MAXIOV)
		return ERR_PTR(-EINVAL);
	if (nr_segs > fast_segs) {
		iov = kmalloc_array(nr_segs, sizeof(struct iovec), GFP_KERNEL);
		if (!iov)
			return ERR_PTR(-ENOMEM);
	}

	if (compat)
		ret = copy_compat_iovec_from_user(iov, uvec, nr_segs);
	else
		ret = copy_iovec_from_user(iov, uvec, nr_segs);
	if (ret) {
		if (iov != fast_iov)
			kfree(iov);
		return ERR_PTR(ret);
	}

	return iov;
}

ssize_t __import_iovec(int type, const struct iovec __user *uvec,
		 unsigned nr_segs, unsigned fast_segs, struct iovec **iovp,
		 struct iov_iter *i, bool compat)
{
	ssize_t total_len = 0;
	unsigned long seg;
	struct iovec *iov;

	iov = iovec_from_user(uvec, nr_segs, fast_segs, *iovp, compat);
	if (IS_ERR(iov)) {
		*iovp = NULL;
		return PTR_ERR(iov);
	}

	/*
	 * According to the Single Unix Specification we should return EINVAL if
	 * an element length is < 0 when cast to ssize_t or if the total length
	 * would overflow the ssize_t return value of the system call.
	 *
	 * Linux caps all read/write calls to MAX_RW_COUNT, and avoids the
	 * overflow case.
	 */
	for (seg = 0; seg < nr_segs; seg++) {
		ssize_t len = (ssize_t)iov[seg].iov_len;

		if (!access_ok(iov[seg].iov_base, len)) {
			if (iov != *iovp)
				kfree(iov);
			*iovp = NULL;
			return -EFAULT;
		}

		if (len > MAX_RW_COUNT - total_len) {
			len = MAX_RW_COUNT - total_len;
			iov[seg].iov_len = len;
		}
		total_len += len;
	}

	iov_iter_init(i, type, iov, nr_segs, total_len);
	if (iov == *iovp)
		*iovp = NULL;
	else
		*iovp = iov;
	return total_len;
}

/**
 * import_iovec() - Copy an array of &struct iovec from userspace
 *     into the kernel, check that it is valid, and initialize a new
 *     &struct iov_iter iterator to access it.
 *
 * @type: One of %READ or %WRITE.
 * @uvec: Pointer to the userspace array.
 * @nr_segs: Number of elements in userspace array.
 * @fast_segs: Number of elements in @fast_iov.
 * @iovp: (input and output parameter) Pointer to pointer to (usually small
 *     on-stack) kernel array.
 * @i: Pointer to iterator that will be initialized on success.
 *
 * If the array pointed to by *@iovp is large enough to hold all @nr_segs,
 * then this function places %NULL in *@iovp on return. Otherwise, a new
 * array will be allocated and the result placed in *@iovp. This means that
 * the caller may call kfree() on *@iovp regardless of whether the small
 * on-stack array was used or not (and regardless of whether this function
 * returns an error or not).
 *
 * Return: Negative error code on error, bytes imported on success
 */
ssize_t import_iovec(int type, const struct iovec __user *uvec,
		 unsigned nr_segs, unsigned fast_segs,
		 struct iovec **iovp, struct iov_iter *i)
{
	return __import_iovec(type, uvec, nr_segs, fast_segs, iovp, i,
			      in_compat_syscall());
}
EXPORT_SYMBOL(import_iovec);

int import_single_range(int rw, void __user *buf, size_t len,
		 struct iovec *iov, struct iov_iter *i)
{
	if (len > MAX_RW_COUNT)
		len = MAX_RW_COUNT;
	if (unlikely(!access_ok(buf, len)))
		return -EFAULT;

	iov->iov_base = buf;
	iov->iov_len = len;
	iov_iter_init(i, rw, iov, 1, len);
	return 0;
}
EXPORT_SYMBOL(import_single_range);

int iov_iter_for_each_range(struct iov_iter *i, size_t bytes,
			    int (*f)(struct kvec *vec, void *context),
			    void *context)
{
	struct kvec w;
	int err = -EINVAL;
	if (!bytes)
		return 0;

	iterate_all_kinds(i, bytes, v, -EINVAL, ({
		w.iov_base = kmap(v.bv_page) + v.bv_offset;
		w.iov_len = v.bv_len;
		err = f(&w, context);
		kunmap(v.bv_page);
		err;}), ({
		w = v;
		err = f(&w, context);})
	)
	return err;
}
EXPORT_SYMBOL(iov_iter_for_each_range);
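
/*
 * Hedged usage sketch (editorial addition, not part of the original file):
 * a read(2)-style path wraps a single user buffer with import_single_range()
 * and then hands the iterator to the actual I/O routine.  The function name
 * example_read_path() is hypothetical.
 *
 *	static ssize_t example_read_path(void __user *ubuf, size_t len)
 *	{
 *		struct iovec iov;
 *		struct iov_iter iter;
 *		int ret;
 *
 *		ret = import_single_range(READ, ubuf, len, &iov, &iter);
 *		if (ret)
 *			return ret;
 *		// ... pass &iter to ->read_iter() or copy_to_iter() here ...
 *		return iov_iter_count(&iter);
 *	}
 */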