// SPDX-License-Identifier: GPL-2.0-only
#include <crypto/hash.h>	/* ahash interface used by hash_and_copy_to_iter() */
#include <linux/export.h>
#include <linux/bvec.h>
#include <linux/uio.h>
#include <linux/pagemap.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/splice.h>
#include <net/checksum.h>
#include <linux/scatterlist.h>

#define PIPE_PARANOIA /* for now */

#define iterate_iovec(i, n, __v, __p, skip, STEP) {	\
	size_t left;					\
	size_t wanted = n;				\
	__p = i->iov;					\
	__v.iov_len = min(n, __p->iov_len - skip);	\
	if (likely(__v.iov_len)) {			\
		__v.iov_base = __p->iov_base + skip;	\
		left = (STEP);				\
		__v.iov_len -= left;			\
		skip += __v.iov_len;			\
		n -= __v.iov_len;			\
	} else {					\
		left = 0;				\
	}						\
	while (unlikely(!left && n)) {			\
		__p++;					\
		__v.iov_len = min(n, __p->iov_len);	\
		if (unlikely(!__v.iov_len))		\
			continue;			\
		__v.iov_base = __p->iov_base;		\
		left = (STEP);				\
		__v.iov_len -= left;			\
		skip = __v.iov_len;			\
		n -= __v.iov_len;			\
	}						\
	n = wanted - n;					\
}

#define iterate_kvec(i, n, __v, __p, skip, STEP) {	\
	size_t wanted = n;				\
	__p = i->kvec;					\
	__v.iov_len = min(n, __p->iov_len - skip);	\
	if (likely(__v.iov_len)) {			\
		__v.iov_base = __p->iov_base + skip;	\
		(void)(STEP);				\
		skip += __v.iov_len;			\
		n -= __v.iov_len;			\
	}						\
	while (unlikely(n)) {				\
		__p++;					\
		__v.iov_len = min(n, __p->iov_len);	\
		if (unlikely(!__v.iov_len))		\
			continue;			\
		__v.iov_base = __p->iov_base;		\
		(void)(STEP);				\
		skip = __v.iov_len;			\
		n -= __v.iov_len;			\
	}						\
	n = wanted;					\
}

#define iterate_bvec(i, n, __v, __bi, skip, STEP) {	\
	struct bvec_iter __start;			\
	__start.bi_size = n;				\
	__start.bi_bvec_done = skip;			\
	__start.bi_idx = 0;				\
	for_each_bvec(__v, i->bvec, __bi, __start) {	\
		if (!__v.bv_len)			\
			continue;			\
		(void)(STEP);				\
	}						\
}

#define iterate_all_kinds(i, n, v, I, B, K) {			\
	if (likely(n)) {					\
		size_t skip = i->iov_offset;			\
		if (unlikely(i->type & ITER_BVEC)) {		\
			struct bio_vec v;			\
			struct bvec_iter __bi;			\
			iterate_bvec(i, n, v, __bi, skip, (B))	\
		} else if (unlikely(i->type & ITER_KVEC)) {	\
			const struct kvec *kvec;		\
			struct kvec v;				\
			iterate_kvec(i, n, v, kvec, skip, (K))	\
		} else if (unlikely(i->type & ITER_DISCARD)) {	\
		} else {					\
			const struct iovec *iov;		\
			struct iovec v;				\
			iterate_iovec(i, n, v, iov, skip, (I))	\
		}						\
	}							\
}

#define iterate_and_advance(i, n, v, I, B, K) {			\
	if (unlikely(i->count < n))				\
		n = i->count;					\
	if (i->count) {						\
		size_t skip = i->iov_offset;			\
		if (unlikely(i->type & ITER_BVEC)) {		\
			const struct bio_vec *bvec = i->bvec;	\
			struct bio_vec v;			\
			struct bvec_iter __bi;			\
			iterate_bvec(i, n, v, __bi, skip, (B))	\
			i->bvec = __bvec_iter_bvec(i->bvec, __bi);	\
			i->nr_segs -= i->bvec - bvec;		\
			skip = __bi.bi_bvec_done;		\
		} else if (unlikely(i->type & ITER_KVEC)) {	\
			const struct kvec *kvec;		\
			struct kvec v;				\
			iterate_kvec(i, n, v, kvec, skip, (K))	\
			if (skip == kvec->iov_len) {		\
				kvec++;				\
				skip = 0;			\
			}					\
			i->nr_segs -= kvec - i->kvec;		\
			i->kvec = kvec;				\
		} else if (unlikely(i->type & ITER_DISCARD)) {	\
			skip += n;				\
		} else {					\
			const struct iovec *iov;		\
			struct iovec v;				\
			iterate_iovec(i, n, v, iov, skip, (I))	\
			if (skip == iov->iov_len) {		\
				iov++;				\
				skip = 0;			\
			}					\
			i->nr_segs -= iov - i->iov;		\
			i->iov = iov;				\
		}						\
		i->count -= n;					\
		i->iov_offset = skip;				\
	}							\
}

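#if 0	/* illustrative sketch only, compiled out */
/*
 * A minimal sketch of what an iterate_all_kinds() user looks like, for
 * readers new to these macros.  The three STEP expressions are expanded
 * for the iovec, bvec and kvec flavours respectively; the iovec step must
 * evaluate to the number of bytes it could NOT process (0 here), while the
 * other two are evaluated only for their side effects.  ITER_DISCARD
 * iterators fall through without running any step.  This helper is
 * hypothetical and not used anywhere in the kernel.
 */
static size_t example_count_segments(const struct iov_iter *i, size_t bytes)
{
	size_t segs = 0;

	iterate_all_kinds(i, bytes, v,
		({ segs++; 0; }),
		segs++,
		segs++
	)
	return segs;
}
#endif
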
static int copyout(void __user *to, const void *from, size_t n)
{
	if (access_ok(to, n)) {
		kasan_check_read(from, n);
		n = raw_copy_to_user(to, from, n);
	}
	return n;
}

static int copyin(void *to, const void __user *from, size_t n)
{
	if (access_ok(from, n)) {
		kasan_check_write(to, n);
		n = raw_copy_from_user(to, from, n);
	}
	return n;
}

static size_t copy_page_to_iter_iovec(struct page *page, size_t offset, size_t bytes,
			 struct iov_iter *i)
{
	size_t skip, copy, left, wanted;
	const struct iovec *iov;
	char __user *buf;
	void *kaddr, *from;

	if (unlikely(bytes > i->count))
		bytes = i->count;

	if (unlikely(!bytes))
		return 0;

	might_fault();
	wanted = bytes;
	iov = i->iov;
	skip = i->iov_offset;
	buf = iov->iov_base + skip;
	copy = min(bytes, iov->iov_len - skip);

	if (IS_ENABLED(CONFIG_HIGHMEM) && !fault_in_pages_writeable(buf, copy)) {
		kaddr = kmap_atomic(page);
		from = kaddr + offset;

		/* first chunk, usually the only one */
		left = copyout(buf, from, copy);
		copy -= left;
		skip += copy;
		from += copy;
		bytes -= copy;

		while (unlikely(!left && bytes)) {
			iov++;
			buf = iov->iov_base;
			copy = min(bytes, iov->iov_len);
			left = copyout(buf, from, copy);
			copy -= left;
			skip = copy;
			from += copy;
			bytes -= copy;
		}
		if (likely(!bytes)) {
			kunmap_atomic(kaddr);
			goto done;
		}
		offset = from - kaddr;
		buf += copy;
		kunmap_atomic(kaddr);
		copy = min(bytes, iov->iov_len - skip);
	}
	/* Too bad - revert to non-atomic kmap */

	kaddr = kmap(page);
	from = kaddr + offset;
	left = copyout(buf, from, copy);
	copy -= left;
	skip += copy;
	from += copy;
	bytes -= copy;
	while (unlikely(!left && bytes)) {
		iov++;
		buf = iov->iov_base;
		copy = min(bytes, iov->iov_len);
		left = copyout(buf, from, copy);
		copy -= left;
		skip = copy;
		from += copy;
		bytes -= copy;
	}
	kunmap(page);

done:
	if (skip == iov->iov_len) {
		iov++;
		skip = 0;
	}
	i->count -= wanted - bytes;
	i->nr_segs -= iov - i->iov;
	i->iov = iov;
	i->iov_offset = skip;
	return wanted - bytes;
}

static size_t copy_page_from_iter_iovec(struct page *page, size_t offset, size_t bytes,
			 struct iov_iter *i)
{
	size_t skip, copy, left, wanted;
	const struct iovec *iov;
	char __user *buf;
	void *kaddr, *to;

	if (unlikely(bytes > i->count))
		bytes = i->count;

	if (unlikely(!bytes))
		return 0;

	might_fault();
	wanted = bytes;
	iov = i->iov;
	skip = i->iov_offset;
	buf = iov->iov_base + skip;
	copy = min(bytes, iov->iov_len - skip);

	if (IS_ENABLED(CONFIG_HIGHMEM) && !fault_in_pages_readable(buf, copy)) {
		kaddr = kmap_atomic(page);
		to = kaddr + offset;

		/* first chunk, usually the only one */
		left = copyin(to, buf, copy);
		copy -= left;
		skip += copy;
		to += copy;
		bytes -= copy;

		while (unlikely(!left && bytes)) {
			iov++;
			buf = iov->iov_base;
			copy = min(bytes, iov->iov_len);
			left = copyin(to, buf, copy);
			copy -= left;
			skip = copy;
			to += copy;
			bytes -= copy;
		}
		if (likely(!bytes)) {
			kunmap_atomic(kaddr);
			goto done;
		}
		offset = to - kaddr;
		buf += copy;
		kunmap_atomic(kaddr);
		copy = min(bytes, iov->iov_len - skip);
	}
	/* Too bad - revert to non-atomic kmap */

	kaddr = kmap(page);
	to = kaddr + offset;
	left = copyin(to, buf, copy);
	copy -= left;
	skip += copy;
	to += copy;
	bytes -= copy;
	while (unlikely(!left && bytes)) {
		iov++;
		buf = iov->iov_base;
		copy = min(bytes, iov->iov_len);
		left = copyin(to, buf, copy);
		copy -= left;
		skip = copy;
		to += copy;
		bytes -= copy;
	}
	kunmap(page);

done:
	if (skip == iov->iov_len) {
		iov++;
		skip = 0;
	}
	i->count -= wanted - bytes;
	i->nr_segs -= iov - i->iov;
	i->iov = iov;
	i->iov_offset = skip;
	return wanted - bytes;
}

#ifdef PIPE_PARANOIA
static bool sanity(const struct iov_iter *i)
{
	struct pipe_inode_info *pipe = i->pipe;
	unsigned int p_head = pipe->head;
	unsigned int p_tail = pipe->tail;
	unsigned int p_mask = pipe->ring_size - 1;
	unsigned int p_occupancy = pipe_occupancy(p_head, p_tail);
	unsigned int i_head = i->head;
	unsigned int idx;

	if (i->iov_offset) {
		struct pipe_buffer *p;
		if (unlikely(p_occupancy == 0))
			goto Bad;	// pipe must be non-empty
		if (unlikely(i_head != p_head - 1))
			goto Bad;	// must be at the last buffer...

		p = &pipe->bufs[i_head & p_mask];
		if (unlikely(p->offset + p->len != i->iov_offset))
			goto Bad;	// ... at the end of segment
	} else {
		if (i_head != p_head)
			goto Bad;	// must be right after the last buffer
	}
	return true;
Bad:
	printk(KERN_ERR "idx = %d, offset = %zd\n", i_head, i->iov_offset);
	printk(KERN_ERR "head = %d, tail = %d, buffers = %d\n",
			p_head, p_tail, pipe->ring_size);
	for (idx = 0; idx < pipe->ring_size; idx++)
		printk(KERN_ERR "[%p %p %d %d]\n",
			pipe->bufs[idx].ops,
			pipe->bufs[idx].page,
			pipe->bufs[idx].offset,
			pipe->bufs[idx].len);
	WARN_ON(1);
	return false;
}
#else
#define sanity(i) true
#endif

static size_t copy_page_to_iter_pipe(struct page *page, size_t offset, size_t bytes,
			 struct iov_iter *i)
{
	struct pipe_inode_info *pipe = i->pipe;
	struct pipe_buffer *buf;
	unsigned int p_tail = pipe->tail;
	unsigned int p_mask = pipe->ring_size - 1;
	unsigned int i_head = i->head;
	size_t off;

	if (unlikely(bytes > i->count))
		bytes = i->count;

	if (unlikely(!bytes))
		return 0;

	if (!sanity(i))
		return 0;

	off = i->iov_offset;
	buf = &pipe->bufs[i_head & p_mask];
	if (off) {
		if (offset == off && buf->page == page) {
			/* merge with the last one */
			buf->len += bytes;
			i->iov_offset += bytes;
			goto out;
		}
		i_head++;
		buf = &pipe->bufs[i_head & p_mask];
	}
	if (pipe_full(i_head, p_tail, pipe->max_usage))
		return 0;

	buf->ops = &page_cache_pipe_buf_ops;
	get_page(page);
	buf->page = page;
	buf->offset = offset;
	buf->len = bytes;

	pipe->head = i_head + 1;
	i->iov_offset = offset + bytes;
	i->head = i_head;
out:
	i->count -= bytes;
	return bytes;
}

/*
 * Fault in one or more iovecs of the given iov_iter, to a maximum length of
 * bytes.  For each iovec, fault in each page that constitutes the iovec.
 *
 * Return 0 on success, or non-zero if the memory could not be accessed (i.e.
 * because it is an invalid address).
 */
int iov_iter_fault_in_readable(struct iov_iter *i, size_t bytes)
{
	size_t skip = i->iov_offset;
	const struct iovec *iov;
	int err;
	struct iovec v;

	if (!(i->type & (ITER_BVEC|ITER_KVEC))) {
		iterate_iovec(i, bytes, v, iov, skip, ({
			err = fault_in_pages_readable(v.iov_base, v.iov_len);
			if (unlikely(err))
				return err;
		0;}))
	}
	return 0;
}
EXPORT_SYMBOL(iov_iter_fault_in_readable);

void iov_iter_init(struct iov_iter *i, unsigned int direction,
			const struct iovec *iov, unsigned long nr_segs,
			size_t count)
{
	WARN_ON(direction & ~(READ | WRITE));
	direction &= READ | WRITE;

	/* It will get better.  Eventually... */
	if (uaccess_kernel()) {
		i->type = ITER_KVEC | direction;
		i->kvec = (struct kvec *)iov;
	} else {
		i->type = ITER_IOVEC | direction;
		i->iov = iov;
	}
	i->nr_segs = nr_segs;
	i->iov_offset = 0;
	i->count = count;
}
EXPORT_SYMBOL(iov_iter_init);

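#if 0	/* illustrative sketch only, compiled out */
/*
 * A minimal sketch of the usual calling convention around iov_iter_init():
 * wrap a user buffer in a single-segment iovec, pre-fault it the way
 * ->write_iter() paths do before taking locks, then pull the data with
 * copy_from_iter().  The helper and its names are hypothetical.
 */
static ssize_t example_gather_from_user(void *dst, const char __user *ubuf,
					size_t len)
{
	struct iovec iov = {
		.iov_base = (void __user *)ubuf,
		.iov_len = len,
	};
	struct iov_iter from;

	/* WRITE: the iterator is the *source* of the data */
	iov_iter_init(&from, WRITE, &iov, 1, len);

	if (iov_iter_fault_in_readable(&from, len))
		return -EFAULT;

	/* returns the number of bytes actually copied and advances @from */
	return copy_from_iter(dst, len, &from);
}
#endif
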
static void memcpy_from_page(char *to, struct page *page, size_t offset, size_t len)
{
	char *from = kmap_atomic(page);
	memcpy(to, from + offset, len);
	kunmap_atomic(from);
}

static void memcpy_to_page(struct page *page, size_t offset, const char *from, size_t len)
{
	char *to = kmap_atomic(page);
	memcpy(to + offset, from, len);
	kunmap_atomic(to);
}

static void memzero_page(struct page *page, size_t offset, size_t len)
{
	char *addr = kmap_atomic(page);
	memset(addr + offset, 0, len);
	kunmap_atomic(addr);
}

static inline bool allocated(struct pipe_buffer *buf)
{
	return buf->ops == &default_pipe_buf_ops;
}

static inline void data_start(const struct iov_iter *i,
			      unsigned int *iter_headp, size_t *offp)
{
	unsigned int p_mask = i->pipe->ring_size - 1;
	unsigned int iter_head = i->head;
	size_t off = i->iov_offset;

	if (off && (!allocated(&i->pipe->bufs[iter_head & p_mask]) ||
		    off == PAGE_SIZE)) {
		iter_head++;
		off = 0;
	}
	*iter_headp = iter_head;
	*offp = off;
}

static size_t push_pipe(struct iov_iter *i, size_t size,
			int *iter_headp, size_t *offp)
{
	struct pipe_inode_info *pipe = i->pipe;
	unsigned int p_tail = pipe->tail;
	unsigned int p_mask = pipe->ring_size - 1;
	unsigned int iter_head;
	size_t off;
	ssize_t left;

	if (unlikely(size > i->count))
		size = i->count;
	if (unlikely(!size))
		return 0;

	left = size;
	data_start(i, &iter_head, &off);
	*iter_headp = iter_head;
	*offp = off;
	if (off) {
		left -= PAGE_SIZE - off;
		if (left <= 0) {
			pipe->bufs[iter_head & p_mask].len += size;
			return size;
		}
		pipe->bufs[iter_head & p_mask].len = PAGE_SIZE;
		iter_head++;
	}
	while (!pipe_full(iter_head, p_tail, pipe->max_usage)) {
		struct pipe_buffer *buf = &pipe->bufs[iter_head & p_mask];
		struct page *page = alloc_page(GFP_USER);
		if (!page)
			break;

		buf->ops = &default_pipe_buf_ops;
		buf->page = page;
		buf->offset = 0;
		buf->len = min_t(ssize_t, left, PAGE_SIZE);
		left -= buf->len;
		iter_head++;
		pipe->head = iter_head;

		if (left == 0)
			return size;
	}
	return size - left;
}

static size_t copy_pipe_to_iter(const void *addr, size_t bytes,
				struct iov_iter *i)
{
	struct pipe_inode_info *pipe = i->pipe;
	unsigned int p_mask = pipe->ring_size - 1;
	unsigned int i_head;
	size_t n, off;

	if (!sanity(i))
		return 0;

	bytes = n = push_pipe(i, bytes, &i_head, &off);
	if (unlikely(!n))
		return 0;
	do {
		size_t chunk = min_t(size_t, n, PAGE_SIZE - off);
		memcpy_to_page(pipe->bufs[i_head & p_mask].page, off, addr, chunk);
		i->head = i_head;
		i->iov_offset = off + chunk;
		n -= chunk;
		addr += chunk;
		off = 0;
		i_head++;
	} while (n);
	i->count -= bytes;
	return bytes;
}

static __wsum csum_and_memcpy(void *to, const void *from, size_t len,
			      __wsum sum, size_t off)
{
	__wsum next = csum_partial_copy_nocheck(from, to, len, 0);
	return csum_block_add(sum, next, off);
}

static size_t csum_and_copy_to_pipe_iter(const void *addr, size_t bytes,
				__wsum *csum, struct iov_iter *i)
{
	struct pipe_inode_info *pipe = i->pipe;
	unsigned int p_mask = pipe->ring_size - 1;
	unsigned int i_head;
	size_t n, r;
	size_t off = 0;
	__wsum sum = *csum;

	if (!sanity(i))
		return 0;

	bytes = n = push_pipe(i, bytes, &i_head, &r);
	if (unlikely(!n))
		return 0;
	do {
		size_t chunk = min_t(size_t, n, PAGE_SIZE - r);
		char *p = kmap_atomic(pipe->bufs[i_head & p_mask].page);
		sum = csum_and_memcpy(p + r, addr, chunk, sum, off);
		kunmap_atomic(p);
		i->head = i_head;
		i->iov_offset = r + chunk;
		n -= chunk;
		off += chunk;
		addr += chunk;
		r = 0;
		i_head++;
	} while (n);
	i->count -= bytes;
	*csum = sum;
	return bytes;
}

size_t _copy_to_iter(const void *addr, size_t bytes, struct iov_iter *i)
{
	const char *from = addr;
	if (unlikely(iov_iter_is_pipe(i)))
		return copy_pipe_to_iter(addr, bytes, i);
	if (iter_is_iovec(i))
		might_fault();
	iterate_and_advance(i, bytes, v,
		copyout(v.iov_base, (from += v.iov_len) - v.iov_len, v.iov_len),
		memcpy_to_page(v.bv_page, v.bv_offset,
			       (from += v.bv_len) - v.bv_len, v.bv_len),
		memcpy(v.iov_base, (from += v.iov_len) - v.iov_len, v.iov_len)
	)

	return bytes;
}
EXPORT_SYMBOL(_copy_to_iter);

#ifdef CONFIG_ARCH_HAS_UACCESS_MCSAFE
static int copyout_mcsafe(void __user *to, const void *from, size_t n)
{
	if (access_ok(to, n)) {
		kasan_check_read(from, n);
		n = copy_to_user_mcsafe((__force void *) to, from, n);
	}
	return n;
}

static unsigned long memcpy_mcsafe_to_page(struct page *page, size_t offset,
		const char *from, size_t len)
{
	unsigned long ret;
	char *to;

	to = kmap_atomic(page);
	ret = memcpy_mcsafe(to + offset, from, len);
	kunmap_atomic(to);

	return ret;
}

static size_t copy_pipe_to_iter_mcsafe(const void *addr, size_t bytes,
				struct iov_iter *i)
{
	struct pipe_inode_info *pipe = i->pipe;
	unsigned int p_mask = pipe->ring_size - 1;
	unsigned int i_head;
	size_t n, off, xfer = 0;

	if (!sanity(i))
		return 0;

	bytes = n = push_pipe(i, bytes, &i_head, &off);
	if (unlikely(!n))
		return 0;
	do {
		size_t chunk = min_t(size_t, n, PAGE_SIZE - off);
		unsigned long rem;

		rem = memcpy_mcsafe_to_page(pipe->bufs[i_head & p_mask].page,
					    off, addr, chunk);
		i->head = i_head;
		i->iov_offset = off + chunk - rem;
		xfer += chunk - rem;
		if (rem)
			break;
		n -= chunk;
		addr += chunk;
		off = 0;
		i_head++;
	} while (n);
	i->count -= xfer;
	return xfer;
}

/**
 * _copy_to_iter_mcsafe - copy to user with source-read error exception handling
 * @addr: source kernel address
 * @bytes: total transfer length
 * @i: destination iterator
 *
 * The pmem driver arranges for filesystem-dax to use this facility via
 * dax_copy_to_iter() for protecting read/write to persistent memory.
 * Unless / until an architecture can guarantee identical performance
 * between _copy_to_iter_mcsafe() and _copy_to_iter() it would be a
 * performance regression to switch more users to the mcsafe version.
 *
 * Otherwise, the main differences between this and typical _copy_to_iter() are:
 *
 * * Typical tail/residue handling after a fault retries the copy
 *   byte-by-byte until the fault happens again. Re-triggering machine
 *   checks is potentially fatal so the implementation uses source
 *   alignment and poison alignment assumptions to avoid re-triggering
 *   hardware exceptions.
 *
 * * ITER_KVEC, ITER_PIPE, and ITER_BVEC can return short copies.
 *   Compare to copy_to_iter() where only ITER_IOVEC attempts might return
 *   a short copy.
 *
 * See MCSAFE_TEST for self-test.
 */
size_t _copy_to_iter_mcsafe(const void *addr, size_t bytes, struct iov_iter *i)
{
	const char *from = addr;
	unsigned long rem, curr_addr, s_addr = (unsigned long) addr;

	if (unlikely(iov_iter_is_pipe(i)))
		return copy_pipe_to_iter_mcsafe(addr, bytes, i);
	if (iter_is_iovec(i))
		might_fault();
	iterate_and_advance(i, bytes, v,
		copyout_mcsafe(v.iov_base, (from += v.iov_len) - v.iov_len, v.iov_len),
		({
		rem = memcpy_mcsafe_to_page(v.bv_page, v.bv_offset,
				(from += v.bv_len) - v.bv_len, v.bv_len);
		if (rem) {
			curr_addr = (unsigned long) from;
			bytes = curr_addr - s_addr - rem;
			return bytes;
		}
		}),
		({
		rem = memcpy_mcsafe(v.iov_base, (from += v.iov_len) - v.iov_len,
				v.iov_len);
		if (rem) {
			curr_addr = (unsigned long) from;
			bytes = curr_addr - s_addr - rem;
			return bytes;
		}
		})
	)

	return bytes;
}
EXPORT_SYMBOL_GPL(_copy_to_iter_mcsafe);
#endif /* CONFIG_ARCH_HAS_UACCESS_MCSAFE */

size_t _copy_from_iter(void *addr, size_t bytes, struct iov_iter *i)
{
	char *to = addr;
	if (unlikely(iov_iter_is_pipe(i))) {
		WARN_ON(1);
		return 0;
	}
	if (iter_is_iovec(i))
		might_fault();
	iterate_and_advance(i, bytes, v,
		copyin((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len),
		memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page,
				 v.bv_offset, v.bv_len),
		memcpy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len)
	)

	return bytes;
}
EXPORT_SYMBOL(_copy_from_iter);

bool _copy_from_iter_full(void *addr, size_t bytes, struct iov_iter *i)
{
	char *to = addr;
	if (unlikely(iov_iter_is_pipe(i))) {
		WARN_ON(1);
		return false;
	}
	if (unlikely(i->count < bytes))
		return false;

	if (iter_is_iovec(i))
		might_fault();
	iterate_all_kinds(i, bytes, v, ({
		if (copyin((to += v.iov_len) - v.iov_len,
				   v.iov_base, v.iov_len))
			return false;
		0;}),
		memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page,
				 v.bv_offset, v.bv_len),
		memcpy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len)
	)

	iov_iter_advance(i, bytes);
	return true;
}
EXPORT_SYMBOL(_copy_from_iter_full);

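#if 0	/* illustrative sketch only, compiled out */
/*
 * A minimal sketch of the all-or-nothing variant: copy_from_iter_full()
 * (the uio.h wrapper around _copy_from_iter_full()) either copies the whole
 * header and advances the iterator, or reports failure and leaves the
 * iterator unadvanced (the destination may still have been partially
 * written).  The header layout here is hypothetical.
 */
struct example_hdr {
	u32 len;
	u32 type;
};

static int example_pull_header(struct example_hdr *hdr, struct iov_iter *from)
{
	if (!copy_from_iter_full(hdr, sizeof(*hdr), from))
		return -EFAULT;
	return 0;
}
#endif
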
size_t _copy_from_iter_nocache(void *addr, size_t bytes, struct iov_iter *i)
{
	char *to = addr;
	if (unlikely(iov_iter_is_pipe(i))) {
		WARN_ON(1);
		return 0;
	}
	iterate_and_advance(i, bytes, v,
		__copy_from_user_inatomic_nocache((to += v.iov_len) - v.iov_len,
					 v.iov_base, v.iov_len),
		memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page,
				 v.bv_offset, v.bv_len),
		memcpy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len)
	)

	return bytes;
}
EXPORT_SYMBOL(_copy_from_iter_nocache);

#ifdef CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE
/**
 * _copy_from_iter_flushcache - write destination through cpu cache
 * @addr: destination kernel address
 * @bytes: total transfer length
 * @i: source iterator
 *
 * The pmem driver arranges for filesystem-dax to use this facility via
 * dax_copy_from_iter() for ensuring that writes to persistent memory
 * are flushed through the CPU cache. It is differentiated from
 * _copy_from_iter_nocache() in that it guarantees all data is flushed for
 * all iterator types. _copy_from_iter_nocache() only attempts to
 * bypass the cache for the ITER_IOVEC case, and on some archs may use
 * instructions that strand dirty-data in the cache.
 */
size_t _copy_from_iter_flushcache(void *addr, size_t bytes, struct iov_iter *i)
{
	char *to = addr;
	if (unlikely(iov_iter_is_pipe(i))) {
		WARN_ON(1);
		return 0;
	}
	iterate_and_advance(i, bytes, v,
		__copy_from_user_flushcache((to += v.iov_len) - v.iov_len,
					 v.iov_base, v.iov_len),
		memcpy_page_flushcache((to += v.bv_len) - v.bv_len, v.bv_page,
				 v.bv_offset, v.bv_len),
		memcpy_flushcache((to += v.iov_len) - v.iov_len, v.iov_base,
			v.iov_len)
	)

	return bytes;
}
EXPORT_SYMBOL_GPL(_copy_from_iter_flushcache);
#endif

bool _copy_from_iter_full_nocache(void *addr, size_t bytes, struct iov_iter *i)
{
	char *to = addr;
	if (unlikely(iov_iter_is_pipe(i))) {
		WARN_ON(1);
		return false;
	}
	if (unlikely(i->count < bytes))
		return false;
	iterate_all_kinds(i, bytes, v, ({
		if (__copy_from_user_inatomic_nocache((to += v.iov_len) - v.iov_len,
					     v.iov_base, v.iov_len))
			return false;
		0;}),
		memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page,
				 v.bv_offset, v.bv_len),
		memcpy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len)
	)

	iov_iter_advance(i, bytes);
	return true;
}
EXPORT_SYMBOL(_copy_from_iter_full_nocache);

static inline bool page_copy_sane(struct page *page, size_t offset, size_t n)
{
	struct page *head;
	size_t v = n + offset;

	/*
	 * The general case needs to access the page order in order
	 * to compute the page size.
	 * However, we mostly deal with order-0 pages and thus can
	 * avoid a possible cache line miss for requests that fit all
	 * page orders.
	 */
	if (n <= v && v <= PAGE_SIZE)
		return true;

	head = compound_head(page);
	v += (page - head) << PAGE_SHIFT;

	if (likely(n <= v && v <= (page_size(head))))
		return true;
	WARN_ON(1);
	return false;
}

size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes,
			 struct iov_iter *i)
{
	if (unlikely(!page_copy_sane(page, offset, bytes)))
		return 0;
	if (i->type & (ITER_BVEC|ITER_KVEC)) {
		void *kaddr = kmap_atomic(page);
		size_t wanted = copy_to_iter(kaddr + offset, bytes, i);
		kunmap_atomic(kaddr);
		return wanted;
	} else if (unlikely(iov_iter_is_discard(i)))
		return bytes;
	else if (likely(!iov_iter_is_pipe(i)))
		return copy_page_to_iter_iovec(page, offset, bytes, i);
	else
		return copy_page_to_iter_pipe(page, offset, bytes, i);
}
EXPORT_SYMBOL(copy_page_to_iter);

size_t copy_page_from_iter(struct page *page, size_t offset, size_t bytes,
			 struct iov_iter *i)
{
	if (unlikely(!page_copy_sane(page, offset, bytes)))
		return 0;
	if (unlikely(iov_iter_is_pipe(i) || iov_iter_is_discard(i))) {
		WARN_ON(1);
		return 0;
	}
	if (i->type & (ITER_BVEC|ITER_KVEC)) {
		void *kaddr = kmap_atomic(page);
		size_t wanted = _copy_from_iter(kaddr + offset, bytes, i);
		kunmap_atomic(kaddr);
		return wanted;
	} else
		return copy_page_from_iter_iovec(page, offset, bytes, i);
}
EXPORT_SYMBOL(copy_page_from_iter);

static size_t pipe_zero(size_t bytes, struct iov_iter *i)
{
	struct pipe_inode_info *pipe = i->pipe;
	unsigned int p_mask = pipe->ring_size - 1;
	unsigned int i_head;
	size_t n, off;

	if (!sanity(i))
		return 0;

	bytes = n = push_pipe(i, bytes, &i_head, &off);
	if (unlikely(!n))
		return 0;

	do {
		size_t chunk = min_t(size_t, n, PAGE_SIZE - off);
		memzero_page(pipe->bufs[i_head & p_mask].page, off, chunk);
		i->head = i_head;
		i->iov_offset = off + chunk;
		n -= chunk;
		off = 0;
		i_head++;
	} while (n);
	i->count -= bytes;
	return bytes;
}

size_t iov_iter_zero(size_t bytes, struct iov_iter *i)
{
	if (unlikely(iov_iter_is_pipe(i)))
		return pipe_zero(bytes, i);
	iterate_and_advance(i, bytes, v,
		clear_user(v.iov_base, v.iov_len),
		memzero_page(v.bv_page, v.bv_offset, v.bv_len),
		memset(v.iov_base, 0, v.iov_len)
	)

	return bytes;
}
EXPORT_SYMBOL(iov_iter_zero);

size_t iov_iter_copy_from_user_atomic(struct page *page,
		struct iov_iter *i, unsigned long offset, size_t bytes)
{
	char *kaddr = kmap_atomic(page), *p = kaddr + offset;
	if (unlikely(!page_copy_sane(page, offset, bytes))) {
		kunmap_atomic(kaddr);
		return 0;
	}
	if (unlikely(iov_iter_is_pipe(i) || iov_iter_is_discard(i))) {
		kunmap_atomic(kaddr);
		WARN_ON(1);
		return 0;
	}
	iterate_all_kinds(i, bytes, v,
		copyin((p += v.iov_len) - v.iov_len, v.iov_base, v.iov_len),
		memcpy_from_page((p += v.bv_len) - v.bv_len, v.bv_page,
				 v.bv_offset, v.bv_len),
		memcpy((p += v.iov_len) - v.iov_len, v.iov_base, v.iov_len)
	)
	kunmap_atomic(kaddr);
	return bytes;
}
EXPORT_SYMBOL(iov_iter_copy_from_user_atomic);

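#if 0	/* illustrative sketch only, compiled out */
/*
 * A minimal sketch of the buffered-write pattern built on
 * iov_iter_copy_from_user_atomic().  The copy runs with the page mapped
 * atomically, so it may stop short if a page fault would be needed; it
 * does NOT advance the iterator, the caller advances by what was actually
 * copied.  Real users (e.g. generic_perform_write()) additionally bracket
 * this with ->write_begin()/->write_end() and retry on short copies; that
 * is omitted from this hypothetical helper.
 */
static size_t example_fill_page(struct page *page, unsigned long offset,
				size_t bytes, struct iov_iter *i)
{
	size_t copied;

	/* pre-fault the user pages while we may still sleep */
	if (iov_iter_fault_in_readable(i, bytes))
		return 0;

	copied = iov_iter_copy_from_user_atomic(page, i, offset, bytes);
	iov_iter_advance(i, copied);
	return copied;
}
#endif
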
static inline void pipe_truncate(struct iov_iter *i)
{
	struct pipe_inode_info *pipe = i->pipe;
	unsigned int p_tail = pipe->tail;
	unsigned int p_head = pipe->head;
	unsigned int p_mask = pipe->ring_size - 1;

	if (!pipe_empty(p_head, p_tail)) {
		struct pipe_buffer *buf;
		unsigned int i_head = i->head;
		size_t off = i->iov_offset;

		if (off) {
			buf = &pipe->bufs[i_head & p_mask];
			buf->len = off - buf->offset;
			i_head++;
		}
		while (p_head != i_head) {
			p_head--;
			pipe_buf_release(pipe, &pipe->bufs[p_head & p_mask]);
		}

		pipe->head = p_head;
	}
}

static void pipe_advance(struct iov_iter *i, size_t size)
{
	struct pipe_inode_info *pipe = i->pipe;
	if (unlikely(i->count < size))
		size = i->count;
	if (size) {
		struct pipe_buffer *buf;
		unsigned int p_mask = pipe->ring_size - 1;
		unsigned int i_head = i->head;
		size_t off = i->iov_offset, left = size;

		if (off) /* make it relative to the beginning of buffer */
			left += off - pipe->bufs[i_head & p_mask].offset;
		while (1) {
			buf = &pipe->bufs[i_head & p_mask];
			if (left <= buf->len)
				break;
			left -= buf->len;
			i_head++;
		}
		i->head = i_head;
		i->iov_offset = buf->offset + left;
	}
	i->count -= size;
	/* ... and discard everything past that point */
	pipe_truncate(i);
}

void iov_iter_advance(struct iov_iter *i, size_t size)
{
	if (unlikely(iov_iter_is_pipe(i))) {
		pipe_advance(i, size);
		return;
	}
	if (unlikely(iov_iter_is_discard(i))) {
		i->count -= size;
		return;
	}
	iterate_and_advance(i, size, v, 0, 0, 0)
}
EXPORT_SYMBOL(iov_iter_advance);

void iov_iter_revert(struct iov_iter *i, size_t unroll)
{
	if (!unroll)
		return;
	if (WARN_ON(unroll > MAX_RW_COUNT))
		return;
	i->count += unroll;
	if (unlikely(iov_iter_is_pipe(i))) {
		struct pipe_inode_info *pipe = i->pipe;
		unsigned int p_mask = pipe->ring_size - 1;
		unsigned int i_head = i->head;
		size_t off = i->iov_offset;
		while (1) {
			struct pipe_buffer *b = &pipe->bufs[i_head & p_mask];
			size_t n = off - b->offset;
			if (unroll < n) {
				off -= unroll;
				break;
			}
			unroll -= n;
			if (!unroll && i_head == i->start_head) {
				off = 0;
				break;
			}
			i_head--;
			b = &pipe->bufs[i_head & p_mask];
			off = b->offset + b->len;
		}
		i->iov_offset = off;
		i->head = i_head;
		pipe_truncate(i);
		return;
	}
	if (unlikely(iov_iter_is_discard(i)))
		return;
	if (unroll <= i->iov_offset) {
		i->iov_offset -= unroll;
		return;
	}
	unroll -= i->iov_offset;
	if (iov_iter_is_bvec(i)) {
		const struct bio_vec *bvec = i->bvec;
		while (1) {
			size_t n = (--bvec)->bv_len;
			i->nr_segs++;
			if (unroll <= n) {
				i->bvec = bvec;
				i->iov_offset = n - unroll;
				return;
			}
			unroll -= n;
		}
	} else { /* same logics for iovec and kvec */
		const struct iovec *iov = i->iov;
		while (1) {
			size_t n = (--iov)->iov_len;
			i->nr_segs++;
			if (unroll <= n) {
				i->iov = iov;
				i->iov_offset = n - unroll;
				return;
			}
			unroll -= n;
		}
	}
}
EXPORT_SYMBOL(iov_iter_revert);

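#if 0	/* illustrative sketch only, compiled out */
/*
 * A minimal sketch of the iov_iter_revert() pattern: a lower layer may
 * have advanced the iterator before failing, and the caller winds it back
 * to the original position before retrying or returning.  example_dev and
 * example_dev_write() are hypothetical.
 */
struct example_dev;
static ssize_t example_dev_write(struct example_dev *dev, struct iov_iter *from);

static ssize_t example_send(struct example_dev *dev, struct iov_iter *from)
{
	size_t before = iov_iter_count(from);
	ssize_t ret;

	ret = example_dev_write(dev, from);	/* may consume part of @from */
	if (ret < 0)
		iov_iter_revert(from, before - iov_iter_count(from));
	return ret;
}
#endif
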
/*
 * Return the count of just the current iov_iter segment.
 */
size_t iov_iter_single_seg_count(const struct iov_iter *i)
{
	if (unlikely(iov_iter_is_pipe(i)))
		return i->count;	// it is a silly place, anyway
	if (i->nr_segs == 1)
		return i->count;
	if (unlikely(iov_iter_is_discard(i)))
		return i->count;
	else if (iov_iter_is_bvec(i))
		return min(i->count, i->bvec->bv_len - i->iov_offset);
	else
		return min(i->count, i->iov->iov_len - i->iov_offset);
}
EXPORT_SYMBOL(iov_iter_single_seg_count);

void iov_iter_kvec(struct iov_iter *i, unsigned int direction,
			const struct kvec *kvec, unsigned long nr_segs,
			size_t count)
{
	WARN_ON(direction & ~(READ | WRITE));
	i->type = ITER_KVEC | (direction & (READ | WRITE));
	i->kvec = kvec;
	i->nr_segs = nr_segs;
	i->iov_offset = 0;
	i->count = count;
}
EXPORT_SYMBOL(iov_iter_kvec);

void iov_iter_bvec(struct iov_iter *i, unsigned int direction,
			const struct bio_vec *bvec, unsigned long nr_segs,
			size_t count)
{
	WARN_ON(direction & ~(READ | WRITE));
	i->type = ITER_BVEC | (direction & (READ | WRITE));
	i->bvec = bvec;
	i->nr_segs = nr_segs;
	i->iov_offset = 0;
	i->count = count;
}
EXPORT_SYMBOL(iov_iter_bvec);

void iov_iter_pipe(struct iov_iter *i, unsigned int direction,
			struct pipe_inode_info *pipe,
			size_t count)
{
	BUG_ON(direction != READ);
	WARN_ON(pipe_full(pipe->head, pipe->tail, pipe->ring_size));
	i->type = ITER_PIPE | READ;
	i->pipe = pipe;
	i->head = pipe->head;
	i->iov_offset = 0;
	i->count = count;
	i->start_head = i->head;
}
EXPORT_SYMBOL(iov_iter_pipe);

/**
 * iov_iter_discard - Initialise an I/O iterator that discards data
 * @i: The iterator to initialise.
 * @direction: The direction of the transfer.
 * @count: The size of the I/O buffer in bytes.
 *
 * Set up an I/O iterator that just discards everything that's written to it.
 * It's only available as a READ iterator.
 */
void iov_iter_discard(struct iov_iter *i, unsigned int direction, size_t count)
{
	BUG_ON(direction != READ);
	i->type = ITER_DISCARD | READ;
	i->count = count;
	i->iov_offset = 0;
}
EXPORT_SYMBOL(iov_iter_discard);

unsigned long iov_iter_alignment(const struct iov_iter *i)
{
	unsigned long res = 0;
	size_t size = i->count;

	if (unlikely(iov_iter_is_pipe(i))) {
		unsigned int p_mask = i->pipe->ring_size - 1;

		if (size && i->iov_offset && allocated(&i->pipe->bufs[i->head & p_mask]))
			return size | i->iov_offset;
		return size;
	}
	iterate_all_kinds(i, size, v,
		(res |= (unsigned long)v.iov_base | v.iov_len, 0),
		res |= v.bv_offset | v.bv_len,
		res |= (unsigned long)v.iov_base | v.iov_len
	)
	return res;
}
EXPORT_SYMBOL(iov_iter_alignment);

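#if 0	/* illustrative sketch only, compiled out */
/*
 * A minimal sketch of a direct-I/O style use of iov_iter_alignment():
 * every segment's address and length is OR-ed into the returned value, so
 * a single mask test tells whether the whole iterator satisfies a device's
 * alignment constraint.  "lbs" (logical block size) is a hypothetical,
 * power-of-two constraint supplied by the caller.
 */
static bool example_dio_aligned(const struct iov_iter *i, unsigned int lbs)
{
	return (iov_iter_alignment(i) & (lbs - 1)) == 0;
}
#endif
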
unsigned long iov_iter_gap_alignment(const struct iov_iter *i)
{
	unsigned long res = 0;
	size_t size = i->count;

	if (unlikely(iov_iter_is_pipe(i) || iov_iter_is_discard(i))) {
		WARN_ON(1);
		return ~0U;
	}

	iterate_all_kinds(i, size, v,
		(res |= (!res ? 0 : (unsigned long)v.iov_base) |
			(size != v.iov_len ? size : 0), 0),
		(res |= (!res ? 0 : (unsigned long)v.bv_offset) |
			(size != v.bv_len ? size : 0)),
		(res |= (!res ? 0 : (unsigned long)v.iov_base) |
			(size != v.iov_len ? size : 0))
		);
	return res;
}
EXPORT_SYMBOL(iov_iter_gap_alignment);

static inline ssize_t __pipe_get_pages(struct iov_iter *i,
				size_t maxsize,
				struct page **pages,
				int iter_head,
				size_t *start)
{
	struct pipe_inode_info *pipe = i->pipe;
	unsigned int p_mask = pipe->ring_size - 1;
	ssize_t n = push_pipe(i, maxsize, &iter_head, start);
	if (!n)
		return -EFAULT;

	maxsize = n;
	n += *start;
	while (n > 0) {
		get_page(*pages++ = pipe->bufs[iter_head & p_mask].page);
		iter_head++;
		n -= PAGE_SIZE;
	}

	return maxsize;
}

static ssize_t pipe_get_pages(struct iov_iter *i,
		   struct page **pages, size_t maxsize, unsigned maxpages,
		   size_t *start)
{
	unsigned int iter_head, npages;
	size_t capacity;

	if (!maxsize)
		return 0;

	if (!sanity(i))
		return -EFAULT;

	data_start(i, &iter_head, start);
	/* Amount of free space: some of this one + all after this one */
	npages = pipe_space_for_user(iter_head, i->pipe->tail, i->pipe);
	capacity = min(npages, maxpages) * PAGE_SIZE - *start;

	return __pipe_get_pages(i, min(maxsize, capacity), pages, iter_head, start);
}

ssize_t iov_iter_get_pages(struct iov_iter *i,
		   struct page **pages, size_t maxsize, unsigned maxpages,
		   size_t *start)
{
	if (maxsize > i->count)
		maxsize = i->count;

	if (unlikely(iov_iter_is_pipe(i)))
		return pipe_get_pages(i, pages, maxsize, maxpages, start);
	if (unlikely(iov_iter_is_discard(i)))
		return -EFAULT;

	iterate_all_kinds(i, maxsize, v, ({
		unsigned long addr = (unsigned long)v.iov_base;
		size_t len = v.iov_len + (*start = addr & (PAGE_SIZE - 1));
		int n;
		int res;

		if (len > maxpages * PAGE_SIZE)
			len = maxpages * PAGE_SIZE;
		addr &= ~(PAGE_SIZE - 1);
		n = DIV_ROUND_UP(len, PAGE_SIZE);
		res = get_user_pages_fast(addr, n,
				iov_iter_rw(i) != WRITE ? FOLL_WRITE : 0,
				pages);
		if (unlikely(res < 0))
			return res;
		return (res == n ? len : res * PAGE_SIZE) - *start;
	0;}),({
		/* can't be more than PAGE_SIZE */
		*start = v.bv_offset;
		get_page(*pages = v.bv_page);
		return v.bv_len;
	}),({
		return -EFAULT;
	})
	)
	return 0;
}
EXPORT_SYMBOL(iov_iter_get_pages);

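#if 0	/* illustrative sketch only, compiled out */
/*
 * A minimal sketch of pinning pages with iov_iter_get_pages(): the call
 * takes a reference on each returned page (the caller must drop it with
 * put_page() when done) and reports how many bytes the pages cover, but it
 * does not advance the iterator - callers typically advance by the
 * returned length themselves, as this hypothetical helper does.
 */
static ssize_t example_pin_first_page(struct iov_iter *i, struct page **pagep,
				      size_t *offset)
{
	ssize_t n = iov_iter_get_pages(i, pagep, PAGE_SIZE, 1, offset);

	if (n > 0)
		iov_iter_advance(i, n);
	return n;
}
#endif
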
static struct page **get_pages_array(size_t n)
{
	return kvmalloc_array(n, sizeof(struct page *), GFP_KERNEL);
}

static ssize_t pipe_get_pages_alloc(struct iov_iter *i,
		   struct page ***pages, size_t maxsize,
		   size_t *start)
{
	struct page **p;
	unsigned int iter_head, npages;
	ssize_t n;

	if (!maxsize)
		return 0;

	if (!sanity(i))
		return -EFAULT;

	data_start(i, &iter_head, start);
	/* Amount of free space: some of this one + all after this one */
	npages = pipe_space_for_user(iter_head, i->pipe->tail, i->pipe);
	n = npages * PAGE_SIZE - *start;
	if (maxsize > n)
		maxsize = n;
	else
		npages = DIV_ROUND_UP(maxsize + *start, PAGE_SIZE);
	p = get_pages_array(npages);
	if (!p)
		return -ENOMEM;
	n = __pipe_get_pages(i, maxsize, p, iter_head, start);
	if (n > 0)
		*pages = p;
	else
		kvfree(p);
	return n;
}

ssize_t iov_iter_get_pages_alloc(struct iov_iter *i,
		   struct page ***pages, size_t maxsize,
		   size_t *start)
{
	struct page **p;

	if (maxsize > i->count)
		maxsize = i->count;

	if (unlikely(iov_iter_is_pipe(i)))
		return pipe_get_pages_alloc(i, pages, maxsize, start);
	if (unlikely(iov_iter_is_discard(i)))
		return -EFAULT;

	iterate_all_kinds(i, maxsize, v, ({
		unsigned long addr = (unsigned long)v.iov_base;
		size_t len = v.iov_len + (*start = addr & (PAGE_SIZE - 1));
		int n;
		int res;

		addr &= ~(PAGE_SIZE - 1);
		n = DIV_ROUND_UP(len, PAGE_SIZE);
		p = get_pages_array(n);
		if (!p)
			return -ENOMEM;
		res = get_user_pages_fast(addr, n,
				iov_iter_rw(i) != WRITE ? FOLL_WRITE : 0, p);
		if (unlikely(res < 0)) {
			kvfree(p);
			return res;
		}
		*pages = p;
		return (res == n ? len : res * PAGE_SIZE) - *start;
	0;}),({
		/* can't be more than PAGE_SIZE */
		*start = v.bv_offset;
		*pages = p = get_pages_array(1);
		if (!p)
			return -ENOMEM;
		get_page(*p = v.bv_page);
		return v.bv_len;
	}),({
		return -EFAULT;
	})
	)
	return 0;
}
EXPORT_SYMBOL(iov_iter_get_pages_alloc);

size_t csum_and_copy_from_iter(void *addr, size_t bytes, __wsum *csum,
			       struct iov_iter *i)
{
	char *to = addr;
	__wsum sum, next;
	size_t off = 0;
	sum = *csum;
	if (unlikely(iov_iter_is_pipe(i) || iov_iter_is_discard(i))) {
		WARN_ON(1);
		return 0;
	}
	iterate_and_advance(i, bytes, v, ({
		int err = 0;
		next = csum_and_copy_from_user(v.iov_base,
					       (to += v.iov_len) - v.iov_len,
					       v.iov_len, 0, &err);
		if (!err) {
			sum = csum_block_add(sum, next, off);
			off += v.iov_len;
		}
		err ? v.iov_len : 0;
	}), ({
		char *p = kmap_atomic(v.bv_page);
		sum = csum_and_memcpy((to += v.bv_len) - v.bv_len,
				      p + v.bv_offset, v.bv_len,
				      sum, off);
		kunmap_atomic(p);
		off += v.bv_len;
	}),({
		sum = csum_and_memcpy((to += v.iov_len) - v.iov_len,
				      v.iov_base, v.iov_len,
				      sum, off);
		off += v.iov_len;
	})
	)
	*csum = sum;
	return bytes;
}
EXPORT_SYMBOL(csum_and_copy_from_iter);

bool csum_and_copy_from_iter_full(void *addr, size_t bytes, __wsum *csum,
			       struct iov_iter *i)
{
	char *to = addr;
	__wsum sum, next;
	size_t off = 0;
	sum = *csum;
	if (unlikely(iov_iter_is_pipe(i) || iov_iter_is_discard(i))) {
		WARN_ON(1);
		return false;
	}
	if (unlikely(i->count < bytes))
		return false;
	iterate_all_kinds(i, bytes, v, ({
		int err = 0;
		next = csum_and_copy_from_user(v.iov_base,
					       (to += v.iov_len) - v.iov_len,
					       v.iov_len, 0, &err);
		if (err)
			return false;
		sum = csum_block_add(sum, next, off);
		off += v.iov_len;
		0;
	}), ({
		char *p = kmap_atomic(v.bv_page);
		sum = csum_and_memcpy((to += v.bv_len) - v.bv_len,
				      p + v.bv_offset, v.bv_len,
				      sum, off);
		kunmap_atomic(p);
		off += v.bv_len;
	}),({
		sum = csum_and_memcpy((to += v.iov_len) - v.iov_len,
				      v.iov_base, v.iov_len,
				      sum, off);
		off += v.iov_len;
	})
	)
	*csum = sum;
	iov_iter_advance(i, bytes);
	return true;
}
EXPORT_SYMBOL(csum_and_copy_from_iter_full);

size_t csum_and_copy_to_iter(const void *addr, size_t bytes, void *csump,
			     struct iov_iter *i)
{
	const char *from = addr;
	__wsum *csum = csump;
	__wsum sum, next;
	size_t off = 0;

	if (unlikely(iov_iter_is_pipe(i)))
		return csum_and_copy_to_pipe_iter(addr, bytes, csum, i);

	sum = *csum;
	if (unlikely(iov_iter_is_discard(i))) {
		WARN_ON(1);	/* for now */
		return 0;
	}
	iterate_and_advance(i, bytes, v, ({
		int err = 0;
		next = csum_and_copy_to_user((from += v.iov_len) - v.iov_len,
					     v.iov_base,
					     v.iov_len, 0, &err);
		if (!err) {
			sum = csum_block_add(sum, next, off);
			off += v.iov_len;
		}
		err ? v.iov_len : 0;
	}), ({
		char *p = kmap_atomic(v.bv_page);
		sum = csum_and_memcpy(p + v.bv_offset,
				      (from += v.bv_len) - v.bv_len,
				      v.bv_len, sum, off);
		kunmap_atomic(p);
		off += v.bv_len;
	}),({
		sum = csum_and_memcpy(v.iov_base,
				     (from += v.iov_len) - v.iov_len,
				     v.iov_len, sum, off);
		off += v.iov_len;
	})
	)
	*csum = sum;
	return bytes;
}
EXPORT_SYMBOL(csum_and_copy_to_iter);

size_t hash_and_copy_to_iter(const void *addr, size_t bytes, void *hashp,
		struct iov_iter *i)
{
#ifdef CONFIG_CRYPTO
	struct ahash_request *hash = hashp;
	struct scatterlist sg;
	size_t copied;

	copied = copy_to_iter(addr, bytes, i);
	sg_init_one(&sg, addr, copied);
	ahash_request_set_crypt(hash, &sg, NULL, copied);
	crypto_ahash_update(hash);
	return copied;
#else
	return 0;
#endif
}
EXPORT_SYMBOL(hash_and_copy_to_iter);

int iov_iter_npages(const struct iov_iter *i, int maxpages)
{
	size_t size = i->count;
	int npages = 0;

	if (!size)
		return 0;
	if (unlikely(iov_iter_is_discard(i)))
		return 0;

	if (unlikely(iov_iter_is_pipe(i))) {
		struct pipe_inode_info *pipe = i->pipe;
		unsigned int iter_head;
		size_t off;

		if (!sanity(i))
			return 0;

		data_start(i, &iter_head, &off);
		/* some of this one + all after this one */
		npages = pipe_space_for_user(iter_head, pipe->tail, pipe);
		if (npages >= maxpages)
			return maxpages;
	} else iterate_all_kinds(i, size, v, ({
		unsigned long p = (unsigned long)v.iov_base;
		npages += DIV_ROUND_UP(p + v.iov_len, PAGE_SIZE)
			- p / PAGE_SIZE;
		if (npages >= maxpages)
			return maxpages;
	0;}),({
		npages++;
		if (npages >= maxpages)
			return maxpages;
	}),({
		unsigned long p = (unsigned long)v.iov_base;
		npages += DIV_ROUND_UP(p + v.iov_len, PAGE_SIZE)
			- p / PAGE_SIZE;
		if (npages >= maxpages)
			return maxpages;
	})
	)
	return npages;
}
EXPORT_SYMBOL(iov_iter_npages);

const void *dup_iter(struct iov_iter *new, struct iov_iter *old, gfp_t flags)
{
	*new = *old;
	if (unlikely(iov_iter_is_pipe(new))) {
		WARN_ON(1);
		return NULL;
	}
	if (unlikely(iov_iter_is_discard(new)))
		return NULL;
	if (iov_iter_is_bvec(new))
		return new->bvec = kmemdup(new->bvec,
				    new->nr_segs * sizeof(struct bio_vec),
				    flags);
	else
		/* iovec and kvec have identical layout */
		return new->iov = kmemdup(new->iov,
				   new->nr_segs * sizeof(struct iovec),
				   flags);
}
EXPORT_SYMBOL(dup_iter);

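#if 0	/* illustrative sketch only, compiled out */
/*
 * A minimal sketch of using iov_iter_npages() to size a page array before
 * a later iov_iter_get_pages() call; the result is already clamped to the
 * cap passed in.  The helper is hypothetical.
 */
static struct page **example_alloc_page_array(const struct iov_iter *i,
					      int maxpages)
{
	int nr = iov_iter_npages(i, maxpages);

	return kcalloc(nr, sizeof(struct page *), GFP_KERNEL);
}
#endif
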
/**
 * import_iovec() - Copy an array of &struct iovec from userspace
 *     into the kernel, check that it is valid, and initialize a new
 *     &struct iov_iter iterator to access it.
 *
 * @type: One of %READ or %WRITE.
 * @uvector: Pointer to the userspace array.
 * @nr_segs: Number of elements in userspace array.
 * @fast_segs: Number of elements in @iov.
 * @iov: (input and output parameter) Pointer to pointer to (usually small
 *     on-stack) kernel array.
 * @i: Pointer to iterator that will be initialized on success.
 *
 * If the array pointed to by *@iov is large enough to hold all @nr_segs,
 * then this function places %NULL in *@iov on return. Otherwise, a new
 * array will be allocated and the result placed in *@iov. This means that
 * the caller may call kfree() on *@iov regardless of whether the small
 * on-stack array was used or not (and regardless of whether this function
 * returns an error or not).
 *
 * Return: Negative error code on error, bytes imported on success
 */
ssize_t import_iovec(int type, const struct iovec __user * uvector,
		 unsigned nr_segs, unsigned fast_segs,
		 struct iovec **iov, struct iov_iter *i)
{
	ssize_t n;
	struct iovec *p;
	n = rw_copy_check_uvector(type, uvector, nr_segs, fast_segs,
				  *iov, &p);
	if (n < 0) {
		if (p != *iov)
			kfree(p);
		*iov = NULL;
		return n;
	}
	iov_iter_init(i, type, p, nr_segs, n);
	*iov = p == *iov ? NULL : p;
	return n;
}
EXPORT_SYMBOL(import_iovec);

#ifdef CONFIG_COMPAT
#include <linux/compat.h>

ssize_t compat_import_iovec(int type,
		const struct compat_iovec __user * uvector,
		unsigned nr_segs, unsigned fast_segs,
		struct iovec **iov, struct iov_iter *i)
{
	ssize_t n;
	struct iovec *p;
	n = compat_rw_copy_check_uvector(type, uvector, nr_segs, fast_segs,
					 *iov, &p);
	if (n < 0) {
		if (p != *iov)
			kfree(p);
		*iov = NULL;
		return n;
	}
	iov_iter_init(i, type, p, nr_segs, n);
	*iov = p == *iov ? NULL : p;
	return n;
}
EXPORT_SYMBOL(compat_import_iovec);
#endif

int import_single_range(int rw, void __user *buf, size_t len,
		 struct iovec *iov, struct iov_iter *i)
{
	if (len > MAX_RW_COUNT)
		len = MAX_RW_COUNT;
	if (unlikely(!access_ok(buf, len)))
		return -EFAULT;

	iov->iov_base = buf;
	iov->iov_len = len;
	iov_iter_init(i, rw, iov, 1, len);
	return 0;
}
EXPORT_SYMBOL(import_single_range);

int iov_iter_for_each_range(struct iov_iter *i, size_t bytes,
			    int (*f)(struct kvec *vec, void *context),
			    void *context)
{
	struct kvec w;
	int err = -EINVAL;
	if (!bytes)
		return 0;

	iterate_all_kinds(i, bytes, v, -EINVAL, ({
		w.iov_base = kmap(v.bv_page) + v.bv_offset;
		w.iov_len = v.bv_len;
		err = f(&w, context);
		kunmap(v.bv_page);
		err;}), ({
		w = v;
		err = f(&w, context);})
	)
	return err;
}
EXPORT_SYMBOL(iov_iter_for_each_range);
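
#if 0	/* illustrative sketch only, compiled out */
/*
 * A minimal sketch of the import_iovec() calling convention used by
 * readv()/writev()-style syscalls: *iov starts out pointing at a small
 * on-stack array, import_iovec() may replace it with an allocated one, and
 * a single kfree() at the end is correct either way.  example_consume() is
 * a hypothetical stand-in for whatever actually uses the iterator.
 */
static ssize_t example_consume(struct file *file, struct iov_iter *iter);

static ssize_t example_readv(struct file *file,
			     const struct iovec __user *uvec,
			     unsigned int nr_segs)
{
	struct iovec iovstack[UIO_FASTIOV];
	struct iovec *iov = iovstack;
	struct iov_iter iter;
	ssize_t ret;

	ret = import_iovec(READ, uvec, nr_segs, ARRAY_SIZE(iovstack),
			   &iov, &iter);
	if (ret < 0)
		return ret;

	ret = example_consume(file, &iter);
	kfree(iov);
	return ret;
}
#endif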