#include <linux/export.h>
#include <linux/bvec.h>
#include <linux/uio.h>
#include <linux/pagemap.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/splice.h>
#include <net/checksum.h>

#define PIPE_PARANOIA /* for now */

#define iterate_iovec(i, n, __v, __p, skip, STEP) {	\
	size_t left;					\
	size_t wanted = n;				\
	__p = i->iov;					\
	__v.iov_len = min(n, __p->iov_len - skip);	\
	if (likely(__v.iov_len)) {			\
		__v.iov_base = __p->iov_base + skip;	\
		left = (STEP);				\
		__v.iov_len -= left;			\
		skip += __v.iov_len;			\
		n -= __v.iov_len;			\
	} else {					\
		left = 0;				\
	}						\
	while (unlikely(!left && n)) {			\
		__p++;					\
		__v.iov_len = min(n, __p->iov_len);	\
		if (unlikely(!__v.iov_len))		\
			continue;			\
		__v.iov_base = __p->iov_base;		\
		left = (STEP);				\
		__v.iov_len -= left;			\
		skip = __v.iov_len;			\
		n -= __v.iov_len;			\
	}						\
	n = wanted - n;					\
}

#define iterate_kvec(i, n, __v, __p, skip, STEP) {	\
	size_t wanted = n;				\
	__p = i->kvec;					\
	__v.iov_len = min(n, __p->iov_len - skip);	\
	if (likely(__v.iov_len)) {			\
		__v.iov_base = __p->iov_base + skip;	\
		(void)(STEP);				\
		skip += __v.iov_len;			\
		n -= __v.iov_len;			\
	}						\
	while (unlikely(n)) {				\
		__p++;					\
		__v.iov_len = min(n, __p->iov_len);	\
		if (unlikely(!__v.iov_len))		\
			continue;			\
		__v.iov_base = __p->iov_base;		\
		(void)(STEP);				\
		skip = __v.iov_len;			\
		n -= __v.iov_len;			\
	}						\
	n = wanted;					\
}

#define iterate_bvec(i, n, __v, __bi, skip, STEP) {	\
	struct bvec_iter __start;			\
	__start.bi_size = n;				\
	__start.bi_bvec_done = skip;			\
	__start.bi_idx = 0;				\
	for_each_bvec(__v, i->bvec, __bi, __start) {	\
		if (!__v.bv_len)			\
			continue;			\
		(void)(STEP);				\
	}						\
}

#define iterate_all_kinds(i, n, v, I, B, K) {			\
	if (likely(n)) {					\
		size_t skip = i->iov_offset;			\
		if (unlikely(i->type & ITER_BVEC)) {		\
			struct bio_vec v;			\
			struct bvec_iter __bi;			\
			iterate_bvec(i, n, v, __bi, skip, (B))	\
		} else if (unlikely(i->type & ITER_KVEC)) {	\
			const struct kvec *kvec;		\
			struct kvec v;				\
			iterate_kvec(i, n, v, kvec, skip, (K))	\
		} else {					\
			const struct iovec *iov;		\
			struct iovec v;				\
			iterate_iovec(i, n, v, iov, skip, (I))	\
		}						\
	}							\
}

#define iterate_and_advance(i, n, v, I, B, K) {			\
	if (unlikely(i->count < n))				\
		n = i->count;					\
	if (i->count) {						\
		size_t skip = i->iov_offset;			\
		if (unlikely(i->type & ITER_BVEC)) {		\
			const struct bio_vec *bvec = i->bvec;	\
			struct bio_vec v;			\
			struct bvec_iter __bi;			\
			iterate_bvec(i, n, v, __bi, skip, (B))	\
			i->bvec = __bvec_iter_bvec(i->bvec, __bi);	\
			i->nr_segs -= i->bvec - bvec;		\
			skip = __bi.bi_bvec_done;		\
		} else if (unlikely(i->type & ITER_KVEC)) {	\
			const struct kvec *kvec;		\
			struct kvec v;				\
			iterate_kvec(i, n, v, kvec, skip, (K))	\
			if (skip == kvec->iov_len) {		\
				kvec++;				\
				skip = 0;			\
			}					\
			i->nr_segs -= kvec - i->kvec;		\
			i->kvec = kvec;				\
		} else {					\
			const struct iovec *iov;		\
			struct iovec v;				\
			iterate_iovec(i, n, v, iov, skip, (I))	\
			if (skip == iov->iov_len) {		\
				iov++;				\
				skip = 0;			\
			}					\
			i->nr_segs -= iov - i->iov;		\
			i->iov = iov;				\
		}						\
		i->count -= n;					\
		i->iov_offset = skip;				\
	}							\
}
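
/*
 * Illustrative sketch (not part of this file): how a caller-side helper
 * built on iterate_all_kinds() would look.  The I/B/K arguments are
 * expressions evaluated once per iovec/bio_vec/kvec segment, with v bound
 * to the current segment; the iovec step must evaluate to the number of
 * bytes it failed to process, hence the trailing 0 in the statement
 * expression.  The helper name is hypothetical:
 *
 *	static size_t count_covered_bytes(struct iov_iter *i, size_t n)
 *	{
 *		size_t total = 0;
 *		iterate_all_kinds(i, n, v,
 *			({ total += v.iov_len; 0; }),	// user-space iovec
 *			total += v.bv_len,		// bio_vec page segment
 *			total += v.iov_len		// kernel kvec
 *		)
 *		return total;
 *	}
 */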

static size_t copy_page_to_iter_iovec(struct page *page, size_t offset, size_t bytes,
			 struct iov_iter *i)
{
	size_t skip, copy, left, wanted;
	const struct iovec *iov;
	char __user *buf;
	void *kaddr, *from;

	if (unlikely(bytes > i->count))
		bytes = i->count;

	if (unlikely(!bytes))
		return 0;

	wanted = bytes;
	iov = i->iov;
	skip = i->iov_offset;
	buf = iov->iov_base + skip;
	copy = min(bytes, iov->iov_len - skip);

	if (IS_ENABLED(CONFIG_HIGHMEM) && !fault_in_pages_writeable(buf, copy)) {
		kaddr = kmap_atomic(page);
		from = kaddr + offset;

		/* first chunk, usually the only one */
		left = __copy_to_user_inatomic(buf, from, copy);
		copy -= left;
		skip += copy;
		from += copy;
		bytes -= copy;

		while (unlikely(!left && bytes)) {
			iov++;
			buf = iov->iov_base;
			copy = min(bytes, iov->iov_len);
			left = __copy_to_user_inatomic(buf, from, copy);
			copy -= left;
			skip = copy;
			from += copy;
			bytes -= copy;
		}
		if (likely(!bytes)) {
			kunmap_atomic(kaddr);
			goto done;
		}
		offset = from - kaddr;
		buf += copy;
		kunmap_atomic(kaddr);
		copy = min(bytes, iov->iov_len - skip);
	}
	/* Too bad - revert to non-atomic kmap */

	kaddr = kmap(page);
	from = kaddr + offset;
	left = __copy_to_user(buf, from, copy);
	copy -= left;
	skip += copy;
	from += copy;
	bytes -= copy;
	while (unlikely(!left && bytes)) {
		iov++;
		buf = iov->iov_base;
		copy = min(bytes, iov->iov_len);
		left = __copy_to_user(buf, from, copy);
		copy -= left;
		skip = copy;
		from += copy;
		bytes -= copy;
	}
	kunmap(page);

done:
	if (skip == iov->iov_len) {
		iov++;
		skip = 0;
	}
	i->count -= wanted - bytes;
	i->nr_segs -= iov - i->iov;
	i->iov = iov;
	i->iov_offset = skip;
	return wanted - bytes;
}
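
/*
 * copy_page_from_iter_iovec() below mirrors copy_page_to_iter_iovec()
 * above: on HIGHMEM configurations it first tries the non-sleeping
 * kmap_atomic() + __copy_from_user_inatomic() fast path, attempted only
 * when the first user segment can be pre-faulted; if the atomic copy
 * still comes up short, it falls back to the sleeping kmap() +
 * __copy_from_user() path for the remainder.
 */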

static size_t copy_page_from_iter_iovec(struct page *page, size_t offset, size_t bytes,
			 struct iov_iter *i)
{
	size_t skip, copy, left, wanted;
	const struct iovec *iov;
	char __user *buf;
	void *kaddr, *to;

	if (unlikely(bytes > i->count))
		bytes = i->count;

	if (unlikely(!bytes))
		return 0;

	wanted = bytes;
	iov = i->iov;
	skip = i->iov_offset;
	buf = iov->iov_base + skip;
	copy = min(bytes, iov->iov_len - skip);

	if (IS_ENABLED(CONFIG_HIGHMEM) && !fault_in_pages_readable(buf, copy)) {
		kaddr = kmap_atomic(page);
		to = kaddr + offset;

		/* first chunk, usually the only one */
		left = __copy_from_user_inatomic(to, buf, copy);
		copy -= left;
		skip += copy;
		to += copy;
		bytes -= copy;

		while (unlikely(!left && bytes)) {
			iov++;
			buf = iov->iov_base;
			copy = min(bytes, iov->iov_len);
			left = __copy_from_user_inatomic(to, buf, copy);
			copy -= left;
			skip = copy;
			to += copy;
			bytes -= copy;
		}
		if (likely(!bytes)) {
			kunmap_atomic(kaddr);
			goto done;
		}
		offset = to - kaddr;
		buf += copy;
		kunmap_atomic(kaddr);
		copy = min(bytes, iov->iov_len - skip);
	}
	/* Too bad - revert to non-atomic kmap */

	kaddr = kmap(page);
	to = kaddr + offset;
	left = __copy_from_user(to, buf, copy);
	copy -= left;
	skip += copy;
	to += copy;
	bytes -= copy;
	while (unlikely(!left && bytes)) {
		iov++;
		buf = iov->iov_base;
		copy = min(bytes, iov->iov_len);
		left = __copy_from_user(to, buf, copy);
		copy -= left;
		skip = copy;
		to += copy;
		bytes -= copy;
	}
	kunmap(page);

done:
	if (skip == iov->iov_len) {
		iov++;
		skip = 0;
	}
	i->count -= wanted - bytes;
	i->nr_segs -= iov - i->iov;
	i->iov = iov;
	i->iov_offset = skip;
	return wanted - bytes;
}

#ifdef PIPE_PARANOIA
static bool sanity(const struct iov_iter *i)
{
	struct pipe_inode_info *pipe = i->pipe;
	int idx = i->idx;
	int next = pipe->curbuf + pipe->nrbufs;
	if (i->iov_offset) {
		struct pipe_buffer *p;
		if (unlikely(!pipe->nrbufs))
			goto Bad;	// pipe must be non-empty
		if (unlikely(idx != ((next - 1) & (pipe->buffers - 1))))
			goto Bad;	// must be at the last buffer...

		p = &pipe->bufs[idx];
		if (unlikely(p->offset + p->len != i->iov_offset))
			goto Bad;	// ... at the end of segment
	} else {
		if (idx != (next & (pipe->buffers - 1)))
			goto Bad;	// must be right after the last buffer
	}
	return true;
Bad:
	printk(KERN_ERR "idx = %d, offset = %zd\n", i->idx, i->iov_offset);
	printk(KERN_ERR "curbuf = %d, nrbufs = %d, buffers = %d\n",
			pipe->curbuf, pipe->nrbufs, pipe->buffers);
	for (idx = 0; idx < pipe->buffers; idx++)
		printk(KERN_ERR "[%p %p %d %d]\n",
			pipe->bufs[idx].ops,
			pipe->bufs[idx].page,
			pipe->bufs[idx].offset,
			pipe->bufs[idx].len);
	WARN_ON(1);
	return false;
}
#else
#define sanity(i) true
#endif

static inline int next_idx(int idx, struct pipe_inode_info *pipe)
{
	return (idx + 1) & (pipe->buffers - 1);
}

static size_t copy_page_to_iter_pipe(struct page *page, size_t offset, size_t bytes,
			 struct iov_iter *i)
{
	struct pipe_inode_info *pipe = i->pipe;
	struct pipe_buffer *buf;
	size_t off;
	int idx;

	if (unlikely(bytes > i->count))
		bytes = i->count;

	if (unlikely(!bytes))
		return 0;

	if (!sanity(i))
		return 0;

	off = i->iov_offset;
	idx = i->idx;
	buf = &pipe->bufs[idx];
	if (off) {
		if (offset == off && buf->page == page) {
			/* merge with the last one */
			buf->len += bytes;
			i->iov_offset += bytes;
			goto out;
		}
		idx = next_idx(idx, pipe);
		buf = &pipe->bufs[idx];
	}
	if (idx == pipe->curbuf && pipe->nrbufs)
		return 0;
	pipe->nrbufs++;
	buf->ops = &page_cache_pipe_buf_ops;
	get_page(buf->page = page);
	buf->offset = offset;
	buf->len = bytes;
	i->iov_offset = offset + bytes;
	i->idx = idx;
out:
	i->count -= bytes;
	return bytes;
}

/*
 * Fault in one or more iovecs of the given iov_iter, to a maximum length of
 * bytes. For each iovec, fault in each page that constitutes the iovec.
 *
 * Return 0 on success, or non-zero if the memory could not be accessed (i.e.
 * because it is an invalid address).
 */
int iov_iter_fault_in_readable(struct iov_iter *i, size_t bytes)
{
	size_t skip = i->iov_offset;
	const struct iovec *iov;
	int err;
	struct iovec v;

	if (!(i->type & (ITER_BVEC|ITER_KVEC))) {
		iterate_iovec(i, bytes, v, iov, skip, ({
			err = fault_in_pages_readable(v.iov_base, v.iov_len);
			if (unlikely(err))
				return err;
			0;}))
	}
	return 0;
}
EXPORT_SYMBOL(iov_iter_fault_in_readable);
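
/*
 * Typical calling pattern (a sketch modelled on the buffered write path;
 * the surrounding loop and names are illustrative): pre-fault the user
 * pages while sleeping is still allowed, then perform the real copy with
 * iov_iter_copy_from_user_atomic() under kmap_atomic(), retrying on a
 * short copy:
 *
 *	if (iov_iter_fault_in_readable(i, bytes))
 *		return -EFAULT;
 *	...
 *	copied = iov_iter_copy_from_user_atomic(page, i, offset, bytes);
 *	iov_iter_advance(i, copied);
 */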

void iov_iter_init(struct iov_iter *i, int direction,
			const struct iovec *iov, unsigned long nr_segs,
			size_t count)
{
	/* It will get better. Eventually... */
	if (uaccess_kernel()) {
		direction |= ITER_KVEC;
		i->type = direction;
		i->kvec = (struct kvec *)iov;
	} else {
		i->type = direction;
		i->iov = iov;
	}
	i->nr_segs = nr_segs;
	i->iov_offset = 0;
	i->count = count;
}
EXPORT_SYMBOL(iov_iter_init);

static void memcpy_from_page(char *to, struct page *page, size_t offset, size_t len)
{
	char *from = kmap_atomic(page);
	memcpy(to, from + offset, len);
	kunmap_atomic(from);
}

static void memcpy_to_page(struct page *page, size_t offset, const char *from, size_t len)
{
	char *to = kmap_atomic(page);
	memcpy(to + offset, from, len);
	kunmap_atomic(to);
}

static void memzero_page(struct page *page, size_t offset, size_t len)
{
	char *addr = kmap_atomic(page);
	memset(addr + offset, 0, len);
	kunmap_atomic(addr);
}

static inline bool allocated(struct pipe_buffer *buf)
{
	return buf->ops == &default_pipe_buf_ops;
}

static inline void data_start(const struct iov_iter *i, int *idxp, size_t *offp)
{
	size_t off = i->iov_offset;
	int idx = i->idx;
	if (off && (!allocated(&i->pipe->bufs[idx]) || off == PAGE_SIZE)) {
		idx = next_idx(idx, i->pipe);
		off = 0;
	}
	*idxp = idx;
	*offp = off;
}

static size_t push_pipe(struct iov_iter *i, size_t size,
			int *idxp, size_t *offp)
{
	struct pipe_inode_info *pipe = i->pipe;
	size_t off;
	int idx;
	ssize_t left;

	if (unlikely(size > i->count))
		size = i->count;
	if (unlikely(!size))
		return 0;

	left = size;
	data_start(i, &idx, &off);
	*idxp = idx;
	*offp = off;
	if (off) {
		left -= PAGE_SIZE - off;
		if (left <= 0) {
			pipe->bufs[idx].len += size;
			return size;
		}
		pipe->bufs[idx].len = PAGE_SIZE;
		idx = next_idx(idx, pipe);
	}
	while (idx != pipe->curbuf || !pipe->nrbufs) {
		struct page *page = alloc_page(GFP_USER);
		if (!page)
			break;
		pipe->nrbufs++;
		pipe->bufs[idx].ops = &default_pipe_buf_ops;
		pipe->bufs[idx].page = page;
		pipe->bufs[idx].offset = 0;
		if (left <= PAGE_SIZE) {
			pipe->bufs[idx].len = left;
			return size;
		}
		pipe->bufs[idx].len = PAGE_SIZE;
		left -= PAGE_SIZE;
		idx = next_idx(idx, pipe);
	}
	return size - left;
}

static size_t copy_pipe_to_iter(const void *addr, size_t bytes,
				struct iov_iter *i)
{
	struct pipe_inode_info *pipe = i->pipe;
	size_t n, off;
	int idx;

	if (!sanity(i))
		return 0;

	bytes = n = push_pipe(i, bytes, &idx, &off);
	if (unlikely(!n))
		return 0;
	for ( ; n; idx = next_idx(idx, pipe), off = 0) {
		size_t chunk = min_t(size_t, n, PAGE_SIZE - off);
		memcpy_to_page(pipe->bufs[idx].page, off, addr, chunk);
		i->idx = idx;
		i->iov_offset = off + chunk;
		n -= chunk;
		addr += chunk;
	}
	i->count -= bytes;
	return bytes;
}

size_t copy_to_iter(const void *addr, size_t bytes, struct iov_iter *i)
{
	const char *from = addr;
	if (unlikely(i->type & ITER_PIPE))
		return copy_pipe_to_iter(addr, bytes, i);
	iterate_and_advance(i, bytes, v,
		__copy_to_user(v.iov_base, (from += v.iov_len) - v.iov_len,
			       v.iov_len),
		memcpy_to_page(v.bv_page, v.bv_offset,
			       (from += v.bv_len) - v.bv_len, v.bv_len),
		memcpy(v.iov_base, (from += v.iov_len) - v.iov_len, v.iov_len)
	)

	return bytes;
}
EXPORT_SYMBOL(copy_to_iter);
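
/*
 * Illustrative sketch of the common caller pattern for copy_to_iter(); the
 * buffer and length names are hypothetical.  The iterator is advanced by
 * the amount actually copied, so a short return simply means the copy
 * faulted partway through:
 *
 *	size_t copied = copy_to_iter(kbuf, len, iter);
 *	if (copied != len)
 *		return -EFAULT;
 */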

size_t copy_from_iter(void *addr, size_t bytes, struct iov_iter *i)
{
	char *to = addr;
	if (unlikely(i->type & ITER_PIPE)) {
		WARN_ON(1);
		return 0;
	}
	iterate_and_advance(i, bytes, v,
		__copy_from_user((to += v.iov_len) - v.iov_len, v.iov_base,
				 v.iov_len),
		memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page,
				 v.bv_offset, v.bv_len),
		memcpy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len)
	)

	return bytes;
}
EXPORT_SYMBOL(copy_from_iter);

bool copy_from_iter_full(void *addr, size_t bytes, struct iov_iter *i)
{
	char *to = addr;
	if (unlikely(i->type & ITER_PIPE)) {
		WARN_ON(1);
		return false;
	}
	if (unlikely(i->count < bytes))
		return false;

	iterate_all_kinds(i, bytes, v, ({
		if (__copy_from_user((to += v.iov_len) - v.iov_len,
				     v.iov_base, v.iov_len))
			return false;
		0;}),
		memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page,
				 v.bv_offset, v.bv_len),
		memcpy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len)
	)

	iov_iter_advance(i, bytes);
	return true;
}
EXPORT_SYMBOL(copy_from_iter_full);

size_t copy_from_iter_nocache(void *addr, size_t bytes, struct iov_iter *i)
{
	char *to = addr;
	if (unlikely(i->type & ITER_PIPE)) {
		WARN_ON(1);
		return 0;
	}
	iterate_and_advance(i, bytes, v,
		__copy_from_user_inatomic_nocache((to += v.iov_len) - v.iov_len,
					 v.iov_base, v.iov_len),
		memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page,
				 v.bv_offset, v.bv_len),
		memcpy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len)
	)

	return bytes;
}
EXPORT_SYMBOL(copy_from_iter_nocache);

#ifdef CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE
size_t copy_from_iter_flushcache(void *addr, size_t bytes, struct iov_iter *i)
{
	char *to = addr;
	if (unlikely(i->type & ITER_PIPE)) {
		WARN_ON(1);
		return 0;
	}
	iterate_and_advance(i, bytes, v,
		__copy_from_user_flushcache((to += v.iov_len) - v.iov_len,
					 v.iov_base, v.iov_len),
		memcpy_page_flushcache((to += v.bv_len) - v.bv_len, v.bv_page,
				 v.bv_offset, v.bv_len),
		memcpy_flushcache((to += v.iov_len) - v.iov_len, v.iov_base,
			v.iov_len)
	)

	return bytes;
}
EXPORT_SYMBOL_GPL(copy_from_iter_flushcache);
#endif

bool copy_from_iter_full_nocache(void *addr, size_t bytes, struct iov_iter *i)
{
	char *to = addr;
	if (unlikely(i->type & ITER_PIPE)) {
		WARN_ON(1);
		return false;
	}
	if (unlikely(i->count < bytes))
		return false;
	iterate_all_kinds(i, bytes, v, ({
		if (__copy_from_user_inatomic_nocache((to += v.iov_len) - v.iov_len,
					     v.iov_base, v.iov_len))
			return false;
		0;}),
		memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page,
				 v.bv_offset, v.bv_len),
		memcpy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len)
	)

	iov_iter_advance(i, bytes);
	return true;
}
EXPORT_SYMBOL(copy_from_iter_full_nocache);

size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes,
			 struct iov_iter *i)
{
	if (i->type & (ITER_BVEC|ITER_KVEC)) {
		void *kaddr = kmap_atomic(page);
		size_t wanted = copy_to_iter(kaddr + offset, bytes, i);
		kunmap_atomic(kaddr);
		return wanted;
	} else if (likely(!(i->type & ITER_PIPE)))
		return copy_page_to_iter_iovec(page, offset, bytes, i);
	else
		return copy_page_to_iter_pipe(page, offset, bytes, i);
}
EXPORT_SYMBOL(copy_page_to_iter);
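
/*
 * Contract difference between the plain and _full variants, as a sketch
 * (the header struct is hypothetical): copy_from_iter() consumes whatever
 * it managed to copy and returns that amount, while copy_from_iter_full()
 * is all-or-nothing and leaves the iterator untouched on failure:
 *
 *	if (!copy_from_iter_full(&hdr, sizeof(hdr), iter))
 *		return -EFAULT;	// iterator unchanged, safe to retry
 */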

size_t copy_page_from_iter(struct page *page, size_t offset, size_t bytes,
			 struct iov_iter *i)
{
	if (unlikely(i->type & ITER_PIPE)) {
		WARN_ON(1);
		return 0;
	}
	if (i->type & (ITER_BVEC|ITER_KVEC)) {
		void *kaddr = kmap_atomic(page);
		size_t wanted = copy_from_iter(kaddr + offset, bytes, i);
		kunmap_atomic(kaddr);
		return wanted;
	} else
		return copy_page_from_iter_iovec(page, offset, bytes, i);
}
EXPORT_SYMBOL(copy_page_from_iter);

static size_t pipe_zero(size_t bytes, struct iov_iter *i)
{
	struct pipe_inode_info *pipe = i->pipe;
	size_t n, off;
	int idx;

	if (!sanity(i))
		return 0;

	bytes = n = push_pipe(i, bytes, &idx, &off);
	if (unlikely(!n))
		return 0;

	for ( ; n; idx = next_idx(idx, pipe), off = 0) {
		size_t chunk = min_t(size_t, n, PAGE_SIZE - off);
		memzero_page(pipe->bufs[idx].page, off, chunk);
		i->idx = idx;
		i->iov_offset = off + chunk;
		n -= chunk;
	}
	i->count -= bytes;
	return bytes;
}

size_t iov_iter_zero(size_t bytes, struct iov_iter *i)
{
	if (unlikely(i->type & ITER_PIPE))
		return pipe_zero(bytes, i);
	iterate_and_advance(i, bytes, v,
		__clear_user(v.iov_base, v.iov_len),
		memzero_page(v.bv_page, v.bv_offset, v.bv_len),
		memset(v.iov_base, 0, v.iov_len)
	)

	return bytes;
}
EXPORT_SYMBOL(iov_iter_zero);

size_t iov_iter_copy_from_user_atomic(struct page *page,
		struct iov_iter *i, unsigned long offset, size_t bytes)
{
	char *kaddr = kmap_atomic(page), *p = kaddr + offset;
	if (unlikely(i->type & ITER_PIPE)) {
		kunmap_atomic(kaddr);
		WARN_ON(1);
		return 0;
	}
	iterate_all_kinds(i, bytes, v,
		__copy_from_user_inatomic((p += v.iov_len) - v.iov_len,
					  v.iov_base, v.iov_len),
		memcpy_from_page((p += v.bv_len) - v.bv_len, v.bv_page,
				 v.bv_offset, v.bv_len),
		memcpy((p += v.iov_len) - v.iov_len, v.iov_base, v.iov_len)
	)
	kunmap_atomic(kaddr);
	return bytes;
}
EXPORT_SYMBOL(iov_iter_copy_from_user_atomic);
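
/*
 * Illustrative use of iov_iter_zero() (names are hypothetical): when a
 * read hits a hole, fill the destination with zeroes instead of copying;
 * like the copy routines it returns the number of bytes actually cleared:
 *
 *	size_t cleared = iov_iter_zero(hole_len, iter);
 *	if (cleared != hole_len)
 *		return copied ? copied : -EFAULT;
 */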

static inline void pipe_truncate(struct iov_iter *i)
{
	struct pipe_inode_info *pipe = i->pipe;
	if (pipe->nrbufs) {
		size_t off = i->iov_offset;
		int idx = i->idx;
		int nrbufs = (idx - pipe->curbuf) & (pipe->buffers - 1);
		if (off) {
			pipe->bufs[idx].len = off - pipe->bufs[idx].offset;
			idx = next_idx(idx, pipe);
			nrbufs++;
		}
		while (pipe->nrbufs > nrbufs) {
			pipe_buf_release(pipe, &pipe->bufs[idx]);
			idx = next_idx(idx, pipe);
			pipe->nrbufs--;
		}
	}
}

static void pipe_advance(struct iov_iter *i, size_t size)
{
	struct pipe_inode_info *pipe = i->pipe;
	if (unlikely(i->count < size))
		size = i->count;
	if (size) {
		struct pipe_buffer *buf;
		size_t off = i->iov_offset, left = size;
		int idx = i->idx;
		if (off) /* make it relative to the beginning of buffer */
			left += off - pipe->bufs[idx].offset;
		while (1) {
			buf = &pipe->bufs[idx];
			if (left <= buf->len)
				break;
			left -= buf->len;
			idx = next_idx(idx, pipe);
		}
		i->idx = idx;
		i->iov_offset = buf->offset + left;
	}
	i->count -= size;
	/* ... and discard everything past that point */
	pipe_truncate(i);
}

void iov_iter_advance(struct iov_iter *i, size_t size)
{
	if (unlikely(i->type & ITER_PIPE)) {
		pipe_advance(i, size);
		return;
	}
	iterate_and_advance(i, size, v, 0, 0, 0)
}
EXPORT_SYMBOL(iov_iter_advance);

void iov_iter_revert(struct iov_iter *i, size_t unroll)
{
	if (!unroll)
		return;
	if (WARN_ON(unroll > MAX_RW_COUNT))
		return;
	i->count += unroll;
	if (unlikely(i->type & ITER_PIPE)) {
		struct pipe_inode_info *pipe = i->pipe;
		int idx = i->idx;
		size_t off = i->iov_offset;
		while (1) {
			size_t n = off - pipe->bufs[idx].offset;
			if (unroll < n) {
				off -= unroll;
				break;
			}
			unroll -= n;
			if (!unroll && idx == i->start_idx) {
				off = 0;
				break;
			}
			if (!idx--)
				idx = pipe->buffers - 1;
			off = pipe->bufs[idx].offset + pipe->bufs[idx].len;
		}
		i->iov_offset = off;
		i->idx = idx;
		pipe_truncate(i);
		return;
	}
	if (unroll <= i->iov_offset) {
		i->iov_offset -= unroll;
		return;
	}
	unroll -= i->iov_offset;
	if (i->type & ITER_BVEC) {
		const struct bio_vec *bvec = i->bvec;
		while (1) {
			size_t n = (--bvec)->bv_len;
			i->nr_segs++;
			if (unroll <= n) {
				i->bvec = bvec;
				i->iov_offset = n - unroll;
				return;
			}
			unroll -= n;
		}
	} else { /* same logics for iovec and kvec */
		const struct iovec *iov = i->iov;
		while (1) {
			size_t n = (--iov)->iov_len;
			i->nr_segs++;
			if (unroll <= n) {
				i->iov = iov;
				i->iov_offset = n - unroll;
				return;
			}
			unroll -= n;
		}
	}
}
EXPORT_SYMBOL(iov_iter_revert);
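
/*
 * Sketch of the advance/revert pairing (modelled on ->write_iter callers
 * that consume the iterator optimistically; do_the_io() is hypothetical):
 *
 *	size_t before = iov_iter_count(i);
 *	ssize_t ret = do_the_io(i);	// may consume part of i
 *	size_t consumed = before - iov_iter_count(i);
 *	if (ret >= 0 && (size_t)ret < consumed)
 *		iov_iter_revert(i, consumed - ret);
 */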

/*
 * Return the count of just the current iov_iter segment.
 */
size_t iov_iter_single_seg_count(const struct iov_iter *i)
{
	if (unlikely(i->type & ITER_PIPE))
		return i->count;	// it is a silly place, anyway
	if (i->nr_segs == 1)
		return i->count;
	else if (i->type & ITER_BVEC)
		return min(i->count, i->bvec->bv_len - i->iov_offset);
	else
		return min(i->count, i->iov->iov_len - i->iov_offset);
}
EXPORT_SYMBOL(iov_iter_single_seg_count);

void iov_iter_kvec(struct iov_iter *i, int direction,
			const struct kvec *kvec, unsigned long nr_segs,
			size_t count)
{
	BUG_ON(!(direction & ITER_KVEC));
	i->type = direction;
	i->kvec = kvec;
	i->nr_segs = nr_segs;
	i->iov_offset = 0;
	i->count = count;
}
EXPORT_SYMBOL(iov_iter_kvec);

void iov_iter_bvec(struct iov_iter *i, int direction,
			const struct bio_vec *bvec, unsigned long nr_segs,
			size_t count)
{
	BUG_ON(!(direction & ITER_BVEC));
	i->type = direction;
	i->bvec = bvec;
	i->nr_segs = nr_segs;
	i->iov_offset = 0;
	i->count = count;
}
EXPORT_SYMBOL(iov_iter_bvec);

void iov_iter_pipe(struct iov_iter *i, int direction,
			struct pipe_inode_info *pipe,
			size_t count)
{
	BUG_ON(direction != ITER_PIPE);
	WARN_ON(pipe->nrbufs == pipe->buffers);
	i->type = direction;
	i->pipe = pipe;
	i->idx = (pipe->curbuf + pipe->nrbufs) & (pipe->buffers - 1);
	i->iov_offset = 0;
	i->count = count;
	i->start_idx = i->idx;
}
EXPORT_SYMBOL(iov_iter_pipe);

unsigned long iov_iter_alignment(const struct iov_iter *i)
{
	unsigned long res = 0;
	size_t size = i->count;

	if (unlikely(i->type & ITER_PIPE)) {
		if (size && i->iov_offset && allocated(&i->pipe->bufs[i->idx]))
			return size | i->iov_offset;
		return size;
	}
	iterate_all_kinds(i, size, v,
		(res |= (unsigned long)v.iov_base | v.iov_len, 0),
		res |= v.bv_offset | v.bv_len,
		res |= (unsigned long)v.iov_base | v.iov_len
	)
	return res;
}
EXPORT_SYMBOL(iov_iter_alignment);

unsigned long iov_iter_gap_alignment(const struct iov_iter *i)
{
	unsigned long res = 0;
	size_t size = i->count;

	if (unlikely(i->type & ITER_PIPE)) {
		WARN_ON(1);
		return ~0U;
	}

	iterate_all_kinds(i, size, v,
		(res |= (!res ? 0 : (unsigned long)v.iov_base) |
			(size != v.iov_len ? size : 0), 0),
		(res |= (!res ? 0 : (unsigned long)v.bv_offset) |
			(size != v.bv_len ? size : 0)),
		(res |= (!res ? 0 : (unsigned long)v.iov_base) |
			(size != v.iov_len ? size : 0))
	);
	return res;
}
EXPORT_SYMBOL(iov_iter_gap_alignment);
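
/*
 * Typical iov_iter_alignment() use, as in direct-I/O setup (the mask name
 * is hypothetical): every segment's base and length are OR'ed together, so
 * a single misaligned segment taints the result and forces the fallback:
 *
 *	if (iov_iter_alignment(iter) & blocksize_mask)
 *		return -EINVAL;		// fall back to buffered I/O
 */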

static inline ssize_t __pipe_get_pages(struct iov_iter *i,
				size_t maxsize,
				struct page **pages,
				int idx,
				size_t *start)
{
	struct pipe_inode_info *pipe = i->pipe;
	ssize_t n = push_pipe(i, maxsize, &idx, start);
	if (!n)
		return -EFAULT;

	maxsize = n;
	n += *start;
	while (n > 0) {
		get_page(*pages++ = pipe->bufs[idx].page);
		idx = next_idx(idx, pipe);
		n -= PAGE_SIZE;
	}

	return maxsize;
}

static ssize_t pipe_get_pages(struct iov_iter *i,
		   struct page **pages, size_t maxsize, unsigned maxpages,
		   size_t *start)
{
	unsigned npages;
	size_t capacity;
	int idx;

	if (!maxsize)
		return 0;

	if (!sanity(i))
		return -EFAULT;

	data_start(i, &idx, start);
	/* some of this one + all after this one */
	npages = ((i->pipe->curbuf - idx - 1) & (i->pipe->buffers - 1)) + 1;
	capacity = min(npages, maxpages) * PAGE_SIZE - *start;

	return __pipe_get_pages(i, min(maxsize, capacity), pages, idx, start);
}

ssize_t iov_iter_get_pages(struct iov_iter *i,
		   struct page **pages, size_t maxsize, unsigned maxpages,
		   size_t *start)
{
	if (maxsize > i->count)
		maxsize = i->count;

	if (unlikely(i->type & ITER_PIPE))
		return pipe_get_pages(i, pages, maxsize, maxpages, start);
	iterate_all_kinds(i, maxsize, v, ({
		unsigned long addr = (unsigned long)v.iov_base;
		size_t len = v.iov_len + (*start = addr & (PAGE_SIZE - 1));
		int n;
		int res;

		if (len > maxpages * PAGE_SIZE)
			len = maxpages * PAGE_SIZE;
		addr &= ~(PAGE_SIZE - 1);
		n = DIV_ROUND_UP(len, PAGE_SIZE);
		res = get_user_pages_fast(addr, n, (i->type & WRITE) != WRITE, pages);
		if (unlikely(res < 0))
			return res;
		return (res == n ? len : res * PAGE_SIZE) - *start;
	0;}),({
		/* can't be more than PAGE_SIZE */
		*start = v.bv_offset;
		get_page(*pages = v.bv_page);
		return v.bv_len;
	}),({
		return -EFAULT;
	})
	)
	return 0;
}
EXPORT_SYMBOL(iov_iter_get_pages);
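
/*
 * Illustrative caller of iov_iter_get_pages() (array sizing is
 * hypothetical): the returned byte count begins at *start within pages[0],
 * and the caller owns one reference per returned page:
 *
 *	struct page *pages[16];
 *	size_t start;
 *	ssize_t n = iov_iter_get_pages(iter, pages, SIZE_MAX, 16, &start);
 *	if (n > 0) {
 *		int k, np = DIV_ROUND_UP(start + n, PAGE_SIZE);
 *		for (k = 0; k < np; k++)
 *			put_page(pages[k]);
 *	}
 */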

static struct page **get_pages_array(size_t n)
{
	return kvmalloc_array(n, sizeof(struct page *), GFP_KERNEL);
}

static ssize_t pipe_get_pages_alloc(struct iov_iter *i,
		   struct page ***pages, size_t maxsize,
		   size_t *start)
{
	struct page **p;
	ssize_t n;
	int idx;
	int npages;

	if (!maxsize)
		return 0;

	if (!sanity(i))
		return -EFAULT;

	data_start(i, &idx, start);
	/* some of this one + all after this one */
	npages = ((i->pipe->curbuf - idx - 1) & (i->pipe->buffers - 1)) + 1;
	n = npages * PAGE_SIZE - *start;
	if (maxsize > n)
		maxsize = n;
	else
		npages = DIV_ROUND_UP(maxsize + *start, PAGE_SIZE);
	p = get_pages_array(npages);
	if (!p)
		return -ENOMEM;
	n = __pipe_get_pages(i, maxsize, p, idx, start);
	if (n > 0)
		*pages = p;
	else
		kvfree(p);
	return n;
}

ssize_t iov_iter_get_pages_alloc(struct iov_iter *i,
		   struct page ***pages, size_t maxsize,
		   size_t *start)
{
	struct page **p;

	if (maxsize > i->count)
		maxsize = i->count;

	if (unlikely(i->type & ITER_PIPE))
		return pipe_get_pages_alloc(i, pages, maxsize, start);
	iterate_all_kinds(i, maxsize, v, ({
		unsigned long addr = (unsigned long)v.iov_base;
		size_t len = v.iov_len + (*start = addr & (PAGE_SIZE - 1));
		int n;
		int res;

		addr &= ~(PAGE_SIZE - 1);
		n = DIV_ROUND_UP(len, PAGE_SIZE);
		p = get_pages_array(n);
		if (!p)
			return -ENOMEM;
		res = get_user_pages_fast(addr, n, (i->type & WRITE) != WRITE, p);
		if (unlikely(res < 0)) {
			kvfree(p);
			return res;
		}
		*pages = p;
		return (res == n ? len : res * PAGE_SIZE) - *start;
	0;}),({
		/* can't be more than PAGE_SIZE */
		*start = v.bv_offset;
		*pages = p = get_pages_array(1);
		if (!p)
			return -ENOMEM;
		get_page(*p = v.bv_page);
		return v.bv_len;
	}),({
		return -EFAULT;
	})
	)
	return 0;
}
EXPORT_SYMBOL(iov_iter_get_pages_alloc);
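
/*
 * The _alloc variant sizes the page array itself; on success the caller
 * drops the page references and then frees the array with kvfree(), since
 * get_pages_array() may fall back to vmalloc (sketch, error handling
 * elided):
 *
 *	struct page **pages;
 *	size_t start;
 *	ssize_t n = iov_iter_get_pages_alloc(iter, &pages, maxsize, &start);
 *	...
 *	kvfree(pages);
 */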

size_t csum_and_copy_from_iter(void *addr, size_t bytes, __wsum *csum,
			       struct iov_iter *i)
{
	char *to = addr;
	__wsum sum, next;
	size_t off = 0;
	sum = *csum;
	if (unlikely(i->type & ITER_PIPE)) {
		WARN_ON(1);
		return 0;
	}
	iterate_and_advance(i, bytes, v, ({
		int err = 0;
		next = csum_and_copy_from_user(v.iov_base,
					       (to += v.iov_len) - v.iov_len,
					       v.iov_len, 0, &err);
		if (!err) {
			sum = csum_block_add(sum, next, off);
			off += v.iov_len;
		}
		err ? v.iov_len : 0;
	}), ({
		char *p = kmap_atomic(v.bv_page);
		next = csum_partial_copy_nocheck(p + v.bv_offset,
						 (to += v.bv_len) - v.bv_len,
						 v.bv_len, 0);
		kunmap_atomic(p);
		sum = csum_block_add(sum, next, off);
		off += v.bv_len;
	}),({
		next = csum_partial_copy_nocheck(v.iov_base,
						 (to += v.iov_len) - v.iov_len,
						 v.iov_len, 0);
		sum = csum_block_add(sum, next, off);
		off += v.iov_len;
	})
	)
	*csum = sum;
	return bytes;
}
EXPORT_SYMBOL(csum_and_copy_from_iter);

bool csum_and_copy_from_iter_full(void *addr, size_t bytes, __wsum *csum,
			       struct iov_iter *i)
{
	char *to = addr;
	__wsum sum, next;
	size_t off = 0;
	sum = *csum;
	if (unlikely(i->type & ITER_PIPE)) {
		WARN_ON(1);
		return false;
	}
	if (unlikely(i->count < bytes))
		return false;
	iterate_all_kinds(i, bytes, v, ({
		int err = 0;
		next = csum_and_copy_from_user(v.iov_base,
					       (to += v.iov_len) - v.iov_len,
					       v.iov_len, 0, &err);
		if (err)
			return false;
		sum = csum_block_add(sum, next, off);
		off += v.iov_len;
		0;
	}), ({
		char *p = kmap_atomic(v.bv_page);
		next = csum_partial_copy_nocheck(p + v.bv_offset,
						 (to += v.bv_len) - v.bv_len,
						 v.bv_len, 0);
		kunmap_atomic(p);
		sum = csum_block_add(sum, next, off);
		off += v.bv_len;
	}),({
		next = csum_partial_copy_nocheck(v.iov_base,
						 (to += v.iov_len) - v.iov_len,
						 v.iov_len, 0);
		sum = csum_block_add(sum, next, off);
		off += v.iov_len;
	})
	)
	*csum = sum;
	iov_iter_advance(i, bytes);
	return true;
}
EXPORT_SYMBOL(csum_and_copy_from_iter_full);
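
/*
 * Sketch of the networking-style calling pattern (seed and names are
 * illustrative): the running __wsum is seeded by the caller, updated with
 * csum_block_add() at the right offset for each chunk, and only folded
 * once the full datagram has been copied:
 *
 *	__wsum csum = 0;
 *	if (!csum_and_copy_from_iter_full(data, len, &csum, from))
 *		return -EFAULT;
 */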

size_t csum_and_copy_to_iter(const void *addr, size_t bytes, __wsum *csum,
			     struct iov_iter *i)
{
	const char *from = addr;
	__wsum sum, next;
	size_t off = 0;
	sum = *csum;
	if (unlikely(i->type & ITER_PIPE)) {
		WARN_ON(1);	/* for now */
		return 0;
	}
	iterate_and_advance(i, bytes, v, ({
		int err = 0;
		next = csum_and_copy_to_user((from += v.iov_len) - v.iov_len,
					     v.iov_base,
					     v.iov_len, 0, &err);
		if (!err) {
			sum = csum_block_add(sum, next, off);
			off += v.iov_len;
		}
		err ? v.iov_len : 0;
	}), ({
		char *p = kmap_atomic(v.bv_page);
		next = csum_partial_copy_nocheck((from += v.bv_len) - v.bv_len,
						 p + v.bv_offset,
						 v.bv_len, 0);
		kunmap_atomic(p);
		sum = csum_block_add(sum, next, off);
		off += v.bv_len;
	}),({
		next = csum_partial_copy_nocheck((from += v.iov_len) - v.iov_len,
						 v.iov_base,
						 v.iov_len, 0);
		sum = csum_block_add(sum, next, off);
		off += v.iov_len;
	})
	)
	*csum = sum;
	return bytes;
}
EXPORT_SYMBOL(csum_and_copy_to_iter);

int iov_iter_npages(const struct iov_iter *i, int maxpages)
{
	size_t size = i->count;
	int npages = 0;

	if (!size)
		return 0;

	if (unlikely(i->type & ITER_PIPE)) {
		struct pipe_inode_info *pipe = i->pipe;
		size_t off;
		int idx;

		if (!sanity(i))
			return 0;

		data_start(i, &idx, &off);
		/* some of this one + all after this one */
		npages = ((pipe->curbuf - idx - 1) & (pipe->buffers - 1)) + 1;
		if (npages >= maxpages)
			return maxpages;
	} else iterate_all_kinds(i, size, v, ({
		unsigned long p = (unsigned long)v.iov_base;
		npages += DIV_ROUND_UP(p + v.iov_len, PAGE_SIZE)
			- p / PAGE_SIZE;
		if (npages >= maxpages)
			return maxpages;
	0;}),({
		npages++;
		if (npages >= maxpages)
			return maxpages;
	}),({
		unsigned long p = (unsigned long)v.iov_base;
		npages += DIV_ROUND_UP(p + v.iov_len, PAGE_SIZE)
			- p / PAGE_SIZE;
		if (npages >= maxpages)
			return maxpages;
	})
	)
	return npages;
}
EXPORT_SYMBOL(iov_iter_npages);

const void *dup_iter(struct iov_iter *new, struct iov_iter *old, gfp_t flags)
{
	*new = *old;
	if (unlikely(new->type & ITER_PIPE)) {
		WARN_ON(1);
		return NULL;
	}
	if (new->type & ITER_BVEC)
		return new->bvec = kmemdup(new->bvec,
				    new->nr_segs * sizeof(struct bio_vec),
				    flags);
	else
		/* iovec and kvec have identical layout */
		return new->iov = kmemdup(new->iov,
				   new->nr_segs * sizeof(struct iovec),
				   flags);
}
EXPORT_SYMBOL(dup_iter);
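
/*
 * Illustrative pairing of the two helpers above (the async context is
 * hypothetical): size an allocation by the pages the iterator spans, and
 * duplicate the segment array when the I/O outlives the caller's iterator:
 *
 *	int np = iov_iter_npages(iter, maxpages);
 *	...
 *	if (!dup_iter(&ctx->iter, iter, GFP_KERNEL))
 *		return -ENOMEM;
 */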

/**
 * import_iovec() - Copy an array of &struct iovec from userspace
 *     into the kernel, check that it is valid, and initialize a new
 *     &struct iov_iter iterator to access it.
 *
 * @type: One of %READ or %WRITE.
 * @uvector: Pointer to the userspace array.
 * @nr_segs: Number of elements in userspace array.
 * @fast_segs: Number of elements in @iov.
 * @iov: (input and output parameter) Pointer to pointer to (usually small
 *     on-stack) kernel array.
 * @i: Pointer to iterator that will be initialized on success.
 *
 * If the array pointed to by *@iov is large enough to hold all @nr_segs,
 * then this function places %NULL in *@iov on return. Otherwise, a new
 * array will be allocated and the result placed in *@iov. This means that
 * the caller may call kfree() on *@iov regardless of whether the small
 * on-stack array was used or not (and regardless of whether this function
 * returns an error or not).
 *
 * Return: 0 on success or negative error code on error.
 */
int import_iovec(int type, const struct iovec __user * uvector,
		 unsigned nr_segs, unsigned fast_segs,
		 struct iovec **iov, struct iov_iter *i)
{
	ssize_t n;
	struct iovec *p;
	n = rw_copy_check_uvector(type, uvector, nr_segs, fast_segs,
				  *iov, &p);
	if (n < 0) {
		if (p != *iov)
			kfree(p);
		*iov = NULL;
		return n;
	}
	iov_iter_init(i, type, p, nr_segs, n);
	*iov = p == *iov ? NULL : p;
	return 0;
}
EXPORT_SYMBOL(import_iovec);

#ifdef CONFIG_COMPAT
#include <linux/compat.h>

int compat_import_iovec(int type, const struct compat_iovec __user * uvector,
		 unsigned nr_segs, unsigned fast_segs,
		 struct iovec **iov, struct iov_iter *i)
{
	ssize_t n;
	struct iovec *p;
	n = compat_rw_copy_check_uvector(type, uvector, nr_segs, fast_segs,
				  *iov, &p);
	if (n < 0) {
		if (p != *iov)
			kfree(p);
		*iov = NULL;
		return n;
	}
	iov_iter_init(i, type, p, nr_segs, n);
	*iov = p == *iov ? NULL : p;
	return 0;
}
#endif

int import_single_range(int rw, void __user *buf, size_t len,
		 struct iovec *iov, struct iov_iter *i)
{
	if (len > MAX_RW_COUNT)
		len = MAX_RW_COUNT;
	if (unlikely(!access_ok(!rw, buf, len)))
		return -EFAULT;

	iov->iov_base = buf;
	iov->iov_len = len;
	iov_iter_init(i, rw, iov, 1, len);
	return 0;
}
EXPORT_SYMBOL(import_single_range);
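
/*
 * Typical import_iovec() call pattern, as in the readv()/writev() paths
 * (do_the_io() is hypothetical).  Note that kfree(iov) is safe whether or
 * not the on-stack array ended up being used:
 *
 *	struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
 *	struct iov_iter iter;
 *	int ret = import_iovec(READ, uvec, nr_segs, UIO_FASTIOV, &iov, &iter);
 *	if (ret < 0)
 *		return ret;
 *	ret = do_the_io(&iter);
 *	kfree(iov);
 */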