#include <linux/export.h>
#include <linux/uio.h>
#include <linux/pagemap.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/splice.h>
#include <net/checksum.h>

#define PIPE_PARANOIA /* for now */

#define iterate_iovec(i, n, __v, __p, skip, STEP) {	\
	size_t left;					\
	size_t wanted = n;				\
	__p = i->iov;					\
	__v.iov_len = min(n, __p->iov_len - skip);	\
	if (likely(__v.iov_len)) {			\
		__v.iov_base = __p->iov_base + skip;	\
		left = (STEP);				\
		__v.iov_len -= left;			\
		skip += __v.iov_len;			\
		n -= __v.iov_len;			\
	} else {					\
		left = 0;				\
	}						\
	while (unlikely(!left && n)) {			\
		__p++;					\
		__v.iov_len = min(n, __p->iov_len);	\
		if (unlikely(!__v.iov_len))		\
			continue;			\
		__v.iov_base = __p->iov_base;		\
		left = (STEP);				\
		__v.iov_len -= left;			\
		skip = __v.iov_len;			\
		n -= __v.iov_len;			\
	}						\
	n = wanted - n;					\
}

#define iterate_kvec(i, n, __v, __p, skip, STEP) {	\
	size_t wanted = n;				\
	__p = i->kvec;					\
	__v.iov_len = min(n, __p->iov_len - skip);	\
	if (likely(__v.iov_len)) {			\
		__v.iov_base = __p->iov_base + skip;	\
		(void)(STEP);				\
		skip += __v.iov_len;			\
		n -= __v.iov_len;			\
	}						\
	while (unlikely(n)) {				\
		__p++;					\
		__v.iov_len = min(n, __p->iov_len);	\
		if (unlikely(!__v.iov_len))		\
			continue;			\
		__v.iov_base = __p->iov_base;		\
		(void)(STEP);				\
		skip = __v.iov_len;			\
		n -= __v.iov_len;			\
	}						\
	n = wanted;					\
}

#define iterate_bvec(i, n, __v, __bi, skip, STEP) {	\
	struct bvec_iter __start;			\
	__start.bi_size = n;				\
	__start.bi_bvec_done = skip;			\
	__start.bi_idx = 0;				\
	for_each_bvec(__v, i->bvec, __bi, __start) {	\
		if (!__v.bv_len)			\
			continue;			\
		(void)(STEP);				\
	}						\
}

#define iterate_all_kinds(i, n, v, I, B, K) {			\
	size_t skip = i->iov_offset;				\
	if (unlikely(i->type & ITER_BVEC)) {			\
		struct bio_vec v;				\
		struct bvec_iter __bi;				\
		iterate_bvec(i, n, v, __bi, skip, (B))		\
	} else if (unlikely(i->type & ITER_KVEC)) {		\
		const struct kvec *kvec;			\
		struct kvec v;					\
		iterate_kvec(i, n, v, kvec, skip, (K))		\
	} else {						\
		const struct iovec *iov;			\
		struct iovec v;					\
		iterate_iovec(i, n, v, iov, skip, (I))		\
	}							\
}

#define iterate_and_advance(i, n, v, I, B, K) {			\
	if (unlikely(i->count < n))				\
		n = i->count;					\
	if (i->count) {						\
		size_t skip = i->iov_offset;			\
		if (unlikely(i->type & ITER_BVEC)) {		\
			const struct bio_vec *bvec = i->bvec;	\
			struct bio_vec v;			\
			struct bvec_iter __bi;			\
			iterate_bvec(i, n, v, __bi, skip, (B))	\
			i->bvec = __bvec_iter_bvec(i->bvec, __bi);	\
			i->nr_segs -= i->bvec - bvec;		\
			skip = __bi.bi_bvec_done;		\
		} else if (unlikely(i->type & ITER_KVEC)) {	\
			const struct kvec *kvec;		\
			struct kvec v;				\
			iterate_kvec(i, n, v, kvec, skip, (K))	\
			if (skip == kvec->iov_len) {		\
				kvec++;				\
				skip = 0;			\
			}					\
			i->nr_segs -= kvec - i->kvec;		\
			i->kvec = kvec;				\
		} else {					\
			const struct iovec *iov;		\
			struct iovec v;				\
			iterate_iovec(i, n, v, iov, skip, (I))	\
			if (skip == iov->iov_len) {		\
				iov++;				\
				skip = 0;			\
			}					\
			i->nr_segs -= iov - i->iov;		\
			i->iov = iov;				\
		}						\
		i->count -= n;					\
		i->iov_offset = skip;				\
	}							\
}
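/*
 * Copy up to @bytes bytes from @page (starting at @offset) into the user
 * iovecs of @i, advancing the iterator as we go.  With CONFIG_HIGHMEM the
 * destination is prefaulted and the copy is first attempted under
 * kmap_atomic(); if that faults we fall back to a sleeping kmap().
 * Returns the number of bytes actually copied.
 */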
static size_t copy_page_to_iter_iovec(struct page *page, size_t offset, size_t bytes,
			 struct iov_iter *i)
{
	size_t skip, copy, left, wanted;
	const struct iovec *iov;
	char __user *buf;
	void *kaddr, *from;

	if (unlikely(bytes > i->count))
		bytes = i->count;

	if (unlikely(!bytes))
		return 0;

	wanted = bytes;
	iov = i->iov;
	skip = i->iov_offset;
	buf = iov->iov_base + skip;
	copy = min(bytes, iov->iov_len - skip);

	if (IS_ENABLED(CONFIG_HIGHMEM) && !fault_in_pages_writeable(buf, copy)) {
		kaddr = kmap_atomic(page);
		from = kaddr + offset;

		/* first chunk, usually the only one */
		left = __copy_to_user_inatomic(buf, from, copy);
		copy -= left;
		skip += copy;
		from += copy;
		bytes -= copy;

		while (unlikely(!left && bytes)) {
			iov++;
			buf = iov->iov_base;
			copy = min(bytes, iov->iov_len);
			left = __copy_to_user_inatomic(buf, from, copy);
			copy -= left;
			skip = copy;
			from += copy;
			bytes -= copy;
		}
		if (likely(!bytes)) {
			kunmap_atomic(kaddr);
			goto done;
		}
		offset = from - kaddr;
		buf += copy;
		kunmap_atomic(kaddr);
		copy = min(bytes, iov->iov_len - skip);
	}
	/* Too bad - revert to non-atomic kmap */

	kaddr = kmap(page);
	from = kaddr + offset;
	left = __copy_to_user(buf, from, copy);
	copy -= left;
	skip += copy;
	from += copy;
	bytes -= copy;
	while (unlikely(!left && bytes)) {
		iov++;
		buf = iov->iov_base;
		copy = min(bytes, iov->iov_len);
		left = __copy_to_user(buf, from, copy);
		copy -= left;
		skip = copy;
		from += copy;
		bytes -= copy;
	}
	kunmap(page);

done:
	if (skip == iov->iov_len) {
		iov++;
		skip = 0;
	}
	i->count -= wanted - bytes;
	i->nr_segs -= iov - i->iov;
	i->iov = iov;
	i->iov_offset = skip;
	return wanted - bytes;
}

static size_t copy_page_from_iter_iovec(struct page *page, size_t offset, size_t bytes,
			 struct iov_iter *i)
{
	size_t skip, copy, left, wanted;
	const struct iovec *iov;
	char __user *buf;
	void *kaddr, *to;

	if (unlikely(bytes > i->count))
		bytes = i->count;

	if (unlikely(!bytes))
		return 0;

	wanted = bytes;
	iov = i->iov;
	skip = i->iov_offset;
	buf = iov->iov_base + skip;
	copy = min(bytes, iov->iov_len - skip);

	if (IS_ENABLED(CONFIG_HIGHMEM) && !fault_in_pages_readable(buf, copy)) {
		kaddr = kmap_atomic(page);
		to = kaddr + offset;

		/* first chunk, usually the only one */
		left = __copy_from_user_inatomic(to, buf, copy);
		copy -= left;
		skip += copy;
		to += copy;
		bytes -= copy;

		while (unlikely(!left && bytes)) {
			iov++;
			buf = iov->iov_base;
			copy = min(bytes, iov->iov_len);
			left = __copy_from_user_inatomic(to, buf, copy);
			copy -= left;
			skip = copy;
			to += copy;
			bytes -= copy;
		}
		if (likely(!bytes)) {
			kunmap_atomic(kaddr);
			goto done;
		}
		offset = to - kaddr;
		buf += copy;
		kunmap_atomic(kaddr);
		copy = min(bytes, iov->iov_len - skip);
	}
	/* Too bad - revert to non-atomic kmap */

	kaddr = kmap(page);
	to = kaddr + offset;
	left = __copy_from_user(to, buf, copy);
	copy -= left;
	skip += copy;
	to += copy;
	bytes -= copy;
	while (unlikely(!left && bytes)) {
		iov++;
		buf = iov->iov_base;
		copy = min(bytes, iov->iov_len);
		left = __copy_from_user(to, buf, copy);
		copy -= left;
		skip = copy;
		to += copy;
		bytes -= copy;
	}
	kunmap(page);

done:
	if (skip == iov->iov_len) {
		iov++;
		skip = 0;
	}
	i->count -= wanted - bytes;
	i->nr_segs -= iov - i->iov;
	i->iov = iov;
	i->iov_offset = skip;
	return wanted - bytes;
}
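/*
 * PIPE_PARANOIA: sanity() cross-checks a pipe-backed iterator against the
 * underlying pipe - i->idx/i->iov_offset must point either at the end of
 * the last occupied buffer or just past it.  On a mismatch the pipe state
 * is dumped and we WARN, so bugs in the pipe iterator show up early.
 */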
#ifdef PIPE_PARANOIA
static bool sanity(const struct iov_iter *i)
{
	struct pipe_inode_info *pipe = i->pipe;
	int idx = i->idx;
	int next = pipe->curbuf + pipe->nrbufs;
	if (i->iov_offset) {
		struct pipe_buffer *p;
		if (unlikely(!pipe->nrbufs))
			goto Bad;	// pipe must be non-empty
		if (unlikely(idx != ((next - 1) & (pipe->buffers - 1))))
			goto Bad;	// must be at the last buffer...

		p = &pipe->bufs[idx];
		if (unlikely(p->offset + p->len != i->iov_offset))
			goto Bad;	// ... at the end of segment
	} else {
		if (idx != (next & (pipe->buffers - 1)))
			goto Bad;	// must be right after the last buffer
	}
	return true;
Bad:
	printk(KERN_ERR "idx = %d, offset = %zd\n", i->idx, i->iov_offset);
	printk(KERN_ERR "curbuf = %d, nrbufs = %d, buffers = %d\n",
			pipe->curbuf, pipe->nrbufs, pipe->buffers);
	for (idx = 0; idx < pipe->buffers; idx++)
		printk(KERN_ERR "[%p %p %d %d]\n",
			pipe->bufs[idx].ops,
			pipe->bufs[idx].page,
			pipe->bufs[idx].offset,
			pipe->bufs[idx].len);
	WARN_ON(1);
	return false;
}
#else
#define sanity(i) true
#endif

static inline int next_idx(int idx, struct pipe_inode_info *pipe)
{
	return (idx + 1) & (pipe->buffers - 1);
}

static size_t copy_page_to_iter_pipe(struct page *page, size_t offset, size_t bytes,
			 struct iov_iter *i)
{
	struct pipe_inode_info *pipe = i->pipe;
	struct pipe_buffer *buf;
	size_t off;
	int idx;

	if (unlikely(bytes > i->count))
		bytes = i->count;

	if (unlikely(!bytes))
		return 0;

	if (!sanity(i))
		return 0;

	off = i->iov_offset;
	idx = i->idx;
	buf = &pipe->bufs[idx];
	if (off) {
		if (offset == off && buf->page == page) {
			/* merge with the last one */
			buf->len += bytes;
			i->iov_offset += bytes;
			goto out;
		}
		idx = next_idx(idx, pipe);
		buf = &pipe->bufs[idx];
	}
	if (idx == pipe->curbuf && pipe->nrbufs)
		return 0;
	pipe->nrbufs++;
	buf->ops = &page_cache_pipe_buf_ops;
	get_page(buf->page = page);
	buf->offset = offset;
	buf->len = bytes;
	i->iov_offset = offset + bytes;
	i->idx = idx;
out:
	i->count -= bytes;
	return bytes;
}

/*
 * Fault in one or more iovecs of the given iov_iter, to a maximum length of
 * bytes. For each iovec, fault in each page that constitutes the iovec.
 *
 * Return 0 on success, or non-zero if the memory could not be accessed (i.e.
 * because it is an invalid address).
 */
int iov_iter_fault_in_readable(struct iov_iter *i, size_t bytes)
{
	size_t skip = i->iov_offset;
	const struct iovec *iov;
	int err;
	struct iovec v;

	if (!(i->type & (ITER_BVEC|ITER_KVEC))) {
		iterate_iovec(i, bytes, v, iov, skip, ({
			err = fault_in_pages_readable(v.iov_base, v.iov_len);
			if (unlikely(err))
				return err;
			0;}))
	}
	return 0;
}
EXPORT_SYMBOL(iov_iter_fault_in_readable);
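/*
 * Initialize @i to walk a userspace iovec array.  If the caller is running
 * under set_fs(KERNEL_DS), the "user" pointers are really kernel pointers,
 * so the iterator is set up as an ITER_KVEC instead (iovec and kvec have
 * the same layout).
 */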
void iov_iter_init(struct iov_iter *i, int direction,
			const struct iovec *iov, unsigned long nr_segs,
			size_t count)
{
	/* It will get better.  Eventually... */
	if (segment_eq(get_fs(), KERNEL_DS)) {
		direction |= ITER_KVEC;
		i->type = direction;
		i->kvec = (struct kvec *)iov;
	} else {
		i->type = direction;
		i->iov = iov;
	}
	i->nr_segs = nr_segs;
	i->iov_offset = 0;
	i->count = count;
}
EXPORT_SYMBOL(iov_iter_init);

static void memcpy_from_page(char *to, struct page *page, size_t offset, size_t len)
{
	char *from = kmap_atomic(page);
	memcpy(to, from + offset, len);
	kunmap_atomic(from);
}

static void memcpy_to_page(struct page *page, size_t offset, const char *from, size_t len)
{
	char *to = kmap_atomic(page);
	memcpy(to + offset, from, len);
	kunmap_atomic(to);
}

static void memzero_page(struct page *page, size_t offset, size_t len)
{
	char *addr = kmap_atomic(page);
	memset(addr + offset, 0, len);
	kunmap_atomic(addr);
}

static inline bool allocated(struct pipe_buffer *buf)
{
	return buf->ops == &default_pipe_buf_ops;
}

static inline void data_start(const struct iov_iter *i, int *idxp, size_t *offp)
{
	size_t off = i->iov_offset;
	int idx = i->idx;
	if (off && (!allocated(&i->pipe->bufs[idx]) || off == PAGE_SIZE)) {
		idx = next_idx(idx, i->pipe);
		off = 0;
	}
	*idxp = idx;
	*offp = off;
}

static size_t push_pipe(struct iov_iter *i, size_t size,
			int *idxp, size_t *offp)
{
	struct pipe_inode_info *pipe = i->pipe;
	size_t off;
	int idx;
	ssize_t left;

	if (unlikely(size > i->count))
		size = i->count;
	if (unlikely(!size))
		return 0;

	left = size;
	data_start(i, &idx, &off);
	*idxp = idx;
	*offp = off;
	if (off) {
		left -= PAGE_SIZE - off;
		if (left <= 0) {
			pipe->bufs[idx].len += size;
			return size;
		}
		pipe->bufs[idx].len = PAGE_SIZE;
		idx = next_idx(idx, pipe);
	}
	while (idx != pipe->curbuf || !pipe->nrbufs) {
		struct page *page = alloc_page(GFP_USER);
		if (!page)
			break;
		pipe->nrbufs++;
		pipe->bufs[idx].ops = &default_pipe_buf_ops;
		pipe->bufs[idx].page = page;
		pipe->bufs[idx].offset = 0;
		if (left <= PAGE_SIZE) {
			pipe->bufs[idx].len = left;
			return size;
		}
		pipe->bufs[idx].len = PAGE_SIZE;
		left -= PAGE_SIZE;
		idx = next_idx(idx, pipe);
	}
	return size - left;
}

static size_t copy_pipe_to_iter(const void *addr, size_t bytes,
				struct iov_iter *i)
{
	struct pipe_inode_info *pipe = i->pipe;
	size_t n, off;
	int idx;

	if (!sanity(i))
		return 0;

	bytes = n = push_pipe(i, bytes, &idx, &off);
	if (unlikely(!n))
		return 0;
	for ( ; n; idx = next_idx(idx, pipe), off = 0) {
		size_t chunk = min_t(size_t, n, PAGE_SIZE - off);
		memcpy_to_page(pipe->bufs[idx].page, off, addr, chunk);
		i->idx = idx;
		i->iov_offset = off + chunk;
		n -= chunk;
		addr += chunk;
	}
	i->count -= bytes;
	return bytes;
}

size_t copy_to_iter(const void *addr, size_t bytes, struct iov_iter *i)
{
	const char *from = addr;
	if (unlikely(i->type & ITER_PIPE))
		return copy_pipe_to_iter(addr, bytes, i);
	iterate_and_advance(i, bytes, v,
		__copy_to_user(v.iov_base, (from += v.iov_len) - v.iov_len,
			       v.iov_len),
		memcpy_to_page(v.bv_page, v.bv_offset,
			       (from += v.bv_len) - v.bv_len, v.bv_len),
		memcpy(v.iov_base, (from += v.iov_len) - v.iov_len, v.iov_len)
	)

	return bytes;
}
EXPORT_SYMBOL(copy_to_iter);
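/*
 * Copy @bytes bytes from the iterator into the kernel buffer at @addr,
 * advancing @i.  Works for iovec, kvec and bvec backed iterators; a pipe
 * is not a valid data source here, so that case just WARNs and copies
 * nothing.  Returns the number of bytes copied, which may be short if a
 * userspace read faults part-way through.
 */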
size_t copy_from_iter(void *addr, size_t bytes, struct iov_iter *i)
{
	char *to = addr;
	if (unlikely(i->type & ITER_PIPE)) {
		WARN_ON(1);
		return 0;
	}
	iterate_and_advance(i, bytes, v,
		__copy_from_user((to += v.iov_len) - v.iov_len, v.iov_base,
				 v.iov_len),
		memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page,
				 v.bv_offset, v.bv_len),
		memcpy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len)
	)

	return bytes;
}
EXPORT_SYMBOL(copy_from_iter);

bool copy_from_iter_full(void *addr, size_t bytes, struct iov_iter *i)
{
	char *to = addr;
	if (unlikely(i->type & ITER_PIPE)) {
		WARN_ON(1);
		return false;
	}
	if (unlikely(i->count < bytes))
		return false;

	iterate_all_kinds(i, bytes, v, ({
		if (__copy_from_user((to += v.iov_len) - v.iov_len,
				     v.iov_base, v.iov_len))
			return false;
		0;}),
		memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page,
				 v.bv_offset, v.bv_len),
		memcpy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len)
	)

	iov_iter_advance(i, bytes);
	return true;
}
EXPORT_SYMBOL(copy_from_iter_full);

size_t copy_from_iter_nocache(void *addr, size_t bytes, struct iov_iter *i)
{
	char *to = addr;
	if (unlikely(i->type & ITER_PIPE)) {
		WARN_ON(1);
		return 0;
	}
	iterate_and_advance(i, bytes, v,
		__copy_from_user_nocache((to += v.iov_len) - v.iov_len,
					 v.iov_base, v.iov_len),
		memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page,
				 v.bv_offset, v.bv_len),
		memcpy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len)
	)

	return bytes;
}
EXPORT_SYMBOL(copy_from_iter_nocache);

bool copy_from_iter_full_nocache(void *addr, size_t bytes, struct iov_iter *i)
{
	char *to = addr;
	if (unlikely(i->type & ITER_PIPE)) {
		WARN_ON(1);
		return false;
	}
	if (unlikely(i->count < bytes))
		return false;
	iterate_all_kinds(i, bytes, v, ({
		if (__copy_from_user_nocache((to += v.iov_len) - v.iov_len,
					     v.iov_base, v.iov_len))
			return false;
		0;}),
		memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page,
				 v.bv_offset, v.bv_len),
		memcpy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len)
	)

	iov_iter_advance(i, bytes);
	return true;
}
EXPORT_SYMBOL(copy_from_iter_full_nocache);

size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes,
			 struct iov_iter *i)
{
	if (i->type & (ITER_BVEC|ITER_KVEC)) {
		void *kaddr = kmap_atomic(page);
		size_t wanted = copy_to_iter(kaddr + offset, bytes, i);
		kunmap_atomic(kaddr);
		return wanted;
	} else if (likely(!(i->type & ITER_PIPE)))
		return copy_page_to_iter_iovec(page, offset, bytes, i);
	else
		return copy_page_to_iter_pipe(page, offset, bytes, i);
}
EXPORT_SYMBOL(copy_page_to_iter);

size_t copy_page_from_iter(struct page *page, size_t offset, size_t bytes,
			 struct iov_iter *i)
{
	if (unlikely(i->type & ITER_PIPE)) {
		WARN_ON(1);
		return 0;
	}
	if (i->type & (ITER_BVEC|ITER_KVEC)) {
		void *kaddr = kmap_atomic(page);
		size_t wanted = copy_from_iter(kaddr + offset, bytes, i);
		kunmap_atomic(kaddr);
		return wanted;
	} else
		return copy_page_from_iter_iovec(page, offset, bytes, i);
}
EXPORT_SYMBOL(copy_page_from_iter);
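/*
 * Zero-fill @bytes bytes of a pipe-backed iterator: push_pipe() grows the
 * pipe (allocating new buffers as needed) and the freshly covered pages
 * are cleared page by page.  Returns the number of bytes actually zeroed.
 */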
static size_t pipe_zero(size_t bytes, struct iov_iter *i)
{
	struct pipe_inode_info *pipe = i->pipe;
	size_t n, off;
	int idx;

	if (!sanity(i))
		return 0;

	bytes = n = push_pipe(i, bytes, &idx, &off);
	if (unlikely(!n))
		return 0;

	for ( ; n; idx = next_idx(idx, pipe), off = 0) {
		size_t chunk = min_t(size_t, n, PAGE_SIZE - off);
		memzero_page(pipe->bufs[idx].page, off, chunk);
		i->idx = idx;
		i->iov_offset = off + chunk;
		n -= chunk;
	}
	i->count -= bytes;
	return bytes;
}

size_t iov_iter_zero(size_t bytes, struct iov_iter *i)
{
	if (unlikely(i->type & ITER_PIPE))
		return pipe_zero(bytes, i);
	iterate_and_advance(i, bytes, v,
		__clear_user(v.iov_base, v.iov_len),
		memzero_page(v.bv_page, v.bv_offset, v.bv_len),
		memset(v.iov_base, 0, v.iov_len)
	)

	return bytes;
}
EXPORT_SYMBOL(iov_iter_zero);

size_t iov_iter_copy_from_user_atomic(struct page *page,
		struct iov_iter *i, unsigned long offset, size_t bytes)
{
	char *kaddr = kmap_atomic(page), *p = kaddr + offset;
	if (unlikely(i->type & ITER_PIPE)) {
		kunmap_atomic(kaddr);
		WARN_ON(1);
		return 0;
	}
	iterate_all_kinds(i, bytes, v,
		__copy_from_user_inatomic((p += v.iov_len) - v.iov_len,
					  v.iov_base, v.iov_len),
		memcpy_from_page((p += v.bv_len) - v.bv_len, v.bv_page,
				 v.bv_offset, v.bv_len),
		memcpy((p += v.iov_len) - v.iov_len, v.iov_base, v.iov_len)
	)
	kunmap_atomic(kaddr);
	return bytes;
}
EXPORT_SYMBOL(iov_iter_copy_from_user_atomic);

static void pipe_advance(struct iov_iter *i, size_t size)
{
	struct pipe_inode_info *pipe = i->pipe;
	struct pipe_buffer *buf;
	int idx = i->idx;
	size_t off = i->iov_offset, orig_sz;

	if (unlikely(i->count < size))
		size = i->count;
	orig_sz = size;

	if (size) {
		if (off) /* make it relative to the beginning of buffer */
			size += off - pipe->bufs[idx].offset;
		while (1) {
			buf = &pipe->bufs[idx];
			if (size <= buf->len)
				break;
			size -= buf->len;
			idx = next_idx(idx, pipe);
		}
		buf->len = size;
		i->idx = idx;
		off = i->iov_offset = buf->offset + size;
	}
	if (off)
		idx = next_idx(idx, pipe);
	if (pipe->nrbufs) {
		int unused = (pipe->curbuf + pipe->nrbufs) & (pipe->buffers - 1);
		/* [curbuf,unused) is in use.  Free [idx,unused) */
		while (idx != unused) {
			pipe_buf_release(pipe, &pipe->bufs[idx]);
			idx = next_idx(idx, pipe);
			pipe->nrbufs--;
		}
	}
	i->count -= orig_sz;
}

void iov_iter_advance(struct iov_iter *i, size_t size)
{
	if (unlikely(i->type & ITER_PIPE)) {
		pipe_advance(i, size);
		return;
	}
	iterate_and_advance(i, size, v, 0, 0, 0)
}
EXPORT_SYMBOL(iov_iter_advance);

/*
 * Return the count of just the current iov_iter segment.
781 */ 782 size_t iov_iter_single_seg_count(const struct iov_iter *i) 783 { 784 if (unlikely(i->type & ITER_PIPE)) 785 return i->count; // it is a silly place, anyway 786 if (i->nr_segs == 1) 787 return i->count; 788 else if (i->type & ITER_BVEC) 789 return min(i->count, i->bvec->bv_len - i->iov_offset); 790 else 791 return min(i->count, i->iov->iov_len - i->iov_offset); 792 } 793 EXPORT_SYMBOL(iov_iter_single_seg_count); 794 795 void iov_iter_kvec(struct iov_iter *i, int direction, 796 const struct kvec *kvec, unsigned long nr_segs, 797 size_t count) 798 { 799 BUG_ON(!(direction & ITER_KVEC)); 800 i->type = direction; 801 i->kvec = kvec; 802 i->nr_segs = nr_segs; 803 i->iov_offset = 0; 804 i->count = count; 805 } 806 EXPORT_SYMBOL(iov_iter_kvec); 807 808 void iov_iter_bvec(struct iov_iter *i, int direction, 809 const struct bio_vec *bvec, unsigned long nr_segs, 810 size_t count) 811 { 812 BUG_ON(!(direction & ITER_BVEC)); 813 i->type = direction; 814 i->bvec = bvec; 815 i->nr_segs = nr_segs; 816 i->iov_offset = 0; 817 i->count = count; 818 } 819 EXPORT_SYMBOL(iov_iter_bvec); 820 821 void iov_iter_pipe(struct iov_iter *i, int direction, 822 struct pipe_inode_info *pipe, 823 size_t count) 824 { 825 BUG_ON(direction != ITER_PIPE); 826 i->type = direction; 827 i->pipe = pipe; 828 i->idx = (pipe->curbuf + pipe->nrbufs) & (pipe->buffers - 1); 829 i->iov_offset = 0; 830 i->count = count; 831 } 832 EXPORT_SYMBOL(iov_iter_pipe); 833 834 unsigned long iov_iter_alignment(const struct iov_iter *i) 835 { 836 unsigned long res = 0; 837 size_t size = i->count; 838 839 if (!size) 840 return 0; 841 842 if (unlikely(i->type & ITER_PIPE)) { 843 if (i->iov_offset && allocated(&i->pipe->bufs[i->idx])) 844 return size | i->iov_offset; 845 return size; 846 } 847 iterate_all_kinds(i, size, v, 848 (res |= (unsigned long)v.iov_base | v.iov_len, 0), 849 res |= v.bv_offset | v.bv_len, 850 res |= (unsigned long)v.iov_base | v.iov_len 851 ) 852 return res; 853 } 854 EXPORT_SYMBOL(iov_iter_alignment); 855 856 unsigned long iov_iter_gap_alignment(const struct iov_iter *i) 857 { 858 unsigned long res = 0; 859 size_t size = i->count; 860 if (!size) 861 return 0; 862 863 if (unlikely(i->type & ITER_PIPE)) { 864 WARN_ON(1); 865 return ~0U; 866 } 867 868 iterate_all_kinds(i, size, v, 869 (res |= (!res ? 0 : (unsigned long)v.iov_base) | 870 (size != v.iov_len ? size : 0), 0), 871 (res |= (!res ? 0 : (unsigned long)v.bv_offset) | 872 (size != v.bv_len ? size : 0)), 873 (res |= (!res ? 0 : (unsigned long)v.iov_base) | 874 (size != v.iov_len ? 
			(size != v.iov_len ? size : 0))
		);
	return res;
}
EXPORT_SYMBOL(iov_iter_gap_alignment);

static inline size_t __pipe_get_pages(struct iov_iter *i,
				size_t maxsize,
				struct page **pages,
				int idx,
				size_t *start)
{
	struct pipe_inode_info *pipe = i->pipe;
	ssize_t n = push_pipe(i, maxsize, &idx, start);
	if (!n)
		return -EFAULT;

	maxsize = n;
	n += *start;
	while (n > 0) {
		get_page(*pages++ = pipe->bufs[idx].page);
		idx = next_idx(idx, pipe);
		n -= PAGE_SIZE;
	}

	return maxsize;
}

static ssize_t pipe_get_pages(struct iov_iter *i,
		   struct page **pages, size_t maxsize, unsigned maxpages,
		   size_t *start)
{
	unsigned npages;
	size_t capacity;
	int idx;

	if (!sanity(i))
		return -EFAULT;

	data_start(i, &idx, start);
	/* some of this one + all after this one */
	npages = ((i->pipe->curbuf - idx - 1) & (i->pipe->buffers - 1)) + 1;
	capacity = min(npages, maxpages) * PAGE_SIZE - *start;

	return __pipe_get_pages(i, min(maxsize, capacity), pages, idx, start);
}

ssize_t iov_iter_get_pages(struct iov_iter *i,
		   struct page **pages, size_t maxsize, unsigned maxpages,
		   size_t *start)
{
	if (maxsize > i->count)
		maxsize = i->count;

	if (!maxsize)
		return 0;

	if (unlikely(i->type & ITER_PIPE))
		return pipe_get_pages(i, pages, maxsize, maxpages, start);
	iterate_all_kinds(i, maxsize, v, ({
		unsigned long addr = (unsigned long)v.iov_base;
		size_t len = v.iov_len + (*start = addr & (PAGE_SIZE - 1));
		int n;
		int res;

		if (len > maxpages * PAGE_SIZE)
			len = maxpages * PAGE_SIZE;
		addr &= ~(PAGE_SIZE - 1);
		n = DIV_ROUND_UP(len, PAGE_SIZE);
		res = get_user_pages_fast(addr, n, (i->type & WRITE) != WRITE, pages);
		if (unlikely(res < 0))
			return res;
		return (res == n ? len : res * PAGE_SIZE) - *start;
	0;}),({
		/* can't be more than PAGE_SIZE */
		*start = v.bv_offset;
		get_page(*pages = v.bv_page);
		return v.bv_len;
	}),({
		return -EFAULT;
	})
	)
	return 0;
}
EXPORT_SYMBOL(iov_iter_get_pages);

static struct page **get_pages_array(size_t n)
{
	struct page **p = kmalloc(n * sizeof(struct page *), GFP_KERNEL);
	if (!p)
		p = vmalloc(n * sizeof(struct page *));
	return p;
}

static ssize_t pipe_get_pages_alloc(struct iov_iter *i,
		   struct page ***pages, size_t maxsize,
		   size_t *start)
{
	struct page **p;
	size_t n;
	int idx;
	int npages;

	if (!sanity(i))
		return -EFAULT;

	data_start(i, &idx, start);
	/* some of this one + all after this one */
	npages = ((i->pipe->curbuf - idx - 1) & (i->pipe->buffers - 1)) + 1;
	n = npages * PAGE_SIZE - *start;
	if (maxsize > n)
		maxsize = n;
	else
		npages = DIV_ROUND_UP(maxsize + *start, PAGE_SIZE);
	p = get_pages_array(npages);
	if (!p)
		return -ENOMEM;
	n = __pipe_get_pages(i, maxsize, p, idx, start);
	if (n > 0)
		*pages = p;
	else
		kvfree(p);
	return n;
}

ssize_t iov_iter_get_pages_alloc(struct iov_iter *i,
		   struct page ***pages, size_t maxsize,
		   size_t *start)
{
	struct page **p;

	if (maxsize > i->count)
		maxsize = i->count;

	if (!maxsize)
		return 0;

	if (unlikely(i->type & ITER_PIPE))
		return pipe_get_pages_alloc(i, pages, maxsize, start);
	iterate_all_kinds(i, maxsize, v, ({
		unsigned long addr = (unsigned long)v.iov_base;
		size_t len = v.iov_len + (*start = addr & (PAGE_SIZE - 1));
		int n;
		int res;

		addr &= ~(PAGE_SIZE - 1);
		n = DIV_ROUND_UP(len, PAGE_SIZE);
		p = get_pages_array(n);
		if (!p)
			return -ENOMEM;
		res = get_user_pages_fast(addr, n, (i->type & WRITE) != WRITE, p);
		if (unlikely(res < 0)) {
			kvfree(p);
			return res;
		}
		*pages = p;
		return (res == n ? len : res * PAGE_SIZE) - *start;
	0;}),({
		/* can't be more than PAGE_SIZE */
		*start = v.bv_offset;
		*pages = p = get_pages_array(1);
		if (!p)
			return -ENOMEM;
		get_page(*p = v.bv_page);
		return v.bv_len;
	}),({
		return -EFAULT;
	})
	)
	return 0;
}
EXPORT_SYMBOL(iov_iter_get_pages_alloc);
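/*
 * Like copy_from_iter(), but also fold the copied data into the running
 * Internet checksum at *csum (using csum_block_add() so chunks can land at
 * arbitrary offsets).  Pipe-backed iterators are not supported as a source.
 */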
size_t csum_and_copy_from_iter(void *addr, size_t bytes, __wsum *csum,
			       struct iov_iter *i)
{
	char *to = addr;
	__wsum sum, next;
	size_t off = 0;
	sum = *csum;
	if (unlikely(i->type & ITER_PIPE)) {
		WARN_ON(1);
		return 0;
	}
	iterate_and_advance(i, bytes, v, ({
		int err = 0;
		next = csum_and_copy_from_user(v.iov_base,
					       (to += v.iov_len) - v.iov_len,
					       v.iov_len, 0, &err);
		if (!err) {
			sum = csum_block_add(sum, next, off);
			off += v.iov_len;
		}
		err ? v.iov_len : 0;
	}), ({
		char *p = kmap_atomic(v.bv_page);
		next = csum_partial_copy_nocheck(p + v.bv_offset,
						 (to += v.bv_len) - v.bv_len,
						 v.bv_len, 0);
		kunmap_atomic(p);
		sum = csum_block_add(sum, next, off);
		off += v.bv_len;
	}),({
		next = csum_partial_copy_nocheck(v.iov_base,
						 (to += v.iov_len) - v.iov_len,
						 v.iov_len, 0);
		sum = csum_block_add(sum, next, off);
		off += v.iov_len;
	})
	)
	*csum = sum;
	return bytes;
}
EXPORT_SYMBOL(csum_and_copy_from_iter);

bool csum_and_copy_from_iter_full(void *addr, size_t bytes, __wsum *csum,
				  struct iov_iter *i)
{
	char *to = addr;
	__wsum sum, next;
	size_t off = 0;
	sum = *csum;
	if (unlikely(i->type & ITER_PIPE)) {
		WARN_ON(1);
		return false;
	}
	if (unlikely(i->count < bytes))
		return false;
	iterate_all_kinds(i, bytes, v, ({
		int err = 0;
		next = csum_and_copy_from_user(v.iov_base,
					       (to += v.iov_len) - v.iov_len,
					       v.iov_len, 0, &err);
		if (err)
			return false;
		sum = csum_block_add(sum, next, off);
		off += v.iov_len;
		0;
	}), ({
		char *p = kmap_atomic(v.bv_page);
		next = csum_partial_copy_nocheck(p + v.bv_offset,
						 (to += v.bv_len) - v.bv_len,
						 v.bv_len, 0);
		kunmap_atomic(p);
		sum = csum_block_add(sum, next, off);
		off += v.bv_len;
	}),({
		next = csum_partial_copy_nocheck(v.iov_base,
						 (to += v.iov_len) - v.iov_len,
						 v.iov_len, 0);
		sum = csum_block_add(sum, next, off);
		off += v.iov_len;
	})
	)
	*csum = sum;
	iov_iter_advance(i, bytes);
	return true;
}
EXPORT_SYMBOL(csum_and_copy_from_iter_full);

size_t csum_and_copy_to_iter(const void *addr, size_t bytes, __wsum *csum,
			     struct iov_iter *i)
{
	const char *from = addr;
	__wsum sum, next;
	size_t off = 0;
	sum = *csum;
	if (unlikely(i->type & ITER_PIPE)) {
		WARN_ON(1);	/* for now */
		return 0;
	}
	iterate_and_advance(i, bytes, v, ({
		int err = 0;
		next = csum_and_copy_to_user((from += v.iov_len) - v.iov_len,
					     v.iov_base,
					     v.iov_len, 0, &err);
		if (!err) {
			sum = csum_block_add(sum, next, off);
			off += v.iov_len;
		}
		err ? v.iov_len : 0;
	}), ({
		char *p = kmap_atomic(v.bv_page);
		next = csum_partial_copy_nocheck((from += v.bv_len) - v.bv_len,
						 p + v.bv_offset,
						 v.bv_len, 0);
		kunmap_atomic(p);
		sum = csum_block_add(sum, next, off);
		off += v.bv_len;
	}),({
		next = csum_partial_copy_nocheck((from += v.iov_len) - v.iov_len,
						 v.iov_base,
						 v.iov_len, 0);
		sum = csum_block_add(sum, next, off);
		off += v.iov_len;
	})
	)
	*csum = sum;
	return bytes;
}
EXPORT_SYMBOL(csum_and_copy_to_iter);

int iov_iter_npages(const struct iov_iter *i, int maxpages)
{
	size_t size = i->count;
	int npages = 0;

	if (!size)
		return 0;

	if (unlikely(i->type & ITER_PIPE)) {
		struct pipe_inode_info *pipe = i->pipe;
		size_t off;
		int idx;

		if (!sanity(i))
			return 0;

		data_start(i, &idx, &off);
		/* some of this one + all after this one */
		npages = ((pipe->curbuf - idx - 1) & (pipe->buffers - 1)) + 1;
		if (npages >= maxpages)
			return maxpages;
	} else iterate_all_kinds(i, size, v, ({
		unsigned long p = (unsigned long)v.iov_base;
		npages += DIV_ROUND_UP(p + v.iov_len, PAGE_SIZE)
			- p / PAGE_SIZE;
		if (npages >= maxpages)
			return maxpages;
	0;}),({
		npages++;
		if (npages >= maxpages)
			return maxpages;
	}),({
		unsigned long p = (unsigned long)v.iov_base;
		npages += DIV_ROUND_UP(p + v.iov_len, PAGE_SIZE)
			- p / PAGE_SIZE;
		if (npages >= maxpages)
			return maxpages;
	})
	)
	return npages;
}
EXPORT_SYMBOL(iov_iter_npages);

const void *dup_iter(struct iov_iter *new, struct iov_iter *old, gfp_t flags)
{
	*new = *old;
	if (unlikely(new->type & ITER_PIPE)) {
		WARN_ON(1);
		return NULL;
	}
	if (new->type & ITER_BVEC)
		return new->bvec = kmemdup(new->bvec,
				    new->nr_segs * sizeof(struct bio_vec),
				    flags);
	else
		/* iovec and kvec have identical layout */
		return new->iov = kmemdup(new->iov,
				   new->nr_segs * sizeof(struct iovec),
				   flags);
}
EXPORT_SYMBOL(dup_iter);

/**
 * import_iovec() - Copy an array of &struct iovec from userspace
 *     into the kernel, check that it is valid, and initialize a new
 *     &struct iov_iter iterator to access it.
 *
 * @type: One of %READ or %WRITE.
 * @uvector: Pointer to the userspace array.
 * @nr_segs: Number of elements in userspace array.
 * @fast_segs: Number of elements in @iov.
 * @iov: (input and output parameter) Pointer to pointer to (usually small
 *     on-stack) kernel array.
 * @i: Pointer to iterator that will be initialized on success.
 *
 * If the array pointed to by *@iov is large enough to hold all @nr_segs,
 * then this function places %NULL in *@iov on return. Otherwise, a new
 * array will be allocated and the result placed in *@iov. This means that
 * the caller may call kfree() on *@iov regardless of whether the small
 * on-stack array was used or not (and regardless of whether this function
 * returns an error or not).
 *
 * Return: 0 on success or negative error code on error.
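 *
 * Example (illustrative sketch only; the surrounding variables are
 * hypothetical, not taken from any caller in this file):
 *
 *	struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
 *	struct iov_iter iter;
 *	int ret;
 *
 *	ret = import_iovec(READ, uvector, nr_segs,
 *			   ARRAY_SIZE(iovstack), &iov, &iter);
 *	if (ret < 0)
 *		return ret;
 *	... use &iter with the copy/advance helpers above ...
 *	kfree(iov);	 (safe: *iov is either a new array or NULL)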
1259 */ 1260 int import_iovec(int type, const struct iovec __user * uvector, 1261 unsigned nr_segs, unsigned fast_segs, 1262 struct iovec **iov, struct iov_iter *i) 1263 { 1264 ssize_t n; 1265 struct iovec *p; 1266 n = rw_copy_check_uvector(type, uvector, nr_segs, fast_segs, 1267 *iov, &p); 1268 if (n < 0) { 1269 if (p != *iov) 1270 kfree(p); 1271 *iov = NULL; 1272 return n; 1273 } 1274 iov_iter_init(i, type, p, nr_segs, n); 1275 *iov = p == *iov ? NULL : p; 1276 return 0; 1277 } 1278 EXPORT_SYMBOL(import_iovec); 1279 1280 #ifdef CONFIG_COMPAT 1281 #include <linux/compat.h> 1282 1283 int compat_import_iovec(int type, const struct compat_iovec __user * uvector, 1284 unsigned nr_segs, unsigned fast_segs, 1285 struct iovec **iov, struct iov_iter *i) 1286 { 1287 ssize_t n; 1288 struct iovec *p; 1289 n = compat_rw_copy_check_uvector(type, uvector, nr_segs, fast_segs, 1290 *iov, &p); 1291 if (n < 0) { 1292 if (p != *iov) 1293 kfree(p); 1294 *iov = NULL; 1295 return n; 1296 } 1297 iov_iter_init(i, type, p, nr_segs, n); 1298 *iov = p == *iov ? NULL : p; 1299 return 0; 1300 } 1301 #endif 1302 1303 int import_single_range(int rw, void __user *buf, size_t len, 1304 struct iovec *iov, struct iov_iter *i) 1305 { 1306 if (len > MAX_RW_COUNT) 1307 len = MAX_RW_COUNT; 1308 if (unlikely(!access_ok(!rw, buf, len))) 1309 return -EFAULT; 1310 1311 iov->iov_base = buf; 1312 iov->iov_len = len; 1313 iov_iter_init(i, rw, iov, 1, len); 1314 return 0; 1315 } 1316 EXPORT_SYMBOL(import_single_range); 1317