#include <linux/export.h>
#include <linux/bvec.h>
#include <linux/uio.h>
#include <linux/pagemap.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/splice.h>
#include <net/checksum.h>

#define PIPE_PARANOIA /* for now */

/*
 * The iterate_* macros below walk one backing type each (user iovec,
 * kernel kvec, bio_vec).  iterate_all_kinds() and iterate_and_advance()
 * dispatch on i->type and evaluate the matching STEP expression once per
 * contiguous chunk, with __v describing the current chunk.  For iovecs,
 * STEP must yield the number of bytes it failed to process (0 on
 * success), so a faulting copy terminates the walk early.
 */
#define iterate_iovec(i, n, __v, __p, skip, STEP) {	\
	size_t left;					\
	size_t wanted = n;				\
	__p = i->iov;					\
	__v.iov_len = min(n, __p->iov_len - skip);	\
	if (likely(__v.iov_len)) {			\
		__v.iov_base = __p->iov_base + skip;	\
		left = (STEP);				\
		__v.iov_len -= left;			\
		skip += __v.iov_len;			\
		n -= __v.iov_len;			\
	} else {					\
		left = 0;				\
	}						\
	while (unlikely(!left && n)) {			\
		__p++;					\
		__v.iov_len = min(n, __p->iov_len);	\
		if (unlikely(!__v.iov_len))		\
			continue;			\
		__v.iov_base = __p->iov_base;		\
		left = (STEP);				\
		__v.iov_len -= left;			\
		skip = __v.iov_len;			\
		n -= __v.iov_len;			\
	}						\
	n = wanted - n;					\
}

#define iterate_kvec(i, n, __v, __p, skip, STEP) {	\
	size_t wanted = n;				\
	__p = i->kvec;					\
	__v.iov_len = min(n, __p->iov_len - skip);	\
	if (likely(__v.iov_len)) {			\
		__v.iov_base = __p->iov_base + skip;	\
		(void)(STEP);				\
		skip += __v.iov_len;			\
		n -= __v.iov_len;			\
	}						\
	while (unlikely(n)) {				\
		__p++;					\
		__v.iov_len = min(n, __p->iov_len);	\
		if (unlikely(!__v.iov_len))		\
			continue;			\
		__v.iov_base = __p->iov_base;		\
		(void)(STEP);				\
		skip = __v.iov_len;			\
		n -= __v.iov_len;			\
	}						\
	n = wanted;					\
}

#define iterate_bvec(i, n, __v, __bi, skip, STEP) {	\
	struct bvec_iter __start;			\
	__start.bi_size = n;				\
	__start.bi_bvec_done = skip;			\
	__start.bi_idx = 0;				\
	for_each_bvec(__v, i->bvec, __bi, __start) {	\
		if (!__v.bv_len)			\
			continue;			\
		(void)(STEP);				\
	}						\
}

#define iterate_all_kinds(i, n, v, I, B, K) {			\
	size_t skip = i->iov_offset;				\
	if (unlikely(i->type & ITER_BVEC)) {			\
		struct bio_vec v;				\
		struct bvec_iter __bi;				\
		iterate_bvec(i, n, v, __bi, skip, (B))		\
	} else if (unlikely(i->type & ITER_KVEC)) {		\
		const struct kvec *kvec;			\
		struct kvec v;					\
		iterate_kvec(i, n, v, kvec, skip, (K))		\
	} else {						\
		const struct iovec *iov;			\
		struct iovec v;					\
		iterate_iovec(i, n, v, iov, skip, (I))		\
	}							\
}

#define iterate_and_advance(i, n, v, I, B, K) {			\
	if (unlikely(i->count < n))				\
		n = i->count;					\
	if (i->count) {						\
		size_t skip = i->iov_offset;			\
		if (unlikely(i->type & ITER_BVEC)) {		\
			const struct bio_vec *bvec = i->bvec;	\
			struct bio_vec v;			\
			struct bvec_iter __bi;			\
			iterate_bvec(i, n, v, __bi, skip, (B))	\
			i->bvec = __bvec_iter_bvec(i->bvec, __bi); \
			i->nr_segs -= i->bvec - bvec;		\
			skip = __bi.bi_bvec_done;		\
		} else if (unlikely(i->type & ITER_KVEC)) {	\
			const struct kvec *kvec;		\
			struct kvec v;				\
			iterate_kvec(i, n, v, kvec, skip, (K))	\
			if (skip == kvec->iov_len) {		\
				kvec++;				\
				skip = 0;			\
			}					\
			i->nr_segs -= kvec - i->kvec;		\
			i->kvec = kvec;				\
		} else {					\
			const struct iovec *iov;		\
			struct iovec v;				\
			iterate_iovec(i, n, v, iov, skip, (I))	\
			if (skip == iov->iov_len) {		\
				iov++;				\
				skip = 0;			\
			}					\
			i->nr_segs -= iov - i->iov;		\
			i->iov = iov;				\
		}						\
		i->count -= n;					\
		i->iov_offset = skip;				\
	}							\
}
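
/*
 * A minimal usage sketch of iterate_all_kinds() (illustrative only; the
 * helper below is hypothetical, not part of this file).  It just measures
 * how many bytes the iterator would hand out:
 *
 *	static size_t iter_span(size_t bytes, struct iov_iter *i)
 *	{
 *		size_t total = 0;
 *		iterate_all_kinds(i, bytes, v,
 *			({ total += v.iov_len; 0; }),	// user iovec chunk
 *			({ total += v.bv_len; }),	// bio_vec chunk
 *			({ total += v.iov_len; })	// kvec chunk
 *		)
 *		return total;	// == bytes; callers clamp bytes to i->count
 *	}
 *
 * The iovec STEP's trailing 0 is its "bytes left unprocessed" result;
 * returning non-zero there is how a faulting copy stops the walk.
 */
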
/*
 * Copy data from @page into the user iovecs of @i.  On CONFIG_HIGHMEM we
 * fault the destination in first and try an atomic kmap + non-faulting
 * copy; if that can't complete, we fall back to a sleeping kmap() and a
 * plain __copy_to_user().  Without highmem, kmap() is free, so the
 * atomic fast path is skipped entirely.
 */
static size_t copy_page_to_iter_iovec(struct page *page, size_t offset, size_t bytes,
			 struct iov_iter *i)
{
	size_t skip, copy, left, wanted;
	const struct iovec *iov;
	char __user *buf;
	void *kaddr, *from;

	if (unlikely(bytes > i->count))
		bytes = i->count;

	if (unlikely(!bytes))
		return 0;

	wanted = bytes;
	iov = i->iov;
	skip = i->iov_offset;
	buf = iov->iov_base + skip;
	copy = min(bytes, iov->iov_len - skip);

	if (IS_ENABLED(CONFIG_HIGHMEM) && !fault_in_pages_writeable(buf, copy)) {
		kaddr = kmap_atomic(page);
		from = kaddr + offset;

		/* first chunk, usually the only one */
		left = __copy_to_user_inatomic(buf, from, copy);
		copy -= left;
		skip += copy;
		from += copy;
		bytes -= copy;

		while (unlikely(!left && bytes)) {
			iov++;
			buf = iov->iov_base;
			copy = min(bytes, iov->iov_len);
			left = __copy_to_user_inatomic(buf, from, copy);
			copy -= left;
			skip = copy;
			from += copy;
			bytes -= copy;
		}
		if (likely(!bytes)) {
			kunmap_atomic(kaddr);
			goto done;
		}
		offset = from - kaddr;
		buf += copy;
		kunmap_atomic(kaddr);
		copy = min(bytes, iov->iov_len - skip);
	}
	/* Too bad - revert to non-atomic kmap */

	kaddr = kmap(page);
	from = kaddr + offset;
	left = __copy_to_user(buf, from, copy);
	copy -= left;
	skip += copy;
	from += copy;
	bytes -= copy;
	while (unlikely(!left && bytes)) {
		iov++;
		buf = iov->iov_base;
		copy = min(bytes, iov->iov_len);
		left = __copy_to_user(buf, from, copy);
		copy -= left;
		skip = copy;
		from += copy;
		bytes -= copy;
	}
	kunmap(page);

done:
	if (skip == iov->iov_len) {
		iov++;
		skip = 0;
	}
	i->count -= wanted - bytes;
	i->nr_segs -= iov - i->iov;
	i->iov = iov;
	i->iov_offset = skip;
	return wanted - bytes;
}

/* The mirror image of copy_page_to_iter_iovec(): user memory -> @page. */
static size_t copy_page_from_iter_iovec(struct page *page, size_t offset, size_t bytes,
			 struct iov_iter *i)
{
	size_t skip, copy, left, wanted;
	const struct iovec *iov;
	char __user *buf;
	void *kaddr, *to;

	if (unlikely(bytes > i->count))
		bytes = i->count;

	if (unlikely(!bytes))
		return 0;

	wanted = bytes;
	iov = i->iov;
	skip = i->iov_offset;
	buf = iov->iov_base + skip;
	copy = min(bytes, iov->iov_len - skip);

	if (IS_ENABLED(CONFIG_HIGHMEM) && !fault_in_pages_readable(buf, copy)) {
		kaddr = kmap_atomic(page);
		to = kaddr + offset;

		/* first chunk, usually the only one */
		left = __copy_from_user_inatomic(to, buf, copy);
		copy -= left;
		skip += copy;
		to += copy;
		bytes -= copy;

		while (unlikely(!left && bytes)) {
			iov++;
			buf = iov->iov_base;
			copy = min(bytes, iov->iov_len);
			left = __copy_from_user_inatomic(to, buf, copy);
			copy -= left;
			skip = copy;
			to += copy;
			bytes -= copy;
		}
		if (likely(!bytes)) {
			kunmap_atomic(kaddr);
			goto done;
		}
		offset = to - kaddr;
		buf += copy;
		kunmap_atomic(kaddr);
		copy = min(bytes, iov->iov_len - skip);
	}
	/* Too bad - revert to non-atomic kmap */

	kaddr = kmap(page);
	to = kaddr + offset;
	left = __copy_from_user(to, buf, copy);
	copy -= left;
	skip += copy;
	to += copy;
	bytes -= copy;
	while (unlikely(!left && bytes)) {
		iov++;
		buf = iov->iov_base;
		copy = min(bytes, iov->iov_len);
		left = __copy_from_user(to, buf, copy);
		copy -= left;
		skip = copy;
		to += copy;
		bytes -= copy;
	}
	kunmap(page);

done:
	if (skip == iov->iov_len) {
		iov++;
		skip = 0;
	}
	i->count -= wanted - bytes;
	i->nr_segs -= iov - i->iov;
	i->iov = iov;
	i->iov_offset = skip;
	return wanted - bytes;
}
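
/*
 * The pipe-backed iterator below treats pipe->bufs[] as a power-of-two
 * ring: slot arithmetic is masked with (pipe->buffers - 1).  A worked
 * example (illustrative numbers): with buffers = 16, curbuf = 14 and
 * nrbufs = 3, the occupied slots are 14, 15 and 0, and the first free
 * slot is (14 + 3) & 15 == 1.  i->idx and i->iov_offset always point at
 * the end of the data already produced into the pipe, which is exactly
 * the invariant sanity() checks.
 */
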
#ifdef PIPE_PARANOIA
static bool sanity(const struct iov_iter *i)
{
	struct pipe_inode_info *pipe = i->pipe;
	int idx = i->idx;
	int next = pipe->curbuf + pipe->nrbufs;
	if (i->iov_offset) {
		struct pipe_buffer *p;
		if (unlikely(!pipe->nrbufs))
			goto Bad;	// pipe must be non-empty
		if (unlikely(idx != ((next - 1) & (pipe->buffers - 1))))
			goto Bad;	// must be at the last buffer...

		p = &pipe->bufs[idx];
		if (unlikely(p->offset + p->len != i->iov_offset))
			goto Bad;	// ... at the end of segment
	} else {
		if (idx != (next & (pipe->buffers - 1)))
			goto Bad;	// must be right after the last buffer
	}
	return true;
Bad:
	printk(KERN_ERR "idx = %d, offset = %zd\n", i->idx, i->iov_offset);
	printk(KERN_ERR "curbuf = %d, nrbufs = %d, buffers = %d\n",
			pipe->curbuf, pipe->nrbufs, pipe->buffers);
	for (idx = 0; idx < pipe->buffers; idx++)
		printk(KERN_ERR "[%p %p %d %d]\n",
			pipe->bufs[idx].ops,
			pipe->bufs[idx].page,
			pipe->bufs[idx].offset,
			pipe->bufs[idx].len);
	WARN_ON(1);
	return false;
}
#else
#define sanity(i) true
#endif

static inline int next_idx(int idx, struct pipe_inode_info *pipe)
{
	return (idx + 1) & (pipe->buffers - 1);
}

/*
 * Splice @page into the pipe by reference: extend the last buffer when
 * the new data simply continues it, otherwise take the next free slot
 * and pin the page with get_page().  Returns 0 if the pipe is full.
 */
static size_t copy_page_to_iter_pipe(struct page *page, size_t offset, size_t bytes,
			 struct iov_iter *i)
{
	struct pipe_inode_info *pipe = i->pipe;
	struct pipe_buffer *buf;
	size_t off;
	int idx;

	if (unlikely(bytes > i->count))
		bytes = i->count;

	if (unlikely(!bytes))
		return 0;

	if (!sanity(i))
		return 0;

	off = i->iov_offset;
	idx = i->idx;
	buf = &pipe->bufs[idx];
	if (off) {
		if (offset == off && buf->page == page) {
			/* merge with the last one */
			buf->len += bytes;
			i->iov_offset += bytes;
			goto out;
		}
		idx = next_idx(idx, pipe);
		buf = &pipe->bufs[idx];
	}
	if (idx == pipe->curbuf && pipe->nrbufs)
		return 0;
	pipe->nrbufs++;
	buf->ops = &page_cache_pipe_buf_ops;
	get_page(buf->page = page);
	buf->offset = offset;
	buf->len = bytes;
	i->iov_offset = offset + bytes;
	i->idx = idx;
out:
	i->count -= bytes;
	return bytes;
}

/*
 * Fault in one or more iovecs of the given iov_iter, to a maximum length of
 * bytes.  For each iovec, fault in each page that constitutes the iovec.
 *
 * Return 0 on success, or non-zero if the memory could not be accessed (i.e.
 * because it is an invalid address).
 */
int iov_iter_fault_in_readable(struct iov_iter *i, size_t bytes)
{
	size_t skip = i->iov_offset;
	const struct iovec *iov;
	int err;
	struct iovec v;

	if (!(i->type & (ITER_BVEC|ITER_KVEC))) {
		iterate_iovec(i, bytes, v, iov, skip, ({
			err = fault_in_pages_readable(v.iov_base, v.iov_len);
			if (unlikely(err))
				return err;
		0;}))
	}
	return 0;
}
EXPORT_SYMBOL(iov_iter_fault_in_readable);
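
/*
 * A minimal sketch (assumed caller, not part of this file) of the classic
 * buffered-write pattern iov_iter_fault_in_readable() exists for, modelled
 * on generic_perform_write():
 *
 *	status = iov_iter_fault_in_readable(i, bytes);
 *	if (unlikely(status))
 *		break;				// -EFAULT, give up
 *	...
 *	copied = iov_iter_copy_from_user_atomic(page, i, offset, bytes);
 *	...
 *	iov_iter_advance(i, copied);
 *
 * Faulting the pages in up front makes the later atomic (non-faulting)
 * copy overwhelmingly likely to succeed; a short atomic copy just means
 * the caller goes around the loop again.
 */
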
void iov_iter_init(struct iov_iter *i, int direction,
			const struct iovec *iov, unsigned long nr_segs,
			size_t count)
{
	/* It will get better.  Eventually... */
	if (segment_eq(get_fs(), KERNEL_DS)) {
		direction |= ITER_KVEC;
		i->type = direction;
		i->kvec = (struct kvec *)iov;
	} else {
		i->type = direction;
		i->iov = iov;
	}
	i->nr_segs = nr_segs;
	i->iov_offset = 0;
	i->count = count;
}
EXPORT_SYMBOL(iov_iter_init);

static void memcpy_from_page(char *to, struct page *page, size_t offset, size_t len)
{
	char *from = kmap_atomic(page);
	memcpy(to, from + offset, len);
	kunmap_atomic(from);
}

static void memcpy_to_page(struct page *page, size_t offset, const char *from, size_t len)
{
	char *to = kmap_atomic(page);
	memcpy(to + offset, from, len);
	kunmap_atomic(to);
}

static void memzero_page(struct page *page, size_t offset, size_t len)
{
	char *addr = kmap_atomic(page);
	memset(addr + offset, 0, len);
	kunmap_atomic(addr);
}

/* true if we own (allocated) this pipe buffer and may append to it */
static inline bool allocated(struct pipe_buffer *buf)
{
	return buf->ops == &default_pipe_buf_ops;
}

/*
 * Find the slot/offset where new data would start: if the last buffer is
 * full, or isn't one of ours to append to, move on to the next slot.
 */
static inline void data_start(const struct iov_iter *i, int *idxp, size_t *offp)
{
	size_t off = i->iov_offset;
	int idx = i->idx;
	if (off && (!allocated(&i->pipe->bufs[idx]) || off == PAGE_SIZE)) {
		idx = next_idx(idx, i->pipe);
		off = 0;
	}
	*idxp = idx;
	*offp = off;
}

/*
 * Make room for @size bytes in the pipe: extend the last partially-filled
 * buffer we own, then allocate fresh pages until the pipe fills up or an
 * allocation fails.  Returns how much space was actually secured.
 */
static size_t push_pipe(struct iov_iter *i, size_t size,
			int *idxp, size_t *offp)
{
	struct pipe_inode_info *pipe = i->pipe;
	size_t off;
	int idx;
	ssize_t left;

	if (unlikely(size > i->count))
		size = i->count;
	if (unlikely(!size))
		return 0;

	left = size;
	data_start(i, &idx, &off);
	*idxp = idx;
	*offp = off;
	if (off) {
		left -= PAGE_SIZE - off;
		if (left <= 0) {
			pipe->bufs[idx].len += size;
			return size;
		}
		pipe->bufs[idx].len = PAGE_SIZE;
		idx = next_idx(idx, pipe);
	}
	while (idx != pipe->curbuf || !pipe->nrbufs) {
		struct page *page = alloc_page(GFP_USER);
		if (!page)
			break;
		pipe->nrbufs++;
		pipe->bufs[idx].ops = &default_pipe_buf_ops;
		pipe->bufs[idx].page = page;
		pipe->bufs[idx].offset = 0;
		if (left <= PAGE_SIZE) {
			pipe->bufs[idx].len = left;
			return size;
		}
		pipe->bufs[idx].len = PAGE_SIZE;
		left -= PAGE_SIZE;
		idx = next_idx(idx, pipe);
	}
	return size - left;
}

static size_t copy_pipe_to_iter(const void *addr, size_t bytes,
				struct iov_iter *i)
{
	struct pipe_inode_info *pipe = i->pipe;
	size_t n, off;
	int idx;

	if (!sanity(i))
		return 0;

	bytes = n = push_pipe(i, bytes, &idx, &off);
	if (unlikely(!n))
		return 0;
	for ( ; n; idx = next_idx(idx, pipe), off = 0) {
		size_t chunk = min_t(size_t, n, PAGE_SIZE - off);
		memcpy_to_page(pipe->bufs[idx].page, off, addr, chunk);
		i->idx = idx;
		i->iov_offset = off + chunk;
		n -= chunk;
		addr += chunk;
	}
	i->count -= bytes;
	return bytes;
}

size_t copy_to_iter(const void *addr, size_t bytes, struct iov_iter *i)
{
	const char *from = addr;
	if (unlikely(i->type & ITER_PIPE))
		return copy_pipe_to_iter(addr, bytes, i);
	iterate_and_advance(i, bytes, v,
		__copy_to_user(v.iov_base, (from += v.iov_len) - v.iov_len,
			       v.iov_len),
		memcpy_to_page(v.bv_page, v.bv_offset,
			       (from += v.bv_len) - v.bv_len, v.bv_len),
		memcpy(v.iov_base, (from += v.iov_len) - v.iov_len, v.iov_len)
	)

	return bytes;
}
EXPORT_SYMBOL(copy_to_iter);
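
/*
 * Sketch of the typical consumer of copy_to_iter(): a ->read_iter()
 * method streaming a kernel buffer into whatever the iterator describes
 * (user iovecs, kvecs, bio_vecs or a pipe).  Hypothetical driver code,
 * not part of this file:
 *
 *	static ssize_t foo_read_iter(struct kiocb *iocb, struct iov_iter *to)
 *	{
 *		struct foo_dev *foo = iocb->ki_filp->private_data;
 *		size_t copied = copy_to_iter(foo->buf, foo->len, to);
 *
 *		if (!copied && foo->len)
 *			return -EFAULT;		// nothing copied: faulted
 *		return copied;
 *	}
 *
 * copy_to_iter() advances the iterator and returns the number of bytes
 * actually copied, so short reads fall out naturally.
 */
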
size_t copy_from_iter(void *addr, size_t bytes, struct iov_iter *i)
{
	char *to = addr;
	if (unlikely(i->type & ITER_PIPE)) {
		WARN_ON(1);
		return 0;
	}
	iterate_and_advance(i, bytes, v,
		__copy_from_user((to += v.iov_len) - v.iov_len, v.iov_base,
				 v.iov_len),
		memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page,
				 v.bv_offset, v.bv_len),
		memcpy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len)
	)

	return bytes;
}
EXPORT_SYMBOL(copy_from_iter);

size_t copy_from_iter_nocache(void *addr, size_t bytes, struct iov_iter *i)
{
	char *to = addr;
	if (unlikely(i->type & ITER_PIPE)) {
		WARN_ON(1);
		return 0;
	}
	iterate_and_advance(i, bytes, v,
		__copy_from_user_nocache((to += v.iov_len) - v.iov_len,
					 v.iov_base, v.iov_len),
		memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page,
				 v.bv_offset, v.bv_len),
		memcpy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len)
	)

	return bytes;
}
EXPORT_SYMBOL(copy_from_iter_nocache);

size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes,
			 struct iov_iter *i)
{
	if (i->type & (ITER_BVEC|ITER_KVEC)) {
		void *kaddr = kmap_atomic(page);
		size_t wanted = copy_to_iter(kaddr + offset, bytes, i);
		kunmap_atomic(kaddr);
		return wanted;
	} else if (likely(!(i->type & ITER_PIPE)))
		return copy_page_to_iter_iovec(page, offset, bytes, i);
	else
		return copy_page_to_iter_pipe(page, offset, bytes, i);
}
EXPORT_SYMBOL(copy_page_to_iter);

size_t copy_page_from_iter(struct page *page, size_t offset, size_t bytes,
			 struct iov_iter *i)
{
	if (unlikely(i->type & ITER_PIPE)) {
		WARN_ON(1);
		return 0;
	}
	if (i->type & (ITER_BVEC|ITER_KVEC)) {
		void *kaddr = kmap_atomic(page);
		size_t wanted = copy_from_iter(kaddr + offset, bytes, i);
		kunmap_atomic(kaddr);
		return wanted;
	} else
		return copy_page_from_iter_iovec(page, offset, bytes, i);
}
EXPORT_SYMBOL(copy_page_from_iter);

static size_t pipe_zero(size_t bytes, struct iov_iter *i)
{
	struct pipe_inode_info *pipe = i->pipe;
	size_t n, off;
	int idx;

	if (!sanity(i))
		return 0;

	bytes = n = push_pipe(i, bytes, &idx, &off);
	if (unlikely(!n))
		return 0;

	for ( ; n; idx = next_idx(idx, pipe), off = 0) {
		size_t chunk = min_t(size_t, n, PAGE_SIZE - off);
		memzero_page(pipe->bufs[idx].page, off, chunk);
		i->idx = idx;
		i->iov_offset = off + chunk;
		n -= chunk;
	}
	i->count -= bytes;
	return bytes;
}

size_t iov_iter_zero(size_t bytes, struct iov_iter *i)
{
	if (unlikely(i->type & ITER_PIPE))
		return pipe_zero(bytes, i);
	iterate_and_advance(i, bytes, v,
		__clear_user(v.iov_base, v.iov_len),
		memzero_page(v.bv_page, v.bv_offset, v.bv_len),
		memset(v.iov_base, 0, v.iov_len)
	)

	return bytes;
}
EXPORT_SYMBOL(iov_iter_zero);

size_t iov_iter_copy_from_user_atomic(struct page *page,
		struct iov_iter *i, unsigned long offset, size_t bytes)
{
	char *kaddr = kmap_atomic(page), *p = kaddr + offset;
	if (unlikely(i->type & ITER_PIPE)) {
		kunmap_atomic(kaddr);
		WARN_ON(1);
		return 0;
	}
	iterate_all_kinds(i, bytes, v,
		__copy_from_user_inatomic((p += v.iov_len) - v.iov_len,
					  v.iov_base, v.iov_len),
		memcpy_from_page((p += v.bv_len) - v.bv_len, v.bv_page,
				 v.bv_offset, v.bv_len),
		memcpy((p += v.iov_len) - v.iov_len, v.iov_base, v.iov_len)
	)
	kunmap_atomic(kaddr);
	return bytes;
}
EXPORT_SYMBOL(iov_iter_copy_from_user_atomic);
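
/*
 * Sketch of a page-cache style read loop built on copy_page_to_iter(),
 * which picks the right strategy per iterator flavour (assumed caller,
 * heavily simplified; locking and error handling omitted):
 *
 *	while (iov_iter_count(iter)) {
 *		struct page *page = find_get_page(mapping, index);
 *		size_t n = copy_page_to_iter(page, offset,
 *					     PAGE_SIZE - offset, iter);
 *		put_page(page);
 *		if (!n)
 *			break;		// fault, or the pipe is full
 *		index++;
 *		offset = 0;
 *	}
 *
 * For ITER_PIPE the page is inserted by reference (get_page()) rather
 * than copied, which is how splice avoids the memcpy on the read side.
 */
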
/*
 * Consume @size bytes of pipe output: truncate the last covered buffer at
 * the new end position and release any buffers past it.
 */
static void pipe_advance(struct iov_iter *i, size_t size)
{
	struct pipe_inode_info *pipe = i->pipe;
	struct pipe_buffer *buf;
	int idx = i->idx;
	size_t off = i->iov_offset, orig_sz;

	if (unlikely(i->count < size))
		size = i->count;
	orig_sz = size;

	if (size) {
		if (off) /* make it relative to the beginning of buffer */
			size += off - pipe->bufs[idx].offset;
		while (1) {
			buf = &pipe->bufs[idx];
			if (size <= buf->len)
				break;
			size -= buf->len;
			idx = next_idx(idx, pipe);
		}
		buf->len = size;
		i->idx = idx;
		off = i->iov_offset = buf->offset + size;
	}
	if (off)
		idx = next_idx(idx, pipe);
	if (pipe->nrbufs) {
		int unused = (pipe->curbuf + pipe->nrbufs) & (pipe->buffers - 1);
		/* [curbuf,unused) is in use.  Free [idx,unused) */
		while (idx != unused) {
			pipe_buf_release(pipe, &pipe->bufs[idx]);
			idx = next_idx(idx, pipe);
			pipe->nrbufs--;
		}
	}
	i->count -= orig_sz;
}

void iov_iter_advance(struct iov_iter *i, size_t size)
{
	if (unlikely(i->type & ITER_PIPE)) {
		pipe_advance(i, size);
		return;
	}
	iterate_and_advance(i, size, v, 0, 0, 0)
}
EXPORT_SYMBOL(iov_iter_advance);

/*
 * Return the count of just the current iov_iter segment.
 */
size_t iov_iter_single_seg_count(const struct iov_iter *i)
{
	if (unlikely(i->type & ITER_PIPE))
		return i->count;	// it is a silly place, anyway
	if (i->nr_segs == 1)
		return i->count;
	else if (i->type & ITER_BVEC)
		return min(i->count, i->bvec->bv_len - i->iov_offset);
	else
		return min(i->count, i->iov->iov_len - i->iov_offset);
}
EXPORT_SYMBOL(iov_iter_single_seg_count);

void iov_iter_kvec(struct iov_iter *i, int direction,
			const struct kvec *kvec, unsigned long nr_segs,
			size_t count)
{
	BUG_ON(!(direction & ITER_KVEC));
	i->type = direction;
	i->kvec = kvec;
	i->nr_segs = nr_segs;
	i->iov_offset = 0;
	i->count = count;
}
EXPORT_SYMBOL(iov_iter_kvec);

void iov_iter_bvec(struct iov_iter *i, int direction,
			const struct bio_vec *bvec, unsigned long nr_segs,
			size_t count)
{
	BUG_ON(!(direction & ITER_BVEC));
	i->type = direction;
	i->bvec = bvec;
	i->nr_segs = nr_segs;
	i->iov_offset = 0;
	i->count = count;
}
EXPORT_SYMBOL(iov_iter_bvec);

void iov_iter_pipe(struct iov_iter *i, int direction,
			struct pipe_inode_info *pipe,
			size_t count)
{
	BUG_ON(direction != ITER_PIPE);
	i->type = direction;
	i->pipe = pipe;
	i->idx = (pipe->curbuf + pipe->nrbufs) & (pipe->buffers - 1);
	i->iov_offset = 0;
	i->count = count;
}
EXPORT_SYMBOL(iov_iter_pipe);

unsigned long iov_iter_alignment(const struct iov_iter *i)
{
	unsigned long res = 0;
	size_t size = i->count;

	if (!size)
		return 0;

	if (unlikely(i->type & ITER_PIPE)) {
		if (i->iov_offset && allocated(&i->pipe->bufs[i->idx]))
			return size | i->iov_offset;
		return size;
	}
	iterate_all_kinds(i, size, v,
		(res |= (unsigned long)v.iov_base | v.iov_len, 0),
		res |= v.bv_offset | v.bv_len,
		res |= (unsigned long)v.iov_base | v.iov_len
	)
	return res;
}
EXPORT_SYMBOL(iov_iter_alignment);
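
/*
 * Illustrative use of iov_iter_alignment() (assumed caller): direct I/O
 * paths OR all segment addresses and lengths into one word and test it
 * against the device's alignment mask, so a single branch vets the whole
 * iterator:
 *
 *	unsigned blkbits = inode->i_blkbits;	// or the device's blocksize
 *	if (iov_iter_alignment(iter) & ((1 << blkbits) - 1))
 *		return -EINVAL;		// some segment is misaligned
 *
 * One misaligned base or length anywhere poisons the OR-ed result.
 */
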
unsigned long iov_iter_gap_alignment(const struct iov_iter *i)
{
	unsigned long res = 0;
	size_t size = i->count;
	if (!size)
		return 0;

	if (unlikely(i->type & ITER_PIPE)) {
		WARN_ON(1);
		return ~0U;
	}

	iterate_all_kinds(i, size, v,
		(res |= (!res ? 0 : (unsigned long)v.iov_base) |
			(size != v.iov_len ? size : 0), 0),
		(res |= (!res ? 0 : (unsigned long)v.bv_offset) |
			(size != v.bv_len ? size : 0)),
		(res |= (!res ? 0 : (unsigned long)v.iov_base) |
			(size != v.iov_len ? size : 0))
		);
	return res;
}
EXPORT_SYMBOL(iov_iter_gap_alignment);

static inline ssize_t __pipe_get_pages(struct iov_iter *i,
				size_t maxsize,
				struct page **pages,
				int idx,
				size_t *start)
{
	struct pipe_inode_info *pipe = i->pipe;
	ssize_t n = push_pipe(i, maxsize, &idx, start);
	if (!n)
		return -EFAULT;

	maxsize = n;
	n += *start;
	while (n > 0) {
		get_page(*pages++ = pipe->bufs[idx].page);
		idx = next_idx(idx, pipe);
		n -= PAGE_SIZE;
	}

	return maxsize;
}

static ssize_t pipe_get_pages(struct iov_iter *i,
		   struct page **pages, size_t maxsize, unsigned maxpages,
		   size_t *start)
{
	unsigned npages;
	size_t capacity;
	int idx;

	if (!sanity(i))
		return -EFAULT;

	data_start(i, &idx, start);
	/* some of this one + all after this one */
	npages = ((i->pipe->curbuf - idx - 1) & (i->pipe->buffers - 1)) + 1;
	capacity = min(npages, maxpages) * PAGE_SIZE - *start;

	return __pipe_get_pages(i, min(maxsize, capacity), pages, idx, start);
}

ssize_t iov_iter_get_pages(struct iov_iter *i,
		   struct page **pages, size_t maxsize, unsigned maxpages,
		   size_t *start)
{
	if (maxsize > i->count)
		maxsize = i->count;

	if (!maxsize)
		return 0;

	if (unlikely(i->type & ITER_PIPE))
		return pipe_get_pages(i, pages, maxsize, maxpages, start);
	iterate_all_kinds(i, maxsize, v, ({
		unsigned long addr = (unsigned long)v.iov_base;
		size_t len = v.iov_len + (*start = addr & (PAGE_SIZE - 1));
		int n;
		int res;

		if (len > maxpages * PAGE_SIZE)
			len = maxpages * PAGE_SIZE;
		addr &= ~(PAGE_SIZE - 1);
		n = DIV_ROUND_UP(len, PAGE_SIZE);
		res = get_user_pages_fast(addr, n, (i->type & WRITE) != WRITE, pages);
		if (unlikely(res < 0))
			return res;
		return (res == n ? len : res * PAGE_SIZE) - *start;
	0;}),({
		/* can't be more than PAGE_SIZE */
		*start = v.bv_offset;
		get_page(*pages = v.bv_page);
		return v.bv_len;
	}),({
		return -EFAULT;
	})
	)
	return 0;
}
EXPORT_SYMBOL(iov_iter_get_pages);
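
/*
 * Zero-copy consumer sketch for iov_iter_get_pages() (hypothetical, not
 * part of this file): pin up to 16 pages, use them, drop the references,
 * then advance.  Note the function pins pages but does not advance the
 * iterator itself:
 *
 *	struct page *pages[16];
 *	size_t start;
 *	ssize_t n = iov_iter_get_pages(iter, pages, SIZE_MAX, 16, &start);
 *	int k;
 *
 *	if (n <= 0)
 *		return n;
 *	// the data starts at offset 'start' within pages[0]
 *	for (k = 0; k < DIV_ROUND_UP(n + start, PAGE_SIZE); k++)
 *		put_page(pages[k]);
 *	iov_iter_advance(iter, n);
 *
 * For ITER_BVEC this returns at most one page per call, so callers loop.
 */
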
static struct page **get_pages_array(size_t n)
{
	struct page **p = kmalloc(n * sizeof(struct page *), GFP_KERNEL);
	if (!p)
		p = vmalloc(n * sizeof(struct page *));
	return p;
}

static ssize_t pipe_get_pages_alloc(struct iov_iter *i,
		   struct page ***pages, size_t maxsize,
		   size_t *start)
{
	struct page **p;
	ssize_t n;
	int idx;
	int npages;

	if (!sanity(i))
		return -EFAULT;

	data_start(i, &idx, start);
	/* some of this one + all after this one */
	npages = ((i->pipe->curbuf - idx - 1) & (i->pipe->buffers - 1)) + 1;
	n = npages * PAGE_SIZE - *start;
	if (maxsize > n)
		maxsize = n;
	else
		npages = DIV_ROUND_UP(maxsize + *start, PAGE_SIZE);
	p = get_pages_array(npages);
	if (!p)
		return -ENOMEM;
	n = __pipe_get_pages(i, maxsize, p, idx, start);
	if (n > 0)
		*pages = p;
	else
		kvfree(p);
	return n;
}

ssize_t iov_iter_get_pages_alloc(struct iov_iter *i,
		   struct page ***pages, size_t maxsize,
		   size_t *start)
{
	struct page **p;

	if (maxsize > i->count)
		maxsize = i->count;

	if (!maxsize)
		return 0;

	if (unlikely(i->type & ITER_PIPE))
		return pipe_get_pages_alloc(i, pages, maxsize, start);
	iterate_all_kinds(i, maxsize, v, ({
		unsigned long addr = (unsigned long)v.iov_base;
		size_t len = v.iov_len + (*start = addr & (PAGE_SIZE - 1));
		int n;
		int res;

		addr &= ~(PAGE_SIZE - 1);
		n = DIV_ROUND_UP(len, PAGE_SIZE);
		p = get_pages_array(n);
		if (!p)
			return -ENOMEM;
		res = get_user_pages_fast(addr, n, (i->type & WRITE) != WRITE, p);
		if (unlikely(res < 0)) {
			kvfree(p);
			return res;
		}
		*pages = p;
		return (res == n ? len : res * PAGE_SIZE) - *start;
	0;}),({
		/* can't be more than PAGE_SIZE */
		*start = v.bv_offset;
		*pages = p = get_pages_array(1);
		if (!p)
			return -ENOMEM;
		get_page(*p = v.bv_page);
		return v.bv_len;
	}),({
		return -EFAULT;
	})
	)
	return 0;
}
EXPORT_SYMBOL(iov_iter_get_pages_alloc);
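
/*
 * The _alloc variant sizes the page array itself.  The caller owns the
 * array and must release it with kvfree(), since it may come from either
 * kmalloc() or vmalloc() (see get_pages_array() above).  Sketch:
 *
 *	struct page **pages;
 *	size_t start;
 *	ssize_t n = iov_iter_get_pages_alloc(iter, &pages, SIZE_MAX, &start);
 *
 *	if (n <= 0)
 *		return n;
 *	... use the pinned pages, put_page() each one ...
 *	kvfree(pages);
 */
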
size_t csum_and_copy_from_iter(void *addr, size_t bytes, __wsum *csum,
			       struct iov_iter *i)
{
	char *to = addr;
	__wsum sum, next;
	size_t off = 0;
	sum = *csum;
	if (unlikely(i->type & ITER_PIPE)) {
		WARN_ON(1);
		return 0;
	}
	iterate_and_advance(i, bytes, v, ({
		int err = 0;
		next = csum_and_copy_from_user(v.iov_base,
					       (to += v.iov_len) - v.iov_len,
					       v.iov_len, 0, &err);
		if (!err) {
			sum = csum_block_add(sum, next, off);
			off += v.iov_len;
		}
		err ? v.iov_len : 0;
	}), ({
		char *p = kmap_atomic(v.bv_page);
		next = csum_partial_copy_nocheck(p + v.bv_offset,
						 (to += v.bv_len) - v.bv_len,
						 v.bv_len, 0);
		kunmap_atomic(p);
		sum = csum_block_add(sum, next, off);
		off += v.bv_len;
	}),({
		next = csum_partial_copy_nocheck(v.iov_base,
						 (to += v.iov_len) - v.iov_len,
						 v.iov_len, 0);
		sum = csum_block_add(sum, next, off);
		off += v.iov_len;
	})
	)
	*csum = sum;
	return bytes;
}
EXPORT_SYMBOL(csum_and_copy_from_iter);

size_t csum_and_copy_to_iter(const void *addr, size_t bytes, __wsum *csum,
			     struct iov_iter *i)
{
	const char *from = addr;
	__wsum sum, next;
	size_t off = 0;
	sum = *csum;
	if (unlikely(i->type & ITER_PIPE)) {
		WARN_ON(1);	/* for now */
		return 0;
	}
	iterate_and_advance(i, bytes, v, ({
		int err = 0;
		next = csum_and_copy_to_user((from += v.iov_len) - v.iov_len,
					     v.iov_base,
					     v.iov_len, 0, &err);
		if (!err) {
			sum = csum_block_add(sum, next, off);
			off += v.iov_len;
		}
		err ? v.iov_len : 0;
	}), ({
		char *p = kmap_atomic(v.bv_page);
		next = csum_partial_copy_nocheck((from += v.bv_len) - v.bv_len,
						 p + v.bv_offset,
						 v.bv_len, 0);
		kunmap_atomic(p);
		sum = csum_block_add(sum, next, off);
		off += v.bv_len;
	}),({
		next = csum_partial_copy_nocheck((from += v.iov_len) - v.iov_len,
						 v.iov_base,
						 v.iov_len, 0);
		sum = csum_block_add(sum, next, off);
		off += v.iov_len;
	})
	)
	*csum = sum;
	return bytes;
}
EXPORT_SYMBOL(csum_and_copy_to_iter);

int iov_iter_npages(const struct iov_iter *i, int maxpages)
{
	size_t size = i->count;
	int npages = 0;

	if (!size)
		return 0;

	if (unlikely(i->type & ITER_PIPE)) {
		struct pipe_inode_info *pipe = i->pipe;
		size_t off;
		int idx;

		if (!sanity(i))
			return 0;

		data_start(i, &idx, &off);
		/* some of this one + all after this one */
		npages = ((pipe->curbuf - idx - 1) & (pipe->buffers - 1)) + 1;
		if (npages >= maxpages)
			return maxpages;
	} else iterate_all_kinds(i, size, v, ({
		unsigned long p = (unsigned long)v.iov_base;
		npages += DIV_ROUND_UP(p + v.iov_len, PAGE_SIZE)
			- p / PAGE_SIZE;
		if (npages >= maxpages)
			return maxpages;
	0;}),({
		npages++;
		if (npages >= maxpages)
			return maxpages;
	}),({
		unsigned long p = (unsigned long)v.iov_base;
		npages += DIV_ROUND_UP(p + v.iov_len, PAGE_SIZE)
			- p / PAGE_SIZE;
		if (npages >= maxpages)
			return maxpages;
	})
	)
	return npages;
}
EXPORT_SYMBOL(iov_iter_npages);

const void *dup_iter(struct iov_iter *new, struct iov_iter *old, gfp_t flags)
{
	*new = *old;
	if (unlikely(new->type & ITER_PIPE)) {
		WARN_ON(1);
		return NULL;
	}
	if (new->type & ITER_BVEC)
		return new->bvec = kmemdup(new->bvec,
				    new->nr_segs * sizeof(struct bio_vec),
				    flags);
	else
		/* iovec and kvec have identical layout */
		return new->iov = kmemdup(new->iov,
				   new->nr_segs * sizeof(struct iovec),
				   flags);
}
EXPORT_SYMBOL(dup_iter);
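
/*
 * Sketch of the networking-style use of csum_and_copy_from_iter()
 * (assumed caller, modelled on the datagram copy-and-checksum helpers):
 * the checksum accumulates in *csum during the copy, so the packet sum
 * comes out of the same pass over memory as the copy itself:
 *
 *	__wsum csum = 0;
 *	if (csum_and_copy_from_iter(skb_data, len, &csum, from) != len)
 *		return -EFAULT;		// a segment faulted mid-copy
 *
 * A short return means a fault; the iterator has still been advanced by
 * however much was successfully copied and checksummed.
 */
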
/**
 * import_iovec() - Copy an array of &struct iovec from userspace
 *     into the kernel, check that it is valid, and initialize a new
 *     &struct iov_iter iterator to access it.
 *
 * @type: One of %READ or %WRITE.
 * @uvector: Pointer to the userspace array.
 * @nr_segs: Number of elements in userspace array.
 * @fast_segs: Number of elements in @iov.
 * @iov: (input and output parameter) Pointer to pointer to (usually small
 *     on-stack) kernel array.
 * @i: Pointer to iterator that will be initialized on success.
 *
 * If the array pointed to by *@iov is large enough to hold all @nr_segs,
 * then this function places %NULL in *@iov on return. Otherwise, a new
 * array will be allocated and the result placed in *@iov. This means that
 * the caller may call kfree() on *@iov regardless of whether the small
 * on-stack array was used or not (and regardless of whether this function
 * returns an error or not).
 *
 * Return: 0 on success or negative error code on error.
 */
int import_iovec(int type, const struct iovec __user * uvector,
		 unsigned nr_segs, unsigned fast_segs,
		 struct iovec **iov, struct iov_iter *i)
{
	ssize_t n;
	struct iovec *p;
	n = rw_copy_check_uvector(type, uvector, nr_segs, fast_segs,
				  *iov, &p);
	if (n < 0) {
		if (p != *iov)
			kfree(p);
		*iov = NULL;
		return n;
	}
	iov_iter_init(i, type, p, nr_segs, n);
	*iov = p == *iov ? NULL : p;
	return 0;
}
EXPORT_SYMBOL(import_iovec);

#ifdef CONFIG_COMPAT
#include <linux/compat.h>

int compat_import_iovec(int type, const struct compat_iovec __user * uvector,
		 unsigned nr_segs, unsigned fast_segs,
		 struct iovec **iov, struct iov_iter *i)
{
	ssize_t n;
	struct iovec *p;
	n = compat_rw_copy_check_uvector(type, uvector, nr_segs, fast_segs,
					 *iov, &p);
	if (n < 0) {
		if (p != *iov)
			kfree(p);
		*iov = NULL;
		return n;
	}
	iov_iter_init(i, type, p, nr_segs, n);
	*iov = p == *iov ? NULL : p;
	return 0;
}
#endif

int import_single_range(int rw, void __user *buf, size_t len,
		 struct iovec *iov, struct iov_iter *i)
{
	if (len > MAX_RW_COUNT)
		len = MAX_RW_COUNT;
	if (unlikely(!access_ok(!rw, buf, len)))
		return -EFAULT;

	iov->iov_base = buf;
	iov->iov_len = len;
	iov_iter_init(i, rw, iov, 1, len);
	return 0;
}
EXPORT_SYMBOL(import_single_range);
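
/*
 * Typical syscall-side usage of import_iovec() (sketch, assuming a
 * readv-style entry point; do_the_read() is hypothetical).  The kfree()
 * is safe in both the on-stack and allocated cases, as documented above:
 *
 *	struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
 *	struct iov_iter iter;
 *	ssize_t ret;
 *
 *	ret = import_iovec(READ, uvec, nr_segs, UIO_FASTIOV, &iov, &iter);
 *	if (ret < 0)
 *		return ret;
 *	ret = do_the_read(file, &iter);
 *	kfree(iov);
 *	return ret;
 */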