#include <linux/export.h>
#include <linux/uio.h>
#include <linux/pagemap.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/splice.h>
#include <net/checksum.h>

/* Enables the sanity() consistency check for pipe-backed iterators below. */
#define PIPE_PARANOIA /* for now */

/*
 * iterate_iovec() - run STEP over consecutive pieces of an iovec array.
 *
 * @i:    iterator; only i->iov is read here
 * @n:    in: number of bytes wanted; out: number of bytes actually processed
 * @__v:  struct iovec lvalue describing the current piece (base + length)
 * @__p:  const struct iovec * cursor; left pointing at the last segment used
 * @skip: in: offset into the first segment; out: offset into *@__p
 * @STEP: expression evaluated once per non-empty piece; its value is the
 *        number of bytes of that piece that were NOT processed.  A non-zero
 *        value ends the walk early.
 *
 * Zero-length segments after the first one are skipped without running STEP.
 */
#define iterate_iovec(i, n, __v, __p, skip, STEP) {	\
	size_t left;					\
	size_t wanted = n;				\
	__p = i->iov;					\
	__v.iov_len = min(n, __p->iov_len - skip);	\
	if (likely(__v.iov_len)) {			\
		__v.iov_base = __p->iov_base + skip;	\
		left = (STEP);				\
		__v.iov_len -= left;			\
		skip += __v.iov_len;			\
		n -= __v.iov_len;			\
	} else {					\
		left = 0;				\
	}						\
	while (unlikely(!left && n)) {			\
		__p++;					\
		__v.iov_len = min(n, __p->iov_len);	\
		if (unlikely(!__v.iov_len))		\
			continue;			\
		__v.iov_base = __p->iov_base;		\
		left = (STEP);				\
		__v.iov_len -= left;			\
		skip = __v.iov_len;			\
		n -= __v.iov_len;			\
	}						\
	n = wanted - n;					\
}

/*
 * iterate_kvec() - same walk as iterate_iovec(), over i->kvec.
 *
 * The value of STEP is discarded, so the walk never stops early and @n is
 * restored to the requested amount on exit.  @skip ends up as the offset
 * into the last segment touched (*@__p).
 */
#define iterate_kvec(i, n, __v, __p, skip, STEP) {	\
	size_t wanted = n;				\
	__p = i->kvec;					\
	__v.iov_len = min(n, __p->iov_len - skip);	\
	if (likely(__v.iov_len)) {			\
		__v.iov_base = __p->iov_base + skip;	\
		(void)(STEP);				\
		skip += __v.iov_len;			\
		n -= __v.iov_len;			\
	}						\
	while (unlikely(n)) {				\
		__p++;					\
		__v.iov_len = min(n, __p->iov_len);	\
		if (unlikely(!__v.iov_len))		\
			continue;			\
		__v.iov_base = __p->iov_base;		\
		(void)(STEP);				\
		skip = __v.iov_len;			\
		n -= __v.iov_len;			\
	}						\
	n = wanted;					\
}

/*
 * iterate_bvec() - run STEP over the bio_vec pieces of i->bvec.
 *
 * A local bvec_iter (__start) is seeded with @n bytes to cover and @skip
 * bytes already done in the first entry, then for_each_bvec() drives the
 * walk with @__bi as the running position.  Zero-length pieces are skipped
 * and the value of STEP is discarded.  @n and @skip are not modified.
 */
#define iterate_bvec(i, n, __v, __bi, skip, STEP) {	\
	struct bvec_iter __start;			\
	__start.bi_size = n;				\
	__start.bi_bvec_done = skip;			\
	__start.bi_idx = 0;				\
	for_each_bvec(__v, i->bvec, __bi, __start) {	\
		if (!__v.bv_len)			\
			continue;			\
		(void)(STEP);				\
	}						\
}

/*
 * iterate_all_kinds() - dispatch on i->type and walk @n bytes without
 * advancing the iterator.
 *
 * @v names the per-piece variable visible to the step expressions; it is a
 * struct iovec for @I (user iovec), a struct bio_vec for @B (ITER_BVEC) and
 * a struct kvec for @K (ITER_KVEC).  Only a local copy of i->iov_offset is
 * updated; *@i itself is left untouched.
 */
#define iterate_all_kinds(i, n, v, I, B, K) {			\
	size_t skip = i->iov_offset;				\
	if (unlikely(i->type & ITER_BVEC)) {			\
		struct bio_vec v;				\
		struct bvec_iter __bi;				\
		iterate_bvec(i, n, v, __bi, skip, (B))		\
	} else if (unlikely(i->type & ITER_KVEC)) {		\
		const struct kvec *kvec;			\
		struct kvec v;					\
		iterate_kvec(i, n, v, kvec, skip, (K))		\
	} else {						\
		const struct iovec *iov;			\
		struct iovec v;					\
		iterate_iovec(i, n, v, iov, skip, (I))		\
	}							\
}

/*
 * iterate_and_advance() - like iterate_all_kinds(), but consumes what was
 * processed.
 *
 * @n is first clamped to i->count.  After the walk the iterator's segment
 * pointer, nr_segs, iov_offset and count are updated to sit just past the
 * processed bytes; a segment that was used up completely is stepped over so
 * that iov_offset becomes 0 on the following one.  Nothing is done when
 * i->count is 0.
 */
#define iterate_and_advance(i, n, v, I, B, K) {			\
	if (unlikely(i->count < n))				\
		n = i->count;					\
	if (i->count) {						\
		size_t skip = i->iov_offset;			\
		if (unlikely(i->type & ITER_BVEC)) {		\
			const struct bio_vec *bvec = i->bvec;	\
			struct bio_vec v;			\
			struct bvec_iter __bi;			\
			iterate_bvec(i, n, v, __bi, skip, (B))	\
			i->bvec = __bvec_iter_bvec(i->bvec, __bi);	\
			i->nr_segs -= i->bvec - bvec;		\
			skip = __bi.bi_bvec_done;		\
		} else if (unlikely(i->type & ITER_KVEC)) {	\
			const struct kvec *kvec;		\
			struct kvec v;				\
			iterate_kvec(i, n, v, kvec, skip, (K))	\
			if (skip == kvec->iov_len) {		\
				kvec++;				\
				skip = 0;			\
			}					\
			i->nr_segs -= kvec - i->kvec;		\
			i->kvec = kvec;				\
		} else {					\
			const struct iovec *iov;		\
			struct iovec v;				\
			iterate_iovec(i, n, v, iov, skip, (I))	\
			if (skip == iov->iov_len) {		\
				iov++;				\
				skip = 0;			\
			}					\
			i->nr_segs -= iov - i->iov;		\
			i->iov = iov;				\
		}						\
		i->count -= n;					\
		i->iov_offset = skip;				\
	}							\
}

/*
 * Copy up to @bytes bytes from @page, starting at @offset, into the user
 * iovecs described by @i, and advance @i past what was copied.
 *
 * @bytes is clamped to i->count.  With CONFIG_HIGHMEM, and if faulting in
 * the first destination chunk succeeded, the copy is first attempted under
 * kmap_atomic() with the _inatomic copy routine; whatever could not be
 * copied that way is retried under a regular kmap().  Without
 * CONFIG_HIGHMEM only the kmap() path is used.
 *
 * Returns the number of bytes copied, which is short if a user copy failed.
 */
static size_t copy_page_to_iter_iovec(struct page *page, size_t offset, size_t bytes,
			 struct iov_iter *i)
{
	size_t skip, copy, left, wanted;
	const struct iovec *iov;
	char __user *buf;
	void *kaddr, *from;

	if (unlikely(bytes > i->count))
		bytes = i->count;

	if (unlikely(!bytes))
		return 0;

	wanted = bytes;
	iov = i->iov;
	skip = i->iov_offset;
	buf = iov->iov_base + skip;
	copy = min(bytes, iov->iov_len - skip);

	if (IS_ENABLED(CONFIG_HIGHMEM) && !fault_in_pages_writeable(buf, copy)) {
		kaddr = kmap_atomic(page);
		from = kaddr + offset;

		/* first chunk, usually the only one */
		left = __copy_to_user_inatomic(buf, from, copy);
		copy -= left;
		skip += copy;
		from += copy;
		bytes -= copy;

		/* keep going through further segments until a copy comes up short */
		while (unlikely(!left && bytes)) {
			iov++;
			buf = iov->iov_base;
			copy = min(bytes, iov->iov_len);
			left = __copy_to_user_inatomic(buf, from, copy);
			copy -= left;
			skip = copy;
			from += copy;
			bytes -= copy;
		}
		if (likely(!bytes)) {
			kunmap_atomic(kaddr);
			goto done;
		}
		/* remember where the atomic attempt stopped, for the retry below */
		offset = from - kaddr;
		buf += copy;
		kunmap_atomic(kaddr);
		copy = min(bytes, iov->iov_len - skip);
	}
	/* Too bad - revert to non-atomic kmap */

	kaddr = kmap(page);
	from = kaddr + offset;
	left = __copy_to_user(buf, from, copy);
	copy -= left;
	skip += copy;
	from += copy;
	bytes -= copy;
	while (unlikely(!left && bytes)) {
		iov++;
		buf = iov->iov_base;
		copy = min(bytes, iov->iov_len);
		left = __copy_to_user(buf, from, copy);
		copy -= left;
		skip = copy;
		from += copy;
		bytes -= copy;
	}
	kunmap(page);

done:
	/* a completely filled segment is stepped over */
	if (skip == iov->iov_len) {
		iov++;
		skip = 0;
	}
	i->count -= wanted - bytes;
	i->nr_segs -= iov - i->iov;
	i->iov = iov;
	i->iov_offset = skip;
	return wanted - bytes;
}

/*
 * Copy up to @bytes bytes from the user iovecs described by @i into @page
 * at @offset, and advance @i past what was copied.
 *
 * Mirror image of copy_page_to_iter_iovec(): same clamping to i->count,
 * same atomic-first / kmap() fallback structure, with the user memory as
 * the source instead of the destination.
 *
 * Returns the number of bytes copied, which is short if a user copy failed.
 */
static size_t copy_page_from_iter_iovec(struct page *page, size_t offset, size_t bytes,
			 struct iov_iter *i)
{
	size_t skip, copy, left, wanted;
	const struct iovec *iov;
	char __user *buf;
	void *kaddr, *to;

	if (unlikely(bytes > i->count))
		bytes = i->count;

	if (unlikely(!bytes))
		return 0;

	wanted = bytes;
	iov = i->iov;
	skip = i->iov_offset;
	buf = iov->iov_base + skip;
	copy = min(bytes, iov->iov_len - skip);

	if (IS_ENABLED(CONFIG_HIGHMEM) && !fault_in_pages_readable(buf, copy)) {
		kaddr = kmap_atomic(page);
		to = kaddr + offset;

		/* first chunk, usually the only one */
		left = __copy_from_user_inatomic(to, buf, copy);
		copy -= left;
		skip += copy;
		to += copy;
		bytes -= copy;

		/* keep going through further segments until a copy comes up short */
		while (unlikely(!left && bytes)) {
			iov++;
			buf = iov->iov_base;
			copy = min(bytes, iov->iov_len);
			left = __copy_from_user_inatomic(to, buf, copy);
			copy -= left;
			skip = copy;
			to += copy;
			bytes -= copy;
		}
		if (likely(!bytes)) {
			kunmap_atomic(kaddr);
			goto done;
		}
		/* remember where the atomic attempt stopped, for the retry below */
		offset = to - kaddr;
		buf += copy;
		kunmap_atomic(kaddr);
		copy = min(bytes, iov->iov_len - skip);
	}
	/* Too bad - revert to non-atomic kmap */

	kaddr = kmap(page);
	to = kaddr + offset;
	left = __copy_from_user(to, buf, copy);
	copy -= left;
	skip += copy;
	to += copy;
	bytes -= copy;
	while (unlikely(!left && bytes)) {
		iov++;
		buf = iov->iov_base;
		copy = min(bytes, iov->iov_len);
		left = __copy_from_user(to, buf, copy);
		copy -= left;
		skip = copy;
		to += copy;
		bytes -= copy;
	}
	kunmap(page);

done:
	/* a completely consumed segment is stepped over */
	if (skip == iov->iov_len) {
		iov++;
		skip = 0;
	}
	i->count -= wanted - bytes;
	i->nr_segs -= iov - i->iov;
	i->iov = iov;
	i->iov_offset = skip;
	return wanted - bytes;
}

#ifdef PIPE_PARANOIA
/*
 * Check that a pipe-backed iterator is positioned at the end of the data
 * in its pipe: with a non-zero iov_offset it must sit at the end of the
 * last in-use buffer, otherwise on the slot right after the last in-use
 * buffer.  On a mismatch the iterator and pipe state are dumped with
 * printk(), a warning is raised and false is returned.
 */
static bool sanity(const struct iov_iter *i)
{
	struct pipe_inode_info *pipe = i->pipe;
	int idx = i->idx;
	int next = pipe->curbuf + pipe->nrbufs;
	if (i->iov_offset) {
		struct pipe_buffer *p;
		if (unlikely(!pipe->nrbufs))
			goto Bad;	// pipe must be non-empty
		if (unlikely(idx != ((next - 1) & (pipe->buffers - 1))))
			goto Bad;	// must be at the last buffer...

		p = &pipe->bufs[idx];
		if (unlikely(p->offset + p->len != i->iov_offset))
			goto Bad;	// ... at the end of segment
	} else {
		if (idx != (next & (pipe->buffers - 1)))
			goto Bad;	// must be right after the last buffer
	}
	return true;
Bad:
	printk(KERN_ERR "idx = %d, offset = %zd\n", i->idx, i->iov_offset);
	printk(KERN_ERR "curbuf = %d, nrbufs = %d, buffers = %d\n",
			pipe->curbuf, pipe->nrbufs, pipe->buffers);
	for (idx = 0; idx < pipe->buffers; idx++)
		printk(KERN_ERR "[%p %p %d %d]\n",
			pipe->bufs[idx].ops,
			pipe->bufs[idx].page,
			pipe->bufs[idx].offset,
			pipe->bufs[idx].len);
	WARN_ON(1);
	return false;
}
#else
#define sanity(i) true
#endif

/*
 * Index of the slot following @idx in the pipe's buffer ring.  The mask
 * arithmetic only wraps correctly if pipe->buffers is a power of two
 * (NOTE(review): assumed here, not checked).
 */
static inline int next_idx(int idx, struct pipe_inode_info *pipe)
{
	return (idx + 1) & (pipe->buffers - 1);
}

/*
 * Append a reference to @page (range @offset, @bytes) to the pipe behind
 * @i without copying data.
 *
 * @bytes is clamped to i->count.  If the new range directly continues the
 * buffer the iterator currently ends in (same page, matching offset), that
 * buffer is simply lengthened.  Otherwise the next slot is filled in with
 * page_cache_pipe_buf_ops and an extra page reference is taken.
 *
 * Returns the number of bytes added, or 0 if the sanity check fails or the
 * slot to be used is the pipe's first in-use buffer (no free slot).
 */
static size_t copy_page_to_iter_pipe(struct page *page, size_t offset, size_t bytes,
			 struct iov_iter *i)
{
	struct pipe_inode_info *pipe = i->pipe;
	struct pipe_buffer *buf;
	size_t off;
	int idx;

	if (unlikely(bytes > i->count))
		bytes = i->count;

	if (unlikely(!bytes))
		return 0;

	if (!sanity(i))
		return 0;

	off = i->iov_offset;
	idx = i->idx;
	buf = &pipe->bufs[idx];
	if (off) {
		if (offset == off && buf->page == page) {
			/* merge with the last one */
			buf->len += bytes;
			i->iov_offset += bytes;
			goto out;
		}
		idx = next_idx(idx, pipe);
		buf = &pipe->bufs[idx];
	}
	/* wrapped around onto the oldest in-use buffer: the ring is full */
	if (idx == pipe->curbuf && pipe->nrbufs)
		return 0;
	pipe->nrbufs++;
	buf->ops = &page_cache_pipe_buf_ops;
	get_page(buf->page = page);
	buf->offset = offset;
	buf->len = bytes;
	i->iov_offset = offset + bytes;
	i->idx = idx;
out:
	i->count -= bytes;
	return bytes;
}

/*
 * Fault in one or more iovecs of the given iov_iter, to a maximum length of
 * bytes.  For each iovec, fault in each page that constitutes the iovec.
 *
 * Return 0 on success, or non-zero if the memory could not be accessed (i.e.
388 * because it is an invalid address). 389 */ 390 int iov_iter_fault_in_readable(struct iov_iter *i, size_t bytes) 391 { 392 size_t skip = i->iov_offset; 393 const struct iovec *iov; 394 int err; 395 struct iovec v; 396 397 if (!(i->type & (ITER_BVEC|ITER_KVEC))) { 398 iterate_iovec(i, bytes, v, iov, skip, ({ 399 err = fault_in_pages_readable(v.iov_base, v.iov_len); 400 if (unlikely(err)) 401 return err; 402 0;})) 403 } 404 return 0; 405 } 406 EXPORT_SYMBOL(iov_iter_fault_in_readable); 407 408 void iov_iter_init(struct iov_iter *i, int direction, 409 const struct iovec *iov, unsigned long nr_segs, 410 size_t count) 411 { 412 /* It will get better. Eventually... */ 413 if (segment_eq(get_fs(), KERNEL_DS)) { 414 direction |= ITER_KVEC; 415 i->type = direction; 416 i->kvec = (struct kvec *)iov; 417 } else { 418 i->type = direction; 419 i->iov = iov; 420 } 421 i->nr_segs = nr_segs; 422 i->iov_offset = 0; 423 i->count = count; 424 } 425 EXPORT_SYMBOL(iov_iter_init); 426 427 static void memcpy_from_page(char *to, struct page *page, size_t offset, size_t len) 428 { 429 char *from = kmap_atomic(page); 430 memcpy(to, from + offset, len); 431 kunmap_atomic(from); 432 } 433 434 static void memcpy_to_page(struct page *page, size_t offset, const char *from, size_t len) 435 { 436 char *to = kmap_atomic(page); 437 memcpy(to + offset, from, len); 438 kunmap_atomic(to); 439 } 440 441 static void memzero_page(struct page *page, size_t offset, size_t len) 442 { 443 char *addr = kmap_atomic(page); 444 memset(addr + offset, 0, len); 445 kunmap_atomic(addr); 446 } 447 448 static inline bool allocated(struct pipe_buffer *buf) 449 { 450 return buf->ops == &default_pipe_buf_ops; 451 } 452 453 static inline void data_start(const struct iov_iter *i, int *idxp, size_t *offp) 454 { 455 size_t off = i->iov_offset; 456 int idx = i->idx; 457 if (off && (!allocated(&i->pipe->bufs[idx]) || off == PAGE_SIZE)) { 458 idx = next_idx(idx, i->pipe); 459 off = 0; 460 } 461 *idxp = idx; 462 *offp = off; 
463 } 464 465 static size_t push_pipe(struct iov_iter *i, size_t size, 466 int *idxp, size_t *offp) 467 { 468 struct pipe_inode_info *pipe = i->pipe; 469 size_t off; 470 int idx; 471 ssize_t left; 472 473 if (unlikely(size > i->count)) 474 size = i->count; 475 if (unlikely(!size)) 476 return 0; 477 478 left = size; 479 data_start(i, &idx, &off); 480 *idxp = idx; 481 *offp = off; 482 if (off) { 483 left -= PAGE_SIZE - off; 484 if (left <= 0) { 485 pipe->bufs[idx].len += size; 486 return size; 487 } 488 pipe->bufs[idx].len = PAGE_SIZE; 489 idx = next_idx(idx, pipe); 490 } 491 while (idx != pipe->curbuf || !pipe->nrbufs) { 492 struct page *page = alloc_page(GFP_USER); 493 if (!page) 494 break; 495 pipe->nrbufs++; 496 pipe->bufs[idx].ops = &default_pipe_buf_ops; 497 pipe->bufs[idx].page = page; 498 pipe->bufs[idx].offset = 0; 499 if (left <= PAGE_SIZE) { 500 pipe->bufs[idx].len = left; 501 return size; 502 } 503 pipe->bufs[idx].len = PAGE_SIZE; 504 left -= PAGE_SIZE; 505 idx = next_idx(idx, pipe); 506 } 507 return size - left; 508 } 509 510 static size_t copy_pipe_to_iter(const void *addr, size_t bytes, 511 struct iov_iter *i) 512 { 513 struct pipe_inode_info *pipe = i->pipe; 514 size_t n, off; 515 int idx; 516 517 if (!sanity(i)) 518 return 0; 519 520 bytes = n = push_pipe(i, bytes, &idx, &off); 521 if (unlikely(!n)) 522 return 0; 523 for ( ; n; idx = next_idx(idx, pipe), off = 0) { 524 size_t chunk = min_t(size_t, n, PAGE_SIZE - off); 525 memcpy_to_page(pipe->bufs[idx].page, off, addr, chunk); 526 i->idx = idx; 527 i->iov_offset = off + chunk; 528 n -= chunk; 529 addr += chunk; 530 } 531 i->count -= bytes; 532 return bytes; 533 } 534 535 size_t copy_to_iter(const void *addr, size_t bytes, struct iov_iter *i) 536 { 537 const char *from = addr; 538 if (unlikely(i->type & ITER_PIPE)) 539 return copy_pipe_to_iter(addr, bytes, i); 540 iterate_and_advance(i, bytes, v, 541 __copy_to_user(v.iov_base, (from += v.iov_len) - v.iov_len, 542 v.iov_len), 543 
memcpy_to_page(v.bv_page, v.bv_offset, 544 (from += v.bv_len) - v.bv_len, v.bv_len), 545 memcpy(v.iov_base, (from += v.iov_len) - v.iov_len, v.iov_len) 546 ) 547 548 return bytes; 549 } 550 EXPORT_SYMBOL(copy_to_iter); 551 552 size_t copy_from_iter(void *addr, size_t bytes, struct iov_iter *i) 553 { 554 char *to = addr; 555 if (unlikely(i->type & ITER_PIPE)) { 556 WARN_ON(1); 557 return 0; 558 } 559 iterate_and_advance(i, bytes, v, 560 __copy_from_user((to += v.iov_len) - v.iov_len, v.iov_base, 561 v.iov_len), 562 memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page, 563 v.bv_offset, v.bv_len), 564 memcpy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len) 565 ) 566 567 return bytes; 568 } 569 EXPORT_SYMBOL(copy_from_iter); 570 571 size_t copy_from_iter_nocache(void *addr, size_t bytes, struct iov_iter *i) 572 { 573 char *to = addr; 574 if (unlikely(i->type & ITER_PIPE)) { 575 WARN_ON(1); 576 return 0; 577 } 578 iterate_and_advance(i, bytes, v, 579 __copy_from_user_nocache((to += v.iov_len) - v.iov_len, 580 v.iov_base, v.iov_len), 581 memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page, 582 v.bv_offset, v.bv_len), 583 memcpy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len) 584 ) 585 586 return bytes; 587 } 588 EXPORT_SYMBOL(copy_from_iter_nocache); 589 590 size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes, 591 struct iov_iter *i) 592 { 593 if (i->type & (ITER_BVEC|ITER_KVEC)) { 594 void *kaddr = kmap_atomic(page); 595 size_t wanted = copy_to_iter(kaddr + offset, bytes, i); 596 kunmap_atomic(kaddr); 597 return wanted; 598 } else if (likely(!(i->type & ITER_PIPE))) 599 return copy_page_to_iter_iovec(page, offset, bytes, i); 600 else 601 return copy_page_to_iter_pipe(page, offset, bytes, i); 602 } 603 EXPORT_SYMBOL(copy_page_to_iter); 604 605 size_t copy_page_from_iter(struct page *page, size_t offset, size_t bytes, 606 struct iov_iter *i) 607 { 608 if (unlikely(i->type & ITER_PIPE)) { 609 WARN_ON(1); 610 return 0; 611 } 612 if 
(i->type & (ITER_BVEC|ITER_KVEC)) { 613 void *kaddr = kmap_atomic(page); 614 size_t wanted = copy_from_iter(kaddr + offset, bytes, i); 615 kunmap_atomic(kaddr); 616 return wanted; 617 } else 618 return copy_page_from_iter_iovec(page, offset, bytes, i); 619 } 620 EXPORT_SYMBOL(copy_page_from_iter); 621 622 static size_t pipe_zero(size_t bytes, struct iov_iter *i) 623 { 624 struct pipe_inode_info *pipe = i->pipe; 625 size_t n, off; 626 int idx; 627 628 if (!sanity(i)) 629 return 0; 630 631 bytes = n = push_pipe(i, bytes, &idx, &off); 632 if (unlikely(!n)) 633 return 0; 634 635 for ( ; n; idx = next_idx(idx, pipe), off = 0) { 636 size_t chunk = min_t(size_t, n, PAGE_SIZE - off); 637 memzero_page(pipe->bufs[idx].page, off, chunk); 638 i->idx = idx; 639 i->iov_offset = off + chunk; 640 n -= chunk; 641 } 642 i->count -= bytes; 643 return bytes; 644 } 645 646 size_t iov_iter_zero(size_t bytes, struct iov_iter *i) 647 { 648 if (unlikely(i->type & ITER_PIPE)) 649 return pipe_zero(bytes, i); 650 iterate_and_advance(i, bytes, v, 651 __clear_user(v.iov_base, v.iov_len), 652 memzero_page(v.bv_page, v.bv_offset, v.bv_len), 653 memset(v.iov_base, 0, v.iov_len) 654 ) 655 656 return bytes; 657 } 658 EXPORT_SYMBOL(iov_iter_zero); 659 660 size_t iov_iter_copy_from_user_atomic(struct page *page, 661 struct iov_iter *i, unsigned long offset, size_t bytes) 662 { 663 char *kaddr = kmap_atomic(page), *p = kaddr + offset; 664 if (unlikely(i->type & ITER_PIPE)) { 665 kunmap_atomic(kaddr); 666 WARN_ON(1); 667 return 0; 668 } 669 iterate_all_kinds(i, bytes, v, 670 __copy_from_user_inatomic((p += v.iov_len) - v.iov_len, 671 v.iov_base, v.iov_len), 672 memcpy_from_page((p += v.bv_len) - v.bv_len, v.bv_page, 673 v.bv_offset, v.bv_len), 674 memcpy((p += v.iov_len) - v.iov_len, v.iov_base, v.iov_len) 675 ) 676 kunmap_atomic(kaddr); 677 return bytes; 678 } 679 EXPORT_SYMBOL(iov_iter_copy_from_user_atomic); 680 681 static void pipe_advance(struct iov_iter *i, size_t size) 682 { 683 struct 
pipe_inode_info *pipe = i->pipe; 684 struct pipe_buffer *buf; 685 int idx = i->idx; 686 size_t off = i->iov_offset, orig_sz; 687 688 if (unlikely(i->count < size)) 689 size = i->count; 690 orig_sz = size; 691 692 if (size) { 693 if (off) /* make it relative to the beginning of buffer */ 694 size += off - pipe->bufs[idx].offset; 695 while (1) { 696 buf = &pipe->bufs[idx]; 697 if (size <= buf->len) 698 break; 699 size -= buf->len; 700 idx = next_idx(idx, pipe); 701 } 702 buf->len = size; 703 i->idx = idx; 704 off = i->iov_offset = buf->offset + size; 705 } 706 if (off) 707 idx = next_idx(idx, pipe); 708 if (pipe->nrbufs) { 709 int unused = (pipe->curbuf + pipe->nrbufs) & (pipe->buffers - 1); 710 /* [curbuf,unused) is in use. Free [idx,unused) */ 711 while (idx != unused) { 712 pipe_buf_release(pipe, &pipe->bufs[idx]); 713 idx = next_idx(idx, pipe); 714 pipe->nrbufs--; 715 } 716 } 717 i->count -= orig_sz; 718 } 719 720 void iov_iter_advance(struct iov_iter *i, size_t size) 721 { 722 if (unlikely(i->type & ITER_PIPE)) { 723 pipe_advance(i, size); 724 return; 725 } 726 iterate_and_advance(i, size, v, 0, 0, 0) 727 } 728 EXPORT_SYMBOL(iov_iter_advance); 729 730 /* 731 * Return the count of just the current iov_iter segment. 
732 */ 733 size_t iov_iter_single_seg_count(const struct iov_iter *i) 734 { 735 if (unlikely(i->type & ITER_PIPE)) 736 return i->count; // it is a silly place, anyway 737 if (i->nr_segs == 1) 738 return i->count; 739 else if (i->type & ITER_BVEC) 740 return min(i->count, i->bvec->bv_len - i->iov_offset); 741 else 742 return min(i->count, i->iov->iov_len - i->iov_offset); 743 } 744 EXPORT_SYMBOL(iov_iter_single_seg_count); 745 746 void iov_iter_kvec(struct iov_iter *i, int direction, 747 const struct kvec *kvec, unsigned long nr_segs, 748 size_t count) 749 { 750 BUG_ON(!(direction & ITER_KVEC)); 751 i->type = direction; 752 i->kvec = kvec; 753 i->nr_segs = nr_segs; 754 i->iov_offset = 0; 755 i->count = count; 756 } 757 EXPORT_SYMBOL(iov_iter_kvec); 758 759 void iov_iter_bvec(struct iov_iter *i, int direction, 760 const struct bio_vec *bvec, unsigned long nr_segs, 761 size_t count) 762 { 763 BUG_ON(!(direction & ITER_BVEC)); 764 i->type = direction; 765 i->bvec = bvec; 766 i->nr_segs = nr_segs; 767 i->iov_offset = 0; 768 i->count = count; 769 } 770 EXPORT_SYMBOL(iov_iter_bvec); 771 772 void iov_iter_pipe(struct iov_iter *i, int direction, 773 struct pipe_inode_info *pipe, 774 size_t count) 775 { 776 BUG_ON(direction != ITER_PIPE); 777 i->type = direction; 778 i->pipe = pipe; 779 i->idx = (pipe->curbuf + pipe->nrbufs) & (pipe->buffers - 1); 780 i->iov_offset = 0; 781 i->count = count; 782 } 783 EXPORT_SYMBOL(iov_iter_pipe); 784 785 unsigned long iov_iter_alignment(const struct iov_iter *i) 786 { 787 unsigned long res = 0; 788 size_t size = i->count; 789 790 if (!size) 791 return 0; 792 793 if (unlikely(i->type & ITER_PIPE)) { 794 if (i->iov_offset && allocated(&i->pipe->bufs[i->idx])) 795 return size | i->iov_offset; 796 return size; 797 } 798 iterate_all_kinds(i, size, v, 799 (res |= (unsigned long)v.iov_base | v.iov_len, 0), 800 res |= v.bv_offset | v.bv_len, 801 res |= (unsigned long)v.iov_base | v.iov_len 802 ) 803 return res; 804 } 805 
EXPORT_SYMBOL(iov_iter_alignment); 806 807 unsigned long iov_iter_gap_alignment(const struct iov_iter *i) 808 { 809 unsigned long res = 0; 810 size_t size = i->count; 811 if (!size) 812 return 0; 813 814 if (unlikely(i->type & ITER_PIPE)) { 815 WARN_ON(1); 816 return ~0U; 817 } 818 819 iterate_all_kinds(i, size, v, 820 (res |= (!res ? 0 : (unsigned long)v.iov_base) | 821 (size != v.iov_len ? size : 0), 0), 822 (res |= (!res ? 0 : (unsigned long)v.bv_offset) | 823 (size != v.bv_len ? size : 0)), 824 (res |= (!res ? 0 : (unsigned long)v.iov_base) | 825 (size != v.iov_len ? size : 0)) 826 ); 827 return res; 828 } 829 EXPORT_SYMBOL(iov_iter_gap_alignment); 830 831 static inline size_t __pipe_get_pages(struct iov_iter *i, 832 size_t maxsize, 833 struct page **pages, 834 int idx, 835 size_t *start) 836 { 837 struct pipe_inode_info *pipe = i->pipe; 838 ssize_t n = push_pipe(i, maxsize, &idx, start); 839 if (!n) 840 return -EFAULT; 841 842 maxsize = n; 843 n += *start; 844 while (n > 0) { 845 get_page(*pages++ = pipe->bufs[idx].page); 846 idx = next_idx(idx, pipe); 847 n -= PAGE_SIZE; 848 } 849 850 return maxsize; 851 } 852 853 static ssize_t pipe_get_pages(struct iov_iter *i, 854 struct page **pages, size_t maxsize, unsigned maxpages, 855 size_t *start) 856 { 857 unsigned npages; 858 size_t capacity; 859 int idx; 860 861 if (!sanity(i)) 862 return -EFAULT; 863 864 data_start(i, &idx, start); 865 /* some of this one + all after this one */ 866 npages = ((i->pipe->curbuf - idx - 1) & (i->pipe->buffers - 1)) + 1; 867 capacity = min(npages,maxpages) * PAGE_SIZE - *start; 868 869 return __pipe_get_pages(i, min(maxsize, capacity), pages, idx, start); 870 } 871 872 ssize_t iov_iter_get_pages(struct iov_iter *i, 873 struct page **pages, size_t maxsize, unsigned maxpages, 874 size_t *start) 875 { 876 if (maxsize > i->count) 877 maxsize = i->count; 878 879 if (!maxsize) 880 return 0; 881 882 if (unlikely(i->type & ITER_PIPE)) 883 return pipe_get_pages(i, pages, maxsize, maxpages, 
start); 884 iterate_all_kinds(i, maxsize, v, ({ 885 unsigned long addr = (unsigned long)v.iov_base; 886 size_t len = v.iov_len + (*start = addr & (PAGE_SIZE - 1)); 887 int n; 888 int res; 889 890 if (len > maxpages * PAGE_SIZE) 891 len = maxpages * PAGE_SIZE; 892 addr &= ~(PAGE_SIZE - 1); 893 n = DIV_ROUND_UP(len, PAGE_SIZE); 894 res = get_user_pages_fast(addr, n, (i->type & WRITE) != WRITE, pages); 895 if (unlikely(res < 0)) 896 return res; 897 return (res == n ? len : res * PAGE_SIZE) - *start; 898 0;}),({ 899 /* can't be more than PAGE_SIZE */ 900 *start = v.bv_offset; 901 get_page(*pages = v.bv_page); 902 return v.bv_len; 903 }),({ 904 return -EFAULT; 905 }) 906 ) 907 return 0; 908 } 909 EXPORT_SYMBOL(iov_iter_get_pages); 910 911 static struct page **get_pages_array(size_t n) 912 { 913 struct page **p = kmalloc(n * sizeof(struct page *), GFP_KERNEL); 914 if (!p) 915 p = vmalloc(n * sizeof(struct page *)); 916 return p; 917 } 918 919 static ssize_t pipe_get_pages_alloc(struct iov_iter *i, 920 struct page ***pages, size_t maxsize, 921 size_t *start) 922 { 923 struct page **p; 924 size_t n; 925 int idx; 926 int npages; 927 928 if (!sanity(i)) 929 return -EFAULT; 930 931 data_start(i, &idx, start); 932 /* some of this one + all after this one */ 933 npages = ((i->pipe->curbuf - idx - 1) & (i->pipe->buffers - 1)) + 1; 934 n = npages * PAGE_SIZE - *start; 935 if (maxsize > n) 936 maxsize = n; 937 else 938 npages = DIV_ROUND_UP(maxsize + *start, PAGE_SIZE); 939 p = get_pages_array(npages); 940 if (!p) 941 return -ENOMEM; 942 n = __pipe_get_pages(i, maxsize, p, idx, start); 943 if (n > 0) 944 *pages = p; 945 else 946 kvfree(p); 947 return n; 948 } 949 950 ssize_t iov_iter_get_pages_alloc(struct iov_iter *i, 951 struct page ***pages, size_t maxsize, 952 size_t *start) 953 { 954 struct page **p; 955 956 if (maxsize > i->count) 957 maxsize = i->count; 958 959 if (!maxsize) 960 return 0; 961 962 if (unlikely(i->type & ITER_PIPE)) 963 return pipe_get_pages_alloc(i, pages, 
maxsize, start); 964 iterate_all_kinds(i, maxsize, v, ({ 965 unsigned long addr = (unsigned long)v.iov_base; 966 size_t len = v.iov_len + (*start = addr & (PAGE_SIZE - 1)); 967 int n; 968 int res; 969 970 addr &= ~(PAGE_SIZE - 1); 971 n = DIV_ROUND_UP(len, PAGE_SIZE); 972 p = get_pages_array(n); 973 if (!p) 974 return -ENOMEM; 975 res = get_user_pages_fast(addr, n, (i->type & WRITE) != WRITE, p); 976 if (unlikely(res < 0)) { 977 kvfree(p); 978 return res; 979 } 980 *pages = p; 981 return (res == n ? len : res * PAGE_SIZE) - *start; 982 0;}),({ 983 /* can't be more than PAGE_SIZE */ 984 *start = v.bv_offset; 985 *pages = p = get_pages_array(1); 986 if (!p) 987 return -ENOMEM; 988 get_page(*p = v.bv_page); 989 return v.bv_len; 990 }),({ 991 return -EFAULT; 992 }) 993 ) 994 return 0; 995 } 996 EXPORT_SYMBOL(iov_iter_get_pages_alloc); 997 998 size_t csum_and_copy_from_iter(void *addr, size_t bytes, __wsum *csum, 999 struct iov_iter *i) 1000 { 1001 char *to = addr; 1002 __wsum sum, next; 1003 size_t off = 0; 1004 sum = *csum; 1005 if (unlikely(i->type & ITER_PIPE)) { 1006 WARN_ON(1); 1007 return 0; 1008 } 1009 iterate_and_advance(i, bytes, v, ({ 1010 int err = 0; 1011 next = csum_and_copy_from_user(v.iov_base, 1012 (to += v.iov_len) - v.iov_len, 1013 v.iov_len, 0, &err); 1014 if (!err) { 1015 sum = csum_block_add(sum, next, off); 1016 off += v.iov_len; 1017 } 1018 err ? 
v.iov_len : 0; 1019 }), ({ 1020 char *p = kmap_atomic(v.bv_page); 1021 next = csum_partial_copy_nocheck(p + v.bv_offset, 1022 (to += v.bv_len) - v.bv_len, 1023 v.bv_len, 0); 1024 kunmap_atomic(p); 1025 sum = csum_block_add(sum, next, off); 1026 off += v.bv_len; 1027 }),({ 1028 next = csum_partial_copy_nocheck(v.iov_base, 1029 (to += v.iov_len) - v.iov_len, 1030 v.iov_len, 0); 1031 sum = csum_block_add(sum, next, off); 1032 off += v.iov_len; 1033 }) 1034 ) 1035 *csum = sum; 1036 return bytes; 1037 } 1038 EXPORT_SYMBOL(csum_and_copy_from_iter); 1039 1040 size_t csum_and_copy_to_iter(const void *addr, size_t bytes, __wsum *csum, 1041 struct iov_iter *i) 1042 { 1043 const char *from = addr; 1044 __wsum sum, next; 1045 size_t off = 0; 1046 sum = *csum; 1047 if (unlikely(i->type & ITER_PIPE)) { 1048 WARN_ON(1); /* for now */ 1049 return 0; 1050 } 1051 iterate_and_advance(i, bytes, v, ({ 1052 int err = 0; 1053 next = csum_and_copy_to_user((from += v.iov_len) - v.iov_len, 1054 v.iov_base, 1055 v.iov_len, 0, &err); 1056 if (!err) { 1057 sum = csum_block_add(sum, next, off); 1058 off += v.iov_len; 1059 } 1060 err ? 
v.iov_len : 0; 1061 }), ({ 1062 char *p = kmap_atomic(v.bv_page); 1063 next = csum_partial_copy_nocheck((from += v.bv_len) - v.bv_len, 1064 p + v.bv_offset, 1065 v.bv_len, 0); 1066 kunmap_atomic(p); 1067 sum = csum_block_add(sum, next, off); 1068 off += v.bv_len; 1069 }),({ 1070 next = csum_partial_copy_nocheck((from += v.iov_len) - v.iov_len, 1071 v.iov_base, 1072 v.iov_len, 0); 1073 sum = csum_block_add(sum, next, off); 1074 off += v.iov_len; 1075 }) 1076 ) 1077 *csum = sum; 1078 return bytes; 1079 } 1080 EXPORT_SYMBOL(csum_and_copy_to_iter); 1081 1082 int iov_iter_npages(const struct iov_iter *i, int maxpages) 1083 { 1084 size_t size = i->count; 1085 int npages = 0; 1086 1087 if (!size) 1088 return 0; 1089 1090 if (unlikely(i->type & ITER_PIPE)) { 1091 struct pipe_inode_info *pipe = i->pipe; 1092 size_t off; 1093 int idx; 1094 1095 if (!sanity(i)) 1096 return 0; 1097 1098 data_start(i, &idx, &off); 1099 /* some of this one + all after this one */ 1100 npages = ((pipe->curbuf - idx - 1) & (pipe->buffers - 1)) + 1; 1101 if (npages >= maxpages) 1102 return maxpages; 1103 } else iterate_all_kinds(i, size, v, ({ 1104 unsigned long p = (unsigned long)v.iov_base; 1105 npages += DIV_ROUND_UP(p + v.iov_len, PAGE_SIZE) 1106 - p / PAGE_SIZE; 1107 if (npages >= maxpages) 1108 return maxpages; 1109 0;}),({ 1110 npages++; 1111 if (npages >= maxpages) 1112 return maxpages; 1113 }),({ 1114 unsigned long p = (unsigned long)v.iov_base; 1115 npages += DIV_ROUND_UP(p + v.iov_len, PAGE_SIZE) 1116 - p / PAGE_SIZE; 1117 if (npages >= maxpages) 1118 return maxpages; 1119 }) 1120 ) 1121 return npages; 1122 } 1123 EXPORT_SYMBOL(iov_iter_npages); 1124 1125 const void *dup_iter(struct iov_iter *new, struct iov_iter *old, gfp_t flags) 1126 { 1127 *new = *old; 1128 if (unlikely(new->type & ITER_PIPE)) { 1129 WARN_ON(1); 1130 return NULL; 1131 } 1132 if (new->type & ITER_BVEC) 1133 return new->bvec = kmemdup(new->bvec, 1134 new->nr_segs * sizeof(struct bio_vec), 1135 flags); 1136 else 1137 /* 
iovec and kvec have identical layout */ 1138 return new->iov = kmemdup(new->iov, 1139 new->nr_segs * sizeof(struct iovec), 1140 flags); 1141 } 1142 EXPORT_SYMBOL(dup_iter); 1143 1144 /** 1145 * import_iovec() - Copy an array of &struct iovec from userspace 1146 * into the kernel, check that it is valid, and initialize a new 1147 * &struct iov_iter iterator to access it. 1148 * 1149 * @type: One of %READ or %WRITE. 1150 * @uvector: Pointer to the userspace array. 1151 * @nr_segs: Number of elements in userspace array. 1152 * @fast_segs: Number of elements in @iov. 1153 * @iov: (input and output parameter) Pointer to pointer to (usually small 1154 * on-stack) kernel array. 1155 * @i: Pointer to iterator that will be initialized on success. 1156 * 1157 * If the array pointed to by *@iov is large enough to hold all @nr_segs, 1158 * then this function places %NULL in *@iov on return. Otherwise, a new 1159 * array will be allocated and the result placed in *@iov. This means that 1160 * the caller may call kfree() on *@iov regardless of whether the small 1161 * on-stack array was used or not (and regardless of whether this function 1162 * returns an error or not). 1163 * 1164 * Return: 0 on success or negative error code on error. 1165 */ 1166 int import_iovec(int type, const struct iovec __user * uvector, 1167 unsigned nr_segs, unsigned fast_segs, 1168 struct iovec **iov, struct iov_iter *i) 1169 { 1170 ssize_t n; 1171 struct iovec *p; 1172 n = rw_copy_check_uvector(type, uvector, nr_segs, fast_segs, 1173 *iov, &p); 1174 if (n < 0) { 1175 if (p != *iov) 1176 kfree(p); 1177 *iov = NULL; 1178 return n; 1179 } 1180 iov_iter_init(i, type, p, nr_segs, n); 1181 *iov = p == *iov ? 
NULL : p; 1182 return 0; 1183 } 1184 EXPORT_SYMBOL(import_iovec); 1185 1186 #ifdef CONFIG_COMPAT 1187 #include <linux/compat.h> 1188 1189 int compat_import_iovec(int type, const struct compat_iovec __user * uvector, 1190 unsigned nr_segs, unsigned fast_segs, 1191 struct iovec **iov, struct iov_iter *i) 1192 { 1193 ssize_t n; 1194 struct iovec *p; 1195 n = compat_rw_copy_check_uvector(type, uvector, nr_segs, fast_segs, 1196 *iov, &p); 1197 if (n < 0) { 1198 if (p != *iov) 1199 kfree(p); 1200 *iov = NULL; 1201 return n; 1202 } 1203 iov_iter_init(i, type, p, nr_segs, n); 1204 *iov = p == *iov ? NULL : p; 1205 return 0; 1206 } 1207 #endif 1208 1209 int import_single_range(int rw, void __user *buf, size_t len, 1210 struct iovec *iov, struct iov_iter *i) 1211 { 1212 if (len > MAX_RW_COUNT) 1213 len = MAX_RW_COUNT; 1214 if (unlikely(!access_ok(!rw, buf, len))) 1215 return -EFAULT; 1216 1217 iov->iov_base = buf; 1218 iov->iov_len = len; 1219 iov_iter_init(i, rw, iov, 1, len); 1220 return 0; 1221 } 1222 EXPORT_SYMBOL(import_single_range); 1223