// SPDX-License-Identifier: GPL-2.0
/*
 * Functions related to mapping data to requests
 */
#include <linux/kernel.h>
#include <linux/sched/task_stack.h>
#include <linux/module.h>
#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/uio.h>

#include "blk.h"

struct bio_map_data {
	int is_our_pages;
	struct iov_iter iter;
	struct iovec iov[];
};

static struct bio_map_data *bio_alloc_map_data(struct iov_iter *data,
					       gfp_t gfp_mask)
{
	struct bio_map_data *bmd;

	if (data->nr_segs > UIO_MAXIOV)
		return NULL;

	bmd = kmalloc(struct_size(bmd, iov, data->nr_segs), gfp_mask);
	if (!bmd)
		return NULL;
	memcpy(bmd->iov, data->iov, sizeof(struct iovec) * data->nr_segs);
	bmd->iter = *data;
	bmd->iter.iov = bmd->iov;
	return bmd;
}

/**
 * bio_copy_from_iter - copy all pages from iov_iter to bio
 * @bio: The &struct bio which describes the I/O as destination
 * @iter: iov_iter as source
 *
 * Copy all pages from iov_iter to bio.
 * Returns 0 on success, or error on failure.
 */
static int bio_copy_from_iter(struct bio *bio, struct iov_iter *iter)
{
	struct bio_vec *bvec;
	struct bvec_iter_all iter_all;

	bio_for_each_segment_all(bvec, bio, iter_all) {
		ssize_t ret;

		ret = copy_page_from_iter(bvec->bv_page,
					  bvec->bv_offset,
					  bvec->bv_len,
					  iter);

		if (!iov_iter_count(iter))
			break;

		if (ret < bvec->bv_len)
			return -EFAULT;
	}

	return 0;
}

/**
 * bio_copy_to_iter - copy all pages from bio to iov_iter
 * @bio: The &struct bio which describes the I/O as source
 * @iter: iov_iter as destination
 *
 * Copy all pages from bio to iov_iter.
 * Returns 0 on success, or error on failure.
 */
static int bio_copy_to_iter(struct bio *bio, struct iov_iter iter)
{
	struct bio_vec *bvec;
	struct bvec_iter_all iter_all;

	bio_for_each_segment_all(bvec, bio, iter_all) {
		ssize_t ret;

		ret = copy_page_to_iter(bvec->bv_page,
					bvec->bv_offset,
					bvec->bv_len,
					&iter);

		if (!iov_iter_count(&iter))
			break;

		if (ret < bvec->bv_len)
			return -EFAULT;
	}

	return 0;
}

/**
 * bio_uncopy_user - finish previously mapped bio
 * @bio: bio being terminated
 *
 * Free pages allocated from bio_copy_user_iov() and write back data
 * to user space in case of a read.
 */
static int bio_uncopy_user(struct bio *bio)
{
	struct bio_map_data *bmd = bio->bi_private;
	int ret = 0;

	if (!bio_flagged(bio, BIO_NULL_MAPPED)) {
		/*
		 * if we're in a workqueue, the request is orphaned, so
		 * don't copy into a random user address space, just free
		 * and return -EINTR so user space doesn't expect any data.
		 */
		if (!current->mm)
			ret = -EINTR;
		else if (bio_data_dir(bio) == READ)
			ret = bio_copy_to_iter(bio, bmd->iter);
		if (bmd->is_our_pages)
			bio_free_pages(bio);
	}
	kfree(bmd);
	bio_put(bio);
	return ret;
}
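/*
 * The snapshot taken by bio_alloc_map_data() above matters because callers
 * routinely build their iov_iter over an on-stack iovec.  A minimal,
 * hypothetical caller-side sketch of such a setup (the ubuf/len arguments
 * and the example_* name are assumptions for illustration only):
 */
#if 0
static void example_on_stack_iter(void __user *ubuf, size_t len)
{
	struct iovec iov = { .iov_base = ubuf, .iov_len = len };	/* on the stack */
	struct iov_iter i;

	iov_iter_init(&i, WRITE, &iov, 1, len);
	/*
	 * Once this function returns, @iov is gone.  Anything that needs the
	 * iterator at I/O completion time, as bio_uncopy_user() does, has to
	 * keep its own copy of both the iov_iter and the iovec array, which
	 * is exactly what bio_alloc_map_data() provides.
	 */
}
#endif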
/**
 * bio_copy_user_iov - copy user data to bio
 * @q: destination block queue
 * @map_data: pointer to the rq_map_data holding pages (if necessary)
 * @iter: iovec iterator
 * @gfp_mask: memory allocation flags
 *
 * Prepares and returns a bio for indirect user io, bouncing data
 * to/from kernel pages as necessary. Must be paired with a
 * bio_uncopy_user() call on io completion.
 */
static struct bio *bio_copy_user_iov(struct request_queue *q,
		struct rq_map_data *map_data, struct iov_iter *iter,
		gfp_t gfp_mask)
{
	struct bio_map_data *bmd;
	struct page *page;
	struct bio *bio;
	int i = 0, ret;
	int nr_pages;
	unsigned int len = iter->count;
	unsigned int offset = map_data ? offset_in_page(map_data->offset) : 0;

	bmd = bio_alloc_map_data(iter, gfp_mask);
	if (!bmd)
		return ERR_PTR(-ENOMEM);

	/*
	 * We need to do a deep copy of the iov_iter including the iovecs.
	 * The caller provided iov might point to an on-stack or otherwise
	 * shortlived one.
	 */
	bmd->is_our_pages = map_data ? 0 : 1;

	nr_pages = DIV_ROUND_UP(offset + len, PAGE_SIZE);
	if (nr_pages > BIO_MAX_PAGES)
		nr_pages = BIO_MAX_PAGES;

	ret = -ENOMEM;
	bio = bio_kmalloc(gfp_mask, nr_pages);
	if (!bio)
		goto out_bmd;

	ret = 0;

	if (map_data) {
		nr_pages = 1 << map_data->page_order;
		i = map_data->offset / PAGE_SIZE;
	}
	while (len) {
		unsigned int bytes = PAGE_SIZE;

		bytes -= offset;

		if (bytes > len)
			bytes = len;

		if (map_data) {
			if (i == map_data->nr_entries * nr_pages) {
				ret = -ENOMEM;
				break;
			}

			page = map_data->pages[i / nr_pages];
			page += (i % nr_pages);

			i++;
		} else {
			page = alloc_page(q->bounce_gfp | gfp_mask);
			if (!page) {
				ret = -ENOMEM;
				break;
			}
		}

		if (bio_add_pc_page(q, bio, page, bytes, offset) < bytes) {
			if (!map_data)
				__free_page(page);
			break;
		}

		len -= bytes;
		offset = 0;
	}

	if (ret)
		goto cleanup;

	if (map_data)
		map_data->offset += bio->bi_iter.bi_size;

	/*
	 * success
	 */
	if ((iov_iter_rw(iter) == WRITE &&
	     (!map_data || !map_data->null_mapped)) ||
	    (map_data && map_data->from_user)) {
		ret = bio_copy_from_iter(bio, iter);
		if (ret)
			goto cleanup;
	} else {
		if (bmd->is_our_pages)
			zero_fill_bio(bio);
		iov_iter_advance(iter, bio->bi_iter.bi_size);
	}

	bio->bi_private = bmd;
	if (map_data && map_data->null_mapped)
		bio_set_flag(bio, BIO_NULL_MAPPED);
	return bio;
cleanup:
	if (!map_data)
		bio_free_pages(bio);
	bio_put(bio);
out_bmd:
	kfree(bmd);
	return ERR_PTR(ret);
}
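/*
 * Usage sketch for the map_data path above: a caller that owns a
 * preallocated page array can hand it in via struct rq_map_data so that
 * bio_copy_user_iov() bounces into those pages instead of allocating its
 * own (roughly what /dev/sg style drivers do with their reserved buffers).
 * The example_* helper and its arguments are hypothetical.
 */
#if 0
static int example_map_into_reserved_pages(struct request_queue *q,
					    struct request *rq,
					    struct page **pages, int nr_pages,
					    void __user *ubuf,
					    unsigned long len)
{
	struct rq_map_data map_data = {
		.pages		= pages,	/* caller-owned bounce pages */
		.page_order	= 0,		/* order-0 pages */
		.nr_entries	= nr_pages,
		.offset		= 0,
		.null_mapped	= 0,		/* really copy the user data */
		.from_user	= 0,
	};

	/* A non-NULL map_data always selects the copy (bounce) path. */
	return blk_rq_map_user(q, rq, &map_data, ubuf, len, GFP_KERNEL);
}
#endif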
/**
 * bio_map_user_iov - map user iovec into bio
 * @q: the struct request_queue for the bio
 * @iter: iovec iterator
 * @gfp_mask: memory allocation flags
 *
 * Map the user space address into a bio suitable for io to a block
 * device. Returns an error pointer in case of error.
 */
static struct bio *bio_map_user_iov(struct request_queue *q,
		struct iov_iter *iter, gfp_t gfp_mask)
{
	int j;
	struct bio *bio;
	int ret;

	if (!iov_iter_count(iter))
		return ERR_PTR(-EINVAL);

	bio = bio_kmalloc(gfp_mask, iov_iter_npages(iter, BIO_MAX_PAGES));
	if (!bio)
		return ERR_PTR(-ENOMEM);

	while (iov_iter_count(iter)) {
		struct page **pages;
		ssize_t bytes;
		size_t offs, added = 0;
		int npages;

		bytes = iov_iter_get_pages_alloc(iter, &pages, LONG_MAX, &offs);
		if (unlikely(bytes <= 0)) {
			ret = bytes ? bytes : -EFAULT;
			goto out_unmap;
		}

		npages = DIV_ROUND_UP(offs + bytes, PAGE_SIZE);

		if (unlikely(offs & queue_dma_alignment(q))) {
			ret = -EINVAL;
			j = 0;
		} else {
			for (j = 0; j < npages; j++) {
				struct page *page = pages[j];
				unsigned int n = PAGE_SIZE - offs;
				bool same_page = false;

				if (n > bytes)
					n = bytes;

				if (!__bio_add_pc_page(q, bio, page, n, offs,
						&same_page)) {
					if (same_page)
						put_page(page);
					break;
				}

				added += n;
				bytes -= n;
				offs = 0;
			}
			iov_iter_advance(iter, added);
		}
		/*
		 * release the pages we didn't map into the bio, if any
		 */
		while (j < npages)
			put_page(pages[j++]);
		kvfree(pages);
		/* couldn't stuff something into bio? */
		if (bytes)
			break;
	}

	bio_set_flag(bio, BIO_USER_MAPPED);

	/*
	 * subtle -- if bio_map_user_iov() ended up bouncing a bio,
	 * it would normally disappear when its bi_end_io is run.
	 * however, we need it for the unmap, so grab an extra
	 * reference to it
	 */
	bio_get(bio);
	return bio;

out_unmap:
	bio_release_pages(bio, false);
	bio_put(bio);
	return ERR_PTR(ret);
}

/**
 * bio_unmap_user - unmap a bio
 * @bio: the bio being unmapped
 *
 * Unmap a bio previously mapped by bio_map_user_iov(). Must be called from
 * process context.
 *
 * bio_unmap_user() may sleep.
 */
static void bio_unmap_user(struct bio *bio)
{
	bio_release_pages(bio, bio_data_dir(bio) == READ);
	bio_put(bio);	/* drop the extra reference taken in bio_map_user_iov() */
	bio_put(bio);
}

static void bio_invalidate_vmalloc_pages(struct bio *bio)
{
#ifdef ARCH_HAS_FLUSH_KERNEL_DCACHE_PAGE
	if (bio->bi_private && !op_is_write(bio_op(bio))) {
		unsigned long i, len = 0;

		for (i = 0; i < bio->bi_vcnt; i++)
			len += bio->bi_io_vec[i].bv_len;
		invalidate_kernel_vmap_range(bio->bi_private, len);
	}
#endif
}

static void bio_map_kern_endio(struct bio *bio)
{
	bio_invalidate_vmalloc_pages(bio);
	bio_put(bio);
}
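/*
 * The flush/invalidate pair in bio_map_kern() below and in the endio above
 * exists because vmalloc memory is reached by the CPU through an alias
 * mapping.  A rough, hypothetical sketch of the ordering for a buffer the
 * device fills (assumes <linux/vmalloc.h>; submit_and_wait() stands in for
 * whatever mechanism the caller uses to run the I/O):
 */
#if 0
static void example_vmalloc_io(struct request_queue *q, unsigned int len)
{
	void *buf = vmalloc(len);
	struct bio *bio;

	if (!buf)
		return;

	/* bio_map_kern() calls flush_kernel_vmap_range() on @buf here ... */
	bio = bio_map_kern(q, buf, len, GFP_KERNEL);
	if (!IS_ERR(bio))
		submit_and_wait(bio);	/* hypothetical helper */

	/*
	 * ... and bio_map_kern_endio() calls invalidate_kernel_vmap_range()
	 * for reads, so the CPU does not see stale cache lines when it now
	 * looks at @buf.
	 */
	vfree(buf);
}
#endif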
/**
 * bio_map_kern - map kernel address into bio
 * @q: the struct request_queue for the bio
 * @data: pointer to buffer to map
 * @len: length in bytes
 * @gfp_mask: allocation flags for bio allocation
 *
 * Map the kernel address into a bio suitable for io to a block
 * device. Returns an error pointer in case of error.
 */
static struct bio *bio_map_kern(struct request_queue *q, void *data,
		unsigned int len, gfp_t gfp_mask)
{
	unsigned long kaddr = (unsigned long)data;
	unsigned long end = (kaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
	unsigned long start = kaddr >> PAGE_SHIFT;
	const int nr_pages = end - start;
	bool is_vmalloc = is_vmalloc_addr(data);
	struct page *page;
	int offset, i;
	struct bio *bio;

	bio = bio_kmalloc(gfp_mask, nr_pages);
	if (!bio)
		return ERR_PTR(-ENOMEM);

	if (is_vmalloc) {
		flush_kernel_vmap_range(data, len);
		bio->bi_private = data;
	}

	offset = offset_in_page(kaddr);
	for (i = 0; i < nr_pages; i++) {
		unsigned int bytes = PAGE_SIZE - offset;

		if (len <= 0)
			break;

		if (bytes > len)
			bytes = len;

		if (!is_vmalloc)
			page = virt_to_page(data);
		else
			page = vmalloc_to_page(data);
		if (bio_add_pc_page(q, bio, page, bytes,
				    offset) < bytes) {
			/* we don't support partial mappings */
			bio_put(bio);
			return ERR_PTR(-EINVAL);
		}

		data += bytes;
		len -= bytes;
		offset = 0;
	}

	bio->bi_end_io = bio_map_kern_endio;
	return bio;
}

static void bio_copy_kern_endio(struct bio *bio)
{
	bio_free_pages(bio);
	bio_put(bio);
}

static void bio_copy_kern_endio_read(struct bio *bio)
{
	char *p = bio->bi_private;
	struct bio_vec *bvec;
	struct bvec_iter_all iter_all;

	bio_for_each_segment_all(bvec, bio, iter_all) {
		memcpy(p, page_address(bvec->bv_page), bvec->bv_len);
		p += bvec->bv_len;
	}

	bio_copy_kern_endio(bio);
}

/**
 * bio_copy_kern - copy kernel address into bio
 * @q: the struct request_queue for the bio
 * @data: pointer to buffer to copy
 * @len: length in bytes
 * @gfp_mask: allocation flags for bio and page allocation
 * @reading: data direction is READ
 *
 * Copy the kernel address into a bio suitable for io to a block
 * device. Returns an error pointer in case of error.
 */
static struct bio *bio_copy_kern(struct request_queue *q, void *data,
		unsigned int len, gfp_t gfp_mask, int reading)
{
	unsigned long kaddr = (unsigned long)data;
	unsigned long end = (kaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
	unsigned long start = kaddr >> PAGE_SHIFT;
	struct bio *bio;
	void *p = data;
	int nr_pages = 0;

	/*
	 * Overflow, abort
	 */
	if (end < start)
		return ERR_PTR(-EINVAL);

	nr_pages = end - start;
	bio = bio_kmalloc(gfp_mask, nr_pages);
	if (!bio)
		return ERR_PTR(-ENOMEM);

	while (len) {
		struct page *page;
		unsigned int bytes = PAGE_SIZE;

		if (bytes > len)
			bytes = len;

		page = alloc_page(q->bounce_gfp | gfp_mask);
		if (!page)
			goto cleanup;

		if (!reading)
			memcpy(page_address(page), p, bytes);

		if (bio_add_pc_page(q, bio, page, bytes, 0) < bytes)
			break;

		len -= bytes;
		p += bytes;
	}

	if (reading) {
		bio->bi_end_io = bio_copy_kern_endio_read;
		bio->bi_private = data;
	} else {
		bio->bi_end_io = bio_copy_kern_endio;
	}

	return bio;

cleanup:
	bio_free_pages(bio);
	bio_put(bio);
	return ERR_PTR(-ENOMEM);
}
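/*
 * Both bio_map_kern() and bio_copy_kern() size their bio with the same page
 * arithmetic: round the end of the byte range up to a page boundary, shift
 * both ends down to page frame numbers, and subtract.  A worked example with
 * made-up numbers, assuming PAGE_SHIFT == 12 (4K pages):
 */
#if 0
static void example_nr_pages(void)
{
	unsigned long kaddr = 0x1ff0;	/* 16 bytes below a page boundary */
	unsigned int len = 32;		/* the range spills into the next page */
	unsigned long end = (kaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
	unsigned long start = kaddr >> PAGE_SHIFT;

	/* end == 3 (last byte 0x200f rounded up), start == 1: two pages */
	WARN_ON(end - start != 2);
}
#endif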
/*
 * Append a bio to a passthrough request. Only works if the bio can be merged
 * into the request based on the driver constraints.
 */
int blk_rq_append_bio(struct request *rq, struct bio **bio)
{
	struct bio *orig_bio = *bio;
	struct bvec_iter iter;
	struct bio_vec bv;
	unsigned int nr_segs = 0;

	blk_queue_bounce(rq->q, bio);

	bio_for_each_bvec(bv, *bio, iter)
		nr_segs++;

	if (!rq->bio) {
		blk_rq_bio_prep(rq, *bio, nr_segs);
	} else {
		if (!ll_back_merge_fn(rq, *bio, nr_segs)) {
			if (orig_bio != *bio) {
				bio_put(*bio);
				*bio = orig_bio;
			}
			return -EINVAL;
		}

		rq->biotail->bi_next = *bio;
		rq->biotail = *bio;
		rq->__data_len += (*bio)->bi_iter.bi_size;
	}

	return 0;
}
EXPORT_SYMBOL(blk_rq_append_bio);

static int __blk_rq_unmap_user(struct bio *bio)
{
	int ret = 0;

	if (bio) {
		if (bio_flagged(bio, BIO_USER_MAPPED))
			bio_unmap_user(bio);
		else
			ret = bio_uncopy_user(bio);
	}

	return ret;
}

static int __blk_rq_map_user_iov(struct request *rq,
		struct rq_map_data *map_data, struct iov_iter *iter,
		gfp_t gfp_mask, bool copy)
{
	struct request_queue *q = rq->q;
	struct bio *bio, *orig_bio;
	int ret;

	if (copy)
		bio = bio_copy_user_iov(q, map_data, iter, gfp_mask);
	else
		bio = bio_map_user_iov(q, iter, gfp_mask);

	if (IS_ERR(bio))
		return PTR_ERR(bio);

	bio->bi_opf &= ~REQ_OP_MASK;
	bio->bi_opf |= req_op(rq);

	orig_bio = bio;

	/*
	 * We link the bounce buffer in and could have to traverse it
	 * later so we have to get a ref to prevent it from being freed
	 */
	ret = blk_rq_append_bio(rq, &bio);
	if (ret) {
		__blk_rq_unmap_user(orig_bio);
		return ret;
	}
	bio_get(bio);

	return 0;
}
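/*
 * Usage sketch for blk_rq_append_bio() above: a driver that builds its own
 * bio (rather than going through the map helpers below) attaches it to a
 * passthrough request like this.  The example_* helper and its page/len
 * arguments are hypothetical.
 */
#if 0
static int example_attach_bio(struct request *rq, struct page *page,
			      unsigned int len)
{
	struct bio *bio;
	int ret;

	bio = bio_kmalloc(GFP_KERNEL, 1);
	if (!bio)
		return -ENOMEM;
	if (bio_add_page(bio, page, len, 0) != len) {
		bio_put(bio);
		return -EINVAL;
	}
	bio->bi_opf = req_op(rq);	/* give the bio the request's operation */

	/*
	 * blk_rq_append_bio() takes a struct bio ** because bouncing may
	 * replace the bio; on failure the original bio is handed back and
	 * the caller still owns it.
	 */
	ret = blk_rq_append_bio(rq, &bio);
	if (ret)
		bio_put(bio);
	return ret;
}
#endif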
/**
 * blk_rq_map_user_iov - map user data to a request, for passthrough requests
 * @q: request queue where request should be inserted
 * @rq: request to map data to
 * @map_data: pointer to the rq_map_data holding pages (if necessary)
 * @iter: iovec iterator
 * @gfp_mask: memory allocation flags
 *
 * Description:
 *    Data will be mapped directly for zero copy I/O, if possible. Otherwise
 *    a kernel bounce buffer is used.
 *
 *    A matching blk_rq_unmap_user() must be issued at the end of I/O, while
 *    still in process context.
 *
 *    Note: The mapped bio may need to be bounced through blk_queue_bounce()
 *    before being submitted to the device, as pages mapped may be out of
 *    reach. It's the caller's responsibility to make sure this happens. The
 *    original bio must be passed back in to blk_rq_unmap_user() for proper
 *    unmapping.
 */
int blk_rq_map_user_iov(struct request_queue *q, struct request *rq,
			struct rq_map_data *map_data,
			const struct iov_iter *iter, gfp_t gfp_mask)
{
	bool copy = false;
	unsigned long align = q->dma_pad_mask | queue_dma_alignment(q);
	struct bio *bio = NULL;
	struct iov_iter i;
	int ret = -EINVAL;

	if (!iter_is_iovec(iter))
		goto fail;

	if (map_data)
		copy = true;
	else if (iov_iter_alignment(iter) & align)
		copy = true;
	else if (queue_virt_boundary(q))
		copy = queue_virt_boundary(q) & iov_iter_gap_alignment(iter);

	i = *iter;
	do {
		ret = __blk_rq_map_user_iov(rq, map_data, &i, gfp_mask, copy);
		if (ret)
			goto unmap_rq;
		if (!bio)
			bio = rq->bio;
	} while (iov_iter_count(&i));

	if (!bio_flagged(bio, BIO_USER_MAPPED))
		rq->rq_flags |= RQF_COPY_USER;
	return 0;

unmap_rq:
	blk_rq_unmap_user(bio);
fail:
	rq->bio = NULL;
	return ret;
}
EXPORT_SYMBOL(blk_rq_map_user_iov);

int blk_rq_map_user(struct request_queue *q, struct request *rq,
		    struct rq_map_data *map_data, void __user *ubuf,
		    unsigned long len, gfp_t gfp_mask)
{
	struct iovec iov;
	struct iov_iter i;
	int ret = import_single_range(rq_data_dir(rq), ubuf, len, &iov, &i);

	if (unlikely(ret < 0))
		return ret;

	return blk_rq_map_user_iov(q, rq, map_data, &i, gfp_mask);
}
EXPORT_SYMBOL(blk_rq_map_user);

/**
 * blk_rq_unmap_user - unmap a request with user data
 * @bio: start of bio list
 *
 * Description:
 *    Unmap a rq previously mapped by blk_rq_map_user(). The caller must
 *    supply the original rq->bio from the blk_rq_map_user() return, since
 *    the I/O completion may have changed rq->bio.
 */
int blk_rq_unmap_user(struct bio *bio)
{
	struct bio *mapped_bio;
	int ret = 0, ret2;

	while (bio) {
		mapped_bio = bio;
		if (unlikely(bio_flagged(bio, BIO_BOUNCED)))
			mapped_bio = bio->bi_private;

		ret2 = __blk_rq_unmap_user(mapped_bio);
		if (ret2 && !ret)
			ret = ret2;

		mapped_bio = bio;
		bio = bio->bi_next;
		bio_put(mapped_bio);
	}

	return ret;
}
EXPORT_SYMBOL(blk_rq_unmap_user);
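/*
 * End-to-end usage sketch for the user-mapping interface above, following
 * the pattern of the SCSI ioctl code: map, execute, then unmap with the bio
 * that was saved before execution.  The example_* helper is hypothetical and
 * error handling is reduced to the essentials.
 */
#if 0
static int example_user_passthrough(struct request_queue *q,
				    void __user *ubuf, unsigned long len)
{
	struct request *rq;
	struct bio *bio;
	int ret;

	rq = blk_get_request(q, REQ_OP_DRV_IN, 0);
	if (IS_ERR(rq))
		return PTR_ERR(rq);

	ret = blk_rq_map_user(q, rq, NULL, ubuf, len, GFP_KERNEL);
	if (ret)
		goto out_put;

	bio = rq->bio;	/* save it: completion may change rq->bio */
	blk_execute_rq(q, NULL, rq, 0);
	ret = blk_rq_unmap_user(bio);
out_put:
	blk_put_request(rq);
	return ret;
}
#endif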
/**
 * blk_rq_map_kern - map kernel data to a request, for passthrough requests
 * @q: request queue where request should be inserted
 * @rq: request to fill
 * @kbuf: the kernel buffer
 * @len: length of kernel data
 * @gfp_mask: memory allocation flags
 *
 * Description:
 *    Data will be mapped directly if possible. Otherwise a bounce
 *    buffer is used. Can be called multiple times to append multiple
 *    buffers.
 */
int blk_rq_map_kern(struct request_queue *q, struct request *rq, void *kbuf,
		    unsigned int len, gfp_t gfp_mask)
{
	int reading = rq_data_dir(rq) == READ;
	unsigned long addr = (unsigned long) kbuf;
	int do_copy = 0;
	struct bio *bio, *orig_bio;
	int ret;

	if (len > (queue_max_hw_sectors(q) << 9))
		return -EINVAL;
	if (!len || !kbuf)
		return -EINVAL;

	do_copy = !blk_rq_aligned(q, addr, len) || object_is_on_stack(kbuf);
	if (do_copy)
		bio = bio_copy_kern(q, kbuf, len, gfp_mask, reading);
	else
		bio = bio_map_kern(q, kbuf, len, gfp_mask);

	if (IS_ERR(bio))
		return PTR_ERR(bio);

	bio->bi_opf &= ~REQ_OP_MASK;
	bio->bi_opf |= req_op(rq);

	if (do_copy)
		rq->rq_flags |= RQF_COPY_USER;

	orig_bio = bio;
	ret = blk_rq_append_bio(rq, &bio);
	if (unlikely(ret)) {
		/* request is too big */
		bio_put(orig_bio);
		return ret;
	}

	return 0;
}
EXPORT_SYMBOL(blk_rq_map_kern);
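/*
 * Matching sketch for the kernel-buffer side: an internal driver command
 * whose payload lives in a kmalloc()ed buffer maps directly, while an
 * on-stack or misaligned buffer would silently take the bio_copy_kern()
 * bounce path above.  The example_* helper is hypothetical.
 */
#if 0
static int example_kern_passthrough(struct request_queue *q,
				    void *buf, unsigned int len)
{
	struct request *rq;
	int ret;

	rq = blk_get_request(q, REQ_OP_DRV_OUT, 0);
	if (IS_ERR(rq))
		return PTR_ERR(rq);

	ret = blk_rq_map_kern(q, rq, buf, len, GFP_KERNEL);
	if (!ret)
		blk_execute_rq(q, NULL, rq, 0);

	blk_put_request(rq);
	return ret;
}
#endif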