// SPDX-License-Identifier: GPL-2.0
/*
 * Functions related to mapping data to requests
 */
#include <linux/kernel.h>
#include <linux/sched/task_stack.h>
#include <linux/module.h>
#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/uio.h>

#include "blk.h"

/*
 * Private state attached (via bio->bi_private) to a copied/bounced bio:
 * a deep copy of the caller's iov_iter, including the iovec array itself
 * as a flexible array member, so the mapping can outlive the caller's
 * (possibly on-stack) iovecs.
 */
struct bio_map_data {
	int is_our_pages;	/* 1 if the bounce pages were allocated here
				 * (and must be freed on uncopy) */
	struct iov_iter iter;	/* deep copy of the caller's iterator */
	struct iovec iov[];	/* backing storage for iter's iovecs */
};

/*
 * Allocate a bio_map_data sized to hold a deep copy of @data, including
 * its iovec array.  Returns NULL on allocation failure or if the iovec
 * count exceeds UIO_MAXIOV.
 */
static struct bio_map_data *bio_alloc_map_data(struct iov_iter *data,
					       gfp_t gfp_mask)
{
	struct bio_map_data *bmd;

	if (data->nr_segs > UIO_MAXIOV)
		return NULL;

	bmd = kmalloc(struct_size(bmd, iov, data->nr_segs), gfp_mask);
	if (!bmd)
		return NULL;
	memcpy(bmd->iov, data->iov, sizeof(struct iovec) * data->nr_segs);
	bmd->iter = *data;
	/* repoint the copied iterator at our own copy of the iovecs */
	bmd->iter.iov = bmd->iov;
	return bmd;
}

/**
 * bio_copy_from_iter - copy all pages from iov_iter to bio
 * @bio: The &struct bio which describes the I/O as destination
 * @iter: iov_iter as source
 *
 * Copy all pages from iov_iter to bio.
 * Returns 0 on success, or error on failure.
 */
static int bio_copy_from_iter(struct bio *bio, struct iov_iter *iter)
{
	struct bio_vec *bvec;
	struct bvec_iter_all iter_all;

	bio_for_each_segment_all(bvec, bio, iter_all) {
		ssize_t ret;

		ret = copy_page_from_iter(bvec->bv_page,
					  bvec->bv_offset,
					  bvec->bv_len,
					  iter);

		/* source exhausted: a short copy on the last segment is OK */
		if (!iov_iter_count(iter))
			break;

		/* short copy with data still pending means a fault */
		if (ret < bvec->bv_len)
			return -EFAULT;
	}

	return 0;
}

/**
 * bio_copy_to_iter - copy all pages from bio to iov_iter
 * @bio: The &struct bio which describes the I/O as source
 * @iter: iov_iter as destination
 *
 * Copy all pages from bio to iov_iter.
 * Returns 0 on success, or error on failure.
 */
static int bio_copy_to_iter(struct bio *bio, struct iov_iter iter)
{
	/* note: @iter is taken by value so the caller's copy is untouched */
	struct bio_vec *bvec;
	struct bvec_iter_all iter_all;

	bio_for_each_segment_all(bvec, bio, iter_all) {
		ssize_t ret;

		ret = copy_page_to_iter(bvec->bv_page,
					bvec->bv_offset,
					bvec->bv_len,
					&iter);

		/* destination exhausted: stopping short is not an error */
		if (!iov_iter_count(&iter))
			break;

		/* short copy with room remaining means a fault */
		if (ret < bvec->bv_len)
			return -EFAULT;
	}

	return 0;
}

/**
 * bio_uncopy_user - finish previously mapped bio
 * @bio: bio being terminated
 *
 * Free pages allocated from bio_copy_user_iov() and write back data
 * to user space in case of a read.
 */
static int bio_uncopy_user(struct bio *bio)
{
	struct bio_map_data *bmd = bio->bi_private;
	int ret = 0;

	if (!bio_flagged(bio, BIO_NULL_MAPPED)) {
		/*
		 * if we're in a workqueue, the request is orphaned, so
		 * don't copy into a random user address space, just free
		 * and return -EINTR so user space doesn't expect any data.
		 */
		if (!current->mm)
			ret = -EINTR;
		else if (bio_data_dir(bio) == READ)
			ret = bio_copy_to_iter(bio, bmd->iter);
		/* only free pages we allocated ourselves, not map_data's */
		if (bmd->is_our_pages)
			bio_free_pages(bio);
	}
	kfree(bmd);
	bio_put(bio);
	return ret;
}

/**
 * bio_copy_user_iov - copy user data to bio
 * @q: destination block queue
 * @map_data: pointer to the rq_map_data holding pages (if necessary)
 * @iter: iovec iterator
 * @gfp_mask: memory allocation flags
 *
 * Prepares and returns a bio for indirect user io, bouncing data
 * to/from kernel pages as necessary. Must be paired with a call to
 * bio_uncopy_user() on io completion.
 */
static struct bio *bio_copy_user_iov(struct request_queue *q,
		struct rq_map_data *map_data, struct iov_iter *iter,
		gfp_t gfp_mask)
{
	struct bio_map_data *bmd;
	struct page *page;
	struct bio *bio;
	int i = 0, ret;
	int nr_pages;
	unsigned int len = iter->count;
	unsigned int offset = map_data ? offset_in_page(map_data->offset) : 0;

	bmd = bio_alloc_map_data(iter, gfp_mask);
	if (!bmd)
		return ERR_PTR(-ENOMEM);

	/*
	 * We need to do a deep copy of the iov_iter including the iovecs.
	 * The caller provided iov might point to an on-stack or otherwise
	 * shortlived one.
	 */
	bmd->is_our_pages = map_data ? 0 : 1;

	nr_pages = DIV_ROUND_UP(offset + len, PAGE_SIZE);
	if (nr_pages > BIO_MAX_PAGES)
		nr_pages = BIO_MAX_PAGES;

	ret = -ENOMEM;
	bio = bio_kmalloc(gfp_mask, nr_pages);
	if (!bio)
		goto out_bmd;

	ret = 0;

	if (map_data) {
		/* pages are pre-allocated by the caller; start at its offset */
		nr_pages = 1 << map_data->page_order;
		i = map_data->offset / PAGE_SIZE;
	}
	while (len) {
		unsigned int bytes = PAGE_SIZE;

		/* first iteration may start mid-page; later ones are aligned */
		bytes -= offset;

		if (bytes > len)
			bytes = len;

		if (map_data) {
			/* ran out of caller-supplied pages */
			if (i == map_data->nr_entries * nr_pages) {
				ret = -ENOMEM;
				break;
			}

			page = map_data->pages[i / nr_pages];
			page += (i % nr_pages);

			i++;
		} else {
			page = alloc_page(q->bounce_gfp | gfp_mask);
			if (!page) {
				ret = -ENOMEM;
				break;
			}
		}

		if (bio_add_pc_page(q, bio, page, bytes, offset) < bytes) {
			/* page not added to the bio, so free it ourselves */
			if (!map_data)
				__free_page(page);
			break;
		}

		len -= bytes;
		offset = 0;
	}

	if (ret)
		goto cleanup;

	if (map_data)
		map_data->offset += bio->bi_iter.bi_size;

	/*
	 * success
	 */
	if ((iov_iter_rw(iter) == WRITE &&
	     (!map_data || !map_data->null_mapped)) ||
	    (map_data && map_data->from_user)) {
		/* outgoing data: fill the bounce pages from user memory now */
		ret = bio_copy_from_iter(bio, iter);
		if (ret)
			goto cleanup;
	} else {
		/* incoming data: zero our pages, consume the iterator */
		if (bmd->is_our_pages)
			zero_fill_bio(bio);
		iov_iter_advance(iter, bio->bi_iter.bi_size);
	}

	bio->bi_private = bmd;
	if (map_data && map_data->null_mapped)
		bio_set_flag(bio, BIO_NULL_MAPPED);
	return bio;
cleanup:
	if (!map_data)
		bio_free_pages(bio);
	bio_put(bio);
out_bmd:
	kfree(bmd);
	return ERR_PTR(ret);
}

/**
 * bio_map_user_iov - map user iovec into bio
 * @q:		the struct request_queue for the bio
 * @iter:	iovec iterator
 * @gfp_mask:	memory allocation flags
 *
 * Map the user space address into a bio suitable for io to a block
 * device. Returns an error pointer in case of error.
 */
static struct bio *bio_map_user_iov(struct request_queue *q,
		struct iov_iter *iter, gfp_t gfp_mask)
{
	unsigned int max_sectors = queue_max_hw_sectors(q);
	int j;
	struct bio *bio;
	int ret;

	if (!iov_iter_count(iter))
		return ERR_PTR(-EINVAL);

	bio = bio_kmalloc(gfp_mask, iov_iter_npages(iter, BIO_MAX_PAGES));
	if (!bio)
		return ERR_PTR(-ENOMEM);

	while (iov_iter_count(iter)) {
		struct page **pages;
		ssize_t bytes;
		size_t offs, added = 0;
		int npages;

		/* pin the user pages; @pages is allocated for us */
		bytes = iov_iter_get_pages_alloc(iter, &pages, LONG_MAX, &offs);
		if (unlikely(bytes <= 0)) {
			ret = bytes ? bytes : -EFAULT;
			goto out_unmap;
		}

		npages = DIV_ROUND_UP(offs + bytes, PAGE_SIZE);

		if (unlikely(offs & queue_dma_alignment(q))) {
			/* j = 0 so the release loop below drops every page */
			ret = -EINVAL;
			j = 0;
		} else {
			for (j = 0; j < npages; j++) {
				struct page *page = pages[j];
				unsigned int n = PAGE_SIZE - offs;
				bool same_page = false;

				if (n > bytes)
					n = bytes;

				if (!bio_add_hw_page(q, bio, page, n, offs,
						     max_sectors, &same_page)) {
					/*
					 * merged into the previous bvec's
					 * page: drop our duplicate pin
					 */
					if (same_page)
						put_page(page);
					break;
				}

				added += n;
				bytes -= n;
				offs = 0;
			}
			iov_iter_advance(iter, added);
		}
		/*
		 * release the pages we didn't map into the bio, if any
		 */
		while (j < npages)
			put_page(pages[j++]);
		kvfree(pages);
		/* couldn't stuff something into bio? */
		if (bytes)
			break;
	}

	bio_set_flag(bio, BIO_USER_MAPPED);

	/*
	 * subtle -- if bio_map_user_iov() ended up bouncing a bio,
	 * it would normally disappear when its bi_end_io is run.
	 * however, we need it for the unmap, so grab an extra
	 * reference to it
	 */
	bio_get(bio);
	return bio;

 out_unmap:
	bio_release_pages(bio, false);
	bio_put(bio);
	return ERR_PTR(ret);
}

/**
 * bio_unmap_user - unmap a bio
 * @bio: the bio being unmapped
 *
 * Unmap a bio previously mapped by bio_map_user_iov(). Must be called from
 * process context.
 *
 * bio_unmap_user() may sleep.
347 */ 348 static void bio_unmap_user(struct bio *bio) 349 { 350 bio_release_pages(bio, bio_data_dir(bio) == READ); 351 bio_put(bio); 352 bio_put(bio); 353 } 354 355 static void bio_invalidate_vmalloc_pages(struct bio *bio) 356 { 357 #ifdef ARCH_HAS_FLUSH_KERNEL_DCACHE_PAGE 358 if (bio->bi_private && !op_is_write(bio_op(bio))) { 359 unsigned long i, len = 0; 360 361 for (i = 0; i < bio->bi_vcnt; i++) 362 len += bio->bi_io_vec[i].bv_len; 363 invalidate_kernel_vmap_range(bio->bi_private, len); 364 } 365 #endif 366 } 367 368 static void bio_map_kern_endio(struct bio *bio) 369 { 370 bio_invalidate_vmalloc_pages(bio); 371 bio_put(bio); 372 } 373 374 /** 375 * bio_map_kern - map kernel address into bio 376 * @q: the struct request_queue for the bio 377 * @data: pointer to buffer to map 378 * @len: length in bytes 379 * @gfp_mask: allocation flags for bio allocation 380 * 381 * Map the kernel address into a bio suitable for io to a block 382 * device. Returns an error pointer in case of error. 
 */
static struct bio *bio_map_kern(struct request_queue *q, void *data,
		unsigned int len, gfp_t gfp_mask)
{
	unsigned long kaddr = (unsigned long)data;
	unsigned long end = (kaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
	unsigned long start = kaddr >> PAGE_SHIFT;
	const int nr_pages = end - start;
	bool is_vmalloc = is_vmalloc_addr(data);
	struct page *page;
	int offset, i;
	struct bio *bio;

	bio = bio_kmalloc(gfp_mask, nr_pages);
	if (!bio)
		return ERR_PTR(-ENOMEM);

	if (is_vmalloc) {
		/* flush writes through the vmap alias before mapping */
		flush_kernel_vmap_range(data, len);
		/* remember the address for invalidation on completion */
		bio->bi_private = data;
	}

	offset = offset_in_page(kaddr);
	for (i = 0; i < nr_pages; i++) {
		unsigned int bytes = PAGE_SIZE - offset;

		if (len <= 0)
			break;

		if (bytes > len)
			bytes = len;

		/* vmalloc memory needs a page-table walk to find the page */
		if (!is_vmalloc)
			page = virt_to_page(data);
		else
			page = vmalloc_to_page(data);
		if (bio_add_pc_page(q, bio, page, bytes,
				    offset) < bytes) {
			/* we don't support partial mappings */
			bio_put(bio);
			return ERR_PTR(-EINVAL);
		}

		data += bytes;
		len -= bytes;
		offset = 0;
	}

	bio->bi_end_io = bio_map_kern_endio;
	return bio;
}

/* Completion handler for write-direction bio_copy_kern(): free everything. */
static void bio_copy_kern_endio(struct bio *bio)
{
	bio_free_pages(bio);
	bio_put(bio);
}

/*
 * Completion handler for read-direction bio_copy_kern(): copy the data
 * back from the bounce pages into the original kernel buffer, then free.
 */
static void bio_copy_kern_endio_read(struct bio *bio)
{
	char *p = bio->bi_private;
	struct bio_vec *bvec;
	struct bvec_iter_all iter_all;

	bio_for_each_segment_all(bvec, bio, iter_all) {
		memcpy(p, page_address(bvec->bv_page), bvec->bv_len);
		p += bvec->bv_len;
	}

	bio_copy_kern_endio(bio);
}

/**
 * bio_copy_kern - copy kernel address into bio
 * @q: the struct request_queue for the bio
 * @data: pointer to buffer to copy
 * @len: length in bytes
 * @gfp_mask: allocation flags for bio and page allocation
 * @reading: data direction is READ
 *
 * copy the kernel address into a bio suitable for io to a block
 * device. Returns an error pointer in case of error.
 */
static struct bio *bio_copy_kern(struct request_queue *q, void *data,
		unsigned int len, gfp_t gfp_mask, int reading)
{
	unsigned long kaddr = (unsigned long)data;
	unsigned long end = (kaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
	unsigned long start = kaddr >> PAGE_SHIFT;
	struct bio *bio;
	void *p = data;
	int nr_pages = 0;

	/*
	 * Overflow, abort
	 */
	if (end < start)
		return ERR_PTR(-EINVAL);

	nr_pages = end - start;
	bio = bio_kmalloc(gfp_mask, nr_pages);
	if (!bio)
		return ERR_PTR(-ENOMEM);

	while (len) {
		struct page *page;
		unsigned int bytes = PAGE_SIZE;

		if (bytes > len)
			bytes = len;

		page = alloc_page(q->bounce_gfp | gfp_mask);
		if (!page)
			goto cleanup;

		/* writes stage the data into the bounce page up front */
		if (!reading)
			memcpy(page_address(page), p, bytes);

		/*
		 * NOTE(review): if bio_add_pc_page() rejects the page we
		 * break out with @page neither added to the bio nor freed,
		 * and return a bio shorter than @len — confirm callers
		 * tolerate a short bio here, or whether this should be an
		 * error like in bio_map_kern().
		 */
		if (bio_add_pc_page(q, bio, page, bytes, 0) < bytes)
			break;

		len -= bytes;
		p += bytes;
	}

	if (reading) {
		/* copy back to @data on completion */
		bio->bi_end_io = bio_copy_kern_endio_read;
		bio->bi_private = data;
	} else {
		bio->bi_end_io = bio_copy_kern_endio;
	}

	return bio;

cleanup:
	bio_free_pages(bio);
	bio_put(bio);
	return ERR_PTR(-ENOMEM);
}

/*
 * Append a bio to a passthrough request.  Only works if the bio can be merged
 * into the request based on the driver constraints.
 */
int blk_rq_append_bio(struct request *rq, struct bio **bio)
{
	struct bio *orig_bio = *bio;
	struct bvec_iter iter;
	struct bio_vec bv;
	unsigned int nr_segs = 0;

	/* may replace *bio with a bounce bio */
	blk_queue_bounce(rq->q, bio);

	bio_for_each_bvec(bv, *bio, iter)
		nr_segs++;

	if (!rq->bio) {
		blk_rq_bio_prep(rq, *bio, nr_segs);
	} else {
		if (!ll_back_merge_fn(rq, *bio, nr_segs)) {
			/* undo the bounce before reporting failure */
			if (orig_bio != *bio) {
				bio_put(*bio);
				*bio = orig_bio;
			}
			return -EINVAL;
		}

		rq->biotail->bi_next = *bio;
		rq->biotail = *bio;
		rq->__data_len += (*bio)->bi_iter.bi_size;
	}

	return 0;
}
EXPORT_SYMBOL(blk_rq_append_bio);

/*
 * Undo a user mapping: plain unmap for directly-mapped bios, copy-back
 * and free for copied (bounced) ones.  Returns 0 or the copy-back error.
 */
static int __blk_rq_unmap_user(struct bio *bio)
{
	int ret = 0;

	if (bio) {
		if (bio_flagged(bio, BIO_USER_MAPPED))
			bio_unmap_user(bio);
		else
			ret = bio_uncopy_user(bio);
	}

	return ret;
}

/*
 * Map (or copy) one chunk of @iter into @rq.  Consumes as much of the
 * iterator as fits in a single bio; the caller loops until it is empty.
 */
static int __blk_rq_map_user_iov(struct request *rq,
		struct rq_map_data *map_data, struct iov_iter *iter,
		gfp_t gfp_mask, bool copy)
{
	struct request_queue *q = rq->q;
	struct bio *bio, *orig_bio;
	int ret;

	if (copy)
		bio = bio_copy_user_iov(q, map_data, iter, gfp_mask);
	else
		bio = bio_map_user_iov(q, iter, gfp_mask);

	if (IS_ERR(bio))
		return PTR_ERR(bio);

	/* stamp the request's operation onto the bio */
	bio->bi_opf &= ~REQ_OP_MASK;
	bio->bi_opf |= req_op(rq);

	orig_bio = bio;

	/*
	 * We link the bounce buffer in and could have to traverse it
	 * later so we have to get a ref to prevent it from being freed
	 */
	ret = blk_rq_append_bio(rq, &bio);
	if (ret) {
		__blk_rq_unmap_user(orig_bio);
		return ret;
	}
	bio_get(bio);

	return 0;
}

/**
 * blk_rq_map_user_iov - map user data to a request, for passthrough requests
 * @q:		request queue where request should be inserted
 * @rq:		request to map data to
 * @map_data:   pointer to the rq_map_data holding pages (if necessary)
 * @iter:	iovec iterator
 * @gfp_mask:	memory allocation flags
 *
 * Description:
 *    Data will be mapped directly for zero copy I/O, if possible. Otherwise
 *    a kernel bounce buffer is used.
 *
 *    A matching blk_rq_unmap_user() must be issued at the end of I/O, while
 *    still in process context.
 *
 *    Note: The mapped bio may need to be bounced through blk_queue_bounce()
 *    before being submitted to the device, as pages mapped may be out of
 *    reach. It's the callers responsibility to make sure this happens. The
 *    original bio must be passed back in to blk_rq_unmap_user() for proper
 *    unmapping.
 */
int blk_rq_map_user_iov(struct request_queue *q, struct request *rq,
			struct rq_map_data *map_data,
			const struct iov_iter *iter, gfp_t gfp_mask)
{
	bool copy = false;
	unsigned long align = q->dma_pad_mask | queue_dma_alignment(q);
	struct bio *bio = NULL;
	struct iov_iter i;
	int ret = -EINVAL;

	if (!iter_is_iovec(iter))
		goto fail;

	/* bounce when caller supplied pages, or iter violates DMA/virt limits */
	if (map_data)
		copy = true;
	else if (iov_iter_alignment(iter) & align)
		copy = true;
	else if (queue_virt_boundary(q))
		copy = queue_virt_boundary(q) & iov_iter_gap_alignment(iter);

	/* work on a local copy so the caller's iterator is untouched */
	i = *iter;
	do {
		ret = __blk_rq_map_user_iov(rq, map_data, &i, gfp_mask, copy);
		if (ret)
			goto unmap_rq;
		/* remember the first bio for unwinding on a later failure */
		if (!bio)
			bio = rq->bio;
	} while (iov_iter_count(&i));

	return 0;

unmap_rq:
	blk_rq_unmap_user(bio);
fail:
	rq->bio = NULL;
	return ret;
}
EXPORT_SYMBOL(blk_rq_map_user_iov);

/* Single-buffer convenience wrapper around blk_rq_map_user_iov(). */
int blk_rq_map_user(struct request_queue *q, struct request *rq,
		    struct rq_map_data *map_data, void __user *ubuf,
		    unsigned long len, gfp_t gfp_mask)
{
	struct iovec iov;
	struct iov_iter i;
	int ret = import_single_range(rq_data_dir(rq), ubuf, len, &iov, &i);

	if (unlikely(ret < 0))
		return ret;

	return blk_rq_map_user_iov(q, rq, map_data, &i, gfp_mask);
}
EXPORT_SYMBOL(blk_rq_map_user);

/**
 * blk_rq_unmap_user - unmap a request with user data
 * @bio:	       start of bio list
 *
 * Description:
 *    Unmap a rq previously mapped by blk_rq_map_user(). The caller must
 *    supply the original rq->bio from the blk_rq_map_user() return, since
 *    the I/O completion may have changed rq->bio.
 */
int blk_rq_unmap_user(struct bio *bio)
{
	struct bio *mapped_bio;
	int ret = 0, ret2;

	while (bio) {
		mapped_bio = bio;
		/* a bounced bio keeps the original bio in bi_private */
		if (unlikely(bio_flagged(bio, BIO_BOUNCED)))
			mapped_bio = bio->bi_private;

		ret2 = __blk_rq_unmap_user(mapped_bio);
		/* keep the first error but still unmap the whole chain */
		if (ret2 && !ret)
			ret = ret2;

		/* advance before dropping the ref taken at map time */
		mapped_bio = bio;
		bio = bio->bi_next;
		bio_put(mapped_bio);
	}

	return ret;
}
EXPORT_SYMBOL(blk_rq_unmap_user);

/**
 * blk_rq_map_kern - map kernel data to a request, for passthrough requests
 * @q:		request queue where request should be inserted
 * @rq:		request to fill
 * @kbuf:	the kernel buffer
 * @len:	length of user data
 * @gfp_mask:	memory allocation flags
 *
 * Description:
 *    Data will be mapped directly if possible. Otherwise a bounce
 *    buffer is used. Can be called multiple times to append multiple
 *    buffers.
727 */ 728 int blk_rq_map_kern(struct request_queue *q, struct request *rq, void *kbuf, 729 unsigned int len, gfp_t gfp_mask) 730 { 731 int reading = rq_data_dir(rq) == READ; 732 unsigned long addr = (unsigned long) kbuf; 733 struct bio *bio, *orig_bio; 734 int ret; 735 736 if (len > (queue_max_hw_sectors(q) << 9)) 737 return -EINVAL; 738 if (!len || !kbuf) 739 return -EINVAL; 740 741 if (!blk_rq_aligned(q, addr, len) || object_is_on_stack(kbuf)) 742 bio = bio_copy_kern(q, kbuf, len, gfp_mask, reading); 743 else 744 bio = bio_map_kern(q, kbuf, len, gfp_mask); 745 746 if (IS_ERR(bio)) 747 return PTR_ERR(bio); 748 749 bio->bi_opf &= ~REQ_OP_MASK; 750 bio->bi_opf |= req_op(rq); 751 752 orig_bio = bio; 753 ret = blk_rq_append_bio(rq, &bio); 754 if (unlikely(ret)) { 755 /* request is too big */ 756 bio_put(orig_bio); 757 return ret; 758 } 759 760 return 0; 761 } 762 EXPORT_SYMBOL(blk_rq_map_kern); 763