// SPDX-License-Identifier: GPL-2.0
/*
 * Functions related to mapping data to requests
 */
#include <linux/kernel.h>
#include <linux/sched/task_stack.h>
#include <linux/module.h>
#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/uio.h>

#include "blk.h"

struct bio_map_data {
	bool is_our_pages : 1;
	bool is_null_mapped : 1;
	struct iov_iter iter;
	struct iovec iov[];
};

static struct bio_map_data *bio_alloc_map_data(struct iov_iter *data,
					       gfp_t gfp_mask)
{
	struct bio_map_data *bmd;

	if (data->nr_segs > UIO_MAXIOV)
		return NULL;

	bmd = kmalloc(struct_size(bmd, iov, data->nr_segs), gfp_mask);
	if (!bmd)
		return NULL;
	memcpy(bmd->iov, data->iov, sizeof(struct iovec) * data->nr_segs);
	bmd->iter = *data;
	bmd->iter.iov = bmd->iov;
	return bmd;
}

/**
 * bio_copy_from_iter - copy all pages from iov_iter to bio
 * @bio: The &struct bio which describes the I/O as destination
 * @iter: iov_iter as source
 *
 * Copy all pages from iov_iter to bio.
 * Returns 0 on success, or error on failure.
 */
static int bio_copy_from_iter(struct bio *bio, struct iov_iter *iter)
{
	struct bio_vec *bvec;
	struct bvec_iter_all iter_all;

	bio_for_each_segment_all(bvec, bio, iter_all) {
		ssize_t ret;

		ret = copy_page_from_iter(bvec->bv_page,
					  bvec->bv_offset,
					  bvec->bv_len,
					  iter);

		if (!iov_iter_count(iter))
			break;

		if (ret < bvec->bv_len)
			return -EFAULT;
	}

	return 0;
}

/**
 * bio_copy_to_iter - copy all pages from bio to iov_iter
 * @bio: The &struct bio which describes the I/O as source
 * @iter: iov_iter as destination
 *
 * Copy all pages from bio to iov_iter.
 * Returns 0 on success, or error on failure.
 */
static int bio_copy_to_iter(struct bio *bio, struct iov_iter iter)
{
	struct bio_vec *bvec;
	struct bvec_iter_all iter_all;

	bio_for_each_segment_all(bvec, bio, iter_all) {
		ssize_t ret;

		ret = copy_page_to_iter(bvec->bv_page,
					bvec->bv_offset,
					bvec->bv_len,
					&iter);

		if (!iov_iter_count(&iter))
			break;

		if (ret < bvec->bv_len)
			return -EFAULT;
	}

	return 0;
}

/**
 * bio_uncopy_user - finish previously mapped bio
 * @bio: bio being terminated
 *
 * Free pages allocated from bio_copy_user_iov() and write back data
 * to user space in case of a read.
 */
static int bio_uncopy_user(struct bio *bio)
{
	struct bio_map_data *bmd = bio->bi_private;
	int ret = 0;

	if (!bmd->is_null_mapped) {
		/*
		 * if we're in a workqueue, the request is orphaned, so
		 * don't copy into a random user address space, just free
		 * and return -EINTR so user space doesn't expect any data.
		 */
		if (!current->mm)
			ret = -EINTR;
		else if (bio_data_dir(bio) == READ)
			ret = bio_copy_to_iter(bio, bmd->iter);
		if (bmd->is_our_pages)
			bio_free_pages(bio);
	}
	kfree(bmd);
	bio_put(bio);
	return ret;
}
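/*
 * Illustrative sketch (the submission plumbing around the helpers is
 * assumed): for a READ that has to be bounced, the copy/uncopy helpers
 * in this file pair up as follows.
 *
 *	bio = bio_copy_user_iov(q, NULL, iter, GFP_KERNEL);
 *		// allocates bounce pages and stashes a deep copy of
 *		// *iter in bio->bi_private (the bio_map_data)
 *	... submit the bio and wait for completion ...
 *	ret = bio_uncopy_user(bio);
 *		// READ: bio_copy_to_iter() writes the bounce pages back
 *		// through bmd->iter, then the pages and the bmd are freed
 */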
/**
 * bio_copy_user_iov - copy user data to bio
 * @q: destination block queue
 * @map_data: pointer to the rq_map_data holding pages (if necessary)
 * @iter: iovec iterator
 * @gfp_mask: memory allocation flags
 *
 * Prepares and returns a bio for indirect user I/O, bouncing data
 * to/from kernel pages as necessary. Must be paired with a call to
 * bio_uncopy_user() on I/O completion.
 */
static struct bio *bio_copy_user_iov(struct request_queue *q,
		struct rq_map_data *map_data, struct iov_iter *iter,
		gfp_t gfp_mask)
{
	struct bio_map_data *bmd;
	struct page *page;
	struct bio *bio;
	int i = 0, ret;
	int nr_pages;
	unsigned int len = iter->count;
	unsigned int offset = map_data ? offset_in_page(map_data->offset) : 0;

	bmd = bio_alloc_map_data(iter, gfp_mask);
	if (!bmd)
		return ERR_PTR(-ENOMEM);

	/*
	 * We need to do a deep copy of the iov_iter including the iovecs.
	 * The caller provided iov might point to an on-stack or otherwise
	 * shortlived one.
	 */
	bmd->is_our_pages = !map_data;

	nr_pages = DIV_ROUND_UP(offset + len, PAGE_SIZE);
	if (nr_pages > BIO_MAX_PAGES)
		nr_pages = BIO_MAX_PAGES;

	ret = -ENOMEM;
	bio = bio_kmalloc(gfp_mask, nr_pages);
	if (!bio)
		goto out_bmd;

	ret = 0;

	if (map_data) {
		nr_pages = 1 << map_data->page_order;
		i = map_data->offset / PAGE_SIZE;
	}
	while (len) {
		unsigned int bytes = PAGE_SIZE;

		bytes -= offset;

		if (bytes > len)
			bytes = len;

		if (map_data) {
			if (i == map_data->nr_entries * nr_pages) {
				ret = -ENOMEM;
				break;
			}

			page = map_data->pages[i / nr_pages];
			page += (i % nr_pages);

			i++;
		} else {
			page = alloc_page(q->bounce_gfp | gfp_mask);
			if (!page) {
				ret = -ENOMEM;
				break;
			}
		}

		if (bio_add_pc_page(q, bio, page, bytes, offset) < bytes) {
			if (!map_data)
				__free_page(page);
			break;
		}

		len -= bytes;
		offset = 0;
	}

	if (ret)
		goto cleanup;

	if (map_data)
		map_data->offset += bio->bi_iter.bi_size;

	/*
	 * success
	 */
	if ((iov_iter_rw(iter) == WRITE &&
	     (!map_data || !map_data->null_mapped)) ||
	    (map_data && map_data->from_user)) {
		ret = bio_copy_from_iter(bio, iter);
		if (ret)
			goto cleanup;
	} else {
		if (bmd->is_our_pages)
			zero_fill_bio(bio);
		iov_iter_advance(iter, bio->bi_iter.bi_size);
	}

	bio->bi_private = bmd;
	if (map_data && map_data->null_mapped)
		bmd->is_null_mapped = true;
	return bio;
cleanup:
	if (!map_data)
		bio_free_pages(bio);
	bio_put(bio);
out_bmd:
	kfree(bmd);
	return ERR_PTR(ret);
}
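/*
 * Illustrative example of the rq_map_data indexing above: with a
 * preallocated pool of order-1 entries (nr_pages = 1 << page_order = 2),
 * the loop walks the pool one PAGE_SIZE step per iteration:
 *
 *	i = 0 -> pages[0] + 0,	i = 1 -> pages[0] + 1,
 *	i = 2 -> pages[1] + 0,	i = 3 -> pages[1] + 1, ...
 *
 * and it bails out with -ENOMEM once i reaches nr_entries * nr_pages.
 */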
/**
 * bio_map_user_iov - map user iovec into bio
 * @q: the struct request_queue for the bio
 * @iter: iovec iterator
 * @gfp_mask: memory allocation flags
 *
 * Map the user space address into a bio suitable for I/O to a block
 * device. Returns an error pointer in case of error.
 */
static struct bio *bio_map_user_iov(struct request_queue *q,
		struct iov_iter *iter, gfp_t gfp_mask)
{
	unsigned int max_sectors = queue_max_hw_sectors(q);
	int j;
	struct bio *bio;
	int ret;

	if (!iov_iter_count(iter))
		return ERR_PTR(-EINVAL);

	bio = bio_kmalloc(gfp_mask, iov_iter_npages(iter, BIO_MAX_PAGES));
	if (!bio)
		return ERR_PTR(-ENOMEM);

	while (iov_iter_count(iter)) {
		struct page **pages;
		ssize_t bytes;
		size_t offs, added = 0;
		int npages;

		bytes = iov_iter_get_pages_alloc(iter, &pages, LONG_MAX, &offs);
		if (unlikely(bytes <= 0)) {
			ret = bytes ? bytes : -EFAULT;
			goto out_unmap;
		}

		npages = DIV_ROUND_UP(offs + bytes, PAGE_SIZE);

		if (unlikely(offs & queue_dma_alignment(q))) {
			ret = -EINVAL;
			j = 0;
		} else {
			for (j = 0; j < npages; j++) {
				struct page *page = pages[j];
				unsigned int n = PAGE_SIZE - offs;
				bool same_page = false;

				if (n > bytes)
					n = bytes;

				if (!bio_add_hw_page(q, bio, page, n, offs,
						     max_sectors, &same_page)) {
					if (same_page)
						put_page(page);
					break;
				}

				added += n;
				bytes -= n;
				offs = 0;
			}
			iov_iter_advance(iter, added);
		}
		/*
		 * release the pages we didn't map into the bio, if any
		 */
		while (j < npages)
			put_page(pages[j++]);
		kvfree(pages);
		/* couldn't stuff something into bio? */
		if (bytes)
			break;
	}

	bio_set_flag(bio, BIO_USER_MAPPED);

	/*
	 * subtle -- if bio_map_user_iov() ended up bouncing a bio,
	 * it would normally disappear when its bi_end_io is run.
	 * however, we need it for the unmap, so grab an extra
	 * reference to it
	 */
	bio_get(bio);
	return bio;

out_unmap:
	bio_release_pages(bio, false);
	bio_put(bio);
	return ERR_PTR(ret);
}

/**
 * bio_unmap_user - unmap a bio
 * @bio: the bio being unmapped
 *
 * Unmap a bio previously mapped by bio_map_user_iov(). Must be called from
 * process context.
 *
 * bio_unmap_user() may sleep.
 */
static void bio_unmap_user(struct bio *bio)
{
	bio_release_pages(bio, bio_data_dir(bio) == READ);
	bio_put(bio);	/* drop the extra ref taken by bio_map_user_iov() */
	bio_put(bio);	/* drop the original allocation ref */
}
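/*
 * Illustrative sketch of the zero-copy pair above (the submission step is
 * assumed): the user pages are pinned rather than copied.
 *
 *	bio = bio_map_user_iov(q, iter, GFP_KERNEL);
 *		// pins the user pages and takes an extra bio reference
 *	... submit the bio and wait for completion ...
 *	bio_unmap_user(bio);
 *		// marks the pages dirty for a READ, unpins them, and
 *		// drops both bio references
 */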
static void bio_invalidate_vmalloc_pages(struct bio *bio)
{
#ifdef ARCH_HAS_FLUSH_KERNEL_DCACHE_PAGE
	if (bio->bi_private && !op_is_write(bio_op(bio))) {
		unsigned long i, len = 0;

		for (i = 0; i < bio->bi_vcnt; i++)
			len += bio->bi_io_vec[i].bv_len;
		invalidate_kernel_vmap_range(bio->bi_private, len);
	}
#endif
}

static void bio_map_kern_endio(struct bio *bio)
{
	bio_invalidate_vmalloc_pages(bio);
	bio_put(bio);
}

/**
 * bio_map_kern - map kernel address into bio
 * @q: the struct request_queue for the bio
 * @data: pointer to buffer to map
 * @len: length in bytes
 * @gfp_mask: allocation flags for bio allocation
 *
 * Map the kernel address into a bio suitable for I/O to a block
 * device. Returns an error pointer in case of error.
 */
static struct bio *bio_map_kern(struct request_queue *q, void *data,
		unsigned int len, gfp_t gfp_mask)
{
	unsigned long kaddr = (unsigned long)data;
	unsigned long end = (kaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
	unsigned long start = kaddr >> PAGE_SHIFT;
	const int nr_pages = end - start;
	bool is_vmalloc = is_vmalloc_addr(data);
	struct page *page;
	int offset, i;
	struct bio *bio;

	bio = bio_kmalloc(gfp_mask, nr_pages);
	if (!bio)
		return ERR_PTR(-ENOMEM);

	if (is_vmalloc) {
		flush_kernel_vmap_range(data, len);
		bio->bi_private = data;
	}

	offset = offset_in_page(kaddr);
	for (i = 0; i < nr_pages; i++) {
		unsigned int bytes = PAGE_SIZE - offset;

		if (len <= 0)
			break;

		if (bytes > len)
			bytes = len;

		if (!is_vmalloc)
			page = virt_to_page(data);
		else
			page = vmalloc_to_page(data);
		if (bio_add_pc_page(q, bio, page, bytes,
				    offset) < bytes) {
			/* we don't support partial mappings */
			bio_put(bio);
			return ERR_PTR(-EINVAL);
		}

		data += bytes;
		len -= bytes;
		offset = 0;
	}

	bio->bi_end_io = bio_map_kern_endio;
	return bio;
}

static void bio_copy_kern_endio(struct bio *bio)
{
	bio_free_pages(bio);
	bio_put(bio);
}

static void bio_copy_kern_endio_read(struct bio *bio)
{
	char *p = bio->bi_private;
	struct bio_vec *bvec;
	struct bvec_iter_all iter_all;

	bio_for_each_segment_all(bvec, bio, iter_all) {
		memcpy(p, page_address(bvec->bv_page), bvec->bv_len);
		p += bvec->bv_len;
	}

	bio_copy_kern_endio(bio);
}

/**
 * bio_copy_kern - copy kernel address into bio
 * @q: the struct request_queue for the bio
 * @data: pointer to buffer to copy
 * @len: length in bytes
 * @gfp_mask: allocation flags for bio and page allocation
 * @reading: data direction is READ
 *
 * Copy the kernel address into a bio suitable for I/O to a block
 * device. Returns an error pointer in case of error.
 */
static struct bio *bio_copy_kern(struct request_queue *q, void *data,
		unsigned int len, gfp_t gfp_mask, int reading)
{
	unsigned long kaddr = (unsigned long)data;
	unsigned long end = (kaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
	unsigned long start = kaddr >> PAGE_SHIFT;
	struct bio *bio;
	void *p = data;
	int nr_pages = 0;

	/*
	 * Overflow, abort
	 */
	if (end < start)
		return ERR_PTR(-EINVAL);

	nr_pages = end - start;
	bio = bio_kmalloc(gfp_mask, nr_pages);
	if (!bio)
		return ERR_PTR(-ENOMEM);

	while (len) {
		struct page *page;
		unsigned int bytes = PAGE_SIZE;

		if (bytes > len)
			bytes = len;

		page = alloc_page(q->bounce_gfp | gfp_mask);
		if (!page)
			goto cleanup;

		if (!reading)
			memcpy(page_address(page), p, bytes);

		if (bio_add_pc_page(q, bio, page, bytes, 0) < bytes)
			break;

		len -= bytes;
		p += bytes;
	}

	if (reading) {
		bio->bi_end_io = bio_copy_kern_endio_read;
		bio->bi_private = data;
	} else {
		bio->bi_end_io = bio_copy_kern_endio;
	}

	return bio;

cleanup:
	bio_free_pages(bio);
	bio_put(bio);
	return ERR_PTR(-ENOMEM);
}
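/*
 * Worked example of the page-count arithmetic used by bio_map_kern() and
 * bio_copy_kern() above (illustrative, assuming 4K pages): a buffer that
 * straddles a page boundary counts both partial pages.
 *
 *	data = 0x1ff0, len = 0x20:
 *	start    = 0x1ff0 >> PAGE_SHIFT                          = 1
 *	end      = (0x1ff0 + 0x20 + PAGE_SIZE - 1) >> PAGE_SHIFT = 3
 *	nr_pages = end - start                                   = 2
 */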
/*
 * Append a bio to a passthrough request. Only works if the bio can be merged
 * into the request based on the driver constraints.
 */
int blk_rq_append_bio(struct request *rq, struct bio **bio)
{
	struct bio *orig_bio = *bio;
	struct bvec_iter iter;
	struct bio_vec bv;
	unsigned int nr_segs = 0;

	blk_queue_bounce(rq->q, bio);

	bio_for_each_bvec(bv, *bio, iter)
		nr_segs++;

	if (!rq->bio) {
		blk_rq_bio_prep(rq, *bio, nr_segs);
	} else {
		if (!ll_back_merge_fn(rq, *bio, nr_segs)) {
			if (orig_bio != *bio) {
				bio_put(*bio);
				*bio = orig_bio;
			}
			return -EINVAL;
		}

		rq->biotail->bi_next = *bio;
		rq->biotail = *bio;
		rq->__data_len += (*bio)->bi_iter.bi_size;
		bio_crypt_free_ctx(*bio);
	}

	return 0;
}
EXPORT_SYMBOL(blk_rq_append_bio);

static int __blk_rq_unmap_user(struct bio *bio)
{
	int ret = 0;

	if (bio) {
		if (bio_flagged(bio, BIO_USER_MAPPED))
			bio_unmap_user(bio);
		else
			ret = bio_uncopy_user(bio);
	}

	return ret;
}

static int __blk_rq_map_user_iov(struct request *rq,
		struct rq_map_data *map_data, struct iov_iter *iter,
		gfp_t gfp_mask, bool copy)
{
	struct request_queue *q = rq->q;
	struct bio *bio, *orig_bio;
	int ret;

	if (copy)
		bio = bio_copy_user_iov(q, map_data, iter, gfp_mask);
	else
		bio = bio_map_user_iov(q, iter, gfp_mask);

	if (IS_ERR(bio))
		return PTR_ERR(bio);

	bio->bi_opf &= ~REQ_OP_MASK;
	bio->bi_opf |= req_op(rq);

	orig_bio = bio;

	/*
	 * We link the bounce buffer in and could have to traverse it
	 * later so we have to get a ref to prevent it from being freed
	 */
	ret = blk_rq_append_bio(rq, &bio);
	if (ret) {
		__blk_rq_unmap_user(orig_bio);
		return ret;
	}
	bio_get(bio);

	return 0;
}
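/*
 * Illustrative example for blk_rq_append_bio() above: the double pointer
 * matters because blk_queue_bounce() may replace the caller's bio with a
 * bounce clone.  "my_bio" is a hypothetical caller-built bio.
 *
 *	struct bio *bio = my_bio;
 *
 *	ret = blk_rq_append_bio(rq, &bio);
 *	if (ret)
 *		return ret;	// bio didn't fit the driver constraints
 *	// on success, bio (possibly the bounce clone) is linked into rq
 */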
/**
 * blk_rq_map_user_iov - map user data to a request, for passthrough requests
 * @q: request queue where request should be inserted
 * @rq: request to map data to
 * @map_data: pointer to the rq_map_data holding pages (if necessary)
 * @iter: iovec iterator
 * @gfp_mask: memory allocation flags
 *
 * Description:
 *    Data will be mapped directly for zero copy I/O, if possible. Otherwise
 *    a kernel bounce buffer is used.
 *
 *    A matching blk_rq_unmap_user() must be issued at the end of I/O, while
 *    still in process context.
 *
 * Note: The mapped bio may need to be bounced through blk_queue_bounce()
 * before being submitted to the device, as pages mapped may be out of
 * reach. It's the caller's responsibility to make sure this happens. The
 * original bio must be passed back in to blk_rq_unmap_user() for proper
 * unmapping.
 */
int blk_rq_map_user_iov(struct request_queue *q, struct request *rq,
			struct rq_map_data *map_data,
			const struct iov_iter *iter, gfp_t gfp_mask)
{
	bool copy = false;
	unsigned long align = q->dma_pad_mask | queue_dma_alignment(q);
	struct bio *bio = NULL;
	struct iov_iter i;
	int ret = -EINVAL;

	if (!iter_is_iovec(iter))
		goto fail;

	if (map_data)
		copy = true;
	else if (iov_iter_alignment(iter) & align)
		copy = true;
	else if (queue_virt_boundary(q))
		copy = queue_virt_boundary(q) & iov_iter_gap_alignment(iter);

	i = *iter;
	do {
		ret = __blk_rq_map_user_iov(rq, map_data, &i, gfp_mask, copy);
		if (ret)
			goto unmap_rq;
		if (!bio)
			bio = rq->bio;
	} while (iov_iter_count(&i));

	return 0;

unmap_rq:
	blk_rq_unmap_user(bio);
fail:
	rq->bio = NULL;
	return ret;
}
EXPORT_SYMBOL(blk_rq_map_user_iov);

int blk_rq_map_user(struct request_queue *q, struct request *rq,
		    struct rq_map_data *map_data, void __user *ubuf,
		    unsigned long len, gfp_t gfp_mask)
{
	struct iovec iov;
	struct iov_iter i;
	int ret = import_single_range(rq_data_dir(rq), ubuf, len, &iov, &i);

	if (unlikely(ret < 0))
		return ret;

	return blk_rq_map_user_iov(q, rq, map_data, &i, gfp_mask);
}
EXPORT_SYMBOL(blk_rq_map_user);

/**
 * blk_rq_unmap_user - unmap a request with user data
 * @bio: start of bio list
 *
 * Description:
 *    Unmap a request previously mapped by blk_rq_map_user(). The caller must
 *    supply the original rq->bio from the blk_rq_map_user() return, since
 *    the I/O completion may have changed rq->bio.
 */
int blk_rq_unmap_user(struct bio *bio)
{
	struct bio *mapped_bio;
	int ret = 0, ret2;

	while (bio) {
		mapped_bio = bio;
		if (unlikely(bio_flagged(bio, BIO_BOUNCED)))
			mapped_bio = bio->bi_private;

		ret2 = __blk_rq_unmap_user(mapped_bio);
		if (ret2 && !ret)
			ret = ret2;

		mapped_bio = bio;
		bio = bio->bi_next;
		bio_put(mapped_bio);
	}

	return ret;
}
EXPORT_SYMBOL(blk_rq_unmap_user);

/**
 * blk_rq_map_kern - map kernel data to a request, for passthrough requests
 * @q: request queue where request should be inserted
 * @rq: request to fill
 * @kbuf: the kernel buffer
 * @len: length of user data
 * @gfp_mask: memory allocation flags
 *
 * Description:
 *    Data will be mapped directly if possible. Otherwise a bounce
 *    buffer is used. Can be called multiple times to append multiple
 *    buffers.
 */
int blk_rq_map_kern(struct request_queue *q, struct request *rq, void *kbuf,
		    unsigned int len, gfp_t gfp_mask)
{
	int reading = rq_data_dir(rq) == READ;
	unsigned long addr = (unsigned long) kbuf;
	struct bio *bio, *orig_bio;
	int ret;

	if (len > (queue_max_hw_sectors(q) << 9))
		return -EINVAL;
	if (!len || !kbuf)
		return -EINVAL;

	if (!blk_rq_aligned(q, addr, len) || object_is_on_stack(kbuf))
		bio = bio_copy_kern(q, kbuf, len, gfp_mask, reading);
	else
		bio = bio_map_kern(q, kbuf, len, gfp_mask);

	if (IS_ERR(bio))
		return PTR_ERR(bio);

	bio->bi_opf &= ~REQ_OP_MASK;
	bio->bi_opf |= req_op(rq);

	orig_bio = bio;
	ret = blk_rq_append_bio(rq, &bio);
	if (unlikely(ret)) {
		/* request is too big */
		bio_put(orig_bio);
		return ret;
	}

	return 0;
}
EXPORT_SYMBOL(blk_rq_map_kern);
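/*
 * Illustrative sketch of the exported interface (the request setup is
 * assumed, in the style of a SCSI passthrough ioctl):
 *
 *	struct request *rq = blk_get_request(q, REQ_OP_DRV_IN, 0);
 *	struct bio *bio;
 *	int ret;
 *
 *	ret = blk_rq_map_user(q, rq, NULL, ubuf, len, GFP_KERNEL);
 *	if (ret)
 *		goto out_put;
 *	bio = rq->bio;			// save: completion may change rq->bio
 *	blk_execute_rq(q, NULL, rq, 0);
 *	ret = blk_rq_unmap_user(bio);	// must run in process context
 * out_put:
 *	blk_put_request(rq);
 */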