// SPDX-License-Identifier: GPL-2.0
/*
 * Functions related to mapping data to requests
 */
#include <linux/kernel.h>
#include <linux/sched/task_stack.h>
#include <linux/module.h>
#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/uio.h>

#include "blk.h"

struct bio_map_data {
	int is_our_pages;
	struct iov_iter iter;
	struct iovec iov[];
};

static struct bio_map_data *bio_alloc_map_data(struct iov_iter *data,
					       gfp_t gfp_mask)
{
	struct bio_map_data *bmd;

	if (data->nr_segs > UIO_MAXIOV)
		return NULL;

	bmd = kmalloc(struct_size(bmd, iov, data->nr_segs), gfp_mask);
	if (!bmd)
		return NULL;
	memcpy(bmd->iov, data->iov, sizeof(struct iovec) * data->nr_segs);
	bmd->iter = *data;
	bmd->iter.iov = bmd->iov;
	return bmd;
}

/**
 * bio_copy_from_iter - copy all pages from iov_iter to bio
 * @bio: The &struct bio which describes the I/O as destination
 * @iter: iov_iter as source
 *
 * Copy all pages from iov_iter to bio.
 * Returns 0 on success, or error on failure.
 */
static int bio_copy_from_iter(struct bio *bio, struct iov_iter *iter)
{
	struct bio_vec *bvec;
	struct bvec_iter_all iter_all;

	bio_for_each_segment_all(bvec, bio, iter_all) {
		ssize_t ret;

		ret = copy_page_from_iter(bvec->bv_page,
					  bvec->bv_offset,
					  bvec->bv_len,
					  iter);

		if (!iov_iter_count(iter))
			break;

		if (ret < bvec->bv_len)
			return -EFAULT;
	}

	return 0;
}

/**
 * bio_copy_to_iter - copy all pages from bio to iov_iter
 * @bio: The &struct bio which describes the I/O as source
 * @iter: iov_iter as destination
 *
 * Copy all pages from bio to iov_iter.
 * Returns 0 on success, or error on failure.
 */
static int bio_copy_to_iter(struct bio *bio, struct iov_iter iter)
{
	struct bio_vec *bvec;
	struct bvec_iter_all iter_all;

	bio_for_each_segment_all(bvec, bio, iter_all) {
		ssize_t ret;

		ret = copy_page_to_iter(bvec->bv_page,
					bvec->bv_offset,
					bvec->bv_len,
					&iter);

		if (!iov_iter_count(&iter))
			break;

		if (ret < bvec->bv_len)
			return -EFAULT;
	}

	return 0;
}

/**
 * bio_uncopy_user - finish previously mapped bio
 * @bio: bio being terminated
 *
 * Free pages allocated from bio_copy_user_iov() and write back data
 * to user space in case of a read.
 */
static int bio_uncopy_user(struct bio *bio)
{
	struct bio_map_data *bmd = bio->bi_private;
	int ret = 0;

	if (!bio_flagged(bio, BIO_NULL_MAPPED)) {
		/*
		 * if we're in a workqueue, the request is orphaned, so
		 * don't copy into a random user address space, just free
		 * and return -EINTR so user space doesn't expect any data.
		 */
		if (!current->mm)
			ret = -EINTR;
		else if (bio_data_dir(bio) == READ)
			ret = bio_copy_to_iter(bio, bmd->iter);
		if (bmd->is_our_pages)
			bio_free_pages(bio);
	}
	kfree(bmd);
	bio_put(bio);
	return ret;
}

/**
 * bio_copy_user_iov - copy user data to bio
 * @q: destination block queue
 * @map_data: pointer to the rq_map_data holding pages (if necessary)
 * @iter: iovec iterator
 * @gfp_mask: memory allocation flags
 *
 * Prepares and returns a bio for indirect user io, bouncing data
 * to/from kernel pages as necessary. Must be paired with a call to
 * bio_uncopy_user() on io completion.
 */
static struct bio *bio_copy_user_iov(struct request_queue *q,
		struct rq_map_data *map_data, struct iov_iter *iter,
		gfp_t gfp_mask)
{
	struct bio_map_data *bmd;
	struct page *page;
	struct bio *bio;
	int i = 0, ret;
	int nr_pages;
	unsigned int len = iter->count;
	unsigned int offset = map_data ? offset_in_page(map_data->offset) : 0;

	bmd = bio_alloc_map_data(iter, gfp_mask);
	if (!bmd)
		return ERR_PTR(-ENOMEM);

	/*
	 * We need to do a deep copy of the iov_iter including the iovecs.
	 * The caller provided iov might point to an on-stack or otherwise
	 * shortlived one.
	 */
	bmd->is_our_pages = map_data ? 0 : 1;

	nr_pages = DIV_ROUND_UP(offset + len, PAGE_SIZE);
	if (nr_pages > BIO_MAX_PAGES)
		nr_pages = BIO_MAX_PAGES;

	ret = -ENOMEM;
	bio = bio_kmalloc(gfp_mask, nr_pages);
	if (!bio)
		goto out_bmd;

	ret = 0;

	if (map_data) {
		nr_pages = 1 << map_data->page_order;
		i = map_data->offset / PAGE_SIZE;
	}
	while (len) {
		unsigned int bytes = PAGE_SIZE;

		bytes -= offset;

		if (bytes > len)
			bytes = len;

		if (map_data) {
			if (i == map_data->nr_entries * nr_pages) {
				ret = -ENOMEM;
				break;
			}

			page = map_data->pages[i / nr_pages];
			page += (i % nr_pages);

			i++;
		} else {
			page = alloc_page(q->bounce_gfp | gfp_mask);
			if (!page) {
				ret = -ENOMEM;
				break;
			}
		}

		if (bio_add_pc_page(q, bio, page, bytes, offset) < bytes) {
			if (!map_data)
				__free_page(page);
			break;
		}

		len -= bytes;
		offset = 0;
	}

	if (ret)
		goto cleanup;

	if (map_data)
		map_data->offset += bio->bi_iter.bi_size;

	/*
	 * success
	 */
	if ((iov_iter_rw(iter) == WRITE &&
	     (!map_data || !map_data->null_mapped)) ||
	    (map_data && map_data->from_user)) {
		ret = bio_copy_from_iter(bio, iter);
		if (ret)
			goto cleanup;
	} else {
		if (bmd->is_our_pages)
			zero_fill_bio(bio);
		iov_iter_advance(iter, bio->bi_iter.bi_size);
	}

	bio->bi_private = bmd;
	if (map_data && map_data->null_mapped)
		bio_set_flag(bio, BIO_NULL_MAPPED);
	return bio;
cleanup:
	if (!map_data)
		bio_free_pages(bio);
	bio_put(bio);
out_bmd:
	kfree(bmd);
	return ERR_PTR(ret);
}

/**
 * bio_map_user_iov - map user iovec into bio
 * @q: the struct request_queue for the bio
 * @iter: iovec iterator
 * @gfp_mask: memory allocation flags
 *
 * Map the user space address into a bio suitable for io to a block
 * device. Returns an error pointer in case of error.
 */
static struct bio *bio_map_user_iov(struct request_queue *q,
		struct iov_iter *iter, gfp_t gfp_mask)
{
	unsigned int max_sectors = queue_max_hw_sectors(q);
	int j;
	struct bio *bio;
	int ret;

	if (!iov_iter_count(iter))
		return ERR_PTR(-EINVAL);

	bio = bio_kmalloc(gfp_mask, iov_iter_npages(iter, BIO_MAX_PAGES));
	if (!bio)
		return ERR_PTR(-ENOMEM);

	while (iov_iter_count(iter)) {
		struct page **pages;
		ssize_t bytes;
		size_t offs, added = 0;
		int npages;

		bytes = iov_iter_get_pages_alloc(iter, &pages, LONG_MAX, &offs);
		if (unlikely(bytes <= 0)) {
			ret = bytes ? bytes : -EFAULT;
			goto out_unmap;
		}

		npages = DIV_ROUND_UP(offs + bytes, PAGE_SIZE);

		if (unlikely(offs & queue_dma_alignment(q))) {
			ret = -EINVAL;
			j = 0;
		} else {
			for (j = 0; j < npages; j++) {
				struct page *page = pages[j];
				unsigned int n = PAGE_SIZE - offs;
				bool same_page = false;

				if (n > bytes)
					n = bytes;

				if (!bio_add_hw_page(q, bio, page, n, offs,
						     max_sectors, &same_page)) {
					if (same_page)
						put_page(page);
					break;
				}

				added += n;
				bytes -= n;
				offs = 0;
			}
			iov_iter_advance(iter, added);
		}
		/*
		 * release the pages we didn't map into the bio, if any
		 */
		while (j < npages)
			put_page(pages[j++]);
		kvfree(pages);
		/* couldn't stuff something into bio? */
		if (bytes)
			break;
	}

	bio_set_flag(bio, BIO_USER_MAPPED);

	/*
	 * subtle -- if bio_map_user_iov() ended up bouncing a bio,
	 * it would normally disappear when its bi_end_io is run.
	 * however, we need it for the unmap, so grab an extra
	 * reference to it
	 */
	bio_get(bio);
	return bio;

out_unmap:
	bio_release_pages(bio, false);
	bio_put(bio);
	return ERR_PTR(ret);
}

/**
 * bio_unmap_user - unmap a bio
 * @bio: the bio being unmapped
 *
 * Unmap a bio previously mapped by bio_map_user_iov(). Must be called from
 * process context.
 *
 * bio_unmap_user() may sleep.
 */
static void bio_unmap_user(struct bio *bio)
{
	bio_release_pages(bio, bio_data_dir(bio) == READ);
	bio_put(bio);
	bio_put(bio);
}

static void bio_invalidate_vmalloc_pages(struct bio *bio)
{
#ifdef ARCH_HAS_FLUSH_KERNEL_DCACHE_PAGE
	if (bio->bi_private && !op_is_write(bio_op(bio))) {
		unsigned long i, len = 0;

		for (i = 0; i < bio->bi_vcnt; i++)
			len += bio->bi_io_vec[i].bv_len;
		invalidate_kernel_vmap_range(bio->bi_private, len);
	}
#endif
}

static void bio_map_kern_endio(struct bio *bio)
{
	bio_invalidate_vmalloc_pages(bio);
	bio_put(bio);
}

/**
 * bio_map_kern - map kernel address into bio
 * @q: the struct request_queue for the bio
 * @data: pointer to buffer to map
 * @len: length in bytes
 * @gfp_mask: allocation flags for bio allocation
 *
 * Map the kernel address into a bio suitable for io to a block
 * device. Returns an error pointer in case of error.
 */
static struct bio *bio_map_kern(struct request_queue *q, void *data,
		unsigned int len, gfp_t gfp_mask)
{
	unsigned long kaddr = (unsigned long)data;
	unsigned long end = (kaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
	unsigned long start = kaddr >> PAGE_SHIFT;
	const int nr_pages = end - start;
	bool is_vmalloc = is_vmalloc_addr(data);
	struct page *page;
	int offset, i;
	struct bio *bio;

	bio = bio_kmalloc(gfp_mask, nr_pages);
	if (!bio)
		return ERR_PTR(-ENOMEM);

	if (is_vmalloc) {
		flush_kernel_vmap_range(data, len);
		bio->bi_private = data;
	}

	offset = offset_in_page(kaddr);
	for (i = 0; i < nr_pages; i++) {
		unsigned int bytes = PAGE_SIZE - offset;

		if (len <= 0)
			break;

		if (bytes > len)
			bytes = len;

		if (!is_vmalloc)
			page = virt_to_page(data);
		else
			page = vmalloc_to_page(data);
		if (bio_add_pc_page(q, bio, page, bytes,
				    offset) < bytes) {
			/* we don't support partial mappings */
			bio_put(bio);
			return ERR_PTR(-EINVAL);
		}

		data += bytes;
		len -= bytes;
		offset = 0;
	}

	bio->bi_end_io = bio_map_kern_endio;
	return bio;
}

static void bio_copy_kern_endio(struct bio *bio)
{
	bio_free_pages(bio);
	bio_put(bio);
}

static void bio_copy_kern_endio_read(struct bio *bio)
{
	char *p = bio->bi_private;
	struct bio_vec *bvec;
	struct bvec_iter_all iter_all;

	bio_for_each_segment_all(bvec, bio, iter_all) {
		memcpy(p, page_address(bvec->bv_page), bvec->bv_len);
		p += bvec->bv_len;
	}

	bio_copy_kern_endio(bio);
}

/**
 * bio_copy_kern - copy kernel address into bio
 * @q: the struct request_queue for the bio
 * @data: pointer to buffer to copy
 * @len: length in bytes
 * @gfp_mask: allocation flags for bio and page allocation
 * @reading: data direction is READ
 *
 * copy the kernel address into a bio suitable for io to a block
 * device. Returns an error pointer in case of error.
 */
static struct bio *bio_copy_kern(struct request_queue *q, void *data,
		unsigned int len, gfp_t gfp_mask, int reading)
{
	unsigned long kaddr = (unsigned long)data;
	unsigned long end = (kaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
	unsigned long start = kaddr >> PAGE_SHIFT;
	struct bio *bio;
	void *p = data;
	int nr_pages = 0;

	/*
	 * Overflow, abort
	 */
	if (end < start)
		return ERR_PTR(-EINVAL);

	nr_pages = end - start;
	bio = bio_kmalloc(gfp_mask, nr_pages);
	if (!bio)
		return ERR_PTR(-ENOMEM);

	while (len) {
		struct page *page;
		unsigned int bytes = PAGE_SIZE;

		if (bytes > len)
			bytes = len;

		page = alloc_page(q->bounce_gfp | gfp_mask);
		if (!page)
			goto cleanup;

		if (!reading)
			memcpy(page_address(page), p, bytes);

		if (bio_add_pc_page(q, bio, page, bytes, 0) < bytes)
			break;

		len -= bytes;
		p += bytes;
	}

	if (reading) {
		bio->bi_end_io = bio_copy_kern_endio_read;
		bio->bi_private = data;
	} else {
		bio->bi_end_io = bio_copy_kern_endio;
	}

	return bio;

cleanup:
	bio_free_pages(bio);
	bio_put(bio);
	return ERR_PTR(-ENOMEM);
}

/*
 * Append a bio to a passthrough request. Only works if the bio can be merged
 * into the request based on the driver constraints.
 */
int blk_rq_append_bio(struct request *rq, struct bio **bio)
{
	struct bio *orig_bio = *bio;
	struct bvec_iter iter;
	struct bio_vec bv;
	unsigned int nr_segs = 0;

	blk_queue_bounce(rq->q, bio);

	bio_for_each_bvec(bv, *bio, iter)
		nr_segs++;

	if (!rq->bio) {
		blk_rq_bio_prep(rq, *bio, nr_segs);
	} else {
		if (!ll_back_merge_fn(rq, *bio, nr_segs)) {
			if (orig_bio != *bio) {
				bio_put(*bio);
				*bio = orig_bio;
			}
			return -EINVAL;
		}

		rq->biotail->bi_next = *bio;
		rq->biotail = *bio;
		rq->__data_len += (*bio)->bi_iter.bi_size;
		bio_crypt_free_ctx(*bio);
	}

	return 0;
}
EXPORT_SYMBOL(blk_rq_append_bio);
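
/*
 * Illustrative sketch only, not part of the kernel interface: one way a
 * passthrough driver could feed a single kernel page into a request via
 * blk_rq_append_bio(). The helper name is hypothetical; real callers build
 * their bios from their own page lists. Note that blk_queue_bounce() inside
 * blk_rq_append_bio() may substitute a bounce bio for the one passed in.
 */
static int __maybe_unused example_append_page(struct request *rq,
					      struct page *page,
					      unsigned int len)
{
	struct bio *bio;
	int ret;

	/* room for a single bio_vec */
	bio = bio_kmalloc(GFP_KERNEL, 1);
	if (!bio)
		return -ENOMEM;
	bio->bi_opf = req_op(rq);

	if (bio_add_pc_page(rq->q, bio, page, len, 0) < len) {
		bio_put(bio);
		return -EINVAL;
	}

	ret = blk_rq_append_bio(rq, &bio);
	if (ret)
		/* on failure *bio was restored to the original, drop it */
		bio_put(bio);
	return ret;
}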

static int __blk_rq_unmap_user(struct bio *bio)
{
	int ret = 0;

	if (bio) {
		if (bio_flagged(bio, BIO_USER_MAPPED))
			bio_unmap_user(bio);
		else
			ret = bio_uncopy_user(bio);
	}

	return ret;
}

static int __blk_rq_map_user_iov(struct request *rq,
		struct rq_map_data *map_data, struct iov_iter *iter,
		gfp_t gfp_mask, bool copy)
{
	struct request_queue *q = rq->q;
	struct bio *bio, *orig_bio;
	int ret;

	if (copy)
		bio = bio_copy_user_iov(q, map_data, iter, gfp_mask);
	else
		bio = bio_map_user_iov(q, iter, gfp_mask);

	if (IS_ERR(bio))
		return PTR_ERR(bio);

	bio->bi_opf &= ~REQ_OP_MASK;
	bio->bi_opf |= req_op(rq);

	orig_bio = bio;

	/*
	 * We link the bounce buffer in and could have to traverse it
	 * later so we have to get a ref to prevent it from being freed
	 */
	ret = blk_rq_append_bio(rq, &bio);
	if (ret) {
		__blk_rq_unmap_user(orig_bio);
		return ret;
	}
	bio_get(bio);

	return 0;
}

/**
 * blk_rq_map_user_iov - map user data to a request, for passthrough requests
 * @q: request queue where request should be inserted
 * @rq: request to map data to
 * @map_data: pointer to the rq_map_data holding pages (if necessary)
 * @iter: iovec iterator
 * @gfp_mask: memory allocation flags
 *
 * Description:
 *    Data will be mapped directly for zero copy I/O, if possible. Otherwise
 *    a kernel bounce buffer is used.
 *
 *    A matching blk_rq_unmap_user() must be issued at the end of I/O, while
 *    still in process context.
 *
 *    Note: The mapped bio may need to be bounced through blk_queue_bounce()
 *    before being submitted to the device, as pages mapped may be out of
 *    reach. It's the caller's responsibility to make sure this happens. The
 *    original bio must be passed back in to blk_rq_unmap_user() for proper
 *    unmapping.
 */
int blk_rq_map_user_iov(struct request_queue *q, struct request *rq,
			struct rq_map_data *map_data,
			const struct iov_iter *iter, gfp_t gfp_mask)
{
	bool copy = false;
	unsigned long align = q->dma_pad_mask | queue_dma_alignment(q);
	struct bio *bio = NULL;
	struct iov_iter i;
	int ret = -EINVAL;

	if (!iter_is_iovec(iter))
		goto fail;

	if (map_data)
		copy = true;
	else if (iov_iter_alignment(iter) & align)
		copy = true;
	else if (queue_virt_boundary(q))
		copy = queue_virt_boundary(q) & iov_iter_gap_alignment(iter);

	i = *iter;
	do {
		ret = __blk_rq_map_user_iov(rq, map_data, &i, gfp_mask, copy);
		if (ret)
			goto unmap_rq;
		if (!bio)
			bio = rq->bio;
	} while (iov_iter_count(&i));

	return 0;

unmap_rq:
	blk_rq_unmap_user(bio);
fail:
	rq->bio = NULL;
	return ret;
}
EXPORT_SYMBOL(blk_rq_map_user_iov);

int blk_rq_map_user(struct request_queue *q, struct request *rq,
		    struct rq_map_data *map_data, void __user *ubuf,
		    unsigned long len, gfp_t gfp_mask)
{
	struct iovec iov;
	struct iov_iter i;
	int ret = import_single_range(rq_data_dir(rq), ubuf, len, &iov, &i);

	if (unlikely(ret < 0))
		return ret;

	return blk_rq_map_user_iov(q, rq, map_data, &i, gfp_mask);
}
EXPORT_SYMBOL(blk_rq_map_user);

/**
 * blk_rq_unmap_user - unmap a request with user data
 * @bio: start of bio list
 *
 * Description:
 *    Unmap a rq previously mapped by blk_rq_map_user(). The caller must
 *    supply the original rq->bio from the blk_rq_map_user() return, since
 *    the I/O completion may have changed rq->bio.
 */
int blk_rq_unmap_user(struct bio *bio)
{
	struct bio *mapped_bio;
	int ret = 0, ret2;

	while (bio) {
		mapped_bio = bio;
		if (unlikely(bio_flagged(bio, BIO_BOUNCED)))
			mapped_bio = bio->bi_private;

		ret2 = __blk_rq_unmap_user(mapped_bio);
		if (ret2 && !ret)
			ret = ret2;

		mapped_bio = bio;
		bio = bio->bi_next;
		bio_put(mapped_bio);
	}

	return ret;
}
EXPORT_SYMBOL(blk_rq_unmap_user);

/**
 * blk_rq_map_kern - map kernel data to a request, for passthrough requests
 * @q: request queue where request should be inserted
 * @rq: request to fill
 * @kbuf: the kernel buffer
 * @len: length of kernel data
 * @gfp_mask: memory allocation flags
 *
 * Description:
 *    Data will be mapped directly if possible. Otherwise a bounce
 *    buffer is used. Can be called multiple times to append multiple
 *    buffers.
 */
int blk_rq_map_kern(struct request_queue *q, struct request *rq, void *kbuf,
		    unsigned int len, gfp_t gfp_mask)
{
	int reading = rq_data_dir(rq) == READ;
	unsigned long addr = (unsigned long) kbuf;
	struct bio *bio, *orig_bio;
	int ret;

	if (len > (queue_max_hw_sectors(q) << 9))
		return -EINVAL;
	if (!len || !kbuf)
		return -EINVAL;

	if (!blk_rq_aligned(q, addr, len) || object_is_on_stack(kbuf))
		bio = bio_copy_kern(q, kbuf, len, gfp_mask, reading);
	else
		bio = bio_map_kern(q, kbuf, len, gfp_mask);

	if (IS_ERR(bio))
		return PTR_ERR(bio);

	bio->bi_opf &= ~REQ_OP_MASK;
	bio->bi_opf |= req_op(rq);

	orig_bio = bio;
	ret = blk_rq_append_bio(rq, &bio);
	if (unlikely(ret)) {
		/* request is too big */
		bio_put(orig_bio);
		return ret;
	}

	return 0;
}
EXPORT_SYMBOL(blk_rq_map_kern);
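
/*
 * Illustrative sketch only, not part of the kernel interface: the usual
 * calling pattern for the user-mapping helpers above, loosely modelled on
 * SG_IO style ioctls. The function name, the REQ_OP_DRV_IN choice and the
 * synchronous blk_execute_rq() call are assumptions for the example; a real
 * driver would also fill in its command payload before executing and would
 * check the request's completion status.
 */
static int __maybe_unused example_user_passthrough(struct request_queue *q,
						   struct gendisk *disk,
						   void __user *ubuf,
						   unsigned long len)
{
	struct request *rq;
	struct bio *bio;
	int ret;

	rq = blk_get_request(q, REQ_OP_DRV_IN, 0);
	if (IS_ERR(rq))
		return PTR_ERR(rq);

	/* map (or bounce-copy) the user buffer into the request */
	ret = blk_rq_map_user(q, rq, NULL, ubuf, len, GFP_KERNEL);
	if (ret)
		goto out_put;

	/* completion may change rq->bio, so remember the original */
	bio = rq->bio;
	blk_execute_rq(q, disk, rq, 0);

	/* copy back / release pages and drop the mapping references */
	ret = blk_rq_unmap_user(bio);
out_put:
	blk_put_request(rq);
	return ret;
}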