1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Functions related to mapping data to requests 4 */ 5 #include <linux/kernel.h> 6 #include <linux/sched/task_stack.h> 7 #include <linux/module.h> 8 #include <linux/bio.h> 9 #include <linux/blkdev.h> 10 #include <linux/uio.h> 11 12 #include "blk.h" 13 14 struct bio_map_data { 15 bool is_our_pages : 1; 16 bool is_null_mapped : 1; 17 struct iov_iter iter; 18 struct iovec iov[]; 19 }; 20 21 static struct bio_map_data *bio_alloc_map_data(struct iov_iter *data, 22 gfp_t gfp_mask) 23 { 24 struct bio_map_data *bmd; 25 26 if (data->nr_segs > UIO_MAXIOV) 27 return NULL; 28 29 bmd = kmalloc(struct_size(bmd, iov, data->nr_segs), gfp_mask); 30 if (!bmd) 31 return NULL; 32 memcpy(bmd->iov, data->iov, sizeof(struct iovec) * data->nr_segs); 33 bmd->iter = *data; 34 bmd->iter.iov = bmd->iov; 35 return bmd; 36 } 37 38 /** 39 * bio_copy_from_iter - copy all pages from iov_iter to bio 40 * @bio: The &struct bio which describes the I/O as destination 41 * @iter: iov_iter as source 42 * 43 * Copy all pages from iov_iter to bio. 44 * Returns 0 on success, or error on failure. 45 */ 46 static int bio_copy_from_iter(struct bio *bio, struct iov_iter *iter) 47 { 48 struct bio_vec *bvec; 49 struct bvec_iter_all iter_all; 50 51 bio_for_each_segment_all(bvec, bio, iter_all) { 52 ssize_t ret; 53 54 ret = copy_page_from_iter(bvec->bv_page, 55 bvec->bv_offset, 56 bvec->bv_len, 57 iter); 58 59 if (!iov_iter_count(iter)) 60 break; 61 62 if (ret < bvec->bv_len) 63 return -EFAULT; 64 } 65 66 return 0; 67 } 68 69 /** 70 * bio_copy_to_iter - copy all pages from bio to iov_iter 71 * @bio: The &struct bio which describes the I/O as source 72 * @iter: iov_iter as destination 73 * 74 * Copy all pages from bio to iov_iter. 75 * Returns 0 on success, or error on failure. 76 */ 77 static int bio_copy_to_iter(struct bio *bio, struct iov_iter iter) 78 { 79 struct bio_vec *bvec; 80 struct bvec_iter_all iter_all; 81 82 bio_for_each_segment_all(bvec, bio, iter_all) { 83 ssize_t ret; 84 85 ret = copy_page_to_iter(bvec->bv_page, 86 bvec->bv_offset, 87 bvec->bv_len, 88 &iter); 89 90 if (!iov_iter_count(&iter)) 91 break; 92 93 if (ret < bvec->bv_len) 94 return -EFAULT; 95 } 96 97 return 0; 98 } 99 100 /** 101 * bio_uncopy_user - finish previously mapped bio 102 * @bio: bio being terminated 103 * 104 * Free pages allocated from bio_copy_user_iov() and write back data 105 * to user space in case of a read. 106 */ 107 static int bio_uncopy_user(struct bio *bio) 108 { 109 struct bio_map_data *bmd = bio->bi_private; 110 int ret = 0; 111 112 if (!bmd->is_null_mapped) { 113 /* 114 * if we're in a workqueue, the request is orphaned, so 115 * don't copy into a random user address space, just free 116 * and return -EINTR so user space doesn't expect any data. 117 */ 118 if (!current->mm) 119 ret = -EINTR; 120 else if (bio_data_dir(bio) == READ) 121 ret = bio_copy_to_iter(bio, bmd->iter); 122 if (bmd->is_our_pages) 123 bio_free_pages(bio); 124 } 125 kfree(bmd); 126 return ret; 127 } 128 129 static int bio_copy_user_iov(struct request *rq, struct rq_map_data *map_data, 130 struct iov_iter *iter, gfp_t gfp_mask) 131 { 132 struct bio_map_data *bmd; 133 struct page *page; 134 struct bio *bio; 135 int i = 0, ret; 136 int nr_pages; 137 unsigned int len = iter->count; 138 unsigned int offset = map_data ? offset_in_page(map_data->offset) : 0; 139 140 bmd = bio_alloc_map_data(iter, gfp_mask); 141 if (!bmd) 142 return -ENOMEM; 143 144 /* 145 * We need to do a deep copy of the iov_iter including the iovecs. 146 * The caller provided iov might point to an on-stack or otherwise 147 * shortlived one. 148 */ 149 bmd->is_our_pages = !map_data; 150 bmd->is_null_mapped = (map_data && map_data->null_mapped); 151 152 nr_pages = bio_max_segs(DIV_ROUND_UP(offset + len, PAGE_SIZE)); 153 154 ret = -ENOMEM; 155 bio = bio_kmalloc(nr_pages, gfp_mask); 156 if (!bio) 157 goto out_bmd; 158 bio_init(bio, NULL, bio->bi_inline_vecs, nr_pages, req_op(rq)); 159 160 if (map_data) { 161 nr_pages = 1U << map_data->page_order; 162 i = map_data->offset / PAGE_SIZE; 163 } 164 while (len) { 165 unsigned int bytes = PAGE_SIZE; 166 167 bytes -= offset; 168 169 if (bytes > len) 170 bytes = len; 171 172 if (map_data) { 173 if (i == map_data->nr_entries * nr_pages) { 174 ret = -ENOMEM; 175 goto cleanup; 176 } 177 178 page = map_data->pages[i / nr_pages]; 179 page += (i % nr_pages); 180 181 i++; 182 } else { 183 page = alloc_page(GFP_NOIO | gfp_mask); 184 if (!page) { 185 ret = -ENOMEM; 186 goto cleanup; 187 } 188 } 189 190 if (bio_add_pc_page(rq->q, bio, page, bytes, offset) < bytes) { 191 if (!map_data) 192 __free_page(page); 193 break; 194 } 195 196 len -= bytes; 197 offset = 0; 198 } 199 200 if (map_data) 201 map_data->offset += bio->bi_iter.bi_size; 202 203 /* 204 * success 205 */ 206 if ((iov_iter_rw(iter) == WRITE && 207 (!map_data || !map_data->null_mapped)) || 208 (map_data && map_data->from_user)) { 209 ret = bio_copy_from_iter(bio, iter); 210 if (ret) 211 goto cleanup; 212 } else { 213 if (bmd->is_our_pages) 214 zero_fill_bio(bio); 215 iov_iter_advance(iter, bio->bi_iter.bi_size); 216 } 217 218 bio->bi_private = bmd; 219 220 ret = blk_rq_append_bio(rq, bio); 221 if (ret) 222 goto cleanup; 223 return 0; 224 cleanup: 225 if (!map_data) 226 bio_free_pages(bio); 227 bio_uninit(bio); 228 kfree(bio); 229 out_bmd: 230 kfree(bmd); 231 return ret; 232 } 233 234 static void blk_mq_map_bio_put(struct bio *bio) 235 { 236 if (bio->bi_opf & REQ_ALLOC_CACHE) { 237 bio_put(bio); 238 } else { 239 bio_uninit(bio); 240 kfree(bio); 241 } 242 } 243 244 static struct bio *blk_rq_map_bio_alloc(struct request *rq, 245 unsigned int nr_vecs, gfp_t gfp_mask) 246 { 247 struct bio *bio; 248 249 if (rq->cmd_flags & REQ_POLLED) { 250 blk_opf_t opf = rq->cmd_flags | REQ_ALLOC_CACHE; 251 252 bio = bio_alloc_bioset(NULL, nr_vecs, opf, gfp_mask, 253 &fs_bio_set); 254 if (!bio) 255 return NULL; 256 } else { 257 bio = bio_kmalloc(nr_vecs, gfp_mask); 258 if (!bio) 259 return NULL; 260 bio_init(bio, NULL, bio->bi_inline_vecs, nr_vecs, req_op(rq)); 261 } 262 return bio; 263 } 264 265 static int bio_map_user_iov(struct request *rq, struct iov_iter *iter, 266 gfp_t gfp_mask) 267 { 268 unsigned int max_sectors = queue_max_hw_sectors(rq->q); 269 unsigned int nr_vecs = iov_iter_npages(iter, BIO_MAX_VECS); 270 struct bio *bio; 271 int ret; 272 int j; 273 274 if (!iov_iter_count(iter)) 275 return -EINVAL; 276 277 bio = blk_rq_map_bio_alloc(rq, nr_vecs, gfp_mask); 278 if (bio == NULL) 279 return -ENOMEM; 280 281 while (iov_iter_count(iter)) { 282 struct page **pages, *stack_pages[UIO_FASTIOV]; 283 ssize_t bytes; 284 size_t offs; 285 int npages; 286 287 if (nr_vecs <= ARRAY_SIZE(stack_pages)) { 288 pages = stack_pages; 289 bytes = iov_iter_get_pages2(iter, pages, LONG_MAX, 290 nr_vecs, &offs); 291 } else { 292 bytes = iov_iter_get_pages_alloc2(iter, &pages, 293 LONG_MAX, &offs); 294 } 295 if (unlikely(bytes <= 0)) { 296 ret = bytes ? bytes : -EFAULT; 297 goto out_unmap; 298 } 299 300 npages = DIV_ROUND_UP(offs + bytes, PAGE_SIZE); 301 302 if (unlikely(offs & queue_dma_alignment(rq->q))) 303 j = 0; 304 else { 305 for (j = 0; j < npages; j++) { 306 struct page *page = pages[j]; 307 unsigned int n = PAGE_SIZE - offs; 308 bool same_page = false; 309 310 if (n > bytes) 311 n = bytes; 312 313 if (!bio_add_hw_page(rq->q, bio, page, n, offs, 314 max_sectors, &same_page)) { 315 if (same_page) 316 put_page(page); 317 break; 318 } 319 320 bytes -= n; 321 offs = 0; 322 } 323 } 324 /* 325 * release the pages we didn't map into the bio, if any 326 */ 327 while (j < npages) 328 put_page(pages[j++]); 329 if (pages != stack_pages) 330 kvfree(pages); 331 /* couldn't stuff something into bio? */ 332 if (bytes) { 333 iov_iter_revert(iter, bytes); 334 break; 335 } 336 } 337 338 ret = blk_rq_append_bio(rq, bio); 339 if (ret) 340 goto out_unmap; 341 return 0; 342 343 out_unmap: 344 bio_release_pages(bio, false); 345 blk_mq_map_bio_put(bio); 346 return ret; 347 } 348 349 static void bio_invalidate_vmalloc_pages(struct bio *bio) 350 { 351 #ifdef ARCH_IMPLEMENTS_FLUSH_KERNEL_VMAP_RANGE 352 if (bio->bi_private && !op_is_write(bio_op(bio))) { 353 unsigned long i, len = 0; 354 355 for (i = 0; i < bio->bi_vcnt; i++) 356 len += bio->bi_io_vec[i].bv_len; 357 invalidate_kernel_vmap_range(bio->bi_private, len); 358 } 359 #endif 360 } 361 362 static void bio_map_kern_endio(struct bio *bio) 363 { 364 bio_invalidate_vmalloc_pages(bio); 365 bio_uninit(bio); 366 kfree(bio); 367 } 368 369 /** 370 * bio_map_kern - map kernel address into bio 371 * @q: the struct request_queue for the bio 372 * @data: pointer to buffer to map 373 * @len: length in bytes 374 * @gfp_mask: allocation flags for bio allocation 375 * 376 * Map the kernel address into a bio suitable for io to a block 377 * device. Returns an error pointer in case of error. 378 */ 379 static struct bio *bio_map_kern(struct request_queue *q, void *data, 380 unsigned int len, gfp_t gfp_mask) 381 { 382 unsigned long kaddr = (unsigned long)data; 383 unsigned long end = (kaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT; 384 unsigned long start = kaddr >> PAGE_SHIFT; 385 const int nr_pages = end - start; 386 bool is_vmalloc = is_vmalloc_addr(data); 387 struct page *page; 388 int offset, i; 389 struct bio *bio; 390 391 bio = bio_kmalloc(nr_pages, gfp_mask); 392 if (!bio) 393 return ERR_PTR(-ENOMEM); 394 bio_init(bio, NULL, bio->bi_inline_vecs, nr_pages, 0); 395 396 if (is_vmalloc) { 397 flush_kernel_vmap_range(data, len); 398 bio->bi_private = data; 399 } 400 401 offset = offset_in_page(kaddr); 402 for (i = 0; i < nr_pages; i++) { 403 unsigned int bytes = PAGE_SIZE - offset; 404 405 if (len <= 0) 406 break; 407 408 if (bytes > len) 409 bytes = len; 410 411 if (!is_vmalloc) 412 page = virt_to_page(data); 413 else 414 page = vmalloc_to_page(data); 415 if (bio_add_pc_page(q, bio, page, bytes, 416 offset) < bytes) { 417 /* we don't support partial mappings */ 418 bio_uninit(bio); 419 kfree(bio); 420 return ERR_PTR(-EINVAL); 421 } 422 423 data += bytes; 424 len -= bytes; 425 offset = 0; 426 } 427 428 bio->bi_end_io = bio_map_kern_endio; 429 return bio; 430 } 431 432 static void bio_copy_kern_endio(struct bio *bio) 433 { 434 bio_free_pages(bio); 435 bio_uninit(bio); 436 kfree(bio); 437 } 438 439 static void bio_copy_kern_endio_read(struct bio *bio) 440 { 441 char *p = bio->bi_private; 442 struct bio_vec *bvec; 443 struct bvec_iter_all iter_all; 444 445 bio_for_each_segment_all(bvec, bio, iter_all) { 446 memcpy_from_bvec(p, bvec); 447 p += bvec->bv_len; 448 } 449 450 bio_copy_kern_endio(bio); 451 } 452 453 /** 454 * bio_copy_kern - copy kernel address into bio 455 * @q: the struct request_queue for the bio 456 * @data: pointer to buffer to copy 457 * @len: length in bytes 458 * @gfp_mask: allocation flags for bio and page allocation 459 * @reading: data direction is READ 460 * 461 * copy the kernel address into a bio suitable for io to a block 462 * device. Returns an error pointer in case of error. 463 */ 464 static struct bio *bio_copy_kern(struct request_queue *q, void *data, 465 unsigned int len, gfp_t gfp_mask, int reading) 466 { 467 unsigned long kaddr = (unsigned long)data; 468 unsigned long end = (kaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT; 469 unsigned long start = kaddr >> PAGE_SHIFT; 470 struct bio *bio; 471 void *p = data; 472 int nr_pages = 0; 473 474 /* 475 * Overflow, abort 476 */ 477 if (end < start) 478 return ERR_PTR(-EINVAL); 479 480 nr_pages = end - start; 481 bio = bio_kmalloc(nr_pages, gfp_mask); 482 if (!bio) 483 return ERR_PTR(-ENOMEM); 484 bio_init(bio, NULL, bio->bi_inline_vecs, nr_pages, 0); 485 486 while (len) { 487 struct page *page; 488 unsigned int bytes = PAGE_SIZE; 489 490 if (bytes > len) 491 bytes = len; 492 493 page = alloc_page(GFP_NOIO | __GFP_ZERO | gfp_mask); 494 if (!page) 495 goto cleanup; 496 497 if (!reading) 498 memcpy(page_address(page), p, bytes); 499 500 if (bio_add_pc_page(q, bio, page, bytes, 0) < bytes) 501 break; 502 503 len -= bytes; 504 p += bytes; 505 } 506 507 if (reading) { 508 bio->bi_end_io = bio_copy_kern_endio_read; 509 bio->bi_private = data; 510 } else { 511 bio->bi_end_io = bio_copy_kern_endio; 512 } 513 514 return bio; 515 516 cleanup: 517 bio_free_pages(bio); 518 bio_uninit(bio); 519 kfree(bio); 520 return ERR_PTR(-ENOMEM); 521 } 522 523 /* 524 * Append a bio to a passthrough request. Only works if the bio can be merged 525 * into the request based on the driver constraints. 526 */ 527 int blk_rq_append_bio(struct request *rq, struct bio *bio) 528 { 529 struct bvec_iter iter; 530 struct bio_vec bv; 531 unsigned int nr_segs = 0; 532 533 bio_for_each_bvec(bv, bio, iter) 534 nr_segs++; 535 536 if (!rq->bio) { 537 blk_rq_bio_prep(rq, bio, nr_segs); 538 } else { 539 if (!ll_back_merge_fn(rq, bio, nr_segs)) 540 return -EINVAL; 541 rq->biotail->bi_next = bio; 542 rq->biotail = bio; 543 rq->__data_len += (bio)->bi_iter.bi_size; 544 bio_crypt_free_ctx(bio); 545 } 546 547 return 0; 548 } 549 EXPORT_SYMBOL(blk_rq_append_bio); 550 551 /** 552 * blk_rq_map_user_iov - map user data to a request, for passthrough requests 553 * @q: request queue where request should be inserted 554 * @rq: request to map data to 555 * @map_data: pointer to the rq_map_data holding pages (if necessary) 556 * @iter: iovec iterator 557 * @gfp_mask: memory allocation flags 558 * 559 * Description: 560 * Data will be mapped directly for zero copy I/O, if possible. Otherwise 561 * a kernel bounce buffer is used. 562 * 563 * A matching blk_rq_unmap_user() must be issued at the end of I/O, while 564 * still in process context. 565 */ 566 int blk_rq_map_user_iov(struct request_queue *q, struct request *rq, 567 struct rq_map_data *map_data, 568 const struct iov_iter *iter, gfp_t gfp_mask) 569 { 570 bool copy = false; 571 unsigned long align = q->dma_pad_mask | queue_dma_alignment(q); 572 struct bio *bio = NULL; 573 struct iov_iter i; 574 int ret = -EINVAL; 575 576 if (!iter_is_iovec(iter)) 577 goto fail; 578 579 if (map_data) 580 copy = true; 581 else if (blk_queue_may_bounce(q)) 582 copy = true; 583 else if (iov_iter_alignment(iter) & align) 584 copy = true; 585 else if (queue_virt_boundary(q)) 586 copy = queue_virt_boundary(q) & iov_iter_gap_alignment(iter); 587 588 i = *iter; 589 do { 590 if (copy) 591 ret = bio_copy_user_iov(rq, map_data, &i, gfp_mask); 592 else 593 ret = bio_map_user_iov(rq, &i, gfp_mask); 594 if (ret) 595 goto unmap_rq; 596 if (!bio) 597 bio = rq->bio; 598 } while (iov_iter_count(&i)); 599 600 return 0; 601 602 unmap_rq: 603 blk_rq_unmap_user(bio); 604 fail: 605 rq->bio = NULL; 606 return ret; 607 } 608 EXPORT_SYMBOL(blk_rq_map_user_iov); 609 610 int blk_rq_map_user(struct request_queue *q, struct request *rq, 611 struct rq_map_data *map_data, void __user *ubuf, 612 unsigned long len, gfp_t gfp_mask) 613 { 614 struct iovec iov; 615 struct iov_iter i; 616 int ret = import_single_range(rq_data_dir(rq), ubuf, len, &iov, &i); 617 618 if (unlikely(ret < 0)) 619 return ret; 620 621 return blk_rq_map_user_iov(q, rq, map_data, &i, gfp_mask); 622 } 623 EXPORT_SYMBOL(blk_rq_map_user); 624 625 int blk_rq_map_user_io(struct request *req, struct rq_map_data *map_data, 626 void __user *ubuf, unsigned long buf_len, gfp_t gfp_mask, 627 bool vec, int iov_count, bool check_iter_count, int rw) 628 { 629 int ret = 0; 630 631 if (vec) { 632 struct iovec fast_iov[UIO_FASTIOV]; 633 struct iovec *iov = fast_iov; 634 struct iov_iter iter; 635 636 ret = import_iovec(rw, ubuf, iov_count ? iov_count : buf_len, 637 UIO_FASTIOV, &iov, &iter); 638 if (ret < 0) 639 return ret; 640 641 if (iov_count) { 642 /* SG_IO howto says that the shorter of the two wins */ 643 iov_iter_truncate(&iter, buf_len); 644 if (check_iter_count && !iov_iter_count(&iter)) { 645 kfree(iov); 646 return -EINVAL; 647 } 648 } 649 650 ret = blk_rq_map_user_iov(req->q, req, map_data, &iter, 651 gfp_mask); 652 kfree(iov); 653 } else if (buf_len) { 654 ret = blk_rq_map_user(req->q, req, map_data, ubuf, buf_len, 655 gfp_mask); 656 } 657 return ret; 658 } 659 EXPORT_SYMBOL(blk_rq_map_user_io); 660 661 /** 662 * blk_rq_unmap_user - unmap a request with user data 663 * @bio: start of bio list 664 * 665 * Description: 666 * Unmap a rq previously mapped by blk_rq_map_user(). The caller must 667 * supply the original rq->bio from the blk_rq_map_user() return, since 668 * the I/O completion may have changed rq->bio. 669 */ 670 int blk_rq_unmap_user(struct bio *bio) 671 { 672 struct bio *next_bio; 673 int ret = 0, ret2; 674 675 while (bio) { 676 if (bio->bi_private) { 677 ret2 = bio_uncopy_user(bio); 678 if (ret2 && !ret) 679 ret = ret2; 680 } else { 681 bio_release_pages(bio, bio_data_dir(bio) == READ); 682 } 683 684 next_bio = bio; 685 bio = bio->bi_next; 686 blk_mq_map_bio_put(next_bio); 687 } 688 689 return ret; 690 } 691 EXPORT_SYMBOL(blk_rq_unmap_user); 692 693 /** 694 * blk_rq_map_kern - map kernel data to a request, for passthrough requests 695 * @q: request queue where request should be inserted 696 * @rq: request to fill 697 * @kbuf: the kernel buffer 698 * @len: length of user data 699 * @gfp_mask: memory allocation flags 700 * 701 * Description: 702 * Data will be mapped directly if possible. Otherwise a bounce 703 * buffer is used. Can be called multiple times to append multiple 704 * buffers. 705 */ 706 int blk_rq_map_kern(struct request_queue *q, struct request *rq, void *kbuf, 707 unsigned int len, gfp_t gfp_mask) 708 { 709 int reading = rq_data_dir(rq) == READ; 710 unsigned long addr = (unsigned long) kbuf; 711 struct bio *bio; 712 int ret; 713 714 if (len > (queue_max_hw_sectors(q) << 9)) 715 return -EINVAL; 716 if (!len || !kbuf) 717 return -EINVAL; 718 719 if (!blk_rq_aligned(q, addr, len) || object_is_on_stack(kbuf) || 720 blk_queue_may_bounce(q)) 721 bio = bio_copy_kern(q, kbuf, len, gfp_mask, reading); 722 else 723 bio = bio_map_kern(q, kbuf, len, gfp_mask); 724 725 if (IS_ERR(bio)) 726 return PTR_ERR(bio); 727 728 bio->bi_opf &= ~REQ_OP_MASK; 729 bio->bi_opf |= req_op(rq); 730 731 ret = blk_rq_append_bio(rq, bio); 732 if (unlikely(ret)) { 733 bio_uninit(bio); 734 kfree(bio); 735 } 736 return ret; 737 } 738 EXPORT_SYMBOL(blk_rq_map_kern); 739