// SPDX-License-Identifier: GPL-2.0
/*
 * Functions related to mapping data to requests
 */
#include <linux/kernel.h>
#include <linux/sched/task_stack.h>
#include <linux/module.h>
#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/uio.h>

#include "blk.h"

struct bio_map_data {
	bool is_our_pages : 1;
	bool is_null_mapped : 1;
	struct iov_iter iter;
	struct iovec iov[];
};

static struct bio_map_data *bio_alloc_map_data(struct iov_iter *data,
					       gfp_t gfp_mask)
{
	struct bio_map_data *bmd;

	if (data->nr_segs > UIO_MAXIOV)
		return NULL;

	bmd = kmalloc(struct_size(bmd, iov, data->nr_segs), gfp_mask);
	if (!bmd)
		return NULL;
	memcpy(bmd->iov, data->iov, sizeof(struct iovec) * data->nr_segs);
	bmd->iter = *data;
	bmd->iter.iov = bmd->iov;
	return bmd;
}

/**
 * bio_copy_from_iter - copy all pages from iov_iter to bio
 * @bio: The &struct bio which describes the I/O as destination
 * @iter: iov_iter as source
 *
 * Copy all pages from iov_iter to bio.
 * Returns 0 on success, or error on failure.
 */
static int bio_copy_from_iter(struct bio *bio, struct iov_iter *iter)
{
	struct bio_vec *bvec;
	struct bvec_iter_all iter_all;

	bio_for_each_segment_all(bvec, bio, iter_all) {
		ssize_t ret;

		ret = copy_page_from_iter(bvec->bv_page,
					  bvec->bv_offset,
					  bvec->bv_len,
					  iter);

		if (!iov_iter_count(iter))
			break;

		if (ret < bvec->bv_len)
			return -EFAULT;
	}

	return 0;
}

/**
 * bio_copy_to_iter - copy all pages from bio to iov_iter
 * @bio: The &struct bio which describes the I/O as source
 * @iter: iov_iter as destination
 *
 * Copy all pages from bio to iov_iter.
 * Returns 0 on success, or error on failure.
 */
static int bio_copy_to_iter(struct bio *bio, struct iov_iter iter)
{
	struct bio_vec *bvec;
	struct bvec_iter_all iter_all;

	bio_for_each_segment_all(bvec, bio, iter_all) {
		ssize_t ret;

		ret = copy_page_to_iter(bvec->bv_page,
					bvec->bv_offset,
					bvec->bv_len,
					&iter);

		if (!iov_iter_count(&iter))
			break;

		if (ret < bvec->bv_len)
			return -EFAULT;
	}

	return 0;
}

/**
 * bio_uncopy_user - finish previously mapped bio
 * @bio: bio being terminated
 *
 * Free pages allocated from bio_copy_user_iov() and write back data
 * to user space in case of a read.
 */
static int bio_uncopy_user(struct bio *bio)
{
	struct bio_map_data *bmd = bio->bi_private;
	int ret = 0;

	if (!bmd->is_null_mapped) {
		/*
		 * if we're in a workqueue, the request is orphaned, so
		 * don't copy into a random user address space, just free
		 * and return -EINTR so user space doesn't expect any data.
		 */
		if (!current->mm)
			ret = -EINTR;
		else if (bio_data_dir(bio) == READ)
			ret = bio_copy_to_iter(bio, bmd->iter);
		if (bmd->is_our_pages)
			bio_free_pages(bio);
	}
	kfree(bmd);
	bio_put(bio);
	return ret;
}

static int bio_copy_user_iov(struct request *rq, struct rq_map_data *map_data,
		struct iov_iter *iter, gfp_t gfp_mask)
{
	struct bio_map_data *bmd;
	struct page *page;
	struct bio *bio, *bounce_bio;
	int i = 0, ret;
	int nr_pages;
	unsigned int len = iter->count;
	unsigned int offset = map_data ?
			offset_in_page(map_data->offset) : 0;

	bmd = bio_alloc_map_data(iter, gfp_mask);
	if (!bmd)
		return -ENOMEM;

	/*
	 * We need to do a deep copy of the iov_iter including the iovecs.
	 * The caller provided iov might point to an on-stack or otherwise
	 * shortlived one.
	 */
	bmd->is_our_pages = !map_data;
	bmd->is_null_mapped = (map_data && map_data->null_mapped);

	nr_pages = DIV_ROUND_UP(offset + len, PAGE_SIZE);
	if (nr_pages > BIO_MAX_PAGES)
		nr_pages = BIO_MAX_PAGES;

	ret = -ENOMEM;
	bio = bio_kmalloc(gfp_mask, nr_pages);
	if (!bio)
		goto out_bmd;
	bio->bi_opf |= req_op(rq);

	if (map_data) {
		nr_pages = 1 << map_data->page_order;
		i = map_data->offset / PAGE_SIZE;
	}
	while (len) {
		unsigned int bytes = PAGE_SIZE;

		bytes -= offset;

		if (bytes > len)
			bytes = len;

		if (map_data) {
			if (i == map_data->nr_entries * nr_pages) {
				ret = -ENOMEM;
				goto cleanup;
			}

			page = map_data->pages[i / nr_pages];
			page += (i % nr_pages);

			i++;
		} else {
			page = alloc_page(rq->q->bounce_gfp | gfp_mask);
			if (!page) {
				ret = -ENOMEM;
				goto cleanup;
			}
		}

		if (bio_add_pc_page(rq->q, bio, page, bytes, offset) < bytes) {
			if (!map_data)
				__free_page(page);
			break;
		}

		len -= bytes;
		offset = 0;
	}

	if (map_data)
		map_data->offset += bio->bi_iter.bi_size;

	/*
	 * success
	 */
	if ((iov_iter_rw(iter) == WRITE &&
	     (!map_data || !map_data->null_mapped)) ||
	    (map_data && map_data->from_user)) {
		ret = bio_copy_from_iter(bio, iter);
		if (ret)
			goto cleanup;
	} else {
		if (bmd->is_our_pages)
			zero_fill_bio(bio);
		iov_iter_advance(iter, bio->bi_iter.bi_size);
	}

	bio->bi_private = bmd;

	bounce_bio = bio;
	ret = blk_rq_append_bio(rq, &bounce_bio);
	if (ret)
		goto cleanup;

	/*
	 * We link the bounce buffer in and could have to traverse it later, so
	 * we have to get a ref to prevent it from being freed
	 */
	bio_get(bounce_bio);
	return 0;
cleanup:
	if (!map_data)
		bio_free_pages(bio);
	bio_put(bio);
out_bmd:
	kfree(bmd);
	return ret;
}

static int bio_map_user_iov(struct request *rq, struct iov_iter *iter,
		gfp_t gfp_mask)
{
	unsigned int max_sectors = queue_max_hw_sectors(rq->q);
	struct bio *bio, *bounce_bio;
	int ret;
	int j;

	if (!iov_iter_count(iter))
		return -EINVAL;

	bio = bio_kmalloc(gfp_mask, iov_iter_npages(iter, BIO_MAX_PAGES));
	if (!bio)
		return -ENOMEM;
	bio->bi_opf |= req_op(rq);

	while (iov_iter_count(iter)) {
		struct page **pages;
		ssize_t bytes;
		size_t offs, added = 0;
		int npages;

		bytes = iov_iter_get_pages_alloc(iter, &pages, LONG_MAX, &offs);
		if (unlikely(bytes <= 0)) {
			ret = bytes ?
				bytes : -EFAULT;
			goto out_unmap;
		}

		npages = DIV_ROUND_UP(offs + bytes, PAGE_SIZE);

		if (unlikely(offs & queue_dma_alignment(rq->q))) {
			ret = -EINVAL;
			j = 0;
		} else {
			for (j = 0; j < npages; j++) {
				struct page *page = pages[j];
				unsigned int n = PAGE_SIZE - offs;
				bool same_page = false;

				if (n > bytes)
					n = bytes;

				if (!bio_add_hw_page(rq->q, bio, page, n, offs,
						     max_sectors, &same_page)) {
					if (same_page)
						put_page(page);
					break;
				}

				added += n;
				bytes -= n;
				offs = 0;
			}
			iov_iter_advance(iter, added);
		}
		/*
		 * release the pages we didn't map into the bio, if any
		 */
		while (j < npages)
			put_page(pages[j++]);
		kvfree(pages);
		/* couldn't stuff something into bio? */
		if (bytes)
			break;
	}

	/*
	 * Subtle: if we end up needing to bounce a bio, it would normally
	 * disappear when its bi_end_io is run. However, we need the original
	 * bio for the unmap, so grab an extra reference to it
	 */
	bio_get(bio);

	bounce_bio = bio;
	ret = blk_rq_append_bio(rq, &bounce_bio);
	if (ret)
		goto out_put_orig;

	/*
	 * We link the bounce buffer in and could have to traverse it
	 * later, so we have to get a ref to prevent it from being freed
	 */
	bio_get(bounce_bio);
	return 0;

out_put_orig:
	bio_put(bio);
out_unmap:
	bio_release_pages(bio, false);
	bio_put(bio);
	return ret;
}

/**
 * bio_unmap_user - unmap a bio
 * @bio: the bio being unmapped
 *
 * Unmap a bio previously mapped by bio_map_user_iov(). Must be called from
 * process context.
 *
 * bio_unmap_user() may sleep.
 */
static void bio_unmap_user(struct bio *bio)
{
	bio_release_pages(bio, bio_data_dir(bio) == READ);
	bio_put(bio);
	bio_put(bio);
}

static void bio_invalidate_vmalloc_pages(struct bio *bio)
{
#ifdef ARCH_HAS_FLUSH_KERNEL_DCACHE_PAGE
	if (bio->bi_private && !op_is_write(bio_op(bio))) {
		unsigned long i, len = 0;

		for (i = 0; i < bio->bi_vcnt; i++)
			len += bio->bi_io_vec[i].bv_len;
		invalidate_kernel_vmap_range(bio->bi_private, len);
	}
#endif
}

static void bio_map_kern_endio(struct bio *bio)
{
	bio_invalidate_vmalloc_pages(bio);
	bio_put(bio);
}

/**
 * bio_map_kern - map kernel address into bio
 * @q: the struct request_queue for the bio
 * @data: pointer to buffer to map
 * @len: length in bytes
 * @gfp_mask: allocation flags for bio allocation
 *
 * Map the kernel address into a bio suitable for I/O to a block
 * device. Returns an error pointer in case of error.
 */
static struct bio *bio_map_kern(struct request_queue *q, void *data,
		unsigned int len, gfp_t gfp_mask)
{
	unsigned long kaddr = (unsigned long)data;
	unsigned long end = (kaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
	unsigned long start = kaddr >> PAGE_SHIFT;
	const int nr_pages = end - start;
	bool is_vmalloc = is_vmalloc_addr(data);
	struct page *page;
	int offset, i;
	struct bio *bio;

	bio = bio_kmalloc(gfp_mask, nr_pages);
	if (!bio)
		return ERR_PTR(-ENOMEM);

	if (is_vmalloc) {
		flush_kernel_vmap_range(data, len);
		bio->bi_private = data;
	}

	offset = offset_in_page(kaddr);
	for (i = 0; i < nr_pages; i++) {
		unsigned int bytes = PAGE_SIZE - offset;

		if (len <= 0)
			break;

		if (bytes > len)
			bytes = len;

		if (!is_vmalloc)
			page = virt_to_page(data);
		else
			page = vmalloc_to_page(data);
		if (bio_add_pc_page(q, bio, page, bytes,
				    offset) < bytes) {
			/* we don't support partial mappings */
			bio_put(bio);
			return ERR_PTR(-EINVAL);
		}

		data += bytes;
		len -= bytes;
		offset = 0;
	}

	bio->bi_end_io = bio_map_kern_endio;
	return bio;
}

static void bio_copy_kern_endio(struct bio *bio)
{
	bio_free_pages(bio);
	bio_put(bio);
}

static void bio_copy_kern_endio_read(struct bio *bio)
{
	char *p = bio->bi_private;
	struct bio_vec *bvec;
	struct bvec_iter_all iter_all;

	bio_for_each_segment_all(bvec, bio, iter_all) {
		memcpy(p, page_address(bvec->bv_page), bvec->bv_len);
		p += bvec->bv_len;
	}

	bio_copy_kern_endio(bio);
}

/**
 * bio_copy_kern - copy kernel address into bio
 * @q: the struct request_queue for the bio
 * @data: pointer to buffer to copy
 * @len: length in bytes
 * @gfp_mask: allocation flags for bio and page allocation
 * @reading: data direction is READ
 *
 * Copy the kernel address into a bio suitable for I/O to a block
 * device. Returns an error pointer in case of error.
 */
static struct bio *bio_copy_kern(struct request_queue *q, void *data,
		unsigned int len, gfp_t gfp_mask, int reading)
{
	unsigned long kaddr = (unsigned long)data;
	unsigned long end = (kaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
	unsigned long start = kaddr >> PAGE_SHIFT;
	struct bio *bio;
	void *p = data;
	int nr_pages = 0;

	/*
	 * Overflow, abort
	 */
	if (end < start)
		return ERR_PTR(-EINVAL);

	nr_pages = end - start;
	bio = bio_kmalloc(gfp_mask, nr_pages);
	if (!bio)
		return ERR_PTR(-ENOMEM);

	while (len) {
		struct page *page;
		unsigned int bytes = PAGE_SIZE;

		if (bytes > len)
			bytes = len;

		page = alloc_page(q->bounce_gfp | gfp_mask);
		if (!page)
			goto cleanup;

		if (!reading)
			memcpy(page_address(page), p, bytes);

		if (bio_add_pc_page(q, bio, page, bytes, 0) < bytes)
			break;

		len -= bytes;
		p += bytes;
	}

	if (reading) {
		bio->bi_end_io = bio_copy_kern_endio_read;
		bio->bi_private = data;
	} else {
		bio->bi_end_io = bio_copy_kern_endio;
	}

	return bio;

cleanup:
	bio_free_pages(bio);
	bio_put(bio);
	return ERR_PTR(-ENOMEM);
}

/*
 * Append a bio to a passthrough request. Only works if the bio can be merged
 * into the request based on the driver constraints.
 */
int blk_rq_append_bio(struct request *rq, struct bio **bio)
{
	struct bio *orig_bio = *bio;
	struct bvec_iter iter;
	struct bio_vec bv;
	unsigned int nr_segs = 0;

	blk_queue_bounce(rq->q, bio);

	bio_for_each_bvec(bv, *bio, iter)
		nr_segs++;

	if (!rq->bio) {
		blk_rq_bio_prep(rq, *bio, nr_segs);
	} else {
		if (!ll_back_merge_fn(rq, *bio, nr_segs)) {
			if (orig_bio != *bio) {
				bio_put(*bio);
				*bio = orig_bio;
			}
			return -EINVAL;
		}

		rq->biotail->bi_next = *bio;
		rq->biotail = *bio;
		rq->__data_len += (*bio)->bi_iter.bi_size;
		bio_crypt_free_ctx(*bio);
	}

	return 0;
}
EXPORT_SYMBOL(blk_rq_append_bio);

/**
 * blk_rq_map_user_iov - map user data to a request, for passthrough requests
 * @q: request queue where request should be inserted
 * @rq: request to map data to
 * @map_data: pointer to the rq_map_data holding pages (if necessary)
 * @iter: iovec iterator
 * @gfp_mask: memory allocation flags
 *
 * Description:
 *    Data will be mapped directly for zero copy I/O, if possible. Otherwise
 *    a kernel bounce buffer is used.
 *
 *    A matching blk_rq_unmap_user() must be issued at the end of I/O, while
 *    still in process context.
 *
 *    Note: The mapped bio may need to be bounced through blk_queue_bounce()
 *    before being submitted to the device, as pages mapped may be out of
 *    reach. It's the caller's responsibility to make sure this happens. The
 *    original bio must be passed back in to blk_rq_unmap_user() for proper
 *    unmapping.
 */
int blk_rq_map_user_iov(struct request_queue *q, struct request *rq,
			struct rq_map_data *map_data,
			const struct iov_iter *iter, gfp_t gfp_mask)
{
	bool copy = false;
	unsigned long align = q->dma_pad_mask | queue_dma_alignment(q);
	struct bio *bio = NULL;
	struct iov_iter i;
	int ret = -EINVAL;

	if (!iter_is_iovec(iter))
		goto fail;

	if (map_data)
		copy = true;
	else if (iov_iter_alignment(iter) & align)
		copy = true;
	else if (queue_virt_boundary(q))
		copy = queue_virt_boundary(q) & iov_iter_gap_alignment(iter);

	i = *iter;
	do {
		if (copy)
			ret = bio_copy_user_iov(rq, map_data, &i, gfp_mask);
		else
			ret = bio_map_user_iov(rq, &i, gfp_mask);
		if (ret)
			goto unmap_rq;
		if (!bio)
			bio = rq->bio;
	} while (iov_iter_count(&i));

	return 0;

unmap_rq:
	blk_rq_unmap_user(bio);
fail:
	rq->bio = NULL;
	return ret;
}
EXPORT_SYMBOL(blk_rq_map_user_iov);

int blk_rq_map_user(struct request_queue *q, struct request *rq,
		    struct rq_map_data *map_data, void __user *ubuf,
		    unsigned long len, gfp_t gfp_mask)
{
	struct iovec iov;
	struct iov_iter i;
	int ret = import_single_range(rq_data_dir(rq), ubuf, len, &iov, &i);

	if (unlikely(ret < 0))
		return ret;

	return blk_rq_map_user_iov(q, rq, map_data, &i, gfp_mask);
}
EXPORT_SYMBOL(blk_rq_map_user);

/**
 * blk_rq_unmap_user - unmap a request with user data
 * @bio: start of bio list
 *
 * Description:
 *    Unmap a rq previously mapped by blk_rq_map_user(). The caller must
 *    supply the original rq->bio from the blk_rq_map_user() return, since
 *    the I/O completion may have changed rq->bio.
 */
int blk_rq_unmap_user(struct bio *bio)
{
	struct bio *mapped_bio;
	int ret = 0, ret2;

	while (bio) {
		mapped_bio = bio;
		if (unlikely(bio_flagged(bio, BIO_BOUNCED)))
			mapped_bio = bio->bi_private;

		if (bio->bi_private) {
			ret2 = bio_uncopy_user(mapped_bio);
			if (ret2 && !ret)
				ret = ret2;
		} else {
			bio_unmap_user(mapped_bio);
		}

		mapped_bio = bio;
		bio = bio->bi_next;
		bio_put(mapped_bio);
	}

	return ret;
}
EXPORT_SYMBOL(blk_rq_unmap_user);

/**
 * blk_rq_map_kern - map kernel data to a request, for passthrough requests
 * @q: request queue where request should be inserted
 * @rq: request to fill
 * @kbuf: the kernel buffer
 * @len: length of kernel data
 * @gfp_mask: memory allocation flags
 *
 * Description:
 *    Data will be mapped directly if possible. Otherwise a bounce
 *    buffer is used. Can be called multiple times to append multiple
 *    buffers.
 */
int blk_rq_map_kern(struct request_queue *q, struct request *rq, void *kbuf,
		    unsigned int len, gfp_t gfp_mask)
{
	int reading = rq_data_dir(rq) == READ;
	unsigned long addr = (unsigned long) kbuf;
	struct bio *bio, *orig_bio;
	int ret;

	if (len > (queue_max_hw_sectors(q) << 9))
		return -EINVAL;
	if (!len || !kbuf)
		return -EINVAL;

	if (!blk_rq_aligned(q, addr, len) || object_is_on_stack(kbuf))
		bio = bio_copy_kern(q, kbuf, len, gfp_mask, reading);
	else
		bio = bio_map_kern(q, kbuf, len, gfp_mask);

	if (IS_ERR(bio))
		return PTR_ERR(bio);

	bio->bi_opf &= ~REQ_OP_MASK;
	bio->bi_opf |= req_op(rq);

	orig_bio = bio;
	ret = blk_rq_append_bio(rq, &bio);
	if (unlikely(ret)) {
		/* request is too big */
		bio_put(orig_bio);
		return ret;
	}

	return 0;
}
EXPORT_SYMBOL(blk_rq_map_kern);
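
/*
 * Editor's illustrative sketch (not part of the original file): how a
 * passthrough path might typically combine blk_rq_map_user() and
 * blk_rq_unmap_user() around request execution, assuming the
 * blk_get_request()/blk_execute_rq() interfaces of this kernel generation.
 * The function name example_passthrough_read() is hypothetical; real users
 * of this pattern are ioctl paths such as SG_IO and NVMe user commands.
 */
static int __maybe_unused example_passthrough_read(struct request_queue *q,
		void __user *ubuf, unsigned long len)
{
	struct request *rq;
	struct bio *bio;
	int ret;

	/* REQ_OP_DRV_IN: device-to-host passthrough request */
	rq = blk_get_request(q, REQ_OP_DRV_IN, 0);
	if (IS_ERR(rq))
		return PTR_ERR(rq);

	/* Map the user buffer; no rq_map_data, so pages are mapped/bounced */
	ret = blk_rq_map_user(q, rq, NULL, ubuf, len, GFP_KERNEL);
	if (ret)
		goto out_put;

	/*
	 * Remember the original bio: completion may change rq->bio, and
	 * blk_rq_unmap_user() needs the bio returned by the mapping.
	 */
	bio = rq->bio;

	blk_execute_rq(q, NULL, rq, 0);

	/* Copy back / release the mapped pages while still in process context */
	ret = blk_rq_unmap_user(bio);
out_put:
	blk_put_request(rq);
	return ret;
}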