// SPDX-License-Identifier: GPL-2.0
/*
 * Functions related to mapping data to requests
 */
#include <linux/kernel.h>
#include <linux/sched/task_stack.h>
#include <linux/module.h>
#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/uio.h>

#include "blk.h"

struct bio_map_data {
        bool is_our_pages : 1;
        bool is_null_mapped : 1;
        struct iov_iter iter;
        struct iovec iov[];
};

static struct bio_map_data *bio_alloc_map_data(struct iov_iter *data,
                gfp_t gfp_mask)
{
        struct bio_map_data *bmd;

        if (data->nr_segs > UIO_MAXIOV)
                return NULL;

        bmd = kmalloc(struct_size(bmd, iov, data->nr_segs), gfp_mask);
        if (!bmd)
                return NULL;
        memcpy(bmd->iov, data->iov, sizeof(struct iovec) * data->nr_segs);
        bmd->iter = *data;
        bmd->iter.iov = bmd->iov;
        return bmd;
}

/**
 * bio_copy_from_iter - copy all pages from iov_iter to bio
 * @bio: The &struct bio which describes the I/O as destination
 * @iter: iov_iter as source
 *
 * Copy all pages from iov_iter to bio.
 * Returns 0 on success, or error on failure.
 */
static int bio_copy_from_iter(struct bio *bio, struct iov_iter *iter)
{
        struct bio_vec *bvec;
        struct bvec_iter_all iter_all;

        bio_for_each_segment_all(bvec, bio, iter_all) {
                ssize_t ret;

                ret = copy_page_from_iter(bvec->bv_page,
                                          bvec->bv_offset,
                                          bvec->bv_len,
                                          iter);

                if (!iov_iter_count(iter))
                        break;

                if (ret < bvec->bv_len)
                        return -EFAULT;
        }

        return 0;
}

/**
 * bio_copy_to_iter - copy all pages from bio to iov_iter
 * @bio: The &struct bio which describes the I/O as source
 * @iter: iov_iter as destination
 *
 * Copy all pages from bio to iov_iter.
 * Returns 0 on success, or error on failure.
 */
static int bio_copy_to_iter(struct bio *bio, struct iov_iter iter)
{
        struct bio_vec *bvec;
        struct bvec_iter_all iter_all;

        bio_for_each_segment_all(bvec, bio, iter_all) {
                ssize_t ret;

                ret = copy_page_to_iter(bvec->bv_page,
                                        bvec->bv_offset,
                                        bvec->bv_len,
                                        &iter);

                if (!iov_iter_count(&iter))
                        break;

                if (ret < bvec->bv_len)
                        return -EFAULT;
        }

        return 0;
}

/**
 * bio_uncopy_user - finish previously mapped bio
 * @bio: bio being terminated
 *
 * Free pages allocated from bio_copy_user_iov() and write back data
 * to user space in case of a read.
 */
static int bio_uncopy_user(struct bio *bio)
{
        struct bio_map_data *bmd = bio->bi_private;
        int ret = 0;

        if (!bmd->is_null_mapped) {
                /*
                 * if we're in a workqueue, the request is orphaned, so
                 * don't copy into a random user address space, just free
                 * and return -EINTR so user space doesn't expect any data.
                 */
                if (!current->mm)
                        ret = -EINTR;
                else if (bio_data_dir(bio) == READ)
                        ret = bio_copy_to_iter(bio, bmd->iter);
                if (bmd->is_our_pages)
                        bio_free_pages(bio);
        }
        kfree(bmd);
        bio_put(bio);
        return ret;
}

static int bio_copy_user_iov(struct request *rq, struct rq_map_data *map_data,
                struct iov_iter *iter, gfp_t gfp_mask)
{
        struct bio_map_data *bmd;
        struct page *page;
        struct bio *bio, *bounce_bio;
        int i = 0, ret;
        int nr_pages;
        unsigned int len = iter->count;
        unsigned int offset = map_data ? offset_in_page(map_data->offset) : 0;

        bmd = bio_alloc_map_data(iter, gfp_mask);
        if (!bmd)
                return -ENOMEM;

        /*
         * We need to do a deep copy of the iov_iter including the iovecs.
         * The caller provided iov might point to an on-stack or otherwise
         * shortlived one.
         */
        bmd->is_our_pages = !map_data;

        nr_pages = DIV_ROUND_UP(offset + len, PAGE_SIZE);
        if (nr_pages > BIO_MAX_PAGES)
                nr_pages = BIO_MAX_PAGES;

        ret = -ENOMEM;
        bio = bio_kmalloc(gfp_mask, nr_pages);
        if (!bio)
                goto out_bmd;
        bio->bi_opf |= req_op(rq);

        if (map_data) {
                nr_pages = 1 << map_data->page_order;
                i = map_data->offset / PAGE_SIZE;
        }
        while (len) {
                unsigned int bytes = PAGE_SIZE;

                bytes -= offset;

                if (bytes > len)
                        bytes = len;

                if (map_data) {
                        if (i == map_data->nr_entries * nr_pages) {
                                ret = -ENOMEM;
                                goto cleanup;
                        }

                        page = map_data->pages[i / nr_pages];
                        page += (i % nr_pages);

                        i++;
                } else {
                        page = alloc_page(rq->q->bounce_gfp | gfp_mask);
                        if (!page) {
                                ret = -ENOMEM;
                                goto cleanup;
                        }
                }

                if (bio_add_pc_page(rq->q, bio, page, bytes, offset) < bytes) {
                        if (!map_data)
                                __free_page(page);
                        break;
                }

                len -= bytes;
                offset = 0;
        }

        if (map_data)
                map_data->offset += bio->bi_iter.bi_size;

        /*
         * success
         */
        if ((iov_iter_rw(iter) == WRITE &&
             (!map_data || !map_data->null_mapped)) ||
            (map_data && map_data->from_user)) {
                ret = bio_copy_from_iter(bio, iter);
                if (ret)
                        goto cleanup;
        } else {
                if (bmd->is_our_pages)
                        zero_fill_bio(bio);
                iov_iter_advance(iter, bio->bi_iter.bi_size);
        }

        bio->bi_private = bmd;
        if (map_data && map_data->null_mapped)
                bmd->is_null_mapped = true;

        bounce_bio = bio;
        ret = blk_rq_append_bio(rq, &bounce_bio);
        if (ret)
                goto cleanup;

        /*
         * We link the bounce buffer in and could have to traverse it later, so
         * we have to get a ref to prevent it from being freed
         */
        bio_get(bounce_bio);
        return 0;
cleanup:
        if (!map_data)
                bio_free_pages(bio);
        bio_put(bio);
out_bmd:
        kfree(bmd);
        return ret;
}

static int bio_map_user_iov(struct request *rq, struct iov_iter *iter,
                gfp_t gfp_mask)
{
        unsigned int max_sectors = queue_max_hw_sectors(rq->q);
        struct bio *bio, *bounce_bio;
        int ret;
        int j;

        if (!iov_iter_count(iter))
                return -EINVAL;

        bio = bio_kmalloc(gfp_mask, iov_iter_npages(iter, BIO_MAX_PAGES));
        if (!bio)
                return -ENOMEM;
        bio->bi_opf |= req_op(rq);

        while (iov_iter_count(iter)) {
                struct page **pages;
                ssize_t bytes;
                size_t offs, added = 0;
                int npages;

                bytes = iov_iter_get_pages_alloc(iter, &pages, LONG_MAX, &offs);
                if (unlikely(bytes <= 0)) {
                        ret = bytes ? bytes : -EFAULT;
                        goto out_unmap;
                }

                npages = DIV_ROUND_UP(offs + bytes, PAGE_SIZE);

                if (unlikely(offs & queue_dma_alignment(rq->q))) {
                        ret = -EINVAL;
                        j = 0;
                } else {
                        for (j = 0; j < npages; j++) {
                                struct page *page = pages[j];
                                unsigned int n = PAGE_SIZE - offs;
                                bool same_page = false;

                                if (n > bytes)
                                        n = bytes;

                                if (!bio_add_hw_page(rq->q, bio, page, n, offs,
                                                     max_sectors, &same_page)) {
                                        if (same_page)
                                                put_page(page);
                                        break;
                                }

                                added += n;
                                bytes -= n;
                                offs = 0;
                        }
                        iov_iter_advance(iter, added);
                }
                /*
                 * release the pages we didn't map into the bio, if any
                 */
                while (j < npages)
                        put_page(pages[j++]);
                kvfree(pages);
                /* couldn't stuff something into bio? */
                if (bytes)
                        break;
        }

        /*
         * Subtle: if we end up needing to bounce a bio, it would normally
         * disappear when its bi_end_io is run. However, we need the original
         * bio for the unmap, so grab an extra reference to it
         */
        bio_get(bio);

        bounce_bio = bio;
        ret = blk_rq_append_bio(rq, &bounce_bio);
        if (ret)
                goto out_put_orig;

        /*
         * We link the bounce buffer in and could have to traverse it
         * later, so we have to get a ref to prevent it from being freed
         */
        bio_get(bounce_bio);
        return 0;

out_put_orig:
        bio_put(bio);
out_unmap:
        bio_release_pages(bio, false);
        bio_put(bio);
        return ret;
}

/**
 * bio_unmap_user - unmap a bio
 * @bio: the bio being unmapped
 *
 * Unmap a bio previously mapped by bio_map_user_iov(). Must be called from
 * process context.
 *
 * bio_unmap_user() may sleep.
 */
static void bio_unmap_user(struct bio *bio)
{
        bio_release_pages(bio, bio_data_dir(bio) == READ);
        bio_put(bio);
        bio_put(bio);
}

static void bio_invalidate_vmalloc_pages(struct bio *bio)
{
#ifdef ARCH_HAS_FLUSH_KERNEL_DCACHE_PAGE
        if (bio->bi_private && !op_is_write(bio_op(bio))) {
                unsigned long i, len = 0;

                for (i = 0; i < bio->bi_vcnt; i++)
                        len += bio->bi_io_vec[i].bv_len;
                invalidate_kernel_vmap_range(bio->bi_private, len);
        }
#endif
}

static void bio_map_kern_endio(struct bio *bio)
{
        bio_invalidate_vmalloc_pages(bio);
        bio_put(bio);
}

/**
 * bio_map_kern - map kernel address into bio
 * @q: the struct request_queue for the bio
 * @data: pointer to buffer to map
 * @len: length in bytes
 * @gfp_mask: allocation flags for bio allocation
 *
 * Map the kernel address into a bio suitable for I/O to a block
 * device. Returns an error pointer in case of error.
 */
static struct bio *bio_map_kern(struct request_queue *q, void *data,
                unsigned int len, gfp_t gfp_mask)
{
        unsigned long kaddr = (unsigned long)data;
        unsigned long end = (kaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
        unsigned long start = kaddr >> PAGE_SHIFT;
        const int nr_pages = end - start;
        bool is_vmalloc = is_vmalloc_addr(data);
        struct page *page;
        int offset, i;
        struct bio *bio;

        bio = bio_kmalloc(gfp_mask, nr_pages);
        if (!bio)
                return ERR_PTR(-ENOMEM);

        if (is_vmalloc) {
                flush_kernel_vmap_range(data, len);
                bio->bi_private = data;
        }

        offset = offset_in_page(kaddr);
        for (i = 0; i < nr_pages; i++) {
                unsigned int bytes = PAGE_SIZE - offset;

                if (len <= 0)
                        break;

                if (bytes > len)
                        bytes = len;

                if (!is_vmalloc)
                        page = virt_to_page(data);
                else
                        page = vmalloc_to_page(data);
                if (bio_add_pc_page(q, bio, page, bytes,
                                    offset) < bytes) {
                        /* we don't support partial mappings */
                        bio_put(bio);
                        return ERR_PTR(-EINVAL);
                }

                data += bytes;
                len -= bytes;
                offset = 0;
        }

        bio->bi_end_io = bio_map_kern_endio;
        return bio;
}

static void bio_copy_kern_endio(struct bio *bio)
{
        bio_free_pages(bio);
        bio_put(bio);
}

static void bio_copy_kern_endio_read(struct bio *bio)
{
        char *p = bio->bi_private;
        struct bio_vec *bvec;
        struct bvec_iter_all iter_all;

        bio_for_each_segment_all(bvec, bio, iter_all) {
                memcpy(p, page_address(bvec->bv_page), bvec->bv_len);
                p += bvec->bv_len;
        }

        bio_copy_kern_endio(bio);
}

/**
 * bio_copy_kern - copy kernel address into bio
 * @q: the struct request_queue for the bio
 * @data: pointer to buffer to copy
 * @len: length in bytes
 * @gfp_mask: allocation flags for bio and page allocation
 * @reading: data direction is READ
 *
 * Copy the kernel address into a bio suitable for I/O to a block
 * device. Returns an error pointer in case of error.
 */
static struct bio *bio_copy_kern(struct request_queue *q, void *data,
                unsigned int len, gfp_t gfp_mask, int reading)
{
        unsigned long kaddr = (unsigned long)data;
        unsigned long end = (kaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
        unsigned long start = kaddr >> PAGE_SHIFT;
        struct bio *bio;
        void *p = data;
        int nr_pages = 0;

        /*
         * Overflow, abort
         */
        if (end < start)
                return ERR_PTR(-EINVAL);

        nr_pages = end - start;
        bio = bio_kmalloc(gfp_mask, nr_pages);
        if (!bio)
                return ERR_PTR(-ENOMEM);

        while (len) {
                struct page *page;
                unsigned int bytes = PAGE_SIZE;

                if (bytes > len)
                        bytes = len;

                page = alloc_page(q->bounce_gfp | gfp_mask);
                if (!page)
                        goto cleanup;

                if (!reading)
                        memcpy(page_address(page), p, bytes);

                if (bio_add_pc_page(q, bio, page, bytes, 0) < bytes)
                        break;

                len -= bytes;
                p += bytes;
        }

        if (reading) {
                bio->bi_end_io = bio_copy_kern_endio_read;
                bio->bi_private = data;
        } else {
                bio->bi_end_io = bio_copy_kern_endio;
        }

        return bio;

cleanup:
        bio_free_pages(bio);
        bio_put(bio);
        return ERR_PTR(-ENOMEM);
}

/*
 * Append a bio to a passthrough request. Only works if the bio can be merged
 * into the request based on the driver constraints.
 */
int blk_rq_append_bio(struct request *rq, struct bio **bio)
{
        struct bio *orig_bio = *bio;
        struct bvec_iter iter;
        struct bio_vec bv;
        unsigned int nr_segs = 0;

        blk_queue_bounce(rq->q, bio);

        bio_for_each_bvec(bv, *bio, iter)
                nr_segs++;

        if (!rq->bio) {
                blk_rq_bio_prep(rq, *bio, nr_segs);
        } else {
                if (!ll_back_merge_fn(rq, *bio, nr_segs)) {
                        if (orig_bio != *bio) {
                                bio_put(*bio);
                                *bio = orig_bio;
                        }
                        return -EINVAL;
                }

                rq->biotail->bi_next = *bio;
                rq->biotail = *bio;
                rq->__data_len += (*bio)->bi_iter.bi_size;
                bio_crypt_free_ctx(*bio);
        }

        return 0;
}
EXPORT_SYMBOL(blk_rq_append_bio);
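
/*
 * Example (sketch only): a caller that builds its own bio for a passthrough
 * request would hand it to blk_rq_append_bio() roughly as below.
 * my_build_bio() is a hypothetical helper, not something defined here; the
 * point is the double-pointer contract: on success *bio may have been
 * replaced by a bounce bio, while on failure the caller still owns the
 * original bio and must put it.
 *
 *        struct bio *bio = my_build_bio();
 *        int ret;
 *
 *        ret = blk_rq_append_bio(rq, &bio);
 *        if (ret) {
 *                bio_put(bio);        // not linked into rq, still ours
 *                return ret;
 *        }
 */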

/**
 * blk_rq_map_user_iov - map user data to a request, for passthrough requests
 * @q: request queue where request should be inserted
 * @rq: request to map data to
 * @map_data: pointer to the rq_map_data holding pages (if necessary)
 * @iter: iovec iterator
 * @gfp_mask: memory allocation flags
 *
 * Description:
 *    Data will be mapped directly for zero copy I/O, if possible. Otherwise
 *    a kernel bounce buffer is used.
 *
 *    A matching blk_rq_unmap_user() must be issued at the end of I/O, while
 *    still in process context.
 *
 * Note: The mapped bio may need to be bounced through blk_queue_bounce()
 * before being submitted to the device, as pages mapped may be out of
 * reach. It's the caller's responsibility to make sure this happens. The
 * original bio must be passed back in to blk_rq_unmap_user() for proper
 * unmapping.
 */
int blk_rq_map_user_iov(struct request_queue *q, struct request *rq,
                        struct rq_map_data *map_data,
                        const struct iov_iter *iter, gfp_t gfp_mask)
{
        bool copy = false;
        unsigned long align = q->dma_pad_mask | queue_dma_alignment(q);
        struct bio *bio = NULL;
        struct iov_iter i;
        int ret = -EINVAL;

        if (!iter_is_iovec(iter))
                goto fail;

        if (map_data)
                copy = true;
        else if (iov_iter_alignment(iter) & align)
                copy = true;
        else if (queue_virt_boundary(q))
                copy = queue_virt_boundary(q) & iov_iter_gap_alignment(iter);

        i = *iter;
        do {
                if (copy)
                        ret = bio_copy_user_iov(rq, map_data, &i, gfp_mask);
                else
                        ret = bio_map_user_iov(rq, &i, gfp_mask);
                if (ret)
                        goto unmap_rq;
                if (!bio)
                        bio = rq->bio;
        } while (iov_iter_count(&i));

        return 0;

unmap_rq:
        blk_rq_unmap_user(bio);
fail:
        rq->bio = NULL;
        return ret;
}
EXPORT_SYMBOL(blk_rq_map_user_iov);

int blk_rq_map_user(struct request_queue *q, struct request *rq,
                    struct rq_map_data *map_data, void __user *ubuf,
                    unsigned long len, gfp_t gfp_mask)
{
        struct iovec iov;
        struct iov_iter i;
        int ret = import_single_range(rq_data_dir(rq), ubuf, len, &iov, &i);

        if (unlikely(ret < 0))
                return ret;

        return blk_rq_map_user_iov(q, rq, map_data, &i, gfp_mask);
}
EXPORT_SYMBOL(blk_rq_map_user);
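
/*
 * Example (sketch only): an SG_IO-style passthrough path would typically
 * pair blk_rq_map_user() with blk_rq_unmap_user() along these lines,
 * assuming the usual blk_get_request()/blk_execute_rq()/blk_put_request()
 * request lifecycle; command setup and timeouts are omitted.  Note that
 * rq->bio is saved immediately after mapping, since completion may change
 * rq->bio and the unmap needs the original head of the bio list.
 *
 *        struct request *rq;
 *        struct bio *bio;
 *        int ret;
 *
 *        rq = blk_get_request(q, REQ_OP_DRV_IN, 0);
 *        if (IS_ERR(rq))
 *                return PTR_ERR(rq);
 *
 *        ret = blk_rq_map_user(q, rq, NULL, ubuf, len, GFP_KERNEL);
 *        if (ret)
 *                goto out_put;
 *        bio = rq->bio;
 *
 *        blk_execute_rq(q, NULL, rq, 0);
 *
 *        ret = blk_rq_unmap_user(bio);
 * out_put:
 *        blk_put_request(rq);
 *        return ret;
 */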

/**
 * blk_rq_unmap_user - unmap a request with user data
 * @bio: start of bio list
 *
 * Description:
 *    Unmap an rq previously mapped by blk_rq_map_user(). The caller must
 *    supply the original rq->bio from the blk_rq_map_user() return, since
 *    the I/O completion may have changed rq->bio.
 */
int blk_rq_unmap_user(struct bio *bio)
{
        struct bio *mapped_bio;
        int ret = 0, ret2;

        while (bio) {
                mapped_bio = bio;
                if (unlikely(bio_flagged(bio, BIO_BOUNCED)))
                        mapped_bio = bio->bi_private;

                if (bio->bi_private) {
                        ret2 = bio_uncopy_user(mapped_bio);
                        if (ret2 && !ret)
                                ret = ret2;
                } else {
                        bio_unmap_user(mapped_bio);
                }

                mapped_bio = bio;
                bio = bio->bi_next;
                bio_put(mapped_bio);
        }

        return ret;
}
EXPORT_SYMBOL(blk_rq_unmap_user);

/**
 * blk_rq_map_kern - map kernel data to a request, for passthrough requests
 * @q: request queue where request should be inserted
 * @rq: request to fill
 * @kbuf: the kernel buffer
 * @len: length of kernel data
 * @gfp_mask: memory allocation flags
 *
 * Description:
 *    Data will be mapped directly if possible. Otherwise a bounce
 *    buffer is used. Can be called multiple times to append multiple
 *    buffers.
 */
int blk_rq_map_kern(struct request_queue *q, struct request *rq, void *kbuf,
                    unsigned int len, gfp_t gfp_mask)
{
        int reading = rq_data_dir(rq) == READ;
        unsigned long addr = (unsigned long) kbuf;
        struct bio *bio, *orig_bio;
        int ret;

        if (len > (queue_max_hw_sectors(q) << 9))
                return -EINVAL;
        if (!len || !kbuf)
                return -EINVAL;

        if (!blk_rq_aligned(q, addr, len) || object_is_on_stack(kbuf))
                bio = bio_copy_kern(q, kbuf, len, gfp_mask, reading);
        else
                bio = bio_map_kern(q, kbuf, len, gfp_mask);

        if (IS_ERR(bio))
                return PTR_ERR(bio);

        bio->bi_opf &= ~REQ_OP_MASK;
        bio->bi_opf |= req_op(rq);

        orig_bio = bio;
        ret = blk_rq_append_bio(rq, &bio);
        if (unlikely(ret)) {
                /* request is too big */
                bio_put(orig_bio);
                return ret;
        }

        return 0;
}
EXPORT_SYMBOL(blk_rq_map_kern);
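
/*
 * Example (sketch only): mapping a kernel buffer into a passthrough request.
 * The request setup and execution are assumed to follow the user-space
 * example above (blk_get_request()/blk_execute_rq()).  A stack buffer would
 * not fail here, it would simply take the copying bio_copy_kern() path.  No
 * unmap call is needed: the copy path frees its bounce pages from the bio's
 * end_io handler, and the caller keeps ownership of buf.
 *
 *        void *buf = kmalloc(len, GFP_KERNEL);
 *        int ret;
 *
 *        if (!buf)
 *                return -ENOMEM;
 *        ret = blk_rq_map_kern(q, rq, buf, len, GFP_KERNEL);
 *        if (ret)
 *                goto out_free;
 *        blk_execute_rq(q, NULL, rq, 0);
 * out_free:
 *        kfree(buf);
 *        return ret;
 */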