1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Functions related to mapping data to requests 4 */ 5 #include <linux/kernel.h> 6 #include <linux/sched/task_stack.h> 7 #include <linux/module.h> 8 #include <linux/bio.h> 9 #include <linux/blkdev.h> 10 #include <linux/uio.h> 11 12 #include "blk.h" 13 14 struct bio_map_data { 15 bool is_our_pages : 1; 16 bool is_null_mapped : 1; 17 struct iov_iter iter; 18 struct iovec iov[]; 19 }; 20 21 static struct bio_map_data *bio_alloc_map_data(struct iov_iter *data, 22 gfp_t gfp_mask) 23 { 24 struct bio_map_data *bmd; 25 26 if (data->nr_segs > UIO_MAXIOV) 27 return NULL; 28 29 bmd = kmalloc(struct_size(bmd, iov, data->nr_segs), gfp_mask); 30 if (!bmd) 31 return NULL; 32 memcpy(bmd->iov, data->iov, sizeof(struct iovec) * data->nr_segs); 33 bmd->iter = *data; 34 bmd->iter.iov = bmd->iov; 35 return bmd; 36 } 37 38 /** 39 * bio_copy_from_iter - copy all pages from iov_iter to bio 40 * @bio: The &struct bio which describes the I/O as destination 41 * @iter: iov_iter as source 42 * 43 * Copy all pages from iov_iter to bio. 44 * Returns 0 on success, or error on failure. 45 */ 46 static int bio_copy_from_iter(struct bio *bio, struct iov_iter *iter) 47 { 48 struct bio_vec *bvec; 49 struct bvec_iter_all iter_all; 50 51 bio_for_each_segment_all(bvec, bio, iter_all) { 52 ssize_t ret; 53 54 ret = copy_page_from_iter(bvec->bv_page, 55 bvec->bv_offset, 56 bvec->bv_len, 57 iter); 58 59 if (!iov_iter_count(iter)) 60 break; 61 62 if (ret < bvec->bv_len) 63 return -EFAULT; 64 } 65 66 return 0; 67 } 68 69 /** 70 * bio_copy_to_iter - copy all pages from bio to iov_iter 71 * @bio: The &struct bio which describes the I/O as source 72 * @iter: iov_iter as destination 73 * 74 * Copy all pages from bio to iov_iter. 75 * Returns 0 on success, or error on failure. 76 */ 77 static int bio_copy_to_iter(struct bio *bio, struct iov_iter iter) 78 { 79 struct bio_vec *bvec; 80 struct bvec_iter_all iter_all; 81 82 bio_for_each_segment_all(bvec, bio, iter_all) { 83 ssize_t ret; 84 85 ret = copy_page_to_iter(bvec->bv_page, 86 bvec->bv_offset, 87 bvec->bv_len, 88 &iter); 89 90 if (!iov_iter_count(&iter)) 91 break; 92 93 if (ret < bvec->bv_len) 94 return -EFAULT; 95 } 96 97 return 0; 98 } 99 100 /** 101 * bio_uncopy_user - finish previously mapped bio 102 * @bio: bio being terminated 103 * 104 * Free pages allocated from bio_copy_user_iov() and write back data 105 * to user space in case of a read. 106 */ 107 static int bio_uncopy_user(struct bio *bio) 108 { 109 struct bio_map_data *bmd = bio->bi_private; 110 int ret = 0; 111 112 if (!bmd->is_null_mapped) { 113 /* 114 * if we're in a workqueue, the request is orphaned, so 115 * don't copy into a random user address space, just free 116 * and return -EINTR so user space doesn't expect any data. 117 */ 118 if (!current->mm) 119 ret = -EINTR; 120 else if (bio_data_dir(bio) == READ) 121 ret = bio_copy_to_iter(bio, bmd->iter); 122 if (bmd->is_our_pages) 123 bio_free_pages(bio); 124 } 125 kfree(bmd); 126 bio_put(bio); 127 return ret; 128 } 129 130 static int bio_copy_user_iov(struct request *rq, struct rq_map_data *map_data, 131 struct iov_iter *iter, gfp_t gfp_mask) 132 { 133 struct bio_map_data *bmd; 134 struct page *page; 135 struct bio *bio, *bounce_bio; 136 int i = 0, ret; 137 int nr_pages; 138 unsigned int len = iter->count; 139 unsigned int offset = map_data ? offset_in_page(map_data->offset) : 0; 140 141 bmd = bio_alloc_map_data(iter, gfp_mask); 142 if (!bmd) 143 return -ENOMEM; 144 145 /* 146 * We need to do a deep copy of the iov_iter including the iovecs. 147 * The caller provided iov might point to an on-stack or otherwise 148 * shortlived one. 149 */ 150 bmd->is_our_pages = !map_data; 151 bmd->is_null_mapped = (map_data && map_data->null_mapped); 152 153 nr_pages = bio_max_segs(DIV_ROUND_UP(offset + len, PAGE_SIZE)); 154 155 ret = -ENOMEM; 156 bio = bio_kmalloc(gfp_mask, nr_pages); 157 if (!bio) 158 goto out_bmd; 159 bio->bi_opf |= req_op(rq); 160 161 if (map_data) { 162 nr_pages = 1 << map_data->page_order; 163 i = map_data->offset / PAGE_SIZE; 164 } 165 while (len) { 166 unsigned int bytes = PAGE_SIZE; 167 168 bytes -= offset; 169 170 if (bytes > len) 171 bytes = len; 172 173 if (map_data) { 174 if (i == map_data->nr_entries * nr_pages) { 175 ret = -ENOMEM; 176 goto cleanup; 177 } 178 179 page = map_data->pages[i / nr_pages]; 180 page += (i % nr_pages); 181 182 i++; 183 } else { 184 page = alloc_page(rq->q->bounce_gfp | gfp_mask); 185 if (!page) { 186 ret = -ENOMEM; 187 goto cleanup; 188 } 189 } 190 191 if (bio_add_pc_page(rq->q, bio, page, bytes, offset) < bytes) { 192 if (!map_data) 193 __free_page(page); 194 break; 195 } 196 197 len -= bytes; 198 offset = 0; 199 } 200 201 if (map_data) 202 map_data->offset += bio->bi_iter.bi_size; 203 204 /* 205 * success 206 */ 207 if ((iov_iter_rw(iter) == WRITE && 208 (!map_data || !map_data->null_mapped)) || 209 (map_data && map_data->from_user)) { 210 ret = bio_copy_from_iter(bio, iter); 211 if (ret) 212 goto cleanup; 213 } else { 214 if (bmd->is_our_pages) 215 zero_fill_bio(bio); 216 iov_iter_advance(iter, bio->bi_iter.bi_size); 217 } 218 219 bio->bi_private = bmd; 220 221 bounce_bio = bio; 222 ret = blk_rq_append_bio(rq, &bounce_bio); 223 if (ret) 224 goto cleanup; 225 226 /* 227 * We link the bounce buffer in and could have to traverse it later, so 228 * we have to get a ref to prevent it from being freed 229 */ 230 bio_get(bounce_bio); 231 return 0; 232 cleanup: 233 if (!map_data) 234 bio_free_pages(bio); 235 bio_put(bio); 236 out_bmd: 237 kfree(bmd); 238 return ret; 239 } 240 241 static int bio_map_user_iov(struct request *rq, struct iov_iter *iter, 242 gfp_t gfp_mask) 243 { 244 unsigned int max_sectors = queue_max_hw_sectors(rq->q); 245 struct bio *bio, *bounce_bio; 246 int ret; 247 int j; 248 249 if (!iov_iter_count(iter)) 250 return -EINVAL; 251 252 bio = bio_kmalloc(gfp_mask, iov_iter_npages(iter, BIO_MAX_VECS)); 253 if (!bio) 254 return -ENOMEM; 255 bio->bi_opf |= req_op(rq); 256 257 while (iov_iter_count(iter)) { 258 struct page **pages; 259 ssize_t bytes; 260 size_t offs, added = 0; 261 int npages; 262 263 bytes = iov_iter_get_pages_alloc(iter, &pages, LONG_MAX, &offs); 264 if (unlikely(bytes <= 0)) { 265 ret = bytes ? bytes : -EFAULT; 266 goto out_unmap; 267 } 268 269 npages = DIV_ROUND_UP(offs + bytes, PAGE_SIZE); 270 271 if (unlikely(offs & queue_dma_alignment(rq->q))) { 272 ret = -EINVAL; 273 j = 0; 274 } else { 275 for (j = 0; j < npages; j++) { 276 struct page *page = pages[j]; 277 unsigned int n = PAGE_SIZE - offs; 278 bool same_page = false; 279 280 if (n > bytes) 281 n = bytes; 282 283 if (!bio_add_hw_page(rq->q, bio, page, n, offs, 284 max_sectors, &same_page)) { 285 if (same_page) 286 put_page(page); 287 break; 288 } 289 290 added += n; 291 bytes -= n; 292 offs = 0; 293 } 294 iov_iter_advance(iter, added); 295 } 296 /* 297 * release the pages we didn't map into the bio, if any 298 */ 299 while (j < npages) 300 put_page(pages[j++]); 301 kvfree(pages); 302 /* couldn't stuff something into bio? */ 303 if (bytes) 304 break; 305 } 306 307 /* 308 * Subtle: if we end up needing to bounce a bio, it would normally 309 * disappear when its bi_end_io is run. However, we need the original 310 * bio for the unmap, so grab an extra reference to it 311 */ 312 bio_get(bio); 313 314 bounce_bio = bio; 315 ret = blk_rq_append_bio(rq, &bounce_bio); 316 if (ret) 317 goto out_put_orig; 318 319 /* 320 * We link the bounce buffer in and could have to traverse it 321 * later, so we have to get a ref to prevent it from being freed 322 */ 323 bio_get(bounce_bio); 324 return 0; 325 326 out_put_orig: 327 bio_put(bio); 328 out_unmap: 329 bio_release_pages(bio, false); 330 bio_put(bio); 331 return ret; 332 } 333 334 /** 335 * bio_unmap_user - unmap a bio 336 * @bio: the bio being unmapped 337 * 338 * Unmap a bio previously mapped by bio_map_user_iov(). Must be called from 339 * process context. 340 * 341 * bio_unmap_user() may sleep. 342 */ 343 static void bio_unmap_user(struct bio *bio) 344 { 345 bio_release_pages(bio, bio_data_dir(bio) == READ); 346 bio_put(bio); 347 bio_put(bio); 348 } 349 350 static void bio_invalidate_vmalloc_pages(struct bio *bio) 351 { 352 #ifdef ARCH_HAS_FLUSH_KERNEL_DCACHE_PAGE 353 if (bio->bi_private && !op_is_write(bio_op(bio))) { 354 unsigned long i, len = 0; 355 356 for (i = 0; i < bio->bi_vcnt; i++) 357 len += bio->bi_io_vec[i].bv_len; 358 invalidate_kernel_vmap_range(bio->bi_private, len); 359 } 360 #endif 361 } 362 363 static void bio_map_kern_endio(struct bio *bio) 364 { 365 bio_invalidate_vmalloc_pages(bio); 366 bio_put(bio); 367 } 368 369 /** 370 * bio_map_kern - map kernel address into bio 371 * @q: the struct request_queue for the bio 372 * @data: pointer to buffer to map 373 * @len: length in bytes 374 * @gfp_mask: allocation flags for bio allocation 375 * 376 * Map the kernel address into a bio suitable for io to a block 377 * device. Returns an error pointer in case of error. 378 */ 379 static struct bio *bio_map_kern(struct request_queue *q, void *data, 380 unsigned int len, gfp_t gfp_mask) 381 { 382 unsigned long kaddr = (unsigned long)data; 383 unsigned long end = (kaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT; 384 unsigned long start = kaddr >> PAGE_SHIFT; 385 const int nr_pages = end - start; 386 bool is_vmalloc = is_vmalloc_addr(data); 387 struct page *page; 388 int offset, i; 389 struct bio *bio; 390 391 bio = bio_kmalloc(gfp_mask, nr_pages); 392 if (!bio) 393 return ERR_PTR(-ENOMEM); 394 395 if (is_vmalloc) { 396 flush_kernel_vmap_range(data, len); 397 bio->bi_private = data; 398 } 399 400 offset = offset_in_page(kaddr); 401 for (i = 0; i < nr_pages; i++) { 402 unsigned int bytes = PAGE_SIZE - offset; 403 404 if (len <= 0) 405 break; 406 407 if (bytes > len) 408 bytes = len; 409 410 if (!is_vmalloc) 411 page = virt_to_page(data); 412 else 413 page = vmalloc_to_page(data); 414 if (bio_add_pc_page(q, bio, page, bytes, 415 offset) < bytes) { 416 /* we don't support partial mappings */ 417 bio_put(bio); 418 return ERR_PTR(-EINVAL); 419 } 420 421 data += bytes; 422 len -= bytes; 423 offset = 0; 424 } 425 426 bio->bi_end_io = bio_map_kern_endio; 427 return bio; 428 } 429 430 static void bio_copy_kern_endio(struct bio *bio) 431 { 432 bio_free_pages(bio); 433 bio_put(bio); 434 } 435 436 static void bio_copy_kern_endio_read(struct bio *bio) 437 { 438 char *p = bio->bi_private; 439 struct bio_vec *bvec; 440 struct bvec_iter_all iter_all; 441 442 bio_for_each_segment_all(bvec, bio, iter_all) { 443 memcpy(p, page_address(bvec->bv_page), bvec->bv_len); 444 p += bvec->bv_len; 445 } 446 447 bio_copy_kern_endio(bio); 448 } 449 450 /** 451 * bio_copy_kern - copy kernel address into bio 452 * @q: the struct request_queue for the bio 453 * @data: pointer to buffer to copy 454 * @len: length in bytes 455 * @gfp_mask: allocation flags for bio and page allocation 456 * @reading: data direction is READ 457 * 458 * copy the kernel address into a bio suitable for io to a block 459 * device. Returns an error pointer in case of error. 460 */ 461 static struct bio *bio_copy_kern(struct request_queue *q, void *data, 462 unsigned int len, gfp_t gfp_mask, int reading) 463 { 464 unsigned long kaddr = (unsigned long)data; 465 unsigned long end = (kaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT; 466 unsigned long start = kaddr >> PAGE_SHIFT; 467 struct bio *bio; 468 void *p = data; 469 int nr_pages = 0; 470 471 /* 472 * Overflow, abort 473 */ 474 if (end < start) 475 return ERR_PTR(-EINVAL); 476 477 nr_pages = end - start; 478 bio = bio_kmalloc(gfp_mask, nr_pages); 479 if (!bio) 480 return ERR_PTR(-ENOMEM); 481 482 while (len) { 483 struct page *page; 484 unsigned int bytes = PAGE_SIZE; 485 486 if (bytes > len) 487 bytes = len; 488 489 page = alloc_page(q->bounce_gfp | gfp_mask); 490 if (!page) 491 goto cleanup; 492 493 if (!reading) 494 memcpy(page_address(page), p, bytes); 495 496 if (bio_add_pc_page(q, bio, page, bytes, 0) < bytes) 497 break; 498 499 len -= bytes; 500 p += bytes; 501 } 502 503 if (reading) { 504 bio->bi_end_io = bio_copy_kern_endio_read; 505 bio->bi_private = data; 506 } else { 507 bio->bi_end_io = bio_copy_kern_endio; 508 } 509 510 return bio; 511 512 cleanup: 513 bio_free_pages(bio); 514 bio_put(bio); 515 return ERR_PTR(-ENOMEM); 516 } 517 518 /* 519 * Append a bio to a passthrough request. Only works if the bio can be merged 520 * into the request based on the driver constraints. 521 */ 522 int blk_rq_append_bio(struct request *rq, struct bio **bio) 523 { 524 struct bio *orig_bio = *bio; 525 struct bvec_iter iter; 526 struct bio_vec bv; 527 unsigned int nr_segs = 0; 528 529 blk_queue_bounce(rq->q, bio); 530 531 bio_for_each_bvec(bv, *bio, iter) 532 nr_segs++; 533 534 if (!rq->bio) { 535 blk_rq_bio_prep(rq, *bio, nr_segs); 536 } else { 537 if (!ll_back_merge_fn(rq, *bio, nr_segs)) { 538 if (orig_bio != *bio) { 539 bio_put(*bio); 540 *bio = orig_bio; 541 } 542 return -EINVAL; 543 } 544 545 rq->biotail->bi_next = *bio; 546 rq->biotail = *bio; 547 rq->__data_len += (*bio)->bi_iter.bi_size; 548 bio_crypt_free_ctx(*bio); 549 } 550 551 return 0; 552 } 553 EXPORT_SYMBOL(blk_rq_append_bio); 554 555 /** 556 * blk_rq_map_user_iov - map user data to a request, for passthrough requests 557 * @q: request queue where request should be inserted 558 * @rq: request to map data to 559 * @map_data: pointer to the rq_map_data holding pages (if necessary) 560 * @iter: iovec iterator 561 * @gfp_mask: memory allocation flags 562 * 563 * Description: 564 * Data will be mapped directly for zero copy I/O, if possible. Otherwise 565 * a kernel bounce buffer is used. 566 * 567 * A matching blk_rq_unmap_user() must be issued at the end of I/O, while 568 * still in process context. 569 * 570 * Note: The mapped bio may need to be bounced through blk_queue_bounce() 571 * before being submitted to the device, as pages mapped may be out of 572 * reach. It's the callers responsibility to make sure this happens. The 573 * original bio must be passed back in to blk_rq_unmap_user() for proper 574 * unmapping. 575 */ 576 int blk_rq_map_user_iov(struct request_queue *q, struct request *rq, 577 struct rq_map_data *map_data, 578 const struct iov_iter *iter, gfp_t gfp_mask) 579 { 580 bool copy = false; 581 unsigned long align = q->dma_pad_mask | queue_dma_alignment(q); 582 struct bio *bio = NULL; 583 struct iov_iter i; 584 int ret = -EINVAL; 585 586 if (!iter_is_iovec(iter)) 587 goto fail; 588 589 if (map_data) 590 copy = true; 591 else if (iov_iter_alignment(iter) & align) 592 copy = true; 593 else if (queue_virt_boundary(q)) 594 copy = queue_virt_boundary(q) & iov_iter_gap_alignment(iter); 595 596 i = *iter; 597 do { 598 if (copy) 599 ret = bio_copy_user_iov(rq, map_data, &i, gfp_mask); 600 else 601 ret = bio_map_user_iov(rq, &i, gfp_mask); 602 if (ret) 603 goto unmap_rq; 604 if (!bio) 605 bio = rq->bio; 606 } while (iov_iter_count(&i)); 607 608 return 0; 609 610 unmap_rq: 611 blk_rq_unmap_user(bio); 612 fail: 613 rq->bio = NULL; 614 return ret; 615 } 616 EXPORT_SYMBOL(blk_rq_map_user_iov); 617 618 int blk_rq_map_user(struct request_queue *q, struct request *rq, 619 struct rq_map_data *map_data, void __user *ubuf, 620 unsigned long len, gfp_t gfp_mask) 621 { 622 struct iovec iov; 623 struct iov_iter i; 624 int ret = import_single_range(rq_data_dir(rq), ubuf, len, &iov, &i); 625 626 if (unlikely(ret < 0)) 627 return ret; 628 629 return blk_rq_map_user_iov(q, rq, map_data, &i, gfp_mask); 630 } 631 EXPORT_SYMBOL(blk_rq_map_user); 632 633 /** 634 * blk_rq_unmap_user - unmap a request with user data 635 * @bio: start of bio list 636 * 637 * Description: 638 * Unmap a rq previously mapped by blk_rq_map_user(). The caller must 639 * supply the original rq->bio from the blk_rq_map_user() return, since 640 * the I/O completion may have changed rq->bio. 641 */ 642 int blk_rq_unmap_user(struct bio *bio) 643 { 644 struct bio *mapped_bio; 645 int ret = 0, ret2; 646 647 while (bio) { 648 mapped_bio = bio; 649 if (unlikely(bio_flagged(bio, BIO_BOUNCED))) 650 mapped_bio = bio->bi_private; 651 652 if (bio->bi_private) { 653 ret2 = bio_uncopy_user(mapped_bio); 654 if (ret2 && !ret) 655 ret = ret2; 656 } else { 657 bio_unmap_user(mapped_bio); 658 } 659 660 mapped_bio = bio; 661 bio = bio->bi_next; 662 bio_put(mapped_bio); 663 } 664 665 return ret; 666 } 667 EXPORT_SYMBOL(blk_rq_unmap_user); 668 669 /** 670 * blk_rq_map_kern - map kernel data to a request, for passthrough requests 671 * @q: request queue where request should be inserted 672 * @rq: request to fill 673 * @kbuf: the kernel buffer 674 * @len: length of user data 675 * @gfp_mask: memory allocation flags 676 * 677 * Description: 678 * Data will be mapped directly if possible. Otherwise a bounce 679 * buffer is used. Can be called multiple times to append multiple 680 * buffers. 681 */ 682 int blk_rq_map_kern(struct request_queue *q, struct request *rq, void *kbuf, 683 unsigned int len, gfp_t gfp_mask) 684 { 685 int reading = rq_data_dir(rq) == READ; 686 unsigned long addr = (unsigned long) kbuf; 687 struct bio *bio, *orig_bio; 688 int ret; 689 690 if (len > (queue_max_hw_sectors(q) << 9)) 691 return -EINVAL; 692 if (!len || !kbuf) 693 return -EINVAL; 694 695 if (!blk_rq_aligned(q, addr, len) || object_is_on_stack(kbuf)) 696 bio = bio_copy_kern(q, kbuf, len, gfp_mask, reading); 697 else 698 bio = bio_map_kern(q, kbuf, len, gfp_mask); 699 700 if (IS_ERR(bio)) 701 return PTR_ERR(bio); 702 703 bio->bi_opf &= ~REQ_OP_MASK; 704 bio->bi_opf |= req_op(rq); 705 706 orig_bio = bio; 707 ret = blk_rq_append_bio(rq, &bio); 708 if (unlikely(ret)) { 709 /* request is too big */ 710 bio_put(orig_bio); 711 return ret; 712 } 713 714 return 0; 715 } 716 EXPORT_SYMBOL(blk_rq_map_kern); 717