/*
 * Copyright (C) 2016 Red Hat, Inc. All rights reserved.
 *
 * This file is released under the GPL.
 */

#include "dm-core.h"
#include "dm-rq.h"

#include <linux/elevator.h> /* for rq_end_sector() */
#include <linux/blk-mq.h>

#define DM_MSG_PREFIX "core-rq"

#define DM_MQ_NR_HW_QUEUES 1
#define DM_MQ_QUEUE_DEPTH 2048
static unsigned dm_mq_nr_hw_queues = DM_MQ_NR_HW_QUEUES;
static unsigned dm_mq_queue_depth = DM_MQ_QUEUE_DEPTH;

/*
 * Request-based DM's mempools' reserved IOs set by the user.
 */
#define RESERVED_REQUEST_BASED_IOS	256
static unsigned reserved_rq_based_ios = RESERVED_REQUEST_BASED_IOS;

static bool use_blk_mq = IS_ENABLED(CONFIG_DM_MQ_DEFAULT);

bool dm_use_blk_mq_default(void)
{
	return use_blk_mq;
}

bool dm_use_blk_mq(struct mapped_device *md)
{
	return md->use_blk_mq;
}
EXPORT_SYMBOL_GPL(dm_use_blk_mq);

unsigned dm_get_reserved_rq_based_ios(void)
{
	return __dm_get_module_param(&reserved_rq_based_ios,
				     RESERVED_REQUEST_BASED_IOS, DM_RESERVED_MAX_IOS);
}
EXPORT_SYMBOL_GPL(dm_get_reserved_rq_based_ios);

static unsigned dm_get_blk_mq_nr_hw_queues(void)
{
	return __dm_get_module_param(&dm_mq_nr_hw_queues, 1, 32);
}

static unsigned dm_get_blk_mq_queue_depth(void)
{
	return __dm_get_module_param(&dm_mq_queue_depth,
				     DM_MQ_QUEUE_DEPTH, BLK_MQ_MAX_DEPTH);
}

int dm_request_based(struct mapped_device *md)
{
	return blk_queue_stackable(md->queue);
}

static void dm_old_start_queue(struct request_queue *q)
{
	unsigned long flags;

	spin_lock_irqsave(q->queue_lock, flags);
	if (blk_queue_stopped(q))
		blk_start_queue(q);
	spin_unlock_irqrestore(q->queue_lock, flags);
}

static void dm_mq_start_queue(struct request_queue *q)
{
	blk_mq_start_stopped_hw_queues(q, true);
	blk_mq_kick_requeue_list(q);
}

void dm_start_queue(struct request_queue *q)
{
	if (!q->mq_ops)
		dm_old_start_queue(q);
	else
		dm_mq_start_queue(q);
}

static void dm_old_stop_queue(struct request_queue *q)
{
	unsigned long flags;

	spin_lock_irqsave(q->queue_lock, flags);
	if (!blk_queue_stopped(q))
		blk_stop_queue(q);
	spin_unlock_irqrestore(q->queue_lock, flags);
}

static void dm_mq_stop_queue(struct request_queue *q)
{
	if (blk_mq_queue_stopped(q))
		return;

	blk_mq_quiesce_queue(q);
}

void dm_stop_queue(struct request_queue *q)
{
	if (!q->mq_ops)
		dm_old_stop_queue(q);
	else
		dm_mq_stop_queue(q);
}

static struct dm_rq_target_io *alloc_old_rq_tio(struct mapped_device *md,
						gfp_t gfp_mask)
{
	return mempool_alloc(md->io_pool, gfp_mask);
}

static void free_old_rq_tio(struct dm_rq_target_io *tio)
{
	mempool_free(tio, tio->md->io_pool);
}

static struct request *alloc_old_clone_request(struct mapped_device *md,
					       gfp_t gfp_mask)
{
	return mempool_alloc(md->rq_pool, gfp_mask);
}

static void free_old_clone_request(struct mapped_device *md, struct request *rq)
{
	mempool_free(rq, md->rq_pool);
}
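
/*
 * The four helpers above serve the legacy .request_fn path only: they
 * draw from md->io_pool and md->rq_pool.  On the blk-mq path the
 * dm_rq_target_io lives in the request's PDU instead (tag_set->cmd_size
 * is sized for it in dm_mq_init_request_queue()), so no mempool is used.
 */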

/*
 * Partial completion handling for request-based dm
 */
static void end_clone_bio(struct bio *clone)
{
	struct dm_rq_clone_bio_info *info =
		container_of(clone, struct dm_rq_clone_bio_info, clone);
	struct dm_rq_target_io *tio = info->tio;
	struct bio *bio = info->orig;
	unsigned int nr_bytes = info->orig->bi_iter.bi_size;
	int error = clone->bi_error;

	bio_put(clone);

	if (tio->error)
		/*
		 * An error has already been detected on the request.
		 * Once an error has occurred, just let clone->end_io()
		 * handle the remainder.
		 */
		return;
	else if (error) {
		/*
		 * Don't report the error to the upper layer yet.
		 * The error handling decision is made by the target driver,
		 * when the request is completed.
		 */
		tio->error = error;
		return;
	}

	/*
	 * I/O for the bio successfully completed.
	 * Report the data completion to the upper layer.
	 */

	/*
	 * bios are processed from the head of the list.
	 * So the completing bio should always be rq->bio.
	 * If it's not, something is wrong.
	 */
	if (tio->orig->bio != bio)
		DMERR("bio completion is going in the middle of the request");

	/*
	 * Update the original request.
	 * Do not use blk_end_request() here, because it may complete
	 * the original request before the clone, and break the ordering.
	 */
	blk_update_request(tio->orig, 0, nr_bytes);
}

static struct dm_rq_target_io *tio_from_request(struct request *rq)
{
	return (rq->q->mq_ops ? blk_mq_rq_to_pdu(rq) : rq->special);
}

static void rq_end_stats(struct mapped_device *md, struct request *orig)
{
	if (unlikely(dm_stats_used(&md->stats))) {
		struct dm_rq_target_io *tio = tio_from_request(orig);
		tio->duration_jiffies = jiffies - tio->duration_jiffies;
		dm_stats_account_io(&md->stats, rq_data_dir(orig),
				    blk_rq_pos(orig), tio->n_sectors, true,
				    tio->duration_jiffies, &tio->stats_aux);
	}
}

/*
 * Don't touch any member of the md after calling this function because
 * the md may be freed in dm_put() at the end of this function.
 * Alternatively, do dm_get() before calling this function and dm_put() later.
 */
static void rq_completed(struct mapped_device *md, int rw, bool run_queue)
{
	struct request_queue *q = md->queue;
	unsigned long flags;

	atomic_dec(&md->pending[rw]);

	/* nudge anyone waiting on suspend queue */
	if (!md_in_flight(md))
		wake_up(&md->wait);

	/*
	 * Run this off this callpath, as drivers could invoke end_io while
	 * inside their request_fn (and holding the queue lock). Calling
	 * back into ->request_fn() could deadlock attempting to grab the
	 * queue lock again.
	 */
	if (!q->mq_ops && run_queue) {
		spin_lock_irqsave(q->queue_lock, flags);
		blk_run_queue_async(q);
		spin_unlock_irqrestore(q->queue_lock, flags);
	}

	/*
	 * dm_put() must be at the end of this function. See the comment above.
	 */
	dm_put(md);
}

static void free_rq_clone(struct request *clone)
{
	struct dm_rq_target_io *tio = clone->end_io_data;
	struct mapped_device *md = tio->md;

	blk_rq_unprep_clone(clone);

	/*
	 * It is possible for a clone_old_rq() allocated clone to
	 * get passed in -- it may not yet have a request_queue.
	 * This is known to occur if the error target replaces
	 * a multipath target that has a request_fn queue stacked
	 * on blk-mq queue(s).
	 */
	if (clone->q && clone->q->mq_ops)
		/* stacked on blk-mq queue(s) */
		tio->ti->type->release_clone_rq(clone);
	else if (!md->queue->mq_ops)
		/* request_fn queue stacked on request_fn queue(s) */
		free_old_clone_request(md, clone);

	if (!md->queue->mq_ops)
		free_old_rq_tio(tio);
}

/*
 * Complete the clone and the original request.
 * Must be called without clone's queue lock held,
 * see end_clone_request() for more details.
 */
static void dm_end_request(struct request *clone, int error)
{
	int rw = rq_data_dir(clone);
	struct dm_rq_target_io *tio = clone->end_io_data;
	struct mapped_device *md = tio->md;
	struct request *rq = tio->orig;

	if (rq->cmd_type == REQ_TYPE_BLOCK_PC) {
		rq->errors = clone->errors;
		rq->resid_len = clone->resid_len;

		if (rq->sense)
			/*
			 * We are using the sense buffer of the original
			 * request.
			 * So setting the length of the sense data is enough.
			 */
			rq->sense_len = clone->sense_len;
	}

	free_rq_clone(clone);
	rq_end_stats(md, rq);
	if (!rq->q->mq_ops)
		blk_end_request_all(rq, error);
	else
		blk_mq_end_request(rq, error);
	rq_completed(md, rw, true);
}

static void dm_unprep_request(struct request *rq)
{
	struct dm_rq_target_io *tio = tio_from_request(rq);
	struct request *clone = tio->clone;

	if (!rq->q->mq_ops) {
		rq->special = NULL;
		rq->rq_flags &= ~RQF_DONTPREP;
	}

	if (clone)
		free_rq_clone(clone);
	else if (!tio->md->queue->mq_ops)
		free_old_rq_tio(tio);
}

/*
 * Requeue the original request of a clone.
 */
static void dm_old_requeue_request(struct request *rq)
{
	struct request_queue *q = rq->q;
	unsigned long flags;

	spin_lock_irqsave(q->queue_lock, flags);
	blk_requeue_request(q, rq);
	blk_run_queue_async(q);
	spin_unlock_irqrestore(q->queue_lock, flags);
}

static void __dm_mq_kick_requeue_list(struct request_queue *q, unsigned long msecs)
{
	blk_mq_delay_kick_requeue_list(q, msecs);
}

void dm_mq_kick_requeue_list(struct mapped_device *md)
{
	__dm_mq_kick_requeue_list(dm_get_md_queue(md), 0);
}
EXPORT_SYMBOL(dm_mq_kick_requeue_list);

static void dm_mq_delay_requeue_request(struct request *rq, unsigned long msecs)
{
	blk_mq_requeue_request(rq, false);
	__dm_mq_kick_requeue_list(rq->q, msecs);
}

static void dm_requeue_original_request(struct dm_rq_target_io *tio, bool delay_requeue)
{
	struct mapped_device *md = tio->md;
	struct request *rq = tio->orig;
	int rw = rq_data_dir(rq);

	rq_end_stats(md, rq);
	dm_unprep_request(rq);

	if (!rq->q->mq_ops)
		dm_old_requeue_request(rq);
	else
		dm_mq_delay_requeue_request(rq, delay_requeue ? 5000 : 0);

	rq_completed(md, rw, false);
}
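
/*
 * Process the completion of one clone: give the target's ->rq_end_io()
 * (if any) a chance to inspect or override the result, then either
 * finish the original request (r <= 0), leave it to the target
 * (DM_ENDIO_INCOMPLETE), or requeue it (DM_ENDIO_REQUEUE).
 */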

static void dm_done(struct request *clone, int error, bool mapped)
{
	int r = error;
	struct dm_rq_target_io *tio = clone->end_io_data;
	dm_request_endio_fn rq_end_io = NULL;

	if (tio->ti) {
		rq_end_io = tio->ti->type->rq_end_io;

		if (mapped && rq_end_io)
			r = rq_end_io(tio->ti, clone, error, &tio->info);
	}

	if (unlikely(r == -EREMOTEIO && (req_op(clone) == REQ_OP_WRITE_SAME) &&
		     !clone->q->limits.max_write_same_sectors))
		disable_write_same(tio->md);

	if (r <= 0)
		/* The target wants to complete the I/O */
		dm_end_request(clone, r);
	else if (r == DM_ENDIO_INCOMPLETE)
		/* The target will handle the I/O */
		return;
	else if (r == DM_ENDIO_REQUEUE)
		/* The target wants to requeue the I/O */
		dm_requeue_original_request(tio, false);
	else {
		DMWARN("unimplemented target endio return value: %d", r);
		BUG();
	}
}

/*
 * Request completion handler for request-based dm
 */
static void dm_softirq_done(struct request *rq)
{
	bool mapped = true;
	struct dm_rq_target_io *tio = tio_from_request(rq);
	struct request *clone = tio->clone;
	int rw;

	if (!clone) {
		rq_end_stats(tio->md, rq);
		rw = rq_data_dir(rq);
		if (!rq->q->mq_ops) {
			blk_end_request_all(rq, tio->error);
			rq_completed(tio->md, rw, false);
			free_old_rq_tio(tio);
		} else {
			blk_mq_end_request(rq, tio->error);
			rq_completed(tio->md, rw, false);
		}
		return;
	}

	if (rq->rq_flags & RQF_FAILED)
		mapped = false;

	dm_done(clone, tio->error, mapped);
}

/*
 * Complete the clone and the original request with the error status
 * through softirq context.
 */
static void dm_complete_request(struct request *rq, int error)
{
	struct dm_rq_target_io *tio = tio_from_request(rq);

	tio->error = error;
	if (!rq->q->mq_ops)
		blk_complete_request(rq);
	else
		blk_mq_complete_request(rq, error);
}

/*
 * Complete the not-mapped clone and the original request with the error status
 * through softirq context.
 * Target's rq_end_io() function isn't called.
 * This may be used when the target's map_rq() or clone_and_map_rq() functions fail.
 */
static void dm_kill_unmapped_request(struct request *rq, int error)
{
	rq->rq_flags |= RQF_FAILED;
	dm_complete_request(rq, error);
}

/*
 * Called with the clone's queue lock held (in the case of .request_fn)
 */
static void end_clone_request(struct request *clone, int error)
{
	struct dm_rq_target_io *tio = clone->end_io_data;

	if (!clone->q->mq_ops) {
		/*
		 * Just clean up the information of the queue in which
		 * the clone was dispatched.
		 * The clone is *NOT* actually freed here because it was
		 * allocated from dm's own mempool (RQF_ALLOCED isn't set).
		 */
		__blk_put_request(clone->q, clone);
	}

	/*
	 * Actual request completion is done in a softirq context which doesn't
	 * hold the clone's queue lock.  Otherwise, deadlock could occur because:
	 *     - another request may be submitted by the upper level driver
	 *       of the stacking during the completion
	 *     - the submission which requires queue lock may be done
	 *       against this clone's queue
	 */
	dm_complete_request(tio->orig, error);
}

static void dm_dispatch_clone_request(struct request *clone, struct request *rq)
{
	int r;

	if (blk_queue_io_stat(clone->q))
		clone->rq_flags |= RQF_IO_STAT;

	clone->start_time = jiffies;
	r = blk_insert_cloned_request(clone->q, clone);
	if (r)
		/* must complete clone in terms of original request */
		dm_complete_request(rq, r);
}

static int dm_rq_bio_constructor(struct bio *bio, struct bio *bio_orig,
				 void *data)
{
	struct dm_rq_target_io *tio = data;
	struct dm_rq_clone_bio_info *info =
		container_of(bio, struct dm_rq_clone_bio_info, clone);

	info->orig = bio_orig;
	info->tio = tio;
	bio->bi_end_io = end_clone_bio;

	return 0;
}

static int setup_clone(struct request *clone, struct request *rq,
		       struct dm_rq_target_io *tio, gfp_t gfp_mask)
{
	int r;

	r = blk_rq_prep_clone(clone, rq, tio->md->bs, gfp_mask,
			      dm_rq_bio_constructor, tio);
	if (r)
		return r;

	clone->cmd = rq->cmd;
	clone->cmd_len = rq->cmd_len;
	clone->sense = rq->sense;
	clone->end_io = end_clone_request;
	clone->end_io_data = tio;

	tio->clone = clone;

	return 0;
}

static struct request *clone_old_rq(struct request *rq, struct mapped_device *md,
				    struct dm_rq_target_io *tio, gfp_t gfp_mask)
{
	/*
	 * Create clone for use with .request_fn request_queue
	 */
	struct request *clone;

	clone = alloc_old_clone_request(md, gfp_mask);
	if (!clone)
		return NULL;

	blk_rq_init(NULL, clone);
	if (setup_clone(clone, rq, tio, gfp_mask)) {
		/* -ENOMEM */
		free_old_clone_request(md, clone);
		return NULL;
	}

	return clone;
}

static void map_tio_request(struct kthread_work *work);

static void init_tio(struct dm_rq_target_io *tio, struct request *rq,
		     struct mapped_device *md)
{
	tio->md = md;
	tio->ti = NULL;
	tio->clone = NULL;
	tio->orig = rq;
	tio->error = 0;
	/*
	 * Avoid initializing info for blk-mq; it passes
	 * target-specific data through info.ptr
	 * (see: dm_mq_init_request)
	 */
	if (!md->init_tio_pdu)
		memset(&tio->info, 0, sizeof(tio->info));
	if (md->kworker_task)
		kthread_init_work(&tio->work, map_tio_request);
}

static struct dm_rq_target_io *dm_old_prep_tio(struct request *rq,
					       struct mapped_device *md,
					       gfp_t gfp_mask)
{
	struct dm_rq_target_io *tio;
	int srcu_idx;
	struct dm_table *table;

	tio = alloc_old_rq_tio(md, gfp_mask);
	if (!tio)
		return NULL;

	init_tio(tio, rq, md);

	table = dm_get_live_table(md, &srcu_idx);
	/*
	 * Must clone a request if this .request_fn DM device
	 * is stacked on .request_fn device(s).
	 */
	if (!dm_table_all_blk_mq_devices(table)) {
		if (!clone_old_rq(rq, md, tio, gfp_mask)) {
			dm_put_live_table(md, srcu_idx);
			free_old_rq_tio(tio);
			return NULL;
		}
	}
	dm_put_live_table(md, srcu_idx);

	return tio;
}

/*
 * Called with the queue lock held.
 */
static int dm_old_prep_fn(struct request_queue *q, struct request *rq)
{
	struct mapped_device *md = q->queuedata;
	struct dm_rq_target_io *tio;

	if (unlikely(rq->special)) {
		DMWARN("Already has something in rq->special.");
		return BLKPREP_KILL;
	}

	tio = dm_old_prep_tio(rq, md, GFP_ATOMIC);
	if (!tio)
		return BLKPREP_DEFER;

	rq->special = tio;
	rq->rq_flags |= RQF_DONTPREP;

	return BLKPREP_OK;
}

/*
 * Returns:
 * DM_MAPIO_*       : the request has been processed as indicated
 * DM_MAPIO_REQUEUE : the original request needs to be immediately requeued
 * < 0              : the request was completed due to failure
 */
static int map_request(struct dm_rq_target_io *tio)
{
	int r;
	struct dm_target *ti = tio->ti;
	struct mapped_device *md = tio->md;
	struct request *rq = tio->orig;
	struct request *clone = NULL;

	if (tio->clone) {
		clone = tio->clone;
		r = ti->type->map_rq(ti, clone, &tio->info);
		if (r == DM_MAPIO_DELAY_REQUEUE)
			return DM_MAPIO_REQUEUE; /* .request_fn requeue is always immediate */
	} else {
		r = ti->type->clone_and_map_rq(ti, rq, &tio->info, &clone);
		if (r < 0) {
			/* The target wants to complete the I/O */
			dm_kill_unmapped_request(rq, r);
			return r;
		}
		if (r == DM_MAPIO_REMAPPED &&
		    setup_clone(clone, rq, tio, GFP_ATOMIC)) {
			/* -ENOMEM */
			ti->type->release_clone_rq(clone);
			return DM_MAPIO_REQUEUE;
		}
	}

	switch (r) {
	case DM_MAPIO_SUBMITTED:
		/* The target has taken the I/O to submit by itself later */
		break;
	case DM_MAPIO_REMAPPED:
		/* The target has remapped the I/O so dispatch it */
		trace_block_rq_remap(clone->q, clone, disk_devt(dm_disk(md)),
				     blk_rq_pos(rq));
		dm_dispatch_clone_request(clone, rq);
		break;
	case DM_MAPIO_REQUEUE:
		/* The target wants to requeue the I/O */
		break;
	case DM_MAPIO_DELAY_REQUEUE:
		/* The target wants to requeue the I/O after a delay */
		dm_requeue_original_request(tio, true);
		break;
	default:
		if (r > 0) {
			DMWARN("unimplemented target map return value: %d", r);
			BUG();
		}

		/* The target wants to complete the I/O */
		dm_kill_unmapped_request(rq, r);
	}

	return r;
}
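
/*
 * Mark the original request as started (on either the .request_fn or
 * blk-mq path), account it as in-flight for suspend and statistics,
 * record the merge-deadline bookkeeping, and take an md reference that
 * is dropped again in rq_completed().
 */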
static void dm_start_request(struct mapped_device *md, struct request *orig)
{
	if (!orig->q->mq_ops)
		blk_start_request(orig);
	else
		blk_mq_start_request(orig);
	atomic_inc(&md->pending[rq_data_dir(orig)]);

	if (md->seq_rq_merge_deadline_usecs) {
		md->last_rq_pos = rq_end_sector(orig);
		md->last_rq_rw = rq_data_dir(orig);
		md->last_rq_start_time = ktime_get();
	}

	if (unlikely(dm_stats_used(&md->stats))) {
		struct dm_rq_target_io *tio = tio_from_request(orig);
		tio->duration_jiffies = jiffies;
		tio->n_sectors = blk_rq_sectors(orig);
		dm_stats_account_io(&md->stats, rq_data_dir(orig),
				    blk_rq_pos(orig), tio->n_sectors, false, 0,
				    &tio->stats_aux);
	}

	/*
	 * Hold the md reference here for the in-flight I/O.
	 * We can't rely on the reference count by device opener,
	 * because the device may be closed during the request completion
	 * when all bios are completed.
	 * See the comment in rq_completed() too.
	 */
	dm_get(md);
}

static void map_tio_request(struct kthread_work *work)
{
	struct dm_rq_target_io *tio = container_of(work, struct dm_rq_target_io, work);

	if (map_request(tio) == DM_MAPIO_REQUEUE)
		dm_requeue_original_request(tio, false);
}

ssize_t dm_attr_rq_based_seq_io_merge_deadline_show(struct mapped_device *md, char *buf)
{
	return sprintf(buf, "%u\n", md->seq_rq_merge_deadline_usecs);
}

#define MAX_SEQ_RQ_MERGE_DEADLINE_USECS 100000

ssize_t dm_attr_rq_based_seq_io_merge_deadline_store(struct mapped_device *md,
						     const char *buf, size_t count)
{
	unsigned deadline;

	if (dm_get_md_type(md) != DM_TYPE_REQUEST_BASED)
		return count;

	if (kstrtouint(buf, 10, &deadline))
		return -EINVAL;

	if (deadline > MAX_SEQ_RQ_MERGE_DEADLINE_USECS)
		deadline = MAX_SEQ_RQ_MERGE_DEADLINE_USECS;

	md->seq_rq_merge_deadline_usecs = deadline;

	return count;
}

static bool dm_old_request_peeked_before_merge_deadline(struct mapped_device *md)
{
	ktime_t kt_deadline;

	if (!md->seq_rq_merge_deadline_usecs)
		return false;

	kt_deadline = ns_to_ktime((u64)md->seq_rq_merge_deadline_usecs * NSEC_PER_USEC);
	kt_deadline = ktime_add_safe(md->last_rq_start_time, kt_deadline);

	return !ktime_after(ktime_get(), kt_deadline);
}

/*
 * q->request_fn for old request-based dm.
 * Called with the queue lock held.
 */
static void dm_old_request_fn(struct request_queue *q)
{
	struct mapped_device *md = q->queuedata;
	struct dm_target *ti = md->immutable_target;
	struct request *rq;
	struct dm_rq_target_io *tio;
	sector_t pos = 0;

	if (unlikely(!ti)) {
		int srcu_idx;
		struct dm_table *map = dm_get_live_table(md, &srcu_idx);

		ti = dm_table_find_target(map, pos);
		dm_put_live_table(md, srcu_idx);
	}

	/*
	 * For suspend, check blk_queue_stopped() and increment
	 * ->pending within a single queue_lock not to increment the
	 * number of in-flight I/Os after the queue is stopped in
	 * dm_suspend().
	 */
	while (!blk_queue_stopped(q)) {
		rq = blk_peek_request(q);
		if (!rq)
			return;

		/* always use block 0 to find the target for flushes for now */
		pos = 0;
		if (req_op(rq) != REQ_OP_FLUSH)
			pos = blk_rq_pos(rq);

		if ((dm_old_request_peeked_before_merge_deadline(md) &&
		     md_in_flight(md) && rq->bio && !bio_multiple_segments(rq->bio) &&
		     md->last_rq_pos == pos && md->last_rq_rw == rq_data_dir(rq)) ||
		    (ti->type->busy && ti->type->busy(ti))) {
			blk_delay_queue(q, 10);
			return;
		}

		dm_start_request(md, rq);

		tio = tio_from_request(rq);
		/* Establish tio->ti before queuing work (map_tio_request) */
		tio->ti = ti;
		kthread_queue_work(&md->kworker, &tio->work);
		BUG_ON(!irqs_disabled());
	}
}

/*
 * Fully initialize a .request_fn request-based queue.
 */
int dm_old_init_request_queue(struct mapped_device *md)
{
	/* Fully initialize the queue */
	if (!blk_init_allocated_queue(md->queue, dm_old_request_fn, NULL))
		return -EINVAL;

	/* disable dm_old_request_fn's merge heuristic by default */
	md->seq_rq_merge_deadline_usecs = 0;

	dm_init_normal_md_queue(md);
	blk_queue_softirq_done(md->queue, dm_softirq_done);
	blk_queue_prep_rq(md->queue, dm_old_prep_fn);

	/* Initialize the request-based DM worker thread */
	kthread_init_worker(&md->kworker);
	md->kworker_task = kthread_run(kthread_worker_fn, &md->kworker,
				       "kdmwork-%s", dm_device_name(md));
	if (IS_ERR(md->kworker_task)) {
		int error = PTR_ERR(md->kworker_task);
		md->kworker_task = NULL;
		return error;
	}

	elv_register_queue(md->queue);

	return 0;
}

static int dm_mq_init_request(void *data, struct request *rq,
			      unsigned int hctx_idx, unsigned int request_idx,
			      unsigned int numa_node)
{
	struct mapped_device *md = data;
	struct dm_rq_target_io *tio = blk_mq_rq_to_pdu(rq);

	/*
	 * Must initialize md member of tio, otherwise it won't
	 * be available in dm_mq_queue_rq.
	 */
	tio->md = md;

	if (md->init_tio_pdu) {
		/* target-specific per-io data is immediately after the tio */
		tio->info.ptr = tio + 1;
	}

	return 0;
}
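
/*
 * .queue_rq handler for blk-mq request-based DM: start the request,
 * initialize its embedded tio and hand it to map_request().  If the map
 * asks for a requeue, the accounting from dm_start_request() is undone
 * and BLK_MQ_RQ_QUEUE_BUSY is returned so blk-mq will retry the request.
 */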
static int dm_mq_queue_rq(struct blk_mq_hw_ctx *hctx,
			  const struct blk_mq_queue_data *bd)
{
	struct request *rq = bd->rq;
	struct dm_rq_target_io *tio = blk_mq_rq_to_pdu(rq);
	struct mapped_device *md = tio->md;
	struct dm_target *ti = md->immutable_target;

	if (unlikely(!ti)) {
		int srcu_idx;
		struct dm_table *map = dm_get_live_table(md, &srcu_idx);

		ti = dm_table_find_target(map, 0);
		dm_put_live_table(md, srcu_idx);
	}

	if (ti->type->busy && ti->type->busy(ti))
		return BLK_MQ_RQ_QUEUE_BUSY;

	dm_start_request(md, rq);

	/* Init tio using md established in .init_request */
	init_tio(tio, rq, md);

	/*
	 * Establish tio->ti before calling map_request().
	 */
	tio->ti = ti;

	/* Direct call is fine since .queue_rq allows allocations */
	if (map_request(tio) == DM_MAPIO_REQUEUE) {
		/* Undo dm_start_request() before requeuing */
		rq_end_stats(md, rq);
		rq_completed(md, rq_data_dir(rq), false);
		return BLK_MQ_RQ_QUEUE_BUSY;
	}

	return BLK_MQ_RQ_QUEUE_OK;
}

static struct blk_mq_ops dm_mq_ops = {
	.queue_rq = dm_mq_queue_rq,
	.complete = dm_softirq_done,
	.init_request = dm_mq_init_request,
};

int dm_mq_init_request_queue(struct mapped_device *md, struct dm_table *t)
{
	struct request_queue *q;
	struct dm_target *immutable_tgt;
	int err;

	if (!dm_table_all_blk_mq_devices(t)) {
		DMERR("request-based dm-mq may only be stacked on blk-mq device(s)");
		return -EINVAL;
	}

	md->tag_set = kzalloc_node(sizeof(struct blk_mq_tag_set), GFP_KERNEL, md->numa_node_id);
	if (!md->tag_set)
		return -ENOMEM;

	md->tag_set->ops = &dm_mq_ops;
	md->tag_set->queue_depth = dm_get_blk_mq_queue_depth();
	md->tag_set->numa_node = md->numa_node_id;
	md->tag_set->flags = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_SG_MERGE;
	md->tag_set->nr_hw_queues = dm_get_blk_mq_nr_hw_queues();
	md->tag_set->driver_data = md;

	md->tag_set->cmd_size = sizeof(struct dm_rq_target_io);
	immutable_tgt = dm_table_get_immutable_target(t);
	if (immutable_tgt && immutable_tgt->per_io_data_size) {
		/* any target-specific per-io data is immediately after the tio */
		md->tag_set->cmd_size += immutable_tgt->per_io_data_size;
		md->init_tio_pdu = true;
	}

	err = blk_mq_alloc_tag_set(md->tag_set);
	if (err)
		goto out_kfree_tag_set;

	q = blk_mq_init_allocated_queue(md->tag_set, md->queue);
	if (IS_ERR(q)) {
		err = PTR_ERR(q);
		goto out_tag_set;
	}
	dm_init_md_queue(md);

	/* backfill 'mq' sysfs registration normally done in blk_register_queue */
	blk_mq_register_dev(disk_to_dev(md->disk), q);

	return 0;

out_tag_set:
	blk_mq_free_tag_set(md->tag_set);
out_kfree_tag_set:
	kfree(md->tag_set);

	return err;
}

void dm_mq_cleanup_mapped_device(struct mapped_device *md)
{
	if (md->tag_set) {
		blk_mq_free_tag_set(md->tag_set);
		kfree(md->tag_set);
	}
}

module_param(reserved_rq_based_ios, uint, S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(reserved_rq_based_ios, "Reserved IOs in request-based mempools");

module_param(use_blk_mq, bool, S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(use_blk_mq, "Use block multiqueue for request-based DM devices");

module_param(dm_mq_nr_hw_queues, uint, S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(dm_mq_nr_hw_queues, "Number of hardware queues for request-based dm-mq devices");

module_param(dm_mq_queue_depth, uint, S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(dm_mq_queue_depth, "Queue depth for request-based dm-mq devices");
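
/*
 * Usage sketch (assuming this file is built into the usual dm_mod module,
 * so the parameters above appear under /sys/module/dm_mod/parameters/):
 *
 *   modprobe dm_mod dm_mq_nr_hw_queues=4 dm_mq_queue_depth=2048
 *   echo 512 > /sys/module/dm_mod/parameters/reserved_rq_based_ios
 *
 * use_blk_mq provides the default consulted by dm_use_blk_mq_default()
 * above when a new request-based device is created.
 */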