/*
 * Copyright (C) 2003 Sistina Software Limited.
 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
 *
 * This file is released under the GPL.
 */

#include <linux/device-mapper.h>

#include "dm.h"
#include "dm-path-selector.h"
#include "dm-uevent.h"

#include <linux/blkdev.h>
#include <linux/ctype.h>
#include <linux/init.h>
#include <linux/mempool.h>
#include <linux/module.h>
#include <linux/pagemap.h>
#include <linux/slab.h>
#include <linux/time.h>
#include <linux/workqueue.h>
#include <linux/delay.h>
#include <scsi/scsi_dh.h>
#include <linux/atomic.h>

#define DM_MSG_PREFIX "multipath"
#define DM_PG_INIT_DELAY_MSECS 2000
#define DM_PG_INIT_DELAY_DEFAULT ((unsigned) -1)

/* Path properties */
struct pgpath {
	struct list_head list;

	struct priority_group *pg;	/* Owning PG */
	unsigned is_active;		/* Path status */
	unsigned fail_count;		/* Cumulative failure count */

	struct dm_path path;
	struct delayed_work activate_path;
};

#define path_to_pgpath(__pgp) container_of((__pgp), struct pgpath, path)

/*
 * Paths are grouped into Priority Groups and numbered from 1 upwards.
 * Each has a path selector which controls which path gets used.
 */
struct priority_group {
	struct list_head list;

	struct multipath *m;		/* Owning multipath instance */
	struct path_selector ps;

	unsigned pg_num;		/* Reference number */
	unsigned bypassed;		/* Temporarily bypass this PG? */

	unsigned nr_pgpaths;		/* Number of paths in PG */
	struct list_head pgpaths;
};

/* Multipath context */
struct multipath {
	struct list_head list;
	struct dm_target *ti;

	const char *hw_handler_name;
	char *hw_handler_params;

	spinlock_t lock;

	unsigned nr_priority_groups;
	struct list_head priority_groups;

	wait_queue_head_t pg_init_wait;	/* Wait for pg_init completion */

	unsigned pg_init_required;	/* pg_init needs calling? */
	unsigned pg_init_in_progress;	/* Only one pg_init allowed at once */
	unsigned pg_init_delay_retry;	/* Delay pg_init retry? */

	unsigned nr_valid_paths;	/* Total number of usable paths */
	struct pgpath *current_pgpath;
	struct priority_group *current_pg;
	struct priority_group *next_pg;	/* Switch to this PG if set */
	unsigned repeat_count;		/* I/Os left before calling PS again */

	unsigned queue_io:1;		/* Must we queue all I/O? */
	unsigned queue_if_no_path:1;	/* Queue I/O if last path fails? */
	unsigned saved_queue_if_no_path:1; /* Saved state during suspension */
	unsigned retain_attached_hw_handler:1; /* If there's already a hw_handler present, don't change it. */
	unsigned pg_init_disabled:1;	/* pg_init is not currently allowed */

	unsigned pg_init_retries;	/* Number of times to retry pg_init */
	unsigned pg_init_count;		/* Number of times pg_init called */
	unsigned pg_init_delay_msecs;	/* Number of msecs before pg_init retry */

	struct work_struct trigger_event;

	/*
	 * We must use a mempool of dm_mpath_io structs so that we
	 * can resubmit bios on error.
	 */
	mempool_t *mpio_pool;

	struct mutex work_mutex;
};

/*
 * Context information attached to each bio we process.
 */
struct dm_mpath_io {
	struct pgpath *pgpath;
	size_t nr_bytes;
};

typedef int (*action_fn) (struct pgpath *pgpath);

static struct kmem_cache *_mpio_cache;

static struct workqueue_struct *kmultipathd, *kmpath_handlerd;
static void trigger_event(struct work_struct *work);
static void activate_path(struct work_struct *work);
static int __pgpath_busy(struct pgpath *pgpath);


/*-----------------------------------------------
 * Allocation routines
 *-----------------------------------------------*/

static struct pgpath *alloc_pgpath(void)
{
	struct pgpath *pgpath = kzalloc(sizeof(*pgpath), GFP_KERNEL);

	if (pgpath) {
		pgpath->is_active = 1;
		INIT_DELAYED_WORK(&pgpath->activate_path, activate_path);
	}

	return pgpath;
}

static void free_pgpath(struct pgpath *pgpath)
{
	kfree(pgpath);
}

static struct priority_group *alloc_priority_group(void)
{
	struct priority_group *pg;

	pg = kzalloc(sizeof(*pg), GFP_KERNEL);

	if (pg)
		INIT_LIST_HEAD(&pg->pgpaths);

	return pg;
}

static void free_pgpaths(struct list_head *pgpaths, struct dm_target *ti)
{
	struct pgpath *pgpath, *tmp;
	struct multipath *m = ti->private;

	list_for_each_entry_safe(pgpath, tmp, pgpaths, list) {
		list_del(&pgpath->list);
		if (m->hw_handler_name)
			scsi_dh_detach(bdev_get_queue(pgpath->path.dev->bdev));
		dm_put_device(ti, pgpath->path.dev);
		free_pgpath(pgpath);
	}
}

static void free_priority_group(struct priority_group *pg,
				struct dm_target *ti)
{
	struct path_selector *ps = &pg->ps;

	if (ps->type) {
		ps->type->destroy(ps);
		dm_put_path_selector(ps->type);
	}

	free_pgpaths(&pg->pgpaths, ti);
	kfree(pg);
}

static struct multipath *alloc_multipath(struct dm_target *ti)
{
	struct multipath *m;
	unsigned min_ios = dm_get_reserved_rq_based_ios();

	m = kzalloc(sizeof(*m), GFP_KERNEL);
	if (m) {
		INIT_LIST_HEAD(&m->priority_groups);
		spin_lock_init(&m->lock);
		m->queue_io = 1;
		m->pg_init_delay_msecs = DM_PG_INIT_DELAY_DEFAULT;
		INIT_WORK(&m->trigger_event, trigger_event);
		init_waitqueue_head(&m->pg_init_wait);
		mutex_init(&m->work_mutex);
		m->mpio_pool = mempool_create_slab_pool(min_ios, _mpio_cache);
		if (!m->mpio_pool) {
			kfree(m);
			return NULL;
		}
		m->ti = ti;
		ti->private = m;
	}

	return m;
}

static void free_multipath(struct multipath *m)
{
	struct priority_group *pg, *tmp;

	list_for_each_entry_safe(pg, tmp, &m->priority_groups, list) {
		list_del(&pg->list);
		free_priority_group(pg, m->ti);
	}

	kfree(m->hw_handler_name);
	kfree(m->hw_handler_params);
	mempool_destroy(m->mpio_pool);
	kfree(m);
}

static int set_mapinfo(struct multipath *m, union map_info *info)
{
	struct dm_mpath_io *mpio;

	mpio = mempool_alloc(m->mpio_pool, GFP_ATOMIC);
	if (!mpio)
		return -ENOMEM;

	memset(mpio, 0, sizeof(*mpio));
	info->ptr = mpio;

	return 0;
}

static void clear_mapinfo(struct multipath *m, union map_info *info)
{
	struct dm_mpath_io *mpio = info->ptr;

	info->ptr = NULL;
	mempool_free(mpio, m->mpio_pool);
}

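/*
 * Added note: set_mapinfo()/clear_mapinfo() bracket each mapped request.
 * __multipath_map() below allocates an mpio from m->mpio_pool and records
 * the chosen pgpath and byte count in it; multipath_end_io() reads those
 * fields back for the path selector and then returns the mpio to the pool.
 */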

/*-----------------------------------------------
 * Path selection
 *-----------------------------------------------*/

static int __pg_init_all_paths(struct multipath *m)
{
	struct pgpath *pgpath;
	unsigned long pg_init_delay = 0;

	if (m->pg_init_in_progress || m->pg_init_disabled)
		return 0;

	m->pg_init_count++;
	m->pg_init_required = 0;

	/* Check here to reset pg_init_required */
	if (!m->current_pg)
		return 0;

	if (m->pg_init_delay_retry)
		pg_init_delay = msecs_to_jiffies(m->pg_init_delay_msecs != DM_PG_INIT_DELAY_DEFAULT ?
						 m->pg_init_delay_msecs : DM_PG_INIT_DELAY_MSECS);
	list_for_each_entry(pgpath, &m->current_pg->pgpaths, list) {
		/* Skip failed paths */
		if (!pgpath->is_active)
			continue;
		if (queue_delayed_work(kmpath_handlerd, &pgpath->activate_path,
				       pg_init_delay))
			m->pg_init_in_progress++;
	}
	return m->pg_init_in_progress;
}

static void __switch_pg(struct multipath *m, struct pgpath *pgpath)
{
	m->current_pg = pgpath->pg;

	/* Must we initialise the PG first, and queue I/O till it's ready? */
	if (m->hw_handler_name) {
		m->pg_init_required = 1;
		m->queue_io = 1;
	} else {
		m->pg_init_required = 0;
		m->queue_io = 0;
	}

	m->pg_init_count = 0;
}

static int __choose_path_in_pg(struct multipath *m, struct priority_group *pg,
			       size_t nr_bytes)
{
	struct dm_path *path;

	path = pg->ps.type->select_path(&pg->ps, &m->repeat_count, nr_bytes);
	if (!path)
		return -ENXIO;

	m->current_pgpath = path_to_pgpath(path);

	if (m->current_pg != pg)
		__switch_pg(m, m->current_pgpath);

	return 0;
}

static void __choose_pgpath(struct multipath *m, size_t nr_bytes)
{
	struct priority_group *pg;
	unsigned bypassed = 1;

	if (!m->nr_valid_paths) {
		m->queue_io = 0;
		goto failed;
	}

	/* Were we instructed to switch PG? */
	if (m->next_pg) {
		pg = m->next_pg;
		m->next_pg = NULL;
		if (!__choose_path_in_pg(m, pg, nr_bytes))
			return;
	}

	/* Don't change PG until it has no remaining paths */
	if (m->current_pg && !__choose_path_in_pg(m, m->current_pg, nr_bytes))
		return;

	/*
	 * Loop through priority groups until we find a valid path.
	 * First time we skip PGs marked 'bypassed'.
	 * Second time we only try the ones we skipped, but set
	 * pg_init_delay_retry so we do not hammer controllers.
	 */
	do {
		list_for_each_entry(pg, &m->priority_groups, list) {
			if (pg->bypassed == bypassed)
				continue;
			if (!__choose_path_in_pg(m, pg, nr_bytes)) {
				if (!bypassed)
					m->pg_init_delay_retry = 1;
				return;
			}
		}
	} while (bypassed--);

failed:
	m->current_pgpath = NULL;
	m->current_pg = NULL;
}

/*
 * Check whether bios must be queued in the device-mapper core rather
 * than here in the target.
 *
 * m->lock must be held on entry.
 *
 * If m->queue_if_no_path and m->saved_queue_if_no_path hold the
 * same value then we are not between multipath_presuspend()
 * and multipath_resume() calls and we have no need to check
 * for the DMF_NOFLUSH_SUSPENDING flag.
 */
static int __must_push_back(struct multipath *m)
{
	return (m->queue_if_no_path ||
		(m->queue_if_no_path != m->saved_queue_if_no_path &&
		 dm_noflush_suspending(m->ti)));
}

/*
 * Map cloned requests
 */
static int __multipath_map(struct dm_target *ti, struct request *clone,
			   union map_info *map_context,
			   struct request *rq, struct request **__clone)
{
	struct multipath *m = (struct multipath *) ti->private;
	int r = DM_MAPIO_REQUEUE;
	size_t nr_bytes = clone ? blk_rq_bytes(clone) : blk_rq_bytes(rq);
	struct pgpath *pgpath;
	struct block_device *bdev;
	struct dm_mpath_io *mpio;

	spin_lock_irq(&m->lock);

	/* Do we need to select a new pgpath? */
	if (!m->current_pgpath ||
	    (!m->queue_io && (m->repeat_count && --m->repeat_count == 0)))
		__choose_pgpath(m, nr_bytes);

	pgpath = m->current_pgpath;

	if (!pgpath) {
		if (!__must_push_back(m))
			r = -EIO;	/* Failed */
		goto out_unlock;
	} else if (m->queue_io || m->pg_init_required) {
		__pg_init_all_paths(m);
		goto out_unlock;
	}

	if (set_mapinfo(m, map_context) < 0)
		/* ENOMEM, requeue */
		goto out_unlock;

	mpio = map_context->ptr;
	mpio->pgpath = pgpath;
	mpio->nr_bytes = nr_bytes;

	bdev = pgpath->path.dev->bdev;

	spin_unlock_irq(&m->lock);

	if (clone) {
		/* Old request-based interface: allocated clone is passed in */
		clone->q = bdev_get_queue(bdev);
		clone->rq_disk = bdev->bd_disk;
		clone->cmd_flags |= REQ_FAILFAST_TRANSPORT;
	} else {
		/* blk-mq request-based interface */
		*__clone = blk_get_request(bdev_get_queue(bdev),
					   rq_data_dir(rq), GFP_ATOMIC);
		if (IS_ERR(*__clone)) {
			/* ENOMEM, requeue */
			clear_mapinfo(m, map_context);
			return r;
		}
		(*__clone)->bio = (*__clone)->biotail = NULL;
		(*__clone)->rq_disk = bdev->bd_disk;
		(*__clone)->cmd_flags |= REQ_FAILFAST_TRANSPORT;
	}

	if (pgpath->pg->ps.type->start_io)
		pgpath->pg->ps.type->start_io(&pgpath->pg->ps,
					      &pgpath->path,
					      nr_bytes);
	return DM_MAPIO_REMAPPED;

out_unlock:
	spin_unlock_irq(&m->lock);

	return r;
}

static int multipath_map(struct dm_target *ti, struct request *clone,
			 union map_info *map_context)
{
	return __multipath_map(ti, clone, map_context, NULL, NULL);
}

static int multipath_clone_and_map(struct dm_target *ti, struct request *rq,
				   union map_info *map_context,
				   struct request **clone)
{
	return __multipath_map(ti, NULL, map_context, rq, clone);
}

static void multipath_release_clone(struct request *clone)
{
	blk_put_request(clone);
}

/*
 * If we run out of usable paths, should we queue I/O or error it?
 */
static int queue_if_no_path(struct multipath *m, unsigned queue_if_no_path,
			    unsigned save_old_value)
{
	unsigned long flags;

	spin_lock_irqsave(&m->lock, flags);

	if (save_old_value)
		m->saved_queue_if_no_path = m->queue_if_no_path;
	else
		m->saved_queue_if_no_path = queue_if_no_path;
	m->queue_if_no_path = queue_if_no_path;
	spin_unlock_irqrestore(&m->lock, flags);

	if (!queue_if_no_path)
		dm_table_run_md_queue_async(m->ti->table);

	return 0;
}

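/*
 * Added note (illustrative): besides the "queue_if_no_path" feature arg and
 * the suspend/resume hooks, queue_if_no_path() is reachable at run time via
 * target messages handled by multipath_message(), e.g.
 *
 *	dmsetup message <mpath-dev> 0 queue_if_no_path
 *	dmsetup message <mpath-dev> 0 fail_if_no_path
 *
 * where "<mpath-dev>" is a placeholder for the mapped device name.
 */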

/*
 * An event is triggered whenever a path is taken out of use.
 * Includes path failure and PG bypass.
 */
static void trigger_event(struct work_struct *work)
{
	struct multipath *m =
		container_of(work, struct multipath, trigger_event);

	dm_table_event(m->ti->table);
}

/*-----------------------------------------------------------------
 * Constructor/argument parsing:
 * <#multipath feature args> [<arg>]*
 * <#hw_handler args> [hw_handler [<arg>]*]
 * <#priority groups>
 * <initial priority group>
 *     [<selector> <#selector args> [<arg>]*
 *      <#paths> <#per-path selector args>
 *         [<path> [<arg>]* ]+ ]+
 *---------------------------------------------------------------*/
static int parse_path_selector(struct dm_arg_set *as, struct priority_group *pg,
			       struct dm_target *ti)
{
	int r;
	struct path_selector_type *pst;
	unsigned ps_argc;

	static struct dm_arg _args[] = {
		{0, 1024, "invalid number of path selector args"},
	};

	pst = dm_get_path_selector(dm_shift_arg(as));
	if (!pst) {
		ti->error = "unknown path selector type";
		return -EINVAL;
	}

	r = dm_read_arg_group(_args, as, &ps_argc, &ti->error);
	if (r) {
		dm_put_path_selector(pst);
		return -EINVAL;
	}

	r = pst->create(&pg->ps, ps_argc, as->argv);
	if (r) {
		dm_put_path_selector(pst);
		ti->error = "path selector constructor failed";
		return r;
	}

	pg->ps.type = pst;
	dm_consume_args(as, ps_argc);

	return 0;
}

static struct pgpath *parse_path(struct dm_arg_set *as, struct path_selector *ps,
				 struct dm_target *ti)
{
	int r;
	struct pgpath *p;
	struct multipath *m = ti->private;
	struct request_queue *q = NULL;
	const char *attached_handler_name;

	/* we need at least a path arg */
	if (as->argc < 1) {
		ti->error = "no device given";
		return ERR_PTR(-EINVAL);
	}

	p = alloc_pgpath();
	if (!p)
		return ERR_PTR(-ENOMEM);

	r = dm_get_device(ti, dm_shift_arg(as), dm_table_get_mode(ti->table),
			  &p->path.dev);
	if (r) {
		ti->error = "error getting device";
		goto bad;
	}

	if (m->retain_attached_hw_handler || m->hw_handler_name)
		q = bdev_get_queue(p->path.dev->bdev);

	if (m->retain_attached_hw_handler) {
		attached_handler_name = scsi_dh_attached_handler_name(q, GFP_KERNEL);
		if (attached_handler_name) {
			/*
			 * Reset hw_handler_name to match the attached handler
			 * and clear any hw_handler_params associated with the
			 * ignored handler.
			 *
			 * NB. This modifies the table line to show the actual
			 * handler instead of the original table passed in.
			 */
			kfree(m->hw_handler_name);
			m->hw_handler_name = attached_handler_name;

			kfree(m->hw_handler_params);
			m->hw_handler_params = NULL;
		}
	}

	if (m->hw_handler_name) {
		/*
		 * Increments scsi_dh reference, even when using an
		 * already-attached handler.
		 */
		r = scsi_dh_attach(q, m->hw_handler_name);
		if (r == -EBUSY) {
			/*
			 * Already attached to different hw_handler:
			 * try to reattach with correct one.
			 */
			scsi_dh_detach(q);
			r = scsi_dh_attach(q, m->hw_handler_name);
		}

		if (r < 0) {
			ti->error = "error attaching hardware handler";
			dm_put_device(ti, p->path.dev);
			goto bad;
		}

		if (m->hw_handler_params) {
			r = scsi_dh_set_params(q, m->hw_handler_params);
			if (r < 0) {
				ti->error = "unable to set hardware "
					    "handler parameters";
				scsi_dh_detach(q);
				dm_put_device(ti, p->path.dev);
				goto bad;
			}
		}
	}

	r = ps->type->add_path(ps, &p->path, as->argc, as->argv, &ti->error);
	if (r) {
		dm_put_device(ti, p->path.dev);
		goto bad;
	}

	return p;

bad:
	free_pgpath(p);
	return ERR_PTR(r);
}

static struct priority_group *parse_priority_group(struct dm_arg_set *as,
						   struct multipath *m)
{
	static struct dm_arg _args[] = {
		{1, 1024, "invalid number of paths"},
		{0, 1024, "invalid number of selector args"}
	};

	int r;
	unsigned i, nr_selector_args, nr_args;
	struct priority_group *pg;
	struct dm_target *ti = m->ti;

	if (as->argc < 2) {
		as->argc = 0;
		ti->error = "not enough priority group arguments";
		return ERR_PTR(-EINVAL);
	}

	pg = alloc_priority_group();
	if (!pg) {
		ti->error = "couldn't allocate priority group";
		return ERR_PTR(-ENOMEM);
	}
	pg->m = m;

	r = parse_path_selector(as, pg, ti);
	if (r)
		goto bad;

	/*
	 * read the paths
	 */
	r = dm_read_arg(_args, as, &pg->nr_pgpaths, &ti->error);
	if (r)
		goto bad;

	r = dm_read_arg(_args + 1, as, &nr_selector_args, &ti->error);
	if (r)
		goto bad;

	nr_args = 1 + nr_selector_args;
	for (i = 0; i < pg->nr_pgpaths; i++) {
		struct pgpath *pgpath;
		struct dm_arg_set path_args;

		if (as->argc < nr_args) {
			ti->error = "not enough path parameters";
			r = -EINVAL;
			goto bad;
		}

		path_args.argc = nr_args;
		path_args.argv = as->argv;

		pgpath = parse_path(&path_args, &pg->ps, ti);
		if (IS_ERR(pgpath)) {
			r = PTR_ERR(pgpath);
			goto bad;
		}

		pgpath->pg = pg;
		list_add_tail(&pgpath->list, &pg->pgpaths);
		dm_consume_args(as, nr_args);
	}

	return pg;

bad:
	free_priority_group(pg, ti);
	return ERR_PTR(r);
}

static int parse_hw_handler(struct dm_arg_set *as, struct multipath *m)
{
	unsigned hw_argc;
	int ret;
	struct dm_target *ti = m->ti;

	static struct dm_arg _args[] = {
		{0, 1024, "invalid number of hardware handler args"},
	};

	if (dm_read_arg_group(_args, as, &hw_argc, &ti->error))
		return -EINVAL;

	if (!hw_argc)
		return 0;

	m->hw_handler_name = kstrdup(dm_shift_arg(as), GFP_KERNEL);
	if (!try_then_request_module(scsi_dh_handler_exist(m->hw_handler_name),
				     "scsi_dh_%s", m->hw_handler_name)) {
		ti->error = "unknown hardware handler type";
		ret = -EINVAL;
		goto fail;
	}

	if (hw_argc > 1) {
		char *p;
		int i, j, len = 4;

		for (i = 0; i <= hw_argc - 2; i++)
			len += strlen(as->argv[i]) + 1;
		p = m->hw_handler_params = kzalloc(len, GFP_KERNEL);
		if (!p) {
			ti->error = "memory allocation failed";
			ret = -ENOMEM;
			goto fail;
		}
		j = sprintf(p, "%d", hw_argc - 1);
		for (i = 0, p += j + 1; i <= hw_argc - 2; i++, p += j + 1)
			j = sprintf(p, "%s", as->argv[i]);
	}
	dm_consume_args(as, hw_argc - 1);

	return 0;
fail:
	kfree(m->hw_handler_name);
	m->hw_handler_name = NULL;
	return ret;
}

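/*
 * Added example (illustrative): the feature block consumed by
 * parse_features() below is a count followed by that many words, e.g.
 *
 *	5 queue_if_no_path pg_init_retries 3 pg_init_delay_msecs 2000
 *
 * which enables queueing when all paths are lost, allows 3 pg_init retries
 * and delays each retry by 2000 msecs.
 */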

static int parse_features(struct dm_arg_set *as, struct multipath *m)
{
	int r;
	unsigned argc;
	struct dm_target *ti = m->ti;
	const char *arg_name;

	static struct dm_arg _args[] = {
		{0, 6, "invalid number of feature args"},
		{1, 50, "pg_init_retries must be between 1 and 50"},
		{0, 60000, "pg_init_delay_msecs must be between 0 and 60000"},
	};

	r = dm_read_arg_group(_args, as, &argc, &ti->error);
	if (r)
		return -EINVAL;

	if (!argc)
		return 0;

	do {
		arg_name = dm_shift_arg(as);
		argc--;

		if (!strcasecmp(arg_name, "queue_if_no_path")) {
			r = queue_if_no_path(m, 1, 0);
			continue;
		}

		if (!strcasecmp(arg_name, "retain_attached_hw_handler")) {
			m->retain_attached_hw_handler = 1;
			continue;
		}

		if (!strcasecmp(arg_name, "pg_init_retries") &&
		    (argc >= 1)) {
			r = dm_read_arg(_args + 1, as, &m->pg_init_retries, &ti->error);
			argc--;
			continue;
		}

		if (!strcasecmp(arg_name, "pg_init_delay_msecs") &&
		    (argc >= 1)) {
			r = dm_read_arg(_args + 2, as, &m->pg_init_delay_msecs, &ti->error);
			argc--;
			continue;
		}

		ti->error = "Unrecognised multipath feature request";
		r = -EINVAL;
	} while (argc && !r);

	return r;
}

static int multipath_ctr(struct dm_target *ti, unsigned int argc,
			 char **argv)
{
	/* target arguments */
	static struct dm_arg _args[] = {
		{0, 1024, "invalid number of priority groups"},
		{0, 1024, "invalid initial priority group number"},
	};

	int r;
	struct multipath *m;
	struct dm_arg_set as;
	unsigned pg_count = 0;
	unsigned next_pg_num;

	as.argc = argc;
	as.argv = argv;

	m = alloc_multipath(ti);
	if (!m) {
		ti->error = "can't allocate multipath";
		return -EINVAL;
	}

	r = parse_features(&as, m);
	if (r)
		goto bad;

	r = parse_hw_handler(&as, m);
	if (r)
		goto bad;

	r = dm_read_arg(_args, &as, &m->nr_priority_groups, &ti->error);
	if (r)
		goto bad;

	r = dm_read_arg(_args + 1, &as, &next_pg_num, &ti->error);
	if (r)
		goto bad;

	if ((!m->nr_priority_groups && next_pg_num) ||
	    (m->nr_priority_groups && !next_pg_num)) {
		ti->error = "invalid initial priority group";
		r = -EINVAL;
		goto bad;
	}

	/* parse the priority groups */
	while (as.argc) {
		struct priority_group *pg;

		pg = parse_priority_group(&as, m);
		if (IS_ERR(pg)) {
			r = PTR_ERR(pg);
			goto bad;
		}

		m->nr_valid_paths += pg->nr_pgpaths;
		list_add_tail(&pg->list, &m->priority_groups);
		pg_count++;
		pg->pg_num = pg_count;
		if (!--next_pg_num)
			m->next_pg = pg;
	}

	if (pg_count != m->nr_priority_groups) {
		ti->error = "priority group count mismatch";
		r = -EINVAL;
		goto bad;
	}

	ti->num_flush_bios = 1;
	ti->num_discard_bios = 1;
	ti->num_write_same_bios = 1;

	return 0;

bad:
	free_multipath(m);
	return r;
}

static void multipath_wait_for_pg_init_completion(struct multipath *m)
{
	DECLARE_WAITQUEUE(wait, current);
	unsigned long flags;

	add_wait_queue(&m->pg_init_wait, &wait);

	while (1) {
		set_current_state(TASK_UNINTERRUPTIBLE);

		spin_lock_irqsave(&m->lock, flags);
		if (!m->pg_init_in_progress) {
			spin_unlock_irqrestore(&m->lock, flags);
			break;
		}
		spin_unlock_irqrestore(&m->lock, flags);

		io_schedule();
	}
	set_current_state(TASK_RUNNING);

	remove_wait_queue(&m->pg_init_wait, &wait);
}

static void flush_multipath_work(struct multipath *m)
{
	unsigned long flags;

	spin_lock_irqsave(&m->lock, flags);
	m->pg_init_disabled = 1;
	spin_unlock_irqrestore(&m->lock, flags);

	flush_workqueue(kmpath_handlerd);
	multipath_wait_for_pg_init_completion(m);
	flush_workqueue(kmultipathd);
	flush_work(&m->trigger_event);

	spin_lock_irqsave(&m->lock, flags);
	m->pg_init_disabled = 0;
	spin_unlock_irqrestore(&m->lock, flags);
}

static void multipath_dtr(struct dm_target *ti)
{
	struct multipath *m = ti->private;

	flush_multipath_work(m);
	free_multipath(m);
}

/*
 * Take a path out of use.
 */
static int fail_path(struct pgpath *pgpath)
{
	unsigned long flags;
	struct multipath *m = pgpath->pg->m;

	spin_lock_irqsave(&m->lock, flags);

	if (!pgpath->is_active)
		goto out;

	DMWARN("Failing path %s.", pgpath->path.dev->name);

	pgpath->pg->ps.type->fail_path(&pgpath->pg->ps, &pgpath->path);
	pgpath->is_active = 0;
	pgpath->fail_count++;

	m->nr_valid_paths--;

	if (pgpath == m->current_pgpath)
		m->current_pgpath = NULL;

	dm_path_uevent(DM_UEVENT_PATH_FAILED, m->ti,
		       pgpath->path.dev->name, m->nr_valid_paths);

	schedule_work(&m->trigger_event);

out:
	spin_unlock_irqrestore(&m->lock, flags);

	return 0;
}

/*
 * Reinstate a previously-failed path
 */
static int reinstate_path(struct pgpath *pgpath)
{
	int r = 0, run_queue = 0;
	unsigned long flags;
	struct multipath *m = pgpath->pg->m;

	spin_lock_irqsave(&m->lock, flags);

	if (pgpath->is_active)
		goto out;

	if (!pgpath->pg->ps.type->reinstate_path) {
		DMWARN("Reinstate path not supported by path selector %s",
		       pgpath->pg->ps.type->name);
		r = -EINVAL;
		goto out;
	}

	r = pgpath->pg->ps.type->reinstate_path(&pgpath->pg->ps, &pgpath->path);
	if (r)
		goto out;

	pgpath->is_active = 1;

	if (!m->nr_valid_paths++) {
		m->current_pgpath = NULL;
		run_queue = 1;
	} else if (m->hw_handler_name && (m->current_pg == pgpath->pg)) {
		if (queue_work(kmpath_handlerd, &pgpath->activate_path.work))
			m->pg_init_in_progress++;
	}

	dm_path_uevent(DM_UEVENT_PATH_REINSTATED, m->ti,
		       pgpath->path.dev->name, m->nr_valid_paths);

	schedule_work(&m->trigger_event);

out:
	spin_unlock_irqrestore(&m->lock, flags);
	if (run_queue)
		dm_table_run_md_queue_async(m->ti->table);

	return r;
}

/*
 * Fail or reinstate all paths that match the provided struct dm_dev.
 */
static int action_dev(struct multipath *m, struct dm_dev *dev,
		      action_fn action)
{
	int r = -EINVAL;
	struct pgpath *pgpath;
	struct priority_group *pg;

	list_for_each_entry(pg, &m->priority_groups, list) {
		list_for_each_entry(pgpath, &pg->pgpaths, list) {
			if (pgpath->path.dev == dev)
				r = action(pgpath);
		}
	}

	return r;
}

/*
 * Temporarily try to avoid having to use the specified PG
 */
static void bypass_pg(struct multipath *m, struct priority_group *pg,
		      int bypassed)
{
	unsigned long flags;

	spin_lock_irqsave(&m->lock, flags);

	pg->bypassed = bypassed;
	m->current_pgpath = NULL;
	m->current_pg = NULL;

	spin_unlock_irqrestore(&m->lock, flags);

	schedule_work(&m->trigger_event);
}

/*
 * Switch to using the specified PG from the next I/O that gets mapped
 */
static int switch_pg_num(struct multipath *m, const char *pgstr)
{
	struct priority_group *pg;
	unsigned pgnum;
	unsigned long flags;
	char dummy;

	if (!pgstr || (sscanf(pgstr, "%u%c", &pgnum, &dummy) != 1) || !pgnum ||
	    (pgnum > m->nr_priority_groups)) {
		DMWARN("invalid PG number supplied to switch_pg_num");
		return -EINVAL;
	}

	spin_lock_irqsave(&m->lock, flags);
	list_for_each_entry(pg, &m->priority_groups, list) {
		pg->bypassed = 0;
		if (--pgnum)
			continue;

		m->current_pgpath = NULL;
		m->current_pg = NULL;
		m->next_pg = pg;
	}
	spin_unlock_irqrestore(&m->lock, flags);

	schedule_work(&m->trigger_event);
	return 0;
}

/*
 * Set/clear bypassed status of a PG.
 * PGs are numbered upwards from 1 in the order they were declared.
 */
static int bypass_pg_num(struct multipath *m, const char *pgstr, int bypassed)
{
	struct priority_group *pg;
	unsigned pgnum;
	char dummy;

	if (!pgstr || (sscanf(pgstr, "%u%c", &pgnum, &dummy) != 1) || !pgnum ||
	    (pgnum > m->nr_priority_groups)) {
		DMWARN("invalid PG number supplied to bypass_pg");
		return -EINVAL;
	}

	list_for_each_entry(pg, &m->priority_groups, list) {
		if (!--pgnum)
			break;
	}

	bypass_pg(m, pg, bypassed);
	return 0;
}

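/*
 * Added note (illustrative): bypass_pg_num() and switch_pg_num() above are
 * driven from multipath_message(), with groups numbered from 1 in table
 * order, e.g.
 *
 *	dmsetup message <mpath-dev> 0 disable_group 2
 *	dmsetup message <mpath-dev> 0 switch_group 1
 */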

/*
 * Should we retry pg_init immediately?
 */
static int pg_init_limit_reached(struct multipath *m, struct pgpath *pgpath)
{
	unsigned long flags;
	int limit_reached = 0;

	spin_lock_irqsave(&m->lock, flags);

	if (m->pg_init_count <= m->pg_init_retries && !m->pg_init_disabled)
		m->pg_init_required = 1;
	else
		limit_reached = 1;

	spin_unlock_irqrestore(&m->lock, flags);

	return limit_reached;
}

static void pg_init_done(void *data, int errors)
{
	struct pgpath *pgpath = data;
	struct priority_group *pg = pgpath->pg;
	struct multipath *m = pg->m;
	unsigned long flags;
	unsigned delay_retry = 0;

	/* device or driver problems */
	switch (errors) {
	case SCSI_DH_OK:
		break;
	case SCSI_DH_NOSYS:
		if (!m->hw_handler_name) {
			errors = 0;
			break;
		}
		DMERR("Could not failover the device: Handler scsi_dh_%s "
		      "Error %d.", m->hw_handler_name, errors);
		/*
		 * Fail path for now, so we do not ping pong
		 */
		fail_path(pgpath);
		break;
	case SCSI_DH_DEV_TEMP_BUSY:
		/*
		 * Probably doing something like FW upgrade on the
		 * controller, so try the other pg.
		 */
		bypass_pg(m, pg, 1);
		break;
	case SCSI_DH_RETRY:
		/* Wait before retrying. */
		delay_retry = 1;
		/* fall through */
	case SCSI_DH_IMM_RETRY:
	case SCSI_DH_RES_TEMP_UNAVAIL:
		if (pg_init_limit_reached(m, pgpath))
			fail_path(pgpath);
		errors = 0;
		break;
	default:
		/*
		 * We probably do not want to fail the path for a device
		 * error, but this is what the old dm did. In future
		 * patches we can do more advanced handling.
		 */
		fail_path(pgpath);
	}

	spin_lock_irqsave(&m->lock, flags);
	if (errors) {
		if (pgpath == m->current_pgpath) {
			DMERR("Could not failover device. Error %d.", errors);
			m->current_pgpath = NULL;
			m->current_pg = NULL;
		}
	} else if (!m->pg_init_required)
		pg->bypassed = 0;

	if (--m->pg_init_in_progress)
		/* Activations of other paths are still ongoing */
		goto out;

	if (m->pg_init_required) {
		m->pg_init_delay_retry = delay_retry;
		if (__pg_init_all_paths(m))
			goto out;
	}
	m->queue_io = 0;

	/*
	 * Wake up any thread waiting to suspend.
	 */
	wake_up(&m->pg_init_wait);

out:
	spin_unlock_irqrestore(&m->lock, flags);
}

static void activate_path(struct work_struct *work)
{
	struct pgpath *pgpath =
		container_of(work, struct pgpath, activate_path.work);

	if (pgpath->is_active)
		scsi_dh_activate(bdev_get_queue(pgpath->path.dev->bdev),
				 pg_init_done, pgpath);
	else
		pg_init_done(pgpath, SCSI_DH_DEV_OFFLINED);
}

static int noretry_error(int error)
{
	switch (error) {
	case -EOPNOTSUPP:
	case -EREMOTEIO:
	case -EILSEQ:
	case -ENODATA:
	case -ENOSPC:
		return 1;
	}

	/* Anything else could be a path failure, so should be retried */
	return 0;
}

/*
 * end_io handling
 */
static int do_end_io(struct multipath *m, struct request *clone,
		     int error, struct dm_mpath_io *mpio)
{
	/*
	 * We don't queue any clone request inside the multipath target
	 * during end I/O handling, since those clone requests don't have
	 * bio clones.  If we queue them inside the multipath target,
	 * we need to make bio clones, which requires memory allocation.
	 * (See drivers/md/dm.c:end_clone_bio() about why the clone requests
	 *  don't have bio clones.)
	 * Instead of queueing the clone request here, we queue the original
	 * request into dm core, which will remake a clone request and
	 * clone bios for it and resubmit it later.
	 */
	int r = DM_ENDIO_REQUEUE;
	unsigned long flags;

	if (!error && !clone->errors)
		return 0;	/* I/O complete */

	if (noretry_error(error))
		return error;

	if (mpio->pgpath)
		fail_path(mpio->pgpath);

	spin_lock_irqsave(&m->lock, flags);
	if (!m->nr_valid_paths) {
		if (!m->queue_if_no_path) {
			if (!__must_push_back(m))
				r = -EIO;
		} else {
			if (error == -EBADE)
				r = error;
		}
	}
	spin_unlock_irqrestore(&m->lock, flags);

	return r;
}

static int multipath_end_io(struct dm_target *ti, struct request *clone,
			    int error, union map_info *map_context)
{
	struct multipath *m = ti->private;
	struct dm_mpath_io *mpio = map_context->ptr;
	struct pgpath *pgpath;
	struct path_selector *ps;
	int r;

	BUG_ON(!mpio);

	r = do_end_io(m, clone, error, mpio);
	pgpath = mpio->pgpath;
	if (pgpath) {
		ps = &pgpath->pg->ps;
		if (ps->type->end_io)
			ps->type->end_io(ps, &pgpath->path, mpio->nr_bytes);
	}
	clear_mapinfo(m, map_context);

	return r;
}

/*
 * Suspend can't complete until all the I/O is processed so if
 * the last path fails we must error any remaining I/O.
 * Note that if the freeze_bdev fails while suspending, the
 * queue_if_no_path state is lost - userspace should reset it.
 */
static void multipath_presuspend(struct dm_target *ti)
{
	struct multipath *m = (struct multipath *) ti->private;

	queue_if_no_path(m, 0, 1);
}

static void multipath_postsuspend(struct dm_target *ti)
{
	struct multipath *m = ti->private;

	mutex_lock(&m->work_mutex);
	flush_multipath_work(m);
	mutex_unlock(&m->work_mutex);
}

/*
 * Restore the queue_if_no_path setting.
 */
static void multipath_resume(struct dm_target *ti)
{
	struct multipath *m = (struct multipath *) ti->private;
	unsigned long flags;

	spin_lock_irqsave(&m->lock, flags);
	m->queue_if_no_path = m->saved_queue_if_no_path;
	spin_unlock_irqrestore(&m->lock, flags);
}

/*
 * Info output has the following format:
 * num_multipath_feature_args [multipath_feature_args]*
 * num_handler_status_args [handler_status_args]*
 * num_groups init_group_number
 *             [A|D|E num_ps_status_args [ps_status_args]*
 *              num_paths num_selector_args
 *              [path_dev A|F fail_count [selector_args]* ]+ ]+
 *
 * Table output has the following format (identical to the constructor string):
 * num_feature_args [features_args]*
 * num_handler_args hw_handler [hw_handler_args]*
 * num_groups init_group_number
 *     [priority selector-name num_ps_args [ps_args]*
 *      num_paths num_selector_args [path_dev [selector_args]* ]+ ]+
 */
static void multipath_status(struct dm_target *ti, status_type_t type,
			     unsigned status_flags, char *result, unsigned maxlen)
{
	int sz = 0;
	unsigned long flags;
	struct multipath *m = (struct multipath *) ti->private;
	struct priority_group *pg;
	struct pgpath *p;
	unsigned pg_num;
	char state;

	spin_lock_irqsave(&m->lock, flags);

	/* Features */
	if (type == STATUSTYPE_INFO)
		DMEMIT("2 %u %u ", m->queue_io, m->pg_init_count);
	else {
		DMEMIT("%u ", m->queue_if_no_path +
			      (m->pg_init_retries > 0) * 2 +
			      (m->pg_init_delay_msecs != DM_PG_INIT_DELAY_DEFAULT) * 2 +
			      m->retain_attached_hw_handler);
		if (m->queue_if_no_path)
			DMEMIT("queue_if_no_path ");
		if (m->pg_init_retries)
			DMEMIT("pg_init_retries %u ", m->pg_init_retries);
		if (m->pg_init_delay_msecs != DM_PG_INIT_DELAY_DEFAULT)
			DMEMIT("pg_init_delay_msecs %u ", m->pg_init_delay_msecs);
		if (m->retain_attached_hw_handler)
			DMEMIT("retain_attached_hw_handler ");
	}

	if (!m->hw_handler_name || type == STATUSTYPE_INFO)
		DMEMIT("0 ");
	else
		DMEMIT("1 %s ", m->hw_handler_name);

	DMEMIT("%u ", m->nr_priority_groups);

	if (m->next_pg)
		pg_num = m->next_pg->pg_num;
	else if (m->current_pg)
		pg_num = m->current_pg->pg_num;
	else
		pg_num = (m->nr_priority_groups ? 1 : 0);

	DMEMIT("%u ", pg_num);

	switch (type) {
	case STATUSTYPE_INFO:
		list_for_each_entry(pg, &m->priority_groups, list) {
			if (pg->bypassed)
				state = 'D';	/* Disabled */
			else if (pg == m->current_pg)
				state = 'A';	/* Currently Active */
			else
				state = 'E';	/* Enabled */

			DMEMIT("%c ", state);

			if (pg->ps.type->status)
				sz += pg->ps.type->status(&pg->ps, NULL, type,
							  result + sz,
							  maxlen - sz);
			else
				DMEMIT("0 ");

			DMEMIT("%u %u ", pg->nr_pgpaths,
			       pg->ps.type->info_args);

			list_for_each_entry(p, &pg->pgpaths, list) {
				DMEMIT("%s %s %u ", p->path.dev->name,
				       p->is_active ? "A" : "F",
				       p->fail_count);
				if (pg->ps.type->status)
					sz += pg->ps.type->status(&pg->ps,
					      &p->path, type, result + sz,
					      maxlen - sz);
			}
		}
		break;

	case STATUSTYPE_TABLE:
		list_for_each_entry(pg, &m->priority_groups, list) {
			DMEMIT("%s ", pg->ps.type->name);

			if (pg->ps.type->status)
				sz += pg->ps.type->status(&pg->ps, NULL, type,
							  result + sz,
							  maxlen - sz);
			else
				DMEMIT("0 ");

			DMEMIT("%u %u ", pg->nr_pgpaths,
			       pg->ps.type->table_args);

			list_for_each_entry(p, &pg->pgpaths, list) {
				DMEMIT("%s ", p->path.dev->name);
				if (pg->ps.type->status)
					sz += pg->ps.type->status(&pg->ps,
					      &p->path, type, result + sz,
					      maxlen - sz);
			}
		}
		break;
	}

	spin_unlock_irqrestore(&m->lock, flags);
}

static int multipath_message(struct dm_target *ti, unsigned argc, char **argv)
{
	int r = -EINVAL;
	struct dm_dev *dev;
	struct multipath *m = (struct multipath *) ti->private;
	action_fn action;

	mutex_lock(&m->work_mutex);

	if (dm_suspended(ti)) {
		r = -EBUSY;
		goto out;
	}

	if (argc == 1) {
		if (!strcasecmp(argv[0], "queue_if_no_path")) {
			r = queue_if_no_path(m, 1, 0);
			goto out;
		} else if (!strcasecmp(argv[0], "fail_if_no_path")) {
			r = queue_if_no_path(m, 0, 0);
			goto out;
		}
	}

	if (argc != 2) {
		DMWARN("Invalid multipath message arguments. Expected 2 arguments, got %d.", argc);
		goto out;
	}

	if (!strcasecmp(argv[0], "disable_group")) {
		r = bypass_pg_num(m, argv[1], 1);
		goto out;
	} else if (!strcasecmp(argv[0], "enable_group")) {
		r = bypass_pg_num(m, argv[1], 0);
		goto out;
	} else if (!strcasecmp(argv[0], "switch_group")) {
		r = switch_pg_num(m, argv[1]);
		goto out;
	} else if (!strcasecmp(argv[0], "reinstate_path"))
		action = reinstate_path;
	else if (!strcasecmp(argv[0], "fail_path"))
		action = fail_path;
	else {
		DMWARN("Unrecognised multipath message received: %s", argv[0]);
		goto out;
	}

	r = dm_get_device(ti, argv[1], dm_table_get_mode(ti->table), &dev);
	if (r) {
		DMWARN("message: error getting device %s",
		       argv[1]);
		goto out;
	}

	r = action_dev(m, dev, action);

	dm_put_device(ti, dev);

out:
	mutex_unlock(&m->work_mutex);
	return r;
}

static int multipath_ioctl(struct dm_target *ti, unsigned int cmd,
			   unsigned long arg)
{
	struct multipath *m = ti->private;
	struct pgpath *pgpath;
	struct block_device *bdev;
	fmode_t mode;
	unsigned long flags;
	int r;

	bdev = NULL;
	mode = 0;
	r = 0;

	spin_lock_irqsave(&m->lock, flags);

	if (!m->current_pgpath)
		__choose_pgpath(m, 0);

	pgpath = m->current_pgpath;

	if (pgpath) {
		bdev = pgpath->path.dev->bdev;
		mode = pgpath->path.dev->mode;
	}

	if ((pgpath && m->queue_io) || (!pgpath && m->queue_if_no_path))
		r = -ENOTCONN;
	else if (!bdev)
		r = -EIO;

	spin_unlock_irqrestore(&m->lock, flags);

	/*
	 * Only pass ioctls through if the device sizes match exactly.
	 */
	if (!bdev || ti->len != i_size_read(bdev->bd_inode) >> SECTOR_SHIFT) {
		int err = scsi_verify_blk_ioctl(NULL, cmd);
		if (err)
			r = err;
	}

	if (r == -ENOTCONN && !fatal_signal_pending(current)) {
		spin_lock_irqsave(&m->lock, flags);
		if (!m->current_pg) {
			/* Path status changed, redo selection */
			__choose_pgpath(m, 0);
		}
		if (m->pg_init_required)
			__pg_init_all_paths(m);
		spin_unlock_irqrestore(&m->lock, flags);
		dm_table_run_md_queue_async(m->ti->table);
	}

	return r ? : __blkdev_driver_ioctl(bdev, mode, cmd, arg);
}

static int multipath_iterate_devices(struct dm_target *ti,
				     iterate_devices_callout_fn fn, void *data)
{
	struct multipath *m = ti->private;
	struct priority_group *pg;
	struct pgpath *p;
	int ret = 0;

	list_for_each_entry(pg, &m->priority_groups, list) {
		list_for_each_entry(p, &pg->pgpaths, list) {
			ret = fn(ti, p->path.dev, ti->begin, ti->len, data);
			if (ret)
				goto out;
		}
	}

out:
	return ret;
}

static int __pgpath_busy(struct pgpath *pgpath)
{
	struct request_queue *q = bdev_get_queue(pgpath->path.dev->bdev);

	return blk_lld_busy(q);
}

/*
 * We return "busy" only when we can map I/Os but the underlying devices
 * are busy (so even if we map I/Os now, the I/Os will wait on
 * the underlying queue).
 * In other words, if we want to kill I/Os or queue them inside us
 * due to map unavailability, we don't return "busy".  Otherwise,
 * dm core won't give us the I/Os and we can't do what we want.
 */
static int multipath_busy(struct dm_target *ti)
{
	int busy = 0, has_active = 0;
	struct multipath *m = ti->private;
	struct priority_group *pg;
	struct pgpath *pgpath;
	unsigned long flags;

	spin_lock_irqsave(&m->lock, flags);

	/* pg_init in progress or no paths available */
	if (m->pg_init_in_progress ||
	    (!m->nr_valid_paths && m->queue_if_no_path)) {
		busy = 1;
		goto out;
	}
	/* Guess which priority_group will be used at next mapping time */
	if (unlikely(!m->current_pgpath && m->next_pg))
		pg = m->next_pg;
	else if (likely(m->current_pg))
		pg = m->current_pg;
	else
		/*
		 * We don't know which pg will be used at next mapping time.
		 * We don't call __choose_pgpath() here to avoid triggering
		 * pg_init just because of a busy check, so we also don't know
		 * whether the underlying devices we would be using at next
		 * mapping time are busy or not.  Just try mapping.
		 */
		goto out;

	/*
	 * If there is at least one non-busy active path, the path selector
	 * will be able to select it.  So we consider such a pg as not busy.
	 */
	busy = 1;
	list_for_each_entry(pgpath, &pg->pgpaths, list)
		if (pgpath->is_active) {
			has_active = 1;

			if (!__pgpath_busy(pgpath)) {
				busy = 0;
				break;
			}
		}

	if (!has_active)
		/*
		 * No active path in this pg, so this pg won't be used and
		 * the current_pg will be changed at next mapping time.
		 * We need to try mapping to determine it.
		 */
		busy = 0;

out:
	spin_unlock_irqrestore(&m->lock, flags);

	return busy;
}

/*-----------------------------------------------------------------
 * Module setup
 *---------------------------------------------------------------*/
static struct target_type multipath_target = {
	.name = "multipath",
	.version = {1, 9, 0},
	.module = THIS_MODULE,
	.ctr = multipath_ctr,
	.dtr = multipath_dtr,
	.map_rq = multipath_map,
	.clone_and_map_rq = multipath_clone_and_map,
	.release_clone_rq = multipath_release_clone,
	.rq_end_io = multipath_end_io,
	.presuspend = multipath_presuspend,
	.postsuspend = multipath_postsuspend,
	.resume = multipath_resume,
	.status = multipath_status,
	.message = multipath_message,
	.ioctl = multipath_ioctl,
	.iterate_devices = multipath_iterate_devices,
	.busy = multipath_busy,
};

static int __init dm_multipath_init(void)
{
	int r;

	/* allocate a slab for the dm_ios */
	_mpio_cache = KMEM_CACHE(dm_mpath_io, 0);
	if (!_mpio_cache)
		return -ENOMEM;

	r = dm_register_target(&multipath_target);
	if (r < 0) {
		DMERR("register failed %d", r);
		r = -EINVAL;
		goto bad_register_target;
	}

	kmultipathd = alloc_workqueue("kmpathd", WQ_MEM_RECLAIM, 0);
	if (!kmultipathd) {
		DMERR("failed to create workqueue kmpathd");
		r = -ENOMEM;
		goto bad_alloc_kmultipathd;
	}

	/*
	 * A separate workqueue is used to handle the device handlers
	 * to avoid overloading existing workqueue. Overloading the
	 * old workqueue would also create a bottleneck in the
	 * path of the storage hardware device activation.
	 */
	kmpath_handlerd = alloc_ordered_workqueue("kmpath_handlerd",
						  WQ_MEM_RECLAIM);
	if (!kmpath_handlerd) {
		DMERR("failed to create workqueue kmpath_handlerd");
		r = -ENOMEM;
		goto bad_alloc_kmpath_handlerd;
	}

	DMINFO("version %u.%u.%u loaded",
	       multipath_target.version[0], multipath_target.version[1],
	       multipath_target.version[2]);

	return 0;

bad_alloc_kmpath_handlerd:
	destroy_workqueue(kmultipathd);
bad_alloc_kmultipathd:
	dm_unregister_target(&multipath_target);
bad_register_target:
	kmem_cache_destroy(_mpio_cache);

	return r;
}

static void __exit dm_multipath_exit(void)
{
	destroy_workqueue(kmpath_handlerd);
	destroy_workqueue(kmultipathd);

	dm_unregister_target(&multipath_target);
	kmem_cache_destroy(_mpio_cache);
}

module_init(dm_multipath_init);
module_exit(dm_multipath_exit);

MODULE_DESCRIPTION(DM_NAME " multipath target");
MODULE_AUTHOR("Sistina Software <dm-devel@redhat.com>");
MODULE_LICENSE("GPL");