/*
 * Copyright (C) 2003 Sistina Software Limited.
 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
 *
 * This file is released under the GPL.
 */

#include <linux/device-mapper.h>

#include "dm-path-selector.h"
#include "dm-uevent.h"

#include <linux/ctype.h>
#include <linux/init.h>
#include <linux/mempool.h>
#include <linux/module.h>
#include <linux/pagemap.h>
#include <linux/slab.h>
#include <linux/time.h>
#include <linux/workqueue.h>
#include <linux/delay.h>
#include <scsi/scsi_dh.h>
#include <linux/atomic.h>

#define DM_MSG_PREFIX "multipath"
#define DM_PG_INIT_DELAY_MSECS 2000
#define DM_PG_INIT_DELAY_DEFAULT ((unsigned) -1)

/* Path properties */
struct pgpath {
	struct list_head list;

	struct priority_group *pg;	/* Owning PG */
	unsigned is_active;		/* Path status */
	unsigned fail_count;		/* Cumulative failure count */

	struct dm_path path;
	struct delayed_work activate_path;
};

#define path_to_pgpath(__pgp) container_of((__pgp), struct pgpath, path)

/*
 * Paths are grouped into Priority Groups and numbered from 1 upwards.
 * Each has a path selector which controls which path gets used.
 */
struct priority_group {
	struct list_head list;

	struct multipath *m;		/* Owning multipath instance */
	struct path_selector ps;

	unsigned pg_num;		/* Reference number */
	unsigned bypassed;		/* Temporarily bypass this PG? */

	unsigned nr_pgpaths;		/* Number of paths in PG */
	struct list_head pgpaths;
};

/* Multipath context */
struct multipath {
	struct list_head list;
	struct dm_target *ti;

	const char *hw_handler_name;
	char *hw_handler_params;

	spinlock_t lock;

	unsigned nr_priority_groups;
	struct list_head priority_groups;

	wait_queue_head_t pg_init_wait;	/* Wait for pg_init completion */

	unsigned pg_init_required;	/* pg_init needs calling? */
	unsigned pg_init_in_progress;	/* Only one pg_init allowed at once */
	unsigned pg_init_delay_retry;	/* Delay pg_init retry? */

	unsigned nr_valid_paths;	/* Total number of usable paths */
	struct pgpath *current_pgpath;
	struct priority_group *current_pg;
	struct priority_group *next_pg;	/* Switch to this PG if set */
	unsigned repeat_count;		/* I/Os left before calling PS again */

	unsigned queue_io:1;		/* Must we queue all I/O? */
	unsigned queue_if_no_path:1;	/* Queue I/O if last path fails? */
	unsigned saved_queue_if_no_path:1; /* Saved state during suspension */
	unsigned retain_attached_hw_handler:1; /* If there's already a hw_handler present, don't change it. */

	unsigned pg_init_retries;	/* Number of times to retry pg_init */
	unsigned pg_init_count;		/* Number of times pg_init called */
	unsigned pg_init_delay_msecs;	/* Number of msecs before pg_init retry */

	unsigned queue_size;
	struct work_struct process_queued_ios;
	struct list_head queued_ios;

	struct work_struct trigger_event;

	/*
	 * We must use a mempool of dm_mpath_io structs so that we
	 * can resubmit I/O on error.
	 */
	mempool_t *mpio_pool;

	struct mutex work_mutex;
};

/*
 * Context information attached to each I/O we process.
 */
struct dm_mpath_io {
	struct pgpath *pgpath;
	size_t nr_bytes;
};

typedef int (*action_fn) (struct pgpath *pgpath);

#define MIN_IOS 256	/* Mempool size */

static struct kmem_cache *_mpio_cache;

static struct workqueue_struct *kmultipathd, *kmpath_handlerd;
static void process_queued_ios(struct work_struct *work);
static void trigger_event(struct work_struct *work);
static void activate_path(struct work_struct *work);


/*-----------------------------------------------
 * Allocation routines
 *-----------------------------------------------*/

static struct pgpath *alloc_pgpath(void)
{
	struct pgpath *pgpath = kzalloc(sizeof(*pgpath), GFP_KERNEL);

	if (pgpath) {
		pgpath->is_active = 1;
		INIT_DELAYED_WORK(&pgpath->activate_path, activate_path);
	}

	return pgpath;
}

static void free_pgpath(struct pgpath *pgpath)
{
	kfree(pgpath);
}

static struct priority_group *alloc_priority_group(void)
{
	struct priority_group *pg;

	pg = kzalloc(sizeof(*pg), GFP_KERNEL);

	if (pg)
		INIT_LIST_HEAD(&pg->pgpaths);

	return pg;
}

static void free_pgpaths(struct list_head *pgpaths, struct dm_target *ti)
{
	struct pgpath *pgpath, *tmp;
	struct multipath *m = ti->private;

	list_for_each_entry_safe(pgpath, tmp, pgpaths, list) {
		list_del(&pgpath->list);
		if (m->hw_handler_name)
			scsi_dh_detach(bdev_get_queue(pgpath->path.dev->bdev));
		dm_put_device(ti, pgpath->path.dev);
		free_pgpath(pgpath);
	}
}

static void free_priority_group(struct priority_group *pg,
				struct dm_target *ti)
{
	struct path_selector *ps = &pg->ps;

	if (ps->type) {
		ps->type->destroy(ps);
		dm_put_path_selector(ps->type);
	}

	free_pgpaths(&pg->pgpaths, ti);
	kfree(pg);
}

static struct multipath *alloc_multipath(struct dm_target *ti)
{
	struct multipath *m;

	m = kzalloc(sizeof(*m), GFP_KERNEL);
	if (m) {
		INIT_LIST_HEAD(&m->priority_groups);
		INIT_LIST_HEAD(&m->queued_ios);
		spin_lock_init(&m->lock);
		m->queue_io = 1;
		m->pg_init_delay_msecs = DM_PG_INIT_DELAY_DEFAULT;
		INIT_WORK(&m->process_queued_ios, process_queued_ios);
		INIT_WORK(&m->trigger_event, trigger_event);
		init_waitqueue_head(&m->pg_init_wait);
		mutex_init(&m->work_mutex);
		m->mpio_pool = mempool_create_slab_pool(MIN_IOS, _mpio_cache);
		if (!m->mpio_pool) {
			kfree(m);
			return NULL;
		}
		m->ti = ti;
		ti->private = m;
	}

	return m;
}

static void free_multipath(struct multipath *m)
{
	struct priority_group *pg, *tmp;

	list_for_each_entry_safe(pg, tmp, &m->priority_groups, list) {
		list_del(&pg->list);
		free_priority_group(pg, m->ti);
	}

	kfree(m->hw_handler_name);
	kfree(m->hw_handler_params);
	mempool_destroy(m->mpio_pool);
	kfree(m);
}

static int set_mapinfo(struct multipath *m, union map_info *info)
{
	struct dm_mpath_io *mpio;

	mpio = mempool_alloc(m->mpio_pool, GFP_ATOMIC);
	if (!mpio)
		return -ENOMEM;

	memset(mpio, 0, sizeof(*mpio));
	info->ptr = mpio;

	return 0;
}

static void clear_mapinfo(struct multipath *m, union map_info *info)
{
	struct dm_mpath_io *mpio = info->ptr;

	info->ptr = NULL;
	mempool_free(mpio, m->mpio_pool);
}

/*-----------------------------------------------
 * Path selection
 *-----------------------------------------------*/

static void __pg_init_all_paths(struct multipath *m)
{
	struct pgpath *pgpath;
	unsigned long pg_init_delay = 0;

	m->pg_init_count++;
	m->pg_init_required = 0;
	if (m->pg_init_delay_retry)
		pg_init_delay = msecs_to_jiffies(m->pg_init_delay_msecs != DM_PG_INIT_DELAY_DEFAULT ?
						 m->pg_init_delay_msecs : DM_PG_INIT_DELAY_MSECS);
	list_for_each_entry(pgpath, &m->current_pg->pgpaths, list) {
		/* Skip failed paths */
		if (!pgpath->is_active)
			continue;
		if (queue_delayed_work(kmpath_handlerd, &pgpath->activate_path,
				       pg_init_delay))
			m->pg_init_in_progress++;
	}
}

static void __switch_pg(struct multipath *m, struct pgpath *pgpath)
{
	m->current_pg = pgpath->pg;

	/* Must we initialise the PG first, and queue I/O till it's ready? */
	if (m->hw_handler_name) {
		m->pg_init_required = 1;
		m->queue_io = 1;
	} else {
		m->pg_init_required = 0;
		m->queue_io = 0;
	}

	m->pg_init_count = 0;
}

static int __choose_path_in_pg(struct multipath *m, struct priority_group *pg,
			       size_t nr_bytes)
{
	struct dm_path *path;

	path = pg->ps.type->select_path(&pg->ps, &m->repeat_count, nr_bytes);
	if (!path)
		return -ENXIO;

	m->current_pgpath = path_to_pgpath(path);

	if (m->current_pg != pg)
		__switch_pg(m, m->current_pgpath);

	return 0;
}

static void __choose_pgpath(struct multipath *m, size_t nr_bytes)
{
	struct priority_group *pg;
	unsigned bypassed = 1;

	if (!m->nr_valid_paths)
		goto failed;

	/* Were we instructed to switch PG? */
	if (m->next_pg) {
		pg = m->next_pg;
		m->next_pg = NULL;
		if (!__choose_path_in_pg(m, pg, nr_bytes))
			return;
	}

	/* Don't change PG until it has no remaining paths */
	if (m->current_pg && !__choose_path_in_pg(m, m->current_pg, nr_bytes))
		return;

	/*
	 * Loop through priority groups until we find a valid path.
	 * First time we skip PGs marked 'bypassed'.
	 * Second time we only try the ones we skipped, but set
	 * pg_init_delay_retry so we do not hammer controllers.
	 */
	do {
		list_for_each_entry(pg, &m->priority_groups, list) {
			if (pg->bypassed == bypassed)
				continue;
			if (!__choose_path_in_pg(m, pg, nr_bytes)) {
				if (!bypassed)
					m->pg_init_delay_retry = 1;
				return;
			}
		}
	} while (bypassed--);

failed:
	m->current_pgpath = NULL;
	m->current_pg = NULL;
}

/*
 * Check whether bios must be queued in the device-mapper core rather
 * than here in the target.
 *
 * m->lock must be held on entry.
 *
 * If m->queue_if_no_path and m->saved_queue_if_no_path hold the
 * same value then we are not between multipath_presuspend()
 * and multipath_resume() calls and we have no need to check
 * for the DMF_NOFLUSH_SUSPENDING flag.
 */
static int __must_push_back(struct multipath *m)
{
	return (m->queue_if_no_path != m->saved_queue_if_no_path &&
		dm_noflush_suspending(m->ti));
}

static int map_io(struct multipath *m, struct request *clone,
		  union map_info *map_context, unsigned was_queued)
{
	int r = DM_MAPIO_REMAPPED;
	size_t nr_bytes = blk_rq_bytes(clone);
	unsigned long flags;
	struct pgpath *pgpath;
	struct block_device *bdev;
	struct dm_mpath_io *mpio = map_context->ptr;

	spin_lock_irqsave(&m->lock, flags);

	/* Do we need to select a new pgpath? */
	if (!m->current_pgpath ||
	    (!m->queue_io && (m->repeat_count && --m->repeat_count == 0)))
		__choose_pgpath(m, nr_bytes);

	pgpath = m->current_pgpath;

	if (was_queued)
		m->queue_size--;

	if ((pgpath && m->queue_io) ||
	    (!pgpath && m->queue_if_no_path)) {
		/* Queue for the daemon to resubmit */
		list_add_tail(&clone->queuelist, &m->queued_ios);
		m->queue_size++;
		if ((m->pg_init_required && !m->pg_init_in_progress) ||
		    !m->queue_io)
			queue_work(kmultipathd, &m->process_queued_ios);
		pgpath = NULL;
		r = DM_MAPIO_SUBMITTED;
	} else if (pgpath) {
		bdev = pgpath->path.dev->bdev;
		clone->q = bdev_get_queue(bdev);
		clone->rq_disk = bdev->bd_disk;
	} else if (__must_push_back(m))
		r = DM_MAPIO_REQUEUE;
	else
		r = -EIO;	/* Failed */

	mpio->pgpath = pgpath;
	mpio->nr_bytes = nr_bytes;

	if (r == DM_MAPIO_REMAPPED && pgpath->pg->ps.type->start_io)
		pgpath->pg->ps.type->start_io(&pgpath->pg->ps, &pgpath->path,
					      nr_bytes);

	spin_unlock_irqrestore(&m->lock, flags);

	return r;
}

/*
 * If we run out of usable paths, should we queue I/O or error it?
 */
static int queue_if_no_path(struct multipath *m, unsigned queue_if_no_path,
			    unsigned save_old_value)
{
	unsigned long flags;

	spin_lock_irqsave(&m->lock, flags);

	if (save_old_value)
		m->saved_queue_if_no_path = m->queue_if_no_path;
	else
		m->saved_queue_if_no_path = queue_if_no_path;
	m->queue_if_no_path = queue_if_no_path;
	if (!m->queue_if_no_path && m->queue_size)
		queue_work(kmultipathd, &m->process_queued_ios);

	spin_unlock_irqrestore(&m->lock, flags);

	return 0;
}

/*-----------------------------------------------------------------
 * The multipath daemon is responsible for resubmitting queued ios.
 *---------------------------------------------------------------*/

static void dispatch_queued_ios(struct multipath *m)
{
	int r;
	unsigned long flags;
	union map_info *info;
	struct request *clone, *n;
	LIST_HEAD(cl);

	spin_lock_irqsave(&m->lock, flags);
	list_splice_init(&m->queued_ios, &cl);
	spin_unlock_irqrestore(&m->lock, flags);

	list_for_each_entry_safe(clone, n, &cl, queuelist) {
		list_del_init(&clone->queuelist);

		info = dm_get_rq_mapinfo(clone);

		r = map_io(m, clone, info, 1);
		if (r < 0) {
			clear_mapinfo(m, info);
			dm_kill_unmapped_request(clone, r);
		} else if (r == DM_MAPIO_REMAPPED)
			dm_dispatch_request(clone);
		else if (r == DM_MAPIO_REQUEUE) {
			clear_mapinfo(m, info);
			dm_requeue_unmapped_request(clone);
		}
	}
}

static void process_queued_ios(struct work_struct *work)
{
	struct multipath *m =
		container_of(work, struct multipath, process_queued_ios);
	struct pgpath *pgpath = NULL;
	unsigned must_queue = 1;
	unsigned long flags;

	spin_lock_irqsave(&m->lock, flags);

	if (!m->current_pgpath)
		__choose_pgpath(m, 0);

	pgpath = m->current_pgpath;

	if ((pgpath && !m->queue_io) ||
	    (!pgpath && !m->queue_if_no_path))
		must_queue = 0;

	if (m->pg_init_required && !m->pg_init_in_progress && pgpath)
		__pg_init_all_paths(m);

	spin_unlock_irqrestore(&m->lock, flags);
	if (!must_queue)
		dispatch_queued_ios(m);
}

/*
 * An event is triggered whenever a path is taken out of use.
 * Includes path failure and PG bypass.
 */
static void trigger_event(struct work_struct *work)
{
	struct multipath *m =
		container_of(work, struct multipath, trigger_event);

	dm_table_event(m->ti->table);
}

/*-----------------------------------------------------------------
 * Constructor/argument parsing:
 * <#multipath feature args> [<arg>]*
 * <#hw_handler args> [hw_handler [<arg>]*]
 * <#priority groups>
 * <initial priority group>
 *     [<selector> <#selector args> [<arg>]*
 *      <#paths> <#per-path selector args>
 *         [<path> [<arg>]* ]+ ]+
 *---------------------------------------------------------------*/
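
/*
 * Illustrative example only (device numbers and repeat counts are made up):
 * a two-group table line using the round-robin selector, matching the
 * format above, might look like
 *
 *   0 71014400 multipath 0 0 2 1
 *       round-robin 0 2 1 8:16 1000 8:32 1000
 *       round-robin 0 2 1 8:48 1000 8:64 1000
 *
 * i.e. no feature args, no hardware handler, two priority groups of two
 * paths each, starting with group 1, and one per-path selector arg
 * (the round-robin repeat count).
 */
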
static int parse_path_selector(struct dm_arg_set *as, struct priority_group *pg,
			       struct dm_target *ti)
{
	int r;
	struct path_selector_type *pst;
	unsigned ps_argc;

	static struct dm_arg _args[] = {
		{0, 1024, "invalid number of path selector args"},
	};

	pst = dm_get_path_selector(dm_shift_arg(as));
	if (!pst) {
		ti->error = "unknown path selector type";
		return -EINVAL;
	}

	r = dm_read_arg_group(_args, as, &ps_argc, &ti->error);
	if (r) {
		dm_put_path_selector(pst);
		return -EINVAL;
	}

	r = pst->create(&pg->ps, ps_argc, as->argv);
	if (r) {
		dm_put_path_selector(pst);
		ti->error = "path selector constructor failed";
		return r;
	}

	pg->ps.type = pst;
	dm_consume_args(as, ps_argc);

	return 0;
}

static struct pgpath *parse_path(struct dm_arg_set *as, struct path_selector *ps,
				 struct dm_target *ti)
{
	int r;
	struct pgpath *p;
	struct multipath *m = ti->private;
	struct request_queue *q = NULL;
	const char *attached_handler_name;

	/* we need at least a path arg */
	if (as->argc < 1) {
		ti->error = "no device given";
		return ERR_PTR(-EINVAL);
	}

	p = alloc_pgpath();
	if (!p)
		return ERR_PTR(-ENOMEM);

	r = dm_get_device(ti, dm_shift_arg(as), dm_table_get_mode(ti->table),
			  &p->path.dev);
	if (r) {
		ti->error = "error getting device";
		goto bad;
	}

	if (m->retain_attached_hw_handler || m->hw_handler_name)
		q = bdev_get_queue(p->path.dev->bdev);

	if (m->retain_attached_hw_handler) {
		attached_handler_name = scsi_dh_attached_handler_name(q, GFP_KERNEL);
		if (attached_handler_name) {
			/*
			 * Reset hw_handler_name to match the attached handler
			 * and clear any hw_handler_params associated with the
			 * ignored handler.
			 *
			 * NB. This modifies the table line to show the actual
			 * handler instead of the original table passed in.
			 */
			kfree(m->hw_handler_name);
			m->hw_handler_name = attached_handler_name;

			kfree(m->hw_handler_params);
			m->hw_handler_params = NULL;
		}
	}

	if (m->hw_handler_name) {
		/*
		 * Increments scsi_dh reference, even when using an
		 * already-attached handler.
		 */
		r = scsi_dh_attach(q, m->hw_handler_name);
		if (r == -EBUSY) {
			/*
			 * Already attached to different hw_handler:
			 * try to reattach with correct one.
			 */
			scsi_dh_detach(q);
			r = scsi_dh_attach(q, m->hw_handler_name);
		}

		if (r < 0) {
			ti->error = "error attaching hardware handler";
			dm_put_device(ti, p->path.dev);
			goto bad;
		}

		if (m->hw_handler_params) {
			r = scsi_dh_set_params(q, m->hw_handler_params);
			if (r < 0) {
				ti->error = "unable to set hardware "
					    "handler parameters";
				scsi_dh_detach(q);
				dm_put_device(ti, p->path.dev);
				goto bad;
			}
		}
	}

	r = ps->type->add_path(ps, &p->path, as->argc, as->argv, &ti->error);
	if (r) {
		dm_put_device(ti, p->path.dev);
		goto bad;
	}

	return p;

bad:
	free_pgpath(p);
	return ERR_PTR(r);
}

static struct priority_group *parse_priority_group(struct dm_arg_set *as,
						   struct multipath *m)
{
	static struct dm_arg _args[] = {
		{1, 1024, "invalid number of paths"},
		{0, 1024, "invalid number of selector args"}
	};

	int r;
	unsigned i, nr_selector_args, nr_args;
	struct priority_group *pg;
	struct dm_target *ti = m->ti;

	if (as->argc < 2) {
		as->argc = 0;
		ti->error = "not enough priority group arguments";
		return ERR_PTR(-EINVAL);
	}

	pg = alloc_priority_group();
	if (!pg) {
		ti->error = "couldn't allocate priority group";
		return ERR_PTR(-ENOMEM);
	}
	pg->m = m;

	r = parse_path_selector(as, pg, ti);
	if (r)
		goto bad;

	/*
	 * read the paths
	 */
	r = dm_read_arg(_args, as, &pg->nr_pgpaths, &ti->error);
	if (r)
		goto bad;

	r = dm_read_arg(_args + 1, as, &nr_selector_args, &ti->error);
	if (r)
		goto bad;

	nr_args = 1 + nr_selector_args;
	for (i = 0; i < pg->nr_pgpaths; i++) {
		struct pgpath *pgpath;
		struct dm_arg_set path_args;

		if (as->argc < nr_args) {
			ti->error = "not enough path parameters";
			r = -EINVAL;
			goto bad;
		}

		path_args.argc = nr_args;
		path_args.argv = as->argv;

		pgpath = parse_path(&path_args, &pg->ps, ti);
		if (IS_ERR(pgpath)) {
			r = PTR_ERR(pgpath);
			goto bad;
		}

		pgpath->pg = pg;
		list_add_tail(&pgpath->list, &pg->pgpaths);
		dm_consume_args(as, nr_args);
	}

	return pg;

bad:
	free_priority_group(pg, ti);
	return ERR_PTR(r);
}

static int parse_hw_handler(struct dm_arg_set *as, struct multipath *m)
{
	unsigned hw_argc;
	int ret;
	struct dm_target *ti = m->ti;

	static struct dm_arg _args[] = {
		{0, 1024, "invalid number of hardware handler args"},
	};

	if (dm_read_arg_group(_args, as, &hw_argc, &ti->error))
		return -EINVAL;

	if (!hw_argc)
		return 0;

	m->hw_handler_name = kstrdup(dm_shift_arg(as), GFP_KERNEL);
	if (!try_then_request_module(scsi_dh_handler_exist(m->hw_handler_name),
				     "scsi_dh_%s", m->hw_handler_name)) {
		ti->error = "unknown hardware handler type";
		ret = -EINVAL;
		goto fail;
	}

	if (hw_argc > 1) {
		char *p;
		int i, j, len = 4;

		/*
		 * Build the handler parameter string as a sequence of
		 * NUL-separated tokens: "<#args>\0<arg1>\0<arg2>...",
		 * which is what scsi_dh_set_params() is later given.
		 */
		for (i = 0; i <= hw_argc - 2; i++)
			len += strlen(as->argv[i]) + 1;
		p = m->hw_handler_params = kzalloc(len, GFP_KERNEL);
		if (!p) {
			ti->error = "memory allocation failed";
			ret = -ENOMEM;
			goto fail;
		}
		j = sprintf(p, "%d", hw_argc - 1);
		for (i = 0, p += j + 1; i <= hw_argc - 2; i++, p += j + 1)
			j = sprintf(p, "%s", as->argv[i]);
	}
	dm_consume_args(as, hw_argc - 1);

	return 0;
fail:
	kfree(m->hw_handler_name);
	m->hw_handler_name = NULL;
	return ret;
}
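
/*
 * Illustrative example only: a feature-args prefix such as
 *
 *   3 queue_if_no_path pg_init_retries 3
 *
 * would enable queueing when all paths are down and allow up to three
 * pg_init retries; a bare "0" means no feature arguments at all.
 */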
static int parse_features(struct dm_arg_set *as, struct multipath *m)
{
	int r;
	unsigned argc;
	struct dm_target *ti = m->ti;
	const char *arg_name;

	static struct dm_arg _args[] = {
		{0, 6, "invalid number of feature args"},
		{1, 50, "pg_init_retries must be between 1 and 50"},
		{0, 60000, "pg_init_delay_msecs must be between 0 and 60000"},
	};

	r = dm_read_arg_group(_args, as, &argc, &ti->error);
	if (r)
		return -EINVAL;

	if (!argc)
		return 0;

	do {
		arg_name = dm_shift_arg(as);
		argc--;

		if (!strcasecmp(arg_name, "queue_if_no_path")) {
			r = queue_if_no_path(m, 1, 0);
			continue;
		}

		if (!strcasecmp(arg_name, "retain_attached_hw_handler")) {
			m->retain_attached_hw_handler = 1;
			continue;
		}

		if (!strcasecmp(arg_name, "pg_init_retries") &&
		    (argc >= 1)) {
			r = dm_read_arg(_args + 1, as, &m->pg_init_retries, &ti->error);
			argc--;
			continue;
		}

		if (!strcasecmp(arg_name, "pg_init_delay_msecs") &&
		    (argc >= 1)) {
			r = dm_read_arg(_args + 2, as, &m->pg_init_delay_msecs, &ti->error);
			argc--;
			continue;
		}

		ti->error = "Unrecognised multipath feature request";
		r = -EINVAL;
	} while (argc && !r);

	return r;
}

static int multipath_ctr(struct dm_target *ti, unsigned int argc,
			 char **argv)
{
	/* target arguments */
	static struct dm_arg _args[] = {
		{0, 1024, "invalid number of priority groups"},
		{0, 1024, "invalid initial priority group number"},
	};

	int r;
	struct multipath *m;
	struct dm_arg_set as;
	unsigned pg_count = 0;
	unsigned next_pg_num;

	as.argc = argc;
	as.argv = argv;

	m = alloc_multipath(ti);
	if (!m) {
		ti->error = "can't allocate multipath";
		return -EINVAL;
	}

	r = parse_features(&as, m);
	if (r)
		goto bad;

	r = parse_hw_handler(&as, m);
	if (r)
		goto bad;

	r = dm_read_arg(_args, &as, &m->nr_priority_groups, &ti->error);
	if (r)
		goto bad;

	r = dm_read_arg(_args + 1, &as, &next_pg_num, &ti->error);
	if (r)
		goto bad;

	if ((!m->nr_priority_groups && next_pg_num) ||
	    (m->nr_priority_groups && !next_pg_num)) {
		ti->error = "invalid initial priority group";
		r = -EINVAL;
		goto bad;
	}

	/* parse the priority groups */
	while (as.argc) {
		struct priority_group *pg;

		pg = parse_priority_group(&as, m);
		if (IS_ERR(pg)) {
			r = PTR_ERR(pg);
			goto bad;
		}

		m->nr_valid_paths += pg->nr_pgpaths;
		list_add_tail(&pg->list, &m->priority_groups);
		pg_count++;
		pg->pg_num = pg_count;
		if (!--next_pg_num)
			m->next_pg = pg;
	}

	if (pg_count != m->nr_priority_groups) {
		ti->error = "priority group count mismatch";
		r = -EINVAL;
		goto bad;
	}

	ti->num_flush_bios = 1;
	ti->num_discard_bios = 1;
	ti->num_write_same_bios = 1;

	return 0;

bad:
	free_multipath(m);
	return r;
}

static void multipath_wait_for_pg_init_completion(struct multipath *m)
{
	DECLARE_WAITQUEUE(wait, current);
	unsigned long flags;

	add_wait_queue(&m->pg_init_wait, &wait);

	while (1) {
		set_current_state(TASK_UNINTERRUPTIBLE);

		spin_lock_irqsave(&m->lock, flags);
		if (!m->pg_init_in_progress) {
			spin_unlock_irqrestore(&m->lock, flags);
			break;
		}
		spin_unlock_irqrestore(&m->lock, flags);

		io_schedule();
	}
	set_current_state(TASK_RUNNING);

	remove_wait_queue(&m->pg_init_wait, &wait);
}

static void flush_multipath_work(struct multipath *m)
{
	flush_workqueue(kmpath_handlerd);
	multipath_wait_for_pg_init_completion(m);
	flush_workqueue(kmultipathd);
	flush_work(&m->trigger_event);
}

static void multipath_dtr(struct dm_target *ti)
{
	struct multipath *m = ti->private;

	flush_multipath_work(m);
	free_multipath(m);
}

/*
 * Map cloned requests
 */
static int multipath_map(struct dm_target *ti, struct request *clone,
			 union map_info *map_context)
{
	int r;
	struct multipath *m = (struct multipath *) ti->private;

	if (set_mapinfo(m, map_context) < 0)
		/* ENOMEM, requeue */
		return DM_MAPIO_REQUEUE;

	clone->cmd_flags |= REQ_FAILFAST_TRANSPORT;
	r = map_io(m, clone, map_context, 0);
	if (r < 0 || r == DM_MAPIO_REQUEUE)
		clear_mapinfo(m, map_context);

	return r;
}

/*
 * Take a path out of use.
 */
static int fail_path(struct pgpath *pgpath)
{
	unsigned long flags;
	struct multipath *m = pgpath->pg->m;

	spin_lock_irqsave(&m->lock, flags);

	if (!pgpath->is_active)
		goto out;

	DMWARN("Failing path %s.", pgpath->path.dev->name);

	pgpath->pg->ps.type->fail_path(&pgpath->pg->ps, &pgpath->path);
	pgpath->is_active = 0;
	pgpath->fail_count++;

	m->nr_valid_paths--;

	if (pgpath == m->current_pgpath)
		m->current_pgpath = NULL;

	dm_path_uevent(DM_UEVENT_PATH_FAILED, m->ti,
		       pgpath->path.dev->name, m->nr_valid_paths);

	schedule_work(&m->trigger_event);

out:
	spin_unlock_irqrestore(&m->lock, flags);

	return 0;
}

/*
 * Reinstate a previously-failed path
 */
static int reinstate_path(struct pgpath *pgpath)
{
	int r = 0;
	unsigned long flags;
	struct multipath *m = pgpath->pg->m;

	spin_lock_irqsave(&m->lock, flags);

	if (pgpath->is_active)
		goto out;

	if (!pgpath->pg->ps.type->reinstate_path) {
		DMWARN("Reinstate path not supported by path selector %s",
		       pgpath->pg->ps.type->name);
		r = -EINVAL;
		goto out;
	}

	r = pgpath->pg->ps.type->reinstate_path(&pgpath->pg->ps, &pgpath->path);
	if (r)
		goto out;

	pgpath->is_active = 1;

	if (!m->nr_valid_paths++ && m->queue_size) {
		m->current_pgpath = NULL;
		queue_work(kmultipathd, &m->process_queued_ios);
	} else if (m->hw_handler_name && (m->current_pg == pgpath->pg)) {
		if (queue_work(kmpath_handlerd, &pgpath->activate_path.work))
			m->pg_init_in_progress++;
	}

	dm_path_uevent(DM_UEVENT_PATH_REINSTATED, m->ti,
		       pgpath->path.dev->name, m->nr_valid_paths);

	schedule_work(&m->trigger_event);

out:
	spin_unlock_irqrestore(&m->lock, flags);

	return r;
}

/*
 * Fail or reinstate all paths that match the provided struct dm_dev.
 */
static int action_dev(struct multipath *m, struct dm_dev *dev,
		      action_fn action)
{
	int r = -EINVAL;
	struct pgpath *pgpath;
	struct priority_group *pg;

	list_for_each_entry(pg, &m->priority_groups, list) {
		list_for_each_entry(pgpath, &pg->pgpaths, list) {
			if (pgpath->path.dev == dev)
				r = action(pgpath);
		}
	}

	return r;
}

/*
 * Temporarily try to avoid having to use the specified PG
 */
static void bypass_pg(struct multipath *m, struct priority_group *pg,
		      int bypassed)
{
	unsigned long flags;

	spin_lock_irqsave(&m->lock, flags);

	pg->bypassed = bypassed;
	m->current_pgpath = NULL;
	m->current_pg = NULL;

	spin_unlock_irqrestore(&m->lock, flags);

	schedule_work(&m->trigger_event);
}

/*
 * Switch to using the specified PG from the next I/O that gets mapped
 */
static int switch_pg_num(struct multipath *m, const char *pgstr)
{
	struct priority_group *pg;
	unsigned pgnum;
	unsigned long flags;
	char dummy;

	if (!pgstr || (sscanf(pgstr, "%u%c", &pgnum, &dummy) != 1) || !pgnum ||
	    (pgnum > m->nr_priority_groups)) {
		DMWARN("invalid PG number supplied to switch_pg_num");
		return -EINVAL;
	}

	spin_lock_irqsave(&m->lock, flags);
	list_for_each_entry(pg, &m->priority_groups, list) {
		pg->bypassed = 0;
		if (--pgnum)
			continue;

		m->current_pgpath = NULL;
		m->current_pg = NULL;
		m->next_pg = pg;
	}
	spin_unlock_irqrestore(&m->lock, flags);

	schedule_work(&m->trigger_event);
	return 0;
}

/*
 * Set/clear bypassed status of a PG.
 * PGs are numbered upwards from 1 in the order they were declared.
 */
static int bypass_pg_num(struct multipath *m, const char *pgstr, int bypassed)
{
	struct priority_group *pg;
	unsigned pgnum;
	char dummy;

	if (!pgstr || (sscanf(pgstr, "%u%c", &pgnum, &dummy) != 1) || !pgnum ||
	    (pgnum > m->nr_priority_groups)) {
		DMWARN("invalid PG number supplied to bypass_pg");
		return -EINVAL;
	}

	list_for_each_entry(pg, &m->priority_groups, list) {
		if (!--pgnum)
			break;
	}

	bypass_pg(m, pg, bypassed);
	return 0;
}

/*
 * Should we retry pg_init immediately?
 */
static int pg_init_limit_reached(struct multipath *m, struct pgpath *pgpath)
{
	unsigned long flags;
	int limit_reached = 0;

	spin_lock_irqsave(&m->lock, flags);

	if (m->pg_init_count <= m->pg_init_retries)
		m->pg_init_required = 1;
	else
		limit_reached = 1;

	spin_unlock_irqrestore(&m->lock, flags);

	return limit_reached;
}

static void pg_init_done(void *data, int errors)
{
	struct pgpath *pgpath = data;
	struct priority_group *pg = pgpath->pg;
	struct multipath *m = pg->m;
	unsigned long flags;
	unsigned delay_retry = 0;

	/* device or driver problems */
	switch (errors) {
	case SCSI_DH_OK:
		break;
	case SCSI_DH_NOSYS:
		if (!m->hw_handler_name) {
			errors = 0;
			break;
		}
		DMERR("Could not failover the device: Handler scsi_dh_%s "
		      "Error %d.", m->hw_handler_name, errors);
		/*
		 * Fail path for now, so we do not ping pong
		 */
		fail_path(pgpath);
		break;
	case SCSI_DH_DEV_TEMP_BUSY:
		/*
		 * Probably doing something like FW upgrade on the
		 * controller so try the other pg.
		 */
		bypass_pg(m, pg, 1);
		break;
	case SCSI_DH_RETRY:
		/* Wait before retrying. */
		delay_retry = 1;
		/* fall through */
	case SCSI_DH_IMM_RETRY:
	case SCSI_DH_RES_TEMP_UNAVAIL:
		if (pg_init_limit_reached(m, pgpath))
			fail_path(pgpath);
		errors = 0;
		break;
	default:
		/*
		 * We probably do not want to fail the path for a device
		 * error, but this is what the old dm did. In future
		 * patches we can do more advanced handling.
		 */
		fail_path(pgpath);
	}

	spin_lock_irqsave(&m->lock, flags);
	if (errors) {
		if (pgpath == m->current_pgpath) {
			DMERR("Could not failover device. Error %d.", errors);
			m->current_pgpath = NULL;
			m->current_pg = NULL;
		}
	} else if (!m->pg_init_required)
		pg->bypassed = 0;

	if (--m->pg_init_in_progress)
		/* Activations of other paths are still ongoing */
		goto out;

	if (!m->pg_init_required)
		m->queue_io = 0;

	m->pg_init_delay_retry = delay_retry;
	queue_work(kmultipathd, &m->process_queued_ios);

	/*
	 * Wake up any thread waiting to suspend.
	 */
	wake_up(&m->pg_init_wait);

out:
	spin_unlock_irqrestore(&m->lock, flags);
}

static void activate_path(struct work_struct *work)
{
	struct pgpath *pgpath =
		container_of(work, struct pgpath, activate_path.work);

	scsi_dh_activate(bdev_get_queue(pgpath->path.dev->bdev),
			 pg_init_done, pgpath);
}

/*
 * end_io handling
 */
static int do_end_io(struct multipath *m, struct request *clone,
		     int error, struct dm_mpath_io *mpio)
{
	/*
	 * We don't queue any clone request inside the multipath target
	 * during end I/O handling, since those clone requests don't have
	 * bio clones.  If we queued them inside the multipath target,
	 * we would need to make bio clones, which requires memory allocation.
	 * (See drivers/md/dm.c:end_clone_bio() about why the clone requests
	 * don't have bio clones.)
	 * Instead of queueing the clone request here, we queue the original
	 * request into dm core, which will remake a clone request and
	 * clone bios for it and resubmit it later.
	 */
	int r = DM_ENDIO_REQUEUE;
	unsigned long flags;

	if (!error && !clone->errors)
		return 0;	/* I/O complete */

	if (error == -EOPNOTSUPP || error == -EREMOTEIO || error == -EILSEQ)
		return error;

	if (mpio->pgpath)
		fail_path(mpio->pgpath);

	spin_lock_irqsave(&m->lock, flags);
	if (!m->nr_valid_paths) {
		if (!m->queue_if_no_path) {
			if (!__must_push_back(m))
				r = -EIO;
		} else {
			if (error == -EBADE)
				r = error;
		}
	}
	spin_unlock_irqrestore(&m->lock, flags);

	return r;
}

static int multipath_end_io(struct dm_target *ti, struct request *clone,
			    int error, union map_info *map_context)
{
	struct multipath *m = ti->private;
	struct dm_mpath_io *mpio = map_context->ptr;
	struct pgpath *pgpath;
	struct path_selector *ps;
	int r;

	BUG_ON(!mpio);

	r = do_end_io(m, clone, error, mpio);
	pgpath = mpio->pgpath;
	if (pgpath) {
		ps = &pgpath->pg->ps;
		if (ps->type->end_io)
			ps->type->end_io(ps, &pgpath->path, mpio->nr_bytes);
	}
	clear_mapinfo(m, map_context);

	return r;
}

/*
 * Suspend can't complete until all the I/O is processed so if
 * the last path fails we must error any remaining I/O.
 * Note that if the freeze_bdev fails while suspending, the
 * queue_if_no_path state is lost - userspace should reset it.
 */
static void multipath_presuspend(struct dm_target *ti)
{
	struct multipath *m = (struct multipath *) ti->private;

	queue_if_no_path(m, 0, 1);
}

static void multipath_postsuspend(struct dm_target *ti)
{
	struct multipath *m = ti->private;

	mutex_lock(&m->work_mutex);
	flush_multipath_work(m);
	mutex_unlock(&m->work_mutex);
}

/*
 * Restore the queue_if_no_path setting.
 */
static void multipath_resume(struct dm_target *ti)
{
	struct multipath *m = (struct multipath *) ti->private;
	unsigned long flags;

	spin_lock_irqsave(&m->lock, flags);
	m->queue_if_no_path = m->saved_queue_if_no_path;
	spin_unlock_irqrestore(&m->lock, flags);
}

/*
 * Info output has the following format:
 * num_multipath_feature_args [multipath_feature_args]*
 * num_handler_status_args [handler_status_args]*
 * num_groups init_group_number
 *            [A|D|E num_ps_status_args [ps_status_args]*
 *             num_paths num_selector_args
 *             [path_dev A|F fail_count [selector_args]* ]+ ]+
 *
 * Table output has the following format (identical to the constructor string):
 *  num_feature_args [features_args]*
 *  num_handler_args hw_handler [hw_handler_args]*
 *  num_groups init_group_number
 *             [priority selector-name num_ps_args [ps_args]*
 *              num_paths num_selector_args [path_dev [selector_args]* ]+ ]+
 */
static void multipath_status(struct dm_target *ti, status_type_t type,
			     unsigned status_flags, char *result, unsigned maxlen)
{
	int sz = 0;
	unsigned long flags;
	struct multipath *m = (struct multipath *) ti->private;
	struct priority_group *pg;
	struct pgpath *p;
	unsigned pg_num;
	char state;

	spin_lock_irqsave(&m->lock, flags);

	/* Features */
	if (type == STATUSTYPE_INFO)
		DMEMIT("2 %u %u ", m->queue_size, m->pg_init_count);
	else {
		DMEMIT("%u ", m->queue_if_no_path +
			      (m->pg_init_retries > 0) * 2 +
			      (m->pg_init_delay_msecs != DM_PG_INIT_DELAY_DEFAULT) * 2 +
			      m->retain_attached_hw_handler);
		if (m->queue_if_no_path)
			DMEMIT("queue_if_no_path ");
		if (m->pg_init_retries)
			DMEMIT("pg_init_retries %u ", m->pg_init_retries);
		if (m->pg_init_delay_msecs != DM_PG_INIT_DELAY_DEFAULT)
			DMEMIT("pg_init_delay_msecs %u ", m->pg_init_delay_msecs);
		if (m->retain_attached_hw_handler)
			DMEMIT("retain_attached_hw_handler ");
	}

	if (!m->hw_handler_name || type == STATUSTYPE_INFO)
		DMEMIT("0 ");
	else
		DMEMIT("1 %s ", m->hw_handler_name);

	DMEMIT("%u ", m->nr_priority_groups);

	if (m->next_pg)
		pg_num = m->next_pg->pg_num;
	else if (m->current_pg)
		pg_num = m->current_pg->pg_num;
	else
		pg_num = (m->nr_priority_groups ? 1 : 0);

	DMEMIT("%u ", pg_num);

	switch (type) {
	case STATUSTYPE_INFO:
		list_for_each_entry(pg, &m->priority_groups, list) {
			if (pg->bypassed)
				state = 'D';	/* Disabled */
			else if (pg == m->current_pg)
				state = 'A';	/* Currently Active */
			else
				state = 'E';	/* Enabled */

			DMEMIT("%c ", state);

			if (pg->ps.type->status)
				sz += pg->ps.type->status(&pg->ps, NULL, type,
							  result + sz,
							  maxlen - sz);
			else
				DMEMIT("0 ");

			DMEMIT("%u %u ", pg->nr_pgpaths,
			       pg->ps.type->info_args);

			list_for_each_entry(p, &pg->pgpaths, list) {
				DMEMIT("%s %s %u ", p->path.dev->name,
				       p->is_active ? "A" : "F",
				       p->fail_count);
				if (pg->ps.type->status)
					sz += pg->ps.type->status(&pg->ps,
					      &p->path, type, result + sz,
					      maxlen - sz);
			}
		}
		break;

	case STATUSTYPE_TABLE:
		list_for_each_entry(pg, &m->priority_groups, list) {
			DMEMIT("%s ", pg->ps.type->name);

			if (pg->ps.type->status)
				sz += pg->ps.type->status(&pg->ps, NULL, type,
							  result + sz,
							  maxlen - sz);
			else
				DMEMIT("0 ");

			DMEMIT("%u %u ", pg->nr_pgpaths,
			       pg->ps.type->table_args);

			list_for_each_entry(p, &pg->pgpaths, list) {
				DMEMIT("%s ", p->path.dev->name);
				if (pg->ps.type->status)
					sz += pg->ps.type->status(&pg->ps,
					      &p->path, type, result + sz,
					      maxlen - sz);
			}
		}
		break;
	}

	spin_unlock_irqrestore(&m->lock, flags);
}
static int multipath_message(struct dm_target *ti, unsigned argc, char **argv)
{
	int r = -EINVAL;
	struct dm_dev *dev;
	struct multipath *m = (struct multipath *) ti->private;
	action_fn action;

	mutex_lock(&m->work_mutex);

	if (dm_suspended(ti)) {
		r = -EBUSY;
		goto out;
	}

	if (argc == 1) {
		if (!strcasecmp(argv[0], "queue_if_no_path")) {
			r = queue_if_no_path(m, 1, 0);
			goto out;
		} else if (!strcasecmp(argv[0], "fail_if_no_path")) {
			r = queue_if_no_path(m, 0, 0);
			goto out;
		}
	}

	if (argc != 2) {
		DMWARN("Unrecognised multipath message received.");
		goto out;
	}

	if (!strcasecmp(argv[0], "disable_group")) {
		r = bypass_pg_num(m, argv[1], 1);
		goto out;
	} else if (!strcasecmp(argv[0], "enable_group")) {
		r = bypass_pg_num(m, argv[1], 0);
		goto out;
	} else if (!strcasecmp(argv[0], "switch_group")) {
		r = switch_pg_num(m, argv[1]);
		goto out;
	} else if (!strcasecmp(argv[0], "reinstate_path"))
		action = reinstate_path;
	else if (!strcasecmp(argv[0], "fail_path"))
		action = fail_path;
	else {
		DMWARN("Unrecognised multipath message received.");
		goto out;
	}

	r = dm_get_device(ti, argv[1], dm_table_get_mode(ti->table), &dev);
	if (r) {
		DMWARN("message: error getting device %s",
		       argv[1]);
		goto out;
	}

	r = action_dev(m, dev, action);

	dm_put_device(ti, dev);

out:
	mutex_unlock(&m->work_mutex);
	return r;
}

static int multipath_ioctl(struct dm_target *ti, unsigned int cmd,
			   unsigned long arg)
{
	struct multipath *m = ti->private;
	struct pgpath *pgpath;
	struct block_device *bdev;
	fmode_t mode;
	unsigned long flags;
	int r;

	bdev = NULL;
	mode = 0;
	r = 0;

	spin_lock_irqsave(&m->lock, flags);

	if (!m->current_pgpath)
		__choose_pgpath(m, 0);

	pgpath = m->current_pgpath;

	if (pgpath) {
		bdev = pgpath->path.dev->bdev;
		mode = pgpath->path.dev->mode;
	}

	if ((pgpath && m->queue_io) || (!pgpath && m->queue_if_no_path))
		r = -ENOTCONN;
	else if (!bdev)
		r = -EIO;

	spin_unlock_irqrestore(&m->lock, flags);

	/*
	 * Only pass ioctls through if the device sizes match exactly.
	 */
	if (!r && ti->len != i_size_read(bdev->bd_inode) >> SECTOR_SHIFT)
		r = scsi_verify_blk_ioctl(NULL, cmd);

	if (r == -ENOTCONN && !fatal_signal_pending(current))
		queue_work(kmultipathd, &m->process_queued_ios);

	return r ? : __blkdev_driver_ioctl(bdev, mode, cmd, arg);
}

static int multipath_iterate_devices(struct dm_target *ti,
				     iterate_devices_callout_fn fn, void *data)
{
	struct multipath *m = ti->private;
	struct priority_group *pg;
	struct pgpath *p;
	int ret = 0;

	list_for_each_entry(pg, &m->priority_groups, list) {
		list_for_each_entry(p, &pg->pgpaths, list) {
			ret = fn(ti, p->path.dev, ti->begin, ti->len, data);
			if (ret)
				goto out;
		}
	}

out:
	return ret;
}

static int __pgpath_busy(struct pgpath *pgpath)
{
	struct request_queue *q = bdev_get_queue(pgpath->path.dev->bdev);

	return dm_underlying_device_busy(q);
}

/*
 * We return "busy" only when we can map I/Os but the underlying devices
 * are busy (so even if we mapped I/Os now, the I/Os would wait on
 * the underlying queue).
 * In other words, if we want to kill I/Os or queue them inside us
 * due to map unavailability, we don't return "busy".  Otherwise,
 * dm core won't give us the I/Os and we can't do what we want.
 */
static int multipath_busy(struct dm_target *ti)
{
	int busy = 0, has_active = 0;
	struct multipath *m = ti->private;
	struct priority_group *pg;
	struct pgpath *pgpath;
	unsigned long flags;

	spin_lock_irqsave(&m->lock, flags);

	/* Guess which priority_group will be used at next mapping time */
	if (unlikely(!m->current_pgpath && m->next_pg))
		pg = m->next_pg;
	else if (likely(m->current_pg))
		pg = m->current_pg;
	else
		/*
		 * We don't know which pg will be used at next mapping time.
		 * We don't call __choose_pgpath() here to avoid triggering
		 * pg_init just because of this busy check, so we also don't
		 * know whether the underlying devices we would use at the
		 * next mapping time are busy.  Just try mapping.
		 */
		goto out;

	/*
	 * If there is at least one non-busy active path, the path selector
	 * will be able to select it, so we consider such a pg as not busy.
	 */
	busy = 1;
	list_for_each_entry(pgpath, &pg->pgpaths, list)
		if (pgpath->is_active) {
			has_active = 1;

			if (!__pgpath_busy(pgpath)) {
				busy = 0;
				break;
			}
		}

	if (!has_active)
		/*
		 * No active path in this pg, so this pg won't be used and
		 * the current_pg will be changed at next mapping time.
		 * We need to try mapping to determine it.
1679 */ 1680 busy = 0; 1681 1682 out: 1683 spin_unlock_irqrestore(&m->lock, flags); 1684 1685 return busy; 1686 } 1687 1688 /*----------------------------------------------------------------- 1689 * Module setup 1690 *---------------------------------------------------------------*/ 1691 static struct target_type multipath_target = { 1692 .name = "multipath", 1693 .version = {1, 5, 1}, 1694 .module = THIS_MODULE, 1695 .ctr = multipath_ctr, 1696 .dtr = multipath_dtr, 1697 .map_rq = multipath_map, 1698 .rq_end_io = multipath_end_io, 1699 .presuspend = multipath_presuspend, 1700 .postsuspend = multipath_postsuspend, 1701 .resume = multipath_resume, 1702 .status = multipath_status, 1703 .message = multipath_message, 1704 .ioctl = multipath_ioctl, 1705 .iterate_devices = multipath_iterate_devices, 1706 .busy = multipath_busy, 1707 }; 1708 1709 static int __init dm_multipath_init(void) 1710 { 1711 int r; 1712 1713 /* allocate a slab for the dm_ios */ 1714 _mpio_cache = KMEM_CACHE(dm_mpath_io, 0); 1715 if (!_mpio_cache) 1716 return -ENOMEM; 1717 1718 r = dm_register_target(&multipath_target); 1719 if (r < 0) { 1720 DMERR("register failed %d", r); 1721 kmem_cache_destroy(_mpio_cache); 1722 return -EINVAL; 1723 } 1724 1725 kmultipathd = alloc_workqueue("kmpathd", WQ_MEM_RECLAIM, 0); 1726 if (!kmultipathd) { 1727 DMERR("failed to create workqueue kmpathd"); 1728 dm_unregister_target(&multipath_target); 1729 kmem_cache_destroy(_mpio_cache); 1730 return -ENOMEM; 1731 } 1732 1733 /* 1734 * A separate workqueue is used to handle the device handlers 1735 * to avoid overloading existing workqueue. Overloading the 1736 * old workqueue would also create a bottleneck in the 1737 * path of the storage hardware device activation. 1738 */ 1739 kmpath_handlerd = alloc_ordered_workqueue("kmpath_handlerd", 1740 WQ_MEM_RECLAIM); 1741 if (!kmpath_handlerd) { 1742 DMERR("failed to create workqueue kmpath_handlerd"); 1743 destroy_workqueue(kmultipathd); 1744 dm_unregister_target(&multipath_target); 1745 kmem_cache_destroy(_mpio_cache); 1746 return -ENOMEM; 1747 } 1748 1749 DMINFO("version %u.%u.%u loaded", 1750 multipath_target.version[0], multipath_target.version[1], 1751 multipath_target.version[2]); 1752 1753 return r; 1754 } 1755 1756 static void __exit dm_multipath_exit(void) 1757 { 1758 destroy_workqueue(kmpath_handlerd); 1759 destroy_workqueue(kmultipathd); 1760 1761 dm_unregister_target(&multipath_target); 1762 kmem_cache_destroy(_mpio_cache); 1763 } 1764 1765 module_init(dm_multipath_init); 1766 module_exit(dm_multipath_exit); 1767 1768 MODULE_DESCRIPTION(DM_NAME " multipath target"); 1769 MODULE_AUTHOR("Sistina Software <dm-devel@redhat.com>"); 1770 MODULE_LICENSE("GPL"); 1771