/*
 * Copyright (C) 2003 Sistina Software Limited.
 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
 *
 * This file is released under the GPL.
 */

#include <linux/device-mapper.h>

#include "dm-path-selector.h"
#include "dm-uevent.h"

#include <linux/ctype.h>
#include <linux/init.h>
#include <linux/mempool.h>
#include <linux/module.h>
#include <linux/pagemap.h>
#include <linux/slab.h>
#include <linux/time.h>
#include <linux/workqueue.h>
#include <scsi/scsi_dh.h>
#include <asm/atomic.h>

#define DM_MSG_PREFIX "multipath"
#define MESG_STR(x) x, sizeof(x)

/* Path properties */
struct pgpath {
	struct list_head list;

	struct priority_group *pg;	/* Owning PG */
	unsigned is_active;		/* Path status */
	unsigned fail_count;		/* Cumulative failure count */

	struct dm_path path;
	struct work_struct deactivate_path;
	struct work_struct activate_path;
};

#define path_to_pgpath(__pgp) container_of((__pgp), struct pgpath, path)

/*
 * Paths are grouped into Priority Groups and numbered from 1 upwards.
 * Each has a path selector which controls which path gets used.
 */
struct priority_group {
	struct list_head list;

	struct multipath *m;		/* Owning multipath instance */
	struct path_selector ps;

	unsigned pg_num;		/* Reference number */
	unsigned bypassed;		/* Temporarily bypass this PG? */

	unsigned nr_pgpaths;		/* Number of paths in PG */
	struct list_head pgpaths;
};

/* Multipath context */
struct multipath {
	struct list_head list;
	struct dm_target *ti;

	spinlock_t lock;

	const char *hw_handler_name;
	char *hw_handler_params;
	unsigned nr_priority_groups;
	struct list_head priority_groups;
	unsigned pg_init_required;	/* pg_init needs calling? */
	unsigned pg_init_in_progress;	/* Only one pg_init allowed at once */

	unsigned nr_valid_paths;	/* Total number of usable paths */
	struct pgpath *current_pgpath;
	struct priority_group *current_pg;
	struct priority_group *next_pg;	/* Switch to this PG if set */
	unsigned repeat_count;		/* I/Os left before calling PS again */

	unsigned queue_io;		/* Must we queue all I/O? */
	unsigned queue_if_no_path;	/* Queue I/O if last path fails? */
	unsigned saved_queue_if_no_path;/* Saved state during suspension */
	unsigned pg_init_retries;	/* Number of times to retry pg_init */
	unsigned pg_init_count;		/* Number of times pg_init called */

	struct work_struct process_queued_ios;
	struct list_head queued_ios;
	unsigned queue_size;

	struct work_struct trigger_event;

	/*
	 * We must use a mempool of dm_mpath_io structs so that we
	 * can resubmit bios on error.
	 */
	mempool_t *mpio_pool;

	struct mutex work_mutex;

	unsigned suspended;	/* Don't create new I/O internally when set. */
};

/*
 * Context information attached to each bio we process.
 */
struct dm_mpath_io {
	struct pgpath *pgpath;
	size_t nr_bytes;
};

typedef int (*action_fn) (struct pgpath *pgpath);

#define MIN_IOS 256	/* Mempool size */

static struct kmem_cache *_mpio_cache;

static struct workqueue_struct *kmultipathd, *kmpath_handlerd;
static void process_queued_ios(struct work_struct *work);
static void trigger_event(struct work_struct *work);
static void activate_path(struct work_struct *work);
static void deactivate_path(struct work_struct *work);


/*-----------------------------------------------
 * Allocation routines
 *-----------------------------------------------*/

static struct pgpath *alloc_pgpath(void)
{
	struct pgpath *pgpath = kzalloc(sizeof(*pgpath), GFP_KERNEL);

	if (pgpath) {
		pgpath->is_active = 1;
		INIT_WORK(&pgpath->deactivate_path, deactivate_path);
		INIT_WORK(&pgpath->activate_path, activate_path);
	}

	return pgpath;
}

static void free_pgpath(struct pgpath *pgpath)
{
	kfree(pgpath);
}

static void deactivate_path(struct work_struct *work)
{
	struct pgpath *pgpath =
		container_of(work, struct pgpath, deactivate_path);

	blk_abort_queue(pgpath->path.dev->bdev->bd_disk->queue);
}

static struct priority_group *alloc_priority_group(void)
{
	struct priority_group *pg;

	pg = kzalloc(sizeof(*pg), GFP_KERNEL);

	if (pg)
		INIT_LIST_HEAD(&pg->pgpaths);

	return pg;
}

static void free_pgpaths(struct list_head *pgpaths, struct dm_target *ti)
{
	struct pgpath *pgpath, *tmp;
	struct multipath *m = ti->private;

	list_for_each_entry_safe(pgpath, tmp, pgpaths, list) {
		list_del(&pgpath->list);
		if (m->hw_handler_name)
			scsi_dh_detach(bdev_get_queue(pgpath->path.dev->bdev));
		dm_put_device(ti, pgpath->path.dev);
		free_pgpath(pgpath);
	}
}

static void free_priority_group(struct priority_group *pg,
				struct dm_target *ti)
{
	struct path_selector *ps = &pg->ps;

	if (ps->type) {
		ps->type->destroy(ps);
		dm_put_path_selector(ps->type);
	}

	free_pgpaths(&pg->pgpaths, ti);
	kfree(pg);
}

static struct multipath *alloc_multipath(struct dm_target *ti)
{
	struct multipath *m;

	m = kzalloc(sizeof(*m), GFP_KERNEL);
	if (m) {
		INIT_LIST_HEAD(&m->priority_groups);
		INIT_LIST_HEAD(&m->queued_ios);
		spin_lock_init(&m->lock);
		m->queue_io = 1;
		INIT_WORK(&m->process_queued_ios, process_queued_ios);
		INIT_WORK(&m->trigger_event, trigger_event);
		mutex_init(&m->work_mutex);
		m->mpio_pool = mempool_create_slab_pool(MIN_IOS, _mpio_cache);
		if (!m->mpio_pool) {
			kfree(m);
			return NULL;
		}
		m->ti = ti;
		ti->private = m;
	}

	return m;
}

static void free_multipath(struct multipath *m)
{
	struct priority_group *pg, *tmp;

	list_for_each_entry_safe(pg, tmp, &m->priority_groups, list) {
		list_del(&pg->list);
		free_priority_group(pg, m->ti);
	}

	kfree(m->hw_handler_name);
	kfree(m->hw_handler_params);
	mempool_destroy(m->mpio_pool);
	kfree(m);
}


/*-----------------------------------------------
 * Path selection
 *-----------------------------------------------*/

static void __switch_pg(struct multipath *m, struct pgpath *pgpath)
{
	m->current_pg = pgpath->pg;

	/* Must we initialise the PG first, and queue I/O till it's ready? */
	if (m->hw_handler_name) {
		m->pg_init_required = 1;
		m->queue_io = 1;
	} else {
		m->pg_init_required = 0;
		m->queue_io = 0;
	}

	m->pg_init_count = 0;
}

static int __choose_path_in_pg(struct multipath *m, struct priority_group *pg,
			       size_t nr_bytes)
{
	struct dm_path *path;

	path = pg->ps.type->select_path(&pg->ps, &m->repeat_count, nr_bytes);
	if (!path)
		return -ENXIO;

	m->current_pgpath = path_to_pgpath(path);

	if (m->current_pg != pg)
		__switch_pg(m, m->current_pgpath);

	return 0;
}

static void __choose_pgpath(struct multipath *m, size_t nr_bytes)
{
	struct priority_group *pg;
	unsigned bypassed = 1;

	if (!m->nr_valid_paths)
		goto failed;

	/* Were we instructed to switch PG? */
	if (m->next_pg) {
		pg = m->next_pg;
		m->next_pg = NULL;
		if (!__choose_path_in_pg(m, pg, nr_bytes))
			return;
	}

	/* Don't change PG until it has no remaining paths */
	if (m->current_pg && !__choose_path_in_pg(m, m->current_pg, nr_bytes))
		return;

	/*
	 * Loop through priority groups until we find a valid path.
	 * First time we skip PGs marked 'bypassed'.
	 * Second time we only try the ones we skipped.
	 */
	do {
		list_for_each_entry(pg, &m->priority_groups, list) {
			if (pg->bypassed == bypassed)
				continue;
			if (!__choose_path_in_pg(m, pg, nr_bytes))
				return;
		}
	} while (bypassed--);

failed:
	m->current_pgpath = NULL;
	m->current_pg = NULL;
}

/*
 * Check whether bios must be queued in the device-mapper core rather
 * than here in the target.
 *
 * m->lock must be held on entry.
 *
 * If m->queue_if_no_path and m->saved_queue_if_no_path hold the
 * same value then we are not between multipath_presuspend()
 * and multipath_resume() calls and we have no need to check
 * for the DMF_NOFLUSH_SUSPENDING flag.
 */
static int __must_push_back(struct multipath *m)
{
	return (m->queue_if_no_path != m->saved_queue_if_no_path &&
		dm_noflush_suspending(m->ti));
}
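/*
 * Editorial illustration (summary derived from the code below, not part
 * of the original source): the mapping decision made by map_io()
 * resolves roughly as follows:
 *
 *   usable path, queue_io set         -> queue internally, DM_MAPIO_SUBMITTED
 *   usable path, queue_io clear       -> remap to that path, DM_MAPIO_REMAPPED
 *   no path, queue_if_no_path set     -> queue internally, DM_MAPIO_SUBMITTED
 *   no path, __must_push_back() true  -> DM_MAPIO_REQUEUE (dm core requeues)
 *   no path otherwise                 -> -EIO
 */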
static int map_io(struct multipath *m, struct request *clone,
		  struct dm_mpath_io *mpio, unsigned was_queued)
{
	int r = DM_MAPIO_REMAPPED;
	size_t nr_bytes = blk_rq_bytes(clone);
	unsigned long flags;
	struct pgpath *pgpath;
	struct block_device *bdev;

	spin_lock_irqsave(&m->lock, flags);

	/* Do we need to select a new pgpath? */
	if (!m->current_pgpath ||
	    (!m->queue_io && (m->repeat_count && --m->repeat_count == 0)))
		__choose_pgpath(m, nr_bytes);

	pgpath = m->current_pgpath;

	if (was_queued)
		m->queue_size--;

	if ((pgpath && m->queue_io) ||
	    (!pgpath && m->queue_if_no_path)) {
		/* Queue for the daemon to resubmit */
		list_add_tail(&clone->queuelist, &m->queued_ios);
		m->queue_size++;
		if ((m->pg_init_required && !m->pg_init_in_progress) ||
		    !m->queue_io)
			queue_work(kmultipathd, &m->process_queued_ios);
		pgpath = NULL;
		r = DM_MAPIO_SUBMITTED;
	} else if (pgpath) {
		bdev = pgpath->path.dev->bdev;
		clone->q = bdev_get_queue(bdev);
		clone->rq_disk = bdev->bd_disk;
	} else if (__must_push_back(m))
		r = DM_MAPIO_REQUEUE;
	else
		r = -EIO;	/* Failed */

	mpio->pgpath = pgpath;
	mpio->nr_bytes = nr_bytes;

	if (r == DM_MAPIO_REMAPPED && pgpath->pg->ps.type->start_io)
		pgpath->pg->ps.type->start_io(&pgpath->pg->ps, &pgpath->path,
					      nr_bytes);

	spin_unlock_irqrestore(&m->lock, flags);

	return r;
}

/*
 * If we run out of usable paths, should we queue I/O or error it?
 */
static int queue_if_no_path(struct multipath *m, unsigned queue_if_no_path,
			    unsigned save_old_value)
{
	unsigned long flags;

	spin_lock_irqsave(&m->lock, flags);

	if (save_old_value)
		m->saved_queue_if_no_path = m->queue_if_no_path;
	else
		m->saved_queue_if_no_path = queue_if_no_path;
	m->queue_if_no_path = queue_if_no_path;
	if (!m->queue_if_no_path && m->queue_size)
		queue_work(kmultipathd, &m->process_queued_ios);

	spin_unlock_irqrestore(&m->lock, flags);

	return 0;
}

/*-----------------------------------------------------------------
 * The multipath daemon is responsible for resubmitting queued ios.
 *---------------------------------------------------------------*/

static void dispatch_queued_ios(struct multipath *m)
{
	int r;
	unsigned long flags;
	struct dm_mpath_io *mpio;
	union map_info *info;
	struct request *clone, *n;
	LIST_HEAD(cl);

	spin_lock_irqsave(&m->lock, flags);
	list_splice_init(&m->queued_ios, &cl);
	spin_unlock_irqrestore(&m->lock, flags);

	list_for_each_entry_safe(clone, n, &cl, queuelist) {
		list_del_init(&clone->queuelist);

		info = dm_get_rq_mapinfo(clone);
		mpio = info->ptr;

		r = map_io(m, clone, mpio, 1);
		if (r < 0) {
			mempool_free(mpio, m->mpio_pool);
			dm_kill_unmapped_request(clone, r);
		} else if (r == DM_MAPIO_REMAPPED)
			dm_dispatch_request(clone);
		else if (r == DM_MAPIO_REQUEUE) {
			mempool_free(mpio, m->mpio_pool);
			dm_requeue_unmapped_request(clone);
		}
	}
}

static void process_queued_ios(struct work_struct *work)
{
	struct multipath *m =
		container_of(work, struct multipath, process_queued_ios);
	struct pgpath *pgpath = NULL, *tmp;
	unsigned must_queue = 1;
	unsigned long flags;

	spin_lock_irqsave(&m->lock, flags);

	if (!m->queue_size)
		goto out;

	if (!m->current_pgpath)
		__choose_pgpath(m, 0);

	pgpath = m->current_pgpath;

	if ((pgpath && !m->queue_io) ||
	    (!pgpath && !m->queue_if_no_path))
		must_queue = 0;

	if (m->pg_init_required && !m->pg_init_in_progress && pgpath) {
		m->pg_init_count++;
		m->pg_init_required = 0;
		list_for_each_entry(tmp, &pgpath->pg->pgpaths, list) {
			if (queue_work(kmpath_handlerd, &tmp->activate_path))
				m->pg_init_in_progress++;
		}
	}
out:
	spin_unlock_irqrestore(&m->lock, flags);
	if (!must_queue)
		dispatch_queued_ios(m);
}
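/*
 * Editorial note on the pg_init sequence above (summary, not original
 * source text): when a priority group needs initialising,
 * process_queued_ios() queues activate_path() for every path in that PG
 * on kmpath_handlerd and bumps pg_init_in_progress for each work item
 * queued.  Each activation completes in pg_init_done(); once
 * pg_init_in_progress drops back to zero the held I/O is released by
 * re-queueing process_queued_ios().
 */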
/*
 * An event is triggered whenever a path is taken out of use.
 * Includes path failure and PG bypass.
 */
static void trigger_event(struct work_struct *work)
{
	struct multipath *m =
		container_of(work, struct multipath, trigger_event);

	dm_table_event(m->ti->table);
}

/*-----------------------------------------------------------------
 * Constructor/argument parsing:
 * <#multipath feature args> [<arg>]*
 * <#hw_handler args> [hw_handler [<arg>]*]
 * <#priority groups>
 * <initial priority group>
 *     [<selector> <#selector args> [<arg>]*
 *      <#paths> <#per-path selector args>
 *         [<path> [<arg>]* ]+ ]+
 *---------------------------------------------------------------*/
struct param {
	unsigned min;
	unsigned max;
	char *error;
};

static int read_param(struct param *param, char *str, unsigned *v, char **error)
{
	if (!str ||
	    (sscanf(str, "%u", v) != 1) ||
	    (*v < param->min) ||
	    (*v > param->max)) {
		*error = param->error;
		return -EINVAL;
	}

	return 0;
}

struct arg_set {
	unsigned argc;
	char **argv;
};

static char *shift(struct arg_set *as)
{
	char *r;

	if (as->argc) {
		as->argc--;
		r = *as->argv;
		as->argv++;
		return r;
	}

	return NULL;
}

static void consume(struct arg_set *as, unsigned n)
{
	BUG_ON(as->argc < n);
	as->argc -= n;
	as->argv += n;
}

static int parse_path_selector(struct arg_set *as, struct priority_group *pg,
			       struct dm_target *ti)
{
	int r;
	struct path_selector_type *pst;
	unsigned ps_argc;

	static struct param _params[] = {
		{0, 1024, "invalid number of path selector args"},
	};

	pst = dm_get_path_selector(shift(as));
	if (!pst) {
		ti->error = "unknown path selector type";
		return -EINVAL;
	}

	r = read_param(_params, shift(as), &ps_argc, &ti->error);
	if (r) {
		dm_put_path_selector(pst);
		return -EINVAL;
	}

	if (ps_argc > as->argc) {
		dm_put_path_selector(pst);
		ti->error = "not enough arguments for path selector";
		return -EINVAL;
	}

	r = pst->create(&pg->ps, ps_argc, as->argv);
	if (r) {
		dm_put_path_selector(pst);
		ti->error = "path selector constructor failed";
		return r;
	}

	pg->ps.type = pst;
	consume(as, ps_argc);

	return 0;
}

static struct pgpath *parse_path(struct arg_set *as, struct path_selector *ps,
				 struct dm_target *ti)
{
	int r;
	struct pgpath *p;
	struct multipath *m = ti->private;

	/* we need at least a path arg */
	if (as->argc < 1) {
		ti->error = "no device given";
		return ERR_PTR(-EINVAL);
	}

	p = alloc_pgpath();
	if (!p)
		return ERR_PTR(-ENOMEM);

	r = dm_get_device(ti, shift(as), ti->begin, ti->len,
			  dm_table_get_mode(ti->table), &p->path.dev);
	if (r) {
		ti->error = "error getting device";
		goto bad;
	}

	if (m->hw_handler_name) {
		struct request_queue *q = bdev_get_queue(p->path.dev->bdev);

		r = scsi_dh_attach(q, m->hw_handler_name);
		if (r == -EBUSY) {
			/*
			 * Already attached to different hw_handler,
			 * try to reattach with correct one.
			 */
			scsi_dh_detach(q);
			r = scsi_dh_attach(q, m->hw_handler_name);
		}

		if (r < 0) {
			ti->error = "error attaching hardware handler";
			dm_put_device(ti, p->path.dev);
			goto bad;
		}

		if (m->hw_handler_params) {
			r = scsi_dh_set_params(q, m->hw_handler_params);
			if (r < 0) {
				ti->error = "unable to set hardware "
					    "handler parameters";
				scsi_dh_detach(q);
				dm_put_device(ti, p->path.dev);
				goto bad;
			}
		}
	}

	r = ps->type->add_path(ps, &p->path, as->argc, as->argv, &ti->error);
	if (r) {
		dm_put_device(ti, p->path.dev);
		goto bad;
	}

	return p;

bad:
	free_pgpath(p);
	return ERR_PTR(r);
}

static struct priority_group *parse_priority_group(struct arg_set *as,
						   struct multipath *m)
{
	static struct param _params[] = {
		{1, 1024, "invalid number of paths"},
		{0, 1024, "invalid number of selector args"}
	};

	int r;
	unsigned i, nr_selector_args, nr_params;
	struct priority_group *pg;
	struct dm_target *ti = m->ti;

	if (as->argc < 2) {
		as->argc = 0;
		ti->error = "not enough priority group arguments";
		return ERR_PTR(-EINVAL);
	}

	pg = alloc_priority_group();
	if (!pg) {
		ti->error = "couldn't allocate priority group";
		return ERR_PTR(-ENOMEM);
	}
	pg->m = m;

	r = parse_path_selector(as, pg, ti);
	if (r)
		goto bad;

	/*
	 * read the paths
	 */
	r = read_param(_params, shift(as), &pg->nr_pgpaths, &ti->error);
	if (r)
		goto bad;

	r = read_param(_params + 1, shift(as), &nr_selector_args, &ti->error);
	if (r)
		goto bad;

	nr_params = 1 + nr_selector_args;
	for (i = 0; i < pg->nr_pgpaths; i++) {
		struct pgpath *pgpath;
		struct arg_set path_args;

		if (as->argc < nr_params) {
			ti->error = "not enough path parameters";
			goto bad;
		}

		path_args.argc = nr_params;
		path_args.argv = as->argv;

		pgpath = parse_path(&path_args, &pg->ps, ti);
		if (IS_ERR(pgpath)) {
			r = PTR_ERR(pgpath);
			goto bad;
		}

		pgpath->pg = pg;
		list_add_tail(&pgpath->list, &pg->pgpaths);
		consume(as, nr_params);
	}

	return pg;

bad:
	free_priority_group(pg, ti);
	return ERR_PTR(r);
}

static int parse_hw_handler(struct arg_set *as, struct multipath *m)
{
	unsigned hw_argc;
	int ret;
	struct dm_target *ti = m->ti;

	static struct param _params[] = {
		{0, 1024, "invalid number of hardware handler args"},
	};

	if (read_param(_params, shift(as), &hw_argc, &ti->error))
		return -EINVAL;

	if (!hw_argc)
		return 0;

	if (hw_argc > as->argc) {
		ti->error = "not enough arguments for hardware handler";
		return -EINVAL;
	}

	m->hw_handler_name = kstrdup(shift(as), GFP_KERNEL);
	request_module("scsi_dh_%s", m->hw_handler_name);
	if (scsi_dh_handler_exist(m->hw_handler_name) == 0) {
		ti->error = "unknown hardware handler type";
		ret = -EINVAL;
		goto fail;
	}

	if (hw_argc > 1) {
		char *p;
		int i, j, len = 4;

		for (i = 0; i <= hw_argc - 2; i++)
			len += strlen(as->argv[i]) + 1;
		p = m->hw_handler_params = kzalloc(len, GFP_KERNEL);
		if (!p) {
			ti->error = "memory allocation failed";
			ret = -ENOMEM;
			goto fail;
		}
		j = sprintf(p, "%d", hw_argc - 1);
		for (i = 0, p += j + 1; i <= hw_argc - 2; i++, p += j + 1)
			j = sprintf(p, "%s", as->argv[i]);
	}
	consume(as, hw_argc - 1);

	return 0;
fail:
	kfree(m->hw_handler_name);
	m->hw_handler_name = NULL;
	return ret;
}
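/*
 * To make the constructor grammar documented above concrete, a
 * hypothetical dmsetup table line (illustrative only; the device
 * numbers, selector choice and repeat counts are made up) for two paths
 * in one priority group using the round-robin selector and no hardware
 * handler could look like:
 *
 *   0 1024 multipath 0 0 1 1 round-robin 0 2 1 8:16 1000 8:32 1000
 *
 * i.e. no feature args, no hw_handler args, one priority group, initial
 * group 1, selector "round-robin" with 0 selector args, 2 paths with
 * 1 per-path selector arg (the repeat count) each.
 */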
static int parse_features(struct arg_set *as, struct multipath *m)
{
	int r;
	unsigned argc;
	struct dm_target *ti = m->ti;
	const char *param_name;

	static struct param _params[] = {
		{0, 3, "invalid number of feature args"},
		{1, 50, "pg_init_retries must be between 1 and 50"},
	};

	r = read_param(_params, shift(as), &argc, &ti->error);
	if (r)
		return -EINVAL;

	if (!argc)
		return 0;

	do {
		param_name = shift(as);
		argc--;

		if (!strnicmp(param_name, MESG_STR("queue_if_no_path"))) {
			r = queue_if_no_path(m, 1, 0);
			continue;
		}

		if (!strnicmp(param_name, MESG_STR("pg_init_retries")) &&
		    (argc >= 1)) {
			r = read_param(_params + 1, shift(as),
				       &m->pg_init_retries, &ti->error);
			argc--;
			continue;
		}

		ti->error = "Unrecognised multipath feature request";
		r = -EINVAL;
	} while (argc && !r);

	return r;
}

static int multipath_ctr(struct dm_target *ti, unsigned int argc,
			 char **argv)
{
	/* target parameters */
	static struct param _params[] = {
		{1, 1024, "invalid number of priority groups"},
		{1, 1024, "invalid initial priority group number"},
	};

	int r;
	struct multipath *m;
	struct arg_set as;
	unsigned pg_count = 0;
	unsigned next_pg_num;

	as.argc = argc;
	as.argv = argv;

	m = alloc_multipath(ti);
	if (!m) {
		ti->error = "can't allocate multipath";
		return -EINVAL;
	}

	r = parse_features(&as, m);
	if (r)
		goto bad;

	r = parse_hw_handler(&as, m);
	if (r)
		goto bad;

	r = read_param(_params, shift(&as), &m->nr_priority_groups, &ti->error);
	if (r)
		goto bad;

	r = read_param(_params + 1, shift(&as), &next_pg_num, &ti->error);
	if (r)
		goto bad;

	/* parse the priority groups */
	while (as.argc) {
		struct priority_group *pg;

		pg = parse_priority_group(&as, m);
		if (IS_ERR(pg)) {
			r = PTR_ERR(pg);
			goto bad;
		}

		m->nr_valid_paths += pg->nr_pgpaths;
		list_add_tail(&pg->list, &m->priority_groups);
		pg_count++;
		pg->pg_num = pg_count;
		if (!--next_pg_num)
			m->next_pg = pg;
	}

	if (pg_count != m->nr_priority_groups) {
		ti->error = "priority group count mismatch";
		r = -EINVAL;
		goto bad;
	}

	ti->num_flush_requests = 1;

	return 0;

bad:
	free_multipath(m);
	return r;
}

static void flush_multipath_work(void)
{
	flush_workqueue(kmpath_handlerd);
	flush_workqueue(kmultipathd);
	flush_scheduled_work();
}

static void multipath_dtr(struct dm_target *ti)
{
	struct multipath *m = ti->private;

	flush_multipath_work();
	free_multipath(m);
}

/*
 * Map cloned requests
 */
static int multipath_map(struct dm_target *ti, struct request *clone,
			 union map_info *map_context)
{
	int r;
	struct dm_mpath_io *mpio;
	struct multipath *m = (struct multipath *) ti->private;

	mpio = mempool_alloc(m->mpio_pool, GFP_ATOMIC);
	if (!mpio)
		/* ENOMEM, requeue */
		return DM_MAPIO_REQUEUE;
	memset(mpio, 0, sizeof(*mpio));

	map_context->ptr = mpio;
	clone->cmd_flags |= REQ_FAILFAST_TRANSPORT;
	r = map_io(m, clone, mpio, 0);
	if (r < 0 || r == DM_MAPIO_REQUEUE)
		mempool_free(mpio, m->mpio_pool);

	return r;
}
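/*
 * Editorial note (summary, not original source text): each clone request
 * carries a struct dm_mpath_io allocated in multipath_map() above and
 * stored in map_context->ptr.  It is freed on mapping failure or requeue
 * (in multipath_map() and dispatch_queued_ios()) and otherwise in
 * multipath_end_io() once the request completes.
 */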
/*
 * Take a path out of use.
 */
static int fail_path(struct pgpath *pgpath)
{
	unsigned long flags;
	struct multipath *m = pgpath->pg->m;

	spin_lock_irqsave(&m->lock, flags);

	if (!pgpath->is_active)
		goto out;

	DMWARN("Failing path %s.", pgpath->path.dev->name);

	pgpath->pg->ps.type->fail_path(&pgpath->pg->ps, &pgpath->path);
	pgpath->is_active = 0;
	pgpath->fail_count++;

	m->nr_valid_paths--;

	if (pgpath == m->current_pgpath)
		m->current_pgpath = NULL;

	dm_path_uevent(DM_UEVENT_PATH_FAILED, m->ti,
		       pgpath->path.dev->name, m->nr_valid_paths);

	schedule_work(&m->trigger_event);
	queue_work(kmultipathd, &pgpath->deactivate_path);

out:
	spin_unlock_irqrestore(&m->lock, flags);

	return 0;
}

/*
 * Reinstate a previously-failed path
 */
static int reinstate_path(struct pgpath *pgpath)
{
	int r = 0;
	unsigned long flags;
	struct multipath *m = pgpath->pg->m;

	spin_lock_irqsave(&m->lock, flags);

	if (pgpath->is_active)
		goto out;

	if (!pgpath->pg->ps.type->reinstate_path) {
		DMWARN("Reinstate path not supported by path selector %s",
		       pgpath->pg->ps.type->name);
		r = -EINVAL;
		goto out;
	}

	r = pgpath->pg->ps.type->reinstate_path(&pgpath->pg->ps, &pgpath->path);
	if (r)
		goto out;

	pgpath->is_active = 1;

	if (!m->nr_valid_paths++ && m->queue_size) {
		m->current_pgpath = NULL;
		queue_work(kmultipathd, &m->process_queued_ios);
	} else if (m->hw_handler_name && (m->current_pg == pgpath->pg)) {
		if (queue_work(kmpath_handlerd, &pgpath->activate_path))
			m->pg_init_in_progress++;
	}

	dm_path_uevent(DM_UEVENT_PATH_REINSTATED, m->ti,
		       pgpath->path.dev->name, m->nr_valid_paths);

	schedule_work(&m->trigger_event);

out:
	spin_unlock_irqrestore(&m->lock, flags);

	return r;
}

/*
 * Fail or reinstate all paths that match the provided struct dm_dev.
 */
static int action_dev(struct multipath *m, struct dm_dev *dev,
		      action_fn action)
{
	int r = 0;
	struct pgpath *pgpath;
	struct priority_group *pg;

	list_for_each_entry(pg, &m->priority_groups, list) {
		list_for_each_entry(pgpath, &pg->pgpaths, list) {
			if (pgpath->path.dev == dev)
				r = action(pgpath);
		}
	}

	return r;
}

/*
 * Temporarily try to avoid having to use the specified PG
 */
static void bypass_pg(struct multipath *m, struct priority_group *pg,
		      int bypassed)
{
	unsigned long flags;

	spin_lock_irqsave(&m->lock, flags);

	pg->bypassed = bypassed;
	m->current_pgpath = NULL;
	m->current_pg = NULL;

	spin_unlock_irqrestore(&m->lock, flags);

	schedule_work(&m->trigger_event);
}

/*
 * Switch to using the specified PG from the next I/O that gets mapped
 */
static int switch_pg_num(struct multipath *m, const char *pgstr)
{
	struct priority_group *pg;
	unsigned pgnum;
	unsigned long flags;

	if (!pgstr || (sscanf(pgstr, "%u", &pgnum) != 1) || !pgnum ||
	    (pgnum > m->nr_priority_groups)) {
		DMWARN("invalid PG number supplied to switch_pg_num");
		return -EINVAL;
	}

	spin_lock_irqsave(&m->lock, flags);
	list_for_each_entry(pg, &m->priority_groups, list) {
		pg->bypassed = 0;
		if (--pgnum)
			continue;

		m->current_pgpath = NULL;
		m->current_pg = NULL;
		m->next_pg = pg;
	}
	spin_unlock_irqrestore(&m->lock, flags);

	schedule_work(&m->trigger_event);
	return 0;
}

/*
 * Set/clear bypassed status of a PG.
 * PGs are numbered upwards from 1 in the order they were declared.
 */
static int bypass_pg_num(struct multipath *m, const char *pgstr, int bypassed)
{
	struct priority_group *pg;
	unsigned pgnum;

	if (!pgstr || (sscanf(pgstr, "%u", &pgnum) != 1) || !pgnum ||
	    (pgnum > m->nr_priority_groups)) {
		DMWARN("invalid PG number supplied to bypass_pg");
		return -EINVAL;
	}

	list_for_each_entry(pg, &m->priority_groups, list) {
		if (!--pgnum)
			break;
	}

	bypass_pg(m, pg, bypassed);
	return 0;
}

/*
 * Should we retry pg_init immediately?
 */
static int pg_init_limit_reached(struct multipath *m, struct pgpath *pgpath)
{
	unsigned long flags;
	int limit_reached = 0;

	spin_lock_irqsave(&m->lock, flags);

	if (m->pg_init_count <= m->pg_init_retries)
		m->pg_init_required = 1;
	else
		limit_reached = 1;

	spin_unlock_irqrestore(&m->lock, flags);

	return limit_reached;
}
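/*
 * Worked example (editorial illustration): with "pg_init_retries 5" in
 * the feature args, a retryable pg_init error is re-attempted while
 * pg_init_count stays at or below 5, so the group may see up to six
 * activation attempts in total before pg_init_done() fails the path.
 */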
static void pg_init_done(void *data, int errors)
{
	struct dm_path *path = data;
	struct pgpath *pgpath = path_to_pgpath(path);
	struct priority_group *pg = pgpath->pg;
	struct multipath *m = pg->m;
	unsigned long flags;

	/* device or driver problems */
	switch (errors) {
	case SCSI_DH_OK:
		break;
	case SCSI_DH_NOSYS:
		if (!m->hw_handler_name) {
			errors = 0;
			break;
		}
		DMERR("Cannot failover device because scsi_dh_%s was not "
		      "loaded.", m->hw_handler_name);
		/*
		 * Fail path for now, so we do not ping pong
		 */
		fail_path(pgpath);
		break;
	case SCSI_DH_DEV_TEMP_BUSY:
		/*
		 * Probably doing something like FW upgrade on the
		 * controller so try the other pg.
		 */
		bypass_pg(m, pg, 1);
		break;
	/* TODO: For SCSI_DH_RETRY we should wait a couple seconds */
	case SCSI_DH_RETRY:
	case SCSI_DH_IMM_RETRY:
	case SCSI_DH_RES_TEMP_UNAVAIL:
		if (pg_init_limit_reached(m, pgpath))
			fail_path(pgpath);
		errors = 0;
		break;
	default:
		/*
		 * We probably do not want to fail the path for a device
		 * error, but this is what the old dm did. In future
		 * patches we can do more advanced handling.
		 */
		fail_path(pgpath);
	}

	spin_lock_irqsave(&m->lock, flags);
	if (errors) {
		if (pgpath == m->current_pgpath) {
			DMERR("Could not failover device. Error %d.", errors);
			m->current_pgpath = NULL;
			m->current_pg = NULL;
		}
	} else if (!m->pg_init_required) {
		m->queue_io = 0;
		pg->bypassed = 0;
	}

	m->pg_init_in_progress--;
	if (!m->pg_init_in_progress)
		queue_work(kmultipathd, &m->process_queued_ios);
	spin_unlock_irqrestore(&m->lock, flags);
}

static void activate_path(struct work_struct *work)
{
	struct pgpath *pgpath =
		container_of(work, struct pgpath, activate_path);

	scsi_dh_activate(bdev_get_queue(pgpath->path.dev->bdev),
			 pg_init_done, &pgpath->path);
}

/*
 * end_io handling
 */
static int do_end_io(struct multipath *m, struct request *clone,
		     int error, struct dm_mpath_io *mpio)
{
	/*
	 * We don't queue any clone request inside the multipath target
	 * during end I/O handling, since those clone requests don't have
	 * bio clones.  If we queue them inside the multipath target,
	 * we need to make bio clones, that requires memory allocation.
	 * (See drivers/md/dm.c:end_clone_bio() about why the clone requests
	 *  don't have bio clones.)
	 * Instead of queueing the clone request here, we queue the original
	 * request into dm core, which will remake a clone request and
	 * clone bios for it and resubmit it later.
	 */
	int r = DM_ENDIO_REQUEUE;
	unsigned long flags;

	if (!error && !clone->errors)
		return 0;	/* I/O complete */

	if (error == -EOPNOTSUPP)
		return error;

	if (mpio->pgpath)
		fail_path(mpio->pgpath);

	spin_lock_irqsave(&m->lock, flags);
	if (!m->nr_valid_paths && !m->queue_if_no_path && !__must_push_back(m))
		r = -EIO;
	spin_unlock_irqrestore(&m->lock, flags);

	return r;
}

static int multipath_end_io(struct dm_target *ti, struct request *clone,
			    int error, union map_info *map_context)
{
	struct multipath *m = ti->private;
	struct dm_mpath_io *mpio = map_context->ptr;
	struct pgpath *pgpath = mpio->pgpath;
	struct path_selector *ps;
	int r;

	r = do_end_io(m, clone, error, mpio);
	if (pgpath) {
		ps = &pgpath->pg->ps;
		if (ps->type->end_io)
			ps->type->end_io(ps, &pgpath->path, mpio->nr_bytes);
	}
	mempool_free(mpio, m->mpio_pool);

	return r;
}
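/*
 * Editorial summary of the completion handling above: success returns 0;
 * -EOPNOTSUPP is passed through unchanged; any other error fails the
 * path that carried the request and then returns DM_ENDIO_REQUEUE so
 * that dm core resubmits the original request, or -EIO once no valid
 * paths remain, queue_if_no_path is clear and __must_push_back() is
 * false.
 */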
/*
 * Suspend can't complete until all the I/O is processed so if
 * the last path fails we must error any remaining I/O.
 * Note that if the freeze_bdev fails while suspending, the
 * queue_if_no_path state is lost - userspace should reset it.
 */
static void multipath_presuspend(struct dm_target *ti)
{
	struct multipath *m = (struct multipath *) ti->private;

	queue_if_no_path(m, 0, 1);
}

static void multipath_postsuspend(struct dm_target *ti)
{
	struct multipath *m = ti->private;

	mutex_lock(&m->work_mutex);
	m->suspended = 1;
	flush_multipath_work();
	mutex_unlock(&m->work_mutex);
}

/*
 * Restore the queue_if_no_path setting.
 */
static void multipath_resume(struct dm_target *ti)
{
	struct multipath *m = (struct multipath *) ti->private;
	unsigned long flags;

	mutex_lock(&m->work_mutex);
	m->suspended = 0;
	mutex_unlock(&m->work_mutex);

	spin_lock_irqsave(&m->lock, flags);
	m->queue_if_no_path = m->saved_queue_if_no_path;
	spin_unlock_irqrestore(&m->lock, flags);
}

/*
 * Info output has the following format:
 * num_multipath_feature_args [multipath_feature_args]*
 * num_handler_status_args [handler_status_args]*
 * num_groups init_group_number
 *             [A|D|E num_ps_status_args [ps_status_args]*
 *              num_paths num_selector_args
 *              [path_dev A|F fail_count [selector_args]* ]+ ]+
 *
 * Table output has the following format (identical to the constructor string):
 * num_feature_args [features_args]*
 * num_handler_args hw_handler [hw_handler_args]*
 * num_groups init_group_number
 *     [priority selector-name num_ps_args [ps_args]*
 *      num_paths num_selector_args [path_dev [selector_args]* ]+ ]+
 */
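/*
 * Hypothetical example of the two formats above (illustrative values
 * only, assuming one group of two healthy paths using the round-robin
 * selector with default arguments; the exact selector fields depend on
 * that selector's status callbacks):
 *
 *   INFO:  2 0 0 0 1 1 A 0 2 0 8:16 A 0 8:32 A 0
 *   TABLE: 0 0 1 1 round-robin 0 2 1 8:16 1000 8:32 1000
 */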
"A" : "F", 1383 p->fail_count); 1384 if (pg->ps.type->status) 1385 sz += pg->ps.type->status(&pg->ps, 1386 &p->path, type, result + sz, 1387 maxlen - sz); 1388 } 1389 } 1390 break; 1391 1392 case STATUSTYPE_TABLE: 1393 list_for_each_entry(pg, &m->priority_groups, list) { 1394 DMEMIT("%s ", pg->ps.type->name); 1395 1396 if (pg->ps.type->status) 1397 sz += pg->ps.type->status(&pg->ps, NULL, type, 1398 result + sz, 1399 maxlen - sz); 1400 else 1401 DMEMIT("0 "); 1402 1403 DMEMIT("%u %u ", pg->nr_pgpaths, 1404 pg->ps.type->table_args); 1405 1406 list_for_each_entry(p, &pg->pgpaths, list) { 1407 DMEMIT("%s ", p->path.dev->name); 1408 if (pg->ps.type->status) 1409 sz += pg->ps.type->status(&pg->ps, 1410 &p->path, type, result + sz, 1411 maxlen - sz); 1412 } 1413 } 1414 break; 1415 } 1416 1417 spin_unlock_irqrestore(&m->lock, flags); 1418 1419 return 0; 1420 } 1421 1422 static int multipath_message(struct dm_target *ti, unsigned argc, char **argv) 1423 { 1424 int r = -EINVAL; 1425 struct dm_dev *dev; 1426 struct multipath *m = (struct multipath *) ti->private; 1427 action_fn action; 1428 1429 mutex_lock(&m->work_mutex); 1430 1431 if (m->suspended) { 1432 r = -EBUSY; 1433 goto out; 1434 } 1435 1436 if (dm_suspended(ti)) { 1437 r = -EBUSY; 1438 goto out; 1439 } 1440 1441 if (argc == 1) { 1442 if (!strnicmp(argv[0], MESG_STR("queue_if_no_path"))) { 1443 r = queue_if_no_path(m, 1, 0); 1444 goto out; 1445 } else if (!strnicmp(argv[0], MESG_STR("fail_if_no_path"))) { 1446 r = queue_if_no_path(m, 0, 0); 1447 goto out; 1448 } 1449 } 1450 1451 if (argc != 2) { 1452 DMWARN("Unrecognised multipath message received."); 1453 goto out; 1454 } 1455 1456 if (!strnicmp(argv[0], MESG_STR("disable_group"))) { 1457 r = bypass_pg_num(m, argv[1], 1); 1458 goto out; 1459 } else if (!strnicmp(argv[0], MESG_STR("enable_group"))) { 1460 r = bypass_pg_num(m, argv[1], 0); 1461 goto out; 1462 } else if (!strnicmp(argv[0], MESG_STR("switch_group"))) { 1463 r = switch_pg_num(m, argv[1]); 1464 goto out; 1465 } else if (!strnicmp(argv[0], MESG_STR("reinstate_path"))) 1466 action = reinstate_path; 1467 else if (!strnicmp(argv[0], MESG_STR("fail_path"))) 1468 action = fail_path; 1469 else { 1470 DMWARN("Unrecognised multipath message received."); 1471 goto out; 1472 } 1473 1474 r = dm_get_device(ti, argv[1], ti->begin, ti->len, 1475 dm_table_get_mode(ti->table), &dev); 1476 if (r) { 1477 DMWARN("message: error getting device %s", 1478 argv[1]); 1479 goto out; 1480 } 1481 1482 r = action_dev(m, dev, action); 1483 1484 dm_put_device(ti, dev); 1485 1486 out: 1487 mutex_unlock(&m->work_mutex); 1488 return r; 1489 } 1490 1491 static int multipath_ioctl(struct dm_target *ti, unsigned int cmd, 1492 unsigned long arg) 1493 { 1494 struct multipath *m = (struct multipath *) ti->private; 1495 struct block_device *bdev = NULL; 1496 fmode_t mode = 0; 1497 unsigned long flags; 1498 int r = 0; 1499 1500 spin_lock_irqsave(&m->lock, flags); 1501 1502 if (!m->current_pgpath) 1503 __choose_pgpath(m, 0); 1504 1505 if (m->current_pgpath) { 1506 bdev = m->current_pgpath->path.dev->bdev; 1507 mode = m->current_pgpath->path.dev->mode; 1508 } 1509 1510 if (m->queue_io) 1511 r = -EAGAIN; 1512 else if (!bdev) 1513 r = -EIO; 1514 1515 spin_unlock_irqrestore(&m->lock, flags); 1516 1517 return r ? 
static int multipath_ioctl(struct dm_target *ti, unsigned int cmd,
			   unsigned long arg)
{
	struct multipath *m = (struct multipath *) ti->private;
	struct block_device *bdev = NULL;
	fmode_t mode = 0;
	unsigned long flags;
	int r = 0;

	spin_lock_irqsave(&m->lock, flags);

	if (!m->current_pgpath)
		__choose_pgpath(m, 0);

	if (m->current_pgpath) {
		bdev = m->current_pgpath->path.dev->bdev;
		mode = m->current_pgpath->path.dev->mode;
	}

	if (m->queue_io)
		r = -EAGAIN;
	else if (!bdev)
		r = -EIO;

	spin_unlock_irqrestore(&m->lock, flags);

	return r ? : __blkdev_driver_ioctl(bdev, mode, cmd, arg);
}

static int multipath_iterate_devices(struct dm_target *ti,
				     iterate_devices_callout_fn fn, void *data)
{
	struct multipath *m = ti->private;
	struct priority_group *pg;
	struct pgpath *p;
	int ret = 0;

	list_for_each_entry(pg, &m->priority_groups, list) {
		list_for_each_entry(p, &pg->pgpaths, list) {
			ret = fn(ti, p->path.dev, ti->begin, ti->len, data);
			if (ret)
				goto out;
		}
	}

out:
	return ret;
}

static int __pgpath_busy(struct pgpath *pgpath)
{
	struct request_queue *q = bdev_get_queue(pgpath->path.dev->bdev);

	return dm_underlying_device_busy(q);
}

/*
 * We return "busy" only when we can map I/Os but the underlying devices
 * are busy (so even if we map I/Os now, the I/Os will wait on
 * the underlying queue).
 * In other words, if we want to kill I/Os or queue them inside us
 * due to map unavailability, we don't return "busy".  Otherwise,
 * dm core won't give us the I/Os and we can't do what we want.
 */
static int multipath_busy(struct dm_target *ti)
{
	int busy = 0, has_active = 0;
	struct multipath *m = ti->private;
	struct priority_group *pg;
	struct pgpath *pgpath;
	unsigned long flags;

	spin_lock_irqsave(&m->lock, flags);

	/* Guess which priority_group will be used at next mapping time */
	if (unlikely(!m->current_pgpath && m->next_pg))
		pg = m->next_pg;
	else if (likely(m->current_pg))
		pg = m->current_pg;
	else
		/*
		 * We don't know which pg will be used at next mapping time.
		 * We don't call __choose_pgpath() here to avoid triggering
		 * pg_init just by a busy check.
		 * So we don't know whether the underlying devices we will be
		 * using at next mapping time are busy or not.  Just try
		 * mapping.
		 */
		goto out;

	/*
	 * If there is at least one non-busy active path, the path selector
	 * will be able to select it.  So we consider such a pg as not busy.
	 */
	busy = 1;
	list_for_each_entry(pgpath, &pg->pgpaths, list)
		if (pgpath->is_active) {
			has_active = 1;

			if (!__pgpath_busy(pgpath)) {
				busy = 0;
				break;
			}
		}

	if (!has_active)
		/*
		 * No active path in this pg, so this pg won't be used and
		 * the current_pg will be changed at next mapping time.
		 * We need to try mapping to determine it.
		 */
		busy = 0;

out:
	spin_unlock_irqrestore(&m->lock, flags);

	return busy;
}

/*-----------------------------------------------------------------
 * Module setup
 *---------------------------------------------------------------*/
static struct target_type multipath_target = {
	.name = "multipath",
	.version = {1, 1, 1},
	.module = THIS_MODULE,
	.ctr = multipath_ctr,
	.dtr = multipath_dtr,
	.map_rq = multipath_map,
	.rq_end_io = multipath_end_io,
	.presuspend = multipath_presuspend,
	.postsuspend = multipath_postsuspend,
	.resume = multipath_resume,
	.status = multipath_status,
	.message = multipath_message,
	.ioctl = multipath_ioctl,
	.iterate_devices = multipath_iterate_devices,
	.busy = multipath_busy,
};

static int __init dm_multipath_init(void)
{
	int r;

	/* allocate a slab for the dm_ios */
	_mpio_cache = KMEM_CACHE(dm_mpath_io, 0);
	if (!_mpio_cache)
		return -ENOMEM;

	r = dm_register_target(&multipath_target);
	if (r < 0) {
		DMERR("register failed %d", r);
		kmem_cache_destroy(_mpio_cache);
		return -EINVAL;
	}

	kmultipathd = create_workqueue("kmpathd");
	if (!kmultipathd) {
		DMERR("failed to create workqueue kmpathd");
		dm_unregister_target(&multipath_target);
		kmem_cache_destroy(_mpio_cache);
		return -ENOMEM;
	}

	/*
	 * A separate workqueue is used to handle the device handlers
	 * to avoid overloading existing workqueue. Overloading the
	 * old workqueue would also create a bottleneck in the
	 * path of the storage hardware device activation.
	 */
	kmpath_handlerd = create_singlethread_workqueue("kmpath_handlerd");
	if (!kmpath_handlerd) {
		DMERR("failed to create workqueue kmpath_handlerd");
		destroy_workqueue(kmultipathd);
		dm_unregister_target(&multipath_target);
		kmem_cache_destroy(_mpio_cache);
		return -ENOMEM;
	}

	DMINFO("version %u.%u.%u loaded",
	       multipath_target.version[0], multipath_target.version[1],
	       multipath_target.version[2]);

	return r;
}

static void __exit dm_multipath_exit(void)
{
	destroy_workqueue(kmpath_handlerd);
	destroy_workqueue(kmultipathd);

	dm_unregister_target(&multipath_target);
	kmem_cache_destroy(_mpio_cache);
}

module_init(dm_multipath_init);
module_exit(dm_multipath_exit);

MODULE_DESCRIPTION(DM_NAME " multipath target");
MODULE_AUTHOR("Sistina Software <dm-devel@redhat.com>");
MODULE_LICENSE("GPL");