/*
 * Copyright (C) 2003 Sistina Software Limited.
 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
 *
 * This file is released under the GPL.
 */

#include <linux/device-mapper.h>

#include "dm-path-selector.h"
#include "dm-uevent.h"

#include <linux/ctype.h>
#include <linux/init.h>
#include <linux/mempool.h>
#include <linux/module.h>
#include <linux/pagemap.h>
#include <linux/slab.h>
#include <linux/time.h>
#include <linux/workqueue.h>
#include <scsi/scsi_dh.h>
#include <asm/atomic.h>

#define DM_MSG_PREFIX "multipath"
#define MESG_STR(x) x, sizeof(x)

/* Path properties */
struct pgpath {
	struct list_head list;

	struct priority_group *pg;	/* Owning PG */
	unsigned is_active;		/* Path status */
	unsigned fail_count;		/* Cumulative failure count */

	struct dm_path path;
	struct work_struct deactivate_path;
	struct work_struct activate_path;
};

#define path_to_pgpath(__pgp) container_of((__pgp), struct pgpath, path)

/*
 * Paths are grouped into Priority Groups and numbered from 1 upwards.
 * Each has a path selector which controls which path gets used.
 */
struct priority_group {
	struct list_head list;

	struct multipath *m;		/* Owning multipath instance */
	struct path_selector ps;

	unsigned pg_num;		/* Reference number */
	unsigned bypassed;		/* Temporarily bypass this PG? */

	unsigned nr_pgpaths;		/* Number of paths in PG */
	struct list_head pgpaths;
};

/* Multipath context */
struct multipath {
	struct list_head list;
	struct dm_target *ti;

	spinlock_t lock;

	const char *hw_handler_name;
	char *hw_handler_params;
	unsigned nr_priority_groups;
	struct list_head priority_groups;
	unsigned pg_init_required;	/* pg_init needs calling? */
	unsigned pg_init_in_progress;	/* Only one pg_init allowed at once */
	wait_queue_head_t pg_init_wait;	/* Wait for pg_init completion */

	unsigned nr_valid_paths;	/* Total number of usable paths */
	struct pgpath *current_pgpath;
	struct priority_group *current_pg;
	struct priority_group *next_pg;	/* Switch to this PG if set */
	unsigned repeat_count;		/* I/Os left before calling PS again */

	unsigned queue_io;		/* Must we queue all I/O? */
	unsigned queue_if_no_path;	/* Queue I/O if last path fails? */
	unsigned saved_queue_if_no_path;/* Saved state during suspension */
	unsigned pg_init_retries;	/* Number of times to retry pg_init */
	unsigned pg_init_count;		/* Number of times pg_init called */

	struct work_struct process_queued_ios;
	struct list_head queued_ios;
	unsigned queue_size;

	struct work_struct trigger_event;

	/*
	 * We must use a mempool of dm_mpath_io structs so that we
	 * can resubmit bios on error.
	 */
	mempool_t *mpio_pool;

	struct mutex work_mutex;
};

/*
 * Context information attached to each bio we process.
 */
struct dm_mpath_io {
	struct pgpath *pgpath;
	size_t nr_bytes;
};

typedef int (*action_fn) (struct pgpath *pgpath);

#define MIN_IOS 256	/* Mempool size */

static struct kmem_cache *_mpio_cache;

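/*
 * kmultipathd resubmits queued I/O and runs other deferred path work;
 * kmpath_handlerd runs hardware handler activation (pg_init) separately
 * so slow device activation does not hold up the main multipath workqueue.
 */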
static struct workqueue_struct *kmultipathd, *kmpath_handlerd;
static void process_queued_ios(struct work_struct *work);
static void trigger_event(struct work_struct *work);
static void activate_path(struct work_struct *work);
static void deactivate_path(struct work_struct *work);


/*-----------------------------------------------
 * Allocation routines
 *-----------------------------------------------*/

static struct pgpath *alloc_pgpath(void)
{
	struct pgpath *pgpath = kzalloc(sizeof(*pgpath), GFP_KERNEL);

	if (pgpath) {
		pgpath->is_active = 1;
		INIT_WORK(&pgpath->deactivate_path, deactivate_path);
		INIT_WORK(&pgpath->activate_path, activate_path);
	}

	return pgpath;
}

static void free_pgpath(struct pgpath *pgpath)
{
	kfree(pgpath);
}

static void deactivate_path(struct work_struct *work)
{
	struct pgpath *pgpath =
		container_of(work, struct pgpath, deactivate_path);

	blk_abort_queue(pgpath->path.dev->bdev->bd_disk->queue);
}

static struct priority_group *alloc_priority_group(void)
{
	struct priority_group *pg;

	pg = kzalloc(sizeof(*pg), GFP_KERNEL);

	if (pg)
		INIT_LIST_HEAD(&pg->pgpaths);

	return pg;
}

static void free_pgpaths(struct list_head *pgpaths, struct dm_target *ti)
{
	struct pgpath *pgpath, *tmp;
	struct multipath *m = ti->private;

	list_for_each_entry_safe(pgpath, tmp, pgpaths, list) {
		list_del(&pgpath->list);
		if (m->hw_handler_name)
			scsi_dh_detach(bdev_get_queue(pgpath->path.dev->bdev));
		dm_put_device(ti, pgpath->path.dev);
		free_pgpath(pgpath);
	}
}

static void free_priority_group(struct priority_group *pg,
				struct dm_target *ti)
{
	struct path_selector *ps = &pg->ps;

	if (ps->type) {
		ps->type->destroy(ps);
		dm_put_path_selector(ps->type);
	}

	free_pgpaths(&pg->pgpaths, ti);
	kfree(pg);
}

static struct multipath *alloc_multipath(struct dm_target *ti)
{
	struct multipath *m;

	m = kzalloc(sizeof(*m), GFP_KERNEL);
	if (m) {
		INIT_LIST_HEAD(&m->priority_groups);
		INIT_LIST_HEAD(&m->queued_ios);
		spin_lock_init(&m->lock);
		m->queue_io = 1;
		INIT_WORK(&m->process_queued_ios, process_queued_ios);
		INIT_WORK(&m->trigger_event, trigger_event);
		init_waitqueue_head(&m->pg_init_wait);
		mutex_init(&m->work_mutex);
		m->mpio_pool = mempool_create_slab_pool(MIN_IOS, _mpio_cache);
		if (!m->mpio_pool) {
			kfree(m);
			return NULL;
		}
		m->ti = ti;
		ti->private = m;
	}

	return m;
}

static void free_multipath(struct multipath *m)
{
	struct priority_group *pg, *tmp;

	list_for_each_entry_safe(pg, tmp, &m->priority_groups, list) {
		list_del(&pg->list);
		free_priority_group(pg, m->ti);
	}

	kfree(m->hw_handler_name);
	kfree(m->hw_handler_params);
	mempool_destroy(m->mpio_pool);
	kfree(m);
}


/*-----------------------------------------------
 * Path selection
 *-----------------------------------------------*/

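/*
 * Queue hardware handler activation for every active path in the current
 * priority group.  pg_init_in_progress counts the activations still
 * outstanding.  Called with m->lock held.
 */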
static void __pg_init_all_paths(struct multipath *m)
{
	struct pgpath *pgpath;

	m->pg_init_count++;
	m->pg_init_required = 0;
	list_for_each_entry(pgpath, &m->current_pg->pgpaths, list) {
		/* Skip failed paths */
		if (!pgpath->is_active)
			continue;
		if (queue_work(kmpath_handlerd, &pgpath->activate_path))
			m->pg_init_in_progress++;
	}
}

static void __switch_pg(struct multipath *m, struct pgpath *pgpath)
{
	m->current_pg = pgpath->pg;

	/* Must we initialise the PG first, and queue I/O till it's ready? */
	if (m->hw_handler_name) {
		m->pg_init_required = 1;
		m->queue_io = 1;
	} else {
		m->pg_init_required = 0;
		m->queue_io = 0;
	}

	m->pg_init_count = 0;
}

static int __choose_path_in_pg(struct multipath *m, struct priority_group *pg,
			       size_t nr_bytes)
{
	struct dm_path *path;

	path = pg->ps.type->select_path(&pg->ps, &m->repeat_count, nr_bytes);
	if (!path)
		return -ENXIO;

	m->current_pgpath = path_to_pgpath(path);

	if (m->current_pg != pg)
		__switch_pg(m, m->current_pgpath);

	return 0;
}

static void __choose_pgpath(struct multipath *m, size_t nr_bytes)
{
	struct priority_group *pg;
	unsigned bypassed = 1;

	if (!m->nr_valid_paths)
		goto failed;

	/* Were we instructed to switch PG? */
	if (m->next_pg) {
		pg = m->next_pg;
		m->next_pg = NULL;
		if (!__choose_path_in_pg(m, pg, nr_bytes))
			return;
	}

	/* Don't change PG until it has no remaining paths */
	if (m->current_pg && !__choose_path_in_pg(m, m->current_pg, nr_bytes))
		return;

	/*
	 * Loop through priority groups until we find a valid path.
	 * First time we skip PGs marked 'bypassed'.
	 * Second time we only try the ones we skipped.
	 */
	do {
		list_for_each_entry(pg, &m->priority_groups, list) {
			if (pg->bypassed == bypassed)
				continue;
			if (!__choose_path_in_pg(m, pg, nr_bytes))
				return;
		}
	} while (bypassed--);

failed:
	m->current_pgpath = NULL;
	m->current_pg = NULL;
}

/*
 * Check whether bios must be queued in the device-mapper core rather
 * than here in the target.
 *
 * m->lock must be held on entry.
 *
 * If m->queue_if_no_path and m->saved_queue_if_no_path hold the
 * same value then we are not between multipath_presuspend()
 * and multipath_resume() calls and we have no need to check
 * for the DMF_NOFLUSH_SUSPENDING flag.
 */
static int __must_push_back(struct multipath *m)
{
	return (m->queue_if_no_path != m->saved_queue_if_no_path &&
		dm_noflush_suspending(m->ti));
}

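/*
 * Map a cloned request to the current path, selecting a new path first if
 * necessary.  Returns DM_MAPIO_REMAPPED once a path has been chosen,
 * DM_MAPIO_SUBMITTED if the request was queued for later resubmission,
 * DM_MAPIO_REQUEUE to push it back to dm core, or -EIO on failure.
 */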
static int map_io(struct multipath *m, struct request *clone,
		  struct dm_mpath_io *mpio, unsigned was_queued)
{
	int r = DM_MAPIO_REMAPPED;
	size_t nr_bytes = blk_rq_bytes(clone);
	unsigned long flags;
	struct pgpath *pgpath;
	struct block_device *bdev;

	spin_lock_irqsave(&m->lock, flags);

	/* Do we need to select a new pgpath? */
	if (!m->current_pgpath ||
	    (!m->queue_io && (m->repeat_count && --m->repeat_count == 0)))
		__choose_pgpath(m, nr_bytes);

	pgpath = m->current_pgpath;

	if (was_queued)
		m->queue_size--;

	if ((pgpath && m->queue_io) ||
	    (!pgpath && m->queue_if_no_path)) {
		/* Queue for the daemon to resubmit */
		list_add_tail(&clone->queuelist, &m->queued_ios);
		m->queue_size++;
		if ((m->pg_init_required && !m->pg_init_in_progress) ||
		    !m->queue_io)
			queue_work(kmultipathd, &m->process_queued_ios);
		pgpath = NULL;
		r = DM_MAPIO_SUBMITTED;
	} else if (pgpath) {
		bdev = pgpath->path.dev->bdev;
		clone->q = bdev_get_queue(bdev);
		clone->rq_disk = bdev->bd_disk;
	} else if (__must_push_back(m))
		r = DM_MAPIO_REQUEUE;
	else
		r = -EIO;	/* Failed */

	mpio->pgpath = pgpath;
	mpio->nr_bytes = nr_bytes;

	if (r == DM_MAPIO_REMAPPED && pgpath->pg->ps.type->start_io)
		pgpath->pg->ps.type->start_io(&pgpath->pg->ps, &pgpath->path,
					      nr_bytes);

	spin_unlock_irqrestore(&m->lock, flags);

	return r;
}

/*
 * If we run out of usable paths, should we queue I/O or error it?
 */
static int queue_if_no_path(struct multipath *m, unsigned queue_if_no_path,
			    unsigned save_old_value)
{
	unsigned long flags;

	spin_lock_irqsave(&m->lock, flags);

	if (save_old_value)
		m->saved_queue_if_no_path = m->queue_if_no_path;
	else
		m->saved_queue_if_no_path = queue_if_no_path;
	m->queue_if_no_path = queue_if_no_path;
	if (!m->queue_if_no_path && m->queue_size)
		queue_work(kmultipathd, &m->process_queued_ios);

	spin_unlock_irqrestore(&m->lock, flags);

	return 0;
}

/*-----------------------------------------------------------------
 * The multipath daemon is responsible for resubmitting queued ios.
 *---------------------------------------------------------------*/

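/*
 * Take the queued clone requests off the list and re-map each one:
 * dispatch it if a path is now available, hand it back to dm core for
 * requeueing, or kill it if mapping failed outright.
 */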
static void dispatch_queued_ios(struct multipath *m)
{
	int r;
	unsigned long flags;
	struct dm_mpath_io *mpio;
	union map_info *info;
	struct request *clone, *n;
	LIST_HEAD(cl);

	spin_lock_irqsave(&m->lock, flags);
	list_splice_init(&m->queued_ios, &cl);
	spin_unlock_irqrestore(&m->lock, flags);

	list_for_each_entry_safe(clone, n, &cl, queuelist) {
		list_del_init(&clone->queuelist);

		info = dm_get_rq_mapinfo(clone);
		mpio = info->ptr;

		r = map_io(m, clone, mpio, 1);
		if (r < 0) {
			mempool_free(mpio, m->mpio_pool);
			dm_kill_unmapped_request(clone, r);
		} else if (r == DM_MAPIO_REMAPPED)
			dm_dispatch_request(clone);
		else if (r == DM_MAPIO_REQUEUE) {
			mempool_free(mpio, m->mpio_pool);
			dm_requeue_unmapped_request(clone);
		}
	}
}

static void process_queued_ios(struct work_struct *work)
{
	struct multipath *m =
		container_of(work, struct multipath, process_queued_ios);
	struct pgpath *pgpath = NULL;
	unsigned must_queue = 1;
	unsigned long flags;

	spin_lock_irqsave(&m->lock, flags);

	if (!m->queue_size)
		goto out;

	if (!m->current_pgpath)
		__choose_pgpath(m, 0);

	pgpath = m->current_pgpath;

	if ((pgpath && !m->queue_io) ||
	    (!pgpath && !m->queue_if_no_path))
		must_queue = 0;

	if (m->pg_init_required && !m->pg_init_in_progress && pgpath)
		__pg_init_all_paths(m);

out:
	spin_unlock_irqrestore(&m->lock, flags);
	if (!must_queue)
		dispatch_queued_ios(m);
}

/*
 * An event is triggered whenever a path is taken out of use.
 * Includes path failure and PG bypass.
 */
static void trigger_event(struct work_struct *work)
{
	struct multipath *m =
		container_of(work, struct multipath, trigger_event);

	dm_table_event(m->ti->table);
}

/*-----------------------------------------------------------------
 * Constructor/argument parsing:
 * <#multipath feature args> [<arg>]*
 * <#hw_handler args> [hw_handler [<arg>]*]
 * <#priority groups>
 * <initial priority group>
 *     [<selector> <#selector args> [<arg>]*
 *      <#paths> <#per-path selector args>
 *         [<path> [<arg>]* ]+ ]+
 *---------------------------------------------------------------*/
struct param {
	unsigned min;
	unsigned max;
	char *error;
};

static int read_param(struct param *param, char *str, unsigned *v, char **error)
{
	if (!str ||
	    (sscanf(str, "%u", v) != 1) ||
	    (*v < param->min) ||
	    (*v > param->max)) {
		*error = param->error;
		return -EINVAL;
	}

	return 0;
}

struct arg_set {
	unsigned argc;
	char **argv;
};

static char *shift(struct arg_set *as)
{
	char *r;

	if (as->argc) {
		as->argc--;
		r = *as->argv;
		as->argv++;
		return r;
	}

	return NULL;
}

static void consume(struct arg_set *as, unsigned n)
{
	BUG_ON(as->argc < n);
	as->argc -= n;
	as->argv += n;
}

static int parse_path_selector(struct arg_set *as, struct priority_group *pg,
			       struct dm_target *ti)
{
	int r;
	struct path_selector_type *pst;
	unsigned ps_argc;

	static struct param _params[] = {
		{0, 1024, "invalid number of path selector args"},
	};

	pst = dm_get_path_selector(shift(as));
	if (!pst) {
		ti->error = "unknown path selector type";
		return -EINVAL;
	}

	r = read_param(_params, shift(as), &ps_argc, &ti->error);
	if (r) {
		dm_put_path_selector(pst);
		return -EINVAL;
	}

	if (ps_argc > as->argc) {
		dm_put_path_selector(pst);
		ti->error = "not enough arguments for path selector";
		return -EINVAL;
	}

	r = pst->create(&pg->ps, ps_argc, as->argv);
	if (r) {
		dm_put_path_selector(pst);
		ti->error = "path selector constructor failed";
		return r;
	}

	pg->ps.type = pst;
	consume(as, ps_argc);

	return 0;
}

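/*
 * Parse a single path argument: open the underlying device, attach the
 * hardware handler (and its parameters) if one was requested, and register
 * the path with the priority group's path selector.
 */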
static struct pgpath *parse_path(struct arg_set *as, struct path_selector *ps,
				 struct dm_target *ti)
{
	int r;
	struct pgpath *p;
	struct multipath *m = ti->private;

	/* we need at least a path arg */
	if (as->argc < 1) {
		ti->error = "no device given";
		return ERR_PTR(-EINVAL);
	}

	p = alloc_pgpath();
	if (!p)
		return ERR_PTR(-ENOMEM);

	r = dm_get_device(ti, shift(as), dm_table_get_mode(ti->table),
			  &p->path.dev);
	if (r) {
		ti->error = "error getting device";
		goto bad;
	}

	if (m->hw_handler_name) {
		struct request_queue *q = bdev_get_queue(p->path.dev->bdev);

		r = scsi_dh_attach(q, m->hw_handler_name);
		if (r == -EBUSY) {
			/*
			 * Already attached to different hw_handler,
			 * try to reattach with correct one.
			 */
			scsi_dh_detach(q);
			r = scsi_dh_attach(q, m->hw_handler_name);
		}

		if (r < 0) {
			ti->error = "error attaching hardware handler";
			dm_put_device(ti, p->path.dev);
			goto bad;
		}

		if (m->hw_handler_params) {
			r = scsi_dh_set_params(q, m->hw_handler_params);
			if (r < 0) {
				ti->error = "unable to set hardware "
					    "handler parameters";
				scsi_dh_detach(q);
				dm_put_device(ti, p->path.dev);
				goto bad;
			}
		}
	}

	r = ps->type->add_path(ps, &p->path, as->argc, as->argv, &ti->error);
	if (r) {
		dm_put_device(ti, p->path.dev);
		goto bad;
	}

	return p;

bad:
	free_pgpath(p);
	return ERR_PTR(r);
}

static struct priority_group *parse_priority_group(struct arg_set *as,
						   struct multipath *m)
{
	static struct param _params[] = {
		{1, 1024, "invalid number of paths"},
		{0, 1024, "invalid number of selector args"}
	};

	int r;
	unsigned i, nr_selector_args, nr_params;
	struct priority_group *pg;
	struct dm_target *ti = m->ti;

	if (as->argc < 2) {
		as->argc = 0;
		ti->error = "not enough priority group arguments";
		return ERR_PTR(-EINVAL);
	}

	pg = alloc_priority_group();
	if (!pg) {
		ti->error = "couldn't allocate priority group";
		return ERR_PTR(-ENOMEM);
	}
	pg->m = m;

	r = parse_path_selector(as, pg, ti);
	if (r)
		goto bad;

	/*
	 * read the paths
	 */
	r = read_param(_params, shift(as), &pg->nr_pgpaths, &ti->error);
	if (r)
		goto bad;

	r = read_param(_params + 1, shift(as), &nr_selector_args, &ti->error);
	if (r)
		goto bad;

	nr_params = 1 + nr_selector_args;
	for (i = 0; i < pg->nr_pgpaths; i++) {
		struct pgpath *pgpath;
		struct arg_set path_args;

		if (as->argc < nr_params) {
			ti->error = "not enough path parameters";
			r = -EINVAL;
			goto bad;
		}

		path_args.argc = nr_params;
		path_args.argv = as->argv;

		pgpath = parse_path(&path_args, &pg->ps, ti);
		if (IS_ERR(pgpath)) {
			r = PTR_ERR(pgpath);
			goto bad;
		}

		pgpath->pg = pg;
		list_add_tail(&pgpath->list, &pg->pgpaths);
		consume(as, nr_params);
	}

	return pg;

bad:
	free_priority_group(pg, ti);
	return ERR_PTR(r);
}

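/*
 * Parse the optional hardware handler arguments: load the scsi_dh_<name>
 * module, remember the handler name and pack any extra arguments into a
 * single parameter string for scsi_dh_set_params().
 */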
static int parse_hw_handler(struct arg_set *as, struct multipath *m)
{
	unsigned hw_argc;
	int ret;
	struct dm_target *ti = m->ti;

	static struct param _params[] = {
		{0, 1024, "invalid number of hardware handler args"},
	};

	if (read_param(_params, shift(as), &hw_argc, &ti->error))
		return -EINVAL;

	if (!hw_argc)
		return 0;

	if (hw_argc > as->argc) {
		ti->error = "not enough arguments for hardware handler";
		return -EINVAL;
	}

	m->hw_handler_name = kstrdup(shift(as), GFP_KERNEL);
	request_module("scsi_dh_%s", m->hw_handler_name);
	if (scsi_dh_handler_exist(m->hw_handler_name) == 0) {
		ti->error = "unknown hardware handler type";
		ret = -EINVAL;
		goto fail;
	}

	if (hw_argc > 1) {
		char *p;
		int i, j, len = 4;

		for (i = 0; i <= hw_argc - 2; i++)
			len += strlen(as->argv[i]) + 1;
		p = m->hw_handler_params = kzalloc(len, GFP_KERNEL);
		if (!p) {
			ti->error = "memory allocation failed";
			ret = -ENOMEM;
			goto fail;
		}
		j = sprintf(p, "%d", hw_argc - 1);
		for (i = 0, p += j + 1; i <= hw_argc - 2; i++, p += j + 1)
			j = sprintf(p, "%s", as->argv[i]);
	}
	consume(as, hw_argc - 1);

	return 0;
fail:
	kfree(m->hw_handler_name);
	m->hw_handler_name = NULL;
	return ret;
}

static int parse_features(struct arg_set *as, struct multipath *m)
{
	int r;
	unsigned argc;
	struct dm_target *ti = m->ti;
	const char *param_name;

	static struct param _params[] = {
		{0, 3, "invalid number of feature args"},
		{1, 50, "pg_init_retries must be between 1 and 50"},
	};

	r = read_param(_params, shift(as), &argc, &ti->error);
	if (r)
		return -EINVAL;

	if (!argc)
		return 0;

	do {
		param_name = shift(as);
		argc--;

		if (!strnicmp(param_name, MESG_STR("queue_if_no_path"))) {
			r = queue_if_no_path(m, 1, 0);
			continue;
		}

		if (!strnicmp(param_name, MESG_STR("pg_init_retries")) &&
		    (argc >= 1)) {
			r = read_param(_params + 1, shift(as),
				       &m->pg_init_retries, &ti->error);
			argc--;
			continue;
		}

		ti->error = "Unrecognised multipath feature request";
		r = -EINVAL;
	} while (argc && !r);

	return r;
}

static int multipath_ctr(struct dm_target *ti, unsigned int argc,
			 char **argv)
{
	/* target parameters */
	static struct param _params[] = {
		{1, 1024, "invalid number of priority groups"},
		{1, 1024, "invalid initial priority group number"},
	};

	int r;
	struct multipath *m;
	struct arg_set as;
	unsigned pg_count = 0;
	unsigned next_pg_num;

	as.argc = argc;
	as.argv = argv;

	m = alloc_multipath(ti);
	if (!m) {
		ti->error = "can't allocate multipath";
		return -EINVAL;
	}

	r = parse_features(&as, m);
	if (r)
		goto bad;

	r = parse_hw_handler(&as, m);
	if (r)
		goto bad;

	r = read_param(_params, shift(&as), &m->nr_priority_groups, &ti->error);
	if (r)
		goto bad;

	r = read_param(_params + 1, shift(&as), &next_pg_num, &ti->error);
	if (r)
		goto bad;

	/* parse the priority groups */
	while (as.argc) {
		struct priority_group *pg;

		pg = parse_priority_group(&as, m);
		if (IS_ERR(pg)) {
			r = PTR_ERR(pg);
			goto bad;
		}

		m->nr_valid_paths += pg->nr_pgpaths;
		list_add_tail(&pg->list, &m->priority_groups);
		pg_count++;
		pg->pg_num = pg_count;
		if (!--next_pg_num)
			m->next_pg = pg;
	}

	if (pg_count != m->nr_priority_groups) {
		ti->error = "priority group count mismatch";
		r = -EINVAL;
		goto bad;
	}

	ti->num_flush_requests = 1;
	ti->num_discard_requests = 1;

	return 0;

bad:
	free_multipath(m);
	return r;
}

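/*
 * Sleep until every outstanding pg_init (hardware handler activation) has
 * completed.  Used when flushing multipath work before suspend or removal.
 */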
static void multipath_wait_for_pg_init_completion(struct multipath *m)
{
	DECLARE_WAITQUEUE(wait, current);
	unsigned long flags;

	add_wait_queue(&m->pg_init_wait, &wait);

	while (1) {
		set_current_state(TASK_UNINTERRUPTIBLE);

		spin_lock_irqsave(&m->lock, flags);
		if (!m->pg_init_in_progress) {
			spin_unlock_irqrestore(&m->lock, flags);
			break;
		}
		spin_unlock_irqrestore(&m->lock, flags);

		io_schedule();
	}
	set_current_state(TASK_RUNNING);

	remove_wait_queue(&m->pg_init_wait, &wait);
}

static void flush_multipath_work(struct multipath *m)
{
	flush_workqueue(kmpath_handlerd);
	multipath_wait_for_pg_init_completion(m);
	flush_workqueue(kmultipathd);
	flush_scheduled_work();
}

static void multipath_dtr(struct dm_target *ti)
{
	struct multipath *m = ti->private;

	flush_multipath_work(m);
	free_multipath(m);
}

/*
 * Map cloned requests
 */
static int multipath_map(struct dm_target *ti, struct request *clone,
			 union map_info *map_context)
{
	int r;
	struct dm_mpath_io *mpio;
	struct multipath *m = (struct multipath *) ti->private;

	mpio = mempool_alloc(m->mpio_pool, GFP_ATOMIC);
	if (!mpio)
		/* ENOMEM, requeue */
		return DM_MAPIO_REQUEUE;
	memset(mpio, 0, sizeof(*mpio));

	map_context->ptr = mpio;
	clone->cmd_flags |= REQ_FAILFAST_TRANSPORT;
	r = map_io(m, clone, mpio, 0);
	if (r < 0 || r == DM_MAPIO_REQUEUE)
		mempool_free(mpio, m->mpio_pool);

	return r;
}

/*
 * Take a path out of use.
 */
static int fail_path(struct pgpath *pgpath)
{
	unsigned long flags;
	struct multipath *m = pgpath->pg->m;

	spin_lock_irqsave(&m->lock, flags);

	if (!pgpath->is_active)
		goto out;

	DMWARN("Failing path %s.", pgpath->path.dev->name);

	pgpath->pg->ps.type->fail_path(&pgpath->pg->ps, &pgpath->path);
	pgpath->is_active = 0;
	pgpath->fail_count++;

	m->nr_valid_paths--;

	if (pgpath == m->current_pgpath)
		m->current_pgpath = NULL;

	dm_path_uevent(DM_UEVENT_PATH_FAILED, m->ti,
		       pgpath->path.dev->name, m->nr_valid_paths);

	schedule_work(&m->trigger_event);
	queue_work(kmultipathd, &pgpath->deactivate_path);

out:
	spin_unlock_irqrestore(&m->lock, flags);

	return 0;
}

/*
 * Reinstate a previously-failed path
 */
static int reinstate_path(struct pgpath *pgpath)
{
	int r = 0;
	unsigned long flags;
	struct multipath *m = pgpath->pg->m;

	spin_lock_irqsave(&m->lock, flags);

	if (pgpath->is_active)
		goto out;

	if (!pgpath->pg->ps.type->reinstate_path) {
		DMWARN("Reinstate path not supported by path selector %s",
		       pgpath->pg->ps.type->name);
		r = -EINVAL;
		goto out;
	}

	r = pgpath->pg->ps.type->reinstate_path(&pgpath->pg->ps, &pgpath->path);
	if (r)
		goto out;

	pgpath->is_active = 1;

	if (!m->nr_valid_paths++ && m->queue_size) {
		m->current_pgpath = NULL;
		queue_work(kmultipathd, &m->process_queued_ios);
	} else if (m->hw_handler_name && (m->current_pg == pgpath->pg)) {
		if (queue_work(kmpath_handlerd, &pgpath->activate_path))
			m->pg_init_in_progress++;
	}

	dm_path_uevent(DM_UEVENT_PATH_REINSTATED, m->ti,
		       pgpath->path.dev->name, m->nr_valid_paths);

	schedule_work(&m->trigger_event);

out:
	spin_unlock_irqrestore(&m->lock, flags);

	return r;
}

/*
 * Fail or reinstate all paths that match the provided struct dm_dev.
 */
static int action_dev(struct multipath *m, struct dm_dev *dev,
		      action_fn action)
{
	int r = 0;
	struct pgpath *pgpath;
	struct priority_group *pg;

	list_for_each_entry(pg, &m->priority_groups, list) {
		list_for_each_entry(pgpath, &pg->pgpaths, list) {
			if (pgpath->path.dev == dev)
				r = action(pgpath);
		}
	}

	return r;
}

/*
 * Temporarily try to avoid having to use the specified PG
 */
static void bypass_pg(struct multipath *m, struct priority_group *pg,
		      int bypassed)
{
	unsigned long flags;

	spin_lock_irqsave(&m->lock, flags);

	pg->bypassed = bypassed;
	m->current_pgpath = NULL;
	m->current_pg = NULL;

	spin_unlock_irqrestore(&m->lock, flags);

	schedule_work(&m->trigger_event);
}

/*
 * Switch to using the specified PG from the next I/O that gets mapped
 */
static int switch_pg_num(struct multipath *m, const char *pgstr)
{
	struct priority_group *pg;
	unsigned pgnum;
	unsigned long flags;

	if (!pgstr || (sscanf(pgstr, "%u", &pgnum) != 1) || !pgnum ||
	    (pgnum > m->nr_priority_groups)) {
		DMWARN("invalid PG number supplied to switch_pg_num");
		return -EINVAL;
	}

	spin_lock_irqsave(&m->lock, flags);
	list_for_each_entry(pg, &m->priority_groups, list) {
		pg->bypassed = 0;
		if (--pgnum)
			continue;

		m->current_pgpath = NULL;
		m->current_pg = NULL;
		m->next_pg = pg;
	}
	spin_unlock_irqrestore(&m->lock, flags);

	schedule_work(&m->trigger_event);
	return 0;
}

/*
 * Set/clear bypassed status of a PG.
 * PGs are numbered upwards from 1 in the order they were declared.
 */
static int bypass_pg_num(struct multipath *m, const char *pgstr, int bypassed)
{
	struct priority_group *pg;
	unsigned pgnum;

	if (!pgstr || (sscanf(pgstr, "%u", &pgnum) != 1) || !pgnum ||
	    (pgnum > m->nr_priority_groups)) {
		DMWARN("invalid PG number supplied to bypass_pg");
		return -EINVAL;
	}

	list_for_each_entry(pg, &m->priority_groups, list) {
		if (!--pgnum)
			break;
	}

	bypass_pg(m, pg, bypassed);
	return 0;
}

/*
 * Should we retry pg_init immediately?
 */
static int pg_init_limit_reached(struct multipath *m, struct pgpath *pgpath)
{
	unsigned long flags;
	int limit_reached = 0;

	spin_lock_irqsave(&m->lock, flags);

	if (m->pg_init_count <= m->pg_init_retries)
		m->pg_init_required = 1;
	else
		limit_reached = 1;

	spin_unlock_irqrestore(&m->lock, flags);

	return limit_reached;
}

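/*
 * Completion callback for scsi_dh_activate().  Interpret the hardware
 * handler result: retry, bypass the PG or fail the path as appropriate,
 * and once the last outstanding activation finishes, release any queued
 * I/O and wake up waiters.
 */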
static void pg_init_done(void *data, int errors)
{
	struct pgpath *pgpath = data;
	struct priority_group *pg = pgpath->pg;
	struct multipath *m = pg->m;
	unsigned long flags;

	/* device or driver problems */
	switch (errors) {
	case SCSI_DH_OK:
		break;
	case SCSI_DH_NOSYS:
		if (!m->hw_handler_name) {
			errors = 0;
			break;
		}
		DMERR("Could not failover the device: Handler scsi_dh_%s "
		      "Error %d.", m->hw_handler_name, errors);
		/*
		 * Fail path for now, so we do not ping pong
		 */
		fail_path(pgpath);
		break;
	case SCSI_DH_DEV_TEMP_BUSY:
		/*
		 * Probably doing something like FW upgrade on the
		 * controller so try the other pg.
		 */
		bypass_pg(m, pg, 1);
		break;
	/* TODO: For SCSI_DH_RETRY we should wait a couple seconds */
	case SCSI_DH_RETRY:
	case SCSI_DH_IMM_RETRY:
	case SCSI_DH_RES_TEMP_UNAVAIL:
		if (pg_init_limit_reached(m, pgpath))
			fail_path(pgpath);
		errors = 0;
		break;
	default:
		/*
		 * We probably do not want to fail the path for a device
		 * error, but this is what the old dm did. In future
		 * patches we can do more advanced handling.
		 */
		fail_path(pgpath);
	}

	spin_lock_irqsave(&m->lock, flags);
	if (errors) {
		if (pgpath == m->current_pgpath) {
			DMERR("Could not failover device. Error %d.", errors);
			m->current_pgpath = NULL;
			m->current_pg = NULL;
		}
	} else if (!m->pg_init_required)
		pg->bypassed = 0;

	if (--m->pg_init_in_progress)
		/* Activations of other paths are still on going */
		goto out;

	if (!m->pg_init_required)
		m->queue_io = 0;

	queue_work(kmultipathd, &m->process_queued_ios);

	/*
	 * Wake up any thread waiting to suspend.
	 */
	wake_up(&m->pg_init_wait);

out:
	spin_unlock_irqrestore(&m->lock, flags);
}

static void activate_path(struct work_struct *work)
{
	struct pgpath *pgpath =
		container_of(work, struct pgpath, activate_path);

	scsi_dh_activate(bdev_get_queue(pgpath->path.dev->bdev),
			 pg_init_done, pgpath);
}

/*
 * end_io handling
 */
static int do_end_io(struct multipath *m, struct request *clone,
		     int error, struct dm_mpath_io *mpio)
{
	/*
	 * We don't queue any clone request inside the multipath target
	 * during end I/O handling, since those clone requests don't have
	 * bio clones.  If we queue them inside the multipath target,
	 * we need to make bio clones, that requires memory allocation.
	 * (See drivers/md/dm.c:end_clone_bio() about why the clone requests
	 *  don't have bio clones.)
	 * Instead of queueing the clone request here, we queue the original
	 * request into dm core, which will remake a clone request and
	 * clone bios for it and resubmit it later.
	 */
	int r = DM_ENDIO_REQUEUE;
	unsigned long flags;

	if (!error && !clone->errors)
		return 0;	/* I/O complete */

	if (error == -EOPNOTSUPP)
		return error;

	if (clone->cmd_flags & REQ_DISCARD)
		/*
		 * Pass all discard request failures up.
		 * FIXME: only fail_path if the discard failed due to a
		 * transport problem.  This requires precise understanding
		 * of the underlying failure (e.g. the SCSI sense).
		 */
		return error;

	if (mpio->pgpath)
		fail_path(mpio->pgpath);

	spin_lock_irqsave(&m->lock, flags);
	if (!m->nr_valid_paths && !m->queue_if_no_path && !__must_push_back(m))
		r = -EIO;
	spin_unlock_irqrestore(&m->lock, flags);

	return r;
}

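/*
 * Request-based end_io hook: decide via do_end_io() whether the clone
 * completed, should be requeued or must be errored, notify the path
 * selector and free the per-request mpio context.
 */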
static int multipath_end_io(struct dm_target *ti, struct request *clone,
			    int error, union map_info *map_context)
{
	struct multipath *m = ti->private;
	struct dm_mpath_io *mpio = map_context->ptr;
	struct pgpath *pgpath = mpio->pgpath;
	struct path_selector *ps;
	int r;

	r = do_end_io(m, clone, error, mpio);
	if (pgpath) {
		ps = &pgpath->pg->ps;
		if (ps->type->end_io)
			ps->type->end_io(ps, &pgpath->path, mpio->nr_bytes);
	}
	mempool_free(mpio, m->mpio_pool);

	return r;
}

/*
 * Suspend can't complete until all the I/O is processed so if
 * the last path fails we must error any remaining I/O.
 * Note that if the freeze_bdev fails while suspending, the
 * queue_if_no_path state is lost - userspace should reset it.
 */
static void multipath_presuspend(struct dm_target *ti)
{
	struct multipath *m = (struct multipath *) ti->private;

	queue_if_no_path(m, 0, 1);
}

static void multipath_postsuspend(struct dm_target *ti)
{
	struct multipath *m = ti->private;

	mutex_lock(&m->work_mutex);
	flush_multipath_work(m);
	mutex_unlock(&m->work_mutex);
}

/*
 * Restore the queue_if_no_path setting.
 */
static void multipath_resume(struct dm_target *ti)
{
	struct multipath *m = (struct multipath *) ti->private;
	unsigned long flags;

	spin_lock_irqsave(&m->lock, flags);
	m->queue_if_no_path = m->saved_queue_if_no_path;
	spin_unlock_irqrestore(&m->lock, flags);
}

/*
 * Info output has the following format:
 * num_multipath_feature_args [multipath_feature_args]*
 * num_handler_status_args [handler_status_args]*
 * num_groups init_group_number
 *             [A|D|E num_ps_status_args [ps_status_args]*
 *              num_paths num_selector_args
 *              [path_dev A|F fail_count [selector_args]* ]+ ]+
 *
 * Table output has the following format (identical to the constructor string):
 * num_feature_args [features_args]*
 * num_handler_args hw_handler [hw_handler_args]*
 * num_groups init_group_number
 *     [priority selector-name num_ps_args [ps_args]*
 *      num_paths num_selector_args [path_dev [selector_args]* ]+ ]+
 */
static int multipath_status(struct dm_target *ti, status_type_t type,
			    char *result, unsigned int maxlen)
{
	int sz = 0;
	unsigned long flags;
	struct multipath *m = (struct multipath *) ti->private;
	struct priority_group *pg;
	struct pgpath *p;
	unsigned pg_num;
	char state;

	spin_lock_irqsave(&m->lock, flags);

	/* Features */
	if (type == STATUSTYPE_INFO)
		DMEMIT("2 %u %u ", m->queue_size, m->pg_init_count);
	else {
		DMEMIT("%u ", m->queue_if_no_path +
			      (m->pg_init_retries > 0) * 2);
		if (m->queue_if_no_path)
			DMEMIT("queue_if_no_path ");
		if (m->pg_init_retries)
			DMEMIT("pg_init_retries %u ", m->pg_init_retries);
	}

	if (!m->hw_handler_name || type == STATUSTYPE_INFO)
		DMEMIT("0 ");
	else
		DMEMIT("1 %s ", m->hw_handler_name);

	DMEMIT("%u ", m->nr_priority_groups);

	if (m->next_pg)
		pg_num = m->next_pg->pg_num;
	else if (m->current_pg)
		pg_num = m->current_pg->pg_num;
	else
		pg_num = 1;

	DMEMIT("%u ", pg_num);

	switch (type) {
	case STATUSTYPE_INFO:
		list_for_each_entry(pg, &m->priority_groups, list) {
			if (pg->bypassed)
				state = 'D';	/* Disabled */
			else if (pg == m->current_pg)
				state = 'A';	/* Currently Active */
			else
				state = 'E';	/* Enabled */

			DMEMIT("%c ", state);

			if (pg->ps.type->status)
				sz += pg->ps.type->status(&pg->ps, NULL, type,
							  result + sz,
							  maxlen - sz);
			else
				DMEMIT("0 ");

			DMEMIT("%u %u ", pg->nr_pgpaths,
			       pg->ps.type->info_args);

			list_for_each_entry(p, &pg->pgpaths, list) {
				DMEMIT("%s %s %u ", p->path.dev->name,
				       p->is_active ? "A" : "F",
				       p->fail_count);
				if (pg->ps.type->status)
					sz += pg->ps.type->status(&pg->ps,
					      &p->path, type, result + sz,
					      maxlen - sz);
			}
		}
		break;

	case STATUSTYPE_TABLE:
		list_for_each_entry(pg, &m->priority_groups, list) {
			DMEMIT("%s ", pg->ps.type->name);

			if (pg->ps.type->status)
				sz += pg->ps.type->status(&pg->ps, NULL, type,
							  result + sz,
							  maxlen - sz);
			else
				DMEMIT("0 ");

			DMEMIT("%u %u ", pg->nr_pgpaths,
			       pg->ps.type->table_args);

			list_for_each_entry(p, &pg->pgpaths, list) {
				DMEMIT("%s ", p->path.dev->name);
				if (pg->ps.type->status)
					sz += pg->ps.type->status(&pg->ps,
					      &p->path, type, result + sz,
					      maxlen - sz);
			}
		}
		break;
	}

	spin_unlock_irqrestore(&m->lock, flags);

	return 0;
}

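/*
 * Message handler for "dmsetup message": queue_if_no_path / fail_if_no_path,
 * enable_group / disable_group / switch_group <#pg>, and
 * fail_path / reinstate_path <path device>.
 */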
static int multipath_message(struct dm_target *ti, unsigned argc, char **argv)
{
	int r = -EINVAL;
	struct dm_dev *dev;
	struct multipath *m = (struct multipath *) ti->private;
	action_fn action;

	mutex_lock(&m->work_mutex);

	if (dm_suspended(ti)) {
		r = -EBUSY;
		goto out;
	}

	if (argc == 1) {
		if (!strnicmp(argv[0], MESG_STR("queue_if_no_path"))) {
			r = queue_if_no_path(m, 1, 0);
			goto out;
		} else if (!strnicmp(argv[0], MESG_STR("fail_if_no_path"))) {
			r = queue_if_no_path(m, 0, 0);
			goto out;
		}
	}

	if (argc != 2) {
		DMWARN("Unrecognised multipath message received.");
		goto out;
	}

	if (!strnicmp(argv[0], MESG_STR("disable_group"))) {
		r = bypass_pg_num(m, argv[1], 1);
		goto out;
	} else if (!strnicmp(argv[0], MESG_STR("enable_group"))) {
		r = bypass_pg_num(m, argv[1], 0);
		goto out;
	} else if (!strnicmp(argv[0], MESG_STR("switch_group"))) {
		r = switch_pg_num(m, argv[1]);
		goto out;
	} else if (!strnicmp(argv[0], MESG_STR("reinstate_path")))
		action = reinstate_path;
	else if (!strnicmp(argv[0], MESG_STR("fail_path")))
		action = fail_path;
	else {
		DMWARN("Unrecognised multipath message received.");
		goto out;
	}

	r = dm_get_device(ti, argv[1], dm_table_get_mode(ti->table), &dev);
	if (r) {
		DMWARN("message: error getting device %s",
		       argv[1]);
		goto out;
	}

	r = action_dev(m, dev, action);

	dm_put_device(ti, dev);

out:
	mutex_unlock(&m->work_mutex);
	return r;
}

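/*
 * Pass ioctls through to the current path's underlying block device.
 * Returns -EAGAIN while I/O is temporarily being queued (e.g. during
 * pg_init) so the caller can retry, and -EIO when no path is available.
 */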
static int multipath_ioctl(struct dm_target *ti, unsigned int cmd,
			   unsigned long arg)
{
	struct multipath *m = (struct multipath *) ti->private;
	struct block_device *bdev = NULL;
	fmode_t mode = 0;
	unsigned long flags;
	int r = 0;

	spin_lock_irqsave(&m->lock, flags);

	if (!m->current_pgpath)
		__choose_pgpath(m, 0);

	if (m->current_pgpath) {
		bdev = m->current_pgpath->path.dev->bdev;
		mode = m->current_pgpath->path.dev->mode;
	}

	if (m->queue_io)
		r = -EAGAIN;
	else if (!bdev)
		r = -EIO;

	spin_unlock_irqrestore(&m->lock, flags);

	return r ? : __blkdev_driver_ioctl(bdev, mode, cmd, arg);
}

static int multipath_iterate_devices(struct dm_target *ti,
				     iterate_devices_callout_fn fn, void *data)
{
	struct multipath *m = ti->private;
	struct priority_group *pg;
	struct pgpath *p;
	int ret = 0;

	list_for_each_entry(pg, &m->priority_groups, list) {
		list_for_each_entry(p, &pg->pgpaths, list) {
			ret = fn(ti, p->path.dev, ti->begin, ti->len, data);
			if (ret)
				goto out;
		}
	}

out:
	return ret;
}

static int __pgpath_busy(struct pgpath *pgpath)
{
	struct request_queue *q = bdev_get_queue(pgpath->path.dev->bdev);

	return dm_underlying_device_busy(q);
}

/*
 * We return "busy", only when we can map I/Os but underlying devices
 * are busy (so even if we map I/Os now, the I/Os will wait on
 * the underlying queue).
 * In other words, if we want to kill I/Os or queue them inside us
 * due to map unavailability, we don't return "busy".  Otherwise,
 * dm core won't give us the I/Os and we can't do what we want.
 */
static int multipath_busy(struct dm_target *ti)
{
	int busy = 0, has_active = 0;
	struct multipath *m = ti->private;
	struct priority_group *pg;
	struct pgpath *pgpath;
	unsigned long flags;

	spin_lock_irqsave(&m->lock, flags);

	/* Guess which priority_group will be used at next mapping time */
	if (unlikely(!m->current_pgpath && m->next_pg))
		pg = m->next_pg;
	else if (likely(m->current_pg))
		pg = m->current_pg;
	else
		/*
		 * We don't know which pg will be used at next mapping time.
		 * We don't call __choose_pgpath() here to avoid to trigger
		 * pg_init just by busy checking.
		 * So we don't know whether underlying devices we will be using
		 * at next mapping time are busy or not.  Just try mapping.
		 */
		goto out;

	/*
	 * If there is one non-busy active path at least, the path selector
	 * will be able to select it.  So we consider such a pg as not busy.
	 */
	busy = 1;
	list_for_each_entry(pgpath, &pg->pgpaths, list)
		if (pgpath->is_active) {
			has_active = 1;

			if (!__pgpath_busy(pgpath)) {
				busy = 0;
				break;
			}
		}

	if (!has_active)
		/*
		 * No active path in this pg, so this pg won't be used and
		 * the current_pg will be changed at next mapping time.
		 * We need to try mapping to determine it.
		 */
		busy = 0;

out:
	spin_unlock_irqrestore(&m->lock, flags);

	return busy;
}

/*-----------------------------------------------------------------
 * Module setup
 *---------------------------------------------------------------*/
static struct target_type multipath_target = {
	.name = "multipath",
	.version = {1, 1, 1},
	.module = THIS_MODULE,
	.ctr = multipath_ctr,
	.dtr = multipath_dtr,
	.map_rq = multipath_map,
	.rq_end_io = multipath_end_io,
	.presuspend = multipath_presuspend,
	.postsuspend = multipath_postsuspend,
	.resume = multipath_resume,
	.status = multipath_status,
	.message = multipath_message,
	.ioctl = multipath_ioctl,
	.iterate_devices = multipath_iterate_devices,
	.busy = multipath_busy,
};

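/*
 * Module init: allocate the dm_mpath_io slab cache, register the target
 * and create the kmpathd and kmpath_handlerd workqueues, undoing each
 * earlier step if a later one fails.
 */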
static int __init dm_multipath_init(void)
{
	int r;

	/* allocate a slab for the dm_ios */
	_mpio_cache = KMEM_CACHE(dm_mpath_io, 0);
	if (!_mpio_cache)
		return -ENOMEM;

	r = dm_register_target(&multipath_target);
	if (r < 0) {
		DMERR("register failed %d", r);
		kmem_cache_destroy(_mpio_cache);
		return -EINVAL;
	}

	kmultipathd = create_workqueue("kmpathd");
	if (!kmultipathd) {
		DMERR("failed to create workqueue kmpathd");
		dm_unregister_target(&multipath_target);
		kmem_cache_destroy(_mpio_cache);
		return -ENOMEM;
	}

	/*
	 * A separate workqueue is used to handle the device handlers
	 * to avoid overloading existing workqueue. Overloading the
	 * old workqueue would also create a bottleneck in the
	 * path of the storage hardware device activation.
	 */
	kmpath_handlerd = create_singlethread_workqueue("kmpath_handlerd");
	if (!kmpath_handlerd) {
		DMERR("failed to create workqueue kmpath_handlerd");
		destroy_workqueue(kmultipathd);
		dm_unregister_target(&multipath_target);
		kmem_cache_destroy(_mpio_cache);
		return -ENOMEM;
	}

	DMINFO("version %u.%u.%u loaded",
	       multipath_target.version[0], multipath_target.version[1],
	       multipath_target.version[2]);

	return r;
}

static void __exit dm_multipath_exit(void)
{
	destroy_workqueue(kmpath_handlerd);
	destroy_workqueue(kmultipathd);

	dm_unregister_target(&multipath_target);
	kmem_cache_destroy(_mpio_cache);
}

module_init(dm_multipath_init);
module_exit(dm_multipath_exit);

MODULE_DESCRIPTION(DM_NAME " multipath target");
MODULE_AUTHOR("Sistina Software <dm-devel@redhat.com>");
MODULE_LICENSE("GPL");