/*
 * Copyright (C) 2003 Sistina Software Limited.
 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
 *
 * This file is released under the GPL.
 */

#include <linux/device-mapper.h>

#include "dm-path-selector.h"
#include "dm-uevent.h"

#include <linux/ctype.h>
#include <linux/init.h>
#include <linux/mempool.h>
#include <linux/module.h>
#include <linux/pagemap.h>
#include <linux/slab.h>
#include <linux/time.h>
#include <linux/workqueue.h>
#include <scsi/scsi_dh.h>
#include <linux/atomic.h>

#define DM_MSG_PREFIX "multipath"
#define DM_PG_INIT_DELAY_MSECS 2000
#define DM_PG_INIT_DELAY_DEFAULT ((unsigned) -1)

/* Path properties */
struct pgpath {
	struct list_head list;

	struct priority_group *pg;	/* Owning PG */
	unsigned is_active;		/* Path status */
	unsigned fail_count;		/* Cumulative failure count */

	struct dm_path path;
	struct delayed_work activate_path;
};

#define path_to_pgpath(__pgp) container_of((__pgp), struct pgpath, path)

/*
 * Paths are grouped into Priority Groups and numbered from 1 upwards.
 * Each has a path selector which controls which path gets used.
 */
struct priority_group {
	struct list_head list;

	struct multipath *m;		/* Owning multipath instance */
	struct path_selector ps;

	unsigned pg_num;		/* Reference number */
	unsigned bypassed;		/* Temporarily bypass this PG? */

	unsigned nr_pgpaths;		/* Number of paths in PG */
	struct list_head pgpaths;
};

/* Multipath context */
struct multipath {
	struct list_head list;
	struct dm_target *ti;

	spinlock_t lock;

	const char *hw_handler_name;
	char *hw_handler_params;

	unsigned nr_priority_groups;
	struct list_head priority_groups;

	wait_queue_head_t pg_init_wait;	/* Wait for pg_init completion */

	unsigned pg_init_required;	/* pg_init needs calling? */
	unsigned pg_init_in_progress;	/* Only one pg_init allowed at once */
	unsigned pg_init_delay_retry;	/* Delay pg_init retry? */

	unsigned nr_valid_paths;	/* Total number of usable paths */
	struct pgpath *current_pgpath;
	struct priority_group *current_pg;
	struct priority_group *next_pg;	/* Switch to this PG if set */
	unsigned repeat_count;		/* I/Os left before calling PS again */

	unsigned queue_io;		/* Must we queue all I/O? */
	unsigned queue_if_no_path;	/* Queue I/O if last path fails? */
	unsigned saved_queue_if_no_path;/* Saved state during suspension */
	unsigned pg_init_retries;	/* Number of times to retry pg_init */
	unsigned pg_init_count;		/* Number of times pg_init called */
	unsigned pg_init_delay_msecs;	/* Number of msecs before pg_init retry */

	struct work_struct process_queued_ios;
	struct list_head queued_ios;
	unsigned queue_size;

	struct work_struct trigger_event;

	/*
	 * We must use a mempool of dm_mpath_io structs so that we
	 * can resubmit bios on error.
	 */
	mempool_t *mpio_pool;

	struct mutex work_mutex;
};

/*
 * Context information attached to each bio we process.
 */
struct dm_mpath_io {
	struct pgpath *pgpath;
	size_t nr_bytes;
};

typedef int (*action_fn) (struct pgpath *pgpath);

#define MIN_IOS 256	/* Mempool size */

static struct kmem_cache *_mpio_cache;

static struct workqueue_struct *kmultipathd, *kmpath_handlerd;
static void process_queued_ios(struct work_struct *work);
static void trigger_event(struct work_struct *work);
static void activate_path(struct work_struct *work);


/*-----------------------------------------------
 * Allocation routines
 *-----------------------------------------------*/

static struct pgpath *alloc_pgpath(void)
{
	struct pgpath *pgpath = kzalloc(sizeof(*pgpath), GFP_KERNEL);

	if (pgpath) {
		pgpath->is_active = 1;
		INIT_DELAYED_WORK(&pgpath->activate_path, activate_path);
	}

	return pgpath;
}

static void free_pgpath(struct pgpath *pgpath)
{
	kfree(pgpath);
}

static struct priority_group *alloc_priority_group(void)
{
	struct priority_group *pg;

	pg = kzalloc(sizeof(*pg), GFP_KERNEL);

	if (pg)
		INIT_LIST_HEAD(&pg->pgpaths);

	return pg;
}

static void free_pgpaths(struct list_head *pgpaths, struct dm_target *ti)
{
	struct pgpath *pgpath, *tmp;
	struct multipath *m = ti->private;

	list_for_each_entry_safe(pgpath, tmp, pgpaths, list) {
		list_del(&pgpath->list);
		if (m->hw_handler_name)
			scsi_dh_detach(bdev_get_queue(pgpath->path.dev->bdev));
		dm_put_device(ti, pgpath->path.dev);
		free_pgpath(pgpath);
	}
}

static void free_priority_group(struct priority_group *pg,
				struct dm_target *ti)
{
	struct path_selector *ps = &pg->ps;

	if (ps->type) {
		ps->type->destroy(ps);
		dm_put_path_selector(ps->type);
	}

	free_pgpaths(&pg->pgpaths, ti);
	kfree(pg);
}

static struct multipath *alloc_multipath(struct dm_target *ti)
{
	struct multipath *m;

	m = kzalloc(sizeof(*m), GFP_KERNEL);
	if (m) {
		INIT_LIST_HEAD(&m->priority_groups);
		INIT_LIST_HEAD(&m->queued_ios);
		spin_lock_init(&m->lock);
		m->queue_io = 1;
		m->pg_init_delay_msecs = DM_PG_INIT_DELAY_DEFAULT;
		INIT_WORK(&m->process_queued_ios, process_queued_ios);
		INIT_WORK(&m->trigger_event, trigger_event);
		init_waitqueue_head(&m->pg_init_wait);
		mutex_init(&m->work_mutex);
		m->mpio_pool = mempool_create_slab_pool(MIN_IOS, _mpio_cache);
		if (!m->mpio_pool) {
			kfree(m);
			return NULL;
		}
		m->ti = ti;
		ti->private = m;
	}

	return m;
}

static void free_multipath(struct multipath *m)
{
	struct priority_group *pg, *tmp;

	list_for_each_entry_safe(pg, tmp, &m->priority_groups, list) {
		list_del(&pg->list);
		free_priority_group(pg, m->ti);
	}

	kfree(m->hw_handler_name);
	kfree(m->hw_handler_params);
	mempool_destroy(m->mpio_pool);
	kfree(m);
}

static int set_mapinfo(struct multipath *m, union map_info *info)
{
	struct dm_mpath_io *mpio;

	mpio = mempool_alloc(m->mpio_pool, GFP_ATOMIC);
	if (!mpio)
		return -ENOMEM;

	memset(mpio, 0, sizeof(*mpio));
	info->ptr = mpio;

	return 0;
}

static void clear_mapinfo(struct multipath *m, union map_info *info)
{
	struct dm_mpath_io *mpio = info->ptr;

	info->ptr = NULL;
	mempool_free(mpio, m->mpio_pool);
}

/*-----------------------------------------------
 * Path selection
 *-----------------------------------------------*/

static void __pg_init_all_paths(struct multipath *m)
{
	struct pgpath *pgpath;
	unsigned long pg_init_delay = 0;

	m->pg_init_count++;
	m->pg_init_required = 0;
	if (m->pg_init_delay_retry)
		pg_init_delay = msecs_to_jiffies(m->pg_init_delay_msecs != DM_PG_INIT_DELAY_DEFAULT ?
						 m->pg_init_delay_msecs : DM_PG_INIT_DELAY_MSECS);
	list_for_each_entry(pgpath, &m->current_pg->pgpaths, list) {
		/* Skip failed paths */
		if (!pgpath->is_active)
			continue;
		if (queue_delayed_work(kmpath_handlerd, &pgpath->activate_path,
				       pg_init_delay))
			m->pg_init_in_progress++;
	}
}

static void __switch_pg(struct multipath *m, struct pgpath *pgpath)
{
	m->current_pg = pgpath->pg;

	/* Must we initialise the PG first, and queue I/O till it's ready? */
	if (m->hw_handler_name) {
		m->pg_init_required = 1;
		m->queue_io = 1;
	} else {
		m->pg_init_required = 0;
		m->queue_io = 0;
	}

	m->pg_init_count = 0;
}

static int __choose_path_in_pg(struct multipath *m, struct priority_group *pg,
			       size_t nr_bytes)
{
	struct dm_path *path;

	path = pg->ps.type->select_path(&pg->ps, &m->repeat_count, nr_bytes);
	if (!path)
		return -ENXIO;

	m->current_pgpath = path_to_pgpath(path);

	if (m->current_pg != pg)
		__switch_pg(m, m->current_pgpath);

	return 0;
}

static void __choose_pgpath(struct multipath *m, size_t nr_bytes)
{
	struct priority_group *pg;
	unsigned bypassed = 1;

	if (!m->nr_valid_paths)
		goto failed;

	/* Were we instructed to switch PG? */
	if (m->next_pg) {
		pg = m->next_pg;
		m->next_pg = NULL;
		if (!__choose_path_in_pg(m, pg, nr_bytes))
			return;
	}

	/* Don't change PG until it has no remaining paths */
	if (m->current_pg && !__choose_path_in_pg(m, m->current_pg, nr_bytes))
		return;

	/*
	 * Loop through priority groups until we find a valid path.
	 * First time we skip PGs marked 'bypassed'.
	 * Second time we only try the ones we skipped.
	 */
	do {
		list_for_each_entry(pg, &m->priority_groups, list) {
			if (pg->bypassed == bypassed)
				continue;
			if (!__choose_path_in_pg(m, pg, nr_bytes))
				return;
		}
	} while (bypassed--);

failed:
	m->current_pgpath = NULL;
	m->current_pg = NULL;
}

/*
 * Check whether bios must be queued in the device-mapper core rather
 * than here in the target.
 *
 * m->lock must be held on entry.
 *
 * If m->queue_if_no_path and m->saved_queue_if_no_path hold the
 * same value then we are not between multipath_presuspend()
 * and multipath_resume() calls and we have no need to check
 * for the DMF_NOFLUSH_SUSPENDING flag.
 */
static int __must_push_back(struct multipath *m)
{
	return (m->queue_if_no_path != m->saved_queue_if_no_path &&
		dm_noflush_suspending(m->ti));
}

static int map_io(struct multipath *m, struct request *clone,
		  union map_info *map_context, unsigned was_queued)
{
	int r = DM_MAPIO_REMAPPED;
	size_t nr_bytes = blk_rq_bytes(clone);
	unsigned long flags;
	struct pgpath *pgpath;
	struct block_device *bdev;
	struct dm_mpath_io *mpio = map_context->ptr;

	spin_lock_irqsave(&m->lock, flags);

	/* Do we need to select a new pgpath? */
	if (!m->current_pgpath ||
	    (!m->queue_io && (m->repeat_count && --m->repeat_count == 0)))
		__choose_pgpath(m, nr_bytes);

	pgpath = m->current_pgpath;

	if (was_queued)
		m->queue_size--;

	if ((pgpath && m->queue_io) ||
	    (!pgpath && m->queue_if_no_path)) {
		/* Queue for the daemon to resubmit */
		list_add_tail(&clone->queuelist, &m->queued_ios);
		m->queue_size++;
		if ((m->pg_init_required && !m->pg_init_in_progress) ||
		    !m->queue_io)
			queue_work(kmultipathd, &m->process_queued_ios);
		pgpath = NULL;
		r = DM_MAPIO_SUBMITTED;
	} else if (pgpath) {
		bdev = pgpath->path.dev->bdev;
		clone->q = bdev_get_queue(bdev);
		clone->rq_disk = bdev->bd_disk;
	} else if (__must_push_back(m))
		r = DM_MAPIO_REQUEUE;
	else
		r = -EIO;	/* Failed */

	mpio->pgpath = pgpath;
	mpio->nr_bytes = nr_bytes;

	if (r == DM_MAPIO_REMAPPED && pgpath->pg->ps.type->start_io)
		pgpath->pg->ps.type->start_io(&pgpath->pg->ps, &pgpath->path,
					      nr_bytes);

	spin_unlock_irqrestore(&m->lock, flags);

	return r;
}

/*
 * If we run out of usable paths, should we queue I/O or error it?
 */
static int queue_if_no_path(struct multipath *m, unsigned queue_if_no_path,
			    unsigned save_old_value)
{
	unsigned long flags;

	spin_lock_irqsave(&m->lock, flags);

	if (save_old_value)
		m->saved_queue_if_no_path = m->queue_if_no_path;
	else
		m->saved_queue_if_no_path = queue_if_no_path;
	m->queue_if_no_path = queue_if_no_path;
	if (!m->queue_if_no_path && m->queue_size)
		queue_work(kmultipathd, &m->process_queued_ios);

	spin_unlock_irqrestore(&m->lock, flags);

	return 0;
}

/*-----------------------------------------------------------------
 * The multipath daemon is responsible for resubmitting queued ios.
 *---------------------------------------------------------------*/

static void dispatch_queued_ios(struct multipath *m)
{
	int r;
	unsigned long flags;
	union map_info *info;
	struct request *clone, *n;
	LIST_HEAD(cl);

	spin_lock_irqsave(&m->lock, flags);
	list_splice_init(&m->queued_ios, &cl);
	spin_unlock_irqrestore(&m->lock, flags);

	list_for_each_entry_safe(clone, n, &cl, queuelist) {
		list_del_init(&clone->queuelist);

		info = dm_get_rq_mapinfo(clone);

		r = map_io(m, clone, info, 1);
		if (r < 0) {
			clear_mapinfo(m, info);
			dm_kill_unmapped_request(clone, r);
		} else if (r == DM_MAPIO_REMAPPED)
			dm_dispatch_request(clone);
		else if (r == DM_MAPIO_REQUEUE) {
			clear_mapinfo(m, info);
			dm_requeue_unmapped_request(clone);
		}
	}
}

static void process_queued_ios(struct work_struct *work)
{
	struct multipath *m =
		container_of(work, struct multipath, process_queued_ios);
	struct pgpath *pgpath = NULL;
	unsigned must_queue = 1;
	unsigned long flags;

	spin_lock_irqsave(&m->lock, flags);

	if (!m->queue_size)
		goto out;

	if (!m->current_pgpath)
		__choose_pgpath(m, 0);

	pgpath = m->current_pgpath;

	if ((pgpath && !m->queue_io) ||
	    (!pgpath && !m->queue_if_no_path))
		must_queue = 0;

	if (m->pg_init_required && !m->pg_init_in_progress && pgpath)
		__pg_init_all_paths(m);

out:
	spin_unlock_irqrestore(&m->lock, flags);
	if (!must_queue)
		dispatch_queued_ios(m);
}

/*
 * An event is triggered whenever a path is taken out of use.
 * Includes path failure and PG bypass.
 */
static void trigger_event(struct work_struct *work)
{
	struct multipath *m =
		container_of(work, struct multipath, trigger_event);

	dm_table_event(m->ti->table);
}

/*-----------------------------------------------------------------
 * Constructor/argument parsing:
 * <#multipath feature args> [<arg>]*
 * <#hw_handler args> [hw_handler [<arg>]*]
 * <#priority groups>
 * <initial priority group>
 *     [<selector> <#selector args> [<arg>]*
 *      <#paths> <#per-path selector args>
 *         [<path> [<arg>]* ]+ ]+
 *---------------------------------------------------------------*/
static int parse_path_selector(struct dm_arg_set *as, struct priority_group *pg,
			       struct dm_target *ti)
{
	int r;
	struct path_selector_type *pst;
	unsigned ps_argc;

	static struct dm_arg _args[] = {
		{0, 1024, "invalid number of path selector args"},
	};

	pst = dm_get_path_selector(dm_shift_arg(as));
	if (!pst) {
		ti->error = "unknown path selector type";
		return -EINVAL;
	}

	r = dm_read_arg_group(_args, as, &ps_argc, &ti->error);
	if (r) {
		dm_put_path_selector(pst);
		return -EINVAL;
	}

	r = pst->create(&pg->ps, ps_argc, as->argv);
	if (r) {
		dm_put_path_selector(pst);
		ti->error = "path selector constructor failed";
		return r;
	}

	pg->ps.type = pst;
	dm_consume_args(as, ps_argc);

	return 0;
}

static struct pgpath *parse_path(struct dm_arg_set *as, struct path_selector *ps,
				 struct dm_target *ti)
{
	int r;
	struct pgpath *p;
	struct multipath *m = ti->private;

	/* we need at least a path arg */
	if (as->argc < 1) {
		ti->error = "no device given";
		return ERR_PTR(-EINVAL);
	}

	p = alloc_pgpath();
	if (!p)
		return ERR_PTR(-ENOMEM);

	r = dm_get_device(ti, dm_shift_arg(as), dm_table_get_mode(ti->table),
			  &p->path.dev);
	if (r) {
		ti->error = "error getting device";
		goto bad;
	}

	if (m->hw_handler_name) {
		struct request_queue *q = bdev_get_queue(p->path.dev->bdev);

		r = scsi_dh_attach(q, m->hw_handler_name);
		if (r == -EBUSY) {
			/*
			 * Already attached to different hw_handler,
			 * try to reattach with correct one.
			 */
			scsi_dh_detach(q);
			r = scsi_dh_attach(q, m->hw_handler_name);
		}

		if (r < 0) {
			ti->error = "error attaching hardware handler";
			dm_put_device(ti, p->path.dev);
			goto bad;
		}

		if (m->hw_handler_params) {
			r = scsi_dh_set_params(q, m->hw_handler_params);
			if (r < 0) {
				ti->error = "unable to set hardware "
						"handler parameters";
				scsi_dh_detach(q);
				dm_put_device(ti, p->path.dev);
				goto bad;
			}
		}
	}

	r = ps->type->add_path(ps, &p->path, as->argc, as->argv, &ti->error);
	if (r) {
		dm_put_device(ti, p->path.dev);
		goto bad;
	}

	return p;

bad:
	free_pgpath(p);
	return ERR_PTR(r);
}

static struct priority_group *parse_priority_group(struct dm_arg_set *as,
						   struct multipath *m)
{
	static struct dm_arg _args[] = {
		{1, 1024, "invalid number of paths"},
		{0, 1024, "invalid number of selector args"}
	};

	int r;
	unsigned i, nr_selector_args, nr_args;
	struct priority_group *pg;
	struct dm_target *ti = m->ti;

	if (as->argc < 2) {
		as->argc = 0;
		ti->error = "not enough priority group arguments";
		return ERR_PTR(-EINVAL);
	}

	pg = alloc_priority_group();
	if (!pg) {
		ti->error = "couldn't allocate priority group";
		return ERR_PTR(-ENOMEM);
	}
	pg->m = m;

	r = parse_path_selector(as, pg, ti);
	if (r)
		goto bad;

	/*
	 * read the paths
	 */
	r = dm_read_arg(_args, as, &pg->nr_pgpaths, &ti->error);
	if (r)
		goto bad;

	r = dm_read_arg(_args + 1, as, &nr_selector_args, &ti->error);
	if (r)
		goto bad;

	nr_args = 1 + nr_selector_args;
	for (i = 0; i < pg->nr_pgpaths; i++) {
		struct pgpath *pgpath;
		struct dm_arg_set path_args;

		if (as->argc < nr_args) {
			ti->error = "not enough path parameters";
			r = -EINVAL;
			goto bad;
		}

		path_args.argc = nr_args;
		path_args.argv = as->argv;

		pgpath = parse_path(&path_args, &pg->ps, ti);
		if (IS_ERR(pgpath)) {
			r = PTR_ERR(pgpath);
			goto bad;
		}

		pgpath->pg = pg;
		list_add_tail(&pgpath->list, &pg->pgpaths);
		dm_consume_args(as, nr_args);
	}

	return pg;

bad:
	free_priority_group(pg, ti);
	return ERR_PTR(r);
}

static int parse_hw_handler(struct dm_arg_set *as, struct multipath *m)
{
	unsigned hw_argc;
	int ret;
	struct dm_target *ti = m->ti;

	static struct dm_arg _args[] = {
		{0, 1024, "invalid number of hardware handler args"},
	};

	if (dm_read_arg_group(_args, as, &hw_argc, &ti->error))
		return -EINVAL;

	if (!hw_argc)
		return 0;

	m->hw_handler_name = kstrdup(dm_shift_arg(as), GFP_KERNEL);
	if (!try_then_request_module(scsi_dh_handler_exist(m->hw_handler_name),
				     "scsi_dh_%s", m->hw_handler_name)) {
		ti->error = "unknown hardware handler type";
		ret = -EINVAL;
		goto fail;
	}

	if (hw_argc > 1) {
		char *p;
		int i, j, len = 4;

		for (i = 0; i <= hw_argc - 2; i++)
			len += strlen(as->argv[i]) + 1;
		p = m->hw_handler_params = kzalloc(len, GFP_KERNEL);
		if (!p) {
			ti->error = "memory allocation failed";
			ret = -ENOMEM;
			goto fail;
		}
		j = sprintf(p, "%d", hw_argc - 1);
		for (i = 0, p += j + 1; i <= hw_argc - 2; i++, p += j + 1)
			j = sprintf(p, "%s", as->argv[i]);
	}
	dm_consume_args(as, hw_argc - 1);

	return 0;
fail:
	kfree(m->hw_handler_name);
	m->hw_handler_name = NULL;
	return ret;
}

static int parse_features(struct dm_arg_set *as, struct multipath *m)
{
	int r;
	unsigned argc;
	struct dm_target *ti = m->ti;
	const char *arg_name;

	static struct dm_arg _args[] = {
		{0, 5, "invalid number of feature args"},
		{1, 50, "pg_init_retries must be between 1 and 50"},
		{0, 60000, "pg_init_delay_msecs must be between 0 and 60000"},
	};

	r = dm_read_arg_group(_args, as, &argc, &ti->error);
	if (r)
		return -EINVAL;

	if (!argc)
		return 0;

	do {
		arg_name = dm_shift_arg(as);
		argc--;

		if (!strcasecmp(arg_name, "queue_if_no_path")) {
			r = queue_if_no_path(m, 1, 0);
			continue;
		}

		if (!strcasecmp(arg_name, "pg_init_retries") &&
		    (argc >= 1)) {
			r = dm_read_arg(_args + 1, as, &m->pg_init_retries, &ti->error);
			argc--;
			continue;
		}

		if (!strcasecmp(arg_name, "pg_init_delay_msecs") &&
		    (argc >= 1)) {
			r = dm_read_arg(_args + 2, as, &m->pg_init_delay_msecs, &ti->error);
			argc--;
			continue;
		}

		ti->error = "Unrecognised multipath feature request";
		r = -EINVAL;
	} while (argc && !r);

	return r;
}

static int multipath_ctr(struct dm_target *ti, unsigned int argc,
			 char **argv)
{
	/* target arguments */
	static struct dm_arg _args[] = {
		{0, 1024, "invalid number of priority groups"},
		{0, 1024, "invalid initial priority group number"},
	};

	int r;
	struct multipath *m;
	struct dm_arg_set as;
	unsigned pg_count = 0;
	unsigned next_pg_num;

	as.argc = argc;
	as.argv = argv;

	m = alloc_multipath(ti);
	if (!m) {
		ti->error = "can't allocate multipath";
		return -EINVAL;
	}

	r = parse_features(&as, m);
	if (r)
		goto bad;

	r = parse_hw_handler(&as, m);
	if (r)
		goto bad;

	r = dm_read_arg(_args, &as, &m->nr_priority_groups, &ti->error);
	if (r)
		goto bad;

	r = dm_read_arg(_args + 1, &as, &next_pg_num, &ti->error);
	if (r)
		goto bad;

	if ((!m->nr_priority_groups && next_pg_num) ||
	    (m->nr_priority_groups && !next_pg_num)) {
		ti->error = "invalid initial priority group";
		r = -EINVAL;
		goto bad;
	}

	/* parse the priority groups */
	while (as.argc) {
		struct priority_group *pg;

		pg = parse_priority_group(&as, m);
		if (IS_ERR(pg)) {
			r = PTR_ERR(pg);
			goto bad;
		}

		m->nr_valid_paths += pg->nr_pgpaths;
		list_add_tail(&pg->list, &m->priority_groups);
		pg_count++;
		pg->pg_num = pg_count;
		if (!--next_pg_num)
			m->next_pg = pg;
	}

	if (pg_count != m->nr_priority_groups) {
		ti->error = "priority group count mismatch";
		r = -EINVAL;
		goto bad;
	}

	ti->num_flush_requests = 1;
	ti->num_discard_requests = 1;

	return 0;

bad:
	free_multipath(m);
	return r;
}

static void multipath_wait_for_pg_init_completion(struct multipath *m)
{
	DECLARE_WAITQUEUE(wait, current);
	unsigned long flags;

	add_wait_queue(&m->pg_init_wait, &wait);

	while (1) {
		set_current_state(TASK_UNINTERRUPTIBLE);

		spin_lock_irqsave(&m->lock, flags);
		if (!m->pg_init_in_progress) {
			spin_unlock_irqrestore(&m->lock, flags);
			break;
		}
		spin_unlock_irqrestore(&m->lock, flags);

		io_schedule();
	}
	set_current_state(TASK_RUNNING);

	remove_wait_queue(&m->pg_init_wait, &wait);
}

static void flush_multipath_work(struct multipath *m)
{
	flush_workqueue(kmpath_handlerd);
	multipath_wait_for_pg_init_completion(m);
	flush_workqueue(kmultipathd);
	flush_work_sync(&m->trigger_event);
}

static void multipath_dtr(struct dm_target *ti)
{
	struct multipath *m = ti->private;

	flush_multipath_work(m);
	free_multipath(m);
}

/*
 * Map cloned requests
 */
static int multipath_map(struct dm_target *ti, struct request *clone,
			 union map_info *map_context)
{
	int r;
	struct multipath *m = (struct multipath *) ti->private;

	if (set_mapinfo(m, map_context) < 0)
		/* ENOMEM, requeue */
		return DM_MAPIO_REQUEUE;

	clone->cmd_flags |= REQ_FAILFAST_TRANSPORT;
	r = map_io(m, clone, map_context, 0);
	if (r < 0 || r == DM_MAPIO_REQUEUE)
		clear_mapinfo(m, map_context);

	return r;
}

/*
 * Take a path out of use.
 */
static int fail_path(struct pgpath *pgpath)
{
	unsigned long flags;
	struct multipath *m = pgpath->pg->m;

	spin_lock_irqsave(&m->lock, flags);

	if (!pgpath->is_active)
		goto out;

	DMWARN("Failing path %s.", pgpath->path.dev->name);

	pgpath->pg->ps.type->fail_path(&pgpath->pg->ps, &pgpath->path);
	pgpath->is_active = 0;
	pgpath->fail_count++;

	m->nr_valid_paths--;

	if (pgpath == m->current_pgpath)
		m->current_pgpath = NULL;

	dm_path_uevent(DM_UEVENT_PATH_FAILED, m->ti,
		       pgpath->path.dev->name, m->nr_valid_paths);

	schedule_work(&m->trigger_event);

out:
	spin_unlock_irqrestore(&m->lock, flags);

	return 0;
}

/*
 * Reinstate a previously-failed path
 */
static int reinstate_path(struct pgpath *pgpath)
{
	int r = 0;
	unsigned long flags;
	struct multipath *m = pgpath->pg->m;

	spin_lock_irqsave(&m->lock, flags);

	if (pgpath->is_active)
		goto out;

	if (!pgpath->pg->ps.type->reinstate_path) {
		DMWARN("Reinstate path not supported by path selector %s",
		       pgpath->pg->ps.type->name);
		r = -EINVAL;
		goto out;
	}

	r = pgpath->pg->ps.type->reinstate_path(&pgpath->pg->ps, &pgpath->path);
	if (r)
		goto out;

	pgpath->is_active = 1;

	if (!m->nr_valid_paths++ && m->queue_size) {
		m->current_pgpath = NULL;
		queue_work(kmultipathd, &m->process_queued_ios);
	} else if (m->hw_handler_name && (m->current_pg == pgpath->pg)) {
		if (queue_work(kmpath_handlerd, &pgpath->activate_path.work))
			m->pg_init_in_progress++;
	}

	dm_path_uevent(DM_UEVENT_PATH_REINSTATED, m->ti,
		       pgpath->path.dev->name, m->nr_valid_paths);

	schedule_work(&m->trigger_event);

out:
	spin_unlock_irqrestore(&m->lock, flags);

	return r;
}

/*
 * Fail or reinstate all paths that match the provided struct dm_dev.
 */
static int action_dev(struct multipath *m, struct dm_dev *dev,
		      action_fn action)
{
	int r = -EINVAL;
	struct pgpath *pgpath;
	struct priority_group *pg;

	list_for_each_entry(pg, &m->priority_groups, list) {
		list_for_each_entry(pgpath, &pg->pgpaths, list) {
			if (pgpath->path.dev == dev)
				r = action(pgpath);
		}
	}

	return r;
}

/*
 * Temporarily try to avoid having to use the specified PG
 */
static void bypass_pg(struct multipath *m, struct priority_group *pg,
		      int bypassed)
{
	unsigned long flags;

	spin_lock_irqsave(&m->lock, flags);

	pg->bypassed = bypassed;
	m->current_pgpath = NULL;
	m->current_pg = NULL;

	spin_unlock_irqrestore(&m->lock, flags);

	schedule_work(&m->trigger_event);
}

/*
 * Switch to using the specified PG from the next I/O that gets mapped
 */
static int switch_pg_num(struct multipath *m, const char *pgstr)
{
	struct priority_group *pg;
	unsigned pgnum;
	unsigned long flags;
	char dummy;

	if (!pgstr || (sscanf(pgstr, "%u%c", &pgnum, &dummy) != 1) || !pgnum ||
	    (pgnum > m->nr_priority_groups)) {
		DMWARN("invalid PG number supplied to switch_pg_num");
		return -EINVAL;
	}

	spin_lock_irqsave(&m->lock, flags);
	list_for_each_entry(pg, &m->priority_groups, list) {
		pg->bypassed = 0;
		if (--pgnum)
			continue;

		m->current_pgpath = NULL;
		m->current_pg = NULL;
		m->next_pg = pg;
	}
	spin_unlock_irqrestore(&m->lock, flags);

	schedule_work(&m->trigger_event);
	return 0;
}

/*
 * Set/clear bypassed status of a PG.
 * PGs are numbered upwards from 1 in the order they were declared.
 */
static int bypass_pg_num(struct multipath *m, const char *pgstr, int bypassed)
{
	struct priority_group *pg;
	unsigned pgnum;
	char dummy;

	if (!pgstr || (sscanf(pgstr, "%u%c", &pgnum, &dummy) != 1) || !pgnum ||
	    (pgnum > m->nr_priority_groups)) {
		DMWARN("invalid PG number supplied to bypass_pg");
		return -EINVAL;
	}

	list_for_each_entry(pg, &m->priority_groups, list) {
		if (!--pgnum)
			break;
	}

	bypass_pg(m, pg, bypassed);
	return 0;
}

/*
 * Should we retry pg_init immediately?
 */
static int pg_init_limit_reached(struct multipath *m, struct pgpath *pgpath)
{
	unsigned long flags;
	int limit_reached = 0;

	spin_lock_irqsave(&m->lock, flags);

	if (m->pg_init_count <= m->pg_init_retries)
		m->pg_init_required = 1;
	else
		limit_reached = 1;

	spin_unlock_irqrestore(&m->lock, flags);

	return limit_reached;
}

static void pg_init_done(void *data, int errors)
{
	struct pgpath *pgpath = data;
	struct priority_group *pg = pgpath->pg;
	struct multipath *m = pg->m;
	unsigned long flags;
	unsigned delay_retry = 0;

	/* device or driver problems */
	switch (errors) {
	case SCSI_DH_OK:
		break;
	case SCSI_DH_NOSYS:
		if (!m->hw_handler_name) {
			errors = 0;
			break;
		}
		DMERR("Could not failover the device: Handler scsi_dh_%s "
		      "Error %d.", m->hw_handler_name, errors);
		/*
		 * Fail path for now, so we do not ping pong
		 */
		fail_path(pgpath);
		break;
	case SCSI_DH_DEV_TEMP_BUSY:
		/*
		 * Probably doing something like FW upgrade on the
		 * controller so try the other pg.
		 */
		bypass_pg(m, pg, 1);
		break;
	case SCSI_DH_RETRY:
		/* Wait before retrying. */
		delay_retry = 1;
	case SCSI_DH_IMM_RETRY:
	case SCSI_DH_RES_TEMP_UNAVAIL:
		if (pg_init_limit_reached(m, pgpath))
			fail_path(pgpath);
		errors = 0;
		break;
	default:
		/*
		 * We probably do not want to fail the path for a device
		 * error, but this is what the old dm did. In future
		 * patches we can do more advanced handling.
		 */
		fail_path(pgpath);
	}

	spin_lock_irqsave(&m->lock, flags);
	if (errors) {
		if (pgpath == m->current_pgpath) {
			DMERR("Could not failover device. Error %d.", errors);
			m->current_pgpath = NULL;
			m->current_pg = NULL;
		}
	} else if (!m->pg_init_required)
		pg->bypassed = 0;

	if (--m->pg_init_in_progress)
		/* Activations of other paths are still on going */
		goto out;

	if (!m->pg_init_required)
		m->queue_io = 0;

	m->pg_init_delay_retry = delay_retry;
	queue_work(kmultipathd, &m->process_queued_ios);

	/*
	 * Wake up any thread waiting to suspend.
	 */
	wake_up(&m->pg_init_wait);

out:
	spin_unlock_irqrestore(&m->lock, flags);
}

static void activate_path(struct work_struct *work)
{
	struct pgpath *pgpath =
		container_of(work, struct pgpath, activate_path.work);

	scsi_dh_activate(bdev_get_queue(pgpath->path.dev->bdev),
			 pg_init_done, pgpath);
}

/*
 * end_io handling
 */
static int do_end_io(struct multipath *m, struct request *clone,
		     int error, struct dm_mpath_io *mpio)
{
	/*
	 * We don't queue any clone request inside the multipath target
	 * during end I/O handling, since those clone requests don't have
	 * bio clones.  If we queue them inside the multipath target,
	 * we need to make bio clones, that requires memory allocation.
	 * (See drivers/md/dm.c:end_clone_bio() about why the clone requests
	 *  don't have bio clones.)
	 * Instead of queueing the clone request here, we queue the original
	 * request into dm core, which will remake a clone request and
	 * clone bios for it and resubmit it later.
	 */
	int r = DM_ENDIO_REQUEUE;
	unsigned long flags;

	if (!error && !clone->errors)
		return 0;	/* I/O complete */

	if (error == -EOPNOTSUPP || error == -EREMOTEIO || error == -EILSEQ)
		return error;

	if (mpio->pgpath)
		fail_path(mpio->pgpath);

	spin_lock_irqsave(&m->lock, flags);
	if (!m->nr_valid_paths) {
		if (!m->queue_if_no_path) {
			if (!__must_push_back(m))
				r = -EIO;
		} else {
			if (error == -EBADE)
				r = error;
		}
	}
	spin_unlock_irqrestore(&m->lock, flags);

	return r;
}

static int multipath_end_io(struct dm_target *ti, struct request *clone,
			    int error, union map_info *map_context)
{
	struct multipath *m = ti->private;
	struct dm_mpath_io *mpio = map_context->ptr;
	struct pgpath *pgpath = mpio->pgpath;
	struct path_selector *ps;
	int r;

	BUG_ON(!mpio);

	r = do_end_io(m, clone, error, mpio);
	if (pgpath) {
		ps = &pgpath->pg->ps;
		if (ps->type->end_io)
			ps->type->end_io(ps, &pgpath->path, mpio->nr_bytes);
	}
	clear_mapinfo(m, map_context);

	return r;
}

/*
 * Suspend can't complete until all the I/O is processed so if
 * the last path fails we must error any remaining I/O.
 * Note that if the freeze_bdev fails while suspending, the
 * queue_if_no_path state is lost - userspace should reset it.
 */
static void multipath_presuspend(struct dm_target *ti)
{
	struct multipath *m = (struct multipath *) ti->private;

	queue_if_no_path(m, 0, 1);
}

static void multipath_postsuspend(struct dm_target *ti)
{
	struct multipath *m = ti->private;

	mutex_lock(&m->work_mutex);
	flush_multipath_work(m);
	mutex_unlock(&m->work_mutex);
}

/*
 * Restore the queue_if_no_path setting.
 */
static void multipath_resume(struct dm_target *ti)
{
	struct multipath *m = (struct multipath *) ti->private;
	unsigned long flags;

	spin_lock_irqsave(&m->lock, flags);
	m->queue_if_no_path = m->saved_queue_if_no_path;
	spin_unlock_irqrestore(&m->lock, flags);
}

/*
 * Info output has the following format:
 * num_multipath_feature_args [multipath_feature_args]*
 * num_handler_status_args [handler_status_args]*
 * num_groups init_group_number
 *             [A|D|E num_ps_status_args [ps_status_args]*
 *              num_paths num_selector_args
 *              [path_dev A|F fail_count [selector_args]* ]+ ]+
 *
 * Table output has the following format (identical to the constructor string):
 *     num_feature_args [features_args]*
 *     num_handler_args hw_handler [hw_handler_args]*
 *     num_groups init_group_number
 *         [priority selector-name num_ps_args [ps_args]*
 *          num_paths num_selector_args [path_dev [selector_args]* ]+ ]+
 */
static int multipath_status(struct dm_target *ti, status_type_t type,
			    char *result, unsigned int maxlen)
{
	int sz = 0;
	unsigned long flags;
	struct multipath *m = (struct multipath *) ti->private;
	struct priority_group *pg;
	struct pgpath *p;
	unsigned pg_num;
	char state;

	spin_lock_irqsave(&m->lock, flags);

	/* Features */
	if (type == STATUSTYPE_INFO)
		DMEMIT("2 %u %u ", m->queue_size, m->pg_init_count);
	else {
		DMEMIT("%u ", m->queue_if_no_path +
			      (m->pg_init_retries > 0) * 2 +
			      (m->pg_init_delay_msecs != DM_PG_INIT_DELAY_DEFAULT) * 2);
		if (m->queue_if_no_path)
			DMEMIT("queue_if_no_path ");
		if (m->pg_init_retries)
			DMEMIT("pg_init_retries %u ", m->pg_init_retries);
		if (m->pg_init_delay_msecs != DM_PG_INIT_DELAY_DEFAULT)
			DMEMIT("pg_init_delay_msecs %u ", m->pg_init_delay_msecs);
	}

	if (!m->hw_handler_name || type == STATUSTYPE_INFO)
		DMEMIT("0 ");
	else
		DMEMIT("1 %s ", m->hw_handler_name);

	DMEMIT("%u ", m->nr_priority_groups);

	if (m->next_pg)
		pg_num = m->next_pg->pg_num;
	else if (m->current_pg)
		pg_num = m->current_pg->pg_num;
	else
		pg_num = (m->nr_priority_groups ? 1 : 0);

	DMEMIT("%u ", pg_num);

	switch (type) {
	case STATUSTYPE_INFO:
		list_for_each_entry(pg, &m->priority_groups, list) {
			if (pg->bypassed)
				state = 'D';	/* Disabled */
			else if (pg == m->current_pg)
				state = 'A';	/* Currently Active */
			else
				state = 'E';	/* Enabled */

			DMEMIT("%c ", state);

			if (pg->ps.type->status)
				sz += pg->ps.type->status(&pg->ps, NULL, type,
							  result + sz,
							  maxlen - sz);
			else
				DMEMIT("0 ");

			DMEMIT("%u %u ", pg->nr_pgpaths,
			       pg->ps.type->info_args);

			list_for_each_entry(p, &pg->pgpaths, list) {
				DMEMIT("%s %s %u ", p->path.dev->name,
				       p->is_active ? "A" : "F",
				       p->fail_count);
				if (pg->ps.type->status)
					sz += pg->ps.type->status(&pg->ps,
					      &p->path, type, result + sz,
					      maxlen - sz);
			}
		}
		break;

	case STATUSTYPE_TABLE:
		list_for_each_entry(pg, &m->priority_groups, list) {
			DMEMIT("%s ", pg->ps.type->name);

			if (pg->ps.type->status)
				sz += pg->ps.type->status(&pg->ps, NULL, type,
							  result + sz,
							  maxlen - sz);
			else
				DMEMIT("0 ");

			DMEMIT("%u %u ", pg->nr_pgpaths,
			       pg->ps.type->table_args);

			list_for_each_entry(p, &pg->pgpaths, list) {
				DMEMIT("%s ", p->path.dev->name);
				if (pg->ps.type->status)
					sz += pg->ps.type->status(&pg->ps,
					      &p->path, type, result + sz,
					      maxlen - sz);
			}
		}
		break;
	}

	spin_unlock_irqrestore(&m->lock, flags);

	return 0;
}

static int multipath_message(struct dm_target *ti, unsigned argc, char **argv)
{
	int r = -EINVAL;
	struct dm_dev *dev;
	struct multipath *m = (struct multipath *) ti->private;
	action_fn action;

	mutex_lock(&m->work_mutex);

	if (dm_suspended(ti)) {
		r = -EBUSY;
		goto out;
	}

	if (argc == 1) {
		if (!strcasecmp(argv[0], "queue_if_no_path")) {
			r = queue_if_no_path(m, 1, 0);
			goto out;
		} else if (!strcasecmp(argv[0], "fail_if_no_path")) {
			r = queue_if_no_path(m, 0, 0);
			goto out;
		}
	}

	if (argc != 2) {
		DMWARN("Unrecognised multipath message received.");
		goto out;
	}

	if (!strcasecmp(argv[0], "disable_group")) {
		r = bypass_pg_num(m, argv[1], 1);
		goto out;
	} else if (!strcasecmp(argv[0], "enable_group")) {
		r = bypass_pg_num(m, argv[1], 0);
		goto out;
	} else if (!strcasecmp(argv[0], "switch_group")) {
		r = switch_pg_num(m, argv[1]);
		goto out;
	} else if (!strcasecmp(argv[0], "reinstate_path"))
		action = reinstate_path;
	else if (!strcasecmp(argv[0], "fail_path"))
		action = fail_path;
	else {
		DMWARN("Unrecognised multipath message received.");
		goto out;
	}

	r = dm_get_device(ti, argv[1], dm_table_get_mode(ti->table), &dev);
	if (r) {
		DMWARN("message: error getting device %s",
		       argv[1]);
		goto out;
	}

	r = action_dev(m, dev, action);

	dm_put_device(ti, dev);

out:
	mutex_unlock(&m->work_mutex);
	return r;
}

static int multipath_ioctl(struct dm_target *ti, unsigned int cmd,
			   unsigned long arg)
{
	struct multipath *m = (struct multipath *) ti->private;
	struct block_device *bdev = NULL;
	fmode_t mode = 0;
	unsigned long flags;
	int r = 0;

	spin_lock_irqsave(&m->lock, flags);

	if (!m->current_pgpath)
		__choose_pgpath(m, 0);

	if (m->current_pgpath) {
		bdev = m->current_pgpath->path.dev->bdev;
		mode = m->current_pgpath->path.dev->mode;
	}

	if (m->queue_io)
		r = -EAGAIN;
	else if (!bdev)
		r = -EIO;

	spin_unlock_irqrestore(&m->lock, flags);

	/*
	 * Only pass ioctls through if the device sizes match exactly.
	 */
	if (!r && ti->len != i_size_read(bdev->bd_inode) >> SECTOR_SHIFT)
		r = scsi_verify_blk_ioctl(NULL, cmd);

	return r ? : __blkdev_driver_ioctl(bdev, mode, cmd, arg);
}

static int multipath_iterate_devices(struct dm_target *ti,
				     iterate_devices_callout_fn fn, void *data)
{
	struct multipath *m = ti->private;
	struct priority_group *pg;
	struct pgpath *p;
	int ret = 0;

	list_for_each_entry(pg, &m->priority_groups, list) {
		list_for_each_entry(p, &pg->pgpaths, list) {
			ret = fn(ti, p->path.dev, ti->begin, ti->len, data);
			if (ret)
				goto out;
		}
	}

out:
	return ret;
}

static int __pgpath_busy(struct pgpath *pgpath)
{
	struct request_queue *q = bdev_get_queue(pgpath->path.dev->bdev);

	return dm_underlying_device_busy(q);
}

/*
 * We return "busy", only when we can map I/Os but underlying devices
 * are busy (so even if we map I/Os now, the I/Os will wait on
 * the underlying queue).
 * In other words, if we want to kill I/Os or queue them inside us
 * due to map unavailability, we don't return "busy".  Otherwise,
 * dm core won't give us the I/Os and we can't do what we want.
 */
static int multipath_busy(struct dm_target *ti)
{
	int busy = 0, has_active = 0;
	struct multipath *m = ti->private;
	struct priority_group *pg;
	struct pgpath *pgpath;
	unsigned long flags;

	spin_lock_irqsave(&m->lock, flags);

	/* Guess which priority_group will be used at next mapping time */
	if (unlikely(!m->current_pgpath && m->next_pg))
		pg = m->next_pg;
	else if (likely(m->current_pg))
		pg = m->current_pg;
	else
		/*
		 * We don't know which pg will be used at next mapping time.
		 * We don't call __choose_pgpath() here to avoid to trigger
		 * pg_init just by busy checking.
		 * So we don't know whether underlying devices we will be using
		 * at next mapping time are busy or not. Just try mapping.
		 */
		goto out;

	/*
	 * If there is one non-busy active path at least, the path selector
	 * will be able to select it. So we consider such a pg as not busy.
	 */
	busy = 1;
	list_for_each_entry(pgpath, &pg->pgpaths, list)
		if (pgpath->is_active) {
			has_active = 1;

			if (!__pgpath_busy(pgpath)) {
				busy = 0;
				break;
			}
		}

	if (!has_active)
		/*
		 * No active path in this pg, so this pg won't be used and
		 * the current_pg will be changed at next mapping time.
		 * We need to try mapping to determine it.
		 */
		busy = 0;

out:
	spin_unlock_irqrestore(&m->lock, flags);

	return busy;
}

/*-----------------------------------------------------------------
 * Module setup
 *---------------------------------------------------------------*/
static struct target_type multipath_target = {
	.name = "multipath",
	.version = {1, 3, 0},
	.module = THIS_MODULE,
	.ctr = multipath_ctr,
	.dtr = multipath_dtr,
	.map_rq = multipath_map,
	.rq_end_io = multipath_end_io,
	.presuspend = multipath_presuspend,
	.postsuspend = multipath_postsuspend,
	.resume = multipath_resume,
	.status = multipath_status,
	.message = multipath_message,
	.ioctl = multipath_ioctl,
	.iterate_devices = multipath_iterate_devices,
	.busy = multipath_busy,
};

static int __init dm_multipath_init(void)
{
	int r;

	/* allocate a slab for the dm_ios */
	_mpio_cache = KMEM_CACHE(dm_mpath_io, 0);
	if (!_mpio_cache)
		return -ENOMEM;

	r = dm_register_target(&multipath_target);
	if (r < 0) {
		DMERR("register failed %d", r);
		kmem_cache_destroy(_mpio_cache);
		return -EINVAL;
	}

	kmultipathd = alloc_workqueue("kmpathd", WQ_MEM_RECLAIM, 0);
	if (!kmultipathd) {
		DMERR("failed to create workqueue kmpathd");
		dm_unregister_target(&multipath_target);
		kmem_cache_destroy(_mpio_cache);
		return -ENOMEM;
	}

	/*
	 * A separate workqueue is used to handle the device handlers
	 * to avoid overloading existing workqueue. Overloading the
	 * old workqueue would also create a bottleneck in the
	 * path of the storage hardware device activation.
	 */
	kmpath_handlerd = alloc_ordered_workqueue("kmpath_handlerd",
						  WQ_MEM_RECLAIM);
	if (!kmpath_handlerd) {
		DMERR("failed to create workqueue kmpath_handlerd");
		destroy_workqueue(kmultipathd);
		dm_unregister_target(&multipath_target);
		kmem_cache_destroy(_mpio_cache);
		return -ENOMEM;
	}

	DMINFO("version %u.%u.%u loaded",
	       multipath_target.version[0], multipath_target.version[1],
	       multipath_target.version[2]);

	return r;
}

static void __exit dm_multipath_exit(void)
{
	destroy_workqueue(kmpath_handlerd);
	destroy_workqueue(kmultipathd);

	dm_unregister_target(&multipath_target);
	kmem_cache_destroy(_mpio_cache);
}

module_init(dm_multipath_init);
module_exit(dm_multipath_exit);

MODULE_DESCRIPTION(DM_NAME " multipath target");
MODULE_AUTHOR("Sistina Software <dm-devel@redhat.com>");
MODULE_LICENSE("GPL");