1 /* 2 * Generic SCSI-3 ALUA SCSI Device Handler 3 * 4 * Copyright (C) 2007-2010 Hannes Reinecke, SUSE Linux Products GmbH. 5 * All rights reserved. 6 * 7 * This program is free software; you can redistribute it and/or modify 8 * it under the terms of the GNU General Public License as published by 9 * the Free Software Foundation; either version 2 of the License, or 10 * (at your option) any later version. 11 * 12 * This program is distributed in the hope that it will be useful, 13 * but WITHOUT ANY WARRANTY; without even the implied warranty of 14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 * GNU General Public License for more details. 16 * 17 * You should have received a copy of the GNU General Public License 18 * along with this program; if not, write to the Free Software 19 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 20 * 21 */ 22 #include <linux/slab.h> 23 #include <linux/delay.h> 24 #include <linux/module.h> 25 #include <asm/unaligned.h> 26 #include <scsi/scsi.h> 27 #include <scsi/scsi_proto.h> 28 #include <scsi/scsi_dbg.h> 29 #include <scsi/scsi_eh.h> 30 #include <scsi/scsi_dh.h> 31 32 #define ALUA_DH_NAME "alua" 33 #define ALUA_DH_VER "2.0" 34 35 #define TPGS_SUPPORT_NONE 0x00 36 #define TPGS_SUPPORT_OPTIMIZED 0x01 37 #define TPGS_SUPPORT_NONOPTIMIZED 0x02 38 #define TPGS_SUPPORT_STANDBY 0x04 39 #define TPGS_SUPPORT_UNAVAILABLE 0x08 40 #define TPGS_SUPPORT_LBA_DEPENDENT 0x10 41 #define TPGS_SUPPORT_OFFLINE 0x40 42 #define TPGS_SUPPORT_TRANSITION 0x80 43 44 #define RTPG_FMT_MASK 0x70 45 #define RTPG_FMT_EXT_HDR 0x10 46 47 #define TPGS_MODE_UNINITIALIZED -1 48 #define TPGS_MODE_NONE 0x0 49 #define TPGS_MODE_IMPLICIT 0x1 50 #define TPGS_MODE_EXPLICIT 0x2 51 52 #define ALUA_RTPG_SIZE 128 53 #define ALUA_FAILOVER_TIMEOUT 60 54 #define ALUA_FAILOVER_RETRIES 5 55 #define ALUA_RTPG_DELAY_MSECS 5 56 57 /* device handler flags */ 58 #define ALUA_OPTIMIZE_STPG 0x01 59 #define ALUA_RTPG_EXT_HDR_UNSUPP 0x02 60 #define ALUA_SYNC_STPG 0x04 61 /* State machine flags */ 62 #define ALUA_PG_RUN_RTPG 0x10 63 #define ALUA_PG_RUN_STPG 0x20 64 #define ALUA_PG_RUNNING 0x40 65 66 static uint optimize_stpg; 67 module_param(optimize_stpg, uint, S_IRUGO|S_IWUSR); 68 MODULE_PARM_DESC(optimize_stpg, "Allow use of a non-optimized path, rather than sending a STPG, when implicit TPGS is supported (0=No,1=Yes). Default is 0."); 69 70 static LIST_HEAD(port_group_list); 71 static DEFINE_SPINLOCK(port_group_lock); 72 static struct workqueue_struct *kaluad_wq; 73 static struct workqueue_struct *kaluad_sync_wq; 74 75 struct alua_port_group { 76 struct kref kref; 77 struct rcu_head rcu; 78 struct list_head node; 79 struct list_head dh_list; 80 unsigned char device_id_str[256]; 81 int device_id_len; 82 int group_id; 83 int tpgs; 84 int state; 85 int pref; 86 unsigned flags; /* used for optimizing STPG */ 87 unsigned char transition_tmo; 88 unsigned long expiry; 89 unsigned long interval; 90 struct delayed_work rtpg_work; 91 spinlock_t lock; 92 struct list_head rtpg_list; 93 struct scsi_device *rtpg_sdev; 94 }; 95 96 struct alua_dh_data { 97 struct list_head node; 98 struct alua_port_group __rcu *pg; 99 int group_id; 100 spinlock_t pg_lock; 101 struct scsi_device *sdev; 102 int init_error; 103 struct mutex init_mutex; 104 }; 105 106 struct alua_queue_data { 107 struct list_head entry; 108 activate_complete callback_fn; 109 void *callback_data; 110 }; 111 112 #define ALUA_POLICY_SWITCH_CURRENT 0 113 #define ALUA_POLICY_SWITCH_ALL 1 114 115 static void alua_rtpg_work(struct work_struct *work); 116 static void alua_rtpg_queue(struct alua_port_group *pg, 117 struct scsi_device *sdev, 118 struct alua_queue_data *qdata, bool force); 119 static void alua_check(struct scsi_device *sdev, bool force); 120 121 static void release_port_group(struct kref *kref) 122 { 123 struct alua_port_group *pg; 124 125 pg = container_of(kref, struct alua_port_group, kref); 126 if (pg->rtpg_sdev) 127 flush_delayed_work(&pg->rtpg_work); 128 spin_lock(&port_group_lock); 129 list_del(&pg->node); 130 spin_unlock(&port_group_lock); 131 kfree_rcu(pg, rcu); 132 } 133 134 /* 135 * submit_rtpg - Issue a REPORT TARGET GROUP STATES command 136 * @sdev: sdev the command should be sent to 137 */ 138 static int submit_rtpg(struct scsi_device *sdev, unsigned char *buff, 139 int bufflen, struct scsi_sense_hdr *sshdr, int flags) 140 { 141 u8 cdb[COMMAND_SIZE(MAINTENANCE_IN)]; 142 int req_flags = REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT | 143 REQ_FAILFAST_DRIVER; 144 145 /* Prepare the command. */ 146 memset(cdb, 0x0, COMMAND_SIZE(MAINTENANCE_IN)); 147 cdb[0] = MAINTENANCE_IN; 148 if (!(flags & ALUA_RTPG_EXT_HDR_UNSUPP)) 149 cdb[1] = MI_REPORT_TARGET_PGS | MI_EXT_HDR_PARAM_FMT; 150 else 151 cdb[1] = MI_REPORT_TARGET_PGS; 152 put_unaligned_be32(bufflen, &cdb[6]); 153 154 return scsi_execute_req_flags(sdev, cdb, DMA_FROM_DEVICE, 155 buff, bufflen, sshdr, 156 ALUA_FAILOVER_TIMEOUT * HZ, 157 ALUA_FAILOVER_RETRIES, NULL, 158 req_flags, 0); 159 } 160 161 /* 162 * submit_stpg - Issue a SET TARGET PORT GROUP command 163 * 164 * Currently we're only setting the current target port group state 165 * to 'active/optimized' and let the array firmware figure out 166 * the states of the remaining groups. 167 */ 168 static int submit_stpg(struct scsi_device *sdev, int group_id, 169 struct scsi_sense_hdr *sshdr) 170 { 171 u8 cdb[COMMAND_SIZE(MAINTENANCE_OUT)]; 172 unsigned char stpg_data[8]; 173 int stpg_len = 8; 174 int req_flags = REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT | 175 REQ_FAILFAST_DRIVER; 176 177 /* Prepare the data buffer */ 178 memset(stpg_data, 0, stpg_len); 179 stpg_data[4] = SCSI_ACCESS_STATE_OPTIMAL; 180 put_unaligned_be16(group_id, &stpg_data[6]); 181 182 /* Prepare the command. */ 183 memset(cdb, 0x0, COMMAND_SIZE(MAINTENANCE_OUT)); 184 cdb[0] = MAINTENANCE_OUT; 185 cdb[1] = MO_SET_TARGET_PGS; 186 put_unaligned_be32(stpg_len, &cdb[6]); 187 188 return scsi_execute_req_flags(sdev, cdb, DMA_TO_DEVICE, 189 stpg_data, stpg_len, 190 sshdr, ALUA_FAILOVER_TIMEOUT * HZ, 191 ALUA_FAILOVER_RETRIES, NULL, 192 req_flags, 0); 193 } 194 195 static struct alua_port_group *alua_find_get_pg(char *id_str, size_t id_size, 196 int group_id) 197 { 198 struct alua_port_group *pg; 199 200 if (!id_str || !id_size || !strlen(id_str)) 201 return NULL; 202 203 list_for_each_entry(pg, &port_group_list, node) { 204 if (pg->group_id != group_id) 205 continue; 206 if (!pg->device_id_len || pg->device_id_len != id_size) 207 continue; 208 if (strncmp(pg->device_id_str, id_str, id_size)) 209 continue; 210 if (!kref_get_unless_zero(&pg->kref)) 211 continue; 212 return pg; 213 } 214 215 return NULL; 216 } 217 218 /* 219 * alua_alloc_pg - Allocate a new port_group structure 220 * @sdev: scsi device 221 * @h: alua device_handler data 222 * @group_id: port group id 223 * 224 * Allocate a new port_group structure for a given 225 * device. 226 */ 227 static struct alua_port_group *alua_alloc_pg(struct scsi_device *sdev, 228 int group_id, int tpgs) 229 { 230 struct alua_port_group *pg, *tmp_pg; 231 232 pg = kzalloc(sizeof(struct alua_port_group), GFP_KERNEL); 233 if (!pg) 234 return ERR_PTR(-ENOMEM); 235 236 pg->device_id_len = scsi_vpd_lun_id(sdev, pg->device_id_str, 237 sizeof(pg->device_id_str)); 238 if (pg->device_id_len <= 0) { 239 /* 240 * TPGS supported but no device identification found. 241 * Generate private device identification. 242 */ 243 sdev_printk(KERN_INFO, sdev, 244 "%s: No device descriptors found\n", 245 ALUA_DH_NAME); 246 pg->device_id_str[0] = '\0'; 247 pg->device_id_len = 0; 248 } 249 pg->group_id = group_id; 250 pg->tpgs = tpgs; 251 pg->state = SCSI_ACCESS_STATE_OPTIMAL; 252 if (optimize_stpg) 253 pg->flags |= ALUA_OPTIMIZE_STPG; 254 kref_init(&pg->kref); 255 INIT_DELAYED_WORK(&pg->rtpg_work, alua_rtpg_work); 256 INIT_LIST_HEAD(&pg->rtpg_list); 257 INIT_LIST_HEAD(&pg->node); 258 INIT_LIST_HEAD(&pg->dh_list); 259 spin_lock_init(&pg->lock); 260 261 spin_lock(&port_group_lock); 262 tmp_pg = alua_find_get_pg(pg->device_id_str, pg->device_id_len, 263 group_id); 264 if (tmp_pg) { 265 spin_unlock(&port_group_lock); 266 kfree(pg); 267 return tmp_pg; 268 } 269 270 list_add(&pg->node, &port_group_list); 271 spin_unlock(&port_group_lock); 272 273 return pg; 274 } 275 276 /* 277 * alua_check_tpgs - Evaluate TPGS setting 278 * @sdev: device to be checked 279 * 280 * Examine the TPGS setting of the sdev to find out if ALUA 281 * is supported. 282 */ 283 static int alua_check_tpgs(struct scsi_device *sdev) 284 { 285 int tpgs = TPGS_MODE_NONE; 286 287 /* 288 * ALUA support for non-disk devices is fraught with 289 * difficulties, so disable it for now. 290 */ 291 if (sdev->type != TYPE_DISK) { 292 sdev_printk(KERN_INFO, sdev, 293 "%s: disable for non-disk devices\n", 294 ALUA_DH_NAME); 295 return tpgs; 296 } 297 298 tpgs = scsi_device_tpgs(sdev); 299 switch (tpgs) { 300 case TPGS_MODE_EXPLICIT|TPGS_MODE_IMPLICIT: 301 sdev_printk(KERN_INFO, sdev, 302 "%s: supports implicit and explicit TPGS\n", 303 ALUA_DH_NAME); 304 break; 305 case TPGS_MODE_EXPLICIT: 306 sdev_printk(KERN_INFO, sdev, "%s: supports explicit TPGS\n", 307 ALUA_DH_NAME); 308 break; 309 case TPGS_MODE_IMPLICIT: 310 sdev_printk(KERN_INFO, sdev, "%s: supports implicit TPGS\n", 311 ALUA_DH_NAME); 312 break; 313 case TPGS_MODE_NONE: 314 sdev_printk(KERN_INFO, sdev, "%s: not supported\n", 315 ALUA_DH_NAME); 316 break; 317 default: 318 sdev_printk(KERN_INFO, sdev, 319 "%s: unsupported TPGS setting %d\n", 320 ALUA_DH_NAME, tpgs); 321 tpgs = TPGS_MODE_NONE; 322 break; 323 } 324 325 return tpgs; 326 } 327 328 /* 329 * alua_check_vpd - Evaluate INQUIRY vpd page 0x83 330 * @sdev: device to be checked 331 * 332 * Extract the relative target port and the target port group 333 * descriptor from the list of identificators. 334 */ 335 static int alua_check_vpd(struct scsi_device *sdev, struct alua_dh_data *h, 336 int tpgs) 337 { 338 int rel_port = -1, group_id; 339 struct alua_port_group *pg, *old_pg = NULL; 340 bool pg_updated = false; 341 unsigned long flags; 342 343 group_id = scsi_vpd_tpg_id(sdev, &rel_port); 344 if (group_id < 0) { 345 /* 346 * Internal error; TPGS supported but required 347 * VPD identification descriptors not present. 348 * Disable ALUA support 349 */ 350 sdev_printk(KERN_INFO, sdev, 351 "%s: No target port descriptors found\n", 352 ALUA_DH_NAME); 353 return SCSI_DH_DEV_UNSUPP; 354 } 355 356 pg = alua_alloc_pg(sdev, group_id, tpgs); 357 if (IS_ERR(pg)) { 358 if (PTR_ERR(pg) == -ENOMEM) 359 return SCSI_DH_NOMEM; 360 return SCSI_DH_DEV_UNSUPP; 361 } 362 if (pg->device_id_len) 363 sdev_printk(KERN_INFO, sdev, 364 "%s: device %s port group %x rel port %x\n", 365 ALUA_DH_NAME, pg->device_id_str, 366 group_id, rel_port); 367 else 368 sdev_printk(KERN_INFO, sdev, 369 "%s: port group %x rel port %x\n", 370 ALUA_DH_NAME, group_id, rel_port); 371 372 /* Check for existing port group references */ 373 spin_lock(&h->pg_lock); 374 old_pg = rcu_dereference_protected(h->pg, lockdep_is_held(&h->pg_lock)); 375 if (old_pg != pg) { 376 /* port group has changed. Update to new port group */ 377 if (h->pg) { 378 spin_lock_irqsave(&old_pg->lock, flags); 379 list_del_rcu(&h->node); 380 spin_unlock_irqrestore(&old_pg->lock, flags); 381 } 382 rcu_assign_pointer(h->pg, pg); 383 pg_updated = true; 384 } 385 386 spin_lock_irqsave(&pg->lock, flags); 387 if (sdev->synchronous_alua) 388 pg->flags |= ALUA_SYNC_STPG; 389 if (pg_updated) 390 list_add_rcu(&h->node, &pg->dh_list); 391 spin_unlock_irqrestore(&pg->lock, flags); 392 393 alua_rtpg_queue(rcu_dereference_protected(h->pg, 394 lockdep_is_held(&h->pg_lock)), 395 sdev, NULL, true); 396 spin_unlock(&h->pg_lock); 397 398 if (old_pg) 399 kref_put(&old_pg->kref, release_port_group); 400 401 return SCSI_DH_OK; 402 } 403 404 static char print_alua_state(unsigned char state) 405 { 406 switch (state) { 407 case SCSI_ACCESS_STATE_OPTIMAL: 408 return 'A'; 409 case SCSI_ACCESS_STATE_ACTIVE: 410 return 'N'; 411 case SCSI_ACCESS_STATE_STANDBY: 412 return 'S'; 413 case SCSI_ACCESS_STATE_UNAVAILABLE: 414 return 'U'; 415 case SCSI_ACCESS_STATE_LBA: 416 return 'L'; 417 case SCSI_ACCESS_STATE_OFFLINE: 418 return 'O'; 419 case SCSI_ACCESS_STATE_TRANSITIONING: 420 return 'T'; 421 default: 422 return 'X'; 423 } 424 } 425 426 static int alua_check_sense(struct scsi_device *sdev, 427 struct scsi_sense_hdr *sense_hdr) 428 { 429 switch (sense_hdr->sense_key) { 430 case NOT_READY: 431 if (sense_hdr->asc == 0x04 && sense_hdr->ascq == 0x0a) { 432 /* 433 * LUN Not Accessible - ALUA state transition 434 */ 435 alua_check(sdev, false); 436 return NEEDS_RETRY; 437 } 438 break; 439 case UNIT_ATTENTION: 440 if (sense_hdr->asc == 0x29 && sense_hdr->ascq == 0x00) { 441 /* 442 * Power On, Reset, or Bus Device Reset. 443 * Might have obscured a state transition, 444 * so schedule a recheck. 445 */ 446 alua_check(sdev, true); 447 return ADD_TO_MLQUEUE; 448 } 449 if (sense_hdr->asc == 0x29 && sense_hdr->ascq == 0x04) 450 /* 451 * Device internal reset 452 */ 453 return ADD_TO_MLQUEUE; 454 if (sense_hdr->asc == 0x2a && sense_hdr->ascq == 0x01) 455 /* 456 * Mode Parameters Changed 457 */ 458 return ADD_TO_MLQUEUE; 459 if (sense_hdr->asc == 0x2a && sense_hdr->ascq == 0x06) { 460 /* 461 * ALUA state changed 462 */ 463 alua_check(sdev, true); 464 return ADD_TO_MLQUEUE; 465 } 466 if (sense_hdr->asc == 0x2a && sense_hdr->ascq == 0x07) { 467 /* 468 * Implicit ALUA state transition failed 469 */ 470 alua_check(sdev, true); 471 return ADD_TO_MLQUEUE; 472 } 473 if (sense_hdr->asc == 0x3f && sense_hdr->ascq == 0x03) 474 /* 475 * Inquiry data has changed 476 */ 477 return ADD_TO_MLQUEUE; 478 if (sense_hdr->asc == 0x3f && sense_hdr->ascq == 0x0e) 479 /* 480 * REPORTED_LUNS_DATA_HAS_CHANGED is reported 481 * when switching controllers on targets like 482 * Intel Multi-Flex. We can just retry. 483 */ 484 return ADD_TO_MLQUEUE; 485 break; 486 } 487 488 return SCSI_RETURN_NOT_HANDLED; 489 } 490 491 /* 492 * alua_tur - Send a TEST UNIT READY 493 * @sdev: device to which the TEST UNIT READY command should be send 494 * 495 * Send a TEST UNIT READY to @sdev to figure out the device state 496 * Returns SCSI_DH_RETRY if the sense code is NOT READY/ALUA TRANSITIONING, 497 * SCSI_DH_OK if no error occurred, and SCSI_DH_IO otherwise. 498 */ 499 static int alua_tur(struct scsi_device *sdev) 500 { 501 struct scsi_sense_hdr sense_hdr; 502 int retval; 503 504 retval = scsi_test_unit_ready(sdev, ALUA_FAILOVER_TIMEOUT * HZ, 505 ALUA_FAILOVER_RETRIES, &sense_hdr); 506 if (sense_hdr.sense_key == NOT_READY && 507 sense_hdr.asc == 0x04 && sense_hdr.ascq == 0x0a) 508 return SCSI_DH_RETRY; 509 else if (retval) 510 return SCSI_DH_IO; 511 else 512 return SCSI_DH_OK; 513 } 514 515 /* 516 * alua_rtpg - Evaluate REPORT TARGET GROUP STATES 517 * @sdev: the device to be evaluated. 518 * 519 * Evaluate the Target Port Group State. 520 * Returns SCSI_DH_DEV_OFFLINED if the path is 521 * found to be unusable. 522 */ 523 static int alua_rtpg(struct scsi_device *sdev, struct alua_port_group *pg) 524 { 525 struct scsi_sense_hdr sense_hdr; 526 struct alua_port_group *tmp_pg; 527 int len, k, off, valid_states = 0, bufflen = ALUA_RTPG_SIZE; 528 unsigned char *desc, *buff; 529 unsigned err, retval; 530 unsigned int tpg_desc_tbl_off; 531 unsigned char orig_transition_tmo; 532 unsigned long flags; 533 534 if (!pg->expiry) { 535 unsigned long transition_tmo = ALUA_FAILOVER_TIMEOUT * HZ; 536 537 if (pg->transition_tmo) 538 transition_tmo = pg->transition_tmo * HZ; 539 540 pg->expiry = round_jiffies_up(jiffies + transition_tmo); 541 } 542 543 buff = kzalloc(bufflen, GFP_KERNEL); 544 if (!buff) 545 return SCSI_DH_DEV_TEMP_BUSY; 546 547 retry: 548 err = 0; 549 retval = submit_rtpg(sdev, buff, bufflen, &sense_hdr, pg->flags); 550 551 if (retval) { 552 if (!scsi_sense_valid(&sense_hdr)) { 553 sdev_printk(KERN_INFO, sdev, 554 "%s: rtpg failed, result %d\n", 555 ALUA_DH_NAME, retval); 556 kfree(buff); 557 if (driver_byte(retval) == DRIVER_ERROR) 558 return SCSI_DH_DEV_TEMP_BUSY; 559 return SCSI_DH_IO; 560 } 561 562 /* 563 * submit_rtpg() has failed on existing arrays 564 * when requesting extended header info, and 565 * the array doesn't support extended headers, 566 * even though it shouldn't according to T10. 567 * The retry without rtpg_ext_hdr_req set 568 * handles this. 569 */ 570 if (!(pg->flags & ALUA_RTPG_EXT_HDR_UNSUPP) && 571 sense_hdr.sense_key == ILLEGAL_REQUEST && 572 sense_hdr.asc == 0x24 && sense_hdr.ascq == 0) { 573 pg->flags |= ALUA_RTPG_EXT_HDR_UNSUPP; 574 goto retry; 575 } 576 /* 577 * Retry on ALUA state transition or if any 578 * UNIT ATTENTION occurred. 579 */ 580 if (sense_hdr.sense_key == NOT_READY && 581 sense_hdr.asc == 0x04 && sense_hdr.ascq == 0x0a) 582 err = SCSI_DH_RETRY; 583 else if (sense_hdr.sense_key == UNIT_ATTENTION) 584 err = SCSI_DH_RETRY; 585 if (err == SCSI_DH_RETRY && 586 pg->expiry != 0 && time_before(jiffies, pg->expiry)) { 587 sdev_printk(KERN_ERR, sdev, "%s: rtpg retry\n", 588 ALUA_DH_NAME); 589 scsi_print_sense_hdr(sdev, ALUA_DH_NAME, &sense_hdr); 590 kfree(buff); 591 return err; 592 } 593 sdev_printk(KERN_ERR, sdev, "%s: rtpg failed\n", 594 ALUA_DH_NAME); 595 scsi_print_sense_hdr(sdev, ALUA_DH_NAME, &sense_hdr); 596 kfree(buff); 597 pg->expiry = 0; 598 return SCSI_DH_IO; 599 } 600 601 len = get_unaligned_be32(&buff[0]) + 4; 602 603 if (len > bufflen) { 604 /* Resubmit with the correct length */ 605 kfree(buff); 606 bufflen = len; 607 buff = kmalloc(bufflen, GFP_KERNEL); 608 if (!buff) { 609 sdev_printk(KERN_WARNING, sdev, 610 "%s: kmalloc buffer failed\n",__func__); 611 /* Temporary failure, bypass */ 612 pg->expiry = 0; 613 return SCSI_DH_DEV_TEMP_BUSY; 614 } 615 goto retry; 616 } 617 618 orig_transition_tmo = pg->transition_tmo; 619 if ((buff[4] & RTPG_FMT_MASK) == RTPG_FMT_EXT_HDR && buff[5] != 0) 620 pg->transition_tmo = buff[5]; 621 else 622 pg->transition_tmo = ALUA_FAILOVER_TIMEOUT; 623 624 if (orig_transition_tmo != pg->transition_tmo) { 625 sdev_printk(KERN_INFO, sdev, 626 "%s: transition timeout set to %d seconds\n", 627 ALUA_DH_NAME, pg->transition_tmo); 628 pg->expiry = jiffies + pg->transition_tmo * HZ; 629 } 630 631 if ((buff[4] & RTPG_FMT_MASK) == RTPG_FMT_EXT_HDR) 632 tpg_desc_tbl_off = 8; 633 else 634 tpg_desc_tbl_off = 4; 635 636 for (k = tpg_desc_tbl_off, desc = buff + tpg_desc_tbl_off; 637 k < len; 638 k += off, desc += off) { 639 u16 group_id = get_unaligned_be16(&desc[2]); 640 641 spin_lock_irqsave(&port_group_lock, flags); 642 tmp_pg = alua_find_get_pg(pg->device_id_str, pg->device_id_len, 643 group_id); 644 spin_unlock_irqrestore(&port_group_lock, flags); 645 if (tmp_pg) { 646 if (spin_trylock_irqsave(&tmp_pg->lock, flags)) { 647 if ((tmp_pg == pg) || 648 !(tmp_pg->flags & ALUA_PG_RUNNING)) { 649 struct alua_dh_data *h; 650 651 tmp_pg->state = desc[0] & 0x0f; 652 tmp_pg->pref = desc[0] >> 7; 653 rcu_read_lock(); 654 list_for_each_entry_rcu(h, 655 &tmp_pg->dh_list, node) { 656 /* h->sdev should always be valid */ 657 BUG_ON(!h->sdev); 658 h->sdev->access_state = desc[0]; 659 } 660 rcu_read_unlock(); 661 } 662 if (tmp_pg == pg) 663 valid_states = desc[1]; 664 spin_unlock_irqrestore(&tmp_pg->lock, flags); 665 } 666 kref_put(&tmp_pg->kref, release_port_group); 667 } 668 off = 8 + (desc[7] * 4); 669 } 670 671 spin_lock_irqsave(&pg->lock, flags); 672 sdev_printk(KERN_INFO, sdev, 673 "%s: port group %02x state %c %s supports %c%c%c%c%c%c%c\n", 674 ALUA_DH_NAME, pg->group_id, print_alua_state(pg->state), 675 pg->pref ? "preferred" : "non-preferred", 676 valid_states&TPGS_SUPPORT_TRANSITION?'T':'t', 677 valid_states&TPGS_SUPPORT_OFFLINE?'O':'o', 678 valid_states&TPGS_SUPPORT_LBA_DEPENDENT?'L':'l', 679 valid_states&TPGS_SUPPORT_UNAVAILABLE?'U':'u', 680 valid_states&TPGS_SUPPORT_STANDBY?'S':'s', 681 valid_states&TPGS_SUPPORT_NONOPTIMIZED?'N':'n', 682 valid_states&TPGS_SUPPORT_OPTIMIZED?'A':'a'); 683 684 switch (pg->state) { 685 case SCSI_ACCESS_STATE_TRANSITIONING: 686 if (time_before(jiffies, pg->expiry)) { 687 /* State transition, retry */ 688 pg->interval = 2; 689 err = SCSI_DH_RETRY; 690 } else { 691 struct alua_dh_data *h; 692 693 /* Transitioning time exceeded, set port to standby */ 694 err = SCSI_DH_IO; 695 pg->state = SCSI_ACCESS_STATE_STANDBY; 696 pg->expiry = 0; 697 rcu_read_lock(); 698 list_for_each_entry_rcu(h, &pg->dh_list, node) { 699 BUG_ON(!h->sdev); 700 h->sdev->access_state = 701 (pg->state & SCSI_ACCESS_STATE_MASK); 702 if (pg->pref) 703 h->sdev->access_state |= 704 SCSI_ACCESS_STATE_PREFERRED; 705 } 706 rcu_read_unlock(); 707 } 708 break; 709 case SCSI_ACCESS_STATE_OFFLINE: 710 /* Path unusable */ 711 err = SCSI_DH_DEV_OFFLINED; 712 pg->expiry = 0; 713 break; 714 default: 715 /* Useable path if active */ 716 err = SCSI_DH_OK; 717 pg->expiry = 0; 718 break; 719 } 720 spin_unlock_irqrestore(&pg->lock, flags); 721 kfree(buff); 722 return err; 723 } 724 725 /* 726 * alua_stpg - Issue a SET TARGET PORT GROUP command 727 * 728 * Issue a SET TARGET PORT GROUP command and evaluate the 729 * response. Returns SCSI_DH_RETRY per default to trigger 730 * a re-evaluation of the target group state or SCSI_DH_OK 731 * if no further action needs to be taken. 732 */ 733 static unsigned alua_stpg(struct scsi_device *sdev, struct alua_port_group *pg) 734 { 735 int retval; 736 struct scsi_sense_hdr sense_hdr; 737 738 if (!(pg->tpgs & TPGS_MODE_EXPLICIT)) { 739 /* Only implicit ALUA supported, retry */ 740 return SCSI_DH_RETRY; 741 } 742 switch (pg->state) { 743 case SCSI_ACCESS_STATE_OPTIMAL: 744 return SCSI_DH_OK; 745 case SCSI_ACCESS_STATE_ACTIVE: 746 if ((pg->flags & ALUA_OPTIMIZE_STPG) && 747 !pg->pref && 748 (pg->tpgs & TPGS_MODE_IMPLICIT)) 749 return SCSI_DH_OK; 750 break; 751 case SCSI_ACCESS_STATE_STANDBY: 752 case SCSI_ACCESS_STATE_UNAVAILABLE: 753 break; 754 case SCSI_ACCESS_STATE_OFFLINE: 755 return SCSI_DH_IO; 756 case SCSI_ACCESS_STATE_TRANSITIONING: 757 break; 758 default: 759 sdev_printk(KERN_INFO, sdev, 760 "%s: stpg failed, unhandled TPGS state %d", 761 ALUA_DH_NAME, pg->state); 762 return SCSI_DH_NOSYS; 763 } 764 retval = submit_stpg(sdev, pg->group_id, &sense_hdr); 765 766 if (retval) { 767 if (!scsi_sense_valid(&sense_hdr)) { 768 sdev_printk(KERN_INFO, sdev, 769 "%s: stpg failed, result %d", 770 ALUA_DH_NAME, retval); 771 if (driver_byte(retval) == DRIVER_ERROR) 772 return SCSI_DH_DEV_TEMP_BUSY; 773 } else { 774 sdev_printk(KERN_INFO, sdev, "%s: stpg failed\n", 775 ALUA_DH_NAME); 776 scsi_print_sense_hdr(sdev, ALUA_DH_NAME, &sense_hdr); 777 } 778 } 779 /* Retry RTPG */ 780 return SCSI_DH_RETRY; 781 } 782 783 static void alua_rtpg_work(struct work_struct *work) 784 { 785 struct alua_port_group *pg = 786 container_of(work, struct alua_port_group, rtpg_work.work); 787 struct scsi_device *sdev; 788 LIST_HEAD(qdata_list); 789 int err = SCSI_DH_OK; 790 struct alua_queue_data *qdata, *tmp; 791 unsigned long flags; 792 struct workqueue_struct *alua_wq = kaluad_wq; 793 794 spin_lock_irqsave(&pg->lock, flags); 795 sdev = pg->rtpg_sdev; 796 if (!sdev) { 797 WARN_ON(pg->flags & ALUA_PG_RUN_RTPG); 798 WARN_ON(pg->flags & ALUA_PG_RUN_STPG); 799 spin_unlock_irqrestore(&pg->lock, flags); 800 kref_put(&pg->kref, release_port_group); 801 return; 802 } 803 if (pg->flags & ALUA_SYNC_STPG) 804 alua_wq = kaluad_sync_wq; 805 pg->flags |= ALUA_PG_RUNNING; 806 if (pg->flags & ALUA_PG_RUN_RTPG) { 807 int state = pg->state; 808 809 pg->flags &= ~ALUA_PG_RUN_RTPG; 810 spin_unlock_irqrestore(&pg->lock, flags); 811 if (state == SCSI_ACCESS_STATE_TRANSITIONING) { 812 if (alua_tur(sdev) == SCSI_DH_RETRY) { 813 spin_lock_irqsave(&pg->lock, flags); 814 pg->flags &= ~ALUA_PG_RUNNING; 815 pg->flags |= ALUA_PG_RUN_RTPG; 816 spin_unlock_irqrestore(&pg->lock, flags); 817 queue_delayed_work(alua_wq, &pg->rtpg_work, 818 pg->interval * HZ); 819 return; 820 } 821 /* Send RTPG on failure or if TUR indicates SUCCESS */ 822 } 823 err = alua_rtpg(sdev, pg); 824 spin_lock_irqsave(&pg->lock, flags); 825 if (err == SCSI_DH_RETRY || pg->flags & ALUA_PG_RUN_RTPG) { 826 pg->flags &= ~ALUA_PG_RUNNING; 827 pg->flags |= ALUA_PG_RUN_RTPG; 828 spin_unlock_irqrestore(&pg->lock, flags); 829 queue_delayed_work(alua_wq, &pg->rtpg_work, 830 pg->interval * HZ); 831 return; 832 } 833 if (err != SCSI_DH_OK) 834 pg->flags &= ~ALUA_PG_RUN_STPG; 835 } 836 if (pg->flags & ALUA_PG_RUN_STPG) { 837 pg->flags &= ~ALUA_PG_RUN_STPG; 838 spin_unlock_irqrestore(&pg->lock, flags); 839 err = alua_stpg(sdev, pg); 840 spin_lock_irqsave(&pg->lock, flags); 841 if (err == SCSI_DH_RETRY || pg->flags & ALUA_PG_RUN_RTPG) { 842 pg->flags |= ALUA_PG_RUN_RTPG; 843 pg->interval = 0; 844 pg->flags &= ~ALUA_PG_RUNNING; 845 spin_unlock_irqrestore(&pg->lock, flags); 846 queue_delayed_work(alua_wq, &pg->rtpg_work, 847 pg->interval * HZ); 848 return; 849 } 850 } 851 852 list_splice_init(&pg->rtpg_list, &qdata_list); 853 pg->rtpg_sdev = NULL; 854 spin_unlock_irqrestore(&pg->lock, flags); 855 856 list_for_each_entry_safe(qdata, tmp, &qdata_list, entry) { 857 list_del(&qdata->entry); 858 if (qdata->callback_fn) 859 qdata->callback_fn(qdata->callback_data, err); 860 kfree(qdata); 861 } 862 spin_lock_irqsave(&pg->lock, flags); 863 pg->flags &= ~ALUA_PG_RUNNING; 864 spin_unlock_irqrestore(&pg->lock, flags); 865 scsi_device_put(sdev); 866 kref_put(&pg->kref, release_port_group); 867 } 868 869 static void alua_rtpg_queue(struct alua_port_group *pg, 870 struct scsi_device *sdev, 871 struct alua_queue_data *qdata, bool force) 872 { 873 int start_queue = 0; 874 unsigned long flags; 875 struct workqueue_struct *alua_wq = kaluad_wq; 876 877 if (!pg) 878 return; 879 880 spin_lock_irqsave(&pg->lock, flags); 881 if (qdata) { 882 list_add_tail(&qdata->entry, &pg->rtpg_list); 883 pg->flags |= ALUA_PG_RUN_STPG; 884 force = true; 885 } 886 if (pg->rtpg_sdev == NULL) { 887 pg->interval = 0; 888 pg->flags |= ALUA_PG_RUN_RTPG; 889 kref_get(&pg->kref); 890 pg->rtpg_sdev = sdev; 891 scsi_device_get(sdev); 892 start_queue = 1; 893 } else if (!(pg->flags & ALUA_PG_RUN_RTPG) && force) { 894 pg->flags |= ALUA_PG_RUN_RTPG; 895 /* Do not queue if the worker is already running */ 896 if (!(pg->flags & ALUA_PG_RUNNING)) { 897 kref_get(&pg->kref); 898 sdev = NULL; 899 start_queue = 1; 900 } 901 } 902 903 if (pg->flags & ALUA_SYNC_STPG) 904 alua_wq = kaluad_sync_wq; 905 spin_unlock_irqrestore(&pg->lock, flags); 906 907 if (start_queue && 908 !queue_delayed_work(alua_wq, &pg->rtpg_work, 909 msecs_to_jiffies(ALUA_RTPG_DELAY_MSECS))) { 910 if (sdev) 911 scsi_device_put(sdev); 912 kref_put(&pg->kref, release_port_group); 913 } 914 } 915 916 /* 917 * alua_initialize - Initialize ALUA state 918 * @sdev: the device to be initialized 919 * 920 * For the prep_fn to work correctly we have 921 * to initialize the ALUA state for the device. 922 */ 923 static int alua_initialize(struct scsi_device *sdev, struct alua_dh_data *h) 924 { 925 int err = SCSI_DH_DEV_UNSUPP, tpgs; 926 927 mutex_lock(&h->init_mutex); 928 tpgs = alua_check_tpgs(sdev); 929 if (tpgs != TPGS_MODE_NONE) 930 err = alua_check_vpd(sdev, h, tpgs); 931 h->init_error = err; 932 mutex_unlock(&h->init_mutex); 933 return err; 934 } 935 /* 936 * alua_set_params - set/unset the optimize flag 937 * @sdev: device on the path to be activated 938 * params - parameters in the following format 939 * "no_of_params\0param1\0param2\0param3\0...\0" 940 * For example, to set the flag pass the following parameters 941 * from multipath.conf 942 * hardware_handler "2 alua 1" 943 */ 944 static int alua_set_params(struct scsi_device *sdev, const char *params) 945 { 946 struct alua_dh_data *h = sdev->handler_data; 947 struct alua_port_group *pg = NULL; 948 unsigned int optimize = 0, argc; 949 const char *p = params; 950 int result = SCSI_DH_OK; 951 unsigned long flags; 952 953 if ((sscanf(params, "%u", &argc) != 1) || (argc != 1)) 954 return -EINVAL; 955 956 while (*p++) 957 ; 958 if ((sscanf(p, "%u", &optimize) != 1) || (optimize > 1)) 959 return -EINVAL; 960 961 rcu_read_lock(); 962 pg = rcu_dereference(h->pg); 963 if (!pg) { 964 rcu_read_unlock(); 965 return -ENXIO; 966 } 967 spin_lock_irqsave(&pg->lock, flags); 968 if (optimize) 969 pg->flags |= ALUA_OPTIMIZE_STPG; 970 else 971 pg->flags &= ~ALUA_OPTIMIZE_STPG; 972 spin_unlock_irqrestore(&pg->lock, flags); 973 rcu_read_unlock(); 974 975 return result; 976 } 977 978 /* 979 * alua_activate - activate a path 980 * @sdev: device on the path to be activated 981 * 982 * We're currently switching the port group to be activated only and 983 * let the array figure out the rest. 984 * There may be other arrays which require us to switch all port groups 985 * based on a certain policy. But until we actually encounter them it 986 * should be okay. 987 */ 988 static int alua_activate(struct scsi_device *sdev, 989 activate_complete fn, void *data) 990 { 991 struct alua_dh_data *h = sdev->handler_data; 992 int err = SCSI_DH_OK; 993 struct alua_queue_data *qdata; 994 struct alua_port_group *pg; 995 996 qdata = kzalloc(sizeof(*qdata), GFP_KERNEL); 997 if (!qdata) { 998 err = SCSI_DH_RES_TEMP_UNAVAIL; 999 goto out; 1000 } 1001 qdata->callback_fn = fn; 1002 qdata->callback_data = data; 1003 1004 mutex_lock(&h->init_mutex); 1005 rcu_read_lock(); 1006 pg = rcu_dereference(h->pg); 1007 if (!pg || !kref_get_unless_zero(&pg->kref)) { 1008 rcu_read_unlock(); 1009 kfree(qdata); 1010 err = h->init_error; 1011 mutex_unlock(&h->init_mutex); 1012 goto out; 1013 } 1014 fn = NULL; 1015 rcu_read_unlock(); 1016 mutex_unlock(&h->init_mutex); 1017 1018 alua_rtpg_queue(pg, sdev, qdata, true); 1019 kref_put(&pg->kref, release_port_group); 1020 out: 1021 if (fn) 1022 fn(data, err); 1023 return 0; 1024 } 1025 1026 /* 1027 * alua_check - check path status 1028 * @sdev: device on the path to be checked 1029 * 1030 * Check the device status 1031 */ 1032 static void alua_check(struct scsi_device *sdev, bool force) 1033 { 1034 struct alua_dh_data *h = sdev->handler_data; 1035 struct alua_port_group *pg; 1036 1037 rcu_read_lock(); 1038 pg = rcu_dereference(h->pg); 1039 if (!pg || !kref_get_unless_zero(&pg->kref)) { 1040 rcu_read_unlock(); 1041 return; 1042 } 1043 rcu_read_unlock(); 1044 1045 alua_rtpg_queue(pg, sdev, NULL, force); 1046 kref_put(&pg->kref, release_port_group); 1047 } 1048 1049 /* 1050 * alua_prep_fn - request callback 1051 * 1052 * Fail I/O to all paths not in state 1053 * active/optimized or active/non-optimized. 1054 */ 1055 static int alua_prep_fn(struct scsi_device *sdev, struct request *req) 1056 { 1057 struct alua_dh_data *h = sdev->handler_data; 1058 struct alua_port_group *pg; 1059 unsigned char state = SCSI_ACCESS_STATE_OPTIMAL; 1060 int ret = BLKPREP_OK; 1061 1062 rcu_read_lock(); 1063 pg = rcu_dereference(h->pg); 1064 if (pg) 1065 state = pg->state; 1066 rcu_read_unlock(); 1067 if (state == SCSI_ACCESS_STATE_TRANSITIONING) 1068 ret = BLKPREP_DEFER; 1069 else if (state != SCSI_ACCESS_STATE_OPTIMAL && 1070 state != SCSI_ACCESS_STATE_ACTIVE && 1071 state != SCSI_ACCESS_STATE_LBA) { 1072 ret = BLKPREP_KILL; 1073 req->rq_flags |= RQF_QUIET; 1074 } 1075 return ret; 1076 1077 } 1078 1079 static void alua_rescan(struct scsi_device *sdev) 1080 { 1081 struct alua_dh_data *h = sdev->handler_data; 1082 1083 alua_initialize(sdev, h); 1084 } 1085 1086 /* 1087 * alua_bus_attach - Attach device handler 1088 * @sdev: device to be attached to 1089 */ 1090 static int alua_bus_attach(struct scsi_device *sdev) 1091 { 1092 struct alua_dh_data *h; 1093 int err, ret = -EINVAL; 1094 1095 h = kzalloc(sizeof(*h) , GFP_KERNEL); 1096 if (!h) 1097 return -ENOMEM; 1098 spin_lock_init(&h->pg_lock); 1099 rcu_assign_pointer(h->pg, NULL); 1100 h->init_error = SCSI_DH_OK; 1101 h->sdev = sdev; 1102 INIT_LIST_HEAD(&h->node); 1103 1104 mutex_init(&h->init_mutex); 1105 err = alua_initialize(sdev, h); 1106 if (err == SCSI_DH_NOMEM) 1107 ret = -ENOMEM; 1108 if (err != SCSI_DH_OK && err != SCSI_DH_DEV_OFFLINED) 1109 goto failed; 1110 1111 sdev->handler_data = h; 1112 return 0; 1113 failed: 1114 kfree(h); 1115 return ret; 1116 } 1117 1118 /* 1119 * alua_bus_detach - Detach device handler 1120 * @sdev: device to be detached from 1121 */ 1122 static void alua_bus_detach(struct scsi_device *sdev) 1123 { 1124 struct alua_dh_data *h = sdev->handler_data; 1125 struct alua_port_group *pg; 1126 1127 spin_lock(&h->pg_lock); 1128 pg = rcu_dereference_protected(h->pg, lockdep_is_held(&h->pg_lock)); 1129 rcu_assign_pointer(h->pg, NULL); 1130 h->sdev = NULL; 1131 spin_unlock(&h->pg_lock); 1132 if (pg) { 1133 spin_lock_irq(&pg->lock); 1134 list_del_rcu(&h->node); 1135 spin_unlock_irq(&pg->lock); 1136 kref_put(&pg->kref, release_port_group); 1137 } 1138 sdev->handler_data = NULL; 1139 kfree(h); 1140 } 1141 1142 static struct scsi_device_handler alua_dh = { 1143 .name = ALUA_DH_NAME, 1144 .module = THIS_MODULE, 1145 .attach = alua_bus_attach, 1146 .detach = alua_bus_detach, 1147 .prep_fn = alua_prep_fn, 1148 .check_sense = alua_check_sense, 1149 .activate = alua_activate, 1150 .rescan = alua_rescan, 1151 .set_params = alua_set_params, 1152 }; 1153 1154 static int __init alua_init(void) 1155 { 1156 int r; 1157 1158 kaluad_wq = alloc_workqueue("kaluad", WQ_MEM_RECLAIM, 0); 1159 if (!kaluad_wq) { 1160 /* Temporary failure, bypass */ 1161 return SCSI_DH_DEV_TEMP_BUSY; 1162 } 1163 kaluad_sync_wq = create_workqueue("kaluad_sync"); 1164 if (!kaluad_sync_wq) { 1165 destroy_workqueue(kaluad_wq); 1166 return SCSI_DH_DEV_TEMP_BUSY; 1167 } 1168 r = scsi_register_device_handler(&alua_dh); 1169 if (r != 0) { 1170 printk(KERN_ERR "%s: Failed to register scsi device handler", 1171 ALUA_DH_NAME); 1172 destroy_workqueue(kaluad_sync_wq); 1173 destroy_workqueue(kaluad_wq); 1174 } 1175 return r; 1176 } 1177 1178 static void __exit alua_exit(void) 1179 { 1180 scsi_unregister_device_handler(&alua_dh); 1181 destroy_workqueue(kaluad_sync_wq); 1182 destroy_workqueue(kaluad_wq); 1183 } 1184 1185 module_init(alua_init); 1186 module_exit(alua_exit); 1187 1188 MODULE_DESCRIPTION("DM Multipath ALUA support"); 1189 MODULE_AUTHOR("Hannes Reinecke <hare@suse.de>"); 1190 MODULE_LICENSE("GPL"); 1191 MODULE_VERSION(ALUA_DH_VER); 1192