1 /* 2 * Generic SCSI-3 ALUA SCSI Device Handler 3 * 4 * Copyright (C) 2007-2010 Hannes Reinecke, SUSE Linux Products GmbH. 5 * All rights reserved. 6 * 7 * This program is free software; you can redistribute it and/or modify 8 * it under the terms of the GNU General Public License as published by 9 * the Free Software Foundation; either version 2 of the License, or 10 * (at your option) any later version. 11 * 12 * This program is distributed in the hope that it will be useful, 13 * but WITHOUT ANY WARRANTY; without even the implied warranty of 14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 * GNU General Public License for more details. 16 * 17 * You should have received a copy of the GNU General Public License 18 * along with this program; if not, write to the Free Software 19 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 20 * 21 */ 22 #include <linux/slab.h> 23 #include <linux/delay.h> 24 #include <linux/module.h> 25 #include <asm/unaligned.h> 26 #include <scsi/scsi.h> 27 #include <scsi/scsi_proto.h> 28 #include <scsi/scsi_dbg.h> 29 #include <scsi/scsi_eh.h> 30 #include <scsi/scsi_dh.h> 31 32 #define ALUA_DH_NAME "alua" 33 #define ALUA_DH_VER "2.0" 34 35 #define TPGS_SUPPORT_NONE 0x00 36 #define TPGS_SUPPORT_OPTIMIZED 0x01 37 #define TPGS_SUPPORT_NONOPTIMIZED 0x02 38 #define TPGS_SUPPORT_STANDBY 0x04 39 #define TPGS_SUPPORT_UNAVAILABLE 0x08 40 #define TPGS_SUPPORT_LBA_DEPENDENT 0x10 41 #define TPGS_SUPPORT_OFFLINE 0x40 42 #define TPGS_SUPPORT_TRANSITION 0x80 43 44 #define RTPG_FMT_MASK 0x70 45 #define RTPG_FMT_EXT_HDR 0x10 46 47 #define TPGS_MODE_UNINITIALIZED -1 48 #define TPGS_MODE_NONE 0x0 49 #define TPGS_MODE_IMPLICIT 0x1 50 #define TPGS_MODE_EXPLICIT 0x2 51 52 #define ALUA_RTPG_SIZE 128 53 #define ALUA_FAILOVER_TIMEOUT 60 54 #define ALUA_FAILOVER_RETRIES 5 55 #define ALUA_RTPG_DELAY_MSECS 5 56 57 /* device handler flags */ 58 #define ALUA_OPTIMIZE_STPG 0x01 59 #define ALUA_RTPG_EXT_HDR_UNSUPP 0x02 60 /* State machine flags */ 61 #define ALUA_PG_RUN_RTPG 0x10 62 #define ALUA_PG_RUN_STPG 0x20 63 #define ALUA_PG_RUNNING 0x40 64 65 static uint optimize_stpg; 66 module_param(optimize_stpg, uint, S_IRUGO|S_IWUSR); 67 MODULE_PARM_DESC(optimize_stpg, "Allow use of a non-optimized path, rather than sending a STPG, when implicit TPGS is supported (0=No,1=Yes). Default is 0."); 68 69 static LIST_HEAD(port_group_list); 70 static DEFINE_SPINLOCK(port_group_lock); 71 static struct workqueue_struct *kaluad_wq; 72 73 struct alua_port_group { 74 struct kref kref; 75 struct rcu_head rcu; 76 struct list_head node; 77 struct list_head dh_list; 78 unsigned char device_id_str[256]; 79 int device_id_len; 80 int group_id; 81 int tpgs; 82 int state; 83 int pref; 84 unsigned flags; /* used for optimizing STPG */ 85 unsigned char transition_tmo; 86 unsigned long expiry; 87 unsigned long interval; 88 struct delayed_work rtpg_work; 89 spinlock_t lock; 90 struct list_head rtpg_list; 91 struct scsi_device *rtpg_sdev; 92 }; 93 94 struct alua_dh_data { 95 struct list_head node; 96 struct alua_port_group __rcu *pg; 97 int group_id; 98 spinlock_t pg_lock; 99 struct scsi_device *sdev; 100 int init_error; 101 struct mutex init_mutex; 102 }; 103 104 struct alua_queue_data { 105 struct list_head entry; 106 activate_complete callback_fn; 107 void *callback_data; 108 }; 109 110 #define ALUA_POLICY_SWITCH_CURRENT 0 111 #define ALUA_POLICY_SWITCH_ALL 1 112 113 static void alua_rtpg_work(struct work_struct *work); 114 static bool alua_rtpg_queue(struct alua_port_group *pg, 115 struct scsi_device *sdev, 116 struct alua_queue_data *qdata, bool force); 117 static void alua_check(struct scsi_device *sdev, bool force); 118 119 static void release_port_group(struct kref *kref) 120 { 121 struct alua_port_group *pg; 122 123 pg = container_of(kref, struct alua_port_group, kref); 124 if (pg->rtpg_sdev) 125 flush_delayed_work(&pg->rtpg_work); 126 spin_lock(&port_group_lock); 127 list_del(&pg->node); 128 spin_unlock(&port_group_lock); 129 kfree_rcu(pg, rcu); 130 } 131 132 /* 133 * submit_rtpg - Issue a REPORT TARGET GROUP STATES command 134 * @sdev: sdev the command should be sent to 135 */ 136 static int submit_rtpg(struct scsi_device *sdev, unsigned char *buff, 137 int bufflen, struct scsi_sense_hdr *sshdr, int flags) 138 { 139 u8 cdb[COMMAND_SIZE(MAINTENANCE_IN)]; 140 int req_flags = REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT | 141 REQ_FAILFAST_DRIVER; 142 143 /* Prepare the command. */ 144 memset(cdb, 0x0, COMMAND_SIZE(MAINTENANCE_IN)); 145 cdb[0] = MAINTENANCE_IN; 146 if (!(flags & ALUA_RTPG_EXT_HDR_UNSUPP)) 147 cdb[1] = MI_REPORT_TARGET_PGS | MI_EXT_HDR_PARAM_FMT; 148 else 149 cdb[1] = MI_REPORT_TARGET_PGS; 150 put_unaligned_be32(bufflen, &cdb[6]); 151 152 return scsi_execute(sdev, cdb, DMA_FROM_DEVICE, buff, bufflen, NULL, 153 sshdr, ALUA_FAILOVER_TIMEOUT * HZ, 154 ALUA_FAILOVER_RETRIES, req_flags, 0, NULL); 155 } 156 157 /* 158 * submit_stpg - Issue a SET TARGET PORT GROUP command 159 * 160 * Currently we're only setting the current target port group state 161 * to 'active/optimized' and let the array firmware figure out 162 * the states of the remaining groups. 163 */ 164 static int submit_stpg(struct scsi_device *sdev, int group_id, 165 struct scsi_sense_hdr *sshdr) 166 { 167 u8 cdb[COMMAND_SIZE(MAINTENANCE_OUT)]; 168 unsigned char stpg_data[8]; 169 int stpg_len = 8; 170 int req_flags = REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT | 171 REQ_FAILFAST_DRIVER; 172 173 /* Prepare the data buffer */ 174 memset(stpg_data, 0, stpg_len); 175 stpg_data[4] = SCSI_ACCESS_STATE_OPTIMAL; 176 put_unaligned_be16(group_id, &stpg_data[6]); 177 178 /* Prepare the command. */ 179 memset(cdb, 0x0, COMMAND_SIZE(MAINTENANCE_OUT)); 180 cdb[0] = MAINTENANCE_OUT; 181 cdb[1] = MO_SET_TARGET_PGS; 182 put_unaligned_be32(stpg_len, &cdb[6]); 183 184 return scsi_execute(sdev, cdb, DMA_TO_DEVICE, stpg_data, stpg_len, NULL, 185 sshdr, ALUA_FAILOVER_TIMEOUT * HZ, 186 ALUA_FAILOVER_RETRIES, req_flags, 0, NULL); 187 } 188 189 static struct alua_port_group *alua_find_get_pg(char *id_str, size_t id_size, 190 int group_id) 191 { 192 struct alua_port_group *pg; 193 194 if (!id_str || !id_size || !strlen(id_str)) 195 return NULL; 196 197 list_for_each_entry(pg, &port_group_list, node) { 198 if (pg->group_id != group_id) 199 continue; 200 if (!pg->device_id_len || pg->device_id_len != id_size) 201 continue; 202 if (strncmp(pg->device_id_str, id_str, id_size)) 203 continue; 204 if (!kref_get_unless_zero(&pg->kref)) 205 continue; 206 return pg; 207 } 208 209 return NULL; 210 } 211 212 /* 213 * alua_alloc_pg - Allocate a new port_group structure 214 * @sdev: scsi device 215 * @h: alua device_handler data 216 * @group_id: port group id 217 * 218 * Allocate a new port_group structure for a given 219 * device. 220 */ 221 static struct alua_port_group *alua_alloc_pg(struct scsi_device *sdev, 222 int group_id, int tpgs) 223 { 224 struct alua_port_group *pg, *tmp_pg; 225 226 pg = kzalloc(sizeof(struct alua_port_group), GFP_KERNEL); 227 if (!pg) 228 return ERR_PTR(-ENOMEM); 229 230 pg->device_id_len = scsi_vpd_lun_id(sdev, pg->device_id_str, 231 sizeof(pg->device_id_str)); 232 if (pg->device_id_len <= 0) { 233 /* 234 * TPGS supported but no device identification found. 235 * Generate private device identification. 236 */ 237 sdev_printk(KERN_INFO, sdev, 238 "%s: No device descriptors found\n", 239 ALUA_DH_NAME); 240 pg->device_id_str[0] = '\0'; 241 pg->device_id_len = 0; 242 } 243 pg->group_id = group_id; 244 pg->tpgs = tpgs; 245 pg->state = SCSI_ACCESS_STATE_OPTIMAL; 246 if (optimize_stpg) 247 pg->flags |= ALUA_OPTIMIZE_STPG; 248 kref_init(&pg->kref); 249 INIT_DELAYED_WORK(&pg->rtpg_work, alua_rtpg_work); 250 INIT_LIST_HEAD(&pg->rtpg_list); 251 INIT_LIST_HEAD(&pg->node); 252 INIT_LIST_HEAD(&pg->dh_list); 253 spin_lock_init(&pg->lock); 254 255 spin_lock(&port_group_lock); 256 tmp_pg = alua_find_get_pg(pg->device_id_str, pg->device_id_len, 257 group_id); 258 if (tmp_pg) { 259 spin_unlock(&port_group_lock); 260 kfree(pg); 261 return tmp_pg; 262 } 263 264 list_add(&pg->node, &port_group_list); 265 spin_unlock(&port_group_lock); 266 267 return pg; 268 } 269 270 /* 271 * alua_check_tpgs - Evaluate TPGS setting 272 * @sdev: device to be checked 273 * 274 * Examine the TPGS setting of the sdev to find out if ALUA 275 * is supported. 276 */ 277 static int alua_check_tpgs(struct scsi_device *sdev) 278 { 279 int tpgs = TPGS_MODE_NONE; 280 281 /* 282 * ALUA support for non-disk devices is fraught with 283 * difficulties, so disable it for now. 284 */ 285 if (sdev->type != TYPE_DISK) { 286 sdev_printk(KERN_INFO, sdev, 287 "%s: disable for non-disk devices\n", 288 ALUA_DH_NAME); 289 return tpgs; 290 } 291 292 tpgs = scsi_device_tpgs(sdev); 293 switch (tpgs) { 294 case TPGS_MODE_EXPLICIT|TPGS_MODE_IMPLICIT: 295 sdev_printk(KERN_INFO, sdev, 296 "%s: supports implicit and explicit TPGS\n", 297 ALUA_DH_NAME); 298 break; 299 case TPGS_MODE_EXPLICIT: 300 sdev_printk(KERN_INFO, sdev, "%s: supports explicit TPGS\n", 301 ALUA_DH_NAME); 302 break; 303 case TPGS_MODE_IMPLICIT: 304 sdev_printk(KERN_INFO, sdev, "%s: supports implicit TPGS\n", 305 ALUA_DH_NAME); 306 break; 307 case TPGS_MODE_NONE: 308 sdev_printk(KERN_INFO, sdev, "%s: not supported\n", 309 ALUA_DH_NAME); 310 break; 311 default: 312 sdev_printk(KERN_INFO, sdev, 313 "%s: unsupported TPGS setting %d\n", 314 ALUA_DH_NAME, tpgs); 315 tpgs = TPGS_MODE_NONE; 316 break; 317 } 318 319 return tpgs; 320 } 321 322 /* 323 * alua_check_vpd - Evaluate INQUIRY vpd page 0x83 324 * @sdev: device to be checked 325 * 326 * Extract the relative target port and the target port group 327 * descriptor from the list of identificators. 328 */ 329 static int alua_check_vpd(struct scsi_device *sdev, struct alua_dh_data *h, 330 int tpgs) 331 { 332 int rel_port = -1, group_id; 333 struct alua_port_group *pg, *old_pg = NULL; 334 bool pg_updated = false; 335 unsigned long flags; 336 337 group_id = scsi_vpd_tpg_id(sdev, &rel_port); 338 if (group_id < 0) { 339 /* 340 * Internal error; TPGS supported but required 341 * VPD identification descriptors not present. 342 * Disable ALUA support 343 */ 344 sdev_printk(KERN_INFO, sdev, 345 "%s: No target port descriptors found\n", 346 ALUA_DH_NAME); 347 return SCSI_DH_DEV_UNSUPP; 348 } 349 350 pg = alua_alloc_pg(sdev, group_id, tpgs); 351 if (IS_ERR(pg)) { 352 if (PTR_ERR(pg) == -ENOMEM) 353 return SCSI_DH_NOMEM; 354 return SCSI_DH_DEV_UNSUPP; 355 } 356 if (pg->device_id_len) 357 sdev_printk(KERN_INFO, sdev, 358 "%s: device %s port group %x rel port %x\n", 359 ALUA_DH_NAME, pg->device_id_str, 360 group_id, rel_port); 361 else 362 sdev_printk(KERN_INFO, sdev, 363 "%s: port group %x rel port %x\n", 364 ALUA_DH_NAME, group_id, rel_port); 365 366 /* Check for existing port group references */ 367 spin_lock(&h->pg_lock); 368 old_pg = rcu_dereference_protected(h->pg, lockdep_is_held(&h->pg_lock)); 369 if (old_pg != pg) { 370 /* port group has changed. Update to new port group */ 371 if (h->pg) { 372 spin_lock_irqsave(&old_pg->lock, flags); 373 list_del_rcu(&h->node); 374 spin_unlock_irqrestore(&old_pg->lock, flags); 375 } 376 rcu_assign_pointer(h->pg, pg); 377 pg_updated = true; 378 } 379 380 spin_lock_irqsave(&pg->lock, flags); 381 if (pg_updated) 382 list_add_rcu(&h->node, &pg->dh_list); 383 spin_unlock_irqrestore(&pg->lock, flags); 384 385 alua_rtpg_queue(rcu_dereference_protected(h->pg, 386 lockdep_is_held(&h->pg_lock)), 387 sdev, NULL, true); 388 spin_unlock(&h->pg_lock); 389 390 if (old_pg) 391 kref_put(&old_pg->kref, release_port_group); 392 393 return SCSI_DH_OK; 394 } 395 396 static char print_alua_state(unsigned char state) 397 { 398 switch (state) { 399 case SCSI_ACCESS_STATE_OPTIMAL: 400 return 'A'; 401 case SCSI_ACCESS_STATE_ACTIVE: 402 return 'N'; 403 case SCSI_ACCESS_STATE_STANDBY: 404 return 'S'; 405 case SCSI_ACCESS_STATE_UNAVAILABLE: 406 return 'U'; 407 case SCSI_ACCESS_STATE_LBA: 408 return 'L'; 409 case SCSI_ACCESS_STATE_OFFLINE: 410 return 'O'; 411 case SCSI_ACCESS_STATE_TRANSITIONING: 412 return 'T'; 413 default: 414 return 'X'; 415 } 416 } 417 418 static int alua_check_sense(struct scsi_device *sdev, 419 struct scsi_sense_hdr *sense_hdr) 420 { 421 switch (sense_hdr->sense_key) { 422 case NOT_READY: 423 if (sense_hdr->asc == 0x04 && sense_hdr->ascq == 0x0a) { 424 /* 425 * LUN Not Accessible - ALUA state transition 426 */ 427 alua_check(sdev, false); 428 return NEEDS_RETRY; 429 } 430 break; 431 case UNIT_ATTENTION: 432 if (sense_hdr->asc == 0x29 && sense_hdr->ascq == 0x00) { 433 /* 434 * Power On, Reset, or Bus Device Reset. 435 * Might have obscured a state transition, 436 * so schedule a recheck. 437 */ 438 alua_check(sdev, true); 439 return ADD_TO_MLQUEUE; 440 } 441 if (sense_hdr->asc == 0x29 && sense_hdr->ascq == 0x04) 442 /* 443 * Device internal reset 444 */ 445 return ADD_TO_MLQUEUE; 446 if (sense_hdr->asc == 0x2a && sense_hdr->ascq == 0x01) 447 /* 448 * Mode Parameters Changed 449 */ 450 return ADD_TO_MLQUEUE; 451 if (sense_hdr->asc == 0x2a && sense_hdr->ascq == 0x06) { 452 /* 453 * ALUA state changed 454 */ 455 alua_check(sdev, true); 456 return ADD_TO_MLQUEUE; 457 } 458 if (sense_hdr->asc == 0x2a && sense_hdr->ascq == 0x07) { 459 /* 460 * Implicit ALUA state transition failed 461 */ 462 alua_check(sdev, true); 463 return ADD_TO_MLQUEUE; 464 } 465 if (sense_hdr->asc == 0x3f && sense_hdr->ascq == 0x03) 466 /* 467 * Inquiry data has changed 468 */ 469 return ADD_TO_MLQUEUE; 470 if (sense_hdr->asc == 0x3f && sense_hdr->ascq == 0x0e) 471 /* 472 * REPORTED_LUNS_DATA_HAS_CHANGED is reported 473 * when switching controllers on targets like 474 * Intel Multi-Flex. We can just retry. 475 */ 476 return ADD_TO_MLQUEUE; 477 break; 478 } 479 480 return SCSI_RETURN_NOT_HANDLED; 481 } 482 483 /* 484 * alua_tur - Send a TEST UNIT READY 485 * @sdev: device to which the TEST UNIT READY command should be send 486 * 487 * Send a TEST UNIT READY to @sdev to figure out the device state 488 * Returns SCSI_DH_RETRY if the sense code is NOT READY/ALUA TRANSITIONING, 489 * SCSI_DH_OK if no error occurred, and SCSI_DH_IO otherwise. 490 */ 491 static int alua_tur(struct scsi_device *sdev) 492 { 493 struct scsi_sense_hdr sense_hdr; 494 int retval; 495 496 retval = scsi_test_unit_ready(sdev, ALUA_FAILOVER_TIMEOUT * HZ, 497 ALUA_FAILOVER_RETRIES, &sense_hdr); 498 if (sense_hdr.sense_key == NOT_READY && 499 sense_hdr.asc == 0x04 && sense_hdr.ascq == 0x0a) 500 return SCSI_DH_RETRY; 501 else if (retval) 502 return SCSI_DH_IO; 503 else 504 return SCSI_DH_OK; 505 } 506 507 /* 508 * alua_rtpg - Evaluate REPORT TARGET GROUP STATES 509 * @sdev: the device to be evaluated. 510 * 511 * Evaluate the Target Port Group State. 512 * Returns SCSI_DH_DEV_OFFLINED if the path is 513 * found to be unusable. 514 */ 515 static int alua_rtpg(struct scsi_device *sdev, struct alua_port_group *pg) 516 { 517 struct scsi_sense_hdr sense_hdr; 518 struct alua_port_group *tmp_pg; 519 int len, k, off, valid_states = 0, bufflen = ALUA_RTPG_SIZE; 520 unsigned char *desc, *buff; 521 unsigned err, retval; 522 unsigned int tpg_desc_tbl_off; 523 unsigned char orig_transition_tmo; 524 unsigned long flags; 525 526 if (!pg->expiry) { 527 unsigned long transition_tmo = ALUA_FAILOVER_TIMEOUT * HZ; 528 529 if (pg->transition_tmo) 530 transition_tmo = pg->transition_tmo * HZ; 531 532 pg->expiry = round_jiffies_up(jiffies + transition_tmo); 533 } 534 535 buff = kzalloc(bufflen, GFP_KERNEL); 536 if (!buff) 537 return SCSI_DH_DEV_TEMP_BUSY; 538 539 retry: 540 err = 0; 541 retval = submit_rtpg(sdev, buff, bufflen, &sense_hdr, pg->flags); 542 543 if (retval) { 544 if (!scsi_sense_valid(&sense_hdr)) { 545 sdev_printk(KERN_INFO, sdev, 546 "%s: rtpg failed, result %d\n", 547 ALUA_DH_NAME, retval); 548 kfree(buff); 549 if (driver_byte(retval) == DRIVER_ERROR) 550 return SCSI_DH_DEV_TEMP_BUSY; 551 return SCSI_DH_IO; 552 } 553 554 /* 555 * submit_rtpg() has failed on existing arrays 556 * when requesting extended header info, and 557 * the array doesn't support extended headers, 558 * even though it shouldn't according to T10. 559 * The retry without rtpg_ext_hdr_req set 560 * handles this. 561 */ 562 if (!(pg->flags & ALUA_RTPG_EXT_HDR_UNSUPP) && 563 sense_hdr.sense_key == ILLEGAL_REQUEST && 564 sense_hdr.asc == 0x24 && sense_hdr.ascq == 0) { 565 pg->flags |= ALUA_RTPG_EXT_HDR_UNSUPP; 566 goto retry; 567 } 568 /* 569 * Retry on ALUA state transition or if any 570 * UNIT ATTENTION occurred. 571 */ 572 if (sense_hdr.sense_key == NOT_READY && 573 sense_hdr.asc == 0x04 && sense_hdr.ascq == 0x0a) 574 err = SCSI_DH_RETRY; 575 else if (sense_hdr.sense_key == UNIT_ATTENTION) 576 err = SCSI_DH_RETRY; 577 if (err == SCSI_DH_RETRY && 578 pg->expiry != 0 && time_before(jiffies, pg->expiry)) { 579 sdev_printk(KERN_ERR, sdev, "%s: rtpg retry\n", 580 ALUA_DH_NAME); 581 scsi_print_sense_hdr(sdev, ALUA_DH_NAME, &sense_hdr); 582 kfree(buff); 583 return err; 584 } 585 sdev_printk(KERN_ERR, sdev, "%s: rtpg failed\n", 586 ALUA_DH_NAME); 587 scsi_print_sense_hdr(sdev, ALUA_DH_NAME, &sense_hdr); 588 kfree(buff); 589 pg->expiry = 0; 590 return SCSI_DH_IO; 591 } 592 593 len = get_unaligned_be32(&buff[0]) + 4; 594 595 if (len > bufflen) { 596 /* Resubmit with the correct length */ 597 kfree(buff); 598 bufflen = len; 599 buff = kmalloc(bufflen, GFP_KERNEL); 600 if (!buff) { 601 sdev_printk(KERN_WARNING, sdev, 602 "%s: kmalloc buffer failed\n",__func__); 603 /* Temporary failure, bypass */ 604 pg->expiry = 0; 605 return SCSI_DH_DEV_TEMP_BUSY; 606 } 607 goto retry; 608 } 609 610 orig_transition_tmo = pg->transition_tmo; 611 if ((buff[4] & RTPG_FMT_MASK) == RTPG_FMT_EXT_HDR && buff[5] != 0) 612 pg->transition_tmo = buff[5]; 613 else 614 pg->transition_tmo = ALUA_FAILOVER_TIMEOUT; 615 616 if (orig_transition_tmo != pg->transition_tmo) { 617 sdev_printk(KERN_INFO, sdev, 618 "%s: transition timeout set to %d seconds\n", 619 ALUA_DH_NAME, pg->transition_tmo); 620 pg->expiry = jiffies + pg->transition_tmo * HZ; 621 } 622 623 if ((buff[4] & RTPG_FMT_MASK) == RTPG_FMT_EXT_HDR) 624 tpg_desc_tbl_off = 8; 625 else 626 tpg_desc_tbl_off = 4; 627 628 for (k = tpg_desc_tbl_off, desc = buff + tpg_desc_tbl_off; 629 k < len; 630 k += off, desc += off) { 631 u16 group_id = get_unaligned_be16(&desc[2]); 632 633 spin_lock_irqsave(&port_group_lock, flags); 634 tmp_pg = alua_find_get_pg(pg->device_id_str, pg->device_id_len, 635 group_id); 636 spin_unlock_irqrestore(&port_group_lock, flags); 637 if (tmp_pg) { 638 if (spin_trylock_irqsave(&tmp_pg->lock, flags)) { 639 if ((tmp_pg == pg) || 640 !(tmp_pg->flags & ALUA_PG_RUNNING)) { 641 struct alua_dh_data *h; 642 643 tmp_pg->state = desc[0] & 0x0f; 644 tmp_pg->pref = desc[0] >> 7; 645 rcu_read_lock(); 646 list_for_each_entry_rcu(h, 647 &tmp_pg->dh_list, node) { 648 /* h->sdev should always be valid */ 649 BUG_ON(!h->sdev); 650 h->sdev->access_state = desc[0]; 651 } 652 rcu_read_unlock(); 653 } 654 if (tmp_pg == pg) 655 valid_states = desc[1]; 656 spin_unlock_irqrestore(&tmp_pg->lock, flags); 657 } 658 kref_put(&tmp_pg->kref, release_port_group); 659 } 660 off = 8 + (desc[7] * 4); 661 } 662 663 spin_lock_irqsave(&pg->lock, flags); 664 sdev_printk(KERN_INFO, sdev, 665 "%s: port group %02x state %c %s supports %c%c%c%c%c%c%c\n", 666 ALUA_DH_NAME, pg->group_id, print_alua_state(pg->state), 667 pg->pref ? "preferred" : "non-preferred", 668 valid_states&TPGS_SUPPORT_TRANSITION?'T':'t', 669 valid_states&TPGS_SUPPORT_OFFLINE?'O':'o', 670 valid_states&TPGS_SUPPORT_LBA_DEPENDENT?'L':'l', 671 valid_states&TPGS_SUPPORT_UNAVAILABLE?'U':'u', 672 valid_states&TPGS_SUPPORT_STANDBY?'S':'s', 673 valid_states&TPGS_SUPPORT_NONOPTIMIZED?'N':'n', 674 valid_states&TPGS_SUPPORT_OPTIMIZED?'A':'a'); 675 676 switch (pg->state) { 677 case SCSI_ACCESS_STATE_TRANSITIONING: 678 if (time_before(jiffies, pg->expiry)) { 679 /* State transition, retry */ 680 pg->interval = 2; 681 err = SCSI_DH_RETRY; 682 } else { 683 struct alua_dh_data *h; 684 685 /* Transitioning time exceeded, set port to standby */ 686 err = SCSI_DH_IO; 687 pg->state = SCSI_ACCESS_STATE_STANDBY; 688 pg->expiry = 0; 689 rcu_read_lock(); 690 list_for_each_entry_rcu(h, &pg->dh_list, node) { 691 BUG_ON(!h->sdev); 692 h->sdev->access_state = 693 (pg->state & SCSI_ACCESS_STATE_MASK); 694 if (pg->pref) 695 h->sdev->access_state |= 696 SCSI_ACCESS_STATE_PREFERRED; 697 } 698 rcu_read_unlock(); 699 } 700 break; 701 case SCSI_ACCESS_STATE_OFFLINE: 702 /* Path unusable */ 703 err = SCSI_DH_DEV_OFFLINED; 704 pg->expiry = 0; 705 break; 706 default: 707 /* Useable path if active */ 708 err = SCSI_DH_OK; 709 pg->expiry = 0; 710 break; 711 } 712 spin_unlock_irqrestore(&pg->lock, flags); 713 kfree(buff); 714 return err; 715 } 716 717 /* 718 * alua_stpg - Issue a SET TARGET PORT GROUP command 719 * 720 * Issue a SET TARGET PORT GROUP command and evaluate the 721 * response. Returns SCSI_DH_RETRY per default to trigger 722 * a re-evaluation of the target group state or SCSI_DH_OK 723 * if no further action needs to be taken. 724 */ 725 static unsigned alua_stpg(struct scsi_device *sdev, struct alua_port_group *pg) 726 { 727 int retval; 728 struct scsi_sense_hdr sense_hdr; 729 730 if (!(pg->tpgs & TPGS_MODE_EXPLICIT)) { 731 /* Only implicit ALUA supported, retry */ 732 return SCSI_DH_RETRY; 733 } 734 switch (pg->state) { 735 case SCSI_ACCESS_STATE_OPTIMAL: 736 return SCSI_DH_OK; 737 case SCSI_ACCESS_STATE_ACTIVE: 738 if ((pg->flags & ALUA_OPTIMIZE_STPG) && 739 !pg->pref && 740 (pg->tpgs & TPGS_MODE_IMPLICIT)) 741 return SCSI_DH_OK; 742 break; 743 case SCSI_ACCESS_STATE_STANDBY: 744 case SCSI_ACCESS_STATE_UNAVAILABLE: 745 break; 746 case SCSI_ACCESS_STATE_OFFLINE: 747 return SCSI_DH_IO; 748 case SCSI_ACCESS_STATE_TRANSITIONING: 749 break; 750 default: 751 sdev_printk(KERN_INFO, sdev, 752 "%s: stpg failed, unhandled TPGS state %d", 753 ALUA_DH_NAME, pg->state); 754 return SCSI_DH_NOSYS; 755 } 756 retval = submit_stpg(sdev, pg->group_id, &sense_hdr); 757 758 if (retval) { 759 if (!scsi_sense_valid(&sense_hdr)) { 760 sdev_printk(KERN_INFO, sdev, 761 "%s: stpg failed, result %d", 762 ALUA_DH_NAME, retval); 763 if (driver_byte(retval) == DRIVER_ERROR) 764 return SCSI_DH_DEV_TEMP_BUSY; 765 } else { 766 sdev_printk(KERN_INFO, sdev, "%s: stpg failed\n", 767 ALUA_DH_NAME); 768 scsi_print_sense_hdr(sdev, ALUA_DH_NAME, &sense_hdr); 769 } 770 } 771 /* Retry RTPG */ 772 return SCSI_DH_RETRY; 773 } 774 775 static void alua_rtpg_work(struct work_struct *work) 776 { 777 struct alua_port_group *pg = 778 container_of(work, struct alua_port_group, rtpg_work.work); 779 struct scsi_device *sdev; 780 LIST_HEAD(qdata_list); 781 int err = SCSI_DH_OK; 782 struct alua_queue_data *qdata, *tmp; 783 unsigned long flags; 784 785 spin_lock_irqsave(&pg->lock, flags); 786 sdev = pg->rtpg_sdev; 787 if (!sdev) { 788 WARN_ON(pg->flags & ALUA_PG_RUN_RTPG); 789 WARN_ON(pg->flags & ALUA_PG_RUN_STPG); 790 spin_unlock_irqrestore(&pg->lock, flags); 791 kref_put(&pg->kref, release_port_group); 792 return; 793 } 794 pg->flags |= ALUA_PG_RUNNING; 795 if (pg->flags & ALUA_PG_RUN_RTPG) { 796 int state = pg->state; 797 798 pg->flags &= ~ALUA_PG_RUN_RTPG; 799 spin_unlock_irqrestore(&pg->lock, flags); 800 if (state == SCSI_ACCESS_STATE_TRANSITIONING) { 801 if (alua_tur(sdev) == SCSI_DH_RETRY) { 802 spin_lock_irqsave(&pg->lock, flags); 803 pg->flags &= ~ALUA_PG_RUNNING; 804 pg->flags |= ALUA_PG_RUN_RTPG; 805 spin_unlock_irqrestore(&pg->lock, flags); 806 queue_delayed_work(kaluad_wq, &pg->rtpg_work, 807 pg->interval * HZ); 808 return; 809 } 810 /* Send RTPG on failure or if TUR indicates SUCCESS */ 811 } 812 err = alua_rtpg(sdev, pg); 813 spin_lock_irqsave(&pg->lock, flags); 814 if (err == SCSI_DH_RETRY || pg->flags & ALUA_PG_RUN_RTPG) { 815 pg->flags &= ~ALUA_PG_RUNNING; 816 pg->flags |= ALUA_PG_RUN_RTPG; 817 spin_unlock_irqrestore(&pg->lock, flags); 818 queue_delayed_work(kaluad_wq, &pg->rtpg_work, 819 pg->interval * HZ); 820 return; 821 } 822 if (err != SCSI_DH_OK) 823 pg->flags &= ~ALUA_PG_RUN_STPG; 824 } 825 if (pg->flags & ALUA_PG_RUN_STPG) { 826 pg->flags &= ~ALUA_PG_RUN_STPG; 827 spin_unlock_irqrestore(&pg->lock, flags); 828 err = alua_stpg(sdev, pg); 829 spin_lock_irqsave(&pg->lock, flags); 830 if (err == SCSI_DH_RETRY || pg->flags & ALUA_PG_RUN_RTPG) { 831 pg->flags |= ALUA_PG_RUN_RTPG; 832 pg->interval = 0; 833 pg->flags &= ~ALUA_PG_RUNNING; 834 spin_unlock_irqrestore(&pg->lock, flags); 835 queue_delayed_work(kaluad_wq, &pg->rtpg_work, 836 pg->interval * HZ); 837 return; 838 } 839 } 840 841 list_splice_init(&pg->rtpg_list, &qdata_list); 842 pg->rtpg_sdev = NULL; 843 spin_unlock_irqrestore(&pg->lock, flags); 844 845 list_for_each_entry_safe(qdata, tmp, &qdata_list, entry) { 846 list_del(&qdata->entry); 847 if (qdata->callback_fn) 848 qdata->callback_fn(qdata->callback_data, err); 849 kfree(qdata); 850 } 851 spin_lock_irqsave(&pg->lock, flags); 852 pg->flags &= ~ALUA_PG_RUNNING; 853 spin_unlock_irqrestore(&pg->lock, flags); 854 scsi_device_put(sdev); 855 kref_put(&pg->kref, release_port_group); 856 } 857 858 /** 859 * alua_rtpg_queue() - cause RTPG to be submitted asynchronously 860 * 861 * Returns true if and only if alua_rtpg_work() will be called asynchronously. 862 * That function is responsible for calling @qdata->fn(). 863 */ 864 static bool alua_rtpg_queue(struct alua_port_group *pg, 865 struct scsi_device *sdev, 866 struct alua_queue_data *qdata, bool force) 867 { 868 int start_queue = 0; 869 unsigned long flags; 870 if (WARN_ON_ONCE(!pg) || scsi_device_get(sdev)) 871 return false; 872 873 spin_lock_irqsave(&pg->lock, flags); 874 if (qdata) { 875 list_add_tail(&qdata->entry, &pg->rtpg_list); 876 pg->flags |= ALUA_PG_RUN_STPG; 877 force = true; 878 } 879 if (pg->rtpg_sdev == NULL) { 880 pg->interval = 0; 881 pg->flags |= ALUA_PG_RUN_RTPG; 882 kref_get(&pg->kref); 883 pg->rtpg_sdev = sdev; 884 start_queue = 1; 885 } else if (!(pg->flags & ALUA_PG_RUN_RTPG) && force) { 886 pg->flags |= ALUA_PG_RUN_RTPG; 887 /* Do not queue if the worker is already running */ 888 if (!(pg->flags & ALUA_PG_RUNNING)) { 889 kref_get(&pg->kref); 890 start_queue = 1; 891 } 892 } 893 894 spin_unlock_irqrestore(&pg->lock, flags); 895 896 if (start_queue) { 897 if (queue_delayed_work(kaluad_wq, &pg->rtpg_work, 898 msecs_to_jiffies(ALUA_RTPG_DELAY_MSECS))) 899 sdev = NULL; 900 else 901 kref_put(&pg->kref, release_port_group); 902 } 903 if (sdev) 904 scsi_device_put(sdev); 905 906 return true; 907 } 908 909 /* 910 * alua_initialize - Initialize ALUA state 911 * @sdev: the device to be initialized 912 * 913 * For the prep_fn to work correctly we have 914 * to initialize the ALUA state for the device. 915 */ 916 static int alua_initialize(struct scsi_device *sdev, struct alua_dh_data *h) 917 { 918 int err = SCSI_DH_DEV_UNSUPP, tpgs; 919 920 mutex_lock(&h->init_mutex); 921 tpgs = alua_check_tpgs(sdev); 922 if (tpgs != TPGS_MODE_NONE) 923 err = alua_check_vpd(sdev, h, tpgs); 924 h->init_error = err; 925 mutex_unlock(&h->init_mutex); 926 return err; 927 } 928 /* 929 * alua_set_params - set/unset the optimize flag 930 * @sdev: device on the path to be activated 931 * params - parameters in the following format 932 * "no_of_params\0param1\0param2\0param3\0...\0" 933 * For example, to set the flag pass the following parameters 934 * from multipath.conf 935 * hardware_handler "2 alua 1" 936 */ 937 static int alua_set_params(struct scsi_device *sdev, const char *params) 938 { 939 struct alua_dh_data *h = sdev->handler_data; 940 struct alua_port_group *pg = NULL; 941 unsigned int optimize = 0, argc; 942 const char *p = params; 943 int result = SCSI_DH_OK; 944 unsigned long flags; 945 946 if ((sscanf(params, "%u", &argc) != 1) || (argc != 1)) 947 return -EINVAL; 948 949 while (*p++) 950 ; 951 if ((sscanf(p, "%u", &optimize) != 1) || (optimize > 1)) 952 return -EINVAL; 953 954 rcu_read_lock(); 955 pg = rcu_dereference(h->pg); 956 if (!pg) { 957 rcu_read_unlock(); 958 return -ENXIO; 959 } 960 spin_lock_irqsave(&pg->lock, flags); 961 if (optimize) 962 pg->flags |= ALUA_OPTIMIZE_STPG; 963 else 964 pg->flags &= ~ALUA_OPTIMIZE_STPG; 965 spin_unlock_irqrestore(&pg->lock, flags); 966 rcu_read_unlock(); 967 968 return result; 969 } 970 971 /* 972 * alua_activate - activate a path 973 * @sdev: device on the path to be activated 974 * 975 * We're currently switching the port group to be activated only and 976 * let the array figure out the rest. 977 * There may be other arrays which require us to switch all port groups 978 * based on a certain policy. But until we actually encounter them it 979 * should be okay. 980 */ 981 static int alua_activate(struct scsi_device *sdev, 982 activate_complete fn, void *data) 983 { 984 struct alua_dh_data *h = sdev->handler_data; 985 int err = SCSI_DH_OK; 986 struct alua_queue_data *qdata; 987 struct alua_port_group *pg; 988 989 qdata = kzalloc(sizeof(*qdata), GFP_KERNEL); 990 if (!qdata) { 991 err = SCSI_DH_RES_TEMP_UNAVAIL; 992 goto out; 993 } 994 qdata->callback_fn = fn; 995 qdata->callback_data = data; 996 997 mutex_lock(&h->init_mutex); 998 rcu_read_lock(); 999 pg = rcu_dereference(h->pg); 1000 if (!pg || !kref_get_unless_zero(&pg->kref)) { 1001 rcu_read_unlock(); 1002 kfree(qdata); 1003 err = h->init_error; 1004 mutex_unlock(&h->init_mutex); 1005 goto out; 1006 } 1007 rcu_read_unlock(); 1008 mutex_unlock(&h->init_mutex); 1009 1010 if (alua_rtpg_queue(pg, sdev, qdata, true)) 1011 fn = NULL; 1012 else 1013 err = SCSI_DH_DEV_OFFLINED; 1014 kref_put(&pg->kref, release_port_group); 1015 out: 1016 if (fn) 1017 fn(data, err); 1018 return 0; 1019 } 1020 1021 /* 1022 * alua_check - check path status 1023 * @sdev: device on the path to be checked 1024 * 1025 * Check the device status 1026 */ 1027 static void alua_check(struct scsi_device *sdev, bool force) 1028 { 1029 struct alua_dh_data *h = sdev->handler_data; 1030 struct alua_port_group *pg; 1031 1032 rcu_read_lock(); 1033 pg = rcu_dereference(h->pg); 1034 if (!pg || !kref_get_unless_zero(&pg->kref)) { 1035 rcu_read_unlock(); 1036 return; 1037 } 1038 rcu_read_unlock(); 1039 1040 alua_rtpg_queue(pg, sdev, NULL, force); 1041 kref_put(&pg->kref, release_port_group); 1042 } 1043 1044 /* 1045 * alua_prep_fn - request callback 1046 * 1047 * Fail I/O to all paths not in state 1048 * active/optimized or active/non-optimized. 1049 */ 1050 static int alua_prep_fn(struct scsi_device *sdev, struct request *req) 1051 { 1052 struct alua_dh_data *h = sdev->handler_data; 1053 struct alua_port_group *pg; 1054 unsigned char state = SCSI_ACCESS_STATE_OPTIMAL; 1055 int ret = BLKPREP_OK; 1056 1057 rcu_read_lock(); 1058 pg = rcu_dereference(h->pg); 1059 if (pg) 1060 state = pg->state; 1061 rcu_read_unlock(); 1062 if (state == SCSI_ACCESS_STATE_TRANSITIONING) 1063 ret = BLKPREP_DEFER; 1064 else if (state != SCSI_ACCESS_STATE_OPTIMAL && 1065 state != SCSI_ACCESS_STATE_ACTIVE && 1066 state != SCSI_ACCESS_STATE_LBA) { 1067 ret = BLKPREP_KILL; 1068 req->rq_flags |= RQF_QUIET; 1069 } 1070 return ret; 1071 1072 } 1073 1074 static void alua_rescan(struct scsi_device *sdev) 1075 { 1076 struct alua_dh_data *h = sdev->handler_data; 1077 1078 alua_initialize(sdev, h); 1079 } 1080 1081 /* 1082 * alua_bus_attach - Attach device handler 1083 * @sdev: device to be attached to 1084 */ 1085 static int alua_bus_attach(struct scsi_device *sdev) 1086 { 1087 struct alua_dh_data *h; 1088 int err, ret = -EINVAL; 1089 1090 h = kzalloc(sizeof(*h) , GFP_KERNEL); 1091 if (!h) 1092 return -ENOMEM; 1093 spin_lock_init(&h->pg_lock); 1094 rcu_assign_pointer(h->pg, NULL); 1095 h->init_error = SCSI_DH_OK; 1096 h->sdev = sdev; 1097 INIT_LIST_HEAD(&h->node); 1098 1099 mutex_init(&h->init_mutex); 1100 err = alua_initialize(sdev, h); 1101 if (err == SCSI_DH_NOMEM) 1102 ret = -ENOMEM; 1103 if (err != SCSI_DH_OK && err != SCSI_DH_DEV_OFFLINED) 1104 goto failed; 1105 1106 sdev->handler_data = h; 1107 return 0; 1108 failed: 1109 kfree(h); 1110 return ret; 1111 } 1112 1113 /* 1114 * alua_bus_detach - Detach device handler 1115 * @sdev: device to be detached from 1116 */ 1117 static void alua_bus_detach(struct scsi_device *sdev) 1118 { 1119 struct alua_dh_data *h = sdev->handler_data; 1120 struct alua_port_group *pg; 1121 1122 spin_lock(&h->pg_lock); 1123 pg = rcu_dereference_protected(h->pg, lockdep_is_held(&h->pg_lock)); 1124 rcu_assign_pointer(h->pg, NULL); 1125 h->sdev = NULL; 1126 spin_unlock(&h->pg_lock); 1127 if (pg) { 1128 spin_lock_irq(&pg->lock); 1129 list_del_rcu(&h->node); 1130 spin_unlock_irq(&pg->lock); 1131 kref_put(&pg->kref, release_port_group); 1132 } 1133 sdev->handler_data = NULL; 1134 kfree(h); 1135 } 1136 1137 static struct scsi_device_handler alua_dh = { 1138 .name = ALUA_DH_NAME, 1139 .module = THIS_MODULE, 1140 .attach = alua_bus_attach, 1141 .detach = alua_bus_detach, 1142 .prep_fn = alua_prep_fn, 1143 .check_sense = alua_check_sense, 1144 .activate = alua_activate, 1145 .rescan = alua_rescan, 1146 .set_params = alua_set_params, 1147 }; 1148 1149 static int __init alua_init(void) 1150 { 1151 int r; 1152 1153 kaluad_wq = alloc_workqueue("kaluad", WQ_MEM_RECLAIM, 0); 1154 if (!kaluad_wq) { 1155 /* Temporary failure, bypass */ 1156 return SCSI_DH_DEV_TEMP_BUSY; 1157 } 1158 1159 r = scsi_register_device_handler(&alua_dh); 1160 if (r != 0) { 1161 printk(KERN_ERR "%s: Failed to register scsi device handler", 1162 ALUA_DH_NAME); 1163 destroy_workqueue(kaluad_wq); 1164 } 1165 return r; 1166 } 1167 1168 static void __exit alua_exit(void) 1169 { 1170 scsi_unregister_device_handler(&alua_dh); 1171 destroy_workqueue(kaluad_wq); 1172 } 1173 1174 module_init(alua_init); 1175 module_exit(alua_exit); 1176 1177 MODULE_DESCRIPTION("DM Multipath ALUA support"); 1178 MODULE_AUTHOR("Hannes Reinecke <hare@suse.de>"); 1179 MODULE_LICENSE("GPL"); 1180 MODULE_VERSION(ALUA_DH_VER); 1181