1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* 3 * Generic SCSI-3 ALUA SCSI Device Handler 4 * 5 * Copyright (C) 2007-2010 Hannes Reinecke, SUSE Linux Products GmbH. 6 * All rights reserved. 7 */ 8 #include <linux/slab.h> 9 #include <linux/delay.h> 10 #include <linux/module.h> 11 #include <asm/unaligned.h> 12 #include <scsi/scsi.h> 13 #include <scsi/scsi_proto.h> 14 #include <scsi/scsi_dbg.h> 15 #include <scsi/scsi_eh.h> 16 #include <scsi/scsi_dh.h> 17 18 #define ALUA_DH_NAME "alua" 19 #define ALUA_DH_VER "2.0" 20 21 #define TPGS_SUPPORT_NONE 0x00 22 #define TPGS_SUPPORT_OPTIMIZED 0x01 23 #define TPGS_SUPPORT_NONOPTIMIZED 0x02 24 #define TPGS_SUPPORT_STANDBY 0x04 25 #define TPGS_SUPPORT_UNAVAILABLE 0x08 26 #define TPGS_SUPPORT_LBA_DEPENDENT 0x10 27 #define TPGS_SUPPORT_OFFLINE 0x40 28 #define TPGS_SUPPORT_TRANSITION 0x80 29 #define TPGS_SUPPORT_ALL 0xdf 30 31 #define RTPG_FMT_MASK 0x70 32 #define RTPG_FMT_EXT_HDR 0x10 33 34 #define TPGS_MODE_UNINITIALIZED -1 35 #define TPGS_MODE_NONE 0x0 36 #define TPGS_MODE_IMPLICIT 0x1 37 #define TPGS_MODE_EXPLICIT 0x2 38 39 #define ALUA_RTPG_SIZE 128 40 #define ALUA_FAILOVER_TIMEOUT 60 41 #define ALUA_FAILOVER_RETRIES 5 42 #define ALUA_RTPG_DELAY_MSECS 5 43 #define ALUA_RTPG_RETRY_DELAY 2 44 45 /* device handler flags */ 46 #define ALUA_OPTIMIZE_STPG 0x01 47 #define ALUA_RTPG_EXT_HDR_UNSUPP 0x02 48 /* State machine flags */ 49 #define ALUA_PG_RUN_RTPG 0x10 50 #define ALUA_PG_RUN_STPG 0x20 51 #define ALUA_PG_RUNNING 0x40 52 53 static uint optimize_stpg; 54 module_param(optimize_stpg, uint, S_IRUGO|S_IWUSR); 55 MODULE_PARM_DESC(optimize_stpg, "Allow use of a non-optimized path, rather than sending a STPG, when implicit TPGS is supported (0=No,1=Yes). Default is 0."); 56 57 static LIST_HEAD(port_group_list); 58 static DEFINE_SPINLOCK(port_group_lock); 59 static struct workqueue_struct *kaluad_wq; 60 61 struct alua_port_group { 62 struct kref kref; 63 struct rcu_head rcu; 64 struct list_head node; 65 struct list_head dh_list; 66 unsigned char device_id_str[256]; 67 int device_id_len; 68 int group_id; 69 int tpgs; 70 int state; 71 int pref; 72 int valid_states; 73 unsigned flags; /* used for optimizing STPG */ 74 unsigned char transition_tmo; 75 unsigned long expiry; 76 unsigned long interval; 77 struct delayed_work rtpg_work; 78 spinlock_t lock; 79 struct list_head rtpg_list; 80 struct scsi_device *rtpg_sdev; 81 }; 82 83 struct alua_dh_data { 84 struct list_head node; 85 struct alua_port_group __rcu *pg; 86 int group_id; 87 spinlock_t pg_lock; 88 struct scsi_device *sdev; 89 int init_error; 90 struct mutex init_mutex; 91 }; 92 93 struct alua_queue_data { 94 struct list_head entry; 95 activate_complete callback_fn; 96 void *callback_data; 97 }; 98 99 #define ALUA_POLICY_SWITCH_CURRENT 0 100 #define ALUA_POLICY_SWITCH_ALL 1 101 102 static void alua_rtpg_work(struct work_struct *work); 103 static bool alua_rtpg_queue(struct alua_port_group *pg, 104 struct scsi_device *sdev, 105 struct alua_queue_data *qdata, bool force); 106 static void alua_check(struct scsi_device *sdev, bool force); 107 108 static void release_port_group(struct kref *kref) 109 { 110 struct alua_port_group *pg; 111 112 pg = container_of(kref, struct alua_port_group, kref); 113 if (pg->rtpg_sdev) 114 flush_delayed_work(&pg->rtpg_work); 115 spin_lock(&port_group_lock); 116 list_del(&pg->node); 117 spin_unlock(&port_group_lock); 118 kfree_rcu(pg, rcu); 119 } 120 121 /* 122 * submit_rtpg - Issue a REPORT TARGET GROUP STATES command 123 * @sdev: sdev the command should be sent to 124 */ 125 static int submit_rtpg(struct scsi_device *sdev, unsigned char *buff, 126 int bufflen, struct scsi_sense_hdr *sshdr, int flags) 127 { 128 u8 cdb[MAX_COMMAND_SIZE]; 129 int req_flags = REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT | 130 REQ_FAILFAST_DRIVER; 131 132 /* Prepare the command. */ 133 memset(cdb, 0x0, MAX_COMMAND_SIZE); 134 cdb[0] = MAINTENANCE_IN; 135 if (!(flags & ALUA_RTPG_EXT_HDR_UNSUPP)) 136 cdb[1] = MI_REPORT_TARGET_PGS | MI_EXT_HDR_PARAM_FMT; 137 else 138 cdb[1] = MI_REPORT_TARGET_PGS; 139 put_unaligned_be32(bufflen, &cdb[6]); 140 141 return scsi_execute(sdev, cdb, DMA_FROM_DEVICE, buff, bufflen, NULL, 142 sshdr, ALUA_FAILOVER_TIMEOUT * HZ, 143 ALUA_FAILOVER_RETRIES, req_flags, 0, NULL); 144 } 145 146 /* 147 * submit_stpg - Issue a SET TARGET PORT GROUP command 148 * 149 * Currently we're only setting the current target port group state 150 * to 'active/optimized' and let the array firmware figure out 151 * the states of the remaining groups. 152 */ 153 static int submit_stpg(struct scsi_device *sdev, int group_id, 154 struct scsi_sense_hdr *sshdr) 155 { 156 u8 cdb[MAX_COMMAND_SIZE]; 157 unsigned char stpg_data[8]; 158 int stpg_len = 8; 159 int req_flags = REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT | 160 REQ_FAILFAST_DRIVER; 161 162 /* Prepare the data buffer */ 163 memset(stpg_data, 0, stpg_len); 164 stpg_data[4] = SCSI_ACCESS_STATE_OPTIMAL; 165 put_unaligned_be16(group_id, &stpg_data[6]); 166 167 /* Prepare the command. */ 168 memset(cdb, 0x0, MAX_COMMAND_SIZE); 169 cdb[0] = MAINTENANCE_OUT; 170 cdb[1] = MO_SET_TARGET_PGS; 171 put_unaligned_be32(stpg_len, &cdb[6]); 172 173 return scsi_execute(sdev, cdb, DMA_TO_DEVICE, stpg_data, stpg_len, NULL, 174 sshdr, ALUA_FAILOVER_TIMEOUT * HZ, 175 ALUA_FAILOVER_RETRIES, req_flags, 0, NULL); 176 } 177 178 static struct alua_port_group *alua_find_get_pg(char *id_str, size_t id_size, 179 int group_id) 180 { 181 struct alua_port_group *pg; 182 183 if (!id_str || !id_size || !strlen(id_str)) 184 return NULL; 185 186 list_for_each_entry(pg, &port_group_list, node) { 187 if (pg->group_id != group_id) 188 continue; 189 if (!pg->device_id_len || pg->device_id_len != id_size) 190 continue; 191 if (strncmp(pg->device_id_str, id_str, id_size)) 192 continue; 193 if (!kref_get_unless_zero(&pg->kref)) 194 continue; 195 return pg; 196 } 197 198 return NULL; 199 } 200 201 /* 202 * alua_alloc_pg - Allocate a new port_group structure 203 * @sdev: scsi device 204 * @group_id: port group id 205 * @tpgs: target port group settings 206 * 207 * Allocate a new port_group structure for a given 208 * device. 209 */ 210 static struct alua_port_group *alua_alloc_pg(struct scsi_device *sdev, 211 int group_id, int tpgs) 212 { 213 struct alua_port_group *pg, *tmp_pg; 214 215 pg = kzalloc(sizeof(struct alua_port_group), GFP_KERNEL); 216 if (!pg) 217 return ERR_PTR(-ENOMEM); 218 219 pg->device_id_len = scsi_vpd_lun_id(sdev, pg->device_id_str, 220 sizeof(pg->device_id_str)); 221 if (pg->device_id_len <= 0) { 222 /* 223 * TPGS supported but no device identification found. 224 * Generate private device identification. 225 */ 226 sdev_printk(KERN_INFO, sdev, 227 "%s: No device descriptors found\n", 228 ALUA_DH_NAME); 229 pg->device_id_str[0] = '\0'; 230 pg->device_id_len = 0; 231 } 232 pg->group_id = group_id; 233 pg->tpgs = tpgs; 234 pg->state = SCSI_ACCESS_STATE_OPTIMAL; 235 pg->valid_states = TPGS_SUPPORT_ALL; 236 if (optimize_stpg) 237 pg->flags |= ALUA_OPTIMIZE_STPG; 238 kref_init(&pg->kref); 239 INIT_DELAYED_WORK(&pg->rtpg_work, alua_rtpg_work); 240 INIT_LIST_HEAD(&pg->rtpg_list); 241 INIT_LIST_HEAD(&pg->node); 242 INIT_LIST_HEAD(&pg->dh_list); 243 spin_lock_init(&pg->lock); 244 245 spin_lock(&port_group_lock); 246 tmp_pg = alua_find_get_pg(pg->device_id_str, pg->device_id_len, 247 group_id); 248 if (tmp_pg) { 249 spin_unlock(&port_group_lock); 250 kfree(pg); 251 return tmp_pg; 252 } 253 254 list_add(&pg->node, &port_group_list); 255 spin_unlock(&port_group_lock); 256 257 return pg; 258 } 259 260 /* 261 * alua_check_tpgs - Evaluate TPGS setting 262 * @sdev: device to be checked 263 * 264 * Examine the TPGS setting of the sdev to find out if ALUA 265 * is supported. 266 */ 267 static int alua_check_tpgs(struct scsi_device *sdev) 268 { 269 int tpgs = TPGS_MODE_NONE; 270 271 /* 272 * ALUA support for non-disk devices is fraught with 273 * difficulties, so disable it for now. 274 */ 275 if (sdev->type != TYPE_DISK) { 276 sdev_printk(KERN_INFO, sdev, 277 "%s: disable for non-disk devices\n", 278 ALUA_DH_NAME); 279 return tpgs; 280 } 281 282 tpgs = scsi_device_tpgs(sdev); 283 switch (tpgs) { 284 case TPGS_MODE_EXPLICIT|TPGS_MODE_IMPLICIT: 285 sdev_printk(KERN_INFO, sdev, 286 "%s: supports implicit and explicit TPGS\n", 287 ALUA_DH_NAME); 288 break; 289 case TPGS_MODE_EXPLICIT: 290 sdev_printk(KERN_INFO, sdev, "%s: supports explicit TPGS\n", 291 ALUA_DH_NAME); 292 break; 293 case TPGS_MODE_IMPLICIT: 294 sdev_printk(KERN_INFO, sdev, "%s: supports implicit TPGS\n", 295 ALUA_DH_NAME); 296 break; 297 case TPGS_MODE_NONE: 298 sdev_printk(KERN_INFO, sdev, "%s: not supported\n", 299 ALUA_DH_NAME); 300 break; 301 default: 302 sdev_printk(KERN_INFO, sdev, 303 "%s: unsupported TPGS setting %d\n", 304 ALUA_DH_NAME, tpgs); 305 tpgs = TPGS_MODE_NONE; 306 break; 307 } 308 309 return tpgs; 310 } 311 312 /* 313 * alua_check_vpd - Evaluate INQUIRY vpd page 0x83 314 * @sdev: device to be checked 315 * 316 * Extract the relative target port and the target port group 317 * descriptor from the list of identificators. 318 */ 319 static int alua_check_vpd(struct scsi_device *sdev, struct alua_dh_data *h, 320 int tpgs) 321 { 322 int rel_port = -1, group_id; 323 struct alua_port_group *pg, *old_pg = NULL; 324 bool pg_updated = false; 325 unsigned long flags; 326 327 group_id = scsi_vpd_tpg_id(sdev, &rel_port); 328 if (group_id < 0) { 329 /* 330 * Internal error; TPGS supported but required 331 * VPD identification descriptors not present. 332 * Disable ALUA support 333 */ 334 sdev_printk(KERN_INFO, sdev, 335 "%s: No target port descriptors found\n", 336 ALUA_DH_NAME); 337 return SCSI_DH_DEV_UNSUPP; 338 } 339 340 pg = alua_alloc_pg(sdev, group_id, tpgs); 341 if (IS_ERR(pg)) { 342 if (PTR_ERR(pg) == -ENOMEM) 343 return SCSI_DH_NOMEM; 344 return SCSI_DH_DEV_UNSUPP; 345 } 346 if (pg->device_id_len) 347 sdev_printk(KERN_INFO, sdev, 348 "%s: device %s port group %x rel port %x\n", 349 ALUA_DH_NAME, pg->device_id_str, 350 group_id, rel_port); 351 else 352 sdev_printk(KERN_INFO, sdev, 353 "%s: port group %x rel port %x\n", 354 ALUA_DH_NAME, group_id, rel_port); 355 356 /* Check for existing port group references */ 357 spin_lock(&h->pg_lock); 358 old_pg = rcu_dereference_protected(h->pg, lockdep_is_held(&h->pg_lock)); 359 if (old_pg != pg) { 360 /* port group has changed. Update to new port group */ 361 if (h->pg) { 362 spin_lock_irqsave(&old_pg->lock, flags); 363 list_del_rcu(&h->node); 364 spin_unlock_irqrestore(&old_pg->lock, flags); 365 } 366 rcu_assign_pointer(h->pg, pg); 367 pg_updated = true; 368 } 369 370 spin_lock_irqsave(&pg->lock, flags); 371 if (pg_updated) 372 list_add_rcu(&h->node, &pg->dh_list); 373 spin_unlock_irqrestore(&pg->lock, flags); 374 375 alua_rtpg_queue(rcu_dereference_protected(h->pg, 376 lockdep_is_held(&h->pg_lock)), 377 sdev, NULL, true); 378 spin_unlock(&h->pg_lock); 379 380 if (old_pg) 381 kref_put(&old_pg->kref, release_port_group); 382 383 return SCSI_DH_OK; 384 } 385 386 static char print_alua_state(unsigned char state) 387 { 388 switch (state) { 389 case SCSI_ACCESS_STATE_OPTIMAL: 390 return 'A'; 391 case SCSI_ACCESS_STATE_ACTIVE: 392 return 'N'; 393 case SCSI_ACCESS_STATE_STANDBY: 394 return 'S'; 395 case SCSI_ACCESS_STATE_UNAVAILABLE: 396 return 'U'; 397 case SCSI_ACCESS_STATE_LBA: 398 return 'L'; 399 case SCSI_ACCESS_STATE_OFFLINE: 400 return 'O'; 401 case SCSI_ACCESS_STATE_TRANSITIONING: 402 return 'T'; 403 default: 404 return 'X'; 405 } 406 } 407 408 static enum scsi_disposition alua_check_sense(struct scsi_device *sdev, 409 struct scsi_sense_hdr *sense_hdr) 410 { 411 struct alua_dh_data *h = sdev->handler_data; 412 struct alua_port_group *pg; 413 414 switch (sense_hdr->sense_key) { 415 case NOT_READY: 416 if (sense_hdr->asc == 0x04 && sense_hdr->ascq == 0x0a) { 417 /* 418 * LUN Not Accessible - ALUA state transition 419 */ 420 rcu_read_lock(); 421 pg = rcu_dereference(h->pg); 422 if (pg) 423 pg->state = SCSI_ACCESS_STATE_TRANSITIONING; 424 rcu_read_unlock(); 425 alua_check(sdev, false); 426 return NEEDS_RETRY; 427 } 428 break; 429 case UNIT_ATTENTION: 430 if (sense_hdr->asc == 0x29 && sense_hdr->ascq == 0x00) { 431 /* 432 * Power On, Reset, or Bus Device Reset. 433 * Might have obscured a state transition, 434 * so schedule a recheck. 435 */ 436 alua_check(sdev, true); 437 return ADD_TO_MLQUEUE; 438 } 439 if (sense_hdr->asc == 0x29 && sense_hdr->ascq == 0x04) 440 /* 441 * Device internal reset 442 */ 443 return ADD_TO_MLQUEUE; 444 if (sense_hdr->asc == 0x2a && sense_hdr->ascq == 0x01) 445 /* 446 * Mode Parameters Changed 447 */ 448 return ADD_TO_MLQUEUE; 449 if (sense_hdr->asc == 0x2a && sense_hdr->ascq == 0x06) { 450 /* 451 * ALUA state changed 452 */ 453 alua_check(sdev, true); 454 return ADD_TO_MLQUEUE; 455 } 456 if (sense_hdr->asc == 0x2a && sense_hdr->ascq == 0x07) { 457 /* 458 * Implicit ALUA state transition failed 459 */ 460 alua_check(sdev, true); 461 return ADD_TO_MLQUEUE; 462 } 463 if (sense_hdr->asc == 0x3f && sense_hdr->ascq == 0x03) 464 /* 465 * Inquiry data has changed 466 */ 467 return ADD_TO_MLQUEUE; 468 if (sense_hdr->asc == 0x3f && sense_hdr->ascq == 0x0e) 469 /* 470 * REPORTED_LUNS_DATA_HAS_CHANGED is reported 471 * when switching controllers on targets like 472 * Intel Multi-Flex. We can just retry. 473 */ 474 return ADD_TO_MLQUEUE; 475 break; 476 } 477 478 return SCSI_RETURN_NOT_HANDLED; 479 } 480 481 /* 482 * alua_tur - Send a TEST UNIT READY 483 * @sdev: device to which the TEST UNIT READY command should be send 484 * 485 * Send a TEST UNIT READY to @sdev to figure out the device state 486 * Returns SCSI_DH_RETRY if the sense code is NOT READY/ALUA TRANSITIONING, 487 * SCSI_DH_OK if no error occurred, and SCSI_DH_IO otherwise. 488 */ 489 static int alua_tur(struct scsi_device *sdev) 490 { 491 struct scsi_sense_hdr sense_hdr; 492 int retval; 493 494 retval = scsi_test_unit_ready(sdev, ALUA_FAILOVER_TIMEOUT * HZ, 495 ALUA_FAILOVER_RETRIES, &sense_hdr); 496 if (sense_hdr.sense_key == NOT_READY && 497 sense_hdr.asc == 0x04 && sense_hdr.ascq == 0x0a) 498 return SCSI_DH_RETRY; 499 else if (retval) 500 return SCSI_DH_IO; 501 else 502 return SCSI_DH_OK; 503 } 504 505 /* 506 * alua_rtpg - Evaluate REPORT TARGET GROUP STATES 507 * @sdev: the device to be evaluated. 508 * 509 * Evaluate the Target Port Group State. 510 * Returns SCSI_DH_DEV_OFFLINED if the path is 511 * found to be unusable. 512 */ 513 static int alua_rtpg(struct scsi_device *sdev, struct alua_port_group *pg) 514 { 515 struct scsi_sense_hdr sense_hdr; 516 struct alua_port_group *tmp_pg; 517 int len, k, off, bufflen = ALUA_RTPG_SIZE; 518 int group_id_old, state_old, pref_old, valid_states_old; 519 unsigned char *desc, *buff; 520 unsigned err, retval; 521 unsigned int tpg_desc_tbl_off; 522 unsigned char orig_transition_tmo; 523 unsigned long flags; 524 bool transitioning_sense = false; 525 526 group_id_old = pg->group_id; 527 state_old = pg->state; 528 pref_old = pg->pref; 529 valid_states_old = pg->valid_states; 530 531 if (!pg->expiry) { 532 unsigned long transition_tmo = ALUA_FAILOVER_TIMEOUT * HZ; 533 534 if (pg->transition_tmo) 535 transition_tmo = pg->transition_tmo * HZ; 536 537 pg->expiry = round_jiffies_up(jiffies + transition_tmo); 538 } 539 540 buff = kzalloc(bufflen, GFP_KERNEL); 541 if (!buff) 542 return SCSI_DH_DEV_TEMP_BUSY; 543 544 retry: 545 err = 0; 546 retval = submit_rtpg(sdev, buff, bufflen, &sense_hdr, pg->flags); 547 548 if (retval) { 549 /* 550 * Some (broken) implementations have a habit of returning 551 * an error during things like firmware update etc. 552 * But if the target only supports active/optimized there's 553 * not much we can do; it's not that we can switch paths 554 * or anything. 555 * So ignore any errors to avoid spurious failures during 556 * path failover. 557 */ 558 if ((pg->valid_states & ~TPGS_SUPPORT_OPTIMIZED) == 0) { 559 sdev_printk(KERN_INFO, sdev, 560 "%s: ignoring rtpg result %d\n", 561 ALUA_DH_NAME, retval); 562 kfree(buff); 563 return SCSI_DH_OK; 564 } 565 if (!scsi_sense_valid(&sense_hdr)) { 566 sdev_printk(KERN_INFO, sdev, 567 "%s: rtpg failed, result %d\n", 568 ALUA_DH_NAME, retval); 569 kfree(buff); 570 if (driver_byte(retval) == DRIVER_ERROR) 571 return SCSI_DH_DEV_TEMP_BUSY; 572 return SCSI_DH_IO; 573 } 574 575 /* 576 * submit_rtpg() has failed on existing arrays 577 * when requesting extended header info, and 578 * the array doesn't support extended headers, 579 * even though it shouldn't according to T10. 580 * The retry without rtpg_ext_hdr_req set 581 * handles this. 582 * Note: some arrays return a sense key of ILLEGAL_REQUEST 583 * with ASC 00h if they don't support the extended header. 584 */ 585 if (!(pg->flags & ALUA_RTPG_EXT_HDR_UNSUPP) && 586 sense_hdr.sense_key == ILLEGAL_REQUEST) { 587 pg->flags |= ALUA_RTPG_EXT_HDR_UNSUPP; 588 goto retry; 589 } 590 /* 591 * If the array returns with 'ALUA state transition' 592 * sense code here it cannot return RTPG data during 593 * transition. So set the state to 'transitioning' directly. 594 */ 595 if (sense_hdr.sense_key == NOT_READY && 596 sense_hdr.asc == 0x04 && sense_hdr.ascq == 0x0a) { 597 transitioning_sense = true; 598 goto skip_rtpg; 599 } 600 /* 601 * Retry on any other UNIT ATTENTION occurred. 602 */ 603 if (sense_hdr.sense_key == UNIT_ATTENTION) 604 err = SCSI_DH_RETRY; 605 if (err == SCSI_DH_RETRY && 606 pg->expiry != 0 && time_before(jiffies, pg->expiry)) { 607 sdev_printk(KERN_ERR, sdev, "%s: rtpg retry\n", 608 ALUA_DH_NAME); 609 scsi_print_sense_hdr(sdev, ALUA_DH_NAME, &sense_hdr); 610 kfree(buff); 611 return err; 612 } 613 sdev_printk(KERN_ERR, sdev, "%s: rtpg failed\n", 614 ALUA_DH_NAME); 615 scsi_print_sense_hdr(sdev, ALUA_DH_NAME, &sense_hdr); 616 kfree(buff); 617 pg->expiry = 0; 618 return SCSI_DH_IO; 619 } 620 621 len = get_unaligned_be32(&buff[0]) + 4; 622 623 if (len > bufflen) { 624 /* Resubmit with the correct length */ 625 kfree(buff); 626 bufflen = len; 627 buff = kmalloc(bufflen, GFP_KERNEL); 628 if (!buff) { 629 sdev_printk(KERN_WARNING, sdev, 630 "%s: kmalloc buffer failed\n",__func__); 631 /* Temporary failure, bypass */ 632 pg->expiry = 0; 633 return SCSI_DH_DEV_TEMP_BUSY; 634 } 635 goto retry; 636 } 637 638 orig_transition_tmo = pg->transition_tmo; 639 if ((buff[4] & RTPG_FMT_MASK) == RTPG_FMT_EXT_HDR && buff[5] != 0) 640 pg->transition_tmo = buff[5]; 641 else 642 pg->transition_tmo = ALUA_FAILOVER_TIMEOUT; 643 644 if (orig_transition_tmo != pg->transition_tmo) { 645 sdev_printk(KERN_INFO, sdev, 646 "%s: transition timeout set to %d seconds\n", 647 ALUA_DH_NAME, pg->transition_tmo); 648 pg->expiry = jiffies + pg->transition_tmo * HZ; 649 } 650 651 if ((buff[4] & RTPG_FMT_MASK) == RTPG_FMT_EXT_HDR) 652 tpg_desc_tbl_off = 8; 653 else 654 tpg_desc_tbl_off = 4; 655 656 for (k = tpg_desc_tbl_off, desc = buff + tpg_desc_tbl_off; 657 k < len; 658 k += off, desc += off) { 659 u16 group_id = get_unaligned_be16(&desc[2]); 660 661 spin_lock_irqsave(&port_group_lock, flags); 662 tmp_pg = alua_find_get_pg(pg->device_id_str, pg->device_id_len, 663 group_id); 664 spin_unlock_irqrestore(&port_group_lock, flags); 665 if (tmp_pg) { 666 if (spin_trylock_irqsave(&tmp_pg->lock, flags)) { 667 if ((tmp_pg == pg) || 668 !(tmp_pg->flags & ALUA_PG_RUNNING)) { 669 struct alua_dh_data *h; 670 671 tmp_pg->state = desc[0] & 0x0f; 672 tmp_pg->pref = desc[0] >> 7; 673 rcu_read_lock(); 674 list_for_each_entry_rcu(h, 675 &tmp_pg->dh_list, node) { 676 if (!h->sdev) 677 continue; 678 h->sdev->access_state = desc[0]; 679 } 680 rcu_read_unlock(); 681 } 682 if (tmp_pg == pg) 683 tmp_pg->valid_states = desc[1]; 684 spin_unlock_irqrestore(&tmp_pg->lock, flags); 685 } 686 kref_put(&tmp_pg->kref, release_port_group); 687 } 688 off = 8 + (desc[7] * 4); 689 } 690 691 skip_rtpg: 692 spin_lock_irqsave(&pg->lock, flags); 693 if (transitioning_sense) 694 pg->state = SCSI_ACCESS_STATE_TRANSITIONING; 695 696 if (group_id_old != pg->group_id || state_old != pg->state || 697 pref_old != pg->pref || valid_states_old != pg->valid_states) 698 sdev_printk(KERN_INFO, sdev, 699 "%s: port group %02x state %c %s supports %c%c%c%c%c%c%c\n", 700 ALUA_DH_NAME, pg->group_id, print_alua_state(pg->state), 701 pg->pref ? "preferred" : "non-preferred", 702 pg->valid_states&TPGS_SUPPORT_TRANSITION?'T':'t', 703 pg->valid_states&TPGS_SUPPORT_OFFLINE?'O':'o', 704 pg->valid_states&TPGS_SUPPORT_LBA_DEPENDENT?'L':'l', 705 pg->valid_states&TPGS_SUPPORT_UNAVAILABLE?'U':'u', 706 pg->valid_states&TPGS_SUPPORT_STANDBY?'S':'s', 707 pg->valid_states&TPGS_SUPPORT_NONOPTIMIZED?'N':'n', 708 pg->valid_states&TPGS_SUPPORT_OPTIMIZED?'A':'a'); 709 710 switch (pg->state) { 711 case SCSI_ACCESS_STATE_TRANSITIONING: 712 if (time_before(jiffies, pg->expiry)) { 713 /* State transition, retry */ 714 pg->interval = ALUA_RTPG_RETRY_DELAY; 715 err = SCSI_DH_RETRY; 716 } else { 717 struct alua_dh_data *h; 718 719 /* Transitioning time exceeded, set port to standby */ 720 err = SCSI_DH_IO; 721 pg->state = SCSI_ACCESS_STATE_STANDBY; 722 pg->expiry = 0; 723 rcu_read_lock(); 724 list_for_each_entry_rcu(h, &pg->dh_list, node) { 725 if (!h->sdev) 726 continue; 727 h->sdev->access_state = 728 (pg->state & SCSI_ACCESS_STATE_MASK); 729 if (pg->pref) 730 h->sdev->access_state |= 731 SCSI_ACCESS_STATE_PREFERRED; 732 } 733 rcu_read_unlock(); 734 } 735 break; 736 case SCSI_ACCESS_STATE_OFFLINE: 737 /* Path unusable */ 738 err = SCSI_DH_DEV_OFFLINED; 739 pg->expiry = 0; 740 break; 741 default: 742 /* Useable path if active */ 743 err = SCSI_DH_OK; 744 pg->expiry = 0; 745 break; 746 } 747 spin_unlock_irqrestore(&pg->lock, flags); 748 kfree(buff); 749 return err; 750 } 751 752 /* 753 * alua_stpg - Issue a SET TARGET PORT GROUP command 754 * 755 * Issue a SET TARGET PORT GROUP command and evaluate the 756 * response. Returns SCSI_DH_RETRY per default to trigger 757 * a re-evaluation of the target group state or SCSI_DH_OK 758 * if no further action needs to be taken. 759 */ 760 static unsigned alua_stpg(struct scsi_device *sdev, struct alua_port_group *pg) 761 { 762 int retval; 763 struct scsi_sense_hdr sense_hdr; 764 765 if (!(pg->tpgs & TPGS_MODE_EXPLICIT)) { 766 /* Only implicit ALUA supported, retry */ 767 return SCSI_DH_RETRY; 768 } 769 switch (pg->state) { 770 case SCSI_ACCESS_STATE_OPTIMAL: 771 return SCSI_DH_OK; 772 case SCSI_ACCESS_STATE_ACTIVE: 773 if ((pg->flags & ALUA_OPTIMIZE_STPG) && 774 !pg->pref && 775 (pg->tpgs & TPGS_MODE_IMPLICIT)) 776 return SCSI_DH_OK; 777 break; 778 case SCSI_ACCESS_STATE_STANDBY: 779 case SCSI_ACCESS_STATE_UNAVAILABLE: 780 break; 781 case SCSI_ACCESS_STATE_OFFLINE: 782 return SCSI_DH_IO; 783 case SCSI_ACCESS_STATE_TRANSITIONING: 784 break; 785 default: 786 sdev_printk(KERN_INFO, sdev, 787 "%s: stpg failed, unhandled TPGS state %d", 788 ALUA_DH_NAME, pg->state); 789 return SCSI_DH_NOSYS; 790 } 791 retval = submit_stpg(sdev, pg->group_id, &sense_hdr); 792 793 if (retval) { 794 if (!scsi_sense_valid(&sense_hdr)) { 795 sdev_printk(KERN_INFO, sdev, 796 "%s: stpg failed, result %d", 797 ALUA_DH_NAME, retval); 798 if (driver_byte(retval) == DRIVER_ERROR) 799 return SCSI_DH_DEV_TEMP_BUSY; 800 } else { 801 sdev_printk(KERN_INFO, sdev, "%s: stpg failed\n", 802 ALUA_DH_NAME); 803 scsi_print_sense_hdr(sdev, ALUA_DH_NAME, &sense_hdr); 804 } 805 } 806 /* Retry RTPG */ 807 return SCSI_DH_RETRY; 808 } 809 810 static void alua_rtpg_work(struct work_struct *work) 811 { 812 struct alua_port_group *pg = 813 container_of(work, struct alua_port_group, rtpg_work.work); 814 struct scsi_device *sdev; 815 LIST_HEAD(qdata_list); 816 int err = SCSI_DH_OK; 817 struct alua_queue_data *qdata, *tmp; 818 unsigned long flags; 819 820 spin_lock_irqsave(&pg->lock, flags); 821 sdev = pg->rtpg_sdev; 822 if (!sdev) { 823 WARN_ON(pg->flags & ALUA_PG_RUN_RTPG); 824 WARN_ON(pg->flags & ALUA_PG_RUN_STPG); 825 spin_unlock_irqrestore(&pg->lock, flags); 826 kref_put(&pg->kref, release_port_group); 827 return; 828 } 829 pg->flags |= ALUA_PG_RUNNING; 830 if (pg->flags & ALUA_PG_RUN_RTPG) { 831 int state = pg->state; 832 833 pg->flags &= ~ALUA_PG_RUN_RTPG; 834 spin_unlock_irqrestore(&pg->lock, flags); 835 if (state == SCSI_ACCESS_STATE_TRANSITIONING) { 836 if (alua_tur(sdev) == SCSI_DH_RETRY) { 837 spin_lock_irqsave(&pg->lock, flags); 838 pg->flags &= ~ALUA_PG_RUNNING; 839 pg->flags |= ALUA_PG_RUN_RTPG; 840 if (!pg->interval) 841 pg->interval = ALUA_RTPG_RETRY_DELAY; 842 spin_unlock_irqrestore(&pg->lock, flags); 843 queue_delayed_work(kaluad_wq, &pg->rtpg_work, 844 pg->interval * HZ); 845 return; 846 } 847 /* Send RTPG on failure or if TUR indicates SUCCESS */ 848 } 849 err = alua_rtpg(sdev, pg); 850 spin_lock_irqsave(&pg->lock, flags); 851 if (err == SCSI_DH_RETRY || pg->flags & ALUA_PG_RUN_RTPG) { 852 pg->flags &= ~ALUA_PG_RUNNING; 853 if (!pg->interval && !(pg->flags & ALUA_PG_RUN_RTPG)) 854 pg->interval = ALUA_RTPG_RETRY_DELAY; 855 pg->flags |= ALUA_PG_RUN_RTPG; 856 spin_unlock_irqrestore(&pg->lock, flags); 857 queue_delayed_work(kaluad_wq, &pg->rtpg_work, 858 pg->interval * HZ); 859 return; 860 } 861 if (err != SCSI_DH_OK) 862 pg->flags &= ~ALUA_PG_RUN_STPG; 863 } 864 if (pg->flags & ALUA_PG_RUN_STPG) { 865 pg->flags &= ~ALUA_PG_RUN_STPG; 866 spin_unlock_irqrestore(&pg->lock, flags); 867 err = alua_stpg(sdev, pg); 868 spin_lock_irqsave(&pg->lock, flags); 869 if (err == SCSI_DH_RETRY || pg->flags & ALUA_PG_RUN_RTPG) { 870 pg->flags |= ALUA_PG_RUN_RTPG; 871 pg->interval = 0; 872 pg->flags &= ~ALUA_PG_RUNNING; 873 spin_unlock_irqrestore(&pg->lock, flags); 874 queue_delayed_work(kaluad_wq, &pg->rtpg_work, 875 pg->interval * HZ); 876 return; 877 } 878 } 879 880 list_splice_init(&pg->rtpg_list, &qdata_list); 881 pg->rtpg_sdev = NULL; 882 spin_unlock_irqrestore(&pg->lock, flags); 883 884 list_for_each_entry_safe(qdata, tmp, &qdata_list, entry) { 885 list_del(&qdata->entry); 886 if (qdata->callback_fn) 887 qdata->callback_fn(qdata->callback_data, err); 888 kfree(qdata); 889 } 890 spin_lock_irqsave(&pg->lock, flags); 891 pg->flags &= ~ALUA_PG_RUNNING; 892 spin_unlock_irqrestore(&pg->lock, flags); 893 scsi_device_put(sdev); 894 kref_put(&pg->kref, release_port_group); 895 } 896 897 /** 898 * alua_rtpg_queue() - cause RTPG to be submitted asynchronously 899 * @pg: ALUA port group associated with @sdev. 900 * @sdev: SCSI device for which to submit an RTPG. 901 * @qdata: Information about the callback to invoke after the RTPG. 902 * @force: Whether or not to submit an RTPG if a work item that will submit an 903 * RTPG already has been scheduled. 904 * 905 * Returns true if and only if alua_rtpg_work() will be called asynchronously. 906 * That function is responsible for calling @qdata->fn(). 907 */ 908 static bool alua_rtpg_queue(struct alua_port_group *pg, 909 struct scsi_device *sdev, 910 struct alua_queue_data *qdata, bool force) 911 { 912 int start_queue = 0; 913 unsigned long flags; 914 if (WARN_ON_ONCE(!pg) || scsi_device_get(sdev)) 915 return false; 916 917 spin_lock_irqsave(&pg->lock, flags); 918 if (qdata) { 919 list_add_tail(&qdata->entry, &pg->rtpg_list); 920 pg->flags |= ALUA_PG_RUN_STPG; 921 force = true; 922 } 923 if (pg->rtpg_sdev == NULL) { 924 pg->interval = 0; 925 pg->flags |= ALUA_PG_RUN_RTPG; 926 kref_get(&pg->kref); 927 pg->rtpg_sdev = sdev; 928 start_queue = 1; 929 } else if (!(pg->flags & ALUA_PG_RUN_RTPG) && force) { 930 pg->flags |= ALUA_PG_RUN_RTPG; 931 /* Do not queue if the worker is already running */ 932 if (!(pg->flags & ALUA_PG_RUNNING)) { 933 kref_get(&pg->kref); 934 start_queue = 1; 935 } 936 } 937 938 spin_unlock_irqrestore(&pg->lock, flags); 939 940 if (start_queue) { 941 if (queue_delayed_work(kaluad_wq, &pg->rtpg_work, 942 msecs_to_jiffies(ALUA_RTPG_DELAY_MSECS))) 943 sdev = NULL; 944 else 945 kref_put(&pg->kref, release_port_group); 946 } 947 if (sdev) 948 scsi_device_put(sdev); 949 950 return true; 951 } 952 953 /* 954 * alua_initialize - Initialize ALUA state 955 * @sdev: the device to be initialized 956 * 957 * For the prep_fn to work correctly we have 958 * to initialize the ALUA state for the device. 959 */ 960 static int alua_initialize(struct scsi_device *sdev, struct alua_dh_data *h) 961 { 962 int err = SCSI_DH_DEV_UNSUPP, tpgs; 963 964 mutex_lock(&h->init_mutex); 965 tpgs = alua_check_tpgs(sdev); 966 if (tpgs != TPGS_MODE_NONE) 967 err = alua_check_vpd(sdev, h, tpgs); 968 h->init_error = err; 969 mutex_unlock(&h->init_mutex); 970 return err; 971 } 972 /* 973 * alua_set_params - set/unset the optimize flag 974 * @sdev: device on the path to be activated 975 * params - parameters in the following format 976 * "no_of_params\0param1\0param2\0param3\0...\0" 977 * For example, to set the flag pass the following parameters 978 * from multipath.conf 979 * hardware_handler "2 alua 1" 980 */ 981 static int alua_set_params(struct scsi_device *sdev, const char *params) 982 { 983 struct alua_dh_data *h = sdev->handler_data; 984 struct alua_port_group *pg = NULL; 985 unsigned int optimize = 0, argc; 986 const char *p = params; 987 int result = SCSI_DH_OK; 988 unsigned long flags; 989 990 if ((sscanf(params, "%u", &argc) != 1) || (argc != 1)) 991 return -EINVAL; 992 993 while (*p++) 994 ; 995 if ((sscanf(p, "%u", &optimize) != 1) || (optimize > 1)) 996 return -EINVAL; 997 998 rcu_read_lock(); 999 pg = rcu_dereference(h->pg); 1000 if (!pg) { 1001 rcu_read_unlock(); 1002 return -ENXIO; 1003 } 1004 spin_lock_irqsave(&pg->lock, flags); 1005 if (optimize) 1006 pg->flags |= ALUA_OPTIMIZE_STPG; 1007 else 1008 pg->flags &= ~ALUA_OPTIMIZE_STPG; 1009 spin_unlock_irqrestore(&pg->lock, flags); 1010 rcu_read_unlock(); 1011 1012 return result; 1013 } 1014 1015 /* 1016 * alua_activate - activate a path 1017 * @sdev: device on the path to be activated 1018 * 1019 * We're currently switching the port group to be activated only and 1020 * let the array figure out the rest. 1021 * There may be other arrays which require us to switch all port groups 1022 * based on a certain policy. But until we actually encounter them it 1023 * should be okay. 1024 */ 1025 static int alua_activate(struct scsi_device *sdev, 1026 activate_complete fn, void *data) 1027 { 1028 struct alua_dh_data *h = sdev->handler_data; 1029 int err = SCSI_DH_OK; 1030 struct alua_queue_data *qdata; 1031 struct alua_port_group *pg; 1032 1033 qdata = kzalloc(sizeof(*qdata), GFP_KERNEL); 1034 if (!qdata) { 1035 err = SCSI_DH_RES_TEMP_UNAVAIL; 1036 goto out; 1037 } 1038 qdata->callback_fn = fn; 1039 qdata->callback_data = data; 1040 1041 mutex_lock(&h->init_mutex); 1042 rcu_read_lock(); 1043 pg = rcu_dereference(h->pg); 1044 if (!pg || !kref_get_unless_zero(&pg->kref)) { 1045 rcu_read_unlock(); 1046 kfree(qdata); 1047 err = h->init_error; 1048 mutex_unlock(&h->init_mutex); 1049 goto out; 1050 } 1051 rcu_read_unlock(); 1052 mutex_unlock(&h->init_mutex); 1053 1054 if (alua_rtpg_queue(pg, sdev, qdata, true)) 1055 fn = NULL; 1056 else 1057 err = SCSI_DH_DEV_OFFLINED; 1058 kref_put(&pg->kref, release_port_group); 1059 out: 1060 if (fn) 1061 fn(data, err); 1062 return 0; 1063 } 1064 1065 /* 1066 * alua_check - check path status 1067 * @sdev: device on the path to be checked 1068 * 1069 * Check the device status 1070 */ 1071 static void alua_check(struct scsi_device *sdev, bool force) 1072 { 1073 struct alua_dh_data *h = sdev->handler_data; 1074 struct alua_port_group *pg; 1075 1076 rcu_read_lock(); 1077 pg = rcu_dereference(h->pg); 1078 if (!pg || !kref_get_unless_zero(&pg->kref)) { 1079 rcu_read_unlock(); 1080 return; 1081 } 1082 rcu_read_unlock(); 1083 1084 alua_rtpg_queue(pg, sdev, NULL, force); 1085 kref_put(&pg->kref, release_port_group); 1086 } 1087 1088 /* 1089 * alua_prep_fn - request callback 1090 * 1091 * Fail I/O to all paths not in state 1092 * active/optimized or active/non-optimized. 1093 */ 1094 static blk_status_t alua_prep_fn(struct scsi_device *sdev, struct request *req) 1095 { 1096 struct alua_dh_data *h = sdev->handler_data; 1097 struct alua_port_group *pg; 1098 unsigned char state = SCSI_ACCESS_STATE_OPTIMAL; 1099 1100 rcu_read_lock(); 1101 pg = rcu_dereference(h->pg); 1102 if (pg) 1103 state = pg->state; 1104 rcu_read_unlock(); 1105 1106 switch (state) { 1107 case SCSI_ACCESS_STATE_OPTIMAL: 1108 case SCSI_ACCESS_STATE_ACTIVE: 1109 case SCSI_ACCESS_STATE_LBA: 1110 return BLK_STS_OK; 1111 case SCSI_ACCESS_STATE_TRANSITIONING: 1112 return BLK_STS_AGAIN; 1113 default: 1114 req->rq_flags |= RQF_QUIET; 1115 return BLK_STS_IOERR; 1116 } 1117 } 1118 1119 static void alua_rescan(struct scsi_device *sdev) 1120 { 1121 struct alua_dh_data *h = sdev->handler_data; 1122 1123 alua_initialize(sdev, h); 1124 } 1125 1126 /* 1127 * alua_bus_attach - Attach device handler 1128 * @sdev: device to be attached to 1129 */ 1130 static int alua_bus_attach(struct scsi_device *sdev) 1131 { 1132 struct alua_dh_data *h; 1133 int err; 1134 1135 h = kzalloc(sizeof(*h) , GFP_KERNEL); 1136 if (!h) 1137 return SCSI_DH_NOMEM; 1138 spin_lock_init(&h->pg_lock); 1139 rcu_assign_pointer(h->pg, NULL); 1140 h->init_error = SCSI_DH_OK; 1141 h->sdev = sdev; 1142 INIT_LIST_HEAD(&h->node); 1143 1144 mutex_init(&h->init_mutex); 1145 err = alua_initialize(sdev, h); 1146 if (err != SCSI_DH_OK && err != SCSI_DH_DEV_OFFLINED) 1147 goto failed; 1148 1149 sdev->handler_data = h; 1150 return SCSI_DH_OK; 1151 failed: 1152 kfree(h); 1153 return err; 1154 } 1155 1156 /* 1157 * alua_bus_detach - Detach device handler 1158 * @sdev: device to be detached from 1159 */ 1160 static void alua_bus_detach(struct scsi_device *sdev) 1161 { 1162 struct alua_dh_data *h = sdev->handler_data; 1163 struct alua_port_group *pg; 1164 1165 spin_lock(&h->pg_lock); 1166 pg = rcu_dereference_protected(h->pg, lockdep_is_held(&h->pg_lock)); 1167 rcu_assign_pointer(h->pg, NULL); 1168 spin_unlock(&h->pg_lock); 1169 if (pg) { 1170 spin_lock_irq(&pg->lock); 1171 list_del_rcu(&h->node); 1172 spin_unlock_irq(&pg->lock); 1173 kref_put(&pg->kref, release_port_group); 1174 } 1175 sdev->handler_data = NULL; 1176 synchronize_rcu(); 1177 kfree(h); 1178 } 1179 1180 static struct scsi_device_handler alua_dh = { 1181 .name = ALUA_DH_NAME, 1182 .module = THIS_MODULE, 1183 .attach = alua_bus_attach, 1184 .detach = alua_bus_detach, 1185 .prep_fn = alua_prep_fn, 1186 .check_sense = alua_check_sense, 1187 .activate = alua_activate, 1188 .rescan = alua_rescan, 1189 .set_params = alua_set_params, 1190 }; 1191 1192 static int __init alua_init(void) 1193 { 1194 int r; 1195 1196 kaluad_wq = alloc_workqueue("kaluad", WQ_MEM_RECLAIM, 0); 1197 if (!kaluad_wq) 1198 return -ENOMEM; 1199 1200 r = scsi_register_device_handler(&alua_dh); 1201 if (r != 0) { 1202 printk(KERN_ERR "%s: Failed to register scsi device handler", 1203 ALUA_DH_NAME); 1204 destroy_workqueue(kaluad_wq); 1205 } 1206 return r; 1207 } 1208 1209 static void __exit alua_exit(void) 1210 { 1211 scsi_unregister_device_handler(&alua_dh); 1212 destroy_workqueue(kaluad_wq); 1213 } 1214 1215 module_init(alua_init); 1216 module_exit(alua_exit); 1217 1218 MODULE_DESCRIPTION("DM Multipath ALUA support"); 1219 MODULE_AUTHOR("Hannes Reinecke <hare@suse.de>"); 1220 MODULE_LICENSE("GPL"); 1221 MODULE_VERSION(ALUA_DH_VER); 1222