1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* 3 * Generic SCSI-3 ALUA SCSI Device Handler 4 * 5 * Copyright (C) 2007-2010 Hannes Reinecke, SUSE Linux Products GmbH. 6 * All rights reserved. 7 */ 8 #include <linux/slab.h> 9 #include <linux/delay.h> 10 #include <linux/module.h> 11 #include <asm/unaligned.h> 12 #include <scsi/scsi.h> 13 #include <scsi/scsi_proto.h> 14 #include <scsi/scsi_dbg.h> 15 #include <scsi/scsi_eh.h> 16 #include <scsi/scsi_dh.h> 17 18 #define ALUA_DH_NAME "alua" 19 #define ALUA_DH_VER "2.0" 20 21 #define TPGS_SUPPORT_NONE 0x00 22 #define TPGS_SUPPORT_OPTIMIZED 0x01 23 #define TPGS_SUPPORT_NONOPTIMIZED 0x02 24 #define TPGS_SUPPORT_STANDBY 0x04 25 #define TPGS_SUPPORT_UNAVAILABLE 0x08 26 #define TPGS_SUPPORT_LBA_DEPENDENT 0x10 27 #define TPGS_SUPPORT_OFFLINE 0x40 28 #define TPGS_SUPPORT_TRANSITION 0x80 29 #define TPGS_SUPPORT_ALL 0xdf 30 31 #define RTPG_FMT_MASK 0x70 32 #define RTPG_FMT_EXT_HDR 0x10 33 34 #define TPGS_MODE_UNINITIALIZED -1 35 #define TPGS_MODE_NONE 0x0 36 #define TPGS_MODE_IMPLICIT 0x1 37 #define TPGS_MODE_EXPLICIT 0x2 38 39 #define ALUA_RTPG_SIZE 128 40 #define ALUA_FAILOVER_TIMEOUT 60 41 #define ALUA_FAILOVER_RETRIES 5 42 #define ALUA_RTPG_DELAY_MSECS 5 43 44 /* device handler flags */ 45 #define ALUA_OPTIMIZE_STPG 0x01 46 #define ALUA_RTPG_EXT_HDR_UNSUPP 0x02 47 /* State machine flags */ 48 #define ALUA_PG_RUN_RTPG 0x10 49 #define ALUA_PG_RUN_STPG 0x20 50 #define ALUA_PG_RUNNING 0x40 51 52 static uint optimize_stpg; 53 module_param(optimize_stpg, uint, S_IRUGO|S_IWUSR); 54 MODULE_PARM_DESC(optimize_stpg, "Allow use of a non-optimized path, rather than sending a STPG, when implicit TPGS is supported (0=No,1=Yes). Default is 0."); 55 56 static LIST_HEAD(port_group_list); 57 static DEFINE_SPINLOCK(port_group_lock); 58 static struct workqueue_struct *kaluad_wq; 59 60 struct alua_port_group { 61 struct kref kref; 62 struct rcu_head rcu; 63 struct list_head node; 64 struct list_head dh_list; 65 unsigned char device_id_str[256]; 66 int device_id_len; 67 int group_id; 68 int tpgs; 69 int state; 70 int pref; 71 int valid_states; 72 unsigned flags; /* used for optimizing STPG */ 73 unsigned char transition_tmo; 74 unsigned long expiry; 75 unsigned long interval; 76 struct delayed_work rtpg_work; 77 spinlock_t lock; 78 struct list_head rtpg_list; 79 struct scsi_device *rtpg_sdev; 80 }; 81 82 struct alua_dh_data { 83 struct list_head node; 84 struct alua_port_group __rcu *pg; 85 int group_id; 86 spinlock_t pg_lock; 87 struct scsi_device *sdev; 88 int init_error; 89 struct mutex init_mutex; 90 }; 91 92 struct alua_queue_data { 93 struct list_head entry; 94 activate_complete callback_fn; 95 void *callback_data; 96 }; 97 98 #define ALUA_POLICY_SWITCH_CURRENT 0 99 #define ALUA_POLICY_SWITCH_ALL 1 100 101 static void alua_rtpg_work(struct work_struct *work); 102 static bool alua_rtpg_queue(struct alua_port_group *pg, 103 struct scsi_device *sdev, 104 struct alua_queue_data *qdata, bool force); 105 static void alua_check(struct scsi_device *sdev, bool force); 106 107 static void release_port_group(struct kref *kref) 108 { 109 struct alua_port_group *pg; 110 111 pg = container_of(kref, struct alua_port_group, kref); 112 if (pg->rtpg_sdev) 113 flush_delayed_work(&pg->rtpg_work); 114 spin_lock(&port_group_lock); 115 list_del(&pg->node); 116 spin_unlock(&port_group_lock); 117 kfree_rcu(pg, rcu); 118 } 119 120 /* 121 * submit_rtpg - Issue a REPORT TARGET GROUP STATES command 122 * @sdev: sdev the command should be sent to 123 */ 124 static int submit_rtpg(struct scsi_device *sdev, unsigned char *buff, 125 int bufflen, struct scsi_sense_hdr *sshdr, int flags) 126 { 127 u8 cdb[MAX_COMMAND_SIZE]; 128 int req_flags = REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT | 129 REQ_FAILFAST_DRIVER; 130 131 /* Prepare the command. */ 132 memset(cdb, 0x0, MAX_COMMAND_SIZE); 133 cdb[0] = MAINTENANCE_IN; 134 if (!(flags & ALUA_RTPG_EXT_HDR_UNSUPP)) 135 cdb[1] = MI_REPORT_TARGET_PGS | MI_EXT_HDR_PARAM_FMT; 136 else 137 cdb[1] = MI_REPORT_TARGET_PGS; 138 put_unaligned_be32(bufflen, &cdb[6]); 139 140 return scsi_execute(sdev, cdb, DMA_FROM_DEVICE, buff, bufflen, NULL, 141 sshdr, ALUA_FAILOVER_TIMEOUT * HZ, 142 ALUA_FAILOVER_RETRIES, req_flags, 0, NULL); 143 } 144 145 /* 146 * submit_stpg - Issue a SET TARGET PORT GROUP command 147 * 148 * Currently we're only setting the current target port group state 149 * to 'active/optimized' and let the array firmware figure out 150 * the states of the remaining groups. 151 */ 152 static int submit_stpg(struct scsi_device *sdev, int group_id, 153 struct scsi_sense_hdr *sshdr) 154 { 155 u8 cdb[MAX_COMMAND_SIZE]; 156 unsigned char stpg_data[8]; 157 int stpg_len = 8; 158 int req_flags = REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT | 159 REQ_FAILFAST_DRIVER; 160 161 /* Prepare the data buffer */ 162 memset(stpg_data, 0, stpg_len); 163 stpg_data[4] = SCSI_ACCESS_STATE_OPTIMAL; 164 put_unaligned_be16(group_id, &stpg_data[6]); 165 166 /* Prepare the command. */ 167 memset(cdb, 0x0, MAX_COMMAND_SIZE); 168 cdb[0] = MAINTENANCE_OUT; 169 cdb[1] = MO_SET_TARGET_PGS; 170 put_unaligned_be32(stpg_len, &cdb[6]); 171 172 return scsi_execute(sdev, cdb, DMA_TO_DEVICE, stpg_data, stpg_len, NULL, 173 sshdr, ALUA_FAILOVER_TIMEOUT * HZ, 174 ALUA_FAILOVER_RETRIES, req_flags, 0, NULL); 175 } 176 177 static struct alua_port_group *alua_find_get_pg(char *id_str, size_t id_size, 178 int group_id) 179 { 180 struct alua_port_group *pg; 181 182 if (!id_str || !id_size || !strlen(id_str)) 183 return NULL; 184 185 list_for_each_entry(pg, &port_group_list, node) { 186 if (pg->group_id != group_id) 187 continue; 188 if (!pg->device_id_len || pg->device_id_len != id_size) 189 continue; 190 if (strncmp(pg->device_id_str, id_str, id_size)) 191 continue; 192 if (!kref_get_unless_zero(&pg->kref)) 193 continue; 194 return pg; 195 } 196 197 return NULL; 198 } 199 200 /* 201 * alua_alloc_pg - Allocate a new port_group structure 202 * @sdev: scsi device 203 * @group_id: port group id 204 * @tpgs: target port group settings 205 * 206 * Allocate a new port_group structure for a given 207 * device. 208 */ 209 static struct alua_port_group *alua_alloc_pg(struct scsi_device *sdev, 210 int group_id, int tpgs) 211 { 212 struct alua_port_group *pg, *tmp_pg; 213 214 pg = kzalloc(sizeof(struct alua_port_group), GFP_KERNEL); 215 if (!pg) 216 return ERR_PTR(-ENOMEM); 217 218 pg->device_id_len = scsi_vpd_lun_id(sdev, pg->device_id_str, 219 sizeof(pg->device_id_str)); 220 if (pg->device_id_len <= 0) { 221 /* 222 * TPGS supported but no device identification found. 223 * Generate private device identification. 224 */ 225 sdev_printk(KERN_INFO, sdev, 226 "%s: No device descriptors found\n", 227 ALUA_DH_NAME); 228 pg->device_id_str[0] = '\0'; 229 pg->device_id_len = 0; 230 } 231 pg->group_id = group_id; 232 pg->tpgs = tpgs; 233 pg->state = SCSI_ACCESS_STATE_OPTIMAL; 234 pg->valid_states = TPGS_SUPPORT_ALL; 235 if (optimize_stpg) 236 pg->flags |= ALUA_OPTIMIZE_STPG; 237 kref_init(&pg->kref); 238 INIT_DELAYED_WORK(&pg->rtpg_work, alua_rtpg_work); 239 INIT_LIST_HEAD(&pg->rtpg_list); 240 INIT_LIST_HEAD(&pg->node); 241 INIT_LIST_HEAD(&pg->dh_list); 242 spin_lock_init(&pg->lock); 243 244 spin_lock(&port_group_lock); 245 tmp_pg = alua_find_get_pg(pg->device_id_str, pg->device_id_len, 246 group_id); 247 if (tmp_pg) { 248 spin_unlock(&port_group_lock); 249 kfree(pg); 250 return tmp_pg; 251 } 252 253 list_add(&pg->node, &port_group_list); 254 spin_unlock(&port_group_lock); 255 256 return pg; 257 } 258 259 /* 260 * alua_check_tpgs - Evaluate TPGS setting 261 * @sdev: device to be checked 262 * 263 * Examine the TPGS setting of the sdev to find out if ALUA 264 * is supported. 265 */ 266 static int alua_check_tpgs(struct scsi_device *sdev) 267 { 268 int tpgs = TPGS_MODE_NONE; 269 270 /* 271 * ALUA support for non-disk devices is fraught with 272 * difficulties, so disable it for now. 273 */ 274 if (sdev->type != TYPE_DISK) { 275 sdev_printk(KERN_INFO, sdev, 276 "%s: disable for non-disk devices\n", 277 ALUA_DH_NAME); 278 return tpgs; 279 } 280 281 tpgs = scsi_device_tpgs(sdev); 282 switch (tpgs) { 283 case TPGS_MODE_EXPLICIT|TPGS_MODE_IMPLICIT: 284 sdev_printk(KERN_INFO, sdev, 285 "%s: supports implicit and explicit TPGS\n", 286 ALUA_DH_NAME); 287 break; 288 case TPGS_MODE_EXPLICIT: 289 sdev_printk(KERN_INFO, sdev, "%s: supports explicit TPGS\n", 290 ALUA_DH_NAME); 291 break; 292 case TPGS_MODE_IMPLICIT: 293 sdev_printk(KERN_INFO, sdev, "%s: supports implicit TPGS\n", 294 ALUA_DH_NAME); 295 break; 296 case TPGS_MODE_NONE: 297 sdev_printk(KERN_INFO, sdev, "%s: not supported\n", 298 ALUA_DH_NAME); 299 break; 300 default: 301 sdev_printk(KERN_INFO, sdev, 302 "%s: unsupported TPGS setting %d\n", 303 ALUA_DH_NAME, tpgs); 304 tpgs = TPGS_MODE_NONE; 305 break; 306 } 307 308 return tpgs; 309 } 310 311 /* 312 * alua_check_vpd - Evaluate INQUIRY vpd page 0x83 313 * @sdev: device to be checked 314 * 315 * Extract the relative target port and the target port group 316 * descriptor from the list of identificators. 317 */ 318 static int alua_check_vpd(struct scsi_device *sdev, struct alua_dh_data *h, 319 int tpgs) 320 { 321 int rel_port = -1, group_id; 322 struct alua_port_group *pg, *old_pg = NULL; 323 bool pg_updated = false; 324 unsigned long flags; 325 326 group_id = scsi_vpd_tpg_id(sdev, &rel_port); 327 if (group_id < 0) { 328 /* 329 * Internal error; TPGS supported but required 330 * VPD identification descriptors not present. 331 * Disable ALUA support 332 */ 333 sdev_printk(KERN_INFO, sdev, 334 "%s: No target port descriptors found\n", 335 ALUA_DH_NAME); 336 return SCSI_DH_DEV_UNSUPP; 337 } 338 339 pg = alua_alloc_pg(sdev, group_id, tpgs); 340 if (IS_ERR(pg)) { 341 if (PTR_ERR(pg) == -ENOMEM) 342 return SCSI_DH_NOMEM; 343 return SCSI_DH_DEV_UNSUPP; 344 } 345 if (pg->device_id_len) 346 sdev_printk(KERN_INFO, sdev, 347 "%s: device %s port group %x rel port %x\n", 348 ALUA_DH_NAME, pg->device_id_str, 349 group_id, rel_port); 350 else 351 sdev_printk(KERN_INFO, sdev, 352 "%s: port group %x rel port %x\n", 353 ALUA_DH_NAME, group_id, rel_port); 354 355 /* Check for existing port group references */ 356 spin_lock(&h->pg_lock); 357 old_pg = rcu_dereference_protected(h->pg, lockdep_is_held(&h->pg_lock)); 358 if (old_pg != pg) { 359 /* port group has changed. Update to new port group */ 360 if (h->pg) { 361 spin_lock_irqsave(&old_pg->lock, flags); 362 list_del_rcu(&h->node); 363 spin_unlock_irqrestore(&old_pg->lock, flags); 364 } 365 rcu_assign_pointer(h->pg, pg); 366 pg_updated = true; 367 } 368 369 spin_lock_irqsave(&pg->lock, flags); 370 if (pg_updated) 371 list_add_rcu(&h->node, &pg->dh_list); 372 spin_unlock_irqrestore(&pg->lock, flags); 373 374 alua_rtpg_queue(rcu_dereference_protected(h->pg, 375 lockdep_is_held(&h->pg_lock)), 376 sdev, NULL, true); 377 spin_unlock(&h->pg_lock); 378 379 if (old_pg) 380 kref_put(&old_pg->kref, release_port_group); 381 382 return SCSI_DH_OK; 383 } 384 385 static char print_alua_state(unsigned char state) 386 { 387 switch (state) { 388 case SCSI_ACCESS_STATE_OPTIMAL: 389 return 'A'; 390 case SCSI_ACCESS_STATE_ACTIVE: 391 return 'N'; 392 case SCSI_ACCESS_STATE_STANDBY: 393 return 'S'; 394 case SCSI_ACCESS_STATE_UNAVAILABLE: 395 return 'U'; 396 case SCSI_ACCESS_STATE_LBA: 397 return 'L'; 398 case SCSI_ACCESS_STATE_OFFLINE: 399 return 'O'; 400 case SCSI_ACCESS_STATE_TRANSITIONING: 401 return 'T'; 402 default: 403 return 'X'; 404 } 405 } 406 407 static int alua_check_sense(struct scsi_device *sdev, 408 struct scsi_sense_hdr *sense_hdr) 409 { 410 switch (sense_hdr->sense_key) { 411 case NOT_READY: 412 if (sense_hdr->asc == 0x04 && sense_hdr->ascq == 0x0a) { 413 /* 414 * LUN Not Accessible - ALUA state transition 415 */ 416 alua_check(sdev, false); 417 return NEEDS_RETRY; 418 } 419 break; 420 case UNIT_ATTENTION: 421 if (sense_hdr->asc == 0x29 && sense_hdr->ascq == 0x00) { 422 /* 423 * Power On, Reset, or Bus Device Reset. 424 * Might have obscured a state transition, 425 * so schedule a recheck. 426 */ 427 alua_check(sdev, true); 428 return ADD_TO_MLQUEUE; 429 } 430 if (sense_hdr->asc == 0x29 && sense_hdr->ascq == 0x04) 431 /* 432 * Device internal reset 433 */ 434 return ADD_TO_MLQUEUE; 435 if (sense_hdr->asc == 0x2a && sense_hdr->ascq == 0x01) 436 /* 437 * Mode Parameters Changed 438 */ 439 return ADD_TO_MLQUEUE; 440 if (sense_hdr->asc == 0x2a && sense_hdr->ascq == 0x06) { 441 /* 442 * ALUA state changed 443 */ 444 alua_check(sdev, true); 445 return ADD_TO_MLQUEUE; 446 } 447 if (sense_hdr->asc == 0x2a && sense_hdr->ascq == 0x07) { 448 /* 449 * Implicit ALUA state transition failed 450 */ 451 alua_check(sdev, true); 452 return ADD_TO_MLQUEUE; 453 } 454 if (sense_hdr->asc == 0x3f && sense_hdr->ascq == 0x03) 455 /* 456 * Inquiry data has changed 457 */ 458 return ADD_TO_MLQUEUE; 459 if (sense_hdr->asc == 0x3f && sense_hdr->ascq == 0x0e) 460 /* 461 * REPORTED_LUNS_DATA_HAS_CHANGED is reported 462 * when switching controllers on targets like 463 * Intel Multi-Flex. We can just retry. 464 */ 465 return ADD_TO_MLQUEUE; 466 break; 467 } 468 469 return SCSI_RETURN_NOT_HANDLED; 470 } 471 472 /* 473 * alua_tur - Send a TEST UNIT READY 474 * @sdev: device to which the TEST UNIT READY command should be send 475 * 476 * Send a TEST UNIT READY to @sdev to figure out the device state 477 * Returns SCSI_DH_RETRY if the sense code is NOT READY/ALUA TRANSITIONING, 478 * SCSI_DH_OK if no error occurred, and SCSI_DH_IO otherwise. 479 */ 480 static int alua_tur(struct scsi_device *sdev) 481 { 482 struct scsi_sense_hdr sense_hdr; 483 int retval; 484 485 retval = scsi_test_unit_ready(sdev, ALUA_FAILOVER_TIMEOUT * HZ, 486 ALUA_FAILOVER_RETRIES, &sense_hdr); 487 if (sense_hdr.sense_key == NOT_READY && 488 sense_hdr.asc == 0x04 && sense_hdr.ascq == 0x0a) 489 return SCSI_DH_RETRY; 490 else if (retval) 491 return SCSI_DH_IO; 492 else 493 return SCSI_DH_OK; 494 } 495 496 /* 497 * alua_rtpg - Evaluate REPORT TARGET GROUP STATES 498 * @sdev: the device to be evaluated. 499 * 500 * Evaluate the Target Port Group State. 501 * Returns SCSI_DH_DEV_OFFLINED if the path is 502 * found to be unusable. 503 */ 504 static int alua_rtpg(struct scsi_device *sdev, struct alua_port_group *pg) 505 { 506 struct scsi_sense_hdr sense_hdr; 507 struct alua_port_group *tmp_pg; 508 int len, k, off, bufflen = ALUA_RTPG_SIZE; 509 unsigned char *desc, *buff; 510 unsigned err, retval; 511 unsigned int tpg_desc_tbl_off; 512 unsigned char orig_transition_tmo; 513 unsigned long flags; 514 515 if (!pg->expiry) { 516 unsigned long transition_tmo = ALUA_FAILOVER_TIMEOUT * HZ; 517 518 if (pg->transition_tmo) 519 transition_tmo = pg->transition_tmo * HZ; 520 521 pg->expiry = round_jiffies_up(jiffies + transition_tmo); 522 } 523 524 buff = kzalloc(bufflen, GFP_KERNEL); 525 if (!buff) 526 return SCSI_DH_DEV_TEMP_BUSY; 527 528 retry: 529 err = 0; 530 retval = submit_rtpg(sdev, buff, bufflen, &sense_hdr, pg->flags); 531 532 if (retval) { 533 /* 534 * Some (broken) implementations have a habit of returning 535 * an error during things like firmware update etc. 536 * But if the target only supports active/optimized there's 537 * not much we can do; it's not that we can switch paths 538 * or anything. 539 * So ignore any errors to avoid spurious failures during 540 * path failover. 541 */ 542 if ((pg->valid_states & ~TPGS_SUPPORT_OPTIMIZED) == 0) { 543 sdev_printk(KERN_INFO, sdev, 544 "%s: ignoring rtpg result %d\n", 545 ALUA_DH_NAME, retval); 546 kfree(buff); 547 return SCSI_DH_OK; 548 } 549 if (!scsi_sense_valid(&sense_hdr)) { 550 sdev_printk(KERN_INFO, sdev, 551 "%s: rtpg failed, result %d\n", 552 ALUA_DH_NAME, retval); 553 kfree(buff); 554 if (driver_byte(retval) == DRIVER_ERROR) 555 return SCSI_DH_DEV_TEMP_BUSY; 556 return SCSI_DH_IO; 557 } 558 559 /* 560 * submit_rtpg() has failed on existing arrays 561 * when requesting extended header info, and 562 * the array doesn't support extended headers, 563 * even though it shouldn't according to T10. 564 * The retry without rtpg_ext_hdr_req set 565 * handles this. 566 */ 567 if (!(pg->flags & ALUA_RTPG_EXT_HDR_UNSUPP) && 568 sense_hdr.sense_key == ILLEGAL_REQUEST && 569 sense_hdr.asc == 0x24 && sense_hdr.ascq == 0) { 570 pg->flags |= ALUA_RTPG_EXT_HDR_UNSUPP; 571 goto retry; 572 } 573 /* 574 * Retry on ALUA state transition or if any 575 * UNIT ATTENTION occurred. 576 */ 577 if (sense_hdr.sense_key == NOT_READY && 578 sense_hdr.asc == 0x04 && sense_hdr.ascq == 0x0a) 579 err = SCSI_DH_RETRY; 580 else if (sense_hdr.sense_key == UNIT_ATTENTION) 581 err = SCSI_DH_RETRY; 582 if (err == SCSI_DH_RETRY && 583 pg->expiry != 0 && time_before(jiffies, pg->expiry)) { 584 sdev_printk(KERN_ERR, sdev, "%s: rtpg retry\n", 585 ALUA_DH_NAME); 586 scsi_print_sense_hdr(sdev, ALUA_DH_NAME, &sense_hdr); 587 kfree(buff); 588 return err; 589 } 590 sdev_printk(KERN_ERR, sdev, "%s: rtpg failed\n", 591 ALUA_DH_NAME); 592 scsi_print_sense_hdr(sdev, ALUA_DH_NAME, &sense_hdr); 593 kfree(buff); 594 pg->expiry = 0; 595 return SCSI_DH_IO; 596 } 597 598 len = get_unaligned_be32(&buff[0]) + 4; 599 600 if (len > bufflen) { 601 /* Resubmit with the correct length */ 602 kfree(buff); 603 bufflen = len; 604 buff = kmalloc(bufflen, GFP_KERNEL); 605 if (!buff) { 606 sdev_printk(KERN_WARNING, sdev, 607 "%s: kmalloc buffer failed\n",__func__); 608 /* Temporary failure, bypass */ 609 pg->expiry = 0; 610 return SCSI_DH_DEV_TEMP_BUSY; 611 } 612 goto retry; 613 } 614 615 orig_transition_tmo = pg->transition_tmo; 616 if ((buff[4] & RTPG_FMT_MASK) == RTPG_FMT_EXT_HDR && buff[5] != 0) 617 pg->transition_tmo = buff[5]; 618 else 619 pg->transition_tmo = ALUA_FAILOVER_TIMEOUT; 620 621 if (orig_transition_tmo != pg->transition_tmo) { 622 sdev_printk(KERN_INFO, sdev, 623 "%s: transition timeout set to %d seconds\n", 624 ALUA_DH_NAME, pg->transition_tmo); 625 pg->expiry = jiffies + pg->transition_tmo * HZ; 626 } 627 628 if ((buff[4] & RTPG_FMT_MASK) == RTPG_FMT_EXT_HDR) 629 tpg_desc_tbl_off = 8; 630 else 631 tpg_desc_tbl_off = 4; 632 633 for (k = tpg_desc_tbl_off, desc = buff + tpg_desc_tbl_off; 634 k < len; 635 k += off, desc += off) { 636 u16 group_id = get_unaligned_be16(&desc[2]); 637 638 spin_lock_irqsave(&port_group_lock, flags); 639 tmp_pg = alua_find_get_pg(pg->device_id_str, pg->device_id_len, 640 group_id); 641 spin_unlock_irqrestore(&port_group_lock, flags); 642 if (tmp_pg) { 643 if (spin_trylock_irqsave(&tmp_pg->lock, flags)) { 644 if ((tmp_pg == pg) || 645 !(tmp_pg->flags & ALUA_PG_RUNNING)) { 646 struct alua_dh_data *h; 647 648 tmp_pg->state = desc[0] & 0x0f; 649 tmp_pg->pref = desc[0] >> 7; 650 rcu_read_lock(); 651 list_for_each_entry_rcu(h, 652 &tmp_pg->dh_list, node) { 653 /* h->sdev should always be valid */ 654 BUG_ON(!h->sdev); 655 h->sdev->access_state = desc[0]; 656 } 657 rcu_read_unlock(); 658 } 659 if (tmp_pg == pg) 660 tmp_pg->valid_states = desc[1]; 661 spin_unlock_irqrestore(&tmp_pg->lock, flags); 662 } 663 kref_put(&tmp_pg->kref, release_port_group); 664 } 665 off = 8 + (desc[7] * 4); 666 } 667 668 spin_lock_irqsave(&pg->lock, flags); 669 sdev_printk(KERN_INFO, sdev, 670 "%s: port group %02x state %c %s supports %c%c%c%c%c%c%c\n", 671 ALUA_DH_NAME, pg->group_id, print_alua_state(pg->state), 672 pg->pref ? "preferred" : "non-preferred", 673 pg->valid_states&TPGS_SUPPORT_TRANSITION?'T':'t', 674 pg->valid_states&TPGS_SUPPORT_OFFLINE?'O':'o', 675 pg->valid_states&TPGS_SUPPORT_LBA_DEPENDENT?'L':'l', 676 pg->valid_states&TPGS_SUPPORT_UNAVAILABLE?'U':'u', 677 pg->valid_states&TPGS_SUPPORT_STANDBY?'S':'s', 678 pg->valid_states&TPGS_SUPPORT_NONOPTIMIZED?'N':'n', 679 pg->valid_states&TPGS_SUPPORT_OPTIMIZED?'A':'a'); 680 681 switch (pg->state) { 682 case SCSI_ACCESS_STATE_TRANSITIONING: 683 if (time_before(jiffies, pg->expiry)) { 684 /* State transition, retry */ 685 pg->interval = 2; 686 err = SCSI_DH_RETRY; 687 } else { 688 struct alua_dh_data *h; 689 690 /* Transitioning time exceeded, set port to standby */ 691 err = SCSI_DH_IO; 692 pg->state = SCSI_ACCESS_STATE_STANDBY; 693 pg->expiry = 0; 694 rcu_read_lock(); 695 list_for_each_entry_rcu(h, &pg->dh_list, node) { 696 BUG_ON(!h->sdev); 697 h->sdev->access_state = 698 (pg->state & SCSI_ACCESS_STATE_MASK); 699 if (pg->pref) 700 h->sdev->access_state |= 701 SCSI_ACCESS_STATE_PREFERRED; 702 } 703 rcu_read_unlock(); 704 } 705 break; 706 case SCSI_ACCESS_STATE_OFFLINE: 707 /* Path unusable */ 708 err = SCSI_DH_DEV_OFFLINED; 709 pg->expiry = 0; 710 break; 711 default: 712 /* Useable path if active */ 713 err = SCSI_DH_OK; 714 pg->expiry = 0; 715 break; 716 } 717 spin_unlock_irqrestore(&pg->lock, flags); 718 kfree(buff); 719 return err; 720 } 721 722 /* 723 * alua_stpg - Issue a SET TARGET PORT GROUP command 724 * 725 * Issue a SET TARGET PORT GROUP command and evaluate the 726 * response. Returns SCSI_DH_RETRY per default to trigger 727 * a re-evaluation of the target group state or SCSI_DH_OK 728 * if no further action needs to be taken. 729 */ 730 static unsigned alua_stpg(struct scsi_device *sdev, struct alua_port_group *pg) 731 { 732 int retval; 733 struct scsi_sense_hdr sense_hdr; 734 735 if (!(pg->tpgs & TPGS_MODE_EXPLICIT)) { 736 /* Only implicit ALUA supported, retry */ 737 return SCSI_DH_RETRY; 738 } 739 switch (pg->state) { 740 case SCSI_ACCESS_STATE_OPTIMAL: 741 return SCSI_DH_OK; 742 case SCSI_ACCESS_STATE_ACTIVE: 743 if ((pg->flags & ALUA_OPTIMIZE_STPG) && 744 !pg->pref && 745 (pg->tpgs & TPGS_MODE_IMPLICIT)) 746 return SCSI_DH_OK; 747 break; 748 case SCSI_ACCESS_STATE_STANDBY: 749 case SCSI_ACCESS_STATE_UNAVAILABLE: 750 break; 751 case SCSI_ACCESS_STATE_OFFLINE: 752 return SCSI_DH_IO; 753 case SCSI_ACCESS_STATE_TRANSITIONING: 754 break; 755 default: 756 sdev_printk(KERN_INFO, sdev, 757 "%s: stpg failed, unhandled TPGS state %d", 758 ALUA_DH_NAME, pg->state); 759 return SCSI_DH_NOSYS; 760 } 761 retval = submit_stpg(sdev, pg->group_id, &sense_hdr); 762 763 if (retval) { 764 if (!scsi_sense_valid(&sense_hdr)) { 765 sdev_printk(KERN_INFO, sdev, 766 "%s: stpg failed, result %d", 767 ALUA_DH_NAME, retval); 768 if (driver_byte(retval) == DRIVER_ERROR) 769 return SCSI_DH_DEV_TEMP_BUSY; 770 } else { 771 sdev_printk(KERN_INFO, sdev, "%s: stpg failed\n", 772 ALUA_DH_NAME); 773 scsi_print_sense_hdr(sdev, ALUA_DH_NAME, &sense_hdr); 774 } 775 } 776 /* Retry RTPG */ 777 return SCSI_DH_RETRY; 778 } 779 780 static void alua_rtpg_work(struct work_struct *work) 781 { 782 struct alua_port_group *pg = 783 container_of(work, struct alua_port_group, rtpg_work.work); 784 struct scsi_device *sdev; 785 LIST_HEAD(qdata_list); 786 int err = SCSI_DH_OK; 787 struct alua_queue_data *qdata, *tmp; 788 unsigned long flags; 789 790 spin_lock_irqsave(&pg->lock, flags); 791 sdev = pg->rtpg_sdev; 792 if (!sdev) { 793 WARN_ON(pg->flags & ALUA_PG_RUN_RTPG); 794 WARN_ON(pg->flags & ALUA_PG_RUN_STPG); 795 spin_unlock_irqrestore(&pg->lock, flags); 796 kref_put(&pg->kref, release_port_group); 797 return; 798 } 799 pg->flags |= ALUA_PG_RUNNING; 800 if (pg->flags & ALUA_PG_RUN_RTPG) { 801 int state = pg->state; 802 803 pg->flags &= ~ALUA_PG_RUN_RTPG; 804 spin_unlock_irqrestore(&pg->lock, flags); 805 if (state == SCSI_ACCESS_STATE_TRANSITIONING) { 806 if (alua_tur(sdev) == SCSI_DH_RETRY) { 807 spin_lock_irqsave(&pg->lock, flags); 808 pg->flags &= ~ALUA_PG_RUNNING; 809 pg->flags |= ALUA_PG_RUN_RTPG; 810 spin_unlock_irqrestore(&pg->lock, flags); 811 queue_delayed_work(kaluad_wq, &pg->rtpg_work, 812 pg->interval * HZ); 813 return; 814 } 815 /* Send RTPG on failure or if TUR indicates SUCCESS */ 816 } 817 err = alua_rtpg(sdev, pg); 818 spin_lock_irqsave(&pg->lock, flags); 819 if (err == SCSI_DH_RETRY || pg->flags & ALUA_PG_RUN_RTPG) { 820 pg->flags &= ~ALUA_PG_RUNNING; 821 pg->flags |= ALUA_PG_RUN_RTPG; 822 spin_unlock_irqrestore(&pg->lock, flags); 823 queue_delayed_work(kaluad_wq, &pg->rtpg_work, 824 pg->interval * HZ); 825 return; 826 } 827 if (err != SCSI_DH_OK) 828 pg->flags &= ~ALUA_PG_RUN_STPG; 829 } 830 if (pg->flags & ALUA_PG_RUN_STPG) { 831 pg->flags &= ~ALUA_PG_RUN_STPG; 832 spin_unlock_irqrestore(&pg->lock, flags); 833 err = alua_stpg(sdev, pg); 834 spin_lock_irqsave(&pg->lock, flags); 835 if (err == SCSI_DH_RETRY || pg->flags & ALUA_PG_RUN_RTPG) { 836 pg->flags |= ALUA_PG_RUN_RTPG; 837 pg->interval = 0; 838 pg->flags &= ~ALUA_PG_RUNNING; 839 spin_unlock_irqrestore(&pg->lock, flags); 840 queue_delayed_work(kaluad_wq, &pg->rtpg_work, 841 pg->interval * HZ); 842 return; 843 } 844 } 845 846 list_splice_init(&pg->rtpg_list, &qdata_list); 847 pg->rtpg_sdev = NULL; 848 spin_unlock_irqrestore(&pg->lock, flags); 849 850 list_for_each_entry_safe(qdata, tmp, &qdata_list, entry) { 851 list_del(&qdata->entry); 852 if (qdata->callback_fn) 853 qdata->callback_fn(qdata->callback_data, err); 854 kfree(qdata); 855 } 856 spin_lock_irqsave(&pg->lock, flags); 857 pg->flags &= ~ALUA_PG_RUNNING; 858 spin_unlock_irqrestore(&pg->lock, flags); 859 scsi_device_put(sdev); 860 kref_put(&pg->kref, release_port_group); 861 } 862 863 /** 864 * alua_rtpg_queue() - cause RTPG to be submitted asynchronously 865 * @pg: ALUA port group associated with @sdev. 866 * @sdev: SCSI device for which to submit an RTPG. 867 * @qdata: Information about the callback to invoke after the RTPG. 868 * @force: Whether or not to submit an RTPG if a work item that will submit an 869 * RTPG already has been scheduled. 870 * 871 * Returns true if and only if alua_rtpg_work() will be called asynchronously. 872 * That function is responsible for calling @qdata->fn(). 873 */ 874 static bool alua_rtpg_queue(struct alua_port_group *pg, 875 struct scsi_device *sdev, 876 struct alua_queue_data *qdata, bool force) 877 { 878 int start_queue = 0; 879 unsigned long flags; 880 if (WARN_ON_ONCE(!pg) || scsi_device_get(sdev)) 881 return false; 882 883 spin_lock_irqsave(&pg->lock, flags); 884 if (qdata) { 885 list_add_tail(&qdata->entry, &pg->rtpg_list); 886 pg->flags |= ALUA_PG_RUN_STPG; 887 force = true; 888 } 889 if (pg->rtpg_sdev == NULL) { 890 pg->interval = 0; 891 pg->flags |= ALUA_PG_RUN_RTPG; 892 kref_get(&pg->kref); 893 pg->rtpg_sdev = sdev; 894 start_queue = 1; 895 } else if (!(pg->flags & ALUA_PG_RUN_RTPG) && force) { 896 pg->flags |= ALUA_PG_RUN_RTPG; 897 /* Do not queue if the worker is already running */ 898 if (!(pg->flags & ALUA_PG_RUNNING)) { 899 kref_get(&pg->kref); 900 start_queue = 1; 901 } 902 } 903 904 spin_unlock_irqrestore(&pg->lock, flags); 905 906 if (start_queue) { 907 if (queue_delayed_work(kaluad_wq, &pg->rtpg_work, 908 msecs_to_jiffies(ALUA_RTPG_DELAY_MSECS))) 909 sdev = NULL; 910 else 911 kref_put(&pg->kref, release_port_group); 912 } 913 if (sdev) 914 scsi_device_put(sdev); 915 916 return true; 917 } 918 919 /* 920 * alua_initialize - Initialize ALUA state 921 * @sdev: the device to be initialized 922 * 923 * For the prep_fn to work correctly we have 924 * to initialize the ALUA state for the device. 925 */ 926 static int alua_initialize(struct scsi_device *sdev, struct alua_dh_data *h) 927 { 928 int err = SCSI_DH_DEV_UNSUPP, tpgs; 929 930 mutex_lock(&h->init_mutex); 931 tpgs = alua_check_tpgs(sdev); 932 if (tpgs != TPGS_MODE_NONE) 933 err = alua_check_vpd(sdev, h, tpgs); 934 h->init_error = err; 935 mutex_unlock(&h->init_mutex); 936 return err; 937 } 938 /* 939 * alua_set_params - set/unset the optimize flag 940 * @sdev: device on the path to be activated 941 * params - parameters in the following format 942 * "no_of_params\0param1\0param2\0param3\0...\0" 943 * For example, to set the flag pass the following parameters 944 * from multipath.conf 945 * hardware_handler "2 alua 1" 946 */ 947 static int alua_set_params(struct scsi_device *sdev, const char *params) 948 { 949 struct alua_dh_data *h = sdev->handler_data; 950 struct alua_port_group *pg = NULL; 951 unsigned int optimize = 0, argc; 952 const char *p = params; 953 int result = SCSI_DH_OK; 954 unsigned long flags; 955 956 if ((sscanf(params, "%u", &argc) != 1) || (argc != 1)) 957 return -EINVAL; 958 959 while (*p++) 960 ; 961 if ((sscanf(p, "%u", &optimize) != 1) || (optimize > 1)) 962 return -EINVAL; 963 964 rcu_read_lock(); 965 pg = rcu_dereference(h->pg); 966 if (!pg) { 967 rcu_read_unlock(); 968 return -ENXIO; 969 } 970 spin_lock_irqsave(&pg->lock, flags); 971 if (optimize) 972 pg->flags |= ALUA_OPTIMIZE_STPG; 973 else 974 pg->flags &= ~ALUA_OPTIMIZE_STPG; 975 spin_unlock_irqrestore(&pg->lock, flags); 976 rcu_read_unlock(); 977 978 return result; 979 } 980 981 /* 982 * alua_activate - activate a path 983 * @sdev: device on the path to be activated 984 * 985 * We're currently switching the port group to be activated only and 986 * let the array figure out the rest. 987 * There may be other arrays which require us to switch all port groups 988 * based on a certain policy. But until we actually encounter them it 989 * should be okay. 990 */ 991 static int alua_activate(struct scsi_device *sdev, 992 activate_complete fn, void *data) 993 { 994 struct alua_dh_data *h = sdev->handler_data; 995 int err = SCSI_DH_OK; 996 struct alua_queue_data *qdata; 997 struct alua_port_group *pg; 998 999 qdata = kzalloc(sizeof(*qdata), GFP_KERNEL); 1000 if (!qdata) { 1001 err = SCSI_DH_RES_TEMP_UNAVAIL; 1002 goto out; 1003 } 1004 qdata->callback_fn = fn; 1005 qdata->callback_data = data; 1006 1007 mutex_lock(&h->init_mutex); 1008 rcu_read_lock(); 1009 pg = rcu_dereference(h->pg); 1010 if (!pg || !kref_get_unless_zero(&pg->kref)) { 1011 rcu_read_unlock(); 1012 kfree(qdata); 1013 err = h->init_error; 1014 mutex_unlock(&h->init_mutex); 1015 goto out; 1016 } 1017 rcu_read_unlock(); 1018 mutex_unlock(&h->init_mutex); 1019 1020 if (alua_rtpg_queue(pg, sdev, qdata, true)) 1021 fn = NULL; 1022 else 1023 err = SCSI_DH_DEV_OFFLINED; 1024 kref_put(&pg->kref, release_port_group); 1025 out: 1026 if (fn) 1027 fn(data, err); 1028 return 0; 1029 } 1030 1031 /* 1032 * alua_check - check path status 1033 * @sdev: device on the path to be checked 1034 * 1035 * Check the device status 1036 */ 1037 static void alua_check(struct scsi_device *sdev, bool force) 1038 { 1039 struct alua_dh_data *h = sdev->handler_data; 1040 struct alua_port_group *pg; 1041 1042 rcu_read_lock(); 1043 pg = rcu_dereference(h->pg); 1044 if (!pg || !kref_get_unless_zero(&pg->kref)) { 1045 rcu_read_unlock(); 1046 return; 1047 } 1048 rcu_read_unlock(); 1049 1050 alua_rtpg_queue(pg, sdev, NULL, force); 1051 kref_put(&pg->kref, release_port_group); 1052 } 1053 1054 /* 1055 * alua_prep_fn - request callback 1056 * 1057 * Fail I/O to all paths not in state 1058 * active/optimized or active/non-optimized. 1059 */ 1060 static blk_status_t alua_prep_fn(struct scsi_device *sdev, struct request *req) 1061 { 1062 struct alua_dh_data *h = sdev->handler_data; 1063 struct alua_port_group *pg; 1064 unsigned char state = SCSI_ACCESS_STATE_OPTIMAL; 1065 1066 rcu_read_lock(); 1067 pg = rcu_dereference(h->pg); 1068 if (pg) 1069 state = pg->state; 1070 rcu_read_unlock(); 1071 1072 switch (state) { 1073 case SCSI_ACCESS_STATE_OPTIMAL: 1074 case SCSI_ACCESS_STATE_ACTIVE: 1075 case SCSI_ACCESS_STATE_LBA: 1076 return BLK_STS_OK; 1077 case SCSI_ACCESS_STATE_TRANSITIONING: 1078 return BLK_STS_RESOURCE; 1079 default: 1080 req->rq_flags |= RQF_QUIET; 1081 return BLK_STS_IOERR; 1082 } 1083 } 1084 1085 static void alua_rescan(struct scsi_device *sdev) 1086 { 1087 struct alua_dh_data *h = sdev->handler_data; 1088 1089 alua_initialize(sdev, h); 1090 } 1091 1092 /* 1093 * alua_bus_attach - Attach device handler 1094 * @sdev: device to be attached to 1095 */ 1096 static int alua_bus_attach(struct scsi_device *sdev) 1097 { 1098 struct alua_dh_data *h; 1099 int err; 1100 1101 h = kzalloc(sizeof(*h) , GFP_KERNEL); 1102 if (!h) 1103 return SCSI_DH_NOMEM; 1104 spin_lock_init(&h->pg_lock); 1105 rcu_assign_pointer(h->pg, NULL); 1106 h->init_error = SCSI_DH_OK; 1107 h->sdev = sdev; 1108 INIT_LIST_HEAD(&h->node); 1109 1110 mutex_init(&h->init_mutex); 1111 err = alua_initialize(sdev, h); 1112 if (err != SCSI_DH_OK && err != SCSI_DH_DEV_OFFLINED) 1113 goto failed; 1114 1115 sdev->handler_data = h; 1116 return SCSI_DH_OK; 1117 failed: 1118 kfree(h); 1119 return err; 1120 } 1121 1122 /* 1123 * alua_bus_detach - Detach device handler 1124 * @sdev: device to be detached from 1125 */ 1126 static void alua_bus_detach(struct scsi_device *sdev) 1127 { 1128 struct alua_dh_data *h = sdev->handler_data; 1129 struct alua_port_group *pg; 1130 1131 spin_lock(&h->pg_lock); 1132 pg = rcu_dereference_protected(h->pg, lockdep_is_held(&h->pg_lock)); 1133 rcu_assign_pointer(h->pg, NULL); 1134 h->sdev = NULL; 1135 spin_unlock(&h->pg_lock); 1136 if (pg) { 1137 spin_lock_irq(&pg->lock); 1138 list_del_rcu(&h->node); 1139 spin_unlock_irq(&pg->lock); 1140 kref_put(&pg->kref, release_port_group); 1141 } 1142 sdev->handler_data = NULL; 1143 kfree(h); 1144 } 1145 1146 static struct scsi_device_handler alua_dh = { 1147 .name = ALUA_DH_NAME, 1148 .module = THIS_MODULE, 1149 .attach = alua_bus_attach, 1150 .detach = alua_bus_detach, 1151 .prep_fn = alua_prep_fn, 1152 .check_sense = alua_check_sense, 1153 .activate = alua_activate, 1154 .rescan = alua_rescan, 1155 .set_params = alua_set_params, 1156 }; 1157 1158 static int __init alua_init(void) 1159 { 1160 int r; 1161 1162 kaluad_wq = alloc_workqueue("kaluad", WQ_MEM_RECLAIM, 0); 1163 if (!kaluad_wq) 1164 return -ENOMEM; 1165 1166 r = scsi_register_device_handler(&alua_dh); 1167 if (r != 0) { 1168 printk(KERN_ERR "%s: Failed to register scsi device handler", 1169 ALUA_DH_NAME); 1170 destroy_workqueue(kaluad_wq); 1171 } 1172 return r; 1173 } 1174 1175 static void __exit alua_exit(void) 1176 { 1177 scsi_unregister_device_handler(&alua_dh); 1178 destroy_workqueue(kaluad_wq); 1179 } 1180 1181 module_init(alua_init); 1182 module_exit(alua_exit); 1183 1184 MODULE_DESCRIPTION("DM Multipath ALUA support"); 1185 MODULE_AUTHOR("Hannes Reinecke <hare@suse.de>"); 1186 MODULE_LICENSE("GPL"); 1187 MODULE_VERSION(ALUA_DH_VER); 1188