// SPDX-License-Identifier: GPL-2.0-or-later
/*
 *  libata-eh.c - libata error handling
 *
 *  Copyright 2006 Tejun Heo <htejun@gmail.com>
 *
 *  libata documentation is available via 'make {ps|pdf}docs',
 *  as Documentation/driver-api/libata.rst
 *
 *  Hardware documentation available from http://www.t13.org/ and
 *  http://www.sata-io.org/
 */

#include <linux/kernel.h>
#include <linux/blkdev.h>
#include <linux/export.h>
#include <linux/pci.h>
#include <scsi/scsi.h>
#include <scsi/scsi_host.h>
#include <scsi/scsi_eh.h>
#include <scsi/scsi_device.h>
#include <scsi/scsi_cmnd.h>
#include <scsi/scsi_dbg.h>
#include "../scsi/scsi_transport_api.h"

#include <linux/libata.h>

#include <trace/events/libata.h>
#include "libata.h"

enum {
	/* speed down verdicts */
	ATA_EH_SPDN_NCQ_OFF		= (1 << 0),
	ATA_EH_SPDN_SPEED_DOWN		= (1 << 1),
	ATA_EH_SPDN_FALLBACK_TO_PIO	= (1 << 2),
	ATA_EH_SPDN_KEEP_ERRORS		= (1 << 3),

	/* error flags */
	ATA_EFLAG_IS_IO			= (1 << 0),
	ATA_EFLAG_DUBIOUS_XFER		= (1 << 1),
	ATA_EFLAG_OLD_ER		= (1 << 31),

	/* error categories */
	ATA_ECAT_NONE			= 0,
	ATA_ECAT_ATA_BUS		= 1,
	ATA_ECAT_TOUT_HSM		= 2,
	ATA_ECAT_UNK_DEV		= 3,
	ATA_ECAT_DUBIOUS_NONE		= 4,
	ATA_ECAT_DUBIOUS_ATA_BUS	= 5,
	ATA_ECAT_DUBIOUS_TOUT_HSM	= 6,
	ATA_ECAT_DUBIOUS_UNK_DEV	= 7,
	ATA_ECAT_NR			= 8,

	ATA_EH_CMD_DFL_TIMEOUT		= 5000,

	/* always put at least this amount of time between resets */
	ATA_EH_RESET_COOL_DOWN		= 5000,

	/* Waiting in ->prereset can never be reliable.  It's
	 * sometimes nice to wait there but it can't be depended upon;
	 * otherwise, we wouldn't be resetting.  Just give it enough
	 * time for most drives to spin up.
	 */
	ATA_EH_PRERESET_TIMEOUT		= 10000,
	ATA_EH_FASTDRAIN_INTERVAL	= 3000,

	ATA_EH_UA_TRIES			= 5,

	/* probe speed down parameters, see ata_eh_schedule_probe() */
	ATA_EH_PROBE_TRIAL_INTERVAL	= 60000,	/* 1 min */
	ATA_EH_PROBE_TRIALS		= 2,
};

/* The following table determines how we sequence resets.  Each entry
 * represents timeout for that try.  The first try can be soft or
 * hardreset.  All others are hardreset if available.  In most cases
 * the first reset w/ 10sec timeout should succeed.  Following entries
 * are mostly for error handling, hotplug and those outlier devices that
 * take an exceptionally long time to recover from reset.
 */
static const unsigned int ata_eh_reset_timeouts[] = {
	10000,	/* most drives spin up by 10sec */
	10000,	/* > 99% working drives spin up before 20sec */
	35000,	/* give > 30 secs of idleness for outlier devices */
	 5000,	/* and sweet one last chance */
	UINT_MAX, /* > 1 min has elapsed, give up */
};

static const unsigned int ata_eh_identify_timeouts[] = {
	 5000,	/* covers > 99% of successes and not too boring on failures */
	10000,  /* combined time till here is enough even for media access */
	30000,	/* for true idiots */
	UINT_MAX,
};

static const unsigned int ata_eh_revalidate_timeouts[] = {
	15000,	/* Some drives are slow to read log pages when waking-up */
	15000,  /* combined time till here is enough even for media access */
	UINT_MAX,
};

static const unsigned int ata_eh_flush_timeouts[] = {
	15000,	/* be generous with flush */
	15000,  /* ditto */
	30000,	/* and even more generous */
	UINT_MAX,
};

static const unsigned int ata_eh_other_timeouts[] = {
	 5000,	/* same rationale as identify timeout */
	10000,	/* ditto */
	/* but no merciful 30sec for other commands, it just isn't worth it */
	UINT_MAX,
};

struct ata_eh_cmd_timeout_ent {
	const u8		*commands;
	const unsigned int	*timeouts;
};

/* The following table determines timeouts to use for EH internal
 * commands.  Each table entry is a command class and matches the
 * commands the entry applies to and the timeout table to use.
 *
 * On the retry after a command timed out, the next timeout value from
 * the table is used.  If the table doesn't contain further entries,
 * the last value is used.
 *
 * ehc->cmd_timeout_idx keeps track of which timeout to use per
 * command class, so if SET_FEATURES times out on the first try, the
 * next try will use the second timeout value only for that class.
 */
#define CMDS(cmds...)	(const u8 []){ cmds, 0 }
static const struct ata_eh_cmd_timeout_ent
ata_eh_cmd_timeout_table[ATA_EH_CMD_TIMEOUT_TABLE_SIZE] = {
	{ .commands = CMDS(ATA_CMD_ID_ATA, ATA_CMD_ID_ATAPI),
	  .timeouts = ata_eh_identify_timeouts, },
	{ .commands = CMDS(ATA_CMD_READ_LOG_EXT, ATA_CMD_READ_LOG_DMA_EXT),
	  .timeouts = ata_eh_revalidate_timeouts, },
	{ .commands = CMDS(ATA_CMD_READ_NATIVE_MAX, ATA_CMD_READ_NATIVE_MAX_EXT),
	  .timeouts = ata_eh_other_timeouts, },
	{ .commands = CMDS(ATA_CMD_SET_MAX, ATA_CMD_SET_MAX_EXT),
	  .timeouts = ata_eh_other_timeouts, },
	{ .commands = CMDS(ATA_CMD_SET_FEATURES),
	  .timeouts = ata_eh_other_timeouts, },
	{ .commands = CMDS(ATA_CMD_INIT_DEV_PARAMS),
	  .timeouts = ata_eh_other_timeouts, },
	{ .commands = CMDS(ATA_CMD_FLUSH, ATA_CMD_FLUSH_EXT),
	  .timeouts = ata_eh_flush_timeouts },
	{ .commands = CMDS(ATA_CMD_VERIFY),
	  .timeouts = ata_eh_reset_timeouts },
};
#undef CMDS

static void __ata_port_freeze(struct ata_port *ap);
static int ata_eh_set_lpm(struct ata_link *link, enum ata_lpm_policy policy,
			  struct ata_device **r_failed_dev);
#ifdef CONFIG_PM
static void ata_eh_handle_port_suspend(struct ata_port *ap);
static void ata_eh_handle_port_resume(struct ata_port *ap);
#else /* CONFIG_PM */
static void ata_eh_handle_port_suspend(struct ata_port *ap)
{ }

static void ata_eh_handle_port_resume(struct ata_port *ap)
{ }
#endif /* CONFIG_PM */

static __printf(2, 0) void __ata_ehi_pushv_desc(struct ata_eh_info *ehi,
				 const char *fmt, va_list args)
{
	ehi->desc_len += vscnprintf(ehi->desc + ehi->desc_len,
				     ATA_EH_DESC_LEN - ehi->desc_len,
				     fmt, args);
}

/**
 *	__ata_ehi_push_desc - push error description without adding separator
 *	@ehi: target EHI
 *	@fmt: printf format string
 *
 *	Format string according to @fmt and append it to @ehi->desc.
 *
 *	LOCKING:
 *	spin_lock_irqsave(host lock)
 */
void __ata_ehi_push_desc(struct ata_eh_info *ehi, const char *fmt, ...)
{
	va_list args;

	va_start(args, fmt);
	__ata_ehi_pushv_desc(ehi, fmt, args);
	va_end(args);
}
EXPORT_SYMBOL_GPL(__ata_ehi_push_desc);

/**
 *	ata_ehi_push_desc - push error description with separator
 *	@ehi: target EHI
 *	@fmt: printf format string
 *
 *	Format string according to @fmt and append it to @ehi->desc.
 *	If @ehi->desc is not empty, ", " is added in-between.
 *
 *	LOCKING:
 *	spin_lock_irqsave(host lock)
 */
void ata_ehi_push_desc(struct ata_eh_info *ehi, const char *fmt, ...)
{
	va_list args;

	if (ehi->desc_len)
		__ata_ehi_push_desc(ehi, ", ");

	va_start(args, fmt);
	__ata_ehi_pushv_desc(ehi, fmt, args);
	va_end(args);
}
EXPORT_SYMBOL_GPL(ata_ehi_push_desc);

/**
 *	ata_ehi_clear_desc - clean error description
 *	@ehi: target EHI
 *
 *	Clear @ehi->desc.
 *
 *	LOCKING:
 *	spin_lock_irqsave(host lock)
 */
void ata_ehi_clear_desc(struct ata_eh_info *ehi)
{
	ehi->desc[0] = '\0';
	ehi->desc_len = 0;
}
EXPORT_SYMBOL_GPL(ata_ehi_clear_desc);

/**
 *	ata_port_desc - append port description
 *	@ap: target ATA port
 *	@fmt: printf format string
 *
 *	Format string according to @fmt and append it to port
 *	description.  If port description is not empty, " " is added
 *	in-between.  This function is to be used while initializing
 *	ata_host.  The description is printed on host registration.
 *
 *	LOCKING:
 *	None.
 */
void ata_port_desc(struct ata_port *ap, const char *fmt, ...)
{
	va_list args;

	WARN_ON(!(ap->pflags & ATA_PFLAG_INITIALIZING));

	if (ap->link.eh_info.desc_len)
		__ata_ehi_push_desc(&ap->link.eh_info, " ");

	va_start(args, fmt);
	__ata_ehi_pushv_desc(&ap->link.eh_info, fmt, args);
	va_end(args);
}
EXPORT_SYMBOL_GPL(ata_port_desc);
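
/*
 * Illustrative usage sketch (not part of this file): an LLD typically
 * builds the EH description from its interrupt handler while holding the
 * host lock, and appends port descriptions once during host init.  The
 * "irq_stat" and "irq" values below are hypothetical:
 *
 *	struct ata_eh_info *ehi = &ap->link.eh_info;
 *
 *	ata_ehi_clear_desc(ehi);
 *	ata_ehi_push_desc(ehi, "irq_stat 0x%08x", irq_stat);
 *
 * and, while ATA_PFLAG_INITIALIZING is still set:
 *
 *	ata_port_desc(ap, "irq %d", irq);
 */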

#ifdef CONFIG_PCI
/**
 *	ata_port_pbar_desc - append PCI BAR description
 *	@ap: target ATA port
 *	@bar: target PCI BAR
 *	@offset: offset into PCI BAR
 *	@name: name of the area
 *
 *	If @offset is negative, this function formats a string which
 *	contains the name, address, size and type of the BAR and
 *	appends it to the port description.  If @offset is zero or
 *	positive, only the name and the offset address are appended.
 *
 *	LOCKING:
 *	None.
 */
void ata_port_pbar_desc(struct ata_port *ap, int bar, ssize_t offset,
			const char *name)
{
	struct pci_dev *pdev = to_pci_dev(ap->host->dev);
	char *type = "";
	unsigned long long start, len;

	if (pci_resource_flags(pdev, bar) & IORESOURCE_MEM)
		type = "m";
	else if (pci_resource_flags(pdev, bar) & IORESOURCE_IO)
		type = "i";

	start = (unsigned long long)pci_resource_start(pdev, bar);
	len = (unsigned long long)pci_resource_len(pdev, bar);

	if (offset < 0)
		ata_port_desc(ap, "%s %s%llu@0x%llx", name, type, len, start);
	else
		ata_port_desc(ap, "%s 0x%llx", name,
				start + (unsigned long long)offset);
}
EXPORT_SYMBOL_GPL(ata_port_pbar_desc);
#endif /* CONFIG_PCI */

static int ata_lookup_timeout_table(u8 cmd)
{
	int i;

	for (i = 0; i < ATA_EH_CMD_TIMEOUT_TABLE_SIZE; i++) {
		const u8 *cur;

		for (cur = ata_eh_cmd_timeout_table[i].commands; *cur; cur++)
			if (*cur == cmd)
				return i;
	}

	return -1;
}

/**
 *	ata_internal_cmd_timeout - determine timeout for an internal command
 *	@dev: target device
 *	@cmd: internal command to be issued
 *
 *	Determine timeout for internal command @cmd for @dev.
 *
 *	LOCKING:
 *	EH context.
 *
 *	RETURNS:
 *	Determined timeout.
 */
unsigned int ata_internal_cmd_timeout(struct ata_device *dev, u8 cmd)
{
	struct ata_eh_context *ehc = &dev->link->eh_context;
	int ent = ata_lookup_timeout_table(cmd);
	int idx;

	if (ent < 0)
		return ATA_EH_CMD_DFL_TIMEOUT;

	idx = ehc->cmd_timeout_idx[dev->devno][ent];
	return ata_eh_cmd_timeout_table[ent].timeouts[idx];
}

/**
 *	ata_internal_cmd_timed_out - notification for internal command timeout
 *	@dev: target device
 *	@cmd: internal command which timed out
 *
 *	Notify EH that internal command @cmd for @dev timed out.  This
 *	function should be called only for commands whose timeouts are
 *	determined using ata_internal_cmd_timeout().
 *
 *	LOCKING:
 *	EH context.
 */
void ata_internal_cmd_timed_out(struct ata_device *dev, u8 cmd)
{
	struct ata_eh_context *ehc = &dev->link->eh_context;
	int ent = ata_lookup_timeout_table(cmd);
	int idx;

	if (ent < 0)
		return;

	idx = ehc->cmd_timeout_idx[dev->devno][ent];
	if (ata_eh_cmd_timeout_table[ent].timeouts[idx + 1] != UINT_MAX)
		ehc->cmd_timeout_idx[dev->devno][ent]++;
}
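
/*
 * Illustrative sketch (not part of this file): how the two helpers above
 * cooperate when EH retries an internal command.  Each timeout bumps the
 * per-class index, so the next try picks the next (longer) table entry:
 *
 *	struct ata_taskfile tf;
 *	unsigned int timeout, err_mask;
 *
 *	timeout = ata_internal_cmd_timeout(dev, ATA_CMD_ID_ATA);
 *	err_mask = ata_exec_internal(dev, &tf, NULL, DMA_NONE, NULL, 0,
 *				     timeout);
 *	if (err_mask & AC_ERR_TIMEOUT)
 *		ata_internal_cmd_timed_out(dev, ATA_CMD_ID_ATA);
 *
 * For IDENTIFY this walks 5000 -> 10000 -> 30000 ms and then stays at
 * 30000 ms, since the index is never advanced onto the UINT_MAX
 * terminator.
 */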

static void ata_ering_record(struct ata_ering *ering, unsigned int eflags,
			     unsigned int err_mask)
{
	struct ata_ering_entry *ent;

	WARN_ON(!err_mask);

	ering->cursor++;
	ering->cursor %= ATA_ERING_SIZE;

	ent = &ering->ring[ering->cursor];
	ent->eflags = eflags;
	ent->err_mask = err_mask;
	ent->timestamp = get_jiffies_64();
}

static struct ata_ering_entry *ata_ering_top(struct ata_ering *ering)
{
	struct ata_ering_entry *ent = &ering->ring[ering->cursor];

	if (ent->err_mask)
		return ent;
	return NULL;
}

int ata_ering_map(struct ata_ering *ering,
		  int (*map_fn)(struct ata_ering_entry *, void *),
		  void *arg)
{
	int idx, rc = 0;
	struct ata_ering_entry *ent;

	idx = ering->cursor;
	do {
		ent = &ering->ring[idx];
		if (!ent->err_mask)
			break;
		rc = map_fn(ent, arg);
		if (rc)
			break;
		idx = (idx - 1 + ATA_ERING_SIZE) % ATA_ERING_SIZE;
	} while (idx != ering->cursor);

	return rc;
}

static int ata_ering_clear_cb(struct ata_ering_entry *ent, void *void_arg)
{
	ent->eflags |= ATA_EFLAG_OLD_ER;
	return 0;
}

static void ata_ering_clear(struct ata_ering *ering)
{
	ata_ering_map(ering, ata_ering_clear_cb, NULL);
}

static unsigned int ata_eh_dev_action(struct ata_device *dev)
{
	struct ata_eh_context *ehc = &dev->link->eh_context;

	return ehc->i.action | ehc->i.dev_action[dev->devno];
}

static void ata_eh_clear_action(struct ata_link *link, struct ata_device *dev,
				struct ata_eh_info *ehi, unsigned int action)
{
	struct ata_device *tdev;

	if (!dev) {
		ehi->action &= ~action;
		ata_for_each_dev(tdev, link, ALL)
			ehi->dev_action[tdev->devno] &= ~action;
	} else {
		/* doesn't make sense for port-wide EH actions */
		WARN_ON(!(action & ATA_EH_PERDEV_MASK));

		/* break ehi->action into ehi->dev_action */
		if (ehi->action & action) {
			ata_for_each_dev(tdev, link, ALL)
				ehi->dev_action[tdev->devno] |=
					ehi->action & action;
			ehi->action &= ~action;
		}

		/* turn off the specified per-dev action */
		ehi->dev_action[dev->devno] &= ~action;
	}
}

/**
 *	ata_eh_acquire - acquire EH ownership
 *	@ap: ATA port to acquire EH ownership for
 *
 *	Acquire EH ownership for @ap.  This is the basic exclusion
 *	mechanism for ports sharing a host.  Only one port hanging off
 *	the same host can claim the ownership of EH.
 *
 *	LOCKING:
 *	EH context.
 */
void ata_eh_acquire(struct ata_port *ap)
{
	mutex_lock(&ap->host->eh_mutex);
	WARN_ON_ONCE(ap->host->eh_owner);
	ap->host->eh_owner = current;
}

/**
 *	ata_eh_release - release EH ownership
 *	@ap: ATA port to release EH ownership for
 *
 *	Release EH ownership for @ap.  The caller must have acquired
 *	EH ownership using ata_eh_acquire() previously.
 *
 *	LOCKING:
 *	EH context.
 */
void ata_eh_release(struct ata_port *ap)
{
	WARN_ON_ONCE(ap->host->eh_owner != current);
	ap->host->eh_owner = NULL;
	mutex_unlock(&ap->host->eh_mutex);
}
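
/*
 * Illustrative sketch (not part of this file): EH ownership brackets an
 * entire recovery pass, exactly as ata_scsi_port_error_handler() does
 * below.  Only one port per host may own EH at a time; sleeping helpers
 * such as ata_msleep() temporarily drop and re-take the ownership:
 *
 *	ata_eh_acquire(ap);
 *	...				(recovery work, may sleep)
 *	ata_eh_release(ap);
 */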

static void ata_eh_unload(struct ata_port *ap)
{
	struct ata_link *link;
	struct ata_device *dev;
	unsigned long flags;

	/*
	 * Unless we are restarting, transition all enabled devices to
	 * standby power mode.
	 */
	if (system_state != SYSTEM_RESTART) {
		ata_for_each_link(link, ap, PMP_FIRST) {
			ata_for_each_dev(dev, link, ENABLED)
				ata_dev_power_set_standby(dev);
		}
	}

	/*
	 * Restore SControl IPM and SPD for the next driver and
	 * disable attached devices.
	 */
	ata_for_each_link(link, ap, PMP_FIRST) {
		sata_scr_write(link, SCR_CONTROL, link->saved_scontrol & 0xff0);
		ata_for_each_dev(dev, link, ALL)
			ata_dev_disable(dev);
	}

	/* freeze and set UNLOADED */
	spin_lock_irqsave(ap->lock, flags);

	ata_port_freeze(ap);			/* won't be thawed */
	ap->pflags &= ~ATA_PFLAG_EH_PENDING;	/* clear pending from freeze */
	ap->pflags |= ATA_PFLAG_UNLOADED;

	spin_unlock_irqrestore(ap->lock, flags);
}

/**
 *	ata_scsi_error - SCSI layer error handler callback
 *	@host: SCSI host on which error occurred
 *
 *	Handles SCSI-layer-thrown error events.
 *
 *	LOCKING:
 *	Inherited from SCSI layer (none, can sleep)
 *
 *	RETURNS:
 *	Zero.
 */
void ata_scsi_error(struct Scsi_Host *host)
{
	struct ata_port *ap = ata_shost_to_port(host);
	unsigned long flags;
	LIST_HEAD(eh_work_q);

	spin_lock_irqsave(host->host_lock, flags);
	list_splice_init(&host->eh_cmd_q, &eh_work_q);
	spin_unlock_irqrestore(host->host_lock, flags);

	ata_scsi_cmd_error_handler(host, ap, &eh_work_q);

	/* If we raced with normal completion and there is nothing to
	 * recover (nr_timedout == 0), why exactly are we doing error
	 * recovery?
	 */
	ata_scsi_port_error_handler(host, ap);

	/* finish or retry handled scmd's and clean up */
	WARN_ON(!list_empty(&eh_work_q));
}

/**
 *	ata_scsi_cmd_error_handler - error callback for a list of commands
 *	@host:	scsi host containing the port
 *	@ap:	ATA port within the host
 *	@eh_work_q: list of commands to process
 *
 *	Process the given list of commands and return those finished to the
 *	ap->eh_done_q.  This function is the first part of the libata error
 *	handler which processes a given list of failed commands.
 */
void ata_scsi_cmd_error_handler(struct Scsi_Host *host, struct ata_port *ap,
				struct list_head *eh_work_q)
{
	int i;
	unsigned long flags;
	struct scsi_cmnd *scmd, *tmp;
	int nr_timedout = 0;

	/* make sure sff pio task is not running */
	ata_sff_flush_pio_task(ap);

	/* synchronize with host lock and sort out timeouts */

	/*
	 * For EH, all qcs are finished in one of three ways -
	 * normal completion, error completion, and SCSI timeout.
	 * Both completions can race against SCSI timeout.  When normal
	 * completion wins, the qc never reaches EH.  When error
	 * completion wins, the qc has ATA_QCFLAG_EH set.
	 *
	 * When SCSI timeout wins, things are a bit more complex.
	 * Normal or error completion can occur after the timeout but
	 * before this point.  In such cases, both types of
	 * completions are honored.  A scmd is determined to have
	 * timed out iff its associated qc is active and not failed.
	 */
	spin_lock_irqsave(ap->lock, flags);

	/*
	 * This must occur under the ap->lock as we don't want
	 * a polled recovery to race the real interrupt handler
	 *
	 * The lost_interrupt handler checks for any completed but
	 * non-notified command and completes much like an IRQ handler.
	 *
	 * We then fall into the error recovery code which will treat
	 * this as if normal completion won the race.
	 */
	if (ap->ops->lost_interrupt)
		ap->ops->lost_interrupt(ap);

	list_for_each_entry_safe(scmd, tmp, eh_work_q, eh_entry) {
		struct ata_queued_cmd *qc;

		ata_qc_for_each_raw(ap, qc, i) {
			if (qc->flags & ATA_QCFLAG_ACTIVE &&
			    qc->scsicmd == scmd)
				break;
		}

		if (i < ATA_MAX_QUEUE) {
			/* the scmd has an associated qc */
			if (!(qc->flags & ATA_QCFLAG_EH)) {
				/* which hasn't failed yet, timeout */
				qc->err_mask |= AC_ERR_TIMEOUT;
				qc->flags |= ATA_QCFLAG_EH;
				nr_timedout++;
			}
		} else {
			/* Normal completion occurred after
			 * SCSI timeout but before this point.
			 * Successfully complete it.
			 */
			scmd->retries = scmd->allowed;
			scsi_eh_finish_cmd(scmd, &ap->eh_done_q);
		}
	}

	/*
	 * If we have timed out qcs, they belong to EH from this point
	 * but the state of the controller is unknown.  Freeze the port
	 * to make sure the IRQ handler doesn't diddle with those qcs.
	 * This must be done atomically w.r.t. setting ATA_QCFLAG_EH.
	 */
	if (nr_timedout)
		__ata_port_freeze(ap);

	/* initialize eh_tries */
	ap->eh_tries = ATA_EH_MAX_TRIES;

	spin_unlock_irqrestore(ap->lock, flags);
}
EXPORT_SYMBOL(ata_scsi_cmd_error_handler);
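
/*
 * Illustrative sketch (not part of this file): a transport driver that
 * cannot use ata_scsi_error() wholesale can run the two exported halves
 * itself, mirroring ata_scsi_error() above (assumes a host with a single
 * ATA port):
 *
 *	LIST_HEAD(eh_work_q);
 *
 *	spin_lock_irqsave(host->host_lock, flags);
 *	list_splice_init(&host->eh_cmd_q, &eh_work_q);
 *	spin_unlock_irqrestore(host->host_lock, flags);
 *
 *	ata_scsi_cmd_error_handler(host, ap, &eh_work_q);
 *	ata_scsi_port_error_handler(host, ap);
 */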

/**
 *	ata_scsi_port_error_handler - recover the port after the commands
 *	@host:	SCSI host containing the port
 *	@ap:	the ATA port
 *
 *	Handle the recovery of the port @ap after all the commands
 *	have been recovered.
 */
void ata_scsi_port_error_handler(struct Scsi_Host *host, struct ata_port *ap)
{
	unsigned long flags;
	struct ata_link *link;

	/* acquire EH ownership */
	ata_eh_acquire(ap);
repeat:
	/* kill fast drain timer */
	del_timer_sync(&ap->fastdrain_timer);

	/* process port resume request */
	ata_eh_handle_port_resume(ap);

	/* fetch & clear EH info */
	spin_lock_irqsave(ap->lock, flags);

	ata_for_each_link(link, ap, HOST_FIRST) {
		struct ata_eh_context *ehc = &link->eh_context;
		struct ata_device *dev;

		memset(&link->eh_context, 0, sizeof(link->eh_context));
		link->eh_context.i = link->eh_info;
		memset(&link->eh_info, 0, sizeof(link->eh_info));

		ata_for_each_dev(dev, link, ENABLED) {
			int devno = dev->devno;

			ehc->saved_xfer_mode[devno] = dev->xfer_mode;
			if (ata_ncq_enabled(dev))
				ehc->saved_ncq_enabled |= 1 << devno;

			/* If we are resuming, wake up the device */
			if (ap->pflags & ATA_PFLAG_RESUMING) {
				dev->flags |= ATA_DFLAG_RESUMING;
				ehc->i.dev_action[devno] |= ATA_EH_SET_ACTIVE;
			}
		}
	}

	ap->pflags |= ATA_PFLAG_EH_IN_PROGRESS;
	ap->pflags &= ~ATA_PFLAG_EH_PENDING;
	ap->excl_link = NULL;	/* don't maintain exclusion over EH */

	spin_unlock_irqrestore(ap->lock, flags);

	/* invoke EH, skip if unloading or suspended */
	if (!(ap->pflags & (ATA_PFLAG_UNLOADING | ATA_PFLAG_SUSPENDED)))
		ap->ops->error_handler(ap);
	else {
		/* if unloading, commence suicide */
		if ((ap->pflags & ATA_PFLAG_UNLOADING) &&
		    !(ap->pflags & ATA_PFLAG_UNLOADED))
			ata_eh_unload(ap);
		ata_eh_finish(ap);
	}

	/* process port suspend request */
	ata_eh_handle_port_suspend(ap);

	/*
	 * Exception might have happened after ->error_handler recovered the
	 * port but before this point.  Repeat EH in such case.
	 */
	spin_lock_irqsave(ap->lock, flags);

	if (ap->pflags & ATA_PFLAG_EH_PENDING) {
		if (--ap->eh_tries) {
			spin_unlock_irqrestore(ap->lock, flags);
			goto repeat;
		}
		ata_port_err(ap,
			     "EH pending after %d tries, giving up\n",
			     ATA_EH_MAX_TRIES);
		ap->pflags &= ~ATA_PFLAG_EH_PENDING;
	}

	/* this run is complete, make sure EH info is clear */
	ata_for_each_link(link, ap, HOST_FIRST)
		memset(&link->eh_info, 0, sizeof(link->eh_info));

	/*
	 * end eh (clear host_eh_scheduled) while holding ap->lock such that
	 * if an exception occurs after this point but before EH completion,
	 * SCSI midlayer will re-initiate EH.
	 */
	ap->ops->end_eh(ap);

	spin_unlock_irqrestore(ap->lock, flags);
	ata_eh_release(ap);

	scsi_eh_flush_done_q(&ap->eh_done_q);

	/* clean up */
	spin_lock_irqsave(ap->lock, flags);

	ap->pflags &= ~ATA_PFLAG_RESUMING;

	if (ap->pflags & ATA_PFLAG_LOADING)
		ap->pflags &= ~ATA_PFLAG_LOADING;
	else if ((ap->pflags & ATA_PFLAG_SCSI_HOTPLUG) &&
		 !(ap->flags & ATA_FLAG_SAS_HOST))
		schedule_delayed_work(&ap->hotplug_task, 0);

	if (ap->pflags & ATA_PFLAG_RECOVERED)
		ata_port_info(ap, "EH complete\n");

	ap->pflags &= ~(ATA_PFLAG_SCSI_HOTPLUG | ATA_PFLAG_RECOVERED);

	/* tell wait_eh that we're done */
	ap->pflags &= ~ATA_PFLAG_EH_IN_PROGRESS;
	wake_up_all(&ap->eh_wait_q);

	spin_unlock_irqrestore(ap->lock, flags);
}
EXPORT_SYMBOL_GPL(ata_scsi_port_error_handler);

/**
 *	ata_port_wait_eh - Wait for the currently pending EH to complete
 *	@ap: Port to wait EH for
 *
 *	Wait until the currently pending EH is complete.
 *
 *	LOCKING:
 *	Kernel thread context (may sleep).
 */
void ata_port_wait_eh(struct ata_port *ap)
{
	unsigned long flags;
	DEFINE_WAIT(wait);

retry:
	spin_lock_irqsave(ap->lock, flags);

	while (ap->pflags & (ATA_PFLAG_EH_PENDING | ATA_PFLAG_EH_IN_PROGRESS)) {
		prepare_to_wait(&ap->eh_wait_q, &wait, TASK_UNINTERRUPTIBLE);
		spin_unlock_irqrestore(ap->lock, flags);
		schedule();
		spin_lock_irqsave(ap->lock, flags);
	}
	finish_wait(&ap->eh_wait_q, &wait);

	spin_unlock_irqrestore(ap->lock, flags);

	/* make sure SCSI EH is complete */
	if (scsi_host_in_recovery(ap->scsi_host)) {
		ata_msleep(ap, 10);
		goto retry;
	}
}
EXPORT_SYMBOL_GPL(ata_port_wait_eh);
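
/*
 * Illustrative sketch (not part of this file): a caller that needs EH to
 * run and finish before proceeding (e.g. detach or suspend paths)
 * schedules EH under the port lock, then waits for it outside the lock:
 *
 *	spin_lock_irqsave(ap->lock, flags);
 *	ata_port_schedule_eh(ap);
 *	spin_unlock_irqrestore(ap->lock, flags);
 *
 *	ata_port_wait_eh(ap);
 */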

static unsigned int ata_eh_nr_in_flight(struct ata_port *ap)
{
	struct ata_queued_cmd *qc;
	unsigned int tag;
	unsigned int nr = 0;

	/* count only non-internal commands */
	ata_qc_for_each(ap, qc, tag) {
		if (qc)
			nr++;
	}

	return nr;
}

void ata_eh_fastdrain_timerfn(struct timer_list *t)
{
	struct ata_port *ap = from_timer(ap, t, fastdrain_timer);
	unsigned long flags;
	unsigned int cnt;

	spin_lock_irqsave(ap->lock, flags);

	cnt = ata_eh_nr_in_flight(ap);

	/* are we done? */
	if (!cnt)
		goto out_unlock;

	if (cnt == ap->fastdrain_cnt) {
		struct ata_queued_cmd *qc;
		unsigned int tag;

		/* No progress during the last interval, tag all
		 * in-flight qcs as timed out and freeze the port.
		 */
		ata_qc_for_each(ap, qc, tag) {
			if (qc)
				qc->err_mask |= AC_ERR_TIMEOUT;
		}

		ata_port_freeze(ap);
	} else {
		/* some qcs have finished, give it another chance */
		ap->fastdrain_cnt = cnt;
		ap->fastdrain_timer.expires =
			ata_deadline(jiffies, ATA_EH_FASTDRAIN_INTERVAL);
		add_timer(&ap->fastdrain_timer);
	}

out_unlock:
	spin_unlock_irqrestore(ap->lock, flags);
}

/**
 *	ata_eh_set_pending - set ATA_PFLAG_EH_PENDING and activate fast drain
 *	@ap: target ATA port
 *	@fastdrain: activate fast drain
 *
 *	Set ATA_PFLAG_EH_PENDING and activate fast drain if @fastdrain
 *	is non-zero and EH wasn't pending before.  Fast drain ensures
 *	that EH kicks in in a timely manner.
 *
 *	LOCKING:
 *	spin_lock_irqsave(host lock)
 */
static void ata_eh_set_pending(struct ata_port *ap, int fastdrain)
{
	unsigned int cnt;

	/* already scheduled? */
	if (ap->pflags & ATA_PFLAG_EH_PENDING)
		return;

	ap->pflags |= ATA_PFLAG_EH_PENDING;

	if (!fastdrain)
		return;

	/* do we have in-flight qcs? */
	cnt = ata_eh_nr_in_flight(ap);
	if (!cnt)
		return;

	/* activate fast drain */
	ap->fastdrain_cnt = cnt;
	ap->fastdrain_timer.expires =
		ata_deadline(jiffies, ATA_EH_FASTDRAIN_INTERVAL);
	add_timer(&ap->fastdrain_timer);
}

/**
 *	ata_qc_schedule_eh - schedule qc for error handling
 *	@qc: command to schedule error handling for
 *
 *	Schedule error handling for @qc.  EH will kick in as soon as
 *	other commands are drained.
 *
 *	LOCKING:
 *	spin_lock_irqsave(host lock)
 */
void ata_qc_schedule_eh(struct ata_queued_cmd *qc)
{
	struct ata_port *ap = qc->ap;

	qc->flags |= ATA_QCFLAG_EH;
	ata_eh_set_pending(ap, 1);

	/* The following will fail if timeout has already expired.
	 * ata_scsi_error() takes care of such scmds on EH entry.
	 * Note that ATA_QCFLAG_EH is unconditionally set after
	 * this function completes.
	 */
	blk_abort_request(scsi_cmd_to_rq(qc->scsicmd));
}

/**
 *	ata_std_sched_eh - non-libsas ata_ports issue eh with this common routine
 *	@ap: ATA port to schedule EH for
 *
 *	LOCKING: inherited from ata_port_schedule_eh
 *	spin_lock_irqsave(host lock)
 */
void ata_std_sched_eh(struct ata_port *ap)
{
	if (ap->pflags & ATA_PFLAG_INITIALIZING)
		return;

	ata_eh_set_pending(ap, 1);
	scsi_schedule_eh(ap->scsi_host);

	trace_ata_std_sched_eh(ap);
}
EXPORT_SYMBOL_GPL(ata_std_sched_eh);

/**
 *	ata_std_end_eh - non-libsas ata_ports complete eh with this common routine
 *	@ap: ATA port to end EH for
 *
 *	In the libata object model there is a 1:1 mapping of ata_port to
 *	shost, so host fields can be directly manipulated under ap->lock.
 *	In the libsas case we need to hold a lock at the ha level to
 *	coordinate these events.
 *
 *	LOCKING:
 *	spin_lock_irqsave(host lock)
 */
void ata_std_end_eh(struct ata_port *ap)
{
	struct Scsi_Host *host = ap->scsi_host;

	host->host_eh_scheduled = 0;
}
EXPORT_SYMBOL(ata_std_end_eh);


/**
 *	ata_port_schedule_eh - schedule error handling without a qc
 *	@ap: ATA port to schedule EH for
 *
 *	Schedule error handling for @ap.  EH will kick in as soon as
 *	all commands are drained.
 *
 *	LOCKING:
 *	spin_lock_irqsave(host lock)
 */
void ata_port_schedule_eh(struct ata_port *ap)
{
	/* see: ata_std_sched_eh, unless you know better */
	ap->ops->sched_eh(ap);
}
EXPORT_SYMBOL_GPL(ata_port_schedule_eh);
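
/*
 * Illustrative sketch (not part of this file): a typical LLD interrupt
 * handler reporting a fatal controller error.  Under the host lock it
 * records a description, then freezes the port, which also aborts the
 * in-flight qcs and thereby schedules EH:
 *
 *	struct ata_eh_info *ehi = &ap->link.eh_info;
 *
 *	ata_ehi_push_desc(ehi, "controller fault");
 *	ehi->err_mask |= AC_ERR_HOST_BUS;
 *	ehi->action |= ATA_EH_RESET;
 *	ata_port_freeze(ap);
 */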

static int ata_do_link_abort(struct ata_port *ap, struct ata_link *link)
{
	struct ata_queued_cmd *qc;
	int tag, nr_aborted = 0;

	/* we're going to abort all commands, no need for fast drain */
	ata_eh_set_pending(ap, 0);

	/* include internal tag in iteration */
	ata_qc_for_each_with_internal(ap, qc, tag) {
		if (qc && (!link || qc->dev->link == link)) {
			qc->flags |= ATA_QCFLAG_EH;
			ata_qc_complete(qc);
			nr_aborted++;
		}
	}

	if (!nr_aborted)
		ata_port_schedule_eh(ap);

	return nr_aborted;
}

/**
 *	ata_link_abort - abort all qc's on the link
 *	@link: ATA link to abort qc's for
 *
 *	Abort all active qc's on @link and schedule EH.
 *
 *	LOCKING:
 *	spin_lock_irqsave(host lock)
 *
 *	RETURNS:
 *	Number of aborted qc's.
 */
int ata_link_abort(struct ata_link *link)
{
	return ata_do_link_abort(link->ap, link);
}
EXPORT_SYMBOL_GPL(ata_link_abort);

/**
 *	ata_port_abort - abort all qc's on the port
 *	@ap: ATA port to abort qc's for
 *
 *	Abort all active qc's of @ap and schedule EH.
 *
 *	LOCKING:
 *	spin_lock_irqsave(host lock)
 *
 *	RETURNS:
 *	Number of aborted qc's.
 */
int ata_port_abort(struct ata_port *ap)
{
	return ata_do_link_abort(ap, NULL);
}
EXPORT_SYMBOL_GPL(ata_port_abort);

/**
 *	__ata_port_freeze - freeze port
 *	@ap: ATA port to freeze
 *
 *	This function is called when HSM violation or some other
 *	condition disrupts normal operation of the port.  A frozen port
 *	is not allowed to perform any operation until the port is
 *	thawed, which usually follows a successful reset.
 *
 *	ap->ops->freeze() callback can be used for freezing the port
 *	hardware-wise (e.g. mask interrupt and stop DMA engine).  If a
 *	port cannot be frozen hardware-wise, the interrupt handler
 *	must ack and clear interrupts unconditionally while the port
 *	is frozen.
 *
 *	LOCKING:
 *	spin_lock_irqsave(host lock)
 */
static void __ata_port_freeze(struct ata_port *ap)
{
	if (ap->ops->freeze)
		ap->ops->freeze(ap);

	ap->pflags |= ATA_PFLAG_FROZEN;

	trace_ata_port_freeze(ap);
}

/**
 *	ata_port_freeze - abort & freeze port
 *	@ap: ATA port to freeze
 *
 *	Abort and freeze @ap.  The freeze operation must be called
 *	first, because some hardware requires special operations
 *	before the taskfile registers are accessible.
 *
 *	LOCKING:
 *	spin_lock_irqsave(host lock)
 *
 *	RETURNS:
 *	Number of aborted commands.
 */
int ata_port_freeze(struct ata_port *ap)
{
	__ata_port_freeze(ap);

	return ata_port_abort(ap);
}
EXPORT_SYMBOL_GPL(ata_port_freeze);

/**
 *	ata_eh_freeze_port - EH helper to freeze port
 *	@ap: ATA port to freeze
 *
 *	Freeze @ap.
 *
 *	LOCKING:
 *	None.
 */
void ata_eh_freeze_port(struct ata_port *ap)
{
	unsigned long flags;

	spin_lock_irqsave(ap->lock, flags);
	__ata_port_freeze(ap);
	spin_unlock_irqrestore(ap->lock, flags);
}
EXPORT_SYMBOL_GPL(ata_eh_freeze_port);
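
/*
 * Illustrative sketch (not part of this file): a hypothetical ->freeze
 * callback.  Freezing hardware-wise usually means masking the port's
 * interrupts so a misbehaving device cannot storm the handler while the
 * port is frozen (all register and field names below are made up):
 *
 *	static void my_port_freeze(struct ata_port *ap)
 *	{
 *		struct my_host_priv *hpriv = ap->host->private_data;
 *
 *		writel(0, hpriv->mmio + MY_PORT_IRQ_MASK);
 *	}
 */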

/**
 *	ata_eh_thaw_port - EH helper to thaw port
 *	@ap: ATA port to thaw
 *
 *	Thaw frozen port @ap.
 *
 *	LOCKING:
 *	None.
 */
void ata_eh_thaw_port(struct ata_port *ap)
{
	unsigned long flags;

	spin_lock_irqsave(ap->lock, flags);

	ap->pflags &= ~ATA_PFLAG_FROZEN;

	if (ap->ops->thaw)
		ap->ops->thaw(ap);

	spin_unlock_irqrestore(ap->lock, flags);

	trace_ata_port_thaw(ap);
}

static void ata_eh_scsidone(struct scsi_cmnd *scmd)
{
	/* nada */
}

static void __ata_eh_qc_complete(struct ata_queued_cmd *qc)
{
	struct ata_port *ap = qc->ap;
	struct scsi_cmnd *scmd = qc->scsicmd;
	unsigned long flags;

	spin_lock_irqsave(ap->lock, flags);
	qc->scsidone = ata_eh_scsidone;
	__ata_qc_complete(qc);
	WARN_ON(ata_tag_valid(qc->tag));
	spin_unlock_irqrestore(ap->lock, flags);

	scsi_eh_finish_cmd(scmd, &ap->eh_done_q);
}

/**
 *	ata_eh_qc_complete - Complete an active ATA command from EH
 *	@qc: Command to complete
 *
 *	Indicate to the mid and upper layers that an ATA command has
 *	completed.  To be used from EH.
 */
void ata_eh_qc_complete(struct ata_queued_cmd *qc)
{
	struct scsi_cmnd *scmd = qc->scsicmd;

	scmd->retries = scmd->allowed;
	__ata_eh_qc_complete(qc);
}

/**
 *	ata_eh_qc_retry - Tell midlayer to retry an ATA command after EH
 *	@qc: Command to retry
 *
 *	Indicate to the mid and upper layers that an ATA command
 *	should be retried.  To be used from EH.
 *
 *	SCSI midlayer limits the number of retries to scmd->allowed.
 *	scmd->allowed is incremented for commands which get retried
 *	due to unrelated failures (qc->err_mask is zero).
 */
void ata_eh_qc_retry(struct ata_queued_cmd *qc)
{
	struct scsi_cmnd *scmd = qc->scsicmd;

	if (!qc->err_mask)
		scmd->allowed++;
	__ata_eh_qc_complete(qc);
}

/**
 *	ata_dev_disable - disable ATA device
 *	@dev: ATA device to disable
 *
 *	Disable @dev.
 *
 *	LOCKING:
 *	EH context.
 */
void ata_dev_disable(struct ata_device *dev)
{
	if (!ata_dev_enabled(dev))
		return;

	ata_dev_warn(dev, "disable device\n");
	ata_acpi_on_disable(dev);
	ata_down_xfermask_limit(dev, ATA_DNXFER_FORCE_PIO0 | ATA_DNXFER_QUIET);
	dev->class++;

	/* From now till the next successful probe, ering is used to
	 * track probe failures.  Clear accumulated device error info.
	 */
	ata_ering_clear(&dev->ering);
}
EXPORT_SYMBOL_GPL(ata_dev_disable);

/**
 *	ata_eh_detach_dev - detach ATA device
 *	@dev: ATA device to detach
 *
 *	Detach @dev.
 *
 *	LOCKING:
 *	None.
 */
void ata_eh_detach_dev(struct ata_device *dev)
{
	struct ata_link *link = dev->link;
	struct ata_port *ap = link->ap;
	struct ata_eh_context *ehc = &link->eh_context;
	unsigned long flags;

	/*
	 * If the device is still enabled, transition it to standby power
	 * mode (i.e. spin down HDDs).
	 */
	if (ata_dev_enabled(dev))
		ata_dev_power_set_standby(dev);

	ata_dev_disable(dev);

	spin_lock_irqsave(ap->lock, flags);

	dev->flags &= ~ATA_DFLAG_DETACH;

	if (ata_scsi_offline_dev(dev)) {
		dev->flags |= ATA_DFLAG_DETACHED;
		ap->pflags |= ATA_PFLAG_SCSI_HOTPLUG;
	}

	/* clear per-dev EH info */
	ata_eh_clear_action(link, dev, &link->eh_info, ATA_EH_PERDEV_MASK);
	ata_eh_clear_action(link, dev, &link->eh_context.i, ATA_EH_PERDEV_MASK);
	ehc->saved_xfer_mode[dev->devno] = 0;
	ehc->saved_ncq_enabled &= ~(1 << dev->devno);

	spin_unlock_irqrestore(ap->lock, flags);
}

/**
 *	ata_eh_about_to_do - about to perform eh_action
 *	@link: target ATA link
 *	@dev: target ATA dev for per-dev action (can be NULL)
 *	@action: action about to be performed
 *
 *	Called just before performing EH actions to clear related bits
 *	in @link->eh_info such that eh actions are not unnecessarily
 *	repeated.
 *
 *	LOCKING:
 *	None.
 */
void ata_eh_about_to_do(struct ata_link *link, struct ata_device *dev,
			unsigned int action)
{
	struct ata_port *ap = link->ap;
	struct ata_eh_info *ehi = &link->eh_info;
	struct ata_eh_context *ehc = &link->eh_context;
	unsigned long flags;

	trace_ata_eh_about_to_do(link, dev ? dev->devno : 0, action);

	spin_lock_irqsave(ap->lock, flags);

	ata_eh_clear_action(link, dev, ehi, action);

	/* About to take EH action, set RECOVERED.  Ignore actions on
	 * slave links as master will do them again.
	 */
	if (!(ehc->i.flags & ATA_EHI_QUIET) && link != ap->slave_link)
		ap->pflags |= ATA_PFLAG_RECOVERED;

	spin_unlock_irqrestore(ap->lock, flags);
}

/**
 *	ata_eh_done - EH action complete
 *	@link: ATA link for which EH actions are complete
 *	@dev: target ATA dev for per-dev action (can be NULL)
 *	@action: action just completed
 *
 *	Called right after performing EH actions to clear related bits
 *	in @link->eh_context.
 *
 *	LOCKING:
 *	None.
 */
void ata_eh_done(struct ata_link *link, struct ata_device *dev,
		 unsigned int action)
{
	struct ata_eh_context *ehc = &link->eh_context;

	trace_ata_eh_done(link, dev ? dev->devno : 0, action);

	ata_eh_clear_action(link, dev, &ehc->i, action);
}
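
/*
 * Illustrative sketch (not part of this file): recovery paths bracket each
 * EH action between the two helpers above, marking the action in progress
 * first and clearing it only once it succeeded:
 *
 *	ata_eh_about_to_do(link, dev, ATA_EH_REVALIDATE);
 *	rc = ata_dev_revalidate(dev, ehc->classes[dev->devno], readid_flags);
 *	if (rc == 0)
 *		ata_eh_done(link, dev, ATA_EH_REVALIDATE);
 */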

/**
 *	ata_err_string - convert err_mask to descriptive string
 *	@err_mask: error mask to convert to string
 *
 *	Convert @err_mask to descriptive string.  Errors are
 *	prioritized according to severity and only the most severe
 *	error is reported.
 *
 *	LOCKING:
 *	None.
 *
 *	RETURNS:
 *	Descriptive string for @err_mask
 */
static const char *ata_err_string(unsigned int err_mask)
{
	if (err_mask & AC_ERR_HOST_BUS)
		return "host bus error";
	if (err_mask & AC_ERR_ATA_BUS)
		return "ATA bus error";
	if (err_mask & AC_ERR_TIMEOUT)
		return "timeout";
	if (err_mask & AC_ERR_HSM)
		return "HSM violation";
	if (err_mask & AC_ERR_SYSTEM)
		return "internal error";
	if (err_mask & AC_ERR_MEDIA)
		return "media error";
	if (err_mask & AC_ERR_INVALID)
		return "invalid argument";
	if (err_mask & AC_ERR_DEV)
		return "device error";
	if (err_mask & AC_ERR_NCQ)
		return "NCQ error";
	if (err_mask & AC_ERR_NODEV_HINT)
		return "Polling detection error";
	return "unknown error";
}

/**
 *	atapi_eh_tur - perform ATAPI TEST_UNIT_READY
 *	@dev: target ATAPI device
 *	@r_sense_key: out parameter for sense_key
 *
 *	Perform ATAPI TEST_UNIT_READY.
 *
 *	LOCKING:
 *	EH context (may sleep).
 *
 *	RETURNS:
 *	0 on success, AC_ERR_* mask on failure.
 */
unsigned int atapi_eh_tur(struct ata_device *dev, u8 *r_sense_key)
{
	u8 cdb[ATAPI_CDB_LEN] = { TEST_UNIT_READY, 0, 0, 0, 0, 0 };
	struct ata_taskfile tf;
	unsigned int err_mask;

	ata_tf_init(dev, &tf);

	tf.flags |= ATA_TFLAG_ISADDR | ATA_TFLAG_DEVICE;
	tf.command = ATA_CMD_PACKET;
	tf.protocol = ATAPI_PROT_NODATA;

	err_mask = ata_exec_internal(dev, &tf, cdb, DMA_NONE, NULL, 0, 0);
	if (err_mask == AC_ERR_DEV)
		*r_sense_key = tf.error >> 4;
	return err_mask;
}
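
/*
 * Illustrative sketch (not part of this file): a bounded retry loop of the
 * kind EH uses to clear UNIT ATTENTION conditions (cf. ATA_EH_UA_TRIES
 * above), along these lines:
 *
 *	int tries = ATA_EH_UA_TRIES;
 *	u8 sense_key = 0;
 *
 *	while (tries--) {
 *		unsigned int err_mask = atapi_eh_tur(dev, &sense_key);
 *
 *		if (err_mask != AC_ERR_DEV || sense_key != UNIT_ATTENTION)
 *			break;
 *	}
 */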

/**
 *	ata_eh_request_sense - perform REQUEST_SENSE_DATA_EXT
 *	@qc: qc to perform REQUEST_SENSE_DATA_EXT to
 *
 *	Perform REQUEST_SENSE_DATA_EXT after the device reported CHECK
 *	SENSE.  This function is an EH helper.
 *
 *	LOCKING:
 *	Kernel thread context (may sleep).
 *
 *	RETURNS:
 *	true if sense data could be fetched, false otherwise.
 */
static bool ata_eh_request_sense(struct ata_queued_cmd *qc)
{
	struct scsi_cmnd *cmd = qc->scsicmd;
	struct ata_device *dev = qc->dev;
	struct ata_taskfile tf;
	unsigned int err_mask;

	if (ata_port_is_frozen(qc->ap)) {
		ata_dev_warn(dev, "sense data available but port frozen\n");
		return false;
	}

	if (!ata_id_sense_reporting_enabled(dev->id)) {
		ata_dev_warn(qc->dev, "sense data reporting disabled\n");
		return false;
	}

	ata_tf_init(dev, &tf);
	tf.flags |= ATA_TFLAG_ISADDR | ATA_TFLAG_DEVICE;
	tf.flags |= ATA_TFLAG_LBA | ATA_TFLAG_LBA48;
	tf.command = ATA_CMD_REQ_SENSE_DATA;
	tf.protocol = ATA_PROT_NODATA;

	err_mask = ata_exec_internal(dev, &tf, NULL, DMA_NONE, NULL, 0, 0);
	/* Ignore err_mask; ATA_ERR might be set */
	if (tf.status & ATA_SENSE) {
		if (ata_scsi_sense_is_valid(tf.lbah, tf.lbam, tf.lbal)) {
			/* Set sense without also setting scsicmd->result */
			scsi_build_sense_buffer(dev->flags & ATA_DFLAG_D_SENSE,
						cmd->sense_buffer, tf.lbah,
						tf.lbam, tf.lbal);
			qc->flags |= ATA_QCFLAG_SENSE_VALID;
			return true;
		}
	} else {
		ata_dev_warn(dev, "request sense failed stat %02x emask %x\n",
			     tf.status, err_mask);
	}

	return false;
}

/**
 *	atapi_eh_request_sense - perform ATAPI REQUEST_SENSE
 *	@dev: device to perform REQUEST_SENSE to
 *	@sense_buf: result sense data buffer (SCSI_SENSE_BUFFERSIZE bytes long)
 *	@dfl_sense_key: default sense key to use
 *
 *	Perform ATAPI REQUEST_SENSE after the device reported CHECK
 *	SENSE.  This function is an EH helper.
 *
 *	LOCKING:
 *	Kernel thread context (may sleep).
 *
 *	RETURNS:
 *	0 on success, AC_ERR_* mask on failure
 */
unsigned int atapi_eh_request_sense(struct ata_device *dev,
				    u8 *sense_buf, u8 dfl_sense_key)
{
	u8 cdb[ATAPI_CDB_LEN] =
		{ REQUEST_SENSE, 0, 0, 0, SCSI_SENSE_BUFFERSIZE, 0 };
	struct ata_port *ap = dev->link->ap;
	struct ata_taskfile tf;

	memset(sense_buf, 0, SCSI_SENSE_BUFFERSIZE);

	/* initialize sense_buf with the error register,
	 * for the case where they are -not- overwritten
	 */
	sense_buf[0] = 0x70;
	sense_buf[2] = dfl_sense_key;

	/* some devices time out if garbage left in tf */
	ata_tf_init(dev, &tf);

	tf.flags |= ATA_TFLAG_ISADDR | ATA_TFLAG_DEVICE;
	tf.command = ATA_CMD_PACKET;

	/* is it pointless to prefer PIO for "safety reasons"? */
	if (ap->flags & ATA_FLAG_PIO_DMA) {
		tf.protocol = ATAPI_PROT_DMA;
		tf.feature |= ATAPI_PKT_DMA;
	} else {
		tf.protocol = ATAPI_PROT_PIO;
		tf.lbam = SCSI_SENSE_BUFFERSIZE;
		tf.lbah = 0;
	}

	return ata_exec_internal(dev, &tf, cdb, DMA_FROM_DEVICE,
				 sense_buf, SCSI_SENSE_BUFFERSIZE, 0);
}

/**
 *	ata_eh_analyze_serror - analyze SError for a failed port
 *	@link: ATA link to analyze SError for
 *
 *	Analyze SError if available and further determine cause of
 *	failure.
 *
 *	LOCKING:
 *	None.
 */
static void ata_eh_analyze_serror(struct ata_link *link)
{
	struct ata_eh_context *ehc = &link->eh_context;
	u32 serror = ehc->i.serror;
	unsigned int err_mask = 0, action = 0;
	u32 hotplug_mask;

	if (serror & (SERR_PERSISTENT | SERR_DATA)) {
		err_mask |= AC_ERR_ATA_BUS;
		action |= ATA_EH_RESET;
	}
	if (serror & SERR_PROTOCOL) {
		err_mask |= AC_ERR_HSM;
		action |= ATA_EH_RESET;
	}
	if (serror & SERR_INTERNAL) {
		err_mask |= AC_ERR_SYSTEM;
		action |= ATA_EH_RESET;
	}

	/* Determine whether a hotplug event has occurred.  Both
	 * SError.N/X are considered hotplug events for enabled or
	 * host links.  For disabled PMP links, only N bit is
	 * considered as X bit is left at 1 for link plugging.
	 */
	if (link->lpm_policy > ATA_LPM_MAX_POWER)
		hotplug_mask = 0;	/* hotplug doesn't work w/ LPM */
	else if (!(link->flags & ATA_LFLAG_DISABLED) || ata_is_host_link(link))
		hotplug_mask = SERR_PHYRDY_CHG | SERR_DEV_XCHG;
	else
		hotplug_mask = SERR_PHYRDY_CHG;

	if (serror & hotplug_mask)
		ata_ehi_hotplugged(&ehc->i);

	ehc->i.err_mask |= err_mask;
	ehc->i.action |= action;
}

/**
 *	ata_eh_analyze_tf - analyze taskfile of a failed qc
 *	@qc: qc to analyze
 *
 *	Analyze taskfile of @qc and further determine cause of
 *	failure.  This function also requests ATAPI sense data if
 *	available.
 *
 *	LOCKING:
 *	Kernel thread context (may sleep).
 *
 *	RETURNS:
 *	Determined recovery action
 */
static unsigned int ata_eh_analyze_tf(struct ata_queued_cmd *qc)
{
	const struct ata_taskfile *tf = &qc->result_tf;
	unsigned int tmp, action = 0;
	u8 stat = tf->status, err = tf->error;

	if ((stat & (ATA_BUSY | ATA_DRQ | ATA_DRDY)) != ATA_DRDY) {
		qc->err_mask |= AC_ERR_HSM;
		return ATA_EH_RESET;
	}

	if (stat & (ATA_ERR | ATA_DF)) {
		qc->err_mask |= AC_ERR_DEV;
		/*
		 * Sense data reporting does not work if the
		 * device fault bit is set.
		 */
		if (stat & ATA_DF)
			stat &= ~ATA_SENSE;
	} else {
		return 0;
	}

	switch (qc->dev->class) {
	case ATA_DEV_ATA:
	case ATA_DEV_ZAC:
		/*
		 * Fetch the sense data explicitly if:
		 * -It was a non-NCQ command that failed, or
		 * -It was a NCQ command that failed, but the sense data
		 *  was not included in the NCQ command error log
		 *  (i.e. NCQ autosense is not supported by the device).
		 */
		if (!(qc->flags & ATA_QCFLAG_SENSE_VALID) &&
		    (stat & ATA_SENSE) && ata_eh_request_sense(qc))
			set_status_byte(qc->scsicmd, SAM_STAT_CHECK_CONDITION);
		if (err & ATA_ICRC)
			qc->err_mask |= AC_ERR_ATA_BUS;
		if (err & (ATA_UNC | ATA_AMNF))
			qc->err_mask |= AC_ERR_MEDIA;
		if (err & ATA_IDNF)
			qc->err_mask |= AC_ERR_INVALID;
		break;

	case ATA_DEV_ATAPI:
		if (!ata_port_is_frozen(qc->ap)) {
			tmp = atapi_eh_request_sense(qc->dev,
						qc->scsicmd->sense_buffer,
						qc->result_tf.error >> 4);
			if (!tmp)
				qc->flags |= ATA_QCFLAG_SENSE_VALID;
			else
				qc->err_mask |= tmp;
		}
	}

	if (qc->flags & ATA_QCFLAG_SENSE_VALID) {
		enum scsi_disposition ret = scsi_check_sense(qc->scsicmd);
		/*
		 * SUCCESS here means that the sense code could be
		 * evaluated and should be passed to the upper layers
		 * for correct evaluation.
		 * FAILED means the sense code could not be interpreted
		 * and the device would need to be reset.
		 * NEEDS_RETRY and ADD_TO_MLQUEUE means that the
		 * command would need to be retried.
		 */
		if (ret == NEEDS_RETRY || ret == ADD_TO_MLQUEUE) {
			qc->flags |= ATA_QCFLAG_RETRY;
			qc->err_mask |= AC_ERR_OTHER;
		} else if (ret != SUCCESS) {
			qc->err_mask |= AC_ERR_HSM;
		}
	}
	if (qc->err_mask & (AC_ERR_HSM | AC_ERR_TIMEOUT | AC_ERR_ATA_BUS))
		action |= ATA_EH_RESET;

	return action;
}

static int ata_eh_categorize_error(unsigned int eflags, unsigned int err_mask,
				   int *xfer_ok)
{
	int base = 0;

	if (!(eflags & ATA_EFLAG_DUBIOUS_XFER))
		*xfer_ok = 1;

	if (!*xfer_ok)
		base = ATA_ECAT_DUBIOUS_NONE;

	if (err_mask & AC_ERR_ATA_BUS)
		return base + ATA_ECAT_ATA_BUS;

	if (err_mask & AC_ERR_TIMEOUT)
		return base + ATA_ECAT_TOUT_HSM;

	if (eflags & ATA_EFLAG_IS_IO) {
		if (err_mask & AC_ERR_HSM)
			return base + ATA_ECAT_TOUT_HSM;
		if ((err_mask &
		     (AC_ERR_DEV|AC_ERR_MEDIA|AC_ERR_INVALID)) == AC_ERR_DEV)
			return base + ATA_ECAT_UNK_DEV;
	}

	return 0;
}

struct speed_down_verdict_arg {
	u64 since;
	int xfer_ok;
	int nr_errors[ATA_ECAT_NR];
};

static int speed_down_verdict_cb(struct ata_ering_entry *ent, void *void_arg)
{
	struct speed_down_verdict_arg *arg = void_arg;
	int cat;

	if ((ent->eflags & ATA_EFLAG_OLD_ER) || (ent->timestamp < arg->since))
		return -1;

	cat = ata_eh_categorize_error(ent->eflags, ent->err_mask,
				      &arg->xfer_ok);
	arg->nr_errors[cat]++;

	return 0;
}

/**
 *	ata_eh_speed_down_verdict - Determine speed down verdict
 *	@dev: Device of interest
 *
 *	This function examines error ring of @dev and determines
 *	whether NCQ needs to be turned off, transfer speed should be
 *	stepped down, or falling back to PIO is necessary.
 *
 *	ECAT_ATA_BUS	: ATA_BUS error for any command
 *
 *	ECAT_TOUT_HSM	: TIMEOUT for any command or HSM violation for
 *			  IO commands
 *
 *	ECAT_UNK_DEV	: Unknown DEV error for IO commands
 *
 *	ECAT_DUBIOUS_*	: Identical to above three but occurred while
 *			  data transfer hasn't been verified.
 *
 *	Verdicts are
 *
 *	NCQ_OFF		: Turn off NCQ.
 *
 *	SPEED_DOWN	: Speed down transfer speed but don't fall back
 *			  to PIO.
 *
 *	FALLBACK_TO_PIO	: Fall back to PIO.
 *
 *	Even if multiple verdicts are returned, only one action is
 *	taken per error.  An action triggered by non-DUBIOUS errors
 *	clears ering, while one triggered by DUBIOUS_* errors doesn't.
 *	This is to expedite speed down decisions right after device is
 *	initially configured.
 *
 *	The following are speed down rules.  #1 and #2 deal with
 *	DUBIOUS errors.
 *
 *	1. If more than one DUBIOUS_ATA_BUS or DUBIOUS_TOUT_HSM errors
 *	   occurred during last 5 mins, SPEED_DOWN and FALLBACK_TO_PIO.
 *
 *	2. If more than one DUBIOUS_TOUT_HSM or DUBIOUS_UNK_DEV errors
 *	   occurred during last 5 mins, NCQ_OFF.
 *
 *	3. If more than 6 ATA_BUS, TOUT_HSM or UNK_DEV errors
 *	   occurred during last 5 mins, FALLBACK_TO_PIO.
 *
 *	4. If more than 3 TOUT_HSM or UNK_DEV errors occurred
 *	   during last 10 mins, NCQ_OFF.
 *
 *	5. If more than 3 ATA_BUS or TOUT_HSM errors, or more than 6
 *	   UNK_DEV errors occurred during last 10 mins, SPEED_DOWN.
 *
 *	LOCKING:
 *	Inherited from caller.
 *
 *	RETURNS:
 *	OR of ATA_EH_SPDN_* flags.
 */
static unsigned int ata_eh_speed_down_verdict(struct ata_device *dev)
{
	const u64 j5mins = 5LLU * 60 * HZ, j10mins = 10LLU * 60 * HZ;
	u64 j64 = get_jiffies_64();
	struct speed_down_verdict_arg arg;
	unsigned int verdict = 0;

	/* scan past 5 mins of error history */
	memset(&arg, 0, sizeof(arg));
	arg.since = j64 - min(j64, j5mins);
	ata_ering_map(&dev->ering, speed_down_verdict_cb, &arg);

	if (arg.nr_errors[ATA_ECAT_DUBIOUS_ATA_BUS] +
	    arg.nr_errors[ATA_ECAT_DUBIOUS_TOUT_HSM] > 1)
		verdict |= ATA_EH_SPDN_SPEED_DOWN |
			ATA_EH_SPDN_FALLBACK_TO_PIO | ATA_EH_SPDN_KEEP_ERRORS;

	if (arg.nr_errors[ATA_ECAT_DUBIOUS_TOUT_HSM] +
	    arg.nr_errors[ATA_ECAT_DUBIOUS_UNK_DEV] > 1)
		verdict |= ATA_EH_SPDN_NCQ_OFF | ATA_EH_SPDN_KEEP_ERRORS;

	if (arg.nr_errors[ATA_ECAT_ATA_BUS] +
	    arg.nr_errors[ATA_ECAT_TOUT_HSM] +
	    arg.nr_errors[ATA_ECAT_UNK_DEV] > 6)
		verdict |= ATA_EH_SPDN_FALLBACK_TO_PIO;

	/* scan past 10 mins of error history */
	memset(&arg, 0, sizeof(arg));
	arg.since = j64 - min(j64, j10mins);
	ata_ering_map(&dev->ering, speed_down_verdict_cb, &arg);

	if (arg.nr_errors[ATA_ECAT_TOUT_HSM] +
	    arg.nr_errors[ATA_ECAT_UNK_DEV] > 3)
		verdict |= ATA_EH_SPDN_NCQ_OFF;

	if (arg.nr_errors[ATA_ECAT_ATA_BUS] +
	    arg.nr_errors[ATA_ECAT_TOUT_HSM] > 3 ||
	    arg.nr_errors[ATA_ECAT_UNK_DEV] > 6)
		verdict |= ATA_EH_SPDN_SPEED_DOWN;

	return verdict;
}
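
/*
 * Worked example (illustrative): two ATA_ECAT_DUBIOUS_TOUT_HSM errors
 * within the last 5 minutes satisfy both rule #1 and rule #2 above, so
 * the verdict is SPEED_DOWN | FALLBACK_TO_PIO | NCQ_OFF | KEEP_ERRORS.
 * ata_eh_speed_down() below then applies only the first applicable
 * action (NCQ off first) and, because KEEP_ERRORS is set, leaves the
 * error ring intact so the remaining verdicts can fire on later errors.
 */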

/**
 *	ata_eh_speed_down - record error and speed down if necessary
 *	@dev: Failed device
 *	@eflags: mask of ATA_EFLAG_* flags
 *	@err_mask: err_mask of the error
 *
 *	Record error and examine error history to determine whether
 *	adjusting transmission speed is necessary.  It also sets
 *	transmission limits appropriately if such adjustment is
 *	necessary.
 *
 *	LOCKING:
 *	Kernel thread context (may sleep).
 *
 *	RETURNS:
 *	Determined recovery action.
 */
static unsigned int ata_eh_speed_down(struct ata_device *dev,
				unsigned int eflags, unsigned int err_mask)
{
	struct ata_link *link = ata_dev_phys_link(dev);
	int xfer_ok = 0;
	unsigned int verdict;
	unsigned int action = 0;

	/* don't bother if Cat-0 error */
	if (ata_eh_categorize_error(eflags, err_mask, &xfer_ok) == 0)
		return 0;

	/* record error and determine whether speed down is necessary */
	ata_ering_record(&dev->ering, eflags, err_mask);
	verdict = ata_eh_speed_down_verdict(dev);

	/* turn off NCQ? */
	if ((verdict & ATA_EH_SPDN_NCQ_OFF) && ata_ncq_enabled(dev)) {
		dev->flags |= ATA_DFLAG_NCQ_OFF;
		ata_dev_warn(dev, "NCQ disabled due to excessive errors\n");
		goto done;
	}

	/* speed down? */
	if (verdict & ATA_EH_SPDN_SPEED_DOWN) {
		/* speed down SATA link speed if possible */
		if (sata_down_spd_limit(link, 0) == 0) {
			action |= ATA_EH_RESET;
			goto done;
		}

		/* lower transfer mode */
		if (dev->spdn_cnt < 2) {
			static const int dma_dnxfer_sel[] =
				{ ATA_DNXFER_DMA, ATA_DNXFER_40C };
			static const int pio_dnxfer_sel[] =
				{ ATA_DNXFER_PIO, ATA_DNXFER_FORCE_PIO0 };
			int sel;

			if (dev->xfer_shift != ATA_SHIFT_PIO)
				sel = dma_dnxfer_sel[dev->spdn_cnt];
			else
				sel = pio_dnxfer_sel[dev->spdn_cnt];

			dev->spdn_cnt++;

			if (ata_down_xfermask_limit(dev, sel) == 0) {
				action |= ATA_EH_RESET;
				goto done;
			}
		}
	}

	/* Fall back to PIO?  Slowing down to PIO is meaningless for
	 * SATA ATA devices.  Consider it only for PATA and SATAPI.
	 */
	if ((verdict & ATA_EH_SPDN_FALLBACK_TO_PIO) && (dev->spdn_cnt >= 2) &&
	    (link->ap->cbl != ATA_CBL_SATA || dev->class == ATA_DEV_ATAPI) &&
	    (dev->xfer_shift != ATA_SHIFT_PIO)) {
		if (ata_down_xfermask_limit(dev, ATA_DNXFER_FORCE_PIO) == 0) {
			dev->spdn_cnt = 0;
			action |= ATA_EH_RESET;
			goto done;
		}
	}

	return 0;
done:
	/* device has been slowed down, blow error history */
	if (!(verdict & ATA_EH_SPDN_KEEP_ERRORS))
		ata_ering_clear(&dev->ering);
	return action;
}

/**
 *	ata_eh_worth_retry - analyze error and decide whether to retry
 *	@qc: qc to possibly retry
 *
 *	Look at the cause of the error and decide if a retry
 *	might be useful or not.  We don't want to retry media errors
 *	because the drive itself has probably already taken 10-30 seconds
 *	doing its own internal retries before reporting the failure.
 */
static inline int ata_eh_worth_retry(struct ata_queued_cmd *qc)
{
	if (qc->err_mask & AC_ERR_MEDIA)
		return 0;	/* don't retry media errors */
	if (qc->flags & ATA_QCFLAG_IO)
		return 1;	/* otherwise retry anything from fs stack */
	if (qc->err_mask & AC_ERR_INVALID)
		return 0;	/* don't retry these */
	return qc->err_mask != AC_ERR_DEV;  /* retry if not dev error */
}

/**
 *	ata_eh_quiet - check if we need to be quiet about a command error
 *	@qc: qc to check
 *
 *	Look at the qc flags and its scsi command request flags to determine
 *	if we need to be quiet about the command failure.
 */
static inline bool ata_eh_quiet(struct ata_queued_cmd *qc)
{
	if (qc->scsicmd && scsi_cmd_to_rq(qc->scsicmd)->rq_flags & RQF_QUIET)
		qc->flags |= ATA_QCFLAG_QUIET;
	return qc->flags & ATA_QCFLAG_QUIET;
}

static int ata_eh_read_sense_success_non_ncq(struct ata_link *link)
{
	struct ata_port *ap = link->ap;
	struct ata_queued_cmd *qc;

	qc = __ata_qc_from_tag(ap, link->active_tag);
	if (!qc)
		return -EIO;

	if (!(qc->flags & ATA_QCFLAG_EH) ||
	    !(qc->flags & ATA_QCFLAG_EH_SUCCESS_CMD) ||
	    qc->err_mask)
		return -EIO;

	if (!ata_eh_request_sense(qc))
		return -EIO;

	/*
	 * If we have sense data, call scsi_check_sense() in order to set the
	 * correct SCSI ML byte (if any).  No point in checking the return
	 * value, since the command has already completed successfully.
1942 */ 1943 scsi_check_sense(qc->scsicmd); 1944 1945 return 0; 1946 } 1947 1948 static void ata_eh_get_success_sense(struct ata_link *link) 1949 { 1950 struct ata_eh_context *ehc = &link->eh_context; 1951 struct ata_device *dev = link->device; 1952 struct ata_port *ap = link->ap; 1953 struct ata_queued_cmd *qc; 1954 int tag, ret = 0; 1955 1956 if (!(ehc->i.dev_action[dev->devno] & ATA_EH_GET_SUCCESS_SENSE)) 1957 return; 1958 1959 /* if frozen, we can't do much */ 1960 if (ata_port_is_frozen(ap)) { 1961 ata_dev_warn(dev, 1962 "successful sense data available but port frozen\n"); 1963 goto out; 1964 } 1965 1966 /* 1967 * If the link has sactive set, then we have outstanding NCQ commands 1968 * and have to read the Successful NCQ Commands log to get the sense 1969 * data. Otherwise, we are dealing with a non-NCQ command and use the 1970 * request sense ext command to retrieve the sense data. 1971 */ 1972 if (link->sactive) 1973 ret = ata_eh_read_sense_success_ncq_log(link); 1974 else 1975 ret = ata_eh_read_sense_success_non_ncq(link); 1976 if (ret) 1977 goto out; 1978 1979 ata_eh_done(link, dev, ATA_EH_GET_SUCCESS_SENSE); 1980 return; 1981 1982 out: 1983 /* 1984 * If we failed to get sense data for a successful command that ought to 1985 * have sense data, we cannot simply return BLK_STS_OK to user space. 1986 * This is because we can't know if the sense data that we couldn't get 1987 * was actually "DATA CURRENTLY UNAVAILABLE". Reporting such a command 1988 * as success to user space would result in silent data corruption. 1989 * Thus, add bogus ABORTED_COMMAND sense data to such commands, such 1990 * that SCSI will report these commands as BLK_STS_IOERR to user space. 1991 */ 1992 ata_qc_for_each_raw(ap, qc, tag) { 1993 if (!(qc->flags & ATA_QCFLAG_EH) || 1994 !(qc->flags & ATA_QCFLAG_EH_SUCCESS_CMD) || 1995 qc->err_mask || 1996 ata_dev_phys_link(qc->dev) != link) 1997 continue; 1998 1999 /* We managed to get sense data for this successful command, skip. */ 2000 if (qc->flags & ATA_QCFLAG_SENSE_VALID) 2001 continue; 2002 2003 /* This successful command did not have any sense data, skip. */ 2004 if (!(qc->result_tf.status & ATA_SENSE)) 2005 continue; 2006 2007 /* This successful command had sense data, but we failed to get it. */ 2008 ata_scsi_set_sense(dev, qc->scsicmd, ABORTED_COMMAND, 0, 0); 2009 qc->flags |= ATA_QCFLAG_SENSE_VALID; 2010 } 2011 ata_eh_done(link, dev, ATA_EH_GET_SUCCESS_SENSE); 2012 } 2013 2014 /** 2015 * ata_eh_link_autopsy - analyze error and determine recovery action 2016 * @link: host link to perform autopsy on 2017 * 2018 * Analyze why @link failed and determine which recovery actions 2019 * are needed. This function also sets more detailed AC_ERR_* 2020 * values and fills sense data for ATAPI CHECK CONDITION. 2021 * 2022 * LOCKING: 2023 * Kernel thread context (may sleep). 
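*
* The autopsy below proceeds roughly in order: read and decode
* SError, analyze any NCQ failure, fetch sense data for successful
* commands that requested it, then walk the failed qcs to refine
* their err_mask, decide retryability and accumulate the EH actions
* to take.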
2024 */ 2025 static void ata_eh_link_autopsy(struct ata_link *link) 2026 { 2027 struct ata_port *ap = link->ap; 2028 struct ata_eh_context *ehc = &link->eh_context; 2029 struct ata_queued_cmd *qc; 2030 struct ata_device *dev; 2031 unsigned int all_err_mask = 0, eflags = 0; 2032 int tag, nr_failed = 0, nr_quiet = 0; 2033 u32 serror; 2034 int rc; 2035 2036 if (ehc->i.flags & ATA_EHI_NO_AUTOPSY) 2037 return; 2038 2039 /* obtain and analyze SError */ 2040 rc = sata_scr_read(link, SCR_ERROR, &serror); 2041 if (rc == 0) { 2042 ehc->i.serror |= serror; 2043 ata_eh_analyze_serror(link); 2044 } else if (rc != -EOPNOTSUPP) { 2045 /* SError read failed, force reset and probing */ 2046 ehc->i.probe_mask |= ATA_ALL_DEVICES; 2047 ehc->i.action |= ATA_EH_RESET; 2048 ehc->i.err_mask |= AC_ERR_OTHER; 2049 } 2050 2051 /* analyze NCQ failure */ 2052 ata_eh_analyze_ncq_error(link); 2053 2054 /* 2055 * Check if this was a successful command that simply needs sense data. 2056 * Since the sense data is not part of the completion, we need to fetch 2057 * it using an additional command. Since this can't be done from irq 2058 * context, the sense data for successful commands are fetched by EH. 2059 */ 2060 ata_eh_get_success_sense(link); 2061 2062 /* any real error trumps AC_ERR_OTHER */ 2063 if (ehc->i.err_mask & ~AC_ERR_OTHER) 2064 ehc->i.err_mask &= ~AC_ERR_OTHER; 2065 2066 all_err_mask |= ehc->i.err_mask; 2067 2068 ata_qc_for_each_raw(ap, qc, tag) { 2069 if (!(qc->flags & ATA_QCFLAG_EH) || 2070 qc->flags & ATA_QCFLAG_RETRY || 2071 qc->flags & ATA_QCFLAG_EH_SUCCESS_CMD || 2072 ata_dev_phys_link(qc->dev) != link) 2073 continue; 2074 2075 /* inherit upper level err_mask */ 2076 qc->err_mask |= ehc->i.err_mask; 2077 2078 /* analyze TF */ 2079 ehc->i.action |= ata_eh_analyze_tf(qc); 2080 2081 /* DEV errors are probably spurious in case of ATA_BUS error */ 2082 if (qc->err_mask & AC_ERR_ATA_BUS) 2083 qc->err_mask &= ~(AC_ERR_DEV | AC_ERR_MEDIA | 2084 AC_ERR_INVALID); 2085 2086 /* any real error trumps unknown error */ 2087 if (qc->err_mask & ~AC_ERR_OTHER) 2088 qc->err_mask &= ~AC_ERR_OTHER; 2089 2090 /* 2091 * SENSE_VALID trumps dev/unknown error and revalidation. Upper 2092 * layers will determine whether the command is worth retrying 2093 * based on the sense data and device class/type. Otherwise, 2094 * determine directly if the command is worth retrying using its 2095 * error mask and flags. 2096 */ 2097 if (qc->flags & ATA_QCFLAG_SENSE_VALID) 2098 qc->err_mask &= ~(AC_ERR_DEV | AC_ERR_OTHER); 2099 else if (ata_eh_worth_retry(qc)) 2100 qc->flags |= ATA_QCFLAG_RETRY; 2101 2102 /* accumulate error info */ 2103 ehc->i.dev = qc->dev; 2104 all_err_mask |= qc->err_mask; 2105 if (qc->flags & ATA_QCFLAG_IO) 2106 eflags |= ATA_EFLAG_IS_IO; 2107 trace_ata_eh_link_autopsy_qc(qc); 2108 2109 /* Count quiet errors */ 2110 if (ata_eh_quiet(qc)) 2111 nr_quiet++; 2112 nr_failed++; 2113 } 2114 2115 /* If all failed commands requested silence, then be quiet */ 2116 if (nr_quiet == nr_failed) 2117 ehc->i.flags |= ATA_EHI_QUIET; 2118 2119 /* enforce default EH actions */ 2120 if (ata_port_is_frozen(ap) || 2121 all_err_mask & (AC_ERR_HSM | AC_ERR_TIMEOUT)) 2122 ehc->i.action |= ATA_EH_RESET; 2123 else if (((eflags & ATA_EFLAG_IS_IO) && all_err_mask) || 2124 (!(eflags & ATA_EFLAG_IS_IO) && (all_err_mask & ~AC_ERR_DEV))) 2125 ehc->i.action |= ATA_EH_REVALIDATE; 2126 2127 /* If we have offending qcs and the associated failed device, 2128 * perform per-dev EH action only on the offending device. 
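* For example, ATA_EH_REVALIDATE is part of ATA_EH_PERDEV_MASK, so
* it is moved from ehc->i.action into ehc->i.dev_action[] of the
* failed device below and other devices on the link are not
* revalidated needlessly.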
2129 */ 2130 if (ehc->i.dev) { 2131 ehc->i.dev_action[ehc->i.dev->devno] |= 2132 ehc->i.action & ATA_EH_PERDEV_MASK; 2133 ehc->i.action &= ~ATA_EH_PERDEV_MASK; 2134 } 2135 2136 /* propagate timeout to host link */ 2137 if ((all_err_mask & AC_ERR_TIMEOUT) && !ata_is_host_link(link)) 2138 ap->link.eh_context.i.err_mask |= AC_ERR_TIMEOUT; 2139 2140 /* record error and consider speeding down */ 2141 dev = ehc->i.dev; 2142 if (!dev && ((ata_link_max_devices(link) == 1 && 2143 ata_dev_enabled(link->device)))) 2144 dev = link->device; 2145 2146 if (dev) { 2147 if (dev->flags & ATA_DFLAG_DUBIOUS_XFER) 2148 eflags |= ATA_EFLAG_DUBIOUS_XFER; 2149 ehc->i.action |= ata_eh_speed_down(dev, eflags, all_err_mask); 2150 trace_ata_eh_link_autopsy(dev, ehc->i.action, all_err_mask); 2151 } 2152 } 2153 2154 /** 2155 * ata_eh_autopsy - analyze error and determine recovery action 2156 * @ap: host port to perform autopsy on 2157 * 2158 * Analyze all links of @ap and determine why they failed and 2159 * which recovery actions are needed. 2160 * 2161 * LOCKING: 2162 * Kernel thread context (may sleep). 2163 */ 2164 void ata_eh_autopsy(struct ata_port *ap) 2165 { 2166 struct ata_link *link; 2167 2168 ata_for_each_link(link, ap, EDGE) 2169 ata_eh_link_autopsy(link); 2170 2171 /* Handle the frigging slave link. Autopsy is done similarly 2172 * but actions and flags are transferred over to the master 2173 * link and handled from there. 2174 */ 2175 if (ap->slave_link) { 2176 struct ata_eh_context *mehc = &ap->link.eh_context; 2177 struct ata_eh_context *sehc = &ap->slave_link->eh_context; 2178 2179 /* transfer control flags from master to slave */ 2180 sehc->i.flags |= mehc->i.flags & ATA_EHI_TO_SLAVE_MASK; 2181 2182 /* perform autopsy on the slave link */ 2183 ata_eh_link_autopsy(ap->slave_link); 2184 2185 /* transfer actions from slave to master and clear slave */ 2186 ata_eh_about_to_do(ap->slave_link, NULL, ATA_EH_ALL_ACTIONS); 2187 mehc->i.action |= sehc->i.action; 2188 mehc->i.dev_action[1] |= sehc->i.dev_action[1]; 2189 mehc->i.flags |= sehc->i.flags; 2190 ata_eh_done(ap->slave_link, NULL, ATA_EH_ALL_ACTIONS); 2191 } 2192 2193 /* Autopsy of fanout ports can affect host link autopsy. 2194 * Perform host link autopsy last. 
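* (For example, an AC_ERR_TIMEOUT seen on a fanout link is
* propagated into the host link's eh_context above, and the host
* link autopsy must run after that to pick it up and schedule a
* reset.)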
2195 */ 2196 if (sata_pmp_attached(ap)) 2197 ata_eh_link_autopsy(&ap->link); 2198 } 2199 2200 /** 2201 * ata_get_cmd_name - get name for ATA command 2202 * @command: ATA command code to get name for 2203 * 2204 * Return a textual name of the given command or "unknown" 2205 * 2206 * LOCKING: 2207 * None 2208 */ 2209 const char *ata_get_cmd_name(u8 command) 2210 { 2211 #ifdef CONFIG_ATA_VERBOSE_ERROR 2212 static const struct 2213 { 2214 u8 command; 2215 const char *text; 2216 } cmd_descr[] = { 2217 { ATA_CMD_DEV_RESET, "DEVICE RESET" }, 2218 { ATA_CMD_CHK_POWER, "CHECK POWER MODE" }, 2219 { ATA_CMD_STANDBY, "STANDBY" }, 2220 { ATA_CMD_IDLE, "IDLE" }, 2221 { ATA_CMD_EDD, "EXECUTE DEVICE DIAGNOSTIC" }, 2222 { ATA_CMD_DOWNLOAD_MICRO, "DOWNLOAD MICROCODE" }, 2223 { ATA_CMD_DOWNLOAD_MICRO_DMA, "DOWNLOAD MICROCODE DMA" }, 2224 { ATA_CMD_NOP, "NOP" }, 2225 { ATA_CMD_FLUSH, "FLUSH CACHE" }, 2226 { ATA_CMD_FLUSH_EXT, "FLUSH CACHE EXT" }, 2227 { ATA_CMD_ID_ATA, "IDENTIFY DEVICE" }, 2228 { ATA_CMD_ID_ATAPI, "IDENTIFY PACKET DEVICE" }, 2229 { ATA_CMD_SERVICE, "SERVICE" }, 2230 { ATA_CMD_READ, "READ DMA" }, 2231 { ATA_CMD_READ_EXT, "READ DMA EXT" }, 2232 { ATA_CMD_READ_QUEUED, "READ DMA QUEUED" }, 2233 { ATA_CMD_READ_STREAM_EXT, "READ STREAM EXT" }, 2234 { ATA_CMD_READ_STREAM_DMA_EXT, "READ STREAM DMA EXT" }, 2235 { ATA_CMD_WRITE, "WRITE DMA" }, 2236 { ATA_CMD_WRITE_EXT, "WRITE DMA EXT" }, 2237 { ATA_CMD_WRITE_QUEUED, "WRITE DMA QUEUED EXT" }, 2238 { ATA_CMD_WRITE_STREAM_EXT, "WRITE STREAM EXT" }, 2239 { ATA_CMD_WRITE_STREAM_DMA_EXT, "WRITE STREAM DMA EXT" }, 2240 { ATA_CMD_WRITE_FUA_EXT, "WRITE DMA FUA EXT" }, 2241 { ATA_CMD_WRITE_QUEUED_FUA_EXT, "WRITE DMA QUEUED FUA EXT" }, 2242 { ATA_CMD_FPDMA_READ, "READ FPDMA QUEUED" }, 2243 { ATA_CMD_FPDMA_WRITE, "WRITE FPDMA QUEUED" }, 2244 { ATA_CMD_NCQ_NON_DATA, "NCQ NON-DATA" }, 2245 { ATA_CMD_FPDMA_SEND, "SEND FPDMA QUEUED" }, 2246 { ATA_CMD_FPDMA_RECV, "RECEIVE FPDMA QUEUED" }, 2247 { ATA_CMD_PIO_READ, "READ SECTOR(S)" }, 2248 { ATA_CMD_PIO_READ_EXT, "READ SECTOR(S) EXT" }, 2249 { ATA_CMD_PIO_WRITE, "WRITE SECTOR(S)" }, 2250 { ATA_CMD_PIO_WRITE_EXT, "WRITE SECTOR(S) EXT" }, 2251 { ATA_CMD_READ_MULTI, "READ MULTIPLE" }, 2252 { ATA_CMD_READ_MULTI_EXT, "READ MULTIPLE EXT" }, 2253 { ATA_CMD_WRITE_MULTI, "WRITE MULTIPLE" }, 2254 { ATA_CMD_WRITE_MULTI_EXT, "WRITE MULTIPLE EXT" }, 2255 { ATA_CMD_WRITE_MULTI_FUA_EXT, "WRITE MULTIPLE FUA EXT" }, 2256 { ATA_CMD_SET_FEATURES, "SET FEATURES" }, 2257 { ATA_CMD_SET_MULTI, "SET MULTIPLE MODE" }, 2258 { ATA_CMD_VERIFY, "READ VERIFY SECTOR(S)" }, 2259 { ATA_CMD_VERIFY_EXT, "READ VERIFY SECTOR(S) EXT" }, 2260 { ATA_CMD_WRITE_UNCORR_EXT, "WRITE UNCORRECTABLE EXT" }, 2261 { ATA_CMD_STANDBYNOW1, "STANDBY IMMEDIATE" }, 2262 { ATA_CMD_IDLEIMMEDIATE, "IDLE IMMEDIATE" }, 2263 { ATA_CMD_SLEEP, "SLEEP" }, 2264 { ATA_CMD_INIT_DEV_PARAMS, "INITIALIZE DEVICE PARAMETERS" }, 2265 { ATA_CMD_READ_NATIVE_MAX, "READ NATIVE MAX ADDRESS" }, 2266 { ATA_CMD_READ_NATIVE_MAX_EXT, "READ NATIVE MAX ADDRESS EXT" }, 2267 { ATA_CMD_SET_MAX, "SET MAX ADDRESS" }, 2268 { ATA_CMD_SET_MAX_EXT, "SET MAX ADDRESS EXT" }, 2269 { ATA_CMD_READ_LOG_EXT, "READ LOG EXT" }, 2270 { ATA_CMD_WRITE_LOG_EXT, "WRITE LOG EXT" }, 2271 { ATA_CMD_READ_LOG_DMA_EXT, "READ LOG DMA EXT" }, 2272 { ATA_CMD_WRITE_LOG_DMA_EXT, "WRITE LOG DMA EXT" }, 2273 { ATA_CMD_TRUSTED_NONDATA, "TRUSTED NON-DATA" }, 2274 { ATA_CMD_TRUSTED_RCV, "TRUSTED RECEIVE" }, 2275 { ATA_CMD_TRUSTED_RCV_DMA, "TRUSTED RECEIVE DMA" }, 2276 { ATA_CMD_TRUSTED_SND, "TRUSTED SEND" }, 2277 { ATA_CMD_TRUSTED_SND_DMA, 
"TRUSTED SEND DMA" }, 2278 { ATA_CMD_PMP_READ, "READ BUFFER" }, 2279 { ATA_CMD_PMP_READ_DMA, "READ BUFFER DMA" }, 2280 { ATA_CMD_PMP_WRITE, "WRITE BUFFER" }, 2281 { ATA_CMD_PMP_WRITE_DMA, "WRITE BUFFER DMA" }, 2282 { ATA_CMD_CONF_OVERLAY, "DEVICE CONFIGURATION OVERLAY" }, 2283 { ATA_CMD_SEC_SET_PASS, "SECURITY SET PASSWORD" }, 2284 { ATA_CMD_SEC_UNLOCK, "SECURITY UNLOCK" }, 2285 { ATA_CMD_SEC_ERASE_PREP, "SECURITY ERASE PREPARE" }, 2286 { ATA_CMD_SEC_ERASE_UNIT, "SECURITY ERASE UNIT" }, 2287 { ATA_CMD_SEC_FREEZE_LOCK, "SECURITY FREEZE LOCK" }, 2288 { ATA_CMD_SEC_DISABLE_PASS, "SECURITY DISABLE PASSWORD" }, 2289 { ATA_CMD_CONFIG_STREAM, "CONFIGURE STREAM" }, 2290 { ATA_CMD_SMART, "SMART" }, 2291 { ATA_CMD_MEDIA_LOCK, "DOOR LOCK" }, 2292 { ATA_CMD_MEDIA_UNLOCK, "DOOR UNLOCK" }, 2293 { ATA_CMD_DSM, "DATA SET MANAGEMENT" }, 2294 { ATA_CMD_CHK_MED_CRD_TYP, "CHECK MEDIA CARD TYPE" }, 2295 { ATA_CMD_CFA_REQ_EXT_ERR, "CFA REQUEST EXTENDED ERROR" }, 2296 { ATA_CMD_CFA_WRITE_NE, "CFA WRITE SECTORS WITHOUT ERASE" }, 2297 { ATA_CMD_CFA_TRANS_SECT, "CFA TRANSLATE SECTOR" }, 2298 { ATA_CMD_CFA_ERASE, "CFA ERASE SECTORS" }, 2299 { ATA_CMD_CFA_WRITE_MULT_NE, "CFA WRITE MULTIPLE WITHOUT ERASE" }, 2300 { ATA_CMD_REQ_SENSE_DATA, "REQUEST SENSE DATA EXT" }, 2301 { ATA_CMD_SANITIZE_DEVICE, "SANITIZE DEVICE" }, 2302 { ATA_CMD_ZAC_MGMT_IN, "ZAC MANAGEMENT IN" }, 2303 { ATA_CMD_ZAC_MGMT_OUT, "ZAC MANAGEMENT OUT" }, 2304 { ATA_CMD_READ_LONG, "READ LONG (with retries)" }, 2305 { ATA_CMD_READ_LONG_ONCE, "READ LONG (without retries)" }, 2306 { ATA_CMD_WRITE_LONG, "WRITE LONG (with retries)" }, 2307 { ATA_CMD_WRITE_LONG_ONCE, "WRITE LONG (without retries)" }, 2308 { ATA_CMD_RESTORE, "RECALIBRATE" }, 2309 { 0, NULL } /* terminate list */ 2310 }; 2311 2312 unsigned int i; 2313 for (i = 0; cmd_descr[i].text; i++) 2314 if (cmd_descr[i].command == command) 2315 return cmd_descr[i].text; 2316 #endif 2317 2318 return "unknown"; 2319 } 2320 EXPORT_SYMBOL_GPL(ata_get_cmd_name); 2321 2322 /** 2323 * ata_eh_link_report - report error handling to user 2324 * @link: ATA link EH is going on 2325 * 2326 * Report EH to user. 2327 * 2328 * LOCKING: 2329 * None. 
2330 */ 2331 static void ata_eh_link_report(struct ata_link *link) 2332 { 2333 struct ata_port *ap = link->ap; 2334 struct ata_eh_context *ehc = &link->eh_context; 2335 struct ata_queued_cmd *qc; 2336 const char *frozen, *desc; 2337 char tries_buf[16] = ""; 2338 int tag, nr_failed = 0; 2339 2340 if (ehc->i.flags & ATA_EHI_QUIET) 2341 return; 2342 2343 desc = NULL; 2344 if (ehc->i.desc[0] != '\0') 2345 desc = ehc->i.desc; 2346 2347 ata_qc_for_each_raw(ap, qc, tag) { 2348 if (!(qc->flags & ATA_QCFLAG_EH) || 2349 ata_dev_phys_link(qc->dev) != link || 2350 ((qc->flags & ATA_QCFLAG_QUIET) && 2351 qc->err_mask == AC_ERR_DEV)) 2352 continue; 2353 if (qc->flags & ATA_QCFLAG_SENSE_VALID && !qc->err_mask) 2354 continue; 2355 2356 nr_failed++; 2357 } 2358 2359 if (!nr_failed && !ehc->i.err_mask) 2360 return; 2361 2362 frozen = ""; 2363 if (ata_port_is_frozen(ap)) 2364 frozen = " frozen"; 2365 2366 if (ap->eh_tries < ATA_EH_MAX_TRIES) 2367 snprintf(tries_buf, sizeof(tries_buf), " t%d", 2368 ap->eh_tries); 2369 2370 if (ehc->i.dev) { 2371 ata_dev_err(ehc->i.dev, "exception Emask 0x%x " 2372 "SAct 0x%x SErr 0x%x action 0x%x%s%s\n", 2373 ehc->i.err_mask, link->sactive, ehc->i.serror, 2374 ehc->i.action, frozen, tries_buf); 2375 if (desc) 2376 ata_dev_err(ehc->i.dev, "%s\n", desc); 2377 } else { 2378 ata_link_err(link, "exception Emask 0x%x " 2379 "SAct 0x%x SErr 0x%x action 0x%x%s%s\n", 2380 ehc->i.err_mask, link->sactive, ehc->i.serror, 2381 ehc->i.action, frozen, tries_buf); 2382 if (desc) 2383 ata_link_err(link, "%s\n", desc); 2384 } 2385 2386 #ifdef CONFIG_ATA_VERBOSE_ERROR 2387 if (ehc->i.serror) 2388 ata_link_err(link, 2389 "SError: { %s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s}\n", 2390 ehc->i.serror & SERR_DATA_RECOVERED ? "RecovData " : "", 2391 ehc->i.serror & SERR_COMM_RECOVERED ? "RecovComm " : "", 2392 ehc->i.serror & SERR_DATA ? "UnrecovData " : "", 2393 ehc->i.serror & SERR_PERSISTENT ? "Persist " : "", 2394 ehc->i.serror & SERR_PROTOCOL ? "Proto " : "", 2395 ehc->i.serror & SERR_INTERNAL ? "HostInt " : "", 2396 ehc->i.serror & SERR_PHYRDY_CHG ? "PHYRdyChg " : "", 2397 ehc->i.serror & SERR_PHY_INT_ERR ? "PHYInt " : "", 2398 ehc->i.serror & SERR_COMM_WAKE ? "CommWake " : "", 2399 ehc->i.serror & SERR_10B_8B_ERR ? "10B8B " : "", 2400 ehc->i.serror & SERR_DISPARITY ? "Dispar " : "", 2401 ehc->i.serror & SERR_CRC ? "BadCRC " : "", 2402 ehc->i.serror & SERR_HANDSHAKE ? "Handshk " : "", 2403 ehc->i.serror & SERR_LINK_SEQ_ERR ? "LinkSeq " : "", 2404 ehc->i.serror & SERR_TRANS_ST_ERROR ? "TrStaTrns " : "", 2405 ehc->i.serror & SERR_UNRECOG_FIS ? "UnrecFIS " : "", 2406 ehc->i.serror & SERR_DEV_XCHG ? 
"DevExch " : ""); 2407 #endif 2408 2409 ata_qc_for_each_raw(ap, qc, tag) { 2410 struct ata_taskfile *cmd = &qc->tf, *res = &qc->result_tf; 2411 char data_buf[20] = ""; 2412 char cdb_buf[70] = ""; 2413 2414 if (!(qc->flags & ATA_QCFLAG_EH) || 2415 ata_dev_phys_link(qc->dev) != link || !qc->err_mask) 2416 continue; 2417 2418 if (qc->dma_dir != DMA_NONE) { 2419 static const char *dma_str[] = { 2420 [DMA_BIDIRECTIONAL] = "bidi", 2421 [DMA_TO_DEVICE] = "out", 2422 [DMA_FROM_DEVICE] = "in", 2423 }; 2424 const char *prot_str = NULL; 2425 2426 switch (qc->tf.protocol) { 2427 case ATA_PROT_UNKNOWN: 2428 prot_str = "unknown"; 2429 break; 2430 case ATA_PROT_NODATA: 2431 prot_str = "nodata"; 2432 break; 2433 case ATA_PROT_PIO: 2434 prot_str = "pio"; 2435 break; 2436 case ATA_PROT_DMA: 2437 prot_str = "dma"; 2438 break; 2439 case ATA_PROT_NCQ: 2440 prot_str = "ncq dma"; 2441 break; 2442 case ATA_PROT_NCQ_NODATA: 2443 prot_str = "ncq nodata"; 2444 break; 2445 case ATAPI_PROT_NODATA: 2446 prot_str = "nodata"; 2447 break; 2448 case ATAPI_PROT_PIO: 2449 prot_str = "pio"; 2450 break; 2451 case ATAPI_PROT_DMA: 2452 prot_str = "dma"; 2453 break; 2454 } 2455 snprintf(data_buf, sizeof(data_buf), " %s %u %s", 2456 prot_str, qc->nbytes, dma_str[qc->dma_dir]); 2457 } 2458 2459 if (ata_is_atapi(qc->tf.protocol)) { 2460 const u8 *cdb = qc->cdb; 2461 size_t cdb_len = qc->dev->cdb_len; 2462 2463 if (qc->scsicmd) { 2464 cdb = qc->scsicmd->cmnd; 2465 cdb_len = qc->scsicmd->cmd_len; 2466 } 2467 __scsi_format_command(cdb_buf, sizeof(cdb_buf), 2468 cdb, cdb_len); 2469 } else 2470 ata_dev_err(qc->dev, "failed command: %s\n", 2471 ata_get_cmd_name(cmd->command)); 2472 2473 ata_dev_err(qc->dev, 2474 "cmd %02x/%02x:%02x:%02x:%02x:%02x/%02x:%02x:%02x:%02x:%02x/%02x " 2475 "tag %d%s\n %s" 2476 "res %02x/%02x:%02x:%02x:%02x:%02x/%02x:%02x:%02x:%02x:%02x/%02x " 2477 "Emask 0x%x (%s)%s\n", 2478 cmd->command, cmd->feature, cmd->nsect, 2479 cmd->lbal, cmd->lbam, cmd->lbah, 2480 cmd->hob_feature, cmd->hob_nsect, 2481 cmd->hob_lbal, cmd->hob_lbam, cmd->hob_lbah, 2482 cmd->device, qc->tag, data_buf, cdb_buf, 2483 res->status, res->error, res->nsect, 2484 res->lbal, res->lbam, res->lbah, 2485 res->hob_feature, res->hob_nsect, 2486 res->hob_lbal, res->hob_lbam, res->hob_lbah, 2487 res->device, qc->err_mask, ata_err_string(qc->err_mask), 2488 qc->err_mask & AC_ERR_NCQ ? " <F>" : ""); 2489 2490 #ifdef CONFIG_ATA_VERBOSE_ERROR 2491 if (res->status & (ATA_BUSY | ATA_DRDY | ATA_DF | ATA_DRQ | 2492 ATA_SENSE | ATA_ERR)) { 2493 if (res->status & ATA_BUSY) 2494 ata_dev_err(qc->dev, "status: { Busy }\n"); 2495 else 2496 ata_dev_err(qc->dev, "status: { %s%s%s%s%s}\n", 2497 res->status & ATA_DRDY ? "DRDY " : "", 2498 res->status & ATA_DF ? "DF " : "", 2499 res->status & ATA_DRQ ? "DRQ " : "", 2500 res->status & ATA_SENSE ? "SENSE " : "", 2501 res->status & ATA_ERR ? "ERR " : ""); 2502 } 2503 2504 if (cmd->command != ATA_CMD_PACKET && 2505 (res->error & (ATA_ICRC | ATA_UNC | ATA_AMNF | ATA_IDNF | 2506 ATA_ABORTED))) 2507 ata_dev_err(qc->dev, "error: { %s%s%s%s%s}\n", 2508 res->error & ATA_ICRC ? "ICRC " : "", 2509 res->error & ATA_UNC ? "UNC " : "", 2510 res->error & ATA_AMNF ? "AMNF " : "", 2511 res->error & ATA_IDNF ? "IDNF " : "", 2512 res->error & ATA_ABORTED ? "ABRT " : ""); 2513 #endif 2514 } 2515 } 2516 2517 /** 2518 * ata_eh_report - report error handling to user 2519 * @ap: ATA port to report EH about 2520 * 2521 * Report EH to user. 2522 * 2523 * LOCKING: 2524 * None. 
2525 */ 2526 void ata_eh_report(struct ata_port *ap) 2527 { 2528 struct ata_link *link; 2529 2530 ata_for_each_link(link, ap, HOST_FIRST) 2531 ata_eh_link_report(link); 2532 } 2533 2534 static int ata_do_reset(struct ata_link *link, ata_reset_fn_t reset, 2535 unsigned int *classes, unsigned long deadline, 2536 bool clear_classes) 2537 { 2538 struct ata_device *dev; 2539 2540 if (clear_classes) 2541 ata_for_each_dev(dev, link, ALL) 2542 classes[dev->devno] = ATA_DEV_UNKNOWN; 2543 2544 return reset(link, classes, deadline); 2545 } 2546 2547 static int ata_eh_followup_srst_needed(struct ata_link *link, int rc) 2548 { 2549 if ((link->flags & ATA_LFLAG_NO_SRST) || ata_link_offline(link)) 2550 return 0; 2551 if (rc == -EAGAIN) 2552 return 1; 2553 if (sata_pmp_supported(link->ap) && ata_is_host_link(link)) 2554 return 1; 2555 return 0; 2556 } 2557 2558 int ata_eh_reset(struct ata_link *link, int classify, 2559 ata_prereset_fn_t prereset, ata_reset_fn_t softreset, 2560 ata_reset_fn_t hardreset, ata_postreset_fn_t postreset) 2561 { 2562 struct ata_port *ap = link->ap; 2563 struct ata_link *slave = ap->slave_link; 2564 struct ata_eh_context *ehc = &link->eh_context; 2565 struct ata_eh_context *sehc = slave ? &slave->eh_context : NULL; 2566 unsigned int *classes = ehc->classes; 2567 unsigned int lflags = link->flags; 2568 int verbose = !(ehc->i.flags & ATA_EHI_QUIET); 2569 int max_tries = 0, try = 0; 2570 struct ata_link *failed_link; 2571 struct ata_device *dev; 2572 unsigned long deadline, now; 2573 ata_reset_fn_t reset; 2574 unsigned long flags; 2575 u32 sstatus; 2576 int nr_unknown, rc; 2577 2578 /* 2579 * Prepare to reset 2580 */ 2581 while (ata_eh_reset_timeouts[max_tries] != UINT_MAX) 2582 max_tries++; 2583 if (link->flags & ATA_LFLAG_RST_ONCE) 2584 max_tries = 1; 2585 if (link->flags & ATA_LFLAG_NO_HRST) 2586 hardreset = NULL; 2587 if (link->flags & ATA_LFLAG_NO_SRST) 2588 softreset = NULL; 2589 2590 /* make sure each reset attempt is at least COOL_DOWN apart */ 2591 if (ehc->i.flags & ATA_EHI_DID_RESET) { 2592 now = jiffies; 2593 WARN_ON(time_after(ehc->last_reset, now)); 2594 deadline = ata_deadline(ehc->last_reset, 2595 ATA_EH_RESET_COOL_DOWN); 2596 if (time_before(now, deadline)) 2597 schedule_timeout_uninterruptible(deadline - now); 2598 } 2599 2600 spin_lock_irqsave(ap->lock, flags); 2601 ap->pflags |= ATA_PFLAG_RESETTING; 2602 spin_unlock_irqrestore(ap->lock, flags); 2603 2604 ata_eh_about_to_do(link, NULL, ATA_EH_RESET); 2605 2606 ata_for_each_dev(dev, link, ALL) { 2607 /* If we issue an SRST then an ATA drive (not ATAPI) 2608 * may change configuration and be in PIO0 timing. If 2609 * we do a hard reset (or are coming from power on) 2610 * this is true for ATA or ATAPI. Until we've set a 2611 * suitable controller mode we should not touch the 2612 * bus as we may be talking too fast. 2613 */ 2614 dev->pio_mode = XFER_PIO_0; 2615 dev->dma_mode = 0xff; 2616 2617 /* If the controller has a pio mode setup function 2618 * then use it to set the chipset to rights. Don't 2619 * touch the DMA setup as that will be dealt with when 2620 * configuring devices. 
2621 */ 2622 if (ap->ops->set_piomode) 2623 ap->ops->set_piomode(ap, dev); 2624 } 2625 2626 /* prefer hardreset */ 2627 reset = NULL; 2628 ehc->i.action &= ~ATA_EH_RESET; 2629 if (hardreset) { 2630 reset = hardreset; 2631 ehc->i.action |= ATA_EH_HARDRESET; 2632 } else if (softreset) { 2633 reset = softreset; 2634 ehc->i.action |= ATA_EH_SOFTRESET; 2635 } 2636 2637 if (prereset) { 2638 unsigned long deadline = ata_deadline(jiffies, 2639 ATA_EH_PRERESET_TIMEOUT); 2640 2641 if (slave) { 2642 sehc->i.action &= ~ATA_EH_RESET; 2643 sehc->i.action |= ehc->i.action; 2644 } 2645 2646 rc = prereset(link, deadline); 2647 2648 /* If present, do prereset on slave link too. Reset 2649 * is skipped iff both master and slave links report 2650 * -ENOENT or clear ATA_EH_RESET. 2651 */ 2652 if (slave && (rc == 0 || rc == -ENOENT)) { 2653 int tmp; 2654 2655 tmp = prereset(slave, deadline); 2656 if (tmp != -ENOENT) 2657 rc = tmp; 2658 2659 ehc->i.action |= sehc->i.action; 2660 } 2661 2662 if (rc) { 2663 if (rc == -ENOENT) { 2664 ata_link_dbg(link, "port disabled--ignoring\n"); 2665 ehc->i.action &= ~ATA_EH_RESET; 2666 2667 ata_for_each_dev(dev, link, ALL) 2668 classes[dev->devno] = ATA_DEV_NONE; 2669 2670 rc = 0; 2671 } else 2672 ata_link_err(link, 2673 "prereset failed (errno=%d)\n", 2674 rc); 2675 goto out; 2676 } 2677 2678 /* prereset() might have cleared ATA_EH_RESET. If so, 2679 * bang classes, thaw and return. 2680 */ 2681 if (reset && !(ehc->i.action & ATA_EH_RESET)) { 2682 ata_for_each_dev(dev, link, ALL) 2683 classes[dev->devno] = ATA_DEV_NONE; 2684 if (ata_port_is_frozen(ap) && ata_is_host_link(link)) 2685 ata_eh_thaw_port(ap); 2686 rc = 0; 2687 goto out; 2688 } 2689 } 2690 2691 retry: 2692 /* 2693 * Perform reset 2694 */ 2695 if (ata_is_host_link(link)) 2696 ata_eh_freeze_port(ap); 2697 2698 deadline = ata_deadline(jiffies, ata_eh_reset_timeouts[try++]); 2699 2700 if (reset) { 2701 if (verbose) 2702 ata_link_info(link, "%s resetting link\n", 2703 reset == softreset ? 
"soft" : "hard"); 2704 2705 /* mark that this EH session started with reset */ 2706 ehc->last_reset = jiffies; 2707 if (reset == hardreset) { 2708 ehc->i.flags |= ATA_EHI_DID_HARDRESET; 2709 trace_ata_link_hardreset_begin(link, classes, deadline); 2710 } else { 2711 ehc->i.flags |= ATA_EHI_DID_SOFTRESET; 2712 trace_ata_link_softreset_begin(link, classes, deadline); 2713 } 2714 2715 rc = ata_do_reset(link, reset, classes, deadline, true); 2716 if (reset == hardreset) 2717 trace_ata_link_hardreset_end(link, classes, rc); 2718 else 2719 trace_ata_link_softreset_end(link, classes, rc); 2720 if (rc && rc != -EAGAIN) { 2721 failed_link = link; 2722 goto fail; 2723 } 2724 2725 /* hardreset slave link if existent */ 2726 if (slave && reset == hardreset) { 2727 int tmp; 2728 2729 if (verbose) 2730 ata_link_info(slave, "hard resetting link\n"); 2731 2732 ata_eh_about_to_do(slave, NULL, ATA_EH_RESET); 2733 trace_ata_slave_hardreset_begin(slave, classes, 2734 deadline); 2735 tmp = ata_do_reset(slave, reset, classes, deadline, 2736 false); 2737 trace_ata_slave_hardreset_end(slave, classes, tmp); 2738 switch (tmp) { 2739 case -EAGAIN: 2740 rc = -EAGAIN; 2741 break; 2742 case 0: 2743 break; 2744 default: 2745 failed_link = slave; 2746 rc = tmp; 2747 goto fail; 2748 } 2749 } 2750 2751 /* perform follow-up SRST if necessary */ 2752 if (reset == hardreset && 2753 ata_eh_followup_srst_needed(link, rc)) { 2754 reset = softreset; 2755 2756 if (!reset) { 2757 ata_link_err(link, 2758 "follow-up softreset required but no softreset available\n"); 2759 failed_link = link; 2760 rc = -EINVAL; 2761 goto fail; 2762 } 2763 2764 ata_eh_about_to_do(link, NULL, ATA_EH_RESET); 2765 trace_ata_link_softreset_begin(link, classes, deadline); 2766 rc = ata_do_reset(link, reset, classes, deadline, true); 2767 trace_ata_link_softreset_end(link, classes, rc); 2768 if (rc) { 2769 failed_link = link; 2770 goto fail; 2771 } 2772 } 2773 } else { 2774 if (verbose) 2775 ata_link_info(link, 2776 "no reset method available, skipping reset\n"); 2777 if (!(lflags & ATA_LFLAG_ASSUME_CLASS)) 2778 lflags |= ATA_LFLAG_ASSUME_ATA; 2779 } 2780 2781 /* 2782 * Post-reset processing 2783 */ 2784 ata_for_each_dev(dev, link, ALL) { 2785 /* After the reset, the device state is PIO 0 and the 2786 * controller state is undefined. Reset also wakes up 2787 * drives from sleeping mode. 2788 */ 2789 dev->pio_mode = XFER_PIO_0; 2790 dev->flags &= ~ATA_DFLAG_SLEEPING; 2791 2792 if (ata_phys_link_offline(ata_dev_phys_link(dev))) 2793 continue; 2794 2795 /* apply class override */ 2796 if (lflags & ATA_LFLAG_ASSUME_ATA) 2797 classes[dev->devno] = ATA_DEV_ATA; 2798 else if (lflags & ATA_LFLAG_ASSUME_SEMB) 2799 classes[dev->devno] = ATA_DEV_SEMB_UNSUP; 2800 } 2801 2802 /* record current link speed */ 2803 if (sata_scr_read(link, SCR_STATUS, &sstatus) == 0) 2804 link->sata_spd = (sstatus >> 4) & 0xf; 2805 if (slave && sata_scr_read(slave, SCR_STATUS, &sstatus) == 0) 2806 slave->sata_spd = (sstatus >> 4) & 0xf; 2807 2808 /* thaw the port */ 2809 if (ata_is_host_link(link)) 2810 ata_eh_thaw_port(ap); 2811 2812 /* postreset() should clear hardware SError. Although SError 2813 * is cleared during link resume, clearing SError here is 2814 * necessary as some PHYs raise hotplug events after SRST. 2815 * This introduces race condition where hotplug occurs between 2816 * reset and here. This race is mediated by cross checking 2817 * link onlineness and classification result later. 
2818 */ 2819 if (postreset) { 2820 postreset(link, classes); 2821 trace_ata_link_postreset(link, classes, rc); 2822 if (slave) { 2823 postreset(slave, classes); 2824 trace_ata_slave_postreset(slave, classes, rc); 2825 } 2826 } 2827 2828 /* clear cached SError */ 2829 spin_lock_irqsave(link->ap->lock, flags); 2830 link->eh_info.serror = 0; 2831 if (slave) 2832 slave->eh_info.serror = 0; 2833 spin_unlock_irqrestore(link->ap->lock, flags); 2834 2835 /* 2836 * Make sure onlineness and classification result correspond. 2837 * Hotplug could have happened during reset and some 2838 * controllers fail to wait while a drive is spinning up after 2839 * being hotplugged causing misdetection. By cross checking 2840 * link on/offlineness and classification result, those 2841 * conditions can be reliably detected and retried. 2842 */ 2843 nr_unknown = 0; 2844 ata_for_each_dev(dev, link, ALL) { 2845 if (ata_phys_link_online(ata_dev_phys_link(dev))) { 2846 if (classes[dev->devno] == ATA_DEV_UNKNOWN) { 2847 ata_dev_dbg(dev, "link online but device misclassified\n"); 2848 classes[dev->devno] = ATA_DEV_NONE; 2849 nr_unknown++; 2850 } 2851 } else if (ata_phys_link_offline(ata_dev_phys_link(dev))) { 2852 if (ata_class_enabled(classes[dev->devno])) 2853 ata_dev_dbg(dev, 2854 "link offline, clearing class %d to NONE\n", 2855 classes[dev->devno]); 2856 classes[dev->devno] = ATA_DEV_NONE; 2857 } else if (classes[dev->devno] == ATA_DEV_UNKNOWN) { 2858 ata_dev_dbg(dev, 2859 "link status unknown, clearing UNKNOWN to NONE\n"); 2860 classes[dev->devno] = ATA_DEV_NONE; 2861 } 2862 } 2863 2864 if (classify && nr_unknown) { 2865 if (try < max_tries) { 2866 ata_link_warn(link, 2867 "link online but %d devices misclassified, retrying\n", 2868 nr_unknown); 2869 failed_link = link; 2870 rc = -EAGAIN; 2871 goto fail; 2872 } 2873 ata_link_warn(link, 2874 "link online but %d devices misclassified, " 2875 "device detection might fail\n", nr_unknown); 2876 } 2877 2878 /* reset successful, schedule revalidation */ 2879 ata_eh_done(link, NULL, ATA_EH_RESET); 2880 if (slave) 2881 ata_eh_done(slave, NULL, ATA_EH_RESET); 2882 ehc->last_reset = jiffies; /* update to completion time */ 2883 ehc->i.action |= ATA_EH_REVALIDATE; 2884 link->lpm_policy = ATA_LPM_UNKNOWN; /* reset LPM state */ 2885 2886 rc = 0; 2887 out: 2888 /* clear hotplug flag */ 2889 ehc->i.flags &= ~ATA_EHI_HOTPLUGGED; 2890 if (slave) 2891 sehc->i.flags &= ~ATA_EHI_HOTPLUGGED; 2892 2893 spin_lock_irqsave(ap->lock, flags); 2894 ap->pflags &= ~ATA_PFLAG_RESETTING; 2895 spin_unlock_irqrestore(ap->lock, flags); 2896 2897 return rc; 2898 2899 fail: 2900 /* if SCR isn't accessible on a fan-out port, PMP needs to be reset */ 2901 if (!ata_is_host_link(link) && 2902 sata_scr_read(link, SCR_STATUS, &sstatus)) 2903 rc = -ERESTART; 2904 2905 if (try >= max_tries) { 2906 /* 2907 * Thaw host port even if reset failed, so that the port 2908 * can be retried on the next phy event. This risks 2909 * repeated EH runs but seems to be a better tradeoff than 2910 * shutting down a port after a botched hotplug attempt. 
2911 */ 2912 if (ata_is_host_link(link)) 2913 ata_eh_thaw_port(ap); 2914 goto out; 2915 } 2916 2917 now = jiffies; 2918 if (time_before(now, deadline)) { 2919 unsigned long delta = deadline - now; 2920 2921 ata_link_warn(failed_link, 2922 "reset failed (errno=%d), retrying in %u secs\n", 2923 rc, DIV_ROUND_UP(jiffies_to_msecs(delta), 1000)); 2924 2925 ata_eh_release(ap); 2926 while (delta) 2927 delta = schedule_timeout_uninterruptible(delta); 2928 ata_eh_acquire(ap); 2929 } 2930 2931 /* 2932 * While disks spinup behind PMP, some controllers fail sending SRST. 2933 * They need to be reset - as well as the PMP - before retrying. 2934 */ 2935 if (rc == -ERESTART) { 2936 if (ata_is_host_link(link)) 2937 ata_eh_thaw_port(ap); 2938 goto out; 2939 } 2940 2941 if (try == max_tries - 1) { 2942 sata_down_spd_limit(link, 0); 2943 if (slave) 2944 sata_down_spd_limit(slave, 0); 2945 } else if (rc == -EPIPE) 2946 sata_down_spd_limit(failed_link, 0); 2947 2948 if (hardreset) 2949 reset = hardreset; 2950 goto retry; 2951 } 2952 2953 static inline void ata_eh_pull_park_action(struct ata_port *ap) 2954 { 2955 struct ata_link *link; 2956 struct ata_device *dev; 2957 unsigned long flags; 2958 2959 /* 2960 * This function can be thought of as an extended version of 2961 * ata_eh_about_to_do() specially crafted to accommodate the 2962 * requirements of ATA_EH_PARK handling. Since the EH thread 2963 * does not leave the do {} while () loop in ata_eh_recover as 2964 * long as the timeout for a park request to *one* device on 2965 * the port has not expired, and since we still want to pick 2966 * up park requests to other devices on the same port or 2967 * timeout updates for the same device, we have to pull 2968 * ATA_EH_PARK actions from eh_info into eh_context.i 2969 * ourselves at the beginning of each pass over the loop. 2970 * 2971 * Additionally, all write accesses to &ap->park_req_pending 2972 * through reinit_completion() (see below) or complete_all() 2973 * (see ata_scsi_park_store()) are protected by the host lock. 2974 * As a result we have that park_req_pending.done is zero on 2975 * exit from this function, i.e. when ATA_EH_PARK actions for 2976 * *all* devices on port ap have been pulled into the 2977 * respective eh_context structs. If, and only if, 2978 * park_req_pending.done is non-zero by the time we reach 2979 * wait_for_completion_timeout(), another ATA_EH_PARK action 2980 * has been scheduled for at least one of the devices on port 2981 * ap and we have to cycle over the do {} while () loop in 2982 * ata_eh_recover() again. 
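*
* In sketch form, the handshake with ata_scsi_park_store() is:
*
*   EH thread (here):
*           reinit_completion(&ap->park_req_pending);
*           pull ATA_EH_PARK from eh_info into eh_context.i
*   sysfs writer (ata_scsi_park_store()):
*           set ATA_EH_PARK in the device's eh_info action mask
*           complete_all(&ap->park_req_pending);
*   EH thread, later in ata_eh_recover():
*           wait_for_completion_timeout(&ap->park_req_pending, ...);
*
* with all writes to the EH state done under the host lock.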
2983 */ 2984 2985 spin_lock_irqsave(ap->lock, flags); 2986 reinit_completion(&ap->park_req_pending); 2987 ata_for_each_link(link, ap, EDGE) { 2988 ata_for_each_dev(dev, link, ALL) { 2989 struct ata_eh_info *ehi = &link->eh_info; 2990 2991 link->eh_context.i.dev_action[dev->devno] |= 2992 ehi->dev_action[dev->devno] & ATA_EH_PARK; 2993 ata_eh_clear_action(link, dev, ehi, ATA_EH_PARK); 2994 } 2995 } 2996 spin_unlock_irqrestore(ap->lock, flags); 2997 } 2998 2999 static void ata_eh_park_issue_cmd(struct ata_device *dev, int park) 3000 { 3001 struct ata_eh_context *ehc = &dev->link->eh_context; 3002 struct ata_taskfile tf; 3003 unsigned int err_mask; 3004 3005 ata_tf_init(dev, &tf); 3006 if (park) { 3007 ehc->unloaded_mask |= 1 << dev->devno; 3008 tf.command = ATA_CMD_IDLEIMMEDIATE; 3009 tf.feature = 0x44; 3010 tf.lbal = 0x4c; 3011 tf.lbam = 0x4e; 3012 tf.lbah = 0x55; 3013 } else { 3014 ehc->unloaded_mask &= ~(1 << dev->devno); 3015 tf.command = ATA_CMD_CHK_POWER; 3016 } 3017 3018 tf.flags |= ATA_TFLAG_DEVICE | ATA_TFLAG_ISADDR; 3019 tf.protocol = ATA_PROT_NODATA; 3020 err_mask = ata_exec_internal(dev, &tf, NULL, DMA_NONE, NULL, 0, 0); 3021 if (park && (err_mask || tf.lbal != 0xc4)) { 3022 ata_dev_err(dev, "head unload failed!\n"); 3023 ehc->unloaded_mask &= ~(1 << dev->devno); 3024 } 3025 } 3026 3027 static int ata_eh_revalidate_and_attach(struct ata_link *link, 3028 struct ata_device **r_failed_dev) 3029 { 3030 struct ata_port *ap = link->ap; 3031 struct ata_eh_context *ehc = &link->eh_context; 3032 struct ata_device *dev; 3033 unsigned int new_mask = 0; 3034 unsigned long flags; 3035 int rc = 0; 3036 3037 /* For PATA drive side cable detection to work, IDENTIFY must 3038 * be done backwards such that PDIAG- is released by the slave 3039 * device before the master device is identified. 3040 */ 3041 ata_for_each_dev(dev, link, ALL_REVERSE) { 3042 unsigned int action = ata_eh_dev_action(dev); 3043 unsigned int readid_flags = 0; 3044 3045 if (ehc->i.flags & ATA_EHI_DID_RESET) 3046 readid_flags |= ATA_READID_POSTRESET; 3047 3048 /* 3049 * When resuming, before executing any command, make sure to 3050 * transition the device to the active power mode. 3051 */ 3052 if ((action & ATA_EH_SET_ACTIVE) && ata_dev_enabled(dev)) { 3053 ata_dev_power_set_active(dev); 3054 ata_eh_done(link, dev, ATA_EH_SET_ACTIVE); 3055 } 3056 3057 if ((action & ATA_EH_REVALIDATE) && ata_dev_enabled(dev)) { 3058 WARN_ON(dev->class == ATA_DEV_PMP); 3059 3060 /* 3061 * The link may be in a deep sleep, wake it up. 3062 * 3063 * If the link is in deep sleep, ata_phys_link_offline() 3064 * will return true, causing the revalidation to fail, 3065 * which leads to a (potentially) needless hard reset. 3066 * 3067 * ata_eh_recover() will later restore the link policy 3068 * to ap->target_lpm_policy after revalidation is done. 3069 */ 3070 if (link->lpm_policy > ATA_LPM_MAX_POWER) { 3071 rc = ata_eh_set_lpm(link, ATA_LPM_MAX_POWER, 3072 r_failed_dev); 3073 if (rc) 3074 goto err; 3075 } 3076 3077 if (ata_phys_link_offline(ata_dev_phys_link(dev))) { 3078 rc = -EIO; 3079 goto err; 3080 } 3081 3082 ata_eh_about_to_do(link, dev, ATA_EH_REVALIDATE); 3083 rc = ata_dev_revalidate(dev, ehc->classes[dev->devno], 3084 readid_flags); 3085 if (rc) 3086 goto err; 3087 3088 ata_eh_done(link, dev, ATA_EH_REVALIDATE); 3089 3090 /* Configuration may have changed, reconfigure 3091 * transfer mode. 
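* (ATA_EHI_SETMODE set here is consumed later in ata_eh_recover(),
* which calls ata_set_mode() and clears the flag once the transfer
* mode has been programmed.)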
3092 */ 3093 ehc->i.flags |= ATA_EHI_SETMODE; 3094 3095 /* schedule the scsi_rescan_device() here */ 3096 schedule_delayed_work(&ap->scsi_rescan_task, 0); 3097 } else if (dev->class == ATA_DEV_UNKNOWN && 3098 ehc->tries[dev->devno] && 3099 ata_class_enabled(ehc->classes[dev->devno])) { 3100 /* Temporarily set dev->class, it will be 3101 * permanently set once all configurations are 3102 * complete. This is necessary because new 3103 * device configuration is done in two 3104 * separate loops. 3105 */ 3106 dev->class = ehc->classes[dev->devno]; 3107 3108 if (dev->class == ATA_DEV_PMP) 3109 rc = sata_pmp_attach(dev); 3110 else 3111 rc = ata_dev_read_id(dev, &dev->class, 3112 readid_flags, dev->id); 3113 3114 /* read_id might have changed class, store and reset */ 3115 ehc->classes[dev->devno] = dev->class; 3116 dev->class = ATA_DEV_UNKNOWN; 3117 3118 switch (rc) { 3119 case 0: 3120 /* clear error info accumulated during probe */ 3121 ata_ering_clear(&dev->ering); 3122 new_mask |= 1 << dev->devno; 3123 break; 3124 case -ENOENT: 3125 /* IDENTIFY was issued to non-existent 3126 * device. No need to reset. Just 3127 * thaw and ignore the device. 3128 */ 3129 ata_eh_thaw_port(ap); 3130 break; 3131 default: 3132 goto err; 3133 } 3134 } 3135 } 3136 3137 /* PDIAG- should have been released, ask cable type if post-reset */ 3138 if ((ehc->i.flags & ATA_EHI_DID_RESET) && ata_is_host_link(link)) { 3139 if (ap->ops->cable_detect) 3140 ap->cbl = ap->ops->cable_detect(ap); 3141 ata_force_cbl(ap); 3142 } 3143 3144 /* Configure new devices forward such that user doesn't see 3145 * device detection messages backwards. 3146 */ 3147 ata_for_each_dev(dev, link, ALL) { 3148 if (!(new_mask & (1 << dev->devno))) 3149 continue; 3150 3151 dev->class = ehc->classes[dev->devno]; 3152 3153 if (dev->class == ATA_DEV_PMP) 3154 continue; 3155 3156 ehc->i.flags |= ATA_EHI_PRINTINFO; 3157 rc = ata_dev_configure(dev); 3158 ehc->i.flags &= ~ATA_EHI_PRINTINFO; 3159 if (rc) { 3160 dev->class = ATA_DEV_UNKNOWN; 3161 goto err; 3162 } 3163 3164 spin_lock_irqsave(ap->lock, flags); 3165 ap->pflags |= ATA_PFLAG_SCSI_HOTPLUG; 3166 spin_unlock_irqrestore(ap->lock, flags); 3167 3168 /* new device discovered, configure xfermode */ 3169 ehc->i.flags |= ATA_EHI_SETMODE; 3170 } 3171 3172 return 0; 3173 3174 err: 3175 dev->flags &= ~ATA_DFLAG_RESUMING; 3176 *r_failed_dev = dev; 3177 return rc; 3178 } 3179 3180 /** 3181 * ata_set_mode - Program timings and issue SET FEATURES - XFER 3182 * @link: link on which timings will be programmed 3183 * @r_failed_dev: out parameter for failed device 3184 * 3185 * Set ATA device disk transfer mode (PIO3, UDMA6, etc.). If 3186 * ata_set_mode() fails, pointer to the failing device is 3187 * returned in @r_failed_dev. 3188 * 3189 * LOCKING: 3190 * PCI/etc. bus probe sem. 3191 * 3192 * RETURNS: 3193 * 0 on success, negative errno otherwise 3194 */ 3195 int ata_set_mode(struct ata_link *link, struct ata_device **r_failed_dev) 3196 { 3197 struct ata_port *ap = link->ap; 3198 struct ata_device *dev; 3199 int rc; 3200 3201 /* if data transfer is verified, clear DUBIOUS_XFER on ering top */ 3202 ata_for_each_dev(dev, link, ENABLED) { 3203 if (!(dev->flags & ATA_DFLAG_DUBIOUS_XFER)) { 3204 struct ata_ering_entry *ent; 3205 3206 ent = ata_ering_top(&dev->ering); 3207 if (ent) 3208 ent->eflags &= ~ATA_EFLAG_DUBIOUS_XFER; 3209 } 3210 } 3211 3212 /* has private set_mode? 
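* Drivers with controller-specific timing requirements can supply
* ->set_mode() to take over the whole sequence; everything else
* goes through the generic ata_do_set_mode() below.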
*/ 3213 if (ap->ops->set_mode) 3214 rc = ap->ops->set_mode(link, r_failed_dev); 3215 else 3216 rc = ata_do_set_mode(link, r_failed_dev); 3217 3218 /* if transfer mode has changed, set DUBIOUS_XFER on device */ 3219 ata_for_each_dev(dev, link, ENABLED) { 3220 struct ata_eh_context *ehc = &link->eh_context; 3221 u8 saved_xfer_mode = ehc->saved_xfer_mode[dev->devno]; 3222 u8 saved_ncq = !!(ehc->saved_ncq_enabled & (1 << dev->devno)); 3223 3224 if (dev->xfer_mode != saved_xfer_mode || 3225 ata_ncq_enabled(dev) != saved_ncq) 3226 dev->flags |= ATA_DFLAG_DUBIOUS_XFER; 3227 } 3228 3229 return rc; 3230 } 3231 3232 /** 3233 * atapi_eh_clear_ua - Clear ATAPI UNIT ATTENTION after reset 3234 * @dev: ATAPI device to clear UA for 3235 * 3236 * Resets and other operations can make an ATAPI device raise 3237 * UNIT ATTENTION which causes the next operation to fail. This 3238 * function clears UA. 3239 * 3240 * LOCKING: 3241 * EH context (may sleep). 3242 * 3243 * RETURNS: 3244 * 0 on success, -errno on failure. 3245 */ 3246 static int atapi_eh_clear_ua(struct ata_device *dev) 3247 { 3248 int i; 3249 3250 for (i = 0; i < ATA_EH_UA_TRIES; i++) { 3251 u8 *sense_buffer = dev->link->ap->sector_buf; 3252 u8 sense_key = 0; 3253 unsigned int err_mask; 3254 3255 err_mask = atapi_eh_tur(dev, &sense_key); 3256 if (err_mask != 0 && err_mask != AC_ERR_DEV) { 3257 ata_dev_warn(dev, 3258 "TEST_UNIT_READY failed (err_mask=0x%x)\n", 3259 err_mask); 3260 return -EIO; 3261 } 3262 3263 if (!err_mask || sense_key != UNIT_ATTENTION) 3264 return 0; 3265 3266 err_mask = atapi_eh_request_sense(dev, sense_buffer, sense_key); 3267 if (err_mask) { 3268 ata_dev_warn(dev, "failed to clear " 3269 "UNIT ATTENTION (err_mask=0x%x)\n", err_mask); 3270 return -EIO; 3271 } 3272 } 3273 3274 ata_dev_warn(dev, "UNIT ATTENTION persists after %d tries\n", 3275 ATA_EH_UA_TRIES); 3276 3277 return 0; 3278 } 3279 3280 /** 3281 * ata_eh_maybe_retry_flush - Retry FLUSH if necessary 3282 * @dev: ATA device which may need FLUSH retry 3283 * 3284 * If @dev failed FLUSH, it needs to be reported to the upper layer 3285 * immediately as it means that @dev failed to remap and has already 3286 * lost at least a sector and further FLUSH retrials won't make 3287 * any difference to the lost sector. However, if FLUSH failed 3288 * for other reasons, for example a transmission error, FLUSH needs 3289 * to be retried. 3290 * 3291 * This function determines whether FLUSH failure retry is 3292 * necessary and performs it if so. 3293 * 3294 * RETURNS: 3295 * 0 if EH can continue, -errno if EH needs to be repeated. 3296 */ 3297 static int ata_eh_maybe_retry_flush(struct ata_device *dev) 3298 { 3299 struct ata_link *link = dev->link; 3300 struct ata_port *ap = link->ap; 3301 struct ata_queued_cmd *qc; 3302 struct ata_taskfile tf; 3303 unsigned int err_mask; 3304 int rc = 0; 3305 3306 /* did flush fail for this device? 
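* The check below only looks at the currently active non-NCQ
* command; unless it was a FLUSH CACHE (EXT) issued to @dev,
* there is nothing to retry here.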
*/ 3307 if (!ata_tag_valid(link->active_tag)) 3308 return 0; 3309 3310 qc = __ata_qc_from_tag(ap, link->active_tag); 3311 if (qc->dev != dev || (qc->tf.command != ATA_CMD_FLUSH_EXT && 3312 qc->tf.command != ATA_CMD_FLUSH)) 3313 return 0; 3314 3315 /* if the device failed it, it should be reported to upper layers */ 3316 if (qc->err_mask & AC_ERR_DEV) 3317 return 0; 3318 3319 /* flush failed for some other reason, give it another shot */ 3320 ata_tf_init(dev, &tf); 3321 3322 tf.command = qc->tf.command; 3323 tf.flags |= ATA_TFLAG_DEVICE; 3324 tf.protocol = ATA_PROT_NODATA; 3325 3326 ata_dev_warn(dev, "retrying FLUSH 0x%x Emask 0x%x\n", 3327 tf.command, qc->err_mask); 3328 3329 err_mask = ata_exec_internal(dev, &tf, NULL, DMA_NONE, NULL, 0, 0); 3330 if (!err_mask) { 3331 /* 3332 * FLUSH is complete but there's no way to 3333 * successfully complete a failed command from EH. 3334 * Making sure retry is allowed at least once and 3335 * retrying it should do the trick - whatever was in 3336 * the cache is already on the platter and this won't 3337 * cause infinite loop. 3338 */ 3339 qc->scsicmd->allowed = max(qc->scsicmd->allowed, 1); 3340 } else { 3341 ata_dev_warn(dev, "FLUSH failed Emask 0x%x\n", 3342 err_mask); 3343 rc = -EIO; 3344 3345 /* if device failed it, report it to upper layers */ 3346 if (err_mask & AC_ERR_DEV) { 3347 qc->err_mask |= AC_ERR_DEV; 3348 qc->result_tf = tf; 3349 if (!ata_port_is_frozen(ap)) 3350 rc = 0; 3351 } 3352 } 3353 return rc; 3354 } 3355 3356 /** 3357 * ata_eh_set_lpm - configure SATA interface power management 3358 * @link: link to configure power management 3359 * @policy: the link power management policy 3360 * @r_failed_dev: out parameter for failed device 3361 * 3362 * Enable SATA Interface power management. This will enable 3363 * Device Interface Power Management (DIPM) for min_power and 3364 * medium_power_with_dipm policies, and then call driver specific 3365 * callbacks for enabling Host Initiated Power management. 3366 * 3367 * LOCKING: 3368 * EH context. 3369 * 3370 * RETURNS: 3371 * 0 on success, -errno on failure. 3372 */ 3373 static int ata_eh_set_lpm(struct ata_link *link, enum ata_lpm_policy policy, 3374 struct ata_device **r_failed_dev) 3375 { 3376 struct ata_port *ap = ata_is_host_link(link) ? link->ap : NULL; 3377 struct ata_eh_context *ehc = &link->eh_context; 3378 struct ata_device *dev, *link_dev = NULL, *lpm_dev = NULL; 3379 enum ata_lpm_policy old_policy = link->lpm_policy; 3380 bool no_dipm = link->ap->flags & ATA_FLAG_NO_DIPM; 3381 unsigned int hints = ATA_LPM_EMPTY | ATA_LPM_HIPM; 3382 unsigned int err_mask; 3383 int rc; 3384 3385 /* if the link or host doesn't do LPM, noop */ 3386 if (!IS_ENABLED(CONFIG_SATA_HOST) || 3387 (link->flags & ATA_LFLAG_NO_LPM) || (ap && !ap->ops->set_lpm)) 3388 return 0; 3389 3390 /* 3391 * DIPM is enabled only for MIN_POWER as some devices 3392 * misbehave when the host NACKs transition to SLUMBER. Order 3393 * device and link configurations such that the host always 3394 * allows DIPM requests. 
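*
* Concretely: for policies below med_power_with_dipm, DIPM is
* disabled on each device before the link configuration is
* changed, while for med_power_with_dipm and lower-power policies
* DIPM is (re-)enabled only after ->set_lpm() has succeeded, so
* the host is always in a state where it will grant a device's
* DIPM request.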
3395 */ 3396 ata_for_each_dev(dev, link, ENABLED) { 3397 bool hipm = ata_id_has_hipm(dev->id); 3398 bool dipm = ata_id_has_dipm(dev->id) && !no_dipm; 3399 3400 /* find the first enabled and LPM enabled devices */ 3401 if (!link_dev) 3402 link_dev = dev; 3403 3404 if (!lpm_dev && (hipm || dipm)) 3405 lpm_dev = dev; 3406 3407 hints &= ~ATA_LPM_EMPTY; 3408 if (!hipm) 3409 hints &= ~ATA_LPM_HIPM; 3410 3411 /* disable DIPM before changing link config */ 3412 if (policy < ATA_LPM_MED_POWER_WITH_DIPM && dipm) { 3413 err_mask = ata_dev_set_feature(dev, 3414 SETFEATURES_SATA_DISABLE, SATA_DIPM); 3415 if (err_mask && err_mask != AC_ERR_DEV) { 3416 ata_dev_warn(dev, 3417 "failed to disable DIPM, Emask 0x%x\n", 3418 err_mask); 3419 rc = -EIO; 3420 goto fail; 3421 } 3422 } 3423 } 3424 3425 if (ap) { 3426 rc = ap->ops->set_lpm(link, policy, hints); 3427 if (!rc && ap->slave_link) 3428 rc = ap->ops->set_lpm(ap->slave_link, policy, hints); 3429 } else 3430 rc = sata_pmp_set_lpm(link, policy, hints); 3431 3432 /* 3433 * Attribute link config failure to the first (LPM) enabled 3434 * device on the link. 3435 */ 3436 if (rc) { 3437 if (rc == -EOPNOTSUPP) { 3438 link->flags |= ATA_LFLAG_NO_LPM; 3439 return 0; 3440 } 3441 dev = lpm_dev ? lpm_dev : link_dev; 3442 goto fail; 3443 } 3444 3445 /* 3446 * Low level driver acked the transition. Issue DIPM command 3447 * with the new policy set. 3448 */ 3449 link->lpm_policy = policy; 3450 if (ap && ap->slave_link) 3451 ap->slave_link->lpm_policy = policy; 3452 3453 /* host config updated, enable DIPM if transitioning to MIN_POWER */ 3454 ata_for_each_dev(dev, link, ENABLED) { 3455 if (policy >= ATA_LPM_MED_POWER_WITH_DIPM && !no_dipm && 3456 ata_id_has_dipm(dev->id)) { 3457 err_mask = ata_dev_set_feature(dev, 3458 SETFEATURES_SATA_ENABLE, SATA_DIPM); 3459 if (err_mask && err_mask != AC_ERR_DEV) { 3460 ata_dev_warn(dev, 3461 "failed to enable DIPM, Emask 0x%x\n", 3462 err_mask); 3463 rc = -EIO; 3464 goto fail; 3465 } 3466 } 3467 } 3468 3469 link->last_lpm_change = jiffies; 3470 link->flags |= ATA_LFLAG_CHANGED; 3471 3472 return 0; 3473 3474 fail: 3475 /* restore the old policy */ 3476 link->lpm_policy = old_policy; 3477 if (ap && ap->slave_link) 3478 ap->slave_link->lpm_policy = old_policy; 3479 3480 /* if no device or only one more chance is left, disable LPM */ 3481 if (!dev || ehc->tries[dev->devno] <= 2) { 3482 ata_link_warn(link, "disabling LPM on the link\n"); 3483 link->flags |= ATA_LFLAG_NO_LPM; 3484 } 3485 if (r_failed_dev) 3486 *r_failed_dev = dev; 3487 return rc; 3488 } 3489 3490 int ata_link_nr_enabled(struct ata_link *link) 3491 { 3492 struct ata_device *dev; 3493 int cnt = 0; 3494 3495 ata_for_each_dev(dev, link, ENABLED) 3496 cnt++; 3497 return cnt; 3498 } 3499 3500 static int ata_link_nr_vacant(struct ata_link *link) 3501 { 3502 struct ata_device *dev; 3503 int cnt = 0; 3504 3505 ata_for_each_dev(dev, link, ALL) 3506 if (dev->class == ATA_DEV_UNKNOWN) 3507 cnt++; 3508 return cnt; 3509 } 3510 3511 static int ata_eh_skip_recovery(struct ata_link *link) 3512 { 3513 struct ata_port *ap = link->ap; 3514 struct ata_eh_context *ehc = &link->eh_context; 3515 struct ata_device *dev; 3516 3517 /* skip disabled links */ 3518 if (link->flags & ATA_LFLAG_DISABLED) 3519 return 1; 3520 3521 /* skip if explicitly requested */ 3522 if (ehc->i.flags & ATA_EHI_NO_RECOVERY) 3523 return 1; 3524 3525 /* thaw frozen port and recover failed devices */ 3526 if (ata_port_is_frozen(ap) || ata_link_nr_enabled(link)) 3527 return 0; 3528 3529 /* reset at least once if reset is 
requested */ 3530 if ((ehc->i.action & ATA_EH_RESET) && 3531 !(ehc->i.flags & ATA_EHI_DID_RESET)) 3532 return 0; 3533 3534 /* skip if class codes for all vacant slots are ATA_DEV_NONE */ 3535 ata_for_each_dev(dev, link, ALL) { 3536 if (dev->class == ATA_DEV_UNKNOWN && 3537 ehc->classes[dev->devno] != ATA_DEV_NONE) 3538 return 0; 3539 } 3540 3541 return 1; 3542 } 3543 3544 static int ata_count_probe_trials_cb(struct ata_ering_entry *ent, void *void_arg) 3545 { 3546 u64 interval = msecs_to_jiffies(ATA_EH_PROBE_TRIAL_INTERVAL); 3547 u64 now = get_jiffies_64(); 3548 int *trials = void_arg; 3549 3550 if ((ent->eflags & ATA_EFLAG_OLD_ER) || 3551 (ent->timestamp < now - min(now, interval))) 3552 return -1; 3553 3554 (*trials)++; 3555 return 0; 3556 } 3557 3558 static int ata_eh_schedule_probe(struct ata_device *dev) 3559 { 3560 struct ata_eh_context *ehc = &dev->link->eh_context; 3561 struct ata_link *link = ata_dev_phys_link(dev); 3562 int trials = 0; 3563 3564 if (!(ehc->i.probe_mask & (1 << dev->devno)) || 3565 (ehc->did_probe_mask & (1 << dev->devno))) 3566 return 0; 3567 3568 ata_eh_detach_dev(dev); 3569 ata_dev_init(dev); 3570 ehc->did_probe_mask |= (1 << dev->devno); 3571 ehc->i.action |= ATA_EH_RESET; 3572 ehc->saved_xfer_mode[dev->devno] = 0; 3573 ehc->saved_ncq_enabled &= ~(1 << dev->devno); 3574 3575 /* the link may be in a deep sleep, wake it up */ 3576 if (link->lpm_policy > ATA_LPM_MAX_POWER) { 3577 if (ata_is_host_link(link)) 3578 link->ap->ops->set_lpm(link, ATA_LPM_MAX_POWER, 3579 ATA_LPM_EMPTY); 3580 else 3581 sata_pmp_set_lpm(link, ATA_LPM_MAX_POWER, 3582 ATA_LPM_EMPTY); 3583 } 3584 3585 /* Record and count probe trials on the ering. The specific 3586 * error mask used is irrelevant. Because a successful device 3587 * detection clears the ering, this count accumulates only if 3588 * there are consecutive failed probes. 3589 * 3590 * If the count is equal to or higher than ATA_EH_PROBE_TRIALS 3591 * in the last ATA_EH_PROBE_TRIAL_INTERVAL, link speed is 3592 * forced to 1.5Gbps. 3593 * 3594 * This is to work around cases where failed link speed 3595 * negotiation results in device misdetection leading to 3596 * infinite DEVXCHG or PHYRDY CHG events. 3597 */ 3598 ata_ering_record(&dev->ering, 0, AC_ERR_OTHER); 3599 ata_ering_map(&dev->ering, ata_count_probe_trials_cb, &trials); 3600 3601 if (trials > ATA_EH_PROBE_TRIALS) 3602 sata_down_spd_limit(link, 1); 3603 3604 return 1; 3605 } 3606 3607 static int ata_eh_handle_dev_fail(struct ata_device *dev, int err) 3608 { 3609 struct ata_eh_context *ehc = &dev->link->eh_context; 3610 3611 /* -EAGAIN from EH routine indicates retry without prejudice. 3612 * The requester is responsible for ensuring forward progress. 3613 */ 3614 if (err != -EAGAIN) 3615 ehc->tries[dev->devno]--; 3616 3617 switch (err) { 3618 case -ENODEV: 3619 /* device missing or wrong IDENTIFY data, schedule probing */ 3620 ehc->i.probe_mask |= (1 << dev->devno); 3621 fallthrough; 3622 case -EINVAL: 3623 /* give it just one more chance */ 3624 ehc->tries[dev->devno] = min(ehc->tries[dev->devno], 1); 3625 fallthrough; 3626 case -EIO: 3627 if (ehc->tries[dev->devno] == 1) { 3628 /* This is the last chance, better to slow 3629 * down than lose it. 
3630 */ 3631 sata_down_spd_limit(ata_dev_phys_link(dev), 0); 3632 if (dev->pio_mode > XFER_PIO_0) 3633 ata_down_xfermask_limit(dev, ATA_DNXFER_PIO); 3634 } 3635 } 3636 3637 if (ata_dev_enabled(dev) && !ehc->tries[dev->devno]) { 3638 /* disable device if it has used up all its chances */ 3639 ata_dev_disable(dev); 3640 3641 /* detach if offline */ 3642 if (ata_phys_link_offline(ata_dev_phys_link(dev))) 3643 ata_eh_detach_dev(dev); 3644 3645 /* schedule probe if necessary */ 3646 if (ata_eh_schedule_probe(dev)) { 3647 ehc->tries[dev->devno] = ATA_EH_DEV_TRIES; 3648 memset(ehc->cmd_timeout_idx[dev->devno], 0, 3649 sizeof(ehc->cmd_timeout_idx[dev->devno])); 3650 } 3651 3652 return 1; 3653 } else { 3654 ehc->i.action |= ATA_EH_RESET; 3655 return 0; 3656 } 3657 } 3658 3659 /** 3660 * ata_eh_recover - recover host port after error 3661 * @ap: host port to recover 3662 * @prereset: prereset method (can be NULL) 3663 * @softreset: softreset method (can be NULL) 3664 * @hardreset: hardreset method (can be NULL) 3665 * @postreset: postreset method (can be NULL) 3666 * @r_failed_link: out parameter for failed link 3667 * 3668 * This is the alpha and omega, eum and yang, heart and soul of 3669 * libata exception handling. On entry, actions required to 3670 * recover each link and hotplug requests are recorded in the 3671 * link's eh_context. This function executes all the operations 3672 * with appropriate retrials and fallbacks to resurrect failed 3673 * devices, detach goners and greet newcomers. 3674 * 3675 * LOCKING: 3676 * Kernel thread context (may sleep). 3677 * 3678 * RETURNS: 3679 * 0 on success, -errno on failure. 3680 */ 3681 int ata_eh_recover(struct ata_port *ap, ata_prereset_fn_t prereset, 3682 ata_reset_fn_t softreset, ata_reset_fn_t hardreset, 3683 ata_postreset_fn_t postreset, 3684 struct ata_link **r_failed_link) 3685 { 3686 struct ata_link *link; 3687 struct ata_device *dev; 3688 int rc, nr_fails; 3689 unsigned long flags, deadline; 3690 3691 /* prep for recovery */ 3692 ata_for_each_link(link, ap, EDGE) { 3693 struct ata_eh_context *ehc = &link->eh_context; 3694 3695 /* re-enable link? */ 3696 if (ehc->i.action & ATA_EH_ENABLE_LINK) { 3697 ata_eh_about_to_do(link, NULL, ATA_EH_ENABLE_LINK); 3698 spin_lock_irqsave(ap->lock, flags); 3699 link->flags &= ~ATA_LFLAG_DISABLED; 3700 spin_unlock_irqrestore(ap->lock, flags); 3701 ata_eh_done(link, NULL, ATA_EH_ENABLE_LINK); 3702 } 3703 3704 ata_for_each_dev(dev, link, ALL) { 3705 if (link->flags & ATA_LFLAG_NO_RETRY) 3706 ehc->tries[dev->devno] = 1; 3707 else 3708 ehc->tries[dev->devno] = ATA_EH_DEV_TRIES; 3709 3710 /* collect port action mask recorded in dev actions */ 3711 ehc->i.action |= ehc->i.dev_action[dev->devno] & 3712 ~ATA_EH_PERDEV_MASK; 3713 ehc->i.dev_action[dev->devno] &= ATA_EH_PERDEV_MASK; 3714 3715 /* process hotplug request */ 3716 if (dev->flags & ATA_DFLAG_DETACH) 3717 ata_eh_detach_dev(dev); 3718 3719 /* schedule probe if necessary */ 3720 if (!ata_dev_enabled(dev)) 3721 ata_eh_schedule_probe(dev); 3722 } 3723 } 3724 3725 retry: 3726 rc = 0; 3727 3728 /* if UNLOADING, finish immediately */ 3729 if (ap->pflags & ATA_PFLAG_UNLOADING) 3730 goto out; 3731 3732 /* prep for EH */ 3733 ata_for_each_link(link, ap, EDGE) { 3734 struct ata_eh_context *ehc = &link->eh_context; 3735 3736 /* skip EH if possible. 
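* (ata_eh_skip_recovery() above returns 1 for disabled links, for
* links with ATA_EHI_NO_RECOVERY set, and for links with nothing
* attached and no pending reset; clearing i.action lets the rest
* of the pass fall through for such links.)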
/**
 * ata_eh_recover - recover host port after error
 * @ap: host port to recover
 * @prereset: prereset method (can be NULL)
 * @softreset: softreset method (can be NULL)
 * @hardreset: hardreset method (can be NULL)
 * @postreset: postreset method (can be NULL)
 * @r_failed_link: out parameter for failed link
 *
 * This is the alpha and omega, yin and yang, heart and soul of
 * libata exception handling.  On entry, actions required to
 * recover each link and hotplug requests are recorded in the
 * link's eh_context.  This function executes all the operations
 * with appropriate retries and fallbacks to resurrect failed
 * devices, detach goners and greet newcomers.
 *
 * LOCKING:
 * Kernel thread context (may sleep).
 *
 * RETURNS:
 * 0 on success, -errno on failure.
 */
int ata_eh_recover(struct ata_port *ap, ata_prereset_fn_t prereset,
                   ata_reset_fn_t softreset, ata_reset_fn_t hardreset,
                   ata_postreset_fn_t postreset,
                   struct ata_link **r_failed_link)
{
        struct ata_link *link;
        struct ata_device *dev;
        int rc, nr_fails;
        unsigned long flags, deadline;

        /* prep for recovery */
        ata_for_each_link(link, ap, EDGE) {
                struct ata_eh_context *ehc = &link->eh_context;

                /* re-enable link? */
                if (ehc->i.action & ATA_EH_ENABLE_LINK) {
                        ata_eh_about_to_do(link, NULL, ATA_EH_ENABLE_LINK);
                        spin_lock_irqsave(ap->lock, flags);
                        link->flags &= ~ATA_LFLAG_DISABLED;
                        spin_unlock_irqrestore(ap->lock, flags);
                        ata_eh_done(link, NULL, ATA_EH_ENABLE_LINK);
                }

                ata_for_each_dev(dev, link, ALL) {
                        if (link->flags & ATA_LFLAG_NO_RETRY)
                                ehc->tries[dev->devno] = 1;
                        else
                                ehc->tries[dev->devno] = ATA_EH_DEV_TRIES;

                        /* collect port action mask recorded in dev actions */
                        ehc->i.action |= ehc->i.dev_action[dev->devno] &
                                         ~ATA_EH_PERDEV_MASK;
                        ehc->i.dev_action[dev->devno] &= ATA_EH_PERDEV_MASK;

                        /* process hotplug request */
                        if (dev->flags & ATA_DFLAG_DETACH)
                                ata_eh_detach_dev(dev);

                        /* schedule probe if necessary */
                        if (!ata_dev_enabled(dev))
                                ata_eh_schedule_probe(dev);
                }
        }

 retry:
        rc = 0;

        /* if UNLOADING, finish immediately */
        if (ap->pflags & ATA_PFLAG_UNLOADING)
                goto out;

        /* prep for EH */
        ata_for_each_link(link, ap, EDGE) {
                struct ata_eh_context *ehc = &link->eh_context;

                /* skip EH if possible. */
                if (ata_eh_skip_recovery(link))
                        ehc->i.action = 0;

                ata_for_each_dev(dev, link, ALL)
                        ehc->classes[dev->devno] = ATA_DEV_UNKNOWN;
        }

        /* reset */
        ata_for_each_link(link, ap, EDGE) {
                struct ata_eh_context *ehc = &link->eh_context;

                if (!(ehc->i.action & ATA_EH_RESET))
                        continue;

                rc = ata_eh_reset(link, ata_link_nr_vacant(link),
                                  prereset, softreset, hardreset, postreset);
                if (rc) {
                        ata_link_err(link, "reset failed, giving up\n");
                        goto out;
                }
        }
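        /*
         * Handle ATA_EH_PARK (head unload) requests.  The loop below
         * parks every ATA/ZAC device with a pending park request,
         * then drops EH exclusion and waits until either the latest
         * unpark deadline passes or a new park request arrives via
         * ap->park_req_pending.  The follow-up loop unparks all
         * devices recorded in unloaded_mask.
         */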
3841 */ 3842 if (ehc->i.flags & ATA_EHI_DID_RESET) { 3843 ata_for_each_dev(dev, link, ALL) { 3844 if (dev->class != ATA_DEV_ATAPI) 3845 continue; 3846 rc = atapi_eh_clear_ua(dev); 3847 if (rc) 3848 goto rest_fail; 3849 if (zpodd_dev_enabled(dev)) 3850 zpodd_post_poweron(dev); 3851 } 3852 } 3853 3854 /* retry flush if necessary */ 3855 ata_for_each_dev(dev, link, ALL) { 3856 if (dev->class != ATA_DEV_ATA && 3857 dev->class != ATA_DEV_ZAC) 3858 continue; 3859 rc = ata_eh_maybe_retry_flush(dev); 3860 if (rc) 3861 goto rest_fail; 3862 } 3863 3864 config_lpm: 3865 /* configure link power saving */ 3866 if (link->lpm_policy != ap->target_lpm_policy) { 3867 rc = ata_eh_set_lpm(link, ap->target_lpm_policy, &dev); 3868 if (rc) 3869 goto rest_fail; 3870 } 3871 3872 /* this link is okay now */ 3873 ehc->i.flags = 0; 3874 continue; 3875 3876 rest_fail: 3877 nr_fails++; 3878 if (dev) 3879 ata_eh_handle_dev_fail(dev, rc); 3880 3881 if (ata_port_is_frozen(ap)) { 3882 /* PMP reset requires working host port. 3883 * Can't retry if it's frozen. 3884 */ 3885 if (sata_pmp_attached(ap)) 3886 goto out; 3887 break; 3888 } 3889 } 3890 3891 if (nr_fails) 3892 goto retry; 3893 3894 out: 3895 if (rc && r_failed_link) 3896 *r_failed_link = link; 3897 3898 return rc; 3899 } 3900 3901 /** 3902 * ata_eh_finish - finish up EH 3903 * @ap: host port to finish EH for 3904 * 3905 * Recovery is complete. Clean up EH states and retry or finish 3906 * failed qcs. 3907 * 3908 * LOCKING: 3909 * None. 3910 */ 3911 void ata_eh_finish(struct ata_port *ap) 3912 { 3913 struct ata_queued_cmd *qc; 3914 int tag; 3915 3916 /* retry or finish qcs */ 3917 ata_qc_for_each_raw(ap, qc, tag) { 3918 if (!(qc->flags & ATA_QCFLAG_EH)) 3919 continue; 3920 3921 if (qc->err_mask) { 3922 /* FIXME: Once EH migration is complete, 3923 * generate sense data in this function, 3924 * considering both err_mask and tf. 3925 */ 3926 if (qc->flags & ATA_QCFLAG_RETRY) { 3927 /* 3928 * Since qc->err_mask is set, ata_eh_qc_retry() 3929 * will not increment scmd->allowed, so upper 3930 * layer will only retry the command if it has 3931 * not already been retried too many times. 3932 */ 3933 ata_eh_qc_retry(qc); 3934 } else { 3935 ata_eh_qc_complete(qc); 3936 } 3937 } else { 3938 if (qc->flags & ATA_QCFLAG_SENSE_VALID || 3939 qc->flags & ATA_QCFLAG_EH_SUCCESS_CMD) { 3940 ata_eh_qc_complete(qc); 3941 } else { 3942 /* feed zero TF to sense generation */ 3943 memset(&qc->result_tf, 0, sizeof(qc->result_tf)); 3944 /* 3945 * Since qc->err_mask is not set, 3946 * ata_eh_qc_retry() will increment 3947 * scmd->allowed, so upper layer is guaranteed 3948 * to retry the command. 3949 */ 3950 ata_eh_qc_retry(qc); 3951 } 3952 } 3953 } 3954 3955 /* make sure nr_active_links is zero after EH */ 3956 WARN_ON(ap->nr_active_links); 3957 ap->nr_active_links = 0; 3958 } 3959 3960 /** 3961 * ata_do_eh - do standard error handling 3962 * @ap: host port to handle error for 3963 * 3964 * @prereset: prereset method (can be NULL) 3965 * @softreset: softreset method (can be NULL) 3966 * @hardreset: hardreset method (can be NULL) 3967 * @postreset: postreset method (can be NULL) 3968 * 3969 * Perform standard error handling sequence. 3970 * 3971 * LOCKING: 3972 * Kernel thread context (may sleep). 
3973 */ 3974 void ata_do_eh(struct ata_port *ap, ata_prereset_fn_t prereset, 3975 ata_reset_fn_t softreset, ata_reset_fn_t hardreset, 3976 ata_postreset_fn_t postreset) 3977 { 3978 struct ata_device *dev; 3979 int rc; 3980 3981 ata_eh_autopsy(ap); 3982 ata_eh_report(ap); 3983 3984 rc = ata_eh_recover(ap, prereset, softreset, hardreset, postreset, 3985 NULL); 3986 if (rc) { 3987 ata_for_each_dev(dev, &ap->link, ALL) 3988 ata_dev_disable(dev); 3989 } 3990 3991 ata_eh_finish(ap); 3992 } 3993 3994 /** 3995 * ata_std_error_handler - standard error handler 3996 * @ap: host port to handle error for 3997 * 3998 * Standard error handler 3999 * 4000 * LOCKING: 4001 * Kernel thread context (may sleep). 4002 */ 4003 void ata_std_error_handler(struct ata_port *ap) 4004 { 4005 struct ata_port_operations *ops = ap->ops; 4006 ata_reset_fn_t hardreset = ops->hardreset; 4007 4008 /* ignore built-in hardreset if SCR access is not available */ 4009 if (hardreset == sata_std_hardreset && !sata_scr_valid(&ap->link)) 4010 hardreset = NULL; 4011 4012 ata_do_eh(ap, ops->prereset, ops->softreset, hardreset, ops->postreset); 4013 } 4014 EXPORT_SYMBOL_GPL(ata_std_error_handler); 4015 4016 #ifdef CONFIG_PM 4017 /** 4018 * ata_eh_handle_port_suspend - perform port suspend operation 4019 * @ap: port to suspend 4020 * 4021 * Suspend @ap. 4022 * 4023 * LOCKING: 4024 * Kernel thread context (may sleep). 4025 */ 4026 static void ata_eh_handle_port_suspend(struct ata_port *ap) 4027 { 4028 unsigned long flags; 4029 int rc = 0; 4030 struct ata_device *dev; 4031 struct ata_link *link; 4032 4033 /* are we suspending? */ 4034 spin_lock_irqsave(ap->lock, flags); 4035 if (!(ap->pflags & ATA_PFLAG_PM_PENDING) || 4036 ap->pm_mesg.event & PM_EVENT_RESUME) { 4037 spin_unlock_irqrestore(ap->lock, flags); 4038 return; 4039 } 4040 spin_unlock_irqrestore(ap->lock, flags); 4041 4042 WARN_ON(ap->pflags & ATA_PFLAG_SUSPENDED); 4043 4044 /* Set all devices attached to the port in standby mode */ 4045 ata_for_each_link(link, ap, HOST_FIRST) { 4046 ata_for_each_dev(dev, link, ENABLED) 4047 ata_dev_power_set_standby(dev); 4048 } 4049 4050 /* 4051 * If we have a ZPODD attached, check its zero 4052 * power ready status before the port is frozen. 4053 * Only needed for runtime suspend. 4054 */ 4055 if (PMSG_IS_AUTO(ap->pm_mesg)) { 4056 ata_for_each_dev(dev, &ap->link, ENABLED) { 4057 if (zpodd_dev_enabled(dev)) 4058 zpodd_on_suspend(dev); 4059 } 4060 } 4061 4062 /* suspend */ 4063 ata_eh_freeze_port(ap); 4064 4065 if (ap->ops->port_suspend) 4066 rc = ap->ops->port_suspend(ap, ap->pm_mesg); 4067 4068 ata_acpi_set_state(ap, ap->pm_mesg); 4069 4070 /* update the flags */ 4071 spin_lock_irqsave(ap->lock, flags); 4072 4073 ap->pflags &= ~ATA_PFLAG_PM_PENDING; 4074 if (rc == 0) 4075 ap->pflags |= ATA_PFLAG_SUSPENDED; 4076 else if (ata_port_is_frozen(ap)) 4077 ata_port_schedule_eh(ap); 4078 4079 spin_unlock_irqrestore(ap->lock, flags); 4080 4081 return; 4082 } 4083 4084 /** 4085 * ata_eh_handle_port_resume - perform port resume operation 4086 * @ap: port to resume 4087 * 4088 * Resume @ap. 4089 * 4090 * LOCKING: 4091 * Kernel thread context (may sleep). 4092 */ 4093 static void ata_eh_handle_port_resume(struct ata_port *ap) 4094 { 4095 struct ata_link *link; 4096 struct ata_device *dev; 4097 unsigned long flags; 4098 4099 /* are we resuming? 
#ifdef CONFIG_PM
/**
 * ata_eh_handle_port_suspend - perform port suspend operation
 * @ap: port to suspend
 *
 * Suspend @ap.
 *
 * LOCKING:
 * Kernel thread context (may sleep).
 */
static void ata_eh_handle_port_suspend(struct ata_port *ap)
{
        unsigned long flags;
        int rc = 0;
        struct ata_device *dev;
        struct ata_link *link;

        /* are we suspending? */
        spin_lock_irqsave(ap->lock, flags);
        if (!(ap->pflags & ATA_PFLAG_PM_PENDING) ||
            ap->pm_mesg.event & PM_EVENT_RESUME) {
                spin_unlock_irqrestore(ap->lock, flags);
                return;
        }
        spin_unlock_irqrestore(ap->lock, flags);

        WARN_ON(ap->pflags & ATA_PFLAG_SUSPENDED);

        /* Set all devices attached to the port in standby mode */
        ata_for_each_link(link, ap, HOST_FIRST) {
                ata_for_each_dev(dev, link, ENABLED)
                        ata_dev_power_set_standby(dev);
        }

        /*
         * If we have a ZPODD attached, check its zero
         * power ready status before the port is frozen.
         * Only needed for runtime suspend.
         */
        if (PMSG_IS_AUTO(ap->pm_mesg)) {
                ata_for_each_dev(dev, &ap->link, ENABLED) {
                        if (zpodd_dev_enabled(dev))
                                zpodd_on_suspend(dev);
                }
        }

        /* suspend */
        ata_eh_freeze_port(ap);

        if (ap->ops->port_suspend)
                rc = ap->ops->port_suspend(ap, ap->pm_mesg);

        ata_acpi_set_state(ap, ap->pm_mesg);

        /* update the flags */
        spin_lock_irqsave(ap->lock, flags);

        ap->pflags &= ~ATA_PFLAG_PM_PENDING;
        if (rc == 0)
                ap->pflags |= ATA_PFLAG_SUSPENDED;
        else if (ata_port_is_frozen(ap))
                ata_port_schedule_eh(ap);

        spin_unlock_irqrestore(ap->lock, flags);
}

/**
 * ata_eh_handle_port_resume - perform port resume operation
 * @ap: port to resume
 *
 * Resume @ap.
 *
 * LOCKING:
 * Kernel thread context (may sleep).
 */
static void ata_eh_handle_port_resume(struct ata_port *ap)
{
        struct ata_link *link;
        struct ata_device *dev;
        unsigned long flags;

        /* are we resuming? */
        spin_lock_irqsave(ap->lock, flags);
        if (!(ap->pflags & ATA_PFLAG_PM_PENDING) ||
            !(ap->pm_mesg.event & PM_EVENT_RESUME)) {
                spin_unlock_irqrestore(ap->lock, flags);
                return;
        }
        spin_unlock_irqrestore(ap->lock, flags);

        WARN_ON(!(ap->pflags & ATA_PFLAG_SUSPENDED));

        /*
         * Error timestamps are in jiffies, which does not advance
         * while the machine is suspended, and PHY events during
         * resume are not uncommon.  Combined, the two can cause
         * unnecessary speed-downs if the machine is suspended and
         * resumed repeatedly, so clear the error history.
         */
        ata_for_each_link(link, ap, HOST_FIRST)
                ata_for_each_dev(dev, link, ALL)
                        ata_ering_clear(&dev->ering);

        ata_acpi_set_state(ap, ap->pm_mesg);

        if (ap->ops->port_resume)
                ap->ops->port_resume(ap);

        /* tell ACPI that we're resuming */
        ata_acpi_on_resume(ap);

        /* update the flags */
        spin_lock_irqsave(ap->lock, flags);
        ap->pflags &= ~(ATA_PFLAG_PM_PENDING | ATA_PFLAG_SUSPENDED);
        ap->pflags |= ATA_PFLAG_RESUMING;
        spin_unlock_irqrestore(ap->lock, flags);
}
#endif /* CONFIG_PM */