// SPDX-License-Identifier: GPL-2.0-or-later
/*
 *  libata-eh.c - libata error handling
 *
 *  Maintained by:  Tejun Heo <tj@kernel.org>
 *		    Please ALWAYS copy linux-ide@vger.kernel.org
 *		    on emails.
 *
 *  Copyright 2006 Tejun Heo <htejun@gmail.com>
 *
 *  libata documentation is available via 'make {ps|pdf}docs',
 *  as Documentation/driver-api/libata.rst
 *
 *  Hardware documentation available from http://www.t13.org/ and
 *  http://www.sata-io.org/
 */

#include <linux/kernel.h>
#include <linux/blkdev.h>
#include <linux/export.h>
#include <linux/pci.h>
#include <scsi/scsi.h>
#include <scsi/scsi_host.h>
#include <scsi/scsi_eh.h>
#include <scsi/scsi_device.h>
#include <scsi/scsi_cmnd.h>
#include <scsi/scsi_dbg.h>
#include "../scsi/scsi_transport_api.h"

#include <linux/libata.h>

#include <trace/events/libata.h>
#include "libata.h"

enum {
	/* speed down verdicts */
	ATA_EH_SPDN_NCQ_OFF		= (1 << 0),
	ATA_EH_SPDN_SPEED_DOWN		= (1 << 1),
	ATA_EH_SPDN_FALLBACK_TO_PIO	= (1 << 2),
	ATA_EH_SPDN_KEEP_ERRORS		= (1 << 3),

	/* error flags */
	ATA_EFLAG_IS_IO			= (1 << 0),
	ATA_EFLAG_DUBIOUS_XFER		= (1 << 1),
	ATA_EFLAG_OLD_ER		= (1 << 31),

	/* error categories */
	ATA_ECAT_NONE			= 0,
	ATA_ECAT_ATA_BUS		= 1,
	ATA_ECAT_TOUT_HSM		= 2,
	ATA_ECAT_UNK_DEV		= 3,
	ATA_ECAT_DUBIOUS_NONE		= 4,
	ATA_ECAT_DUBIOUS_ATA_BUS	= 5,
	ATA_ECAT_DUBIOUS_TOUT_HSM	= 6,
	ATA_ECAT_DUBIOUS_UNK_DEV	= 7,
	ATA_ECAT_NR			= 8,

	ATA_EH_CMD_DFL_TIMEOUT		= 5000,

	/* always put at least this amount of time between resets */
	ATA_EH_RESET_COOL_DOWN		= 5000,

	/* Waiting in ->prereset can never be reliable.  It's
	 * sometimes nice to wait there but it can't be depended upon;
	 * otherwise, we wouldn't be resetting.  Just give it enough
	 * time for most drives to spin up.
	 */
	ATA_EH_PRERESET_TIMEOUT		= 10000,
	ATA_EH_FASTDRAIN_INTERVAL	= 3000,

	ATA_EH_UA_TRIES			= 5,

	/* probe speed down parameters, see ata_eh_schedule_probe() */
	ATA_EH_PROBE_TRIAL_INTERVAL	= 60000,	/* 1 min */
	ATA_EH_PROBE_TRIALS		= 2,
};

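/* Note on the category encoding above (illustrative, derived from
 * ata_eh_categorize_error() further below): the DUBIOUS_* categories
 * mirror the plain ones at a fixed offset, so the category for an
 * unverified transfer is computed as base + category, e.g.:
 *
 *	ATA_ECAT_DUBIOUS_NONE + ATA_ECAT_ATA_BUS  == ATA_ECAT_DUBIOUS_ATA_BUS
 *	ATA_ECAT_DUBIOUS_NONE + ATA_ECAT_TOUT_HSM == ATA_ECAT_DUBIOUS_TOUT_HSM
 */
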
/* The following table determines how we sequence resets.  Each entry
 * represents timeout for that try.  The first try can be soft or
 * hardreset.  All others are hardreset if available.  In most cases
 * the first reset w/ 10sec timeout should succeed.  Following entries
 * are mostly for error handling, hotplug and those outlier devices that
 * take an exceptionally long time to recover from reset.
 */
static const unsigned long ata_eh_reset_timeouts[] = {
	10000,	/* most drives spin up by 10sec */
	10000,	/* > 99% working drives spin up before 20sec */
	35000,	/* give > 30 secs of idleness for outlier devices */
	 5000,	/* and sweet one last chance */
	ULONG_MAX, /* > 1 min has elapsed, give up */
};

static const unsigned long ata_eh_identify_timeouts[] = {
	 5000,	/* covers > 99% of successes and not too boring on failures */
	10000,	/* combined time till here is enough even for media access */
	30000,	/* for true idiots */
	ULONG_MAX,
};

static const unsigned long ata_eh_flush_timeouts[] = {
	15000,	/* be generous with flush */
	15000,	/* ditto */
	30000,	/* and even more generous */
	ULONG_MAX,
};

static const unsigned long ata_eh_other_timeouts[] = {
	 5000,	/* same rationale as identify timeout */
	10000,	/* ditto */
	/* but no merciful 30sec for other commands, it just isn't worth it */
	ULONG_MAX,
};

struct ata_eh_cmd_timeout_ent {
	const u8		*commands;
	const unsigned long	*timeouts;
};

/* The following table determines timeouts to use for EH internal
 * commands.  Each entry represents a command class, listing the
 * commands the entry applies to and the timeout table to use.
 *
 * On the retry after a command timed out, the next timeout value from
 * the table is used.  If the table doesn't contain further entries,
 * the last value is used.
 *
 * ehc->cmd_timeout_idx keeps track of which timeout to use per
 * command class, so if SET_FEATURES times out on the first try, the
 * next try will use the second timeout value only for that class.
 */
#define CMDS(cmds...)	(const u8 []){ cmds, 0 }
static const struct ata_eh_cmd_timeout_ent
ata_eh_cmd_timeout_table[ATA_EH_CMD_TIMEOUT_TABLE_SIZE] = {
	{ .commands = CMDS(ATA_CMD_ID_ATA, ATA_CMD_ID_ATAPI),
	  .timeouts = ata_eh_identify_timeouts, },
	{ .commands = CMDS(ATA_CMD_READ_NATIVE_MAX, ATA_CMD_READ_NATIVE_MAX_EXT),
	  .timeouts = ata_eh_other_timeouts, },
	{ .commands = CMDS(ATA_CMD_SET_MAX, ATA_CMD_SET_MAX_EXT),
	  .timeouts = ata_eh_other_timeouts, },
	{ .commands = CMDS(ATA_CMD_SET_FEATURES),
	  .timeouts = ata_eh_other_timeouts, },
	{ .commands = CMDS(ATA_CMD_INIT_DEV_PARAMS),
	  .timeouts = ata_eh_other_timeouts, },
	{ .commands = CMDS(ATA_CMD_FLUSH, ATA_CMD_FLUSH_EXT),
	  .timeouts = ata_eh_flush_timeouts },
};
#undef CMDS

static void __ata_port_freeze(struct ata_port *ap);
#ifdef CONFIG_PM
static void ata_eh_handle_port_suspend(struct ata_port *ap);
static void ata_eh_handle_port_resume(struct ata_port *ap);
#else /* CONFIG_PM */
static void ata_eh_handle_port_suspend(struct ata_port *ap)
{ }

static void ata_eh_handle_port_resume(struct ata_port *ap)
{ }
#endif /* CONFIG_PM */

static __printf(2, 0) void __ata_ehi_pushv_desc(struct ata_eh_info *ehi,
						const char *fmt, va_list args)
{
	ehi->desc_len += vscnprintf(ehi->desc + ehi->desc_len,
				    ATA_EH_DESC_LEN - ehi->desc_len,
				    fmt, args);
}

/**
 *	__ata_ehi_push_desc - push error description without adding separator
 *	@ehi: target EHI
 *	@fmt: printf format string
 *
 *	Format string according to @fmt and append it to @ehi->desc.
 *
 *	LOCKING:
 *	spin_lock_irqsave(host lock)
 */
void __ata_ehi_push_desc(struct ata_eh_info *ehi, const char *fmt, ...)
{
	va_list args;

	va_start(args, fmt);
	__ata_ehi_pushv_desc(ehi, fmt, args);
	va_end(args);
}

/**
 *	ata_ehi_push_desc - push error description with separator
 *	@ehi: target EHI
 *	@fmt: printf format string
 *
 *	Format string according to @fmt and append it to @ehi->desc.
 *	If @ehi->desc is not empty, ", " is added in-between.
 *
 *	LOCKING:
 *	spin_lock_irqsave(host lock)
 */
void ata_ehi_push_desc(struct ata_eh_info *ehi, const char *fmt, ...)
{
	va_list args;

	if (ehi->desc_len)
		__ata_ehi_push_desc(ehi, ", ");

	va_start(args, fmt);
	__ata_ehi_pushv_desc(ehi, fmt, args);
	va_end(args);
}

/**
 *	ata_ehi_clear_desc - clear error description
 *	@ehi: target EHI
 *
 *	Clear @ehi->desc.
 *
 *	LOCKING:
 *	spin_lock_irqsave(host lock)
 */
void ata_ehi_clear_desc(struct ata_eh_info *ehi)
{
	ehi->desc[0] = '\0';
	ehi->desc_len = 0;
}

/**
 *	ata_port_desc - append port description
 *	@ap: target ATA port
 *	@fmt: printf format string
 *
 *	Format string according to @fmt and append it to port
 *	description.  If port description is not empty, " " is added
 *	in-between.  This function is to be used while initializing
 *	ata_host.  The description is printed on host registration.
 *
 *	LOCKING:
 *	None.
 */
void ata_port_desc(struct ata_port *ap, const char *fmt, ...)
{
	va_list args;

	WARN_ON(!(ap->pflags & ATA_PFLAG_INITIALIZING));

	if (ap->link.eh_info.desc_len)
		__ata_ehi_push_desc(&ap->link.eh_info, " ");

	va_start(args, fmt);
	__ata_ehi_pushv_desc(&ap->link.eh_info, fmt, args);
	va_end(args);
}

#ifdef CONFIG_PCI

/**
 *	ata_port_pbar_desc - append PCI BAR description
 *	@ap: target ATA port
 *	@bar: target PCI BAR
 *	@offset: offset into PCI BAR
 *	@name: name of the area
 *
 *	If @offset is negative, this function formats a string which
 *	contains the name, address, size and type of the BAR and
 *	appends it to the port description.  If @offset is zero or
 *	positive, only the name and the offset address are appended.
 *
 *	LOCKING:
 *	None.
 */
void ata_port_pbar_desc(struct ata_port *ap, int bar, ssize_t offset,
			const char *name)
{
	struct pci_dev *pdev = to_pci_dev(ap->host->dev);
	char *type = "";
	unsigned long long start, len;

	if (pci_resource_flags(pdev, bar) & IORESOURCE_MEM)
		type = "m";
	else if (pci_resource_flags(pdev, bar) & IORESOURCE_IO)
		type = "i";

	start = (unsigned long long)pci_resource_start(pdev, bar);
	len = (unsigned long long)pci_resource_len(pdev, bar);

	if (offset < 0)
		ata_port_desc(ap, "%s %s%llu@0x%llx", name, type, len, start);
	else
		ata_port_desc(ap, "%s 0x%llx", name,
				start + (unsigned long long)offset);
}

#endif /* CONFIG_PCI */

static int ata_lookup_timeout_table(u8 cmd)
{
	int i;

	for (i = 0; i < ATA_EH_CMD_TIMEOUT_TABLE_SIZE; i++) {
		const u8 *cur;

		for (cur = ata_eh_cmd_timeout_table[i].commands; *cur; cur++)
			if (*cur == cmd)
				return i;
	}

	return -1;
}

/**
 *	ata_internal_cmd_timeout - determine timeout for an internal command
 *	@dev: target device
 *	@cmd: internal command to be issued
 *
 *	Determine timeout for internal command @cmd for @dev.
 *
 *	LOCKING:
 *	EH context.
 *
 *	RETURNS:
 *	Determined timeout.
 */
unsigned long ata_internal_cmd_timeout(struct ata_device *dev, u8 cmd)
{
	struct ata_eh_context *ehc = &dev->link->eh_context;
	int ent = ata_lookup_timeout_table(cmd);
	int idx;

	if (ent < 0)
		return ATA_EH_CMD_DFL_TIMEOUT;

	idx = ehc->cmd_timeout_idx[dev->devno][ent];
	return ata_eh_cmd_timeout_table[ent].timeouts[idx];
}

/**
 *	ata_internal_cmd_timed_out - notification for internal command timeout
 *	@dev: target device
 *	@cmd: internal command which timed out
 *
 *	Notify EH that internal command @cmd for @dev timed out.  This
 *	function should be called only for commands whose timeouts are
 *	determined using ata_internal_cmd_timeout().
 *
 *	LOCKING:
 *	EH context.
 */
void ata_internal_cmd_timed_out(struct ata_device *dev, u8 cmd)
{
	struct ata_eh_context *ehc = &dev->link->eh_context;
	int ent = ata_lookup_timeout_table(cmd);
	int idx;

	if (ent < 0)
		return;

	idx = ehc->cmd_timeout_idx[dev->devno][ent];
	if (ata_eh_cmd_timeout_table[ent].timeouts[idx + 1] != ULONG_MAX)
		ehc->cmd_timeout_idx[dev->devno][ent]++;
}

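/* Example (a minimal sketch, not driver code) of how the per-class
 * timeout escalation above plays out for IDENTIFY.  The first try uses
 * ata_eh_identify_timeouts[0] and each timeout bumps the per-device,
 * per-class index until the table runs out:
 *
 *	timeout = ata_internal_cmd_timeout(dev, ATA_CMD_ID_ATA); // 5000ms
 *	// ... command times out ...
 *	ata_internal_cmd_timed_out(dev, ATA_CMD_ID_ATA);
 *	timeout = ata_internal_cmd_timeout(dev, ATA_CMD_ID_ATA); // 10000ms
 *
 * A further timeout moves to 30000ms; the ULONG_MAX sentinel keeps the
 * index from advancing past the last real entry.
 */
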
static void ata_ering_record(struct ata_ering *ering, unsigned int eflags,
			     unsigned int err_mask)
{
	struct ata_ering_entry *ent;

	WARN_ON(!err_mask);

	ering->cursor++;
	ering->cursor %= ATA_ERING_SIZE;

	ent = &ering->ring[ering->cursor];
	ent->eflags = eflags;
	ent->err_mask = err_mask;
	ent->timestamp = get_jiffies_64();
}

static struct ata_ering_entry *ata_ering_top(struct ata_ering *ering)
{
	struct ata_ering_entry *ent = &ering->ring[ering->cursor];

	if (ent->err_mask)
		return ent;
	return NULL;
}

int ata_ering_map(struct ata_ering *ering,
		  int (*map_fn)(struct ata_ering_entry *, void *),
		  void *arg)
{
	int idx, rc = 0;
	struct ata_ering_entry *ent;

	idx = ering->cursor;
	do {
		ent = &ering->ring[idx];
		if (!ent->err_mask)
			break;
		rc = map_fn(ent, arg);
		if (rc)
			break;
		idx = (idx - 1 + ATA_ERING_SIZE) % ATA_ERING_SIZE;
	} while (idx != ering->cursor);

	return rc;
}

static int ata_ering_clear_cb(struct ata_ering_entry *ent, void *void_arg)
{
	ent->eflags |= ATA_EFLAG_OLD_ER;
	return 0;
}

static void ata_ering_clear(struct ata_ering *ering)
{
	ata_ering_map(ering, ata_ering_clear_cb, NULL);
}

static unsigned int ata_eh_dev_action(struct ata_device *dev)
{
	struct ata_eh_context *ehc = &dev->link->eh_context;

	return ehc->i.action | ehc->i.dev_action[dev->devno];
}

static void ata_eh_clear_action(struct ata_link *link, struct ata_device *dev,
				struct ata_eh_info *ehi, unsigned int action)
{
	struct ata_device *tdev;

	if (!dev) {
		ehi->action &= ~action;
		ata_for_each_dev(tdev, link, ALL)
			ehi->dev_action[tdev->devno] &= ~action;
	} else {
		/* doesn't make sense for port-wide EH actions */
		WARN_ON(!(action & ATA_EH_PERDEV_MASK));

		/* break ehi->action into ehi->dev_action */
		if (ehi->action & action) {
			ata_for_each_dev(tdev, link, ALL)
				ehi->dev_action[tdev->devno] |=
					ehi->action & action;
			ehi->action &= ~action;
		}

		/* turn off the specified per-dev action */
		ehi->dev_action[dev->devno] &= ~action;
	}
}

/**
 *	ata_eh_acquire - acquire EH ownership
 *	@ap: ATA port to acquire EH ownership for
 *
 *	Acquire EH ownership for @ap.  This is the basic exclusion
 *	mechanism for ports sharing a host.  Only one port hanging off
 *	the same host can claim the ownership of EH.
 *
 *	LOCKING:
 *	EH context.
 */
void ata_eh_acquire(struct ata_port *ap)
{
	mutex_lock(&ap->host->eh_mutex);
	WARN_ON_ONCE(ap->host->eh_owner);
	ap->host->eh_owner = current;
}

/**
 *	ata_eh_release - release EH ownership
 *	@ap: ATA port to release EH ownership for
 *
 *	Release EH ownership for @ap.  The caller must have acquired
 *	EH ownership using ata_eh_acquire() previously.
 *
 *	LOCKING:
 *	EH context.
 */
void ata_eh_release(struct ata_port *ap)
{
	WARN_ON_ONCE(ap->host->eh_owner != current);
	ap->host->eh_owner = NULL;
	mutex_unlock(&ap->host->eh_mutex);
}

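/* Usage sketch (illustrative): EH code that has to sleep for a long
 * time, e.g. while waiting between reset retries, drops and re-takes
 * ownership so sibling ports on the same host can run their own EH
 * meanwhile.  "sleep_ms" is a hypothetical delay value:
 *
 *	ata_eh_release(ap);
 *	ata_msleep(ap, sleep_ms);
 *	ata_eh_acquire(ap);
 */
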
When normal 571 * completion wins, the qc never reaches EH. When error 572 * completion wins, the qc has ATA_QCFLAG_FAILED set. 573 * 574 * When SCSI timeout wins, things are a bit more complex. 575 * Normal or error completion can occur after the timeout but 576 * before this point. In such cases, both types of 577 * completions are honored. A scmd is determined to have 578 * timed out iff its associated qc is active and not failed. 579 */ 580 spin_lock_irqsave(ap->lock, flags); 581 if (ap->ops->error_handler) { 582 struct scsi_cmnd *scmd, *tmp; 583 int nr_timedout = 0; 584 585 /* This must occur under the ap->lock as we don't want 586 a polled recovery to race the real interrupt handler 587 588 The lost_interrupt handler checks for any completed but 589 non-notified command and completes much like an IRQ handler. 590 591 We then fall into the error recovery code which will treat 592 this as if normal completion won the race */ 593 594 if (ap->ops->lost_interrupt) 595 ap->ops->lost_interrupt(ap); 596 597 list_for_each_entry_safe(scmd, tmp, eh_work_q, eh_entry) { 598 struct ata_queued_cmd *qc; 599 600 ata_qc_for_each_raw(ap, qc, i) { 601 if (qc->flags & ATA_QCFLAG_ACTIVE && 602 qc->scsicmd == scmd) 603 break; 604 } 605 606 if (i < ATA_MAX_QUEUE) { 607 /* the scmd has an associated qc */ 608 if (!(qc->flags & ATA_QCFLAG_FAILED)) { 609 /* which hasn't failed yet, timeout */ 610 qc->err_mask |= AC_ERR_TIMEOUT; 611 qc->flags |= ATA_QCFLAG_FAILED; 612 nr_timedout++; 613 } 614 } else { 615 /* Normal completion occurred after 616 * SCSI timeout but before this point. 617 * Successfully complete it. 618 */ 619 scmd->retries = scmd->allowed; 620 scsi_eh_finish_cmd(scmd, &ap->eh_done_q); 621 } 622 } 623 624 /* If we have timed out qcs. They belong to EH from 625 * this point but the state of the controller is 626 * unknown. Freeze the port to make sure the IRQ 627 * handler doesn't diddle with those qcs. This must 628 * be done atomically w.r.t. setting QCFLAG_FAILED. 629 */ 630 if (nr_timedout) 631 __ata_port_freeze(ap); 632 633 634 /* initialize eh_tries */ 635 ap->eh_tries = ATA_EH_MAX_TRIES; 636 } 637 spin_unlock_irqrestore(ap->lock, flags); 638 639 } 640 EXPORT_SYMBOL(ata_scsi_cmd_error_handler); 641 642 /** 643 * ata_scsi_port_error_handler - recover the port after the commands 644 * @host: SCSI host containing the port 645 * @ap: the ATA port 646 * 647 * Handle the recovery of the port @ap after all the commands 648 * have been recovered. 
/**
 * ata_scsi_port_error_handler - recover the port after the commands
 * @host:	SCSI host containing the port
 * @ap:		the ATA port
 *
 * Handle the recovery of the port @ap after all the commands
 * have been recovered.
 */
void ata_scsi_port_error_handler(struct Scsi_Host *host, struct ata_port *ap)
{
	unsigned long flags;

	/* invoke error handler */
	if (ap->ops->error_handler) {
		struct ata_link *link;

		/* acquire EH ownership */
		ata_eh_acquire(ap);
 repeat:
		/* kill fast drain timer */
		del_timer_sync(&ap->fastdrain_timer);

		/* process port resume request */
		ata_eh_handle_port_resume(ap);

		/* fetch & clear EH info */
		spin_lock_irqsave(ap->lock, flags);

		ata_for_each_link(link, ap, HOST_FIRST) {
			struct ata_eh_context *ehc = &link->eh_context;
			struct ata_device *dev;

			memset(&link->eh_context, 0, sizeof(link->eh_context));
			link->eh_context.i = link->eh_info;
			memset(&link->eh_info, 0, sizeof(link->eh_info));

			ata_for_each_dev(dev, link, ENABLED) {
				int devno = dev->devno;

				ehc->saved_xfer_mode[devno] = dev->xfer_mode;
				if (ata_ncq_enabled(dev))
					ehc->saved_ncq_enabled |= 1 << devno;
			}
		}

		ap->pflags |= ATA_PFLAG_EH_IN_PROGRESS;
		ap->pflags &= ~ATA_PFLAG_EH_PENDING;
		ap->excl_link = NULL;	/* don't maintain exclusion over EH */

		spin_unlock_irqrestore(ap->lock, flags);

		/* invoke EH, skip if unloading or suspended */
		if (!(ap->pflags & (ATA_PFLAG_UNLOADING | ATA_PFLAG_SUSPENDED)))
			ap->ops->error_handler(ap);
		else {
			/* if unloading, commence suicide */
			if ((ap->pflags & ATA_PFLAG_UNLOADING) &&
			    !(ap->pflags & ATA_PFLAG_UNLOADED))
				ata_eh_unload(ap);
			ata_eh_finish(ap);
		}

		/* process port suspend request */
		ata_eh_handle_port_suspend(ap);

		/* Exception might have happened after ->error_handler
		 * recovered the port but before this point.  Repeat
		 * EH in such case.
		 */
		spin_lock_irqsave(ap->lock, flags);

		if (ap->pflags & ATA_PFLAG_EH_PENDING) {
			if (--ap->eh_tries) {
				spin_unlock_irqrestore(ap->lock, flags);
				goto repeat;
			}
			ata_port_err(ap,
				     "EH pending after %d tries, giving up\n",
				     ATA_EH_MAX_TRIES);
			ap->pflags &= ~ATA_PFLAG_EH_PENDING;
		}

		/* this run is complete, make sure EH info is clear */
		ata_for_each_link(link, ap, HOST_FIRST)
			memset(&link->eh_info, 0, sizeof(link->eh_info));

		/* end eh (clear host_eh_scheduled) while holding
		 * ap->lock such that if exception occurs after this
		 * point but before EH completion, SCSI midlayer will
		 * re-initiate EH.
		 */
		ap->ops->end_eh(ap);

		spin_unlock_irqrestore(ap->lock, flags);
		ata_eh_release(ap);
	} else {
		WARN_ON(ata_qc_from_tag(ap, ap->link.active_tag) == NULL);
		ap->ops->eng_timeout(ap);
	}

	scsi_eh_flush_done_q(&ap->eh_done_q);

	/* clean up */
	spin_lock_irqsave(ap->lock, flags);

	if (ap->pflags & ATA_PFLAG_LOADING)
		ap->pflags &= ~ATA_PFLAG_LOADING;
	else if ((ap->pflags & ATA_PFLAG_SCSI_HOTPLUG) &&
		 !(ap->flags & ATA_FLAG_SAS_HOST))
		schedule_delayed_work(&ap->hotplug_task, 0);

	if (ap->pflags & ATA_PFLAG_RECOVERED)
		ata_port_info(ap, "EH complete\n");

	ap->pflags &= ~(ATA_PFLAG_SCSI_HOTPLUG | ATA_PFLAG_RECOVERED);

	/* tell wait_eh that we're done */
	ap->pflags &= ~ATA_PFLAG_EH_IN_PROGRESS;
	wake_up_all(&ap->eh_wait_q);

	spin_unlock_irqrestore(ap->lock, flags);
}
EXPORT_SYMBOL_GPL(ata_scsi_port_error_handler);

/**
 *	ata_port_wait_eh - Wait for the currently pending EH to complete
 *	@ap: Port to wait EH for
 *
 *	Wait until the currently pending EH is complete.
 *
 *	LOCKING:
 *	Kernel thread context (may sleep).
 */
void ata_port_wait_eh(struct ata_port *ap)
{
	unsigned long flags;
	DEFINE_WAIT(wait);

 retry:
	spin_lock_irqsave(ap->lock, flags);

	while (ap->pflags & (ATA_PFLAG_EH_PENDING | ATA_PFLAG_EH_IN_PROGRESS)) {
		prepare_to_wait(&ap->eh_wait_q, &wait, TASK_UNINTERRUPTIBLE);
		spin_unlock_irqrestore(ap->lock, flags);
		schedule();
		spin_lock_irqsave(ap->lock, flags);
	}
	finish_wait(&ap->eh_wait_q, &wait);

	spin_unlock_irqrestore(ap->lock, flags);

	/* make sure SCSI EH is complete */
	if (scsi_host_in_recovery(ap->scsi_host)) {
		ata_msleep(ap, 10);
		goto retry;
	}
}
EXPORT_SYMBOL_GPL(ata_port_wait_eh);

static int ata_eh_nr_in_flight(struct ata_port *ap)
{
	struct ata_queued_cmd *qc;
	unsigned int tag;
	int nr = 0;

	/* count only non-internal commands */
	ata_qc_for_each(ap, qc, tag) {
		if (qc)
			nr++;
	}

	return nr;
}

void ata_eh_fastdrain_timerfn(struct timer_list *t)
{
	struct ata_port *ap = from_timer(ap, t, fastdrain_timer);
	unsigned long flags;
	int cnt;

	spin_lock_irqsave(ap->lock, flags);

	cnt = ata_eh_nr_in_flight(ap);

	/* are we done? */
	if (!cnt)
		goto out_unlock;

	if (cnt == ap->fastdrain_cnt) {
		struct ata_queued_cmd *qc;
		unsigned int tag;

		/* No progress during the last interval, tag all
		 * in-flight qcs as timed out and freeze the port.
		 */
		ata_qc_for_each(ap, qc, tag) {
			if (qc)
				qc->err_mask |= AC_ERR_TIMEOUT;
		}

		ata_port_freeze(ap);
	} else {
		/* some qcs have finished, give it another chance */
		ap->fastdrain_cnt = cnt;
		ap->fastdrain_timer.expires =
			ata_deadline(jiffies, ATA_EH_FASTDRAIN_INTERVAL);
		add_timer(&ap->fastdrain_timer);
	}

 out_unlock:
	spin_unlock_irqrestore(ap->lock, flags);
}

/**
 *	ata_eh_set_pending - set ATA_PFLAG_EH_PENDING and activate fast drain
 *	@ap: target ATA port
 *	@fastdrain: activate fast drain
 *
 *	Set ATA_PFLAG_EH_PENDING and activate fast drain if @fastdrain
 *	is non-zero and EH wasn't pending before.  Fast drain ensures
 *	that EH kicks in in a timely manner.
 *
 *	LOCKING:
 *	spin_lock_irqsave(host lock)
 */
static void ata_eh_set_pending(struct ata_port *ap, int fastdrain)
{
	int cnt;

	/* already scheduled? */
	if (ap->pflags & ATA_PFLAG_EH_PENDING)
		return;

	ap->pflags |= ATA_PFLAG_EH_PENDING;

	if (!fastdrain)
		return;

	/* do we have in-flight qcs? */
	cnt = ata_eh_nr_in_flight(ap);
	if (!cnt)
		return;

	/* activate fast drain */
	ap->fastdrain_cnt = cnt;
	ap->fastdrain_timer.expires =
		ata_deadline(jiffies, ATA_EH_FASTDRAIN_INTERVAL);
	add_timer(&ap->fastdrain_timer);
}

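/* Fast drain timeline (illustrative): with in-flight qcs present,
 * ata_eh_set_pending(ap, 1) arms fastdrain_timer.  Every
 * ATA_EH_FASTDRAIN_INTERVAL (3s), ata_eh_fastdrain_timerfn() compares
 * the in-flight count against the snapshot taken when the timer was
 * armed; if nothing completed during the interval, all remaining qcs
 * are marked AC_ERR_TIMEOUT and the port is frozen so EH can take over
 * immediately instead of waiting for each command to drain on its own.
 */
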
/**
 *	ata_qc_schedule_eh - schedule qc for error handling
 *	@qc: command to schedule error handling for
 *
 *	Schedule error handling for @qc.  EH will kick in as soon as
 *	other commands are drained.
 *
 *	LOCKING:
 *	spin_lock_irqsave(host lock)
 */
void ata_qc_schedule_eh(struct ata_queued_cmd *qc)
{
	struct ata_port *ap = qc->ap;

	WARN_ON(!ap->ops->error_handler);

	qc->flags |= ATA_QCFLAG_FAILED;
	ata_eh_set_pending(ap, 1);

	/* The following will fail if timeout has already expired.
	 * ata_scsi_error() takes care of such scmds on EH entry.
	 * Note that ATA_QCFLAG_FAILED is unconditionally set after
	 * this function completes.
	 */
	blk_abort_request(qc->scsicmd->request);
}

/**
 * ata_std_sched_eh - non-libsas ata_ports issue eh with this common routine
 * @ap: ATA port to schedule EH for
 *
 *	LOCKING: inherited from ata_port_schedule_eh
 *	spin_lock_irqsave(host lock)
 */
void ata_std_sched_eh(struct ata_port *ap)
{
	WARN_ON(!ap->ops->error_handler);

	if (ap->pflags & ATA_PFLAG_INITIALIZING)
		return;

	ata_eh_set_pending(ap, 1);
	scsi_schedule_eh(ap->scsi_host);

	DPRINTK("port EH scheduled\n");
}
EXPORT_SYMBOL_GPL(ata_std_sched_eh);

/**
 * ata_std_end_eh - non-libsas ata_ports complete eh with this common routine
 * @ap: ATA port to end EH for
 *
 * In the libata object model there is a 1:1 mapping of ata_port to
 * shost, so host fields can be directly manipulated under ap->lock; in
 * the libsas case we need to hold a lock at the ha->level to coordinate
 * these events.
 *
 *	LOCKING:
 *	spin_lock_irqsave(host lock)
 */
void ata_std_end_eh(struct ata_port *ap)
{
	struct Scsi_Host *host = ap->scsi_host;

	host->host_eh_scheduled = 0;
}
EXPORT_SYMBOL(ata_std_end_eh);


/**
 *	ata_port_schedule_eh - schedule error handling without a qc
 *	@ap: ATA port to schedule EH for
 *
 *	Schedule error handling for @ap.  EH will kick in as soon as
 *	all commands are drained.
 *
 *	LOCKING:
 *	spin_lock_irqsave(host lock)
 */
void ata_port_schedule_eh(struct ata_port *ap)
{
	/* see: ata_std_sched_eh, unless you know better */
	ap->ops->sched_eh(ap);
}

static int ata_do_link_abort(struct ata_port *ap, struct ata_link *link)
{
	struct ata_queued_cmd *qc;
	int tag, nr_aborted = 0;

	WARN_ON(!ap->ops->error_handler);

	/* we're gonna abort all commands, no need for fast drain */
	ata_eh_set_pending(ap, 0);

	/* include internal tag in iteration */
	ata_qc_for_each_with_internal(ap, qc, tag) {
		if (qc && (!link || qc->dev->link == link)) {
			qc->flags |= ATA_QCFLAG_FAILED;
			ata_qc_complete(qc);
			nr_aborted++;
		}
	}

	if (!nr_aborted)
		ata_port_schedule_eh(ap);

	return nr_aborted;
}

/**
 *	ata_link_abort - abort all qc's on the link
 *	@link: ATA link to abort qc's for
 *
 *	Abort all active qc's on @link and schedule EH.
 *
 *	LOCKING:
 *	spin_lock_irqsave(host lock)
 *
 *	RETURNS:
 *	Number of aborted qc's.
 */
int ata_link_abort(struct ata_link *link)
{
	return ata_do_link_abort(link->ap, link);
}

/**
 *	ata_port_abort - abort all qc's on the port
 *	@ap: ATA port to abort qc's for
 *
 *	Abort all active qc's of @ap and schedule EH.
 *
 *	LOCKING:
 *	spin_lock_irqsave(host lock)
 *
 *	RETURNS:
 *	Number of aborted qc's.
 */
int ata_port_abort(struct ata_port *ap)
{
	return ata_do_link_abort(ap, NULL);
}

/**
 *	__ata_port_freeze - freeze port
 *	@ap: ATA port to freeze
 *
 *	This function is called when HSM violation or some other
 *	condition disrupts normal operation of the port.  A frozen port
 *	is not allowed to perform any operation until the port is
 *	thawed, which usually follows a successful reset.
 *
 *	ap->ops->freeze() callback can be used for freezing the port
 *	hardware-wise (e.g. mask interrupt and stop DMA engine).  If a
 *	port cannot be frozen hardware-wise, the interrupt handler
 *	must ack and clear interrupts unconditionally while the port
 *	is frozen.
 *
 *	LOCKING:
 *	spin_lock_irqsave(host lock)
 */
static void __ata_port_freeze(struct ata_port *ap)
{
	WARN_ON(!ap->ops->error_handler);

	if (ap->ops->freeze)
		ap->ops->freeze(ap);

	ap->pflags |= ATA_PFLAG_FROZEN;

	DPRINTK("ata%u port frozen\n", ap->print_id);
}

/**
 *	ata_port_freeze - abort & freeze port
 *	@ap: ATA port to freeze
 *
 *	Abort and freeze @ap.  The freeze operation must be called
 *	first, because some hardware requires special operations
 *	before the taskfile registers are accessible.
 *
 *	LOCKING:
 *	spin_lock_irqsave(host lock)
 *
 *	RETURNS:
 *	Number of aborted commands.
 */
int ata_port_freeze(struct ata_port *ap)
{
	int nr_aborted;

	WARN_ON(!ap->ops->error_handler);

	__ata_port_freeze(ap);
	nr_aborted = ata_port_abort(ap);

	return nr_aborted;
}

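/* Typical freeze lifecycle (illustrative): an interrupt handler or HSM
 * code detects a fatal condition and calls ata_port_freeze(), which
 * aborts in-flight qcs and schedules EH; EH then resets the link and,
 * on success, calls ata_eh_thaw_port() (further below) to re-enable
 * normal interrupt-driven operation.
 */
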
/**
 *	sata_async_notification - SATA async notification handler
 *	@ap: ATA port where async notification is received
 *
 *	Handler to be called when async notification via SDB FIS is
 *	received.  This function schedules EH if necessary.
 *
 *	LOCKING:
 *	spin_lock_irqsave(host lock)
 *
 *	RETURNS:
 *	1 if EH is scheduled, 0 otherwise.
 */
int sata_async_notification(struct ata_port *ap)
{
	u32 sntf;
	int rc;

	if (!(ap->flags & ATA_FLAG_AN))
		return 0;

	rc = sata_scr_read(&ap->link, SCR_NOTIFICATION, &sntf);
	if (rc == 0)
		sata_scr_write(&ap->link, SCR_NOTIFICATION, sntf);

	if (!sata_pmp_attached(ap) || rc) {
		/* PMP is not attached or SNTF is not available */
		if (!sata_pmp_attached(ap)) {
			/* PMP is not attached.  Check whether ATAPI
			 * AN is configured.  If so, notify media
			 * change.
			 */
			struct ata_device *dev = ap->link.device;

			if ((dev->class == ATA_DEV_ATAPI) &&
			    (dev->flags & ATA_DFLAG_AN))
				ata_scsi_media_change_notify(dev);
			return 0;
		} else {
			/* PMP is attached but SNTF is not available.
			 * ATAPI async media change notification is
			 * not used.  The PMP must be reporting PHY
			 * status change, schedule EH.
			 */
			ata_port_schedule_eh(ap);
			return 1;
		}
	} else {
		/* PMP is attached and SNTF is available */
		struct ata_link *link;

		/* check and notify ATAPI AN */
		ata_for_each_link(link, ap, EDGE) {
			if (!(sntf & (1 << link->pmp)))
				continue;

			if ((link->device->class == ATA_DEV_ATAPI) &&
			    (link->device->flags & ATA_DFLAG_AN))
				ata_scsi_media_change_notify(link->device);
		}

		/* If PMP is reporting that PHY status of some
		 * downstream ports has changed, schedule EH.
		 */
		if (sntf & (1 << SATA_PMP_CTRL_PORT)) {
			ata_port_schedule_eh(ap);
			return 1;
		}

		return 0;
	}
}

/**
 *	ata_eh_freeze_port - EH helper to freeze port
 *	@ap: ATA port to freeze
 *
 *	Freeze @ap.
 *
 *	LOCKING:
 *	None.
 */
void ata_eh_freeze_port(struct ata_port *ap)
{
	unsigned long flags;

	if (!ap->ops->error_handler)
		return;

	spin_lock_irqsave(ap->lock, flags);
	__ata_port_freeze(ap);
	spin_unlock_irqrestore(ap->lock, flags);
}

/**
 *	ata_eh_thaw_port - EH helper to thaw port
 *	@ap: ATA port to thaw
 *
 *	Thaw frozen port @ap.
 *
 *	LOCKING:
 *	None.
 */
void ata_eh_thaw_port(struct ata_port *ap)
{
	unsigned long flags;

	if (!ap->ops->error_handler)
		return;

	spin_lock_irqsave(ap->lock, flags);

	ap->pflags &= ~ATA_PFLAG_FROZEN;

	if (ap->ops->thaw)
		ap->ops->thaw(ap);

	spin_unlock_irqrestore(ap->lock, flags);

	DPRINTK("ata%u port thawed\n", ap->print_id);
}

static void ata_eh_scsidone(struct scsi_cmnd *scmd)
{
	/* nada */
}

static void __ata_eh_qc_complete(struct ata_queued_cmd *qc)
{
	struct ata_port *ap = qc->ap;
	struct scsi_cmnd *scmd = qc->scsicmd;
	unsigned long flags;

	spin_lock_irqsave(ap->lock, flags);
	qc->scsidone = ata_eh_scsidone;
	__ata_qc_complete(qc);
	WARN_ON(ata_tag_valid(qc->tag));
	spin_unlock_irqrestore(ap->lock, flags);

	scsi_eh_finish_cmd(scmd, &ap->eh_done_q);
}

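/* The two wrappers below differ only in how they steer the SCSI
 * midlayer's retry accounting (informal summary):
 *
 *	ata_eh_qc_complete(): sets scmd->retries = scmd->allowed, i.e.
 *	the command is reported as done and will not be retried.
 *
 *	ata_eh_qc_retry(): bumps scmd->allowed when the failure was
 *	unrelated to the command itself (qc->err_mask == 0), so the
 *	retry does not consume one of the command's own attempts.
 */
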
/**
 *	ata_eh_qc_complete - Complete an active ATA command from EH
 *	@qc: Command to complete
 *
 *	Indicate to the mid and upper layers that an ATA command has
 *	completed.  To be used from EH.
 */
void ata_eh_qc_complete(struct ata_queued_cmd *qc)
{
	struct scsi_cmnd *scmd = qc->scsicmd;
	scmd->retries = scmd->allowed;
	__ata_eh_qc_complete(qc);
}

/**
 *	ata_eh_qc_retry - Tell midlayer to retry an ATA command after EH
 *	@qc: Command to retry
 *
 *	Indicate to the mid and upper layers that an ATA command
 *	should be retried.  To be used from EH.
 *
 *	SCSI midlayer limits the number of retries to scmd->allowed.
 *	scmd->allowed is incremented for commands which get retried
 *	due to unrelated failures (qc->err_mask is zero).
 */
void ata_eh_qc_retry(struct ata_queued_cmd *qc)
{
	struct scsi_cmnd *scmd = qc->scsicmd;
	if (!qc->err_mask)
		scmd->allowed++;
	__ata_eh_qc_complete(qc);
}

/**
 *	ata_dev_disable - disable ATA device
 *	@dev: ATA device to disable
 *
 *	Disable @dev.
 *
 *	LOCKING:
 *	EH context.
 */
void ata_dev_disable(struct ata_device *dev)
{
	if (!ata_dev_enabled(dev))
		return;

	if (ata_msg_drv(dev->link->ap))
		ata_dev_warn(dev, "disabled\n");
	ata_acpi_on_disable(dev);
	ata_down_xfermask_limit(dev, ATA_DNXFER_FORCE_PIO0 | ATA_DNXFER_QUIET);
	dev->class++;

	/* From now till the next successful probe, ering is used to
	 * track probe failures.  Clear accumulated device error info.
	 */
	ata_ering_clear(&dev->ering);
}

/**
 *	ata_eh_detach_dev - detach ATA device
 *	@dev: ATA device to detach
 *
 *	Detach @dev.
 *
 *	LOCKING:
 *	None.
 */
void ata_eh_detach_dev(struct ata_device *dev)
{
	struct ata_link *link = dev->link;
	struct ata_port *ap = link->ap;
	struct ata_eh_context *ehc = &link->eh_context;
	unsigned long flags;

	ata_dev_disable(dev);

	spin_lock_irqsave(ap->lock, flags);

	dev->flags &= ~ATA_DFLAG_DETACH;

	if (ata_scsi_offline_dev(dev)) {
		dev->flags |= ATA_DFLAG_DETACHED;
		ap->pflags |= ATA_PFLAG_SCSI_HOTPLUG;
	}

	/* clear per-dev EH info */
	ata_eh_clear_action(link, dev, &link->eh_info, ATA_EH_PERDEV_MASK);
	ata_eh_clear_action(link, dev, &link->eh_context.i, ATA_EH_PERDEV_MASK);
	ehc->saved_xfer_mode[dev->devno] = 0;
	ehc->saved_ncq_enabled &= ~(1 << dev->devno);

	spin_unlock_irqrestore(ap->lock, flags);
}

/**
 *	ata_eh_about_to_do - about to perform eh_action
 *	@link: target ATA link
 *	@dev: target ATA dev for per-dev action (can be NULL)
 *	@action: action about to be performed
 *
 *	Called just before performing EH actions to clear related bits
 *	in @link->eh_info such that eh actions are not unnecessarily
 *	repeated.
 *
 *	LOCKING:
 *	None.
 */
void ata_eh_about_to_do(struct ata_link *link, struct ata_device *dev,
			unsigned int action)
{
	struct ata_port *ap = link->ap;
	struct ata_eh_info *ehi = &link->eh_info;
	struct ata_eh_context *ehc = &link->eh_context;
	unsigned long flags;

	spin_lock_irqsave(ap->lock, flags);

	ata_eh_clear_action(link, dev, ehi, action);

	/* About to take EH action, set RECOVERED.  Ignore actions on
	 * slave links as master will do them again.
	 */
	if (!(ehc->i.flags & ATA_EHI_QUIET) && link != ap->slave_link)
		ap->pflags |= ATA_PFLAG_RECOVERED;

	spin_unlock_irqrestore(ap->lock, flags);
}

/**
 *	ata_eh_done - EH action complete
 *	@link: ATA link for which EH actions are complete
 *	@dev: target ATA dev for per-dev action (can be NULL)
 *	@action: action just completed
 *
 *	Called right after performing EH actions to clear related bits
 *	in @link->eh_context.
 *
 *	LOCKING:
 *	None.
 */
void ata_eh_done(struct ata_link *link, struct ata_device *dev,
		 unsigned int action)
{
	struct ata_eh_context *ehc = &link->eh_context;

	ata_eh_clear_action(link, dev, &ehc->i, action);
}

/**
 *	ata_err_string - convert err_mask to descriptive string
 *	@err_mask: error mask to convert to string
 *
 *	Convert @err_mask to descriptive string.  Errors are
 *	prioritized according to severity and only the most severe
 *	error is reported.
 *
 *	LOCKING:
 *	None.
 *
 *	RETURNS:
 *	Descriptive string for @err_mask
 */
static const char *ata_err_string(unsigned int err_mask)
{
	if (err_mask & AC_ERR_HOST_BUS)
		return "host bus error";
	if (err_mask & AC_ERR_ATA_BUS)
		return "ATA bus error";
	if (err_mask & AC_ERR_TIMEOUT)
		return "timeout";
	if (err_mask & AC_ERR_HSM)
		return "HSM violation";
	if (err_mask & AC_ERR_SYSTEM)
		return "internal error";
	if (err_mask & AC_ERR_MEDIA)
		return "media error";
	if (err_mask & AC_ERR_INVALID)
		return "invalid argument";
	if (err_mask & AC_ERR_DEV)
		return "device error";
	if (err_mask & AC_ERR_NCQ)
		return "NCQ error";
	if (err_mask & AC_ERR_NODEV_HINT)
		return "Polling detection error";
	return "unknown error";
}

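/* Because the checks in ata_err_string() are ordered by severity, a
 * combined mask reports only the most severe bit, e.g. (illustrative):
 *
 *	ata_err_string(AC_ERR_MEDIA | AC_ERR_DEV)   == "media error"
 *	ata_err_string(AC_ERR_TIMEOUT | AC_ERR_DEV) == "timeout"
 */
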
/**
 *	ata_eh_read_log_10h - Read log page 10h for NCQ error details
 *	@dev: Device to read log page 10h from
 *	@tag: Resulting tag of the failed command
 *	@tf: Resulting taskfile registers of the failed command
 *
 *	Read log page 10h to obtain NCQ error details and clear error
 *	condition.
 *
 *	LOCKING:
 *	Kernel thread context (may sleep).
 *
 *	RETURNS:
 *	0 on success, -errno otherwise.
 */
static int ata_eh_read_log_10h(struct ata_device *dev,
			       int *tag, struct ata_taskfile *tf)
{
	u8 *buf = dev->link->ap->sector_buf;
	unsigned int err_mask;
	u8 csum;
	int i;

	err_mask = ata_read_log_page(dev, ATA_LOG_SATA_NCQ, 0, buf, 1);
	if (err_mask)
		return -EIO;

	csum = 0;
	for (i = 0; i < ATA_SECT_SIZE; i++)
		csum += buf[i];
	if (csum)
		ata_dev_warn(dev, "invalid checksum 0x%x on log page 10h\n",
			     csum);

	if (buf[0] & 0x80)
		return -ENOENT;

	*tag = buf[0] & 0x1f;

	tf->command = buf[2];
	tf->feature = buf[3];
	tf->lbal = buf[4];
	tf->lbam = buf[5];
	tf->lbah = buf[6];
	tf->device = buf[7];
	tf->hob_lbal = buf[8];
	tf->hob_lbam = buf[9];
	tf->hob_lbah = buf[10];
	tf->nsect = buf[12];
	tf->hob_nsect = buf[13];
	if (ata_id_has_ncq_autosense(dev->id))
		tf->auxiliary = buf[14] << 16 | buf[15] << 8 | buf[16];

	return 0;
}

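/* Byte layout consumed above (informal, as decoded by
 * ata_eh_read_log_10h(); see the ACS/SATA specifications for the
 * authoritative definition):
 *
 *	buf[0]   bit 7: NQ (non-queued error), bits 4:0: failed tag
 *	buf[2]   status		buf[3]   error
 *	buf[4]   LBA(7:0)	buf[5]   LBA(15:8)	buf[6]  LBA(23:16)
 *	buf[7]   device		buf[8..10] LBA(31:24) .. LBA(47:40)
 *	buf[12]  count(7:0)	buf[13]  count(15:8)
 *	buf[14..16] sense key / asc / ascq when NCQ autosense is present
 */
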
/**
 *	atapi_eh_tur - perform ATAPI TEST_UNIT_READY
 *	@dev: target ATAPI device
 *	@r_sense_key: out parameter for sense_key
 *
 *	Perform ATAPI TEST_UNIT_READY.
 *
 *	LOCKING:
 *	EH context (may sleep).
 *
 *	RETURNS:
 *	0 on success, AC_ERR_* mask on failure.
 */
unsigned int atapi_eh_tur(struct ata_device *dev, u8 *r_sense_key)
{
	u8 cdb[ATAPI_CDB_LEN] = { TEST_UNIT_READY, 0, 0, 0, 0, 0 };
	struct ata_taskfile tf;
	unsigned int err_mask;

	ata_tf_init(dev, &tf);

	tf.flags |= ATA_TFLAG_ISADDR | ATA_TFLAG_DEVICE;
	tf.command = ATA_CMD_PACKET;
	tf.protocol = ATAPI_PROT_NODATA;

	err_mask = ata_exec_internal(dev, &tf, cdb, DMA_NONE, NULL, 0, 0);
	if (err_mask == AC_ERR_DEV)
		*r_sense_key = tf.feature >> 4;
	return err_mask;
}

/**
 *	ata_eh_request_sense - perform REQUEST_SENSE_DATA_EXT
 *	@qc: qc to perform REQUEST_SENSE_DATA_EXT to
 *	@cmd: scsi command for which the sense code should be set
 *
 *	Perform REQUEST_SENSE_DATA_EXT after the device reported CHECK
 *	SENSE.  This function is an EH helper.
 *
 *	LOCKING:
 *	Kernel thread context (may sleep).
 */
static void ata_eh_request_sense(struct ata_queued_cmd *qc,
				 struct scsi_cmnd *cmd)
{
	struct ata_device *dev = qc->dev;
	struct ata_taskfile tf;
	unsigned int err_mask;

	if (qc->ap->pflags & ATA_PFLAG_FROZEN) {
		ata_dev_warn(dev, "sense data available but port frozen\n");
		return;
	}

	if (!cmd || qc->flags & ATA_QCFLAG_SENSE_VALID)
		return;

	if (!ata_id_sense_reporting_enabled(dev->id)) {
		ata_dev_warn(qc->dev, "sense data reporting disabled\n");
		return;
	}

	DPRINTK("ATA request sense\n");

	ata_tf_init(dev, &tf);
	tf.flags |= ATA_TFLAG_ISADDR | ATA_TFLAG_DEVICE;
	tf.flags |= ATA_TFLAG_LBA | ATA_TFLAG_LBA48;
	tf.command = ATA_CMD_REQ_SENSE_DATA;
	tf.protocol = ATA_PROT_NODATA;

	err_mask = ata_exec_internal(dev, &tf, NULL, DMA_NONE, NULL, 0, 0);
	/* Ignore err_mask; ATA_ERR might be set */
	if (tf.command & ATA_SENSE) {
		ata_scsi_set_sense(dev, cmd, tf.lbah, tf.lbam, tf.lbal);
		qc->flags |= ATA_QCFLAG_SENSE_VALID;
	} else {
		ata_dev_warn(dev, "request sense failed stat %02x emask %x\n",
			     tf.command, err_mask);
	}
}

/**
 *	atapi_eh_request_sense - perform ATAPI REQUEST_SENSE
 *	@dev: device to perform REQUEST_SENSE to
 *	@sense_buf: result sense data buffer (SCSI_SENSE_BUFFERSIZE bytes long)
 *	@dfl_sense_key: default sense key to use
 *
 *	Perform ATAPI REQUEST_SENSE after the device reported CHECK
 *	SENSE.  This function is an EH helper.
 *
 *	LOCKING:
 *	Kernel thread context (may sleep).
 *
 *	RETURNS:
 *	0 on success, AC_ERR_* mask on failure
 */
unsigned int atapi_eh_request_sense(struct ata_device *dev,
				    u8 *sense_buf, u8 dfl_sense_key)
{
	u8 cdb[ATAPI_CDB_LEN] =
		{ REQUEST_SENSE, 0, 0, 0, SCSI_SENSE_BUFFERSIZE, 0 };
	struct ata_port *ap = dev->link->ap;
	struct ata_taskfile tf;

	DPRINTK("ATAPI request sense\n");

	memset(sense_buf, 0, SCSI_SENSE_BUFFERSIZE);

	/* initialize sense_buf with the error register,
	 * for the case where they are -not- overwritten
	 */
	sense_buf[0] = 0x70;
	sense_buf[2] = dfl_sense_key;

	/* some devices time out if garbage left in tf */
	ata_tf_init(dev, &tf);

	tf.flags |= ATA_TFLAG_ISADDR | ATA_TFLAG_DEVICE;
	tf.command = ATA_CMD_PACKET;

	/* is it pointless to prefer PIO for "safety reasons"? */
	if (ap->flags & ATA_FLAG_PIO_DMA) {
		tf.protocol = ATAPI_PROT_DMA;
		tf.feature |= ATAPI_PKT_DMA;
	} else {
		tf.protocol = ATAPI_PROT_PIO;
		tf.lbam = SCSI_SENSE_BUFFERSIZE;
		tf.lbah = 0;
	}

	return ata_exec_internal(dev, &tf, cdb, DMA_FROM_DEVICE,
				 sense_buf, SCSI_SENSE_BUFFERSIZE, 0);
}

/**
 *	ata_eh_analyze_serror - analyze SError for a failed port
 *	@link: ATA link to analyze SError for
 *
 *	Analyze SError if available and further determine cause of
 *	failure.
 *
 *	LOCKING:
 *	None.
 */
static void ata_eh_analyze_serror(struct ata_link *link)
{
	struct ata_eh_context *ehc = &link->eh_context;
	u32 serror = ehc->i.serror;
	unsigned int err_mask = 0, action = 0;
	u32 hotplug_mask;

	if (serror & (SERR_PERSISTENT | SERR_DATA)) {
		err_mask |= AC_ERR_ATA_BUS;
		action |= ATA_EH_RESET;
	}
	if (serror & SERR_PROTOCOL) {
		err_mask |= AC_ERR_HSM;
		action |= ATA_EH_RESET;
	}
	if (serror & SERR_INTERNAL) {
		err_mask |= AC_ERR_SYSTEM;
		action |= ATA_EH_RESET;
	}

	/* Determine whether a hotplug event has occurred.  Both
	 * SError.N/X are considered hotplug events for enabled or
	 * host links.  For disabled PMP links, only N bit is
	 * considered as X bit is left at 1 for link plugging.
	 */
	if (link->lpm_policy > ATA_LPM_MAX_POWER)
		hotplug_mask = 0;	/* hotplug doesn't work w/ LPM */
	else if (!(link->flags & ATA_LFLAG_DISABLED) || ata_is_host_link(link))
		hotplug_mask = SERR_PHYRDY_CHG | SERR_DEV_XCHG;
	else
		hotplug_mask = SERR_PHYRDY_CHG;

	if (serror & hotplug_mask)
		ata_ehi_hotplugged(&ehc->i);

	ehc->i.err_mask |= err_mask;
	ehc->i.action |= action;
}

/**
 *	ata_eh_analyze_ncq_error - analyze NCQ error
 *	@link: ATA link to analyze NCQ error for
 *
 *	Read log page 10h, determine the offending qc and acquire
 *	error status TF.  For NCQ device errors, all an LLDD has to do
 *	is set AC_ERR_DEV in ehi->err_mask.  This function takes
 *	care of the rest.
 *
 *	LOCKING:
 *	Kernel thread context (may sleep).
 */
void ata_eh_analyze_ncq_error(struct ata_link *link)
{
	struct ata_port *ap = link->ap;
	struct ata_eh_context *ehc = &link->eh_context;
	struct ata_device *dev = link->device;
	struct ata_queued_cmd *qc;
	struct ata_taskfile tf;
	int tag, rc;

	/* if frozen, we can't do much */
	if (ap->pflags & ATA_PFLAG_FROZEN)
		return;

	/* is it NCQ device error? */
	if (!link->sactive || !(ehc->i.err_mask & AC_ERR_DEV))
		return;

	/* has LLDD analyzed already? */
	ata_qc_for_each_raw(ap, qc, tag) {
		if (!(qc->flags & ATA_QCFLAG_FAILED))
			continue;

		if (qc->err_mask)
			return;
	}

	/* okay, this error is ours */
	memset(&tf, 0, sizeof(tf));
	rc = ata_eh_read_log_10h(dev, &tag, &tf);
	if (rc) {
		ata_link_err(link, "failed to read log page 10h (errno=%d)\n",
			     rc);
		return;
	}

	if (!(link->sactive & (1 << tag))) {
		ata_link_err(link, "log page 10h reported inactive tag %d\n",
			     tag);
		return;
	}

	/* we've got the perpetrator, condemn it */
	qc = __ata_qc_from_tag(ap, tag);
	memcpy(&qc->result_tf, &tf, sizeof(tf));
	qc->result_tf.flags = ATA_TFLAG_ISADDR | ATA_TFLAG_LBA | ATA_TFLAG_LBA48;
	qc->err_mask |= AC_ERR_DEV | AC_ERR_NCQ;
	if ((qc->result_tf.command & ATA_SENSE) || qc->result_tf.auxiliary) {
		char sense_key, asc, ascq;

		sense_key = (qc->result_tf.auxiliary >> 16) & 0xff;
		asc = (qc->result_tf.auxiliary >> 8) & 0xff;
		ascq = qc->result_tf.auxiliary & 0xff;
		ata_scsi_set_sense(dev, qc->scsicmd, sense_key, asc, ascq);
		ata_scsi_set_sense_information(dev, qc->scsicmd,
					       &qc->result_tf);
		qc->flags |= ATA_QCFLAG_SENSE_VALID;
	}

	ehc->i.err_mask &= ~AC_ERR_DEV;
}

/**
 *	ata_eh_analyze_tf - analyze taskfile of a failed qc
 *	@qc: qc to analyze
 *	@tf: Taskfile registers to analyze
 *
 *	Analyze taskfile of @qc and further determine cause of
 *	failure.  This function also requests ATAPI sense data if
 *	available.
 *
 *	LOCKING:
 *	Kernel thread context (may sleep).
 *
 *	RETURNS:
 *	Determined recovery action
 */
static unsigned int ata_eh_analyze_tf(struct ata_queued_cmd *qc,
				      const struct ata_taskfile *tf)
{
	unsigned int tmp, action = 0;
	u8 stat = tf->command, err = tf->feature;

	if ((stat & (ATA_BUSY | ATA_DRQ | ATA_DRDY)) != ATA_DRDY) {
		qc->err_mask |= AC_ERR_HSM;
		return ATA_EH_RESET;
	}

	if (stat & (ATA_ERR | ATA_DF)) {
		qc->err_mask |= AC_ERR_DEV;
		/*
		 * Sense data reporting does not work if the
		 * device fault bit is set.
		 */
		if (stat & ATA_DF)
			stat &= ~ATA_SENSE;
	} else {
		return 0;
	}

	switch (qc->dev->class) {
	case ATA_DEV_ATA:
	case ATA_DEV_ZAC:
		if (stat & ATA_SENSE)
			ata_eh_request_sense(qc, qc->scsicmd);
		if (err & ATA_ICRC)
			qc->err_mask |= AC_ERR_ATA_BUS;
		if (err & (ATA_UNC | ATA_AMNF))
			qc->err_mask |= AC_ERR_MEDIA;
		if (err & ATA_IDNF)
			qc->err_mask |= AC_ERR_INVALID;
		break;

	case ATA_DEV_ATAPI:
		if (!(qc->ap->pflags & ATA_PFLAG_FROZEN)) {
			tmp = atapi_eh_request_sense(qc->dev,
						qc->scsicmd->sense_buffer,
						qc->result_tf.feature >> 4);
			if (!tmp)
				qc->flags |= ATA_QCFLAG_SENSE_VALID;
			else
				qc->err_mask |= tmp;
		}
	}

	if (qc->flags & ATA_QCFLAG_SENSE_VALID) {
		int ret = scsi_check_sense(qc->scsicmd);
		/*
		 * SUCCESS here means that the sense code could be
		 * evaluated and should be passed to the upper layers
		 * for correct evaluation.
		 * FAILED means the sense code could not be interpreted
		 * and the device would need to be reset.
		 * NEEDS_RETRY and ADD_TO_MLQUEUE means that the
		 * command would need to be retried.
		 */
		if (ret == NEEDS_RETRY || ret == ADD_TO_MLQUEUE) {
			qc->flags |= ATA_QCFLAG_RETRY;
			qc->err_mask |= AC_ERR_OTHER;
		} else if (ret != SUCCESS) {
			qc->err_mask |= AC_ERR_HSM;
		}
	}
	if (qc->err_mask & (AC_ERR_HSM | AC_ERR_TIMEOUT | AC_ERR_ATA_BUS))
		action |= ATA_EH_RESET;

	return action;
}

static int ata_eh_categorize_error(unsigned int eflags, unsigned int err_mask,
				   int *xfer_ok)
{
	int base = 0;

	if (!(eflags & ATA_EFLAG_DUBIOUS_XFER))
		*xfer_ok = 1;

	if (!*xfer_ok)
		base = ATA_ECAT_DUBIOUS_NONE;

	if (err_mask & AC_ERR_ATA_BUS)
		return base + ATA_ECAT_ATA_BUS;

	if (err_mask & AC_ERR_TIMEOUT)
		return base + ATA_ECAT_TOUT_HSM;

	if (eflags & ATA_EFLAG_IS_IO) {
		if (err_mask & AC_ERR_HSM)
			return base + ATA_ECAT_TOUT_HSM;
		if ((err_mask &
		     (AC_ERR_DEV|AC_ERR_MEDIA|AC_ERR_INVALID)) == AC_ERR_DEV)
			return base + ATA_ECAT_UNK_DEV;
	}

	return 0;
}

struct speed_down_verdict_arg {
	u64 since;
	int xfer_ok;
	int nr_errors[ATA_ECAT_NR];
};

static int speed_down_verdict_cb(struct ata_ering_entry *ent, void *void_arg)
{
	struct speed_down_verdict_arg *arg = void_arg;
	int cat;

	if ((ent->eflags & ATA_EFLAG_OLD_ER) || (ent->timestamp < arg->since))
		return -1;

	cat = ata_eh_categorize_error(ent->eflags, ent->err_mask,
				      &arg->xfer_ok);
	arg->nr_errors[cat]++;

	return 0;
}

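/* Worked example (illustrative) of the verdict rules documented below:
 * two DUBIOUS_TOUT_HSM errors within the last 5 minutes count toward
 * both rule #1 (DUBIOUS_ATA_BUS + DUBIOUS_TOUT_HSM > 1) and rule #2
 * (DUBIOUS_TOUT_HSM + DUBIOUS_UNK_DEV > 1), yielding
 * SPDN_SPEED_DOWN | SPDN_FALLBACK_TO_PIO | SPDN_NCQ_OFF with
 * SPDN_KEEP_ERRORS set, so the error ring is not cleared afterwards.
 */
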
/**
 *	ata_eh_speed_down_verdict - Determine speed down verdict
 *	@dev: Device of interest
 *
 *	This function examines error ring of @dev and determines
 *	whether NCQ needs to be turned off, transfer speed should be
 *	stepped down, or falling back to PIO is necessary.
 *
 *	ECAT_ATA_BUS	: ATA_BUS error for any command
 *
 *	ECAT_TOUT_HSM	: TIMEOUT for any command or HSM violation for
 *			  IO commands
 *
 *	ECAT_UNK_DEV	: Unknown DEV error for IO commands
 *
 *	ECAT_DUBIOUS_*	: Identical to above three but occurred while
 *			  data transfer hasn't been verified.
 *
 *	Verdicts are
 *
 *	NCQ_OFF		: Turn off NCQ.
 *
 *	SPEED_DOWN	: Speed down transfer speed but don't fall back
 *			  to PIO.
 *
 *	FALLBACK_TO_PIO	: Fall back to PIO.
 *
 *	Even if multiple verdicts are returned, only one action is
 *	taken per error.  An action triggered by non-DUBIOUS errors
 *	clears ering, while one triggered by DUBIOUS_* errors doesn't.
 *	This is to expedite speed down decisions right after device is
 *	initially configured.
 *
 *	The following are speed down rules.  #1 and #2 deal with
 *	DUBIOUS errors.
 *
 *	1. If more than one DUBIOUS_ATA_BUS or DUBIOUS_TOUT_HSM errors
 *	   occurred during last 5 mins, SPEED_DOWN and FALLBACK_TO_PIO.
 *
 *	2. If more than one DUBIOUS_TOUT_HSM or DUBIOUS_UNK_DEV errors
 *	   occurred during last 5 mins, NCQ_OFF.
 *
 *	3. If more than 6 ATA_BUS, TOUT_HSM or UNK_DEV errors
 *	   occurred during last 5 mins, FALLBACK_TO_PIO.
 *
 *	4. If more than 3 TOUT_HSM or UNK_DEV errors occurred
 *	   during last 10 mins, NCQ_OFF.
 *
 *	5. If more than 3 ATA_BUS or TOUT_HSM errors, or more than 6
 *	   UNK_DEV errors occurred during last 10 mins, SPEED_DOWN.
 *
 *	LOCKING:
 *	Inherited from caller.
 *
 *	RETURNS:
 *	OR of ATA_EH_SPDN_* flags.
 */
static unsigned int ata_eh_speed_down_verdict(struct ata_device *dev)
{
	const u64 j5mins = 5LLU * 60 * HZ, j10mins = 10LLU * 60 * HZ;
	u64 j64 = get_jiffies_64();
	struct speed_down_verdict_arg arg;
	unsigned int verdict = 0;

	/* scan past 5 mins of error history */
	memset(&arg, 0, sizeof(arg));
	arg.since = j64 - min(j64, j5mins);
	ata_ering_map(&dev->ering, speed_down_verdict_cb, &arg);

	if (arg.nr_errors[ATA_ECAT_DUBIOUS_ATA_BUS] +
	    arg.nr_errors[ATA_ECAT_DUBIOUS_TOUT_HSM] > 1)
		verdict |= ATA_EH_SPDN_SPEED_DOWN |
			ATA_EH_SPDN_FALLBACK_TO_PIO | ATA_EH_SPDN_KEEP_ERRORS;

	if (arg.nr_errors[ATA_ECAT_DUBIOUS_TOUT_HSM] +
	    arg.nr_errors[ATA_ECAT_DUBIOUS_UNK_DEV] > 1)
		verdict |= ATA_EH_SPDN_NCQ_OFF | ATA_EH_SPDN_KEEP_ERRORS;

	if (arg.nr_errors[ATA_ECAT_ATA_BUS] +
	    arg.nr_errors[ATA_ECAT_TOUT_HSM] +
	    arg.nr_errors[ATA_ECAT_UNK_DEV] > 6)
		verdict |= ATA_EH_SPDN_FALLBACK_TO_PIO;

	/* scan past 10 mins of error history */
	memset(&arg, 0, sizeof(arg));
	arg.since = j64 - min(j64, j10mins);
	ata_ering_map(&dev->ering, speed_down_verdict_cb, &arg);

	if (arg.nr_errors[ATA_ECAT_TOUT_HSM] +
	    arg.nr_errors[ATA_ECAT_UNK_DEV] > 3)
		verdict |= ATA_EH_SPDN_NCQ_OFF;

	if (arg.nr_errors[ATA_ECAT_ATA_BUS] +
	    arg.nr_errors[ATA_ECAT_TOUT_HSM] > 3 ||
	    arg.nr_errors[ATA_ECAT_UNK_DEV] > 6)
		verdict |= ATA_EH_SPDN_SPEED_DOWN;

	return verdict;
}

/**
 *	ata_eh_speed_down - record error and speed down if necessary
 *	@dev: Failed device
 *	@eflags: mask of ATA_EFLAG_* flags
 *	@err_mask: err_mask of the error
 *
 *	Record error and examine error history to determine whether
 *	adjusting transmission speed is necessary.  It also sets
 *	transmission limits appropriately if such adjustment is
 *	necessary.
 *
 *	LOCKING:
 *	Kernel thread context (may sleep).
 *
 *	RETURNS:
 *	Determined recovery action.
 */
static unsigned int ata_eh_speed_down(struct ata_device *dev,
				unsigned int eflags, unsigned int err_mask)
{
	struct ata_link *link = ata_dev_phys_link(dev);
	int xfer_ok = 0;
	unsigned int verdict;
	unsigned int action = 0;

	/* don't bother if Cat-0 error */
	if (ata_eh_categorize_error(eflags, err_mask, &xfer_ok) == 0)
		return 0;

	/* record error and determine whether speed down is necessary */
	ata_ering_record(&dev->ering, eflags, err_mask);
	verdict = ata_eh_speed_down_verdict(dev);

	/* turn off NCQ? */
	if ((verdict & ATA_EH_SPDN_NCQ_OFF) &&
	    (dev->flags & (ATA_DFLAG_PIO | ATA_DFLAG_NCQ |
			   ATA_DFLAG_NCQ_OFF)) == ATA_DFLAG_NCQ) {
		dev->flags |= ATA_DFLAG_NCQ_OFF;
		ata_dev_warn(dev, "NCQ disabled due to excessive errors\n");
		goto done;
	}

	/* speed down? */
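	/* The two-step ladder below (informal sketch): at spdn_cnt 0 the
	 * DMA mode is lowered (or the PIO mode for PIO-only devices); at
	 * spdn_cnt 1 the 40-wire DMA limit (or PIO0) is forced.  Only
	 * after both steps are exhausted (spdn_cnt >= 2) does the
	 * FALLBACK_TO_PIO verdict further below apply.
	 */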
*/
2013 if (verdict & ATA_EH_SPDN_SPEED_DOWN) {
2014 /* speed down SATA link speed if possible */
2015 if (sata_down_spd_limit(link, 0) == 0) {
2016 action |= ATA_EH_RESET;
2017 goto done;
2018 }
2019
2020 /* lower transfer mode */
2021 if (dev->spdn_cnt < 2) {
2022 static const int dma_dnxfer_sel[] =
2023 { ATA_DNXFER_DMA, ATA_DNXFER_40C };
2024 static const int pio_dnxfer_sel[] =
2025 { ATA_DNXFER_PIO, ATA_DNXFER_FORCE_PIO0 };
2026 int sel;
2027
2028 if (dev->xfer_shift != ATA_SHIFT_PIO)
2029 sel = dma_dnxfer_sel[dev->spdn_cnt];
2030 else
2031 sel = pio_dnxfer_sel[dev->spdn_cnt];
2032
2033 dev->spdn_cnt++;
2034
2035 if (ata_down_xfermask_limit(dev, sel) == 0) {
2036 action |= ATA_EH_RESET;
2037 goto done;
2038 }
2039 }
2040 }
2041
2042 /* Fall back to PIO? Slowing down to PIO is meaningless for
2043 * SATA ATA devices. Consider it only for PATA and SATA ATAPI.
2044 */
2045 if ((verdict & ATA_EH_SPDN_FALLBACK_TO_PIO) && (dev->spdn_cnt >= 2) &&
2046 (link->ap->cbl != ATA_CBL_SATA || dev->class == ATA_DEV_ATAPI) &&
2047 (dev->xfer_shift != ATA_SHIFT_PIO)) {
2048 if (ata_down_xfermask_limit(dev, ATA_DNXFER_FORCE_PIO) == 0) {
2049 dev->spdn_cnt = 0;
2050 action |= ATA_EH_RESET;
2051 goto done;
2052 }
2053 }
2054
2055 return 0;
2056 done:
2057 /* device has been slowed down, blow error history */
2058 if (!(verdict & ATA_EH_SPDN_KEEP_ERRORS))
2059 ata_ering_clear(&dev->ering);
2060 return action;
2061 }
2062
2063 /**
2064 * ata_eh_worth_retry - analyze error and decide whether to retry
2065 * @qc: qc to possibly retry
2066 *
2067 * Look at the cause of the error and decide if a retry
2068 * might be useful or not. We don't want to retry media errors
2069 * because the drive itself has probably already taken 10-30 seconds
2070 * doing its own internal retries before reporting the failure.
2071 */
2072 static inline int ata_eh_worth_retry(struct ata_queued_cmd *qc)
2073 {
2074 if (qc->err_mask & AC_ERR_MEDIA)
2075 return 0; /* don't retry media errors */
2076 if (qc->flags & ATA_QCFLAG_IO)
2077 return 1; /* otherwise retry anything from fs stack */
2078 if (qc->err_mask & AC_ERR_INVALID)
2079 return 0; /* don't retry these */
2080 return qc->err_mask != AC_ERR_DEV; /* retry if not dev error */
2081 }
2082
2083 /**
2084 * ata_eh_quiet - check if we need to be quiet about a command error
2085 * @qc: qc to check
2086 *
2087 * Look at the qc flags and its SCSI command request flags to determine
2088 * if we need to be quiet about the command failure.
2089 */
2090 static inline bool ata_eh_quiet(struct ata_queued_cmd *qc)
2091 {
2092 if (qc->scsicmd &&
2093 qc->scsicmd->request->rq_flags & RQF_QUIET)
2094 qc->flags |= ATA_QCFLAG_QUIET;
2095 return qc->flags & ATA_QCFLAG_QUIET;
2096 }
2097
2098 /**
2099 * ata_eh_link_autopsy - analyze error and determine recovery action
2100 * @link: host link to perform autopsy on
2101 *
2102 * Analyze why @link failed and determine which recovery actions
2103 * are needed. This function also sets more detailed AC_ERR_*
2104 * values and fills sense data for ATAPI CHECK CONDITION.
2105 *
2106 * LOCKING:
2107 * Kernel thread context (may sleep).
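 *
 * The per-qc retry decision made during autopsy (via
 * ata_eh_worth_retry() above) can be summarized as follows, in
 * order of precedence (an illustrative restatement of the code,
 * not an extra rule set):
 *
 *	AC_ERR_MEDIA	-> no retry (drive already retried)
 *	ATA_QCFLAG_IO	-> retry anything from the fs stack
 *	AC_ERR_INVALID	-> no retry
 *	pure AC_ERR_DEV	-> no retry; anything else retries
 *
 * A qc with valid sense data skips this check entirely; the upper
 * layers decide based on the sense data instead.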
2108 */ 2109 static void ata_eh_link_autopsy(struct ata_link *link) 2110 { 2111 struct ata_port *ap = link->ap; 2112 struct ata_eh_context *ehc = &link->eh_context; 2113 struct ata_queued_cmd *qc; 2114 struct ata_device *dev; 2115 unsigned int all_err_mask = 0, eflags = 0; 2116 int tag, nr_failed = 0, nr_quiet = 0; 2117 u32 serror; 2118 int rc; 2119 2120 DPRINTK("ENTER\n"); 2121 2122 if (ehc->i.flags & ATA_EHI_NO_AUTOPSY) 2123 return; 2124 2125 /* obtain and analyze SError */ 2126 rc = sata_scr_read(link, SCR_ERROR, &serror); 2127 if (rc == 0) { 2128 ehc->i.serror |= serror; 2129 ata_eh_analyze_serror(link); 2130 } else if (rc != -EOPNOTSUPP) { 2131 /* SError read failed, force reset and probing */ 2132 ehc->i.probe_mask |= ATA_ALL_DEVICES; 2133 ehc->i.action |= ATA_EH_RESET; 2134 ehc->i.err_mask |= AC_ERR_OTHER; 2135 } 2136 2137 /* analyze NCQ failure */ 2138 ata_eh_analyze_ncq_error(link); 2139 2140 /* any real error trumps AC_ERR_OTHER */ 2141 if (ehc->i.err_mask & ~AC_ERR_OTHER) 2142 ehc->i.err_mask &= ~AC_ERR_OTHER; 2143 2144 all_err_mask |= ehc->i.err_mask; 2145 2146 ata_qc_for_each_raw(ap, qc, tag) { 2147 if (!(qc->flags & ATA_QCFLAG_FAILED) || 2148 ata_dev_phys_link(qc->dev) != link) 2149 continue; 2150 2151 /* inherit upper level err_mask */ 2152 qc->err_mask |= ehc->i.err_mask; 2153 2154 /* analyze TF */ 2155 ehc->i.action |= ata_eh_analyze_tf(qc, &qc->result_tf); 2156 2157 /* DEV errors are probably spurious in case of ATA_BUS error */ 2158 if (qc->err_mask & AC_ERR_ATA_BUS) 2159 qc->err_mask &= ~(AC_ERR_DEV | AC_ERR_MEDIA | 2160 AC_ERR_INVALID); 2161 2162 /* any real error trumps unknown error */ 2163 if (qc->err_mask & ~AC_ERR_OTHER) 2164 qc->err_mask &= ~AC_ERR_OTHER; 2165 2166 /* 2167 * SENSE_VALID trumps dev/unknown error and revalidation. Upper 2168 * layers will determine whether the command is worth retrying 2169 * based on the sense data and device class/type. Otherwise, 2170 * determine directly if the command is worth retrying using its 2171 * error mask and flags. 2172 */ 2173 if (qc->flags & ATA_QCFLAG_SENSE_VALID) 2174 qc->err_mask &= ~(AC_ERR_DEV | AC_ERR_OTHER); 2175 else if (ata_eh_worth_retry(qc)) 2176 qc->flags |= ATA_QCFLAG_RETRY; 2177 2178 /* accumulate error info */ 2179 ehc->i.dev = qc->dev; 2180 all_err_mask |= qc->err_mask; 2181 if (qc->flags & ATA_QCFLAG_IO) 2182 eflags |= ATA_EFLAG_IS_IO; 2183 trace_ata_eh_link_autopsy_qc(qc); 2184 2185 /* Count quiet errors */ 2186 if (ata_eh_quiet(qc)) 2187 nr_quiet++; 2188 nr_failed++; 2189 } 2190 2191 /* If all failed commands requested silence, then be quiet */ 2192 if (nr_quiet == nr_failed) 2193 ehc->i.flags |= ATA_EHI_QUIET; 2194 2195 /* enforce default EH actions */ 2196 if (ap->pflags & ATA_PFLAG_FROZEN || 2197 all_err_mask & (AC_ERR_HSM | AC_ERR_TIMEOUT)) 2198 ehc->i.action |= ATA_EH_RESET; 2199 else if (((eflags & ATA_EFLAG_IS_IO) && all_err_mask) || 2200 (!(eflags & ATA_EFLAG_IS_IO) && (all_err_mask & ~AC_ERR_DEV))) 2201 ehc->i.action |= ATA_EH_REVALIDATE; 2202 2203 /* If we have offending qcs and the associated failed device, 2204 * perform per-dev EH action only on the offending device. 
2205 */ 2206 if (ehc->i.dev) { 2207 ehc->i.dev_action[ehc->i.dev->devno] |= 2208 ehc->i.action & ATA_EH_PERDEV_MASK; 2209 ehc->i.action &= ~ATA_EH_PERDEV_MASK; 2210 } 2211 2212 /* propagate timeout to host link */ 2213 if ((all_err_mask & AC_ERR_TIMEOUT) && !ata_is_host_link(link)) 2214 ap->link.eh_context.i.err_mask |= AC_ERR_TIMEOUT; 2215 2216 /* record error and consider speeding down */ 2217 dev = ehc->i.dev; 2218 if (!dev && ((ata_link_max_devices(link) == 1 && 2219 ata_dev_enabled(link->device)))) 2220 dev = link->device; 2221 2222 if (dev) { 2223 if (dev->flags & ATA_DFLAG_DUBIOUS_XFER) 2224 eflags |= ATA_EFLAG_DUBIOUS_XFER; 2225 ehc->i.action |= ata_eh_speed_down(dev, eflags, all_err_mask); 2226 trace_ata_eh_link_autopsy(dev, ehc->i.action, all_err_mask); 2227 } 2228 DPRINTK("EXIT\n"); 2229 } 2230 2231 /** 2232 * ata_eh_autopsy - analyze error and determine recovery action 2233 * @ap: host port to perform autopsy on 2234 * 2235 * Analyze all links of @ap and determine why they failed and 2236 * which recovery actions are needed. 2237 * 2238 * LOCKING: 2239 * Kernel thread context (may sleep). 2240 */ 2241 void ata_eh_autopsy(struct ata_port *ap) 2242 { 2243 struct ata_link *link; 2244 2245 ata_for_each_link(link, ap, EDGE) 2246 ata_eh_link_autopsy(link); 2247 2248 /* Handle the frigging slave link. Autopsy is done similarly 2249 * but actions and flags are transferred over to the master 2250 * link and handled from there. 2251 */ 2252 if (ap->slave_link) { 2253 struct ata_eh_context *mehc = &ap->link.eh_context; 2254 struct ata_eh_context *sehc = &ap->slave_link->eh_context; 2255 2256 /* transfer control flags from master to slave */ 2257 sehc->i.flags |= mehc->i.flags & ATA_EHI_TO_SLAVE_MASK; 2258 2259 /* perform autopsy on the slave link */ 2260 ata_eh_link_autopsy(ap->slave_link); 2261 2262 /* transfer actions from slave to master and clear slave */ 2263 ata_eh_about_to_do(ap->slave_link, NULL, ATA_EH_ALL_ACTIONS); 2264 mehc->i.action |= sehc->i.action; 2265 mehc->i.dev_action[1] |= sehc->i.dev_action[1]; 2266 mehc->i.flags |= sehc->i.flags; 2267 ata_eh_done(ap->slave_link, NULL, ATA_EH_ALL_ACTIONS); 2268 } 2269 2270 /* Autopsy of fanout ports can affect host link autopsy. 2271 * Perform host link autopsy last. 2272 */ 2273 if (sata_pmp_attached(ap)) 2274 ata_eh_link_autopsy(&ap->link); 2275 } 2276 2277 /** 2278 * ata_get_cmd_descript - get description for ATA command 2279 * @command: ATA command code to get description for 2280 * 2281 * Return a textual description of the given command, or NULL if the 2282 * command is not known. 
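 *
 * Hypothetical usage sketch (the caller and message below are
 * illustrative only; ata_eh_link_report() further down uses the
 * function in exactly this pattern):
 *
 *	const char *descr = ata_get_cmd_descript(ATA_CMD_FLUSH_EXT);
 *
 *	if (descr)
 *		pr_info("failed command: %s\n", descr);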
2283 * 2284 * LOCKING: 2285 * None 2286 */ 2287 const char *ata_get_cmd_descript(u8 command) 2288 { 2289 #ifdef CONFIG_ATA_VERBOSE_ERROR 2290 static const struct 2291 { 2292 u8 command; 2293 const char *text; 2294 } cmd_descr[] = { 2295 { ATA_CMD_DEV_RESET, "DEVICE RESET" }, 2296 { ATA_CMD_CHK_POWER, "CHECK POWER MODE" }, 2297 { ATA_CMD_STANDBY, "STANDBY" }, 2298 { ATA_CMD_IDLE, "IDLE" }, 2299 { ATA_CMD_EDD, "EXECUTE DEVICE DIAGNOSTIC" }, 2300 { ATA_CMD_DOWNLOAD_MICRO, "DOWNLOAD MICROCODE" }, 2301 { ATA_CMD_DOWNLOAD_MICRO_DMA, "DOWNLOAD MICROCODE DMA" }, 2302 { ATA_CMD_NOP, "NOP" }, 2303 { ATA_CMD_FLUSH, "FLUSH CACHE" }, 2304 { ATA_CMD_FLUSH_EXT, "FLUSH CACHE EXT" }, 2305 { ATA_CMD_ID_ATA, "IDENTIFY DEVICE" }, 2306 { ATA_CMD_ID_ATAPI, "IDENTIFY PACKET DEVICE" }, 2307 { ATA_CMD_SERVICE, "SERVICE" }, 2308 { ATA_CMD_READ, "READ DMA" }, 2309 { ATA_CMD_READ_EXT, "READ DMA EXT" }, 2310 { ATA_CMD_READ_QUEUED, "READ DMA QUEUED" }, 2311 { ATA_CMD_READ_STREAM_EXT, "READ STREAM EXT" }, 2312 { ATA_CMD_READ_STREAM_DMA_EXT, "READ STREAM DMA EXT" }, 2313 { ATA_CMD_WRITE, "WRITE DMA" }, 2314 { ATA_CMD_WRITE_EXT, "WRITE DMA EXT" }, 2315 { ATA_CMD_WRITE_QUEUED, "WRITE DMA QUEUED EXT" }, 2316 { ATA_CMD_WRITE_STREAM_EXT, "WRITE STREAM EXT" }, 2317 { ATA_CMD_WRITE_STREAM_DMA_EXT, "WRITE STREAM DMA EXT" }, 2318 { ATA_CMD_WRITE_FUA_EXT, "WRITE DMA FUA EXT" }, 2319 { ATA_CMD_WRITE_QUEUED_FUA_EXT, "WRITE DMA QUEUED FUA EXT" }, 2320 { ATA_CMD_FPDMA_READ, "READ FPDMA QUEUED" }, 2321 { ATA_CMD_FPDMA_WRITE, "WRITE FPDMA QUEUED" }, 2322 { ATA_CMD_FPDMA_SEND, "SEND FPDMA QUEUED" }, 2323 { ATA_CMD_FPDMA_RECV, "RECEIVE FPDMA QUEUED" }, 2324 { ATA_CMD_PIO_READ, "READ SECTOR(S)" }, 2325 { ATA_CMD_PIO_READ_EXT, "READ SECTOR(S) EXT" }, 2326 { ATA_CMD_PIO_WRITE, "WRITE SECTOR(S)" }, 2327 { ATA_CMD_PIO_WRITE_EXT, "WRITE SECTOR(S) EXT" }, 2328 { ATA_CMD_READ_MULTI, "READ MULTIPLE" }, 2329 { ATA_CMD_READ_MULTI_EXT, "READ MULTIPLE EXT" }, 2330 { ATA_CMD_WRITE_MULTI, "WRITE MULTIPLE" }, 2331 { ATA_CMD_WRITE_MULTI_EXT, "WRITE MULTIPLE EXT" }, 2332 { ATA_CMD_WRITE_MULTI_FUA_EXT, "WRITE MULTIPLE FUA EXT" }, 2333 { ATA_CMD_SET_FEATURES, "SET FEATURES" }, 2334 { ATA_CMD_SET_MULTI, "SET MULTIPLE MODE" }, 2335 { ATA_CMD_VERIFY, "READ VERIFY SECTOR(S)" }, 2336 { ATA_CMD_VERIFY_EXT, "READ VERIFY SECTOR(S) EXT" }, 2337 { ATA_CMD_WRITE_UNCORR_EXT, "WRITE UNCORRECTABLE EXT" }, 2338 { ATA_CMD_STANDBYNOW1, "STANDBY IMMEDIATE" }, 2339 { ATA_CMD_IDLEIMMEDIATE, "IDLE IMMEDIATE" }, 2340 { ATA_CMD_SLEEP, "SLEEP" }, 2341 { ATA_CMD_INIT_DEV_PARAMS, "INITIALIZE DEVICE PARAMETERS" }, 2342 { ATA_CMD_READ_NATIVE_MAX, "READ NATIVE MAX ADDRESS" }, 2343 { ATA_CMD_READ_NATIVE_MAX_EXT, "READ NATIVE MAX ADDRESS EXT" }, 2344 { ATA_CMD_SET_MAX, "SET MAX ADDRESS" }, 2345 { ATA_CMD_SET_MAX_EXT, "SET MAX ADDRESS EXT" }, 2346 { ATA_CMD_READ_LOG_EXT, "READ LOG EXT" }, 2347 { ATA_CMD_WRITE_LOG_EXT, "WRITE LOG EXT" }, 2348 { ATA_CMD_READ_LOG_DMA_EXT, "READ LOG DMA EXT" }, 2349 { ATA_CMD_WRITE_LOG_DMA_EXT, "WRITE LOG DMA EXT" }, 2350 { ATA_CMD_TRUSTED_NONDATA, "TRUSTED NON-DATA" }, 2351 { ATA_CMD_TRUSTED_RCV, "TRUSTED RECEIVE" }, 2352 { ATA_CMD_TRUSTED_RCV_DMA, "TRUSTED RECEIVE DMA" }, 2353 { ATA_CMD_TRUSTED_SND, "TRUSTED SEND" }, 2354 { ATA_CMD_TRUSTED_SND_DMA, "TRUSTED SEND DMA" }, 2355 { ATA_CMD_PMP_READ, "READ BUFFER" }, 2356 { ATA_CMD_PMP_READ_DMA, "READ BUFFER DMA" }, 2357 { ATA_CMD_PMP_WRITE, "WRITE BUFFER" }, 2358 { ATA_CMD_PMP_WRITE_DMA, "WRITE BUFFER DMA" }, 2359 { ATA_CMD_CONF_OVERLAY, "DEVICE CONFIGURATION OVERLAY" }, 2360 { ATA_CMD_SEC_SET_PASS, "SECURITY SET 
PASSWORD" }, 2361 { ATA_CMD_SEC_UNLOCK, "SECURITY UNLOCK" }, 2362 { ATA_CMD_SEC_ERASE_PREP, "SECURITY ERASE PREPARE" }, 2363 { ATA_CMD_SEC_ERASE_UNIT, "SECURITY ERASE UNIT" }, 2364 { ATA_CMD_SEC_FREEZE_LOCK, "SECURITY FREEZE LOCK" }, 2365 { ATA_CMD_SEC_DISABLE_PASS, "SECURITY DISABLE PASSWORD" }, 2366 { ATA_CMD_CONFIG_STREAM, "CONFIGURE STREAM" }, 2367 { ATA_CMD_SMART, "SMART" }, 2368 { ATA_CMD_MEDIA_LOCK, "DOOR LOCK" }, 2369 { ATA_CMD_MEDIA_UNLOCK, "DOOR UNLOCK" }, 2370 { ATA_CMD_DSM, "DATA SET MANAGEMENT" }, 2371 { ATA_CMD_CHK_MED_CRD_TYP, "CHECK MEDIA CARD TYPE" }, 2372 { ATA_CMD_CFA_REQ_EXT_ERR, "CFA REQUEST EXTENDED ERROR" }, 2373 { ATA_CMD_CFA_WRITE_NE, "CFA WRITE SECTORS WITHOUT ERASE" }, 2374 { ATA_CMD_CFA_TRANS_SECT, "CFA TRANSLATE SECTOR" }, 2375 { ATA_CMD_CFA_ERASE, "CFA ERASE SECTORS" }, 2376 { ATA_CMD_CFA_WRITE_MULT_NE, "CFA WRITE MULTIPLE WITHOUT ERASE" }, 2377 { ATA_CMD_REQ_SENSE_DATA, "REQUEST SENSE DATA EXT" }, 2378 { ATA_CMD_SANITIZE_DEVICE, "SANITIZE DEVICE" }, 2379 { ATA_CMD_ZAC_MGMT_IN, "ZAC MANAGEMENT IN" }, 2380 { ATA_CMD_ZAC_MGMT_OUT, "ZAC MANAGEMENT OUT" }, 2381 { ATA_CMD_READ_LONG, "READ LONG (with retries)" }, 2382 { ATA_CMD_READ_LONG_ONCE, "READ LONG (without retries)" }, 2383 { ATA_CMD_WRITE_LONG, "WRITE LONG (with retries)" }, 2384 { ATA_CMD_WRITE_LONG_ONCE, "WRITE LONG (without retries)" }, 2385 { ATA_CMD_RESTORE, "RECALIBRATE" }, 2386 { 0, NULL } /* terminate list */ 2387 }; 2388 2389 unsigned int i; 2390 for (i = 0; cmd_descr[i].text; i++) 2391 if (cmd_descr[i].command == command) 2392 return cmd_descr[i].text; 2393 #endif 2394 2395 return NULL; 2396 } 2397 EXPORT_SYMBOL_GPL(ata_get_cmd_descript); 2398 2399 /** 2400 * ata_eh_link_report - report error handling to user 2401 * @link: ATA link EH is going on 2402 * 2403 * Report EH to user. 2404 * 2405 * LOCKING: 2406 * None. 
2407 */ 2408 static void ata_eh_link_report(struct ata_link *link) 2409 { 2410 struct ata_port *ap = link->ap; 2411 struct ata_eh_context *ehc = &link->eh_context; 2412 struct ata_queued_cmd *qc; 2413 const char *frozen, *desc; 2414 char tries_buf[6] = ""; 2415 int tag, nr_failed = 0; 2416 2417 if (ehc->i.flags & ATA_EHI_QUIET) 2418 return; 2419 2420 desc = NULL; 2421 if (ehc->i.desc[0] != '\0') 2422 desc = ehc->i.desc; 2423 2424 ata_qc_for_each_raw(ap, qc, tag) { 2425 if (!(qc->flags & ATA_QCFLAG_FAILED) || 2426 ata_dev_phys_link(qc->dev) != link || 2427 ((qc->flags & ATA_QCFLAG_QUIET) && 2428 qc->err_mask == AC_ERR_DEV)) 2429 continue; 2430 if (qc->flags & ATA_QCFLAG_SENSE_VALID && !qc->err_mask) 2431 continue; 2432 2433 nr_failed++; 2434 } 2435 2436 if (!nr_failed && !ehc->i.err_mask) 2437 return; 2438 2439 frozen = ""; 2440 if (ap->pflags & ATA_PFLAG_FROZEN) 2441 frozen = " frozen"; 2442 2443 if (ap->eh_tries < ATA_EH_MAX_TRIES) 2444 snprintf(tries_buf, sizeof(tries_buf), " t%d", 2445 ap->eh_tries); 2446 2447 if (ehc->i.dev) { 2448 ata_dev_err(ehc->i.dev, "exception Emask 0x%x " 2449 "SAct 0x%x SErr 0x%x action 0x%x%s%s\n", 2450 ehc->i.err_mask, link->sactive, ehc->i.serror, 2451 ehc->i.action, frozen, tries_buf); 2452 if (desc) 2453 ata_dev_err(ehc->i.dev, "%s\n", desc); 2454 } else { 2455 ata_link_err(link, "exception Emask 0x%x " 2456 "SAct 0x%x SErr 0x%x action 0x%x%s%s\n", 2457 ehc->i.err_mask, link->sactive, ehc->i.serror, 2458 ehc->i.action, frozen, tries_buf); 2459 if (desc) 2460 ata_link_err(link, "%s\n", desc); 2461 } 2462 2463 #ifdef CONFIG_ATA_VERBOSE_ERROR 2464 if (ehc->i.serror) 2465 ata_link_err(link, 2466 "SError: { %s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s}\n", 2467 ehc->i.serror & SERR_DATA_RECOVERED ? "RecovData " : "", 2468 ehc->i.serror & SERR_COMM_RECOVERED ? "RecovComm " : "", 2469 ehc->i.serror & SERR_DATA ? "UnrecovData " : "", 2470 ehc->i.serror & SERR_PERSISTENT ? "Persist " : "", 2471 ehc->i.serror & SERR_PROTOCOL ? "Proto " : "", 2472 ehc->i.serror & SERR_INTERNAL ? "HostInt " : "", 2473 ehc->i.serror & SERR_PHYRDY_CHG ? "PHYRdyChg " : "", 2474 ehc->i.serror & SERR_PHY_INT_ERR ? "PHYInt " : "", 2475 ehc->i.serror & SERR_COMM_WAKE ? "CommWake " : "", 2476 ehc->i.serror & SERR_10B_8B_ERR ? "10B8B " : "", 2477 ehc->i.serror & SERR_DISPARITY ? "Dispar " : "", 2478 ehc->i.serror & SERR_CRC ? "BadCRC " : "", 2479 ehc->i.serror & SERR_HANDSHAKE ? "Handshk " : "", 2480 ehc->i.serror & SERR_LINK_SEQ_ERR ? "LinkSeq " : "", 2481 ehc->i.serror & SERR_TRANS_ST_ERROR ? "TrStaTrns " : "", 2482 ehc->i.serror & SERR_UNRECOG_FIS ? "UnrecFIS " : "", 2483 ehc->i.serror & SERR_DEV_XCHG ? 
"DevExch " : ""); 2484 #endif 2485 2486 ata_qc_for_each_raw(ap, qc, tag) { 2487 struct ata_taskfile *cmd = &qc->tf, *res = &qc->result_tf; 2488 char data_buf[20] = ""; 2489 char cdb_buf[70] = ""; 2490 2491 if (!(qc->flags & ATA_QCFLAG_FAILED) || 2492 ata_dev_phys_link(qc->dev) != link || !qc->err_mask) 2493 continue; 2494 2495 if (qc->dma_dir != DMA_NONE) { 2496 static const char *dma_str[] = { 2497 [DMA_BIDIRECTIONAL] = "bidi", 2498 [DMA_TO_DEVICE] = "out", 2499 [DMA_FROM_DEVICE] = "in", 2500 }; 2501 const char *prot_str = NULL; 2502 2503 switch (qc->tf.protocol) { 2504 case ATA_PROT_UNKNOWN: 2505 prot_str = "unknown"; 2506 break; 2507 case ATA_PROT_NODATA: 2508 prot_str = "nodata"; 2509 break; 2510 case ATA_PROT_PIO: 2511 prot_str = "pio"; 2512 break; 2513 case ATA_PROT_DMA: 2514 prot_str = "dma"; 2515 break; 2516 case ATA_PROT_NCQ: 2517 prot_str = "ncq dma"; 2518 break; 2519 case ATA_PROT_NCQ_NODATA: 2520 prot_str = "ncq nodata"; 2521 break; 2522 case ATAPI_PROT_NODATA: 2523 prot_str = "nodata"; 2524 break; 2525 case ATAPI_PROT_PIO: 2526 prot_str = "pio"; 2527 break; 2528 case ATAPI_PROT_DMA: 2529 prot_str = "dma"; 2530 break; 2531 } 2532 snprintf(data_buf, sizeof(data_buf), " %s %u %s", 2533 prot_str, qc->nbytes, dma_str[qc->dma_dir]); 2534 } 2535 2536 if (ata_is_atapi(qc->tf.protocol)) { 2537 const u8 *cdb = qc->cdb; 2538 size_t cdb_len = qc->dev->cdb_len; 2539 2540 if (qc->scsicmd) { 2541 cdb = qc->scsicmd->cmnd; 2542 cdb_len = qc->scsicmd->cmd_len; 2543 } 2544 __scsi_format_command(cdb_buf, sizeof(cdb_buf), 2545 cdb, cdb_len); 2546 } else { 2547 const char *descr = ata_get_cmd_descript(cmd->command); 2548 if (descr) 2549 ata_dev_err(qc->dev, "failed command: %s\n", 2550 descr); 2551 } 2552 2553 ata_dev_err(qc->dev, 2554 "cmd %02x/%02x:%02x:%02x:%02x:%02x/%02x:%02x:%02x:%02x:%02x/%02x " 2555 "tag %d%s\n %s" 2556 "res %02x/%02x:%02x:%02x:%02x:%02x/%02x:%02x:%02x:%02x:%02x/%02x " 2557 "Emask 0x%x (%s)%s\n", 2558 cmd->command, cmd->feature, cmd->nsect, 2559 cmd->lbal, cmd->lbam, cmd->lbah, 2560 cmd->hob_feature, cmd->hob_nsect, 2561 cmd->hob_lbal, cmd->hob_lbam, cmd->hob_lbah, 2562 cmd->device, qc->tag, data_buf, cdb_buf, 2563 res->command, res->feature, res->nsect, 2564 res->lbal, res->lbam, res->lbah, 2565 res->hob_feature, res->hob_nsect, 2566 res->hob_lbal, res->hob_lbam, res->hob_lbah, 2567 res->device, qc->err_mask, ata_err_string(qc->err_mask), 2568 qc->err_mask & AC_ERR_NCQ ? " <F>" : ""); 2569 2570 #ifdef CONFIG_ATA_VERBOSE_ERROR 2571 if (res->command & (ATA_BUSY | ATA_DRDY | ATA_DF | ATA_DRQ | 2572 ATA_SENSE | ATA_ERR)) { 2573 if (res->command & ATA_BUSY) 2574 ata_dev_err(qc->dev, "status: { Busy }\n"); 2575 else 2576 ata_dev_err(qc->dev, "status: { %s%s%s%s%s}\n", 2577 res->command & ATA_DRDY ? "DRDY " : "", 2578 res->command & ATA_DF ? "DF " : "", 2579 res->command & ATA_DRQ ? "DRQ " : "", 2580 res->command & ATA_SENSE ? "SENSE " : "", 2581 res->command & ATA_ERR ? "ERR " : ""); 2582 } 2583 2584 if (cmd->command != ATA_CMD_PACKET && 2585 (res->feature & (ATA_ICRC | ATA_UNC | ATA_AMNF | 2586 ATA_IDNF | ATA_ABORTED))) 2587 ata_dev_err(qc->dev, "error: { %s%s%s%s%s}\n", 2588 res->feature & ATA_ICRC ? "ICRC " : "", 2589 res->feature & ATA_UNC ? "UNC " : "", 2590 res->feature & ATA_AMNF ? "AMNF " : "", 2591 res->feature & ATA_IDNF ? "IDNF " : "", 2592 res->feature & ATA_ABORTED ? "ABRT " : ""); 2593 #endif 2594 } 2595 } 2596 2597 /** 2598 * ata_eh_report - report error handling to user 2599 * @ap: ATA port to report EH about 2600 * 2601 * Report EH to user. 
2602 * 2603 * LOCKING: 2604 * None. 2605 */ 2606 void ata_eh_report(struct ata_port *ap) 2607 { 2608 struct ata_link *link; 2609 2610 ata_for_each_link(link, ap, HOST_FIRST) 2611 ata_eh_link_report(link); 2612 } 2613 2614 static int ata_do_reset(struct ata_link *link, ata_reset_fn_t reset, 2615 unsigned int *classes, unsigned long deadline, 2616 bool clear_classes) 2617 { 2618 struct ata_device *dev; 2619 2620 if (clear_classes) 2621 ata_for_each_dev(dev, link, ALL) 2622 classes[dev->devno] = ATA_DEV_UNKNOWN; 2623 2624 return reset(link, classes, deadline); 2625 } 2626 2627 static int ata_eh_followup_srst_needed(struct ata_link *link, int rc) 2628 { 2629 if ((link->flags & ATA_LFLAG_NO_SRST) || ata_link_offline(link)) 2630 return 0; 2631 if (rc == -EAGAIN) 2632 return 1; 2633 if (sata_pmp_supported(link->ap) && ata_is_host_link(link)) 2634 return 1; 2635 return 0; 2636 } 2637 2638 int ata_eh_reset(struct ata_link *link, int classify, 2639 ata_prereset_fn_t prereset, ata_reset_fn_t softreset, 2640 ata_reset_fn_t hardreset, ata_postreset_fn_t postreset) 2641 { 2642 struct ata_port *ap = link->ap; 2643 struct ata_link *slave = ap->slave_link; 2644 struct ata_eh_context *ehc = &link->eh_context; 2645 struct ata_eh_context *sehc = slave ? &slave->eh_context : NULL; 2646 unsigned int *classes = ehc->classes; 2647 unsigned int lflags = link->flags; 2648 int verbose = !(ehc->i.flags & ATA_EHI_QUIET); 2649 int max_tries = 0, try = 0; 2650 struct ata_link *failed_link; 2651 struct ata_device *dev; 2652 unsigned long deadline, now; 2653 ata_reset_fn_t reset; 2654 unsigned long flags; 2655 u32 sstatus; 2656 int nr_unknown, rc; 2657 2658 /* 2659 * Prepare to reset 2660 */ 2661 while (ata_eh_reset_timeouts[max_tries] != ULONG_MAX) 2662 max_tries++; 2663 if (link->flags & ATA_LFLAG_RST_ONCE) 2664 max_tries = 1; 2665 if (link->flags & ATA_LFLAG_NO_HRST) 2666 hardreset = NULL; 2667 if (link->flags & ATA_LFLAG_NO_SRST) 2668 softreset = NULL; 2669 2670 /* make sure each reset attempt is at least COOL_DOWN apart */ 2671 if (ehc->i.flags & ATA_EHI_DID_RESET) { 2672 now = jiffies; 2673 WARN_ON(time_after(ehc->last_reset, now)); 2674 deadline = ata_deadline(ehc->last_reset, 2675 ATA_EH_RESET_COOL_DOWN); 2676 if (time_before(now, deadline)) 2677 schedule_timeout_uninterruptible(deadline - now); 2678 } 2679 2680 spin_lock_irqsave(ap->lock, flags); 2681 ap->pflags |= ATA_PFLAG_RESETTING; 2682 spin_unlock_irqrestore(ap->lock, flags); 2683 2684 ata_eh_about_to_do(link, NULL, ATA_EH_RESET); 2685 2686 ata_for_each_dev(dev, link, ALL) { 2687 /* If we issue an SRST then an ATA drive (not ATAPI) 2688 * may change configuration and be in PIO0 timing. If 2689 * we do a hard reset (or are coming from power on) 2690 * this is true for ATA or ATAPI. Until we've set a 2691 * suitable controller mode we should not touch the 2692 * bus as we may be talking too fast. 2693 */ 2694 dev->pio_mode = XFER_PIO_0; 2695 dev->dma_mode = 0xff; 2696 2697 /* If the controller has a pio mode setup function 2698 * then use it to set the chipset to rights. Don't 2699 * touch the DMA setup as that will be dealt with when 2700 * configuring devices. 
2701 */ 2702 if (ap->ops->set_piomode) 2703 ap->ops->set_piomode(ap, dev); 2704 } 2705 2706 /* prefer hardreset */ 2707 reset = NULL; 2708 ehc->i.action &= ~ATA_EH_RESET; 2709 if (hardreset) { 2710 reset = hardreset; 2711 ehc->i.action |= ATA_EH_HARDRESET; 2712 } else if (softreset) { 2713 reset = softreset; 2714 ehc->i.action |= ATA_EH_SOFTRESET; 2715 } 2716 2717 if (prereset) { 2718 unsigned long deadline = ata_deadline(jiffies, 2719 ATA_EH_PRERESET_TIMEOUT); 2720 2721 if (slave) { 2722 sehc->i.action &= ~ATA_EH_RESET; 2723 sehc->i.action |= ehc->i.action; 2724 } 2725 2726 rc = prereset(link, deadline); 2727 2728 /* If present, do prereset on slave link too. Reset 2729 * is skipped iff both master and slave links report 2730 * -ENOENT or clear ATA_EH_RESET. 2731 */ 2732 if (slave && (rc == 0 || rc == -ENOENT)) { 2733 int tmp; 2734 2735 tmp = prereset(slave, deadline); 2736 if (tmp != -ENOENT) 2737 rc = tmp; 2738 2739 ehc->i.action |= sehc->i.action; 2740 } 2741 2742 if (rc) { 2743 if (rc == -ENOENT) { 2744 ata_link_dbg(link, "port disabled--ignoring\n"); 2745 ehc->i.action &= ~ATA_EH_RESET; 2746 2747 ata_for_each_dev(dev, link, ALL) 2748 classes[dev->devno] = ATA_DEV_NONE; 2749 2750 rc = 0; 2751 } else 2752 ata_link_err(link, 2753 "prereset failed (errno=%d)\n", 2754 rc); 2755 goto out; 2756 } 2757 2758 /* prereset() might have cleared ATA_EH_RESET. If so, 2759 * bang classes, thaw and return. 2760 */ 2761 if (reset && !(ehc->i.action & ATA_EH_RESET)) { 2762 ata_for_each_dev(dev, link, ALL) 2763 classes[dev->devno] = ATA_DEV_NONE; 2764 if ((ap->pflags & ATA_PFLAG_FROZEN) && 2765 ata_is_host_link(link)) 2766 ata_eh_thaw_port(ap); 2767 rc = 0; 2768 goto out; 2769 } 2770 } 2771 2772 retry: 2773 /* 2774 * Perform reset 2775 */ 2776 if (ata_is_host_link(link)) 2777 ata_eh_freeze_port(ap); 2778 2779 deadline = ata_deadline(jiffies, ata_eh_reset_timeouts[try++]); 2780 2781 if (reset) { 2782 if (verbose) 2783 ata_link_info(link, "%s resetting link\n", 2784 reset == softreset ? 
"soft" : "hard"); 2785 2786 /* mark that this EH session started with reset */ 2787 ehc->last_reset = jiffies; 2788 if (reset == hardreset) 2789 ehc->i.flags |= ATA_EHI_DID_HARDRESET; 2790 else 2791 ehc->i.flags |= ATA_EHI_DID_SOFTRESET; 2792 2793 rc = ata_do_reset(link, reset, classes, deadline, true); 2794 if (rc && rc != -EAGAIN) { 2795 failed_link = link; 2796 goto fail; 2797 } 2798 2799 /* hardreset slave link if existent */ 2800 if (slave && reset == hardreset) { 2801 int tmp; 2802 2803 if (verbose) 2804 ata_link_info(slave, "hard resetting link\n"); 2805 2806 ata_eh_about_to_do(slave, NULL, ATA_EH_RESET); 2807 tmp = ata_do_reset(slave, reset, classes, deadline, 2808 false); 2809 switch (tmp) { 2810 case -EAGAIN: 2811 rc = -EAGAIN; 2812 case 0: 2813 break; 2814 default: 2815 failed_link = slave; 2816 rc = tmp; 2817 goto fail; 2818 } 2819 } 2820 2821 /* perform follow-up SRST if necessary */ 2822 if (reset == hardreset && 2823 ata_eh_followup_srst_needed(link, rc)) { 2824 reset = softreset; 2825 2826 if (!reset) { 2827 ata_link_err(link, 2828 "follow-up softreset required but no softreset available\n"); 2829 failed_link = link; 2830 rc = -EINVAL; 2831 goto fail; 2832 } 2833 2834 ata_eh_about_to_do(link, NULL, ATA_EH_RESET); 2835 rc = ata_do_reset(link, reset, classes, deadline, true); 2836 if (rc) { 2837 failed_link = link; 2838 goto fail; 2839 } 2840 } 2841 } else { 2842 if (verbose) 2843 ata_link_info(link, 2844 "no reset method available, skipping reset\n"); 2845 if (!(lflags & ATA_LFLAG_ASSUME_CLASS)) 2846 lflags |= ATA_LFLAG_ASSUME_ATA; 2847 } 2848 2849 /* 2850 * Post-reset processing 2851 */ 2852 ata_for_each_dev(dev, link, ALL) { 2853 /* After the reset, the device state is PIO 0 and the 2854 * controller state is undefined. Reset also wakes up 2855 * drives from sleeping mode. 2856 */ 2857 dev->pio_mode = XFER_PIO_0; 2858 dev->flags &= ~ATA_DFLAG_SLEEPING; 2859 2860 if (ata_phys_link_offline(ata_dev_phys_link(dev))) 2861 continue; 2862 2863 /* apply class override */ 2864 if (lflags & ATA_LFLAG_ASSUME_ATA) 2865 classes[dev->devno] = ATA_DEV_ATA; 2866 else if (lflags & ATA_LFLAG_ASSUME_SEMB) 2867 classes[dev->devno] = ATA_DEV_SEMB_UNSUP; 2868 } 2869 2870 /* record current link speed */ 2871 if (sata_scr_read(link, SCR_STATUS, &sstatus) == 0) 2872 link->sata_spd = (sstatus >> 4) & 0xf; 2873 if (slave && sata_scr_read(slave, SCR_STATUS, &sstatus) == 0) 2874 slave->sata_spd = (sstatus >> 4) & 0xf; 2875 2876 /* thaw the port */ 2877 if (ata_is_host_link(link)) 2878 ata_eh_thaw_port(ap); 2879 2880 /* postreset() should clear hardware SError. Although SError 2881 * is cleared during link resume, clearing SError here is 2882 * necessary as some PHYs raise hotplug events after SRST. 2883 * This introduces race condition where hotplug occurs between 2884 * reset and here. This race is mediated by cross checking 2885 * link onlineness and classification result later. 2886 */ 2887 if (postreset) { 2888 postreset(link, classes); 2889 if (slave) 2890 postreset(slave, classes); 2891 } 2892 2893 /* 2894 * Some controllers can't be frozen very well and may set spurious 2895 * error conditions during reset. Clear accumulated error 2896 * information and re-thaw the port if frozen. As reset is the 2897 * final recovery action and we cross check link onlineness against 2898 * device classification later, no hotplug event is lost by this. 
2899 */
2900 spin_lock_irqsave(link->ap->lock, flags);
2901 memset(&link->eh_info, 0, sizeof(link->eh_info));
2902 if (slave)
2903 memset(&slave->eh_info, 0, sizeof(slave->eh_info));
2904 ap->pflags &= ~ATA_PFLAG_EH_PENDING;
2905 spin_unlock_irqrestore(link->ap->lock, flags);
2906
2907 if (ap->pflags & ATA_PFLAG_FROZEN)
2908 ata_eh_thaw_port(ap);
2909
2910 /*
2911 * Make sure onlineness and classification result correspond.
2912 * Hotplug could have happened during reset and some
2913 * controllers fail to wait while a drive is spinning up after
2914 * being hotplugged, causing misdetection. By cross checking
2915 * link on/offlineness and classification result, those
2916 * conditions can be reliably detected and retried.
2917 */
2918 nr_unknown = 0;
2919 ata_for_each_dev(dev, link, ALL) {
2920 if (ata_phys_link_online(ata_dev_phys_link(dev))) {
2921 if (classes[dev->devno] == ATA_DEV_UNKNOWN) {
2922 ata_dev_dbg(dev, "link online but device misclassified\n");
2923 classes[dev->devno] = ATA_DEV_NONE;
2924 nr_unknown++;
2925 }
2926 } else if (ata_phys_link_offline(ata_dev_phys_link(dev))) {
2927 if (ata_class_enabled(classes[dev->devno]))
2928 ata_dev_dbg(dev,
2929 "link offline, clearing class %d to NONE\n",
2930 classes[dev->devno]);
2931 classes[dev->devno] = ATA_DEV_NONE;
2932 } else if (classes[dev->devno] == ATA_DEV_UNKNOWN) {
2933 ata_dev_dbg(dev,
2934 "link status unknown, clearing UNKNOWN to NONE\n");
2935 classes[dev->devno] = ATA_DEV_NONE;
2936 }
2937 }
2938
2939 if (classify && nr_unknown) {
2940 if (try < max_tries) {
2941 ata_link_warn(link,
2942 "link online but %d devices misclassified, retrying\n",
2943 nr_unknown);
2944 failed_link = link;
2945 rc = -EAGAIN;
2946 goto fail;
2947 }
2948 ata_link_warn(link,
2949 "link online but %d devices misclassified, "
2950 "device detection might fail\n", nr_unknown);
2951 }
2952
2953 /* reset successful, schedule revalidation */
2954 ata_eh_done(link, NULL, ATA_EH_RESET);
2955 if (slave)
2956 ata_eh_done(slave, NULL, ATA_EH_RESET);
2957 ehc->last_reset = jiffies; /* update to completion time */
2958 ehc->i.action |= ATA_EH_REVALIDATE;
2959 link->lpm_policy = ATA_LPM_UNKNOWN; /* reset LPM state */
2960
2961 rc = 0;
2962 out:
2963 /* clear hotplug flag */
2964 ehc->i.flags &= ~ATA_EHI_HOTPLUGGED;
2965 if (slave)
2966 sehc->i.flags &= ~ATA_EHI_HOTPLUGGED;
2967
2968 spin_lock_irqsave(ap->lock, flags);
2969 ap->pflags &= ~ATA_PFLAG_RESETTING;
2970 spin_unlock_irqrestore(ap->lock, flags);
2971
2972 return rc;
2973
2974 fail:
2975 /* if SCR isn't accessible on a fan-out port, PMP needs to be reset */
2976 if (!ata_is_host_link(link) &&
2977 sata_scr_read(link, SCR_STATUS, &sstatus))
2978 rc = -ERESTART;
2979
2980 if (try >= max_tries) {
2981 /*
2982 * Thaw host port even if reset failed, so that the port
2983 * can be retried on the next phy event. This risks
2984 * repeated EH runs but seems to be a better tradeoff than
2985 * shutting down a port after a botched hotplug attempt.
2986 */
2987 if (ata_is_host_link(link))
2988 ata_eh_thaw_port(ap);
2989 goto out;
2990 }
2991
2992 now = jiffies;
2993 if (time_before(now, deadline)) {
2994 unsigned long delta = deadline - now;
2995
2996 ata_link_warn(failed_link,
2997 "reset failed (errno=%d), retrying in %u secs\n",
2998 rc, DIV_ROUND_UP(jiffies_to_msecs(delta), 1000));
2999
3000 ata_eh_release(ap);
3001 while (delta)
3002 delta = schedule_timeout_uninterruptible(delta);
3003 ata_eh_acquire(ap);
3004 }
3005
3006 /*
3007 * While disks spin up behind a PMP, some controllers fail sending SRST.
3008 * They need to be reset - as well as the PMP - before retrying. 3009 */ 3010 if (rc == -ERESTART) { 3011 if (ata_is_host_link(link)) 3012 ata_eh_thaw_port(ap); 3013 goto out; 3014 } 3015 3016 if (try == max_tries - 1) { 3017 sata_down_spd_limit(link, 0); 3018 if (slave) 3019 sata_down_spd_limit(slave, 0); 3020 } else if (rc == -EPIPE) 3021 sata_down_spd_limit(failed_link, 0); 3022 3023 if (hardreset) 3024 reset = hardreset; 3025 goto retry; 3026 } 3027 3028 static inline void ata_eh_pull_park_action(struct ata_port *ap) 3029 { 3030 struct ata_link *link; 3031 struct ata_device *dev; 3032 unsigned long flags; 3033 3034 /* 3035 * This function can be thought of as an extended version of 3036 * ata_eh_about_to_do() specially crafted to accommodate the 3037 * requirements of ATA_EH_PARK handling. Since the EH thread 3038 * does not leave the do {} while () loop in ata_eh_recover as 3039 * long as the timeout for a park request to *one* device on 3040 * the port has not expired, and since we still want to pick 3041 * up park requests to other devices on the same port or 3042 * timeout updates for the same device, we have to pull 3043 * ATA_EH_PARK actions from eh_info into eh_context.i 3044 * ourselves at the beginning of each pass over the loop. 3045 * 3046 * Additionally, all write accesses to &ap->park_req_pending 3047 * through reinit_completion() (see below) or complete_all() 3048 * (see ata_scsi_park_store()) are protected by the host lock. 3049 * As a result we have that park_req_pending.done is zero on 3050 * exit from this function, i.e. when ATA_EH_PARK actions for 3051 * *all* devices on port ap have been pulled into the 3052 * respective eh_context structs. If, and only if, 3053 * park_req_pending.done is non-zero by the time we reach 3054 * wait_for_completion_timeout(), another ATA_EH_PARK action 3055 * has been scheduled for at least one of the devices on port 3056 * ap and we have to cycle over the do {} while () loop in 3057 * ata_eh_recover() again. 
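 *
 * Illustrative interleaving (hypothetical timeline, not a trace):
 *
 *	EH thread:  ata_eh_pull_park_action()
 *	              reinit_completion(&ap->park_req_pending)
 *	user:       ata_scsi_park_store()
 *	              complete_all(&ap->park_req_pending)
 *	EH thread:  wait_for_completion_timeout() returns non-zero,
 *	            so ata_eh_recover() cycles its do {} while () loop
 *	            and pulls the newly scheduled ATA_EH_PARK action.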
3058 */
3059
3060 spin_lock_irqsave(ap->lock, flags);
3061 reinit_completion(&ap->park_req_pending);
3062 ata_for_each_link(link, ap, EDGE) {
3063 ata_for_each_dev(dev, link, ALL) {
3064 struct ata_eh_info *ehi = &link->eh_info;
3065
3066 link->eh_context.i.dev_action[dev->devno] |=
3067 ehi->dev_action[dev->devno] & ATA_EH_PARK;
3068 ata_eh_clear_action(link, dev, ehi, ATA_EH_PARK);
3069 }
3070 }
3071 spin_unlock_irqrestore(ap->lock, flags);
3072 }
3073
3074 static void ata_eh_park_issue_cmd(struct ata_device *dev, int park)
3075 {
3076 struct ata_eh_context *ehc = &dev->link->eh_context;
3077 struct ata_taskfile tf;
3078 unsigned int err_mask;
3079
3080 ata_tf_init(dev, &tf);
3081 if (park) {
3082 ehc->unloaded_mask |= 1 << dev->devno;
3083 tf.command = ATA_CMD_IDLEIMMEDIATE;
3084 tf.feature = 0x44; /* IDLE IMMEDIATE with UNLOAD FEATURE */
3085 tf.lbal = 0x4c; /* LBA 0x554e4c reads "UNL" per ATA-8 ACS */
3086 tf.lbam = 0x4e;
3087 tf.lbah = 0x55;
3088 } else {
3089 ehc->unloaded_mask &= ~(1 << dev->devno);
3090 tf.command = ATA_CMD_CHK_POWER;
3091 }
3092
3093 tf.flags |= ATA_TFLAG_DEVICE | ATA_TFLAG_ISADDR;
3094 tf.protocol = ATA_PROT_NODATA;
3095 err_mask = ata_exec_internal(dev, &tf, NULL, DMA_NONE, NULL, 0, 0);
3096 if (park && (err_mask || tf.lbal != 0xc4)) { /* 0xc4: unload done */
3097 ata_dev_err(dev, "head unload failed!\n");
3098 ehc->unloaded_mask &= ~(1 << dev->devno);
3099 }
3100 }
3101
3102 static int ata_eh_revalidate_and_attach(struct ata_link *link,
3103 struct ata_device **r_failed_dev)
3104 {
3105 struct ata_port *ap = link->ap;
3106 struct ata_eh_context *ehc = &link->eh_context;
3107 struct ata_device *dev;
3108 unsigned int new_mask = 0;
3109 unsigned long flags;
3110 int rc = 0;
3111
3112 DPRINTK("ENTER\n");
3113
3114 /* For PATA drive side cable detection to work, IDENTIFY must
3115 * be done backwards such that PDIAG- is released by the slave
3116 * device before the master device is identified.
3117 */
3118 ata_for_each_dev(dev, link, ALL_REVERSE) {
3119 unsigned int action = ata_eh_dev_action(dev);
3120 unsigned int readid_flags = 0;
3121
3122 if (ehc->i.flags & ATA_EHI_DID_RESET)
3123 readid_flags |= ATA_READID_POSTRESET;
3124
3125 if ((action & ATA_EH_REVALIDATE) && ata_dev_enabled(dev)) {
3126 WARN_ON(dev->class == ATA_DEV_PMP);
3127
3128 if (ata_phys_link_offline(ata_dev_phys_link(dev))) {
3129 rc = -EIO;
3130 goto err;
3131 }
3132
3133 ata_eh_about_to_do(link, dev, ATA_EH_REVALIDATE);
3134 rc = ata_dev_revalidate(dev, ehc->classes[dev->devno],
3135 readid_flags);
3136 if (rc)
3137 goto err;
3138
3139 ata_eh_done(link, dev, ATA_EH_REVALIDATE);
3140
3141 /* Configuration may have changed, reconfigure
3142 * transfer mode.
3143 */
3144 ehc->i.flags |= ATA_EHI_SETMODE;
3145
3146 /* schedule the scsi_rescan_device() here */
3147 schedule_work(&(ap->scsi_rescan_task));
3148 } else if (dev->class == ATA_DEV_UNKNOWN &&
3149 ehc->tries[dev->devno] &&
3150 ata_class_enabled(ehc->classes[dev->devno])) {
3151 /* Temporarily set dev->class; it will be set
3152 * permanently once all configurations are
3153 * complete. This is necessary because new
3154 * device configuration is done in two
3155 * separate loops.
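 * (For instance, a newly found device that identifies as a PMP is
 * attached via sata_pmp_attach() in this first, reverse-order loop,
 * while ordinary ATA/ATAPI devices are only configured in the
 * second, forward loop further below.)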
3156 */ 3157 dev->class = ehc->classes[dev->devno]; 3158 3159 if (dev->class == ATA_DEV_PMP) 3160 rc = sata_pmp_attach(dev); 3161 else 3162 rc = ata_dev_read_id(dev, &dev->class, 3163 readid_flags, dev->id); 3164 3165 /* read_id might have changed class, store and reset */ 3166 ehc->classes[dev->devno] = dev->class; 3167 dev->class = ATA_DEV_UNKNOWN; 3168 3169 switch (rc) { 3170 case 0: 3171 /* clear error info accumulated during probe */ 3172 ata_ering_clear(&dev->ering); 3173 new_mask |= 1 << dev->devno; 3174 break; 3175 case -ENOENT: 3176 /* IDENTIFY was issued to non-existent 3177 * device. No need to reset. Just 3178 * thaw and ignore the device. 3179 */ 3180 ata_eh_thaw_port(ap); 3181 break; 3182 default: 3183 goto err; 3184 } 3185 } 3186 } 3187 3188 /* PDIAG- should have been released, ask cable type if post-reset */ 3189 if ((ehc->i.flags & ATA_EHI_DID_RESET) && ata_is_host_link(link)) { 3190 if (ap->ops->cable_detect) 3191 ap->cbl = ap->ops->cable_detect(ap); 3192 ata_force_cbl(ap); 3193 } 3194 3195 /* Configure new devices forward such that user doesn't see 3196 * device detection messages backwards. 3197 */ 3198 ata_for_each_dev(dev, link, ALL) { 3199 if (!(new_mask & (1 << dev->devno))) 3200 continue; 3201 3202 dev->class = ehc->classes[dev->devno]; 3203 3204 if (dev->class == ATA_DEV_PMP) 3205 continue; 3206 3207 ehc->i.flags |= ATA_EHI_PRINTINFO; 3208 rc = ata_dev_configure(dev); 3209 ehc->i.flags &= ~ATA_EHI_PRINTINFO; 3210 if (rc) { 3211 dev->class = ATA_DEV_UNKNOWN; 3212 goto err; 3213 } 3214 3215 spin_lock_irqsave(ap->lock, flags); 3216 ap->pflags |= ATA_PFLAG_SCSI_HOTPLUG; 3217 spin_unlock_irqrestore(ap->lock, flags); 3218 3219 /* new device discovered, configure xfermode */ 3220 ehc->i.flags |= ATA_EHI_SETMODE; 3221 } 3222 3223 return 0; 3224 3225 err: 3226 *r_failed_dev = dev; 3227 DPRINTK("EXIT rc=%d\n", rc); 3228 return rc; 3229 } 3230 3231 /** 3232 * ata_set_mode - Program timings and issue SET FEATURES - XFER 3233 * @link: link on which timings will be programmed 3234 * @r_failed_dev: out parameter for failed device 3235 * 3236 * Set ATA device disk transfer mode (PIO3, UDMA6, etc.). If 3237 * ata_set_mode() fails, pointer to the failing device is 3238 * returned in @r_failed_dev. 3239 * 3240 * LOCKING: 3241 * PCI/etc. bus probe sem. 3242 * 3243 * RETURNS: 3244 * 0 on success, negative errno otherwise 3245 */ 3246 int ata_set_mode(struct ata_link *link, struct ata_device **r_failed_dev) 3247 { 3248 struct ata_port *ap = link->ap; 3249 struct ata_device *dev; 3250 int rc; 3251 3252 /* if data transfer is verified, clear DUBIOUS_XFER on ering top */ 3253 ata_for_each_dev(dev, link, ENABLED) { 3254 if (!(dev->flags & ATA_DFLAG_DUBIOUS_XFER)) { 3255 struct ata_ering_entry *ent; 3256 3257 ent = ata_ering_top(&dev->ering); 3258 if (ent) 3259 ent->eflags &= ~ATA_EFLAG_DUBIOUS_XFER; 3260 } 3261 } 3262 3263 /* has private set_mode? 
*/
3264 if (ap->ops->set_mode)
3265 rc = ap->ops->set_mode(link, r_failed_dev);
3266 else
3267 rc = ata_do_set_mode(link, r_failed_dev);
3268
3269 /* if transfer mode has changed, set DUBIOUS_XFER on device */
3270 ata_for_each_dev(dev, link, ENABLED) {
3271 struct ata_eh_context *ehc = &link->eh_context;
3272 u8 saved_xfer_mode = ehc->saved_xfer_mode[dev->devno];
3273 u8 saved_ncq = !!(ehc->saved_ncq_enabled & (1 << dev->devno));
3274
3275 if (dev->xfer_mode != saved_xfer_mode ||
3276 ata_ncq_enabled(dev) != saved_ncq)
3277 dev->flags |= ATA_DFLAG_DUBIOUS_XFER;
3278 }
3279
3280 return rc;
3281 }
3282
3283 /**
3284 * atapi_eh_clear_ua - Clear ATAPI UNIT ATTENTION after reset
3285 * @dev: ATAPI device to clear UA for
3286 *
3287 * Resets and other operations can make an ATAPI device raise
3288 * UNIT ATTENTION, which causes the next operation to fail. This
3289 * function clears UA.
3290 *
3291 * LOCKING:
3292 * EH context (may sleep).
3293 *
3294 * RETURNS:
3295 * 0 on success, -errno on failure.
3296 */
3297 static int atapi_eh_clear_ua(struct ata_device *dev)
3298 {
3299 int i;
3300
3301 for (i = 0; i < ATA_EH_UA_TRIES; i++) {
3302 u8 *sense_buffer = dev->link->ap->sector_buf;
3303 u8 sense_key = 0;
3304 unsigned int err_mask;
3305
3306 err_mask = atapi_eh_tur(dev, &sense_key);
3307 if (err_mask != 0 && err_mask != AC_ERR_DEV) {
3308 ata_dev_warn(dev,
3309 "TEST_UNIT_READY failed (err_mask=0x%x)\n",
3310 err_mask);
3311 return -EIO;
3312 }
3313
3314 if (!err_mask || sense_key != UNIT_ATTENTION)
3315 return 0;
3316
3317 err_mask = atapi_eh_request_sense(dev, sense_buffer, sense_key);
3318 if (err_mask) {
3319 ata_dev_warn(dev, "failed to clear "
3320 "UNIT ATTENTION (err_mask=0x%x)\n", err_mask);
3321 return -EIO;
3322 }
3323 }
3324
3325 ata_dev_warn(dev, "UNIT ATTENTION persists after %d tries\n",
3326 ATA_EH_UA_TRIES);
3327
3328 return 0;
3329 }
3330
3331 /**
3332 * ata_eh_maybe_retry_flush - Retry FLUSH if necessary
3333 * @dev: ATA device which may need FLUSH retry
3334 *
3335 * If @dev failed FLUSH, it needs to be reported to the upper layer
3336 * immediately, as it means that @dev failed to remap and has already
3337 * lost at least a sector; further FLUSH retries won't make
3338 * any difference to the lost sector. However, if FLUSH failed
3339 * for another reason, for example a transmission error, FLUSH needs
3340 * to be retried.
3341 *
3342 * This function determines whether FLUSH failure retry is
3343 * necessary and performs it if so.
3344 *
3345 * RETURNS:
3346 * 0 if EH can continue, -errno if EH needs to be repeated.
3347 */
3348 static int ata_eh_maybe_retry_flush(struct ata_device *dev)
3349 {
3350 struct ata_link *link = dev->link;
3351 struct ata_port *ap = link->ap;
3352 struct ata_queued_cmd *qc;
3353 struct ata_taskfile tf;
3354 unsigned int err_mask;
3355 int rc = 0;
3356
3357 /* did flush fail for this device?
*/ 3358 if (!ata_tag_valid(link->active_tag)) 3359 return 0; 3360 3361 qc = __ata_qc_from_tag(ap, link->active_tag); 3362 if (qc->dev != dev || (qc->tf.command != ATA_CMD_FLUSH_EXT && 3363 qc->tf.command != ATA_CMD_FLUSH)) 3364 return 0; 3365 3366 /* if the device failed it, it should be reported to upper layers */ 3367 if (qc->err_mask & AC_ERR_DEV) 3368 return 0; 3369 3370 /* flush failed for some other reason, give it another shot */ 3371 ata_tf_init(dev, &tf); 3372 3373 tf.command = qc->tf.command; 3374 tf.flags |= ATA_TFLAG_DEVICE; 3375 tf.protocol = ATA_PROT_NODATA; 3376 3377 ata_dev_warn(dev, "retrying FLUSH 0x%x Emask 0x%x\n", 3378 tf.command, qc->err_mask); 3379 3380 err_mask = ata_exec_internal(dev, &tf, NULL, DMA_NONE, NULL, 0, 0); 3381 if (!err_mask) { 3382 /* 3383 * FLUSH is complete but there's no way to 3384 * successfully complete a failed command from EH. 3385 * Making sure retry is allowed at least once and 3386 * retrying it should do the trick - whatever was in 3387 * the cache is already on the platter and this won't 3388 * cause infinite loop. 3389 */ 3390 qc->scsicmd->allowed = max(qc->scsicmd->allowed, 1); 3391 } else { 3392 ata_dev_warn(dev, "FLUSH failed Emask 0x%x\n", 3393 err_mask); 3394 rc = -EIO; 3395 3396 /* if device failed it, report it to upper layers */ 3397 if (err_mask & AC_ERR_DEV) { 3398 qc->err_mask |= AC_ERR_DEV; 3399 qc->result_tf = tf; 3400 if (!(ap->pflags & ATA_PFLAG_FROZEN)) 3401 rc = 0; 3402 } 3403 } 3404 return rc; 3405 } 3406 3407 /** 3408 * ata_eh_set_lpm - configure SATA interface power management 3409 * @link: link to configure power management 3410 * @policy: the link power management policy 3411 * @r_failed_dev: out parameter for failed device 3412 * 3413 * Enable SATA Interface power management. This will enable 3414 * Device Interface Power Management (DIPM) for min_power and 3415 * medium_power_with_dipm policies, and then call driver specific 3416 * callbacks for enabling Host Initiated Power management. 3417 * 3418 * LOCKING: 3419 * EH context. 3420 * 3421 * RETURNS: 3422 * 0 on success, -errno on failure. 3423 */ 3424 static int ata_eh_set_lpm(struct ata_link *link, enum ata_lpm_policy policy, 3425 struct ata_device **r_failed_dev) 3426 { 3427 struct ata_port *ap = ata_is_host_link(link) ? link->ap : NULL; 3428 struct ata_eh_context *ehc = &link->eh_context; 3429 struct ata_device *dev, *link_dev = NULL, *lpm_dev = NULL; 3430 enum ata_lpm_policy old_policy = link->lpm_policy; 3431 bool no_dipm = link->ap->flags & ATA_FLAG_NO_DIPM; 3432 unsigned int hints = ATA_LPM_EMPTY | ATA_LPM_HIPM; 3433 unsigned int err_mask; 3434 int rc; 3435 3436 /* if the link or host doesn't do LPM, noop */ 3437 if ((link->flags & ATA_LFLAG_NO_LPM) || (ap && !ap->ops->set_lpm)) 3438 return 0; 3439 3440 /* 3441 * DIPM is enabled only for MIN_POWER as some devices 3442 * misbehave when the host NACKs transition to SLUMBER. Order 3443 * device and link configurations such that the host always 3444 * allows DIPM requests. 
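 *
 * For reference, the checks below rely on the numeric ordering of
 * enum ata_lpm_policy in <linux/libata.h>: ATA_LPM_MAX_POWER <
 * ATA_LPM_MED_POWER < ATA_LPM_MED_POWER_WITH_DIPM < the min_power
 * policies, so "policy >= ATA_LPM_MED_POWER_WITH_DIPM" matches
 * every DIPM-enabling policy and "policy < ..." matches the rest.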
3445 */ 3446 ata_for_each_dev(dev, link, ENABLED) { 3447 bool hipm = ata_id_has_hipm(dev->id); 3448 bool dipm = ata_id_has_dipm(dev->id) && !no_dipm; 3449 3450 /* find the first enabled and LPM enabled devices */ 3451 if (!link_dev) 3452 link_dev = dev; 3453 3454 if (!lpm_dev && (hipm || dipm)) 3455 lpm_dev = dev; 3456 3457 hints &= ~ATA_LPM_EMPTY; 3458 if (!hipm) 3459 hints &= ~ATA_LPM_HIPM; 3460 3461 /* disable DIPM before changing link config */ 3462 if (policy < ATA_LPM_MED_POWER_WITH_DIPM && dipm) { 3463 err_mask = ata_dev_set_feature(dev, 3464 SETFEATURES_SATA_DISABLE, SATA_DIPM); 3465 if (err_mask && err_mask != AC_ERR_DEV) { 3466 ata_dev_warn(dev, 3467 "failed to disable DIPM, Emask 0x%x\n", 3468 err_mask); 3469 rc = -EIO; 3470 goto fail; 3471 } 3472 } 3473 } 3474 3475 if (ap) { 3476 rc = ap->ops->set_lpm(link, policy, hints); 3477 if (!rc && ap->slave_link) 3478 rc = ap->ops->set_lpm(ap->slave_link, policy, hints); 3479 } else 3480 rc = sata_pmp_set_lpm(link, policy, hints); 3481 3482 /* 3483 * Attribute link config failure to the first (LPM) enabled 3484 * device on the link. 3485 */ 3486 if (rc) { 3487 if (rc == -EOPNOTSUPP) { 3488 link->flags |= ATA_LFLAG_NO_LPM; 3489 return 0; 3490 } 3491 dev = lpm_dev ? lpm_dev : link_dev; 3492 goto fail; 3493 } 3494 3495 /* 3496 * Low level driver acked the transition. Issue DIPM command 3497 * with the new policy set. 3498 */ 3499 link->lpm_policy = policy; 3500 if (ap && ap->slave_link) 3501 ap->slave_link->lpm_policy = policy; 3502 3503 /* host config updated, enable DIPM if transitioning to MIN_POWER */ 3504 ata_for_each_dev(dev, link, ENABLED) { 3505 if (policy >= ATA_LPM_MED_POWER_WITH_DIPM && !no_dipm && 3506 ata_id_has_dipm(dev->id)) { 3507 err_mask = ata_dev_set_feature(dev, 3508 SETFEATURES_SATA_ENABLE, SATA_DIPM); 3509 if (err_mask && err_mask != AC_ERR_DEV) { 3510 ata_dev_warn(dev, 3511 "failed to enable DIPM, Emask 0x%x\n", 3512 err_mask); 3513 rc = -EIO; 3514 goto fail; 3515 } 3516 } 3517 } 3518 3519 link->last_lpm_change = jiffies; 3520 link->flags |= ATA_LFLAG_CHANGED; 3521 3522 return 0; 3523 3524 fail: 3525 /* restore the old policy */ 3526 link->lpm_policy = old_policy; 3527 if (ap && ap->slave_link) 3528 ap->slave_link->lpm_policy = old_policy; 3529 3530 /* if no device or only one more chance is left, disable LPM */ 3531 if (!dev || ehc->tries[dev->devno] <= 2) { 3532 ata_link_warn(link, "disabling LPM on the link\n"); 3533 link->flags |= ATA_LFLAG_NO_LPM; 3534 } 3535 if (r_failed_dev) 3536 *r_failed_dev = dev; 3537 return rc; 3538 } 3539 3540 int ata_link_nr_enabled(struct ata_link *link) 3541 { 3542 struct ata_device *dev; 3543 int cnt = 0; 3544 3545 ata_for_each_dev(dev, link, ENABLED) 3546 cnt++; 3547 return cnt; 3548 } 3549 3550 static int ata_link_nr_vacant(struct ata_link *link) 3551 { 3552 struct ata_device *dev; 3553 int cnt = 0; 3554 3555 ata_for_each_dev(dev, link, ALL) 3556 if (dev->class == ATA_DEV_UNKNOWN) 3557 cnt++; 3558 return cnt; 3559 } 3560 3561 static int ata_eh_skip_recovery(struct ata_link *link) 3562 { 3563 struct ata_port *ap = link->ap; 3564 struct ata_eh_context *ehc = &link->eh_context; 3565 struct ata_device *dev; 3566 3567 /* skip disabled links */ 3568 if (link->flags & ATA_LFLAG_DISABLED) 3569 return 1; 3570 3571 /* skip if explicitly requested */ 3572 if (ehc->i.flags & ATA_EHI_NO_RECOVERY) 3573 return 1; 3574 3575 /* thaw frozen port and recover failed devices */ 3576 if ((ap->pflags & ATA_PFLAG_FROZEN) || ata_link_nr_enabled(link)) 3577 return 0; 3578 3579 /* reset at least once if 
reset is requested */
3580 if ((ehc->i.action & ATA_EH_RESET) &&
3581 !(ehc->i.flags & ATA_EHI_DID_RESET))
3582 return 0;
3583
3584 /* skip if class codes for all vacant slots are ATA_DEV_NONE */
3585 ata_for_each_dev(dev, link, ALL) {
3586 if (dev->class == ATA_DEV_UNKNOWN &&
3587 ehc->classes[dev->devno] != ATA_DEV_NONE)
3588 return 0;
3589 }
3590
3591 return 1;
3592 }
3593
3594 static int ata_count_probe_trials_cb(struct ata_ering_entry *ent, void *void_arg)
3595 {
3596 u64 interval = msecs_to_jiffies(ATA_EH_PROBE_TRIAL_INTERVAL);
3597 u64 now = get_jiffies_64();
3598 int *trials = void_arg;
3599
3600 if ((ent->eflags & ATA_EFLAG_OLD_ER) ||
3601 (ent->timestamp < now - min(now, interval)))
3602 return -1;
3603
3604 (*trials)++;
3605 return 0;
3606 }
3607
3608 static int ata_eh_schedule_probe(struct ata_device *dev)
3609 {
3610 struct ata_eh_context *ehc = &dev->link->eh_context;
3611 struct ata_link *link = ata_dev_phys_link(dev);
3612 int trials = 0;
3613
3614 if (!(ehc->i.probe_mask & (1 << dev->devno)) ||
3615 (ehc->did_probe_mask & (1 << dev->devno)))
3616 return 0;
3617
3618 ata_eh_detach_dev(dev);
3619 ata_dev_init(dev);
3620 ehc->did_probe_mask |= (1 << dev->devno);
3621 ehc->i.action |= ATA_EH_RESET;
3622 ehc->saved_xfer_mode[dev->devno] = 0;
3623 ehc->saved_ncq_enabled &= ~(1 << dev->devno);
3624
3625 /* the link may be in deep sleep, wake it up */
3626 if (link->lpm_policy > ATA_LPM_MAX_POWER) {
3627 if (ata_is_host_link(link))
3628 link->ap->ops->set_lpm(link, ATA_LPM_MAX_POWER,
3629 ATA_LPM_EMPTY);
3630 else
3631 sata_pmp_set_lpm(link, ATA_LPM_MAX_POWER,
3632 ATA_LPM_EMPTY);
3633 }
3634
3635 /* Record and count probe trials on the ering. The specific
3636 * error mask used is irrelevant. Because a successful device
3637 * detection clears the ering, this count accumulates only if
3638 * there are consecutive failed probes.
3639 *
3640 * If the count exceeds ATA_EH_PROBE_TRIALS within the last
3641 * ATA_EH_PROBE_TRIAL_INTERVAL, the link speed is forced
3642 * down to 1.5Gbps.
3643 *
3644 * This is to work around cases where failed link speed
3645 * negotiation results in device misdetection leading to
3646 * infinite DEVXCHG or PHRDY CHG events.
3647 */
3648 ata_ering_record(&dev->ering, 0, AC_ERR_OTHER);
3649 ata_ering_map(&dev->ering, ata_count_probe_trials_cb, &trials);
3650
3651 if (trials > ATA_EH_PROBE_TRIALS)
3652 sata_down_spd_limit(link, 1);
3653
3654 return 1;
3655 }
3656
3657 static int ata_eh_handle_dev_fail(struct ata_device *dev, int err)
3658 {
3659 struct ata_eh_context *ehc = &dev->link->eh_context;
3660
3661 /* -EAGAIN from an EH routine indicates retry without prejudice.
3662 * The requester is responsible for ensuring forward progress.
3663 */
3664 if (err != -EAGAIN)
3665 ehc->tries[dev->devno]--;
3666
3667 switch (err) {
3668 case -ENODEV:
3669 /* device missing or wrong IDENTIFY data, schedule probing */
3670 ehc->i.probe_mask |= (1 << dev->devno);
3671 /* fall through */
3672 case -EINVAL:
3673 /* give it just one more chance */
3674 ehc->tries[dev->devno] = min(ehc->tries[dev->devno], 1);
3675 /* fall through */
3676 case -EIO:
3677 if (ehc->tries[dev->devno] == 1) {
3678 /* This is the last chance, better to slow
3679 * down than lose it.
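 *
 * Worked example (assuming the default ATA_EH_DEV_TRIES of 3):
 * three consecutive -EIO failures take tries from 3 to 0; the
 * speed limits here kick in on the second failure (tries == 1
 * after the decrement) and the device is disabled below once
 * tries reaches zero.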
3680 */ 3681 sata_down_spd_limit(ata_dev_phys_link(dev), 0); 3682 if (dev->pio_mode > XFER_PIO_0) 3683 ata_down_xfermask_limit(dev, ATA_DNXFER_PIO); 3684 } 3685 } 3686 3687 if (ata_dev_enabled(dev) && !ehc->tries[dev->devno]) { 3688 /* disable device if it has used up all its chances */ 3689 ata_dev_disable(dev); 3690 3691 /* detach if offline */ 3692 if (ata_phys_link_offline(ata_dev_phys_link(dev))) 3693 ata_eh_detach_dev(dev); 3694 3695 /* schedule probe if necessary */ 3696 if (ata_eh_schedule_probe(dev)) { 3697 ehc->tries[dev->devno] = ATA_EH_DEV_TRIES; 3698 memset(ehc->cmd_timeout_idx[dev->devno], 0, 3699 sizeof(ehc->cmd_timeout_idx[dev->devno])); 3700 } 3701 3702 return 1; 3703 } else { 3704 ehc->i.action |= ATA_EH_RESET; 3705 return 0; 3706 } 3707 } 3708 3709 /** 3710 * ata_eh_recover - recover host port after error 3711 * @ap: host port to recover 3712 * @prereset: prereset method (can be NULL) 3713 * @softreset: softreset method (can be NULL) 3714 * @hardreset: hardreset method (can be NULL) 3715 * @postreset: postreset method (can be NULL) 3716 * @r_failed_link: out parameter for failed link 3717 * 3718 * This is the alpha and omega, eum and yang, heart and soul of 3719 * libata exception handling. On entry, actions required to 3720 * recover each link and hotplug requests are recorded in the 3721 * link's eh_context. This function executes all the operations 3722 * with appropriate retrials and fallbacks to resurrect failed 3723 * devices, detach goners and greet newcomers. 3724 * 3725 * LOCKING: 3726 * Kernel thread context (may sleep). 3727 * 3728 * RETURNS: 3729 * 0 on success, -errno on failure. 3730 */ 3731 int ata_eh_recover(struct ata_port *ap, ata_prereset_fn_t prereset, 3732 ata_reset_fn_t softreset, ata_reset_fn_t hardreset, 3733 ata_postreset_fn_t postreset, 3734 struct ata_link **r_failed_link) 3735 { 3736 struct ata_link *link; 3737 struct ata_device *dev; 3738 int rc, nr_fails; 3739 unsigned long flags, deadline; 3740 3741 DPRINTK("ENTER\n"); 3742 3743 /* prep for recovery */ 3744 ata_for_each_link(link, ap, EDGE) { 3745 struct ata_eh_context *ehc = &link->eh_context; 3746 3747 /* re-enable link? */ 3748 if (ehc->i.action & ATA_EH_ENABLE_LINK) { 3749 ata_eh_about_to_do(link, NULL, ATA_EH_ENABLE_LINK); 3750 spin_lock_irqsave(ap->lock, flags); 3751 link->flags &= ~ATA_LFLAG_DISABLED; 3752 spin_unlock_irqrestore(ap->lock, flags); 3753 ata_eh_done(link, NULL, ATA_EH_ENABLE_LINK); 3754 } 3755 3756 ata_for_each_dev(dev, link, ALL) { 3757 if (link->flags & ATA_LFLAG_NO_RETRY) 3758 ehc->tries[dev->devno] = 1; 3759 else 3760 ehc->tries[dev->devno] = ATA_EH_DEV_TRIES; 3761 3762 /* collect port action mask recorded in dev actions */ 3763 ehc->i.action |= ehc->i.dev_action[dev->devno] & 3764 ~ATA_EH_PERDEV_MASK; 3765 ehc->i.dev_action[dev->devno] &= ATA_EH_PERDEV_MASK; 3766 3767 /* process hotplug request */ 3768 if (dev->flags & ATA_DFLAG_DETACH) 3769 ata_eh_detach_dev(dev); 3770 3771 /* schedule probe if necessary */ 3772 if (!ata_dev_enabled(dev)) 3773 ata_eh_schedule_probe(dev); 3774 } 3775 } 3776 3777 retry: 3778 rc = 0; 3779 3780 /* if UNLOADING, finish immediately */ 3781 if (ap->pflags & ATA_PFLAG_UNLOADING) 3782 goto out; 3783 3784 /* prep for EH */ 3785 ata_for_each_link(link, ap, EDGE) { 3786 struct ata_eh_context *ehc = &link->eh_context; 3787 3788 /* skip EH if possible. 
*/ 3789 if (ata_eh_skip_recovery(link)) 3790 ehc->i.action = 0; 3791 3792 ata_for_each_dev(dev, link, ALL) 3793 ehc->classes[dev->devno] = ATA_DEV_UNKNOWN; 3794 } 3795 3796 /* reset */ 3797 ata_for_each_link(link, ap, EDGE) { 3798 struct ata_eh_context *ehc = &link->eh_context; 3799 3800 if (!(ehc->i.action & ATA_EH_RESET)) 3801 continue; 3802 3803 rc = ata_eh_reset(link, ata_link_nr_vacant(link), 3804 prereset, softreset, hardreset, postreset); 3805 if (rc) { 3806 ata_link_err(link, "reset failed, giving up\n"); 3807 goto out; 3808 } 3809 } 3810 3811 do { 3812 unsigned long now; 3813 3814 /* 3815 * clears ATA_EH_PARK in eh_info and resets 3816 * ap->park_req_pending 3817 */ 3818 ata_eh_pull_park_action(ap); 3819 3820 deadline = jiffies; 3821 ata_for_each_link(link, ap, EDGE) { 3822 ata_for_each_dev(dev, link, ALL) { 3823 struct ata_eh_context *ehc = &link->eh_context; 3824 unsigned long tmp; 3825 3826 if (dev->class != ATA_DEV_ATA && 3827 dev->class != ATA_DEV_ZAC) 3828 continue; 3829 if (!(ehc->i.dev_action[dev->devno] & 3830 ATA_EH_PARK)) 3831 continue; 3832 tmp = dev->unpark_deadline; 3833 if (time_before(deadline, tmp)) 3834 deadline = tmp; 3835 else if (time_before_eq(tmp, jiffies)) 3836 continue; 3837 if (ehc->unloaded_mask & (1 << dev->devno)) 3838 continue; 3839 3840 ata_eh_park_issue_cmd(dev, 1); 3841 } 3842 } 3843 3844 now = jiffies; 3845 if (time_before_eq(deadline, now)) 3846 break; 3847 3848 ata_eh_release(ap); 3849 deadline = wait_for_completion_timeout(&ap->park_req_pending, 3850 deadline - now); 3851 ata_eh_acquire(ap); 3852 } while (deadline); 3853 ata_for_each_link(link, ap, EDGE) { 3854 ata_for_each_dev(dev, link, ALL) { 3855 if (!(link->eh_context.unloaded_mask & 3856 (1 << dev->devno))) 3857 continue; 3858 3859 ata_eh_park_issue_cmd(dev, 0); 3860 ata_eh_done(link, dev, ATA_EH_PARK); 3861 } 3862 } 3863 3864 /* the rest */ 3865 nr_fails = 0; 3866 ata_for_each_link(link, ap, PMP_FIRST) { 3867 struct ata_eh_context *ehc = &link->eh_context; 3868 3869 if (sata_pmp_attached(ap) && ata_is_host_link(link)) 3870 goto config_lpm; 3871 3872 /* revalidate existing devices and attach new ones */ 3873 rc = ata_eh_revalidate_and_attach(link, &dev); 3874 if (rc) 3875 goto rest_fail; 3876 3877 /* if PMP got attached, return, pmp EH will take care of it */ 3878 if (link->device->class == ATA_DEV_PMP) { 3879 ehc->i.action = 0; 3880 return 0; 3881 } 3882 3883 /* configure transfer mode if necessary */ 3884 if (ehc->i.flags & ATA_EHI_SETMODE) { 3885 rc = ata_set_mode(link, &dev); 3886 if (rc) 3887 goto rest_fail; 3888 ehc->i.flags &= ~ATA_EHI_SETMODE; 3889 } 3890 3891 /* If reset has been issued, clear UA to avoid 3892 * disrupting the current users of the device. 
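 * (ATAPI devices typically report a sense of UNIT ATTENTION with
 * ASC 0x29, "POWER ON, RESET, OR BUS DEVICE RESET OCCURRED", on
 * the first command after a reset; see atapi_eh_clear_ua() above.)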
	do {
		unsigned long now;

		/*
		 * clears ATA_EH_PARK in eh_info and resets
		 * ap->park_req_pending
		 */
		ata_eh_pull_park_action(ap);

		deadline = jiffies;
		ata_for_each_link(link, ap, EDGE) {
			ata_for_each_dev(dev, link, ALL) {
				struct ata_eh_context *ehc = &link->eh_context;
				unsigned long tmp;

				if (dev->class != ATA_DEV_ATA &&
				    dev->class != ATA_DEV_ZAC)
					continue;
				if (!(ehc->i.dev_action[dev->devno] &
				      ATA_EH_PARK))
					continue;
				tmp = dev->unpark_deadline;
				if (time_before(deadline, tmp))
					deadline = tmp;
				else if (time_before_eq(tmp, jiffies))
					continue;
				if (ehc->unloaded_mask & (1 << dev->devno))
					continue;

				ata_eh_park_issue_cmd(dev, 1);
			}
		}

		now = jiffies;
		if (time_before_eq(deadline, now))
			break;

		ata_eh_release(ap);
		deadline = wait_for_completion_timeout(&ap->park_req_pending,
						       deadline - now);
		ata_eh_acquire(ap);
	} while (deadline);
	ata_for_each_link(link, ap, EDGE) {
		ata_for_each_dev(dev, link, ALL) {
			if (!(link->eh_context.unloaded_mask &
			      (1 << dev->devno)))
				continue;

			ata_eh_park_issue_cmd(dev, 0);
			ata_eh_done(link, dev, ATA_EH_PARK);
		}
	}

	/* the rest */
	nr_fails = 0;
	ata_for_each_link(link, ap, PMP_FIRST) {
		struct ata_eh_context *ehc = &link->eh_context;

		if (sata_pmp_attached(ap) && ata_is_host_link(link))
			goto config_lpm;

		/* revalidate existing devices and attach new ones */
		rc = ata_eh_revalidate_and_attach(link, &dev);
		if (rc)
			goto rest_fail;

		/* if PMP got attached, return, pmp EH will take care of it */
		if (link->device->class == ATA_DEV_PMP) {
			ehc->i.action = 0;
			return 0;
		}

		/* configure transfer mode if necessary */
		if (ehc->i.flags & ATA_EHI_SETMODE) {
			rc = ata_set_mode(link, &dev);
			if (rc)
				goto rest_fail;
			ehc->i.flags &= ~ATA_EHI_SETMODE;
		}

		/* If reset has been issued, clear UA to avoid
		 * disrupting the current users of the device.
		 */
		if (ehc->i.flags & ATA_EHI_DID_RESET) {
			ata_for_each_dev(dev, link, ALL) {
				if (dev->class != ATA_DEV_ATAPI)
					continue;
				rc = atapi_eh_clear_ua(dev);
				if (rc)
					goto rest_fail;
				if (zpodd_dev_enabled(dev))
					zpodd_post_poweron(dev);
			}
		}

		/* retry flush if necessary */
		ata_for_each_dev(dev, link, ALL) {
			if (dev->class != ATA_DEV_ATA &&
			    dev->class != ATA_DEV_ZAC)
				continue;
			rc = ata_eh_maybe_retry_flush(dev);
			if (rc)
				goto rest_fail;
		}

	config_lpm:
		/* configure link power saving */
		if (link->lpm_policy != ap->target_lpm_policy) {
			rc = ata_eh_set_lpm(link, ap->target_lpm_policy, &dev);
			if (rc)
				goto rest_fail;
		}

		/* this link is okay now */
		ehc->i.flags = 0;
		continue;

	rest_fail:
		nr_fails++;
		if (dev)
			ata_eh_handle_dev_fail(dev, rc);

		if (ap->pflags & ATA_PFLAG_FROZEN) {
			/* PMP reset requires working host port.
			 * Can't retry if it's frozen.
			 */
			if (sata_pmp_attached(ap))
				goto out;
			break;
		}
	}

	if (nr_fails)
		goto retry;

 out:
	if (rc && r_failed_link)
		*r_failed_link = link;

	DPRINTK("EXIT, rc=%d\n", rc);
	return rc;
}

/**
 *	ata_eh_finish - finish up EH
 *	@ap: host port to finish EH for
 *
 *	Recovery is complete.  Clean up EH states and retry or finish
 *	failed qcs.
 *
 *	LOCKING:
 *	None.
 */
void ata_eh_finish(struct ata_port *ap)
{
	struct ata_queued_cmd *qc;
	int tag;

	/* retry or finish qcs */
	ata_qc_for_each_raw(ap, qc, tag) {
		if (!(qc->flags & ATA_QCFLAG_FAILED))
			continue;

		if (qc->err_mask) {
			/* FIXME: Once EH migration is complete,
			 * generate sense data in this function,
			 * considering both err_mask and tf.
			 */
			if (qc->flags & ATA_QCFLAG_RETRY)
				ata_eh_qc_retry(qc);
			else
				ata_eh_qc_complete(qc);
		} else {
			if (qc->flags & ATA_QCFLAG_SENSE_VALID) {
				ata_eh_qc_complete(qc);
			} else {
				/* feed zero TF to sense generation */
				memset(&qc->result_tf, 0, sizeof(qc->result_tf));
				ata_eh_qc_retry(qc);
			}
		}
	}

	/* make sure nr_active_links is zero after EH */
	WARN_ON(ap->nr_active_links);
	ap->nr_active_links = 0;
}

/**
 *	ata_do_eh - do standard error handling
 *	@ap: host port to handle error for
 *	@prereset: prereset method (can be NULL)
 *	@softreset: softreset method (can be NULL)
 *	@hardreset: hardreset method (can be NULL)
 *	@postreset: postreset method (can be NULL)
 *
 *	Perform standard error handling sequence.
 *
 *	LOCKING:
 *	Kernel thread context (may sleep).
 */
void ata_do_eh(struct ata_port *ap, ata_prereset_fn_t prereset,
	       ata_reset_fn_t softreset, ata_reset_fn_t hardreset,
	       ata_postreset_fn_t postreset)
{
	struct ata_device *dev;
	int rc;

	ata_eh_autopsy(ap);
	ata_eh_report(ap);

	rc = ata_eh_recover(ap, prereset, softreset, hardreset, postreset,
			    NULL);
	if (rc) {
		ata_for_each_dev(dev, &ap->link, ALL)
			ata_dev_disable(dev);
	}

	ata_eh_finish(ap);
}

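/*
 * A minimal sketch (kept out of the build) of how a low-level driver
 * with non-standard reset requirements might drive the sequence above
 * from its own ->error_handler instead of using ata_std_error_handler()
 * below.  "my_error_handler" and "my_hardreset" are hypothetical,
 * driver-provided names; passing NULL for softreset is fine since every
 * reset method is optional.
 */
#if 0
static void my_error_handler(struct ata_port *ap)
{
	/* hardreset only; prereset/postreset remain the standard ones */
	ata_do_eh(ap, ata_std_prereset, NULL, my_hardreset,
		  ata_std_postreset);
}
#endif
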
/**
 *	ata_std_error_handler - standard error handler
 *	@ap: host port to handle error for
 *
 *	Standard error handler.
 *
 *	LOCKING:
 *	Kernel thread context (may sleep).
 */
void ata_std_error_handler(struct ata_port *ap)
{
	struct ata_port_operations *ops = ap->ops;
	ata_reset_fn_t hardreset = ops->hardreset;

	/* ignore built-in hardreset if SCR access is not available */
	if (hardreset == sata_std_hardreset && !sata_scr_valid(&ap->link))
		hardreset = NULL;

	ata_do_eh(ap, ops->prereset, ops->softreset, hardreset, ops->postreset);
}

#ifdef CONFIG_PM
/**
 *	ata_eh_handle_port_suspend - perform port suspend operation
 *	@ap: port to suspend
 *
 *	Suspend @ap.
 *
 *	LOCKING:
 *	Kernel thread context (may sleep).
 */
static void ata_eh_handle_port_suspend(struct ata_port *ap)
{
	unsigned long flags;
	int rc = 0;
	struct ata_device *dev;

	/* are we suspending? */
	spin_lock_irqsave(ap->lock, flags);
	if (!(ap->pflags & ATA_PFLAG_PM_PENDING) ||
	    ap->pm_mesg.event & PM_EVENT_RESUME) {
		spin_unlock_irqrestore(ap->lock, flags);
		return;
	}
	spin_unlock_irqrestore(ap->lock, flags);

	WARN_ON(ap->pflags & ATA_PFLAG_SUSPENDED);

	/*
	 * If we have a ZPODD attached, check its zero
	 * power ready status before the port is frozen.
	 * Only needed for runtime suspend.
	 */
	if (PMSG_IS_AUTO(ap->pm_mesg)) {
		ata_for_each_dev(dev, &ap->link, ENABLED) {
			if (zpodd_dev_enabled(dev))
				zpodd_on_suspend(dev);
		}
	}

	/* tell ACPI we're suspending */
	rc = ata_acpi_on_suspend(ap);
	if (rc)
		goto out;

	/* suspend */
	ata_eh_freeze_port(ap);

	if (ap->ops->port_suspend)
		rc = ap->ops->port_suspend(ap, ap->pm_mesg);

	ata_acpi_set_state(ap, ap->pm_mesg);
 out:
	/* update the flags */
	spin_lock_irqsave(ap->lock, flags);

	ap->pflags &= ~ATA_PFLAG_PM_PENDING;
	if (rc == 0)
		ap->pflags |= ATA_PFLAG_SUSPENDED;
	else if (ap->pflags & ATA_PFLAG_FROZEN)
		ata_port_schedule_eh(ap);

	spin_unlock_irqrestore(ap->lock, flags);
}

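/*
 * Both PM handlers run from EH context.  The suspend/resume entry
 * points in libata-core record the request by setting
 * ATA_PFLAG_PM_PENDING and ap->pm_mesg before scheduling EH; the
 * PM_EVENT_RESUME bit in pm_mesg then steers a given EH invocation to
 * the suspend handler above or the resume handler below.
 */
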
/**
 *	ata_eh_handle_port_resume - perform port resume operation
 *	@ap: port to resume
 *
 *	Resume @ap.
 *
 *	LOCKING:
 *	Kernel thread context (may sleep).
 */
static void ata_eh_handle_port_resume(struct ata_port *ap)
{
	struct ata_link *link;
	struct ata_device *dev;
	unsigned long flags;

	/* are we resuming? */
	spin_lock_irqsave(ap->lock, flags);
	if (!(ap->pflags & ATA_PFLAG_PM_PENDING) ||
	    !(ap->pm_mesg.event & PM_EVENT_RESUME)) {
		spin_unlock_irqrestore(ap->lock, flags);
		return;
	}
	spin_unlock_irqrestore(ap->lock, flags);

	WARN_ON(!(ap->pflags & ATA_PFLAG_SUSPENDED));

	/*
	 * Error timestamps are in jiffies, which doesn't advance while
	 * suspended, and PHY events during resume aren't too uncommon.
	 * Combined, the two can lead to unnecessary speed-downs if the
	 * machine is suspended and resumed repeatedly.  Clear the
	 * error history.
	 */
	ata_for_each_link(link, ap, HOST_FIRST)
		ata_for_each_dev(dev, link, ALL)
			ata_ering_clear(&dev->ering);

	ata_acpi_set_state(ap, ap->pm_mesg);

	if (ap->ops->port_resume)
		ap->ops->port_resume(ap);

	/* tell ACPI that we're resuming */
	ata_acpi_on_resume(ap);

	/* update the flags */
	spin_lock_irqsave(ap->lock, flags);
	ap->pflags &= ~(ATA_PFLAG_PM_PENDING | ATA_PFLAG_SUSPENDED);
	spin_unlock_irqrestore(ap->lock, flags);
}
#endif /* CONFIG_PM */
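
/*
 * A minimal sketch (kept out of the build) of how the standard EH
 * entry point is normally reached: drivers inherit it through their
 * ata_port_operations rather than calling it directly.  "my_port_ops"
 * is hypothetical, and ata_base_port_ops already supplies
 * ata_std_error_handler, so the explicit assignment is shown purely
 * for clarity.
 */
#if 0
static struct ata_port_operations my_port_ops = {
	.inherits	= &ata_base_port_ops,
	.error_handler	= ata_std_error_handler,
};
#endif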