1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* 3 * libata-eh.c - libata error handling 4 * 5 * Maintained by: Tejun Heo <tj@kernel.org> 6 * Please ALWAYS copy linux-ide@vger.kernel.org 7 * on emails. 8 * 9 * Copyright 2006 Tejun Heo <htejun@gmail.com> 10 * 11 * libata documentation is available via 'make {ps|pdf}docs', 12 * as Documentation/driver-api/libata.rst 13 * 14 * Hardware documentation available from http://www.t13.org/ and 15 * http://www.sata-io.org/ 16 */ 17 18 #include <linux/kernel.h> 19 #include <linux/blkdev.h> 20 #include <linux/export.h> 21 #include <linux/pci.h> 22 #include <scsi/scsi.h> 23 #include <scsi/scsi_host.h> 24 #include <scsi/scsi_eh.h> 25 #include <scsi/scsi_device.h> 26 #include <scsi/scsi_cmnd.h> 27 #include <scsi/scsi_dbg.h> 28 #include "../scsi/scsi_transport_api.h" 29 30 #include <linux/libata.h> 31 32 #include <trace/events/libata.h> 33 #include "libata.h" 34 35 enum { 36 /* speed down verdicts */ 37 ATA_EH_SPDN_NCQ_OFF = (1 << 0), 38 ATA_EH_SPDN_SPEED_DOWN = (1 << 1), 39 ATA_EH_SPDN_FALLBACK_TO_PIO = (1 << 2), 40 ATA_EH_SPDN_KEEP_ERRORS = (1 << 3), 41 42 /* error flags */ 43 ATA_EFLAG_IS_IO = (1 << 0), 44 ATA_EFLAG_DUBIOUS_XFER = (1 << 1), 45 ATA_EFLAG_OLD_ER = (1 << 31), 46 47 /* error categories */ 48 ATA_ECAT_NONE = 0, 49 ATA_ECAT_ATA_BUS = 1, 50 ATA_ECAT_TOUT_HSM = 2, 51 ATA_ECAT_UNK_DEV = 3, 52 ATA_ECAT_DUBIOUS_NONE = 4, 53 ATA_ECAT_DUBIOUS_ATA_BUS = 5, 54 ATA_ECAT_DUBIOUS_TOUT_HSM = 6, 55 ATA_ECAT_DUBIOUS_UNK_DEV = 7, 56 ATA_ECAT_NR = 8, 57 58 ATA_EH_CMD_DFL_TIMEOUT = 5000, 59 60 /* always put at least this amount of time between resets */ 61 ATA_EH_RESET_COOL_DOWN = 5000, 62 63 /* Waiting in ->prereset can never be reliable. It's 64 * sometimes nice to wait there but it can't be depended upon; 65 * otherwise, we wouldn't be resetting. Just give it enough 66 * time for most drives to spin up. 
67 */ 68 ATA_EH_PRERESET_TIMEOUT = 10000, 69 ATA_EH_FASTDRAIN_INTERVAL = 3000, 70 71 ATA_EH_UA_TRIES = 5, 72 73 /* probe speed down parameters, see ata_eh_schedule_probe() */ 74 ATA_EH_PROBE_TRIAL_INTERVAL = 60000, /* 1 min */ 75 ATA_EH_PROBE_TRIALS = 2, 76 }; 77 78 /* The following table determines how we sequence resets. Each entry 79 * represents timeout for that try. The first try can be soft or 80 * hardreset. All others are hardreset if available. In most cases 81 * the first reset w/ 10sec timeout should succeed. Following entries 82 * are mostly for error handling, hotplug and those outlier devices that 83 * take an exceptionally long time to recover from reset. 84 */ 85 static const unsigned long ata_eh_reset_timeouts[] = { 86 10000, /* most drives spin up by 10sec */ 87 10000, /* > 99% working drives spin up before 20sec */ 88 35000, /* give > 30 secs of idleness for outlier devices */ 89 5000, /* and sweet one last chance */ 90 ULONG_MAX, /* > 1 min has elapsed, give up */ 91 }; 92 93 static const unsigned long ata_eh_identify_timeouts[] = { 94 5000, /* covers > 99% of successes and not too boring on failures */ 95 10000, /* combined time till here is enough even for media access */ 96 30000, /* for true idiots */ 97 ULONG_MAX, 98 }; 99 100 static const unsigned long ata_eh_flush_timeouts[] = { 101 15000, /* be generous with flush */ 102 15000, /* ditto */ 103 30000, /* and even more generous */ 104 ULONG_MAX, 105 }; 106 107 static const unsigned long ata_eh_other_timeouts[] = { 108 5000, /* same rationale as identify timeout */ 109 10000, /* ditto */ 110 /* but no merciful 30sec for other commands, it just isn't worth it */ 111 ULONG_MAX, 112 }; 113 114 struct ata_eh_cmd_timeout_ent { 115 const u8 *commands; 116 const unsigned long *timeouts; 117 }; 118 119 /* The following table determines timeouts to use for EH internal 120 * commands. Each table entry is a command class and matches the 121 * commands the entry applies to and the timeout table to use. 
122 * 123 * On the retry after a command timed out, the next timeout value from 124 * the table is used. If the table doesn't contain further entries, 125 * the last value is used. 126 * 127 * ehc->cmd_timeout_idx keeps track of which timeout to use per 128 * command class, so if SET_FEATURES times out on the first try, the 129 * next try will use the second timeout value only for that class. 130 */ 131 #define CMDS(cmds...) (const u8 []){ cmds, 0 } 132 static const struct ata_eh_cmd_timeout_ent 133 ata_eh_cmd_timeout_table[ATA_EH_CMD_TIMEOUT_TABLE_SIZE] = { 134 { .commands = CMDS(ATA_CMD_ID_ATA, ATA_CMD_ID_ATAPI), 135 .timeouts = ata_eh_identify_timeouts, }, 136 { .commands = CMDS(ATA_CMD_READ_NATIVE_MAX, ATA_CMD_READ_NATIVE_MAX_EXT), 137 .timeouts = ata_eh_other_timeouts, }, 138 { .commands = CMDS(ATA_CMD_SET_MAX, ATA_CMD_SET_MAX_EXT), 139 .timeouts = ata_eh_other_timeouts, }, 140 { .commands = CMDS(ATA_CMD_SET_FEATURES), 141 .timeouts = ata_eh_other_timeouts, }, 142 { .commands = CMDS(ATA_CMD_INIT_DEV_PARAMS), 143 .timeouts = ata_eh_other_timeouts, }, 144 { .commands = CMDS(ATA_CMD_FLUSH, ATA_CMD_FLUSH_EXT), 145 .timeouts = ata_eh_flush_timeouts }, 146 }; 147 #undef CMDS 148 149 static void __ata_port_freeze(struct ata_port *ap); 150 #ifdef CONFIG_PM 151 static void ata_eh_handle_port_suspend(struct ata_port *ap); 152 static void ata_eh_handle_port_resume(struct ata_port *ap); 153 #else /* CONFIG_PM */ 154 static void ata_eh_handle_port_suspend(struct ata_port *ap) 155 { } 156 157 static void ata_eh_handle_port_resume(struct ata_port *ap) 158 { } 159 #endif /* CONFIG_PM */ 160 161 static __printf(2, 0) void __ata_ehi_pushv_desc(struct ata_eh_info *ehi, 162 const char *fmt, va_list args) 163 { 164 ehi->desc_len += vscnprintf(ehi->desc + ehi->desc_len, 165 ATA_EH_DESC_LEN - ehi->desc_len, 166 fmt, args); 167 } 168 169 /** 170 * __ata_ehi_push_desc - push error description without adding separator 171 * @ehi: target EHI 172 * @fmt: printf format string 173 * 174 * 
Format string according to @fmt and append it to @ehi->desc. 175 * 176 * LOCKING: 177 * spin_lock_irqsave(host lock) 178 */ 179 void __ata_ehi_push_desc(struct ata_eh_info *ehi, const char *fmt, ...) 180 { 181 va_list args; 182 183 va_start(args, fmt); 184 __ata_ehi_pushv_desc(ehi, fmt, args); 185 va_end(args); 186 } 187 188 /** 189 * ata_ehi_push_desc - push error description with separator 190 * @ehi: target EHI 191 * @fmt: printf format string 192 * 193 * Format string according to @fmt and append it to @ehi->desc. 194 * If @ehi->desc is not empty, ", " is added in-between. 195 * 196 * LOCKING: 197 * spin_lock_irqsave(host lock) 198 */ 199 void ata_ehi_push_desc(struct ata_eh_info *ehi, const char *fmt, ...) 200 { 201 va_list args; 202 203 if (ehi->desc_len) 204 __ata_ehi_push_desc(ehi, ", "); 205 206 va_start(args, fmt); 207 __ata_ehi_pushv_desc(ehi, fmt, args); 208 va_end(args); 209 } 210 211 /** 212 * ata_ehi_clear_desc - clean error description 213 * @ehi: target EHI 214 * 215 * Clear @ehi->desc. 216 * 217 * LOCKING: 218 * spin_lock_irqsave(host lock) 219 */ 220 void ata_ehi_clear_desc(struct ata_eh_info *ehi) 221 { 222 ehi->desc[0] = '\0'; 223 ehi->desc_len = 0; 224 } 225 226 /** 227 * ata_port_desc - append port description 228 * @ap: target ATA port 229 * @fmt: printf format string 230 * 231 * Format string according to @fmt and append it to port 232 * description. If port description is not empty, " " is added 233 * in-between. This function is to be used while initializing 234 * ata_host. The description is printed on host registration. 235 * 236 * LOCKING: 237 * None. 238 */ 239 void ata_port_desc(struct ata_port *ap, const char *fmt, ...) 
240 { 241 va_list args; 242 243 WARN_ON(!(ap->pflags & ATA_PFLAG_INITIALIZING)); 244 245 if (ap->link.eh_info.desc_len) 246 __ata_ehi_push_desc(&ap->link.eh_info, " "); 247 248 va_start(args, fmt); 249 __ata_ehi_pushv_desc(&ap->link.eh_info, fmt, args); 250 va_end(args); 251 } 252 253 #ifdef CONFIG_PCI 254 255 /** 256 * ata_port_pbar_desc - append PCI BAR description 257 * @ap: target ATA port 258 * @bar: target PCI BAR 259 * @offset: offset into PCI BAR 260 * @name: name of the area 261 * 262 * If @offset is negative, this function formats a string which 263 * contains the name, address, size and type of the BAR and 264 * appends it to the port description. If @offset is zero or 265 * positive, only name and offsetted address is appended. 266 * 267 * LOCKING: 268 * None. 269 */ 270 void ata_port_pbar_desc(struct ata_port *ap, int bar, ssize_t offset, 271 const char *name) 272 { 273 struct pci_dev *pdev = to_pci_dev(ap->host->dev); 274 char *type = ""; 275 unsigned long long start, len; 276 277 if (pci_resource_flags(pdev, bar) & IORESOURCE_MEM) 278 type = "m"; 279 else if (pci_resource_flags(pdev, bar) & IORESOURCE_IO) 280 type = "i"; 281 282 start = (unsigned long long)pci_resource_start(pdev, bar); 283 len = (unsigned long long)pci_resource_len(pdev, bar); 284 285 if (offset < 0) 286 ata_port_desc(ap, "%s %s%llu@0x%llx", name, type, len, start); 287 else 288 ata_port_desc(ap, "%s 0x%llx", name, 289 start + (unsigned long long)offset); 290 } 291 292 #endif /* CONFIG_PCI */ 293 294 static int ata_lookup_timeout_table(u8 cmd) 295 { 296 int i; 297 298 for (i = 0; i < ATA_EH_CMD_TIMEOUT_TABLE_SIZE; i++) { 299 const u8 *cur; 300 301 for (cur = ata_eh_cmd_timeout_table[i].commands; *cur; cur++) 302 if (*cur == cmd) 303 return i; 304 } 305 306 return -1; 307 } 308 309 /** 310 * ata_internal_cmd_timeout - determine timeout for an internal command 311 * @dev: target device 312 * @cmd: internal command to be issued 313 * 314 * Determine timeout for internal command @cmd 
for @dev. 315 * 316 * LOCKING: 317 * EH context. 318 * 319 * RETURNS: 320 * Determined timeout. 321 */ 322 unsigned long ata_internal_cmd_timeout(struct ata_device *dev, u8 cmd) 323 { 324 struct ata_eh_context *ehc = &dev->link->eh_context; 325 int ent = ata_lookup_timeout_table(cmd); 326 int idx; 327 328 if (ent < 0) 329 return ATA_EH_CMD_DFL_TIMEOUT; 330 331 idx = ehc->cmd_timeout_idx[dev->devno][ent]; 332 return ata_eh_cmd_timeout_table[ent].timeouts[idx]; 333 } 334 335 /** 336 * ata_internal_cmd_timed_out - notification for internal command timeout 337 * @dev: target device 338 * @cmd: internal command which timed out 339 * 340 * Notify EH that internal command @cmd for @dev timed out. This 341 * function should be called only for commands whose timeouts are 342 * determined using ata_internal_cmd_timeout(). 343 * 344 * LOCKING: 345 * EH context. 346 */ 347 void ata_internal_cmd_timed_out(struct ata_device *dev, u8 cmd) 348 { 349 struct ata_eh_context *ehc = &dev->link->eh_context; 350 int ent = ata_lookup_timeout_table(cmd); 351 int idx; 352 353 if (ent < 0) 354 return; 355 356 idx = ehc->cmd_timeout_idx[dev->devno][ent]; 357 if (ata_eh_cmd_timeout_table[ent].timeouts[idx + 1] != ULONG_MAX) 358 ehc->cmd_timeout_idx[dev->devno][ent]++; 359 } 360 361 static void ata_ering_record(struct ata_ering *ering, unsigned int eflags, 362 unsigned int err_mask) 363 { 364 struct ata_ering_entry *ent; 365 366 WARN_ON(!err_mask); 367 368 ering->cursor++; 369 ering->cursor %= ATA_ERING_SIZE; 370 371 ent = &ering->ring[ering->cursor]; 372 ent->eflags = eflags; 373 ent->err_mask = err_mask; 374 ent->timestamp = get_jiffies_64(); 375 } 376 377 static struct ata_ering_entry *ata_ering_top(struct ata_ering *ering) 378 { 379 struct ata_ering_entry *ent = &ering->ring[ering->cursor]; 380 381 if (ent->err_mask) 382 return ent; 383 return NULL; 384 } 385 386 int ata_ering_map(struct ata_ering *ering, 387 int (*map_fn)(struct ata_ering_entry *, void *), 388 void *arg) 389 { 390 int idx, 
rc = 0; 391 struct ata_ering_entry *ent; 392 393 idx = ering->cursor; 394 do { 395 ent = &ering->ring[idx]; 396 if (!ent->err_mask) 397 break; 398 rc = map_fn(ent, arg); 399 if (rc) 400 break; 401 idx = (idx - 1 + ATA_ERING_SIZE) % ATA_ERING_SIZE; 402 } while (idx != ering->cursor); 403 404 return rc; 405 } 406 407 static int ata_ering_clear_cb(struct ata_ering_entry *ent, void *void_arg) 408 { 409 ent->eflags |= ATA_EFLAG_OLD_ER; 410 return 0; 411 } 412 413 static void ata_ering_clear(struct ata_ering *ering) 414 { 415 ata_ering_map(ering, ata_ering_clear_cb, NULL); 416 } 417 418 static unsigned int ata_eh_dev_action(struct ata_device *dev) 419 { 420 struct ata_eh_context *ehc = &dev->link->eh_context; 421 422 return ehc->i.action | ehc->i.dev_action[dev->devno]; 423 } 424 425 static void ata_eh_clear_action(struct ata_link *link, struct ata_device *dev, 426 struct ata_eh_info *ehi, unsigned int action) 427 { 428 struct ata_device *tdev; 429 430 if (!dev) { 431 ehi->action &= ~action; 432 ata_for_each_dev(tdev, link, ALL) 433 ehi->dev_action[tdev->devno] &= ~action; 434 } else { 435 /* doesn't make sense for port-wide EH actions */ 436 WARN_ON(!(action & ATA_EH_PERDEV_MASK)); 437 438 /* break ehi->action into ehi->dev_action */ 439 if (ehi->action & action) { 440 ata_for_each_dev(tdev, link, ALL) 441 ehi->dev_action[tdev->devno] |= 442 ehi->action & action; 443 ehi->action &= ~action; 444 } 445 446 /* turn off the specified per-dev action */ 447 ehi->dev_action[dev->devno] &= ~action; 448 } 449 } 450 451 /** 452 * ata_eh_acquire - acquire EH ownership 453 * @ap: ATA port to acquire EH ownership for 454 * 455 * Acquire EH ownership for @ap. This is the basic exclusion 456 * mechanism for ports sharing a host. Only one port hanging off 457 * the same host can claim the ownership of EH. 458 * 459 * LOCKING: 460 * EH context. 
461 */ 462 void ata_eh_acquire(struct ata_port *ap) 463 { 464 mutex_lock(&ap->host->eh_mutex); 465 WARN_ON_ONCE(ap->host->eh_owner); 466 ap->host->eh_owner = current; 467 } 468 469 /** 470 * ata_eh_release - release EH ownership 471 * @ap: ATA port to release EH ownership for 472 * 473 * Release EH ownership for @ap if the caller. The caller must 474 * have acquired EH ownership using ata_eh_acquire() previously. 475 * 476 * LOCKING: 477 * EH context. 478 */ 479 void ata_eh_release(struct ata_port *ap) 480 { 481 WARN_ON_ONCE(ap->host->eh_owner != current); 482 ap->host->eh_owner = NULL; 483 mutex_unlock(&ap->host->eh_mutex); 484 } 485 486 static void ata_eh_unload(struct ata_port *ap) 487 { 488 struct ata_link *link; 489 struct ata_device *dev; 490 unsigned long flags; 491 492 /* Restore SControl IPM and SPD for the next driver and 493 * disable attached devices. 494 */ 495 ata_for_each_link(link, ap, PMP_FIRST) { 496 sata_scr_write(link, SCR_CONTROL, link->saved_scontrol & 0xff0); 497 ata_for_each_dev(dev, link, ALL) 498 ata_dev_disable(dev); 499 } 500 501 /* freeze and set UNLOADED */ 502 spin_lock_irqsave(ap->lock, flags); 503 504 ata_port_freeze(ap); /* won't be thawed */ 505 ap->pflags &= ~ATA_PFLAG_EH_PENDING; /* clear pending from freeze */ 506 ap->pflags |= ATA_PFLAG_UNLOADED; 507 508 spin_unlock_irqrestore(ap->lock, flags); 509 } 510 511 /** 512 * ata_scsi_error - SCSI layer error handler callback 513 * @host: SCSI host on which error occurred 514 * 515 * Handles SCSI-layer-thrown error events. 516 * 517 * LOCKING: 518 * Inherited from SCSI layer (none, can sleep) 519 * 520 * RETURNS: 521 * Zero. 
522 */ 523 void ata_scsi_error(struct Scsi_Host *host) 524 { 525 struct ata_port *ap = ata_shost_to_port(host); 526 unsigned long flags; 527 LIST_HEAD(eh_work_q); 528 529 DPRINTK("ENTER\n"); 530 531 spin_lock_irqsave(host->host_lock, flags); 532 list_splice_init(&host->eh_cmd_q, &eh_work_q); 533 spin_unlock_irqrestore(host->host_lock, flags); 534 535 ata_scsi_cmd_error_handler(host, ap, &eh_work_q); 536 537 /* If we timed raced normal completion and there is nothing to 538 recover nr_timedout == 0 why exactly are we doing error recovery ? */ 539 ata_scsi_port_error_handler(host, ap); 540 541 /* finish or retry handled scmd's and clean up */ 542 WARN_ON(!list_empty(&eh_work_q)); 543 544 DPRINTK("EXIT\n"); 545 } 546 547 /** 548 * ata_scsi_cmd_error_handler - error callback for a list of commands 549 * @host: scsi host containing the port 550 * @ap: ATA port within the host 551 * @eh_work_q: list of commands to process 552 * 553 * process the given list of commands and return those finished to the 554 * ap->eh_done_q. This function is the first part of the libata error 555 * handler which processes a given list of failed commands. 556 */ 557 void ata_scsi_cmd_error_handler(struct Scsi_Host *host, struct ata_port *ap, 558 struct list_head *eh_work_q) 559 { 560 int i; 561 unsigned long flags; 562 563 /* make sure sff pio task is not running */ 564 ata_sff_flush_pio_task(ap); 565 566 /* synchronize with host lock and sort out timeouts */ 567 568 /* For new EH, all qcs are finished in one of three ways - 569 * normal completion, error completion, and SCSI timeout. 570 * Both completions can race against SCSI timeout. When normal 571 * completion wins, the qc never reaches EH. When error 572 * completion wins, the qc has ATA_QCFLAG_FAILED set. 573 * 574 * When SCSI timeout wins, things are a bit more complex. 575 * Normal or error completion can occur after the timeout but 576 * before this point. In such cases, both types of 577 * completions are honored. 
A scmd is determined to have 578 * timed out iff its associated qc is active and not failed. 579 */ 580 spin_lock_irqsave(ap->lock, flags); 581 if (ap->ops->error_handler) { 582 struct scsi_cmnd *scmd, *tmp; 583 int nr_timedout = 0; 584 585 /* This must occur under the ap->lock as we don't want 586 a polled recovery to race the real interrupt handler 587 588 The lost_interrupt handler checks for any completed but 589 non-notified command and completes much like an IRQ handler. 590 591 We then fall into the error recovery code which will treat 592 this as if normal completion won the race */ 593 594 if (ap->ops->lost_interrupt) 595 ap->ops->lost_interrupt(ap); 596 597 list_for_each_entry_safe(scmd, tmp, eh_work_q, eh_entry) { 598 struct ata_queued_cmd *qc; 599 600 ata_qc_for_each_raw(ap, qc, i) { 601 if (qc->flags & ATA_QCFLAG_ACTIVE && 602 qc->scsicmd == scmd) 603 break; 604 } 605 606 if (i < ATA_MAX_QUEUE) { 607 /* the scmd has an associated qc */ 608 if (!(qc->flags & ATA_QCFLAG_FAILED)) { 609 /* which hasn't failed yet, timeout */ 610 qc->err_mask |= AC_ERR_TIMEOUT; 611 qc->flags |= ATA_QCFLAG_FAILED; 612 nr_timedout++; 613 } 614 } else { 615 /* Normal completion occurred after 616 * SCSI timeout but before this point. 617 * Successfully complete it. 618 */ 619 scmd->retries = scmd->allowed; 620 scsi_eh_finish_cmd(scmd, &ap->eh_done_q); 621 } 622 } 623 624 /* If we have timed out qcs. They belong to EH from 625 * this point but the state of the controller is 626 * unknown. Freeze the port to make sure the IRQ 627 * handler doesn't diddle with those qcs. This must 628 * be done atomically w.r.t. setting QCFLAG_FAILED. 
629 */ 630 if (nr_timedout) 631 __ata_port_freeze(ap); 632 633 634 /* initialize eh_tries */ 635 ap->eh_tries = ATA_EH_MAX_TRIES; 636 } 637 spin_unlock_irqrestore(ap->lock, flags); 638 639 } 640 EXPORT_SYMBOL(ata_scsi_cmd_error_handler); 641 642 /** 643 * ata_scsi_port_error_handler - recover the port after the commands 644 * @host: SCSI host containing the port 645 * @ap: the ATA port 646 * 647 * Handle the recovery of the port @ap after all the commands 648 * have been recovered. 649 */ 650 void ata_scsi_port_error_handler(struct Scsi_Host *host, struct ata_port *ap) 651 { 652 unsigned long flags; 653 654 /* invoke error handler */ 655 if (ap->ops->error_handler) { 656 struct ata_link *link; 657 658 /* acquire EH ownership */ 659 ata_eh_acquire(ap); 660 repeat: 661 /* kill fast drain timer */ 662 del_timer_sync(&ap->fastdrain_timer); 663 664 /* process port resume request */ 665 ata_eh_handle_port_resume(ap); 666 667 /* fetch & clear EH info */ 668 spin_lock_irqsave(ap->lock, flags); 669 670 ata_for_each_link(link, ap, HOST_FIRST) { 671 struct ata_eh_context *ehc = &link->eh_context; 672 struct ata_device *dev; 673 674 memset(&link->eh_context, 0, sizeof(link->eh_context)); 675 link->eh_context.i = link->eh_info; 676 memset(&link->eh_info, 0, sizeof(link->eh_info)); 677 678 ata_for_each_dev(dev, link, ENABLED) { 679 int devno = dev->devno; 680 681 ehc->saved_xfer_mode[devno] = dev->xfer_mode; 682 if (ata_ncq_enabled(dev)) 683 ehc->saved_ncq_enabled |= 1 << devno; 684 } 685 } 686 687 ap->pflags |= ATA_PFLAG_EH_IN_PROGRESS; 688 ap->pflags &= ~ATA_PFLAG_EH_PENDING; 689 ap->excl_link = NULL; /* don't maintain exclusion over EH */ 690 691 spin_unlock_irqrestore(ap->lock, flags); 692 693 /* invoke EH, skip if unloading or suspended */ 694 if (!(ap->pflags & (ATA_PFLAG_UNLOADING | ATA_PFLAG_SUSPENDED))) 695 ap->ops->error_handler(ap); 696 else { 697 /* if unloading, commence suicide */ 698 if ((ap->pflags & ATA_PFLAG_UNLOADING) && 699 !(ap->pflags & ATA_PFLAG_UNLOADED)) 
700 ata_eh_unload(ap); 701 ata_eh_finish(ap); 702 } 703 704 /* process port suspend request */ 705 ata_eh_handle_port_suspend(ap); 706 707 /* Exception might have happened after ->error_handler 708 * recovered the port but before this point. Repeat 709 * EH in such case. 710 */ 711 spin_lock_irqsave(ap->lock, flags); 712 713 if (ap->pflags & ATA_PFLAG_EH_PENDING) { 714 if (--ap->eh_tries) { 715 spin_unlock_irqrestore(ap->lock, flags); 716 goto repeat; 717 } 718 ata_port_err(ap, 719 "EH pending after %d tries, giving up\n", 720 ATA_EH_MAX_TRIES); 721 ap->pflags &= ~ATA_PFLAG_EH_PENDING; 722 } 723 724 /* this run is complete, make sure EH info is clear */ 725 ata_for_each_link(link, ap, HOST_FIRST) 726 memset(&link->eh_info, 0, sizeof(link->eh_info)); 727 728 /* end eh (clear host_eh_scheduled) while holding 729 * ap->lock such that if exception occurs after this 730 * point but before EH completion, SCSI midlayer will 731 * re-initiate EH. 732 */ 733 ap->ops->end_eh(ap); 734 735 spin_unlock_irqrestore(ap->lock, flags); 736 ata_eh_release(ap); 737 } else { 738 WARN_ON(ata_qc_from_tag(ap, ap->link.active_tag) == NULL); 739 ap->ops->eng_timeout(ap); 740 } 741 742 scsi_eh_flush_done_q(&ap->eh_done_q); 743 744 /* clean up */ 745 spin_lock_irqsave(ap->lock, flags); 746 747 if (ap->pflags & ATA_PFLAG_LOADING) 748 ap->pflags &= ~ATA_PFLAG_LOADING; 749 else if ((ap->pflags & ATA_PFLAG_SCSI_HOTPLUG) && 750 !(ap->flags & ATA_FLAG_SAS_HOST)) 751 schedule_delayed_work(&ap->hotplug_task, 0); 752 753 if (ap->pflags & ATA_PFLAG_RECOVERED) 754 ata_port_info(ap, "EH complete\n"); 755 756 ap->pflags &= ~(ATA_PFLAG_SCSI_HOTPLUG | ATA_PFLAG_RECOVERED); 757 758 /* tell wait_eh that we're done */ 759 ap->pflags &= ~ATA_PFLAG_EH_IN_PROGRESS; 760 wake_up_all(&ap->eh_wait_q); 761 762 spin_unlock_irqrestore(ap->lock, flags); 763 } 764 EXPORT_SYMBOL_GPL(ata_scsi_port_error_handler); 765 766 /** 767 * ata_port_wait_eh - Wait for the currently pending EH to complete 768 * @ap: Port to wait EH 
for 769 * 770 * Wait until the currently pending EH is complete. 771 * 772 * LOCKING: 773 * Kernel thread context (may sleep). 774 */ 775 void ata_port_wait_eh(struct ata_port *ap) 776 { 777 unsigned long flags; 778 DEFINE_WAIT(wait); 779 780 retry: 781 spin_lock_irqsave(ap->lock, flags); 782 783 while (ap->pflags & (ATA_PFLAG_EH_PENDING | ATA_PFLAG_EH_IN_PROGRESS)) { 784 prepare_to_wait(&ap->eh_wait_q, &wait, TASK_UNINTERRUPTIBLE); 785 spin_unlock_irqrestore(ap->lock, flags); 786 schedule(); 787 spin_lock_irqsave(ap->lock, flags); 788 } 789 finish_wait(&ap->eh_wait_q, &wait); 790 791 spin_unlock_irqrestore(ap->lock, flags); 792 793 /* make sure SCSI EH is complete */ 794 if (scsi_host_in_recovery(ap->scsi_host)) { 795 ata_msleep(ap, 10); 796 goto retry; 797 } 798 } 799 EXPORT_SYMBOL_GPL(ata_port_wait_eh); 800 801 static int ata_eh_nr_in_flight(struct ata_port *ap) 802 { 803 struct ata_queued_cmd *qc; 804 unsigned int tag; 805 int nr = 0; 806 807 /* count only non-internal commands */ 808 ata_qc_for_each(ap, qc, tag) { 809 if (qc) 810 nr++; 811 } 812 813 return nr; 814 } 815 816 void ata_eh_fastdrain_timerfn(struct timer_list *t) 817 { 818 struct ata_port *ap = from_timer(ap, t, fastdrain_timer); 819 unsigned long flags; 820 int cnt; 821 822 spin_lock_irqsave(ap->lock, flags); 823 824 cnt = ata_eh_nr_in_flight(ap); 825 826 /* are we done? */ 827 if (!cnt) 828 goto out_unlock; 829 830 if (cnt == ap->fastdrain_cnt) { 831 struct ata_queued_cmd *qc; 832 unsigned int tag; 833 834 /* No progress during the last interval, tag all 835 * in-flight qcs as timed out and freeze the port. 
836 */ 837 ata_qc_for_each(ap, qc, tag) { 838 if (qc) 839 qc->err_mask |= AC_ERR_TIMEOUT; 840 } 841 842 ata_port_freeze(ap); 843 } else { 844 /* some qcs have finished, give it another chance */ 845 ap->fastdrain_cnt = cnt; 846 ap->fastdrain_timer.expires = 847 ata_deadline(jiffies, ATA_EH_FASTDRAIN_INTERVAL); 848 add_timer(&ap->fastdrain_timer); 849 } 850 851 out_unlock: 852 spin_unlock_irqrestore(ap->lock, flags); 853 } 854 855 /** 856 * ata_eh_set_pending - set ATA_PFLAG_EH_PENDING and activate fast drain 857 * @ap: target ATA port 858 * @fastdrain: activate fast drain 859 * 860 * Set ATA_PFLAG_EH_PENDING and activate fast drain if @fastdrain 861 * is non-zero and EH wasn't pending before. Fast drain ensures 862 * that EH kicks in in timely manner. 863 * 864 * LOCKING: 865 * spin_lock_irqsave(host lock) 866 */ 867 static void ata_eh_set_pending(struct ata_port *ap, int fastdrain) 868 { 869 int cnt; 870 871 /* already scheduled? */ 872 if (ap->pflags & ATA_PFLAG_EH_PENDING) 873 return; 874 875 ap->pflags |= ATA_PFLAG_EH_PENDING; 876 877 if (!fastdrain) 878 return; 879 880 /* do we have in-flight qcs? */ 881 cnt = ata_eh_nr_in_flight(ap); 882 if (!cnt) 883 return; 884 885 /* activate fast drain */ 886 ap->fastdrain_cnt = cnt; 887 ap->fastdrain_timer.expires = 888 ata_deadline(jiffies, ATA_EH_FASTDRAIN_INTERVAL); 889 add_timer(&ap->fastdrain_timer); 890 } 891 892 /** 893 * ata_qc_schedule_eh - schedule qc for error handling 894 * @qc: command to schedule error handling for 895 * 896 * Schedule error handling for @qc. EH will kick in as soon as 897 * other commands are drained. 898 * 899 * LOCKING: 900 * spin_lock_irqsave(host lock) 901 */ 902 void ata_qc_schedule_eh(struct ata_queued_cmd *qc) 903 { 904 struct ata_port *ap = qc->ap; 905 906 WARN_ON(!ap->ops->error_handler); 907 908 qc->flags |= ATA_QCFLAG_FAILED; 909 ata_eh_set_pending(ap, 1); 910 911 /* The following will fail if timeout has already expired. 
912 * ata_scsi_error() takes care of such scmds on EH entry. 913 * Note that ATA_QCFLAG_FAILED is unconditionally set after 914 * this function completes. 915 */ 916 blk_abort_request(qc->scsicmd->request); 917 } 918 919 /** 920 * ata_std_sched_eh - non-libsas ata_ports issue eh with this common routine 921 * @ap: ATA port to schedule EH for 922 * 923 * LOCKING: inherited from ata_port_schedule_eh 924 * spin_lock_irqsave(host lock) 925 */ 926 void ata_std_sched_eh(struct ata_port *ap) 927 { 928 WARN_ON(!ap->ops->error_handler); 929 930 if (ap->pflags & ATA_PFLAG_INITIALIZING) 931 return; 932 933 ata_eh_set_pending(ap, 1); 934 scsi_schedule_eh(ap->scsi_host); 935 936 DPRINTK("port EH scheduled\n"); 937 } 938 EXPORT_SYMBOL_GPL(ata_std_sched_eh); 939 940 /** 941 * ata_std_end_eh - non-libsas ata_ports complete eh with this common routine 942 * @ap: ATA port to end EH for 943 * 944 * In the libata object model there is a 1:1 mapping of ata_port to 945 * shost, so host fields can be directly manipulated under ap->lock, in 946 * the libsas case we need to hold a lock at the ha->level to coordinate 947 * these events. 948 * 949 * LOCKING: 950 * spin_lock_irqsave(host lock) 951 */ 952 void ata_std_end_eh(struct ata_port *ap) 953 { 954 struct Scsi_Host *host = ap->scsi_host; 955 956 host->host_eh_scheduled = 0; 957 } 958 EXPORT_SYMBOL(ata_std_end_eh); 959 960 961 /** 962 * ata_port_schedule_eh - schedule error handling without a qc 963 * @ap: ATA port to schedule EH for 964 * 965 * Schedule error handling for @ap. EH will kick in as soon as 966 * all commands are drained. 
967 * 968 * LOCKING: 969 * spin_lock_irqsave(host lock) 970 */ 971 void ata_port_schedule_eh(struct ata_port *ap) 972 { 973 /* see: ata_std_sched_eh, unless you know better */ 974 ap->ops->sched_eh(ap); 975 } 976 977 static int ata_do_link_abort(struct ata_port *ap, struct ata_link *link) 978 { 979 struct ata_queued_cmd *qc; 980 int tag, nr_aborted = 0; 981 982 WARN_ON(!ap->ops->error_handler); 983 984 /* we're gonna abort all commands, no need for fast drain */ 985 ata_eh_set_pending(ap, 0); 986 987 /* include internal tag in iteration */ 988 ata_qc_for_each_with_internal(ap, qc, tag) { 989 if (qc && (!link || qc->dev->link == link)) { 990 qc->flags |= ATA_QCFLAG_FAILED; 991 ata_qc_complete(qc); 992 nr_aborted++; 993 } 994 } 995 996 if (!nr_aborted) 997 ata_port_schedule_eh(ap); 998 999 return nr_aborted; 1000 } 1001 1002 /** 1003 * ata_link_abort - abort all qc's on the link 1004 * @link: ATA link to abort qc's for 1005 * 1006 * Abort all active qc's active on @link and schedule EH. 1007 * 1008 * LOCKING: 1009 * spin_lock_irqsave(host lock) 1010 * 1011 * RETURNS: 1012 * Number of aborted qc's. 1013 */ 1014 int ata_link_abort(struct ata_link *link) 1015 { 1016 return ata_do_link_abort(link->ap, link); 1017 } 1018 1019 /** 1020 * ata_port_abort - abort all qc's on the port 1021 * @ap: ATA port to abort qc's for 1022 * 1023 * Abort all active qc's of @ap and schedule EH. 1024 * 1025 * LOCKING: 1026 * spin_lock_irqsave(host_set lock) 1027 * 1028 * RETURNS: 1029 * Number of aborted qc's. 1030 */ 1031 int ata_port_abort(struct ata_port *ap) 1032 { 1033 return ata_do_link_abort(ap, NULL); 1034 } 1035 1036 /** 1037 * __ata_port_freeze - freeze port 1038 * @ap: ATA port to freeze 1039 * 1040 * This function is called when HSM violation or some other 1041 * condition disrupts normal operation of the port. Frozen port 1042 * is not allowed to perform any operation until the port is 1043 * thawed, which usually follows a successful reset. 
1044 * 1045 * ap->ops->freeze() callback can be used for freezing the port 1046 * hardware-wise (e.g. mask interrupt and stop DMA engine). If a 1047 * port cannot be frozen hardware-wise, the interrupt handler 1048 * must ack and clear interrupts unconditionally while the port 1049 * is frozen. 1050 * 1051 * LOCKING: 1052 * spin_lock_irqsave(host lock) 1053 */ 1054 static void __ata_port_freeze(struct ata_port *ap) 1055 { 1056 WARN_ON(!ap->ops->error_handler); 1057 1058 if (ap->ops->freeze) 1059 ap->ops->freeze(ap); 1060 1061 ap->pflags |= ATA_PFLAG_FROZEN; 1062 1063 DPRINTK("ata%u port frozen\n", ap->print_id); 1064 } 1065 1066 /** 1067 * ata_port_freeze - abort & freeze port 1068 * @ap: ATA port to freeze 1069 * 1070 * Abort and freeze @ap. The freeze operation must be called 1071 * first, because some hardware requires special operations 1072 * before the taskfile registers are accessible. 1073 * 1074 * LOCKING: 1075 * spin_lock_irqsave(host lock) 1076 * 1077 * RETURNS: 1078 * Number of aborted commands. 1079 */ 1080 int ata_port_freeze(struct ata_port *ap) 1081 { 1082 int nr_aborted; 1083 1084 WARN_ON(!ap->ops->error_handler); 1085 1086 __ata_port_freeze(ap); 1087 nr_aborted = ata_port_abort(ap); 1088 1089 return nr_aborted; 1090 } 1091 1092 /** 1093 * sata_async_notification - SATA async notification handler 1094 * @ap: ATA port where async notification is received 1095 * 1096 * Handler to be called when async notification via SDB FIS is 1097 * received. This function schedules EH if necessary. 1098 * 1099 * LOCKING: 1100 * spin_lock_irqsave(host lock) 1101 * 1102 * RETURNS: 1103 * 1 if EH is scheduled, 0 otherwise. 
1104 */ 1105 int sata_async_notification(struct ata_port *ap) 1106 { 1107 u32 sntf; 1108 int rc; 1109 1110 if (!(ap->flags & ATA_FLAG_AN)) 1111 return 0; 1112 1113 rc = sata_scr_read(&ap->link, SCR_NOTIFICATION, &sntf); 1114 if (rc == 0) 1115 sata_scr_write(&ap->link, SCR_NOTIFICATION, sntf); 1116 1117 if (!sata_pmp_attached(ap) || rc) { 1118 /* PMP is not attached or SNTF is not available */ 1119 if (!sata_pmp_attached(ap)) { 1120 /* PMP is not attached. Check whether ATAPI 1121 * AN is configured. If so, notify media 1122 * change. 1123 */ 1124 struct ata_device *dev = ap->link.device; 1125 1126 if ((dev->class == ATA_DEV_ATAPI) && 1127 (dev->flags & ATA_DFLAG_AN)) 1128 ata_scsi_media_change_notify(dev); 1129 return 0; 1130 } else { 1131 /* PMP is attached but SNTF is not available. 1132 * ATAPI async media change notification is 1133 * not used. The PMP must be reporting PHY 1134 * status change, schedule EH. 1135 */ 1136 ata_port_schedule_eh(ap); 1137 return 1; 1138 } 1139 } else { 1140 /* PMP is attached and SNTF is available */ 1141 struct ata_link *link; 1142 1143 /* check and notify ATAPI AN */ 1144 ata_for_each_link(link, ap, EDGE) { 1145 if (!(sntf & (1 << link->pmp))) 1146 continue; 1147 1148 if ((link->device->class == ATA_DEV_ATAPI) && 1149 (link->device->flags & ATA_DFLAG_AN)) 1150 ata_scsi_media_change_notify(link->device); 1151 } 1152 1153 /* If PMP is reporting that PHY status of some 1154 * downstream ports has changed, schedule EH. 1155 */ 1156 if (sntf & (1 << SATA_PMP_CTRL_PORT)) { 1157 ata_port_schedule_eh(ap); 1158 return 1; 1159 } 1160 1161 return 0; 1162 } 1163 } 1164 1165 /** 1166 * ata_eh_freeze_port - EH helper to freeze port 1167 * @ap: ATA port to freeze 1168 * 1169 * Freeze @ap. 1170 * 1171 * LOCKING: 1172 * None. 
1173 */ 1174 void ata_eh_freeze_port(struct ata_port *ap) 1175 { 1176 unsigned long flags; 1177 1178 if (!ap->ops->error_handler) 1179 return; 1180 1181 spin_lock_irqsave(ap->lock, flags); 1182 __ata_port_freeze(ap); 1183 spin_unlock_irqrestore(ap->lock, flags); 1184 } 1185 1186 /** 1187 * ata_port_thaw_port - EH helper to thaw port 1188 * @ap: ATA port to thaw 1189 * 1190 * Thaw frozen port @ap. 1191 * 1192 * LOCKING: 1193 * None. 1194 */ 1195 void ata_eh_thaw_port(struct ata_port *ap) 1196 { 1197 unsigned long flags; 1198 1199 if (!ap->ops->error_handler) 1200 return; 1201 1202 spin_lock_irqsave(ap->lock, flags); 1203 1204 ap->pflags &= ~ATA_PFLAG_FROZEN; 1205 1206 if (ap->ops->thaw) 1207 ap->ops->thaw(ap); 1208 1209 spin_unlock_irqrestore(ap->lock, flags); 1210 1211 DPRINTK("ata%u port thawed\n", ap->print_id); 1212 } 1213 1214 static void ata_eh_scsidone(struct scsi_cmnd *scmd) 1215 { 1216 /* nada */ 1217 } 1218 1219 static void __ata_eh_qc_complete(struct ata_queued_cmd *qc) 1220 { 1221 struct ata_port *ap = qc->ap; 1222 struct scsi_cmnd *scmd = qc->scsicmd; 1223 unsigned long flags; 1224 1225 spin_lock_irqsave(ap->lock, flags); 1226 qc->scsidone = ata_eh_scsidone; 1227 __ata_qc_complete(qc); 1228 WARN_ON(ata_tag_valid(qc->tag)); 1229 spin_unlock_irqrestore(ap->lock, flags); 1230 1231 scsi_eh_finish_cmd(scmd, &ap->eh_done_q); 1232 } 1233 1234 /** 1235 * ata_eh_qc_complete - Complete an active ATA command from EH 1236 * @qc: Command to complete 1237 * 1238 * Indicate to the mid and upper layers that an ATA command has 1239 * completed. To be used from EH. 1240 */ 1241 void ata_eh_qc_complete(struct ata_queued_cmd *qc) 1242 { 1243 struct scsi_cmnd *scmd = qc->scsicmd; 1244 scmd->retries = scmd->allowed; 1245 __ata_eh_qc_complete(qc); 1246 } 1247 1248 /** 1249 * ata_eh_qc_retry - Tell midlayer to retry an ATA command after EH 1250 * @qc: Command to retry 1251 * 1252 * Indicate to the mid and upper layers that an ATA command 1253 * should be retried. 
To be used from EH. 1254 * 1255 * SCSI midlayer limits the number of retries to scmd->allowed. 1256 * scmd->allowed is incremented for commands which get retried 1257 * due to unrelated failures (qc->err_mask is zero). 1258 */ 1259 void ata_eh_qc_retry(struct ata_queued_cmd *qc) 1260 { 1261 struct scsi_cmnd *scmd = qc->scsicmd; 1262 if (!qc->err_mask) 1263 scmd->allowed++; 1264 __ata_eh_qc_complete(qc); 1265 } 1266 1267 /** 1268 * ata_dev_disable - disable ATA device 1269 * @dev: ATA device to disable 1270 * 1271 * Disable @dev. 1272 * 1273 * Locking: 1274 * EH context. 1275 */ 1276 void ata_dev_disable(struct ata_device *dev) 1277 { 1278 if (!ata_dev_enabled(dev)) 1279 return; 1280 1281 if (ata_msg_drv(dev->link->ap)) 1282 ata_dev_warn(dev, "disabled\n"); 1283 ata_acpi_on_disable(dev); 1284 ata_down_xfermask_limit(dev, ATA_DNXFER_FORCE_PIO0 | ATA_DNXFER_QUIET); 1285 dev->class++; 1286 1287 /* From now till the next successful probe, ering is used to 1288 * track probe failures. Clear accumulated device error info. 1289 */ 1290 ata_ering_clear(&dev->ering); 1291 } 1292 1293 /** 1294 * ata_eh_detach_dev - detach ATA device 1295 * @dev: ATA device to detach 1296 * 1297 * Detach @dev. 1298 * 1299 * LOCKING: 1300 * None. 
1301 */ 1302 void ata_eh_detach_dev(struct ata_device *dev) 1303 { 1304 struct ata_link *link = dev->link; 1305 struct ata_port *ap = link->ap; 1306 struct ata_eh_context *ehc = &link->eh_context; 1307 unsigned long flags; 1308 1309 ata_dev_disable(dev); 1310 1311 spin_lock_irqsave(ap->lock, flags); 1312 1313 dev->flags &= ~ATA_DFLAG_DETACH; 1314 1315 if (ata_scsi_offline_dev(dev)) { 1316 dev->flags |= ATA_DFLAG_DETACHED; 1317 ap->pflags |= ATA_PFLAG_SCSI_HOTPLUG; 1318 } 1319 1320 /* clear per-dev EH info */ 1321 ata_eh_clear_action(link, dev, &link->eh_info, ATA_EH_PERDEV_MASK); 1322 ata_eh_clear_action(link, dev, &link->eh_context.i, ATA_EH_PERDEV_MASK); 1323 ehc->saved_xfer_mode[dev->devno] = 0; 1324 ehc->saved_ncq_enabled &= ~(1 << dev->devno); 1325 1326 spin_unlock_irqrestore(ap->lock, flags); 1327 } 1328 1329 /** 1330 * ata_eh_about_to_do - about to perform eh_action 1331 * @link: target ATA link 1332 * @dev: target ATA dev for per-dev action (can be NULL) 1333 * @action: action about to be performed 1334 * 1335 * Called just before performing EH actions to clear related bits 1336 * in @link->eh_info such that eh actions are not unnecessarily 1337 * repeated. 1338 * 1339 * LOCKING: 1340 * None. 1341 */ 1342 void ata_eh_about_to_do(struct ata_link *link, struct ata_device *dev, 1343 unsigned int action) 1344 { 1345 struct ata_port *ap = link->ap; 1346 struct ata_eh_info *ehi = &link->eh_info; 1347 struct ata_eh_context *ehc = &link->eh_context; 1348 unsigned long flags; 1349 1350 spin_lock_irqsave(ap->lock, flags); 1351 1352 ata_eh_clear_action(link, dev, ehi, action); 1353 1354 /* About to take EH action, set RECOVERED. Ignore actions on 1355 * slave links as master will do them again. 
1356 */ 1357 if (!(ehc->i.flags & ATA_EHI_QUIET) && link != ap->slave_link) 1358 ap->pflags |= ATA_PFLAG_RECOVERED; 1359 1360 spin_unlock_irqrestore(ap->lock, flags); 1361 } 1362 1363 /** 1364 * ata_eh_done - EH action complete 1365 * @link: ATA link for which EH actions are complete 1366 * @dev: target ATA dev for per-dev action (can be NULL) 1367 * @action: action just completed 1368 * 1369 * Called right after performing EH actions to clear related bits 1370 * in @link->eh_context. 1371 * 1372 * LOCKING: 1373 * None. 1374 */ 1375 void ata_eh_done(struct ata_link *link, struct ata_device *dev, 1376 unsigned int action) 1377 { 1378 struct ata_eh_context *ehc = &link->eh_context; 1379 1380 ata_eh_clear_action(link, dev, &ehc->i, action); 1381 } 1382 1383 /** 1384 * ata_err_string - convert err_mask to descriptive string 1385 * @err_mask: error mask to convert to string 1386 * 1387 * Convert @err_mask to descriptive string. Errors are 1388 * prioritized according to severity and only the most severe 1389 * error is reported. 1390 * 1391 * LOCKING: 1392 * None. 
1393 * 1394 * RETURNS: 1395 * Descriptive string for @err_mask 1396 */ 1397 static const char *ata_err_string(unsigned int err_mask) 1398 { 1399 if (err_mask & AC_ERR_HOST_BUS) 1400 return "host bus error"; 1401 if (err_mask & AC_ERR_ATA_BUS) 1402 return "ATA bus error"; 1403 if (err_mask & AC_ERR_TIMEOUT) 1404 return "timeout"; 1405 if (err_mask & AC_ERR_HSM) 1406 return "HSM violation"; 1407 if (err_mask & AC_ERR_SYSTEM) 1408 return "internal error"; 1409 if (err_mask & AC_ERR_MEDIA) 1410 return "media error"; 1411 if (err_mask & AC_ERR_INVALID) 1412 return "invalid argument"; 1413 if (err_mask & AC_ERR_DEV) 1414 return "device error"; 1415 if (err_mask & AC_ERR_NCQ) 1416 return "NCQ error"; 1417 if (err_mask & AC_ERR_NODEV_HINT) 1418 return "Polling detection error"; 1419 return "unknown error"; 1420 } 1421 1422 /** 1423 * ata_eh_read_log_10h - Read log page 10h for NCQ error details 1424 * @dev: Device to read log page 10h from 1425 * @tag: Resulting tag of the failed command 1426 * @tf: Resulting taskfile registers of the failed command 1427 * 1428 * Read log page 10h to obtain NCQ error details and clear error 1429 * condition. 1430 * 1431 * LOCKING: 1432 * Kernel thread context (may sleep). 1433 * 1434 * RETURNS: 1435 * 0 on success, -errno otherwise. 
1436 */ 1437 static int ata_eh_read_log_10h(struct ata_device *dev, 1438 int *tag, struct ata_taskfile *tf) 1439 { 1440 u8 *buf = dev->link->ap->sector_buf; 1441 unsigned int err_mask; 1442 u8 csum; 1443 int i; 1444 1445 err_mask = ata_read_log_page(dev, ATA_LOG_SATA_NCQ, 0, buf, 1); 1446 if (err_mask) 1447 return -EIO; 1448 1449 csum = 0; 1450 for (i = 0; i < ATA_SECT_SIZE; i++) 1451 csum += buf[i]; 1452 if (csum) 1453 ata_dev_warn(dev, "invalid checksum 0x%x on log page 10h\n", 1454 csum); 1455 1456 if (buf[0] & 0x80) 1457 return -ENOENT; 1458 1459 *tag = buf[0] & 0x1f; 1460 1461 tf->command = buf[2]; 1462 tf->feature = buf[3]; 1463 tf->lbal = buf[4]; 1464 tf->lbam = buf[5]; 1465 tf->lbah = buf[6]; 1466 tf->device = buf[7]; 1467 tf->hob_lbal = buf[8]; 1468 tf->hob_lbam = buf[9]; 1469 tf->hob_lbah = buf[10]; 1470 tf->nsect = buf[12]; 1471 tf->hob_nsect = buf[13]; 1472 if (dev->class == ATA_DEV_ZAC && ata_id_has_ncq_autosense(dev->id)) 1473 tf->auxiliary = buf[14] << 16 | buf[15] << 8 | buf[16]; 1474 1475 return 0; 1476 } 1477 1478 /** 1479 * atapi_eh_tur - perform ATAPI TEST_UNIT_READY 1480 * @dev: target ATAPI device 1481 * @r_sense_key: out parameter for sense_key 1482 * 1483 * Perform ATAPI TEST_UNIT_READY. 1484 * 1485 * LOCKING: 1486 * EH context (may sleep). 1487 * 1488 * RETURNS: 1489 * 0 on success, AC_ERR_* mask on failure. 
1490 */ 1491 unsigned int atapi_eh_tur(struct ata_device *dev, u8 *r_sense_key) 1492 { 1493 u8 cdb[ATAPI_CDB_LEN] = { TEST_UNIT_READY, 0, 0, 0, 0, 0 }; 1494 struct ata_taskfile tf; 1495 unsigned int err_mask; 1496 1497 ata_tf_init(dev, &tf); 1498 1499 tf.flags |= ATA_TFLAG_ISADDR | ATA_TFLAG_DEVICE; 1500 tf.command = ATA_CMD_PACKET; 1501 tf.protocol = ATAPI_PROT_NODATA; 1502 1503 err_mask = ata_exec_internal(dev, &tf, cdb, DMA_NONE, NULL, 0, 0); 1504 if (err_mask == AC_ERR_DEV) 1505 *r_sense_key = tf.feature >> 4; 1506 return err_mask; 1507 } 1508 1509 /** 1510 * ata_eh_request_sense - perform REQUEST_SENSE_DATA_EXT 1511 * @qc: qc to perform REQUEST_SENSE_SENSE_DATA_EXT to 1512 * @cmd: scsi command for which the sense code should be set 1513 * 1514 * Perform REQUEST_SENSE_DATA_EXT after the device reported CHECK 1515 * SENSE. This function is an EH helper. 1516 * 1517 * LOCKING: 1518 * Kernel thread context (may sleep). 1519 */ 1520 static void ata_eh_request_sense(struct ata_queued_cmd *qc, 1521 struct scsi_cmnd *cmd) 1522 { 1523 struct ata_device *dev = qc->dev; 1524 struct ata_taskfile tf; 1525 unsigned int err_mask; 1526 1527 if (qc->ap->pflags & ATA_PFLAG_FROZEN) { 1528 ata_dev_warn(dev, "sense data available but port frozen\n"); 1529 return; 1530 } 1531 1532 if (!cmd || qc->flags & ATA_QCFLAG_SENSE_VALID) 1533 return; 1534 1535 if (!ata_id_sense_reporting_enabled(dev->id)) { 1536 ata_dev_warn(qc->dev, "sense data reporting disabled\n"); 1537 return; 1538 } 1539 1540 DPRINTK("ATA request sense\n"); 1541 1542 ata_tf_init(dev, &tf); 1543 tf.flags |= ATA_TFLAG_ISADDR | ATA_TFLAG_DEVICE; 1544 tf.flags |= ATA_TFLAG_LBA | ATA_TFLAG_LBA48; 1545 tf.command = ATA_CMD_REQ_SENSE_DATA; 1546 tf.protocol = ATA_PROT_NODATA; 1547 1548 err_mask = ata_exec_internal(dev, &tf, NULL, DMA_NONE, NULL, 0, 0); 1549 /* Ignore err_mask; ATA_ERR might be set */ 1550 if (tf.command & ATA_SENSE) { 1551 ata_scsi_set_sense(dev, cmd, tf.lbah, tf.lbam, tf.lbal); 1552 qc->flags |= 
ATA_QCFLAG_SENSE_VALID; 1553 } else { 1554 ata_dev_warn(dev, "request sense failed stat %02x emask %x\n", 1555 tf.command, err_mask); 1556 } 1557 } 1558 1559 /** 1560 * atapi_eh_request_sense - perform ATAPI REQUEST_SENSE 1561 * @dev: device to perform REQUEST_SENSE to 1562 * @sense_buf: result sense data buffer (SCSI_SENSE_BUFFERSIZE bytes long) 1563 * @dfl_sense_key: default sense key to use 1564 * 1565 * Perform ATAPI REQUEST_SENSE after the device reported CHECK 1566 * SENSE. This function is EH helper. 1567 * 1568 * LOCKING: 1569 * Kernel thread context (may sleep). 1570 * 1571 * RETURNS: 1572 * 0 on success, AC_ERR_* mask on failure 1573 */ 1574 unsigned int atapi_eh_request_sense(struct ata_device *dev, 1575 u8 *sense_buf, u8 dfl_sense_key) 1576 { 1577 u8 cdb[ATAPI_CDB_LEN] = 1578 { REQUEST_SENSE, 0, 0, 0, SCSI_SENSE_BUFFERSIZE, 0 }; 1579 struct ata_port *ap = dev->link->ap; 1580 struct ata_taskfile tf; 1581 1582 DPRINTK("ATAPI request sense\n"); 1583 1584 memset(sense_buf, 0, SCSI_SENSE_BUFFERSIZE); 1585 1586 /* initialize sense_buf with the error register, 1587 * for the case where they are -not- overwritten 1588 */ 1589 sense_buf[0] = 0x70; 1590 sense_buf[2] = dfl_sense_key; 1591 1592 /* some devices time out if garbage left in tf */ 1593 ata_tf_init(dev, &tf); 1594 1595 tf.flags |= ATA_TFLAG_ISADDR | ATA_TFLAG_DEVICE; 1596 tf.command = ATA_CMD_PACKET; 1597 1598 /* is it pointless to prefer PIO for "safety reasons"? 
*/ 1599 if (ap->flags & ATA_FLAG_PIO_DMA) { 1600 tf.protocol = ATAPI_PROT_DMA; 1601 tf.feature |= ATAPI_PKT_DMA; 1602 } else { 1603 tf.protocol = ATAPI_PROT_PIO; 1604 tf.lbam = SCSI_SENSE_BUFFERSIZE; 1605 tf.lbah = 0; 1606 } 1607 1608 return ata_exec_internal(dev, &tf, cdb, DMA_FROM_DEVICE, 1609 sense_buf, SCSI_SENSE_BUFFERSIZE, 0); 1610 } 1611 1612 /** 1613 * ata_eh_analyze_serror - analyze SError for a failed port 1614 * @link: ATA link to analyze SError for 1615 * 1616 * Analyze SError if available and further determine cause of 1617 * failure. 1618 * 1619 * LOCKING: 1620 * None. 1621 */ 1622 static void ata_eh_analyze_serror(struct ata_link *link) 1623 { 1624 struct ata_eh_context *ehc = &link->eh_context; 1625 u32 serror = ehc->i.serror; 1626 unsigned int err_mask = 0, action = 0; 1627 u32 hotplug_mask; 1628 1629 if (serror & (SERR_PERSISTENT | SERR_DATA)) { 1630 err_mask |= AC_ERR_ATA_BUS; 1631 action |= ATA_EH_RESET; 1632 } 1633 if (serror & SERR_PROTOCOL) { 1634 err_mask |= AC_ERR_HSM; 1635 action |= ATA_EH_RESET; 1636 } 1637 if (serror & SERR_INTERNAL) { 1638 err_mask |= AC_ERR_SYSTEM; 1639 action |= ATA_EH_RESET; 1640 } 1641 1642 /* Determine whether a hotplug event has occurred. Both 1643 * SError.N/X are considered hotplug events for enabled or 1644 * host links. For disabled PMP links, only N bit is 1645 * considered as X bit is left at 1 for link plugging. 
1646 */ 1647 if (link->lpm_policy > ATA_LPM_MAX_POWER) 1648 hotplug_mask = 0; /* hotplug doesn't work w/ LPM */ 1649 else if (!(link->flags & ATA_LFLAG_DISABLED) || ata_is_host_link(link)) 1650 hotplug_mask = SERR_PHYRDY_CHG | SERR_DEV_XCHG; 1651 else 1652 hotplug_mask = SERR_PHYRDY_CHG; 1653 1654 if (serror & hotplug_mask) 1655 ata_ehi_hotplugged(&ehc->i); 1656 1657 ehc->i.err_mask |= err_mask; 1658 ehc->i.action |= action; 1659 } 1660 1661 /** 1662 * ata_eh_analyze_ncq_error - analyze NCQ error 1663 * @link: ATA link to analyze NCQ error for 1664 * 1665 * Read log page 10h, determine the offending qc and acquire 1666 * error status TF. For NCQ device errors, all LLDDs have to do 1667 * is setting AC_ERR_DEV in ehi->err_mask. This function takes 1668 * care of the rest. 1669 * 1670 * LOCKING: 1671 * Kernel thread context (may sleep). 1672 */ 1673 void ata_eh_analyze_ncq_error(struct ata_link *link) 1674 { 1675 struct ata_port *ap = link->ap; 1676 struct ata_eh_context *ehc = &link->eh_context; 1677 struct ata_device *dev = link->device; 1678 struct ata_queued_cmd *qc; 1679 struct ata_taskfile tf; 1680 int tag, rc; 1681 1682 /* if frozen, we can't do much */ 1683 if (ap->pflags & ATA_PFLAG_FROZEN) 1684 return; 1685 1686 /* is it NCQ device error? */ 1687 if (!link->sactive || !(ehc->i.err_mask & AC_ERR_DEV)) 1688 return; 1689 1690 /* has LLDD analyzed already? 
*/ 1691 ata_qc_for_each_raw(ap, qc, tag) { 1692 if (!(qc->flags & ATA_QCFLAG_FAILED)) 1693 continue; 1694 1695 if (qc->err_mask) 1696 return; 1697 } 1698 1699 /* okay, this error is ours */ 1700 memset(&tf, 0, sizeof(tf)); 1701 rc = ata_eh_read_log_10h(dev, &tag, &tf); 1702 if (rc) { 1703 ata_link_err(link, "failed to read log page 10h (errno=%d)\n", 1704 rc); 1705 return; 1706 } 1707 1708 if (!(link->sactive & (1 << tag))) { 1709 ata_link_err(link, "log page 10h reported inactive tag %d\n", 1710 tag); 1711 return; 1712 } 1713 1714 /* we've got the perpetrator, condemn it */ 1715 qc = __ata_qc_from_tag(ap, tag); 1716 memcpy(&qc->result_tf, &tf, sizeof(tf)); 1717 qc->result_tf.flags = ATA_TFLAG_ISADDR | ATA_TFLAG_LBA | ATA_TFLAG_LBA48; 1718 qc->err_mask |= AC_ERR_DEV | AC_ERR_NCQ; 1719 if (dev->class == ATA_DEV_ZAC && 1720 ((qc->result_tf.command & ATA_SENSE) || qc->result_tf.auxiliary)) { 1721 char sense_key, asc, ascq; 1722 1723 sense_key = (qc->result_tf.auxiliary >> 16) & 0xff; 1724 asc = (qc->result_tf.auxiliary >> 8) & 0xff; 1725 ascq = qc->result_tf.auxiliary & 0xff; 1726 ata_scsi_set_sense(dev, qc->scsicmd, sense_key, asc, ascq); 1727 ata_scsi_set_sense_information(dev, qc->scsicmd, 1728 &qc->result_tf); 1729 qc->flags |= ATA_QCFLAG_SENSE_VALID; 1730 } 1731 1732 ehc->i.err_mask &= ~AC_ERR_DEV; 1733 } 1734 1735 /** 1736 * ata_eh_analyze_tf - analyze taskfile of a failed qc 1737 * @qc: qc to analyze 1738 * @tf: Taskfile registers to analyze 1739 * 1740 * Analyze taskfile of @qc and further determine cause of 1741 * failure. This function also requests ATAPI sense data if 1742 * available. 1743 * 1744 * LOCKING: 1745 * Kernel thread context (may sleep). 
1746 * 1747 * RETURNS: 1748 * Determined recovery action 1749 */ 1750 static unsigned int ata_eh_analyze_tf(struct ata_queued_cmd *qc, 1751 const struct ata_taskfile *tf) 1752 { 1753 unsigned int tmp, action = 0; 1754 u8 stat = tf->command, err = tf->feature; 1755 1756 if ((stat & (ATA_BUSY | ATA_DRQ | ATA_DRDY)) != ATA_DRDY) { 1757 qc->err_mask |= AC_ERR_HSM; 1758 return ATA_EH_RESET; 1759 } 1760 1761 if (stat & (ATA_ERR | ATA_DF)) { 1762 qc->err_mask |= AC_ERR_DEV; 1763 /* 1764 * Sense data reporting does not work if the 1765 * device fault bit is set. 1766 */ 1767 if (stat & ATA_DF) 1768 stat &= ~ATA_SENSE; 1769 } else { 1770 return 0; 1771 } 1772 1773 switch (qc->dev->class) { 1774 case ATA_DEV_ZAC: 1775 if (stat & ATA_SENSE) 1776 ata_eh_request_sense(qc, qc->scsicmd); 1777 /* fall through */ 1778 case ATA_DEV_ATA: 1779 if (err & ATA_ICRC) 1780 qc->err_mask |= AC_ERR_ATA_BUS; 1781 if (err & (ATA_UNC | ATA_AMNF)) 1782 qc->err_mask |= AC_ERR_MEDIA; 1783 if (err & ATA_IDNF) 1784 qc->err_mask |= AC_ERR_INVALID; 1785 break; 1786 1787 case ATA_DEV_ATAPI: 1788 if (!(qc->ap->pflags & ATA_PFLAG_FROZEN)) { 1789 tmp = atapi_eh_request_sense(qc->dev, 1790 qc->scsicmd->sense_buffer, 1791 qc->result_tf.feature >> 4); 1792 if (!tmp) 1793 qc->flags |= ATA_QCFLAG_SENSE_VALID; 1794 else 1795 qc->err_mask |= tmp; 1796 } 1797 } 1798 1799 if (qc->flags & ATA_QCFLAG_SENSE_VALID) { 1800 int ret = scsi_check_sense(qc->scsicmd); 1801 /* 1802 * SUCCESS here means that the sense code could be 1803 * evaluated and should be passed to the upper layers 1804 * for correct evaluation. 1805 * FAILED means the sense code could not be interpreted 1806 * and the device would need to be reset. 1807 * NEEDS_RETRY and ADD_TO_MLQUEUE means that the 1808 * command would need to be retried. 
1809 */ 1810 if (ret == NEEDS_RETRY || ret == ADD_TO_MLQUEUE) { 1811 qc->flags |= ATA_QCFLAG_RETRY; 1812 qc->err_mask |= AC_ERR_OTHER; 1813 } else if (ret != SUCCESS) { 1814 qc->err_mask |= AC_ERR_HSM; 1815 } 1816 } 1817 if (qc->err_mask & (AC_ERR_HSM | AC_ERR_TIMEOUT | AC_ERR_ATA_BUS)) 1818 action |= ATA_EH_RESET; 1819 1820 return action; 1821 } 1822 1823 static int ata_eh_categorize_error(unsigned int eflags, unsigned int err_mask, 1824 int *xfer_ok) 1825 { 1826 int base = 0; 1827 1828 if (!(eflags & ATA_EFLAG_DUBIOUS_XFER)) 1829 *xfer_ok = 1; 1830 1831 if (!*xfer_ok) 1832 base = ATA_ECAT_DUBIOUS_NONE; 1833 1834 if (err_mask & AC_ERR_ATA_BUS) 1835 return base + ATA_ECAT_ATA_BUS; 1836 1837 if (err_mask & AC_ERR_TIMEOUT) 1838 return base + ATA_ECAT_TOUT_HSM; 1839 1840 if (eflags & ATA_EFLAG_IS_IO) { 1841 if (err_mask & AC_ERR_HSM) 1842 return base + ATA_ECAT_TOUT_HSM; 1843 if ((err_mask & 1844 (AC_ERR_DEV|AC_ERR_MEDIA|AC_ERR_INVALID)) == AC_ERR_DEV) 1845 return base + ATA_ECAT_UNK_DEV; 1846 } 1847 1848 return 0; 1849 } 1850 1851 struct speed_down_verdict_arg { 1852 u64 since; 1853 int xfer_ok; 1854 int nr_errors[ATA_ECAT_NR]; 1855 }; 1856 1857 static int speed_down_verdict_cb(struct ata_ering_entry *ent, void *void_arg) 1858 { 1859 struct speed_down_verdict_arg *arg = void_arg; 1860 int cat; 1861 1862 if ((ent->eflags & ATA_EFLAG_OLD_ER) || (ent->timestamp < arg->since)) 1863 return -1; 1864 1865 cat = ata_eh_categorize_error(ent->eflags, ent->err_mask, 1866 &arg->xfer_ok); 1867 arg->nr_errors[cat]++; 1868 1869 return 0; 1870 } 1871 1872 /** 1873 * ata_eh_speed_down_verdict - Determine speed down verdict 1874 * @dev: Device of interest 1875 * 1876 * This function examines error ring of @dev and determines 1877 * whether NCQ needs to be turned off, transfer speed should be 1878 * stepped down, or falling back to PIO is necessary. 
 *
 *	ECAT_ATA_BUS	: ATA_BUS error for any command
 *
 *	ECAT_TOUT_HSM	: TIMEOUT for any command or HSM violation for
 *			  IO commands
 *
 *	ECAT_UNK_DEV	: Unknown DEV error for IO commands
 *
 *	ECAT_DUBIOUS_*	: Identical to above three but occurred while
 *			  data transfer hasn't been verified.
 *
 *	Verdicts are
 *
 *	NCQ_OFF		: Turn off NCQ.
 *
 *	SPEED_DOWN	: Speed down transfer speed but don't fall back
 *			  to PIO.
 *
 *	FALLBACK_TO_PIO	: Fall back to PIO.
 *
 *	Even if multiple verdicts are returned, only one action is
 *	taken per error.  An action triggered by non-DUBIOUS errors
 *	clears ering, while one triggered by DUBIOUS_* errors doesn't.
 *	This is to expedite speed down decisions right after device is
 *	initially configured.
 *
 *	The following are speed down rules.  #1 and #2 deal with
 *	DUBIOUS errors.
 *
 *	1. If more than one DUBIOUS_ATA_BUS or DUBIOUS_TOUT_HSM errors
 *	   occurred during last 5 mins, SPEED_DOWN and FALLBACK_TO_PIO.
 *
 *	2. If more than one DUBIOUS_TOUT_HSM or DUBIOUS_UNK_DEV errors
 *	   occurred during last 5 mins, NCQ_OFF.
 *
 *	3. If more than 6 ATA_BUS, TOUT_HSM or UNK_DEV errors
 *	   occurred during last 5 mins, FALLBACK_TO_PIO.
 *
 *	4. If more than 3 TOUT_HSM or UNK_DEV errors occurred
 *	   during last 10 mins, NCQ_OFF.
 *
 *	5. If more than 3 ATA_BUS or TOUT_HSM errors, or more than 6
 *	   UNK_DEV errors occurred during last 10 mins, SPEED_DOWN.
 *
 *	LOCKING:
 *	Inherited from caller.
 *
 *	RETURNS:
 *	OR of ATA_EH_SPDN_* flags.
1928 */ 1929 static unsigned int ata_eh_speed_down_verdict(struct ata_device *dev) 1930 { 1931 const u64 j5mins = 5LLU * 60 * HZ, j10mins = 10LLU * 60 * HZ; 1932 u64 j64 = get_jiffies_64(); 1933 struct speed_down_verdict_arg arg; 1934 unsigned int verdict = 0; 1935 1936 /* scan past 5 mins of error history */ 1937 memset(&arg, 0, sizeof(arg)); 1938 arg.since = j64 - min(j64, j5mins); 1939 ata_ering_map(&dev->ering, speed_down_verdict_cb, &arg); 1940 1941 if (arg.nr_errors[ATA_ECAT_DUBIOUS_ATA_BUS] + 1942 arg.nr_errors[ATA_ECAT_DUBIOUS_TOUT_HSM] > 1) 1943 verdict |= ATA_EH_SPDN_SPEED_DOWN | 1944 ATA_EH_SPDN_FALLBACK_TO_PIO | ATA_EH_SPDN_KEEP_ERRORS; 1945 1946 if (arg.nr_errors[ATA_ECAT_DUBIOUS_TOUT_HSM] + 1947 arg.nr_errors[ATA_ECAT_DUBIOUS_UNK_DEV] > 1) 1948 verdict |= ATA_EH_SPDN_NCQ_OFF | ATA_EH_SPDN_KEEP_ERRORS; 1949 1950 if (arg.nr_errors[ATA_ECAT_ATA_BUS] + 1951 arg.nr_errors[ATA_ECAT_TOUT_HSM] + 1952 arg.nr_errors[ATA_ECAT_UNK_DEV] > 6) 1953 verdict |= ATA_EH_SPDN_FALLBACK_TO_PIO; 1954 1955 /* scan past 10 mins of error history */ 1956 memset(&arg, 0, sizeof(arg)); 1957 arg.since = j64 - min(j64, j10mins); 1958 ata_ering_map(&dev->ering, speed_down_verdict_cb, &arg); 1959 1960 if (arg.nr_errors[ATA_ECAT_TOUT_HSM] + 1961 arg.nr_errors[ATA_ECAT_UNK_DEV] > 3) 1962 verdict |= ATA_EH_SPDN_NCQ_OFF; 1963 1964 if (arg.nr_errors[ATA_ECAT_ATA_BUS] + 1965 arg.nr_errors[ATA_ECAT_TOUT_HSM] > 3 || 1966 arg.nr_errors[ATA_ECAT_UNK_DEV] > 6) 1967 verdict |= ATA_EH_SPDN_SPEED_DOWN; 1968 1969 return verdict; 1970 } 1971 1972 /** 1973 * ata_eh_speed_down - record error and speed down if necessary 1974 * @dev: Failed device 1975 * @eflags: mask of ATA_EFLAG_* flags 1976 * @err_mask: err_mask of the error 1977 * 1978 * Record error and examine error history to determine whether 1979 * adjusting transmission speed is necessary. It also sets 1980 * transmission limits appropriately if such adjustment is 1981 * necessary. 
1982 * 1983 * LOCKING: 1984 * Kernel thread context (may sleep). 1985 * 1986 * RETURNS: 1987 * Determined recovery action. 1988 */ 1989 static unsigned int ata_eh_speed_down(struct ata_device *dev, 1990 unsigned int eflags, unsigned int err_mask) 1991 { 1992 struct ata_link *link = ata_dev_phys_link(dev); 1993 int xfer_ok = 0; 1994 unsigned int verdict; 1995 unsigned int action = 0; 1996 1997 /* don't bother if Cat-0 error */ 1998 if (ata_eh_categorize_error(eflags, err_mask, &xfer_ok) == 0) 1999 return 0; 2000 2001 /* record error and determine whether speed down is necessary */ 2002 ata_ering_record(&dev->ering, eflags, err_mask); 2003 verdict = ata_eh_speed_down_verdict(dev); 2004 2005 /* turn off NCQ? */ 2006 if ((verdict & ATA_EH_SPDN_NCQ_OFF) && 2007 (dev->flags & (ATA_DFLAG_PIO | ATA_DFLAG_NCQ | 2008 ATA_DFLAG_NCQ_OFF)) == ATA_DFLAG_NCQ) { 2009 dev->flags |= ATA_DFLAG_NCQ_OFF; 2010 ata_dev_warn(dev, "NCQ disabled due to excessive errors\n"); 2011 goto done; 2012 } 2013 2014 /* speed down? */ 2015 if (verdict & ATA_EH_SPDN_SPEED_DOWN) { 2016 /* speed down SATA link speed if possible */ 2017 if (sata_down_spd_limit(link, 0) == 0) { 2018 action |= ATA_EH_RESET; 2019 goto done; 2020 } 2021 2022 /* lower transfer mode */ 2023 if (dev->spdn_cnt < 2) { 2024 static const int dma_dnxfer_sel[] = 2025 { ATA_DNXFER_DMA, ATA_DNXFER_40C }; 2026 static const int pio_dnxfer_sel[] = 2027 { ATA_DNXFER_PIO, ATA_DNXFER_FORCE_PIO0 }; 2028 int sel; 2029 2030 if (dev->xfer_shift != ATA_SHIFT_PIO) 2031 sel = dma_dnxfer_sel[dev->spdn_cnt]; 2032 else 2033 sel = pio_dnxfer_sel[dev->spdn_cnt]; 2034 2035 dev->spdn_cnt++; 2036 2037 if (ata_down_xfermask_limit(dev, sel) == 0) { 2038 action |= ATA_EH_RESET; 2039 goto done; 2040 } 2041 } 2042 } 2043 2044 /* Fall back to PIO? Slowing down to PIO is meaningless for 2045 * SATA ATA devices. Consider it only for PATA and SATAPI. 
2046 */ 2047 if ((verdict & ATA_EH_SPDN_FALLBACK_TO_PIO) && (dev->spdn_cnt >= 2) && 2048 (link->ap->cbl != ATA_CBL_SATA || dev->class == ATA_DEV_ATAPI) && 2049 (dev->xfer_shift != ATA_SHIFT_PIO)) { 2050 if (ata_down_xfermask_limit(dev, ATA_DNXFER_FORCE_PIO) == 0) { 2051 dev->spdn_cnt = 0; 2052 action |= ATA_EH_RESET; 2053 goto done; 2054 } 2055 } 2056 2057 return 0; 2058 done: 2059 /* device has been slowed down, blow error history */ 2060 if (!(verdict & ATA_EH_SPDN_KEEP_ERRORS)) 2061 ata_ering_clear(&dev->ering); 2062 return action; 2063 } 2064 2065 /** 2066 * ata_eh_worth_retry - analyze error and decide whether to retry 2067 * @qc: qc to possibly retry 2068 * 2069 * Look at the cause of the error and decide if a retry 2070 * might be useful or not. We don't want to retry media errors 2071 * because the drive itself has probably already taken 10-30 seconds 2072 * doing its own internal retries before reporting the failure. 2073 */ 2074 static inline int ata_eh_worth_retry(struct ata_queued_cmd *qc) 2075 { 2076 if (qc->err_mask & AC_ERR_MEDIA) 2077 return 0; /* don't retry media errors */ 2078 if (qc->flags & ATA_QCFLAG_IO) 2079 return 1; /* otherwise retry anything from fs stack */ 2080 if (qc->err_mask & AC_ERR_INVALID) 2081 return 0; /* don't retry these */ 2082 return qc->err_mask != AC_ERR_DEV; /* retry if not dev error */ 2083 } 2084 2085 /** 2086 * ata_eh_quiet - check if we need to be quiet about a command error 2087 * @qc: qc to check 2088 * 2089 * Look at the qc flags anbd its scsi command request flags to determine 2090 * if we need to be quiet about the command failure. 
2091 */ 2092 static inline bool ata_eh_quiet(struct ata_queued_cmd *qc) 2093 { 2094 if (qc->scsicmd && 2095 qc->scsicmd->request->rq_flags & RQF_QUIET) 2096 qc->flags |= ATA_QCFLAG_QUIET; 2097 return qc->flags & ATA_QCFLAG_QUIET; 2098 } 2099 2100 /** 2101 * ata_eh_link_autopsy - analyze error and determine recovery action 2102 * @link: host link to perform autopsy on 2103 * 2104 * Analyze why @link failed and determine which recovery actions 2105 * are needed. This function also sets more detailed AC_ERR_* 2106 * values and fills sense data for ATAPI CHECK SENSE. 2107 * 2108 * LOCKING: 2109 * Kernel thread context (may sleep). 2110 */ 2111 static void ata_eh_link_autopsy(struct ata_link *link) 2112 { 2113 struct ata_port *ap = link->ap; 2114 struct ata_eh_context *ehc = &link->eh_context; 2115 struct ata_queued_cmd *qc; 2116 struct ata_device *dev; 2117 unsigned int all_err_mask = 0, eflags = 0; 2118 int tag, nr_failed = 0, nr_quiet = 0; 2119 u32 serror; 2120 int rc; 2121 2122 DPRINTK("ENTER\n"); 2123 2124 if (ehc->i.flags & ATA_EHI_NO_AUTOPSY) 2125 return; 2126 2127 /* obtain and analyze SError */ 2128 rc = sata_scr_read(link, SCR_ERROR, &serror); 2129 if (rc == 0) { 2130 ehc->i.serror |= serror; 2131 ata_eh_analyze_serror(link); 2132 } else if (rc != -EOPNOTSUPP) { 2133 /* SError read failed, force reset and probing */ 2134 ehc->i.probe_mask |= ATA_ALL_DEVICES; 2135 ehc->i.action |= ATA_EH_RESET; 2136 ehc->i.err_mask |= AC_ERR_OTHER; 2137 } 2138 2139 /* analyze NCQ failure */ 2140 ata_eh_analyze_ncq_error(link); 2141 2142 /* any real error trumps AC_ERR_OTHER */ 2143 if (ehc->i.err_mask & ~AC_ERR_OTHER) 2144 ehc->i.err_mask &= ~AC_ERR_OTHER; 2145 2146 all_err_mask |= ehc->i.err_mask; 2147 2148 ata_qc_for_each_raw(ap, qc, tag) { 2149 if (!(qc->flags & ATA_QCFLAG_FAILED) || 2150 ata_dev_phys_link(qc->dev) != link) 2151 continue; 2152 2153 /* inherit upper level err_mask */ 2154 qc->err_mask |= ehc->i.err_mask; 2155 2156 /* analyze TF */ 2157 ehc->i.action |= 
ata_eh_analyze_tf(qc, &qc->result_tf); 2158 2159 /* DEV errors are probably spurious in case of ATA_BUS error */ 2160 if (qc->err_mask & AC_ERR_ATA_BUS) 2161 qc->err_mask &= ~(AC_ERR_DEV | AC_ERR_MEDIA | 2162 AC_ERR_INVALID); 2163 2164 /* any real error trumps unknown error */ 2165 if (qc->err_mask & ~AC_ERR_OTHER) 2166 qc->err_mask &= ~AC_ERR_OTHER; 2167 2168 /* 2169 * SENSE_VALID trumps dev/unknown error and revalidation. Upper 2170 * layers will determine whether the command is worth retrying 2171 * based on the sense data and device class/type. Otherwise, 2172 * determine directly if the command is worth retrying using its 2173 * error mask and flags. 2174 */ 2175 if (qc->flags & ATA_QCFLAG_SENSE_VALID) 2176 qc->err_mask &= ~(AC_ERR_DEV | AC_ERR_OTHER); 2177 else if (ata_eh_worth_retry(qc)) 2178 qc->flags |= ATA_QCFLAG_RETRY; 2179 2180 /* accumulate error info */ 2181 ehc->i.dev = qc->dev; 2182 all_err_mask |= qc->err_mask; 2183 if (qc->flags & ATA_QCFLAG_IO) 2184 eflags |= ATA_EFLAG_IS_IO; 2185 trace_ata_eh_link_autopsy_qc(qc); 2186 2187 /* Count quiet errors */ 2188 if (ata_eh_quiet(qc)) 2189 nr_quiet++; 2190 nr_failed++; 2191 } 2192 2193 /* If all failed commands requested silence, then be quiet */ 2194 if (nr_quiet == nr_failed) 2195 ehc->i.flags |= ATA_EHI_QUIET; 2196 2197 /* enforce default EH actions */ 2198 if (ap->pflags & ATA_PFLAG_FROZEN || 2199 all_err_mask & (AC_ERR_HSM | AC_ERR_TIMEOUT)) 2200 ehc->i.action |= ATA_EH_RESET; 2201 else if (((eflags & ATA_EFLAG_IS_IO) && all_err_mask) || 2202 (!(eflags & ATA_EFLAG_IS_IO) && (all_err_mask & ~AC_ERR_DEV))) 2203 ehc->i.action |= ATA_EH_REVALIDATE; 2204 2205 /* If we have offending qcs and the associated failed device, 2206 * perform per-dev EH action only on the offending device. 
2207 */ 2208 if (ehc->i.dev) { 2209 ehc->i.dev_action[ehc->i.dev->devno] |= 2210 ehc->i.action & ATA_EH_PERDEV_MASK; 2211 ehc->i.action &= ~ATA_EH_PERDEV_MASK; 2212 } 2213 2214 /* propagate timeout to host link */ 2215 if ((all_err_mask & AC_ERR_TIMEOUT) && !ata_is_host_link(link)) 2216 ap->link.eh_context.i.err_mask |= AC_ERR_TIMEOUT; 2217 2218 /* record error and consider speeding down */ 2219 dev = ehc->i.dev; 2220 if (!dev && ((ata_link_max_devices(link) == 1 && 2221 ata_dev_enabled(link->device)))) 2222 dev = link->device; 2223 2224 if (dev) { 2225 if (dev->flags & ATA_DFLAG_DUBIOUS_XFER) 2226 eflags |= ATA_EFLAG_DUBIOUS_XFER; 2227 ehc->i.action |= ata_eh_speed_down(dev, eflags, all_err_mask); 2228 trace_ata_eh_link_autopsy(dev, ehc->i.action, all_err_mask); 2229 } 2230 DPRINTK("EXIT\n"); 2231 } 2232 2233 /** 2234 * ata_eh_autopsy - analyze error and determine recovery action 2235 * @ap: host port to perform autopsy on 2236 * 2237 * Analyze all links of @ap and determine why they failed and 2238 * which recovery actions are needed. 2239 * 2240 * LOCKING: 2241 * Kernel thread context (may sleep). 2242 */ 2243 void ata_eh_autopsy(struct ata_port *ap) 2244 { 2245 struct ata_link *link; 2246 2247 ata_for_each_link(link, ap, EDGE) 2248 ata_eh_link_autopsy(link); 2249 2250 /* Handle the frigging slave link. Autopsy is done similarly 2251 * but actions and flags are transferred over to the master 2252 * link and handled from there. 
2253 */ 2254 if (ap->slave_link) { 2255 struct ata_eh_context *mehc = &ap->link.eh_context; 2256 struct ata_eh_context *sehc = &ap->slave_link->eh_context; 2257 2258 /* transfer control flags from master to slave */ 2259 sehc->i.flags |= mehc->i.flags & ATA_EHI_TO_SLAVE_MASK; 2260 2261 /* perform autopsy on the slave link */ 2262 ata_eh_link_autopsy(ap->slave_link); 2263 2264 /* transfer actions from slave to master and clear slave */ 2265 ata_eh_about_to_do(ap->slave_link, NULL, ATA_EH_ALL_ACTIONS); 2266 mehc->i.action |= sehc->i.action; 2267 mehc->i.dev_action[1] |= sehc->i.dev_action[1]; 2268 mehc->i.flags |= sehc->i.flags; 2269 ata_eh_done(ap->slave_link, NULL, ATA_EH_ALL_ACTIONS); 2270 } 2271 2272 /* Autopsy of fanout ports can affect host link autopsy. 2273 * Perform host link autopsy last. 2274 */ 2275 if (sata_pmp_attached(ap)) 2276 ata_eh_link_autopsy(&ap->link); 2277 } 2278 2279 /** 2280 * ata_get_cmd_descript - get description for ATA command 2281 * @command: ATA command code to get description for 2282 * 2283 * Return a textual description of the given command, or NULL if the 2284 * command is not known. 
2285 * 2286 * LOCKING: 2287 * None 2288 */ 2289 const char *ata_get_cmd_descript(u8 command) 2290 { 2291 #ifdef CONFIG_ATA_VERBOSE_ERROR 2292 static const struct 2293 { 2294 u8 command; 2295 const char *text; 2296 } cmd_descr[] = { 2297 { ATA_CMD_DEV_RESET, "DEVICE RESET" }, 2298 { ATA_CMD_CHK_POWER, "CHECK POWER MODE" }, 2299 { ATA_CMD_STANDBY, "STANDBY" }, 2300 { ATA_CMD_IDLE, "IDLE" }, 2301 { ATA_CMD_EDD, "EXECUTE DEVICE DIAGNOSTIC" }, 2302 { ATA_CMD_DOWNLOAD_MICRO, "DOWNLOAD MICROCODE" }, 2303 { ATA_CMD_DOWNLOAD_MICRO_DMA, "DOWNLOAD MICROCODE DMA" }, 2304 { ATA_CMD_NOP, "NOP" }, 2305 { ATA_CMD_FLUSH, "FLUSH CACHE" }, 2306 { ATA_CMD_FLUSH_EXT, "FLUSH CACHE EXT" }, 2307 { ATA_CMD_ID_ATA, "IDENTIFY DEVICE" }, 2308 { ATA_CMD_ID_ATAPI, "IDENTIFY PACKET DEVICE" }, 2309 { ATA_CMD_SERVICE, "SERVICE" }, 2310 { ATA_CMD_READ, "READ DMA" }, 2311 { ATA_CMD_READ_EXT, "READ DMA EXT" }, 2312 { ATA_CMD_READ_QUEUED, "READ DMA QUEUED" }, 2313 { ATA_CMD_READ_STREAM_EXT, "READ STREAM EXT" }, 2314 { ATA_CMD_READ_STREAM_DMA_EXT, "READ STREAM DMA EXT" }, 2315 { ATA_CMD_WRITE, "WRITE DMA" }, 2316 { ATA_CMD_WRITE_EXT, "WRITE DMA EXT" }, 2317 { ATA_CMD_WRITE_QUEUED, "WRITE DMA QUEUED EXT" }, 2318 { ATA_CMD_WRITE_STREAM_EXT, "WRITE STREAM EXT" }, 2319 { ATA_CMD_WRITE_STREAM_DMA_EXT, "WRITE STREAM DMA EXT" }, 2320 { ATA_CMD_WRITE_FUA_EXT, "WRITE DMA FUA EXT" }, 2321 { ATA_CMD_WRITE_QUEUED_FUA_EXT, "WRITE DMA QUEUED FUA EXT" }, 2322 { ATA_CMD_FPDMA_READ, "READ FPDMA QUEUED" }, 2323 { ATA_CMD_FPDMA_WRITE, "WRITE FPDMA QUEUED" }, 2324 { ATA_CMD_FPDMA_SEND, "SEND FPDMA QUEUED" }, 2325 { ATA_CMD_FPDMA_RECV, "RECEIVE FPDMA QUEUED" }, 2326 { ATA_CMD_PIO_READ, "READ SECTOR(S)" }, 2327 { ATA_CMD_PIO_READ_EXT, "READ SECTOR(S) EXT" }, 2328 { ATA_CMD_PIO_WRITE, "WRITE SECTOR(S)" }, 2329 { ATA_CMD_PIO_WRITE_EXT, "WRITE SECTOR(S) EXT" }, 2330 { ATA_CMD_READ_MULTI, "READ MULTIPLE" }, 2331 { ATA_CMD_READ_MULTI_EXT, "READ MULTIPLE EXT" }, 2332 { ATA_CMD_WRITE_MULTI, "WRITE MULTIPLE" }, 2333 { 
ATA_CMD_WRITE_MULTI_EXT, "WRITE MULTIPLE EXT" }, 2334 { ATA_CMD_WRITE_MULTI_FUA_EXT, "WRITE MULTIPLE FUA EXT" }, 2335 { ATA_CMD_SET_FEATURES, "SET FEATURES" }, 2336 { ATA_CMD_SET_MULTI, "SET MULTIPLE MODE" }, 2337 { ATA_CMD_VERIFY, "READ VERIFY SECTOR(S)" }, 2338 { ATA_CMD_VERIFY_EXT, "READ VERIFY SECTOR(S) EXT" }, 2339 { ATA_CMD_WRITE_UNCORR_EXT, "WRITE UNCORRECTABLE EXT" }, 2340 { ATA_CMD_STANDBYNOW1, "STANDBY IMMEDIATE" }, 2341 { ATA_CMD_IDLEIMMEDIATE, "IDLE IMMEDIATE" }, 2342 { ATA_CMD_SLEEP, "SLEEP" }, 2343 { ATA_CMD_INIT_DEV_PARAMS, "INITIALIZE DEVICE PARAMETERS" }, 2344 { ATA_CMD_READ_NATIVE_MAX, "READ NATIVE MAX ADDRESS" }, 2345 { ATA_CMD_READ_NATIVE_MAX_EXT, "READ NATIVE MAX ADDRESS EXT" }, 2346 { ATA_CMD_SET_MAX, "SET MAX ADDRESS" }, 2347 { ATA_CMD_SET_MAX_EXT, "SET MAX ADDRESS EXT" }, 2348 { ATA_CMD_READ_LOG_EXT, "READ LOG EXT" }, 2349 { ATA_CMD_WRITE_LOG_EXT, "WRITE LOG EXT" }, 2350 { ATA_CMD_READ_LOG_DMA_EXT, "READ LOG DMA EXT" }, 2351 { ATA_CMD_WRITE_LOG_DMA_EXT, "WRITE LOG DMA EXT" }, 2352 { ATA_CMD_TRUSTED_NONDATA, "TRUSTED NON-DATA" }, 2353 { ATA_CMD_TRUSTED_RCV, "TRUSTED RECEIVE" }, 2354 { ATA_CMD_TRUSTED_RCV_DMA, "TRUSTED RECEIVE DMA" }, 2355 { ATA_CMD_TRUSTED_SND, "TRUSTED SEND" }, 2356 { ATA_CMD_TRUSTED_SND_DMA, "TRUSTED SEND DMA" }, 2357 { ATA_CMD_PMP_READ, "READ BUFFER" }, 2358 { ATA_CMD_PMP_READ_DMA, "READ BUFFER DMA" }, 2359 { ATA_CMD_PMP_WRITE, "WRITE BUFFER" }, 2360 { ATA_CMD_PMP_WRITE_DMA, "WRITE BUFFER DMA" }, 2361 { ATA_CMD_CONF_OVERLAY, "DEVICE CONFIGURATION OVERLAY" }, 2362 { ATA_CMD_SEC_SET_PASS, "SECURITY SET PASSWORD" }, 2363 { ATA_CMD_SEC_UNLOCK, "SECURITY UNLOCK" }, 2364 { ATA_CMD_SEC_ERASE_PREP, "SECURITY ERASE PREPARE" }, 2365 { ATA_CMD_SEC_ERASE_UNIT, "SECURITY ERASE UNIT" }, 2366 { ATA_CMD_SEC_FREEZE_LOCK, "SECURITY FREEZE LOCK" }, 2367 { ATA_CMD_SEC_DISABLE_PASS, "SECURITY DISABLE PASSWORD" }, 2368 { ATA_CMD_CONFIG_STREAM, "CONFIGURE STREAM" }, 2369 { ATA_CMD_SMART, "SMART" }, 2370 { ATA_CMD_MEDIA_LOCK, "DOOR LOCK" }, 2371 
{ ATA_CMD_MEDIA_UNLOCK, "DOOR UNLOCK" }, 2372 { ATA_CMD_DSM, "DATA SET MANAGEMENT" }, 2373 { ATA_CMD_CHK_MED_CRD_TYP, "CHECK MEDIA CARD TYPE" }, 2374 { ATA_CMD_CFA_REQ_EXT_ERR, "CFA REQUEST EXTENDED ERROR" }, 2375 { ATA_CMD_CFA_WRITE_NE, "CFA WRITE SECTORS WITHOUT ERASE" }, 2376 { ATA_CMD_CFA_TRANS_SECT, "CFA TRANSLATE SECTOR" }, 2377 { ATA_CMD_CFA_ERASE, "CFA ERASE SECTORS" }, 2378 { ATA_CMD_CFA_WRITE_MULT_NE, "CFA WRITE MULTIPLE WITHOUT ERASE" }, 2379 { ATA_CMD_REQ_SENSE_DATA, "REQUEST SENSE DATA EXT" }, 2380 { ATA_CMD_SANITIZE_DEVICE, "SANITIZE DEVICE" }, 2381 { ATA_CMD_ZAC_MGMT_IN, "ZAC MANAGEMENT IN" }, 2382 { ATA_CMD_ZAC_MGMT_OUT, "ZAC MANAGEMENT OUT" }, 2383 { ATA_CMD_READ_LONG, "READ LONG (with retries)" }, 2384 { ATA_CMD_READ_LONG_ONCE, "READ LONG (without retries)" }, 2385 { ATA_CMD_WRITE_LONG, "WRITE LONG (with retries)" }, 2386 { ATA_CMD_WRITE_LONG_ONCE, "WRITE LONG (without retries)" }, 2387 { ATA_CMD_RESTORE, "RECALIBRATE" }, 2388 { 0, NULL } /* terminate list */ 2389 }; 2390 2391 unsigned int i; 2392 for (i = 0; cmd_descr[i].text; i++) 2393 if (cmd_descr[i].command == command) 2394 return cmd_descr[i].text; 2395 #endif 2396 2397 return NULL; 2398 } 2399 EXPORT_SYMBOL_GPL(ata_get_cmd_descript); 2400 2401 /** 2402 * ata_eh_link_report - report error handling to user 2403 * @link: ATA link EH is going on 2404 * 2405 * Report EH to user. 2406 * 2407 * LOCKING: 2408 * None. 
2409 */ 2410 static void ata_eh_link_report(struct ata_link *link) 2411 { 2412 struct ata_port *ap = link->ap; 2413 struct ata_eh_context *ehc = &link->eh_context; 2414 struct ata_queued_cmd *qc; 2415 const char *frozen, *desc; 2416 char tries_buf[6] = ""; 2417 int tag, nr_failed = 0; 2418 2419 if (ehc->i.flags & ATA_EHI_QUIET) 2420 return; 2421 2422 desc = NULL; 2423 if (ehc->i.desc[0] != '\0') 2424 desc = ehc->i.desc; 2425 2426 ata_qc_for_each_raw(ap, qc, tag) { 2427 if (!(qc->flags & ATA_QCFLAG_FAILED) || 2428 ata_dev_phys_link(qc->dev) != link || 2429 ((qc->flags & ATA_QCFLAG_QUIET) && 2430 qc->err_mask == AC_ERR_DEV)) 2431 continue; 2432 if (qc->flags & ATA_QCFLAG_SENSE_VALID && !qc->err_mask) 2433 continue; 2434 2435 nr_failed++; 2436 } 2437 2438 if (!nr_failed && !ehc->i.err_mask) 2439 return; 2440 2441 frozen = ""; 2442 if (ap->pflags & ATA_PFLAG_FROZEN) 2443 frozen = " frozen"; 2444 2445 if (ap->eh_tries < ATA_EH_MAX_TRIES) 2446 snprintf(tries_buf, sizeof(tries_buf), " t%d", 2447 ap->eh_tries); 2448 2449 if (ehc->i.dev) { 2450 ata_dev_err(ehc->i.dev, "exception Emask 0x%x " 2451 "SAct 0x%x SErr 0x%x action 0x%x%s%s\n", 2452 ehc->i.err_mask, link->sactive, ehc->i.serror, 2453 ehc->i.action, frozen, tries_buf); 2454 if (desc) 2455 ata_dev_err(ehc->i.dev, "%s\n", desc); 2456 } else { 2457 ata_link_err(link, "exception Emask 0x%x " 2458 "SAct 0x%x SErr 0x%x action 0x%x%s%s\n", 2459 ehc->i.err_mask, link->sactive, ehc->i.serror, 2460 ehc->i.action, frozen, tries_buf); 2461 if (desc) 2462 ata_link_err(link, "%s\n", desc); 2463 } 2464 2465 #ifdef CONFIG_ATA_VERBOSE_ERROR 2466 if (ehc->i.serror) 2467 ata_link_err(link, 2468 "SError: { %s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s}\n", 2469 ehc->i.serror & SERR_DATA_RECOVERED ? "RecovData " : "", 2470 ehc->i.serror & SERR_COMM_RECOVERED ? "RecovComm " : "", 2471 ehc->i.serror & SERR_DATA ? "UnrecovData " : "", 2472 ehc->i.serror & SERR_PERSISTENT ? "Persist " : "", 2473 ehc->i.serror & SERR_PROTOCOL ? 
"Proto " : "", 2474 ehc->i.serror & SERR_INTERNAL ? "HostInt " : "", 2475 ehc->i.serror & SERR_PHYRDY_CHG ? "PHYRdyChg " : "", 2476 ehc->i.serror & SERR_PHY_INT_ERR ? "PHYInt " : "", 2477 ehc->i.serror & SERR_COMM_WAKE ? "CommWake " : "", 2478 ehc->i.serror & SERR_10B_8B_ERR ? "10B8B " : "", 2479 ehc->i.serror & SERR_DISPARITY ? "Dispar " : "", 2480 ehc->i.serror & SERR_CRC ? "BadCRC " : "", 2481 ehc->i.serror & SERR_HANDSHAKE ? "Handshk " : "", 2482 ehc->i.serror & SERR_LINK_SEQ_ERR ? "LinkSeq " : "", 2483 ehc->i.serror & SERR_TRANS_ST_ERROR ? "TrStaTrns " : "", 2484 ehc->i.serror & SERR_UNRECOG_FIS ? "UnrecFIS " : "", 2485 ehc->i.serror & SERR_DEV_XCHG ? "DevExch " : ""); 2486 #endif 2487 2488 ata_qc_for_each_raw(ap, qc, tag) { 2489 struct ata_taskfile *cmd = &qc->tf, *res = &qc->result_tf; 2490 char data_buf[20] = ""; 2491 char cdb_buf[70] = ""; 2492 2493 if (!(qc->flags & ATA_QCFLAG_FAILED) || 2494 ata_dev_phys_link(qc->dev) != link || !qc->err_mask) 2495 continue; 2496 2497 if (qc->dma_dir != DMA_NONE) { 2498 static const char *dma_str[] = { 2499 [DMA_BIDIRECTIONAL] = "bidi", 2500 [DMA_TO_DEVICE] = "out", 2501 [DMA_FROM_DEVICE] = "in", 2502 }; 2503 const char *prot_str = NULL; 2504 2505 switch (qc->tf.protocol) { 2506 case ATA_PROT_UNKNOWN: 2507 prot_str = "unknown"; 2508 break; 2509 case ATA_PROT_NODATA: 2510 prot_str = "nodata"; 2511 break; 2512 case ATA_PROT_PIO: 2513 prot_str = "pio"; 2514 break; 2515 case ATA_PROT_DMA: 2516 prot_str = "dma"; 2517 break; 2518 case ATA_PROT_NCQ: 2519 prot_str = "ncq dma"; 2520 break; 2521 case ATA_PROT_NCQ_NODATA: 2522 prot_str = "ncq nodata"; 2523 break; 2524 case ATAPI_PROT_NODATA: 2525 prot_str = "nodata"; 2526 break; 2527 case ATAPI_PROT_PIO: 2528 prot_str = "pio"; 2529 break; 2530 case ATAPI_PROT_DMA: 2531 prot_str = "dma"; 2532 break; 2533 } 2534 snprintf(data_buf, sizeof(data_buf), " %s %u %s", 2535 prot_str, qc->nbytes, dma_str[qc->dma_dir]); 2536 } 2537 2538 if (ata_is_atapi(qc->tf.protocol)) { 2539 const u8 *cdb = 
qc->cdb; 2540 size_t cdb_len = qc->dev->cdb_len; 2541 2542 if (qc->scsicmd) { 2543 cdb = qc->scsicmd->cmnd; 2544 cdb_len = qc->scsicmd->cmd_len; 2545 } 2546 __scsi_format_command(cdb_buf, sizeof(cdb_buf), 2547 cdb, cdb_len); 2548 } else { 2549 const char *descr = ata_get_cmd_descript(cmd->command); 2550 if (descr) 2551 ata_dev_err(qc->dev, "failed command: %s\n", 2552 descr); 2553 } 2554 2555 ata_dev_err(qc->dev, 2556 "cmd %02x/%02x:%02x:%02x:%02x:%02x/%02x:%02x:%02x:%02x:%02x/%02x " 2557 "tag %d%s\n %s" 2558 "res %02x/%02x:%02x:%02x:%02x:%02x/%02x:%02x:%02x:%02x:%02x/%02x " 2559 "Emask 0x%x (%s)%s\n", 2560 cmd->command, cmd->feature, cmd->nsect, 2561 cmd->lbal, cmd->lbam, cmd->lbah, 2562 cmd->hob_feature, cmd->hob_nsect, 2563 cmd->hob_lbal, cmd->hob_lbam, cmd->hob_lbah, 2564 cmd->device, qc->tag, data_buf, cdb_buf, 2565 res->command, res->feature, res->nsect, 2566 res->lbal, res->lbam, res->lbah, 2567 res->hob_feature, res->hob_nsect, 2568 res->hob_lbal, res->hob_lbam, res->hob_lbah, 2569 res->device, qc->err_mask, ata_err_string(qc->err_mask), 2570 qc->err_mask & AC_ERR_NCQ ? " <F>" : ""); 2571 2572 #ifdef CONFIG_ATA_VERBOSE_ERROR 2573 if (res->command & (ATA_BUSY | ATA_DRDY | ATA_DF | ATA_DRQ | 2574 ATA_SENSE | ATA_ERR)) { 2575 if (res->command & ATA_BUSY) 2576 ata_dev_err(qc->dev, "status: { Busy }\n"); 2577 else 2578 ata_dev_err(qc->dev, "status: { %s%s%s%s%s}\n", 2579 res->command & ATA_DRDY ? "DRDY " : "", 2580 res->command & ATA_DF ? "DF " : "", 2581 res->command & ATA_DRQ ? "DRQ " : "", 2582 res->command & ATA_SENSE ? "SENSE " : "", 2583 res->command & ATA_ERR ? "ERR " : ""); 2584 } 2585 2586 if (cmd->command != ATA_CMD_PACKET && 2587 (res->feature & (ATA_ICRC | ATA_UNC | ATA_AMNF | 2588 ATA_IDNF | ATA_ABORTED))) 2589 ata_dev_err(qc->dev, "error: { %s%s%s%s%s}\n", 2590 res->feature & ATA_ICRC ? "ICRC " : "", 2591 res->feature & ATA_UNC ? "UNC " : "", 2592 res->feature & ATA_AMNF ? "AMNF " : "", 2593 res->feature & ATA_IDNF ? 
"IDNF " : "", 2594 res->feature & ATA_ABORTED ? "ABRT " : ""); 2595 #endif 2596 } 2597 } 2598 2599 /** 2600 * ata_eh_report - report error handling to user 2601 * @ap: ATA port to report EH about 2602 * 2603 * Report EH to user. 2604 * 2605 * LOCKING: 2606 * None. 2607 */ 2608 void ata_eh_report(struct ata_port *ap) 2609 { 2610 struct ata_link *link; 2611 2612 ata_for_each_link(link, ap, HOST_FIRST) 2613 ata_eh_link_report(link); 2614 } 2615 2616 static int ata_do_reset(struct ata_link *link, ata_reset_fn_t reset, 2617 unsigned int *classes, unsigned long deadline, 2618 bool clear_classes) 2619 { 2620 struct ata_device *dev; 2621 2622 if (clear_classes) 2623 ata_for_each_dev(dev, link, ALL) 2624 classes[dev->devno] = ATA_DEV_UNKNOWN; 2625 2626 return reset(link, classes, deadline); 2627 } 2628 2629 static int ata_eh_followup_srst_needed(struct ata_link *link, int rc) 2630 { 2631 if ((link->flags & ATA_LFLAG_NO_SRST) || ata_link_offline(link)) 2632 return 0; 2633 if (rc == -EAGAIN) 2634 return 1; 2635 if (sata_pmp_supported(link->ap) && ata_is_host_link(link)) 2636 return 1; 2637 return 0; 2638 } 2639 2640 int ata_eh_reset(struct ata_link *link, int classify, 2641 ata_prereset_fn_t prereset, ata_reset_fn_t softreset, 2642 ata_reset_fn_t hardreset, ata_postreset_fn_t postreset) 2643 { 2644 struct ata_port *ap = link->ap; 2645 struct ata_link *slave = ap->slave_link; 2646 struct ata_eh_context *ehc = &link->eh_context; 2647 struct ata_eh_context *sehc = slave ? 
&slave->eh_context : NULL; 2648 unsigned int *classes = ehc->classes; 2649 unsigned int lflags = link->flags; 2650 int verbose = !(ehc->i.flags & ATA_EHI_QUIET); 2651 int max_tries = 0, try = 0; 2652 struct ata_link *failed_link; 2653 struct ata_device *dev; 2654 unsigned long deadline, now; 2655 ata_reset_fn_t reset; 2656 unsigned long flags; 2657 u32 sstatus; 2658 int nr_unknown, rc; 2659 2660 /* 2661 * Prepare to reset 2662 */ 2663 while (ata_eh_reset_timeouts[max_tries] != ULONG_MAX) 2664 max_tries++; 2665 if (link->flags & ATA_LFLAG_RST_ONCE) 2666 max_tries = 1; 2667 if (link->flags & ATA_LFLAG_NO_HRST) 2668 hardreset = NULL; 2669 if (link->flags & ATA_LFLAG_NO_SRST) 2670 softreset = NULL; 2671 2672 /* make sure each reset attempt is at least COOL_DOWN apart */ 2673 if (ehc->i.flags & ATA_EHI_DID_RESET) { 2674 now = jiffies; 2675 WARN_ON(time_after(ehc->last_reset, now)); 2676 deadline = ata_deadline(ehc->last_reset, 2677 ATA_EH_RESET_COOL_DOWN); 2678 if (time_before(now, deadline)) 2679 schedule_timeout_uninterruptible(deadline - now); 2680 } 2681 2682 spin_lock_irqsave(ap->lock, flags); 2683 ap->pflags |= ATA_PFLAG_RESETTING; 2684 spin_unlock_irqrestore(ap->lock, flags); 2685 2686 ata_eh_about_to_do(link, NULL, ATA_EH_RESET); 2687 2688 ata_for_each_dev(dev, link, ALL) { 2689 /* If we issue an SRST then an ATA drive (not ATAPI) 2690 * may change configuration and be in PIO0 timing. If 2691 * we do a hard reset (or are coming from power on) 2692 * this is true for ATA or ATAPI. Until we've set a 2693 * suitable controller mode we should not touch the 2694 * bus as we may be talking too fast. 2695 */ 2696 dev->pio_mode = XFER_PIO_0; 2697 dev->dma_mode = 0xff; 2698 2699 /* If the controller has a pio mode setup function 2700 * then use it to set the chipset to rights. Don't 2701 * touch the DMA setup as that will be dealt with when 2702 * configuring devices. 
2703 */ 2704 if (ap->ops->set_piomode) 2705 ap->ops->set_piomode(ap, dev); 2706 } 2707 2708 /* prefer hardreset */ 2709 reset = NULL; 2710 ehc->i.action &= ~ATA_EH_RESET; 2711 if (hardreset) { 2712 reset = hardreset; 2713 ehc->i.action |= ATA_EH_HARDRESET; 2714 } else if (softreset) { 2715 reset = softreset; 2716 ehc->i.action |= ATA_EH_SOFTRESET; 2717 } 2718 2719 if (prereset) { 2720 unsigned long deadline = ata_deadline(jiffies, 2721 ATA_EH_PRERESET_TIMEOUT); 2722 2723 if (slave) { 2724 sehc->i.action &= ~ATA_EH_RESET; 2725 sehc->i.action |= ehc->i.action; 2726 } 2727 2728 rc = prereset(link, deadline); 2729 2730 /* If present, do prereset on slave link too. Reset 2731 * is skipped iff both master and slave links report 2732 * -ENOENT or clear ATA_EH_RESET. 2733 */ 2734 if (slave && (rc == 0 || rc == -ENOENT)) { 2735 int tmp; 2736 2737 tmp = prereset(slave, deadline); 2738 if (tmp != -ENOENT) 2739 rc = tmp; 2740 2741 ehc->i.action |= sehc->i.action; 2742 } 2743 2744 if (rc) { 2745 if (rc == -ENOENT) { 2746 ata_link_dbg(link, "port disabled--ignoring\n"); 2747 ehc->i.action &= ~ATA_EH_RESET; 2748 2749 ata_for_each_dev(dev, link, ALL) 2750 classes[dev->devno] = ATA_DEV_NONE; 2751 2752 rc = 0; 2753 } else 2754 ata_link_err(link, 2755 "prereset failed (errno=%d)\n", 2756 rc); 2757 goto out; 2758 } 2759 2760 /* prereset() might have cleared ATA_EH_RESET. If so, 2761 * bang classes, thaw and return. 
2762 */ 2763 if (reset && !(ehc->i.action & ATA_EH_RESET)) { 2764 ata_for_each_dev(dev, link, ALL) 2765 classes[dev->devno] = ATA_DEV_NONE; 2766 if ((ap->pflags & ATA_PFLAG_FROZEN) && 2767 ata_is_host_link(link)) 2768 ata_eh_thaw_port(ap); 2769 rc = 0; 2770 goto out; 2771 } 2772 } 2773 2774 retry: 2775 /* 2776 * Perform reset 2777 */ 2778 if (ata_is_host_link(link)) 2779 ata_eh_freeze_port(ap); 2780 2781 deadline = ata_deadline(jiffies, ata_eh_reset_timeouts[try++]); 2782 2783 if (reset) { 2784 if (verbose) 2785 ata_link_info(link, "%s resetting link\n", 2786 reset == softreset ? "soft" : "hard"); 2787 2788 /* mark that this EH session started with reset */ 2789 ehc->last_reset = jiffies; 2790 if (reset == hardreset) 2791 ehc->i.flags |= ATA_EHI_DID_HARDRESET; 2792 else 2793 ehc->i.flags |= ATA_EHI_DID_SOFTRESET; 2794 2795 rc = ata_do_reset(link, reset, classes, deadline, true); 2796 if (rc && rc != -EAGAIN) { 2797 failed_link = link; 2798 goto fail; 2799 } 2800 2801 /* hardreset slave link if existent */ 2802 if (slave && reset == hardreset) { 2803 int tmp; 2804 2805 if (verbose) 2806 ata_link_info(slave, "hard resetting link\n"); 2807 2808 ata_eh_about_to_do(slave, NULL, ATA_EH_RESET); 2809 tmp = ata_do_reset(slave, reset, classes, deadline, 2810 false); 2811 switch (tmp) { 2812 case -EAGAIN: 2813 rc = -EAGAIN; 2814 case 0: 2815 break; 2816 default: 2817 failed_link = slave; 2818 rc = tmp; 2819 goto fail; 2820 } 2821 } 2822 2823 /* perform follow-up SRST if necessary */ 2824 if (reset == hardreset && 2825 ata_eh_followup_srst_needed(link, rc)) { 2826 reset = softreset; 2827 2828 if (!reset) { 2829 ata_link_err(link, 2830 "follow-up softreset required but no softreset available\n"); 2831 failed_link = link; 2832 rc = -EINVAL; 2833 goto fail; 2834 } 2835 2836 ata_eh_about_to_do(link, NULL, ATA_EH_RESET); 2837 rc = ata_do_reset(link, reset, classes, deadline, true); 2838 if (rc) { 2839 failed_link = link; 2840 goto fail; 2841 } 2842 } 2843 } else { 2844 if (verbose) 
2845 ata_link_info(link, 2846 "no reset method available, skipping reset\n"); 2847 if (!(lflags & ATA_LFLAG_ASSUME_CLASS)) 2848 lflags |= ATA_LFLAG_ASSUME_ATA; 2849 } 2850 2851 /* 2852 * Post-reset processing 2853 */ 2854 ata_for_each_dev(dev, link, ALL) { 2855 /* After the reset, the device state is PIO 0 and the 2856 * controller state is undefined. Reset also wakes up 2857 * drives from sleeping mode. 2858 */ 2859 dev->pio_mode = XFER_PIO_0; 2860 dev->flags &= ~ATA_DFLAG_SLEEPING; 2861 2862 if (ata_phys_link_offline(ata_dev_phys_link(dev))) 2863 continue; 2864 2865 /* apply class override */ 2866 if (lflags & ATA_LFLAG_ASSUME_ATA) 2867 classes[dev->devno] = ATA_DEV_ATA; 2868 else if (lflags & ATA_LFLAG_ASSUME_SEMB) 2869 classes[dev->devno] = ATA_DEV_SEMB_UNSUP; 2870 } 2871 2872 /* record current link speed */ 2873 if (sata_scr_read(link, SCR_STATUS, &sstatus) == 0) 2874 link->sata_spd = (sstatus >> 4) & 0xf; 2875 if (slave && sata_scr_read(slave, SCR_STATUS, &sstatus) == 0) 2876 slave->sata_spd = (sstatus >> 4) & 0xf; 2877 2878 /* thaw the port */ 2879 if (ata_is_host_link(link)) 2880 ata_eh_thaw_port(ap); 2881 2882 /* postreset() should clear hardware SError. Although SError 2883 * is cleared during link resume, clearing SError here is 2884 * necessary as some PHYs raise hotplug events after SRST. 2885 * This introduces race condition where hotplug occurs between 2886 * reset and here. This race is mediated by cross checking 2887 * link onlineness and classification result later. 2888 */ 2889 if (postreset) { 2890 postreset(link, classes); 2891 if (slave) 2892 postreset(slave, classes); 2893 } 2894 2895 /* 2896 * Some controllers can't be frozen very well and may set spurious 2897 * error conditions during reset. Clear accumulated error 2898 * information and re-thaw the port if frozen. As reset is the 2899 * final recovery action and we cross check link onlineness against 2900 * device classification later, no hotplug event is lost by this. 
2901 */ 2902 spin_lock_irqsave(link->ap->lock, flags); 2903 memset(&link->eh_info, 0, sizeof(link->eh_info)); 2904 if (slave) 2905 memset(&slave->eh_info, 0, sizeof(link->eh_info)); 2906 ap->pflags &= ~ATA_PFLAG_EH_PENDING; 2907 spin_unlock_irqrestore(link->ap->lock, flags); 2908 2909 if (ap->pflags & ATA_PFLAG_FROZEN) 2910 ata_eh_thaw_port(ap); 2911 2912 /* 2913 * Make sure onlineness and classification result correspond. 2914 * Hotplug could have happened during reset and some 2915 * controllers fail to wait while a drive is spinning up after 2916 * being hotplugged causing misdetection. By cross checking 2917 * link on/offlineness and classification result, those 2918 * conditions can be reliably detected and retried. 2919 */ 2920 nr_unknown = 0; 2921 ata_for_each_dev(dev, link, ALL) { 2922 if (ata_phys_link_online(ata_dev_phys_link(dev))) { 2923 if (classes[dev->devno] == ATA_DEV_UNKNOWN) { 2924 ata_dev_dbg(dev, "link online but device misclassified\n"); 2925 classes[dev->devno] = ATA_DEV_NONE; 2926 nr_unknown++; 2927 } 2928 } else if (ata_phys_link_offline(ata_dev_phys_link(dev))) { 2929 if (ata_class_enabled(classes[dev->devno])) 2930 ata_dev_dbg(dev, 2931 "link offline, clearing class %d to NONE\n", 2932 classes[dev->devno]); 2933 classes[dev->devno] = ATA_DEV_NONE; 2934 } else if (classes[dev->devno] == ATA_DEV_UNKNOWN) { 2935 ata_dev_dbg(dev, 2936 "link status unknown, clearing UNKNOWN to NONE\n"); 2937 classes[dev->devno] = ATA_DEV_NONE; 2938 } 2939 } 2940 2941 if (classify && nr_unknown) { 2942 if (try < max_tries) { 2943 ata_link_warn(link, 2944 "link online but %d devices misclassified, retrying\n", 2945 nr_unknown); 2946 failed_link = link; 2947 rc = -EAGAIN; 2948 goto fail; 2949 } 2950 ata_link_warn(link, 2951 "link online but %d devices misclassified, " 2952 "device detection might fail\n", nr_unknown); 2953 } 2954 2955 /* reset successful, schedule revalidation */ 2956 ata_eh_done(link, NULL, ATA_EH_RESET); 2957 if (slave) 2958 ata_eh_done(slave, 
NULL, ATA_EH_RESET); 2959 ehc->last_reset = jiffies; /* update to completion time */ 2960 ehc->i.action |= ATA_EH_REVALIDATE; 2961 link->lpm_policy = ATA_LPM_UNKNOWN; /* reset LPM state */ 2962 2963 rc = 0; 2964 out: 2965 /* clear hotplug flag */ 2966 ehc->i.flags &= ~ATA_EHI_HOTPLUGGED; 2967 if (slave) 2968 sehc->i.flags &= ~ATA_EHI_HOTPLUGGED; 2969 2970 spin_lock_irqsave(ap->lock, flags); 2971 ap->pflags &= ~ATA_PFLAG_RESETTING; 2972 spin_unlock_irqrestore(ap->lock, flags); 2973 2974 return rc; 2975 2976 fail: 2977 /* if SCR isn't accessible on a fan-out port, PMP needs to be reset */ 2978 if (!ata_is_host_link(link) && 2979 sata_scr_read(link, SCR_STATUS, &sstatus)) 2980 rc = -ERESTART; 2981 2982 if (try >= max_tries) { 2983 /* 2984 * Thaw host port even if reset failed, so that the port 2985 * can be retried on the next phy event. This risks 2986 * repeated EH runs but seems to be a better tradeoff than 2987 * shutting down a port after a botched hotplug attempt. 2988 */ 2989 if (ata_is_host_link(link)) 2990 ata_eh_thaw_port(ap); 2991 goto out; 2992 } 2993 2994 now = jiffies; 2995 if (time_before(now, deadline)) { 2996 unsigned long delta = deadline - now; 2997 2998 ata_link_warn(failed_link, 2999 "reset failed (errno=%d), retrying in %u secs\n", 3000 rc, DIV_ROUND_UP(jiffies_to_msecs(delta), 1000)); 3001 3002 ata_eh_release(ap); 3003 while (delta) 3004 delta = schedule_timeout_uninterruptible(delta); 3005 ata_eh_acquire(ap); 3006 } 3007 3008 /* 3009 * While disks spinup behind PMP, some controllers fail sending SRST. 3010 * They need to be reset - as well as the PMP - before retrying. 
3011 */ 3012 if (rc == -ERESTART) { 3013 if (ata_is_host_link(link)) 3014 ata_eh_thaw_port(ap); 3015 goto out; 3016 } 3017 3018 if (try == max_tries - 1) { 3019 sata_down_spd_limit(link, 0); 3020 if (slave) 3021 sata_down_spd_limit(slave, 0); 3022 } else if (rc == -EPIPE) 3023 sata_down_spd_limit(failed_link, 0); 3024 3025 if (hardreset) 3026 reset = hardreset; 3027 goto retry; 3028 } 3029 3030 static inline void ata_eh_pull_park_action(struct ata_port *ap) 3031 { 3032 struct ata_link *link; 3033 struct ata_device *dev; 3034 unsigned long flags; 3035 3036 /* 3037 * This function can be thought of as an extended version of 3038 * ata_eh_about_to_do() specially crafted to accommodate the 3039 * requirements of ATA_EH_PARK handling. Since the EH thread 3040 * does not leave the do {} while () loop in ata_eh_recover as 3041 * long as the timeout for a park request to *one* device on 3042 * the port has not expired, and since we still want to pick 3043 * up park requests to other devices on the same port or 3044 * timeout updates for the same device, we have to pull 3045 * ATA_EH_PARK actions from eh_info into eh_context.i 3046 * ourselves at the beginning of each pass over the loop. 3047 * 3048 * Additionally, all write accesses to &ap->park_req_pending 3049 * through reinit_completion() (see below) or complete_all() 3050 * (see ata_scsi_park_store()) are protected by the host lock. 3051 * As a result we have that park_req_pending.done is zero on 3052 * exit from this function, i.e. when ATA_EH_PARK actions for 3053 * *all* devices on port ap have been pulled into the 3054 * respective eh_context structs. If, and only if, 3055 * park_req_pending.done is non-zero by the time we reach 3056 * wait_for_completion_timeout(), another ATA_EH_PARK action 3057 * has been scheduled for at least one of the devices on port 3058 * ap and we have to cycle over the do {} while () loop in 3059 * ata_eh_recover() again. 
3060 */ 3061 3062 spin_lock_irqsave(ap->lock, flags); 3063 reinit_completion(&ap->park_req_pending); 3064 ata_for_each_link(link, ap, EDGE) { 3065 ata_for_each_dev(dev, link, ALL) { 3066 struct ata_eh_info *ehi = &link->eh_info; 3067 3068 link->eh_context.i.dev_action[dev->devno] |= 3069 ehi->dev_action[dev->devno] & ATA_EH_PARK; 3070 ata_eh_clear_action(link, dev, ehi, ATA_EH_PARK); 3071 } 3072 } 3073 spin_unlock_irqrestore(ap->lock, flags); 3074 } 3075 3076 static void ata_eh_park_issue_cmd(struct ata_device *dev, int park) 3077 { 3078 struct ata_eh_context *ehc = &dev->link->eh_context; 3079 struct ata_taskfile tf; 3080 unsigned int err_mask; 3081 3082 ata_tf_init(dev, &tf); 3083 if (park) { 3084 ehc->unloaded_mask |= 1 << dev->devno; 3085 tf.command = ATA_CMD_IDLEIMMEDIATE; 3086 tf.feature = 0x44; 3087 tf.lbal = 0x4c; 3088 tf.lbam = 0x4e; 3089 tf.lbah = 0x55; 3090 } else { 3091 ehc->unloaded_mask &= ~(1 << dev->devno); 3092 tf.command = ATA_CMD_CHK_POWER; 3093 } 3094 3095 tf.flags |= ATA_TFLAG_DEVICE | ATA_TFLAG_ISADDR; 3096 tf.protocol = ATA_PROT_NODATA; 3097 err_mask = ata_exec_internal(dev, &tf, NULL, DMA_NONE, NULL, 0, 0); 3098 if (park && (err_mask || tf.lbal != 0xc4)) { 3099 ata_dev_err(dev, "head unload failed!\n"); 3100 ehc->unloaded_mask &= ~(1 << dev->devno); 3101 } 3102 } 3103 3104 static int ata_eh_revalidate_and_attach(struct ata_link *link, 3105 struct ata_device **r_failed_dev) 3106 { 3107 struct ata_port *ap = link->ap; 3108 struct ata_eh_context *ehc = &link->eh_context; 3109 struct ata_device *dev; 3110 unsigned int new_mask = 0; 3111 unsigned long flags; 3112 int rc = 0; 3113 3114 DPRINTK("ENTER\n"); 3115 3116 /* For PATA drive side cable detection to work, IDENTIFY must 3117 * be done backwards such that PDIAG- is released by the slave 3118 * device before the master device is identified. 
3119 */ 3120 ata_for_each_dev(dev, link, ALL_REVERSE) { 3121 unsigned int action = ata_eh_dev_action(dev); 3122 unsigned int readid_flags = 0; 3123 3124 if (ehc->i.flags & ATA_EHI_DID_RESET) 3125 readid_flags |= ATA_READID_POSTRESET; 3126 3127 if ((action & ATA_EH_REVALIDATE) && ata_dev_enabled(dev)) { 3128 WARN_ON(dev->class == ATA_DEV_PMP); 3129 3130 if (ata_phys_link_offline(ata_dev_phys_link(dev))) { 3131 rc = -EIO; 3132 goto err; 3133 } 3134 3135 ata_eh_about_to_do(link, dev, ATA_EH_REVALIDATE); 3136 rc = ata_dev_revalidate(dev, ehc->classes[dev->devno], 3137 readid_flags); 3138 if (rc) 3139 goto err; 3140 3141 ata_eh_done(link, dev, ATA_EH_REVALIDATE); 3142 3143 /* Configuration may have changed, reconfigure 3144 * transfer mode. 3145 */ 3146 ehc->i.flags |= ATA_EHI_SETMODE; 3147 3148 /* schedule the scsi_rescan_device() here */ 3149 schedule_work(&(ap->scsi_rescan_task)); 3150 } else if (dev->class == ATA_DEV_UNKNOWN && 3151 ehc->tries[dev->devno] && 3152 ata_class_enabled(ehc->classes[dev->devno])) { 3153 /* Temporarily set dev->class, it will be 3154 * permanently set once all configurations are 3155 * complete. This is necessary because new 3156 * device configuration is done in two 3157 * separate loops. 3158 */ 3159 dev->class = ehc->classes[dev->devno]; 3160 3161 if (dev->class == ATA_DEV_PMP) 3162 rc = sata_pmp_attach(dev); 3163 else 3164 rc = ata_dev_read_id(dev, &dev->class, 3165 readid_flags, dev->id); 3166 3167 /* read_id might have changed class, store and reset */ 3168 ehc->classes[dev->devno] = dev->class; 3169 dev->class = ATA_DEV_UNKNOWN; 3170 3171 switch (rc) { 3172 case 0: 3173 /* clear error info accumulated during probe */ 3174 ata_ering_clear(&dev->ering); 3175 new_mask |= 1 << dev->devno; 3176 break; 3177 case -ENOENT: 3178 /* IDENTIFY was issued to non-existent 3179 * device. No need to reset. Just 3180 * thaw and ignore the device. 
3181 */ 3182 ata_eh_thaw_port(ap); 3183 break; 3184 default: 3185 goto err; 3186 } 3187 } 3188 } 3189 3190 /* PDIAG- should have been released, ask cable type if post-reset */ 3191 if ((ehc->i.flags & ATA_EHI_DID_RESET) && ata_is_host_link(link)) { 3192 if (ap->ops->cable_detect) 3193 ap->cbl = ap->ops->cable_detect(ap); 3194 ata_force_cbl(ap); 3195 } 3196 3197 /* Configure new devices forward such that user doesn't see 3198 * device detection messages backwards. 3199 */ 3200 ata_for_each_dev(dev, link, ALL) { 3201 if (!(new_mask & (1 << dev->devno))) 3202 continue; 3203 3204 dev->class = ehc->classes[dev->devno]; 3205 3206 if (dev->class == ATA_DEV_PMP) 3207 continue; 3208 3209 ehc->i.flags |= ATA_EHI_PRINTINFO; 3210 rc = ata_dev_configure(dev); 3211 ehc->i.flags &= ~ATA_EHI_PRINTINFO; 3212 if (rc) { 3213 dev->class = ATA_DEV_UNKNOWN; 3214 goto err; 3215 } 3216 3217 spin_lock_irqsave(ap->lock, flags); 3218 ap->pflags |= ATA_PFLAG_SCSI_HOTPLUG; 3219 spin_unlock_irqrestore(ap->lock, flags); 3220 3221 /* new device discovered, configure xfermode */ 3222 ehc->i.flags |= ATA_EHI_SETMODE; 3223 } 3224 3225 return 0; 3226 3227 err: 3228 *r_failed_dev = dev; 3229 DPRINTK("EXIT rc=%d\n", rc); 3230 return rc; 3231 } 3232 3233 /** 3234 * ata_set_mode - Program timings and issue SET FEATURES - XFER 3235 * @link: link on which timings will be programmed 3236 * @r_failed_dev: out parameter for failed device 3237 * 3238 * Set ATA device disk transfer mode (PIO3, UDMA6, etc.). If 3239 * ata_set_mode() fails, pointer to the failing device is 3240 * returned in @r_failed_dev. 3241 * 3242 * LOCKING: 3243 * PCI/etc. bus probe sem. 
3244 * 3245 * RETURNS: 3246 * 0 on success, negative errno otherwise 3247 */ 3248 int ata_set_mode(struct ata_link *link, struct ata_device **r_failed_dev) 3249 { 3250 struct ata_port *ap = link->ap; 3251 struct ata_device *dev; 3252 int rc; 3253 3254 /* if data transfer is verified, clear DUBIOUS_XFER on ering top */ 3255 ata_for_each_dev(dev, link, ENABLED) { 3256 if (!(dev->flags & ATA_DFLAG_DUBIOUS_XFER)) { 3257 struct ata_ering_entry *ent; 3258 3259 ent = ata_ering_top(&dev->ering); 3260 if (ent) 3261 ent->eflags &= ~ATA_EFLAG_DUBIOUS_XFER; 3262 } 3263 } 3264 3265 /* has private set_mode? */ 3266 if (ap->ops->set_mode) 3267 rc = ap->ops->set_mode(link, r_failed_dev); 3268 else 3269 rc = ata_do_set_mode(link, r_failed_dev); 3270 3271 /* if transfer mode has changed, set DUBIOUS_XFER on device */ 3272 ata_for_each_dev(dev, link, ENABLED) { 3273 struct ata_eh_context *ehc = &link->eh_context; 3274 u8 saved_xfer_mode = ehc->saved_xfer_mode[dev->devno]; 3275 u8 saved_ncq = !!(ehc->saved_ncq_enabled & (1 << dev->devno)); 3276 3277 if (dev->xfer_mode != saved_xfer_mode || 3278 ata_ncq_enabled(dev) != saved_ncq) 3279 dev->flags |= ATA_DFLAG_DUBIOUS_XFER; 3280 } 3281 3282 return rc; 3283 } 3284 3285 /** 3286 * atapi_eh_clear_ua - Clear ATAPI UNIT ATTENTION after reset 3287 * @dev: ATAPI device to clear UA for 3288 * 3289 * Resets and other operations can make an ATAPI device raise 3290 * UNIT ATTENTION which causes the next operation to fail. This 3291 * function clears UA. 3292 * 3293 * LOCKING: 3294 * EH context (may sleep). 3295 * 3296 * RETURNS: 3297 * 0 on success, -errno on failure. 
3298 */ 3299 static int atapi_eh_clear_ua(struct ata_device *dev) 3300 { 3301 int i; 3302 3303 for (i = 0; i < ATA_EH_UA_TRIES; i++) { 3304 u8 *sense_buffer = dev->link->ap->sector_buf; 3305 u8 sense_key = 0; 3306 unsigned int err_mask; 3307 3308 err_mask = atapi_eh_tur(dev, &sense_key); 3309 if (err_mask != 0 && err_mask != AC_ERR_DEV) { 3310 ata_dev_warn(dev, 3311 "TEST_UNIT_READY failed (err_mask=0x%x)\n", 3312 err_mask); 3313 return -EIO; 3314 } 3315 3316 if (!err_mask || sense_key != UNIT_ATTENTION) 3317 return 0; 3318 3319 err_mask = atapi_eh_request_sense(dev, sense_buffer, sense_key); 3320 if (err_mask) { 3321 ata_dev_warn(dev, "failed to clear " 3322 "UNIT ATTENTION (err_mask=0x%x)\n", err_mask); 3323 return -EIO; 3324 } 3325 } 3326 3327 ata_dev_warn(dev, "UNIT ATTENTION persists after %d tries\n", 3328 ATA_EH_UA_TRIES); 3329 3330 return 0; 3331 } 3332 3333 /** 3334 * ata_eh_maybe_retry_flush - Retry FLUSH if necessary 3335 * @dev: ATA device which may need FLUSH retry 3336 * 3337 * If @dev failed FLUSH, it needs to be reported upper layer 3338 * immediately as it means that @dev failed to remap and already 3339 * lost at least a sector and further FLUSH retrials won't make 3340 * any difference to the lost sector. However, if FLUSH failed 3341 * for other reasons, for example transmission error, FLUSH needs 3342 * to be retried. 3343 * 3344 * This function determines whether FLUSH failure retry is 3345 * necessary and performs it if so. 3346 * 3347 * RETURNS: 3348 * 0 if EH can continue, -errno if EH needs to be repeated. 3349 */ 3350 static int ata_eh_maybe_retry_flush(struct ata_device *dev) 3351 { 3352 struct ata_link *link = dev->link; 3353 struct ata_port *ap = link->ap; 3354 struct ata_queued_cmd *qc; 3355 struct ata_taskfile tf; 3356 unsigned int err_mask; 3357 int rc = 0; 3358 3359 /* did flush fail for this device? 
*/ 3360 if (!ata_tag_valid(link->active_tag)) 3361 return 0; 3362 3363 qc = __ata_qc_from_tag(ap, link->active_tag); 3364 if (qc->dev != dev || (qc->tf.command != ATA_CMD_FLUSH_EXT && 3365 qc->tf.command != ATA_CMD_FLUSH)) 3366 return 0; 3367 3368 /* if the device failed it, it should be reported to upper layers */ 3369 if (qc->err_mask & AC_ERR_DEV) 3370 return 0; 3371 3372 /* flush failed for some other reason, give it another shot */ 3373 ata_tf_init(dev, &tf); 3374 3375 tf.command = qc->tf.command; 3376 tf.flags |= ATA_TFLAG_DEVICE; 3377 tf.protocol = ATA_PROT_NODATA; 3378 3379 ata_dev_warn(dev, "retrying FLUSH 0x%x Emask 0x%x\n", 3380 tf.command, qc->err_mask); 3381 3382 err_mask = ata_exec_internal(dev, &tf, NULL, DMA_NONE, NULL, 0, 0); 3383 if (!err_mask) { 3384 /* 3385 * FLUSH is complete but there's no way to 3386 * successfully complete a failed command from EH. 3387 * Making sure retry is allowed at least once and 3388 * retrying it should do the trick - whatever was in 3389 * the cache is already on the platter and this won't 3390 * cause infinite loop. 3391 */ 3392 qc->scsicmd->allowed = max(qc->scsicmd->allowed, 1); 3393 } else { 3394 ata_dev_warn(dev, "FLUSH failed Emask 0x%x\n", 3395 err_mask); 3396 rc = -EIO; 3397 3398 /* if device failed it, report it to upper layers */ 3399 if (err_mask & AC_ERR_DEV) { 3400 qc->err_mask |= AC_ERR_DEV; 3401 qc->result_tf = tf; 3402 if (!(ap->pflags & ATA_PFLAG_FROZEN)) 3403 rc = 0; 3404 } 3405 } 3406 return rc; 3407 } 3408 3409 /** 3410 * ata_eh_set_lpm - configure SATA interface power management 3411 * @link: link to configure power management 3412 * @policy: the link power management policy 3413 * @r_failed_dev: out parameter for failed device 3414 * 3415 * Enable SATA Interface power management. This will enable 3416 * Device Interface Power Management (DIPM) for min_power and 3417 * medium_power_with_dipm policies, and then call driver specific 3418 * callbacks for enabling Host Initiated Power management. 
3419 * 3420 * LOCKING: 3421 * EH context. 3422 * 3423 * RETURNS: 3424 * 0 on success, -errno on failure. 3425 */ 3426 static int ata_eh_set_lpm(struct ata_link *link, enum ata_lpm_policy policy, 3427 struct ata_device **r_failed_dev) 3428 { 3429 struct ata_port *ap = ata_is_host_link(link) ? link->ap : NULL; 3430 struct ata_eh_context *ehc = &link->eh_context; 3431 struct ata_device *dev, *link_dev = NULL, *lpm_dev = NULL; 3432 enum ata_lpm_policy old_policy = link->lpm_policy; 3433 bool no_dipm = link->ap->flags & ATA_FLAG_NO_DIPM; 3434 unsigned int hints = ATA_LPM_EMPTY | ATA_LPM_HIPM; 3435 unsigned int err_mask; 3436 int rc; 3437 3438 /* if the link or host doesn't do LPM, noop */ 3439 if ((link->flags & ATA_LFLAG_NO_LPM) || (ap && !ap->ops->set_lpm)) 3440 return 0; 3441 3442 /* 3443 * DIPM is enabled only for MIN_POWER as some devices 3444 * misbehave when the host NACKs transition to SLUMBER. Order 3445 * device and link configurations such that the host always 3446 * allows DIPM requests. 
3447 */ 3448 ata_for_each_dev(dev, link, ENABLED) { 3449 bool hipm = ata_id_has_hipm(dev->id); 3450 bool dipm = ata_id_has_dipm(dev->id) && !no_dipm; 3451 3452 /* find the first enabled and LPM enabled devices */ 3453 if (!link_dev) 3454 link_dev = dev; 3455 3456 if (!lpm_dev && (hipm || dipm)) 3457 lpm_dev = dev; 3458 3459 hints &= ~ATA_LPM_EMPTY; 3460 if (!hipm) 3461 hints &= ~ATA_LPM_HIPM; 3462 3463 /* disable DIPM before changing link config */ 3464 if (policy < ATA_LPM_MED_POWER_WITH_DIPM && dipm) { 3465 err_mask = ata_dev_set_feature(dev, 3466 SETFEATURES_SATA_DISABLE, SATA_DIPM); 3467 if (err_mask && err_mask != AC_ERR_DEV) { 3468 ata_dev_warn(dev, 3469 "failed to disable DIPM, Emask 0x%x\n", 3470 err_mask); 3471 rc = -EIO; 3472 goto fail; 3473 } 3474 } 3475 } 3476 3477 if (ap) { 3478 rc = ap->ops->set_lpm(link, policy, hints); 3479 if (!rc && ap->slave_link) 3480 rc = ap->ops->set_lpm(ap->slave_link, policy, hints); 3481 } else 3482 rc = sata_pmp_set_lpm(link, policy, hints); 3483 3484 /* 3485 * Attribute link config failure to the first (LPM) enabled 3486 * device on the link. 3487 */ 3488 if (rc) { 3489 if (rc == -EOPNOTSUPP) { 3490 link->flags |= ATA_LFLAG_NO_LPM; 3491 return 0; 3492 } 3493 dev = lpm_dev ? lpm_dev : link_dev; 3494 goto fail; 3495 } 3496 3497 /* 3498 * Low level driver acked the transition. Issue DIPM command 3499 * with the new policy set. 
3500 */ 3501 link->lpm_policy = policy; 3502 if (ap && ap->slave_link) 3503 ap->slave_link->lpm_policy = policy; 3504 3505 /* host config updated, enable DIPM if transitioning to MIN_POWER */ 3506 ata_for_each_dev(dev, link, ENABLED) { 3507 if (policy >= ATA_LPM_MED_POWER_WITH_DIPM && !no_dipm && 3508 ata_id_has_dipm(dev->id)) { 3509 err_mask = ata_dev_set_feature(dev, 3510 SETFEATURES_SATA_ENABLE, SATA_DIPM); 3511 if (err_mask && err_mask != AC_ERR_DEV) { 3512 ata_dev_warn(dev, 3513 "failed to enable DIPM, Emask 0x%x\n", 3514 err_mask); 3515 rc = -EIO; 3516 goto fail; 3517 } 3518 } 3519 } 3520 3521 link->last_lpm_change = jiffies; 3522 link->flags |= ATA_LFLAG_CHANGED; 3523 3524 return 0; 3525 3526 fail: 3527 /* restore the old policy */ 3528 link->lpm_policy = old_policy; 3529 if (ap && ap->slave_link) 3530 ap->slave_link->lpm_policy = old_policy; 3531 3532 /* if no device or only one more chance is left, disable LPM */ 3533 if (!dev || ehc->tries[dev->devno] <= 2) { 3534 ata_link_warn(link, "disabling LPM on the link\n"); 3535 link->flags |= ATA_LFLAG_NO_LPM; 3536 } 3537 if (r_failed_dev) 3538 *r_failed_dev = dev; 3539 return rc; 3540 } 3541 3542 int ata_link_nr_enabled(struct ata_link *link) 3543 { 3544 struct ata_device *dev; 3545 int cnt = 0; 3546 3547 ata_for_each_dev(dev, link, ENABLED) 3548 cnt++; 3549 return cnt; 3550 } 3551 3552 static int ata_link_nr_vacant(struct ata_link *link) 3553 { 3554 struct ata_device *dev; 3555 int cnt = 0; 3556 3557 ata_for_each_dev(dev, link, ALL) 3558 if (dev->class == ATA_DEV_UNKNOWN) 3559 cnt++; 3560 return cnt; 3561 } 3562 3563 static int ata_eh_skip_recovery(struct ata_link *link) 3564 { 3565 struct ata_port *ap = link->ap; 3566 struct ata_eh_context *ehc = &link->eh_context; 3567 struct ata_device *dev; 3568 3569 /* skip disabled links */ 3570 if (link->flags & ATA_LFLAG_DISABLED) 3571 return 1; 3572 3573 /* skip if explicitly requested */ 3574 if (ehc->i.flags & ATA_EHI_NO_RECOVERY) 3575 return 1; 3576 3577 /* thaw 
frozen port and recover failed devices */ 3578 if ((ap->pflags & ATA_PFLAG_FROZEN) || ata_link_nr_enabled(link)) 3579 return 0; 3580 3581 /* reset at least once if reset is requested */ 3582 if ((ehc->i.action & ATA_EH_RESET) && 3583 !(ehc->i.flags & ATA_EHI_DID_RESET)) 3584 return 0; 3585 3586 /* skip if class codes for all vacant slots are ATA_DEV_NONE */ 3587 ata_for_each_dev(dev, link, ALL) { 3588 if (dev->class == ATA_DEV_UNKNOWN && 3589 ehc->classes[dev->devno] != ATA_DEV_NONE) 3590 return 0; 3591 } 3592 3593 return 1; 3594 } 3595 3596 static int ata_count_probe_trials_cb(struct ata_ering_entry *ent, void *void_arg) 3597 { 3598 u64 interval = msecs_to_jiffies(ATA_EH_PROBE_TRIAL_INTERVAL); 3599 u64 now = get_jiffies_64(); 3600 int *trials = void_arg; 3601 3602 if ((ent->eflags & ATA_EFLAG_OLD_ER) || 3603 (ent->timestamp < now - min(now, interval))) 3604 return -1; 3605 3606 (*trials)++; 3607 return 0; 3608 } 3609 3610 static int ata_eh_schedule_probe(struct ata_device *dev) 3611 { 3612 struct ata_eh_context *ehc = &dev->link->eh_context; 3613 struct ata_link *link = ata_dev_phys_link(dev); 3614 int trials = 0; 3615 3616 if (!(ehc->i.probe_mask & (1 << dev->devno)) || 3617 (ehc->did_probe_mask & (1 << dev->devno))) 3618 return 0; 3619 3620 ata_eh_detach_dev(dev); 3621 ata_dev_init(dev); 3622 ehc->did_probe_mask |= (1 << dev->devno); 3623 ehc->i.action |= ATA_EH_RESET; 3624 ehc->saved_xfer_mode[dev->devno] = 0; 3625 ehc->saved_ncq_enabled &= ~(1 << dev->devno); 3626 3627 /* the link maybe in a deep sleep, wake it up */ 3628 if (link->lpm_policy > ATA_LPM_MAX_POWER) { 3629 if (ata_is_host_link(link)) 3630 link->ap->ops->set_lpm(link, ATA_LPM_MAX_POWER, 3631 ATA_LPM_EMPTY); 3632 else 3633 sata_pmp_set_lpm(link, ATA_LPM_MAX_POWER, 3634 ATA_LPM_EMPTY); 3635 } 3636 3637 /* Record and count probe trials on the ering. The specific 3638 * error mask used is irrelevant. 
Because a successful device 3639 * detection clears the ering, this count accumulates only if 3640 * there are consecutive failed probes. 3641 * 3642 * If the count is equal to or higher than ATA_EH_PROBE_TRIALS 3643 * in the last ATA_EH_PROBE_TRIAL_INTERVAL, link speed is 3644 * forced to 1.5Gbps. 3645 * 3646 * This is to work around cases where failed link speed 3647 * negotiation results in device misdetection leading to 3648 * infinite DEVXCHG or PHRDY CHG events. 3649 */ 3650 ata_ering_record(&dev->ering, 0, AC_ERR_OTHER); 3651 ata_ering_map(&dev->ering, ata_count_probe_trials_cb, &trials); 3652 3653 if (trials > ATA_EH_PROBE_TRIALS) 3654 sata_down_spd_limit(link, 1); 3655 3656 return 1; 3657 } 3658 3659 static int ata_eh_handle_dev_fail(struct ata_device *dev, int err) 3660 { 3661 struct ata_eh_context *ehc = &dev->link->eh_context; 3662 3663 /* -EAGAIN from EH routine indicates retry without prejudice. 3664 * The requester is responsible for ensuring forward progress. 3665 */ 3666 if (err != -EAGAIN) 3667 ehc->tries[dev->devno]--; 3668 3669 switch (err) { 3670 case -ENODEV: 3671 /* device missing or wrong IDENTIFY data, schedule probing */ 3672 ehc->i.probe_mask |= (1 << dev->devno); 3673 /* fall through */ 3674 case -EINVAL: 3675 /* give it just one more chance */ 3676 ehc->tries[dev->devno] = min(ehc->tries[dev->devno], 1); 3677 /* fall through */ 3678 case -EIO: 3679 if (ehc->tries[dev->devno] == 1) { 3680 /* This is the last chance, better to slow 3681 * down than lose it. 
3682 */ 3683 sata_down_spd_limit(ata_dev_phys_link(dev), 0); 3684 if (dev->pio_mode > XFER_PIO_0) 3685 ata_down_xfermask_limit(dev, ATA_DNXFER_PIO); 3686 } 3687 } 3688 3689 if (ata_dev_enabled(dev) && !ehc->tries[dev->devno]) { 3690 /* disable device if it has used up all its chances */ 3691 ata_dev_disable(dev); 3692 3693 /* detach if offline */ 3694 if (ata_phys_link_offline(ata_dev_phys_link(dev))) 3695 ata_eh_detach_dev(dev); 3696 3697 /* schedule probe if necessary */ 3698 if (ata_eh_schedule_probe(dev)) { 3699 ehc->tries[dev->devno] = ATA_EH_DEV_TRIES; 3700 memset(ehc->cmd_timeout_idx[dev->devno], 0, 3701 sizeof(ehc->cmd_timeout_idx[dev->devno])); 3702 } 3703 3704 return 1; 3705 } else { 3706 ehc->i.action |= ATA_EH_RESET; 3707 return 0; 3708 } 3709 } 3710 3711 /** 3712 * ata_eh_recover - recover host port after error 3713 * @ap: host port to recover 3714 * @prereset: prereset method (can be NULL) 3715 * @softreset: softreset method (can be NULL) 3716 * @hardreset: hardreset method (can be NULL) 3717 * @postreset: postreset method (can be NULL) 3718 * @r_failed_link: out parameter for failed link 3719 * 3720 * This is the alpha and omega, eum and yang, heart and soul of 3721 * libata exception handling. On entry, actions required to 3722 * recover each link and hotplug requests are recorded in the 3723 * link's eh_context. This function executes all the operations 3724 * with appropriate retrials and fallbacks to resurrect failed 3725 * devices, detach goners and greet newcomers. 3726 * 3727 * LOCKING: 3728 * Kernel thread context (may sleep). 3729 * 3730 * RETURNS: 3731 * 0 on success, -errno on failure. 
3732 */ 3733 int ata_eh_recover(struct ata_port *ap, ata_prereset_fn_t prereset, 3734 ata_reset_fn_t softreset, ata_reset_fn_t hardreset, 3735 ata_postreset_fn_t postreset, 3736 struct ata_link **r_failed_link) 3737 { 3738 struct ata_link *link; 3739 struct ata_device *dev; 3740 int rc, nr_fails; 3741 unsigned long flags, deadline; 3742 3743 DPRINTK("ENTER\n"); 3744 3745 /* prep for recovery */ 3746 ata_for_each_link(link, ap, EDGE) { 3747 struct ata_eh_context *ehc = &link->eh_context; 3748 3749 /* re-enable link? */ 3750 if (ehc->i.action & ATA_EH_ENABLE_LINK) { 3751 ata_eh_about_to_do(link, NULL, ATA_EH_ENABLE_LINK); 3752 spin_lock_irqsave(ap->lock, flags); 3753 link->flags &= ~ATA_LFLAG_DISABLED; 3754 spin_unlock_irqrestore(ap->lock, flags); 3755 ata_eh_done(link, NULL, ATA_EH_ENABLE_LINK); 3756 } 3757 3758 ata_for_each_dev(dev, link, ALL) { 3759 if (link->flags & ATA_LFLAG_NO_RETRY) 3760 ehc->tries[dev->devno] = 1; 3761 else 3762 ehc->tries[dev->devno] = ATA_EH_DEV_TRIES; 3763 3764 /* collect port action mask recorded in dev actions */ 3765 ehc->i.action |= ehc->i.dev_action[dev->devno] & 3766 ~ATA_EH_PERDEV_MASK; 3767 ehc->i.dev_action[dev->devno] &= ATA_EH_PERDEV_MASK; 3768 3769 /* process hotplug request */ 3770 if (dev->flags & ATA_DFLAG_DETACH) 3771 ata_eh_detach_dev(dev); 3772 3773 /* schedule probe if necessary */ 3774 if (!ata_dev_enabled(dev)) 3775 ata_eh_schedule_probe(dev); 3776 } 3777 } 3778 3779 retry: 3780 rc = 0; 3781 3782 /* if UNLOADING, finish immediately */ 3783 if (ap->pflags & ATA_PFLAG_UNLOADING) 3784 goto out; 3785 3786 /* prep for EH */ 3787 ata_for_each_link(link, ap, EDGE) { 3788 struct ata_eh_context *ehc = &link->eh_context; 3789 3790 /* skip EH if possible. 
*/ 3791 if (ata_eh_skip_recovery(link)) 3792 ehc->i.action = 0; 3793 3794 ata_for_each_dev(dev, link, ALL) 3795 ehc->classes[dev->devno] = ATA_DEV_UNKNOWN; 3796 } 3797 3798 /* reset */ 3799 ata_for_each_link(link, ap, EDGE) { 3800 struct ata_eh_context *ehc = &link->eh_context; 3801 3802 if (!(ehc->i.action & ATA_EH_RESET)) 3803 continue; 3804 3805 rc = ata_eh_reset(link, ata_link_nr_vacant(link), 3806 prereset, softreset, hardreset, postreset); 3807 if (rc) { 3808 ata_link_err(link, "reset failed, giving up\n"); 3809 goto out; 3810 } 3811 } 3812 3813 do { 3814 unsigned long now; 3815 3816 /* 3817 * clears ATA_EH_PARK in eh_info and resets 3818 * ap->park_req_pending 3819 */ 3820 ata_eh_pull_park_action(ap); 3821 3822 deadline = jiffies; 3823 ata_for_each_link(link, ap, EDGE) { 3824 ata_for_each_dev(dev, link, ALL) { 3825 struct ata_eh_context *ehc = &link->eh_context; 3826 unsigned long tmp; 3827 3828 if (dev->class != ATA_DEV_ATA && 3829 dev->class != ATA_DEV_ZAC) 3830 continue; 3831 if (!(ehc->i.dev_action[dev->devno] & 3832 ATA_EH_PARK)) 3833 continue; 3834 tmp = dev->unpark_deadline; 3835 if (time_before(deadline, tmp)) 3836 deadline = tmp; 3837 else if (time_before_eq(tmp, jiffies)) 3838 continue; 3839 if (ehc->unloaded_mask & (1 << dev->devno)) 3840 continue; 3841 3842 ata_eh_park_issue_cmd(dev, 1); 3843 } 3844 } 3845 3846 now = jiffies; 3847 if (time_before_eq(deadline, now)) 3848 break; 3849 3850 ata_eh_release(ap); 3851 deadline = wait_for_completion_timeout(&ap->park_req_pending, 3852 deadline - now); 3853 ata_eh_acquire(ap); 3854 } while (deadline); 3855 ata_for_each_link(link, ap, EDGE) { 3856 ata_for_each_dev(dev, link, ALL) { 3857 if (!(link->eh_context.unloaded_mask & 3858 (1 << dev->devno))) 3859 continue; 3860 3861 ata_eh_park_issue_cmd(dev, 0); 3862 ata_eh_done(link, dev, ATA_EH_PARK); 3863 } 3864 } 3865 3866 /* the rest */ 3867 nr_fails = 0; 3868 ata_for_each_link(link, ap, PMP_FIRST) { 3869 struct ata_eh_context *ehc = &link->eh_context; 3870 
3871 if (sata_pmp_attached(ap) && ata_is_host_link(link)) 3872 goto config_lpm; 3873 3874 /* revalidate existing devices and attach new ones */ 3875 rc = ata_eh_revalidate_and_attach(link, &dev); 3876 if (rc) 3877 goto rest_fail; 3878 3879 /* if PMP got attached, return, pmp EH will take care of it */ 3880 if (link->device->class == ATA_DEV_PMP) { 3881 ehc->i.action = 0; 3882 return 0; 3883 } 3884 3885 /* configure transfer mode if necessary */ 3886 if (ehc->i.flags & ATA_EHI_SETMODE) { 3887 rc = ata_set_mode(link, &dev); 3888 if (rc) 3889 goto rest_fail; 3890 ehc->i.flags &= ~ATA_EHI_SETMODE; 3891 } 3892 3893 /* If reset has been issued, clear UA to avoid 3894 * disrupting the current users of the device. 3895 */ 3896 if (ehc->i.flags & ATA_EHI_DID_RESET) { 3897 ata_for_each_dev(dev, link, ALL) { 3898 if (dev->class != ATA_DEV_ATAPI) 3899 continue; 3900 rc = atapi_eh_clear_ua(dev); 3901 if (rc) 3902 goto rest_fail; 3903 if (zpodd_dev_enabled(dev)) 3904 zpodd_post_poweron(dev); 3905 } 3906 } 3907 3908 /* retry flush if necessary */ 3909 ata_for_each_dev(dev, link, ALL) { 3910 if (dev->class != ATA_DEV_ATA && 3911 dev->class != ATA_DEV_ZAC) 3912 continue; 3913 rc = ata_eh_maybe_retry_flush(dev); 3914 if (rc) 3915 goto rest_fail; 3916 } 3917 3918 config_lpm: 3919 /* configure link power saving */ 3920 if (link->lpm_policy != ap->target_lpm_policy) { 3921 rc = ata_eh_set_lpm(link, ap->target_lpm_policy, &dev); 3922 if (rc) 3923 goto rest_fail; 3924 } 3925 3926 /* this link is okay now */ 3927 ehc->i.flags = 0; 3928 continue; 3929 3930 rest_fail: 3931 nr_fails++; 3932 if (dev) 3933 ata_eh_handle_dev_fail(dev, rc); 3934 3935 if (ap->pflags & ATA_PFLAG_FROZEN) { 3936 /* PMP reset requires working host port. 3937 * Can't retry if it's frozen. 
3938 */ 3939 if (sata_pmp_attached(ap)) 3940 goto out; 3941 break; 3942 } 3943 } 3944 3945 if (nr_fails) 3946 goto retry; 3947 3948 out: 3949 if (rc && r_failed_link) 3950 *r_failed_link = link; 3951 3952 DPRINTK("EXIT, rc=%d\n", rc); 3953 return rc; 3954 } 3955 3956 /** 3957 * ata_eh_finish - finish up EH 3958 * @ap: host port to finish EH for 3959 * 3960 * Recovery is complete. Clean up EH states and retry or finish 3961 * failed qcs. 3962 * 3963 * LOCKING: 3964 * None. 3965 */ 3966 void ata_eh_finish(struct ata_port *ap) 3967 { 3968 struct ata_queued_cmd *qc; 3969 int tag; 3970 3971 /* retry or finish qcs */ 3972 ata_qc_for_each_raw(ap, qc, tag) { 3973 if (!(qc->flags & ATA_QCFLAG_FAILED)) 3974 continue; 3975 3976 if (qc->err_mask) { 3977 /* FIXME: Once EH migration is complete, 3978 * generate sense data in this function, 3979 * considering both err_mask and tf. 3980 */ 3981 if (qc->flags & ATA_QCFLAG_RETRY) 3982 ata_eh_qc_retry(qc); 3983 else 3984 ata_eh_qc_complete(qc); 3985 } else { 3986 if (qc->flags & ATA_QCFLAG_SENSE_VALID) { 3987 ata_eh_qc_complete(qc); 3988 } else { 3989 /* feed zero TF to sense generation */ 3990 memset(&qc->result_tf, 0, sizeof(qc->result_tf)); 3991 ata_eh_qc_retry(qc); 3992 } 3993 } 3994 } 3995 3996 /* make sure nr_active_links is zero after EH */ 3997 WARN_ON(ap->nr_active_links); 3998 ap->nr_active_links = 0; 3999 } 4000 4001 /** 4002 * ata_do_eh - do standard error handling 4003 * @ap: host port to handle error for 4004 * 4005 * @prereset: prereset method (can be NULL) 4006 * @softreset: softreset method (can be NULL) 4007 * @hardreset: hardreset method (can be NULL) 4008 * @postreset: postreset method (can be NULL) 4009 * 4010 * Perform standard error handling sequence. 4011 * 4012 * LOCKING: 4013 * Kernel thread context (may sleep). 
4014 */ 4015 void ata_do_eh(struct ata_port *ap, ata_prereset_fn_t prereset, 4016 ata_reset_fn_t softreset, ata_reset_fn_t hardreset, 4017 ata_postreset_fn_t postreset) 4018 { 4019 struct ata_device *dev; 4020 int rc; 4021 4022 ata_eh_autopsy(ap); 4023 ata_eh_report(ap); 4024 4025 rc = ata_eh_recover(ap, prereset, softreset, hardreset, postreset, 4026 NULL); 4027 if (rc) { 4028 ata_for_each_dev(dev, &ap->link, ALL) 4029 ata_dev_disable(dev); 4030 } 4031 4032 ata_eh_finish(ap); 4033 } 4034 4035 /** 4036 * ata_std_error_handler - standard error handler 4037 * @ap: host port to handle error for 4038 * 4039 * Standard error handler 4040 * 4041 * LOCKING: 4042 * Kernel thread context (may sleep). 4043 */ 4044 void ata_std_error_handler(struct ata_port *ap) 4045 { 4046 struct ata_port_operations *ops = ap->ops; 4047 ata_reset_fn_t hardreset = ops->hardreset; 4048 4049 /* ignore built-in hardreset if SCR access is not available */ 4050 if (hardreset == sata_std_hardreset && !sata_scr_valid(&ap->link)) 4051 hardreset = NULL; 4052 4053 ata_do_eh(ap, ops->prereset, ops->softreset, hardreset, ops->postreset); 4054 } 4055 4056 #ifdef CONFIG_PM 4057 /** 4058 * ata_eh_handle_port_suspend - perform port suspend operation 4059 * @ap: port to suspend 4060 * 4061 * Suspend @ap. 4062 * 4063 * LOCKING: 4064 * Kernel thread context (may sleep). 4065 */ 4066 static void ata_eh_handle_port_suspend(struct ata_port *ap) 4067 { 4068 unsigned long flags; 4069 int rc = 0; 4070 struct ata_device *dev; 4071 4072 /* are we suspending? */ 4073 spin_lock_irqsave(ap->lock, flags); 4074 if (!(ap->pflags & ATA_PFLAG_PM_PENDING) || 4075 ap->pm_mesg.event & PM_EVENT_RESUME) { 4076 spin_unlock_irqrestore(ap->lock, flags); 4077 return; 4078 } 4079 spin_unlock_irqrestore(ap->lock, flags); 4080 4081 WARN_ON(ap->pflags & ATA_PFLAG_SUSPENDED); 4082 4083 /* 4084 * If we have a ZPODD attached, check its zero 4085 * power ready status before the port is frozen. 4086 * Only needed for runtime suspend. 
4087 */ 4088 if (PMSG_IS_AUTO(ap->pm_mesg)) { 4089 ata_for_each_dev(dev, &ap->link, ENABLED) { 4090 if (zpodd_dev_enabled(dev)) 4091 zpodd_on_suspend(dev); 4092 } 4093 } 4094 4095 /* tell ACPI we're suspending */ 4096 rc = ata_acpi_on_suspend(ap); 4097 if (rc) 4098 goto out; 4099 4100 /* suspend */ 4101 ata_eh_freeze_port(ap); 4102 4103 if (ap->ops->port_suspend) 4104 rc = ap->ops->port_suspend(ap, ap->pm_mesg); 4105 4106 ata_acpi_set_state(ap, ap->pm_mesg); 4107 out: 4108 /* update the flags */ 4109 spin_lock_irqsave(ap->lock, flags); 4110 4111 ap->pflags &= ~ATA_PFLAG_PM_PENDING; 4112 if (rc == 0) 4113 ap->pflags |= ATA_PFLAG_SUSPENDED; 4114 else if (ap->pflags & ATA_PFLAG_FROZEN) 4115 ata_port_schedule_eh(ap); 4116 4117 spin_unlock_irqrestore(ap->lock, flags); 4118 4119 return; 4120 } 4121 4122 /** 4123 * ata_eh_handle_port_resume - perform port resume operation 4124 * @ap: port to resume 4125 * 4126 * Resume @ap. 4127 * 4128 * LOCKING: 4129 * Kernel thread context (may sleep). 4130 */ 4131 static void ata_eh_handle_port_resume(struct ata_port *ap) 4132 { 4133 struct ata_link *link; 4134 struct ata_device *dev; 4135 unsigned long flags; 4136 4137 /* are we resuming? */ 4138 spin_lock_irqsave(ap->lock, flags); 4139 if (!(ap->pflags & ATA_PFLAG_PM_PENDING) || 4140 !(ap->pm_mesg.event & PM_EVENT_RESUME)) { 4141 spin_unlock_irqrestore(ap->lock, flags); 4142 return; 4143 } 4144 spin_unlock_irqrestore(ap->lock, flags); 4145 4146 WARN_ON(!(ap->pflags & ATA_PFLAG_SUSPENDED)); 4147 4148 /* 4149 * Error timestamps are in jiffies which doesn't run while 4150 * suspended and PHY events during resume isn't too uncommon. 4151 * When the two are combined, it can lead to unnecessary speed 4152 * downs if the machine is suspended and resumed repeatedly. 4153 * Clear error history. 
4154 */ 4155 ata_for_each_link(link, ap, HOST_FIRST) 4156 ata_for_each_dev(dev, link, ALL) 4157 ata_ering_clear(&dev->ering); 4158 4159 ata_acpi_set_state(ap, ap->pm_mesg); 4160 4161 if (ap->ops->port_resume) 4162 ap->ops->port_resume(ap); 4163 4164 /* tell ACPI that we're resuming */ 4165 ata_acpi_on_resume(ap); 4166 4167 /* update the flags */ 4168 spin_lock_irqsave(ap->lock, flags); 4169 ap->pflags &= ~(ATA_PFLAG_PM_PENDING | ATA_PFLAG_SUSPENDED); 4170 spin_unlock_irqrestore(ap->lock, flags); 4171 } 4172 #endif /* CONFIG_PM */ 4173