/*
 *  libata-eh.c - libata error handling
 *
 *  Maintained by:  Tejun Heo <tj@kernel.org>
 *		    Please ALWAYS copy linux-ide@vger.kernel.org
 *		    on emails.
 *
 *  Copyright 2006 Tejun Heo <htejun@gmail.com>
 *
 *
 *  This program is free software; you can redistribute it and/or
 *  modify it under the terms of the GNU General Public License as
 *  published by the Free Software Foundation; either version 2, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 *  General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; see the file COPYING.  If not, write to
 *  the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139,
 *  USA.
 *
 *
 *  libata documentation is available via 'make {ps|pdf}docs',
 *  as Documentation/driver-api/libata.rst
 *
 *  Hardware documentation available from http://www.t13.org/ and
 *  http://www.sata-io.org/
 *
 */

#include <linux/kernel.h>
#include <linux/blkdev.h>
#include <linux/export.h>
#include <linux/pci.h>
#include <scsi/scsi.h>
#include <scsi/scsi_host.h>
#include <scsi/scsi_eh.h>
#include <scsi/scsi_device.h>
#include <scsi/scsi_cmnd.h>
#include <scsi/scsi_dbg.h>
#include "../scsi/scsi_transport_api.h"

#include <linux/libata.h>

#include <trace/events/libata.h>
#include "libata.h"

enum {
	/* speed down verdicts */
	ATA_EH_SPDN_NCQ_OFF		= (1 << 0),
	ATA_EH_SPDN_SPEED_DOWN		= (1 << 1),
	ATA_EH_SPDN_FALLBACK_TO_PIO	= (1 << 2),
	ATA_EH_SPDN_KEEP_ERRORS		= (1 << 3),

	/* error flags */
	ATA_EFLAG_IS_IO			= (1 << 0),
	ATA_EFLAG_DUBIOUS_XFER		= (1 << 1),
	ATA_EFLAG_OLD_ER		= (1 << 31),

	/* error categories */
	ATA_ECAT_NONE			= 0,
	ATA_ECAT_ATA_BUS		= 1,
	ATA_ECAT_TOUT_HSM		= 2,
	ATA_ECAT_UNK_DEV		= 3,
	ATA_ECAT_DUBIOUS_NONE		= 4,
	ATA_ECAT_DUBIOUS_ATA_BUS	= 5,
	ATA_ECAT_DUBIOUS_TOUT_HSM	= 6,
	ATA_ECAT_DUBIOUS_UNK_DEV	= 7,
	ATA_ECAT_NR			= 8,

	ATA_EH_CMD_DFL_TIMEOUT		= 5000,

	/* always put at least this amount of time between resets */
	ATA_EH_RESET_COOL_DOWN		= 5000,

	/* Waiting in ->prereset can never be reliable.  It's
	 * sometimes nice to wait there but it can't be depended upon;
	 * otherwise, we wouldn't be resetting.  Just give it enough
	 * time for most drives to spin up.
	 */
	ATA_EH_PRERESET_TIMEOUT		= 10000,
	ATA_EH_FASTDRAIN_INTERVAL	= 3000,

	ATA_EH_UA_TRIES			= 5,

	/* probe speed down parameters, see ata_eh_schedule_probe() */
	ATA_EH_PROBE_TRIAL_INTERVAL	= 60000,	/* 1 min */
	ATA_EH_PROBE_TRIALS		= 2,
};

/* The following table determines how we sequence resets.  Each entry
 * represents timeout for that try.  The first try can be soft or
 * hardreset.  All others are hardreset if available.  In most cases
 * the first reset w/ 10sec timeout should succeed.  Following entries
 * are mostly for error handling, hotplug and those outlier devices that
 * take an exceptionally long time to recover from reset.
 */
static const unsigned long ata_eh_reset_timeouts[] = {
	10000,	/* most drives spin up by 10sec */
	10000,	/* > 99% working drives spin up before 20sec */
	35000,	/* give > 30 secs of idleness for outlier devices */
	 5000,	/* and sweet one last chance */
	ULONG_MAX, /* > 1 min has elapsed, give up */
};

static const unsigned long ata_eh_identify_timeouts[] = {
	 5000,	/* covers > 99% of successes and not too boring on failures */
	10000,	/* combined time till here is enough even for media access */
	30000,	/* for true idiots */
	ULONG_MAX,
};

static const unsigned long ata_eh_flush_timeouts[] = {
	15000,	/* be generous with flush */
	15000,	/* ditto */
	30000,	/* and even more generous */
	ULONG_MAX,
};

static const unsigned long ata_eh_other_timeouts[] = {
	 5000,	/* same rationale as identify timeout */
	10000,	/* ditto */
	/* but no merciful 30sec for other commands, it just isn't worth it */
	ULONG_MAX,
};

struct ata_eh_cmd_timeout_ent {
	const u8		*commands;
	const unsigned long	*timeouts;
};

/* The following table determines timeouts to use for EH internal
 * commands.  Each table entry is a command class and matches the
 * commands the entry applies to and the timeout table to use.
 *
 * On the retry after a command timed out, the next timeout value from
 * the table is used.  If the table doesn't contain further entries,
 * the last value is used.
 *
 * ehc->cmd_timeout_idx keeps track of which timeout to use per
 * command class, so if SET_FEATURES times out on the first try, the
 * next try will use the second timeout value only for that class.
 */
#define CMDS(cmds...)	(const u8 []){ cmds, 0 }
static const struct ata_eh_cmd_timeout_ent
ata_eh_cmd_timeout_table[ATA_EH_CMD_TIMEOUT_TABLE_SIZE] = {
	{ .commands = CMDS(ATA_CMD_ID_ATA, ATA_CMD_ID_ATAPI),
	  .timeouts = ata_eh_identify_timeouts, },
	{ .commands = CMDS(ATA_CMD_READ_NATIVE_MAX, ATA_CMD_READ_NATIVE_MAX_EXT),
	  .timeouts = ata_eh_other_timeouts, },
	{ .commands = CMDS(ATA_CMD_SET_MAX, ATA_CMD_SET_MAX_EXT),
	  .timeouts = ata_eh_other_timeouts, },
	{ .commands = CMDS(ATA_CMD_SET_FEATURES),
	  .timeouts = ata_eh_other_timeouts, },
	{ .commands = CMDS(ATA_CMD_INIT_DEV_PARAMS),
	  .timeouts = ata_eh_other_timeouts, },
	{ .commands = CMDS(ATA_CMD_FLUSH, ATA_CMD_FLUSH_EXT),
	  .timeouts = ata_eh_flush_timeouts },
};
#undef CMDS
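
/*
 * Illustrative sketch (not part of the driver): how the per-class
 * timeout escalation above plays out for IDENTIFY.  The helpers named
 * here are the real ones defined below; the flow is a simplified
 * assumption of what EH does across retries.
 *
 *	timeout = ata_internal_cmd_timeout(dev, ATA_CMD_ID_ATA);
 *					// 1st try: 5000ms
 *	// ... the command times out ...
 *	ata_internal_cmd_timed_out(dev, ATA_CMD_ID_ATA);
 *	timeout = ata_internal_cmd_timeout(dev, ATA_CMD_ID_ATA);
 *					// 2nd try: 10000ms, then 30000ms;
 *					// after that the last value repeats
 */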

static void __ata_port_freeze(struct ata_port *ap);
#ifdef CONFIG_PM
static void ata_eh_handle_port_suspend(struct ata_port *ap);
static void ata_eh_handle_port_resume(struct ata_port *ap);
#else /* CONFIG_PM */
static void ata_eh_handle_port_suspend(struct ata_port *ap)
{ }

static void ata_eh_handle_port_resume(struct ata_port *ap)
{ }
#endif /* CONFIG_PM */

static __printf(2, 0) void __ata_ehi_pushv_desc(struct ata_eh_info *ehi,
				 const char *fmt, va_list args)
{
	ehi->desc_len += vscnprintf(ehi->desc + ehi->desc_len,
				    ATA_EH_DESC_LEN - ehi->desc_len,
				    fmt, args);
}

/**
 *	__ata_ehi_push_desc - push error description without adding separator
 *	@ehi: target EHI
 *	@fmt: printf format string
 *
 *	Format string according to @fmt and append it to @ehi->desc.
 *
 *	LOCKING:
 *	spin_lock_irqsave(host lock)
 */
void __ata_ehi_push_desc(struct ata_eh_info *ehi, const char *fmt, ...)
{
	va_list args;

	va_start(args, fmt);
	__ata_ehi_pushv_desc(ehi, fmt, args);
	va_end(args);
}

/**
 *	ata_ehi_push_desc - push error description with separator
 *	@ehi: target EHI
 *	@fmt: printf format string
 *
 *	Format string according to @fmt and append it to @ehi->desc.
 *	If @ehi->desc is not empty, ", " is added in-between.
 *
 *	LOCKING:
 *	spin_lock_irqsave(host lock)
 */
void ata_ehi_push_desc(struct ata_eh_info *ehi, const char *fmt, ...)
{
	va_list args;

	if (ehi->desc_len)
		__ata_ehi_push_desc(ehi, ", ");

	va_start(args, fmt);
	__ata_ehi_pushv_desc(ehi, fmt, args);
	va_end(args);
}
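
/*
 * Example (illustrative): building up an error description.  Two
 * consecutive calls
 *
 *	ata_ehi_push_desc(ehi, "SError 0x%x", serror);
 *	ata_ehi_push_desc(ehi, "hard resetting link");
 *
 * leave @ehi->desc containing "SError 0x..., hard resetting link",
 * with the ", " separator inserted by the second call.
 */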

/**
 *	ata_ehi_clear_desc - clean error description
 *	@ehi: target EHI
 *
 *	Clear @ehi->desc.
 *
 *	LOCKING:
 *	spin_lock_irqsave(host lock)
 */
void ata_ehi_clear_desc(struct ata_eh_info *ehi)
{
	ehi->desc[0] = '\0';
	ehi->desc_len = 0;
}

/**
 *	ata_port_desc - append port description
 *	@ap: target ATA port
 *	@fmt: printf format string
 *
 *	Format string according to @fmt and append it to port
 *	description.  If port description is not empty, " " is added
 *	in-between.  This function is to be used while initializing
 *	ata_host.  The description is printed on host registration.
 *
 *	LOCKING:
 *	None.
 */
void ata_port_desc(struct ata_port *ap, const char *fmt, ...)
{
	va_list args;

	WARN_ON(!(ap->pflags & ATA_PFLAG_INITIALIZING));

	if (ap->link.eh_info.desc_len)
		__ata_ehi_push_desc(&ap->link.eh_info, " ");

	va_start(args, fmt);
	__ata_ehi_pushv_desc(&ap->link.eh_info, fmt, args);
	va_end(args);
}

#ifdef CONFIG_PCI

/**
 *	ata_port_pbar_desc - append PCI BAR description
 *	@ap: target ATA port
 *	@bar: target PCI BAR
 *	@offset: offset into PCI BAR
 *	@name: name of the area
 *
 *	If @offset is negative, this function formats a string which
 *	contains the name, address, size and type of the BAR and
 *	appends it to the port description.  If @offset is zero or
 *	positive, only the name and the offset address are appended.
 *
 *	LOCKING:
 *	None.
 */
void ata_port_pbar_desc(struct ata_port *ap, int bar, ssize_t offset,
			const char *name)
{
	struct pci_dev *pdev = to_pci_dev(ap->host->dev);
	char *type = "";
	unsigned long long start, len;

	if (pci_resource_flags(pdev, bar) & IORESOURCE_MEM)
		type = "m";
	else if (pci_resource_flags(pdev, bar) & IORESOURCE_IO)
		type = "i";

	start = (unsigned long long)pci_resource_start(pdev, bar);
	len = (unsigned long long)pci_resource_len(pdev, bar);

	if (offset < 0)
		ata_port_desc(ap, "%s %s%llu@0x%llx", name, type, len, start);
	else
		ata_port_desc(ap, "%s 0x%llx", name,
				start + (unsigned long long)offset);
}

#endif /* CONFIG_PCI */

static int ata_lookup_timeout_table(u8 cmd)
{
	int i;

	for (i = 0; i < ATA_EH_CMD_TIMEOUT_TABLE_SIZE; i++) {
		const u8 *cur;

		for (cur = ata_eh_cmd_timeout_table[i].commands; *cur; cur++)
			if (*cur == cmd)
				return i;
	}

	return -1;
}

/**
 *	ata_internal_cmd_timeout - determine timeout for an internal command
 *	@dev: target device
 *	@cmd: internal command to be issued
 *
 *	Determine timeout for internal command @cmd for @dev.
 *
 *	LOCKING:
 *	EH context.
 *
 *	RETURNS:
 *	Determined timeout.
 */
unsigned long ata_internal_cmd_timeout(struct ata_device *dev, u8 cmd)
{
	struct ata_eh_context *ehc = &dev->link->eh_context;
	int ent = ata_lookup_timeout_table(cmd);
	int idx;

	if (ent < 0)
		return ATA_EH_CMD_DFL_TIMEOUT;

	idx = ehc->cmd_timeout_idx[dev->devno][ent];
	return ata_eh_cmd_timeout_table[ent].timeouts[idx];
}

/**
 *	ata_internal_cmd_timed_out - notification for internal command timeout
 *	@dev: target device
 *	@cmd: internal command which timed out
 *
 *	Notify EH that internal command @cmd for @dev timed out.  This
 *	function should be called only for commands whose timeouts are
 *	determined using ata_internal_cmd_timeout().
 *
 *	LOCKING:
 *	EH context.
 */
void ata_internal_cmd_timed_out(struct ata_device *dev, u8 cmd)
{
	struct ata_eh_context *ehc = &dev->link->eh_context;
	int ent = ata_lookup_timeout_table(cmd);
	int idx;

	if (ent < 0)
		return;

	idx = ehc->cmd_timeout_idx[dev->devno][ent];
	if (ata_eh_cmd_timeout_table[ent].timeouts[idx + 1] != ULONG_MAX)
		ehc->cmd_timeout_idx[dev->devno][ent]++;
}

static void ata_ering_record(struct ata_ering *ering, unsigned int eflags,
			     unsigned int err_mask)
{
	struct ata_ering_entry *ent;

	WARN_ON(!err_mask);

	ering->cursor++;
	ering->cursor %= ATA_ERING_SIZE;

	ent = &ering->ring[ering->cursor];
	ent->eflags = eflags;
	ent->err_mask = err_mask;
	ent->timestamp = get_jiffies_64();
}

static struct ata_ering_entry *ata_ering_top(struct ata_ering *ering)
{
	struct ata_ering_entry *ent = &ering->ring[ering->cursor];

	if (ent->err_mask)
		return ent;
	return NULL;
}

int ata_ering_map(struct ata_ering *ering,
		  int (*map_fn)(struct ata_ering_entry *, void *),
		  void *arg)
{
	int idx, rc = 0;
	struct ata_ering_entry *ent;

	idx = ering->cursor;
	do {
		ent = &ering->ring[idx];
		if (!ent->err_mask)
			break;
		rc = map_fn(ent, arg);
		if (rc)
			break;
		idx = (idx - 1 + ATA_ERING_SIZE) % ATA_ERING_SIZE;
	} while (idx != ering->cursor);

	return rc;
}

static int ata_ering_clear_cb(struct ata_ering_entry *ent, void *void_arg)
{
	ent->eflags |= ATA_EFLAG_OLD_ER;
	return 0;
}

static void ata_ering_clear(struct ata_ering *ering)
{
	ata_ering_map(ering, ata_ering_clear_cb, NULL);
}
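
/*
 * Illustrative sketch: ata_ering_map() walks the error ring from the
 * most recent entry backwards and stops when the callback returns
 * non-zero.  A hypothetical callback counting recorded timeouts might
 * look like this (speed_down_verdict_cb() below is the real-world
 * equivalent):
 *
 *	static int count_timeouts_cb(struct ata_ering_entry *ent, void *arg)
 *	{
 *		int *nr = arg;
 *
 *		if (ent->err_mask & AC_ERR_TIMEOUT)
 *			(*nr)++;
 *		return 0;	// keep walking
 *	}
 */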

static unsigned int ata_eh_dev_action(struct ata_device *dev)
{
	struct ata_eh_context *ehc = &dev->link->eh_context;

	return ehc->i.action | ehc->i.dev_action[dev->devno];
}

static void ata_eh_clear_action(struct ata_link *link, struct ata_device *dev,
				struct ata_eh_info *ehi, unsigned int action)
{
	struct ata_device *tdev;

	if (!dev) {
		ehi->action &= ~action;
		ata_for_each_dev(tdev, link, ALL)
			ehi->dev_action[tdev->devno] &= ~action;
	} else {
		/* doesn't make sense for port-wide EH actions */
		WARN_ON(!(action & ATA_EH_PERDEV_MASK));

		/* break ehi->action into ehi->dev_action */
		if (ehi->action & action) {
			ata_for_each_dev(tdev, link, ALL)
				ehi->dev_action[tdev->devno] |=
					ehi->action & action;
			ehi->action &= ~action;
		}

		/* turn off the specified per-dev action */
		ehi->dev_action[dev->devno] &= ~action;
	}
}

/**
 *	ata_eh_acquire - acquire EH ownership
 *	@ap: ATA port to acquire EH ownership for
 *
 *	Acquire EH ownership for @ap.  This is the basic exclusion
 *	mechanism for ports sharing a host.  Only one port hanging off
 *	the same host can claim the ownership of EH.
 *
 *	LOCKING:
 *	EH context.
 */
void ata_eh_acquire(struct ata_port *ap)
{
	mutex_lock(&ap->host->eh_mutex);
	WARN_ON_ONCE(ap->host->eh_owner);
	ap->host->eh_owner = current;
}

/**
 *	ata_eh_release - release EH ownership
 *	@ap: ATA port to release EH ownership for
 *
 *	Release EH ownership for @ap.  The caller must have acquired
 *	EH ownership using ata_eh_acquire() previously.
 *
 *	LOCKING:
 *	EH context.
 */
void ata_eh_release(struct ata_port *ap)
{
	WARN_ON_ONCE(ap->host->eh_owner != current);
	ap->host->eh_owner = NULL;
	mutex_unlock(&ap->host->eh_mutex);
}
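
/*
 * Illustrative sketch: EH ownership brackets a stretch of EH work and
 * should be dropped around sleeps so that sibling ports sharing the
 * host can make progress.  A simplified wait loop under this scheme
 * (ata_msleep() in the real code releases and re-acquires ownership
 * around sleeping in just this way):
 *
 *	ata_eh_acquire(ap);
 *	// ... issue EH commands for this port ...
 *	ata_eh_release(ap);	// let sibling ports run their EH
 *	msleep(interval);
 *	ata_eh_acquire(ap);
 */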

static void ata_eh_unload(struct ata_port *ap)
{
	struct ata_link *link;
	struct ata_device *dev;
	unsigned long flags;

	/* Restore SControl IPM and SPD for the next driver and
	 * disable attached devices.
	 */
	ata_for_each_link(link, ap, PMP_FIRST) {
		sata_scr_write(link, SCR_CONTROL, link->saved_scontrol & 0xff0);
		ata_for_each_dev(dev, link, ALL)
			ata_dev_disable(dev);
	}

	/* freeze and set UNLOADED */
	spin_lock_irqsave(ap->lock, flags);

	ata_port_freeze(ap);			/* won't be thawed */
	ap->pflags &= ~ATA_PFLAG_EH_PENDING;	/* clear pending from freeze */
	ap->pflags |= ATA_PFLAG_UNLOADED;

	spin_unlock_irqrestore(ap->lock, flags);
}

/**
 *	ata_scsi_error - SCSI layer error handler callback
 *	@host: SCSI host on which error occurred
 *
 *	Handles SCSI-layer-thrown error events.
 *
 *	LOCKING:
 *	Inherited from SCSI layer (none, can sleep)
 *
 *	RETURNS:
 *	Zero.
 */
void ata_scsi_error(struct Scsi_Host *host)
{
	struct ata_port *ap = ata_shost_to_port(host);
	unsigned long flags;
	LIST_HEAD(eh_work_q);

	DPRINTK("ENTER\n");

	spin_lock_irqsave(host->host_lock, flags);
	list_splice_init(&host->eh_cmd_q, &eh_work_q);
	spin_unlock_irqrestore(host->host_lock, flags);

	ata_scsi_cmd_error_handler(host, ap, &eh_work_q);

	/* If we timing-raced normal completion and there is nothing to
	 * recover (nr_timedout == 0), why exactly are we doing error
	 * recovery?
	 */
	ata_scsi_port_error_handler(host, ap);

	/* finish or retry handled scmd's and clean up */
	WARN_ON(!list_empty(&eh_work_q));

	DPRINTK("EXIT\n");
}

/**
 * ata_scsi_cmd_error_handler - error callback for a list of commands
 * @host:	scsi host containing the port
 * @ap:		ATA port within the host
 * @eh_work_q:	list of commands to process
 *
 * Process the given list of commands and return those finished to the
 * ap->eh_done_q.  This function is the first part of the libata error
 * handler which processes a given list of failed commands.
 */
void ata_scsi_cmd_error_handler(struct Scsi_Host *host, struct ata_port *ap,
				struct list_head *eh_work_q)
{
	int i;
	unsigned long flags;

	/* make sure sff pio task is not running */
	ata_sff_flush_pio_task(ap);

	/* synchronize with host lock and sort out timeouts */

	/* For new EH, all qcs are finished in one of three ways -
	 * normal completion, error completion, and SCSI timeout.
	 * Both completions can race against SCSI timeout.  When normal
	 * completion wins, the qc never reaches EH.  When error
	 * completion wins, the qc has ATA_QCFLAG_FAILED set.
	 *
	 * When SCSI timeout wins, things are a bit more complex.
	 * Normal or error completion can occur after the timeout but
	 * before this point.  In such cases, both types of
	 * completions are honored.  A scmd is determined to have
	 * timed out iff its associated qc is active and not failed.
	 */
	spin_lock_irqsave(ap->lock, flags);
	if (ap->ops->error_handler) {
		struct scsi_cmnd *scmd, *tmp;
		int nr_timedout = 0;

		/* This must occur under the ap->lock as we don't want
		   a polled recovery to race the real interrupt handler.

		   The lost_interrupt handler checks for any completed but
		   non-notified command and completes much like an IRQ handler.

		   We then fall into the error recovery code which will treat
		   this as if normal completion won the race. */

		if (ap->ops->lost_interrupt)
			ap->ops->lost_interrupt(ap);

		list_for_each_entry_safe(scmd, tmp, eh_work_q, eh_entry) {
			struct ata_queued_cmd *qc;

			for (i = 0; i < ATA_MAX_QUEUE; i++) {
				qc = __ata_qc_from_tag(ap, i);
				if (qc->flags & ATA_QCFLAG_ACTIVE &&
				    qc->scsicmd == scmd)
					break;
			}

			if (i < ATA_MAX_QUEUE) {
				/* the scmd has an associated qc */
				if (!(qc->flags & ATA_QCFLAG_FAILED)) {
					/* which hasn't failed yet, timeout */
					qc->err_mask |= AC_ERR_TIMEOUT;
					qc->flags |= ATA_QCFLAG_FAILED;
					nr_timedout++;
				}
			} else {
				/* Normal completion occurred after
				 * SCSI timeout but before this point.
				 * Successfully complete it.
				 */
				scmd->retries = scmd->allowed;
				scsi_eh_finish_cmd(scmd, &ap->eh_done_q);
			}
		}

		/* If we have timed out qcs, they belong to EH from
		 * this point but the state of the controller is
		 * unknown.  Freeze the port to make sure the IRQ
		 * handler doesn't diddle with those qcs.  This must
		 * be done atomically w.r.t. setting QCFLAG_FAILED.
		 */
		if (nr_timedout)
			__ata_port_freeze(ap);

		/* initialize eh_tries */
		ap->eh_tries = ATA_EH_MAX_TRIES;
	}
	spin_unlock_irqrestore(ap->lock, flags);

}
EXPORT_SYMBOL(ata_scsi_cmd_error_handler);

/**
 * ata_scsi_port_error_handler - recover the port after the commands
 * @host:	SCSI host containing the port
 * @ap:		the ATA port
 *
 * Handle the recovery of the port @ap after all the commands
 * have been recovered.
 */
void ata_scsi_port_error_handler(struct Scsi_Host *host, struct ata_port *ap)
{
	unsigned long flags;

	/* invoke error handler */
	if (ap->ops->error_handler) {
		struct ata_link *link;

		/* acquire EH ownership */
		ata_eh_acquire(ap);
 repeat:
		/* kill fast drain timer */
		del_timer_sync(&ap->fastdrain_timer);

		/* process port resume request */
		ata_eh_handle_port_resume(ap);

		/* fetch & clear EH info */
		spin_lock_irqsave(ap->lock, flags);

		ata_for_each_link(link, ap, HOST_FIRST) {
			struct ata_eh_context *ehc = &link->eh_context;
			struct ata_device *dev;

			memset(&link->eh_context, 0, sizeof(link->eh_context));
			link->eh_context.i = link->eh_info;
			memset(&link->eh_info, 0, sizeof(link->eh_info));

			ata_for_each_dev(dev, link, ENABLED) {
				int devno = dev->devno;

				ehc->saved_xfer_mode[devno] = dev->xfer_mode;
				if (ata_ncq_enabled(dev))
					ehc->saved_ncq_enabled |= 1 << devno;
			}
		}

		ap->pflags |= ATA_PFLAG_EH_IN_PROGRESS;
		ap->pflags &= ~ATA_PFLAG_EH_PENDING;
		ap->excl_link = NULL;	/* don't maintain exclusion over EH */

		spin_unlock_irqrestore(ap->lock, flags);

		/* invoke EH, skip if unloading or suspended */
		if (!(ap->pflags & (ATA_PFLAG_UNLOADING | ATA_PFLAG_SUSPENDED)))
			ap->ops->error_handler(ap);
		else {
			/* if unloading, commence suicide */
			if ((ap->pflags & ATA_PFLAG_UNLOADING) &&
			    !(ap->pflags & ATA_PFLAG_UNLOADED))
				ata_eh_unload(ap);
			ata_eh_finish(ap);
		}

		/* process port suspend request */
		ata_eh_handle_port_suspend(ap);

		/* Exception might have happened after ->error_handler
		 * recovered the port but before this point.  Repeat
		 * EH in such case.
		 */
		spin_lock_irqsave(ap->lock, flags);

		if (ap->pflags & ATA_PFLAG_EH_PENDING) {
			if (--ap->eh_tries) {
				spin_unlock_irqrestore(ap->lock, flags);
				goto repeat;
			}
			ata_port_err(ap,
				     "EH pending after %d tries, giving up\n",
				     ATA_EH_MAX_TRIES);
			ap->pflags &= ~ATA_PFLAG_EH_PENDING;
		}

		/* this run is complete, make sure EH info is clear */
		ata_for_each_link(link, ap, HOST_FIRST)
			memset(&link->eh_info, 0, sizeof(link->eh_info));

		/* end eh (clear host_eh_scheduled) while holding
		 * ap->lock such that if exception occurs after this
		 * point but before EH completion, SCSI midlayer will
		 * re-initiate EH.
		 */
		ap->ops->end_eh(ap);

		spin_unlock_irqrestore(ap->lock, flags);
		ata_eh_release(ap);
	} else {
		WARN_ON(ata_qc_from_tag(ap, ap->link.active_tag) == NULL);
		ap->ops->eng_timeout(ap);
	}

	scsi_eh_flush_done_q(&ap->eh_done_q);

	/* clean up */
	spin_lock_irqsave(ap->lock, flags);

	if (ap->pflags & ATA_PFLAG_LOADING)
		ap->pflags &= ~ATA_PFLAG_LOADING;
	else if ((ap->pflags & ATA_PFLAG_SCSI_HOTPLUG) &&
		 !(ap->flags & ATA_FLAG_SAS_HOST))
		schedule_delayed_work(&ap->hotplug_task, 0);

	if (ap->pflags & ATA_PFLAG_RECOVERED)
		ata_port_info(ap, "EH complete\n");

	ap->pflags &= ~(ATA_PFLAG_SCSI_HOTPLUG | ATA_PFLAG_RECOVERED);

	/* tell wait_eh that we're done */
	ap->pflags &= ~ATA_PFLAG_EH_IN_PROGRESS;
	wake_up_all(&ap->eh_wait_q);

	spin_unlock_irqrestore(ap->lock, flags);
}
EXPORT_SYMBOL_GPL(ata_scsi_port_error_handler);

/**
 *	ata_port_wait_eh - Wait for the currently pending EH to complete
 *	@ap: Port to wait EH for
 *
 *	Wait until the currently pending EH is complete.
 *
 *	LOCKING:
 *	Kernel thread context (may sleep).
 */
void ata_port_wait_eh(struct ata_port *ap)
{
	unsigned long flags;
	DEFINE_WAIT(wait);

 retry:
	spin_lock_irqsave(ap->lock, flags);

	while (ap->pflags & (ATA_PFLAG_EH_PENDING | ATA_PFLAG_EH_IN_PROGRESS)) {
		prepare_to_wait(&ap->eh_wait_q, &wait, TASK_UNINTERRUPTIBLE);
		spin_unlock_irqrestore(ap->lock, flags);
		schedule();
		spin_lock_irqsave(ap->lock, flags);
	}
	finish_wait(&ap->eh_wait_q, &wait);

	spin_unlock_irqrestore(ap->lock, flags);

	/* make sure SCSI EH is complete */
	if (scsi_host_in_recovery(ap->scsi_host)) {
		ata_msleep(ap, 10);
		goto retry;
	}
}
EXPORT_SYMBOL_GPL(ata_port_wait_eh);

static int ata_eh_nr_in_flight(struct ata_port *ap)
{
	unsigned int tag;
	int nr = 0;

	/* count only non-internal commands */
	for (tag = 0; tag < ATA_MAX_QUEUE; tag++) {
		if (ata_tag_internal(tag))
			continue;
		if (ata_qc_from_tag(ap, tag))
			nr++;
	}

	return nr;
}

void ata_eh_fastdrain_timerfn(struct timer_list *t)
{
	struct ata_port *ap = from_timer(ap, t, fastdrain_timer);
	unsigned long flags;
	int cnt;

	spin_lock_irqsave(ap->lock, flags);

	cnt = ata_eh_nr_in_flight(ap);

	/* are we done? */
	if (!cnt)
		goto out_unlock;

	if (cnt == ap->fastdrain_cnt) {
		unsigned int tag;

		/* No progress during the last interval, tag all
		 * in-flight qcs as timed out and freeze the port.
		 */
		for (tag = 0; tag < ATA_MAX_QUEUE; tag++) {
			struct ata_queued_cmd *qc = ata_qc_from_tag(ap, tag);
			if (qc)
				qc->err_mask |= AC_ERR_TIMEOUT;
		}

		ata_port_freeze(ap);
	} else {
		/* some qcs have finished, give it another chance */
		ap->fastdrain_cnt = cnt;
		ap->fastdrain_timer.expires =
			ata_deadline(jiffies, ATA_EH_FASTDRAIN_INTERVAL);
		add_timer(&ap->fastdrain_timer);
	}

 out_unlock:
	spin_unlock_irqrestore(ap->lock, flags);
}

/**
 *	ata_eh_set_pending - set ATA_PFLAG_EH_PENDING and activate fast drain
 *	@ap: target ATA port
 *	@fastdrain: activate fast drain
 *
 *	Set ATA_PFLAG_EH_PENDING and activate fast drain if @fastdrain
 *	is non-zero and EH wasn't pending before.  Fast drain ensures
 *	that EH kicks in in a timely manner.
 *
 *	LOCKING:
 *	spin_lock_irqsave(host lock)
 */
static void ata_eh_set_pending(struct ata_port *ap, int fastdrain)
{
	int cnt;

	/* already scheduled? */
	if (ap->pflags & ATA_PFLAG_EH_PENDING)
		return;

	ap->pflags |= ATA_PFLAG_EH_PENDING;

	if (!fastdrain)
		return;

	/* do we have in-flight qcs? */
	cnt = ata_eh_nr_in_flight(ap);
	if (!cnt)
		return;

	/* activate fast drain */
	ap->fastdrain_cnt = cnt;
	ap->fastdrain_timer.expires =
		ata_deadline(jiffies, ATA_EH_FASTDRAIN_INTERVAL);
	add_timer(&ap->fastdrain_timer);
}

/**
 *	ata_qc_schedule_eh - schedule qc for error handling
 *	@qc: command to schedule error handling for
 *
 *	Schedule error handling for @qc.  EH will kick in as soon as
 *	other commands are drained.
 *
 *	LOCKING:
 *	spin_lock_irqsave(host lock)
 */
void ata_qc_schedule_eh(struct ata_queued_cmd *qc)
{
	struct ata_port *ap = qc->ap;
	struct request_queue *q = qc->scsicmd->device->request_queue;
	unsigned long flags;

	WARN_ON(!ap->ops->error_handler);

	qc->flags |= ATA_QCFLAG_FAILED;
	ata_eh_set_pending(ap, 1);

	/* The following will fail if timeout has already expired.
	 * ata_scsi_error() takes care of such scmds on EH entry.
	 * Note that ATA_QCFLAG_FAILED is unconditionally set after
	 * this function completes.
	 */
	spin_lock_irqsave(q->queue_lock, flags);
	blk_abort_request(qc->scsicmd->request);
	spin_unlock_irqrestore(q->queue_lock, flags);
}

/**
 * ata_std_sched_eh - non-libsas ata_ports issue eh with this common routine
 * @ap: ATA port to schedule EH for
 *
 *	LOCKING: inherited from ata_port_schedule_eh
 *	spin_lock_irqsave(host lock)
 */
void ata_std_sched_eh(struct ata_port *ap)
{
	WARN_ON(!ap->ops->error_handler);

	if (ap->pflags & ATA_PFLAG_INITIALIZING)
		return;

	ata_eh_set_pending(ap, 1);
	scsi_schedule_eh(ap->scsi_host);

	DPRINTK("port EH scheduled\n");
}
EXPORT_SYMBOL_GPL(ata_std_sched_eh);

/**
 * ata_std_end_eh - non-libsas ata_ports complete eh with this common routine
 * @ap: ATA port to end EH for
 *
 * In the libata object model there is a 1:1 mapping of ata_port to
 * shost, so host fields can be directly manipulated under ap->lock; in
 * the libsas case we need to hold a lock at the ha level to coordinate
 * these events.
 *
 *	LOCKING:
 *	spin_lock_irqsave(host lock)
 */
void ata_std_end_eh(struct ata_port *ap)
{
	struct Scsi_Host *host = ap->scsi_host;

	host->host_eh_scheduled = 0;
}
EXPORT_SYMBOL(ata_std_end_eh);


/**
 *	ata_port_schedule_eh - schedule error handling without a qc
 *	@ap: ATA port to schedule EH for
 *
 *	Schedule error handling for @ap.  EH will kick in as soon as
 *	all commands are drained.
 *
 *	LOCKING:
 *	spin_lock_irqsave(host lock)
 */
void ata_port_schedule_eh(struct ata_port *ap)
{
	/* see: ata_std_sched_eh, unless you know better */
	ap->ops->sched_eh(ap);
}
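
/*
 * Illustrative sketch: how an LLDD's interrupt handler might report a
 * port-wide error and kick EH using the helpers above.  The trigger
 * condition and descriptor text are made up for the example; the
 * helper calls and the host-lock context are the real contract.
 *
 *	spin_lock(ap->lock);
 *	ehi = &ap->link.eh_info;
 *	ata_ehi_clear_desc(ehi);
 *	ata_ehi_push_desc(ehi, "irq_stat 0x%08x", irq_stat);
 *	ehi->err_mask |= AC_ERR_HSM;
 *	ehi->action |= ATA_EH_RESET;
 *	ata_port_schedule_eh(ap);
 *	spin_unlock(ap->lock);
 */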

static int ata_do_link_abort(struct ata_port *ap, struct ata_link *link)
{
	int tag, nr_aborted = 0;

	WARN_ON(!ap->ops->error_handler);

	/* we're gonna abort all commands, no need for fast drain */
	ata_eh_set_pending(ap, 0);

	/* include internal tag in iteration */
	for (tag = 0; tag <= ATA_MAX_QUEUE; tag++) {
		struct ata_queued_cmd *qc = ata_qc_from_tag(ap, tag);

		if (qc && (!link || qc->dev->link == link)) {
			qc->flags |= ATA_QCFLAG_FAILED;
			ata_qc_complete(qc);
			nr_aborted++;
		}
	}

	if (!nr_aborted)
		ata_port_schedule_eh(ap);

	return nr_aborted;
}

/**
 *	ata_link_abort - abort all qc's on the link
 *	@link: ATA link to abort qc's for
 *
 *	Abort all active qc's on @link and schedule EH.
 *
 *	LOCKING:
 *	spin_lock_irqsave(host lock)
 *
 *	RETURNS:
 *	Number of aborted qc's.
 */
int ata_link_abort(struct ata_link *link)
{
	return ata_do_link_abort(link->ap, link);
}

/**
 *	ata_port_abort - abort all qc's on the port
 *	@ap: ATA port to abort qc's for
 *
 *	Abort all active qc's of @ap and schedule EH.
 *
 *	LOCKING:
 *	spin_lock_irqsave(host lock)
 *
 *	RETURNS:
 *	Number of aborted qc's.
 */
int ata_port_abort(struct ata_port *ap)
{
	return ata_do_link_abort(ap, NULL);
}

/**
 *	__ata_port_freeze - freeze port
 *	@ap: ATA port to freeze
 *
 *	This function is called when HSM violation or some other
 *	condition disrupts normal operation of the port.  A frozen port
 *	is not allowed to perform any operation until the port is
 *	thawed, which usually follows a successful reset.
 *
 *	ap->ops->freeze() callback can be used for freezing the port
 *	hardware-wise (e.g. mask interrupt and stop DMA engine).  If a
 *	port cannot be frozen hardware-wise, the interrupt handler
 *	must ack and clear interrupts unconditionally while the port
 *	is frozen.
 *
 *	LOCKING:
 *	spin_lock_irqsave(host lock)
 */
static void __ata_port_freeze(struct ata_port *ap)
{
	WARN_ON(!ap->ops->error_handler);

	if (ap->ops->freeze)
		ap->ops->freeze(ap);

	ap->pflags |= ATA_PFLAG_FROZEN;

	DPRINTK("ata%u port frozen\n", ap->print_id);
}

/**
 *	ata_port_freeze - abort & freeze port
 *	@ap: ATA port to freeze
 *
 *	Abort and freeze @ap.  The freeze operation must be called
 *	first, because some hardware requires special operations
 *	before the taskfile registers are accessible.
 *
 *	LOCKING:
 *	spin_lock_irqsave(host lock)
 *
 *	RETURNS:
 *	Number of aborted commands.
 */
int ata_port_freeze(struct ata_port *ap)
{
	int nr_aborted;

	WARN_ON(!ap->ops->error_handler);

	__ata_port_freeze(ap);
	nr_aborted = ata_port_abort(ap);

	return nr_aborted;
}
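
/*
 * Illustrative sketch: a typical LLDD error-interrupt path under the
 * host lock.  On a fatal error the handler freezes the port (which
 * also aborts everything in flight and schedules EH); on a
 * per-command error it fails just the offending qc.  The condition
 * names here are hypothetical.
 *
 *	if (fatal_error)
 *		ata_port_freeze(ap);	// freeze + abort + schedule EH
 *	else {
 *		qc->err_mask |= AC_ERR_DEV;
 *		ata_qc_complete(qc);	// error completion enters EH
 *	}
 */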

/**
 *	sata_async_notification - SATA async notification handler
 *	@ap: ATA port where async notification is received
 *
 *	Handler to be called when async notification via SDB FIS is
 *	received.  This function schedules EH if necessary.
 *
 *	LOCKING:
 *	spin_lock_irqsave(host lock)
 *
 *	RETURNS:
 *	1 if EH is scheduled, 0 otherwise.
 */
int sata_async_notification(struct ata_port *ap)
{
	u32 sntf;
	int rc;

	if (!(ap->flags & ATA_FLAG_AN))
		return 0;

	rc = sata_scr_read(&ap->link, SCR_NOTIFICATION, &sntf);
	if (rc == 0)
		sata_scr_write(&ap->link, SCR_NOTIFICATION, sntf);

	if (!sata_pmp_attached(ap) || rc) {
		/* PMP is not attached or SNTF is not available */
		if (!sata_pmp_attached(ap)) {
			/* PMP is not attached.  Check whether ATAPI
			 * AN is configured.  If so, notify media
			 * change.
			 */
			struct ata_device *dev = ap->link.device;

			if ((dev->class == ATA_DEV_ATAPI) &&
			    (dev->flags & ATA_DFLAG_AN))
				ata_scsi_media_change_notify(dev);
			return 0;
		} else {
			/* PMP is attached but SNTF is not available.
			 * ATAPI async media change notification is
			 * not used.  The PMP must be reporting PHY
			 * status change, schedule EH.
			 */
			ata_port_schedule_eh(ap);
			return 1;
		}
	} else {
		/* PMP is attached and SNTF is available */
		struct ata_link *link;

		/* check and notify ATAPI AN */
		ata_for_each_link(link, ap, EDGE) {
			if (!(sntf & (1 << link->pmp)))
				continue;

			if ((link->device->class == ATA_DEV_ATAPI) &&
			    (link->device->flags & ATA_DFLAG_AN))
				ata_scsi_media_change_notify(link->device);
		}

		/* If PMP is reporting that PHY status of some
		 * downstream ports has changed, schedule EH.
		 */
		if (sntf & (1 << SATA_PMP_CTRL_PORT)) {
			ata_port_schedule_eh(ap);
			return 1;
		}

		return 0;
	}
}

/**
 *	ata_eh_freeze_port - EH helper to freeze port
 *	@ap: ATA port to freeze
 *
 *	Freeze @ap.
 *
 *	LOCKING:
 *	None.
 */
void ata_eh_freeze_port(struct ata_port *ap)
{
	unsigned long flags;

	if (!ap->ops->error_handler)
		return;

	spin_lock_irqsave(ap->lock, flags);
	__ata_port_freeze(ap);
	spin_unlock_irqrestore(ap->lock, flags);
}

/**
 *	ata_eh_thaw_port - EH helper to thaw port
 *	@ap: ATA port to thaw
 *
 *	Thaw frozen port @ap.
 *
 *	LOCKING:
 *	None.
 */
void ata_eh_thaw_port(struct ata_port *ap)
{
	unsigned long flags;

	if (!ap->ops->error_handler)
		return;

	spin_lock_irqsave(ap->lock, flags);

	ap->pflags &= ~ATA_PFLAG_FROZEN;

	if (ap->ops->thaw)
		ap->ops->thaw(ap);

	spin_unlock_irqrestore(ap->lock, flags);

	DPRINTK("ata%u port thawed\n", ap->print_id);
}

static void ata_eh_scsidone(struct scsi_cmnd *scmd)
{
	/* nada */
}

static void __ata_eh_qc_complete(struct ata_queued_cmd *qc)
{
	struct ata_port *ap = qc->ap;
	struct scsi_cmnd *scmd = qc->scsicmd;
	unsigned long flags;

	spin_lock_irqsave(ap->lock, flags);
	qc->scsidone = ata_eh_scsidone;
	__ata_qc_complete(qc);
	WARN_ON(ata_tag_valid(qc->tag));
	spin_unlock_irqrestore(ap->lock, flags);

	scsi_eh_finish_cmd(scmd, &ap->eh_done_q);
}

/**
 *	ata_eh_qc_complete - Complete an active ATA command from EH
 *	@qc: Command to complete
 *
 *	Indicate to the mid and upper layers that an ATA command has
 *	completed.  To be used from EH.
 */
void ata_eh_qc_complete(struct ata_queued_cmd *qc)
{
	struct scsi_cmnd *scmd = qc->scsicmd;
	scmd->retries = scmd->allowed;
	__ata_eh_qc_complete(qc);
}

/**
 *	ata_eh_qc_retry - Tell midlayer to retry an ATA command after EH
 *	@qc: Command to retry
 *
 *	Indicate to the mid and upper layers that an ATA command
 *	should be retried.  To be used from EH.
 *
 *	SCSI midlayer limits the number of retries to scmd->allowed.
 *	scmd->allowed is incremented for commands which get retried
 *	due to unrelated failures (qc->err_mask is zero).
 */
void ata_eh_qc_retry(struct ata_queued_cmd *qc)
{
	struct scsi_cmnd *scmd = qc->scsicmd;
	if (!qc->err_mask)
		scmd->allowed++;
	__ata_eh_qc_complete(qc);
}
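
/*
 * Worked example of the retry accounting above: a READ issued with
 * scmd->allowed == 3 that keeps getting caught up in somebody else's
 * link reset (so its own qc->err_mask stays zero) has scmd->allowed
 * bumped on every ata_eh_qc_retry(), so unrelated failures never use
 * up an innocent command's retries.  A command that itself failed
 * (qc->err_mask non-zero) consumes one of its retries per pass.
 */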

/**
 *	ata_dev_disable - disable ATA device
 *	@dev: ATA device to disable
 *
 *	Disable @dev.
 *
 *	Locking:
 *	EH context.
 */
void ata_dev_disable(struct ata_device *dev)
{
	if (!ata_dev_enabled(dev))
		return;

	if (ata_msg_drv(dev->link->ap))
		ata_dev_warn(dev, "disabled\n");
	ata_acpi_on_disable(dev);
	ata_down_xfermask_limit(dev, ATA_DNXFER_FORCE_PIO0 | ATA_DNXFER_QUIET);
	dev->class++;

	/* From now till the next successful probe, ering is used to
	 * track probe failures.  Clear accumulated device error info.
	 */
	ata_ering_clear(&dev->ering);
}

/**
 *	ata_eh_detach_dev - detach ATA device
 *	@dev: ATA device to detach
 *
 *	Detach @dev.
 *
 *	LOCKING:
 *	None.
 */
void ata_eh_detach_dev(struct ata_device *dev)
{
	struct ata_link *link = dev->link;
	struct ata_port *ap = link->ap;
	struct ata_eh_context *ehc = &link->eh_context;
	unsigned long flags;

	ata_dev_disable(dev);

	spin_lock_irqsave(ap->lock, flags);

	dev->flags &= ~ATA_DFLAG_DETACH;

	if (ata_scsi_offline_dev(dev)) {
		dev->flags |= ATA_DFLAG_DETACHED;
		ap->pflags |= ATA_PFLAG_SCSI_HOTPLUG;
	}

	/* clear per-dev EH info */
	ata_eh_clear_action(link, dev, &link->eh_info, ATA_EH_PERDEV_MASK);
	ata_eh_clear_action(link, dev, &link->eh_context.i, ATA_EH_PERDEV_MASK);
	ehc->saved_xfer_mode[dev->devno] = 0;
	ehc->saved_ncq_enabled &= ~(1 << dev->devno);

	spin_unlock_irqrestore(ap->lock, flags);
}

/**
 *	ata_eh_about_to_do - about to perform eh_action
 *	@link: target ATA link
 *	@dev: target ATA dev for per-dev action (can be NULL)
 *	@action: action about to be performed
 *
 *	Called just before performing EH actions to clear related bits
 *	in @link->eh_info such that eh actions are not unnecessarily
 *	repeated.
 *
 *	LOCKING:
 *	None.
 */
void ata_eh_about_to_do(struct ata_link *link, struct ata_device *dev,
			unsigned int action)
{
	struct ata_port *ap = link->ap;
	struct ata_eh_info *ehi = &link->eh_info;
	struct ata_eh_context *ehc = &link->eh_context;
	unsigned long flags;

	spin_lock_irqsave(ap->lock, flags);

	ata_eh_clear_action(link, dev, ehi, action);

	/* About to take EH action, set RECOVERED.  Ignore actions on
	 * slave links as master will do them again.
	 */
	if (!(ehc->i.flags & ATA_EHI_QUIET) && link != ap->slave_link)
		ap->pflags |= ATA_PFLAG_RECOVERED;

	spin_unlock_irqrestore(ap->lock, flags);
}

/**
 *	ata_eh_done - EH action complete
 *	@link: ATA link for which EH actions are complete
 *	@dev: target ATA dev for per-dev action (can be NULL)
 *	@action: action just completed
 *
 *	Called right after performing EH actions to clear related bits
 *	in @link->eh_context.
 *
 *	LOCKING:
 *	None.
 */
void ata_eh_done(struct ata_link *link, struct ata_device *dev,
		 unsigned int action)
{
	struct ata_eh_context *ehc = &link->eh_context;

	ata_eh_clear_action(link, dev, &ehc->i, action);
}

/**
 *	ata_err_string - convert err_mask to descriptive string
 *	@err_mask: error mask to convert to string
 *
 *	Convert @err_mask to descriptive string.  Errors are
 *	prioritized according to severity and only the most severe
 *	error is reported.
 *
 *	LOCKING:
 *	None.
 *
 *	RETURNS:
 *	Descriptive string for @err_mask
 */
static const char *ata_err_string(unsigned int err_mask)
{
	if (err_mask & AC_ERR_HOST_BUS)
		return "host bus error";
	if (err_mask & AC_ERR_ATA_BUS)
		return "ATA bus error";
	if (err_mask & AC_ERR_TIMEOUT)
		return "timeout";
	if (err_mask & AC_ERR_HSM)
		return "HSM violation";
	if (err_mask & AC_ERR_SYSTEM)
		return "internal error";
	if (err_mask & AC_ERR_MEDIA)
		return "media error";
	if (err_mask & AC_ERR_INVALID)
		return "invalid argument";
	if (err_mask & AC_ERR_DEV)
		return "device error";
	if (err_mask & AC_ERR_NCQ)
		return "NCQ error";
	if (err_mask & AC_ERR_NODEV_HINT)
		return "Polling detection error";
	return "unknown error";
}

/**
 *	ata_eh_read_log_10h - Read log page 10h for NCQ error details
 *	@dev: Device to read log page 10h from
 *	@tag: Resulting tag of the failed command
 *	@tf: Resulting taskfile registers of the failed command
 *
 *	Read log page 10h to obtain NCQ error details and clear error
 *	condition.
 *
 *	LOCKING:
 *	Kernel thread context (may sleep).
 *
 *	RETURNS:
 *	0 on success, -errno otherwise.
 */
static int ata_eh_read_log_10h(struct ata_device *dev,
			       int *tag, struct ata_taskfile *tf)
{
	u8 *buf = dev->link->ap->sector_buf;
	unsigned int err_mask;
	u8 csum;
	int i;

	err_mask = ata_read_log_page(dev, ATA_LOG_SATA_NCQ, 0, buf, 1);
	if (err_mask)
		return -EIO;

	csum = 0;
	for (i = 0; i < ATA_SECT_SIZE; i++)
		csum += buf[i];
	if (csum)
		ata_dev_warn(dev, "invalid checksum 0x%x on log page 10h\n",
			     csum);

	if (buf[0] & 0x80)
		return -ENOENT;

	*tag = buf[0] & 0x1f;

	tf->command = buf[2];
	tf->feature = buf[3];
	tf->lbal = buf[4];
	tf->lbam = buf[5];
	tf->lbah = buf[6];
	tf->device = buf[7];
	tf->hob_lbal = buf[8];
	tf->hob_lbam = buf[9];
	tf->hob_lbah = buf[10];
	tf->nsect = buf[12];
	tf->hob_nsect = buf[13];
	if (ata_id_has_ncq_autosense(dev->id))
		tf->auxiliary = buf[14] << 16 | buf[15] << 8 | buf[16];

	return 0;
}

/**
 *	atapi_eh_tur - perform ATAPI TEST_UNIT_READY
 *	@dev: target ATAPI device
 *	@r_sense_key: out parameter for sense_key
 *
 *	Perform ATAPI TEST_UNIT_READY.
 *
 *	LOCKING:
 *	EH context (may sleep).
 *
 *	RETURNS:
 *	0 on success, AC_ERR_* mask on failure.
 */
unsigned int atapi_eh_tur(struct ata_device *dev, u8 *r_sense_key)
{
	u8 cdb[ATAPI_CDB_LEN] = { TEST_UNIT_READY, 0, 0, 0, 0, 0 };
	struct ata_taskfile tf;
	unsigned int err_mask;

	ata_tf_init(dev, &tf);

	tf.flags |= ATA_TFLAG_ISADDR | ATA_TFLAG_DEVICE;
	tf.command = ATA_CMD_PACKET;
	tf.protocol = ATAPI_PROT_NODATA;

	err_mask = ata_exec_internal(dev, &tf, cdb, DMA_NONE, NULL, 0, 0);
	if (err_mask == AC_ERR_DEV)
		*r_sense_key = tf.feature >> 4;
	return err_mask;
}

/**
 *	ata_eh_request_sense - perform REQUEST_SENSE_DATA_EXT
 *	@qc: qc to perform REQUEST_SENSE_DATA_EXT to
 *	@cmd: scsi command for which the sense code should be set
 *
 *	Perform REQUEST_SENSE_DATA_EXT after the device reported CHECK
 *	SENSE.  This function is an EH helper.
 *
 *	LOCKING:
 *	Kernel thread context (may sleep).
 */
static void ata_eh_request_sense(struct ata_queued_cmd *qc,
				 struct scsi_cmnd *cmd)
{
	struct ata_device *dev = qc->dev;
	struct ata_taskfile tf;
	unsigned int err_mask;

	if (qc->ap->pflags & ATA_PFLAG_FROZEN) {
		ata_dev_warn(dev, "sense data available but port frozen\n");
		return;
	}

	if (!cmd || qc->flags & ATA_QCFLAG_SENSE_VALID)
		return;

	if (!ata_id_sense_reporting_enabled(dev->id)) {
		ata_dev_warn(qc->dev, "sense data reporting disabled\n");
		return;
	}

	DPRINTK("ATA request sense\n");

	ata_tf_init(dev, &tf);
	tf.flags |= ATA_TFLAG_ISADDR | ATA_TFLAG_DEVICE;
	tf.flags |= ATA_TFLAG_LBA | ATA_TFLAG_LBA48;
	tf.command = ATA_CMD_REQ_SENSE_DATA;
	tf.protocol = ATA_PROT_NODATA;

	err_mask = ata_exec_internal(dev, &tf, NULL, DMA_NONE, NULL, 0, 0);
	/* Ignore err_mask; ATA_ERR might be set */
	if (tf.command & ATA_SENSE) {
		ata_scsi_set_sense(dev, cmd, tf.lbah, tf.lbam, tf.lbal);
		qc->flags |= ATA_QCFLAG_SENSE_VALID;
	} else {
		ata_dev_warn(dev, "request sense failed stat %02x emask %x\n",
			     tf.command, err_mask);
	}
}

/**
 *	atapi_eh_request_sense - perform ATAPI REQUEST_SENSE
 *	@dev: device to perform REQUEST_SENSE to
 *	@sense_buf: result sense data buffer (SCSI_SENSE_BUFFERSIZE bytes long)
 *	@dfl_sense_key: default sense key to use
 *
 *	Perform ATAPI REQUEST_SENSE after the device reported CHECK
 *	SENSE.  This function is an EH helper.
 *
 *	LOCKING:
 *	Kernel thread context (may sleep).
 *
 *	RETURNS:
 *	0 on success, AC_ERR_* mask on failure
 */
unsigned int atapi_eh_request_sense(struct ata_device *dev,
				    u8 *sense_buf, u8 dfl_sense_key)
{
	u8 cdb[ATAPI_CDB_LEN] =
		{ REQUEST_SENSE, 0, 0, 0, SCSI_SENSE_BUFFERSIZE, 0 };
	struct ata_port *ap = dev->link->ap;
	struct ata_taskfile tf;

	DPRINTK("ATAPI request sense\n");

	memset(sense_buf, 0, SCSI_SENSE_BUFFERSIZE);

	/* initialize sense_buf with the error register,
	 * for the case where they are -not- overwritten
	 */
	sense_buf[0] = 0x70;
	sense_buf[2] = dfl_sense_key;

	/* some devices time out if garbage left in tf */
	ata_tf_init(dev, &tf);

	tf.flags |= ATA_TFLAG_ISADDR | ATA_TFLAG_DEVICE;
	tf.command = ATA_CMD_PACKET;

	/* is it pointless to prefer PIO for "safety reasons"? */
	if (ap->flags & ATA_FLAG_PIO_DMA) {
		tf.protocol = ATAPI_PROT_DMA;
		tf.feature |= ATAPI_PKT_DMA;
	} else {
		tf.protocol = ATAPI_PROT_PIO;
		tf.lbam = SCSI_SENSE_BUFFERSIZE;
		tf.lbah = 0;
	}

	return ata_exec_internal(dev, &tf, cdb, DMA_FROM_DEVICE,
				 sense_buf, SCSI_SENSE_BUFFERSIZE, 0);
}

/**
 *	ata_eh_analyze_serror - analyze SError for a failed port
 *	@link: ATA link to analyze SError for
 *
 *	Analyze SError if available and further determine cause of
 *	failure.
 *
 *	LOCKING:
 *	None.
 */
static void ata_eh_analyze_serror(struct ata_link *link)
{
	struct ata_eh_context *ehc = &link->eh_context;
	u32 serror = ehc->i.serror;
	unsigned int err_mask = 0, action = 0;
	u32 hotplug_mask;

	if (serror & (SERR_PERSISTENT | SERR_DATA)) {
		err_mask |= AC_ERR_ATA_BUS;
		action |= ATA_EH_RESET;
	}
	if (serror & SERR_PROTOCOL) {
		err_mask |= AC_ERR_HSM;
		action |= ATA_EH_RESET;
	}
	if (serror & SERR_INTERNAL) {
		err_mask |= AC_ERR_SYSTEM;
		action |= ATA_EH_RESET;
	}

	/* Determine whether a hotplug event has occurred.  Both
	 * SError.N/X are considered hotplug events for enabled or
	 * host links.  For disabled PMP links, only N bit is
	 * considered as X bit is left at 1 for link plugging.
	 */
	if (link->lpm_policy > ATA_LPM_MAX_POWER)
		hotplug_mask = 0;	/* hotplug doesn't work w/ LPM */
	else if (!(link->flags & ATA_LFLAG_DISABLED) || ata_is_host_link(link))
		hotplug_mask = SERR_PHYRDY_CHG | SERR_DEV_XCHG;
	else
		hotplug_mask = SERR_PHYRDY_CHG;

	if (serror & hotplug_mask)
		ata_ehi_hotplugged(&ehc->i);

	ehc->i.err_mask |= err_mask;
	ehc->i.action |= action;
}

/**
 *	ata_eh_analyze_ncq_error - analyze NCQ error
 *	@link: ATA link to analyze NCQ error for
 *
 *	Read log page 10h, determine the offending qc and acquire
 *	error status TF.  For NCQ device errors, all LLDDs have to do
 *	is set AC_ERR_DEV in ehi->err_mask.  This function takes
 *	care of the rest.
 *
 *	LOCKING:
 *	Kernel thread context (may sleep).
 */
void ata_eh_analyze_ncq_error(struct ata_link *link)
{
	struct ata_port *ap = link->ap;
	struct ata_eh_context *ehc = &link->eh_context;
	struct ata_device *dev = link->device;
	struct ata_queued_cmd *qc;
	struct ata_taskfile tf;
	int tag, rc;

	/* if frozen, we can't do much */
	if (ap->pflags & ATA_PFLAG_FROZEN)
		return;

	/* is it NCQ device error? */
	if (!link->sactive || !(ehc->i.err_mask & AC_ERR_DEV))
		return;

	/* has LLDD analyzed already? */
	for (tag = 0; tag < ATA_MAX_QUEUE; tag++) {
		qc = __ata_qc_from_tag(ap, tag);

		if (!(qc->flags & ATA_QCFLAG_FAILED))
			continue;

		if (qc->err_mask)
			return;
	}

	/* okay, this error is ours */
	memset(&tf, 0, sizeof(tf));
	rc = ata_eh_read_log_10h(dev, &tag, &tf);
	if (rc) {
		ata_link_err(link, "failed to read log page 10h (errno=%d)\n",
			     rc);
		return;
	}

	if (!(link->sactive & (1 << tag))) {
		ata_link_err(link, "log page 10h reported inactive tag %d\n",
			     tag);
		return;
	}

	/* we've got the perpetrator, condemn it */
	qc = __ata_qc_from_tag(ap, tag);
	memcpy(&qc->result_tf, &tf, sizeof(tf));
	qc->result_tf.flags = ATA_TFLAG_ISADDR | ATA_TFLAG_LBA | ATA_TFLAG_LBA48;
	qc->err_mask |= AC_ERR_DEV | AC_ERR_NCQ;
	if ((qc->result_tf.command & ATA_SENSE) || qc->result_tf.auxiliary) {
		char sense_key, asc, ascq;

		sense_key = (qc->result_tf.auxiliary >> 16) & 0xff;
		asc = (qc->result_tf.auxiliary >> 8) & 0xff;
		ascq = qc->result_tf.auxiliary & 0xff;
		ata_scsi_set_sense(dev, qc->scsicmd, sense_key, asc, ascq);
		ata_scsi_set_sense_information(dev, qc->scsicmd,
					       &qc->result_tf);
		qc->flags |= ATA_QCFLAG_SENSE_VALID;
	}

	ehc->i.err_mask &= ~AC_ERR_DEV;
}
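
/*
 * Illustrative sketch of the division of labor described above: on an
 * NCQ device error an LLDD only needs to do something like
 *
 *	ehi->err_mask |= AC_ERR_DEV;
 *	ata_port_abort(ap);
 *
 * and EH will call ata_eh_analyze_ncq_error() to read log page 10h,
 * pick the failed tag out of the queue and fill in its result_tf.
 */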

/**
 *	ata_eh_analyze_tf - analyze taskfile of a failed qc
 *	@qc: qc to analyze
 *	@tf: Taskfile registers to analyze
 *
 *	Analyze taskfile of @qc and further determine cause of
 *	failure.  This function also requests ATAPI sense data if
 *	available.
 *
 *	LOCKING:
 *	Kernel thread context (may sleep).
 *
 *	RETURNS:
 *	Determined recovery action
 */
static unsigned int ata_eh_analyze_tf(struct ata_queued_cmd *qc,
				      const struct ata_taskfile *tf)
{
	unsigned int tmp, action = 0;
	u8 stat = tf->command, err = tf->feature;

	if ((stat & (ATA_BUSY | ATA_DRQ | ATA_DRDY)) != ATA_DRDY) {
		qc->err_mask |= AC_ERR_HSM;
		return ATA_EH_RESET;
	}

	if (stat & (ATA_ERR | ATA_DF)) {
		qc->err_mask |= AC_ERR_DEV;
		/*
		 * Sense data reporting does not work if the
		 * device fault bit is set.
		 */
		if (stat & ATA_DF)
			stat &= ~ATA_SENSE;
	} else {
		return 0;
	}

	switch (qc->dev->class) {
	case ATA_DEV_ATA:
	case ATA_DEV_ZAC:
		if (stat & ATA_SENSE)
			ata_eh_request_sense(qc, qc->scsicmd);
		if (err & ATA_ICRC)
			qc->err_mask |= AC_ERR_ATA_BUS;
		if (err & (ATA_UNC | ATA_AMNF))
			qc->err_mask |= AC_ERR_MEDIA;
		if (err & ATA_IDNF)
			qc->err_mask |= AC_ERR_INVALID;
		break;

	case ATA_DEV_ATAPI:
		if (!(qc->ap->pflags & ATA_PFLAG_FROZEN)) {
			tmp = atapi_eh_request_sense(qc->dev,
						qc->scsicmd->sense_buffer,
						qc->result_tf.feature >> 4);
			if (!tmp)
				qc->flags |= ATA_QCFLAG_SENSE_VALID;
			else
				qc->err_mask |= tmp;
		}
	}

	if (qc->flags & ATA_QCFLAG_SENSE_VALID) {
		int ret = scsi_check_sense(qc->scsicmd);
		/*
		 * SUCCESS here means that the sense code could be
		 * evaluated and should be passed to the upper layers
		 * for correct evaluation.
		 * FAILED means the sense code could not be interpreted
		 * and the device would need to be reset.
		 * NEEDS_RETRY and ADD_TO_MLQUEUE means that the
		 * command would need to be retried.
		 */
		if (ret == NEEDS_RETRY || ret == ADD_TO_MLQUEUE) {
			qc->flags |= ATA_QCFLAG_RETRY;
			qc->err_mask |= AC_ERR_OTHER;
		} else if (ret != SUCCESS) {
			qc->err_mask |= AC_ERR_HSM;
		}
	}
	if (qc->err_mask & (AC_ERR_HSM | AC_ERR_TIMEOUT | AC_ERR_ATA_BUS))
		action |= ATA_EH_RESET;

	return action;
}

static int ata_eh_categorize_error(unsigned int eflags, unsigned int err_mask,
				   int *xfer_ok)
{
	int base = 0;

	if (!(eflags & ATA_EFLAG_DUBIOUS_XFER))
		*xfer_ok = 1;

	if (!*xfer_ok)
		base = ATA_ECAT_DUBIOUS_NONE;

	if (err_mask & AC_ERR_ATA_BUS)
		return base + ATA_ECAT_ATA_BUS;

	if (err_mask & AC_ERR_TIMEOUT)
		return base + ATA_ECAT_TOUT_HSM;

	if (eflags & ATA_EFLAG_IS_IO) {
		if (err_mask & AC_ERR_HSM)
			return base + ATA_ECAT_TOUT_HSM;
		if ((err_mask &
		     (AC_ERR_DEV|AC_ERR_MEDIA|AC_ERR_INVALID)) == AC_ERR_DEV)
			return base + ATA_ECAT_UNK_DEV;
	}

	return 0;
}

struct speed_down_verdict_arg {
	u64 since;
	int xfer_ok;
	int nr_errors[ATA_ECAT_NR];
};

static int speed_down_verdict_cb(struct ata_ering_entry *ent, void *void_arg)
{
	struct speed_down_verdict_arg *arg = void_arg;
	int cat;

	if ((ent->eflags & ATA_EFLAG_OLD_ER) || (ent->timestamp < arg->since))
		return -1;

	cat = ata_eh_categorize_error(ent->eflags, ent->err_mask,
				      &arg->xfer_ok);
	arg->nr_errors[cat]++;

	return 0;
}

/**
 *	ata_eh_speed_down_verdict - Determine speed down verdict
 *	@dev: Device of interest
 *
 *	This function examines error ring of @dev and determines
 *	whether NCQ needs to be turned off, transfer speed should be
 *	stepped down, or falling back to PIO is necessary.
 *
 *	ECAT_ATA_BUS	: ATA_BUS error for any command
 *
 *	ECAT_TOUT_HSM	: TIMEOUT for any command or HSM violation for
 *			  IO commands
 *
 *	ECAT_UNK_DEV	: Unknown DEV error for IO commands
 *
 *	ECAT_DUBIOUS_*	: Identical to above three but occurred while
 *			  data transfer hasn't been verified.
 *
 *	Verdicts are
 *
 *	NCQ_OFF		: Turn off NCQ.
 *
 *	SPEED_DOWN	: Speed down transfer speed but don't fall back
 *			  to PIO.
 *
 *	FALLBACK_TO_PIO	: Fall back to PIO.
 *
 *	Even if multiple verdicts are returned, only one action is
 *	taken per error.  An action triggered by non-DUBIOUS errors
 *	clears ering, while one triggered by DUBIOUS_* errors doesn't.
 *	This is to expedite speed down decisions right after device is
 *	initially configured.
 *
 *	The following are speed down rules.  #1 and #2 deal with
 *	DUBIOUS errors.
 *
 *	1. If more than one DUBIOUS_ATA_BUS or DUBIOUS_TOUT_HSM errors
 *	   occurred during last 5 mins, SPEED_DOWN and FALLBACK_TO_PIO.
 *
 *	2. If more than one DUBIOUS_TOUT_HSM or DUBIOUS_UNK_DEV errors
 *	   occurred during last 5 mins, NCQ_OFF.
 *
 *	3. If more than 6 ATA_BUS, TOUT_HSM or UNK_DEV errors
 *	   occurred during last 5 mins, FALLBACK_TO_PIO.
 *
 *	4. If more than 3 TOUT_HSM or UNK_DEV errors occurred
 *	   during last 10 mins, NCQ_OFF.
 *
 *	5. If more than 3 ATA_BUS or TOUT_HSM errors, or more than 6
 *	   UNK_DEV errors occurred during last 10 mins, SPEED_DOWN.
 *
 *	LOCKING:
 *	Inherited from caller.
 *
 *	RETURNS:
 *	OR of ATA_EH_SPDN_* flags.
 */
static unsigned int ata_eh_speed_down_verdict(struct ata_device *dev)
{
	const u64 j5mins = 5LLU * 60 * HZ, j10mins = 10LLU * 60 * HZ;
	u64 j64 = get_jiffies_64();
	struct speed_down_verdict_arg arg;
	unsigned int verdict = 0;

	/* scan past 5 mins of error history */
	memset(&arg, 0, sizeof(arg));
	arg.since = j64 - min(j64, j5mins);
	ata_ering_map(&dev->ering, speed_down_verdict_cb, &arg);

	if (arg.nr_errors[ATA_ECAT_DUBIOUS_ATA_BUS] +
	    arg.nr_errors[ATA_ECAT_DUBIOUS_TOUT_HSM] > 1)
		verdict |= ATA_EH_SPDN_SPEED_DOWN |
			ATA_EH_SPDN_FALLBACK_TO_PIO | ATA_EH_SPDN_KEEP_ERRORS;

	if (arg.nr_errors[ATA_ECAT_DUBIOUS_TOUT_HSM] +
	    arg.nr_errors[ATA_ECAT_DUBIOUS_UNK_DEV] > 1)
		verdict |= ATA_EH_SPDN_NCQ_OFF | ATA_EH_SPDN_KEEP_ERRORS;

	if (arg.nr_errors[ATA_ECAT_ATA_BUS] +
	    arg.nr_errors[ATA_ECAT_TOUT_HSM] +
	    arg.nr_errors[ATA_ECAT_UNK_DEV] > 6)
		verdict |= ATA_EH_SPDN_FALLBACK_TO_PIO;

	/* scan past 10 mins of error history */
	memset(&arg, 0, sizeof(arg));
	arg.since = j64 - min(j64, j10mins);
	ata_ering_map(&dev->ering, speed_down_verdict_cb, &arg);

	if (arg.nr_errors[ATA_ECAT_TOUT_HSM] +
	    arg.nr_errors[ATA_ECAT_UNK_DEV] > 3)
		verdict |= ATA_EH_SPDN_NCQ_OFF;

	if (arg.nr_errors[ATA_ECAT_ATA_BUS] +
	    arg.nr_errors[ATA_ECAT_TOUT_HSM] > 3 ||
	    arg.nr_errors[ATA_ECAT_UNK_DEV] > 6)
		verdict |= ATA_EH_SPDN_SPEED_DOWN;

	return verdict;
}
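
/*
 * Worked example of the rules above: a disk that logs four unknown
 * device errors on IO commands (ECAT_UNK_DEV) within ten minutes
 * trips rule #4 (more than 3), so the verdict includes NCQ_OFF, while
 * rule #3's five-minute threshold (more than 6) and rule #5's UNK_DEV
 * threshold (more than 6) are not reached.  ata_eh_speed_down() below
 * then picks a single action from the verdict.
 */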
*/ 2039 if (verdict & ATA_EH_SPDN_SPEED_DOWN) { 2040 /* speed down SATA link speed if possible */ 2041 if (sata_down_spd_limit(link, 0) == 0) { 2042 action |= ATA_EH_RESET; 2043 goto done; 2044 } 2045 2046 /* lower transfer mode */ 2047 if (dev->spdn_cnt < 2) { 2048 static const int dma_dnxfer_sel[] = 2049 { ATA_DNXFER_DMA, ATA_DNXFER_40C }; 2050 static const int pio_dnxfer_sel[] = 2051 { ATA_DNXFER_PIO, ATA_DNXFER_FORCE_PIO0 }; 2052 int sel; 2053 2054 if (dev->xfer_shift != ATA_SHIFT_PIO) 2055 sel = dma_dnxfer_sel[dev->spdn_cnt]; 2056 else 2057 sel = pio_dnxfer_sel[dev->spdn_cnt]; 2058 2059 dev->spdn_cnt++; 2060 2061 if (ata_down_xfermask_limit(dev, sel) == 0) { 2062 action |= ATA_EH_RESET; 2063 goto done; 2064 } 2065 } 2066 } 2067 2068 /* Fall back to PIO? Slowing down to PIO is meaningless for 2069 * SATA ATA devices. Consider it only for PATA and SATAPI. 2070 */ 2071 if ((verdict & ATA_EH_SPDN_FALLBACK_TO_PIO) && (dev->spdn_cnt >= 2) && 2072 (link->ap->cbl != ATA_CBL_SATA || dev->class == ATA_DEV_ATAPI) && 2073 (dev->xfer_shift != ATA_SHIFT_PIO)) { 2074 if (ata_down_xfermask_limit(dev, ATA_DNXFER_FORCE_PIO) == 0) { 2075 dev->spdn_cnt = 0; 2076 action |= ATA_EH_RESET; 2077 goto done; 2078 } 2079 } 2080 2081 return 0; 2082 done: 2083 /* device has been slowed down, blow error history */ 2084 if (!(verdict & ATA_EH_SPDN_KEEP_ERRORS)) 2085 ata_ering_clear(&dev->ering); 2086 return action; 2087 } 2088 2089 /** 2090 * ata_eh_worth_retry - analyze error and decide whether to retry 2091 * @qc: qc to possibly retry 2092 * 2093 * Look at the cause of the error and decide if a retry 2094 * might be useful or not. We don't want to retry media errors 2095 * because the drive itself has probably already taken 10-30 seconds 2096 * doing its own internal retries before reporting the failure. 2097 */ 2098 static inline int ata_eh_worth_retry(struct ata_queued_cmd *qc) 2099 { 2100 if (qc->err_mask & AC_ERR_MEDIA) 2101 return 0; /* don't retry media errors */ 2102 if (qc->flags & ATA_QCFLAG_IO) 2103 return 1; /* otherwise retry anything from fs stack */ 2104 if (qc->err_mask & AC_ERR_INVALID) 2105 return 0; /* don't retry these */ 2106 return qc->err_mask != AC_ERR_DEV; /* retry if not dev error */ 2107 } 2108 2109 /** 2110 * ata_eh_quiet - check if we need to be quiet about a command error 2111 * @qc: qc to check 2112 * 2113 * Look at the qc flags and its scsi command request flags to determine 2114 * if we need to be quiet about the command failure. 2115 */ 2116 static inline bool ata_eh_quiet(struct ata_queued_cmd *qc) 2117 { 2118 if (qc->scsicmd && 2119 qc->scsicmd->request->rq_flags & RQF_QUIET) 2120 qc->flags |= ATA_QCFLAG_QUIET; 2121 return qc->flags & ATA_QCFLAG_QUIET; 2122 } 2123 2124 /** 2125 * ata_eh_link_autopsy - analyze error and determine recovery action 2126 * @link: host link to perform autopsy on 2127 * 2128 * Analyze why @link failed and determine which recovery actions 2129 * are needed. This function also sets more detailed AC_ERR_* 2130 * values and fills sense data for ATAPI CHECK SENSE. 2131 * 2132 * LOCKING: 2133 * Kernel thread context (may sleep).
2134 */ 2135 static void ata_eh_link_autopsy(struct ata_link *link) 2136 { 2137 struct ata_port *ap = link->ap; 2138 struct ata_eh_context *ehc = &link->eh_context; 2139 struct ata_device *dev; 2140 unsigned int all_err_mask = 0, eflags = 0; 2141 int tag, nr_failed = 0, nr_quiet = 0; 2142 u32 serror; 2143 int rc; 2144 2145 DPRINTK("ENTER\n"); 2146 2147 if (ehc->i.flags & ATA_EHI_NO_AUTOPSY) 2148 return; 2149 2150 /* obtain and analyze SError */ 2151 rc = sata_scr_read(link, SCR_ERROR, &serror); 2152 if (rc == 0) { 2153 ehc->i.serror |= serror; 2154 ata_eh_analyze_serror(link); 2155 } else if (rc != -EOPNOTSUPP) { 2156 /* SError read failed, force reset and probing */ 2157 ehc->i.probe_mask |= ATA_ALL_DEVICES; 2158 ehc->i.action |= ATA_EH_RESET; 2159 ehc->i.err_mask |= AC_ERR_OTHER; 2160 } 2161 2162 /* analyze NCQ failure */ 2163 ata_eh_analyze_ncq_error(link); 2164 2165 /* any real error trumps AC_ERR_OTHER */ 2166 if (ehc->i.err_mask & ~AC_ERR_OTHER) 2167 ehc->i.err_mask &= ~AC_ERR_OTHER; 2168 2169 all_err_mask |= ehc->i.err_mask; 2170 2171 for (tag = 0; tag < ATA_MAX_QUEUE; tag++) { 2172 struct ata_queued_cmd *qc = __ata_qc_from_tag(ap, tag); 2173 2174 if (!(qc->flags & ATA_QCFLAG_FAILED) || 2175 ata_dev_phys_link(qc->dev) != link) 2176 continue; 2177 2178 /* inherit upper level err_mask */ 2179 qc->err_mask |= ehc->i.err_mask; 2180 2181 /* analyze TF */ 2182 ehc->i.action |= ata_eh_analyze_tf(qc, &qc->result_tf); 2183 2184 /* DEV errors are probably spurious in case of ATA_BUS error */ 2185 if (qc->err_mask & AC_ERR_ATA_BUS) 2186 qc->err_mask &= ~(AC_ERR_DEV | AC_ERR_MEDIA | 2187 AC_ERR_INVALID); 2188 2189 /* any real error trumps unknown error */ 2190 if (qc->err_mask & ~AC_ERR_OTHER) 2191 qc->err_mask &= ~AC_ERR_OTHER; 2192 2193 /* 2194 * SENSE_VALID trumps dev/unknown error and revalidation. Upper 2195 * layers will determine whether the command is worth retrying 2196 * based on the sense data and device class/type. Otherwise, 2197 * determine directly if the command is worth retrying using its 2198 * error mask and flags. 2199 */ 2200 if (qc->flags & ATA_QCFLAG_SENSE_VALID) 2201 qc->err_mask &= ~(AC_ERR_DEV | AC_ERR_OTHER); 2202 else if (ata_eh_worth_retry(qc)) 2203 qc->flags |= ATA_QCFLAG_RETRY; 2204 2205 /* accumulate error info */ 2206 ehc->i.dev = qc->dev; 2207 all_err_mask |= qc->err_mask; 2208 if (qc->flags & ATA_QCFLAG_IO) 2209 eflags |= ATA_EFLAG_IS_IO; 2210 trace_ata_eh_link_autopsy_qc(qc); 2211 2212 /* Count quiet errors */ 2213 if (ata_eh_quiet(qc)) 2214 nr_quiet++; 2215 nr_failed++; 2216 } 2217 2218 /* If all failed commands requested silence, then be quiet */ 2219 if (nr_quiet == nr_failed) 2220 ehc->i.flags |= ATA_EHI_QUIET; 2221 2222 /* enforce default EH actions */ 2223 if (ap->pflags & ATA_PFLAG_FROZEN || 2224 all_err_mask & (AC_ERR_HSM | AC_ERR_TIMEOUT)) 2225 ehc->i.action |= ATA_EH_RESET; 2226 else if (((eflags & ATA_EFLAG_IS_IO) && all_err_mask) || 2227 (!(eflags & ATA_EFLAG_IS_IO) && (all_err_mask & ~AC_ERR_DEV))) 2228 ehc->i.action |= ATA_EH_REVALIDATE; 2229 2230 /* If we have offending qcs and the associated failed device, 2231 * perform per-dev EH action only on the offending device. 
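 * Port-wide actions such as reset stay in ehc->i.action; only the
 * ATA_EH_PERDEV_MASK subset is moved into dev_action[] below.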
2232 */ 2233 if (ehc->i.dev) { 2234 ehc->i.dev_action[ehc->i.dev->devno] |= 2235 ehc->i.action & ATA_EH_PERDEV_MASK; 2236 ehc->i.action &= ~ATA_EH_PERDEV_MASK; 2237 } 2238 2239 /* propagate timeout to host link */ 2240 if ((all_err_mask & AC_ERR_TIMEOUT) && !ata_is_host_link(link)) 2241 ap->link.eh_context.i.err_mask |= AC_ERR_TIMEOUT; 2242 2243 /* record error and consider speeding down */ 2244 dev = ehc->i.dev; 2245 if (!dev && ((ata_link_max_devices(link) == 1 && 2246 ata_dev_enabled(link->device)))) 2247 dev = link->device; 2248 2249 if (dev) { 2250 if (dev->flags & ATA_DFLAG_DUBIOUS_XFER) 2251 eflags |= ATA_EFLAG_DUBIOUS_XFER; 2252 ehc->i.action |= ata_eh_speed_down(dev, eflags, all_err_mask); 2253 trace_ata_eh_link_autopsy(dev, ehc->i.action, all_err_mask); 2254 } 2255 DPRINTK("EXIT\n"); 2256 } 2257 2258 /** 2259 * ata_eh_autopsy - analyze error and determine recovery action 2260 * @ap: host port to perform autopsy on 2261 * 2262 * Analyze all links of @ap and determine why they failed and 2263 * which recovery actions are needed. 2264 * 2265 * LOCKING: 2266 * Kernel thread context (may sleep). 2267 */ 2268 void ata_eh_autopsy(struct ata_port *ap) 2269 { 2270 struct ata_link *link; 2271 2272 ata_for_each_link(link, ap, EDGE) 2273 ata_eh_link_autopsy(link); 2274 2275 /* Handle the frigging slave link. Autopsy is done similarly 2276 * but actions and flags are transferred over to the master 2277 * link and handled from there. 2278 */ 2279 if (ap->slave_link) { 2280 struct ata_eh_context *mehc = &ap->link.eh_context; 2281 struct ata_eh_context *sehc = &ap->slave_link->eh_context; 2282 2283 /* transfer control flags from master to slave */ 2284 sehc->i.flags |= mehc->i.flags & ATA_EHI_TO_SLAVE_MASK; 2285 2286 /* perform autopsy on the slave link */ 2287 ata_eh_link_autopsy(ap->slave_link); 2288 2289 /* transfer actions from slave to master and clear slave */ 2290 ata_eh_about_to_do(ap->slave_link, NULL, ATA_EH_ALL_ACTIONS); 2291 mehc->i.action |= sehc->i.action; 2292 mehc->i.dev_action[1] |= sehc->i.dev_action[1]; 2293 mehc->i.flags |= sehc->i.flags; 2294 ata_eh_done(ap->slave_link, NULL, ATA_EH_ALL_ACTIONS); 2295 } 2296 2297 /* Autopsy of fanout ports can affect host link autopsy. 2298 * Perform host link autopsy last. 2299 */ 2300 if (sata_pmp_attached(ap)) 2301 ata_eh_link_autopsy(&ap->link); 2302 } 2303 2304 /** 2305 * ata_get_cmd_descript - get description for ATA command 2306 * @command: ATA command code to get description for 2307 * 2308 * Return a textual description of the given command, or NULL if the 2309 * command is not known. 
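 * The lookup table below is compiled in only when
 * CONFIG_ATA_VERBOSE_ERROR is enabled; without it this function
 * unconditionally returns NULL.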
2310 * 2311 * LOCKING: 2312 * None 2313 */ 2314 const char *ata_get_cmd_descript(u8 command) 2315 { 2316 #ifdef CONFIG_ATA_VERBOSE_ERROR 2317 static const struct 2318 { 2319 u8 command; 2320 const char *text; 2321 } cmd_descr[] = { 2322 { ATA_CMD_DEV_RESET, "DEVICE RESET" }, 2323 { ATA_CMD_CHK_POWER, "CHECK POWER MODE" }, 2324 { ATA_CMD_STANDBY, "STANDBY" }, 2325 { ATA_CMD_IDLE, "IDLE" }, 2326 { ATA_CMD_EDD, "EXECUTE DEVICE DIAGNOSTIC" }, 2327 { ATA_CMD_DOWNLOAD_MICRO, "DOWNLOAD MICROCODE" }, 2328 { ATA_CMD_DOWNLOAD_MICRO_DMA, "DOWNLOAD MICROCODE DMA" }, 2329 { ATA_CMD_NOP, "NOP" }, 2330 { ATA_CMD_FLUSH, "FLUSH CACHE" }, 2331 { ATA_CMD_FLUSH_EXT, "FLUSH CACHE EXT" }, 2332 { ATA_CMD_ID_ATA, "IDENTIFY DEVICE" }, 2333 { ATA_CMD_ID_ATAPI, "IDENTIFY PACKET DEVICE" }, 2334 { ATA_CMD_SERVICE, "SERVICE" }, 2335 { ATA_CMD_READ, "READ DMA" }, 2336 { ATA_CMD_READ_EXT, "READ DMA EXT" }, 2337 { ATA_CMD_READ_QUEUED, "READ DMA QUEUED" }, 2338 { ATA_CMD_READ_STREAM_EXT, "READ STREAM EXT" }, 2339 { ATA_CMD_READ_STREAM_DMA_EXT, "READ STREAM DMA EXT" }, 2340 { ATA_CMD_WRITE, "WRITE DMA" }, 2341 { ATA_CMD_WRITE_EXT, "WRITE DMA EXT" }, 2342 { ATA_CMD_WRITE_QUEUED, "WRITE DMA QUEUED EXT" }, 2343 { ATA_CMD_WRITE_STREAM_EXT, "WRITE STREAM EXT" }, 2344 { ATA_CMD_WRITE_STREAM_DMA_EXT, "WRITE STREAM DMA EXT" }, 2345 { ATA_CMD_WRITE_FUA_EXT, "WRITE DMA FUA EXT" }, 2346 { ATA_CMD_WRITE_QUEUED_FUA_EXT, "WRITE DMA QUEUED FUA EXT" }, 2347 { ATA_CMD_FPDMA_READ, "READ FPDMA QUEUED" }, 2348 { ATA_CMD_FPDMA_WRITE, "WRITE FPDMA QUEUED" }, 2349 { ATA_CMD_FPDMA_SEND, "SEND FPDMA QUEUED" }, 2350 { ATA_CMD_FPDMA_RECV, "RECEIVE FPDMA QUEUED" }, 2351 { ATA_CMD_PIO_READ, "READ SECTOR(S)" }, 2352 { ATA_CMD_PIO_READ_EXT, "READ SECTOR(S) EXT" }, 2353 { ATA_CMD_PIO_WRITE, "WRITE SECTOR(S)" }, 2354 { ATA_CMD_PIO_WRITE_EXT, "WRITE SECTOR(S) EXT" }, 2355 { ATA_CMD_READ_MULTI, "READ MULTIPLE" }, 2356 { ATA_CMD_READ_MULTI_EXT, "READ MULTIPLE EXT" }, 2357 { ATA_CMD_WRITE_MULTI, "WRITE MULTIPLE" }, 2358 { ATA_CMD_WRITE_MULTI_EXT, "WRITE MULTIPLE EXT" }, 2359 { ATA_CMD_WRITE_MULTI_FUA_EXT, "WRITE MULTIPLE FUA EXT" }, 2360 { ATA_CMD_SET_FEATURES, "SET FEATURES" }, 2361 { ATA_CMD_SET_MULTI, "SET MULTIPLE MODE" }, 2362 { ATA_CMD_VERIFY, "READ VERIFY SECTOR(S)" }, 2363 { ATA_CMD_VERIFY_EXT, "READ VERIFY SECTOR(S) EXT" }, 2364 { ATA_CMD_WRITE_UNCORR_EXT, "WRITE UNCORRECTABLE EXT" }, 2365 { ATA_CMD_STANDBYNOW1, "STANDBY IMMEDIATE" }, 2366 { ATA_CMD_IDLEIMMEDIATE, "IDLE IMMEDIATE" }, 2367 { ATA_CMD_SLEEP, "SLEEP" }, 2368 { ATA_CMD_INIT_DEV_PARAMS, "INITIALIZE DEVICE PARAMETERS" }, 2369 { ATA_CMD_READ_NATIVE_MAX, "READ NATIVE MAX ADDRESS" }, 2370 { ATA_CMD_READ_NATIVE_MAX_EXT, "READ NATIVE MAX ADDRESS EXT" }, 2371 { ATA_CMD_SET_MAX, "SET MAX ADDRESS" }, 2372 { ATA_CMD_SET_MAX_EXT, "SET MAX ADDRESS EXT" }, 2373 { ATA_CMD_READ_LOG_EXT, "READ LOG EXT" }, 2374 { ATA_CMD_WRITE_LOG_EXT, "WRITE LOG EXT" }, 2375 { ATA_CMD_READ_LOG_DMA_EXT, "READ LOG DMA EXT" }, 2376 { ATA_CMD_WRITE_LOG_DMA_EXT, "WRITE LOG DMA EXT" }, 2377 { ATA_CMD_TRUSTED_NONDATA, "TRUSTED NON-DATA" }, 2378 { ATA_CMD_TRUSTED_RCV, "TRUSTED RECEIVE" }, 2379 { ATA_CMD_TRUSTED_RCV_DMA, "TRUSTED RECEIVE DMA" }, 2380 { ATA_CMD_TRUSTED_SND, "TRUSTED SEND" }, 2381 { ATA_CMD_TRUSTED_SND_DMA, "TRUSTED SEND DMA" }, 2382 { ATA_CMD_PMP_READ, "READ BUFFER" }, 2383 { ATA_CMD_PMP_READ_DMA, "READ BUFFER DMA" }, 2384 { ATA_CMD_PMP_WRITE, "WRITE BUFFER" }, 2385 { ATA_CMD_PMP_WRITE_DMA, "WRITE BUFFER DMA" }, 2386 { ATA_CMD_CONF_OVERLAY, "DEVICE CONFIGURATION OVERLAY" }, 2387 { ATA_CMD_SEC_SET_PASS, "SECURITY SET 
PASSWORD" }, 2388 { ATA_CMD_SEC_UNLOCK, "SECURITY UNLOCK" }, 2389 { ATA_CMD_SEC_ERASE_PREP, "SECURITY ERASE PREPARE" }, 2390 { ATA_CMD_SEC_ERASE_UNIT, "SECURITY ERASE UNIT" }, 2391 { ATA_CMD_SEC_FREEZE_LOCK, "SECURITY FREEZE LOCK" }, 2392 { ATA_CMD_SEC_DISABLE_PASS, "SECURITY DISABLE PASSWORD" }, 2393 { ATA_CMD_CONFIG_STREAM, "CONFIGURE STREAM" }, 2394 { ATA_CMD_SMART, "SMART" }, 2395 { ATA_CMD_MEDIA_LOCK, "DOOR LOCK" }, 2396 { ATA_CMD_MEDIA_UNLOCK, "DOOR UNLOCK" }, 2397 { ATA_CMD_DSM, "DATA SET MANAGEMENT" }, 2398 { ATA_CMD_CHK_MED_CRD_TYP, "CHECK MEDIA CARD TYPE" }, 2399 { ATA_CMD_CFA_REQ_EXT_ERR, "CFA REQUEST EXTENDED ERROR" }, 2400 { ATA_CMD_CFA_WRITE_NE, "CFA WRITE SECTORS WITHOUT ERASE" }, 2401 { ATA_CMD_CFA_TRANS_SECT, "CFA TRANSLATE SECTOR" }, 2402 { ATA_CMD_CFA_ERASE, "CFA ERASE SECTORS" }, 2403 { ATA_CMD_CFA_WRITE_MULT_NE, "CFA WRITE MULTIPLE WITHOUT ERASE" }, 2404 { ATA_CMD_REQ_SENSE_DATA, "REQUEST SENSE DATA EXT" }, 2405 { ATA_CMD_SANITIZE_DEVICE, "SANITIZE DEVICE" }, 2406 { ATA_CMD_ZAC_MGMT_IN, "ZAC MANAGEMENT IN" }, 2407 { ATA_CMD_ZAC_MGMT_OUT, "ZAC MANAGEMENT OUT" }, 2408 { ATA_CMD_READ_LONG, "READ LONG (with retries)" }, 2409 { ATA_CMD_READ_LONG_ONCE, "READ LONG (without retries)" }, 2410 { ATA_CMD_WRITE_LONG, "WRITE LONG (with retries)" }, 2411 { ATA_CMD_WRITE_LONG_ONCE, "WRITE LONG (without retries)" }, 2412 { ATA_CMD_RESTORE, "RECALIBRATE" }, 2413 { 0, NULL } /* terminate list */ 2414 }; 2415 2416 unsigned int i; 2417 for (i = 0; cmd_descr[i].text; i++) 2418 if (cmd_descr[i].command == command) 2419 return cmd_descr[i].text; 2420 #endif 2421 2422 return NULL; 2423 } 2424 EXPORT_SYMBOL_GPL(ata_get_cmd_descript); 2425 2426 /** 2427 * ata_eh_link_report - report error handling to user 2428 * @link: ATA link EH is going on 2429 * 2430 * Report EH to user. 2431 * 2432 * LOCKING: 2433 * None. 
2434 */ 2435 static void ata_eh_link_report(struct ata_link *link) 2436 { 2437 struct ata_port *ap = link->ap; 2438 struct ata_eh_context *ehc = &link->eh_context; 2439 const char *frozen, *desc; 2440 char tries_buf[6] = ""; 2441 int tag, nr_failed = 0; 2442 2443 if (ehc->i.flags & ATA_EHI_QUIET) 2444 return; 2445 2446 desc = NULL; 2447 if (ehc->i.desc[0] != '\0') 2448 desc = ehc->i.desc; 2449 2450 for (tag = 0; tag < ATA_MAX_QUEUE; tag++) { 2451 struct ata_queued_cmd *qc = __ata_qc_from_tag(ap, tag); 2452 2453 if (!(qc->flags & ATA_QCFLAG_FAILED) || 2454 ata_dev_phys_link(qc->dev) != link || 2455 ((qc->flags & ATA_QCFLAG_QUIET) && 2456 qc->err_mask == AC_ERR_DEV)) 2457 continue; 2458 if (qc->flags & ATA_QCFLAG_SENSE_VALID && !qc->err_mask) 2459 continue; 2460 2461 nr_failed++; 2462 } 2463 2464 if (!nr_failed && !ehc->i.err_mask) 2465 return; 2466 2467 frozen = ""; 2468 if (ap->pflags & ATA_PFLAG_FROZEN) 2469 frozen = " frozen"; 2470 2471 if (ap->eh_tries < ATA_EH_MAX_TRIES) 2472 snprintf(tries_buf, sizeof(tries_buf), " t%d", 2473 ap->eh_tries); 2474 2475 if (ehc->i.dev) { 2476 ata_dev_err(ehc->i.dev, "exception Emask 0x%x " 2477 "SAct 0x%x SErr 0x%x action 0x%x%s%s\n", 2478 ehc->i.err_mask, link->sactive, ehc->i.serror, 2479 ehc->i.action, frozen, tries_buf); 2480 if (desc) 2481 ata_dev_err(ehc->i.dev, "%s\n", desc); 2482 } else { 2483 ata_link_err(link, "exception Emask 0x%x " 2484 "SAct 0x%x SErr 0x%x action 0x%x%s%s\n", 2485 ehc->i.err_mask, link->sactive, ehc->i.serror, 2486 ehc->i.action, frozen, tries_buf); 2487 if (desc) 2488 ata_link_err(link, "%s\n", desc); 2489 } 2490 2491 #ifdef CONFIG_ATA_VERBOSE_ERROR 2492 if (ehc->i.serror) 2493 ata_link_err(link, 2494 "SError: { %s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s}\n", 2495 ehc->i.serror & SERR_DATA_RECOVERED ? "RecovData " : "", 2496 ehc->i.serror & SERR_COMM_RECOVERED ? "RecovComm " : "", 2497 ehc->i.serror & SERR_DATA ? "UnrecovData " : "", 2498 ehc->i.serror & SERR_PERSISTENT ? "Persist " : "", 2499 ehc->i.serror & SERR_PROTOCOL ? "Proto " : "", 2500 ehc->i.serror & SERR_INTERNAL ? "HostInt " : "", 2501 ehc->i.serror & SERR_PHYRDY_CHG ? "PHYRdyChg " : "", 2502 ehc->i.serror & SERR_PHY_INT_ERR ? "PHYInt " : "", 2503 ehc->i.serror & SERR_COMM_WAKE ? "CommWake " : "", 2504 ehc->i.serror & SERR_10B_8B_ERR ? "10B8B " : "", 2505 ehc->i.serror & SERR_DISPARITY ? "Dispar " : "", 2506 ehc->i.serror & SERR_CRC ? "BadCRC " : "", 2507 ehc->i.serror & SERR_HANDSHAKE ? "Handshk " : "", 2508 ehc->i.serror & SERR_LINK_SEQ_ERR ? "LinkSeq " : "", 2509 ehc->i.serror & SERR_TRANS_ST_ERROR ? "TrStaTrns " : "", 2510 ehc->i.serror & SERR_UNRECOG_FIS ? "UnrecFIS " : "", 2511 ehc->i.serror & SERR_DEV_XCHG ? 
"DevExch " : ""); 2512 #endif 2513 2514 for (tag = 0; tag < ATA_MAX_QUEUE; tag++) { 2515 struct ata_queued_cmd *qc = __ata_qc_from_tag(ap, tag); 2516 struct ata_taskfile *cmd = &qc->tf, *res = &qc->result_tf; 2517 char data_buf[20] = ""; 2518 char cdb_buf[70] = ""; 2519 2520 if (!(qc->flags & ATA_QCFLAG_FAILED) || 2521 ata_dev_phys_link(qc->dev) != link || !qc->err_mask) 2522 continue; 2523 2524 if (qc->dma_dir != DMA_NONE) { 2525 static const char *dma_str[] = { 2526 [DMA_BIDIRECTIONAL] = "bidi", 2527 [DMA_TO_DEVICE] = "out", 2528 [DMA_FROM_DEVICE] = "in", 2529 }; 2530 const char *prot_str = NULL; 2531 2532 switch (qc->tf.protocol) { 2533 case ATA_PROT_UNKNOWN: 2534 prot_str = "unknown"; 2535 break; 2536 case ATA_PROT_NODATA: 2537 prot_str = "nodata"; 2538 break; 2539 case ATA_PROT_PIO: 2540 prot_str = "pio"; 2541 break; 2542 case ATA_PROT_DMA: 2543 prot_str = "dma"; 2544 break; 2545 case ATA_PROT_NCQ: 2546 prot_str = "ncq dma"; 2547 break; 2548 case ATA_PROT_NCQ_NODATA: 2549 prot_str = "ncq nodata"; 2550 break; 2551 case ATAPI_PROT_NODATA: 2552 prot_str = "nodata"; 2553 break; 2554 case ATAPI_PROT_PIO: 2555 prot_str = "pio"; 2556 break; 2557 case ATAPI_PROT_DMA: 2558 prot_str = "dma"; 2559 break; 2560 } 2561 snprintf(data_buf, sizeof(data_buf), " %s %u %s", 2562 prot_str, qc->nbytes, dma_str[qc->dma_dir]); 2563 } 2564 2565 if (ata_is_atapi(qc->tf.protocol)) { 2566 const u8 *cdb = qc->cdb; 2567 size_t cdb_len = qc->dev->cdb_len; 2568 2569 if (qc->scsicmd) { 2570 cdb = qc->scsicmd->cmnd; 2571 cdb_len = qc->scsicmd->cmd_len; 2572 } 2573 __scsi_format_command(cdb_buf, sizeof(cdb_buf), 2574 cdb, cdb_len); 2575 } else { 2576 const char *descr = ata_get_cmd_descript(cmd->command); 2577 if (descr) 2578 ata_dev_err(qc->dev, "failed command: %s\n", 2579 descr); 2580 } 2581 2582 ata_dev_err(qc->dev, 2583 "cmd %02x/%02x:%02x:%02x:%02x:%02x/%02x:%02x:%02x:%02x:%02x/%02x " 2584 "tag %d%s\n %s" 2585 "res %02x/%02x:%02x:%02x:%02x:%02x/%02x:%02x:%02x:%02x:%02x/%02x " 2586 "Emask 0x%x (%s)%s\n", 2587 cmd->command, cmd->feature, cmd->nsect, 2588 cmd->lbal, cmd->lbam, cmd->lbah, 2589 cmd->hob_feature, cmd->hob_nsect, 2590 cmd->hob_lbal, cmd->hob_lbam, cmd->hob_lbah, 2591 cmd->device, qc->tag, data_buf, cdb_buf, 2592 res->command, res->feature, res->nsect, 2593 res->lbal, res->lbam, res->lbah, 2594 res->hob_feature, res->hob_nsect, 2595 res->hob_lbal, res->hob_lbam, res->hob_lbah, 2596 res->device, qc->err_mask, ata_err_string(qc->err_mask), 2597 qc->err_mask & AC_ERR_NCQ ? " <F>" : ""); 2598 2599 #ifdef CONFIG_ATA_VERBOSE_ERROR 2600 if (res->command & (ATA_BUSY | ATA_DRDY | ATA_DF | ATA_DRQ | 2601 ATA_SENSE | ATA_ERR)) { 2602 if (res->command & ATA_BUSY) 2603 ata_dev_err(qc->dev, "status: { Busy }\n"); 2604 else 2605 ata_dev_err(qc->dev, "status: { %s%s%s%s%s}\n", 2606 res->command & ATA_DRDY ? "DRDY " : "", 2607 res->command & ATA_DF ? "DF " : "", 2608 res->command & ATA_DRQ ? "DRQ " : "", 2609 res->command & ATA_SENSE ? "SENSE " : "", 2610 res->command & ATA_ERR ? "ERR " : ""); 2611 } 2612 2613 if (cmd->command != ATA_CMD_PACKET && 2614 (res->feature & (ATA_ICRC | ATA_UNC | ATA_AMNF | 2615 ATA_IDNF | ATA_ABORTED))) 2616 ata_dev_err(qc->dev, "error: { %s%s%s%s%s}\n", 2617 res->feature & ATA_ICRC ? "ICRC " : "", 2618 res->feature & ATA_UNC ? "UNC " : "", 2619 res->feature & ATA_AMNF ? "AMNF " : "", 2620 res->feature & ATA_IDNF ? "IDNF " : "", 2621 res->feature & ATA_ABORTED ? 
"ABRT " : ""); 2622 #endif 2623 } 2624 } 2625 2626 /** 2627 * ata_eh_report - report error handling to user 2628 * @ap: ATA port to report EH about 2629 * 2630 * Report EH to user. 2631 * 2632 * LOCKING: 2633 * None. 2634 */ 2635 void ata_eh_report(struct ata_port *ap) 2636 { 2637 struct ata_link *link; 2638 2639 ata_for_each_link(link, ap, HOST_FIRST) 2640 ata_eh_link_report(link); 2641 } 2642 2643 static int ata_do_reset(struct ata_link *link, ata_reset_fn_t reset, 2644 unsigned int *classes, unsigned long deadline, 2645 bool clear_classes) 2646 { 2647 struct ata_device *dev; 2648 2649 if (clear_classes) 2650 ata_for_each_dev(dev, link, ALL) 2651 classes[dev->devno] = ATA_DEV_UNKNOWN; 2652 2653 return reset(link, classes, deadline); 2654 } 2655 2656 static int ata_eh_followup_srst_needed(struct ata_link *link, int rc) 2657 { 2658 if ((link->flags & ATA_LFLAG_NO_SRST) || ata_link_offline(link)) 2659 return 0; 2660 if (rc == -EAGAIN) 2661 return 1; 2662 if (sata_pmp_supported(link->ap) && ata_is_host_link(link)) 2663 return 1; 2664 return 0; 2665 } 2666 2667 int ata_eh_reset(struct ata_link *link, int classify, 2668 ata_prereset_fn_t prereset, ata_reset_fn_t softreset, 2669 ata_reset_fn_t hardreset, ata_postreset_fn_t postreset) 2670 { 2671 struct ata_port *ap = link->ap; 2672 struct ata_link *slave = ap->slave_link; 2673 struct ata_eh_context *ehc = &link->eh_context; 2674 struct ata_eh_context *sehc = slave ? &slave->eh_context : NULL; 2675 unsigned int *classes = ehc->classes; 2676 unsigned int lflags = link->flags; 2677 int verbose = !(ehc->i.flags & ATA_EHI_QUIET); 2678 int max_tries = 0, try = 0; 2679 struct ata_link *failed_link; 2680 struct ata_device *dev; 2681 unsigned long deadline, now; 2682 ata_reset_fn_t reset; 2683 unsigned long flags; 2684 u32 sstatus; 2685 int nr_unknown, rc; 2686 2687 /* 2688 * Prepare to reset 2689 */ 2690 while (ata_eh_reset_timeouts[max_tries] != ULONG_MAX) 2691 max_tries++; 2692 if (link->flags & ATA_LFLAG_RST_ONCE) 2693 max_tries = 1; 2694 if (link->flags & ATA_LFLAG_NO_HRST) 2695 hardreset = NULL; 2696 if (link->flags & ATA_LFLAG_NO_SRST) 2697 softreset = NULL; 2698 2699 /* make sure each reset attempt is at least COOL_DOWN apart */ 2700 if (ehc->i.flags & ATA_EHI_DID_RESET) { 2701 now = jiffies; 2702 WARN_ON(time_after(ehc->last_reset, now)); 2703 deadline = ata_deadline(ehc->last_reset, 2704 ATA_EH_RESET_COOL_DOWN); 2705 if (time_before(now, deadline)) 2706 schedule_timeout_uninterruptible(deadline - now); 2707 } 2708 2709 spin_lock_irqsave(ap->lock, flags); 2710 ap->pflags |= ATA_PFLAG_RESETTING; 2711 spin_unlock_irqrestore(ap->lock, flags); 2712 2713 ata_eh_about_to_do(link, NULL, ATA_EH_RESET); 2714 2715 ata_for_each_dev(dev, link, ALL) { 2716 /* If we issue an SRST then an ATA drive (not ATAPI) 2717 * may change configuration and be in PIO0 timing. If 2718 * we do a hard reset (or are coming from power on) 2719 * this is true for ATA or ATAPI. Until we've set a 2720 * suitable controller mode we should not touch the 2721 * bus as we may be talking too fast. 2722 */ 2723 dev->pio_mode = XFER_PIO_0; 2724 dev->dma_mode = 0xff; 2725 2726 /* If the controller has a pio mode setup function 2727 * then use it to set the chipset to rights. Don't 2728 * touch the DMA setup as that will be dealt with when 2729 * configuring devices. 
2730 */ 2731 if (ap->ops->set_piomode) 2732 ap->ops->set_piomode(ap, dev); 2733 } 2734 2735 /* prefer hardreset */ 2736 reset = NULL; 2737 ehc->i.action &= ~ATA_EH_RESET; 2738 if (hardreset) { 2739 reset = hardreset; 2740 ehc->i.action |= ATA_EH_HARDRESET; 2741 } else if (softreset) { 2742 reset = softreset; 2743 ehc->i.action |= ATA_EH_SOFTRESET; 2744 } 2745 2746 if (prereset) { 2747 unsigned long deadline = ata_deadline(jiffies, 2748 ATA_EH_PRERESET_TIMEOUT); 2749 2750 if (slave) { 2751 sehc->i.action &= ~ATA_EH_RESET; 2752 sehc->i.action |= ehc->i.action; 2753 } 2754 2755 rc = prereset(link, deadline); 2756 2757 /* If present, do prereset on slave link too. Reset 2758 * is skipped iff both master and slave links report 2759 * -ENOENT or clear ATA_EH_RESET. 2760 */ 2761 if (slave && (rc == 0 || rc == -ENOENT)) { 2762 int tmp; 2763 2764 tmp = prereset(slave, deadline); 2765 if (tmp != -ENOENT) 2766 rc = tmp; 2767 2768 ehc->i.action |= sehc->i.action; 2769 } 2770 2771 if (rc) { 2772 if (rc == -ENOENT) { 2773 ata_link_dbg(link, "port disabled--ignoring\n"); 2774 ehc->i.action &= ~ATA_EH_RESET; 2775 2776 ata_for_each_dev(dev, link, ALL) 2777 classes[dev->devno] = ATA_DEV_NONE; 2778 2779 rc = 0; 2780 } else 2781 ata_link_err(link, 2782 "prereset failed (errno=%d)\n", 2783 rc); 2784 goto out; 2785 } 2786 2787 /* prereset() might have cleared ATA_EH_RESET. If so, 2788 * bang classes, thaw and return. 2789 */ 2790 if (reset && !(ehc->i.action & ATA_EH_RESET)) { 2791 ata_for_each_dev(dev, link, ALL) 2792 classes[dev->devno] = ATA_DEV_NONE; 2793 if ((ap->pflags & ATA_PFLAG_FROZEN) && 2794 ata_is_host_link(link)) 2795 ata_eh_thaw_port(ap); 2796 rc = 0; 2797 goto out; 2798 } 2799 } 2800 2801 retry: 2802 /* 2803 * Perform reset 2804 */ 2805 if (ata_is_host_link(link)) 2806 ata_eh_freeze_port(ap); 2807 2808 deadline = ata_deadline(jiffies, ata_eh_reset_timeouts[try++]); 2809 2810 if (reset) { 2811 if (verbose) 2812 ata_link_info(link, "%s resetting link\n", 2813 reset == softreset ? 
"soft" : "hard"); 2814 2815 /* mark that this EH session started with reset */ 2816 ehc->last_reset = jiffies; 2817 if (reset == hardreset) 2818 ehc->i.flags |= ATA_EHI_DID_HARDRESET; 2819 else 2820 ehc->i.flags |= ATA_EHI_DID_SOFTRESET; 2821 2822 rc = ata_do_reset(link, reset, classes, deadline, true); 2823 if (rc && rc != -EAGAIN) { 2824 failed_link = link; 2825 goto fail; 2826 } 2827 2828 /* hardreset slave link if existent */ 2829 if (slave && reset == hardreset) { 2830 int tmp; 2831 2832 if (verbose) 2833 ata_link_info(slave, "hard resetting link\n"); 2834 2835 ata_eh_about_to_do(slave, NULL, ATA_EH_RESET); 2836 tmp = ata_do_reset(slave, reset, classes, deadline, 2837 false); 2838 switch (tmp) { 2839 case -EAGAIN: 2840 rc = -EAGAIN; 2841 case 0: 2842 break; 2843 default: 2844 failed_link = slave; 2845 rc = tmp; 2846 goto fail; 2847 } 2848 } 2849 2850 /* perform follow-up SRST if necessary */ 2851 if (reset == hardreset && 2852 ata_eh_followup_srst_needed(link, rc)) { 2853 reset = softreset; 2854 2855 if (!reset) { 2856 ata_link_err(link, 2857 "follow-up softreset required but no softreset available\n"); 2858 failed_link = link; 2859 rc = -EINVAL; 2860 goto fail; 2861 } 2862 2863 ata_eh_about_to_do(link, NULL, ATA_EH_RESET); 2864 rc = ata_do_reset(link, reset, classes, deadline, true); 2865 if (rc) { 2866 failed_link = link; 2867 goto fail; 2868 } 2869 } 2870 } else { 2871 if (verbose) 2872 ata_link_info(link, 2873 "no reset method available, skipping reset\n"); 2874 if (!(lflags & ATA_LFLAG_ASSUME_CLASS)) 2875 lflags |= ATA_LFLAG_ASSUME_ATA; 2876 } 2877 2878 /* 2879 * Post-reset processing 2880 */ 2881 ata_for_each_dev(dev, link, ALL) { 2882 /* After the reset, the device state is PIO 0 and the 2883 * controller state is undefined. Reset also wakes up 2884 * drives from sleeping mode. 2885 */ 2886 dev->pio_mode = XFER_PIO_0; 2887 dev->flags &= ~ATA_DFLAG_SLEEPING; 2888 2889 if (ata_phys_link_offline(ata_dev_phys_link(dev))) 2890 continue; 2891 2892 /* apply class override */ 2893 if (lflags & ATA_LFLAG_ASSUME_ATA) 2894 classes[dev->devno] = ATA_DEV_ATA; 2895 else if (lflags & ATA_LFLAG_ASSUME_SEMB) 2896 classes[dev->devno] = ATA_DEV_SEMB_UNSUP; 2897 } 2898 2899 /* record current link speed */ 2900 if (sata_scr_read(link, SCR_STATUS, &sstatus) == 0) 2901 link->sata_spd = (sstatus >> 4) & 0xf; 2902 if (slave && sata_scr_read(slave, SCR_STATUS, &sstatus) == 0) 2903 slave->sata_spd = (sstatus >> 4) & 0xf; 2904 2905 /* thaw the port */ 2906 if (ata_is_host_link(link)) 2907 ata_eh_thaw_port(ap); 2908 2909 /* postreset() should clear hardware SError. Although SError 2910 * is cleared during link resume, clearing SError here is 2911 * necessary as some PHYs raise hotplug events after SRST. 2912 * This introduces race condition where hotplug occurs between 2913 * reset and here. This race is mediated by cross checking 2914 * link onlineness and classification result later. 2915 */ 2916 if (postreset) { 2917 postreset(link, classes); 2918 if (slave) 2919 postreset(slave, classes); 2920 } 2921 2922 /* 2923 * Some controllers can't be frozen very well and may set spurious 2924 * error conditions during reset. Clear accumulated error 2925 * information and re-thaw the port if frozen. As reset is the 2926 * final recovery action and we cross check link onlineness against 2927 * device classification later, no hotplug event is lost by this. 
2928 */ 2929 spin_lock_irqsave(link->ap->lock, flags); 2930 memset(&link->eh_info, 0, sizeof(link->eh_info)); 2931 if (slave) 2932 memset(&slave->eh_info, 0, sizeof(slave->eh_info)); 2933 ap->pflags &= ~ATA_PFLAG_EH_PENDING; 2934 spin_unlock_irqrestore(link->ap->lock, flags); 2935 2936 if (ap->pflags & ATA_PFLAG_FROZEN) 2937 ata_eh_thaw_port(ap); 2938 2939 /* 2940 * Make sure onlineness and classification result correspond. 2941 * Hotplug could have happened during reset and some 2942 * controllers fail to wait while a drive is spinning up after 2943 * being hotplugged causing misdetection. By cross checking 2944 * link on/offlineness and classification result, those 2945 * conditions can be reliably detected and retried. 2946 */ 2947 nr_unknown = 0; 2948 ata_for_each_dev(dev, link, ALL) { 2949 if (ata_phys_link_online(ata_dev_phys_link(dev))) { 2950 if (classes[dev->devno] == ATA_DEV_UNKNOWN) { 2951 ata_dev_dbg(dev, "link online but device misclassified\n"); 2952 classes[dev->devno] = ATA_DEV_NONE; 2953 nr_unknown++; 2954 } 2955 } else if (ata_phys_link_offline(ata_dev_phys_link(dev))) { 2956 if (ata_class_enabled(classes[dev->devno])) 2957 ata_dev_dbg(dev, 2958 "link offline, clearing class %d to NONE\n", 2959 classes[dev->devno]); 2960 classes[dev->devno] = ATA_DEV_NONE; 2961 } else if (classes[dev->devno] == ATA_DEV_UNKNOWN) { 2962 ata_dev_dbg(dev, 2963 "link status unknown, clearing UNKNOWN to NONE\n"); 2964 classes[dev->devno] = ATA_DEV_NONE; 2965 } 2966 } 2967 2968 if (classify && nr_unknown) { 2969 if (try < max_tries) { 2970 ata_link_warn(link, 2971 "link online but %d devices misclassified, retrying\n", 2972 nr_unknown); 2973 failed_link = link; 2974 rc = -EAGAIN; 2975 goto fail; 2976 } 2977 ata_link_warn(link, 2978 "link online but %d devices misclassified, " 2979 "device detection might fail\n", nr_unknown); 2980 } 2981 2982 /* reset successful, schedule revalidation */ 2983 ata_eh_done(link, NULL, ATA_EH_RESET); 2984 if (slave) 2985 ata_eh_done(slave, NULL, ATA_EH_RESET); 2986 ehc->last_reset = jiffies; /* update to completion time */ 2987 ehc->i.action |= ATA_EH_REVALIDATE; 2988 link->lpm_policy = ATA_LPM_UNKNOWN; /* reset LPM state */ 2989 2990 rc = 0; 2991 out: 2992 /* clear hotplug flag */ 2993 ehc->i.flags &= ~ATA_EHI_HOTPLUGGED; 2994 if (slave) 2995 sehc->i.flags &= ~ATA_EHI_HOTPLUGGED; 2996 2997 spin_lock_irqsave(ap->lock, flags); 2998 ap->pflags &= ~ATA_PFLAG_RESETTING; 2999 spin_unlock_irqrestore(ap->lock, flags); 3000 3001 return rc; 3002 3003 fail: 3004 /* if SCR isn't accessible on a fan-out port, PMP needs to be reset */ 3005 if (!ata_is_host_link(link) && 3006 sata_scr_read(link, SCR_STATUS, &sstatus)) 3007 rc = -ERESTART; 3008 3009 if (try >= max_tries) { 3010 /* 3011 * Thaw host port even if reset failed, so that the port 3012 * can be retried on the next phy event. This risks 3013 * repeated EH runs but seems to be a better tradeoff than 3014 * shutting down a port after a botched hotplug attempt. 3015 */ 3016 if (ata_is_host_link(link)) 3017 ata_eh_thaw_port(ap); 3018 goto out; 3019 } 3020 3021 now = jiffies; 3022 if (time_before(now, deadline)) { 3023 unsigned long delta = deadline - now; 3024 3025 ata_link_warn(failed_link, 3026 "reset failed (errno=%d), retrying in %u secs\n", 3027 rc, DIV_ROUND_UP(jiffies_to_msecs(delta), 1000)); 3028 3029 ata_eh_release(ap); 3030 while (delta) 3031 delta = schedule_timeout_uninterruptible(delta); 3032 ata_eh_acquire(ap); 3033 } 3034 3035 /* 3036 * While disks spin up behind PMP, some controllers fail sending SRST.
3037 * They need to be reset - as well as the PMP - before retrying. 3038 */ 3039 if (rc == -ERESTART) { 3040 if (ata_is_host_link(link)) 3041 ata_eh_thaw_port(ap); 3042 goto out; 3043 } 3044 3045 if (try == max_tries - 1) { 3046 sata_down_spd_limit(link, 0); 3047 if (slave) 3048 sata_down_spd_limit(slave, 0); 3049 } else if (rc == -EPIPE) 3050 sata_down_spd_limit(failed_link, 0); 3051 3052 if (hardreset) 3053 reset = hardreset; 3054 goto retry; 3055 } 3056 3057 static inline void ata_eh_pull_park_action(struct ata_port *ap) 3058 { 3059 struct ata_link *link; 3060 struct ata_device *dev; 3061 unsigned long flags; 3062 3063 /* 3064 * This function can be thought of as an extended version of 3065 * ata_eh_about_to_do() specially crafted to accommodate the 3066 * requirements of ATA_EH_PARK handling. Since the EH thread 3067 * does not leave the do {} while () loop in ata_eh_recover as 3068 * long as the timeout for a park request to *one* device on 3069 * the port has not expired, and since we still want to pick 3070 * up park requests to other devices on the same port or 3071 * timeout updates for the same device, we have to pull 3072 * ATA_EH_PARK actions from eh_info into eh_context.i 3073 * ourselves at the beginning of each pass over the loop. 3074 * 3075 * Additionally, all write accesses to &ap->park_req_pending 3076 * through reinit_completion() (see below) or complete_all() 3077 * (see ata_scsi_park_store()) are protected by the host lock. 3078 * As a result we have that park_req_pending.done is zero on 3079 * exit from this function, i.e. when ATA_EH_PARK actions for 3080 * *all* devices on port ap have been pulled into the 3081 * respective eh_context structs. If, and only if, 3082 * park_req_pending.done is non-zero by the time we reach 3083 * wait_for_completion_timeout(), another ATA_EH_PARK action 3084 * has been scheduled for at least one of the devices on port 3085 * ap and we have to cycle over the do {} while () loop in 3086 * ata_eh_recover() again. 
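 *
 * In short: pull all pending ATA_EH_PARK requests into eh_context
 * under the host lock and rearm park_req_pending, so that a park
 * request arriving after this point is noticed by the wait in
 * ata_eh_recover().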
3087 */ 3088 3089 spin_lock_irqsave(ap->lock, flags); 3090 reinit_completion(&ap->park_req_pending); 3091 ata_for_each_link(link, ap, EDGE) { 3092 ata_for_each_dev(dev, link, ALL) { 3093 struct ata_eh_info *ehi = &link->eh_info; 3094 3095 link->eh_context.i.dev_action[dev->devno] |= 3096 ehi->dev_action[dev->devno] & ATA_EH_PARK; 3097 ata_eh_clear_action(link, dev, ehi, ATA_EH_PARK); 3098 } 3099 } 3100 spin_unlock_irqrestore(ap->lock, flags); 3101 } 3102 3103 static void ata_eh_park_issue_cmd(struct ata_device *dev, int park) 3104 { 3105 struct ata_eh_context *ehc = &dev->link->eh_context; 3106 struct ata_taskfile tf; 3107 unsigned int err_mask; 3108 3109 ata_tf_init(dev, &tf); 3110 if (park) { 3111 ehc->unloaded_mask |= 1 << dev->devno; 3112 tf.command = ATA_CMD_IDLEIMMEDIATE; 3113 tf.feature = 0x44; 3114 tf.lbal = 0x4c; 3115 tf.lbam = 0x4e; 3116 tf.lbah = 0x55; 3117 } else { 3118 ehc->unloaded_mask &= ~(1 << dev->devno); 3119 tf.command = ATA_CMD_CHK_POWER; 3120 } 3121 3122 tf.flags |= ATA_TFLAG_DEVICE | ATA_TFLAG_ISADDR; 3123 tf.protocol = ATA_PROT_NODATA; 3124 err_mask = ata_exec_internal(dev, &tf, NULL, DMA_NONE, NULL, 0, 0); 3125 if (park && (err_mask || tf.lbal != 0xc4)) { 3126 ata_dev_err(dev, "head unload failed!\n"); 3127 ehc->unloaded_mask &= ~(1 << dev->devno); 3128 } 3129 } 3130 3131 static int ata_eh_revalidate_and_attach(struct ata_link *link, 3132 struct ata_device **r_failed_dev) 3133 { 3134 struct ata_port *ap = link->ap; 3135 struct ata_eh_context *ehc = &link->eh_context; 3136 struct ata_device *dev; 3137 unsigned int new_mask = 0; 3138 unsigned long flags; 3139 int rc = 0; 3140 3141 DPRINTK("ENTER\n"); 3142 3143 /* For PATA drive side cable detection to work, IDENTIFY must 3144 * be done backwards such that PDIAG- is released by the slave 3145 * device before the master device is identified. 3146 */ 3147 ata_for_each_dev(dev, link, ALL_REVERSE) { 3148 unsigned int action = ata_eh_dev_action(dev); 3149 unsigned int readid_flags = 0; 3150 3151 if (ehc->i.flags & ATA_EHI_DID_RESET) 3152 readid_flags |= ATA_READID_POSTRESET; 3153 3154 if ((action & ATA_EH_REVALIDATE) && ata_dev_enabled(dev)) { 3155 WARN_ON(dev->class == ATA_DEV_PMP); 3156 3157 if (ata_phys_link_offline(ata_dev_phys_link(dev))) { 3158 rc = -EIO; 3159 goto err; 3160 } 3161 3162 ata_eh_about_to_do(link, dev, ATA_EH_REVALIDATE); 3163 rc = ata_dev_revalidate(dev, ehc->classes[dev->devno], 3164 readid_flags); 3165 if (rc) 3166 goto err; 3167 3168 ata_eh_done(link, dev, ATA_EH_REVALIDATE); 3169 3170 /* Configuration may have changed, reconfigure 3171 * transfer mode. 3172 */ 3173 ehc->i.flags |= ATA_EHI_SETMODE; 3174 3175 /* schedule the scsi_rescan_device() here */ 3176 schedule_work(&(ap->scsi_rescan_task)); 3177 } else if (dev->class == ATA_DEV_UNKNOWN && 3178 ehc->tries[dev->devno] && 3179 ata_class_enabled(ehc->classes[dev->devno])) { 3180 /* Temporarily set dev->class, it will be 3181 * permanently set once all configurations are 3182 * complete. This is necessary because new 3183 * device configuration is done in two 3184 * separate loops. 
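 * (This reverse loop only reads IDs; the forward loop further down
 * performs the actual configuration.)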
3185 */ 3186 dev->class = ehc->classes[dev->devno]; 3187 3188 if (dev->class == ATA_DEV_PMP) 3189 rc = sata_pmp_attach(dev); 3190 else 3191 rc = ata_dev_read_id(dev, &dev->class, 3192 readid_flags, dev->id); 3193 3194 /* read_id might have changed class, store and reset */ 3195 ehc->classes[dev->devno] = dev->class; 3196 dev->class = ATA_DEV_UNKNOWN; 3197 3198 switch (rc) { 3199 case 0: 3200 /* clear error info accumulated during probe */ 3201 ata_ering_clear(&dev->ering); 3202 new_mask |= 1 << dev->devno; 3203 break; 3204 case -ENOENT: 3205 /* IDENTIFY was issued to non-existent 3206 * device. No need to reset. Just 3207 * thaw and ignore the device. 3208 */ 3209 ata_eh_thaw_port(ap); 3210 break; 3211 default: 3212 goto err; 3213 } 3214 } 3215 } 3216 3217 /* PDIAG- should have been released, ask cable type if post-reset */ 3218 if ((ehc->i.flags & ATA_EHI_DID_RESET) && ata_is_host_link(link)) { 3219 if (ap->ops->cable_detect) 3220 ap->cbl = ap->ops->cable_detect(ap); 3221 ata_force_cbl(ap); 3222 } 3223 3224 /* Configure new devices forward such that user doesn't see 3225 * device detection messages backwards. 3226 */ 3227 ata_for_each_dev(dev, link, ALL) { 3228 if (!(new_mask & (1 << dev->devno))) 3229 continue; 3230 3231 dev->class = ehc->classes[dev->devno]; 3232 3233 if (dev->class == ATA_DEV_PMP) 3234 continue; 3235 3236 ehc->i.flags |= ATA_EHI_PRINTINFO; 3237 rc = ata_dev_configure(dev); 3238 ehc->i.flags &= ~ATA_EHI_PRINTINFO; 3239 if (rc) { 3240 dev->class = ATA_DEV_UNKNOWN; 3241 goto err; 3242 } 3243 3244 spin_lock_irqsave(ap->lock, flags); 3245 ap->pflags |= ATA_PFLAG_SCSI_HOTPLUG; 3246 spin_unlock_irqrestore(ap->lock, flags); 3247 3248 /* new device discovered, configure xfermode */ 3249 ehc->i.flags |= ATA_EHI_SETMODE; 3250 } 3251 3252 return 0; 3253 3254 err: 3255 *r_failed_dev = dev; 3256 DPRINTK("EXIT rc=%d\n", rc); 3257 return rc; 3258 } 3259 3260 /** 3261 * ata_set_mode - Program timings and issue SET FEATURES - XFER 3262 * @link: link on which timings will be programmed 3263 * @r_failed_dev: out parameter for failed device 3264 * 3265 * Set ATA device disk transfer mode (PIO3, UDMA6, etc.). If 3266 * ata_set_mode() fails, pointer to the failing device is 3267 * returned in @r_failed_dev. 3268 * 3269 * LOCKING: 3270 * PCI/etc. bus probe sem. 3271 * 3272 * RETURNS: 3273 * 0 on success, negative errno otherwise 3274 */ 3275 int ata_set_mode(struct ata_link *link, struct ata_device **r_failed_dev) 3276 { 3277 struct ata_port *ap = link->ap; 3278 struct ata_device *dev; 3279 int rc; 3280 3281 /* if data transfer is verified, clear DUBIOUS_XFER on ering top */ 3282 ata_for_each_dev(dev, link, ENABLED) { 3283 if (!(dev->flags & ATA_DFLAG_DUBIOUS_XFER)) { 3284 struct ata_ering_entry *ent; 3285 3286 ent = ata_ering_top(&dev->ering); 3287 if (ent) 3288 ent->eflags &= ~ATA_EFLAG_DUBIOUS_XFER; 3289 } 3290 } 3291 3292 /* has private set_mode? 
*/ 3293 if (ap->ops->set_mode) 3294 rc = ap->ops->set_mode(link, r_failed_dev); 3295 else 3296 rc = ata_do_set_mode(link, r_failed_dev); 3297 3298 /* if transfer mode has changed, set DUBIOUS_XFER on device */ 3299 ata_for_each_dev(dev, link, ENABLED) { 3300 struct ata_eh_context *ehc = &link->eh_context; 3301 u8 saved_xfer_mode = ehc->saved_xfer_mode[dev->devno]; 3302 u8 saved_ncq = !!(ehc->saved_ncq_enabled & (1 << dev->devno)); 3303 3304 if (dev->xfer_mode != saved_xfer_mode || 3305 ata_ncq_enabled(dev) != saved_ncq) 3306 dev->flags |= ATA_DFLAG_DUBIOUS_XFER; 3307 } 3308 3309 return rc; 3310 } 3311 3312 /** 3313 * atapi_eh_clear_ua - Clear ATAPI UNIT ATTENTION after reset 3314 * @dev: ATAPI device to clear UA for 3315 * 3316 * Resets and other operations can make an ATAPI device raise 3317 * UNIT ATTENTION which causes the next operation to fail. This 3318 * function clears UA. 3319 * 3320 * LOCKING: 3321 * EH context (may sleep). 3322 * 3323 * RETURNS: 3324 * 0 on success, -errno on failure. 3325 */ 3326 static int atapi_eh_clear_ua(struct ata_device *dev) 3327 { 3328 int i; 3329 3330 for (i = 0; i < ATA_EH_UA_TRIES; i++) { 3331 u8 *sense_buffer = dev->link->ap->sector_buf; 3332 u8 sense_key = 0; 3333 unsigned int err_mask; 3334 3335 err_mask = atapi_eh_tur(dev, &sense_key); 3336 if (err_mask != 0 && err_mask != AC_ERR_DEV) { 3337 ata_dev_warn(dev, 3338 "TEST_UNIT_READY failed (err_mask=0x%x)\n", 3339 err_mask); 3340 return -EIO; 3341 } 3342 3343 if (!err_mask || sense_key != UNIT_ATTENTION) 3344 return 0; 3345 3346 err_mask = atapi_eh_request_sense(dev, sense_buffer, sense_key); 3347 if (err_mask) { 3348 ata_dev_warn(dev, "failed to clear " 3349 "UNIT ATTENTION (err_mask=0x%x)\n", err_mask); 3350 return -EIO; 3351 } 3352 } 3353 3354 ata_dev_warn(dev, "UNIT ATTENTION persists after %d tries\n", 3355 ATA_EH_UA_TRIES); 3356 3357 return 0; 3358 } 3359 3360 /** 3361 * ata_eh_maybe_retry_flush - Retry FLUSH if necessary 3362 * @dev: ATA device which may need FLUSH retry 3363 * 3364 * If @dev failed FLUSH, it needs to be reported to the upper layer 3365 * immediately as it means that @dev failed to remap and has already 3366 * lost at least a sector and further FLUSH retries won't make 3367 * any difference to the lost sector. However, if FLUSH failed 3368 * for other reasons, for example a transmission error, FLUSH needs 3369 * to be retried. 3370 * 3371 * This function determines whether FLUSH failure retry is 3372 * necessary and performs it if so. 3373 * 3374 * RETURNS: 3375 * 0 if EH can continue, -errno if EH needs to be repeated. 3376 */ 3377 static int ata_eh_maybe_retry_flush(struct ata_device *dev) 3378 { 3379 struct ata_link *link = dev->link; 3380 struct ata_port *ap = link->ap; 3381 struct ata_queued_cmd *qc; 3382 struct ata_taskfile tf; 3383 unsigned int err_mask; 3384 int rc = 0; 3385 3386 /* did flush fail for this device?
*/ 3387 if (!ata_tag_valid(link->active_tag)) 3388 return 0; 3389 3390 qc = __ata_qc_from_tag(ap, link->active_tag); 3391 if (qc->dev != dev || (qc->tf.command != ATA_CMD_FLUSH_EXT && 3392 qc->tf.command != ATA_CMD_FLUSH)) 3393 return 0; 3394 3395 /* if the device failed it, it should be reported to upper layers */ 3396 if (qc->err_mask & AC_ERR_DEV) 3397 return 0; 3398 3399 /* flush failed for some other reason, give it another shot */ 3400 ata_tf_init(dev, &tf); 3401 3402 tf.command = qc->tf.command; 3403 tf.flags |= ATA_TFLAG_DEVICE; 3404 tf.protocol = ATA_PROT_NODATA; 3405 3406 ata_dev_warn(dev, "retrying FLUSH 0x%x Emask 0x%x\n", 3407 tf.command, qc->err_mask); 3408 3409 err_mask = ata_exec_internal(dev, &tf, NULL, DMA_NONE, NULL, 0, 0); 3410 if (!err_mask) { 3411 /* 3412 * FLUSH is complete but there's no way to 3413 * successfully complete a failed command from EH. 3414 * Making sure retry is allowed at least once and 3415 * retrying it should do the trick - whatever was in 3416 * the cache is already on the platter and this won't 3417 * cause an infinite loop. 3418 */ 3419 qc->scsicmd->allowed = max(qc->scsicmd->allowed, 1); 3420 } else { 3421 ata_dev_warn(dev, "FLUSH failed Emask 0x%x\n", 3422 err_mask); 3423 rc = -EIO; 3424 3425 /* if device failed it, report it to upper layers */ 3426 if (err_mask & AC_ERR_DEV) { 3427 qc->err_mask |= AC_ERR_DEV; 3428 qc->result_tf = tf; 3429 if (!(ap->pflags & ATA_PFLAG_FROZEN)) 3430 rc = 0; 3431 } 3432 } 3433 return rc; 3434 } 3435 3436 /** 3437 * ata_eh_set_lpm - configure SATA interface power management 3438 * @link: link to configure power management 3439 * @policy: the link power management policy 3440 * @r_failed_dev: out parameter for failed device 3441 * 3442 * Enable SATA Interface power management. This will enable 3443 * Device Interface Power Management (DIPM) for min_power and 3444 * medium_power_with_dipm policies, and then call driver-specific 3445 * callbacks for enabling Host Initiated Power management. 3446 * 3447 * LOCKING: 3448 * EH context. 3449 * 3450 * RETURNS: 3451 * 0 on success, -errno on failure. 3452 */ 3453 static int ata_eh_set_lpm(struct ata_link *link, enum ata_lpm_policy policy, 3454 struct ata_device **r_failed_dev) 3455 { 3456 struct ata_port *ap = ata_is_host_link(link) ? link->ap : NULL; 3457 struct ata_eh_context *ehc = &link->eh_context; 3458 struct ata_device *dev, *link_dev = NULL, *lpm_dev = NULL; 3459 enum ata_lpm_policy old_policy = link->lpm_policy; 3460 bool no_dipm = link->ap->flags & ATA_FLAG_NO_DIPM; 3461 unsigned int hints = ATA_LPM_EMPTY | ATA_LPM_HIPM; 3462 unsigned int err_mask; 3463 int rc; 3464 3465 /* if the link or host doesn't do LPM, noop */ 3466 if ((link->flags & ATA_LFLAG_NO_LPM) || (ap && !ap->ops->set_lpm)) 3467 return 0; 3468 3469 /* 3470 * DIPM is enabled only for MED_POWER_WITH_DIPM and deeper 3471 * policies as some devices misbehave when the host NACKs 3472 * transition to SLUMBER. Order device and link configurations 3473 * such that the host always allows DIPM requests.
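 * Concretely: DIPM is disabled on the devices before the link is
 * taken to a shallower policy, and re-enabled only after the host
 * side has acked the deeper policy.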
3474 */ 3475 ata_for_each_dev(dev, link, ENABLED) { 3476 bool hipm = ata_id_has_hipm(dev->id); 3477 bool dipm = ata_id_has_dipm(dev->id) && !no_dipm; 3478 3479 /* find the first enabled device and the first LPM-capable device */ 3480 if (!link_dev) 3481 link_dev = dev; 3482 3483 if (!lpm_dev && (hipm || dipm)) 3484 lpm_dev = dev; 3485 3486 hints &= ~ATA_LPM_EMPTY; 3487 if (!hipm) 3488 hints &= ~ATA_LPM_HIPM; 3489 3490 /* disable DIPM before changing link config */ 3491 if (policy < ATA_LPM_MED_POWER_WITH_DIPM && dipm) { 3492 err_mask = ata_dev_set_feature(dev, 3493 SETFEATURES_SATA_DISABLE, SATA_DIPM); 3494 if (err_mask && err_mask != AC_ERR_DEV) { 3495 ata_dev_warn(dev, 3496 "failed to disable DIPM, Emask 0x%x\n", 3497 err_mask); 3498 rc = -EIO; 3499 goto fail; 3500 } 3501 } 3502 } 3503 3504 if (ap) { 3505 rc = ap->ops->set_lpm(link, policy, hints); 3506 if (!rc && ap->slave_link) 3507 rc = ap->ops->set_lpm(ap->slave_link, policy, hints); 3508 } else 3509 rc = sata_pmp_set_lpm(link, policy, hints); 3510 3511 /* 3512 * Attribute link config failure to the first (LPM) enabled 3513 * device on the link. 3514 */ 3515 if (rc) { 3516 if (rc == -EOPNOTSUPP) { 3517 link->flags |= ATA_LFLAG_NO_LPM; 3518 return 0; 3519 } 3520 dev = lpm_dev ? lpm_dev : link_dev; 3521 goto fail; 3522 } 3523 3524 /* 3525 * Low level driver acked the transition. Issue DIPM command 3526 * with the new policy set. 3527 */ 3528 link->lpm_policy = policy; 3529 if (ap && ap->slave_link) 3530 ap->slave_link->lpm_policy = policy; 3531 3532 /* host config updated, enable DIPM if transitioning to MED_POWER_WITH_DIPM or deeper */ 3533 ata_for_each_dev(dev, link, ENABLED) { 3534 if (policy >= ATA_LPM_MED_POWER_WITH_DIPM && !no_dipm && 3535 ata_id_has_dipm(dev->id)) { 3536 err_mask = ata_dev_set_feature(dev, 3537 SETFEATURES_SATA_ENABLE, SATA_DIPM); 3538 if (err_mask && err_mask != AC_ERR_DEV) { 3539 ata_dev_warn(dev, 3540 "failed to enable DIPM, Emask 0x%x\n", 3541 err_mask); 3542 rc = -EIO; 3543 goto fail; 3544 } 3545 } 3546 } 3547 3548 link->last_lpm_change = jiffies; 3549 link->flags |= ATA_LFLAG_CHANGED; 3550 3551 return 0; 3552 3553 fail: 3554 /* restore the old policy */ 3555 link->lpm_policy = old_policy; 3556 if (ap && ap->slave_link) 3557 ap->slave_link->lpm_policy = old_policy; 3558 3559 /* if no device or only one more chance is left, disable LPM */ 3560 if (!dev || ehc->tries[dev->devno] <= 2) { 3561 ata_link_warn(link, "disabling LPM on the link\n"); 3562 link->flags |= ATA_LFLAG_NO_LPM; 3563 } 3564 if (r_failed_dev) 3565 *r_failed_dev = dev; 3566 return rc; 3567 } 3568 3569 int ata_link_nr_enabled(struct ata_link *link) 3570 { 3571 struct ata_device *dev; 3572 int cnt = 0; 3573 3574 ata_for_each_dev(dev, link, ENABLED) 3575 cnt++; 3576 return cnt; 3577 } 3578 3579 static int ata_link_nr_vacant(struct ata_link *link) 3580 { 3581 struct ata_device *dev; 3582 int cnt = 0; 3583 3584 ata_for_each_dev(dev, link, ALL) 3585 if (dev->class == ATA_DEV_UNKNOWN) 3586 cnt++; 3587 return cnt; 3588 } 3589 3590 static int ata_eh_skip_recovery(struct ata_link *link) 3591 { 3592 struct ata_port *ap = link->ap; 3593 struct ata_eh_context *ehc = &link->eh_context; 3594 struct ata_device *dev; 3595 3596 /* skip disabled links */ 3597 if (link->flags & ATA_LFLAG_DISABLED) 3598 return 1; 3599 3600 /* skip if explicitly requested */ 3601 if (ehc->i.flags & ATA_EHI_NO_RECOVERY) 3602 return 1; 3603 3604 /* thaw frozen port and recover failed devices */ 3605 if ((ap->pflags & ATA_PFLAG_FROZEN) || ata_link_nr_enabled(link)) 3606 return 0; 3607 3608 /* reset at least once if
reset is requested */ 3609 if ((ehc->i.action & ATA_EH_RESET) && 3610 !(ehc->i.flags & ATA_EHI_DID_RESET)) 3611 return 0; 3612 3613 /* skip if class codes for all vacant slots are ATA_DEV_NONE */ 3614 ata_for_each_dev(dev, link, ALL) { 3615 if (dev->class == ATA_DEV_UNKNOWN && 3616 ehc->classes[dev->devno] != ATA_DEV_NONE) 3617 return 0; 3618 } 3619 3620 return 1; 3621 } 3622 3623 static int ata_count_probe_trials_cb(struct ata_ering_entry *ent, void *void_arg) 3624 { 3625 u64 interval = msecs_to_jiffies(ATA_EH_PROBE_TRIAL_INTERVAL); 3626 u64 now = get_jiffies_64(); 3627 int *trials = void_arg; 3628 3629 if ((ent->eflags & ATA_EFLAG_OLD_ER) || 3630 (ent->timestamp < now - min(now, interval))) 3631 return -1; 3632 3633 (*trials)++; 3634 return 0; 3635 } 3636 3637 static int ata_eh_schedule_probe(struct ata_device *dev) 3638 { 3639 struct ata_eh_context *ehc = &dev->link->eh_context; 3640 struct ata_link *link = ata_dev_phys_link(dev); 3641 int trials = 0; 3642 3643 if (!(ehc->i.probe_mask & (1 << dev->devno)) || 3644 (ehc->did_probe_mask & (1 << dev->devno))) 3645 return 0; 3646 3647 ata_eh_detach_dev(dev); 3648 ata_dev_init(dev); 3649 ehc->did_probe_mask |= (1 << dev->devno); 3650 ehc->i.action |= ATA_EH_RESET; 3651 ehc->saved_xfer_mode[dev->devno] = 0; 3652 ehc->saved_ncq_enabled &= ~(1 << dev->devno); 3653 3654 /* the link may be in a deep sleep, wake it up */ 3655 if (link->lpm_policy > ATA_LPM_MAX_POWER) { 3656 if (ata_is_host_link(link)) 3657 link->ap->ops->set_lpm(link, ATA_LPM_MAX_POWER, 3658 ATA_LPM_EMPTY); 3659 else 3660 sata_pmp_set_lpm(link, ATA_LPM_MAX_POWER, 3661 ATA_LPM_EMPTY); 3662 } 3663 3664 /* Record and count probe trials on the ering. The specific 3665 * error mask used is irrelevant. Because a successful device 3666 * detection clears the ering, this count accumulates only if 3667 * there are consecutive failed probes. 3668 * 3669 * If the count is equal to or higher than ATA_EH_PROBE_TRIALS 3670 * in the last ATA_EH_PROBE_TRIAL_INTERVAL, link speed is 3671 * forced to 1.5Gbps. 3672 * 3673 * This is to work around cases where failed link speed 3674 * negotiation results in device misdetection leading to 3675 * infinite DEVXCHG or PHYRDY CHG events. 3676 */ 3677 ata_ering_record(&dev->ering, 0, AC_ERR_OTHER); 3678 ata_ering_map(&dev->ering, ata_count_probe_trials_cb, &trials); 3679 3680 if (trials > ATA_EH_PROBE_TRIALS) 3681 sata_down_spd_limit(link, 1); 3682 3683 return 1; 3684 } 3685 3686 static int ata_eh_handle_dev_fail(struct ata_device *dev, int err) 3687 { 3688 struct ata_eh_context *ehc = &dev->link->eh_context; 3689 3690 /* -EAGAIN from EH routine indicates retry without prejudice. 3691 * The requester is responsible for ensuring forward progress. 3692 */ 3693 if (err != -EAGAIN) 3694 ehc->tries[dev->devno]--; 3695 3696 switch (err) { 3697 case -ENODEV: 3698 /* device missing or wrong IDENTIFY data, schedule probing */ 3699 ehc->i.probe_mask |= (1 << dev->devno); 3700 /* fall through */ 3701 case -EINVAL: 3702 /* give it just one more chance */ 3703 ehc->tries[dev->devno] = min(ehc->tries[dev->devno], 1); 3704 /* fall through */ 3705 case -EIO: 3706 if (ehc->tries[dev->devno] == 1) { 3707 /* This is the last chance, better to slow 3708 * down than lose it.
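 * Both the SATA link speed and, if the device is above PIO 0,
 * the transfer mode are lowered below.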
3709 */ 3710 sata_down_spd_limit(ata_dev_phys_link(dev), 0); 3711 if (dev->pio_mode > XFER_PIO_0) 3712 ata_down_xfermask_limit(dev, ATA_DNXFER_PIO); 3713 } 3714 } 3715 3716 if (ata_dev_enabled(dev) && !ehc->tries[dev->devno]) { 3717 /* disable device if it has used up all its chances */ 3718 ata_dev_disable(dev); 3719 3720 /* detach if offline */ 3721 if (ata_phys_link_offline(ata_dev_phys_link(dev))) 3722 ata_eh_detach_dev(dev); 3723 3724 /* schedule probe if necessary */ 3725 if (ata_eh_schedule_probe(dev)) { 3726 ehc->tries[dev->devno] = ATA_EH_DEV_TRIES; 3727 memset(ehc->cmd_timeout_idx[dev->devno], 0, 3728 sizeof(ehc->cmd_timeout_idx[dev->devno])); 3729 } 3730 3731 return 1; 3732 } else { 3733 ehc->i.action |= ATA_EH_RESET; 3734 return 0; 3735 } 3736 } 3737 3738 /** 3739 * ata_eh_recover - recover host port after error 3740 * @ap: host port to recover 3741 * @prereset: prereset method (can be NULL) 3742 * @softreset: softreset method (can be NULL) 3743 * @hardreset: hardreset method (can be NULL) 3744 * @postreset: postreset method (can be NULL) 3745 * @r_failed_link: out parameter for failed link 3746 * 3747 * This is the alpha and omega, yin and yang, heart and soul of 3748 * libata exception handling. On entry, actions required to 3749 * recover each link and hotplug requests are recorded in the 3750 * link's eh_context. This function executes all the operations 3751 * with appropriate retries and fallbacks to resurrect failed 3752 * devices, detach goners and greet newcomers. 3753 * 3754 * LOCKING: 3755 * Kernel thread context (may sleep). 3756 * 3757 * RETURNS: 3758 * 0 on success, -errno on failure. 3759 */ 3760 int ata_eh_recover(struct ata_port *ap, ata_prereset_fn_t prereset, 3761 ata_reset_fn_t softreset, ata_reset_fn_t hardreset, 3762 ata_postreset_fn_t postreset, 3763 struct ata_link **r_failed_link) 3764 { 3765 struct ata_link *link; 3766 struct ata_device *dev; 3767 int rc, nr_fails; 3768 unsigned long flags, deadline; 3769 3770 DPRINTK("ENTER\n"); 3771 3772 /* prep for recovery */ 3773 ata_for_each_link(link, ap, EDGE) { 3774 struct ata_eh_context *ehc = &link->eh_context; 3775 3776 /* re-enable link? */ 3777 if (ehc->i.action & ATA_EH_ENABLE_LINK) { 3778 ata_eh_about_to_do(link, NULL, ATA_EH_ENABLE_LINK); 3779 spin_lock_irqsave(ap->lock, flags); 3780 link->flags &= ~ATA_LFLAG_DISABLED; 3781 spin_unlock_irqrestore(ap->lock, flags); 3782 ata_eh_done(link, NULL, ATA_EH_ENABLE_LINK); 3783 } 3784 3785 ata_for_each_dev(dev, link, ALL) { 3786 if (link->flags & ATA_LFLAG_NO_RETRY) 3787 ehc->tries[dev->devno] = 1; 3788 else 3789 ehc->tries[dev->devno] = ATA_EH_DEV_TRIES; 3790 3791 /* collect port action mask recorded in dev actions */ 3792 ehc->i.action |= ehc->i.dev_action[dev->devno] & 3793 ~ATA_EH_PERDEV_MASK; 3794 ehc->i.dev_action[dev->devno] &= ATA_EH_PERDEV_MASK; 3795 3796 /* process hotplug request */ 3797 if (dev->flags & ATA_DFLAG_DETACH) 3798 ata_eh_detach_dev(dev); 3799 3800 /* schedule probe if necessary */ 3801 if (!ata_dev_enabled(dev)) 3802 ata_eh_schedule_probe(dev); 3803 } 3804 } 3805 3806 retry: 3807 rc = 0; 3808 3809 /* if UNLOADING, finish immediately */ 3810 if (ap->pflags & ATA_PFLAG_UNLOADING) 3811 goto out; 3812 3813 /* prep for EH */ 3814 ata_for_each_link(link, ap, EDGE) { 3815 struct ata_eh_context *ehc = &link->eh_context; 3816 3817 /* skip EH if possible.
*/ 3818 if (ata_eh_skip_recovery(link)) 3819 ehc->i.action = 0; 3820 3821 ata_for_each_dev(dev, link, ALL) 3822 ehc->classes[dev->devno] = ATA_DEV_UNKNOWN; 3823 } 3824 3825 /* reset */ 3826 ata_for_each_link(link, ap, EDGE) { 3827 struct ata_eh_context *ehc = &link->eh_context; 3828 3829 if (!(ehc->i.action & ATA_EH_RESET)) 3830 continue; 3831 3832 rc = ata_eh_reset(link, ata_link_nr_vacant(link), 3833 prereset, softreset, hardreset, postreset); 3834 if (rc) { 3835 ata_link_err(link, "reset failed, giving up\n"); 3836 goto out; 3837 } 3838 } 3839 3840 do { 3841 unsigned long now; 3842 3843 /* 3844 * clears ATA_EH_PARK in eh_info and resets 3845 * ap->park_req_pending 3846 */ 3847 ata_eh_pull_park_action(ap); 3848 3849 deadline = jiffies; 3850 ata_for_each_link(link, ap, EDGE) { 3851 ata_for_each_dev(dev, link, ALL) { 3852 struct ata_eh_context *ehc = &link->eh_context; 3853 unsigned long tmp; 3854 3855 if (dev->class != ATA_DEV_ATA && 3856 dev->class != ATA_DEV_ZAC) 3857 continue; 3858 if (!(ehc->i.dev_action[dev->devno] & 3859 ATA_EH_PARK)) 3860 continue; 3861 tmp = dev->unpark_deadline; 3862 if (time_before(deadline, tmp)) 3863 deadline = tmp; 3864 else if (time_before_eq(tmp, jiffies)) 3865 continue; 3866 if (ehc->unloaded_mask & (1 << dev->devno)) 3867 continue; 3868 3869 ata_eh_park_issue_cmd(dev, 1); 3870 } 3871 } 3872 3873 now = jiffies; 3874 if (time_before_eq(deadline, now)) 3875 break; 3876 3877 ata_eh_release(ap); 3878 deadline = wait_for_completion_timeout(&ap->park_req_pending, 3879 deadline - now); 3880 ata_eh_acquire(ap); 3881 } while (deadline); 3882 ata_for_each_link(link, ap, EDGE) { 3883 ata_for_each_dev(dev, link, ALL) { 3884 if (!(link->eh_context.unloaded_mask & 3885 (1 << dev->devno))) 3886 continue; 3887 3888 ata_eh_park_issue_cmd(dev, 0); 3889 ata_eh_done(link, dev, ATA_EH_PARK); 3890 } 3891 } 3892 3893 /* the rest */ 3894 nr_fails = 0; 3895 ata_for_each_link(link, ap, PMP_FIRST) { 3896 struct ata_eh_context *ehc = &link->eh_context; 3897 3898 if (sata_pmp_attached(ap) && ata_is_host_link(link)) 3899 goto config_lpm; 3900 3901 /* revalidate existing devices and attach new ones */ 3902 rc = ata_eh_revalidate_and_attach(link, &dev); 3903 if (rc) 3904 goto rest_fail; 3905 3906 /* if PMP got attached, return, pmp EH will take care of it */ 3907 if (link->device->class == ATA_DEV_PMP) { 3908 ehc->i.action = 0; 3909 return 0; 3910 } 3911 3912 /* configure transfer mode if necessary */ 3913 if (ehc->i.flags & ATA_EHI_SETMODE) { 3914 rc = ata_set_mode(link, &dev); 3915 if (rc) 3916 goto rest_fail; 3917 ehc->i.flags &= ~ATA_EHI_SETMODE; 3918 } 3919 3920 /* If reset has been issued, clear UA to avoid 3921 * disrupting the current users of the device. 
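 * Only ATAPI devices are walked here; atapi_eh_clear_ua() spins on
 * TEST UNIT READY / REQUEST SENSE, which ATA devices don't need.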
3922 */ 3923 if (ehc->i.flags & ATA_EHI_DID_RESET) { 3924 ata_for_each_dev(dev, link, ALL) { 3925 if (dev->class != ATA_DEV_ATAPI) 3926 continue; 3927 rc = atapi_eh_clear_ua(dev); 3928 if (rc) 3929 goto rest_fail; 3930 if (zpodd_dev_enabled(dev)) 3931 zpodd_post_poweron(dev); 3932 } 3933 } 3934 3935 /* retry flush if necessary */ 3936 ata_for_each_dev(dev, link, ALL) { 3937 if (dev->class != ATA_DEV_ATA && 3938 dev->class != ATA_DEV_ZAC) 3939 continue; 3940 rc = ata_eh_maybe_retry_flush(dev); 3941 if (rc) 3942 goto rest_fail; 3943 } 3944 3945 config_lpm: 3946 /* configure link power saving */ 3947 if (link->lpm_policy != ap->target_lpm_policy) { 3948 rc = ata_eh_set_lpm(link, ap->target_lpm_policy, &dev); 3949 if (rc) 3950 goto rest_fail; 3951 } 3952 3953 /* this link is okay now */ 3954 ehc->i.flags = 0; 3955 continue; 3956 3957 rest_fail: 3958 nr_fails++; 3959 if (dev) 3960 ata_eh_handle_dev_fail(dev, rc); 3961 3962 if (ap->pflags & ATA_PFLAG_FROZEN) { 3963 /* PMP reset requires working host port. 3964 * Can't retry if it's frozen. 3965 */ 3966 if (sata_pmp_attached(ap)) 3967 goto out; 3968 break; 3969 } 3970 } 3971 3972 if (nr_fails) 3973 goto retry; 3974 3975 out: 3976 if (rc && r_failed_link) 3977 *r_failed_link = link; 3978 3979 DPRINTK("EXIT, rc=%d\n", rc); 3980 return rc; 3981 } 3982 3983 /** 3984 * ata_eh_finish - finish up EH 3985 * @ap: host port to finish EH for 3986 * 3987 * Recovery is complete. Clean up EH states and retry or finish 3988 * failed qcs. 3989 * 3990 * LOCKING: 3991 * None. 3992 */ 3993 void ata_eh_finish(struct ata_port *ap) 3994 { 3995 int tag; 3996 3997 /* retry or finish qcs */ 3998 for (tag = 0; tag < ATA_MAX_QUEUE; tag++) { 3999 struct ata_queued_cmd *qc = __ata_qc_from_tag(ap, tag); 4000 4001 if (!(qc->flags & ATA_QCFLAG_FAILED)) 4002 continue; 4003 4004 if (qc->err_mask) { 4005 /* FIXME: Once EH migration is complete, 4006 * generate sense data in this function, 4007 * considering both err_mask and tf. 4008 */ 4009 if (qc->flags & ATA_QCFLAG_RETRY) 4010 ata_eh_qc_retry(qc); 4011 else 4012 ata_eh_qc_complete(qc); 4013 } else { 4014 if (qc->flags & ATA_QCFLAG_SENSE_VALID) { 4015 ata_eh_qc_complete(qc); 4016 } else { 4017 /* feed zero TF to sense generation */ 4018 memset(&qc->result_tf, 0, sizeof(qc->result_tf)); 4019 ata_eh_qc_retry(qc); 4020 } 4021 } 4022 } 4023 4024 /* make sure nr_active_links is zero after EH */ 4025 WARN_ON(ap->nr_active_links); 4026 ap->nr_active_links = 0; 4027 } 4028 4029 /** 4030 * ata_do_eh - do standard error handling 4031 * @ap: host port to handle error for 4032 * 4033 * @prereset: prereset method (can be NULL) 4034 * @softreset: softreset method (can be NULL) 4035 * @hardreset: hardreset method (can be NULL) 4036 * @postreset: postreset method (can be NULL) 4037 * 4038 * Perform standard error handling sequence. 4039 * 4040 * LOCKING: 4041 * Kernel thread context (may sleep). 
4042 */ 4043 void ata_do_eh(struct ata_port *ap, ata_prereset_fn_t prereset, 4044 ata_reset_fn_t softreset, ata_reset_fn_t hardreset, 4045 ata_postreset_fn_t postreset) 4046 { 4047 struct ata_device *dev; 4048 int rc; 4049 4050 ata_eh_autopsy(ap); 4051 ata_eh_report(ap); 4052 4053 rc = ata_eh_recover(ap, prereset, softreset, hardreset, postreset, 4054 NULL); 4055 if (rc) { 4056 ata_for_each_dev(dev, &ap->link, ALL) 4057 ata_dev_disable(dev); 4058 } 4059 4060 ata_eh_finish(ap); 4061 } 4062 4063 /** 4064 * ata_std_error_handler - standard error handler 4065 * @ap: host port to handle error for 4066 * 4067 * Standard error handler 4068 * 4069 * LOCKING: 4070 * Kernel thread context (may sleep). 4071 */ 4072 void ata_std_error_handler(struct ata_port *ap) 4073 { 4074 struct ata_port_operations *ops = ap->ops; 4075 ata_reset_fn_t hardreset = ops->hardreset; 4076 4077 /* ignore built-in hardreset if SCR access is not available */ 4078 if (hardreset == sata_std_hardreset && !sata_scr_valid(&ap->link)) 4079 hardreset = NULL; 4080 4081 ata_do_eh(ap, ops->prereset, ops->softreset, hardreset, ops->postreset); 4082 } 4083 4084 #ifdef CONFIG_PM 4085 /** 4086 * ata_eh_handle_port_suspend - perform port suspend operation 4087 * @ap: port to suspend 4088 * 4089 * Suspend @ap. 4090 * 4091 * LOCKING: 4092 * Kernel thread context (may sleep). 4093 */ 4094 static void ata_eh_handle_port_suspend(struct ata_port *ap) 4095 { 4096 unsigned long flags; 4097 int rc = 0; 4098 struct ata_device *dev; 4099 4100 /* are we suspending? */ 4101 spin_lock_irqsave(ap->lock, flags); 4102 if (!(ap->pflags & ATA_PFLAG_PM_PENDING) || 4103 ap->pm_mesg.event & PM_EVENT_RESUME) { 4104 spin_unlock_irqrestore(ap->lock, flags); 4105 return; 4106 } 4107 spin_unlock_irqrestore(ap->lock, flags); 4108 4109 WARN_ON(ap->pflags & ATA_PFLAG_SUSPENDED); 4110 4111 /* 4112 * If we have a ZPODD attached, check its zero 4113 * power ready status before the port is frozen. 4114 * Only needed for runtime suspend. 4115 */ 4116 if (PMSG_IS_AUTO(ap->pm_mesg)) { 4117 ata_for_each_dev(dev, &ap->link, ENABLED) { 4118 if (zpodd_dev_enabled(dev)) 4119 zpodd_on_suspend(dev); 4120 } 4121 } 4122 4123 /* tell ACPI we're suspending */ 4124 rc = ata_acpi_on_suspend(ap); 4125 if (rc) 4126 goto out; 4127 4128 /* suspend */ 4129 ata_eh_freeze_port(ap); 4130 4131 if (ap->ops->port_suspend) 4132 rc = ap->ops->port_suspend(ap, ap->pm_mesg); 4133 4134 ata_acpi_set_state(ap, ap->pm_mesg); 4135 out: 4136 /* update the flags */ 4137 spin_lock_irqsave(ap->lock, flags); 4138 4139 ap->pflags &= ~ATA_PFLAG_PM_PENDING; 4140 if (rc == 0) 4141 ap->pflags |= ATA_PFLAG_SUSPENDED; 4142 else if (ap->pflags & ATA_PFLAG_FROZEN) 4143 ata_port_schedule_eh(ap); 4144 4145 spin_unlock_irqrestore(ap->lock, flags); 4146 4147 return; 4148 } 4149 4150 /** 4151 * ata_eh_handle_port_resume - perform port resume operation 4152 * @ap: port to resume 4153 * 4154 * Resume @ap. 4155 * 4156 * LOCKING: 4157 * Kernel thread context (may sleep). 4158 */ 4159 static void ata_eh_handle_port_resume(struct ata_port *ap) 4160 { 4161 struct ata_link *link; 4162 struct ata_device *dev; 4163 unsigned long flags; 4164 4165 /* are we resuming? 

/**
 *	ata_eh_handle_port_resume - perform port resume operation
 *	@ap: port to resume
 *
 *	Resume @ap.
 *
 *	LOCKING:
 *	Kernel thread context (may sleep).
 */
static void ata_eh_handle_port_resume(struct ata_port *ap)
{
	struct ata_link *link;
	struct ata_device *dev;
	unsigned long flags;

	/* are we resuming? */
	spin_lock_irqsave(ap->lock, flags);
	if (!(ap->pflags & ATA_PFLAG_PM_PENDING) ||
	    !(ap->pm_mesg.event & PM_EVENT_RESUME)) {
		spin_unlock_irqrestore(ap->lock, flags);
		return;
	}
	spin_unlock_irqrestore(ap->lock, flags);

	WARN_ON(!(ap->pflags & ATA_PFLAG_SUSPENDED));

	/*
	 * Error timestamps are kept in jiffies, which doesn't advance
	 * while the machine is suspended, and PHY events during resume
	 * aren't uncommon.  Combined, the two can lead to unnecessary
	 * speed downs if the machine is suspended and resumed
	 * repeatedly.  Clear the error history.
	 */
	ata_for_each_link(link, ap, HOST_FIRST)
		ata_for_each_dev(dev, link, ALL)
			ata_ering_clear(&dev->ering);

	ata_acpi_set_state(ap, ap->pm_mesg);

	if (ap->ops->port_resume)
		ap->ops->port_resume(ap);

	/* tell ACPI that we're resuming */
	ata_acpi_on_resume(ap);

	/* update the flags */
	spin_lock_irqsave(ap->lock, flags);
	ap->pflags &= ~(ATA_PFLAG_PM_PENDING | ATA_PFLAG_SUSPENDED);
	spin_unlock_irqrestore(ap->lock, flags);
}
#endif /* CONFIG_PM */