1 /* 2 * libata-eh.c - libata error handling 3 * 4 * Maintained by: Tejun Heo <tj@kernel.org> 5 * Please ALWAYS copy linux-ide@vger.kernel.org 6 * on emails. 7 * 8 * Copyright 2006 Tejun Heo <htejun@gmail.com> 9 * 10 * 11 * This program is free software; you can redistribute it and/or 12 * modify it under the terms of the GNU General Public License as 13 * published by the Free Software Foundation; either version 2, or 14 * (at your option) any later version. 15 * 16 * This program is distributed in the hope that it will be useful, 17 * but WITHOUT ANY WARRANTY; without even the implied warranty of 18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 19 * General Public License for more details. 20 * 21 * You should have received a copy of the GNU General Public License 22 * along with this program; see the file COPYING. If not, write to 23 * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, 24 * USA. 25 * 26 * 27 * libata documentation is available via 'make {ps|pdf}docs', 28 * as Documentation/driver-api/libata.rst 29 * 30 * Hardware documentation available from http://www.t13.org/ and 31 * http://www.sata-io.org/ 32 * 33 */ 34 35 #include <linux/kernel.h> 36 #include <linux/blkdev.h> 37 #include <linux/export.h> 38 #include <linux/pci.h> 39 #include <scsi/scsi.h> 40 #include <scsi/scsi_host.h> 41 #include <scsi/scsi_eh.h> 42 #include <scsi/scsi_device.h> 43 #include <scsi/scsi_cmnd.h> 44 #include <scsi/scsi_dbg.h> 45 #include "../scsi/scsi_transport_api.h" 46 47 #include <linux/libata.h> 48 49 #include <trace/events/libata.h> 50 #include "libata.h" 51 52 enum { 53 /* speed down verdicts */ 54 ATA_EH_SPDN_NCQ_OFF = (1 << 0), 55 ATA_EH_SPDN_SPEED_DOWN = (1 << 1), 56 ATA_EH_SPDN_FALLBACK_TO_PIO = (1 << 2), 57 ATA_EH_SPDN_KEEP_ERRORS = (1 << 3), 58 59 /* error flags */ 60 ATA_EFLAG_IS_IO = (1 << 0), 61 ATA_EFLAG_DUBIOUS_XFER = (1 << 1), 62 ATA_EFLAG_OLD_ER = (1 << 31), 63 64 /* error categories */ 65 ATA_ECAT_NONE = 0, 66 
ATA_ECAT_ATA_BUS = 1, 67 ATA_ECAT_TOUT_HSM = 2, 68 ATA_ECAT_UNK_DEV = 3, 69 ATA_ECAT_DUBIOUS_NONE = 4, 70 ATA_ECAT_DUBIOUS_ATA_BUS = 5, 71 ATA_ECAT_DUBIOUS_TOUT_HSM = 6, 72 ATA_ECAT_DUBIOUS_UNK_DEV = 7, 73 ATA_ECAT_NR = 8, 74 75 ATA_EH_CMD_DFL_TIMEOUT = 5000, 76 77 /* always put at least this amount of time between resets */ 78 ATA_EH_RESET_COOL_DOWN = 5000, 79 80 /* Waiting in ->prereset can never be reliable. It's 81 * sometimes nice to wait there but it can't be depended upon; 82 * otherwise, we wouldn't be resetting. Just give it enough 83 * time for most drives to spin up. 84 */ 85 ATA_EH_PRERESET_TIMEOUT = 10000, 86 ATA_EH_FASTDRAIN_INTERVAL = 3000, 87 88 ATA_EH_UA_TRIES = 5, 89 90 /* probe speed down parameters, see ata_eh_schedule_probe() */ 91 ATA_EH_PROBE_TRIAL_INTERVAL = 60000, /* 1 min */ 92 ATA_EH_PROBE_TRIALS = 2, 93 }; 94 95 /* The following table determines how we sequence resets. Each entry 96 * represents timeout for that try. The first try can be soft or 97 * hardreset. All others are hardreset if available. In most cases 98 * the first reset w/ 10sec timeout should succeed. Following entries 99 * are mostly for error handling, hotplug and those outlier devices that 100 * take an exceptionally long time to recover from reset. 
101 */ 102 static const unsigned long ata_eh_reset_timeouts[] = { 103 10000, /* most drives spin up by 10sec */ 104 10000, /* > 99% working drives spin up before 20sec */ 105 35000, /* give > 30 secs of idleness for outlier devices */ 106 5000, /* and sweet one last chance */ 107 ULONG_MAX, /* > 1 min has elapsed, give up */ 108 }; 109 110 static const unsigned long ata_eh_identify_timeouts[] = { 111 5000, /* covers > 99% of successes and not too boring on failures */ 112 10000, /* combined time till here is enough even for media access */ 113 30000, /* for true idiots */ 114 ULONG_MAX, 115 }; 116 117 static const unsigned long ata_eh_flush_timeouts[] = { 118 15000, /* be generous with flush */ 119 15000, /* ditto */ 120 30000, /* and even more generous */ 121 ULONG_MAX, 122 }; 123 124 static const unsigned long ata_eh_other_timeouts[] = { 125 5000, /* same rationale as identify timeout */ 126 10000, /* ditto */ 127 /* but no merciful 30sec for other commands, it just isn't worth it */ 128 ULONG_MAX, 129 }; 130 131 struct ata_eh_cmd_timeout_ent { 132 const u8 *commands; 133 const unsigned long *timeouts; 134 }; 135 136 /* The following table determines timeouts to use for EH internal 137 * commands. Each table entry is a command class and matches the 138 * commands the entry applies to and the timeout table to use. 139 * 140 * On the retry after a command timed out, the next timeout value from 141 * the table is used. If the table doesn't contain further entries, 142 * the last value is used. 143 * 144 * ehc->cmd_timeout_idx keeps track of which timeout to use per 145 * command class, so if SET_FEATURES times out on the first try, the 146 * next try will use the second timeout value only for that class. 147 */ 148 #define CMDS(cmds...) 
(const u8 []){ cmds, 0 } 149 static const struct ata_eh_cmd_timeout_ent 150 ata_eh_cmd_timeout_table[ATA_EH_CMD_TIMEOUT_TABLE_SIZE] = { 151 { .commands = CMDS(ATA_CMD_ID_ATA, ATA_CMD_ID_ATAPI), 152 .timeouts = ata_eh_identify_timeouts, }, 153 { .commands = CMDS(ATA_CMD_READ_NATIVE_MAX, ATA_CMD_READ_NATIVE_MAX_EXT), 154 .timeouts = ata_eh_other_timeouts, }, 155 { .commands = CMDS(ATA_CMD_SET_MAX, ATA_CMD_SET_MAX_EXT), 156 .timeouts = ata_eh_other_timeouts, }, 157 { .commands = CMDS(ATA_CMD_SET_FEATURES), 158 .timeouts = ata_eh_other_timeouts, }, 159 { .commands = CMDS(ATA_CMD_INIT_DEV_PARAMS), 160 .timeouts = ata_eh_other_timeouts, }, 161 { .commands = CMDS(ATA_CMD_FLUSH, ATA_CMD_FLUSH_EXT), 162 .timeouts = ata_eh_flush_timeouts }, 163 }; 164 #undef CMDS 165 166 static void __ata_port_freeze(struct ata_port *ap); 167 #ifdef CONFIG_PM 168 static void ata_eh_handle_port_suspend(struct ata_port *ap); 169 static void ata_eh_handle_port_resume(struct ata_port *ap); 170 #else /* CONFIG_PM */ 171 static void ata_eh_handle_port_suspend(struct ata_port *ap) 172 { } 173 174 static void ata_eh_handle_port_resume(struct ata_port *ap) 175 { } 176 #endif /* CONFIG_PM */ 177 178 static __printf(2, 0) void __ata_ehi_pushv_desc(struct ata_eh_info *ehi, 179 const char *fmt, va_list args) 180 { 181 ehi->desc_len += vscnprintf(ehi->desc + ehi->desc_len, 182 ATA_EH_DESC_LEN - ehi->desc_len, 183 fmt, args); 184 } 185 186 /** 187 * __ata_ehi_push_desc - push error description without adding separator 188 * @ehi: target EHI 189 * @fmt: printf format string 190 * 191 * Format string according to @fmt and append it to @ehi->desc. 192 * 193 * LOCKING: 194 * spin_lock_irqsave(host lock) 195 */ 196 void __ata_ehi_push_desc(struct ata_eh_info *ehi, const char *fmt, ...) 
197 { 198 va_list args; 199 200 va_start(args, fmt); 201 __ata_ehi_pushv_desc(ehi, fmt, args); 202 va_end(args); 203 } 204 205 /** 206 * ata_ehi_push_desc - push error description with separator 207 * @ehi: target EHI 208 * @fmt: printf format string 209 * 210 * Format string according to @fmt and append it to @ehi->desc. 211 * If @ehi->desc is not empty, ", " is added in-between. 212 * 213 * LOCKING: 214 * spin_lock_irqsave(host lock) 215 */ 216 void ata_ehi_push_desc(struct ata_eh_info *ehi, const char *fmt, ...) 217 { 218 va_list args; 219 220 if (ehi->desc_len) 221 __ata_ehi_push_desc(ehi, ", "); 222 223 va_start(args, fmt); 224 __ata_ehi_pushv_desc(ehi, fmt, args); 225 va_end(args); 226 } 227 228 /** 229 * ata_ehi_clear_desc - clean error description 230 * @ehi: target EHI 231 * 232 * Clear @ehi->desc. 233 * 234 * LOCKING: 235 * spin_lock_irqsave(host lock) 236 */ 237 void ata_ehi_clear_desc(struct ata_eh_info *ehi) 238 { 239 ehi->desc[0] = '\0'; 240 ehi->desc_len = 0; 241 } 242 243 /** 244 * ata_port_desc - append port description 245 * @ap: target ATA port 246 * @fmt: printf format string 247 * 248 * Format string according to @fmt and append it to port 249 * description. If port description is not empty, " " is added 250 * in-between. This function is to be used while initializing 251 * ata_host. The description is printed on host registration. 252 * 253 * LOCKING: 254 * None. 255 */ 256 void ata_port_desc(struct ata_port *ap, const char *fmt, ...) 
257 { 258 va_list args; 259 260 WARN_ON(!(ap->pflags & ATA_PFLAG_INITIALIZING)); 261 262 if (ap->link.eh_info.desc_len) 263 __ata_ehi_push_desc(&ap->link.eh_info, " "); 264 265 va_start(args, fmt); 266 __ata_ehi_pushv_desc(&ap->link.eh_info, fmt, args); 267 va_end(args); 268 } 269 270 #ifdef CONFIG_PCI 271 272 /** 273 * ata_port_pbar_desc - append PCI BAR description 274 * @ap: target ATA port 275 * @bar: target PCI BAR 276 * @offset: offset into PCI BAR 277 * @name: name of the area 278 * 279 * If @offset is negative, this function formats a string which 280 * contains the name, address, size and type of the BAR and 281 * appends it to the port description. If @offset is zero or 282 * positive, only name and offsetted address is appended. 283 * 284 * LOCKING: 285 * None. 286 */ 287 void ata_port_pbar_desc(struct ata_port *ap, int bar, ssize_t offset, 288 const char *name) 289 { 290 struct pci_dev *pdev = to_pci_dev(ap->host->dev); 291 char *type = ""; 292 unsigned long long start, len; 293 294 if (pci_resource_flags(pdev, bar) & IORESOURCE_MEM) 295 type = "m"; 296 else if (pci_resource_flags(pdev, bar) & IORESOURCE_IO) 297 type = "i"; 298 299 start = (unsigned long long)pci_resource_start(pdev, bar); 300 len = (unsigned long long)pci_resource_len(pdev, bar); 301 302 if (offset < 0) 303 ata_port_desc(ap, "%s %s%llu@0x%llx", name, type, len, start); 304 else 305 ata_port_desc(ap, "%s 0x%llx", name, 306 start + (unsigned long long)offset); 307 } 308 309 #endif /* CONFIG_PCI */ 310 311 static int ata_lookup_timeout_table(u8 cmd) 312 { 313 int i; 314 315 for (i = 0; i < ATA_EH_CMD_TIMEOUT_TABLE_SIZE; i++) { 316 const u8 *cur; 317 318 for (cur = ata_eh_cmd_timeout_table[i].commands; *cur; cur++) 319 if (*cur == cmd) 320 return i; 321 } 322 323 return -1; 324 } 325 326 /** 327 * ata_internal_cmd_timeout - determine timeout for an internal command 328 * @dev: target device 329 * @cmd: internal command to be issued 330 * 331 * Determine timeout for internal command @cmd 
for @dev. 332 * 333 * LOCKING: 334 * EH context. 335 * 336 * RETURNS: 337 * Determined timeout. 338 */ 339 unsigned long ata_internal_cmd_timeout(struct ata_device *dev, u8 cmd) 340 { 341 struct ata_eh_context *ehc = &dev->link->eh_context; 342 int ent = ata_lookup_timeout_table(cmd); 343 int idx; 344 345 if (ent < 0) 346 return ATA_EH_CMD_DFL_TIMEOUT; 347 348 idx = ehc->cmd_timeout_idx[dev->devno][ent]; 349 return ata_eh_cmd_timeout_table[ent].timeouts[idx]; 350 } 351 352 /** 353 * ata_internal_cmd_timed_out - notification for internal command timeout 354 * @dev: target device 355 * @cmd: internal command which timed out 356 * 357 * Notify EH that internal command @cmd for @dev timed out. This 358 * function should be called only for commands whose timeouts are 359 * determined using ata_internal_cmd_timeout(). 360 * 361 * LOCKING: 362 * EH context. 363 */ 364 void ata_internal_cmd_timed_out(struct ata_device *dev, u8 cmd) 365 { 366 struct ata_eh_context *ehc = &dev->link->eh_context; 367 int ent = ata_lookup_timeout_table(cmd); 368 int idx; 369 370 if (ent < 0) 371 return; 372 373 idx = ehc->cmd_timeout_idx[dev->devno][ent]; 374 if (ata_eh_cmd_timeout_table[ent].timeouts[idx + 1] != ULONG_MAX) 375 ehc->cmd_timeout_idx[dev->devno][ent]++; 376 } 377 378 static void ata_ering_record(struct ata_ering *ering, unsigned int eflags, 379 unsigned int err_mask) 380 { 381 struct ata_ering_entry *ent; 382 383 WARN_ON(!err_mask); 384 385 ering->cursor++; 386 ering->cursor %= ATA_ERING_SIZE; 387 388 ent = &ering->ring[ering->cursor]; 389 ent->eflags = eflags; 390 ent->err_mask = err_mask; 391 ent->timestamp = get_jiffies_64(); 392 } 393 394 static struct ata_ering_entry *ata_ering_top(struct ata_ering *ering) 395 { 396 struct ata_ering_entry *ent = &ering->ring[ering->cursor]; 397 398 if (ent->err_mask) 399 return ent; 400 return NULL; 401 } 402 403 int ata_ering_map(struct ata_ering *ering, 404 int (*map_fn)(struct ata_ering_entry *, void *), 405 void *arg) 406 { 407 int idx, 
rc = 0; 408 struct ata_ering_entry *ent; 409 410 idx = ering->cursor; 411 do { 412 ent = &ering->ring[idx]; 413 if (!ent->err_mask) 414 break; 415 rc = map_fn(ent, arg); 416 if (rc) 417 break; 418 idx = (idx - 1 + ATA_ERING_SIZE) % ATA_ERING_SIZE; 419 } while (idx != ering->cursor); 420 421 return rc; 422 } 423 424 static int ata_ering_clear_cb(struct ata_ering_entry *ent, void *void_arg) 425 { 426 ent->eflags |= ATA_EFLAG_OLD_ER; 427 return 0; 428 } 429 430 static void ata_ering_clear(struct ata_ering *ering) 431 { 432 ata_ering_map(ering, ata_ering_clear_cb, NULL); 433 } 434 435 static unsigned int ata_eh_dev_action(struct ata_device *dev) 436 { 437 struct ata_eh_context *ehc = &dev->link->eh_context; 438 439 return ehc->i.action | ehc->i.dev_action[dev->devno]; 440 } 441 442 static void ata_eh_clear_action(struct ata_link *link, struct ata_device *dev, 443 struct ata_eh_info *ehi, unsigned int action) 444 { 445 struct ata_device *tdev; 446 447 if (!dev) { 448 ehi->action &= ~action; 449 ata_for_each_dev(tdev, link, ALL) 450 ehi->dev_action[tdev->devno] &= ~action; 451 } else { 452 /* doesn't make sense for port-wide EH actions */ 453 WARN_ON(!(action & ATA_EH_PERDEV_MASK)); 454 455 /* break ehi->action into ehi->dev_action */ 456 if (ehi->action & action) { 457 ata_for_each_dev(tdev, link, ALL) 458 ehi->dev_action[tdev->devno] |= 459 ehi->action & action; 460 ehi->action &= ~action; 461 } 462 463 /* turn off the specified per-dev action */ 464 ehi->dev_action[dev->devno] &= ~action; 465 } 466 } 467 468 /** 469 * ata_eh_acquire - acquire EH ownership 470 * @ap: ATA port to acquire EH ownership for 471 * 472 * Acquire EH ownership for @ap. This is the basic exclusion 473 * mechanism for ports sharing a host. Only one port hanging off 474 * the same host can claim the ownership of EH. 475 * 476 * LOCKING: 477 * EH context. 
478 */ 479 void ata_eh_acquire(struct ata_port *ap) 480 { 481 mutex_lock(&ap->host->eh_mutex); 482 WARN_ON_ONCE(ap->host->eh_owner); 483 ap->host->eh_owner = current; 484 } 485 486 /** 487 * ata_eh_release - release EH ownership 488 * @ap: ATA port to release EH ownership for 489 * 490 * Release EH ownership for @ap if the caller. The caller must 491 * have acquired EH ownership using ata_eh_acquire() previously. 492 * 493 * LOCKING: 494 * EH context. 495 */ 496 void ata_eh_release(struct ata_port *ap) 497 { 498 WARN_ON_ONCE(ap->host->eh_owner != current); 499 ap->host->eh_owner = NULL; 500 mutex_unlock(&ap->host->eh_mutex); 501 } 502 503 static void ata_eh_unload(struct ata_port *ap) 504 { 505 struct ata_link *link; 506 struct ata_device *dev; 507 unsigned long flags; 508 509 /* Restore SControl IPM and SPD for the next driver and 510 * disable attached devices. 511 */ 512 ata_for_each_link(link, ap, PMP_FIRST) { 513 sata_scr_write(link, SCR_CONTROL, link->saved_scontrol & 0xff0); 514 ata_for_each_dev(dev, link, ALL) 515 ata_dev_disable(dev); 516 } 517 518 /* freeze and set UNLOADED */ 519 spin_lock_irqsave(ap->lock, flags); 520 521 ata_port_freeze(ap); /* won't be thawed */ 522 ap->pflags &= ~ATA_PFLAG_EH_PENDING; /* clear pending from freeze */ 523 ap->pflags |= ATA_PFLAG_UNLOADED; 524 525 spin_unlock_irqrestore(ap->lock, flags); 526 } 527 528 /** 529 * ata_scsi_error - SCSI layer error handler callback 530 * @host: SCSI host on which error occurred 531 * 532 * Handles SCSI-layer-thrown error events. 533 * 534 * LOCKING: 535 * Inherited from SCSI layer (none, can sleep) 536 * 537 * RETURNS: 538 * Zero. 
539 */ 540 void ata_scsi_error(struct Scsi_Host *host) 541 { 542 struct ata_port *ap = ata_shost_to_port(host); 543 unsigned long flags; 544 LIST_HEAD(eh_work_q); 545 546 DPRINTK("ENTER\n"); 547 548 spin_lock_irqsave(host->host_lock, flags); 549 list_splice_init(&host->eh_cmd_q, &eh_work_q); 550 spin_unlock_irqrestore(host->host_lock, flags); 551 552 ata_scsi_cmd_error_handler(host, ap, &eh_work_q); 553 554 /* If we timed raced normal completion and there is nothing to 555 recover nr_timedout == 0 why exactly are we doing error recovery ? */ 556 ata_scsi_port_error_handler(host, ap); 557 558 /* finish or retry handled scmd's and clean up */ 559 WARN_ON(!list_empty(&eh_work_q)); 560 561 DPRINTK("EXIT\n"); 562 } 563 564 /** 565 * ata_scsi_cmd_error_handler - error callback for a list of commands 566 * @host: scsi host containing the port 567 * @ap: ATA port within the host 568 * @eh_work_q: list of commands to process 569 * 570 * process the given list of commands and return those finished to the 571 * ap->eh_done_q. This function is the first part of the libata error 572 * handler which processes a given list of failed commands. 573 */ 574 void ata_scsi_cmd_error_handler(struct Scsi_Host *host, struct ata_port *ap, 575 struct list_head *eh_work_q) 576 { 577 int i; 578 unsigned long flags; 579 580 /* make sure sff pio task is not running */ 581 ata_sff_flush_pio_task(ap); 582 583 /* synchronize with host lock and sort out timeouts */ 584 585 /* For new EH, all qcs are finished in one of three ways - 586 * normal completion, error completion, and SCSI timeout. 587 * Both completions can race against SCSI timeout. When normal 588 * completion wins, the qc never reaches EH. When error 589 * completion wins, the qc has ATA_QCFLAG_FAILED set. 590 * 591 * When SCSI timeout wins, things are a bit more complex. 592 * Normal or error completion can occur after the timeout but 593 * before this point. In such cases, both types of 594 * completions are honored. 
A scmd is determined to have 595 * timed out iff its associated qc is active and not failed. 596 */ 597 spin_lock_irqsave(ap->lock, flags); 598 if (ap->ops->error_handler) { 599 struct scsi_cmnd *scmd, *tmp; 600 int nr_timedout = 0; 601 602 /* This must occur under the ap->lock as we don't want 603 a polled recovery to race the real interrupt handler 604 605 The lost_interrupt handler checks for any completed but 606 non-notified command and completes much like an IRQ handler. 607 608 We then fall into the error recovery code which will treat 609 this as if normal completion won the race */ 610 611 if (ap->ops->lost_interrupt) 612 ap->ops->lost_interrupt(ap); 613 614 list_for_each_entry_safe(scmd, tmp, eh_work_q, eh_entry) { 615 struct ata_queued_cmd *qc; 616 617 ata_qc_for_each_raw(ap, qc, i) { 618 if (qc->flags & ATA_QCFLAG_ACTIVE && 619 qc->scsicmd == scmd) 620 break; 621 } 622 623 if (i < ATA_MAX_QUEUE) { 624 /* the scmd has an associated qc */ 625 if (!(qc->flags & ATA_QCFLAG_FAILED)) { 626 /* which hasn't failed yet, timeout */ 627 qc->err_mask |= AC_ERR_TIMEOUT; 628 qc->flags |= ATA_QCFLAG_FAILED; 629 nr_timedout++; 630 } 631 } else { 632 /* Normal completion occurred after 633 * SCSI timeout but before this point. 634 * Successfully complete it. 635 */ 636 scmd->retries = scmd->allowed; 637 scsi_eh_finish_cmd(scmd, &ap->eh_done_q); 638 } 639 } 640 641 /* If we have timed out qcs. They belong to EH from 642 * this point but the state of the controller is 643 * unknown. Freeze the port to make sure the IRQ 644 * handler doesn't diddle with those qcs. This must 645 * be done atomically w.r.t. setting QCFLAG_FAILED. 
646 */ 647 if (nr_timedout) 648 __ata_port_freeze(ap); 649 650 651 /* initialize eh_tries */ 652 ap->eh_tries = ATA_EH_MAX_TRIES; 653 } 654 spin_unlock_irqrestore(ap->lock, flags); 655 656 } 657 EXPORT_SYMBOL(ata_scsi_cmd_error_handler); 658 659 /** 660 * ata_scsi_port_error_handler - recover the port after the commands 661 * @host: SCSI host containing the port 662 * @ap: the ATA port 663 * 664 * Handle the recovery of the port @ap after all the commands 665 * have been recovered. 666 */ 667 void ata_scsi_port_error_handler(struct Scsi_Host *host, struct ata_port *ap) 668 { 669 unsigned long flags; 670 671 /* invoke error handler */ 672 if (ap->ops->error_handler) { 673 struct ata_link *link; 674 675 /* acquire EH ownership */ 676 ata_eh_acquire(ap); 677 repeat: 678 /* kill fast drain timer */ 679 del_timer_sync(&ap->fastdrain_timer); 680 681 /* process port resume request */ 682 ata_eh_handle_port_resume(ap); 683 684 /* fetch & clear EH info */ 685 spin_lock_irqsave(ap->lock, flags); 686 687 ata_for_each_link(link, ap, HOST_FIRST) { 688 struct ata_eh_context *ehc = &link->eh_context; 689 struct ata_device *dev; 690 691 memset(&link->eh_context, 0, sizeof(link->eh_context)); 692 link->eh_context.i = link->eh_info; 693 memset(&link->eh_info, 0, sizeof(link->eh_info)); 694 695 ata_for_each_dev(dev, link, ENABLED) { 696 int devno = dev->devno; 697 698 ehc->saved_xfer_mode[devno] = dev->xfer_mode; 699 if (ata_ncq_enabled(dev)) 700 ehc->saved_ncq_enabled |= 1 << devno; 701 } 702 } 703 704 ap->pflags |= ATA_PFLAG_EH_IN_PROGRESS; 705 ap->pflags &= ~ATA_PFLAG_EH_PENDING; 706 ap->excl_link = NULL; /* don't maintain exclusion over EH */ 707 708 spin_unlock_irqrestore(ap->lock, flags); 709 710 /* invoke EH, skip if unloading or suspended */ 711 if (!(ap->pflags & (ATA_PFLAG_UNLOADING | ATA_PFLAG_SUSPENDED))) 712 ap->ops->error_handler(ap); 713 else { 714 /* if unloading, commence suicide */ 715 if ((ap->pflags & ATA_PFLAG_UNLOADING) && 716 !(ap->pflags & ATA_PFLAG_UNLOADED)) 
717 ata_eh_unload(ap); 718 ata_eh_finish(ap); 719 } 720 721 /* process port suspend request */ 722 ata_eh_handle_port_suspend(ap); 723 724 /* Exception might have happened after ->error_handler 725 * recovered the port but before this point. Repeat 726 * EH in such case. 727 */ 728 spin_lock_irqsave(ap->lock, flags); 729 730 if (ap->pflags & ATA_PFLAG_EH_PENDING) { 731 if (--ap->eh_tries) { 732 spin_unlock_irqrestore(ap->lock, flags); 733 goto repeat; 734 } 735 ata_port_err(ap, 736 "EH pending after %d tries, giving up\n", 737 ATA_EH_MAX_TRIES); 738 ap->pflags &= ~ATA_PFLAG_EH_PENDING; 739 } 740 741 /* this run is complete, make sure EH info is clear */ 742 ata_for_each_link(link, ap, HOST_FIRST) 743 memset(&link->eh_info, 0, sizeof(link->eh_info)); 744 745 /* end eh (clear host_eh_scheduled) while holding 746 * ap->lock such that if exception occurs after this 747 * point but before EH completion, SCSI midlayer will 748 * re-initiate EH. 749 */ 750 ap->ops->end_eh(ap); 751 752 spin_unlock_irqrestore(ap->lock, flags); 753 ata_eh_release(ap); 754 } else { 755 WARN_ON(ata_qc_from_tag(ap, ap->link.active_tag) == NULL); 756 ap->ops->eng_timeout(ap); 757 } 758 759 scsi_eh_flush_done_q(&ap->eh_done_q); 760 761 /* clean up */ 762 spin_lock_irqsave(ap->lock, flags); 763 764 if (ap->pflags & ATA_PFLAG_LOADING) 765 ap->pflags &= ~ATA_PFLAG_LOADING; 766 else if ((ap->pflags & ATA_PFLAG_SCSI_HOTPLUG) && 767 !(ap->flags & ATA_FLAG_SAS_HOST)) 768 schedule_delayed_work(&ap->hotplug_task, 0); 769 770 if (ap->pflags & ATA_PFLAG_RECOVERED) 771 ata_port_info(ap, "EH complete\n"); 772 773 ap->pflags &= ~(ATA_PFLAG_SCSI_HOTPLUG | ATA_PFLAG_RECOVERED); 774 775 /* tell wait_eh that we're done */ 776 ap->pflags &= ~ATA_PFLAG_EH_IN_PROGRESS; 777 wake_up_all(&ap->eh_wait_q); 778 779 spin_unlock_irqrestore(ap->lock, flags); 780 } 781 EXPORT_SYMBOL_GPL(ata_scsi_port_error_handler); 782 783 /** 784 * ata_port_wait_eh - Wait for the currently pending EH to complete 785 * @ap: Port to wait EH 
for 786 * 787 * Wait until the currently pending EH is complete. 788 * 789 * LOCKING: 790 * Kernel thread context (may sleep). 791 */ 792 void ata_port_wait_eh(struct ata_port *ap) 793 { 794 unsigned long flags; 795 DEFINE_WAIT(wait); 796 797 retry: 798 spin_lock_irqsave(ap->lock, flags); 799 800 while (ap->pflags & (ATA_PFLAG_EH_PENDING | ATA_PFLAG_EH_IN_PROGRESS)) { 801 prepare_to_wait(&ap->eh_wait_q, &wait, TASK_UNINTERRUPTIBLE); 802 spin_unlock_irqrestore(ap->lock, flags); 803 schedule(); 804 spin_lock_irqsave(ap->lock, flags); 805 } 806 finish_wait(&ap->eh_wait_q, &wait); 807 808 spin_unlock_irqrestore(ap->lock, flags); 809 810 /* make sure SCSI EH is complete */ 811 if (scsi_host_in_recovery(ap->scsi_host)) { 812 ata_msleep(ap, 10); 813 goto retry; 814 } 815 } 816 EXPORT_SYMBOL_GPL(ata_port_wait_eh); 817 818 static int ata_eh_nr_in_flight(struct ata_port *ap) 819 { 820 struct ata_queued_cmd *qc; 821 unsigned int tag; 822 int nr = 0; 823 824 /* count only non-internal commands */ 825 ata_qc_for_each(ap, qc, tag) { 826 if (qc) 827 nr++; 828 } 829 830 return nr; 831 } 832 833 void ata_eh_fastdrain_timerfn(struct timer_list *t) 834 { 835 struct ata_port *ap = from_timer(ap, t, fastdrain_timer); 836 unsigned long flags; 837 int cnt; 838 839 spin_lock_irqsave(ap->lock, flags); 840 841 cnt = ata_eh_nr_in_flight(ap); 842 843 /* are we done? */ 844 if (!cnt) 845 goto out_unlock; 846 847 if (cnt == ap->fastdrain_cnt) { 848 struct ata_queued_cmd *qc; 849 unsigned int tag; 850 851 /* No progress during the last interval, tag all 852 * in-flight qcs as timed out and freeze the port. 
853 */ 854 ata_qc_for_each(ap, qc, tag) { 855 if (qc) 856 qc->err_mask |= AC_ERR_TIMEOUT; 857 } 858 859 ata_port_freeze(ap); 860 } else { 861 /* some qcs have finished, give it another chance */ 862 ap->fastdrain_cnt = cnt; 863 ap->fastdrain_timer.expires = 864 ata_deadline(jiffies, ATA_EH_FASTDRAIN_INTERVAL); 865 add_timer(&ap->fastdrain_timer); 866 } 867 868 out_unlock: 869 spin_unlock_irqrestore(ap->lock, flags); 870 } 871 872 /** 873 * ata_eh_set_pending - set ATA_PFLAG_EH_PENDING and activate fast drain 874 * @ap: target ATA port 875 * @fastdrain: activate fast drain 876 * 877 * Set ATA_PFLAG_EH_PENDING and activate fast drain if @fastdrain 878 * is non-zero and EH wasn't pending before. Fast drain ensures 879 * that EH kicks in in timely manner. 880 * 881 * LOCKING: 882 * spin_lock_irqsave(host lock) 883 */ 884 static void ata_eh_set_pending(struct ata_port *ap, int fastdrain) 885 { 886 int cnt; 887 888 /* already scheduled? */ 889 if (ap->pflags & ATA_PFLAG_EH_PENDING) 890 return; 891 892 ap->pflags |= ATA_PFLAG_EH_PENDING; 893 894 if (!fastdrain) 895 return; 896 897 /* do we have in-flight qcs? */ 898 cnt = ata_eh_nr_in_flight(ap); 899 if (!cnt) 900 return; 901 902 /* activate fast drain */ 903 ap->fastdrain_cnt = cnt; 904 ap->fastdrain_timer.expires = 905 ata_deadline(jiffies, ATA_EH_FASTDRAIN_INTERVAL); 906 add_timer(&ap->fastdrain_timer); 907 } 908 909 /** 910 * ata_qc_schedule_eh - schedule qc for error handling 911 * @qc: command to schedule error handling for 912 * 913 * Schedule error handling for @qc. EH will kick in as soon as 914 * other commands are drained. 915 * 916 * LOCKING: 917 * spin_lock_irqsave(host lock) 918 */ 919 void ata_qc_schedule_eh(struct ata_queued_cmd *qc) 920 { 921 struct ata_port *ap = qc->ap; 922 923 WARN_ON(!ap->ops->error_handler); 924 925 qc->flags |= ATA_QCFLAG_FAILED; 926 ata_eh_set_pending(ap, 1); 927 928 /* The following will fail if timeout has already expired. 
929 * ata_scsi_error() takes care of such scmds on EH entry. 930 * Note that ATA_QCFLAG_FAILED is unconditionally set after 931 * this function completes. 932 */ 933 blk_abort_request(qc->scsicmd->request); 934 } 935 936 /** 937 * ata_std_sched_eh - non-libsas ata_ports issue eh with this common routine 938 * @ap: ATA port to schedule EH for 939 * 940 * LOCKING: inherited from ata_port_schedule_eh 941 * spin_lock_irqsave(host lock) 942 */ 943 void ata_std_sched_eh(struct ata_port *ap) 944 { 945 WARN_ON(!ap->ops->error_handler); 946 947 if (ap->pflags & ATA_PFLAG_INITIALIZING) 948 return; 949 950 ata_eh_set_pending(ap, 1); 951 scsi_schedule_eh(ap->scsi_host); 952 953 DPRINTK("port EH scheduled\n"); 954 } 955 EXPORT_SYMBOL_GPL(ata_std_sched_eh); 956 957 /** 958 * ata_std_end_eh - non-libsas ata_ports complete eh with this common routine 959 * @ap: ATA port to end EH for 960 * 961 * In the libata object model there is a 1:1 mapping of ata_port to 962 * shost, so host fields can be directly manipulated under ap->lock, in 963 * the libsas case we need to hold a lock at the ha->level to coordinate 964 * these events. 965 * 966 * LOCKING: 967 * spin_lock_irqsave(host lock) 968 */ 969 void ata_std_end_eh(struct ata_port *ap) 970 { 971 struct Scsi_Host *host = ap->scsi_host; 972 973 host->host_eh_scheduled = 0; 974 } 975 EXPORT_SYMBOL(ata_std_end_eh); 976 977 978 /** 979 * ata_port_schedule_eh - schedule error handling without a qc 980 * @ap: ATA port to schedule EH for 981 * 982 * Schedule error handling for @ap. EH will kick in as soon as 983 * all commands are drained. 
984 * 985 * LOCKING: 986 * spin_lock_irqsave(host lock) 987 */ 988 void ata_port_schedule_eh(struct ata_port *ap) 989 { 990 /* see: ata_std_sched_eh, unless you know better */ 991 ap->ops->sched_eh(ap); 992 } 993 994 static int ata_do_link_abort(struct ata_port *ap, struct ata_link *link) 995 { 996 struct ata_queued_cmd *qc; 997 int tag, nr_aborted = 0; 998 999 WARN_ON(!ap->ops->error_handler); 1000 1001 /* we're gonna abort all commands, no need for fast drain */ 1002 ata_eh_set_pending(ap, 0); 1003 1004 /* include internal tag in iteration */ 1005 ata_qc_for_each_with_internal(ap, qc, tag) { 1006 if (qc && (!link || qc->dev->link == link)) { 1007 qc->flags |= ATA_QCFLAG_FAILED; 1008 ata_qc_complete(qc); 1009 nr_aborted++; 1010 } 1011 } 1012 1013 if (!nr_aborted) 1014 ata_port_schedule_eh(ap); 1015 1016 return nr_aborted; 1017 } 1018 1019 /** 1020 * ata_link_abort - abort all qc's on the link 1021 * @link: ATA link to abort qc's for 1022 * 1023 * Abort all active qc's active on @link and schedule EH. 1024 * 1025 * LOCKING: 1026 * spin_lock_irqsave(host lock) 1027 * 1028 * RETURNS: 1029 * Number of aborted qc's. 1030 */ 1031 int ata_link_abort(struct ata_link *link) 1032 { 1033 return ata_do_link_abort(link->ap, link); 1034 } 1035 1036 /** 1037 * ata_port_abort - abort all qc's on the port 1038 * @ap: ATA port to abort qc's for 1039 * 1040 * Abort all active qc's of @ap and schedule EH. 1041 * 1042 * LOCKING: 1043 * spin_lock_irqsave(host_set lock) 1044 * 1045 * RETURNS: 1046 * Number of aborted qc's. 1047 */ 1048 int ata_port_abort(struct ata_port *ap) 1049 { 1050 return ata_do_link_abort(ap, NULL); 1051 } 1052 1053 /** 1054 * __ata_port_freeze - freeze port 1055 * @ap: ATA port to freeze 1056 * 1057 * This function is called when HSM violation or some other 1058 * condition disrupts normal operation of the port. Frozen port 1059 * is not allowed to perform any operation until the port is 1060 * thawed, which usually follows a successful reset. 
1061 * 1062 * ap->ops->freeze() callback can be used for freezing the port 1063 * hardware-wise (e.g. mask interrupt and stop DMA engine). If a 1064 * port cannot be frozen hardware-wise, the interrupt handler 1065 * must ack and clear interrupts unconditionally while the port 1066 * is frozen. 1067 * 1068 * LOCKING: 1069 * spin_lock_irqsave(host lock) 1070 */ 1071 static void __ata_port_freeze(struct ata_port *ap) 1072 { 1073 WARN_ON(!ap->ops->error_handler); 1074 1075 if (ap->ops->freeze) 1076 ap->ops->freeze(ap); 1077 1078 ap->pflags |= ATA_PFLAG_FROZEN; 1079 1080 DPRINTK("ata%u port frozen\n", ap->print_id); 1081 } 1082 1083 /** 1084 * ata_port_freeze - abort & freeze port 1085 * @ap: ATA port to freeze 1086 * 1087 * Abort and freeze @ap. The freeze operation must be called 1088 * first, because some hardware requires special operations 1089 * before the taskfile registers are accessible. 1090 * 1091 * LOCKING: 1092 * spin_lock_irqsave(host lock) 1093 * 1094 * RETURNS: 1095 * Number of aborted commands. 1096 */ 1097 int ata_port_freeze(struct ata_port *ap) 1098 { 1099 int nr_aborted; 1100 1101 WARN_ON(!ap->ops->error_handler); 1102 1103 __ata_port_freeze(ap); 1104 nr_aborted = ata_port_abort(ap); 1105 1106 return nr_aborted; 1107 } 1108 1109 /** 1110 * sata_async_notification - SATA async notification handler 1111 * @ap: ATA port where async notification is received 1112 * 1113 * Handler to be called when async notification via SDB FIS is 1114 * received. This function schedules EH if necessary. 1115 * 1116 * LOCKING: 1117 * spin_lock_irqsave(host lock) 1118 * 1119 * RETURNS: 1120 * 1 if EH is scheduled, 0 otherwise. 
1121 */ 1122 int sata_async_notification(struct ata_port *ap) 1123 { 1124 u32 sntf; 1125 int rc; 1126 1127 if (!(ap->flags & ATA_FLAG_AN)) 1128 return 0; 1129 1130 rc = sata_scr_read(&ap->link, SCR_NOTIFICATION, &sntf); 1131 if (rc == 0) 1132 sata_scr_write(&ap->link, SCR_NOTIFICATION, sntf); 1133 1134 if (!sata_pmp_attached(ap) || rc) { 1135 /* PMP is not attached or SNTF is not available */ 1136 if (!sata_pmp_attached(ap)) { 1137 /* PMP is not attached. Check whether ATAPI 1138 * AN is configured. If so, notify media 1139 * change. 1140 */ 1141 struct ata_device *dev = ap->link.device; 1142 1143 if ((dev->class == ATA_DEV_ATAPI) && 1144 (dev->flags & ATA_DFLAG_AN)) 1145 ata_scsi_media_change_notify(dev); 1146 return 0; 1147 } else { 1148 /* PMP is attached but SNTF is not available. 1149 * ATAPI async media change notification is 1150 * not used. The PMP must be reporting PHY 1151 * status change, schedule EH. 1152 */ 1153 ata_port_schedule_eh(ap); 1154 return 1; 1155 } 1156 } else { 1157 /* PMP is attached and SNTF is available */ 1158 struct ata_link *link; 1159 1160 /* check and notify ATAPI AN */ 1161 ata_for_each_link(link, ap, EDGE) { 1162 if (!(sntf & (1 << link->pmp))) 1163 continue; 1164 1165 if ((link->device->class == ATA_DEV_ATAPI) && 1166 (link->device->flags & ATA_DFLAG_AN)) 1167 ata_scsi_media_change_notify(link->device); 1168 } 1169 1170 /* If PMP is reporting that PHY status of some 1171 * downstream ports has changed, schedule EH. 1172 */ 1173 if (sntf & (1 << SATA_PMP_CTRL_PORT)) { 1174 ata_port_schedule_eh(ap); 1175 return 1; 1176 } 1177 1178 return 0; 1179 } 1180 } 1181 1182 /** 1183 * ata_eh_freeze_port - EH helper to freeze port 1184 * @ap: ATA port to freeze 1185 * 1186 * Freeze @ap. 1187 * 1188 * LOCKING: 1189 * None. 
1190 */ 1191 void ata_eh_freeze_port(struct ata_port *ap) 1192 { 1193 unsigned long flags; 1194 1195 if (!ap->ops->error_handler) 1196 return; 1197 1198 spin_lock_irqsave(ap->lock, flags); 1199 __ata_port_freeze(ap); 1200 spin_unlock_irqrestore(ap->lock, flags); 1201 } 1202 1203 /** 1204 * ata_port_thaw_port - EH helper to thaw port 1205 * @ap: ATA port to thaw 1206 * 1207 * Thaw frozen port @ap. 1208 * 1209 * LOCKING: 1210 * None. 1211 */ 1212 void ata_eh_thaw_port(struct ata_port *ap) 1213 { 1214 unsigned long flags; 1215 1216 if (!ap->ops->error_handler) 1217 return; 1218 1219 spin_lock_irqsave(ap->lock, flags); 1220 1221 ap->pflags &= ~ATA_PFLAG_FROZEN; 1222 1223 if (ap->ops->thaw) 1224 ap->ops->thaw(ap); 1225 1226 spin_unlock_irqrestore(ap->lock, flags); 1227 1228 DPRINTK("ata%u port thawed\n", ap->print_id); 1229 } 1230 1231 static void ata_eh_scsidone(struct scsi_cmnd *scmd) 1232 { 1233 /* nada */ 1234 } 1235 1236 static void __ata_eh_qc_complete(struct ata_queued_cmd *qc) 1237 { 1238 struct ata_port *ap = qc->ap; 1239 struct scsi_cmnd *scmd = qc->scsicmd; 1240 unsigned long flags; 1241 1242 spin_lock_irqsave(ap->lock, flags); 1243 qc->scsidone = ata_eh_scsidone; 1244 __ata_qc_complete(qc); 1245 WARN_ON(ata_tag_valid(qc->tag)); 1246 spin_unlock_irqrestore(ap->lock, flags); 1247 1248 scsi_eh_finish_cmd(scmd, &ap->eh_done_q); 1249 } 1250 1251 /** 1252 * ata_eh_qc_complete - Complete an active ATA command from EH 1253 * @qc: Command to complete 1254 * 1255 * Indicate to the mid and upper layers that an ATA command has 1256 * completed. To be used from EH. 1257 */ 1258 void ata_eh_qc_complete(struct ata_queued_cmd *qc) 1259 { 1260 struct scsi_cmnd *scmd = qc->scsicmd; 1261 scmd->retries = scmd->allowed; 1262 __ata_eh_qc_complete(qc); 1263 } 1264 1265 /** 1266 * ata_eh_qc_retry - Tell midlayer to retry an ATA command after EH 1267 * @qc: Command to retry 1268 * 1269 * Indicate to the mid and upper layers that an ATA command 1270 * should be retried. 
To be used from EH. 1271 * 1272 * SCSI midlayer limits the number of retries to scmd->allowed. 1273 * scmd->allowed is incremented for commands which get retried 1274 * due to unrelated failures (qc->err_mask is zero). 1275 */ 1276 void ata_eh_qc_retry(struct ata_queued_cmd *qc) 1277 { 1278 struct scsi_cmnd *scmd = qc->scsicmd; 1279 if (!qc->err_mask) 1280 scmd->allowed++; 1281 __ata_eh_qc_complete(qc); 1282 } 1283 1284 /** 1285 * ata_dev_disable - disable ATA device 1286 * @dev: ATA device to disable 1287 * 1288 * Disable @dev. 1289 * 1290 * Locking: 1291 * EH context. 1292 */ 1293 void ata_dev_disable(struct ata_device *dev) 1294 { 1295 if (!ata_dev_enabled(dev)) 1296 return; 1297 1298 if (ata_msg_drv(dev->link->ap)) 1299 ata_dev_warn(dev, "disabled\n"); 1300 ata_acpi_on_disable(dev); 1301 ata_down_xfermask_limit(dev, ATA_DNXFER_FORCE_PIO0 | ATA_DNXFER_QUIET); 1302 dev->class++; 1303 1304 /* From now till the next successful probe, ering is used to 1305 * track probe failures. Clear accumulated device error info. 1306 */ 1307 ata_ering_clear(&dev->ering); 1308 } 1309 1310 /** 1311 * ata_eh_detach_dev - detach ATA device 1312 * @dev: ATA device to detach 1313 * 1314 * Detach @dev. 1315 * 1316 * LOCKING: 1317 * None. 
1318 */ 1319 void ata_eh_detach_dev(struct ata_device *dev) 1320 { 1321 struct ata_link *link = dev->link; 1322 struct ata_port *ap = link->ap; 1323 struct ata_eh_context *ehc = &link->eh_context; 1324 unsigned long flags; 1325 1326 ata_dev_disable(dev); 1327 1328 spin_lock_irqsave(ap->lock, flags); 1329 1330 dev->flags &= ~ATA_DFLAG_DETACH; 1331 1332 if (ata_scsi_offline_dev(dev)) { 1333 dev->flags |= ATA_DFLAG_DETACHED; 1334 ap->pflags |= ATA_PFLAG_SCSI_HOTPLUG; 1335 } 1336 1337 /* clear per-dev EH info */ 1338 ata_eh_clear_action(link, dev, &link->eh_info, ATA_EH_PERDEV_MASK); 1339 ata_eh_clear_action(link, dev, &link->eh_context.i, ATA_EH_PERDEV_MASK); 1340 ehc->saved_xfer_mode[dev->devno] = 0; 1341 ehc->saved_ncq_enabled &= ~(1 << dev->devno); 1342 1343 spin_unlock_irqrestore(ap->lock, flags); 1344 } 1345 1346 /** 1347 * ata_eh_about_to_do - about to perform eh_action 1348 * @link: target ATA link 1349 * @dev: target ATA dev for per-dev action (can be NULL) 1350 * @action: action about to be performed 1351 * 1352 * Called just before performing EH actions to clear related bits 1353 * in @link->eh_info such that eh actions are not unnecessarily 1354 * repeated. 1355 * 1356 * LOCKING: 1357 * None. 1358 */ 1359 void ata_eh_about_to_do(struct ata_link *link, struct ata_device *dev, 1360 unsigned int action) 1361 { 1362 struct ata_port *ap = link->ap; 1363 struct ata_eh_info *ehi = &link->eh_info; 1364 struct ata_eh_context *ehc = &link->eh_context; 1365 unsigned long flags; 1366 1367 spin_lock_irqsave(ap->lock, flags); 1368 1369 ata_eh_clear_action(link, dev, ehi, action); 1370 1371 /* About to take EH action, set RECOVERED. Ignore actions on 1372 * slave links as master will do them again. 
1373 */ 1374 if (!(ehc->i.flags & ATA_EHI_QUIET) && link != ap->slave_link) 1375 ap->pflags |= ATA_PFLAG_RECOVERED; 1376 1377 spin_unlock_irqrestore(ap->lock, flags); 1378 } 1379 1380 /** 1381 * ata_eh_done - EH action complete 1382 * @link: ATA link for which EH actions are complete 1383 * @dev: target ATA dev for per-dev action (can be NULL) 1384 * @action: action just completed 1385 * 1386 * Called right after performing EH actions to clear related bits 1387 * in @link->eh_context. 1388 * 1389 * LOCKING: 1390 * None. 1391 */ 1392 void ata_eh_done(struct ata_link *link, struct ata_device *dev, 1393 unsigned int action) 1394 { 1395 struct ata_eh_context *ehc = &link->eh_context; 1396 1397 ata_eh_clear_action(link, dev, &ehc->i, action); 1398 } 1399 1400 /** 1401 * ata_err_string - convert err_mask to descriptive string 1402 * @err_mask: error mask to convert to string 1403 * 1404 * Convert @err_mask to descriptive string. Errors are 1405 * prioritized according to severity and only the most severe 1406 * error is reported. 1407 * 1408 * LOCKING: 1409 * None. 
1410 * 1411 * RETURNS: 1412 * Descriptive string for @err_mask 1413 */ 1414 static const char *ata_err_string(unsigned int err_mask) 1415 { 1416 if (err_mask & AC_ERR_HOST_BUS) 1417 return "host bus error"; 1418 if (err_mask & AC_ERR_ATA_BUS) 1419 return "ATA bus error"; 1420 if (err_mask & AC_ERR_TIMEOUT) 1421 return "timeout"; 1422 if (err_mask & AC_ERR_HSM) 1423 return "HSM violation"; 1424 if (err_mask & AC_ERR_SYSTEM) 1425 return "internal error"; 1426 if (err_mask & AC_ERR_MEDIA) 1427 return "media error"; 1428 if (err_mask & AC_ERR_INVALID) 1429 return "invalid argument"; 1430 if (err_mask & AC_ERR_DEV) 1431 return "device error"; 1432 if (err_mask & AC_ERR_NCQ) 1433 return "NCQ error"; 1434 if (err_mask & AC_ERR_NODEV_HINT) 1435 return "Polling detection error"; 1436 return "unknown error"; 1437 } 1438 1439 /** 1440 * ata_eh_read_log_10h - Read log page 10h for NCQ error details 1441 * @dev: Device to read log page 10h from 1442 * @tag: Resulting tag of the failed command 1443 * @tf: Resulting taskfile registers of the failed command 1444 * 1445 * Read log page 10h to obtain NCQ error details and clear error 1446 * condition. 1447 * 1448 * LOCKING: 1449 * Kernel thread context (may sleep). 1450 * 1451 * RETURNS: 1452 * 0 on success, -errno otherwise. 
1453 */ 1454 static int ata_eh_read_log_10h(struct ata_device *dev, 1455 int *tag, struct ata_taskfile *tf) 1456 { 1457 u8 *buf = dev->link->ap->sector_buf; 1458 unsigned int err_mask; 1459 u8 csum; 1460 int i; 1461 1462 err_mask = ata_read_log_page(dev, ATA_LOG_SATA_NCQ, 0, buf, 1); 1463 if (err_mask) 1464 return -EIO; 1465 1466 csum = 0; 1467 for (i = 0; i < ATA_SECT_SIZE; i++) 1468 csum += buf[i]; 1469 if (csum) 1470 ata_dev_warn(dev, "invalid checksum 0x%x on log page 10h\n", 1471 csum); 1472 1473 if (buf[0] & 0x80) 1474 return -ENOENT; 1475 1476 *tag = buf[0] & 0x1f; 1477 1478 tf->command = buf[2]; 1479 tf->feature = buf[3]; 1480 tf->lbal = buf[4]; 1481 tf->lbam = buf[5]; 1482 tf->lbah = buf[6]; 1483 tf->device = buf[7]; 1484 tf->hob_lbal = buf[8]; 1485 tf->hob_lbam = buf[9]; 1486 tf->hob_lbah = buf[10]; 1487 tf->nsect = buf[12]; 1488 tf->hob_nsect = buf[13]; 1489 if (ata_id_has_ncq_autosense(dev->id)) 1490 tf->auxiliary = buf[14] << 16 | buf[15] << 8 | buf[16]; 1491 1492 return 0; 1493 } 1494 1495 /** 1496 * atapi_eh_tur - perform ATAPI TEST_UNIT_READY 1497 * @dev: target ATAPI device 1498 * @r_sense_key: out parameter for sense_key 1499 * 1500 * Perform ATAPI TEST_UNIT_READY. 1501 * 1502 * LOCKING: 1503 * EH context (may sleep). 1504 * 1505 * RETURNS: 1506 * 0 on success, AC_ERR_* mask on failure. 
1507 */ 1508 unsigned int atapi_eh_tur(struct ata_device *dev, u8 *r_sense_key) 1509 { 1510 u8 cdb[ATAPI_CDB_LEN] = { TEST_UNIT_READY, 0, 0, 0, 0, 0 }; 1511 struct ata_taskfile tf; 1512 unsigned int err_mask; 1513 1514 ata_tf_init(dev, &tf); 1515 1516 tf.flags |= ATA_TFLAG_ISADDR | ATA_TFLAG_DEVICE; 1517 tf.command = ATA_CMD_PACKET; 1518 tf.protocol = ATAPI_PROT_NODATA; 1519 1520 err_mask = ata_exec_internal(dev, &tf, cdb, DMA_NONE, NULL, 0, 0); 1521 if (err_mask == AC_ERR_DEV) 1522 *r_sense_key = tf.feature >> 4; 1523 return err_mask; 1524 } 1525 1526 /** 1527 * ata_eh_request_sense - perform REQUEST_SENSE_DATA_EXT 1528 * @qc: qc to perform REQUEST_SENSE_SENSE_DATA_EXT to 1529 * @cmd: scsi command for which the sense code should be set 1530 * 1531 * Perform REQUEST_SENSE_DATA_EXT after the device reported CHECK 1532 * SENSE. This function is an EH helper. 1533 * 1534 * LOCKING: 1535 * Kernel thread context (may sleep). 1536 */ 1537 static void ata_eh_request_sense(struct ata_queued_cmd *qc, 1538 struct scsi_cmnd *cmd) 1539 { 1540 struct ata_device *dev = qc->dev; 1541 struct ata_taskfile tf; 1542 unsigned int err_mask; 1543 1544 if (qc->ap->pflags & ATA_PFLAG_FROZEN) { 1545 ata_dev_warn(dev, "sense data available but port frozen\n"); 1546 return; 1547 } 1548 1549 if (!cmd || qc->flags & ATA_QCFLAG_SENSE_VALID) 1550 return; 1551 1552 if (!ata_id_sense_reporting_enabled(dev->id)) { 1553 ata_dev_warn(qc->dev, "sense data reporting disabled\n"); 1554 return; 1555 } 1556 1557 DPRINTK("ATA request sense\n"); 1558 1559 ata_tf_init(dev, &tf); 1560 tf.flags |= ATA_TFLAG_ISADDR | ATA_TFLAG_DEVICE; 1561 tf.flags |= ATA_TFLAG_LBA | ATA_TFLAG_LBA48; 1562 tf.command = ATA_CMD_REQ_SENSE_DATA; 1563 tf.protocol = ATA_PROT_NODATA; 1564 1565 err_mask = ata_exec_internal(dev, &tf, NULL, DMA_NONE, NULL, 0, 0); 1566 /* Ignore err_mask; ATA_ERR might be set */ 1567 if (tf.command & ATA_SENSE) { 1568 ata_scsi_set_sense(dev, cmd, tf.lbah, tf.lbam, tf.lbal); 1569 qc->flags |= 
ATA_QCFLAG_SENSE_VALID; 1570 } else { 1571 ata_dev_warn(dev, "request sense failed stat %02x emask %x\n", 1572 tf.command, err_mask); 1573 } 1574 } 1575 1576 /** 1577 * atapi_eh_request_sense - perform ATAPI REQUEST_SENSE 1578 * @dev: device to perform REQUEST_SENSE to 1579 * @sense_buf: result sense data buffer (SCSI_SENSE_BUFFERSIZE bytes long) 1580 * @dfl_sense_key: default sense key to use 1581 * 1582 * Perform ATAPI REQUEST_SENSE after the device reported CHECK 1583 * SENSE. This function is EH helper. 1584 * 1585 * LOCKING: 1586 * Kernel thread context (may sleep). 1587 * 1588 * RETURNS: 1589 * 0 on success, AC_ERR_* mask on failure 1590 */ 1591 unsigned int atapi_eh_request_sense(struct ata_device *dev, 1592 u8 *sense_buf, u8 dfl_sense_key) 1593 { 1594 u8 cdb[ATAPI_CDB_LEN] = 1595 { REQUEST_SENSE, 0, 0, 0, SCSI_SENSE_BUFFERSIZE, 0 }; 1596 struct ata_port *ap = dev->link->ap; 1597 struct ata_taskfile tf; 1598 1599 DPRINTK("ATAPI request sense\n"); 1600 1601 memset(sense_buf, 0, SCSI_SENSE_BUFFERSIZE); 1602 1603 /* initialize sense_buf with the error register, 1604 * for the case where they are -not- overwritten 1605 */ 1606 sense_buf[0] = 0x70; 1607 sense_buf[2] = dfl_sense_key; 1608 1609 /* some devices time out if garbage left in tf */ 1610 ata_tf_init(dev, &tf); 1611 1612 tf.flags |= ATA_TFLAG_ISADDR | ATA_TFLAG_DEVICE; 1613 tf.command = ATA_CMD_PACKET; 1614 1615 /* is it pointless to prefer PIO for "safety reasons"? 
*/ 1616 if (ap->flags & ATA_FLAG_PIO_DMA) { 1617 tf.protocol = ATAPI_PROT_DMA; 1618 tf.feature |= ATAPI_PKT_DMA; 1619 } else { 1620 tf.protocol = ATAPI_PROT_PIO; 1621 tf.lbam = SCSI_SENSE_BUFFERSIZE; 1622 tf.lbah = 0; 1623 } 1624 1625 return ata_exec_internal(dev, &tf, cdb, DMA_FROM_DEVICE, 1626 sense_buf, SCSI_SENSE_BUFFERSIZE, 0); 1627 } 1628 1629 /** 1630 * ata_eh_analyze_serror - analyze SError for a failed port 1631 * @link: ATA link to analyze SError for 1632 * 1633 * Analyze SError if available and further determine cause of 1634 * failure. 1635 * 1636 * LOCKING: 1637 * None. 1638 */ 1639 static void ata_eh_analyze_serror(struct ata_link *link) 1640 { 1641 struct ata_eh_context *ehc = &link->eh_context; 1642 u32 serror = ehc->i.serror; 1643 unsigned int err_mask = 0, action = 0; 1644 u32 hotplug_mask; 1645 1646 if (serror & (SERR_PERSISTENT | SERR_DATA)) { 1647 err_mask |= AC_ERR_ATA_BUS; 1648 action |= ATA_EH_RESET; 1649 } 1650 if (serror & SERR_PROTOCOL) { 1651 err_mask |= AC_ERR_HSM; 1652 action |= ATA_EH_RESET; 1653 } 1654 if (serror & SERR_INTERNAL) { 1655 err_mask |= AC_ERR_SYSTEM; 1656 action |= ATA_EH_RESET; 1657 } 1658 1659 /* Determine whether a hotplug event has occurred. Both 1660 * SError.N/X are considered hotplug events for enabled or 1661 * host links. For disabled PMP links, only N bit is 1662 * considered as X bit is left at 1 for link plugging. 
1663 */ 1664 if (link->lpm_policy > ATA_LPM_MAX_POWER) 1665 hotplug_mask = 0; /* hotplug doesn't work w/ LPM */ 1666 else if (!(link->flags & ATA_LFLAG_DISABLED) || ata_is_host_link(link)) 1667 hotplug_mask = SERR_PHYRDY_CHG | SERR_DEV_XCHG; 1668 else 1669 hotplug_mask = SERR_PHYRDY_CHG; 1670 1671 if (serror & hotplug_mask) 1672 ata_ehi_hotplugged(&ehc->i); 1673 1674 ehc->i.err_mask |= err_mask; 1675 ehc->i.action |= action; 1676 } 1677 1678 /** 1679 * ata_eh_analyze_ncq_error - analyze NCQ error 1680 * @link: ATA link to analyze NCQ error for 1681 * 1682 * Read log page 10h, determine the offending qc and acquire 1683 * error status TF. For NCQ device errors, all LLDDs have to do 1684 * is setting AC_ERR_DEV in ehi->err_mask. This function takes 1685 * care of the rest. 1686 * 1687 * LOCKING: 1688 * Kernel thread context (may sleep). 1689 */ 1690 void ata_eh_analyze_ncq_error(struct ata_link *link) 1691 { 1692 struct ata_port *ap = link->ap; 1693 struct ata_eh_context *ehc = &link->eh_context; 1694 struct ata_device *dev = link->device; 1695 struct ata_queued_cmd *qc; 1696 struct ata_taskfile tf; 1697 int tag, rc; 1698 1699 /* if frozen, we can't do much */ 1700 if (ap->pflags & ATA_PFLAG_FROZEN) 1701 return; 1702 1703 /* is it NCQ device error? */ 1704 if (!link->sactive || !(ehc->i.err_mask & AC_ERR_DEV)) 1705 return; 1706 1707 /* has LLDD analyzed already? 
*/ 1708 ata_qc_for_each_raw(ap, qc, tag) { 1709 if (!(qc->flags & ATA_QCFLAG_FAILED)) 1710 continue; 1711 1712 if (qc->err_mask) 1713 return; 1714 } 1715 1716 /* okay, this error is ours */ 1717 memset(&tf, 0, sizeof(tf)); 1718 rc = ata_eh_read_log_10h(dev, &tag, &tf); 1719 if (rc) { 1720 ata_link_err(link, "failed to read log page 10h (errno=%d)\n", 1721 rc); 1722 return; 1723 } 1724 1725 if (!(link->sactive & (1 << tag))) { 1726 ata_link_err(link, "log page 10h reported inactive tag %d\n", 1727 tag); 1728 return; 1729 } 1730 1731 /* we've got the perpetrator, condemn it */ 1732 qc = __ata_qc_from_tag(ap, tag); 1733 memcpy(&qc->result_tf, &tf, sizeof(tf)); 1734 qc->result_tf.flags = ATA_TFLAG_ISADDR | ATA_TFLAG_LBA | ATA_TFLAG_LBA48; 1735 qc->err_mask |= AC_ERR_DEV | AC_ERR_NCQ; 1736 if ((qc->result_tf.command & ATA_SENSE) || qc->result_tf.auxiliary) { 1737 char sense_key, asc, ascq; 1738 1739 sense_key = (qc->result_tf.auxiliary >> 16) & 0xff; 1740 asc = (qc->result_tf.auxiliary >> 8) & 0xff; 1741 ascq = qc->result_tf.auxiliary & 0xff; 1742 ata_scsi_set_sense(dev, qc->scsicmd, sense_key, asc, ascq); 1743 ata_scsi_set_sense_information(dev, qc->scsicmd, 1744 &qc->result_tf); 1745 qc->flags |= ATA_QCFLAG_SENSE_VALID; 1746 } 1747 1748 ehc->i.err_mask &= ~AC_ERR_DEV; 1749 } 1750 1751 /** 1752 * ata_eh_analyze_tf - analyze taskfile of a failed qc 1753 * @qc: qc to analyze 1754 * @tf: Taskfile registers to analyze 1755 * 1756 * Analyze taskfile of @qc and further determine cause of 1757 * failure. This function also requests ATAPI sense data if 1758 * available. 1759 * 1760 * LOCKING: 1761 * Kernel thread context (may sleep). 
1762 * 1763 * RETURNS: 1764 * Determined recovery action 1765 */ 1766 static unsigned int ata_eh_analyze_tf(struct ata_queued_cmd *qc, 1767 const struct ata_taskfile *tf) 1768 { 1769 unsigned int tmp, action = 0; 1770 u8 stat = tf->command, err = tf->feature; 1771 1772 if ((stat & (ATA_BUSY | ATA_DRQ | ATA_DRDY)) != ATA_DRDY) { 1773 qc->err_mask |= AC_ERR_HSM; 1774 return ATA_EH_RESET; 1775 } 1776 1777 if (stat & (ATA_ERR | ATA_DF)) { 1778 qc->err_mask |= AC_ERR_DEV; 1779 /* 1780 * Sense data reporting does not work if the 1781 * device fault bit is set. 1782 */ 1783 if (stat & ATA_DF) 1784 stat &= ~ATA_SENSE; 1785 } else { 1786 return 0; 1787 } 1788 1789 switch (qc->dev->class) { 1790 case ATA_DEV_ATA: 1791 case ATA_DEV_ZAC: 1792 if (stat & ATA_SENSE) 1793 ata_eh_request_sense(qc, qc->scsicmd); 1794 if (err & ATA_ICRC) 1795 qc->err_mask |= AC_ERR_ATA_BUS; 1796 if (err & (ATA_UNC | ATA_AMNF)) 1797 qc->err_mask |= AC_ERR_MEDIA; 1798 if (err & ATA_IDNF) 1799 qc->err_mask |= AC_ERR_INVALID; 1800 break; 1801 1802 case ATA_DEV_ATAPI: 1803 if (!(qc->ap->pflags & ATA_PFLAG_FROZEN)) { 1804 tmp = atapi_eh_request_sense(qc->dev, 1805 qc->scsicmd->sense_buffer, 1806 qc->result_tf.feature >> 4); 1807 if (!tmp) 1808 qc->flags |= ATA_QCFLAG_SENSE_VALID; 1809 else 1810 qc->err_mask |= tmp; 1811 } 1812 } 1813 1814 if (qc->flags & ATA_QCFLAG_SENSE_VALID) { 1815 int ret = scsi_check_sense(qc->scsicmd); 1816 /* 1817 * SUCCESS here means that the sense code could be 1818 * evaluated and should be passed to the upper layers 1819 * for correct evaluation. 1820 * FAILED means the sense code could not be interpreted 1821 * and the device would need to be reset. 1822 * NEEDS_RETRY and ADD_TO_MLQUEUE means that the 1823 * command would need to be retried. 
1824 */ 1825 if (ret == NEEDS_RETRY || ret == ADD_TO_MLQUEUE) { 1826 qc->flags |= ATA_QCFLAG_RETRY; 1827 qc->err_mask |= AC_ERR_OTHER; 1828 } else if (ret != SUCCESS) { 1829 qc->err_mask |= AC_ERR_HSM; 1830 } 1831 } 1832 if (qc->err_mask & (AC_ERR_HSM | AC_ERR_TIMEOUT | AC_ERR_ATA_BUS)) 1833 action |= ATA_EH_RESET; 1834 1835 return action; 1836 } 1837 1838 static int ata_eh_categorize_error(unsigned int eflags, unsigned int err_mask, 1839 int *xfer_ok) 1840 { 1841 int base = 0; 1842 1843 if (!(eflags & ATA_EFLAG_DUBIOUS_XFER)) 1844 *xfer_ok = 1; 1845 1846 if (!*xfer_ok) 1847 base = ATA_ECAT_DUBIOUS_NONE; 1848 1849 if (err_mask & AC_ERR_ATA_BUS) 1850 return base + ATA_ECAT_ATA_BUS; 1851 1852 if (err_mask & AC_ERR_TIMEOUT) 1853 return base + ATA_ECAT_TOUT_HSM; 1854 1855 if (eflags & ATA_EFLAG_IS_IO) { 1856 if (err_mask & AC_ERR_HSM) 1857 return base + ATA_ECAT_TOUT_HSM; 1858 if ((err_mask & 1859 (AC_ERR_DEV|AC_ERR_MEDIA|AC_ERR_INVALID)) == AC_ERR_DEV) 1860 return base + ATA_ECAT_UNK_DEV; 1861 } 1862 1863 return 0; 1864 } 1865 1866 struct speed_down_verdict_arg { 1867 u64 since; 1868 int xfer_ok; 1869 int nr_errors[ATA_ECAT_NR]; 1870 }; 1871 1872 static int speed_down_verdict_cb(struct ata_ering_entry *ent, void *void_arg) 1873 { 1874 struct speed_down_verdict_arg *arg = void_arg; 1875 int cat; 1876 1877 if ((ent->eflags & ATA_EFLAG_OLD_ER) || (ent->timestamp < arg->since)) 1878 return -1; 1879 1880 cat = ata_eh_categorize_error(ent->eflags, ent->err_mask, 1881 &arg->xfer_ok); 1882 arg->nr_errors[cat]++; 1883 1884 return 0; 1885 } 1886 1887 /** 1888 * ata_eh_speed_down_verdict - Determine speed down verdict 1889 * @dev: Device of interest 1890 * 1891 * This function examines error ring of @dev and determines 1892 * whether NCQ needs to be turned off, transfer speed should be 1893 * stepped down, or falling back to PIO is necessary. 
*
 *	ECAT_ATA_BUS	: ATA_BUS error for any command
 *
 *	ECAT_TOUT_HSM	: TIMEOUT for any command or HSM violation for
 *			  IO commands
 *
 *	ECAT_UNK_DEV	: Unknown DEV error for IO commands
 *
 *	ECAT_DUBIOUS_*	: Identical to above three but occurred while
 *			  data transfer hasn't been verified.
 *
 *	Verdicts are
 *
 *	NCQ_OFF		: Turn off NCQ.
 *
 *	SPEED_DOWN	: Speed down transfer speed but don't fall back
 *			  to PIO.
 *
 *	FALLBACK_TO_PIO	: Fall back to PIO.
 *
 *	Even if multiple verdicts are returned, only one action is
 *	taken per error.  An action triggered by non-DUBIOUS errors
 *	clears ering, while one triggered by DUBIOUS_* errors doesn't.
 *	This is to expedite speed down decisions right after device is
 *	initially configured.
 *
 *	The following are speed down rules.  #1 and #2 deal with
 *	DUBIOUS errors.
 *
 *	1. If more than one DUBIOUS_ATA_BUS or DUBIOUS_TOUT_HSM errors
 *	   occurred during last 5 mins, SPEED_DOWN and FALLBACK_TO_PIO.
 *
 *	2. If more than one DUBIOUS_TOUT_HSM or DUBIOUS_UNK_DEV errors
 *	   occurred during last 5 mins, NCQ_OFF.
 *
 *	3. If more than 6 ATA_BUS, TOUT_HSM or UNK_DEV errors
 *	   occurred during last 5 mins, FALLBACK_TO_PIO.
 *
 *	4. If more than 3 TOUT_HSM or UNK_DEV errors occurred
 *	   during last 10 mins, NCQ_OFF.
 *
 *	5. If more than 3 ATA_BUS or TOUT_HSM errors, or more than 6
 *	   UNK_DEV errors occurred during last 10 mins, SPEED_DOWN.
 *
 *	LOCKING:
 *	Inherited from caller.
 *
 *	RETURNS:
 *	OR of ATA_EH_SPDN_* flags.
1943 */ 1944 static unsigned int ata_eh_speed_down_verdict(struct ata_device *dev) 1945 { 1946 const u64 j5mins = 5LLU * 60 * HZ, j10mins = 10LLU * 60 * HZ; 1947 u64 j64 = get_jiffies_64(); 1948 struct speed_down_verdict_arg arg; 1949 unsigned int verdict = 0; 1950 1951 /* scan past 5 mins of error history */ 1952 memset(&arg, 0, sizeof(arg)); 1953 arg.since = j64 - min(j64, j5mins); 1954 ata_ering_map(&dev->ering, speed_down_verdict_cb, &arg); 1955 1956 if (arg.nr_errors[ATA_ECAT_DUBIOUS_ATA_BUS] + 1957 arg.nr_errors[ATA_ECAT_DUBIOUS_TOUT_HSM] > 1) 1958 verdict |= ATA_EH_SPDN_SPEED_DOWN | 1959 ATA_EH_SPDN_FALLBACK_TO_PIO | ATA_EH_SPDN_KEEP_ERRORS; 1960 1961 if (arg.nr_errors[ATA_ECAT_DUBIOUS_TOUT_HSM] + 1962 arg.nr_errors[ATA_ECAT_DUBIOUS_UNK_DEV] > 1) 1963 verdict |= ATA_EH_SPDN_NCQ_OFF | ATA_EH_SPDN_KEEP_ERRORS; 1964 1965 if (arg.nr_errors[ATA_ECAT_ATA_BUS] + 1966 arg.nr_errors[ATA_ECAT_TOUT_HSM] + 1967 arg.nr_errors[ATA_ECAT_UNK_DEV] > 6) 1968 verdict |= ATA_EH_SPDN_FALLBACK_TO_PIO; 1969 1970 /* scan past 10 mins of error history */ 1971 memset(&arg, 0, sizeof(arg)); 1972 arg.since = j64 - min(j64, j10mins); 1973 ata_ering_map(&dev->ering, speed_down_verdict_cb, &arg); 1974 1975 if (arg.nr_errors[ATA_ECAT_TOUT_HSM] + 1976 arg.nr_errors[ATA_ECAT_UNK_DEV] > 3) 1977 verdict |= ATA_EH_SPDN_NCQ_OFF; 1978 1979 if (arg.nr_errors[ATA_ECAT_ATA_BUS] + 1980 arg.nr_errors[ATA_ECAT_TOUT_HSM] > 3 || 1981 arg.nr_errors[ATA_ECAT_UNK_DEV] > 6) 1982 verdict |= ATA_EH_SPDN_SPEED_DOWN; 1983 1984 return verdict; 1985 } 1986 1987 /** 1988 * ata_eh_speed_down - record error and speed down if necessary 1989 * @dev: Failed device 1990 * @eflags: mask of ATA_EFLAG_* flags 1991 * @err_mask: err_mask of the error 1992 * 1993 * Record error and examine error history to determine whether 1994 * adjusting transmission speed is necessary. It also sets 1995 * transmission limits appropriately if such adjustment is 1996 * necessary. 
1997 * 1998 * LOCKING: 1999 * Kernel thread context (may sleep). 2000 * 2001 * RETURNS: 2002 * Determined recovery action. 2003 */ 2004 static unsigned int ata_eh_speed_down(struct ata_device *dev, 2005 unsigned int eflags, unsigned int err_mask) 2006 { 2007 struct ata_link *link = ata_dev_phys_link(dev); 2008 int xfer_ok = 0; 2009 unsigned int verdict; 2010 unsigned int action = 0; 2011 2012 /* don't bother if Cat-0 error */ 2013 if (ata_eh_categorize_error(eflags, err_mask, &xfer_ok) == 0) 2014 return 0; 2015 2016 /* record error and determine whether speed down is necessary */ 2017 ata_ering_record(&dev->ering, eflags, err_mask); 2018 verdict = ata_eh_speed_down_verdict(dev); 2019 2020 /* turn off NCQ? */ 2021 if ((verdict & ATA_EH_SPDN_NCQ_OFF) && 2022 (dev->flags & (ATA_DFLAG_PIO | ATA_DFLAG_NCQ | 2023 ATA_DFLAG_NCQ_OFF)) == ATA_DFLAG_NCQ) { 2024 dev->flags |= ATA_DFLAG_NCQ_OFF; 2025 ata_dev_warn(dev, "NCQ disabled due to excessive errors\n"); 2026 goto done; 2027 } 2028 2029 /* speed down? */ 2030 if (verdict & ATA_EH_SPDN_SPEED_DOWN) { 2031 /* speed down SATA link speed if possible */ 2032 if (sata_down_spd_limit(link, 0) == 0) { 2033 action |= ATA_EH_RESET; 2034 goto done; 2035 } 2036 2037 /* lower transfer mode */ 2038 if (dev->spdn_cnt < 2) { 2039 static const int dma_dnxfer_sel[] = 2040 { ATA_DNXFER_DMA, ATA_DNXFER_40C }; 2041 static const int pio_dnxfer_sel[] = 2042 { ATA_DNXFER_PIO, ATA_DNXFER_FORCE_PIO0 }; 2043 int sel; 2044 2045 if (dev->xfer_shift != ATA_SHIFT_PIO) 2046 sel = dma_dnxfer_sel[dev->spdn_cnt]; 2047 else 2048 sel = pio_dnxfer_sel[dev->spdn_cnt]; 2049 2050 dev->spdn_cnt++; 2051 2052 if (ata_down_xfermask_limit(dev, sel) == 0) { 2053 action |= ATA_EH_RESET; 2054 goto done; 2055 } 2056 } 2057 } 2058 2059 /* Fall back to PIO? Slowing down to PIO is meaningless for 2060 * SATA ATA devices. Consider it only for PATA and SATAPI. 
2061 */ 2062 if ((verdict & ATA_EH_SPDN_FALLBACK_TO_PIO) && (dev->spdn_cnt >= 2) && 2063 (link->ap->cbl != ATA_CBL_SATA || dev->class == ATA_DEV_ATAPI) && 2064 (dev->xfer_shift != ATA_SHIFT_PIO)) { 2065 if (ata_down_xfermask_limit(dev, ATA_DNXFER_FORCE_PIO) == 0) { 2066 dev->spdn_cnt = 0; 2067 action |= ATA_EH_RESET; 2068 goto done; 2069 } 2070 } 2071 2072 return 0; 2073 done: 2074 /* device has been slowed down, blow error history */ 2075 if (!(verdict & ATA_EH_SPDN_KEEP_ERRORS)) 2076 ata_ering_clear(&dev->ering); 2077 return action; 2078 } 2079 2080 /** 2081 * ata_eh_worth_retry - analyze error and decide whether to retry 2082 * @qc: qc to possibly retry 2083 * 2084 * Look at the cause of the error and decide if a retry 2085 * might be useful or not. We don't want to retry media errors 2086 * because the drive itself has probably already taken 10-30 seconds 2087 * doing its own internal retries before reporting the failure. 2088 */ 2089 static inline int ata_eh_worth_retry(struct ata_queued_cmd *qc) 2090 { 2091 if (qc->err_mask & AC_ERR_MEDIA) 2092 return 0; /* don't retry media errors */ 2093 if (qc->flags & ATA_QCFLAG_IO) 2094 return 1; /* otherwise retry anything from fs stack */ 2095 if (qc->err_mask & AC_ERR_INVALID) 2096 return 0; /* don't retry these */ 2097 return qc->err_mask != AC_ERR_DEV; /* retry if not dev error */ 2098 } 2099 2100 /** 2101 * ata_eh_quiet - check if we need to be quiet about a command error 2102 * @qc: qc to check 2103 * 2104 * Look at the qc flags anbd its scsi command request flags to determine 2105 * if we need to be quiet about the command failure. 
2106 */ 2107 static inline bool ata_eh_quiet(struct ata_queued_cmd *qc) 2108 { 2109 if (qc->scsicmd && 2110 qc->scsicmd->request->rq_flags & RQF_QUIET) 2111 qc->flags |= ATA_QCFLAG_QUIET; 2112 return qc->flags & ATA_QCFLAG_QUIET; 2113 } 2114 2115 /** 2116 * ata_eh_link_autopsy - analyze error and determine recovery action 2117 * @link: host link to perform autopsy on 2118 * 2119 * Analyze why @link failed and determine which recovery actions 2120 * are needed. This function also sets more detailed AC_ERR_* 2121 * values and fills sense data for ATAPI CHECK SENSE. 2122 * 2123 * LOCKING: 2124 * Kernel thread context (may sleep). 2125 */ 2126 static void ata_eh_link_autopsy(struct ata_link *link) 2127 { 2128 struct ata_port *ap = link->ap; 2129 struct ata_eh_context *ehc = &link->eh_context; 2130 struct ata_queued_cmd *qc; 2131 struct ata_device *dev; 2132 unsigned int all_err_mask = 0, eflags = 0; 2133 int tag, nr_failed = 0, nr_quiet = 0; 2134 u32 serror; 2135 int rc; 2136 2137 DPRINTK("ENTER\n"); 2138 2139 if (ehc->i.flags & ATA_EHI_NO_AUTOPSY) 2140 return; 2141 2142 /* obtain and analyze SError */ 2143 rc = sata_scr_read(link, SCR_ERROR, &serror); 2144 if (rc == 0) { 2145 ehc->i.serror |= serror; 2146 ata_eh_analyze_serror(link); 2147 } else if (rc != -EOPNOTSUPP) { 2148 /* SError read failed, force reset and probing */ 2149 ehc->i.probe_mask |= ATA_ALL_DEVICES; 2150 ehc->i.action |= ATA_EH_RESET; 2151 ehc->i.err_mask |= AC_ERR_OTHER; 2152 } 2153 2154 /* analyze NCQ failure */ 2155 ata_eh_analyze_ncq_error(link); 2156 2157 /* any real error trumps AC_ERR_OTHER */ 2158 if (ehc->i.err_mask & ~AC_ERR_OTHER) 2159 ehc->i.err_mask &= ~AC_ERR_OTHER; 2160 2161 all_err_mask |= ehc->i.err_mask; 2162 2163 ata_qc_for_each_raw(ap, qc, tag) { 2164 if (!(qc->flags & ATA_QCFLAG_FAILED) || 2165 ata_dev_phys_link(qc->dev) != link) 2166 continue; 2167 2168 /* inherit upper level err_mask */ 2169 qc->err_mask |= ehc->i.err_mask; 2170 2171 /* analyze TF */ 2172 ehc->i.action |= 
ata_eh_analyze_tf(qc, &qc->result_tf); 2173 2174 /* DEV errors are probably spurious in case of ATA_BUS error */ 2175 if (qc->err_mask & AC_ERR_ATA_BUS) 2176 qc->err_mask &= ~(AC_ERR_DEV | AC_ERR_MEDIA | 2177 AC_ERR_INVALID); 2178 2179 /* any real error trumps unknown error */ 2180 if (qc->err_mask & ~AC_ERR_OTHER) 2181 qc->err_mask &= ~AC_ERR_OTHER; 2182 2183 /* 2184 * SENSE_VALID trumps dev/unknown error and revalidation. Upper 2185 * layers will determine whether the command is worth retrying 2186 * based on the sense data and device class/type. Otherwise, 2187 * determine directly if the command is worth retrying using its 2188 * error mask and flags. 2189 */ 2190 if (qc->flags & ATA_QCFLAG_SENSE_VALID) 2191 qc->err_mask &= ~(AC_ERR_DEV | AC_ERR_OTHER); 2192 else if (ata_eh_worth_retry(qc)) 2193 qc->flags |= ATA_QCFLAG_RETRY; 2194 2195 /* accumulate error info */ 2196 ehc->i.dev = qc->dev; 2197 all_err_mask |= qc->err_mask; 2198 if (qc->flags & ATA_QCFLAG_IO) 2199 eflags |= ATA_EFLAG_IS_IO; 2200 trace_ata_eh_link_autopsy_qc(qc); 2201 2202 /* Count quiet errors */ 2203 if (ata_eh_quiet(qc)) 2204 nr_quiet++; 2205 nr_failed++; 2206 } 2207 2208 /* If all failed commands requested silence, then be quiet */ 2209 if (nr_quiet == nr_failed) 2210 ehc->i.flags |= ATA_EHI_QUIET; 2211 2212 /* enforce default EH actions */ 2213 if (ap->pflags & ATA_PFLAG_FROZEN || 2214 all_err_mask & (AC_ERR_HSM | AC_ERR_TIMEOUT)) 2215 ehc->i.action |= ATA_EH_RESET; 2216 else if (((eflags & ATA_EFLAG_IS_IO) && all_err_mask) || 2217 (!(eflags & ATA_EFLAG_IS_IO) && (all_err_mask & ~AC_ERR_DEV))) 2218 ehc->i.action |= ATA_EH_REVALIDATE; 2219 2220 /* If we have offending qcs and the associated failed device, 2221 * perform per-dev EH action only on the offending device. 
2222 */ 2223 if (ehc->i.dev) { 2224 ehc->i.dev_action[ehc->i.dev->devno] |= 2225 ehc->i.action & ATA_EH_PERDEV_MASK; 2226 ehc->i.action &= ~ATA_EH_PERDEV_MASK; 2227 } 2228 2229 /* propagate timeout to host link */ 2230 if ((all_err_mask & AC_ERR_TIMEOUT) && !ata_is_host_link(link)) 2231 ap->link.eh_context.i.err_mask |= AC_ERR_TIMEOUT; 2232 2233 /* record error and consider speeding down */ 2234 dev = ehc->i.dev; 2235 if (!dev && ((ata_link_max_devices(link) == 1 && 2236 ata_dev_enabled(link->device)))) 2237 dev = link->device; 2238 2239 if (dev) { 2240 if (dev->flags & ATA_DFLAG_DUBIOUS_XFER) 2241 eflags |= ATA_EFLAG_DUBIOUS_XFER; 2242 ehc->i.action |= ata_eh_speed_down(dev, eflags, all_err_mask); 2243 trace_ata_eh_link_autopsy(dev, ehc->i.action, all_err_mask); 2244 } 2245 DPRINTK("EXIT\n"); 2246 } 2247 2248 /** 2249 * ata_eh_autopsy - analyze error and determine recovery action 2250 * @ap: host port to perform autopsy on 2251 * 2252 * Analyze all links of @ap and determine why they failed and 2253 * which recovery actions are needed. 2254 * 2255 * LOCKING: 2256 * Kernel thread context (may sleep). 2257 */ 2258 void ata_eh_autopsy(struct ata_port *ap) 2259 { 2260 struct ata_link *link; 2261 2262 ata_for_each_link(link, ap, EDGE) 2263 ata_eh_link_autopsy(link); 2264 2265 /* Handle the frigging slave link. Autopsy is done similarly 2266 * but actions and flags are transferred over to the master 2267 * link and handled from there. 
2268 */ 2269 if (ap->slave_link) { 2270 struct ata_eh_context *mehc = &ap->link.eh_context; 2271 struct ata_eh_context *sehc = &ap->slave_link->eh_context; 2272 2273 /* transfer control flags from master to slave */ 2274 sehc->i.flags |= mehc->i.flags & ATA_EHI_TO_SLAVE_MASK; 2275 2276 /* perform autopsy on the slave link */ 2277 ata_eh_link_autopsy(ap->slave_link); 2278 2279 /* transfer actions from slave to master and clear slave */ 2280 ata_eh_about_to_do(ap->slave_link, NULL, ATA_EH_ALL_ACTIONS); 2281 mehc->i.action |= sehc->i.action; 2282 mehc->i.dev_action[1] |= sehc->i.dev_action[1]; 2283 mehc->i.flags |= sehc->i.flags; 2284 ata_eh_done(ap->slave_link, NULL, ATA_EH_ALL_ACTIONS); 2285 } 2286 2287 /* Autopsy of fanout ports can affect host link autopsy. 2288 * Perform host link autopsy last. 2289 */ 2290 if (sata_pmp_attached(ap)) 2291 ata_eh_link_autopsy(&ap->link); 2292 } 2293 2294 /** 2295 * ata_get_cmd_descript - get description for ATA command 2296 * @command: ATA command code to get description for 2297 * 2298 * Return a textual description of the given command, or NULL if the 2299 * command is not known. 
2300 * 2301 * LOCKING: 2302 * None 2303 */ 2304 const char *ata_get_cmd_descript(u8 command) 2305 { 2306 #ifdef CONFIG_ATA_VERBOSE_ERROR 2307 static const struct 2308 { 2309 u8 command; 2310 const char *text; 2311 } cmd_descr[] = { 2312 { ATA_CMD_DEV_RESET, "DEVICE RESET" }, 2313 { ATA_CMD_CHK_POWER, "CHECK POWER MODE" }, 2314 { ATA_CMD_STANDBY, "STANDBY" }, 2315 { ATA_CMD_IDLE, "IDLE" }, 2316 { ATA_CMD_EDD, "EXECUTE DEVICE DIAGNOSTIC" }, 2317 { ATA_CMD_DOWNLOAD_MICRO, "DOWNLOAD MICROCODE" }, 2318 { ATA_CMD_DOWNLOAD_MICRO_DMA, "DOWNLOAD MICROCODE DMA" }, 2319 { ATA_CMD_NOP, "NOP" }, 2320 { ATA_CMD_FLUSH, "FLUSH CACHE" }, 2321 { ATA_CMD_FLUSH_EXT, "FLUSH CACHE EXT" }, 2322 { ATA_CMD_ID_ATA, "IDENTIFY DEVICE" }, 2323 { ATA_CMD_ID_ATAPI, "IDENTIFY PACKET DEVICE" }, 2324 { ATA_CMD_SERVICE, "SERVICE" }, 2325 { ATA_CMD_READ, "READ DMA" }, 2326 { ATA_CMD_READ_EXT, "READ DMA EXT" }, 2327 { ATA_CMD_READ_QUEUED, "READ DMA QUEUED" }, 2328 { ATA_CMD_READ_STREAM_EXT, "READ STREAM EXT" }, 2329 { ATA_CMD_READ_STREAM_DMA_EXT, "READ STREAM DMA EXT" }, 2330 { ATA_CMD_WRITE, "WRITE DMA" }, 2331 { ATA_CMD_WRITE_EXT, "WRITE DMA EXT" }, 2332 { ATA_CMD_WRITE_QUEUED, "WRITE DMA QUEUED EXT" }, 2333 { ATA_CMD_WRITE_STREAM_EXT, "WRITE STREAM EXT" }, 2334 { ATA_CMD_WRITE_STREAM_DMA_EXT, "WRITE STREAM DMA EXT" }, 2335 { ATA_CMD_WRITE_FUA_EXT, "WRITE DMA FUA EXT" }, 2336 { ATA_CMD_WRITE_QUEUED_FUA_EXT, "WRITE DMA QUEUED FUA EXT" }, 2337 { ATA_CMD_FPDMA_READ, "READ FPDMA QUEUED" }, 2338 { ATA_CMD_FPDMA_WRITE, "WRITE FPDMA QUEUED" }, 2339 { ATA_CMD_FPDMA_SEND, "SEND FPDMA QUEUED" }, 2340 { ATA_CMD_FPDMA_RECV, "RECEIVE FPDMA QUEUED" }, 2341 { ATA_CMD_PIO_READ, "READ SECTOR(S)" }, 2342 { ATA_CMD_PIO_READ_EXT, "READ SECTOR(S) EXT" }, 2343 { ATA_CMD_PIO_WRITE, "WRITE SECTOR(S)" }, 2344 { ATA_CMD_PIO_WRITE_EXT, "WRITE SECTOR(S) EXT" }, 2345 { ATA_CMD_READ_MULTI, "READ MULTIPLE" }, 2346 { ATA_CMD_READ_MULTI_EXT, "READ MULTIPLE EXT" }, 2347 { ATA_CMD_WRITE_MULTI, "WRITE MULTIPLE" }, 2348 { 
ATA_CMD_WRITE_MULTI_EXT, "WRITE MULTIPLE EXT" }, 2349 { ATA_CMD_WRITE_MULTI_FUA_EXT, "WRITE MULTIPLE FUA EXT" }, 2350 { ATA_CMD_SET_FEATURES, "SET FEATURES" }, 2351 { ATA_CMD_SET_MULTI, "SET MULTIPLE MODE" }, 2352 { ATA_CMD_VERIFY, "READ VERIFY SECTOR(S)" }, 2353 { ATA_CMD_VERIFY_EXT, "READ VERIFY SECTOR(S) EXT" }, 2354 { ATA_CMD_WRITE_UNCORR_EXT, "WRITE UNCORRECTABLE EXT" }, 2355 { ATA_CMD_STANDBYNOW1, "STANDBY IMMEDIATE" }, 2356 { ATA_CMD_IDLEIMMEDIATE, "IDLE IMMEDIATE" }, 2357 { ATA_CMD_SLEEP, "SLEEP" }, 2358 { ATA_CMD_INIT_DEV_PARAMS, "INITIALIZE DEVICE PARAMETERS" }, 2359 { ATA_CMD_READ_NATIVE_MAX, "READ NATIVE MAX ADDRESS" }, 2360 { ATA_CMD_READ_NATIVE_MAX_EXT, "READ NATIVE MAX ADDRESS EXT" }, 2361 { ATA_CMD_SET_MAX, "SET MAX ADDRESS" }, 2362 { ATA_CMD_SET_MAX_EXT, "SET MAX ADDRESS EXT" }, 2363 { ATA_CMD_READ_LOG_EXT, "READ LOG EXT" }, 2364 { ATA_CMD_WRITE_LOG_EXT, "WRITE LOG EXT" }, 2365 { ATA_CMD_READ_LOG_DMA_EXT, "READ LOG DMA EXT" }, 2366 { ATA_CMD_WRITE_LOG_DMA_EXT, "WRITE LOG DMA EXT" }, 2367 { ATA_CMD_TRUSTED_NONDATA, "TRUSTED NON-DATA" }, 2368 { ATA_CMD_TRUSTED_RCV, "TRUSTED RECEIVE" }, 2369 { ATA_CMD_TRUSTED_RCV_DMA, "TRUSTED RECEIVE DMA" }, 2370 { ATA_CMD_TRUSTED_SND, "TRUSTED SEND" }, 2371 { ATA_CMD_TRUSTED_SND_DMA, "TRUSTED SEND DMA" }, 2372 { ATA_CMD_PMP_READ, "READ BUFFER" }, 2373 { ATA_CMD_PMP_READ_DMA, "READ BUFFER DMA" }, 2374 { ATA_CMD_PMP_WRITE, "WRITE BUFFER" }, 2375 { ATA_CMD_PMP_WRITE_DMA, "WRITE BUFFER DMA" }, 2376 { ATA_CMD_CONF_OVERLAY, "DEVICE CONFIGURATION OVERLAY" }, 2377 { ATA_CMD_SEC_SET_PASS, "SECURITY SET PASSWORD" }, 2378 { ATA_CMD_SEC_UNLOCK, "SECURITY UNLOCK" }, 2379 { ATA_CMD_SEC_ERASE_PREP, "SECURITY ERASE PREPARE" }, 2380 { ATA_CMD_SEC_ERASE_UNIT, "SECURITY ERASE UNIT" }, 2381 { ATA_CMD_SEC_FREEZE_LOCK, "SECURITY FREEZE LOCK" }, 2382 { ATA_CMD_SEC_DISABLE_PASS, "SECURITY DISABLE PASSWORD" }, 2383 { ATA_CMD_CONFIG_STREAM, "CONFIGURE STREAM" }, 2384 { ATA_CMD_SMART, "SMART" }, 2385 { ATA_CMD_MEDIA_LOCK, "DOOR LOCK" }, 2386 
{ ATA_CMD_MEDIA_UNLOCK, "DOOR UNLOCK" }, 2387 { ATA_CMD_DSM, "DATA SET MANAGEMENT" }, 2388 { ATA_CMD_CHK_MED_CRD_TYP, "CHECK MEDIA CARD TYPE" }, 2389 { ATA_CMD_CFA_REQ_EXT_ERR, "CFA REQUEST EXTENDED ERROR" }, 2390 { ATA_CMD_CFA_WRITE_NE, "CFA WRITE SECTORS WITHOUT ERASE" }, 2391 { ATA_CMD_CFA_TRANS_SECT, "CFA TRANSLATE SECTOR" }, 2392 { ATA_CMD_CFA_ERASE, "CFA ERASE SECTORS" }, 2393 { ATA_CMD_CFA_WRITE_MULT_NE, "CFA WRITE MULTIPLE WITHOUT ERASE" }, 2394 { ATA_CMD_REQ_SENSE_DATA, "REQUEST SENSE DATA EXT" }, 2395 { ATA_CMD_SANITIZE_DEVICE, "SANITIZE DEVICE" }, 2396 { ATA_CMD_ZAC_MGMT_IN, "ZAC MANAGEMENT IN" }, 2397 { ATA_CMD_ZAC_MGMT_OUT, "ZAC MANAGEMENT OUT" }, 2398 { ATA_CMD_READ_LONG, "READ LONG (with retries)" }, 2399 { ATA_CMD_READ_LONG_ONCE, "READ LONG (without retries)" }, 2400 { ATA_CMD_WRITE_LONG, "WRITE LONG (with retries)" }, 2401 { ATA_CMD_WRITE_LONG_ONCE, "WRITE LONG (without retries)" }, 2402 { ATA_CMD_RESTORE, "RECALIBRATE" }, 2403 { 0, NULL } /* terminate list */ 2404 }; 2405 2406 unsigned int i; 2407 for (i = 0; cmd_descr[i].text; i++) 2408 if (cmd_descr[i].command == command) 2409 return cmd_descr[i].text; 2410 #endif 2411 2412 return NULL; 2413 } 2414 EXPORT_SYMBOL_GPL(ata_get_cmd_descript); 2415 2416 /** 2417 * ata_eh_link_report - report error handling to user 2418 * @link: ATA link EH is going on 2419 * 2420 * Report EH to user. 2421 * 2422 * LOCKING: 2423 * None. 
2424 */ 2425 static void ata_eh_link_report(struct ata_link *link) 2426 { 2427 struct ata_port *ap = link->ap; 2428 struct ata_eh_context *ehc = &link->eh_context; 2429 struct ata_queued_cmd *qc; 2430 const char *frozen, *desc; 2431 char tries_buf[6] = ""; 2432 int tag, nr_failed = 0; 2433 2434 if (ehc->i.flags & ATA_EHI_QUIET) 2435 return; 2436 2437 desc = NULL; 2438 if (ehc->i.desc[0] != '\0') 2439 desc = ehc->i.desc; 2440 2441 ata_qc_for_each_raw(ap, qc, tag) { 2442 if (!(qc->flags & ATA_QCFLAG_FAILED) || 2443 ata_dev_phys_link(qc->dev) != link || 2444 ((qc->flags & ATA_QCFLAG_QUIET) && 2445 qc->err_mask == AC_ERR_DEV)) 2446 continue; 2447 if (qc->flags & ATA_QCFLAG_SENSE_VALID && !qc->err_mask) 2448 continue; 2449 2450 nr_failed++; 2451 } 2452 2453 if (!nr_failed && !ehc->i.err_mask) 2454 return; 2455 2456 frozen = ""; 2457 if (ap->pflags & ATA_PFLAG_FROZEN) 2458 frozen = " frozen"; 2459 2460 if (ap->eh_tries < ATA_EH_MAX_TRIES) 2461 snprintf(tries_buf, sizeof(tries_buf), " t%d", 2462 ap->eh_tries); 2463 2464 if (ehc->i.dev) { 2465 ata_dev_err(ehc->i.dev, "exception Emask 0x%x " 2466 "SAct 0x%x SErr 0x%x action 0x%x%s%s\n", 2467 ehc->i.err_mask, link->sactive, ehc->i.serror, 2468 ehc->i.action, frozen, tries_buf); 2469 if (desc) 2470 ata_dev_err(ehc->i.dev, "%s\n", desc); 2471 } else { 2472 ata_link_err(link, "exception Emask 0x%x " 2473 "SAct 0x%x SErr 0x%x action 0x%x%s%s\n", 2474 ehc->i.err_mask, link->sactive, ehc->i.serror, 2475 ehc->i.action, frozen, tries_buf); 2476 if (desc) 2477 ata_link_err(link, "%s\n", desc); 2478 } 2479 2480 #ifdef CONFIG_ATA_VERBOSE_ERROR 2481 if (ehc->i.serror) 2482 ata_link_err(link, 2483 "SError: { %s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s}\n", 2484 ehc->i.serror & SERR_DATA_RECOVERED ? "RecovData " : "", 2485 ehc->i.serror & SERR_COMM_RECOVERED ? "RecovComm " : "", 2486 ehc->i.serror & SERR_DATA ? "UnrecovData " : "", 2487 ehc->i.serror & SERR_PERSISTENT ? "Persist " : "", 2488 ehc->i.serror & SERR_PROTOCOL ? 
"Proto " : "", 2489 ehc->i.serror & SERR_INTERNAL ? "HostInt " : "", 2490 ehc->i.serror & SERR_PHYRDY_CHG ? "PHYRdyChg " : "", 2491 ehc->i.serror & SERR_PHY_INT_ERR ? "PHYInt " : "", 2492 ehc->i.serror & SERR_COMM_WAKE ? "CommWake " : "", 2493 ehc->i.serror & SERR_10B_8B_ERR ? "10B8B " : "", 2494 ehc->i.serror & SERR_DISPARITY ? "Dispar " : "", 2495 ehc->i.serror & SERR_CRC ? "BadCRC " : "", 2496 ehc->i.serror & SERR_HANDSHAKE ? "Handshk " : "", 2497 ehc->i.serror & SERR_LINK_SEQ_ERR ? "LinkSeq " : "", 2498 ehc->i.serror & SERR_TRANS_ST_ERROR ? "TrStaTrns " : "", 2499 ehc->i.serror & SERR_UNRECOG_FIS ? "UnrecFIS " : "", 2500 ehc->i.serror & SERR_DEV_XCHG ? "DevExch " : ""); 2501 #endif 2502 2503 ata_qc_for_each_raw(ap, qc, tag) { 2504 struct ata_taskfile *cmd = &qc->tf, *res = &qc->result_tf; 2505 char data_buf[20] = ""; 2506 char cdb_buf[70] = ""; 2507 2508 if (!(qc->flags & ATA_QCFLAG_FAILED) || 2509 ata_dev_phys_link(qc->dev) != link || !qc->err_mask) 2510 continue; 2511 2512 if (qc->dma_dir != DMA_NONE) { 2513 static const char *dma_str[] = { 2514 [DMA_BIDIRECTIONAL] = "bidi", 2515 [DMA_TO_DEVICE] = "out", 2516 [DMA_FROM_DEVICE] = "in", 2517 }; 2518 const char *prot_str = NULL; 2519 2520 switch (qc->tf.protocol) { 2521 case ATA_PROT_UNKNOWN: 2522 prot_str = "unknown"; 2523 break; 2524 case ATA_PROT_NODATA: 2525 prot_str = "nodata"; 2526 break; 2527 case ATA_PROT_PIO: 2528 prot_str = "pio"; 2529 break; 2530 case ATA_PROT_DMA: 2531 prot_str = "dma"; 2532 break; 2533 case ATA_PROT_NCQ: 2534 prot_str = "ncq dma"; 2535 break; 2536 case ATA_PROT_NCQ_NODATA: 2537 prot_str = "ncq nodata"; 2538 break; 2539 case ATAPI_PROT_NODATA: 2540 prot_str = "nodata"; 2541 break; 2542 case ATAPI_PROT_PIO: 2543 prot_str = "pio"; 2544 break; 2545 case ATAPI_PROT_DMA: 2546 prot_str = "dma"; 2547 break; 2548 } 2549 snprintf(data_buf, sizeof(data_buf), " %s %u %s", 2550 prot_str, qc->nbytes, dma_str[qc->dma_dir]); 2551 } 2552 2553 if (ata_is_atapi(qc->tf.protocol)) { 2554 const u8 *cdb = 
qc->cdb; 2555 size_t cdb_len = qc->dev->cdb_len; 2556 2557 if (qc->scsicmd) { 2558 cdb = qc->scsicmd->cmnd; 2559 cdb_len = qc->scsicmd->cmd_len; 2560 } 2561 __scsi_format_command(cdb_buf, sizeof(cdb_buf), 2562 cdb, cdb_len); 2563 } else { 2564 const char *descr = ata_get_cmd_descript(cmd->command); 2565 if (descr) 2566 ata_dev_err(qc->dev, "failed command: %s\n", 2567 descr); 2568 } 2569 2570 ata_dev_err(qc->dev, 2571 "cmd %02x/%02x:%02x:%02x:%02x:%02x/%02x:%02x:%02x:%02x:%02x/%02x " 2572 "tag %d%s\n %s" 2573 "res %02x/%02x:%02x:%02x:%02x:%02x/%02x:%02x:%02x:%02x:%02x/%02x " 2574 "Emask 0x%x (%s)%s\n", 2575 cmd->command, cmd->feature, cmd->nsect, 2576 cmd->lbal, cmd->lbam, cmd->lbah, 2577 cmd->hob_feature, cmd->hob_nsect, 2578 cmd->hob_lbal, cmd->hob_lbam, cmd->hob_lbah, 2579 cmd->device, qc->tag, data_buf, cdb_buf, 2580 res->command, res->feature, res->nsect, 2581 res->lbal, res->lbam, res->lbah, 2582 res->hob_feature, res->hob_nsect, 2583 res->hob_lbal, res->hob_lbam, res->hob_lbah, 2584 res->device, qc->err_mask, ata_err_string(qc->err_mask), 2585 qc->err_mask & AC_ERR_NCQ ? " <F>" : ""); 2586 2587 #ifdef CONFIG_ATA_VERBOSE_ERROR 2588 if (res->command & (ATA_BUSY | ATA_DRDY | ATA_DF | ATA_DRQ | 2589 ATA_SENSE | ATA_ERR)) { 2590 if (res->command & ATA_BUSY) 2591 ata_dev_err(qc->dev, "status: { Busy }\n"); 2592 else 2593 ata_dev_err(qc->dev, "status: { %s%s%s%s%s}\n", 2594 res->command & ATA_DRDY ? "DRDY " : "", 2595 res->command & ATA_DF ? "DF " : "", 2596 res->command & ATA_DRQ ? "DRQ " : "", 2597 res->command & ATA_SENSE ? "SENSE " : "", 2598 res->command & ATA_ERR ? "ERR " : ""); 2599 } 2600 2601 if (cmd->command != ATA_CMD_PACKET && 2602 (res->feature & (ATA_ICRC | ATA_UNC | ATA_AMNF | 2603 ATA_IDNF | ATA_ABORTED))) 2604 ata_dev_err(qc->dev, "error: { %s%s%s%s%s}\n", 2605 res->feature & ATA_ICRC ? "ICRC " : "", 2606 res->feature & ATA_UNC ? "UNC " : "", 2607 res->feature & ATA_AMNF ? "AMNF " : "", 2608 res->feature & ATA_IDNF ? 
"IDNF " : "", 2609 res->feature & ATA_ABORTED ? "ABRT " : ""); 2610 #endif 2611 } 2612 } 2613 2614 /** 2615 * ata_eh_report - report error handling to user 2616 * @ap: ATA port to report EH about 2617 * 2618 * Report EH to user. 2619 * 2620 * LOCKING: 2621 * None. 2622 */ 2623 void ata_eh_report(struct ata_port *ap) 2624 { 2625 struct ata_link *link; 2626 2627 ata_for_each_link(link, ap, HOST_FIRST) 2628 ata_eh_link_report(link); 2629 } 2630 2631 static int ata_do_reset(struct ata_link *link, ata_reset_fn_t reset, 2632 unsigned int *classes, unsigned long deadline, 2633 bool clear_classes) 2634 { 2635 struct ata_device *dev; 2636 2637 if (clear_classes) 2638 ata_for_each_dev(dev, link, ALL) 2639 classes[dev->devno] = ATA_DEV_UNKNOWN; 2640 2641 return reset(link, classes, deadline); 2642 } 2643 2644 static int ata_eh_followup_srst_needed(struct ata_link *link, int rc) 2645 { 2646 if ((link->flags & ATA_LFLAG_NO_SRST) || ata_link_offline(link)) 2647 return 0; 2648 if (rc == -EAGAIN) 2649 return 1; 2650 if (sata_pmp_supported(link->ap) && ata_is_host_link(link)) 2651 return 1; 2652 return 0; 2653 } 2654 2655 int ata_eh_reset(struct ata_link *link, int classify, 2656 ata_prereset_fn_t prereset, ata_reset_fn_t softreset, 2657 ata_reset_fn_t hardreset, ata_postreset_fn_t postreset) 2658 { 2659 struct ata_port *ap = link->ap; 2660 struct ata_link *slave = ap->slave_link; 2661 struct ata_eh_context *ehc = &link->eh_context; 2662 struct ata_eh_context *sehc = slave ? 
&slave->eh_context : NULL; 2663 unsigned int *classes = ehc->classes; 2664 unsigned int lflags = link->flags; 2665 int verbose = !(ehc->i.flags & ATA_EHI_QUIET); 2666 int max_tries = 0, try = 0; 2667 struct ata_link *failed_link; 2668 struct ata_device *dev; 2669 unsigned long deadline, now; 2670 ata_reset_fn_t reset; 2671 unsigned long flags; 2672 u32 sstatus; 2673 int nr_unknown, rc; 2674 2675 /* 2676 * Prepare to reset 2677 */ 2678 while (ata_eh_reset_timeouts[max_tries] != ULONG_MAX) 2679 max_tries++; 2680 if (link->flags & ATA_LFLAG_RST_ONCE) 2681 max_tries = 1; 2682 if (link->flags & ATA_LFLAG_NO_HRST) 2683 hardreset = NULL; 2684 if (link->flags & ATA_LFLAG_NO_SRST) 2685 softreset = NULL; 2686 2687 /* make sure each reset attempt is at least COOL_DOWN apart */ 2688 if (ehc->i.flags & ATA_EHI_DID_RESET) { 2689 now = jiffies; 2690 WARN_ON(time_after(ehc->last_reset, now)); 2691 deadline = ata_deadline(ehc->last_reset, 2692 ATA_EH_RESET_COOL_DOWN); 2693 if (time_before(now, deadline)) 2694 schedule_timeout_uninterruptible(deadline - now); 2695 } 2696 2697 spin_lock_irqsave(ap->lock, flags); 2698 ap->pflags |= ATA_PFLAG_RESETTING; 2699 spin_unlock_irqrestore(ap->lock, flags); 2700 2701 ata_eh_about_to_do(link, NULL, ATA_EH_RESET); 2702 2703 ata_for_each_dev(dev, link, ALL) { 2704 /* If we issue an SRST then an ATA drive (not ATAPI) 2705 * may change configuration and be in PIO0 timing. If 2706 * we do a hard reset (or are coming from power on) 2707 * this is true for ATA or ATAPI. Until we've set a 2708 * suitable controller mode we should not touch the 2709 * bus as we may be talking too fast. 2710 */ 2711 dev->pio_mode = XFER_PIO_0; 2712 dev->dma_mode = 0xff; 2713 2714 /* If the controller has a pio mode setup function 2715 * then use it to set the chipset to rights. Don't 2716 * touch the DMA setup as that will be dealt with when 2717 * configuring devices. 
2718 */ 2719 if (ap->ops->set_piomode) 2720 ap->ops->set_piomode(ap, dev); 2721 } 2722 2723 /* prefer hardreset */ 2724 reset = NULL; 2725 ehc->i.action &= ~ATA_EH_RESET; 2726 if (hardreset) { 2727 reset = hardreset; 2728 ehc->i.action |= ATA_EH_HARDRESET; 2729 } else if (softreset) { 2730 reset = softreset; 2731 ehc->i.action |= ATA_EH_SOFTRESET; 2732 } 2733 2734 if (prereset) { 2735 unsigned long deadline = ata_deadline(jiffies, 2736 ATA_EH_PRERESET_TIMEOUT); 2737 2738 if (slave) { 2739 sehc->i.action &= ~ATA_EH_RESET; 2740 sehc->i.action |= ehc->i.action; 2741 } 2742 2743 rc = prereset(link, deadline); 2744 2745 /* If present, do prereset on slave link too. Reset 2746 * is skipped iff both master and slave links report 2747 * -ENOENT or clear ATA_EH_RESET. 2748 */ 2749 if (slave && (rc == 0 || rc == -ENOENT)) { 2750 int tmp; 2751 2752 tmp = prereset(slave, deadline); 2753 if (tmp != -ENOENT) 2754 rc = tmp; 2755 2756 ehc->i.action |= sehc->i.action; 2757 } 2758 2759 if (rc) { 2760 if (rc == -ENOENT) { 2761 ata_link_dbg(link, "port disabled--ignoring\n"); 2762 ehc->i.action &= ~ATA_EH_RESET; 2763 2764 ata_for_each_dev(dev, link, ALL) 2765 classes[dev->devno] = ATA_DEV_NONE; 2766 2767 rc = 0; 2768 } else 2769 ata_link_err(link, 2770 "prereset failed (errno=%d)\n", 2771 rc); 2772 goto out; 2773 } 2774 2775 /* prereset() might have cleared ATA_EH_RESET. If so, 2776 * bang classes, thaw and return. 
2777 */ 2778 if (reset && !(ehc->i.action & ATA_EH_RESET)) { 2779 ata_for_each_dev(dev, link, ALL) 2780 classes[dev->devno] = ATA_DEV_NONE; 2781 if ((ap->pflags & ATA_PFLAG_FROZEN) && 2782 ata_is_host_link(link)) 2783 ata_eh_thaw_port(ap); 2784 rc = 0; 2785 goto out; 2786 } 2787 } 2788 2789 retry: 2790 /* 2791 * Perform reset 2792 */ 2793 if (ata_is_host_link(link)) 2794 ata_eh_freeze_port(ap); 2795 2796 deadline = ata_deadline(jiffies, ata_eh_reset_timeouts[try++]); 2797 2798 if (reset) { 2799 if (verbose) 2800 ata_link_info(link, "%s resetting link\n", 2801 reset == softreset ? "soft" : "hard"); 2802 2803 /* mark that this EH session started with reset */ 2804 ehc->last_reset = jiffies; 2805 if (reset == hardreset) 2806 ehc->i.flags |= ATA_EHI_DID_HARDRESET; 2807 else 2808 ehc->i.flags |= ATA_EHI_DID_SOFTRESET; 2809 2810 rc = ata_do_reset(link, reset, classes, deadline, true); 2811 if (rc && rc != -EAGAIN) { 2812 failed_link = link; 2813 goto fail; 2814 } 2815 2816 /* hardreset slave link if existent */ 2817 if (slave && reset == hardreset) { 2818 int tmp; 2819 2820 if (verbose) 2821 ata_link_info(slave, "hard resetting link\n"); 2822 2823 ata_eh_about_to_do(slave, NULL, ATA_EH_RESET); 2824 tmp = ata_do_reset(slave, reset, classes, deadline, 2825 false); 2826 switch (tmp) { 2827 case -EAGAIN: 2828 rc = -EAGAIN; 2829 case 0: 2830 break; 2831 default: 2832 failed_link = slave; 2833 rc = tmp; 2834 goto fail; 2835 } 2836 } 2837 2838 /* perform follow-up SRST if necessary */ 2839 if (reset == hardreset && 2840 ata_eh_followup_srst_needed(link, rc)) { 2841 reset = softreset; 2842 2843 if (!reset) { 2844 ata_link_err(link, 2845 "follow-up softreset required but no softreset available\n"); 2846 failed_link = link; 2847 rc = -EINVAL; 2848 goto fail; 2849 } 2850 2851 ata_eh_about_to_do(link, NULL, ATA_EH_RESET); 2852 rc = ata_do_reset(link, reset, classes, deadline, true); 2853 if (rc) { 2854 failed_link = link; 2855 goto fail; 2856 } 2857 } 2858 } else { 2859 if (verbose) 
2860 ata_link_info(link, 2861 "no reset method available, skipping reset\n"); 2862 if (!(lflags & ATA_LFLAG_ASSUME_CLASS)) 2863 lflags |= ATA_LFLAG_ASSUME_ATA; 2864 } 2865 2866 /* 2867 * Post-reset processing 2868 */ 2869 ata_for_each_dev(dev, link, ALL) { 2870 /* After the reset, the device state is PIO 0 and the 2871 * controller state is undefined. Reset also wakes up 2872 * drives from sleeping mode. 2873 */ 2874 dev->pio_mode = XFER_PIO_0; 2875 dev->flags &= ~ATA_DFLAG_SLEEPING; 2876 2877 if (ata_phys_link_offline(ata_dev_phys_link(dev))) 2878 continue; 2879 2880 /* apply class override */ 2881 if (lflags & ATA_LFLAG_ASSUME_ATA) 2882 classes[dev->devno] = ATA_DEV_ATA; 2883 else if (lflags & ATA_LFLAG_ASSUME_SEMB) 2884 classes[dev->devno] = ATA_DEV_SEMB_UNSUP; 2885 } 2886 2887 /* record current link speed */ 2888 if (sata_scr_read(link, SCR_STATUS, &sstatus) == 0) 2889 link->sata_spd = (sstatus >> 4) & 0xf; 2890 if (slave && sata_scr_read(slave, SCR_STATUS, &sstatus) == 0) 2891 slave->sata_spd = (sstatus >> 4) & 0xf; 2892 2893 /* thaw the port */ 2894 if (ata_is_host_link(link)) 2895 ata_eh_thaw_port(ap); 2896 2897 /* postreset() should clear hardware SError. Although SError 2898 * is cleared during link resume, clearing SError here is 2899 * necessary as some PHYs raise hotplug events after SRST. 2900 * This introduces race condition where hotplug occurs between 2901 * reset and here. This race is mediated by cross checking 2902 * link onlineness and classification result later. 2903 */ 2904 if (postreset) { 2905 postreset(link, classes); 2906 if (slave) 2907 postreset(slave, classes); 2908 } 2909 2910 /* 2911 * Some controllers can't be frozen very well and may set spurious 2912 * error conditions during reset. Clear accumulated error 2913 * information and re-thaw the port if frozen. As reset is the 2914 * final recovery action and we cross check link onlineness against 2915 * device classification later, no hotplug event is lost by this. 
2916 */ 2917 spin_lock_irqsave(link->ap->lock, flags); 2918 memset(&link->eh_info, 0, sizeof(link->eh_info)); 2919 if (slave) 2920 memset(&slave->eh_info, 0, sizeof(link->eh_info)); 2921 ap->pflags &= ~ATA_PFLAG_EH_PENDING; 2922 spin_unlock_irqrestore(link->ap->lock, flags); 2923 2924 if (ap->pflags & ATA_PFLAG_FROZEN) 2925 ata_eh_thaw_port(ap); 2926 2927 /* 2928 * Make sure onlineness and classification result correspond. 2929 * Hotplug could have happened during reset and some 2930 * controllers fail to wait while a drive is spinning up after 2931 * being hotplugged causing misdetection. By cross checking 2932 * link on/offlineness and classification result, those 2933 * conditions can be reliably detected and retried. 2934 */ 2935 nr_unknown = 0; 2936 ata_for_each_dev(dev, link, ALL) { 2937 if (ata_phys_link_online(ata_dev_phys_link(dev))) { 2938 if (classes[dev->devno] == ATA_DEV_UNKNOWN) { 2939 ata_dev_dbg(dev, "link online but device misclassified\n"); 2940 classes[dev->devno] = ATA_DEV_NONE; 2941 nr_unknown++; 2942 } 2943 } else if (ata_phys_link_offline(ata_dev_phys_link(dev))) { 2944 if (ata_class_enabled(classes[dev->devno])) 2945 ata_dev_dbg(dev, 2946 "link offline, clearing class %d to NONE\n", 2947 classes[dev->devno]); 2948 classes[dev->devno] = ATA_DEV_NONE; 2949 } else if (classes[dev->devno] == ATA_DEV_UNKNOWN) { 2950 ata_dev_dbg(dev, 2951 "link status unknown, clearing UNKNOWN to NONE\n"); 2952 classes[dev->devno] = ATA_DEV_NONE; 2953 } 2954 } 2955 2956 if (classify && nr_unknown) { 2957 if (try < max_tries) { 2958 ata_link_warn(link, 2959 "link online but %d devices misclassified, retrying\n", 2960 nr_unknown); 2961 failed_link = link; 2962 rc = -EAGAIN; 2963 goto fail; 2964 } 2965 ata_link_warn(link, 2966 "link online but %d devices misclassified, " 2967 "device detection might fail\n", nr_unknown); 2968 } 2969 2970 /* reset successful, schedule revalidation */ 2971 ata_eh_done(link, NULL, ATA_EH_RESET); 2972 if (slave) 2973 ata_eh_done(slave, 
NULL, ATA_EH_RESET); 2974 ehc->last_reset = jiffies; /* update to completion time */ 2975 ehc->i.action |= ATA_EH_REVALIDATE; 2976 link->lpm_policy = ATA_LPM_UNKNOWN; /* reset LPM state */ 2977 2978 rc = 0; 2979 out: 2980 /* clear hotplug flag */ 2981 ehc->i.flags &= ~ATA_EHI_HOTPLUGGED; 2982 if (slave) 2983 sehc->i.flags &= ~ATA_EHI_HOTPLUGGED; 2984 2985 spin_lock_irqsave(ap->lock, flags); 2986 ap->pflags &= ~ATA_PFLAG_RESETTING; 2987 spin_unlock_irqrestore(ap->lock, flags); 2988 2989 return rc; 2990 2991 fail: 2992 /* if SCR isn't accessible on a fan-out port, PMP needs to be reset */ 2993 if (!ata_is_host_link(link) && 2994 sata_scr_read(link, SCR_STATUS, &sstatus)) 2995 rc = -ERESTART; 2996 2997 if (try >= max_tries) { 2998 /* 2999 * Thaw host port even if reset failed, so that the port 3000 * can be retried on the next phy event. This risks 3001 * repeated EH runs but seems to be a better tradeoff than 3002 * shutting down a port after a botched hotplug attempt. 3003 */ 3004 if (ata_is_host_link(link)) 3005 ata_eh_thaw_port(ap); 3006 goto out; 3007 } 3008 3009 now = jiffies; 3010 if (time_before(now, deadline)) { 3011 unsigned long delta = deadline - now; 3012 3013 ata_link_warn(failed_link, 3014 "reset failed (errno=%d), retrying in %u secs\n", 3015 rc, DIV_ROUND_UP(jiffies_to_msecs(delta), 1000)); 3016 3017 ata_eh_release(ap); 3018 while (delta) 3019 delta = schedule_timeout_uninterruptible(delta); 3020 ata_eh_acquire(ap); 3021 } 3022 3023 /* 3024 * While disks spinup behind PMP, some controllers fail sending SRST. 3025 * They need to be reset - as well as the PMP - before retrying. 
3026 */ 3027 if (rc == -ERESTART) { 3028 if (ata_is_host_link(link)) 3029 ata_eh_thaw_port(ap); 3030 goto out; 3031 } 3032 3033 if (try == max_tries - 1) { 3034 sata_down_spd_limit(link, 0); 3035 if (slave) 3036 sata_down_spd_limit(slave, 0); 3037 } else if (rc == -EPIPE) 3038 sata_down_spd_limit(failed_link, 0); 3039 3040 if (hardreset) 3041 reset = hardreset; 3042 goto retry; 3043 } 3044 3045 static inline void ata_eh_pull_park_action(struct ata_port *ap) 3046 { 3047 struct ata_link *link; 3048 struct ata_device *dev; 3049 unsigned long flags; 3050 3051 /* 3052 * This function can be thought of as an extended version of 3053 * ata_eh_about_to_do() specially crafted to accommodate the 3054 * requirements of ATA_EH_PARK handling. Since the EH thread 3055 * does not leave the do {} while () loop in ata_eh_recover as 3056 * long as the timeout for a park request to *one* device on 3057 * the port has not expired, and since we still want to pick 3058 * up park requests to other devices on the same port or 3059 * timeout updates for the same device, we have to pull 3060 * ATA_EH_PARK actions from eh_info into eh_context.i 3061 * ourselves at the beginning of each pass over the loop. 3062 * 3063 * Additionally, all write accesses to &ap->park_req_pending 3064 * through reinit_completion() (see below) or complete_all() 3065 * (see ata_scsi_park_store()) are protected by the host lock. 3066 * As a result we have that park_req_pending.done is zero on 3067 * exit from this function, i.e. when ATA_EH_PARK actions for 3068 * *all* devices on port ap have been pulled into the 3069 * respective eh_context structs. If, and only if, 3070 * park_req_pending.done is non-zero by the time we reach 3071 * wait_for_completion_timeout(), another ATA_EH_PARK action 3072 * has been scheduled for at least one of the devices on port 3073 * ap and we have to cycle over the do {} while () loop in 3074 * ata_eh_recover() again. 
3075 */ 3076 3077 spin_lock_irqsave(ap->lock, flags); 3078 reinit_completion(&ap->park_req_pending); 3079 ata_for_each_link(link, ap, EDGE) { 3080 ata_for_each_dev(dev, link, ALL) { 3081 struct ata_eh_info *ehi = &link->eh_info; 3082 3083 link->eh_context.i.dev_action[dev->devno] |= 3084 ehi->dev_action[dev->devno] & ATA_EH_PARK; 3085 ata_eh_clear_action(link, dev, ehi, ATA_EH_PARK); 3086 } 3087 } 3088 spin_unlock_irqrestore(ap->lock, flags); 3089 } 3090 3091 static void ata_eh_park_issue_cmd(struct ata_device *dev, int park) 3092 { 3093 struct ata_eh_context *ehc = &dev->link->eh_context; 3094 struct ata_taskfile tf; 3095 unsigned int err_mask; 3096 3097 ata_tf_init(dev, &tf); 3098 if (park) { 3099 ehc->unloaded_mask |= 1 << dev->devno; 3100 tf.command = ATA_CMD_IDLEIMMEDIATE; 3101 tf.feature = 0x44; 3102 tf.lbal = 0x4c; 3103 tf.lbam = 0x4e; 3104 tf.lbah = 0x55; 3105 } else { 3106 ehc->unloaded_mask &= ~(1 << dev->devno); 3107 tf.command = ATA_CMD_CHK_POWER; 3108 } 3109 3110 tf.flags |= ATA_TFLAG_DEVICE | ATA_TFLAG_ISADDR; 3111 tf.protocol = ATA_PROT_NODATA; 3112 err_mask = ata_exec_internal(dev, &tf, NULL, DMA_NONE, NULL, 0, 0); 3113 if (park && (err_mask || tf.lbal != 0xc4)) { 3114 ata_dev_err(dev, "head unload failed!\n"); 3115 ehc->unloaded_mask &= ~(1 << dev->devno); 3116 } 3117 } 3118 3119 static int ata_eh_revalidate_and_attach(struct ata_link *link, 3120 struct ata_device **r_failed_dev) 3121 { 3122 struct ata_port *ap = link->ap; 3123 struct ata_eh_context *ehc = &link->eh_context; 3124 struct ata_device *dev; 3125 unsigned int new_mask = 0; 3126 unsigned long flags; 3127 int rc = 0; 3128 3129 DPRINTK("ENTER\n"); 3130 3131 /* For PATA drive side cable detection to work, IDENTIFY must 3132 * be done backwards such that PDIAG- is released by the slave 3133 * device before the master device is identified. 
3134 */ 3135 ata_for_each_dev(dev, link, ALL_REVERSE) { 3136 unsigned int action = ata_eh_dev_action(dev); 3137 unsigned int readid_flags = 0; 3138 3139 if (ehc->i.flags & ATA_EHI_DID_RESET) 3140 readid_flags |= ATA_READID_POSTRESET; 3141 3142 if ((action & ATA_EH_REVALIDATE) && ata_dev_enabled(dev)) { 3143 WARN_ON(dev->class == ATA_DEV_PMP); 3144 3145 if (ata_phys_link_offline(ata_dev_phys_link(dev))) { 3146 rc = -EIO; 3147 goto err; 3148 } 3149 3150 ata_eh_about_to_do(link, dev, ATA_EH_REVALIDATE); 3151 rc = ata_dev_revalidate(dev, ehc->classes[dev->devno], 3152 readid_flags); 3153 if (rc) 3154 goto err; 3155 3156 ata_eh_done(link, dev, ATA_EH_REVALIDATE); 3157 3158 /* Configuration may have changed, reconfigure 3159 * transfer mode. 3160 */ 3161 ehc->i.flags |= ATA_EHI_SETMODE; 3162 3163 /* schedule the scsi_rescan_device() here */ 3164 schedule_work(&(ap->scsi_rescan_task)); 3165 } else if (dev->class == ATA_DEV_UNKNOWN && 3166 ehc->tries[dev->devno] && 3167 ata_class_enabled(ehc->classes[dev->devno])) { 3168 /* Temporarily set dev->class, it will be 3169 * permanently set once all configurations are 3170 * complete. This is necessary because new 3171 * device configuration is done in two 3172 * separate loops. 3173 */ 3174 dev->class = ehc->classes[dev->devno]; 3175 3176 if (dev->class == ATA_DEV_PMP) 3177 rc = sata_pmp_attach(dev); 3178 else 3179 rc = ata_dev_read_id(dev, &dev->class, 3180 readid_flags, dev->id); 3181 3182 /* read_id might have changed class, store and reset */ 3183 ehc->classes[dev->devno] = dev->class; 3184 dev->class = ATA_DEV_UNKNOWN; 3185 3186 switch (rc) { 3187 case 0: 3188 /* clear error info accumulated during probe */ 3189 ata_ering_clear(&dev->ering); 3190 new_mask |= 1 << dev->devno; 3191 break; 3192 case -ENOENT: 3193 /* IDENTIFY was issued to non-existent 3194 * device. No need to reset. Just 3195 * thaw and ignore the device. 
3196 */ 3197 ata_eh_thaw_port(ap); 3198 break; 3199 default: 3200 goto err; 3201 } 3202 } 3203 } 3204 3205 /* PDIAG- should have been released, ask cable type if post-reset */ 3206 if ((ehc->i.flags & ATA_EHI_DID_RESET) && ata_is_host_link(link)) { 3207 if (ap->ops->cable_detect) 3208 ap->cbl = ap->ops->cable_detect(ap); 3209 ata_force_cbl(ap); 3210 } 3211 3212 /* Configure new devices forward such that user doesn't see 3213 * device detection messages backwards. 3214 */ 3215 ata_for_each_dev(dev, link, ALL) { 3216 if (!(new_mask & (1 << dev->devno))) 3217 continue; 3218 3219 dev->class = ehc->classes[dev->devno]; 3220 3221 if (dev->class == ATA_DEV_PMP) 3222 continue; 3223 3224 ehc->i.flags |= ATA_EHI_PRINTINFO; 3225 rc = ata_dev_configure(dev); 3226 ehc->i.flags &= ~ATA_EHI_PRINTINFO; 3227 if (rc) { 3228 dev->class = ATA_DEV_UNKNOWN; 3229 goto err; 3230 } 3231 3232 spin_lock_irqsave(ap->lock, flags); 3233 ap->pflags |= ATA_PFLAG_SCSI_HOTPLUG; 3234 spin_unlock_irqrestore(ap->lock, flags); 3235 3236 /* new device discovered, configure xfermode */ 3237 ehc->i.flags |= ATA_EHI_SETMODE; 3238 } 3239 3240 return 0; 3241 3242 err: 3243 *r_failed_dev = dev; 3244 DPRINTK("EXIT rc=%d\n", rc); 3245 return rc; 3246 } 3247 3248 /** 3249 * ata_set_mode - Program timings and issue SET FEATURES - XFER 3250 * @link: link on which timings will be programmed 3251 * @r_failed_dev: out parameter for failed device 3252 * 3253 * Set ATA device disk transfer mode (PIO3, UDMA6, etc.). If 3254 * ata_set_mode() fails, pointer to the failing device is 3255 * returned in @r_failed_dev. 3256 * 3257 * LOCKING: 3258 * PCI/etc. bus probe sem. 
3259 * 3260 * RETURNS: 3261 * 0 on success, negative errno otherwise 3262 */ 3263 int ata_set_mode(struct ata_link *link, struct ata_device **r_failed_dev) 3264 { 3265 struct ata_port *ap = link->ap; 3266 struct ata_device *dev; 3267 int rc; 3268 3269 /* if data transfer is verified, clear DUBIOUS_XFER on ering top */ 3270 ata_for_each_dev(dev, link, ENABLED) { 3271 if (!(dev->flags & ATA_DFLAG_DUBIOUS_XFER)) { 3272 struct ata_ering_entry *ent; 3273 3274 ent = ata_ering_top(&dev->ering); 3275 if (ent) 3276 ent->eflags &= ~ATA_EFLAG_DUBIOUS_XFER; 3277 } 3278 } 3279 3280 /* has private set_mode? */ 3281 if (ap->ops->set_mode) 3282 rc = ap->ops->set_mode(link, r_failed_dev); 3283 else 3284 rc = ata_do_set_mode(link, r_failed_dev); 3285 3286 /* if transfer mode has changed, set DUBIOUS_XFER on device */ 3287 ata_for_each_dev(dev, link, ENABLED) { 3288 struct ata_eh_context *ehc = &link->eh_context; 3289 u8 saved_xfer_mode = ehc->saved_xfer_mode[dev->devno]; 3290 u8 saved_ncq = !!(ehc->saved_ncq_enabled & (1 << dev->devno)); 3291 3292 if (dev->xfer_mode != saved_xfer_mode || 3293 ata_ncq_enabled(dev) != saved_ncq) 3294 dev->flags |= ATA_DFLAG_DUBIOUS_XFER; 3295 } 3296 3297 return rc; 3298 } 3299 3300 /** 3301 * atapi_eh_clear_ua - Clear ATAPI UNIT ATTENTION after reset 3302 * @dev: ATAPI device to clear UA for 3303 * 3304 * Resets and other operations can make an ATAPI device raise 3305 * UNIT ATTENTION which causes the next operation to fail. This 3306 * function clears UA. 3307 * 3308 * LOCKING: 3309 * EH context (may sleep). 3310 * 3311 * RETURNS: 3312 * 0 on success, -errno on failure. 
3313 */ 3314 static int atapi_eh_clear_ua(struct ata_device *dev) 3315 { 3316 int i; 3317 3318 for (i = 0; i < ATA_EH_UA_TRIES; i++) { 3319 u8 *sense_buffer = dev->link->ap->sector_buf; 3320 u8 sense_key = 0; 3321 unsigned int err_mask; 3322 3323 err_mask = atapi_eh_tur(dev, &sense_key); 3324 if (err_mask != 0 && err_mask != AC_ERR_DEV) { 3325 ata_dev_warn(dev, 3326 "TEST_UNIT_READY failed (err_mask=0x%x)\n", 3327 err_mask); 3328 return -EIO; 3329 } 3330 3331 if (!err_mask || sense_key != UNIT_ATTENTION) 3332 return 0; 3333 3334 err_mask = atapi_eh_request_sense(dev, sense_buffer, sense_key); 3335 if (err_mask) { 3336 ata_dev_warn(dev, "failed to clear " 3337 "UNIT ATTENTION (err_mask=0x%x)\n", err_mask); 3338 return -EIO; 3339 } 3340 } 3341 3342 ata_dev_warn(dev, "UNIT ATTENTION persists after %d tries\n", 3343 ATA_EH_UA_TRIES); 3344 3345 return 0; 3346 } 3347 3348 /** 3349 * ata_eh_maybe_retry_flush - Retry FLUSH if necessary 3350 * @dev: ATA device which may need FLUSH retry 3351 * 3352 * If @dev failed FLUSH, it needs to be reported upper layer 3353 * immediately as it means that @dev failed to remap and already 3354 * lost at least a sector and further FLUSH retrials won't make 3355 * any difference to the lost sector. However, if FLUSH failed 3356 * for other reasons, for example transmission error, FLUSH needs 3357 * to be retried. 3358 * 3359 * This function determines whether FLUSH failure retry is 3360 * necessary and performs it if so. 3361 * 3362 * RETURNS: 3363 * 0 if EH can continue, -errno if EH needs to be repeated. 3364 */ 3365 static int ata_eh_maybe_retry_flush(struct ata_device *dev) 3366 { 3367 struct ata_link *link = dev->link; 3368 struct ata_port *ap = link->ap; 3369 struct ata_queued_cmd *qc; 3370 struct ata_taskfile tf; 3371 unsigned int err_mask; 3372 int rc = 0; 3373 3374 /* did flush fail for this device? 
*/ 3375 if (!ata_tag_valid(link->active_tag)) 3376 return 0; 3377 3378 qc = __ata_qc_from_tag(ap, link->active_tag); 3379 if (qc->dev != dev || (qc->tf.command != ATA_CMD_FLUSH_EXT && 3380 qc->tf.command != ATA_CMD_FLUSH)) 3381 return 0; 3382 3383 /* if the device failed it, it should be reported to upper layers */ 3384 if (qc->err_mask & AC_ERR_DEV) 3385 return 0; 3386 3387 /* flush failed for some other reason, give it another shot */ 3388 ata_tf_init(dev, &tf); 3389 3390 tf.command = qc->tf.command; 3391 tf.flags |= ATA_TFLAG_DEVICE; 3392 tf.protocol = ATA_PROT_NODATA; 3393 3394 ata_dev_warn(dev, "retrying FLUSH 0x%x Emask 0x%x\n", 3395 tf.command, qc->err_mask); 3396 3397 err_mask = ata_exec_internal(dev, &tf, NULL, DMA_NONE, NULL, 0, 0); 3398 if (!err_mask) { 3399 /* 3400 * FLUSH is complete but there's no way to 3401 * successfully complete a failed command from EH. 3402 * Making sure retry is allowed at least once and 3403 * retrying it should do the trick - whatever was in 3404 * the cache is already on the platter and this won't 3405 * cause infinite loop. 3406 */ 3407 qc->scsicmd->allowed = max(qc->scsicmd->allowed, 1); 3408 } else { 3409 ata_dev_warn(dev, "FLUSH failed Emask 0x%x\n", 3410 err_mask); 3411 rc = -EIO; 3412 3413 /* if device failed it, report it to upper layers */ 3414 if (err_mask & AC_ERR_DEV) { 3415 qc->err_mask |= AC_ERR_DEV; 3416 qc->result_tf = tf; 3417 if (!(ap->pflags & ATA_PFLAG_FROZEN)) 3418 rc = 0; 3419 } 3420 } 3421 return rc; 3422 } 3423 3424 /** 3425 * ata_eh_set_lpm - configure SATA interface power management 3426 * @link: link to configure power management 3427 * @policy: the link power management policy 3428 * @r_failed_dev: out parameter for failed device 3429 * 3430 * Enable SATA Interface power management. This will enable 3431 * Device Interface Power Management (DIPM) for min_power and 3432 * medium_power_with_dipm policies, and then call driver specific 3433 * callbacks for enabling Host Initiated Power management. 
3434 * 3435 * LOCKING: 3436 * EH context. 3437 * 3438 * RETURNS: 3439 * 0 on success, -errno on failure. 3440 */ 3441 static int ata_eh_set_lpm(struct ata_link *link, enum ata_lpm_policy policy, 3442 struct ata_device **r_failed_dev) 3443 { 3444 struct ata_port *ap = ata_is_host_link(link) ? link->ap : NULL; 3445 struct ata_eh_context *ehc = &link->eh_context; 3446 struct ata_device *dev, *link_dev = NULL, *lpm_dev = NULL; 3447 enum ata_lpm_policy old_policy = link->lpm_policy; 3448 bool no_dipm = link->ap->flags & ATA_FLAG_NO_DIPM; 3449 unsigned int hints = ATA_LPM_EMPTY | ATA_LPM_HIPM; 3450 unsigned int err_mask; 3451 int rc; 3452 3453 /* if the link or host doesn't do LPM, noop */ 3454 if ((link->flags & ATA_LFLAG_NO_LPM) || (ap && !ap->ops->set_lpm)) 3455 return 0; 3456 3457 /* 3458 * DIPM is enabled only for MIN_POWER as some devices 3459 * misbehave when the host NACKs transition to SLUMBER. Order 3460 * device and link configurations such that the host always 3461 * allows DIPM requests. 
3462 */ 3463 ata_for_each_dev(dev, link, ENABLED) { 3464 bool hipm = ata_id_has_hipm(dev->id); 3465 bool dipm = ata_id_has_dipm(dev->id) && !no_dipm; 3466 3467 /* find the first enabled and LPM enabled devices */ 3468 if (!link_dev) 3469 link_dev = dev; 3470 3471 if (!lpm_dev && (hipm || dipm)) 3472 lpm_dev = dev; 3473 3474 hints &= ~ATA_LPM_EMPTY; 3475 if (!hipm) 3476 hints &= ~ATA_LPM_HIPM; 3477 3478 /* disable DIPM before changing link config */ 3479 if (policy < ATA_LPM_MED_POWER_WITH_DIPM && dipm) { 3480 err_mask = ata_dev_set_feature(dev, 3481 SETFEATURES_SATA_DISABLE, SATA_DIPM); 3482 if (err_mask && err_mask != AC_ERR_DEV) { 3483 ata_dev_warn(dev, 3484 "failed to disable DIPM, Emask 0x%x\n", 3485 err_mask); 3486 rc = -EIO; 3487 goto fail; 3488 } 3489 } 3490 } 3491 3492 if (ap) { 3493 rc = ap->ops->set_lpm(link, policy, hints); 3494 if (!rc && ap->slave_link) 3495 rc = ap->ops->set_lpm(ap->slave_link, policy, hints); 3496 } else 3497 rc = sata_pmp_set_lpm(link, policy, hints); 3498 3499 /* 3500 * Attribute link config failure to the first (LPM) enabled 3501 * device on the link. 3502 */ 3503 if (rc) { 3504 if (rc == -EOPNOTSUPP) { 3505 link->flags |= ATA_LFLAG_NO_LPM; 3506 return 0; 3507 } 3508 dev = lpm_dev ? lpm_dev : link_dev; 3509 goto fail; 3510 } 3511 3512 /* 3513 * Low level driver acked the transition. Issue DIPM command 3514 * with the new policy set. 
3515 */ 3516 link->lpm_policy = policy; 3517 if (ap && ap->slave_link) 3518 ap->slave_link->lpm_policy = policy; 3519 3520 /* host config updated, enable DIPM if transitioning to MIN_POWER */ 3521 ata_for_each_dev(dev, link, ENABLED) { 3522 if (policy >= ATA_LPM_MED_POWER_WITH_DIPM && !no_dipm && 3523 ata_id_has_dipm(dev->id)) { 3524 err_mask = ata_dev_set_feature(dev, 3525 SETFEATURES_SATA_ENABLE, SATA_DIPM); 3526 if (err_mask && err_mask != AC_ERR_DEV) { 3527 ata_dev_warn(dev, 3528 "failed to enable DIPM, Emask 0x%x\n", 3529 err_mask); 3530 rc = -EIO; 3531 goto fail; 3532 } 3533 } 3534 } 3535 3536 link->last_lpm_change = jiffies; 3537 link->flags |= ATA_LFLAG_CHANGED; 3538 3539 return 0; 3540 3541 fail: 3542 /* restore the old policy */ 3543 link->lpm_policy = old_policy; 3544 if (ap && ap->slave_link) 3545 ap->slave_link->lpm_policy = old_policy; 3546 3547 /* if no device or only one more chance is left, disable LPM */ 3548 if (!dev || ehc->tries[dev->devno] <= 2) { 3549 ata_link_warn(link, "disabling LPM on the link\n"); 3550 link->flags |= ATA_LFLAG_NO_LPM; 3551 } 3552 if (r_failed_dev) 3553 *r_failed_dev = dev; 3554 return rc; 3555 } 3556 3557 int ata_link_nr_enabled(struct ata_link *link) 3558 { 3559 struct ata_device *dev; 3560 int cnt = 0; 3561 3562 ata_for_each_dev(dev, link, ENABLED) 3563 cnt++; 3564 return cnt; 3565 } 3566 3567 static int ata_link_nr_vacant(struct ata_link *link) 3568 { 3569 struct ata_device *dev; 3570 int cnt = 0; 3571 3572 ata_for_each_dev(dev, link, ALL) 3573 if (dev->class == ATA_DEV_UNKNOWN) 3574 cnt++; 3575 return cnt; 3576 } 3577 3578 static int ata_eh_skip_recovery(struct ata_link *link) 3579 { 3580 struct ata_port *ap = link->ap; 3581 struct ata_eh_context *ehc = &link->eh_context; 3582 struct ata_device *dev; 3583 3584 /* skip disabled links */ 3585 if (link->flags & ATA_LFLAG_DISABLED) 3586 return 1; 3587 3588 /* skip if explicitly requested */ 3589 if (ehc->i.flags & ATA_EHI_NO_RECOVERY) 3590 return 1; 3591 3592 /* thaw 
frozen port and recover failed devices */ 3593 if ((ap->pflags & ATA_PFLAG_FROZEN) || ata_link_nr_enabled(link)) 3594 return 0; 3595 3596 /* reset at least once if reset is requested */ 3597 if ((ehc->i.action & ATA_EH_RESET) && 3598 !(ehc->i.flags & ATA_EHI_DID_RESET)) 3599 return 0; 3600 3601 /* skip if class codes for all vacant slots are ATA_DEV_NONE */ 3602 ata_for_each_dev(dev, link, ALL) { 3603 if (dev->class == ATA_DEV_UNKNOWN && 3604 ehc->classes[dev->devno] != ATA_DEV_NONE) 3605 return 0; 3606 } 3607 3608 return 1; 3609 } 3610 3611 static int ata_count_probe_trials_cb(struct ata_ering_entry *ent, void *void_arg) 3612 { 3613 u64 interval = msecs_to_jiffies(ATA_EH_PROBE_TRIAL_INTERVAL); 3614 u64 now = get_jiffies_64(); 3615 int *trials = void_arg; 3616 3617 if ((ent->eflags & ATA_EFLAG_OLD_ER) || 3618 (ent->timestamp < now - min(now, interval))) 3619 return -1; 3620 3621 (*trials)++; 3622 return 0; 3623 } 3624 3625 static int ata_eh_schedule_probe(struct ata_device *dev) 3626 { 3627 struct ata_eh_context *ehc = &dev->link->eh_context; 3628 struct ata_link *link = ata_dev_phys_link(dev); 3629 int trials = 0; 3630 3631 if (!(ehc->i.probe_mask & (1 << dev->devno)) || 3632 (ehc->did_probe_mask & (1 << dev->devno))) 3633 return 0; 3634 3635 ata_eh_detach_dev(dev); 3636 ata_dev_init(dev); 3637 ehc->did_probe_mask |= (1 << dev->devno); 3638 ehc->i.action |= ATA_EH_RESET; 3639 ehc->saved_xfer_mode[dev->devno] = 0; 3640 ehc->saved_ncq_enabled &= ~(1 << dev->devno); 3641 3642 /* the link maybe in a deep sleep, wake it up */ 3643 if (link->lpm_policy > ATA_LPM_MAX_POWER) { 3644 if (ata_is_host_link(link)) 3645 link->ap->ops->set_lpm(link, ATA_LPM_MAX_POWER, 3646 ATA_LPM_EMPTY); 3647 else 3648 sata_pmp_set_lpm(link, ATA_LPM_MAX_POWER, 3649 ATA_LPM_EMPTY); 3650 } 3651 3652 /* Record and count probe trials on the ering. The specific 3653 * error mask used is irrelevant. 
Because a successful device 3654 * detection clears the ering, this count accumulates only if 3655 * there are consecutive failed probes. 3656 * 3657 * If the count is equal to or higher than ATA_EH_PROBE_TRIALS 3658 * in the last ATA_EH_PROBE_TRIAL_INTERVAL, link speed is 3659 * forced to 1.5Gbps. 3660 * 3661 * This is to work around cases where failed link speed 3662 * negotiation results in device misdetection leading to 3663 * infinite DEVXCHG or PHRDY CHG events. 3664 */ 3665 ata_ering_record(&dev->ering, 0, AC_ERR_OTHER); 3666 ata_ering_map(&dev->ering, ata_count_probe_trials_cb, &trials); 3667 3668 if (trials > ATA_EH_PROBE_TRIALS) 3669 sata_down_spd_limit(link, 1); 3670 3671 return 1; 3672 } 3673 3674 static int ata_eh_handle_dev_fail(struct ata_device *dev, int err) 3675 { 3676 struct ata_eh_context *ehc = &dev->link->eh_context; 3677 3678 /* -EAGAIN from EH routine indicates retry without prejudice. 3679 * The requester is responsible for ensuring forward progress. 3680 */ 3681 if (err != -EAGAIN) 3682 ehc->tries[dev->devno]--; 3683 3684 switch (err) { 3685 case -ENODEV: 3686 /* device missing or wrong IDENTIFY data, schedule probing */ 3687 ehc->i.probe_mask |= (1 << dev->devno); 3688 /* fall through */ 3689 case -EINVAL: 3690 /* give it just one more chance */ 3691 ehc->tries[dev->devno] = min(ehc->tries[dev->devno], 1); 3692 /* fall through */ 3693 case -EIO: 3694 if (ehc->tries[dev->devno] == 1) { 3695 /* This is the last chance, better to slow 3696 * down than lose it. 
3697 */ 3698 sata_down_spd_limit(ata_dev_phys_link(dev), 0); 3699 if (dev->pio_mode > XFER_PIO_0) 3700 ata_down_xfermask_limit(dev, ATA_DNXFER_PIO); 3701 } 3702 } 3703 3704 if (ata_dev_enabled(dev) && !ehc->tries[dev->devno]) { 3705 /* disable device if it has used up all its chances */ 3706 ata_dev_disable(dev); 3707 3708 /* detach if offline */ 3709 if (ata_phys_link_offline(ata_dev_phys_link(dev))) 3710 ata_eh_detach_dev(dev); 3711 3712 /* schedule probe if necessary */ 3713 if (ata_eh_schedule_probe(dev)) { 3714 ehc->tries[dev->devno] = ATA_EH_DEV_TRIES; 3715 memset(ehc->cmd_timeout_idx[dev->devno], 0, 3716 sizeof(ehc->cmd_timeout_idx[dev->devno])); 3717 } 3718 3719 return 1; 3720 } else { 3721 ehc->i.action |= ATA_EH_RESET; 3722 return 0; 3723 } 3724 } 3725 3726 /** 3727 * ata_eh_recover - recover host port after error 3728 * @ap: host port to recover 3729 * @prereset: prereset method (can be NULL) 3730 * @softreset: softreset method (can be NULL) 3731 * @hardreset: hardreset method (can be NULL) 3732 * @postreset: postreset method (can be NULL) 3733 * @r_failed_link: out parameter for failed link 3734 * 3735 * This is the alpha and omega, eum and yang, heart and soul of 3736 * libata exception handling. On entry, actions required to 3737 * recover each link and hotplug requests are recorded in the 3738 * link's eh_context. This function executes all the operations 3739 * with appropriate retrials and fallbacks to resurrect failed 3740 * devices, detach goners and greet newcomers. 3741 * 3742 * LOCKING: 3743 * Kernel thread context (may sleep). 3744 * 3745 * RETURNS: 3746 * 0 on success, -errno on failure. 
3747 */ 3748 int ata_eh_recover(struct ata_port *ap, ata_prereset_fn_t prereset, 3749 ata_reset_fn_t softreset, ata_reset_fn_t hardreset, 3750 ata_postreset_fn_t postreset, 3751 struct ata_link **r_failed_link) 3752 { 3753 struct ata_link *link; 3754 struct ata_device *dev; 3755 int rc, nr_fails; 3756 unsigned long flags, deadline; 3757 3758 DPRINTK("ENTER\n"); 3759 3760 /* prep for recovery */ 3761 ata_for_each_link(link, ap, EDGE) { 3762 struct ata_eh_context *ehc = &link->eh_context; 3763 3764 /* re-enable link? */ 3765 if (ehc->i.action & ATA_EH_ENABLE_LINK) { 3766 ata_eh_about_to_do(link, NULL, ATA_EH_ENABLE_LINK); 3767 spin_lock_irqsave(ap->lock, flags); 3768 link->flags &= ~ATA_LFLAG_DISABLED; 3769 spin_unlock_irqrestore(ap->lock, flags); 3770 ata_eh_done(link, NULL, ATA_EH_ENABLE_LINK); 3771 } 3772 3773 ata_for_each_dev(dev, link, ALL) { 3774 if (link->flags & ATA_LFLAG_NO_RETRY) 3775 ehc->tries[dev->devno] = 1; 3776 else 3777 ehc->tries[dev->devno] = ATA_EH_DEV_TRIES; 3778 3779 /* collect port action mask recorded in dev actions */ 3780 ehc->i.action |= ehc->i.dev_action[dev->devno] & 3781 ~ATA_EH_PERDEV_MASK; 3782 ehc->i.dev_action[dev->devno] &= ATA_EH_PERDEV_MASK; 3783 3784 /* process hotplug request */ 3785 if (dev->flags & ATA_DFLAG_DETACH) 3786 ata_eh_detach_dev(dev); 3787 3788 /* schedule probe if necessary */ 3789 if (!ata_dev_enabled(dev)) 3790 ata_eh_schedule_probe(dev); 3791 } 3792 } 3793 3794 retry: 3795 rc = 0; 3796 3797 /* if UNLOADING, finish immediately */ 3798 if (ap->pflags & ATA_PFLAG_UNLOADING) 3799 goto out; 3800 3801 /* prep for EH */ 3802 ata_for_each_link(link, ap, EDGE) { 3803 struct ata_eh_context *ehc = &link->eh_context; 3804 3805 /* skip EH if possible. 
*/ 3806 if (ata_eh_skip_recovery(link)) 3807 ehc->i.action = 0; 3808 3809 ata_for_each_dev(dev, link, ALL) 3810 ehc->classes[dev->devno] = ATA_DEV_UNKNOWN; 3811 } 3812 3813 /* reset */ 3814 ata_for_each_link(link, ap, EDGE) { 3815 struct ata_eh_context *ehc = &link->eh_context; 3816 3817 if (!(ehc->i.action & ATA_EH_RESET)) 3818 continue; 3819 3820 rc = ata_eh_reset(link, ata_link_nr_vacant(link), 3821 prereset, softreset, hardreset, postreset); 3822 if (rc) { 3823 ata_link_err(link, "reset failed, giving up\n"); 3824 goto out; 3825 } 3826 } 3827 3828 do { 3829 unsigned long now; 3830 3831 /* 3832 * clears ATA_EH_PARK in eh_info and resets 3833 * ap->park_req_pending 3834 */ 3835 ata_eh_pull_park_action(ap); 3836 3837 deadline = jiffies; 3838 ata_for_each_link(link, ap, EDGE) { 3839 ata_for_each_dev(dev, link, ALL) { 3840 struct ata_eh_context *ehc = &link->eh_context; 3841 unsigned long tmp; 3842 3843 if (dev->class != ATA_DEV_ATA && 3844 dev->class != ATA_DEV_ZAC) 3845 continue; 3846 if (!(ehc->i.dev_action[dev->devno] & 3847 ATA_EH_PARK)) 3848 continue; 3849 tmp = dev->unpark_deadline; 3850 if (time_before(deadline, tmp)) 3851 deadline = tmp; 3852 else if (time_before_eq(tmp, jiffies)) 3853 continue; 3854 if (ehc->unloaded_mask & (1 << dev->devno)) 3855 continue; 3856 3857 ata_eh_park_issue_cmd(dev, 1); 3858 } 3859 } 3860 3861 now = jiffies; 3862 if (time_before_eq(deadline, now)) 3863 break; 3864 3865 ata_eh_release(ap); 3866 deadline = wait_for_completion_timeout(&ap->park_req_pending, 3867 deadline - now); 3868 ata_eh_acquire(ap); 3869 } while (deadline); 3870 ata_for_each_link(link, ap, EDGE) { 3871 ata_for_each_dev(dev, link, ALL) { 3872 if (!(link->eh_context.unloaded_mask & 3873 (1 << dev->devno))) 3874 continue; 3875 3876 ata_eh_park_issue_cmd(dev, 0); 3877 ata_eh_done(link, dev, ATA_EH_PARK); 3878 } 3879 } 3880 3881 /* the rest */ 3882 nr_fails = 0; 3883 ata_for_each_link(link, ap, PMP_FIRST) { 3884 struct ata_eh_context *ehc = &link->eh_context; 3885 
3886 if (sata_pmp_attached(ap) && ata_is_host_link(link)) 3887 goto config_lpm; 3888 3889 /* revalidate existing devices and attach new ones */ 3890 rc = ata_eh_revalidate_and_attach(link, &dev); 3891 if (rc) 3892 goto rest_fail; 3893 3894 /* if PMP got attached, return, pmp EH will take care of it */ 3895 if (link->device->class == ATA_DEV_PMP) { 3896 ehc->i.action = 0; 3897 return 0; 3898 } 3899 3900 /* configure transfer mode if necessary */ 3901 if (ehc->i.flags & ATA_EHI_SETMODE) { 3902 rc = ata_set_mode(link, &dev); 3903 if (rc) 3904 goto rest_fail; 3905 ehc->i.flags &= ~ATA_EHI_SETMODE; 3906 } 3907 3908 /* If reset has been issued, clear UA to avoid 3909 * disrupting the current users of the device. 3910 */ 3911 if (ehc->i.flags & ATA_EHI_DID_RESET) { 3912 ata_for_each_dev(dev, link, ALL) { 3913 if (dev->class != ATA_DEV_ATAPI) 3914 continue; 3915 rc = atapi_eh_clear_ua(dev); 3916 if (rc) 3917 goto rest_fail; 3918 if (zpodd_dev_enabled(dev)) 3919 zpodd_post_poweron(dev); 3920 } 3921 } 3922 3923 /* retry flush if necessary */ 3924 ata_for_each_dev(dev, link, ALL) { 3925 if (dev->class != ATA_DEV_ATA && 3926 dev->class != ATA_DEV_ZAC) 3927 continue; 3928 rc = ata_eh_maybe_retry_flush(dev); 3929 if (rc) 3930 goto rest_fail; 3931 } 3932 3933 config_lpm: 3934 /* configure link power saving */ 3935 if (link->lpm_policy != ap->target_lpm_policy) { 3936 rc = ata_eh_set_lpm(link, ap->target_lpm_policy, &dev); 3937 if (rc) 3938 goto rest_fail; 3939 } 3940 3941 /* this link is okay now */ 3942 ehc->i.flags = 0; 3943 continue; 3944 3945 rest_fail: 3946 nr_fails++; 3947 if (dev) 3948 ata_eh_handle_dev_fail(dev, rc); 3949 3950 if (ap->pflags & ATA_PFLAG_FROZEN) { 3951 /* PMP reset requires working host port. 3952 * Can't retry if it's frozen. 
3953 */ 3954 if (sata_pmp_attached(ap)) 3955 goto out; 3956 break; 3957 } 3958 } 3959 3960 if (nr_fails) 3961 goto retry; 3962 3963 out: 3964 if (rc && r_failed_link) 3965 *r_failed_link = link; 3966 3967 DPRINTK("EXIT, rc=%d\n", rc); 3968 return rc; 3969 } 3970 3971 /** 3972 * ata_eh_finish - finish up EH 3973 * @ap: host port to finish EH for 3974 * 3975 * Recovery is complete. Clean up EH states and retry or finish 3976 * failed qcs. 3977 * 3978 * LOCKING: 3979 * None. 3980 */ 3981 void ata_eh_finish(struct ata_port *ap) 3982 { 3983 struct ata_queued_cmd *qc; 3984 int tag; 3985 3986 /* retry or finish qcs */ 3987 ata_qc_for_each_raw(ap, qc, tag) { 3988 if (!(qc->flags & ATA_QCFLAG_FAILED)) 3989 continue; 3990 3991 if (qc->err_mask) { 3992 /* FIXME: Once EH migration is complete, 3993 * generate sense data in this function, 3994 * considering both err_mask and tf. 3995 */ 3996 if (qc->flags & ATA_QCFLAG_RETRY) 3997 ata_eh_qc_retry(qc); 3998 else 3999 ata_eh_qc_complete(qc); 4000 } else { 4001 if (qc->flags & ATA_QCFLAG_SENSE_VALID) { 4002 ata_eh_qc_complete(qc); 4003 } else { 4004 /* feed zero TF to sense generation */ 4005 memset(&qc->result_tf, 0, sizeof(qc->result_tf)); 4006 ata_eh_qc_retry(qc); 4007 } 4008 } 4009 } 4010 4011 /* make sure nr_active_links is zero after EH */ 4012 WARN_ON(ap->nr_active_links); 4013 ap->nr_active_links = 0; 4014 } 4015 4016 /** 4017 * ata_do_eh - do standard error handling 4018 * @ap: host port to handle error for 4019 * 4020 * @prereset: prereset method (can be NULL) 4021 * @softreset: softreset method (can be NULL) 4022 * @hardreset: hardreset method (can be NULL) 4023 * @postreset: postreset method (can be NULL) 4024 * 4025 * Perform standard error handling sequence. 4026 * 4027 * LOCKING: 4028 * Kernel thread context (may sleep). 
4029 */ 4030 void ata_do_eh(struct ata_port *ap, ata_prereset_fn_t prereset, 4031 ata_reset_fn_t softreset, ata_reset_fn_t hardreset, 4032 ata_postreset_fn_t postreset) 4033 { 4034 struct ata_device *dev; 4035 int rc; 4036 4037 ata_eh_autopsy(ap); 4038 ata_eh_report(ap); 4039 4040 rc = ata_eh_recover(ap, prereset, softreset, hardreset, postreset, 4041 NULL); 4042 if (rc) { 4043 ata_for_each_dev(dev, &ap->link, ALL) 4044 ata_dev_disable(dev); 4045 } 4046 4047 ata_eh_finish(ap); 4048 } 4049 4050 /** 4051 * ata_std_error_handler - standard error handler 4052 * @ap: host port to handle error for 4053 * 4054 * Standard error handler 4055 * 4056 * LOCKING: 4057 * Kernel thread context (may sleep). 4058 */ 4059 void ata_std_error_handler(struct ata_port *ap) 4060 { 4061 struct ata_port_operations *ops = ap->ops; 4062 ata_reset_fn_t hardreset = ops->hardreset; 4063 4064 /* ignore built-in hardreset if SCR access is not available */ 4065 if (hardreset == sata_std_hardreset && !sata_scr_valid(&ap->link)) 4066 hardreset = NULL; 4067 4068 ata_do_eh(ap, ops->prereset, ops->softreset, hardreset, ops->postreset); 4069 } 4070 4071 #ifdef CONFIG_PM 4072 /** 4073 * ata_eh_handle_port_suspend - perform port suspend operation 4074 * @ap: port to suspend 4075 * 4076 * Suspend @ap. 4077 * 4078 * LOCKING: 4079 * Kernel thread context (may sleep). 4080 */ 4081 static void ata_eh_handle_port_suspend(struct ata_port *ap) 4082 { 4083 unsigned long flags; 4084 int rc = 0; 4085 struct ata_device *dev; 4086 4087 /* are we suspending? */ 4088 spin_lock_irqsave(ap->lock, flags); 4089 if (!(ap->pflags & ATA_PFLAG_PM_PENDING) || 4090 ap->pm_mesg.event & PM_EVENT_RESUME) { 4091 spin_unlock_irqrestore(ap->lock, flags); 4092 return; 4093 } 4094 spin_unlock_irqrestore(ap->lock, flags); 4095 4096 WARN_ON(ap->pflags & ATA_PFLAG_SUSPENDED); 4097 4098 /* 4099 * If we have a ZPODD attached, check its zero 4100 * power ready status before the port is frozen. 4101 * Only needed for runtime suspend. 
4102 */ 4103 if (PMSG_IS_AUTO(ap->pm_mesg)) { 4104 ata_for_each_dev(dev, &ap->link, ENABLED) { 4105 if (zpodd_dev_enabled(dev)) 4106 zpodd_on_suspend(dev); 4107 } 4108 } 4109 4110 /* tell ACPI we're suspending */ 4111 rc = ata_acpi_on_suspend(ap); 4112 if (rc) 4113 goto out; 4114 4115 /* suspend */ 4116 ata_eh_freeze_port(ap); 4117 4118 if (ap->ops->port_suspend) 4119 rc = ap->ops->port_suspend(ap, ap->pm_mesg); 4120 4121 ata_acpi_set_state(ap, ap->pm_mesg); 4122 out: 4123 /* update the flags */ 4124 spin_lock_irqsave(ap->lock, flags); 4125 4126 ap->pflags &= ~ATA_PFLAG_PM_PENDING; 4127 if (rc == 0) 4128 ap->pflags |= ATA_PFLAG_SUSPENDED; 4129 else if (ap->pflags & ATA_PFLAG_FROZEN) 4130 ata_port_schedule_eh(ap); 4131 4132 spin_unlock_irqrestore(ap->lock, flags); 4133 4134 return; 4135 } 4136 4137 /** 4138 * ata_eh_handle_port_resume - perform port resume operation 4139 * @ap: port to resume 4140 * 4141 * Resume @ap. 4142 * 4143 * LOCKING: 4144 * Kernel thread context (may sleep). 4145 */ 4146 static void ata_eh_handle_port_resume(struct ata_port *ap) 4147 { 4148 struct ata_link *link; 4149 struct ata_device *dev; 4150 unsigned long flags; 4151 4152 /* are we resuming? */ 4153 spin_lock_irqsave(ap->lock, flags); 4154 if (!(ap->pflags & ATA_PFLAG_PM_PENDING) || 4155 !(ap->pm_mesg.event & PM_EVENT_RESUME)) { 4156 spin_unlock_irqrestore(ap->lock, flags); 4157 return; 4158 } 4159 spin_unlock_irqrestore(ap->lock, flags); 4160 4161 WARN_ON(!(ap->pflags & ATA_PFLAG_SUSPENDED)); 4162 4163 /* 4164 * Error timestamps are in jiffies which doesn't run while 4165 * suspended and PHY events during resume isn't too uncommon. 4166 * When the two are combined, it can lead to unnecessary speed 4167 * downs if the machine is suspended and resumed repeatedly. 4168 * Clear error history. 
4169 */ 4170 ata_for_each_link(link, ap, HOST_FIRST) 4171 ata_for_each_dev(dev, link, ALL) 4172 ata_ering_clear(&dev->ering); 4173 4174 ata_acpi_set_state(ap, ap->pm_mesg); 4175 4176 if (ap->ops->port_resume) 4177 ap->ops->port_resume(ap); 4178 4179 /* tell ACPI that we're resuming */ 4180 ata_acpi_on_resume(ap); 4181 4182 /* update the flags */ 4183 spin_lock_irqsave(ap->lock, flags); 4184 ap->pflags &= ~(ATA_PFLAG_PM_PENDING | ATA_PFLAG_SUSPENDED); 4185 spin_unlock_irqrestore(ap->lock, flags); 4186 } 4187 #endif /* CONFIG_PM */ 4188