/*
 *  libata-eh.c - libata error handling
 *
 *  Maintained by:  Jeff Garzik <jgarzik@pobox.com>
 *		    Please ALWAYS copy linux-ide@vger.kernel.org
 *		    on emails.
 *
 *  Copyright 2006 Tejun Heo <htejun@gmail.com>
 *
 *
 *  This program is free software; you can redistribute it and/or
 *  modify it under the terms of the GNU General Public License as
 *  published by the Free Software Foundation; either version 2, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 *  General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; see the file COPYING.  If not, write to
 *  the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139,
 *  USA.
 *
 *
 *  libata documentation is available via 'make {ps|pdf}docs',
 *  as Documentation/DocBook/libata.*
 *
 *  Hardware documentation available from http://www.t13.org/ and
 *  http://www.sata-io.org/
 *
 */

#include <linux/kernel.h>
#include <linux/blkdev.h>
#include <linux/pci.h>
#include <scsi/scsi.h>
#include <scsi/scsi_host.h>
#include <scsi/scsi_eh.h>
#include <scsi/scsi_device.h>
#include <scsi/scsi_cmnd.h>
#include "../scsi/scsi_transport_api.h"

#include <linux/libata.h>

#include "libata.h"

/*
 * Constants private to the error handler.  The timeout/interval values
 * appear to be in milliseconds (ATA_EH_PROBE_TRIAL_INTERVAL below is
 * annotated as 60000 == 1 min).
 */
enum {
	/* speed down verdicts (bit flags, may be OR'ed together) */
	ATA_EH_SPDN_NCQ_OFF		= (1 << 0),
	ATA_EH_SPDN_SPEED_DOWN		= (1 << 1),
	ATA_EH_SPDN_FALLBACK_TO_PIO	= (1 << 2),
	ATA_EH_SPDN_KEEP_ERRORS		= (1 << 3),

	/* error flags (bit flags stored in ata_ering_entry.eflags) */
	ATA_EFLAG_IS_IO			= (1 << 0),
	ATA_EFLAG_DUBIOUS_XFER		= (1 << 1),

	/* error categories; the DUBIOUS_* values mirror the first four
	 * categories at an offset of 4, ATA_ECAT_NR is the total count
	 */
	ATA_ECAT_NONE			= 0,
	ATA_ECAT_ATA_BUS		= 1,
	ATA_ECAT_TOUT_HSM		= 2,
	ATA_ECAT_UNK_DEV		= 3,
	ATA_ECAT_DUBIOUS_NONE		= 4,
	ATA_ECAT_DUBIOUS_ATA_BUS	= 5,
	ATA_ECAT_DUBIOUS_TOUT_HSM	= 6,
	ATA_ECAT_DUBIOUS_UNK_DEV	= 7,
	ATA_ECAT_NR			= 8,

	/* timeout returned by ata_internal_cmd_timeout() for commands
	 * which are not listed in ata_eh_cmd_timeout_table
	 */
	ATA_EH_CMD_DFL_TIMEOUT		=  5000,

	/* always put at least this amount of time between resets */
	ATA_EH_RESET_COOL_DOWN		=  5000,

	/* Waiting in ->prereset can never be reliable.  It's
	 * sometimes nice to wait there but it can't be depended upon;
	 * otherwise, we wouldn't be resetting.  Just give it enough
	 * time for most drives to spin up.
	 */
	ATA_EH_PRERESET_TIMEOUT		= 10000,

	/* interval used when (re)arming ap->fastdrain_timer */
	ATA_EH_FASTDRAIN_INTERVAL	=  3000,

	ATA_EH_UA_TRIES			= 5,

	/* probe speed down parameters, see ata_eh_schedule_probe() */
	ATA_EH_PROBE_TRIAL_INTERVAL	= 60000,	/* 1 min */
	ATA_EH_PROBE_TRIALS		= 2,
};

/* The following table determines how we sequence resets.  Each entry
 * represents timeout for that try.  The first try can be soft or
 * hardreset.  All others are hardreset if available.  In most cases
 * the first reset w/ 10sec timeout should succeed.  Following entries
 * are mostly for error handling, hotplug and unusually slow devices.
 */
static const unsigned long ata_eh_reset_timeouts[] = {
	10000,	/* most drives spin up by 10sec */
	10000,	/* > 99% working drives spin up before 20sec */
	35000,	/* give > 30 secs of idleness for very slow devices */
	 5000,	/* and sweet one last chance */
	ULONG_MAX, /* > 1 min has elapsed, give up */
};

/* Timeouts for successive tries of the IDENTIFY commands; ULONG_MAX
 * marks the end of the table.
 */
static const unsigned long ata_eh_identify_timeouts[] = {
	 5000,	/* covers > 99% of successes and not too boring on failures */
	10000,  /* combined time till here is enough even for media access */
	30000,	/* for devices which are extremely slow to respond */
	ULONG_MAX,
};

/* Timeouts for successive tries of the other EH internal commands;
 * ULONG_MAX marks the end of the table.
 */
static const unsigned long ata_eh_other_timeouts[] = {
	 5000,	/* same rationale as identify timeout */
	10000,	/* ditto */
	/* but no merciful 30sec for other commands, it just isn't worth it */
	ULONG_MAX,
};

/* One command class of ata_eh_cmd_timeout_table.
 * @commands: list of command opcodes in the class, terminated by 0
 * @timeouts: timeout table for the class, terminated by ULONG_MAX
 */
struct ata_eh_cmd_timeout_ent {
	const u8		*commands;
	const unsigned long	*timeouts;
};

/* The following table determines timeouts to use for EH internal
 * commands.  Each table entry is a command class and matches the
 * commands the entry applies to and the timeout table to use.
 *
 * On the retry after a command timed out, the next timeout value from
 * the table is used.  If the table doesn't contain further entries,
 * the last value is used.
 *
 * ehc->cmd_timeout_idx keeps track of which timeout to use per
 * command class, so if SET_FEATURES times out on the first try, the
 * next try will use the second timeout value only for that class.
 */
/* CMDS() builds an anonymous u8 array from its arguments and appends
 * the terminating 0 which ata_lookup_timeout_table() stops at.
 */
#define CMDS(cmds...)	(const u8 []){ cmds, 0 }
static const struct ata_eh_cmd_timeout_ent
ata_eh_cmd_timeout_table[ATA_EH_CMD_TIMEOUT_TABLE_SIZE] = {
	{ .commands = CMDS(ATA_CMD_ID_ATA, ATA_CMD_ID_ATAPI),
	  .timeouts = ata_eh_identify_timeouts, },
	{ .commands = CMDS(ATA_CMD_READ_NATIVE_MAX, ATA_CMD_READ_NATIVE_MAX_EXT),
	  .timeouts = ata_eh_other_timeouts, },
	{ .commands = CMDS(ATA_CMD_SET_MAX, ATA_CMD_SET_MAX_EXT),
	  .timeouts = ata_eh_other_timeouts, },
	{ .commands = CMDS(ATA_CMD_SET_FEATURES),
	  .timeouts = ata_eh_other_timeouts, },
	{ .commands = CMDS(ATA_CMD_INIT_DEV_PARAMS),
	  .timeouts = ata_eh_other_timeouts, },
};
#undef CMDS

/* forward declarations; without CONFIG_PM the port suspend/resume
 * handlers are empty stubs so that callers need no #ifdef
 */
static void __ata_port_freeze(struct ata_port *ap);
#ifdef CONFIG_PM
static void ata_eh_handle_port_suspend(struct ata_port *ap);
static void ata_eh_handle_port_resume(struct ata_port *ap);
#else /* CONFIG_PM */
static void ata_eh_handle_port_suspend(struct ata_port *ap)
{ }

static void ata_eh_handle_port_resume(struct ata_port *ap)
{ }
#endif /* CONFIG_PM */

/* va_list backend shared by the description helpers: format @fmt
 * with @args at the current end of @ehi->desc, bounded by the space
 * remaining in the ATA_EH_DESC_LEN buffer, and advance @ehi->desc_len
 * by the number of characters written.
 */
static void __ata_ehi_pushv_desc(struct ata_eh_info *ehi, const char *fmt,
				 va_list args)
{
	ehi->desc_len += vscnprintf(ehi->desc + ehi->desc_len,
				     ATA_EH_DESC_LEN - ehi->desc_len,
				     fmt, args);
}

/**
 *	__ata_ehi_push_desc - push error description without adding separator
 *	@ehi: target EHI
 *	@fmt: printf format string
 *
 *	Format string according to @fmt and append it to @ehi->desc.
 *
 *	LOCKING:
 *	spin_lock_irqsave(host lock)
 */
void __ata_ehi_push_desc(struct ata_eh_info *ehi, const char *fmt, ...)
{
	va_list args;

	va_start(args, fmt);
	__ata_ehi_pushv_desc(ehi, fmt, args);
	va_end(args);
}

/**
 *	ata_ehi_push_desc - push error description with separator
 *	@ehi: target EHI
 *	@fmt: printf format string
 *
 *	Format string according to @fmt and append it to @ehi->desc.
197 * If @ehi->desc is not empty, ", " is added in-between. 198 * 199 * LOCKING: 200 * spin_lock_irqsave(host lock) 201 */ 202 void ata_ehi_push_desc(struct ata_eh_info *ehi, const char *fmt, ...) 203 { 204 va_list args; 205 206 if (ehi->desc_len) 207 __ata_ehi_push_desc(ehi, ", "); 208 209 va_start(args, fmt); 210 __ata_ehi_pushv_desc(ehi, fmt, args); 211 va_end(args); 212 } 213 214 /** 215 * ata_ehi_clear_desc - clean error description 216 * @ehi: target EHI 217 * 218 * Clear @ehi->desc. 219 * 220 * LOCKING: 221 * spin_lock_irqsave(host lock) 222 */ 223 void ata_ehi_clear_desc(struct ata_eh_info *ehi) 224 { 225 ehi->desc[0] = '\0'; 226 ehi->desc_len = 0; 227 } 228 229 /** 230 * ata_port_desc - append port description 231 * @ap: target ATA port 232 * @fmt: printf format string 233 * 234 * Format string according to @fmt and append it to port 235 * description. If port description is not empty, " " is added 236 * in-between. This function is to be used while initializing 237 * ata_host. The description is printed on host registration. 238 * 239 * LOCKING: 240 * None. 241 */ 242 void ata_port_desc(struct ata_port *ap, const char *fmt, ...) 243 { 244 va_list args; 245 246 WARN_ON(!(ap->pflags & ATA_PFLAG_INITIALIZING)); 247 248 if (ap->link.eh_info.desc_len) 249 __ata_ehi_push_desc(&ap->link.eh_info, " "); 250 251 va_start(args, fmt); 252 __ata_ehi_pushv_desc(&ap->link.eh_info, fmt, args); 253 va_end(args); 254 } 255 256 #ifdef CONFIG_PCI 257 258 /** 259 * ata_port_pbar_desc - append PCI BAR description 260 * @ap: target ATA port 261 * @bar: target PCI BAR 262 * @offset: offset into PCI BAR 263 * @name: name of the area 264 * 265 * If @offset is negative, this function formats a string which 266 * contains the name, address, size and type of the BAR and 267 * appends it to the port description. If @offset is zero or 268 * positive, only name and offsetted address is appended. 269 * 270 * LOCKING: 271 * None. 
272 */ 273 void ata_port_pbar_desc(struct ata_port *ap, int bar, ssize_t offset, 274 const char *name) 275 { 276 struct pci_dev *pdev = to_pci_dev(ap->host->dev); 277 char *type = ""; 278 unsigned long long start, len; 279 280 if (pci_resource_flags(pdev, bar) & IORESOURCE_MEM) 281 type = "m"; 282 else if (pci_resource_flags(pdev, bar) & IORESOURCE_IO) 283 type = "i"; 284 285 start = (unsigned long long)pci_resource_start(pdev, bar); 286 len = (unsigned long long)pci_resource_len(pdev, bar); 287 288 if (offset < 0) 289 ata_port_desc(ap, "%s %s%llu@0x%llx", name, type, len, start); 290 else 291 ata_port_desc(ap, "%s 0x%llx", name, 292 start + (unsigned long long)offset); 293 } 294 295 #endif /* CONFIG_PCI */ 296 297 static int ata_lookup_timeout_table(u8 cmd) 298 { 299 int i; 300 301 for (i = 0; i < ATA_EH_CMD_TIMEOUT_TABLE_SIZE; i++) { 302 const u8 *cur; 303 304 for (cur = ata_eh_cmd_timeout_table[i].commands; *cur; cur++) 305 if (*cur == cmd) 306 return i; 307 } 308 309 return -1; 310 } 311 312 /** 313 * ata_internal_cmd_timeout - determine timeout for an internal command 314 * @dev: target device 315 * @cmd: internal command to be issued 316 * 317 * Determine timeout for internal command @cmd for @dev. 318 * 319 * LOCKING: 320 * EH context. 321 * 322 * RETURNS: 323 * Determined timeout. 324 */ 325 unsigned long ata_internal_cmd_timeout(struct ata_device *dev, u8 cmd) 326 { 327 struct ata_eh_context *ehc = &dev->link->eh_context; 328 int ent = ata_lookup_timeout_table(cmd); 329 int idx; 330 331 if (ent < 0) 332 return ATA_EH_CMD_DFL_TIMEOUT; 333 334 idx = ehc->cmd_timeout_idx[dev->devno][ent]; 335 return ata_eh_cmd_timeout_table[ent].timeouts[idx]; 336 } 337 338 /** 339 * ata_internal_cmd_timed_out - notification for internal command timeout 340 * @dev: target device 341 * @cmd: internal command which timed out 342 * 343 * Notify EH that internal command @cmd for @dev timed out. 
This 344 * function should be called only for commands whose timeouts are 345 * determined using ata_internal_cmd_timeout(). 346 * 347 * LOCKING: 348 * EH context. 349 */ 350 void ata_internal_cmd_timed_out(struct ata_device *dev, u8 cmd) 351 { 352 struct ata_eh_context *ehc = &dev->link->eh_context; 353 int ent = ata_lookup_timeout_table(cmd); 354 int idx; 355 356 if (ent < 0) 357 return; 358 359 idx = ehc->cmd_timeout_idx[dev->devno][ent]; 360 if (ata_eh_cmd_timeout_table[ent].timeouts[idx + 1] != ULONG_MAX) 361 ehc->cmd_timeout_idx[dev->devno][ent]++; 362 } 363 364 static void ata_ering_record(struct ata_ering *ering, unsigned int eflags, 365 unsigned int err_mask) 366 { 367 struct ata_ering_entry *ent; 368 369 WARN_ON(!err_mask); 370 371 ering->cursor++; 372 ering->cursor %= ATA_ERING_SIZE; 373 374 ent = &ering->ring[ering->cursor]; 375 ent->eflags = eflags; 376 ent->err_mask = err_mask; 377 ent->timestamp = get_jiffies_64(); 378 } 379 380 static struct ata_ering_entry *ata_ering_top(struct ata_ering *ering) 381 { 382 struct ata_ering_entry *ent = &ering->ring[ering->cursor]; 383 384 if (ent->err_mask) 385 return ent; 386 return NULL; 387 } 388 389 static void ata_ering_clear(struct ata_ering *ering) 390 { 391 memset(ering, 0, sizeof(*ering)); 392 } 393 394 static int ata_ering_map(struct ata_ering *ering, 395 int (*map_fn)(struct ata_ering_entry *, void *), 396 void *arg) 397 { 398 int idx, rc = 0; 399 struct ata_ering_entry *ent; 400 401 idx = ering->cursor; 402 do { 403 ent = &ering->ring[idx]; 404 if (!ent->err_mask) 405 break; 406 rc = map_fn(ent, arg); 407 if (rc) 408 break; 409 idx = (idx - 1 + ATA_ERING_SIZE) % ATA_ERING_SIZE; 410 } while (idx != ering->cursor); 411 412 return rc; 413 } 414 415 static unsigned int ata_eh_dev_action(struct ata_device *dev) 416 { 417 struct ata_eh_context *ehc = &dev->link->eh_context; 418 419 return ehc->i.action | ehc->i.dev_action[dev->devno]; 420 } 421 422 static void ata_eh_clear_action(struct ata_link *link, struct 
ata_device *dev, 423 struct ata_eh_info *ehi, unsigned int action) 424 { 425 struct ata_device *tdev; 426 427 if (!dev) { 428 ehi->action &= ~action; 429 ata_for_each_dev(tdev, link, ALL) 430 ehi->dev_action[tdev->devno] &= ~action; 431 } else { 432 /* doesn't make sense for port-wide EH actions */ 433 WARN_ON(!(action & ATA_EH_PERDEV_MASK)); 434 435 /* break ehi->action into ehi->dev_action */ 436 if (ehi->action & action) { 437 ata_for_each_dev(tdev, link, ALL) 438 ehi->dev_action[tdev->devno] |= 439 ehi->action & action; 440 ehi->action &= ~action; 441 } 442 443 /* turn off the specified per-dev action */ 444 ehi->dev_action[dev->devno] &= ~action; 445 } 446 } 447 448 /** 449 * ata_scsi_timed_out - SCSI layer time out callback 450 * @cmd: timed out SCSI command 451 * 452 * Handles SCSI layer timeout. We race with normal completion of 453 * the qc for @cmd. If the qc is already gone, we lose and let 454 * the scsi command finish (EH_HANDLED). Otherwise, the qc has 455 * timed out and EH should be invoked. Prevent ata_qc_complete() 456 * from finishing it by setting EH_SCHEDULED and return 457 * EH_NOT_HANDLED. 458 * 459 * TODO: kill this function once old EH is gone. 
460 * 461 * LOCKING: 462 * Called from timer context 463 * 464 * RETURNS: 465 * EH_HANDLED or EH_NOT_HANDLED 466 */ 467 enum blk_eh_timer_return ata_scsi_timed_out(struct scsi_cmnd *cmd) 468 { 469 struct Scsi_Host *host = cmd->device->host; 470 struct ata_port *ap = ata_shost_to_port(host); 471 unsigned long flags; 472 struct ata_queued_cmd *qc; 473 enum blk_eh_timer_return ret; 474 475 DPRINTK("ENTER\n"); 476 477 if (ap->ops->error_handler) { 478 ret = BLK_EH_NOT_HANDLED; 479 goto out; 480 } 481 482 ret = BLK_EH_HANDLED; 483 spin_lock_irqsave(ap->lock, flags); 484 qc = ata_qc_from_tag(ap, ap->link.active_tag); 485 if (qc) { 486 WARN_ON(qc->scsicmd != cmd); 487 qc->flags |= ATA_QCFLAG_EH_SCHEDULED; 488 qc->err_mask |= AC_ERR_TIMEOUT; 489 ret = BLK_EH_NOT_HANDLED; 490 } 491 spin_unlock_irqrestore(ap->lock, flags); 492 493 out: 494 DPRINTK("EXIT, ret=%d\n", ret); 495 return ret; 496 } 497 498 static void ata_eh_unload(struct ata_port *ap) 499 { 500 struct ata_link *link; 501 struct ata_device *dev; 502 unsigned long flags; 503 504 /* Restore SControl IPM and SPD for the next driver and 505 * disable attached devices. 506 */ 507 ata_for_each_link(link, ap, PMP_FIRST) { 508 sata_scr_write(link, SCR_CONTROL, link->saved_scontrol & 0xff0); 509 ata_for_each_dev(dev, link, ALL) 510 ata_dev_disable(dev); 511 } 512 513 /* freeze and set UNLOADED */ 514 spin_lock_irqsave(ap->lock, flags); 515 516 ata_port_freeze(ap); /* won't be thawed */ 517 ap->pflags &= ~ATA_PFLAG_EH_PENDING; /* clear pending from freeze */ 518 ap->pflags |= ATA_PFLAG_UNLOADED; 519 520 spin_unlock_irqrestore(ap->lock, flags); 521 } 522 523 /** 524 * ata_scsi_error - SCSI layer error handler callback 525 * @host: SCSI host on which error occurred 526 * 527 * Handles SCSI-layer-thrown error events. 528 * 529 * LOCKING: 530 * Inherited from SCSI layer (none, can sleep) 531 * 532 * RETURNS: 533 * Zero. 
 */
void ata_scsi_error(struct Scsi_Host *host)
{
	struct ata_port *ap = ata_shost_to_port(host);
	int i;
	unsigned long flags;

	DPRINTK("ENTER\n");

	/* synchronize with port task */
	ata_port_flush_task(ap);

	/* synchronize with host lock and sort out timeouts */

	/* For new EH, all qcs are finished in one of three ways -
	 * normal completion, error completion, and SCSI timeout.
	 * Both completions can race against SCSI timeout.  When normal
	 * completion wins, the qc never reaches EH.  When error
	 * completion wins, the qc has ATA_QCFLAG_FAILED set.
	 *
	 * When SCSI timeout wins, things are a bit more complex.
	 * Normal or error completion can occur after the timeout but
	 * before this point.  In such cases, both types of
	 * completions are honored.  A scmd is determined to have
	 * timed out iff its associated qc is active and not failed.
	 */
	if (ap->ops->error_handler) {
		struct scsi_cmnd *scmd, *tmp;
		int nr_timedout = 0;

		spin_lock_irqsave(ap->lock, flags);

		/* This must occur under the ap->lock as we don't want
		 * a polled recovery to race the real interrupt handler.
		 *
		 * The lost_interrupt handler checks for any completed but
		 * non-notified command and completes much like an IRQ
		 * handler.
		 *
		 * We then fall into the error recovery code which will
		 * treat this as if normal completion won the race.
		 */

		if (ap->ops->lost_interrupt)
			ap->ops->lost_interrupt(ap);

		list_for_each_entry_safe(scmd, tmp, &host->eh_cmd_q, eh_entry) {
			struct ata_queued_cmd *qc;

			/* look for an active qc which carries this scmd;
			 * i == ATA_MAX_QUEUE afterwards means none found
			 */
			for (i = 0; i < ATA_MAX_QUEUE; i++) {
				qc = __ata_qc_from_tag(ap, i);
				if (qc->flags & ATA_QCFLAG_ACTIVE &&
				    qc->scsicmd == scmd)
					break;
			}

			if (i < ATA_MAX_QUEUE) {
				/* the scmd has an associated qc */
				if (!(qc->flags & ATA_QCFLAG_FAILED)) {
					/* which hasn't failed yet, timeout */
					qc->err_mask |= AC_ERR_TIMEOUT;
					qc->flags |= ATA_QCFLAG_FAILED;
					nr_timedout++;
				}
			} else {
				/* Normal completion occurred after
				 * SCSI timeout but before this point.
				 * Successfully complete it.
				 */
				scmd->retries = scmd->allowed;
				scsi_eh_finish_cmd(scmd, &ap->eh_done_q);
			}
		}

		/* If we have timed out qcs.  They belong to EH from
		 * this point but the state of the controller is
		 * unknown.  Freeze the port to make sure the IRQ
		 * handler doesn't diddle with those qcs.  This must
		 * be done atomically w.r.t. setting QCFLAG_FAILED.
		 */
		if (nr_timedout)
			__ata_port_freeze(ap);

		spin_unlock_irqrestore(ap->lock, flags);

		/* initialize eh_tries */
		ap->eh_tries = ATA_EH_MAX_TRIES;
	} else
		spin_unlock_wait(ap->lock);

	/* Open question: if we raced with normal completion and there
	 * is nothing to recover (nr_timedout == 0), why exactly are we
	 * doing error recovery?
	 */

 repeat:
	/* invoke error handler */
	if (ap->ops->error_handler) {
		struct ata_link *link;

		/* kill fast drain timer */
		del_timer_sync(&ap->fastdrain_timer);

		/* process port resume request */
		ata_eh_handle_port_resume(ap);

		/* fetch & clear EH info */
		spin_lock_irqsave(ap->lock, flags);

		ata_for_each_link(link, ap, HOST_FIRST) {
			struct ata_eh_context *ehc = &link->eh_context;
			struct ata_device *dev;

			/* move the accumulated eh_info into a fresh
			 * eh_context and start over with empty eh_info
			 */
			memset(&link->eh_context, 0, sizeof(link->eh_context));
			link->eh_context.i = link->eh_info;
			memset(&link->eh_info, 0, sizeof(link->eh_info));

			/* remember current xfer mode and NCQ state of
			 * each enabled device
			 */
			ata_for_each_dev(dev, link, ENABLED) {
				int devno = dev->devno;

				ehc->saved_xfer_mode[devno] = dev->xfer_mode;
				if (ata_ncq_enabled(dev))
					ehc->saved_ncq_enabled |= 1 << devno;
			}
		}

		ap->pflags |= ATA_PFLAG_EH_IN_PROGRESS;
		ap->pflags &= ~ATA_PFLAG_EH_PENDING;
		ap->excl_link = NULL;	/* don't maintain exclusion over EH */

		spin_unlock_irqrestore(ap->lock, flags);

		/* invoke EH, skip if unloading or suspended */
		if (!(ap->pflags & (ATA_PFLAG_UNLOADING | ATA_PFLAG_SUSPENDED)))
			ap->ops->error_handler(ap);
		else {
			/* if unloading and not unloaded yet, unload now */
			if ((ap->pflags & ATA_PFLAG_UNLOADING) &&
			    !(ap->pflags & ATA_PFLAG_UNLOADED))
				ata_eh_unload(ap);
			ata_eh_finish(ap);
		}

		/* process port suspend request */
		ata_eh_handle_port_suspend(ap);

		/* Exception might have happened after ->error_handler
		 * recovered the port but before this point.  Repeat
		 * EH in such case.
		 */
		spin_lock_irqsave(ap->lock, flags);

		if (ap->pflags & ATA_PFLAG_EH_PENDING) {
			/* retry until eh_tries is used up */
			if (--ap->eh_tries) {
				spin_unlock_irqrestore(ap->lock, flags);
				goto repeat;
			}
			ata_port_printk(ap, KERN_ERR, "EH pending after %d "
					"tries, giving up\n", ATA_EH_MAX_TRIES);
			ap->pflags &= ~ATA_PFLAG_EH_PENDING;
		}

		/* this run is complete, make sure EH info is clear */
		ata_for_each_link(link, ap, HOST_FIRST)
			memset(&link->eh_info, 0, sizeof(link->eh_info));

		/* Clear host_eh_scheduled while holding ap->lock such
		 * that if exception occurs after this point but
		 * before EH completion, SCSI midlayer will
		 * re-initiate EH.
		 */
		host->host_eh_scheduled = 0;

		spin_unlock_irqrestore(ap->lock, flags);
	} else {
		/* no ->error_handler: hand over to ->eng_timeout, an
		 * active qc is expected to exist at this point
		 */
		WARN_ON(ata_qc_from_tag(ap, ap->link.active_tag) == NULL);
		ap->ops->eng_timeout(ap);
	}

	/* finish or retry handled scmd's and clean up */
	WARN_ON(host->host_failed || !list_empty(&host->eh_cmd_q));

	scsi_eh_flush_done_q(&ap->eh_done_q);

	/* clean up */
	spin_lock_irqsave(ap->lock, flags);

	/* a port which was loading is done loading now; otherwise kick
	 * the hotplug task if SCSI hotplug was requested
	 */
	if (ap->pflags & ATA_PFLAG_LOADING)
		ap->pflags &= ~ATA_PFLAG_LOADING;
	else if (ap->pflags & ATA_PFLAG_SCSI_HOTPLUG)
		queue_delayed_work(ata_aux_wq, &ap->hotplug_task, 0);

	if (ap->pflags & ATA_PFLAG_RECOVERED)
		ata_port_printk(ap, KERN_INFO, "EH complete\n");

	ap->pflags &= ~(ATA_PFLAG_SCSI_HOTPLUG | ATA_PFLAG_RECOVERED);

	/* tell wait_eh that we're done */
	ap->pflags &= ~ATA_PFLAG_EH_IN_PROGRESS;
	wake_up_all(&ap->eh_wait_q);

	spin_unlock_irqrestore(ap->lock, flags);

	DPRINTK("EXIT\n");
}

/**
 *	ata_port_wait_eh - Wait for the currently pending EH to complete
 *	@ap: Port to wait EH for
 *
 *	Wait until the currently pending EH is complete.
 *
 *	LOCKING:
 *	Kernel thread context (may sleep).
744 */ 745 void ata_port_wait_eh(struct ata_port *ap) 746 { 747 unsigned long flags; 748 DEFINE_WAIT(wait); 749 750 retry: 751 spin_lock_irqsave(ap->lock, flags); 752 753 while (ap->pflags & (ATA_PFLAG_EH_PENDING | ATA_PFLAG_EH_IN_PROGRESS)) { 754 prepare_to_wait(&ap->eh_wait_q, &wait, TASK_UNINTERRUPTIBLE); 755 spin_unlock_irqrestore(ap->lock, flags); 756 schedule(); 757 spin_lock_irqsave(ap->lock, flags); 758 } 759 finish_wait(&ap->eh_wait_q, &wait); 760 761 spin_unlock_irqrestore(ap->lock, flags); 762 763 /* make sure SCSI EH is complete */ 764 if (scsi_host_in_recovery(ap->scsi_host)) { 765 msleep(10); 766 goto retry; 767 } 768 } 769 770 static int ata_eh_nr_in_flight(struct ata_port *ap) 771 { 772 unsigned int tag; 773 int nr = 0; 774 775 /* count only non-internal commands */ 776 for (tag = 0; tag < ATA_MAX_QUEUE - 1; tag++) 777 if (ata_qc_from_tag(ap, tag)) 778 nr++; 779 780 return nr; 781 } 782 783 void ata_eh_fastdrain_timerfn(unsigned long arg) 784 { 785 struct ata_port *ap = (void *)arg; 786 unsigned long flags; 787 int cnt; 788 789 spin_lock_irqsave(ap->lock, flags); 790 791 cnt = ata_eh_nr_in_flight(ap); 792 793 /* are we done? */ 794 if (!cnt) 795 goto out_unlock; 796 797 if (cnt == ap->fastdrain_cnt) { 798 unsigned int tag; 799 800 /* No progress during the last interval, tag all 801 * in-flight qcs as timed out and freeze the port. 
802 */ 803 for (tag = 0; tag < ATA_MAX_QUEUE - 1; tag++) { 804 struct ata_queued_cmd *qc = ata_qc_from_tag(ap, tag); 805 if (qc) 806 qc->err_mask |= AC_ERR_TIMEOUT; 807 } 808 809 ata_port_freeze(ap); 810 } else { 811 /* some qcs have finished, give it another chance */ 812 ap->fastdrain_cnt = cnt; 813 ap->fastdrain_timer.expires = 814 ata_deadline(jiffies, ATA_EH_FASTDRAIN_INTERVAL); 815 add_timer(&ap->fastdrain_timer); 816 } 817 818 out_unlock: 819 spin_unlock_irqrestore(ap->lock, flags); 820 } 821 822 /** 823 * ata_eh_set_pending - set ATA_PFLAG_EH_PENDING and activate fast drain 824 * @ap: target ATA port 825 * @fastdrain: activate fast drain 826 * 827 * Set ATA_PFLAG_EH_PENDING and activate fast drain if @fastdrain 828 * is non-zero and EH wasn't pending before. Fast drain ensures 829 * that EH kicks in in timely manner. 830 * 831 * LOCKING: 832 * spin_lock_irqsave(host lock) 833 */ 834 static void ata_eh_set_pending(struct ata_port *ap, int fastdrain) 835 { 836 int cnt; 837 838 /* already scheduled? */ 839 if (ap->pflags & ATA_PFLAG_EH_PENDING) 840 return; 841 842 ap->pflags |= ATA_PFLAG_EH_PENDING; 843 844 if (!fastdrain) 845 return; 846 847 /* do we have in-flight qcs? */ 848 cnt = ata_eh_nr_in_flight(ap); 849 if (!cnt) 850 return; 851 852 /* activate fast drain */ 853 ap->fastdrain_cnt = cnt; 854 ap->fastdrain_timer.expires = 855 ata_deadline(jiffies, ATA_EH_FASTDRAIN_INTERVAL); 856 add_timer(&ap->fastdrain_timer); 857 } 858 859 /** 860 * ata_qc_schedule_eh - schedule qc for error handling 861 * @qc: command to schedule error handling for 862 * 863 * Schedule error handling for @qc. EH will kick in as soon as 864 * other commands are drained. 
865 * 866 * LOCKING: 867 * spin_lock_irqsave(host lock) 868 */ 869 void ata_qc_schedule_eh(struct ata_queued_cmd *qc) 870 { 871 struct ata_port *ap = qc->ap; 872 873 WARN_ON(!ap->ops->error_handler); 874 875 qc->flags |= ATA_QCFLAG_FAILED; 876 ata_eh_set_pending(ap, 1); 877 878 /* The following will fail if timeout has already expired. 879 * ata_scsi_error() takes care of such scmds on EH entry. 880 * Note that ATA_QCFLAG_FAILED is unconditionally set after 881 * this function completes. 882 */ 883 blk_abort_request(qc->scsicmd->request); 884 } 885 886 /** 887 * ata_port_schedule_eh - schedule error handling without a qc 888 * @ap: ATA port to schedule EH for 889 * 890 * Schedule error handling for @ap. EH will kick in as soon as 891 * all commands are drained. 892 * 893 * LOCKING: 894 * spin_lock_irqsave(host lock) 895 */ 896 void ata_port_schedule_eh(struct ata_port *ap) 897 { 898 WARN_ON(!ap->ops->error_handler); 899 900 if (ap->pflags & ATA_PFLAG_INITIALIZING) 901 return; 902 903 ata_eh_set_pending(ap, 1); 904 scsi_schedule_eh(ap->scsi_host); 905 906 DPRINTK("port EH scheduled\n"); 907 } 908 909 static int ata_do_link_abort(struct ata_port *ap, struct ata_link *link) 910 { 911 int tag, nr_aborted = 0; 912 913 WARN_ON(!ap->ops->error_handler); 914 915 /* we're gonna abort all commands, no need for fast drain */ 916 ata_eh_set_pending(ap, 0); 917 918 for (tag = 0; tag < ATA_MAX_QUEUE; tag++) { 919 struct ata_queued_cmd *qc = ata_qc_from_tag(ap, tag); 920 921 if (qc && (!link || qc->dev->link == link)) { 922 qc->flags |= ATA_QCFLAG_FAILED; 923 ata_qc_complete(qc); 924 nr_aborted++; 925 } 926 } 927 928 if (!nr_aborted) 929 ata_port_schedule_eh(ap); 930 931 return nr_aborted; 932 } 933 934 /** 935 * ata_link_abort - abort all qc's on the link 936 * @link: ATA link to abort qc's for 937 * 938 * Abort all active qc's active on @link and schedule EH. 939 * 940 * LOCKING: 941 * spin_lock_irqsave(host lock) 942 * 943 * RETURNS: 944 * Number of aborted qc's. 
945 */ 946 int ata_link_abort(struct ata_link *link) 947 { 948 return ata_do_link_abort(link->ap, link); 949 } 950 951 /** 952 * ata_port_abort - abort all qc's on the port 953 * @ap: ATA port to abort qc's for 954 * 955 * Abort all active qc's of @ap and schedule EH. 956 * 957 * LOCKING: 958 * spin_lock_irqsave(host_set lock) 959 * 960 * RETURNS: 961 * Number of aborted qc's. 962 */ 963 int ata_port_abort(struct ata_port *ap) 964 { 965 return ata_do_link_abort(ap, NULL); 966 } 967 968 /** 969 * __ata_port_freeze - freeze port 970 * @ap: ATA port to freeze 971 * 972 * This function is called when HSM violation or some other 973 * condition disrupts normal operation of the port. Frozen port 974 * is not allowed to perform any operation until the port is 975 * thawed, which usually follows a successful reset. 976 * 977 * ap->ops->freeze() callback can be used for freezing the port 978 * hardware-wise (e.g. mask interrupt and stop DMA engine). If a 979 * port cannot be frozen hardware-wise, the interrupt handler 980 * must ack and clear interrupts unconditionally while the port 981 * is frozen. 982 * 983 * LOCKING: 984 * spin_lock_irqsave(host lock) 985 */ 986 static void __ata_port_freeze(struct ata_port *ap) 987 { 988 WARN_ON(!ap->ops->error_handler); 989 990 if (ap->ops->freeze) 991 ap->ops->freeze(ap); 992 993 ap->pflags |= ATA_PFLAG_FROZEN; 994 995 DPRINTK("ata%u port frozen\n", ap->print_id); 996 } 997 998 /** 999 * ata_port_freeze - abort & freeze port 1000 * @ap: ATA port to freeze 1001 * 1002 * Abort and freeze @ap. 1003 * 1004 * LOCKING: 1005 * spin_lock_irqsave(host lock) 1006 * 1007 * RETURNS: 1008 * Number of aborted commands. 
1009 */ 1010 int ata_port_freeze(struct ata_port *ap) 1011 { 1012 int nr_aborted; 1013 1014 WARN_ON(!ap->ops->error_handler); 1015 1016 nr_aborted = ata_port_abort(ap); 1017 __ata_port_freeze(ap); 1018 1019 return nr_aborted; 1020 } 1021 1022 /** 1023 * sata_async_notification - SATA async notification handler 1024 * @ap: ATA port where async notification is received 1025 * 1026 * Handler to be called when async notification via SDB FIS is 1027 * received. This function schedules EH if necessary. 1028 * 1029 * LOCKING: 1030 * spin_lock_irqsave(host lock) 1031 * 1032 * RETURNS: 1033 * 1 if EH is scheduled, 0 otherwise. 1034 */ 1035 int sata_async_notification(struct ata_port *ap) 1036 { 1037 u32 sntf; 1038 int rc; 1039 1040 if (!(ap->flags & ATA_FLAG_AN)) 1041 return 0; 1042 1043 rc = sata_scr_read(&ap->link, SCR_NOTIFICATION, &sntf); 1044 if (rc == 0) 1045 sata_scr_write(&ap->link, SCR_NOTIFICATION, sntf); 1046 1047 if (!sata_pmp_attached(ap) || rc) { 1048 /* PMP is not attached or SNTF is not available */ 1049 if (!sata_pmp_attached(ap)) { 1050 /* PMP is not attached. Check whether ATAPI 1051 * AN is configured. If so, notify media 1052 * change. 1053 */ 1054 struct ata_device *dev = ap->link.device; 1055 1056 if ((dev->class == ATA_DEV_ATAPI) && 1057 (dev->flags & ATA_DFLAG_AN)) 1058 ata_scsi_media_change_notify(dev); 1059 return 0; 1060 } else { 1061 /* PMP is attached but SNTF is not available. 1062 * ATAPI async media change notification is 1063 * not used. The PMP must be reporting PHY 1064 * status change, schedule EH. 
1065 */ 1066 ata_port_schedule_eh(ap); 1067 return 1; 1068 } 1069 } else { 1070 /* PMP is attached and SNTF is available */ 1071 struct ata_link *link; 1072 1073 /* check and notify ATAPI AN */ 1074 ata_for_each_link(link, ap, EDGE) { 1075 if (!(sntf & (1 << link->pmp))) 1076 continue; 1077 1078 if ((link->device->class == ATA_DEV_ATAPI) && 1079 (link->device->flags & ATA_DFLAG_AN)) 1080 ata_scsi_media_change_notify(link->device); 1081 } 1082 1083 /* If PMP is reporting that PHY status of some 1084 * downstream ports has changed, schedule EH. 1085 */ 1086 if (sntf & (1 << SATA_PMP_CTRL_PORT)) { 1087 ata_port_schedule_eh(ap); 1088 return 1; 1089 } 1090 1091 return 0; 1092 } 1093 } 1094 1095 /** 1096 * ata_eh_freeze_port - EH helper to freeze port 1097 * @ap: ATA port to freeze 1098 * 1099 * Freeze @ap. 1100 * 1101 * LOCKING: 1102 * None. 1103 */ 1104 void ata_eh_freeze_port(struct ata_port *ap) 1105 { 1106 unsigned long flags; 1107 1108 if (!ap->ops->error_handler) 1109 return; 1110 1111 spin_lock_irqsave(ap->lock, flags); 1112 __ata_port_freeze(ap); 1113 spin_unlock_irqrestore(ap->lock, flags); 1114 } 1115 1116 /** 1117 * ata_port_thaw_port - EH helper to thaw port 1118 * @ap: ATA port to thaw 1119 * 1120 * Thaw frozen port @ap. 1121 * 1122 * LOCKING: 1123 * None. 
1124 */ 1125 void ata_eh_thaw_port(struct ata_port *ap) 1126 { 1127 unsigned long flags; 1128 1129 if (!ap->ops->error_handler) 1130 return; 1131 1132 spin_lock_irqsave(ap->lock, flags); 1133 1134 ap->pflags &= ~ATA_PFLAG_FROZEN; 1135 1136 if (ap->ops->thaw) 1137 ap->ops->thaw(ap); 1138 1139 spin_unlock_irqrestore(ap->lock, flags); 1140 1141 DPRINTK("ata%u port thawed\n", ap->print_id); 1142 } 1143 1144 static void ata_eh_scsidone(struct scsi_cmnd *scmd) 1145 { 1146 /* nada */ 1147 } 1148 1149 static void __ata_eh_qc_complete(struct ata_queued_cmd *qc) 1150 { 1151 struct ata_port *ap = qc->ap; 1152 struct scsi_cmnd *scmd = qc->scsicmd; 1153 unsigned long flags; 1154 1155 spin_lock_irqsave(ap->lock, flags); 1156 qc->scsidone = ata_eh_scsidone; 1157 __ata_qc_complete(qc); 1158 WARN_ON(ata_tag_valid(qc->tag)); 1159 spin_unlock_irqrestore(ap->lock, flags); 1160 1161 scsi_eh_finish_cmd(scmd, &ap->eh_done_q); 1162 } 1163 1164 /** 1165 * ata_eh_qc_complete - Complete an active ATA command from EH 1166 * @qc: Command to complete 1167 * 1168 * Indicate to the mid and upper layers that an ATA command has 1169 * completed. To be used from EH. 1170 */ 1171 void ata_eh_qc_complete(struct ata_queued_cmd *qc) 1172 { 1173 struct scsi_cmnd *scmd = qc->scsicmd; 1174 scmd->retries = scmd->allowed; 1175 __ata_eh_qc_complete(qc); 1176 } 1177 1178 /** 1179 * ata_eh_qc_retry - Tell midlayer to retry an ATA command after EH 1180 * @qc: Command to retry 1181 * 1182 * Indicate to the mid and upper layers that an ATA command 1183 * should be retried. To be used from EH. 1184 * 1185 * SCSI midlayer limits the number of retries to scmd->allowed. 1186 * scmd->retries is decremented for commands which get retried 1187 * due to unrelated failures (qc->err_mask is zero). 
1188 */ 1189 void ata_eh_qc_retry(struct ata_queued_cmd *qc) 1190 { 1191 struct scsi_cmnd *scmd = qc->scsicmd; 1192 if (!qc->err_mask && scmd->retries) 1193 scmd->retries--; 1194 __ata_eh_qc_complete(qc); 1195 } 1196 1197 /** 1198 * ata_dev_disable - disable ATA device 1199 * @dev: ATA device to disable 1200 * 1201 * Disable @dev. 1202 * 1203 * Locking: 1204 * EH context. 1205 */ 1206 void ata_dev_disable(struct ata_device *dev) 1207 { 1208 if (!ata_dev_enabled(dev)) 1209 return; 1210 1211 if (ata_msg_drv(dev->link->ap)) 1212 ata_dev_printk(dev, KERN_WARNING, "disabled\n"); 1213 ata_acpi_on_disable(dev); 1214 ata_down_xfermask_limit(dev, ATA_DNXFER_FORCE_PIO0 | ATA_DNXFER_QUIET); 1215 dev->class++; 1216 1217 /* From now till the next successful probe, ering is used to 1218 * track probe failures. Clear accumulated device error info. 1219 */ 1220 ata_ering_clear(&dev->ering); 1221 } 1222 1223 /** 1224 * ata_eh_detach_dev - detach ATA device 1225 * @dev: ATA device to detach 1226 * 1227 * Detach @dev. 1228 * 1229 * LOCKING: 1230 * None. 
1231 */ 1232 void ata_eh_detach_dev(struct ata_device *dev) 1233 { 1234 struct ata_link *link = dev->link; 1235 struct ata_port *ap = link->ap; 1236 struct ata_eh_context *ehc = &link->eh_context; 1237 unsigned long flags; 1238 1239 ata_dev_disable(dev); 1240 1241 spin_lock_irqsave(ap->lock, flags); 1242 1243 dev->flags &= ~ATA_DFLAG_DETACH; 1244 1245 if (ata_scsi_offline_dev(dev)) { 1246 dev->flags |= ATA_DFLAG_DETACHED; 1247 ap->pflags |= ATA_PFLAG_SCSI_HOTPLUG; 1248 } 1249 1250 /* clear per-dev EH info */ 1251 ata_eh_clear_action(link, dev, &link->eh_info, ATA_EH_PERDEV_MASK); 1252 ata_eh_clear_action(link, dev, &link->eh_context.i, ATA_EH_PERDEV_MASK); 1253 ehc->saved_xfer_mode[dev->devno] = 0; 1254 ehc->saved_ncq_enabled &= ~(1 << dev->devno); 1255 1256 spin_unlock_irqrestore(ap->lock, flags); 1257 } 1258 1259 /** 1260 * ata_eh_about_to_do - about to perform eh_action 1261 * @link: target ATA link 1262 * @dev: target ATA dev for per-dev action (can be NULL) 1263 * @action: action about to be performed 1264 * 1265 * Called just before performing EH actions to clear related bits 1266 * in @link->eh_info such that eh actions are not unnecessarily 1267 * repeated. 1268 * 1269 * LOCKING: 1270 * None. 1271 */ 1272 void ata_eh_about_to_do(struct ata_link *link, struct ata_device *dev, 1273 unsigned int action) 1274 { 1275 struct ata_port *ap = link->ap; 1276 struct ata_eh_info *ehi = &link->eh_info; 1277 struct ata_eh_context *ehc = &link->eh_context; 1278 unsigned long flags; 1279 1280 spin_lock_irqsave(ap->lock, flags); 1281 1282 ata_eh_clear_action(link, dev, ehi, action); 1283 1284 /* About to take EH action, set RECOVERED. Ignore actions on 1285 * slave links as master will do them again. 
1286 */ 1287 if (!(ehc->i.flags & ATA_EHI_QUIET) && link != ap->slave_link) 1288 ap->pflags |= ATA_PFLAG_RECOVERED; 1289 1290 spin_unlock_irqrestore(ap->lock, flags); 1291 } 1292 1293 /** 1294 * ata_eh_done - EH action complete 1295 * @ap: target ATA port 1296 * @dev: target ATA dev for per-dev action (can be NULL) 1297 * @action: action just completed 1298 * 1299 * Called right after performing EH actions to clear related bits 1300 * in @link->eh_context. 1301 * 1302 * LOCKING: 1303 * None. 1304 */ 1305 void ata_eh_done(struct ata_link *link, struct ata_device *dev, 1306 unsigned int action) 1307 { 1308 struct ata_eh_context *ehc = &link->eh_context; 1309 1310 ata_eh_clear_action(link, dev, &ehc->i, action); 1311 } 1312 1313 /** 1314 * ata_err_string - convert err_mask to descriptive string 1315 * @err_mask: error mask to convert to string 1316 * 1317 * Convert @err_mask to descriptive string. Errors are 1318 * prioritized according to severity and only the most severe 1319 * error is reported. 1320 * 1321 * LOCKING: 1322 * None. 1323 * 1324 * RETURNS: 1325 * Descriptive string for @err_mask 1326 */ 1327 static const char *ata_err_string(unsigned int err_mask) 1328 { 1329 if (err_mask & AC_ERR_HOST_BUS) 1330 return "host bus error"; 1331 if (err_mask & AC_ERR_ATA_BUS) 1332 return "ATA bus error"; 1333 if (err_mask & AC_ERR_TIMEOUT) 1334 return "timeout"; 1335 if (err_mask & AC_ERR_HSM) 1336 return "HSM violation"; 1337 if (err_mask & AC_ERR_SYSTEM) 1338 return "internal error"; 1339 if (err_mask & AC_ERR_MEDIA) 1340 return "media error"; 1341 if (err_mask & AC_ERR_INVALID) 1342 return "invalid argument"; 1343 if (err_mask & AC_ERR_DEV) 1344 return "device error"; 1345 return "unknown error"; 1346 } 1347 1348 /** 1349 * ata_read_log_page - read a specific log page 1350 * @dev: target device 1351 * @page: page to read 1352 * @buf: buffer to store read page 1353 * @sectors: number of sectors to read 1354 * 1355 * Read log page using READ_LOG_EXT command. 
1356 * 1357 * LOCKING: 1358 * Kernel thread context (may sleep). 1359 * 1360 * RETURNS: 1361 * 0 on success, AC_ERR_* mask otherwise. 1362 */ 1363 static unsigned int ata_read_log_page(struct ata_device *dev, 1364 u8 page, void *buf, unsigned int sectors) 1365 { 1366 struct ata_taskfile tf; 1367 unsigned int err_mask; 1368 1369 DPRINTK("read log page - page %d\n", page); 1370 1371 ata_tf_init(dev, &tf); 1372 tf.command = ATA_CMD_READ_LOG_EXT; 1373 tf.lbal = page; 1374 tf.nsect = sectors; 1375 tf.hob_nsect = sectors >> 8; 1376 tf.flags |= ATA_TFLAG_ISADDR | ATA_TFLAG_LBA48 | ATA_TFLAG_DEVICE; 1377 tf.protocol = ATA_PROT_PIO; 1378 1379 err_mask = ata_exec_internal(dev, &tf, NULL, DMA_FROM_DEVICE, 1380 buf, sectors * ATA_SECT_SIZE, 0); 1381 1382 DPRINTK("EXIT, err_mask=%x\n", err_mask); 1383 return err_mask; 1384 } 1385 1386 /** 1387 * ata_eh_read_log_10h - Read log page 10h for NCQ error details 1388 * @dev: Device to read log page 10h from 1389 * @tag: Resulting tag of the failed command 1390 * @tf: Resulting taskfile registers of the failed command 1391 * 1392 * Read log page 10h to obtain NCQ error details and clear error 1393 * condition. 1394 * 1395 * LOCKING: 1396 * Kernel thread context (may sleep). 1397 * 1398 * RETURNS: 1399 * 0 on success, -errno otherwise. 
1400 */ 1401 static int ata_eh_read_log_10h(struct ata_device *dev, 1402 int *tag, struct ata_taskfile *tf) 1403 { 1404 u8 *buf = dev->link->ap->sector_buf; 1405 unsigned int err_mask; 1406 u8 csum; 1407 int i; 1408 1409 err_mask = ata_read_log_page(dev, ATA_LOG_SATA_NCQ, buf, 1); 1410 if (err_mask) 1411 return -EIO; 1412 1413 csum = 0; 1414 for (i = 0; i < ATA_SECT_SIZE; i++) 1415 csum += buf[i]; 1416 if (csum) 1417 ata_dev_printk(dev, KERN_WARNING, 1418 "invalid checksum 0x%x on log page 10h\n", csum); 1419 1420 if (buf[0] & 0x80) 1421 return -ENOENT; 1422 1423 *tag = buf[0] & 0x1f; 1424 1425 tf->command = buf[2]; 1426 tf->feature = buf[3]; 1427 tf->lbal = buf[4]; 1428 tf->lbam = buf[5]; 1429 tf->lbah = buf[6]; 1430 tf->device = buf[7]; 1431 tf->hob_lbal = buf[8]; 1432 tf->hob_lbam = buf[9]; 1433 tf->hob_lbah = buf[10]; 1434 tf->nsect = buf[12]; 1435 tf->hob_nsect = buf[13]; 1436 1437 return 0; 1438 } 1439 1440 /** 1441 * atapi_eh_tur - perform ATAPI TEST_UNIT_READY 1442 * @dev: target ATAPI device 1443 * @r_sense_key: out parameter for sense_key 1444 * 1445 * Perform ATAPI TEST_UNIT_READY. 1446 * 1447 * LOCKING: 1448 * EH context (may sleep). 1449 * 1450 * RETURNS: 1451 * 0 on success, AC_ERR_* mask on failure. 
1452 */ 1453 static unsigned int atapi_eh_tur(struct ata_device *dev, u8 *r_sense_key) 1454 { 1455 u8 cdb[ATAPI_CDB_LEN] = { TEST_UNIT_READY, 0, 0, 0, 0, 0 }; 1456 struct ata_taskfile tf; 1457 unsigned int err_mask; 1458 1459 ata_tf_init(dev, &tf); 1460 1461 tf.flags |= ATA_TFLAG_ISADDR | ATA_TFLAG_DEVICE; 1462 tf.command = ATA_CMD_PACKET; 1463 tf.protocol = ATAPI_PROT_NODATA; 1464 1465 err_mask = ata_exec_internal(dev, &tf, cdb, DMA_NONE, NULL, 0, 0); 1466 if (err_mask == AC_ERR_DEV) 1467 *r_sense_key = tf.feature >> 4; 1468 return err_mask; 1469 } 1470 1471 /** 1472 * atapi_eh_request_sense - perform ATAPI REQUEST_SENSE 1473 * @dev: device to perform REQUEST_SENSE to 1474 * @sense_buf: result sense data buffer (SCSI_SENSE_BUFFERSIZE bytes long) 1475 * @dfl_sense_key: default sense key to use 1476 * 1477 * Perform ATAPI REQUEST_SENSE after the device reported CHECK 1478 * SENSE. This function is EH helper. 1479 * 1480 * LOCKING: 1481 * Kernel thread context (may sleep). 1482 * 1483 * RETURNS: 1484 * 0 on success, AC_ERR_* mask on failure 1485 */ 1486 static unsigned int atapi_eh_request_sense(struct ata_device *dev, 1487 u8 *sense_buf, u8 dfl_sense_key) 1488 { 1489 u8 cdb[ATAPI_CDB_LEN] = 1490 { REQUEST_SENSE, 0, 0, 0, SCSI_SENSE_BUFFERSIZE, 0 }; 1491 struct ata_port *ap = dev->link->ap; 1492 struct ata_taskfile tf; 1493 1494 DPRINTK("ATAPI request sense\n"); 1495 1496 /* FIXME: is this needed? */ 1497 memset(sense_buf, 0, SCSI_SENSE_BUFFERSIZE); 1498 1499 /* initialize sense_buf with the error register, 1500 * for the case where they are -not- overwritten 1501 */ 1502 sense_buf[0] = 0x70; 1503 sense_buf[2] = dfl_sense_key; 1504 1505 /* some devices time out if garbage left in tf */ 1506 ata_tf_init(dev, &tf); 1507 1508 tf.flags |= ATA_TFLAG_ISADDR | ATA_TFLAG_DEVICE; 1509 tf.command = ATA_CMD_PACKET; 1510 1511 /* is it pointless to prefer PIO for "safety reasons"? 
*/ 1512 if (ap->flags & ATA_FLAG_PIO_DMA) { 1513 tf.protocol = ATAPI_PROT_DMA; 1514 tf.feature |= ATAPI_PKT_DMA; 1515 } else { 1516 tf.protocol = ATAPI_PROT_PIO; 1517 tf.lbam = SCSI_SENSE_BUFFERSIZE; 1518 tf.lbah = 0; 1519 } 1520 1521 return ata_exec_internal(dev, &tf, cdb, DMA_FROM_DEVICE, 1522 sense_buf, SCSI_SENSE_BUFFERSIZE, 0); 1523 } 1524 1525 /** 1526 * ata_eh_analyze_serror - analyze SError for a failed port 1527 * @link: ATA link to analyze SError for 1528 * 1529 * Analyze SError if available and further determine cause of 1530 * failure. 1531 * 1532 * LOCKING: 1533 * None. 1534 */ 1535 static void ata_eh_analyze_serror(struct ata_link *link) 1536 { 1537 struct ata_eh_context *ehc = &link->eh_context; 1538 u32 serror = ehc->i.serror; 1539 unsigned int err_mask = 0, action = 0; 1540 u32 hotplug_mask; 1541 1542 if (serror & (SERR_PERSISTENT | SERR_DATA)) { 1543 err_mask |= AC_ERR_ATA_BUS; 1544 action |= ATA_EH_RESET; 1545 } 1546 if (serror & SERR_PROTOCOL) { 1547 err_mask |= AC_ERR_HSM; 1548 action |= ATA_EH_RESET; 1549 } 1550 if (serror & SERR_INTERNAL) { 1551 err_mask |= AC_ERR_SYSTEM; 1552 action |= ATA_EH_RESET; 1553 } 1554 1555 /* Determine whether a hotplug event has occurred. Both 1556 * SError.N/X are considered hotplug events for enabled or 1557 * host links. For disabled PMP links, only N bit is 1558 * considered as X bit is left at 1 for link plugging. 1559 */ 1560 hotplug_mask = 0; 1561 1562 if (!(link->flags & ATA_LFLAG_DISABLED) || ata_is_host_link(link)) 1563 hotplug_mask = SERR_PHYRDY_CHG | SERR_DEV_XCHG; 1564 else 1565 hotplug_mask = SERR_PHYRDY_CHG; 1566 1567 if (serror & hotplug_mask) 1568 ata_ehi_hotplugged(&ehc->i); 1569 1570 ehc->i.err_mask |= err_mask; 1571 ehc->i.action |= action; 1572 } 1573 1574 /** 1575 * ata_eh_analyze_ncq_error - analyze NCQ error 1576 * @link: ATA link to analyze NCQ error for 1577 * 1578 * Read log page 10h, determine the offending qc and acquire 1579 * error status TF. 
For NCQ device errors, all LLDDs have to do 1580 * is setting AC_ERR_DEV in ehi->err_mask. This function takes 1581 * care of the rest. 1582 * 1583 * LOCKING: 1584 * Kernel thread context (may sleep). 1585 */ 1586 void ata_eh_analyze_ncq_error(struct ata_link *link) 1587 { 1588 struct ata_port *ap = link->ap; 1589 struct ata_eh_context *ehc = &link->eh_context; 1590 struct ata_device *dev = link->device; 1591 struct ata_queued_cmd *qc; 1592 struct ata_taskfile tf; 1593 int tag, rc; 1594 1595 /* if frozen, we can't do much */ 1596 if (ap->pflags & ATA_PFLAG_FROZEN) 1597 return; 1598 1599 /* is it NCQ device error? */ 1600 if (!link->sactive || !(ehc->i.err_mask & AC_ERR_DEV)) 1601 return; 1602 1603 /* has LLDD analyzed already? */ 1604 for (tag = 0; tag < ATA_MAX_QUEUE; tag++) { 1605 qc = __ata_qc_from_tag(ap, tag); 1606 1607 if (!(qc->flags & ATA_QCFLAG_FAILED)) 1608 continue; 1609 1610 if (qc->err_mask) 1611 return; 1612 } 1613 1614 /* okay, this error is ours */ 1615 rc = ata_eh_read_log_10h(dev, &tag, &tf); 1616 if (rc) { 1617 ata_link_printk(link, KERN_ERR, "failed to read log page 10h " 1618 "(errno=%d)\n", rc); 1619 return; 1620 } 1621 1622 if (!(link->sactive & (1 << tag))) { 1623 ata_link_printk(link, KERN_ERR, "log page 10h reported " 1624 "inactive tag %d\n", tag); 1625 return; 1626 } 1627 1628 /* we've got the perpetrator, condemn it */ 1629 qc = __ata_qc_from_tag(ap, tag); 1630 memcpy(&qc->result_tf, &tf, sizeof(tf)); 1631 qc->result_tf.flags = ATA_TFLAG_ISADDR | ATA_TFLAG_LBA | ATA_TFLAG_LBA48; 1632 qc->err_mask |= AC_ERR_DEV | AC_ERR_NCQ; 1633 ehc->i.err_mask &= ~AC_ERR_DEV; 1634 } 1635 1636 /** 1637 * ata_eh_analyze_tf - analyze taskfile of a failed qc 1638 * @qc: qc to analyze 1639 * @tf: Taskfile registers to analyze 1640 * 1641 * Analyze taskfile of @qc and further determine cause of 1642 * failure. This function also requests ATAPI sense data if 1643 * avaliable. 1644 * 1645 * LOCKING: 1646 * Kernel thread context (may sleep). 
1647 * 1648 * RETURNS: 1649 * Determined recovery action 1650 */ 1651 static unsigned int ata_eh_analyze_tf(struct ata_queued_cmd *qc, 1652 const struct ata_taskfile *tf) 1653 { 1654 unsigned int tmp, action = 0; 1655 u8 stat = tf->command, err = tf->feature; 1656 1657 if ((stat & (ATA_BUSY | ATA_DRQ | ATA_DRDY)) != ATA_DRDY) { 1658 qc->err_mask |= AC_ERR_HSM; 1659 return ATA_EH_RESET; 1660 } 1661 1662 if (stat & (ATA_ERR | ATA_DF)) 1663 qc->err_mask |= AC_ERR_DEV; 1664 else 1665 return 0; 1666 1667 switch (qc->dev->class) { 1668 case ATA_DEV_ATA: 1669 if (err & ATA_ICRC) 1670 qc->err_mask |= AC_ERR_ATA_BUS; 1671 if (err & ATA_UNC) 1672 qc->err_mask |= AC_ERR_MEDIA; 1673 if (err & ATA_IDNF) 1674 qc->err_mask |= AC_ERR_INVALID; 1675 break; 1676 1677 case ATA_DEV_ATAPI: 1678 if (!(qc->ap->pflags & ATA_PFLAG_FROZEN)) { 1679 tmp = atapi_eh_request_sense(qc->dev, 1680 qc->scsicmd->sense_buffer, 1681 qc->result_tf.feature >> 4); 1682 if (!tmp) { 1683 /* ATA_QCFLAG_SENSE_VALID is used to 1684 * tell atapi_qc_complete() that sense 1685 * data is already valid. 1686 * 1687 * TODO: interpret sense data and set 1688 * appropriate err_mask. 
1689 */ 1690 qc->flags |= ATA_QCFLAG_SENSE_VALID; 1691 } else 1692 qc->err_mask |= tmp; 1693 } 1694 } 1695 1696 if (qc->err_mask & (AC_ERR_HSM | AC_ERR_TIMEOUT | AC_ERR_ATA_BUS)) 1697 action |= ATA_EH_RESET; 1698 1699 return action; 1700 } 1701 1702 static int ata_eh_categorize_error(unsigned int eflags, unsigned int err_mask, 1703 int *xfer_ok) 1704 { 1705 int base = 0; 1706 1707 if (!(eflags & ATA_EFLAG_DUBIOUS_XFER)) 1708 *xfer_ok = 1; 1709 1710 if (!*xfer_ok) 1711 base = ATA_ECAT_DUBIOUS_NONE; 1712 1713 if (err_mask & AC_ERR_ATA_BUS) 1714 return base + ATA_ECAT_ATA_BUS; 1715 1716 if (err_mask & AC_ERR_TIMEOUT) 1717 return base + ATA_ECAT_TOUT_HSM; 1718 1719 if (eflags & ATA_EFLAG_IS_IO) { 1720 if (err_mask & AC_ERR_HSM) 1721 return base + ATA_ECAT_TOUT_HSM; 1722 if ((err_mask & 1723 (AC_ERR_DEV|AC_ERR_MEDIA|AC_ERR_INVALID)) == AC_ERR_DEV) 1724 return base + ATA_ECAT_UNK_DEV; 1725 } 1726 1727 return 0; 1728 } 1729 1730 struct speed_down_verdict_arg { 1731 u64 since; 1732 int xfer_ok; 1733 int nr_errors[ATA_ECAT_NR]; 1734 }; 1735 1736 static int speed_down_verdict_cb(struct ata_ering_entry *ent, void *void_arg) 1737 { 1738 struct speed_down_verdict_arg *arg = void_arg; 1739 int cat; 1740 1741 if (ent->timestamp < arg->since) 1742 return -1; 1743 1744 cat = ata_eh_categorize_error(ent->eflags, ent->err_mask, 1745 &arg->xfer_ok); 1746 arg->nr_errors[cat]++; 1747 1748 return 0; 1749 } 1750 1751 /** 1752 * ata_eh_speed_down_verdict - Determine speed down verdict 1753 * @dev: Device of interest 1754 * 1755 * This function examines error ring of @dev and determines 1756 * whether NCQ needs to be turned off, transfer speed should be 1757 * stepped down, or falling back to PIO is necessary. 
*
 *	ECAT_ATA_BUS	: ATA_BUS error for any command
 *
 *	ECAT_TOUT_HSM	: TIMEOUT for any command or HSM violation for
 *			  IO commands
 *
 *	ECAT_UNK_DEV	: Unknown DEV error for IO commands
 *
 *	ECAT_DUBIOUS_*	: Identical to above three but occurred while
 *			  data transfer hasn't been verified.
 *
 *	Verdicts are
 *
 *	NCQ_OFF		: Turn off NCQ.
 *
 *	SPEED_DOWN	: Speed down transfer speed but don't fall back
 *			  to PIO.
 *
 *	FALLBACK_TO_PIO	: Fall back to PIO.
 *
 *	Even if multiple verdicts are returned, only one action is
 *	taken per error.  An action triggered by non-DUBIOUS errors
 *	clears ering, while one triggered by DUBIOUS_* errors doesn't.
 *	This is to expedite speed down decisions right after device is
 *	initially configured.
 *
 *	The following are the speed down rules.  #1 and #2 deal with
 *	DUBIOUS errors.
 *
 *	1. If more than one DUBIOUS_ATA_BUS or DUBIOUS_TOUT_HSM errors
 *	   occurred during last 5 mins, SPEED_DOWN and FALLBACK_TO_PIO.
 *
 *	2. If more than one DUBIOUS_TOUT_HSM or DUBIOUS_UNK_DEV errors
 *	   occurred during last 5 mins, NCQ_OFF.
 *
 *	3. If more than 6 ATA_BUS, TOUT_HSM or UNK_DEV errors
 *	   occurred during last 5 mins, FALLBACK_TO_PIO.
 *
 *	4. If more than 3 TOUT_HSM or UNK_DEV errors occurred
 *	   during last 10 mins, NCQ_OFF.
 *
 *	5. If more than 3 ATA_BUS or TOUT_HSM errors, or more than 6
 *	   UNK_DEV errors occurred during last 10 mins, SPEED_DOWN.
 *
 *	LOCKING:
 *	Inherited from caller.
 *
 *	RETURNS:
 *	OR of ATA_EH_SPDN_* flags.
1807 */ 1808 static unsigned int ata_eh_speed_down_verdict(struct ata_device *dev) 1809 { 1810 const u64 j5mins = 5LLU * 60 * HZ, j10mins = 10LLU * 60 * HZ; 1811 u64 j64 = get_jiffies_64(); 1812 struct speed_down_verdict_arg arg; 1813 unsigned int verdict = 0; 1814 1815 /* scan past 5 mins of error history */ 1816 memset(&arg, 0, sizeof(arg)); 1817 arg.since = j64 - min(j64, j5mins); 1818 ata_ering_map(&dev->ering, speed_down_verdict_cb, &arg); 1819 1820 if (arg.nr_errors[ATA_ECAT_DUBIOUS_ATA_BUS] + 1821 arg.nr_errors[ATA_ECAT_DUBIOUS_TOUT_HSM] > 1) 1822 verdict |= ATA_EH_SPDN_SPEED_DOWN | 1823 ATA_EH_SPDN_FALLBACK_TO_PIO | ATA_EH_SPDN_KEEP_ERRORS; 1824 1825 if (arg.nr_errors[ATA_ECAT_DUBIOUS_TOUT_HSM] + 1826 arg.nr_errors[ATA_ECAT_DUBIOUS_UNK_DEV] > 1) 1827 verdict |= ATA_EH_SPDN_NCQ_OFF | ATA_EH_SPDN_KEEP_ERRORS; 1828 1829 if (arg.nr_errors[ATA_ECAT_ATA_BUS] + 1830 arg.nr_errors[ATA_ECAT_TOUT_HSM] + 1831 arg.nr_errors[ATA_ECAT_UNK_DEV] > 6) 1832 verdict |= ATA_EH_SPDN_FALLBACK_TO_PIO; 1833 1834 /* scan past 10 mins of error history */ 1835 memset(&arg, 0, sizeof(arg)); 1836 arg.since = j64 - min(j64, j10mins); 1837 ata_ering_map(&dev->ering, speed_down_verdict_cb, &arg); 1838 1839 if (arg.nr_errors[ATA_ECAT_TOUT_HSM] + 1840 arg.nr_errors[ATA_ECAT_UNK_DEV] > 3) 1841 verdict |= ATA_EH_SPDN_NCQ_OFF; 1842 1843 if (arg.nr_errors[ATA_ECAT_ATA_BUS] + 1844 arg.nr_errors[ATA_ECAT_TOUT_HSM] > 3 || 1845 arg.nr_errors[ATA_ECAT_UNK_DEV] > 6) 1846 verdict |= ATA_EH_SPDN_SPEED_DOWN; 1847 1848 return verdict; 1849 } 1850 1851 /** 1852 * ata_eh_speed_down - record error and speed down if necessary 1853 * @dev: Failed device 1854 * @eflags: mask of ATA_EFLAG_* flags 1855 * @err_mask: err_mask of the error 1856 * 1857 * Record error and examine error history to determine whether 1858 * adjusting transmission speed is necessary. It also sets 1859 * transmission limits appropriately if such adjustment is 1860 * necessary. 
1861 * 1862 * LOCKING: 1863 * Kernel thread context (may sleep). 1864 * 1865 * RETURNS: 1866 * Determined recovery action. 1867 */ 1868 static unsigned int ata_eh_speed_down(struct ata_device *dev, 1869 unsigned int eflags, unsigned int err_mask) 1870 { 1871 struct ata_link *link = ata_dev_phys_link(dev); 1872 int xfer_ok = 0; 1873 unsigned int verdict; 1874 unsigned int action = 0; 1875 1876 /* don't bother if Cat-0 error */ 1877 if (ata_eh_categorize_error(eflags, err_mask, &xfer_ok) == 0) 1878 return 0; 1879 1880 /* record error and determine whether speed down is necessary */ 1881 ata_ering_record(&dev->ering, eflags, err_mask); 1882 verdict = ata_eh_speed_down_verdict(dev); 1883 1884 /* turn off NCQ? */ 1885 if ((verdict & ATA_EH_SPDN_NCQ_OFF) && 1886 (dev->flags & (ATA_DFLAG_PIO | ATA_DFLAG_NCQ | 1887 ATA_DFLAG_NCQ_OFF)) == ATA_DFLAG_NCQ) { 1888 dev->flags |= ATA_DFLAG_NCQ_OFF; 1889 ata_dev_printk(dev, KERN_WARNING, 1890 "NCQ disabled due to excessive errors\n"); 1891 goto done; 1892 } 1893 1894 /* speed down? */ 1895 if (verdict & ATA_EH_SPDN_SPEED_DOWN) { 1896 /* speed down SATA link speed if possible */ 1897 if (sata_down_spd_limit(link, 0) == 0) { 1898 action |= ATA_EH_RESET; 1899 goto done; 1900 } 1901 1902 /* lower transfer mode */ 1903 if (dev->spdn_cnt < 2) { 1904 static const int dma_dnxfer_sel[] = 1905 { ATA_DNXFER_DMA, ATA_DNXFER_40C }; 1906 static const int pio_dnxfer_sel[] = 1907 { ATA_DNXFER_PIO, ATA_DNXFER_FORCE_PIO0 }; 1908 int sel; 1909 1910 if (dev->xfer_shift != ATA_SHIFT_PIO) 1911 sel = dma_dnxfer_sel[dev->spdn_cnt]; 1912 else 1913 sel = pio_dnxfer_sel[dev->spdn_cnt]; 1914 1915 dev->spdn_cnt++; 1916 1917 if (ata_down_xfermask_limit(dev, sel) == 0) { 1918 action |= ATA_EH_RESET; 1919 goto done; 1920 } 1921 } 1922 } 1923 1924 /* Fall back to PIO? Slowing down to PIO is meaningless for 1925 * SATA ATA devices. Consider it only for PATA and SATAPI. 
1926 */ 1927 if ((verdict & ATA_EH_SPDN_FALLBACK_TO_PIO) && (dev->spdn_cnt >= 2) && 1928 (link->ap->cbl != ATA_CBL_SATA || dev->class == ATA_DEV_ATAPI) && 1929 (dev->xfer_shift != ATA_SHIFT_PIO)) { 1930 if (ata_down_xfermask_limit(dev, ATA_DNXFER_FORCE_PIO) == 0) { 1931 dev->spdn_cnt = 0; 1932 action |= ATA_EH_RESET; 1933 goto done; 1934 } 1935 } 1936 1937 return 0; 1938 done: 1939 /* device has been slowed down, blow error history */ 1940 if (!(verdict & ATA_EH_SPDN_KEEP_ERRORS)) 1941 ata_ering_clear(&dev->ering); 1942 return action; 1943 } 1944 1945 /** 1946 * ata_eh_link_autopsy - analyze error and determine recovery action 1947 * @link: host link to perform autopsy on 1948 * 1949 * Analyze why @link failed and determine which recovery actions 1950 * are needed. This function also sets more detailed AC_ERR_* 1951 * values and fills sense data for ATAPI CHECK SENSE. 1952 * 1953 * LOCKING: 1954 * Kernel thread context (may sleep). 1955 */ 1956 static void ata_eh_link_autopsy(struct ata_link *link) 1957 { 1958 struct ata_port *ap = link->ap; 1959 struct ata_eh_context *ehc = &link->eh_context; 1960 struct ata_device *dev; 1961 unsigned int all_err_mask = 0, eflags = 0; 1962 int tag; 1963 u32 serror; 1964 int rc; 1965 1966 DPRINTK("ENTER\n"); 1967 1968 if (ehc->i.flags & ATA_EHI_NO_AUTOPSY) 1969 return; 1970 1971 /* obtain and analyze SError */ 1972 rc = sata_scr_read(link, SCR_ERROR, &serror); 1973 if (rc == 0) { 1974 ehc->i.serror |= serror; 1975 ata_eh_analyze_serror(link); 1976 } else if (rc != -EOPNOTSUPP) { 1977 /* SError read failed, force reset and probing */ 1978 ehc->i.probe_mask |= ATA_ALL_DEVICES; 1979 ehc->i.action |= ATA_EH_RESET; 1980 ehc->i.err_mask |= AC_ERR_OTHER; 1981 } 1982 1983 /* analyze NCQ failure */ 1984 ata_eh_analyze_ncq_error(link); 1985 1986 /* any real error trumps AC_ERR_OTHER */ 1987 if (ehc->i.err_mask & ~AC_ERR_OTHER) 1988 ehc->i.err_mask &= ~AC_ERR_OTHER; 1989 1990 all_err_mask |= ehc->i.err_mask; 1991 1992 for (tag = 0; tag < 
ATA_MAX_QUEUE; tag++) { 1993 struct ata_queued_cmd *qc = __ata_qc_from_tag(ap, tag); 1994 1995 if (!(qc->flags & ATA_QCFLAG_FAILED) || 1996 ata_dev_phys_link(qc->dev) != link) 1997 continue; 1998 1999 /* inherit upper level err_mask */ 2000 qc->err_mask |= ehc->i.err_mask; 2001 2002 /* analyze TF */ 2003 ehc->i.action |= ata_eh_analyze_tf(qc, &qc->result_tf); 2004 2005 /* DEV errors are probably spurious in case of ATA_BUS error */ 2006 if (qc->err_mask & AC_ERR_ATA_BUS) 2007 qc->err_mask &= ~(AC_ERR_DEV | AC_ERR_MEDIA | 2008 AC_ERR_INVALID); 2009 2010 /* any real error trumps unknown error */ 2011 if (qc->err_mask & ~AC_ERR_OTHER) 2012 qc->err_mask &= ~AC_ERR_OTHER; 2013 2014 /* SENSE_VALID trumps dev/unknown error and revalidation */ 2015 if (qc->flags & ATA_QCFLAG_SENSE_VALID) 2016 qc->err_mask &= ~(AC_ERR_DEV | AC_ERR_OTHER); 2017 2018 /* determine whether the command is worth retrying */ 2019 if (!(qc->err_mask & AC_ERR_INVALID) && 2020 ((qc->flags & ATA_QCFLAG_IO) || qc->err_mask != AC_ERR_DEV)) 2021 qc->flags |= ATA_QCFLAG_RETRY; 2022 2023 /* accumulate error info */ 2024 ehc->i.dev = qc->dev; 2025 all_err_mask |= qc->err_mask; 2026 if (qc->flags & ATA_QCFLAG_IO) 2027 eflags |= ATA_EFLAG_IS_IO; 2028 } 2029 2030 /* enforce default EH actions */ 2031 if (ap->pflags & ATA_PFLAG_FROZEN || 2032 all_err_mask & (AC_ERR_HSM | AC_ERR_TIMEOUT)) 2033 ehc->i.action |= ATA_EH_RESET; 2034 else if (((eflags & ATA_EFLAG_IS_IO) && all_err_mask) || 2035 (!(eflags & ATA_EFLAG_IS_IO) && (all_err_mask & ~AC_ERR_DEV))) 2036 ehc->i.action |= ATA_EH_REVALIDATE; 2037 2038 /* If we have offending qcs and the associated failed device, 2039 * perform per-dev EH action only on the offending device. 
2040 */ 2041 if (ehc->i.dev) { 2042 ehc->i.dev_action[ehc->i.dev->devno] |= 2043 ehc->i.action & ATA_EH_PERDEV_MASK; 2044 ehc->i.action &= ~ATA_EH_PERDEV_MASK; 2045 } 2046 2047 /* propagate timeout to host link */ 2048 if ((all_err_mask & AC_ERR_TIMEOUT) && !ata_is_host_link(link)) 2049 ap->link.eh_context.i.err_mask |= AC_ERR_TIMEOUT; 2050 2051 /* record error and consider speeding down */ 2052 dev = ehc->i.dev; 2053 if (!dev && ((ata_link_max_devices(link) == 1 && 2054 ata_dev_enabled(link->device)))) 2055 dev = link->device; 2056 2057 if (dev) { 2058 if (dev->flags & ATA_DFLAG_DUBIOUS_XFER) 2059 eflags |= ATA_EFLAG_DUBIOUS_XFER; 2060 ehc->i.action |= ata_eh_speed_down(dev, eflags, all_err_mask); 2061 } 2062 2063 DPRINTK("EXIT\n"); 2064 } 2065 2066 /** 2067 * ata_eh_autopsy - analyze error and determine recovery action 2068 * @ap: host port to perform autopsy on 2069 * 2070 * Analyze all links of @ap and determine why they failed and 2071 * which recovery actions are needed. 2072 * 2073 * LOCKING: 2074 * Kernel thread context (may sleep). 2075 */ 2076 void ata_eh_autopsy(struct ata_port *ap) 2077 { 2078 struct ata_link *link; 2079 2080 ata_for_each_link(link, ap, EDGE) 2081 ata_eh_link_autopsy(link); 2082 2083 /* Handle the frigging slave link. Autopsy is done similarly 2084 * but actions and flags are transferred over to the master 2085 * link and handled from there. 
2086 */ 2087 if (ap->slave_link) { 2088 struct ata_eh_context *mehc = &ap->link.eh_context; 2089 struct ata_eh_context *sehc = &ap->slave_link->eh_context; 2090 2091 /* transfer control flags from master to slave */ 2092 sehc->i.flags |= mehc->i.flags & ATA_EHI_TO_SLAVE_MASK; 2093 2094 /* perform autopsy on the slave link */ 2095 ata_eh_link_autopsy(ap->slave_link); 2096 2097 /* transfer actions from slave to master and clear slave */ 2098 ata_eh_about_to_do(ap->slave_link, NULL, ATA_EH_ALL_ACTIONS); 2099 mehc->i.action |= sehc->i.action; 2100 mehc->i.dev_action[1] |= sehc->i.dev_action[1]; 2101 mehc->i.flags |= sehc->i.flags; 2102 ata_eh_done(ap->slave_link, NULL, ATA_EH_ALL_ACTIONS); 2103 } 2104 2105 /* Autopsy of fanout ports can affect host link autopsy. 2106 * Perform host link autopsy last. 2107 */ 2108 if (sata_pmp_attached(ap)) 2109 ata_eh_link_autopsy(&ap->link); 2110 } 2111 2112 /** 2113 * ata_eh_link_report - report error handling to user 2114 * @link: ATA link EH is going on 2115 * 2116 * Report EH to user. 2117 * 2118 * LOCKING: 2119 * None. 
2120 */ 2121 static void ata_eh_link_report(struct ata_link *link) 2122 { 2123 struct ata_port *ap = link->ap; 2124 struct ata_eh_context *ehc = &link->eh_context; 2125 const char *frozen, *desc; 2126 char tries_buf[6]; 2127 int tag, nr_failed = 0; 2128 2129 if (ehc->i.flags & ATA_EHI_QUIET) 2130 return; 2131 2132 desc = NULL; 2133 if (ehc->i.desc[0] != '\0') 2134 desc = ehc->i.desc; 2135 2136 for (tag = 0; tag < ATA_MAX_QUEUE; tag++) { 2137 struct ata_queued_cmd *qc = __ata_qc_from_tag(ap, tag); 2138 2139 if (!(qc->flags & ATA_QCFLAG_FAILED) || 2140 ata_dev_phys_link(qc->dev) != link || 2141 ((qc->flags & ATA_QCFLAG_QUIET) && 2142 qc->err_mask == AC_ERR_DEV)) 2143 continue; 2144 if (qc->flags & ATA_QCFLAG_SENSE_VALID && !qc->err_mask) 2145 continue; 2146 2147 nr_failed++; 2148 } 2149 2150 if (!nr_failed && !ehc->i.err_mask) 2151 return; 2152 2153 frozen = ""; 2154 if (ap->pflags & ATA_PFLAG_FROZEN) 2155 frozen = " frozen"; 2156 2157 memset(tries_buf, 0, sizeof(tries_buf)); 2158 if (ap->eh_tries < ATA_EH_MAX_TRIES) 2159 snprintf(tries_buf, sizeof(tries_buf) - 1, " t%d", 2160 ap->eh_tries); 2161 2162 if (ehc->i.dev) { 2163 ata_dev_printk(ehc->i.dev, KERN_ERR, "exception Emask 0x%x " 2164 "SAct 0x%x SErr 0x%x action 0x%x%s%s\n", 2165 ehc->i.err_mask, link->sactive, ehc->i.serror, 2166 ehc->i.action, frozen, tries_buf); 2167 if (desc) 2168 ata_dev_printk(ehc->i.dev, KERN_ERR, "%s\n", desc); 2169 } else { 2170 ata_link_printk(link, KERN_ERR, "exception Emask 0x%x " 2171 "SAct 0x%x SErr 0x%x action 0x%x%s%s\n", 2172 ehc->i.err_mask, link->sactive, ehc->i.serror, 2173 ehc->i.action, frozen, tries_buf); 2174 if (desc) 2175 ata_link_printk(link, KERN_ERR, "%s\n", desc); 2176 } 2177 2178 if (ehc->i.serror) 2179 ata_link_printk(link, KERN_ERR, 2180 "SError: { %s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s}\n", 2181 ehc->i.serror & SERR_DATA_RECOVERED ? "RecovData " : "", 2182 ehc->i.serror & SERR_COMM_RECOVERED ? "RecovComm " : "", 2183 ehc->i.serror & SERR_DATA ? 
"UnrecovData " : "", 2184 ehc->i.serror & SERR_PERSISTENT ? "Persist " : "", 2185 ehc->i.serror & SERR_PROTOCOL ? "Proto " : "", 2186 ehc->i.serror & SERR_INTERNAL ? "HostInt " : "", 2187 ehc->i.serror & SERR_PHYRDY_CHG ? "PHYRdyChg " : "", 2188 ehc->i.serror & SERR_PHY_INT_ERR ? "PHYInt " : "", 2189 ehc->i.serror & SERR_COMM_WAKE ? "CommWake " : "", 2190 ehc->i.serror & SERR_10B_8B_ERR ? "10B8B " : "", 2191 ehc->i.serror & SERR_DISPARITY ? "Dispar " : "", 2192 ehc->i.serror & SERR_CRC ? "BadCRC " : "", 2193 ehc->i.serror & SERR_HANDSHAKE ? "Handshk " : "", 2194 ehc->i.serror & SERR_LINK_SEQ_ERR ? "LinkSeq " : "", 2195 ehc->i.serror & SERR_TRANS_ST_ERROR ? "TrStaTrns " : "", 2196 ehc->i.serror & SERR_UNRECOG_FIS ? "UnrecFIS " : "", 2197 ehc->i.serror & SERR_DEV_XCHG ? "DevExch " : ""); 2198 2199 for (tag = 0; tag < ATA_MAX_QUEUE; tag++) { 2200 struct ata_queued_cmd *qc = __ata_qc_from_tag(ap, tag); 2201 struct ata_taskfile *cmd = &qc->tf, *res = &qc->result_tf; 2202 const u8 *cdb = qc->cdb; 2203 char data_buf[20] = ""; 2204 char cdb_buf[70] = ""; 2205 2206 if (!(qc->flags & ATA_QCFLAG_FAILED) || 2207 ata_dev_phys_link(qc->dev) != link || !qc->err_mask) 2208 continue; 2209 2210 if (qc->dma_dir != DMA_NONE) { 2211 static const char *dma_str[] = { 2212 [DMA_BIDIRECTIONAL] = "bidi", 2213 [DMA_TO_DEVICE] = "out", 2214 [DMA_FROM_DEVICE] = "in", 2215 }; 2216 static const char *prot_str[] = { 2217 [ATA_PROT_PIO] = "pio", 2218 [ATA_PROT_DMA] = "dma", 2219 [ATA_PROT_NCQ] = "ncq", 2220 [ATAPI_PROT_PIO] = "pio", 2221 [ATAPI_PROT_DMA] = "dma", 2222 }; 2223 2224 snprintf(data_buf, sizeof(data_buf), " %s %u %s", 2225 prot_str[qc->tf.protocol], qc->nbytes, 2226 dma_str[qc->dma_dir]); 2227 } 2228 2229 if (ata_is_atapi(qc->tf.protocol)) 2230 snprintf(cdb_buf, sizeof(cdb_buf), 2231 "cdb %02x %02x %02x %02x %02x %02x %02x %02x " 2232 "%02x %02x %02x %02x %02x %02x %02x %02x\n ", 2233 cdb[0], cdb[1], cdb[2], cdb[3], 2234 cdb[4], cdb[5], cdb[6], cdb[7], 2235 cdb[8], cdb[9], cdb[10], 
cdb[11], 2236 cdb[12], cdb[13], cdb[14], cdb[15]); 2237 2238 ata_dev_printk(qc->dev, KERN_ERR, 2239 "cmd %02x/%02x:%02x:%02x:%02x:%02x/%02x:%02x:%02x:%02x:%02x/%02x " 2240 "tag %d%s\n %s" 2241 "res %02x/%02x:%02x:%02x:%02x:%02x/%02x:%02x:%02x:%02x:%02x/%02x " 2242 "Emask 0x%x (%s)%s\n", 2243 cmd->command, cmd->feature, cmd->nsect, 2244 cmd->lbal, cmd->lbam, cmd->lbah, 2245 cmd->hob_feature, cmd->hob_nsect, 2246 cmd->hob_lbal, cmd->hob_lbam, cmd->hob_lbah, 2247 cmd->device, qc->tag, data_buf, cdb_buf, 2248 res->command, res->feature, res->nsect, 2249 res->lbal, res->lbam, res->lbah, 2250 res->hob_feature, res->hob_nsect, 2251 res->hob_lbal, res->hob_lbam, res->hob_lbah, 2252 res->device, qc->err_mask, ata_err_string(qc->err_mask), 2253 qc->err_mask & AC_ERR_NCQ ? " <F>" : ""); 2254 2255 if (res->command & (ATA_BUSY | ATA_DRDY | ATA_DF | ATA_DRQ | 2256 ATA_ERR)) { 2257 if (res->command & ATA_BUSY) 2258 ata_dev_printk(qc->dev, KERN_ERR, 2259 "status: { Busy }\n"); 2260 else 2261 ata_dev_printk(qc->dev, KERN_ERR, 2262 "status: { %s%s%s%s}\n", 2263 res->command & ATA_DRDY ? "DRDY " : "", 2264 res->command & ATA_DF ? "DF " : "", 2265 res->command & ATA_DRQ ? "DRQ " : "", 2266 res->command & ATA_ERR ? "ERR " : ""); 2267 } 2268 2269 if (cmd->command != ATA_CMD_PACKET && 2270 (res->feature & (ATA_ICRC | ATA_UNC | ATA_IDNF | 2271 ATA_ABORTED))) 2272 ata_dev_printk(qc->dev, KERN_ERR, 2273 "error: { %s%s%s%s}\n", 2274 res->feature & ATA_ICRC ? "ICRC " : "", 2275 res->feature & ATA_UNC ? "UNC " : "", 2276 res->feature & ATA_IDNF ? "IDNF " : "", 2277 res->feature & ATA_ABORTED ? "ABRT " : ""); 2278 } 2279 } 2280 2281 /** 2282 * ata_eh_report - report error handling to user 2283 * @ap: ATA port to report EH about 2284 * 2285 * Report EH to user. 2286 * 2287 * LOCKING: 2288 * None. 
2289 */ 2290 void ata_eh_report(struct ata_port *ap) 2291 { 2292 struct ata_link *link; 2293 2294 ata_for_each_link(link, ap, HOST_FIRST) 2295 ata_eh_link_report(link); 2296 } 2297 2298 static int ata_do_reset(struct ata_link *link, ata_reset_fn_t reset, 2299 unsigned int *classes, unsigned long deadline, 2300 bool clear_classes) 2301 { 2302 struct ata_device *dev; 2303 2304 if (clear_classes) 2305 ata_for_each_dev(dev, link, ALL) 2306 classes[dev->devno] = ATA_DEV_UNKNOWN; 2307 2308 return reset(link, classes, deadline); 2309 } 2310 2311 static int ata_eh_followup_srst_needed(struct ata_link *link, 2312 int rc, const unsigned int *classes) 2313 { 2314 if ((link->flags & ATA_LFLAG_NO_SRST) || ata_link_offline(link)) 2315 return 0; 2316 if (rc == -EAGAIN) 2317 return 1; 2318 if (sata_pmp_supported(link->ap) && ata_is_host_link(link)) 2319 return 1; 2320 return 0; 2321 } 2322 2323 int ata_eh_reset(struct ata_link *link, int classify, 2324 ata_prereset_fn_t prereset, ata_reset_fn_t softreset, 2325 ata_reset_fn_t hardreset, ata_postreset_fn_t postreset) 2326 { 2327 struct ata_port *ap = link->ap; 2328 struct ata_link *slave = ap->slave_link; 2329 struct ata_eh_context *ehc = &link->eh_context; 2330 struct ata_eh_context *sehc = &slave->eh_context; 2331 unsigned int *classes = ehc->classes; 2332 unsigned int lflags = link->flags; 2333 int verbose = !(ehc->i.flags & ATA_EHI_QUIET); 2334 int max_tries = 0, try = 0; 2335 struct ata_link *failed_link; 2336 struct ata_device *dev; 2337 unsigned long deadline, now; 2338 ata_reset_fn_t reset; 2339 unsigned long flags; 2340 u32 sstatus; 2341 int nr_unknown, rc; 2342 2343 /* 2344 * Prepare to reset 2345 */ 2346 while (ata_eh_reset_timeouts[max_tries] != ULONG_MAX) 2347 max_tries++; 2348 if (link->flags & ATA_LFLAG_NO_HRST) 2349 hardreset = NULL; 2350 if (link->flags & ATA_LFLAG_NO_SRST) 2351 softreset = NULL; 2352 2353 /* make sure each reset attemp is at least COOL_DOWN apart */ 2354 if (ehc->i.flags & ATA_EHI_DID_RESET) { 2355 
now = jiffies; 2356 WARN_ON(time_after(ehc->last_reset, now)); 2357 deadline = ata_deadline(ehc->last_reset, 2358 ATA_EH_RESET_COOL_DOWN); 2359 if (time_before(now, deadline)) 2360 schedule_timeout_uninterruptible(deadline - now); 2361 } 2362 2363 spin_lock_irqsave(ap->lock, flags); 2364 ap->pflags |= ATA_PFLAG_RESETTING; 2365 spin_unlock_irqrestore(ap->lock, flags); 2366 2367 ata_eh_about_to_do(link, NULL, ATA_EH_RESET); 2368 2369 ata_for_each_dev(dev, link, ALL) { 2370 /* If we issue an SRST then an ATA drive (not ATAPI) 2371 * may change configuration and be in PIO0 timing. If 2372 * we do a hard reset (or are coming from power on) 2373 * this is true for ATA or ATAPI. Until we've set a 2374 * suitable controller mode we should not touch the 2375 * bus as we may be talking too fast. 2376 */ 2377 dev->pio_mode = XFER_PIO_0; 2378 2379 /* If the controller has a pio mode setup function 2380 * then use it to set the chipset to rights. Don't 2381 * touch the DMA setup as that will be dealt with when 2382 * configuring devices. 2383 */ 2384 if (ap->ops->set_piomode) 2385 ap->ops->set_piomode(ap, dev); 2386 } 2387 2388 /* prefer hardreset */ 2389 reset = NULL; 2390 ehc->i.action &= ~ATA_EH_RESET; 2391 if (hardreset) { 2392 reset = hardreset; 2393 ehc->i.action |= ATA_EH_HARDRESET; 2394 } else if (softreset) { 2395 reset = softreset; 2396 ehc->i.action |= ATA_EH_SOFTRESET; 2397 } 2398 2399 if (prereset) { 2400 unsigned long deadline = ata_deadline(jiffies, 2401 ATA_EH_PRERESET_TIMEOUT); 2402 2403 if (slave) { 2404 sehc->i.action &= ~ATA_EH_RESET; 2405 sehc->i.action |= ehc->i.action; 2406 } 2407 2408 rc = prereset(link, deadline); 2409 2410 /* If present, do prereset on slave link too. Reset 2411 * is skipped iff both master and slave links report 2412 * -ENOENT or clear ATA_EH_RESET. 
2413 */ 2414 if (slave && (rc == 0 || rc == -ENOENT)) { 2415 int tmp; 2416 2417 tmp = prereset(slave, deadline); 2418 if (tmp != -ENOENT) 2419 rc = tmp; 2420 2421 ehc->i.action |= sehc->i.action; 2422 } 2423 2424 if (rc) { 2425 if (rc == -ENOENT) { 2426 ata_link_printk(link, KERN_DEBUG, 2427 "port disabled. ignoring.\n"); 2428 ehc->i.action &= ~ATA_EH_RESET; 2429 2430 ata_for_each_dev(dev, link, ALL) 2431 classes[dev->devno] = ATA_DEV_NONE; 2432 2433 rc = 0; 2434 } else 2435 ata_link_printk(link, KERN_ERR, 2436 "prereset failed (errno=%d)\n", rc); 2437 goto out; 2438 } 2439 2440 /* prereset() might have cleared ATA_EH_RESET. If so, 2441 * bang classes, thaw and return. 2442 */ 2443 if (reset && !(ehc->i.action & ATA_EH_RESET)) { 2444 ata_for_each_dev(dev, link, ALL) 2445 classes[dev->devno] = ATA_DEV_NONE; 2446 if ((ap->pflags & ATA_PFLAG_FROZEN) && 2447 ata_is_host_link(link)) 2448 ata_eh_thaw_port(ap); 2449 rc = 0; 2450 goto out; 2451 } 2452 } 2453 2454 retry: 2455 /* 2456 * Perform reset 2457 */ 2458 if (ata_is_host_link(link)) 2459 ata_eh_freeze_port(ap); 2460 2461 deadline = ata_deadline(jiffies, ata_eh_reset_timeouts[try++]); 2462 2463 if (reset) { 2464 if (verbose) 2465 ata_link_printk(link, KERN_INFO, "%s resetting link\n", 2466 reset == softreset ? 
"soft" : "hard"); 2467 2468 /* mark that this EH session started with reset */ 2469 ehc->last_reset = jiffies; 2470 if (reset == hardreset) 2471 ehc->i.flags |= ATA_EHI_DID_HARDRESET; 2472 else 2473 ehc->i.flags |= ATA_EHI_DID_SOFTRESET; 2474 2475 rc = ata_do_reset(link, reset, classes, deadline, true); 2476 if (rc && rc != -EAGAIN) { 2477 failed_link = link; 2478 goto fail; 2479 } 2480 2481 /* hardreset slave link if existent */ 2482 if (slave && reset == hardreset) { 2483 int tmp; 2484 2485 if (verbose) 2486 ata_link_printk(slave, KERN_INFO, 2487 "hard resetting link\n"); 2488 2489 ata_eh_about_to_do(slave, NULL, ATA_EH_RESET); 2490 tmp = ata_do_reset(slave, reset, classes, deadline, 2491 false); 2492 switch (tmp) { 2493 case -EAGAIN: 2494 rc = -EAGAIN; 2495 case 0: 2496 break; 2497 default: 2498 failed_link = slave; 2499 rc = tmp; 2500 goto fail; 2501 } 2502 } 2503 2504 /* perform follow-up SRST if necessary */ 2505 if (reset == hardreset && 2506 ata_eh_followup_srst_needed(link, rc, classes)) { 2507 reset = softreset; 2508 2509 if (!reset) { 2510 ata_link_printk(link, KERN_ERR, 2511 "follow-up softreset required " 2512 "but no softreset avaliable\n"); 2513 failed_link = link; 2514 rc = -EINVAL; 2515 goto fail; 2516 } 2517 2518 ata_eh_about_to_do(link, NULL, ATA_EH_RESET); 2519 rc = ata_do_reset(link, reset, classes, deadline, true); 2520 } 2521 } else { 2522 if (verbose) 2523 ata_link_printk(link, KERN_INFO, "no reset method " 2524 "available, skipping reset\n"); 2525 if (!(lflags & ATA_LFLAG_ASSUME_CLASS)) 2526 lflags |= ATA_LFLAG_ASSUME_ATA; 2527 } 2528 2529 /* 2530 * Post-reset processing 2531 */ 2532 ata_for_each_dev(dev, link, ALL) { 2533 /* After the reset, the device state is PIO 0 and the 2534 * controller state is undefined. Reset also wakes up 2535 * drives from sleeping mode. 
2536 */ 2537 dev->pio_mode = XFER_PIO_0; 2538 dev->flags &= ~ATA_DFLAG_SLEEPING; 2539 2540 if (!ata_phys_link_offline(ata_dev_phys_link(dev))) { 2541 /* apply class override */ 2542 if (lflags & ATA_LFLAG_ASSUME_ATA) 2543 classes[dev->devno] = ATA_DEV_ATA; 2544 else if (lflags & ATA_LFLAG_ASSUME_SEMB) 2545 classes[dev->devno] = ATA_DEV_SEMB_UNSUP; 2546 } else 2547 classes[dev->devno] = ATA_DEV_NONE; 2548 } 2549 2550 /* record current link speed */ 2551 if (sata_scr_read(link, SCR_STATUS, &sstatus) == 0) 2552 link->sata_spd = (sstatus >> 4) & 0xf; 2553 if (slave && sata_scr_read(slave, SCR_STATUS, &sstatus) == 0) 2554 slave->sata_spd = (sstatus >> 4) & 0xf; 2555 2556 /* thaw the port */ 2557 if (ata_is_host_link(link)) 2558 ata_eh_thaw_port(ap); 2559 2560 /* postreset() should clear hardware SError. Although SError 2561 * is cleared during link resume, clearing SError here is 2562 * necessary as some PHYs raise hotplug events after SRST. 2563 * This introduces race condition where hotplug occurs between 2564 * reset and here. This race is mediated by cross checking 2565 * link onlineness and classification result later. 2566 */ 2567 if (postreset) { 2568 postreset(link, classes); 2569 if (slave) 2570 postreset(slave, classes); 2571 } 2572 2573 /* clear cached SError */ 2574 spin_lock_irqsave(link->ap->lock, flags); 2575 link->eh_info.serror = 0; 2576 if (slave) 2577 slave->eh_info.serror = 0; 2578 spin_unlock_irqrestore(link->ap->lock, flags); 2579 2580 /* Make sure onlineness and classification result correspond. 2581 * Hotplug could have happened during reset and some 2582 * controllers fail to wait while a drive is spinning up after 2583 * being hotplugged causing misdetection. By cross checking 2584 * link onlineness and classification result, those conditions 2585 * can be reliably detected and retried. 
2586 */ 2587 nr_unknown = 0; 2588 ata_for_each_dev(dev, link, ALL) { 2589 /* convert all ATA_DEV_UNKNOWN to ATA_DEV_NONE */ 2590 if (classes[dev->devno] == ATA_DEV_UNKNOWN) { 2591 classes[dev->devno] = ATA_DEV_NONE; 2592 if (ata_phys_link_online(ata_dev_phys_link(dev))) 2593 nr_unknown++; 2594 } 2595 } 2596 2597 if (classify && nr_unknown) { 2598 if (try < max_tries) { 2599 ata_link_printk(link, KERN_WARNING, "link online but " 2600 "device misclassified, retrying\n"); 2601 failed_link = link; 2602 rc = -EAGAIN; 2603 goto fail; 2604 } 2605 ata_link_printk(link, KERN_WARNING, 2606 "link online but device misclassified, " 2607 "device detection might fail\n"); 2608 } 2609 2610 /* reset successful, schedule revalidation */ 2611 ata_eh_done(link, NULL, ATA_EH_RESET); 2612 if (slave) 2613 ata_eh_done(slave, NULL, ATA_EH_RESET); 2614 ehc->last_reset = jiffies; /* update to completion time */ 2615 ehc->i.action |= ATA_EH_REVALIDATE; 2616 2617 rc = 0; 2618 out: 2619 /* clear hotplug flag */ 2620 ehc->i.flags &= ~ATA_EHI_HOTPLUGGED; 2621 if (slave) 2622 sehc->i.flags &= ~ATA_EHI_HOTPLUGGED; 2623 2624 spin_lock_irqsave(ap->lock, flags); 2625 ap->pflags &= ~ATA_PFLAG_RESETTING; 2626 spin_unlock_irqrestore(ap->lock, flags); 2627 2628 return rc; 2629 2630 fail: 2631 /* if SCR isn't accessible on a fan-out port, PMP needs to be reset */ 2632 if (!ata_is_host_link(link) && 2633 sata_scr_read(link, SCR_STATUS, &sstatus)) 2634 rc = -ERESTART; 2635 2636 if (rc == -ERESTART || try >= max_tries) 2637 goto out; 2638 2639 now = jiffies; 2640 if (time_before(now, deadline)) { 2641 unsigned long delta = deadline - now; 2642 2643 ata_link_printk(failed_link, KERN_WARNING, 2644 "reset failed (errno=%d), retrying in %u secs\n", 2645 rc, DIV_ROUND_UP(jiffies_to_msecs(delta), 1000)); 2646 2647 while (delta) 2648 delta = schedule_timeout_uninterruptible(delta); 2649 } 2650 2651 if (try == max_tries - 1) { 2652 sata_down_spd_limit(link, 0); 2653 if (slave) 2654 sata_down_spd_limit(slave, 0); 
2655 } else if (rc == -EPIPE) 2656 sata_down_spd_limit(failed_link, 0); 2657 2658 if (hardreset) 2659 reset = hardreset; 2660 goto retry; 2661 } 2662 2663 static inline void ata_eh_pull_park_action(struct ata_port *ap) 2664 { 2665 struct ata_link *link; 2666 struct ata_device *dev; 2667 unsigned long flags; 2668 2669 /* 2670 * This function can be thought of as an extended version of 2671 * ata_eh_about_to_do() specially crafted to accommodate the 2672 * requirements of ATA_EH_PARK handling. Since the EH thread 2673 * does not leave the do {} while () loop in ata_eh_recover as 2674 * long as the timeout for a park request to *one* device on 2675 * the port has not expired, and since we still want to pick 2676 * up park requests to other devices on the same port or 2677 * timeout updates for the same device, we have to pull 2678 * ATA_EH_PARK actions from eh_info into eh_context.i 2679 * ourselves at the beginning of each pass over the loop. 2680 * 2681 * Additionally, all write accesses to &ap->park_req_pending 2682 * through INIT_COMPLETION() (see below) or complete_all() 2683 * (see ata_scsi_park_store()) are protected by the host lock. 2684 * As a result we have that park_req_pending.done is zero on 2685 * exit from this function, i.e. when ATA_EH_PARK actions for 2686 * *all* devices on port ap have been pulled into the 2687 * respective eh_context structs. If, and only if, 2688 * park_req_pending.done is non-zero by the time we reach 2689 * wait_for_completion_timeout(), another ATA_EH_PARK action 2690 * has been scheduled for at least one of the devices on port 2691 * ap and we have to cycle over the do {} while () loop in 2692 * ata_eh_recover() again. 
2693 */ 2694 2695 spin_lock_irqsave(ap->lock, flags); 2696 INIT_COMPLETION(ap->park_req_pending); 2697 ata_for_each_link(link, ap, EDGE) { 2698 ata_for_each_dev(dev, link, ALL) { 2699 struct ata_eh_info *ehi = &link->eh_info; 2700 2701 link->eh_context.i.dev_action[dev->devno] |= 2702 ehi->dev_action[dev->devno] & ATA_EH_PARK; 2703 ata_eh_clear_action(link, dev, ehi, ATA_EH_PARK); 2704 } 2705 } 2706 spin_unlock_irqrestore(ap->lock, flags); 2707 } 2708 2709 static void ata_eh_park_issue_cmd(struct ata_device *dev, int park) 2710 { 2711 struct ata_eh_context *ehc = &dev->link->eh_context; 2712 struct ata_taskfile tf; 2713 unsigned int err_mask; 2714 2715 ata_tf_init(dev, &tf); 2716 if (park) { 2717 ehc->unloaded_mask |= 1 << dev->devno; 2718 tf.command = ATA_CMD_IDLEIMMEDIATE; 2719 tf.feature = 0x44; 2720 tf.lbal = 0x4c; 2721 tf.lbam = 0x4e; 2722 tf.lbah = 0x55; 2723 } else { 2724 ehc->unloaded_mask &= ~(1 << dev->devno); 2725 tf.command = ATA_CMD_CHK_POWER; 2726 } 2727 2728 tf.flags |= ATA_TFLAG_DEVICE | ATA_TFLAG_ISADDR; 2729 tf.protocol |= ATA_PROT_NODATA; 2730 err_mask = ata_exec_internal(dev, &tf, NULL, DMA_NONE, NULL, 0, 0); 2731 if (park && (err_mask || tf.lbal != 0xc4)) { 2732 ata_dev_printk(dev, KERN_ERR, "head unload failed!\n"); 2733 ehc->unloaded_mask &= ~(1 << dev->devno); 2734 } 2735 } 2736 2737 static int ata_eh_revalidate_and_attach(struct ata_link *link, 2738 struct ata_device **r_failed_dev) 2739 { 2740 struct ata_port *ap = link->ap; 2741 struct ata_eh_context *ehc = &link->eh_context; 2742 struct ata_device *dev; 2743 unsigned int new_mask = 0; 2744 unsigned long flags; 2745 int rc = 0; 2746 2747 DPRINTK("ENTER\n"); 2748 2749 /* For PATA drive side cable detection to work, IDENTIFY must 2750 * be done backwards such that PDIAG- is released by the slave 2751 * device before the master device is identified. 
2752 */ 2753 ata_for_each_dev(dev, link, ALL_REVERSE) { 2754 unsigned int action = ata_eh_dev_action(dev); 2755 unsigned int readid_flags = 0; 2756 2757 if (ehc->i.flags & ATA_EHI_DID_RESET) 2758 readid_flags |= ATA_READID_POSTRESET; 2759 2760 if ((action & ATA_EH_REVALIDATE) && ata_dev_enabled(dev)) { 2761 WARN_ON(dev->class == ATA_DEV_PMP); 2762 2763 if (ata_phys_link_offline(ata_dev_phys_link(dev))) { 2764 rc = -EIO; 2765 goto err; 2766 } 2767 2768 ata_eh_about_to_do(link, dev, ATA_EH_REVALIDATE); 2769 rc = ata_dev_revalidate(dev, ehc->classes[dev->devno], 2770 readid_flags); 2771 if (rc) 2772 goto err; 2773 2774 ata_eh_done(link, dev, ATA_EH_REVALIDATE); 2775 2776 /* Configuration may have changed, reconfigure 2777 * transfer mode. 2778 */ 2779 ehc->i.flags |= ATA_EHI_SETMODE; 2780 2781 /* schedule the scsi_rescan_device() here */ 2782 queue_work(ata_aux_wq, &(ap->scsi_rescan_task)); 2783 } else if (dev->class == ATA_DEV_UNKNOWN && 2784 ehc->tries[dev->devno] && 2785 ata_class_enabled(ehc->classes[dev->devno])) { 2786 dev->class = ehc->classes[dev->devno]; 2787 2788 if (dev->class == ATA_DEV_PMP) 2789 rc = sata_pmp_attach(dev); 2790 else 2791 rc = ata_dev_read_id(dev, &dev->class, 2792 readid_flags, dev->id); 2793 switch (rc) { 2794 case 0: 2795 /* clear error info accumulated during probe */ 2796 ata_ering_clear(&dev->ering); 2797 new_mask |= 1 << dev->devno; 2798 break; 2799 case -ENOENT: 2800 /* IDENTIFY was issued to non-existent 2801 * device. No need to reset. Just 2802 * thaw and kill the device. 
2803 */ 2804 ata_eh_thaw_port(ap); 2805 dev->class = ATA_DEV_UNKNOWN; 2806 break; 2807 default: 2808 dev->class = ATA_DEV_UNKNOWN; 2809 goto err; 2810 } 2811 } 2812 } 2813 2814 /* PDIAG- should have been released, ask cable type if post-reset */ 2815 if ((ehc->i.flags & ATA_EHI_DID_RESET) && ata_is_host_link(link)) { 2816 if (ap->ops->cable_detect) 2817 ap->cbl = ap->ops->cable_detect(ap); 2818 ata_force_cbl(ap); 2819 } 2820 2821 /* Configure new devices forward such that user doesn't see 2822 * device detection messages backwards. 2823 */ 2824 ata_for_each_dev(dev, link, ALL) { 2825 if (!(new_mask & (1 << dev->devno)) || 2826 dev->class == ATA_DEV_PMP) 2827 continue; 2828 2829 ehc->i.flags |= ATA_EHI_PRINTINFO; 2830 rc = ata_dev_configure(dev); 2831 ehc->i.flags &= ~ATA_EHI_PRINTINFO; 2832 if (rc) 2833 goto err; 2834 2835 spin_lock_irqsave(ap->lock, flags); 2836 ap->pflags |= ATA_PFLAG_SCSI_HOTPLUG; 2837 spin_unlock_irqrestore(ap->lock, flags); 2838 2839 /* new device discovered, configure xfermode */ 2840 ehc->i.flags |= ATA_EHI_SETMODE; 2841 } 2842 2843 return 0; 2844 2845 err: 2846 *r_failed_dev = dev; 2847 DPRINTK("EXIT rc=%d\n", rc); 2848 return rc; 2849 } 2850 2851 /** 2852 * ata_set_mode - Program timings and issue SET FEATURES - XFER 2853 * @link: link on which timings will be programmed 2854 * @r_failed_dev: out paramter for failed device 2855 * 2856 * Set ATA device disk transfer mode (PIO3, UDMA6, etc.). If 2857 * ata_set_mode() fails, pointer to the failing device is 2858 * returned in @r_failed_dev. 2859 * 2860 * LOCKING: 2861 * PCI/etc. bus probe sem. 
2862 * 2863 * RETURNS: 2864 * 0 on success, negative errno otherwise 2865 */ 2866 int ata_set_mode(struct ata_link *link, struct ata_device **r_failed_dev) 2867 { 2868 struct ata_port *ap = link->ap; 2869 struct ata_device *dev; 2870 int rc; 2871 2872 /* if data transfer is verified, clear DUBIOUS_XFER on ering top */ 2873 ata_for_each_dev(dev, link, ENABLED) { 2874 if (!(dev->flags & ATA_DFLAG_DUBIOUS_XFER)) { 2875 struct ata_ering_entry *ent; 2876 2877 ent = ata_ering_top(&dev->ering); 2878 if (ent) 2879 ent->eflags &= ~ATA_EFLAG_DUBIOUS_XFER; 2880 } 2881 } 2882 2883 /* has private set_mode? */ 2884 if (ap->ops->set_mode) 2885 rc = ap->ops->set_mode(link, r_failed_dev); 2886 else 2887 rc = ata_do_set_mode(link, r_failed_dev); 2888 2889 /* if transfer mode has changed, set DUBIOUS_XFER on device */ 2890 ata_for_each_dev(dev, link, ENABLED) { 2891 struct ata_eh_context *ehc = &link->eh_context; 2892 u8 saved_xfer_mode = ehc->saved_xfer_mode[dev->devno]; 2893 u8 saved_ncq = !!(ehc->saved_ncq_enabled & (1 << dev->devno)); 2894 2895 if (dev->xfer_mode != saved_xfer_mode || 2896 ata_ncq_enabled(dev) != saved_ncq) 2897 dev->flags |= ATA_DFLAG_DUBIOUS_XFER; 2898 } 2899 2900 return rc; 2901 } 2902 2903 /** 2904 * atapi_eh_clear_ua - Clear ATAPI UNIT ATTENTION after reset 2905 * @dev: ATAPI device to clear UA for 2906 * 2907 * Resets and other operations can make an ATAPI device raise 2908 * UNIT ATTENTION which causes the next operation to fail. This 2909 * function clears UA. 2910 * 2911 * LOCKING: 2912 * EH context (may sleep). 2913 * 2914 * RETURNS: 2915 * 0 on success, -errno on failure. 
2916 */ 2917 static int atapi_eh_clear_ua(struct ata_device *dev) 2918 { 2919 int i; 2920 2921 for (i = 0; i < ATA_EH_UA_TRIES; i++) { 2922 u8 *sense_buffer = dev->link->ap->sector_buf; 2923 u8 sense_key = 0; 2924 unsigned int err_mask; 2925 2926 err_mask = atapi_eh_tur(dev, &sense_key); 2927 if (err_mask != 0 && err_mask != AC_ERR_DEV) { 2928 ata_dev_printk(dev, KERN_WARNING, "TEST_UNIT_READY " 2929 "failed (err_mask=0x%x)\n", err_mask); 2930 return -EIO; 2931 } 2932 2933 if (!err_mask || sense_key != UNIT_ATTENTION) 2934 return 0; 2935 2936 err_mask = atapi_eh_request_sense(dev, sense_buffer, sense_key); 2937 if (err_mask) { 2938 ata_dev_printk(dev, KERN_WARNING, "failed to clear " 2939 "UNIT ATTENTION (err_mask=0x%x)\n", err_mask); 2940 return -EIO; 2941 } 2942 } 2943 2944 ata_dev_printk(dev, KERN_WARNING, 2945 "UNIT ATTENTION persists after %d tries\n", ATA_EH_UA_TRIES); 2946 2947 return 0; 2948 } 2949 2950 static int ata_link_nr_enabled(struct ata_link *link) 2951 { 2952 struct ata_device *dev; 2953 int cnt = 0; 2954 2955 ata_for_each_dev(dev, link, ENABLED) 2956 cnt++; 2957 return cnt; 2958 } 2959 2960 static int ata_link_nr_vacant(struct ata_link *link) 2961 { 2962 struct ata_device *dev; 2963 int cnt = 0; 2964 2965 ata_for_each_dev(dev, link, ALL) 2966 if (dev->class == ATA_DEV_UNKNOWN) 2967 cnt++; 2968 return cnt; 2969 } 2970 2971 static int ata_eh_skip_recovery(struct ata_link *link) 2972 { 2973 struct ata_port *ap = link->ap; 2974 struct ata_eh_context *ehc = &link->eh_context; 2975 struct ata_device *dev; 2976 2977 /* skip disabled links */ 2978 if (link->flags & ATA_LFLAG_DISABLED) 2979 return 1; 2980 2981 /* thaw frozen port and recover failed devices */ 2982 if ((ap->pflags & ATA_PFLAG_FROZEN) || ata_link_nr_enabled(link)) 2983 return 0; 2984 2985 /* reset at least once if reset is requested */ 2986 if ((ehc->i.action & ATA_EH_RESET) && 2987 !(ehc->i.flags & ATA_EHI_DID_RESET)) 2988 return 0; 2989 2990 /* skip if class codes for all vacant slots are 
ATA_DEV_NONE */ 2991 ata_for_each_dev(dev, link, ALL) { 2992 if (dev->class == ATA_DEV_UNKNOWN && 2993 ehc->classes[dev->devno] != ATA_DEV_NONE) 2994 return 0; 2995 } 2996 2997 return 1; 2998 } 2999 3000 static int ata_count_probe_trials_cb(struct ata_ering_entry *ent, void *void_arg) 3001 { 3002 u64 interval = msecs_to_jiffies(ATA_EH_PROBE_TRIAL_INTERVAL); 3003 u64 now = get_jiffies_64(); 3004 int *trials = void_arg; 3005 3006 if (ent->timestamp < now - min(now, interval)) 3007 return -1; 3008 3009 (*trials)++; 3010 return 0; 3011 } 3012 3013 static int ata_eh_schedule_probe(struct ata_device *dev) 3014 { 3015 struct ata_eh_context *ehc = &dev->link->eh_context; 3016 struct ata_link *link = ata_dev_phys_link(dev); 3017 int trials = 0; 3018 3019 if (!(ehc->i.probe_mask & (1 << dev->devno)) || 3020 (ehc->did_probe_mask & (1 << dev->devno))) 3021 return 0; 3022 3023 ata_eh_detach_dev(dev); 3024 ata_dev_init(dev); 3025 ehc->did_probe_mask |= (1 << dev->devno); 3026 ehc->i.action |= ATA_EH_RESET; 3027 ehc->saved_xfer_mode[dev->devno] = 0; 3028 ehc->saved_ncq_enabled &= ~(1 << dev->devno); 3029 3030 /* Record and count probe trials on the ering. The specific 3031 * error mask used is irrelevant. Because a successful device 3032 * detection clears the ering, this count accumulates only if 3033 * there are consecutive failed probes. 3034 * 3035 * If the count is equal to or higher than ATA_EH_PROBE_TRIALS 3036 * in the last ATA_EH_PROBE_TRIAL_INTERVAL, link speed is 3037 * forced to 1.5Gbps. 3038 * 3039 * This is to work around cases where failed link speed 3040 * negotiation results in device misdetection leading to 3041 * infinite DEVXCHG or PHRDY CHG events. 
3042 */ 3043 ata_ering_record(&dev->ering, 0, AC_ERR_OTHER); 3044 ata_ering_map(&dev->ering, ata_count_probe_trials_cb, &trials); 3045 3046 if (trials > ATA_EH_PROBE_TRIALS) 3047 sata_down_spd_limit(link, 1); 3048 3049 return 1; 3050 } 3051 3052 static int ata_eh_handle_dev_fail(struct ata_device *dev, int err) 3053 { 3054 struct ata_eh_context *ehc = &dev->link->eh_context; 3055 3056 /* -EAGAIN from EH routine indicates retry without prejudice. 3057 * The requester is responsible for ensuring forward progress. 3058 */ 3059 if (err != -EAGAIN) 3060 ehc->tries[dev->devno]--; 3061 3062 switch (err) { 3063 case -ENODEV: 3064 /* device missing or wrong IDENTIFY data, schedule probing */ 3065 ehc->i.probe_mask |= (1 << dev->devno); 3066 case -EINVAL: 3067 /* give it just one more chance */ 3068 ehc->tries[dev->devno] = min(ehc->tries[dev->devno], 1); 3069 case -EIO: 3070 if (ehc->tries[dev->devno] == 1) { 3071 /* This is the last chance, better to slow 3072 * down than lose it. 3073 */ 3074 sata_down_spd_limit(ata_dev_phys_link(dev), 0); 3075 if (dev->pio_mode > XFER_PIO_0) 3076 ata_down_xfermask_limit(dev, ATA_DNXFER_PIO); 3077 } 3078 } 3079 3080 if (ata_dev_enabled(dev) && !ehc->tries[dev->devno]) { 3081 /* disable device if it has used up all its chances */ 3082 ata_dev_disable(dev); 3083 3084 /* detach if offline */ 3085 if (ata_phys_link_offline(ata_dev_phys_link(dev))) 3086 ata_eh_detach_dev(dev); 3087 3088 /* schedule probe if necessary */ 3089 if (ata_eh_schedule_probe(dev)) { 3090 ehc->tries[dev->devno] = ATA_EH_DEV_TRIES; 3091 memset(ehc->cmd_timeout_idx[dev->devno], 0, 3092 sizeof(ehc->cmd_timeout_idx[dev->devno])); 3093 } 3094 3095 return 1; 3096 } else { 3097 ehc->i.action |= ATA_EH_RESET; 3098 return 0; 3099 } 3100 } 3101 3102 /** 3103 * ata_eh_recover - recover host port after error 3104 * @ap: host port to recover 3105 * @prereset: prereset method (can be NULL) 3106 * @softreset: softreset method (can be NULL) 3107 * @hardreset: hardreset method (can be 
/**
 *	ata_eh_recover - recover host port after error
 *	@ap: host port to recover
 *	@prereset: prereset method (can be NULL)
 *	@softreset: softreset method (can be NULL)
 *	@hardreset: hardreset method (can be NULL)
 *	@postreset: postreset method (can be NULL)
 *	@r_failed_link: out parameter for failed link (can be NULL)
 *
 *	This is the alpha and omega, eum and yang, heart and soul of
 *	libata exception handling.  On entry, actions required to
 *	recover each link and hotplug requests are recorded in the
 *	link's eh_context.  This function executes all the operations
 *	with appropriate retrials and fallbacks to resurrect failed
 *	devices, detach goners and greet newcomers.
 *
 *	One pass consists of: resetting every link which has
 *	ATA_EH_RESET pending, servicing head park requests,
 *	revalidating/attaching devices, setting the transfer mode,
 *	clearing ATAPI UNIT ATTENTION after a reset and configuring
 *	link power management.  The pass is repeated as long as a
 *	device failed during it.
 *
 *	LOCKING:
 *	Kernel thread context (may sleep).
 *
 *	RETURNS:
 *	0 on success, -errno on failure.  On failure, the link being
 *	processed is stored in *@r_failed_link if @r_failed_link is
 *	not NULL.
 */
int ata_eh_recover(struct ata_port *ap, ata_prereset_fn_t prereset,
		   ata_reset_fn_t softreset, ata_reset_fn_t hardreset,
		   ata_postreset_fn_t postreset,
		   struct ata_link **r_failed_link)
{
	struct ata_link *link;
	struct ata_device *dev;
	int nr_failed_devs;
	int rc;
	unsigned long flags, deadline;

	DPRINTK("ENTER\n");

	/* prep for recovery */
	ata_for_each_link(link, ap, EDGE) {
		struct ata_eh_context *ehc = &link->eh_context;

		/* re-enable link? */
		if (ehc->i.action & ATA_EH_ENABLE_LINK) {
			ata_eh_about_to_do(link, NULL, ATA_EH_ENABLE_LINK);
			spin_lock_irqsave(ap->lock, flags);
			link->flags &= ~ATA_LFLAG_DISABLED;
			spin_unlock_irqrestore(ap->lock, flags);
			ata_eh_done(link, NULL, ATA_EH_ENABLE_LINK);
		}

		ata_for_each_dev(dev, link, ALL) {
			/* per-device retry budget for this EH session */
			if (link->flags & ATA_LFLAG_NO_RETRY)
				ehc->tries[dev->devno] = 1;
			else
				ehc->tries[dev->devno] = ATA_EH_DEV_TRIES;

			/* collect port action mask recorded in dev actions */
			ehc->i.action |= ehc->i.dev_action[dev->devno] &
					 ~ATA_EH_PERDEV_MASK;
			ehc->i.dev_action[dev->devno] &= ATA_EH_PERDEV_MASK;

			/* process hotplug request */
			if (dev->flags & ATA_DFLAG_DETACH)
				ata_eh_detach_dev(dev);

			/* schedule probe if necessary */
			if (!ata_dev_enabled(dev))
				ata_eh_schedule_probe(dev);
		}
	}

 retry:
	/* each pass starts with a clean result and failure count */
	rc = 0;
	nr_failed_devs = 0;

	/* if UNLOADING, finish immediately */
	if (ap->pflags & ATA_PFLAG_UNLOADING)
		goto out;

	/* prep for EH */
	ata_for_each_link(link, ap, EDGE) {
		struct ata_eh_context *ehc = &link->eh_context;

		/* skip EH if possible. */
		if (ata_eh_skip_recovery(link))
			ehc->i.action = 0;

		ata_for_each_dev(dev, link, ALL)
			ehc->classes[dev->devno] = ATA_DEV_UNKNOWN;
	}

	/* reset */
	ata_for_each_link(link, ap, EDGE) {
		struct ata_eh_context *ehc = &link->eh_context;

		if (!(ehc->i.action & ATA_EH_RESET))
			continue;

		/* ask for classification only if the link has vacant slots */
		rc = ata_eh_reset(link, ata_link_nr_vacant(link),
				  prereset, softreset, hardreset, postreset);
		if (rc) {
			ata_link_printk(link, KERN_ERR,
					"reset failed, giving up\n");
			goto out;
		}
	}

	/* Head parking.  Loop until the latest unpark deadline of all
	 * devices with a pending park request has passed; the wait at
	 * the bottom ends early when park_req_pending is completed.
	 */
	do {
		unsigned long now;

		/*
		 * clears ATA_EH_PARK in eh_info and resets
		 * ap->park_req_pending
		 */
		ata_eh_pull_park_action(ap);

		deadline = jiffies;
		ata_for_each_link(link, ap, EDGE) {
			ata_for_each_dev(dev, link, ALL) {
				struct ata_eh_context *ehc = &link->eh_context;
				unsigned long tmp;

				if (dev->class != ATA_DEV_ATA)
					continue;
				if (!(ehc->i.dev_action[dev->devno] &
				      ATA_EH_PARK))
					continue;
				/* track the latest deadline; skip devices
				 * whose own deadline has already passed
				 */
				tmp = dev->unpark_deadline;
				if (time_before(deadline, tmp))
					deadline = tmp;
				else if (time_before_eq(tmp, jiffies))
					continue;
				/* already parked */
				if (ehc->unloaded_mask & (1 << dev->devno))
					continue;

				ata_eh_park_issue_cmd(dev, 1);
			}
		}

		now = jiffies;
		if (time_before_eq(deadline, now))
			break;

		/* deadline is reused to hold the wait result from here on */
		deadline = wait_for_completion_timeout(&ap->park_req_pending,
						       deadline - now);
	} while (deadline);
	/* unpark every device which is recorded as unloaded */
	ata_for_each_link(link, ap, EDGE) {
		ata_for_each_dev(dev, link, ALL) {
			if (!(link->eh_context.unloaded_mask &
			      (1 << dev->devno)))
				continue;

			ata_eh_park_issue_cmd(dev, 0);
			ata_eh_done(link, dev, ATA_EH_PARK);
		}
	}

	/* the rest */
	ata_for_each_link(link, ap, EDGE) {
		struct ata_eh_context *ehc = &link->eh_context;

		/* revalidate existing devices and attach new ones */
		rc = ata_eh_revalidate_and_attach(link, &dev);
		if (rc)
			goto dev_fail;

		/* if PMP got attached, return, pmp EH will take care of it */
		if (link->device->class == ATA_DEV_PMP) {
			ehc->i.action = 0;
			return 0;
		}

		/* configure transfer mode if necessary */
		if (ehc->i.flags & ATA_EHI_SETMODE) {
			rc = ata_set_mode(link, &dev);
			if (rc)
				goto dev_fail;
			ehc->i.flags &= ~ATA_EHI_SETMODE;
		}

		/* If reset has been issued, clear UA to avoid
		 * disrupting the current users of the device.
		 */
		if (ehc->i.flags & ATA_EHI_DID_RESET) {
			ata_for_each_dev(dev, link, ALL) {
				if (dev->class != ATA_DEV_ATAPI)
					continue;
				rc = atapi_eh_clear_ua(dev);
				if (rc)
					goto dev_fail;
			}
		}

		/* configure link power saving */
		if (ehc->i.action & ATA_EH_LPM)
			ata_for_each_dev(dev, link, ALL)
				ata_dev_enable_pm(dev, ap->pm_policy);

		/* this link is okay now */
		ehc->i.flags = 0;
		continue;

		/* Reached only via goto from the steps above; @dev is the
		 * device that failed and @rc the error it failed with.
		 */
dev_fail:
		nr_failed_devs++;
		ata_eh_handle_dev_fail(dev, rc);

		if (ap->pflags & ATA_PFLAG_FROZEN) {
			/* PMP reset requires working host port.
			 * Can't retry if it's frozen.
			 */
			if (sata_pmp_attached(ap))
				goto out;
			break;
		}
	}

	/* any failure in this pass means another full pass */
	if (nr_failed_devs)
		goto retry;

 out:
	if (rc && r_failed_link)
		*r_failed_link = link;

	DPRINTK("EXIT, rc=%d\n", rc);
	return rc;
}
3336 */ 3337 void ata_eh_finish(struct ata_port *ap) 3338 { 3339 int tag; 3340 3341 /* retry or finish qcs */ 3342 for (tag = 0; tag < ATA_MAX_QUEUE; tag++) { 3343 struct ata_queued_cmd *qc = __ata_qc_from_tag(ap, tag); 3344 3345 if (!(qc->flags & ATA_QCFLAG_FAILED)) 3346 continue; 3347 3348 if (qc->err_mask) { 3349 /* FIXME: Once EH migration is complete, 3350 * generate sense data in this function, 3351 * considering both err_mask and tf. 3352 */ 3353 if (qc->flags & ATA_QCFLAG_RETRY) 3354 ata_eh_qc_retry(qc); 3355 else 3356 ata_eh_qc_complete(qc); 3357 } else { 3358 if (qc->flags & ATA_QCFLAG_SENSE_VALID) { 3359 ata_eh_qc_complete(qc); 3360 } else { 3361 /* feed zero TF to sense generation */ 3362 memset(&qc->result_tf, 0, sizeof(qc->result_tf)); 3363 ata_eh_qc_retry(qc); 3364 } 3365 } 3366 } 3367 3368 /* make sure nr_active_links is zero after EH */ 3369 WARN_ON(ap->nr_active_links); 3370 ap->nr_active_links = 0; 3371 } 3372 3373 /** 3374 * ata_do_eh - do standard error handling 3375 * @ap: host port to handle error for 3376 * 3377 * @prereset: prereset method (can be NULL) 3378 * @softreset: softreset method (can be NULL) 3379 * @hardreset: hardreset method (can be NULL) 3380 * @postreset: postreset method (can be NULL) 3381 * 3382 * Perform standard error handling sequence. 3383 * 3384 * LOCKING: 3385 * Kernel thread context (may sleep). 
3386 */ 3387 void ata_do_eh(struct ata_port *ap, ata_prereset_fn_t prereset, 3388 ata_reset_fn_t softreset, ata_reset_fn_t hardreset, 3389 ata_postreset_fn_t postreset) 3390 { 3391 struct ata_device *dev; 3392 int rc; 3393 3394 ata_eh_autopsy(ap); 3395 ata_eh_report(ap); 3396 3397 rc = ata_eh_recover(ap, prereset, softreset, hardreset, postreset, 3398 NULL); 3399 if (rc) { 3400 ata_for_each_dev(dev, &ap->link, ALL) 3401 ata_dev_disable(dev); 3402 } 3403 3404 ata_eh_finish(ap); 3405 } 3406 3407 /** 3408 * ata_std_error_handler - standard error handler 3409 * @ap: host port to handle error for 3410 * 3411 * Standard error handler 3412 * 3413 * LOCKING: 3414 * Kernel thread context (may sleep). 3415 */ 3416 void ata_std_error_handler(struct ata_port *ap) 3417 { 3418 struct ata_port_operations *ops = ap->ops; 3419 ata_reset_fn_t hardreset = ops->hardreset; 3420 3421 /* ignore built-in hardreset if SCR access is not available */ 3422 if (ata_is_builtin_hardreset(hardreset) && !sata_scr_valid(&ap->link)) 3423 hardreset = NULL; 3424 3425 ata_do_eh(ap, ops->prereset, ops->softreset, hardreset, ops->postreset); 3426 } 3427 3428 #ifdef CONFIG_PM 3429 /** 3430 * ata_eh_handle_port_suspend - perform port suspend operation 3431 * @ap: port to suspend 3432 * 3433 * Suspend @ap. 3434 * 3435 * LOCKING: 3436 * Kernel thread context (may sleep). 3437 */ 3438 static void ata_eh_handle_port_suspend(struct ata_port *ap) 3439 { 3440 unsigned long flags; 3441 int rc = 0; 3442 3443 /* are we suspending? 
*/ 3444 spin_lock_irqsave(ap->lock, flags); 3445 if (!(ap->pflags & ATA_PFLAG_PM_PENDING) || 3446 ap->pm_mesg.event == PM_EVENT_ON) { 3447 spin_unlock_irqrestore(ap->lock, flags); 3448 return; 3449 } 3450 spin_unlock_irqrestore(ap->lock, flags); 3451 3452 WARN_ON(ap->pflags & ATA_PFLAG_SUSPENDED); 3453 3454 /* tell ACPI we're suspending */ 3455 rc = ata_acpi_on_suspend(ap); 3456 if (rc) 3457 goto out; 3458 3459 /* suspend */ 3460 ata_eh_freeze_port(ap); 3461 3462 if (ap->ops->port_suspend) 3463 rc = ap->ops->port_suspend(ap, ap->pm_mesg); 3464 3465 ata_acpi_set_state(ap, PMSG_SUSPEND); 3466 out: 3467 /* report result */ 3468 spin_lock_irqsave(ap->lock, flags); 3469 3470 ap->pflags &= ~ATA_PFLAG_PM_PENDING; 3471 if (rc == 0) 3472 ap->pflags |= ATA_PFLAG_SUSPENDED; 3473 else if (ap->pflags & ATA_PFLAG_FROZEN) 3474 ata_port_schedule_eh(ap); 3475 3476 if (ap->pm_result) { 3477 *ap->pm_result = rc; 3478 ap->pm_result = NULL; 3479 } 3480 3481 spin_unlock_irqrestore(ap->lock, flags); 3482 3483 return; 3484 } 3485 3486 /** 3487 * ata_eh_handle_port_resume - perform port resume operation 3488 * @ap: port to resume 3489 * 3490 * Resume @ap. 3491 * 3492 * LOCKING: 3493 * Kernel thread context (may sleep). 3494 */ 3495 static void ata_eh_handle_port_resume(struct ata_port *ap) 3496 { 3497 unsigned long flags; 3498 int rc = 0; 3499 3500 /* are we resuming? 
*/ 3501 spin_lock_irqsave(ap->lock, flags); 3502 if (!(ap->pflags & ATA_PFLAG_PM_PENDING) || 3503 ap->pm_mesg.event != PM_EVENT_ON) { 3504 spin_unlock_irqrestore(ap->lock, flags); 3505 return; 3506 } 3507 spin_unlock_irqrestore(ap->lock, flags); 3508 3509 WARN_ON(!(ap->pflags & ATA_PFLAG_SUSPENDED)); 3510 3511 ata_acpi_set_state(ap, PMSG_ON); 3512 3513 if (ap->ops->port_resume) 3514 rc = ap->ops->port_resume(ap); 3515 3516 /* tell ACPI that we're resuming */ 3517 ata_acpi_on_resume(ap); 3518 3519 /* report result */ 3520 spin_lock_irqsave(ap->lock, flags); 3521 ap->pflags &= ~(ATA_PFLAG_PM_PENDING | ATA_PFLAG_SUSPENDED); 3522 if (ap->pm_result) { 3523 *ap->pm_result = rc; 3524 ap->pm_result = NULL; 3525 } 3526 spin_unlock_irqrestore(ap->lock, flags); 3527 } 3528 #endif /* CONFIG_PM */ 3529