1 /* 2 * libata-eh.c - libata error handling 3 * 4 * Maintained by: Jeff Garzik <jgarzik@pobox.com> 5 * Please ALWAYS copy linux-ide@vger.kernel.org 6 * on emails. 7 * 8 * Copyright 2006 Tejun Heo <htejun@gmail.com> 9 * 10 * 11 * This program is free software; you can redistribute it and/or 12 * modify it under the terms of the GNU General Public License as 13 * published by the Free Software Foundation; either version 2, or 14 * (at your option) any later version. 15 * 16 * This program is distributed in the hope that it will be useful, 17 * but WITHOUT ANY WARRANTY; without even the implied warranty of 18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 19 * General Public License for more details. 20 * 21 * You should have received a copy of the GNU General Public License 22 * along with this program; see the file COPYING. If not, write to 23 * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, 24 * USA. 25 * 26 * 27 * libata documentation is available via 'make {ps|pdf}docs', 28 * as Documentation/DocBook/libata.* 29 * 30 * Hardware documentation available from http://www.t13.org/ and 31 * http://www.sata-io.org/ 32 * 33 */ 34 35 #include <linux/kernel.h> 36 #include <linux/pci.h> 37 #include <scsi/scsi.h> 38 #include <scsi/scsi_host.h> 39 #include <scsi/scsi_eh.h> 40 #include <scsi/scsi_device.h> 41 #include <scsi/scsi_cmnd.h> 42 #include "../scsi/scsi_transport_api.h" 43 44 #include <linux/libata.h> 45 46 #include "libata.h" 47 48 enum { 49 /* speed down verdicts */ 50 ATA_EH_SPDN_NCQ_OFF = (1 << 0), 51 ATA_EH_SPDN_SPEED_DOWN = (1 << 1), 52 ATA_EH_SPDN_FALLBACK_TO_PIO = (1 << 2), 53 ATA_EH_SPDN_KEEP_ERRORS = (1 << 3), 54 55 /* error flags */ 56 ATA_EFLAG_IS_IO = (1 << 0), 57 ATA_EFLAG_DUBIOUS_XFER = (1 << 1), 58 59 /* error categories */ 60 ATA_ECAT_NONE = 0, 61 ATA_ECAT_ATA_BUS = 1, 62 ATA_ECAT_TOUT_HSM = 2, 63 ATA_ECAT_UNK_DEV = 3, 64 ATA_ECAT_DUBIOUS_NONE = 4, 65 ATA_ECAT_DUBIOUS_ATA_BUS = 5, 66 ATA_ECAT_DUBIOUS_TOUT_HSM = 
6, 67 ATA_ECAT_DUBIOUS_UNK_DEV = 7, 68 ATA_ECAT_NR = 8, 69 70 ATA_EH_CMD_DFL_TIMEOUT = 5000, 71 72 /* always put at least this amount of time between resets */ 73 ATA_EH_RESET_COOL_DOWN = 5000, 74 75 /* Waiting in ->prereset can never be reliable. It's 76 * sometimes nice to wait there but it can't be depended upon; 77 * otherwise, we wouldn't be resetting. Just give it enough 78 * time for most drives to spin up. 79 */ 80 ATA_EH_PRERESET_TIMEOUT = 10000, 81 ATA_EH_FASTDRAIN_INTERVAL = 3000, 82 }; 83 84 /* The following table determines how we sequence resets. Each entry 85 * represents timeout for that try. The first try can be soft or 86 * hardreset. All others are hardreset if available. In most cases 87 * the first reset w/ 10sec timeout should succeed. Following entries 88 * are mostly for error handling, hotplug and retarded devices. 89 */ 90 static const unsigned long ata_eh_reset_timeouts[] = { 91 10000, /* most drives spin up by 10sec */ 92 10000, /* > 99% working drives spin up before 20sec */ 93 35000, /* give > 30 secs of idleness for retarded devices */ 94 5000, /* and sweet one last chance */ 95 ULONG_MAX, /* > 1 min has elapsed, give up */ 96 }; 97 98 static const unsigned long ata_eh_identify_timeouts[] = { 99 5000, /* covers > 99% of successes and not too boring on failures */ 100 10000, /* combined time till here is enough even for media access */ 101 30000, /* for true idiots */ 102 ULONG_MAX, 103 }; 104 105 static const unsigned long ata_eh_other_timeouts[] = { 106 5000, /* same rationale as identify timeout */ 107 10000, /* ditto */ 108 /* but no merciful 30sec for other commands, it just isn't worth it */ 109 ULONG_MAX, 110 }; 111 112 struct ata_eh_cmd_timeout_ent { 113 const u8 *commands; 114 const unsigned long *timeouts; 115 }; 116 117 /* The following table determines timeouts to use for EH internal 118 * commands. Each table entry is a command class and matches the 119 * commands the entry applies to and the timeout table to use. 
120 * 121 * On the retry after a command timed out, the next timeout value from 122 * the table is used. If the table doesn't contain further entries, 123 * the last value is used. 124 * 125 * ehc->cmd_timeout_idx keeps track of which timeout to use per 126 * command class, so if SET_FEATURES times out on the first try, the 127 * next try will use the second timeout value only for that class. 128 */ 129 #define CMDS(cmds...) (const u8 []){ cmds, 0 } 130 static const struct ata_eh_cmd_timeout_ent 131 ata_eh_cmd_timeout_table[ATA_EH_CMD_TIMEOUT_TABLE_SIZE] = { 132 { .commands = CMDS(ATA_CMD_ID_ATA, ATA_CMD_ID_ATAPI), 133 .timeouts = ata_eh_identify_timeouts, }, 134 { .commands = CMDS(ATA_CMD_READ_NATIVE_MAX, ATA_CMD_READ_NATIVE_MAX_EXT), 135 .timeouts = ata_eh_other_timeouts, }, 136 { .commands = CMDS(ATA_CMD_SET_MAX, ATA_CMD_SET_MAX_EXT), 137 .timeouts = ata_eh_other_timeouts, }, 138 { .commands = CMDS(ATA_CMD_SET_FEATURES), 139 .timeouts = ata_eh_other_timeouts, }, 140 { .commands = CMDS(ATA_CMD_INIT_DEV_PARAMS), 141 .timeouts = ata_eh_other_timeouts, }, 142 }; 143 #undef CMDS 144 145 static void __ata_port_freeze(struct ata_port *ap); 146 #ifdef CONFIG_PM 147 static void ata_eh_handle_port_suspend(struct ata_port *ap); 148 static void ata_eh_handle_port_resume(struct ata_port *ap); 149 #else /* CONFIG_PM */ 150 static void ata_eh_handle_port_suspend(struct ata_port *ap) 151 { } 152 153 static void ata_eh_handle_port_resume(struct ata_port *ap) 154 { } 155 #endif /* CONFIG_PM */ 156 157 static void __ata_ehi_pushv_desc(struct ata_eh_info *ehi, const char *fmt, 158 va_list args) 159 { 160 ehi->desc_len += vscnprintf(ehi->desc + ehi->desc_len, 161 ATA_EH_DESC_LEN - ehi->desc_len, 162 fmt, args); 163 } 164 165 /** 166 * __ata_ehi_push_desc - push error description without adding separator 167 * @ehi: target EHI 168 * @fmt: printf format string 169 * 170 * Format string according to @fmt and append it to @ehi->desc. 
171 * 172 * LOCKING: 173 * spin_lock_irqsave(host lock) 174 */ 175 void __ata_ehi_push_desc(struct ata_eh_info *ehi, const char *fmt, ...) 176 { 177 va_list args; 178 179 va_start(args, fmt); 180 __ata_ehi_pushv_desc(ehi, fmt, args); 181 va_end(args); 182 } 183 184 /** 185 * ata_ehi_push_desc - push error description with separator 186 * @ehi: target EHI 187 * @fmt: printf format string 188 * 189 * Format string according to @fmt and append it to @ehi->desc. 190 * If @ehi->desc is not empty, ", " is added in-between. 191 * 192 * LOCKING: 193 * spin_lock_irqsave(host lock) 194 */ 195 void ata_ehi_push_desc(struct ata_eh_info *ehi, const char *fmt, ...) 196 { 197 va_list args; 198 199 if (ehi->desc_len) 200 __ata_ehi_push_desc(ehi, ", "); 201 202 va_start(args, fmt); 203 __ata_ehi_pushv_desc(ehi, fmt, args); 204 va_end(args); 205 } 206 207 /** 208 * ata_ehi_clear_desc - clean error description 209 * @ehi: target EHI 210 * 211 * Clear @ehi->desc. 212 * 213 * LOCKING: 214 * spin_lock_irqsave(host lock) 215 */ 216 void ata_ehi_clear_desc(struct ata_eh_info *ehi) 217 { 218 ehi->desc[0] = '\0'; 219 ehi->desc_len = 0; 220 } 221 222 /** 223 * ata_port_desc - append port description 224 * @ap: target ATA port 225 * @fmt: printf format string 226 * 227 * Format string according to @fmt and append it to port 228 * description. If port description is not empty, " " is added 229 * in-between. This function is to be used while initializing 230 * ata_host. The description is printed on host registration. 231 * 232 * LOCKING: 233 * None. 234 */ 235 void ata_port_desc(struct ata_port *ap, const char *fmt, ...) 
236 { 237 va_list args; 238 239 WARN_ON(!(ap->pflags & ATA_PFLAG_INITIALIZING)); 240 241 if (ap->link.eh_info.desc_len) 242 __ata_ehi_push_desc(&ap->link.eh_info, " "); 243 244 va_start(args, fmt); 245 __ata_ehi_pushv_desc(&ap->link.eh_info, fmt, args); 246 va_end(args); 247 } 248 249 #ifdef CONFIG_PCI 250 251 /** 252 * ata_port_pbar_desc - append PCI BAR description 253 * @ap: target ATA port 254 * @bar: target PCI BAR 255 * @offset: offset into PCI BAR 256 * @name: name of the area 257 * 258 * If @offset is negative, this function formats a string which 259 * contains the name, address, size and type of the BAR and 260 * appends it to the port description. If @offset is zero or 261 * positive, only name and offsetted address is appended. 262 * 263 * LOCKING: 264 * None. 265 */ 266 void ata_port_pbar_desc(struct ata_port *ap, int bar, ssize_t offset, 267 const char *name) 268 { 269 struct pci_dev *pdev = to_pci_dev(ap->host->dev); 270 char *type = ""; 271 unsigned long long start, len; 272 273 if (pci_resource_flags(pdev, bar) & IORESOURCE_MEM) 274 type = "m"; 275 else if (pci_resource_flags(pdev, bar) & IORESOURCE_IO) 276 type = "i"; 277 278 start = (unsigned long long)pci_resource_start(pdev, bar); 279 len = (unsigned long long)pci_resource_len(pdev, bar); 280 281 if (offset < 0) 282 ata_port_desc(ap, "%s %s%llu@0x%llx", name, type, len, start); 283 else 284 ata_port_desc(ap, "%s 0x%llx", name, 285 start + (unsigned long long)offset); 286 } 287 288 #endif /* CONFIG_PCI */ 289 290 static int ata_lookup_timeout_table(u8 cmd) 291 { 292 int i; 293 294 for (i = 0; i < ATA_EH_CMD_TIMEOUT_TABLE_SIZE; i++) { 295 const u8 *cur; 296 297 for (cur = ata_eh_cmd_timeout_table[i].commands; *cur; cur++) 298 if (*cur == cmd) 299 return i; 300 } 301 302 return -1; 303 } 304 305 /** 306 * ata_internal_cmd_timeout - determine timeout for an internal command 307 * @dev: target device 308 * @cmd: internal command to be issued 309 * 310 * Determine timeout for internal command @cmd 
for @dev. 311 * 312 * LOCKING: 313 * EH context. 314 * 315 * RETURNS: 316 * Determined timeout. 317 */ 318 unsigned long ata_internal_cmd_timeout(struct ata_device *dev, u8 cmd) 319 { 320 struct ata_eh_context *ehc = &dev->link->eh_context; 321 int ent = ata_lookup_timeout_table(cmd); 322 int idx; 323 324 if (ent < 0) 325 return ATA_EH_CMD_DFL_TIMEOUT; 326 327 idx = ehc->cmd_timeout_idx[dev->devno][ent]; 328 return ata_eh_cmd_timeout_table[ent].timeouts[idx]; 329 } 330 331 /** 332 * ata_internal_cmd_timed_out - notification for internal command timeout 333 * @dev: target device 334 * @cmd: internal command which timed out 335 * 336 * Notify EH that internal command @cmd for @dev timed out. This 337 * function should be called only for commands whose timeouts are 338 * determined using ata_internal_cmd_timeout(). 339 * 340 * LOCKING: 341 * EH context. 342 */ 343 void ata_internal_cmd_timed_out(struct ata_device *dev, u8 cmd) 344 { 345 struct ata_eh_context *ehc = &dev->link->eh_context; 346 int ent = ata_lookup_timeout_table(cmd); 347 int idx; 348 349 if (ent < 0) 350 return; 351 352 idx = ehc->cmd_timeout_idx[dev->devno][ent]; 353 if (ata_eh_cmd_timeout_table[ent].timeouts[idx + 1] != ULONG_MAX) 354 ehc->cmd_timeout_idx[dev->devno][ent]++; 355 } 356 357 static void ata_ering_record(struct ata_ering *ering, unsigned int eflags, 358 unsigned int err_mask) 359 { 360 struct ata_ering_entry *ent; 361 362 WARN_ON(!err_mask); 363 364 ering->cursor++; 365 ering->cursor %= ATA_ERING_SIZE; 366 367 ent = &ering->ring[ering->cursor]; 368 ent->eflags = eflags; 369 ent->err_mask = err_mask; 370 ent->timestamp = get_jiffies_64(); 371 } 372 373 static struct ata_ering_entry *ata_ering_top(struct ata_ering *ering) 374 { 375 struct ata_ering_entry *ent = &ering->ring[ering->cursor]; 376 377 if (ent->err_mask) 378 return ent; 379 return NULL; 380 } 381 382 static void ata_ering_clear(struct ata_ering *ering) 383 { 384 memset(ering, 0, sizeof(*ering)); 385 } 386 387 static int 
ata_ering_map(struct ata_ering *ering, 388 int (*map_fn)(struct ata_ering_entry *, void *), 389 void *arg) 390 { 391 int idx, rc = 0; 392 struct ata_ering_entry *ent; 393 394 idx = ering->cursor; 395 do { 396 ent = &ering->ring[idx]; 397 if (!ent->err_mask) 398 break; 399 rc = map_fn(ent, arg); 400 if (rc) 401 break; 402 idx = (idx - 1 + ATA_ERING_SIZE) % ATA_ERING_SIZE; 403 } while (idx != ering->cursor); 404 405 return rc; 406 } 407 408 static unsigned int ata_eh_dev_action(struct ata_device *dev) 409 { 410 struct ata_eh_context *ehc = &dev->link->eh_context; 411 412 return ehc->i.action | ehc->i.dev_action[dev->devno]; 413 } 414 415 static void ata_eh_clear_action(struct ata_link *link, struct ata_device *dev, 416 struct ata_eh_info *ehi, unsigned int action) 417 { 418 struct ata_device *tdev; 419 420 if (!dev) { 421 ehi->action &= ~action; 422 ata_link_for_each_dev(tdev, link) 423 ehi->dev_action[tdev->devno] &= ~action; 424 } else { 425 /* doesn't make sense for port-wide EH actions */ 426 WARN_ON(!(action & ATA_EH_PERDEV_MASK)); 427 428 /* break ehi->action into ehi->dev_action */ 429 if (ehi->action & action) { 430 ata_link_for_each_dev(tdev, link) 431 ehi->dev_action[tdev->devno] |= 432 ehi->action & action; 433 ehi->action &= ~action; 434 } 435 436 /* turn off the specified per-dev action */ 437 ehi->dev_action[dev->devno] &= ~action; 438 } 439 } 440 441 /** 442 * ata_scsi_timed_out - SCSI layer time out callback 443 * @cmd: timed out SCSI command 444 * 445 * Handles SCSI layer timeout. We race with normal completion of 446 * the qc for @cmd. If the qc is already gone, we lose and let 447 * the scsi command finish (EH_HANDLED). Otherwise, the qc has 448 * timed out and EH should be invoked. Prevent ata_qc_complete() 449 * from finishing it by setting EH_SCHEDULED and return 450 * EH_NOT_HANDLED. 451 * 452 * TODO: kill this function once old EH is gone. 
453 * 454 * LOCKING: 455 * Called from timer context 456 * 457 * RETURNS: 458 * EH_HANDLED or EH_NOT_HANDLED 459 */ 460 enum scsi_eh_timer_return ata_scsi_timed_out(struct scsi_cmnd *cmd) 461 { 462 struct Scsi_Host *host = cmd->device->host; 463 struct ata_port *ap = ata_shost_to_port(host); 464 unsigned long flags; 465 struct ata_queued_cmd *qc; 466 enum scsi_eh_timer_return ret; 467 468 DPRINTK("ENTER\n"); 469 470 if (ap->ops->error_handler) { 471 ret = EH_NOT_HANDLED; 472 goto out; 473 } 474 475 ret = EH_HANDLED; 476 spin_lock_irqsave(ap->lock, flags); 477 qc = ata_qc_from_tag(ap, ap->link.active_tag); 478 if (qc) { 479 WARN_ON(qc->scsicmd != cmd); 480 qc->flags |= ATA_QCFLAG_EH_SCHEDULED; 481 qc->err_mask |= AC_ERR_TIMEOUT; 482 ret = EH_NOT_HANDLED; 483 } 484 spin_unlock_irqrestore(ap->lock, flags); 485 486 out: 487 DPRINTK("EXIT, ret=%d\n", ret); 488 return ret; 489 } 490 491 /** 492 * ata_scsi_error - SCSI layer error handler callback 493 * @host: SCSI host on which error occurred 494 * 495 * Handles SCSI-layer-thrown error events. 496 * 497 * LOCKING: 498 * Inherited from SCSI layer (none, can sleep) 499 * 500 * RETURNS: 501 * Zero. 502 */ 503 void ata_scsi_error(struct Scsi_Host *host) 504 { 505 struct ata_port *ap = ata_shost_to_port(host); 506 int i; 507 unsigned long flags; 508 509 DPRINTK("ENTER\n"); 510 511 /* synchronize with port task */ 512 ata_port_flush_task(ap); 513 514 /* synchronize with host lock and sort out timeouts */ 515 516 /* For new EH, all qcs are finished in one of three ways - 517 * normal completion, error completion, and SCSI timeout. 518 * Both cmpletions can race against SCSI timeout. When normal 519 * completion wins, the qc never reaches EH. When error 520 * completion wins, the qc has ATA_QCFLAG_FAILED set. 521 * 522 * When SCSI timeout wins, things are a bit more complex. 523 * Normal or error completion can occur after the timeout but 524 * before this point. In such cases, both types of 525 * completions are honored. 
A scmd is determined to have 526 * timed out iff its associated qc is active and not failed. 527 */ 528 if (ap->ops->error_handler) { 529 struct scsi_cmnd *scmd, *tmp; 530 int nr_timedout = 0; 531 532 spin_lock_irqsave(ap->lock, flags); 533 534 list_for_each_entry_safe(scmd, tmp, &host->eh_cmd_q, eh_entry) { 535 struct ata_queued_cmd *qc; 536 537 for (i = 0; i < ATA_MAX_QUEUE; i++) { 538 qc = __ata_qc_from_tag(ap, i); 539 if (qc->flags & ATA_QCFLAG_ACTIVE && 540 qc->scsicmd == scmd) 541 break; 542 } 543 544 if (i < ATA_MAX_QUEUE) { 545 /* the scmd has an associated qc */ 546 if (!(qc->flags & ATA_QCFLAG_FAILED)) { 547 /* which hasn't failed yet, timeout */ 548 qc->err_mask |= AC_ERR_TIMEOUT; 549 qc->flags |= ATA_QCFLAG_FAILED; 550 nr_timedout++; 551 } 552 } else { 553 /* Normal completion occurred after 554 * SCSI timeout but before this point. 555 * Successfully complete it. 556 */ 557 scmd->retries = scmd->allowed; 558 scsi_eh_finish_cmd(scmd, &ap->eh_done_q); 559 } 560 } 561 562 /* If we have timed out qcs. They belong to EH from 563 * this point but the state of the controller is 564 * unknown. Freeze the port to make sure the IRQ 565 * handler doesn't diddle with those qcs. This must 566 * be done atomically w.r.t. setting QCFLAG_FAILED. 
567 */ 568 if (nr_timedout) 569 __ata_port_freeze(ap); 570 571 spin_unlock_irqrestore(ap->lock, flags); 572 573 /* initialize eh_tries */ 574 ap->eh_tries = ATA_EH_MAX_TRIES; 575 } else 576 spin_unlock_wait(ap->lock); 577 578 repeat: 579 /* invoke error handler */ 580 if (ap->ops->error_handler) { 581 struct ata_link *link; 582 583 /* kill fast drain timer */ 584 del_timer_sync(&ap->fastdrain_timer); 585 586 /* process port resume request */ 587 ata_eh_handle_port_resume(ap); 588 589 /* fetch & clear EH info */ 590 spin_lock_irqsave(ap->lock, flags); 591 592 __ata_port_for_each_link(link, ap) { 593 struct ata_eh_context *ehc = &link->eh_context; 594 struct ata_device *dev; 595 596 memset(&link->eh_context, 0, sizeof(link->eh_context)); 597 link->eh_context.i = link->eh_info; 598 memset(&link->eh_info, 0, sizeof(link->eh_info)); 599 600 ata_link_for_each_dev(dev, link) { 601 int devno = dev->devno; 602 603 ehc->saved_xfer_mode[devno] = dev->xfer_mode; 604 if (ata_ncq_enabled(dev)) 605 ehc->saved_ncq_enabled |= 1 << devno; 606 } 607 608 /* set last reset timestamp to some time in the past */ 609 ehc->last_reset = jiffies - 60 * HZ; 610 } 611 612 ap->pflags |= ATA_PFLAG_EH_IN_PROGRESS; 613 ap->pflags &= ~ATA_PFLAG_EH_PENDING; 614 ap->excl_link = NULL; /* don't maintain exclusion over EH */ 615 616 spin_unlock_irqrestore(ap->lock, flags); 617 618 /* invoke EH, skip if unloading or suspended */ 619 if (!(ap->pflags & (ATA_PFLAG_UNLOADING | ATA_PFLAG_SUSPENDED))) 620 ap->ops->error_handler(ap); 621 else 622 ata_eh_finish(ap); 623 624 /* process port suspend request */ 625 ata_eh_handle_port_suspend(ap); 626 627 /* Exception might have happend after ->error_handler 628 * recovered the port but before this point. Repeat 629 * EH in such case. 
630 */ 631 spin_lock_irqsave(ap->lock, flags); 632 633 if (ap->pflags & ATA_PFLAG_EH_PENDING) { 634 if (--ap->eh_tries) { 635 spin_unlock_irqrestore(ap->lock, flags); 636 goto repeat; 637 } 638 ata_port_printk(ap, KERN_ERR, "EH pending after %d " 639 "tries, giving up\n", ATA_EH_MAX_TRIES); 640 ap->pflags &= ~ATA_PFLAG_EH_PENDING; 641 } 642 643 /* this run is complete, make sure EH info is clear */ 644 __ata_port_for_each_link(link, ap) 645 memset(&link->eh_info, 0, sizeof(link->eh_info)); 646 647 /* Clear host_eh_scheduled while holding ap->lock such 648 * that if exception occurs after this point but 649 * before EH completion, SCSI midlayer will 650 * re-initiate EH. 651 */ 652 host->host_eh_scheduled = 0; 653 654 spin_unlock_irqrestore(ap->lock, flags); 655 } else { 656 WARN_ON(ata_qc_from_tag(ap, ap->link.active_tag) == NULL); 657 ap->ops->eng_timeout(ap); 658 } 659 660 /* finish or retry handled scmd's and clean up */ 661 WARN_ON(host->host_failed || !list_empty(&host->eh_cmd_q)); 662 663 scsi_eh_flush_done_q(&ap->eh_done_q); 664 665 /* clean up */ 666 spin_lock_irqsave(ap->lock, flags); 667 668 if (ap->pflags & ATA_PFLAG_LOADING) 669 ap->pflags &= ~ATA_PFLAG_LOADING; 670 else if (ap->pflags & ATA_PFLAG_SCSI_HOTPLUG) 671 queue_delayed_work(ata_aux_wq, &ap->hotplug_task, 0); 672 673 if (ap->pflags & ATA_PFLAG_RECOVERED) 674 ata_port_printk(ap, KERN_INFO, "EH complete\n"); 675 676 ap->pflags &= ~(ATA_PFLAG_SCSI_HOTPLUG | ATA_PFLAG_RECOVERED); 677 678 /* tell wait_eh that we're done */ 679 ap->pflags &= ~ATA_PFLAG_EH_IN_PROGRESS; 680 wake_up_all(&ap->eh_wait_q); 681 682 spin_unlock_irqrestore(ap->lock, flags); 683 684 DPRINTK("EXIT\n"); 685 } 686 687 /** 688 * ata_port_wait_eh - Wait for the currently pending EH to complete 689 * @ap: Port to wait EH for 690 * 691 * Wait until the currently pending EH is complete. 692 * 693 * LOCKING: 694 * Kernel thread context (may sleep). 
695 */ 696 void ata_port_wait_eh(struct ata_port *ap) 697 { 698 unsigned long flags; 699 DEFINE_WAIT(wait); 700 701 retry: 702 spin_lock_irqsave(ap->lock, flags); 703 704 while (ap->pflags & (ATA_PFLAG_EH_PENDING | ATA_PFLAG_EH_IN_PROGRESS)) { 705 prepare_to_wait(&ap->eh_wait_q, &wait, TASK_UNINTERRUPTIBLE); 706 spin_unlock_irqrestore(ap->lock, flags); 707 schedule(); 708 spin_lock_irqsave(ap->lock, flags); 709 } 710 finish_wait(&ap->eh_wait_q, &wait); 711 712 spin_unlock_irqrestore(ap->lock, flags); 713 714 /* make sure SCSI EH is complete */ 715 if (scsi_host_in_recovery(ap->scsi_host)) { 716 msleep(10); 717 goto retry; 718 } 719 } 720 721 static int ata_eh_nr_in_flight(struct ata_port *ap) 722 { 723 unsigned int tag; 724 int nr = 0; 725 726 /* count only non-internal commands */ 727 for (tag = 0; tag < ATA_MAX_QUEUE - 1; tag++) 728 if (ata_qc_from_tag(ap, tag)) 729 nr++; 730 731 return nr; 732 } 733 734 void ata_eh_fastdrain_timerfn(unsigned long arg) 735 { 736 struct ata_port *ap = (void *)arg; 737 unsigned long flags; 738 int cnt; 739 740 spin_lock_irqsave(ap->lock, flags); 741 742 cnt = ata_eh_nr_in_flight(ap); 743 744 /* are we done? */ 745 if (!cnt) 746 goto out_unlock; 747 748 if (cnt == ap->fastdrain_cnt) { 749 unsigned int tag; 750 751 /* No progress during the last interval, tag all 752 * in-flight qcs as timed out and freeze the port. 
753 */ 754 for (tag = 0; tag < ATA_MAX_QUEUE - 1; tag++) { 755 struct ata_queued_cmd *qc = ata_qc_from_tag(ap, tag); 756 if (qc) 757 qc->err_mask |= AC_ERR_TIMEOUT; 758 } 759 760 ata_port_freeze(ap); 761 } else { 762 /* some qcs have finished, give it another chance */ 763 ap->fastdrain_cnt = cnt; 764 ap->fastdrain_timer.expires = 765 ata_deadline(jiffies, ATA_EH_FASTDRAIN_INTERVAL); 766 add_timer(&ap->fastdrain_timer); 767 } 768 769 out_unlock: 770 spin_unlock_irqrestore(ap->lock, flags); 771 } 772 773 /** 774 * ata_eh_set_pending - set ATA_PFLAG_EH_PENDING and activate fast drain 775 * @ap: target ATA port 776 * @fastdrain: activate fast drain 777 * 778 * Set ATA_PFLAG_EH_PENDING and activate fast drain if @fastdrain 779 * is non-zero and EH wasn't pending before. Fast drain ensures 780 * that EH kicks in in timely manner. 781 * 782 * LOCKING: 783 * spin_lock_irqsave(host lock) 784 */ 785 static void ata_eh_set_pending(struct ata_port *ap, int fastdrain) 786 { 787 int cnt; 788 789 /* already scheduled? */ 790 if (ap->pflags & ATA_PFLAG_EH_PENDING) 791 return; 792 793 ap->pflags |= ATA_PFLAG_EH_PENDING; 794 795 if (!fastdrain) 796 return; 797 798 /* do we have in-flight qcs? */ 799 cnt = ata_eh_nr_in_flight(ap); 800 if (!cnt) 801 return; 802 803 /* activate fast drain */ 804 ap->fastdrain_cnt = cnt; 805 ap->fastdrain_timer.expires = 806 ata_deadline(jiffies, ATA_EH_FASTDRAIN_INTERVAL); 807 add_timer(&ap->fastdrain_timer); 808 } 809 810 /** 811 * ata_qc_schedule_eh - schedule qc for error handling 812 * @qc: command to schedule error handling for 813 * 814 * Schedule error handling for @qc. EH will kick in as soon as 815 * other commands are drained. 
816 * 817 * LOCKING: 818 * spin_lock_irqsave(host lock) 819 */ 820 void ata_qc_schedule_eh(struct ata_queued_cmd *qc) 821 { 822 struct ata_port *ap = qc->ap; 823 824 WARN_ON(!ap->ops->error_handler); 825 826 qc->flags |= ATA_QCFLAG_FAILED; 827 ata_eh_set_pending(ap, 1); 828 829 /* The following will fail if timeout has already expired. 830 * ata_scsi_error() takes care of such scmds on EH entry. 831 * Note that ATA_QCFLAG_FAILED is unconditionally set after 832 * this function completes. 833 */ 834 scsi_req_abort_cmd(qc->scsicmd); 835 } 836 837 /** 838 * ata_port_schedule_eh - schedule error handling without a qc 839 * @ap: ATA port to schedule EH for 840 * 841 * Schedule error handling for @ap. EH will kick in as soon as 842 * all commands are drained. 843 * 844 * LOCKING: 845 * spin_lock_irqsave(host lock) 846 */ 847 void ata_port_schedule_eh(struct ata_port *ap) 848 { 849 WARN_ON(!ap->ops->error_handler); 850 851 if (ap->pflags & ATA_PFLAG_INITIALIZING) 852 return; 853 854 ata_eh_set_pending(ap, 1); 855 scsi_schedule_eh(ap->scsi_host); 856 857 DPRINTK("port EH scheduled\n"); 858 } 859 860 static int ata_do_link_abort(struct ata_port *ap, struct ata_link *link) 861 { 862 int tag, nr_aborted = 0; 863 864 WARN_ON(!ap->ops->error_handler); 865 866 /* we're gonna abort all commands, no need for fast drain */ 867 ata_eh_set_pending(ap, 0); 868 869 for (tag = 0; tag < ATA_MAX_QUEUE; tag++) { 870 struct ata_queued_cmd *qc = ata_qc_from_tag(ap, tag); 871 872 if (qc && (!link || qc->dev->link == link)) { 873 qc->flags |= ATA_QCFLAG_FAILED; 874 ata_qc_complete(qc); 875 nr_aborted++; 876 } 877 } 878 879 if (!nr_aborted) 880 ata_port_schedule_eh(ap); 881 882 return nr_aborted; 883 } 884 885 /** 886 * ata_link_abort - abort all qc's on the link 887 * @link: ATA link to abort qc's for 888 * 889 * Abort all active qc's active on @link and schedule EH. 890 * 891 * LOCKING: 892 * spin_lock_irqsave(host lock) 893 * 894 * RETURNS: 895 * Number of aborted qc's. 
896 */ 897 int ata_link_abort(struct ata_link *link) 898 { 899 return ata_do_link_abort(link->ap, link); 900 } 901 902 /** 903 * ata_port_abort - abort all qc's on the port 904 * @ap: ATA port to abort qc's for 905 * 906 * Abort all active qc's of @ap and schedule EH. 907 * 908 * LOCKING: 909 * spin_lock_irqsave(host_set lock) 910 * 911 * RETURNS: 912 * Number of aborted qc's. 913 */ 914 int ata_port_abort(struct ata_port *ap) 915 { 916 return ata_do_link_abort(ap, NULL); 917 } 918 919 /** 920 * __ata_port_freeze - freeze port 921 * @ap: ATA port to freeze 922 * 923 * This function is called when HSM violation or some other 924 * condition disrupts normal operation of the port. Frozen port 925 * is not allowed to perform any operation until the port is 926 * thawed, which usually follows a successful reset. 927 * 928 * ap->ops->freeze() callback can be used for freezing the port 929 * hardware-wise (e.g. mask interrupt and stop DMA engine). If a 930 * port cannot be frozen hardware-wise, the interrupt handler 931 * must ack and clear interrupts unconditionally while the port 932 * is frozen. 933 * 934 * LOCKING: 935 * spin_lock_irqsave(host lock) 936 */ 937 static void __ata_port_freeze(struct ata_port *ap) 938 { 939 WARN_ON(!ap->ops->error_handler); 940 941 if (ap->ops->freeze) 942 ap->ops->freeze(ap); 943 944 ap->pflags |= ATA_PFLAG_FROZEN; 945 946 DPRINTK("ata%u port frozen\n", ap->print_id); 947 } 948 949 /** 950 * ata_port_freeze - abort & freeze port 951 * @ap: ATA port to freeze 952 * 953 * Abort and freeze @ap. 954 * 955 * LOCKING: 956 * spin_lock_irqsave(host lock) 957 * 958 * RETURNS: 959 * Number of aborted commands. 
960 */ 961 int ata_port_freeze(struct ata_port *ap) 962 { 963 int nr_aborted; 964 965 WARN_ON(!ap->ops->error_handler); 966 967 nr_aborted = ata_port_abort(ap); 968 __ata_port_freeze(ap); 969 970 return nr_aborted; 971 } 972 973 /** 974 * sata_async_notification - SATA async notification handler 975 * @ap: ATA port where async notification is received 976 * 977 * Handler to be called when async notification via SDB FIS is 978 * received. This function schedules EH if necessary. 979 * 980 * LOCKING: 981 * spin_lock_irqsave(host lock) 982 * 983 * RETURNS: 984 * 1 if EH is scheduled, 0 otherwise. 985 */ 986 int sata_async_notification(struct ata_port *ap) 987 { 988 u32 sntf; 989 int rc; 990 991 if (!(ap->flags & ATA_FLAG_AN)) 992 return 0; 993 994 rc = sata_scr_read(&ap->link, SCR_NOTIFICATION, &sntf); 995 if (rc == 0) 996 sata_scr_write(&ap->link, SCR_NOTIFICATION, sntf); 997 998 if (!sata_pmp_attached(ap) || rc) { 999 /* PMP is not attached or SNTF is not available */ 1000 if (!sata_pmp_attached(ap)) { 1001 /* PMP is not attached. Check whether ATAPI 1002 * AN is configured. If so, notify media 1003 * change. 1004 */ 1005 struct ata_device *dev = ap->link.device; 1006 1007 if ((dev->class == ATA_DEV_ATAPI) && 1008 (dev->flags & ATA_DFLAG_AN)) 1009 ata_scsi_media_change_notify(dev); 1010 return 0; 1011 } else { 1012 /* PMP is attached but SNTF is not available. 1013 * ATAPI async media change notification is 1014 * not used. The PMP must be reporting PHY 1015 * status change, schedule EH. 
1016 */ 1017 ata_port_schedule_eh(ap); 1018 return 1; 1019 } 1020 } else { 1021 /* PMP is attached and SNTF is available */ 1022 struct ata_link *link; 1023 1024 /* check and notify ATAPI AN */ 1025 ata_port_for_each_link(link, ap) { 1026 if (!(sntf & (1 << link->pmp))) 1027 continue; 1028 1029 if ((link->device->class == ATA_DEV_ATAPI) && 1030 (link->device->flags & ATA_DFLAG_AN)) 1031 ata_scsi_media_change_notify(link->device); 1032 } 1033 1034 /* If PMP is reporting that PHY status of some 1035 * downstream ports has changed, schedule EH. 1036 */ 1037 if (sntf & (1 << SATA_PMP_CTRL_PORT)) { 1038 ata_port_schedule_eh(ap); 1039 return 1; 1040 } 1041 1042 return 0; 1043 } 1044 } 1045 1046 /** 1047 * ata_eh_freeze_port - EH helper to freeze port 1048 * @ap: ATA port to freeze 1049 * 1050 * Freeze @ap. 1051 * 1052 * LOCKING: 1053 * None. 1054 */ 1055 void ata_eh_freeze_port(struct ata_port *ap) 1056 { 1057 unsigned long flags; 1058 1059 if (!ap->ops->error_handler) 1060 return; 1061 1062 spin_lock_irqsave(ap->lock, flags); 1063 __ata_port_freeze(ap); 1064 spin_unlock_irqrestore(ap->lock, flags); 1065 } 1066 1067 /** 1068 * ata_port_thaw_port - EH helper to thaw port 1069 * @ap: ATA port to thaw 1070 * 1071 * Thaw frozen port @ap. 1072 * 1073 * LOCKING: 1074 * None. 
1075 */ 1076 void ata_eh_thaw_port(struct ata_port *ap) 1077 { 1078 unsigned long flags; 1079 1080 if (!ap->ops->error_handler) 1081 return; 1082 1083 spin_lock_irqsave(ap->lock, flags); 1084 1085 ap->pflags &= ~ATA_PFLAG_FROZEN; 1086 1087 if (ap->ops->thaw) 1088 ap->ops->thaw(ap); 1089 1090 spin_unlock_irqrestore(ap->lock, flags); 1091 1092 DPRINTK("ata%u port thawed\n", ap->print_id); 1093 } 1094 1095 static void ata_eh_scsidone(struct scsi_cmnd *scmd) 1096 { 1097 /* nada */ 1098 } 1099 1100 static void __ata_eh_qc_complete(struct ata_queued_cmd *qc) 1101 { 1102 struct ata_port *ap = qc->ap; 1103 struct scsi_cmnd *scmd = qc->scsicmd; 1104 unsigned long flags; 1105 1106 spin_lock_irqsave(ap->lock, flags); 1107 qc->scsidone = ata_eh_scsidone; 1108 __ata_qc_complete(qc); 1109 WARN_ON(ata_tag_valid(qc->tag)); 1110 spin_unlock_irqrestore(ap->lock, flags); 1111 1112 scsi_eh_finish_cmd(scmd, &ap->eh_done_q); 1113 } 1114 1115 /** 1116 * ata_eh_qc_complete - Complete an active ATA command from EH 1117 * @qc: Command to complete 1118 * 1119 * Indicate to the mid and upper layers that an ATA command has 1120 * completed. To be used from EH. 1121 */ 1122 void ata_eh_qc_complete(struct ata_queued_cmd *qc) 1123 { 1124 struct scsi_cmnd *scmd = qc->scsicmd; 1125 scmd->retries = scmd->allowed; 1126 __ata_eh_qc_complete(qc); 1127 } 1128 1129 /** 1130 * ata_eh_qc_retry - Tell midlayer to retry an ATA command after EH 1131 * @qc: Command to retry 1132 * 1133 * Indicate to the mid and upper layers that an ATA command 1134 * should be retried. To be used from EH. 1135 * 1136 * SCSI midlayer limits the number of retries to scmd->allowed. 1137 * scmd->retries is decremented for commands which get retried 1138 * due to unrelated failures (qc->err_mask is zero). 
1139 */ 1140 void ata_eh_qc_retry(struct ata_queued_cmd *qc) 1141 { 1142 struct scsi_cmnd *scmd = qc->scsicmd; 1143 if (!qc->err_mask && scmd->retries) 1144 scmd->retries--; 1145 __ata_eh_qc_complete(qc); 1146 } 1147 1148 /** 1149 * ata_eh_detach_dev - detach ATA device 1150 * @dev: ATA device to detach 1151 * 1152 * Detach @dev. 1153 * 1154 * LOCKING: 1155 * None. 1156 */ 1157 void ata_eh_detach_dev(struct ata_device *dev) 1158 { 1159 struct ata_link *link = dev->link; 1160 struct ata_port *ap = link->ap; 1161 unsigned long flags; 1162 1163 ata_dev_disable(dev); 1164 1165 spin_lock_irqsave(ap->lock, flags); 1166 1167 dev->flags &= ~ATA_DFLAG_DETACH; 1168 1169 if (ata_scsi_offline_dev(dev)) { 1170 dev->flags |= ATA_DFLAG_DETACHED; 1171 ap->pflags |= ATA_PFLAG_SCSI_HOTPLUG; 1172 } 1173 1174 /* clear per-dev EH actions */ 1175 ata_eh_clear_action(link, dev, &link->eh_info, ATA_EH_PERDEV_MASK); 1176 ata_eh_clear_action(link, dev, &link->eh_context.i, ATA_EH_PERDEV_MASK); 1177 1178 spin_unlock_irqrestore(ap->lock, flags); 1179 } 1180 1181 /** 1182 * ata_eh_about_to_do - about to perform eh_action 1183 * @link: target ATA link 1184 * @dev: target ATA dev for per-dev action (can be NULL) 1185 * @action: action about to be performed 1186 * 1187 * Called just before performing EH actions to clear related bits 1188 * in @link->eh_info such that eh actions are not unnecessarily 1189 * repeated. 1190 * 1191 * LOCKING: 1192 * None. 
1193 */ 1194 void ata_eh_about_to_do(struct ata_link *link, struct ata_device *dev, 1195 unsigned int action) 1196 { 1197 struct ata_port *ap = link->ap; 1198 struct ata_eh_info *ehi = &link->eh_info; 1199 struct ata_eh_context *ehc = &link->eh_context; 1200 unsigned long flags; 1201 1202 spin_lock_irqsave(ap->lock, flags); 1203 1204 ata_eh_clear_action(link, dev, ehi, action); 1205 1206 if (!(ehc->i.flags & ATA_EHI_QUIET)) 1207 ap->pflags |= ATA_PFLAG_RECOVERED; 1208 1209 spin_unlock_irqrestore(ap->lock, flags); 1210 } 1211 1212 /** 1213 * ata_eh_done - EH action complete 1214 * @ap: target ATA port 1215 * @dev: target ATA dev for per-dev action (can be NULL) 1216 * @action: action just completed 1217 * 1218 * Called right after performing EH actions to clear related bits 1219 * in @link->eh_context. 1220 * 1221 * LOCKING: 1222 * None. 1223 */ 1224 void ata_eh_done(struct ata_link *link, struct ata_device *dev, 1225 unsigned int action) 1226 { 1227 struct ata_eh_context *ehc = &link->eh_context; 1228 1229 ata_eh_clear_action(link, dev, &ehc->i, action); 1230 } 1231 1232 /** 1233 * ata_err_string - convert err_mask to descriptive string 1234 * @err_mask: error mask to convert to string 1235 * 1236 * Convert @err_mask to descriptive string. Errors are 1237 * prioritized according to severity and only the most severe 1238 * error is reported. 1239 * 1240 * LOCKING: 1241 * None. 
1242 * 1243 * RETURNS: 1244 * Descriptive string for @err_mask 1245 */ 1246 static const char *ata_err_string(unsigned int err_mask) 1247 { 1248 if (err_mask & AC_ERR_HOST_BUS) 1249 return "host bus error"; 1250 if (err_mask & AC_ERR_ATA_BUS) 1251 return "ATA bus error"; 1252 if (err_mask & AC_ERR_TIMEOUT) 1253 return "timeout"; 1254 if (err_mask & AC_ERR_HSM) 1255 return "HSM violation"; 1256 if (err_mask & AC_ERR_SYSTEM) 1257 return "internal error"; 1258 if (err_mask & AC_ERR_MEDIA) 1259 return "media error"; 1260 if (err_mask & AC_ERR_INVALID) 1261 return "invalid argument"; 1262 if (err_mask & AC_ERR_DEV) 1263 return "device error"; 1264 return "unknown error"; 1265 } 1266 1267 /** 1268 * ata_read_log_page - read a specific log page 1269 * @dev: target device 1270 * @page: page to read 1271 * @buf: buffer to store read page 1272 * @sectors: number of sectors to read 1273 * 1274 * Read log page using READ_LOG_EXT command. 1275 * 1276 * LOCKING: 1277 * Kernel thread context (may sleep). 1278 * 1279 * RETURNS: 1280 * 0 on success, AC_ERR_* mask otherwise. 
1281 */ 1282 static unsigned int ata_read_log_page(struct ata_device *dev, 1283 u8 page, void *buf, unsigned int sectors) 1284 { 1285 struct ata_taskfile tf; 1286 unsigned int err_mask; 1287 1288 DPRINTK("read log page - page %d\n", page); 1289 1290 ata_tf_init(dev, &tf); 1291 tf.command = ATA_CMD_READ_LOG_EXT; 1292 tf.lbal = page; 1293 tf.nsect = sectors; 1294 tf.hob_nsect = sectors >> 8; 1295 tf.flags |= ATA_TFLAG_ISADDR | ATA_TFLAG_LBA48 | ATA_TFLAG_DEVICE; 1296 tf.protocol = ATA_PROT_PIO; 1297 1298 err_mask = ata_exec_internal(dev, &tf, NULL, DMA_FROM_DEVICE, 1299 buf, sectors * ATA_SECT_SIZE, 0); 1300 1301 DPRINTK("EXIT, err_mask=%x\n", err_mask); 1302 return err_mask; 1303 } 1304 1305 /** 1306 * ata_eh_read_log_10h - Read log page 10h for NCQ error details 1307 * @dev: Device to read log page 10h from 1308 * @tag: Resulting tag of the failed command 1309 * @tf: Resulting taskfile registers of the failed command 1310 * 1311 * Read log page 10h to obtain NCQ error details and clear error 1312 * condition. 1313 * 1314 * LOCKING: 1315 * Kernel thread context (may sleep). 1316 * 1317 * RETURNS: 1318 * 0 on success, -errno otherwise. 
1319 */ 1320 static int ata_eh_read_log_10h(struct ata_device *dev, 1321 int *tag, struct ata_taskfile *tf) 1322 { 1323 u8 *buf = dev->link->ap->sector_buf; 1324 unsigned int err_mask; 1325 u8 csum; 1326 int i; 1327 1328 err_mask = ata_read_log_page(dev, ATA_LOG_SATA_NCQ, buf, 1); 1329 if (err_mask) 1330 return -EIO; 1331 1332 csum = 0; 1333 for (i = 0; i < ATA_SECT_SIZE; i++) 1334 csum += buf[i]; 1335 if (csum) 1336 ata_dev_printk(dev, KERN_WARNING, 1337 "invalid checksum 0x%x on log page 10h\n", csum); 1338 1339 if (buf[0] & 0x80) 1340 return -ENOENT; 1341 1342 *tag = buf[0] & 0x1f; 1343 1344 tf->command = buf[2]; 1345 tf->feature = buf[3]; 1346 tf->lbal = buf[4]; 1347 tf->lbam = buf[5]; 1348 tf->lbah = buf[6]; 1349 tf->device = buf[7]; 1350 tf->hob_lbal = buf[8]; 1351 tf->hob_lbam = buf[9]; 1352 tf->hob_lbah = buf[10]; 1353 tf->nsect = buf[12]; 1354 tf->hob_nsect = buf[13]; 1355 1356 return 0; 1357 } 1358 1359 /** 1360 * atapi_eh_request_sense - perform ATAPI REQUEST_SENSE 1361 * @dev: device to perform REQUEST_SENSE to 1362 * @sense_buf: result sense data buffer (SCSI_SENSE_BUFFERSIZE bytes long) 1363 * 1364 * Perform ATAPI REQUEST_SENSE after the device reported CHECK 1365 * SENSE. This function is EH helper. 1366 * 1367 * LOCKING: 1368 * Kernel thread context (may sleep). 1369 * 1370 * RETURNS: 1371 * 0 on success, AC_ERR_* mask on failure 1372 */ 1373 static unsigned int atapi_eh_request_sense(struct ata_queued_cmd *qc) 1374 { 1375 struct ata_device *dev = qc->dev; 1376 unsigned char *sense_buf = qc->scsicmd->sense_buffer; 1377 struct ata_port *ap = dev->link->ap; 1378 struct ata_taskfile tf; 1379 u8 cdb[ATAPI_CDB_LEN]; 1380 1381 DPRINTK("ATAPI request sense\n"); 1382 1383 /* FIXME: is this needed? 
*/ 1384 memset(sense_buf, 0, SCSI_SENSE_BUFFERSIZE); 1385 1386 /* initialize sense_buf with the error register, 1387 * for the case where they are -not- overwritten 1388 */ 1389 sense_buf[0] = 0x70; 1390 sense_buf[2] = qc->result_tf.feature >> 4; 1391 1392 /* some devices time out if garbage left in tf */ 1393 ata_tf_init(dev, &tf); 1394 1395 memset(cdb, 0, ATAPI_CDB_LEN); 1396 cdb[0] = REQUEST_SENSE; 1397 cdb[4] = SCSI_SENSE_BUFFERSIZE; 1398 1399 tf.flags |= ATA_TFLAG_ISADDR | ATA_TFLAG_DEVICE; 1400 tf.command = ATA_CMD_PACKET; 1401 1402 /* is it pointless to prefer PIO for "safety reasons"? */ 1403 if (ap->flags & ATA_FLAG_PIO_DMA) { 1404 tf.protocol = ATAPI_PROT_DMA; 1405 tf.feature |= ATAPI_PKT_DMA; 1406 } else { 1407 tf.protocol = ATAPI_PROT_PIO; 1408 tf.lbam = SCSI_SENSE_BUFFERSIZE; 1409 tf.lbah = 0; 1410 } 1411 1412 return ata_exec_internal(dev, &tf, cdb, DMA_FROM_DEVICE, 1413 sense_buf, SCSI_SENSE_BUFFERSIZE, 0); 1414 } 1415 1416 /** 1417 * ata_eh_analyze_serror - analyze SError for a failed port 1418 * @link: ATA link to analyze SError for 1419 * 1420 * Analyze SError if available and further determine cause of 1421 * failure. 1422 * 1423 * LOCKING: 1424 * None. 1425 */ 1426 static void ata_eh_analyze_serror(struct ata_link *link) 1427 { 1428 struct ata_eh_context *ehc = &link->eh_context; 1429 u32 serror = ehc->i.serror; 1430 unsigned int err_mask = 0, action = 0; 1431 u32 hotplug_mask; 1432 1433 if (serror & (SERR_PERSISTENT | SERR_DATA)) { 1434 err_mask |= AC_ERR_ATA_BUS; 1435 action |= ATA_EH_RESET; 1436 } 1437 if (serror & SERR_PROTOCOL) { 1438 err_mask |= AC_ERR_HSM; 1439 action |= ATA_EH_RESET; 1440 } 1441 if (serror & SERR_INTERNAL) { 1442 err_mask |= AC_ERR_SYSTEM; 1443 action |= ATA_EH_RESET; 1444 } 1445 1446 /* Determine whether a hotplug event has occurred. Both 1447 * SError.N/X are considered hotplug events for enabled or 1448 * host links. For disabled PMP links, only N bit is 1449 * considered as X bit is left at 1 for link plugging. 
1450 */ 1451 hotplug_mask = 0; 1452 1453 if (!(link->flags & ATA_LFLAG_DISABLED) || ata_is_host_link(link)) 1454 hotplug_mask = SERR_PHYRDY_CHG | SERR_DEV_XCHG; 1455 else 1456 hotplug_mask = SERR_PHYRDY_CHG; 1457 1458 if (serror & hotplug_mask) 1459 ata_ehi_hotplugged(&ehc->i); 1460 1461 ehc->i.err_mask |= err_mask; 1462 ehc->i.action |= action; 1463 } 1464 1465 /** 1466 * ata_eh_analyze_ncq_error - analyze NCQ error 1467 * @link: ATA link to analyze NCQ error for 1468 * 1469 * Read log page 10h, determine the offending qc and acquire 1470 * error status TF. For NCQ device errors, all LLDDs have to do 1471 * is setting AC_ERR_DEV in ehi->err_mask. This function takes 1472 * care of the rest. 1473 * 1474 * LOCKING: 1475 * Kernel thread context (may sleep). 1476 */ 1477 void ata_eh_analyze_ncq_error(struct ata_link *link) 1478 { 1479 struct ata_port *ap = link->ap; 1480 struct ata_eh_context *ehc = &link->eh_context; 1481 struct ata_device *dev = link->device; 1482 struct ata_queued_cmd *qc; 1483 struct ata_taskfile tf; 1484 int tag, rc; 1485 1486 /* if frozen, we can't do much */ 1487 if (ap->pflags & ATA_PFLAG_FROZEN) 1488 return; 1489 1490 /* is it NCQ device error? */ 1491 if (!link->sactive || !(ehc->i.err_mask & AC_ERR_DEV)) 1492 return; 1493 1494 /* has LLDD analyzed already? 
*/ 1495 for (tag = 0; tag < ATA_MAX_QUEUE; tag++) { 1496 qc = __ata_qc_from_tag(ap, tag); 1497 1498 if (!(qc->flags & ATA_QCFLAG_FAILED)) 1499 continue; 1500 1501 if (qc->err_mask) 1502 return; 1503 } 1504 1505 /* okay, this error is ours */ 1506 rc = ata_eh_read_log_10h(dev, &tag, &tf); 1507 if (rc) { 1508 ata_link_printk(link, KERN_ERR, "failed to read log page 10h " 1509 "(errno=%d)\n", rc); 1510 return; 1511 } 1512 1513 if (!(link->sactive & (1 << tag))) { 1514 ata_link_printk(link, KERN_ERR, "log page 10h reported " 1515 "inactive tag %d\n", tag); 1516 return; 1517 } 1518 1519 /* we've got the perpetrator, condemn it */ 1520 qc = __ata_qc_from_tag(ap, tag); 1521 memcpy(&qc->result_tf, &tf, sizeof(tf)); 1522 qc->result_tf.flags = ATA_TFLAG_ISADDR | ATA_TFLAG_LBA | ATA_TFLAG_LBA48; 1523 qc->err_mask |= AC_ERR_DEV | AC_ERR_NCQ; 1524 ehc->i.err_mask &= ~AC_ERR_DEV; 1525 } 1526 1527 /** 1528 * ata_eh_analyze_tf - analyze taskfile of a failed qc 1529 * @qc: qc to analyze 1530 * @tf: Taskfile registers to analyze 1531 * 1532 * Analyze taskfile of @qc and further determine cause of 1533 * failure. This function also requests ATAPI sense data if 1534 * avaliable. 1535 * 1536 * LOCKING: 1537 * Kernel thread context (may sleep). 
1538 * 1539 * RETURNS: 1540 * Determined recovery action 1541 */ 1542 static unsigned int ata_eh_analyze_tf(struct ata_queued_cmd *qc, 1543 const struct ata_taskfile *tf) 1544 { 1545 unsigned int tmp, action = 0; 1546 u8 stat = tf->command, err = tf->feature; 1547 1548 if ((stat & (ATA_BUSY | ATA_DRQ | ATA_DRDY)) != ATA_DRDY) { 1549 qc->err_mask |= AC_ERR_HSM; 1550 return ATA_EH_RESET; 1551 } 1552 1553 if (stat & (ATA_ERR | ATA_DF)) 1554 qc->err_mask |= AC_ERR_DEV; 1555 else 1556 return 0; 1557 1558 switch (qc->dev->class) { 1559 case ATA_DEV_ATA: 1560 if (err & ATA_ICRC) 1561 qc->err_mask |= AC_ERR_ATA_BUS; 1562 if (err & ATA_UNC) 1563 qc->err_mask |= AC_ERR_MEDIA; 1564 if (err & ATA_IDNF) 1565 qc->err_mask |= AC_ERR_INVALID; 1566 break; 1567 1568 case ATA_DEV_ATAPI: 1569 if (!(qc->ap->pflags & ATA_PFLAG_FROZEN)) { 1570 tmp = atapi_eh_request_sense(qc); 1571 if (!tmp) { 1572 /* ATA_QCFLAG_SENSE_VALID is used to 1573 * tell atapi_qc_complete() that sense 1574 * data is already valid. 1575 * 1576 * TODO: interpret sense data and set 1577 * appropriate err_mask. 
1578 */ 1579 qc->flags |= ATA_QCFLAG_SENSE_VALID; 1580 } else 1581 qc->err_mask |= tmp; 1582 } 1583 } 1584 1585 if (qc->err_mask & (AC_ERR_HSM | AC_ERR_TIMEOUT | AC_ERR_ATA_BUS)) 1586 action |= ATA_EH_RESET; 1587 1588 return action; 1589 } 1590 1591 static int ata_eh_categorize_error(unsigned int eflags, unsigned int err_mask, 1592 int *xfer_ok) 1593 { 1594 int base = 0; 1595 1596 if (!(eflags & ATA_EFLAG_DUBIOUS_XFER)) 1597 *xfer_ok = 1; 1598 1599 if (!*xfer_ok) 1600 base = ATA_ECAT_DUBIOUS_NONE; 1601 1602 if (err_mask & AC_ERR_ATA_BUS) 1603 return base + ATA_ECAT_ATA_BUS; 1604 1605 if (err_mask & AC_ERR_TIMEOUT) 1606 return base + ATA_ECAT_TOUT_HSM; 1607 1608 if (eflags & ATA_EFLAG_IS_IO) { 1609 if (err_mask & AC_ERR_HSM) 1610 return base + ATA_ECAT_TOUT_HSM; 1611 if ((err_mask & 1612 (AC_ERR_DEV|AC_ERR_MEDIA|AC_ERR_INVALID)) == AC_ERR_DEV) 1613 return base + ATA_ECAT_UNK_DEV; 1614 } 1615 1616 return 0; 1617 } 1618 1619 struct speed_down_verdict_arg { 1620 u64 since; 1621 int xfer_ok; 1622 int nr_errors[ATA_ECAT_NR]; 1623 }; 1624 1625 static int speed_down_verdict_cb(struct ata_ering_entry *ent, void *void_arg) 1626 { 1627 struct speed_down_verdict_arg *arg = void_arg; 1628 int cat; 1629 1630 if (ent->timestamp < arg->since) 1631 return -1; 1632 1633 cat = ata_eh_categorize_error(ent->eflags, ent->err_mask, 1634 &arg->xfer_ok); 1635 arg->nr_errors[cat]++; 1636 1637 return 0; 1638 } 1639 1640 /** 1641 * ata_eh_speed_down_verdict - Determine speed down verdict 1642 * @dev: Device of interest 1643 * 1644 * This function examines error ring of @dev and determines 1645 * whether NCQ needs to be turned off, transfer speed should be 1646 * stepped down, or falling back to PIO is necessary. 
 *
 *	ECAT_ATA_BUS	: ATA_BUS error for any command
 *
 *	ECAT_TOUT_HSM	: TIMEOUT for any command or HSM violation for
 *			  IO commands
 *
 *	ECAT_UNK_DEV	: Unknown DEV error for IO commands
 *
 *	ECAT_DUBIOUS_*	: Identical to above three but occurred while
 *			  data transfer hasn't been verified.
 *
 *	Verdicts are
 *
 *	NCQ_OFF		: Turn off NCQ.
 *
 *	SPEED_DOWN	: Speed down transfer speed but don't fall back
 *			  to PIO.
 *
 *	FALLBACK_TO_PIO	: Fall back to PIO.
 *
 *	Even if multiple verdicts are returned, only one action is
 *	taken per error.  An action triggered by non-DUBIOUS errors
 *	clears ering, while one triggered by DUBIOUS_* errors doesn't.
 *	This is to expedite speed down decisions right after device is
 *	initially configured.
 *
 *	The following are the speed down rules.  #1 and #2 deal with
 *	DUBIOUS errors.
 *
 *	1. If more than one DUBIOUS_ATA_BUS or DUBIOUS_TOUT_HSM errors
 *	   occurred during last 5 mins, SPEED_DOWN and FALLBACK_TO_PIO.
 *
 *	2. If more than one DUBIOUS_TOUT_HSM or DUBIOUS_UNK_DEV errors
 *	   occurred during last 5 mins, NCQ_OFF.
 *
 *	3. If more than 6 ATA_BUS, TOUT_HSM or UNK_DEV errors
 *	   occurred during last 5 mins, FALLBACK_TO_PIO
 *
 *	4. If more than 3 TOUT_HSM or UNK_DEV errors occurred
 *	   during last 10 mins, NCQ_OFF.
 *
 *	5. If more than 3 ATA_BUS or TOUT_HSM errors, or more than 6
 *	   UNK_DEV errors occurred during last 10 mins, SPEED_DOWN.
 *
 *	LOCKING:
 *	Inherited from caller.
 *
 *	RETURNS:
 *	OR of ATA_EH_SPDN_* flags.
1696 */ 1697 static unsigned int ata_eh_speed_down_verdict(struct ata_device *dev) 1698 { 1699 const u64 j5mins = 5LLU * 60 * HZ, j10mins = 10LLU * 60 * HZ; 1700 u64 j64 = get_jiffies_64(); 1701 struct speed_down_verdict_arg arg; 1702 unsigned int verdict = 0; 1703 1704 /* scan past 5 mins of error history */ 1705 memset(&arg, 0, sizeof(arg)); 1706 arg.since = j64 - min(j64, j5mins); 1707 ata_ering_map(&dev->ering, speed_down_verdict_cb, &arg); 1708 1709 if (arg.nr_errors[ATA_ECAT_DUBIOUS_ATA_BUS] + 1710 arg.nr_errors[ATA_ECAT_DUBIOUS_TOUT_HSM] > 1) 1711 verdict |= ATA_EH_SPDN_SPEED_DOWN | 1712 ATA_EH_SPDN_FALLBACK_TO_PIO | ATA_EH_SPDN_KEEP_ERRORS; 1713 1714 if (arg.nr_errors[ATA_ECAT_DUBIOUS_TOUT_HSM] + 1715 arg.nr_errors[ATA_ECAT_DUBIOUS_UNK_DEV] > 1) 1716 verdict |= ATA_EH_SPDN_NCQ_OFF | ATA_EH_SPDN_KEEP_ERRORS; 1717 1718 if (arg.nr_errors[ATA_ECAT_ATA_BUS] + 1719 arg.nr_errors[ATA_ECAT_TOUT_HSM] + 1720 arg.nr_errors[ATA_ECAT_UNK_DEV] > 6) 1721 verdict |= ATA_EH_SPDN_FALLBACK_TO_PIO; 1722 1723 /* scan past 10 mins of error history */ 1724 memset(&arg, 0, sizeof(arg)); 1725 arg.since = j64 - min(j64, j10mins); 1726 ata_ering_map(&dev->ering, speed_down_verdict_cb, &arg); 1727 1728 if (arg.nr_errors[ATA_ECAT_TOUT_HSM] + 1729 arg.nr_errors[ATA_ECAT_UNK_DEV] > 3) 1730 verdict |= ATA_EH_SPDN_NCQ_OFF; 1731 1732 if (arg.nr_errors[ATA_ECAT_ATA_BUS] + 1733 arg.nr_errors[ATA_ECAT_TOUT_HSM] > 3 || 1734 arg.nr_errors[ATA_ECAT_UNK_DEV] > 6) 1735 verdict |= ATA_EH_SPDN_SPEED_DOWN; 1736 1737 return verdict; 1738 } 1739 1740 /** 1741 * ata_eh_speed_down - record error and speed down if necessary 1742 * @dev: Failed device 1743 * @eflags: mask of ATA_EFLAG_* flags 1744 * @err_mask: err_mask of the error 1745 * 1746 * Record error and examine error history to determine whether 1747 * adjusting transmission speed is necessary. It also sets 1748 * transmission limits appropriately if such adjustment is 1749 * necessary. 
1750 * 1751 * LOCKING: 1752 * Kernel thread context (may sleep). 1753 * 1754 * RETURNS: 1755 * Determined recovery action. 1756 */ 1757 static unsigned int ata_eh_speed_down(struct ata_device *dev, 1758 unsigned int eflags, unsigned int err_mask) 1759 { 1760 struct ata_link *link = dev->link; 1761 int xfer_ok = 0; 1762 unsigned int verdict; 1763 unsigned int action = 0; 1764 1765 /* don't bother if Cat-0 error */ 1766 if (ata_eh_categorize_error(eflags, err_mask, &xfer_ok) == 0) 1767 return 0; 1768 1769 /* record error and determine whether speed down is necessary */ 1770 ata_ering_record(&dev->ering, eflags, err_mask); 1771 verdict = ata_eh_speed_down_verdict(dev); 1772 1773 /* turn off NCQ? */ 1774 if ((verdict & ATA_EH_SPDN_NCQ_OFF) && 1775 (dev->flags & (ATA_DFLAG_PIO | ATA_DFLAG_NCQ | 1776 ATA_DFLAG_NCQ_OFF)) == ATA_DFLAG_NCQ) { 1777 dev->flags |= ATA_DFLAG_NCQ_OFF; 1778 ata_dev_printk(dev, KERN_WARNING, 1779 "NCQ disabled due to excessive errors\n"); 1780 goto done; 1781 } 1782 1783 /* speed down? */ 1784 if (verdict & ATA_EH_SPDN_SPEED_DOWN) { 1785 /* speed down SATA link speed if possible */ 1786 if (sata_down_spd_limit(link) == 0) { 1787 action |= ATA_EH_RESET; 1788 goto done; 1789 } 1790 1791 /* lower transfer mode */ 1792 if (dev->spdn_cnt < 2) { 1793 static const int dma_dnxfer_sel[] = 1794 { ATA_DNXFER_DMA, ATA_DNXFER_40C }; 1795 static const int pio_dnxfer_sel[] = 1796 { ATA_DNXFER_PIO, ATA_DNXFER_FORCE_PIO0 }; 1797 int sel; 1798 1799 if (dev->xfer_shift != ATA_SHIFT_PIO) 1800 sel = dma_dnxfer_sel[dev->spdn_cnt]; 1801 else 1802 sel = pio_dnxfer_sel[dev->spdn_cnt]; 1803 1804 dev->spdn_cnt++; 1805 1806 if (ata_down_xfermask_limit(dev, sel) == 0) { 1807 action |= ATA_EH_RESET; 1808 goto done; 1809 } 1810 } 1811 } 1812 1813 /* Fall back to PIO? Slowing down to PIO is meaningless for 1814 * SATA ATA devices. Consider it only for PATA and SATAPI. 
1815 */ 1816 if ((verdict & ATA_EH_SPDN_FALLBACK_TO_PIO) && (dev->spdn_cnt >= 2) && 1817 (link->ap->cbl != ATA_CBL_SATA || dev->class == ATA_DEV_ATAPI) && 1818 (dev->xfer_shift != ATA_SHIFT_PIO)) { 1819 if (ata_down_xfermask_limit(dev, ATA_DNXFER_FORCE_PIO) == 0) { 1820 dev->spdn_cnt = 0; 1821 action |= ATA_EH_RESET; 1822 goto done; 1823 } 1824 } 1825 1826 return 0; 1827 done: 1828 /* device has been slowed down, blow error history */ 1829 if (!(verdict & ATA_EH_SPDN_KEEP_ERRORS)) 1830 ata_ering_clear(&dev->ering); 1831 return action; 1832 } 1833 1834 /** 1835 * ata_eh_link_autopsy - analyze error and determine recovery action 1836 * @link: host link to perform autopsy on 1837 * 1838 * Analyze why @link failed and determine which recovery actions 1839 * are needed. This function also sets more detailed AC_ERR_* 1840 * values and fills sense data for ATAPI CHECK SENSE. 1841 * 1842 * LOCKING: 1843 * Kernel thread context (may sleep). 1844 */ 1845 static void ata_eh_link_autopsy(struct ata_link *link) 1846 { 1847 struct ata_port *ap = link->ap; 1848 struct ata_eh_context *ehc = &link->eh_context; 1849 struct ata_device *dev; 1850 unsigned int all_err_mask = 0, eflags = 0; 1851 int tag; 1852 u32 serror; 1853 int rc; 1854 1855 DPRINTK("ENTER\n"); 1856 1857 if (ehc->i.flags & ATA_EHI_NO_AUTOPSY) 1858 return; 1859 1860 /* obtain and analyze SError */ 1861 rc = sata_scr_read(link, SCR_ERROR, &serror); 1862 if (rc == 0) { 1863 ehc->i.serror |= serror; 1864 ata_eh_analyze_serror(link); 1865 } else if (rc != -EOPNOTSUPP) { 1866 /* SError read failed, force reset and probing */ 1867 ehc->i.probe_mask |= ATA_ALL_DEVICES; 1868 ehc->i.action |= ATA_EH_RESET; 1869 ehc->i.err_mask |= AC_ERR_OTHER; 1870 } 1871 1872 /* analyze NCQ failure */ 1873 ata_eh_analyze_ncq_error(link); 1874 1875 /* any real error trumps AC_ERR_OTHER */ 1876 if (ehc->i.err_mask & ~AC_ERR_OTHER) 1877 ehc->i.err_mask &= ~AC_ERR_OTHER; 1878 1879 all_err_mask |= ehc->i.err_mask; 1880 1881 for (tag = 0; tag < 
ATA_MAX_QUEUE; tag++) { 1882 struct ata_queued_cmd *qc = __ata_qc_from_tag(ap, tag); 1883 1884 if (!(qc->flags & ATA_QCFLAG_FAILED) || qc->dev->link != link) 1885 continue; 1886 1887 /* inherit upper level err_mask */ 1888 qc->err_mask |= ehc->i.err_mask; 1889 1890 /* analyze TF */ 1891 ehc->i.action |= ata_eh_analyze_tf(qc, &qc->result_tf); 1892 1893 /* DEV errors are probably spurious in case of ATA_BUS error */ 1894 if (qc->err_mask & AC_ERR_ATA_BUS) 1895 qc->err_mask &= ~(AC_ERR_DEV | AC_ERR_MEDIA | 1896 AC_ERR_INVALID); 1897 1898 /* any real error trumps unknown error */ 1899 if (qc->err_mask & ~AC_ERR_OTHER) 1900 qc->err_mask &= ~AC_ERR_OTHER; 1901 1902 /* SENSE_VALID trumps dev/unknown error and revalidation */ 1903 if (qc->flags & ATA_QCFLAG_SENSE_VALID) 1904 qc->err_mask &= ~(AC_ERR_DEV | AC_ERR_OTHER); 1905 1906 /* determine whether the command is worth retrying */ 1907 if (!(qc->err_mask & AC_ERR_INVALID) && 1908 ((qc->flags & ATA_QCFLAG_IO) || qc->err_mask != AC_ERR_DEV)) 1909 qc->flags |= ATA_QCFLAG_RETRY; 1910 1911 /* accumulate error info */ 1912 ehc->i.dev = qc->dev; 1913 all_err_mask |= qc->err_mask; 1914 if (qc->flags & ATA_QCFLAG_IO) 1915 eflags |= ATA_EFLAG_IS_IO; 1916 } 1917 1918 /* enforce default EH actions */ 1919 if (ap->pflags & ATA_PFLAG_FROZEN || 1920 all_err_mask & (AC_ERR_HSM | AC_ERR_TIMEOUT)) 1921 ehc->i.action |= ATA_EH_RESET; 1922 else if (((eflags & ATA_EFLAG_IS_IO) && all_err_mask) || 1923 (!(eflags & ATA_EFLAG_IS_IO) && (all_err_mask & ~AC_ERR_DEV))) 1924 ehc->i.action |= ATA_EH_REVALIDATE; 1925 1926 /* If we have offending qcs and the associated failed device, 1927 * perform per-dev EH action only on the offending device. 
1928 */ 1929 if (ehc->i.dev) { 1930 ehc->i.dev_action[ehc->i.dev->devno] |= 1931 ehc->i.action & ATA_EH_PERDEV_MASK; 1932 ehc->i.action &= ~ATA_EH_PERDEV_MASK; 1933 } 1934 1935 /* propagate timeout to host link */ 1936 if ((all_err_mask & AC_ERR_TIMEOUT) && !ata_is_host_link(link)) 1937 ap->link.eh_context.i.err_mask |= AC_ERR_TIMEOUT; 1938 1939 /* record error and consider speeding down */ 1940 dev = ehc->i.dev; 1941 if (!dev && ((ata_link_max_devices(link) == 1 && 1942 ata_dev_enabled(link->device)))) 1943 dev = link->device; 1944 1945 if (dev) { 1946 if (dev->flags & ATA_DFLAG_DUBIOUS_XFER) 1947 eflags |= ATA_EFLAG_DUBIOUS_XFER; 1948 ehc->i.action |= ata_eh_speed_down(dev, eflags, all_err_mask); 1949 } 1950 1951 DPRINTK("EXIT\n"); 1952 } 1953 1954 /** 1955 * ata_eh_autopsy - analyze error and determine recovery action 1956 * @ap: host port to perform autopsy on 1957 * 1958 * Analyze all links of @ap and determine why they failed and 1959 * which recovery actions are needed. 1960 * 1961 * LOCKING: 1962 * Kernel thread context (may sleep). 1963 */ 1964 void ata_eh_autopsy(struct ata_port *ap) 1965 { 1966 struct ata_link *link; 1967 1968 ata_port_for_each_link(link, ap) 1969 ata_eh_link_autopsy(link); 1970 1971 /* Autopsy of fanout ports can affect host link autopsy. 1972 * Perform host link autopsy last. 1973 */ 1974 if (sata_pmp_attached(ap)) 1975 ata_eh_link_autopsy(&ap->link); 1976 } 1977 1978 /** 1979 * ata_eh_link_report - report error handling to user 1980 * @link: ATA link EH is going on 1981 * 1982 * Report EH to user. 1983 * 1984 * LOCKING: 1985 * None. 
1986 */ 1987 static void ata_eh_link_report(struct ata_link *link) 1988 { 1989 struct ata_port *ap = link->ap; 1990 struct ata_eh_context *ehc = &link->eh_context; 1991 const char *frozen, *desc; 1992 char tries_buf[6]; 1993 int tag, nr_failed = 0; 1994 1995 if (ehc->i.flags & ATA_EHI_QUIET) 1996 return; 1997 1998 desc = NULL; 1999 if (ehc->i.desc[0] != '\0') 2000 desc = ehc->i.desc; 2001 2002 for (tag = 0; tag < ATA_MAX_QUEUE; tag++) { 2003 struct ata_queued_cmd *qc = __ata_qc_from_tag(ap, tag); 2004 2005 if (!(qc->flags & ATA_QCFLAG_FAILED) || qc->dev->link != link || 2006 ((qc->flags & ATA_QCFLAG_QUIET) && 2007 qc->err_mask == AC_ERR_DEV)) 2008 continue; 2009 if (qc->flags & ATA_QCFLAG_SENSE_VALID && !qc->err_mask) 2010 continue; 2011 2012 nr_failed++; 2013 } 2014 2015 if (!nr_failed && !ehc->i.err_mask) 2016 return; 2017 2018 frozen = ""; 2019 if (ap->pflags & ATA_PFLAG_FROZEN) 2020 frozen = " frozen"; 2021 2022 memset(tries_buf, 0, sizeof(tries_buf)); 2023 if (ap->eh_tries < ATA_EH_MAX_TRIES) 2024 snprintf(tries_buf, sizeof(tries_buf) - 1, " t%d", 2025 ap->eh_tries); 2026 2027 if (ehc->i.dev) { 2028 ata_dev_printk(ehc->i.dev, KERN_ERR, "exception Emask 0x%x " 2029 "SAct 0x%x SErr 0x%x action 0x%x%s%s\n", 2030 ehc->i.err_mask, link->sactive, ehc->i.serror, 2031 ehc->i.action, frozen, tries_buf); 2032 if (desc) 2033 ata_dev_printk(ehc->i.dev, KERN_ERR, "%s\n", desc); 2034 } else { 2035 ata_link_printk(link, KERN_ERR, "exception Emask 0x%x " 2036 "SAct 0x%x SErr 0x%x action 0x%x%s%s\n", 2037 ehc->i.err_mask, link->sactive, ehc->i.serror, 2038 ehc->i.action, frozen, tries_buf); 2039 if (desc) 2040 ata_link_printk(link, KERN_ERR, "%s\n", desc); 2041 } 2042 2043 if (ehc->i.serror) 2044 ata_port_printk(ap, KERN_ERR, 2045 "SError: { %s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s}\n", 2046 ehc->i.serror & SERR_DATA_RECOVERED ? "RecovData " : "", 2047 ehc->i.serror & SERR_COMM_RECOVERED ? "RecovComm " : "", 2048 ehc->i.serror & SERR_DATA ? 
"UnrecovData " : "", 2049 ehc->i.serror & SERR_PERSISTENT ? "Persist " : "", 2050 ehc->i.serror & SERR_PROTOCOL ? "Proto " : "", 2051 ehc->i.serror & SERR_INTERNAL ? "HostInt " : "", 2052 ehc->i.serror & SERR_PHYRDY_CHG ? "PHYRdyChg " : "", 2053 ehc->i.serror & SERR_PHY_INT_ERR ? "PHYInt " : "", 2054 ehc->i.serror & SERR_COMM_WAKE ? "CommWake " : "", 2055 ehc->i.serror & SERR_10B_8B_ERR ? "10B8B " : "", 2056 ehc->i.serror & SERR_DISPARITY ? "Dispar " : "", 2057 ehc->i.serror & SERR_CRC ? "BadCRC " : "", 2058 ehc->i.serror & SERR_HANDSHAKE ? "Handshk " : "", 2059 ehc->i.serror & SERR_LINK_SEQ_ERR ? "LinkSeq " : "", 2060 ehc->i.serror & SERR_TRANS_ST_ERROR ? "TrStaTrns " : "", 2061 ehc->i.serror & SERR_UNRECOG_FIS ? "UnrecFIS " : "", 2062 ehc->i.serror & SERR_DEV_XCHG ? "DevExch " : ""); 2063 2064 for (tag = 0; tag < ATA_MAX_QUEUE; tag++) { 2065 struct ata_queued_cmd *qc = __ata_qc_from_tag(ap, tag); 2066 struct ata_taskfile *cmd = &qc->tf, *res = &qc->result_tf; 2067 const u8 *cdb = qc->cdb; 2068 char data_buf[20] = ""; 2069 char cdb_buf[70] = ""; 2070 2071 if (!(qc->flags & ATA_QCFLAG_FAILED) || 2072 qc->dev->link != link || !qc->err_mask) 2073 continue; 2074 2075 if (qc->dma_dir != DMA_NONE) { 2076 static const char *dma_str[] = { 2077 [DMA_BIDIRECTIONAL] = "bidi", 2078 [DMA_TO_DEVICE] = "out", 2079 [DMA_FROM_DEVICE] = "in", 2080 }; 2081 static const char *prot_str[] = { 2082 [ATA_PROT_PIO] = "pio", 2083 [ATA_PROT_DMA] = "dma", 2084 [ATA_PROT_NCQ] = "ncq", 2085 [ATAPI_PROT_PIO] = "pio", 2086 [ATAPI_PROT_DMA] = "dma", 2087 }; 2088 2089 snprintf(data_buf, sizeof(data_buf), " %s %u %s", 2090 prot_str[qc->tf.protocol], qc->nbytes, 2091 dma_str[qc->dma_dir]); 2092 } 2093 2094 if (ata_is_atapi(qc->tf.protocol)) 2095 snprintf(cdb_buf, sizeof(cdb_buf), 2096 "cdb %02x %02x %02x %02x %02x %02x %02x %02x " 2097 "%02x %02x %02x %02x %02x %02x %02x %02x\n ", 2098 cdb[0], cdb[1], cdb[2], cdb[3], 2099 cdb[4], cdb[5], cdb[6], cdb[7], 2100 cdb[8], cdb[9], cdb[10], cdb[11], 2101 
cdb[12], cdb[13], cdb[14], cdb[15]); 2102 2103 ata_dev_printk(qc->dev, KERN_ERR, 2104 "cmd %02x/%02x:%02x:%02x:%02x:%02x/%02x:%02x:%02x:%02x:%02x/%02x " 2105 "tag %d%s\n %s" 2106 "res %02x/%02x:%02x:%02x:%02x:%02x/%02x:%02x:%02x:%02x:%02x/%02x " 2107 "Emask 0x%x (%s)%s\n", 2108 cmd->command, cmd->feature, cmd->nsect, 2109 cmd->lbal, cmd->lbam, cmd->lbah, 2110 cmd->hob_feature, cmd->hob_nsect, 2111 cmd->hob_lbal, cmd->hob_lbam, cmd->hob_lbah, 2112 cmd->device, qc->tag, data_buf, cdb_buf, 2113 res->command, res->feature, res->nsect, 2114 res->lbal, res->lbam, res->lbah, 2115 res->hob_feature, res->hob_nsect, 2116 res->hob_lbal, res->hob_lbam, res->hob_lbah, 2117 res->device, qc->err_mask, ata_err_string(qc->err_mask), 2118 qc->err_mask & AC_ERR_NCQ ? " <F>" : ""); 2119 2120 if (res->command & (ATA_BUSY | ATA_DRDY | ATA_DF | ATA_DRQ | 2121 ATA_ERR)) { 2122 if (res->command & ATA_BUSY) 2123 ata_dev_printk(qc->dev, KERN_ERR, 2124 "status: { Busy }\n"); 2125 else 2126 ata_dev_printk(qc->dev, KERN_ERR, 2127 "status: { %s%s%s%s}\n", 2128 res->command & ATA_DRDY ? "DRDY " : "", 2129 res->command & ATA_DF ? "DF " : "", 2130 res->command & ATA_DRQ ? "DRQ " : "", 2131 res->command & ATA_ERR ? "ERR " : ""); 2132 } 2133 2134 if (cmd->command != ATA_CMD_PACKET && 2135 (res->feature & (ATA_ICRC | ATA_UNC | ATA_IDNF | 2136 ATA_ABORTED))) 2137 ata_dev_printk(qc->dev, KERN_ERR, 2138 "error: { %s%s%s%s}\n", 2139 res->feature & ATA_ICRC ? "ICRC " : "", 2140 res->feature & ATA_UNC ? "UNC " : "", 2141 res->feature & ATA_IDNF ? "IDNF " : "", 2142 res->feature & ATA_ABORTED ? "ABRT " : ""); 2143 } 2144 } 2145 2146 /** 2147 * ata_eh_report - report error handling to user 2148 * @ap: ATA port to report EH about 2149 * 2150 * Report EH to user. 2151 * 2152 * LOCKING: 2153 * None. 
2154 */ 2155 void ata_eh_report(struct ata_port *ap) 2156 { 2157 struct ata_link *link; 2158 2159 __ata_port_for_each_link(link, ap) 2160 ata_eh_link_report(link); 2161 } 2162 2163 static int ata_do_reset(struct ata_link *link, ata_reset_fn_t reset, 2164 unsigned int *classes, unsigned long deadline) 2165 { 2166 struct ata_device *dev; 2167 2168 ata_link_for_each_dev(dev, link) 2169 classes[dev->devno] = ATA_DEV_UNKNOWN; 2170 2171 return reset(link, classes, deadline); 2172 } 2173 2174 static int ata_eh_followup_srst_needed(struct ata_link *link, 2175 int rc, int classify, 2176 const unsigned int *classes) 2177 { 2178 if ((link->flags & ATA_LFLAG_NO_SRST) || ata_link_offline(link)) 2179 return 0; 2180 if (rc == -EAGAIN) { 2181 if (classify) 2182 return 1; 2183 rc = 0; 2184 } 2185 if (rc != 0) 2186 return 0; 2187 if (sata_pmp_supported(link->ap) && ata_is_host_link(link)) 2188 return 1; 2189 return 0; 2190 } 2191 2192 int ata_eh_reset(struct ata_link *link, int classify, 2193 ata_prereset_fn_t prereset, ata_reset_fn_t softreset, 2194 ata_reset_fn_t hardreset, ata_postreset_fn_t postreset) 2195 { 2196 struct ata_port *ap = link->ap; 2197 struct ata_eh_context *ehc = &link->eh_context; 2198 unsigned int *classes = ehc->classes; 2199 unsigned int lflags = link->flags; 2200 int verbose = !(ehc->i.flags & ATA_EHI_QUIET); 2201 int max_tries = 0, try = 0; 2202 struct ata_device *dev; 2203 unsigned long deadline, now; 2204 ata_reset_fn_t reset; 2205 unsigned long flags; 2206 u32 sstatus; 2207 int nr_known, rc; 2208 2209 /* 2210 * Prepare to reset 2211 */ 2212 while (ata_eh_reset_timeouts[max_tries] != ULONG_MAX) 2213 max_tries++; 2214 2215 now = jiffies; 2216 deadline = ata_deadline(ehc->last_reset, ATA_EH_RESET_COOL_DOWN); 2217 if (time_before(now, deadline)) 2218 schedule_timeout_uninterruptible(deadline - now); 2219 2220 spin_lock_irqsave(ap->lock, flags); 2221 ap->pflags |= ATA_PFLAG_RESETTING; 2222 spin_unlock_irqrestore(ap->lock, flags); 2223 2224 
ata_eh_about_to_do(link, NULL, ATA_EH_RESET); 2225 ehc->last_reset = jiffies; 2226 2227 ata_link_for_each_dev(dev, link) { 2228 /* If we issue an SRST then an ATA drive (not ATAPI) 2229 * may change configuration and be in PIO0 timing. If 2230 * we do a hard reset (or are coming from power on) 2231 * this is true for ATA or ATAPI. Until we've set a 2232 * suitable controller mode we should not touch the 2233 * bus as we may be talking too fast. 2234 */ 2235 dev->pio_mode = XFER_PIO_0; 2236 2237 /* If the controller has a pio mode setup function 2238 * then use it to set the chipset to rights. Don't 2239 * touch the DMA setup as that will be dealt with when 2240 * configuring devices. 2241 */ 2242 if (ap->ops->set_piomode) 2243 ap->ops->set_piomode(ap, dev); 2244 } 2245 2246 /* prefer hardreset */ 2247 reset = NULL; 2248 ehc->i.action &= ~ATA_EH_RESET; 2249 if (hardreset) { 2250 reset = hardreset; 2251 ehc->i.action = ATA_EH_HARDRESET; 2252 } else if (softreset) { 2253 reset = softreset; 2254 ehc->i.action = ATA_EH_SOFTRESET; 2255 } 2256 2257 if (prereset) { 2258 rc = prereset(link, 2259 ata_deadline(jiffies, ATA_EH_PRERESET_TIMEOUT)); 2260 if (rc) { 2261 if (rc == -ENOENT) { 2262 ata_link_printk(link, KERN_DEBUG, 2263 "port disabled. ignoring.\n"); 2264 ehc->i.action &= ~ATA_EH_RESET; 2265 2266 ata_link_for_each_dev(dev, link) 2267 classes[dev->devno] = ATA_DEV_NONE; 2268 2269 rc = 0; 2270 } else 2271 ata_link_printk(link, KERN_ERR, 2272 "prereset failed (errno=%d)\n", rc); 2273 goto out; 2274 } 2275 2276 /* prereset() might have cleared ATA_EH_RESET. If so, 2277 * bang classes and return. 
2278 */ 2279 if (reset && !(ehc->i.action & ATA_EH_RESET)) { 2280 ata_link_for_each_dev(dev, link) 2281 classes[dev->devno] = ATA_DEV_NONE; 2282 rc = 0; 2283 goto out; 2284 } 2285 } 2286 2287 retry: 2288 /* 2289 * Perform reset 2290 */ 2291 ehc->last_reset = jiffies; 2292 if (ata_is_host_link(link)) 2293 ata_eh_freeze_port(ap); 2294 2295 deadline = ata_deadline(jiffies, ata_eh_reset_timeouts[try++]); 2296 2297 if (reset) { 2298 if (verbose) 2299 ata_link_printk(link, KERN_INFO, "%s resetting link\n", 2300 reset == softreset ? "soft" : "hard"); 2301 2302 /* mark that this EH session started with reset */ 2303 if (reset == hardreset) 2304 ehc->i.flags |= ATA_EHI_DID_HARDRESET; 2305 else 2306 ehc->i.flags |= ATA_EHI_DID_SOFTRESET; 2307 2308 rc = ata_do_reset(link, reset, classes, deadline); 2309 2310 if (reset == hardreset && 2311 ata_eh_followup_srst_needed(link, rc, classify, classes)) { 2312 /* okay, let's do follow-up softreset */ 2313 reset = softreset; 2314 2315 if (!reset) { 2316 ata_link_printk(link, KERN_ERR, 2317 "follow-up softreset required " 2318 "but no softreset avaliable\n"); 2319 rc = -EINVAL; 2320 goto fail; 2321 } 2322 2323 ata_eh_about_to_do(link, NULL, ATA_EH_RESET); 2324 rc = ata_do_reset(link, reset, classes, deadline); 2325 } 2326 2327 /* -EAGAIN can happen if we skipped followup SRST */ 2328 if (rc && rc != -EAGAIN) 2329 goto fail; 2330 } else { 2331 if (verbose) 2332 ata_link_printk(link, KERN_INFO, "no reset method " 2333 "available, skipping reset\n"); 2334 if (!(lflags & ATA_LFLAG_ASSUME_CLASS)) 2335 lflags |= ATA_LFLAG_ASSUME_ATA; 2336 } 2337 2338 /* 2339 * Post-reset processing 2340 */ 2341 ata_link_for_each_dev(dev, link) { 2342 /* After the reset, the device state is PIO 0 and the 2343 * controller state is undefined. Reset also wakes up 2344 * drives from sleeping mode. 
2345 */ 2346 dev->pio_mode = XFER_PIO_0; 2347 dev->flags &= ~ATA_DFLAG_SLEEPING; 2348 2349 if (ata_link_offline(link)) 2350 continue; 2351 2352 /* apply class override */ 2353 if (lflags & ATA_LFLAG_ASSUME_ATA) 2354 classes[dev->devno] = ATA_DEV_ATA; 2355 else if (lflags & ATA_LFLAG_ASSUME_SEMB) 2356 classes[dev->devno] = ATA_DEV_SEMB_UNSUP; /* not yet */ 2357 } 2358 2359 /* record current link speed */ 2360 if (sata_scr_read(link, SCR_STATUS, &sstatus) == 0) 2361 link->sata_spd = (sstatus >> 4) & 0xf; 2362 2363 /* thaw the port */ 2364 if (ata_is_host_link(link)) 2365 ata_eh_thaw_port(ap); 2366 2367 /* postreset() should clear hardware SError. Although SError 2368 * is cleared during link resume, clearing SError here is 2369 * necessary as some PHYs raise hotplug events after SRST. 2370 * This introduces race condition where hotplug occurs between 2371 * reset and here. This race is mediated by cross checking 2372 * link onlineness and classification result later. 2373 */ 2374 if (postreset) 2375 postreset(link, classes); 2376 2377 /* clear cached SError */ 2378 spin_lock_irqsave(link->ap->lock, flags); 2379 link->eh_info.serror = 0; 2380 spin_unlock_irqrestore(link->ap->lock, flags); 2381 2382 /* Make sure onlineness and classification result correspond. 2383 * Hotplug could have happened during reset and some 2384 * controllers fail to wait while a drive is spinning up after 2385 * being hotplugged causing misdetection. By cross checking 2386 * link onlineness and classification result, those conditions 2387 * can be reliably detected and retried. 
2388 */ 2389 nr_known = 0; 2390 ata_link_for_each_dev(dev, link) { 2391 /* convert all ATA_DEV_UNKNOWN to ATA_DEV_NONE */ 2392 if (classes[dev->devno] == ATA_DEV_UNKNOWN) 2393 classes[dev->devno] = ATA_DEV_NONE; 2394 else 2395 nr_known++; 2396 } 2397 2398 if (classify && !nr_known && ata_link_online(link)) { 2399 if (try < max_tries) { 2400 ata_link_printk(link, KERN_WARNING, "link online but " 2401 "device misclassified, retrying\n"); 2402 rc = -EAGAIN; 2403 goto fail; 2404 } 2405 ata_link_printk(link, KERN_WARNING, 2406 "link online but device misclassified, " 2407 "device detection might fail\n"); 2408 } 2409 2410 /* reset successful, schedule revalidation */ 2411 ata_eh_done(link, NULL, ATA_EH_RESET); 2412 ehc->last_reset = jiffies; 2413 ehc->i.action |= ATA_EH_REVALIDATE; 2414 2415 rc = 0; 2416 out: 2417 /* clear hotplug flag */ 2418 ehc->i.flags &= ~ATA_EHI_HOTPLUGGED; 2419 2420 spin_lock_irqsave(ap->lock, flags); 2421 ap->pflags &= ~ATA_PFLAG_RESETTING; 2422 spin_unlock_irqrestore(ap->lock, flags); 2423 2424 return rc; 2425 2426 fail: 2427 /* if SCR isn't accessible on a fan-out port, PMP needs to be reset */ 2428 if (!ata_is_host_link(link) && 2429 sata_scr_read(link, SCR_STATUS, &sstatus)) 2430 rc = -ERESTART; 2431 2432 if (rc == -ERESTART || try >= max_tries) 2433 goto out; 2434 2435 now = jiffies; 2436 if (time_before(now, deadline)) { 2437 unsigned long delta = deadline - now; 2438 2439 ata_link_printk(link, KERN_WARNING, 2440 "reset failed (errno=%d), retrying in %u secs\n", 2441 rc, DIV_ROUND_UP(jiffies_to_msecs(delta), 1000)); 2442 2443 while (delta) 2444 delta = schedule_timeout_uninterruptible(delta); 2445 } 2446 2447 if (rc == -EPIPE || try == max_tries - 1) 2448 sata_down_spd_limit(link); 2449 if (hardreset) 2450 reset = hardreset; 2451 goto retry; 2452 } 2453 2454 static int ata_eh_revalidate_and_attach(struct ata_link *link, 2455 struct ata_device **r_failed_dev) 2456 { 2457 struct ata_port *ap = link->ap; 2458 struct ata_eh_context *ehc = 
&link->eh_context; 2459 struct ata_device *dev; 2460 unsigned int new_mask = 0; 2461 unsigned long flags; 2462 int rc = 0; 2463 2464 DPRINTK("ENTER\n"); 2465 2466 /* For PATA drive side cable detection to work, IDENTIFY must 2467 * be done backwards such that PDIAG- is released by the slave 2468 * device before the master device is identified. 2469 */ 2470 ata_link_for_each_dev_reverse(dev, link) { 2471 unsigned int action = ata_eh_dev_action(dev); 2472 unsigned int readid_flags = 0; 2473 2474 if (ehc->i.flags & ATA_EHI_DID_RESET) 2475 readid_flags |= ATA_READID_POSTRESET; 2476 2477 if ((action & ATA_EH_REVALIDATE) && ata_dev_enabled(dev)) { 2478 WARN_ON(dev->class == ATA_DEV_PMP); 2479 2480 if (ata_link_offline(link)) { 2481 rc = -EIO; 2482 goto err; 2483 } 2484 2485 ata_eh_about_to_do(link, dev, ATA_EH_REVALIDATE); 2486 rc = ata_dev_revalidate(dev, ehc->classes[dev->devno], 2487 readid_flags); 2488 if (rc) 2489 goto err; 2490 2491 ata_eh_done(link, dev, ATA_EH_REVALIDATE); 2492 2493 /* Configuration may have changed, reconfigure 2494 * transfer mode. 2495 */ 2496 ehc->i.flags |= ATA_EHI_SETMODE; 2497 2498 /* schedule the scsi_rescan_device() here */ 2499 queue_work(ata_aux_wq, &(ap->scsi_rescan_task)); 2500 } else if (dev->class == ATA_DEV_UNKNOWN && 2501 ehc->tries[dev->devno] && 2502 ata_class_enabled(ehc->classes[dev->devno])) { 2503 dev->class = ehc->classes[dev->devno]; 2504 2505 if (dev->class == ATA_DEV_PMP) 2506 rc = sata_pmp_attach(dev); 2507 else 2508 rc = ata_dev_read_id(dev, &dev->class, 2509 readid_flags, dev->id); 2510 switch (rc) { 2511 case 0: 2512 new_mask |= 1 << dev->devno; 2513 break; 2514 case -ENOENT: 2515 /* IDENTIFY was issued to non-existent 2516 * device. No need to reset. Just 2517 * thaw and kill the device. 
2518 */ 2519 ata_eh_thaw_port(ap); 2520 dev->class = ATA_DEV_UNKNOWN; 2521 break; 2522 default: 2523 dev->class = ATA_DEV_UNKNOWN; 2524 goto err; 2525 } 2526 } 2527 } 2528 2529 /* PDIAG- should have been released, ask cable type if post-reset */ 2530 if ((ehc->i.flags & ATA_EHI_DID_RESET) && ata_is_host_link(link)) { 2531 if (ap->ops->cable_detect) 2532 ap->cbl = ap->ops->cable_detect(ap); 2533 ata_force_cbl(ap); 2534 } 2535 2536 /* Configure new devices forward such that user doesn't see 2537 * device detection messages backwards. 2538 */ 2539 ata_link_for_each_dev(dev, link) { 2540 if (!(new_mask & (1 << dev->devno)) || 2541 dev->class == ATA_DEV_PMP) 2542 continue; 2543 2544 ehc->i.flags |= ATA_EHI_PRINTINFO; 2545 rc = ata_dev_configure(dev); 2546 ehc->i.flags &= ~ATA_EHI_PRINTINFO; 2547 if (rc) 2548 goto err; 2549 2550 spin_lock_irqsave(ap->lock, flags); 2551 ap->pflags |= ATA_PFLAG_SCSI_HOTPLUG; 2552 spin_unlock_irqrestore(ap->lock, flags); 2553 2554 /* new device discovered, configure xfermode */ 2555 ehc->i.flags |= ATA_EHI_SETMODE; 2556 } 2557 2558 return 0; 2559 2560 err: 2561 *r_failed_dev = dev; 2562 DPRINTK("EXIT rc=%d\n", rc); 2563 return rc; 2564 } 2565 2566 /** 2567 * ata_set_mode - Program timings and issue SET FEATURES - XFER 2568 * @link: link on which timings will be programmed 2569 * @r_failed_dev: out paramter for failed device 2570 * 2571 * Set ATA device disk transfer mode (PIO3, UDMA6, etc.). If 2572 * ata_set_mode() fails, pointer to the failing device is 2573 * returned in @r_failed_dev. 2574 * 2575 * LOCKING: 2576 * PCI/etc. bus probe sem. 
2577 * 2578 * RETURNS: 2579 * 0 on success, negative errno otherwise 2580 */ 2581 int ata_set_mode(struct ata_link *link, struct ata_device **r_failed_dev) 2582 { 2583 struct ata_port *ap = link->ap; 2584 struct ata_device *dev; 2585 int rc; 2586 2587 /* if data transfer is verified, clear DUBIOUS_XFER on ering top */ 2588 ata_link_for_each_dev(dev, link) { 2589 if (!(dev->flags & ATA_DFLAG_DUBIOUS_XFER)) { 2590 struct ata_ering_entry *ent; 2591 2592 ent = ata_ering_top(&dev->ering); 2593 if (ent) 2594 ent->eflags &= ~ATA_EFLAG_DUBIOUS_XFER; 2595 } 2596 } 2597 2598 /* has private set_mode? */ 2599 if (ap->ops->set_mode) 2600 rc = ap->ops->set_mode(link, r_failed_dev); 2601 else 2602 rc = ata_do_set_mode(link, r_failed_dev); 2603 2604 /* if transfer mode has changed, set DUBIOUS_XFER on device */ 2605 ata_link_for_each_dev(dev, link) { 2606 struct ata_eh_context *ehc = &link->eh_context; 2607 u8 saved_xfer_mode = ehc->saved_xfer_mode[dev->devno]; 2608 u8 saved_ncq = !!(ehc->saved_ncq_enabled & (1 << dev->devno)); 2609 2610 if (dev->xfer_mode != saved_xfer_mode || 2611 ata_ncq_enabled(dev) != saved_ncq) 2612 dev->flags |= ATA_DFLAG_DUBIOUS_XFER; 2613 } 2614 2615 return rc; 2616 } 2617 2618 static int ata_link_nr_enabled(struct ata_link *link) 2619 { 2620 struct ata_device *dev; 2621 int cnt = 0; 2622 2623 ata_link_for_each_dev(dev, link) 2624 if (ata_dev_enabled(dev)) 2625 cnt++; 2626 return cnt; 2627 } 2628 2629 static int ata_link_nr_vacant(struct ata_link *link) 2630 { 2631 struct ata_device *dev; 2632 int cnt = 0; 2633 2634 ata_link_for_each_dev(dev, link) 2635 if (dev->class == ATA_DEV_UNKNOWN) 2636 cnt++; 2637 return cnt; 2638 } 2639 2640 static int ata_eh_skip_recovery(struct ata_link *link) 2641 { 2642 struct ata_port *ap = link->ap; 2643 struct ata_eh_context *ehc = &link->eh_context; 2644 struct ata_device *dev; 2645 2646 /* skip disabled links */ 2647 if (link->flags & ATA_LFLAG_DISABLED) 2648 return 1; 2649 2650 /* thaw frozen port and recover failed 
devices */ 2651 if ((ap->pflags & ATA_PFLAG_FROZEN) || ata_link_nr_enabled(link)) 2652 return 0; 2653 2654 /* reset at least once if reset is requested */ 2655 if ((ehc->i.action & ATA_EH_RESET) && 2656 !(ehc->i.flags & ATA_EHI_DID_RESET)) 2657 return 0; 2658 2659 /* skip if class codes for all vacant slots are ATA_DEV_NONE */ 2660 ata_link_for_each_dev(dev, link) { 2661 if (dev->class == ATA_DEV_UNKNOWN && 2662 ehc->classes[dev->devno] != ATA_DEV_NONE) 2663 return 0; 2664 } 2665 2666 return 1; 2667 } 2668 2669 static int ata_eh_schedule_probe(struct ata_device *dev) 2670 { 2671 struct ata_eh_context *ehc = &dev->link->eh_context; 2672 2673 if (!(ehc->i.probe_mask & (1 << dev->devno)) || 2674 (ehc->did_probe_mask & (1 << dev->devno))) 2675 return 0; 2676 2677 ata_eh_detach_dev(dev); 2678 ata_dev_init(dev); 2679 ehc->did_probe_mask |= (1 << dev->devno); 2680 ehc->i.action |= ATA_EH_RESET; 2681 ehc->saved_xfer_mode[dev->devno] = 0; 2682 ehc->saved_ncq_enabled &= ~(1 << dev->devno); 2683 2684 return 1; 2685 } 2686 2687 static int ata_eh_handle_dev_fail(struct ata_device *dev, int err) 2688 { 2689 struct ata_eh_context *ehc = &dev->link->eh_context; 2690 2691 ehc->tries[dev->devno]--; 2692 2693 switch (err) { 2694 case -ENODEV: 2695 /* device missing or wrong IDENTIFY data, schedule probing */ 2696 ehc->i.probe_mask |= (1 << dev->devno); 2697 case -EINVAL: 2698 /* give it just one more chance */ 2699 ehc->tries[dev->devno] = min(ehc->tries[dev->devno], 1); 2700 case -EIO: 2701 if (ehc->tries[dev->devno] == 1 && dev->pio_mode > XFER_PIO_0) { 2702 /* This is the last chance, better to slow 2703 * down than lose it. 
2704 */ 2705 sata_down_spd_limit(dev->link); 2706 ata_down_xfermask_limit(dev, ATA_DNXFER_PIO); 2707 } 2708 } 2709 2710 if (ata_dev_enabled(dev) && !ehc->tries[dev->devno]) { 2711 /* disable device if it has used up all its chances */ 2712 ata_dev_disable(dev); 2713 2714 /* detach if offline */ 2715 if (ata_link_offline(dev->link)) 2716 ata_eh_detach_dev(dev); 2717 2718 /* schedule probe if necessary */ 2719 if (ata_eh_schedule_probe(dev)) { 2720 ehc->tries[dev->devno] = ATA_EH_DEV_TRIES; 2721 memset(ehc->cmd_timeout_idx[dev->devno], 0, 2722 sizeof(ehc->cmd_timeout_idx[dev->devno])); 2723 } 2724 2725 return 1; 2726 } else { 2727 ehc->i.action |= ATA_EH_RESET; 2728 return 0; 2729 } 2730 } 2731 2732 /** 2733 * ata_eh_recover - recover host port after error 2734 * @ap: host port to recover 2735 * @prereset: prereset method (can be NULL) 2736 * @softreset: softreset method (can be NULL) 2737 * @hardreset: hardreset method (can be NULL) 2738 * @postreset: postreset method (can be NULL) 2739 * @r_failed_link: out parameter for failed link 2740 * 2741 * This is the alpha and omega, eum and yang, heart and soul of 2742 * libata exception handling. On entry, actions required to 2743 * recover each link and hotplug requests are recorded in the 2744 * link's eh_context. This function executes all the operations 2745 * with appropriate retrials and fallbacks to resurrect failed 2746 * devices, detach goners and greet newcomers. 2747 * 2748 * LOCKING: 2749 * Kernel thread context (may sleep). 2750 * 2751 * RETURNS: 2752 * 0 on success, -errno on failure. 
2753 */ 2754 int ata_eh_recover(struct ata_port *ap, ata_prereset_fn_t prereset, 2755 ata_reset_fn_t softreset, ata_reset_fn_t hardreset, 2756 ata_postreset_fn_t postreset, 2757 struct ata_link **r_failed_link) 2758 { 2759 struct ata_link *link; 2760 struct ata_device *dev; 2761 int nr_failed_devs; 2762 int rc; 2763 unsigned long flags; 2764 2765 DPRINTK("ENTER\n"); 2766 2767 /* prep for recovery */ 2768 ata_port_for_each_link(link, ap) { 2769 struct ata_eh_context *ehc = &link->eh_context; 2770 2771 /* re-enable link? */ 2772 if (ehc->i.action & ATA_EH_ENABLE_LINK) { 2773 ata_eh_about_to_do(link, NULL, ATA_EH_ENABLE_LINK); 2774 spin_lock_irqsave(ap->lock, flags); 2775 link->flags &= ~ATA_LFLAG_DISABLED; 2776 spin_unlock_irqrestore(ap->lock, flags); 2777 ata_eh_done(link, NULL, ATA_EH_ENABLE_LINK); 2778 } 2779 2780 ata_link_for_each_dev(dev, link) { 2781 if (link->flags & ATA_LFLAG_NO_RETRY) 2782 ehc->tries[dev->devno] = 1; 2783 else 2784 ehc->tries[dev->devno] = ATA_EH_DEV_TRIES; 2785 2786 /* collect port action mask recorded in dev actions */ 2787 ehc->i.action |= ehc->i.dev_action[dev->devno] & 2788 ~ATA_EH_PERDEV_MASK; 2789 ehc->i.dev_action[dev->devno] &= ATA_EH_PERDEV_MASK; 2790 2791 /* process hotplug request */ 2792 if (dev->flags & ATA_DFLAG_DETACH) 2793 ata_eh_detach_dev(dev); 2794 2795 /* schedule probe if necessary */ 2796 if (!ata_dev_enabled(dev)) 2797 ata_eh_schedule_probe(dev); 2798 } 2799 } 2800 2801 retry: 2802 rc = 0; 2803 nr_failed_devs = 0; 2804 2805 /* if UNLOADING, finish immediately */ 2806 if (ap->pflags & ATA_PFLAG_UNLOADING) 2807 goto out; 2808 2809 /* prep for EH */ 2810 ata_port_for_each_link(link, ap) { 2811 struct ata_eh_context *ehc = &link->eh_context; 2812 2813 /* skip EH if possible. 
*/ 2814 if (ata_eh_skip_recovery(link)) 2815 ehc->i.action = 0; 2816 2817 ata_link_for_each_dev(dev, link) 2818 ehc->classes[dev->devno] = ATA_DEV_UNKNOWN; 2819 } 2820 2821 /* reset */ 2822 ata_port_for_each_link(link, ap) { 2823 struct ata_eh_context *ehc = &link->eh_context; 2824 2825 if (!(ehc->i.action & ATA_EH_RESET)) 2826 continue; 2827 2828 rc = ata_eh_reset(link, ata_link_nr_vacant(link), 2829 prereset, softreset, hardreset, postreset); 2830 if (rc) { 2831 ata_link_printk(link, KERN_ERR, 2832 "reset failed, giving up\n"); 2833 goto out; 2834 } 2835 } 2836 2837 /* the rest */ 2838 ata_port_for_each_link(link, ap) { 2839 struct ata_eh_context *ehc = &link->eh_context; 2840 2841 /* revalidate existing devices and attach new ones */ 2842 rc = ata_eh_revalidate_and_attach(link, &dev); 2843 if (rc) 2844 goto dev_fail; 2845 2846 /* if PMP got attached, return, pmp EH will take care of it */ 2847 if (link->device->class == ATA_DEV_PMP) { 2848 ehc->i.action = 0; 2849 return 0; 2850 } 2851 2852 /* configure transfer mode if necessary */ 2853 if (ehc->i.flags & ATA_EHI_SETMODE) { 2854 rc = ata_set_mode(link, &dev); 2855 if (rc) 2856 goto dev_fail; 2857 ehc->i.flags &= ~ATA_EHI_SETMODE; 2858 } 2859 2860 if (ehc->i.action & ATA_EH_LPM) 2861 ata_link_for_each_dev(dev, link) 2862 ata_dev_enable_pm(dev, ap->pm_policy); 2863 2864 /* this link is okay now */ 2865 ehc->i.flags = 0; 2866 continue; 2867 2868 dev_fail: 2869 nr_failed_devs++; 2870 ata_eh_handle_dev_fail(dev, rc); 2871 2872 if (ap->pflags & ATA_PFLAG_FROZEN) { 2873 /* PMP reset requires working host port. 2874 * Can't retry if it's frozen. 
2875 */ 2876 if (sata_pmp_attached(ap)) 2877 goto out; 2878 break; 2879 } 2880 } 2881 2882 if (nr_failed_devs) 2883 goto retry; 2884 2885 out: 2886 if (rc && r_failed_link) 2887 *r_failed_link = link; 2888 2889 DPRINTK("EXIT, rc=%d\n", rc); 2890 return rc; 2891 } 2892 2893 /** 2894 * ata_eh_finish - finish up EH 2895 * @ap: host port to finish EH for 2896 * 2897 * Recovery is complete. Clean up EH states and retry or finish 2898 * failed qcs. 2899 * 2900 * LOCKING: 2901 * None. 2902 */ 2903 void ata_eh_finish(struct ata_port *ap) 2904 { 2905 int tag; 2906 2907 /* retry or finish qcs */ 2908 for (tag = 0; tag < ATA_MAX_QUEUE; tag++) { 2909 struct ata_queued_cmd *qc = __ata_qc_from_tag(ap, tag); 2910 2911 if (!(qc->flags & ATA_QCFLAG_FAILED)) 2912 continue; 2913 2914 if (qc->err_mask) { 2915 /* FIXME: Once EH migration is complete, 2916 * generate sense data in this function, 2917 * considering both err_mask and tf. 2918 */ 2919 if (qc->flags & ATA_QCFLAG_RETRY) 2920 ata_eh_qc_retry(qc); 2921 else 2922 ata_eh_qc_complete(qc); 2923 } else { 2924 if (qc->flags & ATA_QCFLAG_SENSE_VALID) { 2925 ata_eh_qc_complete(qc); 2926 } else { 2927 /* feed zero TF to sense generation */ 2928 memset(&qc->result_tf, 0, sizeof(qc->result_tf)); 2929 ata_eh_qc_retry(qc); 2930 } 2931 } 2932 } 2933 2934 /* make sure nr_active_links is zero after EH */ 2935 WARN_ON(ap->nr_active_links); 2936 ap->nr_active_links = 0; 2937 } 2938 2939 /** 2940 * ata_do_eh - do standard error handling 2941 * @ap: host port to handle error for 2942 * 2943 * @prereset: prereset method (can be NULL) 2944 * @softreset: softreset method (can be NULL) 2945 * @hardreset: hardreset method (can be NULL) 2946 * @postreset: postreset method (can be NULL) 2947 * 2948 * Perform standard error handling sequence. 2949 * 2950 * LOCKING: 2951 * Kernel thread context (may sleep). 
2952 */ 2953 void ata_do_eh(struct ata_port *ap, ata_prereset_fn_t prereset, 2954 ata_reset_fn_t softreset, ata_reset_fn_t hardreset, 2955 ata_postreset_fn_t postreset) 2956 { 2957 struct ata_device *dev; 2958 int rc; 2959 2960 ata_eh_autopsy(ap); 2961 ata_eh_report(ap); 2962 2963 rc = ata_eh_recover(ap, prereset, softreset, hardreset, postreset, 2964 NULL); 2965 if (rc) { 2966 ata_link_for_each_dev(dev, &ap->link) 2967 ata_dev_disable(dev); 2968 } 2969 2970 ata_eh_finish(ap); 2971 } 2972 2973 /** 2974 * ata_std_error_handler - standard error handler 2975 * @ap: host port to handle error for 2976 * 2977 * Standard error handler 2978 * 2979 * LOCKING: 2980 * Kernel thread context (may sleep). 2981 */ 2982 void ata_std_error_handler(struct ata_port *ap) 2983 { 2984 struct ata_port_operations *ops = ap->ops; 2985 ata_reset_fn_t hardreset = ops->hardreset; 2986 2987 /* ignore built-in hardreset if SCR access is not available */ 2988 if (ata_is_builtin_hardreset(hardreset) && !sata_scr_valid(&ap->link)) 2989 hardreset = NULL; 2990 2991 ata_do_eh(ap, ops->prereset, ops->softreset, hardreset, ops->postreset); 2992 } 2993 2994 #ifdef CONFIG_PM 2995 /** 2996 * ata_eh_handle_port_suspend - perform port suspend operation 2997 * @ap: port to suspend 2998 * 2999 * Suspend @ap. 3000 * 3001 * LOCKING: 3002 * Kernel thread context (may sleep). 3003 */ 3004 static void ata_eh_handle_port_suspend(struct ata_port *ap) 3005 { 3006 unsigned long flags; 3007 int rc = 0; 3008 3009 /* are we suspending? 
*/ 3010 spin_lock_irqsave(ap->lock, flags); 3011 if (!(ap->pflags & ATA_PFLAG_PM_PENDING) || 3012 ap->pm_mesg.event == PM_EVENT_ON) { 3013 spin_unlock_irqrestore(ap->lock, flags); 3014 return; 3015 } 3016 spin_unlock_irqrestore(ap->lock, flags); 3017 3018 WARN_ON(ap->pflags & ATA_PFLAG_SUSPENDED); 3019 3020 /* tell ACPI we're suspending */ 3021 rc = ata_acpi_on_suspend(ap); 3022 if (rc) 3023 goto out; 3024 3025 /* suspend */ 3026 ata_eh_freeze_port(ap); 3027 3028 if (ap->ops->port_suspend) 3029 rc = ap->ops->port_suspend(ap, ap->pm_mesg); 3030 3031 ata_acpi_set_state(ap, PMSG_SUSPEND); 3032 out: 3033 /* report result */ 3034 spin_lock_irqsave(ap->lock, flags); 3035 3036 ap->pflags &= ~ATA_PFLAG_PM_PENDING; 3037 if (rc == 0) 3038 ap->pflags |= ATA_PFLAG_SUSPENDED; 3039 else if (ap->pflags & ATA_PFLAG_FROZEN) 3040 ata_port_schedule_eh(ap); 3041 3042 if (ap->pm_result) { 3043 *ap->pm_result = rc; 3044 ap->pm_result = NULL; 3045 } 3046 3047 spin_unlock_irqrestore(ap->lock, flags); 3048 3049 return; 3050 } 3051 3052 /** 3053 * ata_eh_handle_port_resume - perform port resume operation 3054 * @ap: port to resume 3055 * 3056 * Resume @ap. 3057 * 3058 * LOCKING: 3059 * Kernel thread context (may sleep). 3060 */ 3061 static void ata_eh_handle_port_resume(struct ata_port *ap) 3062 { 3063 unsigned long flags; 3064 int rc = 0; 3065 3066 /* are we resuming? 
*/ 3067 spin_lock_irqsave(ap->lock, flags); 3068 if (!(ap->pflags & ATA_PFLAG_PM_PENDING) || 3069 ap->pm_mesg.event != PM_EVENT_ON) { 3070 spin_unlock_irqrestore(ap->lock, flags); 3071 return; 3072 } 3073 spin_unlock_irqrestore(ap->lock, flags); 3074 3075 WARN_ON(!(ap->pflags & ATA_PFLAG_SUSPENDED)); 3076 3077 ata_acpi_set_state(ap, PMSG_ON); 3078 3079 if (ap->ops->port_resume) 3080 rc = ap->ops->port_resume(ap); 3081 3082 /* tell ACPI that we're resuming */ 3083 ata_acpi_on_resume(ap); 3084 3085 /* report result */ 3086 spin_lock_irqsave(ap->lock, flags); 3087 ap->pflags &= ~(ATA_PFLAG_PM_PENDING | ATA_PFLAG_SUSPENDED); 3088 if (ap->pm_result) { 3089 *ap->pm_result = rc; 3090 ap->pm_result = NULL; 3091 } 3092 spin_unlock_irqrestore(ap->lock, flags); 3093 } 3094 #endif /* CONFIG_PM */ 3095