1 /* 2 * libata-eh.c - libata error handling 3 * 4 * Maintained by: Jeff Garzik <jgarzik@pobox.com> 5 * Please ALWAYS copy linux-ide@vger.kernel.org 6 * on emails. 7 * 8 * Copyright 2006 Tejun Heo <htejun@gmail.com> 9 * 10 * 11 * This program is free software; you can redistribute it and/or 12 * modify it under the terms of the GNU General Public License as 13 * published by the Free Software Foundation; either version 2, or 14 * (at your option) any later version. 15 * 16 * This program is distributed in the hope that it will be useful, 17 * but WITHOUT ANY WARRANTY; without even the implied warranty of 18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 19 * General Public License for more details. 20 * 21 * You should have received a copy of the GNU General Public License 22 * along with this program; see the file COPYING. If not, write to 23 * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, 24 * USA. 25 * 26 * 27 * libata documentation is available via 'make {ps|pdf}docs', 28 * as Documentation/DocBook/libata.* 29 * 30 * Hardware documentation available from http://www.t13.org/ and 31 * http://www.sata-io.org/ 32 * 33 */ 34 35 #include <linux/kernel.h> 36 #include <linux/pci.h> 37 #include <scsi/scsi.h> 38 #include <scsi/scsi_host.h> 39 #include <scsi/scsi_eh.h> 40 #include <scsi/scsi_device.h> 41 #include <scsi/scsi_cmnd.h> 42 #include "../scsi/scsi_transport_api.h" 43 44 #include <linux/libata.h> 45 46 #include "libata.h" 47 48 enum { 49 /* speed down verdicts */ 50 ATA_EH_SPDN_NCQ_OFF = (1 << 0), 51 ATA_EH_SPDN_SPEED_DOWN = (1 << 1), 52 ATA_EH_SPDN_FALLBACK_TO_PIO = (1 << 2), 53 ATA_EH_SPDN_KEEP_ERRORS = (1 << 3), 54 55 /* error flags */ 56 ATA_EFLAG_IS_IO = (1 << 0), 57 ATA_EFLAG_DUBIOUS_XFER = (1 << 1), 58 59 /* error categories */ 60 ATA_ECAT_NONE = 0, 61 ATA_ECAT_ATA_BUS = 1, 62 ATA_ECAT_TOUT_HSM = 2, 63 ATA_ECAT_UNK_DEV = 3, 64 ATA_ECAT_DUBIOUS_NONE = 4, 65 ATA_ECAT_DUBIOUS_ATA_BUS = 5, 66 ATA_ECAT_DUBIOUS_TOUT_HSM = 
6, 67 ATA_ECAT_DUBIOUS_UNK_DEV = 7, 68 ATA_ECAT_NR = 8, 69 70 /* Waiting in ->prereset can never be reliable. It's 71 * sometimes nice to wait there but it can't be depended upon; 72 * otherwise, we wouldn't be resetting. Just give it enough 73 * time for most drives to spin up. 74 */ 75 ATA_EH_PRERESET_TIMEOUT = 10000, 76 ATA_EH_FASTDRAIN_INTERVAL = 3000, 77 }; 78 79 /* The following table determines how we sequence resets. Each entry 80 * represents timeout for that try. The first try can be soft or 81 * hardreset. All others are hardreset if available. In most cases 82 * the first reset w/ 10sec timeout should succeed. Following entries 83 * are mostly for error handling, hotplug and retarded devices. 84 */ 85 static const unsigned long ata_eh_reset_timeouts[] = { 86 10000, /* most drives spin up by 10sec */ 87 10000, /* > 99% working drives spin up before 20sec */ 88 35000, /* give > 30 secs of idleness for retarded devices */ 89 5000, /* and sweet one last chance */ 90 /* > 1 min has elapsed, give up */ 91 }; 92 93 static void __ata_port_freeze(struct ata_port *ap); 94 #ifdef CONFIG_PM 95 static void ata_eh_handle_port_suspend(struct ata_port *ap); 96 static void ata_eh_handle_port_resume(struct ata_port *ap); 97 #else /* CONFIG_PM */ 98 static void ata_eh_handle_port_suspend(struct ata_port *ap) 99 { } 100 101 static void ata_eh_handle_port_resume(struct ata_port *ap) 102 { } 103 #endif /* CONFIG_PM */ 104 105 static void __ata_ehi_pushv_desc(struct ata_eh_info *ehi, const char *fmt, 106 va_list args) 107 { 108 ehi->desc_len += vscnprintf(ehi->desc + ehi->desc_len, 109 ATA_EH_DESC_LEN - ehi->desc_len, 110 fmt, args); 111 } 112 113 /** 114 * __ata_ehi_push_desc - push error description without adding separator 115 * @ehi: target EHI 116 * @fmt: printf format string 117 * 118 * Format string according to @fmt and append it to @ehi->desc. 
119 * 120 * LOCKING: 121 * spin_lock_irqsave(host lock) 122 */ 123 void __ata_ehi_push_desc(struct ata_eh_info *ehi, const char *fmt, ...) 124 { 125 va_list args; 126 127 va_start(args, fmt); 128 __ata_ehi_pushv_desc(ehi, fmt, args); 129 va_end(args); 130 } 131 132 /** 133 * ata_ehi_push_desc - push error description with separator 134 * @ehi: target EHI 135 * @fmt: printf format string 136 * 137 * Format string according to @fmt and append it to @ehi->desc. 138 * If @ehi->desc is not empty, ", " is added in-between. 139 * 140 * LOCKING: 141 * spin_lock_irqsave(host lock) 142 */ 143 void ata_ehi_push_desc(struct ata_eh_info *ehi, const char *fmt, ...) 144 { 145 va_list args; 146 147 if (ehi->desc_len) 148 __ata_ehi_push_desc(ehi, ", "); 149 150 va_start(args, fmt); 151 __ata_ehi_pushv_desc(ehi, fmt, args); 152 va_end(args); 153 } 154 155 /** 156 * ata_ehi_clear_desc - clean error description 157 * @ehi: target EHI 158 * 159 * Clear @ehi->desc. 160 * 161 * LOCKING: 162 * spin_lock_irqsave(host lock) 163 */ 164 void ata_ehi_clear_desc(struct ata_eh_info *ehi) 165 { 166 ehi->desc[0] = '\0'; 167 ehi->desc_len = 0; 168 } 169 170 /** 171 * ata_port_desc - append port description 172 * @ap: target ATA port 173 * @fmt: printf format string 174 * 175 * Format string according to @fmt and append it to port 176 * description. If port description is not empty, " " is added 177 * in-between. This function is to be used while initializing 178 * ata_host. The description is printed on host registration. 179 * 180 * LOCKING: 181 * None. 182 */ 183 void ata_port_desc(struct ata_port *ap, const char *fmt, ...) 
184 { 185 va_list args; 186 187 WARN_ON(!(ap->pflags & ATA_PFLAG_INITIALIZING)); 188 189 if (ap->link.eh_info.desc_len) 190 __ata_ehi_push_desc(&ap->link.eh_info, " "); 191 192 va_start(args, fmt); 193 __ata_ehi_pushv_desc(&ap->link.eh_info, fmt, args); 194 va_end(args); 195 } 196 197 #ifdef CONFIG_PCI 198 199 /** 200 * ata_port_pbar_desc - append PCI BAR description 201 * @ap: target ATA port 202 * @bar: target PCI BAR 203 * @offset: offset into PCI BAR 204 * @name: name of the area 205 * 206 * If @offset is negative, this function formats a string which 207 * contains the name, address, size and type of the BAR and 208 * appends it to the port description. If @offset is zero or 209 * positive, only name and offsetted address is appended. 210 * 211 * LOCKING: 212 * None. 213 */ 214 void ata_port_pbar_desc(struct ata_port *ap, int bar, ssize_t offset, 215 const char *name) 216 { 217 struct pci_dev *pdev = to_pci_dev(ap->host->dev); 218 char *type = ""; 219 unsigned long long start, len; 220 221 if (pci_resource_flags(pdev, bar) & IORESOURCE_MEM) 222 type = "m"; 223 else if (pci_resource_flags(pdev, bar) & IORESOURCE_IO) 224 type = "i"; 225 226 start = (unsigned long long)pci_resource_start(pdev, bar); 227 len = (unsigned long long)pci_resource_len(pdev, bar); 228 229 if (offset < 0) 230 ata_port_desc(ap, "%s %s%llu@0x%llx", name, type, len, start); 231 else 232 ata_port_desc(ap, "%s 0x%llx", name, 233 start + (unsigned long long)offset); 234 } 235 236 #endif /* CONFIG_PCI */ 237 238 static void ata_ering_record(struct ata_ering *ering, unsigned int eflags, 239 unsigned int err_mask) 240 { 241 struct ata_ering_entry *ent; 242 243 WARN_ON(!err_mask); 244 245 ering->cursor++; 246 ering->cursor %= ATA_ERING_SIZE; 247 248 ent = &ering->ring[ering->cursor]; 249 ent->eflags = eflags; 250 ent->err_mask = err_mask; 251 ent->timestamp = get_jiffies_64(); 252 } 253 254 static struct ata_ering_entry *ata_ering_top(struct ata_ering *ering) 255 { 256 struct ata_ering_entry *ent 
= &ering->ring[ering->cursor]; 257 258 if (ent->err_mask) 259 return ent; 260 return NULL; 261 } 262 263 static void ata_ering_clear(struct ata_ering *ering) 264 { 265 memset(ering, 0, sizeof(*ering)); 266 } 267 268 static int ata_ering_map(struct ata_ering *ering, 269 int (*map_fn)(struct ata_ering_entry *, void *), 270 void *arg) 271 { 272 int idx, rc = 0; 273 struct ata_ering_entry *ent; 274 275 idx = ering->cursor; 276 do { 277 ent = &ering->ring[idx]; 278 if (!ent->err_mask) 279 break; 280 rc = map_fn(ent, arg); 281 if (rc) 282 break; 283 idx = (idx - 1 + ATA_ERING_SIZE) % ATA_ERING_SIZE; 284 } while (idx != ering->cursor); 285 286 return rc; 287 } 288 289 static unsigned int ata_eh_dev_action(struct ata_device *dev) 290 { 291 struct ata_eh_context *ehc = &dev->link->eh_context; 292 293 return ehc->i.action | ehc->i.dev_action[dev->devno]; 294 } 295 296 static void ata_eh_clear_action(struct ata_link *link, struct ata_device *dev, 297 struct ata_eh_info *ehi, unsigned int action) 298 { 299 struct ata_device *tdev; 300 301 if (!dev) { 302 ehi->action &= ~action; 303 ata_link_for_each_dev(tdev, link) 304 ehi->dev_action[tdev->devno] &= ~action; 305 } else { 306 /* doesn't make sense for port-wide EH actions */ 307 WARN_ON(!(action & ATA_EH_PERDEV_MASK)); 308 309 /* break ehi->action into ehi->dev_action */ 310 if (ehi->action & action) { 311 ata_link_for_each_dev(tdev, link) 312 ehi->dev_action[tdev->devno] |= 313 ehi->action & action; 314 ehi->action &= ~action; 315 } 316 317 /* turn off the specified per-dev action */ 318 ehi->dev_action[dev->devno] &= ~action; 319 } 320 } 321 322 /** 323 * ata_scsi_timed_out - SCSI layer time out callback 324 * @cmd: timed out SCSI command 325 * 326 * Handles SCSI layer timeout. We race with normal completion of 327 * the qc for @cmd. If the qc is already gone, we lose and let 328 * the scsi command finish (EH_HANDLED). Otherwise, the qc has 329 * timed out and EH should be invoked. 
Prevent ata_qc_complete() 330 * from finishing it by setting EH_SCHEDULED and return 331 * EH_NOT_HANDLED. 332 * 333 * TODO: kill this function once old EH is gone. 334 * 335 * LOCKING: 336 * Called from timer context 337 * 338 * RETURNS: 339 * EH_HANDLED or EH_NOT_HANDLED 340 */ 341 enum scsi_eh_timer_return ata_scsi_timed_out(struct scsi_cmnd *cmd) 342 { 343 struct Scsi_Host *host = cmd->device->host; 344 struct ata_port *ap = ata_shost_to_port(host); 345 unsigned long flags; 346 struct ata_queued_cmd *qc; 347 enum scsi_eh_timer_return ret; 348 349 DPRINTK("ENTER\n"); 350 351 if (ap->ops->error_handler) { 352 ret = EH_NOT_HANDLED; 353 goto out; 354 } 355 356 ret = EH_HANDLED; 357 spin_lock_irqsave(ap->lock, flags); 358 qc = ata_qc_from_tag(ap, ap->link.active_tag); 359 if (qc) { 360 WARN_ON(qc->scsicmd != cmd); 361 qc->flags |= ATA_QCFLAG_EH_SCHEDULED; 362 qc->err_mask |= AC_ERR_TIMEOUT; 363 ret = EH_NOT_HANDLED; 364 } 365 spin_unlock_irqrestore(ap->lock, flags); 366 367 out: 368 DPRINTK("EXIT, ret=%d\n", ret); 369 return ret; 370 } 371 372 /** 373 * ata_scsi_error - SCSI layer error handler callback 374 * @host: SCSI host on which error occurred 375 * 376 * Handles SCSI-layer-thrown error events. 377 * 378 * LOCKING: 379 * Inherited from SCSI layer (none, can sleep) 380 * 381 * RETURNS: 382 * Zero. 383 */ 384 void ata_scsi_error(struct Scsi_Host *host) 385 { 386 struct ata_port *ap = ata_shost_to_port(host); 387 int i; 388 unsigned long flags; 389 390 DPRINTK("ENTER\n"); 391 392 /* synchronize with port task */ 393 ata_port_flush_task(ap); 394 395 /* synchronize with host lock and sort out timeouts */ 396 397 /* For new EH, all qcs are finished in one of three ways - 398 * normal completion, error completion, and SCSI timeout. 399 * Both cmpletions can race against SCSI timeout. When normal 400 * completion wins, the qc never reaches EH. When error 401 * completion wins, the qc has ATA_QCFLAG_FAILED set. 
402 * 403 * When SCSI timeout wins, things are a bit more complex. 404 * Normal or error completion can occur after the timeout but 405 * before this point. In such cases, both types of 406 * completions are honored. A scmd is determined to have 407 * timed out iff its associated qc is active and not failed. 408 */ 409 if (ap->ops->error_handler) { 410 struct scsi_cmnd *scmd, *tmp; 411 int nr_timedout = 0; 412 413 spin_lock_irqsave(ap->lock, flags); 414 415 list_for_each_entry_safe(scmd, tmp, &host->eh_cmd_q, eh_entry) { 416 struct ata_queued_cmd *qc; 417 418 for (i = 0; i < ATA_MAX_QUEUE; i++) { 419 qc = __ata_qc_from_tag(ap, i); 420 if (qc->flags & ATA_QCFLAG_ACTIVE && 421 qc->scsicmd == scmd) 422 break; 423 } 424 425 if (i < ATA_MAX_QUEUE) { 426 /* the scmd has an associated qc */ 427 if (!(qc->flags & ATA_QCFLAG_FAILED)) { 428 /* which hasn't failed yet, timeout */ 429 qc->err_mask |= AC_ERR_TIMEOUT; 430 qc->flags |= ATA_QCFLAG_FAILED; 431 nr_timedout++; 432 } 433 } else { 434 /* Normal completion occurred after 435 * SCSI timeout but before this point. 436 * Successfully complete it. 437 */ 438 scmd->retries = scmd->allowed; 439 scsi_eh_finish_cmd(scmd, &ap->eh_done_q); 440 } 441 } 442 443 /* If we have timed out qcs. They belong to EH from 444 * this point but the state of the controller is 445 * unknown. Freeze the port to make sure the IRQ 446 * handler doesn't diddle with those qcs. This must 447 * be done atomically w.r.t. setting QCFLAG_FAILED. 
448 */ 449 if (nr_timedout) 450 __ata_port_freeze(ap); 451 452 spin_unlock_irqrestore(ap->lock, flags); 453 454 /* initialize eh_tries */ 455 ap->eh_tries = ATA_EH_MAX_TRIES; 456 } else 457 spin_unlock_wait(ap->lock); 458 459 repeat: 460 /* invoke error handler */ 461 if (ap->ops->error_handler) { 462 struct ata_link *link; 463 464 /* kill fast drain timer */ 465 del_timer_sync(&ap->fastdrain_timer); 466 467 /* process port resume request */ 468 ata_eh_handle_port_resume(ap); 469 470 /* fetch & clear EH info */ 471 spin_lock_irqsave(ap->lock, flags); 472 473 __ata_port_for_each_link(link, ap) { 474 struct ata_eh_context *ehc = &link->eh_context; 475 struct ata_device *dev; 476 477 memset(&link->eh_context, 0, sizeof(link->eh_context)); 478 link->eh_context.i = link->eh_info; 479 memset(&link->eh_info, 0, sizeof(link->eh_info)); 480 481 ata_link_for_each_dev(dev, link) { 482 int devno = dev->devno; 483 484 ehc->saved_xfer_mode[devno] = dev->xfer_mode; 485 if (ata_ncq_enabled(dev)) 486 ehc->saved_ncq_enabled |= 1 << devno; 487 } 488 } 489 490 ap->pflags |= ATA_PFLAG_EH_IN_PROGRESS; 491 ap->pflags &= ~ATA_PFLAG_EH_PENDING; 492 ap->excl_link = NULL; /* don't maintain exclusion over EH */ 493 494 spin_unlock_irqrestore(ap->lock, flags); 495 496 /* invoke EH, skip if unloading or suspended */ 497 if (!(ap->pflags & (ATA_PFLAG_UNLOADING | ATA_PFLAG_SUSPENDED))) 498 ap->ops->error_handler(ap); 499 else 500 ata_eh_finish(ap); 501 502 /* process port suspend request */ 503 ata_eh_handle_port_suspend(ap); 504 505 /* Exception might have happend after ->error_handler 506 * recovered the port but before this point. Repeat 507 * EH in such case. 
508 */ 509 spin_lock_irqsave(ap->lock, flags); 510 511 if (ap->pflags & ATA_PFLAG_EH_PENDING) { 512 if (--ap->eh_tries) { 513 spin_unlock_irqrestore(ap->lock, flags); 514 goto repeat; 515 } 516 ata_port_printk(ap, KERN_ERR, "EH pending after %d " 517 "tries, giving up\n", ATA_EH_MAX_TRIES); 518 ap->pflags &= ~ATA_PFLAG_EH_PENDING; 519 } 520 521 /* this run is complete, make sure EH info is clear */ 522 __ata_port_for_each_link(link, ap) 523 memset(&link->eh_info, 0, sizeof(link->eh_info)); 524 525 /* Clear host_eh_scheduled while holding ap->lock such 526 * that if exception occurs after this point but 527 * before EH completion, SCSI midlayer will 528 * re-initiate EH. 529 */ 530 host->host_eh_scheduled = 0; 531 532 spin_unlock_irqrestore(ap->lock, flags); 533 } else { 534 WARN_ON(ata_qc_from_tag(ap, ap->link.active_tag) == NULL); 535 ap->ops->eng_timeout(ap); 536 } 537 538 /* finish or retry handled scmd's and clean up */ 539 WARN_ON(host->host_failed || !list_empty(&host->eh_cmd_q)); 540 541 scsi_eh_flush_done_q(&ap->eh_done_q); 542 543 /* clean up */ 544 spin_lock_irqsave(ap->lock, flags); 545 546 if (ap->pflags & ATA_PFLAG_LOADING) 547 ap->pflags &= ~ATA_PFLAG_LOADING; 548 else if (ap->pflags & ATA_PFLAG_SCSI_HOTPLUG) 549 queue_delayed_work(ata_aux_wq, &ap->hotplug_task, 0); 550 551 if (ap->pflags & ATA_PFLAG_RECOVERED) 552 ata_port_printk(ap, KERN_INFO, "EH complete\n"); 553 554 ap->pflags &= ~(ATA_PFLAG_SCSI_HOTPLUG | ATA_PFLAG_RECOVERED); 555 556 /* tell wait_eh that we're done */ 557 ap->pflags &= ~ATA_PFLAG_EH_IN_PROGRESS; 558 wake_up_all(&ap->eh_wait_q); 559 560 spin_unlock_irqrestore(ap->lock, flags); 561 562 DPRINTK("EXIT\n"); 563 } 564 565 /** 566 * ata_port_wait_eh - Wait for the currently pending EH to complete 567 * @ap: Port to wait EH for 568 * 569 * Wait until the currently pending EH is complete. 570 * 571 * LOCKING: 572 * Kernel thread context (may sleep). 
573 */ 574 void ata_port_wait_eh(struct ata_port *ap) 575 { 576 unsigned long flags; 577 DEFINE_WAIT(wait); 578 579 retry: 580 spin_lock_irqsave(ap->lock, flags); 581 582 while (ap->pflags & (ATA_PFLAG_EH_PENDING | ATA_PFLAG_EH_IN_PROGRESS)) { 583 prepare_to_wait(&ap->eh_wait_q, &wait, TASK_UNINTERRUPTIBLE); 584 spin_unlock_irqrestore(ap->lock, flags); 585 schedule(); 586 spin_lock_irqsave(ap->lock, flags); 587 } 588 finish_wait(&ap->eh_wait_q, &wait); 589 590 spin_unlock_irqrestore(ap->lock, flags); 591 592 /* make sure SCSI EH is complete */ 593 if (scsi_host_in_recovery(ap->scsi_host)) { 594 msleep(10); 595 goto retry; 596 } 597 } 598 599 static int ata_eh_nr_in_flight(struct ata_port *ap) 600 { 601 unsigned int tag; 602 int nr = 0; 603 604 /* count only non-internal commands */ 605 for (tag = 0; tag < ATA_MAX_QUEUE - 1; tag++) 606 if (ata_qc_from_tag(ap, tag)) 607 nr++; 608 609 return nr; 610 } 611 612 void ata_eh_fastdrain_timerfn(unsigned long arg) 613 { 614 struct ata_port *ap = (void *)arg; 615 unsigned long flags; 616 int cnt; 617 618 spin_lock_irqsave(ap->lock, flags); 619 620 cnt = ata_eh_nr_in_flight(ap); 621 622 /* are we done? */ 623 if (!cnt) 624 goto out_unlock; 625 626 if (cnt == ap->fastdrain_cnt) { 627 unsigned int tag; 628 629 /* No progress during the last interval, tag all 630 * in-flight qcs as timed out and freeze the port. 
631 */ 632 for (tag = 0; tag < ATA_MAX_QUEUE - 1; tag++) { 633 struct ata_queued_cmd *qc = ata_qc_from_tag(ap, tag); 634 if (qc) 635 qc->err_mask |= AC_ERR_TIMEOUT; 636 } 637 638 ata_port_freeze(ap); 639 } else { 640 /* some qcs have finished, give it another chance */ 641 ap->fastdrain_cnt = cnt; 642 ap->fastdrain_timer.expires = 643 ata_deadline(jiffies, ATA_EH_FASTDRAIN_INTERVAL); 644 add_timer(&ap->fastdrain_timer); 645 } 646 647 out_unlock: 648 spin_unlock_irqrestore(ap->lock, flags); 649 } 650 651 /** 652 * ata_eh_set_pending - set ATA_PFLAG_EH_PENDING and activate fast drain 653 * @ap: target ATA port 654 * @fastdrain: activate fast drain 655 * 656 * Set ATA_PFLAG_EH_PENDING and activate fast drain if @fastdrain 657 * is non-zero and EH wasn't pending before. Fast drain ensures 658 * that EH kicks in in timely manner. 659 * 660 * LOCKING: 661 * spin_lock_irqsave(host lock) 662 */ 663 static void ata_eh_set_pending(struct ata_port *ap, int fastdrain) 664 { 665 int cnt; 666 667 /* already scheduled? */ 668 if (ap->pflags & ATA_PFLAG_EH_PENDING) 669 return; 670 671 ap->pflags |= ATA_PFLAG_EH_PENDING; 672 673 if (!fastdrain) 674 return; 675 676 /* do we have in-flight qcs? */ 677 cnt = ata_eh_nr_in_flight(ap); 678 if (!cnt) 679 return; 680 681 /* activate fast drain */ 682 ap->fastdrain_cnt = cnt; 683 ap->fastdrain_timer.expires = 684 ata_deadline(jiffies, ATA_EH_FASTDRAIN_INTERVAL); 685 add_timer(&ap->fastdrain_timer); 686 } 687 688 /** 689 * ata_qc_schedule_eh - schedule qc for error handling 690 * @qc: command to schedule error handling for 691 * 692 * Schedule error handling for @qc. EH will kick in as soon as 693 * other commands are drained. 
694 * 695 * LOCKING: 696 * spin_lock_irqsave(host lock) 697 */ 698 void ata_qc_schedule_eh(struct ata_queued_cmd *qc) 699 { 700 struct ata_port *ap = qc->ap; 701 702 WARN_ON(!ap->ops->error_handler); 703 704 qc->flags |= ATA_QCFLAG_FAILED; 705 ata_eh_set_pending(ap, 1); 706 707 /* The following will fail if timeout has already expired. 708 * ata_scsi_error() takes care of such scmds on EH entry. 709 * Note that ATA_QCFLAG_FAILED is unconditionally set after 710 * this function completes. 711 */ 712 scsi_req_abort_cmd(qc->scsicmd); 713 } 714 715 /** 716 * ata_port_schedule_eh - schedule error handling without a qc 717 * @ap: ATA port to schedule EH for 718 * 719 * Schedule error handling for @ap. EH will kick in as soon as 720 * all commands are drained. 721 * 722 * LOCKING: 723 * spin_lock_irqsave(host lock) 724 */ 725 void ata_port_schedule_eh(struct ata_port *ap) 726 { 727 WARN_ON(!ap->ops->error_handler); 728 729 if (ap->pflags & ATA_PFLAG_INITIALIZING) 730 return; 731 732 ata_eh_set_pending(ap, 1); 733 scsi_schedule_eh(ap->scsi_host); 734 735 DPRINTK("port EH scheduled\n"); 736 } 737 738 static int ata_do_link_abort(struct ata_port *ap, struct ata_link *link) 739 { 740 int tag, nr_aborted = 0; 741 742 WARN_ON(!ap->ops->error_handler); 743 744 /* we're gonna abort all commands, no need for fast drain */ 745 ata_eh_set_pending(ap, 0); 746 747 for (tag = 0; tag < ATA_MAX_QUEUE; tag++) { 748 struct ata_queued_cmd *qc = ata_qc_from_tag(ap, tag); 749 750 if (qc && (!link || qc->dev->link == link)) { 751 qc->flags |= ATA_QCFLAG_FAILED; 752 ata_qc_complete(qc); 753 nr_aborted++; 754 } 755 } 756 757 if (!nr_aborted) 758 ata_port_schedule_eh(ap); 759 760 return nr_aborted; 761 } 762 763 /** 764 * ata_link_abort - abort all qc's on the link 765 * @link: ATA link to abort qc's for 766 * 767 * Abort all active qc's active on @link and schedule EH. 768 * 769 * LOCKING: 770 * spin_lock_irqsave(host lock) 771 * 772 * RETURNS: 773 * Number of aborted qc's. 
774 */ 775 int ata_link_abort(struct ata_link *link) 776 { 777 return ata_do_link_abort(link->ap, link); 778 } 779 780 /** 781 * ata_port_abort - abort all qc's on the port 782 * @ap: ATA port to abort qc's for 783 * 784 * Abort all active qc's of @ap and schedule EH. 785 * 786 * LOCKING: 787 * spin_lock_irqsave(host_set lock) 788 * 789 * RETURNS: 790 * Number of aborted qc's. 791 */ 792 int ata_port_abort(struct ata_port *ap) 793 { 794 return ata_do_link_abort(ap, NULL); 795 } 796 797 /** 798 * __ata_port_freeze - freeze port 799 * @ap: ATA port to freeze 800 * 801 * This function is called when HSM violation or some other 802 * condition disrupts normal operation of the port. Frozen port 803 * is not allowed to perform any operation until the port is 804 * thawed, which usually follows a successful reset. 805 * 806 * ap->ops->freeze() callback can be used for freezing the port 807 * hardware-wise (e.g. mask interrupt and stop DMA engine). If a 808 * port cannot be frozen hardware-wise, the interrupt handler 809 * must ack and clear interrupts unconditionally while the port 810 * is frozen. 811 * 812 * LOCKING: 813 * spin_lock_irqsave(host lock) 814 */ 815 static void __ata_port_freeze(struct ata_port *ap) 816 { 817 WARN_ON(!ap->ops->error_handler); 818 819 if (ap->ops->freeze) 820 ap->ops->freeze(ap); 821 822 ap->pflags |= ATA_PFLAG_FROZEN; 823 824 DPRINTK("ata%u port frozen\n", ap->print_id); 825 } 826 827 /** 828 * ata_port_freeze - abort & freeze port 829 * @ap: ATA port to freeze 830 * 831 * Abort and freeze @ap. 832 * 833 * LOCKING: 834 * spin_lock_irqsave(host lock) 835 * 836 * RETURNS: 837 * Number of aborted commands. 
838 */ 839 int ata_port_freeze(struct ata_port *ap) 840 { 841 int nr_aborted; 842 843 WARN_ON(!ap->ops->error_handler); 844 845 nr_aborted = ata_port_abort(ap); 846 __ata_port_freeze(ap); 847 848 return nr_aborted; 849 } 850 851 /** 852 * sata_async_notification - SATA async notification handler 853 * @ap: ATA port where async notification is received 854 * 855 * Handler to be called when async notification via SDB FIS is 856 * received. This function schedules EH if necessary. 857 * 858 * LOCKING: 859 * spin_lock_irqsave(host lock) 860 * 861 * RETURNS: 862 * 1 if EH is scheduled, 0 otherwise. 863 */ 864 int sata_async_notification(struct ata_port *ap) 865 { 866 u32 sntf; 867 int rc; 868 869 if (!(ap->flags & ATA_FLAG_AN)) 870 return 0; 871 872 rc = sata_scr_read(&ap->link, SCR_NOTIFICATION, &sntf); 873 if (rc == 0) 874 sata_scr_write(&ap->link, SCR_NOTIFICATION, sntf); 875 876 if (!sata_pmp_attached(ap) || rc) { 877 /* PMP is not attached or SNTF is not available */ 878 if (!sata_pmp_attached(ap)) { 879 /* PMP is not attached. Check whether ATAPI 880 * AN is configured. If so, notify media 881 * change. 882 */ 883 struct ata_device *dev = ap->link.device; 884 885 if ((dev->class == ATA_DEV_ATAPI) && 886 (dev->flags & ATA_DFLAG_AN)) 887 ata_scsi_media_change_notify(dev); 888 return 0; 889 } else { 890 /* PMP is attached but SNTF is not available. 891 * ATAPI async media change notification is 892 * not used. The PMP must be reporting PHY 893 * status change, schedule EH. 
894 */ 895 ata_port_schedule_eh(ap); 896 return 1; 897 } 898 } else { 899 /* PMP is attached and SNTF is available */ 900 struct ata_link *link; 901 902 /* check and notify ATAPI AN */ 903 ata_port_for_each_link(link, ap) { 904 if (!(sntf & (1 << link->pmp))) 905 continue; 906 907 if ((link->device->class == ATA_DEV_ATAPI) && 908 (link->device->flags & ATA_DFLAG_AN)) 909 ata_scsi_media_change_notify(link->device); 910 } 911 912 /* If PMP is reporting that PHY status of some 913 * downstream ports has changed, schedule EH. 914 */ 915 if (sntf & (1 << SATA_PMP_CTRL_PORT)) { 916 ata_port_schedule_eh(ap); 917 return 1; 918 } 919 920 return 0; 921 } 922 } 923 924 /** 925 * ata_eh_freeze_port - EH helper to freeze port 926 * @ap: ATA port to freeze 927 * 928 * Freeze @ap. 929 * 930 * LOCKING: 931 * None. 932 */ 933 void ata_eh_freeze_port(struct ata_port *ap) 934 { 935 unsigned long flags; 936 937 if (!ap->ops->error_handler) 938 return; 939 940 spin_lock_irqsave(ap->lock, flags); 941 __ata_port_freeze(ap); 942 spin_unlock_irqrestore(ap->lock, flags); 943 } 944 945 /** 946 * ata_port_thaw_port - EH helper to thaw port 947 * @ap: ATA port to thaw 948 * 949 * Thaw frozen port @ap. 950 * 951 * LOCKING: 952 * None. 
953 */ 954 void ata_eh_thaw_port(struct ata_port *ap) 955 { 956 unsigned long flags; 957 958 if (!ap->ops->error_handler) 959 return; 960 961 spin_lock_irqsave(ap->lock, flags); 962 963 ap->pflags &= ~ATA_PFLAG_FROZEN; 964 965 if (ap->ops->thaw) 966 ap->ops->thaw(ap); 967 968 spin_unlock_irqrestore(ap->lock, flags); 969 970 DPRINTK("ata%u port thawed\n", ap->print_id); 971 } 972 973 static void ata_eh_scsidone(struct scsi_cmnd *scmd) 974 { 975 /* nada */ 976 } 977 978 static void __ata_eh_qc_complete(struct ata_queued_cmd *qc) 979 { 980 struct ata_port *ap = qc->ap; 981 struct scsi_cmnd *scmd = qc->scsicmd; 982 unsigned long flags; 983 984 spin_lock_irqsave(ap->lock, flags); 985 qc->scsidone = ata_eh_scsidone; 986 __ata_qc_complete(qc); 987 WARN_ON(ata_tag_valid(qc->tag)); 988 spin_unlock_irqrestore(ap->lock, flags); 989 990 scsi_eh_finish_cmd(scmd, &ap->eh_done_q); 991 } 992 993 /** 994 * ata_eh_qc_complete - Complete an active ATA command from EH 995 * @qc: Command to complete 996 * 997 * Indicate to the mid and upper layers that an ATA command has 998 * completed. To be used from EH. 999 */ 1000 void ata_eh_qc_complete(struct ata_queued_cmd *qc) 1001 { 1002 struct scsi_cmnd *scmd = qc->scsicmd; 1003 scmd->retries = scmd->allowed; 1004 __ata_eh_qc_complete(qc); 1005 } 1006 1007 /** 1008 * ata_eh_qc_retry - Tell midlayer to retry an ATA command after EH 1009 * @qc: Command to retry 1010 * 1011 * Indicate to the mid and upper layers that an ATA command 1012 * should be retried. To be used from EH. 1013 * 1014 * SCSI midlayer limits the number of retries to scmd->allowed. 1015 * scmd->retries is decremented for commands which get retried 1016 * due to unrelated failures (qc->err_mask is zero). 
1017 */ 1018 void ata_eh_qc_retry(struct ata_queued_cmd *qc) 1019 { 1020 struct scsi_cmnd *scmd = qc->scsicmd; 1021 if (!qc->err_mask && scmd->retries) 1022 scmd->retries--; 1023 __ata_eh_qc_complete(qc); 1024 } 1025 1026 /** 1027 * ata_eh_detach_dev - detach ATA device 1028 * @dev: ATA device to detach 1029 * 1030 * Detach @dev. 1031 * 1032 * LOCKING: 1033 * None. 1034 */ 1035 void ata_eh_detach_dev(struct ata_device *dev) 1036 { 1037 struct ata_link *link = dev->link; 1038 struct ata_port *ap = link->ap; 1039 unsigned long flags; 1040 1041 ata_dev_disable(dev); 1042 1043 spin_lock_irqsave(ap->lock, flags); 1044 1045 dev->flags &= ~ATA_DFLAG_DETACH; 1046 1047 if (ata_scsi_offline_dev(dev)) { 1048 dev->flags |= ATA_DFLAG_DETACHED; 1049 ap->pflags |= ATA_PFLAG_SCSI_HOTPLUG; 1050 } 1051 1052 /* clear per-dev EH actions */ 1053 ata_eh_clear_action(link, dev, &link->eh_info, ATA_EH_PERDEV_MASK); 1054 ata_eh_clear_action(link, dev, &link->eh_context.i, ATA_EH_PERDEV_MASK); 1055 1056 spin_unlock_irqrestore(ap->lock, flags); 1057 } 1058 1059 /** 1060 * ata_eh_about_to_do - about to perform eh_action 1061 * @link: target ATA link 1062 * @dev: target ATA dev for per-dev action (can be NULL) 1063 * @action: action about to be performed 1064 * 1065 * Called just before performing EH actions to clear related bits 1066 * in @link->eh_info such that eh actions are not unnecessarily 1067 * repeated. 1068 * 1069 * LOCKING: 1070 * None. 
1071 */ 1072 void ata_eh_about_to_do(struct ata_link *link, struct ata_device *dev, 1073 unsigned int action) 1074 { 1075 struct ata_port *ap = link->ap; 1076 struct ata_eh_info *ehi = &link->eh_info; 1077 struct ata_eh_context *ehc = &link->eh_context; 1078 unsigned long flags; 1079 1080 spin_lock_irqsave(ap->lock, flags); 1081 1082 ata_eh_clear_action(link, dev, ehi, action); 1083 1084 if (!(ehc->i.flags & ATA_EHI_QUIET)) 1085 ap->pflags |= ATA_PFLAG_RECOVERED; 1086 1087 spin_unlock_irqrestore(ap->lock, flags); 1088 } 1089 1090 /** 1091 * ata_eh_done - EH action complete 1092 * @ap: target ATA port 1093 * @dev: target ATA dev for per-dev action (can be NULL) 1094 * @action: action just completed 1095 * 1096 * Called right after performing EH actions to clear related bits 1097 * in @link->eh_context. 1098 * 1099 * LOCKING: 1100 * None. 1101 */ 1102 void ata_eh_done(struct ata_link *link, struct ata_device *dev, 1103 unsigned int action) 1104 { 1105 struct ata_eh_context *ehc = &link->eh_context; 1106 1107 ata_eh_clear_action(link, dev, &ehc->i, action); 1108 } 1109 1110 /** 1111 * ata_err_string - convert err_mask to descriptive string 1112 * @err_mask: error mask to convert to string 1113 * 1114 * Convert @err_mask to descriptive string. Errors are 1115 * prioritized according to severity and only the most severe 1116 * error is reported. 1117 * 1118 * LOCKING: 1119 * None. 
1120 * 1121 * RETURNS: 1122 * Descriptive string for @err_mask 1123 */ 1124 static const char *ata_err_string(unsigned int err_mask) 1125 { 1126 if (err_mask & AC_ERR_HOST_BUS) 1127 return "host bus error"; 1128 if (err_mask & AC_ERR_ATA_BUS) 1129 return "ATA bus error"; 1130 if (err_mask & AC_ERR_TIMEOUT) 1131 return "timeout"; 1132 if (err_mask & AC_ERR_HSM) 1133 return "HSM violation"; 1134 if (err_mask & AC_ERR_SYSTEM) 1135 return "internal error"; 1136 if (err_mask & AC_ERR_MEDIA) 1137 return "media error"; 1138 if (err_mask & AC_ERR_INVALID) 1139 return "invalid argument"; 1140 if (err_mask & AC_ERR_DEV) 1141 return "device error"; 1142 return "unknown error"; 1143 } 1144 1145 /** 1146 * ata_read_log_page - read a specific log page 1147 * @dev: target device 1148 * @page: page to read 1149 * @buf: buffer to store read page 1150 * @sectors: number of sectors to read 1151 * 1152 * Read log page using READ_LOG_EXT command. 1153 * 1154 * LOCKING: 1155 * Kernel thread context (may sleep). 1156 * 1157 * RETURNS: 1158 * 0 on success, AC_ERR_* mask otherwise. 
1159 */ 1160 static unsigned int ata_read_log_page(struct ata_device *dev, 1161 u8 page, void *buf, unsigned int sectors) 1162 { 1163 struct ata_taskfile tf; 1164 unsigned int err_mask; 1165 1166 DPRINTK("read log page - page %d\n", page); 1167 1168 ata_tf_init(dev, &tf); 1169 tf.command = ATA_CMD_READ_LOG_EXT; 1170 tf.lbal = page; 1171 tf.nsect = sectors; 1172 tf.hob_nsect = sectors >> 8; 1173 tf.flags |= ATA_TFLAG_ISADDR | ATA_TFLAG_LBA48 | ATA_TFLAG_DEVICE; 1174 tf.protocol = ATA_PROT_PIO; 1175 1176 err_mask = ata_exec_internal(dev, &tf, NULL, DMA_FROM_DEVICE, 1177 buf, sectors * ATA_SECT_SIZE, 0); 1178 1179 DPRINTK("EXIT, err_mask=%x\n", err_mask); 1180 return err_mask; 1181 } 1182 1183 /** 1184 * ata_eh_read_log_10h - Read log page 10h for NCQ error details 1185 * @dev: Device to read log page 10h from 1186 * @tag: Resulting tag of the failed command 1187 * @tf: Resulting taskfile registers of the failed command 1188 * 1189 * Read log page 10h to obtain NCQ error details and clear error 1190 * condition. 1191 * 1192 * LOCKING: 1193 * Kernel thread context (may sleep). 1194 * 1195 * RETURNS: 1196 * 0 on success, -errno otherwise. 
1197 */ 1198 static int ata_eh_read_log_10h(struct ata_device *dev, 1199 int *tag, struct ata_taskfile *tf) 1200 { 1201 u8 *buf = dev->link->ap->sector_buf; 1202 unsigned int err_mask; 1203 u8 csum; 1204 int i; 1205 1206 err_mask = ata_read_log_page(dev, ATA_LOG_SATA_NCQ, buf, 1); 1207 if (err_mask) 1208 return -EIO; 1209 1210 csum = 0; 1211 for (i = 0; i < ATA_SECT_SIZE; i++) 1212 csum += buf[i]; 1213 if (csum) 1214 ata_dev_printk(dev, KERN_WARNING, 1215 "invalid checksum 0x%x on log page 10h\n", csum); 1216 1217 if (buf[0] & 0x80) 1218 return -ENOENT; 1219 1220 *tag = buf[0] & 0x1f; 1221 1222 tf->command = buf[2]; 1223 tf->feature = buf[3]; 1224 tf->lbal = buf[4]; 1225 tf->lbam = buf[5]; 1226 tf->lbah = buf[6]; 1227 tf->device = buf[7]; 1228 tf->hob_lbal = buf[8]; 1229 tf->hob_lbam = buf[9]; 1230 tf->hob_lbah = buf[10]; 1231 tf->nsect = buf[12]; 1232 tf->hob_nsect = buf[13]; 1233 1234 return 0; 1235 } 1236 1237 /** 1238 * atapi_eh_request_sense - perform ATAPI REQUEST_SENSE 1239 * @dev: device to perform REQUEST_SENSE to 1240 * @sense_buf: result sense data buffer (SCSI_SENSE_BUFFERSIZE bytes long) 1241 * 1242 * Perform ATAPI REQUEST_SENSE after the device reported CHECK 1243 * SENSE. This function is EH helper. 1244 * 1245 * LOCKING: 1246 * Kernel thread context (may sleep). 1247 * 1248 * RETURNS: 1249 * 0 on success, AC_ERR_* mask on failure 1250 */ 1251 static unsigned int atapi_eh_request_sense(struct ata_queued_cmd *qc) 1252 { 1253 struct ata_device *dev = qc->dev; 1254 unsigned char *sense_buf = qc->scsicmd->sense_buffer; 1255 struct ata_port *ap = dev->link->ap; 1256 struct ata_taskfile tf; 1257 u8 cdb[ATAPI_CDB_LEN]; 1258 1259 DPRINTK("ATAPI request sense\n"); 1260 1261 /* FIXME: is this needed? 
*/ 1262 memset(sense_buf, 0, SCSI_SENSE_BUFFERSIZE); 1263 1264 /* initialize sense_buf with the error register, 1265 * for the case where they are -not- overwritten 1266 */ 1267 sense_buf[0] = 0x70; 1268 sense_buf[2] = qc->result_tf.feature >> 4; 1269 1270 /* some devices time out if garbage left in tf */ 1271 ata_tf_init(dev, &tf); 1272 1273 memset(cdb, 0, ATAPI_CDB_LEN); 1274 cdb[0] = REQUEST_SENSE; 1275 cdb[4] = SCSI_SENSE_BUFFERSIZE; 1276 1277 tf.flags |= ATA_TFLAG_ISADDR | ATA_TFLAG_DEVICE; 1278 tf.command = ATA_CMD_PACKET; 1279 1280 /* is it pointless to prefer PIO for "safety reasons"? */ 1281 if (ap->flags & ATA_FLAG_PIO_DMA) { 1282 tf.protocol = ATAPI_PROT_DMA; 1283 tf.feature |= ATAPI_PKT_DMA; 1284 } else { 1285 tf.protocol = ATAPI_PROT_PIO; 1286 tf.lbam = SCSI_SENSE_BUFFERSIZE; 1287 tf.lbah = 0; 1288 } 1289 1290 return ata_exec_internal(dev, &tf, cdb, DMA_FROM_DEVICE, 1291 sense_buf, SCSI_SENSE_BUFFERSIZE, 0); 1292 } 1293 1294 /** 1295 * ata_eh_analyze_serror - analyze SError for a failed port 1296 * @link: ATA link to analyze SError for 1297 * 1298 * Analyze SError if available and further determine cause of 1299 * failure. 1300 * 1301 * LOCKING: 1302 * None. 1303 */ 1304 static void ata_eh_analyze_serror(struct ata_link *link) 1305 { 1306 struct ata_eh_context *ehc = &link->eh_context; 1307 u32 serror = ehc->i.serror; 1308 unsigned int err_mask = 0, action = 0; 1309 u32 hotplug_mask; 1310 1311 if (serror & (SERR_PERSISTENT | SERR_DATA)) { 1312 err_mask |= AC_ERR_ATA_BUS; 1313 action |= ATA_EH_RESET; 1314 } 1315 if (serror & SERR_PROTOCOL) { 1316 err_mask |= AC_ERR_HSM; 1317 action |= ATA_EH_RESET; 1318 } 1319 if (serror & SERR_INTERNAL) { 1320 err_mask |= AC_ERR_SYSTEM; 1321 action |= ATA_EH_RESET; 1322 } 1323 1324 /* Determine whether a hotplug event has occurred. Both 1325 * SError.N/X are considered hotplug events for enabled or 1326 * host links. For disabled PMP links, only N bit is 1327 * considered as X bit is left at 1 for link plugging. 
1328 */ 1329 hotplug_mask = 0; 1330 1331 if (!(link->flags & ATA_LFLAG_DISABLED) || ata_is_host_link(link)) 1332 hotplug_mask = SERR_PHYRDY_CHG | SERR_DEV_XCHG; 1333 else 1334 hotplug_mask = SERR_PHYRDY_CHG; 1335 1336 if (serror & hotplug_mask) 1337 ata_ehi_hotplugged(&ehc->i); 1338 1339 ehc->i.err_mask |= err_mask; 1340 ehc->i.action |= action; 1341 } 1342 1343 /** 1344 * ata_eh_analyze_ncq_error - analyze NCQ error 1345 * @link: ATA link to analyze NCQ error for 1346 * 1347 * Read log page 10h, determine the offending qc and acquire 1348 * error status TF. For NCQ device errors, all LLDDs have to do 1349 * is setting AC_ERR_DEV in ehi->err_mask. This function takes 1350 * care of the rest. 1351 * 1352 * LOCKING: 1353 * Kernel thread context (may sleep). 1354 */ 1355 void ata_eh_analyze_ncq_error(struct ata_link *link) 1356 { 1357 struct ata_port *ap = link->ap; 1358 struct ata_eh_context *ehc = &link->eh_context; 1359 struct ata_device *dev = link->device; 1360 struct ata_queued_cmd *qc; 1361 struct ata_taskfile tf; 1362 int tag, rc; 1363 1364 /* if frozen, we can't do much */ 1365 if (ap->pflags & ATA_PFLAG_FROZEN) 1366 return; 1367 1368 /* is it NCQ device error? */ 1369 if (!link->sactive || !(ehc->i.err_mask & AC_ERR_DEV)) 1370 return; 1371 1372 /* has LLDD analyzed already? 
*/ 1373 for (tag = 0; tag < ATA_MAX_QUEUE; tag++) { 1374 qc = __ata_qc_from_tag(ap, tag); 1375 1376 if (!(qc->flags & ATA_QCFLAG_FAILED)) 1377 continue; 1378 1379 if (qc->err_mask) 1380 return; 1381 } 1382 1383 /* okay, this error is ours */ 1384 rc = ata_eh_read_log_10h(dev, &tag, &tf); 1385 if (rc) { 1386 ata_link_printk(link, KERN_ERR, "failed to read log page 10h " 1387 "(errno=%d)\n", rc); 1388 return; 1389 } 1390 1391 if (!(link->sactive & (1 << tag))) { 1392 ata_link_printk(link, KERN_ERR, "log page 10h reported " 1393 "inactive tag %d\n", tag); 1394 return; 1395 } 1396 1397 /* we've got the perpetrator, condemn it */ 1398 qc = __ata_qc_from_tag(ap, tag); 1399 memcpy(&qc->result_tf, &tf, sizeof(tf)); 1400 qc->result_tf.flags = ATA_TFLAG_ISADDR | ATA_TFLAG_LBA | ATA_TFLAG_LBA48; 1401 qc->err_mask |= AC_ERR_DEV | AC_ERR_NCQ; 1402 ehc->i.err_mask &= ~AC_ERR_DEV; 1403 } 1404 1405 /** 1406 * ata_eh_analyze_tf - analyze taskfile of a failed qc 1407 * @qc: qc to analyze 1408 * @tf: Taskfile registers to analyze 1409 * 1410 * Analyze taskfile of @qc and further determine cause of 1411 * failure. This function also requests ATAPI sense data if 1412 * avaliable. 1413 * 1414 * LOCKING: 1415 * Kernel thread context (may sleep). 
1416 * 1417 * RETURNS: 1418 * Determined recovery action 1419 */ 1420 static unsigned int ata_eh_analyze_tf(struct ata_queued_cmd *qc, 1421 const struct ata_taskfile *tf) 1422 { 1423 unsigned int tmp, action = 0; 1424 u8 stat = tf->command, err = tf->feature; 1425 1426 if ((stat & (ATA_BUSY | ATA_DRQ | ATA_DRDY)) != ATA_DRDY) { 1427 qc->err_mask |= AC_ERR_HSM; 1428 return ATA_EH_RESET; 1429 } 1430 1431 if (stat & (ATA_ERR | ATA_DF)) 1432 qc->err_mask |= AC_ERR_DEV; 1433 else 1434 return 0; 1435 1436 switch (qc->dev->class) { 1437 case ATA_DEV_ATA: 1438 if (err & ATA_ICRC) 1439 qc->err_mask |= AC_ERR_ATA_BUS; 1440 if (err & ATA_UNC) 1441 qc->err_mask |= AC_ERR_MEDIA; 1442 if (err & ATA_IDNF) 1443 qc->err_mask |= AC_ERR_INVALID; 1444 break; 1445 1446 case ATA_DEV_ATAPI: 1447 if (!(qc->ap->pflags & ATA_PFLAG_FROZEN)) { 1448 tmp = atapi_eh_request_sense(qc); 1449 if (!tmp) { 1450 /* ATA_QCFLAG_SENSE_VALID is used to 1451 * tell atapi_qc_complete() that sense 1452 * data is already valid. 1453 * 1454 * TODO: interpret sense data and set 1455 * appropriate err_mask. 
1456 */ 1457 qc->flags |= ATA_QCFLAG_SENSE_VALID; 1458 } else 1459 qc->err_mask |= tmp; 1460 } 1461 } 1462 1463 if (qc->err_mask & (AC_ERR_HSM | AC_ERR_TIMEOUT | AC_ERR_ATA_BUS)) 1464 action |= ATA_EH_RESET; 1465 1466 return action; 1467 } 1468 1469 static int ata_eh_categorize_error(unsigned int eflags, unsigned int err_mask, 1470 int *xfer_ok) 1471 { 1472 int base = 0; 1473 1474 if (!(eflags & ATA_EFLAG_DUBIOUS_XFER)) 1475 *xfer_ok = 1; 1476 1477 if (!*xfer_ok) 1478 base = ATA_ECAT_DUBIOUS_NONE; 1479 1480 if (err_mask & AC_ERR_ATA_BUS) 1481 return base + ATA_ECAT_ATA_BUS; 1482 1483 if (err_mask & AC_ERR_TIMEOUT) 1484 return base + ATA_ECAT_TOUT_HSM; 1485 1486 if (eflags & ATA_EFLAG_IS_IO) { 1487 if (err_mask & AC_ERR_HSM) 1488 return base + ATA_ECAT_TOUT_HSM; 1489 if ((err_mask & 1490 (AC_ERR_DEV|AC_ERR_MEDIA|AC_ERR_INVALID)) == AC_ERR_DEV) 1491 return base + ATA_ECAT_UNK_DEV; 1492 } 1493 1494 return 0; 1495 } 1496 1497 struct speed_down_verdict_arg { 1498 u64 since; 1499 int xfer_ok; 1500 int nr_errors[ATA_ECAT_NR]; 1501 }; 1502 1503 static int speed_down_verdict_cb(struct ata_ering_entry *ent, void *void_arg) 1504 { 1505 struct speed_down_verdict_arg *arg = void_arg; 1506 int cat; 1507 1508 if (ent->timestamp < arg->since) 1509 return -1; 1510 1511 cat = ata_eh_categorize_error(ent->eflags, ent->err_mask, 1512 &arg->xfer_ok); 1513 arg->nr_errors[cat]++; 1514 1515 return 0; 1516 } 1517 1518 /** 1519 * ata_eh_speed_down_verdict - Determine speed down verdict 1520 * @dev: Device of interest 1521 * 1522 * This function examines error ring of @dev and determines 1523 * whether NCQ needs to be turned off, transfer speed should be 1524 * stepped down, or falling back to PIO is necessary. 
*
 *	ECAT_ATA_BUS	: ATA_BUS error for any command
 *
 *	ECAT_TOUT_HSM	: TIMEOUT for any command or HSM violation for
 *			  IO commands
 *
 *	ECAT_UNK_DEV	: Unknown DEV error for IO commands
 *
 *	ECAT_DUBIOUS_*	: Identical to above three but occurred while
 *			  data transfer hasn't been verified.
 *
 *	Verdicts are
 *
 *	NCQ_OFF		: Turn off NCQ.
 *
 *	SPEED_DOWN	: Speed down transfer speed but don't fall back
 *			  to PIO.
 *
 *	FALLBACK_TO_PIO	: Fall back to PIO.
 *
 *	Even if multiple verdicts are returned, only one action is
 *	taken per error.  An action triggered by non-DUBIOUS errors
 *	clears ering, while one triggered by DUBIOUS_* errors doesn't.
 *	This is to expedite speed down decisions right after device is
 *	initially configured.
 *
 *	The following are the speed down rules.  #1 and #2 deal with
 *	DUBIOUS errors.
 *
 *	1. If more than one DUBIOUS_ATA_BUS or DUBIOUS_TOUT_HSM errors
 *	   occurred during last 5 mins, SPEED_DOWN and FALLBACK_TO_PIO.
 *
 *	2. If more than one DUBIOUS_TOUT_HSM or DUBIOUS_UNK_DEV errors
 *	   occurred during last 5 mins, NCQ_OFF.
 *
 *	3. If more than 6 ATA_BUS, TOUT_HSM or UNK_DEV errors
 *	   occurred during last 5 mins, FALLBACK_TO_PIO.
 *
 *	4. If more than 3 TOUT_HSM or UNK_DEV errors occurred
 *	   during last 10 mins, NCQ_OFF.
 *
 *	5. If more than 3 ATA_BUS or TOUT_HSM errors, or more than 6
 *	   UNK_DEV errors occurred during last 10 mins, SPEED_DOWN.
 *
 *	LOCKING:
 *	Inherited from caller.
 *
 *	RETURNS:
 *	OR of ATA_EH_SPDN_* flags.
1574 */ 1575 static unsigned int ata_eh_speed_down_verdict(struct ata_device *dev) 1576 { 1577 const u64 j5mins = 5LLU * 60 * HZ, j10mins = 10LLU * 60 * HZ; 1578 u64 j64 = get_jiffies_64(); 1579 struct speed_down_verdict_arg arg; 1580 unsigned int verdict = 0; 1581 1582 /* scan past 5 mins of error history */ 1583 memset(&arg, 0, sizeof(arg)); 1584 arg.since = j64 - min(j64, j5mins); 1585 ata_ering_map(&dev->ering, speed_down_verdict_cb, &arg); 1586 1587 if (arg.nr_errors[ATA_ECAT_DUBIOUS_ATA_BUS] + 1588 arg.nr_errors[ATA_ECAT_DUBIOUS_TOUT_HSM] > 1) 1589 verdict |= ATA_EH_SPDN_SPEED_DOWN | 1590 ATA_EH_SPDN_FALLBACK_TO_PIO | ATA_EH_SPDN_KEEP_ERRORS; 1591 1592 if (arg.nr_errors[ATA_ECAT_DUBIOUS_TOUT_HSM] + 1593 arg.nr_errors[ATA_ECAT_DUBIOUS_UNK_DEV] > 1) 1594 verdict |= ATA_EH_SPDN_NCQ_OFF | ATA_EH_SPDN_KEEP_ERRORS; 1595 1596 if (arg.nr_errors[ATA_ECAT_ATA_BUS] + 1597 arg.nr_errors[ATA_ECAT_TOUT_HSM] + 1598 arg.nr_errors[ATA_ECAT_UNK_DEV] > 6) 1599 verdict |= ATA_EH_SPDN_FALLBACK_TO_PIO; 1600 1601 /* scan past 10 mins of error history */ 1602 memset(&arg, 0, sizeof(arg)); 1603 arg.since = j64 - min(j64, j10mins); 1604 ata_ering_map(&dev->ering, speed_down_verdict_cb, &arg); 1605 1606 if (arg.nr_errors[ATA_ECAT_TOUT_HSM] + 1607 arg.nr_errors[ATA_ECAT_UNK_DEV] > 3) 1608 verdict |= ATA_EH_SPDN_NCQ_OFF; 1609 1610 if (arg.nr_errors[ATA_ECAT_ATA_BUS] + 1611 arg.nr_errors[ATA_ECAT_TOUT_HSM] > 3 || 1612 arg.nr_errors[ATA_ECAT_UNK_DEV] > 6) 1613 verdict |= ATA_EH_SPDN_SPEED_DOWN; 1614 1615 return verdict; 1616 } 1617 1618 /** 1619 * ata_eh_speed_down - record error and speed down if necessary 1620 * @dev: Failed device 1621 * @eflags: mask of ATA_EFLAG_* flags 1622 * @err_mask: err_mask of the error 1623 * 1624 * Record error and examine error history to determine whether 1625 * adjusting transmission speed is necessary. It also sets 1626 * transmission limits appropriately if such adjustment is 1627 * necessary. 
1628 * 1629 * LOCKING: 1630 * Kernel thread context (may sleep). 1631 * 1632 * RETURNS: 1633 * Determined recovery action. 1634 */ 1635 static unsigned int ata_eh_speed_down(struct ata_device *dev, 1636 unsigned int eflags, unsigned int err_mask) 1637 { 1638 struct ata_link *link = dev->link; 1639 int xfer_ok = 0; 1640 unsigned int verdict; 1641 unsigned int action = 0; 1642 1643 /* don't bother if Cat-0 error */ 1644 if (ata_eh_categorize_error(eflags, err_mask, &xfer_ok) == 0) 1645 return 0; 1646 1647 /* record error and determine whether speed down is necessary */ 1648 ata_ering_record(&dev->ering, eflags, err_mask); 1649 verdict = ata_eh_speed_down_verdict(dev); 1650 1651 /* turn off NCQ? */ 1652 if ((verdict & ATA_EH_SPDN_NCQ_OFF) && 1653 (dev->flags & (ATA_DFLAG_PIO | ATA_DFLAG_NCQ | 1654 ATA_DFLAG_NCQ_OFF)) == ATA_DFLAG_NCQ) { 1655 dev->flags |= ATA_DFLAG_NCQ_OFF; 1656 ata_dev_printk(dev, KERN_WARNING, 1657 "NCQ disabled due to excessive errors\n"); 1658 goto done; 1659 } 1660 1661 /* speed down? */ 1662 if (verdict & ATA_EH_SPDN_SPEED_DOWN) { 1663 /* speed down SATA link speed if possible */ 1664 if (sata_down_spd_limit(link) == 0) { 1665 action |= ATA_EH_RESET; 1666 goto done; 1667 } 1668 1669 /* lower transfer mode */ 1670 if (dev->spdn_cnt < 2) { 1671 static const int dma_dnxfer_sel[] = 1672 { ATA_DNXFER_DMA, ATA_DNXFER_40C }; 1673 static const int pio_dnxfer_sel[] = 1674 { ATA_DNXFER_PIO, ATA_DNXFER_FORCE_PIO0 }; 1675 int sel; 1676 1677 if (dev->xfer_shift != ATA_SHIFT_PIO) 1678 sel = dma_dnxfer_sel[dev->spdn_cnt]; 1679 else 1680 sel = pio_dnxfer_sel[dev->spdn_cnt]; 1681 1682 dev->spdn_cnt++; 1683 1684 if (ata_down_xfermask_limit(dev, sel) == 0) { 1685 action |= ATA_EH_RESET; 1686 goto done; 1687 } 1688 } 1689 } 1690 1691 /* Fall back to PIO? Slowing down to PIO is meaningless for 1692 * SATA ATA devices. Consider it only for PATA and SATAPI. 
1693 */ 1694 if ((verdict & ATA_EH_SPDN_FALLBACK_TO_PIO) && (dev->spdn_cnt >= 2) && 1695 (link->ap->cbl != ATA_CBL_SATA || dev->class == ATA_DEV_ATAPI) && 1696 (dev->xfer_shift != ATA_SHIFT_PIO)) { 1697 if (ata_down_xfermask_limit(dev, ATA_DNXFER_FORCE_PIO) == 0) { 1698 dev->spdn_cnt = 0; 1699 action |= ATA_EH_RESET; 1700 goto done; 1701 } 1702 } 1703 1704 return 0; 1705 done: 1706 /* device has been slowed down, blow error history */ 1707 if (!(verdict & ATA_EH_SPDN_KEEP_ERRORS)) 1708 ata_ering_clear(&dev->ering); 1709 return action; 1710 } 1711 1712 /** 1713 * ata_eh_link_autopsy - analyze error and determine recovery action 1714 * @link: host link to perform autopsy on 1715 * 1716 * Analyze why @link failed and determine which recovery actions 1717 * are needed. This function also sets more detailed AC_ERR_* 1718 * values and fills sense data for ATAPI CHECK SENSE. 1719 * 1720 * LOCKING: 1721 * Kernel thread context (may sleep). 1722 */ 1723 static void ata_eh_link_autopsy(struct ata_link *link) 1724 { 1725 struct ata_port *ap = link->ap; 1726 struct ata_eh_context *ehc = &link->eh_context; 1727 struct ata_device *dev; 1728 unsigned int all_err_mask = 0, eflags = 0; 1729 int tag; 1730 u32 serror; 1731 int rc; 1732 1733 DPRINTK("ENTER\n"); 1734 1735 if (ehc->i.flags & ATA_EHI_NO_AUTOPSY) 1736 return; 1737 1738 /* obtain and analyze SError */ 1739 rc = sata_scr_read(link, SCR_ERROR, &serror); 1740 if (rc == 0) { 1741 ehc->i.serror |= serror; 1742 ata_eh_analyze_serror(link); 1743 } else if (rc != -EOPNOTSUPP) { 1744 /* SError read failed, force reset and probing */ 1745 ehc->i.probe_mask |= ATA_ALL_DEVICES; 1746 ehc->i.action |= ATA_EH_RESET; 1747 ehc->i.err_mask |= AC_ERR_OTHER; 1748 } 1749 1750 /* analyze NCQ failure */ 1751 ata_eh_analyze_ncq_error(link); 1752 1753 /* any real error trumps AC_ERR_OTHER */ 1754 if (ehc->i.err_mask & ~AC_ERR_OTHER) 1755 ehc->i.err_mask &= ~AC_ERR_OTHER; 1756 1757 all_err_mask |= ehc->i.err_mask; 1758 1759 for (tag = 0; tag < 
ATA_MAX_QUEUE; tag++) { 1760 struct ata_queued_cmd *qc = __ata_qc_from_tag(ap, tag); 1761 1762 if (!(qc->flags & ATA_QCFLAG_FAILED) || qc->dev->link != link) 1763 continue; 1764 1765 /* inherit upper level err_mask */ 1766 qc->err_mask |= ehc->i.err_mask; 1767 1768 /* analyze TF */ 1769 ehc->i.action |= ata_eh_analyze_tf(qc, &qc->result_tf); 1770 1771 /* DEV errors are probably spurious in case of ATA_BUS error */ 1772 if (qc->err_mask & AC_ERR_ATA_BUS) 1773 qc->err_mask &= ~(AC_ERR_DEV | AC_ERR_MEDIA | 1774 AC_ERR_INVALID); 1775 1776 /* any real error trumps unknown error */ 1777 if (qc->err_mask & ~AC_ERR_OTHER) 1778 qc->err_mask &= ~AC_ERR_OTHER; 1779 1780 /* SENSE_VALID trumps dev/unknown error and revalidation */ 1781 if (qc->flags & ATA_QCFLAG_SENSE_VALID) 1782 qc->err_mask &= ~(AC_ERR_DEV | AC_ERR_OTHER); 1783 1784 /* determine whether the command is worth retrying */ 1785 if (!(qc->err_mask & AC_ERR_INVALID) && 1786 ((qc->flags & ATA_QCFLAG_IO) || qc->err_mask != AC_ERR_DEV)) 1787 qc->flags |= ATA_QCFLAG_RETRY; 1788 1789 /* accumulate error info */ 1790 ehc->i.dev = qc->dev; 1791 all_err_mask |= qc->err_mask; 1792 if (qc->flags & ATA_QCFLAG_IO) 1793 eflags |= ATA_EFLAG_IS_IO; 1794 } 1795 1796 /* enforce default EH actions */ 1797 if (ap->pflags & ATA_PFLAG_FROZEN || 1798 all_err_mask & (AC_ERR_HSM | AC_ERR_TIMEOUT)) 1799 ehc->i.action |= ATA_EH_RESET; 1800 else if (((eflags & ATA_EFLAG_IS_IO) && all_err_mask) || 1801 (!(eflags & ATA_EFLAG_IS_IO) && (all_err_mask & ~AC_ERR_DEV))) 1802 ehc->i.action |= ATA_EH_REVALIDATE; 1803 1804 /* If we have offending qcs and the associated failed device, 1805 * perform per-dev EH action only on the offending device. 
1806 */ 1807 if (ehc->i.dev) { 1808 ehc->i.dev_action[ehc->i.dev->devno] |= 1809 ehc->i.action & ATA_EH_PERDEV_MASK; 1810 ehc->i.action &= ~ATA_EH_PERDEV_MASK; 1811 } 1812 1813 /* propagate timeout to host link */ 1814 if ((all_err_mask & AC_ERR_TIMEOUT) && !ata_is_host_link(link)) 1815 ap->link.eh_context.i.err_mask |= AC_ERR_TIMEOUT; 1816 1817 /* record error and consider speeding down */ 1818 dev = ehc->i.dev; 1819 if (!dev && ((ata_link_max_devices(link) == 1 && 1820 ata_dev_enabled(link->device)))) 1821 dev = link->device; 1822 1823 if (dev) { 1824 if (dev->flags & ATA_DFLAG_DUBIOUS_XFER) 1825 eflags |= ATA_EFLAG_DUBIOUS_XFER; 1826 ehc->i.action |= ata_eh_speed_down(dev, eflags, all_err_mask); 1827 } 1828 1829 DPRINTK("EXIT\n"); 1830 } 1831 1832 /** 1833 * ata_eh_autopsy - analyze error and determine recovery action 1834 * @ap: host port to perform autopsy on 1835 * 1836 * Analyze all links of @ap and determine why they failed and 1837 * which recovery actions are needed. 1838 * 1839 * LOCKING: 1840 * Kernel thread context (may sleep). 1841 */ 1842 void ata_eh_autopsy(struct ata_port *ap) 1843 { 1844 struct ata_link *link; 1845 1846 ata_port_for_each_link(link, ap) 1847 ata_eh_link_autopsy(link); 1848 1849 /* Autopsy of fanout ports can affect host link autopsy. 1850 * Perform host link autopsy last. 1851 */ 1852 if (sata_pmp_attached(ap)) 1853 ata_eh_link_autopsy(&ap->link); 1854 } 1855 1856 /** 1857 * ata_eh_link_report - report error handling to user 1858 * @link: ATA link EH is going on 1859 * 1860 * Report EH to user. 1861 * 1862 * LOCKING: 1863 * None. 
1864 */ 1865 static void ata_eh_link_report(struct ata_link *link) 1866 { 1867 struct ata_port *ap = link->ap; 1868 struct ata_eh_context *ehc = &link->eh_context; 1869 const char *frozen, *desc; 1870 char tries_buf[6]; 1871 int tag, nr_failed = 0; 1872 1873 if (ehc->i.flags & ATA_EHI_QUIET) 1874 return; 1875 1876 desc = NULL; 1877 if (ehc->i.desc[0] != '\0') 1878 desc = ehc->i.desc; 1879 1880 for (tag = 0; tag < ATA_MAX_QUEUE; tag++) { 1881 struct ata_queued_cmd *qc = __ata_qc_from_tag(ap, tag); 1882 1883 if (!(qc->flags & ATA_QCFLAG_FAILED) || qc->dev->link != link || 1884 ((qc->flags & ATA_QCFLAG_QUIET) && 1885 qc->err_mask == AC_ERR_DEV)) 1886 continue; 1887 if (qc->flags & ATA_QCFLAG_SENSE_VALID && !qc->err_mask) 1888 continue; 1889 1890 nr_failed++; 1891 } 1892 1893 if (!nr_failed && !ehc->i.err_mask) 1894 return; 1895 1896 frozen = ""; 1897 if (ap->pflags & ATA_PFLAG_FROZEN) 1898 frozen = " frozen"; 1899 1900 memset(tries_buf, 0, sizeof(tries_buf)); 1901 if (ap->eh_tries < ATA_EH_MAX_TRIES) 1902 snprintf(tries_buf, sizeof(tries_buf) - 1, " t%d", 1903 ap->eh_tries); 1904 1905 if (ehc->i.dev) { 1906 ata_dev_printk(ehc->i.dev, KERN_ERR, "exception Emask 0x%x " 1907 "SAct 0x%x SErr 0x%x action 0x%x%s%s\n", 1908 ehc->i.err_mask, link->sactive, ehc->i.serror, 1909 ehc->i.action, frozen, tries_buf); 1910 if (desc) 1911 ata_dev_printk(ehc->i.dev, KERN_ERR, "%s\n", desc); 1912 } else { 1913 ata_link_printk(link, KERN_ERR, "exception Emask 0x%x " 1914 "SAct 0x%x SErr 0x%x action 0x%x%s%s\n", 1915 ehc->i.err_mask, link->sactive, ehc->i.serror, 1916 ehc->i.action, frozen, tries_buf); 1917 if (desc) 1918 ata_link_printk(link, KERN_ERR, "%s\n", desc); 1919 } 1920 1921 if (ehc->i.serror) 1922 ata_port_printk(ap, KERN_ERR, 1923 "SError: { %s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s}\n", 1924 ehc->i.serror & SERR_DATA_RECOVERED ? "RecovData " : "", 1925 ehc->i.serror & SERR_COMM_RECOVERED ? "RecovComm " : "", 1926 ehc->i.serror & SERR_DATA ? 
"UnrecovData " : "", 1927 ehc->i.serror & SERR_PERSISTENT ? "Persist " : "", 1928 ehc->i.serror & SERR_PROTOCOL ? "Proto " : "", 1929 ehc->i.serror & SERR_INTERNAL ? "HostInt " : "", 1930 ehc->i.serror & SERR_PHYRDY_CHG ? "PHYRdyChg " : "", 1931 ehc->i.serror & SERR_PHY_INT_ERR ? "PHYInt " : "", 1932 ehc->i.serror & SERR_COMM_WAKE ? "CommWake " : "", 1933 ehc->i.serror & SERR_10B_8B_ERR ? "10B8B " : "", 1934 ehc->i.serror & SERR_DISPARITY ? "Dispar " : "", 1935 ehc->i.serror & SERR_CRC ? "BadCRC " : "", 1936 ehc->i.serror & SERR_HANDSHAKE ? "Handshk " : "", 1937 ehc->i.serror & SERR_LINK_SEQ_ERR ? "LinkSeq " : "", 1938 ehc->i.serror & SERR_TRANS_ST_ERROR ? "TrStaTrns " : "", 1939 ehc->i.serror & SERR_UNRECOG_FIS ? "UnrecFIS " : "", 1940 ehc->i.serror & SERR_DEV_XCHG ? "DevExch " : ""); 1941 1942 for (tag = 0; tag < ATA_MAX_QUEUE; tag++) { 1943 struct ata_queued_cmd *qc = __ata_qc_from_tag(ap, tag); 1944 struct ata_taskfile *cmd = &qc->tf, *res = &qc->result_tf; 1945 const u8 *cdb = qc->cdb; 1946 char data_buf[20] = ""; 1947 char cdb_buf[70] = ""; 1948 1949 if (!(qc->flags & ATA_QCFLAG_FAILED) || 1950 qc->dev->link != link || !qc->err_mask) 1951 continue; 1952 1953 if (qc->dma_dir != DMA_NONE) { 1954 static const char *dma_str[] = { 1955 [DMA_BIDIRECTIONAL] = "bidi", 1956 [DMA_TO_DEVICE] = "out", 1957 [DMA_FROM_DEVICE] = "in", 1958 }; 1959 static const char *prot_str[] = { 1960 [ATA_PROT_PIO] = "pio", 1961 [ATA_PROT_DMA] = "dma", 1962 [ATA_PROT_NCQ] = "ncq", 1963 [ATAPI_PROT_PIO] = "pio", 1964 [ATAPI_PROT_DMA] = "dma", 1965 }; 1966 1967 snprintf(data_buf, sizeof(data_buf), " %s %u %s", 1968 prot_str[qc->tf.protocol], qc->nbytes, 1969 dma_str[qc->dma_dir]); 1970 } 1971 1972 if (ata_is_atapi(qc->tf.protocol)) 1973 snprintf(cdb_buf, sizeof(cdb_buf), 1974 "cdb %02x %02x %02x %02x %02x %02x %02x %02x " 1975 "%02x %02x %02x %02x %02x %02x %02x %02x\n ", 1976 cdb[0], cdb[1], cdb[2], cdb[3], 1977 cdb[4], cdb[5], cdb[6], cdb[7], 1978 cdb[8], cdb[9], cdb[10], cdb[11], 1979 
cdb[12], cdb[13], cdb[14], cdb[15]); 1980 1981 ata_dev_printk(qc->dev, KERN_ERR, 1982 "cmd %02x/%02x:%02x:%02x:%02x:%02x/%02x:%02x:%02x:%02x:%02x/%02x " 1983 "tag %d%s\n %s" 1984 "res %02x/%02x:%02x:%02x:%02x:%02x/%02x:%02x:%02x:%02x:%02x/%02x " 1985 "Emask 0x%x (%s)%s\n", 1986 cmd->command, cmd->feature, cmd->nsect, 1987 cmd->lbal, cmd->lbam, cmd->lbah, 1988 cmd->hob_feature, cmd->hob_nsect, 1989 cmd->hob_lbal, cmd->hob_lbam, cmd->hob_lbah, 1990 cmd->device, qc->tag, data_buf, cdb_buf, 1991 res->command, res->feature, res->nsect, 1992 res->lbal, res->lbam, res->lbah, 1993 res->hob_feature, res->hob_nsect, 1994 res->hob_lbal, res->hob_lbam, res->hob_lbah, 1995 res->device, qc->err_mask, ata_err_string(qc->err_mask), 1996 qc->err_mask & AC_ERR_NCQ ? " <F>" : ""); 1997 1998 if (res->command & (ATA_BUSY | ATA_DRDY | ATA_DF | ATA_DRQ | 1999 ATA_ERR)) { 2000 if (res->command & ATA_BUSY) 2001 ata_dev_printk(qc->dev, KERN_ERR, 2002 "status: { Busy }\n"); 2003 else 2004 ata_dev_printk(qc->dev, KERN_ERR, 2005 "status: { %s%s%s%s}\n", 2006 res->command & ATA_DRDY ? "DRDY " : "", 2007 res->command & ATA_DF ? "DF " : "", 2008 res->command & ATA_DRQ ? "DRQ " : "", 2009 res->command & ATA_ERR ? "ERR " : ""); 2010 } 2011 2012 if (cmd->command != ATA_CMD_PACKET && 2013 (res->feature & (ATA_ICRC | ATA_UNC | ATA_IDNF | 2014 ATA_ABORTED))) 2015 ata_dev_printk(qc->dev, KERN_ERR, 2016 "error: { %s%s%s%s}\n", 2017 res->feature & ATA_ICRC ? "ICRC " : "", 2018 res->feature & ATA_UNC ? "UNC " : "", 2019 res->feature & ATA_IDNF ? "IDNF " : "", 2020 res->feature & ATA_ABORTED ? "ABRT " : ""); 2021 } 2022 } 2023 2024 /** 2025 * ata_eh_report - report error handling to user 2026 * @ap: ATA port to report EH about 2027 * 2028 * Report EH to user. 2029 * 2030 * LOCKING: 2031 * None. 
2032 */ 2033 void ata_eh_report(struct ata_port *ap) 2034 { 2035 struct ata_link *link; 2036 2037 __ata_port_for_each_link(link, ap) 2038 ata_eh_link_report(link); 2039 } 2040 2041 static int ata_do_reset(struct ata_link *link, ata_reset_fn_t reset, 2042 unsigned int *classes, unsigned long deadline) 2043 { 2044 struct ata_device *dev; 2045 2046 ata_link_for_each_dev(dev, link) 2047 classes[dev->devno] = ATA_DEV_UNKNOWN; 2048 2049 return reset(link, classes, deadline); 2050 } 2051 2052 static int ata_eh_followup_srst_needed(struct ata_link *link, 2053 int rc, int classify, 2054 const unsigned int *classes) 2055 { 2056 if ((link->flags & ATA_LFLAG_NO_SRST) || ata_link_offline(link)) 2057 return 0; 2058 if (rc == -EAGAIN) { 2059 if (classify) 2060 return 1; 2061 rc = 0; 2062 } 2063 if (rc != 0) 2064 return 0; 2065 if (sata_pmp_supported(link->ap) && ata_is_host_link(link)) 2066 return 1; 2067 return 0; 2068 } 2069 2070 int ata_eh_reset(struct ata_link *link, int classify, 2071 ata_prereset_fn_t prereset, ata_reset_fn_t softreset, 2072 ata_reset_fn_t hardreset, ata_postreset_fn_t postreset) 2073 { 2074 const int max_tries = ARRAY_SIZE(ata_eh_reset_timeouts); 2075 struct ata_port *ap = link->ap; 2076 struct ata_eh_context *ehc = &link->eh_context; 2077 unsigned int *classes = ehc->classes; 2078 unsigned int lflags = link->flags; 2079 int verbose = !(ehc->i.flags & ATA_EHI_QUIET); 2080 int try = 0; 2081 struct ata_device *dev; 2082 unsigned long deadline, now; 2083 ata_reset_fn_t reset; 2084 unsigned long flags; 2085 u32 sstatus; 2086 int nr_known, rc; 2087 2088 /* 2089 * Prepare to reset 2090 */ 2091 spin_lock_irqsave(ap->lock, flags); 2092 ap->pflags |= ATA_PFLAG_RESETTING; 2093 spin_unlock_irqrestore(ap->lock, flags); 2094 2095 ata_eh_about_to_do(link, NULL, ATA_EH_RESET); 2096 2097 ata_link_for_each_dev(dev, link) { 2098 /* If we issue an SRST then an ATA drive (not ATAPI) 2099 * may change configuration and be in PIO0 timing. 
If 2100 * we do a hard reset (or are coming from power on) 2101 * this is true for ATA or ATAPI. Until we've set a 2102 * suitable controller mode we should not touch the 2103 * bus as we may be talking too fast. 2104 */ 2105 dev->pio_mode = XFER_PIO_0; 2106 2107 /* If the controller has a pio mode setup function 2108 * then use it to set the chipset to rights. Don't 2109 * touch the DMA setup as that will be dealt with when 2110 * configuring devices. 2111 */ 2112 if (ap->ops->set_piomode) 2113 ap->ops->set_piomode(ap, dev); 2114 } 2115 2116 /* prefer hardreset */ 2117 reset = NULL; 2118 ehc->i.action &= ~ATA_EH_RESET; 2119 if (hardreset) { 2120 reset = hardreset; 2121 ehc->i.action = ATA_EH_HARDRESET; 2122 } else if (softreset) { 2123 reset = softreset; 2124 ehc->i.action = ATA_EH_SOFTRESET; 2125 } 2126 2127 if (prereset) { 2128 rc = prereset(link, 2129 ata_deadline(jiffies, ATA_EH_PRERESET_TIMEOUT)); 2130 if (rc) { 2131 if (rc == -ENOENT) { 2132 ata_link_printk(link, KERN_DEBUG, 2133 "port disabled. ignoring.\n"); 2134 ehc->i.action &= ~ATA_EH_RESET; 2135 2136 ata_link_for_each_dev(dev, link) 2137 classes[dev->devno] = ATA_DEV_NONE; 2138 2139 rc = 0; 2140 } else 2141 ata_link_printk(link, KERN_ERR, 2142 "prereset failed (errno=%d)\n", rc); 2143 goto out; 2144 } 2145 2146 /* prereset() might have cleared ATA_EH_RESET. If so, 2147 * bang classes and return. 2148 */ 2149 if (reset && !(ehc->i.action & ATA_EH_RESET)) { 2150 ata_link_for_each_dev(dev, link) 2151 classes[dev->devno] = ATA_DEV_NONE; 2152 rc = 0; 2153 goto out; 2154 } 2155 } 2156 2157 retry: 2158 /* 2159 * Perform reset 2160 */ 2161 if (ata_is_host_link(link)) 2162 ata_eh_freeze_port(ap); 2163 2164 deadline = ata_deadline(jiffies, ata_eh_reset_timeouts[try++]); 2165 2166 if (reset) { 2167 if (verbose) 2168 ata_link_printk(link, KERN_INFO, "%s resetting link\n", 2169 reset == softreset ? 
"soft" : "hard"); 2170 2171 /* mark that this EH session started with reset */ 2172 if (reset == hardreset) 2173 ehc->i.flags |= ATA_EHI_DID_HARDRESET; 2174 else 2175 ehc->i.flags |= ATA_EHI_DID_SOFTRESET; 2176 2177 rc = ata_do_reset(link, reset, classes, deadline); 2178 2179 if (reset == hardreset && 2180 ata_eh_followup_srst_needed(link, rc, classify, classes)) { 2181 /* okay, let's do follow-up softreset */ 2182 reset = softreset; 2183 2184 if (!reset) { 2185 ata_link_printk(link, KERN_ERR, 2186 "follow-up softreset required " 2187 "but no softreset avaliable\n"); 2188 rc = -EINVAL; 2189 goto fail; 2190 } 2191 2192 ata_eh_about_to_do(link, NULL, ATA_EH_RESET); 2193 rc = ata_do_reset(link, reset, classes, deadline); 2194 } 2195 2196 /* -EAGAIN can happen if we skipped followup SRST */ 2197 if (rc && rc != -EAGAIN) 2198 goto fail; 2199 } else { 2200 if (verbose) 2201 ata_link_printk(link, KERN_INFO, "no reset method " 2202 "available, skipping reset\n"); 2203 if (!(lflags & ATA_LFLAG_ASSUME_CLASS)) 2204 lflags |= ATA_LFLAG_ASSUME_ATA; 2205 } 2206 2207 /* 2208 * Post-reset processing 2209 */ 2210 ata_link_for_each_dev(dev, link) { 2211 /* After the reset, the device state is PIO 0 and the 2212 * controller state is undefined. Reset also wakes up 2213 * drives from sleeping mode. 2214 */ 2215 dev->pio_mode = XFER_PIO_0; 2216 dev->flags &= ~ATA_DFLAG_SLEEPING; 2217 2218 if (ata_link_offline(link)) 2219 continue; 2220 2221 /* apply class override */ 2222 if (lflags & ATA_LFLAG_ASSUME_ATA) 2223 classes[dev->devno] = ATA_DEV_ATA; 2224 else if (lflags & ATA_LFLAG_ASSUME_SEMB) 2225 classes[dev->devno] = ATA_DEV_SEMB_UNSUP; /* not yet */ 2226 } 2227 2228 /* record current link speed */ 2229 if (sata_scr_read(link, SCR_STATUS, &sstatus) == 0) 2230 link->sata_spd = (sstatus >> 4) & 0xf; 2231 2232 /* thaw the port */ 2233 if (ata_is_host_link(link)) 2234 ata_eh_thaw_port(ap); 2235 2236 /* postreset() should clear hardware SError. 
Although SError 2237 * is cleared during link resume, clearing SError here is 2238 * necessary as some PHYs raise hotplug events after SRST. 2239 * This introduces race condition where hotplug occurs between 2240 * reset and here. This race is mediated by cross checking 2241 * link onlineness and classification result later. 2242 */ 2243 if (postreset) 2244 postreset(link, classes); 2245 2246 /* clear cached SError */ 2247 spin_lock_irqsave(link->ap->lock, flags); 2248 link->eh_info.serror = 0; 2249 spin_unlock_irqrestore(link->ap->lock, flags); 2250 2251 /* Make sure onlineness and classification result correspond. 2252 * Hotplug could have happened during reset and some 2253 * controllers fail to wait while a drive is spinning up after 2254 * being hotplugged causing misdetection. By cross checking 2255 * link onlineness and classification result, those conditions 2256 * can be reliably detected and retried. 2257 */ 2258 nr_known = 0; 2259 ata_link_for_each_dev(dev, link) { 2260 /* convert all ATA_DEV_UNKNOWN to ATA_DEV_NONE */ 2261 if (classes[dev->devno] == ATA_DEV_UNKNOWN) 2262 classes[dev->devno] = ATA_DEV_NONE; 2263 else 2264 nr_known++; 2265 } 2266 2267 if (classify && !nr_known && ata_link_online(link)) { 2268 if (try < max_tries) { 2269 ata_link_printk(link, KERN_WARNING, "link online but " 2270 "device misclassified, retrying\n"); 2271 rc = -EAGAIN; 2272 goto fail; 2273 } 2274 ata_link_printk(link, KERN_WARNING, 2275 "link online but device misclassified, " 2276 "device detection might fail\n"); 2277 } 2278 2279 /* reset successful, schedule revalidation */ 2280 ata_eh_done(link, NULL, ATA_EH_RESET); 2281 ehc->i.action |= ATA_EH_REVALIDATE; 2282 2283 rc = 0; 2284 out: 2285 /* clear hotplug flag */ 2286 ehc->i.flags &= ~ATA_EHI_HOTPLUGGED; 2287 2288 spin_lock_irqsave(ap->lock, flags); 2289 ap->pflags &= ~ATA_PFLAG_RESETTING; 2290 spin_unlock_irqrestore(ap->lock, flags); 2291 2292 return rc; 2293 2294 fail: 2295 /* if SCR isn't accessible on a fan-out 
port, PMP needs to be reset */ 2296 if (!ata_is_host_link(link) && 2297 sata_scr_read(link, SCR_STATUS, &sstatus)) 2298 rc = -ERESTART; 2299 2300 if (rc == -ERESTART || try >= max_tries) 2301 goto out; 2302 2303 now = jiffies; 2304 if (time_before(now, deadline)) { 2305 unsigned long delta = deadline - now; 2306 2307 ata_link_printk(link, KERN_WARNING, "reset failed " 2308 "(errno=%d), retrying in %u secs\n", 2309 rc, (jiffies_to_msecs(delta) + 999) / 1000); 2310 2311 while (delta) 2312 delta = schedule_timeout_uninterruptible(delta); 2313 } 2314 2315 if (rc == -EPIPE || try == max_tries - 1) 2316 sata_down_spd_limit(link); 2317 if (hardreset) 2318 reset = hardreset; 2319 goto retry; 2320 } 2321 2322 static int ata_eh_revalidate_and_attach(struct ata_link *link, 2323 struct ata_device **r_failed_dev) 2324 { 2325 struct ata_port *ap = link->ap; 2326 struct ata_eh_context *ehc = &link->eh_context; 2327 struct ata_device *dev; 2328 unsigned int new_mask = 0; 2329 unsigned long flags; 2330 int rc = 0; 2331 2332 DPRINTK("ENTER\n"); 2333 2334 /* For PATA drive side cable detection to work, IDENTIFY must 2335 * be done backwards such that PDIAG- is released by the slave 2336 * device before the master device is identified. 2337 */ 2338 ata_link_for_each_dev_reverse(dev, link) { 2339 unsigned int action = ata_eh_dev_action(dev); 2340 unsigned int readid_flags = 0; 2341 2342 if (ehc->i.flags & ATA_EHI_DID_RESET) 2343 readid_flags |= ATA_READID_POSTRESET; 2344 2345 if ((action & ATA_EH_REVALIDATE) && ata_dev_enabled(dev)) { 2346 WARN_ON(dev->class == ATA_DEV_PMP); 2347 2348 if (ata_link_offline(link)) { 2349 rc = -EIO; 2350 goto err; 2351 } 2352 2353 ata_eh_about_to_do(link, dev, ATA_EH_REVALIDATE); 2354 rc = ata_dev_revalidate(dev, ehc->classes[dev->devno], 2355 readid_flags); 2356 if (rc) 2357 goto err; 2358 2359 ata_eh_done(link, dev, ATA_EH_REVALIDATE); 2360 2361 /* Configuration may have changed, reconfigure 2362 * transfer mode. 
2363 */ 2364 ehc->i.flags |= ATA_EHI_SETMODE; 2365 2366 /* schedule the scsi_rescan_device() here */ 2367 queue_work(ata_aux_wq, &(ap->scsi_rescan_task)); 2368 } else if (dev->class == ATA_DEV_UNKNOWN && 2369 ehc->tries[dev->devno] && 2370 ata_class_enabled(ehc->classes[dev->devno])) { 2371 dev->class = ehc->classes[dev->devno]; 2372 2373 if (dev->class == ATA_DEV_PMP) 2374 rc = sata_pmp_attach(dev); 2375 else 2376 rc = ata_dev_read_id(dev, &dev->class, 2377 readid_flags, dev->id); 2378 switch (rc) { 2379 case 0: 2380 new_mask |= 1 << dev->devno; 2381 break; 2382 case -ENOENT: 2383 /* IDENTIFY was issued to non-existent 2384 * device. No need to reset. Just 2385 * thaw and kill the device. 2386 */ 2387 ata_eh_thaw_port(ap); 2388 dev->class = ATA_DEV_UNKNOWN; 2389 break; 2390 default: 2391 dev->class = ATA_DEV_UNKNOWN; 2392 goto err; 2393 } 2394 } 2395 } 2396 2397 /* PDIAG- should have been released, ask cable type if post-reset */ 2398 if ((ehc->i.flags & ATA_EHI_DID_RESET) && ata_is_host_link(link)) { 2399 if (ap->ops->cable_detect) 2400 ap->cbl = ap->ops->cable_detect(ap); 2401 ata_force_cbl(ap); 2402 } 2403 2404 /* Configure new devices forward such that user doesn't see 2405 * device detection messages backwards. 
2406 */ 2407 ata_link_for_each_dev(dev, link) { 2408 if (!(new_mask & (1 << dev->devno)) || 2409 dev->class == ATA_DEV_PMP) 2410 continue; 2411 2412 ehc->i.flags |= ATA_EHI_PRINTINFO; 2413 rc = ata_dev_configure(dev); 2414 ehc->i.flags &= ~ATA_EHI_PRINTINFO; 2415 if (rc) 2416 goto err; 2417 2418 spin_lock_irqsave(ap->lock, flags); 2419 ap->pflags |= ATA_PFLAG_SCSI_HOTPLUG; 2420 spin_unlock_irqrestore(ap->lock, flags); 2421 2422 /* new device discovered, configure xfermode */ 2423 ehc->i.flags |= ATA_EHI_SETMODE; 2424 } 2425 2426 return 0; 2427 2428 err: 2429 *r_failed_dev = dev; 2430 DPRINTK("EXIT rc=%d\n", rc); 2431 return rc; 2432 } 2433 2434 /** 2435 * ata_set_mode - Program timings and issue SET FEATURES - XFER 2436 * @link: link on which timings will be programmed 2437 * @r_failed_dev: out paramter for failed device 2438 * 2439 * Set ATA device disk transfer mode (PIO3, UDMA6, etc.). If 2440 * ata_set_mode() fails, pointer to the failing device is 2441 * returned in @r_failed_dev. 2442 * 2443 * LOCKING: 2444 * PCI/etc. bus probe sem. 2445 * 2446 * RETURNS: 2447 * 0 on success, negative errno otherwise 2448 */ 2449 int ata_set_mode(struct ata_link *link, struct ata_device **r_failed_dev) 2450 { 2451 struct ata_port *ap = link->ap; 2452 struct ata_device *dev; 2453 int rc; 2454 2455 /* if data transfer is verified, clear DUBIOUS_XFER on ering top */ 2456 ata_link_for_each_dev(dev, link) { 2457 if (!(dev->flags & ATA_DFLAG_DUBIOUS_XFER)) { 2458 struct ata_ering_entry *ent; 2459 2460 ent = ata_ering_top(&dev->ering); 2461 if (ent) 2462 ent->eflags &= ~ATA_EFLAG_DUBIOUS_XFER; 2463 } 2464 } 2465 2466 /* has private set_mode? 
*/ 2467 if (ap->ops->set_mode) 2468 rc = ap->ops->set_mode(link, r_failed_dev); 2469 else 2470 rc = ata_do_set_mode(link, r_failed_dev); 2471 2472 /* if transfer mode has changed, set DUBIOUS_XFER on device */ 2473 ata_link_for_each_dev(dev, link) { 2474 struct ata_eh_context *ehc = &link->eh_context; 2475 u8 saved_xfer_mode = ehc->saved_xfer_mode[dev->devno]; 2476 u8 saved_ncq = !!(ehc->saved_ncq_enabled & (1 << dev->devno)); 2477 2478 if (dev->xfer_mode != saved_xfer_mode || 2479 ata_ncq_enabled(dev) != saved_ncq) 2480 dev->flags |= ATA_DFLAG_DUBIOUS_XFER; 2481 } 2482 2483 return rc; 2484 } 2485 2486 static int ata_link_nr_enabled(struct ata_link *link) 2487 { 2488 struct ata_device *dev; 2489 int cnt = 0; 2490 2491 ata_link_for_each_dev(dev, link) 2492 if (ata_dev_enabled(dev)) 2493 cnt++; 2494 return cnt; 2495 } 2496 2497 static int ata_link_nr_vacant(struct ata_link *link) 2498 { 2499 struct ata_device *dev; 2500 int cnt = 0; 2501 2502 ata_link_for_each_dev(dev, link) 2503 if (dev->class == ATA_DEV_UNKNOWN) 2504 cnt++; 2505 return cnt; 2506 } 2507 2508 static int ata_eh_skip_recovery(struct ata_link *link) 2509 { 2510 struct ata_port *ap = link->ap; 2511 struct ata_eh_context *ehc = &link->eh_context; 2512 struct ata_device *dev; 2513 2514 /* skip disabled links */ 2515 if (link->flags & ATA_LFLAG_DISABLED) 2516 return 1; 2517 2518 /* thaw frozen port and recover failed devices */ 2519 if ((ap->pflags & ATA_PFLAG_FROZEN) || ata_link_nr_enabled(link)) 2520 return 0; 2521 2522 /* reset at least once if reset is requested */ 2523 if ((ehc->i.action & ATA_EH_RESET) && 2524 !(ehc->i.flags & ATA_EHI_DID_RESET)) 2525 return 0; 2526 2527 /* skip if class codes for all vacant slots are ATA_DEV_NONE */ 2528 ata_link_for_each_dev(dev, link) { 2529 if (dev->class == ATA_DEV_UNKNOWN && 2530 ehc->classes[dev->devno] != ATA_DEV_NONE) 2531 return 0; 2532 } 2533 2534 return 1; 2535 } 2536 2537 static int ata_eh_schedule_probe(struct ata_device *dev) 2538 { 2539 struct 
ata_eh_context *ehc = &dev->link->eh_context; 2540 2541 if (!(ehc->i.probe_mask & (1 << dev->devno)) || 2542 (ehc->did_probe_mask & (1 << dev->devno))) 2543 return 0; 2544 2545 ata_eh_detach_dev(dev); 2546 ata_dev_init(dev); 2547 ehc->did_probe_mask |= (1 << dev->devno); 2548 ehc->i.action |= ATA_EH_RESET; 2549 ehc->saved_xfer_mode[dev->devno] = 0; 2550 ehc->saved_ncq_enabled &= ~(1 << dev->devno); 2551 2552 return 1; 2553 } 2554 2555 static int ata_eh_handle_dev_fail(struct ata_device *dev, int err) 2556 { 2557 struct ata_eh_context *ehc = &dev->link->eh_context; 2558 2559 ehc->tries[dev->devno]--; 2560 2561 switch (err) { 2562 case -ENODEV: 2563 /* device missing or wrong IDENTIFY data, schedule probing */ 2564 ehc->i.probe_mask |= (1 << dev->devno); 2565 case -EINVAL: 2566 /* give it just one more chance */ 2567 ehc->tries[dev->devno] = min(ehc->tries[dev->devno], 1); 2568 case -EIO: 2569 if (ehc->tries[dev->devno] == 1 && dev->pio_mode > XFER_PIO_0) { 2570 /* This is the last chance, better to slow 2571 * down than lose it. 
2572 */ 2573 sata_down_spd_limit(dev->link); 2574 ata_down_xfermask_limit(dev, ATA_DNXFER_PIO); 2575 } 2576 } 2577 2578 if (ata_dev_enabled(dev) && !ehc->tries[dev->devno]) { 2579 /* disable device if it has used up all its chances */ 2580 ata_dev_disable(dev); 2581 2582 /* detach if offline */ 2583 if (ata_link_offline(dev->link)) 2584 ata_eh_detach_dev(dev); 2585 2586 /* schedule probe if necessary */ 2587 if (ata_eh_schedule_probe(dev)) 2588 ehc->tries[dev->devno] = ATA_EH_DEV_TRIES; 2589 2590 return 1; 2591 } else { 2592 ehc->i.action |= ATA_EH_RESET; 2593 return 0; 2594 } 2595 } 2596 2597 /** 2598 * ata_eh_recover - recover host port after error 2599 * @ap: host port to recover 2600 * @prereset: prereset method (can be NULL) 2601 * @softreset: softreset method (can be NULL) 2602 * @hardreset: hardreset method (can be NULL) 2603 * @postreset: postreset method (can be NULL) 2604 * @r_failed_link: out parameter for failed link 2605 * 2606 * This is the alpha and omega, eum and yang, heart and soul of 2607 * libata exception handling. On entry, actions required to 2608 * recover each link and hotplug requests are recorded in the 2609 * link's eh_context. This function executes all the operations 2610 * with appropriate retrials and fallbacks to resurrect failed 2611 * devices, detach goners and greet newcomers. 2612 * 2613 * LOCKING: 2614 * Kernel thread context (may sleep). 2615 * 2616 * RETURNS: 2617 * 0 on success, -errno on failure. 
2618 */ 2619 int ata_eh_recover(struct ata_port *ap, ata_prereset_fn_t prereset, 2620 ata_reset_fn_t softreset, ata_reset_fn_t hardreset, 2621 ata_postreset_fn_t postreset, 2622 struct ata_link **r_failed_link) 2623 { 2624 struct ata_link *link; 2625 struct ata_device *dev; 2626 int nr_failed_devs, nr_disabled_devs; 2627 int rc; 2628 unsigned long flags; 2629 2630 DPRINTK("ENTER\n"); 2631 2632 /* prep for recovery */ 2633 ata_port_for_each_link(link, ap) { 2634 struct ata_eh_context *ehc = &link->eh_context; 2635 2636 /* re-enable link? */ 2637 if (ehc->i.action & ATA_EH_ENABLE_LINK) { 2638 ata_eh_about_to_do(link, NULL, ATA_EH_ENABLE_LINK); 2639 spin_lock_irqsave(ap->lock, flags); 2640 link->flags &= ~ATA_LFLAG_DISABLED; 2641 spin_unlock_irqrestore(ap->lock, flags); 2642 ata_eh_done(link, NULL, ATA_EH_ENABLE_LINK); 2643 } 2644 2645 ata_link_for_each_dev(dev, link) { 2646 if (link->flags & ATA_LFLAG_NO_RETRY) 2647 ehc->tries[dev->devno] = 1; 2648 else 2649 ehc->tries[dev->devno] = ATA_EH_DEV_TRIES; 2650 2651 /* collect port action mask recorded in dev actions */ 2652 ehc->i.action |= ehc->i.dev_action[dev->devno] & 2653 ~ATA_EH_PERDEV_MASK; 2654 ehc->i.dev_action[dev->devno] &= ATA_EH_PERDEV_MASK; 2655 2656 /* process hotplug request */ 2657 if (dev->flags & ATA_DFLAG_DETACH) 2658 ata_eh_detach_dev(dev); 2659 2660 /* schedule probe if necessary */ 2661 if (!ata_dev_enabled(dev)) 2662 ata_eh_schedule_probe(dev); 2663 } 2664 } 2665 2666 retry: 2667 rc = 0; 2668 nr_failed_devs = 0; 2669 nr_disabled_devs = 0; 2670 2671 /* if UNLOADING, finish immediately */ 2672 if (ap->pflags & ATA_PFLAG_UNLOADING) 2673 goto out; 2674 2675 /* prep for EH */ 2676 ata_port_for_each_link(link, ap) { 2677 struct ata_eh_context *ehc = &link->eh_context; 2678 2679 /* skip EH if possible. 
*/ 2680 if (ata_eh_skip_recovery(link)) 2681 ehc->i.action = 0; 2682 2683 ata_link_for_each_dev(dev, link) 2684 ehc->classes[dev->devno] = ATA_DEV_UNKNOWN; 2685 } 2686 2687 /* reset */ 2688 ata_port_for_each_link(link, ap) { 2689 struct ata_eh_context *ehc = &link->eh_context; 2690 2691 if (!(ehc->i.action & ATA_EH_RESET)) 2692 continue; 2693 2694 rc = ata_eh_reset(link, ata_link_nr_vacant(link), 2695 prereset, softreset, hardreset, postreset); 2696 if (rc) { 2697 ata_link_printk(link, KERN_ERR, 2698 "reset failed, giving up\n"); 2699 goto out; 2700 } 2701 } 2702 2703 /* the rest */ 2704 ata_port_for_each_link(link, ap) { 2705 struct ata_eh_context *ehc = &link->eh_context; 2706 2707 /* revalidate existing devices and attach new ones */ 2708 rc = ata_eh_revalidate_and_attach(link, &dev); 2709 if (rc) 2710 goto dev_fail; 2711 2712 /* if PMP got attached, return, pmp EH will take care of it */ 2713 if (link->device->class == ATA_DEV_PMP) { 2714 ehc->i.action = 0; 2715 return 0; 2716 } 2717 2718 /* configure transfer mode if necessary */ 2719 if (ehc->i.flags & ATA_EHI_SETMODE) { 2720 rc = ata_set_mode(link, &dev); 2721 if (rc) 2722 goto dev_fail; 2723 ehc->i.flags &= ~ATA_EHI_SETMODE; 2724 } 2725 2726 if (ehc->i.action & ATA_EH_LPM) 2727 ata_link_for_each_dev(dev, link) 2728 ata_dev_enable_pm(dev, ap->pm_policy); 2729 2730 /* this link is okay now */ 2731 ehc->i.flags = 0; 2732 continue; 2733 2734 dev_fail: 2735 nr_failed_devs++; 2736 if (ata_eh_handle_dev_fail(dev, rc)) 2737 nr_disabled_devs++; 2738 2739 if (ap->pflags & ATA_PFLAG_FROZEN) { 2740 /* PMP reset requires working host port. 2741 * Can't retry if it's frozen. 
2742 */ 2743 if (sata_pmp_attached(ap)) 2744 goto out; 2745 break; 2746 } 2747 } 2748 2749 if (nr_failed_devs) { 2750 if (nr_failed_devs != nr_disabled_devs) { 2751 ata_port_printk(ap, KERN_WARNING, "failed to recover " 2752 "some devices, retrying in 5 secs\n"); 2753 ssleep(5); 2754 } else { 2755 /* no device left to recover, repeat fast */ 2756 msleep(500); 2757 } 2758 2759 goto retry; 2760 } 2761 2762 out: 2763 if (rc && r_failed_link) 2764 *r_failed_link = link; 2765 2766 DPRINTK("EXIT, rc=%d\n", rc); 2767 return rc; 2768 } 2769 2770 /** 2771 * ata_eh_finish - finish up EH 2772 * @ap: host port to finish EH for 2773 * 2774 * Recovery is complete. Clean up EH states and retry or finish 2775 * failed qcs. 2776 * 2777 * LOCKING: 2778 * None. 2779 */ 2780 void ata_eh_finish(struct ata_port *ap) 2781 { 2782 int tag; 2783 2784 /* retry or finish qcs */ 2785 for (tag = 0; tag < ATA_MAX_QUEUE; tag++) { 2786 struct ata_queued_cmd *qc = __ata_qc_from_tag(ap, tag); 2787 2788 if (!(qc->flags & ATA_QCFLAG_FAILED)) 2789 continue; 2790 2791 if (qc->err_mask) { 2792 /* FIXME: Once EH migration is complete, 2793 * generate sense data in this function, 2794 * considering both err_mask and tf. 
2795 */ 2796 if (qc->flags & ATA_QCFLAG_RETRY) 2797 ata_eh_qc_retry(qc); 2798 else 2799 ata_eh_qc_complete(qc); 2800 } else { 2801 if (qc->flags & ATA_QCFLAG_SENSE_VALID) { 2802 ata_eh_qc_complete(qc); 2803 } else { 2804 /* feed zero TF to sense generation */ 2805 memset(&qc->result_tf, 0, sizeof(qc->result_tf)); 2806 ata_eh_qc_retry(qc); 2807 } 2808 } 2809 } 2810 2811 /* make sure nr_active_links is zero after EH */ 2812 WARN_ON(ap->nr_active_links); 2813 ap->nr_active_links = 0; 2814 } 2815 2816 /** 2817 * ata_do_eh - do standard error handling 2818 * @ap: host port to handle error for 2819 * 2820 * @prereset: prereset method (can be NULL) 2821 * @softreset: softreset method (can be NULL) 2822 * @hardreset: hardreset method (can be NULL) 2823 * @postreset: postreset method (can be NULL) 2824 * 2825 * Perform standard error handling sequence. 2826 * 2827 * LOCKING: 2828 * Kernel thread context (may sleep). 2829 */ 2830 void ata_do_eh(struct ata_port *ap, ata_prereset_fn_t prereset, 2831 ata_reset_fn_t softreset, ata_reset_fn_t hardreset, 2832 ata_postreset_fn_t postreset) 2833 { 2834 struct ata_device *dev; 2835 int rc; 2836 2837 ata_eh_autopsy(ap); 2838 ata_eh_report(ap); 2839 2840 rc = ata_eh_recover(ap, prereset, softreset, hardreset, postreset, 2841 NULL); 2842 if (rc) { 2843 ata_link_for_each_dev(dev, &ap->link) 2844 ata_dev_disable(dev); 2845 } 2846 2847 ata_eh_finish(ap); 2848 } 2849 2850 /** 2851 * ata_std_error_handler - standard error handler 2852 * @ap: host port to handle error for 2853 * 2854 * Standard error handler 2855 * 2856 * LOCKING: 2857 * Kernel thread context (may sleep). 
2858 */ 2859 void ata_std_error_handler(struct ata_port *ap) 2860 { 2861 struct ata_port_operations *ops = ap->ops; 2862 ata_reset_fn_t hardreset = ops->hardreset; 2863 2864 /* ignore built-in hardreset if SCR access is not available */ 2865 if (ata_is_builtin_hardreset(hardreset) && !sata_scr_valid(&ap->link)) 2866 hardreset = NULL; 2867 2868 ata_do_eh(ap, ops->prereset, ops->softreset, hardreset, ops->postreset); 2869 } 2870 2871 #ifdef CONFIG_PM 2872 /** 2873 * ata_eh_handle_port_suspend - perform port suspend operation 2874 * @ap: port to suspend 2875 * 2876 * Suspend @ap. 2877 * 2878 * LOCKING: 2879 * Kernel thread context (may sleep). 2880 */ 2881 static void ata_eh_handle_port_suspend(struct ata_port *ap) 2882 { 2883 unsigned long flags; 2884 int rc = 0; 2885 2886 /* are we suspending? */ 2887 spin_lock_irqsave(ap->lock, flags); 2888 if (!(ap->pflags & ATA_PFLAG_PM_PENDING) || 2889 ap->pm_mesg.event == PM_EVENT_ON) { 2890 spin_unlock_irqrestore(ap->lock, flags); 2891 return; 2892 } 2893 spin_unlock_irqrestore(ap->lock, flags); 2894 2895 WARN_ON(ap->pflags & ATA_PFLAG_SUSPENDED); 2896 2897 /* tell ACPI we're suspending */ 2898 rc = ata_acpi_on_suspend(ap); 2899 if (rc) 2900 goto out; 2901 2902 /* suspend */ 2903 ata_eh_freeze_port(ap); 2904 2905 if (ap->ops->port_suspend) 2906 rc = ap->ops->port_suspend(ap, ap->pm_mesg); 2907 2908 ata_acpi_set_state(ap, PMSG_SUSPEND); 2909 out: 2910 /* report result */ 2911 spin_lock_irqsave(ap->lock, flags); 2912 2913 ap->pflags &= ~ATA_PFLAG_PM_PENDING; 2914 if (rc == 0) 2915 ap->pflags |= ATA_PFLAG_SUSPENDED; 2916 else if (ap->pflags & ATA_PFLAG_FROZEN) 2917 ata_port_schedule_eh(ap); 2918 2919 if (ap->pm_result) { 2920 *ap->pm_result = rc; 2921 ap->pm_result = NULL; 2922 } 2923 2924 spin_unlock_irqrestore(ap->lock, flags); 2925 2926 return; 2927 } 2928 2929 /** 2930 * ata_eh_handle_port_resume - perform port resume operation 2931 * @ap: port to resume 2932 * 2933 * Resume @ap. 
2934 * 2935 * LOCKING: 2936 * Kernel thread context (may sleep). 2937 */ 2938 static void ata_eh_handle_port_resume(struct ata_port *ap) 2939 { 2940 unsigned long flags; 2941 int rc = 0; 2942 2943 /* are we resuming? */ 2944 spin_lock_irqsave(ap->lock, flags); 2945 if (!(ap->pflags & ATA_PFLAG_PM_PENDING) || 2946 ap->pm_mesg.event != PM_EVENT_ON) { 2947 spin_unlock_irqrestore(ap->lock, flags); 2948 return; 2949 } 2950 spin_unlock_irqrestore(ap->lock, flags); 2951 2952 WARN_ON(!(ap->pflags & ATA_PFLAG_SUSPENDED)); 2953 2954 ata_acpi_set_state(ap, PMSG_ON); 2955 2956 if (ap->ops->port_resume) 2957 rc = ap->ops->port_resume(ap); 2958 2959 /* tell ACPI that we're resuming */ 2960 ata_acpi_on_resume(ap); 2961 2962 /* report result */ 2963 spin_lock_irqsave(ap->lock, flags); 2964 ap->pflags &= ~(ATA_PFLAG_PM_PENDING | ATA_PFLAG_SUSPENDED); 2965 if (ap->pm_result) { 2966 *ap->pm_result = rc; 2967 ap->pm_result = NULL; 2968 } 2969 spin_unlock_irqrestore(ap->lock, flags); 2970 } 2971 #endif /* CONFIG_PM */ 2972