1 /* 2 * libata-eh.c - libata error handling 3 * 4 * Maintained by: Jeff Garzik <jgarzik@pobox.com> 5 * Please ALWAYS copy linux-ide@vger.kernel.org 6 * on emails. 7 * 8 * Copyright 2006 Tejun Heo <htejun@gmail.com> 9 * 10 * 11 * This program is free software; you can redistribute it and/or 12 * modify it under the terms of the GNU General Public License as 13 * published by the Free Software Foundation; either version 2, or 14 * (at your option) any later version. 15 * 16 * This program is distributed in the hope that it will be useful, 17 * but WITHOUT ANY WARRANTY; without even the implied warranty of 18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 19 * General Public License for more details. 20 * 21 * You should have received a copy of the GNU General Public License 22 * along with this program; see the file COPYING. If not, write to 23 * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, 24 * USA. 25 * 26 * 27 * libata documentation is available via 'make {ps|pdf}docs', 28 * as Documentation/DocBook/libata.* 29 * 30 * Hardware documentation available from http://www.t13.org/ and 31 * http://www.sata-io.org/ 32 * 33 */ 34 35 #include <linux/kernel.h> 36 #include <linux/pci.h> 37 #include <scsi/scsi.h> 38 #include <scsi/scsi_host.h> 39 #include <scsi/scsi_eh.h> 40 #include <scsi/scsi_device.h> 41 #include <scsi/scsi_cmnd.h> 42 #include "../scsi/scsi_transport_api.h" 43 44 #include <linux/libata.h> 45 46 #include "libata.h" 47 48 enum { 49 /* speed down verdicts */ 50 ATA_EH_SPDN_NCQ_OFF = (1 << 0), 51 ATA_EH_SPDN_SPEED_DOWN = (1 << 1), 52 ATA_EH_SPDN_FALLBACK_TO_PIO = (1 << 2), 53 ATA_EH_SPDN_KEEP_ERRORS = (1 << 3), 54 55 /* error flags */ 56 ATA_EFLAG_IS_IO = (1 << 0), 57 ATA_EFLAG_DUBIOUS_XFER = (1 << 1), 58 59 /* error categories */ 60 ATA_ECAT_NONE = 0, 61 ATA_ECAT_ATA_BUS = 1, 62 ATA_ECAT_TOUT_HSM = 2, 63 ATA_ECAT_UNK_DEV = 3, 64 ATA_ECAT_DUBIOUS_NONE = 4, 65 ATA_ECAT_DUBIOUS_ATA_BUS = 5, 66 ATA_ECAT_DUBIOUS_TOUT_HSM = 
6, 67 ATA_ECAT_DUBIOUS_UNK_DEV = 7, 68 ATA_ECAT_NR = 8, 69 }; 70 71 /* Waiting in ->prereset can never be reliable. It's sometimes nice 72 * to wait there but it can't be depended upon; otherwise, we wouldn't 73 * be resetting. Just give it enough time for most drives to spin up. 74 */ 75 enum { 76 ATA_EH_PRERESET_TIMEOUT = 10 * HZ, 77 ATA_EH_FASTDRAIN_INTERVAL = 3 * HZ, 78 }; 79 80 /* The following table determines how we sequence resets. Each entry 81 * represents timeout for that try. The first try can be soft or 82 * hardreset. All others are hardreset if available. In most cases 83 * the first reset w/ 10sec timeout should succeed. Following entries 84 * are mostly for error handling, hotplug and retarded devices. 85 */ 86 static const unsigned long ata_eh_reset_timeouts[] = { 87 10 * HZ, /* most drives spin up by 10sec */ 88 10 * HZ, /* > 99% working drives spin up before 20sec */ 89 35 * HZ, /* give > 30 secs of idleness for retarded devices */ 90 5 * HZ, /* and sweet one last chance */ 91 /* > 1 min has elapsed, give up */ 92 }; 93 94 static void __ata_port_freeze(struct ata_port *ap); 95 #ifdef CONFIG_PM 96 static void ata_eh_handle_port_suspend(struct ata_port *ap); 97 static void ata_eh_handle_port_resume(struct ata_port *ap); 98 #else /* CONFIG_PM */ 99 static void ata_eh_handle_port_suspend(struct ata_port *ap) 100 { } 101 102 static void ata_eh_handle_port_resume(struct ata_port *ap) 103 { } 104 #endif /* CONFIG_PM */ 105 106 static void __ata_ehi_pushv_desc(struct ata_eh_info *ehi, const char *fmt, 107 va_list args) 108 { 109 ehi->desc_len += vscnprintf(ehi->desc + ehi->desc_len, 110 ATA_EH_DESC_LEN - ehi->desc_len, 111 fmt, args); 112 } 113 114 /** 115 * __ata_ehi_push_desc - push error description without adding separator 116 * @ehi: target EHI 117 * @fmt: printf format string 118 * 119 * Format string according to @fmt and append it to @ehi->desc. 
120 * 121 * LOCKING: 122 * spin_lock_irqsave(host lock) 123 */ 124 void __ata_ehi_push_desc(struct ata_eh_info *ehi, const char *fmt, ...) 125 { 126 va_list args; 127 128 va_start(args, fmt); 129 __ata_ehi_pushv_desc(ehi, fmt, args); 130 va_end(args); 131 } 132 133 /** 134 * ata_ehi_push_desc - push error description with separator 135 * @ehi: target EHI 136 * @fmt: printf format string 137 * 138 * Format string according to @fmt and append it to @ehi->desc. 139 * If @ehi->desc is not empty, ", " is added in-between. 140 * 141 * LOCKING: 142 * spin_lock_irqsave(host lock) 143 */ 144 void ata_ehi_push_desc(struct ata_eh_info *ehi, const char *fmt, ...) 145 { 146 va_list args; 147 148 if (ehi->desc_len) 149 __ata_ehi_push_desc(ehi, ", "); 150 151 va_start(args, fmt); 152 __ata_ehi_pushv_desc(ehi, fmt, args); 153 va_end(args); 154 } 155 156 /** 157 * ata_ehi_clear_desc - clean error description 158 * @ehi: target EHI 159 * 160 * Clear @ehi->desc. 161 * 162 * LOCKING: 163 * spin_lock_irqsave(host lock) 164 */ 165 void ata_ehi_clear_desc(struct ata_eh_info *ehi) 166 { 167 ehi->desc[0] = '\0'; 168 ehi->desc_len = 0; 169 } 170 171 /** 172 * ata_port_desc - append port description 173 * @ap: target ATA port 174 * @fmt: printf format string 175 * 176 * Format string according to @fmt and append it to port 177 * description. If port description is not empty, " " is added 178 * in-between. This function is to be used while initializing 179 * ata_host. The description is printed on host registration. 180 * 181 * LOCKING: 182 * None. 183 */ 184 void ata_port_desc(struct ata_port *ap, const char *fmt, ...) 
185 { 186 va_list args; 187 188 WARN_ON(!(ap->pflags & ATA_PFLAG_INITIALIZING)); 189 190 if (ap->link.eh_info.desc_len) 191 __ata_ehi_push_desc(&ap->link.eh_info, " "); 192 193 va_start(args, fmt); 194 __ata_ehi_pushv_desc(&ap->link.eh_info, fmt, args); 195 va_end(args); 196 } 197 198 #ifdef CONFIG_PCI 199 200 /** 201 * ata_port_pbar_desc - append PCI BAR description 202 * @ap: target ATA port 203 * @bar: target PCI BAR 204 * @offset: offset into PCI BAR 205 * @name: name of the area 206 * 207 * If @offset is negative, this function formats a string which 208 * contains the name, address, size and type of the BAR and 209 * appends it to the port description. If @offset is zero or 210 * positive, only name and offsetted address is appended. 211 * 212 * LOCKING: 213 * None. 214 */ 215 void ata_port_pbar_desc(struct ata_port *ap, int bar, ssize_t offset, 216 const char *name) 217 { 218 struct pci_dev *pdev = to_pci_dev(ap->host->dev); 219 char *type = ""; 220 unsigned long long start, len; 221 222 if (pci_resource_flags(pdev, bar) & IORESOURCE_MEM) 223 type = "m"; 224 else if (pci_resource_flags(pdev, bar) & IORESOURCE_IO) 225 type = "i"; 226 227 start = (unsigned long long)pci_resource_start(pdev, bar); 228 len = (unsigned long long)pci_resource_len(pdev, bar); 229 230 if (offset < 0) 231 ata_port_desc(ap, "%s %s%llu@0x%llx", name, type, len, start); 232 else 233 ata_port_desc(ap, "%s 0x%llx", name, 234 start + (unsigned long long)offset); 235 } 236 237 #endif /* CONFIG_PCI */ 238 239 static void ata_ering_record(struct ata_ering *ering, unsigned int eflags, 240 unsigned int err_mask) 241 { 242 struct ata_ering_entry *ent; 243 244 WARN_ON(!err_mask); 245 246 ering->cursor++; 247 ering->cursor %= ATA_ERING_SIZE; 248 249 ent = &ering->ring[ering->cursor]; 250 ent->eflags = eflags; 251 ent->err_mask = err_mask; 252 ent->timestamp = get_jiffies_64(); 253 } 254 255 static struct ata_ering_entry *ata_ering_top(struct ata_ering *ering) 256 { 257 struct ata_ering_entry *ent 
= &ering->ring[ering->cursor]; 258 259 if (ent->err_mask) 260 return ent; 261 return NULL; 262 } 263 264 static void ata_ering_clear(struct ata_ering *ering) 265 { 266 memset(ering, 0, sizeof(*ering)); 267 } 268 269 static int ata_ering_map(struct ata_ering *ering, 270 int (*map_fn)(struct ata_ering_entry *, void *), 271 void *arg) 272 { 273 int idx, rc = 0; 274 struct ata_ering_entry *ent; 275 276 idx = ering->cursor; 277 do { 278 ent = &ering->ring[idx]; 279 if (!ent->err_mask) 280 break; 281 rc = map_fn(ent, arg); 282 if (rc) 283 break; 284 idx = (idx - 1 + ATA_ERING_SIZE) % ATA_ERING_SIZE; 285 } while (idx != ering->cursor); 286 287 return rc; 288 } 289 290 static unsigned int ata_eh_dev_action(struct ata_device *dev) 291 { 292 struct ata_eh_context *ehc = &dev->link->eh_context; 293 294 return ehc->i.action | ehc->i.dev_action[dev->devno]; 295 } 296 297 static void ata_eh_clear_action(struct ata_link *link, struct ata_device *dev, 298 struct ata_eh_info *ehi, unsigned int action) 299 { 300 struct ata_device *tdev; 301 302 if (!dev) { 303 ehi->action &= ~action; 304 ata_link_for_each_dev(tdev, link) 305 ehi->dev_action[tdev->devno] &= ~action; 306 } else { 307 /* doesn't make sense for port-wide EH actions */ 308 WARN_ON(!(action & ATA_EH_PERDEV_MASK)); 309 310 /* break ehi->action into ehi->dev_action */ 311 if (ehi->action & action) { 312 ata_link_for_each_dev(tdev, link) 313 ehi->dev_action[tdev->devno] |= 314 ehi->action & action; 315 ehi->action &= ~action; 316 } 317 318 /* turn off the specified per-dev action */ 319 ehi->dev_action[dev->devno] &= ~action; 320 } 321 } 322 323 /** 324 * ata_scsi_timed_out - SCSI layer time out callback 325 * @cmd: timed out SCSI command 326 * 327 * Handles SCSI layer timeout. We race with normal completion of 328 * the qc for @cmd. If the qc is already gone, we lose and let 329 * the scsi command finish (EH_HANDLED). Otherwise, the qc has 330 * timed out and EH should be invoked. 
Prevent ata_qc_complete() 331 * from finishing it by setting EH_SCHEDULED and return 332 * EH_NOT_HANDLED. 333 * 334 * TODO: kill this function once old EH is gone. 335 * 336 * LOCKING: 337 * Called from timer context 338 * 339 * RETURNS: 340 * EH_HANDLED or EH_NOT_HANDLED 341 */ 342 enum scsi_eh_timer_return ata_scsi_timed_out(struct scsi_cmnd *cmd) 343 { 344 struct Scsi_Host *host = cmd->device->host; 345 struct ata_port *ap = ata_shost_to_port(host); 346 unsigned long flags; 347 struct ata_queued_cmd *qc; 348 enum scsi_eh_timer_return ret; 349 350 DPRINTK("ENTER\n"); 351 352 if (ap->ops->error_handler) { 353 ret = EH_NOT_HANDLED; 354 goto out; 355 } 356 357 ret = EH_HANDLED; 358 spin_lock_irqsave(ap->lock, flags); 359 qc = ata_qc_from_tag(ap, ap->link.active_tag); 360 if (qc) { 361 WARN_ON(qc->scsicmd != cmd); 362 qc->flags |= ATA_QCFLAG_EH_SCHEDULED; 363 qc->err_mask |= AC_ERR_TIMEOUT; 364 ret = EH_NOT_HANDLED; 365 } 366 spin_unlock_irqrestore(ap->lock, flags); 367 368 out: 369 DPRINTK("EXIT, ret=%d\n", ret); 370 return ret; 371 } 372 373 /** 374 * ata_scsi_error - SCSI layer error handler callback 375 * @host: SCSI host on which error occurred 376 * 377 * Handles SCSI-layer-thrown error events. 378 * 379 * LOCKING: 380 * Inherited from SCSI layer (none, can sleep) 381 * 382 * RETURNS: 383 * Zero. 384 */ 385 void ata_scsi_error(struct Scsi_Host *host) 386 { 387 struct ata_port *ap = ata_shost_to_port(host); 388 int i; 389 unsigned long flags; 390 391 DPRINTK("ENTER\n"); 392 393 /* synchronize with port task */ 394 ata_port_flush_task(ap); 395 396 /* synchronize with host lock and sort out timeouts */ 397 398 /* For new EH, all qcs are finished in one of three ways - 399 * normal completion, error completion, and SCSI timeout. 400 * Both cmpletions can race against SCSI timeout. When normal 401 * completion wins, the qc never reaches EH. When error 402 * completion wins, the qc has ATA_QCFLAG_FAILED set. 
403 * 404 * When SCSI timeout wins, things are a bit more complex. 405 * Normal or error completion can occur after the timeout but 406 * before this point. In such cases, both types of 407 * completions are honored. A scmd is determined to have 408 * timed out iff its associated qc is active and not failed. 409 */ 410 if (ap->ops->error_handler) { 411 struct scsi_cmnd *scmd, *tmp; 412 int nr_timedout = 0; 413 414 spin_lock_irqsave(ap->lock, flags); 415 416 list_for_each_entry_safe(scmd, tmp, &host->eh_cmd_q, eh_entry) { 417 struct ata_queued_cmd *qc; 418 419 for (i = 0; i < ATA_MAX_QUEUE; i++) { 420 qc = __ata_qc_from_tag(ap, i); 421 if (qc->flags & ATA_QCFLAG_ACTIVE && 422 qc->scsicmd == scmd) 423 break; 424 } 425 426 if (i < ATA_MAX_QUEUE) { 427 /* the scmd has an associated qc */ 428 if (!(qc->flags & ATA_QCFLAG_FAILED)) { 429 /* which hasn't failed yet, timeout */ 430 qc->err_mask |= AC_ERR_TIMEOUT; 431 qc->flags |= ATA_QCFLAG_FAILED; 432 nr_timedout++; 433 } 434 } else { 435 /* Normal completion occurred after 436 * SCSI timeout but before this point. 437 * Successfully complete it. 438 */ 439 scmd->retries = scmd->allowed; 440 scsi_eh_finish_cmd(scmd, &ap->eh_done_q); 441 } 442 } 443 444 /* If we have timed out qcs. They belong to EH from 445 * this point but the state of the controller is 446 * unknown. Freeze the port to make sure the IRQ 447 * handler doesn't diddle with those qcs. This must 448 * be done atomically w.r.t. setting QCFLAG_FAILED. 
449 */ 450 if (nr_timedout) 451 __ata_port_freeze(ap); 452 453 spin_unlock_irqrestore(ap->lock, flags); 454 455 /* initialize eh_tries */ 456 ap->eh_tries = ATA_EH_MAX_TRIES; 457 } else 458 spin_unlock_wait(ap->lock); 459 460 repeat: 461 /* invoke error handler */ 462 if (ap->ops->error_handler) { 463 struct ata_link *link; 464 465 /* kill fast drain timer */ 466 del_timer_sync(&ap->fastdrain_timer); 467 468 /* process port resume request */ 469 ata_eh_handle_port_resume(ap); 470 471 /* fetch & clear EH info */ 472 spin_lock_irqsave(ap->lock, flags); 473 474 __ata_port_for_each_link(link, ap) { 475 struct ata_eh_context *ehc = &link->eh_context; 476 struct ata_device *dev; 477 478 memset(&link->eh_context, 0, sizeof(link->eh_context)); 479 link->eh_context.i = link->eh_info; 480 memset(&link->eh_info, 0, sizeof(link->eh_info)); 481 482 ata_link_for_each_dev(dev, link) { 483 int devno = dev->devno; 484 485 ehc->saved_xfer_mode[devno] = dev->xfer_mode; 486 if (ata_ncq_enabled(dev)) 487 ehc->saved_ncq_enabled |= 1 << devno; 488 } 489 } 490 491 ap->pflags |= ATA_PFLAG_EH_IN_PROGRESS; 492 ap->pflags &= ~ATA_PFLAG_EH_PENDING; 493 ap->excl_link = NULL; /* don't maintain exclusion over EH */ 494 495 spin_unlock_irqrestore(ap->lock, flags); 496 497 /* invoke EH, skip if unloading or suspended */ 498 if (!(ap->pflags & (ATA_PFLAG_UNLOADING | ATA_PFLAG_SUSPENDED))) 499 ap->ops->error_handler(ap); 500 else 501 ata_eh_finish(ap); 502 503 /* process port suspend request */ 504 ata_eh_handle_port_suspend(ap); 505 506 /* Exception might have happend after ->error_handler 507 * recovered the port but before this point. Repeat 508 * EH in such case. 
509 */ 510 spin_lock_irqsave(ap->lock, flags); 511 512 if (ap->pflags & ATA_PFLAG_EH_PENDING) { 513 if (--ap->eh_tries) { 514 spin_unlock_irqrestore(ap->lock, flags); 515 goto repeat; 516 } 517 ata_port_printk(ap, KERN_ERR, "EH pending after %d " 518 "tries, giving up\n", ATA_EH_MAX_TRIES); 519 ap->pflags &= ~ATA_PFLAG_EH_PENDING; 520 } 521 522 /* this run is complete, make sure EH info is clear */ 523 __ata_port_for_each_link(link, ap) 524 memset(&link->eh_info, 0, sizeof(link->eh_info)); 525 526 /* Clear host_eh_scheduled while holding ap->lock such 527 * that if exception occurs after this point but 528 * before EH completion, SCSI midlayer will 529 * re-initiate EH. 530 */ 531 host->host_eh_scheduled = 0; 532 533 spin_unlock_irqrestore(ap->lock, flags); 534 } else { 535 WARN_ON(ata_qc_from_tag(ap, ap->link.active_tag) == NULL); 536 ap->ops->eng_timeout(ap); 537 } 538 539 /* finish or retry handled scmd's and clean up */ 540 WARN_ON(host->host_failed || !list_empty(&host->eh_cmd_q)); 541 542 scsi_eh_flush_done_q(&ap->eh_done_q); 543 544 /* clean up */ 545 spin_lock_irqsave(ap->lock, flags); 546 547 if (ap->pflags & ATA_PFLAG_LOADING) 548 ap->pflags &= ~ATA_PFLAG_LOADING; 549 else if (ap->pflags & ATA_PFLAG_SCSI_HOTPLUG) 550 queue_delayed_work(ata_aux_wq, &ap->hotplug_task, 0); 551 552 if (ap->pflags & ATA_PFLAG_RECOVERED) 553 ata_port_printk(ap, KERN_INFO, "EH complete\n"); 554 555 ap->pflags &= ~(ATA_PFLAG_SCSI_HOTPLUG | ATA_PFLAG_RECOVERED); 556 557 /* tell wait_eh that we're done */ 558 ap->pflags &= ~ATA_PFLAG_EH_IN_PROGRESS; 559 wake_up_all(&ap->eh_wait_q); 560 561 spin_unlock_irqrestore(ap->lock, flags); 562 563 DPRINTK("EXIT\n"); 564 } 565 566 /** 567 * ata_port_wait_eh - Wait for the currently pending EH to complete 568 * @ap: Port to wait EH for 569 * 570 * Wait until the currently pending EH is complete. 571 * 572 * LOCKING: 573 * Kernel thread context (may sleep). 
574 */ 575 void ata_port_wait_eh(struct ata_port *ap) 576 { 577 unsigned long flags; 578 DEFINE_WAIT(wait); 579 580 retry: 581 spin_lock_irqsave(ap->lock, flags); 582 583 while (ap->pflags & (ATA_PFLAG_EH_PENDING | ATA_PFLAG_EH_IN_PROGRESS)) { 584 prepare_to_wait(&ap->eh_wait_q, &wait, TASK_UNINTERRUPTIBLE); 585 spin_unlock_irqrestore(ap->lock, flags); 586 schedule(); 587 spin_lock_irqsave(ap->lock, flags); 588 } 589 finish_wait(&ap->eh_wait_q, &wait); 590 591 spin_unlock_irqrestore(ap->lock, flags); 592 593 /* make sure SCSI EH is complete */ 594 if (scsi_host_in_recovery(ap->scsi_host)) { 595 msleep(10); 596 goto retry; 597 } 598 } 599 600 static int ata_eh_nr_in_flight(struct ata_port *ap) 601 { 602 unsigned int tag; 603 int nr = 0; 604 605 /* count only non-internal commands */ 606 for (tag = 0; tag < ATA_MAX_QUEUE - 1; tag++) 607 if (ata_qc_from_tag(ap, tag)) 608 nr++; 609 610 return nr; 611 } 612 613 void ata_eh_fastdrain_timerfn(unsigned long arg) 614 { 615 struct ata_port *ap = (void *)arg; 616 unsigned long flags; 617 int cnt; 618 619 spin_lock_irqsave(ap->lock, flags); 620 621 cnt = ata_eh_nr_in_flight(ap); 622 623 /* are we done? */ 624 if (!cnt) 625 goto out_unlock; 626 627 if (cnt == ap->fastdrain_cnt) { 628 unsigned int tag; 629 630 /* No progress during the last interval, tag all 631 * in-flight qcs as timed out and freeze the port. 
632 */ 633 for (tag = 0; tag < ATA_MAX_QUEUE - 1; tag++) { 634 struct ata_queued_cmd *qc = ata_qc_from_tag(ap, tag); 635 if (qc) 636 qc->err_mask |= AC_ERR_TIMEOUT; 637 } 638 639 ata_port_freeze(ap); 640 } else { 641 /* some qcs have finished, give it another chance */ 642 ap->fastdrain_cnt = cnt; 643 ap->fastdrain_timer.expires = 644 jiffies + ATA_EH_FASTDRAIN_INTERVAL; 645 add_timer(&ap->fastdrain_timer); 646 } 647 648 out_unlock: 649 spin_unlock_irqrestore(ap->lock, flags); 650 } 651 652 /** 653 * ata_eh_set_pending - set ATA_PFLAG_EH_PENDING and activate fast drain 654 * @ap: target ATA port 655 * @fastdrain: activate fast drain 656 * 657 * Set ATA_PFLAG_EH_PENDING and activate fast drain if @fastdrain 658 * is non-zero and EH wasn't pending before. Fast drain ensures 659 * that EH kicks in in timely manner. 660 * 661 * LOCKING: 662 * spin_lock_irqsave(host lock) 663 */ 664 static void ata_eh_set_pending(struct ata_port *ap, int fastdrain) 665 { 666 int cnt; 667 668 /* already scheduled? */ 669 if (ap->pflags & ATA_PFLAG_EH_PENDING) 670 return; 671 672 ap->pflags |= ATA_PFLAG_EH_PENDING; 673 674 if (!fastdrain) 675 return; 676 677 /* do we have in-flight qcs? */ 678 cnt = ata_eh_nr_in_flight(ap); 679 if (!cnt) 680 return; 681 682 /* activate fast drain */ 683 ap->fastdrain_cnt = cnt; 684 ap->fastdrain_timer.expires = jiffies + ATA_EH_FASTDRAIN_INTERVAL; 685 add_timer(&ap->fastdrain_timer); 686 } 687 688 /** 689 * ata_qc_schedule_eh - schedule qc for error handling 690 * @qc: command to schedule error handling for 691 * 692 * Schedule error handling for @qc. EH will kick in as soon as 693 * other commands are drained. 694 * 695 * LOCKING: 696 * spin_lock_irqsave(host lock) 697 */ 698 void ata_qc_schedule_eh(struct ata_queued_cmd *qc) 699 { 700 struct ata_port *ap = qc->ap; 701 702 WARN_ON(!ap->ops->error_handler); 703 704 qc->flags |= ATA_QCFLAG_FAILED; 705 ata_eh_set_pending(ap, 1); 706 707 /* The following will fail if timeout has already expired. 
708 * ata_scsi_error() takes care of such scmds on EH entry. 709 * Note that ATA_QCFLAG_FAILED is unconditionally set after 710 * this function completes. 711 */ 712 scsi_req_abort_cmd(qc->scsicmd); 713 } 714 715 /** 716 * ata_port_schedule_eh - schedule error handling without a qc 717 * @ap: ATA port to schedule EH for 718 * 719 * Schedule error handling for @ap. EH will kick in as soon as 720 * all commands are drained. 721 * 722 * LOCKING: 723 * spin_lock_irqsave(host lock) 724 */ 725 void ata_port_schedule_eh(struct ata_port *ap) 726 { 727 WARN_ON(!ap->ops->error_handler); 728 729 if (ap->pflags & ATA_PFLAG_INITIALIZING) 730 return; 731 732 ata_eh_set_pending(ap, 1); 733 scsi_schedule_eh(ap->scsi_host); 734 735 DPRINTK("port EH scheduled\n"); 736 } 737 738 static int ata_do_link_abort(struct ata_port *ap, struct ata_link *link) 739 { 740 int tag, nr_aborted = 0; 741 742 WARN_ON(!ap->ops->error_handler); 743 744 /* we're gonna abort all commands, no need for fast drain */ 745 ata_eh_set_pending(ap, 0); 746 747 for (tag = 0; tag < ATA_MAX_QUEUE; tag++) { 748 struct ata_queued_cmd *qc = ata_qc_from_tag(ap, tag); 749 750 if (qc && (!link || qc->dev->link == link)) { 751 qc->flags |= ATA_QCFLAG_FAILED; 752 ata_qc_complete(qc); 753 nr_aborted++; 754 } 755 } 756 757 if (!nr_aborted) 758 ata_port_schedule_eh(ap); 759 760 return nr_aborted; 761 } 762 763 /** 764 * ata_link_abort - abort all qc's on the link 765 * @link: ATA link to abort qc's for 766 * 767 * Abort all active qc's active on @link and schedule EH. 768 * 769 * LOCKING: 770 * spin_lock_irqsave(host lock) 771 * 772 * RETURNS: 773 * Number of aborted qc's. 774 */ 775 int ata_link_abort(struct ata_link *link) 776 { 777 return ata_do_link_abort(link->ap, link); 778 } 779 780 /** 781 * ata_port_abort - abort all qc's on the port 782 * @ap: ATA port to abort qc's for 783 * 784 * Abort all active qc's of @ap and schedule EH. 
785 * 786 * LOCKING: 787 * spin_lock_irqsave(host_set lock) 788 * 789 * RETURNS: 790 * Number of aborted qc's. 791 */ 792 int ata_port_abort(struct ata_port *ap) 793 { 794 return ata_do_link_abort(ap, NULL); 795 } 796 797 /** 798 * __ata_port_freeze - freeze port 799 * @ap: ATA port to freeze 800 * 801 * This function is called when HSM violation or some other 802 * condition disrupts normal operation of the port. Frozen port 803 * is not allowed to perform any operation until the port is 804 * thawed, which usually follows a successful reset. 805 * 806 * ap->ops->freeze() callback can be used for freezing the port 807 * hardware-wise (e.g. mask interrupt and stop DMA engine). If a 808 * port cannot be frozen hardware-wise, the interrupt handler 809 * must ack and clear interrupts unconditionally while the port 810 * is frozen. 811 * 812 * LOCKING: 813 * spin_lock_irqsave(host lock) 814 */ 815 static void __ata_port_freeze(struct ata_port *ap) 816 { 817 WARN_ON(!ap->ops->error_handler); 818 819 if (ap->ops->freeze) 820 ap->ops->freeze(ap); 821 822 ap->pflags |= ATA_PFLAG_FROZEN; 823 824 DPRINTK("ata%u port frozen\n", ap->print_id); 825 } 826 827 /** 828 * ata_port_freeze - abort & freeze port 829 * @ap: ATA port to freeze 830 * 831 * Abort and freeze @ap. 832 * 833 * LOCKING: 834 * spin_lock_irqsave(host lock) 835 * 836 * RETURNS: 837 * Number of aborted commands. 838 */ 839 int ata_port_freeze(struct ata_port *ap) 840 { 841 int nr_aborted; 842 843 WARN_ON(!ap->ops->error_handler); 844 845 nr_aborted = ata_port_abort(ap); 846 __ata_port_freeze(ap); 847 848 return nr_aborted; 849 } 850 851 /** 852 * sata_async_notification - SATA async notification handler 853 * @ap: ATA port where async notification is received 854 * 855 * Handler to be called when async notification via SDB FIS is 856 * received. This function schedules EH if necessary. 857 * 858 * LOCKING: 859 * spin_lock_irqsave(host lock) 860 * 861 * RETURNS: 862 * 1 if EH is scheduled, 0 otherwise. 
863 */ 864 int sata_async_notification(struct ata_port *ap) 865 { 866 u32 sntf; 867 int rc; 868 869 if (!(ap->flags & ATA_FLAG_AN)) 870 return 0; 871 872 rc = sata_scr_read(&ap->link, SCR_NOTIFICATION, &sntf); 873 if (rc == 0) 874 sata_scr_write(&ap->link, SCR_NOTIFICATION, sntf); 875 876 if (!ap->nr_pmp_links || rc) { 877 /* PMP is not attached or SNTF is not available */ 878 if (!ap->nr_pmp_links) { 879 /* PMP is not attached. Check whether ATAPI 880 * AN is configured. If so, notify media 881 * change. 882 */ 883 struct ata_device *dev = ap->link.device; 884 885 if ((dev->class == ATA_DEV_ATAPI) && 886 (dev->flags & ATA_DFLAG_AN)) 887 ata_scsi_media_change_notify(dev); 888 return 0; 889 } else { 890 /* PMP is attached but SNTF is not available. 891 * ATAPI async media change notification is 892 * not used. The PMP must be reporting PHY 893 * status change, schedule EH. 894 */ 895 ata_port_schedule_eh(ap); 896 return 1; 897 } 898 } else { 899 /* PMP is attached and SNTF is available */ 900 struct ata_link *link; 901 902 /* check and notify ATAPI AN */ 903 ata_port_for_each_link(link, ap) { 904 if (!(sntf & (1 << link->pmp))) 905 continue; 906 907 if ((link->device->class == ATA_DEV_ATAPI) && 908 (link->device->flags & ATA_DFLAG_AN)) 909 ata_scsi_media_change_notify(link->device); 910 } 911 912 /* If PMP is reporting that PHY status of some 913 * downstream ports has changed, schedule EH. 914 */ 915 if (sntf & (1 << SATA_PMP_CTRL_PORT)) { 916 ata_port_schedule_eh(ap); 917 return 1; 918 } 919 920 return 0; 921 } 922 } 923 924 /** 925 * ata_eh_freeze_port - EH helper to freeze port 926 * @ap: ATA port to freeze 927 * 928 * Freeze @ap. 929 * 930 * LOCKING: 931 * None. 
932 */ 933 void ata_eh_freeze_port(struct ata_port *ap) 934 { 935 unsigned long flags; 936 937 if (!ap->ops->error_handler) 938 return; 939 940 spin_lock_irqsave(ap->lock, flags); 941 __ata_port_freeze(ap); 942 spin_unlock_irqrestore(ap->lock, flags); 943 } 944 945 /** 946 * ata_port_thaw_port - EH helper to thaw port 947 * @ap: ATA port to thaw 948 * 949 * Thaw frozen port @ap. 950 * 951 * LOCKING: 952 * None. 953 */ 954 void ata_eh_thaw_port(struct ata_port *ap) 955 { 956 unsigned long flags; 957 958 if (!ap->ops->error_handler) 959 return; 960 961 spin_lock_irqsave(ap->lock, flags); 962 963 ap->pflags &= ~ATA_PFLAG_FROZEN; 964 965 if (ap->ops->thaw) 966 ap->ops->thaw(ap); 967 968 spin_unlock_irqrestore(ap->lock, flags); 969 970 DPRINTK("ata%u port thawed\n", ap->print_id); 971 } 972 973 static void ata_eh_scsidone(struct scsi_cmnd *scmd) 974 { 975 /* nada */ 976 } 977 978 static void __ata_eh_qc_complete(struct ata_queued_cmd *qc) 979 { 980 struct ata_port *ap = qc->ap; 981 struct scsi_cmnd *scmd = qc->scsicmd; 982 unsigned long flags; 983 984 spin_lock_irqsave(ap->lock, flags); 985 qc->scsidone = ata_eh_scsidone; 986 __ata_qc_complete(qc); 987 WARN_ON(ata_tag_valid(qc->tag)); 988 spin_unlock_irqrestore(ap->lock, flags); 989 990 scsi_eh_finish_cmd(scmd, &ap->eh_done_q); 991 } 992 993 /** 994 * ata_eh_qc_complete - Complete an active ATA command from EH 995 * @qc: Command to complete 996 * 997 * Indicate to the mid and upper layers that an ATA command has 998 * completed. To be used from EH. 999 */ 1000 void ata_eh_qc_complete(struct ata_queued_cmd *qc) 1001 { 1002 struct scsi_cmnd *scmd = qc->scsicmd; 1003 scmd->retries = scmd->allowed; 1004 __ata_eh_qc_complete(qc); 1005 } 1006 1007 /** 1008 * ata_eh_qc_retry - Tell midlayer to retry an ATA command after EH 1009 * @qc: Command to retry 1010 * 1011 * Indicate to the mid and upper layers that an ATA command 1012 * should be retried. To be used from EH. 
1013 * 1014 * SCSI midlayer limits the number of retries to scmd->allowed. 1015 * scmd->retries is decremented for commands which get retried 1016 * due to unrelated failures (qc->err_mask is zero). 1017 */ 1018 void ata_eh_qc_retry(struct ata_queued_cmd *qc) 1019 { 1020 struct scsi_cmnd *scmd = qc->scsicmd; 1021 if (!qc->err_mask && scmd->retries) 1022 scmd->retries--; 1023 __ata_eh_qc_complete(qc); 1024 } 1025 1026 /** 1027 * ata_eh_detach_dev - detach ATA device 1028 * @dev: ATA device to detach 1029 * 1030 * Detach @dev. 1031 * 1032 * LOCKING: 1033 * None. 1034 */ 1035 void ata_eh_detach_dev(struct ata_device *dev) 1036 { 1037 struct ata_link *link = dev->link; 1038 struct ata_port *ap = link->ap; 1039 unsigned long flags; 1040 1041 ata_dev_disable(dev); 1042 1043 spin_lock_irqsave(ap->lock, flags); 1044 1045 dev->flags &= ~ATA_DFLAG_DETACH; 1046 1047 if (ata_scsi_offline_dev(dev)) { 1048 dev->flags |= ATA_DFLAG_DETACHED; 1049 ap->pflags |= ATA_PFLAG_SCSI_HOTPLUG; 1050 } 1051 1052 /* clear per-dev EH actions */ 1053 ata_eh_clear_action(link, dev, &link->eh_info, ATA_EH_PERDEV_MASK); 1054 ata_eh_clear_action(link, dev, &link->eh_context.i, ATA_EH_PERDEV_MASK); 1055 1056 spin_unlock_irqrestore(ap->lock, flags); 1057 } 1058 1059 /** 1060 * ata_eh_about_to_do - about to perform eh_action 1061 * @link: target ATA link 1062 * @dev: target ATA dev for per-dev action (can be NULL) 1063 * @action: action about to be performed 1064 * 1065 * Called just before performing EH actions to clear related bits 1066 * in @link->eh_info such that eh actions are not unnecessarily 1067 * repeated. 1068 * 1069 * LOCKING: 1070 * None. 
1071 */ 1072 void ata_eh_about_to_do(struct ata_link *link, struct ata_device *dev, 1073 unsigned int action) 1074 { 1075 struct ata_port *ap = link->ap; 1076 struct ata_eh_info *ehi = &link->eh_info; 1077 struct ata_eh_context *ehc = &link->eh_context; 1078 unsigned long flags; 1079 1080 spin_lock_irqsave(ap->lock, flags); 1081 1082 /* Reset is represented by combination of actions and EHI 1083 * flags. Suck in all related bits before clearing eh_info to 1084 * avoid losing requested action. 1085 */ 1086 if (action & ATA_EH_RESET_MASK) { 1087 ehc->i.action |= ehi->action & ATA_EH_RESET_MASK; 1088 ehc->i.flags |= ehi->flags & ATA_EHI_RESET_MODIFIER_MASK; 1089 1090 /* make sure all reset actions are cleared & clear EHI flags */ 1091 action |= ATA_EH_RESET_MASK; 1092 ehi->flags &= ~ATA_EHI_RESET_MODIFIER_MASK; 1093 } 1094 1095 ata_eh_clear_action(link, dev, ehi, action); 1096 1097 if (!(ehc->i.flags & ATA_EHI_QUIET)) 1098 ap->pflags |= ATA_PFLAG_RECOVERED; 1099 1100 spin_unlock_irqrestore(ap->lock, flags); 1101 } 1102 1103 /** 1104 * ata_eh_done - EH action complete 1105 * @ap: target ATA port 1106 * @dev: target ATA dev for per-dev action (can be NULL) 1107 * @action: action just completed 1108 * 1109 * Called right after performing EH actions to clear related bits 1110 * in @link->eh_context. 1111 * 1112 * LOCKING: 1113 * None. 1114 */ 1115 void ata_eh_done(struct ata_link *link, struct ata_device *dev, 1116 unsigned int action) 1117 { 1118 struct ata_eh_context *ehc = &link->eh_context; 1119 1120 /* if reset is complete, clear all reset actions & reset modifier */ 1121 if (action & ATA_EH_RESET_MASK) { 1122 action |= ATA_EH_RESET_MASK; 1123 ehc->i.flags &= ~ATA_EHI_RESET_MODIFIER_MASK; 1124 } 1125 1126 ata_eh_clear_action(link, dev, &ehc->i, action); 1127 } 1128 1129 /** 1130 * ata_err_string - convert err_mask to descriptive string 1131 * @err_mask: error mask to convert to string 1132 * 1133 * Convert @err_mask to descriptive string. 
Errors are 1134 * prioritized according to severity and only the most severe 1135 * error is reported. 1136 * 1137 * LOCKING: 1138 * None. 1139 * 1140 * RETURNS: 1141 * Descriptive string for @err_mask 1142 */ 1143 static const char *ata_err_string(unsigned int err_mask) 1144 { 1145 if (err_mask & AC_ERR_HOST_BUS) 1146 return "host bus error"; 1147 if (err_mask & AC_ERR_ATA_BUS) 1148 return "ATA bus error"; 1149 if (err_mask & AC_ERR_TIMEOUT) 1150 return "timeout"; 1151 if (err_mask & AC_ERR_HSM) 1152 return "HSM violation"; 1153 if (err_mask & AC_ERR_SYSTEM) 1154 return "internal error"; 1155 if (err_mask & AC_ERR_MEDIA) 1156 return "media error"; 1157 if (err_mask & AC_ERR_INVALID) 1158 return "invalid argument"; 1159 if (err_mask & AC_ERR_DEV) 1160 return "device error"; 1161 return "unknown error"; 1162 } 1163 1164 /** 1165 * ata_read_log_page - read a specific log page 1166 * @dev: target device 1167 * @page: page to read 1168 * @buf: buffer to store read page 1169 * @sectors: number of sectors to read 1170 * 1171 * Read log page using READ_LOG_EXT command. 1172 * 1173 * LOCKING: 1174 * Kernel thread context (may sleep). 1175 * 1176 * RETURNS: 1177 * 0 on success, AC_ERR_* mask otherwise. 
1178 */ 1179 static unsigned int ata_read_log_page(struct ata_device *dev, 1180 u8 page, void *buf, unsigned int sectors) 1181 { 1182 struct ata_taskfile tf; 1183 unsigned int err_mask; 1184 1185 DPRINTK("read log page - page %d\n", page); 1186 1187 ata_tf_init(dev, &tf); 1188 tf.command = ATA_CMD_READ_LOG_EXT; 1189 tf.lbal = page; 1190 tf.nsect = sectors; 1191 tf.hob_nsect = sectors >> 8; 1192 tf.flags |= ATA_TFLAG_ISADDR | ATA_TFLAG_LBA48 | ATA_TFLAG_DEVICE; 1193 tf.protocol = ATA_PROT_PIO; 1194 1195 err_mask = ata_exec_internal(dev, &tf, NULL, DMA_FROM_DEVICE, 1196 buf, sectors * ATA_SECT_SIZE, 0); 1197 1198 DPRINTK("EXIT, err_mask=%x\n", err_mask); 1199 return err_mask; 1200 } 1201 1202 /** 1203 * ata_eh_read_log_10h - Read log page 10h for NCQ error details 1204 * @dev: Device to read log page 10h from 1205 * @tag: Resulting tag of the failed command 1206 * @tf: Resulting taskfile registers of the failed command 1207 * 1208 * Read log page 10h to obtain NCQ error details and clear error 1209 * condition. 1210 * 1211 * LOCKING: 1212 * Kernel thread context (may sleep). 1213 * 1214 * RETURNS: 1215 * 0 on success, -errno otherwise. 
1216 */ 1217 static int ata_eh_read_log_10h(struct ata_device *dev, 1218 int *tag, struct ata_taskfile *tf) 1219 { 1220 u8 *buf = dev->link->ap->sector_buf; 1221 unsigned int err_mask; 1222 u8 csum; 1223 int i; 1224 1225 err_mask = ata_read_log_page(dev, ATA_LOG_SATA_NCQ, buf, 1); 1226 if (err_mask) 1227 return -EIO; 1228 1229 csum = 0; 1230 for (i = 0; i < ATA_SECT_SIZE; i++) 1231 csum += buf[i]; 1232 if (csum) 1233 ata_dev_printk(dev, KERN_WARNING, 1234 "invalid checksum 0x%x on log page 10h\n", csum); 1235 1236 if (buf[0] & 0x80) 1237 return -ENOENT; 1238 1239 *tag = buf[0] & 0x1f; 1240 1241 tf->command = buf[2]; 1242 tf->feature = buf[3]; 1243 tf->lbal = buf[4]; 1244 tf->lbam = buf[5]; 1245 tf->lbah = buf[6]; 1246 tf->device = buf[7]; 1247 tf->hob_lbal = buf[8]; 1248 tf->hob_lbam = buf[9]; 1249 tf->hob_lbah = buf[10]; 1250 tf->nsect = buf[12]; 1251 tf->hob_nsect = buf[13]; 1252 1253 return 0; 1254 } 1255 1256 /** 1257 * atapi_eh_request_sense - perform ATAPI REQUEST_SENSE 1258 * @dev: device to perform REQUEST_SENSE to 1259 * @sense_buf: result sense data buffer (SCSI_SENSE_BUFFERSIZE bytes long) 1260 * 1261 * Perform ATAPI REQUEST_SENSE after the device reported CHECK 1262 * SENSE. This function is EH helper. 1263 * 1264 * LOCKING: 1265 * Kernel thread context (may sleep). 1266 * 1267 * RETURNS: 1268 * 0 on success, AC_ERR_* mask on failure 1269 */ 1270 static unsigned int atapi_eh_request_sense(struct ata_queued_cmd *qc) 1271 { 1272 struct ata_device *dev = qc->dev; 1273 unsigned char *sense_buf = qc->scsicmd->sense_buffer; 1274 struct ata_port *ap = dev->link->ap; 1275 struct ata_taskfile tf; 1276 u8 cdb[ATAPI_CDB_LEN]; 1277 1278 DPRINTK("ATAPI request sense\n"); 1279 1280 /* FIXME: is this needed? 
*/ 1281 memset(sense_buf, 0, SCSI_SENSE_BUFFERSIZE); 1282 1283 /* initialize sense_buf with the error register, 1284 * for the case where they are -not- overwritten 1285 */ 1286 sense_buf[0] = 0x70; 1287 sense_buf[2] = qc->result_tf.feature >> 4; 1288 1289 /* some devices time out if garbage left in tf */ 1290 ata_tf_init(dev, &tf); 1291 1292 memset(cdb, 0, ATAPI_CDB_LEN); 1293 cdb[0] = REQUEST_SENSE; 1294 cdb[4] = SCSI_SENSE_BUFFERSIZE; 1295 1296 tf.flags |= ATA_TFLAG_ISADDR | ATA_TFLAG_DEVICE; 1297 tf.command = ATA_CMD_PACKET; 1298 1299 /* is it pointless to prefer PIO for "safety reasons"? */ 1300 if (ap->flags & ATA_FLAG_PIO_DMA) { 1301 tf.protocol = ATAPI_PROT_DMA; 1302 tf.feature |= ATAPI_PKT_DMA; 1303 } else { 1304 tf.protocol = ATAPI_PROT_PIO; 1305 tf.lbam = SCSI_SENSE_BUFFERSIZE; 1306 tf.lbah = 0; 1307 } 1308 1309 return ata_exec_internal(dev, &tf, cdb, DMA_FROM_DEVICE, 1310 sense_buf, SCSI_SENSE_BUFFERSIZE, 0); 1311 } 1312 1313 /** 1314 * ata_eh_analyze_serror - analyze SError for a failed port 1315 * @link: ATA link to analyze SError for 1316 * 1317 * Analyze SError if available and further determine cause of 1318 * failure. 1319 * 1320 * LOCKING: 1321 * None. 1322 */ 1323 static void ata_eh_analyze_serror(struct ata_link *link) 1324 { 1325 struct ata_eh_context *ehc = &link->eh_context; 1326 u32 serror = ehc->i.serror; 1327 unsigned int err_mask = 0, action = 0; 1328 u32 hotplug_mask; 1329 1330 if (serror & SERR_PERSISTENT) { 1331 err_mask |= AC_ERR_ATA_BUS; 1332 action |= ATA_EH_HARDRESET; 1333 } 1334 if (serror & 1335 (SERR_DATA_RECOVERED | SERR_COMM_RECOVERED | SERR_DATA)) { 1336 err_mask |= AC_ERR_ATA_BUS; 1337 action |= ATA_EH_SOFTRESET; 1338 } 1339 if (serror & SERR_PROTOCOL) { 1340 err_mask |= AC_ERR_HSM; 1341 action |= ATA_EH_SOFTRESET; 1342 } 1343 if (serror & SERR_INTERNAL) { 1344 err_mask |= AC_ERR_SYSTEM; 1345 action |= ATA_EH_HARDRESET; 1346 } 1347 1348 /* Determine whether a hotplug event has occurred. 
Both 1349 * SError.N/X are considered hotplug events for enabled or 1350 * host links. For disabled PMP links, only N bit is 1351 * considered as X bit is left at 1 for link plugging. 1352 */ 1353 hotplug_mask = 0; 1354 1355 if (!(link->flags & ATA_LFLAG_DISABLED) || ata_is_host_link(link)) 1356 hotplug_mask = SERR_PHYRDY_CHG | SERR_DEV_XCHG; 1357 else 1358 hotplug_mask = SERR_PHYRDY_CHG; 1359 1360 if (serror & hotplug_mask) 1361 ata_ehi_hotplugged(&ehc->i); 1362 1363 ehc->i.err_mask |= err_mask; 1364 ehc->i.action |= action; 1365 } 1366 1367 /** 1368 * ata_eh_analyze_ncq_error - analyze NCQ error 1369 * @link: ATA link to analyze NCQ error for 1370 * 1371 * Read log page 10h, determine the offending qc and acquire 1372 * error status TF. For NCQ device errors, all LLDDs have to do 1373 * is setting AC_ERR_DEV in ehi->err_mask. This function takes 1374 * care of the rest. 1375 * 1376 * LOCKING: 1377 * Kernel thread context (may sleep). 1378 */ 1379 static void ata_eh_analyze_ncq_error(struct ata_link *link) 1380 { 1381 struct ata_port *ap = link->ap; 1382 struct ata_eh_context *ehc = &link->eh_context; 1383 struct ata_device *dev = link->device; 1384 struct ata_queued_cmd *qc; 1385 struct ata_taskfile tf; 1386 int tag, rc; 1387 1388 /* if frozen, we can't do much */ 1389 if (ap->pflags & ATA_PFLAG_FROZEN) 1390 return; 1391 1392 /* is it NCQ device error? */ 1393 if (!link->sactive || !(ehc->i.err_mask & AC_ERR_DEV)) 1394 return; 1395 1396 /* has LLDD analyzed already? 
*/ 1397 for (tag = 0; tag < ATA_MAX_QUEUE; tag++) { 1398 qc = __ata_qc_from_tag(ap, tag); 1399 1400 if (!(qc->flags & ATA_QCFLAG_FAILED)) 1401 continue; 1402 1403 if (qc->err_mask) 1404 return; 1405 } 1406 1407 /* okay, this error is ours */ 1408 rc = ata_eh_read_log_10h(dev, &tag, &tf); 1409 if (rc) { 1410 ata_link_printk(link, KERN_ERR, "failed to read log page 10h " 1411 "(errno=%d)\n", rc); 1412 return; 1413 } 1414 1415 if (!(link->sactive & (1 << tag))) { 1416 ata_link_printk(link, KERN_ERR, "log page 10h reported " 1417 "inactive tag %d\n", tag); 1418 return; 1419 } 1420 1421 /* we've got the perpetrator, condemn it */ 1422 qc = __ata_qc_from_tag(ap, tag); 1423 memcpy(&qc->result_tf, &tf, sizeof(tf)); 1424 qc->err_mask |= AC_ERR_DEV | AC_ERR_NCQ; 1425 ehc->i.err_mask &= ~AC_ERR_DEV; 1426 } 1427 1428 /** 1429 * ata_eh_analyze_tf - analyze taskfile of a failed qc 1430 * @qc: qc to analyze 1431 * @tf: Taskfile registers to analyze 1432 * 1433 * Analyze taskfile of @qc and further determine cause of 1434 * failure. This function also requests ATAPI sense data if 1435 * avaliable. 1436 * 1437 * LOCKING: 1438 * Kernel thread context (may sleep). 
1439 * 1440 * RETURNS: 1441 * Determined recovery action 1442 */ 1443 static unsigned int ata_eh_analyze_tf(struct ata_queued_cmd *qc, 1444 const struct ata_taskfile *tf) 1445 { 1446 unsigned int tmp, action = 0; 1447 u8 stat = tf->command, err = tf->feature; 1448 1449 if ((stat & (ATA_BUSY | ATA_DRQ | ATA_DRDY)) != ATA_DRDY) { 1450 qc->err_mask |= AC_ERR_HSM; 1451 return ATA_EH_SOFTRESET; 1452 } 1453 1454 if (stat & (ATA_ERR | ATA_DF)) 1455 qc->err_mask |= AC_ERR_DEV; 1456 else 1457 return 0; 1458 1459 switch (qc->dev->class) { 1460 case ATA_DEV_ATA: 1461 if (err & ATA_ICRC) 1462 qc->err_mask |= AC_ERR_ATA_BUS; 1463 if (err & ATA_UNC) 1464 qc->err_mask |= AC_ERR_MEDIA; 1465 if (err & ATA_IDNF) 1466 qc->err_mask |= AC_ERR_INVALID; 1467 break; 1468 1469 case ATA_DEV_ATAPI: 1470 if (!(qc->ap->pflags & ATA_PFLAG_FROZEN)) { 1471 tmp = atapi_eh_request_sense(qc); 1472 if (!tmp) { 1473 /* ATA_QCFLAG_SENSE_VALID is used to 1474 * tell atapi_qc_complete() that sense 1475 * data is already valid. 1476 * 1477 * TODO: interpret sense data and set 1478 * appropriate err_mask. 
1479 */ 1480 qc->flags |= ATA_QCFLAG_SENSE_VALID; 1481 } else 1482 qc->err_mask |= tmp; 1483 } 1484 } 1485 1486 if (qc->err_mask & (AC_ERR_HSM | AC_ERR_TIMEOUT | AC_ERR_ATA_BUS)) 1487 action |= ATA_EH_SOFTRESET; 1488 1489 return action; 1490 } 1491 1492 static int ata_eh_categorize_error(unsigned int eflags, unsigned int err_mask, 1493 int *xfer_ok) 1494 { 1495 int base = 0; 1496 1497 if (!(eflags & ATA_EFLAG_DUBIOUS_XFER)) 1498 *xfer_ok = 1; 1499 1500 if (!*xfer_ok) 1501 base = ATA_ECAT_DUBIOUS_NONE; 1502 1503 if (err_mask & AC_ERR_ATA_BUS) 1504 return base + ATA_ECAT_ATA_BUS; 1505 1506 if (err_mask & AC_ERR_TIMEOUT) 1507 return base + ATA_ECAT_TOUT_HSM; 1508 1509 if (eflags & ATA_EFLAG_IS_IO) { 1510 if (err_mask & AC_ERR_HSM) 1511 return base + ATA_ECAT_TOUT_HSM; 1512 if ((err_mask & 1513 (AC_ERR_DEV|AC_ERR_MEDIA|AC_ERR_INVALID)) == AC_ERR_DEV) 1514 return base + ATA_ECAT_UNK_DEV; 1515 } 1516 1517 return 0; 1518 } 1519 1520 struct speed_down_verdict_arg { 1521 u64 since; 1522 int xfer_ok; 1523 int nr_errors[ATA_ECAT_NR]; 1524 }; 1525 1526 static int speed_down_verdict_cb(struct ata_ering_entry *ent, void *void_arg) 1527 { 1528 struct speed_down_verdict_arg *arg = void_arg; 1529 int cat; 1530 1531 if (ent->timestamp < arg->since) 1532 return -1; 1533 1534 cat = ata_eh_categorize_error(ent->eflags, ent->err_mask, 1535 &arg->xfer_ok); 1536 arg->nr_errors[cat]++; 1537 1538 return 0; 1539 } 1540 1541 /** 1542 * ata_eh_speed_down_verdict - Determine speed down verdict 1543 * @dev: Device of interest 1544 * 1545 * This function examines error ring of @dev and determines 1546 * whether NCQ needs to be turned off, transfer speed should be 1547 * stepped down, or falling back to PIO is necessary. 
*
 *	ECAT_ATA_BUS	: ATA_BUS error for any command
 *
 *	ECAT_TOUT_HSM	: TIMEOUT for any command or HSM violation for
 *			  IO commands
 *
 *	ECAT_UNK_DEV	: Unknown DEV error for IO commands
 *
 *	ECAT_DUBIOUS_*	: Identical to above three but occurred while
 *			  data transfer hasn't been verified.
 *
 *	Verdicts are
 *
 *	NCQ_OFF		: Turn off NCQ.
 *
 *	SPEED_DOWN	: Speed down transfer speed but don't fall back
 *			  to PIO.
 *
 *	FALLBACK_TO_PIO	: Fall back to PIO.
 *
 *	Even if multiple verdicts are returned, only one action is
 *	taken per error.  An action triggered by non-DUBIOUS errors
 *	clears ering, while one triggered by DUBIOUS_* errors doesn't.
 *	This is to expedite speed down decisions right after device is
 *	initially configured.
 *
 *	The following are the speed down rules.  #1 and #2 deal with
 *	DUBIOUS errors.
 *
 *	1. If more than one DUBIOUS_ATA_BUS or DUBIOUS_TOUT_HSM errors
 *	   occurred during last 5 mins, SPEED_DOWN and FALLBACK_TO_PIO.
 *
 *	2. If more than one DUBIOUS_TOUT_HSM or DUBIOUS_UNK_DEV errors
 *	   occurred during last 5 mins, NCQ_OFF.
 *
 *	3. If more than 6 ATA_BUS, TOUT_HSM or UNK_DEV errors
 *	   occurred during last 5 mins, FALLBACK_TO_PIO.
 *
 *	4. If more than 3 TOUT_HSM or UNK_DEV errors occurred
 *	   during last 10 mins, NCQ_OFF.
 *
 *	5. If more than 3 ATA_BUS or TOUT_HSM errors, or more than 6
 *	   UNK_DEV errors occurred during last 10 mins, SPEED_DOWN.
 *
 *	LOCKING:
 *	Inherited from caller.
 *
 *	RETURNS:
 *	OR of ATA_EH_SPDN_* flags.
1597 */ 1598 static unsigned int ata_eh_speed_down_verdict(struct ata_device *dev) 1599 { 1600 const u64 j5mins = 5LLU * 60 * HZ, j10mins = 10LLU * 60 * HZ; 1601 u64 j64 = get_jiffies_64(); 1602 struct speed_down_verdict_arg arg; 1603 unsigned int verdict = 0; 1604 1605 /* scan past 5 mins of error history */ 1606 memset(&arg, 0, sizeof(arg)); 1607 arg.since = j64 - min(j64, j5mins); 1608 ata_ering_map(&dev->ering, speed_down_verdict_cb, &arg); 1609 1610 if (arg.nr_errors[ATA_ECAT_DUBIOUS_ATA_BUS] + 1611 arg.nr_errors[ATA_ECAT_DUBIOUS_TOUT_HSM] > 1) 1612 verdict |= ATA_EH_SPDN_SPEED_DOWN | 1613 ATA_EH_SPDN_FALLBACK_TO_PIO | ATA_EH_SPDN_KEEP_ERRORS; 1614 1615 if (arg.nr_errors[ATA_ECAT_DUBIOUS_TOUT_HSM] + 1616 arg.nr_errors[ATA_ECAT_DUBIOUS_UNK_DEV] > 1) 1617 verdict |= ATA_EH_SPDN_NCQ_OFF | ATA_EH_SPDN_KEEP_ERRORS; 1618 1619 if (arg.nr_errors[ATA_ECAT_ATA_BUS] + 1620 arg.nr_errors[ATA_ECAT_TOUT_HSM] + 1621 arg.nr_errors[ATA_ECAT_UNK_DEV] > 6) 1622 verdict |= ATA_EH_SPDN_FALLBACK_TO_PIO; 1623 1624 /* scan past 10 mins of error history */ 1625 memset(&arg, 0, sizeof(arg)); 1626 arg.since = j64 - min(j64, j10mins); 1627 ata_ering_map(&dev->ering, speed_down_verdict_cb, &arg); 1628 1629 if (arg.nr_errors[ATA_ECAT_TOUT_HSM] + 1630 arg.nr_errors[ATA_ECAT_UNK_DEV] > 3) 1631 verdict |= ATA_EH_SPDN_NCQ_OFF; 1632 1633 if (arg.nr_errors[ATA_ECAT_ATA_BUS] + 1634 arg.nr_errors[ATA_ECAT_TOUT_HSM] > 3 || 1635 arg.nr_errors[ATA_ECAT_UNK_DEV] > 6) 1636 verdict |= ATA_EH_SPDN_SPEED_DOWN; 1637 1638 return verdict; 1639 } 1640 1641 /** 1642 * ata_eh_speed_down - record error and speed down if necessary 1643 * @dev: Failed device 1644 * @eflags: mask of ATA_EFLAG_* flags 1645 * @err_mask: err_mask of the error 1646 * 1647 * Record error and examine error history to determine whether 1648 * adjusting transmission speed is necessary. It also sets 1649 * transmission limits appropriately if such adjustment is 1650 * necessary. 
1651 * 1652 * LOCKING: 1653 * Kernel thread context (may sleep). 1654 * 1655 * RETURNS: 1656 * Determined recovery action. 1657 */ 1658 static unsigned int ata_eh_speed_down(struct ata_device *dev, 1659 unsigned int eflags, unsigned int err_mask) 1660 { 1661 struct ata_link *link = dev->link; 1662 int xfer_ok = 0; 1663 unsigned int verdict; 1664 unsigned int action = 0; 1665 1666 /* don't bother if Cat-0 error */ 1667 if (ata_eh_categorize_error(eflags, err_mask, &xfer_ok) == 0) 1668 return 0; 1669 1670 /* record error and determine whether speed down is necessary */ 1671 ata_ering_record(&dev->ering, eflags, err_mask); 1672 verdict = ata_eh_speed_down_verdict(dev); 1673 1674 /* turn off NCQ? */ 1675 if ((verdict & ATA_EH_SPDN_NCQ_OFF) && 1676 (dev->flags & (ATA_DFLAG_PIO | ATA_DFLAG_NCQ | 1677 ATA_DFLAG_NCQ_OFF)) == ATA_DFLAG_NCQ) { 1678 dev->flags |= ATA_DFLAG_NCQ_OFF; 1679 ata_dev_printk(dev, KERN_WARNING, 1680 "NCQ disabled due to excessive errors\n"); 1681 goto done; 1682 } 1683 1684 /* speed down? */ 1685 if (verdict & ATA_EH_SPDN_SPEED_DOWN) { 1686 /* speed down SATA link speed if possible */ 1687 if (sata_down_spd_limit(link) == 0) { 1688 action |= ATA_EH_HARDRESET; 1689 goto done; 1690 } 1691 1692 /* lower transfer mode */ 1693 if (dev->spdn_cnt < 2) { 1694 static const int dma_dnxfer_sel[] = 1695 { ATA_DNXFER_DMA, ATA_DNXFER_40C }; 1696 static const int pio_dnxfer_sel[] = 1697 { ATA_DNXFER_PIO, ATA_DNXFER_FORCE_PIO0 }; 1698 int sel; 1699 1700 if (dev->xfer_shift != ATA_SHIFT_PIO) 1701 sel = dma_dnxfer_sel[dev->spdn_cnt]; 1702 else 1703 sel = pio_dnxfer_sel[dev->spdn_cnt]; 1704 1705 dev->spdn_cnt++; 1706 1707 if (ata_down_xfermask_limit(dev, sel) == 0) { 1708 action |= ATA_EH_SOFTRESET; 1709 goto done; 1710 } 1711 } 1712 } 1713 1714 /* Fall back to PIO? Slowing down to PIO is meaningless for 1715 * SATA ATA devices. Consider it only for PATA and SATAPI. 
1716 */ 1717 if ((verdict & ATA_EH_SPDN_FALLBACK_TO_PIO) && (dev->spdn_cnt >= 2) && 1718 (link->ap->cbl != ATA_CBL_SATA || dev->class == ATA_DEV_ATAPI) && 1719 (dev->xfer_shift != ATA_SHIFT_PIO)) { 1720 if (ata_down_xfermask_limit(dev, ATA_DNXFER_FORCE_PIO) == 0) { 1721 dev->spdn_cnt = 0; 1722 action |= ATA_EH_SOFTRESET; 1723 goto done; 1724 } 1725 } 1726 1727 return 0; 1728 done: 1729 /* device has been slowed down, blow error history */ 1730 if (!(verdict & ATA_EH_SPDN_KEEP_ERRORS)) 1731 ata_ering_clear(&dev->ering); 1732 return action; 1733 } 1734 1735 /** 1736 * ata_eh_link_autopsy - analyze error and determine recovery action 1737 * @link: host link to perform autopsy on 1738 * 1739 * Analyze why @link failed and determine which recovery actions 1740 * are needed. This function also sets more detailed AC_ERR_* 1741 * values and fills sense data for ATAPI CHECK SENSE. 1742 * 1743 * LOCKING: 1744 * Kernel thread context (may sleep). 1745 */ 1746 static void ata_eh_link_autopsy(struct ata_link *link) 1747 { 1748 struct ata_port *ap = link->ap; 1749 struct ata_eh_context *ehc = &link->eh_context; 1750 struct ata_device *dev; 1751 unsigned int all_err_mask = 0, eflags = 0; 1752 int tag; 1753 u32 serror; 1754 int rc; 1755 1756 DPRINTK("ENTER\n"); 1757 1758 if (ehc->i.flags & ATA_EHI_NO_AUTOPSY) 1759 return; 1760 1761 /* obtain and analyze SError */ 1762 rc = sata_scr_read(link, SCR_ERROR, &serror); 1763 if (rc == 0) { 1764 ehc->i.serror |= serror; 1765 ata_eh_analyze_serror(link); 1766 } else if (rc != -EOPNOTSUPP) { 1767 /* SError read failed, force hardreset and probing */ 1768 ata_ehi_schedule_probe(&ehc->i); 1769 ehc->i.action |= ATA_EH_HARDRESET; 1770 ehc->i.err_mask |= AC_ERR_OTHER; 1771 } 1772 1773 /* analyze NCQ failure */ 1774 ata_eh_analyze_ncq_error(link); 1775 1776 /* any real error trumps AC_ERR_OTHER */ 1777 if (ehc->i.err_mask & ~AC_ERR_OTHER) 1778 ehc->i.err_mask &= ~AC_ERR_OTHER; 1779 1780 all_err_mask |= ehc->i.err_mask; 1781 1782 for (tag = 0; tag 
< ATA_MAX_QUEUE; tag++) { 1783 struct ata_queued_cmd *qc = __ata_qc_from_tag(ap, tag); 1784 1785 if (!(qc->flags & ATA_QCFLAG_FAILED) || qc->dev->link != link) 1786 continue; 1787 1788 /* inherit upper level err_mask */ 1789 qc->err_mask |= ehc->i.err_mask; 1790 1791 /* analyze TF */ 1792 ehc->i.action |= ata_eh_analyze_tf(qc, &qc->result_tf); 1793 1794 /* DEV errors are probably spurious in case of ATA_BUS error */ 1795 if (qc->err_mask & AC_ERR_ATA_BUS) 1796 qc->err_mask &= ~(AC_ERR_DEV | AC_ERR_MEDIA | 1797 AC_ERR_INVALID); 1798 1799 /* any real error trumps unknown error */ 1800 if (qc->err_mask & ~AC_ERR_OTHER) 1801 qc->err_mask &= ~AC_ERR_OTHER; 1802 1803 /* SENSE_VALID trumps dev/unknown error and revalidation */ 1804 if (qc->flags & ATA_QCFLAG_SENSE_VALID) 1805 qc->err_mask &= ~(AC_ERR_DEV | AC_ERR_OTHER); 1806 1807 /* accumulate error info */ 1808 ehc->i.dev = qc->dev; 1809 all_err_mask |= qc->err_mask; 1810 if (qc->flags & ATA_QCFLAG_IO) 1811 eflags |= ATA_EFLAG_IS_IO; 1812 } 1813 1814 /* enforce default EH actions */ 1815 if (ap->pflags & ATA_PFLAG_FROZEN || 1816 all_err_mask & (AC_ERR_HSM | AC_ERR_TIMEOUT)) 1817 ehc->i.action |= ATA_EH_SOFTRESET; 1818 else if (((eflags & ATA_EFLAG_IS_IO) && all_err_mask) || 1819 (!(eflags & ATA_EFLAG_IS_IO) && (all_err_mask & ~AC_ERR_DEV))) 1820 ehc->i.action |= ATA_EH_REVALIDATE; 1821 1822 /* If we have offending qcs and the associated failed device, 1823 * perform per-dev EH action only on the offending device. 
1824 */ 1825 if (ehc->i.dev) { 1826 ehc->i.dev_action[ehc->i.dev->devno] |= 1827 ehc->i.action & ATA_EH_PERDEV_MASK; 1828 ehc->i.action &= ~ATA_EH_PERDEV_MASK; 1829 } 1830 1831 /* propagate timeout to host link */ 1832 if ((all_err_mask & AC_ERR_TIMEOUT) && !ata_is_host_link(link)) 1833 ap->link.eh_context.i.err_mask |= AC_ERR_TIMEOUT; 1834 1835 /* record error and consider speeding down */ 1836 dev = ehc->i.dev; 1837 if (!dev && ((ata_link_max_devices(link) == 1 && 1838 ata_dev_enabled(link->device)))) 1839 dev = link->device; 1840 1841 if (dev) { 1842 if (dev->flags & ATA_DFLAG_DUBIOUS_XFER) 1843 eflags |= ATA_EFLAG_DUBIOUS_XFER; 1844 ehc->i.action |= ata_eh_speed_down(dev, eflags, all_err_mask); 1845 } 1846 1847 DPRINTK("EXIT\n"); 1848 } 1849 1850 /** 1851 * ata_eh_autopsy - analyze error and determine recovery action 1852 * @ap: host port to perform autopsy on 1853 * 1854 * Analyze all links of @ap and determine why they failed and 1855 * which recovery actions are needed. 1856 * 1857 * LOCKING: 1858 * Kernel thread context (may sleep). 1859 */ 1860 void ata_eh_autopsy(struct ata_port *ap) 1861 { 1862 struct ata_link *link; 1863 1864 ata_port_for_each_link(link, ap) 1865 ata_eh_link_autopsy(link); 1866 1867 /* Autopsy of fanout ports can affect host link autopsy. 1868 * Perform host link autopsy last. 1869 */ 1870 if (ap->nr_pmp_links) 1871 ata_eh_link_autopsy(&ap->link); 1872 } 1873 1874 /** 1875 * ata_eh_link_report - report error handling to user 1876 * @link: ATA link EH is going on 1877 * 1878 * Report EH to user. 1879 * 1880 * LOCKING: 1881 * None. 
1882 */ 1883 static void ata_eh_link_report(struct ata_link *link) 1884 { 1885 struct ata_port *ap = link->ap; 1886 struct ata_eh_context *ehc = &link->eh_context; 1887 const char *frozen, *desc; 1888 char tries_buf[6]; 1889 int tag, nr_failed = 0; 1890 1891 if (ehc->i.flags & ATA_EHI_QUIET) 1892 return; 1893 1894 desc = NULL; 1895 if (ehc->i.desc[0] != '\0') 1896 desc = ehc->i.desc; 1897 1898 for (tag = 0; tag < ATA_MAX_QUEUE; tag++) { 1899 struct ata_queued_cmd *qc = __ata_qc_from_tag(ap, tag); 1900 1901 if (!(qc->flags & ATA_QCFLAG_FAILED) || qc->dev->link != link || 1902 ((qc->flags & ATA_QCFLAG_QUIET) && 1903 qc->err_mask == AC_ERR_DEV)) 1904 continue; 1905 if (qc->flags & ATA_QCFLAG_SENSE_VALID && !qc->err_mask) 1906 continue; 1907 1908 nr_failed++; 1909 } 1910 1911 if (!nr_failed && !ehc->i.err_mask) 1912 return; 1913 1914 frozen = ""; 1915 if (ap->pflags & ATA_PFLAG_FROZEN) 1916 frozen = " frozen"; 1917 1918 memset(tries_buf, 0, sizeof(tries_buf)); 1919 if (ap->eh_tries < ATA_EH_MAX_TRIES) 1920 snprintf(tries_buf, sizeof(tries_buf) - 1, " t%d", 1921 ap->eh_tries); 1922 1923 if (ehc->i.dev) { 1924 ata_dev_printk(ehc->i.dev, KERN_ERR, "exception Emask 0x%x " 1925 "SAct 0x%x SErr 0x%x action 0x%x%s%s\n", 1926 ehc->i.err_mask, link->sactive, ehc->i.serror, 1927 ehc->i.action, frozen, tries_buf); 1928 if (desc) 1929 ata_dev_printk(ehc->i.dev, KERN_ERR, "%s\n", desc); 1930 } else { 1931 ata_link_printk(link, KERN_ERR, "exception Emask 0x%x " 1932 "SAct 0x%x SErr 0x%x action 0x%x%s%s\n", 1933 ehc->i.err_mask, link->sactive, ehc->i.serror, 1934 ehc->i.action, frozen, tries_buf); 1935 if (desc) 1936 ata_link_printk(link, KERN_ERR, "%s\n", desc); 1937 } 1938 1939 if (ehc->i.serror) 1940 ata_port_printk(ap, KERN_ERR, 1941 "SError: { %s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s}\n", 1942 ehc->i.serror & SERR_DATA_RECOVERED ? "RecovData " : "", 1943 ehc->i.serror & SERR_COMM_RECOVERED ? "RecovComm " : "", 1944 ehc->i.serror & SERR_DATA ? 
"UnrecovData " : "", 1945 ehc->i.serror & SERR_PERSISTENT ? "Persist " : "", 1946 ehc->i.serror & SERR_PROTOCOL ? "Proto " : "", 1947 ehc->i.serror & SERR_INTERNAL ? "HostInt " : "", 1948 ehc->i.serror & SERR_PHYRDY_CHG ? "PHYRdyChg " : "", 1949 ehc->i.serror & SERR_PHY_INT_ERR ? "PHYInt " : "", 1950 ehc->i.serror & SERR_COMM_WAKE ? "CommWake " : "", 1951 ehc->i.serror & SERR_10B_8B_ERR ? "10B8B " : "", 1952 ehc->i.serror & SERR_DISPARITY ? "Dispar " : "", 1953 ehc->i.serror & SERR_CRC ? "BadCRC " : "", 1954 ehc->i.serror & SERR_HANDSHAKE ? "Handshk " : "", 1955 ehc->i.serror & SERR_LINK_SEQ_ERR ? "LinkSeq " : "", 1956 ehc->i.serror & SERR_TRANS_ST_ERROR ? "TrStaTrns " : "", 1957 ehc->i.serror & SERR_UNRECOG_FIS ? "UnrecFIS " : "", 1958 ehc->i.serror & SERR_DEV_XCHG ? "DevExch " : ""); 1959 1960 for (tag = 0; tag < ATA_MAX_QUEUE; tag++) { 1961 struct ata_queued_cmd *qc = __ata_qc_from_tag(ap, tag); 1962 struct ata_taskfile *cmd = &qc->tf, *res = &qc->result_tf; 1963 const u8 *cdb = qc->cdb; 1964 char data_buf[20] = ""; 1965 char cdb_buf[70] = ""; 1966 1967 if (!(qc->flags & ATA_QCFLAG_FAILED) || 1968 qc->dev->link != link || !qc->err_mask) 1969 continue; 1970 1971 if (qc->dma_dir != DMA_NONE) { 1972 static const char *dma_str[] = { 1973 [DMA_BIDIRECTIONAL] = "bidi", 1974 [DMA_TO_DEVICE] = "out", 1975 [DMA_FROM_DEVICE] = "in", 1976 }; 1977 static const char *prot_str[] = { 1978 [ATA_PROT_PIO] = "pio", 1979 [ATA_PROT_DMA] = "dma", 1980 [ATA_PROT_NCQ] = "ncq", 1981 [ATAPI_PROT_PIO] = "pio", 1982 [ATAPI_PROT_DMA] = "dma", 1983 }; 1984 1985 snprintf(data_buf, sizeof(data_buf), " %s %u %s", 1986 prot_str[qc->tf.protocol], qc->nbytes, 1987 dma_str[qc->dma_dir]); 1988 } 1989 1990 if (ata_is_atapi(qc->tf.protocol)) 1991 snprintf(cdb_buf, sizeof(cdb_buf), 1992 "cdb %02x %02x %02x %02x %02x %02x %02x %02x " 1993 "%02x %02x %02x %02x %02x %02x %02x %02x\n ", 1994 cdb[0], cdb[1], cdb[2], cdb[3], 1995 cdb[4], cdb[5], cdb[6], cdb[7], 1996 cdb[8], cdb[9], cdb[10], cdb[11], 1997 
cdb[12], cdb[13], cdb[14], cdb[15]); 1998 1999 ata_dev_printk(qc->dev, KERN_ERR, 2000 "cmd %02x/%02x:%02x:%02x:%02x:%02x/%02x:%02x:%02x:%02x:%02x/%02x " 2001 "tag %d%s\n %s" 2002 "res %02x/%02x:%02x:%02x:%02x:%02x/%02x:%02x:%02x:%02x:%02x/%02x " 2003 "Emask 0x%x (%s)%s\n", 2004 cmd->command, cmd->feature, cmd->nsect, 2005 cmd->lbal, cmd->lbam, cmd->lbah, 2006 cmd->hob_feature, cmd->hob_nsect, 2007 cmd->hob_lbal, cmd->hob_lbam, cmd->hob_lbah, 2008 cmd->device, qc->tag, data_buf, cdb_buf, 2009 res->command, res->feature, res->nsect, 2010 res->lbal, res->lbam, res->lbah, 2011 res->hob_feature, res->hob_nsect, 2012 res->hob_lbal, res->hob_lbam, res->hob_lbah, 2013 res->device, qc->err_mask, ata_err_string(qc->err_mask), 2014 qc->err_mask & AC_ERR_NCQ ? " <F>" : ""); 2015 2016 if (res->command & (ATA_BUSY | ATA_DRDY | ATA_DF | ATA_DRQ | 2017 ATA_ERR)) { 2018 if (res->command & ATA_BUSY) 2019 ata_dev_printk(qc->dev, KERN_ERR, 2020 "status: { Busy }\n"); 2021 else 2022 ata_dev_printk(qc->dev, KERN_ERR, 2023 "status: { %s%s%s%s}\n", 2024 res->command & ATA_DRDY ? "DRDY " : "", 2025 res->command & ATA_DF ? "DF " : "", 2026 res->command & ATA_DRQ ? "DRQ " : "", 2027 res->command & ATA_ERR ? "ERR " : ""); 2028 } 2029 2030 if (cmd->command != ATA_CMD_PACKET && 2031 (res->feature & (ATA_ICRC | ATA_UNC | ATA_IDNF | 2032 ATA_ABORTED))) 2033 ata_dev_printk(qc->dev, KERN_ERR, 2034 "error: { %s%s%s%s}\n", 2035 res->feature & ATA_ICRC ? "ICRC " : "", 2036 res->feature & ATA_UNC ? "UNC " : "", 2037 res->feature & ATA_IDNF ? "IDNF " : "", 2038 res->feature & ATA_ABORTED ? "ABRT " : ""); 2039 } 2040 } 2041 2042 /** 2043 * ata_eh_report - report error handling to user 2044 * @ap: ATA port to report EH about 2045 * 2046 * Report EH to user. 2047 * 2048 * LOCKING: 2049 * None. 
2050 */ 2051 void ata_eh_report(struct ata_port *ap) 2052 { 2053 struct ata_link *link; 2054 2055 __ata_port_for_each_link(link, ap) 2056 ata_eh_link_report(link); 2057 } 2058 2059 static int ata_do_reset(struct ata_link *link, ata_reset_fn_t reset, 2060 unsigned int *classes, unsigned long deadline) 2061 { 2062 struct ata_device *dev; 2063 int rc; 2064 2065 ata_link_for_each_dev(dev, link) 2066 classes[dev->devno] = ATA_DEV_UNKNOWN; 2067 2068 rc = reset(link, classes, deadline); 2069 if (rc) 2070 return rc; 2071 2072 /* If any class isn't ATA_DEV_UNKNOWN, consider classification 2073 * is complete and convert all ATA_DEV_UNKNOWN to 2074 * ATA_DEV_NONE. 2075 */ 2076 ata_link_for_each_dev(dev, link) 2077 if (classes[dev->devno] != ATA_DEV_UNKNOWN) 2078 break; 2079 2080 if (dev) { 2081 ata_link_for_each_dev(dev, link) { 2082 if (classes[dev->devno] == ATA_DEV_UNKNOWN) 2083 classes[dev->devno] = ATA_DEV_NONE; 2084 } 2085 } 2086 2087 return 0; 2088 } 2089 2090 static int ata_eh_followup_srst_needed(struct ata_link *link, 2091 int rc, int classify, 2092 const unsigned int *classes) 2093 { 2094 if (link->flags & ATA_LFLAG_NO_SRST) 2095 return 0; 2096 if (rc == -EAGAIN) 2097 return 1; 2098 if (rc != 0) 2099 return 0; 2100 if ((link->ap->flags & ATA_FLAG_PMP) && ata_is_host_link(link)) 2101 return 1; 2102 if (classify && !(link->flags & ATA_LFLAG_ASSUME_CLASS) && 2103 classes[0] == ATA_DEV_UNKNOWN) 2104 return 1; 2105 return 0; 2106 } 2107 2108 int ata_eh_reset(struct ata_link *link, int classify, 2109 ata_prereset_fn_t prereset, ata_reset_fn_t softreset, 2110 ata_reset_fn_t hardreset, ata_postreset_fn_t postreset) 2111 { 2112 const int max_tries = ARRAY_SIZE(ata_eh_reset_timeouts); 2113 struct ata_port *ap = link->ap; 2114 struct ata_eh_context *ehc = &link->eh_context; 2115 unsigned int *classes = ehc->classes; 2116 unsigned int lflags = link->flags; 2117 int verbose = !(ehc->i.flags & ATA_EHI_QUIET); 2118 int try = 0; 2119 struct ata_device *dev; 2120 unsigned long 
deadline, now; 2121 unsigned int tmp_action; 2122 ata_reset_fn_t reset; 2123 unsigned long flags; 2124 u32 sstatus; 2125 int rc; 2126 2127 /* about to reset */ 2128 spin_lock_irqsave(ap->lock, flags); 2129 ap->pflags |= ATA_PFLAG_RESETTING; 2130 spin_unlock_irqrestore(ap->lock, flags); 2131 2132 ata_eh_about_to_do(link, NULL, ehc->i.action & ATA_EH_RESET_MASK); 2133 2134 ata_link_for_each_dev(dev, link) { 2135 /* If we issue an SRST then an ATA drive (not ATAPI) 2136 * may change configuration and be in PIO0 timing. If 2137 * we do a hard reset (or are coming from power on) 2138 * this is true for ATA or ATAPI. Until we've set a 2139 * suitable controller mode we should not touch the 2140 * bus as we may be talking too fast. 2141 */ 2142 dev->pio_mode = XFER_PIO_0; 2143 2144 /* If the controller has a pio mode setup function 2145 * then use it to set the chipset to rights. Don't 2146 * touch the DMA setup as that will be dealt with when 2147 * configuring devices. 2148 */ 2149 if (ap->ops->set_piomode) 2150 ap->ops->set_piomode(ap, dev); 2151 } 2152 2153 if (!softreset && !hardreset) { 2154 if (verbose) 2155 ata_link_printk(link, KERN_INFO, "no reset method " 2156 "available, skipping reset\n"); 2157 if (!(lflags & ATA_LFLAG_ASSUME_CLASS)) 2158 lflags |= ATA_LFLAG_ASSUME_ATA; 2159 goto done; 2160 } 2161 2162 /* Determine which reset to use and record in ehc->i.action. 2163 * prereset() may examine and modify it. 2164 */ 2165 if (softreset && (!hardreset || (!(lflags & ATA_LFLAG_NO_SRST) && 2166 !sata_set_spd_needed(link) && 2167 !(ehc->i.action & ATA_EH_HARDRESET)))) 2168 tmp_action = ATA_EH_SOFTRESET; 2169 else 2170 tmp_action = ATA_EH_HARDRESET; 2171 2172 ehc->i.action = (ehc->i.action & ~ATA_EH_RESET_MASK) | tmp_action; 2173 2174 if (prereset) { 2175 rc = prereset(link, jiffies + ATA_EH_PRERESET_TIMEOUT); 2176 if (rc) { 2177 if (rc == -ENOENT) { 2178 ata_link_printk(link, KERN_DEBUG, 2179 "port disabled. 
ignoring.\n"); 2180 ehc->i.action &= ~ATA_EH_RESET_MASK; 2181 2182 ata_link_for_each_dev(dev, link) 2183 classes[dev->devno] = ATA_DEV_NONE; 2184 2185 rc = 0; 2186 } else 2187 ata_link_printk(link, KERN_ERR, 2188 "prereset failed (errno=%d)\n", rc); 2189 goto out; 2190 } 2191 } 2192 2193 /* prereset() might have modified ehc->i.action */ 2194 if (ehc->i.action & ATA_EH_HARDRESET) 2195 reset = hardreset; 2196 else if (ehc->i.action & ATA_EH_SOFTRESET) 2197 reset = softreset; 2198 else { 2199 /* prereset told us not to reset, bang classes and return */ 2200 ata_link_for_each_dev(dev, link) 2201 classes[dev->devno] = ATA_DEV_NONE; 2202 rc = 0; 2203 goto out; 2204 } 2205 2206 /* did prereset() screw up? if so, fix up to avoid oopsing */ 2207 if (!reset) { 2208 if (softreset) 2209 reset = softreset; 2210 else 2211 reset = hardreset; 2212 } 2213 2214 retry: 2215 deadline = jiffies + ata_eh_reset_timeouts[try++]; 2216 2217 /* shut up during boot probing */ 2218 if (verbose) 2219 ata_link_printk(link, KERN_INFO, "%s resetting link\n", 2220 reset == softreset ? "soft" : "hard"); 2221 2222 /* mark that this EH session started with reset */ 2223 if (reset == hardreset) 2224 ehc->i.flags |= ATA_EHI_DID_HARDRESET; 2225 else 2226 ehc->i.flags |= ATA_EHI_DID_SOFTRESET; 2227 2228 rc = ata_do_reset(link, reset, classes, deadline); 2229 2230 if (reset == hardreset && 2231 ata_eh_followup_srst_needed(link, rc, classify, classes)) { 2232 /* okay, let's do follow-up softreset */ 2233 reset = softreset; 2234 2235 if (!reset) { 2236 ata_link_printk(link, KERN_ERR, 2237 "follow-up softreset required " 2238 "but no softreset avaliable\n"); 2239 rc = -EINVAL; 2240 goto fail; 2241 } 2242 2243 ata_eh_about_to_do(link, NULL, ATA_EH_RESET_MASK); 2244 rc = ata_do_reset(link, reset, classes, deadline); 2245 } 2246 2247 /* -EAGAIN can happen if we skipped followup SRST */ 2248 if (rc && rc != -EAGAIN) 2249 goto fail; 2250 2251 /* was classification successful? 
*/ 2252 if (classify && classes[0] == ATA_DEV_UNKNOWN && 2253 !(lflags & ATA_LFLAG_ASSUME_CLASS)) { 2254 if (try < max_tries) { 2255 ata_link_printk(link, KERN_WARNING, 2256 "classification failed\n"); 2257 rc = -EINVAL; 2258 goto fail; 2259 } 2260 2261 ata_link_printk(link, KERN_WARNING, 2262 "classfication failed, assuming ATA\n"); 2263 lflags |= ATA_LFLAG_ASSUME_ATA; 2264 } 2265 2266 done: 2267 ata_link_for_each_dev(dev, link) { 2268 /* After the reset, the device state is PIO 0 and the 2269 * controller state is undefined. Reset also wakes up 2270 * drives from sleeping mode. 2271 */ 2272 dev->pio_mode = XFER_PIO_0; 2273 dev->flags &= ~ATA_DFLAG_SLEEPING; 2274 2275 if (ata_link_offline(link)) 2276 continue; 2277 2278 /* apply class override */ 2279 if (lflags & ATA_LFLAG_ASSUME_ATA) 2280 classes[dev->devno] = ATA_DEV_ATA; 2281 else if (lflags & ATA_LFLAG_ASSUME_SEMB) 2282 classes[dev->devno] = ATA_DEV_SEMB_UNSUP; /* not yet */ 2283 } 2284 2285 /* record current link speed */ 2286 if (sata_scr_read(link, SCR_STATUS, &sstatus) == 0) 2287 link->sata_spd = (sstatus >> 4) & 0xf; 2288 2289 if (postreset) 2290 postreset(link, classes); 2291 2292 /* reset successful, schedule revalidation */ 2293 ata_eh_done(link, NULL, ehc->i.action & ATA_EH_RESET_MASK); 2294 ehc->i.action |= ATA_EH_REVALIDATE; 2295 2296 rc = 0; 2297 out: 2298 /* clear hotplug flag */ 2299 ehc->i.flags &= ~ATA_EHI_HOTPLUGGED; 2300 2301 spin_lock_irqsave(ap->lock, flags); 2302 ap->pflags &= ~ATA_PFLAG_RESETTING; 2303 spin_unlock_irqrestore(ap->lock, flags); 2304 2305 return rc; 2306 2307 fail: 2308 if (rc == -ERESTART || try >= max_tries) 2309 goto out; 2310 2311 now = jiffies; 2312 if (time_before(now, deadline)) { 2313 unsigned long delta = deadline - now; 2314 2315 ata_link_printk(link, KERN_WARNING, "reset failed " 2316 "(errno=%d), retrying in %u secs\n", 2317 rc, (jiffies_to_msecs(delta) + 999) / 1000); 2318 2319 while (delta) 2320 delta = schedule_timeout_uninterruptible(delta); 2321 } 2322 2323 
if (rc == -EPIPE || try == max_tries - 1) 2324 sata_down_spd_limit(link); 2325 if (hardreset) 2326 reset = hardreset; 2327 goto retry; 2328 } 2329 2330 static int ata_eh_revalidate_and_attach(struct ata_link *link, 2331 struct ata_device **r_failed_dev) 2332 { 2333 struct ata_port *ap = link->ap; 2334 struct ata_eh_context *ehc = &link->eh_context; 2335 struct ata_device *dev; 2336 unsigned int new_mask = 0; 2337 unsigned long flags; 2338 int rc = 0; 2339 2340 DPRINTK("ENTER\n"); 2341 2342 /* For PATA drive side cable detection to work, IDENTIFY must 2343 * be done backwards such that PDIAG- is released by the slave 2344 * device before the master device is identified. 2345 */ 2346 ata_link_for_each_dev_reverse(dev, link) { 2347 unsigned int action = ata_eh_dev_action(dev); 2348 unsigned int readid_flags = 0; 2349 2350 if (ehc->i.flags & ATA_EHI_DID_RESET) 2351 readid_flags |= ATA_READID_POSTRESET; 2352 2353 if ((action & ATA_EH_REVALIDATE) && ata_dev_enabled(dev)) { 2354 WARN_ON(dev->class == ATA_DEV_PMP); 2355 2356 if (ata_link_offline(link)) { 2357 rc = -EIO; 2358 goto err; 2359 } 2360 2361 ata_eh_about_to_do(link, dev, ATA_EH_REVALIDATE); 2362 rc = ata_dev_revalidate(dev, ehc->classes[dev->devno], 2363 readid_flags); 2364 if (rc) 2365 goto err; 2366 2367 ata_eh_done(link, dev, ATA_EH_REVALIDATE); 2368 2369 /* Configuration may have changed, reconfigure 2370 * transfer mode. 
2371 */ 2372 ehc->i.flags |= ATA_EHI_SETMODE; 2373 2374 /* schedule the scsi_rescan_device() here */ 2375 queue_work(ata_aux_wq, &(ap->scsi_rescan_task)); 2376 } else if (dev->class == ATA_DEV_UNKNOWN && 2377 ehc->tries[dev->devno] && 2378 ata_class_enabled(ehc->classes[dev->devno])) { 2379 dev->class = ehc->classes[dev->devno]; 2380 2381 if (dev->class == ATA_DEV_PMP) 2382 rc = sata_pmp_attach(dev); 2383 else 2384 rc = ata_dev_read_id(dev, &dev->class, 2385 readid_flags, dev->id); 2386 switch (rc) { 2387 case 0: 2388 new_mask |= 1 << dev->devno; 2389 break; 2390 case -ENOENT: 2391 /* IDENTIFY was issued to non-existent 2392 * device. No need to reset. Just 2393 * thaw and kill the device. 2394 */ 2395 ata_eh_thaw_port(ap); 2396 dev->class = ATA_DEV_UNKNOWN; 2397 break; 2398 default: 2399 dev->class = ATA_DEV_UNKNOWN; 2400 goto err; 2401 } 2402 } 2403 } 2404 2405 /* PDIAG- should have been released, ask cable type if post-reset */ 2406 if ((ehc->i.flags & ATA_EHI_DID_RESET) && ata_is_host_link(link)) { 2407 if (ap->ops->cable_detect) 2408 ap->cbl = ap->ops->cable_detect(ap); 2409 ata_force_cbl(ap); 2410 } 2411 2412 /* Configure new devices forward such that user doesn't see 2413 * device detection messages backwards. 
2414 */ 2415 ata_link_for_each_dev(dev, link) { 2416 if (!(new_mask & (1 << dev->devno)) || 2417 dev->class == ATA_DEV_PMP) 2418 continue; 2419 2420 ehc->i.flags |= ATA_EHI_PRINTINFO; 2421 rc = ata_dev_configure(dev); 2422 ehc->i.flags &= ~ATA_EHI_PRINTINFO; 2423 if (rc) 2424 goto err; 2425 2426 spin_lock_irqsave(ap->lock, flags); 2427 ap->pflags |= ATA_PFLAG_SCSI_HOTPLUG; 2428 spin_unlock_irqrestore(ap->lock, flags); 2429 2430 /* new device discovered, configure xfermode */ 2431 ehc->i.flags |= ATA_EHI_SETMODE; 2432 } 2433 2434 return 0; 2435 2436 err: 2437 *r_failed_dev = dev; 2438 DPRINTK("EXIT rc=%d\n", rc); 2439 return rc; 2440 } 2441 2442 /** 2443 * ata_set_mode - Program timings and issue SET FEATURES - XFER 2444 * @link: link on which timings will be programmed 2445 * @r_failed_dev: out paramter for failed device 2446 * 2447 * Set ATA device disk transfer mode (PIO3, UDMA6, etc.). If 2448 * ata_set_mode() fails, pointer to the failing device is 2449 * returned in @r_failed_dev. 2450 * 2451 * LOCKING: 2452 * PCI/etc. bus probe sem. 2453 * 2454 * RETURNS: 2455 * 0 on success, negative errno otherwise 2456 */ 2457 int ata_set_mode(struct ata_link *link, struct ata_device **r_failed_dev) 2458 { 2459 struct ata_port *ap = link->ap; 2460 struct ata_device *dev; 2461 int rc; 2462 2463 /* if data transfer is verified, clear DUBIOUS_XFER on ering top */ 2464 ata_link_for_each_dev(dev, link) { 2465 if (!(dev->flags & ATA_DFLAG_DUBIOUS_XFER)) { 2466 struct ata_ering_entry *ent; 2467 2468 ent = ata_ering_top(&dev->ering); 2469 if (ent) 2470 ent->eflags &= ~ATA_EFLAG_DUBIOUS_XFER; 2471 } 2472 } 2473 2474 /* has private set_mode? 
*/ 2475 if (ap->ops->set_mode) 2476 rc = ap->ops->set_mode(link, r_failed_dev); 2477 else 2478 rc = ata_do_set_mode(link, r_failed_dev); 2479 2480 /* if transfer mode has changed, set DUBIOUS_XFER on device */ 2481 ata_link_for_each_dev(dev, link) { 2482 struct ata_eh_context *ehc = &link->eh_context; 2483 u8 saved_xfer_mode = ehc->saved_xfer_mode[dev->devno]; 2484 u8 saved_ncq = !!(ehc->saved_ncq_enabled & (1 << dev->devno)); 2485 2486 if (dev->xfer_mode != saved_xfer_mode || 2487 ata_ncq_enabled(dev) != saved_ncq) 2488 dev->flags |= ATA_DFLAG_DUBIOUS_XFER; 2489 } 2490 2491 return rc; 2492 } 2493 2494 static int ata_link_nr_enabled(struct ata_link *link) 2495 { 2496 struct ata_device *dev; 2497 int cnt = 0; 2498 2499 ata_link_for_each_dev(dev, link) 2500 if (ata_dev_enabled(dev)) 2501 cnt++; 2502 return cnt; 2503 } 2504 2505 static int ata_link_nr_vacant(struct ata_link *link) 2506 { 2507 struct ata_device *dev; 2508 int cnt = 0; 2509 2510 ata_link_for_each_dev(dev, link) 2511 if (dev->class == ATA_DEV_UNKNOWN) 2512 cnt++; 2513 return cnt; 2514 } 2515 2516 static int ata_eh_skip_recovery(struct ata_link *link) 2517 { 2518 struct ata_eh_context *ehc = &link->eh_context; 2519 struct ata_device *dev; 2520 2521 /* skip disabled links */ 2522 if (link->flags & ATA_LFLAG_DISABLED) 2523 return 1; 2524 2525 /* thaw frozen port, resume link and recover failed devices */ 2526 if ((link->ap->pflags & ATA_PFLAG_FROZEN) || 2527 (ehc->i.flags & ATA_EHI_RESUME_LINK) || ata_link_nr_enabled(link)) 2528 return 0; 2529 2530 /* skip if class codes for all vacant slots are ATA_DEV_NONE */ 2531 ata_link_for_each_dev(dev, link) { 2532 if (dev->class == ATA_DEV_UNKNOWN && 2533 ehc->classes[dev->devno] != ATA_DEV_NONE) 2534 return 0; 2535 } 2536 2537 return 1; 2538 } 2539 2540 static int ata_eh_schedule_probe(struct ata_device *dev) 2541 { 2542 struct ata_eh_context *ehc = &dev->link->eh_context; 2543 2544 if (!(ehc->i.probe_mask & (1 << dev->devno)) || 2545 (ehc->did_probe_mask & (1 << 
dev->devno))) 2546 return 0; 2547 2548 ata_eh_detach_dev(dev); 2549 ata_dev_init(dev); 2550 ehc->did_probe_mask |= (1 << dev->devno); 2551 ehc->i.action |= ATA_EH_SOFTRESET; 2552 ehc->saved_xfer_mode[dev->devno] = 0; 2553 ehc->saved_ncq_enabled &= ~(1 << dev->devno); 2554 2555 return 1; 2556 } 2557 2558 static int ata_eh_handle_dev_fail(struct ata_device *dev, int err) 2559 { 2560 struct ata_eh_context *ehc = &dev->link->eh_context; 2561 2562 ehc->tries[dev->devno]--; 2563 2564 switch (err) { 2565 case -ENODEV: 2566 /* device missing or wrong IDENTIFY data, schedule probing */ 2567 ehc->i.probe_mask |= (1 << dev->devno); 2568 case -EINVAL: 2569 /* give it just one more chance */ 2570 ehc->tries[dev->devno] = min(ehc->tries[dev->devno], 1); 2571 case -EIO: 2572 if (ehc->tries[dev->devno] == 1 && dev->pio_mode > XFER_PIO_0) { 2573 /* This is the last chance, better to slow 2574 * down than lose it. 2575 */ 2576 sata_down_spd_limit(dev->link); 2577 ata_down_xfermask_limit(dev, ATA_DNXFER_PIO); 2578 } 2579 } 2580 2581 if (ata_dev_enabled(dev) && !ehc->tries[dev->devno]) { 2582 /* disable device if it has used up all its chances */ 2583 ata_dev_disable(dev); 2584 2585 /* detach if offline */ 2586 if (ata_link_offline(dev->link)) 2587 ata_eh_detach_dev(dev); 2588 2589 /* schedule probe if necessary */ 2590 if (ata_eh_schedule_probe(dev)) 2591 ehc->tries[dev->devno] = ATA_EH_DEV_TRIES; 2592 2593 return 1; 2594 } else { 2595 /* soft didn't work? 
be haaaaard */ 2596 if (ehc->i.flags & ATA_EHI_DID_RESET) 2597 ehc->i.action |= ATA_EH_HARDRESET; 2598 else 2599 ehc->i.action |= ATA_EH_SOFTRESET; 2600 2601 return 0; 2602 } 2603 } 2604 2605 /** 2606 * ata_eh_recover - recover host port after error 2607 * @ap: host port to recover 2608 * @prereset: prereset method (can be NULL) 2609 * @softreset: softreset method (can be NULL) 2610 * @hardreset: hardreset method (can be NULL) 2611 * @postreset: postreset method (can be NULL) 2612 * @r_failed_link: out parameter for failed link 2613 * 2614 * This is the alpha and omega, eum and yang, heart and soul of 2615 * libata exception handling. On entry, actions required to 2616 * recover each link and hotplug requests are recorded in the 2617 * link's eh_context. This function executes all the operations 2618 * with appropriate retrials and fallbacks to resurrect failed 2619 * devices, detach goners and greet newcomers. 2620 * 2621 * LOCKING: 2622 * Kernel thread context (may sleep). 2623 * 2624 * RETURNS: 2625 * 0 on success, -errno on failure. 2626 */ 2627 int ata_eh_recover(struct ata_port *ap, ata_prereset_fn_t prereset, 2628 ata_reset_fn_t softreset, ata_reset_fn_t hardreset, 2629 ata_postreset_fn_t postreset, 2630 struct ata_link **r_failed_link) 2631 { 2632 struct ata_link *link; 2633 struct ata_device *dev; 2634 int nr_failed_devs, nr_disabled_devs; 2635 int reset, rc; 2636 unsigned long flags; 2637 2638 DPRINTK("ENTER\n"); 2639 2640 /* prep for recovery */ 2641 ata_port_for_each_link(link, ap) { 2642 struct ata_eh_context *ehc = &link->eh_context; 2643 2644 /* re-enable link? 
*/ 2645 if (ehc->i.action & ATA_EH_ENABLE_LINK) { 2646 ata_eh_about_to_do(link, NULL, ATA_EH_ENABLE_LINK); 2647 spin_lock_irqsave(ap->lock, flags); 2648 link->flags &= ~ATA_LFLAG_DISABLED; 2649 spin_unlock_irqrestore(ap->lock, flags); 2650 ata_eh_done(link, NULL, ATA_EH_ENABLE_LINK); 2651 } 2652 2653 ata_link_for_each_dev(dev, link) { 2654 if (link->flags & ATA_LFLAG_NO_RETRY) 2655 ehc->tries[dev->devno] = 1; 2656 else 2657 ehc->tries[dev->devno] = ATA_EH_DEV_TRIES; 2658 2659 /* collect port action mask recorded in dev actions */ 2660 ehc->i.action |= ehc->i.dev_action[dev->devno] & 2661 ~ATA_EH_PERDEV_MASK; 2662 ehc->i.dev_action[dev->devno] &= ATA_EH_PERDEV_MASK; 2663 2664 /* process hotplug request */ 2665 if (dev->flags & ATA_DFLAG_DETACH) 2666 ata_eh_detach_dev(dev); 2667 2668 /* schedule probe if necessary */ 2669 if (!ata_dev_enabled(dev)) 2670 ata_eh_schedule_probe(dev); 2671 } 2672 } 2673 2674 retry: 2675 rc = 0; 2676 nr_failed_devs = 0; 2677 nr_disabled_devs = 0; 2678 reset = 0; 2679 2680 /* if UNLOADING, finish immediately */ 2681 if (ap->pflags & ATA_PFLAG_UNLOADING) 2682 goto out; 2683 2684 /* prep for EH */ 2685 ata_port_for_each_link(link, ap) { 2686 struct ata_eh_context *ehc = &link->eh_context; 2687 2688 /* skip EH if possible. */ 2689 if (ata_eh_skip_recovery(link)) 2690 ehc->i.action = 0; 2691 2692 /* do we need to reset? */ 2693 if (ehc->i.action & ATA_EH_RESET_MASK) 2694 reset = 1; 2695 2696 ata_link_for_each_dev(dev, link) 2697 ehc->classes[dev->devno] = ATA_DEV_UNKNOWN; 2698 } 2699 2700 /* reset */ 2701 if (reset) { 2702 /* if PMP is attached, this function only deals with 2703 * downstream links, port should stay thawed. 
2704 */ 2705 if (!ap->nr_pmp_links) 2706 ata_eh_freeze_port(ap); 2707 2708 ata_port_for_each_link(link, ap) { 2709 struct ata_eh_context *ehc = &link->eh_context; 2710 2711 if (!(ehc->i.action & ATA_EH_RESET_MASK)) 2712 continue; 2713 2714 rc = ata_eh_reset(link, ata_link_nr_vacant(link), 2715 prereset, softreset, hardreset, 2716 postreset); 2717 if (rc) { 2718 ata_link_printk(link, KERN_ERR, 2719 "reset failed, giving up\n"); 2720 goto out; 2721 } 2722 } 2723 2724 if (!ap->nr_pmp_links) 2725 ata_eh_thaw_port(ap); 2726 } 2727 2728 /* the rest */ 2729 ata_port_for_each_link(link, ap) { 2730 struct ata_eh_context *ehc = &link->eh_context; 2731 2732 /* revalidate existing devices and attach new ones */ 2733 rc = ata_eh_revalidate_and_attach(link, &dev); 2734 if (rc) 2735 goto dev_fail; 2736 2737 /* if PMP got attached, return, pmp EH will take care of it */ 2738 if (link->device->class == ATA_DEV_PMP) { 2739 ehc->i.action = 0; 2740 return 0; 2741 } 2742 2743 /* configure transfer mode if necessary */ 2744 if (ehc->i.flags & ATA_EHI_SETMODE) { 2745 rc = ata_set_mode(link, &dev); 2746 if (rc) 2747 goto dev_fail; 2748 ehc->i.flags &= ~ATA_EHI_SETMODE; 2749 } 2750 2751 if (ehc->i.action & ATA_EH_LPM) 2752 ata_link_for_each_dev(dev, link) 2753 ata_dev_enable_pm(dev, ap->pm_policy); 2754 2755 /* this link is okay now */ 2756 ehc->i.flags = 0; 2757 continue; 2758 2759 dev_fail: 2760 nr_failed_devs++; 2761 if (ata_eh_handle_dev_fail(dev, rc)) 2762 nr_disabled_devs++; 2763 2764 if (ap->pflags & ATA_PFLAG_FROZEN) { 2765 /* PMP reset requires working host port. 2766 * Can't retry if it's frozen. 
2767 */ 2768 if (ap->nr_pmp_links) 2769 goto out; 2770 break; 2771 } 2772 } 2773 2774 if (nr_failed_devs) { 2775 if (nr_failed_devs != nr_disabled_devs) { 2776 ata_port_printk(ap, KERN_WARNING, "failed to recover " 2777 "some devices, retrying in 5 secs\n"); 2778 ssleep(5); 2779 } else { 2780 /* no device left to recover, repeat fast */ 2781 msleep(500); 2782 } 2783 2784 goto retry; 2785 } 2786 2787 out: 2788 if (rc && r_failed_link) 2789 *r_failed_link = link; 2790 2791 DPRINTK("EXIT, rc=%d\n", rc); 2792 return rc; 2793 } 2794 2795 /** 2796 * ata_eh_finish - finish up EH 2797 * @ap: host port to finish EH for 2798 * 2799 * Recovery is complete. Clean up EH states and retry or finish 2800 * failed qcs. 2801 * 2802 * LOCKING: 2803 * None. 2804 */ 2805 void ata_eh_finish(struct ata_port *ap) 2806 { 2807 int tag; 2808 2809 /* retry or finish qcs */ 2810 for (tag = 0; tag < ATA_MAX_QUEUE; tag++) { 2811 struct ata_queued_cmd *qc = __ata_qc_from_tag(ap, tag); 2812 2813 if (!(qc->flags & ATA_QCFLAG_FAILED)) 2814 continue; 2815 2816 if (qc->err_mask) { 2817 /* FIXME: Once EH migration is complete, 2818 * generate sense data in this function, 2819 * considering both err_mask and tf. 2820 * 2821 * There's no point in retrying invalid 2822 * (detected by libata) and non-IO device 2823 * errors (rejected by device). Finish them 2824 * immediately. 
2825 */ 2826 if ((qc->err_mask & AC_ERR_INVALID) || 2827 (!(qc->flags & ATA_QCFLAG_IO) && 2828 qc->err_mask == AC_ERR_DEV)) 2829 ata_eh_qc_complete(qc); 2830 else 2831 ata_eh_qc_retry(qc); 2832 } else { 2833 if (qc->flags & ATA_QCFLAG_SENSE_VALID) { 2834 ata_eh_qc_complete(qc); 2835 } else { 2836 /* feed zero TF to sense generation */ 2837 memset(&qc->result_tf, 0, sizeof(qc->result_tf)); 2838 ata_eh_qc_retry(qc); 2839 } 2840 } 2841 } 2842 2843 /* make sure nr_active_links is zero after EH */ 2844 WARN_ON(ap->nr_active_links); 2845 ap->nr_active_links = 0; 2846 } 2847 2848 /** 2849 * ata_do_eh - do standard error handling 2850 * @ap: host port to handle error for 2851 * @prereset: prereset method (can be NULL) 2852 * @softreset: softreset method (can be NULL) 2853 * @hardreset: hardreset method (can be NULL) 2854 * @postreset: postreset method (can be NULL) 2855 * 2856 * Perform standard error handling sequence. 2857 * 2858 * LOCKING: 2859 * Kernel thread context (may sleep). 2860 */ 2861 void ata_do_eh(struct ata_port *ap, ata_prereset_fn_t prereset, 2862 ata_reset_fn_t softreset, ata_reset_fn_t hardreset, 2863 ata_postreset_fn_t postreset) 2864 { 2865 struct ata_device *dev; 2866 int rc; 2867 2868 ata_eh_autopsy(ap); 2869 ata_eh_report(ap); 2870 2871 rc = ata_eh_recover(ap, prereset, softreset, hardreset, postreset, 2872 NULL); 2873 if (rc) { 2874 ata_link_for_each_dev(dev, &ap->link) 2875 ata_dev_disable(dev); 2876 } 2877 2878 ata_eh_finish(ap); 2879 } 2880 2881 #ifdef CONFIG_PM 2882 /** 2883 * ata_eh_handle_port_suspend - perform port suspend operation 2884 * @ap: port to suspend 2885 * 2886 * Suspend @ap. 2887 * 2888 * LOCKING: 2889 * Kernel thread context (may sleep). 2890 */ 2891 static void ata_eh_handle_port_suspend(struct ata_port *ap) 2892 { 2893 unsigned long flags; 2894 int rc = 0; 2895 2896 /* are we suspending? 
*/ 2897 spin_lock_irqsave(ap->lock, flags); 2898 if (!(ap->pflags & ATA_PFLAG_PM_PENDING) || 2899 ap->pm_mesg.event == PM_EVENT_ON) { 2900 spin_unlock_irqrestore(ap->lock, flags); 2901 return; 2902 } 2903 spin_unlock_irqrestore(ap->lock, flags); 2904 2905 WARN_ON(ap->pflags & ATA_PFLAG_SUSPENDED); 2906 2907 /* tell ACPI we're suspending */ 2908 rc = ata_acpi_on_suspend(ap); 2909 if (rc) 2910 goto out; 2911 2912 /* suspend */ 2913 ata_eh_freeze_port(ap); 2914 2915 if (ap->ops->port_suspend) 2916 rc = ap->ops->port_suspend(ap, ap->pm_mesg); 2917 2918 ata_acpi_set_state(ap, PMSG_SUSPEND); 2919 out: 2920 /* report result */ 2921 spin_lock_irqsave(ap->lock, flags); 2922 2923 ap->pflags &= ~ATA_PFLAG_PM_PENDING; 2924 if (rc == 0) 2925 ap->pflags |= ATA_PFLAG_SUSPENDED; 2926 else if (ap->pflags & ATA_PFLAG_FROZEN) 2927 ata_port_schedule_eh(ap); 2928 2929 if (ap->pm_result) { 2930 *ap->pm_result = rc; 2931 ap->pm_result = NULL; 2932 } 2933 2934 spin_unlock_irqrestore(ap->lock, flags); 2935 2936 return; 2937 } 2938 2939 /** 2940 * ata_eh_handle_port_resume - perform port resume operation 2941 * @ap: port to resume 2942 * 2943 * Resume @ap. 2944 * 2945 * LOCKING: 2946 * Kernel thread context (may sleep). 2947 */ 2948 static void ata_eh_handle_port_resume(struct ata_port *ap) 2949 { 2950 unsigned long flags; 2951 int rc = 0; 2952 2953 /* are we resuming? 
*/ 2954 spin_lock_irqsave(ap->lock, flags); 2955 if (!(ap->pflags & ATA_PFLAG_PM_PENDING) || 2956 ap->pm_mesg.event != PM_EVENT_ON) { 2957 spin_unlock_irqrestore(ap->lock, flags); 2958 return; 2959 } 2960 spin_unlock_irqrestore(ap->lock, flags); 2961 2962 WARN_ON(!(ap->pflags & ATA_PFLAG_SUSPENDED)); 2963 2964 ata_acpi_set_state(ap, PMSG_ON); 2965 2966 if (ap->ops->port_resume) 2967 rc = ap->ops->port_resume(ap); 2968 2969 /* tell ACPI that we're resuming */ 2970 ata_acpi_on_resume(ap); 2971 2972 /* report result */ 2973 spin_lock_irqsave(ap->lock, flags); 2974 ap->pflags &= ~(ATA_PFLAG_PM_PENDING | ATA_PFLAG_SUSPENDED); 2975 if (ap->pm_result) { 2976 *ap->pm_result = rc; 2977 ap->pm_result = NULL; 2978 } 2979 spin_unlock_irqrestore(ap->lock, flags); 2980 } 2981 #endif /* CONFIG_PM */ 2982