1 /* 2 * libata-eh.c - libata error handling 3 * 4 * Maintained by: Jeff Garzik <jgarzik@pobox.com> 5 * Please ALWAYS copy linux-ide@vger.kernel.org 6 * on emails. 7 * 8 * Copyright 2006 Tejun Heo <htejun@gmail.com> 9 * 10 * 11 * This program is free software; you can redistribute it and/or 12 * modify it under the terms of the GNU General Public License as 13 * published by the Free Software Foundation; either version 2, or 14 * (at your option) any later version. 15 * 16 * This program is distributed in the hope that it will be useful, 17 * but WITHOUT ANY WARRANTY; without even the implied warranty of 18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 19 * General Public License for more details. 20 * 21 * You should have received a copy of the GNU General Public License 22 * along with this program; see the file COPYING. If not, write to 23 * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, 24 * USA. 25 * 26 * 27 * libata documentation is available via 'make {ps|pdf}docs', 28 * as Documentation/DocBook/libata.* 29 * 30 * Hardware documentation available from http://www.t13.org/ and 31 * http://www.sata-io.org/ 32 * 33 */ 34 35 #include <linux/kernel.h> 36 #include <linux/pci.h> 37 #include <scsi/scsi.h> 38 #include <scsi/scsi_host.h> 39 #include <scsi/scsi_eh.h> 40 #include <scsi/scsi_device.h> 41 #include <scsi/scsi_cmnd.h> 42 #include "../scsi/scsi_transport_api.h" 43 44 #include <linux/libata.h> 45 46 #include "libata.h" 47 48 enum { 49 /* speed down verdicts */ 50 ATA_EH_SPDN_NCQ_OFF = (1 << 0), 51 ATA_EH_SPDN_SPEED_DOWN = (1 << 1), 52 ATA_EH_SPDN_FALLBACK_TO_PIO = (1 << 2), 53 ATA_EH_SPDN_KEEP_ERRORS = (1 << 3), 54 55 /* error flags */ 56 ATA_EFLAG_IS_IO = (1 << 0), 57 ATA_EFLAG_DUBIOUS_XFER = (1 << 1), 58 59 /* error categories */ 60 ATA_ECAT_NONE = 0, 61 ATA_ECAT_ATA_BUS = 1, 62 ATA_ECAT_TOUT_HSM = 2, 63 ATA_ECAT_UNK_DEV = 3, 64 ATA_ECAT_DUBIOUS_NONE = 4, 65 ATA_ECAT_DUBIOUS_ATA_BUS = 5, 66 ATA_ECAT_DUBIOUS_TOUT_HSM = 
6, 67 ATA_ECAT_DUBIOUS_UNK_DEV = 7, 68 ATA_ECAT_NR = 8, 69 70 /* always put at least this amount of time between resets */ 71 ATA_EH_RESET_COOL_DOWN = 5000, 72 73 /* Waiting in ->prereset can never be reliable. It's 74 * sometimes nice to wait there but it can't be depended upon; 75 * otherwise, we wouldn't be resetting. Just give it enough 76 * time for most drives to spin up. 77 */ 78 ATA_EH_PRERESET_TIMEOUT = 10000, 79 ATA_EH_FASTDRAIN_INTERVAL = 3000, 80 }; 81 82 /* The following table determines how we sequence resets. Each entry 83 * represents timeout for that try. The first try can be soft or 84 * hardreset. All others are hardreset if available. In most cases 85 * the first reset w/ 10sec timeout should succeed. Following entries 86 * are mostly for error handling, hotplug and retarded devices. 87 */ 88 static const unsigned long ata_eh_reset_timeouts[] = { 89 10000, /* most drives spin up by 10sec */ 90 10000, /* > 99% working drives spin up before 20sec */ 91 35000, /* give > 30 secs of idleness for retarded devices */ 92 5000, /* and sweet one last chance */ 93 ULONG_MAX, /* > 1 min has elapsed, give up */ 94 }; 95 96 static void __ata_port_freeze(struct ata_port *ap); 97 #ifdef CONFIG_PM 98 static void ata_eh_handle_port_suspend(struct ata_port *ap); 99 static void ata_eh_handle_port_resume(struct ata_port *ap); 100 #else /* CONFIG_PM */ 101 static void ata_eh_handle_port_suspend(struct ata_port *ap) 102 { } 103 104 static void ata_eh_handle_port_resume(struct ata_port *ap) 105 { } 106 #endif /* CONFIG_PM */ 107 108 static void __ata_ehi_pushv_desc(struct ata_eh_info *ehi, const char *fmt, 109 va_list args) 110 { 111 ehi->desc_len += vscnprintf(ehi->desc + ehi->desc_len, 112 ATA_EH_DESC_LEN - ehi->desc_len, 113 fmt, args); 114 } 115 116 /** 117 * __ata_ehi_push_desc - push error description without adding separator 118 * @ehi: target EHI 119 * @fmt: printf format string 120 * 121 * Format string according to @fmt and append it to @ehi->desc. 
122 * 123 * LOCKING: 124 * spin_lock_irqsave(host lock) 125 */ 126 void __ata_ehi_push_desc(struct ata_eh_info *ehi, const char *fmt, ...) 127 { 128 va_list args; 129 130 va_start(args, fmt); 131 __ata_ehi_pushv_desc(ehi, fmt, args); 132 va_end(args); 133 } 134 135 /** 136 * ata_ehi_push_desc - push error description with separator 137 * @ehi: target EHI 138 * @fmt: printf format string 139 * 140 * Format string according to @fmt and append it to @ehi->desc. 141 * If @ehi->desc is not empty, ", " is added in-between. 142 * 143 * LOCKING: 144 * spin_lock_irqsave(host lock) 145 */ 146 void ata_ehi_push_desc(struct ata_eh_info *ehi, const char *fmt, ...) 147 { 148 va_list args; 149 150 if (ehi->desc_len) 151 __ata_ehi_push_desc(ehi, ", "); 152 153 va_start(args, fmt); 154 __ata_ehi_pushv_desc(ehi, fmt, args); 155 va_end(args); 156 } 157 158 /** 159 * ata_ehi_clear_desc - clean error description 160 * @ehi: target EHI 161 * 162 * Clear @ehi->desc. 163 * 164 * LOCKING: 165 * spin_lock_irqsave(host lock) 166 */ 167 void ata_ehi_clear_desc(struct ata_eh_info *ehi) 168 { 169 ehi->desc[0] = '\0'; 170 ehi->desc_len = 0; 171 } 172 173 /** 174 * ata_port_desc - append port description 175 * @ap: target ATA port 176 * @fmt: printf format string 177 * 178 * Format string according to @fmt and append it to port 179 * description. If port description is not empty, " " is added 180 * in-between. This function is to be used while initializing 181 * ata_host. The description is printed on host registration. 182 * 183 * LOCKING: 184 * None. 185 */ 186 void ata_port_desc(struct ata_port *ap, const char *fmt, ...) 
187 { 188 va_list args; 189 190 WARN_ON(!(ap->pflags & ATA_PFLAG_INITIALIZING)); 191 192 if (ap->link.eh_info.desc_len) 193 __ata_ehi_push_desc(&ap->link.eh_info, " "); 194 195 va_start(args, fmt); 196 __ata_ehi_pushv_desc(&ap->link.eh_info, fmt, args); 197 va_end(args); 198 } 199 200 #ifdef CONFIG_PCI 201 202 /** 203 * ata_port_pbar_desc - append PCI BAR description 204 * @ap: target ATA port 205 * @bar: target PCI BAR 206 * @offset: offset into PCI BAR 207 * @name: name of the area 208 * 209 * If @offset is negative, this function formats a string which 210 * contains the name, address, size and type of the BAR and 211 * appends it to the port description. If @offset is zero or 212 * positive, only name and offsetted address is appended. 213 * 214 * LOCKING: 215 * None. 216 */ 217 void ata_port_pbar_desc(struct ata_port *ap, int bar, ssize_t offset, 218 const char *name) 219 { 220 struct pci_dev *pdev = to_pci_dev(ap->host->dev); 221 char *type = ""; 222 unsigned long long start, len; 223 224 if (pci_resource_flags(pdev, bar) & IORESOURCE_MEM) 225 type = "m"; 226 else if (pci_resource_flags(pdev, bar) & IORESOURCE_IO) 227 type = "i"; 228 229 start = (unsigned long long)pci_resource_start(pdev, bar); 230 len = (unsigned long long)pci_resource_len(pdev, bar); 231 232 if (offset < 0) 233 ata_port_desc(ap, "%s %s%llu@0x%llx", name, type, len, start); 234 else 235 ata_port_desc(ap, "%s 0x%llx", name, 236 start + (unsigned long long)offset); 237 } 238 239 #endif /* CONFIG_PCI */ 240 241 static void ata_ering_record(struct ata_ering *ering, unsigned int eflags, 242 unsigned int err_mask) 243 { 244 struct ata_ering_entry *ent; 245 246 WARN_ON(!err_mask); 247 248 ering->cursor++; 249 ering->cursor %= ATA_ERING_SIZE; 250 251 ent = &ering->ring[ering->cursor]; 252 ent->eflags = eflags; 253 ent->err_mask = err_mask; 254 ent->timestamp = get_jiffies_64(); 255 } 256 257 static struct ata_ering_entry *ata_ering_top(struct ata_ering *ering) 258 { 259 struct ata_ering_entry *ent 
= &ering->ring[ering->cursor]; 260 261 if (ent->err_mask) 262 return ent; 263 return NULL; 264 } 265 266 static void ata_ering_clear(struct ata_ering *ering) 267 { 268 memset(ering, 0, sizeof(*ering)); 269 } 270 271 static int ata_ering_map(struct ata_ering *ering, 272 int (*map_fn)(struct ata_ering_entry *, void *), 273 void *arg) 274 { 275 int idx, rc = 0; 276 struct ata_ering_entry *ent; 277 278 idx = ering->cursor; 279 do { 280 ent = &ering->ring[idx]; 281 if (!ent->err_mask) 282 break; 283 rc = map_fn(ent, arg); 284 if (rc) 285 break; 286 idx = (idx - 1 + ATA_ERING_SIZE) % ATA_ERING_SIZE; 287 } while (idx != ering->cursor); 288 289 return rc; 290 } 291 292 static unsigned int ata_eh_dev_action(struct ata_device *dev) 293 { 294 struct ata_eh_context *ehc = &dev->link->eh_context; 295 296 return ehc->i.action | ehc->i.dev_action[dev->devno]; 297 } 298 299 static void ata_eh_clear_action(struct ata_link *link, struct ata_device *dev, 300 struct ata_eh_info *ehi, unsigned int action) 301 { 302 struct ata_device *tdev; 303 304 if (!dev) { 305 ehi->action &= ~action; 306 ata_link_for_each_dev(tdev, link) 307 ehi->dev_action[tdev->devno] &= ~action; 308 } else { 309 /* doesn't make sense for port-wide EH actions */ 310 WARN_ON(!(action & ATA_EH_PERDEV_MASK)); 311 312 /* break ehi->action into ehi->dev_action */ 313 if (ehi->action & action) { 314 ata_link_for_each_dev(tdev, link) 315 ehi->dev_action[tdev->devno] |= 316 ehi->action & action; 317 ehi->action &= ~action; 318 } 319 320 /* turn off the specified per-dev action */ 321 ehi->dev_action[dev->devno] &= ~action; 322 } 323 } 324 325 /** 326 * ata_scsi_timed_out - SCSI layer time out callback 327 * @cmd: timed out SCSI command 328 * 329 * Handles SCSI layer timeout. We race with normal completion of 330 * the qc for @cmd. If the qc is already gone, we lose and let 331 * the scsi command finish (EH_HANDLED). Otherwise, the qc has 332 * timed out and EH should be invoked. 
Prevent ata_qc_complete() 333 * from finishing it by setting EH_SCHEDULED and return 334 * EH_NOT_HANDLED. 335 * 336 * TODO: kill this function once old EH is gone. 337 * 338 * LOCKING: 339 * Called from timer context 340 * 341 * RETURNS: 342 * EH_HANDLED or EH_NOT_HANDLED 343 */ 344 enum scsi_eh_timer_return ata_scsi_timed_out(struct scsi_cmnd *cmd) 345 { 346 struct Scsi_Host *host = cmd->device->host; 347 struct ata_port *ap = ata_shost_to_port(host); 348 unsigned long flags; 349 struct ata_queued_cmd *qc; 350 enum scsi_eh_timer_return ret; 351 352 DPRINTK("ENTER\n"); 353 354 if (ap->ops->error_handler) { 355 ret = EH_NOT_HANDLED; 356 goto out; 357 } 358 359 ret = EH_HANDLED; 360 spin_lock_irqsave(ap->lock, flags); 361 qc = ata_qc_from_tag(ap, ap->link.active_tag); 362 if (qc) { 363 WARN_ON(qc->scsicmd != cmd); 364 qc->flags |= ATA_QCFLAG_EH_SCHEDULED; 365 qc->err_mask |= AC_ERR_TIMEOUT; 366 ret = EH_NOT_HANDLED; 367 } 368 spin_unlock_irqrestore(ap->lock, flags); 369 370 out: 371 DPRINTK("EXIT, ret=%d\n", ret); 372 return ret; 373 } 374 375 /** 376 * ata_scsi_error - SCSI layer error handler callback 377 * @host: SCSI host on which error occurred 378 * 379 * Handles SCSI-layer-thrown error events. 380 * 381 * LOCKING: 382 * Inherited from SCSI layer (none, can sleep) 383 * 384 * RETURNS: 385 * Zero. 386 */ 387 void ata_scsi_error(struct Scsi_Host *host) 388 { 389 struct ata_port *ap = ata_shost_to_port(host); 390 int i; 391 unsigned long flags; 392 393 DPRINTK("ENTER\n"); 394 395 /* synchronize with port task */ 396 ata_port_flush_task(ap); 397 398 /* synchronize with host lock and sort out timeouts */ 399 400 /* For new EH, all qcs are finished in one of three ways - 401 * normal completion, error completion, and SCSI timeout. 402 * Both cmpletions can race against SCSI timeout. When normal 403 * completion wins, the qc never reaches EH. When error 404 * completion wins, the qc has ATA_QCFLAG_FAILED set. 
405 * 406 * When SCSI timeout wins, things are a bit more complex. 407 * Normal or error completion can occur after the timeout but 408 * before this point. In such cases, both types of 409 * completions are honored. A scmd is determined to have 410 * timed out iff its associated qc is active and not failed. 411 */ 412 if (ap->ops->error_handler) { 413 struct scsi_cmnd *scmd, *tmp; 414 int nr_timedout = 0; 415 416 spin_lock_irqsave(ap->lock, flags); 417 418 list_for_each_entry_safe(scmd, tmp, &host->eh_cmd_q, eh_entry) { 419 struct ata_queued_cmd *qc; 420 421 for (i = 0; i < ATA_MAX_QUEUE; i++) { 422 qc = __ata_qc_from_tag(ap, i); 423 if (qc->flags & ATA_QCFLAG_ACTIVE && 424 qc->scsicmd == scmd) 425 break; 426 } 427 428 if (i < ATA_MAX_QUEUE) { 429 /* the scmd has an associated qc */ 430 if (!(qc->flags & ATA_QCFLAG_FAILED)) { 431 /* which hasn't failed yet, timeout */ 432 qc->err_mask |= AC_ERR_TIMEOUT; 433 qc->flags |= ATA_QCFLAG_FAILED; 434 nr_timedout++; 435 } 436 } else { 437 /* Normal completion occurred after 438 * SCSI timeout but before this point. 439 * Successfully complete it. 440 */ 441 scmd->retries = scmd->allowed; 442 scsi_eh_finish_cmd(scmd, &ap->eh_done_q); 443 } 444 } 445 446 /* If we have timed out qcs. They belong to EH from 447 * this point but the state of the controller is 448 * unknown. Freeze the port to make sure the IRQ 449 * handler doesn't diddle with those qcs. This must 450 * be done atomically w.r.t. setting QCFLAG_FAILED. 
451 */ 452 if (nr_timedout) 453 __ata_port_freeze(ap); 454 455 spin_unlock_irqrestore(ap->lock, flags); 456 457 /* initialize eh_tries */ 458 ap->eh_tries = ATA_EH_MAX_TRIES; 459 } else 460 spin_unlock_wait(ap->lock); 461 462 repeat: 463 /* invoke error handler */ 464 if (ap->ops->error_handler) { 465 struct ata_link *link; 466 467 /* kill fast drain timer */ 468 del_timer_sync(&ap->fastdrain_timer); 469 470 /* process port resume request */ 471 ata_eh_handle_port_resume(ap); 472 473 /* fetch & clear EH info */ 474 spin_lock_irqsave(ap->lock, flags); 475 476 __ata_port_for_each_link(link, ap) { 477 struct ata_eh_context *ehc = &link->eh_context; 478 struct ata_device *dev; 479 480 memset(&link->eh_context, 0, sizeof(link->eh_context)); 481 link->eh_context.i = link->eh_info; 482 memset(&link->eh_info, 0, sizeof(link->eh_info)); 483 484 ata_link_for_each_dev(dev, link) { 485 int devno = dev->devno; 486 487 ehc->saved_xfer_mode[devno] = dev->xfer_mode; 488 if (ata_ncq_enabled(dev)) 489 ehc->saved_ncq_enabled |= 1 << devno; 490 } 491 492 /* set last reset timestamp to some time in the past */ 493 ehc->last_reset = jiffies - 60 * HZ; 494 } 495 496 ap->pflags |= ATA_PFLAG_EH_IN_PROGRESS; 497 ap->pflags &= ~ATA_PFLAG_EH_PENDING; 498 ap->excl_link = NULL; /* don't maintain exclusion over EH */ 499 500 spin_unlock_irqrestore(ap->lock, flags); 501 502 /* invoke EH, skip if unloading or suspended */ 503 if (!(ap->pflags & (ATA_PFLAG_UNLOADING | ATA_PFLAG_SUSPENDED))) 504 ap->ops->error_handler(ap); 505 else 506 ata_eh_finish(ap); 507 508 /* process port suspend request */ 509 ata_eh_handle_port_suspend(ap); 510 511 /* Exception might have happend after ->error_handler 512 * recovered the port but before this point. Repeat 513 * EH in such case. 
514 */ 515 spin_lock_irqsave(ap->lock, flags); 516 517 if (ap->pflags & ATA_PFLAG_EH_PENDING) { 518 if (--ap->eh_tries) { 519 spin_unlock_irqrestore(ap->lock, flags); 520 goto repeat; 521 } 522 ata_port_printk(ap, KERN_ERR, "EH pending after %d " 523 "tries, giving up\n", ATA_EH_MAX_TRIES); 524 ap->pflags &= ~ATA_PFLAG_EH_PENDING; 525 } 526 527 /* this run is complete, make sure EH info is clear */ 528 __ata_port_for_each_link(link, ap) 529 memset(&link->eh_info, 0, sizeof(link->eh_info)); 530 531 /* Clear host_eh_scheduled while holding ap->lock such 532 * that if exception occurs after this point but 533 * before EH completion, SCSI midlayer will 534 * re-initiate EH. 535 */ 536 host->host_eh_scheduled = 0; 537 538 spin_unlock_irqrestore(ap->lock, flags); 539 } else { 540 WARN_ON(ata_qc_from_tag(ap, ap->link.active_tag) == NULL); 541 ap->ops->eng_timeout(ap); 542 } 543 544 /* finish or retry handled scmd's and clean up */ 545 WARN_ON(host->host_failed || !list_empty(&host->eh_cmd_q)); 546 547 scsi_eh_flush_done_q(&ap->eh_done_q); 548 549 /* clean up */ 550 spin_lock_irqsave(ap->lock, flags); 551 552 if (ap->pflags & ATA_PFLAG_LOADING) 553 ap->pflags &= ~ATA_PFLAG_LOADING; 554 else if (ap->pflags & ATA_PFLAG_SCSI_HOTPLUG) 555 queue_delayed_work(ata_aux_wq, &ap->hotplug_task, 0); 556 557 if (ap->pflags & ATA_PFLAG_RECOVERED) 558 ata_port_printk(ap, KERN_INFO, "EH complete\n"); 559 560 ap->pflags &= ~(ATA_PFLAG_SCSI_HOTPLUG | ATA_PFLAG_RECOVERED); 561 562 /* tell wait_eh that we're done */ 563 ap->pflags &= ~ATA_PFLAG_EH_IN_PROGRESS; 564 wake_up_all(&ap->eh_wait_q); 565 566 spin_unlock_irqrestore(ap->lock, flags); 567 568 DPRINTK("EXIT\n"); 569 } 570 571 /** 572 * ata_port_wait_eh - Wait for the currently pending EH to complete 573 * @ap: Port to wait EH for 574 * 575 * Wait until the currently pending EH is complete. 576 * 577 * LOCKING: 578 * Kernel thread context (may sleep). 
579 */ 580 void ata_port_wait_eh(struct ata_port *ap) 581 { 582 unsigned long flags; 583 DEFINE_WAIT(wait); 584 585 retry: 586 spin_lock_irqsave(ap->lock, flags); 587 588 while (ap->pflags & (ATA_PFLAG_EH_PENDING | ATA_PFLAG_EH_IN_PROGRESS)) { 589 prepare_to_wait(&ap->eh_wait_q, &wait, TASK_UNINTERRUPTIBLE); 590 spin_unlock_irqrestore(ap->lock, flags); 591 schedule(); 592 spin_lock_irqsave(ap->lock, flags); 593 } 594 finish_wait(&ap->eh_wait_q, &wait); 595 596 spin_unlock_irqrestore(ap->lock, flags); 597 598 /* make sure SCSI EH is complete */ 599 if (scsi_host_in_recovery(ap->scsi_host)) { 600 msleep(10); 601 goto retry; 602 } 603 } 604 605 static int ata_eh_nr_in_flight(struct ata_port *ap) 606 { 607 unsigned int tag; 608 int nr = 0; 609 610 /* count only non-internal commands */ 611 for (tag = 0; tag < ATA_MAX_QUEUE - 1; tag++) 612 if (ata_qc_from_tag(ap, tag)) 613 nr++; 614 615 return nr; 616 } 617 618 void ata_eh_fastdrain_timerfn(unsigned long arg) 619 { 620 struct ata_port *ap = (void *)arg; 621 unsigned long flags; 622 int cnt; 623 624 spin_lock_irqsave(ap->lock, flags); 625 626 cnt = ata_eh_nr_in_flight(ap); 627 628 /* are we done? */ 629 if (!cnt) 630 goto out_unlock; 631 632 if (cnt == ap->fastdrain_cnt) { 633 unsigned int tag; 634 635 /* No progress during the last interval, tag all 636 * in-flight qcs as timed out and freeze the port. 
637 */ 638 for (tag = 0; tag < ATA_MAX_QUEUE - 1; tag++) { 639 struct ata_queued_cmd *qc = ata_qc_from_tag(ap, tag); 640 if (qc) 641 qc->err_mask |= AC_ERR_TIMEOUT; 642 } 643 644 ata_port_freeze(ap); 645 } else { 646 /* some qcs have finished, give it another chance */ 647 ap->fastdrain_cnt = cnt; 648 ap->fastdrain_timer.expires = 649 ata_deadline(jiffies, ATA_EH_FASTDRAIN_INTERVAL); 650 add_timer(&ap->fastdrain_timer); 651 } 652 653 out_unlock: 654 spin_unlock_irqrestore(ap->lock, flags); 655 } 656 657 /** 658 * ata_eh_set_pending - set ATA_PFLAG_EH_PENDING and activate fast drain 659 * @ap: target ATA port 660 * @fastdrain: activate fast drain 661 * 662 * Set ATA_PFLAG_EH_PENDING and activate fast drain if @fastdrain 663 * is non-zero and EH wasn't pending before. Fast drain ensures 664 * that EH kicks in in timely manner. 665 * 666 * LOCKING: 667 * spin_lock_irqsave(host lock) 668 */ 669 static void ata_eh_set_pending(struct ata_port *ap, int fastdrain) 670 { 671 int cnt; 672 673 /* already scheduled? */ 674 if (ap->pflags & ATA_PFLAG_EH_PENDING) 675 return; 676 677 ap->pflags |= ATA_PFLAG_EH_PENDING; 678 679 if (!fastdrain) 680 return; 681 682 /* do we have in-flight qcs? */ 683 cnt = ata_eh_nr_in_flight(ap); 684 if (!cnt) 685 return; 686 687 /* activate fast drain */ 688 ap->fastdrain_cnt = cnt; 689 ap->fastdrain_timer.expires = 690 ata_deadline(jiffies, ATA_EH_FASTDRAIN_INTERVAL); 691 add_timer(&ap->fastdrain_timer); 692 } 693 694 /** 695 * ata_qc_schedule_eh - schedule qc for error handling 696 * @qc: command to schedule error handling for 697 * 698 * Schedule error handling for @qc. EH will kick in as soon as 699 * other commands are drained. 
700 * 701 * LOCKING: 702 * spin_lock_irqsave(host lock) 703 */ 704 void ata_qc_schedule_eh(struct ata_queued_cmd *qc) 705 { 706 struct ata_port *ap = qc->ap; 707 708 WARN_ON(!ap->ops->error_handler); 709 710 qc->flags |= ATA_QCFLAG_FAILED; 711 ata_eh_set_pending(ap, 1); 712 713 /* The following will fail if timeout has already expired. 714 * ata_scsi_error() takes care of such scmds on EH entry. 715 * Note that ATA_QCFLAG_FAILED is unconditionally set after 716 * this function completes. 717 */ 718 scsi_req_abort_cmd(qc->scsicmd); 719 } 720 721 /** 722 * ata_port_schedule_eh - schedule error handling without a qc 723 * @ap: ATA port to schedule EH for 724 * 725 * Schedule error handling for @ap. EH will kick in as soon as 726 * all commands are drained. 727 * 728 * LOCKING: 729 * spin_lock_irqsave(host lock) 730 */ 731 void ata_port_schedule_eh(struct ata_port *ap) 732 { 733 WARN_ON(!ap->ops->error_handler); 734 735 if (ap->pflags & ATA_PFLAG_INITIALIZING) 736 return; 737 738 ata_eh_set_pending(ap, 1); 739 scsi_schedule_eh(ap->scsi_host); 740 741 DPRINTK("port EH scheduled\n"); 742 } 743 744 static int ata_do_link_abort(struct ata_port *ap, struct ata_link *link) 745 { 746 int tag, nr_aborted = 0; 747 748 WARN_ON(!ap->ops->error_handler); 749 750 /* we're gonna abort all commands, no need for fast drain */ 751 ata_eh_set_pending(ap, 0); 752 753 for (tag = 0; tag < ATA_MAX_QUEUE; tag++) { 754 struct ata_queued_cmd *qc = ata_qc_from_tag(ap, tag); 755 756 if (qc && (!link || qc->dev->link == link)) { 757 qc->flags |= ATA_QCFLAG_FAILED; 758 ata_qc_complete(qc); 759 nr_aborted++; 760 } 761 } 762 763 if (!nr_aborted) 764 ata_port_schedule_eh(ap); 765 766 return nr_aborted; 767 } 768 769 /** 770 * ata_link_abort - abort all qc's on the link 771 * @link: ATA link to abort qc's for 772 * 773 * Abort all active qc's active on @link and schedule EH. 774 * 775 * LOCKING: 776 * spin_lock_irqsave(host lock) 777 * 778 * RETURNS: 779 * Number of aborted qc's. 
780 */ 781 int ata_link_abort(struct ata_link *link) 782 { 783 return ata_do_link_abort(link->ap, link); 784 } 785 786 /** 787 * ata_port_abort - abort all qc's on the port 788 * @ap: ATA port to abort qc's for 789 * 790 * Abort all active qc's of @ap and schedule EH. 791 * 792 * LOCKING: 793 * spin_lock_irqsave(host_set lock) 794 * 795 * RETURNS: 796 * Number of aborted qc's. 797 */ 798 int ata_port_abort(struct ata_port *ap) 799 { 800 return ata_do_link_abort(ap, NULL); 801 } 802 803 /** 804 * __ata_port_freeze - freeze port 805 * @ap: ATA port to freeze 806 * 807 * This function is called when HSM violation or some other 808 * condition disrupts normal operation of the port. Frozen port 809 * is not allowed to perform any operation until the port is 810 * thawed, which usually follows a successful reset. 811 * 812 * ap->ops->freeze() callback can be used for freezing the port 813 * hardware-wise (e.g. mask interrupt and stop DMA engine). If a 814 * port cannot be frozen hardware-wise, the interrupt handler 815 * must ack and clear interrupts unconditionally while the port 816 * is frozen. 817 * 818 * LOCKING: 819 * spin_lock_irqsave(host lock) 820 */ 821 static void __ata_port_freeze(struct ata_port *ap) 822 { 823 WARN_ON(!ap->ops->error_handler); 824 825 if (ap->ops->freeze) 826 ap->ops->freeze(ap); 827 828 ap->pflags |= ATA_PFLAG_FROZEN; 829 830 DPRINTK("ata%u port frozen\n", ap->print_id); 831 } 832 833 /** 834 * ata_port_freeze - abort & freeze port 835 * @ap: ATA port to freeze 836 * 837 * Abort and freeze @ap. 838 * 839 * LOCKING: 840 * spin_lock_irqsave(host lock) 841 * 842 * RETURNS: 843 * Number of aborted commands. 
844 */ 845 int ata_port_freeze(struct ata_port *ap) 846 { 847 int nr_aborted; 848 849 WARN_ON(!ap->ops->error_handler); 850 851 nr_aborted = ata_port_abort(ap); 852 __ata_port_freeze(ap); 853 854 return nr_aborted; 855 } 856 857 /** 858 * sata_async_notification - SATA async notification handler 859 * @ap: ATA port where async notification is received 860 * 861 * Handler to be called when async notification via SDB FIS is 862 * received. This function schedules EH if necessary. 863 * 864 * LOCKING: 865 * spin_lock_irqsave(host lock) 866 * 867 * RETURNS: 868 * 1 if EH is scheduled, 0 otherwise. 869 */ 870 int sata_async_notification(struct ata_port *ap) 871 { 872 u32 sntf; 873 int rc; 874 875 if (!(ap->flags & ATA_FLAG_AN)) 876 return 0; 877 878 rc = sata_scr_read(&ap->link, SCR_NOTIFICATION, &sntf); 879 if (rc == 0) 880 sata_scr_write(&ap->link, SCR_NOTIFICATION, sntf); 881 882 if (!sata_pmp_attached(ap) || rc) { 883 /* PMP is not attached or SNTF is not available */ 884 if (!sata_pmp_attached(ap)) { 885 /* PMP is not attached. Check whether ATAPI 886 * AN is configured. If so, notify media 887 * change. 888 */ 889 struct ata_device *dev = ap->link.device; 890 891 if ((dev->class == ATA_DEV_ATAPI) && 892 (dev->flags & ATA_DFLAG_AN)) 893 ata_scsi_media_change_notify(dev); 894 return 0; 895 } else { 896 /* PMP is attached but SNTF is not available. 897 * ATAPI async media change notification is 898 * not used. The PMP must be reporting PHY 899 * status change, schedule EH. 
900 */ 901 ata_port_schedule_eh(ap); 902 return 1; 903 } 904 } else { 905 /* PMP is attached and SNTF is available */ 906 struct ata_link *link; 907 908 /* check and notify ATAPI AN */ 909 ata_port_for_each_link(link, ap) { 910 if (!(sntf & (1 << link->pmp))) 911 continue; 912 913 if ((link->device->class == ATA_DEV_ATAPI) && 914 (link->device->flags & ATA_DFLAG_AN)) 915 ata_scsi_media_change_notify(link->device); 916 } 917 918 /* If PMP is reporting that PHY status of some 919 * downstream ports has changed, schedule EH. 920 */ 921 if (sntf & (1 << SATA_PMP_CTRL_PORT)) { 922 ata_port_schedule_eh(ap); 923 return 1; 924 } 925 926 return 0; 927 } 928 } 929 930 /** 931 * ata_eh_freeze_port - EH helper to freeze port 932 * @ap: ATA port to freeze 933 * 934 * Freeze @ap. 935 * 936 * LOCKING: 937 * None. 938 */ 939 void ata_eh_freeze_port(struct ata_port *ap) 940 { 941 unsigned long flags; 942 943 if (!ap->ops->error_handler) 944 return; 945 946 spin_lock_irqsave(ap->lock, flags); 947 __ata_port_freeze(ap); 948 spin_unlock_irqrestore(ap->lock, flags); 949 } 950 951 /** 952 * ata_port_thaw_port - EH helper to thaw port 953 * @ap: ATA port to thaw 954 * 955 * Thaw frozen port @ap. 956 * 957 * LOCKING: 958 * None. 
959 */ 960 void ata_eh_thaw_port(struct ata_port *ap) 961 { 962 unsigned long flags; 963 964 if (!ap->ops->error_handler) 965 return; 966 967 spin_lock_irqsave(ap->lock, flags); 968 969 ap->pflags &= ~ATA_PFLAG_FROZEN; 970 971 if (ap->ops->thaw) 972 ap->ops->thaw(ap); 973 974 spin_unlock_irqrestore(ap->lock, flags); 975 976 DPRINTK("ata%u port thawed\n", ap->print_id); 977 } 978 979 static void ata_eh_scsidone(struct scsi_cmnd *scmd) 980 { 981 /* nada */ 982 } 983 984 static void __ata_eh_qc_complete(struct ata_queued_cmd *qc) 985 { 986 struct ata_port *ap = qc->ap; 987 struct scsi_cmnd *scmd = qc->scsicmd; 988 unsigned long flags; 989 990 spin_lock_irqsave(ap->lock, flags); 991 qc->scsidone = ata_eh_scsidone; 992 __ata_qc_complete(qc); 993 WARN_ON(ata_tag_valid(qc->tag)); 994 spin_unlock_irqrestore(ap->lock, flags); 995 996 scsi_eh_finish_cmd(scmd, &ap->eh_done_q); 997 } 998 999 /** 1000 * ata_eh_qc_complete - Complete an active ATA command from EH 1001 * @qc: Command to complete 1002 * 1003 * Indicate to the mid and upper layers that an ATA command has 1004 * completed. To be used from EH. 1005 */ 1006 void ata_eh_qc_complete(struct ata_queued_cmd *qc) 1007 { 1008 struct scsi_cmnd *scmd = qc->scsicmd; 1009 scmd->retries = scmd->allowed; 1010 __ata_eh_qc_complete(qc); 1011 } 1012 1013 /** 1014 * ata_eh_qc_retry - Tell midlayer to retry an ATA command after EH 1015 * @qc: Command to retry 1016 * 1017 * Indicate to the mid and upper layers that an ATA command 1018 * should be retried. To be used from EH. 1019 * 1020 * SCSI midlayer limits the number of retries to scmd->allowed. 1021 * scmd->retries is decremented for commands which get retried 1022 * due to unrelated failures (qc->err_mask is zero). 
1023 */ 1024 void ata_eh_qc_retry(struct ata_queued_cmd *qc) 1025 { 1026 struct scsi_cmnd *scmd = qc->scsicmd; 1027 if (!qc->err_mask && scmd->retries) 1028 scmd->retries--; 1029 __ata_eh_qc_complete(qc); 1030 } 1031 1032 /** 1033 * ata_eh_detach_dev - detach ATA device 1034 * @dev: ATA device to detach 1035 * 1036 * Detach @dev. 1037 * 1038 * LOCKING: 1039 * None. 1040 */ 1041 void ata_eh_detach_dev(struct ata_device *dev) 1042 { 1043 struct ata_link *link = dev->link; 1044 struct ata_port *ap = link->ap; 1045 unsigned long flags; 1046 1047 ata_dev_disable(dev); 1048 1049 spin_lock_irqsave(ap->lock, flags); 1050 1051 dev->flags &= ~ATA_DFLAG_DETACH; 1052 1053 if (ata_scsi_offline_dev(dev)) { 1054 dev->flags |= ATA_DFLAG_DETACHED; 1055 ap->pflags |= ATA_PFLAG_SCSI_HOTPLUG; 1056 } 1057 1058 /* clear per-dev EH actions */ 1059 ata_eh_clear_action(link, dev, &link->eh_info, ATA_EH_PERDEV_MASK); 1060 ata_eh_clear_action(link, dev, &link->eh_context.i, ATA_EH_PERDEV_MASK); 1061 1062 spin_unlock_irqrestore(ap->lock, flags); 1063 } 1064 1065 /** 1066 * ata_eh_about_to_do - about to perform eh_action 1067 * @link: target ATA link 1068 * @dev: target ATA dev for per-dev action (can be NULL) 1069 * @action: action about to be performed 1070 * 1071 * Called just before performing EH actions to clear related bits 1072 * in @link->eh_info such that eh actions are not unnecessarily 1073 * repeated. 1074 * 1075 * LOCKING: 1076 * None. 
1077 */ 1078 void ata_eh_about_to_do(struct ata_link *link, struct ata_device *dev, 1079 unsigned int action) 1080 { 1081 struct ata_port *ap = link->ap; 1082 struct ata_eh_info *ehi = &link->eh_info; 1083 struct ata_eh_context *ehc = &link->eh_context; 1084 unsigned long flags; 1085 1086 spin_lock_irqsave(ap->lock, flags); 1087 1088 ata_eh_clear_action(link, dev, ehi, action); 1089 1090 if (!(ehc->i.flags & ATA_EHI_QUIET)) 1091 ap->pflags |= ATA_PFLAG_RECOVERED; 1092 1093 spin_unlock_irqrestore(ap->lock, flags); 1094 } 1095 1096 /** 1097 * ata_eh_done - EH action complete 1098 * @ap: target ATA port 1099 * @dev: target ATA dev for per-dev action (can be NULL) 1100 * @action: action just completed 1101 * 1102 * Called right after performing EH actions to clear related bits 1103 * in @link->eh_context. 1104 * 1105 * LOCKING: 1106 * None. 1107 */ 1108 void ata_eh_done(struct ata_link *link, struct ata_device *dev, 1109 unsigned int action) 1110 { 1111 struct ata_eh_context *ehc = &link->eh_context; 1112 1113 ata_eh_clear_action(link, dev, &ehc->i, action); 1114 } 1115 1116 /** 1117 * ata_err_string - convert err_mask to descriptive string 1118 * @err_mask: error mask to convert to string 1119 * 1120 * Convert @err_mask to descriptive string. Errors are 1121 * prioritized according to severity and only the most severe 1122 * error is reported. 1123 * 1124 * LOCKING: 1125 * None. 
1126 * 1127 * RETURNS: 1128 * Descriptive string for @err_mask 1129 */ 1130 static const char *ata_err_string(unsigned int err_mask) 1131 { 1132 if (err_mask & AC_ERR_HOST_BUS) 1133 return "host bus error"; 1134 if (err_mask & AC_ERR_ATA_BUS) 1135 return "ATA bus error"; 1136 if (err_mask & AC_ERR_TIMEOUT) 1137 return "timeout"; 1138 if (err_mask & AC_ERR_HSM) 1139 return "HSM violation"; 1140 if (err_mask & AC_ERR_SYSTEM) 1141 return "internal error"; 1142 if (err_mask & AC_ERR_MEDIA) 1143 return "media error"; 1144 if (err_mask & AC_ERR_INVALID) 1145 return "invalid argument"; 1146 if (err_mask & AC_ERR_DEV) 1147 return "device error"; 1148 return "unknown error"; 1149 } 1150 1151 /** 1152 * ata_read_log_page - read a specific log page 1153 * @dev: target device 1154 * @page: page to read 1155 * @buf: buffer to store read page 1156 * @sectors: number of sectors to read 1157 * 1158 * Read log page using READ_LOG_EXT command. 1159 * 1160 * LOCKING: 1161 * Kernel thread context (may sleep). 1162 * 1163 * RETURNS: 1164 * 0 on success, AC_ERR_* mask otherwise. 
1165 */ 1166 static unsigned int ata_read_log_page(struct ata_device *dev, 1167 u8 page, void *buf, unsigned int sectors) 1168 { 1169 struct ata_taskfile tf; 1170 unsigned int err_mask; 1171 1172 DPRINTK("read log page - page %d\n", page); 1173 1174 ata_tf_init(dev, &tf); 1175 tf.command = ATA_CMD_READ_LOG_EXT; 1176 tf.lbal = page; 1177 tf.nsect = sectors; 1178 tf.hob_nsect = sectors >> 8; 1179 tf.flags |= ATA_TFLAG_ISADDR | ATA_TFLAG_LBA48 | ATA_TFLAG_DEVICE; 1180 tf.protocol = ATA_PROT_PIO; 1181 1182 err_mask = ata_exec_internal(dev, &tf, NULL, DMA_FROM_DEVICE, 1183 buf, sectors * ATA_SECT_SIZE, 0); 1184 1185 DPRINTK("EXIT, err_mask=%x\n", err_mask); 1186 return err_mask; 1187 } 1188 1189 /** 1190 * ata_eh_read_log_10h - Read log page 10h for NCQ error details 1191 * @dev: Device to read log page 10h from 1192 * @tag: Resulting tag of the failed command 1193 * @tf: Resulting taskfile registers of the failed command 1194 * 1195 * Read log page 10h to obtain NCQ error details and clear error 1196 * condition. 1197 * 1198 * LOCKING: 1199 * Kernel thread context (may sleep). 1200 * 1201 * RETURNS: 1202 * 0 on success, -errno otherwise. 
1203 */ 1204 static int ata_eh_read_log_10h(struct ata_device *dev, 1205 int *tag, struct ata_taskfile *tf) 1206 { 1207 u8 *buf = dev->link->ap->sector_buf; 1208 unsigned int err_mask; 1209 u8 csum; 1210 int i; 1211 1212 err_mask = ata_read_log_page(dev, ATA_LOG_SATA_NCQ, buf, 1); 1213 if (err_mask) 1214 return -EIO; 1215 1216 csum = 0; 1217 for (i = 0; i < ATA_SECT_SIZE; i++) 1218 csum += buf[i]; 1219 if (csum) 1220 ata_dev_printk(dev, KERN_WARNING, 1221 "invalid checksum 0x%x on log page 10h\n", csum); 1222 1223 if (buf[0] & 0x80) 1224 return -ENOENT; 1225 1226 *tag = buf[0] & 0x1f; 1227 1228 tf->command = buf[2]; 1229 tf->feature = buf[3]; 1230 tf->lbal = buf[4]; 1231 tf->lbam = buf[5]; 1232 tf->lbah = buf[6]; 1233 tf->device = buf[7]; 1234 tf->hob_lbal = buf[8]; 1235 tf->hob_lbam = buf[9]; 1236 tf->hob_lbah = buf[10]; 1237 tf->nsect = buf[12]; 1238 tf->hob_nsect = buf[13]; 1239 1240 return 0; 1241 } 1242 1243 /** 1244 * atapi_eh_request_sense - perform ATAPI REQUEST_SENSE 1245 * @dev: device to perform REQUEST_SENSE to 1246 * @sense_buf: result sense data buffer (SCSI_SENSE_BUFFERSIZE bytes long) 1247 * 1248 * Perform ATAPI REQUEST_SENSE after the device reported CHECK 1249 * SENSE. This function is EH helper. 1250 * 1251 * LOCKING: 1252 * Kernel thread context (may sleep). 1253 * 1254 * RETURNS: 1255 * 0 on success, AC_ERR_* mask on failure 1256 */ 1257 static unsigned int atapi_eh_request_sense(struct ata_queued_cmd *qc) 1258 { 1259 struct ata_device *dev = qc->dev; 1260 unsigned char *sense_buf = qc->scsicmd->sense_buffer; 1261 struct ata_port *ap = dev->link->ap; 1262 struct ata_taskfile tf; 1263 u8 cdb[ATAPI_CDB_LEN]; 1264 1265 DPRINTK("ATAPI request sense\n"); 1266 1267 /* FIXME: is this needed? 
*/ 1268 memset(sense_buf, 0, SCSI_SENSE_BUFFERSIZE); 1269 1270 /* initialize sense_buf with the error register, 1271 * for the case where they are -not- overwritten 1272 */ 1273 sense_buf[0] = 0x70; 1274 sense_buf[2] = qc->result_tf.feature >> 4; 1275 1276 /* some devices time out if garbage left in tf */ 1277 ata_tf_init(dev, &tf); 1278 1279 memset(cdb, 0, ATAPI_CDB_LEN); 1280 cdb[0] = REQUEST_SENSE; 1281 cdb[4] = SCSI_SENSE_BUFFERSIZE; 1282 1283 tf.flags |= ATA_TFLAG_ISADDR | ATA_TFLAG_DEVICE; 1284 tf.command = ATA_CMD_PACKET; 1285 1286 /* is it pointless to prefer PIO for "safety reasons"? */ 1287 if (ap->flags & ATA_FLAG_PIO_DMA) { 1288 tf.protocol = ATAPI_PROT_DMA; 1289 tf.feature |= ATAPI_PKT_DMA; 1290 } else { 1291 tf.protocol = ATAPI_PROT_PIO; 1292 tf.lbam = SCSI_SENSE_BUFFERSIZE; 1293 tf.lbah = 0; 1294 } 1295 1296 return ata_exec_internal(dev, &tf, cdb, DMA_FROM_DEVICE, 1297 sense_buf, SCSI_SENSE_BUFFERSIZE, 0); 1298 } 1299 1300 /** 1301 * ata_eh_analyze_serror - analyze SError for a failed port 1302 * @link: ATA link to analyze SError for 1303 * 1304 * Analyze SError if available and further determine cause of 1305 * failure. 1306 * 1307 * LOCKING: 1308 * None. 1309 */ 1310 static void ata_eh_analyze_serror(struct ata_link *link) 1311 { 1312 struct ata_eh_context *ehc = &link->eh_context; 1313 u32 serror = ehc->i.serror; 1314 unsigned int err_mask = 0, action = 0; 1315 u32 hotplug_mask; 1316 1317 if (serror & (SERR_PERSISTENT | SERR_DATA)) { 1318 err_mask |= AC_ERR_ATA_BUS; 1319 action |= ATA_EH_RESET; 1320 } 1321 if (serror & SERR_PROTOCOL) { 1322 err_mask |= AC_ERR_HSM; 1323 action |= ATA_EH_RESET; 1324 } 1325 if (serror & SERR_INTERNAL) { 1326 err_mask |= AC_ERR_SYSTEM; 1327 action |= ATA_EH_RESET; 1328 } 1329 1330 /* Determine whether a hotplug event has occurred. Both 1331 * SError.N/X are considered hotplug events for enabled or 1332 * host links. For disabled PMP links, only N bit is 1333 * considered as X bit is left at 1 for link plugging. 
1334 */ 1335 hotplug_mask = 0; 1336 1337 if (!(link->flags & ATA_LFLAG_DISABLED) || ata_is_host_link(link)) 1338 hotplug_mask = SERR_PHYRDY_CHG | SERR_DEV_XCHG; 1339 else 1340 hotplug_mask = SERR_PHYRDY_CHG; 1341 1342 if (serror & hotplug_mask) 1343 ata_ehi_hotplugged(&ehc->i); 1344 1345 ehc->i.err_mask |= err_mask; 1346 ehc->i.action |= action; 1347 } 1348 1349 /** 1350 * ata_eh_analyze_ncq_error - analyze NCQ error 1351 * @link: ATA link to analyze NCQ error for 1352 * 1353 * Read log page 10h, determine the offending qc and acquire 1354 * error status TF. For NCQ device errors, all LLDDs have to do 1355 * is setting AC_ERR_DEV in ehi->err_mask. This function takes 1356 * care of the rest. 1357 * 1358 * LOCKING: 1359 * Kernel thread context (may sleep). 1360 */ 1361 void ata_eh_analyze_ncq_error(struct ata_link *link) 1362 { 1363 struct ata_port *ap = link->ap; 1364 struct ata_eh_context *ehc = &link->eh_context; 1365 struct ata_device *dev = link->device; 1366 struct ata_queued_cmd *qc; 1367 struct ata_taskfile tf; 1368 int tag, rc; 1369 1370 /* if frozen, we can't do much */ 1371 if (ap->pflags & ATA_PFLAG_FROZEN) 1372 return; 1373 1374 /* is it NCQ device error? */ 1375 if (!link->sactive || !(ehc->i.err_mask & AC_ERR_DEV)) 1376 return; 1377 1378 /* has LLDD analyzed already? 
*/ 1379 for (tag = 0; tag < ATA_MAX_QUEUE; tag++) { 1380 qc = __ata_qc_from_tag(ap, tag); 1381 1382 if (!(qc->flags & ATA_QCFLAG_FAILED)) 1383 continue; 1384 1385 if (qc->err_mask) 1386 return; 1387 } 1388 1389 /* okay, this error is ours */ 1390 rc = ata_eh_read_log_10h(dev, &tag, &tf); 1391 if (rc) { 1392 ata_link_printk(link, KERN_ERR, "failed to read log page 10h " 1393 "(errno=%d)\n", rc); 1394 return; 1395 } 1396 1397 if (!(link->sactive & (1 << tag))) { 1398 ata_link_printk(link, KERN_ERR, "log page 10h reported " 1399 "inactive tag %d\n", tag); 1400 return; 1401 } 1402 1403 /* we've got the perpetrator, condemn it */ 1404 qc = __ata_qc_from_tag(ap, tag); 1405 memcpy(&qc->result_tf, &tf, sizeof(tf)); 1406 qc->result_tf.flags = ATA_TFLAG_ISADDR | ATA_TFLAG_LBA | ATA_TFLAG_LBA48; 1407 qc->err_mask |= AC_ERR_DEV | AC_ERR_NCQ; 1408 ehc->i.err_mask &= ~AC_ERR_DEV; 1409 } 1410 1411 /** 1412 * ata_eh_analyze_tf - analyze taskfile of a failed qc 1413 * @qc: qc to analyze 1414 * @tf: Taskfile registers to analyze 1415 * 1416 * Analyze taskfile of @qc and further determine cause of 1417 * failure. This function also requests ATAPI sense data if 1418 * avaliable. 1419 * 1420 * LOCKING: 1421 * Kernel thread context (may sleep). 
1422 * 1423 * RETURNS: 1424 * Determined recovery action 1425 */ 1426 static unsigned int ata_eh_analyze_tf(struct ata_queued_cmd *qc, 1427 const struct ata_taskfile *tf) 1428 { 1429 unsigned int tmp, action = 0; 1430 u8 stat = tf->command, err = tf->feature; 1431 1432 if ((stat & (ATA_BUSY | ATA_DRQ | ATA_DRDY)) != ATA_DRDY) { 1433 qc->err_mask |= AC_ERR_HSM; 1434 return ATA_EH_RESET; 1435 } 1436 1437 if (stat & (ATA_ERR | ATA_DF)) 1438 qc->err_mask |= AC_ERR_DEV; 1439 else 1440 return 0; 1441 1442 switch (qc->dev->class) { 1443 case ATA_DEV_ATA: 1444 if (err & ATA_ICRC) 1445 qc->err_mask |= AC_ERR_ATA_BUS; 1446 if (err & ATA_UNC) 1447 qc->err_mask |= AC_ERR_MEDIA; 1448 if (err & ATA_IDNF) 1449 qc->err_mask |= AC_ERR_INVALID; 1450 break; 1451 1452 case ATA_DEV_ATAPI: 1453 if (!(qc->ap->pflags & ATA_PFLAG_FROZEN)) { 1454 tmp = atapi_eh_request_sense(qc); 1455 if (!tmp) { 1456 /* ATA_QCFLAG_SENSE_VALID is used to 1457 * tell atapi_qc_complete() that sense 1458 * data is already valid. 1459 * 1460 * TODO: interpret sense data and set 1461 * appropriate err_mask. 
1462 */ 1463 qc->flags |= ATA_QCFLAG_SENSE_VALID; 1464 } else 1465 qc->err_mask |= tmp; 1466 } 1467 } 1468 1469 if (qc->err_mask & (AC_ERR_HSM | AC_ERR_TIMEOUT | AC_ERR_ATA_BUS)) 1470 action |= ATA_EH_RESET; 1471 1472 return action; 1473 } 1474 1475 static int ata_eh_categorize_error(unsigned int eflags, unsigned int err_mask, 1476 int *xfer_ok) 1477 { 1478 int base = 0; 1479 1480 if (!(eflags & ATA_EFLAG_DUBIOUS_XFER)) 1481 *xfer_ok = 1; 1482 1483 if (!*xfer_ok) 1484 base = ATA_ECAT_DUBIOUS_NONE; 1485 1486 if (err_mask & AC_ERR_ATA_BUS) 1487 return base + ATA_ECAT_ATA_BUS; 1488 1489 if (err_mask & AC_ERR_TIMEOUT) 1490 return base + ATA_ECAT_TOUT_HSM; 1491 1492 if (eflags & ATA_EFLAG_IS_IO) { 1493 if (err_mask & AC_ERR_HSM) 1494 return base + ATA_ECAT_TOUT_HSM; 1495 if ((err_mask & 1496 (AC_ERR_DEV|AC_ERR_MEDIA|AC_ERR_INVALID)) == AC_ERR_DEV) 1497 return base + ATA_ECAT_UNK_DEV; 1498 } 1499 1500 return 0; 1501 } 1502 1503 struct speed_down_verdict_arg { 1504 u64 since; 1505 int xfer_ok; 1506 int nr_errors[ATA_ECAT_NR]; 1507 }; 1508 1509 static int speed_down_verdict_cb(struct ata_ering_entry *ent, void *void_arg) 1510 { 1511 struct speed_down_verdict_arg *arg = void_arg; 1512 int cat; 1513 1514 if (ent->timestamp < arg->since) 1515 return -1; 1516 1517 cat = ata_eh_categorize_error(ent->eflags, ent->err_mask, 1518 &arg->xfer_ok); 1519 arg->nr_errors[cat]++; 1520 1521 return 0; 1522 } 1523 1524 /** 1525 * ata_eh_speed_down_verdict - Determine speed down verdict 1526 * @dev: Device of interest 1527 * 1528 * This function examines error ring of @dev and determines 1529 * whether NCQ needs to be turned off, transfer speed should be 1530 * stepped down, or falling back to PIO is necessary. 
 *
 *	ECAT_ATA_BUS	: ATA_BUS error for any command
 *
 *	ECAT_TOUT_HSM	: TIMEOUT for any command or HSM violation for
 *			  IO commands
 *
 *	ECAT_UNK_DEV	: Unknown DEV error for IO commands
 *
 *	ECAT_DUBIOUS_*	: Identical to above three but occurred while
 *			  data transfer hasn't been verified.
 *
 *	Verdicts are
 *
 *	NCQ_OFF		: Turn off NCQ.
 *
 *	SPEED_DOWN	: Speed down transfer speed but don't fall back
 *			  to PIO.
 *
 *	FALLBACK_TO_PIO	: Fall back to PIO.
 *
 *	Even if multiple verdicts are returned, only one action is
 *	taken per error.  An action triggered by non-DUBIOUS errors
 *	clears ering, while one triggered by DUBIOUS_* errors doesn't.
 *	This is to expedite speed down decisions right after device is
 *	initially configured.
 *
 *	The following are the speed down rules.  #1 and #2 deal with
 *	DUBIOUS errors.
 *
 *	1. If more than one DUBIOUS_ATA_BUS or DUBIOUS_TOUT_HSM errors
 *	   occurred during last 5 mins, SPEED_DOWN and FALLBACK_TO_PIO.
 *
 *	2. If more than one DUBIOUS_TOUT_HSM or DUBIOUS_UNK_DEV errors
 *	   occurred during last 5 mins, NCQ_OFF.
 *
 *	3. If more than 6 ATA_BUS, TOUT_HSM or UNK_DEV errors
 *	   occurred during last 5 mins, FALLBACK_TO_PIO.
 *
 *	4. If more than 3 TOUT_HSM or UNK_DEV errors occurred
 *	   during last 10 mins, NCQ_OFF.
 *
 *	5. If more than 3 ATA_BUS or TOUT_HSM errors, or more than 6
 *	   UNK_DEV errors occurred during last 10 mins, SPEED_DOWN.
 *
 *	LOCKING:
 *	Inherited from caller.
 *
 *	RETURNS:
 *	OR of ATA_EH_SPDN_* flags.
1580 */ 1581 static unsigned int ata_eh_speed_down_verdict(struct ata_device *dev) 1582 { 1583 const u64 j5mins = 5LLU * 60 * HZ, j10mins = 10LLU * 60 * HZ; 1584 u64 j64 = get_jiffies_64(); 1585 struct speed_down_verdict_arg arg; 1586 unsigned int verdict = 0; 1587 1588 /* scan past 5 mins of error history */ 1589 memset(&arg, 0, sizeof(arg)); 1590 arg.since = j64 - min(j64, j5mins); 1591 ata_ering_map(&dev->ering, speed_down_verdict_cb, &arg); 1592 1593 if (arg.nr_errors[ATA_ECAT_DUBIOUS_ATA_BUS] + 1594 arg.nr_errors[ATA_ECAT_DUBIOUS_TOUT_HSM] > 1) 1595 verdict |= ATA_EH_SPDN_SPEED_DOWN | 1596 ATA_EH_SPDN_FALLBACK_TO_PIO | ATA_EH_SPDN_KEEP_ERRORS; 1597 1598 if (arg.nr_errors[ATA_ECAT_DUBIOUS_TOUT_HSM] + 1599 arg.nr_errors[ATA_ECAT_DUBIOUS_UNK_DEV] > 1) 1600 verdict |= ATA_EH_SPDN_NCQ_OFF | ATA_EH_SPDN_KEEP_ERRORS; 1601 1602 if (arg.nr_errors[ATA_ECAT_ATA_BUS] + 1603 arg.nr_errors[ATA_ECAT_TOUT_HSM] + 1604 arg.nr_errors[ATA_ECAT_UNK_DEV] > 6) 1605 verdict |= ATA_EH_SPDN_FALLBACK_TO_PIO; 1606 1607 /* scan past 10 mins of error history */ 1608 memset(&arg, 0, sizeof(arg)); 1609 arg.since = j64 - min(j64, j10mins); 1610 ata_ering_map(&dev->ering, speed_down_verdict_cb, &arg); 1611 1612 if (arg.nr_errors[ATA_ECAT_TOUT_HSM] + 1613 arg.nr_errors[ATA_ECAT_UNK_DEV] > 3) 1614 verdict |= ATA_EH_SPDN_NCQ_OFF; 1615 1616 if (arg.nr_errors[ATA_ECAT_ATA_BUS] + 1617 arg.nr_errors[ATA_ECAT_TOUT_HSM] > 3 || 1618 arg.nr_errors[ATA_ECAT_UNK_DEV] > 6) 1619 verdict |= ATA_EH_SPDN_SPEED_DOWN; 1620 1621 return verdict; 1622 } 1623 1624 /** 1625 * ata_eh_speed_down - record error and speed down if necessary 1626 * @dev: Failed device 1627 * @eflags: mask of ATA_EFLAG_* flags 1628 * @err_mask: err_mask of the error 1629 * 1630 * Record error and examine error history to determine whether 1631 * adjusting transmission speed is necessary. It also sets 1632 * transmission limits appropriately if such adjustment is 1633 * necessary. 
 *
 *	At most one measure is applied per call, tried in this order:
 *	turning NCQ off, lowering the SATA link speed, lowering the
 *	transfer mode, falling back to PIO.
 *
 *	LOCKING:
 *	Kernel thread context (may sleep).
 *
 *	RETURNS:
 *	Determined recovery action.
 */
static unsigned int ata_eh_speed_down(struct ata_device *dev,
				unsigned int eflags, unsigned int err_mask)
{
	struct ata_link *link = dev->link;
	int xfer_ok = 0;
	unsigned int verdict;
	unsigned int action = 0;

	/* don't bother if Cat-0 error */
	if (ata_eh_categorize_error(eflags, err_mask, &xfer_ok) == 0)
		return 0;

	/* record error and determine whether speed down is necessary */
	ata_ering_record(&dev->ering, eflags, err_mask);
	verdict = ata_eh_speed_down_verdict(dev);

	/* turn off NCQ?  Only if NCQ is currently in use: NCQ flag set,
	 * PIO and NCQ_OFF flags clear.  No reset is requested for this.
	 */
	if ((verdict & ATA_EH_SPDN_NCQ_OFF) &&
	    (dev->flags & (ATA_DFLAG_PIO | ATA_DFLAG_NCQ |
			   ATA_DFLAG_NCQ_OFF)) == ATA_DFLAG_NCQ) {
		dev->flags |= ATA_DFLAG_NCQ_OFF;
		ata_dev_printk(dev, KERN_WARNING,
			       "NCQ disabled due to excessive errors\n");
		goto done;
	}

	/* speed down? */
	if (verdict & ATA_EH_SPDN_SPEED_DOWN) {
		/* speed down SATA link speed if possible */
		if (sata_down_spd_limit(link) == 0) {
			action |= ATA_EH_RESET;
			goto done;
		}

		/* lower transfer mode; spdn_cnt selects the step and
		 * allows at most two of them
		 */
		if (dev->spdn_cnt < 2) {
			static const int dma_dnxfer_sel[] =
				{ ATA_DNXFER_DMA, ATA_DNXFER_40C };
			static const int pio_dnxfer_sel[] =
				{ ATA_DNXFER_PIO, ATA_DNXFER_FORCE_PIO0 };
			int sel;

			if (dev->xfer_shift != ATA_SHIFT_PIO)
				sel = dma_dnxfer_sel[dev->spdn_cnt];
			else
				sel = pio_dnxfer_sel[dev->spdn_cnt];

			/* the step is consumed even if lowering fails below */
			dev->spdn_cnt++;

			if (ata_down_xfermask_limit(dev, sel) == 0) {
				action |= ATA_EH_RESET;
				goto done;
			}
		}
	}

	/* Fall back to PIO?  Slowing down to PIO is meaningless for
	 * SATA ATA devices.  Consider it only for PATA and SATAPI.
	 */
	if ((verdict & ATA_EH_SPDN_FALLBACK_TO_PIO) && (dev->spdn_cnt >= 2) &&
	    (link->ap->cbl != ATA_CBL_SATA || dev->class == ATA_DEV_ATAPI) &&
	    (dev->xfer_shift != ATA_SHIFT_PIO)) {
		if (ata_down_xfermask_limit(dev, ATA_DNXFER_FORCE_PIO) == 0) {
			dev->spdn_cnt = 0;
			action |= ATA_EH_RESET;
			goto done;
		}
	}

	/* nothing could be adjusted, error history is kept */
	return 0;
 done:
	/* device has been slowed down, blow error history */
	if (!(verdict & ATA_EH_SPDN_KEEP_ERRORS))
		ata_ering_clear(&dev->ering);
	return action;
}

/**
 *	ata_eh_link_autopsy - analyze error and determine recovery action
 *	@link: host link to perform autopsy on
 *
 *	Analyze why @link failed and determine which recovery actions
 *	are needed.  This function also sets more detailed AC_ERR_*
 *	values and fills sense data for ATAPI CHECK SENSE.
 *
 *	Does nothing if ATA_EHI_NO_AUTOPSY is set in the link's EH info
 *	flags.
 *
 *	LOCKING:
 *	Kernel thread context (may sleep).
 */
static void ata_eh_link_autopsy(struct ata_link *link)
{
	struct ata_port *ap = link->ap;
	struct ata_eh_context *ehc = &link->eh_context;
	struct ata_device *dev;
	unsigned int all_err_mask = 0, eflags = 0;
	int tag;
	u32 serror;
	int rc;

	DPRINTK("ENTER\n");

	if (ehc->i.flags & ATA_EHI_NO_AUTOPSY)
		return;

	/* obtain and analyze SError */
	rc = sata_scr_read(link, SCR_ERROR, &serror);
	if (rc == 0) {
		ehc->i.serror |= serror;
		ata_eh_analyze_serror(link);
	} else if (rc != -EOPNOTSUPP) {
		/* SError read failed, force reset and probing */
		ehc->i.probe_mask |= ATA_ALL_DEVICES;
		ehc->i.action |= ATA_EH_RESET;
		ehc->i.err_mask |= AC_ERR_OTHER;
	}

	/* analyze NCQ failure */
	ata_eh_analyze_ncq_error(link);

	/* any real error trumps AC_ERR_OTHER */
	if (ehc->i.err_mask & ~AC_ERR_OTHER)
		ehc->i.err_mask &= ~AC_ERR_OTHER;

	all_err_mask |= ehc->i.err_mask;

	/* walk all tags and process the failed qcs which belong to @link */
	for (tag = 0; tag < ATA_MAX_QUEUE; tag++) {
		struct ata_queued_cmd *qc = __ata_qc_from_tag(ap, tag);

		if (!(qc->flags & ATA_QCFLAG_FAILED) || qc->dev->link != link)
			continue;

		/* inherit upper level err_mask */
		qc->err_mask |= ehc->i.err_mask;

		/* analyze TF */
		ehc->i.action |= ata_eh_analyze_tf(qc, &qc->result_tf);

		/* DEV errors are probably spurious in case of ATA_BUS error */
		if (qc->err_mask & AC_ERR_ATA_BUS)
			qc->err_mask &= ~(AC_ERR_DEV | AC_ERR_MEDIA |
					  AC_ERR_INVALID);

		/* any real error trumps unknown error */
		if (qc->err_mask & ~AC_ERR_OTHER)
			qc->err_mask &= ~AC_ERR_OTHER;

		/* SENSE_VALID trumps dev/unknown error and revalidation */
		if (qc->flags & ATA_QCFLAG_SENSE_VALID)
			qc->err_mask &= ~(AC_ERR_DEV | AC_ERR_OTHER);

		/* determine whether the command is worth retrying */
		if (!(qc->err_mask & AC_ERR_INVALID) &&
		    ((qc->flags & ATA_QCFLAG_IO) || qc->err_mask != AC_ERR_DEV))
			qc->flags |= ATA_QCFLAG_RETRY;

		/* accumulate error info */
		ehc->i.dev = qc->dev;
		all_err_mask |= qc->err_mask;
		if (qc->flags & ATA_QCFLAG_IO)
			eflags |= ATA_EFLAG_IS_IO;
	}

	/* enforce default EH actions */
	if (ap->pflags & ATA_PFLAG_FROZEN ||
	    all_err_mask & (AC_ERR_HSM | AC_ERR_TIMEOUT))
		ehc->i.action |= ATA_EH_RESET;
	else if (((eflags & ATA_EFLAG_IS_IO) && all_err_mask) ||
		 (!(eflags & ATA_EFLAG_IS_IO) && (all_err_mask & ~AC_ERR_DEV)))
		ehc->i.action |= ATA_EH_REVALIDATE;

	/* If we have offending qcs and the associated failed device,
	 * perform per-dev EH action only on the offending device.
	 */
	if (ehc->i.dev) {
		ehc->i.dev_action[ehc->i.dev->devno] |=
			ehc->i.action & ATA_EH_PERDEV_MASK;
		ehc->i.action &= ~ATA_EH_PERDEV_MASK;
	}

	/* propagate timeout to host link */
	if ((all_err_mask & AC_ERR_TIMEOUT) && !ata_is_host_link(link))
		ap->link.eh_context.i.err_mask |= AC_ERR_TIMEOUT;

	/* record error and consider speeding down; without a failed
	 * device, use the link's device if it's the only possible one
	 * and is enabled
	 */
	dev = ehc->i.dev;
	if (!dev && ((ata_link_max_devices(link) == 1 &&
		      ata_dev_enabled(link->device))))
		dev = link->device;

	if (dev) {
		if (dev->flags & ATA_DFLAG_DUBIOUS_XFER)
			eflags |= ATA_EFLAG_DUBIOUS_XFER;
		ehc->i.action |= ata_eh_speed_down(dev, eflags, all_err_mask);
	}

	DPRINTK("EXIT\n");
}

/**
 *	ata_eh_autopsy - analyze error and determine recovery action
 *	@ap: host port to perform autopsy on
 *
 *	Analyze all links of @ap and determine why they failed and
 *	which recovery actions are needed.
 *
 *	LOCKING:
 *	Kernel thread context (may sleep).
 */
void ata_eh_autopsy(struct ata_port *ap)
{
	struct ata_link *link;

	ata_port_for_each_link(link, ap)
		ata_eh_link_autopsy(link);

	/* Autopsy of fanout ports can affect host link autopsy.
	 * Perform host link autopsy last.
	 */
	if (sata_pmp_attached(ap))
		ata_eh_link_autopsy(&ap->link);
}

/**
 *	ata_eh_link_report - report error handling to user
 *	@link: ATA link EH is going on
 *
 *	Report EH to user.
 *
 *	LOCKING:
 *	None.
 */
static void ata_eh_link_report(struct ata_link *link)
{
	struct ata_port *ap = link->ap;
	struct ata_eh_context *ehc = &link->eh_context;
	const char *frozen, *desc;
	char tries_buf[6];
	int tag, nr_failed = 0;

	/* reporting suppressed for this EH session */
	if (ehc->i.flags & ATA_EHI_QUIET)
		return;

	desc = NULL;
	if (ehc->i.desc[0] != '\0')
		desc = ehc->i.desc;

	/* count the failed qcs on @link which are worth reporting */
	for (tag = 0; tag < ATA_MAX_QUEUE; tag++) {
		struct ata_queued_cmd *qc = __ata_qc_from_tag(ap, tag);

		/* skip qcs which didn't fail, belong to another link or
		 * are quiet ones with nothing but a device error
		 */
		if (!(qc->flags & ATA_QCFLAG_FAILED) || qc->dev->link != link ||
		    ((qc->flags & ATA_QCFLAG_QUIET) &&
		     qc->err_mask == AC_ERR_DEV))
			continue;
		/* valid sense data and no remaining error, nothing to report */
		if (qc->flags & ATA_QCFLAG_SENSE_VALID && !qc->err_mask)
			continue;

		nr_failed++;
	}

	if (!nr_failed && !ehc->i.err_mask)
		return;

	frozen = "";
	if (ap->pflags & ATA_PFLAG_FROZEN)
		frozen = " frozen";

	/* " t<n>" suffix, only shown when eh_tries is below ATA_EH_MAX_TRIES */
	memset(tries_buf, 0, sizeof(tries_buf));
	if (ap->eh_tries < ATA_EH_MAX_TRIES)
		snprintf(tries_buf, sizeof(tries_buf) - 1, " t%d",
			 ap->eh_tries);

	/* summary line, attributed to the failed device if one is known */
	if (ehc->i.dev) {
		ata_dev_printk(ehc->i.dev, KERN_ERR, "exception Emask 0x%x "
			       "SAct 0x%x SErr 0x%x action 0x%x%s%s\n",
			       ehc->i.err_mask, link->sactive, ehc->i.serror,
			       ehc->i.action, frozen, tries_buf);
		if (desc)
			ata_dev_printk(ehc->i.dev, KERN_ERR, "%s\n", desc);
	} else {
		ata_link_printk(link, KERN_ERR, "exception Emask 0x%x "
				"SAct 0x%x SErr 0x%x action 0x%x%s%s\n",
				ehc->i.err_mask, link->sactive, ehc->i.serror,
				ehc->i.action, frozen, tries_buf);
		if (desc)
			ata_link_printk(link, KERN_ERR, "%s\n", desc);
	}

	/* decode the SError bits which are set into names */
	if (ehc->i.serror)
		ata_port_printk(ap, KERN_ERR,
		  "SError: { %s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s}\n",
		  ehc->i.serror & SERR_DATA_RECOVERED ? "RecovData " : "",
		  ehc->i.serror & SERR_COMM_RECOVERED ? "RecovComm " : "",
		  ehc->i.serror & SERR_DATA ? "UnrecovData " : "",
		  ehc->i.serror & SERR_PERSISTENT ? "Persist " : "",
		  ehc->i.serror & SERR_PROTOCOL ? "Proto " : "",
		  ehc->i.serror & SERR_INTERNAL ? "HostInt " : "",
		  ehc->i.serror & SERR_PHYRDY_CHG ? "PHYRdyChg " : "",
		  ehc->i.serror & SERR_PHY_INT_ERR ? "PHYInt " : "",
		  ehc->i.serror & SERR_COMM_WAKE ? "CommWake " : "",
		  ehc->i.serror & SERR_10B_8B_ERR ? "10B8B " : "",
		  ehc->i.serror & SERR_DISPARITY ? "Dispar " : "",
		  ehc->i.serror & SERR_CRC ? "BadCRC " : "",
		  ehc->i.serror & SERR_HANDSHAKE ? "Handshk " : "",
		  ehc->i.serror & SERR_LINK_SEQ_ERR ? "LinkSeq " : "",
		  ehc->i.serror & SERR_TRANS_ST_ERROR ? "TrStaTrns " : "",
		  ehc->i.serror & SERR_UNRECOG_FIS ? "UnrecFIS " : "",
		  ehc->i.serror & SERR_DEV_XCHG ? "DevExch " : "");

	/* per-command details for each failed qc with a non-zero err_mask */
	for (tag = 0; tag < ATA_MAX_QUEUE; tag++) {
		struct ata_queued_cmd *qc = __ata_qc_from_tag(ap, tag);
		struct ata_taskfile *cmd = &qc->tf, *res = &qc->result_tf;
		const u8 *cdb = qc->cdb;
		char data_buf[20] = "";
		char cdb_buf[70] = "";

		if (!(qc->flags & ATA_QCFLAG_FAILED) ||
		    qc->dev->link != link || !qc->err_mask)
			continue;

		/* protocol, byte count and direction for data commands */
		if (qc->dma_dir != DMA_NONE) {
			static const char *dma_str[] = {
				[DMA_BIDIRECTIONAL]	= "bidi",
				[DMA_TO_DEVICE]		= "out",
				[DMA_FROM_DEVICE]	= "in",
			};
			static const char *prot_str[] = {
				[ATA_PROT_PIO]		= "pio",
				[ATA_PROT_DMA]		= "dma",
				[ATA_PROT_NCQ]		= "ncq",
				[ATAPI_PROT_PIO]	= "pio",
				[ATAPI_PROT_DMA]	= "dma",
			};

			snprintf(data_buf, sizeof(data_buf), " %s %u %s",
				 prot_str[qc->tf.protocol], qc->nbytes,
				 dma_str[qc->dma_dir]);
		}

		/* ATAPI commands additionally get their 16 CDB bytes dumped */
		if (ata_is_atapi(qc->tf.protocol))
			snprintf(cdb_buf, sizeof(cdb_buf),
				 "cdb %02x %02x %02x %02x %02x %02x %02x %02x "
				 "%02x %02x %02x %02x %02x %02x %02x %02x\n ",
				 cdb[0], cdb[1], cdb[2], cdb[3],
				 cdb[4], cdb[5], cdb[6], cdb[7],
				 cdb[8], cdb[9], cdb[10], cdb[11],
				 cdb[12], cdb[13], cdb[14], cdb[15]);

		/* issued taskfile (cmd) followed by result taskfile (res) */
		ata_dev_printk(qc->dev, KERN_ERR,
			"cmd %02x/%02x:%02x:%02x:%02x:%02x/%02x:%02x:%02x:%02x:%02x/%02x "
			"tag %d%s\n %s"
			"res %02x/%02x:%02x:%02x:%02x:%02x/%02x:%02x:%02x:%02x:%02x/%02x "
			"Emask 0x%x (%s)%s\n",
			cmd->command, cmd->feature, cmd->nsect,
			cmd->lbal, cmd->lbam, cmd->lbah,
			cmd->hob_feature, cmd->hob_nsect,
			cmd->hob_lbal, cmd->hob_lbam, cmd->hob_lbah,
			cmd->device, qc->tag, data_buf, cdb_buf,
			res->command, res->feature, res->nsect,
			res->lbal, res->lbam, res->lbah,
			res->hob_feature, res->hob_nsect,
			res->hob_lbal, res->hob_lbam, res->hob_lbah,
			res->device, qc->err_mask, ata_err_string(qc->err_mask),
			qc->err_mask & AC_ERR_NCQ ? " <F>" : "");

		/* decode the status register (kept in res->command) */
		if (res->command & (ATA_BUSY | ATA_DRDY | ATA_DF | ATA_DRQ |
				    ATA_ERR)) {
			if (res->command & ATA_BUSY)
				ata_dev_printk(qc->dev, KERN_ERR,
				  "status: { Busy }\n");
			else
				ata_dev_printk(qc->dev, KERN_ERR,
				  "status: { %s%s%s%s}\n",
				  res->command & ATA_DRDY ? "DRDY " : "",
				  res->command & ATA_DF ? "DF " : "",
				  res->command & ATA_DRQ ? "DRQ " : "",
				  res->command & ATA_ERR ? "ERR " : "");
		}

		/* decode the error register (kept in res->feature), but
		 * not for PACKET commands
		 */
		if (cmd->command != ATA_CMD_PACKET &&
		    (res->feature & (ATA_ICRC | ATA_UNC | ATA_IDNF |
				     ATA_ABORTED)))
			ata_dev_printk(qc->dev, KERN_ERR,
			  "error: { %s%s%s%s}\n",
			  res->feature & ATA_ICRC ? "ICRC " : "",
			  res->feature & ATA_UNC ? "UNC " : "",
			  res->feature & ATA_IDNF ? "IDNF " : "",
			  res->feature & ATA_ABORTED ? "ABRT " : "");
	}
}

/**
 *	ata_eh_report - report error handling to user
 *	@ap: ATA port to report EH about
 *
 *	Report EH to user.
 *
 *	LOCKING:
 *	None.
2038 */ 2039 void ata_eh_report(struct ata_port *ap) 2040 { 2041 struct ata_link *link; 2042 2043 __ata_port_for_each_link(link, ap) 2044 ata_eh_link_report(link); 2045 } 2046 2047 static int ata_do_reset(struct ata_link *link, ata_reset_fn_t reset, 2048 unsigned int *classes, unsigned long deadline) 2049 { 2050 struct ata_device *dev; 2051 2052 ata_link_for_each_dev(dev, link) 2053 classes[dev->devno] = ATA_DEV_UNKNOWN; 2054 2055 return reset(link, classes, deadline); 2056 } 2057 2058 static int ata_eh_followup_srst_needed(struct ata_link *link, 2059 int rc, int classify, 2060 const unsigned int *classes) 2061 { 2062 if ((link->flags & ATA_LFLAG_NO_SRST) || ata_link_offline(link)) 2063 return 0; 2064 if (rc == -EAGAIN) { 2065 if (classify) 2066 return 1; 2067 rc = 0; 2068 } 2069 if (rc != 0) 2070 return 0; 2071 if (sata_pmp_supported(link->ap) && ata_is_host_link(link)) 2072 return 1; 2073 return 0; 2074 } 2075 2076 int ata_eh_reset(struct ata_link *link, int classify, 2077 ata_prereset_fn_t prereset, ata_reset_fn_t softreset, 2078 ata_reset_fn_t hardreset, ata_postreset_fn_t postreset) 2079 { 2080 struct ata_port *ap = link->ap; 2081 struct ata_eh_context *ehc = &link->eh_context; 2082 unsigned int *classes = ehc->classes; 2083 unsigned int lflags = link->flags; 2084 int verbose = !(ehc->i.flags & ATA_EHI_QUIET); 2085 int max_tries = 0, try = 0; 2086 struct ata_device *dev; 2087 unsigned long deadline, now; 2088 ata_reset_fn_t reset; 2089 unsigned long flags; 2090 u32 sstatus; 2091 int nr_known, rc; 2092 2093 /* 2094 * Prepare to reset 2095 */ 2096 while (ata_eh_reset_timeouts[max_tries] != ULONG_MAX) 2097 max_tries++; 2098 2099 now = jiffies; 2100 deadline = ata_deadline(ehc->last_reset, ATA_EH_RESET_COOL_DOWN); 2101 if (time_before(now, deadline)) 2102 schedule_timeout_uninterruptible(deadline - now); 2103 2104 spin_lock_irqsave(ap->lock, flags); 2105 ap->pflags |= ATA_PFLAG_RESETTING; 2106 spin_unlock_irqrestore(ap->lock, flags); 2107 2108 
ata_eh_about_to_do(link, NULL, ATA_EH_RESET); 2109 ehc->last_reset = jiffies; 2110 2111 ata_link_for_each_dev(dev, link) { 2112 /* If we issue an SRST then an ATA drive (not ATAPI) 2113 * may change configuration and be in PIO0 timing. If 2114 * we do a hard reset (or are coming from power on) 2115 * this is true for ATA or ATAPI. Until we've set a 2116 * suitable controller mode we should not touch the 2117 * bus as we may be talking too fast. 2118 */ 2119 dev->pio_mode = XFER_PIO_0; 2120 2121 /* If the controller has a pio mode setup function 2122 * then use it to set the chipset to rights. Don't 2123 * touch the DMA setup as that will be dealt with when 2124 * configuring devices. 2125 */ 2126 if (ap->ops->set_piomode) 2127 ap->ops->set_piomode(ap, dev); 2128 } 2129 2130 /* prefer hardreset */ 2131 reset = NULL; 2132 ehc->i.action &= ~ATA_EH_RESET; 2133 if (hardreset) { 2134 reset = hardreset; 2135 ehc->i.action = ATA_EH_HARDRESET; 2136 } else if (softreset) { 2137 reset = softreset; 2138 ehc->i.action = ATA_EH_SOFTRESET; 2139 } 2140 2141 if (prereset) { 2142 rc = prereset(link, 2143 ata_deadline(jiffies, ATA_EH_PRERESET_TIMEOUT)); 2144 if (rc) { 2145 if (rc == -ENOENT) { 2146 ata_link_printk(link, KERN_DEBUG, 2147 "port disabled. ignoring.\n"); 2148 ehc->i.action &= ~ATA_EH_RESET; 2149 2150 ata_link_for_each_dev(dev, link) 2151 classes[dev->devno] = ATA_DEV_NONE; 2152 2153 rc = 0; 2154 } else 2155 ata_link_printk(link, KERN_ERR, 2156 "prereset failed (errno=%d)\n", rc); 2157 goto out; 2158 } 2159 2160 /* prereset() might have cleared ATA_EH_RESET. If so, 2161 * bang classes and return. 
2162 */ 2163 if (reset && !(ehc->i.action & ATA_EH_RESET)) { 2164 ata_link_for_each_dev(dev, link) 2165 classes[dev->devno] = ATA_DEV_NONE; 2166 rc = 0; 2167 goto out; 2168 } 2169 } 2170 2171 retry: 2172 /* 2173 * Perform reset 2174 */ 2175 ehc->last_reset = jiffies; 2176 if (ata_is_host_link(link)) 2177 ata_eh_freeze_port(ap); 2178 2179 deadline = ata_deadline(jiffies, ata_eh_reset_timeouts[try++]); 2180 2181 if (reset) { 2182 if (verbose) 2183 ata_link_printk(link, KERN_INFO, "%s resetting link\n", 2184 reset == softreset ? "soft" : "hard"); 2185 2186 /* mark that this EH session started with reset */ 2187 if (reset == hardreset) 2188 ehc->i.flags |= ATA_EHI_DID_HARDRESET; 2189 else 2190 ehc->i.flags |= ATA_EHI_DID_SOFTRESET; 2191 2192 rc = ata_do_reset(link, reset, classes, deadline); 2193 2194 if (reset == hardreset && 2195 ata_eh_followup_srst_needed(link, rc, classify, classes)) { 2196 /* okay, let's do follow-up softreset */ 2197 reset = softreset; 2198 2199 if (!reset) { 2200 ata_link_printk(link, KERN_ERR, 2201 "follow-up softreset required " 2202 "but no softreset avaliable\n"); 2203 rc = -EINVAL; 2204 goto fail; 2205 } 2206 2207 ata_eh_about_to_do(link, NULL, ATA_EH_RESET); 2208 rc = ata_do_reset(link, reset, classes, deadline); 2209 } 2210 2211 /* -EAGAIN can happen if we skipped followup SRST */ 2212 if (rc && rc != -EAGAIN) 2213 goto fail; 2214 } else { 2215 if (verbose) 2216 ata_link_printk(link, KERN_INFO, "no reset method " 2217 "available, skipping reset\n"); 2218 if (!(lflags & ATA_LFLAG_ASSUME_CLASS)) 2219 lflags |= ATA_LFLAG_ASSUME_ATA; 2220 } 2221 2222 /* 2223 * Post-reset processing 2224 */ 2225 ata_link_for_each_dev(dev, link) { 2226 /* After the reset, the device state is PIO 0 and the 2227 * controller state is undefined. Reset also wakes up 2228 * drives from sleeping mode. 
2229 */ 2230 dev->pio_mode = XFER_PIO_0; 2231 dev->flags &= ~ATA_DFLAG_SLEEPING; 2232 2233 if (ata_link_offline(link)) 2234 continue; 2235 2236 /* apply class override */ 2237 if (lflags & ATA_LFLAG_ASSUME_ATA) 2238 classes[dev->devno] = ATA_DEV_ATA; 2239 else if (lflags & ATA_LFLAG_ASSUME_SEMB) 2240 classes[dev->devno] = ATA_DEV_SEMB_UNSUP; /* not yet */ 2241 } 2242 2243 /* record current link speed */ 2244 if (sata_scr_read(link, SCR_STATUS, &sstatus) == 0) 2245 link->sata_spd = (sstatus >> 4) & 0xf; 2246 2247 /* thaw the port */ 2248 if (ata_is_host_link(link)) 2249 ata_eh_thaw_port(ap); 2250 2251 /* postreset() should clear hardware SError. Although SError 2252 * is cleared during link resume, clearing SError here is 2253 * necessary as some PHYs raise hotplug events after SRST. 2254 * This introduces race condition where hotplug occurs between 2255 * reset and here. This race is mediated by cross checking 2256 * link onlineness and classification result later. 2257 */ 2258 if (postreset) 2259 postreset(link, classes); 2260 2261 /* clear cached SError */ 2262 spin_lock_irqsave(link->ap->lock, flags); 2263 link->eh_info.serror = 0; 2264 spin_unlock_irqrestore(link->ap->lock, flags); 2265 2266 /* Make sure onlineness and classification result correspond. 2267 * Hotplug could have happened during reset and some 2268 * controllers fail to wait while a drive is spinning up after 2269 * being hotplugged causing misdetection. By cross checking 2270 * link onlineness and classification result, those conditions 2271 * can be reliably detected and retried. 
2272 */ 2273 nr_known = 0; 2274 ata_link_for_each_dev(dev, link) { 2275 /* convert all ATA_DEV_UNKNOWN to ATA_DEV_NONE */ 2276 if (classes[dev->devno] == ATA_DEV_UNKNOWN) 2277 classes[dev->devno] = ATA_DEV_NONE; 2278 else 2279 nr_known++; 2280 } 2281 2282 if (classify && !nr_known && ata_link_online(link)) { 2283 if (try < max_tries) { 2284 ata_link_printk(link, KERN_WARNING, "link online but " 2285 "device misclassified, retrying\n"); 2286 rc = -EAGAIN; 2287 goto fail; 2288 } 2289 ata_link_printk(link, KERN_WARNING, 2290 "link online but device misclassified, " 2291 "device detection might fail\n"); 2292 } 2293 2294 /* reset successful, schedule revalidation */ 2295 ata_eh_done(link, NULL, ATA_EH_RESET); 2296 ehc->last_reset = jiffies; 2297 ehc->i.action |= ATA_EH_REVALIDATE; 2298 2299 rc = 0; 2300 out: 2301 /* clear hotplug flag */ 2302 ehc->i.flags &= ~ATA_EHI_HOTPLUGGED; 2303 2304 spin_lock_irqsave(ap->lock, flags); 2305 ap->pflags &= ~ATA_PFLAG_RESETTING; 2306 spin_unlock_irqrestore(ap->lock, flags); 2307 2308 return rc; 2309 2310 fail: 2311 /* if SCR isn't accessible on a fan-out port, PMP needs to be reset */ 2312 if (!ata_is_host_link(link) && 2313 sata_scr_read(link, SCR_STATUS, &sstatus)) 2314 rc = -ERESTART; 2315 2316 if (rc == -ERESTART || try >= max_tries) 2317 goto out; 2318 2319 now = jiffies; 2320 if (time_before(now, deadline)) { 2321 unsigned long delta = deadline - now; 2322 2323 ata_link_printk(link, KERN_WARNING, 2324 "reset failed (errno=%d), retrying in %u secs\n", 2325 rc, DIV_ROUND_UP(jiffies_to_msecs(delta), 1000)); 2326 2327 while (delta) 2328 delta = schedule_timeout_uninterruptible(delta); 2329 } 2330 2331 if (rc == -EPIPE || try == max_tries - 1) 2332 sata_down_spd_limit(link); 2333 if (hardreset) 2334 reset = hardreset; 2335 goto retry; 2336 } 2337 2338 static int ata_eh_revalidate_and_attach(struct ata_link *link, 2339 struct ata_device **r_failed_dev) 2340 { 2341 struct ata_port *ap = link->ap; 2342 struct ata_eh_context *ehc = 
&link->eh_context; 2343 struct ata_device *dev; 2344 unsigned int new_mask = 0; 2345 unsigned long flags; 2346 int rc = 0; 2347 2348 DPRINTK("ENTER\n"); 2349 2350 /* For PATA drive side cable detection to work, IDENTIFY must 2351 * be done backwards such that PDIAG- is released by the slave 2352 * device before the master device is identified. 2353 */ 2354 ata_link_for_each_dev_reverse(dev, link) { 2355 unsigned int action = ata_eh_dev_action(dev); 2356 unsigned int readid_flags = 0; 2357 2358 if (ehc->i.flags & ATA_EHI_DID_RESET) 2359 readid_flags |= ATA_READID_POSTRESET; 2360 2361 if ((action & ATA_EH_REVALIDATE) && ata_dev_enabled(dev)) { 2362 WARN_ON(dev->class == ATA_DEV_PMP); 2363 2364 if (ata_link_offline(link)) { 2365 rc = -EIO; 2366 goto err; 2367 } 2368 2369 ata_eh_about_to_do(link, dev, ATA_EH_REVALIDATE); 2370 rc = ata_dev_revalidate(dev, ehc->classes[dev->devno], 2371 readid_flags); 2372 if (rc) 2373 goto err; 2374 2375 ata_eh_done(link, dev, ATA_EH_REVALIDATE); 2376 2377 /* Configuration may have changed, reconfigure 2378 * transfer mode. 2379 */ 2380 ehc->i.flags |= ATA_EHI_SETMODE; 2381 2382 /* schedule the scsi_rescan_device() here */ 2383 queue_work(ata_aux_wq, &(ap->scsi_rescan_task)); 2384 } else if (dev->class == ATA_DEV_UNKNOWN && 2385 ehc->tries[dev->devno] && 2386 ata_class_enabled(ehc->classes[dev->devno])) { 2387 dev->class = ehc->classes[dev->devno]; 2388 2389 if (dev->class == ATA_DEV_PMP) 2390 rc = sata_pmp_attach(dev); 2391 else 2392 rc = ata_dev_read_id(dev, &dev->class, 2393 readid_flags, dev->id); 2394 switch (rc) { 2395 case 0: 2396 new_mask |= 1 << dev->devno; 2397 break; 2398 case -ENOENT: 2399 /* IDENTIFY was issued to non-existent 2400 * device. No need to reset. Just 2401 * thaw and kill the device. 
2402 */ 2403 ata_eh_thaw_port(ap); 2404 dev->class = ATA_DEV_UNKNOWN; 2405 break; 2406 default: 2407 dev->class = ATA_DEV_UNKNOWN; 2408 goto err; 2409 } 2410 } 2411 } 2412 2413 /* PDIAG- should have been released, ask cable type if post-reset */ 2414 if ((ehc->i.flags & ATA_EHI_DID_RESET) && ata_is_host_link(link)) { 2415 if (ap->ops->cable_detect) 2416 ap->cbl = ap->ops->cable_detect(ap); 2417 ata_force_cbl(ap); 2418 } 2419 2420 /* Configure new devices forward such that user doesn't see 2421 * device detection messages backwards. 2422 */ 2423 ata_link_for_each_dev(dev, link) { 2424 if (!(new_mask & (1 << dev->devno)) || 2425 dev->class == ATA_DEV_PMP) 2426 continue; 2427 2428 ehc->i.flags |= ATA_EHI_PRINTINFO; 2429 rc = ata_dev_configure(dev); 2430 ehc->i.flags &= ~ATA_EHI_PRINTINFO; 2431 if (rc) 2432 goto err; 2433 2434 spin_lock_irqsave(ap->lock, flags); 2435 ap->pflags |= ATA_PFLAG_SCSI_HOTPLUG; 2436 spin_unlock_irqrestore(ap->lock, flags); 2437 2438 /* new device discovered, configure xfermode */ 2439 ehc->i.flags |= ATA_EHI_SETMODE; 2440 } 2441 2442 return 0; 2443 2444 err: 2445 *r_failed_dev = dev; 2446 DPRINTK("EXIT rc=%d\n", rc); 2447 return rc; 2448 } 2449 2450 /** 2451 * ata_set_mode - Program timings and issue SET FEATURES - XFER 2452 * @link: link on which timings will be programmed 2453 * @r_failed_dev: out paramter for failed device 2454 * 2455 * Set ATA device disk transfer mode (PIO3, UDMA6, etc.). If 2456 * ata_set_mode() fails, pointer to the failing device is 2457 * returned in @r_failed_dev. 2458 * 2459 * LOCKING: 2460 * PCI/etc. bus probe sem. 
2461 * 2462 * RETURNS: 2463 * 0 on success, negative errno otherwise 2464 */ 2465 int ata_set_mode(struct ata_link *link, struct ata_device **r_failed_dev) 2466 { 2467 struct ata_port *ap = link->ap; 2468 struct ata_device *dev; 2469 int rc; 2470 2471 /* if data transfer is verified, clear DUBIOUS_XFER on ering top */ 2472 ata_link_for_each_dev(dev, link) { 2473 if (!(dev->flags & ATA_DFLAG_DUBIOUS_XFER)) { 2474 struct ata_ering_entry *ent; 2475 2476 ent = ata_ering_top(&dev->ering); 2477 if (ent) 2478 ent->eflags &= ~ATA_EFLAG_DUBIOUS_XFER; 2479 } 2480 } 2481 2482 /* has private set_mode? */ 2483 if (ap->ops->set_mode) 2484 rc = ap->ops->set_mode(link, r_failed_dev); 2485 else 2486 rc = ata_do_set_mode(link, r_failed_dev); 2487 2488 /* if transfer mode has changed, set DUBIOUS_XFER on device */ 2489 ata_link_for_each_dev(dev, link) { 2490 struct ata_eh_context *ehc = &link->eh_context; 2491 u8 saved_xfer_mode = ehc->saved_xfer_mode[dev->devno]; 2492 u8 saved_ncq = !!(ehc->saved_ncq_enabled & (1 << dev->devno)); 2493 2494 if (dev->xfer_mode != saved_xfer_mode || 2495 ata_ncq_enabled(dev) != saved_ncq) 2496 dev->flags |= ATA_DFLAG_DUBIOUS_XFER; 2497 } 2498 2499 return rc; 2500 } 2501 2502 static int ata_link_nr_enabled(struct ata_link *link) 2503 { 2504 struct ata_device *dev; 2505 int cnt = 0; 2506 2507 ata_link_for_each_dev(dev, link) 2508 if (ata_dev_enabled(dev)) 2509 cnt++; 2510 return cnt; 2511 } 2512 2513 static int ata_link_nr_vacant(struct ata_link *link) 2514 { 2515 struct ata_device *dev; 2516 int cnt = 0; 2517 2518 ata_link_for_each_dev(dev, link) 2519 if (dev->class == ATA_DEV_UNKNOWN) 2520 cnt++; 2521 return cnt; 2522 } 2523 2524 static int ata_eh_skip_recovery(struct ata_link *link) 2525 { 2526 struct ata_port *ap = link->ap; 2527 struct ata_eh_context *ehc = &link->eh_context; 2528 struct ata_device *dev; 2529 2530 /* skip disabled links */ 2531 if (link->flags & ATA_LFLAG_DISABLED) 2532 return 1; 2533 2534 /* thaw frozen port and recover failed 
devices */ 2535 if ((ap->pflags & ATA_PFLAG_FROZEN) || ata_link_nr_enabled(link)) 2536 return 0; 2537 2538 /* reset at least once if reset is requested */ 2539 if ((ehc->i.action & ATA_EH_RESET) && 2540 !(ehc->i.flags & ATA_EHI_DID_RESET)) 2541 return 0; 2542 2543 /* skip if class codes for all vacant slots are ATA_DEV_NONE */ 2544 ata_link_for_each_dev(dev, link) { 2545 if (dev->class == ATA_DEV_UNKNOWN && 2546 ehc->classes[dev->devno] != ATA_DEV_NONE) 2547 return 0; 2548 } 2549 2550 return 1; 2551 } 2552 2553 static int ata_eh_schedule_probe(struct ata_device *dev) 2554 { 2555 struct ata_eh_context *ehc = &dev->link->eh_context; 2556 2557 if (!(ehc->i.probe_mask & (1 << dev->devno)) || 2558 (ehc->did_probe_mask & (1 << dev->devno))) 2559 return 0; 2560 2561 ata_eh_detach_dev(dev); 2562 ata_dev_init(dev); 2563 ehc->did_probe_mask |= (1 << dev->devno); 2564 ehc->i.action |= ATA_EH_RESET; 2565 ehc->saved_xfer_mode[dev->devno] = 0; 2566 ehc->saved_ncq_enabled &= ~(1 << dev->devno); 2567 2568 return 1; 2569 } 2570 2571 static int ata_eh_handle_dev_fail(struct ata_device *dev, int err) 2572 { 2573 struct ata_eh_context *ehc = &dev->link->eh_context; 2574 2575 ehc->tries[dev->devno]--; 2576 2577 switch (err) { 2578 case -ENODEV: 2579 /* device missing or wrong IDENTIFY data, schedule probing */ 2580 ehc->i.probe_mask |= (1 << dev->devno); 2581 case -EINVAL: 2582 /* give it just one more chance */ 2583 ehc->tries[dev->devno] = min(ehc->tries[dev->devno], 1); 2584 case -EIO: 2585 if (ehc->tries[dev->devno] == 1 && dev->pio_mode > XFER_PIO_0) { 2586 /* This is the last chance, better to slow 2587 * down than lose it. 
2588 */ 2589 sata_down_spd_limit(dev->link); 2590 ata_down_xfermask_limit(dev, ATA_DNXFER_PIO); 2591 } 2592 } 2593 2594 if (ata_dev_enabled(dev) && !ehc->tries[dev->devno]) { 2595 /* disable device if it has used up all its chances */ 2596 ata_dev_disable(dev); 2597 2598 /* detach if offline */ 2599 if (ata_link_offline(dev->link)) 2600 ata_eh_detach_dev(dev); 2601 2602 /* schedule probe if necessary */ 2603 if (ata_eh_schedule_probe(dev)) 2604 ehc->tries[dev->devno] = ATA_EH_DEV_TRIES; 2605 2606 return 1; 2607 } else { 2608 ehc->i.action |= ATA_EH_RESET; 2609 return 0; 2610 } 2611 } 2612 2613 /** 2614 * ata_eh_recover - recover host port after error 2615 * @ap: host port to recover 2616 * @prereset: prereset method (can be NULL) 2617 * @softreset: softreset method (can be NULL) 2618 * @hardreset: hardreset method (can be NULL) 2619 * @postreset: postreset method (can be NULL) 2620 * @r_failed_link: out parameter for failed link 2621 * 2622 * This is the alpha and omega, eum and yang, heart and soul of 2623 * libata exception handling. On entry, actions required to 2624 * recover each link and hotplug requests are recorded in the 2625 * link's eh_context. This function executes all the operations 2626 * with appropriate retrials and fallbacks to resurrect failed 2627 * devices, detach goners and greet newcomers. 2628 * 2629 * LOCKING: 2630 * Kernel thread context (may sleep). 2631 * 2632 * RETURNS: 2633 * 0 on success, -errno on failure. 2634 */ 2635 int ata_eh_recover(struct ata_port *ap, ata_prereset_fn_t prereset, 2636 ata_reset_fn_t softreset, ata_reset_fn_t hardreset, 2637 ata_postreset_fn_t postreset, 2638 struct ata_link **r_failed_link) 2639 { 2640 struct ata_link *link; 2641 struct ata_device *dev; 2642 int nr_failed_devs; 2643 int rc; 2644 unsigned long flags; 2645 2646 DPRINTK("ENTER\n"); 2647 2648 /* prep for recovery */ 2649 ata_port_for_each_link(link, ap) { 2650 struct ata_eh_context *ehc = &link->eh_context; 2651 2652 /* re-enable link? 
*/ 2653 if (ehc->i.action & ATA_EH_ENABLE_LINK) { 2654 ata_eh_about_to_do(link, NULL, ATA_EH_ENABLE_LINK); 2655 spin_lock_irqsave(ap->lock, flags); 2656 link->flags &= ~ATA_LFLAG_DISABLED; 2657 spin_unlock_irqrestore(ap->lock, flags); 2658 ata_eh_done(link, NULL, ATA_EH_ENABLE_LINK); 2659 } 2660 2661 ata_link_for_each_dev(dev, link) { 2662 if (link->flags & ATA_LFLAG_NO_RETRY) 2663 ehc->tries[dev->devno] = 1; 2664 else 2665 ehc->tries[dev->devno] = ATA_EH_DEV_TRIES; 2666 2667 /* collect port action mask recorded in dev actions */ 2668 ehc->i.action |= ehc->i.dev_action[dev->devno] & 2669 ~ATA_EH_PERDEV_MASK; 2670 ehc->i.dev_action[dev->devno] &= ATA_EH_PERDEV_MASK; 2671 2672 /* process hotplug request */ 2673 if (dev->flags & ATA_DFLAG_DETACH) 2674 ata_eh_detach_dev(dev); 2675 2676 /* schedule probe if necessary */ 2677 if (!ata_dev_enabled(dev)) 2678 ata_eh_schedule_probe(dev); 2679 } 2680 } 2681 2682 retry: 2683 rc = 0; 2684 nr_failed_devs = 0; 2685 2686 /* if UNLOADING, finish immediately */ 2687 if (ap->pflags & ATA_PFLAG_UNLOADING) 2688 goto out; 2689 2690 /* prep for EH */ 2691 ata_port_for_each_link(link, ap) { 2692 struct ata_eh_context *ehc = &link->eh_context; 2693 2694 /* skip EH if possible. 
*/ 2695 if (ata_eh_skip_recovery(link)) 2696 ehc->i.action = 0; 2697 2698 ata_link_for_each_dev(dev, link) 2699 ehc->classes[dev->devno] = ATA_DEV_UNKNOWN; 2700 } 2701 2702 /* reset */ 2703 ata_port_for_each_link(link, ap) { 2704 struct ata_eh_context *ehc = &link->eh_context; 2705 2706 if (!(ehc->i.action & ATA_EH_RESET)) 2707 continue; 2708 2709 rc = ata_eh_reset(link, ata_link_nr_vacant(link), 2710 prereset, softreset, hardreset, postreset); 2711 if (rc) { 2712 ata_link_printk(link, KERN_ERR, 2713 "reset failed, giving up\n"); 2714 goto out; 2715 } 2716 } 2717 2718 /* the rest */ 2719 ata_port_for_each_link(link, ap) { 2720 struct ata_eh_context *ehc = &link->eh_context; 2721 2722 /* revalidate existing devices and attach new ones */ 2723 rc = ata_eh_revalidate_and_attach(link, &dev); 2724 if (rc) 2725 goto dev_fail; 2726 2727 /* if PMP got attached, return, pmp EH will take care of it */ 2728 if (link->device->class == ATA_DEV_PMP) { 2729 ehc->i.action = 0; 2730 return 0; 2731 } 2732 2733 /* configure transfer mode if necessary */ 2734 if (ehc->i.flags & ATA_EHI_SETMODE) { 2735 rc = ata_set_mode(link, &dev); 2736 if (rc) 2737 goto dev_fail; 2738 ehc->i.flags &= ~ATA_EHI_SETMODE; 2739 } 2740 2741 if (ehc->i.action & ATA_EH_LPM) 2742 ata_link_for_each_dev(dev, link) 2743 ata_dev_enable_pm(dev, ap->pm_policy); 2744 2745 /* this link is okay now */ 2746 ehc->i.flags = 0; 2747 continue; 2748 2749 dev_fail: 2750 nr_failed_devs++; 2751 ata_eh_handle_dev_fail(dev, rc); 2752 2753 if (ap->pflags & ATA_PFLAG_FROZEN) { 2754 /* PMP reset requires working host port. 2755 * Can't retry if it's frozen. 
2756 */ 2757 if (sata_pmp_attached(ap)) 2758 goto out; 2759 break; 2760 } 2761 } 2762 2763 if (nr_failed_devs) 2764 goto retry; 2765 2766 out: 2767 if (rc && r_failed_link) 2768 *r_failed_link = link; 2769 2770 DPRINTK("EXIT, rc=%d\n", rc); 2771 return rc; 2772 } 2773 2774 /** 2775 * ata_eh_finish - finish up EH 2776 * @ap: host port to finish EH for 2777 * 2778 * Recovery is complete. Clean up EH states and retry or finish 2779 * failed qcs. 2780 * 2781 * LOCKING: 2782 * None. 2783 */ 2784 void ata_eh_finish(struct ata_port *ap) 2785 { 2786 int tag; 2787 2788 /* retry or finish qcs */ 2789 for (tag = 0; tag < ATA_MAX_QUEUE; tag++) { 2790 struct ata_queued_cmd *qc = __ata_qc_from_tag(ap, tag); 2791 2792 if (!(qc->flags & ATA_QCFLAG_FAILED)) 2793 continue; 2794 2795 if (qc->err_mask) { 2796 /* FIXME: Once EH migration is complete, 2797 * generate sense data in this function, 2798 * considering both err_mask and tf. 2799 */ 2800 if (qc->flags & ATA_QCFLAG_RETRY) 2801 ata_eh_qc_retry(qc); 2802 else 2803 ata_eh_qc_complete(qc); 2804 } else { 2805 if (qc->flags & ATA_QCFLAG_SENSE_VALID) { 2806 ata_eh_qc_complete(qc); 2807 } else { 2808 /* feed zero TF to sense generation */ 2809 memset(&qc->result_tf, 0, sizeof(qc->result_tf)); 2810 ata_eh_qc_retry(qc); 2811 } 2812 } 2813 } 2814 2815 /* make sure nr_active_links is zero after EH */ 2816 WARN_ON(ap->nr_active_links); 2817 ap->nr_active_links = 0; 2818 } 2819 2820 /** 2821 * ata_do_eh - do standard error handling 2822 * @ap: host port to handle error for 2823 * 2824 * @prereset: prereset method (can be NULL) 2825 * @softreset: softreset method (can be NULL) 2826 * @hardreset: hardreset method (can be NULL) 2827 * @postreset: postreset method (can be NULL) 2828 * 2829 * Perform standard error handling sequence. 2830 * 2831 * LOCKING: 2832 * Kernel thread context (may sleep). 
2833 */ 2834 void ata_do_eh(struct ata_port *ap, ata_prereset_fn_t prereset, 2835 ata_reset_fn_t softreset, ata_reset_fn_t hardreset, 2836 ata_postreset_fn_t postreset) 2837 { 2838 struct ata_device *dev; 2839 int rc; 2840 2841 ata_eh_autopsy(ap); 2842 ata_eh_report(ap); 2843 2844 rc = ata_eh_recover(ap, prereset, softreset, hardreset, postreset, 2845 NULL); 2846 if (rc) { 2847 ata_link_for_each_dev(dev, &ap->link) 2848 ata_dev_disable(dev); 2849 } 2850 2851 ata_eh_finish(ap); 2852 } 2853 2854 /** 2855 * ata_std_error_handler - standard error handler 2856 * @ap: host port to handle error for 2857 * 2858 * Standard error handler 2859 * 2860 * LOCKING: 2861 * Kernel thread context (may sleep). 2862 */ 2863 void ata_std_error_handler(struct ata_port *ap) 2864 { 2865 struct ata_port_operations *ops = ap->ops; 2866 ata_reset_fn_t hardreset = ops->hardreset; 2867 2868 /* ignore built-in hardreset if SCR access is not available */ 2869 if (ata_is_builtin_hardreset(hardreset) && !sata_scr_valid(&ap->link)) 2870 hardreset = NULL; 2871 2872 ata_do_eh(ap, ops->prereset, ops->softreset, hardreset, ops->postreset); 2873 } 2874 2875 #ifdef CONFIG_PM 2876 /** 2877 * ata_eh_handle_port_suspend - perform port suspend operation 2878 * @ap: port to suspend 2879 * 2880 * Suspend @ap. 2881 * 2882 * LOCKING: 2883 * Kernel thread context (may sleep). 2884 */ 2885 static void ata_eh_handle_port_suspend(struct ata_port *ap) 2886 { 2887 unsigned long flags; 2888 int rc = 0; 2889 2890 /* are we suspending? 
*/ 2891 spin_lock_irqsave(ap->lock, flags); 2892 if (!(ap->pflags & ATA_PFLAG_PM_PENDING) || 2893 ap->pm_mesg.event == PM_EVENT_ON) { 2894 spin_unlock_irqrestore(ap->lock, flags); 2895 return; 2896 } 2897 spin_unlock_irqrestore(ap->lock, flags); 2898 2899 WARN_ON(ap->pflags & ATA_PFLAG_SUSPENDED); 2900 2901 /* tell ACPI we're suspending */ 2902 rc = ata_acpi_on_suspend(ap); 2903 if (rc) 2904 goto out; 2905 2906 /* suspend */ 2907 ata_eh_freeze_port(ap); 2908 2909 if (ap->ops->port_suspend) 2910 rc = ap->ops->port_suspend(ap, ap->pm_mesg); 2911 2912 ata_acpi_set_state(ap, PMSG_SUSPEND); 2913 out: 2914 /* report result */ 2915 spin_lock_irqsave(ap->lock, flags); 2916 2917 ap->pflags &= ~ATA_PFLAG_PM_PENDING; 2918 if (rc == 0) 2919 ap->pflags |= ATA_PFLAG_SUSPENDED; 2920 else if (ap->pflags & ATA_PFLAG_FROZEN) 2921 ata_port_schedule_eh(ap); 2922 2923 if (ap->pm_result) { 2924 *ap->pm_result = rc; 2925 ap->pm_result = NULL; 2926 } 2927 2928 spin_unlock_irqrestore(ap->lock, flags); 2929 2930 return; 2931 } 2932 2933 /** 2934 * ata_eh_handle_port_resume - perform port resume operation 2935 * @ap: port to resume 2936 * 2937 * Resume @ap. 2938 * 2939 * LOCKING: 2940 * Kernel thread context (may sleep). 2941 */ 2942 static void ata_eh_handle_port_resume(struct ata_port *ap) 2943 { 2944 unsigned long flags; 2945 int rc = 0; 2946 2947 /* are we resuming? 
*/ 2948 spin_lock_irqsave(ap->lock, flags); 2949 if (!(ap->pflags & ATA_PFLAG_PM_PENDING) || 2950 ap->pm_mesg.event != PM_EVENT_ON) { 2951 spin_unlock_irqrestore(ap->lock, flags); 2952 return; 2953 } 2954 spin_unlock_irqrestore(ap->lock, flags); 2955 2956 WARN_ON(!(ap->pflags & ATA_PFLAG_SUSPENDED)); 2957 2958 ata_acpi_set_state(ap, PMSG_ON); 2959 2960 if (ap->ops->port_resume) 2961 rc = ap->ops->port_resume(ap); 2962 2963 /* tell ACPI that we're resuming */ 2964 ata_acpi_on_resume(ap); 2965 2966 /* report result */ 2967 spin_lock_irqsave(ap->lock, flags); 2968 ap->pflags &= ~(ATA_PFLAG_PM_PENDING | ATA_PFLAG_SUSPENDED); 2969 if (ap->pm_result) { 2970 *ap->pm_result = rc; 2971 ap->pm_result = NULL; 2972 } 2973 spin_unlock_irqrestore(ap->lock, flags); 2974 } 2975 #endif /* CONFIG_PM */ 2976