1 /* 2 * libata-eh.c - libata error handling 3 * 4 * Maintained by: Jeff Garzik <jgarzik@pobox.com> 5 * Please ALWAYS copy linux-ide@vger.kernel.org 6 * on emails. 7 * 8 * Copyright 2006 Tejun Heo <htejun@gmail.com> 9 * 10 * 11 * This program is free software; you can redistribute it and/or 12 * modify it under the terms of the GNU General Public License as 13 * published by the Free Software Foundation; either version 2, or 14 * (at your option) any later version. 15 * 16 * This program is distributed in the hope that it will be useful, 17 * but WITHOUT ANY WARRANTY; without even the implied warranty of 18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 19 * General Public License for more details. 20 * 21 * You should have received a copy of the GNU General Public License 22 * along with this program; see the file COPYING. If not, write to 23 * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, 24 * USA. 25 * 26 * 27 * libata documentation is available via 'make {ps|pdf}docs', 28 * as Documentation/DocBook/libata.* 29 * 30 * Hardware documentation available from http://www.t13.org/ and 31 * http://www.sata-io.org/ 32 * 33 */ 34 35 #include <linux/kernel.h> 36 #include <scsi/scsi.h> 37 #include <scsi/scsi_host.h> 38 #include <scsi/scsi_eh.h> 39 #include <scsi/scsi_device.h> 40 #include <scsi/scsi_cmnd.h> 41 #include "../scsi/scsi_transport_api.h" 42 43 #include <linux/libata.h> 44 45 #include "libata.h" 46 47 enum { 48 ATA_EH_SPDN_NCQ_OFF = (1 << 0), 49 ATA_EH_SPDN_SPEED_DOWN = (1 << 1), 50 ATA_EH_SPDN_FALLBACK_TO_PIO = (1 << 2), 51 }; 52 53 /* Waiting in ->prereset can never be reliable. It's sometimes nice 54 * to wait there but it can't be depended upon; otherwise, we wouldn't 55 * be resetting. Just give it enough time for most drives to spin up. 56 */ 57 enum { 58 ATA_EH_PRERESET_TIMEOUT = 10 * HZ, 59 }; 60 61 /* The following table determines how we sequence resets. Each entry 62 * represents timeout for that try. The first try can be soft or 63 * hardreset. All others are hardreset if available. In most cases 64 * the first reset w/ 10sec timeout should succeed. Following entries 65 * are mostly for error handling, hotplug and retarded devices. 66 */ 67 static const unsigned long ata_eh_reset_timeouts[] = { 68 10 * HZ, /* most drives spin up by 10sec */ 69 10 * HZ, /* > 99% working drives spin up before 20sec */ 70 35 * HZ, /* give > 30 secs of idleness for retarded devices */ 71 5 * HZ, /* and sweet one last chance */ 72 /* > 1 min has elapsed, give up */ 73 }; 74 75 static void __ata_port_freeze(struct ata_port *ap); 76 static void ata_eh_finish(struct ata_port *ap); 77 #ifdef CONFIG_PM 78 static void ata_eh_handle_port_suspend(struct ata_port *ap); 79 static void ata_eh_handle_port_resume(struct ata_port *ap); 80 #else /* CONFIG_PM */ 81 static void ata_eh_handle_port_suspend(struct ata_port *ap) 82 { } 83 84 static void ata_eh_handle_port_resume(struct ata_port *ap) 85 { } 86 #endif /* CONFIG_PM */ 87 88 static void ata_ering_record(struct ata_ering *ering, int is_io, 89 unsigned int err_mask) 90 { 91 struct ata_ering_entry *ent; 92 93 WARN_ON(!err_mask); 94 95 ering->cursor++; 96 ering->cursor %= ATA_ERING_SIZE; 97 98 ent = &ering->ring[ering->cursor]; 99 ent->is_io = is_io; 100 ent->err_mask = err_mask; 101 ent->timestamp = get_jiffies_64(); 102 } 103 104 static void ata_ering_clear(struct ata_ering *ering) 105 { 106 memset(ering, 0, sizeof(*ering)); 107 } 108 109 static int ata_ering_map(struct ata_ering *ering, 110 int (*map_fn)(struct ata_ering_entry *, void *), 111 void *arg) 112 { 113 int idx, rc = 0; 114 struct ata_ering_entry *ent; 115 116 idx = ering->cursor; 117 do { 118 ent = &ering->ring[idx]; 119 if (!ent->err_mask) 120 break; 121 rc = map_fn(ent, arg); 122 if (rc) 123 break; 124 idx = (idx - 1 + ATA_ERING_SIZE) % ATA_ERING_SIZE; 125 } while (idx != ering->cursor); 126 127 return rc; 128 } 129 130 static unsigned int ata_eh_dev_action(struct ata_device *dev) 131 { 132 struct ata_eh_context *ehc = &dev->ap->eh_context; 133 134 return ehc->i.action | ehc->i.dev_action[dev->devno]; 135 } 136 137 static void ata_eh_clear_action(struct ata_device *dev, 138 struct ata_eh_info *ehi, unsigned int action) 139 { 140 int i; 141 142 if (!dev) { 143 ehi->action &= ~action; 144 for (i = 0; i < ATA_MAX_DEVICES; i++) 145 ehi->dev_action[i] &= ~action; 146 } else { 147 /* doesn't make sense for port-wide EH actions */ 148 WARN_ON(!(action & ATA_EH_PERDEV_MASK)); 149 150 /* break ehi->action into ehi->dev_action */ 151 if (ehi->action & action) { 152 for (i = 0; i < ATA_MAX_DEVICES; i++) 153 ehi->dev_action[i] |= ehi->action & action; 154 ehi->action &= ~action; 155 } 156 157 /* turn off the specified per-dev action */ 158 ehi->dev_action[dev->devno] &= ~action; 159 } 160 } 161 162 /** 163 * ata_scsi_timed_out - SCSI layer time out callback 164 * @cmd: timed out SCSI command 165 * 166 * Handles SCSI layer timeout. We race with normal completion of 167 * the qc for @cmd. If the qc is already gone, we lose and let 168 * the scsi command finish (EH_HANDLED). Otherwise, the qc has 169 * timed out and EH should be invoked. Prevent ata_qc_complete() 170 * from finishing it by setting EH_SCHEDULED and return 171 * EH_NOT_HANDLED. 172 * 173 * TODO: kill this function once old EH is gone. 174 * 175 * LOCKING: 176 * Called from timer context 177 * 178 * RETURNS: 179 * EH_HANDLED or EH_NOT_HANDLED 180 */ 181 enum scsi_eh_timer_return ata_scsi_timed_out(struct scsi_cmnd *cmd) 182 { 183 struct Scsi_Host *host = cmd->device->host; 184 struct ata_port *ap = ata_shost_to_port(host); 185 unsigned long flags; 186 struct ata_queued_cmd *qc; 187 enum scsi_eh_timer_return ret; 188 189 DPRINTK("ENTER\n"); 190 191 if (ap->ops->error_handler) { 192 ret = EH_NOT_HANDLED; 193 goto out; 194 } 195 196 ret = EH_HANDLED; 197 spin_lock_irqsave(ap->lock, flags); 198 qc = ata_qc_from_tag(ap, ap->active_tag); 199 if (qc) { 200 WARN_ON(qc->scsicmd != cmd); 201 qc->flags |= ATA_QCFLAG_EH_SCHEDULED; 202 qc->err_mask |= AC_ERR_TIMEOUT; 203 ret = EH_NOT_HANDLED; 204 } 205 spin_unlock_irqrestore(ap->lock, flags); 206 207 out: 208 DPRINTK("EXIT, ret=%d\n", ret); 209 return ret; 210 } 211 212 /** 213 * ata_scsi_error - SCSI layer error handler callback 214 * @host: SCSI host on which error occurred 215 * 216 * Handles SCSI-layer-thrown error events. 217 * 218 * LOCKING: 219 * Inherited from SCSI layer (none, can sleep) 220 * 221 * RETURNS: 222 * Zero. 223 */ 224 void ata_scsi_error(struct Scsi_Host *host) 225 { 226 struct ata_port *ap = ata_shost_to_port(host); 227 int i, repeat_cnt = ATA_EH_MAX_REPEAT; 228 unsigned long flags; 229 230 DPRINTK("ENTER\n"); 231 232 /* synchronize with port task */ 233 ata_port_flush_task(ap); 234 235 /* synchronize with host lock and sort out timeouts */ 236 237 /* For new EH, all qcs are finished in one of three ways - 238 * normal completion, error completion, and SCSI timeout. 239 * Both cmpletions can race against SCSI timeout. When normal 240 * completion wins, the qc never reaches EH. When error 241 * completion wins, the qc has ATA_QCFLAG_FAILED set. 242 * 243 * When SCSI timeout wins, things are a bit more complex. 244 * Normal or error completion can occur after the timeout but 245 * before this point. In such cases, both types of 246 * completions are honored. A scmd is determined to have 247 * timed out iff its associated qc is active and not failed. 248 */ 249 if (ap->ops->error_handler) { 250 struct scsi_cmnd *scmd, *tmp; 251 int nr_timedout = 0; 252 253 spin_lock_irqsave(ap->lock, flags); 254 255 list_for_each_entry_safe(scmd, tmp, &host->eh_cmd_q, eh_entry) { 256 struct ata_queued_cmd *qc; 257 258 for (i = 0; i < ATA_MAX_QUEUE; i++) { 259 qc = __ata_qc_from_tag(ap, i); 260 if (qc->flags & ATA_QCFLAG_ACTIVE && 261 qc->scsicmd == scmd) 262 break; 263 } 264 265 if (i < ATA_MAX_QUEUE) { 266 /* the scmd has an associated qc */ 267 if (!(qc->flags & ATA_QCFLAG_FAILED)) { 268 /* which hasn't failed yet, timeout */ 269 qc->err_mask |= AC_ERR_TIMEOUT; 270 qc->flags |= ATA_QCFLAG_FAILED; 271 nr_timedout++; 272 } 273 } else { 274 /* Normal completion occurred after 275 * SCSI timeout but before this point. 276 * Successfully complete it. 277 */ 278 scmd->retries = scmd->allowed; 279 scsi_eh_finish_cmd(scmd, &ap->eh_done_q); 280 } 281 } 282 283 /* If we have timed out qcs. They belong to EH from 284 * this point but the state of the controller is 285 * unknown. Freeze the port to make sure the IRQ 286 * handler doesn't diddle with those qcs. This must 287 * be done atomically w.r.t. setting QCFLAG_FAILED. 288 */ 289 if (nr_timedout) 290 __ata_port_freeze(ap); 291 292 spin_unlock_irqrestore(ap->lock, flags); 293 } else 294 spin_unlock_wait(ap->lock); 295 296 repeat: 297 /* invoke error handler */ 298 if (ap->ops->error_handler) { 299 /* process port resume request */ 300 ata_eh_handle_port_resume(ap); 301 302 /* fetch & clear EH info */ 303 spin_lock_irqsave(ap->lock, flags); 304 305 memset(&ap->eh_context, 0, sizeof(ap->eh_context)); 306 ap->eh_context.i = ap->eh_info; 307 memset(&ap->eh_info, 0, sizeof(ap->eh_info)); 308 309 ap->pflags |= ATA_PFLAG_EH_IN_PROGRESS; 310 ap->pflags &= ~ATA_PFLAG_EH_PENDING; 311 312 spin_unlock_irqrestore(ap->lock, flags); 313 314 /* invoke EH, skip if unloading or suspended */ 315 if (!(ap->pflags & (ATA_PFLAG_UNLOADING | ATA_PFLAG_SUSPENDED))) 316 ap->ops->error_handler(ap); 317 else 318 ata_eh_finish(ap); 319 320 /* process port suspend request */ 321 ata_eh_handle_port_suspend(ap); 322 323 /* Exception might have happend after ->error_handler 324 * recovered the port but before this point. Repeat 325 * EH in such case. 326 */ 327 spin_lock_irqsave(ap->lock, flags); 328 329 if (ap->pflags & ATA_PFLAG_EH_PENDING) { 330 if (--repeat_cnt) { 331 ata_port_printk(ap, KERN_INFO, 332 "EH pending after completion, " 333 "repeating EH (cnt=%d)\n", repeat_cnt); 334 spin_unlock_irqrestore(ap->lock, flags); 335 goto repeat; 336 } 337 ata_port_printk(ap, KERN_ERR, "EH pending after %d " 338 "tries, giving up\n", ATA_EH_MAX_REPEAT); 339 ap->pflags &= ~ATA_PFLAG_EH_PENDING; 340 } 341 342 /* this run is complete, make sure EH info is clear */ 343 memset(&ap->eh_info, 0, sizeof(ap->eh_info)); 344 345 /* Clear host_eh_scheduled while holding ap->lock such 346 * that if exception occurs after this point but 347 * before EH completion, SCSI midlayer will 348 * re-initiate EH. 349 */ 350 host->host_eh_scheduled = 0; 351 352 spin_unlock_irqrestore(ap->lock, flags); 353 } else { 354 WARN_ON(ata_qc_from_tag(ap, ap->active_tag) == NULL); 355 ap->ops->eng_timeout(ap); 356 } 357 358 /* finish or retry handled scmd's and clean up */ 359 WARN_ON(host->host_failed || !list_empty(&host->eh_cmd_q)); 360 361 scsi_eh_flush_done_q(&ap->eh_done_q); 362 363 /* clean up */ 364 spin_lock_irqsave(ap->lock, flags); 365 366 if (ap->pflags & ATA_PFLAG_LOADING) 367 ap->pflags &= ~ATA_PFLAG_LOADING; 368 else if (ap->pflags & ATA_PFLAG_SCSI_HOTPLUG) 369 queue_delayed_work(ata_aux_wq, &ap->hotplug_task, 0); 370 371 if (ap->pflags & ATA_PFLAG_RECOVERED) 372 ata_port_printk(ap, KERN_INFO, "EH complete\n"); 373 374 ap->pflags &= ~(ATA_PFLAG_SCSI_HOTPLUG | ATA_PFLAG_RECOVERED); 375 376 /* tell wait_eh that we're done */ 377 ap->pflags &= ~ATA_PFLAG_EH_IN_PROGRESS; 378 wake_up_all(&ap->eh_wait_q); 379 380 spin_unlock_irqrestore(ap->lock, flags); 381 382 DPRINTK("EXIT\n"); 383 } 384 385 /** 386 * ata_port_wait_eh - Wait for the currently pending EH to complete 387 * @ap: Port to wait EH for 388 * 389 * Wait until the currently pending EH is complete. 390 * 391 * LOCKING: 392 * Kernel thread context (may sleep). 393 */ 394 void ata_port_wait_eh(struct ata_port *ap) 395 { 396 unsigned long flags; 397 DEFINE_WAIT(wait); 398 399 retry: 400 spin_lock_irqsave(ap->lock, flags); 401 402 while (ap->pflags & (ATA_PFLAG_EH_PENDING | ATA_PFLAG_EH_IN_PROGRESS)) { 403 prepare_to_wait(&ap->eh_wait_q, &wait, TASK_UNINTERRUPTIBLE); 404 spin_unlock_irqrestore(ap->lock, flags); 405 schedule(); 406 spin_lock_irqsave(ap->lock, flags); 407 } 408 finish_wait(&ap->eh_wait_q, &wait); 409 410 spin_unlock_irqrestore(ap->lock, flags); 411 412 /* make sure SCSI EH is complete */ 413 if (scsi_host_in_recovery(ap->scsi_host)) { 414 msleep(10); 415 goto retry; 416 } 417 } 418 419 /** 420 * ata_qc_timeout - Handle timeout of queued command 421 * @qc: Command that timed out 422 * 423 * Some part of the kernel (currently, only the SCSI layer) 424 * has noticed that the active command on port @ap has not 425 * completed after a specified length of time. Handle this 426 * condition by disabling DMA (if necessary) and completing 427 * transactions, with error if necessary. 428 * 429 * This also handles the case of the "lost interrupt", where 430 * for some reason (possibly hardware bug, possibly driver bug) 431 * an interrupt was not delivered to the driver, even though the 432 * transaction completed successfully. 433 * 434 * TODO: kill this function once old EH is gone. 435 * 436 * LOCKING: 437 * Inherited from SCSI layer (none, can sleep) 438 */ 439 static void ata_qc_timeout(struct ata_queued_cmd *qc) 440 { 441 struct ata_port *ap = qc->ap; 442 u8 host_stat = 0, drv_stat; 443 unsigned long flags; 444 445 DPRINTK("ENTER\n"); 446 447 ap->hsm_task_state = HSM_ST_IDLE; 448 449 spin_lock_irqsave(ap->lock, flags); 450 451 switch (qc->tf.protocol) { 452 453 case ATA_PROT_DMA: 454 case ATA_PROT_ATAPI_DMA: 455 host_stat = ap->ops->bmdma_status(ap); 456 457 /* before we do anything else, clear DMA-Start bit */ 458 ap->ops->bmdma_stop(qc); 459 460 /* fall through */ 461 462 default: 463 ata_altstatus(ap); 464 drv_stat = ata_chk_status(ap); 465 466 /* ack bmdma irq events */ 467 ap->ops->irq_clear(ap); 468 469 ata_dev_printk(qc->dev, KERN_ERR, "command 0x%x timeout, " 470 "stat 0x%x host_stat 0x%x\n", 471 qc->tf.command, drv_stat, host_stat); 472 473 /* complete taskfile transaction */ 474 qc->err_mask |= AC_ERR_TIMEOUT; 475 break; 476 } 477 478 spin_unlock_irqrestore(ap->lock, flags); 479 480 ata_eh_qc_complete(qc); 481 482 DPRINTK("EXIT\n"); 483 } 484 485 /** 486 * ata_eng_timeout - Handle timeout of queued command 487 * @ap: Port on which timed-out command is active 488 * 489 * Some part of the kernel (currently, only the SCSI layer) 490 * has noticed that the active command on port @ap has not 491 * completed after a specified length of time. Handle this 492 * condition by disabling DMA (if necessary) and completing 493 * transactions, with error if necessary. 494 * 495 * This also handles the case of the "lost interrupt", where 496 * for some reason (possibly hardware bug, possibly driver bug) 497 * an interrupt was not delivered to the driver, even though the 498 * transaction completed successfully. 499 * 500 * TODO: kill this function once old EH is gone. 501 * 502 * LOCKING: 503 * Inherited from SCSI layer (none, can sleep) 504 */ 505 void ata_eng_timeout(struct ata_port *ap) 506 { 507 DPRINTK("ENTER\n"); 508 509 ata_qc_timeout(ata_qc_from_tag(ap, ap->active_tag)); 510 511 DPRINTK("EXIT\n"); 512 } 513 514 /** 515 * ata_qc_schedule_eh - schedule qc for error handling 516 * @qc: command to schedule error handling for 517 * 518 * Schedule error handling for @qc. EH will kick in as soon as 519 * other commands are drained. 520 * 521 * LOCKING: 522 * spin_lock_irqsave(host lock) 523 */ 524 void ata_qc_schedule_eh(struct ata_queued_cmd *qc) 525 { 526 struct ata_port *ap = qc->ap; 527 528 WARN_ON(!ap->ops->error_handler); 529 530 qc->flags |= ATA_QCFLAG_FAILED; 531 qc->ap->pflags |= ATA_PFLAG_EH_PENDING; 532 533 /* The following will fail if timeout has already expired. 534 * ata_scsi_error() takes care of such scmds on EH entry. 535 * Note that ATA_QCFLAG_FAILED is unconditionally set after 536 * this function completes. 537 */ 538 scsi_req_abort_cmd(qc->scsicmd); 539 } 540 541 /** 542 * ata_port_schedule_eh - schedule error handling without a qc 543 * @ap: ATA port to schedule EH for 544 * 545 * Schedule error handling for @ap. EH will kick in as soon as 546 * all commands are drained. 547 * 548 * LOCKING: 549 * spin_lock_irqsave(host lock) 550 */ 551 void ata_port_schedule_eh(struct ata_port *ap) 552 { 553 WARN_ON(!ap->ops->error_handler); 554 555 if (ap->pflags & ATA_PFLAG_INITIALIZING) 556 return; 557 558 ap->pflags |= ATA_PFLAG_EH_PENDING; 559 scsi_schedule_eh(ap->scsi_host); 560 561 DPRINTK("port EH scheduled\n"); 562 } 563 564 /** 565 * ata_port_abort - abort all qc's on the port 566 * @ap: ATA port to abort qc's for 567 * 568 * Abort all active qc's of @ap and schedule EH. 569 * 570 * LOCKING: 571 * spin_lock_irqsave(host lock) 572 * 573 * RETURNS: 574 * Number of aborted qc's. 575 */ 576 int ata_port_abort(struct ata_port *ap) 577 { 578 int tag, nr_aborted = 0; 579 580 WARN_ON(!ap->ops->error_handler); 581 582 for (tag = 0; tag < ATA_MAX_QUEUE; tag++) { 583 struct ata_queued_cmd *qc = ata_qc_from_tag(ap, tag); 584 585 if (qc) { 586 qc->flags |= ATA_QCFLAG_FAILED; 587 ata_qc_complete(qc); 588 nr_aborted++; 589 } 590 } 591 592 if (!nr_aborted) 593 ata_port_schedule_eh(ap); 594 595 return nr_aborted; 596 } 597 598 /** 599 * __ata_port_freeze - freeze port 600 * @ap: ATA port to freeze 601 * 602 * This function is called when HSM violation or some other 603 * condition disrupts normal operation of the port. Frozen port 604 * is not allowed to perform any operation until the port is 605 * thawed, which usually follows a successful reset. 606 * 607 * ap->ops->freeze() callback can be used for freezing the port 608 * hardware-wise (e.g. mask interrupt and stop DMA engine). If a 609 * port cannot be frozen hardware-wise, the interrupt handler 610 * must ack and clear interrupts unconditionally while the port 611 * is frozen. 612 * 613 * LOCKING: 614 * spin_lock_irqsave(host lock) 615 */ 616 static void __ata_port_freeze(struct ata_port *ap) 617 { 618 WARN_ON(!ap->ops->error_handler); 619 620 if (ap->ops->freeze) 621 ap->ops->freeze(ap); 622 623 ap->pflags |= ATA_PFLAG_FROZEN; 624 625 DPRINTK("ata%u port frozen\n", ap->print_id); 626 } 627 628 /** 629 * ata_port_freeze - abort & freeze port 630 * @ap: ATA port to freeze 631 * 632 * Abort and freeze @ap. 633 * 634 * LOCKING: 635 * spin_lock_irqsave(host lock) 636 * 637 * RETURNS: 638 * Number of aborted commands. 639 */ 640 int ata_port_freeze(struct ata_port *ap) 641 { 642 int nr_aborted; 643 644 WARN_ON(!ap->ops->error_handler); 645 646 nr_aborted = ata_port_abort(ap); 647 __ata_port_freeze(ap); 648 649 return nr_aborted; 650 } 651 652 /** 653 * ata_eh_freeze_port - EH helper to freeze port 654 * @ap: ATA port to freeze 655 * 656 * Freeze @ap. 657 * 658 * LOCKING: 659 * None. 660 */ 661 void ata_eh_freeze_port(struct ata_port *ap) 662 { 663 unsigned long flags; 664 665 if (!ap->ops->error_handler) 666 return; 667 668 spin_lock_irqsave(ap->lock, flags); 669 __ata_port_freeze(ap); 670 spin_unlock_irqrestore(ap->lock, flags); 671 } 672 673 /** 674 * ata_port_thaw_port - EH helper to thaw port 675 * @ap: ATA port to thaw 676 * 677 * Thaw frozen port @ap. 678 * 679 * LOCKING: 680 * None. 681 */ 682 void ata_eh_thaw_port(struct ata_port *ap) 683 { 684 unsigned long flags; 685 686 if (!ap->ops->error_handler) 687 return; 688 689 spin_lock_irqsave(ap->lock, flags); 690 691 ap->pflags &= ~ATA_PFLAG_FROZEN; 692 693 if (ap->ops->thaw) 694 ap->ops->thaw(ap); 695 696 spin_unlock_irqrestore(ap->lock, flags); 697 698 DPRINTK("ata%u port thawed\n", ap->print_id); 699 } 700 701 static void ata_eh_scsidone(struct scsi_cmnd *scmd) 702 { 703 /* nada */ 704 } 705 706 static void __ata_eh_qc_complete(struct ata_queued_cmd *qc) 707 { 708 struct ata_port *ap = qc->ap; 709 struct scsi_cmnd *scmd = qc->scsicmd; 710 unsigned long flags; 711 712 spin_lock_irqsave(ap->lock, flags); 713 qc->scsidone = ata_eh_scsidone; 714 __ata_qc_complete(qc); 715 WARN_ON(ata_tag_valid(qc->tag)); 716 spin_unlock_irqrestore(ap->lock, flags); 717 718 scsi_eh_finish_cmd(scmd, &ap->eh_done_q); 719 } 720 721 /** 722 * ata_eh_qc_complete - Complete an active ATA command from EH 723 * @qc: Command to complete 724 * 725 * Indicate to the mid and upper layers that an ATA command has 726 * completed. To be used from EH. 727 */ 728 void ata_eh_qc_complete(struct ata_queued_cmd *qc) 729 { 730 struct scsi_cmnd *scmd = qc->scsicmd; 731 scmd->retries = scmd->allowed; 732 __ata_eh_qc_complete(qc); 733 } 734 735 /** 736 * ata_eh_qc_retry - Tell midlayer to retry an ATA command after EH 737 * @qc: Command to retry 738 * 739 * Indicate to the mid and upper layers that an ATA command 740 * should be retried. To be used from EH. 741 * 742 * SCSI midlayer limits the number of retries to scmd->allowed. 743 * scmd->retries is decremented for commands which get retried 744 * due to unrelated failures (qc->err_mask is zero). 745 */ 746 void ata_eh_qc_retry(struct ata_queued_cmd *qc) 747 { 748 struct scsi_cmnd *scmd = qc->scsicmd; 749 if (!qc->err_mask && scmd->retries) 750 scmd->retries--; 751 __ata_eh_qc_complete(qc); 752 } 753 754 /** 755 * ata_eh_detach_dev - detach ATA device 756 * @dev: ATA device to detach 757 * 758 * Detach @dev. 759 * 760 * LOCKING: 761 * None. 762 */ 763 static void ata_eh_detach_dev(struct ata_device *dev) 764 { 765 struct ata_port *ap = dev->ap; 766 unsigned long flags; 767 768 ata_dev_disable(dev); 769 770 spin_lock_irqsave(ap->lock, flags); 771 772 dev->flags &= ~ATA_DFLAG_DETACH; 773 774 if (ata_scsi_offline_dev(dev)) { 775 dev->flags |= ATA_DFLAG_DETACHED; 776 ap->pflags |= ATA_PFLAG_SCSI_HOTPLUG; 777 } 778 779 /* clear per-dev EH actions */ 780 ata_eh_clear_action(dev, &ap->eh_info, ATA_EH_PERDEV_MASK); 781 ata_eh_clear_action(dev, &ap->eh_context.i, ATA_EH_PERDEV_MASK); 782 783 spin_unlock_irqrestore(ap->lock, flags); 784 } 785 786 /** 787 * ata_eh_about_to_do - about to perform eh_action 788 * @ap: target ATA port 789 * @dev: target ATA dev for per-dev action (can be NULL) 790 * @action: action about to be performed 791 * 792 * Called just before performing EH actions to clear related bits 793 * in @ap->eh_info such that eh actions are not unnecessarily 794 * repeated. 795 * 796 * LOCKING: 797 * None. 798 */ 799 static void ata_eh_about_to_do(struct ata_port *ap, struct ata_device *dev, 800 unsigned int action) 801 { 802 unsigned long flags; 803 struct ata_eh_info *ehi = &ap->eh_info; 804 struct ata_eh_context *ehc = &ap->eh_context; 805 806 spin_lock_irqsave(ap->lock, flags); 807 808 /* Reset is represented by combination of actions and EHI 809 * flags. Suck in all related bits before clearing eh_info to 810 * avoid losing requested action. 811 */ 812 if (action & ATA_EH_RESET_MASK) { 813 ehc->i.action |= ehi->action & ATA_EH_RESET_MASK; 814 ehc->i.flags |= ehi->flags & ATA_EHI_RESET_MODIFIER_MASK; 815 816 /* make sure all reset actions are cleared & clear EHI flags */ 817 action |= ATA_EH_RESET_MASK; 818 ehi->flags &= ~ATA_EHI_RESET_MODIFIER_MASK; 819 } 820 821 ata_eh_clear_action(dev, ehi, action); 822 823 if (!(ehc->i.flags & ATA_EHI_QUIET)) 824 ap->pflags |= ATA_PFLAG_RECOVERED; 825 826 spin_unlock_irqrestore(ap->lock, flags); 827 } 828 829 /** 830 * ata_eh_done - EH action complete 831 * @ap: target ATA port 832 * @dev: target ATA dev for per-dev action (can be NULL) 833 * @action: action just completed 834 * 835 * Called right after performing EH actions to clear related bits 836 * in @ap->eh_context. 837 * 838 * LOCKING: 839 * None. 840 */ 841 static void ata_eh_done(struct ata_port *ap, struct ata_device *dev, 842 unsigned int action) 843 { 844 /* if reset is complete, clear all reset actions & reset modifier */ 845 if (action & ATA_EH_RESET_MASK) { 846 action |= ATA_EH_RESET_MASK; 847 ap->eh_context.i.flags &= ~ATA_EHI_RESET_MODIFIER_MASK; 848 } 849 850 ata_eh_clear_action(dev, &ap->eh_context.i, action); 851 } 852 853 /** 854 * ata_err_string - convert err_mask to descriptive string 855 * @err_mask: error mask to convert to string 856 * 857 * Convert @err_mask to descriptive string. Errors are 858 * prioritized according to severity and only the most severe 859 * error is reported. 860 * 861 * LOCKING: 862 * None. 863 * 864 * RETURNS: 865 * Descriptive string for @err_mask 866 */ 867 static const char * ata_err_string(unsigned int err_mask) 868 { 869 if (err_mask & AC_ERR_HOST_BUS) 870 return "host bus error"; 871 if (err_mask & AC_ERR_ATA_BUS) 872 return "ATA bus error"; 873 if (err_mask & AC_ERR_TIMEOUT) 874 return "timeout"; 875 if (err_mask & AC_ERR_HSM) 876 return "HSM violation"; 877 if (err_mask & AC_ERR_SYSTEM) 878 return "internal error"; 879 if (err_mask & AC_ERR_MEDIA) 880 return "media error"; 881 if (err_mask & AC_ERR_INVALID) 882 return "invalid argument"; 883 if (err_mask & AC_ERR_DEV) 884 return "device error"; 885 return "unknown error"; 886 } 887 888 /** 889 * ata_read_log_page - read a specific log page 890 * @dev: target device 891 * @page: page to read 892 * @buf: buffer to store read page 893 * @sectors: number of sectors to read 894 * 895 * Read log page using READ_LOG_EXT command. 896 * 897 * LOCKING: 898 * Kernel thread context (may sleep). 899 * 900 * RETURNS: 901 * 0 on success, AC_ERR_* mask otherwise. 902 */ 903 static unsigned int ata_read_log_page(struct ata_device *dev, 904 u8 page, void *buf, unsigned int sectors) 905 { 906 struct ata_taskfile tf; 907 unsigned int err_mask; 908 909 DPRINTK("read log page - page %d\n", page); 910 911 ata_tf_init(dev, &tf); 912 tf.command = ATA_CMD_READ_LOG_EXT; 913 tf.lbal = page; 914 tf.nsect = sectors; 915 tf.hob_nsect = sectors >> 8; 916 tf.flags |= ATA_TFLAG_ISADDR | ATA_TFLAG_LBA48 | ATA_TFLAG_DEVICE; 917 tf.protocol = ATA_PROT_PIO; 918 919 err_mask = ata_exec_internal(dev, &tf, NULL, DMA_FROM_DEVICE, 920 buf, sectors * ATA_SECT_SIZE); 921 922 DPRINTK("EXIT, err_mask=%x\n", err_mask); 923 return err_mask; 924 } 925 926 /** 927 * ata_eh_read_log_10h - Read log page 10h for NCQ error details 928 * @dev: Device to read log page 10h from 929 * @tag: Resulting tag of the failed command 930 * @tf: Resulting taskfile registers of the failed command 931 * 932 * Read log page 10h to obtain NCQ error details and clear error 933 * condition. 934 * 935 * LOCKING: 936 * Kernel thread context (may sleep). 937 * 938 * RETURNS: 939 * 0 on success, -errno otherwise. 940 */ 941 static int ata_eh_read_log_10h(struct ata_device *dev, 942 int *tag, struct ata_taskfile *tf) 943 { 944 u8 *buf = dev->ap->sector_buf; 945 unsigned int err_mask; 946 u8 csum; 947 int i; 948 949 err_mask = ata_read_log_page(dev, ATA_LOG_SATA_NCQ, buf, 1); 950 if (err_mask) 951 return -EIO; 952 953 csum = 0; 954 for (i = 0; i < ATA_SECT_SIZE; i++) 955 csum += buf[i]; 956 if (csum) 957 ata_dev_printk(dev, KERN_WARNING, 958 "invalid checksum 0x%x on log page 10h\n", csum); 959 960 if (buf[0] & 0x80) 961 return -ENOENT; 962 963 *tag = buf[0] & 0x1f; 964 965 tf->command = buf[2]; 966 tf->feature = buf[3]; 967 tf->lbal = buf[4]; 968 tf->lbam = buf[5]; 969 tf->lbah = buf[6]; 970 tf->device = buf[7]; 971 tf->hob_lbal = buf[8]; 972 tf->hob_lbam = buf[9]; 973 tf->hob_lbah = buf[10]; 974 tf->nsect = buf[12]; 975 tf->hob_nsect = buf[13]; 976 977 return 0; 978 } 979 980 /** 981 * atapi_eh_request_sense - perform ATAPI REQUEST_SENSE 982 * @dev: device to perform REQUEST_SENSE to 983 * @sense_buf: result sense data buffer (SCSI_SENSE_BUFFERSIZE bytes long) 984 * 985 * Perform ATAPI REQUEST_SENSE after the device reported CHECK 986 * SENSE. This function is EH helper. 987 * 988 * LOCKING: 989 * Kernel thread context (may sleep). 990 * 991 * RETURNS: 992 * 0 on success, AC_ERR_* mask on failure 993 */ 994 static unsigned int atapi_eh_request_sense(struct ata_queued_cmd *qc) 995 { 996 struct ata_device *dev = qc->dev; 997 unsigned char *sense_buf = qc->scsicmd->sense_buffer; 998 struct ata_port *ap = dev->ap; 999 struct ata_taskfile tf; 1000 u8 cdb[ATAPI_CDB_LEN]; 1001 1002 DPRINTK("ATAPI request sense\n"); 1003 1004 /* FIXME: is this needed? */ 1005 memset(sense_buf, 0, SCSI_SENSE_BUFFERSIZE); 1006 1007 /* initialize sense_buf with the error register, 1008 * for the case where they are -not- overwritten 1009 */ 1010 sense_buf[0] = 0x70; 1011 sense_buf[2] = qc->result_tf.feature >> 4; 1012 1013 /* some devices time out if garbage left in tf */ 1014 ata_tf_init(dev, &tf); 1015 1016 memset(cdb, 0, ATAPI_CDB_LEN); 1017 cdb[0] = REQUEST_SENSE; 1018 cdb[4] = SCSI_SENSE_BUFFERSIZE; 1019 1020 tf.flags |= ATA_TFLAG_ISADDR | ATA_TFLAG_DEVICE; 1021 tf.command = ATA_CMD_PACKET; 1022 1023 /* is it pointless to prefer PIO for "safety reasons"? */ 1024 if (ap->flags & ATA_FLAG_PIO_DMA) { 1025 tf.protocol = ATA_PROT_ATAPI_DMA; 1026 tf.feature |= ATAPI_PKT_DMA; 1027 } else { 1028 tf.protocol = ATA_PROT_ATAPI; 1029 tf.lbam = (8 * 1024) & 0xff; 1030 tf.lbah = (8 * 1024) >> 8; 1031 } 1032 1033 return ata_exec_internal(dev, &tf, cdb, DMA_FROM_DEVICE, 1034 sense_buf, SCSI_SENSE_BUFFERSIZE); 1035 } 1036 1037 /** 1038 * ata_eh_analyze_serror - analyze SError for a failed port 1039 * @ap: ATA port to analyze SError for 1040 * 1041 * Analyze SError if available and further determine cause of 1042 * failure. 1043 * 1044 * LOCKING: 1045 * None. 1046 */ 1047 static void ata_eh_analyze_serror(struct ata_port *ap) 1048 { 1049 struct ata_eh_context *ehc = &ap->eh_context; 1050 u32 serror = ehc->i.serror; 1051 unsigned int err_mask = 0, action = 0; 1052 1053 if (serror & SERR_PERSISTENT) { 1054 err_mask |= AC_ERR_ATA_BUS; 1055 action |= ATA_EH_HARDRESET; 1056 } 1057 if (serror & 1058 (SERR_DATA_RECOVERED | SERR_COMM_RECOVERED | SERR_DATA)) { 1059 err_mask |= AC_ERR_ATA_BUS; 1060 action |= ATA_EH_SOFTRESET; 1061 } 1062 if (serror & SERR_PROTOCOL) { 1063 err_mask |= AC_ERR_HSM; 1064 action |= ATA_EH_SOFTRESET; 1065 } 1066 if (serror & SERR_INTERNAL) { 1067 err_mask |= AC_ERR_SYSTEM; 1068 action |= ATA_EH_HARDRESET; 1069 } 1070 if (serror & (SERR_PHYRDY_CHG | SERR_DEV_XCHG)) 1071 ata_ehi_hotplugged(&ehc->i); 1072 1073 ehc->i.err_mask |= err_mask; 1074 ehc->i.action |= action; 1075 } 1076 1077 /** 1078 * ata_eh_analyze_ncq_error - analyze NCQ error 1079 * @ap: ATA port to analyze NCQ error for 1080 * 1081 * Read log page 10h, determine the offending qc and acquire 1082 * error status TF. For NCQ device errors, all LLDDs have to do 1083 * is setting AC_ERR_DEV in ehi->err_mask. This function takes 1084 * care of the rest. 1085 * 1086 * LOCKING: 1087 * Kernel thread context (may sleep). 1088 */ 1089 static void ata_eh_analyze_ncq_error(struct ata_port *ap) 1090 { 1091 struct ata_eh_context *ehc = &ap->eh_context; 1092 struct ata_device *dev = ap->device; 1093 struct ata_queued_cmd *qc; 1094 struct ata_taskfile tf; 1095 int tag, rc; 1096 1097 /* if frozen, we can't do much */ 1098 if (ap->pflags & ATA_PFLAG_FROZEN) 1099 return; 1100 1101 /* is it NCQ device error? */ 1102 if (!ap->sactive || !(ehc->i.err_mask & AC_ERR_DEV)) 1103 return; 1104 1105 /* has LLDD analyzed already? */ 1106 for (tag = 0; tag < ATA_MAX_QUEUE; tag++) { 1107 qc = __ata_qc_from_tag(ap, tag); 1108 1109 if (!(qc->flags & ATA_QCFLAG_FAILED)) 1110 continue; 1111 1112 if (qc->err_mask) 1113 return; 1114 } 1115 1116 /* okay, this error is ours */ 1117 rc = ata_eh_read_log_10h(dev, &tag, &tf); 1118 if (rc) { 1119 ata_port_printk(ap, KERN_ERR, "failed to read log page 10h " 1120 "(errno=%d)\n", rc); 1121 return; 1122 } 1123 1124 if (!(ap->sactive & (1 << tag))) { 1125 ata_port_printk(ap, KERN_ERR, "log page 10h reported " 1126 "inactive tag %d\n", tag); 1127 return; 1128 } 1129 1130 /* we've got the perpetrator, condemn it */ 1131 qc = __ata_qc_from_tag(ap, tag); 1132 memcpy(&qc->result_tf, &tf, sizeof(tf)); 1133 qc->err_mask |= AC_ERR_DEV; 1134 ehc->i.err_mask &= ~AC_ERR_DEV; 1135 } 1136 1137 /** 1138 * ata_eh_analyze_tf - analyze taskfile of a failed qc 1139 * @qc: qc to analyze 1140 * @tf: Taskfile registers to analyze 1141 * 1142 * Analyze taskfile of @qc and further determine cause of 1143 * failure. This function also requests ATAPI sense data if 1144 * avaliable. 1145 * 1146 * LOCKING: 1147 * Kernel thread context (may sleep). 1148 * 1149 * RETURNS: 1150 * Determined recovery action 1151 */ 1152 static unsigned int ata_eh_analyze_tf(struct ata_queued_cmd *qc, 1153 const struct ata_taskfile *tf) 1154 { 1155 unsigned int tmp, action = 0; 1156 u8 stat = tf->command, err = tf->feature; 1157 1158 if ((stat & (ATA_BUSY | ATA_DRQ | ATA_DRDY)) != ATA_DRDY) { 1159 qc->err_mask |= AC_ERR_HSM; 1160 return ATA_EH_SOFTRESET; 1161 } 1162 1163 if (stat & (ATA_ERR | ATA_DF)) 1164 qc->err_mask |= AC_ERR_DEV; 1165 else 1166 return 0; 1167 1168 switch (qc->dev->class) { 1169 case ATA_DEV_ATA: 1170 if (err & ATA_ICRC) 1171 qc->err_mask |= AC_ERR_ATA_BUS; 1172 if (err & ATA_UNC) 1173 qc->err_mask |= AC_ERR_MEDIA; 1174 if (err & ATA_IDNF) 1175 qc->err_mask |= AC_ERR_INVALID; 1176 break; 1177 1178 case ATA_DEV_ATAPI: 1179 if (!(qc->ap->pflags & ATA_PFLAG_FROZEN)) { 1180 tmp = atapi_eh_request_sense(qc); 1181 if (!tmp) { 1182 /* ATA_QCFLAG_SENSE_VALID is used to 1183 * tell atapi_qc_complete() that sense 1184 * data is already valid. 1185 * 1186 * TODO: interpret sense data and set 1187 * appropriate err_mask. 1188 */ 1189 qc->flags |= ATA_QCFLAG_SENSE_VALID; 1190 } else 1191 qc->err_mask |= tmp; 1192 } 1193 } 1194 1195 if (qc->err_mask & (AC_ERR_HSM | AC_ERR_TIMEOUT | AC_ERR_ATA_BUS)) 1196 action |= ATA_EH_SOFTRESET; 1197 1198 return action; 1199 } 1200 1201 static int ata_eh_categorize_error(int is_io, unsigned int err_mask) 1202 { 1203 if (err_mask & AC_ERR_ATA_BUS) 1204 return 1; 1205 1206 if (err_mask & AC_ERR_TIMEOUT) 1207 return 2; 1208 1209 if (is_io) { 1210 if (err_mask & AC_ERR_HSM) 1211 return 2; 1212 if ((err_mask & 1213 (AC_ERR_DEV|AC_ERR_MEDIA|AC_ERR_INVALID)) == AC_ERR_DEV) 1214 return 3; 1215 } 1216 1217 return 0; 1218 } 1219 1220 struct speed_down_verdict_arg { 1221 u64 since; 1222 int nr_errors[4]; 1223 }; 1224 1225 static int speed_down_verdict_cb(struct ata_ering_entry *ent, void *void_arg) 1226 { 1227 struct speed_down_verdict_arg *arg = void_arg; 1228 int cat = ata_eh_categorize_error(ent->is_io, ent->err_mask); 1229 1230 if (ent->timestamp < arg->since) 1231 return -1; 1232 1233 arg->nr_errors[cat]++; 1234 return 0; 1235 } 1236 1237 /** 1238 * ata_eh_speed_down_verdict - Determine speed down verdict 1239 * @dev: Device of interest 1240 * 1241 * This function examines error ring of @dev and determines 1242 * whether NCQ needs to be turned off, transfer speed should be 1243 * stepped down, or falling back to PIO is necessary. 1244 * 1245 * Cat-1 is ATA_BUS error for any command. 1246 * 1247 * Cat-2 is TIMEOUT for any command or HSM violation for known 1248 * supported commands. 1249 * 1250 * Cat-3 is is unclassified DEV error for known supported 1251 * command. 1252 * 1253 * NCQ needs to be turned off if there have been more than 3 1254 * Cat-2 + Cat-3 errors during last 10 minutes. 1255 * 1256 * Speed down is necessary if there have been more than 3 Cat-1 + 1257 * Cat-2 errors or 10 Cat-3 errors during last 10 minutes. 1258 * 1259 * Falling back to PIO mode is necessary if there have been more 1260 * than 10 Cat-1 + Cat-2 + Cat-3 errors during last 5 minutes. 1261 * 1262 * LOCKING: 1263 * Inherited from caller. 1264 * 1265 * RETURNS: 1266 * OR of ATA_EH_SPDN_* flags. 1267 */ 1268 static unsigned int ata_eh_speed_down_verdict(struct ata_device *dev) 1269 { 1270 const u64 j5mins = 5LLU * 60 * HZ, j10mins = 10LLU * 60 * HZ; 1271 u64 j64 = get_jiffies_64(); 1272 struct speed_down_verdict_arg arg; 1273 unsigned int verdict = 0; 1274 1275 /* scan past 10 mins of error history */ 1276 memset(&arg, 0, sizeof(arg)); 1277 arg.since = j64 - min(j64, j10mins); 1278 ata_ering_map(&dev->ering, speed_down_verdict_cb, &arg); 1279 1280 if (arg.nr_errors[2] + arg.nr_errors[3] > 3) 1281 verdict |= ATA_EH_SPDN_NCQ_OFF; 1282 if (arg.nr_errors[1] + arg.nr_errors[2] > 3 || arg.nr_errors[3] > 10) 1283 verdict |= ATA_EH_SPDN_SPEED_DOWN; 1284 1285 /* scan past 3 mins of error history */ 1286 memset(&arg, 0, sizeof(arg)); 1287 arg.since = j64 - min(j64, j5mins); 1288 ata_ering_map(&dev->ering, speed_down_verdict_cb, &arg); 1289 1290 if (arg.nr_errors[1] + arg.nr_errors[2] + arg.nr_errors[3] > 10) 1291 verdict |= ATA_EH_SPDN_FALLBACK_TO_PIO; 1292 1293 return verdict; 1294 } 1295 1296 /** 1297 * ata_eh_speed_down - record error and speed down if necessary 1298 * @dev: Failed device 1299 * @is_io: Did the device fail during normal IO? 1300 * @err_mask: err_mask of the error 1301 * 1302 * Record error and examine error history to determine whether 1303 * adjusting transmission speed is necessary. It also sets 1304 * transmission limits appropriately if such adjustment is 1305 * necessary. 1306 * 1307 * LOCKING: 1308 * Kernel thread context (may sleep). 1309 * 1310 * RETURNS: 1311 * Determined recovery action. 1312 */ 1313 static unsigned int ata_eh_speed_down(struct ata_device *dev, int is_io, 1314 unsigned int err_mask) 1315 { 1316 unsigned int verdict; 1317 unsigned int action = 0; 1318 1319 /* don't bother if Cat-0 error */ 1320 if (ata_eh_categorize_error(is_io, err_mask) == 0) 1321 return 0; 1322 1323 /* record error and determine whether speed down is necessary */ 1324 ata_ering_record(&dev->ering, is_io, err_mask); 1325 verdict = ata_eh_speed_down_verdict(dev); 1326 1327 /* turn off NCQ? */ 1328 if ((verdict & ATA_EH_SPDN_NCQ_OFF) && 1329 (dev->flags & (ATA_DFLAG_PIO | ATA_DFLAG_NCQ | 1330 ATA_DFLAG_NCQ_OFF)) == ATA_DFLAG_NCQ) { 1331 dev->flags |= ATA_DFLAG_NCQ_OFF; 1332 ata_dev_printk(dev, KERN_WARNING, 1333 "NCQ disabled due to excessive errors\n"); 1334 goto done; 1335 } 1336 1337 /* speed down? */ 1338 if (verdict & ATA_EH_SPDN_SPEED_DOWN) { 1339 /* speed down SATA link speed if possible */ 1340 if (sata_down_spd_limit(dev->ap) == 0) { 1341 action |= ATA_EH_HARDRESET; 1342 goto done; 1343 } 1344 1345 /* lower transfer mode */ 1346 if (dev->spdn_cnt < 2) { 1347 static const int dma_dnxfer_sel[] = 1348 { ATA_DNXFER_DMA, ATA_DNXFER_40C }; 1349 static const int pio_dnxfer_sel[] = 1350 { ATA_DNXFER_PIO, ATA_DNXFER_FORCE_PIO0 }; 1351 int sel; 1352 1353 if (dev->xfer_shift != ATA_SHIFT_PIO) 1354 sel = dma_dnxfer_sel[dev->spdn_cnt]; 1355 else 1356 sel = pio_dnxfer_sel[dev->spdn_cnt]; 1357 1358 dev->spdn_cnt++; 1359 1360 if (ata_down_xfermask_limit(dev, sel) == 0) { 1361 action |= ATA_EH_SOFTRESET; 1362 goto done; 1363 } 1364 } 1365 } 1366 1367 /* Fall back to PIO? Slowing down to PIO is meaningless for 1368 * SATA. Consider it only for PATA. 1369 */ 1370 if ((verdict & ATA_EH_SPDN_FALLBACK_TO_PIO) && (dev->spdn_cnt >= 2) && 1371 (dev->ap->cbl != ATA_CBL_SATA) && 1372 (dev->xfer_shift != ATA_SHIFT_PIO)) { 1373 if (ata_down_xfermask_limit(dev, ATA_DNXFER_FORCE_PIO) == 0) { 1374 dev->spdn_cnt = 0; 1375 action |= ATA_EH_SOFTRESET; 1376 goto done; 1377 } 1378 } 1379 1380 return 0; 1381 done: 1382 /* device has been slowed down, blow error history */ 1383 ata_ering_clear(&dev->ering); 1384 return action; 1385 } 1386 1387 /** 1388 * ata_eh_autopsy - analyze error and determine recovery action 1389 * @ap: ATA port to perform autopsy on 1390 * 1391 * Analyze why @ap failed and determine which recovery action is 1392 * needed. This function also sets more detailed AC_ERR_* values 1393 * and fills sense data for ATAPI CHECK SENSE. 1394 * 1395 * LOCKING: 1396 * Kernel thread context (may sleep). 1397 */ 1398 static void ata_eh_autopsy(struct ata_port *ap) 1399 { 1400 struct ata_eh_context *ehc = &ap->eh_context; 1401 unsigned int all_err_mask = 0; 1402 int tag, is_io = 0; 1403 u32 serror; 1404 int rc; 1405 1406 DPRINTK("ENTER\n"); 1407 1408 if (ehc->i.flags & ATA_EHI_NO_AUTOPSY) 1409 return; 1410 1411 /* obtain and analyze SError */ 1412 rc = sata_scr_read(ap, SCR_ERROR, &serror); 1413 if (rc == 0) { 1414 ehc->i.serror |= serror; 1415 ata_eh_analyze_serror(ap); 1416 } else if (rc != -EOPNOTSUPP) 1417 ehc->i.action |= ATA_EH_HARDRESET; 1418 1419 /* analyze NCQ failure */ 1420 ata_eh_analyze_ncq_error(ap); 1421 1422 /* any real error trumps AC_ERR_OTHER */ 1423 if (ehc->i.err_mask & ~AC_ERR_OTHER) 1424 ehc->i.err_mask &= ~AC_ERR_OTHER; 1425 1426 all_err_mask |= ehc->i.err_mask; 1427 1428 for (tag = 0; tag < ATA_MAX_QUEUE; tag++) { 1429 struct ata_queued_cmd *qc = __ata_qc_from_tag(ap, tag); 1430 1431 if (!(qc->flags & ATA_QCFLAG_FAILED)) 1432 continue; 1433 1434 /* inherit upper level err_mask */ 1435 qc->err_mask |= ehc->i.err_mask; 1436 1437 /* analyze TF */ 1438 ehc->i.action |= ata_eh_analyze_tf(qc, &qc->result_tf); 1439 1440 /* DEV errors are probably spurious in case of ATA_BUS error */ 1441 if (qc->err_mask & AC_ERR_ATA_BUS) 1442 qc->err_mask &= ~(AC_ERR_DEV | AC_ERR_MEDIA | 1443 AC_ERR_INVALID); 1444 1445 /* any real error trumps unknown error */ 1446 if (qc->err_mask & ~AC_ERR_OTHER) 1447 qc->err_mask &= ~AC_ERR_OTHER; 1448 1449 /* SENSE_VALID trumps dev/unknown error and revalidation */ 1450 if (qc->flags & ATA_QCFLAG_SENSE_VALID) { 1451 qc->err_mask &= ~(AC_ERR_DEV | AC_ERR_OTHER); 1452 ehc->i.action &= ~ATA_EH_REVALIDATE; 1453 } 1454 1455 /* accumulate error info */ 1456 ehc->i.dev = qc->dev; 1457 all_err_mask |= qc->err_mask; 1458 if (qc->flags & ATA_QCFLAG_IO) 1459 is_io = 1; 1460 } 1461 1462 /* enforce default EH actions */ 1463 if (ap->pflags & ATA_PFLAG_FROZEN || 1464 all_err_mask & (AC_ERR_HSM | AC_ERR_TIMEOUT)) 1465 ehc->i.action |= ATA_EH_SOFTRESET; 1466 else if (all_err_mask) 1467 ehc->i.action |= ATA_EH_REVALIDATE; 1468 1469 /* if we have offending qcs and the associated failed device */ 1470 if (ehc->i.dev) { 1471 /* speed down */ 1472 ehc->i.action |= ata_eh_speed_down(ehc->i.dev, is_io, 1473 all_err_mask); 1474 1475 /* perform per-dev EH action only on the offending device */ 1476 ehc->i.dev_action[ehc->i.dev->devno] |= 1477 ehc->i.action & ATA_EH_PERDEV_MASK; 1478 ehc->i.action &= ~ATA_EH_PERDEV_MASK; 1479 } 1480 1481 DPRINTK("EXIT\n"); 1482 } 1483 1484 /** 1485 * ata_eh_report - report error handling to user 1486 * @ap: ATA port EH is going on 1487 * 1488 * Report EH to user. 1489 * 1490 * LOCKING: 1491 * None. 1492 */ 1493 static void ata_eh_report(struct ata_port *ap) 1494 { 1495 struct ata_eh_context *ehc = &ap->eh_context; 1496 const char *frozen, *desc; 1497 int tag, nr_failed = 0; 1498 1499 desc = NULL; 1500 if (ehc->i.desc[0] != '\0') 1501 desc = ehc->i.desc; 1502 1503 for (tag = 0; tag < ATA_MAX_QUEUE; tag++) { 1504 struct ata_queued_cmd *qc = __ata_qc_from_tag(ap, tag); 1505 1506 if (!(qc->flags & ATA_QCFLAG_FAILED)) 1507 continue; 1508 if (qc->flags & ATA_QCFLAG_SENSE_VALID && !qc->err_mask) 1509 continue; 1510 1511 nr_failed++; 1512 } 1513 1514 if (!nr_failed && !ehc->i.err_mask) 1515 return; 1516 1517 frozen = ""; 1518 if (ap->pflags & ATA_PFLAG_FROZEN) 1519 frozen = " frozen"; 1520 1521 if (ehc->i.dev) { 1522 ata_dev_printk(ehc->i.dev, KERN_ERR, "exception Emask 0x%x " 1523 "SAct 0x%x SErr 0x%x action 0x%x%s\n", 1524 ehc->i.err_mask, ap->sactive, ehc->i.serror, 1525 ehc->i.action, frozen); 1526 if (desc) 1527 ata_dev_printk(ehc->i.dev, KERN_ERR, "(%s)\n", desc); 1528 } else { 1529 ata_port_printk(ap, KERN_ERR, "exception Emask 0x%x " 1530 "SAct 0x%x SErr 0x%x action 0x%x%s\n", 1531 ehc->i.err_mask, ap->sactive, ehc->i.serror, 1532 ehc->i.action, frozen); 1533 if (desc) 1534 ata_port_printk(ap, KERN_ERR, "(%s)\n", desc); 1535 } 1536 1537 for (tag = 0; tag < ATA_MAX_QUEUE; tag++) { 1538 static const char *dma_str[] = { 1539 [DMA_BIDIRECTIONAL] = "bidi", 1540 [DMA_TO_DEVICE] = "out", 1541 [DMA_FROM_DEVICE] = "in", 1542 [DMA_NONE] = "", 1543 }; 1544 struct ata_queued_cmd *qc = __ata_qc_from_tag(ap, tag); 1545 struct ata_taskfile *cmd = &qc->tf, *res = &qc->result_tf; 1546 1547 if (!(qc->flags & ATA_QCFLAG_FAILED) || !qc->err_mask) 1548 continue; 1549 1550 ata_dev_printk(qc->dev, KERN_ERR, 1551 "cmd %02x/%02x:%02x:%02x:%02x:%02x/%02x:%02x:%02x:%02x:%02x/%02x " 1552 "tag %d cdb 0x%x data %u %s\n " 1553 "res %02x/%02x:%02x:%02x:%02x:%02x/%02x:%02x:%02x:%02x:%02x/%02x " 1554 "Emask 0x%x (%s)\n", 1555 cmd->command, cmd->feature, cmd->nsect, 1556 cmd->lbal, cmd->lbam, cmd->lbah, 1557 cmd->hob_feature, cmd->hob_nsect, 1558 cmd->hob_lbal, cmd->hob_lbam, cmd->hob_lbah, 1559 cmd->device, qc->tag, qc->cdb[0], qc->nbytes, 1560 dma_str[qc->dma_dir], 1561 res->command, res->feature, res->nsect, 1562 res->lbal, res->lbam, res->lbah, 1563 res->hob_feature, res->hob_nsect, 1564 res->hob_lbal, res->hob_lbam, res->hob_lbah, 1565 res->device, qc->err_mask, ata_err_string(qc->err_mask)); 1566 } 1567 } 1568 1569 static int ata_do_reset(struct ata_port *ap, ata_reset_fn_t reset, 1570 unsigned int *classes, unsigned long deadline) 1571 { 1572 int i, rc; 1573 1574 for (i = 0; i < ATA_MAX_DEVICES; i++) 1575 classes[i] = ATA_DEV_UNKNOWN; 1576 1577 rc = reset(ap, classes, deadline); 1578 if (rc) 1579 return rc; 1580 1581 /* If any class isn't ATA_DEV_UNKNOWN, consider classification 1582 * is complete and convert all ATA_DEV_UNKNOWN to 1583 * ATA_DEV_NONE. 1584 */ 1585 for (i = 0; i < ATA_MAX_DEVICES; i++) 1586 if (classes[i] != ATA_DEV_UNKNOWN) 1587 break; 1588 1589 if (i < ATA_MAX_DEVICES) 1590 for (i = 0; i < ATA_MAX_DEVICES; i++) 1591 if (classes[i] == ATA_DEV_UNKNOWN) 1592 classes[i] = ATA_DEV_NONE; 1593 1594 return 0; 1595 } 1596 1597 static int ata_eh_followup_srst_needed(int rc, int classify, 1598 const unsigned int *classes) 1599 { 1600 if (rc == -EAGAIN) 1601 return 1; 1602 if (rc != 0) 1603 return 0; 1604 if (classify && classes[0] == ATA_DEV_UNKNOWN) 1605 return 1; 1606 return 0; 1607 } 1608 1609 static int ata_eh_reset(struct ata_port *ap, int classify, 1610 ata_prereset_fn_t prereset, ata_reset_fn_t softreset, 1611 ata_reset_fn_t hardreset, ata_postreset_fn_t postreset) 1612 { 1613 struct ata_eh_context *ehc = &ap->eh_context; 1614 unsigned int *classes = ehc->classes; 1615 int verbose = !(ehc->i.flags & ATA_EHI_QUIET); 1616 int try = 0; 1617 unsigned long deadline; 1618 unsigned int action; 1619 ata_reset_fn_t reset; 1620 int i, rc; 1621 1622 /* about to reset */ 1623 ata_eh_about_to_do(ap, NULL, ehc->i.action & ATA_EH_RESET_MASK); 1624 1625 /* Determine which reset to use and record in ehc->i.action. 1626 * prereset() may examine and modify it. 1627 */ 1628 action = ehc->i.action; 1629 ehc->i.action &= ~ATA_EH_RESET_MASK; 1630 if (softreset && (!hardreset || (!sata_set_spd_needed(ap) && 1631 !(action & ATA_EH_HARDRESET)))) 1632 ehc->i.action |= ATA_EH_SOFTRESET; 1633 else 1634 ehc->i.action |= ATA_EH_HARDRESET; 1635 1636 if (prereset) { 1637 rc = prereset(ap, jiffies + ATA_EH_PRERESET_TIMEOUT); 1638 if (rc) { 1639 if (rc == -ENOENT) { 1640 ata_port_printk(ap, KERN_DEBUG, 1641 "port disabled. ignoring.\n"); 1642 ap->eh_context.i.action &= ~ATA_EH_RESET_MASK; 1643 1644 for (i = 0; i < ATA_MAX_DEVICES; i++) 1645 classes[i] = ATA_DEV_NONE; 1646 1647 rc = 0; 1648 } else 1649 ata_port_printk(ap, KERN_ERR, 1650 "prereset failed (errno=%d)\n", rc); 1651 return rc; 1652 } 1653 } 1654 1655 /* prereset() might have modified ehc->i.action */ 1656 if (ehc->i.action & ATA_EH_HARDRESET) 1657 reset = hardreset; 1658 else if (ehc->i.action & ATA_EH_SOFTRESET) 1659 reset = softreset; 1660 else { 1661 /* prereset told us not to reset, bang classes and return */ 1662 for (i = 0; i < ATA_MAX_DEVICES; i++) 1663 classes[i] = ATA_DEV_NONE; 1664 return 0; 1665 } 1666 1667 /* did prereset() screw up? if so, fix up to avoid oopsing */ 1668 if (!reset) { 1669 if (softreset) 1670 reset = softreset; 1671 else 1672 reset = hardreset; 1673 } 1674 1675 retry: 1676 deadline = jiffies + ata_eh_reset_timeouts[try++]; 1677 1678 /* shut up during boot probing */ 1679 if (verbose) 1680 ata_port_printk(ap, KERN_INFO, "%s resetting port\n", 1681 reset == softreset ? "soft" : "hard"); 1682 1683 /* mark that this EH session started with reset */ 1684 if (reset == hardreset) 1685 ehc->i.flags |= ATA_EHI_DID_HARDRESET; 1686 else 1687 ehc->i.flags |= ATA_EHI_DID_SOFTRESET; 1688 1689 rc = ata_do_reset(ap, reset, classes, deadline); 1690 1691 if (reset == hardreset && 1692 ata_eh_followup_srst_needed(rc, classify, classes)) { 1693 /* okay, let's do follow-up softreset */ 1694 reset = softreset; 1695 1696 if (!reset) { 1697 ata_port_printk(ap, KERN_ERR, 1698 "follow-up softreset required " 1699 "but no softreset avaliable\n"); 1700 return -EINVAL; 1701 } 1702 1703 ata_eh_about_to_do(ap, NULL, ATA_EH_RESET_MASK); 1704 rc = ata_do_reset(ap, reset, classes, deadline); 1705 1706 if (rc == 0 && classify && 1707 classes[0] == ATA_DEV_UNKNOWN) { 1708 ata_port_printk(ap, KERN_ERR, 1709 "classification failed\n"); 1710 return -EINVAL; 1711 } 1712 } 1713 1714 if (rc && try < ARRAY_SIZE(ata_eh_reset_timeouts)) { 1715 unsigned long now = jiffies; 1716 1717 if (time_before(now, deadline)) { 1718 unsigned long delta = deadline - jiffies; 1719 1720 ata_port_printk(ap, KERN_WARNING, "reset failed " 1721 "(errno=%d), retrying in %u secs\n", 1722 rc, (jiffies_to_msecs(delta) + 999) / 1000); 1723 1724 schedule_timeout_uninterruptible(delta); 1725 } 1726 1727 if (reset == hardreset && 1728 try == ARRAY_SIZE(ata_eh_reset_timeouts) - 1) 1729 sata_down_spd_limit(ap); 1730 if (hardreset) 1731 reset = hardreset; 1732 goto retry; 1733 } 1734 1735 if (rc == 0) { 1736 /* After the reset, the device state is PIO 0 and the 1737 * controller state is undefined. Record the mode. 1738 */ 1739 for (i = 0; i < ATA_MAX_DEVICES; i++) 1740 ap->device[i].pio_mode = XFER_PIO_0; 1741 1742 if (postreset) 1743 postreset(ap, classes); 1744 1745 /* reset successful, schedule revalidation */ 1746 ata_eh_done(ap, NULL, ehc->i.action & ATA_EH_RESET_MASK); 1747 ehc->i.action |= ATA_EH_REVALIDATE; 1748 } 1749 1750 return rc; 1751 } 1752 1753 static int ata_eh_revalidate_and_attach(struct ata_port *ap, 1754 struct ata_device **r_failed_dev) 1755 { 1756 struct ata_eh_context *ehc = &ap->eh_context; 1757 struct ata_device *dev; 1758 unsigned int new_mask = 0; 1759 unsigned long flags; 1760 int i, rc = 0; 1761 1762 DPRINTK("ENTER\n"); 1763 1764 /* For PATA drive side cable detection to work, IDENTIFY must 1765 * be done backwards such that PDIAG- is released by the slave 1766 * device before the master device is identified. 1767 */ 1768 for (i = ATA_MAX_DEVICES - 1; i >= 0; i--) { 1769 unsigned int action, readid_flags = 0; 1770 1771 dev = &ap->device[i]; 1772 action = ata_eh_dev_action(dev); 1773 1774 if (ehc->i.flags & ATA_EHI_DID_RESET) 1775 readid_flags |= ATA_READID_POSTRESET; 1776 1777 if ((action & ATA_EH_REVALIDATE) && ata_dev_enabled(dev)) { 1778 if (ata_port_offline(ap)) { 1779 rc = -EIO; 1780 goto err; 1781 } 1782 1783 ata_eh_about_to_do(ap, dev, ATA_EH_REVALIDATE); 1784 rc = ata_dev_revalidate(dev, readid_flags); 1785 if (rc) 1786 goto err; 1787 1788 ata_eh_done(ap, dev, ATA_EH_REVALIDATE); 1789 1790 /* Configuration may have changed, reconfigure 1791 * transfer mode. 1792 */ 1793 ehc->i.flags |= ATA_EHI_SETMODE; 1794 1795 /* schedule the scsi_rescan_device() here */ 1796 queue_work(ata_aux_wq, &(ap->scsi_rescan_task)); 1797 } else if (dev->class == ATA_DEV_UNKNOWN && 1798 ehc->tries[dev->devno] && 1799 ata_class_enabled(ehc->classes[dev->devno])) { 1800 dev->class = ehc->classes[dev->devno]; 1801 1802 rc = ata_dev_read_id(dev, &dev->class, readid_flags, 1803 dev->id); 1804 switch (rc) { 1805 case 0: 1806 new_mask |= 1 << i; 1807 break; 1808 case -ENOENT: 1809 /* IDENTIFY was issued to non-existent 1810 * device. No need to reset. Just 1811 * thaw and kill the device. 1812 */ 1813 ata_eh_thaw_port(ap); 1814 dev->class = ATA_DEV_UNKNOWN; 1815 break; 1816 default: 1817 dev->class = ATA_DEV_UNKNOWN; 1818 goto err; 1819 } 1820 } 1821 } 1822 1823 /* PDIAG- should have been released, ask cable type if post-reset */ 1824 if ((ehc->i.flags & ATA_EHI_DID_RESET) && ap->ops->cable_detect) 1825 ap->cbl = ap->ops->cable_detect(ap); 1826 1827 /* Configure new devices forward such that user doesn't see 1828 * device detection messages backwards. 1829 */ 1830 for (i = 0; i < ATA_MAX_DEVICES; i++) { 1831 dev = &ap->device[i]; 1832 1833 if (!(new_mask & (1 << i))) 1834 continue; 1835 1836 ehc->i.flags |= ATA_EHI_PRINTINFO; 1837 rc = ata_dev_configure(dev); 1838 ehc->i.flags &= ~ATA_EHI_PRINTINFO; 1839 if (rc) 1840 goto err; 1841 1842 spin_lock_irqsave(ap->lock, flags); 1843 ap->pflags |= ATA_PFLAG_SCSI_HOTPLUG; 1844 spin_unlock_irqrestore(ap->lock, flags); 1845 1846 /* new device discovered, configure xfermode */ 1847 ehc->i.flags |= ATA_EHI_SETMODE; 1848 } 1849 1850 return 0; 1851 1852 err: 1853 *r_failed_dev = dev; 1854 DPRINTK("EXIT rc=%d\n", rc); 1855 return rc; 1856 } 1857 1858 static int ata_port_nr_enabled(struct ata_port *ap) 1859 { 1860 int i, cnt = 0; 1861 1862 for (i = 0; i < ATA_MAX_DEVICES; i++) 1863 if (ata_dev_enabled(&ap->device[i])) 1864 cnt++; 1865 return cnt; 1866 } 1867 1868 static int ata_port_nr_vacant(struct ata_port *ap) 1869 { 1870 int i, cnt = 0; 1871 1872 for (i = 0; i < ATA_MAX_DEVICES; i++) 1873 if (ap->device[i].class == ATA_DEV_UNKNOWN) 1874 cnt++; 1875 return cnt; 1876 } 1877 1878 static int ata_eh_skip_recovery(struct ata_port *ap) 1879 { 1880 struct ata_eh_context *ehc = &ap->eh_context; 1881 int i; 1882 1883 /* thaw frozen port, resume link and recover failed devices */ 1884 if ((ap->pflags & ATA_PFLAG_FROZEN) || 1885 (ehc->i.flags & ATA_EHI_RESUME_LINK) || ata_port_nr_enabled(ap)) 1886 return 0; 1887 1888 /* skip if class codes for all vacant slots are ATA_DEV_NONE */ 1889 for (i = 0; i < ATA_MAX_DEVICES; i++) { 1890 struct ata_device *dev = &ap->device[i]; 1891 1892 if (dev->class == ATA_DEV_UNKNOWN && 1893 ehc->classes[dev->devno] != ATA_DEV_NONE) 1894 return 0; 1895 } 1896 1897 return 1; 1898 } 1899 1900 /** 1901 * ata_eh_recover - recover host port after error 1902 * @ap: host port to recover 1903 * @prereset: prereset method (can be NULL) 1904 * @softreset: softreset method (can be NULL) 1905 * @hardreset: hardreset method (can be NULL) 1906 * @postreset: postreset method (can be NULL) 1907 * 1908 * This is the alpha and omega, eum and yang, heart and soul of 1909 * libata exception handling. On entry, actions required to 1910 * recover the port and hotplug requests are recorded in 1911 * eh_context. This function executes all the operations with 1912 * appropriate retrials and fallbacks to resurrect failed 1913 * devices, detach goners and greet newcomers. 1914 * 1915 * LOCKING: 1916 * Kernel thread context (may sleep). 1917 * 1918 * RETURNS: 1919 * 0 on success, -errno on failure. 1920 */ 1921 static int ata_eh_recover(struct ata_port *ap, ata_prereset_fn_t prereset, 1922 ata_reset_fn_t softreset, ata_reset_fn_t hardreset, 1923 ata_postreset_fn_t postreset) 1924 { 1925 struct ata_eh_context *ehc = &ap->eh_context; 1926 struct ata_device *dev; 1927 int i, rc; 1928 1929 DPRINTK("ENTER\n"); 1930 1931 /* prep for recovery */ 1932 for (i = 0; i < ATA_MAX_DEVICES; i++) { 1933 dev = &ap->device[i]; 1934 1935 ehc->tries[dev->devno] = ATA_EH_DEV_TRIES; 1936 1937 /* collect port action mask recorded in dev actions */ 1938 ehc->i.action |= ehc->i.dev_action[i] & ~ATA_EH_PERDEV_MASK; 1939 ehc->i.dev_action[i] &= ATA_EH_PERDEV_MASK; 1940 1941 /* process hotplug request */ 1942 if (dev->flags & ATA_DFLAG_DETACH) 1943 ata_eh_detach_dev(dev); 1944 1945 if (!ata_dev_enabled(dev) && 1946 ((ehc->i.probe_mask & (1 << dev->devno)) && 1947 !(ehc->did_probe_mask & (1 << dev->devno)))) { 1948 ata_eh_detach_dev(dev); 1949 ata_dev_init(dev); 1950 ehc->did_probe_mask |= (1 << dev->devno); 1951 ehc->i.action |= ATA_EH_SOFTRESET; 1952 } 1953 } 1954 1955 retry: 1956 rc = 0; 1957 1958 /* if UNLOADING, finish immediately */ 1959 if (ap->pflags & ATA_PFLAG_UNLOADING) 1960 goto out; 1961 1962 /* skip EH if possible. */ 1963 if (ata_eh_skip_recovery(ap)) 1964 ehc->i.action = 0; 1965 1966 for (i = 0; i < ATA_MAX_DEVICES; i++) 1967 ehc->classes[i] = ATA_DEV_UNKNOWN; 1968 1969 /* reset */ 1970 if (ehc->i.action & ATA_EH_RESET_MASK) { 1971 ata_eh_freeze_port(ap); 1972 1973 rc = ata_eh_reset(ap, ata_port_nr_vacant(ap), prereset, 1974 softreset, hardreset, postreset); 1975 if (rc) { 1976 ata_port_printk(ap, KERN_ERR, 1977 "reset failed, giving up\n"); 1978 goto out; 1979 } 1980 1981 ata_eh_thaw_port(ap); 1982 } 1983 1984 /* revalidate existing devices and attach new ones */ 1985 rc = ata_eh_revalidate_and_attach(ap, &dev); 1986 if (rc) 1987 goto dev_fail; 1988 1989 /* configure transfer mode if necessary */ 1990 if (ehc->i.flags & ATA_EHI_SETMODE) { 1991 rc = ata_set_mode(ap, &dev); 1992 if (rc) 1993 goto dev_fail; 1994 ehc->i.flags &= ~ATA_EHI_SETMODE; 1995 } 1996 1997 goto out; 1998 1999 dev_fail: 2000 ehc->tries[dev->devno]--; 2001 2002 switch (rc) { 2003 case -ENODEV: 2004 /* device missing or wrong IDENTIFY data, schedule probing */ 2005 ehc->i.probe_mask |= (1 << dev->devno); 2006 case -EINVAL: 2007 /* give it just one more chance */ 2008 ehc->tries[dev->devno] = min(ehc->tries[dev->devno], 1); 2009 case -EIO: 2010 if (ehc->tries[dev->devno] == 1) { 2011 /* This is the last chance, better to slow 2012 * down than lose it. 2013 */ 2014 sata_down_spd_limit(ap); 2015 ata_down_xfermask_limit(dev, ATA_DNXFER_PIO); 2016 } 2017 } 2018 2019 if (ata_dev_enabled(dev) && !ehc->tries[dev->devno]) { 2020 /* disable device if it has used up all its chances */ 2021 ata_dev_disable(dev); 2022 2023 /* detach if offline */ 2024 if (ata_port_offline(ap)) 2025 ata_eh_detach_dev(dev); 2026 2027 /* probe if requested */ 2028 if ((ehc->i.probe_mask & (1 << dev->devno)) && 2029 !(ehc->did_probe_mask & (1 << dev->devno))) { 2030 ata_eh_detach_dev(dev); 2031 ata_dev_init(dev); 2032 2033 ehc->tries[dev->devno] = ATA_EH_DEV_TRIES; 2034 ehc->did_probe_mask |= (1 << dev->devno); 2035 ehc->i.action |= ATA_EH_SOFTRESET; 2036 } 2037 } else { 2038 /* soft didn't work? be haaaaard */ 2039 if (ehc->i.flags & ATA_EHI_DID_RESET) 2040 ehc->i.action |= ATA_EH_HARDRESET; 2041 else 2042 ehc->i.action |= ATA_EH_SOFTRESET; 2043 } 2044 2045 if (ata_port_nr_enabled(ap)) { 2046 ata_port_printk(ap, KERN_WARNING, "failed to recover some " 2047 "devices, retrying in 5 secs\n"); 2048 ssleep(5); 2049 } else { 2050 /* no device left, repeat fast */ 2051 msleep(500); 2052 } 2053 2054 goto retry; 2055 2056 out: 2057 if (rc) { 2058 for (i = 0; i < ATA_MAX_DEVICES; i++) 2059 ata_dev_disable(&ap->device[i]); 2060 } 2061 2062 DPRINTK("EXIT, rc=%d\n", rc); 2063 return rc; 2064 } 2065 2066 /** 2067 * ata_eh_finish - finish up EH 2068 * @ap: host port to finish EH for 2069 * 2070 * Recovery is complete. Clean up EH states and retry or finish 2071 * failed qcs. 2072 * 2073 * LOCKING: 2074 * None. 2075 */ 2076 static void ata_eh_finish(struct ata_port *ap) 2077 { 2078 int tag; 2079 2080 /* retry or finish qcs */ 2081 for (tag = 0; tag < ATA_MAX_QUEUE; tag++) { 2082 struct ata_queued_cmd *qc = __ata_qc_from_tag(ap, tag); 2083 2084 if (!(qc->flags & ATA_QCFLAG_FAILED)) 2085 continue; 2086 2087 if (qc->err_mask) { 2088 /* FIXME: Once EH migration is complete, 2089 * generate sense data in this function, 2090 * considering both err_mask and tf. 2091 */ 2092 if (qc->err_mask & AC_ERR_INVALID) 2093 ata_eh_qc_complete(qc); 2094 else 2095 ata_eh_qc_retry(qc); 2096 } else { 2097 if (qc->flags & ATA_QCFLAG_SENSE_VALID) { 2098 ata_eh_qc_complete(qc); 2099 } else { 2100 /* feed zero TF to sense generation */ 2101 memset(&qc->result_tf, 0, sizeof(qc->result_tf)); 2102 ata_eh_qc_retry(qc); 2103 } 2104 } 2105 } 2106 } 2107 2108 /** 2109 * ata_do_eh - do standard error handling 2110 * @ap: host port to handle error for 2111 * @prereset: prereset method (can be NULL) 2112 * @softreset: softreset method (can be NULL) 2113 * @hardreset: hardreset method (can be NULL) 2114 * @postreset: postreset method (can be NULL) 2115 * 2116 * Perform standard error handling sequence. 2117 * 2118 * LOCKING: 2119 * Kernel thread context (may sleep). 2120 */ 2121 void ata_do_eh(struct ata_port *ap, ata_prereset_fn_t prereset, 2122 ata_reset_fn_t softreset, ata_reset_fn_t hardreset, 2123 ata_postreset_fn_t postreset) 2124 { 2125 ata_eh_autopsy(ap); 2126 ata_eh_report(ap); 2127 ata_eh_recover(ap, prereset, softreset, hardreset, postreset); 2128 ata_eh_finish(ap); 2129 } 2130 2131 #ifdef CONFIG_PM 2132 /** 2133 * ata_eh_handle_port_suspend - perform port suspend operation 2134 * @ap: port to suspend 2135 * 2136 * Suspend @ap. 2137 * 2138 * LOCKING: 2139 * Kernel thread context (may sleep). 2140 */ 2141 static void ata_eh_handle_port_suspend(struct ata_port *ap) 2142 { 2143 unsigned long flags; 2144 int rc = 0; 2145 2146 /* are we suspending? */ 2147 spin_lock_irqsave(ap->lock, flags); 2148 if (!(ap->pflags & ATA_PFLAG_PM_PENDING) || 2149 ap->pm_mesg.event == PM_EVENT_ON) { 2150 spin_unlock_irqrestore(ap->lock, flags); 2151 return; 2152 } 2153 spin_unlock_irqrestore(ap->lock, flags); 2154 2155 WARN_ON(ap->pflags & ATA_PFLAG_SUSPENDED); 2156 2157 /* tell ACPI we're suspending */ 2158 rc = ata_acpi_on_suspend(ap); 2159 if (rc) 2160 goto out; 2161 2162 /* suspend */ 2163 ata_eh_freeze_port(ap); 2164 2165 if (ap->ops->port_suspend) 2166 rc = ap->ops->port_suspend(ap, ap->pm_mesg); 2167 2168 out: 2169 /* report result */ 2170 spin_lock_irqsave(ap->lock, flags); 2171 2172 ap->pflags &= ~ATA_PFLAG_PM_PENDING; 2173 if (rc == 0) 2174 ap->pflags |= ATA_PFLAG_SUSPENDED; 2175 else if (ap->pflags & ATA_PFLAG_FROZEN) 2176 ata_port_schedule_eh(ap); 2177 2178 if (ap->pm_result) { 2179 *ap->pm_result = rc; 2180 ap->pm_result = NULL; 2181 } 2182 2183 spin_unlock_irqrestore(ap->lock, flags); 2184 2185 return; 2186 } 2187 2188 /** 2189 * ata_eh_handle_port_resume - perform port resume operation 2190 * @ap: port to resume 2191 * 2192 * Resume @ap. 2193 * 2194 * LOCKING: 2195 * Kernel thread context (may sleep). 2196 */ 2197 static void ata_eh_handle_port_resume(struct ata_port *ap) 2198 { 2199 unsigned long flags; 2200 int rc = 0; 2201 2202 /* are we resuming? */ 2203 spin_lock_irqsave(ap->lock, flags); 2204 if (!(ap->pflags & ATA_PFLAG_PM_PENDING) || 2205 ap->pm_mesg.event != PM_EVENT_ON) { 2206 spin_unlock_irqrestore(ap->lock, flags); 2207 return; 2208 } 2209 spin_unlock_irqrestore(ap->lock, flags); 2210 2211 WARN_ON(!(ap->pflags & ATA_PFLAG_SUSPENDED)); 2212 2213 if (ap->ops->port_resume) 2214 rc = ap->ops->port_resume(ap); 2215 2216 /* tell ACPI that we're resuming */ 2217 ata_acpi_on_resume(ap); 2218 2219 /* report result */ 2220 spin_lock_irqsave(ap->lock, flags); 2221 ap->pflags &= ~(ATA_PFLAG_PM_PENDING | ATA_PFLAG_SUSPENDED); 2222 if (ap->pm_result) { 2223 *ap->pm_result = rc; 2224 ap->pm_result = NULL; 2225 } 2226 spin_unlock_irqrestore(ap->lock, flags); 2227 } 2228 #endif /* CONFIG_PM */ 2229