/*
 *  libata-eh.c - libata error handling
 *
 *  Maintained by:  Jeff Garzik <jgarzik@pobox.com>
 *		    Please ALWAYS copy linux-ide@vger.kernel.org
 *		    on emails.
 *
 *  Copyright 2006 Tejun Heo <htejun@gmail.com>
 *
 *
 *  This program is free software; you can redistribute it and/or
 *  modify it under the terms of the GNU General Public License as
 *  published by the Free Software Foundation; either version 2, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 *  General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; see the file COPYING.  If not, write to
 *  the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139,
 *  USA.
 *
 *
 *  libata documentation is available via 'make {ps|pdf}docs',
 *  as Documentation/DocBook/libata.*
 *
 *  Hardware documentation available from http://www.t13.org/ and
 *  http://www.sata-io.org/
 *
 */

#include <linux/kernel.h>
#include <scsi/scsi.h>
#include <scsi/scsi_host.h>
#include <scsi/scsi_eh.h>
#include <scsi/scsi_device.h>
#include <scsi/scsi_cmnd.h>
#include "../scsi/scsi_transport_api.h"

#include <linux/libata.h>

#include "libata.h"

enum {
	ATA_EH_SPDN_NCQ_OFF		= (1 << 0),
	ATA_EH_SPDN_SPEED_DOWN		= (1 << 1),
	ATA_EH_SPDN_FALLBACK_TO_PIO	= (1 << 2),
};

static void __ata_port_freeze(struct ata_port *ap);
static void ata_eh_finish(struct ata_port *ap);
#ifdef CONFIG_PM
static void ata_eh_handle_port_suspend(struct ata_port *ap);
static void ata_eh_handle_port_resume(struct ata_port *ap);
static int ata_eh_suspend(struct ata_port *ap,
			  struct ata_device **r_failed_dev);
static void ata_eh_prep_resume(struct ata_port *ap);
static int ata_eh_resume(struct ata_port *ap, struct ata_device **r_failed_dev);
#else /* CONFIG_PM */
static void ata_eh_handle_port_suspend(struct ata_port *ap)
{ }

static void ata_eh_handle_port_resume(struct ata_port *ap)
{ }

static int ata_eh_suspend(struct ata_port *ap, struct ata_device **r_failed_dev)
{
	return 0;
}

static void ata_eh_prep_resume(struct ata_port *ap)
{ }

static int ata_eh_resume(struct ata_port *ap, struct ata_device **r_failed_dev)
{
	return 0;
}
#endif /* CONFIG_PM */

static void ata_ering_record(struct ata_ering *ering, int is_io,
			     unsigned int err_mask)
{
	struct ata_ering_entry *ent;

	WARN_ON(!err_mask);

	ering->cursor++;
	ering->cursor %= ATA_ERING_SIZE;

	ent = &ering->ring[ering->cursor];
	ent->is_io = is_io;
	ent->err_mask = err_mask;
	ent->timestamp = get_jiffies_64();
}

static void ata_ering_clear(struct ata_ering *ering)
{
	memset(ering, 0, sizeof(*ering));
}

static int ata_ering_map(struct ata_ering *ering,
			 int (*map_fn)(struct ata_ering_entry *, void *),
			 void *arg)
{
	int idx, rc = 0;
	struct ata_ering_entry *ent;

	idx = ering->cursor;
	do {
		ent = &ering->ring[idx];
		if (!ent->err_mask)
			break;
		rc = map_fn(ent, arg);
		if (rc)
			break;
		idx = (idx - 1 + ATA_ERING_SIZE) % ATA_ERING_SIZE;
	} while (idx != ering->cursor);

	return rc;
}
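
/*
 * Example: a minimal ata_ering_map() callback (hypothetical, shown for
 * illustration only; speed_down_verdict_cb() below is the real user).
 * Entries are visited newest first and returning non-zero stops the walk.
 *
 *	static int count_io_errors_cb(struct ata_ering_entry *ent, void *arg)
 *	{
 *		int *count = arg;
 *
 *		if (ent->is_io)
 *			(*count)++;
 *		return 0;
 *	}
 *
 *	int count = 0;
 *	ata_ering_map(&dev->ering, count_io_errors_cb, &count);
 */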

static unsigned int ata_eh_dev_action(struct ata_device *dev)
{
	struct ata_eh_context *ehc = &dev->ap->eh_context;

	return ehc->i.action | ehc->i.dev_action[dev->devno];
}

static void ata_eh_clear_action(struct ata_device *dev,
				struct ata_eh_info *ehi, unsigned int action)
{
	int i;

	if (!dev) {
		ehi->action &= ~action;
		for (i = 0; i < ATA_MAX_DEVICES; i++)
			ehi->dev_action[i] &= ~action;
	} else {
		/* doesn't make sense for port-wide EH actions */
		WARN_ON(!(action & ATA_EH_PERDEV_MASK));

		/* break ehi->action into ehi->dev_action */
		if (ehi->action & action) {
			for (i = 0; i < ATA_MAX_DEVICES; i++)
				ehi->dev_action[i] |= ehi->action & action;
			ehi->action &= ~action;
		}

		/* turn off the specified per-dev action */
		ehi->dev_action[dev->devno] &= ~action;
	}
}
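
/*
 * Example of the bookkeeping above: if ehi->action carries
 * ATA_EH_REVALIDATE port-wide and ata_eh_clear_action(dev, ehi,
 * ATA_EH_REVALIDATE) is called for device 0, the port-wide bit is first
 * distributed into ehi->dev_action[] for every device and then cleared
 * for device 0 only, so device 1 still gets revalidated.  (This just
 * restates the code above; it is not an extra API.)
 */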

/**
 *	ata_scsi_timed_out - SCSI layer time out callback
 *	@cmd: timed out SCSI command
 *
 *	Handles SCSI layer timeout.  We race with normal completion of
 *	the qc for @cmd.  If the qc is already gone, we lose and let
 *	the scsi command finish (EH_HANDLED).  Otherwise, the qc has
 *	timed out and EH should be invoked.  Prevent ata_qc_complete()
 *	from finishing it by setting EH_SCHEDULED and return
 *	EH_NOT_HANDLED.
 *
 *	TODO: kill this function once old EH is gone.
 *
 *	LOCKING:
 *	Called from timer context
 *
 *	RETURNS:
 *	EH_HANDLED or EH_NOT_HANDLED
 */
enum scsi_eh_timer_return ata_scsi_timed_out(struct scsi_cmnd *cmd)
{
	struct Scsi_Host *host = cmd->device->host;
	struct ata_port *ap = ata_shost_to_port(host);
	unsigned long flags;
	struct ata_queued_cmd *qc;
	enum scsi_eh_timer_return ret;

	DPRINTK("ENTER\n");

	if (ap->ops->error_handler) {
		ret = EH_NOT_HANDLED;
		goto out;
	}

	ret = EH_HANDLED;
	spin_lock_irqsave(ap->lock, flags);
	qc = ata_qc_from_tag(ap, ap->active_tag);
	if (qc) {
		WARN_ON(qc->scsicmd != cmd);
		qc->flags |= ATA_QCFLAG_EH_SCHEDULED;
		qc->err_mask |= AC_ERR_TIMEOUT;
		ret = EH_NOT_HANDLED;
	}
	spin_unlock_irqrestore(ap->lock, flags);

 out:
	DPRINTK("EXIT, ret=%d\n", ret);
	return ret;
}

/**
 *	ata_scsi_error - SCSI layer error handler callback
 *	@host: SCSI host on which error occurred
 *
 *	Handles SCSI-layer-thrown error events.
 *
 *	LOCKING:
 *	Inherited from SCSI layer (none, can sleep)
 *
 *	RETURNS:
 *	Zero.
 */
void ata_scsi_error(struct Scsi_Host *host)
{
	struct ata_port *ap = ata_shost_to_port(host);
	int i, repeat_cnt = ATA_EH_MAX_REPEAT;
	unsigned long flags;

	DPRINTK("ENTER\n");

	/* synchronize with port task */
	ata_port_flush_task(ap);

	/* synchronize with host lock and sort out timeouts */

	/* For new EH, all qcs are finished in one of three ways -
	 * normal completion, error completion, and SCSI timeout.
	 * Both completions can race against SCSI timeout.  When normal
	 * completion wins, the qc never reaches EH.  When error
	 * completion wins, the qc has ATA_QCFLAG_FAILED set.
	 *
	 * When SCSI timeout wins, things are a bit more complex.
	 * Normal or error completion can occur after the timeout but
	 * before this point.  In such cases, both types of
	 * completions are honored.  A scmd is determined to have
	 * timed out iff its associated qc is active and not failed.
	 */
	if (ap->ops->error_handler) {
		struct scsi_cmnd *scmd, *tmp;
		int nr_timedout = 0;

		spin_lock_irqsave(ap->lock, flags);

		list_for_each_entry_safe(scmd, tmp, &host->eh_cmd_q, eh_entry) {
			struct ata_queued_cmd *qc;

			for (i = 0; i < ATA_MAX_QUEUE; i++) {
				qc = __ata_qc_from_tag(ap, i);
				if (qc->flags & ATA_QCFLAG_ACTIVE &&
				    qc->scsicmd == scmd)
					break;
			}

			if (i < ATA_MAX_QUEUE) {
				/* the scmd has an associated qc */
				if (!(qc->flags & ATA_QCFLAG_FAILED)) {
					/* which hasn't failed yet, timeout */
					qc->err_mask |= AC_ERR_TIMEOUT;
					qc->flags |= ATA_QCFLAG_FAILED;
					nr_timedout++;
				}
			} else {
				/* Normal completion occurred after
				 * SCSI timeout but before this point.
				 * Successfully complete it.
				 */
				scmd->retries = scmd->allowed;
				scsi_eh_finish_cmd(scmd, &ap->eh_done_q);
			}
		}

		/* If we have timed out qcs, they belong to EH from
		 * this point but the state of the controller is
		 * unknown.  Freeze the port to make sure the IRQ
		 * handler doesn't diddle with those qcs.  This must
		 * be done atomically w.r.t. setting QCFLAG_FAILED.
		 */
		if (nr_timedout)
			__ata_port_freeze(ap);

		spin_unlock_irqrestore(ap->lock, flags);
	} else
		spin_unlock_wait(ap->lock);

 repeat:
	/* invoke error handler */
	if (ap->ops->error_handler) {
		/* process port resume request */
		ata_eh_handle_port_resume(ap);

		/* fetch & clear EH info */
		spin_lock_irqsave(ap->lock, flags);

		memset(&ap->eh_context, 0, sizeof(ap->eh_context));
		ap->eh_context.i = ap->eh_info;
		memset(&ap->eh_info, 0, sizeof(ap->eh_info));

		ap->pflags |= ATA_PFLAG_EH_IN_PROGRESS;
		ap->pflags &= ~ATA_PFLAG_EH_PENDING;

		spin_unlock_irqrestore(ap->lock, flags);

		/* invoke EH, skip if unloading or suspended */
		if (!(ap->pflags & (ATA_PFLAG_UNLOADING | ATA_PFLAG_SUSPENDED)))
			ap->ops->error_handler(ap);
		else
			ata_eh_finish(ap);

		/* process port suspend request */
		ata_eh_handle_port_suspend(ap);

		/* Exception might have happened after ->error_handler
		 * recovered the port but before this point.  Repeat
		 * EH in such case.
		 */
		spin_lock_irqsave(ap->lock, flags);

		if (ap->pflags & ATA_PFLAG_EH_PENDING) {
			if (--repeat_cnt) {
				ata_port_printk(ap, KERN_INFO,
					"EH pending after completion, "
					"repeating EH (cnt=%d)\n", repeat_cnt);
				spin_unlock_irqrestore(ap->lock, flags);
				goto repeat;
			}
			ata_port_printk(ap, KERN_ERR, "EH pending after %d "
					"tries, giving up\n", ATA_EH_MAX_REPEAT);
		}

		/* this run is complete, make sure EH info is clear */
		memset(&ap->eh_info, 0, sizeof(ap->eh_info));

		/* Clear host_eh_scheduled while holding ap->lock such
		 * that if exception occurs after this point but
		 * before EH completion, SCSI midlayer will
		 * re-initiate EH.
		 */
		host->host_eh_scheduled = 0;

		spin_unlock_irqrestore(ap->lock, flags);
	} else {
		WARN_ON(ata_qc_from_tag(ap, ap->active_tag) == NULL);
		ap->ops->eng_timeout(ap);
	}

	/* finish or retry handled scmd's and clean up */
	WARN_ON(host->host_failed || !list_empty(&host->eh_cmd_q));

	scsi_eh_flush_done_q(&ap->eh_done_q);

	/* clean up */
	spin_lock_irqsave(ap->lock, flags);

	if (ap->pflags & ATA_PFLAG_LOADING)
		ap->pflags &= ~ATA_PFLAG_LOADING;
	else if (ap->pflags & ATA_PFLAG_SCSI_HOTPLUG)
		queue_delayed_work(ata_aux_wq, &ap->hotplug_task, 0);

	if (ap->pflags & ATA_PFLAG_RECOVERED)
		ata_port_printk(ap, KERN_INFO, "EH complete\n");

	ap->pflags &= ~(ATA_PFLAG_SCSI_HOTPLUG | ATA_PFLAG_RECOVERED);

	/* tell wait_eh that we're done */
	ap->pflags &= ~ATA_PFLAG_EH_IN_PROGRESS;
	wake_up_all(&ap->eh_wait_q);

	spin_unlock_irqrestore(ap->lock, flags);

	DPRINTK("EXIT\n");
}

/**
 *	ata_port_wait_eh - Wait for the currently pending EH to complete
 *	@ap: Port to wait EH for
 *
 *	Wait until the currently pending EH is complete.
 *
 *	LOCKING:
 *	Kernel thread context (may sleep).
 */
void ata_port_wait_eh(struct ata_port *ap)
{
	unsigned long flags;
	DEFINE_WAIT(wait);

 retry:
	spin_lock_irqsave(ap->lock, flags);

	while (ap->pflags & (ATA_PFLAG_EH_PENDING | ATA_PFLAG_EH_IN_PROGRESS)) {
		prepare_to_wait(&ap->eh_wait_q, &wait, TASK_UNINTERRUPTIBLE);
		spin_unlock_irqrestore(ap->lock, flags);
		schedule();
		spin_lock_irqsave(ap->lock, flags);
	}
	finish_wait(&ap->eh_wait_q, &wait);

	spin_unlock_irqrestore(ap->lock, flags);

	/* make sure SCSI EH is complete */
	if (scsi_host_in_recovery(ap->scsi_host)) {
		msleep(10);
		goto retry;
	}
}
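
/*
 * Typical usage (sketch, hypothetical caller): code that needs the port
 * quiesced, e.g. before detaching it, schedules EH under the host lock
 * and then waits for it outside the lock.  ata_port_schedule_eh() is
 * defined further down in this file.
 *
 *	spin_lock_irqsave(ap->lock, flags);
 *	ata_port_schedule_eh(ap);
 *	spin_unlock_irqrestore(ap->lock, flags);
 *
 *	ata_port_wait_eh(ap);
 */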

/**
 *	ata_qc_timeout - Handle timeout of queued command
 *	@qc: Command that timed out
 *
 *	Some part of the kernel (currently, only the SCSI layer)
 *	has noticed that the active command on port @ap has not
 *	completed after a specified length of time.  Handle this
 *	condition by disabling DMA (if necessary) and completing
 *	transactions, with error if necessary.
 *
 *	This also handles the case of the "lost interrupt", where
 *	for some reason (possibly hardware bug, possibly driver bug)
 *	an interrupt was not delivered to the driver, even though the
 *	transaction completed successfully.
 *
 *	TODO: kill this function once old EH is gone.
 *
 *	LOCKING:
 *	Inherited from SCSI layer (none, can sleep)
 */
static void ata_qc_timeout(struct ata_queued_cmd *qc)
{
	struct ata_port *ap = qc->ap;
	u8 host_stat = 0, drv_stat;
	unsigned long flags;

	DPRINTK("ENTER\n");

	ap->hsm_task_state = HSM_ST_IDLE;

	spin_lock_irqsave(ap->lock, flags);

	switch (qc->tf.protocol) {

	case ATA_PROT_DMA:
	case ATA_PROT_ATAPI_DMA:
		host_stat = ap->ops->bmdma_status(ap);

		/* before we do anything else, clear DMA-Start bit */
		ap->ops->bmdma_stop(qc);

		/* fall through */

	default:
		ata_altstatus(ap);
		drv_stat = ata_chk_status(ap);

		/* ack bmdma irq events */
		ap->ops->irq_clear(ap);

		ata_dev_printk(qc->dev, KERN_ERR, "command 0x%x timeout, "
			       "stat 0x%x host_stat 0x%x\n",
			       qc->tf.command, drv_stat, host_stat);

		/* complete taskfile transaction */
		qc->err_mask |= AC_ERR_TIMEOUT;
		break;
	}

	spin_unlock_irqrestore(ap->lock, flags);

	ata_eh_qc_complete(qc);

	DPRINTK("EXIT\n");
}

/**
 *	ata_eng_timeout - Handle timeout of queued command
 *	@ap: Port on which timed-out command is active
 *
 *	Some part of the kernel (currently, only the SCSI layer)
 *	has noticed that the active command on port @ap has not
 *	completed after a specified length of time.  Handle this
 *	condition by disabling DMA (if necessary) and completing
 *	transactions, with error if necessary.
 *
 *	This also handles the case of the "lost interrupt", where
 *	for some reason (possibly hardware bug, possibly driver bug)
 *	an interrupt was not delivered to the driver, even though the
 *	transaction completed successfully.
 *
 *	TODO: kill this function once old EH is gone.
 *
 *	LOCKING:
 *	Inherited from SCSI layer (none, can sleep)
 */
void ata_eng_timeout(struct ata_port *ap)
{
	DPRINTK("ENTER\n");

	ata_qc_timeout(ata_qc_from_tag(ap, ap->active_tag));

	DPRINTK("EXIT\n");
}

/**
 *	ata_qc_schedule_eh - schedule qc for error handling
 *	@qc: command to schedule error handling for
 *
 *	Schedule error handling for @qc.  EH will kick in as soon as
 *	other commands are drained.
 *
 *	LOCKING:
 *	spin_lock_irqsave(host lock)
 */
void ata_qc_schedule_eh(struct ata_queued_cmd *qc)
{
	struct ata_port *ap = qc->ap;

	WARN_ON(!ap->ops->error_handler);

	qc->flags |= ATA_QCFLAG_FAILED;
	qc->ap->pflags |= ATA_PFLAG_EH_PENDING;

	/* The following will fail if timeout has already expired.
	 * ata_scsi_error() takes care of such scmds on EH entry.
	 * Note that ATA_QCFLAG_FAILED is unconditionally set after
	 * this function completes.
	 */
	scsi_req_abort_cmd(qc->scsicmd);
}

/**
 *	ata_port_schedule_eh - schedule error handling without a qc
 *	@ap: ATA port to schedule EH for
 *
 *	Schedule error handling for @ap.  EH will kick in as soon as
 *	all commands are drained.
 *
 *	LOCKING:
 *	spin_lock_irqsave(host lock)
 */
void ata_port_schedule_eh(struct ata_port *ap)
{
	WARN_ON(!ap->ops->error_handler);

	ap->pflags |= ATA_PFLAG_EH_PENDING;
	scsi_schedule_eh(ap->scsi_host);

	DPRINTK("port EH scheduled\n");
}

/**
 *	ata_port_abort - abort all qc's on the port
 *	@ap: ATA port to abort qc's for
 *
 *	Abort all active qc's of @ap and schedule EH.
 *
 *	LOCKING:
 *	spin_lock_irqsave(host lock)
 *
 *	RETURNS:
 *	Number of aborted qc's.
 */
int ata_port_abort(struct ata_port *ap)
{
	int tag, nr_aborted = 0;

	WARN_ON(!ap->ops->error_handler);

	for (tag = 0; tag < ATA_MAX_QUEUE; tag++) {
		struct ata_queued_cmd *qc = ata_qc_from_tag(ap, tag);

		if (qc) {
			qc->flags |= ATA_QCFLAG_FAILED;
			ata_qc_complete(qc);
			nr_aborted++;
		}
	}

	if (!nr_aborted)
		ata_port_schedule_eh(ap);

	return nr_aborted;
}

/**
 *	__ata_port_freeze - freeze port
 *	@ap: ATA port to freeze
 *
 *	This function is called when HSM violation or some other
 *	condition disrupts normal operation of the port.  Frozen port
 *	is not allowed to perform any operation until the port is
 *	thawed, which usually follows a successful reset.
 *
 *	ap->ops->freeze() callback can be used for freezing the port
 *	hardware-wise (e.g. mask interrupt and stop DMA engine).  If a
 *	port cannot be frozen hardware-wise, the interrupt handler
 *	must ack and clear interrupts unconditionally while the port
 *	is frozen.
 *
 *	LOCKING:
 *	spin_lock_irqsave(host lock)
 */
static void __ata_port_freeze(struct ata_port *ap)
{
	WARN_ON(!ap->ops->error_handler);

	if (ap->ops->freeze)
		ap->ops->freeze(ap);

	ap->pflags |= ATA_PFLAG_FROZEN;

	DPRINTK("ata%u port frozen\n", ap->print_id);
}

/**
 *	ata_port_freeze - abort & freeze port
 *	@ap: ATA port to freeze
 *
 *	Abort and freeze @ap.
 *
 *	LOCKING:
 *	spin_lock_irqsave(host lock)
 *
 *	RETURNS:
 *	Number of aborted commands.
 */
int ata_port_freeze(struct ata_port *ap)
{
	int nr_aborted;

	WARN_ON(!ap->ops->error_handler);

	nr_aborted = ata_port_abort(ap);
	__ata_port_freeze(ap);

	return nr_aborted;
}
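
/*
 * Example (hypothetical LLDD interrupt path): on a serious error the
 * driver records a description in eh_info and lets EH take over; this
 * runs under the host lock, as required above.  SERIOUS_ERROR and
 * irq_stat are made-up names for illustration.
 *
 *	if (irq_stat & SERIOUS_ERROR) {
 *		ata_ehi_push_desc(&ap->eh_info, "irq_stat 0x%08x", irq_stat);
 *		ata_port_freeze(ap);
 *	}
 */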

/**
 *	ata_eh_freeze_port - EH helper to freeze port
 *	@ap: ATA port to freeze
 *
 *	Freeze @ap.
 *
 *	LOCKING:
 *	None.
 */
void ata_eh_freeze_port(struct ata_port *ap)
{
	unsigned long flags;

	if (!ap->ops->error_handler)
		return;

	spin_lock_irqsave(ap->lock, flags);
	__ata_port_freeze(ap);
	spin_unlock_irqrestore(ap->lock, flags);
}

/**
 *	ata_eh_thaw_port - EH helper to thaw port
 *	@ap: ATA port to thaw
 *
 *	Thaw frozen port @ap.
 *
 *	LOCKING:
 *	None.
 */
void ata_eh_thaw_port(struct ata_port *ap)
{
	unsigned long flags;

	if (!ap->ops->error_handler)
		return;

	spin_lock_irqsave(ap->lock, flags);

	ap->pflags &= ~ATA_PFLAG_FROZEN;

	if (ap->ops->thaw)
		ap->ops->thaw(ap);

	spin_unlock_irqrestore(ap->lock, flags);

	DPRINTK("ata%u port thawed\n", ap->print_id);
}

static void ata_eh_scsidone(struct scsi_cmnd *scmd)
{
	/* nada */
}

static void __ata_eh_qc_complete(struct ata_queued_cmd *qc)
{
	struct ata_port *ap = qc->ap;
	struct scsi_cmnd *scmd = qc->scsicmd;
	unsigned long flags;

	spin_lock_irqsave(ap->lock, flags);
	qc->scsidone = ata_eh_scsidone;
	__ata_qc_complete(qc);
	WARN_ON(ata_tag_valid(qc->tag));
	spin_unlock_irqrestore(ap->lock, flags);

	scsi_eh_finish_cmd(scmd, &ap->eh_done_q);
}

/**
 *	ata_eh_qc_complete - Complete an active ATA command from EH
 *	@qc: Command to complete
 *
 *	Indicate to the mid and upper layers that an ATA command has
 *	completed.  To be used from EH.
 */
void ata_eh_qc_complete(struct ata_queued_cmd *qc)
{
	struct scsi_cmnd *scmd = qc->scsicmd;
	scmd->retries = scmd->allowed;
	__ata_eh_qc_complete(qc);
}

/**
 *	ata_eh_qc_retry - Tell midlayer to retry an ATA command after EH
 *	@qc: Command to retry
 *
 *	Indicate to the mid and upper layers that an ATA command
 *	should be retried.  To be used from EH.
 *
 *	SCSI midlayer limits the number of retries to scmd->allowed.
 *	scmd->retries is decremented for commands which get retried
 *	due to unrelated failures (qc->err_mask is zero).
 */
void ata_eh_qc_retry(struct ata_queued_cmd *qc)
{
	struct scsi_cmnd *scmd = qc->scsicmd;
	if (!qc->err_mask && scmd->retries)
		scmd->retries--;
	__ata_eh_qc_complete(qc);
}

/**
 *	ata_eh_detach_dev - detach ATA device
 *	@dev: ATA device to detach
 *
 *	Detach @dev.
 *
 *	LOCKING:
 *	None.
 */
static void ata_eh_detach_dev(struct ata_device *dev)
{
	struct ata_port *ap = dev->ap;
	unsigned long flags;

	ata_dev_disable(dev);

	spin_lock_irqsave(ap->lock, flags);

	dev->flags &= ~ATA_DFLAG_DETACH;

	if (ata_scsi_offline_dev(dev)) {
		dev->flags |= ATA_DFLAG_DETACHED;
		ap->pflags |= ATA_PFLAG_SCSI_HOTPLUG;
	}

	/* clear per-dev EH actions */
	ata_eh_clear_action(dev, &ap->eh_info, ATA_EH_PERDEV_MASK);
	ata_eh_clear_action(dev, &ap->eh_context.i, ATA_EH_PERDEV_MASK);

	spin_unlock_irqrestore(ap->lock, flags);
}

/**
 *	ata_eh_about_to_do - about to perform eh_action
 *	@ap: target ATA port
 *	@dev: target ATA dev for per-dev action (can be NULL)
 *	@action: action about to be performed
 *
 *	Called just before performing EH actions to clear related bits
 *	in @ap->eh_info such that eh actions are not unnecessarily
 *	repeated.
 *
 *	LOCKING:
 *	None.
 */
static void ata_eh_about_to_do(struct ata_port *ap, struct ata_device *dev,
			       unsigned int action)
{
	unsigned long flags;
	struct ata_eh_info *ehi = &ap->eh_info;
	struct ata_eh_context *ehc = &ap->eh_context;

	spin_lock_irqsave(ap->lock, flags);

	/* Reset is represented by combination of actions and EHI
	 * flags.  Suck in all related bits before clearing eh_info to
	 * avoid losing requested action.
	 */
	if (action & ATA_EH_RESET_MASK) {
		ehc->i.action |= ehi->action & ATA_EH_RESET_MASK;
		ehc->i.flags |= ehi->flags & ATA_EHI_RESET_MODIFIER_MASK;

		/* make sure all reset actions are cleared & clear EHI flags */
		action |= ATA_EH_RESET_MASK;
		ehi->flags &= ~ATA_EHI_RESET_MODIFIER_MASK;
	}

	ata_eh_clear_action(dev, ehi, action);

	if (!(ehc->i.flags & ATA_EHI_QUIET))
		ap->pflags |= ATA_PFLAG_RECOVERED;

	spin_unlock_irqrestore(ap->lock, flags);
}

/**
 *	ata_eh_done - EH action complete
 *	@ap: target ATA port
 *	@dev: target ATA dev for per-dev action (can be NULL)
 *	@action: action just completed
 *
 *	Called right after performing EH actions to clear related bits
 *	in @ap->eh_context.
 *
 *	LOCKING:
 *	None.
 */
static void ata_eh_done(struct ata_port *ap, struct ata_device *dev,
			unsigned int action)
{
	/* if reset is complete, clear all reset actions & reset modifier */
	if (action & ATA_EH_RESET_MASK) {
		action |= ATA_EH_RESET_MASK;
		ap->eh_context.i.flags &= ~ATA_EHI_RESET_MODIFIER_MASK;
	}

	ata_eh_clear_action(dev, &ap->eh_context.i, action);
}
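
/*
 * ata_eh_about_to_do() and ata_eh_done() bracket every EH action.  A
 * sketch of the pattern used throughout this file (see
 * ata_eh_revalidate_and_attach() below for a real occurrence):
 *
 *	ata_eh_about_to_do(ap, dev, ATA_EH_REVALIDATE);
 *	rc = ata_dev_revalidate(dev, readid_flags);
 *	if (rc == 0)
 *		ata_eh_done(ap, dev, ATA_EH_REVALIDATE);
 */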

/**
 *	ata_err_string - convert err_mask to descriptive string
 *	@err_mask: error mask to convert to string
 *
 *	Convert @err_mask to descriptive string.  Errors are
 *	prioritized according to severity and only the most severe
 *	error is reported.
 *
 *	LOCKING:
 *	None.
 *
 *	RETURNS:
 *	Descriptive string for @err_mask
 */
static const char *ata_err_string(unsigned int err_mask)
{
	if (err_mask & AC_ERR_HOST_BUS)
		return "host bus error";
	if (err_mask & AC_ERR_ATA_BUS)
		return "ATA bus error";
	if (err_mask & AC_ERR_TIMEOUT)
		return "timeout";
	if (err_mask & AC_ERR_HSM)
		return "HSM violation";
	if (err_mask & AC_ERR_SYSTEM)
		return "internal error";
	if (err_mask & AC_ERR_MEDIA)
		return "media error";
	if (err_mask & AC_ERR_INVALID)
		return "invalid argument";
	if (err_mask & AC_ERR_DEV)
		return "device error";
	return "unknown error";
}

/**
 *	ata_read_log_page - read a specific log page
 *	@dev: target device
 *	@page: page to read
 *	@buf: buffer to store read page
 *	@sectors: number of sectors to read
 *
 *	Read log page using READ_LOG_EXT command.
 *
 *	LOCKING:
 *	Kernel thread context (may sleep).
 *
 *	RETURNS:
 *	0 on success, AC_ERR_* mask otherwise.
 */
static unsigned int ata_read_log_page(struct ata_device *dev,
				      u8 page, void *buf, unsigned int sectors)
{
	struct ata_taskfile tf;
	unsigned int err_mask;

	DPRINTK("read log page - page %d\n", page);

	ata_tf_init(dev, &tf);
	tf.command = ATA_CMD_READ_LOG_EXT;
	tf.lbal = page;
	tf.nsect = sectors;
	tf.hob_nsect = sectors >> 8;
	tf.flags |= ATA_TFLAG_ISADDR | ATA_TFLAG_LBA48 | ATA_TFLAG_DEVICE;
	tf.protocol = ATA_PROT_PIO;

	err_mask = ata_exec_internal(dev, &tf, NULL, DMA_FROM_DEVICE,
				     buf, sectors * ATA_SECT_SIZE);

	DPRINTK("EXIT, err_mask=%x\n", err_mask);
	return err_mask;
}

/**
 *	ata_eh_read_log_10h - Read log page 10h for NCQ error details
 *	@dev: Device to read log page 10h from
 *	@tag: Resulting tag of the failed command
 *	@tf: Resulting taskfile registers of the failed command
 *
 *	Read log page 10h to obtain NCQ error details and clear error
 *	condition.
 *
 *	LOCKING:
 *	Kernel thread context (may sleep).
 *
 *	RETURNS:
 *	0 on success, -errno otherwise.
 */
static int ata_eh_read_log_10h(struct ata_device *dev,
			       int *tag, struct ata_taskfile *tf)
{
	u8 *buf = dev->ap->sector_buf;
	unsigned int err_mask;
	u8 csum;
	int i;

	err_mask = ata_read_log_page(dev, ATA_LOG_SATA_NCQ, buf, 1);
	if (err_mask)
		return -EIO;

	csum = 0;
	for (i = 0; i < ATA_SECT_SIZE; i++)
		csum += buf[i];
	if (csum)
		ata_dev_printk(dev, KERN_WARNING,
			       "invalid checksum 0x%x on log page 10h\n", csum);

	if (buf[0] & 0x80)
		return -ENOENT;

	*tag = buf[0] & 0x1f;

	tf->command = buf[2];
	tf->feature = buf[3];
	tf->lbal = buf[4];
	tf->lbam = buf[5];
	tf->lbah = buf[6];
	tf->device = buf[7];
	tf->hob_lbal = buf[8];
	tf->hob_lbam = buf[9];
	tf->hob_lbah = buf[10];
	tf->nsect = buf[12];
	tf->hob_nsect = buf[13];

	return 0;
}
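
/*
 * Byte 0 of log page 10h carries the NQ bit in bit 7 and the tag of the
 * failed command in bits 4:0.  For example, buf[0] == 0x03 above means
 * the queued command with tag 3 failed, while buf[0] & 0x80 means the
 * error was for a non-queued command and no tag is reported.
 */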

/**
 *	atapi_eh_request_sense - perform ATAPI REQUEST_SENSE
 *	@qc: qc to perform REQUEST_SENSE for (sense data is stored in
 *	     qc->scsicmd->sense_buffer, SCSI_SENSE_BUFFERSIZE bytes long)
 *
 *	Perform ATAPI REQUEST_SENSE after the device reported CHECK
 *	SENSE.  This function is an EH helper.
 *
 *	LOCKING:
 *	Kernel thread context (may sleep).
 *
 *	RETURNS:
 *	0 on success, AC_ERR_* mask on failure
 */
static unsigned int atapi_eh_request_sense(struct ata_queued_cmd *qc)
{
	struct ata_device *dev = qc->dev;
	unsigned char *sense_buf = qc->scsicmd->sense_buffer;
	struct ata_port *ap = dev->ap;
	struct ata_taskfile tf;
	u8 cdb[ATAPI_CDB_LEN];

	DPRINTK("ATAPI request sense\n");

	/* FIXME: is this needed? */
	memset(sense_buf, 0, SCSI_SENSE_BUFFERSIZE);

	/* initialize sense_buf with the error register,
	 * for the case where they are -not- overwritten
	 */
	sense_buf[0] = 0x70;
	sense_buf[2] = qc->result_tf.feature >> 4;

	/* some devices time out if garbage left in tf */
	ata_tf_init(dev, &tf);

	memset(cdb, 0, ATAPI_CDB_LEN);
	cdb[0] = REQUEST_SENSE;
	cdb[4] = SCSI_SENSE_BUFFERSIZE;

	tf.flags |= ATA_TFLAG_ISADDR | ATA_TFLAG_DEVICE;
	tf.command = ATA_CMD_PACKET;

	/* is it pointless to prefer PIO for "safety reasons"? */
	if (ap->flags & ATA_FLAG_PIO_DMA) {
		tf.protocol = ATA_PROT_ATAPI_DMA;
		tf.feature |= ATAPI_PKT_DMA;
	} else {
		tf.protocol = ATA_PROT_ATAPI;
		tf.lbam = (8 * 1024) & 0xff;
		tf.lbah = (8 * 1024) >> 8;
	}

	return ata_exec_internal(dev, &tf, cdb, DMA_FROM_DEVICE,
				 sense_buf, SCSI_SENSE_BUFFERSIZE);
}

/**
 *	ata_eh_analyze_serror - analyze SError for a failed port
 *	@ap: ATA port to analyze SError for
 *
 *	Analyze SError if available and further determine cause of
 *	failure.
 *
 *	LOCKING:
 *	None.
 */
static void ata_eh_analyze_serror(struct ata_port *ap)
{
	struct ata_eh_context *ehc = &ap->eh_context;
	u32 serror = ehc->i.serror;
	unsigned int err_mask = 0, action = 0;

	if (serror & SERR_PERSISTENT) {
		err_mask |= AC_ERR_ATA_BUS;
		action |= ATA_EH_HARDRESET;
	}
	if (serror &
	    (SERR_DATA_RECOVERED | SERR_COMM_RECOVERED | SERR_DATA)) {
		err_mask |= AC_ERR_ATA_BUS;
		action |= ATA_EH_SOFTRESET;
	}
	if (serror & SERR_PROTOCOL) {
		err_mask |= AC_ERR_HSM;
		action |= ATA_EH_SOFTRESET;
	}
	if (serror & SERR_INTERNAL) {
		err_mask |= AC_ERR_SYSTEM;
		action |= ATA_EH_HARDRESET;
	}
	if (serror & (SERR_PHYRDY_CHG | SERR_DEV_XCHG))
		ata_ehi_hotplugged(&ehc->i);

	ehc->i.err_mask |= err_mask;
	ehc->i.action |= action;
}

/**
 *	ata_eh_analyze_ncq_error - analyze NCQ error
 *	@ap: ATA port to analyze NCQ error for
 *
 *	Read log page 10h, determine the offending qc and acquire
 *	error status TF.  For NCQ device errors, all an LLDD has to do
 *	is set AC_ERR_DEV in ehi->err_mask.  This function takes
 *	care of the rest.
 *
 *	LOCKING:
 *	Kernel thread context (may sleep).
 */
static void ata_eh_analyze_ncq_error(struct ata_port *ap)
{
	struct ata_eh_context *ehc = &ap->eh_context;
	struct ata_device *dev = ap->device;
	struct ata_queued_cmd *qc;
	struct ata_taskfile tf;
	int tag, rc;

	/* if frozen, we can't do much */
	if (ap->pflags & ATA_PFLAG_FROZEN)
		return;

	/* is it NCQ device error? */
	if (!ap->sactive || !(ehc->i.err_mask & AC_ERR_DEV))
		return;

	/* has LLDD analyzed already? */
	for (tag = 0; tag < ATA_MAX_QUEUE; tag++) {
		qc = __ata_qc_from_tag(ap, tag);

		if (!(qc->flags & ATA_QCFLAG_FAILED))
			continue;

		if (qc->err_mask)
			return;
	}

	/* okay, this error is ours */
	rc = ata_eh_read_log_10h(dev, &tag, &tf);
	if (rc) {
		ata_port_printk(ap, KERN_ERR, "failed to read log page 10h "
				"(errno=%d)\n", rc);
		return;
	}

	if (!(ap->sactive & (1 << tag))) {
		ata_port_printk(ap, KERN_ERR, "log page 10h reported "
				"inactive tag %d\n", tag);
		return;
	}

	/* we've got the perpetrator, condemn it */
	qc = __ata_qc_from_tag(ap, tag);
	memcpy(&qc->result_tf, &tf, sizeof(tf));
	qc->err_mask |= AC_ERR_DEV;
	ehc->i.err_mask &= ~AC_ERR_DEV;
}

/**
 *	ata_eh_analyze_tf - analyze taskfile of a failed qc
 *	@qc: qc to analyze
 *	@tf: Taskfile registers to analyze
 *
 *	Analyze taskfile of @qc and further determine cause of
 *	failure.  This function also requests ATAPI sense data if
 *	available.
 *
 *	LOCKING:
 *	Kernel thread context (may sleep).
 *
 *	RETURNS:
 *	Determined recovery action
 */
static unsigned int ata_eh_analyze_tf(struct ata_queued_cmd *qc,
				      const struct ata_taskfile *tf)
{
	unsigned int tmp, action = 0;
	u8 stat = tf->command, err = tf->feature;

	if ((stat & (ATA_BUSY | ATA_DRQ | ATA_DRDY)) != ATA_DRDY) {
		qc->err_mask |= AC_ERR_HSM;
		return ATA_EH_SOFTRESET;
	}

	if (stat & (ATA_ERR | ATA_DF))
		qc->err_mask |= AC_ERR_DEV;
	else
		return 0;

	switch (qc->dev->class) {
	case ATA_DEV_ATA:
		if (err & ATA_ICRC)
			qc->err_mask |= AC_ERR_ATA_BUS;
		if (err & ATA_UNC)
			qc->err_mask |= AC_ERR_MEDIA;
		if (err & ATA_IDNF)
			qc->err_mask |= AC_ERR_INVALID;
		break;

	case ATA_DEV_ATAPI:
		if (!(qc->ap->pflags & ATA_PFLAG_FROZEN)) {
			tmp = atapi_eh_request_sense(qc);
			if (!tmp) {
				/* ATA_QCFLAG_SENSE_VALID is used to
				 * tell atapi_qc_complete() that sense
				 * data is already valid.
				 *
				 * TODO: interpret sense data and set
				 * appropriate err_mask.
				 */
				qc->flags |= ATA_QCFLAG_SENSE_VALID;
			} else
				qc->err_mask |= tmp;
		}
	}

	if (qc->err_mask & (AC_ERR_HSM | AC_ERR_TIMEOUT | AC_ERR_ATA_BUS))
		action |= ATA_EH_SOFTRESET;

	return action;
}

static int ata_eh_categorize_error(int is_io, unsigned int err_mask)
{
	if (err_mask & AC_ERR_ATA_BUS)
		return 1;

	if (err_mask & AC_ERR_TIMEOUT)
		return 2;

	if (is_io) {
		if (err_mask & AC_ERR_HSM)
			return 2;
		if ((err_mask &
		     (AC_ERR_DEV|AC_ERR_MEDIA|AC_ERR_INVALID)) == AC_ERR_DEV)
			return 3;
	}

	return 0;
}

struct speed_down_verdict_arg {
	u64 since;
	int nr_errors[4];
};

static int speed_down_verdict_cb(struct ata_ering_entry *ent, void *void_arg)
{
	struct speed_down_verdict_arg *arg = void_arg;
	int cat = ata_eh_categorize_error(ent->is_io, ent->err_mask);

	if (ent->timestamp < arg->since)
		return -1;

	arg->nr_errors[cat]++;
	return 0;
}

/**
 *	ata_eh_speed_down_verdict - Determine speed down verdict
 *	@dev: Device of interest
 *
 *	This function examines error ring of @dev and determines
 *	whether NCQ needs to be turned off, transfer speed should be
 *	stepped down, or falling back to PIO is necessary.
 *
 *	Cat-1 is ATA_BUS error for any command.
 *
 *	Cat-2 is TIMEOUT for any command or HSM violation for known
 *	supported commands.
 *
 *	Cat-3 is unclassified DEV error for known supported
 *	commands.
 *
 *	NCQ needs to be turned off if there have been more than 3
 *	Cat-2 + Cat-3 errors during last 10 minutes.
 *
 *	Speed down is necessary if there have been more than 3 Cat-1 +
 *	Cat-2 errors or 10 Cat-3 errors during last 10 minutes.
 *
 *	Falling back to PIO mode is necessary if there have been more
 *	than 10 Cat-1 + Cat-2 + Cat-3 errors during last 5 minutes.
 *
 *	LOCKING:
 *	Inherited from caller.
 *
 *	RETURNS:
 *	OR of ATA_EH_SPDN_* flags.
 */
static unsigned int ata_eh_speed_down_verdict(struct ata_device *dev)
{
	const u64 j5mins = 5LLU * 60 * HZ, j10mins = 10LLU * 60 * HZ;
	u64 j64 = get_jiffies_64();
	struct speed_down_verdict_arg arg;
	unsigned int verdict = 0;

	/* scan past 10 mins of error history */
	memset(&arg, 0, sizeof(arg));
	arg.since = j64 - min(j64, j10mins);
	ata_ering_map(&dev->ering, speed_down_verdict_cb, &arg);

	if (arg.nr_errors[2] + arg.nr_errors[3] > 3)
		verdict |= ATA_EH_SPDN_NCQ_OFF;
	if (arg.nr_errors[1] + arg.nr_errors[2] > 3 || arg.nr_errors[3] > 10)
		verdict |= ATA_EH_SPDN_SPEED_DOWN;

	/* scan past 5 mins of error history */
	memset(&arg, 0, sizeof(arg));
	arg.since = j64 - min(j64, j5mins);
	ata_ering_map(&dev->ering, speed_down_verdict_cb, &arg);

	if (arg.nr_errors[1] + arg.nr_errors[2] + arg.nr_errors[3] > 10)
		verdict |= ATA_EH_SPDN_FALLBACK_TO_PIO;

	return verdict;
}
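
/*
 * Worked example of the thresholds above: four command timeouts (Cat-2)
 * within the last ten minutes give nr_errors[2] == 4, so both the
 * NCQ-off test (Cat-2 + Cat-3 > 3) and the speed-down test
 * (Cat-1 + Cat-2 > 3) fire and the verdict is
 * ATA_EH_SPDN_NCQ_OFF | ATA_EH_SPDN_SPEED_DOWN.
 */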

/**
 *	ata_eh_speed_down - record error and speed down if necessary
 *	@dev: Failed device
 *	@is_io: Did the device fail during normal IO?
 *	@err_mask: err_mask of the error
 *
 *	Record error and examine error history to determine whether
 *	adjusting transmission speed is necessary.  It also sets
 *	transmission limits appropriately if such adjustment is
 *	necessary.
 *
 *	LOCKING:
 *	Kernel thread context (may sleep).
 *
 *	RETURNS:
 *	Determined recovery action.
 */
static unsigned int ata_eh_speed_down(struct ata_device *dev, int is_io,
				      unsigned int err_mask)
{
	unsigned int verdict;
	unsigned int action = 0;

	/* don't bother if Cat-0 error */
	if (ata_eh_categorize_error(is_io, err_mask) == 0)
		return 0;

	/* record error and determine whether speed down is necessary */
	ata_ering_record(&dev->ering, is_io, err_mask);
	verdict = ata_eh_speed_down_verdict(dev);

	/* turn off NCQ? */
	if ((verdict & ATA_EH_SPDN_NCQ_OFF) &&
	    (dev->flags & (ATA_DFLAG_PIO | ATA_DFLAG_NCQ |
			   ATA_DFLAG_NCQ_OFF)) == ATA_DFLAG_NCQ) {
		dev->flags |= ATA_DFLAG_NCQ_OFF;
		ata_dev_printk(dev, KERN_WARNING,
			       "NCQ disabled due to excessive errors\n");
		goto done;
	}

	/* speed down? */
	if (verdict & ATA_EH_SPDN_SPEED_DOWN) {
		/* speed down SATA link speed if possible */
		if (sata_down_spd_limit(dev->ap) == 0) {
			action |= ATA_EH_HARDRESET;
			goto done;
		}

		/* lower transfer mode */
		if (dev->spdn_cnt < 2) {
			static const int dma_dnxfer_sel[] =
				{ ATA_DNXFER_DMA, ATA_DNXFER_40C };
			static const int pio_dnxfer_sel[] =
				{ ATA_DNXFER_PIO, ATA_DNXFER_FORCE_PIO0 };
			int sel;

			if (dev->xfer_shift != ATA_SHIFT_PIO)
				sel = dma_dnxfer_sel[dev->spdn_cnt];
			else
				sel = pio_dnxfer_sel[dev->spdn_cnt];

			dev->spdn_cnt++;

			if (ata_down_xfermask_limit(dev, sel) == 0) {
				action |= ATA_EH_SOFTRESET;
				goto done;
			}
		}
	}

	/* Fall back to PIO?  Slowing down to PIO is meaningless for
	 * SATA.  Consider it only for PATA.
	 */
	if ((verdict & ATA_EH_SPDN_FALLBACK_TO_PIO) && (dev->spdn_cnt >= 2) &&
	    (dev->ap->cbl != ATA_CBL_SATA) &&
	    (dev->xfer_shift != ATA_SHIFT_PIO)) {
		if (ata_down_xfermask_limit(dev, ATA_DNXFER_FORCE_PIO) == 0) {
			dev->spdn_cnt = 0;
			action |= ATA_EH_SOFTRESET;
			goto done;
		}
	}

	return 0;
 done:
	/* device has been slowed down, blow error history */
	ata_ering_clear(&dev->ering);
	return action;
}
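
/*
 * The resulting ladder for a repeatedly failing device doing DMA is
 * roughly: first lower the SATA link speed, then step the transfer mode
 * down (ATA_DNXFER_DMA, then ATA_DNXFER_40C), and finally, on PATA
 * only, force a fall back to PIO.  Each successful step clears the
 * error ring so the device gets a fresh start at the new speed.
 */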

/**
 *	ata_eh_autopsy - analyze error and determine recovery action
 *	@ap: ATA port to perform autopsy on
 *
 *	Analyze why @ap failed and determine which recovery action is
 *	needed.  This function also sets more detailed AC_ERR_* values
 *	and fills sense data for ATAPI CHECK SENSE.
 *
 *	LOCKING:
 *	Kernel thread context (may sleep).
 */
static void ata_eh_autopsy(struct ata_port *ap)
{
	struct ata_eh_context *ehc = &ap->eh_context;
	unsigned int all_err_mask = 0;
	int tag, is_io = 0;
	u32 serror;
	int rc;

	DPRINTK("ENTER\n");

	if (ehc->i.flags & ATA_EHI_NO_AUTOPSY)
		return;

	/* obtain and analyze SError */
	rc = sata_scr_read(ap, SCR_ERROR, &serror);
	if (rc == 0) {
		ehc->i.serror |= serror;
		ata_eh_analyze_serror(ap);
	} else if (rc != -EOPNOTSUPP)
		ehc->i.action |= ATA_EH_HARDRESET;

	/* analyze NCQ failure */
	ata_eh_analyze_ncq_error(ap);

	/* any real error trumps AC_ERR_OTHER */
	if (ehc->i.err_mask & ~AC_ERR_OTHER)
		ehc->i.err_mask &= ~AC_ERR_OTHER;

	all_err_mask |= ehc->i.err_mask;

	for (tag = 0; tag < ATA_MAX_QUEUE; tag++) {
		struct ata_queued_cmd *qc = __ata_qc_from_tag(ap, tag);

		if (!(qc->flags & ATA_QCFLAG_FAILED))
			continue;

		/* inherit upper level err_mask */
		qc->err_mask |= ehc->i.err_mask;

		/* analyze TF */
		ehc->i.action |= ata_eh_analyze_tf(qc, &qc->result_tf);

		/* DEV errors are probably spurious in case of ATA_BUS error */
		if (qc->err_mask & AC_ERR_ATA_BUS)
			qc->err_mask &= ~(AC_ERR_DEV | AC_ERR_MEDIA |
					  AC_ERR_INVALID);

		/* any real error trumps unknown error */
		if (qc->err_mask & ~AC_ERR_OTHER)
			qc->err_mask &= ~AC_ERR_OTHER;

		/* SENSE_VALID trumps dev/unknown error and revalidation */
		if (qc->flags & ATA_QCFLAG_SENSE_VALID) {
			qc->err_mask &= ~(AC_ERR_DEV | AC_ERR_OTHER);
			ehc->i.action &= ~ATA_EH_REVALIDATE;
		}

		/* accumulate error info */
		ehc->i.dev = qc->dev;
		all_err_mask |= qc->err_mask;
		if (qc->flags & ATA_QCFLAG_IO)
			is_io = 1;
	}

	/* enforce default EH actions */
	if (ap->pflags & ATA_PFLAG_FROZEN ||
	    all_err_mask & (AC_ERR_HSM | AC_ERR_TIMEOUT))
		ehc->i.action |= ATA_EH_SOFTRESET;
	else if (all_err_mask)
		ehc->i.action |= ATA_EH_REVALIDATE;

	/* if we have offending qcs and the associated failed device */
	if (ehc->i.dev) {
		/* speed down */
		ehc->i.action |= ata_eh_speed_down(ehc->i.dev, is_io,
						   all_err_mask);

		/* perform per-dev EH action only on the offending device */
		ehc->i.dev_action[ehc->i.dev->devno] |=
			ehc->i.action & ATA_EH_PERDEV_MASK;
		ehc->i.action &= ~ATA_EH_PERDEV_MASK;
	}

	DPRINTK("EXIT\n");
}

/**
 *	ata_eh_report - report error handling to user
 *	@ap: ATA port EH is going on
 *
 *	Report EH to user.
 *
 *	LOCKING:
 *	None.
 */
static void ata_eh_report(struct ata_port *ap)
{
	struct ata_eh_context *ehc = &ap->eh_context;
	const char *frozen, *desc;
	int tag, nr_failed = 0;

	desc = NULL;
	if (ehc->i.desc[0] != '\0')
		desc = ehc->i.desc;

	for (tag = 0; tag < ATA_MAX_QUEUE; tag++) {
		struct ata_queued_cmd *qc = __ata_qc_from_tag(ap, tag);

		if (!(qc->flags & ATA_QCFLAG_FAILED))
			continue;
		if (qc->flags & ATA_QCFLAG_SENSE_VALID && !qc->err_mask)
			continue;

		nr_failed++;
	}

	if (!nr_failed && !ehc->i.err_mask)
		return;

	frozen = "";
	if (ap->pflags & ATA_PFLAG_FROZEN)
		frozen = " frozen";

	if (ehc->i.dev) {
		ata_dev_printk(ehc->i.dev, KERN_ERR, "exception Emask 0x%x "
			       "SAct 0x%x SErr 0x%x action 0x%x%s\n",
			       ehc->i.err_mask, ap->sactive, ehc->i.serror,
			       ehc->i.action, frozen);
		if (desc)
			ata_dev_printk(ehc->i.dev, KERN_ERR, "(%s)\n", desc);
	} else {
		ata_port_printk(ap, KERN_ERR, "exception Emask 0x%x "
				"SAct 0x%x SErr 0x%x action 0x%x%s\n",
				ehc->i.err_mask, ap->sactive, ehc->i.serror,
				ehc->i.action, frozen);
		if (desc)
			ata_port_printk(ap, KERN_ERR, "(%s)\n", desc);
	}

	for (tag = 0; tag < ATA_MAX_QUEUE; tag++) {
		static const char *dma_str[] = {
			[DMA_BIDIRECTIONAL]	= "bidi",
			[DMA_TO_DEVICE]		= "out",
			[DMA_FROM_DEVICE]	= "in",
			[DMA_NONE]		= "",
		};
		struct ata_queued_cmd *qc = __ata_qc_from_tag(ap, tag);
		struct ata_taskfile *cmd = &qc->tf, *res = &qc->result_tf;

		if (!(qc->flags & ATA_QCFLAG_FAILED) || !qc->err_mask)
			continue;

		ata_dev_printk(qc->dev, KERN_ERR,
			"cmd %02x/%02x:%02x:%02x:%02x:%02x/%02x:%02x:%02x:%02x:%02x/%02x "
			"tag %d cdb 0x%x data %u %s\n         "
			"res %02x/%02x:%02x:%02x:%02x:%02x/%02x:%02x:%02x:%02x:%02x/%02x "
			"Emask 0x%x (%s)\n",
			cmd->command, cmd->feature, cmd->nsect,
			cmd->lbal, cmd->lbam, cmd->lbah,
			cmd->hob_feature, cmd->hob_nsect,
			cmd->hob_lbal, cmd->hob_lbam, cmd->hob_lbah,
			cmd->device, qc->tag, qc->cdb[0], qc->nbytes,
			dma_str[qc->dma_dir],
			res->command, res->feature, res->nsect,
			res->lbal, res->lbam, res->lbah,
			res->hob_feature, res->hob_nsect,
			res->hob_lbal, res->hob_lbam, res->hob_lbah,
			res->device, qc->err_mask, ata_err_string(qc->err_mask));
	}
}

static int ata_do_reset(struct ata_port *ap, ata_reset_fn_t reset,
			unsigned int *classes)
{
	int i, rc;

	for (i = 0; i < ATA_MAX_DEVICES; i++)
		classes[i] = ATA_DEV_UNKNOWN;

	rc = reset(ap, classes);
	if (rc)
		return rc;

	/* If any class isn't ATA_DEV_UNKNOWN, consider classification
	 * complete and convert all ATA_DEV_UNKNOWN to ATA_DEV_NONE.
	 */
	for (i = 0; i < ATA_MAX_DEVICES; i++)
		if (classes[i] != ATA_DEV_UNKNOWN)
			break;

	if (i < ATA_MAX_DEVICES)
		for (i = 0; i < ATA_MAX_DEVICES; i++)
			if (classes[i] == ATA_DEV_UNKNOWN)
				classes[i] = ATA_DEV_NONE;

	return 0;
}
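
/*
 * For example, if the reset method reports { ATA_DEV_ATA,
 * ATA_DEV_UNKNOWN }, classification is considered done and slot 1
 * becomes ATA_DEV_NONE; if both slots stay ATA_DEV_UNKNOWN, the
 * classification attempt is treated as not having happened at all.
 */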

static int ata_eh_followup_srst_needed(int rc, int classify,
				       const unsigned int *classes)
{
	if (rc == -EAGAIN)
		return 1;
	if (rc != 0)
		return 0;
	if (classify && classes[0] == ATA_DEV_UNKNOWN)
		return 1;
	return 0;
}

static int ata_eh_reset(struct ata_port *ap, int classify,
			ata_prereset_fn_t prereset, ata_reset_fn_t softreset,
			ata_reset_fn_t hardreset, ata_postreset_fn_t postreset)
{
	struct ata_eh_context *ehc = &ap->eh_context;
	unsigned int *classes = ehc->classes;
	int tries = ATA_EH_RESET_TRIES;
	int verbose = !(ehc->i.flags & ATA_EHI_QUIET);
	unsigned int action;
	ata_reset_fn_t reset;
	int i, did_followup_srst, rc;

	/* about to reset */
	ata_eh_about_to_do(ap, NULL, ehc->i.action & ATA_EH_RESET_MASK);

	/* Determine which reset to use and record in ehc->i.action.
	 * prereset() may examine and modify it.
	 */
	action = ehc->i.action;
	ehc->i.action &= ~ATA_EH_RESET_MASK;
	if (softreset && (!hardreset || (!sata_set_spd_needed(ap) &&
					 !(action & ATA_EH_HARDRESET))))
		ehc->i.action |= ATA_EH_SOFTRESET;
	else
		ehc->i.action |= ATA_EH_HARDRESET;

	if (prereset) {
		rc = prereset(ap);
		if (rc) {
			if (rc == -ENOENT) {
				ata_port_printk(ap, KERN_DEBUG,
						"port disabled. ignoring.\n");
				ap->eh_context.i.action &= ~ATA_EH_RESET_MASK;

				for (i = 0; i < ATA_MAX_DEVICES; i++)
					classes[i] = ATA_DEV_NONE;

				rc = 0;
			} else
				ata_port_printk(ap, KERN_ERR,
					"prereset failed (errno=%d)\n", rc);
			return rc;
		}
	}

	/* prereset() might have modified ehc->i.action */
	if (ehc->i.action & ATA_EH_HARDRESET)
		reset = hardreset;
	else if (ehc->i.action & ATA_EH_SOFTRESET)
		reset = softreset;
	else {
		/* prereset told us not to reset, bang classes and return */
		for (i = 0; i < ATA_MAX_DEVICES; i++)
			classes[i] = ATA_DEV_NONE;
		return 0;
	}

	/* did prereset() screw up?  if so, fix up to avoid oopsing */
	if (!reset) {
		ata_port_printk(ap, KERN_ERR, "BUG: prereset() requested "
				"invalid reset type\n");
		if (softreset)
			reset = softreset;
		else
			reset = hardreset;
	}

 retry:
	/* shut up during boot probing */
	if (verbose)
		ata_port_printk(ap, KERN_INFO, "%s resetting port\n",
				reset == softreset ? "soft" : "hard");

	/* mark that this EH session started with reset */
	ehc->i.flags |= ATA_EHI_DID_RESET;

	rc = ata_do_reset(ap, reset, classes);

	did_followup_srst = 0;
	if (reset == hardreset &&
	    ata_eh_followup_srst_needed(rc, classify, classes)) {
		/* okay, let's do follow-up softreset */
		did_followup_srst = 1;
		reset = softreset;

		if (!reset) {
			ata_port_printk(ap, KERN_ERR,
					"follow-up softreset required "
					"but no softreset available\n");
			return -EINVAL;
		}

		ata_eh_about_to_do(ap, NULL, ATA_EH_RESET_MASK);
		rc = ata_do_reset(ap, reset, classes);

		if (rc == 0 && classify &&
		    classes[0] == ATA_DEV_UNKNOWN) {
			ata_port_printk(ap, KERN_ERR,
					"classification failed\n");
			return -EINVAL;
		}
	}

	if (rc && --tries) {
		const char *type;

		if (reset == softreset) {
			if (did_followup_srst)
				type = "follow-up soft";
			else
				type = "soft";
		} else
			type = "hard";

		ata_port_printk(ap, KERN_WARNING,
				"%sreset failed, retrying in 5 secs\n", type);
		ssleep(5);

		if (reset == hardreset)
			sata_down_spd_limit(ap);
		if (hardreset)
			reset = hardreset;
		goto retry;
	}

	if (rc == 0) {
		/* After the reset, the device state is PIO 0 and the
		 * controller state is undefined.  Record the mode.
		 */
		for (i = 0; i < ATA_MAX_DEVICES; i++)
			ap->device[i].pio_mode = XFER_PIO_0;

		if (postreset)
			postreset(ap, classes);

		/* reset successful, schedule revalidation */
		ata_eh_done(ap, NULL, ehc->i.action & ATA_EH_RESET_MASK);
		ehc->i.action |= ATA_EH_REVALIDATE;
	}

	return rc;
}

static int ata_eh_revalidate_and_attach(struct ata_port *ap,
					struct ata_device **r_failed_dev)
{
	struct ata_eh_context *ehc = &ap->eh_context;
	struct ata_device *dev;
	unsigned int new_mask = 0;
	unsigned long flags;
	int i, rc = 0;

	DPRINTK("ENTER\n");

	/* For PATA drive side cable detection to work, IDENTIFY must
	 * be done backwards such that PDIAG- is released by the slave
	 * device before the master device is identified.
	 */
	for (i = ATA_MAX_DEVICES - 1; i >= 0; i--) {
		unsigned int action, readid_flags = 0;

		dev = &ap->device[i];
		action = ata_eh_dev_action(dev);

		if (ehc->i.flags & ATA_EHI_DID_RESET)
			readid_flags |= ATA_READID_POSTRESET;

		if (action & ATA_EH_REVALIDATE && ata_dev_ready(dev)) {
			if (ata_port_offline(ap)) {
				rc = -EIO;
				goto err;
			}

			ata_eh_about_to_do(ap, dev, ATA_EH_REVALIDATE);
			rc = ata_dev_revalidate(dev, readid_flags);
			if (rc)
				goto err;

			ata_eh_done(ap, dev, ATA_EH_REVALIDATE);

			/* Configuration may have changed, reconfigure
			 * transfer mode.
			 */
			ehc->i.flags |= ATA_EHI_SETMODE;

			/* schedule the scsi_rescan_device() here */
			queue_work(ata_aux_wq, &(ap->scsi_rescan_task));
		} else if (dev->class == ATA_DEV_UNKNOWN &&
			   ehc->tries[dev->devno] &&
			   ata_class_enabled(ehc->classes[dev->devno])) {
			dev->class = ehc->classes[dev->devno];

			rc = ata_dev_read_id(dev, &dev->class, readid_flags,
					     dev->id);
			switch (rc) {
			case 0:
				new_mask |= 1 << i;
				break;
			case -ENOENT:
				/* IDENTIFY was issued to non-existent
				 * device.  No need to reset.  Just
				 * thaw and kill the device.
				 */
				ata_eh_thaw_port(ap);
				dev->class = ATA_DEV_UNKNOWN;
				break;
			default:
				dev->class = ATA_DEV_UNKNOWN;
				goto err;
			}
		}
	}

	/* PDIAG- should have been released, ask cable type if post-reset */
	if ((ehc->i.flags & ATA_EHI_DID_RESET) && ap->ops->cable_detect)
		ap->cbl = ap->ops->cable_detect(ap);

	/* Configure new devices forward such that user doesn't see
	 * device detection messages backwards.
	 */
	for (i = 0; i < ATA_MAX_DEVICES; i++) {
		dev = &ap->device[i];

		if (!(new_mask & (1 << i)))
			continue;

		ehc->i.flags |= ATA_EHI_PRINTINFO;
		rc = ata_dev_configure(dev);
		ehc->i.flags &= ~ATA_EHI_PRINTINFO;
		if (rc)
			goto err;

		spin_lock_irqsave(ap->lock, flags);
		ap->pflags |= ATA_PFLAG_SCSI_HOTPLUG;
		spin_unlock_irqrestore(ap->lock, flags);

		/* new device discovered, configure xfermode */
		ehc->i.flags |= ATA_EHI_SETMODE;
	}

	return 0;

 err:
	*r_failed_dev = dev;
	DPRINTK("EXIT rc=%d\n", rc);
	return rc;
}

#ifdef CONFIG_PM
/**
 *	ata_eh_suspend - handle suspend EH action
 *	@ap: target host port
 *	@r_failed_dev: result parameter to indicate failing device
 *
 *	Handle suspend EH action.  Disk devices are spun down and
 *	other types of devices are just marked suspended.  Once
 *	suspended, no EH action to the device is allowed until it is
 *	resumed.
 *
 *	LOCKING:
 *	Kernel thread context (may sleep).
 *
 *	RETURNS:
 *	0 on success, -errno otherwise
 */
static int ata_eh_suspend(struct ata_port *ap, struct ata_device **r_failed_dev)
{
	struct ata_device *dev;
	int i, rc = 0;

	DPRINTK("ENTER\n");

	for (i = 0; i < ATA_MAX_DEVICES; i++) {
		unsigned long flags;
		unsigned int action, err_mask;

		dev = &ap->device[i];
		action = ata_eh_dev_action(dev);

		if (!ata_dev_enabled(dev) || !(action & ATA_EH_SUSPEND))
			continue;

		WARN_ON(dev->flags & ATA_DFLAG_SUSPENDED);

		ata_eh_about_to_do(ap, dev, ATA_EH_SUSPEND);

		if (dev->class == ATA_DEV_ATA && !(action & ATA_EH_PM_FREEZE)) {
			/* flush cache */
			rc = ata_flush_cache(dev);
			if (rc)
				break;

			/* spin down */
			err_mask = ata_do_simple_cmd(dev, ATA_CMD_STANDBYNOW1);
			if (err_mask) {
				ata_dev_printk(dev, KERN_ERR, "failed to "
					       "spin down (err_mask=0x%x)\n",
					       err_mask);
				rc = -EIO;
				break;
			}
		}

		spin_lock_irqsave(ap->lock, flags);
		dev->flags |= ATA_DFLAG_SUSPENDED;
		spin_unlock_irqrestore(ap->lock, flags);

		ata_eh_done(ap, dev, ATA_EH_SUSPEND);
	}

	if (rc)
		*r_failed_dev = dev;

	DPRINTK("EXIT\n");
	return rc;
}
"soft" : "hard"); 1672 1673 /* mark that this EH session started with reset */ 1674 ehc->i.flags |= ATA_EHI_DID_RESET; 1675 1676 rc = ata_do_reset(ap, reset, classes); 1677 1678 did_followup_srst = 0; 1679 if (reset == hardreset && 1680 ata_eh_followup_srst_needed(rc, classify, classes)) { 1681 /* okay, let's do follow-up softreset */ 1682 did_followup_srst = 1; 1683 reset = softreset; 1684 1685 if (!reset) { 1686 ata_port_printk(ap, KERN_ERR, 1687 "follow-up softreset required " 1688 "but no softreset avaliable\n"); 1689 return -EINVAL; 1690 } 1691 1692 ata_eh_about_to_do(ap, NULL, ATA_EH_RESET_MASK); 1693 rc = ata_do_reset(ap, reset, classes); 1694 1695 if (rc == 0 && classify && 1696 classes[0] == ATA_DEV_UNKNOWN) { 1697 ata_port_printk(ap, KERN_ERR, 1698 "classification failed\n"); 1699 return -EINVAL; 1700 } 1701 } 1702 1703 if (rc && --tries) { 1704 const char *type; 1705 1706 if (reset == softreset) { 1707 if (did_followup_srst) 1708 type = "follow-up soft"; 1709 else 1710 type = "soft"; 1711 } else 1712 type = "hard"; 1713 1714 ata_port_printk(ap, KERN_WARNING, 1715 "%sreset failed, retrying in 5 secs\n", type); 1716 ssleep(5); 1717 1718 if (reset == hardreset) 1719 sata_down_spd_limit(ap); 1720 if (hardreset) 1721 reset = hardreset; 1722 goto retry; 1723 } 1724 1725 if (rc == 0) { 1726 /* After the reset, the device state is PIO 0 and the 1727 * controller state is undefined. Record the mode. 1728 */ 1729 for (i = 0; i < ATA_MAX_DEVICES; i++) 1730 ap->device[i].pio_mode = XFER_PIO_0; 1731 1732 if (postreset) 1733 postreset(ap, classes); 1734 1735 /* reset successful, schedule revalidation */ 1736 ata_eh_done(ap, NULL, ehc->i.action & ATA_EH_RESET_MASK); 1737 ehc->i.action |= ATA_EH_REVALIDATE; 1738 } 1739 1740 return rc; 1741 } 1742 1743 static int ata_eh_revalidate_and_attach(struct ata_port *ap, 1744 struct ata_device **r_failed_dev) 1745 { 1746 struct ata_eh_context *ehc = &ap->eh_context; 1747 struct ata_device *dev; 1748 unsigned int new_mask = 0; 1749 unsigned long flags; 1750 int i, rc = 0; 1751 1752 DPRINTK("ENTER\n"); 1753 1754 /* For PATA drive side cable detection to work, IDENTIFY must 1755 * be done backwards such that PDIAG- is released by the slave 1756 * device before the master device is identified. 1757 */ 1758 for (i = ATA_MAX_DEVICES - 1; i >= 0; i--) { 1759 unsigned int action, readid_flags = 0; 1760 1761 dev = &ap->device[i]; 1762 action = ata_eh_dev_action(dev); 1763 1764 if (ehc->i.flags & ATA_EHI_DID_RESET) 1765 readid_flags |= ATA_READID_POSTRESET; 1766 1767 if (action & ATA_EH_REVALIDATE && ata_dev_ready(dev)) { 1768 if (ata_port_offline(ap)) { 1769 rc = -EIO; 1770 goto err; 1771 } 1772 1773 ata_eh_about_to_do(ap, dev, ATA_EH_REVALIDATE); 1774 rc = ata_dev_revalidate(dev, readid_flags); 1775 if (rc) 1776 goto err; 1777 1778 ata_eh_done(ap, dev, ATA_EH_REVALIDATE); 1779 1780 /* Configuration may have changed, reconfigure 1781 * transfer mode. 1782 */ 1783 ehc->i.flags |= ATA_EHI_SETMODE; 1784 1785 /* schedule the scsi_rescan_device() here */ 1786 queue_work(ata_aux_wq, &(ap->scsi_rescan_task)); 1787 } else if (dev->class == ATA_DEV_UNKNOWN && 1788 ehc->tries[dev->devno] && 1789 ata_class_enabled(ehc->classes[dev->devno])) { 1790 dev->class = ehc->classes[dev->devno]; 1791 1792 rc = ata_dev_read_id(dev, &dev->class, readid_flags, 1793 dev->id); 1794 switch (rc) { 1795 case 0: 1796 new_mask |= 1 << i; 1797 break; 1798 case -ENOENT: 1799 /* IDENTIFY was issued to non-existent 1800 * device. No need to reset. Just 1801 * thaw and kill the device. 

/**
 *	ata_eh_resume - handle resume EH action
 *	@ap: target host port
 *	@r_failed_dev: result parameter to indicate failing device
 *
 *	Handle resume EH action.  Target devices are already reset and
 *	revalidated.  Spinning up is the only operation left.
 *
 *	LOCKING:
 *	Kernel thread context (may sleep).
 *
 *	RETURNS:
 *	0 on success, -errno otherwise
 */
static int ata_eh_resume(struct ata_port *ap, struct ata_device **r_failed_dev)
{
	struct ata_device *dev;
	int i, rc = 0;

	DPRINTK("ENTER\n");

	for (i = 0; i < ATA_MAX_DEVICES; i++) {
		unsigned int action, err_mask;

		dev = &ap->device[i];
		action = ata_eh_dev_action(dev);

		if (!ata_dev_enabled(dev) || !(action & ATA_EH_RESUME))
			continue;

		ata_eh_about_to_do(ap, dev, ATA_EH_RESUME);

		if (dev->class == ATA_DEV_ATA && !(action & ATA_EH_PM_FREEZE)) {
			err_mask = ata_do_simple_cmd(dev,
						     ATA_CMD_IDLEIMMEDIATE);
			if (err_mask) {
				ata_dev_printk(dev, KERN_ERR, "failed to "
					       "spin up (err_mask=0x%x)\n",
					       err_mask);
				rc = -EIO;
				break;
			}
		}

		ata_eh_done(ap, dev, ATA_EH_RESUME);
	}

	if (rc)
		*r_failed_dev = dev;

	DPRINTK("EXIT\n");
	return rc;
}
#endif /* CONFIG_PM */

static int ata_port_nr_enabled(struct ata_port *ap)
{
	int i, cnt = 0;

	for (i = 0; i < ATA_MAX_DEVICES; i++)
		if (ata_dev_enabled(&ap->device[i]))
			cnt++;
	return cnt;
}

static int ata_port_nr_vacant(struct ata_port *ap)
{
	int i, cnt = 0;

	for (i = 0; i < ATA_MAX_DEVICES; i++)
		if (ap->device[i].class == ATA_DEV_UNKNOWN)
			cnt++;
	return cnt;
}

static int ata_eh_skip_recovery(struct ata_port *ap)
{
	struct ata_eh_context *ehc = &ap->eh_context;
	int i;

	/* skip if all possible devices are suspended */
	for (i = 0; i < ata_port_max_devices(ap); i++) {
		struct ata_device *dev = &ap->device[i];

		if (!(dev->flags & ATA_DFLAG_SUSPENDED))
			break;
	}

	if (i == ata_port_max_devices(ap))
		return 1;

	/* thaw frozen port, resume link and recover failed devices */
	if ((ap->pflags & ATA_PFLAG_FROZEN) ||
	    (ehc->i.flags & ATA_EHI_RESUME_LINK) || ata_port_nr_enabled(ap))
		return 0;

	/* skip if class codes for all vacant slots are ATA_DEV_NONE */
	for (i = 0; i < ATA_MAX_DEVICES; i++) {
		struct ata_device *dev = &ap->device[i];

		if (dev->class == ATA_DEV_UNKNOWN &&
		    ehc->classes[dev->devno] != ATA_DEV_NONE)
			return 0;
	}

	return 1;
}
/**
 * ata_eh_recover - recover host port after error
 * @ap: host port to recover
 * @prereset: prereset method (can be NULL)
 * @softreset: softreset method (can be NULL)
 * @hardreset: hardreset method (can be NULL)
 * @postreset: postreset method (can be NULL)
 *
 * This is the alpha and omega, yin and yang, heart and soul of
 * libata exception handling.  On entry, actions required to
 * recover the port and hotplug requests are recorded in
 * eh_context.  This function executes all the operations with
 * appropriate retries and fallbacks to resurrect failed
 * devices, detach goners and greet newcomers.
 *
 * LOCKING:
 * Kernel thread context (may sleep).
 *
 * RETURNS:
 * 0 on success, -errno on failure.
 */
static int ata_eh_recover(struct ata_port *ap, ata_prereset_fn_t prereset,
                          ata_reset_fn_t softreset, ata_reset_fn_t hardreset,
                          ata_postreset_fn_t postreset)
{
        struct ata_eh_context *ehc = &ap->eh_context;
        struct ata_device *dev;
        int i, rc;

        DPRINTK("ENTER\n");

        /* prep for recovery */
        for (i = 0; i < ATA_MAX_DEVICES; i++) {
                dev = &ap->device[i];

                ehc->tries[dev->devno] = ATA_EH_DEV_TRIES;

                /* collect port action mask recorded in dev actions */
                ehc->i.action |= ehc->i.dev_action[i] & ~ATA_EH_PERDEV_MASK;
                ehc->i.dev_action[i] &= ATA_EH_PERDEV_MASK;

                /* process hotplug request */
                if (dev->flags & ATA_DFLAG_DETACH)
                        ata_eh_detach_dev(dev);

                if (!ata_dev_enabled(dev) &&
                    ((ehc->i.probe_mask & (1 << dev->devno)) &&
                     !(ehc->did_probe_mask & (1 << dev->devno)))) {
                        ata_eh_detach_dev(dev);
                        ata_dev_init(dev);
                        ehc->did_probe_mask |= (1 << dev->devno);
                        ehc->i.action |= ATA_EH_SOFTRESET;
                }
        }

 retry:
        rc = 0;

        /* if UNLOADING, finish immediately */
        if (ap->pflags & ATA_PFLAG_UNLOADING)
                goto out;

        /* prep for resume */
        ata_eh_prep_resume(ap);

        /* skip EH if possible */
        if (ata_eh_skip_recovery(ap))
                ehc->i.action = 0;

        for (i = 0; i < ATA_MAX_DEVICES; i++)
                ehc->classes[i] = ATA_DEV_UNKNOWN;

        /* reset */
        if (ehc->i.action & ATA_EH_RESET_MASK) {
                ata_eh_freeze_port(ap);

                rc = ata_eh_reset(ap, ata_port_nr_vacant(ap), prereset,
                                  softreset, hardreset, postreset);
                if (rc) {
                        ata_port_printk(ap, KERN_ERR,
                                        "reset failed, giving up\n");
                        goto out;
                }

                ata_eh_thaw_port(ap);
        }

        /* revalidate existing devices and attach new ones */
        rc = ata_eh_revalidate_and_attach(ap, &dev);
        if (rc)
                goto dev_fail;

        /* resume devices */
        rc = ata_eh_resume(ap, &dev);
        if (rc)
                goto dev_fail;

        /* configure transfer mode if necessary */
        if (ehc->i.flags & ATA_EHI_SETMODE) {
                rc = ata_set_mode(ap, &dev);
                if (rc)
                        goto dev_fail;
                ehc->i.flags &= ~ATA_EHI_SETMODE;
        }

        /* suspend devices */
        rc = ata_eh_suspend(ap, &dev);
        if (rc)
                goto dev_fail;

        goto out;

 dev_fail:
        ehc->tries[dev->devno]--;

        switch (rc) {
        case -EINVAL:
                /* eeek, something went very wrong, give up */
                ehc->tries[dev->devno] = 0;
                break;

        case -ENODEV:
                /* device missing or wrong IDENTIFY data, schedule probing */
                ehc->i.probe_mask |= (1 << dev->devno);
                /* give it just one more chance */
                ehc->tries[dev->devno] = min(ehc->tries[dev->devno], 1);
                /* fall through */
        case -EIO:
                if (ehc->tries[dev->devno] == 1) {
                        /* This is the last chance, better to slow
                         * down than lose it.
                         */
                        sata_down_spd_limit(ap);
                        ata_down_xfermask_limit(dev, ATA_DNXFER_PIO);
                }
        }

        if (ata_dev_enabled(dev) && !ehc->tries[dev->devno]) {
                /* disable device if it has used up all its chances */
                ata_dev_disable(dev);

                /* detach if offline */
                if (ata_port_offline(ap))
                        ata_eh_detach_dev(dev);

                /* probe if requested */
                if ((ehc->i.probe_mask & (1 << dev->devno)) &&
                    !(ehc->did_probe_mask & (1 << dev->devno))) {
                        ata_eh_detach_dev(dev);
                        ata_dev_init(dev);

                        ehc->tries[dev->devno] = ATA_EH_DEV_TRIES;
                        ehc->did_probe_mask |= (1 << dev->devno);
                        ehc->i.action |= ATA_EH_SOFTRESET;
                }
        } else {
                /* soft didn't work?  be haaaaard */
                if (ehc->i.flags & ATA_EHI_DID_RESET)
                        ehc->i.action |= ATA_EH_HARDRESET;
                else
                        ehc->i.action |= ATA_EH_SOFTRESET;
        }

        if (ata_port_nr_enabled(ap)) {
                ata_port_printk(ap, KERN_WARNING, "failed to recover some "
                                "devices, retrying in 5 secs\n");
                ssleep(5);
        } else {
                /* no device left, repeat fast */
                msleep(500);
        }

        goto retry;

 out:
        if (rc) {
                for (i = 0; i < ATA_MAX_DEVICES; i++)
                        ata_dev_disable(&ap->device[i]);
        }

        DPRINTK("EXIT, rc=%d\n", rc);
        return rc;
}
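
/*
 * Worked example of the dev_fail accounting above, assuming
 * ATA_EH_DEV_TRIES is 3 (its usual value).  For a device that keeps
 * failing with -EIO:
 *
 *	attempt #1 fails: tries 3 -> 2, retry after 5s
 *	attempt #2 fails: tries 2 -> 1, link speed and transfer mode
 *	                  are lowered before the final attempt
 *	attempt #3 fails: tries 1 -> 0, the device is disabled, unless
 *	                  a pending probe request resets the budget to
 *	                  ATA_EH_DEV_TRIES
 */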
/**
 * ata_eh_finish - finish up EH
 * @ap: host port to finish EH for
 *
 * Recovery is complete.  Clean up EH states and retry or finish
 * failed qcs.
 *
 * LOCKING:
 * None.
 */
static void ata_eh_finish(struct ata_port *ap)
{
        int tag;

        /* retry or finish qcs */
        for (tag = 0; tag < ATA_MAX_QUEUE; tag++) {
                struct ata_queued_cmd *qc = __ata_qc_from_tag(ap, tag);

                if (!(qc->flags & ATA_QCFLAG_FAILED))
                        continue;

                if (qc->err_mask) {
                        /* FIXME: Once EH migration is complete,
                         * generate sense data in this function,
                         * considering both err_mask and tf.
                         */
                        if (qc->err_mask & AC_ERR_INVALID)
                                ata_eh_qc_complete(qc);
                        else
                                ata_eh_qc_retry(qc);
                } else {
                        if (qc->flags & ATA_QCFLAG_SENSE_VALID) {
                                ata_eh_qc_complete(qc);
                        } else {
                                /* feed zero TF to sense generation */
                                memset(&qc->result_tf, 0, sizeof(qc->result_tf));
                                ata_eh_qc_retry(qc);
                        }
                }
        }
}

/**
 * ata_do_eh - do standard error handling
 * @ap: host port to handle error for
 * @prereset: prereset method (can be NULL)
 * @softreset: softreset method (can be NULL)
 * @hardreset: hardreset method (can be NULL)
 * @postreset: postreset method (can be NULL)
 *
 * Perform standard error handling sequence.
 *
 * LOCKING:
 * Kernel thread context (may sleep).
 */
void ata_do_eh(struct ata_port *ap, ata_prereset_fn_t prereset,
               ata_reset_fn_t softreset, ata_reset_fn_t hardreset,
               ata_postreset_fn_t postreset)
{
        ata_eh_autopsy(ap);
        ata_eh_report(ap);
        ata_eh_recover(ap, prereset, softreset, hardreset, postreset);
        ata_eh_finish(ap);
}
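
/*
 * Example (hypothetical, not built): a low-level driver typically
 * implements its ->error_handler by handing its reset methods to
 * ata_do_eh().  The standard libata reset helpers are used below;
 * only the wrapper name is made up:
 *
 *	static void my_sata_error_handler(struct ata_port *ap)
 *	{
 *		ata_do_eh(ap, ata_std_prereset, ata_std_softreset,
 *			  sata_std_hardreset, ata_std_postreset);
 *	}
 */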
#ifdef CONFIG_PM
/**
 * ata_eh_handle_port_suspend - perform port suspend operation
 * @ap: port to suspend
 *
 * Suspend @ap.
 *
 * LOCKING:
 * Kernel thread context (may sleep).
 */
static void ata_eh_handle_port_suspend(struct ata_port *ap)
{
        unsigned long flags;
        int rc = 0;

        /* are we suspending? */
        spin_lock_irqsave(ap->lock, flags);
        if (!(ap->pflags & ATA_PFLAG_PM_PENDING) ||
            ap->pm_mesg.event == PM_EVENT_ON) {
                spin_unlock_irqrestore(ap->lock, flags);
                return;
        }
        spin_unlock_irqrestore(ap->lock, flags);

        WARN_ON(ap->pflags & ATA_PFLAG_SUSPENDED);

        /* suspend */
        ata_eh_freeze_port(ap);

        if (ap->ops->port_suspend)
                rc = ap->ops->port_suspend(ap, ap->pm_mesg);

        /* report result */
        spin_lock_irqsave(ap->lock, flags);

        ap->pflags &= ~ATA_PFLAG_PM_PENDING;
        if (rc == 0)
                ap->pflags |= ATA_PFLAG_SUSPENDED;
        else
                ata_port_schedule_eh(ap);

        if (ap->pm_result) {
                *ap->pm_result = rc;
                ap->pm_result = NULL;
        }

        spin_unlock_irqrestore(ap->lock, flags);
}

/**
 * ata_eh_handle_port_resume - perform port resume operation
 * @ap: port to resume
 *
 * Resume @ap.
 *
 * This function also waits up to one second until all devices
 * hanging off this port have requested the resume EH action.  This
 * prevents invoking EH, and thus reset, multiple times on resume.
 *
 * On DPM resume, where some devices might not be resumed together,
 * this may delay port resume by up to one second, but such DPM
 * resumes are rare and a one second delay isn't too bad.
 *
 * LOCKING:
 * Kernel thread context (may sleep).
 */
static void ata_eh_handle_port_resume(struct ata_port *ap)
{
        unsigned long timeout;
        unsigned long flags;
        int i, rc = 0;

        /* are we resuming? */
        spin_lock_irqsave(ap->lock, flags);
        if (!(ap->pflags & ATA_PFLAG_PM_PENDING) ||
            ap->pm_mesg.event != PM_EVENT_ON) {
                spin_unlock_irqrestore(ap->lock, flags);
                return;
        }
        spin_unlock_irqrestore(ap->lock, flags);

        /* spurious? */
        if (!(ap->pflags & ATA_PFLAG_SUSPENDED))
                goto done;

        if (ap->ops->port_resume)
                rc = ap->ops->port_resume(ap);

        /* give devices time to request EH */
        timeout = jiffies + HZ; /* 1s max */
        while (1) {
                for (i = 0; i < ATA_MAX_DEVICES; i++) {
                        struct ata_device *dev = &ap->device[i];
                        unsigned int action = ata_eh_dev_action(dev);

                        if ((dev->flags & ATA_DFLAG_SUSPENDED) &&
                            !(action & ATA_EH_RESUME))
                                break;
                }

                if (i == ATA_MAX_DEVICES || time_after(jiffies, timeout))
                        break;
                msleep(10);
        }

 done:
        spin_lock_irqsave(ap->lock, flags);
        ap->pflags &= ~(ATA_PFLAG_PM_PENDING | ATA_PFLAG_SUSPENDED);
        if (ap->pm_result) {
                *ap->pm_result = rc;
                ap->pm_result = NULL;
        }
        spin_unlock_irqrestore(ap->lock, flags);
}
#endif /* CONFIG_PM */
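
/*
 * Condensed sketch (not built) of the requesting side of the PM
 * handshake that the two handlers above complete, as performed by
 * the suspend/resume entry points in contemporary libata-core: the
 * requester records the PM message and a result pointer, schedules
 * EH and waits, then picks up the rc written through ap->pm_result:
 *
 *	ap->pm_mesg = mesg;
 *	ap->pm_result = &rc;
 *	ap->pflags |= ATA_PFLAG_PM_PENDING;
 *	ata_port_schedule_eh(ap);
 *	ata_port_wait_eh(ap);
 */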