/*
 * libata-eh.c - libata error handling
 *
 * Maintained by:  Jeff Garzik <jgarzik@pobox.com>
 *                 Please ALWAYS copy linux-ide@vger.kernel.org
 *                 on emails.
 *
 * Copyright 2006 Tejun Heo <htejun@gmail.com>
 *
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation; either version 2, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; see the file COPYING.  If not, write to
 * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139,
 * USA.
 *
 *
 * libata documentation is available via 'make {ps|pdf}docs',
 * as Documentation/DocBook/libata.*
 *
 * Hardware documentation available from http://www.t13.org/ and
 * http://www.sata-io.org/
 *
 */

#include <linux/kernel.h>
#include <scsi/scsi.h>
#include <scsi/scsi_host.h>
#include <scsi/scsi_eh.h>
#include <scsi/scsi_device.h>
#include <scsi/scsi_cmnd.h>
#include "../scsi/scsi_transport_api.h"

#include <linux/libata.h>

#include "libata.h"

enum {
        ATA_EH_SPDN_NCQ_OFF             = (1 << 0),
        ATA_EH_SPDN_SPEED_DOWN          = (1 << 1),
        ATA_EH_SPDN_FALLBACK_TO_PIO     = (1 << 2),
};

static void __ata_port_freeze(struct ata_port *ap);
static void ata_eh_finish(struct ata_port *ap);
#ifdef CONFIG_PM
static void ata_eh_handle_port_suspend(struct ata_port *ap);
static void ata_eh_handle_port_resume(struct ata_port *ap);
static int ata_eh_suspend(struct ata_port *ap,
                          struct ata_device **r_failed_dev);
static void ata_eh_prep_resume(struct ata_port *ap);
static int ata_eh_resume(struct ata_port *ap, struct ata_device **r_failed_dev);
#else /* CONFIG_PM */
static void ata_eh_handle_port_suspend(struct ata_port *ap)
{ }

static void ata_eh_handle_port_resume(struct ata_port *ap)
{ }

static int ata_eh_suspend(struct ata_port *ap, struct ata_device **r_failed_dev)
{
        return 0;
}

static void ata_eh_prep_resume(struct ata_port *ap)
{ }

static int ata_eh_resume(struct ata_port *ap, struct ata_device **r_failed_dev)
{
        return 0;
}
#endif /* CONFIG_PM */

static void ata_ering_record(struct ata_ering *ering, int is_io,
                             unsigned int err_mask)
{
        struct ata_ering_entry *ent;

        WARN_ON(!err_mask);

        ering->cursor++;
        ering->cursor %= ATA_ERING_SIZE;

        ent = &ering->ring[ering->cursor];
        ent->is_io = is_io;
        ent->err_mask = err_mask;
        ent->timestamp = get_jiffies_64();
}

static void ata_ering_clear(struct ata_ering *ering)
{
        memset(ering, 0, sizeof(*ering));
}

static int ata_ering_map(struct ata_ering *ering,
                         int (*map_fn)(struct ata_ering_entry *, void *),
                         void *arg)
{
        int idx, rc = 0;
        struct ata_ering_entry *ent;

        idx = ering->cursor;
        do {
                ent = &ering->ring[idx];
                if (!ent->err_mask)
                        break;
                rc = map_fn(ent, arg);
                if (rc)
                        break;
                idx = (idx - 1 + ATA_ERING_SIZE) % ATA_ERING_SIZE;
        } while (idx != ering->cursor);

        return rc;
}

static unsigned int ata_eh_dev_action(struct ata_device *dev)
{
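        /* The effective EH action for a device is the union of the
         * port-wide action mask and the bits recorded specifically
         * for this device.
         */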
        struct ata_eh_context *ehc = &dev->ap->eh_context;

        return ehc->i.action | ehc->i.dev_action[dev->devno];
}

static void ata_eh_clear_action(struct ata_device *dev,
                                struct ata_eh_info *ehi, unsigned int action)
{
        int i;

        if (!dev) {
                ehi->action &= ~action;
                for (i = 0; i < ATA_MAX_DEVICES; i++)
                        ehi->dev_action[i] &= ~action;
        } else {
                /* doesn't make sense for port-wide EH actions */
                WARN_ON(!(action & ATA_EH_PERDEV_MASK));

                /* break ehi->action into ehi->dev_action */
                if (ehi->action & action) {
                        for (i = 0; i < ATA_MAX_DEVICES; i++)
                                ehi->dev_action[i] |= ehi->action & action;
                        ehi->action &= ~action;
                }

                /* turn off the specified per-dev action */
                ehi->dev_action[dev->devno] &= ~action;
        }
}

/**
 * ata_scsi_timed_out - SCSI layer time out callback
 * @cmd: timed out SCSI command
 *
 * Handles SCSI layer timeout.  We race with normal completion of
 * the qc for @cmd.  If the qc is already gone, we lose and let
 * the scsi command finish (EH_HANDLED).  Otherwise, the qc has
 * timed out and EH should be invoked.  Prevent ata_qc_complete()
 * from finishing it by setting EH_SCHEDULED and return
 * EH_NOT_HANDLED.
 *
 * TODO: kill this function once old EH is gone.
 *
 * LOCKING:
 * Called from timer context
 *
 * RETURNS:
 * EH_HANDLED or EH_NOT_HANDLED
 */
enum scsi_eh_timer_return ata_scsi_timed_out(struct scsi_cmnd *cmd)
{
        struct Scsi_Host *host = cmd->device->host;
        struct ata_port *ap = ata_shost_to_port(host);
        unsigned long flags;
        struct ata_queued_cmd *qc;
        enum scsi_eh_timer_return ret;

        DPRINTK("ENTER\n");

        if (ap->ops->error_handler) {
                ret = EH_NOT_HANDLED;
                goto out;
        }

        ret = EH_HANDLED;
        spin_lock_irqsave(ap->lock, flags);
        qc = ata_qc_from_tag(ap, ap->active_tag);
        if (qc) {
                WARN_ON(qc->scsicmd != cmd);
                qc->flags |= ATA_QCFLAG_EH_SCHEDULED;
                qc->err_mask |= AC_ERR_TIMEOUT;
                ret = EH_NOT_HANDLED;
        }
        spin_unlock_irqrestore(ap->lock, flags);

 out:
        DPRINTK("EXIT, ret=%d\n", ret);
        return ret;
}

/**
 * ata_scsi_error - SCSI layer error handler callback
 * @host: SCSI host on which error occurred
 *
 * Handles SCSI-layer-thrown error events.
 *
 * LOCKING:
 * Inherited from SCSI layer (none, can sleep)
 *
 * RETURNS:
 * Zero.
 */
void ata_scsi_error(struct Scsi_Host *host)
{
        struct ata_port *ap = ata_shost_to_port(host);
        int i, repeat_cnt = ATA_EH_MAX_REPEAT;
        unsigned long flags;

        DPRINTK("ENTER\n");

        /* synchronize with port task */
        ata_port_flush_task(ap);

        /* synchronize with host lock and sort out timeouts */

        /* For new EH, all qcs are finished in one of three ways -
         * normal completion, error completion, and SCSI timeout.
         * Both completions can race against SCSI timeout.  When normal
         * completion wins, the qc never reaches EH.  When error
         * completion wins, the qc has ATA_QCFLAG_FAILED set.
         *
         * When SCSI timeout wins, things are a bit more complex.
         * Normal or error completion can occur after the timeout but
         * before this point.  In such cases, both types of
         * completions are honored.  A scmd is determined to have
         * timed out iff its associated qc is active and not failed.
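         *
         * For example, if a command completes normally an instant
         * after its SCSI timeout fires, the qc is already freed by
         * the time the loop below runs; the scmd then has no matching
         * active qc and is simply finished as a successful completion.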
         */
        if (ap->ops->error_handler) {
                struct scsi_cmnd *scmd, *tmp;
                int nr_timedout = 0;

                spin_lock_irqsave(ap->lock, flags);

                list_for_each_entry_safe(scmd, tmp, &host->eh_cmd_q, eh_entry) {
                        struct ata_queued_cmd *qc;

                        for (i = 0; i < ATA_MAX_QUEUE; i++) {
                                qc = __ata_qc_from_tag(ap, i);
                                if (qc->flags & ATA_QCFLAG_ACTIVE &&
                                    qc->scsicmd == scmd)
                                        break;
                        }

                        if (i < ATA_MAX_QUEUE) {
                                /* the scmd has an associated qc */
                                if (!(qc->flags & ATA_QCFLAG_FAILED)) {
                                        /* which hasn't failed yet, timeout */
                                        qc->err_mask |= AC_ERR_TIMEOUT;
                                        qc->flags |= ATA_QCFLAG_FAILED;
                                        nr_timedout++;
                                }
                        } else {
                                /* Normal completion occurred after
                                 * SCSI timeout but before this point.
                                 * Successfully complete it.
                                 */
                                scmd->retries = scmd->allowed;
                                scsi_eh_finish_cmd(scmd, &ap->eh_done_q);
                        }
                }

                /* If we have timed out qcs, they belong to EH from
                 * this point but the state of the controller is
                 * unknown.  Freeze the port to make sure the IRQ
                 * handler doesn't diddle with those qcs.  This must
                 * be done atomically w.r.t. setting QCFLAG_FAILED.
                 */
                if (nr_timedout)
                        __ata_port_freeze(ap);

                spin_unlock_irqrestore(ap->lock, flags);
        } else
                spin_unlock_wait(ap->lock);

 repeat:
        /* invoke error handler */
        if (ap->ops->error_handler) {
                /* process port resume request */
                ata_eh_handle_port_resume(ap);

                /* fetch & clear EH info */
                spin_lock_irqsave(ap->lock, flags);

                memset(&ap->eh_context, 0, sizeof(ap->eh_context));
                ap->eh_context.i = ap->eh_info;
                memset(&ap->eh_info, 0, sizeof(ap->eh_info));

                ap->pflags |= ATA_PFLAG_EH_IN_PROGRESS;
                ap->pflags &= ~ATA_PFLAG_EH_PENDING;

                spin_unlock_irqrestore(ap->lock, flags);

                /* invoke EH, skip if unloading or suspended */
                if (!(ap->pflags & (ATA_PFLAG_UNLOADING | ATA_PFLAG_SUSPENDED)))
                        ap->ops->error_handler(ap);
                else
                        ata_eh_finish(ap);

                /* process port suspend request */
                ata_eh_handle_port_suspend(ap);

                /* Exception might have happened after ->error_handler
                 * recovered the port but before this point.  Repeat
                 * EH in such case.
                 */
                spin_lock_irqsave(ap->lock, flags);

                if (ap->pflags & ATA_PFLAG_EH_PENDING) {
                        if (--repeat_cnt) {
                                ata_port_printk(ap, KERN_INFO,
                                        "EH pending after completion, "
                                        "repeating EH (cnt=%d)\n", repeat_cnt);
                                spin_unlock_irqrestore(ap->lock, flags);
                                goto repeat;
                        }
                        ata_port_printk(ap, KERN_ERR, "EH pending after %d "
                                        "tries, giving up\n", ATA_EH_MAX_REPEAT);
                }

                /* this run is complete, make sure EH info is clear */
                memset(&ap->eh_info, 0, sizeof(ap->eh_info));

                /* Clear host_eh_scheduled while holding ap->lock such
                 * that if exception occurs after this point but
                 * before EH completion, SCSI midlayer will
                 * re-initiate EH.
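                 * Clearing it before taking ap->lock would leave a
                 * window where a new exception gets recorded but the
                 * midlayer never calls back into EH to service it.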
                 */
                host->host_eh_scheduled = 0;

                spin_unlock_irqrestore(ap->lock, flags);
        } else {
                WARN_ON(ata_qc_from_tag(ap, ap->active_tag) == NULL);
                ap->ops->eng_timeout(ap);
        }

        /* finish or retry handled scmd's and clean up */
        WARN_ON(host->host_failed || !list_empty(&host->eh_cmd_q));

        scsi_eh_flush_done_q(&ap->eh_done_q);

        /* clean up */
        spin_lock_irqsave(ap->lock, flags);

        if (ap->pflags & ATA_PFLAG_LOADING)
                ap->pflags &= ~ATA_PFLAG_LOADING;
        else if (ap->pflags & ATA_PFLAG_SCSI_HOTPLUG)
                queue_delayed_work(ata_aux_wq, &ap->hotplug_task, 0);

        if (ap->pflags & ATA_PFLAG_RECOVERED)
                ata_port_printk(ap, KERN_INFO, "EH complete\n");

        ap->pflags &= ~(ATA_PFLAG_SCSI_HOTPLUG | ATA_PFLAG_RECOVERED);

        /* tell wait_eh that we're done */
        ap->pflags &= ~ATA_PFLAG_EH_IN_PROGRESS;
        wake_up_all(&ap->eh_wait_q);

        spin_unlock_irqrestore(ap->lock, flags);

        DPRINTK("EXIT\n");
}

/**
 * ata_port_wait_eh - Wait for the currently pending EH to complete
 * @ap: Port to wait EH for
 *
 * Wait until the currently pending EH is complete.
 *
 * LOCKING:
 * Kernel thread context (may sleep).
 */
void ata_port_wait_eh(struct ata_port *ap)
{
        unsigned long flags;
        DEFINE_WAIT(wait);

 retry:
        spin_lock_irqsave(ap->lock, flags);

        while (ap->pflags & (ATA_PFLAG_EH_PENDING | ATA_PFLAG_EH_IN_PROGRESS)) {
                prepare_to_wait(&ap->eh_wait_q, &wait, TASK_UNINTERRUPTIBLE);
                spin_unlock_irqrestore(ap->lock, flags);
                schedule();
                spin_lock_irqsave(ap->lock, flags);
        }
        finish_wait(&ap->eh_wait_q, &wait);

        spin_unlock_irqrestore(ap->lock, flags);

        /* make sure SCSI EH is complete */
        if (scsi_host_in_recovery(ap->scsi_host)) {
                msleep(10);
                goto retry;
        }
}

/**
 * ata_qc_timeout - Handle timeout of queued command
 * @qc: Command that timed out
 *
 * Some part of the kernel (currently, only the SCSI layer)
 * has noticed that the active command on the port has not
 * completed after a specified length of time.  Handle this
 * condition by disabling DMA (if necessary) and completing
 * transactions, with error if necessary.
 *
 * This also handles the case of the "lost interrupt", where
 * for some reason (possibly hardware bug, possibly driver bug)
 * an interrupt was not delivered to the driver, even though the
 * transaction completed successfully.
 *
 * TODO: kill this function once old EH is gone.
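 *
 * Note that this path is reachable only for drivers still using
 * the old EH model (no ->error_handler callback); ports using
 * new EH never call it.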
 *
 * LOCKING:
 * Inherited from SCSI layer (none, can sleep)
 */
static void ata_qc_timeout(struct ata_queued_cmd *qc)
{
        struct ata_port *ap = qc->ap;
        u8 host_stat = 0, drv_stat;
        unsigned long flags;

        DPRINTK("ENTER\n");

        ap->hsm_task_state = HSM_ST_IDLE;

        spin_lock_irqsave(ap->lock, flags);

        switch (qc->tf.protocol) {

        case ATA_PROT_DMA:
        case ATA_PROT_ATAPI_DMA:
                host_stat = ap->ops->bmdma_status(ap);

                /* before we do anything else, clear DMA-Start bit */
                ap->ops->bmdma_stop(qc);

                /* fall through */

        default:
                ata_altstatus(ap);
                drv_stat = ata_chk_status(ap);

                /* ack bmdma irq events */
                ap->ops->irq_clear(ap);

                ata_dev_printk(qc->dev, KERN_ERR, "command 0x%x timeout, "
                               "stat 0x%x host_stat 0x%x\n",
                               qc->tf.command, drv_stat, host_stat);

                /* complete taskfile transaction */
                qc->err_mask |= AC_ERR_TIMEOUT;
                break;
        }

        spin_unlock_irqrestore(ap->lock, flags);

        ata_eh_qc_complete(qc);

        DPRINTK("EXIT\n");
}

/**
 * ata_eng_timeout - Handle timeout of queued command
 * @ap: Port on which timed-out command is active
 *
 * Some part of the kernel (currently, only the SCSI layer)
 * has noticed that the active command on port @ap has not
 * completed after a specified length of time.  Handle this
 * condition by disabling DMA (if necessary) and completing
 * transactions, with error if necessary.
 *
 * This also handles the case of the "lost interrupt", where
 * for some reason (possibly hardware bug, possibly driver bug)
 * an interrupt was not delivered to the driver, even though the
 * transaction completed successfully.
 *
 * TODO: kill this function once old EH is gone.
 *
 * LOCKING:
 * Inherited from SCSI layer (none, can sleep)
 */
void ata_eng_timeout(struct ata_port *ap)
{
        DPRINTK("ENTER\n");

        ata_qc_timeout(ata_qc_from_tag(ap, ap->active_tag));

        DPRINTK("EXIT\n");
}

/**
 * ata_qc_schedule_eh - schedule qc for error handling
 * @qc: command to schedule error handling for
 *
 * Schedule error handling for @qc.  EH will kick in as soon as
 * other commands are drained.
 *
 * LOCKING:
 * spin_lock_irqsave(host lock)
 */
void ata_qc_schedule_eh(struct ata_queued_cmd *qc)
{
        struct ata_port *ap = qc->ap;

        WARN_ON(!ap->ops->error_handler);

        qc->flags |= ATA_QCFLAG_FAILED;
        qc->ap->pflags |= ATA_PFLAG_EH_PENDING;

        /* The following will fail if timeout has already expired.
         * ata_scsi_error() takes care of such scmds on EH entry.
         * Note that ATA_QCFLAG_FAILED is unconditionally set after
         * this function completes.
         */
        scsi_req_abort_cmd(qc->scsicmd);
}

/**
 * ata_port_schedule_eh - schedule error handling without a qc
 * @ap: ATA port to schedule EH for
 *
 * Schedule error handling for @ap.  EH will kick in as soon as
 * all commands are drained.
 *
 * LOCKING:
 * spin_lock_irqsave(host lock)
 */
void ata_port_schedule_eh(struct ata_port *ap)
{
        WARN_ON(!ap->ops->error_handler);

        ap->pflags |= ATA_PFLAG_EH_PENDING;
        scsi_schedule_eh(ap->scsi_host);

        DPRINTK("port EH scheduled\n");
}

/**
 * ata_port_abort - abort all qc's on the port
 * @ap: ATA port to abort qc's for
 *
 * Abort all active qc's of @ap and schedule EH.
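 *
 * Each aborted qc is marked failed and completed, which routes it
 * into EH; if no qc was active, EH is scheduled explicitly below
 * so the port still gets a recovery pass.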
 *
 * LOCKING:
 * spin_lock_irqsave(host lock)
 *
 * RETURNS:
 * Number of aborted qc's.
 */
int ata_port_abort(struct ata_port *ap)
{
        int tag, nr_aborted = 0;

        WARN_ON(!ap->ops->error_handler);

        for (tag = 0; tag < ATA_MAX_QUEUE; tag++) {
                struct ata_queued_cmd *qc = ata_qc_from_tag(ap, tag);

                if (qc) {
                        qc->flags |= ATA_QCFLAG_FAILED;
                        ata_qc_complete(qc);
                        nr_aborted++;
                }
        }

        if (!nr_aborted)
                ata_port_schedule_eh(ap);

        return nr_aborted;
}

/**
 * __ata_port_freeze - freeze port
 * @ap: ATA port to freeze
 *
 * This function is called when HSM violation or some other
 * condition disrupts normal operation of the port.  A frozen port
 * is not allowed to perform any operation until the port is
 * thawed, which usually follows a successful reset.
 *
 * ap->ops->freeze() callback can be used for freezing the port
 * hardware-wise (e.g. mask interrupt and stop DMA engine).  If a
 * port cannot be frozen hardware-wise, the interrupt handler
 * must ack and clear interrupts unconditionally while the port
 * is frozen.
 *
 * LOCKING:
 * spin_lock_irqsave(host lock)
 */
static void __ata_port_freeze(struct ata_port *ap)
{
        WARN_ON(!ap->ops->error_handler);

        if (ap->ops->freeze)
                ap->ops->freeze(ap);

        ap->pflags |= ATA_PFLAG_FROZEN;

        DPRINTK("ata%u port frozen\n", ap->print_id);
}

/**
 * ata_port_freeze - abort & freeze port
 * @ap: ATA port to freeze
 *
 * Abort and freeze @ap.
 *
 * LOCKING:
 * spin_lock_irqsave(host lock)
 *
 * RETURNS:
 * Number of aborted commands.
 */
int ata_port_freeze(struct ata_port *ap)
{
        int nr_aborted;

        WARN_ON(!ap->ops->error_handler);

        nr_aborted = ata_port_abort(ap);
        __ata_port_freeze(ap);

        return nr_aborted;
}

/**
 * ata_eh_freeze_port - EH helper to freeze port
 * @ap: ATA port to freeze
 *
 * Freeze @ap.
 *
 * LOCKING:
 * None.
 */
void ata_eh_freeze_port(struct ata_port *ap)
{
        unsigned long flags;

        if (!ap->ops->error_handler)
                return;

        spin_lock_irqsave(ap->lock, flags);
        __ata_port_freeze(ap);
        spin_unlock_irqrestore(ap->lock, flags);
}

/**
 * ata_eh_thaw_port - EH helper to thaw port
 * @ap: ATA port to thaw
 *
 * Thaw frozen port @ap.
 *
 * LOCKING:
 * None.
 */
void ata_eh_thaw_port(struct ata_port *ap)
{
        unsigned long flags;

        if (!ap->ops->error_handler)
                return;

        spin_lock_irqsave(ap->lock, flags);

        ap->pflags &= ~ATA_PFLAG_FROZEN;

        if (ap->ops->thaw)
                ap->ops->thaw(ap);

        spin_unlock_irqrestore(ap->lock, flags);

        DPRINTK("ata%u port thawed\n", ap->print_id);
}

static void ata_eh_scsidone(struct scsi_cmnd *scmd)
{
        /* nada */
}

static void __ata_eh_qc_complete(struct ata_queued_cmd *qc)
{
        struct ata_port *ap = qc->ap;
        struct scsi_cmnd *scmd = qc->scsicmd;
        unsigned long flags;

        spin_lock_irqsave(ap->lock, flags);
        qc->scsidone = ata_eh_scsidone;
        __ata_qc_complete(qc);
        WARN_ON(ata_tag_valid(qc->tag));
        spin_unlock_irqrestore(ap->lock, flags);

        scsi_eh_finish_cmd(scmd, &ap->eh_done_q);
}

/**
 * ata_eh_qc_complete - Complete an active ATA command from EH
 * @qc: Command to complete
 *
 * Indicate to the mid and upper layers that an ATA command has
 * completed.  To be used from EH.
 */
void ata_eh_qc_complete(struct ata_queued_cmd *qc)
{
        struct scsi_cmnd *scmd = qc->scsicmd;
        scmd->retries = scmd->allowed;
        __ata_eh_qc_complete(qc);
}

/**
 * ata_eh_qc_retry - Tell midlayer to retry an ATA command after EH
 * @qc: Command to retry
 *
 * Indicate to the mid and upper layers that an ATA command
 * should be retried.  To be used from EH.
 *
 * SCSI midlayer limits the number of retries to scmd->allowed.
 * scmd->retries is decremented for commands which get retried
 * due to unrelated failures (qc->err_mask is zero).
 */
void ata_eh_qc_retry(struct ata_queued_cmd *qc)
{
        struct scsi_cmnd *scmd = qc->scsicmd;
        if (!qc->err_mask && scmd->retries)
                scmd->retries--;
        __ata_eh_qc_complete(qc);
}

/**
 * ata_eh_detach_dev - detach ATA device
 * @dev: ATA device to detach
 *
 * Detach @dev.
 *
 * LOCKING:
 * None.
 */
static void ata_eh_detach_dev(struct ata_device *dev)
{
        struct ata_port *ap = dev->ap;
        unsigned long flags;

        ata_dev_disable(dev);

        spin_lock_irqsave(ap->lock, flags);

        dev->flags &= ~ATA_DFLAG_DETACH;

        if (ata_scsi_offline_dev(dev)) {
                dev->flags |= ATA_DFLAG_DETACHED;
                ap->pflags |= ATA_PFLAG_SCSI_HOTPLUG;
        }

        /* clear per-dev EH actions */
        ata_eh_clear_action(dev, &ap->eh_info, ATA_EH_PERDEV_MASK);
        ata_eh_clear_action(dev, &ap->eh_context.i, ATA_EH_PERDEV_MASK);

        spin_unlock_irqrestore(ap->lock, flags);
}

/**
 * ata_eh_about_to_do - about to perform eh_action
 * @ap: target ATA port
 * @dev: target ATA dev for per-dev action (can be NULL)
 * @action: action about to be performed
 *
 * Called just before performing EH actions to clear related bits
 * in @ap->eh_info such that eh actions are not unnecessarily
 * repeated.
 *
 * LOCKING:
 * None.
 */
static void ata_eh_about_to_do(struct ata_port *ap, struct ata_device *dev,
                               unsigned int action)
{
        unsigned long flags;
        struct ata_eh_info *ehi = &ap->eh_info;
        struct ata_eh_context *ehc = &ap->eh_context;

        spin_lock_irqsave(ap->lock, flags);

        /* Reset is represented by a combination of actions and EHI
         * flags.  Suck in all related bits before clearing eh_info to
         * avoid losing requested action.
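         * Otherwise a reset request or reset modifier recorded in
         * eh_info in the meantime would be silently dropped when the
         * reset-related bits are cleared below.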
         */
        if (action & ATA_EH_RESET_MASK) {
                ehc->i.action |= ehi->action & ATA_EH_RESET_MASK;
                ehc->i.flags |= ehi->flags & ATA_EHI_RESET_MODIFIER_MASK;

                /* make sure all reset actions are cleared & clear EHI flags */
                action |= ATA_EH_RESET_MASK;
                ehi->flags &= ~ATA_EHI_RESET_MODIFIER_MASK;
        }

        ata_eh_clear_action(dev, ehi, action);

        if (!(ehc->i.flags & ATA_EHI_QUIET))
                ap->pflags |= ATA_PFLAG_RECOVERED;

        spin_unlock_irqrestore(ap->lock, flags);
}

/**
 * ata_eh_done - EH action complete
 * @ap: target ATA port
 * @dev: target ATA dev for per-dev action (can be NULL)
 * @action: action just completed
 *
 * Called right after performing EH actions to clear related bits
 * in @ap->eh_context.
 *
 * LOCKING:
 * None.
 */
static void ata_eh_done(struct ata_port *ap, struct ata_device *dev,
                        unsigned int action)
{
        /* if reset is complete, clear all reset actions & reset modifier */
        if (action & ATA_EH_RESET_MASK) {
                action |= ATA_EH_RESET_MASK;
                ap->eh_context.i.flags &= ~ATA_EHI_RESET_MODIFIER_MASK;
        }

        ata_eh_clear_action(dev, &ap->eh_context.i, action);
}

/**
 * ata_err_string - convert err_mask to descriptive string
 * @err_mask: error mask to convert to string
 *
 * Convert @err_mask to descriptive string.  Errors are
 * prioritized according to severity and only the most severe
 * error is reported.
 *
 * LOCKING:
 * None.
 *
 * RETURNS:
 * Descriptive string for @err_mask
 */
static const char *ata_err_string(unsigned int err_mask)
{
        if (err_mask & AC_ERR_HOST_BUS)
                return "host bus error";
        if (err_mask & AC_ERR_ATA_BUS)
                return "ATA bus error";
        if (err_mask & AC_ERR_TIMEOUT)
                return "timeout";
        if (err_mask & AC_ERR_HSM)
                return "HSM violation";
        if (err_mask & AC_ERR_SYSTEM)
                return "internal error";
        if (err_mask & AC_ERR_MEDIA)
                return "media error";
        if (err_mask & AC_ERR_INVALID)
                return "invalid argument";
        if (err_mask & AC_ERR_DEV)
                return "device error";
        return "unknown error";
}

/**
 * ata_read_log_page - read a specific log page
 * @dev: target device
 * @page: page to read
 * @buf: buffer to store read page
 * @sectors: number of sectors to read
 *
 * Read log page using READ_LOG_EXT command.
 *
 * LOCKING:
 * Kernel thread context (may sleep).
 *
 * RETURNS:
 * 0 on success, AC_ERR_* mask otherwise.
 */
static unsigned int ata_read_log_page(struct ata_device *dev,
                                      u8 page, void *buf, unsigned int sectors)
{
        struct ata_taskfile tf;
        unsigned int err_mask;

        DPRINTK("read log page - page %d\n", page);

        ata_tf_init(dev, &tf);
        tf.command = ATA_CMD_READ_LOG_EXT;
        tf.lbal = page;
        tf.nsect = sectors;
        tf.hob_nsect = sectors >> 8;
        tf.flags |= ATA_TFLAG_ISADDR | ATA_TFLAG_LBA48 | ATA_TFLAG_DEVICE;
        tf.protocol = ATA_PROT_PIO;

        err_mask = ata_exec_internal(dev, &tf, NULL, DMA_FROM_DEVICE,
                                     buf, sectors * ATA_SECT_SIZE);

        DPRINTK("EXIT, err_mask=%x\n", err_mask);
        return err_mask;
}

/**
 * ata_eh_read_log_10h - Read log page 10h for NCQ error details
 * @dev: Device to read log page 10h from
 * @tag: Resulting tag of the failed command
 * @tf: Resulting taskfile registers of the failed command
 *
 * Read log page 10h to obtain NCQ error details and clear error
 * condition.
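 *
 * Byte 0 of the returned page carries the NQ bit (bit 7, set when
 * the error was for a non-queued command) and the tag of the
 * failed command in bits 4:0; bytes 2-13 hold the shadow register
 * contents that are copied into @tf below.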
 *
 * LOCKING:
 * Kernel thread context (may sleep).
 *
 * RETURNS:
 * 0 on success, -errno otherwise.
 */
static int ata_eh_read_log_10h(struct ata_device *dev,
                               int *tag, struct ata_taskfile *tf)
{
        u8 *buf = dev->ap->sector_buf;
        unsigned int err_mask;
        u8 csum;
        int i;

        err_mask = ata_read_log_page(dev, ATA_LOG_SATA_NCQ, buf, 1);
        if (err_mask)
                return -EIO;

        csum = 0;
        for (i = 0; i < ATA_SECT_SIZE; i++)
                csum += buf[i];
        if (csum)
                ata_dev_printk(dev, KERN_WARNING,
                               "invalid checksum 0x%x on log page 10h\n", csum);

        if (buf[0] & 0x80)
                return -ENOENT;

        *tag = buf[0] & 0x1f;

        tf->command = buf[2];
        tf->feature = buf[3];
        tf->lbal = buf[4];
        tf->lbam = buf[5];
        tf->lbah = buf[6];
        tf->device = buf[7];
        tf->hob_lbal = buf[8];
        tf->hob_lbam = buf[9];
        tf->hob_lbah = buf[10];
        tf->nsect = buf[12];
        tf->hob_nsect = buf[13];

        return 0;
}

/**
 * atapi_eh_request_sense - perform ATAPI REQUEST_SENSE
 * @qc: qc to perform REQUEST_SENSE for (sense data is stored in
 *      qc->scsicmd->sense_buffer, which is SCSI_SENSE_BUFFERSIZE
 *      bytes long)
 *
 * Perform ATAPI REQUEST_SENSE after the device reported CHECK
 * CONDITION.  This function is an EH helper.
 *
 * LOCKING:
 * Kernel thread context (may sleep).
 *
 * RETURNS:
 * 0 on success, AC_ERR_* mask on failure
 */
static unsigned int atapi_eh_request_sense(struct ata_queued_cmd *qc)
{
        struct ata_device *dev = qc->dev;
        unsigned char *sense_buf = qc->scsicmd->sense_buffer;
        struct ata_port *ap = dev->ap;
        struct ata_taskfile tf;
        u8 cdb[ATAPI_CDB_LEN];

        DPRINTK("ATAPI request sense\n");

        /* FIXME: is this needed? */
        memset(sense_buf, 0, SCSI_SENSE_BUFFERSIZE);

        /* initialize sense_buf with the error register,
         * for the case where they are -not- overwritten
         */
        sense_buf[0] = 0x70;
        sense_buf[2] = qc->result_tf.feature >> 4;

        /* some devices time out if garbage left in tf */
        ata_tf_init(dev, &tf);

        memset(cdb, 0, ATAPI_CDB_LEN);
        cdb[0] = REQUEST_SENSE;
        cdb[4] = SCSI_SENSE_BUFFERSIZE;

        tf.flags |= ATA_TFLAG_ISADDR | ATA_TFLAG_DEVICE;
        tf.command = ATA_CMD_PACKET;

        /* is it pointless to prefer PIO for "safety reasons"? */
        if (ap->flags & ATA_FLAG_PIO_DMA) {
                tf.protocol = ATA_PROT_ATAPI_DMA;
                tf.feature |= ATAPI_PKT_DMA;
        } else {
                tf.protocol = ATA_PROT_ATAPI;
                tf.lbam = (8 * 1024) & 0xff;
                tf.lbah = (8 * 1024) >> 8;
        }

        return ata_exec_internal(dev, &tf, cdb, DMA_FROM_DEVICE,
                                 sense_buf, SCSI_SENSE_BUFFERSIZE);
}

/**
 * ata_eh_analyze_serror - analyze SError for a failed port
 * @ap: ATA port to analyze SError for
 *
 * Analyze SError if available and further determine cause of
 * failure.
 *
 * LOCKING:
 * None.
 */
static void ata_eh_analyze_serror(struct ata_port *ap)
{
        struct ata_eh_context *ehc = &ap->eh_context;
        u32 serror = ehc->i.serror;
        unsigned int err_mask = 0, action = 0;

        if (serror & SERR_PERSISTENT) {
                err_mask |= AC_ERR_ATA_BUS;
                action |= ATA_EH_HARDRESET;
        }
        if (serror &
            (SERR_DATA_RECOVERED | SERR_COMM_RECOVERED | SERR_DATA)) {
                err_mask |= AC_ERR_ATA_BUS;
                action |= ATA_EH_SOFTRESET;
        }
        if (serror & SERR_PROTOCOL) {
                err_mask |= AC_ERR_HSM;
                action |= ATA_EH_SOFTRESET;
        }
        if (serror & SERR_INTERNAL) {
                err_mask |= AC_ERR_SYSTEM;
                action |= ATA_EH_HARDRESET;
        }
        if (serror & (SERR_PHYRDY_CHG | SERR_DEV_XCHG))
                ata_ehi_hotplugged(&ehc->i);

        ehc->i.err_mask |= err_mask;
        ehc->i.action |= action;
}

/**
 * ata_eh_analyze_ncq_error - analyze NCQ error
 * @ap: ATA port to analyze NCQ error for
 *
 * Read log page 10h, determine the offending qc and acquire
 * error status TF.  For NCQ device errors, all that LLDDs have
 * to do is set AC_ERR_DEV in ehi->err_mask.  This function takes
 * care of the rest.
 *
 * LOCKING:
 * Kernel thread context (may sleep).
 */
static void ata_eh_analyze_ncq_error(struct ata_port *ap)
{
        struct ata_eh_context *ehc = &ap->eh_context;
        struct ata_device *dev = ap->device;
        struct ata_queued_cmd *qc;
        struct ata_taskfile tf;
        int tag, rc;

        /* if frozen, we can't do much */
        if (ap->pflags & ATA_PFLAG_FROZEN)
                return;

        /* is it NCQ device error? */
        if (!ap->sactive || !(ehc->i.err_mask & AC_ERR_DEV))
                return;

        /* has LLDD analyzed already? */
        for (tag = 0; tag < ATA_MAX_QUEUE; tag++) {
                qc = __ata_qc_from_tag(ap, tag);

                if (!(qc->flags & ATA_QCFLAG_FAILED))
                        continue;

                if (qc->err_mask)
                        return;
        }

        /* okay, this error is ours */
        rc = ata_eh_read_log_10h(dev, &tag, &tf);
        if (rc) {
                ata_port_printk(ap, KERN_ERR, "failed to read log page 10h "
                                "(errno=%d)\n", rc);
                return;
        }

        if (!(ap->sactive & (1 << tag))) {
                ata_port_printk(ap, KERN_ERR, "log page 10h reported "
                                "inactive tag %d\n", tag);
                return;
        }

        /* we've got the perpetrator, condemn it */
        qc = __ata_qc_from_tag(ap, tag);
        memcpy(&qc->result_tf, &tf, sizeof(tf));
        qc->err_mask |= AC_ERR_DEV;
        ehc->i.err_mask &= ~AC_ERR_DEV;
}

/**
 * ata_eh_analyze_tf - analyze taskfile of a failed qc
 * @qc: qc to analyze
 * @tf: Taskfile registers to analyze
 *
 * Analyze taskfile of @qc and further determine cause of
 * failure.  This function also requests ATAPI sense data if
 * available.
 *
 * LOCKING:
 * Kernel thread context (may sleep).
 *
 * RETURNS:
 * Determined recovery action
 */
static unsigned int ata_eh_analyze_tf(struct ata_queued_cmd *qc,
                                      const struct ata_taskfile *tf)
{
        unsigned int tmp, action = 0;
        u8 stat = tf->command, err = tf->feature;

        if ((stat & (ATA_BUSY | ATA_DRQ | ATA_DRDY)) != ATA_DRDY) {
                qc->err_mask |= AC_ERR_HSM;
                return ATA_EH_SOFTRESET;
        }

        if (stat & (ATA_ERR | ATA_DF))
                qc->err_mask |= AC_ERR_DEV;
        else
                return 0;

        switch (qc->dev->class) {
        case ATA_DEV_ATA:
                if (err & ATA_ICRC)
                        qc->err_mask |= AC_ERR_ATA_BUS;
                if (err & ATA_UNC)
                        qc->err_mask |= AC_ERR_MEDIA;
                if (err & ATA_IDNF)
                        qc->err_mask |= AC_ERR_INVALID;
                break;

        case ATA_DEV_ATAPI:
                if (!(qc->ap->pflags & ATA_PFLAG_FROZEN)) {
                        tmp = atapi_eh_request_sense(qc);
                        if (!tmp) {
                                /* ATA_QCFLAG_SENSE_VALID is used to
                                 * tell atapi_qc_complete() that sense
                                 * data is already valid.
                                 *
                                 * TODO: interpret sense data and set
                                 * appropriate err_mask.
                                 */
                                qc->flags |= ATA_QCFLAG_SENSE_VALID;
                        } else
                                qc->err_mask |= tmp;
                }
        }

        if (qc->err_mask & (AC_ERR_HSM | AC_ERR_TIMEOUT | AC_ERR_ATA_BUS))
                action |= ATA_EH_SOFTRESET;

        return action;
}

static int ata_eh_categorize_error(int is_io, unsigned int err_mask)
{
        if (err_mask & AC_ERR_ATA_BUS)
                return 1;

        if (err_mask & AC_ERR_TIMEOUT)
                return 2;

        if (is_io) {
                if (err_mask & AC_ERR_HSM)
                        return 2;
                if ((err_mask &
                     (AC_ERR_DEV|AC_ERR_MEDIA|AC_ERR_INVALID)) == AC_ERR_DEV)
                        return 3;
        }

        return 0;
}

struct speed_down_verdict_arg {
        u64 since;
        int nr_errors[4];
};

static int speed_down_verdict_cb(struct ata_ering_entry *ent, void *void_arg)
{
        struct speed_down_verdict_arg *arg = void_arg;
        int cat = ata_eh_categorize_error(ent->is_io, ent->err_mask);

        if (ent->timestamp < arg->since)
                return -1;

        arg->nr_errors[cat]++;
        return 0;
}

/**
 * ata_eh_speed_down_verdict - Determine speed down verdict
 * @dev: Device of interest
 *
 * This function examines the error ring of @dev and determines
 * whether NCQ needs to be turned off, transfer speed should be
 * stepped down, or falling back to PIO is necessary.
 *
 * Cat-1 is ATA_BUS error for any command.
 *
 * Cat-2 is TIMEOUT for any command or HSM violation for known
 * supported commands.
 *
 * Cat-3 is unclassified DEV error for known supported
 * commands.
 *
 * NCQ needs to be turned off if there have been more than 3
 * Cat-2 + Cat-3 errors during last 10 minutes.
 *
 * Speed down is necessary if there have been more than 3 Cat-1 +
 * Cat-2 errors or 10 Cat-3 errors during last 10 minutes.
 *
 * Falling back to PIO mode is necessary if there have been more
 * than 10 Cat-1 + Cat-2 + Cat-3 errors during last 5 minutes.
 *
 * LOCKING:
 * Inherited from caller.
 *
 * RETURNS:
 * OR of ATA_EH_SPDN_* flags.
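 *
 * Worked example: four timeouts within the last 10 minutes count
 * as four Cat-2 errors, exceeding both the "more than 3 Cat-2 +
 * Cat-3" and the "more than 3 Cat-1 + Cat-2" thresholds, so the
 * verdict would be ATA_EH_SPDN_NCQ_OFF | ATA_EH_SPDN_SPEED_DOWN.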
 */
static unsigned int ata_eh_speed_down_verdict(struct ata_device *dev)
{
        const u64 j5mins = 5LLU * 60 * HZ, j10mins = 10LLU * 60 * HZ;
        u64 j64 = get_jiffies_64();
        struct speed_down_verdict_arg arg;
        unsigned int verdict = 0;

        /* scan past 10 mins of error history */
        memset(&arg, 0, sizeof(arg));
        arg.since = j64 - min(j64, j10mins);
        ata_ering_map(&dev->ering, speed_down_verdict_cb, &arg);

        if (arg.nr_errors[2] + arg.nr_errors[3] > 3)
                verdict |= ATA_EH_SPDN_NCQ_OFF;
        if (arg.nr_errors[1] + arg.nr_errors[2] > 3 || arg.nr_errors[3] > 10)
                verdict |= ATA_EH_SPDN_SPEED_DOWN;

        /* scan past 5 mins of error history */
        memset(&arg, 0, sizeof(arg));
        arg.since = j64 - min(j64, j5mins);
        ata_ering_map(&dev->ering, speed_down_verdict_cb, &arg);

        if (arg.nr_errors[1] + arg.nr_errors[2] + arg.nr_errors[3] > 10)
                verdict |= ATA_EH_SPDN_FALLBACK_TO_PIO;

        return verdict;
}

/**
 * ata_eh_speed_down - record error and speed down if necessary
 * @dev: Failed device
 * @is_io: Did the device fail during normal IO?
 * @err_mask: err_mask of the error
 *
 * Record error and examine error history to determine whether
 * adjusting transmission speed is necessary.  It also sets
 * transmission limits appropriately if such adjustment is
 * necessary.
 *
 * LOCKING:
 * Kernel thread context (may sleep).
 *
 * RETURNS:
 * Determined recovery action.
 */
static unsigned int ata_eh_speed_down(struct ata_device *dev, int is_io,
                                      unsigned int err_mask)
{
        unsigned int verdict;
        unsigned int action = 0;

        /* don't bother if Cat-0 error */
        if (ata_eh_categorize_error(is_io, err_mask) == 0)
                return 0;

        /* record error and determine whether speed down is necessary */
        ata_ering_record(&dev->ering, is_io, err_mask);
        verdict = ata_eh_speed_down_verdict(dev);

        /* turn off NCQ? */
        if ((verdict & ATA_EH_SPDN_NCQ_OFF) &&
            (dev->flags & (ATA_DFLAG_PIO | ATA_DFLAG_NCQ |
                           ATA_DFLAG_NCQ_OFF)) == ATA_DFLAG_NCQ) {
                dev->flags |= ATA_DFLAG_NCQ_OFF;
                ata_dev_printk(dev, KERN_WARNING,
                               "NCQ disabled due to excessive errors\n");
                goto done;
        }

        /* speed down? */
        if (verdict & ATA_EH_SPDN_SPEED_DOWN) {
                /* speed down SATA link speed if possible */
                if (sata_down_spd_limit(dev->ap) == 0) {
                        action |= ATA_EH_HARDRESET;
                        goto done;
                }

                /* lower transfer mode */
                if (dev->spdn_cnt < 2) {
                        static const int dma_dnxfer_sel[] =
                                { ATA_DNXFER_DMA, ATA_DNXFER_40C };
                        static const int pio_dnxfer_sel[] =
                                { ATA_DNXFER_PIO, ATA_DNXFER_FORCE_PIO0 };
                        int sel;

                        if (dev->xfer_shift != ATA_SHIFT_PIO)
                                sel = dma_dnxfer_sel[dev->spdn_cnt];
                        else
                                sel = pio_dnxfer_sel[dev->spdn_cnt];

                        dev->spdn_cnt++;

                        if (ata_down_xfermask_limit(dev, sel) == 0) {
                                action |= ATA_EH_SOFTRESET;
                                goto done;
                        }
                }
        }

        /* Fall back to PIO?  Slowing down to PIO is meaningless for
         * SATA.  Consider it only for PATA.
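         * (The SATA link keeps signalling at the negotiated link
         * speed regardless of the programmed transfer mode, so only
         * the link speed step-down above helps there.)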
         */
        if ((verdict & ATA_EH_SPDN_FALLBACK_TO_PIO) && (dev->spdn_cnt >= 2) &&
            (dev->ap->cbl != ATA_CBL_SATA) &&
            (dev->xfer_shift != ATA_SHIFT_PIO)) {
                if (ata_down_xfermask_limit(dev, ATA_DNXFER_FORCE_PIO) == 0) {
                        dev->spdn_cnt = 0;
                        action |= ATA_EH_SOFTRESET;
                        goto done;
                }
        }

        return 0;
 done:
        /* device has been slowed down, blow error history */
        ata_ering_clear(&dev->ering);
        return action;
}

/**
 * ata_eh_autopsy - analyze error and determine recovery action
 * @ap: ATA port to perform autopsy on
 *
 * Analyze why @ap failed and determine which recovery action is
 * needed.  This function also sets more detailed AC_ERR_* values
 * and fills sense data for ATAPI CHECK CONDITION.
 *
 * LOCKING:
 * Kernel thread context (may sleep).
 */
static void ata_eh_autopsy(struct ata_port *ap)
{
        struct ata_eh_context *ehc = &ap->eh_context;
        unsigned int all_err_mask = 0;
        int tag, is_io = 0;
        u32 serror;
        int rc;

        DPRINTK("ENTER\n");

        if (ehc->i.flags & ATA_EHI_NO_AUTOPSY)
                return;

        /* obtain and analyze SError */
        rc = sata_scr_read(ap, SCR_ERROR, &serror);
        if (rc == 0) {
                ehc->i.serror |= serror;
                ata_eh_analyze_serror(ap);
        } else if (rc != -EOPNOTSUPP)
                ehc->i.action |= ATA_EH_HARDRESET;

        /* analyze NCQ failure */
        ata_eh_analyze_ncq_error(ap);

        /* any real error trumps AC_ERR_OTHER */
        if (ehc->i.err_mask & ~AC_ERR_OTHER)
                ehc->i.err_mask &= ~AC_ERR_OTHER;

        all_err_mask |= ehc->i.err_mask;

        for (tag = 0; tag < ATA_MAX_QUEUE; tag++) {
                struct ata_queued_cmd *qc = __ata_qc_from_tag(ap, tag);

                if (!(qc->flags & ATA_QCFLAG_FAILED))
                        continue;

                /* inherit upper level err_mask */
                qc->err_mask |= ehc->i.err_mask;

                /* analyze TF */
                ehc->i.action |= ata_eh_analyze_tf(qc, &qc->result_tf);

                /* DEV errors are probably spurious in case of ATA_BUS error */
                if (qc->err_mask & AC_ERR_ATA_BUS)
                        qc->err_mask &= ~(AC_ERR_DEV | AC_ERR_MEDIA |
                                          AC_ERR_INVALID);

                /* any real error trumps unknown error */
                if (qc->err_mask & ~AC_ERR_OTHER)
                        qc->err_mask &= ~AC_ERR_OTHER;

                /* SENSE_VALID trumps dev/unknown error and revalidation */
                if (qc->flags & ATA_QCFLAG_SENSE_VALID) {
                        qc->err_mask &= ~(AC_ERR_DEV | AC_ERR_OTHER);
                        ehc->i.action &= ~ATA_EH_REVALIDATE;
                }

                /* accumulate error info */
                ehc->i.dev = qc->dev;
                all_err_mask |= qc->err_mask;
                if (qc->flags & ATA_QCFLAG_IO)
                        is_io = 1;
        }

        /* enforce default EH actions */
        if (ap->pflags & ATA_PFLAG_FROZEN ||
            all_err_mask & (AC_ERR_HSM | AC_ERR_TIMEOUT))
                ehc->i.action |= ATA_EH_SOFTRESET;
        else if (all_err_mask)
                ehc->i.action |= ATA_EH_REVALIDATE;

        /* if we have offending qcs and the associated failed device */
        if (ehc->i.dev) {
                /* speed down */
                ehc->i.action |= ata_eh_speed_down(ehc->i.dev, is_io,
                                                   all_err_mask);

                /* perform per-dev EH action only on the offending device */
                ehc->i.dev_action[ehc->i.dev->devno] |=
                        ehc->i.action & ATA_EH_PERDEV_MASK;
                ehc->i.action &= ~ATA_EH_PERDEV_MASK;
        }

        DPRINTK("EXIT\n");
}

/**
 * ata_eh_report - report error handling to user
 * @ap: ATA port EH is going on
 *
 * Report EH to user.
 *
 * LOCKING:
 * None.
 */
static void ata_eh_report(struct ata_port *ap)
{
        struct ata_eh_context *ehc = &ap->eh_context;
        const char *frozen, *desc;
        int tag, nr_failed = 0;

        desc = NULL;
        if (ehc->i.desc[0] != '\0')
                desc = ehc->i.desc;

        for (tag = 0; tag < ATA_MAX_QUEUE; tag++) {
                struct ata_queued_cmd *qc = __ata_qc_from_tag(ap, tag);

                if (!(qc->flags & ATA_QCFLAG_FAILED))
                        continue;
                if (qc->flags & ATA_QCFLAG_SENSE_VALID && !qc->err_mask)
                        continue;

                nr_failed++;
        }

        if (!nr_failed && !ehc->i.err_mask)
                return;

        frozen = "";
        if (ap->pflags & ATA_PFLAG_FROZEN)
                frozen = " frozen";

        if (ehc->i.dev) {
                ata_dev_printk(ehc->i.dev, KERN_ERR, "exception Emask 0x%x "
                               "SAct 0x%x SErr 0x%x action 0x%x%s\n",
                               ehc->i.err_mask, ap->sactive, ehc->i.serror,
                               ehc->i.action, frozen);
                if (desc)
                        ata_dev_printk(ehc->i.dev, KERN_ERR, "(%s)\n", desc);
        } else {
                ata_port_printk(ap, KERN_ERR, "exception Emask 0x%x "
                                "SAct 0x%x SErr 0x%x action 0x%x%s\n",
                                ehc->i.err_mask, ap->sactive, ehc->i.serror,
                                ehc->i.action, frozen);
                if (desc)
                        ata_port_printk(ap, KERN_ERR, "(%s)\n", desc);
        }

        for (tag = 0; tag < ATA_MAX_QUEUE; tag++) {
                static const char *dma_str[] = {
                        [DMA_BIDIRECTIONAL]     = "bidi",
                        [DMA_TO_DEVICE]         = "out",
                        [DMA_FROM_DEVICE]       = "in",
                        [DMA_NONE]              = "",
                };
                struct ata_queued_cmd *qc = __ata_qc_from_tag(ap, tag);
                struct ata_taskfile *cmd = &qc->tf, *res = &qc->result_tf;

                if (!(qc->flags & ATA_QCFLAG_FAILED) || !qc->err_mask)
                        continue;

                ata_dev_printk(qc->dev, KERN_ERR,
                        "cmd %02x/%02x:%02x:%02x:%02x:%02x/%02x:%02x:%02x:%02x:%02x/%02x "
                        "tag %d cdb 0x%x data %u %s\n         "
                        "res %02x/%02x:%02x:%02x:%02x:%02x/%02x:%02x:%02x:%02x:%02x/%02x "
                        "Emask 0x%x (%s)\n",
                        cmd->command, cmd->feature, cmd->nsect,
                        cmd->lbal, cmd->lbam, cmd->lbah,
                        cmd->hob_feature, cmd->hob_nsect,
                        cmd->hob_lbal, cmd->hob_lbam, cmd->hob_lbah,
                        cmd->device, qc->tag, qc->cdb[0], qc->nbytes,
                        dma_str[qc->dma_dir],
                        res->command, res->feature, res->nsect,
                        res->lbal, res->lbam, res->lbah,
                        res->hob_feature, res->hob_nsect,
                        res->hob_lbal, res->hob_lbam, res->hob_lbah,
                        res->device, qc->err_mask, ata_err_string(qc->err_mask));
        }
}

static int ata_do_reset(struct ata_port *ap, ata_reset_fn_t reset,
                        unsigned int *classes)
{
        int i, rc;

        for (i = 0; i < ATA_MAX_DEVICES; i++)
                classes[i] = ATA_DEV_UNKNOWN;

        rc = reset(ap, classes);
        if (rc)
                return rc;

        /* If any class isn't ATA_DEV_UNKNOWN, consider classification
         * is complete and convert all ATA_DEV_UNKNOWN to
         * ATA_DEV_NONE.
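         * A reset method that performs no classification leaves every
         * slot ATA_DEV_UNKNOWN, and that state is preserved so the
         * caller can tell that no classification happened.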
         */
        for (i = 0; i < ATA_MAX_DEVICES; i++)
                if (classes[i] != ATA_DEV_UNKNOWN)
                        break;

        if (i < ATA_MAX_DEVICES)
                for (i = 0; i < ATA_MAX_DEVICES; i++)
                        if (classes[i] == ATA_DEV_UNKNOWN)
                                classes[i] = ATA_DEV_NONE;

        return 0;
}

static int ata_eh_followup_srst_needed(int rc, int classify,
                                       const unsigned int *classes)
{
        if (rc == -EAGAIN)
                return 1;
        if (rc != 0)
                return 0;
        if (classify && classes[0] == ATA_DEV_UNKNOWN)
                return 1;
        return 0;
}

static int ata_eh_reset(struct ata_port *ap, int classify,
                        ata_prereset_fn_t prereset, ata_reset_fn_t softreset,
                        ata_reset_fn_t hardreset, ata_postreset_fn_t postreset)
{
        struct ata_eh_context *ehc = &ap->eh_context;
        unsigned int *classes = ehc->classes;
        int tries = ATA_EH_RESET_TRIES;
        int verbose = !(ehc->i.flags & ATA_EHI_QUIET);
        unsigned int action;
        ata_reset_fn_t reset;
        int i, did_followup_srst, rc;

        /* about to reset */
        ata_eh_about_to_do(ap, NULL, ehc->i.action & ATA_EH_RESET_MASK);

        /* Determine which reset to use and record in ehc->i.action.
         * prereset() may examine and modify it.
         */
        action = ehc->i.action;
        ehc->i.action &= ~ATA_EH_RESET_MASK;
        if (softreset && (!hardreset || (!sata_set_spd_needed(ap) &&
                                         !(action & ATA_EH_HARDRESET))))
                ehc->i.action |= ATA_EH_SOFTRESET;
        else
                ehc->i.action |= ATA_EH_HARDRESET;

        if (prereset) {
                rc = prereset(ap);
                if (rc) {
                        if (rc == -ENOENT) {
                                ata_port_printk(ap, KERN_DEBUG,
                                                "port disabled. ignoring.\n");
                                ap->eh_context.i.action &= ~ATA_EH_RESET_MASK;

                                for (i = 0; i < ATA_MAX_DEVICES; i++)
                                        classes[i] = ATA_DEV_NONE;

                                rc = 0;
                        } else
                                ata_port_printk(ap, KERN_ERR,
                                        "prereset failed (errno=%d)\n", rc);
                        return rc;
                }
        }

        /* prereset() might have modified ehc->i.action */
        if (ehc->i.action & ATA_EH_HARDRESET)
                reset = hardreset;
        else if (ehc->i.action & ATA_EH_SOFTRESET)
                reset = softreset;
        else {
                /* prereset told us not to reset, bang classes and return */
                for (i = 0; i < ATA_MAX_DEVICES; i++)
                        classes[i] = ATA_DEV_NONE;
                return 0;
        }

        /* did prereset() screw up?  if so, fix up to avoid oopsing */
        if (!reset) {
                ata_port_printk(ap, KERN_ERR, "BUG: prereset() requested "
                                "invalid reset type\n");
                if (softreset)
                        reset = softreset;
                else
                        reset = hardreset;
        }

 retry:
        /* shut up during boot probing */
        if (verbose)
                ata_port_printk(ap, KERN_INFO, "%s resetting port\n",
                                reset == softreset ? "soft" : "hard");

        /* mark that this EH session started with reset */
        if (reset == hardreset)
                ehc->i.flags |= ATA_EHI_DID_HARDRESET;
        else
                ehc->i.flags |= ATA_EHI_DID_SOFTRESET;

        rc = ata_do_reset(ap, reset, classes);

        did_followup_srst = 0;
        if (reset == hardreset &&
            ata_eh_followup_srst_needed(rc, classify, classes)) {
                /* okay, let's do follow-up softreset */
                did_followup_srst = 1;
                reset = softreset;

                if (!reset) {
                        ata_port_printk(ap, KERN_ERR,
                                        "follow-up softreset required "
                                        "but no softreset available\n");
                        return -EINVAL;
                }

                ata_eh_about_to_do(ap, NULL, ATA_EH_RESET_MASK);
                rc = ata_do_reset(ap, reset, classes);

                if (rc == 0 && classify &&
                    classes[0] == ATA_DEV_UNKNOWN) {
                        ata_port_printk(ap, KERN_ERR,
                                        "classification failed\n");
                        return -EINVAL;
                }
        }

        if (rc && --tries) {
                const char *type;

                if (reset == softreset) {
                        if (did_followup_srst)
                                type = "follow-up soft";
                        else
                                type = "soft";
                } else
                        type = "hard";

                ata_port_printk(ap, KERN_WARNING,
                                "%sreset failed, retrying in 5 secs\n", type);
                ssleep(5);

                if (reset == hardreset)
                        sata_down_spd_limit(ap);
                if (hardreset)
                        reset = hardreset;
                goto retry;
        }

        if (rc == 0) {
                /* After the reset, the device state is PIO 0 and the
                 * controller state is undefined.  Record the mode.
                 */
                for (i = 0; i < ATA_MAX_DEVICES; i++)
                        ap->device[i].pio_mode = XFER_PIO_0;

                if (postreset)
                        postreset(ap, classes);

                /* reset successful, schedule revalidation */
                ata_eh_done(ap, NULL, ehc->i.action & ATA_EH_RESET_MASK);
                ehc->i.action |= ATA_EH_REVALIDATE;
        }

        return rc;
}

static int ata_eh_revalidate_and_attach(struct ata_port *ap,
                                        struct ata_device **r_failed_dev)
{
        struct ata_eh_context *ehc = &ap->eh_context;
        struct ata_device *dev;
        unsigned int new_mask = 0;
        unsigned long flags;
        int i, rc = 0;

        DPRINTK("ENTER\n");

        /* For PATA drive side cable detection to work, IDENTIFY must
         * be done backwards such that PDIAG- is released by the slave
         * device before the master device is identified.
         */
        for (i = ATA_MAX_DEVICES - 1; i >= 0; i--) {
                unsigned int action, readid_flags = 0;

                dev = &ap->device[i];
                action = ata_eh_dev_action(dev);

                if (ehc->i.flags & ATA_EHI_DID_RESET)
                        readid_flags |= ATA_READID_POSTRESET;

                if (action & ATA_EH_REVALIDATE && ata_dev_ready(dev)) {
                        if (ata_port_offline(ap)) {
                                rc = -EIO;
                                goto err;
                        }

                        ata_eh_about_to_do(ap, dev, ATA_EH_REVALIDATE);
                        rc = ata_dev_revalidate(dev, readid_flags);
                        if (rc)
                                goto err;

                        ata_eh_done(ap, dev, ATA_EH_REVALIDATE);

                        /* Configuration may have changed, reconfigure
                         * transfer mode.
                         */
                        ehc->i.flags |= ATA_EHI_SETMODE;

                        /* schedule the scsi_rescan_device() here */
                        queue_work(ata_aux_wq, &(ap->scsi_rescan_task));
                } else if (dev->class == ATA_DEV_UNKNOWN &&
                           ehc->tries[dev->devno] &&
                           ata_class_enabled(ehc->classes[dev->devno])) {
                        dev->class = ehc->classes[dev->devno];

                        rc = ata_dev_read_id(dev, &dev->class, readid_flags,
                                             dev->id);
                        switch (rc) {
                        case 0:
                                new_mask |= 1 << i;
                                break;
                        case -ENOENT:
                                /* IDENTIFY was issued to non-existent
                                 * device.  No need to reset.  Just
                                 * thaw and kill the device.
                                 */
                                ata_eh_thaw_port(ap);
                                dev->class = ATA_DEV_UNKNOWN;
                                break;
                        default:
                                dev->class = ATA_DEV_UNKNOWN;
                                goto err;
                        }
                }
        }

        /* PDIAG- should have been released, ask cable type if post-reset */
        if ((ehc->i.flags & ATA_EHI_DID_RESET) && ap->ops->cable_detect)
                ap->cbl = ap->ops->cable_detect(ap);

        /* Configure new devices forward such that user doesn't see
         * device detection messages backwards.
         */
        for (i = 0; i < ATA_MAX_DEVICES; i++) {
                dev = &ap->device[i];

                if (!(new_mask & (1 << i)))
                        continue;

                ehc->i.flags |= ATA_EHI_PRINTINFO;
                rc = ata_dev_configure(dev);
                ehc->i.flags &= ~ATA_EHI_PRINTINFO;
                if (rc)
                        goto err;

                spin_lock_irqsave(ap->lock, flags);
                ap->pflags |= ATA_PFLAG_SCSI_HOTPLUG;
                spin_unlock_irqrestore(ap->lock, flags);

                /* new device discovered, configure xfermode */
                ehc->i.flags |= ATA_EHI_SETMODE;
        }

        return 0;

 err:
        *r_failed_dev = dev;
        DPRINTK("EXIT rc=%d\n", rc);
        return rc;
}

#ifdef CONFIG_PM
/**
 * ata_eh_suspend - handle suspend EH action
 * @ap: target host port
 * @r_failed_dev: result parameter to indicate failing device
 *
 * Handle suspend EH action.  Disk devices are spun down and
 * other types of devices are just marked suspended.  Once
 * suspended, no EH action to the device is allowed until it is
 * resumed.
 *
 * LOCKING:
 * Kernel thread context (may sleep).
 *
 * RETURNS:
 * 0 on success, -errno otherwise
 */
static int ata_eh_suspend(struct ata_port *ap, struct ata_device **r_failed_dev)
{
        struct ata_device *dev;
        int i, rc = 0;

        DPRINTK("ENTER\n");

        for (i = 0; i < ATA_MAX_DEVICES; i++) {
                unsigned long flags;
                unsigned int action, err_mask;

                dev = &ap->device[i];
                action = ata_eh_dev_action(dev);

                if (!ata_dev_enabled(dev) || !(action & ATA_EH_SUSPEND))
                        continue;

                WARN_ON(dev->flags & ATA_DFLAG_SUSPENDED);

                ata_eh_about_to_do(ap, dev, ATA_EH_SUSPEND);

                if (dev->class == ATA_DEV_ATA && !(action & ATA_EH_PM_FREEZE)) {
                        /* flush cache */
                        rc = ata_flush_cache(dev);
                        if (rc)
                                break;

                        /* spin down */
                        err_mask = ata_do_simple_cmd(dev, ATA_CMD_STANDBYNOW1);
                        if (err_mask) {
                                ata_dev_printk(dev, KERN_ERR, "failed to "
                                               "spin down (err_mask=0x%x)\n",
                                               err_mask);
                                rc = -EIO;
                                break;
                        }
                }

                spin_lock_irqsave(ap->lock, flags);
                dev->flags |= ATA_DFLAG_SUSPENDED;
                spin_unlock_irqrestore(ap->lock, flags);

                ata_eh_done(ap, dev, ATA_EH_SUSPEND);
        }

        if (rc)
                *r_failed_dev = dev;

        DPRINTK("EXIT\n");
        return rc;
}

/**
 * ata_eh_prep_resume - prep for resume EH action
 * @ap: target host port
 *
 * Clear SUSPENDED in preparation for scheduled resume actions.
 * This allows other parts of EH to access the devices being
 * resumed.
 *
 * LOCKING:
 * Kernel thread context (may sleep).
 */
static void ata_eh_prep_resume(struct ata_port *ap)
{
        struct ata_device *dev;
        unsigned long flags;
        int i;

        DPRINTK("ENTER\n");

        for (i = 0; i < ATA_MAX_DEVICES; i++) {
                unsigned int action;

                dev = &ap->device[i];
                action = ata_eh_dev_action(dev);

                if (!ata_dev_enabled(dev) || !(action & ATA_EH_RESUME))
                        continue;

                spin_lock_irqsave(ap->lock, flags);
                dev->flags &= ~ATA_DFLAG_SUSPENDED;
                spin_unlock_irqrestore(ap->lock, flags);
        }

        DPRINTK("EXIT\n");
}

/**
 * ata_eh_resume - handle resume EH action
 * @ap: target host port
 * @r_failed_dev: result parameter to indicate failing device
 *
 * Handle resume EH action.  Target devices are already reset and
 * revalidated.  Spinning up is the only operation left.
 *
 * LOCKING:
 * Kernel thread context (may sleep).
 *
 * RETURNS:
 * 0 on success, -errno otherwise
 */
static int ata_eh_resume(struct ata_port *ap, struct ata_device **r_failed_dev)
{
        struct ata_device *dev;
        int i, rc = 0;

        DPRINTK("ENTER\n");

        for (i = 0; i < ATA_MAX_DEVICES; i++) {
                unsigned int action, err_mask;

                dev = &ap->device[i];
                action = ata_eh_dev_action(dev);

                if (!ata_dev_enabled(dev) || !(action & ATA_EH_RESUME))
                        continue;

                ata_eh_about_to_do(ap, dev, ATA_EH_RESUME);

                if (dev->class == ATA_DEV_ATA && !(action & ATA_EH_PM_FREEZE)) {
                        err_mask = ata_do_simple_cmd(dev,
                                                     ATA_CMD_IDLEIMMEDIATE);
                        if (err_mask) {
                                ata_dev_printk(dev, KERN_ERR, "failed to "
                                               "spin up (err_mask=0x%x)\n",
                                               err_mask);
                                rc = -EIO;
                                break;
                        }
                }

                ata_eh_done(ap, dev, ATA_EH_RESUME);
        }

        if (rc)
                *r_failed_dev = dev;

        DPRINTK("EXIT\n");
        return rc;
}
#endif /* CONFIG_PM */

static int ata_port_nr_enabled(struct ata_port *ap)
{
        int i, cnt = 0;

        for (i = 0; i < ATA_MAX_DEVICES; i++)
                if (ata_dev_enabled(&ap->device[i]))
                        cnt++;
        return cnt;
}

static int ata_port_nr_vacant(struct ata_port *ap)
{
        int i, cnt = 0;

        for (i = 0; i < ATA_MAX_DEVICES; i++)
                if (ap->device[i].class == ATA_DEV_UNKNOWN)
                        cnt++;
        return cnt;
}

static int ata_eh_skip_recovery(struct ata_port *ap)
{
        struct ata_eh_context *ehc = &ap->eh_context;
        int i;

        /* skip if all possible devices are suspended */
        for (i = 0; i < ata_port_max_devices(ap); i++) {
                struct ata_device *dev = &ap->device[i];

                if (!(dev->flags & ATA_DFLAG_SUSPENDED))
                        break;
        }

        if (i == ata_port_max_devices(ap))
                return 1;

        /* thaw frozen port, resume link and recover failed devices */
        if ((ap->pflags & ATA_PFLAG_FROZEN) ||
            (ehc->i.flags & ATA_EHI_RESUME_LINK) || ata_port_nr_enabled(ap))
                return 0;

        /* skip if class codes for all vacant slots are ATA_DEV_NONE */
        for (i = 0; i < ATA_MAX_DEVICES; i++) {
                struct ata_device *dev = &ap->device[i];

                if (dev->class == ATA_DEV_UNKNOWN &&
                    ehc->classes[dev->devno] != ATA_DEV_NONE)
                        return 0;
        }

        return 1;
}

/**
 * ata_eh_recover - recover host port after error
 * @ap: host port to recover
 * @prereset: prereset method (can be NULL)
 * @softreset: softreset method (can be NULL)
 * @hardreset: hardreset method (can be NULL)
 * @postreset: postreset method (can be NULL)
 *
 * This is the alpha and omega, yin and yang, heart and soul of
 * libata exception handling.  On entry, actions required to
 * recover the port and hotplug requests are recorded in
 * eh_context.  This function executes all the operations with
 * appropriate retries and fallbacks to resurrect failed
 * devices, detach goners and greet newcomers.
 *
 * LOCKING:
 * Kernel thread context (may sleep).
 *
 * RETURNS:
 * 0 on success, -errno on failure.
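 *
 * The sequence below is: optional reset, revalidate existing and
 * attach new devices, resume, set transfer mode, then suspend;
 * a failure at any step funnels into the dev_fail retry logic.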
 */
static int ata_eh_recover(struct ata_port *ap, ata_prereset_fn_t prereset,
			  ata_reset_fn_t softreset, ata_reset_fn_t hardreset,
			  ata_postreset_fn_t postreset)
{
	struct ata_eh_context *ehc = &ap->eh_context;
	struct ata_device *dev;
	int i, rc;

	DPRINTK("ENTER\n");

	/* prep for recovery */
	for (i = 0; i < ATA_MAX_DEVICES; i++) {
		dev = &ap->device[i];

		ehc->tries[dev->devno] = ATA_EH_DEV_TRIES;

		/* collect port action mask recorded in dev actions */
		ehc->i.action |= ehc->i.dev_action[i] & ~ATA_EH_PERDEV_MASK;
		ehc->i.dev_action[i] &= ATA_EH_PERDEV_MASK;

		/* process hotplug request */
		if (dev->flags & ATA_DFLAG_DETACH)
			ata_eh_detach_dev(dev);

		if (!ata_dev_enabled(dev) &&
		    ((ehc->i.probe_mask & (1 << dev->devno)) &&
		     !(ehc->did_probe_mask & (1 << dev->devno)))) {
			ata_eh_detach_dev(dev);
			ata_dev_init(dev);
			ehc->did_probe_mask |= (1 << dev->devno);
			ehc->i.action |= ATA_EH_SOFTRESET;
		}
	}

 retry:
	rc = 0;

	/* if UNLOADING, finish immediately */
	if (ap->pflags & ATA_PFLAG_UNLOADING)
		goto out;

	/* prep for resume */
	ata_eh_prep_resume(ap);

	/* skip EH if possible. */
	if (ata_eh_skip_recovery(ap))
		ehc->i.action = 0;

	for (i = 0; i < ATA_MAX_DEVICES; i++)
		ehc->classes[i] = ATA_DEV_UNKNOWN;

	/* reset */
	if (ehc->i.action & ATA_EH_RESET_MASK) {
		ata_eh_freeze_port(ap);

		rc = ata_eh_reset(ap, ata_port_nr_vacant(ap), prereset,
				  softreset, hardreset, postreset);
		if (rc) {
			ata_port_printk(ap, KERN_ERR,
					"reset failed, giving up\n");
			goto out;
		}

		ata_eh_thaw_port(ap);
	}

	/* revalidate existing devices and attach new ones */
	rc = ata_eh_revalidate_and_attach(ap, &dev);
	if (rc)
		goto dev_fail;

	/* resume devices */
	rc = ata_eh_resume(ap, &dev);
	if (rc)
		goto dev_fail;

	/* configure transfer mode if necessary */
	if (ehc->i.flags & ATA_EHI_SETMODE) {
		rc = ata_set_mode(ap, &dev);
		if (rc)
			goto dev_fail;
		ehc->i.flags &= ~ATA_EHI_SETMODE;
	}

	/* suspend devices */
	rc = ata_eh_suspend(ap, &dev);
	if (rc)
		goto dev_fail;

	goto out;

 dev_fail:
	ehc->tries[dev->devno]--;

	switch (rc) {
	case -EINVAL:
		/* eeek, something went very wrong, give up */
		ehc->tries[dev->devno] = 0;
		break;

	case -ENODEV:
		/* device missing or wrong IDENTIFY data, schedule probing */
		ehc->i.probe_mask |= (1 << dev->devno);
		/* give it just one more chance */
		ehc->tries[dev->devno] = min(ehc->tries[dev->devno], 1);
		/* fall through */
	case -EIO:
		if (ehc->tries[dev->devno] == 1) {
			/* This is the last chance, better to slow
			 * down than lose it.
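			 * (sata_down_spd_limit() steps the SATA PHY
			 * speed limit down; ata_down_xfermask_limit()
			 * with ATA_DNXFER_PIO falls back to PIO.)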
			 */
			sata_down_spd_limit(ap);
			ata_down_xfermask_limit(dev, ATA_DNXFER_PIO);
		}
	}

	if (ata_dev_enabled(dev) && !ehc->tries[dev->devno]) {
		/* disable device if it has used up all its chances */
		ata_dev_disable(dev);

		/* detach if offline */
		if (ata_port_offline(ap))
			ata_eh_detach_dev(dev);

		/* probe if requested */
		if ((ehc->i.probe_mask & (1 << dev->devno)) &&
		    !(ehc->did_probe_mask & (1 << dev->devno))) {
			ata_eh_detach_dev(dev);
			ata_dev_init(dev);

			ehc->tries[dev->devno] = ATA_EH_DEV_TRIES;
			ehc->did_probe_mask |= (1 << dev->devno);
			ehc->i.action |= ATA_EH_SOFTRESET;
		}
	} else {
		/* soft didn't work?  be haaaaard */
		if (ehc->i.flags & ATA_EHI_DID_RESET)
			ehc->i.action |= ATA_EH_HARDRESET;
		else
			ehc->i.action |= ATA_EH_SOFTRESET;
	}

	if (ata_port_nr_enabled(ap)) {
		ata_port_printk(ap, KERN_WARNING, "failed to recover some "
				"devices, retrying in 5 secs\n");
		ssleep(5);
	} else {
		/* no device left, repeat fast */
		msleep(500);
	}

	goto retry;

 out:
	if (rc) {
		for (i = 0; i < ATA_MAX_DEVICES; i++)
			ata_dev_disable(&ap->device[i]);
	}

	DPRINTK("EXIT, rc=%d\n", rc);
	return rc;
}

/**
 *	ata_eh_finish - finish up EH
 *	@ap: host port to finish EH for
 *
 *	Recovery is complete.  Clean up EH states and retry or finish
 *	failed qcs.
 *
 *	LOCKING:
 *	None.
 */
static void ata_eh_finish(struct ata_port *ap)
{
	int tag;

	/* retry or finish qcs */
	for (tag = 0; tag < ATA_MAX_QUEUE; tag++) {
		struct ata_queued_cmd *qc = __ata_qc_from_tag(ap, tag);

		if (!(qc->flags & ATA_QCFLAG_FAILED))
			continue;

		if (qc->err_mask) {
			/* FIXME: Once EH migration is complete,
			 * generate sense data in this function,
			 * considering both err_mask and tf.
			 */
			if (qc->err_mask & AC_ERR_INVALID)
				ata_eh_qc_complete(qc);
			else
				ata_eh_qc_retry(qc);
		} else {
			if (qc->flags & ATA_QCFLAG_SENSE_VALID) {
				ata_eh_qc_complete(qc);
			} else {
				/* feed zero TF to sense generation */
				memset(&qc->result_tf, 0, sizeof(qc->result_tf));
				ata_eh_qc_retry(qc);
			}
		}
	}
}

/**
 *	ata_do_eh - do standard error handling
 *	@ap: host port to handle error for
 *	@prereset: prereset method (can be NULL)
 *	@softreset: softreset method (can be NULL)
 *	@hardreset: hardreset method (can be NULL)
 *	@postreset: postreset method (can be NULL)
 *
 *	Perform standard error handling sequence.
 *
 *	LOCKING:
 *	Kernel thread context (may sleep).
 */
void ata_do_eh(struct ata_port *ap, ata_prereset_fn_t prereset,
	       ata_reset_fn_t softreset, ata_reset_fn_t hardreset,
	       ata_postreset_fn_t postreset)
{
	ata_eh_autopsy(ap);
	ata_eh_report(ap);
	ata_eh_recover(ap, prereset, softreset, hardreset, postreset);
	ata_eh_finish(ap);
}

#ifdef CONFIG_PM
/**
 *	ata_eh_handle_port_suspend - perform port suspend operation
 *	@ap: port to suspend
 *
 *	Suspend @ap.
 *
 *	LOCKING:
 *	Kernel thread context (may sleep).
 */
static void ata_eh_handle_port_suspend(struct ata_port *ap)
{
	unsigned long flags;
	int rc = 0;

	/* are we suspending?
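	 * Proceed only if a PM request is pending and the PM message is
	 * not PM_EVENT_ON; PM_EVENT_ON marks a resume, which is handled
	 * by ata_eh_handle_port_resume().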
	 */
	spin_lock_irqsave(ap->lock, flags);
	if (!(ap->pflags & ATA_PFLAG_PM_PENDING) ||
	    ap->pm_mesg.event == PM_EVENT_ON) {
		spin_unlock_irqrestore(ap->lock, flags);
		return;
	}
	spin_unlock_irqrestore(ap->lock, flags);

	WARN_ON(ap->pflags & ATA_PFLAG_SUSPENDED);

	/* suspend */
	ata_eh_freeze_port(ap);

	if (ap->ops->port_suspend)
		rc = ap->ops->port_suspend(ap, ap->pm_mesg);

	/* report result */
	spin_lock_irqsave(ap->lock, flags);

	ap->pflags &= ~ATA_PFLAG_PM_PENDING;
	if (rc == 0)
		ap->pflags |= ATA_PFLAG_SUSPENDED;
	else
		ata_port_schedule_eh(ap);

	if (ap->pm_result) {
		*ap->pm_result = rc;
		ap->pm_result = NULL;
	}

	spin_unlock_irqrestore(ap->lock, flags);
}

/**
 *	ata_eh_handle_port_resume - perform port resume operation
 *	@ap: port to resume
 *
 *	Resume @ap.
 *
 *	This function also waits up to one second until all devices
 *	hanging off this port have requested resume EH action.  This
 *	is to prevent invoking EH, and thus reset, multiple times on
 *	resume.
 *
 *	On DPM resume, where some of the devices might not be resumed
 *	together, this may delay port resume by up to one second, but
 *	such DPM resumes are rare and a one second delay isn't too bad.
 *
 *	LOCKING:
 *	Kernel thread context (may sleep).
 */
static void ata_eh_handle_port_resume(struct ata_port *ap)
{
	unsigned long timeout;
	unsigned long flags;
	int i, rc = 0;

	/* are we resuming? */
	spin_lock_irqsave(ap->lock, flags);
	if (!(ap->pflags & ATA_PFLAG_PM_PENDING) ||
	    ap->pm_mesg.event != PM_EVENT_ON) {
		spin_unlock_irqrestore(ap->lock, flags);
		return;
	}
	spin_unlock_irqrestore(ap->lock, flags);

	/* spurious? */
	if (!(ap->pflags & ATA_PFLAG_SUSPENDED))
		goto done;

	if (ap->ops->port_resume)
		rc = ap->ops->port_resume(ap);

	/* give devices time to request EH */
	timeout = jiffies + HZ; /* 1s max */
	while (1) {
		for (i = 0; i < ATA_MAX_DEVICES; i++) {
			struct ata_device *dev = &ap->device[i];
			unsigned int action = ata_eh_dev_action(dev);

			if ((dev->flags & ATA_DFLAG_SUSPENDED) &&
			    !(action & ATA_EH_RESUME))
				break;
		}

		if (i == ATA_MAX_DEVICES || time_after(jiffies, timeout))
			break;
		msleep(10);
	}

 done:
	spin_lock_irqsave(ap->lock, flags);
	ap->pflags &= ~(ATA_PFLAG_PM_PENDING | ATA_PFLAG_SUSPENDED);
	if (ap->pm_result) {
		*ap->pm_result = rc;
		ap->pm_result = NULL;
	}
	spin_unlock_irqrestore(ap->lock, flags);
}
#endif /* CONFIG_PM */
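
/*
 * Usage sketch (illustrative only, not part of this file's interface):
 * a low-level driver's ->error_handler typically just hands its reset
 * methods to ata_do_eh().  "foo" below is a hypothetical driver; the
 * ata_std_*/sata_std_* helpers are the stock libata reset methods.
 *
 *	static void foo_error_handler(struct ata_port *ap)
 *	{
 *		ata_do_eh(ap, ata_std_prereset, ata_std_softreset,
 *			  sata_std_hardreset, ata_std_postreset);
 *	}
 */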