/*
 *  libata-eh.c - libata error handling
 *
 *  Maintained by:  Jeff Garzik <jgarzik@pobox.com>
 *		    Please ALWAYS copy linux-ide@vger.kernel.org
 *		    on emails.
 *
 *  Copyright 2006 Tejun Heo <htejun@gmail.com>
 *
 *
 *  This program is free software; you can redistribute it and/or
 *  modify it under the terms of the GNU General Public License as
 *  published by the Free Software Foundation; either version 2, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 *  General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; see the file COPYING.  If not, write to
 *  the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139,
 *  USA.
 *
 *
 *  libata documentation is available via 'make {ps|pdf}docs',
 *  as Documentation/DocBook/libata.*
 *
 *  Hardware documentation available from http://www.t13.org/ and
 *  http://www.sata-io.org/
 *
 */

#include <linux/kernel.h>
#include <scsi/scsi.h>
#include <scsi/scsi_host.h>
#include <scsi/scsi_eh.h>
#include <scsi/scsi_device.h>
#include <scsi/scsi_cmnd.h>
#include "../scsi/scsi_transport_api.h"

#include <linux/libata.h>

#include "libata.h"

static void __ata_port_freeze(struct ata_port *ap);
static void ata_eh_finish(struct ata_port *ap);
static void ata_eh_handle_port_suspend(struct ata_port *ap);
static void ata_eh_handle_port_resume(struct ata_port *ap);

static void ata_ering_record(struct ata_ering *ering, int is_io,
			     unsigned int err_mask)
{
	struct ata_ering_entry *ent;

	WARN_ON(!err_mask);

	ering->cursor++;
	ering->cursor %= ATA_ERING_SIZE;

	ent = &ering->ring[ering->cursor];
	ent->is_io = is_io;
	ent->err_mask = err_mask;
	ent->timestamp = get_jiffies_64();
}

static struct ata_ering_entry *ata_ering_top(struct ata_ering *ering)
{
	struct ata_ering_entry *ent = &ering->ring[ering->cursor];

	if (!ent->err_mask)
		return NULL;
	return ent;
}

static int ata_ering_map(struct ata_ering *ering,
			 int (*map_fn)(struct ata_ering_entry *, void *),
			 void *arg)
{
	int idx, rc = 0;
	struct ata_ering_entry *ent;

	/* walk entries newest-first, starting at the cursor */
	idx = ering->cursor;
	do {
		ent = &ering->ring[idx];
		if (!ent->err_mask)
			break;
		rc = map_fn(ent, arg);
		if (rc)
			break;
		idx = (idx - 1 + ATA_ERING_SIZE) % ATA_ERING_SIZE;
	} while (idx != ering->cursor);

	return rc;
}

static unsigned int ata_eh_dev_action(struct ata_device *dev)
{
	struct ata_eh_context *ehc = &dev->ap->eh_context;

	return ehc->i.action | ehc->i.dev_action[dev->devno];
}

static void ata_eh_clear_action(struct ata_device *dev,
				struct ata_eh_info *ehi, unsigned int action)
{
	int i;

	if (!dev) {
		ehi->action &= ~action;
		for (i = 0; i < ATA_MAX_DEVICES; i++)
			ehi->dev_action[i] &= ~action;
	} else {
		/* doesn't make sense for port-wide EH actions */
		WARN_ON(!(action & ATA_EH_PERDEV_MASK));

		/* break ehi->action into ehi->dev_action */
		if (ehi->action & action) {
			for (i = 0; i < ATA_MAX_DEVICES; i++)
				ehi->dev_action[i] |= ehi->action & action;
			ehi->action &= ~action;
		}

		/* turn off the specified per-dev action */
		ehi->dev_action[dev->devno] &= ~action;
	}
}
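
#if 0
/* Illustrative sketch, not part of the driver and not compiled:
 * ata_ering_map() visits entries newest-first starting at the
 * cursor and stops at the first unused slot or when the callback
 * returns non-zero.  A hypothetical callback counting recent
 * timeout errors could look like this; count_timeouts_cb() and
 * count_timeouts() are made-up names.
 */
static int count_timeouts_cb(struct ata_ering_entry *ent, void *void_arg)
{
	int *nr_timeouts = void_arg;

	if (ent->err_mask & AC_ERR_TIMEOUT)
		(*nr_timeouts)++;
	return 0;	/* zero means keep iterating */
}

static int count_timeouts(struct ata_ering *ering)
{
	int nr_timeouts = 0;

	ata_ering_map(ering, count_timeouts_cb, &nr_timeouts);
	return nr_timeouts;
}
#endif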

/**
 *	ata_scsi_timed_out - SCSI layer time out callback
 *	@cmd: timed out SCSI command
 *
 *	Handles SCSI layer timeout.  We race with normal completion of
 *	the qc for @cmd.  If the qc is already gone, we lose and let
 *	the scsi command finish (EH_HANDLED).  Otherwise, the qc has
 *	timed out and EH should be invoked.  Prevent ata_qc_complete()
 *	from finishing it by setting EH_SCHEDULED and return
 *	EH_NOT_HANDLED.
 *
 *	TODO: kill this function once old EH is gone.
 *
 *	LOCKING:
 *	Called from timer context
 *
 *	RETURNS:
 *	EH_HANDLED or EH_NOT_HANDLED
 */
enum scsi_eh_timer_return ata_scsi_timed_out(struct scsi_cmnd *cmd)
{
	struct Scsi_Host *host = cmd->device->host;
	struct ata_port *ap = ata_shost_to_port(host);
	unsigned long flags;
	struct ata_queued_cmd *qc;
	enum scsi_eh_timer_return ret;

	DPRINTK("ENTER\n");

	if (ap->ops->error_handler) {
		ret = EH_NOT_HANDLED;
		goto out;
	}

	ret = EH_HANDLED;
	spin_lock_irqsave(ap->lock, flags);
	qc = ata_qc_from_tag(ap, ap->active_tag);
	if (qc) {
		WARN_ON(qc->scsicmd != cmd);
		qc->flags |= ATA_QCFLAG_EH_SCHEDULED;
		qc->err_mask |= AC_ERR_TIMEOUT;
		ret = EH_NOT_HANDLED;
	}
	spin_unlock_irqrestore(ap->lock, flags);

 out:
	DPRINTK("EXIT, ret=%d\n", ret);
	return ret;
}

/**
 *	ata_scsi_error - SCSI layer error handler callback
 *	@host: SCSI host on which error occurred
 *
 *	Handles SCSI-layer-thrown error events.
 *
 *	LOCKING:
 *	Inherited from SCSI layer (none, can sleep)
 *
 *	RETURNS:
 *	Zero.
 */
void ata_scsi_error(struct Scsi_Host *host)
{
	struct ata_port *ap = ata_shost_to_port(host);
	int i, repeat_cnt = ATA_EH_MAX_REPEAT;
	unsigned long flags;

	DPRINTK("ENTER\n");

	/* synchronize with port task */
	ata_port_flush_task(ap);

	/* synchronize with host lock and sort out timeouts */

	/* For new EH, all qcs are finished in one of three ways -
	 * normal completion, error completion, and SCSI timeout.
	 * Both completions can race against SCSI timeout.  When normal
	 * completion wins, the qc never reaches EH.  When error
	 * completion wins, the qc has ATA_QCFLAG_FAILED set.
	 *
	 * When SCSI timeout wins, things are a bit more complex.
	 * Normal or error completion can occur after the timeout but
	 * before this point.  In such cases, both types of
	 * completions are honored.  A scmd is determined to have
	 * timed out iff its associated qc is active and not failed.
	 */
	if (ap->ops->error_handler) {
		struct scsi_cmnd *scmd, *tmp;
		int nr_timedout = 0;

		spin_lock_irqsave(ap->lock, flags);

		list_for_each_entry_safe(scmd, tmp, &host->eh_cmd_q, eh_entry) {
			struct ata_queued_cmd *qc;

			for (i = 0; i < ATA_MAX_QUEUE; i++) {
				qc = __ata_qc_from_tag(ap, i);
				if (qc->flags & ATA_QCFLAG_ACTIVE &&
				    qc->scsicmd == scmd)
					break;
			}

			if (i < ATA_MAX_QUEUE) {
				/* the scmd has an associated qc */
				if (!(qc->flags & ATA_QCFLAG_FAILED)) {
					/* which hasn't failed yet, timeout */
					qc->err_mask |= AC_ERR_TIMEOUT;
					qc->flags |= ATA_QCFLAG_FAILED;
					nr_timedout++;
				}
			} else {
				/* Normal completion occurred after
				 * SCSI timeout but before this point.
				 * Successfully complete it.
				 */
				scmd->retries = scmd->allowed;
				scsi_eh_finish_cmd(scmd, &ap->eh_done_q);
			}
		}

		/* If we have timed out qcs, they belong to EH from
		 * this point but the state of the controller is
		 * unknown.  Freeze the port to make sure the IRQ
		 * handler doesn't diddle with those qcs.  This must
		 * be done atomically w.r.t. setting QCFLAG_FAILED.
		 */
		if (nr_timedout)
			__ata_port_freeze(ap);

		spin_unlock_irqrestore(ap->lock, flags);
	} else
		spin_unlock_wait(ap->lock);

 repeat:
	/* invoke error handler */
	if (ap->ops->error_handler) {
		/* process port resume request */
		ata_eh_handle_port_resume(ap);

		/* fetch & clear EH info */
		spin_lock_irqsave(ap->lock, flags);

		memset(&ap->eh_context, 0, sizeof(ap->eh_context));
		ap->eh_context.i = ap->eh_info;
		memset(&ap->eh_info, 0, sizeof(ap->eh_info));

		ap->pflags |= ATA_PFLAG_EH_IN_PROGRESS;
		ap->pflags &= ~ATA_PFLAG_EH_PENDING;

		spin_unlock_irqrestore(ap->lock, flags);

		/* invoke EH, skip if unloading or suspended */
		if (!(ap->pflags & (ATA_PFLAG_UNLOADING | ATA_PFLAG_SUSPENDED)))
			ap->ops->error_handler(ap);
		else
			ata_eh_finish(ap);

		/* process port suspend request */
		ata_eh_handle_port_suspend(ap);

		/* An exception might have happened after ->error_handler
		 * recovered the port but before this point.  Repeat
		 * EH in such case.
		 */
		spin_lock_irqsave(ap->lock, flags);

		if (ap->pflags & ATA_PFLAG_EH_PENDING) {
			if (--repeat_cnt) {
				ata_port_printk(ap, KERN_INFO,
					"EH pending after completion, "
					"repeating EH (cnt=%d)\n", repeat_cnt);
				spin_unlock_irqrestore(ap->lock, flags);
				goto repeat;
			}
			ata_port_printk(ap, KERN_ERR, "EH pending after %d "
					"tries, giving up\n", ATA_EH_MAX_REPEAT);
		}

		/* this run is complete, make sure EH info is clear */
		memset(&ap->eh_info, 0, sizeof(ap->eh_info));

		/* Clear host_eh_scheduled while holding ap->lock such
		 * that if exception occurs after this point but
		 * before EH completion, SCSI midlayer will
		 * re-initiate EH.
		 */
		host->host_eh_scheduled = 0;

		spin_unlock_irqrestore(ap->lock, flags);
	} else {
		WARN_ON(ata_qc_from_tag(ap, ap->active_tag) == NULL);
		ap->ops->eng_timeout(ap);
	}

	/* finish or retry handled scmd's and clean up */
	WARN_ON(host->host_failed || !list_empty(&host->eh_cmd_q));

	scsi_eh_flush_done_q(&ap->eh_done_q);

	/* clean up */
	spin_lock_irqsave(ap->lock, flags);

	if (ap->pflags & ATA_PFLAG_LOADING)
		ap->pflags &= ~ATA_PFLAG_LOADING;
	else if (ap->pflags & ATA_PFLAG_SCSI_HOTPLUG)
		queue_delayed_work(ata_aux_wq, &ap->hotplug_task, 0);

	if (ap->pflags & ATA_PFLAG_RECOVERED)
		ata_port_printk(ap, KERN_INFO, "EH complete\n");

	ap->pflags &= ~(ATA_PFLAG_SCSI_HOTPLUG | ATA_PFLAG_RECOVERED);

	/* tell wait_eh that we're done */
	ap->pflags &= ~ATA_PFLAG_EH_IN_PROGRESS;
	wake_up_all(&ap->eh_wait_q);

	spin_unlock_irqrestore(ap->lock, flags);

	DPRINTK("EXIT\n");
}

/**
 *	ata_port_wait_eh - Wait for the currently pending EH to complete
 *	@ap: Port to wait EH for
 *
 *	Wait until the currently pending EH is complete.
 *
 *	LOCKING:
 *	Kernel thread context (may sleep).
 */
void ata_port_wait_eh(struct ata_port *ap)
{
	unsigned long flags;
	DEFINE_WAIT(wait);

 retry:
	spin_lock_irqsave(ap->lock, flags);

	while (ap->pflags & (ATA_PFLAG_EH_PENDING | ATA_PFLAG_EH_IN_PROGRESS)) {
		prepare_to_wait(&ap->eh_wait_q, &wait, TASK_UNINTERRUPTIBLE);
		spin_unlock_irqrestore(ap->lock, flags);
		schedule();
		spin_lock_irqsave(ap->lock, flags);
	}
	finish_wait(&ap->eh_wait_q, &wait);

	spin_unlock_irqrestore(ap->lock, flags);

	/* make sure SCSI EH is complete */
	if (scsi_host_in_recovery(ap->scsi_host)) {
		msleep(10);
		goto retry;
	}
}

/**
 *	ata_qc_timeout - Handle timeout of queued command
 *	@qc: Command that timed out
 *
 *	Some part of the kernel (currently, only the SCSI layer)
 *	has noticed that the active command on port @ap has not
 *	completed after a specified length of time.  Handle this
 *	condition by disabling DMA (if necessary) and completing
 *	transactions, with error if necessary.
 *
 *	This also handles the case of the "lost interrupt", where
 *	for some reason (possibly hardware bug, possibly driver bug)
 *	an interrupt was not delivered to the driver, even though the
 *	transaction completed successfully.
 *
 *	TODO: kill this function once old EH is gone.
 *
 *	LOCKING:
 *	Inherited from SCSI layer (none, can sleep)
 */
static void ata_qc_timeout(struct ata_queued_cmd *qc)
{
	struct ata_port *ap = qc->ap;
	u8 host_stat = 0, drv_stat;
	unsigned long flags;

	DPRINTK("ENTER\n");

	ap->hsm_task_state = HSM_ST_IDLE;

	spin_lock_irqsave(ap->lock, flags);

	switch (qc->tf.protocol) {

	case ATA_PROT_DMA:
	case ATA_PROT_ATAPI_DMA:
		host_stat = ap->ops->bmdma_status(ap);

		/* before we do anything else, clear DMA-Start bit */
		ap->ops->bmdma_stop(qc);

		/* fall through */

	default:
		ata_altstatus(ap);
		drv_stat = ata_chk_status(ap);

		/* ack bmdma irq events */
		ap->ops->irq_clear(ap);

		ata_dev_printk(qc->dev, KERN_ERR, "command 0x%x timeout, "
			       "stat 0x%x host_stat 0x%x\n",
			       qc->tf.command, drv_stat, host_stat);

		/* complete taskfile transaction */
		qc->err_mask |= AC_ERR_TIMEOUT;
		break;
	}

	spin_unlock_irqrestore(ap->lock, flags);

	ata_eh_qc_complete(qc);

	DPRINTK("EXIT\n");
}

/**
 *	ata_eng_timeout - Handle timeout of queued command
 *	@ap: Port on which timed-out command is active
 *
 *	Some part of the kernel (currently, only the SCSI layer)
 *	has noticed that the active command on port @ap has not
 *	completed after a specified length of time.  Handle this
 *	condition by disabling DMA (if necessary) and completing
 *	transactions, with error if necessary.
 *
 *	This also handles the case of the "lost interrupt", where
 *	for some reason (possibly hardware bug, possibly driver bug)
 *	an interrupt was not delivered to the driver, even though the
 *	transaction completed successfully.
 *
 *	TODO: kill this function once old EH is gone.
 *
 *	LOCKING:
 *	Inherited from SCSI layer (none, can sleep)
 */
void ata_eng_timeout(struct ata_port *ap)
{
	DPRINTK("ENTER\n");

	ata_qc_timeout(ata_qc_from_tag(ap, ap->active_tag));

	DPRINTK("EXIT\n");
}

/**
 *	ata_qc_schedule_eh - schedule qc for error handling
 *	@qc: command to schedule error handling for
 *
 *	Schedule error handling for @qc.  EH will kick in as soon as
 *	other commands are drained.
 *
 *	LOCKING:
 *	spin_lock_irqsave(host lock)
 */
void ata_qc_schedule_eh(struct ata_queued_cmd *qc)
{
	struct ata_port *ap = qc->ap;

	WARN_ON(!ap->ops->error_handler);

	qc->flags |= ATA_QCFLAG_FAILED;
	qc->ap->pflags |= ATA_PFLAG_EH_PENDING;

	/* The following will fail if timeout has already expired.
	 * ata_scsi_error() takes care of such scmds on EH entry.
	 * Note that ATA_QCFLAG_FAILED is unconditionally set after
	 * this function completes.
	 */
	scsi_req_abort_cmd(qc->scsicmd);
}

/**
 *	ata_port_schedule_eh - schedule error handling without a qc
 *	@ap: ATA port to schedule EH for
 *
 *	Schedule error handling for @ap.  EH will kick in as soon as
 *	all commands are drained.
 *
 *	LOCKING:
 *	spin_lock_irqsave(host lock)
 */
void ata_port_schedule_eh(struct ata_port *ap)
{
	WARN_ON(!ap->ops->error_handler);

	ap->pflags |= ATA_PFLAG_EH_PENDING;
	scsi_schedule_eh(ap->scsi_host);

	DPRINTK("port EH scheduled\n");
}

/**
 *	ata_port_abort - abort all qc's on the port
 *	@ap: ATA port to abort qc's for
 *
 *	Abort all active qc's of @ap and schedule EH.
 *
 *	LOCKING:
 *	spin_lock_irqsave(host lock)
 *
 *	RETURNS:
 *	Number of aborted qc's.
 */
int ata_port_abort(struct ata_port *ap)
{
	int tag, nr_aborted = 0;

	WARN_ON(!ap->ops->error_handler);

	for (tag = 0; tag < ATA_MAX_QUEUE; tag++) {
		struct ata_queued_cmd *qc = ata_qc_from_tag(ap, tag);

		if (qc) {
			qc->flags |= ATA_QCFLAG_FAILED;
			ata_qc_complete(qc);
			nr_aborted++;
		}
	}

	if (!nr_aborted)
		ata_port_schedule_eh(ap);

	return nr_aborted;
}

/**
 *	__ata_port_freeze - freeze port
 *	@ap: ATA port to freeze
 *
 *	This function is called when HSM violation or some other
 *	condition disrupts normal operation of the port.  A frozen port
 *	is not allowed to perform any operation until the port is
 *	thawed, which usually follows a successful reset.
 *
 *	The ap->ops->freeze() callback can be used for freezing the port
 *	hardware-wise (e.g. mask interrupt and stop DMA engine).  If a
 *	port cannot be frozen hardware-wise, the interrupt handler
 *	must ack and clear interrupts unconditionally while the port
 *	is frozen.
 *
 *	LOCKING:
 *	spin_lock_irqsave(host lock)
 */
static void __ata_port_freeze(struct ata_port *ap)
{
	WARN_ON(!ap->ops->error_handler);

	if (ap->ops->freeze)
		ap->ops->freeze(ap);

	ap->pflags |= ATA_PFLAG_FROZEN;

	DPRINTK("ata%u port frozen\n", ap->id);
}
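
#if 0
/* Illustrative sketch, not compiled: a hypothetical LLDD ->freeze
 * callback.  Freezing is done hardware-wise, typically by masking
 * the port interrupt so the IRQ handler can't race with EH.
 * foo_freeze(), foo_port_base() and FOO_PORT_IRQ_MASK are made-up
 * names; real drivers use their own register layout.
 */
static void foo_freeze(struct ata_port *ap)
{
	void __iomem *port_mmio = foo_port_base(ap);	/* hypothetical helper */

	/* mask all port interrupts; a later reset + ->thaw undoes this */
	writel(0, port_mmio + FOO_PORT_IRQ_MASK);
}
#endif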

/**
 *	ata_port_freeze - abort & freeze port
 *	@ap: ATA port to freeze
 *
 *	Abort and freeze @ap.
 *
 *	LOCKING:
 *	spin_lock_irqsave(host lock)
 *
 *	RETURNS:
 *	Number of aborted commands.
 */
int ata_port_freeze(struct ata_port *ap)
{
	int nr_aborted;

	WARN_ON(!ap->ops->error_handler);

	nr_aborted = ata_port_abort(ap);
	__ata_port_freeze(ap);

	return nr_aborted;
}

/**
 *	ata_eh_freeze_port - EH helper to freeze port
 *	@ap: ATA port to freeze
 *
 *	Freeze @ap.
 *
 *	LOCKING:
 *	None.
 */
void ata_eh_freeze_port(struct ata_port *ap)
{
	unsigned long flags;

	if (!ap->ops->error_handler)
		return;

	spin_lock_irqsave(ap->lock, flags);
	__ata_port_freeze(ap);
	spin_unlock_irqrestore(ap->lock, flags);
}

/**
 *	ata_eh_thaw_port - EH helper to thaw port
 *	@ap: ATA port to thaw
 *
 *	Thaw frozen port @ap.
 *
 *	LOCKING:
 *	None.
 */
void ata_eh_thaw_port(struct ata_port *ap)
{
	unsigned long flags;

	if (!ap->ops->error_handler)
		return;

	spin_lock_irqsave(ap->lock, flags);

	ap->pflags &= ~ATA_PFLAG_FROZEN;

	if (ap->ops->thaw)
		ap->ops->thaw(ap);

	spin_unlock_irqrestore(ap->lock, flags);

	DPRINTK("ata%u port thawed\n", ap->id);
}

static void ata_eh_scsidone(struct scsi_cmnd *scmd)
{
	/* nada */
}

static void __ata_eh_qc_complete(struct ata_queued_cmd *qc)
{
	struct ata_port *ap = qc->ap;
	struct scsi_cmnd *scmd = qc->scsicmd;
	unsigned long flags;

	spin_lock_irqsave(ap->lock, flags);
	qc->scsidone = ata_eh_scsidone;
	__ata_qc_complete(qc);
	WARN_ON(ata_tag_valid(qc->tag));
	spin_unlock_irqrestore(ap->lock, flags);

	scsi_eh_finish_cmd(scmd, &ap->eh_done_q);
}

/**
 *	ata_eh_qc_complete - Complete an active ATA command from EH
 *	@qc: Command to complete
 *
 *	Indicate to the mid and upper layers that an ATA command has
 *	completed.  To be used from EH.
 */
void ata_eh_qc_complete(struct ata_queued_cmd *qc)
{
	struct scsi_cmnd *scmd = qc->scsicmd;

	scmd->retries = scmd->allowed;
	__ata_eh_qc_complete(qc);
}

/**
 *	ata_eh_qc_retry - Tell midlayer to retry an ATA command after EH
 *	@qc: Command to retry
 *
 *	Indicate to the mid and upper layers that an ATA command
 *	should be retried.  To be used from EH.
 *
 *	SCSI midlayer limits the number of retries to scmd->allowed.
 *	scmd->retries is decremented for commands which get retried
 *	due to unrelated failures (qc->err_mask is zero).
 */
void ata_eh_qc_retry(struct ata_queued_cmd *qc)
{
	struct scsi_cmnd *scmd = qc->scsicmd;

	if (!qc->err_mask && scmd->retries)
		scmd->retries--;
	__ata_eh_qc_complete(qc);
}
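
#if 0
/* Illustrative sketch, not compiled: from EH, a failed qc is either
 * completed (the result stands) or retried.  Retrying only makes
 * sense when the failure wasn't inherent to the command itself;
 * this mirrors the policy ata_eh_finish() applies later in this
 * file.  foo_finish_qc() is a made-up name.
 */
static void foo_finish_qc(struct ata_queued_cmd *qc)
{
	if (qc->err_mask & AC_ERR_INVALID)
		ata_eh_qc_complete(qc);	/* invalid argument, retry is futile */
	else
		ata_eh_qc_retry(qc);	/* possibly transient, let midlayer retry */
}
#endif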

/**
 *	ata_eh_detach_dev - detach ATA device
 *	@dev: ATA device to detach
 *
 *	Detach @dev.
 *
 *	LOCKING:
 *	None.
 */
static void ata_eh_detach_dev(struct ata_device *dev)
{
	struct ata_port *ap = dev->ap;
	unsigned long flags;

	ata_dev_disable(dev);

	spin_lock_irqsave(ap->lock, flags);

	dev->flags &= ~ATA_DFLAG_DETACH;

	if (ata_scsi_offline_dev(dev)) {
		dev->flags |= ATA_DFLAG_DETACHED;
		ap->pflags |= ATA_PFLAG_SCSI_HOTPLUG;
	}

	/* clear per-dev EH actions */
	ata_eh_clear_action(dev, &ap->eh_info, ATA_EH_PERDEV_MASK);
	ata_eh_clear_action(dev, &ap->eh_context.i, ATA_EH_PERDEV_MASK);

	spin_unlock_irqrestore(ap->lock, flags);
}

/**
 *	ata_eh_about_to_do - about to perform eh_action
 *	@ap: target ATA port
 *	@dev: target ATA dev for per-dev action (can be NULL)
 *	@action: action about to be performed
 *
 *	Called just before performing EH actions to clear related bits
 *	in @ap->eh_info such that eh actions are not unnecessarily
 *	repeated.
 *
 *	LOCKING:
 *	None.
 */
static void ata_eh_about_to_do(struct ata_port *ap, struct ata_device *dev,
			       unsigned int action)
{
	unsigned long flags;
	struct ata_eh_info *ehi = &ap->eh_info;
	struct ata_eh_context *ehc = &ap->eh_context;

	spin_lock_irqsave(ap->lock, flags);

	/* Reset is represented by combination of actions and EHI
	 * flags.  Suck in all related bits before clearing eh_info to
	 * avoid losing requested action.
	 */
	if (action & ATA_EH_RESET_MASK) {
		ehc->i.action |= ehi->action & ATA_EH_RESET_MASK;
		ehc->i.flags |= ehi->flags & ATA_EHI_RESET_MODIFIER_MASK;

		/* make sure all reset actions are cleared & clear EHI flags */
		action |= ATA_EH_RESET_MASK;
		ehi->flags &= ~ATA_EHI_RESET_MODIFIER_MASK;
	}

	ata_eh_clear_action(dev, ehi, action);

	if (!(ehc->i.flags & ATA_EHI_QUIET))
		ap->pflags |= ATA_PFLAG_RECOVERED;

	spin_unlock_irqrestore(ap->lock, flags);
}

/**
 *	ata_eh_done - EH action complete
 *	@ap: target ATA port
 *	@dev: target ATA dev for per-dev action (can be NULL)
 *	@action: action just completed
 *
 *	Called right after performing EH actions to clear related bits
 *	in @ap->eh_context.
 *
 *	LOCKING:
 *	None.
 */
static void ata_eh_done(struct ata_port *ap, struct ata_device *dev,
			unsigned int action)
{
	/* if reset is complete, clear all reset actions & reset modifier */
	if (action & ATA_EH_RESET_MASK) {
		action |= ATA_EH_RESET_MASK;
		ap->eh_context.i.flags &= ~ATA_EHI_RESET_MODIFIER_MASK;
	}

	ata_eh_clear_action(dev, &ap->eh_context.i, action);
}
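
#if 0
/* Illustrative sketch, not compiled: EH actions are bracketed by
 * ata_eh_about_to_do() and ata_eh_done() so that requests recorded
 * before the action started aren't needlessly repeated afterwards.
 * This is the same pattern ata_eh_revalidate_and_attach() uses
 * later in this file; foo_revalidate() is a made-up name.
 */
static int foo_revalidate(struct ata_port *ap, struct ata_device *dev)
{
	int rc;

	ata_eh_about_to_do(ap, dev, ATA_EH_REVALIDATE);
	rc = ata_dev_revalidate(dev, 0);
	if (rc == 0)
		ata_eh_done(ap, dev, ATA_EH_REVALIDATE);
	return rc;
}
#endif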

/**
 *	ata_err_string - convert err_mask to descriptive string
 *	@err_mask: error mask to convert to string
 *
 *	Convert @err_mask to descriptive string.  Errors are
 *	prioritized according to severity and only the most severe
 *	error is reported.
 *
 *	LOCKING:
 *	None.
 *
 *	RETURNS:
 *	Descriptive string for @err_mask
 */
static const char *ata_err_string(unsigned int err_mask)
{
	if (err_mask & AC_ERR_HOST_BUS)
		return "host bus error";
	if (err_mask & AC_ERR_ATA_BUS)
		return "ATA bus error";
	if (err_mask & AC_ERR_TIMEOUT)
		return "timeout";
	if (err_mask & AC_ERR_HSM)
		return "HSM violation";
	if (err_mask & AC_ERR_SYSTEM)
		return "internal error";
	if (err_mask & AC_ERR_MEDIA)
		return "media error";
	if (err_mask & AC_ERR_INVALID)
		return "invalid argument";
	if (err_mask & AC_ERR_DEV)
		return "device error";
	return "unknown error";
}

/**
 *	ata_read_log_page - read a specific log page
 *	@dev: target device
 *	@page: page to read
 *	@buf: buffer to store read page
 *	@sectors: number of sectors to read
 *
 *	Read log page using READ_LOG_EXT command.
 *
 *	LOCKING:
 *	Kernel thread context (may sleep).
 *
 *	RETURNS:
 *	0 on success, AC_ERR_* mask otherwise.
 */
static unsigned int ata_read_log_page(struct ata_device *dev,
				      u8 page, void *buf, unsigned int sectors)
{
	struct ata_taskfile tf;
	unsigned int err_mask;

	DPRINTK("read log page - page %d\n", page);

	ata_tf_init(dev, &tf);
	tf.command = ATA_CMD_READ_LOG_EXT;
	tf.lbal = page;
	tf.nsect = sectors;
	tf.hob_nsect = sectors >> 8;
	tf.flags |= ATA_TFLAG_ISADDR | ATA_TFLAG_LBA48 | ATA_TFLAG_DEVICE;
	tf.protocol = ATA_PROT_PIO;

	err_mask = ata_exec_internal(dev, &tf, NULL, DMA_FROM_DEVICE,
				     buf, sectors * ATA_SECT_SIZE);

	DPRINTK("EXIT, err_mask=%x\n", err_mask);
	return err_mask;
}

/**
 *	ata_eh_read_log_10h - Read log page 10h for NCQ error details
 *	@dev: Device to read log page 10h from
 *	@tag: Resulting tag of the failed command
 *	@tf: Resulting taskfile registers of the failed command
 *
 *	Read log page 10h to obtain NCQ error details and clear error
 *	condition.
 *
 *	LOCKING:
 *	Kernel thread context (may sleep).
 *
 *	RETURNS:
 *	0 on success, -errno otherwise.
 */
static int ata_eh_read_log_10h(struct ata_device *dev,
			       int *tag, struct ata_taskfile *tf)
{
	u8 *buf = dev->ap->sector_buf;
	unsigned int err_mask;
	u8 csum;
	int i;

	err_mask = ata_read_log_page(dev, ATA_LOG_SATA_NCQ, buf, 1);
	if (err_mask)
		return -EIO;

	/* all bytes of the 512-byte page must sum to zero */
	csum = 0;
	for (i = 0; i < ATA_SECT_SIZE; i++)
		csum += buf[i];
	if (csum)
		ata_dev_printk(dev, KERN_WARNING,
			       "invalid checksum 0x%x on log page 10h\n", csum);

	/* NQ bit set - the failed command wasn't an NCQ command */
	if (buf[0] & 0x80)
		return -ENOENT;

	*tag = buf[0] & 0x1f;

	tf->command = buf[2];
	tf->feature = buf[3];
	tf->lbal = buf[4];
	tf->lbam = buf[5];
	tf->lbah = buf[6];
	tf->device = buf[7];
	tf->hob_lbal = buf[8];
	tf->hob_lbam = buf[9];
	tf->hob_lbah = buf[10];
	tf->nsect = buf[12];
	tf->hob_nsect = buf[13];

	return 0;
}

/**
 *	atapi_eh_request_sense - perform ATAPI REQUEST_SENSE
 *	@dev: device to perform REQUEST_SENSE to
 *	@sense_buf: result sense data buffer (SCSI_SENSE_BUFFERSIZE bytes long)
 *
 *	Perform ATAPI REQUEST_SENSE after the device reported CHECK
 *	SENSE.  This function is an EH helper.
 *
 *	LOCKING:
 *	Kernel thread context (may sleep).
 *
 *	RETURNS:
 *	0 on success, AC_ERR_* mask on failure
 */
static unsigned int atapi_eh_request_sense(struct ata_device *dev,
					   unsigned char *sense_buf)
{
	struct ata_port *ap = dev->ap;
	struct ata_taskfile tf;
	u8 cdb[ATAPI_CDB_LEN];

	DPRINTK("ATAPI request sense\n");

	ata_tf_init(dev, &tf);

	/* FIXME: is this needed? */
	memset(sense_buf, 0, SCSI_SENSE_BUFFERSIZE);

	/* XXX: why tf_read here? */
	ap->ops->tf_read(ap, &tf);

	/* fill these in, for the case where they are -not- overwritten */
	sense_buf[0] = 0x70;
	sense_buf[2] = tf.feature >> 4;

	memset(cdb, 0, ATAPI_CDB_LEN);
	cdb[0] = REQUEST_SENSE;
	cdb[4] = SCSI_SENSE_BUFFERSIZE;

	tf.flags |= ATA_TFLAG_ISADDR | ATA_TFLAG_DEVICE;
	tf.command = ATA_CMD_PACKET;

	/* is it pointless to prefer PIO for "safety reasons"? */
	if (ap->flags & ATA_FLAG_PIO_DMA) {
		tf.protocol = ATA_PROT_ATAPI_DMA;
		tf.feature |= ATAPI_PKT_DMA;
	} else {
		tf.protocol = ATA_PROT_ATAPI;
		tf.lbam = (8 * 1024) & 0xff;
		tf.lbah = (8 * 1024) >> 8;
	}

	return ata_exec_internal(dev, &tf, cdb, DMA_FROM_DEVICE,
				 sense_buf, SCSI_SENSE_BUFFERSIZE);
}

/**
 *	ata_eh_analyze_serror - analyze SError for a failed port
 *	@ap: ATA port to analyze SError for
 *
 *	Analyze SError if available and further determine cause of
 *	failure.
 *
 *	LOCKING:
 *	None.
 */
static void ata_eh_analyze_serror(struct ata_port *ap)
{
	struct ata_eh_context *ehc = &ap->eh_context;
	u32 serror = ehc->i.serror;
	unsigned int err_mask = 0, action = 0;

	if (serror & SERR_PERSISTENT) {
		err_mask |= AC_ERR_ATA_BUS;
		action |= ATA_EH_HARDRESET;
	}
	if (serror &
	    (SERR_DATA_RECOVERED | SERR_COMM_RECOVERED | SERR_DATA)) {
		err_mask |= AC_ERR_ATA_BUS;
		action |= ATA_EH_SOFTRESET;
	}
	if (serror & SERR_PROTOCOL) {
		err_mask |= AC_ERR_HSM;
		action |= ATA_EH_SOFTRESET;
	}
	if (serror & SERR_INTERNAL) {
		err_mask |= AC_ERR_SYSTEM;
		action |= ATA_EH_SOFTRESET;
	}
	if (serror & (SERR_PHYRDY_CHG | SERR_DEV_XCHG))
		ata_ehi_hotplugged(&ehc->i);

	ehc->i.err_mask |= err_mask;
	ehc->i.action |= action;
}
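
#if 0
/* Illustrative sketch, not compiled: an LLDD interrupt handler that
 * detects a PHY event can record a hotplug request and kick EH; the
 * autopsy code then derives err_mask and recovery action from
 * SError as above.  foo_phy_event() is a made-up name and must be
 * called under the host lock.
 */
static void foo_phy_event(struct ata_port *ap)
{
	struct ata_eh_info *ehi = &ap->eh_info;

	ata_ehi_hotplugged(ehi);	/* request probe + reset */
	ata_port_freeze(ap);		/* abort qcs, freeze until reset */
}
#endif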

/**
 *	ata_eh_analyze_ncq_error - analyze NCQ error
 *	@ap: ATA port to analyze NCQ error for
 *
 *	Read log page 10h, determine the offending qc and acquire
 *	error status TF.  For NCQ device errors, all an LLDD has to do
 *	is set AC_ERR_DEV in ehi->err_mask.  This function takes
 *	care of the rest.
 *
 *	LOCKING:
 *	Kernel thread context (may sleep).
 */
static void ata_eh_analyze_ncq_error(struct ata_port *ap)
{
	struct ata_eh_context *ehc = &ap->eh_context;
	struct ata_device *dev = ap->device;
	struct ata_queued_cmd *qc;
	struct ata_taskfile tf;
	int tag, rc;

	/* if frozen, we can't do much */
	if (ap->pflags & ATA_PFLAG_FROZEN)
		return;

	/* is it an NCQ device error? */
	if (!ap->sactive || !(ehc->i.err_mask & AC_ERR_DEV))
		return;

	/* has the LLDD analyzed it already? */
	for (tag = 0; tag < ATA_MAX_QUEUE; tag++) {
		qc = __ata_qc_from_tag(ap, tag);

		if (!(qc->flags & ATA_QCFLAG_FAILED))
			continue;

		if (qc->err_mask)
			return;
	}

	/* okay, this error is ours */
	rc = ata_eh_read_log_10h(dev, &tag, &tf);
	if (rc) {
		ata_port_printk(ap, KERN_ERR, "failed to read log page 10h "
				"(errno=%d)\n", rc);
		return;
	}

	if (!(ap->sactive & (1 << tag))) {
		ata_port_printk(ap, KERN_ERR, "log page 10h reported "
				"inactive tag %d\n", tag);
		return;
	}

	/* we've got the perpetrator, condemn it */
	qc = __ata_qc_from_tag(ap, tag);
	memcpy(&qc->result_tf, &tf, sizeof(tf));
	qc->err_mask |= AC_ERR_DEV;
	ehc->i.err_mask &= ~AC_ERR_DEV;
}

/**
 *	ata_eh_analyze_tf - analyze taskfile of a failed qc
 *	@qc: qc to analyze
 *	@tf: Taskfile registers to analyze
 *
 *	Analyze taskfile of @qc and further determine cause of
 *	failure.  This function also requests ATAPI sense data if
 *	available.
 *
 *	LOCKING:
 *	Kernel thread context (may sleep).
 *
 *	RETURNS:
 *	Determined recovery action
 */
static unsigned int ata_eh_analyze_tf(struct ata_queued_cmd *qc,
				      const struct ata_taskfile *tf)
{
	unsigned int tmp, action = 0;
	u8 stat = tf->command, err = tf->feature;

	if ((stat & (ATA_BUSY | ATA_DRQ | ATA_DRDY)) != ATA_DRDY) {
		qc->err_mask |= AC_ERR_HSM;
		return ATA_EH_SOFTRESET;
	}

	if (!(qc->err_mask & AC_ERR_DEV))
		return 0;

	switch (qc->dev->class) {
	case ATA_DEV_ATA:
		if (err & ATA_ICRC)
			qc->err_mask |= AC_ERR_ATA_BUS;
		if (err & ATA_UNC)
			qc->err_mask |= AC_ERR_MEDIA;
		if (err & ATA_IDNF)
			qc->err_mask |= AC_ERR_INVALID;
		break;

	case ATA_DEV_ATAPI:
		if (!(qc->ap->pflags & ATA_PFLAG_FROZEN)) {
			tmp = atapi_eh_request_sense(qc->dev,
						     qc->scsicmd->sense_buffer);
			if (!tmp) {
				/* ATA_QCFLAG_SENSE_VALID is used to
				 * tell atapi_qc_complete() that sense
				 * data is already valid.
				 *
				 * TODO: interpret sense data and set
				 * appropriate err_mask.
				 */
				qc->flags |= ATA_QCFLAG_SENSE_VALID;
			} else
				qc->err_mask |= tmp;
		}
	}

	if (qc->err_mask & (AC_ERR_HSM | AC_ERR_TIMEOUT | AC_ERR_ATA_BUS))
		action |= ATA_EH_SOFTRESET;

	return action;
}

static int ata_eh_categorize_ering_entry(struct ata_ering_entry *ent)
{
	if (ent->err_mask & (AC_ERR_ATA_BUS | AC_ERR_TIMEOUT))
		return 1;

	if (ent->is_io) {
		if (ent->err_mask & AC_ERR_HSM)
			return 1;
		if ((ent->err_mask &
		     (AC_ERR_DEV | AC_ERR_MEDIA | AC_ERR_INVALID)) == AC_ERR_DEV)
			return 2;
	}

	return 0;
}

struct speed_down_needed_arg {
	u64 since;
	int nr_errors[3];
};

static int speed_down_needed_cb(struct ata_ering_entry *ent, void *void_arg)
{
	struct speed_down_needed_arg *arg = void_arg;

	if (ent->timestamp < arg->since)
		return -1;

	arg->nr_errors[ata_eh_categorize_ering_entry(ent)]++;
	return 0;
}

/**
 *	ata_eh_speed_down_needed - Determine whether speed down is necessary
 *	@dev: Device of interest
 *
 *	This function examines the error ring of @dev and determines
 *	whether speed down is necessary.  Speed down is necessary if
 *	there have been more than 3 Cat-1 errors or 10 Cat-2 errors
 *	during the last 15 minutes.
 *
 *	Cat-1 errors are ATA_BUS and TIMEOUT errors for any command
 *	and HSM violations for known supported commands.
 *
 *	Cat-2 errors are unclassified DEV errors for known supported
 *	commands.
 *
 *	LOCKING:
 *	Inherited from caller.
 *
 *	RETURNS:
 *	1 if speed down is necessary, 0 otherwise
 */
static int ata_eh_speed_down_needed(struct ata_device *dev)
{
	const u64 interval = 15LLU * 60 * HZ;
	static const int err_limits[3] = { -1, 3, 10 };
	struct speed_down_needed_arg arg;
	struct ata_ering_entry *ent;
	int err_cat;
	u64 j64;

	ent = ata_ering_top(&dev->ering);
	if (!ent)
		return 0;

	err_cat = ata_eh_categorize_ering_entry(ent);
	if (err_cat == 0)
		return 0;

	memset(&arg, 0, sizeof(arg));

	j64 = get_jiffies_64();
	if (j64 >= interval)
		arg.since = j64 - interval;
	else
		arg.since = 0;

	ata_ering_map(&dev->ering, speed_down_needed_cb, &arg);

	return arg.nr_errors[err_cat] > err_limits[err_cat];
}

/**
 *	ata_eh_speed_down - record error and speed down if necessary
 *	@dev: Failed device
 *	@is_io: Did the device fail during normal IO?
 *	@err_mask: err_mask of the error
 *
 *	Record error and examine error history to determine whether
 *	adjusting transmission speed is necessary.  It also sets
 *	transmission limits appropriately if such adjustment is
 *	necessary.
 *
 *	LOCKING:
 *	Kernel thread context (may sleep).
 *
 *	RETURNS:
 *	Determined recovery action (0 if no action is necessary).
 */
static int ata_eh_speed_down(struct ata_device *dev, int is_io,
			     unsigned int err_mask)
{
	if (!err_mask)
		return 0;

	/* record error and determine whether speed down is necessary */
	ata_ering_record(&dev->ering, is_io, err_mask);

	if (!ata_eh_speed_down_needed(dev))
		return 0;

	/* speed down SATA link speed if possible */
	if (sata_down_spd_limit(dev->ap) == 0)
		return ATA_EH_HARDRESET;

	/* lower transfer mode */
	if (ata_down_xfermask_limit(dev, 0) == 0)
		return ATA_EH_SOFTRESET;

	ata_dev_printk(dev, KERN_ERR,
		       "speed down requested but no transfer mode left\n");
	return 0;
}

/**
 *	ata_eh_autopsy - analyze error and determine recovery action
 *	@ap: ATA port to perform autopsy on
 *
 *	Analyze why @ap failed and determine which recovery action is
 *	needed.  This function also sets more detailed AC_ERR_* values
 *	and fills sense data for ATAPI CHECK SENSE.
 *
 *	LOCKING:
 *	Kernel thread context (may sleep).
 */
static void ata_eh_autopsy(struct ata_port *ap)
{
	struct ata_eh_context *ehc = &ap->eh_context;
	unsigned int all_err_mask = 0;
	int tag, is_io = 0;
	u32 serror;
	int rc;

	DPRINTK("ENTER\n");

	if (ehc->i.flags & ATA_EHI_NO_AUTOPSY)
		return;

	/* obtain and analyze SError */
	rc = sata_scr_read(ap, SCR_ERROR, &serror);
	if (rc == 0) {
		ehc->i.serror |= serror;
		ata_eh_analyze_serror(ap);
	} else if (rc != -EOPNOTSUPP)
		ehc->i.action |= ATA_EH_HARDRESET;

	/* analyze NCQ failure */
	ata_eh_analyze_ncq_error(ap);

	/* any real error trumps AC_ERR_OTHER */
	if (ehc->i.err_mask & ~AC_ERR_OTHER)
		ehc->i.err_mask &= ~AC_ERR_OTHER;

	all_err_mask |= ehc->i.err_mask;

	for (tag = 0; tag < ATA_MAX_QUEUE; tag++) {
		struct ata_queued_cmd *qc = __ata_qc_from_tag(ap, tag);

		if (!(qc->flags & ATA_QCFLAG_FAILED))
			continue;

		/* inherit upper level err_mask */
		qc->err_mask |= ehc->i.err_mask;

		/* analyze TF */
		ehc->i.action |= ata_eh_analyze_tf(qc, &qc->result_tf);

		/* DEV errors are probably spurious in case of ATA_BUS error */
		if (qc->err_mask & AC_ERR_ATA_BUS)
			qc->err_mask &= ~(AC_ERR_DEV | AC_ERR_MEDIA |
					  AC_ERR_INVALID);

		/* any real error trumps unknown error */
		if (qc->err_mask & ~AC_ERR_OTHER)
			qc->err_mask &= ~AC_ERR_OTHER;

		/* SENSE_VALID trumps dev/unknown error and revalidation */
		if (qc->flags & ATA_QCFLAG_SENSE_VALID) {
			qc->err_mask &= ~(AC_ERR_DEV | AC_ERR_OTHER);
			ehc->i.action &= ~ATA_EH_REVALIDATE;
		}

		/* accumulate error info */
		ehc->i.dev = qc->dev;
		all_err_mask |= qc->err_mask;
		if (qc->flags & ATA_QCFLAG_IO)
			is_io = 1;
	}

	/* enforce default EH actions */
	if (ap->pflags & ATA_PFLAG_FROZEN ||
	    all_err_mask & (AC_ERR_HSM | AC_ERR_TIMEOUT))
		ehc->i.action |= ATA_EH_SOFTRESET;
	else if (all_err_mask)
		ehc->i.action |= ATA_EH_REVALIDATE;

	/* if we have offending qcs and the associated failed device */
	if (ehc->i.dev) {
		/* speed down */
		ehc->i.action |= ata_eh_speed_down(ehc->i.dev, is_io,
						   all_err_mask);

		/* perform per-dev EH action only on the offending device */
		ehc->i.dev_action[ehc->i.dev->devno] |=
			ehc->i.action & ATA_EH_PERDEV_MASK;
		ehc->i.action &= ~ATA_EH_PERDEV_MASK;
	}

	DPRINTK("EXIT\n");
}

/**
 *	ata_eh_report - report error handling to user
 *	@ap: ATA port where EH is in progress
 *
 *	Report EH to user.
 *
 *	LOCKING:
 *	None.
 */
static void ata_eh_report(struct ata_port *ap)
{
	struct ata_eh_context *ehc = &ap->eh_context;
	const char *frozen, *desc;
	int tag, nr_failed = 0;

	desc = NULL;
	if (ehc->i.desc[0] != '\0')
		desc = ehc->i.desc;

	for (tag = 0; tag < ATA_MAX_QUEUE; tag++) {
		struct ata_queued_cmd *qc = __ata_qc_from_tag(ap, tag);

		if (!(qc->flags & ATA_QCFLAG_FAILED))
			continue;
		if (qc->flags & ATA_QCFLAG_SENSE_VALID && !qc->err_mask)
			continue;

		nr_failed++;
	}

	if (!nr_failed && !ehc->i.err_mask)
		return;

	frozen = "";
	if (ap->pflags & ATA_PFLAG_FROZEN)
		frozen = " frozen";

	if (ehc->i.dev) {
		ata_dev_printk(ehc->i.dev, KERN_ERR, "exception Emask 0x%x "
			       "SAct 0x%x SErr 0x%x action 0x%x%s\n",
			       ehc->i.err_mask, ap->sactive, ehc->i.serror,
			       ehc->i.action, frozen);
		if (desc)
			ata_dev_printk(ehc->i.dev, KERN_ERR, "(%s)\n", desc);
	} else {
		ata_port_printk(ap, KERN_ERR, "exception Emask 0x%x "
				"SAct 0x%x SErr 0x%x action 0x%x%s\n",
				ehc->i.err_mask, ap->sactive, ehc->i.serror,
				ehc->i.action, frozen);
		if (desc)
			ata_port_printk(ap, KERN_ERR, "(%s)\n", desc);
	}

	for (tag = 0; tag < ATA_MAX_QUEUE; tag++) {
		static const char *dma_str[] = {
			[DMA_BIDIRECTIONAL]	= "bidi",
			[DMA_TO_DEVICE]		= "out",
			[DMA_FROM_DEVICE]	= "in",
			[DMA_NONE]		= "",
		};
		struct ata_queued_cmd *qc = __ata_qc_from_tag(ap, tag);
		struct ata_taskfile *cmd = &qc->tf, *res = &qc->result_tf;
		unsigned int nbytes;

		if (!(qc->flags & ATA_QCFLAG_FAILED) || !qc->err_mask)
			continue;

		nbytes = qc->nbytes;
		if (!nbytes)
			nbytes = qc->nsect << 9;

		ata_dev_printk(qc->dev, KERN_ERR,
			"cmd %02x/%02x:%02x:%02x:%02x:%02x/%02x:%02x:%02x:%02x:%02x/%02x "
			"tag %d cdb 0x%x data %u %s\n"
			"         res %02x/%02x:%02x:%02x:%02x:%02x/%02x:%02x:%02x:%02x:%02x/%02x "
			"Emask 0x%x (%s)\n",
			cmd->command, cmd->feature, cmd->nsect,
			cmd->lbal, cmd->lbam, cmd->lbah,
			cmd->hob_feature, cmd->hob_nsect,
			cmd->hob_lbal, cmd->hob_lbam, cmd->hob_lbah,
			cmd->device, qc->tag, qc->cdb[0], nbytes,
			dma_str[qc->dma_dir],
			res->command, res->feature, res->nsect,
			res->lbal, res->lbam, res->lbah,
			res->hob_feature, res->hob_nsect,
			res->hob_lbal, res->hob_lbam, res->hob_lbah,
			res->device, qc->err_mask, ata_err_string(qc->err_mask));
	}
}
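
#if 0
/* Illustrative sketch, not compiled: the reset machinery below
 * (ata_do_reset()/ata_eh_reset()) lets a hardreset method ask for a
 * follow-up softreset by returning -EAGAIN, or by leaving classes[0]
 * as ATA_DEV_UNKNOWN when classification was requested; see
 * ata_eh_followup_srst_needed().  foo_hardreset() is a made-up
 * name; sata_std_hardreset() is an existing libata helper.
 */
static int foo_hardreset(struct ata_port *ap, unsigned int *class)
{
	int rc;

	rc = sata_std_hardreset(ap, class);
	if (rc == 0 && *class == ATA_DEV_UNKNOWN)
		return -EAGAIN;		/* ask EH for a follow-up SRST */
	return rc;
}
#endif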

static int ata_do_reset(struct ata_port *ap, ata_reset_fn_t reset,
			unsigned int *classes)
{
	int i, rc;

	for (i = 0; i < ATA_MAX_DEVICES; i++)
		classes[i] = ATA_DEV_UNKNOWN;

	rc = reset(ap, classes);
	if (rc)
		return rc;

	/* If any class isn't ATA_DEV_UNKNOWN, consider classification
	 * is complete and convert all ATA_DEV_UNKNOWN to
	 * ATA_DEV_NONE.
	 */
	for (i = 0; i < ATA_MAX_DEVICES; i++)
		if (classes[i] != ATA_DEV_UNKNOWN)
			break;

	if (i < ATA_MAX_DEVICES)
		for (i = 0; i < ATA_MAX_DEVICES; i++)
			if (classes[i] == ATA_DEV_UNKNOWN)
				classes[i] = ATA_DEV_NONE;

	return 0;
}

static int ata_eh_followup_srst_needed(int rc, int classify,
				       const unsigned int *classes)
{
	if (rc == -EAGAIN)
		return 1;
	if (rc != 0)
		return 0;
	if (classify && classes[0] == ATA_DEV_UNKNOWN)
		return 1;
	return 0;
}

static int ata_eh_reset(struct ata_port *ap, int classify,
			ata_prereset_fn_t prereset, ata_reset_fn_t softreset,
			ata_reset_fn_t hardreset, ata_postreset_fn_t postreset)
{
	struct ata_eh_context *ehc = &ap->eh_context;
	unsigned int *classes = ehc->classes;
	int tries = ATA_EH_RESET_TRIES;
	int verbose = !(ehc->i.flags & ATA_EHI_QUIET);
	unsigned int action;
	ata_reset_fn_t reset;
	int i, did_followup_srst, rc;

	/* about to reset */
	ata_eh_about_to_do(ap, NULL, ehc->i.action & ATA_EH_RESET_MASK);

	/* Determine which reset to use and record in ehc->i.action.
	 * prereset() may examine and modify it.
	 */
	action = ehc->i.action;
	ehc->i.action &= ~ATA_EH_RESET_MASK;
	if (softreset && (!hardreset || (!sata_set_spd_needed(ap) &&
					 !(action & ATA_EH_HARDRESET))))
		ehc->i.action |= ATA_EH_SOFTRESET;
	else
		ehc->i.action |= ATA_EH_HARDRESET;

	if (prereset) {
		rc = prereset(ap);
		if (rc) {
			if (rc == -ENOENT) {
				ata_port_printk(ap, KERN_DEBUG,
						"port disabled. ignoring.\n");
				ap->eh_context.i.action &= ~ATA_EH_RESET_MASK;
			} else
				ata_port_printk(ap, KERN_ERR,
					"prereset failed (errno=%d)\n", rc);
			return rc;
		}
	}

	/* prereset() might have modified ehc->i.action */
	if (ehc->i.action & ATA_EH_HARDRESET)
		reset = hardreset;
	else if (ehc->i.action & ATA_EH_SOFTRESET)
		reset = softreset;
	else {
		/* prereset told us not to reset, bang classes and return */
		for (i = 0; i < ATA_MAX_DEVICES; i++)
			classes[i] = ATA_DEV_NONE;
		return 0;
	}

	/* did prereset() screw up?  if so, fix up to avoid oopsing */
	if (!reset) {
		ata_port_printk(ap, KERN_ERR, "BUG: prereset() requested "
				"invalid reset type\n");
		if (softreset)
			reset = softreset;
		else
			reset = hardreset;
	}

 retry:
	/* shut up during boot probing */
	if (verbose)
		ata_port_printk(ap, KERN_INFO, "%s resetting port\n",
				reset == softreset ? "soft" : "hard");

	/* mark that this EH session started with reset */
	ehc->i.flags |= ATA_EHI_DID_RESET;

	rc = ata_do_reset(ap, reset, classes);

	did_followup_srst = 0;
	if (reset == hardreset &&
	    ata_eh_followup_srst_needed(rc, classify, classes)) {
		/* okay, let's do follow-up softreset */
		did_followup_srst = 1;
		reset = softreset;

		if (!reset) {
			ata_port_printk(ap, KERN_ERR,
					"follow-up softreset required "
					"but no softreset available\n");
			return -EINVAL;
		}

		ata_eh_about_to_do(ap, NULL, ATA_EH_RESET_MASK);
		rc = ata_do_reset(ap, reset, classes);

		if (rc == 0 && classify &&
		    classes[0] == ATA_DEV_UNKNOWN) {
			ata_port_printk(ap, KERN_ERR,
					"classification failed\n");
			return -EINVAL;
		}
	}

	if (rc && --tries) {
		const char *type;

		if (reset == softreset) {
			if (did_followup_srst)
				type = "follow-up soft";
			else
				type = "soft";
		} else
			type = "hard";

		ata_port_printk(ap, KERN_WARNING,
				"%sreset failed, retrying in 5 secs\n", type);
		ssleep(5);

		if (reset == hardreset)
			sata_down_spd_limit(ap);
		if (hardreset)
			reset = hardreset;
		goto retry;
	}

	if (rc == 0) {
		/* After the reset, the device state is PIO 0 and the
		 * controller state is undefined.  Record the mode.
		 */
		for (i = 0; i < ATA_MAX_DEVICES; i++)
			ap->device[i].pio_mode = XFER_PIO_0;

		if (postreset)
			postreset(ap, classes);

		/* reset successful, schedule revalidation */
		ata_eh_done(ap, NULL, ehc->i.action & ATA_EH_RESET_MASK);
		ehc->i.action |= ATA_EH_REVALIDATE;
	}

	return rc;
}

static int ata_eh_revalidate_and_attach(struct ata_port *ap,
					struct ata_device **r_failed_dev)
{
	struct ata_eh_context *ehc = &ap->eh_context;
	struct ata_device *dev;
	unsigned long flags;
	int i, rc = 0;

	DPRINTK("ENTER\n");

	for (i = 0; i < ATA_MAX_DEVICES; i++) {
		unsigned int action, readid_flags = 0;

		dev = &ap->device[i];
		action = ata_eh_dev_action(dev);

		if (ehc->i.flags & ATA_EHI_DID_RESET)
			readid_flags |= ATA_READID_POSTRESET;

		if (action & ATA_EH_REVALIDATE && ata_dev_ready(dev)) {
			if (ata_port_offline(ap)) {
				rc = -EIO;
				break;
			}

			ata_eh_about_to_do(ap, dev, ATA_EH_REVALIDATE);
			rc = ata_dev_revalidate(dev, readid_flags);
			if (rc)
				break;

			ata_eh_done(ap, dev, ATA_EH_REVALIDATE);

			/* Configuration may have changed, reconfigure
			 * transfer mode.
			 */
			ehc->i.flags |= ATA_EHI_SETMODE;

			/* schedule the scsi_rescan_device() here */
			queue_work(ata_aux_wq, &(ap->scsi_rescan_task));
		} else if (dev->class == ATA_DEV_UNKNOWN &&
			   ehc->tries[dev->devno] &&
			   ata_class_enabled(ehc->classes[dev->devno])) {
			dev->class = ehc->classes[dev->devno];

			rc = ata_dev_read_id(dev, &dev->class, readid_flags,
					     dev->id);
			if (rc == 0) {
				ehc->i.flags |= ATA_EHI_PRINTINFO;
				rc = ata_dev_configure(dev);
				ehc->i.flags &= ~ATA_EHI_PRINTINFO;
			} else if (rc == -ENOENT) {
				/* IDENTIFY was issued to a non-existent
				 * device.  No need to reset.  Just
				 * thaw and kill the device.
				 */
				ata_eh_thaw_port(ap);
				dev->class = ATA_DEV_UNKNOWN;
				rc = 0;
			}

			if (rc) {
				dev->class = ATA_DEV_UNKNOWN;
				break;
			}

			if (ata_dev_enabled(dev)) {
				spin_lock_irqsave(ap->lock, flags);
				ap->pflags |= ATA_PFLAG_SCSI_HOTPLUG;
				spin_unlock_irqrestore(ap->lock, flags);

				/* new device discovered, configure xfermode */
				ehc->i.flags |= ATA_EHI_SETMODE;
			}
		}
	}

	if (rc)
		*r_failed_dev = dev;

	DPRINTK("EXIT\n");
	return rc;
}

/**
 *	ata_eh_suspend - handle suspend EH action
 *	@ap: target host port
 *	@r_failed_dev: result parameter to indicate failing device
 *
 *	Handle suspend EH action.  Disk devices are spun down and
 *	other types of devices are just marked suspended.  Once
 *	suspended, no EH action to the device is allowed until it is
 *	resumed.
 *
 *	LOCKING:
 *	Kernel thread context (may sleep).
 *
 *	RETURNS:
 *	0 on success, -errno otherwise
 */
static int ata_eh_suspend(struct ata_port *ap, struct ata_device **r_failed_dev)
{
	struct ata_device *dev;
	int i, rc = 0;

	DPRINTK("ENTER\n");

	for (i = 0; i < ATA_MAX_DEVICES; i++) {
		unsigned long flags;
		unsigned int action, err_mask;

		dev = &ap->device[i];
		action = ata_eh_dev_action(dev);

		if (!ata_dev_enabled(dev) || !(action & ATA_EH_SUSPEND))
			continue;

		WARN_ON(dev->flags & ATA_DFLAG_SUSPENDED);

		ata_eh_about_to_do(ap, dev, ATA_EH_SUSPEND);

		if (dev->class == ATA_DEV_ATA && !(action & ATA_EH_PM_FREEZE)) {
			/* flush cache */
			rc = ata_flush_cache(dev);
			if (rc)
				break;

			/* spin down */
			err_mask = ata_do_simple_cmd(dev, ATA_CMD_STANDBYNOW1);
			if (err_mask) {
				ata_dev_printk(dev, KERN_ERR, "failed to "
					       "spin down (err_mask=0x%x)\n",
					       err_mask);
				rc = -EIO;
				break;
			}
		}

		spin_lock_irqsave(ap->lock, flags);
		dev->flags |= ATA_DFLAG_SUSPENDED;
		spin_unlock_irqrestore(ap->lock, flags);

		ata_eh_done(ap, dev, ATA_EH_SUSPEND);
	}

	if (rc)
		*r_failed_dev = dev;

	DPRINTK("EXIT\n");
	return rc;
}

/**
 *	ata_eh_prep_resume - prep for resume EH action
 *	@ap: target host port
 *
 *	Clear SUSPENDED in preparation for scheduled resume actions.
 *	This allows other parts of EH to access the devices being
 *	resumed.
 *
 *	LOCKING:
 *	Kernel thread context (may sleep).
 */
static void ata_eh_prep_resume(struct ata_port *ap)
{
	struct ata_device *dev;
	unsigned long flags;
	int i;

	DPRINTK("ENTER\n");

	for (i = 0; i < ATA_MAX_DEVICES; i++) {
		unsigned int action;

		dev = &ap->device[i];
		action = ata_eh_dev_action(dev);

		if (!ata_dev_enabled(dev) || !(action & ATA_EH_RESUME))
			continue;

		spin_lock_irqsave(ap->lock, flags);
		dev->flags &= ~ATA_DFLAG_SUSPENDED;
		spin_unlock_irqrestore(ap->lock, flags);
	}

	DPRINTK("EXIT\n");
}

/**
 *	ata_eh_resume - handle resume EH action
 *	@ap: target host port
 *	@r_failed_dev: result parameter to indicate failing device
 *
 *	Handle resume EH action.  Target devices are already reset and
 *	revalidated.  Spinning up is the only operation left.
 *
 *	LOCKING:
 *	Kernel thread context (may sleep).
 *
 *	RETURNS:
 *	0 on success, -errno otherwise
 */
static int ata_eh_resume(struct ata_port *ap, struct ata_device **r_failed_dev)
{
	struct ata_device *dev;
	int i, rc = 0;

	DPRINTK("ENTER\n");

	for (i = 0; i < ATA_MAX_DEVICES; i++) {
		unsigned int action, err_mask;

		dev = &ap->device[i];
		action = ata_eh_dev_action(dev);

		if (!ata_dev_enabled(dev) || !(action & ATA_EH_RESUME))
			continue;

		ata_eh_about_to_do(ap, dev, ATA_EH_RESUME);

		if (dev->class == ATA_DEV_ATA && !(action & ATA_EH_PM_FREEZE)) {
			err_mask = ata_do_simple_cmd(dev,
						     ATA_CMD_IDLEIMMEDIATE);
			if (err_mask) {
				ata_dev_printk(dev, KERN_ERR, "failed to "
					       "spin up (err_mask=0x%x)\n",
					       err_mask);
				rc = -EIO;
				break;
			}
		}

		ata_eh_done(ap, dev, ATA_EH_RESUME);
	}

	if (rc)
		*r_failed_dev = dev;

	DPRINTK("EXIT\n");
	return rc;
}

static int ata_port_nr_enabled(struct ata_port *ap)
{
	int i, cnt = 0;

	for (i = 0; i < ATA_MAX_DEVICES; i++)
		if (ata_dev_enabled(&ap->device[i]))
			cnt++;
	return cnt;
}

static int ata_port_nr_vacant(struct ata_port *ap)
{
	int i, cnt = 0;

	for (i = 0; i < ATA_MAX_DEVICES; i++)
		if (ap->device[i].class == ATA_DEV_UNKNOWN)
			cnt++;
	return cnt;
}

static int ata_eh_skip_recovery(struct ata_port *ap)
{
	struct ata_eh_context *ehc = &ap->eh_context;
	int i;

	/* skip if all possible devices are suspended */
	for (i = 0; i < ata_port_max_devices(ap); i++) {
		struct ata_device *dev = &ap->device[i];

		if (!(dev->flags & ATA_DFLAG_SUSPENDED))
			break;
	}

	if (i == ata_port_max_devices(ap))
		return 1;

	/* thaw frozen port, resume link and recover failed devices */
	if ((ap->pflags & ATA_PFLAG_FROZEN) ||
	    (ehc->i.flags & ATA_EHI_RESUME_LINK) || ata_port_nr_enabled(ap))
		return 0;

	/* skip if class codes for all vacant slots are ATA_DEV_NONE */
	for (i = 0; i < ATA_MAX_DEVICES; i++) {
		struct ata_device *dev = &ap->device[i];

		if (dev->class == ATA_DEV_UNKNOWN &&
		    ehc->classes[dev->devno] != ATA_DEV_NONE)
			return 0;
	}

	return 1;
}

/**
 *	ata_eh_recover - recover host port after error
 *	@ap: host port to recover
 *	@prereset: prereset method (can be NULL)
 *	@softreset: softreset method (can be NULL)
 *	@hardreset: hardreset method (can be NULL)
 *	@postreset: postreset method (can be NULL)
 *
 *	This is the alpha and omega, yin and yang, heart and soul of
 *	libata exception handling.  On entry, actions required to
 *	recover the port and hotplug requests are recorded in
 *	eh_context.  This function executes all the operations with
 *	appropriate retries and fallbacks to resurrect failed
 *	devices, detach goners and greet newcomers.
 *
 *	LOCKING:
 *	Kernel thread context (may sleep).
 *
 *	RETURNS:
 *	0 on success, -errno on failure.
 */
static int ata_eh_recover(struct ata_port *ap, ata_prereset_fn_t prereset,
			  ata_reset_fn_t softreset, ata_reset_fn_t hardreset,
			  ata_postreset_fn_t postreset)
{
	struct ata_eh_context *ehc = &ap->eh_context;
	struct ata_device *dev;
	int down_xfermask, i, rc;

	DPRINTK("ENTER\n");

	/* prep for recovery */
	for (i = 0; i < ATA_MAX_DEVICES; i++) {
		dev = &ap->device[i];

		ehc->tries[dev->devno] = ATA_EH_DEV_TRIES;

		/* collect port action mask recorded in dev actions */
		ehc->i.action |= ehc->i.dev_action[i] & ~ATA_EH_PERDEV_MASK;
		ehc->i.dev_action[i] &= ATA_EH_PERDEV_MASK;

		/* process hotplug request */
		if (dev->flags & ATA_DFLAG_DETACH)
			ata_eh_detach_dev(dev);

		if (!ata_dev_enabled(dev) &&
		    ((ehc->i.probe_mask & (1 << dev->devno)) &&
		     !(ehc->did_probe_mask & (1 << dev->devno)))) {
			ata_eh_detach_dev(dev);
			ata_dev_init(dev);
			ehc->did_probe_mask |= (1 << dev->devno);
			ehc->i.action |= ATA_EH_SOFTRESET;
		}
	}

 retry:
	down_xfermask = 0;
	rc = 0;

	/* if UNLOADING, finish immediately */
	if (ap->pflags & ATA_PFLAG_UNLOADING)
		goto out;

	/* prep for resume */
	ata_eh_prep_resume(ap);

	/* skip EH if possible. */
	if (ata_eh_skip_recovery(ap))
		ehc->i.action = 0;

	for (i = 0; i < ATA_MAX_DEVICES; i++)
		ehc->classes[i] = ATA_DEV_UNKNOWN;

	/* reset */
	if (ehc->i.action & ATA_EH_RESET_MASK) {
		ata_eh_freeze_port(ap);

		rc = ata_eh_reset(ap, ata_port_nr_vacant(ap), prereset,
				  softreset, hardreset, postreset);
		if (rc) {
			ata_port_printk(ap, KERN_ERR,
					"reset failed, giving up\n");
			goto out;
		}

		ata_eh_thaw_port(ap);
	}

	/* revalidate existing devices and attach new ones */
	rc = ata_eh_revalidate_and_attach(ap, &dev);
	if (rc)
		goto dev_fail;

	/* resume devices */
	rc = ata_eh_resume(ap, &dev);
	if (rc)
		goto dev_fail;

	/* configure transfer mode if necessary */
	if (ehc->i.flags & ATA_EHI_SETMODE) {
		rc = ata_set_mode(ap, &dev);
		if (rc) {
			down_xfermask = 1;
			goto dev_fail;
		}
		ehc->i.flags &= ~ATA_EHI_SETMODE;
	}

	/* suspend devices */
	rc = ata_eh_suspend(ap, &dev);
	if (rc)
		goto dev_fail;

	goto out;

 dev_fail:
	switch (rc) {
	case -ENODEV:
		/* device missing, schedule probing */
		ehc->i.probe_mask |= (1 << dev->devno);
		/* fall through */
	case -EINVAL:
		ehc->tries[dev->devno] = 0;
		break;
	case -EIO:
		sata_down_spd_limit(ap);
		/* fall through */
	default:
		ehc->tries[dev->devno]--;
		if (down_xfermask &&
		    ata_down_xfermask_limit(dev, ehc->tries[dev->devno] == 1))
			ehc->tries[dev->devno] = 0;
	}

	if (ata_dev_enabled(dev) && !ehc->tries[dev->devno]) {
		/* disable device if it has used up all its chances */
		ata_dev_disable(dev);

		/* detach if offline */
		if (ata_port_offline(ap))
			ata_eh_detach_dev(dev);

		/* probe if requested */
		if ((ehc->i.probe_mask & (1 << dev->devno)) &&
		    !(ehc->did_probe_mask & (1 << dev->devno))) {
			ata_eh_detach_dev(dev);
			ata_dev_init(dev);

			ehc->tries[dev->devno] = ATA_EH_DEV_TRIES;
			ehc->did_probe_mask |= (1 << dev->devno);
			ehc->i.action |= ATA_EH_SOFTRESET;
		}
	} else {
		/* soft didn't work?  be haaaaard */
		if (ehc->i.flags & ATA_EHI_DID_RESET)
			ehc->i.action |= ATA_EH_HARDRESET;
		else
			ehc->i.action |= ATA_EH_SOFTRESET;
	}

	if (ata_port_nr_enabled(ap)) {
		ata_port_printk(ap, KERN_WARNING, "failed to recover some "
				"devices, retrying in 5 secs\n");
		ssleep(5);
	} else {
		/* no device left, repeat fast */
		msleep(500);
	}

	goto retry;

 out:
	if (rc) {
		for (i = 0; i < ATA_MAX_DEVICES; i++)
			ata_dev_disable(&ap->device[i]);
	}

	DPRINTK("EXIT, rc=%d\n", rc);
	return rc;
}

/**
 *	ata_eh_finish - finish up EH
 *	@ap: host port to finish EH for
 *
 *	Recovery is complete.  Clean up EH states and retry or finish
 *	failed qcs.
 *
 *	LOCKING:
 *	None.
 */
static void ata_eh_finish(struct ata_port *ap)
{
	int tag;

	/* retry or finish qcs */
	for (tag = 0; tag < ATA_MAX_QUEUE; tag++) {
		struct ata_queued_cmd *qc = __ata_qc_from_tag(ap, tag);

		if (!(qc->flags & ATA_QCFLAG_FAILED))
			continue;

		if (qc->err_mask) {
			/* FIXME: Once EH migration is complete,
			 * generate sense data in this function,
			 * considering both err_mask and tf.
			 */
			if (qc->err_mask & AC_ERR_INVALID)
				ata_eh_qc_complete(qc);
			else
				ata_eh_qc_retry(qc);
		} else {
			if (qc->flags & ATA_QCFLAG_SENSE_VALID) {
				ata_eh_qc_complete(qc);
			} else {
				/* feed zero TF to sense generation */
				memset(&qc->result_tf, 0, sizeof(qc->result_tf));
				ata_eh_qc_retry(qc);
			}
		}
	}
}

/**
 *	ata_do_eh - do standard error handling
 *	@ap: host port to handle error for
 *	@prereset: prereset method (can be NULL)
 *	@softreset: softreset method (can be NULL)
 *	@hardreset: hardreset method (can be NULL)
 *	@postreset: postreset method (can be NULL)
 *
 *	Perform standard error handling sequence.
 *
 *	LOCKING:
 *	Kernel thread context (may sleep).
 */
void ata_do_eh(struct ata_port *ap, ata_prereset_fn_t prereset,
	       ata_reset_fn_t softreset, ata_reset_fn_t hardreset,
	       ata_postreset_fn_t postreset)
{
	ata_eh_autopsy(ap);
	ata_eh_report(ap);
	ata_eh_recover(ap, prereset, softreset, hardreset, postreset);
	ata_eh_finish(ap);
}
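
#if 0
/* Illustrative sketch, not compiled: a typical new-EH LLDD wires its
 * ->error_handler to ata_do_eh() with the reset methods it supports.
 * The standard helpers named here (ata_std_prereset, ata_std_softreset,
 * sata_std_hardreset, ata_std_postreset) are existing libata helpers;
 * foo_error_handler() is a made-up name.  Any of the methods may be
 * NULL if the controller can't perform that kind of reset.
 */
static void foo_error_handler(struct ata_port *ap)
{
	ata_do_eh(ap, ata_std_prereset, ata_std_softreset,
		  sata_std_hardreset, ata_std_postreset);
}
#endif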

/**
 *	ata_eh_handle_port_suspend - perform port suspend operation
 *	@ap: port to suspend
 *
 *	Suspend @ap.
 *
 *	LOCKING:
 *	Kernel thread context (may sleep).
 */
static void ata_eh_handle_port_suspend(struct ata_port *ap)
{
	unsigned long flags;
	int rc = 0;

	/* are we suspending? */
	spin_lock_irqsave(ap->lock, flags);
	if (!(ap->pflags & ATA_PFLAG_PM_PENDING) ||
	    ap->pm_mesg.event == PM_EVENT_ON) {
		spin_unlock_irqrestore(ap->lock, flags);
		return;
	}
	spin_unlock_irqrestore(ap->lock, flags);

	WARN_ON(ap->pflags & ATA_PFLAG_SUSPENDED);

	/* suspend */
	ata_eh_freeze_port(ap);

	if (ap->ops->port_suspend)
		rc = ap->ops->port_suspend(ap, ap->pm_mesg);

	/* report result */
	spin_lock_irqsave(ap->lock, flags);

	ap->pflags &= ~ATA_PFLAG_PM_PENDING;
	if (rc == 0)
		ap->pflags |= ATA_PFLAG_SUSPENDED;
	else
		ata_port_schedule_eh(ap);

	if (ap->pm_result) {
		*ap->pm_result = rc;
		ap->pm_result = NULL;
	}

	spin_unlock_irqrestore(ap->lock, flags);
}

/**
 *	ata_eh_handle_port_resume - perform port resume operation
 *	@ap: port to resume
 *
 *	Resume @ap.
 *
 *	This function also waits up to one second until all devices
 *	hanging off this port request resume EH action.  This is to
 *	prevent invoking EH and thus reset multiple times on resume.
 *
 *	On DPM resume, where some of the devices might not be resumed
 *	together, this may delay port resume up to one second, but such
 *	DPM resumes are rare and a one second delay isn't too bad.
 *
 *	LOCKING:
 *	Kernel thread context (may sleep).
 */
static void ata_eh_handle_port_resume(struct ata_port *ap)
{
	unsigned long timeout;
	unsigned long flags;
	int i, rc = 0;

	/* are we resuming? */
	spin_lock_irqsave(ap->lock, flags);
	if (!(ap->pflags & ATA_PFLAG_PM_PENDING) ||
	    ap->pm_mesg.event != PM_EVENT_ON) {
		spin_unlock_irqrestore(ap->lock, flags);
		return;
	}
	spin_unlock_irqrestore(ap->lock, flags);

	/* spurious? */
	if (!(ap->pflags & ATA_PFLAG_SUSPENDED))
		goto done;

	if (ap->ops->port_resume)
		rc = ap->ops->port_resume(ap);

	/* give devices time to request EH */
	timeout = jiffies + HZ;		/* 1s max */
	while (1) {
		for (i = 0; i < ATA_MAX_DEVICES; i++) {
			struct ata_device *dev = &ap->device[i];
			unsigned int action = ata_eh_dev_action(dev);

			if ((dev->flags & ATA_DFLAG_SUSPENDED) &&
			    !(action & ATA_EH_RESUME))
				break;
		}

		if (i == ATA_MAX_DEVICES || time_after(jiffies, timeout))
			break;
		msleep(10);
	}

 done:
	spin_lock_irqsave(ap->lock, flags);
	ap->pflags &= ~(ATA_PFLAG_PM_PENDING | ATA_PFLAG_SUSPENDED);
	if (ap->pm_result) {
		*ap->pm_result = rc;
		ap->pm_result = NULL;
	}
	spin_unlock_irqrestore(ap->lock, flags);
}