/*
 *  libata-eh.c - libata error handling
 *
 *  Maintained by:  Jeff Garzik <jgarzik@pobox.com>
 *		    Please ALWAYS copy linux-ide@vger.kernel.org
 *		    on emails.
 *
 *  Copyright 2006 Tejun Heo <htejun@gmail.com>
 *
 *
 *  This program is free software; you can redistribute it and/or
 *  modify it under the terms of the GNU General Public License as
 *  published by the Free Software Foundation; either version 2, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 *  General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; see the file COPYING.  If not, write to
 *  the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139,
 *  USA.
 *
 *
 *  libata documentation is available via 'make {ps|pdf}docs',
 *  as Documentation/DocBook/libata.*
 *
 *  Hardware documentation available from http://www.t13.org/ and
 *  http://www.sata-io.org/
 *
 */

#include <linux/kernel.h>
#include <scsi/scsi.h>
#include <scsi/scsi_host.h>
#include <scsi/scsi_eh.h>
#include <scsi/scsi_device.h>
#include <scsi/scsi_cmnd.h>
#include "../scsi/scsi_transport_api.h"

#include <linux/libata.h>

#include "libata.h"

enum {
	ATA_EH_SPDN_NCQ_OFF		= (1 << 0),
	ATA_EH_SPDN_SPEED_DOWN		= (1 << 1),
	ATA_EH_SPDN_FALLBACK_TO_PIO	= (1 << 2),
};

static void __ata_port_freeze(struct ata_port *ap);
static void ata_eh_finish(struct ata_port *ap);
static void ata_eh_handle_port_suspend(struct ata_port *ap);
static void ata_eh_handle_port_resume(struct ata_port *ap);

static void ata_ering_record(struct ata_ering *ering, int is_io,
			     unsigned int err_mask)
{
	struct ata_ering_entry *ent;

	WARN_ON(!err_mask);

	ering->cursor++;
	ering->cursor %= ATA_ERING_SIZE;

	ent = &ering->ring[ering->cursor];
	ent->is_io = is_io;
	ent->err_mask = err_mask;
	ent->timestamp = get_jiffies_64();
}

static void ata_ering_clear(struct ata_ering *ering)
{
	memset(ering, 0, sizeof(*ering));
}

static int ata_ering_map(struct ata_ering *ering,
			 int (*map_fn)(struct ata_ering_entry *, void *),
			 void *arg)
{
	int idx, rc = 0;
	struct ata_ering_entry *ent;

	idx = ering->cursor;
	do {
		ent = &ering->ring[idx];
		if (!ent->err_mask)
			break;
		rc = map_fn(ent, arg);
		if (rc)
			break;
		idx = (idx - 1 + ATA_ERING_SIZE) % ATA_ERING_SIZE;
	} while (idx != ering->cursor);

	return rc;
}

static unsigned int ata_eh_dev_action(struct ata_device *dev)
{
	struct ata_eh_context *ehc = &dev->ap->eh_context;

	return ehc->i.action | ehc->i.dev_action[dev->devno];
}

static void ata_eh_clear_action(struct ata_device *dev,
				struct ata_eh_info *ehi, unsigned int action)
{
	int i;

	if (!dev) {
		ehi->action &= ~action;
		for (i = 0; i < ATA_MAX_DEVICES; i++)
			ehi->dev_action[i] &= ~action;
	} else {
		/* doesn't make sense for port-wide EH actions */
		WARN_ON(!(action & ATA_EH_PERDEV_MASK));

		/* break ehi->action into ehi->dev_action */
		if (ehi->action & action) {
			for (i = 0; i < ATA_MAX_DEVICES; i++)
				ehi->dev_action[i] |= ehi->action & action;
			ehi->action &= ~action;
		}

		/* turn off the specified per-dev action */
		ehi->dev_action[dev->devno] &= ~action;
	}
}
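
/* For illustration only: a minimal ata_ering_map() callback in the
 * style of speed_down_verdict_cb() below.  The walk starts at the most
 * recent entry and runs backwards until it hits an unused slot or the
 * callback returns non-zero (returning zero continues the walk).
 * count_timeouts_cb() is a hypothetical example and is not used
 * anywhere in this file.
 *
 *	static int count_timeouts_cb(struct ata_ering_entry *ent, void *arg)
 *	{
 *		int *nr_timeouts = arg;
 *
 *		if (ent->err_mask & AC_ERR_TIMEOUT)
 *			(*nr_timeouts)++;
 *		return 0;
 *	}
 *
 *	int nr_timeouts = 0;
 *	ata_ering_map(&dev->ering, count_timeouts_cb, &nr_timeouts);
 */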

/**
 *	ata_scsi_timed_out - SCSI layer time out callback
 *	@cmd: timed out SCSI command
 *
 *	Handles SCSI layer timeout.  We race with normal completion of
 *	the qc for @cmd.  If the qc is already gone, we lose and let
 *	the scsi command finish (EH_HANDLED).  Otherwise, the qc has
 *	timed out and EH should be invoked.  Prevent ata_qc_complete()
 *	from finishing it by setting EH_SCHEDULED and return
 *	EH_NOT_HANDLED.
 *
 *	TODO: kill this function once old EH is gone.
 *
 *	LOCKING:
 *	Called from timer context
 *
 *	RETURNS:
 *	EH_HANDLED or EH_NOT_HANDLED
 */
enum scsi_eh_timer_return ata_scsi_timed_out(struct scsi_cmnd *cmd)
{
	struct Scsi_Host *host = cmd->device->host;
	struct ata_port *ap = ata_shost_to_port(host);
	unsigned long flags;
	struct ata_queued_cmd *qc;
	enum scsi_eh_timer_return ret;

	DPRINTK("ENTER\n");

	if (ap->ops->error_handler) {
		ret = EH_NOT_HANDLED;
		goto out;
	}

	ret = EH_HANDLED;
	spin_lock_irqsave(ap->lock, flags);
	qc = ata_qc_from_tag(ap, ap->active_tag);
	if (qc) {
		WARN_ON(qc->scsicmd != cmd);
		qc->flags |= ATA_QCFLAG_EH_SCHEDULED;
		qc->err_mask |= AC_ERR_TIMEOUT;
		ret = EH_NOT_HANDLED;
	}
	spin_unlock_irqrestore(ap->lock, flags);

 out:
	DPRINTK("EXIT, ret=%d\n", ret);
	return ret;
}

/**
 *	ata_scsi_error - SCSI layer error handler callback
 *	@host: SCSI host on which error occurred
 *
 *	Handles SCSI-layer-thrown error events.
 *
 *	LOCKING:
 *	Inherited from SCSI layer (none, can sleep)
 *
 *	RETURNS:
 *	Zero.
 */
void ata_scsi_error(struct Scsi_Host *host)
{
	struct ata_port *ap = ata_shost_to_port(host);
	int i, repeat_cnt = ATA_EH_MAX_REPEAT;
	unsigned long flags;

	DPRINTK("ENTER\n");

	/* synchronize with port task */
	ata_port_flush_task(ap);

	/* synchronize with host lock and sort out timeouts */

	/* For new EH, all qcs are finished in one of three ways -
	 * normal completion, error completion, and SCSI timeout.
	 * Both completions can race against SCSI timeout.  When normal
	 * completion wins, the qc never reaches EH.  When error
	 * completion wins, the qc has ATA_QCFLAG_FAILED set.
	 *
	 * When SCSI timeout wins, things are a bit more complex.
	 * Normal or error completion can occur after the timeout but
	 * before this point.  In such cases, both types of
	 * completions are honored.  A scmd is determined to have
	 * timed out iff its associated qc is active and not failed.
	 */
	if (ap->ops->error_handler) {
		struct scsi_cmnd *scmd, *tmp;
		int nr_timedout = 0;

		spin_lock_irqsave(ap->lock, flags);

		list_for_each_entry_safe(scmd, tmp, &host->eh_cmd_q, eh_entry) {
			struct ata_queued_cmd *qc;

			for (i = 0; i < ATA_MAX_QUEUE; i++) {
				qc = __ata_qc_from_tag(ap, i);
				if (qc->flags & ATA_QCFLAG_ACTIVE &&
				    qc->scsicmd == scmd)
					break;
			}

			if (i < ATA_MAX_QUEUE) {
				/* the scmd has an associated qc */
				if (!(qc->flags & ATA_QCFLAG_FAILED)) {
					/* which hasn't failed yet, timeout */
					qc->err_mask |= AC_ERR_TIMEOUT;
					qc->flags |= ATA_QCFLAG_FAILED;
					nr_timedout++;
				}
			} else {
				/* Normal completion occurred after
				 * SCSI timeout but before this point.
				 * Successfully complete it.
				 */
				scmd->retries = scmd->allowed;
				scsi_eh_finish_cmd(scmd, &ap->eh_done_q);
			}
		}

		/* If we have timed out qcs, they belong to EH from
		 * this point but the state of the controller is
		 * unknown.  Freeze the port to make sure the IRQ
		 * handler doesn't diddle with those qcs.  This must
		 * be done atomically w.r.t. setting QCFLAG_FAILED.
		 */
		if (nr_timedout)
			__ata_port_freeze(ap);

		spin_unlock_irqrestore(ap->lock, flags);
	} else
		spin_unlock_wait(ap->lock);

 repeat:
	/* invoke error handler */
	if (ap->ops->error_handler) {
		/* process port resume request */
		ata_eh_handle_port_resume(ap);

		/* fetch & clear EH info */
		spin_lock_irqsave(ap->lock, flags);

		memset(&ap->eh_context, 0, sizeof(ap->eh_context));
		ap->eh_context.i = ap->eh_info;
		memset(&ap->eh_info, 0, sizeof(ap->eh_info));

		ap->pflags |= ATA_PFLAG_EH_IN_PROGRESS;
		ap->pflags &= ~ATA_PFLAG_EH_PENDING;

		spin_unlock_irqrestore(ap->lock, flags);

		/* invoke EH, skip if unloading or suspended */
		if (!(ap->pflags & (ATA_PFLAG_UNLOADING | ATA_PFLAG_SUSPENDED)))
			ap->ops->error_handler(ap);
		else
			ata_eh_finish(ap);

		/* process port suspend request */
		ata_eh_handle_port_suspend(ap);

		/* Exception might have happened after ->error_handler
		 * recovered the port but before this point.  Repeat
		 * EH in such case.
		 */
		spin_lock_irqsave(ap->lock, flags);

		if (ap->pflags & ATA_PFLAG_EH_PENDING) {
			if (--repeat_cnt) {
				ata_port_printk(ap, KERN_INFO,
					"EH pending after completion, "
					"repeating EH (cnt=%d)\n", repeat_cnt);
				spin_unlock_irqrestore(ap->lock, flags);
				goto repeat;
			}
			ata_port_printk(ap, KERN_ERR, "EH pending after %d "
					"tries, giving up\n", ATA_EH_MAX_REPEAT);
		}

		/* this run is complete, make sure EH info is clear */
		memset(&ap->eh_info, 0, sizeof(ap->eh_info));

		/* Clear host_eh_scheduled while holding ap->lock such
		 * that if exception occurs after this point but
		 * before EH completion, SCSI midlayer will
		 * re-initiate EH.
		 */
		host->host_eh_scheduled = 0;

		spin_unlock_irqrestore(ap->lock, flags);
	} else {
		WARN_ON(ata_qc_from_tag(ap, ap->active_tag) == NULL);
		ap->ops->eng_timeout(ap);
	}

	/* finish or retry handled scmd's and clean up */
	WARN_ON(host->host_failed || !list_empty(&host->eh_cmd_q));

	scsi_eh_flush_done_q(&ap->eh_done_q);

	/* clean up */
	spin_lock_irqsave(ap->lock, flags);

	if (ap->pflags & ATA_PFLAG_LOADING)
		ap->pflags &= ~ATA_PFLAG_LOADING;
	else if (ap->pflags & ATA_PFLAG_SCSI_HOTPLUG)
		queue_delayed_work(ata_aux_wq, &ap->hotplug_task, 0);

	if (ap->pflags & ATA_PFLAG_RECOVERED)
		ata_port_printk(ap, KERN_INFO, "EH complete\n");

	ap->pflags &= ~(ATA_PFLAG_SCSI_HOTPLUG | ATA_PFLAG_RECOVERED);

	/* tell wait_eh that we're done */
	ap->pflags &= ~ATA_PFLAG_EH_IN_PROGRESS;
	wake_up_all(&ap->eh_wait_q);

	spin_unlock_irqrestore(ap->lock, flags);

	DPRINTK("EXIT\n");
}

/**
 *	ata_port_wait_eh - Wait for the currently pending EH to complete
 *	@ap: Port to wait EH for
 *
 *	Wait until the currently pending EH is complete.
 *
 *	LOCKING:
 *	Kernel thread context (may sleep).
 */
void ata_port_wait_eh(struct ata_port *ap)
{
	unsigned long flags;
	DEFINE_WAIT(wait);

 retry:
	spin_lock_irqsave(ap->lock, flags);

	while (ap->pflags & (ATA_PFLAG_EH_PENDING | ATA_PFLAG_EH_IN_PROGRESS)) {
		prepare_to_wait(&ap->eh_wait_q, &wait, TASK_UNINTERRUPTIBLE);
		spin_unlock_irqrestore(ap->lock, flags);
		schedule();
		spin_lock_irqsave(ap->lock, flags);
	}
	finish_wait(&ap->eh_wait_q, &wait);

	spin_unlock_irqrestore(ap->lock, flags);

	/* make sure SCSI EH is complete */
	if (scsi_host_in_recovery(ap->scsi_host)) {
		msleep(10);
		goto retry;
	}
}
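
/* A minimal usage sketch (not a verbatim excerpt from any driver):
 * EH is scheduled under the host lock, then waited for from a context
 * that may sleep, per the LOCKING notes of the two functions involved.
 *
 *	spin_lock_irqsave(ap->lock, flags);
 *	ata_port_schedule_eh(ap);
 *	spin_unlock_irqrestore(ap->lock, flags);
 *
 *	ata_port_wait_eh(ap);
 */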

/**
 *	ata_qc_timeout - Handle timeout of queued command
 *	@qc: Command that timed out
 *
 *	Some part of the kernel (currently, only the SCSI layer)
 *	has noticed that the active command on port @ap has not
 *	completed after a specified length of time.  Handle this
 *	condition by disabling DMA (if necessary) and completing
 *	transactions, with error if necessary.
 *
 *	This also handles the case of the "lost interrupt", where
 *	for some reason (possibly hardware bug, possibly driver bug)
 *	an interrupt was not delivered to the driver, even though the
 *	transaction completed successfully.
 *
 *	TODO: kill this function once old EH is gone.
 *
 *	LOCKING:
 *	Inherited from SCSI layer (none, can sleep)
 */
static void ata_qc_timeout(struct ata_queued_cmd *qc)
{
	struct ata_port *ap = qc->ap;
	u8 host_stat = 0, drv_stat;
	unsigned long flags;

	DPRINTK("ENTER\n");

	ap->hsm_task_state = HSM_ST_IDLE;

	spin_lock_irqsave(ap->lock, flags);

	switch (qc->tf.protocol) {

	case ATA_PROT_DMA:
	case ATA_PROT_ATAPI_DMA:
		host_stat = ap->ops->bmdma_status(ap);

		/* before we do anything else, clear DMA-Start bit */
		ap->ops->bmdma_stop(qc);

		/* fall through */

	default:
		ata_altstatus(ap);
		drv_stat = ata_chk_status(ap);

		/* ack bmdma irq events */
		ap->ops->irq_clear(ap);

		ata_dev_printk(qc->dev, KERN_ERR, "command 0x%x timeout, "
			       "stat 0x%x host_stat 0x%x\n",
			       qc->tf.command, drv_stat, host_stat);

		/* complete taskfile transaction */
		qc->err_mask |= AC_ERR_TIMEOUT;
		break;
	}

	spin_unlock_irqrestore(ap->lock, flags);

	ata_eh_qc_complete(qc);

	DPRINTK("EXIT\n");
}

/**
 *	ata_eng_timeout - Handle timeout of queued command
 *	@ap: Port on which timed-out command is active
 *
 *	Some part of the kernel (currently, only the SCSI layer)
 *	has noticed that the active command on port @ap has not
 *	completed after a specified length of time.  Handle this
 *	condition by disabling DMA (if necessary) and completing
 *	transactions, with error if necessary.
 *
 *	This also handles the case of the "lost interrupt", where
 *	for some reason (possibly hardware bug, possibly driver bug)
 *	an interrupt was not delivered to the driver, even though the
 *	transaction completed successfully.
 *
 *	TODO: kill this function once old EH is gone.
 *
 *	LOCKING:
 *	Inherited from SCSI layer (none, can sleep)
 */
void ata_eng_timeout(struct ata_port *ap)
{
	DPRINTK("ENTER\n");

	ata_qc_timeout(ata_qc_from_tag(ap, ap->active_tag));

	DPRINTK("EXIT\n");
}

/**
 *	ata_qc_schedule_eh - schedule qc for error handling
 *	@qc: command to schedule error handling for
 *
 *	Schedule error handling for @qc.  EH will kick in as soon as
 *	other commands are drained.
 *
 *	LOCKING:
 *	spin_lock_irqsave(host lock)
 */
void ata_qc_schedule_eh(struct ata_queued_cmd *qc)
{
	struct ata_port *ap = qc->ap;

	WARN_ON(!ap->ops->error_handler);

	qc->flags |= ATA_QCFLAG_FAILED;
	qc->ap->pflags |= ATA_PFLAG_EH_PENDING;

	/* The following will fail if timeout has already expired.
	 * ata_scsi_error() takes care of such scmds on EH entry.
	 * Note that ATA_QCFLAG_FAILED is unconditionally set after
	 * this function completes.
	 */
	scsi_req_abort_cmd(qc->scsicmd);
}

/**
 *	ata_port_schedule_eh - schedule error handling without a qc
 *	@ap: ATA port to schedule EH for
 *
 *	Schedule error handling for @ap.  EH will kick in as soon as
 *	all commands are drained.
 *
 *	LOCKING:
 *	spin_lock_irqsave(host lock)
 */
void ata_port_schedule_eh(struct ata_port *ap)
{
	WARN_ON(!ap->ops->error_handler);

	ap->pflags |= ATA_PFLAG_EH_PENDING;
	scsi_schedule_eh(ap->scsi_host);

	DPRINTK("port EH scheduled\n");
}

/**
 *	ata_port_abort - abort all qc's on the port
 *	@ap: ATA port to abort qc's for
 *
 *	Abort all active qc's of @ap and schedule EH.
 *
 *	LOCKING:
 *	spin_lock_irqsave(host lock)
 *
 *	RETURNS:
 *	Number of aborted qc's.
 */
int ata_port_abort(struct ata_port *ap)
{
	int tag, nr_aborted = 0;

	WARN_ON(!ap->ops->error_handler);

	for (tag = 0; tag < ATA_MAX_QUEUE; tag++) {
		struct ata_queued_cmd *qc = ata_qc_from_tag(ap, tag);

		if (qc) {
			qc->flags |= ATA_QCFLAG_FAILED;
			ata_qc_complete(qc);
			nr_aborted++;
		}
	}

	if (!nr_aborted)
		ata_port_schedule_eh(ap);

	return nr_aborted;
}

/**
 *	__ata_port_freeze - freeze port
 *	@ap: ATA port to freeze
 *
 *	This function is called when HSM violation or some other
 *	condition disrupts normal operation of the port.  Frozen port
 *	is not allowed to perform any operation until the port is
 *	thawed, which usually follows a successful reset.
 *
 *	ap->ops->freeze() callback can be used for freezing the port
 *	hardware-wise (e.g. mask interrupt and stop DMA engine).  If a
 *	port cannot be frozen hardware-wise, the interrupt handler
 *	must ack and clear interrupts unconditionally while the port
 *	is frozen.
 *
 *	LOCKING:
 *	spin_lock_irqsave(host lock)
 */
static void __ata_port_freeze(struct ata_port *ap)
{
	WARN_ON(!ap->ops->error_handler);

	if (ap->ops->freeze)
		ap->ops->freeze(ap);

	ap->pflags |= ATA_PFLAG_FROZEN;

	DPRINTK("ata%u port frozen\n", ap->print_id);
}

/**
 *	ata_port_freeze - abort & freeze port
 *	@ap: ATA port to freeze
 *
 *	Abort and freeze @ap.
 *
 *	LOCKING:
 *	spin_lock_irqsave(host lock)
 *
 *	RETURNS:
 *	Number of aborted commands.
 */
int ata_port_freeze(struct ata_port *ap)
{
	int nr_aborted;

	WARN_ON(!ap->ops->error_handler);

	nr_aborted = ata_port_abort(ap);
	__ata_port_freeze(ap);

	return nr_aborted;
}

/**
 *	ata_eh_freeze_port - EH helper to freeze port
 *	@ap: ATA port to freeze
 *
 *	Freeze @ap.
 *
 *	LOCKING:
 *	None.
 */
void ata_eh_freeze_port(struct ata_port *ap)
{
	unsigned long flags;

	if (!ap->ops->error_handler)
		return;

	spin_lock_irqsave(ap->lock, flags);
	__ata_port_freeze(ap);
	spin_unlock_irqrestore(ap->lock, flags);
}

/**
 *	ata_eh_thaw_port - EH helper to thaw port
 *	@ap: ATA port to thaw
 *
 *	Thaw frozen port @ap.
 *
 *	LOCKING:
 *	None.
 */
void ata_eh_thaw_port(struct ata_port *ap)
{
	unsigned long flags;

	if (!ap->ops->error_handler)
		return;

	spin_lock_irqsave(ap->lock, flags);

	ap->pflags &= ~ATA_PFLAG_FROZEN;

	if (ap->ops->thaw)
		ap->ops->thaw(ap);

	spin_unlock_irqrestore(ap->lock, flags);

	DPRINTK("ata%u port thawed\n", ap->print_id);
}
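
/* Sketch of the freeze/thaw lifecycle, with a hypothetical status bit
 * standing in for hardware-specific details: an interrupt handler that
 * detects a serious error freezes the port with the host lock held;
 * EH later resets the port and thaws it (see ata_eh_recover()).
 *
 *	if (irq_stat & MY_SERIOUS_ERR)		(hypothetical flag)
 *		ata_port_freeze(ap);		(abort qcs and freeze)
 *
 * While frozen, a port that could not be masked hardware-wise must have
 * its interrupts acked and discarded, per __ata_port_freeze() above.
 */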

static void ata_eh_scsidone(struct scsi_cmnd *scmd)
{
	/* nada */
}

static void __ata_eh_qc_complete(struct ata_queued_cmd *qc)
{
	struct ata_port *ap = qc->ap;
	struct scsi_cmnd *scmd = qc->scsicmd;
	unsigned long flags;

	spin_lock_irqsave(ap->lock, flags);
	qc->scsidone = ata_eh_scsidone;
	__ata_qc_complete(qc);
	WARN_ON(ata_tag_valid(qc->tag));
	spin_unlock_irqrestore(ap->lock, flags);

	scsi_eh_finish_cmd(scmd, &ap->eh_done_q);
}

/**
 *	ata_eh_qc_complete - Complete an active ATA command from EH
 *	@qc: Command to complete
 *
 *	Indicate to the mid and upper layers that an ATA command has
 *	completed.  To be used from EH.
 */
void ata_eh_qc_complete(struct ata_queued_cmd *qc)
{
	struct scsi_cmnd *scmd = qc->scsicmd;
	scmd->retries = scmd->allowed;
	__ata_eh_qc_complete(qc);
}

/**
 *	ata_eh_qc_retry - Tell midlayer to retry an ATA command after EH
 *	@qc: Command to retry
 *
 *	Indicate to the mid and upper layers that an ATA command
 *	should be retried.  To be used from EH.
 *
 *	SCSI midlayer limits the number of retries to scmd->allowed.
 *	scmd->retries is decremented for commands which get retried
 *	due to unrelated failures (qc->err_mask is zero).
 */
void ata_eh_qc_retry(struct ata_queued_cmd *qc)
{
	struct scsi_cmnd *scmd = qc->scsicmd;
	if (!qc->err_mask && scmd->retries)
		scmd->retries--;
	__ata_eh_qc_complete(qc);
}

/**
 *	ata_eh_detach_dev - detach ATA device
 *	@dev: ATA device to detach
 *
 *	Detach @dev.
 *
 *	LOCKING:
 *	None.
 */
static void ata_eh_detach_dev(struct ata_device *dev)
{
	struct ata_port *ap = dev->ap;
	unsigned long flags;

	ata_dev_disable(dev);

	spin_lock_irqsave(ap->lock, flags);

	dev->flags &= ~ATA_DFLAG_DETACH;

	if (ata_scsi_offline_dev(dev)) {
		dev->flags |= ATA_DFLAG_DETACHED;
		ap->pflags |= ATA_PFLAG_SCSI_HOTPLUG;
	}

	/* clear per-dev EH actions */
	ata_eh_clear_action(dev, &ap->eh_info, ATA_EH_PERDEV_MASK);
	ata_eh_clear_action(dev, &ap->eh_context.i, ATA_EH_PERDEV_MASK);

	spin_unlock_irqrestore(ap->lock, flags);
}

/**
 *	ata_eh_about_to_do - about to perform eh_action
 *	@ap: target ATA port
 *	@dev: target ATA dev for per-dev action (can be NULL)
 *	@action: action about to be performed
 *
 *	Called just before performing EH actions to clear related bits
 *	in @ap->eh_info such that eh actions are not unnecessarily
 *	repeated.
 *
 *	LOCKING:
 *	None.
 */
static void ata_eh_about_to_do(struct ata_port *ap, struct ata_device *dev,
			       unsigned int action)
{
	unsigned long flags;
	struct ata_eh_info *ehi = &ap->eh_info;
	struct ata_eh_context *ehc = &ap->eh_context;

	spin_lock_irqsave(ap->lock, flags);

	/* Reset is represented by combination of actions and EHI
	 * flags.  Suck in all related bits before clearing eh_info to
	 * avoid losing requested action.
	 */
	if (action & ATA_EH_RESET_MASK) {
		ehc->i.action |= ehi->action & ATA_EH_RESET_MASK;
		ehc->i.flags |= ehi->flags & ATA_EHI_RESET_MODIFIER_MASK;

		/* make sure all reset actions are cleared & clear EHI flags */
		action |= ATA_EH_RESET_MASK;
		ehi->flags &= ~ATA_EHI_RESET_MODIFIER_MASK;
	}

	ata_eh_clear_action(dev, ehi, action);

	if (!(ehc->i.flags & ATA_EHI_QUIET))
		ap->pflags |= ATA_PFLAG_RECOVERED;

	spin_unlock_irqrestore(ap->lock, flags);
}

/**
 *	ata_eh_done - EH action complete
 *	@ap: target ATA port
 *	@dev: target ATA dev for per-dev action (can be NULL)
 *	@action: action just completed
 *
 *	Called right after performing EH actions to clear related bits
 *	in @ap->eh_context.
 *
 *	LOCKING:
 *	None.
 */
static void ata_eh_done(struct ata_port *ap, struct ata_device *dev,
			unsigned int action)
{
	/* if reset is complete, clear all reset actions & reset modifier */
	if (action & ATA_EH_RESET_MASK) {
		action |= ATA_EH_RESET_MASK;
		ap->eh_context.i.flags &= ~ATA_EHI_RESET_MODIFIER_MASK;
	}

	ata_eh_clear_action(dev, &ap->eh_context.i, action);
}
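
/* The two helpers above bracket every EH action; the canonical
 * pattern, as used by ata_eh_revalidate_and_attach() below:
 *
 *	ata_eh_about_to_do(ap, dev, ATA_EH_REVALIDATE);
 *	rc = ata_dev_revalidate(dev, readid_flags);
 *	if (rc == 0)
 *		ata_eh_done(ap, dev, ATA_EH_REVALIDATE);
 *
 * about_to_do() clears the request from eh_info before the action runs
 * so a repeat request arriving meanwhile isn't lost; done() clears it
 * from eh_context once the action has succeeded.
 */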

/**
 *	ata_err_string - convert err_mask to descriptive string
 *	@err_mask: error mask to convert to string
 *
 *	Convert @err_mask to descriptive string.  Errors are
 *	prioritized according to severity and only the most severe
 *	error is reported.
 *
 *	LOCKING:
 *	None.
 *
 *	RETURNS:
 *	Descriptive string for @err_mask
 */
static const char *ata_err_string(unsigned int err_mask)
{
	if (err_mask & AC_ERR_HOST_BUS)
		return "host bus error";
	if (err_mask & AC_ERR_ATA_BUS)
		return "ATA bus error";
	if (err_mask & AC_ERR_TIMEOUT)
		return "timeout";
	if (err_mask & AC_ERR_HSM)
		return "HSM violation";
	if (err_mask & AC_ERR_SYSTEM)
		return "internal error";
	if (err_mask & AC_ERR_MEDIA)
		return "media error";
	if (err_mask & AC_ERR_INVALID)
		return "invalid argument";
	if (err_mask & AC_ERR_DEV)
		return "device error";
	return "unknown error";
}

/**
 *	ata_read_log_page - read a specific log page
 *	@dev: target device
 *	@page: page to read
 *	@buf: buffer to store read page
 *	@sectors: number of sectors to read
 *
 *	Read log page using READ_LOG_EXT command.
 *
 *	LOCKING:
 *	Kernel thread context (may sleep).
 *
 *	RETURNS:
 *	0 on success, AC_ERR_* mask otherwise.
 */
static unsigned int ata_read_log_page(struct ata_device *dev,
				      u8 page, void *buf, unsigned int sectors)
{
	struct ata_taskfile tf;
	unsigned int err_mask;

	DPRINTK("read log page - page %d\n", page);

	ata_tf_init(dev, &tf);
	tf.command = ATA_CMD_READ_LOG_EXT;
	tf.lbal = page;
	tf.nsect = sectors;
	tf.hob_nsect = sectors >> 8;
	tf.flags |= ATA_TFLAG_ISADDR | ATA_TFLAG_LBA48 | ATA_TFLAG_DEVICE;
	tf.protocol = ATA_PROT_PIO;

	err_mask = ata_exec_internal(dev, &tf, NULL, DMA_FROM_DEVICE,
				     buf, sectors * ATA_SECT_SIZE);

	DPRINTK("EXIT, err_mask=%x\n", err_mask);
	return err_mask;
}

/**
 *	ata_eh_read_log_10h - Read log page 10h for NCQ error details
 *	@dev: Device to read log page 10h from
 *	@tag: Resulting tag of the failed command
 *	@tf: Resulting taskfile registers of the failed command
 *
 *	Read log page 10h to obtain NCQ error details and clear error
 *	condition.
 *
 *	LOCKING:
 *	Kernel thread context (may sleep).
 *
 *	RETURNS:
 *	0 on success, -errno otherwise.
 */
static int ata_eh_read_log_10h(struct ata_device *dev,
			       int *tag, struct ata_taskfile *tf)
{
	u8 *buf = dev->ap->sector_buf;
	unsigned int err_mask;
	u8 csum;
	int i;

	err_mask = ata_read_log_page(dev, ATA_LOG_SATA_NCQ, buf, 1);
	if (err_mask)
		return -EIO;

	csum = 0;
	for (i = 0; i < ATA_SECT_SIZE; i++)
		csum += buf[i];
	if (csum)
		ata_dev_printk(dev, KERN_WARNING,
			       "invalid checksum 0x%x on log page 10h\n", csum);

	/* NQ bit set - the error is for a non-queued command */
	if (buf[0] & 0x80)
		return -ENOENT;

	*tag = buf[0] & 0x1f;

	tf->command = buf[2];
	tf->feature = buf[3];
	tf->lbal = buf[4];
	tf->lbam = buf[5];
	tf->lbah = buf[6];
	tf->device = buf[7];
	tf->hob_lbal = buf[8];
	tf->hob_lbam = buf[9];
	tf->hob_lbah = buf[10];
	tf->nsect = buf[12];
	tf->hob_nsect = buf[13];

	return 0;
}
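
/* For reference, the log page 10h layout assumed above: byte 0 carries
 * the NQ bit (0x80) and the failed tag in bits 4:0, bytes 2..13 hold
 * the failed command's register contents, and byte 511 is a checksum
 * chosen so the 512 bytes sum to zero.  The Status and Error registers
 * land in tf->command and tf->feature respectively, matching libata's
 * result_tf convention.
 */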

/**
 *	atapi_eh_request_sense - perform ATAPI REQUEST_SENSE
 *	@dev: device to perform REQUEST_SENSE to
 *	@sense_buf: result sense data buffer (SCSI_SENSE_BUFFERSIZE bytes long)
 *
 *	Perform ATAPI REQUEST_SENSE after the device reported CHECK
 *	SENSE.  This function is an EH helper.
 *
 *	LOCKING:
 *	Kernel thread context (may sleep).
 *
 *	RETURNS:
 *	0 on success, AC_ERR_* mask on failure
 */
static unsigned int atapi_eh_request_sense(struct ata_device *dev,
					   unsigned char *sense_buf)
{
	struct ata_port *ap = dev->ap;
	struct ata_taskfile tf;
	u8 cdb[ATAPI_CDB_LEN];

	DPRINTK("ATAPI request sense\n");

	ata_tf_init(dev, &tf);

	/* FIXME: is this needed? */
	memset(sense_buf, 0, SCSI_SENSE_BUFFERSIZE);

	/* XXX: why tf_read here? */
	ap->ops->tf_read(ap, &tf);

	/* fill these in, for the case where they are -not- overwritten */
	sense_buf[0] = 0x70;
	sense_buf[2] = tf.feature >> 4;

	memset(cdb, 0, ATAPI_CDB_LEN);
	cdb[0] = REQUEST_SENSE;
	cdb[4] = SCSI_SENSE_BUFFERSIZE;

	tf.flags |= ATA_TFLAG_ISADDR | ATA_TFLAG_DEVICE;
	tf.command = ATA_CMD_PACKET;

	/* is it pointless to prefer PIO for "safety reasons"? */
	if (ap->flags & ATA_FLAG_PIO_DMA) {
		tf.protocol = ATA_PROT_ATAPI_DMA;
		tf.feature |= ATAPI_PKT_DMA;
	} else {
		tf.protocol = ATA_PROT_ATAPI;
		tf.lbam = (8 * 1024) & 0xff;
		tf.lbah = (8 * 1024) >> 8;
	}

	return ata_exec_internal(dev, &tf, cdb, DMA_FROM_DEVICE,
				 sense_buf, SCSI_SENSE_BUFFERSIZE);
}

/**
 *	ata_eh_analyze_serror - analyze SError for a failed port
 *	@ap: ATA port to analyze SError for
 *
 *	Analyze SError if available and further determine cause of
 *	failure.
 *
 *	LOCKING:
 *	None.
 */
static void ata_eh_analyze_serror(struct ata_port *ap)
{
	struct ata_eh_context *ehc = &ap->eh_context;
	u32 serror = ehc->i.serror;
	unsigned int err_mask = 0, action = 0;

	if (serror & SERR_PERSISTENT) {
		err_mask |= AC_ERR_ATA_BUS;
		action |= ATA_EH_HARDRESET;
	}
	if (serror &
	    (SERR_DATA_RECOVERED | SERR_COMM_RECOVERED | SERR_DATA)) {
		err_mask |= AC_ERR_ATA_BUS;
		action |= ATA_EH_SOFTRESET;
	}
	if (serror & SERR_PROTOCOL) {
		err_mask |= AC_ERR_HSM;
		action |= ATA_EH_SOFTRESET;
	}
	if (serror & SERR_INTERNAL) {
		err_mask |= AC_ERR_SYSTEM;
		action |= ATA_EH_SOFTRESET;
	}
	if (serror & (SERR_PHYRDY_CHG | SERR_DEV_XCHG))
		ata_ehi_hotplugged(&ehc->i);

	ehc->i.err_mask |= err_mask;
	ehc->i.action |= action;
}

/**
 *	ata_eh_analyze_ncq_error - analyze NCQ error
 *	@ap: ATA port to analyze NCQ error for
 *
 *	Read log page 10h, determine the offending qc and acquire
 *	error status TF.  For NCQ device errors, all an LLDD has to do
 *	is set AC_ERR_DEV in ehi->err_mask.  This function takes
 *	care of the rest.
 *
 *	LOCKING:
 *	Kernel thread context (may sleep).
 */
static void ata_eh_analyze_ncq_error(struct ata_port *ap)
{
	struct ata_eh_context *ehc = &ap->eh_context;
	struct ata_device *dev = ap->device;
	struct ata_queued_cmd *qc;
	struct ata_taskfile tf;
	int tag, rc;

	/* if frozen, we can't do much */
	if (ap->pflags & ATA_PFLAG_FROZEN)
		return;

	/* is it NCQ device error? */
	if (!ap->sactive || !(ehc->i.err_mask & AC_ERR_DEV))
		return;

	/* has the LLDD analyzed it already? */
	for (tag = 0; tag < ATA_MAX_QUEUE; tag++) {
		qc = __ata_qc_from_tag(ap, tag);

		if (!(qc->flags & ATA_QCFLAG_FAILED))
			continue;

		if (qc->err_mask)
			return;
	}

	/* okay, this error is ours */
	rc = ata_eh_read_log_10h(dev, &tag, &tf);
	if (rc) {
		ata_port_printk(ap, KERN_ERR, "failed to read log page 10h "
				"(errno=%d)\n", rc);
		return;
	}

	if (!(ap->sactive & (1 << tag))) {
		ata_port_printk(ap, KERN_ERR, "log page 10h reported "
				"inactive tag %d\n", tag);
		return;
	}

	/* we've got the perpetrator, condemn it */
	qc = __ata_qc_from_tag(ap, tag);
	memcpy(&qc->result_tf, &tf, sizeof(tf));
	qc->err_mask |= AC_ERR_DEV;
	ehc->i.err_mask &= ~AC_ERR_DEV;
}

/**
 *	ata_eh_analyze_tf - analyze taskfile of a failed qc
 *	@qc: qc to analyze
 *	@tf: Taskfile registers to analyze
 *
 *	Analyze taskfile of @qc and further determine cause of
 *	failure.  This function also requests ATAPI sense data if
 *	available.
 *
 *	LOCKING:
 *	Kernel thread context (may sleep).
 *
 *	RETURNS:
 *	Determined recovery action
 */
static unsigned int ata_eh_analyze_tf(struct ata_queued_cmd *qc,
				      const struct ata_taskfile *tf)
{
	unsigned int tmp, action = 0;
	u8 stat = tf->command, err = tf->feature;

	if ((stat & (ATA_BUSY | ATA_DRQ | ATA_DRDY)) != ATA_DRDY) {
		qc->err_mask |= AC_ERR_HSM;
		return ATA_EH_SOFTRESET;
	}

	if (!(qc->err_mask & AC_ERR_DEV))
		return 0;

	switch (qc->dev->class) {
	case ATA_DEV_ATA:
		if (err & ATA_ICRC)
			qc->err_mask |= AC_ERR_ATA_BUS;
		if (err & ATA_UNC)
			qc->err_mask |= AC_ERR_MEDIA;
		if (err & ATA_IDNF)
			qc->err_mask |= AC_ERR_INVALID;
		break;

	case ATA_DEV_ATAPI:
		if (!(qc->ap->pflags & ATA_PFLAG_FROZEN)) {
			tmp = atapi_eh_request_sense(qc->dev,
						     qc->scsicmd->sense_buffer);
			if (!tmp) {
				/* ATA_QCFLAG_SENSE_VALID is used to
				 * tell atapi_qc_complete() that sense
				 * data is already valid.
				 *
				 * TODO: interpret sense data and set
				 * appropriate err_mask.
				 */
				qc->flags |= ATA_QCFLAG_SENSE_VALID;
			} else
				qc->err_mask |= tmp;
		}
	}

	if (qc->err_mask & (AC_ERR_HSM | AC_ERR_TIMEOUT | AC_ERR_ATA_BUS))
		action |= ATA_EH_SOFTRESET;

	return action;
}

static int ata_eh_categorize_error(int is_io, unsigned int err_mask)
{
	if (err_mask & AC_ERR_ATA_BUS)
		return 1;

	if (err_mask & AC_ERR_TIMEOUT)
		return 2;

	if (is_io) {
		if (err_mask & AC_ERR_HSM)
			return 2;
		if ((err_mask &
		     (AC_ERR_DEV|AC_ERR_MEDIA|AC_ERR_INVALID)) == AC_ERR_DEV)
			return 3;
	}

	return 0;
}

struct speed_down_verdict_arg {
	u64 since;
	int nr_errors[4];
};

static int speed_down_verdict_cb(struct ata_ering_entry *ent, void *void_arg)
{
	struct speed_down_verdict_arg *arg = void_arg;
	int cat = ata_eh_categorize_error(ent->is_io, ent->err_mask);

	if (ent->timestamp < arg->since)
		return -1;

	arg->nr_errors[cat]++;
	return 0;
}

/**
 *	ata_eh_speed_down_verdict - Determine speed down verdict
 *	@dev: Device of interest
 *
 *	This function examines error ring of @dev and determines
 *	whether NCQ needs to be turned off, transfer speed should be
 *	stepped down, or falling back to PIO is necessary.
 *
 *	Cat-1 is ATA_BUS error for any command.
 *
 *	Cat-2 is TIMEOUT for any command or HSM violation for known
 *	supported commands.
 *
 *	Cat-3 is unclassified DEV error for known supported
 *	command.
 *
 *	NCQ needs to be turned off if there have been more than 3
 *	Cat-2 + Cat-3 errors during last 10 minutes.
 *
 *	Speed down is necessary if there have been more than 3 Cat-1 +
 *	Cat-2 errors or 10 Cat-3 errors during last 10 minutes.
 *
 *	Falling back to PIO mode is necessary if there have been more
 *	than 10 Cat-1 + Cat-2 + Cat-3 errors during last 5 minutes.
 *
 *	LOCKING:
 *	Inherited from caller.
 *
 *	RETURNS:
 *	OR of ATA_EH_SPDN_* flags.
 */
static unsigned int ata_eh_speed_down_verdict(struct ata_device *dev)
{
	const u64 j5mins = 5LLU * 60 * HZ, j10mins = 10LLU * 60 * HZ;
	u64 j64 = get_jiffies_64();
	struct speed_down_verdict_arg arg;
	unsigned int verdict = 0;

	/* scan past 10 mins of error history */
	memset(&arg, 0, sizeof(arg));
	arg.since = j64 - min(j64, j10mins);
	ata_ering_map(&dev->ering, speed_down_verdict_cb, &arg);

	if (arg.nr_errors[2] + arg.nr_errors[3] > 3)
		verdict |= ATA_EH_SPDN_NCQ_OFF;
	if (arg.nr_errors[1] + arg.nr_errors[2] > 3 || arg.nr_errors[3] > 10)
		verdict |= ATA_EH_SPDN_SPEED_DOWN;

	/* scan past 5 mins of error history */
	memset(&arg, 0, sizeof(arg));
	arg.since = j64 - min(j64, j5mins);
	ata_ering_map(&dev->ering, speed_down_verdict_cb, &arg);

	if (arg.nr_errors[1] + arg.nr_errors[2] + arg.nr_errors[3] > 10)
		verdict |= ATA_EH_SPDN_FALLBACK_TO_PIO;

	return verdict;
}
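
/* A worked example of the rules above: four command timeouts (Cat-2)
 * within the last ten minutes yield both ATA_EH_SPDN_NCQ_OFF
 * (4 Cat-2 + Cat-3 errors > 3) and ATA_EH_SPDN_SPEED_DOWN
 * (4 Cat-1 + Cat-2 errors > 3).  If those four timeouts plus seven
 * Cat-3 device errors all fall within the last five minutes, the
 * total of 11 > 10 also sets ATA_EH_SPDN_FALLBACK_TO_PIO.
 */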

/**
 *	ata_eh_speed_down - record error and speed down if necessary
 *	@dev: Failed device
 *	@is_io: Did the device fail during normal IO?
 *	@err_mask: err_mask of the error
 *
 *	Record error and examine error history to determine whether
 *	adjusting transmission speed is necessary.  It also sets
 *	transmission limits appropriately if such adjustment is
 *	necessary.
 *
 *	LOCKING:
 *	Kernel thread context (may sleep).
 *
 *	RETURNS:
 *	Determined recovery action.
 */
static unsigned int ata_eh_speed_down(struct ata_device *dev, int is_io,
				      unsigned int err_mask)
{
	unsigned int verdict;
	unsigned int action = 0;

	/* don't bother if Cat-0 error */
	if (ata_eh_categorize_error(is_io, err_mask) == 0)
		return 0;

	/* record error and determine whether speed down is necessary */
	ata_ering_record(&dev->ering, is_io, err_mask);
	verdict = ata_eh_speed_down_verdict(dev);

	/* turn off NCQ? */
	if ((verdict & ATA_EH_SPDN_NCQ_OFF) &&
	    (dev->flags & (ATA_DFLAG_PIO | ATA_DFLAG_NCQ |
			   ATA_DFLAG_NCQ_OFF)) == ATA_DFLAG_NCQ) {
		dev->flags |= ATA_DFLAG_NCQ_OFF;
		ata_dev_printk(dev, KERN_WARNING,
			       "NCQ disabled due to excessive errors\n");
		goto done;
	}

	/* speed down? */
	if (verdict & ATA_EH_SPDN_SPEED_DOWN) {
		/* speed down SATA link speed if possible */
		if (sata_down_spd_limit(dev->ap) == 0) {
			action |= ATA_EH_HARDRESET;
			goto done;
		}

		/* lower transfer mode */
		if (dev->spdn_cnt < 2) {
			static const int dma_dnxfer_sel[] =
				{ ATA_DNXFER_DMA, ATA_DNXFER_40C };
			static const int pio_dnxfer_sel[] =
				{ ATA_DNXFER_PIO, ATA_DNXFER_FORCE_PIO0 };
			int sel;

			if (dev->xfer_shift != ATA_SHIFT_PIO)
				sel = dma_dnxfer_sel[dev->spdn_cnt];
			else
				sel = pio_dnxfer_sel[dev->spdn_cnt];

			dev->spdn_cnt++;

			if (ata_down_xfermask_limit(dev, sel) == 0) {
				action |= ATA_EH_SOFTRESET;
				goto done;
			}
		}
	}

	/* Fall back to PIO?  Slowing down to PIO is meaningless for
	 * SATA.  Consider it only for PATA.
	 */
	if ((verdict & ATA_EH_SPDN_FALLBACK_TO_PIO) && (dev->spdn_cnt >= 2) &&
	    (dev->ap->cbl != ATA_CBL_SATA) &&
	    (dev->xfer_shift != ATA_SHIFT_PIO)) {
		if (ata_down_xfermask_limit(dev, ATA_DNXFER_FORCE_PIO) == 0) {
			dev->spdn_cnt = 0;
			action |= ATA_EH_SOFTRESET;
			goto done;
		}
	}

	return 0;
 done:
	/* device has been slowed down, blow error history */
	ata_ering_clear(&dev->ering);
	return action;
}

/**
 *	ata_eh_autopsy - analyze error and determine recovery action
 *	@ap: ATA port to perform autopsy on
 *
 *	Analyze why @ap failed and determine which recovery action is
 *	needed.  This function also sets more detailed AC_ERR_* values
 *	and fills sense data for ATAPI CHECK SENSE.
 *
 *	LOCKING:
 *	Kernel thread context (may sleep).
 */
static void ata_eh_autopsy(struct ata_port *ap)
{
	struct ata_eh_context *ehc = &ap->eh_context;
	unsigned int all_err_mask = 0;
	int tag, is_io = 0;
	u32 serror;
	int rc;

	DPRINTK("ENTER\n");

	if (ehc->i.flags & ATA_EHI_NO_AUTOPSY)
		return;

	/* obtain and analyze SError */
	rc = sata_scr_read(ap, SCR_ERROR, &serror);
	if (rc == 0) {
		ehc->i.serror |= serror;
		ata_eh_analyze_serror(ap);
	} else if (rc != -EOPNOTSUPP)
		ehc->i.action |= ATA_EH_HARDRESET;

	/* analyze NCQ failure */
	ata_eh_analyze_ncq_error(ap);

	/* any real error trumps AC_ERR_OTHER */
	if (ehc->i.err_mask & ~AC_ERR_OTHER)
		ehc->i.err_mask &= ~AC_ERR_OTHER;

	all_err_mask |= ehc->i.err_mask;

	for (tag = 0; tag < ATA_MAX_QUEUE; tag++) {
		struct ata_queued_cmd *qc = __ata_qc_from_tag(ap, tag);

		if (!(qc->flags & ATA_QCFLAG_FAILED))
			continue;

		/* inherit upper level err_mask */
		qc->err_mask |= ehc->i.err_mask;

		/* analyze TF */
		ehc->i.action |= ata_eh_analyze_tf(qc, &qc->result_tf);

		/* DEV errors are probably spurious in case of ATA_BUS error */
		if (qc->err_mask & AC_ERR_ATA_BUS)
			qc->err_mask &= ~(AC_ERR_DEV | AC_ERR_MEDIA |
					  AC_ERR_INVALID);

		/* any real error trumps unknown error */
		if (qc->err_mask & ~AC_ERR_OTHER)
			qc->err_mask &= ~AC_ERR_OTHER;

		/* SENSE_VALID trumps dev/unknown error and revalidation */
		if (qc->flags & ATA_QCFLAG_SENSE_VALID) {
			qc->err_mask &= ~(AC_ERR_DEV | AC_ERR_OTHER);
			ehc->i.action &= ~ATA_EH_REVALIDATE;
		}

		/* accumulate error info */
		ehc->i.dev = qc->dev;
		all_err_mask |= qc->err_mask;
		if (qc->flags & ATA_QCFLAG_IO)
			is_io = 1;
	}

	/* enforce default EH actions */
	if (ap->pflags & ATA_PFLAG_FROZEN ||
	    all_err_mask & (AC_ERR_HSM | AC_ERR_TIMEOUT))
		ehc->i.action |= ATA_EH_SOFTRESET;
	else if (all_err_mask)
		ehc->i.action |= ATA_EH_REVALIDATE;

	/* if we have offending qcs and the associated failed device */
	if (ehc->i.dev) {
		/* speed down */
		ehc->i.action |= ata_eh_speed_down(ehc->i.dev, is_io,
						   all_err_mask);

		/* perform per-dev EH action only on the offending device */
		ehc->i.dev_action[ehc->i.dev->devno] |=
			ehc->i.action & ATA_EH_PERDEV_MASK;
		ehc->i.action &= ~ATA_EH_PERDEV_MASK;
	}

	DPRINTK("EXIT\n");
}

/**
 *	ata_eh_report - report error handling to user
 *	@ap: ATA port EH is going on
 *
 *	Report EH to user.
 *
 *	LOCKING:
 *	None.
 */
static void ata_eh_report(struct ata_port *ap)
{
	struct ata_eh_context *ehc = &ap->eh_context;
	const char *frozen, *desc;
	int tag, nr_failed = 0;

	desc = NULL;
	if (ehc->i.desc[0] != '\0')
		desc = ehc->i.desc;

	for (tag = 0; tag < ATA_MAX_QUEUE; tag++) {
		struct ata_queued_cmd *qc = __ata_qc_from_tag(ap, tag);

		if (!(qc->flags & ATA_QCFLAG_FAILED))
			continue;
		if (qc->flags & ATA_QCFLAG_SENSE_VALID && !qc->err_mask)
			continue;

		nr_failed++;
	}

	if (!nr_failed && !ehc->i.err_mask)
		return;

	frozen = "";
	if (ap->pflags & ATA_PFLAG_FROZEN)
		frozen = " frozen";

	if (ehc->i.dev) {
		ata_dev_printk(ehc->i.dev, KERN_ERR, "exception Emask 0x%x "
			       "SAct 0x%x SErr 0x%x action 0x%x%s\n",
			       ehc->i.err_mask, ap->sactive, ehc->i.serror,
			       ehc->i.action, frozen);
		if (desc)
			ata_dev_printk(ehc->i.dev, KERN_ERR, "(%s)\n", desc);
	} else {
		ata_port_printk(ap, KERN_ERR, "exception Emask 0x%x "
				"SAct 0x%x SErr 0x%x action 0x%x%s\n",
				ehc->i.err_mask, ap->sactive, ehc->i.serror,
				ehc->i.action, frozen);
		if (desc)
			ata_port_printk(ap, KERN_ERR, "(%s)\n", desc);
	}

	for (tag = 0; tag < ATA_MAX_QUEUE; tag++) {
		static const char *dma_str[] = {
			[DMA_BIDIRECTIONAL]	= "bidi",
			[DMA_TO_DEVICE]		= "out",
			[DMA_FROM_DEVICE]	= "in",
			[DMA_NONE]		= "",
		};
		struct ata_queued_cmd *qc = __ata_qc_from_tag(ap, tag);
		struct ata_taskfile *cmd = &qc->tf, *res = &qc->result_tf;

		if (!(qc->flags & ATA_QCFLAG_FAILED) || !qc->err_mask)
			continue;

		ata_dev_printk(qc->dev, KERN_ERR,
			"cmd %02x/%02x:%02x:%02x:%02x:%02x/%02x:%02x:%02x:%02x:%02x/%02x "
			"tag %d cdb 0x%x data %u %s\n         "
			"res %02x/%02x:%02x:%02x:%02x:%02x/%02x:%02x:%02x:%02x:%02x/%02x "
			"Emask 0x%x (%s)\n",
			cmd->command, cmd->feature, cmd->nsect,
			cmd->lbal, cmd->lbam, cmd->lbah,
			cmd->hob_feature, cmd->hob_nsect,
			cmd->hob_lbal, cmd->hob_lbam, cmd->hob_lbah,
			cmd->device, qc->tag, qc->cdb[0], qc->nbytes,
			dma_str[qc->dma_dir],
			res->command, res->feature, res->nsect,
			res->lbal, res->lbam, res->lbah,
			res->hob_feature, res->hob_nsect,
			res->hob_lbal, res->hob_lbam, res->hob_lbah,
			res->device, qc->err_mask, ata_err_string(qc->err_mask));
	}
}
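
/* The format strings above produce log lines like the following
 * (illustrative values, not captured from a real device):
 *
 *	ata1.00: exception Emask 0x1 SAct 0x0 SErr 0x0 action 0x2 frozen
 *	ata1.00: cmd c8/00:08:60:55:91/00:00:00:00:00/e0 tag 0 cdb 0x0 data 4096 in
 *	         res 51/04:08:60:55:91/00:00:00:00:00/e0 Emask 0x1 (device error)
 */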

static int ata_do_reset(struct ata_port *ap, ata_reset_fn_t reset,
			unsigned int *classes)
{
	int i, rc;

	for (i = 0; i < ATA_MAX_DEVICES; i++)
		classes[i] = ATA_DEV_UNKNOWN;

	rc = reset(ap, classes);
	if (rc)
		return rc;

	/* If any class isn't ATA_DEV_UNKNOWN, consider classification
	 * is complete and convert all ATA_DEV_UNKNOWN to
	 * ATA_DEV_NONE.
	 */
	for (i = 0; i < ATA_MAX_DEVICES; i++)
		if (classes[i] != ATA_DEV_UNKNOWN)
			break;

	if (i < ATA_MAX_DEVICES)
		for (i = 0; i < ATA_MAX_DEVICES; i++)
			if (classes[i] == ATA_DEV_UNKNOWN)
				classes[i] = ATA_DEV_NONE;

	return 0;
}

static int ata_eh_followup_srst_needed(int rc, int classify,
				       const unsigned int *classes)
{
	if (rc == -EAGAIN)
		return 1;
	if (rc != 0)
		return 0;
	if (classify && classes[0] == ATA_DEV_UNKNOWN)
		return 1;
	return 0;
}

static int ata_eh_reset(struct ata_port *ap, int classify,
			ata_prereset_fn_t prereset, ata_reset_fn_t softreset,
			ata_reset_fn_t hardreset, ata_postreset_fn_t postreset)
{
	struct ata_eh_context *ehc = &ap->eh_context;
	unsigned int *classes = ehc->classes;
	int tries = ATA_EH_RESET_TRIES;
	int verbose = !(ehc->i.flags & ATA_EHI_QUIET);
	unsigned int action;
	ata_reset_fn_t reset;
	int i, did_followup_srst, rc;

	/* about to reset */
	ata_eh_about_to_do(ap, NULL, ehc->i.action & ATA_EH_RESET_MASK);

	/* Determine which reset to use and record in ehc->i.action.
	 * prereset() may examine and modify it.
	 */
	action = ehc->i.action;
	ehc->i.action &= ~ATA_EH_RESET_MASK;
	if (softreset && (!hardreset || (!sata_set_spd_needed(ap) &&
					 !(action & ATA_EH_HARDRESET))))
		ehc->i.action |= ATA_EH_SOFTRESET;
	else
		ehc->i.action |= ATA_EH_HARDRESET;

	if (prereset) {
		rc = prereset(ap);
		if (rc) {
			if (rc == -ENOENT) {
				ata_port_printk(ap, KERN_DEBUG,
						"port disabled. ignoring.\n");
				ap->eh_context.i.action &= ~ATA_EH_RESET_MASK;
			} else
				ata_port_printk(ap, KERN_ERR,
					"prereset failed (errno=%d)\n", rc);
			return rc;
		}
	}

	/* prereset() might have modified ehc->i.action */
	if (ehc->i.action & ATA_EH_HARDRESET)
		reset = hardreset;
	else if (ehc->i.action & ATA_EH_SOFTRESET)
		reset = softreset;
	else {
		/* prereset told us not to reset, bang classes and return */
		for (i = 0; i < ATA_MAX_DEVICES; i++)
			classes[i] = ATA_DEV_NONE;
		return 0;
	}

	/* did prereset() screw up?  if so, fix up to avoid oopsing */
	if (!reset) {
		ata_port_printk(ap, KERN_ERR, "BUG: prereset() requested "
				"invalid reset type\n");
		if (softreset)
			reset = softreset;
		else
			reset = hardreset;
	}

 retry:
	/* shut up during boot probing */
	if (verbose)
		ata_port_printk(ap, KERN_INFO, "%s resetting port\n",
				reset == softreset ? "soft" : "hard");
"soft" : "hard"); 1639 1640 /* mark that this EH session started with reset */ 1641 ehc->i.flags |= ATA_EHI_DID_RESET; 1642 1643 rc = ata_do_reset(ap, reset, classes); 1644 1645 did_followup_srst = 0; 1646 if (reset == hardreset && 1647 ata_eh_followup_srst_needed(rc, classify, classes)) { 1648 /* okay, let's do follow-up softreset */ 1649 did_followup_srst = 1; 1650 reset = softreset; 1651 1652 if (!reset) { 1653 ata_port_printk(ap, KERN_ERR, 1654 "follow-up softreset required " 1655 "but no softreset avaliable\n"); 1656 return -EINVAL; 1657 } 1658 1659 ata_eh_about_to_do(ap, NULL, ATA_EH_RESET_MASK); 1660 rc = ata_do_reset(ap, reset, classes); 1661 1662 if (rc == 0 && classify && 1663 classes[0] == ATA_DEV_UNKNOWN) { 1664 ata_port_printk(ap, KERN_ERR, 1665 "classification failed\n"); 1666 return -EINVAL; 1667 } 1668 } 1669 1670 if (rc && --tries) { 1671 const char *type; 1672 1673 if (reset == softreset) { 1674 if (did_followup_srst) 1675 type = "follow-up soft"; 1676 else 1677 type = "soft"; 1678 } else 1679 type = "hard"; 1680 1681 ata_port_printk(ap, KERN_WARNING, 1682 "%sreset failed, retrying in 5 secs\n", type); 1683 ssleep(5); 1684 1685 if (reset == hardreset) 1686 sata_down_spd_limit(ap); 1687 if (hardreset) 1688 reset = hardreset; 1689 goto retry; 1690 } 1691 1692 if (rc == 0) { 1693 /* After the reset, the device state is PIO 0 and the 1694 * controller state is undefined. Record the mode. 1695 */ 1696 for (i = 0; i < ATA_MAX_DEVICES; i++) 1697 ap->device[i].pio_mode = XFER_PIO_0; 1698 1699 if (postreset) 1700 postreset(ap, classes); 1701 1702 /* reset successful, schedule revalidation */ 1703 ata_eh_done(ap, NULL, ehc->i.action & ATA_EH_RESET_MASK); 1704 ehc->i.action |= ATA_EH_REVALIDATE; 1705 } 1706 1707 return rc; 1708 } 1709 1710 static int ata_eh_revalidate_and_attach(struct ata_port *ap, 1711 struct ata_device **r_failed_dev) 1712 { 1713 struct ata_eh_context *ehc = &ap->eh_context; 1714 struct ata_device *dev; 1715 unsigned long flags; 1716 int i, rc = 0; 1717 1718 DPRINTK("ENTER\n"); 1719 1720 for (i = 0; i < ATA_MAX_DEVICES; i++) { 1721 unsigned int action, readid_flags = 0; 1722 1723 dev = &ap->device[i]; 1724 action = ata_eh_dev_action(dev); 1725 1726 if (ehc->i.flags & ATA_EHI_DID_RESET) 1727 readid_flags |= ATA_READID_POSTRESET; 1728 1729 if (action & ATA_EH_REVALIDATE && ata_dev_ready(dev)) { 1730 if (ata_port_offline(ap)) { 1731 rc = -EIO; 1732 break; 1733 } 1734 1735 ata_eh_about_to_do(ap, dev, ATA_EH_REVALIDATE); 1736 rc = ata_dev_revalidate(dev, readid_flags); 1737 if (rc) 1738 break; 1739 1740 ata_eh_done(ap, dev, ATA_EH_REVALIDATE); 1741 1742 /* Configuration may have changed, reconfigure 1743 * transfer mode. 1744 */ 1745 ehc->i.flags |= ATA_EHI_SETMODE; 1746 1747 /* schedule the scsi_rescan_device() here */ 1748 queue_work(ata_aux_wq, &(ap->scsi_rescan_task)); 1749 } else if (dev->class == ATA_DEV_UNKNOWN && 1750 ehc->tries[dev->devno] && 1751 ata_class_enabled(ehc->classes[dev->devno])) { 1752 dev->class = ehc->classes[dev->devno]; 1753 1754 rc = ata_dev_read_id(dev, &dev->class, readid_flags, 1755 dev->id); 1756 if (rc == 0) { 1757 ehc->i.flags |= ATA_EHI_PRINTINFO; 1758 rc = ata_dev_configure(dev); 1759 ehc->i.flags &= ~ATA_EHI_PRINTINFO; 1760 } else if (rc == -ENOENT) { 1761 /* IDENTIFY was issued to non-existent 1762 * device. No need to reset. Just 1763 * thaw and kill the device. 

static int ata_eh_revalidate_and_attach(struct ata_port *ap,
					struct ata_device **r_failed_dev)
{
	struct ata_eh_context *ehc = &ap->eh_context;
	struct ata_device *dev;
	unsigned long flags;
	int i, rc = 0;

	DPRINTK("ENTER\n");

	for (i = 0; i < ATA_MAX_DEVICES; i++) {
		unsigned int action, readid_flags = 0;

		dev = &ap->device[i];
		action = ata_eh_dev_action(dev);

		if (ehc->i.flags & ATA_EHI_DID_RESET)
			readid_flags |= ATA_READID_POSTRESET;

		if (action & ATA_EH_REVALIDATE && ata_dev_ready(dev)) {
			if (ata_port_offline(ap)) {
				rc = -EIO;
				break;
			}

			ata_eh_about_to_do(ap, dev, ATA_EH_REVALIDATE);
			rc = ata_dev_revalidate(dev, readid_flags);
			if (rc)
				break;

			ata_eh_done(ap, dev, ATA_EH_REVALIDATE);

			/* Configuration may have changed, reconfigure
			 * transfer mode.
			 */
			ehc->i.flags |= ATA_EHI_SETMODE;

			/* schedule the scsi_rescan_device() here */
			queue_work(ata_aux_wq, &(ap->scsi_rescan_task));
		} else if (dev->class == ATA_DEV_UNKNOWN &&
			   ehc->tries[dev->devno] &&
			   ata_class_enabled(ehc->classes[dev->devno])) {
			dev->class = ehc->classes[dev->devno];

			rc = ata_dev_read_id(dev, &dev->class, readid_flags,
					     dev->id);
			if (rc == 0) {
				ehc->i.flags |= ATA_EHI_PRINTINFO;
				rc = ata_dev_configure(dev);
				ehc->i.flags &= ~ATA_EHI_PRINTINFO;
			} else if (rc == -ENOENT) {
				/* IDENTIFY was issued to non-existent
				 * device.  No need to reset.  Just
				 * thaw and kill the device.
				 */
				ata_eh_thaw_port(ap);
				dev->class = ATA_DEV_UNKNOWN;
				rc = 0;
			}

			if (rc) {
				dev->class = ATA_DEV_UNKNOWN;
				break;
			}

			if (ata_dev_enabled(dev)) {
				spin_lock_irqsave(ap->lock, flags);
				ap->pflags |= ATA_PFLAG_SCSI_HOTPLUG;
				spin_unlock_irqrestore(ap->lock, flags);

				/* new device discovered, configure xfermode */
				ehc->i.flags |= ATA_EHI_SETMODE;
			}
		}
	}

	if (rc)
		*r_failed_dev = dev;

	DPRINTK("EXIT\n");
	return rc;
}

/**
 *	ata_eh_suspend - handle suspend EH action
 *	@ap: target host port
 *	@r_failed_dev: result parameter to indicate failing device
 *
 *	Handle suspend EH action.  Disk devices are spun down and
 *	other types of devices are just marked suspended.  Once
 *	suspended, no EH action to the device is allowed until it is
 *	resumed.
 *
 *	LOCKING:
 *	Kernel thread context (may sleep).
 *
 *	RETURNS:
 *	0 on success, -errno otherwise
 */
static int ata_eh_suspend(struct ata_port *ap, struct ata_device **r_failed_dev)
{
	struct ata_device *dev;
	int i, rc = 0;

	DPRINTK("ENTER\n");

	for (i = 0; i < ATA_MAX_DEVICES; i++) {
		unsigned long flags;
		unsigned int action, err_mask;

		dev = &ap->device[i];
		action = ata_eh_dev_action(dev);

		if (!ata_dev_enabled(dev) || !(action & ATA_EH_SUSPEND))
			continue;

		WARN_ON(dev->flags & ATA_DFLAG_SUSPENDED);

		ata_eh_about_to_do(ap, dev, ATA_EH_SUSPEND);

		if (dev->class == ATA_DEV_ATA && !(action & ATA_EH_PM_FREEZE)) {
			/* flush cache */
			rc = ata_flush_cache(dev);
			if (rc)
				break;

			/* spin down */
			err_mask = ata_do_simple_cmd(dev, ATA_CMD_STANDBYNOW1);
			if (err_mask) {
				ata_dev_printk(dev, KERN_ERR, "failed to "
					       "spin down (err_mask=0x%x)\n",
					       err_mask);
				rc = -EIO;
				break;
			}
		}

		spin_lock_irqsave(ap->lock, flags);
		dev->flags |= ATA_DFLAG_SUSPENDED;
		spin_unlock_irqrestore(ap->lock, flags);

		ata_eh_done(ap, dev, ATA_EH_SUSPEND);
	}

	if (rc)
		*r_failed_dev = dev;

	DPRINTK("EXIT\n");
	return rc;
}

/**
 *	ata_eh_prep_resume - prep for resume EH action
 *	@ap: target host port
 *
 *	Clear SUSPENDED in preparation for scheduled resume actions.
 *	This allows other parts of EH to access the devices being
 *	resumed.
 *
 *	LOCKING:
 *	Kernel thread context (may sleep).
 */
static void ata_eh_prep_resume(struct ata_port *ap)
{
	struct ata_device *dev;
	unsigned long flags;
	int i;

	DPRINTK("ENTER\n");

	for (i = 0; i < ATA_MAX_DEVICES; i++) {
		unsigned int action;

		dev = &ap->device[i];
		action = ata_eh_dev_action(dev);

		if (!ata_dev_enabled(dev) || !(action & ATA_EH_RESUME))
			continue;

		spin_lock_irqsave(ap->lock, flags);
		dev->flags &= ~ATA_DFLAG_SUSPENDED;
		spin_unlock_irqrestore(ap->lock, flags);
	}

	DPRINTK("EXIT\n");
}

/**
 *	ata_eh_resume - handle resume EH action
 *	@ap: target host port
 *	@r_failed_dev: result parameter to indicate failing device
 *
 *	Handle resume EH action.  Target devices are already reset and
 *	revalidated.  Spinning up is the only operation left.
 *
 *	LOCKING:
 *	Kernel thread context (may sleep).
 *
 *	RETURNS:
 *	0 on success, -errno otherwise
 */
static int ata_eh_resume(struct ata_port *ap, struct ata_device **r_failed_dev)
{
	struct ata_device *dev;
	int i, rc = 0;

	DPRINTK("ENTER\n");

	for (i = 0; i < ATA_MAX_DEVICES; i++) {
		unsigned int action, err_mask;

		dev = &ap->device[i];
		action = ata_eh_dev_action(dev);

		if (!ata_dev_enabled(dev) || !(action & ATA_EH_RESUME))
			continue;

		ata_eh_about_to_do(ap, dev, ATA_EH_RESUME);

		if (dev->class == ATA_DEV_ATA && !(action & ATA_EH_PM_FREEZE)) {
			err_mask = ata_do_simple_cmd(dev,
						     ATA_CMD_IDLEIMMEDIATE);
			if (err_mask) {
				ata_dev_printk(dev, KERN_ERR, "failed to "
					       "spin up (err_mask=0x%x)\n",
					       err_mask);
				rc = -EIO;
				break;
			}
		}

		ata_eh_done(ap, dev, ATA_EH_RESUME);
	}

	if (rc)
		*r_failed_dev = dev;

	DPRINTK("EXIT\n");
	return rc;
}

static int ata_port_nr_enabled(struct ata_port *ap)
{
	int i, cnt = 0;

	for (i = 0; i < ATA_MAX_DEVICES; i++)
		if (ata_dev_enabled(&ap->device[i]))
			cnt++;
	return cnt;
}

static int ata_port_nr_vacant(struct ata_port *ap)
{
	int i, cnt = 0;

	for (i = 0; i < ATA_MAX_DEVICES; i++)
		if (ap->device[i].class == ATA_DEV_UNKNOWN)
			cnt++;
	return cnt;
}

static int ata_eh_skip_recovery(struct ata_port *ap)
{
	struct ata_eh_context *ehc = &ap->eh_context;
	int i;

	/* skip if all possible devices are suspended */
	for (i = 0; i < ata_port_max_devices(ap); i++) {
		struct ata_device *dev = &ap->device[i];

		if (!(dev->flags & ATA_DFLAG_SUSPENDED))
			break;
	}

	if (i == ata_port_max_devices(ap))
		return 1;

	/* thaw frozen port, resume link and recover failed devices */
	if ((ap->pflags & ATA_PFLAG_FROZEN) ||
	    (ehc->i.flags & ATA_EHI_RESUME_LINK) || ata_port_nr_enabled(ap))
		return 0;

	/* skip if class codes for all vacant slots are ATA_DEV_NONE */
	for (i = 0; i < ATA_MAX_DEVICES; i++) {
		struct ata_device *dev = &ap->device[i];

		if (dev->class == ATA_DEV_UNKNOWN &&
		    ehc->classes[dev->devno] != ATA_DEV_NONE)
			return 0;
	}

	return 1;
}

/**
 *	ata_eh_recover - recover host port after error
 *	@ap: host port to recover
 *	@prereset: prereset method (can be NULL)
 *	@softreset: softreset method (can be NULL)
 *	@hardreset: hardreset method (can be NULL)
 *	@postreset: postreset method (can be NULL)
 *
 *	This is the alpha and omega, eum and yang, heart and soul of
 *	libata exception handling.  On entry, actions required to
 *	recover the port and hotplug requests are recorded in
 *	eh_context.  This function executes all the operations with
 *	appropriate retries and fallbacks to resurrect failed
 *	devices, detach goners and greet newcomers.
 *
 *	LOCKING:
 *	Kernel thread context (may sleep).
 *
 *	RETURNS:
 *	0 on success, -errno on failure.
 */
static int ata_eh_recover(struct ata_port *ap, ata_prereset_fn_t prereset,
			  ata_reset_fn_t softreset, ata_reset_fn_t hardreset,
			  ata_postreset_fn_t postreset)
{
	struct ata_eh_context *ehc = &ap->eh_context;
	struct ata_device *dev;
	int i, rc;

	DPRINTK("ENTER\n");

	/* prep for recovery */
	for (i = 0; i < ATA_MAX_DEVICES; i++) {
		dev = &ap->device[i];

		ehc->tries[dev->devno] = ATA_EH_DEV_TRIES;

		/* collect port action mask recorded in dev actions */
		ehc->i.action |= ehc->i.dev_action[i] & ~ATA_EH_PERDEV_MASK;
		ehc->i.dev_action[i] &= ATA_EH_PERDEV_MASK;

		/* process hotplug request */
		if (dev->flags & ATA_DFLAG_DETACH)
			ata_eh_detach_dev(dev);

		if (!ata_dev_enabled(dev) &&
		    ((ehc->i.probe_mask & (1 << dev->devno)) &&
		     !(ehc->did_probe_mask & (1 << dev->devno)))) {
			ata_eh_detach_dev(dev);
			ata_dev_init(dev);
			ehc->did_probe_mask |= (1 << dev->devno);
			ehc->i.action |= ATA_EH_SOFTRESET;
		}
	}

 retry:
	rc = 0;

	/* if UNLOADING, finish immediately */
	if (ap->pflags & ATA_PFLAG_UNLOADING)
		goto out;

	/* prep for resume */
	ata_eh_prep_resume(ap);

	/* skip EH if possible. */
	if (ata_eh_skip_recovery(ap))
		ehc->i.action = 0;

	for (i = 0; i < ATA_MAX_DEVICES; i++)
		ehc->classes[i] = ATA_DEV_UNKNOWN;

	/* reset */
	if (ehc->i.action & ATA_EH_RESET_MASK) {
		ata_eh_freeze_port(ap);

		rc = ata_eh_reset(ap, ata_port_nr_vacant(ap), prereset,
				  softreset, hardreset, postreset);
		if (rc) {
			ata_port_printk(ap, KERN_ERR,
					"reset failed, giving up\n");
			goto out;
		}

		ata_eh_thaw_port(ap);
	}

	/* revalidate existing devices and attach new ones */
	rc = ata_eh_revalidate_and_attach(ap, &dev);
	if (rc)
		goto dev_fail;

	/* resume devices */
	rc = ata_eh_resume(ap, &dev);
	if (rc)
		goto dev_fail;

	/* configure transfer mode if necessary */
	if (ehc->i.flags & ATA_EHI_SETMODE) {
		rc = ata_set_mode(ap, &dev);
		if (rc)
			goto dev_fail;
		ehc->i.flags &= ~ATA_EHI_SETMODE;
	}

	/* suspend devices */
	rc = ata_eh_suspend(ap, &dev);
	if (rc)
		goto dev_fail;

	goto out;

 dev_fail:
	ehc->tries[dev->devno]--;

	switch (rc) {
	case -EINVAL:
		/* eeek, something went very wrong, give up */
		ehc->tries[dev->devno] = 0;
		break;

	case -ENODEV:
		/* device missing or wrong IDENTIFY data, schedule probing */
		ehc->i.probe_mask |= (1 << dev->devno);
		/* give it just one more chance */
		ehc->tries[dev->devno] = min(ehc->tries[dev->devno], 1);
		/* fall through */
	case -EIO:
		if (ehc->tries[dev->devno] == 1) {
			/* This is the last chance, better to slow
			 * down than lose it.
/**
 * ata_eh_finish - finish up EH
 * @ap: host port to finish EH for
 *
 * Recovery is complete.  Clean up EH states and retry or finish
 * failed qcs.
 *
 * LOCKING:
 * None.
 */
static void ata_eh_finish(struct ata_port *ap)
{
        int tag;

        /* retry or finish qcs */
        for (tag = 0; tag < ATA_MAX_QUEUE; tag++) {
                struct ata_queued_cmd *qc = __ata_qc_from_tag(ap, tag);

                if (!(qc->flags & ATA_QCFLAG_FAILED))
                        continue;

                if (qc->err_mask) {
                        /* FIXME: Once EH migration is complete,
                         * generate sense data in this function,
                         * considering both err_mask and tf.
                         */
                        if (qc->err_mask & AC_ERR_INVALID)
                                ata_eh_qc_complete(qc);
                        else
                                ata_eh_qc_retry(qc);
                } else {
                        if (qc->flags & ATA_QCFLAG_SENSE_VALID) {
                                ata_eh_qc_complete(qc);
                        } else {
                                /* feed zero TF to sense generation */
                                memset(&qc->result_tf, 0, sizeof(qc->result_tf));
                                ata_eh_qc_retry(qc);
                        }
                }
        }
}

/**
 * ata_do_eh - do standard error handling
 * @ap: host port to handle error for
 * @prereset: prereset method (can be NULL)
 * @softreset: softreset method (can be NULL)
 * @hardreset: hardreset method (can be NULL)
 * @postreset: postreset method (can be NULL)
 *
 * Perform standard error handling sequence.
 *
 * LOCKING:
 * Kernel thread context (may sleep).
 */
void ata_do_eh(struct ata_port *ap, ata_prereset_fn_t prereset,
               ata_reset_fn_t softreset, ata_reset_fn_t hardreset,
               ata_postreset_fn_t postreset)
{
        ata_eh_autopsy(ap);
        ata_eh_report(ap);
        ata_eh_recover(ap, prereset, softreset, hardreset, postreset);
        ata_eh_finish(ap);
}
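/*
 * Illustrative sketch, not part of the original file: ata_do_eh() is
 * the building block for an LLD's ->error_handler.  A SATA driver
 * with no special reset needs would typically pass the stock reset
 * methods, as below.  example_error_handler is a hypothetical name;
 * ata_std_prereset(), ata_std_softreset(), sata_std_hardreset() and
 * ata_std_postreset() are the real library helpers.
 */
#if 0   /* example only */
static void example_error_handler(struct ata_port *ap)
{
        ata_do_eh(ap, ata_std_prereset, ata_std_softreset,
                  sata_std_hardreset, ata_std_postreset);
}
#endif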
/**
 * ata_eh_handle_port_suspend - perform port suspend operation
 * @ap: port to suspend
 *
 * Suspend @ap.
 *
 * LOCKING:
 * Kernel thread context (may sleep).
 */
static void ata_eh_handle_port_suspend(struct ata_port *ap)
{
        unsigned long flags;
        int rc = 0;

        /* are we suspending? */
        spin_lock_irqsave(ap->lock, flags);
        if (!(ap->pflags & ATA_PFLAG_PM_PENDING) ||
            ap->pm_mesg.event == PM_EVENT_ON) {
                spin_unlock_irqrestore(ap->lock, flags);
                return;
        }
        spin_unlock_irqrestore(ap->lock, flags);

        WARN_ON(ap->pflags & ATA_PFLAG_SUSPENDED);

        /* suspend */
        ata_eh_freeze_port(ap);

        if (ap->ops->port_suspend)
                rc = ap->ops->port_suspend(ap, ap->pm_mesg);

        /* report result */
        spin_lock_irqsave(ap->lock, flags);

        ap->pflags &= ~ATA_PFLAG_PM_PENDING;
        if (rc == 0)
                ap->pflags |= ATA_PFLAG_SUSPENDED;
        else
                ata_port_schedule_eh(ap);

        if (ap->pm_result) {
                *ap->pm_result = rc;
                ap->pm_result = NULL;
        }

        spin_unlock_irqrestore(ap->lock, flags);
}

/**
 * ata_eh_handle_port_resume - perform port resume operation
 * @ap: port to resume
 *
 * Resume @ap.
 *
 * This function also waits up to one second until all devices
 * hanging off this port have requested the resume EH action.
 * This prevents invoking EH, and thus reset, multiple times on
 * resume.
 *
 * On DPM resume, where some of the devices might not be resumed
 * together, this may delay port resume by up to one second, but
 * such DPM resumes are rare and a one-second delay isn't too bad.
 *
 * LOCKING:
 * Kernel thread context (may sleep).
 */
static void ata_eh_handle_port_resume(struct ata_port *ap)
{
        unsigned long timeout;
        unsigned long flags;
        int i, rc = 0;

        /* are we resuming? */
        spin_lock_irqsave(ap->lock, flags);
        if (!(ap->pflags & ATA_PFLAG_PM_PENDING) ||
            ap->pm_mesg.event != PM_EVENT_ON) {
                spin_unlock_irqrestore(ap->lock, flags);
                return;
        }
        spin_unlock_irqrestore(ap->lock, flags);

        /* spurious? */
        if (!(ap->pflags & ATA_PFLAG_SUSPENDED))
                goto done;

        if (ap->ops->port_resume)
                rc = ap->ops->port_resume(ap);

        /* give devices time to request EH */
        timeout = jiffies + HZ; /* 1s max */
        while (1) {
                for (i = 0; i < ATA_MAX_DEVICES; i++) {
                        struct ata_device *dev = &ap->device[i];
                        unsigned int action = ata_eh_dev_action(dev);

                        if ((dev->flags & ATA_DFLAG_SUSPENDED) &&
                            !(action & ATA_EH_RESUME))
                                break;
                }

                if (i == ATA_MAX_DEVICES || time_after(jiffies, timeout))
                        break;
                msleep(10);
        }

 done:
        spin_lock_irqsave(ap->lock, flags);
        ap->pflags &= ~(ATA_PFLAG_PM_PENDING | ATA_PFLAG_SUSPENDED);
        if (ap->pm_result) {
                *ap->pm_result = rc;
                ap->pm_result = NULL;
        }
        spin_unlock_irqrestore(ap->lock, flags);
}
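/*
 * Illustrative sketch, not part of the original file: the two
 * handlers above implement EH's half of a handshake.  The PM side
 * records the request under ap->lock, schedules EH, and reads the
 * result back through ap->pm_result, roughly as below.
 * example_request_port_suspend is a hypothetical name modeled on the
 * libata-core PM request path; ata_port_schedule_eh() and
 * ata_port_wait_eh() are the real helpers.
 */
#if 0   /* example only */
static int example_request_port_suspend(struct ata_port *ap,
                                        pm_message_t mesg)
{
        unsigned long flags;
        int rc = 0;

        /* file the request where ata_eh_handle_port_suspend() looks */
        spin_lock_irqsave(ap->lock, flags);
        ap->pm_mesg = mesg;
        ap->pm_result = &rc;
        ap->pflags |= ATA_PFLAG_PM_PENDING;
        ata_port_schedule_eh(ap);
        spin_unlock_irqrestore(ap->lock, flags);

        /* wait for EH to finish before trusting rc */
        ata_port_wait_eh(ap);

        return rc;
}
#endif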