1 /* 2 * libata-eh.c - libata error handling 3 * 4 * Maintained by: Jeff Garzik <jgarzik@pobox.com> 5 * Please ALWAYS copy linux-ide@vger.kernel.org 6 * on emails. 7 * 8 * Copyright 2006 Tejun Heo <htejun@gmail.com> 9 * 10 * 11 * This program is free software; you can redistribute it and/or 12 * modify it under the terms of the GNU General Public License as 13 * published by the Free Software Foundation; either version 2, or 14 * (at your option) any later version. 15 * 16 * This program is distributed in the hope that it will be useful, 17 * but WITHOUT ANY WARRANTY; without even the implied warranty of 18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 19 * General Public License for more details. 20 * 21 * You should have received a copy of the GNU General Public License 22 * along with this program; see the file COPYING. If not, write to 23 * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, 24 * USA. 25 * 26 * 27 * libata documentation is available via 'make {ps|pdf}docs', 28 * as Documentation/DocBook/libata.* 29 * 30 * Hardware documentation available from http://www.t13.org/ and 31 * http://www.sata-io.org/ 32 * 33 */ 34 35 #include <linux/kernel.h> 36 #include <scsi/scsi.h> 37 #include <scsi/scsi_host.h> 38 #include <scsi/scsi_eh.h> 39 #include <scsi/scsi_device.h> 40 #include <scsi/scsi_cmnd.h> 41 #include "../scsi/scsi_transport_api.h" 42 43 #include <linux/libata.h> 44 45 #include "libata.h" 46 47 enum { 48 ATA_EH_SPDN_NCQ_OFF = (1 << 0), 49 ATA_EH_SPDN_SPEED_DOWN = (1 << 1), 50 ATA_EH_SPDN_FALLBACK_TO_PIO = (1 << 2), 51 }; 52 53 /* Waiting in ->prereset can never be reliable. It's sometimes nice 54 * to wait there but it can't be depended upon; otherwise, we wouldn't 55 * be resetting. Just give it enough time for most drives to spin up. 56 */ 57 enum { 58 ATA_EH_PRERESET_TIMEOUT = 10 * HZ, 59 ATA_EH_FASTDRAIN_INTERVAL = 3 * HZ, 60 }; 61 62 /* The following table determines how we sequence resets. 
Each entry 63 * represents timeout for that try. The first try can be soft or 64 * hardreset. All others are hardreset if available. In most cases 65 * the first reset w/ 10sec timeout should succeed. Following entries 66 * are mostly for error handling, hotplug and retarded devices. 67 */ 68 static const unsigned long ata_eh_reset_timeouts[] = { 69 10 * HZ, /* most drives spin up by 10sec */ 70 10 * HZ, /* > 99% working drives spin up before 20sec */ 71 35 * HZ, /* give > 30 secs of idleness for retarded devices */ 72 5 * HZ, /* and sweet one last chance */ 73 /* > 1 min has elapsed, give up */ 74 }; 75 76 static void __ata_port_freeze(struct ata_port *ap); 77 static void ata_eh_finish(struct ata_port *ap); 78 #ifdef CONFIG_PM 79 static void ata_eh_handle_port_suspend(struct ata_port *ap); 80 static void ata_eh_handle_port_resume(struct ata_port *ap); 81 #else /* CONFIG_PM */ 82 static void ata_eh_handle_port_suspend(struct ata_port *ap) 83 { } 84 85 static void ata_eh_handle_port_resume(struct ata_port *ap) 86 { } 87 #endif /* CONFIG_PM */ 88 89 static void __ata_ehi_pushv_desc(struct ata_eh_info *ehi, const char *fmt, 90 va_list args) 91 { 92 ehi->desc_len += vscnprintf(ehi->desc + ehi->desc_len, 93 ATA_EH_DESC_LEN - ehi->desc_len, 94 fmt, args); 95 } 96 97 /** 98 * __ata_ehi_push_desc - push error description without adding separator 99 * @ehi: target EHI 100 * @fmt: printf format string 101 * 102 * Format string according to @fmt and append it to @ehi->desc. 103 * 104 * LOCKING: 105 * spin_lock_irqsave(host lock) 106 */ 107 void __ata_ehi_push_desc(struct ata_eh_info *ehi, const char *fmt, ...) 108 { 109 va_list args; 110 111 va_start(args, fmt); 112 __ata_ehi_pushv_desc(ehi, fmt, args); 113 va_end(args); 114 } 115 116 /** 117 * ata_ehi_push_desc - push error description with separator 118 * @ehi: target EHI 119 * @fmt: printf format string 120 * 121 * Format string according to @fmt and append it to @ehi->desc. 
122 * If @ehi->desc is not empty, ", " is added in-between. 123 * 124 * LOCKING: 125 * spin_lock_irqsave(host lock) 126 */ 127 void ata_ehi_push_desc(struct ata_eh_info *ehi, const char *fmt, ...) 128 { 129 va_list args; 130 131 if (ehi->desc_len) 132 __ata_ehi_push_desc(ehi, ", "); 133 134 va_start(args, fmt); 135 __ata_ehi_pushv_desc(ehi, fmt, args); 136 va_end(args); 137 } 138 139 /** 140 * ata_ehi_clear_desc - clean error description 141 * @ehi: target EHI 142 * 143 * Clear @ehi->desc. 144 * 145 * LOCKING: 146 * spin_lock_irqsave(host lock) 147 */ 148 void ata_ehi_clear_desc(struct ata_eh_info *ehi) 149 { 150 ehi->desc[0] = '\0'; 151 ehi->desc_len = 0; 152 } 153 154 static void ata_ering_record(struct ata_ering *ering, int is_io, 155 unsigned int err_mask) 156 { 157 struct ata_ering_entry *ent; 158 159 WARN_ON(!err_mask); 160 161 ering->cursor++; 162 ering->cursor %= ATA_ERING_SIZE; 163 164 ent = &ering->ring[ering->cursor]; 165 ent->is_io = is_io; 166 ent->err_mask = err_mask; 167 ent->timestamp = get_jiffies_64(); 168 } 169 170 static void ata_ering_clear(struct ata_ering *ering) 171 { 172 memset(ering, 0, sizeof(*ering)); 173 } 174 175 static int ata_ering_map(struct ata_ering *ering, 176 int (*map_fn)(struct ata_ering_entry *, void *), 177 void *arg) 178 { 179 int idx, rc = 0; 180 struct ata_ering_entry *ent; 181 182 idx = ering->cursor; 183 do { 184 ent = &ering->ring[idx]; 185 if (!ent->err_mask) 186 break; 187 rc = map_fn(ent, arg); 188 if (rc) 189 break; 190 idx = (idx - 1 + ATA_ERING_SIZE) % ATA_ERING_SIZE; 191 } while (idx != ering->cursor); 192 193 return rc; 194 } 195 196 static unsigned int ata_eh_dev_action(struct ata_device *dev) 197 { 198 struct ata_eh_context *ehc = &dev->ap->eh_context; 199 200 return ehc->i.action | ehc->i.dev_action[dev->devno]; 201 } 202 203 static void ata_eh_clear_action(struct ata_device *dev, 204 struct ata_eh_info *ehi, unsigned int action) 205 { 206 int i; 207 208 if (!dev) { 209 ehi->action &= ~action; 210 for (i = 
0; i < ATA_MAX_DEVICES; i++) 211 ehi->dev_action[i] &= ~action; 212 } else { 213 /* doesn't make sense for port-wide EH actions */ 214 WARN_ON(!(action & ATA_EH_PERDEV_MASK)); 215 216 /* break ehi->action into ehi->dev_action */ 217 if (ehi->action & action) { 218 for (i = 0; i < ATA_MAX_DEVICES; i++) 219 ehi->dev_action[i] |= ehi->action & action; 220 ehi->action &= ~action; 221 } 222 223 /* turn off the specified per-dev action */ 224 ehi->dev_action[dev->devno] &= ~action; 225 } 226 } 227 228 /** 229 * ata_scsi_timed_out - SCSI layer time out callback 230 * @cmd: timed out SCSI command 231 * 232 * Handles SCSI layer timeout. We race with normal completion of 233 * the qc for @cmd. If the qc is already gone, we lose and let 234 * the scsi command finish (EH_HANDLED). Otherwise, the qc has 235 * timed out and EH should be invoked. Prevent ata_qc_complete() 236 * from finishing it by setting EH_SCHEDULED and return 237 * EH_NOT_HANDLED. 238 * 239 * TODO: kill this function once old EH is gone. 
240 * 241 * LOCKING: 242 * Called from timer context 243 * 244 * RETURNS: 245 * EH_HANDLED or EH_NOT_HANDLED 246 */ 247 enum scsi_eh_timer_return ata_scsi_timed_out(struct scsi_cmnd *cmd) 248 { 249 struct Scsi_Host *host = cmd->device->host; 250 struct ata_port *ap = ata_shost_to_port(host); 251 unsigned long flags; 252 struct ata_queued_cmd *qc; 253 enum scsi_eh_timer_return ret; 254 255 DPRINTK("ENTER\n"); 256 257 if (ap->ops->error_handler) { 258 ret = EH_NOT_HANDLED; 259 goto out; 260 } 261 262 ret = EH_HANDLED; 263 spin_lock_irqsave(ap->lock, flags); 264 qc = ata_qc_from_tag(ap, ap->active_tag); 265 if (qc) { 266 WARN_ON(qc->scsicmd != cmd); 267 qc->flags |= ATA_QCFLAG_EH_SCHEDULED; 268 qc->err_mask |= AC_ERR_TIMEOUT; 269 ret = EH_NOT_HANDLED; 270 } 271 spin_unlock_irqrestore(ap->lock, flags); 272 273 out: 274 DPRINTK("EXIT, ret=%d\n", ret); 275 return ret; 276 } 277 278 /** 279 * ata_scsi_error - SCSI layer error handler callback 280 * @host: SCSI host on which error occurred 281 * 282 * Handles SCSI-layer-thrown error events. 283 * 284 * LOCKING: 285 * Inherited from SCSI layer (none, can sleep) 286 * 287 * RETURNS: 288 * Zero. 289 */ 290 void ata_scsi_error(struct Scsi_Host *host) 291 { 292 struct ata_port *ap = ata_shost_to_port(host); 293 int i, repeat_cnt = ATA_EH_MAX_REPEAT; 294 unsigned long flags; 295 296 DPRINTK("ENTER\n"); 297 298 /* synchronize with port task */ 299 ata_port_flush_task(ap); 300 301 /* synchronize with host lock and sort out timeouts */ 302 303 /* For new EH, all qcs are finished in one of three ways - 304 * normal completion, error completion, and SCSI timeout. 305 * Both cmpletions can race against SCSI timeout. When normal 306 * completion wins, the qc never reaches EH. When error 307 * completion wins, the qc has ATA_QCFLAG_FAILED set. 308 * 309 * When SCSI timeout wins, things are a bit more complex. 310 * Normal or error completion can occur after the timeout but 311 * before this point. 
In such cases, both types of 312 * completions are honored. A scmd is determined to have 313 * timed out iff its associated qc is active and not failed. 314 */ 315 if (ap->ops->error_handler) { 316 struct scsi_cmnd *scmd, *tmp; 317 int nr_timedout = 0; 318 319 spin_lock_irqsave(ap->lock, flags); 320 321 list_for_each_entry_safe(scmd, tmp, &host->eh_cmd_q, eh_entry) { 322 struct ata_queued_cmd *qc; 323 324 for (i = 0; i < ATA_MAX_QUEUE; i++) { 325 qc = __ata_qc_from_tag(ap, i); 326 if (qc->flags & ATA_QCFLAG_ACTIVE && 327 qc->scsicmd == scmd) 328 break; 329 } 330 331 if (i < ATA_MAX_QUEUE) { 332 /* the scmd has an associated qc */ 333 if (!(qc->flags & ATA_QCFLAG_FAILED)) { 334 /* which hasn't failed yet, timeout */ 335 qc->err_mask |= AC_ERR_TIMEOUT; 336 qc->flags |= ATA_QCFLAG_FAILED; 337 nr_timedout++; 338 } 339 } else { 340 /* Normal completion occurred after 341 * SCSI timeout but before this point. 342 * Successfully complete it. 343 */ 344 scmd->retries = scmd->allowed; 345 scsi_eh_finish_cmd(scmd, &ap->eh_done_q); 346 } 347 } 348 349 /* If we have timed out qcs. They belong to EH from 350 * this point but the state of the controller is 351 * unknown. Freeze the port to make sure the IRQ 352 * handler doesn't diddle with those qcs. This must 353 * be done atomically w.r.t. setting QCFLAG_FAILED. 
354 */ 355 if (nr_timedout) 356 __ata_port_freeze(ap); 357 358 spin_unlock_irqrestore(ap->lock, flags); 359 } else 360 spin_unlock_wait(ap->lock); 361 362 repeat: 363 /* invoke error handler */ 364 if (ap->ops->error_handler) { 365 /* kill fast drain timer */ 366 del_timer_sync(&ap->fastdrain_timer); 367 368 /* process port resume request */ 369 ata_eh_handle_port_resume(ap); 370 371 /* fetch & clear EH info */ 372 spin_lock_irqsave(ap->lock, flags); 373 374 memset(&ap->eh_context, 0, sizeof(ap->eh_context)); 375 ap->eh_context.i = ap->eh_info; 376 memset(&ap->eh_info, 0, sizeof(ap->eh_info)); 377 378 ap->pflags |= ATA_PFLAG_EH_IN_PROGRESS; 379 ap->pflags &= ~ATA_PFLAG_EH_PENDING; 380 381 spin_unlock_irqrestore(ap->lock, flags); 382 383 /* invoke EH, skip if unloading or suspended */ 384 if (!(ap->pflags & (ATA_PFLAG_UNLOADING | ATA_PFLAG_SUSPENDED))) 385 ap->ops->error_handler(ap); 386 else 387 ata_eh_finish(ap); 388 389 /* process port suspend request */ 390 ata_eh_handle_port_suspend(ap); 391 392 /* Exception might have happend after ->error_handler 393 * recovered the port but before this point. Repeat 394 * EH in such case. 395 */ 396 spin_lock_irqsave(ap->lock, flags); 397 398 if (ap->pflags & ATA_PFLAG_EH_PENDING) { 399 if (--repeat_cnt) { 400 ata_port_printk(ap, KERN_INFO, 401 "EH pending after completion, " 402 "repeating EH (cnt=%d)\n", repeat_cnt); 403 spin_unlock_irqrestore(ap->lock, flags); 404 goto repeat; 405 } 406 ata_port_printk(ap, KERN_ERR, "EH pending after %d " 407 "tries, giving up\n", ATA_EH_MAX_REPEAT); 408 ap->pflags &= ~ATA_PFLAG_EH_PENDING; 409 } 410 411 /* this run is complete, make sure EH info is clear */ 412 memset(&ap->eh_info, 0, sizeof(ap->eh_info)); 413 414 /* Clear host_eh_scheduled while holding ap->lock such 415 * that if exception occurs after this point but 416 * before EH completion, SCSI midlayer will 417 * re-initiate EH. 
418 */ 419 host->host_eh_scheduled = 0; 420 421 spin_unlock_irqrestore(ap->lock, flags); 422 } else { 423 WARN_ON(ata_qc_from_tag(ap, ap->active_tag) == NULL); 424 ap->ops->eng_timeout(ap); 425 } 426 427 /* finish or retry handled scmd's and clean up */ 428 WARN_ON(host->host_failed || !list_empty(&host->eh_cmd_q)); 429 430 scsi_eh_flush_done_q(&ap->eh_done_q); 431 432 /* clean up */ 433 spin_lock_irqsave(ap->lock, flags); 434 435 if (ap->pflags & ATA_PFLAG_LOADING) 436 ap->pflags &= ~ATA_PFLAG_LOADING; 437 else if (ap->pflags & ATA_PFLAG_SCSI_HOTPLUG) 438 queue_delayed_work(ata_aux_wq, &ap->hotplug_task, 0); 439 440 if (ap->pflags & ATA_PFLAG_RECOVERED) 441 ata_port_printk(ap, KERN_INFO, "EH complete\n"); 442 443 ap->pflags &= ~(ATA_PFLAG_SCSI_HOTPLUG | ATA_PFLAG_RECOVERED); 444 445 /* tell wait_eh that we're done */ 446 ap->pflags &= ~ATA_PFLAG_EH_IN_PROGRESS; 447 wake_up_all(&ap->eh_wait_q); 448 449 spin_unlock_irqrestore(ap->lock, flags); 450 451 DPRINTK("EXIT\n"); 452 } 453 454 /** 455 * ata_port_wait_eh - Wait for the currently pending EH to complete 456 * @ap: Port to wait EH for 457 * 458 * Wait until the currently pending EH is complete. 459 * 460 * LOCKING: 461 * Kernel thread context (may sleep). 
462 */ 463 void ata_port_wait_eh(struct ata_port *ap) 464 { 465 unsigned long flags; 466 DEFINE_WAIT(wait); 467 468 retry: 469 spin_lock_irqsave(ap->lock, flags); 470 471 while (ap->pflags & (ATA_PFLAG_EH_PENDING | ATA_PFLAG_EH_IN_PROGRESS)) { 472 prepare_to_wait(&ap->eh_wait_q, &wait, TASK_UNINTERRUPTIBLE); 473 spin_unlock_irqrestore(ap->lock, flags); 474 schedule(); 475 spin_lock_irqsave(ap->lock, flags); 476 } 477 finish_wait(&ap->eh_wait_q, &wait); 478 479 spin_unlock_irqrestore(ap->lock, flags); 480 481 /* make sure SCSI EH is complete */ 482 if (scsi_host_in_recovery(ap->scsi_host)) { 483 msleep(10); 484 goto retry; 485 } 486 } 487 488 /** 489 * ata_qc_timeout - Handle timeout of queued command 490 * @qc: Command that timed out 491 * 492 * Some part of the kernel (currently, only the SCSI layer) 493 * has noticed that the active command on port @ap has not 494 * completed after a specified length of time. Handle this 495 * condition by disabling DMA (if necessary) and completing 496 * transactions, with error if necessary. 497 * 498 * This also handles the case of the "lost interrupt", where 499 * for some reason (possibly hardware bug, possibly driver bug) 500 * an interrupt was not delivered to the driver, even though the 501 * transaction completed successfully. 502 * 503 * TODO: kill this function once old EH is gone. 
504 * 505 * LOCKING: 506 * Inherited from SCSI layer (none, can sleep) 507 */ 508 static void ata_qc_timeout(struct ata_queued_cmd *qc) 509 { 510 struct ata_port *ap = qc->ap; 511 u8 host_stat = 0, drv_stat; 512 unsigned long flags; 513 514 DPRINTK("ENTER\n"); 515 516 ap->hsm_task_state = HSM_ST_IDLE; 517 518 spin_lock_irqsave(ap->lock, flags); 519 520 switch (qc->tf.protocol) { 521 522 case ATA_PROT_DMA: 523 case ATA_PROT_ATAPI_DMA: 524 host_stat = ap->ops->bmdma_status(ap); 525 526 /* before we do anything else, clear DMA-Start bit */ 527 ap->ops->bmdma_stop(qc); 528 529 /* fall through */ 530 531 default: 532 ata_altstatus(ap); 533 drv_stat = ata_chk_status(ap); 534 535 /* ack bmdma irq events */ 536 ap->ops->irq_clear(ap); 537 538 ata_dev_printk(qc->dev, KERN_ERR, "command 0x%x timeout, " 539 "stat 0x%x host_stat 0x%x\n", 540 qc->tf.command, drv_stat, host_stat); 541 542 /* complete taskfile transaction */ 543 qc->err_mask |= AC_ERR_TIMEOUT; 544 break; 545 } 546 547 spin_unlock_irqrestore(ap->lock, flags); 548 549 ata_eh_qc_complete(qc); 550 551 DPRINTK("EXIT\n"); 552 } 553 554 /** 555 * ata_eng_timeout - Handle timeout of queued command 556 * @ap: Port on which timed-out command is active 557 * 558 * Some part of the kernel (currently, only the SCSI layer) 559 * has noticed that the active command on port @ap has not 560 * completed after a specified length of time. Handle this 561 * condition by disabling DMA (if necessary) and completing 562 * transactions, with error if necessary. 563 * 564 * This also handles the case of the "lost interrupt", where 565 * for some reason (possibly hardware bug, possibly driver bug) 566 * an interrupt was not delivered to the driver, even though the 567 * transaction completed successfully. 568 * 569 * TODO: kill this function once old EH is gone. 
570 * 571 * LOCKING: 572 * Inherited from SCSI layer (none, can sleep) 573 */ 574 void ata_eng_timeout(struct ata_port *ap) 575 { 576 DPRINTK("ENTER\n"); 577 578 ata_qc_timeout(ata_qc_from_tag(ap, ap->active_tag)); 579 580 DPRINTK("EXIT\n"); 581 } 582 583 static int ata_eh_nr_in_flight(struct ata_port *ap) 584 { 585 unsigned int tag; 586 int nr = 0; 587 588 /* count only non-internal commands */ 589 for (tag = 0; tag < ATA_MAX_QUEUE - 1; tag++) 590 if (ata_qc_from_tag(ap, tag)) 591 nr++; 592 593 return nr; 594 } 595 596 void ata_eh_fastdrain_timerfn(unsigned long arg) 597 { 598 struct ata_port *ap = (void *)arg; 599 unsigned long flags; 600 int cnt; 601 602 spin_lock_irqsave(ap->lock, flags); 603 604 cnt = ata_eh_nr_in_flight(ap); 605 606 /* are we done? */ 607 if (!cnt) 608 goto out_unlock; 609 610 if (cnt == ap->fastdrain_cnt) { 611 unsigned int tag; 612 613 /* No progress during the last interval, tag all 614 * in-flight qcs as timed out and freeze the port. 615 */ 616 for (tag = 0; tag < ATA_MAX_QUEUE - 1; tag++) { 617 struct ata_queued_cmd *qc = ata_qc_from_tag(ap, tag); 618 if (qc) 619 qc->err_mask |= AC_ERR_TIMEOUT; 620 } 621 622 ata_port_freeze(ap); 623 } else { 624 /* some qcs have finished, give it another chance */ 625 ap->fastdrain_cnt = cnt; 626 ap->fastdrain_timer.expires = 627 jiffies + ATA_EH_FASTDRAIN_INTERVAL; 628 add_timer(&ap->fastdrain_timer); 629 } 630 631 out_unlock: 632 spin_unlock_irqrestore(ap->lock, flags); 633 } 634 635 /** 636 * ata_eh_set_pending - set ATA_PFLAG_EH_PENDING and activate fast drain 637 * @ap: target ATA port 638 * @fastdrain: activate fast drain 639 * 640 * Set ATA_PFLAG_EH_PENDING and activate fast drain if @fastdrain 641 * is non-zero and EH wasn't pending before. Fast drain ensures 642 * that EH kicks in in timely manner. 643 * 644 * LOCKING: 645 * spin_lock_irqsave(host lock) 646 */ 647 static void ata_eh_set_pending(struct ata_port *ap, int fastdrain) 648 { 649 int cnt; 650 651 /* already scheduled? 
*/ 652 if (ap->pflags & ATA_PFLAG_EH_PENDING) 653 return; 654 655 ap->pflags |= ATA_PFLAG_EH_PENDING; 656 657 if (!fastdrain) 658 return; 659 660 /* do we have in-flight qcs? */ 661 cnt = ata_eh_nr_in_flight(ap); 662 if (!cnt) 663 return; 664 665 /* activate fast drain */ 666 ap->fastdrain_cnt = cnt; 667 ap->fastdrain_timer.expires = jiffies + ATA_EH_FASTDRAIN_INTERVAL; 668 add_timer(&ap->fastdrain_timer); 669 } 670 671 /** 672 * ata_qc_schedule_eh - schedule qc for error handling 673 * @qc: command to schedule error handling for 674 * 675 * Schedule error handling for @qc. EH will kick in as soon as 676 * other commands are drained. 677 * 678 * LOCKING: 679 * spin_lock_irqsave(host lock) 680 */ 681 void ata_qc_schedule_eh(struct ata_queued_cmd *qc) 682 { 683 struct ata_port *ap = qc->ap; 684 685 WARN_ON(!ap->ops->error_handler); 686 687 qc->flags |= ATA_QCFLAG_FAILED; 688 ata_eh_set_pending(ap, 1); 689 690 /* The following will fail if timeout has already expired. 691 * ata_scsi_error() takes care of such scmds on EH entry. 692 * Note that ATA_QCFLAG_FAILED is unconditionally set after 693 * this function completes. 694 */ 695 scsi_req_abort_cmd(qc->scsicmd); 696 } 697 698 /** 699 * ata_port_schedule_eh - schedule error handling without a qc 700 * @ap: ATA port to schedule EH for 701 * 702 * Schedule error handling for @ap. EH will kick in as soon as 703 * all commands are drained. 704 * 705 * LOCKING: 706 * spin_lock_irqsave(host lock) 707 */ 708 void ata_port_schedule_eh(struct ata_port *ap) 709 { 710 WARN_ON(!ap->ops->error_handler); 711 712 if (ap->pflags & ATA_PFLAG_INITIALIZING) 713 return; 714 715 ata_eh_set_pending(ap, 1); 716 scsi_schedule_eh(ap->scsi_host); 717 718 DPRINTK("port EH scheduled\n"); 719 } 720 721 /** 722 * ata_port_abort - abort all qc's on the port 723 * @ap: ATA port to abort qc's for 724 * 725 * Abort all active qc's of @ap and schedule EH. 
726 * 727 * LOCKING: 728 * spin_lock_irqsave(host lock) 729 * 730 * RETURNS: 731 * Number of aborted qc's. 732 */ 733 int ata_port_abort(struct ata_port *ap) 734 { 735 int tag, nr_aborted = 0; 736 737 WARN_ON(!ap->ops->error_handler); 738 739 /* we're gonna abort all commands, no need for fast drain */ 740 ata_eh_set_pending(ap, 0); 741 742 for (tag = 0; tag < ATA_MAX_QUEUE; tag++) { 743 struct ata_queued_cmd *qc = ata_qc_from_tag(ap, tag); 744 745 if (qc) { 746 qc->flags |= ATA_QCFLAG_FAILED; 747 ata_qc_complete(qc); 748 nr_aborted++; 749 } 750 } 751 752 if (!nr_aborted) 753 ata_port_schedule_eh(ap); 754 755 return nr_aborted; 756 } 757 758 /** 759 * __ata_port_freeze - freeze port 760 * @ap: ATA port to freeze 761 * 762 * This function is called when HSM violation or some other 763 * condition disrupts normal operation of the port. Frozen port 764 * is not allowed to perform any operation until the port is 765 * thawed, which usually follows a successful reset. 766 * 767 * ap->ops->freeze() callback can be used for freezing the port 768 * hardware-wise (e.g. mask interrupt and stop DMA engine). If a 769 * port cannot be frozen hardware-wise, the interrupt handler 770 * must ack and clear interrupts unconditionally while the port 771 * is frozen. 772 * 773 * LOCKING: 774 * spin_lock_irqsave(host lock) 775 */ 776 static void __ata_port_freeze(struct ata_port *ap) 777 { 778 WARN_ON(!ap->ops->error_handler); 779 780 if (ap->ops->freeze) 781 ap->ops->freeze(ap); 782 783 ap->pflags |= ATA_PFLAG_FROZEN; 784 785 DPRINTK("ata%u port frozen\n", ap->print_id); 786 } 787 788 /** 789 * ata_port_freeze - abort & freeze port 790 * @ap: ATA port to freeze 791 * 792 * Abort and freeze @ap. 793 * 794 * LOCKING: 795 * spin_lock_irqsave(host lock) 796 * 797 * RETURNS: 798 * Number of aborted commands. 
799 */ 800 int ata_port_freeze(struct ata_port *ap) 801 { 802 int nr_aborted; 803 804 WARN_ON(!ap->ops->error_handler); 805 806 nr_aborted = ata_port_abort(ap); 807 __ata_port_freeze(ap); 808 809 return nr_aborted; 810 } 811 812 /** 813 * ata_eh_freeze_port - EH helper to freeze port 814 * @ap: ATA port to freeze 815 * 816 * Freeze @ap. 817 * 818 * LOCKING: 819 * None. 820 */ 821 void ata_eh_freeze_port(struct ata_port *ap) 822 { 823 unsigned long flags; 824 825 if (!ap->ops->error_handler) 826 return; 827 828 spin_lock_irqsave(ap->lock, flags); 829 __ata_port_freeze(ap); 830 spin_unlock_irqrestore(ap->lock, flags); 831 } 832 833 /** 834 * ata_port_thaw_port - EH helper to thaw port 835 * @ap: ATA port to thaw 836 * 837 * Thaw frozen port @ap. 838 * 839 * LOCKING: 840 * None. 841 */ 842 void ata_eh_thaw_port(struct ata_port *ap) 843 { 844 unsigned long flags; 845 846 if (!ap->ops->error_handler) 847 return; 848 849 spin_lock_irqsave(ap->lock, flags); 850 851 ap->pflags &= ~ATA_PFLAG_FROZEN; 852 853 if (ap->ops->thaw) 854 ap->ops->thaw(ap); 855 856 spin_unlock_irqrestore(ap->lock, flags); 857 858 DPRINTK("ata%u port thawed\n", ap->print_id); 859 } 860 861 static void ata_eh_scsidone(struct scsi_cmnd *scmd) 862 { 863 /* nada */ 864 } 865 866 static void __ata_eh_qc_complete(struct ata_queued_cmd *qc) 867 { 868 struct ata_port *ap = qc->ap; 869 struct scsi_cmnd *scmd = qc->scsicmd; 870 unsigned long flags; 871 872 spin_lock_irqsave(ap->lock, flags); 873 qc->scsidone = ata_eh_scsidone; 874 __ata_qc_complete(qc); 875 WARN_ON(ata_tag_valid(qc->tag)); 876 spin_unlock_irqrestore(ap->lock, flags); 877 878 scsi_eh_finish_cmd(scmd, &ap->eh_done_q); 879 } 880 881 /** 882 * ata_eh_qc_complete - Complete an active ATA command from EH 883 * @qc: Command to complete 884 * 885 * Indicate to the mid and upper layers that an ATA command has 886 * completed. To be used from EH. 
887 */ 888 void ata_eh_qc_complete(struct ata_queued_cmd *qc) 889 { 890 struct scsi_cmnd *scmd = qc->scsicmd; 891 scmd->retries = scmd->allowed; 892 __ata_eh_qc_complete(qc); 893 } 894 895 /** 896 * ata_eh_qc_retry - Tell midlayer to retry an ATA command after EH 897 * @qc: Command to retry 898 * 899 * Indicate to the mid and upper layers that an ATA command 900 * should be retried. To be used from EH. 901 * 902 * SCSI midlayer limits the number of retries to scmd->allowed. 903 * scmd->retries is decremented for commands which get retried 904 * due to unrelated failures (qc->err_mask is zero). 905 */ 906 void ata_eh_qc_retry(struct ata_queued_cmd *qc) 907 { 908 struct scsi_cmnd *scmd = qc->scsicmd; 909 if (!qc->err_mask && scmd->retries) 910 scmd->retries--; 911 __ata_eh_qc_complete(qc); 912 } 913 914 /** 915 * ata_eh_detach_dev - detach ATA device 916 * @dev: ATA device to detach 917 * 918 * Detach @dev. 919 * 920 * LOCKING: 921 * None. 922 */ 923 static void ata_eh_detach_dev(struct ata_device *dev) 924 { 925 struct ata_port *ap = dev->ap; 926 unsigned long flags; 927 928 ata_dev_disable(dev); 929 930 spin_lock_irqsave(ap->lock, flags); 931 932 dev->flags &= ~ATA_DFLAG_DETACH; 933 934 if (ata_scsi_offline_dev(dev)) { 935 dev->flags |= ATA_DFLAG_DETACHED; 936 ap->pflags |= ATA_PFLAG_SCSI_HOTPLUG; 937 } 938 939 /* clear per-dev EH actions */ 940 ata_eh_clear_action(dev, &ap->eh_info, ATA_EH_PERDEV_MASK); 941 ata_eh_clear_action(dev, &ap->eh_context.i, ATA_EH_PERDEV_MASK); 942 943 spin_unlock_irqrestore(ap->lock, flags); 944 } 945 946 /** 947 * ata_eh_about_to_do - about to perform eh_action 948 * @ap: target ATA port 949 * @dev: target ATA dev for per-dev action (can be NULL) 950 * @action: action about to be performed 951 * 952 * Called just before performing EH actions to clear related bits 953 * in @ap->eh_info such that eh actions are not unnecessarily 954 * repeated. 955 * 956 * LOCKING: 957 * None. 
958 */ 959 static void ata_eh_about_to_do(struct ata_port *ap, struct ata_device *dev, 960 unsigned int action) 961 { 962 unsigned long flags; 963 struct ata_eh_info *ehi = &ap->eh_info; 964 struct ata_eh_context *ehc = &ap->eh_context; 965 966 spin_lock_irqsave(ap->lock, flags); 967 968 /* Reset is represented by combination of actions and EHI 969 * flags. Suck in all related bits before clearing eh_info to 970 * avoid losing requested action. 971 */ 972 if (action & ATA_EH_RESET_MASK) { 973 ehc->i.action |= ehi->action & ATA_EH_RESET_MASK; 974 ehc->i.flags |= ehi->flags & ATA_EHI_RESET_MODIFIER_MASK; 975 976 /* make sure all reset actions are cleared & clear EHI flags */ 977 action |= ATA_EH_RESET_MASK; 978 ehi->flags &= ~ATA_EHI_RESET_MODIFIER_MASK; 979 } 980 981 ata_eh_clear_action(dev, ehi, action); 982 983 if (!(ehc->i.flags & ATA_EHI_QUIET)) 984 ap->pflags |= ATA_PFLAG_RECOVERED; 985 986 spin_unlock_irqrestore(ap->lock, flags); 987 } 988 989 /** 990 * ata_eh_done - EH action complete 991 * @ap: target ATA port 992 * @dev: target ATA dev for per-dev action (can be NULL) 993 * @action: action just completed 994 * 995 * Called right after performing EH actions to clear related bits 996 * in @ap->eh_context. 997 * 998 * LOCKING: 999 * None. 1000 */ 1001 static void ata_eh_done(struct ata_port *ap, struct ata_device *dev, 1002 unsigned int action) 1003 { 1004 /* if reset is complete, clear all reset actions & reset modifier */ 1005 if (action & ATA_EH_RESET_MASK) { 1006 action |= ATA_EH_RESET_MASK; 1007 ap->eh_context.i.flags &= ~ATA_EHI_RESET_MODIFIER_MASK; 1008 } 1009 1010 ata_eh_clear_action(dev, &ap->eh_context.i, action); 1011 } 1012 1013 /** 1014 * ata_err_string - convert err_mask to descriptive string 1015 * @err_mask: error mask to convert to string 1016 * 1017 * Convert @err_mask to descriptive string. Errors are 1018 * prioritized according to severity and only the most severe 1019 * error is reported. 1020 * 1021 * LOCKING: 1022 * None. 
1023 * 1024 * RETURNS: 1025 * Descriptive string for @err_mask 1026 */ 1027 static const char * ata_err_string(unsigned int err_mask) 1028 { 1029 if (err_mask & AC_ERR_HOST_BUS) 1030 return "host bus error"; 1031 if (err_mask & AC_ERR_ATA_BUS) 1032 return "ATA bus error"; 1033 if (err_mask & AC_ERR_TIMEOUT) 1034 return "timeout"; 1035 if (err_mask & AC_ERR_HSM) 1036 return "HSM violation"; 1037 if (err_mask & AC_ERR_SYSTEM) 1038 return "internal error"; 1039 if (err_mask & AC_ERR_MEDIA) 1040 return "media error"; 1041 if (err_mask & AC_ERR_INVALID) 1042 return "invalid argument"; 1043 if (err_mask & AC_ERR_DEV) 1044 return "device error"; 1045 return "unknown error"; 1046 } 1047 1048 /** 1049 * ata_read_log_page - read a specific log page 1050 * @dev: target device 1051 * @page: page to read 1052 * @buf: buffer to store read page 1053 * @sectors: number of sectors to read 1054 * 1055 * Read log page using READ_LOG_EXT command. 1056 * 1057 * LOCKING: 1058 * Kernel thread context (may sleep). 1059 * 1060 * RETURNS: 1061 * 0 on success, AC_ERR_* mask otherwise. 
1062 */ 1063 static unsigned int ata_read_log_page(struct ata_device *dev, 1064 u8 page, void *buf, unsigned int sectors) 1065 { 1066 struct ata_taskfile tf; 1067 unsigned int err_mask; 1068 1069 DPRINTK("read log page - page %d\n", page); 1070 1071 ata_tf_init(dev, &tf); 1072 tf.command = ATA_CMD_READ_LOG_EXT; 1073 tf.lbal = page; 1074 tf.nsect = sectors; 1075 tf.hob_nsect = sectors >> 8; 1076 tf.flags |= ATA_TFLAG_ISADDR | ATA_TFLAG_LBA48 | ATA_TFLAG_DEVICE; 1077 tf.protocol = ATA_PROT_PIO; 1078 1079 err_mask = ata_exec_internal(dev, &tf, NULL, DMA_FROM_DEVICE, 1080 buf, sectors * ATA_SECT_SIZE); 1081 1082 DPRINTK("EXIT, err_mask=%x\n", err_mask); 1083 return err_mask; 1084 } 1085 1086 /** 1087 * ata_eh_read_log_10h - Read log page 10h for NCQ error details 1088 * @dev: Device to read log page 10h from 1089 * @tag: Resulting tag of the failed command 1090 * @tf: Resulting taskfile registers of the failed command 1091 * 1092 * Read log page 10h to obtain NCQ error details and clear error 1093 * condition. 1094 * 1095 * LOCKING: 1096 * Kernel thread context (may sleep). 1097 * 1098 * RETURNS: 1099 * 0 on success, -errno otherwise. 
1100 */ 1101 static int ata_eh_read_log_10h(struct ata_device *dev, 1102 int *tag, struct ata_taskfile *tf) 1103 { 1104 u8 *buf = dev->ap->sector_buf; 1105 unsigned int err_mask; 1106 u8 csum; 1107 int i; 1108 1109 err_mask = ata_read_log_page(dev, ATA_LOG_SATA_NCQ, buf, 1); 1110 if (err_mask) 1111 return -EIO; 1112 1113 csum = 0; 1114 for (i = 0; i < ATA_SECT_SIZE; i++) 1115 csum += buf[i]; 1116 if (csum) 1117 ata_dev_printk(dev, KERN_WARNING, 1118 "invalid checksum 0x%x on log page 10h\n", csum); 1119 1120 if (buf[0] & 0x80) 1121 return -ENOENT; 1122 1123 *tag = buf[0] & 0x1f; 1124 1125 tf->command = buf[2]; 1126 tf->feature = buf[3]; 1127 tf->lbal = buf[4]; 1128 tf->lbam = buf[5]; 1129 tf->lbah = buf[6]; 1130 tf->device = buf[7]; 1131 tf->hob_lbal = buf[8]; 1132 tf->hob_lbam = buf[9]; 1133 tf->hob_lbah = buf[10]; 1134 tf->nsect = buf[12]; 1135 tf->hob_nsect = buf[13]; 1136 1137 return 0; 1138 } 1139 1140 /** 1141 * atapi_eh_request_sense - perform ATAPI REQUEST_SENSE 1142 * @dev: device to perform REQUEST_SENSE to 1143 * @sense_buf: result sense data buffer (SCSI_SENSE_BUFFERSIZE bytes long) 1144 * 1145 * Perform ATAPI REQUEST_SENSE after the device reported CHECK 1146 * SENSE. This function is EH helper. 1147 * 1148 * LOCKING: 1149 * Kernel thread context (may sleep). 1150 * 1151 * RETURNS: 1152 * 0 on success, AC_ERR_* mask on failure 1153 */ 1154 static unsigned int atapi_eh_request_sense(struct ata_queued_cmd *qc) 1155 { 1156 struct ata_device *dev = qc->dev; 1157 unsigned char *sense_buf = qc->scsicmd->sense_buffer; 1158 struct ata_port *ap = dev->ap; 1159 struct ata_taskfile tf; 1160 u8 cdb[ATAPI_CDB_LEN]; 1161 1162 DPRINTK("ATAPI request sense\n"); 1163 1164 /* FIXME: is this needed? 
*/ 1165 memset(sense_buf, 0, SCSI_SENSE_BUFFERSIZE); 1166 1167 /* initialize sense_buf with the error register, 1168 * for the case where they are -not- overwritten 1169 */ 1170 sense_buf[0] = 0x70; 1171 sense_buf[2] = qc->result_tf.feature >> 4; 1172 1173 /* some devices time out if garbage left in tf */ 1174 ata_tf_init(dev, &tf); 1175 1176 memset(cdb, 0, ATAPI_CDB_LEN); 1177 cdb[0] = REQUEST_SENSE; 1178 cdb[4] = SCSI_SENSE_BUFFERSIZE; 1179 1180 tf.flags |= ATA_TFLAG_ISADDR | ATA_TFLAG_DEVICE; 1181 tf.command = ATA_CMD_PACKET; 1182 1183 /* is it pointless to prefer PIO for "safety reasons"? */ 1184 if (ap->flags & ATA_FLAG_PIO_DMA) { 1185 tf.protocol = ATA_PROT_ATAPI_DMA; 1186 tf.feature |= ATAPI_PKT_DMA; 1187 } else { 1188 tf.protocol = ATA_PROT_ATAPI; 1189 tf.lbam = (8 * 1024) & 0xff; 1190 tf.lbah = (8 * 1024) >> 8; 1191 } 1192 1193 return ata_exec_internal(dev, &tf, cdb, DMA_FROM_DEVICE, 1194 sense_buf, SCSI_SENSE_BUFFERSIZE); 1195 } 1196 1197 /** 1198 * ata_eh_analyze_serror - analyze SError for a failed port 1199 * @ap: ATA port to analyze SError for 1200 * 1201 * Analyze SError if available and further determine cause of 1202 * failure. 1203 * 1204 * LOCKING: 1205 * None. 
1206 */ 1207 static void ata_eh_analyze_serror(struct ata_port *ap) 1208 { 1209 struct ata_eh_context *ehc = &ap->eh_context; 1210 u32 serror = ehc->i.serror; 1211 unsigned int err_mask = 0, action = 0; 1212 1213 if (serror & SERR_PERSISTENT) { 1214 err_mask |= AC_ERR_ATA_BUS; 1215 action |= ATA_EH_HARDRESET; 1216 } 1217 if (serror & 1218 (SERR_DATA_RECOVERED | SERR_COMM_RECOVERED | SERR_DATA)) { 1219 err_mask |= AC_ERR_ATA_BUS; 1220 action |= ATA_EH_SOFTRESET; 1221 } 1222 if (serror & SERR_PROTOCOL) { 1223 err_mask |= AC_ERR_HSM; 1224 action |= ATA_EH_SOFTRESET; 1225 } 1226 if (serror & SERR_INTERNAL) { 1227 err_mask |= AC_ERR_SYSTEM; 1228 action |= ATA_EH_HARDRESET; 1229 } 1230 if (serror & (SERR_PHYRDY_CHG | SERR_DEV_XCHG)) 1231 ata_ehi_hotplugged(&ehc->i); 1232 1233 ehc->i.err_mask |= err_mask; 1234 ehc->i.action |= action; 1235 } 1236 1237 /** 1238 * ata_eh_analyze_ncq_error - analyze NCQ error 1239 * @ap: ATA port to analyze NCQ error for 1240 * 1241 * Read log page 10h, determine the offending qc and acquire 1242 * error status TF. For NCQ device errors, all LLDDs have to do 1243 * is setting AC_ERR_DEV in ehi->err_mask. This function takes 1244 * care of the rest. 1245 * 1246 * LOCKING: 1247 * Kernel thread context (may sleep). 1248 */ 1249 static void ata_eh_analyze_ncq_error(struct ata_port *ap) 1250 { 1251 struct ata_eh_context *ehc = &ap->eh_context; 1252 struct ata_device *dev = ap->device; 1253 struct ata_queued_cmd *qc; 1254 struct ata_taskfile tf; 1255 int tag, rc; 1256 1257 /* if frozen, we can't do much */ 1258 if (ap->pflags & ATA_PFLAG_FROZEN) 1259 return; 1260 1261 /* is it NCQ device error? */ 1262 if (!ap->sactive || !(ehc->i.err_mask & AC_ERR_DEV)) 1263 return; 1264 1265 /* has LLDD analyzed already? 
*/ 1266 for (tag = 0; tag < ATA_MAX_QUEUE; tag++) { 1267 qc = __ata_qc_from_tag(ap, tag); 1268 1269 if (!(qc->flags & ATA_QCFLAG_FAILED)) 1270 continue; 1271 1272 if (qc->err_mask) 1273 return; 1274 } 1275 1276 /* okay, this error is ours */ 1277 rc = ata_eh_read_log_10h(dev, &tag, &tf); 1278 if (rc) { 1279 ata_port_printk(ap, KERN_ERR, "failed to read log page 10h " 1280 "(errno=%d)\n", rc); 1281 return; 1282 } 1283 1284 if (!(ap->sactive & (1 << tag))) { 1285 ata_port_printk(ap, KERN_ERR, "log page 10h reported " 1286 "inactive tag %d\n", tag); 1287 return; 1288 } 1289 1290 /* we've got the perpetrator, condemn it */ 1291 qc = __ata_qc_from_tag(ap, tag); 1292 memcpy(&qc->result_tf, &tf, sizeof(tf)); 1293 qc->err_mask |= AC_ERR_DEV | AC_ERR_NCQ; 1294 ehc->i.err_mask &= ~AC_ERR_DEV; 1295 } 1296 1297 /** 1298 * ata_eh_analyze_tf - analyze taskfile of a failed qc 1299 * @qc: qc to analyze 1300 * @tf: Taskfile registers to analyze 1301 * 1302 * Analyze taskfile of @qc and further determine cause of 1303 * failure. This function also requests ATAPI sense data if 1304 * avaliable. 1305 * 1306 * LOCKING: 1307 * Kernel thread context (may sleep). 
1308 * 1309 * RETURNS: 1310 * Determined recovery action 1311 */ 1312 static unsigned int ata_eh_analyze_tf(struct ata_queued_cmd *qc, 1313 const struct ata_taskfile *tf) 1314 { 1315 unsigned int tmp, action = 0; 1316 u8 stat = tf->command, err = tf->feature; 1317 1318 if ((stat & (ATA_BUSY | ATA_DRQ | ATA_DRDY)) != ATA_DRDY) { 1319 qc->err_mask |= AC_ERR_HSM; 1320 return ATA_EH_SOFTRESET; 1321 } 1322 1323 if (stat & (ATA_ERR | ATA_DF)) 1324 qc->err_mask |= AC_ERR_DEV; 1325 else 1326 return 0; 1327 1328 switch (qc->dev->class) { 1329 case ATA_DEV_ATA: 1330 if (err & ATA_ICRC) 1331 qc->err_mask |= AC_ERR_ATA_BUS; 1332 if (err & ATA_UNC) 1333 qc->err_mask |= AC_ERR_MEDIA; 1334 if (err & ATA_IDNF) 1335 qc->err_mask |= AC_ERR_INVALID; 1336 break; 1337 1338 case ATA_DEV_ATAPI: 1339 if (!(qc->ap->pflags & ATA_PFLAG_FROZEN)) { 1340 tmp = atapi_eh_request_sense(qc); 1341 if (!tmp) { 1342 /* ATA_QCFLAG_SENSE_VALID is used to 1343 * tell atapi_qc_complete() that sense 1344 * data is already valid. 1345 * 1346 * TODO: interpret sense data and set 1347 * appropriate err_mask. 
1348 */ 1349 qc->flags |= ATA_QCFLAG_SENSE_VALID; 1350 } else 1351 qc->err_mask |= tmp; 1352 } 1353 } 1354 1355 if (qc->err_mask & (AC_ERR_HSM | AC_ERR_TIMEOUT | AC_ERR_ATA_BUS)) 1356 action |= ATA_EH_SOFTRESET; 1357 1358 return action; 1359 } 1360 1361 static int ata_eh_categorize_error(int is_io, unsigned int err_mask) 1362 { 1363 if (err_mask & AC_ERR_ATA_BUS) 1364 return 1; 1365 1366 if (err_mask & AC_ERR_TIMEOUT) 1367 return 2; 1368 1369 if (is_io) { 1370 if (err_mask & AC_ERR_HSM) 1371 return 2; 1372 if ((err_mask & 1373 (AC_ERR_DEV|AC_ERR_MEDIA|AC_ERR_INVALID)) == AC_ERR_DEV) 1374 return 3; 1375 } 1376 1377 return 0; 1378 } 1379 1380 struct speed_down_verdict_arg { 1381 u64 since; 1382 int nr_errors[4]; 1383 }; 1384 1385 static int speed_down_verdict_cb(struct ata_ering_entry *ent, void *void_arg) 1386 { 1387 struct speed_down_verdict_arg *arg = void_arg; 1388 int cat = ata_eh_categorize_error(ent->is_io, ent->err_mask); 1389 1390 if (ent->timestamp < arg->since) 1391 return -1; 1392 1393 arg->nr_errors[cat]++; 1394 return 0; 1395 } 1396 1397 /** 1398 * ata_eh_speed_down_verdict - Determine speed down verdict 1399 * @dev: Device of interest 1400 * 1401 * This function examines error ring of @dev and determines 1402 * whether NCQ needs to be turned off, transfer speed should be 1403 * stepped down, or falling back to PIO is necessary. 1404 * 1405 * Cat-1 is ATA_BUS error for any command. 1406 * 1407 * Cat-2 is TIMEOUT for any command or HSM violation for known 1408 * supported commands. 1409 * 1410 * Cat-3 is is unclassified DEV error for known supported 1411 * command. 1412 * 1413 * NCQ needs to be turned off if there have been more than 3 1414 * Cat-2 + Cat-3 errors during last 10 minutes. 1415 * 1416 * Speed down is necessary if there have been more than 3 Cat-1 + 1417 * Cat-2 errors or 10 Cat-3 errors during last 10 minutes. 
1418 * 1419 * Falling back to PIO mode is necessary if there have been more 1420 * than 10 Cat-1 + Cat-2 + Cat-3 errors during last 5 minutes. 1421 * 1422 * LOCKING: 1423 * Inherited from caller. 1424 * 1425 * RETURNS: 1426 * OR of ATA_EH_SPDN_* flags. 1427 */ 1428 static unsigned int ata_eh_speed_down_verdict(struct ata_device *dev) 1429 { 1430 const u64 j5mins = 5LLU * 60 * HZ, j10mins = 10LLU * 60 * HZ; 1431 u64 j64 = get_jiffies_64(); 1432 struct speed_down_verdict_arg arg; 1433 unsigned int verdict = 0; 1434 1435 /* scan past 10 mins of error history */ 1436 memset(&arg, 0, sizeof(arg)); 1437 arg.since = j64 - min(j64, j10mins); 1438 ata_ering_map(&dev->ering, speed_down_verdict_cb, &arg); 1439 1440 if (arg.nr_errors[2] + arg.nr_errors[3] > 3) 1441 verdict |= ATA_EH_SPDN_NCQ_OFF; 1442 if (arg.nr_errors[1] + arg.nr_errors[2] > 3 || arg.nr_errors[3] > 10) 1443 verdict |= ATA_EH_SPDN_SPEED_DOWN; 1444 1445 /* scan past 3 mins of error history */ 1446 memset(&arg, 0, sizeof(arg)); 1447 arg.since = j64 - min(j64, j5mins); 1448 ata_ering_map(&dev->ering, speed_down_verdict_cb, &arg); 1449 1450 if (arg.nr_errors[1] + arg.nr_errors[2] + arg.nr_errors[3] > 10) 1451 verdict |= ATA_EH_SPDN_FALLBACK_TO_PIO; 1452 1453 return verdict; 1454 } 1455 1456 /** 1457 * ata_eh_speed_down - record error and speed down if necessary 1458 * @dev: Failed device 1459 * @is_io: Did the device fail during normal IO? 1460 * @err_mask: err_mask of the error 1461 * 1462 * Record error and examine error history to determine whether 1463 * adjusting transmission speed is necessary. It also sets 1464 * transmission limits appropriately if such adjustment is 1465 * necessary. 1466 * 1467 * LOCKING: 1468 * Kernel thread context (may sleep). 1469 * 1470 * RETURNS: 1471 * Determined recovery action. 
*/
static unsigned int ata_eh_speed_down(struct ata_device *dev, int is_io,
				      unsigned int err_mask)
{
	unsigned int verdict;
	unsigned int action = 0;

	/* don't bother if Cat-0 error */
	if (ata_eh_categorize_error(is_io, err_mask) == 0)
		return 0;

	/* record error and determine whether speed down is necessary */
	ata_ering_record(&dev->ering, is_io, err_mask);
	verdict = ata_eh_speed_down_verdict(dev);

	/* turn off NCQ?  Only when NCQ is set and neither PIO nor
	 * NCQ_OFF is; no reset is requested for this step (action
	 * stays 0).
	 */
	if ((verdict & ATA_EH_SPDN_NCQ_OFF) &&
	    (dev->flags & (ATA_DFLAG_PIO | ATA_DFLAG_NCQ |
			   ATA_DFLAG_NCQ_OFF)) == ATA_DFLAG_NCQ) {
		dev->flags |= ATA_DFLAG_NCQ_OFF;
		ata_dev_printk(dev, KERN_WARNING,
			       "NCQ disabled due to excessive errors\n");
		goto done;
	}

	/* speed down? */
	if (verdict & ATA_EH_SPDN_SPEED_DOWN) {
		/* speed down SATA link speed if possible */
		if (sata_down_spd_limit(dev->ap) == 0) {
			action |= ATA_EH_HARDRESET;
			goto done;
		}

		/* lower transfer mode; at most two such steps, tracked
		 * in dev->spdn_cnt
		 */
		if (dev->spdn_cnt < 2) {
			/* selector per step, indexed by dev->spdn_cnt */
			static const int dma_dnxfer_sel[] =
				{ ATA_DNXFER_DMA, ATA_DNXFER_40C };
			static const int pio_dnxfer_sel[] =
				{ ATA_DNXFER_PIO, ATA_DNXFER_FORCE_PIO0 };
			int sel;

			if (dev->xfer_shift != ATA_SHIFT_PIO)
				sel = dma_dnxfer_sel[dev->spdn_cnt];
			else
				sel = pio_dnxfer_sel[dev->spdn_cnt];

			/* the step is counted even if lowering fails below */
			dev->spdn_cnt++;

			if (ata_down_xfermask_limit(dev, sel) == 0) {
				action |= ATA_EH_SOFTRESET;
				goto done;
			}
		}
	}

	/* Fall back to PIO?  Slowing down to PIO is meaningless for
	 * SATA.  Consider it only for PATA.
	 */
	if ((verdict & ATA_EH_SPDN_FALLBACK_TO_PIO) && (dev->spdn_cnt >= 2) &&
	    (dev->ap->cbl != ATA_CBL_SATA) &&
	    (dev->xfer_shift != ATA_SHIFT_PIO)) {
		if (ata_down_xfermask_limit(dev, ATA_DNXFER_FORCE_PIO) == 0) {
			dev->spdn_cnt = 0;
			action |= ATA_EH_SOFTRESET;
			goto done;
		}
	}

	/* nothing was adjusted; the error history is kept */
	return 0;
 done:
	/* device has been slowed down, blow error history */
	ata_ering_clear(&dev->ering);
	return action;
}

/**
 *	ata_eh_autopsy - analyze error and determine recovery action
 *	@ap: ATA port to perform autopsy on
 *
 *	Analyze why @ap failed and determine which recovery action is
 *	needed.  This function also sets more detailed AC_ERR_* values
 *	and fills sense data for ATAPI CHECK SENSE.
 *
 *	Does nothing if ATA_EHI_NO_AUTOPSY is set in the EH info flags.
 *
 *	LOCKING:
 *	Kernel thread context (may sleep).
 */
static void ata_eh_autopsy(struct ata_port *ap)
{
	struct ata_eh_context *ehc = &ap->eh_context;
	unsigned int all_err_mask = 0;
	int tag, is_io = 0;
	u32 serror;
	int rc;

	DPRINTK("ENTER\n");

	if (ehc->i.flags & ATA_EHI_NO_AUTOPSY)
		return;

	/* obtain and analyze SError; -EOPNOTSUPP is silently ignored */
	rc = sata_scr_read(ap, SCR_ERROR, &serror);
	if (rc == 0) {
		ehc->i.serror |= serror;
		ata_eh_analyze_serror(ap);
	} else if (rc != -EOPNOTSUPP) {
		/* SError read failed, force hardreset and probing */
		ata_ehi_schedule_probe(&ehc->i);
		ehc->i.action |= ATA_EH_HARDRESET;
		ehc->i.err_mask |= AC_ERR_OTHER;
	}

	/* analyze NCQ failure */
	ata_eh_analyze_ncq_error(ap);

	/* any real error trumps AC_ERR_OTHER */
	if (ehc->i.err_mask & ~AC_ERR_OTHER)
		ehc->i.err_mask &= ~AC_ERR_OTHER;

	all_err_mask |= ehc->i.err_mask;

	/* per-qc analysis of every failed command */
	for (tag = 0; tag < ATA_MAX_QUEUE; tag++) {
		struct ata_queued_cmd *qc = __ata_qc_from_tag(ap, tag);

		if (!(qc->flags & ATA_QCFLAG_FAILED))
			continue;

		/* inherit upper level err_mask */
		qc->err_mask |= ehc->i.err_mask;

		/* analyze TF */
		ehc->i.action |= ata_eh_analyze_tf(qc, &qc->result_tf);

		/* DEV errors are probably spurious in case of ATA_BUS error */
		if (qc->err_mask & AC_ERR_ATA_BUS)
			qc->err_mask &= ~(AC_ERR_DEV | AC_ERR_MEDIA |
					  AC_ERR_INVALID);

		/* any real error trumps unknown error */
		if (qc->err_mask & ~AC_ERR_OTHER)
			qc->err_mask &= ~AC_ERR_OTHER;

		/* SENSE_VALID trumps dev/unknown error and revalidation */
		if (qc->flags & ATA_QCFLAG_SENSE_VALID) {
			qc->err_mask &= ~(AC_ERR_DEV | AC_ERR_OTHER);
			ehc->i.action &= ~ATA_EH_REVALIDATE;
		}

		/* accumulate error info; with several failed qcs the
		 * device of the last one visited ends up in ehc->i.dev
		 */
		ehc->i.dev = qc->dev;
		all_err_mask |= qc->err_mask;
		if (qc->flags & ATA_QCFLAG_IO)
			is_io = 1;
	}

	/* enforce default EH actions */
	if (ap->pflags & ATA_PFLAG_FROZEN ||
	    all_err_mask & (AC_ERR_HSM | AC_ERR_TIMEOUT))
		ehc->i.action |= ATA_EH_SOFTRESET;
	else if (all_err_mask)
		ehc->i.action |= ATA_EH_REVALIDATE;

	/* if we have offending qcs and the associated failed device */
	if (ehc->i.dev) {
		/* speed down */
		ehc->i.action |= ata_eh_speed_down(ehc->i.dev, is_io,
						   all_err_mask);

		/* perform per-dev EH action only on the offending device */
		ehc->i.dev_action[ehc->i.dev->devno] |=
			ehc->i.action & ATA_EH_PERDEV_MASK;
		ehc->i.action &= ~ATA_EH_PERDEV_MASK;
	}

	DPRINTK("EXIT\n");
}

/**
 *	ata_eh_report - report error handling to user
 *	@ap: ATA port EH is going on
 *
 *	Report EH to user.
 *
 *	LOCKING:
 *	None.
1656 */ 1657 static void ata_eh_report(struct ata_port *ap) 1658 { 1659 struct ata_eh_context *ehc = &ap->eh_context; 1660 const char *frozen, *desc; 1661 int tag, nr_failed = 0; 1662 1663 desc = NULL; 1664 if (ehc->i.desc[0] != '\0') 1665 desc = ehc->i.desc; 1666 1667 for (tag = 0; tag < ATA_MAX_QUEUE; tag++) { 1668 struct ata_queued_cmd *qc = __ata_qc_from_tag(ap, tag); 1669 1670 if (!(qc->flags & ATA_QCFLAG_FAILED)) 1671 continue; 1672 if (qc->flags & ATA_QCFLAG_SENSE_VALID && !qc->err_mask) 1673 continue; 1674 1675 nr_failed++; 1676 } 1677 1678 if (!nr_failed && !ehc->i.err_mask) 1679 return; 1680 1681 frozen = ""; 1682 if (ap->pflags & ATA_PFLAG_FROZEN) 1683 frozen = " frozen"; 1684 1685 if (ehc->i.dev) { 1686 ata_dev_printk(ehc->i.dev, KERN_ERR, "exception Emask 0x%x " 1687 "SAct 0x%x SErr 0x%x action 0x%x%s\n", 1688 ehc->i.err_mask, ap->sactive, ehc->i.serror, 1689 ehc->i.action, frozen); 1690 if (desc) 1691 ata_dev_printk(ehc->i.dev, KERN_ERR, "%s\n", desc); 1692 } else { 1693 ata_port_printk(ap, KERN_ERR, "exception Emask 0x%x " 1694 "SAct 0x%x SErr 0x%x action 0x%x%s\n", 1695 ehc->i.err_mask, ap->sactive, ehc->i.serror, 1696 ehc->i.action, frozen); 1697 if (desc) 1698 ata_port_printk(ap, KERN_ERR, "%s\n", desc); 1699 } 1700 1701 for (tag = 0; tag < ATA_MAX_QUEUE; tag++) { 1702 static const char *dma_str[] = { 1703 [DMA_BIDIRECTIONAL] = "bidi", 1704 [DMA_TO_DEVICE] = "out", 1705 [DMA_FROM_DEVICE] = "in", 1706 [DMA_NONE] = "", 1707 }; 1708 struct ata_queued_cmd *qc = __ata_qc_from_tag(ap, tag); 1709 struct ata_taskfile *cmd = &qc->tf, *res = &qc->result_tf; 1710 1711 if (!(qc->flags & ATA_QCFLAG_FAILED) || !qc->err_mask) 1712 continue; 1713 1714 ata_dev_printk(qc->dev, KERN_ERR, 1715 "cmd %02x/%02x:%02x:%02x:%02x:%02x/%02x:%02x:%02x:%02x:%02x/%02x " 1716 "tag %d cdb 0x%x data %u %s\n " 1717 "res %02x/%02x:%02x:%02x:%02x:%02x/%02x:%02x:%02x:%02x:%02x/%02x " 1718 "Emask 0x%x (%s)%s\n", 1719 cmd->command, cmd->feature, cmd->nsect, 1720 cmd->lbal, cmd->lbam, 
cmd->lbah, 1721 cmd->hob_feature, cmd->hob_nsect, 1722 cmd->hob_lbal, cmd->hob_lbam, cmd->hob_lbah, 1723 cmd->device, qc->tag, qc->cdb[0], qc->nbytes, 1724 dma_str[qc->dma_dir], 1725 res->command, res->feature, res->nsect, 1726 res->lbal, res->lbam, res->lbah, 1727 res->hob_feature, res->hob_nsect, 1728 res->hob_lbal, res->hob_lbam, res->hob_lbah, 1729 res->device, qc->err_mask, ata_err_string(qc->err_mask), 1730 qc->err_mask & AC_ERR_NCQ ? " <F>" : ""); 1731 } 1732 } 1733 1734 static int ata_do_reset(struct ata_port *ap, ata_reset_fn_t reset, 1735 unsigned int *classes, unsigned long deadline) 1736 { 1737 int i, rc; 1738 1739 for (i = 0; i < ATA_MAX_DEVICES; i++) 1740 classes[i] = ATA_DEV_UNKNOWN; 1741 1742 rc = reset(ap, classes, deadline); 1743 if (rc) 1744 return rc; 1745 1746 /* If any class isn't ATA_DEV_UNKNOWN, consider classification 1747 * is complete and convert all ATA_DEV_UNKNOWN to 1748 * ATA_DEV_NONE. 1749 */ 1750 for (i = 0; i < ATA_MAX_DEVICES; i++) 1751 if (classes[i] != ATA_DEV_UNKNOWN) 1752 break; 1753 1754 if (i < ATA_MAX_DEVICES) 1755 for (i = 0; i < ATA_MAX_DEVICES; i++) 1756 if (classes[i] == ATA_DEV_UNKNOWN) 1757 classes[i] = ATA_DEV_NONE; 1758 1759 return 0; 1760 } 1761 1762 static int ata_eh_followup_srst_needed(int rc, int classify, 1763 const unsigned int *classes) 1764 { 1765 if (rc == -EAGAIN) 1766 return 1; 1767 if (rc != 0) 1768 return 0; 1769 if (classify && classes[0] == ATA_DEV_UNKNOWN) 1770 return 1; 1771 return 0; 1772 } 1773 1774 static int ata_eh_reset(struct ata_port *ap, int classify, 1775 ata_prereset_fn_t prereset, ata_reset_fn_t softreset, 1776 ata_reset_fn_t hardreset, ata_postreset_fn_t postreset) 1777 { 1778 struct ata_eh_context *ehc = &ap->eh_context; 1779 unsigned int *classes = ehc->classes; 1780 int verbose = !(ehc->i.flags & ATA_EHI_QUIET); 1781 int try = 0; 1782 unsigned long deadline; 1783 unsigned int action; 1784 ata_reset_fn_t reset; 1785 int i, rc; 1786 1787 /* about to reset */ 1788 ata_eh_about_to_do(ap, 
NULL, ehc->i.action & ATA_EH_RESET_MASK); 1789 1790 /* Determine which reset to use and record in ehc->i.action. 1791 * prereset() may examine and modify it. 1792 */ 1793 action = ehc->i.action; 1794 ehc->i.action &= ~ATA_EH_RESET_MASK; 1795 if (softreset && (!hardreset || (!sata_set_spd_needed(ap) && 1796 !(action & ATA_EH_HARDRESET)))) 1797 ehc->i.action |= ATA_EH_SOFTRESET; 1798 else 1799 ehc->i.action |= ATA_EH_HARDRESET; 1800 1801 if (prereset) { 1802 rc = prereset(ap, jiffies + ATA_EH_PRERESET_TIMEOUT); 1803 if (rc) { 1804 if (rc == -ENOENT) { 1805 ata_port_printk(ap, KERN_DEBUG, 1806 "port disabled. ignoring.\n"); 1807 ap->eh_context.i.action &= ~ATA_EH_RESET_MASK; 1808 1809 for (i = 0; i < ATA_MAX_DEVICES; i++) 1810 classes[i] = ATA_DEV_NONE; 1811 1812 rc = 0; 1813 } else 1814 ata_port_printk(ap, KERN_ERR, 1815 "prereset failed (errno=%d)\n", rc); 1816 goto out; 1817 } 1818 } 1819 1820 /* prereset() might have modified ehc->i.action */ 1821 if (ehc->i.action & ATA_EH_HARDRESET) 1822 reset = hardreset; 1823 else if (ehc->i.action & ATA_EH_SOFTRESET) 1824 reset = softreset; 1825 else { 1826 /* prereset told us not to reset, bang classes and return */ 1827 for (i = 0; i < ATA_MAX_DEVICES; i++) 1828 classes[i] = ATA_DEV_NONE; 1829 rc = 0; 1830 goto out; 1831 } 1832 1833 /* did prereset() screw up? if so, fix up to avoid oopsing */ 1834 if (!reset) { 1835 if (softreset) 1836 reset = softreset; 1837 else 1838 reset = hardreset; 1839 } 1840 1841 retry: 1842 deadline = jiffies + ata_eh_reset_timeouts[try++]; 1843 1844 /* shut up during boot probing */ 1845 if (verbose) 1846 ata_port_printk(ap, KERN_INFO, "%s resetting port\n", 1847 reset == softreset ? 
"soft" : "hard"); 1848 1849 /* mark that this EH session started with reset */ 1850 if (reset == hardreset) 1851 ehc->i.flags |= ATA_EHI_DID_HARDRESET; 1852 else 1853 ehc->i.flags |= ATA_EHI_DID_SOFTRESET; 1854 1855 rc = ata_do_reset(ap, reset, classes, deadline); 1856 1857 if (reset == hardreset && 1858 ata_eh_followup_srst_needed(rc, classify, classes)) { 1859 /* okay, let's do follow-up softreset */ 1860 reset = softreset; 1861 1862 if (!reset) { 1863 ata_port_printk(ap, KERN_ERR, 1864 "follow-up softreset required " 1865 "but no softreset avaliable\n"); 1866 rc = -EINVAL; 1867 goto out; 1868 } 1869 1870 ata_eh_about_to_do(ap, NULL, ATA_EH_RESET_MASK); 1871 rc = ata_do_reset(ap, reset, classes, deadline); 1872 1873 if (rc == 0 && classify && 1874 classes[0] == ATA_DEV_UNKNOWN) { 1875 ata_port_printk(ap, KERN_ERR, 1876 "classification failed\n"); 1877 rc = -EINVAL; 1878 goto out; 1879 } 1880 } 1881 1882 if (rc && try < ARRAY_SIZE(ata_eh_reset_timeouts)) { 1883 unsigned long now = jiffies; 1884 1885 if (time_before(now, deadline)) { 1886 unsigned long delta = deadline - jiffies; 1887 1888 ata_port_printk(ap, KERN_WARNING, "reset failed " 1889 "(errno=%d), retrying in %u secs\n", 1890 rc, (jiffies_to_msecs(delta) + 999) / 1000); 1891 1892 schedule_timeout_uninterruptible(delta); 1893 } 1894 1895 if (rc == -EPIPE || 1896 try == ARRAY_SIZE(ata_eh_reset_timeouts) - 1) 1897 sata_down_spd_limit(ap); 1898 if (hardreset) 1899 reset = hardreset; 1900 goto retry; 1901 } 1902 1903 if (rc == 0) { 1904 u32 sstatus; 1905 1906 /* After the reset, the device state is PIO 0 and the 1907 * controller state is undefined. Record the mode. 
1908 */ 1909 for (i = 0; i < ATA_MAX_DEVICES; i++) 1910 ap->device[i].pio_mode = XFER_PIO_0; 1911 1912 /* record current link speed */ 1913 if (sata_scr_read(ap, SCR_STATUS, &sstatus) == 0) 1914 ap->sata_spd = (sstatus >> 4) & 0xf; 1915 1916 if (postreset) 1917 postreset(ap, classes); 1918 1919 /* reset successful, schedule revalidation */ 1920 ata_eh_done(ap, NULL, ehc->i.action & ATA_EH_RESET_MASK); 1921 ehc->i.action |= ATA_EH_REVALIDATE; 1922 } 1923 out: 1924 /* clear hotplug flag */ 1925 ehc->i.flags &= ~ATA_EHI_HOTPLUGGED; 1926 return rc; 1927 } 1928 1929 static int ata_eh_revalidate_and_attach(struct ata_port *ap, 1930 struct ata_device **r_failed_dev) 1931 { 1932 struct ata_eh_context *ehc = &ap->eh_context; 1933 struct ata_device *dev; 1934 unsigned int new_mask = 0; 1935 unsigned long flags; 1936 int i, rc = 0; 1937 1938 DPRINTK("ENTER\n"); 1939 1940 /* For PATA drive side cable detection to work, IDENTIFY must 1941 * be done backwards such that PDIAG- is released by the slave 1942 * device before the master device is identified. 1943 */ 1944 for (i = ATA_MAX_DEVICES - 1; i >= 0; i--) { 1945 unsigned int action, readid_flags = 0; 1946 1947 dev = &ap->device[i]; 1948 action = ata_eh_dev_action(dev); 1949 1950 if (ehc->i.flags & ATA_EHI_DID_RESET) 1951 readid_flags |= ATA_READID_POSTRESET; 1952 1953 if ((action & ATA_EH_REVALIDATE) && ata_dev_enabled(dev)) { 1954 if (ata_port_offline(ap)) { 1955 rc = -EIO; 1956 goto err; 1957 } 1958 1959 ata_eh_about_to_do(ap, dev, ATA_EH_REVALIDATE); 1960 rc = ata_dev_revalidate(dev, readid_flags); 1961 if (rc) 1962 goto err; 1963 1964 ata_eh_done(ap, dev, ATA_EH_REVALIDATE); 1965 1966 /* Configuration may have changed, reconfigure 1967 * transfer mode. 
1968 */ 1969 ehc->i.flags |= ATA_EHI_SETMODE; 1970 1971 /* schedule the scsi_rescan_device() here */ 1972 queue_work(ata_aux_wq, &(ap->scsi_rescan_task)); 1973 } else if (dev->class == ATA_DEV_UNKNOWN && 1974 ehc->tries[dev->devno] && 1975 ata_class_enabled(ehc->classes[dev->devno])) { 1976 dev->class = ehc->classes[dev->devno]; 1977 1978 rc = ata_dev_read_id(dev, &dev->class, readid_flags, 1979 dev->id); 1980 switch (rc) { 1981 case 0: 1982 new_mask |= 1 << i; 1983 break; 1984 case -ENOENT: 1985 /* IDENTIFY was issued to non-existent 1986 * device. No need to reset. Just 1987 * thaw and kill the device. 1988 */ 1989 ata_eh_thaw_port(ap); 1990 dev->class = ATA_DEV_UNKNOWN; 1991 break; 1992 default: 1993 dev->class = ATA_DEV_UNKNOWN; 1994 goto err; 1995 } 1996 } 1997 } 1998 1999 /* PDIAG- should have been released, ask cable type if post-reset */ 2000 if ((ehc->i.flags & ATA_EHI_DID_RESET) && ap->ops->cable_detect) 2001 ap->cbl = ap->ops->cable_detect(ap); 2002 2003 /* Configure new devices forward such that user doesn't see 2004 * device detection messages backwards. 
2005 */ 2006 for (i = 0; i < ATA_MAX_DEVICES; i++) { 2007 dev = &ap->device[i]; 2008 2009 if (!(new_mask & (1 << i))) 2010 continue; 2011 2012 ehc->i.flags |= ATA_EHI_PRINTINFO; 2013 rc = ata_dev_configure(dev); 2014 ehc->i.flags &= ~ATA_EHI_PRINTINFO; 2015 if (rc) 2016 goto err; 2017 2018 spin_lock_irqsave(ap->lock, flags); 2019 ap->pflags |= ATA_PFLAG_SCSI_HOTPLUG; 2020 spin_unlock_irqrestore(ap->lock, flags); 2021 2022 /* new device discovered, configure xfermode */ 2023 ehc->i.flags |= ATA_EHI_SETMODE; 2024 } 2025 2026 return 0; 2027 2028 err: 2029 *r_failed_dev = dev; 2030 DPRINTK("EXIT rc=%d\n", rc); 2031 return rc; 2032 } 2033 2034 static int ata_port_nr_enabled(struct ata_port *ap) 2035 { 2036 int i, cnt = 0; 2037 2038 for (i = 0; i < ATA_MAX_DEVICES; i++) 2039 if (ata_dev_enabled(&ap->device[i])) 2040 cnt++; 2041 return cnt; 2042 } 2043 2044 static int ata_port_nr_vacant(struct ata_port *ap) 2045 { 2046 int i, cnt = 0; 2047 2048 for (i = 0; i < ATA_MAX_DEVICES; i++) 2049 if (ap->device[i].class == ATA_DEV_UNKNOWN) 2050 cnt++; 2051 return cnt; 2052 } 2053 2054 static int ata_eh_skip_recovery(struct ata_port *ap) 2055 { 2056 struct ata_eh_context *ehc = &ap->eh_context; 2057 int i; 2058 2059 /* thaw frozen port, resume link and recover failed devices */ 2060 if ((ap->pflags & ATA_PFLAG_FROZEN) || 2061 (ehc->i.flags & ATA_EHI_RESUME_LINK) || ata_port_nr_enabled(ap)) 2062 return 0; 2063 2064 /* skip if class codes for all vacant slots are ATA_DEV_NONE */ 2065 for (i = 0; i < ATA_MAX_DEVICES; i++) { 2066 struct ata_device *dev = &ap->device[i]; 2067 2068 if (dev->class == ATA_DEV_UNKNOWN && 2069 ehc->classes[dev->devno] != ATA_DEV_NONE) 2070 return 0; 2071 } 2072 2073 return 1; 2074 } 2075 2076 static void ata_eh_handle_dev_fail(struct ata_device *dev, int err) 2077 { 2078 struct ata_port *ap = dev->ap; 2079 struct ata_eh_context *ehc = &ap->eh_context; 2080 2081 ehc->tries[dev->devno]--; 2082 2083 switch (err) { 2084 case -ENODEV: 2085 /* device missing or 
wrong IDENTIFY data, schedule probing */ 2086 ehc->i.probe_mask |= (1 << dev->devno); 2087 case -EINVAL: 2088 /* give it just one more chance */ 2089 ehc->tries[dev->devno] = min(ehc->tries[dev->devno], 1); 2090 case -EIO: 2091 if (ehc->tries[dev->devno] == 1) { 2092 /* This is the last chance, better to slow 2093 * down than lose it. 2094 */ 2095 sata_down_spd_limit(ap); 2096 ata_down_xfermask_limit(dev, ATA_DNXFER_PIO); 2097 } 2098 } 2099 2100 if (ata_dev_enabled(dev) && !ehc->tries[dev->devno]) { 2101 /* disable device if it has used up all its chances */ 2102 ata_dev_disable(dev); 2103 2104 /* detach if offline */ 2105 if (ata_port_offline(ap)) 2106 ata_eh_detach_dev(dev); 2107 2108 /* probe if requested */ 2109 if ((ehc->i.probe_mask & (1 << dev->devno)) && 2110 !(ehc->did_probe_mask & (1 << dev->devno))) { 2111 ata_eh_detach_dev(dev); 2112 ata_dev_init(dev); 2113 2114 ehc->tries[dev->devno] = ATA_EH_DEV_TRIES; 2115 ehc->did_probe_mask |= (1 << dev->devno); 2116 ehc->i.action |= ATA_EH_SOFTRESET; 2117 } 2118 } else { 2119 /* soft didn't work? be haaaaard */ 2120 if (ehc->i.flags & ATA_EHI_DID_RESET) 2121 ehc->i.action |= ATA_EH_HARDRESET; 2122 else 2123 ehc->i.action |= ATA_EH_SOFTRESET; 2124 } 2125 } 2126 2127 /** 2128 * ata_eh_recover - recover host port after error 2129 * @ap: host port to recover 2130 * @prereset: prereset method (can be NULL) 2131 * @softreset: softreset method (can be NULL) 2132 * @hardreset: hardreset method (can be NULL) 2133 * @postreset: postreset method (can be NULL) 2134 * 2135 * This is the alpha and omega, eum and yang, heart and soul of 2136 * libata exception handling. On entry, actions required to 2137 * recover the port and hotplug requests are recorded in 2138 * eh_context. This function executes all the operations with 2139 * appropriate retrials and fallbacks to resurrect failed 2140 * devices, detach goners and greet newcomers. 2141 * 2142 * LOCKING: 2143 * Kernel thread context (may sleep). 
*
 *	RETURNS:
 *	0 on success, -errno on failure.
 */
static int ata_eh_recover(struct ata_port *ap, ata_prereset_fn_t prereset,
			  ata_reset_fn_t softreset, ata_reset_fn_t hardreset,
			  ata_postreset_fn_t postreset)
{
	struct ata_eh_context *ehc = &ap->eh_context;
	struct ata_device *dev;
	int i, rc;

	DPRINTK("ENTER\n");

	/* prep for recovery */
	for (i = 0; i < ATA_MAX_DEVICES; i++) {
		dev = &ap->device[i];

		/* every device starts the session with a full set of tries */
		ehc->tries[dev->devno] = ATA_EH_DEV_TRIES;

		/* collect port action mask recorded in dev actions */
		ehc->i.action |= ehc->i.dev_action[i] & ~ATA_EH_PERDEV_MASK;
		ehc->i.dev_action[i] &= ATA_EH_PERDEV_MASK;

		/* process hotplug request */
		if (dev->flags & ATA_DFLAG_DETACH)
			ata_eh_detach_dev(dev);

		/* a disabled device with a probe request that has not
		 * been probed yet: reinitialize it and ask for a
		 * softreset
		 */
		if (!ata_dev_enabled(dev) &&
		    ((ehc->i.probe_mask & (1 << dev->devno)) &&
		     !(ehc->did_probe_mask & (1 << dev->devno)))) {
			ata_eh_detach_dev(dev);
			ata_dev_init(dev);
			ehc->did_probe_mask |= (1 << dev->devno);
			ehc->i.action |= ATA_EH_SOFTRESET;
		}
	}

 retry:
	rc = 0;

	/* if UNLOADING, finish immediately */
	if (ap->pflags & ATA_PFLAG_UNLOADING)
		goto out;

	/* skip EH if possible. */
	if (ata_eh_skip_recovery(ap))
		ehc->i.action = 0;

	/* forget classification results of any previous round */
	for (i = 0; i < ATA_MAX_DEVICES; i++)
		ehc->classes[i] = ATA_DEV_UNKNOWN;

	/* reset; the port is frozen for the duration and thawed again
	 * only if the reset succeeds.  Classification is requested
	 * when there is at least one vacant slot.
	 */
	if (ehc->i.action & ATA_EH_RESET_MASK) {
		ata_eh_freeze_port(ap);

		rc = ata_eh_reset(ap, ata_port_nr_vacant(ap), prereset,
				  softreset, hardreset, postreset);
		if (rc) {
			ata_port_printk(ap, KERN_ERR,
					"reset failed, giving up\n");
			goto out;
		}

		ata_eh_thaw_port(ap);
	}

	/* revalidate existing devices and attach new ones */
	rc = ata_eh_revalidate_and_attach(ap, &dev);
	if (rc)
		goto dev_fail;

	/* configure transfer mode if necessary */
	if (ehc->i.flags & ATA_EHI_SETMODE) {
		rc = ata_set_mode(ap, &dev);
		if (rc)
			goto dev_fail;
		ehc->i.flags &= ~ATA_EHI_SETMODE;
	}

	goto out;

 dev_fail:
	/* dev was set to the failing device by the call that failed */
	ata_eh_handle_dev_fail(dev, rc);

	if (ata_port_nr_enabled(ap)) {
		ata_port_printk(ap, KERN_WARNING, "failed to recover some "
				"devices, retrying in 5 secs\n");
		ssleep(5);
	} else {
		/* no device left, repeat fast */
		msleep(500);
	}

	goto retry;

 out:
	/* on failure, every device on the port is disabled */
	if (rc) {
		for (i = 0; i < ATA_MAX_DEVICES; i++)
			ata_dev_disable(&ap->device[i]);
	}

	DPRINTK("EXIT, rc=%d\n", rc);
	return rc;
}

/**
 *	ata_eh_finish - finish up EH
 *	@ap: host port to finish EH for
 *
 *	Recovery is complete.  Clean up EH states and retry or finish
 *	failed qcs.
 *
 *	LOCKING:
 *	None.
2259 */ 2260 static void ata_eh_finish(struct ata_port *ap) 2261 { 2262 int tag; 2263 2264 /* retry or finish qcs */ 2265 for (tag = 0; tag < ATA_MAX_QUEUE; tag++) { 2266 struct ata_queued_cmd *qc = __ata_qc_from_tag(ap, tag); 2267 2268 if (!(qc->flags & ATA_QCFLAG_FAILED)) 2269 continue; 2270 2271 if (qc->err_mask) { 2272 /* FIXME: Once EH migration is complete, 2273 * generate sense data in this function, 2274 * considering both err_mask and tf. 2275 */ 2276 if (qc->err_mask & AC_ERR_INVALID) 2277 ata_eh_qc_complete(qc); 2278 else 2279 ata_eh_qc_retry(qc); 2280 } else { 2281 if (qc->flags & ATA_QCFLAG_SENSE_VALID) { 2282 ata_eh_qc_complete(qc); 2283 } else { 2284 /* feed zero TF to sense generation */ 2285 memset(&qc->result_tf, 0, sizeof(qc->result_tf)); 2286 ata_eh_qc_retry(qc); 2287 } 2288 } 2289 } 2290 } 2291 2292 /** 2293 * ata_do_eh - do standard error handling 2294 * @ap: host port to handle error for 2295 * @prereset: prereset method (can be NULL) 2296 * @softreset: softreset method (can be NULL) 2297 * @hardreset: hardreset method (can be NULL) 2298 * @postreset: postreset method (can be NULL) 2299 * 2300 * Perform standard error handling sequence. 2301 * 2302 * LOCKING: 2303 * Kernel thread context (may sleep). 2304 */ 2305 void ata_do_eh(struct ata_port *ap, ata_prereset_fn_t prereset, 2306 ata_reset_fn_t softreset, ata_reset_fn_t hardreset, 2307 ata_postreset_fn_t postreset) 2308 { 2309 ata_eh_autopsy(ap); 2310 ata_eh_report(ap); 2311 ata_eh_recover(ap, prereset, softreset, hardreset, postreset); 2312 ata_eh_finish(ap); 2313 } 2314 2315 #ifdef CONFIG_PM 2316 /** 2317 * ata_eh_handle_port_suspend - perform port suspend operation 2318 * @ap: port to suspend 2319 * 2320 * Suspend @ap. 2321 * 2322 * LOCKING: 2323 * Kernel thread context (may sleep). 2324 */ 2325 static void ata_eh_handle_port_suspend(struct ata_port *ap) 2326 { 2327 unsigned long flags; 2328 int rc = 0; 2329 2330 /* are we suspending? 
*/ 2331 spin_lock_irqsave(ap->lock, flags); 2332 if (!(ap->pflags & ATA_PFLAG_PM_PENDING) || 2333 ap->pm_mesg.event == PM_EVENT_ON) { 2334 spin_unlock_irqrestore(ap->lock, flags); 2335 return; 2336 } 2337 spin_unlock_irqrestore(ap->lock, flags); 2338 2339 WARN_ON(ap->pflags & ATA_PFLAG_SUSPENDED); 2340 2341 /* tell ACPI we're suspending */ 2342 rc = ata_acpi_on_suspend(ap); 2343 if (rc) 2344 goto out; 2345 2346 /* suspend */ 2347 ata_eh_freeze_port(ap); 2348 2349 if (ap->ops->port_suspend) 2350 rc = ap->ops->port_suspend(ap, ap->pm_mesg); 2351 2352 out: 2353 /* report result */ 2354 spin_lock_irqsave(ap->lock, flags); 2355 2356 ap->pflags &= ~ATA_PFLAG_PM_PENDING; 2357 if (rc == 0) 2358 ap->pflags |= ATA_PFLAG_SUSPENDED; 2359 else if (ap->pflags & ATA_PFLAG_FROZEN) 2360 ata_port_schedule_eh(ap); 2361 2362 if (ap->pm_result) { 2363 *ap->pm_result = rc; 2364 ap->pm_result = NULL; 2365 } 2366 2367 spin_unlock_irqrestore(ap->lock, flags); 2368 2369 return; 2370 } 2371 2372 /** 2373 * ata_eh_handle_port_resume - perform port resume operation 2374 * @ap: port to resume 2375 * 2376 * Resume @ap. 2377 * 2378 * LOCKING: 2379 * Kernel thread context (may sleep). 2380 */ 2381 static void ata_eh_handle_port_resume(struct ata_port *ap) 2382 { 2383 unsigned long flags; 2384 int rc = 0; 2385 2386 /* are we resuming? 
*/ 2387 spin_lock_irqsave(ap->lock, flags); 2388 if (!(ap->pflags & ATA_PFLAG_PM_PENDING) || 2389 ap->pm_mesg.event != PM_EVENT_ON) { 2390 spin_unlock_irqrestore(ap->lock, flags); 2391 return; 2392 } 2393 spin_unlock_irqrestore(ap->lock, flags); 2394 2395 WARN_ON(!(ap->pflags & ATA_PFLAG_SUSPENDED)); 2396 2397 if (ap->ops->port_resume) 2398 rc = ap->ops->port_resume(ap); 2399 2400 /* tell ACPI that we're resuming */ 2401 ata_acpi_on_resume(ap); 2402 2403 /* report result */ 2404 spin_lock_irqsave(ap->lock, flags); 2405 ap->pflags &= ~(ATA_PFLAG_PM_PENDING | ATA_PFLAG_SUSPENDED); 2406 if (ap->pm_result) { 2407 *ap->pm_result = rc; 2408 ap->pm_result = NULL; 2409 } 2410 spin_unlock_irqrestore(ap->lock, flags); 2411 } 2412 #endif /* CONFIG_PM */ 2413