xref: /openbmc/linux/drivers/ata/libata-eh.c (revision 6f366c1c751454df3d1c0f25f15ee0164821112a)
1 /*
2  *  libata-eh.c - libata error handling
3  *
4  *  Maintained by:  Jeff Garzik <jgarzik@pobox.com>
5  *    		    Please ALWAYS copy linux-ide@vger.kernel.org
6  *		    on emails.
7  *
8  *  Copyright 2006 Tejun Heo <htejun@gmail.com>
9  *
10  *
11  *  This program is free software; you can redistribute it and/or
12  *  modify it under the terms of the GNU General Public License as
13  *  published by the Free Software Foundation; either version 2, or
14  *  (at your option) any later version.
15  *
16  *  This program is distributed in the hope that it will be useful,
17  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
18  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
19  *  General Public License for more details.
20  *
21  *  You should have received a copy of the GNU General Public License
22  *  along with this program; see the file COPYING.  If not, write to
23  *  the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139,
24  *  USA.
25  *
26  *
27  *  libata documentation is available via 'make {ps|pdf}docs',
28  *  as Documentation/DocBook/libata.*
29  *
30  *  Hardware documentation available from http://www.t13.org/ and
31  *  http://www.sata-io.org/
32  *
33  */
34 
35 #include <linux/kernel.h>
36 #include <scsi/scsi.h>
37 #include <scsi/scsi_host.h>
38 #include <scsi/scsi_eh.h>
39 #include <scsi/scsi_device.h>
40 #include <scsi/scsi_cmnd.h>
41 #include "../scsi/scsi_transport_api.h"
42 
43 #include <linux/libata.h>
44 
45 #include "libata.h"
46 
47 enum {
48 	ATA_EH_SPDN_NCQ_OFF		= (1 << 0),
49 	ATA_EH_SPDN_SPEED_DOWN		= (1 << 1),
50 	ATA_EH_SPDN_FALLBACK_TO_PIO	= (1 << 2),
51 };
52 
53 static void __ata_port_freeze(struct ata_port *ap);
54 static void ata_eh_finish(struct ata_port *ap);
55 static void ata_eh_handle_port_suspend(struct ata_port *ap);
56 static void ata_eh_handle_port_resume(struct ata_port *ap);
57 
58 static void ata_ering_record(struct ata_ering *ering, int is_io,
59 			     unsigned int err_mask)
60 {
61 	struct ata_ering_entry *ent;
62 
63 	WARN_ON(!err_mask);
64 
65 	ering->cursor++;
66 	ering->cursor %= ATA_ERING_SIZE;
67 
68 	ent = &ering->ring[ering->cursor];
69 	ent->is_io = is_io;
70 	ent->err_mask = err_mask;
71 	ent->timestamp = get_jiffies_64();
72 }
73 
74 static void ata_ering_clear(struct ata_ering *ering)
75 {
76 	memset(ering, 0, sizeof(*ering));
77 }
78 
79 static int ata_ering_map(struct ata_ering *ering,
80 			 int (*map_fn)(struct ata_ering_entry *, void *),
81 			 void *arg)
82 {
83 	int idx, rc = 0;
84 	struct ata_ering_entry *ent;
85 
86 	idx = ering->cursor;
87 	do {
88 		ent = &ering->ring[idx];
89 		if (!ent->err_mask)
90 			break;
91 		rc = map_fn(ent, arg);
92 		if (rc)
93 			break;
94 		idx = (idx - 1 + ATA_ERING_SIZE) % ATA_ERING_SIZE;
95 	} while (idx != ering->cursor);
96 
97 	return rc;
98 }
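
/* Illustrative sketch (not part of the driver): ata_ering_map() walks
 * the ring from the most recent entry backwards and stops early when
 * the callback returns non-zero.  A minimal callback that counts the
 * recorded I/O errors could look like this; speed_down_verdict_cb()
 * below is the in-tree user of the interface.
 *
 *	static int count_io_errors_cb(struct ata_ering_entry *ent, void *arg)
 *	{
 *		int *nr_io = arg;
 *
 *		if (ent->is_io)
 *			(*nr_io)++;
 *		return 0;
 *	}
 *
 *	int nr_io = 0;
 *	ata_ering_map(&dev->ering, count_io_errors_cb, &nr_io);
 */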
99 
100 static unsigned int ata_eh_dev_action(struct ata_device *dev)
101 {
102 	struct ata_eh_context *ehc = &dev->ap->eh_context;
103 
104 	return ehc->i.action | ehc->i.dev_action[dev->devno];
105 }
106 
107 static void ata_eh_clear_action(struct ata_device *dev,
108 				struct ata_eh_info *ehi, unsigned int action)
109 {
110 	int i;
111 
112 	if (!dev) {
113 		ehi->action &= ~action;
114 		for (i = 0; i < ATA_MAX_DEVICES; i++)
115 			ehi->dev_action[i] &= ~action;
116 	} else {
117 		/* doesn't make sense for port-wide EH actions */
118 		WARN_ON(!(action & ATA_EH_PERDEV_MASK));
119 
120 		/* break ehi->action into ehi->dev_action */
121 		if (ehi->action & action) {
122 			for (i = 0; i < ATA_MAX_DEVICES; i++)
123 				ehi->dev_action[i] |= ehi->action & action;
124 			ehi->action &= ~action;
125 		}
126 
127 		/* turn off the specified per-dev action */
128 		ehi->dev_action[dev->devno] &= ~action;
129 	}
130 }
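
/* Worked example (hypothetical bits): suppose ATA_EH_REVALIDATE is
 * pending port-wide in ehi->action and we clear it for device 0.  The
 * code above first copies the bit into dev_action[] for every device,
 * removes it from ehi->action, and finally clears it from
 * dev_action[0] -- leaving device 1 with REVALIDATE still pending
 * while device 0 has none.
 */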
131 
132 /**
133  *	ata_scsi_timed_out - SCSI layer time out callback
134  *	@cmd: timed out SCSI command
135  *
136  *	Handles SCSI layer timeout.  We race with normal completion of
137  *	the qc for @cmd.  If the qc is already gone, we lose and let
138  *	the scsi command finish (EH_HANDLED).  Otherwise, the qc has
139  *	timed out and EH should be invoked.  Prevent ata_qc_complete()
140  *	from finishing it by setting EH_SCHEDULED and return
141  *	EH_NOT_HANDLED.
142  *
143  *	TODO: kill this function once old EH is gone.
144  *
145  *	LOCKING:
146  *	Called from timer context
147  *
148  *	RETURNS:
149  *	EH_HANDLED or EH_NOT_HANDLED
150  */
151 enum scsi_eh_timer_return ata_scsi_timed_out(struct scsi_cmnd *cmd)
152 {
153 	struct Scsi_Host *host = cmd->device->host;
154 	struct ata_port *ap = ata_shost_to_port(host);
155 	unsigned long flags;
156 	struct ata_queued_cmd *qc;
157 	enum scsi_eh_timer_return ret;
158 
159 	DPRINTK("ENTER\n");
160 
161 	if (ap->ops->error_handler) {
162 		ret = EH_NOT_HANDLED;
163 		goto out;
164 	}
165 
166 	ret = EH_HANDLED;
167 	spin_lock_irqsave(ap->lock, flags);
168 	qc = ata_qc_from_tag(ap, ap->active_tag);
169 	if (qc) {
170 		WARN_ON(qc->scsicmd != cmd);
171 		qc->flags |= ATA_QCFLAG_EH_SCHEDULED;
172 		qc->err_mask |= AC_ERR_TIMEOUT;
173 		ret = EH_NOT_HANDLED;
174 	}
175 	spin_unlock_irqrestore(ap->lock, flags);
176 
177  out:
178 	DPRINTK("EXIT, ret=%d\n", ret);
179 	return ret;
180 }
181 
182 /**
183  *	ata_scsi_error - SCSI layer error handler callback
184  *	@host: SCSI host on which error occurred
185  *
186  *	Handles SCSI-layer-thrown error events.
187  *
188  *	LOCKING:
189  *	Inherited from SCSI layer (none, can sleep)
190  *
191  *	RETURNS:
192  *	Zero.
193  */
194 void ata_scsi_error(struct Scsi_Host *host)
195 {
196 	struct ata_port *ap = ata_shost_to_port(host);
197 	int i, repeat_cnt = ATA_EH_MAX_REPEAT;
198 	unsigned long flags;
199 
200 	DPRINTK("ENTER\n");
201 
202 	/* synchronize with port task */
203 	ata_port_flush_task(ap);
204 
205 	/* synchronize with host lock and sort out timeouts */
206 
207 	/* For new EH, all qcs are finished in one of three ways -
208 	 * normal completion, error completion, and SCSI timeout.
209 	 * Both completions can race against SCSI timeout.  When normal
210 	 * completion wins, the qc never reaches EH.  When error
211 	 * completion wins, the qc has ATA_QCFLAG_FAILED set.
212 	 *
213 	 * When SCSI timeout wins, things are a bit more complex.
214 	 * Normal or error completion can occur after the timeout but
215 	 * before this point.  In such cases, both types of
216 	 * completions are honored.  A scmd is determined to have
217 	 * timed out iff its associated qc is active and not failed.
218 	 */
219 	if (ap->ops->error_handler) {
220 		struct scsi_cmnd *scmd, *tmp;
221 		int nr_timedout = 0;
222 
223 		spin_lock_irqsave(ap->lock, flags);
224 
225 		list_for_each_entry_safe(scmd, tmp, &host->eh_cmd_q, eh_entry) {
226 			struct ata_queued_cmd *qc;
227 
228 			for (i = 0; i < ATA_MAX_QUEUE; i++) {
229 				qc = __ata_qc_from_tag(ap, i);
230 				if (qc->flags & ATA_QCFLAG_ACTIVE &&
231 				    qc->scsicmd == scmd)
232 					break;
233 			}
234 
235 			if (i < ATA_MAX_QUEUE) {
236 				/* the scmd has an associated qc */
237 				if (!(qc->flags & ATA_QCFLAG_FAILED)) {
238 					/* which hasn't failed yet, timeout */
239 					qc->err_mask |= AC_ERR_TIMEOUT;
240 					qc->flags |= ATA_QCFLAG_FAILED;
241 					nr_timedout++;
242 				}
243 			} else {
244 				/* Normal completion occurred after
245 				 * SCSI timeout but before this point.
246 				 * Successfully complete it.
247 				 */
248 				scmd->retries = scmd->allowed;
249 				scsi_eh_finish_cmd(scmd, &ap->eh_done_q);
250 			}
251 		}
252 
253 		/* If we have timed out qcs, they belong to EH from
254 		 * this point on, but the state of the controller is
255 		 * unknown.  Freeze the port to make sure the IRQ
256 		 * handler doesn't diddle with those qcs.  This must
257 		 * be done atomically w.r.t. setting QCFLAG_FAILED.
258 		 */
259 		if (nr_timedout)
260 			__ata_port_freeze(ap);
261 
262 		spin_unlock_irqrestore(ap->lock, flags);
263 	} else
264 		spin_unlock_wait(ap->lock);
265 
266  repeat:
267 	/* invoke error handler */
268 	if (ap->ops->error_handler) {
269 		/* process port resume request */
270 		ata_eh_handle_port_resume(ap);
271 
272 		/* fetch & clear EH info */
273 		spin_lock_irqsave(ap->lock, flags);
274 
275 		memset(&ap->eh_context, 0, sizeof(ap->eh_context));
276 		ap->eh_context.i = ap->eh_info;
277 		memset(&ap->eh_info, 0, sizeof(ap->eh_info));
278 
279 		ap->pflags |= ATA_PFLAG_EH_IN_PROGRESS;
280 		ap->pflags &= ~ATA_PFLAG_EH_PENDING;
281 
282 		spin_unlock_irqrestore(ap->lock, flags);
283 
284 		/* invoke EH, skip if unloading or suspended */
285 		if (!(ap->pflags & (ATA_PFLAG_UNLOADING | ATA_PFLAG_SUSPENDED)))
286 			ap->ops->error_handler(ap);
287 		else
288 			ata_eh_finish(ap);
289 
290 		/* process port suspend request */
291 		ata_eh_handle_port_suspend(ap);
292 
293 		/* Exception might have happened after ->error_handler
294 		 * recovered the port but before this point.  Repeat
295 		 * EH in such case.
296 		 */
297 		spin_lock_irqsave(ap->lock, flags);
298 
299 		if (ap->pflags & ATA_PFLAG_EH_PENDING) {
300 			if (--repeat_cnt) {
301 				ata_port_printk(ap, KERN_INFO,
302 					"EH pending after completion, "
303 					"repeating EH (cnt=%d)\n", repeat_cnt);
304 				spin_unlock_irqrestore(ap->lock, flags);
305 				goto repeat;
306 			}
307 			ata_port_printk(ap, KERN_ERR, "EH pending after %d "
308 					"tries, giving up\n", ATA_EH_MAX_REPEAT);
309 		}
310 
311 		/* this run is complete, make sure EH info is clear */
312 		memset(&ap->eh_info, 0, sizeof(ap->eh_info));
313 
314 		/* Clear host_eh_scheduled while holding ap->lock such
315 		 * that if exception occurs after this point but
316 		 * before EH completion, SCSI midlayer will
317 		 * re-initiate EH.
318 		 */
319 		host->host_eh_scheduled = 0;
320 
321 		spin_unlock_irqrestore(ap->lock, flags);
322 	} else {
323 		WARN_ON(ata_qc_from_tag(ap, ap->active_tag) == NULL);
324 		ap->ops->eng_timeout(ap);
325 	}
326 
327 	/* finish or retry handled scmd's and clean up */
328 	WARN_ON(host->host_failed || !list_empty(&host->eh_cmd_q));
329 
330 	scsi_eh_flush_done_q(&ap->eh_done_q);
331 
332 	/* clean up */
333 	spin_lock_irqsave(ap->lock, flags);
334 
335 	if (ap->pflags & ATA_PFLAG_LOADING)
336 		ap->pflags &= ~ATA_PFLAG_LOADING;
337 	else if (ap->pflags & ATA_PFLAG_SCSI_HOTPLUG)
338 		queue_delayed_work(ata_aux_wq, &ap->hotplug_task, 0);
339 
340 	if (ap->pflags & ATA_PFLAG_RECOVERED)
341 		ata_port_printk(ap, KERN_INFO, "EH complete\n");
342 
343 	ap->pflags &= ~(ATA_PFLAG_SCSI_HOTPLUG | ATA_PFLAG_RECOVERED);
344 
345 	/* tell wait_eh that we're done */
346 	ap->pflags &= ~ATA_PFLAG_EH_IN_PROGRESS;
347 	wake_up_all(&ap->eh_wait_q);
348 
349 	spin_unlock_irqrestore(ap->lock, flags);
350 
351 	DPRINTK("EXIT\n");
352 }
353 
354 /**
355  *	ata_port_wait_eh - Wait for the currently pending EH to complete
356  *	@ap: Port to wait EH for
357  *
358  *	Wait until the currently pending EH is complete.
359  *
360  *	LOCKING:
361  *	Kernel thread context (may sleep).
362  */
363 void ata_port_wait_eh(struct ata_port *ap)
364 {
365 	unsigned long flags;
366 	DEFINE_WAIT(wait);
367 
368  retry:
369 	spin_lock_irqsave(ap->lock, flags);
370 
371 	while (ap->pflags & (ATA_PFLAG_EH_PENDING | ATA_PFLAG_EH_IN_PROGRESS)) {
372 		prepare_to_wait(&ap->eh_wait_q, &wait, TASK_UNINTERRUPTIBLE);
373 		spin_unlock_irqrestore(ap->lock, flags);
374 		schedule();
375 		spin_lock_irqsave(ap->lock, flags);
376 	}
377 	finish_wait(&ap->eh_wait_q, &wait);
378 
379 	spin_unlock_irqrestore(ap->lock, flags);
380 
381 	/* make sure SCSI EH is complete */
382 	if (scsi_host_in_recovery(ap->scsi_host)) {
383 		msleep(10);
384 		goto retry;
385 	}
386 }
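
/* Usage sketch (illustrative, not from this file): a caller that
 * needs EH to run to completion schedules it under ap->lock and then
 * sleeps in ata_port_wait_eh() from kernel thread context:
 *
 *	spin_lock_irqsave(ap->lock, flags);
 *	ata_port_schedule_eh(ap);
 *	spin_unlock_irqrestore(ap->lock, flags);
 *
 *	ata_port_wait_eh(ap);
 */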
387 
388 /**
389  *	ata_qc_timeout - Handle timeout of queued command
390  *	@qc: Command that timed out
391  *
392  *	Some part of the kernel (currently, only the SCSI layer)
393  *	has noticed that the active command on port @ap has not
394  *	completed after a specified length of time.  Handle this
395  *	condition by disabling DMA (if necessary) and completing
396  *	transactions, with error if necessary.
397  *
398  *	This also handles the case of the "lost interrupt", where
399  *	for some reason (possibly hardware bug, possibly driver bug)
400  *	an interrupt was not delivered to the driver, even though the
401  *	transaction completed successfully.
402  *
403  *	TODO: kill this function once old EH is gone.
404  *
405  *	LOCKING:
406  *	Inherited from SCSI layer (none, can sleep)
407  */
408 static void ata_qc_timeout(struct ata_queued_cmd *qc)
409 {
410 	struct ata_port *ap = qc->ap;
411 	u8 host_stat = 0, drv_stat;
412 	unsigned long flags;
413 
414 	DPRINTK("ENTER\n");
415 
416 	ap->hsm_task_state = HSM_ST_IDLE;
417 
418 	spin_lock_irqsave(ap->lock, flags);
419 
420 	switch (qc->tf.protocol) {
421 
422 	case ATA_PROT_DMA:
423 	case ATA_PROT_ATAPI_DMA:
424 		host_stat = ap->ops->bmdma_status(ap);
425 
426 		/* before we do anything else, clear DMA-Start bit */
427 		ap->ops->bmdma_stop(qc);
428 
429 		/* fall through */
430 
431 	default:
432 		ata_altstatus(ap);
433 		drv_stat = ata_chk_status(ap);
434 
435 		/* ack bmdma irq events */
436 		ap->ops->irq_clear(ap);
437 
438 		ata_dev_printk(qc->dev, KERN_ERR, "command 0x%x timeout, "
439 			       "stat 0x%x host_stat 0x%x\n",
440 			       qc->tf.command, drv_stat, host_stat);
441 
442 		/* complete taskfile transaction */
443 		qc->err_mask |= AC_ERR_TIMEOUT;
444 		break;
445 	}
446 
447 	spin_unlock_irqrestore(ap->lock, flags);
448 
449 	ata_eh_qc_complete(qc);
450 
451 	DPRINTK("EXIT\n");
452 }
453 
454 /**
455  *	ata_eng_timeout - Handle timeout of queued command
456  *	@ap: Port on which timed-out command is active
457  *
458  *	Some part of the kernel (currently, only the SCSI layer)
459  *	has noticed that the active command on port @ap has not
460  *	completed after a specified length of time.  Handle this
461  *	condition by disabling DMA (if necessary) and completing
462  *	transactions, with error if necessary.
463  *
464  *	This also handles the case of the "lost interrupt", where
465  *	for some reason (possibly hardware bug, possibly driver bug)
466  *	an interrupt was not delivered to the driver, even though the
467  *	transaction completed successfully.
468  *
469  *	TODO: kill this function once old EH is gone.
470  *
471  *	LOCKING:
472  *	Inherited from SCSI layer (none, can sleep)
473  */
474 void ata_eng_timeout(struct ata_port *ap)
475 {
476 	DPRINTK("ENTER\n");
477 
478 	ata_qc_timeout(ata_qc_from_tag(ap, ap->active_tag));
479 
480 	DPRINTK("EXIT\n");
481 }
482 
483 /**
484  *	ata_qc_schedule_eh - schedule qc for error handling
485  *	@qc: command to schedule error handling for
486  *
487  *	Schedule error handling for @qc.  EH will kick in as soon as
488  *	other commands are drained.
489  *
490  *	LOCKING:
491  *	spin_lock_irqsave(host lock)
492  */
493 void ata_qc_schedule_eh(struct ata_queued_cmd *qc)
494 {
495 	struct ata_port *ap = qc->ap;
496 
497 	WARN_ON(!ap->ops->error_handler);
498 
499 	qc->flags |= ATA_QCFLAG_FAILED;
500 	qc->ap->pflags |= ATA_PFLAG_EH_PENDING;
501 
502 	/* The following will fail if timeout has already expired.
503 	 * ata_scsi_error() takes care of such scmds on EH entry.
504 	 * Note that ATA_QCFLAG_FAILED is unconditionally set after
505 	 * this function completes.
506 	 */
507 	scsi_req_abort_cmd(qc->scsicmd);
508 }
509 
510 /**
511  *	ata_port_schedule_eh - schedule error handling without a qc
512  *	@ap: ATA port to schedule EH for
513  *
514  *	Schedule error handling for @ap.  EH will kick in as soon as
515  *	all commands are drained.
516  *
517  *	LOCKING:
518  *	spin_lock_irqsave(host lock)
519  */
520 void ata_port_schedule_eh(struct ata_port *ap)
521 {
522 	WARN_ON(!ap->ops->error_handler);
523 
524 	ap->pflags |= ATA_PFLAG_EH_PENDING;
525 	scsi_schedule_eh(ap->scsi_host);
526 
527 	DPRINTK("port EH scheduled\n");
528 }
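
/* Illustrative sketch: an interrupt handler that notices an
 * asynchronous event records it in ap->eh_info before scheduling EH,
 * all under the host lock.  ata_ehi_hotplugged() is the same helper
 * this file uses for PHY status changes.
 *
 *	spin_lock(ap->lock);
 *	ata_ehi_hotplugged(&ap->eh_info);
 *	ata_port_schedule_eh(ap);
 *	spin_unlock(ap->lock);
 */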
529 
530 /**
531  *	ata_port_abort - abort all qc's on the port
532  *	@ap: ATA port to abort qc's for
533  *
534  *	Abort all active qc's of @ap and schedule EH.
535  *
536  *	LOCKING:
537  *	spin_lock_irqsave(host lock)
538  *
539  *	RETURNS:
540  *	Number of aborted qc's.
541  */
542 int ata_port_abort(struct ata_port *ap)
543 {
544 	int tag, nr_aborted = 0;
545 
546 	WARN_ON(!ap->ops->error_handler);
547 
548 	for (tag = 0; tag < ATA_MAX_QUEUE; tag++) {
549 		struct ata_queued_cmd *qc = ata_qc_from_tag(ap, tag);
550 
551 		if (qc) {
552 			qc->flags |= ATA_QCFLAG_FAILED;
553 			ata_qc_complete(qc);
554 			nr_aborted++;
555 		}
556 	}
557 
558 	if (!nr_aborted)
559 		ata_port_schedule_eh(ap);
560 
561 	return nr_aborted;
562 }
563 
564 /**
565  *	__ata_port_freeze - freeze port
566  *	@ap: ATA port to freeze
567  *
568  *	This function is called when HSM violation or some other
569  *	condition disrupts normal operation of the port.  Frozen port
570  *	is not allowed to perform any operation until the port is
571  *	thawed, which usually follows a successful reset.
572  *
573  *	ap->ops->freeze() callback can be used for freezing the port
574  *	hardware-wise (e.g. mask interrupt and stop DMA engine).  If a
575  *	port cannot be frozen hardware-wise, the interrupt handler
576  *	must ack and clear interrupts unconditionally while the port
577  *	is frozen.
578  *
579  *	LOCKING:
580  *	spin_lock_irqsave(host lock)
581  */
582 static void __ata_port_freeze(struct ata_port *ap)
583 {
584 	WARN_ON(!ap->ops->error_handler);
585 
586 	if (ap->ops->freeze)
587 		ap->ops->freeze(ap);
588 
589 	ap->pflags |= ATA_PFLAG_FROZEN;
590 
591 	DPRINTK("ata%u port frozen\n", ap->print_id);
592 }
593 
594 /**
595  *	ata_port_freeze - abort & freeze port
596  *	@ap: ATA port to freeze
597  *
598  *	Abort and freeze @ap.
599  *
600  *	LOCKING:
601  *	spin_lock_irqsave(host lock)
602  *
603  *	RETURNS:
604  *	Number of aborted commands.
605  */
606 int ata_port_freeze(struct ata_port *ap)
607 {
608 	int nr_aborted;
609 
610 	WARN_ON(!ap->ops->error_handler);
611 
612 	nr_aborted = ata_port_abort(ap);
613 	__ata_port_freeze(ap);
614 
615 	return nr_aborted;
616 }
617 
618 /**
619  *	ata_eh_freeze_port - EH helper to freeze port
620  *	@ap: ATA port to freeze
621  *
622  *	Freeze @ap.
623  *
624  *	LOCKING:
625  *	None.
626  */
627 void ata_eh_freeze_port(struct ata_port *ap)
628 {
629 	unsigned long flags;
630 
631 	if (!ap->ops->error_handler)
632 		return;
633 
634 	spin_lock_irqsave(ap->lock, flags);
635 	__ata_port_freeze(ap);
636 	spin_unlock_irqrestore(ap->lock, flags);
637 }
638 
639 /**
640  *	ata_eh_thaw_port - EH helper to thaw port
641  *	@ap: ATA port to thaw
642  *
643  *	Thaw frozen port @ap.
644  *
645  *	LOCKING:
646  *	None.
647  */
648 void ata_eh_thaw_port(struct ata_port *ap)
649 {
650 	unsigned long flags;
651 
652 	if (!ap->ops->error_handler)
653 		return;
654 
655 	spin_lock_irqsave(ap->lock, flags);
656 
657 	ap->pflags &= ~ATA_PFLAG_FROZEN;
658 
659 	if (ap->ops->thaw)
660 		ap->ops->thaw(ap);
661 
662 	spin_unlock_irqrestore(ap->lock, flags);
663 
664 	DPRINTK("ata%u port thawed\n", ap->print_id);
665 }
666 
667 static void ata_eh_scsidone(struct scsi_cmnd *scmd)
668 {
669 	/* nada */
670 }
671 
672 static void __ata_eh_qc_complete(struct ata_queued_cmd *qc)
673 {
674 	struct ata_port *ap = qc->ap;
675 	struct scsi_cmnd *scmd = qc->scsicmd;
676 	unsigned long flags;
677 
678 	spin_lock_irqsave(ap->lock, flags);
679 	qc->scsidone = ata_eh_scsidone;
680 	__ata_qc_complete(qc);
681 	WARN_ON(ata_tag_valid(qc->tag));
682 	spin_unlock_irqrestore(ap->lock, flags);
683 
684 	scsi_eh_finish_cmd(scmd, &ap->eh_done_q);
685 }
686 
687 /**
688  *	ata_eh_qc_complete - Complete an active ATA command from EH
689  *	@qc: Command to complete
690  *
691  *	Indicate to the mid and upper layers that an ATA command has
692  *	completed.  To be used from EH.
693  */
694 void ata_eh_qc_complete(struct ata_queued_cmd *qc)
695 {
696 	struct scsi_cmnd *scmd = qc->scsicmd;
697 	scmd->retries = scmd->allowed;
698 	__ata_eh_qc_complete(qc);
699 }
700 
701 /**
702  *	ata_eh_qc_retry - Tell midlayer to retry an ATA command after EH
703  *	@qc: Command to retry
704  *
705  *	Indicate to the mid and upper layers that an ATA command
706  *	should be retried.  To be used from EH.
707  *
708  *	SCSI midlayer limits the number of retries to scmd->allowed.
709  *	scmd->retries is decremented for commands which get retried
710  *	due to unrelated failures (qc->err_mask is zero).
711  */
712 void ata_eh_qc_retry(struct ata_queued_cmd *qc)
713 {
714 	struct scsi_cmnd *scmd = qc->scsicmd;
715 	if (!qc->err_mask && scmd->retries)
716 		scmd->retries--;
717 	__ata_eh_qc_complete(qc);
718 }
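
/* Illustrative sketch of choosing between the two helpers above: EH
 * code completes a qc whose sense data already describes the failure
 * and retries the rest.  ata_eh_finish(), declared above and defined
 * later in this file, applies essentially this rule:
 *
 *	if (qc->flags & ATA_QCFLAG_SENSE_VALID)
 *		ata_eh_qc_complete(qc);
 *	else
 *		ata_eh_qc_retry(qc);
 */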
719 
720 /**
721  *	ata_eh_detach_dev - detach ATA device
722  *	@dev: ATA device to detach
723  *
724  *	Detach @dev.
725  *
726  *	LOCKING:
727  *	None.
728  */
729 static void ata_eh_detach_dev(struct ata_device *dev)
730 {
731 	struct ata_port *ap = dev->ap;
732 	unsigned long flags;
733 
734 	ata_dev_disable(dev);
735 
736 	spin_lock_irqsave(ap->lock, flags);
737 
738 	dev->flags &= ~ATA_DFLAG_DETACH;
739 
740 	if (ata_scsi_offline_dev(dev)) {
741 		dev->flags |= ATA_DFLAG_DETACHED;
742 		ap->pflags |= ATA_PFLAG_SCSI_HOTPLUG;
743 	}
744 
745 	/* clear per-dev EH actions */
746 	ata_eh_clear_action(dev, &ap->eh_info, ATA_EH_PERDEV_MASK);
747 	ata_eh_clear_action(dev, &ap->eh_context.i, ATA_EH_PERDEV_MASK);
748 
749 	spin_unlock_irqrestore(ap->lock, flags);
750 }
751 
752 /**
753  *	ata_eh_about_to_do - about to perform eh_action
754  *	@ap: target ATA port
755  *	@dev: target ATA dev for per-dev action (can be NULL)
756  *	@action: action about to be performed
757  *
758  *	Called just before performing EH actions to clear related bits
759  *	in @ap->eh_info such that eh actions are not unnecessarily
760  *	repeated.
761  *
762  *	LOCKING:
763  *	None.
764  */
765 static void ata_eh_about_to_do(struct ata_port *ap, struct ata_device *dev,
766 			       unsigned int action)
767 {
768 	unsigned long flags;
769 	struct ata_eh_info *ehi = &ap->eh_info;
770 	struct ata_eh_context *ehc = &ap->eh_context;
771 
772 	spin_lock_irqsave(ap->lock, flags);
773 
774 	/* Reset is represented by combination of actions and EHI
775 	 * flags.  Suck in all related bits before clearing eh_info to
776 	 * avoid losing requested action.
777 	 */
778 	if (action & ATA_EH_RESET_MASK) {
779 		ehc->i.action |= ehi->action & ATA_EH_RESET_MASK;
780 		ehc->i.flags |= ehi->flags & ATA_EHI_RESET_MODIFIER_MASK;
781 
782 		/* make sure all reset actions are cleared & clear EHI flags */
783 		action |= ATA_EH_RESET_MASK;
784 		ehi->flags &= ~ATA_EHI_RESET_MODIFIER_MASK;
785 	}
786 
787 	ata_eh_clear_action(dev, ehi, action);
788 
789 	if (!(ehc->i.flags & ATA_EHI_QUIET))
790 		ap->pflags |= ATA_PFLAG_RECOVERED;
791 
792 	spin_unlock_irqrestore(ap->lock, flags);
793 }
794 
795 /**
796  *	ata_eh_done - EH action complete
797  *	@ap: target ATA port
798  *	@dev: target ATA dev for per-dev action (can be NULL)
799  *	@action: action just completed
800  *
801  *	Called right after performing EH actions to clear related bits
802  *	in @ap->eh_context.
803  *
804  *	LOCKING:
805  *	None.
806  */
807 static void ata_eh_done(struct ata_port *ap, struct ata_device *dev,
808 			unsigned int action)
809 {
810 	/* if reset is complete, clear all reset actions & reset modifier */
811 	if (action & ATA_EH_RESET_MASK) {
812 		action |= ATA_EH_RESET_MASK;
813 		ap->eh_context.i.flags &= ~ATA_EHI_RESET_MODIFIER_MASK;
814 	}
815 
816 	ata_eh_clear_action(dev, &ap->eh_context.i, action);
817 }
818 
819 /**
820  *	ata_err_string - convert err_mask to descriptive string
821  *	@err_mask: error mask to convert to string
822  *
823  *	Convert @err_mask to descriptive string.  Errors are
824  *	prioritized according to severity and only the most severe
825  *	error is reported.
826  *
827  *	LOCKING:
828  *	None.
829  *
830  *	RETURNS:
831  *	Descriptive string for @err_mask
832  */
833 static const char * ata_err_string(unsigned int err_mask)
834 {
835 	if (err_mask & AC_ERR_HOST_BUS)
836 		return "host bus error";
837 	if (err_mask & AC_ERR_ATA_BUS)
838 		return "ATA bus error";
839 	if (err_mask & AC_ERR_TIMEOUT)
840 		return "timeout";
841 	if (err_mask & AC_ERR_HSM)
842 		return "HSM violation";
843 	if (err_mask & AC_ERR_SYSTEM)
844 		return "internal error";
845 	if (err_mask & AC_ERR_MEDIA)
846 		return "media error";
847 	if (err_mask & AC_ERR_INVALID)
848 		return "invalid argument";
849 	if (err_mask & AC_ERR_DEV)
850 		return "device error";
851 	return "unknown error";
852 }
853 
854 /**
855  *	ata_read_log_page - read a specific log page
856  *	@dev: target device
857  *	@page: page to read
858  *	@buf: buffer to store read page
859  *	@sectors: number of sectors to read
860  *
861  *	Read log page using READ_LOG_EXT command.
862  *
863  *	LOCKING:
864  *	Kernel thread context (may sleep).
865  *
866  *	RETURNS:
867  *	0 on success, AC_ERR_* mask otherwise.
868  */
869 static unsigned int ata_read_log_page(struct ata_device *dev,
870 				      u8 page, void *buf, unsigned int sectors)
871 {
872 	struct ata_taskfile tf;
873 	unsigned int err_mask;
874 
875 	DPRINTK("read log page - page %d\n", page);
876 
877 	ata_tf_init(dev, &tf);
878 	tf.command = ATA_CMD_READ_LOG_EXT;
879 	tf.lbal = page;
880 	tf.nsect = sectors;
881 	tf.hob_nsect = sectors >> 8;
882 	tf.flags |= ATA_TFLAG_ISADDR | ATA_TFLAG_LBA48 | ATA_TFLAG_DEVICE;
883 	tf.protocol = ATA_PROT_PIO;
884 
885 	err_mask = ata_exec_internal(dev, &tf, NULL, DMA_FROM_DEVICE,
886 				     buf, sectors * ATA_SECT_SIZE);
887 
888 	DPRINTK("EXIT, err_mask=%x\n", err_mask);
889 	return err_mask;
890 }
891 
892 /**
893  *	ata_eh_read_log_10h - Read log page 10h for NCQ error details
894  *	@dev: Device to read log page 10h from
895  *	@tag: Resulting tag of the failed command
896  *	@tf: Resulting taskfile registers of the failed command
897  *
898  *	Read log page 10h to obtain NCQ error details and clear error
899  *	condition.
900  *
901  *	LOCKING:
902  *	Kernel thread context (may sleep).
903  *
904  *	RETURNS:
905  *	0 on success, -errno otherwise.
906  */
907 static int ata_eh_read_log_10h(struct ata_device *dev,
908 			       int *tag, struct ata_taskfile *tf)
909 {
910 	u8 *buf = dev->ap->sector_buf;
911 	unsigned int err_mask;
912 	u8 csum;
913 	int i;
914 
915 	err_mask = ata_read_log_page(dev, ATA_LOG_SATA_NCQ, buf, 1);
916 	if (err_mask)
917 		return -EIO;
918 
919 	csum = 0;
920 	for (i = 0; i < ATA_SECT_SIZE; i++)
921 		csum += buf[i];
922 	if (csum)
923 		ata_dev_printk(dev, KERN_WARNING,
924 			       "invalid checksum 0x%x on log page 10h\n", csum);
925 
926 	if (buf[0] & 0x80)
927 		return -ENOENT;
928 
929 	*tag = buf[0] & 0x1f;
930 
931 	tf->command = buf[2];
932 	tf->feature = buf[3];
933 	tf->lbal = buf[4];
934 	tf->lbam = buf[5];
935 	tf->lbah = buf[6];
936 	tf->device = buf[7];
937 	tf->hob_lbal = buf[8];
938 	tf->hob_lbam = buf[9];
939 	tf->hob_lbah = buf[10];
940 	tf->nsect = buf[12];
941 	tf->hob_nsect = buf[13];
942 
943 	return 0;
944 }
945 
946 /**
947  *	atapi_eh_request_sense - perform ATAPI REQUEST_SENSE
948  *	@dev: device to perform REQUEST_SENSE to
949  *	@sense_buf: result sense data buffer (SCSI_SENSE_BUFFERSIZE bytes long)
950  *
951  *	Perform ATAPI REQUEST_SENSE after the device reported CHECK
952  *	SENSE.  This function is EH helper.
953  *
954  *	LOCKING:
955  *	Kernel thread context (may sleep).
956  *
957  *	RETURNS:
958  *	0 on success, AC_ERR_* mask on failure
959  */
960 static unsigned int atapi_eh_request_sense(struct ata_device *dev,
961 					   unsigned char *sense_buf)
962 {
963 	struct ata_port *ap = dev->ap;
964 	struct ata_taskfile tf;
965 	u8 cdb[ATAPI_CDB_LEN];
966 
967 	DPRINTK("ATAPI request sense\n");
968 
969 	ata_tf_init(dev, &tf);
970 
971 	/* FIXME: is this needed? */
972 	memset(sense_buf, 0, SCSI_SENSE_BUFFERSIZE);
973 
974 	/* XXX: why tf_read here? */
975 	ap->ops->tf_read(ap, &tf);
976 
977 	/* fill these in, for the case where they are -not- overwritten */
978 	sense_buf[0] = 0x70;
979 	sense_buf[2] = tf.feature >> 4;
980 
981 	memset(cdb, 0, ATAPI_CDB_LEN);
982 	cdb[0] = REQUEST_SENSE;
983 	cdb[4] = SCSI_SENSE_BUFFERSIZE;
984 
985 	tf.flags |= ATA_TFLAG_ISADDR | ATA_TFLAG_DEVICE;
986 	tf.command = ATA_CMD_PACKET;
987 
988 	/* is it pointless to prefer PIO for "safety reasons"? */
989 	if (ap->flags & ATA_FLAG_PIO_DMA) {
990 		tf.protocol = ATA_PROT_ATAPI_DMA;
991 		tf.feature |= ATAPI_PKT_DMA;
992 	} else {
993 		tf.protocol = ATA_PROT_ATAPI;
994 		tf.lbam = (8 * 1024) & 0xff;
995 		tf.lbah = (8 * 1024) >> 8;
996 	}
997 
998 	return ata_exec_internal(dev, &tf, cdb, DMA_FROM_DEVICE,
999 				 sense_buf, SCSI_SENSE_BUFFERSIZE);
1000 }
1001 
1002 /**
1003  *	ata_eh_analyze_serror - analyze SError for a failed port
1004  *	@ap: ATA port to analyze SError for
1005  *
1006  *	Analyze SError if available and further determine cause of
1007  *	failure.
1008  *
1009  *	LOCKING:
1010  *	None.
1011  */
1012 static void ata_eh_analyze_serror(struct ata_port *ap)
1013 {
1014 	struct ata_eh_context *ehc = &ap->eh_context;
1015 	u32 serror = ehc->i.serror;
1016 	unsigned int err_mask = 0, action = 0;
1017 
1018 	if (serror & SERR_PERSISTENT) {
1019 		err_mask |= AC_ERR_ATA_BUS;
1020 		action |= ATA_EH_HARDRESET;
1021 	}
1022 	if (serror &
1023 	    (SERR_DATA_RECOVERED | SERR_COMM_RECOVERED | SERR_DATA)) {
1024 		err_mask |= AC_ERR_ATA_BUS;
1025 		action |= ATA_EH_SOFTRESET;
1026 	}
1027 	if (serror & SERR_PROTOCOL) {
1028 		err_mask |= AC_ERR_HSM;
1029 		action |= ATA_EH_SOFTRESET;
1030 	}
1031 	if (serror & SERR_INTERNAL) {
1032 		err_mask |= AC_ERR_SYSTEM;
1033 		action |= ATA_EH_SOFTRESET;
1034 	}
1035 	if (serror & (SERR_PHYRDY_CHG | SERR_DEV_XCHG))
1036 		ata_ehi_hotplugged(&ehc->i);
1037 
1038 	ehc->i.err_mask |= err_mask;
1039 	ehc->i.action |= action;
1040 }
1041 
1042 /**
1043  *	ata_eh_analyze_ncq_error - analyze NCQ error
1044  *	@ap: ATA port to analyze NCQ error for
1045  *
1046  *	Read log page 10h, determine the offending qc and acquire
1047  *	error status TF.  For NCQ device errors, all an LLDD has to do
1048  *	is set AC_ERR_DEV in ehi->err_mask.  This function takes
1049  *	care of the rest.
1050  *
1051  *	LOCKING:
1052  *	Kernel thread context (may sleep).
1053  */
1054 static void ata_eh_analyze_ncq_error(struct ata_port *ap)
1055 {
1056 	struct ata_eh_context *ehc = &ap->eh_context;
1057 	struct ata_device *dev = ap->device;
1058 	struct ata_queued_cmd *qc;
1059 	struct ata_taskfile tf;
1060 	int tag, rc;
1061 
1062 	/* if frozen, we can't do much */
1063 	if (ap->pflags & ATA_PFLAG_FROZEN)
1064 		return;
1065 
1066 	/* is it NCQ device error? */
1067 	if (!ap->sactive || !(ehc->i.err_mask & AC_ERR_DEV))
1068 		return;
1069 
1070 	/* has LLDD analyzed already? */
1071 	for (tag = 0; tag < ATA_MAX_QUEUE; tag++) {
1072 		qc = __ata_qc_from_tag(ap, tag);
1073 
1074 		if (!(qc->flags & ATA_QCFLAG_FAILED))
1075 			continue;
1076 
1077 		if (qc->err_mask)
1078 			return;
1079 	}
1080 
1081 	/* okay, this error is ours */
1082 	rc = ata_eh_read_log_10h(dev, &tag, &tf);
1083 	if (rc) {
1084 		ata_port_printk(ap, KERN_ERR, "failed to read log page 10h "
1085 				"(errno=%d)\n", rc);
1086 		return;
1087 	}
1088 
1089 	if (!(ap->sactive & (1 << tag))) {
1090 		ata_port_printk(ap, KERN_ERR, "log page 10h reported "
1091 				"inactive tag %d\n", tag);
1092 		return;
1093 	}
1094 
1095 	/* we've got the perpetrator, condemn it */
1096 	qc = __ata_qc_from_tag(ap, tag);
1097 	memcpy(&qc->result_tf, &tf, sizeof(tf));
1098 	qc->err_mask |= AC_ERR_DEV;
1099 	ehc->i.err_mask &= ~AC_ERR_DEV;
1100 }
1101 
1102 /**
1103  *	ata_eh_analyze_tf - analyze taskfile of a failed qc
1104  *	@qc: qc to analyze
1105  *	@tf: Taskfile registers to analyze
1106  *
1107  *	Analyze taskfile of @qc and further determine cause of
1108  *	failure.  This function also requests ATAPI sense data if
1109  *	available.
1110  *
1111  *	LOCKING:
1112  *	Kernel thread context (may sleep).
1113  *
1114  *	RETURNS:
1115  *	Determined recovery action
1116  */
1117 static unsigned int ata_eh_analyze_tf(struct ata_queued_cmd *qc,
1118 				      const struct ata_taskfile *tf)
1119 {
1120 	unsigned int tmp, action = 0;
1121 	u8 stat = tf->command, err = tf->feature;
1122 
1123 	if ((stat & (ATA_BUSY | ATA_DRQ | ATA_DRDY)) != ATA_DRDY) {
1124 		qc->err_mask |= AC_ERR_HSM;
1125 		return ATA_EH_SOFTRESET;
1126 	}
1127 
1128 	if (!(qc->err_mask & AC_ERR_DEV))
1129 		return 0;
1130 
1131 	switch (qc->dev->class) {
1132 	case ATA_DEV_ATA:
1133 		if (err & ATA_ICRC)
1134 			qc->err_mask |= AC_ERR_ATA_BUS;
1135 		if (err & ATA_UNC)
1136 			qc->err_mask |= AC_ERR_MEDIA;
1137 		if (err & ATA_IDNF)
1138 			qc->err_mask |= AC_ERR_INVALID;
1139 		break;
1140 
1141 	case ATA_DEV_ATAPI:
1142 		if (!(qc->ap->pflags & ATA_PFLAG_FROZEN)) {
1143 			tmp = atapi_eh_request_sense(qc->dev,
1144 						     qc->scsicmd->sense_buffer);
1145 			if (!tmp) {
1146 				/* ATA_QCFLAG_SENSE_VALID is used to
1147 				 * tell atapi_qc_complete() that sense
1148 				 * data is already valid.
1149 				 *
1150 				 * TODO: interpret sense data and set
1151 				 * appropriate err_mask.
1152 				 */
1153 				qc->flags |= ATA_QCFLAG_SENSE_VALID;
1154 			} else
1155 				qc->err_mask |= tmp;
1156 		}
1157 	}
1158 
1159 	if (qc->err_mask & (AC_ERR_HSM | AC_ERR_TIMEOUT | AC_ERR_ATA_BUS))
1160 		action |= ATA_EH_SOFTRESET;
1161 
1162 	return action;
1163 }
1164 
1165 static int ata_eh_categorize_error(int is_io, unsigned int err_mask)
1166 {
1167 	if (err_mask & AC_ERR_ATA_BUS)
1168 		return 1;
1169 
1170 	if (err_mask & AC_ERR_TIMEOUT)
1171 		return 2;
1172 
1173 	if (is_io) {
1174 		if (err_mask & AC_ERR_HSM)
1175 			return 2;
1176 		if ((err_mask &
1177 		     (AC_ERR_DEV|AC_ERR_MEDIA|AC_ERR_INVALID)) == AC_ERR_DEV)
1178 			return 3;
1179 	}
1180 
1181 	return 0;
1182 }
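
/* Quick reference for the mapping above (first match wins):
 *
 *	err_mask contains                  is_io    category
 *	AC_ERR_ATA_BUS                     any      1
 *	AC_ERR_TIMEOUT                     any      2
 *	AC_ERR_HSM                         1        2
 *	AC_ERR_DEV without MEDIA/INVALID   1        3
 *	anything else                      any      0
 */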
1183 
1184 struct speed_down_verdict_arg {
1185 	u64 since;
1186 	int nr_errors[4];
1187 };
1188 
1189 static int speed_down_verdict_cb(struct ata_ering_entry *ent, void *void_arg)
1190 {
1191 	struct speed_down_verdict_arg *arg = void_arg;
1192 	int cat = ata_eh_categorize_error(ent->is_io, ent->err_mask);
1193 
1194 	if (ent->timestamp < arg->since)
1195 		return -1;
1196 
1197 	arg->nr_errors[cat]++;
1198 	return 0;
1199 }
1200 
1201 /**
1202  *	ata_eh_speed_down_verdict - Determine speed down verdict
1203  *	@dev: Device of interest
1204  *
1205  *	This function examines error ring of @dev and determines
1206  *	whether NCQ needs to be turned off, transfer speed should be
1207  *	stepped down, or falling back to PIO is necessary.
1208  *
1209  *	Cat-1 is ATA_BUS error for any command.
1210  *
1211  *	Cat-2 is TIMEOUT for any command or HSM violation for known
1212  *	supported commands.
1213  *
1214  *	Cat-3 is unclassified DEV error for a known supported
1215  *	command.
1216  *
1217  *	NCQ needs to be turned off if there have been more than 3
1218  *	Cat-2 + Cat-3 errors during last 10 minutes.
1219  *
1220  *	Speed down is necessary if there have been more than 3 Cat-1 +
1221  *	Cat-2 errors or 10 Cat-3 errors during last 10 minutes.
1222  *
1223  *	Falling back to PIO mode is necessary if there have been more
1224  *	than 10 Cat-1 + Cat-2 + Cat-3 errors during last 5 minutes.
1225  *
1226  *	LOCKING:
1227  *	Inherited from caller.
1228  *
1229  *	RETURNS:
1230  *	OR of ATA_EH_SPDN_* flags.
1231  */
1232 static unsigned int ata_eh_speed_down_verdict(struct ata_device *dev)
1233 {
1234 	const u64 j5mins = 5LLU * 60 * HZ, j10mins = 10LLU * 60 * HZ;
1235 	u64 j64 = get_jiffies_64();
1236 	struct speed_down_verdict_arg arg;
1237 	unsigned int verdict = 0;
1238 
1239 	/* scan past 10 mins of error history */
1240 	memset(&arg, 0, sizeof(arg));
1241 	arg.since = j64 - min(j64, j10mins);
1242 	ata_ering_map(&dev->ering, speed_down_verdict_cb, &arg);
1243 
1244 	if (arg.nr_errors[2] + arg.nr_errors[3] > 3)
1245 		verdict |= ATA_EH_SPDN_NCQ_OFF;
1246 	if (arg.nr_errors[1] + arg.nr_errors[2] > 3 || arg.nr_errors[3] > 10)
1247 		verdict |= ATA_EH_SPDN_SPEED_DOWN;
1248 
1249 	/* scan past 5 mins of error history */
1250 	memset(&arg, 0, sizeof(arg));
1251 	arg.since = j64 - min(j64, j5mins);
1252 	ata_ering_map(&dev->ering, speed_down_verdict_cb, &arg);
1253 
1254 	if (arg.nr_errors[1] + arg.nr_errors[2] + arg.nr_errors[3] > 10)
1255 		verdict |= ATA_EH_SPDN_FALLBACK_TO_PIO;
1256 
1257 	return verdict;
1258 }
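
/* Worked example (hypothetical history): four Cat-2 timeouts logged
 * within the last 10 minutes give nr_errors[2] == 4, so both tests
 * above fire (4 > 3) and the verdict is ATA_EH_SPDN_NCQ_OFF |
 * ATA_EH_SPDN_SPEED_DOWN.  ATA_EH_SPDN_FALLBACK_TO_PIO is added only
 * when more than 10 Cat-1/2/3 errors land within the last 5 minutes.
 */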
1259 
1260 /**
1261  *	ata_eh_speed_down - record error and speed down if necessary
1262  *	@dev: Failed device
1263  *	@is_io: Did the device fail during normal IO?
1264  *	@err_mask: err_mask of the error
1265  *
1266  *	Record error and examine error history to determine whether
1267  *	adjusting transmission speed is necessary.  It also sets
1268  *	transmission limits appropriately if such adjustment is
1269  *	necessary.
1270  *
1271  *	LOCKING:
1272  *	Kernel thread context (may sleep).
1273  *
1274  *	RETURNS:
1275  *	Determined recovery action.
1276  */
1277 static unsigned int ata_eh_speed_down(struct ata_device *dev, int is_io,
1278 				      unsigned int err_mask)
1279 {
1280 	unsigned int verdict;
1281 	unsigned int action = 0;
1282 
1283 	/* don't bother if Cat-0 error */
1284 	if (ata_eh_categorize_error(is_io, err_mask) == 0)
1285 		return 0;
1286 
1287 	/* record error and determine whether speed down is necessary */
1288 	ata_ering_record(&dev->ering, is_io, err_mask);
1289 	verdict = ata_eh_speed_down_verdict(dev);
1290 
1291 	/* turn off NCQ? */
1292 	if ((verdict & ATA_EH_SPDN_NCQ_OFF) &&
1293 	    (dev->flags & (ATA_DFLAG_PIO | ATA_DFLAG_NCQ |
1294 			   ATA_DFLAG_NCQ_OFF)) == ATA_DFLAG_NCQ) {
1295 		dev->flags |= ATA_DFLAG_NCQ_OFF;
1296 		ata_dev_printk(dev, KERN_WARNING,
1297 			       "NCQ disabled due to excessive errors\n");
1298 		goto done;
1299 	}
1300 
1301 	/* speed down? */
1302 	if (verdict & ATA_EH_SPDN_SPEED_DOWN) {
1303 		/* speed down SATA link speed if possible */
1304 		if (sata_down_spd_limit(dev->ap) == 0) {
1305 			action |= ATA_EH_HARDRESET;
1306 			goto done;
1307 		}
1308 
1309 		/* lower transfer mode */
1310 		if (dev->spdn_cnt < 2) {
1311 			static const int dma_dnxfer_sel[] =
1312 				{ ATA_DNXFER_DMA, ATA_DNXFER_40C };
1313 			static const int pio_dnxfer_sel[] =
1314 				{ ATA_DNXFER_PIO, ATA_DNXFER_FORCE_PIO0 };
1315 			int sel;
1316 
1317 			if (dev->xfer_shift != ATA_SHIFT_PIO)
1318 				sel = dma_dnxfer_sel[dev->spdn_cnt];
1319 			else
1320 				sel = pio_dnxfer_sel[dev->spdn_cnt];
1321 
1322 			dev->spdn_cnt++;
1323 
1324 			if (ata_down_xfermask_limit(dev, sel) == 0) {
1325 				action |= ATA_EH_SOFTRESET;
1326 				goto done;
1327 			}
1328 		}
1329 	}
1330 
1331 	/* Fall back to PIO?  Slowing down to PIO is meaningless for
1332 	 * SATA.  Consider it only for PATA.
1333 	 */
1334 	if ((verdict & ATA_EH_SPDN_FALLBACK_TO_PIO) && (dev->spdn_cnt >= 2) &&
1335 	    (dev->ap->cbl != ATA_CBL_SATA) &&
1336 	    (dev->xfer_shift != ATA_SHIFT_PIO)) {
1337 		if (ata_down_xfermask_limit(dev, ATA_DNXFER_FORCE_PIO) == 0) {
1338 			dev->spdn_cnt = 0;
1339 			action |= ATA_EH_SOFTRESET;
1340 			goto done;
1341 		}
1342 	}
1343 
1344 	return 0;
1345  done:
1346 	/* device has been slowed down, blow error history */
1347 	ata_ering_clear(&dev->ering);
1348 	return action;
1349 }
1350 
1351 /**
1352  *	ata_eh_autopsy - analyze error and determine recovery action
1353  *	@ap: ATA port to perform autopsy on
1354  *
1355  *	Analyze why @ap failed and determine which recovery action is
1356  *	needed.  This function also sets more detailed AC_ERR_* values
1357  *	and fills sense data for ATAPI CHECK SENSE.
1358  *
1359  *	LOCKING:
1360  *	Kernel thread context (may sleep).
1361  */
1362 static void ata_eh_autopsy(struct ata_port *ap)
1363 {
1364 	struct ata_eh_context *ehc = &ap->eh_context;
1365 	unsigned int all_err_mask = 0;
1366 	int tag, is_io = 0;
1367 	u32 serror;
1368 	int rc;
1369 
1370 	DPRINTK("ENTER\n");
1371 
1372 	if (ehc->i.flags & ATA_EHI_NO_AUTOPSY)
1373 		return;
1374 
1375 	/* obtain and analyze SError */
1376 	rc = sata_scr_read(ap, SCR_ERROR, &serror);
1377 	if (rc == 0) {
1378 		ehc->i.serror |= serror;
1379 		ata_eh_analyze_serror(ap);
1380 	} else if (rc != -EOPNOTSUPP)
1381 		ehc->i.action |= ATA_EH_HARDRESET;
1382 
1383 	/* analyze NCQ failure */
1384 	ata_eh_analyze_ncq_error(ap);
1385 
1386 	/* any real error trumps AC_ERR_OTHER */
1387 	if (ehc->i.err_mask & ~AC_ERR_OTHER)
1388 		ehc->i.err_mask &= ~AC_ERR_OTHER;
1389 
1390 	all_err_mask |= ehc->i.err_mask;
1391 
1392 	for (tag = 0; tag < ATA_MAX_QUEUE; tag++) {
1393 		struct ata_queued_cmd *qc = __ata_qc_from_tag(ap, tag);
1394 
1395 		if (!(qc->flags & ATA_QCFLAG_FAILED))
1396 			continue;
1397 
1398 		/* inherit upper level err_mask */
1399 		qc->err_mask |= ehc->i.err_mask;
1400 
1401 		/* analyze TF */
1402 		ehc->i.action |= ata_eh_analyze_tf(qc, &qc->result_tf);
1403 
1404 		/* DEV errors are probably spurious in case of ATA_BUS error */
1405 		if (qc->err_mask & AC_ERR_ATA_BUS)
1406 			qc->err_mask &= ~(AC_ERR_DEV | AC_ERR_MEDIA |
1407 					  AC_ERR_INVALID);
1408 
1409 		/* any real error trumps unknown error */
1410 		if (qc->err_mask & ~AC_ERR_OTHER)
1411 			qc->err_mask &= ~AC_ERR_OTHER;
1412 
1413 		/* SENSE_VALID trumps dev/unknown error and revalidation */
1414 		if (qc->flags & ATA_QCFLAG_SENSE_VALID) {
1415 			qc->err_mask &= ~(AC_ERR_DEV | AC_ERR_OTHER);
1416 			ehc->i.action &= ~ATA_EH_REVALIDATE;
1417 		}
1418 
1419 		/* accumulate error info */
1420 		ehc->i.dev = qc->dev;
1421 		all_err_mask |= qc->err_mask;
1422 		if (qc->flags & ATA_QCFLAG_IO)
1423 			is_io = 1;
1424 	}
1425 
1426 	/* enforce default EH actions */
1427 	if (ap->pflags & ATA_PFLAG_FROZEN ||
1428 	    all_err_mask & (AC_ERR_HSM | AC_ERR_TIMEOUT))
1429 		ehc->i.action |= ATA_EH_SOFTRESET;
1430 	else if (all_err_mask)
1431 		ehc->i.action |= ATA_EH_REVALIDATE;
1432 
1433 	/* if we have offending qcs and the associated failed device */
1434 	if (ehc->i.dev) {
1435 		/* speed down */
1436 		ehc->i.action |= ata_eh_speed_down(ehc->i.dev, is_io,
1437 						   all_err_mask);
1438 
1439 		/* perform per-dev EH action only on the offending device */
1440 		ehc->i.dev_action[ehc->i.dev->devno] |=
1441 			ehc->i.action & ATA_EH_PERDEV_MASK;
1442 		ehc->i.action &= ~ATA_EH_PERDEV_MASK;
1443 	}
1444 
1445 	DPRINTK("EXIT\n");
1446 }
1447 
1448 /**
1449  *	ata_eh_report - report error handling to user
1450  *	@ap: ATA port EH is going on
1451  *
1452  *	Report EH to user.
1453  *
1454  *	LOCKING:
1455  *	None.
1456  */
1457 static void ata_eh_report(struct ata_port *ap)
1458 {
1459 	struct ata_eh_context *ehc = &ap->eh_context;
1460 	const char *frozen, *desc;
1461 	int tag, nr_failed = 0;
1462 
1463 	desc = NULL;
1464 	if (ehc->i.desc[0] != '\0')
1465 		desc = ehc->i.desc;
1466 
1467 	for (tag = 0; tag < ATA_MAX_QUEUE; tag++) {
1468 		struct ata_queued_cmd *qc = __ata_qc_from_tag(ap, tag);
1469 
1470 		if (!(qc->flags & ATA_QCFLAG_FAILED))
1471 			continue;
1472 		if (qc->flags & ATA_QCFLAG_SENSE_VALID && !qc->err_mask)
1473 			continue;
1474 
1475 		nr_failed++;
1476 	}
1477 
1478 	if (!nr_failed && !ehc->i.err_mask)
1479 		return;
1480 
1481 	frozen = "";
1482 	if (ap->pflags & ATA_PFLAG_FROZEN)
1483 		frozen = " frozen";
1484 
1485 	if (ehc->i.dev) {
1486 		ata_dev_printk(ehc->i.dev, KERN_ERR, "exception Emask 0x%x "
1487 			       "SAct 0x%x SErr 0x%x action 0x%x%s\n",
1488 			       ehc->i.err_mask, ap->sactive, ehc->i.serror,
1489 			       ehc->i.action, frozen);
1490 		if (desc)
1491 			ata_dev_printk(ehc->i.dev, KERN_ERR, "(%s)\n", desc);
1492 	} else {
1493 		ata_port_printk(ap, KERN_ERR, "exception Emask 0x%x "
1494 				"SAct 0x%x SErr 0x%x action 0x%x%s\n",
1495 				ehc->i.err_mask, ap->sactive, ehc->i.serror,
1496 				ehc->i.action, frozen);
1497 		if (desc)
1498 			ata_port_printk(ap, KERN_ERR, "(%s)\n", desc);
1499 	}
1500 
1501 	for (tag = 0; tag < ATA_MAX_QUEUE; tag++) {
1502 		static const char *dma_str[] = {
1503 			[DMA_BIDIRECTIONAL]	= "bidi",
1504 			[DMA_TO_DEVICE]		= "out",
1505 			[DMA_FROM_DEVICE]	= "in",
1506 			[DMA_NONE]		= "",
1507 		};
1508 		struct ata_queued_cmd *qc = __ata_qc_from_tag(ap, tag);
1509 		struct ata_taskfile *cmd = &qc->tf, *res = &qc->result_tf;
1510 
1511 		if (!(qc->flags & ATA_QCFLAG_FAILED) || !qc->err_mask)
1512 			continue;
1513 
1514 		ata_dev_printk(qc->dev, KERN_ERR,
1515 			"cmd %02x/%02x:%02x:%02x:%02x:%02x/%02x:%02x:%02x:%02x:%02x/%02x "
1516 			"tag %d cdb 0x%x data %u %s\n         "
1517 			"res %02x/%02x:%02x:%02x:%02x:%02x/%02x:%02x:%02x:%02x:%02x/%02x "
1518 			"Emask 0x%x (%s)\n",
1519 			cmd->command, cmd->feature, cmd->nsect,
1520 			cmd->lbal, cmd->lbam, cmd->lbah,
1521 			cmd->hob_feature, cmd->hob_nsect,
1522 			cmd->hob_lbal, cmd->hob_lbam, cmd->hob_lbah,
1523 			cmd->device, qc->tag, qc->cdb[0], qc->nbytes,
1524 			dma_str[qc->dma_dir],
1525 			res->command, res->feature, res->nsect,
1526 			res->lbal, res->lbam, res->lbah,
1527 			res->hob_feature, res->hob_nsect,
1528 			res->hob_lbal, res->hob_lbam, res->hob_lbah,
1529 			res->device, qc->err_mask, ata_err_string(qc->err_mask));
1530 	}
1531 }
1532 
1533 static int ata_do_reset(struct ata_port *ap, ata_reset_fn_t reset,
1534 			unsigned int *classes)
1535 {
1536 	int i, rc;
1537 
1538 	for (i = 0; i < ATA_MAX_DEVICES; i++)
1539 		classes[i] = ATA_DEV_UNKNOWN;
1540 
1541 	rc = reset(ap, classes);
1542 	if (rc)
1543 		return rc;
1544 
1545 	/* If any class isn't ATA_DEV_UNKNOWN, consider classification
1546 	 * complete and convert all ATA_DEV_UNKNOWN to
1547 	 * ATA_DEV_NONE.
1548 	 */
1549 	for (i = 0; i < ATA_MAX_DEVICES; i++)
1550 		if (classes[i] != ATA_DEV_UNKNOWN)
1551 			break;
1552 
1553 	if (i < ATA_MAX_DEVICES)
1554 		for (i = 0; i < ATA_MAX_DEVICES; i++)
1555 			if (classes[i] == ATA_DEV_UNKNOWN)
1556 				classes[i] = ATA_DEV_NONE;
1557 
1558 	return 0;
1559 }
1560 
1561 static int ata_eh_followup_srst_needed(int rc, int classify,
1562 				       const unsigned int *classes)
1563 {
1564 	if (rc == -EAGAIN)
1565 		return 1;
1566 	if (rc != 0)
1567 		return 0;
1568 	if (classify && classes[0] == ATA_DEV_UNKNOWN)
1569 		return 1;
1570 	return 0;
1571 }
1572 
1573 static int ata_eh_reset(struct ata_port *ap, int classify,
1574 			ata_prereset_fn_t prereset, ata_reset_fn_t softreset,
1575 			ata_reset_fn_t hardreset, ata_postreset_fn_t postreset)
1576 {
1577 	struct ata_eh_context *ehc = &ap->eh_context;
1578 	unsigned int *classes = ehc->classes;
1579 	int tries = ATA_EH_RESET_TRIES;
1580 	int verbose = !(ehc->i.flags & ATA_EHI_QUIET);
1581 	unsigned int action;
1582 	ata_reset_fn_t reset;
1583 	int i, did_followup_srst, rc;
1584 
1585 	/* about to reset */
1586 	ata_eh_about_to_do(ap, NULL, ehc->i.action & ATA_EH_RESET_MASK);
1587 
1588 	/* Determine which reset to use and record in ehc->i.action.
1589 	 * prereset() may examine and modify it.
1590 	 */
1591 	action = ehc->i.action;
1592 	ehc->i.action &= ~ATA_EH_RESET_MASK;
1593 	if (softreset && (!hardreset || (!sata_set_spd_needed(ap) &&
1594 					 !(action & ATA_EH_HARDRESET))))
1595 		ehc->i.action |= ATA_EH_SOFTRESET;
1596 	else
1597 		ehc->i.action |= ATA_EH_HARDRESET;
1598 
1599 	if (prereset) {
1600 		rc = prereset(ap);
1601 		if (rc) {
1602 			if (rc == -ENOENT) {
1603 				ata_port_printk(ap, KERN_DEBUG, "port disabled. ignoring.\n");
1604 				ap->eh_context.i.action &= ~ATA_EH_RESET_MASK;
1605 			} else
1606 				ata_port_printk(ap, KERN_ERR,
1607 					"prereset failed (errno=%d)\n", rc);
1608 			return rc;
1609 		}
1610 	}
1611 
1612 	/* prereset() might have modified ehc->i.action */
1613 	if (ehc->i.action & ATA_EH_HARDRESET)
1614 		reset = hardreset;
1615 	else if (ehc->i.action & ATA_EH_SOFTRESET)
1616 		reset = softreset;
1617 	else {
1618 		/* prereset told us not to reset, bang classes and return */
1619 		for (i = 0; i < ATA_MAX_DEVICES; i++)
1620 			classes[i] = ATA_DEV_NONE;
1621 		return 0;
1622 	}
1623 
1624 	/* did prereset() screw up?  if so, fix up to avoid oopsing */
1625 	if (!reset) {
1626 		ata_port_printk(ap, KERN_ERR, "BUG: prereset() requested "
1627 				"invalid reset type\n");
1628 		if (softreset)
1629 			reset = softreset;
1630 		else
1631 			reset = hardreset;
1632 	}
1633 
1634  retry:
1635 	/* shut up during boot probing */
1636 	if (verbose)
1637 		ata_port_printk(ap, KERN_INFO, "%s resetting port\n",
1638 				reset == softreset ? "soft" : "hard");
1639 
1640 	/* mark that this EH session started with reset */
1641 	ehc->i.flags |= ATA_EHI_DID_RESET;
1642 
1643 	rc = ata_do_reset(ap, reset, classes);
1644 
1645 	did_followup_srst = 0;
1646 	if (reset == hardreset &&
1647 	    ata_eh_followup_srst_needed(rc, classify, classes)) {
1648 		/* okay, let's do follow-up softreset */
1649 		did_followup_srst = 1;
1650 		reset = softreset;
1651 
1652 		if (!reset) {
1653 			ata_port_printk(ap, KERN_ERR,
1654 					"follow-up softreset required "
1655 					"but no softreset available\n");
1656 			return -EINVAL;
1657 		}
1658 
1659 		ata_eh_about_to_do(ap, NULL, ATA_EH_RESET_MASK);
1660 		rc = ata_do_reset(ap, reset, classes);
1661 
1662 		if (rc == 0 && classify &&
1663 		    classes[0] == ATA_DEV_UNKNOWN) {
1664 			ata_port_printk(ap, KERN_ERR,
1665 					"classification failed\n");
1666 			return -EINVAL;
1667 		}
1668 	}
1669 
1670 	if (rc && --tries) {
1671 		const char *type;
1672 
1673 		if (reset == softreset) {
1674 			if (did_followup_srst)
1675 				type = "follow-up soft";
1676 			else
1677 				type = "soft";
1678 		} else
1679 			type = "hard";
1680 
1681 		ata_port_printk(ap, KERN_WARNING,
1682 				"%sreset failed, retrying in 5 secs\n", type);
1683 		ssleep(5);
1684 
1685 		if (reset == hardreset)
1686 			sata_down_spd_limit(ap);
1687 		if (hardreset)
1688 			reset = hardreset;
1689 		goto retry;
1690 	}
1691 
1692 	if (rc == 0) {
1693 		/* After the reset, the device state is PIO 0 and the
1694 		 * controller state is undefined.  Record the mode.
1695 		 */
1696 		for (i = 0; i < ATA_MAX_DEVICES; i++)
1697 			ap->device[i].pio_mode = XFER_PIO_0;
1698 
1699 		if (postreset)
1700 			postreset(ap, classes);
1701 
1702 		/* reset successful, schedule revalidation */
1703 		ata_eh_done(ap, NULL, ehc->i.action & ATA_EH_RESET_MASK);
1704 		ehc->i.action |= ATA_EH_REVALIDATE;
1705 	}
1706 
1707 	return rc;
1708 }
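
/* Illustrative sketch of a prereset method (foo_* names are
 * hypothetical): returning 0 lets the reset chosen above proceed,
 * -ENOENT marks the port disabled, and modifying ehc->i.action (e.g.
 * forcing ATA_EH_HARDRESET) steers which reset method gets used.
 *
 *	static int foo_prereset(struct ata_port *ap)
 *	{
 *		if (!foo_link_present(ap))
 *			return -ENOENT;
 *		ap->eh_context.i.action |= ATA_EH_HARDRESET;
 *		return 0;
 *	}
 */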
1709 
1710 static int ata_eh_revalidate_and_attach(struct ata_port *ap,
1711 					struct ata_device **r_failed_dev)
1712 {
1713 	struct ata_eh_context *ehc = &ap->eh_context;
1714 	struct ata_device *dev;
1715 	unsigned long flags;
1716 	int i, rc = 0;
1717 
1718 	DPRINTK("ENTER\n");
1719 
1720 	for (i = 0; i < ATA_MAX_DEVICES; i++) {
1721 		unsigned int action, readid_flags = 0;
1722 
1723 		dev = &ap->device[i];
1724 		action = ata_eh_dev_action(dev);
1725 
1726 		if (ehc->i.flags & ATA_EHI_DID_RESET)
1727 			readid_flags |= ATA_READID_POSTRESET;
1728 
1729 		if (action & ATA_EH_REVALIDATE && ata_dev_ready(dev)) {
1730 			if (ata_port_offline(ap)) {
1731 				rc = -EIO;
1732 				break;
1733 			}
1734 
1735 			ata_eh_about_to_do(ap, dev, ATA_EH_REVALIDATE);
1736 			rc = ata_dev_revalidate(dev, readid_flags);
1737 			if (rc)
1738 				break;
1739 
1740 			ata_eh_done(ap, dev, ATA_EH_REVALIDATE);
1741 
1742 			/* Configuration may have changed, reconfigure
1743 			 * transfer mode.
1744 			 */
1745 			ehc->i.flags |= ATA_EHI_SETMODE;
1746 
1747 			/* schedule the scsi_rescan_device() here */
1748 			queue_work(ata_aux_wq, &(ap->scsi_rescan_task));
1749 		} else if (dev->class == ATA_DEV_UNKNOWN &&
1750 			   ehc->tries[dev->devno] &&
1751 			   ata_class_enabled(ehc->classes[dev->devno])) {
1752 			dev->class = ehc->classes[dev->devno];
1753 
1754 			rc = ata_dev_read_id(dev, &dev->class, readid_flags,
1755 					     dev->id);
1756 			if (rc == 0) {
1757 				ehc->i.flags |= ATA_EHI_PRINTINFO;
1758 				rc = ata_dev_configure(dev);
1759 				ehc->i.flags &= ~ATA_EHI_PRINTINFO;
1760 			} else if (rc == -ENOENT) {
1761 				/* IDENTIFY was issued to non-existent
1762 				 * device.  No need to reset.  Just
1763 				 * thaw and kill the device.
1764 				 */
1765 				ata_eh_thaw_port(ap);
1766 				dev->class = ATA_DEV_UNKNOWN;
1767 				rc = 0;
1768 			}
1769 
1770 			if (rc) {
1771 				dev->class = ATA_DEV_UNKNOWN;
1772 				break;
1773 			}
1774 
1775 			if (ata_dev_enabled(dev)) {
1776 				spin_lock_irqsave(ap->lock, flags);
1777 				ap->pflags |= ATA_PFLAG_SCSI_HOTPLUG;
1778 				spin_unlock_irqrestore(ap->lock, flags);
1779 
1780 				/* new device discovered, configure xfermode */
1781 				ehc->i.flags |= ATA_EHI_SETMODE;
1782 			}
1783 		}
1784 	}
1785 
1786 	if (rc)
1787 		*r_failed_dev = dev;
1788 
1789 	DPRINTK("EXIT\n");
1790 	return rc;
1791 }
1792 
1793 /**
1794  *	ata_eh_suspend - handle suspend EH action
1795  *	@ap: target host port
1796  *	@r_failed_dev: result parameter to indicate failing device
1797  *
1798  *	Handle suspend EH action.  Disk devices are spun down and
1799  *	other types of devices are just marked suspended.  Once
1800  *	suspended, no EH action to the device is allowed until it is
1801  *	resumed.
1802  *
1803  *	LOCKING:
1804  *	Kernel thread context (may sleep).
1805  *
1806  *	RETURNS:
1807  *	0 on success, -errno otherwise
1808  */
1809 static int ata_eh_suspend(struct ata_port *ap, struct ata_device **r_failed_dev)
1810 {
1811 	struct ata_device *dev;
1812 	int i, rc = 0;
1813 
1814 	DPRINTK("ENTER\n");
1815 
1816 	for (i = 0; i < ATA_MAX_DEVICES; i++) {
1817 		unsigned long flags;
1818 		unsigned int action, err_mask;
1819 
1820 		dev = &ap->device[i];
1821 		action = ata_eh_dev_action(dev);
1822 
1823 		if (!ata_dev_enabled(dev) || !(action & ATA_EH_SUSPEND))
1824 			continue;
1825 
1826 		WARN_ON(dev->flags & ATA_DFLAG_SUSPENDED);
1827 
1828 		ata_eh_about_to_do(ap, dev, ATA_EH_SUSPEND);
1829 
1830 		if (dev->class == ATA_DEV_ATA && !(action & ATA_EH_PM_FREEZE)) {
1831 			/* flush cache */
1832 			rc = ata_flush_cache(dev);
1833 			if (rc)
1834 				break;
1835 
1836 			/* spin down */
1837 			err_mask = ata_do_simple_cmd(dev, ATA_CMD_STANDBYNOW1);
1838 			if (err_mask) {
1839 				ata_dev_printk(dev, KERN_ERR, "failed to "
1840 					       "spin down (err_mask=0x%x)\n",
1841 					       err_mask);
1842 				rc = -EIO;
1843 				break;
1844 			}
1845 		}
1846 
1847 		spin_lock_irqsave(ap->lock, flags);
1848 		dev->flags |= ATA_DFLAG_SUSPENDED;
1849 		spin_unlock_irqrestore(ap->lock, flags);
1850 
1851 		ata_eh_done(ap, dev, ATA_EH_SUSPEND);
1852 	}
1853 
1854 	if (rc)
1855 		*r_failed_dev = dev;
1856 
1857 	DPRINTK("EXIT\n");
1858 	return rc;
1859 }
1860 
1861 /**
1862  *	ata_eh_prep_resume - prep for resume EH action
1863  *	@ap: target host port
1864  *
1865  *	Clear SUSPENDED in preparation for scheduled resume actions.
1866  *	This allows other parts of EH to access the devices being
1867  *	resumed.
1868  *
1869  *	LOCKING:
1870  *	Kernel thread context (may sleep).
1871  */
1872 static void ata_eh_prep_resume(struct ata_port *ap)
1873 {
1874 	struct ata_device *dev;
1875 	unsigned long flags;
1876 	int i;
1877 
1878 	DPRINTK("ENTER\n");
1879 
1880 	for (i = 0; i < ATA_MAX_DEVICES; i++) {
1881 		unsigned int action;
1882 
1883 		dev = &ap->device[i];
1884 		action = ata_eh_dev_action(dev);
1885 
1886 		if (!ata_dev_enabled(dev) || !(action & ATA_EH_RESUME))
1887 			continue;
1888 
1889 		spin_lock_irqsave(ap->lock, flags);
1890 		dev->flags &= ~ATA_DFLAG_SUSPENDED;
1891 		spin_unlock_irqrestore(ap->lock, flags);
1892 	}
1893 
1894 	DPRINTK("EXIT\n");
1895 }
1896 
1897 /**
1898  *	ata_eh_resume - handle resume EH action
1899  *	@ap: target host port
1900  *	@r_failed_dev: result parameter to indicate failing device
1901  *
1902  *	Handle resume EH action.  Target devices are already reset and
1903  *	revalidated.  Spinning up is the only operation left.
1904  *
1905  *	LOCKING:
1906  *	Kernel thread context (may sleep).
1907  *
1908  *	RETURNS:
1909  *	0 on success, -errno otherwise
1910  */
1911 static int ata_eh_resume(struct ata_port *ap, struct ata_device **r_failed_dev)
1912 {
1913 	struct ata_device *dev;
1914 	int i, rc = 0;
1915 
1916 	DPRINTK("ENTER\n");
1917 
1918 	for (i = 0; i < ATA_MAX_DEVICES; i++) {
1919 		unsigned int action, err_mask;
1920 
1921 		dev = &ap->device[i];
1922 		action = ata_eh_dev_action(dev);
1923 
1924 		if (!ata_dev_enabled(dev) || !(action & ATA_EH_RESUME))
1925 			continue;
1926 
1927 		ata_eh_about_to_do(ap, dev, ATA_EH_RESUME);
1928 
1929 		if (dev->class == ATA_DEV_ATA && !(action & ATA_EH_PM_FREEZE)) {
1930 			err_mask = ata_do_simple_cmd(dev,
1931 						     ATA_CMD_IDLEIMMEDIATE);
1932 			if (err_mask) {
1933 				ata_dev_printk(dev, KERN_ERR, "failed to "
1934 					       "spin up (err_mask=0x%x)\n",
1935 					       err_mask);
1936 				rc = -EIO;
1937 				break;
1938 			}
1939 		}
1940 
1941 		ata_eh_done(ap, dev, ATA_EH_RESUME);
1942 	}
1943 
1944 	if (rc)
1945 		*r_failed_dev = dev;
1946 
1947 	DPRINTK("EXIT\n");
1948 	return rc;
1949 }
1950 
1951 static int ata_port_nr_enabled(struct ata_port *ap)
1952 {
1953 	int i, cnt = 0;
1954 
1955 	for (i = 0; i < ATA_MAX_DEVICES; i++)
1956 		if (ata_dev_enabled(&ap->device[i]))
1957 			cnt++;
1958 	return cnt;
1959 }
1960 
1961 static int ata_port_nr_vacant(struct ata_port *ap)
1962 {
1963 	int i, cnt = 0;
1964 
1965 	for (i = 0; i < ATA_MAX_DEVICES; i++)
1966 		if (ap->device[i].class == ATA_DEV_UNKNOWN)
1967 			cnt++;
1968 	return cnt;
1969 }
1970 
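/* Editor's note: decides whether this EH invocation can be a no-op.
 * Recovery is skipped when every possible device is suspended, or
 * when the port is unfrozen, no link resume is pending, no device is
 * enabled, and every vacant slot was classified ATA_DEV_NONE (i.e.
 * there is provably nothing to recover or probe).
 */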
1971 static int ata_eh_skip_recovery(struct ata_port *ap)
1972 {
1973 	struct ata_eh_context *ehc = &ap->eh_context;
1974 	int i;
1975 
1976 	/* skip if all possible devices are suspended */
1977 	for (i = 0; i < ata_port_max_devices(ap); i++) {
1978 		struct ata_device *dev = &ap->device[i];
1979 
1980 		if (!(dev->flags & ATA_DFLAG_SUSPENDED))
1981 			break;
1982 	}
1983 
1984 	if (i == ata_port_max_devices(ap))
1985 		return 1;
1986 
1987 	/* thaw frozen port, resume link and recover failed devices */
1988 	if ((ap->pflags & ATA_PFLAG_FROZEN) ||
1989 	    (ehc->i.flags & ATA_EHI_RESUME_LINK) || ata_port_nr_enabled(ap))
1990 		return 0;
1991 
1992 	/* skip if class codes for all vacant slots are ATA_DEV_NONE */
1993 	for (i = 0; i < ATA_MAX_DEVICES; i++) {
1994 		struct ata_device *dev = &ap->device[i];
1995 
1996 		if (dev->class == ATA_DEV_UNKNOWN &&
1997 		    ehc->classes[dev->devno] != ATA_DEV_NONE)
1998 			return 0;
1999 	}
2000 
2001 	return 1;
2002 }
2003 
2004 /**
2005  *	ata_eh_recover - recover host port after error
2006  *	@ap: host port to recover
2007  *	@prereset: prereset method (can be NULL)
2008  *	@softreset: softreset method (can be NULL)
2009  *	@hardreset: hardreset method (can be NULL)
2010  *	@postreset: postreset method (can be NULL)
2011  *
2012  *	This is the alpha and omega, the yin and yang, the heart and
2013  *	soul of libata exception handling.  On entry, actions required
2014  *	to recover the port and hotplug requests are recorded in the
2015  *	eh_context.  This function executes all the operations with
2016  *	appropriate retries and fallbacks to resurrect failed devices,
2017  *	detach dead devices and greet newcomers.
2018  *
2019  *	LOCKING:
2020  *	Kernel thread context (may sleep).
2021  *
2022  *	RETURNS:
2023  *	0 on success, -errno on failure.
2024  */
2025 static int ata_eh_recover(struct ata_port *ap, ata_prereset_fn_t prereset,
2026 			  ata_reset_fn_t softreset, ata_reset_fn_t hardreset,
2027 			  ata_postreset_fn_t postreset)
2028 {
2029 	struct ata_eh_context *ehc = &ap->eh_context;
2030 	struct ata_device *dev;
2031 	int i, rc;
2032 
2033 	DPRINTK("ENTER\n");
2034 
2035 	/* prep for recovery */
2036 	for (i = 0; i < ATA_MAX_DEVICES; i++) {
2037 		dev = &ap->device[i];
2038 
2039 		ehc->tries[dev->devno] = ATA_EH_DEV_TRIES;
2040 
2041 		/* collect port action mask recorded in dev actions */
2042 		ehc->i.action |= ehc->i.dev_action[i] & ~ATA_EH_PERDEV_MASK;
2043 		ehc->i.dev_action[i] &= ATA_EH_PERDEV_MASK;
2044 
2045 		/* process hotplug request */
2046 		if (dev->flags & ATA_DFLAG_DETACH)
2047 			ata_eh_detach_dev(dev);
2048 
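		/* Editor's note: a disabled slot with a pending probe
		 * request (and no probe attempted yet this EH session)
		 * is reinitialized and scheduled for softreset, which
		 * is what actually performs the probe.
		 */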
2049 		if (!ata_dev_enabled(dev) &&
2050 		    ((ehc->i.probe_mask & (1 << dev->devno)) &&
2051 		     !(ehc->did_probe_mask & (1 << dev->devno)))) {
2052 			ata_eh_detach_dev(dev);
2053 			ata_dev_init(dev);
2054 			ehc->did_probe_mask |= (1 << dev->devno);
2055 			ehc->i.action |= ATA_EH_SOFTRESET;
2056 		}
2057 	}
2058 
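	/* Editor's note: everything below runs in a retry loop.  Each
	 * failure path decrements the failing device's tries counter
	 * and jumps back here; a device that runs out of tries is
	 * disabled, so the loop winds down once every device has either
	 * recovered or been given up on.
	 */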
2059  retry:
2060 	rc = 0;
2061 
2062 	/* if UNLOADING, finish immediately */
2063 	if (ap->pflags & ATA_PFLAG_UNLOADING)
2064 		goto out;
2065 
2066 	/* prep for resume */
2067 	ata_eh_prep_resume(ap);
2068 
2069 	/* skip EH if possible. */
2070 	if (ata_eh_skip_recovery(ap))
2071 		ehc->i.action = 0;
2072 
2073 	for (i = 0; i < ATA_MAX_DEVICES; i++)
2074 		ehc->classes[i] = ATA_DEV_UNKNOWN;
2075 
2076 	/* reset */
2077 	if (ehc->i.action & ATA_EH_RESET_MASK) {
2078 		ata_eh_freeze_port(ap);
2079 
2080 		rc = ata_eh_reset(ap, ata_port_nr_vacant(ap), prereset,
2081 				  softreset, hardreset, postreset);
2082 		if (rc) {
2083 			ata_port_printk(ap, KERN_ERR,
2084 					"reset failed, giving up\n");
2085 			goto out;
2086 		}
2087 
2088 		ata_eh_thaw_port(ap);
2089 	}
2090 
2091 	/* revalidate existing devices and attach new ones */
2092 	rc = ata_eh_revalidate_and_attach(ap, &dev);
2093 	if (rc)
2094 		goto dev_fail;
2095 
2096 	/* resume devices */
2097 	rc = ata_eh_resume(ap, &dev);
2098 	if (rc)
2099 		goto dev_fail;
2100 
2101 	/* configure transfer mode if necessary */
2102 	if (ehc->i.flags & ATA_EHI_SETMODE) {
2103 		rc = ata_set_mode(ap, &dev);
2104 		if (rc)
2105 			goto dev_fail;
2106 		ehc->i.flags &= ~ATA_EHI_SETMODE;
2107 	}
2108 
2109 	/* suspend devices */
2110 	rc = ata_eh_suspend(ap, &dev);
2111 	if (rc)
2112 		goto dev_fail;
2113 
2114 	goto out;
2115 
2116  dev_fail:
2117 	ehc->tries[dev->devno]--;
2118 
2119 	switch (rc) {
2120 	case -EINVAL:
2121 		/* eeek, something went very wrong, give up */
2122 		ehc->tries[dev->devno] = 0;
2123 		break;
2124 
2125 	case -ENODEV:
2126 		/* device missing or wrong IDENTIFY data, schedule probing */
2127 		ehc->i.probe_mask |= (1 << dev->devno);
2128 		/* give it just one more chance */
2129 		ehc->tries[dev->devno] = min(ehc->tries[dev->devno], 1);
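		/* fall through - the -EIO slowdown below applies here too */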
2130 	case -EIO:
2131 		if (ehc->tries[dev->devno] == 1) {
2132 			/* This is the last chance, better to slow
2133 			 * down than lose it.
2134 			 */
2135 			sata_down_spd_limit(ap);
2136 			ata_down_xfermask_limit(dev, ATA_DNXFER_PIO);
2137 		}
2138 	}
2139 
2140 	if (ata_dev_enabled(dev) && !ehc->tries[dev->devno]) {
2141 		/* disable device if it has used up all its chances */
2142 		ata_dev_disable(dev);
2143 
2144 		/* detach if offline */
2145 		if (ata_port_offline(ap))
2146 			ata_eh_detach_dev(dev);
2147 
2148 		/* probe if requested */
2149 		if ((ehc->i.probe_mask & (1 << dev->devno)) &&
2150 		    !(ehc->did_probe_mask & (1 << dev->devno))) {
2151 			ata_eh_detach_dev(dev);
2152 			ata_dev_init(dev);
2153 
2154 			ehc->tries[dev->devno] = ATA_EH_DEV_TRIES;
2155 			ehc->did_probe_mask |= (1 << dev->devno);
2156 			ehc->i.action |= ATA_EH_SOFTRESET;
2157 		}
2158 	} else {
2159 		/* soft didn't work?  be haaaaard */
2160 		if (ehc->i.flags & ATA_EHI_DID_RESET)
2161 			ehc->i.action |= ATA_EH_HARDRESET;
2162 		else
2163 			ehc->i.action |= ATA_EH_SOFTRESET;
2164 	}
2165 
2166 	if (ata_port_nr_enabled(ap)) {
2167 		ata_port_printk(ap, KERN_WARNING, "failed to recover some "
2168 				"devices, retrying in 5 secs\n");
2169 		ssleep(5);
2170 	} else {
2171 		/* no device left, repeat fast */
2172 		msleep(500);
2173 	}
2174 
2175 	goto retry;
2176 
2177  out:
2178 	if (rc) {
2179 		for (i = 0; i < ATA_MAX_DEVICES; i++)
2180 			ata_dev_disable(&ap->device[i]);
2181 	}
2182 
2183 	DPRINTK("EXIT, rc=%d\n", rc);
2184 	return rc;
2185 }
2186 
2187 /**
2188  *	ata_eh_finish - finish up EH
2189  *	@ap: host port to finish EH for
2190  *
2191  *	Recovery is complete.  Clean up EH states and retry or finish
2192  *	failed qcs.
2193  *
2194  *	LOCKING:
2195  *	None.
2196  */
2197 static void ata_eh_finish(struct ata_port *ap)
2198 {
2199 	int tag;
2200 
2201 	/* retry or finish qcs */
2202 	for (tag = 0; tag < ATA_MAX_QUEUE; tag++) {
2203 		struct ata_queued_cmd *qc = __ata_qc_from_tag(ap, tag);
2204 
2205 		if (!(qc->flags & ATA_QCFLAG_FAILED))
2206 			continue;
2207 
2208 		if (qc->err_mask) {
2209 			/* FIXME: Once EH migration is complete,
2210 			 * generate sense data in this function,
2211 			 * considering both err_mask and tf.
2212 			 */
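			/* Editor's note: an AC_ERR_INVALID command was
			 * rejected as malformed; retrying it verbatim
			 * cannot succeed, so complete it with the error.
			 */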
2213 			if (qc->err_mask & AC_ERR_INVALID)
2214 				ata_eh_qc_complete(qc);
2215 			else
2216 				ata_eh_qc_retry(qc);
2217 		} else {
2218 			if (qc->flags & ATA_QCFLAG_SENSE_VALID) {
2219 				ata_eh_qc_complete(qc);
2220 			} else {
2221 				/* feed zero TF to sense generation */
2222 				memset(&qc->result_tf, 0, sizeof(qc->result_tf));
2223 				ata_eh_qc_retry(qc);
2224 			}
2225 		}
2226 	}
2227 }
2228 
2229 /**
2230  *	ata_do_eh - do standard error handling
2231  *	@ap: host port to handle error for
2232  *	@prereset: prereset method (can be NULL)
2233  *	@softreset: softreset method (can be NULL)
2234  *	@hardreset: hardreset method (can be NULL)
2235  *	@postreset: postreset method (can be NULL)
2236  *
2237  *	Perform standard error handling sequence.
2238  *
2239  *	LOCKING:
2240  *	Kernel thread context (may sleep).
2241  */
2242 void ata_do_eh(struct ata_port *ap, ata_prereset_fn_t prereset,
2243 	       ata_reset_fn_t softreset, ata_reset_fn_t hardreset,
2244 	       ata_postreset_fn_t postreset)
2245 {
2246 	ata_eh_autopsy(ap);
2247 	ata_eh_report(ap);
2248 	ata_eh_recover(ap, prereset, softreset, hardreset, postreset);
2249 	ata_eh_finish(ap);
2250 }
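/*
 * Editor's sketch: how a low-level driver typically wires its
 * ->error_handler to ata_do_eh().  The helpers named below
 * (ata_std_prereset, ata_std_softreset, sata_std_hardreset,
 * ata_std_postreset) are real libata exports, but passing exactly
 * this combination is an assumption; drivers substitute their own
 * methods or NULL as appropriate.
 */
#if 0	/* illustrative only, not compiled */
static void example_error_handler(struct ata_port *ap)
{
	/* autopsy, report, recover and finish, using standard resets */
	ata_do_eh(ap, ata_std_prereset, ata_std_softreset,
		  sata_std_hardreset, ata_std_postreset);
}
#endif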
2251 
2252 /**
2253  *	ata_eh_handle_port_suspend - perform port suspend operation
2254  *	@ap: port to suspend
2255  *
2256  *	Suspend @ap.
2257  *
2258  *	LOCKING:
2259  *	Kernel thread context (may sleep).
2260  */
2261 static void ata_eh_handle_port_suspend(struct ata_port *ap)
2262 {
2263 	unsigned long flags;
2264 	int rc = 0;
2265 
2266 	/* are we suspending? */
2267 	spin_lock_irqsave(ap->lock, flags);
2268 	if (!(ap->pflags & ATA_PFLAG_PM_PENDING) ||
2269 	    ap->pm_mesg.event == PM_EVENT_ON) {
2270 		spin_unlock_irqrestore(ap->lock, flags);
2271 		return;
2272 	}
2273 	spin_unlock_irqrestore(ap->lock, flags);
2274 
2275 	WARN_ON(ap->pflags & ATA_PFLAG_SUSPENDED);
2276 
2277 	/* suspend */
2278 	ata_eh_freeze_port(ap);
2279 
2280 	if (ap->ops->port_suspend)
2281 		rc = ap->ops->port_suspend(ap, ap->pm_mesg);
2282 
2283 	/* report result */
2284 	spin_lock_irqsave(ap->lock, flags);
2285 
2286 	ap->pflags &= ~ATA_PFLAG_PM_PENDING;
2287 	if (rc == 0)
2288 		ap->pflags |= ATA_PFLAG_SUSPENDED;
2289 	else
2290 		ata_port_schedule_eh(ap);
2291 
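	/* Editor's note: ap->pm_result, when set, appears to point at
	 * the suspend requester's return-code slot; writing it under
	 * ap->lock and clearing the pointer hands the result back to
	 * the waiting task.
	 */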
2292 	if (ap->pm_result) {
2293 		*ap->pm_result = rc;
2294 		ap->pm_result = NULL;
2295 	}
2296 
2297 	spin_unlock_irqrestore(ap->lock, flags);
2300 }
2301 
2302 /**
2303  *	ata_eh_handle_port_resume - perform port resume operation
2304  *	@ap: port to resume
2305  *
2306  *	Resume @ap.
2307  *
2308  *	This function also waits up to one second until all devices
2309  *	hanging off this port request the resume EH action.  This is
2310  *	to prevent invoking EH, and thus reset, multiple times on resume.
2311  *
2312  *	On DPM resume, where some of the devices might not be resumed
2313  *	together, this may delay port resume by up to one second, but
2314  *	such DPM resumes are rare and a one-second delay isn't too bad.
2315  *
2316  *	LOCKING:
2317  *	Kernel thread context (may sleep).
2318  */
2319 static void ata_eh_handle_port_resume(struct ata_port *ap)
2320 {
2321 	unsigned long timeout;
2322 	unsigned long flags;
2323 	int i, rc = 0;
2324 
2325 	/* are we resuming? */
2326 	spin_lock_irqsave(ap->lock, flags);
2327 	if (!(ap->pflags & ATA_PFLAG_PM_PENDING) ||
2328 	    ap->pm_mesg.event != PM_EVENT_ON) {
2329 		spin_unlock_irqrestore(ap->lock, flags);
2330 		return;
2331 	}
2332 	spin_unlock_irqrestore(ap->lock, flags);
2333 
2334 	/* spurious? */
2335 	if (!(ap->pflags & ATA_PFLAG_SUSPENDED))
2336 		goto done;
2337 
2338 	if (ap->ops->port_resume)
2339 		rc = ap->ops->port_resume(ap);
2340 
2341 	/* give devices time to request EH */
2342 	timeout = jiffies + HZ; /* 1s max */
2343 	while (1) {
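		/* Editor's note: a device still flagged SUSPENDED that
		 * has not yet requested ATA_EH_RESUME breaks the scan
		 * early, so the outer loop keeps polling in 10ms steps
		 * until all devices are ready or the 1s budget expires.
		 */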
2344 		for (i = 0; i < ATA_MAX_DEVICES; i++) {
2345 			struct ata_device *dev = &ap->device[i];
2346 			unsigned int action = ata_eh_dev_action(dev);
2347 
2348 			if ((dev->flags & ATA_DFLAG_SUSPENDED) &&
2349 			    !(action & ATA_EH_RESUME))
2350 				break;
2351 		}
2352 
2353 		if (i == ATA_MAX_DEVICES || time_after(jiffies, timeout))
2354 			break;
2355 		msleep(10);
2356 	}
2357 
2358  done:
2359 	spin_lock_irqsave(ap->lock, flags);
2360 	ap->pflags &= ~(ATA_PFLAG_PM_PENDING | ATA_PFLAG_SUSPENDED);
2361 	if (ap->pm_result) {
2362 		*ap->pm_result = rc;
2363 		ap->pm_result = NULL;
2364 	}
2365 	spin_unlock_irqrestore(ap->lock, flags);
2366 }
2367