xref: /openbmc/linux/drivers/ata/libata-eh.c (revision 79a55b72a1996f77e9d23c7a5282e5839d45beb3)
1 /*
2  *  libata-eh.c - libata error handling
3  *
4  *  Maintained by:  Jeff Garzik <jgarzik@pobox.com>
5  *    		    Please ALWAYS copy linux-ide@vger.kernel.org
6  *		    on emails.
7  *
8  *  Copyright 2006 Tejun Heo <htejun@gmail.com>
9  *
10  *
11  *  This program is free software; you can redistribute it and/or
12  *  modify it under the terms of the GNU General Public License as
13  *  published by the Free Software Foundation; either version 2, or
14  *  (at your option) any later version.
15  *
16  *  This program is distributed in the hope that it will be useful,
17  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
18  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
19  *  General Public License for more details.
20  *
21  *  You should have received a copy of the GNU General Public License
22  *  along with this program; see the file COPYING.  If not, write to
23  *  the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139,
24  *  USA.
25  *
26  *
27  *  libata documentation is available via 'make {ps|pdf}docs',
28  *  as Documentation/DocBook/libata.*
29  *
30  *  Hardware documentation available from http://www.t13.org/ and
31  *  http://www.sata-io.org/
32  *
33  */
34 
35 #include <linux/kernel.h>
36 #include <scsi/scsi.h>
37 #include <scsi/scsi_host.h>
38 #include <scsi/scsi_eh.h>
39 #include <scsi/scsi_device.h>
40 #include <scsi/scsi_cmnd.h>
41 #include "../scsi/scsi_transport_api.h"
42 
43 #include <linux/libata.h>
44 
45 #include "libata.h"
46 
/* Forward declarations for static helpers that are called before their
 * definitions appear in this file.
 */
static void __ata_port_freeze(struct ata_port *ap);
static void ata_eh_finish(struct ata_port *ap);
static void ata_eh_handle_port_suspend(struct ata_port *ap);
static void ata_eh_handle_port_resume(struct ata_port *ap);
51 
52 static void ata_ering_record(struct ata_ering *ering, int is_io,
53 			     unsigned int err_mask)
54 {
55 	struct ata_ering_entry *ent;
56 
57 	WARN_ON(!err_mask);
58 
59 	ering->cursor++;
60 	ering->cursor %= ATA_ERING_SIZE;
61 
62 	ent = &ering->ring[ering->cursor];
63 	ent->is_io = is_io;
64 	ent->err_mask = err_mask;
65 	ent->timestamp = get_jiffies_64();
66 }
67 
68 static struct ata_ering_entry * ata_ering_top(struct ata_ering *ering)
69 {
70 	struct ata_ering_entry *ent = &ering->ring[ering->cursor];
71 	if (!ent->err_mask)
72 		return NULL;
73 	return ent;
74 }
75 
76 static int ata_ering_map(struct ata_ering *ering,
77 			 int (*map_fn)(struct ata_ering_entry *, void *),
78 			 void *arg)
79 {
80 	int idx, rc = 0;
81 	struct ata_ering_entry *ent;
82 
83 	idx = ering->cursor;
84 	do {
85 		ent = &ering->ring[idx];
86 		if (!ent->err_mask)
87 			break;
88 		rc = map_fn(ent, arg);
89 		if (rc)
90 			break;
91 		idx = (idx - 1 + ATA_ERING_SIZE) % ATA_ERING_SIZE;
92 	} while (idx != ering->cursor);
93 
94 	return rc;
95 }
96 
97 static unsigned int ata_eh_dev_action(struct ata_device *dev)
98 {
99 	struct ata_eh_context *ehc = &dev->ap->eh_context;
100 
101 	return ehc->i.action | ehc->i.dev_action[dev->devno];
102 }
103 
104 static void ata_eh_clear_action(struct ata_device *dev,
105 				struct ata_eh_info *ehi, unsigned int action)
106 {
107 	int i;
108 
109 	if (!dev) {
110 		ehi->action &= ~action;
111 		for (i = 0; i < ATA_MAX_DEVICES; i++)
112 			ehi->dev_action[i] &= ~action;
113 	} else {
114 		/* doesn't make sense for port-wide EH actions */
115 		WARN_ON(!(action & ATA_EH_PERDEV_MASK));
116 
117 		/* break ehi->action into ehi->dev_action */
118 		if (ehi->action & action) {
119 			for (i = 0; i < ATA_MAX_DEVICES; i++)
120 				ehi->dev_action[i] |= ehi->action & action;
121 			ehi->action &= ~action;
122 		}
123 
124 		/* turn off the specified per-dev action */
125 		ehi->dev_action[dev->devno] &= ~action;
126 	}
127 }
128 
129 /**
130  *	ata_scsi_timed_out - SCSI layer time out callback
131  *	@cmd: timed out SCSI command
132  *
133  *	Handles SCSI layer timeout.  We race with normal completion of
134  *	the qc for @cmd.  If the qc is already gone, we lose and let
135  *	the scsi command finish (EH_HANDLED).  Otherwise, the qc has
136  *	timed out and EH should be invoked.  Prevent ata_qc_complete()
137  *	from finishing it by setting EH_SCHEDULED and return
138  *	EH_NOT_HANDLED.
139  *
140  *	TODO: kill this function once old EH is gone.
141  *
142  *	LOCKING:
143  *	Called from timer context
144  *
145  *	RETURNS:
146  *	EH_HANDLED or EH_NOT_HANDLED
147  */
148 enum scsi_eh_timer_return ata_scsi_timed_out(struct scsi_cmnd *cmd)
149 {
150 	struct Scsi_Host *host = cmd->device->host;
151 	struct ata_port *ap = ata_shost_to_port(host);
152 	unsigned long flags;
153 	struct ata_queued_cmd *qc;
154 	enum scsi_eh_timer_return ret;
155 
156 	DPRINTK("ENTER\n");
157 
158 	if (ap->ops->error_handler) {
159 		ret = EH_NOT_HANDLED;
160 		goto out;
161 	}
162 
163 	ret = EH_HANDLED;
164 	spin_lock_irqsave(ap->lock, flags);
165 	qc = ata_qc_from_tag(ap, ap->active_tag);
166 	if (qc) {
167 		WARN_ON(qc->scsicmd != cmd);
168 		qc->flags |= ATA_QCFLAG_EH_SCHEDULED;
169 		qc->err_mask |= AC_ERR_TIMEOUT;
170 		ret = EH_NOT_HANDLED;
171 	}
172 	spin_unlock_irqrestore(ap->lock, flags);
173 
174  out:
175 	DPRINTK("EXIT, ret=%d\n", ret);
176 	return ret;
177 }
178 
/**
 *	ata_scsi_error - SCSI layer error handler callback
 *	@host: SCSI host on which error occurred
 *
 *	Handles SCSI-layer-thrown error events.  For ports with
 *	->error_handler, timed out commands are sorted out first and
 *	then ->error_handler is invoked, repeatedly (bounded by
 *	ATA_EH_MAX_REPEAT) while ATA_PFLAG_EH_PENDING gets set again
 *	during the run.  Other ports go through ->eng_timeout.
 *	Finally the done queue is flushed and waiters on eh_wait_q are
 *	woken up.
 *
 *	LOCKING:
 *	Inherited from SCSI layer (none, can sleep)
 */
void ata_scsi_error(struct Scsi_Host *host)
{
	struct ata_port *ap = ata_shost_to_port(host);
	int i, repeat_cnt = ATA_EH_MAX_REPEAT;
	unsigned long flags;

	DPRINTK("ENTER\n");

	/* synchronize with port task */
	ata_port_flush_task(ap);

	/* synchronize with host lock and sort out timeouts */

	/* For new EH, all qcs are finished in one of three ways -
	 * normal completion, error completion, and SCSI timeout.
	 * Both completions can race against SCSI timeout.  When normal
	 * completion wins, the qc never reaches EH.  When error
	 * completion wins, the qc has ATA_QCFLAG_FAILED set.
	 *
	 * When SCSI timeout wins, things are a bit more complex.
	 * Normal or error completion can occur after the timeout but
	 * before this point.  In such cases, both types of
	 * completions are honored.  A scmd is determined to have
	 * timed out iff its associated qc is active and not failed.
	 */
	if (ap->ops->error_handler) {
		struct scsi_cmnd *scmd, *tmp;
		int nr_timedout = 0;

		spin_lock_irqsave(ap->lock, flags);

		list_for_each_entry_safe(scmd, tmp, &host->eh_cmd_q, eh_entry) {
			struct ata_queued_cmd *qc;

			/* look for an active qc which carries this scmd */
			for (i = 0; i < ATA_MAX_QUEUE; i++) {
				qc = __ata_qc_from_tag(ap, i);
				if (qc->flags & ATA_QCFLAG_ACTIVE &&
				    qc->scsicmd == scmd)
					break;
			}

			if (i < ATA_MAX_QUEUE) {
				/* the scmd has an associated qc */
				if (!(qc->flags & ATA_QCFLAG_FAILED)) {
					/* which hasn't failed yet, timeout */
					qc->err_mask |= AC_ERR_TIMEOUT;
					qc->flags |= ATA_QCFLAG_FAILED;
					nr_timedout++;
				}
			} else {
				/* Normal completion occurred after
				 * SCSI timeout but before this point.
				 * Successfully complete it.
				 */
				scmd->retries = scmd->allowed;
				scsi_eh_finish_cmd(scmd, &ap->eh_done_q);
			}
		}

		/* If we have timed out qcs, they belong to EH from
		 * this point but the state of the controller is
		 * unknown.  Freeze the port to make sure the IRQ
		 * handler doesn't diddle with those qcs.  This must
		 * be done atomically w.r.t. setting QCFLAG_FAILED.
		 */
		if (nr_timedout)
			__ata_port_freeze(ap);

		spin_unlock_irqrestore(ap->lock, flags);
	} else
		spin_unlock_wait(ap->lock);

 repeat:
	/* invoke error handler */
	if (ap->ops->error_handler) {
		/* process port resume request */
		ata_eh_handle_port_resume(ap);

		/* fetch & clear EH info */
		spin_lock_irqsave(ap->lock, flags);

		/* eh_info is snapshotted into eh_context.i and then
		 * zeroed, all under ap->lock
		 */
		memset(&ap->eh_context, 0, sizeof(ap->eh_context));
		ap->eh_context.i = ap->eh_info;
		memset(&ap->eh_info, 0, sizeof(ap->eh_info));

		ap->pflags |= ATA_PFLAG_EH_IN_PROGRESS;
		ap->pflags &= ~ATA_PFLAG_EH_PENDING;

		spin_unlock_irqrestore(ap->lock, flags);

		/* invoke EH, skip if unloading or suspended */
		if (!(ap->pflags & (ATA_PFLAG_UNLOADING | ATA_PFLAG_SUSPENDED)))
			ap->ops->error_handler(ap);
		else
			ata_eh_finish(ap);

		/* process port suspend request */
		ata_eh_handle_port_suspend(ap);

		/* Exception might have happened after ->error_handler
		 * recovered the port but before this point.  Repeat
		 * EH in such case.
		 */
		spin_lock_irqsave(ap->lock, flags);

		if (ap->pflags & ATA_PFLAG_EH_PENDING) {
			/* repeat_cnt bounds the number of EH passes */
			if (--repeat_cnt) {
				ata_port_printk(ap, KERN_INFO,
					"EH pending after completion, "
					"repeating EH (cnt=%d)\n", repeat_cnt);
				spin_unlock_irqrestore(ap->lock, flags);
				goto repeat;
			}
			ata_port_printk(ap, KERN_ERR, "EH pending after %d "
					"tries, giving up\n", ATA_EH_MAX_REPEAT);
		}

		/* this run is complete, make sure EH info is clear */
		memset(&ap->eh_info, 0, sizeof(ap->eh_info));

		/* Clear host_eh_scheduled while holding ap->lock such
		 * that if exception occurs after this point but
		 * before EH completion, SCSI midlayer will
		 * re-initiate EH.
		 */
		host->host_eh_scheduled = 0;

		spin_unlock_irqrestore(ap->lock, flags);
	} else {
		/* old EH path: an active qc is expected here */
		WARN_ON(ata_qc_from_tag(ap, ap->active_tag) == NULL);
		ap->ops->eng_timeout(ap);
	}

	/* finish or retry handled scmd's and clean up */
	WARN_ON(host->host_failed || !list_empty(&host->eh_cmd_q));

	scsi_eh_flush_done_q(&ap->eh_done_q);

	/* clean up */
	spin_lock_irqsave(ap->lock, flags);

	/* LOADING is simply cleared; otherwise a pending SCSI hotplug
	 * request gets its work item queued
	 */
	if (ap->pflags & ATA_PFLAG_LOADING)
		ap->pflags &= ~ATA_PFLAG_LOADING;
	else if (ap->pflags & ATA_PFLAG_SCSI_HOTPLUG)
		queue_delayed_work(ata_aux_wq, &ap->hotplug_task, 0);

	if (ap->pflags & ATA_PFLAG_RECOVERED)
		ata_port_printk(ap, KERN_INFO, "EH complete\n");

	ap->pflags &= ~(ATA_PFLAG_SCSI_HOTPLUG | ATA_PFLAG_RECOVERED);

	/* tell wait_eh that we're done */
	ap->pflags &= ~ATA_PFLAG_EH_IN_PROGRESS;
	wake_up_all(&ap->eh_wait_q);

	spin_unlock_irqrestore(ap->lock, flags);

	DPRINTK("EXIT\n");
}
350 
/**
 *	ata_port_wait_eh - Wait for the currently pending EH to complete
 *	@ap: Port to wait EH for
 *
 *	Wait until the currently pending EH is complete, i.e. until
 *	neither ATA_PFLAG_EH_PENDING nor ATA_PFLAG_EH_IN_PROGRESS is
 *	set on @ap and the SCSI host is no longer in recovery.
 *
 *	LOCKING:
 *	Kernel thread context (may sleep).
 */
void ata_port_wait_eh(struct ata_port *ap)
{
	unsigned long flags;
	DEFINE_WAIT(wait);

 retry:
	spin_lock_irqsave(ap->lock, flags);

	/* pflags is tested under ap->lock; the lock is dropped only
	 * around schedule() and retaken before the next test
	 */
	while (ap->pflags & (ATA_PFLAG_EH_PENDING | ATA_PFLAG_EH_IN_PROGRESS)) {
		prepare_to_wait(&ap->eh_wait_q, &wait, TASK_UNINTERRUPTIBLE);
		spin_unlock_irqrestore(ap->lock, flags);
		schedule();
		spin_lock_irqsave(ap->lock, flags);
	}
	finish_wait(&ap->eh_wait_q, &wait);

	spin_unlock_irqrestore(ap->lock, flags);

	/* make sure SCSI EH is complete */
	if (scsi_host_in_recovery(ap->scsi_host)) {
		/* still recovering: back off briefly and start over */
		msleep(10);
		goto retry;
	}
}
384 
385 /**
386  *	ata_qc_timeout - Handle timeout of queued command
387  *	@qc: Command that timed out
388  *
389  *	Some part of the kernel (currently, only the SCSI layer)
390  *	has noticed that the active command on port @ap has not
391  *	completed after a specified length of time.  Handle this
392  *	condition by disabling DMA (if necessary) and completing
393  *	transactions, with error if necessary.
394  *
395  *	This also handles the case of the "lost interrupt", where
396  *	for some reason (possibly hardware bug, possibly driver bug)
397  *	an interrupt was not delivered to the driver, even though the
398  *	transaction completed successfully.
399  *
400  *	TODO: kill this function once old EH is gone.
401  *
402  *	LOCKING:
403  *	Inherited from SCSI layer (none, can sleep)
404  */
405 static void ata_qc_timeout(struct ata_queued_cmd *qc)
406 {
407 	struct ata_port *ap = qc->ap;
408 	u8 host_stat = 0, drv_stat;
409 	unsigned long flags;
410 
411 	DPRINTK("ENTER\n");
412 
413 	ap->hsm_task_state = HSM_ST_IDLE;
414 
415 	spin_lock_irqsave(ap->lock, flags);
416 
417 	switch (qc->tf.protocol) {
418 
419 	case ATA_PROT_DMA:
420 	case ATA_PROT_ATAPI_DMA:
421 		host_stat = ap->ops->bmdma_status(ap);
422 
423 		/* before we do anything else, clear DMA-Start bit */
424 		ap->ops->bmdma_stop(qc);
425 
426 		/* fall through */
427 
428 	default:
429 		ata_altstatus(ap);
430 		drv_stat = ata_chk_status(ap);
431 
432 		/* ack bmdma irq events */
433 		ap->ops->irq_clear(ap);
434 
435 		ata_dev_printk(qc->dev, KERN_ERR, "command 0x%x timeout, "
436 			       "stat 0x%x host_stat 0x%x\n",
437 			       qc->tf.command, drv_stat, host_stat);
438 
439 		/* complete taskfile transaction */
440 		qc->err_mask |= AC_ERR_TIMEOUT;
441 		break;
442 	}
443 
444 	spin_unlock_irqrestore(ap->lock, flags);
445 
446 	ata_eh_qc_complete(qc);
447 
448 	DPRINTK("EXIT\n");
449 }
450 
451 /**
452  *	ata_eng_timeout - Handle timeout of queued command
453  *	@ap: Port on which timed-out command is active
454  *
455  *	Some part of the kernel (currently, only the SCSI layer)
456  *	has noticed that the active command on port @ap has not
457  *	completed after a specified length of time.  Handle this
458  *	condition by disabling DMA (if necessary) and completing
459  *	transactions, with error if necessary.
460  *
461  *	This also handles the case of the "lost interrupt", where
462  *	for some reason (possibly hardware bug, possibly driver bug)
463  *	an interrupt was not delivered to the driver, even though the
464  *	transaction completed successfully.
465  *
466  *	TODO: kill this function once old EH is gone.
467  *
468  *	LOCKING:
469  *	Inherited from SCSI layer (none, can sleep)
470  */
471 void ata_eng_timeout(struct ata_port *ap)
472 {
473 	DPRINTK("ENTER\n");
474 
475 	ata_qc_timeout(ata_qc_from_tag(ap, ap->active_tag));
476 
477 	DPRINTK("EXIT\n");
478 }
479 
480 /**
481  *	ata_qc_schedule_eh - schedule qc for error handling
482  *	@qc: command to schedule error handling for
483  *
484  *	Schedule error handling for @qc.  EH will kick in as soon as
485  *	other commands are drained.
486  *
487  *	LOCKING:
488  *	spin_lock_irqsave(host lock)
489  */
490 void ata_qc_schedule_eh(struct ata_queued_cmd *qc)
491 {
492 	struct ata_port *ap = qc->ap;
493 
494 	WARN_ON(!ap->ops->error_handler);
495 
496 	qc->flags |= ATA_QCFLAG_FAILED;
497 	qc->ap->pflags |= ATA_PFLAG_EH_PENDING;
498 
499 	/* The following will fail if timeout has already expired.
500 	 * ata_scsi_error() takes care of such scmds on EH entry.
501 	 * Note that ATA_QCFLAG_FAILED is unconditionally set after
502 	 * this function completes.
503 	 */
504 	scsi_req_abort_cmd(qc->scsicmd);
505 }
506 
507 /**
508  *	ata_port_schedule_eh - schedule error handling without a qc
509  *	@ap: ATA port to schedule EH for
510  *
511  *	Schedule error handling for @ap.  EH will kick in as soon as
512  *	all commands are drained.
513  *
514  *	LOCKING:
515  *	spin_lock_irqsave(host lock)
516  */
517 void ata_port_schedule_eh(struct ata_port *ap)
518 {
519 	WARN_ON(!ap->ops->error_handler);
520 
521 	ap->pflags |= ATA_PFLAG_EH_PENDING;
522 	scsi_schedule_eh(ap->scsi_host);
523 
524 	DPRINTK("port EH scheduled\n");
525 }
526 
527 /**
528  *	ata_port_abort - abort all qc's on the port
529  *	@ap: ATA port to abort qc's for
530  *
531  *	Abort all active qc's of @ap and schedule EH.
532  *
533  *	LOCKING:
534  *	spin_lock_irqsave(host lock)
535  *
536  *	RETURNS:
537  *	Number of aborted qc's.
538  */
539 int ata_port_abort(struct ata_port *ap)
540 {
541 	int tag, nr_aborted = 0;
542 
543 	WARN_ON(!ap->ops->error_handler);
544 
545 	for (tag = 0; tag < ATA_MAX_QUEUE; tag++) {
546 		struct ata_queued_cmd *qc = ata_qc_from_tag(ap, tag);
547 
548 		if (qc) {
549 			qc->flags |= ATA_QCFLAG_FAILED;
550 			ata_qc_complete(qc);
551 			nr_aborted++;
552 		}
553 	}
554 
555 	if (!nr_aborted)
556 		ata_port_schedule_eh(ap);
557 
558 	return nr_aborted;
559 }
560 
561 /**
562  *	__ata_port_freeze - freeze port
563  *	@ap: ATA port to freeze
564  *
565  *	This function is called when HSM violation or some other
566  *	condition disrupts normal operation of the port.  Frozen port
567  *	is not allowed to perform any operation until the port is
568  *	thawed, which usually follows a successful reset.
569  *
570  *	ap->ops->freeze() callback can be used for freezing the port
571  *	hardware-wise (e.g. mask interrupt and stop DMA engine).  If a
572  *	port cannot be frozen hardware-wise, the interrupt handler
573  *	must ack and clear interrupts unconditionally while the port
574  *	is frozen.
575  *
576  *	LOCKING:
577  *	spin_lock_irqsave(host lock)
578  */
579 static void __ata_port_freeze(struct ata_port *ap)
580 {
581 	WARN_ON(!ap->ops->error_handler);
582 
583 	if (ap->ops->freeze)
584 		ap->ops->freeze(ap);
585 
586 	ap->pflags |= ATA_PFLAG_FROZEN;
587 
588 	DPRINTK("ata%u port frozen\n", ap->id);
589 }
590 
591 /**
592  *	ata_port_freeze - abort & freeze port
593  *	@ap: ATA port to freeze
594  *
595  *	Abort and freeze @ap.
596  *
597  *	LOCKING:
598  *	spin_lock_irqsave(host lock)
599  *
600  *	RETURNS:
601  *	Number of aborted commands.
602  */
603 int ata_port_freeze(struct ata_port *ap)
604 {
605 	int nr_aborted;
606 
607 	WARN_ON(!ap->ops->error_handler);
608 
609 	nr_aborted = ata_port_abort(ap);
610 	__ata_port_freeze(ap);
611 
612 	return nr_aborted;
613 }
614 
615 /**
616  *	ata_eh_freeze_port - EH helper to freeze port
617  *	@ap: ATA port to freeze
618  *
619  *	Freeze @ap.
620  *
621  *	LOCKING:
622  *	None.
623  */
624 void ata_eh_freeze_port(struct ata_port *ap)
625 {
626 	unsigned long flags;
627 
628 	if (!ap->ops->error_handler)
629 		return;
630 
631 	spin_lock_irqsave(ap->lock, flags);
632 	__ata_port_freeze(ap);
633 	spin_unlock_irqrestore(ap->lock, flags);
634 }
635 
/**
 *	ata_eh_thaw_port - EH helper to thaw port
 *	@ap: ATA port to thaw
 *
 *	Thaw frozen port @ap: clear ATA_PFLAG_FROZEN and invoke the
 *	optional ->thaw callback, both under ap->lock.  Does nothing
 *	for ports which do not implement ->error_handler.
 *
 *	LOCKING:
 *	None.
 */
void ata_eh_thaw_port(struct ata_port *ap)
{
	unsigned long flags;

	if (!ap->ops->error_handler)
		return;

	spin_lock_irqsave(ap->lock, flags);

	/* flag is cleared before the hardware hook runs */
	ap->pflags &= ~ATA_PFLAG_FROZEN;

	if (ap->ops->thaw)
		ap->ops->thaw(ap);

	spin_unlock_irqrestore(ap->lock, flags);

	DPRINTK("ata%u port thawed\n", ap->id);
}
663 
/* Intentionally empty completion callback.  __ata_eh_qc_complete()
 * installs it as qc->scsidone before calling __ata_qc_complete().
 */
static void ata_eh_scsidone(struct scsi_cmnd *scmd)
{
	/* nada */
}
668 
/* Common tail of ata_eh_qc_complete() and ata_eh_qc_retry(): complete
 * @qc under ap->lock with a no-op scsidone callback, then hand the
 * SCSI command over to the port's eh_done_q.
 */
static void __ata_eh_qc_complete(struct ata_queued_cmd *qc)
{
	struct ata_port *ap = qc->ap;
	/* fetched up front; it is used after the qc has been completed */
	struct scsi_cmnd *scmd = qc->scsicmd;
	unsigned long flags;

	spin_lock_irqsave(ap->lock, flags);
	qc->scsidone = ata_eh_scsidone;
	__ata_qc_complete(qc);
	/* the tag is expected to be invalid once the qc is completed */
	WARN_ON(ata_tag_valid(qc->tag));
	spin_unlock_irqrestore(ap->lock, flags);

	scsi_eh_finish_cmd(scmd, &ap->eh_done_q);
}
683 
684 /**
685  *	ata_eh_qc_complete - Complete an active ATA command from EH
686  *	@qc: Command to complete
687  *
688  *	Indicate to the mid and upper layers that an ATA command has
689  *	completed.  To be used from EH.
690  */
691 void ata_eh_qc_complete(struct ata_queued_cmd *qc)
692 {
693 	struct scsi_cmnd *scmd = qc->scsicmd;
694 	scmd->retries = scmd->allowed;
695 	__ata_eh_qc_complete(qc);
696 }
697 
698 /**
699  *	ata_eh_qc_retry - Tell midlayer to retry an ATA command after EH
700  *	@qc: Command to retry
701  *
702  *	Indicate to the mid and upper layers that an ATA command
703  *	should be retried.  To be used from EH.
704  *
705  *	SCSI midlayer limits the number of retries to scmd->allowed.
706  *	scmd->retries is decremented for commands which get retried
707  *	due to unrelated failures (qc->err_mask is zero).
708  */
709 void ata_eh_qc_retry(struct ata_queued_cmd *qc)
710 {
711 	struct scsi_cmnd *scmd = qc->scsicmd;
712 	if (!qc->err_mask && scmd->retries)
713 		scmd->retries--;
714 	__ata_eh_qc_complete(qc);
715 }
716 
717 /**
718  *	ata_eh_detach_dev - detach ATA device
719  *	@dev: ATA device to detach
720  *
721  *	Detach @dev.
722  *
723  *	LOCKING:
724  *	None.
725  */
726 static void ata_eh_detach_dev(struct ata_device *dev)
727 {
728 	struct ata_port *ap = dev->ap;
729 	unsigned long flags;
730 
731 	ata_dev_disable(dev);
732 
733 	spin_lock_irqsave(ap->lock, flags);
734 
735 	dev->flags &= ~ATA_DFLAG_DETACH;
736 
737 	if (ata_scsi_offline_dev(dev)) {
738 		dev->flags |= ATA_DFLAG_DETACHED;
739 		ap->pflags |= ATA_PFLAG_SCSI_HOTPLUG;
740 	}
741 
742 	/* clear per-dev EH actions */
743 	ata_eh_clear_action(dev, &ap->eh_info, ATA_EH_PERDEV_MASK);
744 	ata_eh_clear_action(dev, &ap->eh_context.i, ATA_EH_PERDEV_MASK);
745 
746 	spin_unlock_irqrestore(ap->lock, flags);
747 }
748 
749 /**
750  *	ata_eh_about_to_do - about to perform eh_action
751  *	@ap: target ATA port
752  *	@dev: target ATA dev for per-dev action (can be NULL)
753  *	@action: action about to be performed
754  *
755  *	Called just before performing EH actions to clear related bits
756  *	in @ap->eh_info such that eh actions are not unnecessarily
757  *	repeated.
758  *
759  *	LOCKING:
760  *	None.
761  */
762 static void ata_eh_about_to_do(struct ata_port *ap, struct ata_device *dev,
763 			       unsigned int action)
764 {
765 	unsigned long flags;
766 	struct ata_eh_info *ehi = &ap->eh_info;
767 	struct ata_eh_context *ehc = &ap->eh_context;
768 
769 	spin_lock_irqsave(ap->lock, flags);
770 
771 	/* Reset is represented by combination of actions and EHI
772 	 * flags.  Suck in all related bits before clearing eh_info to
773 	 * avoid losing requested action.
774 	 */
775 	if (action & ATA_EH_RESET_MASK) {
776 		ehc->i.action |= ehi->action & ATA_EH_RESET_MASK;
777 		ehc->i.flags |= ehi->flags & ATA_EHI_RESET_MODIFIER_MASK;
778 
779 		/* make sure all reset actions are cleared & clear EHI flags */
780 		action |= ATA_EH_RESET_MASK;
781 		ehi->flags &= ~ATA_EHI_RESET_MODIFIER_MASK;
782 	}
783 
784 	ata_eh_clear_action(dev, ehi, action);
785 
786 	if (!(ehc->i.flags & ATA_EHI_QUIET))
787 		ap->pflags |= ATA_PFLAG_RECOVERED;
788 
789 	spin_unlock_irqrestore(ap->lock, flags);
790 }
791 
792 /**
793  *	ata_eh_done - EH action complete
794  *	@ap: target ATA port
795  *	@dev: target ATA dev for per-dev action (can be NULL)
796  *	@action: action just completed
797  *
798  *	Called right after performing EH actions to clear related bits
799  *	in @ap->eh_context.
800  *
801  *	LOCKING:
802  *	None.
803  */
804 static void ata_eh_done(struct ata_port *ap, struct ata_device *dev,
805 			unsigned int action)
806 {
807 	/* if reset is complete, clear all reset actions & reset modifier */
808 	if (action & ATA_EH_RESET_MASK) {
809 		action |= ATA_EH_RESET_MASK;
810 		ap->eh_context.i.flags &= ~ATA_EHI_RESET_MODIFIER_MASK;
811 	}
812 
813 	ata_eh_clear_action(dev, &ap->eh_context.i, action);
814 }
815 
816 /**
817  *	ata_err_string - convert err_mask to descriptive string
818  *	@err_mask: error mask to convert to string
819  *
820  *	Convert @err_mask to descriptive string.  Errors are
821  *	prioritized according to severity and only the most severe
822  *	error is reported.
823  *
824  *	LOCKING:
825  *	None.
826  *
827  *	RETURNS:
828  *	Descriptive string for @err_mask
829  */
830 static const char * ata_err_string(unsigned int err_mask)
831 {
832 	if (err_mask & AC_ERR_HOST_BUS)
833 		return "host bus error";
834 	if (err_mask & AC_ERR_ATA_BUS)
835 		return "ATA bus error";
836 	if (err_mask & AC_ERR_TIMEOUT)
837 		return "timeout";
838 	if (err_mask & AC_ERR_HSM)
839 		return "HSM violation";
840 	if (err_mask & AC_ERR_SYSTEM)
841 		return "internal error";
842 	if (err_mask & AC_ERR_MEDIA)
843 		return "media error";
844 	if (err_mask & AC_ERR_INVALID)
845 		return "invalid argument";
846 	if (err_mask & AC_ERR_DEV)
847 		return "device error";
848 	return "unknown error";
849 }
850 
851 /**
852  *	ata_read_log_page - read a specific log page
853  *	@dev: target device
854  *	@page: page to read
855  *	@buf: buffer to store read page
856  *	@sectors: number of sectors to read
857  *
858  *	Read log page using READ_LOG_EXT command.
859  *
860  *	LOCKING:
861  *	Kernel thread context (may sleep).
862  *
863  *	RETURNS:
864  *	0 on success, AC_ERR_* mask otherwise.
865  */
866 static unsigned int ata_read_log_page(struct ata_device *dev,
867 				      u8 page, void *buf, unsigned int sectors)
868 {
869 	struct ata_taskfile tf;
870 	unsigned int err_mask;
871 
872 	DPRINTK("read log page - page %d\n", page);
873 
874 	ata_tf_init(dev, &tf);
875 	tf.command = ATA_CMD_READ_LOG_EXT;
876 	tf.lbal = page;
877 	tf.nsect = sectors;
878 	tf.hob_nsect = sectors >> 8;
879 	tf.flags |= ATA_TFLAG_ISADDR | ATA_TFLAG_LBA48 | ATA_TFLAG_DEVICE;
880 	tf.protocol = ATA_PROT_PIO;
881 
882 	err_mask = ata_exec_internal(dev, &tf, NULL, DMA_FROM_DEVICE,
883 				     buf, sectors * ATA_SECT_SIZE);
884 
885 	DPRINTK("EXIT, err_mask=%x\n", err_mask);
886 	return err_mask;
887 }
888 
889 /**
890  *	ata_eh_read_log_10h - Read log page 10h for NCQ error details
891  *	@dev: Device to read log page 10h from
892  *	@tag: Resulting tag of the failed command
893  *	@tf: Resulting taskfile registers of the failed command
894  *
895  *	Read log page 10h to obtain NCQ error details and clear error
896  *	condition.
897  *
898  *	LOCKING:
899  *	Kernel thread context (may sleep).
900  *
901  *	RETURNS:
902  *	0 on success, -errno otherwise.
903  */
904 static int ata_eh_read_log_10h(struct ata_device *dev,
905 			       int *tag, struct ata_taskfile *tf)
906 {
907 	u8 *buf = dev->ap->sector_buf;
908 	unsigned int err_mask;
909 	u8 csum;
910 	int i;
911 
912 	err_mask = ata_read_log_page(dev, ATA_LOG_SATA_NCQ, buf, 1);
913 	if (err_mask)
914 		return -EIO;
915 
916 	csum = 0;
917 	for (i = 0; i < ATA_SECT_SIZE; i++)
918 		csum += buf[i];
919 	if (csum)
920 		ata_dev_printk(dev, KERN_WARNING,
921 			       "invalid checksum 0x%x on log page 10h\n", csum);
922 
923 	if (buf[0] & 0x80)
924 		return -ENOENT;
925 
926 	*tag = buf[0] & 0x1f;
927 
928 	tf->command = buf[2];
929 	tf->feature = buf[3];
930 	tf->lbal = buf[4];
931 	tf->lbam = buf[5];
932 	tf->lbah = buf[6];
933 	tf->device = buf[7];
934 	tf->hob_lbal = buf[8];
935 	tf->hob_lbam = buf[9];
936 	tf->hob_lbah = buf[10];
937 	tf->nsect = buf[12];
938 	tf->hob_nsect = buf[13];
939 
940 	return 0;
941 }
942 
943 /**
944  *	atapi_eh_request_sense - perform ATAPI REQUEST_SENSE
945  *	@dev: device to perform REQUEST_SENSE to
946  *	@sense_buf: result sense data buffer (SCSI_SENSE_BUFFERSIZE bytes long)
947  *
948  *	Perform ATAPI REQUEST_SENSE after the device reported CHECK
949  *	SENSE.  This function is EH helper.
950  *
951  *	LOCKING:
952  *	Kernel thread context (may sleep).
953  *
954  *	RETURNS:
955  *	0 on success, AC_ERR_* mask on failure
956  */
957 static unsigned int atapi_eh_request_sense(struct ata_device *dev,
958 					   unsigned char *sense_buf)
959 {
960 	struct ata_port *ap = dev->ap;
961 	struct ata_taskfile tf;
962 	u8 cdb[ATAPI_CDB_LEN];
963 
964 	DPRINTK("ATAPI request sense\n");
965 
966 	ata_tf_init(dev, &tf);
967 
968 	/* FIXME: is this needed? */
969 	memset(sense_buf, 0, SCSI_SENSE_BUFFERSIZE);
970 
971 	/* XXX: why tf_read here? */
972 	ap->ops->tf_read(ap, &tf);
973 
974 	/* fill these in, for the case where they are -not- overwritten */
975 	sense_buf[0] = 0x70;
976 	sense_buf[2] = tf.feature >> 4;
977 
978 	memset(cdb, 0, ATAPI_CDB_LEN);
979 	cdb[0] = REQUEST_SENSE;
980 	cdb[4] = SCSI_SENSE_BUFFERSIZE;
981 
982 	tf.flags |= ATA_TFLAG_ISADDR | ATA_TFLAG_DEVICE;
983 	tf.command = ATA_CMD_PACKET;
984 
985 	/* is it pointless to prefer PIO for "safety reasons"? */
986 	if (ap->flags & ATA_FLAG_PIO_DMA) {
987 		tf.protocol = ATA_PROT_ATAPI_DMA;
988 		tf.feature |= ATAPI_PKT_DMA;
989 	} else {
990 		tf.protocol = ATA_PROT_ATAPI;
991 		tf.lbam = (8 * 1024) & 0xff;
992 		tf.lbah = (8 * 1024) >> 8;
993 	}
994 
995 	return ata_exec_internal(dev, &tf, cdb, DMA_FROM_DEVICE,
996 				 sense_buf, SCSI_SENSE_BUFFERSIZE);
997 }
998 
999 /**
1000  *	ata_eh_analyze_serror - analyze SError for a failed port
1001  *	@ap: ATA port to analyze SError for
1002  *
1003  *	Analyze SError if available and further determine cause of
1004  *	failure.
1005  *
1006  *	LOCKING:
1007  *	None.
1008  */
1009 static void ata_eh_analyze_serror(struct ata_port *ap)
1010 {
1011 	struct ata_eh_context *ehc = &ap->eh_context;
1012 	u32 serror = ehc->i.serror;
1013 	unsigned int err_mask = 0, action = 0;
1014 
1015 	if (serror & SERR_PERSISTENT) {
1016 		err_mask |= AC_ERR_ATA_BUS;
1017 		action |= ATA_EH_HARDRESET;
1018 	}
1019 	if (serror &
1020 	    (SERR_DATA_RECOVERED | SERR_COMM_RECOVERED | SERR_DATA)) {
1021 		err_mask |= AC_ERR_ATA_BUS;
1022 		action |= ATA_EH_SOFTRESET;
1023 	}
1024 	if (serror & SERR_PROTOCOL) {
1025 		err_mask |= AC_ERR_HSM;
1026 		action |= ATA_EH_SOFTRESET;
1027 	}
1028 	if (serror & SERR_INTERNAL) {
1029 		err_mask |= AC_ERR_SYSTEM;
1030 		action |= ATA_EH_SOFTRESET;
1031 	}
1032 	if (serror & (SERR_PHYRDY_CHG | SERR_DEV_XCHG))
1033 		ata_ehi_hotplugged(&ehc->i);
1034 
1035 	ehc->i.err_mask |= err_mask;
1036 	ehc->i.action |= action;
1037 }
1038 
1039 /**
1040  *	ata_eh_analyze_ncq_error - analyze NCQ error
1041  *	@ap: ATA port to analyze NCQ error for
1042  *
1043  *	Read log page 10h, determine the offending qc and acquire
1044  *	error status TF.  For NCQ device errors, all LLDDs have to do
1045  *	is setting AC_ERR_DEV in ehi->err_mask.  This function takes
1046  *	care of the rest.
1047  *
1048  *	LOCKING:
1049  *	Kernel thread context (may sleep).
1050  */
1051 static void ata_eh_analyze_ncq_error(struct ata_port *ap)
1052 {
1053 	struct ata_eh_context *ehc = &ap->eh_context;
1054 	struct ata_device *dev = ap->device;
1055 	struct ata_queued_cmd *qc;
1056 	struct ata_taskfile tf;
1057 	int tag, rc;
1058 
1059 	/* if frozen, we can't do much */
1060 	if (ap->pflags & ATA_PFLAG_FROZEN)
1061 		return;
1062 
1063 	/* is it NCQ device error? */
1064 	if (!ap->sactive || !(ehc->i.err_mask & AC_ERR_DEV))
1065 		return;
1066 
1067 	/* has LLDD analyzed already? */
1068 	for (tag = 0; tag < ATA_MAX_QUEUE; tag++) {
1069 		qc = __ata_qc_from_tag(ap, tag);
1070 
1071 		if (!(qc->flags & ATA_QCFLAG_FAILED))
1072 			continue;
1073 
1074 		if (qc->err_mask)
1075 			return;
1076 	}
1077 
1078 	/* okay, this error is ours */
1079 	rc = ata_eh_read_log_10h(dev, &tag, &tf);
1080 	if (rc) {
1081 		ata_port_printk(ap, KERN_ERR, "failed to read log page 10h "
1082 				"(errno=%d)\n", rc);
1083 		return;
1084 	}
1085 
1086 	if (!(ap->sactive & (1 << tag))) {
1087 		ata_port_printk(ap, KERN_ERR, "log page 10h reported "
1088 				"inactive tag %d\n", tag);
1089 		return;
1090 	}
1091 
1092 	/* we've got the perpetrator, condemn it */
1093 	qc = __ata_qc_from_tag(ap, tag);
1094 	memcpy(&qc->result_tf, &tf, sizeof(tf));
1095 	qc->err_mask |= AC_ERR_DEV;
1096 	ehc->i.err_mask &= ~AC_ERR_DEV;
1097 }
1098 
1099 /**
1100  *	ata_eh_analyze_tf - analyze taskfile of a failed qc
1101  *	@qc: qc to analyze
1102  *	@tf: Taskfile registers to analyze
1103  *
1104  *	Analyze taskfile of @qc and further determine cause of
1105  *	failure.  This function also requests ATAPI sense data if
1106  *	avaliable.
1107  *
1108  *	LOCKING:
1109  *	Kernel thread context (may sleep).
1110  *
1111  *	RETURNS:
1112  *	Determined recovery action
1113  */
1114 static unsigned int ata_eh_analyze_tf(struct ata_queued_cmd *qc,
1115 				      const struct ata_taskfile *tf)
1116 {
1117 	unsigned int tmp, action = 0;
1118 	u8 stat = tf->command, err = tf->feature;
1119 
1120 	if ((stat & (ATA_BUSY | ATA_DRQ | ATA_DRDY)) != ATA_DRDY) {
1121 		qc->err_mask |= AC_ERR_HSM;
1122 		return ATA_EH_SOFTRESET;
1123 	}
1124 
1125 	if (!(qc->err_mask & AC_ERR_DEV))
1126 		return 0;
1127 
1128 	switch (qc->dev->class) {
1129 	case ATA_DEV_ATA:
1130 		if (err & ATA_ICRC)
1131 			qc->err_mask |= AC_ERR_ATA_BUS;
1132 		if (err & ATA_UNC)
1133 			qc->err_mask |= AC_ERR_MEDIA;
1134 		if (err & ATA_IDNF)
1135 			qc->err_mask |= AC_ERR_INVALID;
1136 		break;
1137 
1138 	case ATA_DEV_ATAPI:
1139 		if (!(qc->ap->pflags & ATA_PFLAG_FROZEN)) {
1140 			tmp = atapi_eh_request_sense(qc->dev,
1141 						     qc->scsicmd->sense_buffer);
1142 			if (!tmp) {
1143 				/* ATA_QCFLAG_SENSE_VALID is used to
1144 				 * tell atapi_qc_complete() that sense
1145 				 * data is already valid.
1146 				 *
1147 				 * TODO: interpret sense data and set
1148 				 * appropriate err_mask.
1149 				 */
1150 				qc->flags |= ATA_QCFLAG_SENSE_VALID;
1151 			} else
1152 				qc->err_mask |= tmp;
1153 		}
1154 	}
1155 
1156 	if (qc->err_mask & (AC_ERR_HSM | AC_ERR_TIMEOUT | AC_ERR_ATA_BUS))
1157 		action |= ATA_EH_SOFTRESET;
1158 
1159 	return action;
1160 }
1161 
1162 static int ata_eh_categorize_ering_entry(struct ata_ering_entry *ent)
1163 {
1164 	if (ent->err_mask & (AC_ERR_ATA_BUS | AC_ERR_TIMEOUT))
1165 		return 1;
1166 
1167 	if (ent->is_io) {
1168 		if (ent->err_mask & AC_ERR_HSM)
1169 			return 1;
1170 		if ((ent->err_mask &
1171 		     (AC_ERR_DEV|AC_ERR_MEDIA|AC_ERR_INVALID)) == AC_ERR_DEV)
1172 			return 2;
1173 	}
1174 
1175 	return 0;
1176 }
1177 
1178 struct speed_down_needed_arg {
1179 	u64 since;
1180 	int nr_errors[3];
1181 };
1182 
1183 static int speed_down_needed_cb(struct ata_ering_entry *ent, void *void_arg)
1184 {
1185 	struct speed_down_needed_arg *arg = void_arg;
1186 
1187 	if (ent->timestamp < arg->since)
1188 		return -1;
1189 
1190 	arg->nr_errors[ata_eh_categorize_ering_entry(ent)]++;
1191 	return 0;
1192 }
1193 
1194 /**
1195  *	ata_eh_speed_down_needed - Determine wheter speed down is necessary
1196  *	@dev: Device of interest
1197  *
1198  *	This function examines error ring of @dev and determines
1199  *	whether speed down is necessary.  Speed down is necessary if
1200  *	there have been more than 3 of Cat-1 errors or 10 of Cat-2
1201  *	errors during last 15 minutes.
1202  *
1203  *	Cat-1 errors are ATA_BUS, TIMEOUT for any command and HSM
1204  *	violation for known supported commands.
1205  *
1206  *	Cat-2 errors are unclassified DEV error for known supported
1207  *	command.
1208  *
1209  *	LOCKING:
1210  *	Inherited from caller.
1211  *
1212  *	RETURNS:
1213  *	1 if speed down is necessary, 0 otherwise
1214  */
1215 static int ata_eh_speed_down_needed(struct ata_device *dev)
1216 {
1217 	const u64 interval = 15LLU * 60 * HZ;
1218 	static const int err_limits[3] = { -1, 3, 10 };
1219 	struct speed_down_needed_arg arg;
1220 	struct ata_ering_entry *ent;
1221 	int err_cat;
1222 	u64 j64;
1223 
1224 	ent = ata_ering_top(&dev->ering);
1225 	if (!ent)
1226 		return 0;
1227 
1228 	err_cat = ata_eh_categorize_ering_entry(ent);
1229 	if (err_cat == 0)
1230 		return 0;
1231 
1232 	memset(&arg, 0, sizeof(arg));
1233 
1234 	j64 = get_jiffies_64();
1235 	if (j64 >= interval)
1236 		arg.since = j64 - interval;
1237 	else
1238 		arg.since = 0;
1239 
1240 	ata_ering_map(&dev->ering, speed_down_needed_cb, &arg);
1241 
1242 	return arg.nr_errors[err_cat] > err_limits[err_cat];
1243 }
1244 
1245 /**
1246  *	ata_eh_speed_down - record error and speed down if necessary
1247  *	@dev: Failed device
1248  *	@is_io: Did the device fail during normal IO?
1249  *	@err_mask: err_mask of the error
1250  *
1251  *	Record error and examine error history to determine whether
1252  *	adjusting transmission speed is necessary.  It also sets
1253  *	transmission limits appropriately if such adjustment is
1254  *	necessary.
1255  *
1256  *	LOCKING:
1257  *	Kernel thread context (may sleep).
1258  *
1259  *	RETURNS:
1260  *	0 on success, -errno otherwise
1261  */
1262 static int ata_eh_speed_down(struct ata_device *dev, int is_io,
1263 			     unsigned int err_mask)
1264 {
1265 	if (!err_mask)
1266 		return 0;
1267 
1268 	/* record error and determine whether speed down is necessary */
1269 	ata_ering_record(&dev->ering, is_io, err_mask);
1270 
1271 	if (!ata_eh_speed_down_needed(dev))
1272 		return 0;
1273 
1274 	/* speed down SATA link speed if possible */
1275 	if (sata_down_spd_limit(dev->ap) == 0)
1276 		return ATA_EH_HARDRESET;
1277 
1278 	/* lower transfer mode */
1279 	if (ata_down_xfermask_limit(dev, 0) == 0)
1280 		return ATA_EH_SOFTRESET;
1281 
1282 	ata_dev_printk(dev, KERN_ERR,
1283 		       "speed down requested but no transfer mode left\n");
1284 	return 0;
1285 }
1286 
1287 /**
1288  *	ata_eh_autopsy - analyze error and determine recovery action
1289  *	@ap: ATA port to perform autopsy on
1290  *
1291  *	Analyze why @ap failed and determine which recovery action is
1292  *	needed.  This function also sets more detailed AC_ERR_* values
1293  *	and fills sense data for ATAPI CHECK SENSE.
1294  *
1295  *	LOCKING:
1296  *	Kernel thread context (may sleep).
1297  */
1298 static void ata_eh_autopsy(struct ata_port *ap)
1299 {
1300 	struct ata_eh_context *ehc = &ap->eh_context;
1301 	unsigned int all_err_mask = 0;
1302 	int tag, is_io = 0;
1303 	u32 serror;
1304 	int rc;
1305 
1306 	DPRINTK("ENTER\n");
1307 
1308 	if (ehc->i.flags & ATA_EHI_NO_AUTOPSY)
1309 		return;
1310 
1311 	/* obtain and analyze SError */
1312 	rc = sata_scr_read(ap, SCR_ERROR, &serror);
1313 	if (rc == 0) {
1314 		ehc->i.serror |= serror;
1315 		ata_eh_analyze_serror(ap);
1316 	} else if (rc != -EOPNOTSUPP)
1317 		ehc->i.action |= ATA_EH_HARDRESET;
1318 
1319 	/* analyze NCQ failure */
1320 	ata_eh_analyze_ncq_error(ap);
1321 
1322 	/* any real error trumps AC_ERR_OTHER */
1323 	if (ehc->i.err_mask & ~AC_ERR_OTHER)
1324 		ehc->i.err_mask &= ~AC_ERR_OTHER;
1325 
1326 	all_err_mask |= ehc->i.err_mask;
1327 
1328 	for (tag = 0; tag < ATA_MAX_QUEUE; tag++) {
1329 		struct ata_queued_cmd *qc = __ata_qc_from_tag(ap, tag);
1330 
1331 		if (!(qc->flags & ATA_QCFLAG_FAILED))
1332 			continue;
1333 
1334 		/* inherit upper level err_mask */
1335 		qc->err_mask |= ehc->i.err_mask;
1336 
1337 		/* analyze TF */
1338 		ehc->i.action |= ata_eh_analyze_tf(qc, &qc->result_tf);
1339 
1340 		/* DEV errors are probably spurious in case of ATA_BUS error */
1341 		if (qc->err_mask & AC_ERR_ATA_BUS)
1342 			qc->err_mask &= ~(AC_ERR_DEV | AC_ERR_MEDIA |
1343 					  AC_ERR_INVALID);
1344 
1345 		/* any real error trumps unknown error */
1346 		if (qc->err_mask & ~AC_ERR_OTHER)
1347 			qc->err_mask &= ~AC_ERR_OTHER;
1348 
1349 		/* SENSE_VALID trumps dev/unknown error and revalidation */
1350 		if (qc->flags & ATA_QCFLAG_SENSE_VALID) {
1351 			qc->err_mask &= ~(AC_ERR_DEV | AC_ERR_OTHER);
1352 			ehc->i.action &= ~ATA_EH_REVALIDATE;
1353 		}
1354 
1355 		/* accumulate error info */
1356 		ehc->i.dev = qc->dev;
1357 		all_err_mask |= qc->err_mask;
1358 		if (qc->flags & ATA_QCFLAG_IO)
1359 			is_io = 1;
1360 	}
1361 
1362 	/* enforce default EH actions */
1363 	if (ap->pflags & ATA_PFLAG_FROZEN ||
1364 	    all_err_mask & (AC_ERR_HSM | AC_ERR_TIMEOUT))
1365 		ehc->i.action |= ATA_EH_SOFTRESET;
1366 	else if (all_err_mask)
1367 		ehc->i.action |= ATA_EH_REVALIDATE;
1368 
1369 	/* if we have offending qcs and the associated failed device */
1370 	if (ehc->i.dev) {
1371 		/* speed down */
1372 		ehc->i.action |= ata_eh_speed_down(ehc->i.dev, is_io,
1373 						   all_err_mask);
1374 
1375 		/* perform per-dev EH action only on the offending device */
1376 		ehc->i.dev_action[ehc->i.dev->devno] |=
1377 			ehc->i.action & ATA_EH_PERDEV_MASK;
1378 		ehc->i.action &= ~ATA_EH_PERDEV_MASK;
1379 	}
1380 
1381 	DPRINTK("EXIT\n");
1382 }
1383 
1384 /**
1385  *	ata_eh_report - report error handling to user
1386  *	@ap: ATA port EH is going on
1387  *
1388  *	Report EH to user.
1389  *
1390  *	LOCKING:
1391  *	None.
1392  */
1393 static void ata_eh_report(struct ata_port *ap)
1394 {
1395 	struct ata_eh_context *ehc = &ap->eh_context;
1396 	const char *frozen, *desc;
1397 	int tag, nr_failed = 0;
1398 
1399 	desc = NULL;
1400 	if (ehc->i.desc[0] != '\0')
1401 		desc = ehc->i.desc;
1402 
1403 	for (tag = 0; tag < ATA_MAX_QUEUE; tag++) {
1404 		struct ata_queued_cmd *qc = __ata_qc_from_tag(ap, tag);
1405 
1406 		if (!(qc->flags & ATA_QCFLAG_FAILED))
1407 			continue;
1408 		if (qc->flags & ATA_QCFLAG_SENSE_VALID && !qc->err_mask)
1409 			continue;
1410 
1411 		nr_failed++;
1412 	}
1413 
1414 	if (!nr_failed && !ehc->i.err_mask)
1415 		return;
1416 
1417 	frozen = "";
1418 	if (ap->pflags & ATA_PFLAG_FROZEN)
1419 		frozen = " frozen";
1420 
1421 	if (ehc->i.dev) {
1422 		ata_dev_printk(ehc->i.dev, KERN_ERR, "exception Emask 0x%x "
1423 			       "SAct 0x%x SErr 0x%x action 0x%x%s\n",
1424 			       ehc->i.err_mask, ap->sactive, ehc->i.serror,
1425 			       ehc->i.action, frozen);
1426 		if (desc)
1427 			ata_dev_printk(ehc->i.dev, KERN_ERR, "(%s)\n", desc);
1428 	} else {
1429 		ata_port_printk(ap, KERN_ERR, "exception Emask 0x%x "
1430 				"SAct 0x%x SErr 0x%x action 0x%x%s\n",
1431 				ehc->i.err_mask, ap->sactive, ehc->i.serror,
1432 				ehc->i.action, frozen);
1433 		if (desc)
1434 			ata_port_printk(ap, KERN_ERR, "(%s)\n", desc);
1435 	}
1436 
1437 	for (tag = 0; tag < ATA_MAX_QUEUE; tag++) {
1438 		static const char *dma_str[] = {
1439 			[DMA_BIDIRECTIONAL]	= "bidi",
1440 			[DMA_TO_DEVICE]		= "out",
1441 			[DMA_FROM_DEVICE]	= "in",
1442 			[DMA_NONE]		= "",
1443 		};
1444 		struct ata_queued_cmd *qc = __ata_qc_from_tag(ap, tag);
1445 		struct ata_taskfile *cmd = &qc->tf, *res = &qc->result_tf;
1446 		unsigned int nbytes;
1447 
1448 		if (!(qc->flags & ATA_QCFLAG_FAILED) || !qc->err_mask)
1449 			continue;
1450 
1451 		nbytes = qc->nbytes;
1452 		if (!nbytes)
1453 			nbytes = qc->nsect << 9;
1454 
1455 		ata_dev_printk(qc->dev, KERN_ERR,
1456 			"cmd %02x/%02x:%02x:%02x:%02x:%02x/%02x:%02x:%02x:%02x:%02x/%02x "
1457 			"tag %d cdb 0x%x data %u %s\n         "
1458 			"res %02x/%02x:%02x:%02x:%02x:%02x/%02x:%02x:%02x:%02x:%02x/%02x "
1459 			"Emask 0x%x (%s)\n",
1460 			cmd->command, cmd->feature, cmd->nsect,
1461 			cmd->lbal, cmd->lbam, cmd->lbah,
1462 			cmd->hob_feature, cmd->hob_nsect,
1463 			cmd->hob_lbal, cmd->hob_lbam, cmd->hob_lbah,
1464 			cmd->device, qc->tag, qc->cdb[0], nbytes,
1465 			dma_str[qc->dma_dir],
1466 			res->command, res->feature, res->nsect,
1467 			res->lbal, res->lbam, res->lbah,
1468 			res->hob_feature, res->hob_nsect,
1469 			res->hob_lbal, res->hob_lbam, res->hob_lbah,
1470 			res->device, qc->err_mask, ata_err_string(qc->err_mask));
1471 	}
1472 }
1473 
1474 static int ata_do_reset(struct ata_port *ap, ata_reset_fn_t reset,
1475 			unsigned int *classes)
1476 {
1477 	int i, rc;
1478 
1479 	for (i = 0; i < ATA_MAX_DEVICES; i++)
1480 		classes[i] = ATA_DEV_UNKNOWN;
1481 
1482 	rc = reset(ap, classes);
1483 	if (rc)
1484 		return rc;
1485 
1486 	/* If any class isn't ATA_DEV_UNKNOWN, consider classification
1487 	 * is complete and convert all ATA_DEV_UNKNOWN to
1488 	 * ATA_DEV_NONE.
1489 	 */
1490 	for (i = 0; i < ATA_MAX_DEVICES; i++)
1491 		if (classes[i] != ATA_DEV_UNKNOWN)
1492 			break;
1493 
1494 	if (i < ATA_MAX_DEVICES)
1495 		for (i = 0; i < ATA_MAX_DEVICES; i++)
1496 			if (classes[i] == ATA_DEV_UNKNOWN)
1497 				classes[i] = ATA_DEV_NONE;
1498 
1499 	return 0;
1500 }
1501 
1502 static int ata_eh_followup_srst_needed(int rc, int classify,
1503 				       const unsigned int *classes)
1504 {
1505 	if (rc == -EAGAIN)
1506 		return 1;
1507 	if (rc != 0)
1508 		return 0;
1509 	if (classify && classes[0] == ATA_DEV_UNKNOWN)
1510 		return 1;
1511 	return 0;
1512 }
1513 
1514 static int ata_eh_reset(struct ata_port *ap, int classify,
1515 			ata_prereset_fn_t prereset, ata_reset_fn_t softreset,
1516 			ata_reset_fn_t hardreset, ata_postreset_fn_t postreset)
1517 {
1518 	struct ata_eh_context *ehc = &ap->eh_context;
1519 	unsigned int *classes = ehc->classes;
1520 	int tries = ATA_EH_RESET_TRIES;
1521 	int verbose = !(ehc->i.flags & ATA_EHI_QUIET);
1522 	unsigned int action;
1523 	ata_reset_fn_t reset;
1524 	int i, did_followup_srst, rc;
1525 
1526 	/* about to reset */
1527 	ata_eh_about_to_do(ap, NULL, ehc->i.action & ATA_EH_RESET_MASK);
1528 
1529 	/* Determine which reset to use and record in ehc->i.action.
1530 	 * prereset() may examine and modify it.
1531 	 */
1532 	action = ehc->i.action;
1533 	ehc->i.action &= ~ATA_EH_RESET_MASK;
1534 	if (softreset && (!hardreset || (!sata_set_spd_needed(ap) &&
1535 					 !(action & ATA_EH_HARDRESET))))
1536 		ehc->i.action |= ATA_EH_SOFTRESET;
1537 	else
1538 		ehc->i.action |= ATA_EH_HARDRESET;
1539 
1540 	if (prereset) {
1541 		rc = prereset(ap);
1542 		if (rc) {
1543 			if (rc == -ENOENT) {
1544 				ata_port_printk(ap, KERN_DEBUG, "port disabled. ignoring.\n");
1545 				ap->eh_context.i.action &= ~ATA_EH_RESET_MASK;
1546 			} else
1547 				ata_port_printk(ap, KERN_ERR,
1548 					"prereset failed (errno=%d)\n", rc);
1549 			return rc;
1550 		}
1551 	}
1552 
1553 	/* prereset() might have modified ehc->i.action */
1554 	if (ehc->i.action & ATA_EH_HARDRESET)
1555 		reset = hardreset;
1556 	else if (ehc->i.action & ATA_EH_SOFTRESET)
1557 		reset = softreset;
1558 	else {
1559 		/* prereset told us not to reset, bang classes and return */
1560 		for (i = 0; i < ATA_MAX_DEVICES; i++)
1561 			classes[i] = ATA_DEV_NONE;
1562 		return 0;
1563 	}
1564 
1565 	/* did prereset() screw up?  if so, fix up to avoid oopsing */
1566 	if (!reset) {
1567 		ata_port_printk(ap, KERN_ERR, "BUG: prereset() requested "
1568 				"invalid reset type\n");
1569 		if (softreset)
1570 			reset = softreset;
1571 		else
1572 			reset = hardreset;
1573 	}
1574 
1575  retry:
1576 	/* shut up during boot probing */
1577 	if (verbose)
1578 		ata_port_printk(ap, KERN_INFO, "%s resetting port\n",
1579 				reset == softreset ? "soft" : "hard");
1580 
1581 	/* mark that this EH session started with reset */
1582 	ehc->i.flags |= ATA_EHI_DID_RESET;
1583 
1584 	rc = ata_do_reset(ap, reset, classes);
1585 
1586 	did_followup_srst = 0;
1587 	if (reset == hardreset &&
1588 	    ata_eh_followup_srst_needed(rc, classify, classes)) {
1589 		/* okay, let's do follow-up softreset */
1590 		did_followup_srst = 1;
1591 		reset = softreset;
1592 
1593 		if (!reset) {
1594 			ata_port_printk(ap, KERN_ERR,
1595 					"follow-up softreset required "
1596 					"but no softreset avaliable\n");
1597 			return -EINVAL;
1598 		}
1599 
1600 		ata_eh_about_to_do(ap, NULL, ATA_EH_RESET_MASK);
1601 		rc = ata_do_reset(ap, reset, classes);
1602 
1603 		if (rc == 0 && classify &&
1604 		    classes[0] == ATA_DEV_UNKNOWN) {
1605 			ata_port_printk(ap, KERN_ERR,
1606 					"classification failed\n");
1607 			return -EINVAL;
1608 		}
1609 	}
1610 
1611 	if (rc && --tries) {
1612 		const char *type;
1613 
1614 		if (reset == softreset) {
1615 			if (did_followup_srst)
1616 				type = "follow-up soft";
1617 			else
1618 				type = "soft";
1619 		} else
1620 			type = "hard";
1621 
1622 		ata_port_printk(ap, KERN_WARNING,
1623 				"%sreset failed, retrying in 5 secs\n", type);
1624 		ssleep(5);
1625 
1626 		if (reset == hardreset)
1627 			sata_down_spd_limit(ap);
1628 		if (hardreset)
1629 			reset = hardreset;
1630 		goto retry;
1631 	}
1632 
1633 	if (rc == 0) {
1634 		/* After the reset, the device state is PIO 0 and the
1635 		 * controller state is undefined.  Record the mode.
1636 		 */
1637 		for (i = 0; i < ATA_MAX_DEVICES; i++)
1638 			ap->device[i].pio_mode = XFER_PIO_0;
1639 
1640 		if (postreset)
1641 			postreset(ap, classes);
1642 
1643 		/* reset successful, schedule revalidation */
1644 		ata_eh_done(ap, NULL, ehc->i.action & ATA_EH_RESET_MASK);
1645 		ehc->i.action |= ATA_EH_REVALIDATE;
1646 	}
1647 
1648 	return rc;
1649 }
1650 
1651 static int ata_eh_revalidate_and_attach(struct ata_port *ap,
1652 					struct ata_device **r_failed_dev)
1653 {
1654 	struct ata_eh_context *ehc = &ap->eh_context;
1655 	struct ata_device *dev;
1656 	unsigned long flags;
1657 	int i, rc = 0;
1658 
1659 	DPRINTK("ENTER\n");
1660 
1661 	for (i = 0; i < ATA_MAX_DEVICES; i++) {
1662 		unsigned int action, readid_flags = 0;
1663 
1664 		dev = &ap->device[i];
1665 		action = ata_eh_dev_action(dev);
1666 
1667 		if (ehc->i.flags & ATA_EHI_DID_RESET)
1668 			readid_flags |= ATA_READID_POSTRESET;
1669 
1670 		if (action & ATA_EH_REVALIDATE && ata_dev_ready(dev)) {
1671 			if (ata_port_offline(ap)) {
1672 				rc = -EIO;
1673 				break;
1674 			}
1675 
1676 			ata_eh_about_to_do(ap, dev, ATA_EH_REVALIDATE);
1677 			rc = ata_dev_revalidate(dev, readid_flags);
1678 			if (rc)
1679 				break;
1680 
1681 			ata_eh_done(ap, dev, ATA_EH_REVALIDATE);
1682 
1683 			/* Configuration may have changed, reconfigure
1684 			 * transfer mode.
1685 			 */
1686 			ehc->i.flags |= ATA_EHI_SETMODE;
1687 
1688 			/* schedule the scsi_rescan_device() here */
1689 			queue_work(ata_aux_wq, &(ap->scsi_rescan_task));
1690 		} else if (dev->class == ATA_DEV_UNKNOWN &&
1691 			   ehc->tries[dev->devno] &&
1692 			   ata_class_enabled(ehc->classes[dev->devno])) {
1693 			dev->class = ehc->classes[dev->devno];
1694 
1695 			rc = ata_dev_read_id(dev, &dev->class, readid_flags,
1696 					     dev->id);
1697 			if (rc == 0) {
1698 				ehc->i.flags |= ATA_EHI_PRINTINFO;
1699 				rc = ata_dev_configure(dev);
1700 				ehc->i.flags &= ~ATA_EHI_PRINTINFO;
1701 			} else if (rc == -ENOENT) {
1702 				/* IDENTIFY was issued to non-existent
1703 				 * device.  No need to reset.  Just
1704 				 * thaw and kill the device.
1705 				 */
1706 				ata_eh_thaw_port(ap);
1707 				dev->class = ATA_DEV_UNKNOWN;
1708 				rc = 0;
1709 			}
1710 
1711 			if (rc) {
1712 				dev->class = ATA_DEV_UNKNOWN;
1713 				break;
1714 			}
1715 
1716 			if (ata_dev_enabled(dev)) {
1717 				spin_lock_irqsave(ap->lock, flags);
1718 				ap->pflags |= ATA_PFLAG_SCSI_HOTPLUG;
1719 				spin_unlock_irqrestore(ap->lock, flags);
1720 
1721 				/* new device discovered, configure xfermode */
1722 				ehc->i.flags |= ATA_EHI_SETMODE;
1723 			}
1724 		}
1725 	}
1726 
1727 	if (rc)
1728 		*r_failed_dev = dev;
1729 
1730 	DPRINTK("EXIT\n");
1731 	return rc;
1732 }
1733 
1734 /**
1735  *	ata_eh_suspend - handle suspend EH action
1736  *	@ap: target host port
1737  *	@r_failed_dev: result parameter to indicate failing device
1738  *
1739  *	Handle suspend EH action.  Disk devices are spinned down and
1740  *	other types of devices are just marked suspended.  Once
1741  *	suspended, no EH action to the device is allowed until it is
1742  *	resumed.
1743  *
1744  *	LOCKING:
1745  *	Kernel thread context (may sleep).
1746  *
1747  *	RETURNS:
1748  *	0 on success, -errno otherwise
1749  */
1750 static int ata_eh_suspend(struct ata_port *ap, struct ata_device **r_failed_dev)
1751 {
1752 	struct ata_device *dev;
1753 	int i, rc = 0;
1754 
1755 	DPRINTK("ENTER\n");
1756 
1757 	for (i = 0; i < ATA_MAX_DEVICES; i++) {
1758 		unsigned long flags;
1759 		unsigned int action, err_mask;
1760 
1761 		dev = &ap->device[i];
1762 		action = ata_eh_dev_action(dev);
1763 
1764 		if (!ata_dev_enabled(dev) || !(action & ATA_EH_SUSPEND))
1765 			continue;
1766 
1767 		WARN_ON(dev->flags & ATA_DFLAG_SUSPENDED);
1768 
1769 		ata_eh_about_to_do(ap, dev, ATA_EH_SUSPEND);
1770 
1771 		if (dev->class == ATA_DEV_ATA && !(action & ATA_EH_PM_FREEZE)) {
1772 			/* flush cache */
1773 			rc = ata_flush_cache(dev);
1774 			if (rc)
1775 				break;
1776 
1777 			/* spin down */
1778 			err_mask = ata_do_simple_cmd(dev, ATA_CMD_STANDBYNOW1);
1779 			if (err_mask) {
1780 				ata_dev_printk(dev, KERN_ERR, "failed to "
1781 					       "spin down (err_mask=0x%x)\n",
1782 					       err_mask);
1783 				rc = -EIO;
1784 				break;
1785 			}
1786 		}
1787 
1788 		spin_lock_irqsave(ap->lock, flags);
1789 		dev->flags |= ATA_DFLAG_SUSPENDED;
1790 		spin_unlock_irqrestore(ap->lock, flags);
1791 
1792 		ata_eh_done(ap, dev, ATA_EH_SUSPEND);
1793 	}
1794 
1795 	if (rc)
1796 		*r_failed_dev = dev;
1797 
1798 	DPRINTK("EXIT\n");
1799 	return 0;
1800 }
1801 
1802 /**
1803  *	ata_eh_prep_resume - prep for resume EH action
1804  *	@ap: target host port
1805  *
1806  *	Clear SUSPENDED in preparation for scheduled resume actions.
1807  *	This allows other parts of EH to access the devices being
1808  *	resumed.
1809  *
1810  *	LOCKING:
1811  *	Kernel thread context (may sleep).
1812  */
1813 static void ata_eh_prep_resume(struct ata_port *ap)
1814 {
1815 	struct ata_device *dev;
1816 	unsigned long flags;
1817 	int i;
1818 
1819 	DPRINTK("ENTER\n");
1820 
1821 	for (i = 0; i < ATA_MAX_DEVICES; i++) {
1822 		unsigned int action;
1823 
1824 		dev = &ap->device[i];
1825 		action = ata_eh_dev_action(dev);
1826 
1827 		if (!ata_dev_enabled(dev) || !(action & ATA_EH_RESUME))
1828 			continue;
1829 
1830 		spin_lock_irqsave(ap->lock, flags);
1831 		dev->flags &= ~ATA_DFLAG_SUSPENDED;
1832 		spin_unlock_irqrestore(ap->lock, flags);
1833 	}
1834 
1835 	DPRINTK("EXIT\n");
1836 }
1837 
1838 /**
1839  *	ata_eh_resume - handle resume EH action
1840  *	@ap: target host port
1841  *	@r_failed_dev: result parameter to indicate failing device
1842  *
1843  *	Handle resume EH action.  Target devices are already reset and
1844  *	revalidated.  Spinning up is the only operation left.
1845  *
1846  *	LOCKING:
1847  *	Kernel thread context (may sleep).
1848  *
1849  *	RETURNS:
1850  *	0 on success, -errno otherwise
1851  */
1852 static int ata_eh_resume(struct ata_port *ap, struct ata_device **r_failed_dev)
1853 {
1854 	struct ata_device *dev;
1855 	int i, rc = 0;
1856 
1857 	DPRINTK("ENTER\n");
1858 
1859 	for (i = 0; i < ATA_MAX_DEVICES; i++) {
1860 		unsigned int action, err_mask;
1861 
1862 		dev = &ap->device[i];
1863 		action = ata_eh_dev_action(dev);
1864 
1865 		if (!ata_dev_enabled(dev) || !(action & ATA_EH_RESUME))
1866 			continue;
1867 
1868 		ata_eh_about_to_do(ap, dev, ATA_EH_RESUME);
1869 
1870 		if (dev->class == ATA_DEV_ATA && !(action & ATA_EH_PM_FREEZE)) {
1871 			err_mask = ata_do_simple_cmd(dev,
1872 						     ATA_CMD_IDLEIMMEDIATE);
1873 			if (err_mask) {
1874 				ata_dev_printk(dev, KERN_ERR, "failed to "
1875 					       "spin up (err_mask=0x%x)\n",
1876 					       err_mask);
1877 				rc = -EIO;
1878 				break;
1879 			}
1880 		}
1881 
1882 		ata_eh_done(ap, dev, ATA_EH_RESUME);
1883 	}
1884 
1885 	if (rc)
1886 		*r_failed_dev = dev;
1887 
1888 	DPRINTK("EXIT\n");
1889 	return 0;
1890 }
1891 
1892 static int ata_port_nr_enabled(struct ata_port *ap)
1893 {
1894 	int i, cnt = 0;
1895 
1896 	for (i = 0; i < ATA_MAX_DEVICES; i++)
1897 		if (ata_dev_enabled(&ap->device[i]))
1898 			cnt++;
1899 	return cnt;
1900 }
1901 
1902 static int ata_port_nr_vacant(struct ata_port *ap)
1903 {
1904 	int i, cnt = 0;
1905 
1906 	for (i = 0; i < ATA_MAX_DEVICES; i++)
1907 		if (ap->device[i].class == ATA_DEV_UNKNOWN)
1908 			cnt++;
1909 	return cnt;
1910 }
1911 
1912 static int ata_eh_skip_recovery(struct ata_port *ap)
1913 {
1914 	struct ata_eh_context *ehc = &ap->eh_context;
1915 	int i;
1916 
1917 	/* skip if all possible devices are suspended */
1918 	for (i = 0; i < ata_port_max_devices(ap); i++) {
1919 		struct ata_device *dev = &ap->device[i];
1920 
1921 		if (!(dev->flags & ATA_DFLAG_SUSPENDED))
1922 			break;
1923 	}
1924 
1925 	if (i == ata_port_max_devices(ap))
1926 		return 1;
1927 
1928 	/* thaw frozen port, resume link and recover failed devices */
1929 	if ((ap->pflags & ATA_PFLAG_FROZEN) ||
1930 	    (ehc->i.flags & ATA_EHI_RESUME_LINK) || ata_port_nr_enabled(ap))
1931 		return 0;
1932 
1933 	/* skip if class codes for all vacant slots are ATA_DEV_NONE */
1934 	for (i = 0; i < ATA_MAX_DEVICES; i++) {
1935 		struct ata_device *dev = &ap->device[i];
1936 
1937 		if (dev->class == ATA_DEV_UNKNOWN &&
1938 		    ehc->classes[dev->devno] != ATA_DEV_NONE)
1939 			return 0;
1940 	}
1941 
1942 	return 1;
1943 }
1944 
1945 /**
1946  *	ata_eh_recover - recover host port after error
1947  *	@ap: host port to recover
1948  *	@prereset: prereset method (can be NULL)
1949  *	@softreset: softreset method (can be NULL)
1950  *	@hardreset: hardreset method (can be NULL)
1951  *	@postreset: postreset method (can be NULL)
1952  *
1953  *	This is the alpha and omega, eum and yang, heart and soul of
1954  *	libata exception handling.  On entry, actions required to
1955  *	recover the port and hotplug requests are recorded in
1956  *	eh_context.  This function executes all the operations with
1957  *	appropriate retrials and fallbacks to resurrect failed
1958  *	devices, detach goners and greet newcomers.
1959  *
1960  *	LOCKING:
1961  *	Kernel thread context (may sleep).
1962  *
1963  *	RETURNS:
1964  *	0 on success, -errno on failure.
1965  */
1966 static int ata_eh_recover(struct ata_port *ap, ata_prereset_fn_t prereset,
1967 			  ata_reset_fn_t softreset, ata_reset_fn_t hardreset,
1968 			  ata_postreset_fn_t postreset)
1969 {
1970 	struct ata_eh_context *ehc = &ap->eh_context;
1971 	struct ata_device *dev;
1972 	int down_xfermask, i, rc;
1973 
1974 	DPRINTK("ENTER\n");
1975 
1976 	/* prep for recovery */
1977 	for (i = 0; i < ATA_MAX_DEVICES; i++) {
1978 		dev = &ap->device[i];
1979 
1980 		ehc->tries[dev->devno] = ATA_EH_DEV_TRIES;
1981 
1982 		/* collect port action mask recorded in dev actions */
1983 		ehc->i.action |= ehc->i.dev_action[i] & ~ATA_EH_PERDEV_MASK;
1984 		ehc->i.dev_action[i] &= ATA_EH_PERDEV_MASK;
1985 
1986 		/* process hotplug request */
1987 		if (dev->flags & ATA_DFLAG_DETACH)
1988 			ata_eh_detach_dev(dev);
1989 
1990 		if (!ata_dev_enabled(dev) &&
1991 		    ((ehc->i.probe_mask & (1 << dev->devno)) &&
1992 		     !(ehc->did_probe_mask & (1 << dev->devno)))) {
1993 			ata_eh_detach_dev(dev);
1994 			ata_dev_init(dev);
1995 			ehc->did_probe_mask |= (1 << dev->devno);
1996 			ehc->i.action |= ATA_EH_SOFTRESET;
1997 		}
1998 	}
1999 
2000  retry:
2001 	down_xfermask = 0;
2002 	rc = 0;
2003 
2004 	/* if UNLOADING, finish immediately */
2005 	if (ap->pflags & ATA_PFLAG_UNLOADING)
2006 		goto out;
2007 
2008 	/* prep for resume */
2009 	ata_eh_prep_resume(ap);
2010 
2011 	/* skip EH if possible. */
2012 	if (ata_eh_skip_recovery(ap))
2013 		ehc->i.action = 0;
2014 
2015 	for (i = 0; i < ATA_MAX_DEVICES; i++)
2016 		ehc->classes[i] = ATA_DEV_UNKNOWN;
2017 
2018 	/* reset */
2019 	if (ehc->i.action & ATA_EH_RESET_MASK) {
2020 		ata_eh_freeze_port(ap);
2021 
2022 		rc = ata_eh_reset(ap, ata_port_nr_vacant(ap), prereset,
2023 				  softreset, hardreset, postreset);
2024 		if (rc) {
2025 			ata_port_printk(ap, KERN_ERR,
2026 					"reset failed, giving up\n");
2027 			goto out;
2028 		}
2029 
2030 		ata_eh_thaw_port(ap);
2031 	}
2032 
2033 	/* revalidate existing devices and attach new ones */
2034 	rc = ata_eh_revalidate_and_attach(ap, &dev);
2035 	if (rc)
2036 		goto dev_fail;
2037 
2038 	/* resume devices */
2039 	rc = ata_eh_resume(ap, &dev);
2040 	if (rc)
2041 		goto dev_fail;
2042 
2043 	/* configure transfer mode if necessary */
2044 	if (ehc->i.flags & ATA_EHI_SETMODE) {
2045 		rc = ata_set_mode(ap, &dev);
2046 		if (rc) {
2047 			down_xfermask = 1;
2048 			goto dev_fail;
2049 		}
2050 		ehc->i.flags &= ~ATA_EHI_SETMODE;
2051 	}
2052 
2053 	/* suspend devices */
2054 	rc = ata_eh_suspend(ap, &dev);
2055 	if (rc)
2056 		goto dev_fail;
2057 
2058 	goto out;
2059 
2060  dev_fail:
2061 	switch (rc) {
2062 	case -ENODEV:
2063 		/* device missing, schedule probing */
2064 		ehc->i.probe_mask |= (1 << dev->devno);
2065 	case -EINVAL:
2066 		ehc->tries[dev->devno] = 0;
2067 		break;
2068 	case -EIO:
2069 		sata_down_spd_limit(ap);
2070 	default:
2071 		ehc->tries[dev->devno]--;
2072 		if (down_xfermask &&
2073 		    ata_down_xfermask_limit(dev, ehc->tries[dev->devno] == 1))
2074 			ehc->tries[dev->devno] = 0;
2075 	}
2076 
2077 	if (ata_dev_enabled(dev) && !ehc->tries[dev->devno]) {
2078 		/* disable device if it has used up all its chances */
2079 		ata_dev_disable(dev);
2080 
2081 		/* detach if offline */
2082 		if (ata_port_offline(ap))
2083 			ata_eh_detach_dev(dev);
2084 
2085 		/* probe if requested */
2086 		if ((ehc->i.probe_mask & (1 << dev->devno)) &&
2087 		    !(ehc->did_probe_mask & (1 << dev->devno))) {
2088 			ata_eh_detach_dev(dev);
2089 			ata_dev_init(dev);
2090 
2091 			ehc->tries[dev->devno] = ATA_EH_DEV_TRIES;
2092 			ehc->did_probe_mask |= (1 << dev->devno);
2093 			ehc->i.action |= ATA_EH_SOFTRESET;
2094 		}
2095 	} else {
2096 		/* soft didn't work?  be haaaaard */
2097 		if (ehc->i.flags & ATA_EHI_DID_RESET)
2098 			ehc->i.action |= ATA_EH_HARDRESET;
2099 		else
2100 			ehc->i.action |= ATA_EH_SOFTRESET;
2101 	}
2102 
2103 	if (ata_port_nr_enabled(ap)) {
2104 		ata_port_printk(ap, KERN_WARNING, "failed to recover some "
2105 				"devices, retrying in 5 secs\n");
2106 		ssleep(5);
2107 	} else {
2108 		/* no device left, repeat fast */
2109 		msleep(500);
2110 	}
2111 
2112 	goto retry;
2113 
2114  out:
2115 	if (rc) {
2116 		for (i = 0; i < ATA_MAX_DEVICES; i++)
2117 			ata_dev_disable(&ap->device[i]);
2118 	}
2119 
2120 	DPRINTK("EXIT, rc=%d\n", rc);
2121 	return rc;
2122 }
2123 
2124 /**
2125  *	ata_eh_finish - finish up EH
2126  *	@ap: host port to finish EH for
2127  *
2128  *	Recovery is complete.  Clean up EH states and retry or finish
2129  *	failed qcs.
2130  *
2131  *	LOCKING:
2132  *	None.
2133  */
2134 static void ata_eh_finish(struct ata_port *ap)
2135 {
2136 	int tag;
2137 
2138 	/* retry or finish qcs */
2139 	for (tag = 0; tag < ATA_MAX_QUEUE; tag++) {
2140 		struct ata_queued_cmd *qc = __ata_qc_from_tag(ap, tag);
2141 
2142 		if (!(qc->flags & ATA_QCFLAG_FAILED))
2143 			continue;
2144 
2145 		if (qc->err_mask) {
2146 			/* FIXME: Once EH migration is complete,
2147 			 * generate sense data in this function,
2148 			 * considering both err_mask and tf.
2149 			 */
2150 			if (qc->err_mask & AC_ERR_INVALID)
2151 				ata_eh_qc_complete(qc);
2152 			else
2153 				ata_eh_qc_retry(qc);
2154 		} else {
2155 			if (qc->flags & ATA_QCFLAG_SENSE_VALID) {
2156 				ata_eh_qc_complete(qc);
2157 			} else {
2158 				/* feed zero TF to sense generation */
2159 				memset(&qc->result_tf, 0, sizeof(qc->result_tf));
2160 				ata_eh_qc_retry(qc);
2161 			}
2162 		}
2163 	}
2164 }
2165 
2166 /**
2167  *	ata_do_eh - do standard error handling
2168  *	@ap: host port to handle error for
2169  *	@prereset: prereset method (can be NULL)
2170  *	@softreset: softreset method (can be NULL)
2171  *	@hardreset: hardreset method (can be NULL)
2172  *	@postreset: postreset method (can be NULL)
2173  *
2174  *	Perform standard error handling sequence.
2175  *
2176  *	LOCKING:
2177  *	Kernel thread context (may sleep).
2178  */
2179 void ata_do_eh(struct ata_port *ap, ata_prereset_fn_t prereset,
2180 	       ata_reset_fn_t softreset, ata_reset_fn_t hardreset,
2181 	       ata_postreset_fn_t postreset)
2182 {
2183 	ata_eh_autopsy(ap);
2184 	ata_eh_report(ap);
2185 	ata_eh_recover(ap, prereset, softreset, hardreset, postreset);
2186 	ata_eh_finish(ap);
2187 }
2188 
2189 /**
2190  *	ata_eh_handle_port_suspend - perform port suspend operation
2191  *	@ap: port to suspend
2192  *
2193  *	Suspend @ap.
2194  *
2195  *	LOCKING:
2196  *	Kernel thread context (may sleep).
2197  */
2198 static void ata_eh_handle_port_suspend(struct ata_port *ap)
2199 {
2200 	unsigned long flags;
2201 	int rc = 0;
2202 
2203 	/* are we suspending? */
2204 	spin_lock_irqsave(ap->lock, flags);
2205 	if (!(ap->pflags & ATA_PFLAG_PM_PENDING) ||
2206 	    ap->pm_mesg.event == PM_EVENT_ON) {
2207 		spin_unlock_irqrestore(ap->lock, flags);
2208 		return;
2209 	}
2210 	spin_unlock_irqrestore(ap->lock, flags);
2211 
2212 	WARN_ON(ap->pflags & ATA_PFLAG_SUSPENDED);
2213 
2214 	/* suspend */
2215 	ata_eh_freeze_port(ap);
2216 
2217 	if (ap->ops->port_suspend)
2218 		rc = ap->ops->port_suspend(ap, ap->pm_mesg);
2219 
2220 	/* report result */
2221 	spin_lock_irqsave(ap->lock, flags);
2222 
2223 	ap->pflags &= ~ATA_PFLAG_PM_PENDING;
2224 	if (rc == 0)
2225 		ap->pflags |= ATA_PFLAG_SUSPENDED;
2226 	else
2227 		ata_port_schedule_eh(ap);
2228 
2229 	if (ap->pm_result) {
2230 		*ap->pm_result = rc;
2231 		ap->pm_result = NULL;
2232 	}
2233 
2234 	spin_unlock_irqrestore(ap->lock, flags);
2235 
2236 	return;
2237 }
2238 
2239 /**
2240  *	ata_eh_handle_port_resume - perform port resume operation
2241  *	@ap: port to resume
2242  *
2243  *	Resume @ap.
2244  *
2245  *	This function also waits upto one second until all devices
2246  *	hanging off this port requests resume EH action.  This is to
2247  *	prevent invoking EH and thus reset multiple times on resume.
2248  *
2249  *	On DPM resume, where some of devices might not be resumed
2250  *	together, this may delay port resume upto one second, but such
2251  *	DPM resumes are rare and 1 sec delay isn't too bad.
2252  *
2253  *	LOCKING:
2254  *	Kernel thread context (may sleep).
2255  */
2256 static void ata_eh_handle_port_resume(struct ata_port *ap)
2257 {
2258 	unsigned long timeout;
2259 	unsigned long flags;
2260 	int i, rc = 0;
2261 
2262 	/* are we resuming? */
2263 	spin_lock_irqsave(ap->lock, flags);
2264 	if (!(ap->pflags & ATA_PFLAG_PM_PENDING) ||
2265 	    ap->pm_mesg.event != PM_EVENT_ON) {
2266 		spin_unlock_irqrestore(ap->lock, flags);
2267 		return;
2268 	}
2269 	spin_unlock_irqrestore(ap->lock, flags);
2270 
2271 	/* spurious? */
2272 	if (!(ap->pflags & ATA_PFLAG_SUSPENDED))
2273 		goto done;
2274 
2275 	if (ap->ops->port_resume)
2276 		rc = ap->ops->port_resume(ap);
2277 
2278 	/* give devices time to request EH */
2279 	timeout = jiffies + HZ; /* 1s max */
2280 	while (1) {
2281 		for (i = 0; i < ATA_MAX_DEVICES; i++) {
2282 			struct ata_device *dev = &ap->device[i];
2283 			unsigned int action = ata_eh_dev_action(dev);
2284 
2285 			if ((dev->flags & ATA_DFLAG_SUSPENDED) &&
2286 			    !(action & ATA_EH_RESUME))
2287 				break;
2288 		}
2289 
2290 		if (i == ATA_MAX_DEVICES || time_after(jiffies, timeout))
2291 			break;
2292 		msleep(10);
2293 	}
2294 
2295  done:
2296 	spin_lock_irqsave(ap->lock, flags);
2297 	ap->pflags &= ~(ATA_PFLAG_PM_PENDING | ATA_PFLAG_SUSPENDED);
2298 	if (ap->pm_result) {
2299 		*ap->pm_result = rc;
2300 		ap->pm_result = NULL;
2301 	}
2302 	spin_unlock_irqrestore(ap->lock, flags);
2303 }
2304