xref: /openbmc/linux/drivers/ata/pata_octeon_cf.c (revision 47aab53331effedd3f5a6136854bd1da011f94b6)
1 /*
2  * Driver for the Octeon bootbus compact flash.
3  *
4  * This file is subject to the terms and conditions of the GNU General Public
5  * License.  See the file "COPYING" in the main directory of this archive
6  * for more details.
7  *
8  * Copyright (C) 2005 - 2012 Cavium Inc.
9  * Copyright (C) 2008 Wind River Systems
10  */
11 
12 #include <linux/kernel.h>
13 #include <linux/module.h>
14 #include <linux/libata.h>
15 #include <linux/hrtimer.h>
16 #include <linux/slab.h>
17 #include <linux/irq.h>
18 #include <linux/of.h>
19 #include <linux/of_address.h>
20 #include <linux/of_platform.h>
21 #include <linux/platform_device.h>
22 #include <scsi/scsi_host.h>
23 #include <trace/events/libata.h>
24 #include <asm/byteorder.h>
25 #include <asm/octeon/octeon.h>
26 
27 /*
28  * The Octeon bootbus compact flash interface is connected in at least
29  * 3 different configurations on various evaluation boards:
30  *
31  * -- 8  bits no irq, no DMA
32  * -- 16 bits no irq, no DMA
33  * -- 16 bits True IDE mode with DMA, but no irq.
34  *
35  * In the last case the DMA engine can generate an interrupt when the
36  * transfer is complete.  For the first two cases only PIO is supported.
37  *
38  */
39 
/* Driver name and version strings. */
#define DRV_NAME	"pata_octeon_cf"
#define DRV_VERSION	"2.2"

/* Poll interval in nS. */
#define OCTEON_CF_BUSY_POLL_INTERVAL 500000

/* DMA engine register offsets, added to octeon_cf_port.dma_base. */
#define DMA_CFG 0
#define DMA_TIM 0x20
#define DMA_INT 0x38
#define DMA_INT_EN 0x50
50 
/*
 * Per-port driver state.  Allocated in octeon_cf_probe() and reachable
 * through ap->private_data and the device's platform_data.
 */
struct octeon_cf_port {
	/* Timer that re-polls the device status after a DMA completes. */
	struct hrtimer delayed_finish;
	/* Back pointer to the libata port using this state. */
	struct ata_port *ap;
	/* Set by the interrupt handler once the last sg entry is done. */
	int dma_finished;
	/* Copy of ap->ioaddr.ctl_addr, used for the reset at shutdown. */
	void		*c0;
	/* Boot bus chip selects, taken from the node's "reg" entries. */
	unsigned int cs0;
	unsigned int cs1;
	/* True when the node has the "cavium,true-ide" property. */
	bool is_true_ide;
	/* Mapped address of the DMA engine registers, 0 if none. */
	u64 dma_base;
};
61 
/* SCSI host template, built from libata's ATA_PIO_SHT() initializer. */
static const struct scsi_host_template octeon_cf_sht = {
	ATA_PIO_SHT(DRV_NAME),
};
65 
/*
 * Module parameter (mode 0444).  When non-zero, octeon_cf_probe()
 * advertises multiword DMA modes on 16 bit True IDE ports.
 */
static int enable_dma;
module_param(enable_dma, int, 0444);
MODULE_PARM_DESC(enable_dma,
		 "Enable use of DMA on interfaces that support it (0=no dma [default], 1=use dma)");
70 
/*
 * Translate a duration in nanoseconds into the tick count used by the
 * boot bus timing registers, where one tick lasts @tim_mult I/O clock
 * periods.  The result is rounded up.
 */
static unsigned int ns_to_tim_reg(unsigned int tim_mult, unsigned int nsecs)
{
	/* Scale factor between "nsecs * clock in MHz" and whole ticks. */
	const unsigned int divisor = 1000 * tim_mult;

	return DIV_ROUND_UP(nsecs * (octeon_get_io_clock_rate() / 1000000),
			    divisor);
}
84 
85 static void octeon_cf_set_boot_reg_cfg(int cs, unsigned int multiplier)
86 {
87 	union cvmx_mio_boot_reg_cfgx reg_cfg;
88 	unsigned int tim_mult;
89 
90 	switch (multiplier) {
91 	case 8:
92 		tim_mult = 3;
93 		break;
94 	case 4:
95 		tim_mult = 0;
96 		break;
97 	case 2:
98 		tim_mult = 2;
99 		break;
100 	default:
101 		tim_mult = 1;
102 		break;
103 	}
104 
105 	reg_cfg.u64 = cvmx_read_csr(CVMX_MIO_BOOT_REG_CFGX(cs));
106 	reg_cfg.s.dmack = 0;	/* Don't assert DMACK on access */
107 	reg_cfg.s.tim_mult = tim_mult;	/* Timing mutiplier */
108 	reg_cfg.s.rd_dly = 0;	/* Sample on falling edge of BOOT_OE */
109 	reg_cfg.s.sam = 0;	/* Don't combine write and output enable */
110 	reg_cfg.s.we_ext = 0;	/* No write enable extension */
111 	reg_cfg.s.oe_ext = 0;	/* No read enable extension */
112 	reg_cfg.s.en = 1;	/* Enable this region */
113 	reg_cfg.s.orbit = 0;	/* Don't combine with previous region */
114 	reg_cfg.s.ale = 0;	/* Don't do address multiplexing */
115 	cvmx_write_csr(CVMX_MIO_BOOT_REG_CFGX(cs), reg_cfg.u64);
116 }
117 
118 /*
119  * Called after libata determines the needed PIO mode. This
120  * function programs the Octeon bootbus regions to support the
121  * timing requirements of the PIO mode.
122  *
123  * @ap:     ATA port information
124  * @dev:    ATA device
125  */
126 static void octeon_cf_set_piomode(struct ata_port *ap, struct ata_device *dev)
127 {
128 	struct octeon_cf_port *cf_port = ap->private_data;
129 	union cvmx_mio_boot_reg_timx reg_tim;
130 	int T;
131 	struct ata_timing timing;
132 
133 	unsigned int div;
134 	int use_iordy;
135 	int trh;
136 	int pause;
137 	/* These names are timing parameters from the ATA spec */
138 	int t2;
139 
140 	/*
141 	 * A divisor value of four will overflow the timing fields at
142 	 * clock rates greater than 800MHz
143 	 */
144 	if (octeon_get_io_clock_rate() <= 800000000)
145 		div = 4;
146 	else
147 		div = 8;
148 	T = (int)((1000000000000LL * div) / octeon_get_io_clock_rate());
149 
150 	BUG_ON(ata_timing_compute(dev, dev->pio_mode, &timing, T, T));
151 
152 	t2 = timing.active;
153 	if (t2)
154 		t2--;
155 
156 	trh = ns_to_tim_reg(div, 20);
157 	if (trh)
158 		trh--;
159 
160 	pause = (int)timing.cycle - (int)timing.active -
161 		(int)timing.setup - trh;
162 	if (pause < 0)
163 		pause = 0;
164 	if (pause)
165 		pause--;
166 
167 	octeon_cf_set_boot_reg_cfg(cf_port->cs0, div);
168 	if (cf_port->is_true_ide)
169 		/* True IDE mode, program both chip selects.  */
170 		octeon_cf_set_boot_reg_cfg(cf_port->cs1, div);
171 
172 
173 	use_iordy = ata_pio_need_iordy(dev);
174 
175 	reg_tim.u64 = cvmx_read_csr(CVMX_MIO_BOOT_REG_TIMX(cf_port->cs0));
176 	/* Disable page mode */
177 	reg_tim.s.pagem = 0;
178 	/* Enable dynamic timing */
179 	reg_tim.s.waitm = use_iordy;
180 	/* Pages are disabled */
181 	reg_tim.s.pages = 0;
182 	/* We don't use multiplexed address mode */
183 	reg_tim.s.ale = 0;
184 	/* Not used */
185 	reg_tim.s.page = 0;
186 	/* Time after IORDY to coninue to assert the data */
187 	reg_tim.s.wait = 0;
188 	/* Time to wait to complete the cycle. */
189 	reg_tim.s.pause = pause;
190 	/* How long to hold after a write to de-assert CE. */
191 	reg_tim.s.wr_hld = trh;
192 	/* How long to wait after a read to de-assert CE. */
193 	reg_tim.s.rd_hld = trh;
194 	/* How long write enable is asserted */
195 	reg_tim.s.we = t2;
196 	/* How long read enable is asserted */
197 	reg_tim.s.oe = t2;
198 	/* Time after CE that read/write starts */
199 	reg_tim.s.ce = ns_to_tim_reg(div, 5);
200 	/* Time before CE that address is valid */
201 	reg_tim.s.adr = 0;
202 
203 	/* Program the bootbus region timing for the data port chip select. */
204 	cvmx_write_csr(CVMX_MIO_BOOT_REG_TIMX(cf_port->cs0), reg_tim.u64);
205 	if (cf_port->is_true_ide)
206 		/* True IDE mode, program both chip selects.  */
207 		cvmx_write_csr(CVMX_MIO_BOOT_REG_TIMX(cf_port->cs1),
208 			       reg_tim.u64);
209 }
210 
211 static void octeon_cf_set_dmamode(struct ata_port *ap, struct ata_device *dev)
212 {
213 	struct octeon_cf_port *cf_port = ap->private_data;
214 	union cvmx_mio_boot_pin_defs pin_defs;
215 	union cvmx_mio_boot_dma_timx dma_tim;
216 	unsigned int oe_a;
217 	unsigned int oe_n;
218 	unsigned int dma_ackh;
219 	unsigned int dma_arq;
220 	unsigned int pause;
221 	unsigned int T0, Tkr, Td;
222 	unsigned int tim_mult;
223 	int c;
224 
225 	const struct ata_timing *timing;
226 
227 	timing = ata_timing_find_mode(dev->dma_mode);
228 	T0	= timing->cycle;
229 	Td	= timing->active;
230 	Tkr	= timing->recover;
231 	dma_ackh = timing->dmack_hold;
232 
233 	dma_tim.u64 = 0;
234 	/* dma_tim.s.tim_mult = 0 --> 4x */
235 	tim_mult = 4;
236 
237 	/* not spec'ed, value in eclocks, not affected by tim_mult */
238 	dma_arq = 8;
239 	pause = 25 - dma_arq * 1000 /
240 		(octeon_get_io_clock_rate() / 1000000); /* Tz */
241 
242 	oe_a = Td;
243 	/* Tkr from cf spec, lengthened to meet T0 */
244 	oe_n = max(T0 - oe_a, Tkr);
245 
246 	pin_defs.u64 = cvmx_read_csr(CVMX_MIO_BOOT_PIN_DEFS);
247 
248 	/* DMA channel number. */
249 	c = (cf_port->dma_base & 8) >> 3;
250 
251 	/* Invert the polarity if the default is 0*/
252 	dma_tim.s.dmack_pi = (pin_defs.u64 & (1ull << (11 + c))) ? 0 : 1;
253 
254 	dma_tim.s.oe_n = ns_to_tim_reg(tim_mult, oe_n);
255 	dma_tim.s.oe_a = ns_to_tim_reg(tim_mult, oe_a);
256 
257 	/*
258 	 * This is tI, C.F. spec. says 0, but Sony CF card requires
259 	 * more, we use 20 nS.
260 	 */
261 	dma_tim.s.dmack_s = ns_to_tim_reg(tim_mult, 20);
262 	dma_tim.s.dmack_h = ns_to_tim_reg(tim_mult, dma_ackh);
263 
264 	dma_tim.s.dmarq = dma_arq;
265 	dma_tim.s.pause = ns_to_tim_reg(tim_mult, pause);
266 
267 	dma_tim.s.rd_dly = 0;	/* Sample right on edge */
268 
269 	/*  writes only */
270 	dma_tim.s.we_n = ns_to_tim_reg(tim_mult, oe_n);
271 	dma_tim.s.we_a = ns_to_tim_reg(tim_mult, oe_a);
272 
273 	ata_dev_dbg(dev, "ns to ticks (mult %d) of %d is: %d\n", tim_mult, 60,
274 		 ns_to_tim_reg(tim_mult, 60));
275 	ata_dev_dbg(dev, "oe_n: %d, oe_a: %d, dmack_s: %d, dmack_h: %d, dmarq: %d, pause: %d\n",
276 		 dma_tim.s.oe_n, dma_tim.s.oe_a, dma_tim.s.dmack_s,
277 		 dma_tim.s.dmack_h, dma_tim.s.dmarq, dma_tim.s.pause);
278 
279 	cvmx_write_csr(cf_port->dma_base + DMA_TIM, dma_tim.u64);
280 }
281 
282 /*
283  * Handle an 8 bit I/O request.
284  *
285  * @qc:         Queued command
286  * @buffer:     Data buffer
287  * @buflen:     Length of the buffer.
288  * @rw:         True to write.
289  */
290 static unsigned int octeon_cf_data_xfer8(struct ata_queued_cmd *qc,
291 					 unsigned char *buffer,
292 					 unsigned int buflen,
293 					 int rw)
294 {
295 	struct ata_port *ap		= qc->dev->link->ap;
296 	void __iomem *data_addr		= ap->ioaddr.data_addr;
297 	unsigned long words;
298 	int count;
299 
300 	words = buflen;
301 	if (rw) {
302 		count = 16;
303 		while (words--) {
304 			iowrite8(*buffer, data_addr);
305 			buffer++;
306 			/*
307 			 * Every 16 writes do a read so the bootbus
308 			 * FIFO doesn't fill up.
309 			 */
310 			if (--count == 0) {
311 				ioread8(ap->ioaddr.altstatus_addr);
312 				count = 16;
313 			}
314 		}
315 	} else {
316 		ioread8_rep(data_addr, buffer, words);
317 	}
318 	return buflen;
319 }
320 
321 /*
322  * Handle a 16 bit I/O request.
323  *
324  * @qc:         Queued command
325  * @buffer:     Data buffer
326  * @buflen:     Length of the buffer.
327  * @rw:         True to write.
328  */
329 static unsigned int octeon_cf_data_xfer16(struct ata_queued_cmd *qc,
330 					  unsigned char *buffer,
331 					  unsigned int buflen,
332 					  int rw)
333 {
334 	struct ata_port *ap		= qc->dev->link->ap;
335 	void __iomem *data_addr		= ap->ioaddr.data_addr;
336 	unsigned long words;
337 	int count;
338 
339 	words = buflen / 2;
340 	if (rw) {
341 		count = 16;
342 		while (words--) {
343 			iowrite16(*(uint16_t *)buffer, data_addr);
344 			buffer += sizeof(uint16_t);
345 			/*
346 			 * Every 16 writes do a read so the bootbus
347 			 * FIFO doesn't fill up.
348 			 */
349 			if (--count == 0) {
350 				ioread8(ap->ioaddr.altstatus_addr);
351 				count = 16;
352 			}
353 		}
354 	} else {
355 		while (words--) {
356 			*(uint16_t *)buffer = ioread16(data_addr);
357 			buffer += sizeof(uint16_t);
358 		}
359 	}
360 	/* Transfer trailing 1 byte, if any. */
361 	if (unlikely(buflen & 0x01)) {
362 		__le16 align_buf[1] = { 0 };
363 
364 		if (rw == READ) {
365 			align_buf[0] = cpu_to_le16(ioread16(data_addr));
366 			memcpy(buffer, align_buf, 1);
367 		} else {
368 			memcpy(align_buf, buffer, 1);
369 			iowrite16(le16_to_cpu(align_buf[0]), data_addr);
370 		}
371 		words++;
372 	}
373 	return buflen;
374 }
375 
376 /*
377  * Read the taskfile for 16bit non-True IDE only.
378  */
379 static void octeon_cf_tf_read16(struct ata_port *ap, struct ata_taskfile *tf)
380 {
381 	u16 blob;
382 	/* The base of the registers is at ioaddr.data_addr. */
383 	void __iomem *base = ap->ioaddr.data_addr;
384 
385 	blob = __raw_readw(base + 0xc);
386 	tf->error = blob >> 8;
387 
388 	blob = __raw_readw(base + 2);
389 	tf->nsect = blob & 0xff;
390 	tf->lbal = blob >> 8;
391 
392 	blob = __raw_readw(base + 4);
393 	tf->lbam = blob & 0xff;
394 	tf->lbah = blob >> 8;
395 
396 	blob = __raw_readw(base + 6);
397 	tf->device = blob & 0xff;
398 	tf->status = blob >> 8;
399 
400 	if (tf->flags & ATA_TFLAG_LBA48) {
401 		if (likely(ap->ioaddr.ctl_addr)) {
402 			iowrite8(tf->ctl | ATA_HOB, ap->ioaddr.ctl_addr);
403 
404 			blob = __raw_readw(base + 0xc);
405 			tf->hob_feature = blob >> 8;
406 
407 			blob = __raw_readw(base + 2);
408 			tf->hob_nsect = blob & 0xff;
409 			tf->hob_lbal = blob >> 8;
410 
411 			blob = __raw_readw(base + 4);
412 			tf->hob_lbam = blob & 0xff;
413 			tf->hob_lbah = blob >> 8;
414 
415 			iowrite8(tf->ctl, ap->ioaddr.ctl_addr);
416 			ap->last_ctl = tf->ctl;
417 		} else {
418 			WARN_ON(1);
419 		}
420 	}
421 }
422 
423 static u8 octeon_cf_check_status16(struct ata_port *ap)
424 {
425 	u16 blob;
426 	void __iomem *base = ap->ioaddr.data_addr;
427 
428 	blob = __raw_readw(base + 6);
429 	return blob >> 8;
430 }
431 
432 static int octeon_cf_softreset16(struct ata_link *link, unsigned int *classes,
433 				 unsigned long deadline)
434 {
435 	struct ata_port *ap = link->ap;
436 	void __iomem *base = ap->ioaddr.data_addr;
437 	int rc;
438 	u8 err;
439 
440 	__raw_writew(ap->ctl, base + 0xe);
441 	udelay(20);
442 	__raw_writew(ap->ctl | ATA_SRST, base + 0xe);
443 	udelay(20);
444 	__raw_writew(ap->ctl, base + 0xe);
445 
446 	rc = ata_sff_wait_after_reset(link, 1, deadline);
447 	if (rc) {
448 		ata_link_err(link, "SRST failed (errno=%d)\n", rc);
449 		return rc;
450 	}
451 
452 	/* determine by signature whether we have ATA or ATAPI devices */
453 	classes[0] = ata_sff_dev_classify(&link->device[0], 1, &err);
454 	return 0;
455 }
456 
457 /*
458  * Load the taskfile for 16bit non-True IDE only.  The device_addr is
459  * not loaded, we do this as part of octeon_cf_exec_command16.
460  */
461 static void octeon_cf_tf_load16(struct ata_port *ap,
462 				const struct ata_taskfile *tf)
463 {
464 	unsigned int is_addr = tf->flags & ATA_TFLAG_ISADDR;
465 	/* The base of the registers is at ioaddr.data_addr. */
466 	void __iomem *base = ap->ioaddr.data_addr;
467 
468 	if (tf->ctl != ap->last_ctl) {
469 		iowrite8(tf->ctl, ap->ioaddr.ctl_addr);
470 		ap->last_ctl = tf->ctl;
471 		ata_wait_idle(ap);
472 	}
473 	if (is_addr && (tf->flags & ATA_TFLAG_LBA48)) {
474 		__raw_writew(tf->hob_feature << 8, base + 0xc);
475 		__raw_writew(tf->hob_nsect | tf->hob_lbal << 8, base + 2);
476 		__raw_writew(tf->hob_lbam | tf->hob_lbah << 8, base + 4);
477 	}
478 	if (is_addr) {
479 		__raw_writew(tf->feature << 8, base + 0xc);
480 		__raw_writew(tf->nsect | tf->lbal << 8, base + 2);
481 		__raw_writew(tf->lbam | tf->lbah << 8, base + 4);
482 	}
483 	ata_wait_idle(ap);
484 }
485 
486 
/*
 * Device selection hook.  There is only one device, so there is
 * nothing to select.
 */
static void octeon_cf_dev_select(struct ata_port *ap, unsigned int device)
{
	/* Intentionally empty. */
}
492 
493 /*
494  * Issue ATA command to host controller.  The device_addr is also sent
495  * as it must be written in a combined write with the command.
496  */
497 static void octeon_cf_exec_command16(struct ata_port *ap,
498 				const struct ata_taskfile *tf)
499 {
500 	/* The base of the registers is at ioaddr.data_addr. */
501 	void __iomem *base = ap->ioaddr.data_addr;
502 	u16 blob = 0;
503 
504 	if (tf->flags & ATA_TFLAG_DEVICE)
505 		blob = tf->device;
506 
507 	blob |= (tf->command << 8);
508 	__raw_writew(blob, base + 6);
509 
510 	ata_wait_idle(ap);
511 }
512 
/*
 * Empty port hook, installed as sff_irq_on and sff_irq_clear in
 * octeon_cf_ops.
 */
static void octeon_cf_ata_port_noaction(struct ata_port *ap)
{
	/* Nothing to do. */
}
516 
517 static void octeon_cf_dma_setup(struct ata_queued_cmd *qc)
518 {
519 	struct ata_port *ap = qc->ap;
520 	struct octeon_cf_port *cf_port;
521 
522 	cf_port = ap->private_data;
523 	/* issue r/w command */
524 	qc->cursg = qc->sg;
525 	cf_port->dma_finished = 0;
526 	ap->ops->sff_exec_command(ap, &qc->tf);
527 }
528 
529 /*
530  * Start a DMA transfer that was already setup
531  *
532  * @qc:     Information about the DMA
533  */
534 static void octeon_cf_dma_start(struct ata_queued_cmd *qc)
535 {
536 	struct octeon_cf_port *cf_port = qc->ap->private_data;
537 	union cvmx_mio_boot_dma_cfgx mio_boot_dma_cfg;
538 	union cvmx_mio_boot_dma_intx mio_boot_dma_int;
539 	struct scatterlist *sg;
540 
541 	/* Get the scatter list entry we need to DMA into */
542 	sg = qc->cursg;
543 	BUG_ON(!sg);
544 
545 	/*
546 	 * Clear the DMA complete status.
547 	 */
548 	mio_boot_dma_int.u64 = 0;
549 	mio_boot_dma_int.s.done = 1;
550 	cvmx_write_csr(cf_port->dma_base + DMA_INT, mio_boot_dma_int.u64);
551 
552 	/* Enable the interrupt.  */
553 	cvmx_write_csr(cf_port->dma_base + DMA_INT_EN, mio_boot_dma_int.u64);
554 
555 	/* Set the direction of the DMA */
556 	mio_boot_dma_cfg.u64 = 0;
557 #ifdef __LITTLE_ENDIAN
558 	mio_boot_dma_cfg.s.endian = 1;
559 #endif
560 	mio_boot_dma_cfg.s.en = 1;
561 	mio_boot_dma_cfg.s.rw = ((qc->tf.flags & ATA_TFLAG_WRITE) != 0);
562 
563 	/*
564 	 * Don't stop the DMA if the device deasserts DMARQ. Many
565 	 * compact flashes deassert DMARQ for a short time between
566 	 * sectors. Instead of stopping and restarting the DMA, we'll
567 	 * let the hardware do it. If the DMA is really stopped early
568 	 * due to an error condition, a later timeout will force us to
569 	 * stop.
570 	 */
571 	mio_boot_dma_cfg.s.clr = 0;
572 
573 	/* Size is specified in 16bit words and minus one notation */
574 	mio_boot_dma_cfg.s.size = sg_dma_len(sg) / 2 - 1;
575 
576 	/* We need to swap the high and low bytes of every 16 bits */
577 	mio_boot_dma_cfg.s.swap8 = 1;
578 
579 	mio_boot_dma_cfg.s.adr = sg_dma_address(sg);
580 
581 	cvmx_write_csr(cf_port->dma_base + DMA_CFG, mio_boot_dma_cfg.u64);
582 }
583 
584 /*
585  *
586  *	LOCKING:
587  *	spin_lock_irqsave(host lock)
588  *
589  */
590 static unsigned int octeon_cf_dma_finished(struct ata_port *ap,
591 					struct ata_queued_cmd *qc)
592 {
593 	struct ata_eh_info *ehi = &ap->link.eh_info;
594 	struct octeon_cf_port *cf_port = ap->private_data;
595 	union cvmx_mio_boot_dma_cfgx dma_cfg;
596 	union cvmx_mio_boot_dma_intx dma_int;
597 	u8 status;
598 
599 	trace_ata_bmdma_stop(ap, &qc->tf, qc->tag);
600 
601 	if (ap->hsm_task_state != HSM_ST_LAST)
602 		return 0;
603 
604 	dma_cfg.u64 = cvmx_read_csr(cf_port->dma_base + DMA_CFG);
605 	if (dma_cfg.s.size != 0xfffff) {
606 		/* Error, the transfer was not complete.  */
607 		qc->err_mask |= AC_ERR_HOST_BUS;
608 		ap->hsm_task_state = HSM_ST_ERR;
609 	}
610 
611 	/* Stop and clear the dma engine.  */
612 	dma_cfg.u64 = 0;
613 	dma_cfg.s.size = -1;
614 	cvmx_write_csr(cf_port->dma_base + DMA_CFG, dma_cfg.u64);
615 
616 	/* Disable the interrupt.  */
617 	dma_int.u64 = 0;
618 	cvmx_write_csr(cf_port->dma_base + DMA_INT_EN, dma_int.u64);
619 
620 	/* Clear the DMA complete status */
621 	dma_int.s.done = 1;
622 	cvmx_write_csr(cf_port->dma_base + DMA_INT, dma_int.u64);
623 
624 	status = ap->ops->sff_check_status(ap);
625 
626 	ata_sff_hsm_move(ap, qc, status, 0);
627 
628 	if (unlikely(qc->err_mask) && (qc->tf.protocol == ATA_PROT_DMA))
629 		ata_ehi_push_desc(ehi, "DMA stat 0x%x", status);
630 
631 	return 1;
632 }
633 
634 /*
635  * Check if any queued commands have more DMAs, if so start the next
636  * transfer, else do end of transfer handling.
637  */
638 static irqreturn_t octeon_cf_interrupt(int irq, void *dev_instance)
639 {
640 	struct ata_host *host = dev_instance;
641 	struct octeon_cf_port *cf_port;
642 	int i;
643 	unsigned int handled = 0;
644 	unsigned long flags;
645 
646 	spin_lock_irqsave(&host->lock, flags);
647 
648 	for (i = 0; i < host->n_ports; i++) {
649 		u8 status;
650 		struct ata_port *ap;
651 		struct ata_queued_cmd *qc;
652 		union cvmx_mio_boot_dma_intx dma_int;
653 		union cvmx_mio_boot_dma_cfgx dma_cfg;
654 
655 		ap = host->ports[i];
656 		cf_port = ap->private_data;
657 
658 		dma_int.u64 = cvmx_read_csr(cf_port->dma_base + DMA_INT);
659 		dma_cfg.u64 = cvmx_read_csr(cf_port->dma_base + DMA_CFG);
660 
661 		qc = ata_qc_from_tag(ap, ap->link.active_tag);
662 
663 		if (!qc || (qc->tf.flags & ATA_TFLAG_POLLING))
664 			continue;
665 
666 		if (dma_int.s.done && !dma_cfg.s.en) {
667 			if (!sg_is_last(qc->cursg)) {
668 				qc->cursg = sg_next(qc->cursg);
669 				handled = 1;
670 				trace_ata_bmdma_start(ap, &qc->tf, qc->tag);
671 				octeon_cf_dma_start(qc);
672 				continue;
673 			} else {
674 				cf_port->dma_finished = 1;
675 			}
676 		}
677 		if (!cf_port->dma_finished)
678 			continue;
679 		status = ioread8(ap->ioaddr.altstatus_addr);
680 		if (status & (ATA_BUSY | ATA_DRQ)) {
681 			/*
682 			 * We are busy, try to handle it later.  This
683 			 * is the DMA finished interrupt, and it could
684 			 * take a little while for the card to be
685 			 * ready for more commands.
686 			 */
687 			/* Clear DMA irq. */
688 			dma_int.u64 = 0;
689 			dma_int.s.done = 1;
690 			cvmx_write_csr(cf_port->dma_base + DMA_INT,
691 				       dma_int.u64);
692 			hrtimer_start_range_ns(&cf_port->delayed_finish,
693 					       ns_to_ktime(OCTEON_CF_BUSY_POLL_INTERVAL),
694 					       OCTEON_CF_BUSY_POLL_INTERVAL / 5,
695 					       HRTIMER_MODE_REL);
696 			handled = 1;
697 		} else {
698 			handled |= octeon_cf_dma_finished(ap, qc);
699 		}
700 	}
701 	spin_unlock_irqrestore(&host->lock, flags);
702 	return IRQ_RETVAL(handled);
703 }
704 
705 static enum hrtimer_restart octeon_cf_delayed_finish(struct hrtimer *hrt)
706 {
707 	struct octeon_cf_port *cf_port = container_of(hrt,
708 						      struct octeon_cf_port,
709 						      delayed_finish);
710 	struct ata_port *ap = cf_port->ap;
711 	struct ata_host *host = ap->host;
712 	struct ata_queued_cmd *qc;
713 	unsigned long flags;
714 	u8 status;
715 	enum hrtimer_restart rv = HRTIMER_NORESTART;
716 
717 	spin_lock_irqsave(&host->lock, flags);
718 
719 	/*
720 	 * If the port is not waiting for completion, it must have
721 	 * handled it previously.  The hsm_task_state is
722 	 * protected by host->lock.
723 	 */
724 	if (ap->hsm_task_state != HSM_ST_LAST || !cf_port->dma_finished)
725 		goto out;
726 
727 	status = ioread8(ap->ioaddr.altstatus_addr);
728 	if (status & (ATA_BUSY | ATA_DRQ)) {
729 		/* Still busy, try again. */
730 		hrtimer_forward_now(hrt,
731 				    ns_to_ktime(OCTEON_CF_BUSY_POLL_INTERVAL));
732 		rv = HRTIMER_RESTART;
733 		goto out;
734 	}
735 	qc = ata_qc_from_tag(ap, ap->link.active_tag);
736 	if (qc && (!(qc->tf.flags & ATA_TFLAG_POLLING)))
737 		octeon_cf_dma_finished(ap, qc);
738 out:
739 	spin_unlock_irqrestore(&host->lock, flags);
740 	return rv;
741 }
742 
743 static void octeon_cf_dev_config(struct ata_device *dev)
744 {
745 	/*
746 	 * A maximum of 2^20 - 1 16 bit transfers are possible with
747 	 * the bootbus DMA.  So we need to throttle max_sectors to
748 	 * (2^12 - 1 == 4095) to assure that this can never happen.
749 	 */
750 	dev->max_sectors = min(dev->max_sectors, 4095U);
751 }
752 
/*
 * ATAPI DMA filter hook.  This driver does not do ATAPI DMA and the
 * hook unconditionally returns 0.
 *
 * NOTE(review): libata appears to treat a non-zero return as "do not
 * use DMA", so 0 may not express the intent stated above - confirm.
 * The port is flagged ATA_FLAG_NO_ATAPI in octeon_cf_probe().
 */
static int octeon_cf_check_atapi_dma(struct ata_queued_cmd *qc)
{
	const int result = 0;

	return result;
}
760 
761 static unsigned int octeon_cf_qc_issue(struct ata_queued_cmd *qc)
762 {
763 	struct ata_port *ap = qc->ap;
764 
765 	switch (qc->tf.protocol) {
766 	case ATA_PROT_DMA:
767 		WARN_ON(qc->tf.flags & ATA_TFLAG_POLLING);
768 
769 		trace_ata_tf_load(ap, &qc->tf);
770 		ap->ops->sff_tf_load(ap, &qc->tf);  /* load tf registers */
771 		trace_ata_bmdma_setup(ap, &qc->tf, qc->tag);
772 		octeon_cf_dma_setup(qc);	    /* set up dma */
773 		trace_ata_bmdma_start(ap, &qc->tf, qc->tag);
774 		octeon_cf_dma_start(qc);	    /* initiate dma */
775 		ap->hsm_task_state = HSM_ST_LAST;
776 		break;
777 
778 	case ATAPI_PROT_DMA:
779 		dev_err(ap->dev, "Error, ATAPI not supported\n");
780 		BUG();
781 
782 	default:
783 		return ata_sff_qc_issue(qc);
784 	}
785 
786 	return 0;
787 }
788 
/*
 * Port operations, based on ata_sff_port_ops.  Not const:
 * octeon_cf_probe() fills in sff_data_xfer and, for 16 bit non-True
 * IDE, several more hooks depending on how the interface is wired.
 */
static struct ata_port_operations octeon_cf_ops = {
	.inherits		= &ata_sff_port_ops,
	.check_atapi_dma	= octeon_cf_check_atapi_dma,
	.qc_prep		= ata_noop_qc_prep,
	.qc_issue		= octeon_cf_qc_issue,
	.sff_dev_select		= octeon_cf_dev_select,
	.sff_irq_on		= octeon_cf_ata_port_noaction,
	.sff_irq_clear		= octeon_cf_ata_port_noaction,
	.cable_detect		= ata_cable_40wire,
	.set_piomode		= octeon_cf_set_piomode,
	.set_dmamode		= octeon_cf_set_dmamode,
	.dev_config		= octeon_cf_dev_config,
};
802 
803 static int octeon_cf_probe(struct platform_device *pdev)
804 {
805 	struct resource *res_cs0, *res_cs1;
806 
807 	bool is_16bit;
808 	u64 reg;
809 	struct device_node *node;
810 	void __iomem *cs0;
811 	void __iomem *cs1 = NULL;
812 	struct ata_host *host;
813 	struct ata_port *ap;
814 	int irq = 0;
815 	irq_handler_t irq_handler = NULL;
816 	void __iomem *base;
817 	struct octeon_cf_port *cf_port;
818 	int rv = -ENOMEM;
819 	u32 bus_width;
820 
821 	node = pdev->dev.of_node;
822 	if (node == NULL)
823 		return -EINVAL;
824 
825 	cf_port = devm_kzalloc(&pdev->dev, sizeof(*cf_port), GFP_KERNEL);
826 	if (!cf_port)
827 		return -ENOMEM;
828 
829 	cf_port->is_true_ide = of_property_read_bool(node, "cavium,true-ide");
830 
831 	if (of_property_read_u32(node, "cavium,bus-width", &bus_width) == 0)
832 		is_16bit = (bus_width == 16);
833 	else
834 		is_16bit = false;
835 
836 	rv = of_property_read_reg(node, 0, &reg, NULL);
837 	if (rv < 0)
838 		return rv;
839 	cf_port->cs0 = upper_32_bits(reg);
840 
841 	if (cf_port->is_true_ide) {
842 		struct device_node *dma_node;
843 		dma_node = of_parse_phandle(node,
844 					    "cavium,dma-engine-handle", 0);
845 		if (dma_node) {
846 			struct platform_device *dma_dev;
847 			dma_dev = of_find_device_by_node(dma_node);
848 			if (dma_dev) {
849 				struct resource *res_dma;
850 				int i;
851 				res_dma = platform_get_resource(dma_dev, IORESOURCE_MEM, 0);
852 				if (!res_dma) {
853 					put_device(&dma_dev->dev);
854 					of_node_put(dma_node);
855 					return -EINVAL;
856 				}
857 				cf_port->dma_base = (u64)devm_ioremap(&pdev->dev, res_dma->start,
858 									 resource_size(res_dma));
859 				if (!cf_port->dma_base) {
860 					put_device(&dma_dev->dev);
861 					of_node_put(dma_node);
862 					return -EINVAL;
863 				}
864 
865 				i = platform_get_irq(dma_dev, 0);
866 				if (i > 0) {
867 					irq = i;
868 					irq_handler = octeon_cf_interrupt;
869 				}
870 				put_device(&dma_dev->dev);
871 			}
872 			of_node_put(dma_node);
873 		}
874 		res_cs1 = platform_get_resource(pdev, IORESOURCE_MEM, 1);
875 		if (!res_cs1)
876 			return -EINVAL;
877 
878 		cs1 = devm_ioremap(&pdev->dev, res_cs1->start,
879 					   resource_size(res_cs1));
880 		if (!cs1)
881 			return -EINVAL;
882 
883 		rv = of_property_read_reg(node, 1, &reg, NULL);
884 		if (rv < 0)
885 			return rv;
886 		cf_port->cs1 = upper_32_bits(reg);
887 	}
888 
889 	res_cs0 = platform_get_resource(pdev, IORESOURCE_MEM, 0);
890 	if (!res_cs0)
891 		return -EINVAL;
892 
893 	cs0 = devm_ioremap(&pdev->dev, res_cs0->start,
894 				   resource_size(res_cs0));
895 	if (!cs0)
896 		return rv;
897 
898 	/* allocate host */
899 	host = ata_host_alloc(&pdev->dev, 1);
900 	if (!host)
901 		return rv;
902 
903 	ap = host->ports[0];
904 	ap->private_data = cf_port;
905 	pdev->dev.platform_data = cf_port;
906 	cf_port->ap = ap;
907 	ap->ops = &octeon_cf_ops;
908 	ap->pio_mask = ATA_PIO6;
909 	ap->flags |= ATA_FLAG_NO_ATAPI | ATA_FLAG_PIO_POLLING;
910 
911 	if (!is_16bit) {
912 		base = cs0 + 0x800;
913 		ap->ioaddr.cmd_addr	= base;
914 		ata_sff_std_ports(&ap->ioaddr);
915 
916 		ap->ioaddr.altstatus_addr = base + 0xe;
917 		ap->ioaddr.ctl_addr	= base + 0xe;
918 		octeon_cf_ops.sff_data_xfer = octeon_cf_data_xfer8;
919 	} else if (cf_port->is_true_ide) {
920 		base = cs0;
921 		ap->ioaddr.cmd_addr	= base + (ATA_REG_CMD << 1) + 1;
922 		ap->ioaddr.data_addr	= base + (ATA_REG_DATA << 1);
923 		ap->ioaddr.error_addr	= base + (ATA_REG_ERR << 1) + 1;
924 		ap->ioaddr.feature_addr	= base + (ATA_REG_FEATURE << 1) + 1;
925 		ap->ioaddr.nsect_addr	= base + (ATA_REG_NSECT << 1) + 1;
926 		ap->ioaddr.lbal_addr	= base + (ATA_REG_LBAL << 1) + 1;
927 		ap->ioaddr.lbam_addr	= base + (ATA_REG_LBAM << 1) + 1;
928 		ap->ioaddr.lbah_addr	= base + (ATA_REG_LBAH << 1) + 1;
929 		ap->ioaddr.device_addr	= base + (ATA_REG_DEVICE << 1) + 1;
930 		ap->ioaddr.status_addr	= base + (ATA_REG_STATUS << 1) + 1;
931 		ap->ioaddr.command_addr	= base + (ATA_REG_CMD << 1) + 1;
932 		ap->ioaddr.altstatus_addr = cs1 + (6 << 1) + 1;
933 		ap->ioaddr.ctl_addr	= cs1 + (6 << 1) + 1;
934 		octeon_cf_ops.sff_data_xfer = octeon_cf_data_xfer16;
935 
936 		ap->mwdma_mask	= enable_dma ? ATA_MWDMA4 : 0;
937 
938 		/* True IDE mode needs a timer to poll for not-busy.  */
939 		hrtimer_init(&cf_port->delayed_finish, CLOCK_MONOTONIC,
940 			     HRTIMER_MODE_REL);
941 		cf_port->delayed_finish.function = octeon_cf_delayed_finish;
942 	} else {
943 		/* 16 bit but not True IDE */
944 		base = cs0 + 0x800;
945 		octeon_cf_ops.sff_data_xfer	= octeon_cf_data_xfer16;
946 		octeon_cf_ops.softreset		= octeon_cf_softreset16;
947 		octeon_cf_ops.sff_check_status	= octeon_cf_check_status16;
948 		octeon_cf_ops.sff_tf_read	= octeon_cf_tf_read16;
949 		octeon_cf_ops.sff_tf_load	= octeon_cf_tf_load16;
950 		octeon_cf_ops.sff_exec_command	= octeon_cf_exec_command16;
951 
952 		ap->ioaddr.data_addr	= base + ATA_REG_DATA;
953 		ap->ioaddr.nsect_addr	= base + ATA_REG_NSECT;
954 		ap->ioaddr.lbal_addr	= base + ATA_REG_LBAL;
955 		ap->ioaddr.ctl_addr	= base + 0xe;
956 		ap->ioaddr.altstatus_addr = base + 0xe;
957 	}
958 	cf_port->c0 = ap->ioaddr.ctl_addr;
959 
960 	rv = dma_coerce_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
961 	if (rv)
962 		return rv;
963 
964 	ata_port_desc(ap, "cmd %p ctl %p", base, ap->ioaddr.ctl_addr);
965 
966 	dev_info(&pdev->dev, "version " DRV_VERSION" %d bit%s.\n",
967 		 is_16bit ? 16 : 8,
968 		 cf_port->is_true_ide ? ", True IDE" : "");
969 
970 	return ata_host_activate(host, irq, irq_handler,
971 				 IRQF_SHARED, &octeon_cf_sht);
972 }
973 
974 static void octeon_cf_shutdown(struct device *dev)
975 {
976 	union cvmx_mio_boot_dma_cfgx dma_cfg;
977 	union cvmx_mio_boot_dma_intx dma_int;
978 
979 	struct octeon_cf_port *cf_port = dev_get_platdata(dev);
980 
981 	if (cf_port->dma_base) {
982 		/* Stop and clear the dma engine.  */
983 		dma_cfg.u64 = 0;
984 		dma_cfg.s.size = -1;
985 		cvmx_write_csr(cf_port->dma_base + DMA_CFG, dma_cfg.u64);
986 
987 		/* Disable the interrupt.  */
988 		dma_int.u64 = 0;
989 		cvmx_write_csr(cf_port->dma_base + DMA_INT_EN, dma_int.u64);
990 
991 		/* Clear the DMA complete status */
992 		dma_int.s.done = 1;
993 		cvmx_write_csr(cf_port->dma_base + DMA_INT, dma_int.u64);
994 
995 		__raw_writeb(0, cf_port->c0);
996 		udelay(20);
997 		__raw_writeb(ATA_SRST, cf_port->c0);
998 		udelay(20);
999 		__raw_writeb(0, cf_port->c0);
1000 		mdelay(100);
1001 	}
1002 }
1003 
/* Device tree compatible strings handled by this driver. */
static const struct of_device_id octeon_cf_match[] = {
	{ .compatible = "cavium,ebt3000-compact-flash", },
	{ /* sentinel */ }
};
MODULE_DEVICE_TABLE(of, octeon_cf_match);
1009 
/*
 * Platform driver glue.  Only probe and shutdown callbacks are
 * provided; no remove callback is set.
 */
static struct platform_driver octeon_cf_driver = {
	.probe		= octeon_cf_probe,
	.driver		= {
		.name	= DRV_NAME,
		.of_match_table = octeon_cf_match,
		.shutdown = octeon_cf_shutdown
	},
};
1018 
1019 static int __init octeon_cf_init(void)
1020 {
1021 	return platform_driver_register(&octeon_cf_driver);
1022 }
1023 
1024 
MODULE_AUTHOR("David Daney <ddaney@caviumnetworks.com>");
MODULE_DESCRIPTION("low-level driver for Cavium OCTEON Compact Flash PATA");
MODULE_LICENSE("GPL");
MODULE_VERSION(DRV_VERSION);
MODULE_ALIAS("platform:" DRV_NAME);

/* Only an init hook is registered here; there is no module_exit(). */
module_init(octeon_cf_init);
1032