xref: /openbmc/linux/drivers/ata/pata_octeon_cf.c (revision c819e2cf)
1 /*
2  * Driver for the Octeon bootbus compact flash.
3  *
4  * This file is subject to the terms and conditions of the GNU General Public
5  * License.  See the file "COPYING" in the main directory of this archive
6  * for more details.
7  *
8  * Copyright (C) 2005 - 2012 Cavium Inc.
9  * Copyright (C) 2008 Wind River Systems
10  */
11 
12 #include <linux/kernel.h>
13 #include <linux/module.h>
14 #include <linux/libata.h>
15 #include <linux/hrtimer.h>
16 #include <linux/slab.h>
17 #include <linux/irq.h>
18 #include <linux/of.h>
19 #include <linux/of_platform.h>
20 #include <linux/platform_device.h>
21 #include <scsi/scsi_host.h>
22 
23 #include <asm/byteorder.h>
24 #include <asm/octeon/octeon.h>
25 
26 /*
27  * The Octeon bootbus compact flash interface is connected in at least
28  * 3 different configurations on various evaluation boards:
29  *
30  * -- 8  bits no irq, no DMA
31  * -- 16 bits no irq, no DMA
32  * -- 16 bits True IDE mode with DMA, but no irq.
33  *
34  * In the last case the DMA engine can generate an interrupt when the
35  * transfer is complete.  For the first two cases only PIO is supported.
36  *
37  */
38 
/* Name and version string the driver reports. */
#define DRV_NAME	"pata_octeon_cf"
#define DRV_VERSION	"2.2"

/* Interval, in nanoseconds, between polls of a card that is still busy. */
#define OCTEON_CF_BUSY_POLL_INTERVAL 500000

/* Offsets of the DMA engine registers, added to octeon_cf_port.dma_base. */
#define DMA_CFG 0
#define DMA_TIM 0x20
#define DMA_INT 0x38
#define DMA_INT_EN 0x50
49 
50 struct octeon_cf_port {
51 	struct hrtimer delayed_finish;
52 	struct ata_port *ap;
53 	int dma_finished;
54 	void		*c0;
55 	unsigned int cs0;
56 	unsigned int cs1;
57 	bool is_true_ide;
58 	u64 dma_base;
59 };
60 
61 static struct scsi_host_template octeon_cf_sht = {
62 	ATA_PIO_SHT(DRV_NAME),
63 };
64 
65 static int enable_dma;
66 module_param(enable_dma, int, 0444);
67 MODULE_PARM_DESC(enable_dma,
68 		 "Enable use of DMA on interfaces that support it (0=no dma [default], 1=use dma)");
69 
/**
 * ns_to_tim_reg - convert a duration in nanoseconds to a boot bus timing value
 * @tim_mult: timing multiplier in effect for the register being programmed
 * @nsecs:    desired duration in nanoseconds
 *
 * Return: the number of (@tim_mult x eclock period) units that covers
 * @nsecs, rounded up.
 */
static unsigned int ns_to_tim_reg(unsigned int tim_mult, unsigned int nsecs)
{
	/*
	 * nsecs * (clock in MHz) is the duration in thousandths of an
	 * eclock; dividing by 1000 * tim_mult yields timing units.
	 */
	return DIV_ROUND_UP(nsecs * (octeon_get_io_clock_rate() / 1000000),
			    1000 * tim_mult);
}
87 
88 static void octeon_cf_set_boot_reg_cfg(int cs, unsigned int multiplier)
89 {
90 	union cvmx_mio_boot_reg_cfgx reg_cfg;
91 	unsigned int tim_mult;
92 
93 	switch (multiplier) {
94 	case 8:
95 		tim_mult = 3;
96 		break;
97 	case 4:
98 		tim_mult = 0;
99 		break;
100 	case 2:
101 		tim_mult = 2;
102 		break;
103 	default:
104 		tim_mult = 1;
105 		break;
106 	}
107 
108 	reg_cfg.u64 = cvmx_read_csr(CVMX_MIO_BOOT_REG_CFGX(cs));
109 	reg_cfg.s.dmack = 0;	/* Don't assert DMACK on access */
110 	reg_cfg.s.tim_mult = tim_mult;	/* Timing mutiplier */
111 	reg_cfg.s.rd_dly = 0;	/* Sample on falling edge of BOOT_OE */
112 	reg_cfg.s.sam = 0;	/* Don't combine write and output enable */
113 	reg_cfg.s.we_ext = 0;	/* No write enable extension */
114 	reg_cfg.s.oe_ext = 0;	/* No read enable extension */
115 	reg_cfg.s.en = 1;	/* Enable this region */
116 	reg_cfg.s.orbit = 0;	/* Don't combine with previous region */
117 	reg_cfg.s.ale = 0;	/* Don't do address multiplexing */
118 	cvmx_write_csr(CVMX_MIO_BOOT_REG_CFGX(cs), reg_cfg.u64);
119 }
120 
121 /**
122  * Called after libata determines the needed PIO mode. This
123  * function programs the Octeon bootbus regions to support the
124  * timing requirements of the PIO mode.
125  *
126  * @ap:     ATA port information
127  * @dev:    ATA device
128  */
129 static void octeon_cf_set_piomode(struct ata_port *ap, struct ata_device *dev)
130 {
131 	struct octeon_cf_port *cf_port = ap->private_data;
132 	union cvmx_mio_boot_reg_timx reg_tim;
133 	int T;
134 	struct ata_timing timing;
135 
136 	unsigned int div;
137 	int use_iordy;
138 	int trh;
139 	int pause;
140 	/* These names are timing parameters from the ATA spec */
141 	int t1;
142 	int t2;
143 	int t2i;
144 
145 	/*
146 	 * A divisor value of four will overflow the timing fields at
147 	 * clock rates greater than 800MHz
148 	 */
149 	if (octeon_get_io_clock_rate() <= 800000000)
150 		div = 4;
151 	else
152 		div = 8;
153 	T = (int)((1000000000000LL * div) / octeon_get_io_clock_rate());
154 
155 	if (ata_timing_compute(dev, dev->pio_mode, &timing, T, T))
156 		BUG();
157 
158 	t1 = timing.setup;
159 	if (t1)
160 		t1--;
161 	t2 = timing.active;
162 	if (t2)
163 		t2--;
164 	t2i = timing.act8b;
165 	if (t2i)
166 		t2i--;
167 
168 	trh = ns_to_tim_reg(div, 20);
169 	if (trh)
170 		trh--;
171 
172 	pause = (int)timing.cycle - (int)timing.active -
173 		(int)timing.setup - trh;
174 	if (pause < 0)
175 		pause = 0;
176 	if (pause)
177 		pause--;
178 
179 	octeon_cf_set_boot_reg_cfg(cf_port->cs0, div);
180 	if (cf_port->is_true_ide)
181 		/* True IDE mode, program both chip selects.  */
182 		octeon_cf_set_boot_reg_cfg(cf_port->cs1, div);
183 
184 
185 	use_iordy = ata_pio_need_iordy(dev);
186 
187 	reg_tim.u64 = cvmx_read_csr(CVMX_MIO_BOOT_REG_TIMX(cf_port->cs0));
188 	/* Disable page mode */
189 	reg_tim.s.pagem = 0;
190 	/* Enable dynamic timing */
191 	reg_tim.s.waitm = use_iordy;
192 	/* Pages are disabled */
193 	reg_tim.s.pages = 0;
194 	/* We don't use multiplexed address mode */
195 	reg_tim.s.ale = 0;
196 	/* Not used */
197 	reg_tim.s.page = 0;
198 	/* Time after IORDY to coninue to assert the data */
199 	reg_tim.s.wait = 0;
200 	/* Time to wait to complete the cycle. */
201 	reg_tim.s.pause = pause;
202 	/* How long to hold after a write to de-assert CE. */
203 	reg_tim.s.wr_hld = trh;
204 	/* How long to wait after a read to de-assert CE. */
205 	reg_tim.s.rd_hld = trh;
206 	/* How long write enable is asserted */
207 	reg_tim.s.we = t2;
208 	/* How long read enable is asserted */
209 	reg_tim.s.oe = t2;
210 	/* Time after CE that read/write starts */
211 	reg_tim.s.ce = ns_to_tim_reg(div, 5);
212 	/* Time before CE that address is valid */
213 	reg_tim.s.adr = 0;
214 
215 	/* Program the bootbus region timing for the data port chip select. */
216 	cvmx_write_csr(CVMX_MIO_BOOT_REG_TIMX(cf_port->cs0), reg_tim.u64);
217 	if (cf_port->is_true_ide)
218 		/* True IDE mode, program both chip selects.  */
219 		cvmx_write_csr(CVMX_MIO_BOOT_REG_TIMX(cf_port->cs1),
220 			       reg_tim.u64);
221 }
222 
223 static void octeon_cf_set_dmamode(struct ata_port *ap, struct ata_device *dev)
224 {
225 	struct octeon_cf_port *cf_port = ap->private_data;
226 	union cvmx_mio_boot_pin_defs pin_defs;
227 	union cvmx_mio_boot_dma_timx dma_tim;
228 	unsigned int oe_a;
229 	unsigned int oe_n;
230 	unsigned int dma_ackh;
231 	unsigned int dma_arq;
232 	unsigned int pause;
233 	unsigned int T0, Tkr, Td;
234 	unsigned int tim_mult;
235 	int c;
236 
237 	const struct ata_timing *timing;
238 
239 	timing = ata_timing_find_mode(dev->dma_mode);
240 	T0	= timing->cycle;
241 	Td	= timing->active;
242 	Tkr	= timing->recover;
243 	dma_ackh = timing->dmack_hold;
244 
245 	dma_tim.u64 = 0;
246 	/* dma_tim.s.tim_mult = 0 --> 4x */
247 	tim_mult = 4;
248 
249 	/* not spec'ed, value in eclocks, not affected by tim_mult */
250 	dma_arq = 8;
251 	pause = 25 - dma_arq * 1000 /
252 		(octeon_get_io_clock_rate() / 1000000); /* Tz */
253 
254 	oe_a = Td;
255 	/* Tkr from cf spec, lengthened to meet T0 */
256 	oe_n = max(T0 - oe_a, Tkr);
257 
258 	pin_defs.u64 = cvmx_read_csr(CVMX_MIO_BOOT_PIN_DEFS);
259 
260 	/* DMA channel number. */
261 	c = (cf_port->dma_base & 8) >> 3;
262 
263 	/* Invert the polarity if the default is 0*/
264 	dma_tim.s.dmack_pi = (pin_defs.u64 & (1ull << (11 + c))) ? 0 : 1;
265 
266 	dma_tim.s.oe_n = ns_to_tim_reg(tim_mult, oe_n);
267 	dma_tim.s.oe_a = ns_to_tim_reg(tim_mult, oe_a);
268 
269 	/*
270 	 * This is tI, C.F. spec. says 0, but Sony CF card requires
271 	 * more, we use 20 nS.
272 	 */
273 	dma_tim.s.dmack_s = ns_to_tim_reg(tim_mult, 20);
274 	dma_tim.s.dmack_h = ns_to_tim_reg(tim_mult, dma_ackh);
275 
276 	dma_tim.s.dmarq = dma_arq;
277 	dma_tim.s.pause = ns_to_tim_reg(tim_mult, pause);
278 
279 	dma_tim.s.rd_dly = 0;	/* Sample right on edge */
280 
281 	/*  writes only */
282 	dma_tim.s.we_n = ns_to_tim_reg(tim_mult, oe_n);
283 	dma_tim.s.we_a = ns_to_tim_reg(tim_mult, oe_a);
284 
285 	pr_debug("ns to ticks (mult %d) of %d is: %d\n", tim_mult, 60,
286 		 ns_to_tim_reg(tim_mult, 60));
287 	pr_debug("oe_n: %d, oe_a: %d, dmack_s: %d, dmack_h: %d, dmarq: %d, pause: %d\n",
288 		 dma_tim.s.oe_n, dma_tim.s.oe_a, dma_tim.s.dmack_s,
289 		 dma_tim.s.dmack_h, dma_tim.s.dmarq, dma_tim.s.pause);
290 
291 	cvmx_write_csr(cf_port->dma_base + DMA_TIM, dma_tim.u64);
292 }
293 
294 /**
295  * Handle an 8 bit I/O request.
296  *
297  * @dev:        Device to access
298  * @buffer:     Data buffer
299  * @buflen:     Length of the buffer.
300  * @rw:         True to write.
301  */
302 static unsigned int octeon_cf_data_xfer8(struct ata_device *dev,
303 					 unsigned char *buffer,
304 					 unsigned int buflen,
305 					 int rw)
306 {
307 	struct ata_port *ap		= dev->link->ap;
308 	void __iomem *data_addr		= ap->ioaddr.data_addr;
309 	unsigned long words;
310 	int count;
311 
312 	words = buflen;
313 	if (rw) {
314 		count = 16;
315 		while (words--) {
316 			iowrite8(*buffer, data_addr);
317 			buffer++;
318 			/*
319 			 * Every 16 writes do a read so the bootbus
320 			 * FIFO doesn't fill up.
321 			 */
322 			if (--count == 0) {
323 				ioread8(ap->ioaddr.altstatus_addr);
324 				count = 16;
325 			}
326 		}
327 	} else {
328 		ioread8_rep(data_addr, buffer, words);
329 	}
330 	return buflen;
331 }
332 
333 /**
334  * Handle a 16 bit I/O request.
335  *
336  * @dev:        Device to access
337  * @buffer:     Data buffer
338  * @buflen:     Length of the buffer.
339  * @rw:         True to write.
340  */
341 static unsigned int octeon_cf_data_xfer16(struct ata_device *dev,
342 					  unsigned char *buffer,
343 					  unsigned int buflen,
344 					  int rw)
345 {
346 	struct ata_port *ap		= dev->link->ap;
347 	void __iomem *data_addr		= ap->ioaddr.data_addr;
348 	unsigned long words;
349 	int count;
350 
351 	words = buflen / 2;
352 	if (rw) {
353 		count = 16;
354 		while (words--) {
355 			iowrite16(*(uint16_t *)buffer, data_addr);
356 			buffer += sizeof(uint16_t);
357 			/*
358 			 * Every 16 writes do a read so the bootbus
359 			 * FIFO doesn't fill up.
360 			 */
361 			if (--count == 0) {
362 				ioread8(ap->ioaddr.altstatus_addr);
363 				count = 16;
364 			}
365 		}
366 	} else {
367 		while (words--) {
368 			*(uint16_t *)buffer = ioread16(data_addr);
369 			buffer += sizeof(uint16_t);
370 		}
371 	}
372 	/* Transfer trailing 1 byte, if any. */
373 	if (unlikely(buflen & 0x01)) {
374 		__le16 align_buf[1] = { 0 };
375 
376 		if (rw == READ) {
377 			align_buf[0] = cpu_to_le16(ioread16(data_addr));
378 			memcpy(buffer, align_buf, 1);
379 		} else {
380 			memcpy(align_buf, buffer, 1);
381 			iowrite16(le16_to_cpu(align_buf[0]), data_addr);
382 		}
383 		words++;
384 	}
385 	return buflen;
386 }
387 
388 /**
389  * Read the taskfile for 16bit non-True IDE only.
390  */
391 static void octeon_cf_tf_read16(struct ata_port *ap, struct ata_taskfile *tf)
392 {
393 	u16 blob;
394 	/* The base of the registers is at ioaddr.data_addr. */
395 	void __iomem *base = ap->ioaddr.data_addr;
396 
397 	blob = __raw_readw(base + 0xc);
398 	tf->feature = blob >> 8;
399 
400 	blob = __raw_readw(base + 2);
401 	tf->nsect = blob & 0xff;
402 	tf->lbal = blob >> 8;
403 
404 	blob = __raw_readw(base + 4);
405 	tf->lbam = blob & 0xff;
406 	tf->lbah = blob >> 8;
407 
408 	blob = __raw_readw(base + 6);
409 	tf->device = blob & 0xff;
410 	tf->command = blob >> 8;
411 
412 	if (tf->flags & ATA_TFLAG_LBA48) {
413 		if (likely(ap->ioaddr.ctl_addr)) {
414 			iowrite8(tf->ctl | ATA_HOB, ap->ioaddr.ctl_addr);
415 
416 			blob = __raw_readw(base + 0xc);
417 			tf->hob_feature = blob >> 8;
418 
419 			blob = __raw_readw(base + 2);
420 			tf->hob_nsect = blob & 0xff;
421 			tf->hob_lbal = blob >> 8;
422 
423 			blob = __raw_readw(base + 4);
424 			tf->hob_lbam = blob & 0xff;
425 			tf->hob_lbah = blob >> 8;
426 
427 			iowrite8(tf->ctl, ap->ioaddr.ctl_addr);
428 			ap->last_ctl = tf->ctl;
429 		} else {
430 			WARN_ON(1);
431 		}
432 	}
433 }
434 
435 static u8 octeon_cf_check_status16(struct ata_port *ap)
436 {
437 	u16 blob;
438 	void __iomem *base = ap->ioaddr.data_addr;
439 
440 	blob = __raw_readw(base + 6);
441 	return blob >> 8;
442 }
443 
444 static int octeon_cf_softreset16(struct ata_link *link, unsigned int *classes,
445 				 unsigned long deadline)
446 {
447 	struct ata_port *ap = link->ap;
448 	void __iomem *base = ap->ioaddr.data_addr;
449 	int rc;
450 	u8 err;
451 
452 	DPRINTK("about to softreset\n");
453 	__raw_writew(ap->ctl, base + 0xe);
454 	udelay(20);
455 	__raw_writew(ap->ctl | ATA_SRST, base + 0xe);
456 	udelay(20);
457 	__raw_writew(ap->ctl, base + 0xe);
458 
459 	rc = ata_sff_wait_after_reset(link, 1, deadline);
460 	if (rc) {
461 		ata_link_err(link, "SRST failed (errno=%d)\n", rc);
462 		return rc;
463 	}
464 
465 	/* determine by signature whether we have ATA or ATAPI devices */
466 	classes[0] = ata_sff_dev_classify(&link->device[0], 1, &err);
467 	DPRINTK("EXIT, classes[0]=%u [1]=%u\n", classes[0], classes[1]);
468 	return 0;
469 }
470 
471 /**
472  * Load the taskfile for 16bit non-True IDE only.  The device_addr is
473  * not loaded, we do this as part of octeon_cf_exec_command16.
474  */
475 static void octeon_cf_tf_load16(struct ata_port *ap,
476 				const struct ata_taskfile *tf)
477 {
478 	unsigned int is_addr = tf->flags & ATA_TFLAG_ISADDR;
479 	/* The base of the registers is at ioaddr.data_addr. */
480 	void __iomem *base = ap->ioaddr.data_addr;
481 
482 	if (tf->ctl != ap->last_ctl) {
483 		iowrite8(tf->ctl, ap->ioaddr.ctl_addr);
484 		ap->last_ctl = tf->ctl;
485 		ata_wait_idle(ap);
486 	}
487 	if (is_addr && (tf->flags & ATA_TFLAG_LBA48)) {
488 		__raw_writew(tf->hob_feature << 8, base + 0xc);
489 		__raw_writew(tf->hob_nsect | tf->hob_lbal << 8, base + 2);
490 		__raw_writew(tf->hob_lbam | tf->hob_lbah << 8, base + 4);
491 		VPRINTK("hob: feat 0x%X nsect 0x%X, lba 0x%X 0x%X 0x%X\n",
492 			tf->hob_feature,
493 			tf->hob_nsect,
494 			tf->hob_lbal,
495 			tf->hob_lbam,
496 			tf->hob_lbah);
497 	}
498 	if (is_addr) {
499 		__raw_writew(tf->feature << 8, base + 0xc);
500 		__raw_writew(tf->nsect | tf->lbal << 8, base + 2);
501 		__raw_writew(tf->lbam | tf->lbah << 8, base + 4);
502 		VPRINTK("feat 0x%X nsect 0x%X, lba 0x%X 0x%X 0x%X\n",
503 			tf->feature,
504 			tf->nsect,
505 			tf->lbal,
506 			tf->lbam,
507 			tf->lbah);
508 	}
509 	ata_wait_idle(ap);
510 }
511 
512 
/* Device selection hook: intentionally empty, there is only one device. */
static void octeon_cf_dev_select(struct ata_port *ap, unsigned int device)
{
}
518 
519 /*
520  * Issue ATA command to host controller.  The device_addr is also sent
521  * as it must be written in a combined write with the command.
522  */
523 static void octeon_cf_exec_command16(struct ata_port *ap,
524 				const struct ata_taskfile *tf)
525 {
526 	/* The base of the registers is at ioaddr.data_addr. */
527 	void __iomem *base = ap->ioaddr.data_addr;
528 	u16 blob;
529 
530 	if (tf->flags & ATA_TFLAG_DEVICE) {
531 		VPRINTK("device 0x%X\n", tf->device);
532 		blob = tf->device;
533 	} else {
534 		blob = 0;
535 	}
536 
537 	DPRINTK("ata%u: cmd 0x%X\n", ap->print_id, tf->command);
538 	blob |= (tf->command << 8);
539 	__raw_writew(blob, base + 6);
540 
541 
542 	ata_wait_idle(ap);
543 }
544 
/* Empty port hook, installed for sff_irq_on and sff_irq_clear. */
static void octeon_cf_ata_port_noaction(struct ata_port *ap)
{
	/* Nothing to do. */
}
548 
549 static void octeon_cf_dma_setup(struct ata_queued_cmd *qc)
550 {
551 	struct ata_port *ap = qc->ap;
552 	struct octeon_cf_port *cf_port;
553 
554 	cf_port = ap->private_data;
555 	DPRINTK("ENTER\n");
556 	/* issue r/w command */
557 	qc->cursg = qc->sg;
558 	cf_port->dma_finished = 0;
559 	ap->ops->sff_exec_command(ap, &qc->tf);
560 	DPRINTK("EXIT\n");
561 }
562 
563 /**
564  * Start a DMA transfer that was already setup
565  *
566  * @qc:     Information about the DMA
567  */
568 static void octeon_cf_dma_start(struct ata_queued_cmd *qc)
569 {
570 	struct octeon_cf_port *cf_port = qc->ap->private_data;
571 	union cvmx_mio_boot_dma_cfgx mio_boot_dma_cfg;
572 	union cvmx_mio_boot_dma_intx mio_boot_dma_int;
573 	struct scatterlist *sg;
574 
575 	VPRINTK("%d scatterlists\n", qc->n_elem);
576 
577 	/* Get the scatter list entry we need to DMA into */
578 	sg = qc->cursg;
579 	BUG_ON(!sg);
580 
581 	/*
582 	 * Clear the DMA complete status.
583 	 */
584 	mio_boot_dma_int.u64 = 0;
585 	mio_boot_dma_int.s.done = 1;
586 	cvmx_write_csr(cf_port->dma_base + DMA_INT, mio_boot_dma_int.u64);
587 
588 	/* Enable the interrupt.  */
589 	cvmx_write_csr(cf_port->dma_base + DMA_INT_EN, mio_boot_dma_int.u64);
590 
591 	/* Set the direction of the DMA */
592 	mio_boot_dma_cfg.u64 = 0;
593 #ifdef __LITTLE_ENDIAN
594 	mio_boot_dma_cfg.s.endian = 1;
595 #endif
596 	mio_boot_dma_cfg.s.en = 1;
597 	mio_boot_dma_cfg.s.rw = ((qc->tf.flags & ATA_TFLAG_WRITE) != 0);
598 
599 	/*
600 	 * Don't stop the DMA if the device deasserts DMARQ. Many
601 	 * compact flashes deassert DMARQ for a short time between
602 	 * sectors. Instead of stopping and restarting the DMA, we'll
603 	 * let the hardware do it. If the DMA is really stopped early
604 	 * due to an error condition, a later timeout will force us to
605 	 * stop.
606 	 */
607 	mio_boot_dma_cfg.s.clr = 0;
608 
609 	/* Size is specified in 16bit words and minus one notation */
610 	mio_boot_dma_cfg.s.size = sg_dma_len(sg) / 2 - 1;
611 
612 	/* We need to swap the high and low bytes of every 16 bits */
613 	mio_boot_dma_cfg.s.swap8 = 1;
614 
615 	mio_boot_dma_cfg.s.adr = sg_dma_address(sg);
616 
617 	VPRINTK("%s %d bytes address=%p\n",
618 		(mio_boot_dma_cfg.s.rw) ? "write" : "read", sg->length,
619 		(void *)(unsigned long)mio_boot_dma_cfg.s.adr);
620 
621 	cvmx_write_csr(cf_port->dma_base + DMA_CFG, mio_boot_dma_cfg.u64);
622 }
623 
624 /**
625  *
626  *	LOCKING:
627  *	spin_lock_irqsave(host lock)
628  *
629  */
630 static unsigned int octeon_cf_dma_finished(struct ata_port *ap,
631 					struct ata_queued_cmd *qc)
632 {
633 	struct ata_eh_info *ehi = &ap->link.eh_info;
634 	struct octeon_cf_port *cf_port = ap->private_data;
635 	union cvmx_mio_boot_dma_cfgx dma_cfg;
636 	union cvmx_mio_boot_dma_intx dma_int;
637 	u8 status;
638 
639 	VPRINTK("ata%u: protocol %d task_state %d\n",
640 		ap->print_id, qc->tf.protocol, ap->hsm_task_state);
641 
642 
643 	if (ap->hsm_task_state != HSM_ST_LAST)
644 		return 0;
645 
646 	dma_cfg.u64 = cvmx_read_csr(cf_port->dma_base + DMA_CFG);
647 	if (dma_cfg.s.size != 0xfffff) {
648 		/* Error, the transfer was not complete.  */
649 		qc->err_mask |= AC_ERR_HOST_BUS;
650 		ap->hsm_task_state = HSM_ST_ERR;
651 	}
652 
653 	/* Stop and clear the dma engine.  */
654 	dma_cfg.u64 = 0;
655 	dma_cfg.s.size = -1;
656 	cvmx_write_csr(cf_port->dma_base + DMA_CFG, dma_cfg.u64);
657 
658 	/* Disable the interrupt.  */
659 	dma_int.u64 = 0;
660 	cvmx_write_csr(cf_port->dma_base + DMA_INT_EN, dma_int.u64);
661 
662 	/* Clear the DMA complete status */
663 	dma_int.s.done = 1;
664 	cvmx_write_csr(cf_port->dma_base + DMA_INT, dma_int.u64);
665 
666 	status = ap->ops->sff_check_status(ap);
667 
668 	ata_sff_hsm_move(ap, qc, status, 0);
669 
670 	if (unlikely(qc->err_mask) && (qc->tf.protocol == ATA_PROT_DMA))
671 		ata_ehi_push_desc(ehi, "DMA stat 0x%x", status);
672 
673 	return 1;
674 }
675 
676 /*
677  * Check if any queued commands have more DMAs, if so start the next
678  * transfer, else do end of transfer handling.
679  */
680 static irqreturn_t octeon_cf_interrupt(int irq, void *dev_instance)
681 {
682 	struct ata_host *host = dev_instance;
683 	struct octeon_cf_port *cf_port;
684 	int i;
685 	unsigned int handled = 0;
686 	unsigned long flags;
687 
688 	spin_lock_irqsave(&host->lock, flags);
689 
690 	DPRINTK("ENTER\n");
691 	for (i = 0; i < host->n_ports; i++) {
692 		u8 status;
693 		struct ata_port *ap;
694 		struct ata_queued_cmd *qc;
695 		union cvmx_mio_boot_dma_intx dma_int;
696 		union cvmx_mio_boot_dma_cfgx dma_cfg;
697 
698 		ap = host->ports[i];
699 		cf_port = ap->private_data;
700 
701 		dma_int.u64 = cvmx_read_csr(cf_port->dma_base + DMA_INT);
702 		dma_cfg.u64 = cvmx_read_csr(cf_port->dma_base + DMA_CFG);
703 
704 		qc = ata_qc_from_tag(ap, ap->link.active_tag);
705 
706 		if (!qc || (qc->tf.flags & ATA_TFLAG_POLLING))
707 			continue;
708 
709 		if (dma_int.s.done && !dma_cfg.s.en) {
710 			if (!sg_is_last(qc->cursg)) {
711 				qc->cursg = sg_next(qc->cursg);
712 				handled = 1;
713 				octeon_cf_dma_start(qc);
714 				continue;
715 			} else {
716 				cf_port->dma_finished = 1;
717 			}
718 		}
719 		if (!cf_port->dma_finished)
720 			continue;
721 		status = ioread8(ap->ioaddr.altstatus_addr);
722 		if (status & (ATA_BUSY | ATA_DRQ)) {
723 			/*
724 			 * We are busy, try to handle it later.  This
725 			 * is the DMA finished interrupt, and it could
726 			 * take a little while for the card to be
727 			 * ready for more commands.
728 			 */
729 			/* Clear DMA irq. */
730 			dma_int.u64 = 0;
731 			dma_int.s.done = 1;
732 			cvmx_write_csr(cf_port->dma_base + DMA_INT,
733 				       dma_int.u64);
734 			hrtimer_start_range_ns(&cf_port->delayed_finish,
735 					       ns_to_ktime(OCTEON_CF_BUSY_POLL_INTERVAL),
736 					       OCTEON_CF_BUSY_POLL_INTERVAL / 5,
737 					       HRTIMER_MODE_REL);
738 			handled = 1;
739 		} else {
740 			handled |= octeon_cf_dma_finished(ap, qc);
741 		}
742 	}
743 	spin_unlock_irqrestore(&host->lock, flags);
744 	DPRINTK("EXIT\n");
745 	return IRQ_RETVAL(handled);
746 }
747 
748 static enum hrtimer_restart octeon_cf_delayed_finish(struct hrtimer *hrt)
749 {
750 	struct octeon_cf_port *cf_port = container_of(hrt,
751 						      struct octeon_cf_port,
752 						      delayed_finish);
753 	struct ata_port *ap = cf_port->ap;
754 	struct ata_host *host = ap->host;
755 	struct ata_queued_cmd *qc;
756 	unsigned long flags;
757 	u8 status;
758 	enum hrtimer_restart rv = HRTIMER_NORESTART;
759 
760 	spin_lock_irqsave(&host->lock, flags);
761 
762 	/*
763 	 * If the port is not waiting for completion, it must have
764 	 * handled it previously.  The hsm_task_state is
765 	 * protected by host->lock.
766 	 */
767 	if (ap->hsm_task_state != HSM_ST_LAST || !cf_port->dma_finished)
768 		goto out;
769 
770 	status = ioread8(ap->ioaddr.altstatus_addr);
771 	if (status & (ATA_BUSY | ATA_DRQ)) {
772 		/* Still busy, try again. */
773 		hrtimer_forward_now(hrt,
774 				    ns_to_ktime(OCTEON_CF_BUSY_POLL_INTERVAL));
775 		rv = HRTIMER_RESTART;
776 		goto out;
777 	}
778 	qc = ata_qc_from_tag(ap, ap->link.active_tag);
779 	if (qc && (!(qc->tf.flags & ATA_TFLAG_POLLING)))
780 		octeon_cf_dma_finished(ap, qc);
781 out:
782 	spin_unlock_irqrestore(&host->lock, flags);
783 	return rv;
784 }
785 
786 static void octeon_cf_dev_config(struct ata_device *dev)
787 {
788 	/*
789 	 * A maximum of 2^20 - 1 16 bit transfers are possible with
790 	 * the bootbus DMA.  So we need to throttle max_sectors to
791 	 * (2^12 - 1 == 4095) to assure that this can never happen.
792 	 */
793 	dev->max_sectors = min(dev->max_sectors, 4095U);
794 }
795 
/*
 * ATAPI DMA is not implemented by this driver; always returns 0,
 * whatever @qc is.
 */
static int octeon_cf_check_atapi_dma(struct ata_queued_cmd *qc)
{
	return 0;
}
803 
804 static unsigned int octeon_cf_qc_issue(struct ata_queued_cmd *qc)
805 {
806 	struct ata_port *ap = qc->ap;
807 
808 	switch (qc->tf.protocol) {
809 	case ATA_PROT_DMA:
810 		WARN_ON(qc->tf.flags & ATA_TFLAG_POLLING);
811 
812 		ap->ops->sff_tf_load(ap, &qc->tf);  /* load tf registers */
813 		octeon_cf_dma_setup(qc);	    /* set up dma */
814 		octeon_cf_dma_start(qc);	    /* initiate dma */
815 		ap->hsm_task_state = HSM_ST_LAST;
816 		break;
817 
818 	case ATAPI_PROT_DMA:
819 		dev_err(ap->dev, "Error, ATAPI not supported\n");
820 		BUG();
821 
822 	default:
823 		return ata_sff_qc_issue(qc);
824 	}
825 
826 	return 0;
827 }
828 
829 static struct ata_port_operations octeon_cf_ops = {
830 	.inherits		= &ata_sff_port_ops,
831 	.check_atapi_dma	= octeon_cf_check_atapi_dma,
832 	.qc_prep		= ata_noop_qc_prep,
833 	.qc_issue		= octeon_cf_qc_issue,
834 	.sff_dev_select		= octeon_cf_dev_select,
835 	.sff_irq_on		= octeon_cf_ata_port_noaction,
836 	.sff_irq_clear		= octeon_cf_ata_port_noaction,
837 	.cable_detect		= ata_cable_40wire,
838 	.set_piomode		= octeon_cf_set_piomode,
839 	.set_dmamode		= octeon_cf_set_dmamode,
840 	.dev_config		= octeon_cf_dev_config,
841 };
842 
843 static int octeon_cf_probe(struct platform_device *pdev)
844 {
845 	struct resource *res_cs0, *res_cs1;
846 
847 	bool is_16bit;
848 	const __be32 *cs_num;
849 	struct property *reg_prop;
850 	int n_addr, n_size, reg_len;
851 	struct device_node *node;
852 	const void *prop;
853 	void __iomem *cs0;
854 	void __iomem *cs1 = NULL;
855 	struct ata_host *host;
856 	struct ata_port *ap;
857 	int irq = 0;
858 	irq_handler_t irq_handler = NULL;
859 	void __iomem *base;
860 	struct octeon_cf_port *cf_port;
861 	int rv = -ENOMEM;
862 
863 
864 	node = pdev->dev.of_node;
865 	if (node == NULL)
866 		return -EINVAL;
867 
868 	cf_port = devm_kzalloc(&pdev->dev, sizeof(*cf_port), GFP_KERNEL);
869 	if (!cf_port)
870 		return -ENOMEM;
871 
872 	cf_port->is_true_ide = (of_find_property(node, "cavium,true-ide", NULL) != NULL);
873 
874 	prop = of_get_property(node, "cavium,bus-width", NULL);
875 	if (prop)
876 		is_16bit = (be32_to_cpup(prop) == 16);
877 	else
878 		is_16bit = false;
879 
880 	n_addr = of_n_addr_cells(node);
881 	n_size = of_n_size_cells(node);
882 
883 	reg_prop = of_find_property(node, "reg", &reg_len);
884 	if (!reg_prop || reg_len < sizeof(__be32))
885 		return -EINVAL;
886 
887 	cs_num = reg_prop->value;
888 	cf_port->cs0 = be32_to_cpup(cs_num);
889 
890 	if (cf_port->is_true_ide) {
891 		struct device_node *dma_node;
892 		dma_node = of_parse_phandle(node,
893 					    "cavium,dma-engine-handle", 0);
894 		if (dma_node) {
895 			struct platform_device *dma_dev;
896 			dma_dev = of_find_device_by_node(dma_node);
897 			if (dma_dev) {
898 				struct resource *res_dma;
899 				int i;
900 				res_dma = platform_get_resource(dma_dev, IORESOURCE_MEM, 0);
901 				if (!res_dma) {
902 					of_node_put(dma_node);
903 					return -EINVAL;
904 				}
905 				cf_port->dma_base = (u64)devm_ioremap_nocache(&pdev->dev, res_dma->start,
906 									 resource_size(res_dma));
907 				if (!cf_port->dma_base) {
908 					of_node_put(dma_node);
909 					return -EINVAL;
910 				}
911 
912 				irq_handler = octeon_cf_interrupt;
913 				i = platform_get_irq(dma_dev, 0);
914 				if (i > 0)
915 					irq = i;
916 			}
917 			of_node_put(dma_node);
918 		}
919 		res_cs1 = platform_get_resource(pdev, IORESOURCE_MEM, 1);
920 		if (!res_cs1)
921 			return -EINVAL;
922 
923 		cs1 = devm_ioremap_nocache(&pdev->dev, res_cs1->start,
924 					   resource_size(res_cs1));
925 		if (!cs1)
926 			return rv;
927 
928 		if (reg_len < (n_addr + n_size + 1) * sizeof(__be32))
929 			return -EINVAL;
930 
931 		cs_num += n_addr + n_size;
932 		cf_port->cs1 = be32_to_cpup(cs_num);
933 	}
934 
935 	res_cs0 = platform_get_resource(pdev, IORESOURCE_MEM, 0);
936 	if (!res_cs0)
937 		return -EINVAL;
938 
939 	cs0 = devm_ioremap_nocache(&pdev->dev, res_cs0->start,
940 				   resource_size(res_cs0));
941 	if (!cs0)
942 		return rv;
943 
944 	/* allocate host */
945 	host = ata_host_alloc(&pdev->dev, 1);
946 	if (!host)
947 		return rv;
948 
949 	ap = host->ports[0];
950 	ap->private_data = cf_port;
951 	pdev->dev.platform_data = cf_port;
952 	cf_port->ap = ap;
953 	ap->ops = &octeon_cf_ops;
954 	ap->pio_mask = ATA_PIO6;
955 	ap->flags |= ATA_FLAG_NO_ATAPI | ATA_FLAG_PIO_POLLING;
956 
957 	if (!is_16bit) {
958 		base = cs0 + 0x800;
959 		ap->ioaddr.cmd_addr	= base;
960 		ata_sff_std_ports(&ap->ioaddr);
961 
962 		ap->ioaddr.altstatus_addr = base + 0xe;
963 		ap->ioaddr.ctl_addr	= base + 0xe;
964 		octeon_cf_ops.sff_data_xfer = octeon_cf_data_xfer8;
965 	} else if (cf_port->is_true_ide) {
966 		base = cs0;
967 		ap->ioaddr.cmd_addr	= base + (ATA_REG_CMD << 1) + 1;
968 		ap->ioaddr.data_addr	= base + (ATA_REG_DATA << 1);
969 		ap->ioaddr.error_addr	= base + (ATA_REG_ERR << 1) + 1;
970 		ap->ioaddr.feature_addr	= base + (ATA_REG_FEATURE << 1) + 1;
971 		ap->ioaddr.nsect_addr	= base + (ATA_REG_NSECT << 1) + 1;
972 		ap->ioaddr.lbal_addr	= base + (ATA_REG_LBAL << 1) + 1;
973 		ap->ioaddr.lbam_addr	= base + (ATA_REG_LBAM << 1) + 1;
974 		ap->ioaddr.lbah_addr	= base + (ATA_REG_LBAH << 1) + 1;
975 		ap->ioaddr.device_addr	= base + (ATA_REG_DEVICE << 1) + 1;
976 		ap->ioaddr.status_addr	= base + (ATA_REG_STATUS << 1) + 1;
977 		ap->ioaddr.command_addr	= base + (ATA_REG_CMD << 1) + 1;
978 		ap->ioaddr.altstatus_addr = cs1 + (6 << 1) + 1;
979 		ap->ioaddr.ctl_addr	= cs1 + (6 << 1) + 1;
980 		octeon_cf_ops.sff_data_xfer = octeon_cf_data_xfer16;
981 
982 		ap->mwdma_mask	= enable_dma ? ATA_MWDMA4 : 0;
983 
984 		/* True IDE mode needs a timer to poll for not-busy.  */
985 		hrtimer_init(&cf_port->delayed_finish, CLOCK_MONOTONIC,
986 			     HRTIMER_MODE_REL);
987 		cf_port->delayed_finish.function = octeon_cf_delayed_finish;
988 	} else {
989 		/* 16 bit but not True IDE */
990 		base = cs0 + 0x800;
991 		octeon_cf_ops.sff_data_xfer	= octeon_cf_data_xfer16;
992 		octeon_cf_ops.softreset		= octeon_cf_softreset16;
993 		octeon_cf_ops.sff_check_status	= octeon_cf_check_status16;
994 		octeon_cf_ops.sff_tf_read	= octeon_cf_tf_read16;
995 		octeon_cf_ops.sff_tf_load	= octeon_cf_tf_load16;
996 		octeon_cf_ops.sff_exec_command	= octeon_cf_exec_command16;
997 
998 		ap->ioaddr.data_addr	= base + ATA_REG_DATA;
999 		ap->ioaddr.nsect_addr	= base + ATA_REG_NSECT;
1000 		ap->ioaddr.lbal_addr	= base + ATA_REG_LBAL;
1001 		ap->ioaddr.ctl_addr	= base + 0xe;
1002 		ap->ioaddr.altstatus_addr = base + 0xe;
1003 	}
1004 	cf_port->c0 = ap->ioaddr.ctl_addr;
1005 
1006 	rv = dma_coerce_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
1007 	if (rv)
1008 		return rv;
1009 
1010 	ata_port_desc(ap, "cmd %p ctl %p", base, ap->ioaddr.ctl_addr);
1011 
1012 	dev_info(&pdev->dev, "version " DRV_VERSION" %d bit%s.\n",
1013 		 is_16bit ? 16 : 8,
1014 		 cf_port->is_true_ide ? ", True IDE" : "");
1015 
1016 	return ata_host_activate(host, irq, irq_handler,
1017 				 IRQF_SHARED, &octeon_cf_sht);
1018 }
1019 
1020 static void octeon_cf_shutdown(struct device *dev)
1021 {
1022 	union cvmx_mio_boot_dma_cfgx dma_cfg;
1023 	union cvmx_mio_boot_dma_intx dma_int;
1024 
1025 	struct octeon_cf_port *cf_port = dev_get_platdata(dev);
1026 
1027 	if (cf_port->dma_base) {
1028 		/* Stop and clear the dma engine.  */
1029 		dma_cfg.u64 = 0;
1030 		dma_cfg.s.size = -1;
1031 		cvmx_write_csr(cf_port->dma_base + DMA_CFG, dma_cfg.u64);
1032 
1033 		/* Disable the interrupt.  */
1034 		dma_int.u64 = 0;
1035 		cvmx_write_csr(cf_port->dma_base + DMA_INT_EN, dma_int.u64);
1036 
1037 		/* Clear the DMA complete status */
1038 		dma_int.s.done = 1;
1039 		cvmx_write_csr(cf_port->dma_base + DMA_INT, dma_int.u64);
1040 
1041 		__raw_writeb(0, cf_port->c0);
1042 		udelay(20);
1043 		__raw_writeb(ATA_SRST, cf_port->c0);
1044 		udelay(20);
1045 		__raw_writeb(0, cf_port->c0);
1046 		mdelay(100);
1047 	}
1048 }
1049 
1050 static struct of_device_id octeon_cf_match[] = {
1051 	{
1052 		.compatible = "cavium,ebt3000-compact-flash",
1053 	},
1054 	{},
1055 };
1056 MODULE_DEVICE_TABLE(of, octeon_i2c_match);
1057 
1058 static struct platform_driver octeon_cf_driver = {
1059 	.probe		= octeon_cf_probe,
1060 	.driver		= {
1061 		.name	= DRV_NAME,
1062 		.of_match_table = octeon_cf_match,
1063 		.shutdown = octeon_cf_shutdown
1064 	},
1065 };
1066 
1067 static int __init octeon_cf_init(void)
1068 {
1069 	return platform_driver_register(&octeon_cf_driver);
1070 }
1071 
1072 
1073 MODULE_AUTHOR("David Daney <ddaney@caviumnetworks.com>");
1074 MODULE_DESCRIPTION("low-level driver for Cavium OCTEON Compact Flash PATA");
1075 MODULE_LICENSE("GPL");
1076 MODULE_VERSION(DRV_VERSION);
1077 MODULE_ALIAS("platform:" DRV_NAME);
1078 
1079 module_init(octeon_cf_init);
1080