xref: /openbmc/linux/drivers/gpu/drm/omapdrm/omap_dmm_tiler.c (revision fe17b91a7777df140d0f1433991da67ba658796c)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * DMM IOMMU driver support functions for TI OMAP processors.
4  *
5  * Copyright (C) 2011 Texas Instruments Incorporated - https://www.ti.com/
6  * Author: Rob Clark <rob@ti.com>
7  *         Andy Gross <andy.gross@ti.com>
8  */
9 
10 #include <linux/completion.h>
11 #include <linux/delay.h>
12 #include <linux/dma-mapping.h>
13 #include <linux/dmaengine.h>
14 #include <linux/errno.h>
15 #include <linux/init.h>
16 #include <linux/interrupt.h>
17 #include <linux/list.h>
18 #include <linux/mm.h>
19 #include <linux/module.h>
20 #include <linux/platform_device.h> /* platform_device() */
21 #include <linux/sched.h>
22 #include <linux/seq_file.h>
23 #include <linux/slab.h>
24 #include <linux/time.h>
25 #include <linux/vmalloc.h>
26 #include <linux/wait.h>
27 
28 #include "omap_dmm_tiler.h"
29 #include "omap_dmm_priv.h"
30 
31 #define DMM_DRIVER_NAME "dmm"
32 
33 /* mappings for associating views to luts */
34 static struct tcm *containers[TILFMT_NFORMATS];
35 static struct dmm *omap_dmm;
36 
37 #if defined(CONFIG_OF)
38 static const struct of_device_id dmm_of_match[];
39 #endif
40 
41 /* global spinlock for protecting lists */
42 static DEFINE_SPINLOCK(list_lock);
43 
44 /* Geometry table */
45 #define GEOM(xshift, yshift, bytes_per_pixel) { \
46 		.x_shft = (xshift), \
47 		.y_shft = (yshift), \
48 		.cpp    = (bytes_per_pixel), \
49 		.slot_w = 1 << (SLOT_WIDTH_BITS - (xshift)), \
50 		.slot_h = 1 << (SLOT_HEIGHT_BITS - (yshift)), \
51 	}
52 
53 static const struct {
54 	u32 x_shft;	/* unused X-bits (as part of bpp) */
55 	u32 y_shft;	/* unused Y-bits (as part of bpp) */
56 	u32 cpp;		/* bytes/chars per pixel */
57 	u32 slot_w;	/* width of each slot (in pixels) */
58 	u32 slot_h;	/* height of each slot (in pixels) */
59 } geom[TILFMT_NFORMATS] = {
60 	[TILFMT_8BIT]  = GEOM(0, 0, 1),
61 	[TILFMT_16BIT] = GEOM(0, 1, 2),
62 	[TILFMT_32BIT] = GEOM(1, 1, 4),
63 	[TILFMT_PAGE]  = GEOM(SLOT_WIDTH_BITS, SLOT_HEIGHT_BITS, 1),
64 };
65 
66 
67 /* lookup table for registers w/ per-engine instances */
68 static const u32 reg[][4] = {
69 	[PAT_STATUS] = {DMM_PAT_STATUS__0, DMM_PAT_STATUS__1,
70 			DMM_PAT_STATUS__2, DMM_PAT_STATUS__3},
71 	[PAT_DESCR]  = {DMM_PAT_DESCR__0, DMM_PAT_DESCR__1,
72 			DMM_PAT_DESCR__2, DMM_PAT_DESCR__3},
73 };
74 
75 static int dmm_dma_copy(struct dmm *dmm, dma_addr_t src, dma_addr_t dst)
76 {
77 	struct dma_async_tx_descriptor *tx;
78 	enum dma_status status;
79 	dma_cookie_t cookie;
80 
81 	tx = dmaengine_prep_dma_memcpy(dmm->wa_dma_chan, dst, src, 4, 0);
82 	if (!tx) {
83 		dev_err(dmm->dev, "Failed to prepare DMA memcpy\n");
84 		return -EIO;
85 	}
86 
87 	cookie = tx->tx_submit(tx);
88 	if (dma_submit_error(cookie)) {
89 		dev_err(dmm->dev, "Failed to do DMA tx_submit\n");
90 		return -EIO;
91 	}
92 
93 	status = dma_sync_wait(dmm->wa_dma_chan, cookie);
94 	if (status != DMA_COMPLETE)
95 		dev_err(dmm->dev, "i878 wa DMA copy failure\n");
96 
97 	dmaengine_terminate_all(dmm->wa_dma_chan);
98 	return 0;
99 }
100 
101 static u32 dmm_read_wa(struct dmm *dmm, u32 reg)
102 {
103 	dma_addr_t src, dst;
104 	int r;
105 
106 	src = dmm->phys_base + reg;
107 	dst = dmm->wa_dma_handle;
108 
109 	r = dmm_dma_copy(dmm, src, dst);
110 	if (r) {
111 		dev_err(dmm->dev, "sDMA read transfer timeout\n");
112 		return readl(dmm->base + reg);
113 	}
114 
115 	/*
116 	 * As per i878 workaround, the DMA is used to access the DMM registers.
117 	 * Make sure that the readl is not moved by the compiler or the CPU
118 	 * earlier than the DMA finished writing the value to memory.
119 	 */
120 	rmb();
121 	return readl(dmm->wa_dma_data);
122 }
123 
124 static void dmm_write_wa(struct dmm *dmm, u32 val, u32 reg)
125 {
126 	dma_addr_t src, dst;
127 	int r;
128 
129 	writel(val, dmm->wa_dma_data);
130 	/*
131 	 * As per i878 workaround, the DMA is used to access the DMM registers.
132 	 * Make sure that the writel is not moved by the compiler or the CPU, so
133 	 * the data will be in place before we start the DMA to do the actual
134 	 * register write.
135 	 */
136 	wmb();
137 
138 	src = dmm->wa_dma_handle;
139 	dst = dmm->phys_base + reg;
140 
141 	r = dmm_dma_copy(dmm, src, dst);
142 	if (r) {
143 		dev_err(dmm->dev, "sDMA write transfer timeout\n");
144 		writel(val, dmm->base + reg);
145 	}
146 }
147 
148 static u32 dmm_read(struct dmm *dmm, u32 reg)
149 {
150 	if (dmm->dmm_workaround) {
151 		u32 v;
152 		unsigned long flags;
153 
154 		spin_lock_irqsave(&dmm->wa_lock, flags);
155 		v = dmm_read_wa(dmm, reg);
156 		spin_unlock_irqrestore(&dmm->wa_lock, flags);
157 
158 		return v;
159 	} else {
160 		return readl(dmm->base + reg);
161 	}
162 }
163 
164 static void dmm_write(struct dmm *dmm, u32 val, u32 reg)
165 {
166 	if (dmm->dmm_workaround) {
167 		unsigned long flags;
168 
169 		spin_lock_irqsave(&dmm->wa_lock, flags);
170 		dmm_write_wa(dmm, val, reg);
171 		spin_unlock_irqrestore(&dmm->wa_lock, flags);
172 	} else {
173 		writel(val, dmm->base + reg);
174 	}
175 }
176 
177 static int dmm_workaround_init(struct dmm *dmm)
178 {
179 	dma_cap_mask_t mask;
180 
181 	spin_lock_init(&dmm->wa_lock);
182 
183 	dmm->wa_dma_data = dma_alloc_coherent(dmm->dev,  sizeof(u32),
184 					      &dmm->wa_dma_handle, GFP_KERNEL);
185 	if (!dmm->wa_dma_data)
186 		return -ENOMEM;
187 
188 	dma_cap_zero(mask);
189 	dma_cap_set(DMA_MEMCPY, mask);
190 
191 	dmm->wa_dma_chan = dma_request_channel(mask, NULL, NULL);
192 	if (!dmm->wa_dma_chan) {
193 		dma_free_coherent(dmm->dev, 4, dmm->wa_dma_data, dmm->wa_dma_handle);
194 		return -ENODEV;
195 	}
196 
197 	return 0;
198 }
199 
200 static void dmm_workaround_uninit(struct dmm *dmm)
201 {
202 	dma_release_channel(dmm->wa_dma_chan);
203 
204 	dma_free_coherent(dmm->dev, 4, dmm->wa_dma_data, dmm->wa_dma_handle);
205 }
206 
207 /* simple allocator to grab next 16 byte aligned memory from txn */
208 static void *alloc_dma(struct dmm_txn *txn, size_t sz, dma_addr_t *pa)
209 {
210 	void *ptr;
211 	struct refill_engine *engine = txn->engine_handle;
212 
213 	/* dmm programming requires 16 byte aligned addresses */
214 	txn->current_pa = round_up(txn->current_pa, 16);
215 	txn->current_va = (void *)round_up((long)txn->current_va, 16);
216 
217 	ptr = txn->current_va;
218 	*pa = txn->current_pa;
219 
220 	txn->current_pa += sz;
221 	txn->current_va += sz;
222 
223 	BUG_ON((txn->current_va - engine->refill_va) > REFILL_BUFFER_SIZE);
224 
225 	return ptr;
226 }
227 
228 /* check status and spin until wait_mask comes true */
229 static int wait_status(struct refill_engine *engine, u32 wait_mask)
230 {
231 	struct dmm *dmm = engine->dmm;
232 	u32 r = 0, err, i;
233 
234 	i = DMM_FIXED_RETRY_COUNT;
235 	while (true) {
236 		r = dmm_read(dmm, reg[PAT_STATUS][engine->id]);
237 		err = r & DMM_PATSTATUS_ERR;
238 		if (err) {
239 			dev_err(dmm->dev,
240 				"%s: error (engine%d). PAT_STATUS: 0x%08x\n",
241 				__func__, engine->id, r);
242 			return -EFAULT;
243 		}
244 
245 		if ((r & wait_mask) == wait_mask)
246 			break;
247 
248 		if (--i == 0) {
249 			dev_err(dmm->dev,
250 				"%s: timeout (engine%d). PAT_STATUS: 0x%08x\n",
251 				__func__, engine->id, r);
252 			return -ETIMEDOUT;
253 		}
254 
255 		udelay(1);
256 	}
257 
258 	return 0;
259 }
260 
261 static void release_engine(struct refill_engine *engine)
262 {
263 	unsigned long flags;
264 
265 	spin_lock_irqsave(&list_lock, flags);
266 	list_add(&engine->idle_node, &omap_dmm->idle_head);
267 	spin_unlock_irqrestore(&list_lock, flags);
268 
269 	atomic_inc(&omap_dmm->engine_counter);
270 	wake_up_interruptible(&omap_dmm->engine_queue);
271 }
272 
273 static irqreturn_t omap_dmm_irq_handler(int irq, void *arg)
274 {
275 	struct dmm *dmm = arg;
276 	u32 status = dmm_read(dmm, DMM_PAT_IRQSTATUS);
277 	int i;
278 
279 	/* ack IRQ */
280 	dmm_write(dmm, status, DMM_PAT_IRQSTATUS);
281 
282 	for (i = 0; i < dmm->num_engines; i++) {
283 		if (status & DMM_IRQSTAT_ERR_MASK)
284 			dev_err(dmm->dev,
285 				"irq error(engine%d): IRQSTAT 0x%02x\n",
286 				i, status & 0xff);
287 
288 		if (status & DMM_IRQSTAT_LST) {
289 			if (dmm->engines[i].async)
290 				release_engine(&dmm->engines[i]);
291 
292 			complete(&dmm->engines[i].compl);
293 		}
294 
295 		status >>= 8;
296 	}
297 
298 	return IRQ_HANDLED;
299 }
300 
301 /*
302  * Get a handle for a DMM transaction
303  */
304 static struct dmm_txn *dmm_txn_init(struct dmm *dmm, struct tcm *tcm)
305 {
306 	struct dmm_txn *txn = NULL;
307 	struct refill_engine *engine = NULL;
308 	int ret;
309 	unsigned long flags;
310 
311 
312 	/* wait until an engine is available */
313 	ret = wait_event_interruptible(omap_dmm->engine_queue,
314 		atomic_add_unless(&omap_dmm->engine_counter, -1, 0));
315 	if (ret)
316 		return ERR_PTR(ret);
317 
318 	/* grab an idle engine */
319 	spin_lock_irqsave(&list_lock, flags);
320 	if (!list_empty(&dmm->idle_head)) {
321 		engine = list_entry(dmm->idle_head.next, struct refill_engine,
322 					idle_node);
323 		list_del(&engine->idle_node);
324 	}
325 	spin_unlock_irqrestore(&list_lock, flags);
326 
327 	BUG_ON(!engine);
328 
329 	txn = &engine->txn;
330 	engine->tcm = tcm;
331 	txn->engine_handle = engine;
332 	txn->last_pat = NULL;
333 	txn->current_va = engine->refill_va;
334 	txn->current_pa = engine->refill_pa;
335 
336 	return txn;
337 }
338 
339 /*
340  * Add region to DMM transaction.  If pages or pages[i] is NULL, then the
341  * corresponding slot is cleared (ie. dummy_pa is programmed)
342  */
343 static void dmm_txn_append(struct dmm_txn *txn, struct pat_area *area,
344 		struct page **pages, u32 npages, u32 roll)
345 {
346 	dma_addr_t pat_pa = 0, data_pa = 0;
347 	u32 *data;
348 	struct pat *pat;
349 	struct refill_engine *engine = txn->engine_handle;
350 	int columns = (1 + area->x1 - area->x0);
351 	int rows = (1 + area->y1 - area->y0);
352 	int i = columns*rows;
353 
354 	pat = alloc_dma(txn, sizeof(*pat), &pat_pa);
355 
356 	if (txn->last_pat)
357 		txn->last_pat->next_pa = (u32)pat_pa;
358 
359 	pat->area = *area;
360 
361 	/* adjust Y coordinates based off of container parameters */
362 	pat->area.y0 += engine->tcm->y_offset;
363 	pat->area.y1 += engine->tcm->y_offset;
364 
365 	pat->ctrl = (struct pat_ctrl){
366 			.start = 1,
367 			.lut_id = engine->tcm->lut_id,
368 		};
369 
370 	data = alloc_dma(txn, 4*i, &data_pa);
371 	/* FIXME: what if data_pa is more than 32-bit ? */
372 	pat->data_pa = data_pa;
373 
374 	while (i--) {
375 		int n = i + roll;
376 		if (n >= npages)
377 			n -= npages;
378 		data[i] = (pages && pages[n]) ?
379 			page_to_phys(pages[n]) : engine->dmm->dummy_pa;
380 	}
381 
382 	txn->last_pat = pat;
383 
384 	return;
385 }
386 
387 /*
388  * Commit the DMM transaction.
389  */
390 static int dmm_txn_commit(struct dmm_txn *txn, bool wait)
391 {
392 	int ret = 0;
393 	struct refill_engine *engine = txn->engine_handle;
394 	struct dmm *dmm = engine->dmm;
395 
396 	if (!txn->last_pat) {
397 		dev_err(engine->dmm->dev, "need at least one txn\n");
398 		ret = -EINVAL;
399 		goto cleanup;
400 	}
401 
402 	txn->last_pat->next_pa = 0;
403 	/* ensure that the written descriptors are visible to DMM */
404 	wmb();
405 
406 	/*
407 	 * NOTE: the wmb() above should be enough, but there seems to be a bug
408 	 * in OMAP's memory barrier implementation, which in some rare cases may
409 	 * cause the writes not to be observable after wmb().
410 	 */
411 
412 	/* read back to ensure the data is in RAM */
413 	readl(&txn->last_pat->next_pa);
414 
415 	/* write to PAT_DESCR to clear out any pending transaction */
416 	dmm_write(dmm, 0x0, reg[PAT_DESCR][engine->id]);
417 
418 	/* wait for engine ready: */
419 	ret = wait_status(engine, DMM_PATSTATUS_READY);
420 	if (ret) {
421 		ret = -EFAULT;
422 		goto cleanup;
423 	}
424 
425 	/* mark whether it is async to denote list management in IRQ handler */
426 	engine->async = wait ? false : true;
427 	reinit_completion(&engine->compl);
428 	/* verify that the irq handler sees the 'async' and completion value */
429 	smp_mb();
430 
431 	/* kick reload */
432 	dmm_write(dmm, engine->refill_pa, reg[PAT_DESCR][engine->id]);
433 
434 	if (wait) {
435 		if (!wait_for_completion_timeout(&engine->compl,
436 				msecs_to_jiffies(100))) {
437 			dev_err(dmm->dev, "timed out waiting for done\n");
438 			ret = -ETIMEDOUT;
439 			goto cleanup;
440 		}
441 
442 		/* Check the engine status before continue */
443 		ret = wait_status(engine, DMM_PATSTATUS_READY |
444 				  DMM_PATSTATUS_VALID | DMM_PATSTATUS_DONE);
445 	}
446 
447 cleanup:
448 	/* only place engine back on list if we are done with it */
449 	if (ret || wait)
450 		release_engine(engine);
451 
452 	return ret;
453 }
454 
455 /*
456  * DMM programming
457  */
458 static int fill(struct tcm_area *area, struct page **pages,
459 		u32 npages, u32 roll, bool wait)
460 {
461 	int ret = 0;
462 	struct tcm_area slice, area_s;
463 	struct dmm_txn *txn;
464 
465 	/*
466 	 * FIXME
467 	 *
468 	 * Asynchronous fill does not work reliably, as the driver does not
469 	 * handle errors in the async code paths. The fill operation may
470 	 * silently fail, leading to leaking DMM engines, which may eventually
471 	 * lead to deadlock if we run out of DMM engines.
472 	 *
473 	 * For now, always set 'wait' so that we only use sync fills. Async
474 	 * fills should be fixed, or alternatively we could decide to only
475 	 * support sync fills and so the whole async code path could be removed.
476 	 */
477 
478 	wait = true;
479 
480 	txn = dmm_txn_init(omap_dmm, area->tcm);
481 	if (IS_ERR_OR_NULL(txn))
482 		return -ENOMEM;
483 
484 	tcm_for_each_slice(slice, *area, area_s) {
485 		struct pat_area p_area = {
486 				.x0 = slice.p0.x,  .y0 = slice.p0.y,
487 				.x1 = slice.p1.x,  .y1 = slice.p1.y,
488 		};
489 
490 		dmm_txn_append(txn, &p_area, pages, npages, roll);
491 
492 		roll += tcm_sizeof(slice);
493 	}
494 
495 	ret = dmm_txn_commit(txn, wait);
496 
497 	return ret;
498 }
499 
500 /*
501  * Pin/unpin
502  */
503 
504 /* note: slots for which pages[i] == NULL are filled w/ dummy page
505  */
506 int tiler_pin(struct tiler_block *block, struct page **pages,
507 		u32 npages, u32 roll, bool wait)
508 {
509 	int ret;
510 
511 	ret = fill(&block->area, pages, npages, roll, wait);
512 
513 	if (ret)
514 		tiler_unpin(block);
515 
516 	return ret;
517 }
518 
519 int tiler_unpin(struct tiler_block *block)
520 {
521 	return fill(&block->area, NULL, 0, 0, false);
522 }
523 
524 /*
525  * Reserve/release
526  */
527 struct tiler_block *tiler_reserve_2d(enum tiler_fmt fmt, u16 w,
528 		u16 h, u16 align)
529 {
530 	struct tiler_block *block;
531 	u32 min_align = 128;
532 	int ret;
533 	unsigned long flags;
534 	u32 slot_bytes;
535 
536 	block = kzalloc(sizeof(*block), GFP_KERNEL);
537 	if (!block)
538 		return ERR_PTR(-ENOMEM);
539 
540 	BUG_ON(!validfmt(fmt));
541 
542 	/* convert width/height to slots */
543 	w = DIV_ROUND_UP(w, geom[fmt].slot_w);
544 	h = DIV_ROUND_UP(h, geom[fmt].slot_h);
545 
546 	/* convert alignment to slots */
547 	slot_bytes = geom[fmt].slot_w * geom[fmt].cpp;
548 	min_align = max(min_align, slot_bytes);
549 	align = (align > min_align) ? ALIGN(align, min_align) : min_align;
550 	align /= slot_bytes;
551 
552 	block->fmt = fmt;
553 
554 	ret = tcm_reserve_2d(containers[fmt], w, h, align, -1, slot_bytes,
555 			&block->area);
556 	if (ret) {
557 		kfree(block);
558 		return ERR_PTR(-ENOMEM);
559 	}
560 
561 	/* add to allocation list */
562 	spin_lock_irqsave(&list_lock, flags);
563 	list_add(&block->alloc_node, &omap_dmm->alloc_head);
564 	spin_unlock_irqrestore(&list_lock, flags);
565 
566 	return block;
567 }
568 
569 struct tiler_block *tiler_reserve_1d(size_t size)
570 {
571 	struct tiler_block *block = kzalloc(sizeof(*block), GFP_KERNEL);
572 	int num_pages = (size + PAGE_SIZE - 1) >> PAGE_SHIFT;
573 	unsigned long flags;
574 
575 	if (!block)
576 		return ERR_PTR(-ENOMEM);
577 
578 	block->fmt = TILFMT_PAGE;
579 
580 	if (tcm_reserve_1d(containers[TILFMT_PAGE], num_pages,
581 				&block->area)) {
582 		kfree(block);
583 		return ERR_PTR(-ENOMEM);
584 	}
585 
586 	spin_lock_irqsave(&list_lock, flags);
587 	list_add(&block->alloc_node, &omap_dmm->alloc_head);
588 	spin_unlock_irqrestore(&list_lock, flags);
589 
590 	return block;
591 }
592 
593 /* note: if you have pin'd pages, you should have already unpin'd first! */
594 int tiler_release(struct tiler_block *block)
595 {
596 	int ret = tcm_free(&block->area);
597 	unsigned long flags;
598 
599 	if (block->area.tcm)
600 		dev_err(omap_dmm->dev, "failed to release block\n");
601 
602 	spin_lock_irqsave(&list_lock, flags);
603 	list_del(&block->alloc_node);
604 	spin_unlock_irqrestore(&list_lock, flags);
605 
606 	kfree(block);
607 	return ret;
608 }
609 
610 /*
611  * Utils
612  */
613 
614 /* calculate the tiler space address of a pixel in a view orientation...
615  * below description copied from the display subsystem section of TRM:
616  *
617  * When the TILER is addressed, the bits:
618  *   [28:27] = 0x0 for 8-bit tiled
619  *             0x1 for 16-bit tiled
620  *             0x2 for 32-bit tiled
621  *             0x3 for page mode
622  *   [31:29] = 0x0 for 0-degree view
623  *             0x1 for 180-degree view + mirroring
624  *             0x2 for 0-degree view + mirroring
625  *             0x3 for 180-degree view
626  *             0x4 for 270-degree view + mirroring
627  *             0x5 for 270-degree view
628  *             0x6 for 90-degree view
629  *             0x7 for 90-degree view + mirroring
630  * Otherwise the bits indicated the corresponding bit address to access
631  * the SDRAM.
632  */
633 static u32 tiler_get_address(enum tiler_fmt fmt, u32 orient, u32 x, u32 y)
634 {
635 	u32 x_bits, y_bits, tmp, x_mask, y_mask, alignment;
636 
637 	x_bits = CONT_WIDTH_BITS - geom[fmt].x_shft;
638 	y_bits = CONT_HEIGHT_BITS - geom[fmt].y_shft;
639 	alignment = geom[fmt].x_shft + geom[fmt].y_shft;
640 
641 	/* validate coordinate */
642 	x_mask = MASK(x_bits);
643 	y_mask = MASK(y_bits);
644 
645 	if (x < 0 || x > x_mask || y < 0 || y > y_mask) {
646 		DBG("invalid coords: %u < 0 || %u > %u || %u < 0 || %u > %u",
647 				x, x, x_mask, y, y, y_mask);
648 		return 0;
649 	}
650 
651 	/* account for mirroring */
652 	if (orient & MASK_X_INVERT)
653 		x ^= x_mask;
654 	if (orient & MASK_Y_INVERT)
655 		y ^= y_mask;
656 
657 	/* get coordinate address */
658 	if (orient & MASK_XY_FLIP)
659 		tmp = ((x << y_bits) + y);
660 	else
661 		tmp = ((y << x_bits) + x);
662 
663 	return TIL_ADDR((tmp << alignment), orient, fmt);
664 }
665 
666 dma_addr_t tiler_ssptr(struct tiler_block *block)
667 {
668 	BUG_ON(!validfmt(block->fmt));
669 
670 	return TILVIEW_8BIT + tiler_get_address(block->fmt, 0,
671 			block->area.p0.x * geom[block->fmt].slot_w,
672 			block->area.p0.y * geom[block->fmt].slot_h);
673 }
674 
675 dma_addr_t tiler_tsptr(struct tiler_block *block, u32 orient,
676 		u32 x, u32 y)
677 {
678 	struct tcm_pt *p = &block->area.p0;
679 	BUG_ON(!validfmt(block->fmt));
680 
681 	return tiler_get_address(block->fmt, orient,
682 			(p->x * geom[block->fmt].slot_w) + x,
683 			(p->y * geom[block->fmt].slot_h) + y);
684 }
685 
686 void tiler_align(enum tiler_fmt fmt, u16 *w, u16 *h)
687 {
688 	BUG_ON(!validfmt(fmt));
689 	*w = round_up(*w, geom[fmt].slot_w);
690 	*h = round_up(*h, geom[fmt].slot_h);
691 }
692 
693 u32 tiler_stride(enum tiler_fmt fmt, u32 orient)
694 {
695 	BUG_ON(!validfmt(fmt));
696 
697 	if (orient & MASK_XY_FLIP)
698 		return 1 << (CONT_HEIGHT_BITS + geom[fmt].x_shft);
699 	else
700 		return 1 << (CONT_WIDTH_BITS + geom[fmt].y_shft);
701 }
702 
703 size_t tiler_size(enum tiler_fmt fmt, u16 w, u16 h)
704 {
705 	tiler_align(fmt, &w, &h);
706 	return geom[fmt].cpp * w * h;
707 }
708 
709 size_t tiler_vsize(enum tiler_fmt fmt, u16 w, u16 h)
710 {
711 	BUG_ON(!validfmt(fmt));
712 	return round_up(geom[fmt].cpp * w, PAGE_SIZE) * h;
713 }
714 
715 u32 tiler_get_cpu_cache_flags(void)
716 {
717 	return omap_dmm->plat_data->cpu_cache_flags;
718 }
719 
720 bool dmm_is_available(void)
721 {
722 	return omap_dmm ? true : false;
723 }
724 
725 static int omap_dmm_remove(struct platform_device *dev)
726 {
727 	struct tiler_block *block, *_block;
728 	int i;
729 	unsigned long flags;
730 
731 	if (omap_dmm) {
732 		/* Disable all enabled interrupts */
733 		dmm_write(omap_dmm, 0x7e7e7e7e, DMM_PAT_IRQENABLE_CLR);
734 		free_irq(omap_dmm->irq, omap_dmm);
735 
736 		/* free all area regions */
737 		spin_lock_irqsave(&list_lock, flags);
738 		list_for_each_entry_safe(block, _block, &omap_dmm->alloc_head,
739 					alloc_node) {
740 			list_del(&block->alloc_node);
741 			kfree(block);
742 		}
743 		spin_unlock_irqrestore(&list_lock, flags);
744 
745 		for (i = 0; i < omap_dmm->num_lut; i++)
746 			if (omap_dmm->tcm && omap_dmm->tcm[i])
747 				omap_dmm->tcm[i]->deinit(omap_dmm->tcm[i]);
748 		kfree(omap_dmm->tcm);
749 
750 		kfree(omap_dmm->engines);
751 		if (omap_dmm->refill_va)
752 			dma_free_wc(omap_dmm->dev,
753 				    REFILL_BUFFER_SIZE * omap_dmm->num_engines,
754 				    omap_dmm->refill_va, omap_dmm->refill_pa);
755 		if (omap_dmm->dummy_page)
756 			__free_page(omap_dmm->dummy_page);
757 
758 		if (omap_dmm->dmm_workaround)
759 			dmm_workaround_uninit(omap_dmm);
760 
761 		iounmap(omap_dmm->base);
762 		kfree(omap_dmm);
763 		omap_dmm = NULL;
764 	}
765 
766 	return 0;
767 }
768 
769 static int omap_dmm_probe(struct platform_device *dev)
770 {
771 	int ret = -EFAULT, i;
772 	struct tcm_area area = {0};
773 	u32 hwinfo, pat_geom;
774 	struct resource *mem;
775 
776 	omap_dmm = kzalloc(sizeof(*omap_dmm), GFP_KERNEL);
777 	if (!omap_dmm)
778 		goto fail;
779 
780 	/* initialize lists */
781 	INIT_LIST_HEAD(&omap_dmm->alloc_head);
782 	INIT_LIST_HEAD(&omap_dmm->idle_head);
783 
784 	init_waitqueue_head(&omap_dmm->engine_queue);
785 
786 	if (dev->dev.of_node) {
787 		const struct of_device_id *match;
788 
789 		match = of_match_node(dmm_of_match, dev->dev.of_node);
790 		if (!match) {
791 			dev_err(&dev->dev, "failed to find matching device node\n");
792 			ret = -ENODEV;
793 			goto fail;
794 		}
795 
796 		omap_dmm->plat_data = match->data;
797 	}
798 
799 	/* lookup hwmod data - base address and irq */
800 	mem = platform_get_resource(dev, IORESOURCE_MEM, 0);
801 	if (!mem) {
802 		dev_err(&dev->dev, "failed to get base address resource\n");
803 		goto fail;
804 	}
805 
806 	omap_dmm->phys_base = mem->start;
807 	omap_dmm->base = ioremap(mem->start, SZ_2K);
808 
809 	if (!omap_dmm->base) {
810 		dev_err(&dev->dev, "failed to get dmm base address\n");
811 		goto fail;
812 	}
813 
814 	omap_dmm->irq = platform_get_irq(dev, 0);
815 	if (omap_dmm->irq < 0) {
816 		dev_err(&dev->dev, "failed to get IRQ resource\n");
817 		goto fail;
818 	}
819 
820 	omap_dmm->dev = &dev->dev;
821 
822 	if (of_machine_is_compatible("ti,dra7")) {
823 		/*
824 		 * DRA7 Errata i878 says that MPU should not be used to access
825 		 * RAM and DMM at the same time. As it's not possible to prevent
826 		 * MPU accessing RAM, we need to access DMM via a proxy.
827 		 */
828 		if (!dmm_workaround_init(omap_dmm)) {
829 			omap_dmm->dmm_workaround = true;
830 			dev_info(&dev->dev,
831 				"workaround for errata i878 in use\n");
832 		} else {
833 			dev_warn(&dev->dev,
834 				 "failed to initialize work-around for i878\n");
835 		}
836 	}
837 
838 	hwinfo = dmm_read(omap_dmm, DMM_PAT_HWINFO);
839 	omap_dmm->num_engines = (hwinfo >> 24) & 0x1F;
840 	omap_dmm->num_lut = (hwinfo >> 16) & 0x1F;
841 	omap_dmm->container_width = 256;
842 	omap_dmm->container_height = 128;
843 
844 	atomic_set(&omap_dmm->engine_counter, omap_dmm->num_engines);
845 
846 	/* read out actual LUT width and height */
847 	pat_geom = dmm_read(omap_dmm, DMM_PAT_GEOMETRY);
848 	omap_dmm->lut_width = ((pat_geom >> 16) & 0xF) << 5;
849 	omap_dmm->lut_height = ((pat_geom >> 24) & 0xF) << 5;
850 
851 	/* increment LUT by one if on OMAP5 */
852 	/* LUT has twice the height, and is split into a separate container */
853 	if (omap_dmm->lut_height != omap_dmm->container_height)
854 		omap_dmm->num_lut++;
855 
856 	/* initialize DMM registers */
857 	dmm_write(omap_dmm, 0x88888888, DMM_PAT_VIEW__0);
858 	dmm_write(omap_dmm, 0x88888888, DMM_PAT_VIEW__1);
859 	dmm_write(omap_dmm, 0x80808080, DMM_PAT_VIEW_MAP__0);
860 	dmm_write(omap_dmm, 0x80000000, DMM_PAT_VIEW_MAP_BASE);
861 	dmm_write(omap_dmm, 0x88888888, DMM_TILER_OR__0);
862 	dmm_write(omap_dmm, 0x88888888, DMM_TILER_OR__1);
863 
864 	omap_dmm->dummy_page = alloc_page(GFP_KERNEL | __GFP_DMA32);
865 	if (!omap_dmm->dummy_page) {
866 		dev_err(&dev->dev, "could not allocate dummy page\n");
867 		ret = -ENOMEM;
868 		goto fail;
869 	}
870 
871 	/* set dma mask for device */
872 	ret = dma_set_coherent_mask(&dev->dev, DMA_BIT_MASK(32));
873 	if (ret)
874 		goto fail;
875 
876 	omap_dmm->dummy_pa = page_to_phys(omap_dmm->dummy_page);
877 
878 	/* alloc refill memory */
879 	omap_dmm->refill_va = dma_alloc_wc(&dev->dev,
880 					   REFILL_BUFFER_SIZE * omap_dmm->num_engines,
881 					   &omap_dmm->refill_pa, GFP_KERNEL);
882 	if (!omap_dmm->refill_va) {
883 		dev_err(&dev->dev, "could not allocate refill memory\n");
884 		ret = -ENOMEM;
885 		goto fail;
886 	}
887 
888 	/* alloc engines */
889 	omap_dmm->engines = kcalloc(omap_dmm->num_engines,
890 				    sizeof(*omap_dmm->engines), GFP_KERNEL);
891 	if (!omap_dmm->engines) {
892 		ret = -ENOMEM;
893 		goto fail;
894 	}
895 
896 	for (i = 0; i < omap_dmm->num_engines; i++) {
897 		omap_dmm->engines[i].id = i;
898 		omap_dmm->engines[i].dmm = omap_dmm;
899 		omap_dmm->engines[i].refill_va = omap_dmm->refill_va +
900 						(REFILL_BUFFER_SIZE * i);
901 		omap_dmm->engines[i].refill_pa = omap_dmm->refill_pa +
902 						(REFILL_BUFFER_SIZE * i);
903 		init_completion(&omap_dmm->engines[i].compl);
904 
905 		list_add(&omap_dmm->engines[i].idle_node, &omap_dmm->idle_head);
906 	}
907 
908 	omap_dmm->tcm = kcalloc(omap_dmm->num_lut, sizeof(*omap_dmm->tcm),
909 				GFP_KERNEL);
910 	if (!omap_dmm->tcm) {
911 		ret = -ENOMEM;
912 		goto fail;
913 	}
914 
915 	/* init containers */
916 	/* Each LUT is associated with a TCM (container manager).  We use the
917 	   lut_id to denote the lut_id used to identify the correct LUT for
918 	   programming during reill operations */
919 	for (i = 0; i < omap_dmm->num_lut; i++) {
920 		omap_dmm->tcm[i] = sita_init(omap_dmm->container_width,
921 						omap_dmm->container_height);
922 
923 		if (!omap_dmm->tcm[i]) {
924 			dev_err(&dev->dev, "failed to allocate container\n");
925 			ret = -ENOMEM;
926 			goto fail;
927 		}
928 
929 		omap_dmm->tcm[i]->lut_id = i;
930 	}
931 
932 	/* assign access mode containers to applicable tcm container */
933 	/* OMAP 4 has 1 container for all 4 views */
934 	/* OMAP 5 has 2 containers, 1 for 2D and 1 for 1D */
935 	containers[TILFMT_8BIT] = omap_dmm->tcm[0];
936 	containers[TILFMT_16BIT] = omap_dmm->tcm[0];
937 	containers[TILFMT_32BIT] = omap_dmm->tcm[0];
938 
939 	if (omap_dmm->container_height != omap_dmm->lut_height) {
940 		/* second LUT is used for PAGE mode.  Programming must use
941 		   y offset that is added to all y coordinates.  LUT id is still
942 		   0, because it is the same LUT, just the upper 128 lines */
943 		containers[TILFMT_PAGE] = omap_dmm->tcm[1];
944 		omap_dmm->tcm[1]->y_offset = OMAP5_LUT_OFFSET;
945 		omap_dmm->tcm[1]->lut_id = 0;
946 	} else {
947 		containers[TILFMT_PAGE] = omap_dmm->tcm[0];
948 	}
949 
950 	area = (struct tcm_area) {
951 		.tcm = NULL,
952 		.p1.x = omap_dmm->container_width - 1,
953 		.p1.y = omap_dmm->container_height - 1,
954 	};
955 
956 	ret = request_irq(omap_dmm->irq, omap_dmm_irq_handler, IRQF_SHARED,
957 				"omap_dmm_irq_handler", omap_dmm);
958 
959 	if (ret) {
960 		dev_err(&dev->dev, "couldn't register IRQ %d, error %d\n",
961 			omap_dmm->irq, ret);
962 		omap_dmm->irq = -1;
963 		goto fail;
964 	}
965 
966 	/* Enable all interrupts for each refill engine except
967 	 * ERR_LUT_MISS<n> (which is just advisory, and we don't care
968 	 * about because we want to be able to refill live scanout
969 	 * buffers for accelerated pan/scroll) and FILL_DSC<n> which
970 	 * we just generally don't care about.
971 	 */
972 	dmm_write(omap_dmm, 0x7e7e7e7e, DMM_PAT_IRQENABLE_SET);
973 
974 	/* initialize all LUTs to dummy page entries */
975 	for (i = 0; i < omap_dmm->num_lut; i++) {
976 		area.tcm = omap_dmm->tcm[i];
977 		if (fill(&area, NULL, 0, 0, true))
978 			dev_err(omap_dmm->dev, "refill failed");
979 	}
980 
981 	dev_info(omap_dmm->dev, "initialized all PAT entries\n");
982 
983 	return 0;
984 
985 fail:
986 	if (omap_dmm_remove(dev))
987 		dev_err(&dev->dev, "cleanup failed\n");
988 	return ret;
989 }
990 
991 /*
992  * debugfs support
993  */
994 
995 #ifdef CONFIG_DEBUG_FS
996 
/* Characters cycled through when drawing 2D blocks in the debugfs map. */
static const char *alphabet =
	"abcdefghijklmnopqrstuvwxyz"
	"ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789";

/* Second character set; tiler_map_show() cycles a cursor over it. */
static const char *special = ".,:;'\"`~!^-+";
1000 
1001 static void fill_map(char **map, int xdiv, int ydiv, struct tcm_area *a,
1002 							char c, bool ovw)
1003 {
1004 	int x, y;
1005 	for (y = a->p0.y / ydiv; y <= a->p1.y / ydiv; y++)
1006 		for (x = a->p0.x / xdiv; x <= a->p1.x / xdiv; x++)
1007 			if (map[y][x] == ' ' || ovw)
1008 				map[y][x] = c;
1009 }
1010 
1011 static void fill_map_pt(char **map, int xdiv, int ydiv, struct tcm_pt *p,
1012 									char c)
1013 {
1014 	map[p->y / ydiv][p->x / xdiv] = c;
1015 }
1016 
1017 static char read_map_pt(char **map, int xdiv, int ydiv, struct tcm_pt *p)
1018 {
1019 	return map[p->y / ydiv][p->x / xdiv];
1020 }
1021 
/* Number of map columns spanned by the X range [@x0, @x1] scaled by @xdiv. */
static int map_width(int xdiv, int x0, int x1)
{
	return (x1 / xdiv) - (x0 / xdiv) + 1;
}

/*
 * Write @nice centred into row @yd of @map, within the columns spanned by
 * the X range [@x0, @x1].
 *
 * Nothing is written when the text is wider than the span.  The comparison
 * is done explicitly: subtracting strlen() from the int width would be
 * carried out in size_t, and where size_t is 32 bits a "negative" result
 * becomes a large positive offset, i.e. a write far outside the row.
 */
static void text_map(char **map, int xdiv, char *nice, int yd, int x0, int x1)
{
	char *p = map[yd] + (x0 / xdiv);
	int width = map_width(xdiv, x0, x1);
	size_t len = strlen(nice);

	if (width < 0 || len > (size_t)width)
		return;

	/* centre the text inside the span */
	p += (width - (int)len) / 2;
	while (*nice)
		*p++ = *nice++;
}
1037 
1038 static void map_1d_info(char **map, int xdiv, int ydiv, char *nice,
1039 							struct tcm_area *a)
1040 {
1041 	sprintf(nice, "%dK", tcm_sizeof(*a) * 4);
1042 	if (a->p0.y + 1 < a->p1.y) {
1043 		text_map(map, xdiv, nice, (a->p0.y + a->p1.y) / 2 / ydiv, 0,
1044 							256 - 1);
1045 	} else if (a->p0.y < a->p1.y) {
1046 		if (strlen(nice) < map_width(xdiv, a->p0.x, 256 - 1))
1047 			text_map(map, xdiv, nice, a->p0.y / ydiv,
1048 					a->p0.x + xdiv,	256 - 1);
1049 		else if (strlen(nice) < map_width(xdiv, 0, a->p1.x))
1050 			text_map(map, xdiv, nice, a->p1.y / ydiv,
1051 					0, a->p1.y - xdiv);
1052 	} else if (strlen(nice) + 1 < map_width(xdiv, a->p0.x, a->p1.x)) {
1053 		text_map(map, xdiv, nice, a->p0.y / ydiv, a->p0.x, a->p1.x);
1054 	}
1055 }
1056 
1057 static void map_2d_info(char **map, int xdiv, int ydiv, char *nice,
1058 							struct tcm_area *a)
1059 {
1060 	sprintf(nice, "(%d*%d)", tcm_awidth(*a), tcm_aheight(*a));
1061 	if (strlen(nice) + 1 < map_width(xdiv, a->p0.x, a->p1.x))
1062 		text_map(map, xdiv, nice, (a->p0.y + a->p1.y) / 2 / ydiv,
1063 							a->p0.x, a->p1.x);
1064 }
1065 
1066 int tiler_map_show(struct seq_file *s, void *arg)
1067 {
1068 	int xdiv = 2, ydiv = 1;
1069 	char **map = NULL, *global_map;
1070 	struct tiler_block *block;
1071 	struct tcm_area a, p;
1072 	int i;
1073 	const char *m2d = alphabet;
1074 	const char *a2d = special;
1075 	const char *m2dp = m2d, *a2dp = a2d;
1076 	char nice[128];
1077 	int h_adj;
1078 	int w_adj;
1079 	unsigned long flags;
1080 	int lut_idx;
1081 
1082 
1083 	if (!omap_dmm) {
1084 		/* early return if dmm/tiler device is not initialized */
1085 		return 0;
1086 	}
1087 
1088 	h_adj = omap_dmm->container_height / ydiv;
1089 	w_adj = omap_dmm->container_width / xdiv;
1090 
1091 	map = kmalloc_array(h_adj, sizeof(*map), GFP_KERNEL);
1092 	global_map = kmalloc_array(w_adj + 1, h_adj, GFP_KERNEL);
1093 
1094 	if (!map || !global_map)
1095 		goto error;
1096 
1097 	for (lut_idx = 0; lut_idx < omap_dmm->num_lut; lut_idx++) {
1098 		memset(map, 0, h_adj * sizeof(*map));
1099 		memset(global_map, ' ', (w_adj + 1) * h_adj);
1100 
1101 		for (i = 0; i < omap_dmm->container_height; i++) {
1102 			map[i] = global_map + i * (w_adj + 1);
1103 			map[i][w_adj] = 0;
1104 		}
1105 
1106 		spin_lock_irqsave(&list_lock, flags);
1107 
1108 		list_for_each_entry(block, &omap_dmm->alloc_head, alloc_node) {
1109 			if (block->area.tcm == omap_dmm->tcm[lut_idx]) {
1110 				if (block->fmt != TILFMT_PAGE) {
1111 					fill_map(map, xdiv, ydiv, &block->area,
1112 						*m2dp, true);
1113 					if (!*++a2dp)
1114 						a2dp = a2d;
1115 					if (!*++m2dp)
1116 						m2dp = m2d;
1117 					map_2d_info(map, xdiv, ydiv, nice,
1118 							&block->area);
1119 				} else {
1120 					bool start = read_map_pt(map, xdiv,
1121 						ydiv, &block->area.p0) == ' ';
1122 					bool end = read_map_pt(map, xdiv, ydiv,
1123 							&block->area.p1) == ' ';
1124 
1125 					tcm_for_each_slice(a, block->area, p)
1126 						fill_map(map, xdiv, ydiv, &a,
1127 							'=', true);
1128 					fill_map_pt(map, xdiv, ydiv,
1129 							&block->area.p0,
1130 							start ? '<' : 'X');
1131 					fill_map_pt(map, xdiv, ydiv,
1132 							&block->area.p1,
1133 							end ? '>' : 'X');
1134 					map_1d_info(map, xdiv, ydiv, nice,
1135 							&block->area);
1136 				}
1137 			}
1138 		}
1139 
1140 		spin_unlock_irqrestore(&list_lock, flags);
1141 
1142 		if (s) {
1143 			seq_printf(s, "CONTAINER %d DUMP BEGIN\n", lut_idx);
1144 			for (i = 0; i < 128; i++)
1145 				seq_printf(s, "%03d:%s\n", i, map[i]);
1146 			seq_printf(s, "CONTAINER %d DUMP END\n", lut_idx);
1147 		} else {
1148 			dev_dbg(omap_dmm->dev, "CONTAINER %d DUMP BEGIN\n",
1149 				lut_idx);
1150 			for (i = 0; i < 128; i++)
1151 				dev_dbg(omap_dmm->dev, "%03d:%s\n", i, map[i]);
1152 			dev_dbg(omap_dmm->dev, "CONTAINER %d DUMP END\n",
1153 				lut_idx);
1154 		}
1155 	}
1156 
1157 error:
1158 	kfree(map);
1159 	kfree(global_map);
1160 
1161 	return 0;
1162 }
1163 #endif
1164 
1165 #ifdef CONFIG_PM_SLEEP
1166 static int omap_dmm_resume(struct device *dev)
1167 {
1168 	struct tcm_area area;
1169 	int i;
1170 
1171 	if (!omap_dmm)
1172 		return -ENODEV;
1173 
1174 	area = (struct tcm_area) {
1175 		.tcm = NULL,
1176 		.p1.x = omap_dmm->container_width - 1,
1177 		.p1.y = omap_dmm->container_height - 1,
1178 	};
1179 
1180 	/* initialize all LUTs to dummy page entries */
1181 	for (i = 0; i < omap_dmm->num_lut; i++) {
1182 		area.tcm = omap_dmm->tcm[i];
1183 		if (fill(&area, NULL, 0, 0, true))
1184 			dev_err(dev, "refill failed");
1185 	}
1186 
1187 	return 0;
1188 }
1189 #endif
1190 
/* PM operations: no suspend callback, omap_dmm_resume() on resume. */
static SIMPLE_DEV_PM_OPS(omap_dmm_pm_ops, NULL, omap_dmm_resume);
1192 
1193 #if defined(CONFIG_OF)
1194 static const struct dmm_platform_data dmm_omap4_platform_data = {
1195 	.cpu_cache_flags = OMAP_BO_WC,
1196 };
1197 
1198 static const struct dmm_platform_data dmm_omap5_platform_data = {
1199 	.cpu_cache_flags = OMAP_BO_UNCACHED,
1200 };
1201 
1202 static const struct of_device_id dmm_of_match[] = {
1203 	{
1204 		.compatible = "ti,omap4-dmm",
1205 		.data = &dmm_omap4_platform_data,
1206 	},
1207 	{
1208 		.compatible = "ti,omap5-dmm",
1209 		.data = &dmm_omap5_platform_data,
1210 	},
1211 	{},
1212 };
1213 #endif
1214 
1215 struct platform_driver omap_dmm_driver = {
1216 	.probe = omap_dmm_probe,
1217 	.remove = omap_dmm_remove,
1218 	.driver = {
1219 		.owner = THIS_MODULE,
1220 		.name = DMM_DRIVER_NAME,
1221 		.of_match_table = of_match_ptr(dmm_of_match),
1222 		.pm = &omap_dmm_pm_ops,
1223 	},
1224 };
1225 
1226 MODULE_LICENSE("GPL v2");
1227 MODULE_AUTHOR("Andy Gross <andy.gross@ti.com>");
1228 MODULE_DESCRIPTION("OMAP DMM/Tiler Driver");
1229