xref: /openbmc/linux/drivers/gpu/drm/msm/adreno/a3xx_gpu.c (revision 6e2055a9)
1 /*
2  * Copyright (C) 2013 Red Hat
3  * Author: Rob Clark <robdclark@gmail.com>
4  *
5  * This program is free software; you can redistribute it and/or modify it
6  * under the terms of the GNU General Public License version 2 as published by
7  * the Free Software Foundation.
8  *
9  * This program is distributed in the hope that it will be useful, but WITHOUT
10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
12  * more details.
13  *
14  * You should have received a copy of the GNU General Public License along with
15  * this program.  If not, see <http://www.gnu.org/licenses/>.
16  */
17 
18 #ifdef CONFIG_MSM_OCMEM
19 #  include <mach/ocmem.h>
20 #endif
21 
22 #include "a3xx_gpu.h"
23 
/*
 * Interrupt sources enabled for this GPU: a3xx_hw_init() writes this value
 * to REG_A3XX_RBBM_INT_0_MASK.
 */
#define A3XX_INT0_MASK \
	(A3XX_INT0_RBBM_AHB_ERROR |        \
	 A3XX_INT0_RBBM_ATB_BUS_OVERFLOW | \
	 A3XX_INT0_CP_T0_PACKET_IN_IB |    \
	 A3XX_INT0_CP_OPCODE_ERROR |       \
	 A3XX_INT0_CP_RESERVED_BIT_ERROR | \
	 A3XX_INT0_CP_HW_FAULT |           \
	 A3XX_INT0_CP_IB1_INT |            \
	 A3XX_INT0_CP_IB2_INT |            \
	 A3XX_INT0_CP_RB_INT |             \
	 A3XX_INT0_CP_REG_PROTECT_FAULT |  \
	 A3XX_INT0_CP_AHB_ERROR_HALT |     \
	 A3XX_INT0_UCHE_OOB_ACCESS)
37 
/*
 * Platform device recorded by a3xx_probe() and cleared again by
 * a3xx_remove().  a3xx_gpu_init() reads it and fails with -ENXIO while it
 * is NULL.
 */
static struct platform_device *a3xx_pdev;
39 
40 static void a3xx_me_init(struct msm_gpu *gpu)
41 {
42 	struct msm_ringbuffer *ring = gpu->rb;
43 
44 	OUT_PKT3(ring, CP_ME_INIT, 17);
45 	OUT_RING(ring, 0x000003f7);
46 	OUT_RING(ring, 0x00000000);
47 	OUT_RING(ring, 0x00000000);
48 	OUT_RING(ring, 0x00000000);
49 	OUT_RING(ring, 0x00000080);
50 	OUT_RING(ring, 0x00000100);
51 	OUT_RING(ring, 0x00000180);
52 	OUT_RING(ring, 0x00006600);
53 	OUT_RING(ring, 0x00000150);
54 	OUT_RING(ring, 0x0000014e);
55 	OUT_RING(ring, 0x00000154);
56 	OUT_RING(ring, 0x00000001);
57 	OUT_RING(ring, 0x00000000);
58 	OUT_RING(ring, 0x00000000);
59 	OUT_RING(ring, 0x00000000);
60 	OUT_RING(ring, 0x00000000);
61 	OUT_RING(ring, 0x00000000);
62 
63 	gpu->funcs->flush(gpu);
64 	gpu->funcs->idle(gpu);
65 }
66 
67 static int a3xx_hw_init(struct msm_gpu *gpu)
68 {
69 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
70 	struct a3xx_gpu *a3xx_gpu = to_a3xx_gpu(adreno_gpu);
71 	uint32_t *ptr, len;
72 	int i, ret;
73 
74 	DBG("%s", gpu->name);
75 
76 	if (adreno_is_a305(adreno_gpu)) {
77 		/* Set up 16 deep read/write request queues: */
78 		gpu_write(gpu, REG_A3XX_VBIF_IN_RD_LIM_CONF0, 0x10101010);
79 		gpu_write(gpu, REG_A3XX_VBIF_IN_RD_LIM_CONF1, 0x10101010);
80 		gpu_write(gpu, REG_A3XX_VBIF_OUT_RD_LIM_CONF0, 0x10101010);
81 		gpu_write(gpu, REG_A3XX_VBIF_OUT_WR_LIM_CONF0, 0x10101010);
82 		gpu_write(gpu, REG_A3XX_VBIF_DDR_OUT_MAX_BURST, 0x0000303);
83 		gpu_write(gpu, REG_A3XX_VBIF_IN_WR_LIM_CONF0, 0x10101010);
84 		gpu_write(gpu, REG_A3XX_VBIF_IN_WR_LIM_CONF1, 0x10101010);
85 		/* Enable WR-REQ: */
86 		gpu_write(gpu, REG_A3XX_VBIF_GATE_OFF_WRREQ_EN, 0x0000ff);
87 		/* Set up round robin arbitration between both AXI ports: */
88 		gpu_write(gpu, REG_A3XX_VBIF_ARB_CTL, 0x00000030);
89 		/* Set up AOOO: */
90 		gpu_write(gpu, REG_A3XX_VBIF_OUT_AXI_AOOO_EN, 0x0000003c);
91 		gpu_write(gpu, REG_A3XX_VBIF_OUT_AXI_AOOO, 0x003c003c);
92 
93 	} else if (adreno_is_a320(adreno_gpu)) {
94 		/* Set up 16 deep read/write request queues: */
95 		gpu_write(gpu, REG_A3XX_VBIF_IN_RD_LIM_CONF0, 0x10101010);
96 		gpu_write(gpu, REG_A3XX_VBIF_IN_RD_LIM_CONF1, 0x10101010);
97 		gpu_write(gpu, REG_A3XX_VBIF_OUT_RD_LIM_CONF0, 0x10101010);
98 		gpu_write(gpu, REG_A3XX_VBIF_OUT_WR_LIM_CONF0, 0x10101010);
99 		gpu_write(gpu, REG_A3XX_VBIF_DDR_OUT_MAX_BURST, 0x0000303);
100 		gpu_write(gpu, REG_A3XX_VBIF_IN_WR_LIM_CONF0, 0x10101010);
101 		gpu_write(gpu, REG_A3XX_VBIF_IN_WR_LIM_CONF1, 0x10101010);
102 		/* Enable WR-REQ: */
103 		gpu_write(gpu, REG_A3XX_VBIF_GATE_OFF_WRREQ_EN, 0x0000ff);
104 		/* Set up round robin arbitration between both AXI ports: */
105 		gpu_write(gpu, REG_A3XX_VBIF_ARB_CTL, 0x00000030);
106 		/* Set up AOOO: */
107 		gpu_write(gpu, REG_A3XX_VBIF_OUT_AXI_AOOO_EN, 0x0000003c);
108 		gpu_write(gpu, REG_A3XX_VBIF_OUT_AXI_AOOO, 0x003c003c);
109 		/* Enable 1K sort: */
110 		gpu_write(gpu, REG_A3XX_VBIF_ABIT_SORT, 0x000000ff);
111 		gpu_write(gpu, REG_A3XX_VBIF_ABIT_SORT_CONF, 0x000000a4);
112 
113 	} else if (adreno_is_a330v2(adreno_gpu)) {
114 		/*
115 		 * Most of the VBIF registers on 8974v2 have the correct
116 		 * values at power on, so we won't modify those if we don't
117 		 * need to
118 		 */
119 		/* Enable 1k sort: */
120 		gpu_write(gpu, REG_A3XX_VBIF_ABIT_SORT, 0x0001003f);
121 		gpu_write(gpu, REG_A3XX_VBIF_ABIT_SORT_CONF, 0x000000a4);
122 		/* Enable WR-REQ: */
123 		gpu_write(gpu, REG_A3XX_VBIF_GATE_OFF_WRREQ_EN, 0x00003f);
124 		gpu_write(gpu, REG_A3XX_VBIF_DDR_OUT_MAX_BURST, 0x0000303);
125 		/* Set up VBIF_ROUND_ROBIN_QOS_ARB: */
126 		gpu_write(gpu, REG_A3XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x0003);
127 
128 	} else if (adreno_is_a330(adreno_gpu)) {
129 		/* Set up 16 deep read/write request queues: */
130 		gpu_write(gpu, REG_A3XX_VBIF_IN_RD_LIM_CONF0, 0x18181818);
131 		gpu_write(gpu, REG_A3XX_VBIF_IN_RD_LIM_CONF1, 0x18181818);
132 		gpu_write(gpu, REG_A3XX_VBIF_OUT_RD_LIM_CONF0, 0x18181818);
133 		gpu_write(gpu, REG_A3XX_VBIF_OUT_WR_LIM_CONF0, 0x18181818);
134 		gpu_write(gpu, REG_A3XX_VBIF_DDR_OUT_MAX_BURST, 0x0000303);
135 		gpu_write(gpu, REG_A3XX_VBIF_IN_WR_LIM_CONF0, 0x18181818);
136 		gpu_write(gpu, REG_A3XX_VBIF_IN_WR_LIM_CONF1, 0x18181818);
137 		/* Enable WR-REQ: */
138 		gpu_write(gpu, REG_A3XX_VBIF_GATE_OFF_WRREQ_EN, 0x00003f);
139 		/* Set up round robin arbitration between both AXI ports: */
140 		gpu_write(gpu, REG_A3XX_VBIF_ARB_CTL, 0x00000030);
141 		/* Set up VBIF_ROUND_ROBIN_QOS_ARB: */
142 		gpu_write(gpu, REG_A3XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x0001);
143 		/* Set up AOOO: */
144 		gpu_write(gpu, REG_A3XX_VBIF_OUT_AXI_AOOO_EN, 0x0000003f);
145 		gpu_write(gpu, REG_A3XX_VBIF_OUT_AXI_AOOO, 0x003f003f);
146 		/* Enable 1K sort: */
147 		gpu_write(gpu, REG_A3XX_VBIF_ABIT_SORT, 0x0001003f);
148 		gpu_write(gpu, REG_A3XX_VBIF_ABIT_SORT_CONF, 0x000000a4);
149 		/* Disable VBIF clock gating. This is to enable AXI running
150 		 * higher frequency than GPU:
151 		 */
152 		gpu_write(gpu, REG_A3XX_VBIF_CLKON, 0x00000001);
153 
154 	} else {
155 		BUG();
156 	}
157 
158 	/* Make all blocks contribute to the GPU BUSY perf counter: */
159 	gpu_write(gpu, REG_A3XX_RBBM_GPU_BUSY_MASKED, 0xffffffff);
160 
161 	/* Tune the hystersis counters for SP and CP idle detection: */
162 	gpu_write(gpu, REG_A3XX_RBBM_SP_HYST_CNT, 0x10);
163 	gpu_write(gpu, REG_A3XX_RBBM_WAIT_IDLE_CLOCKS_CTL, 0x10);
164 
165 	/* Enable the RBBM error reporting bits.  This lets us get
166 	 * useful information on failure:
167 	 */
168 	gpu_write(gpu, REG_A3XX_RBBM_AHB_CTL0, 0x00000001);
169 
170 	/* Enable AHB error reporting: */
171 	gpu_write(gpu, REG_A3XX_RBBM_AHB_CTL1, 0xa6ffffff);
172 
173 	/* Turn on the power counters: */
174 	gpu_write(gpu, REG_A3XX_RBBM_RBBM_CTL, 0x00030000);
175 
176 	/* Turn on hang detection - this spews a lot of useful information
177 	 * into the RBBM registers on a hang:
178 	 */
179 	gpu_write(gpu, REG_A3XX_RBBM_INTERFACE_HANG_INT_CTL, 0x00010fff);
180 
181 	/* Enable 64-byte cacheline size. HW Default is 32-byte (0x000000E0): */
182 	gpu_write(gpu, REG_A3XX_UCHE_CACHE_MODE_CONTROL_REG, 0x00000001);
183 
184 	/* Enable Clock gating: */
185 	if (adreno_is_a320(adreno_gpu))
186 		gpu_write(gpu, REG_A3XX_RBBM_CLOCK_CTL, 0xbfffffff);
187 	else if (adreno_is_a330v2(adreno_gpu))
188 		gpu_write(gpu, REG_A3XX_RBBM_CLOCK_CTL, 0xaaaaaaaa);
189 	else if (adreno_is_a330(adreno_gpu))
190 		gpu_write(gpu, REG_A3XX_RBBM_CLOCK_CTL, 0xbffcffff);
191 
192 	if (adreno_is_a330v2(adreno_gpu))
193 		gpu_write(gpu, REG_A3XX_RBBM_GPR0_CTL, 0x05515455);
194 	else if (adreno_is_a330(adreno_gpu))
195 		gpu_write(gpu, REG_A3XX_RBBM_GPR0_CTL, 0x00000000);
196 
197 	/* Set the OCMEM base address for A330, etc */
198 	if (a3xx_gpu->ocmem_hdl) {
199 		gpu_write(gpu, REG_A3XX_RB_GMEM_BASE_ADDR,
200 			(unsigned int)(a3xx_gpu->ocmem_base >> 14));
201 	}
202 
203 	/* Turn on performance counters: */
204 	gpu_write(gpu, REG_A3XX_RBBM_PERFCTR_CTL, 0x01);
205 
206 	/* Set SP perfcounter 7 to count SP_FS_FULL_ALU_INSTRUCTIONS
207 	 * we will use this to augment our hang detection:
208 	 */
209 	gpu_write(gpu, REG_A3XX_SP_PERFCOUNTER7_SELECT,
210 			SP_FS_FULL_ALU_INSTRUCTIONS);
211 
212 	gpu_write(gpu, REG_A3XX_RBBM_INT_0_MASK, A3XX_INT0_MASK);
213 
214 	ret = adreno_hw_init(gpu);
215 	if (ret)
216 		return ret;
217 
218 	/* setup access protection: */
219 	gpu_write(gpu, REG_A3XX_CP_PROTECT_CTRL, 0x00000007);
220 
221 	/* RBBM registers */
222 	gpu_write(gpu, REG_A3XX_CP_PROTECT(0), 0x63000040);
223 	gpu_write(gpu, REG_A3XX_CP_PROTECT(1), 0x62000080);
224 	gpu_write(gpu, REG_A3XX_CP_PROTECT(2), 0x600000cc);
225 	gpu_write(gpu, REG_A3XX_CP_PROTECT(3), 0x60000108);
226 	gpu_write(gpu, REG_A3XX_CP_PROTECT(4), 0x64000140);
227 	gpu_write(gpu, REG_A3XX_CP_PROTECT(5), 0x66000400);
228 
229 	/* CP registers */
230 	gpu_write(gpu, REG_A3XX_CP_PROTECT(6), 0x65000700);
231 	gpu_write(gpu, REG_A3XX_CP_PROTECT(7), 0x610007d8);
232 	gpu_write(gpu, REG_A3XX_CP_PROTECT(8), 0x620007e0);
233 	gpu_write(gpu, REG_A3XX_CP_PROTECT(9), 0x61001178);
234 	gpu_write(gpu, REG_A3XX_CP_PROTECT(10), 0x64001180);
235 
236 	/* RB registers */
237 	gpu_write(gpu, REG_A3XX_CP_PROTECT(11), 0x60003300);
238 
239 	/* VBIF registers */
240 	gpu_write(gpu, REG_A3XX_CP_PROTECT(12), 0x6b00c000);
241 
242 	/* NOTE: PM4/micro-engine firmware registers look to be the same
243 	 * for a2xx and a3xx.. we could possibly push that part down to
244 	 * adreno_gpu base class.  Or push both PM4 and PFP but
245 	 * parameterize the pfp ucode addr/data registers..
246 	 */
247 
248 	/* Load PM4: */
249 	ptr = (uint32_t *)(adreno_gpu->pm4->data);
250 	len = adreno_gpu->pm4->size / 4;
251 	DBG("loading PM4 ucode version: %x", ptr[1]);
252 
253 	gpu_write(gpu, REG_AXXX_CP_DEBUG,
254 			AXXX_CP_DEBUG_DYNAMIC_CLK_DISABLE |
255 			AXXX_CP_DEBUG_MIU_128BIT_WRITE_ENABLE);
256 	gpu_write(gpu, REG_AXXX_CP_ME_RAM_WADDR, 0);
257 	for (i = 1; i < len; i++)
258 		gpu_write(gpu, REG_AXXX_CP_ME_RAM_DATA, ptr[i]);
259 
260 	/* Load PFP: */
261 	ptr = (uint32_t *)(adreno_gpu->pfp->data);
262 	len = adreno_gpu->pfp->size / 4;
263 	DBG("loading PFP ucode version: %x", ptr[5]);
264 
265 	gpu_write(gpu, REG_A3XX_CP_PFP_UCODE_ADDR, 0);
266 	for (i = 1; i < len; i++)
267 		gpu_write(gpu, REG_A3XX_CP_PFP_UCODE_DATA, ptr[i]);
268 
269 	/* CP ROQ queue sizes (bytes) - RB:16, ST:16, IB1:32, IB2:64 */
270 	if (adreno_is_a305(adreno_gpu) || adreno_is_a320(adreno_gpu)) {
271 		gpu_write(gpu, REG_AXXX_CP_QUEUE_THRESHOLDS,
272 				AXXX_CP_QUEUE_THRESHOLDS_CSQ_IB1_START(2) |
273 				AXXX_CP_QUEUE_THRESHOLDS_CSQ_IB2_START(6) |
274 				AXXX_CP_QUEUE_THRESHOLDS_CSQ_ST_START(14));
275 	} else if (adreno_is_a330(adreno_gpu)) {
276 		/* NOTE: this (value take from downstream android driver)
277 		 * includes some bits outside of the known bitfields.  But
278 		 * A330 has this "MERCIU queue" thing too, which might
279 		 * explain a new bitfield or reshuffling:
280 		 */
281 		gpu_write(gpu, REG_AXXX_CP_QUEUE_THRESHOLDS, 0x003e2008);
282 	}
283 
284 	/* clear ME_HALT to start micro engine */
285 	gpu_write(gpu, REG_AXXX_CP_ME_CNTL, 0);
286 
287 	a3xx_me_init(gpu);
288 
289 	return 0;
290 }
291 
292 static void a3xx_recover(struct msm_gpu *gpu)
293 {
294 	gpu_write(gpu, REG_A3XX_RBBM_SW_RESET_CMD, 1);
295 	gpu_read(gpu, REG_A3XX_RBBM_SW_RESET_CMD);
296 	gpu_write(gpu, REG_A3XX_RBBM_SW_RESET_CMD, 0);
297 	adreno_recover(gpu);
298 }
299 
300 static void a3xx_destroy(struct msm_gpu *gpu)
301 {
302 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
303 	struct a3xx_gpu *a3xx_gpu = to_a3xx_gpu(adreno_gpu);
304 
305 	DBG("%s", gpu->name);
306 
307 	adreno_gpu_cleanup(adreno_gpu);
308 
309 #ifdef CONFIG_MSM_OCMEM
310 	if (a3xx_gpu->ocmem_base)
311 		ocmem_free(OCMEM_GRAPHICS, a3xx_gpu->ocmem_hdl);
312 #endif
313 
314 	put_device(&a3xx_gpu->pdev->dev);
315 	kfree(a3xx_gpu);
316 }
317 
318 static void a3xx_idle(struct msm_gpu *gpu)
319 {
320 	unsigned long t;
321 
322 	/* wait for ringbuffer to drain: */
323 	adreno_idle(gpu);
324 
325 	t = jiffies + ADRENO_IDLE_TIMEOUT;
326 
327 	/* then wait for GPU to finish: */
328 	do {
329 		uint32_t rbbm_status = gpu_read(gpu, REG_A3XX_RBBM_STATUS);
330 		if (!(rbbm_status & A3XX_RBBM_STATUS_GPU_BUSY))
331 			return;
332 	} while(time_before(jiffies, t));
333 
334 	DRM_ERROR("timeout waiting for %s to idle!\n", gpu->name);
335 
336 	/* TODO maybe we need to reset GPU here to recover from hang? */
337 }
338 
339 static irqreturn_t a3xx_irq(struct msm_gpu *gpu)
340 {
341 	uint32_t status;
342 
343 	status = gpu_read(gpu, REG_A3XX_RBBM_INT_0_STATUS);
344 	DBG("%s: %08x", gpu->name, status);
345 
346 	// TODO
347 
348 	gpu_write(gpu, REG_A3XX_RBBM_INT_CLEAR_CMD, status);
349 
350 	msm_gpu_retire(gpu);
351 
352 	return IRQ_HANDLED;
353 }
354 
355 #ifdef CONFIG_DEBUG_FS
/*
 * Register ranges dumped by a3xx_show().  Entries come in pairs: each pair
 * is an inclusive { first, last } range of register offsets as passed to
 * gpu_read().
 */
static const unsigned int a3xx_registers[] = {
	0x0000, 0x0002,  0x0010, 0x0012,  0x0018, 0x0018,  0x0020, 0x0027,
	0x0029, 0x002b,  0x002e, 0x0033,  0x0040, 0x0042,  0x0050, 0x005c,
	0x0060, 0x006c,  0x0080, 0x0082,  0x0084, 0x0088,  0x0090, 0x00e5,
	0x00ea, 0x00ed,  0x0100, 0x0100,  0x0110, 0x0123,  0x01c0, 0x01c1,
	0x01c3, 0x01c5,  0x01c7, 0x01c7,  0x01d5, 0x01d9,  0x01dc, 0x01dd,
	0x01ea, 0x01ea,  0x01ee, 0x01f1,  0x01f5, 0x01f5,  0x01fc, 0x01ff,
	0x0440, 0x0440,  0x0443, 0x0443,  0x0445, 0x0445,  0x044d, 0x044f,
	0x0452, 0x0452,  0x0454, 0x046f,  0x047c, 0x047c,  0x047f, 0x047f,
	0x0578, 0x057f,  0x0600, 0x0602,  0x0605, 0x0607,  0x060a, 0x060e,
	0x0612, 0x0614,  0x0c01, 0x0c02,  0x0c06, 0x0c1d,  0x0c3d, 0x0c3f,
	0x0c48, 0x0c4b,  0x0c80, 0x0c80,  0x0c88, 0x0c8b,  0x0ca0, 0x0cb7,
	0x0cc0, 0x0cc1,  0x0cc6, 0x0cc7,  0x0ce4, 0x0ce5,  0x0e00, 0x0e05,
	0x0e0c, 0x0e0c,  0x0e22, 0x0e23,  0x0e41, 0x0e45,  0x0e64, 0x0e65,
	0x0e80, 0x0e82,  0x0e84, 0x0e89,  0x0ea0, 0x0ea1,  0x0ea4, 0x0ea7,
	0x0ec4, 0x0ecb,  0x0ee0, 0x0ee0,  0x0f00, 0x0f01,  0x0f03, 0x0f09,
	0x2040, 0x2040,  0x2044, 0x2044,  0x2048, 0x204d,  0x2068, 0x2069,
	0x206c, 0x206d,  0x2070, 0x2070,  0x2072, 0x2072,  0x2074, 0x2075,
	0x2079, 0x207a,  0x20c0, 0x20d3,  0x20e4, 0x20ef,  0x2100, 0x2109,
	0x210c, 0x210c,  0x210e, 0x210e,  0x2110, 0x2111,  0x2114, 0x2115,
	0x21e4, 0x21e4,  0x21ea, 0x21ea,  0x21ec, 0x21ed,  0x21f0, 0x21f0,
	0x2200, 0x2212,  0x2214, 0x2217,  0x221a, 0x221a,  0x2240, 0x227e,
	0x2280, 0x228b,  0x22c0, 0x22c0,  0x22c4, 0x22ce,  0x22d0, 0x22d8,
	0x22df, 0x22e6,  0x22e8, 0x22e9,  0x22ec, 0x22ec,  0x22f0, 0x22f7,
	0x22ff, 0x22ff,  0x2340, 0x2343,  0x2348, 0x2349,  0x2350, 0x2356,
	0x2360, 0x2360,  0x2440, 0x2440,  0x2444, 0x2444,  0x2448, 0x244d,
	0x2468, 0x2469,  0x246c, 0x246d,  0x2470, 0x2470,  0x2472, 0x2472,
	0x2474, 0x2475,  0x2479, 0x247a,  0x24c0, 0x24d3,  0x24e4, 0x24ef,
	0x2500, 0x2509,  0x250c, 0x250c,  0x250e, 0x250e,  0x2510, 0x2511,
	0x2514, 0x2515,  0x25e4, 0x25e4,  0x25ea, 0x25ea,  0x25ec, 0x25ed,
	0x25f0, 0x25f0,  0x2600, 0x2612,  0x2614, 0x2617,  0x261a, 0x261a,
	0x2640, 0x267e,  0x2680, 0x268b,  0x26c0, 0x26c0,  0x26c4, 0x26ce,
	0x26d0, 0x26d8,  0x26df, 0x26e6,  0x26e8, 0x26e9,  0x26ec, 0x26ec,
	0x26f0, 0x26f7,  0x26ff, 0x26ff,  0x2740, 0x2743,  0x2748, 0x2749,
	0x2750, 0x2756,  0x2760, 0x2760,  0x300c, 0x300e,  0x301c, 0x301d,
	0x302a, 0x302a,  0x302c, 0x302d,  0x3030, 0x3031,  0x3034, 0x3036,
	0x303c, 0x303c,  0x305e, 0x305f,
};
394 
395 static void a3xx_show(struct msm_gpu *gpu, struct seq_file *m)
396 {
397 	int i;
398 
399 	adreno_show(gpu, m);
400 	seq_printf(m, "status:   %08x\n",
401 			gpu_read(gpu, REG_A3XX_RBBM_STATUS));
402 
403 	/* dump these out in a form that can be parsed by demsm: */
404 	seq_printf(m, "IO:region %s 00000000 00020000\n", gpu->name);
405 	for (i = 0; i < ARRAY_SIZE(a3xx_registers); i += 2) {
406 		uint32_t start = a3xx_registers[i];
407 		uint32_t end   = a3xx_registers[i+1];
408 		uint32_t addr;
409 
410 		for (addr = start; addr <= end; addr++) {
411 			uint32_t val = gpu_read(gpu, addr);
412 			seq_printf(m, "IO:R %08x %08x\n", addr<<2, val);
413 		}
414 	}
415 }
416 #endif
417 
/*
 * Function table for a3xx GPUs, handed to adreno_gpu_init() by
 * a3xx_gpu_init().  It mixes the a3xx-specific callbacks defined in this
 * file with the shared adreno_* and msm_gpu_* implementations.
 */
static const struct adreno_gpu_funcs funcs = {
	.base = {
		.get_param = adreno_get_param,
		.hw_init = a3xx_hw_init,
		.pm_suspend = msm_gpu_pm_suspend,
		.pm_resume = msm_gpu_pm_resume,
		.recover = a3xx_recover,
		.last_fence = adreno_last_fence,
		.submit = adreno_submit,
		.flush = adreno_flush,
		.idle = a3xx_idle,
		.irq = a3xx_irq,
		.destroy = a3xx_destroy,
#ifdef CONFIG_DEBUG_FS
		/* only available when debugfs support is built in */
		.show = a3xx_show,
#endif
	},
};
436 
437 struct msm_gpu *a3xx_gpu_init(struct drm_device *dev)
438 {
439 	struct a3xx_gpu *a3xx_gpu = NULL;
440 	struct adreno_gpu *adreno_gpu;
441 	struct msm_gpu *gpu;
442 	struct platform_device *pdev = a3xx_pdev;
443 	struct adreno_platform_config *config;
444 	int ret;
445 
446 	if (!pdev) {
447 		dev_err(dev->dev, "no a3xx device\n");
448 		ret = -ENXIO;
449 		goto fail;
450 	}
451 
452 	config = pdev->dev.platform_data;
453 
454 	a3xx_gpu = kzalloc(sizeof(*a3xx_gpu), GFP_KERNEL);
455 	if (!a3xx_gpu) {
456 		ret = -ENOMEM;
457 		goto fail;
458 	}
459 
460 	adreno_gpu = &a3xx_gpu->base;
461 	gpu = &adreno_gpu->base;
462 
463 	get_device(&pdev->dev);
464 	a3xx_gpu->pdev = pdev;
465 
466 	gpu->fast_rate = config->fast_rate;
467 	gpu->slow_rate = config->slow_rate;
468 	gpu->bus_freq  = config->bus_freq;
469 #ifdef CONFIG_MSM_BUS_SCALING
470 	gpu->bus_scale_table = config->bus_scale_table;
471 #endif
472 
473 	DBG("fast_rate=%u, slow_rate=%u, bus_freq=%u",
474 			gpu->fast_rate, gpu->slow_rate, gpu->bus_freq);
475 
476 	ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs, config->rev);
477 	if (ret)
478 		goto fail;
479 
480 	/* if needed, allocate gmem: */
481 	if (adreno_is_a330(adreno_gpu)) {
482 #ifdef CONFIG_MSM_OCMEM
483 		/* TODO this is different/missing upstream: */
484 		struct ocmem_buf *ocmem_hdl =
485 				ocmem_allocate(OCMEM_GRAPHICS, adreno_gpu->gmem);
486 
487 		a3xx_gpu->ocmem_hdl = ocmem_hdl;
488 		a3xx_gpu->ocmem_base = ocmem_hdl->addr;
489 		adreno_gpu->gmem = ocmem_hdl->len;
490 		DBG("using %dK of OCMEM at 0x%08x", adreno_gpu->gmem / 1024,
491 				a3xx_gpu->ocmem_base);
492 #endif
493 	}
494 
495 	if (!gpu->mmu) {
496 		/* TODO we think it is possible to configure the GPU to
497 		 * restrict access to VRAM carveout.  But the required
498 		 * registers are unknown.  For now just bail out and
499 		 * limp along with just modesetting.  If it turns out
500 		 * to not be possible to restrict access, then we must
501 		 * implement a cmdstream validator.
502 		 */
503 		dev_err(dev->dev, "No memory protection without IOMMU\n");
504 		ret = -ENXIO;
505 		goto fail;
506 	}
507 
508 	return gpu;
509 
510 fail:
511 	if (a3xx_gpu)
512 		a3xx_destroy(&a3xx_gpu->base.base);
513 
514 	return ERR_PTR(ret);
515 }
516 
517 /*
518  * The a3xx device:
519  */
520 
521 #if defined(CONFIG_MSM_BUS_SCALING) && !defined(CONFIG_OF)
522 #  include <mach/kgsl.h>
523 #endif
524 
525 static int a3xx_probe(struct platform_device *pdev)
526 {
527 	static struct adreno_platform_config config = {};
528 #ifdef CONFIG_OF
529 	struct device_node *child, *node = pdev->dev.of_node;
530 	u32 val;
531 	int ret;
532 
533 	ret = of_property_read_u32(node, "qcom,chipid", &val);
534 	if (ret) {
535 		dev_err(&pdev->dev, "could not find chipid: %d\n", ret);
536 		return ret;
537 	}
538 
539 	config.rev = ADRENO_REV((val >> 24) & 0xff,
540 			(val >> 16) & 0xff, (val >> 8) & 0xff, val & 0xff);
541 
542 	/* find clock rates: */
543 	config.fast_rate = 0;
544 	config.slow_rate = ~0;
545 	for_each_child_of_node(node, child) {
546 		if (of_device_is_compatible(child, "qcom,gpu-pwrlevels")) {
547 			struct device_node *pwrlvl;
548 			for_each_child_of_node(child, pwrlvl) {
549 				ret = of_property_read_u32(pwrlvl, "qcom,gpu-freq", &val);
550 				if (ret) {
551 					dev_err(&pdev->dev, "could not find gpu-freq: %d\n", ret);
552 					return ret;
553 				}
554 				config.fast_rate = max(config.fast_rate, val);
555 				config.slow_rate = min(config.slow_rate, val);
556 			}
557 		}
558 	}
559 
560 	if (!config.fast_rate) {
561 		dev_err(&pdev->dev, "could not find clk rates\n");
562 		return -ENXIO;
563 	}
564 
565 #else
566 	struct kgsl_device_platform_data *pdata = pdev->dev.platform_data;
567 	uint32_t version = socinfo_get_version();
568 	if (cpu_is_apq8064ab()) {
569 		config.fast_rate = 450000000;
570 		config.slow_rate = 27000000;
571 		config.bus_freq  = 4;
572 		config.rev = ADRENO_REV(3, 2, 1, 0);
573 	} else if (cpu_is_apq8064()) {
574 		config.fast_rate = 400000000;
575 		config.slow_rate = 27000000;
576 		config.bus_freq  = 4;
577 
578 		if (SOCINFO_VERSION_MAJOR(version) == 2)
579 			config.rev = ADRENO_REV(3, 2, 0, 2);
580 		else if ((SOCINFO_VERSION_MAJOR(version) == 1) &&
581 				(SOCINFO_VERSION_MINOR(version) == 1))
582 			config.rev = ADRENO_REV(3, 2, 0, 1);
583 		else
584 			config.rev = ADRENO_REV(3, 2, 0, 0);
585 
586 	} else if (cpu_is_msm8960ab()) {
587 		config.fast_rate = 400000000;
588 		config.slow_rate = 320000000;
589 		config.bus_freq  = 4;
590 
591 		if (SOCINFO_VERSION_MINOR(version) == 0)
592 			config.rev = ADRENO_REV(3, 2, 1, 0);
593 		else
594 			config.rev = ADRENO_REV(3, 2, 1, 1);
595 
596 	} else if (cpu_is_msm8930()) {
597 		config.fast_rate = 400000000;
598 		config.slow_rate = 27000000;
599 		config.bus_freq  = 3;
600 
601 		if ((SOCINFO_VERSION_MAJOR(version) == 1) &&
602 			(SOCINFO_VERSION_MINOR(version) == 2))
603 			config.rev = ADRENO_REV(3, 0, 5, 2);
604 		else
605 			config.rev = ADRENO_REV(3, 0, 5, 0);
606 
607 	}
608 #  ifdef CONFIG_MSM_BUS_SCALING
609 	config.bus_scale_table = pdata->bus_scale_table;
610 #  endif
611 #endif
612 	pdev->dev.platform_data = &config;
613 	a3xx_pdev = pdev;
614 	return 0;
615 }
616 
/*
 * Platform driver remove: forget the device recorded by a3xx_probe(), so a
 * later a3xx_gpu_init() fails with -ENXIO.  Always returns 0.
 */
static int a3xx_remove(struct platform_device *pdev)
{
	a3xx_pdev = NULL;
	return 0;
}
622 
/* Device-tree compatible strings this driver binds to. */
static const struct of_device_id dt_match[] = {
	{ .compatible = "qcom,kgsl-3d0" },
	{}	/* empty terminating entry */
};
MODULE_DEVICE_TABLE(of, dt_match);
628 
/*
 * Platform driver for the a3xx GPU device; registered and unregistered by
 * a3xx_register() / a3xx_unregister().
 */
static struct platform_driver a3xx_driver = {
	.probe = a3xx_probe,
	.remove = a3xx_remove,
	.driver = {
		.name = "kgsl-3d0",
		.of_match_table = dt_match,
	},
};
637 
/*
 * Register the a3xx platform driver.  The result of
 * platform_driver_register() is not checked or reported to the caller.
 */
void __init a3xx_register(void)
{
	platform_driver_register(&a3xx_driver);
}
642 
/* Unregister the a3xx platform driver registered by a3xx_register(). */
void __exit a3xx_unregister(void)
{
	platform_driver_unregister(&a3xx_driver);
}
647