/*
 * Copyright (C) 2013 Red Hat
 * Author: Rob Clark <robdclark@gmail.com>
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 as published by
 * the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program.  If not, see <http://www.gnu.org/licenses/>.
 */

#ifdef CONFIG_MSM_OCMEM
#  include <mach/ocmem.h>
#endif

#include "a3xx_gpu.h"

#define A3XX_INT0_MASK \
	(A3XX_INT0_RBBM_AHB_ERROR |        \
	 A3XX_INT0_RBBM_ATB_BUS_OVERFLOW | \
	 A3XX_INT0_CP_T0_PACKET_IN_IB |    \
	 A3XX_INT0_CP_OPCODE_ERROR |       \
	 A3XX_INT0_CP_RESERVED_BIT_ERROR | \
	 A3XX_INT0_CP_HW_FAULT |           \
	 A3XX_INT0_CP_IB1_INT |            \
	 A3XX_INT0_CP_IB2_INT |            \
	 A3XX_INT0_CP_RB_INT |             \
	 A3XX_INT0_CP_REG_PROTECT_FAULT |  \
	 A3XX_INT0_CP_AHB_ERROR_HALT |     \
	 A3XX_INT0_UCHE_OOB_ACCESS)


static bool hang_debug = false;
MODULE_PARM_DESC(hang_debug, "Dump registers when hang is detected (can be slow!)");
module_param_named(hang_debug, hang_debug, bool, 0600);
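/* e.g. (assuming the driver is built as the "msm" module):
 *
 *   echo Y > /sys/module/msm/parameters/hang_debug
 */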
static void a3xx_dump(struct msm_gpu *gpu);

static void a3xx_me_init(struct msm_gpu *gpu)
{
	struct msm_ringbuffer *ring = gpu->rb;

	OUT_PKT3(ring, CP_ME_INIT, 17);
	OUT_RING(ring, 0x000003f7);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000080);
	OUT_RING(ring, 0x00000100);
	OUT_RING(ring, 0x00000180);
	OUT_RING(ring, 0x00006600);
	OUT_RING(ring, 0x00000150);
	OUT_RING(ring, 0x0000014e);
	OUT_RING(ring, 0x00000154);
	OUT_RING(ring, 0x00000001);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);

	gpu->funcs->flush(gpu);
	gpu->funcs->idle(gpu);
}

static int a3xx_hw_init(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a3xx_gpu *a3xx_gpu = to_a3xx_gpu(adreno_gpu);
	uint32_t *ptr, len;
	int i, ret;

	DBG("%s", gpu->name);

	if (adreno_is_a305(adreno_gpu)) {
		/* Set up 16 deep read/write request queues: */
		gpu_write(gpu, REG_A3XX_VBIF_IN_RD_LIM_CONF0, 0x10101010);
		gpu_write(gpu, REG_A3XX_VBIF_IN_RD_LIM_CONF1, 0x10101010);
		gpu_write(gpu, REG_A3XX_VBIF_OUT_RD_LIM_CONF0, 0x10101010);
		gpu_write(gpu, REG_A3XX_VBIF_OUT_WR_LIM_CONF0, 0x10101010);
		gpu_write(gpu, REG_A3XX_VBIF_DDR_OUT_MAX_BURST, 0x0000303);
		gpu_write(gpu, REG_A3XX_VBIF_IN_WR_LIM_CONF0, 0x10101010);
		gpu_write(gpu, REG_A3XX_VBIF_IN_WR_LIM_CONF1, 0x10101010);
		/* Enable WR-REQ: */
		gpu_write(gpu, REG_A3XX_VBIF_GATE_OFF_WRREQ_EN, 0x0000ff);
		/* Set up round robin arbitration between both AXI ports: */
		gpu_write(gpu, REG_A3XX_VBIF_ARB_CTL, 0x00000030);
		/* Set up AOOO: */
		gpu_write(gpu, REG_A3XX_VBIF_OUT_AXI_AOOO_EN, 0x0000003c);
		gpu_write(gpu, REG_A3XX_VBIF_OUT_AXI_AOOO, 0x003c003c);

	} else if (adreno_is_a320(adreno_gpu)) {
		/* Set up 16 deep read/write request queues: */
		gpu_write(gpu, REG_A3XX_VBIF_IN_RD_LIM_CONF0, 0x10101010);
		gpu_write(gpu, REG_A3XX_VBIF_IN_RD_LIM_CONF1, 0x10101010);
		gpu_write(gpu, REG_A3XX_VBIF_OUT_RD_LIM_CONF0, 0x10101010);
		gpu_write(gpu, REG_A3XX_VBIF_OUT_WR_LIM_CONF0, 0x10101010);
		gpu_write(gpu, REG_A3XX_VBIF_DDR_OUT_MAX_BURST, 0x0000303);
		gpu_write(gpu, REG_A3XX_VBIF_IN_WR_LIM_CONF0, 0x10101010);
		gpu_write(gpu, REG_A3XX_VBIF_IN_WR_LIM_CONF1, 0x10101010);
		/* Enable WR-REQ: */
		gpu_write(gpu, REG_A3XX_VBIF_GATE_OFF_WRREQ_EN, 0x0000ff);
		/* Set up round robin arbitration between both AXI ports: */
		gpu_write(gpu, REG_A3XX_VBIF_ARB_CTL, 0x00000030);
		/* Set up AOOO: */
		gpu_write(gpu, REG_A3XX_VBIF_OUT_AXI_AOOO_EN, 0x0000003c);
		gpu_write(gpu, REG_A3XX_VBIF_OUT_AXI_AOOO, 0x003c003c);
		/* Enable 1K sort: */
		gpu_write(gpu, REG_A3XX_VBIF_ABIT_SORT, 0x000000ff);
		gpu_write(gpu, REG_A3XX_VBIF_ABIT_SORT_CONF, 0x000000a4);

	} else if (adreno_is_a330v2(adreno_gpu)) {
		/*
		 * Most of the VBIF registers on 8974v2 have the correct
		 * values at power on, so we won't modify those if we don't
		 * need to
		 */
		/* Enable 1k sort: */
		gpu_write(gpu, REG_A3XX_VBIF_ABIT_SORT, 0x0001003f);
		gpu_write(gpu, REG_A3XX_VBIF_ABIT_SORT_CONF, 0x000000a4);
		/* Enable WR-REQ: */
		gpu_write(gpu, REG_A3XX_VBIF_GATE_OFF_WRREQ_EN, 0x00003f);
		gpu_write(gpu, REG_A3XX_VBIF_DDR_OUT_MAX_BURST, 0x0000303);
		/* Set up VBIF_ROUND_ROBIN_QOS_ARB: */
		gpu_write(gpu, REG_A3XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x0003);

	} else if (adreno_is_a330(adreno_gpu)) {
		/* Set up 16 deep read/write request queues: */
		gpu_write(gpu, REG_A3XX_VBIF_IN_RD_LIM_CONF0, 0x18181818);
		gpu_write(gpu, REG_A3XX_VBIF_IN_RD_LIM_CONF1, 0x18181818);
		gpu_write(gpu, REG_A3XX_VBIF_OUT_RD_LIM_CONF0, 0x18181818);
		gpu_write(gpu, REG_A3XX_VBIF_OUT_WR_LIM_CONF0, 0x18181818);
		gpu_write(gpu, REG_A3XX_VBIF_DDR_OUT_MAX_BURST, 0x0000303);
		gpu_write(gpu, REG_A3XX_VBIF_IN_WR_LIM_CONF0, 0x18181818);
		gpu_write(gpu, REG_A3XX_VBIF_IN_WR_LIM_CONF1, 0x18181818);
		/* Enable WR-REQ: */
		gpu_write(gpu, REG_A3XX_VBIF_GATE_OFF_WRREQ_EN, 0x00003f);
		/* Set up round robin arbitration between both AXI ports: */
		gpu_write(gpu, REG_A3XX_VBIF_ARB_CTL, 0x00000030);
		/* Set up VBIF_ROUND_ROBIN_QOS_ARB: */
		gpu_write(gpu, REG_A3XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x0001);
		/* Set up AOOO: */
		gpu_write(gpu, REG_A3XX_VBIF_OUT_AXI_AOOO_EN, 0x0000003f);
		gpu_write(gpu, REG_A3XX_VBIF_OUT_AXI_AOOO, 0x003f003f);
		/* Enable 1K sort: */
		gpu_write(gpu, REG_A3XX_VBIF_ABIT_SORT, 0x0001003f);
		gpu_write(gpu, REG_A3XX_VBIF_ABIT_SORT_CONF, 0x000000a4);
		/* Disable VBIF clock gating, to allow AXI to run at a
		 * higher frequency than the GPU:
		 */
		gpu_write(gpu, REG_A3XX_VBIF_CLKON, 0x00000001);

	} else {
		BUG();
	}

	/* Make all blocks contribute to the GPU BUSY perf counter: */
	gpu_write(gpu, REG_A3XX_RBBM_GPU_BUSY_MASKED, 0xffffffff);

	/* Tune the hysteresis counters for SP and CP idle detection: */
	gpu_write(gpu, REG_A3XX_RBBM_SP_HYST_CNT, 0x10);
	gpu_write(gpu, REG_A3XX_RBBM_WAIT_IDLE_CLOCKS_CTL, 0x10);

	/* Enable the RBBM error reporting bits.  This lets us get
	 * useful information on failure:
	 */
	gpu_write(gpu, REG_A3XX_RBBM_AHB_CTL0, 0x00000001);

	/* Enable AHB error reporting: */
	gpu_write(gpu, REG_A3XX_RBBM_AHB_CTL1, 0xa6ffffff);

	/* Turn on the power counters: */
	gpu_write(gpu, REG_A3XX_RBBM_RBBM_CTL, 0x00030000);

	/* Turn on hang detection - this spews a lot of useful information
	 * into the RBBM registers on a hang:
	 */
	gpu_write(gpu, REG_A3XX_RBBM_INTERFACE_HANG_INT_CTL, 0x00010fff);

	/* Enable 64-byte cacheline size. HW Default is 32-byte (0x000000E0): */
	gpu_write(gpu, REG_A3XX_UCHE_CACHE_MODE_CONTROL_REG, 0x00000001);

	/* Enable Clock gating: */
	if (adreno_is_a320(adreno_gpu))
		gpu_write(gpu, REG_A3XX_RBBM_CLOCK_CTL, 0xbfffffff);
	else if (adreno_is_a330v2(adreno_gpu))
		gpu_write(gpu, REG_A3XX_RBBM_CLOCK_CTL, 0xaaaaaaaa);
	else if (adreno_is_a330(adreno_gpu))
		gpu_write(gpu, REG_A3XX_RBBM_CLOCK_CTL, 0xbffcffff);

	if (adreno_is_a330v2(adreno_gpu))
		gpu_write(gpu, REG_A3XX_RBBM_GPR0_CTL, 0x05515455);
	else if (adreno_is_a330(adreno_gpu))
		gpu_write(gpu, REG_A3XX_RBBM_GPR0_CTL, 0x00000000);

	/* Set the OCMEM base address for A330, etc */
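	/* (the register appears to take the base in 16KB units, hence the
	 * >> 14 below)
	 */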
	if (a3xx_gpu->ocmem_hdl) {
		gpu_write(gpu, REG_A3XX_RB_GMEM_BASE_ADDR,
			(unsigned int)(a3xx_gpu->ocmem_base >> 14));
	}

	/* Turn on performance counters: */
	gpu_write(gpu, REG_A3XX_RBBM_PERFCTR_CTL, 0x01);

	/* Set SP perfcounter 7 to count SP_FS_FULL_ALU_INSTRUCTIONS;
	 * we will use this to augment our hang detection:
	 */
	gpu_write(gpu, REG_A3XX_SP_PERFCOUNTER7_SELECT,
			SP_FS_FULL_ALU_INSTRUCTIONS);

	gpu_write(gpu, REG_A3XX_RBBM_INT_0_MASK, A3XX_INT0_MASK);

	ret = adreno_hw_init(gpu);
	if (ret)
		return ret;

	/* setup access protection: */
	gpu_write(gpu, REG_A3XX_CP_PROTECT_CTRL, 0x00000007);

	/* RBBM registers */
	gpu_write(gpu, REG_A3XX_CP_PROTECT(0), 0x63000040);
	gpu_write(gpu, REG_A3XX_CP_PROTECT(1), 0x62000080);
	gpu_write(gpu, REG_A3XX_CP_PROTECT(2), 0x600000cc);
	gpu_write(gpu, REG_A3XX_CP_PROTECT(3), 0x60000108);
	gpu_write(gpu, REG_A3XX_CP_PROTECT(4), 0x64000140);
	gpu_write(gpu, REG_A3XX_CP_PROTECT(5), 0x66000400);

	/* CP registers */
	gpu_write(gpu, REG_A3XX_CP_PROTECT(6), 0x65000700);
	gpu_write(gpu, REG_A3XX_CP_PROTECT(7), 0x610007d8);
	gpu_write(gpu, REG_A3XX_CP_PROTECT(8), 0x620007e0);
	gpu_write(gpu, REG_A3XX_CP_PROTECT(9), 0x61001178);
	gpu_write(gpu, REG_A3XX_CP_PROTECT(10), 0x64001180);

	/* RB registers */
	gpu_write(gpu, REG_A3XX_CP_PROTECT(11), 0x60003300);

	/* VBIF registers */
	gpu_write(gpu, REG_A3XX_CP_PROTECT(12), 0x6b00c000);

	/* NOTE: PM4/micro-engine firmware registers look to be the same
	 * for a2xx and a3xx.. we could possibly push that part down to
	 * adreno_gpu base class.  Or push both PM4 and PFP but
	 * parameterize the pfp ucode addr/data registers..
	 */
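
	/* A sketch of how that could look (hypothetical helper, not an
	 * existing API), with the ucode addr/data registers passed in:
	 *
	 *   static void load_ucode(struct msm_gpu *gpu, uint32_t addr_reg,
	 *		uint32_t data_reg, const struct firmware *fw)
	 *   {
	 *	const uint32_t *ptr = (const uint32_t *)fw->data;
	 *	uint32_t i, len = fw->size / 4;
	 *
	 *	gpu_write(gpu, addr_reg, 0);
	 *	for (i = 1; i < len; i++)
	 *		gpu_write(gpu, data_reg, ptr[i]);
	 *   }
	 *
	 * so PM4 would become load_ucode(gpu, REG_AXXX_CP_ME_RAM_WADDR,
	 * REG_AXXX_CP_ME_RAM_DATA, adreno_gpu->pm4), and likewise for PFP.
	 */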

	/* Load PM4: */
	ptr = (uint32_t *)(adreno_gpu->pm4->data);
	len = adreno_gpu->pm4->size / 4;
	DBG("loading PM4 ucode version: %x", ptr[1]);

	gpu_write(gpu, REG_AXXX_CP_DEBUG,
			AXXX_CP_DEBUG_DYNAMIC_CLK_DISABLE |
			AXXX_CP_DEBUG_MIU_128BIT_WRITE_ENABLE);
	gpu_write(gpu, REG_AXXX_CP_ME_RAM_WADDR, 0);
	for (i = 1; i < len; i++)
		gpu_write(gpu, REG_AXXX_CP_ME_RAM_DATA, ptr[i]);

	/* Load PFP: */
	ptr = (uint32_t *)(adreno_gpu->pfp->data);
	len = adreno_gpu->pfp->size / 4;
	DBG("loading PFP ucode version: %x", ptr[5]);

	gpu_write(gpu, REG_A3XX_CP_PFP_UCODE_ADDR, 0);
	for (i = 1; i < len; i++)
		gpu_write(gpu, REG_A3XX_CP_PFP_UCODE_DATA, ptr[i]);

	/* CP ROQ queue sizes (bytes) - RB:16, ST:16, IB1:32, IB2:64 */
	if (adreno_is_a305(adreno_gpu) || adreno_is_a320(adreno_gpu)) {
		gpu_write(gpu, REG_AXXX_CP_QUEUE_THRESHOLDS,
				AXXX_CP_QUEUE_THRESHOLDS_CSQ_IB1_START(2) |
				AXXX_CP_QUEUE_THRESHOLDS_CSQ_IB2_START(6) |
				AXXX_CP_QUEUE_THRESHOLDS_CSQ_ST_START(14));
	} else if (adreno_is_a330(adreno_gpu)) {
		/* NOTE: this (value taken from the downstream android
		 * driver) includes some bits outside of the known
		 * bitfields.  But A330 has this "MERCIU queue" thing too,
		 * which might explain a new bitfield or reshuffling:
		 */
		gpu_write(gpu, REG_AXXX_CP_QUEUE_THRESHOLDS, 0x003e2008);
	}

	/* clear ME_HALT to start micro engine */
	gpu_write(gpu, REG_AXXX_CP_ME_CNTL, 0);

	a3xx_me_init(gpu);

	return 0;
}

static void a3xx_recover(struct msm_gpu *gpu)
{
	/* dump registers before resetting gpu, if enabled: */
	if (hang_debug)
		a3xx_dump(gpu);
	gpu_write(gpu, REG_A3XX_RBBM_SW_RESET_CMD, 1);
	gpu_read(gpu, REG_A3XX_RBBM_SW_RESET_CMD);
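	/* (the read back above is presumably to make sure the reset write
	 * has posted before it is cleared again)
	 */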
	gpu_write(gpu, REG_A3XX_RBBM_SW_RESET_CMD, 0);
	adreno_recover(gpu);
}

static void a3xx_destroy(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a3xx_gpu *a3xx_gpu = to_a3xx_gpu(adreno_gpu);

	DBG("%s", gpu->name);

	adreno_gpu_cleanup(adreno_gpu);

#ifdef CONFIG_MSM_OCMEM
	if (a3xx_gpu->ocmem_base)
		ocmem_free(OCMEM_GRAPHICS, a3xx_gpu->ocmem_hdl);
#endif

	kfree(a3xx_gpu);
}

static void a3xx_idle(struct msm_gpu *gpu)
{
	/* wait for ringbuffer to drain: */
	adreno_idle(gpu);

	/* then wait for GPU to finish: */
	if (spin_until(!(gpu_read(gpu, REG_A3XX_RBBM_STATUS) &
			A3XX_RBBM_STATUS_GPU_BUSY)))
		DRM_ERROR("%s: timeout waiting for GPU to idle!\n", gpu->name);

	/* TODO maybe we need to reset GPU here to recover from hang? */
}

static irqreturn_t a3xx_irq(struct msm_gpu *gpu)
{
	uint32_t status;

	status = gpu_read(gpu, REG_A3XX_RBBM_INT_0_STATUS);
	DBG("%s: %08x", gpu->name, status);

	// TODO

	gpu_write(gpu, REG_A3XX_RBBM_INT_CLEAR_CMD, status);

	msm_gpu_retire(gpu);

	return IRQ_HANDLED;
}

static const unsigned int a3xx_registers[] = {
	0x0000, 0x0002, 0x0010, 0x0012, 0x0018, 0x0018, 0x0020, 0x0027,
	0x0029, 0x002b, 0x002e, 0x0033, 0x0040, 0x0042, 0x0050, 0x005c,
	0x0060, 0x006c, 0x0080, 0x0082, 0x0084, 0x0088, 0x0090, 0x00e5,
	0x00ea, 0x00ed, 0x0100, 0x0100, 0x0110, 0x0123, 0x01c0, 0x01c1,
	0x01c3, 0x01c5, 0x01c7, 0x01c7, 0x01d5, 0x01d9, 0x01dc, 0x01dd,
	0x01ea, 0x01ea, 0x01ee, 0x01f1, 0x01f5, 0x01f5, 0x01fc, 0x01ff,
	0x0440, 0x0440, 0x0443, 0x0443, 0x0445, 0x0445, 0x044d, 0x044f,
	0x0452, 0x0452, 0x0454, 0x046f, 0x047c, 0x047c, 0x047f, 0x047f,
	0x0578, 0x057f, 0x0600, 0x0602, 0x0605, 0x0607, 0x060a, 0x060e,
	0x0612, 0x0614, 0x0c01, 0x0c02, 0x0c06, 0x0c1d, 0x0c3d, 0x0c3f,
	0x0c48, 0x0c4b, 0x0c80, 0x0c80, 0x0c88, 0x0c8b, 0x0ca0, 0x0cb7,
	0x0cc0, 0x0cc1, 0x0cc6, 0x0cc7, 0x0ce4, 0x0ce5, 0x0e00, 0x0e05,
	0x0e0c, 0x0e0c, 0x0e22, 0x0e23, 0x0e41, 0x0e45, 0x0e64, 0x0e65,
	0x0e80, 0x0e82, 0x0e84, 0x0e89, 0x0ea0, 0x0ea1, 0x0ea4, 0x0ea7,
	0x0ec4, 0x0ecb, 0x0ee0, 0x0ee0, 0x0f00, 0x0f01, 0x0f03, 0x0f09,
	0x2040, 0x2040, 0x2044, 0x2044, 0x2048, 0x204d, 0x2068, 0x2069,
	0x206c, 0x206d, 0x2070, 0x2070, 0x2072, 0x2072, 0x2074, 0x2075,
	0x2079, 0x207a, 0x20c0, 0x20d3, 0x20e4, 0x20ef, 0x2100, 0x2109,
	0x210c, 0x210c, 0x210e, 0x210e, 0x2110, 0x2111, 0x2114, 0x2115,
	0x21e4, 0x21e4, 0x21ea, 0x21ea, 0x21ec, 0x21ed, 0x21f0, 0x21f0,
	0x2200, 0x2212, 0x2214, 0x2217, 0x221a, 0x221a, 0x2240, 0x227e,
	0x2280, 0x228b, 0x22c0, 0x22c0, 0x22c4, 0x22ce, 0x22d0, 0x22d8,
	0x22df, 0x22e6, 0x22e8, 0x22e9, 0x22ec, 0x22ec, 0x22f0, 0x22f7,
	0x22ff, 0x22ff, 0x2340, 0x2343, 0x2348, 0x2349, 0x2350, 0x2356,
	0x2360, 0x2360, 0x2440, 0x2440, 0x2444, 0x2444, 0x2448, 0x244d,
	0x2468, 0x2469, 0x246c, 0x246d, 0x2470, 0x2470, 0x2472, 0x2472,
	0x2474, 0x2475, 0x2479, 0x247a, 0x24c0, 0x24d3, 0x24e4, 0x24ef,
	0x2500, 0x2509, 0x250c, 0x250c, 0x250e, 0x250e, 0x2510, 0x2511,
	0x2514, 0x2515, 0x25e4, 0x25e4, 0x25ea, 0x25ea, 0x25ec, 0x25ed,
	0x25f0, 0x25f0, 0x2600, 0x2612, 0x2614, 0x2617, 0x261a, 0x261a,
	0x2640, 0x267e, 0x2680, 0x268b, 0x26c0, 0x26c0, 0x26c4, 0x26ce,
	0x26d0, 0x26d8, 0x26df, 0x26e6, 0x26e8, 0x26e9, 0x26ec, 0x26ec,
	0x26f0, 0x26f7, 0x26ff, 0x26ff, 0x2740, 0x2743, 0x2748, 0x2749,
	0x2750, 0x2756, 0x2760, 0x2760, 0x300c, 0x300e, 0x301c, 0x301d,
	0x302a, 0x302a, 0x302c, 0x302d, 0x3030, 0x3031, 0x3034, 0x3036,
	0x303c, 0x303c, 0x305e, 0x305f,
};

#ifdef CONFIG_DEBUG_FS
static void a3xx_show(struct msm_gpu *gpu, struct seq_file *m)
{
	struct drm_device *dev = gpu->dev;
	int i;

	adreno_show(gpu, m);

	mutex_lock(&dev->struct_mutex);

	gpu->funcs->pm_resume(gpu);

	seq_printf(m, "status:   %08x\n",
			gpu_read(gpu, REG_A3XX_RBBM_STATUS));

	/* dump these out in a form that can be parsed by demsm: */
	seq_printf(m, "IO:region %s 00000000 00020000\n", gpu->name);
	for (i = 0; i < ARRAY_SIZE(a3xx_registers); i += 2) {
		uint32_t start = a3xx_registers[i];
		uint32_t end   = a3xx_registers[i+1];
		uint32_t addr;

		for (addr = start; addr <= end; addr++) {
			uint32_t val = gpu_read(gpu, addr);
			seq_printf(m, "IO:R %08x %08x\n", addr<<2, val);
		}
	}

	gpu->funcs->pm_suspend(gpu);

	mutex_unlock(&dev->struct_mutex);
}
#endif

/* would be nice to not have to duplicate the _show() stuff with printk(): */
static void a3xx_dump(struct msm_gpu *gpu)
{
	int i;

	adreno_dump(gpu);
	printk("status:   %08x\n",
			gpu_read(gpu, REG_A3XX_RBBM_STATUS));

	/* dump these out in a form that can be parsed by demsm: */
	printk("IO:region %s 00000000 00020000\n", gpu->name);
	for (i = 0; i < ARRAY_SIZE(a3xx_registers); i += 2) {
		uint32_t start = a3xx_registers[i];
		uint32_t end   = a3xx_registers[i+1];
		uint32_t addr;

		for (addr = start; addr <= end; addr++) {
			uint32_t val = gpu_read(gpu, addr);
			printk("IO:R %08x %08x\n", addr<<2, val);
		}
	}
}
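
/* A sketch of one way to do that (hypothetical, not an existing API):
 * take a printer callback, and make a3xx_show()/a3xx_dump() thin
 * seq_printf()/printk() wrappers around it:
 *
 *   static void a3xx_dump_regs(struct msm_gpu *gpu,
 *		void (*print)(void *priv, const char *fmt, ...), void *priv)
 *   {
 *	int i;
 *
 *	print(priv, "status:   %08x\n",
 *			gpu_read(gpu, REG_A3XX_RBBM_STATUS));
 *	print(priv, "IO:region %s 00000000 00020000\n", gpu->name);
 *	for (i = 0; i < ARRAY_SIZE(a3xx_registers); i += 2) {
 *		uint32_t addr;
 *		for (addr = a3xx_registers[i];
 *				addr <= a3xx_registers[i+1]; addr++)
 *			print(priv, "IO:R %08x %08x\n", addr << 2,
 *					gpu_read(gpu, addr));
 *	}
 *   }
 */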

static const struct adreno_gpu_funcs funcs = {
	.base = {
		.get_param = adreno_get_param,
		.hw_init = a3xx_hw_init,
		.pm_suspend = msm_gpu_pm_suspend,
		.pm_resume = msm_gpu_pm_resume,
		.recover = a3xx_recover,
		.last_fence = adreno_last_fence,
		.submit = adreno_submit,
		.flush = adreno_flush,
		.idle = a3xx_idle,
		.irq = a3xx_irq,
		.destroy = a3xx_destroy,
#ifdef CONFIG_DEBUG_FS
		.show = a3xx_show,
#endif
	},
};

struct msm_gpu *a3xx_gpu_init(struct drm_device *dev)
{
	struct a3xx_gpu *a3xx_gpu = NULL;
	struct adreno_gpu *adreno_gpu;
	struct msm_gpu *gpu;
	struct msm_drm_private *priv = dev->dev_private;
	struct platform_device *pdev = priv->gpu_pdev;
	struct adreno_platform_config *config;
	int ret;

	if (!pdev) {
		dev_err(dev->dev, "no a3xx device\n");
		ret = -ENXIO;
		goto fail;
	}

	config = pdev->dev.platform_data;

	a3xx_gpu = kzalloc(sizeof(*a3xx_gpu), GFP_KERNEL);
	if (!a3xx_gpu) {
		ret = -ENOMEM;
		goto fail;
	}

	adreno_gpu = &a3xx_gpu->base;
	gpu = &adreno_gpu->base;

	a3xx_gpu->pdev = pdev;

	gpu->fast_rate = config->fast_rate;
	gpu->slow_rate = config->slow_rate;
	gpu->bus_freq  = config->bus_freq;
#ifdef CONFIG_MSM_BUS_SCALING
	gpu->bus_scale_table = config->bus_scale_table;
#endif

	DBG("fast_rate=%u, slow_rate=%u, bus_freq=%u",
			gpu->fast_rate, gpu->slow_rate, gpu->bus_freq);

	ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs, config->rev);
	if (ret)
		goto fail;

	/* if needed, allocate gmem: */
	if (adreno_is_a330(adreno_gpu)) {
#ifdef CONFIG_MSM_OCMEM
		/* TODO this is different/missing upstream: */
		struct ocmem_buf *ocmem_hdl =
				ocmem_allocate(OCMEM_GRAPHICS, adreno_gpu->gmem);

		/* bail if the allocation failed (assuming the downstream
		 * convention of a NULL return on failure):
		 */
		if (!ocmem_hdl) {
			ret = -ENOMEM;
			goto fail;
		}

		a3xx_gpu->ocmem_hdl = ocmem_hdl;
		a3xx_gpu->ocmem_base = ocmem_hdl->addr;
		adreno_gpu->gmem = ocmem_hdl->len;
		DBG("using %dK of OCMEM at 0x%08x", adreno_gpu->gmem / 1024,
				a3xx_gpu->ocmem_base);
#endif
	}

	if (!gpu->mmu) {
		/* TODO we think it is possible to configure the GPU to
		 * restrict access to VRAM carveout.  But the required
		 * registers are unknown.  For now just bail out and
		 * limp along with just modesetting.  If it turns out
		 * to not be possible to restrict access, then we must
		 * implement a cmdstream validator.
		 */
		dev_err(dev->dev, "No memory protection without IOMMU\n");
		ret = -ENXIO;
		goto fail;
	}

	return gpu;

fail:
	if (a3xx_gpu)
		a3xx_destroy(&a3xx_gpu->base.base);

	return ERR_PTR(ret);
}

/*
 * The a3xx device:
 */

#if defined(CONFIG_MSM_BUS_SCALING) && !defined(CONFIG_OF)
#  include <mach/kgsl.h>
#endif

static void set_gpu_pdev(struct drm_device *dev,
		struct platform_device *pdev)
{
	struct msm_drm_private *priv = dev->dev_private;
	priv->gpu_pdev = pdev;
}

static int a3xx_bind(struct device *dev, struct device *master, void *data)
{
	static struct adreno_platform_config config = {};
#ifdef CONFIG_OF
	struct device_node *child, *node = dev->of_node;
	u32 val;
	int ret;

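	/* Example of the node shape this expects (hypothetical values; a
	 * chipid of 0x03020100 would decode to ADRENO_REV(3, 2, 1, 0)):
	 *
	 *   gpu {
	 *	compatible = "qcom,kgsl-3d0";
	 *	qcom,chipid = <0x03020100>;
	 *
	 *	qcom,gpu-pwrlevels {
	 *		compatible = "qcom,gpu-pwrlevels";
	 *		qcom,gpu-pwrlevel@0 {
	 *			qcom,gpu-freq = <450000000>;
	 *		};
	 *		qcom,gpu-pwrlevel@1 {
	 *			qcom,gpu-freq = <27000000>;
	 *		};
	 *	};
	 *   };
	 */
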
	ret = of_property_read_u32(node, "qcom,chipid", &val);
	if (ret) {
		dev_err(dev, "could not find chipid: %d\n", ret);
		return ret;
	}

	config.rev = ADRENO_REV((val >> 24) & 0xff,
			(val >> 16) & 0xff, (val >> 8) & 0xff, val & 0xff);

	/* find clock rates: */
	config.fast_rate = 0;
	config.slow_rate = ~0;
	for_each_child_of_node(node, child) {
		if (of_device_is_compatible(child, "qcom,gpu-pwrlevels")) {
			struct device_node *pwrlvl;
			for_each_child_of_node(child, pwrlvl) {
				ret = of_property_read_u32(pwrlvl, "qcom,gpu-freq", &val);
				if (ret) {
					dev_err(dev, "could not find gpu-freq: %d\n", ret);
					return ret;
				}
				config.fast_rate = max(config.fast_rate, val);
				config.slow_rate = min(config.slow_rate, val);
			}
		}
	}

	if (!config.fast_rate) {
		dev_err(dev, "could not find clk rates\n");
		return -ENXIO;
	}

#else
	struct kgsl_device_platform_data *pdata = dev->platform_data;
	uint32_t version = socinfo_get_version();
	if (cpu_is_apq8064ab()) {
		config.fast_rate = 450000000;
		config.slow_rate = 27000000;
		config.bus_freq  = 4;
		config.rev = ADRENO_REV(3, 2, 1, 0);
	} else if (cpu_is_apq8064()) {
		config.fast_rate = 400000000;
		config.slow_rate = 27000000;
		config.bus_freq  = 4;

		if (SOCINFO_VERSION_MAJOR(version) == 2)
			config.rev = ADRENO_REV(3, 2, 0, 2);
		else if ((SOCINFO_VERSION_MAJOR(version) == 1) &&
				(SOCINFO_VERSION_MINOR(version) == 1))
			config.rev = ADRENO_REV(3, 2, 0, 1);
		else
			config.rev = ADRENO_REV(3, 2, 0, 0);

	} else if (cpu_is_msm8960ab()) {
		config.fast_rate = 400000000;
		config.slow_rate = 320000000;
		config.bus_freq  = 4;

		if (SOCINFO_VERSION_MINOR(version) == 0)
			config.rev = ADRENO_REV(3, 2, 1, 0);
		else
			config.rev = ADRENO_REV(3, 2, 1, 1);

	} else if (cpu_is_msm8930()) {
		config.fast_rate = 400000000;
		config.slow_rate = 27000000;
		config.bus_freq  = 3;

		if ((SOCINFO_VERSION_MAJOR(version) == 1) &&
			(SOCINFO_VERSION_MINOR(version) == 2))
			config.rev = ADRENO_REV(3, 0, 5, 2);
		else
			config.rev = ADRENO_REV(3, 0, 5, 0);

	}
#  ifdef CONFIG_MSM_BUS_SCALING
	config.bus_scale_table = pdata->bus_scale_table;
#  endif
#endif
	dev->platform_data = &config;
	set_gpu_pdev(dev_get_drvdata(master), to_platform_device(dev));
	return 0;
}

static void a3xx_unbind(struct device *dev, struct device *master,
		void *data)
{
	set_gpu_pdev(dev_get_drvdata(master), NULL);
}

static const struct component_ops a3xx_ops = {
		.bind   = a3xx_bind,
		.unbind = a3xx_unbind,
};

static int a3xx_probe(struct platform_device *pdev)
{
	return component_add(&pdev->dev, &a3xx_ops);
}

static int a3xx_remove(struct platform_device *pdev)
{
	component_del(&pdev->dev, &a3xx_ops);
	return 0;
}

static const struct of_device_id dt_match[] = {
	{ .compatible = "qcom,kgsl-3d0" },
	{}
};

static struct platform_driver a3xx_driver = {
	.probe = a3xx_probe,
	.remove = a3xx_remove,
	.driver = {
		.name = "kgsl-3d0",
		.of_match_table = dt_match,
	},
};

void __init a3xx_register(void)
{
	platform_driver_register(&a3xx_driver);
}

void __exit a3xx_unregister(void)
{
	platform_driver_unregister(&a3xx_driver);
}