xref: /openbmc/linux/drivers/gpu/drm/msm/adreno/a3xx_gpu.c (revision 7051924f771722c6dd235e693742cda6488ac700)
1 /*
2  * Copyright (C) 2013 Red Hat
3  * Author: Rob Clark <robdclark@gmail.com>
4  *
5  * This program is free software; you can redistribute it and/or modify it
6  * under the terms of the GNU General Public License version 2 as published by
7  * the Free Software Foundation.
8  *
9  * This program is distributed in the hope that it will be useful, but WITHOUT
10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
12  * more details.
13  *
14  * You should have received a copy of the GNU General Public License along with
15  * this program.  If not, see <http://www.gnu.org/licenses/>.
16  */
17 
18 #ifdef CONFIG_MSM_OCMEM
19 #  include <mach/ocmem.h>
20 #endif
21 
22 #include "a3xx_gpu.h"
23 
/*
 * Interrupt sources written to REG_A3XX_RBBM_INT_0_MASK by
 * a3xx_hw_init(): RBBM bus errors, CP faults/IB/RB interrupts and
 * UCHE out-of-bounds accesses.
 */
#define A3XX_INT0_MASK (                        \
		A3XX_INT0_RBBM_AHB_ERROR        | \
		A3XX_INT0_RBBM_ATB_BUS_OVERFLOW | \
		A3XX_INT0_CP_T0_PACKET_IN_IB    | \
		A3XX_INT0_CP_OPCODE_ERROR       | \
		A3XX_INT0_CP_RESERVED_BIT_ERROR | \
		A3XX_INT0_CP_HW_FAULT           | \
		A3XX_INT0_CP_IB1_INT            | \
		A3XX_INT0_CP_IB2_INT            | \
		A3XX_INT0_CP_RB_INT             | \
		A3XX_INT0_CP_REG_PROTECT_FAULT  | \
		A3XX_INT0_CP_AHB_ERROR_HALT     | \
		A3XX_INT0_UCHE_OOB_ACCESS)
37 
38 
39 static bool hang_debug = false;
40 MODULE_PARM_DESC(hang_debug, "Dump registers when hang is detected (can be slow!)");
41 module_param_named(hang_debug, hang_debug, bool, 0600);
42 static void a3xx_dump(struct msm_gpu *gpu);
43 
44 static void a3xx_me_init(struct msm_gpu *gpu)
45 {
46 	struct msm_ringbuffer *ring = gpu->rb;
47 
48 	OUT_PKT3(ring, CP_ME_INIT, 17);
49 	OUT_RING(ring, 0x000003f7);
50 	OUT_RING(ring, 0x00000000);
51 	OUT_RING(ring, 0x00000000);
52 	OUT_RING(ring, 0x00000000);
53 	OUT_RING(ring, 0x00000080);
54 	OUT_RING(ring, 0x00000100);
55 	OUT_RING(ring, 0x00000180);
56 	OUT_RING(ring, 0x00006600);
57 	OUT_RING(ring, 0x00000150);
58 	OUT_RING(ring, 0x0000014e);
59 	OUT_RING(ring, 0x00000154);
60 	OUT_RING(ring, 0x00000001);
61 	OUT_RING(ring, 0x00000000);
62 	OUT_RING(ring, 0x00000000);
63 	OUT_RING(ring, 0x00000000);
64 	OUT_RING(ring, 0x00000000);
65 	OUT_RING(ring, 0x00000000);
66 
67 	gpu->funcs->flush(gpu);
68 	gpu->funcs->idle(gpu);
69 }
70 
71 static int a3xx_hw_init(struct msm_gpu *gpu)
72 {
73 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
74 	struct a3xx_gpu *a3xx_gpu = to_a3xx_gpu(adreno_gpu);
75 	uint32_t *ptr, len;
76 	int i, ret;
77 
78 	DBG("%s", gpu->name);
79 
80 	if (adreno_is_a305(adreno_gpu)) {
81 		/* Set up 16 deep read/write request queues: */
82 		gpu_write(gpu, REG_A3XX_VBIF_IN_RD_LIM_CONF0, 0x10101010);
83 		gpu_write(gpu, REG_A3XX_VBIF_IN_RD_LIM_CONF1, 0x10101010);
84 		gpu_write(gpu, REG_A3XX_VBIF_OUT_RD_LIM_CONF0, 0x10101010);
85 		gpu_write(gpu, REG_A3XX_VBIF_OUT_WR_LIM_CONF0, 0x10101010);
86 		gpu_write(gpu, REG_A3XX_VBIF_DDR_OUT_MAX_BURST, 0x0000303);
87 		gpu_write(gpu, REG_A3XX_VBIF_IN_WR_LIM_CONF0, 0x10101010);
88 		gpu_write(gpu, REG_A3XX_VBIF_IN_WR_LIM_CONF1, 0x10101010);
89 		/* Enable WR-REQ: */
90 		gpu_write(gpu, REG_A3XX_VBIF_GATE_OFF_WRREQ_EN, 0x0000ff);
91 		/* Set up round robin arbitration between both AXI ports: */
92 		gpu_write(gpu, REG_A3XX_VBIF_ARB_CTL, 0x00000030);
93 		/* Set up AOOO: */
94 		gpu_write(gpu, REG_A3XX_VBIF_OUT_AXI_AOOO_EN, 0x0000003c);
95 		gpu_write(gpu, REG_A3XX_VBIF_OUT_AXI_AOOO, 0x003c003c);
96 
97 	} else if (adreno_is_a320(adreno_gpu)) {
98 		/* Set up 16 deep read/write request queues: */
99 		gpu_write(gpu, REG_A3XX_VBIF_IN_RD_LIM_CONF0, 0x10101010);
100 		gpu_write(gpu, REG_A3XX_VBIF_IN_RD_LIM_CONF1, 0x10101010);
101 		gpu_write(gpu, REG_A3XX_VBIF_OUT_RD_LIM_CONF0, 0x10101010);
102 		gpu_write(gpu, REG_A3XX_VBIF_OUT_WR_LIM_CONF0, 0x10101010);
103 		gpu_write(gpu, REG_A3XX_VBIF_DDR_OUT_MAX_BURST, 0x0000303);
104 		gpu_write(gpu, REG_A3XX_VBIF_IN_WR_LIM_CONF0, 0x10101010);
105 		gpu_write(gpu, REG_A3XX_VBIF_IN_WR_LIM_CONF1, 0x10101010);
106 		/* Enable WR-REQ: */
107 		gpu_write(gpu, REG_A3XX_VBIF_GATE_OFF_WRREQ_EN, 0x0000ff);
108 		/* Set up round robin arbitration between both AXI ports: */
109 		gpu_write(gpu, REG_A3XX_VBIF_ARB_CTL, 0x00000030);
110 		/* Set up AOOO: */
111 		gpu_write(gpu, REG_A3XX_VBIF_OUT_AXI_AOOO_EN, 0x0000003c);
112 		gpu_write(gpu, REG_A3XX_VBIF_OUT_AXI_AOOO, 0x003c003c);
113 		/* Enable 1K sort: */
114 		gpu_write(gpu, REG_A3XX_VBIF_ABIT_SORT, 0x000000ff);
115 		gpu_write(gpu, REG_A3XX_VBIF_ABIT_SORT_CONF, 0x000000a4);
116 
117 	} else if (adreno_is_a330v2(adreno_gpu)) {
118 		/*
119 		 * Most of the VBIF registers on 8974v2 have the correct
120 		 * values at power on, so we won't modify those if we don't
121 		 * need to
122 		 */
123 		/* Enable 1k sort: */
124 		gpu_write(gpu, REG_A3XX_VBIF_ABIT_SORT, 0x0001003f);
125 		gpu_write(gpu, REG_A3XX_VBIF_ABIT_SORT_CONF, 0x000000a4);
126 		/* Enable WR-REQ: */
127 		gpu_write(gpu, REG_A3XX_VBIF_GATE_OFF_WRREQ_EN, 0x00003f);
128 		gpu_write(gpu, REG_A3XX_VBIF_DDR_OUT_MAX_BURST, 0x0000303);
129 		/* Set up VBIF_ROUND_ROBIN_QOS_ARB: */
130 		gpu_write(gpu, REG_A3XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x0003);
131 
132 	} else if (adreno_is_a330(adreno_gpu)) {
133 		/* Set up 16 deep read/write request queues: */
134 		gpu_write(gpu, REG_A3XX_VBIF_IN_RD_LIM_CONF0, 0x18181818);
135 		gpu_write(gpu, REG_A3XX_VBIF_IN_RD_LIM_CONF1, 0x18181818);
136 		gpu_write(gpu, REG_A3XX_VBIF_OUT_RD_LIM_CONF0, 0x18181818);
137 		gpu_write(gpu, REG_A3XX_VBIF_OUT_WR_LIM_CONF0, 0x18181818);
138 		gpu_write(gpu, REG_A3XX_VBIF_DDR_OUT_MAX_BURST, 0x0000303);
139 		gpu_write(gpu, REG_A3XX_VBIF_IN_WR_LIM_CONF0, 0x18181818);
140 		gpu_write(gpu, REG_A3XX_VBIF_IN_WR_LIM_CONF1, 0x18181818);
141 		/* Enable WR-REQ: */
142 		gpu_write(gpu, REG_A3XX_VBIF_GATE_OFF_WRREQ_EN, 0x00003f);
143 		/* Set up round robin arbitration between both AXI ports: */
144 		gpu_write(gpu, REG_A3XX_VBIF_ARB_CTL, 0x00000030);
145 		/* Set up VBIF_ROUND_ROBIN_QOS_ARB: */
146 		gpu_write(gpu, REG_A3XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x0001);
147 		/* Set up AOOO: */
148 		gpu_write(gpu, REG_A3XX_VBIF_OUT_AXI_AOOO_EN, 0x0000003f);
149 		gpu_write(gpu, REG_A3XX_VBIF_OUT_AXI_AOOO, 0x003f003f);
150 		/* Enable 1K sort: */
151 		gpu_write(gpu, REG_A3XX_VBIF_ABIT_SORT, 0x0001003f);
152 		gpu_write(gpu, REG_A3XX_VBIF_ABIT_SORT_CONF, 0x000000a4);
153 		/* Disable VBIF clock gating. This is to enable AXI running
154 		 * higher frequency than GPU:
155 		 */
156 		gpu_write(gpu, REG_A3XX_VBIF_CLKON, 0x00000001);
157 
158 	} else {
159 		BUG();
160 	}
161 
162 	/* Make all blocks contribute to the GPU BUSY perf counter: */
163 	gpu_write(gpu, REG_A3XX_RBBM_GPU_BUSY_MASKED, 0xffffffff);
164 
165 	/* Tune the hystersis counters for SP and CP idle detection: */
166 	gpu_write(gpu, REG_A3XX_RBBM_SP_HYST_CNT, 0x10);
167 	gpu_write(gpu, REG_A3XX_RBBM_WAIT_IDLE_CLOCKS_CTL, 0x10);
168 
169 	/* Enable the RBBM error reporting bits.  This lets us get
170 	 * useful information on failure:
171 	 */
172 	gpu_write(gpu, REG_A3XX_RBBM_AHB_CTL0, 0x00000001);
173 
174 	/* Enable AHB error reporting: */
175 	gpu_write(gpu, REG_A3XX_RBBM_AHB_CTL1, 0xa6ffffff);
176 
177 	/* Turn on the power counters: */
178 	gpu_write(gpu, REG_A3XX_RBBM_RBBM_CTL, 0x00030000);
179 
180 	/* Turn on hang detection - this spews a lot of useful information
181 	 * into the RBBM registers on a hang:
182 	 */
183 	gpu_write(gpu, REG_A3XX_RBBM_INTERFACE_HANG_INT_CTL, 0x00010fff);
184 
185 	/* Enable 64-byte cacheline size. HW Default is 32-byte (0x000000E0): */
186 	gpu_write(gpu, REG_A3XX_UCHE_CACHE_MODE_CONTROL_REG, 0x00000001);
187 
188 	/* Enable Clock gating: */
189 	if (adreno_is_a320(adreno_gpu))
190 		gpu_write(gpu, REG_A3XX_RBBM_CLOCK_CTL, 0xbfffffff);
191 	else if (adreno_is_a330v2(adreno_gpu))
192 		gpu_write(gpu, REG_A3XX_RBBM_CLOCK_CTL, 0xaaaaaaaa);
193 	else if (adreno_is_a330(adreno_gpu))
194 		gpu_write(gpu, REG_A3XX_RBBM_CLOCK_CTL, 0xbffcffff);
195 
196 	if (adreno_is_a330v2(adreno_gpu))
197 		gpu_write(gpu, REG_A3XX_RBBM_GPR0_CTL, 0x05515455);
198 	else if (adreno_is_a330(adreno_gpu))
199 		gpu_write(gpu, REG_A3XX_RBBM_GPR0_CTL, 0x00000000);
200 
201 	/* Set the OCMEM base address for A330, etc */
202 	if (a3xx_gpu->ocmem_hdl) {
203 		gpu_write(gpu, REG_A3XX_RB_GMEM_BASE_ADDR,
204 			(unsigned int)(a3xx_gpu->ocmem_base >> 14));
205 	}
206 
207 	/* Turn on performance counters: */
208 	gpu_write(gpu, REG_A3XX_RBBM_PERFCTR_CTL, 0x01);
209 
210 	/* Enable the perfcntrs that we use.. */
211 	for (i = 0; i < gpu->num_perfcntrs; i++) {
212 		const struct msm_gpu_perfcntr *perfcntr = &gpu->perfcntrs[i];
213 		gpu_write(gpu, perfcntr->select_reg, perfcntr->select_val);
214 	}
215 
216 	gpu_write(gpu, REG_A3XX_RBBM_INT_0_MASK, A3XX_INT0_MASK);
217 
218 	ret = adreno_hw_init(gpu);
219 	if (ret)
220 		return ret;
221 
222 	/* setup access protection: */
223 	gpu_write(gpu, REG_A3XX_CP_PROTECT_CTRL, 0x00000007);
224 
225 	/* RBBM registers */
226 	gpu_write(gpu, REG_A3XX_CP_PROTECT(0), 0x63000040);
227 	gpu_write(gpu, REG_A3XX_CP_PROTECT(1), 0x62000080);
228 	gpu_write(gpu, REG_A3XX_CP_PROTECT(2), 0x600000cc);
229 	gpu_write(gpu, REG_A3XX_CP_PROTECT(3), 0x60000108);
230 	gpu_write(gpu, REG_A3XX_CP_PROTECT(4), 0x64000140);
231 	gpu_write(gpu, REG_A3XX_CP_PROTECT(5), 0x66000400);
232 
233 	/* CP registers */
234 	gpu_write(gpu, REG_A3XX_CP_PROTECT(6), 0x65000700);
235 	gpu_write(gpu, REG_A3XX_CP_PROTECT(7), 0x610007d8);
236 	gpu_write(gpu, REG_A3XX_CP_PROTECT(8), 0x620007e0);
237 	gpu_write(gpu, REG_A3XX_CP_PROTECT(9), 0x61001178);
238 	gpu_write(gpu, REG_A3XX_CP_PROTECT(10), 0x64001180);
239 
240 	/* RB registers */
241 	gpu_write(gpu, REG_A3XX_CP_PROTECT(11), 0x60003300);
242 
243 	/* VBIF registers */
244 	gpu_write(gpu, REG_A3XX_CP_PROTECT(12), 0x6b00c000);
245 
246 	/* NOTE: PM4/micro-engine firmware registers look to be the same
247 	 * for a2xx and a3xx.. we could possibly push that part down to
248 	 * adreno_gpu base class.  Or push both PM4 and PFP but
249 	 * parameterize the pfp ucode addr/data registers..
250 	 */
251 
252 	/* Load PM4: */
253 	ptr = (uint32_t *)(adreno_gpu->pm4->data);
254 	len = adreno_gpu->pm4->size / 4;
255 	DBG("loading PM4 ucode version: %x", ptr[1]);
256 
257 	gpu_write(gpu, REG_AXXX_CP_DEBUG,
258 			AXXX_CP_DEBUG_DYNAMIC_CLK_DISABLE |
259 			AXXX_CP_DEBUG_MIU_128BIT_WRITE_ENABLE);
260 	gpu_write(gpu, REG_AXXX_CP_ME_RAM_WADDR, 0);
261 	for (i = 1; i < len; i++)
262 		gpu_write(gpu, REG_AXXX_CP_ME_RAM_DATA, ptr[i]);
263 
264 	/* Load PFP: */
265 	ptr = (uint32_t *)(adreno_gpu->pfp->data);
266 	len = adreno_gpu->pfp->size / 4;
267 	DBG("loading PFP ucode version: %x", ptr[5]);
268 
269 	gpu_write(gpu, REG_A3XX_CP_PFP_UCODE_ADDR, 0);
270 	for (i = 1; i < len; i++)
271 		gpu_write(gpu, REG_A3XX_CP_PFP_UCODE_DATA, ptr[i]);
272 
273 	/* CP ROQ queue sizes (bytes) - RB:16, ST:16, IB1:32, IB2:64 */
274 	if (adreno_is_a305(adreno_gpu) || adreno_is_a320(adreno_gpu)) {
275 		gpu_write(gpu, REG_AXXX_CP_QUEUE_THRESHOLDS,
276 				AXXX_CP_QUEUE_THRESHOLDS_CSQ_IB1_START(2) |
277 				AXXX_CP_QUEUE_THRESHOLDS_CSQ_IB2_START(6) |
278 				AXXX_CP_QUEUE_THRESHOLDS_CSQ_ST_START(14));
279 	} else if (adreno_is_a330(adreno_gpu)) {
280 		/* NOTE: this (value take from downstream android driver)
281 		 * includes some bits outside of the known bitfields.  But
282 		 * A330 has this "MERCIU queue" thing too, which might
283 		 * explain a new bitfield or reshuffling:
284 		 */
285 		gpu_write(gpu, REG_AXXX_CP_QUEUE_THRESHOLDS, 0x003e2008);
286 	}
287 
288 	/* clear ME_HALT to start micro engine */
289 	gpu_write(gpu, REG_AXXX_CP_ME_CNTL, 0);
290 
291 	a3xx_me_init(gpu);
292 
293 	return 0;
294 }
295 
296 static void a3xx_recover(struct msm_gpu *gpu)
297 {
298 	/* dump registers before resetting gpu, if enabled: */
299 	if (hang_debug)
300 		a3xx_dump(gpu);
301 	gpu_write(gpu, REG_A3XX_RBBM_SW_RESET_CMD, 1);
302 	gpu_read(gpu, REG_A3XX_RBBM_SW_RESET_CMD);
303 	gpu_write(gpu, REG_A3XX_RBBM_SW_RESET_CMD, 0);
304 	adreno_recover(gpu);
305 }
306 
307 static void a3xx_destroy(struct msm_gpu *gpu)
308 {
309 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
310 	struct a3xx_gpu *a3xx_gpu = to_a3xx_gpu(adreno_gpu);
311 
312 	DBG("%s", gpu->name);
313 
314 	adreno_gpu_cleanup(adreno_gpu);
315 
316 #ifdef CONFIG_MSM_OCMEM
317 	if (a3xx_gpu->ocmem_base)
318 		ocmem_free(OCMEM_GRAPHICS, a3xx_gpu->ocmem_hdl);
319 #endif
320 
321 	kfree(a3xx_gpu);
322 }
323 
324 static void a3xx_idle(struct msm_gpu *gpu)
325 {
326 	/* wait for ringbuffer to drain: */
327 	adreno_idle(gpu);
328 
329 	/* then wait for GPU to finish: */
330 	if (spin_until(!(gpu_read(gpu, REG_A3XX_RBBM_STATUS) &
331 			A3XX_RBBM_STATUS_GPU_BUSY)))
332 		DRM_ERROR("%s: timeout waiting for GPU to idle!\n", gpu->name);
333 
334 	/* TODO maybe we need to reset GPU here to recover from hang? */
335 }
336 
337 static irqreturn_t a3xx_irq(struct msm_gpu *gpu)
338 {
339 	uint32_t status;
340 
341 	status = gpu_read(gpu, REG_A3XX_RBBM_INT_0_STATUS);
342 	DBG("%s: %08x", gpu->name, status);
343 
344 	// TODO
345 
346 	gpu_write(gpu, REG_A3XX_RBBM_INT_CLEAR_CMD, status);
347 
348 	msm_gpu_retire(gpu);
349 
350 	return IRQ_HANDLED;
351 }
352 
/*
 * Register ranges dumped by a3xx_show() and a3xx_dump().  Entries come
 * in (first, last) pairs, both inclusive; two pairs per line.
 */
static const unsigned int a3xx_registers[] = {
	0x0000, 0x0002,   0x0010, 0x0012,
	0x0018, 0x0018,   0x0020, 0x0027,
	0x0029, 0x002b,   0x002e, 0x0033,
	0x0040, 0x0042,   0x0050, 0x005c,
	0x0060, 0x006c,   0x0080, 0x0082,
	0x0084, 0x0088,   0x0090, 0x00e5,
	0x00ea, 0x00ed,   0x0100, 0x0100,
	0x0110, 0x0123,   0x01c0, 0x01c1,
	0x01c3, 0x01c5,   0x01c7, 0x01c7,
	0x01d5, 0x01d9,   0x01dc, 0x01dd,
	0x01ea, 0x01ea,   0x01ee, 0x01f1,
	0x01f5, 0x01f5,   0x01fc, 0x01ff,
	0x0440, 0x0440,   0x0443, 0x0443,
	0x0445, 0x0445,   0x044d, 0x044f,
	0x0452, 0x0452,   0x0454, 0x046f,
	0x047c, 0x047c,   0x047f, 0x047f,
	0x0578, 0x057f,   0x0600, 0x0602,
	0x0605, 0x0607,   0x060a, 0x060e,
	0x0612, 0x0614,   0x0c01, 0x0c02,
	0x0c06, 0x0c1d,   0x0c3d, 0x0c3f,
	0x0c48, 0x0c4b,   0x0c80, 0x0c80,
	0x0c88, 0x0c8b,   0x0ca0, 0x0cb7,
	0x0cc0, 0x0cc1,   0x0cc6, 0x0cc7,
	0x0ce4, 0x0ce5,   0x0e00, 0x0e05,
	0x0e0c, 0x0e0c,   0x0e22, 0x0e23,
	0x0e41, 0x0e45,   0x0e64, 0x0e65,
	0x0e80, 0x0e82,   0x0e84, 0x0e89,
	0x0ea0, 0x0ea1,   0x0ea4, 0x0ea7,
	0x0ec4, 0x0ecb,   0x0ee0, 0x0ee0,
	0x0f00, 0x0f01,   0x0f03, 0x0f09,
	0x2040, 0x2040,   0x2044, 0x2044,
	0x2048, 0x204d,   0x2068, 0x2069,
	0x206c, 0x206d,   0x2070, 0x2070,
	0x2072, 0x2072,   0x2074, 0x2075,
	0x2079, 0x207a,   0x20c0, 0x20d3,
	0x20e4, 0x20ef,   0x2100, 0x2109,
	0x210c, 0x210c,   0x210e, 0x210e,
	0x2110, 0x2111,   0x2114, 0x2115,
	0x21e4, 0x21e4,   0x21ea, 0x21ea,
	0x21ec, 0x21ed,   0x21f0, 0x21f0,
	0x2200, 0x2212,   0x2214, 0x2217,
	0x221a, 0x221a,   0x2240, 0x227e,
	0x2280, 0x228b,   0x22c0, 0x22c0,
	0x22c4, 0x22ce,   0x22d0, 0x22d8,
	0x22df, 0x22e6,   0x22e8, 0x22e9,
	0x22ec, 0x22ec,   0x22f0, 0x22f7,
	0x22ff, 0x22ff,   0x2340, 0x2343,
	0x2348, 0x2349,   0x2350, 0x2356,
	0x2360, 0x2360,   0x2440, 0x2440,
	0x2444, 0x2444,   0x2448, 0x244d,
	0x2468, 0x2469,   0x246c, 0x246d,
	0x2470, 0x2470,   0x2472, 0x2472,
	0x2474, 0x2475,   0x2479, 0x247a,
	0x24c0, 0x24d3,   0x24e4, 0x24ef,
	0x2500, 0x2509,   0x250c, 0x250c,
	0x250e, 0x250e,   0x2510, 0x2511,
	0x2514, 0x2515,   0x25e4, 0x25e4,
	0x25ea, 0x25ea,   0x25ec, 0x25ed,
	0x25f0, 0x25f0,   0x2600, 0x2612,
	0x2614, 0x2617,   0x261a, 0x261a,
	0x2640, 0x267e,   0x2680, 0x268b,
	0x26c0, 0x26c0,   0x26c4, 0x26ce,
	0x26d0, 0x26d8,   0x26df, 0x26e6,
	0x26e8, 0x26e9,   0x26ec, 0x26ec,
	0x26f0, 0x26f7,   0x26ff, 0x26ff,
	0x2740, 0x2743,   0x2748, 0x2749,
	0x2750, 0x2756,   0x2760, 0x2760,
	0x300c, 0x300e,   0x301c, 0x301d,
	0x302a, 0x302a,   0x302c, 0x302d,
	0x3030, 0x3031,   0x3034, 0x3036,
	0x303c, 0x303c,   0x305e, 0x305f,
};
391 
392 #ifdef CONFIG_DEBUG_FS
393 static void a3xx_show(struct msm_gpu *gpu, struct seq_file *m)
394 {
395 	int i;
396 
397 	adreno_show(gpu, m);
398 
399 	gpu->funcs->pm_resume(gpu);
400 
401 	seq_printf(m, "status:   %08x\n",
402 			gpu_read(gpu, REG_A3XX_RBBM_STATUS));
403 
404 	/* dump these out in a form that can be parsed by demsm: */
405 	seq_printf(m, "IO:region %s 00000000 00020000\n", gpu->name);
406 	for (i = 0; i < ARRAY_SIZE(a3xx_registers); i += 2) {
407 		uint32_t start = a3xx_registers[i];
408 		uint32_t end   = a3xx_registers[i+1];
409 		uint32_t addr;
410 
411 		for (addr = start; addr <= end; addr++) {
412 			uint32_t val = gpu_read(gpu, addr);
413 			seq_printf(m, "IO:R %08x %08x\n", addr<<2, val);
414 		}
415 	}
416 
417 	gpu->funcs->pm_suspend(gpu);
418 }
419 #endif
420 
421 /* would be nice to not have to duplicate the _show() stuff with printk(): */
422 static void a3xx_dump(struct msm_gpu *gpu)
423 {
424 	int i;
425 
426 	adreno_dump(gpu);
427 	printk("status:   %08x\n",
428 			gpu_read(gpu, REG_A3XX_RBBM_STATUS));
429 
430 	/* dump these out in a form that can be parsed by demsm: */
431 	printk("IO:region %s 00000000 00020000\n", gpu->name);
432 	for (i = 0; i < ARRAY_SIZE(a3xx_registers); i += 2) {
433 		uint32_t start = a3xx_registers[i];
434 		uint32_t end   = a3xx_registers[i+1];
435 		uint32_t addr;
436 
437 		for (addr = start; addr <= end; addr++) {
438 			uint32_t val = gpu_read(gpu, addr);
439 			printk("IO:R %08x %08x\n", addr<<2, val);
440 		}
441 	}
442 }
443 
444 static const struct adreno_gpu_funcs funcs = {
445 	.base = {
446 		.get_param = adreno_get_param,
447 		.hw_init = a3xx_hw_init,
448 		.pm_suspend = msm_gpu_pm_suspend,
449 		.pm_resume = msm_gpu_pm_resume,
450 		.recover = a3xx_recover,
451 		.last_fence = adreno_last_fence,
452 		.submit = adreno_submit,
453 		.flush = adreno_flush,
454 		.idle = a3xx_idle,
455 		.irq = a3xx_irq,
456 		.destroy = a3xx_destroy,
457 #ifdef CONFIG_DEBUG_FS
458 		.show = a3xx_show,
459 #endif
460 	},
461 };
462 
463 static const struct msm_gpu_perfcntr perfcntrs[] = {
464 	{ REG_A3XX_SP_PERFCOUNTER6_SELECT, REG_A3XX_RBBM_PERFCTR_SP_6_LO,
465 			SP_ALU_ACTIVE_CYCLES, "ALUACTIVE" },
466 	{ REG_A3XX_SP_PERFCOUNTER7_SELECT, REG_A3XX_RBBM_PERFCTR_SP_7_LO,
467 			SP_FS_FULL_ALU_INSTRUCTIONS, "ALUFULL" },
468 };
469 
470 struct msm_gpu *a3xx_gpu_init(struct drm_device *dev)
471 {
472 	struct a3xx_gpu *a3xx_gpu = NULL;
473 	struct adreno_gpu *adreno_gpu;
474 	struct msm_gpu *gpu;
475 	struct msm_drm_private *priv = dev->dev_private;
476 	struct platform_device *pdev = priv->gpu_pdev;
477 	struct adreno_platform_config *config;
478 	int ret;
479 
480 	if (!pdev) {
481 		dev_err(dev->dev, "no a3xx device\n");
482 		ret = -ENXIO;
483 		goto fail;
484 	}
485 
486 	config = pdev->dev.platform_data;
487 
488 	a3xx_gpu = kzalloc(sizeof(*a3xx_gpu), GFP_KERNEL);
489 	if (!a3xx_gpu) {
490 		ret = -ENOMEM;
491 		goto fail;
492 	}
493 
494 	adreno_gpu = &a3xx_gpu->base;
495 	gpu = &adreno_gpu->base;
496 
497 	a3xx_gpu->pdev = pdev;
498 
499 	gpu->fast_rate = config->fast_rate;
500 	gpu->slow_rate = config->slow_rate;
501 	gpu->bus_freq  = config->bus_freq;
502 #ifdef CONFIG_MSM_BUS_SCALING
503 	gpu->bus_scale_table = config->bus_scale_table;
504 #endif
505 
506 	DBG("fast_rate=%u, slow_rate=%u, bus_freq=%u",
507 			gpu->fast_rate, gpu->slow_rate, gpu->bus_freq);
508 
509 	gpu->perfcntrs = perfcntrs;
510 	gpu->num_perfcntrs = ARRAY_SIZE(perfcntrs);
511 
512 	ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs, config->rev);
513 	if (ret)
514 		goto fail;
515 
516 	/* if needed, allocate gmem: */
517 	if (adreno_is_a330(adreno_gpu)) {
518 #ifdef CONFIG_MSM_OCMEM
519 		/* TODO this is different/missing upstream: */
520 		struct ocmem_buf *ocmem_hdl =
521 				ocmem_allocate(OCMEM_GRAPHICS, adreno_gpu->gmem);
522 
523 		a3xx_gpu->ocmem_hdl = ocmem_hdl;
524 		a3xx_gpu->ocmem_base = ocmem_hdl->addr;
525 		adreno_gpu->gmem = ocmem_hdl->len;
526 		DBG("using %dK of OCMEM at 0x%08x", adreno_gpu->gmem / 1024,
527 				a3xx_gpu->ocmem_base);
528 #endif
529 	}
530 
531 	if (!gpu->mmu) {
532 		/* TODO we think it is possible to configure the GPU to
533 		 * restrict access to VRAM carveout.  But the required
534 		 * registers are unknown.  For now just bail out and
535 		 * limp along with just modesetting.  If it turns out
536 		 * to not be possible to restrict access, then we must
537 		 * implement a cmdstream validator.
538 		 */
539 		dev_err(dev->dev, "No memory protection without IOMMU\n");
540 		ret = -ENXIO;
541 		goto fail;
542 	}
543 
544 	return gpu;
545 
546 fail:
547 	if (a3xx_gpu)
548 		a3xx_destroy(&a3xx_gpu->base.base);
549 
550 	return ERR_PTR(ret);
551 }
552 
553 /*
554  * The a3xx device:
555  */
556 
557 #if defined(CONFIG_MSM_BUS_SCALING) && !defined(CONFIG_OF)
558 #  include <mach/kgsl.h>
559 #endif
560 
561 static void set_gpu_pdev(struct drm_device *dev,
562 		struct platform_device *pdev)
563 {
564 	struct msm_drm_private *priv = dev->dev_private;
565 	priv->gpu_pdev = pdev;
566 }
567 
568 static int a3xx_bind(struct device *dev, struct device *master, void *data)
569 {
570 	static struct adreno_platform_config config = {};
571 #ifdef CONFIG_OF
572 	struct device_node *child, *node = dev->of_node;
573 	u32 val;
574 	int ret;
575 
576 	ret = of_property_read_u32(node, "qcom,chipid", &val);
577 	if (ret) {
578 		dev_err(dev, "could not find chipid: %d\n", ret);
579 		return ret;
580 	}
581 
582 	config.rev = ADRENO_REV((val >> 24) & 0xff,
583 			(val >> 16) & 0xff, (val >> 8) & 0xff, val & 0xff);
584 
585 	/* find clock rates: */
586 	config.fast_rate = 0;
587 	config.slow_rate = ~0;
588 	for_each_child_of_node(node, child) {
589 		if (of_device_is_compatible(child, "qcom,gpu-pwrlevels")) {
590 			struct device_node *pwrlvl;
591 			for_each_child_of_node(child, pwrlvl) {
592 				ret = of_property_read_u32(pwrlvl, "qcom,gpu-freq", &val);
593 				if (ret) {
594 					dev_err(dev, "could not find gpu-freq: %d\n", ret);
595 					return ret;
596 				}
597 				config.fast_rate = max(config.fast_rate, val);
598 				config.slow_rate = min(config.slow_rate, val);
599 			}
600 		}
601 	}
602 
603 	if (!config.fast_rate) {
604 		dev_err(dev, "could not find clk rates\n");
605 		return -ENXIO;
606 	}
607 
608 #else
609 	struct kgsl_device_platform_data *pdata = dev->platform_data;
610 	uint32_t version = socinfo_get_version();
611 	if (cpu_is_apq8064ab()) {
612 		config.fast_rate = 450000000;
613 		config.slow_rate = 27000000;
614 		config.bus_freq  = 4;
615 		config.rev = ADRENO_REV(3, 2, 1, 0);
616 	} else if (cpu_is_apq8064()) {
617 		config.fast_rate = 400000000;
618 		config.slow_rate = 27000000;
619 		config.bus_freq  = 4;
620 
621 		if (SOCINFO_VERSION_MAJOR(version) == 2)
622 			config.rev = ADRENO_REV(3, 2, 0, 2);
623 		else if ((SOCINFO_VERSION_MAJOR(version) == 1) &&
624 				(SOCINFO_VERSION_MINOR(version) == 1))
625 			config.rev = ADRENO_REV(3, 2, 0, 1);
626 		else
627 			config.rev = ADRENO_REV(3, 2, 0, 0);
628 
629 	} else if (cpu_is_msm8960ab()) {
630 		config.fast_rate = 400000000;
631 		config.slow_rate = 320000000;
632 		config.bus_freq  = 4;
633 
634 		if (SOCINFO_VERSION_MINOR(version) == 0)
635 			config.rev = ADRENO_REV(3, 2, 1, 0);
636 		else
637 			config.rev = ADRENO_REV(3, 2, 1, 1);
638 
639 	} else if (cpu_is_msm8930()) {
640 		config.fast_rate = 400000000;
641 		config.slow_rate = 27000000;
642 		config.bus_freq  = 3;
643 
644 		if ((SOCINFO_VERSION_MAJOR(version) == 1) &&
645 			(SOCINFO_VERSION_MINOR(version) == 2))
646 			config.rev = ADRENO_REV(3, 0, 5, 2);
647 		else
648 			config.rev = ADRENO_REV(3, 0, 5, 0);
649 
650 	}
651 #  ifdef CONFIG_MSM_BUS_SCALING
652 	config.bus_scale_table = pdata->bus_scale_table;
653 #  endif
654 #endif
655 	dev->platform_data = &config;
656 	set_gpu_pdev(dev_get_drvdata(master), to_platform_device(dev));
657 	return 0;
658 }
659 
660 static void a3xx_unbind(struct device *dev, struct device *master,
661 		void *data)
662 {
663 	set_gpu_pdev(dev_get_drvdata(master), NULL);
664 }
665 
666 static const struct component_ops a3xx_ops = {
667 		.bind   = a3xx_bind,
668 		.unbind = a3xx_unbind,
669 };
670 
671 static int a3xx_probe(struct platform_device *pdev)
672 {
673 	return component_add(&pdev->dev, &a3xx_ops);
674 }
675 
676 static int a3xx_remove(struct platform_device *pdev)
677 {
678 	component_del(&pdev->dev, &a3xx_ops);
679 	return 0;
680 }
681 
682 static const struct of_device_id dt_match[] = {
683 	{ .compatible = "qcom,adreno-3xx" },
684 	/* for backwards compat w/ downstream kgsl DT files: */
685 	{ .compatible = "qcom,kgsl-3d0" },
686 	{}
687 };
688 
689 static struct platform_driver a3xx_driver = {
690 	.probe = a3xx_probe,
691 	.remove = a3xx_remove,
692 	.driver = {
693 		.name = "kgsl-3d0",
694 		.of_match_table = dt_match,
695 	},
696 };
697 
698 void __init a3xx_register(void)
699 {
700 	platform_driver_register(&a3xx_driver);
701 }
702 
703 void __exit a3xx_unregister(void)
704 {
705 	platform_driver_unregister(&a3xx_driver);
706 }
707