xref: /openbmc/linux/drivers/gpu/drm/msm/adreno/a3xx_gpu.c (revision 4464005a12b5c79e1a364e6272ee10a83413f928)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Copyright (C) 2013 Red Hat
4  * Author: Rob Clark <robdclark@gmail.com>
5  *
6  * Copyright (c) 2014 The Linux Foundation. All rights reserved.
7  */
8 
9 #include "a3xx_gpu.h"
10 
/*
 * Set of RBBM INT0 sources that the driver unmasks; a3xx_hw_init() writes
 * this value to REG_A3XX_RBBM_INT_0_MASK.
 */
#define A3XX_INT0_MASK (			\
	  A3XX_INT0_RBBM_AHB_ERROR		\
	| A3XX_INT0_RBBM_ATB_BUS_OVERFLOW	\
	| A3XX_INT0_CP_T0_PACKET_IN_IB		\
	| A3XX_INT0_CP_OPCODE_ERROR		\
	| A3XX_INT0_CP_RESERVED_BIT_ERROR	\
	| A3XX_INT0_CP_HW_FAULT			\
	| A3XX_INT0_CP_IB1_INT			\
	| A3XX_INT0_CP_IB2_INT			\
	| A3XX_INT0_CP_RB_INT			\
	| A3XX_INT0_CP_REG_PROTECT_FAULT	\
	| A3XX_INT0_CP_AHB_ERROR_HALT		\
	| A3XX_INT0_CACHE_FLUSH_TS		\
	| A3XX_INT0_UCHE_OOB_ACCESS)
25 
/* Not defined in this file; when set, a3xx_recover() dumps registers before reset. */
extern bool hang_debug;

/* Forward declarations: both are called before their definitions below. */
static bool a3xx_idle(struct msm_gpu *gpu);
static void a3xx_dump(struct msm_gpu *gpu);
30 
31 static bool a3xx_me_init(struct msm_gpu *gpu)
32 {
33 	struct msm_ringbuffer *ring = gpu->rb[0];
34 
35 	OUT_PKT3(ring, CP_ME_INIT, 17);
36 	OUT_RING(ring, 0x000003f7);
37 	OUT_RING(ring, 0x00000000);
38 	OUT_RING(ring, 0x00000000);
39 	OUT_RING(ring, 0x00000000);
40 	OUT_RING(ring, 0x00000080);
41 	OUT_RING(ring, 0x00000100);
42 	OUT_RING(ring, 0x00000180);
43 	OUT_RING(ring, 0x00006600);
44 	OUT_RING(ring, 0x00000150);
45 	OUT_RING(ring, 0x0000014e);
46 	OUT_RING(ring, 0x00000154);
47 	OUT_RING(ring, 0x00000001);
48 	OUT_RING(ring, 0x00000000);
49 	OUT_RING(ring, 0x00000000);
50 	OUT_RING(ring, 0x00000000);
51 	OUT_RING(ring, 0x00000000);
52 	OUT_RING(ring, 0x00000000);
53 
54 	gpu->funcs->flush(gpu, ring);
55 	return a3xx_idle(gpu);
56 }
57 
58 static int a3xx_hw_init(struct msm_gpu *gpu)
59 {
60 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
61 	struct a3xx_gpu *a3xx_gpu = to_a3xx_gpu(adreno_gpu);
62 	uint32_t *ptr, len;
63 	int i, ret;
64 
65 	DBG("%s", gpu->name);
66 
67 	if (adreno_is_a305(adreno_gpu)) {
68 		/* Set up 16 deep read/write request queues: */
69 		gpu_write(gpu, REG_A3XX_VBIF_IN_RD_LIM_CONF0, 0x10101010);
70 		gpu_write(gpu, REG_A3XX_VBIF_IN_RD_LIM_CONF1, 0x10101010);
71 		gpu_write(gpu, REG_A3XX_VBIF_OUT_RD_LIM_CONF0, 0x10101010);
72 		gpu_write(gpu, REG_A3XX_VBIF_OUT_WR_LIM_CONF0, 0x10101010);
73 		gpu_write(gpu, REG_A3XX_VBIF_DDR_OUT_MAX_BURST, 0x0000303);
74 		gpu_write(gpu, REG_A3XX_VBIF_IN_WR_LIM_CONF0, 0x10101010);
75 		gpu_write(gpu, REG_A3XX_VBIF_IN_WR_LIM_CONF1, 0x10101010);
76 		/* Enable WR-REQ: */
77 		gpu_write(gpu, REG_A3XX_VBIF_GATE_OFF_WRREQ_EN, 0x0000ff);
78 		/* Set up round robin arbitration between both AXI ports: */
79 		gpu_write(gpu, REG_A3XX_VBIF_ARB_CTL, 0x00000030);
80 		/* Set up AOOO: */
81 		gpu_write(gpu, REG_A3XX_VBIF_OUT_AXI_AOOO_EN, 0x0000003c);
82 		gpu_write(gpu, REG_A3XX_VBIF_OUT_AXI_AOOO, 0x003c003c);
83 	} else if (adreno_is_a306(adreno_gpu)) {
84 		gpu_write(gpu, REG_A3XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x0003);
85 		gpu_write(gpu, REG_A3XX_VBIF_OUT_RD_LIM_CONF0, 0x0000000a);
86 		gpu_write(gpu, REG_A3XX_VBIF_OUT_WR_LIM_CONF0, 0x0000000a);
87 	} else if (adreno_is_a320(adreno_gpu)) {
88 		/* Set up 16 deep read/write request queues: */
89 		gpu_write(gpu, REG_A3XX_VBIF_IN_RD_LIM_CONF0, 0x10101010);
90 		gpu_write(gpu, REG_A3XX_VBIF_IN_RD_LIM_CONF1, 0x10101010);
91 		gpu_write(gpu, REG_A3XX_VBIF_OUT_RD_LIM_CONF0, 0x10101010);
92 		gpu_write(gpu, REG_A3XX_VBIF_OUT_WR_LIM_CONF0, 0x10101010);
93 		gpu_write(gpu, REG_A3XX_VBIF_DDR_OUT_MAX_BURST, 0x0000303);
94 		gpu_write(gpu, REG_A3XX_VBIF_IN_WR_LIM_CONF0, 0x10101010);
95 		gpu_write(gpu, REG_A3XX_VBIF_IN_WR_LIM_CONF1, 0x10101010);
96 		/* Enable WR-REQ: */
97 		gpu_write(gpu, REG_A3XX_VBIF_GATE_OFF_WRREQ_EN, 0x0000ff);
98 		/* Set up round robin arbitration between both AXI ports: */
99 		gpu_write(gpu, REG_A3XX_VBIF_ARB_CTL, 0x00000030);
100 		/* Set up AOOO: */
101 		gpu_write(gpu, REG_A3XX_VBIF_OUT_AXI_AOOO_EN, 0x0000003c);
102 		gpu_write(gpu, REG_A3XX_VBIF_OUT_AXI_AOOO, 0x003c003c);
103 		/* Enable 1K sort: */
104 		gpu_write(gpu, REG_A3XX_VBIF_ABIT_SORT, 0x000000ff);
105 		gpu_write(gpu, REG_A3XX_VBIF_ABIT_SORT_CONF, 0x000000a4);
106 
107 	} else if (adreno_is_a330v2(adreno_gpu)) {
108 		/*
109 		 * Most of the VBIF registers on 8974v2 have the correct
110 		 * values at power on, so we won't modify those if we don't
111 		 * need to
112 		 */
113 		/* Enable 1k sort: */
114 		gpu_write(gpu, REG_A3XX_VBIF_ABIT_SORT, 0x0001003f);
115 		gpu_write(gpu, REG_A3XX_VBIF_ABIT_SORT_CONF, 0x000000a4);
116 		/* Enable WR-REQ: */
117 		gpu_write(gpu, REG_A3XX_VBIF_GATE_OFF_WRREQ_EN, 0x00003f);
118 		gpu_write(gpu, REG_A3XX_VBIF_DDR_OUT_MAX_BURST, 0x0000303);
119 		/* Set up VBIF_ROUND_ROBIN_QOS_ARB: */
120 		gpu_write(gpu, REG_A3XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x0003);
121 
122 	} else if (adreno_is_a330(adreno_gpu)) {
123 		/* Set up 16 deep read/write request queues: */
124 		gpu_write(gpu, REG_A3XX_VBIF_IN_RD_LIM_CONF0, 0x18181818);
125 		gpu_write(gpu, REG_A3XX_VBIF_IN_RD_LIM_CONF1, 0x18181818);
126 		gpu_write(gpu, REG_A3XX_VBIF_OUT_RD_LIM_CONF0, 0x18181818);
127 		gpu_write(gpu, REG_A3XX_VBIF_OUT_WR_LIM_CONF0, 0x18181818);
128 		gpu_write(gpu, REG_A3XX_VBIF_DDR_OUT_MAX_BURST, 0x0000303);
129 		gpu_write(gpu, REG_A3XX_VBIF_IN_WR_LIM_CONF0, 0x18181818);
130 		gpu_write(gpu, REG_A3XX_VBIF_IN_WR_LIM_CONF1, 0x18181818);
131 		/* Enable WR-REQ: */
132 		gpu_write(gpu, REG_A3XX_VBIF_GATE_OFF_WRREQ_EN, 0x00003f);
133 		/* Set up round robin arbitration between both AXI ports: */
134 		gpu_write(gpu, REG_A3XX_VBIF_ARB_CTL, 0x00000030);
135 		/* Set up VBIF_ROUND_ROBIN_QOS_ARB: */
136 		gpu_write(gpu, REG_A3XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x0001);
137 		/* Set up AOOO: */
138 		gpu_write(gpu, REG_A3XX_VBIF_OUT_AXI_AOOO_EN, 0x0000003f);
139 		gpu_write(gpu, REG_A3XX_VBIF_OUT_AXI_AOOO, 0x003f003f);
140 		/* Enable 1K sort: */
141 		gpu_write(gpu, REG_A3XX_VBIF_ABIT_SORT, 0x0001003f);
142 		gpu_write(gpu, REG_A3XX_VBIF_ABIT_SORT_CONF, 0x000000a4);
143 		/* Disable VBIF clock gating. This is to enable AXI running
144 		 * higher frequency than GPU:
145 		 */
146 		gpu_write(gpu, REG_A3XX_VBIF_CLKON, 0x00000001);
147 
148 	} else {
149 		BUG();
150 	}
151 
152 	/* Make all blocks contribute to the GPU BUSY perf counter: */
153 	gpu_write(gpu, REG_A3XX_RBBM_GPU_BUSY_MASKED, 0xffffffff);
154 
155 	/* Tune the hystersis counters for SP and CP idle detection: */
156 	gpu_write(gpu, REG_A3XX_RBBM_SP_HYST_CNT, 0x10);
157 	gpu_write(gpu, REG_A3XX_RBBM_WAIT_IDLE_CLOCKS_CTL, 0x10);
158 
159 	/* Enable the RBBM error reporting bits.  This lets us get
160 	 * useful information on failure:
161 	 */
162 	gpu_write(gpu, REG_A3XX_RBBM_AHB_CTL0, 0x00000001);
163 
164 	/* Enable AHB error reporting: */
165 	gpu_write(gpu, REG_A3XX_RBBM_AHB_CTL1, 0xa6ffffff);
166 
167 	/* Turn on the power counters: */
168 	gpu_write(gpu, REG_A3XX_RBBM_RBBM_CTL, 0x00030000);
169 
170 	/* Turn on hang detection - this spews a lot of useful information
171 	 * into the RBBM registers on a hang:
172 	 */
173 	gpu_write(gpu, REG_A3XX_RBBM_INTERFACE_HANG_INT_CTL, 0x00010fff);
174 
175 	/* Enable 64-byte cacheline size. HW Default is 32-byte (0x000000E0): */
176 	gpu_write(gpu, REG_A3XX_UCHE_CACHE_MODE_CONTROL_REG, 0x00000001);
177 
178 	/* Enable Clock gating: */
179 	if (adreno_is_a306(adreno_gpu))
180 		gpu_write(gpu, REG_A3XX_RBBM_CLOCK_CTL, 0xaaaaaaaa);
181 	else if (adreno_is_a320(adreno_gpu))
182 		gpu_write(gpu, REG_A3XX_RBBM_CLOCK_CTL, 0xbfffffff);
183 	else if (adreno_is_a330v2(adreno_gpu))
184 		gpu_write(gpu, REG_A3XX_RBBM_CLOCK_CTL, 0xaaaaaaaa);
185 	else if (adreno_is_a330(adreno_gpu))
186 		gpu_write(gpu, REG_A3XX_RBBM_CLOCK_CTL, 0xbffcffff);
187 
188 	if (adreno_is_a330v2(adreno_gpu))
189 		gpu_write(gpu, REG_A3XX_RBBM_GPR0_CTL, 0x05515455);
190 	else if (adreno_is_a330(adreno_gpu))
191 		gpu_write(gpu, REG_A3XX_RBBM_GPR0_CTL, 0x00000000);
192 
193 	/* Set the OCMEM base address for A330, etc */
194 	if (a3xx_gpu->ocmem.hdl) {
195 		gpu_write(gpu, REG_A3XX_RB_GMEM_BASE_ADDR,
196 			(unsigned int)(a3xx_gpu->ocmem.base >> 14));
197 	}
198 
199 	/* Turn on performance counters: */
200 	gpu_write(gpu, REG_A3XX_RBBM_PERFCTR_CTL, 0x01);
201 
202 	/* Enable the perfcntrs that we use.. */
203 	for (i = 0; i < gpu->num_perfcntrs; i++) {
204 		const struct msm_gpu_perfcntr *perfcntr = &gpu->perfcntrs[i];
205 		gpu_write(gpu, perfcntr->select_reg, perfcntr->select_val);
206 	}
207 
208 	gpu_write(gpu, REG_A3XX_RBBM_INT_0_MASK, A3XX_INT0_MASK);
209 
210 	ret = adreno_hw_init(gpu);
211 	if (ret)
212 		return ret;
213 
214 	/* setup access protection: */
215 	gpu_write(gpu, REG_A3XX_CP_PROTECT_CTRL, 0x00000007);
216 
217 	/* RBBM registers */
218 	gpu_write(gpu, REG_A3XX_CP_PROTECT(0), 0x63000040);
219 	gpu_write(gpu, REG_A3XX_CP_PROTECT(1), 0x62000080);
220 	gpu_write(gpu, REG_A3XX_CP_PROTECT(2), 0x600000cc);
221 	gpu_write(gpu, REG_A3XX_CP_PROTECT(3), 0x60000108);
222 	gpu_write(gpu, REG_A3XX_CP_PROTECT(4), 0x64000140);
223 	gpu_write(gpu, REG_A3XX_CP_PROTECT(5), 0x66000400);
224 
225 	/* CP registers */
226 	gpu_write(gpu, REG_A3XX_CP_PROTECT(6), 0x65000700);
227 	gpu_write(gpu, REG_A3XX_CP_PROTECT(7), 0x610007d8);
228 	gpu_write(gpu, REG_A3XX_CP_PROTECT(8), 0x620007e0);
229 	gpu_write(gpu, REG_A3XX_CP_PROTECT(9), 0x61001178);
230 	gpu_write(gpu, REG_A3XX_CP_PROTECT(10), 0x64001180);
231 
232 	/* RB registers */
233 	gpu_write(gpu, REG_A3XX_CP_PROTECT(11), 0x60003300);
234 
235 	/* VBIF registers */
236 	gpu_write(gpu, REG_A3XX_CP_PROTECT(12), 0x6b00c000);
237 
238 	/* NOTE: PM4/micro-engine firmware registers look to be the same
239 	 * for a2xx and a3xx.. we could possibly push that part down to
240 	 * adreno_gpu base class.  Or push both PM4 and PFP but
241 	 * parameterize the pfp ucode addr/data registers..
242 	 */
243 
244 	/* Load PM4: */
245 	ptr = (uint32_t *)(adreno_gpu->fw[ADRENO_FW_PM4]->data);
246 	len = adreno_gpu->fw[ADRENO_FW_PM4]->size / 4;
247 	DBG("loading PM4 ucode version: %x", ptr[1]);
248 
249 	gpu_write(gpu, REG_AXXX_CP_DEBUG,
250 			AXXX_CP_DEBUG_DYNAMIC_CLK_DISABLE |
251 			AXXX_CP_DEBUG_MIU_128BIT_WRITE_ENABLE);
252 	gpu_write(gpu, REG_AXXX_CP_ME_RAM_WADDR, 0);
253 	for (i = 1; i < len; i++)
254 		gpu_write(gpu, REG_AXXX_CP_ME_RAM_DATA, ptr[i]);
255 
256 	/* Load PFP: */
257 	ptr = (uint32_t *)(adreno_gpu->fw[ADRENO_FW_PFP]->data);
258 	len = adreno_gpu->fw[ADRENO_FW_PFP]->size / 4;
259 	DBG("loading PFP ucode version: %x", ptr[5]);
260 
261 	gpu_write(gpu, REG_A3XX_CP_PFP_UCODE_ADDR, 0);
262 	for (i = 1; i < len; i++)
263 		gpu_write(gpu, REG_A3XX_CP_PFP_UCODE_DATA, ptr[i]);
264 
265 	/* CP ROQ queue sizes (bytes) - RB:16, ST:16, IB1:32, IB2:64 */
266 	if (adreno_is_a305(adreno_gpu) || adreno_is_a306(adreno_gpu) ||
267 			adreno_is_a320(adreno_gpu)) {
268 		gpu_write(gpu, REG_AXXX_CP_QUEUE_THRESHOLDS,
269 				AXXX_CP_QUEUE_THRESHOLDS_CSQ_IB1_START(2) |
270 				AXXX_CP_QUEUE_THRESHOLDS_CSQ_IB2_START(6) |
271 				AXXX_CP_QUEUE_THRESHOLDS_CSQ_ST_START(14));
272 	} else if (adreno_is_a330(adreno_gpu)) {
273 		/* NOTE: this (value take from downstream android driver)
274 		 * includes some bits outside of the known bitfields.  But
275 		 * A330 has this "MERCIU queue" thing too, which might
276 		 * explain a new bitfield or reshuffling:
277 		 */
278 		gpu_write(gpu, REG_AXXX_CP_QUEUE_THRESHOLDS, 0x003e2008);
279 	}
280 
281 	/* clear ME_HALT to start micro engine */
282 	gpu_write(gpu, REG_AXXX_CP_ME_CNTL, 0);
283 
284 	return a3xx_me_init(gpu) ? 0 : -EINVAL;
285 }
286 
287 static void a3xx_recover(struct msm_gpu *gpu)
288 {
289 	int i;
290 
291 	adreno_dump_info(gpu);
292 
293 	for (i = 0; i < 8; i++) {
294 		printk("CP_SCRATCH_REG%d: %u\n", i,
295 			gpu_read(gpu, REG_AXXX_CP_SCRATCH_REG0 + i));
296 	}
297 
298 	/* dump registers before resetting gpu, if enabled: */
299 	if (hang_debug)
300 		a3xx_dump(gpu);
301 
302 	gpu_write(gpu, REG_A3XX_RBBM_SW_RESET_CMD, 1);
303 	gpu_read(gpu, REG_A3XX_RBBM_SW_RESET_CMD);
304 	gpu_write(gpu, REG_A3XX_RBBM_SW_RESET_CMD, 0);
305 	adreno_recover(gpu);
306 }
307 
308 static void a3xx_destroy(struct msm_gpu *gpu)
309 {
310 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
311 	struct a3xx_gpu *a3xx_gpu = to_a3xx_gpu(adreno_gpu);
312 
313 	DBG("%s", gpu->name);
314 
315 	adreno_gpu_cleanup(adreno_gpu);
316 
317 	adreno_gpu_ocmem_cleanup(&a3xx_gpu->ocmem);
318 
319 	kfree(a3xx_gpu);
320 }
321 
322 static bool a3xx_idle(struct msm_gpu *gpu)
323 {
324 	/* wait for ringbuffer to drain: */
325 	if (!adreno_idle(gpu, gpu->rb[0]))
326 		return false;
327 
328 	/* then wait for GPU to finish: */
329 	if (spin_until(!(gpu_read(gpu, REG_A3XX_RBBM_STATUS) &
330 			A3XX_RBBM_STATUS_GPU_BUSY))) {
331 		DRM_ERROR("%s: timeout waiting for GPU to idle!\n", gpu->name);
332 
333 		/* TODO maybe we need to reset GPU here to recover from hang? */
334 		return false;
335 	}
336 
337 	return true;
338 }
339 
340 static irqreturn_t a3xx_irq(struct msm_gpu *gpu)
341 {
342 	uint32_t status;
343 
344 	status = gpu_read(gpu, REG_A3XX_RBBM_INT_0_STATUS);
345 	DBG("%s: %08x", gpu->name, status);
346 
347 	// TODO
348 
349 	gpu_write(gpu, REG_A3XX_RBBM_INT_CLEAR_CMD, status);
350 
351 	msm_gpu_retire(gpu);
352 
353 	return IRQ_HANDLED;
354 }
355 
/*
 * Register ranges handed to the adreno core via adreno_gpu->registers.
 * The values read as (first, last) offset pairs - presumably inclusive,
 * since several pairs have first == last - terminated by a ~0 sentinel.
 */
static const unsigned int a3xx_registers[] = {
	/* 0x0000 - 0x01ff */
	0x0000, 0x0002,  0x0010, 0x0012,  0x0018, 0x0018,
	0x0020, 0x0027,  0x0029, 0x002b,  0x002e, 0x0033,
	0x0040, 0x0042,  0x0050, 0x005c,  0x0060, 0x006c,
	0x0080, 0x0082,  0x0084, 0x0088,  0x0090, 0x00e5,
	0x00ea, 0x00ed,  0x0100, 0x0100,  0x0110, 0x0123,
	0x01c0, 0x01c1,  0x01c3, 0x01c5,  0x01c7, 0x01c7,
	0x01d5, 0x01d9,  0x01dc, 0x01dd,  0x01ea, 0x01ea,
	0x01ee, 0x01f1,  0x01f5, 0x01f5,  0x01fc, 0x01ff,

	/* 0x0440 - 0x0614 */
	0x0440, 0x0440,  0x0443, 0x0443,  0x0445, 0x0445,
	0x044d, 0x044f,  0x0452, 0x0452,  0x0454, 0x046f,
	0x047c, 0x047c,  0x047f, 0x047f,  0x0578, 0x057f,
	0x0600, 0x0602,  0x0605, 0x0607,  0x060a, 0x060e,
	0x0612, 0x0614,

	/* 0x0c01 - 0x0f09 */
	0x0c01, 0x0c02,  0x0c06, 0x0c1d,  0x0c3d, 0x0c3f,
	0x0c48, 0x0c4b,  0x0c80, 0x0c80,  0x0c88, 0x0c8b,
	0x0ca0, 0x0cb7,  0x0cc0, 0x0cc1,  0x0cc6, 0x0cc7,
	0x0ce4, 0x0ce5,  0x0e00, 0x0e05,  0x0e0c, 0x0e0c,
	0x0e22, 0x0e23,  0x0e41, 0x0e45,  0x0e64, 0x0e65,
	0x0e80, 0x0e82,  0x0e84, 0x0e89,  0x0ea0, 0x0ea1,
	0x0ea4, 0x0ea7,  0x0ec4, 0x0ecb,  0x0ee0, 0x0ee0,
	0x0f00, 0x0f01,  0x0f03, 0x0f09,

	/* 0x2040 - 0x2343 */
	0x2040, 0x2040,  0x2044, 0x2044,  0x2048, 0x204d,
	0x2068, 0x2069,  0x206c, 0x206d,  0x2070, 0x2070,
	0x2072, 0x2072,  0x2074, 0x2075,  0x2079, 0x207a,
	0x20c0, 0x20d3,  0x20e4, 0x20ef,  0x2100, 0x2109,
	0x210c, 0x210c,  0x210e, 0x210e,  0x2110, 0x2111,
	0x2114, 0x2115,  0x21e4, 0x21e4,  0x21ea, 0x21ea,
	0x21ec, 0x21ed,  0x21f0, 0x21f0,  0x2200, 0x2212,
	0x2214, 0x2217,  0x221a, 0x221a,  0x2240, 0x227e,
	0x2280, 0x228b,  0x22c0, 0x22c0,  0x22c4, 0x22ce,
	0x22d0, 0x22d8,  0x22df, 0x22e6,  0x22e8, 0x22e9,
	0x22ec, 0x22ec,  0x22f0, 0x22f7,  0x22ff, 0x22ff,
	0x2340, 0x2343,

	/* 0x2440 - 0x2743 */
	0x2440, 0x2440,  0x2444, 0x2444,  0x2448, 0x244d,
	0x2468, 0x2469,  0x246c, 0x246d,  0x2470, 0x2470,
	0x2472, 0x2472,  0x2474, 0x2475,  0x2479, 0x247a,
	0x24c0, 0x24d3,  0x24e4, 0x24ef,  0x2500, 0x2509,
	0x250c, 0x250c,  0x250e, 0x250e,  0x2510, 0x2511,
	0x2514, 0x2515,  0x25e4, 0x25e4,  0x25ea, 0x25ea,
	0x25ec, 0x25ed,  0x25f0, 0x25f0,  0x2600, 0x2612,
	0x2614, 0x2617,  0x261a, 0x261a,  0x2640, 0x267e,
	0x2680, 0x268b,  0x26c0, 0x26c0,  0x26c4, 0x26ce,
	0x26d0, 0x26d8,  0x26df, 0x26e6,  0x26e8, 0x26e9,
	0x26ec, 0x26ec,  0x26f0, 0x26f7,  0x26ff, 0x26ff,
	0x2740, 0x2743,

	/* 0x300c - 0x305f */
	0x300c, 0x300e,  0x301c, 0x301d,  0x302a, 0x302a,
	0x302c, 0x302d,  0x3030, 0x3031,  0x3034, 0x3036,
	0x303c, 0x303c,  0x305e, 0x305f,

	~0   /* sentinel */
};
393 
394 /* would be nice to not have to duplicate the _show() stuff with printk(): */
395 static void a3xx_dump(struct msm_gpu *gpu)
396 {
397 	printk("status:   %08x\n",
398 			gpu_read(gpu, REG_A3XX_RBBM_STATUS));
399 	adreno_dump(gpu);
400 }
401 
402 static struct msm_gpu_state *a3xx_gpu_state_get(struct msm_gpu *gpu)
403 {
404 	struct msm_gpu_state *state = kzalloc(sizeof(*state), GFP_KERNEL);
405 
406 	if (!state)
407 		return ERR_PTR(-ENOMEM);
408 
409 	adreno_gpu_state_get(gpu, state);
410 
411 	state->rbbm_status = gpu_read(gpu, REG_A3XX_RBBM_STATUS);
412 
413 	return state;
414 }
415 
416 /* Register offset defines for A3XX */
417 static const unsigned int a3xx_register_offsets[REG_ADRENO_REGISTER_MAX] = {
418 	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_BASE, REG_AXXX_CP_RB_BASE),
419 	REG_ADRENO_SKIP(REG_ADRENO_CP_RB_BASE_HI),
420 	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_RPTR_ADDR, REG_AXXX_CP_RB_RPTR_ADDR),
421 	REG_ADRENO_SKIP(REG_ADRENO_CP_RB_RPTR_ADDR_HI),
422 	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_RPTR, REG_AXXX_CP_RB_RPTR),
423 	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_WPTR, REG_AXXX_CP_RB_WPTR),
424 	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_CNTL, REG_AXXX_CP_RB_CNTL),
425 };
426 
427 static const struct adreno_gpu_funcs funcs = {
428 	.base = {
429 		.get_param = adreno_get_param,
430 		.hw_init = a3xx_hw_init,
431 		.pm_suspend = msm_gpu_pm_suspend,
432 		.pm_resume = msm_gpu_pm_resume,
433 		.recover = a3xx_recover,
434 		.submit = adreno_submit,
435 		.flush = adreno_flush,
436 		.active_ring = adreno_active_ring,
437 		.irq = a3xx_irq,
438 		.destroy = a3xx_destroy,
439 #if defined(CONFIG_DEBUG_FS) || defined(CONFIG_DEV_COREDUMP)
440 		.show = adreno_show,
441 #endif
442 		.gpu_state_get = a3xx_gpu_state_get,
443 		.gpu_state_put = adreno_gpu_state_put,
444 		.create_address_space = adreno_iommu_create_address_space,
445 	},
446 };
447 
448 static const struct msm_gpu_perfcntr perfcntrs[] = {
449 	{ REG_A3XX_SP_PERFCOUNTER6_SELECT, REG_A3XX_RBBM_PERFCTR_SP_6_LO,
450 			SP_ALU_ACTIVE_CYCLES, "ALUACTIVE" },
451 	{ REG_A3XX_SP_PERFCOUNTER7_SELECT, REG_A3XX_RBBM_PERFCTR_SP_7_LO,
452 			SP_FS_FULL_ALU_INSTRUCTIONS, "ALUFULL" },
453 };
454 
455 struct msm_gpu *a3xx_gpu_init(struct drm_device *dev)
456 {
457 	struct a3xx_gpu *a3xx_gpu = NULL;
458 	struct adreno_gpu *adreno_gpu;
459 	struct msm_gpu *gpu;
460 	struct msm_drm_private *priv = dev->dev_private;
461 	struct platform_device *pdev = priv->gpu_pdev;
462 	int ret;
463 
464 	if (!pdev) {
465 		DRM_DEV_ERROR(dev->dev, "no a3xx device\n");
466 		ret = -ENXIO;
467 		goto fail;
468 	}
469 
470 	a3xx_gpu = kzalloc(sizeof(*a3xx_gpu), GFP_KERNEL);
471 	if (!a3xx_gpu) {
472 		ret = -ENOMEM;
473 		goto fail;
474 	}
475 
476 	adreno_gpu = &a3xx_gpu->base;
477 	gpu = &adreno_gpu->base;
478 
479 	gpu->perfcntrs = perfcntrs;
480 	gpu->num_perfcntrs = ARRAY_SIZE(perfcntrs);
481 
482 	adreno_gpu->registers = a3xx_registers;
483 	adreno_gpu->reg_offsets = a3xx_register_offsets;
484 
485 	ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs, 1);
486 	if (ret)
487 		goto fail;
488 
489 	/* if needed, allocate gmem: */
490 	if (adreno_is_a330(adreno_gpu)) {
491 		ret = adreno_gpu_ocmem_init(&adreno_gpu->base.pdev->dev,
492 					    adreno_gpu, &a3xx_gpu->ocmem);
493 		if (ret)
494 			goto fail;
495 	}
496 
497 	if (!gpu->aspace) {
498 		/* TODO we think it is possible to configure the GPU to
499 		 * restrict access to VRAM carveout.  But the required
500 		 * registers are unknown.  For now just bail out and
501 		 * limp along with just modesetting.  If it turns out
502 		 * to not be possible to restrict access, then we must
503 		 * implement a cmdstream validator.
504 		 */
505 		DRM_DEV_ERROR(dev->dev, "No memory protection without IOMMU\n");
506 		ret = -ENXIO;
507 		goto fail;
508 	}
509 
510 	/*
511 	 * Set the ICC path to maximum speed for now by multiplying the fastest
512 	 * frequency by the bus width (8). We'll want to scale this later on to
513 	 * improve battery life.
514 	 */
515 	icc_set_bw(gpu->icc_path, 0, Bps_to_icc(gpu->fast_rate) * 8);
516 	icc_set_bw(gpu->ocmem_icc_path, 0, Bps_to_icc(gpu->fast_rate) * 8);
517 
518 	return gpu;
519 
520 fail:
521 	if (a3xx_gpu)
522 		a3xx_destroy(&a3xx_gpu->base.base);
523 
524 	return ERR_PTR(ret);
525 }
526