xref: /openbmc/linux/drivers/gpu/drm/msm/adreno/a3xx_gpu.c (revision 98ddec80)
1 /*
2  * Copyright (C) 2013 Red Hat
3  * Author: Rob Clark <robdclark@gmail.com>
4  *
5  * Copyright (c) 2014 The Linux Foundation. All rights reserved.
6  *
7  * This program is free software; you can redistribute it and/or modify it
8  * under the terms of the GNU General Public License version 2 as published by
9  * the Free Software Foundation.
10  *
11  * This program is distributed in the hope that it will be useful, but WITHOUT
12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
14  * more details.
15  *
16  * You should have received a copy of the GNU General Public License along with
17  * this program.  If not, see <http://www.gnu.org/licenses/>.
18  */
19 
20 #ifdef CONFIG_MSM_OCMEM
21 #  include <mach/ocmem.h>
22 #endif
23 
24 #include "a3xx_gpu.h"
25 
/*
 * Set of INT0 interrupt sources this driver unmasks; a3xx_hw_init() writes
 * this value to REG_A3XX_RBBM_INT_0_MASK.  Grouped by originating block:
 * RBBM, CP error conditions, CP ring/IB notifications, then the rest.
 */
#define A3XX_INT0_MASK (				\
	A3XX_INT0_RBBM_AHB_ERROR		|	\
	A3XX_INT0_RBBM_ATB_BUS_OVERFLOW		|	\
	A3XX_INT0_CP_T0_PACKET_IN_IB		|	\
	A3XX_INT0_CP_OPCODE_ERROR		|	\
	A3XX_INT0_CP_RESERVED_BIT_ERROR		|	\
	A3XX_INT0_CP_HW_FAULT			|	\
	A3XX_INT0_CP_REG_PROTECT_FAULT		|	\
	A3XX_INT0_CP_AHB_ERROR_HALT		|	\
	A3XX_INT0_CP_IB1_INT			|	\
	A3XX_INT0_CP_IB2_INT			|	\
	A3XX_INT0_CP_RB_INT			|	\
	A3XX_INT0_CACHE_FLUSH_TS		|	\
	A3XX_INT0_UCHE_OOB_ACCESS)
40 
/*
 * Defined outside this file.  When true, a3xx_recover() dumps GPU state via
 * a3xx_dump() before issuing the software reset.
 */
extern bool hang_debug;

/* Forward declarations: both are referenced before their definitions below. */
static void a3xx_dump(struct msm_gpu *gpu);
static bool a3xx_idle(struct msm_gpu *gpu);
45 
46 static bool a3xx_me_init(struct msm_gpu *gpu)
47 {
48 	struct msm_ringbuffer *ring = gpu->rb[0];
49 
50 	OUT_PKT3(ring, CP_ME_INIT, 17);
51 	OUT_RING(ring, 0x000003f7);
52 	OUT_RING(ring, 0x00000000);
53 	OUT_RING(ring, 0x00000000);
54 	OUT_RING(ring, 0x00000000);
55 	OUT_RING(ring, 0x00000080);
56 	OUT_RING(ring, 0x00000100);
57 	OUT_RING(ring, 0x00000180);
58 	OUT_RING(ring, 0x00006600);
59 	OUT_RING(ring, 0x00000150);
60 	OUT_RING(ring, 0x0000014e);
61 	OUT_RING(ring, 0x00000154);
62 	OUT_RING(ring, 0x00000001);
63 	OUT_RING(ring, 0x00000000);
64 	OUT_RING(ring, 0x00000000);
65 	OUT_RING(ring, 0x00000000);
66 	OUT_RING(ring, 0x00000000);
67 	OUT_RING(ring, 0x00000000);
68 
69 	gpu->funcs->flush(gpu, ring);
70 	return a3xx_idle(gpu);
71 }
72 
73 static int a3xx_hw_init(struct msm_gpu *gpu)
74 {
75 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
76 	struct a3xx_gpu *a3xx_gpu = to_a3xx_gpu(adreno_gpu);
77 	uint32_t *ptr, len;
78 	int i, ret;
79 
80 	DBG("%s", gpu->name);
81 
82 	if (adreno_is_a305(adreno_gpu)) {
83 		/* Set up 16 deep read/write request queues: */
84 		gpu_write(gpu, REG_A3XX_VBIF_IN_RD_LIM_CONF0, 0x10101010);
85 		gpu_write(gpu, REG_A3XX_VBIF_IN_RD_LIM_CONF1, 0x10101010);
86 		gpu_write(gpu, REG_A3XX_VBIF_OUT_RD_LIM_CONF0, 0x10101010);
87 		gpu_write(gpu, REG_A3XX_VBIF_OUT_WR_LIM_CONF0, 0x10101010);
88 		gpu_write(gpu, REG_A3XX_VBIF_DDR_OUT_MAX_BURST, 0x0000303);
89 		gpu_write(gpu, REG_A3XX_VBIF_IN_WR_LIM_CONF0, 0x10101010);
90 		gpu_write(gpu, REG_A3XX_VBIF_IN_WR_LIM_CONF1, 0x10101010);
91 		/* Enable WR-REQ: */
92 		gpu_write(gpu, REG_A3XX_VBIF_GATE_OFF_WRREQ_EN, 0x0000ff);
93 		/* Set up round robin arbitration between both AXI ports: */
94 		gpu_write(gpu, REG_A3XX_VBIF_ARB_CTL, 0x00000030);
95 		/* Set up AOOO: */
96 		gpu_write(gpu, REG_A3XX_VBIF_OUT_AXI_AOOO_EN, 0x0000003c);
97 		gpu_write(gpu, REG_A3XX_VBIF_OUT_AXI_AOOO, 0x003c003c);
98 	} else if (adreno_is_a306(adreno_gpu)) {
99 		gpu_write(gpu, REG_A3XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x0003);
100 		gpu_write(gpu, REG_A3XX_VBIF_OUT_RD_LIM_CONF0, 0x0000000a);
101 		gpu_write(gpu, REG_A3XX_VBIF_OUT_WR_LIM_CONF0, 0x0000000a);
102 	} else if (adreno_is_a320(adreno_gpu)) {
103 		/* Set up 16 deep read/write request queues: */
104 		gpu_write(gpu, REG_A3XX_VBIF_IN_RD_LIM_CONF0, 0x10101010);
105 		gpu_write(gpu, REG_A3XX_VBIF_IN_RD_LIM_CONF1, 0x10101010);
106 		gpu_write(gpu, REG_A3XX_VBIF_OUT_RD_LIM_CONF0, 0x10101010);
107 		gpu_write(gpu, REG_A3XX_VBIF_OUT_WR_LIM_CONF0, 0x10101010);
108 		gpu_write(gpu, REG_A3XX_VBIF_DDR_OUT_MAX_BURST, 0x0000303);
109 		gpu_write(gpu, REG_A3XX_VBIF_IN_WR_LIM_CONF0, 0x10101010);
110 		gpu_write(gpu, REG_A3XX_VBIF_IN_WR_LIM_CONF1, 0x10101010);
111 		/* Enable WR-REQ: */
112 		gpu_write(gpu, REG_A3XX_VBIF_GATE_OFF_WRREQ_EN, 0x0000ff);
113 		/* Set up round robin arbitration between both AXI ports: */
114 		gpu_write(gpu, REG_A3XX_VBIF_ARB_CTL, 0x00000030);
115 		/* Set up AOOO: */
116 		gpu_write(gpu, REG_A3XX_VBIF_OUT_AXI_AOOO_EN, 0x0000003c);
117 		gpu_write(gpu, REG_A3XX_VBIF_OUT_AXI_AOOO, 0x003c003c);
118 		/* Enable 1K sort: */
119 		gpu_write(gpu, REG_A3XX_VBIF_ABIT_SORT, 0x000000ff);
120 		gpu_write(gpu, REG_A3XX_VBIF_ABIT_SORT_CONF, 0x000000a4);
121 
122 	} else if (adreno_is_a330v2(adreno_gpu)) {
123 		/*
124 		 * Most of the VBIF registers on 8974v2 have the correct
125 		 * values at power on, so we won't modify those if we don't
126 		 * need to
127 		 */
128 		/* Enable 1k sort: */
129 		gpu_write(gpu, REG_A3XX_VBIF_ABIT_SORT, 0x0001003f);
130 		gpu_write(gpu, REG_A3XX_VBIF_ABIT_SORT_CONF, 0x000000a4);
131 		/* Enable WR-REQ: */
132 		gpu_write(gpu, REG_A3XX_VBIF_GATE_OFF_WRREQ_EN, 0x00003f);
133 		gpu_write(gpu, REG_A3XX_VBIF_DDR_OUT_MAX_BURST, 0x0000303);
134 		/* Set up VBIF_ROUND_ROBIN_QOS_ARB: */
135 		gpu_write(gpu, REG_A3XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x0003);
136 
137 	} else if (adreno_is_a330(adreno_gpu)) {
138 		/* Set up 16 deep read/write request queues: */
139 		gpu_write(gpu, REG_A3XX_VBIF_IN_RD_LIM_CONF0, 0x18181818);
140 		gpu_write(gpu, REG_A3XX_VBIF_IN_RD_LIM_CONF1, 0x18181818);
141 		gpu_write(gpu, REG_A3XX_VBIF_OUT_RD_LIM_CONF0, 0x18181818);
142 		gpu_write(gpu, REG_A3XX_VBIF_OUT_WR_LIM_CONF0, 0x18181818);
143 		gpu_write(gpu, REG_A3XX_VBIF_DDR_OUT_MAX_BURST, 0x0000303);
144 		gpu_write(gpu, REG_A3XX_VBIF_IN_WR_LIM_CONF0, 0x18181818);
145 		gpu_write(gpu, REG_A3XX_VBIF_IN_WR_LIM_CONF1, 0x18181818);
146 		/* Enable WR-REQ: */
147 		gpu_write(gpu, REG_A3XX_VBIF_GATE_OFF_WRREQ_EN, 0x00003f);
148 		/* Set up round robin arbitration between both AXI ports: */
149 		gpu_write(gpu, REG_A3XX_VBIF_ARB_CTL, 0x00000030);
150 		/* Set up VBIF_ROUND_ROBIN_QOS_ARB: */
151 		gpu_write(gpu, REG_A3XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x0001);
152 		/* Set up AOOO: */
153 		gpu_write(gpu, REG_A3XX_VBIF_OUT_AXI_AOOO_EN, 0x0000003f);
154 		gpu_write(gpu, REG_A3XX_VBIF_OUT_AXI_AOOO, 0x003f003f);
155 		/* Enable 1K sort: */
156 		gpu_write(gpu, REG_A3XX_VBIF_ABIT_SORT, 0x0001003f);
157 		gpu_write(gpu, REG_A3XX_VBIF_ABIT_SORT_CONF, 0x000000a4);
158 		/* Disable VBIF clock gating. This is to enable AXI running
159 		 * higher frequency than GPU:
160 		 */
161 		gpu_write(gpu, REG_A3XX_VBIF_CLKON, 0x00000001);
162 
163 	} else {
164 		BUG();
165 	}
166 
167 	/* Make all blocks contribute to the GPU BUSY perf counter: */
168 	gpu_write(gpu, REG_A3XX_RBBM_GPU_BUSY_MASKED, 0xffffffff);
169 
170 	/* Tune the hystersis counters for SP and CP idle detection: */
171 	gpu_write(gpu, REG_A3XX_RBBM_SP_HYST_CNT, 0x10);
172 	gpu_write(gpu, REG_A3XX_RBBM_WAIT_IDLE_CLOCKS_CTL, 0x10);
173 
174 	/* Enable the RBBM error reporting bits.  This lets us get
175 	 * useful information on failure:
176 	 */
177 	gpu_write(gpu, REG_A3XX_RBBM_AHB_CTL0, 0x00000001);
178 
179 	/* Enable AHB error reporting: */
180 	gpu_write(gpu, REG_A3XX_RBBM_AHB_CTL1, 0xa6ffffff);
181 
182 	/* Turn on the power counters: */
183 	gpu_write(gpu, REG_A3XX_RBBM_RBBM_CTL, 0x00030000);
184 
185 	/* Turn on hang detection - this spews a lot of useful information
186 	 * into the RBBM registers on a hang:
187 	 */
188 	gpu_write(gpu, REG_A3XX_RBBM_INTERFACE_HANG_INT_CTL, 0x00010fff);
189 
190 	/* Enable 64-byte cacheline size. HW Default is 32-byte (0x000000E0): */
191 	gpu_write(gpu, REG_A3XX_UCHE_CACHE_MODE_CONTROL_REG, 0x00000001);
192 
193 	/* Enable Clock gating: */
194 	if (adreno_is_a306(adreno_gpu))
195 		gpu_write(gpu, REG_A3XX_RBBM_CLOCK_CTL, 0xaaaaaaaa);
196 	else if (adreno_is_a320(adreno_gpu))
197 		gpu_write(gpu, REG_A3XX_RBBM_CLOCK_CTL, 0xbfffffff);
198 	else if (adreno_is_a330v2(adreno_gpu))
199 		gpu_write(gpu, REG_A3XX_RBBM_CLOCK_CTL, 0xaaaaaaaa);
200 	else if (adreno_is_a330(adreno_gpu))
201 		gpu_write(gpu, REG_A3XX_RBBM_CLOCK_CTL, 0xbffcffff);
202 
203 	if (adreno_is_a330v2(adreno_gpu))
204 		gpu_write(gpu, REG_A3XX_RBBM_GPR0_CTL, 0x05515455);
205 	else if (adreno_is_a330(adreno_gpu))
206 		gpu_write(gpu, REG_A3XX_RBBM_GPR0_CTL, 0x00000000);
207 
208 	/* Set the OCMEM base address for A330, etc */
209 	if (a3xx_gpu->ocmem_hdl) {
210 		gpu_write(gpu, REG_A3XX_RB_GMEM_BASE_ADDR,
211 			(unsigned int)(a3xx_gpu->ocmem_base >> 14));
212 	}
213 
214 	/* Turn on performance counters: */
215 	gpu_write(gpu, REG_A3XX_RBBM_PERFCTR_CTL, 0x01);
216 
217 	/* Enable the perfcntrs that we use.. */
218 	for (i = 0; i < gpu->num_perfcntrs; i++) {
219 		const struct msm_gpu_perfcntr *perfcntr = &gpu->perfcntrs[i];
220 		gpu_write(gpu, perfcntr->select_reg, perfcntr->select_val);
221 	}
222 
223 	gpu_write(gpu, REG_A3XX_RBBM_INT_0_MASK, A3XX_INT0_MASK);
224 
225 	ret = adreno_hw_init(gpu);
226 	if (ret)
227 		return ret;
228 
229 	/* setup access protection: */
230 	gpu_write(gpu, REG_A3XX_CP_PROTECT_CTRL, 0x00000007);
231 
232 	/* RBBM registers */
233 	gpu_write(gpu, REG_A3XX_CP_PROTECT(0), 0x63000040);
234 	gpu_write(gpu, REG_A3XX_CP_PROTECT(1), 0x62000080);
235 	gpu_write(gpu, REG_A3XX_CP_PROTECT(2), 0x600000cc);
236 	gpu_write(gpu, REG_A3XX_CP_PROTECT(3), 0x60000108);
237 	gpu_write(gpu, REG_A3XX_CP_PROTECT(4), 0x64000140);
238 	gpu_write(gpu, REG_A3XX_CP_PROTECT(5), 0x66000400);
239 
240 	/* CP registers */
241 	gpu_write(gpu, REG_A3XX_CP_PROTECT(6), 0x65000700);
242 	gpu_write(gpu, REG_A3XX_CP_PROTECT(7), 0x610007d8);
243 	gpu_write(gpu, REG_A3XX_CP_PROTECT(8), 0x620007e0);
244 	gpu_write(gpu, REG_A3XX_CP_PROTECT(9), 0x61001178);
245 	gpu_write(gpu, REG_A3XX_CP_PROTECT(10), 0x64001180);
246 
247 	/* RB registers */
248 	gpu_write(gpu, REG_A3XX_CP_PROTECT(11), 0x60003300);
249 
250 	/* VBIF registers */
251 	gpu_write(gpu, REG_A3XX_CP_PROTECT(12), 0x6b00c000);
252 
253 	/* NOTE: PM4/micro-engine firmware registers look to be the same
254 	 * for a2xx and a3xx.. we could possibly push that part down to
255 	 * adreno_gpu base class.  Or push both PM4 and PFP but
256 	 * parameterize the pfp ucode addr/data registers..
257 	 */
258 
259 	/* Load PM4: */
260 	ptr = (uint32_t *)(adreno_gpu->fw[ADRENO_FW_PM4]->data);
261 	len = adreno_gpu->fw[ADRENO_FW_PM4]->size / 4;
262 	DBG("loading PM4 ucode version: %x", ptr[1]);
263 
264 	gpu_write(gpu, REG_AXXX_CP_DEBUG,
265 			AXXX_CP_DEBUG_DYNAMIC_CLK_DISABLE |
266 			AXXX_CP_DEBUG_MIU_128BIT_WRITE_ENABLE);
267 	gpu_write(gpu, REG_AXXX_CP_ME_RAM_WADDR, 0);
268 	for (i = 1; i < len; i++)
269 		gpu_write(gpu, REG_AXXX_CP_ME_RAM_DATA, ptr[i]);
270 
271 	/* Load PFP: */
272 	ptr = (uint32_t *)(adreno_gpu->fw[ADRENO_FW_PFP]->data);
273 	len = adreno_gpu->fw[ADRENO_FW_PFP]->size / 4;
274 	DBG("loading PFP ucode version: %x", ptr[5]);
275 
276 	gpu_write(gpu, REG_A3XX_CP_PFP_UCODE_ADDR, 0);
277 	for (i = 1; i < len; i++)
278 		gpu_write(gpu, REG_A3XX_CP_PFP_UCODE_DATA, ptr[i]);
279 
280 	/* CP ROQ queue sizes (bytes) - RB:16, ST:16, IB1:32, IB2:64 */
281 	if (adreno_is_a305(adreno_gpu) || adreno_is_a306(adreno_gpu) ||
282 			adreno_is_a320(adreno_gpu)) {
283 		gpu_write(gpu, REG_AXXX_CP_QUEUE_THRESHOLDS,
284 				AXXX_CP_QUEUE_THRESHOLDS_CSQ_IB1_START(2) |
285 				AXXX_CP_QUEUE_THRESHOLDS_CSQ_IB2_START(6) |
286 				AXXX_CP_QUEUE_THRESHOLDS_CSQ_ST_START(14));
287 	} else if (adreno_is_a330(adreno_gpu)) {
288 		/* NOTE: this (value take from downstream android driver)
289 		 * includes some bits outside of the known bitfields.  But
290 		 * A330 has this "MERCIU queue" thing too, which might
291 		 * explain a new bitfield or reshuffling:
292 		 */
293 		gpu_write(gpu, REG_AXXX_CP_QUEUE_THRESHOLDS, 0x003e2008);
294 	}
295 
296 	/* clear ME_HALT to start micro engine */
297 	gpu_write(gpu, REG_AXXX_CP_ME_CNTL, 0);
298 
299 	return a3xx_me_init(gpu) ? 0 : -EINVAL;
300 }
301 
302 static void a3xx_recover(struct msm_gpu *gpu)
303 {
304 	int i;
305 
306 	adreno_dump_info(gpu);
307 
308 	for (i = 0; i < 8; i++) {
309 		printk("CP_SCRATCH_REG%d: %u\n", i,
310 			gpu_read(gpu, REG_AXXX_CP_SCRATCH_REG0 + i));
311 	}
312 
313 	/* dump registers before resetting gpu, if enabled: */
314 	if (hang_debug)
315 		a3xx_dump(gpu);
316 
317 	gpu_write(gpu, REG_A3XX_RBBM_SW_RESET_CMD, 1);
318 	gpu_read(gpu, REG_A3XX_RBBM_SW_RESET_CMD);
319 	gpu_write(gpu, REG_A3XX_RBBM_SW_RESET_CMD, 0);
320 	adreno_recover(gpu);
321 }
322 
323 static void a3xx_destroy(struct msm_gpu *gpu)
324 {
325 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
326 	struct a3xx_gpu *a3xx_gpu = to_a3xx_gpu(adreno_gpu);
327 
328 	DBG("%s", gpu->name);
329 
330 	adreno_gpu_cleanup(adreno_gpu);
331 
332 #ifdef CONFIG_MSM_OCMEM
333 	if (a3xx_gpu->ocmem_base)
334 		ocmem_free(OCMEM_GRAPHICS, a3xx_gpu->ocmem_hdl);
335 #endif
336 
337 	kfree(a3xx_gpu);
338 }
339 
340 static bool a3xx_idle(struct msm_gpu *gpu)
341 {
342 	/* wait for ringbuffer to drain: */
343 	if (!adreno_idle(gpu, gpu->rb[0]))
344 		return false;
345 
346 	/* then wait for GPU to finish: */
347 	if (spin_until(!(gpu_read(gpu, REG_A3XX_RBBM_STATUS) &
348 			A3XX_RBBM_STATUS_GPU_BUSY))) {
349 		DRM_ERROR("%s: timeout waiting for GPU to idle!\n", gpu->name);
350 
351 		/* TODO maybe we need to reset GPU here to recover from hang? */
352 		return false;
353 	}
354 
355 	return true;
356 }
357 
358 static irqreturn_t a3xx_irq(struct msm_gpu *gpu)
359 {
360 	uint32_t status;
361 
362 	status = gpu_read(gpu, REG_A3XX_RBBM_INT_0_STATUS);
363 	DBG("%s: %08x", gpu->name, status);
364 
365 	// TODO
366 
367 	gpu_write(gpu, REG_A3XX_RBBM_INT_CLEAR_CMD, status);
368 
369 	msm_gpu_retire(gpu);
370 
371 	return IRQ_HANDLED;
372 }
373 
/*
 * Register list handed to the adreno core through adreno_gpu->registers in
 * a3xx_gpu_init().  The values are laid out two pairs per line; each pair
 * looks like an inclusive (first, last) register range - confirm against the
 * consumer in the adreno core.  The list is terminated by a ~0 sentinel.
 */
static const unsigned int a3xx_registers[] = {
	0x0000, 0x0002,   0x0010, 0x0012,
	0x0018, 0x0018,   0x0020, 0x0027,
	0x0029, 0x002b,   0x002e, 0x0033,
	0x0040, 0x0042,   0x0050, 0x005c,
	0x0060, 0x006c,   0x0080, 0x0082,
	0x0084, 0x0088,   0x0090, 0x00e5,
	0x00ea, 0x00ed,   0x0100, 0x0100,
	0x0110, 0x0123,   0x01c0, 0x01c1,
	0x01c3, 0x01c5,   0x01c7, 0x01c7,
	0x01d5, 0x01d9,   0x01dc, 0x01dd,
	0x01ea, 0x01ea,   0x01ee, 0x01f1,
	0x01f5, 0x01f5,   0x01fc, 0x01ff,
	0x0440, 0x0440,   0x0443, 0x0443,
	0x0445, 0x0445,   0x044d, 0x044f,
	0x0452, 0x0452,   0x0454, 0x046f,
	0x047c, 0x047c,   0x047f, 0x047f,
	0x0578, 0x057f,   0x0600, 0x0602,
	0x0605, 0x0607,   0x060a, 0x060e,
	0x0612, 0x0614,   0x0c01, 0x0c02,
	0x0c06, 0x0c1d,   0x0c3d, 0x0c3f,
	0x0c48, 0x0c4b,   0x0c80, 0x0c80,
	0x0c88, 0x0c8b,   0x0ca0, 0x0cb7,
	0x0cc0, 0x0cc1,   0x0cc6, 0x0cc7,
	0x0ce4, 0x0ce5,   0x0e00, 0x0e05,
	0x0e0c, 0x0e0c,   0x0e22, 0x0e23,
	0x0e41, 0x0e45,   0x0e64, 0x0e65,
	0x0e80, 0x0e82,   0x0e84, 0x0e89,
	0x0ea0, 0x0ea1,   0x0ea4, 0x0ea7,
	0x0ec4, 0x0ecb,   0x0ee0, 0x0ee0,
	0x0f00, 0x0f01,   0x0f03, 0x0f09,
	0x2040, 0x2040,   0x2044, 0x2044,
	0x2048, 0x204d,   0x2068, 0x2069,
	0x206c, 0x206d,   0x2070, 0x2070,
	0x2072, 0x2072,   0x2074, 0x2075,
	0x2079, 0x207a,   0x20c0, 0x20d3,
	0x20e4, 0x20ef,   0x2100, 0x2109,
	0x210c, 0x210c,   0x210e, 0x210e,
	0x2110, 0x2111,   0x2114, 0x2115,
	0x21e4, 0x21e4,   0x21ea, 0x21ea,
	0x21ec, 0x21ed,   0x21f0, 0x21f0,
	0x2200, 0x2212,   0x2214, 0x2217,
	0x221a, 0x221a,   0x2240, 0x227e,
	0x2280, 0x228b,   0x22c0, 0x22c0,
	0x22c4, 0x22ce,   0x22d0, 0x22d8,
	0x22df, 0x22e6,   0x22e8, 0x22e9,
	0x22ec, 0x22ec,   0x22f0, 0x22f7,
	0x22ff, 0x22ff,   0x2340, 0x2343,
	0x2348, 0x2349,   0x2350, 0x2356,
	0x2360, 0x2360,   0x2440, 0x2440,
	0x2444, 0x2444,   0x2448, 0x244d,
	0x2468, 0x2469,   0x246c, 0x246d,
	0x2470, 0x2470,   0x2472, 0x2472,
	0x2474, 0x2475,   0x2479, 0x247a,
	0x24c0, 0x24d3,   0x24e4, 0x24ef,
	0x2500, 0x2509,   0x250c, 0x250c,
	0x250e, 0x250e,   0x2510, 0x2511,
	0x2514, 0x2515,   0x25e4, 0x25e4,
	0x25ea, 0x25ea,   0x25ec, 0x25ed,
	0x25f0, 0x25f0,   0x2600, 0x2612,
	0x2614, 0x2617,   0x261a, 0x261a,
	0x2640, 0x267e,   0x2680, 0x268b,
	0x26c0, 0x26c0,   0x26c4, 0x26ce,
	0x26d0, 0x26d8,   0x26df, 0x26e6,
	0x26e8, 0x26e9,   0x26ec, 0x26ec,
	0x26f0, 0x26f7,   0x26ff, 0x26ff,
	0x2740, 0x2743,   0x2748, 0x2749,
	0x2750, 0x2756,   0x2760, 0x2760,
	0x300c, 0x300e,   0x301c, 0x301d,
	0x302a, 0x302a,   0x302c, 0x302d,
	0x3030, 0x3031,   0x3034, 0x3036,
	0x303c, 0x303c,   0x305e, 0x305f,
	~0   /* sentinel */
};
413 
#ifdef CONFIG_DEBUG_FS
/*
 * debugfs hook: emit the current RBBM_STATUS value to the seq_file, followed
 * by the common adreno_show() output.
 */
static void a3xx_show(struct msm_gpu *gpu, struct seq_file *m)
{
	uint32_t rbbm_status = gpu_read(gpu, REG_A3XX_RBBM_STATUS);

	seq_printf(m, "status:   %08x\n", rbbm_status);

	adreno_show(gpu, m);
}
#endif
422 
423 /* would be nice to not have to duplicate the _show() stuff with printk(): */
424 static void a3xx_dump(struct msm_gpu *gpu)
425 {
426 	printk("status:   %08x\n",
427 			gpu_read(gpu, REG_A3XX_RBBM_STATUS));
428 	adreno_dump(gpu);
429 }
/*
 * Register offset defines for A3XX.
 *
 * Table of REG_ADRENO_REGISTER_MAX entries relating the generic
 * REG_ADRENO_* identifiers to the registers used on this GPU; it is handed
 * to the adreno core through adreno_gpu->reg_offsets in a3xx_gpu_init().
 * The two *_HI identifiers are marked with REG_ADRENO_SKIP rather than
 * given a register (presumably because A3XX has no high word for these
 * addresses - confirm).
 */
static const unsigned int a3xx_register_offsets[REG_ADRENO_REGISTER_MAX] = {
	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_BASE, REG_AXXX_CP_RB_BASE),
	REG_ADRENO_SKIP(REG_ADRENO_CP_RB_BASE_HI),
	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_RPTR_ADDR, REG_AXXX_CP_RB_RPTR_ADDR),
	REG_ADRENO_SKIP(REG_ADRENO_CP_RB_RPTR_ADDR_HI),
	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_RPTR, REG_AXXX_CP_RB_RPTR),
	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_WPTR, REG_AXXX_CP_RB_WPTR),
	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_CNTL, REG_AXXX_CP_RB_CNTL),
};
440 
441 static const struct adreno_gpu_funcs funcs = {
442 	.base = {
443 		.get_param = adreno_get_param,
444 		.hw_init = a3xx_hw_init,
445 		.pm_suspend = msm_gpu_pm_suspend,
446 		.pm_resume = msm_gpu_pm_resume,
447 		.recover = a3xx_recover,
448 		.submit = adreno_submit,
449 		.flush = adreno_flush,
450 		.active_ring = adreno_active_ring,
451 		.irq = a3xx_irq,
452 		.destroy = a3xx_destroy,
453 #ifdef CONFIG_DEBUG_FS
454 		.show = a3xx_show,
455 #endif
456 	},
457 };
458 
459 static const struct msm_gpu_perfcntr perfcntrs[] = {
460 	{ REG_A3XX_SP_PERFCOUNTER6_SELECT, REG_A3XX_RBBM_PERFCTR_SP_6_LO,
461 			SP_ALU_ACTIVE_CYCLES, "ALUACTIVE" },
462 	{ REG_A3XX_SP_PERFCOUNTER7_SELECT, REG_A3XX_RBBM_PERFCTR_SP_7_LO,
463 			SP_FS_FULL_ALU_INSTRUCTIONS, "ALUFULL" },
464 };
465 
466 struct msm_gpu *a3xx_gpu_init(struct drm_device *dev)
467 {
468 	struct a3xx_gpu *a3xx_gpu = NULL;
469 	struct adreno_gpu *adreno_gpu;
470 	struct msm_gpu *gpu;
471 	struct msm_drm_private *priv = dev->dev_private;
472 	struct platform_device *pdev = priv->gpu_pdev;
473 	int ret;
474 
475 	if (!pdev) {
476 		dev_err(dev->dev, "no a3xx device\n");
477 		ret = -ENXIO;
478 		goto fail;
479 	}
480 
481 	a3xx_gpu = kzalloc(sizeof(*a3xx_gpu), GFP_KERNEL);
482 	if (!a3xx_gpu) {
483 		ret = -ENOMEM;
484 		goto fail;
485 	}
486 
487 	adreno_gpu = &a3xx_gpu->base;
488 	gpu = &adreno_gpu->base;
489 
490 	gpu->perfcntrs = perfcntrs;
491 	gpu->num_perfcntrs = ARRAY_SIZE(perfcntrs);
492 
493 	adreno_gpu->registers = a3xx_registers;
494 	adreno_gpu->reg_offsets = a3xx_register_offsets;
495 
496 	ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs, 1);
497 	if (ret)
498 		goto fail;
499 
500 	/* if needed, allocate gmem: */
501 	if (adreno_is_a330(adreno_gpu)) {
502 #ifdef CONFIG_MSM_OCMEM
503 		/* TODO this is different/missing upstream: */
504 		struct ocmem_buf *ocmem_hdl =
505 				ocmem_allocate(OCMEM_GRAPHICS, adreno_gpu->gmem);
506 
507 		a3xx_gpu->ocmem_hdl = ocmem_hdl;
508 		a3xx_gpu->ocmem_base = ocmem_hdl->addr;
509 		adreno_gpu->gmem = ocmem_hdl->len;
510 		DBG("using %dK of OCMEM at 0x%08x", adreno_gpu->gmem / 1024,
511 				a3xx_gpu->ocmem_base);
512 #endif
513 	}
514 
515 	if (!gpu->aspace) {
516 		/* TODO we think it is possible to configure the GPU to
517 		 * restrict access to VRAM carveout.  But the required
518 		 * registers are unknown.  For now just bail out and
519 		 * limp along with just modesetting.  If it turns out
520 		 * to not be possible to restrict access, then we must
521 		 * implement a cmdstream validator.
522 		 */
523 		dev_err(dev->dev, "No memory protection without IOMMU\n");
524 		ret = -ENXIO;
525 		goto fail;
526 	}
527 
528 	return gpu;
529 
530 fail:
531 	if (a3xx_gpu)
532 		a3xx_destroy(&a3xx_gpu->base.base);
533 
534 	return ERR_PTR(ret);
535 }
536