xref: /openbmc/linux/drivers/gpu/drm/msm/adreno/a3xx_gpu.c (revision 4da722ca)
/*
 * Copyright (C) 2013 Red Hat
 * Author: Rob Clark <robdclark@gmail.com>
 *
 * Copyright (c) 2014 The Linux Foundation. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 as published by
 * the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program.  If not, see <http://www.gnu.org/licenses/>.
 */

#ifdef CONFIG_MSM_OCMEM
#  include <mach/ocmem.h>
#endif

#include "a3xx_gpu.h"

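/* Interrupt sources left unmasked in RBBM_INT_0_MASK: the various CP/RBBM
 * error conditions, plus the RB/IB interrupts that signal command-stream
 * progress so retired submits get noticed in the irq handler.
 */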
#define A3XX_INT0_MASK \
	(A3XX_INT0_RBBM_AHB_ERROR |        \
	 A3XX_INT0_RBBM_ATB_BUS_OVERFLOW | \
	 A3XX_INT0_CP_T0_PACKET_IN_IB |    \
	 A3XX_INT0_CP_OPCODE_ERROR |       \
	 A3XX_INT0_CP_RESERVED_BIT_ERROR | \
	 A3XX_INT0_CP_HW_FAULT |           \
	 A3XX_INT0_CP_IB1_INT |            \
	 A3XX_INT0_CP_IB2_INT |            \
	 A3XX_INT0_CP_RB_INT |             \
	 A3XX_INT0_CP_REG_PROTECT_FAULT |  \
	 A3XX_INT0_CP_AHB_ERROR_HALT |     \
	 A3XX_INT0_UCHE_OOB_ACCESS)

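/* Set via the adreno "hang_debug" module parameter (defined in
 * adreno_device.c); when true, registers are dumped before recovery.
 */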
extern bool hang_debug;

static void a3xx_dump(struct msm_gpu *gpu);
static bool a3xx_idle(struct msm_gpu *gpu);

static bool a3xx_me_init(struct msm_gpu *gpu)
{
	struct msm_ringbuffer *ring = gpu->rb;

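	/* CP_ME_INIT programs the micro-engine's initial state; the 17 dwords
	 * below appear to follow the init sequence used by the downstream
	 * driver.
	 */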
	OUT_PKT3(ring, CP_ME_INIT, 17);
	OUT_RING(ring, 0x000003f7);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000080);
	OUT_RING(ring, 0x00000100);
	OUT_RING(ring, 0x00000180);
	OUT_RING(ring, 0x00006600);
	OUT_RING(ring, 0x00000150);
	OUT_RING(ring, 0x0000014e);
	OUT_RING(ring, 0x00000154);
	OUT_RING(ring, 0x00000001);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);

	gpu->funcs->flush(gpu);
	return a3xx_idle(gpu);
}

static int a3xx_hw_init(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a3xx_gpu *a3xx_gpu = to_a3xx_gpu(adreno_gpu);
	uint32_t *ptr, len;
	int i, ret;

	DBG("%s", gpu->name);

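	/* Per-variant VBIF (GPU bus interface) tuning: request queue depths,
	 * AXI port arbitration, write-request and out-of-order (AOOO)
	 * settings.  The magic values presumably come from the downstream
	 * driver.
	 */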
	if (adreno_is_a305(adreno_gpu)) {
		/* Set up 16 deep read/write request queues: */
		gpu_write(gpu, REG_A3XX_VBIF_IN_RD_LIM_CONF0, 0x10101010);
		gpu_write(gpu, REG_A3XX_VBIF_IN_RD_LIM_CONF1, 0x10101010);
		gpu_write(gpu, REG_A3XX_VBIF_OUT_RD_LIM_CONF0, 0x10101010);
		gpu_write(gpu, REG_A3XX_VBIF_OUT_WR_LIM_CONF0, 0x10101010);
		gpu_write(gpu, REG_A3XX_VBIF_DDR_OUT_MAX_BURST, 0x0000303);
		gpu_write(gpu, REG_A3XX_VBIF_IN_WR_LIM_CONF0, 0x10101010);
		gpu_write(gpu, REG_A3XX_VBIF_IN_WR_LIM_CONF1, 0x10101010);
		/* Enable WR-REQ: */
		gpu_write(gpu, REG_A3XX_VBIF_GATE_OFF_WRREQ_EN, 0x0000ff);
		/* Set up round robin arbitration between both AXI ports: */
		gpu_write(gpu, REG_A3XX_VBIF_ARB_CTL, 0x00000030);
		/* Set up AOOO: */
		gpu_write(gpu, REG_A3XX_VBIF_OUT_AXI_AOOO_EN, 0x0000003c);
		gpu_write(gpu, REG_A3XX_VBIF_OUT_AXI_AOOO, 0x003c003c);
	} else if (adreno_is_a306(adreno_gpu)) {
		gpu_write(gpu, REG_A3XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x0003);
		gpu_write(gpu, REG_A3XX_VBIF_OUT_RD_LIM_CONF0, 0x0000000a);
		gpu_write(gpu, REG_A3XX_VBIF_OUT_WR_LIM_CONF0, 0x0000000a);
	} else if (adreno_is_a320(adreno_gpu)) {
		/* Set up 16 deep read/write request queues: */
		gpu_write(gpu, REG_A3XX_VBIF_IN_RD_LIM_CONF0, 0x10101010);
		gpu_write(gpu, REG_A3XX_VBIF_IN_RD_LIM_CONF1, 0x10101010);
		gpu_write(gpu, REG_A3XX_VBIF_OUT_RD_LIM_CONF0, 0x10101010);
		gpu_write(gpu, REG_A3XX_VBIF_OUT_WR_LIM_CONF0, 0x10101010);
		gpu_write(gpu, REG_A3XX_VBIF_DDR_OUT_MAX_BURST, 0x0000303);
		gpu_write(gpu, REG_A3XX_VBIF_IN_WR_LIM_CONF0, 0x10101010);
		gpu_write(gpu, REG_A3XX_VBIF_IN_WR_LIM_CONF1, 0x10101010);
		/* Enable WR-REQ: */
		gpu_write(gpu, REG_A3XX_VBIF_GATE_OFF_WRREQ_EN, 0x0000ff);
		/* Set up round robin arbitration between both AXI ports: */
		gpu_write(gpu, REG_A3XX_VBIF_ARB_CTL, 0x00000030);
		/* Set up AOOO: */
		gpu_write(gpu, REG_A3XX_VBIF_OUT_AXI_AOOO_EN, 0x0000003c);
		gpu_write(gpu, REG_A3XX_VBIF_OUT_AXI_AOOO, 0x003c003c);
		/* Enable 1K sort: */
		gpu_write(gpu, REG_A3XX_VBIF_ABIT_SORT, 0x000000ff);
		gpu_write(gpu, REG_A3XX_VBIF_ABIT_SORT_CONF, 0x000000a4);

	} else if (adreno_is_a330v2(adreno_gpu)) {
		/*
		 * Most of the VBIF registers on 8974v2 have the correct
		 * values at power on, so we won't modify those if we don't
		 * need to
		 */
		/* Enable 1k sort: */
		gpu_write(gpu, REG_A3XX_VBIF_ABIT_SORT, 0x0001003f);
		gpu_write(gpu, REG_A3XX_VBIF_ABIT_SORT_CONF, 0x000000a4);
		/* Enable WR-REQ: */
		gpu_write(gpu, REG_A3XX_VBIF_GATE_OFF_WRREQ_EN, 0x00003f);
		gpu_write(gpu, REG_A3XX_VBIF_DDR_OUT_MAX_BURST, 0x0000303);
		/* Set up VBIF_ROUND_ROBIN_QOS_ARB: */
		gpu_write(gpu, REG_A3XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x0003);

	} else if (adreno_is_a330(adreno_gpu)) {
		/* Set up 16 deep read/write request queues: */
		gpu_write(gpu, REG_A3XX_VBIF_IN_RD_LIM_CONF0, 0x18181818);
		gpu_write(gpu, REG_A3XX_VBIF_IN_RD_LIM_CONF1, 0x18181818);
		gpu_write(gpu, REG_A3XX_VBIF_OUT_RD_LIM_CONF0, 0x18181818);
		gpu_write(gpu, REG_A3XX_VBIF_OUT_WR_LIM_CONF0, 0x18181818);
		gpu_write(gpu, REG_A3XX_VBIF_DDR_OUT_MAX_BURST, 0x0000303);
		gpu_write(gpu, REG_A3XX_VBIF_IN_WR_LIM_CONF0, 0x18181818);
		gpu_write(gpu, REG_A3XX_VBIF_IN_WR_LIM_CONF1, 0x18181818);
		/* Enable WR-REQ: */
		gpu_write(gpu, REG_A3XX_VBIF_GATE_OFF_WRREQ_EN, 0x00003f);
		/* Set up round robin arbitration between both AXI ports: */
		gpu_write(gpu, REG_A3XX_VBIF_ARB_CTL, 0x00000030);
		/* Set up VBIF_ROUND_ROBIN_QOS_ARB: */
		gpu_write(gpu, REG_A3XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x0001);
		/* Set up AOOO: */
		gpu_write(gpu, REG_A3XX_VBIF_OUT_AXI_AOOO_EN, 0x0000003f);
		gpu_write(gpu, REG_A3XX_VBIF_OUT_AXI_AOOO, 0x003f003f);
		/* Enable 1K sort: */
		gpu_write(gpu, REG_A3XX_VBIF_ABIT_SORT, 0x0001003f);
		gpu_write(gpu, REG_A3XX_VBIF_ABIT_SORT_CONF, 0x000000a4);
		/* Disable VBIF clock gating so the AXI interface can run at a
		 * higher frequency than the GPU:
		 */
		gpu_write(gpu, REG_A3XX_VBIF_CLKON, 0x00000001);

	} else {
		BUG();
	}

	/* Make all blocks contribute to the GPU BUSY perf counter: */
	gpu_write(gpu, REG_A3XX_RBBM_GPU_BUSY_MASKED, 0xffffffff);

	/* Tune the hysteresis counters for SP and CP idle detection: */
	gpu_write(gpu, REG_A3XX_RBBM_SP_HYST_CNT, 0x10);
	gpu_write(gpu, REG_A3XX_RBBM_WAIT_IDLE_CLOCKS_CTL, 0x10);

	/* Enable the RBBM error reporting bits.  This lets us get
	 * useful information on failure:
	 */
	gpu_write(gpu, REG_A3XX_RBBM_AHB_CTL0, 0x00000001);

	/* Enable AHB error reporting: */
	gpu_write(gpu, REG_A3XX_RBBM_AHB_CTL1, 0xa6ffffff);

	/* Turn on the power counters: */
	gpu_write(gpu, REG_A3XX_RBBM_RBBM_CTL, 0x00030000);

	/* Turn on hang detection - this spews a lot of useful information
	 * into the RBBM registers on a hang:
	 */
	gpu_write(gpu, REG_A3XX_RBBM_INTERFACE_HANG_INT_CTL, 0x00010fff);

	/* Enable 64-byte cacheline size. HW Default is 32-byte (0x000000E0): */
	gpu_write(gpu, REG_A3XX_UCHE_CACHE_MODE_CONTROL_REG, 0x00000001);

	/* Enable Clock gating: */
	if (adreno_is_a306(adreno_gpu))
		gpu_write(gpu, REG_A3XX_RBBM_CLOCK_CTL, 0xaaaaaaaa);
	else if (adreno_is_a320(adreno_gpu))
		gpu_write(gpu, REG_A3XX_RBBM_CLOCK_CTL, 0xbfffffff);
	else if (adreno_is_a330v2(adreno_gpu))
		gpu_write(gpu, REG_A3XX_RBBM_CLOCK_CTL, 0xaaaaaaaa);
	else if (adreno_is_a330(adreno_gpu))
		gpu_write(gpu, REG_A3XX_RBBM_CLOCK_CTL, 0xbffcffff);

	if (adreno_is_a330v2(adreno_gpu))
		gpu_write(gpu, REG_A3XX_RBBM_GPR0_CTL, 0x05515455);
	else if (adreno_is_a330(adreno_gpu))
		gpu_write(gpu, REG_A3XX_RBBM_GPR0_CTL, 0x00000000);

	/* Set the OCMEM base address for A330, etc */
	if (a3xx_gpu->ocmem_hdl) {
		gpu_write(gpu, REG_A3XX_RB_GMEM_BASE_ADDR,
			(unsigned int)(a3xx_gpu->ocmem_base >> 14));
	}

	/* Turn on performance counters: */
	gpu_write(gpu, REG_A3XX_RBBM_PERFCTR_CTL, 0x01);

	/* Enable the perfcntrs that we use.. */
	for (i = 0; i < gpu->num_perfcntrs; i++) {
		const struct msm_gpu_perfcntr *perfcntr = &gpu->perfcntrs[i];
		gpu_write(gpu, perfcntr->select_reg, perfcntr->select_val);
	}

	gpu_write(gpu, REG_A3XX_RBBM_INT_0_MASK, A3XX_INT0_MASK);

	ret = adreno_hw_init(gpu);
	if (ret)
		return ret;

	/* setup access protection: */
	gpu_write(gpu, REG_A3XX_CP_PROTECT_CTRL, 0x00000007);

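	/* Each CP_PROTECT entry appears to encode a protected register range
	 * (base offset in the low bits, range size in the upper bits) that
	 * the CP may not access from the command stream; the values mirror
	 * the downstream driver.
	 */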
	/* RBBM registers */
	gpu_write(gpu, REG_A3XX_CP_PROTECT(0), 0x63000040);
	gpu_write(gpu, REG_A3XX_CP_PROTECT(1), 0x62000080);
	gpu_write(gpu, REG_A3XX_CP_PROTECT(2), 0x600000cc);
	gpu_write(gpu, REG_A3XX_CP_PROTECT(3), 0x60000108);
	gpu_write(gpu, REG_A3XX_CP_PROTECT(4), 0x64000140);
	gpu_write(gpu, REG_A3XX_CP_PROTECT(5), 0x66000400);

	/* CP registers */
	gpu_write(gpu, REG_A3XX_CP_PROTECT(6), 0x65000700);
	gpu_write(gpu, REG_A3XX_CP_PROTECT(7), 0x610007d8);
	gpu_write(gpu, REG_A3XX_CP_PROTECT(8), 0x620007e0);
	gpu_write(gpu, REG_A3XX_CP_PROTECT(9), 0x61001178);
	gpu_write(gpu, REG_A3XX_CP_PROTECT(10), 0x64001180);

	/* RB registers */
	gpu_write(gpu, REG_A3XX_CP_PROTECT(11), 0x60003300);

	/* VBIF registers */
	gpu_write(gpu, REG_A3XX_CP_PROTECT(12), 0x6b00c000);

	/* NOTE: PM4/micro-engine firmware registers look to be the same
	 * for a2xx and a3xx.. we could possibly push that part down to
	 * adreno_gpu base class.  Or push both PM4 and PFP but
	 * parameterize the pfp ucode addr/data registers..
	 */

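	/* Note: the loops below start at ptr[1], so the first dword of each
	 * firmware image is not written to the ucode RAM.
	 */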
	/* Load PM4: */
	ptr = (uint32_t *)(adreno_gpu->pm4->data);
	len = adreno_gpu->pm4->size / 4;
	DBG("loading PM4 ucode version: %x", ptr[1]);

	gpu_write(gpu, REG_AXXX_CP_DEBUG,
			AXXX_CP_DEBUG_DYNAMIC_CLK_DISABLE |
			AXXX_CP_DEBUG_MIU_128BIT_WRITE_ENABLE);
	gpu_write(gpu, REG_AXXX_CP_ME_RAM_WADDR, 0);
	for (i = 1; i < len; i++)
		gpu_write(gpu, REG_AXXX_CP_ME_RAM_DATA, ptr[i]);

	/* Load PFP: */
	ptr = (uint32_t *)(adreno_gpu->pfp->data);
	len = adreno_gpu->pfp->size / 4;
	DBG("loading PFP ucode version: %x", ptr[5]);

	gpu_write(gpu, REG_A3XX_CP_PFP_UCODE_ADDR, 0);
	for (i = 1; i < len; i++)
		gpu_write(gpu, REG_A3XX_CP_PFP_UCODE_DATA, ptr[i]);

	/* CP ROQ queue sizes (bytes) - RB:16, ST:16, IB1:32, IB2:64 */
	if (adreno_is_a305(adreno_gpu) || adreno_is_a306(adreno_gpu) ||
			adreno_is_a320(adreno_gpu)) {
		gpu_write(gpu, REG_AXXX_CP_QUEUE_THRESHOLDS,
				AXXX_CP_QUEUE_THRESHOLDS_CSQ_IB1_START(2) |
				AXXX_CP_QUEUE_THRESHOLDS_CSQ_IB2_START(6) |
				AXXX_CP_QUEUE_THRESHOLDS_CSQ_ST_START(14));
	} else if (adreno_is_a330(adreno_gpu)) {
		/* NOTE: this value (taken from the downstream Android driver)
		 * includes some bits outside of the known bitfields.  But
		 * A330 has this "MERCIU queue" thing too, which might
		 * explain a new bitfield or reshuffling:
		 */
		gpu_write(gpu, REG_AXXX_CP_QUEUE_THRESHOLDS, 0x003e2008);
	}

	/* clear ME_HALT to start micro engine */
	gpu_write(gpu, REG_AXXX_CP_ME_CNTL, 0);

	return a3xx_me_init(gpu) ? 0 : -EINVAL;
}

static void a3xx_recover(struct msm_gpu *gpu)
{
	int i;

	adreno_dump_info(gpu);

	for (i = 0; i < 8; i++) {
		printk("CP_SCRATCH_REG%d: %u\n", i,
			gpu_read(gpu, REG_AXXX_CP_SCRATCH_REG0 + i));
	}

	/* dump registers before resetting gpu, if enabled: */
	if (hang_debug)
		a3xx_dump(gpu);

	gpu_write(gpu, REG_A3XX_RBBM_SW_RESET_CMD, 1);
	gpu_read(gpu, REG_A3XX_RBBM_SW_RESET_CMD);
	gpu_write(gpu, REG_A3XX_RBBM_SW_RESET_CMD, 0);
	adreno_recover(gpu);
}

static void a3xx_destroy(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a3xx_gpu *a3xx_gpu = to_a3xx_gpu(adreno_gpu);

	DBG("%s", gpu->name);

	adreno_gpu_cleanup(adreno_gpu);

#ifdef CONFIG_MSM_OCMEM
	if (a3xx_gpu->ocmem_base)
		ocmem_free(OCMEM_GRAPHICS, a3xx_gpu->ocmem_hdl);
#endif

	kfree(a3xx_gpu);
}

static bool a3xx_idle(struct msm_gpu *gpu)
{
	/* wait for ringbuffer to drain: */
	if (!adreno_idle(gpu))
		return false;

	/* then wait for GPU to finish: */
	if (spin_until(!(gpu_read(gpu, REG_A3XX_RBBM_STATUS) &
			A3XX_RBBM_STATUS_GPU_BUSY))) {
		DRM_ERROR("%s: timeout waiting for GPU to idle!\n", gpu->name);

		/* TODO maybe we need to reset GPU here to recover from hang? */
		return false;
	}

	return true;
}

static irqreturn_t a3xx_irq(struct msm_gpu *gpu)
{
	uint32_t status;

	status = gpu_read(gpu, REG_A3XX_RBBM_INT_0_STATUS);
	DBG("%s: %08x", gpu->name, status);

	// TODO

	gpu_write(gpu, REG_A3XX_RBBM_INT_CLEAR_CMD, status);

	msm_gpu_retire(gpu);

	return IRQ_HANDLED;
}

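/* Ranges of register offsets (inclusive start/end pairs, terminated by the
 * ~0 sentinel) that the common adreno code reads back when dumping GPU state
 * via adreno_show()/adreno_dump().
 */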
static const unsigned int a3xx_registers[] = {
	0x0000, 0x0002, 0x0010, 0x0012, 0x0018, 0x0018, 0x0020, 0x0027,
	0x0029, 0x002b, 0x002e, 0x0033, 0x0040, 0x0042, 0x0050, 0x005c,
	0x0060, 0x006c, 0x0080, 0x0082, 0x0084, 0x0088, 0x0090, 0x00e5,
	0x00ea, 0x00ed, 0x0100, 0x0100, 0x0110, 0x0123, 0x01c0, 0x01c1,
	0x01c3, 0x01c5, 0x01c7, 0x01c7, 0x01d5, 0x01d9, 0x01dc, 0x01dd,
	0x01ea, 0x01ea, 0x01ee, 0x01f1, 0x01f5, 0x01f5, 0x01fc, 0x01ff,
	0x0440, 0x0440, 0x0443, 0x0443, 0x0445, 0x0445, 0x044d, 0x044f,
	0x0452, 0x0452, 0x0454, 0x046f, 0x047c, 0x047c, 0x047f, 0x047f,
	0x0578, 0x057f, 0x0600, 0x0602, 0x0605, 0x0607, 0x060a, 0x060e,
	0x0612, 0x0614, 0x0c01, 0x0c02, 0x0c06, 0x0c1d, 0x0c3d, 0x0c3f,
	0x0c48, 0x0c4b, 0x0c80, 0x0c80, 0x0c88, 0x0c8b, 0x0ca0, 0x0cb7,
	0x0cc0, 0x0cc1, 0x0cc6, 0x0cc7, 0x0ce4, 0x0ce5, 0x0e00, 0x0e05,
	0x0e0c, 0x0e0c, 0x0e22, 0x0e23, 0x0e41, 0x0e45, 0x0e64, 0x0e65,
	0x0e80, 0x0e82, 0x0e84, 0x0e89, 0x0ea0, 0x0ea1, 0x0ea4, 0x0ea7,
	0x0ec4, 0x0ecb, 0x0ee0, 0x0ee0, 0x0f00, 0x0f01, 0x0f03, 0x0f09,
	0x2040, 0x2040, 0x2044, 0x2044, 0x2048, 0x204d, 0x2068, 0x2069,
	0x206c, 0x206d, 0x2070, 0x2070, 0x2072, 0x2072, 0x2074, 0x2075,
	0x2079, 0x207a, 0x20c0, 0x20d3, 0x20e4, 0x20ef, 0x2100, 0x2109,
	0x210c, 0x210c, 0x210e, 0x210e, 0x2110, 0x2111, 0x2114, 0x2115,
	0x21e4, 0x21e4, 0x21ea, 0x21ea, 0x21ec, 0x21ed, 0x21f0, 0x21f0,
	0x2200, 0x2212, 0x2214, 0x2217, 0x221a, 0x221a, 0x2240, 0x227e,
	0x2280, 0x228b, 0x22c0, 0x22c0, 0x22c4, 0x22ce, 0x22d0, 0x22d8,
	0x22df, 0x22e6, 0x22e8, 0x22e9, 0x22ec, 0x22ec, 0x22f0, 0x22f7,
	0x22ff, 0x22ff, 0x2340, 0x2343, 0x2348, 0x2349, 0x2350, 0x2356,
	0x2360, 0x2360, 0x2440, 0x2440, 0x2444, 0x2444, 0x2448, 0x244d,
	0x2468, 0x2469, 0x246c, 0x246d, 0x2470, 0x2470, 0x2472, 0x2472,
	0x2474, 0x2475, 0x2479, 0x247a, 0x24c0, 0x24d3, 0x24e4, 0x24ef,
	0x2500, 0x2509, 0x250c, 0x250c, 0x250e, 0x250e, 0x2510, 0x2511,
	0x2514, 0x2515, 0x25e4, 0x25e4, 0x25ea, 0x25ea, 0x25ec, 0x25ed,
	0x25f0, 0x25f0, 0x2600, 0x2612, 0x2614, 0x2617, 0x261a, 0x261a,
	0x2640, 0x267e, 0x2680, 0x268b, 0x26c0, 0x26c0, 0x26c4, 0x26ce,
	0x26d0, 0x26d8, 0x26df, 0x26e6, 0x26e8, 0x26e9, 0x26ec, 0x26ec,
	0x26f0, 0x26f7, 0x26ff, 0x26ff, 0x2740, 0x2743, 0x2748, 0x2749,
	0x2750, 0x2756, 0x2760, 0x2760, 0x300c, 0x300e, 0x301c, 0x301d,
	0x302a, 0x302a, 0x302c, 0x302d, 0x3030, 0x3031, 0x3034, 0x3036,
	0x303c, 0x303c, 0x305e, 0x305f,
	~0   /* sentinel */
};

#ifdef CONFIG_DEBUG_FS
static void a3xx_show(struct msm_gpu *gpu, struct seq_file *m)
{
	seq_printf(m, "status:   %08x\n",
			gpu_read(gpu, REG_A3XX_RBBM_STATUS));
	adreno_show(gpu, m);
}
#endif

/* would be nice to not have to duplicate the _show() stuff with printk(): */
static void a3xx_dump(struct msm_gpu *gpu)
{
	printk("status:   %08x\n",
			gpu_read(gpu, REG_A3XX_RBBM_STATUS));
	adreno_dump(gpu);
}

/* Register offset defines for A3XX: map the generic REG_ADRENO_* indices
 * onto the a3xx-specific register offsets so shared adreno_gpu code can use
 * them.
 */
static const unsigned int a3xx_register_offsets[REG_ADRENO_REGISTER_MAX] = {
	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_BASE, REG_AXXX_CP_RB_BASE),
	REG_ADRENO_SKIP(REG_ADRENO_CP_RB_BASE_HI),
	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_RPTR_ADDR, REG_AXXX_CP_RB_RPTR_ADDR),
	REG_ADRENO_SKIP(REG_ADRENO_CP_RB_RPTR_ADDR_HI),
	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_RPTR, REG_AXXX_CP_RB_RPTR),
	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_WPTR, REG_AXXX_CP_RB_WPTR),
	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_CNTL, REG_AXXX_CP_RB_CNTL),
};

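/* Wire the a3xx-specific hooks into the common msm_gpu/adreno_gpu function
 * table; the remaining ops use the shared adreno/msm helpers.
 */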
static const struct adreno_gpu_funcs funcs = {
	.base = {
		.get_param = adreno_get_param,
		.hw_init = a3xx_hw_init,
		.pm_suspend = msm_gpu_pm_suspend,
		.pm_resume = msm_gpu_pm_resume,
		.recover = a3xx_recover,
		.last_fence = adreno_last_fence,
		.submit = adreno_submit,
		.flush = adreno_flush,
		.irq = a3xx_irq,
		.destroy = a3xx_destroy,
#ifdef CONFIG_DEBUG_FS
		.show = a3xx_show,
#endif
	},
};

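/* Performance counters exposed via the msm_gpu perf interface: each entry
 * gives the counter select register, the (LO half of the) counter register
 * to sample, the event select value, and a label.
 */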
static const struct msm_gpu_perfcntr perfcntrs[] = {
	{ REG_A3XX_SP_PERFCOUNTER6_SELECT, REG_A3XX_RBBM_PERFCTR_SP_6_LO,
			SP_ALU_ACTIVE_CYCLES, "ALUACTIVE" },
	{ REG_A3XX_SP_PERFCOUNTER7_SELECT, REG_A3XX_RBBM_PERFCTR_SP_7_LO,
			SP_FS_FULL_ALU_INSTRUCTIONS, "ALUFULL" },
};

struct msm_gpu *a3xx_gpu_init(struct drm_device *dev)
{
	struct a3xx_gpu *a3xx_gpu = NULL;
	struct adreno_gpu *adreno_gpu;
	struct msm_gpu *gpu;
	struct msm_drm_private *priv = dev->dev_private;
	struct platform_device *pdev = priv->gpu_pdev;
	int ret;

	if (!pdev) {
		dev_err(dev->dev, "no a3xx device\n");
		ret = -ENXIO;
		goto fail;
	}

	a3xx_gpu = kzalloc(sizeof(*a3xx_gpu), GFP_KERNEL);
	if (!a3xx_gpu) {
		ret = -ENOMEM;
		goto fail;
	}

	adreno_gpu = &a3xx_gpu->base;
	gpu = &adreno_gpu->base;

	a3xx_gpu->pdev = pdev;

	gpu->perfcntrs = perfcntrs;
	gpu->num_perfcntrs = ARRAY_SIZE(perfcntrs);

	adreno_gpu->registers = a3xx_registers;
	adreno_gpu->reg_offsets = a3xx_register_offsets;

	ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs);
	if (ret)
		goto fail;

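	/* On A330 the GMEM aperture is apparently backed by on-chip memory
	 * (OCMEM) allocated below; a3xx_hw_init() programs RB_GMEM_BASE_ADDR
	 * with the returned base address.
	 */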
	/* if needed, allocate gmem: */
	if (adreno_is_a330(adreno_gpu)) {
#ifdef CONFIG_MSM_OCMEM
		/* TODO this is different/missing upstream: */
		struct ocmem_buf *ocmem_hdl =
				ocmem_allocate(OCMEM_GRAPHICS, adreno_gpu->gmem);

		a3xx_gpu->ocmem_hdl = ocmem_hdl;
		a3xx_gpu->ocmem_base = ocmem_hdl->addr;
		adreno_gpu->gmem = ocmem_hdl->len;
		DBG("using %dK of OCMEM at 0x%08x", adreno_gpu->gmem / 1024,
				a3xx_gpu->ocmem_base);
#endif
	}

	if (!gpu->aspace) {
		/* TODO we think it is possible to configure the GPU to
		 * restrict access to VRAM carveout.  But the required
		 * registers are unknown.  For now just bail out and
		 * limp along with just modesetting.  If it turns out
		 * to not be possible to restrict access, then we must
		 * implement a cmdstream validator.
		 */
		dev_err(dev->dev, "No memory protection without IOMMU\n");
		ret = -ENXIO;
		goto fail;
	}

	return gpu;

fail:
	if (a3xx_gpu)
		a3xx_destroy(&a3xx_gpu->base.base);

	return ERR_PTR(ret);
}