xref: /openbmc/linux/drivers/gpu/drm/msm/adreno/a3xx_gpu.c (revision 05cf4fe738242183f1237f1b3a28b4479348c0a1)
1 /*
2  * Copyright (C) 2013 Red Hat
3  * Author: Rob Clark <robdclark@gmail.com>
4  *
5  * Copyright (c) 2014 The Linux Foundation. All rights reserved.
6  *
7  * This program is free software; you can redistribute it and/or modify it
8  * under the terms of the GNU General Public License version 2 as published by
9  * the Free Software Foundation.
10  *
11  * This program is distributed in the hope that it will be useful, but WITHOUT
12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
14  * more details.
15  *
16  * You should have received a copy of the GNU General Public License along with
17  * this program.  If not, see <http://www.gnu.org/licenses/>.
18  */
19 
20 #ifdef CONFIG_MSM_OCMEM
21 #  include <mach/ocmem.h>
22 #endif
23 
24 #include "a3xx_gpu.h"
25 
/*
 * Set of INT0 interrupt sources this driver unmasks; a3xx_hw_init()
 * writes this value to REG_A3XX_RBBM_INT_0_MASK.
 */
#define A3XX_INT0_MASK (				\
		A3XX_INT0_RBBM_AHB_ERROR |		\
		A3XX_INT0_RBBM_ATB_BUS_OVERFLOW |	\
		A3XX_INT0_CP_T0_PACKET_IN_IB |		\
		A3XX_INT0_CP_OPCODE_ERROR |		\
		A3XX_INT0_CP_RESERVED_BIT_ERROR |	\
		A3XX_INT0_CP_HW_FAULT |			\
		A3XX_INT0_CP_IB1_INT |			\
		A3XX_INT0_CP_IB2_INT |			\
		A3XX_INT0_CP_RB_INT |			\
		A3XX_INT0_CP_REG_PROTECT_FAULT |	\
		A3XX_INT0_CP_AHB_ERROR_HALT |		\
		A3XX_INT0_CACHE_FLUSH_TS |		\
		A3XX_INT0_UCHE_OOB_ACCESS)
40 
41 extern bool hang_debug;
42 
43 static void a3xx_dump(struct msm_gpu *gpu);
44 static bool a3xx_idle(struct msm_gpu *gpu);
45 
46 static bool a3xx_me_init(struct msm_gpu *gpu)
47 {
48 	struct msm_ringbuffer *ring = gpu->rb[0];
49 
50 	OUT_PKT3(ring, CP_ME_INIT, 17);
51 	OUT_RING(ring, 0x000003f7);
52 	OUT_RING(ring, 0x00000000);
53 	OUT_RING(ring, 0x00000000);
54 	OUT_RING(ring, 0x00000000);
55 	OUT_RING(ring, 0x00000080);
56 	OUT_RING(ring, 0x00000100);
57 	OUT_RING(ring, 0x00000180);
58 	OUT_RING(ring, 0x00006600);
59 	OUT_RING(ring, 0x00000150);
60 	OUT_RING(ring, 0x0000014e);
61 	OUT_RING(ring, 0x00000154);
62 	OUT_RING(ring, 0x00000001);
63 	OUT_RING(ring, 0x00000000);
64 	OUT_RING(ring, 0x00000000);
65 	OUT_RING(ring, 0x00000000);
66 	OUT_RING(ring, 0x00000000);
67 	OUT_RING(ring, 0x00000000);
68 
69 	gpu->funcs->flush(gpu, ring);
70 	return a3xx_idle(gpu);
71 }
72 
73 static int a3xx_hw_init(struct msm_gpu *gpu)
74 {
75 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
76 	struct a3xx_gpu *a3xx_gpu = to_a3xx_gpu(adreno_gpu);
77 	uint32_t *ptr, len;
78 	int i, ret;
79 
80 	DBG("%s", gpu->name);
81 
82 	if (adreno_is_a305(adreno_gpu)) {
83 		/* Set up 16 deep read/write request queues: */
84 		gpu_write(gpu, REG_A3XX_VBIF_IN_RD_LIM_CONF0, 0x10101010);
85 		gpu_write(gpu, REG_A3XX_VBIF_IN_RD_LIM_CONF1, 0x10101010);
86 		gpu_write(gpu, REG_A3XX_VBIF_OUT_RD_LIM_CONF0, 0x10101010);
87 		gpu_write(gpu, REG_A3XX_VBIF_OUT_WR_LIM_CONF0, 0x10101010);
88 		gpu_write(gpu, REG_A3XX_VBIF_DDR_OUT_MAX_BURST, 0x0000303);
89 		gpu_write(gpu, REG_A3XX_VBIF_IN_WR_LIM_CONF0, 0x10101010);
90 		gpu_write(gpu, REG_A3XX_VBIF_IN_WR_LIM_CONF1, 0x10101010);
91 		/* Enable WR-REQ: */
92 		gpu_write(gpu, REG_A3XX_VBIF_GATE_OFF_WRREQ_EN, 0x0000ff);
93 		/* Set up round robin arbitration between both AXI ports: */
94 		gpu_write(gpu, REG_A3XX_VBIF_ARB_CTL, 0x00000030);
95 		/* Set up AOOO: */
96 		gpu_write(gpu, REG_A3XX_VBIF_OUT_AXI_AOOO_EN, 0x0000003c);
97 		gpu_write(gpu, REG_A3XX_VBIF_OUT_AXI_AOOO, 0x003c003c);
98 	} else if (adreno_is_a306(adreno_gpu)) {
99 		gpu_write(gpu, REG_A3XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x0003);
100 		gpu_write(gpu, REG_A3XX_VBIF_OUT_RD_LIM_CONF0, 0x0000000a);
101 		gpu_write(gpu, REG_A3XX_VBIF_OUT_WR_LIM_CONF0, 0x0000000a);
102 	} else if (adreno_is_a320(adreno_gpu)) {
103 		/* Set up 16 deep read/write request queues: */
104 		gpu_write(gpu, REG_A3XX_VBIF_IN_RD_LIM_CONF0, 0x10101010);
105 		gpu_write(gpu, REG_A3XX_VBIF_IN_RD_LIM_CONF1, 0x10101010);
106 		gpu_write(gpu, REG_A3XX_VBIF_OUT_RD_LIM_CONF0, 0x10101010);
107 		gpu_write(gpu, REG_A3XX_VBIF_OUT_WR_LIM_CONF0, 0x10101010);
108 		gpu_write(gpu, REG_A3XX_VBIF_DDR_OUT_MAX_BURST, 0x0000303);
109 		gpu_write(gpu, REG_A3XX_VBIF_IN_WR_LIM_CONF0, 0x10101010);
110 		gpu_write(gpu, REG_A3XX_VBIF_IN_WR_LIM_CONF1, 0x10101010);
111 		/* Enable WR-REQ: */
112 		gpu_write(gpu, REG_A3XX_VBIF_GATE_OFF_WRREQ_EN, 0x0000ff);
113 		/* Set up round robin arbitration between both AXI ports: */
114 		gpu_write(gpu, REG_A3XX_VBIF_ARB_CTL, 0x00000030);
115 		/* Set up AOOO: */
116 		gpu_write(gpu, REG_A3XX_VBIF_OUT_AXI_AOOO_EN, 0x0000003c);
117 		gpu_write(gpu, REG_A3XX_VBIF_OUT_AXI_AOOO, 0x003c003c);
118 		/* Enable 1K sort: */
119 		gpu_write(gpu, REG_A3XX_VBIF_ABIT_SORT, 0x000000ff);
120 		gpu_write(gpu, REG_A3XX_VBIF_ABIT_SORT_CONF, 0x000000a4);
121 
122 	} else if (adreno_is_a330v2(adreno_gpu)) {
123 		/*
124 		 * Most of the VBIF registers on 8974v2 have the correct
125 		 * values at power on, so we won't modify those if we don't
126 		 * need to
127 		 */
128 		/* Enable 1k sort: */
129 		gpu_write(gpu, REG_A3XX_VBIF_ABIT_SORT, 0x0001003f);
130 		gpu_write(gpu, REG_A3XX_VBIF_ABIT_SORT_CONF, 0x000000a4);
131 		/* Enable WR-REQ: */
132 		gpu_write(gpu, REG_A3XX_VBIF_GATE_OFF_WRREQ_EN, 0x00003f);
133 		gpu_write(gpu, REG_A3XX_VBIF_DDR_OUT_MAX_BURST, 0x0000303);
134 		/* Set up VBIF_ROUND_ROBIN_QOS_ARB: */
135 		gpu_write(gpu, REG_A3XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x0003);
136 
137 	} else if (adreno_is_a330(adreno_gpu)) {
138 		/* Set up 16 deep read/write request queues: */
139 		gpu_write(gpu, REG_A3XX_VBIF_IN_RD_LIM_CONF0, 0x18181818);
140 		gpu_write(gpu, REG_A3XX_VBIF_IN_RD_LIM_CONF1, 0x18181818);
141 		gpu_write(gpu, REG_A3XX_VBIF_OUT_RD_LIM_CONF0, 0x18181818);
142 		gpu_write(gpu, REG_A3XX_VBIF_OUT_WR_LIM_CONF0, 0x18181818);
143 		gpu_write(gpu, REG_A3XX_VBIF_DDR_OUT_MAX_BURST, 0x0000303);
144 		gpu_write(gpu, REG_A3XX_VBIF_IN_WR_LIM_CONF0, 0x18181818);
145 		gpu_write(gpu, REG_A3XX_VBIF_IN_WR_LIM_CONF1, 0x18181818);
146 		/* Enable WR-REQ: */
147 		gpu_write(gpu, REG_A3XX_VBIF_GATE_OFF_WRREQ_EN, 0x00003f);
148 		/* Set up round robin arbitration between both AXI ports: */
149 		gpu_write(gpu, REG_A3XX_VBIF_ARB_CTL, 0x00000030);
150 		/* Set up VBIF_ROUND_ROBIN_QOS_ARB: */
151 		gpu_write(gpu, REG_A3XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x0001);
152 		/* Set up AOOO: */
153 		gpu_write(gpu, REG_A3XX_VBIF_OUT_AXI_AOOO_EN, 0x0000003f);
154 		gpu_write(gpu, REG_A3XX_VBIF_OUT_AXI_AOOO, 0x003f003f);
155 		/* Enable 1K sort: */
156 		gpu_write(gpu, REG_A3XX_VBIF_ABIT_SORT, 0x0001003f);
157 		gpu_write(gpu, REG_A3XX_VBIF_ABIT_SORT_CONF, 0x000000a4);
158 		/* Disable VBIF clock gating. This is to enable AXI running
159 		 * higher frequency than GPU:
160 		 */
161 		gpu_write(gpu, REG_A3XX_VBIF_CLKON, 0x00000001);
162 
163 	} else {
164 		BUG();
165 	}
166 
167 	/* Make all blocks contribute to the GPU BUSY perf counter: */
168 	gpu_write(gpu, REG_A3XX_RBBM_GPU_BUSY_MASKED, 0xffffffff);
169 
170 	/* Tune the hystersis counters for SP and CP idle detection: */
171 	gpu_write(gpu, REG_A3XX_RBBM_SP_HYST_CNT, 0x10);
172 	gpu_write(gpu, REG_A3XX_RBBM_WAIT_IDLE_CLOCKS_CTL, 0x10);
173 
174 	/* Enable the RBBM error reporting bits.  This lets us get
175 	 * useful information on failure:
176 	 */
177 	gpu_write(gpu, REG_A3XX_RBBM_AHB_CTL0, 0x00000001);
178 
179 	/* Enable AHB error reporting: */
180 	gpu_write(gpu, REG_A3XX_RBBM_AHB_CTL1, 0xa6ffffff);
181 
182 	/* Turn on the power counters: */
183 	gpu_write(gpu, REG_A3XX_RBBM_RBBM_CTL, 0x00030000);
184 
185 	/* Turn on hang detection - this spews a lot of useful information
186 	 * into the RBBM registers on a hang:
187 	 */
188 	gpu_write(gpu, REG_A3XX_RBBM_INTERFACE_HANG_INT_CTL, 0x00010fff);
189 
190 	/* Enable 64-byte cacheline size. HW Default is 32-byte (0x000000E0): */
191 	gpu_write(gpu, REG_A3XX_UCHE_CACHE_MODE_CONTROL_REG, 0x00000001);
192 
193 	/* Enable Clock gating: */
194 	if (adreno_is_a306(adreno_gpu))
195 		gpu_write(gpu, REG_A3XX_RBBM_CLOCK_CTL, 0xaaaaaaaa);
196 	else if (adreno_is_a320(adreno_gpu))
197 		gpu_write(gpu, REG_A3XX_RBBM_CLOCK_CTL, 0xbfffffff);
198 	else if (adreno_is_a330v2(adreno_gpu))
199 		gpu_write(gpu, REG_A3XX_RBBM_CLOCK_CTL, 0xaaaaaaaa);
200 	else if (adreno_is_a330(adreno_gpu))
201 		gpu_write(gpu, REG_A3XX_RBBM_CLOCK_CTL, 0xbffcffff);
202 
203 	if (adreno_is_a330v2(adreno_gpu))
204 		gpu_write(gpu, REG_A3XX_RBBM_GPR0_CTL, 0x05515455);
205 	else if (adreno_is_a330(adreno_gpu))
206 		gpu_write(gpu, REG_A3XX_RBBM_GPR0_CTL, 0x00000000);
207 
208 	/* Set the OCMEM base address for A330, etc */
209 	if (a3xx_gpu->ocmem_hdl) {
210 		gpu_write(gpu, REG_A3XX_RB_GMEM_BASE_ADDR,
211 			(unsigned int)(a3xx_gpu->ocmem_base >> 14));
212 	}
213 
214 	/* Turn on performance counters: */
215 	gpu_write(gpu, REG_A3XX_RBBM_PERFCTR_CTL, 0x01);
216 
217 	/* Enable the perfcntrs that we use.. */
218 	for (i = 0; i < gpu->num_perfcntrs; i++) {
219 		const struct msm_gpu_perfcntr *perfcntr = &gpu->perfcntrs[i];
220 		gpu_write(gpu, perfcntr->select_reg, perfcntr->select_val);
221 	}
222 
223 	gpu_write(gpu, REG_A3XX_RBBM_INT_0_MASK, A3XX_INT0_MASK);
224 
225 	ret = adreno_hw_init(gpu);
226 	if (ret)
227 		return ret;
228 
229 	/* setup access protection: */
230 	gpu_write(gpu, REG_A3XX_CP_PROTECT_CTRL, 0x00000007);
231 
232 	/* RBBM registers */
233 	gpu_write(gpu, REG_A3XX_CP_PROTECT(0), 0x63000040);
234 	gpu_write(gpu, REG_A3XX_CP_PROTECT(1), 0x62000080);
235 	gpu_write(gpu, REG_A3XX_CP_PROTECT(2), 0x600000cc);
236 	gpu_write(gpu, REG_A3XX_CP_PROTECT(3), 0x60000108);
237 	gpu_write(gpu, REG_A3XX_CP_PROTECT(4), 0x64000140);
238 	gpu_write(gpu, REG_A3XX_CP_PROTECT(5), 0x66000400);
239 
240 	/* CP registers */
241 	gpu_write(gpu, REG_A3XX_CP_PROTECT(6), 0x65000700);
242 	gpu_write(gpu, REG_A3XX_CP_PROTECT(7), 0x610007d8);
243 	gpu_write(gpu, REG_A3XX_CP_PROTECT(8), 0x620007e0);
244 	gpu_write(gpu, REG_A3XX_CP_PROTECT(9), 0x61001178);
245 	gpu_write(gpu, REG_A3XX_CP_PROTECT(10), 0x64001180);
246 
247 	/* RB registers */
248 	gpu_write(gpu, REG_A3XX_CP_PROTECT(11), 0x60003300);
249 
250 	/* VBIF registers */
251 	gpu_write(gpu, REG_A3XX_CP_PROTECT(12), 0x6b00c000);
252 
253 	/* NOTE: PM4/micro-engine firmware registers look to be the same
254 	 * for a2xx and a3xx.. we could possibly push that part down to
255 	 * adreno_gpu base class.  Or push both PM4 and PFP but
256 	 * parameterize the pfp ucode addr/data registers..
257 	 */
258 
259 	/* Load PM4: */
260 	ptr = (uint32_t *)(adreno_gpu->fw[ADRENO_FW_PM4]->data);
261 	len = adreno_gpu->fw[ADRENO_FW_PM4]->size / 4;
262 	DBG("loading PM4 ucode version: %x", ptr[1]);
263 
264 	gpu_write(gpu, REG_AXXX_CP_DEBUG,
265 			AXXX_CP_DEBUG_DYNAMIC_CLK_DISABLE |
266 			AXXX_CP_DEBUG_MIU_128BIT_WRITE_ENABLE);
267 	gpu_write(gpu, REG_AXXX_CP_ME_RAM_WADDR, 0);
268 	for (i = 1; i < len; i++)
269 		gpu_write(gpu, REG_AXXX_CP_ME_RAM_DATA, ptr[i]);
270 
271 	/* Load PFP: */
272 	ptr = (uint32_t *)(adreno_gpu->fw[ADRENO_FW_PFP]->data);
273 	len = adreno_gpu->fw[ADRENO_FW_PFP]->size / 4;
274 	DBG("loading PFP ucode version: %x", ptr[5]);
275 
276 	gpu_write(gpu, REG_A3XX_CP_PFP_UCODE_ADDR, 0);
277 	for (i = 1; i < len; i++)
278 		gpu_write(gpu, REG_A3XX_CP_PFP_UCODE_DATA, ptr[i]);
279 
280 	/* CP ROQ queue sizes (bytes) - RB:16, ST:16, IB1:32, IB2:64 */
281 	if (adreno_is_a305(adreno_gpu) || adreno_is_a306(adreno_gpu) ||
282 			adreno_is_a320(adreno_gpu)) {
283 		gpu_write(gpu, REG_AXXX_CP_QUEUE_THRESHOLDS,
284 				AXXX_CP_QUEUE_THRESHOLDS_CSQ_IB1_START(2) |
285 				AXXX_CP_QUEUE_THRESHOLDS_CSQ_IB2_START(6) |
286 				AXXX_CP_QUEUE_THRESHOLDS_CSQ_ST_START(14));
287 	} else if (adreno_is_a330(adreno_gpu)) {
288 		/* NOTE: this (value take from downstream android driver)
289 		 * includes some bits outside of the known bitfields.  But
290 		 * A330 has this "MERCIU queue" thing too, which might
291 		 * explain a new bitfield or reshuffling:
292 		 */
293 		gpu_write(gpu, REG_AXXX_CP_QUEUE_THRESHOLDS, 0x003e2008);
294 	}
295 
296 	/* clear ME_HALT to start micro engine */
297 	gpu_write(gpu, REG_AXXX_CP_ME_CNTL, 0);
298 
299 	return a3xx_me_init(gpu) ? 0 : -EINVAL;
300 }
301 
302 static void a3xx_recover(struct msm_gpu *gpu)
303 {
304 	int i;
305 
306 	adreno_dump_info(gpu);
307 
308 	for (i = 0; i < 8; i++) {
309 		printk("CP_SCRATCH_REG%d: %u\n", i,
310 			gpu_read(gpu, REG_AXXX_CP_SCRATCH_REG0 + i));
311 	}
312 
313 	/* dump registers before resetting gpu, if enabled: */
314 	if (hang_debug)
315 		a3xx_dump(gpu);
316 
317 	gpu_write(gpu, REG_A3XX_RBBM_SW_RESET_CMD, 1);
318 	gpu_read(gpu, REG_A3XX_RBBM_SW_RESET_CMD);
319 	gpu_write(gpu, REG_A3XX_RBBM_SW_RESET_CMD, 0);
320 	adreno_recover(gpu);
321 }
322 
323 static void a3xx_destroy(struct msm_gpu *gpu)
324 {
325 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
326 	struct a3xx_gpu *a3xx_gpu = to_a3xx_gpu(adreno_gpu);
327 
328 	DBG("%s", gpu->name);
329 
330 	adreno_gpu_cleanup(adreno_gpu);
331 
332 #ifdef CONFIG_MSM_OCMEM
333 	if (a3xx_gpu->ocmem_base)
334 		ocmem_free(OCMEM_GRAPHICS, a3xx_gpu->ocmem_hdl);
335 #endif
336 
337 	kfree(a3xx_gpu);
338 }
339 
340 static bool a3xx_idle(struct msm_gpu *gpu)
341 {
342 	/* wait for ringbuffer to drain: */
343 	if (!adreno_idle(gpu, gpu->rb[0]))
344 		return false;
345 
346 	/* then wait for GPU to finish: */
347 	if (spin_until(!(gpu_read(gpu, REG_A3XX_RBBM_STATUS) &
348 			A3XX_RBBM_STATUS_GPU_BUSY))) {
349 		DRM_ERROR("%s: timeout waiting for GPU to idle!\n", gpu->name);
350 
351 		/* TODO maybe we need to reset GPU here to recover from hang? */
352 		return false;
353 	}
354 
355 	return true;
356 }
357 
358 static irqreturn_t a3xx_irq(struct msm_gpu *gpu)
359 {
360 	uint32_t status;
361 
362 	status = gpu_read(gpu, REG_A3XX_RBBM_INT_0_STATUS);
363 	DBG("%s: %08x", gpu->name, status);
364 
365 	// TODO
366 
367 	gpu_write(gpu, REG_A3XX_RBBM_INT_CLEAR_CMD, status);
368 
369 	msm_gpu_retire(gpu);
370 
371 	return IRQ_HANDLED;
372 }
373 
/*
 * Register list handed to the adreno core through adreno_gpu->registers.
 * The entries appear to be inclusive (first, last) register offset pairs -
 * two pairs per row below - and the list is terminated by a ~0 sentinel.
 */
static const unsigned int a3xx_registers[] = {
	0x0000, 0x0002,  0x0010, 0x0012,
	0x0018, 0x0018,  0x0020, 0x0027,
	0x0029, 0x002b,  0x002e, 0x0033,
	0x0040, 0x0042,  0x0050, 0x005c,
	0x0060, 0x006c,  0x0080, 0x0082,
	0x0084, 0x0088,  0x0090, 0x00e5,
	0x00ea, 0x00ed,  0x0100, 0x0100,
	0x0110, 0x0123,  0x01c0, 0x01c1,
	0x01c3, 0x01c5,  0x01c7, 0x01c7,
	0x01d5, 0x01d9,  0x01dc, 0x01dd,
	0x01ea, 0x01ea,  0x01ee, 0x01f1,
	0x01f5, 0x01f5,  0x01fc, 0x01ff,
	0x0440, 0x0440,  0x0443, 0x0443,
	0x0445, 0x0445,  0x044d, 0x044f,
	0x0452, 0x0452,  0x0454, 0x046f,
	0x047c, 0x047c,  0x047f, 0x047f,
	0x0578, 0x057f,  0x0600, 0x0602,
	0x0605, 0x0607,  0x060a, 0x060e,
	0x0612, 0x0614,  0x0c01, 0x0c02,
	0x0c06, 0x0c1d,  0x0c3d, 0x0c3f,
	0x0c48, 0x0c4b,  0x0c80, 0x0c80,
	0x0c88, 0x0c8b,  0x0ca0, 0x0cb7,
	0x0cc0, 0x0cc1,  0x0cc6, 0x0cc7,
	0x0ce4, 0x0ce5,  0x0e00, 0x0e05,
	0x0e0c, 0x0e0c,  0x0e22, 0x0e23,
	0x0e41, 0x0e45,  0x0e64, 0x0e65,
	0x0e80, 0x0e82,  0x0e84, 0x0e89,
	0x0ea0, 0x0ea1,  0x0ea4, 0x0ea7,
	0x0ec4, 0x0ecb,  0x0ee0, 0x0ee0,
	0x0f00, 0x0f01,  0x0f03, 0x0f09,
	0x2040, 0x2040,  0x2044, 0x2044,
	0x2048, 0x204d,  0x2068, 0x2069,
	0x206c, 0x206d,  0x2070, 0x2070,
	0x2072, 0x2072,  0x2074, 0x2075,
	0x2079, 0x207a,  0x20c0, 0x20d3,
	0x20e4, 0x20ef,  0x2100, 0x2109,
	0x210c, 0x210c,  0x210e, 0x210e,
	0x2110, 0x2111,  0x2114, 0x2115,
	0x21e4, 0x21e4,  0x21ea, 0x21ea,
	0x21ec, 0x21ed,  0x21f0, 0x21f0,
	0x2200, 0x2212,  0x2214, 0x2217,
	0x221a, 0x221a,  0x2240, 0x227e,
	0x2280, 0x228b,  0x22c0, 0x22c0,
	0x22c4, 0x22ce,  0x22d0, 0x22d8,
	0x22df, 0x22e6,  0x22e8, 0x22e9,
	0x22ec, 0x22ec,  0x22f0, 0x22f7,
	0x22ff, 0x22ff,  0x2340, 0x2343,
	0x2348, 0x2349,  0x2350, 0x2356,
	0x2360, 0x2360,  0x2440, 0x2440,
	0x2444, 0x2444,  0x2448, 0x244d,
	0x2468, 0x2469,  0x246c, 0x246d,
	0x2470, 0x2470,  0x2472, 0x2472,
	0x2474, 0x2475,  0x2479, 0x247a,
	0x24c0, 0x24d3,  0x24e4, 0x24ef,
	0x2500, 0x2509,  0x250c, 0x250c,
	0x250e, 0x250e,  0x2510, 0x2511,
	0x2514, 0x2515,  0x25e4, 0x25e4,
	0x25ea, 0x25ea,  0x25ec, 0x25ed,
	0x25f0, 0x25f0,  0x2600, 0x2612,
	0x2614, 0x2617,  0x261a, 0x261a,
	0x2640, 0x267e,  0x2680, 0x268b,
	0x26c0, 0x26c0,  0x26c4, 0x26ce,
	0x26d0, 0x26d8,  0x26df, 0x26e6,
	0x26e8, 0x26e9,  0x26ec, 0x26ec,
	0x26f0, 0x26f7,  0x26ff, 0x26ff,
	0x2740, 0x2743,  0x2748, 0x2749,
	0x2750, 0x2756,  0x2760, 0x2760,
	0x300c, 0x300e,  0x301c, 0x301d,
	0x302a, 0x302a,  0x302c, 0x302d,
	0x3030, 0x3031,  0x3034, 0x3036,
	0x303c, 0x303c,  0x305e, 0x305f,
	~0   /* sentinel */
};
413 
414 /* would be nice to not have to duplicate the _show() stuff with printk(): */
415 static void a3xx_dump(struct msm_gpu *gpu)
416 {
417 	printk("status:   %08x\n",
418 			gpu_read(gpu, REG_A3XX_RBBM_STATUS));
419 	adreno_dump(gpu);
420 }
421 
422 static struct msm_gpu_state *a3xx_gpu_state_get(struct msm_gpu *gpu)
423 {
424 	struct msm_gpu_state *state = kzalloc(sizeof(*state), GFP_KERNEL);
425 
426 	if (!state)
427 		return ERR_PTR(-ENOMEM);
428 
429 	adreno_gpu_state_get(gpu, state);
430 
431 	state->rbbm_status = gpu_read(gpu, REG_A3XX_RBBM_STATUS);
432 
433 	return state;
434 }
435 
436 /* Register offset defines for A3XX */
437 static const unsigned int a3xx_register_offsets[REG_ADRENO_REGISTER_MAX] = {
438 	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_BASE, REG_AXXX_CP_RB_BASE),
439 	REG_ADRENO_SKIP(REG_ADRENO_CP_RB_BASE_HI),
440 	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_RPTR_ADDR, REG_AXXX_CP_RB_RPTR_ADDR),
441 	REG_ADRENO_SKIP(REG_ADRENO_CP_RB_RPTR_ADDR_HI),
442 	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_RPTR, REG_AXXX_CP_RB_RPTR),
443 	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_WPTR, REG_AXXX_CP_RB_WPTR),
444 	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_CNTL, REG_AXXX_CP_RB_CNTL),
445 };
446 
447 static const struct adreno_gpu_funcs funcs = {
448 	.base = {
449 		.get_param = adreno_get_param,
450 		.hw_init = a3xx_hw_init,
451 		.pm_suspend = msm_gpu_pm_suspend,
452 		.pm_resume = msm_gpu_pm_resume,
453 		.recover = a3xx_recover,
454 		.submit = adreno_submit,
455 		.flush = adreno_flush,
456 		.active_ring = adreno_active_ring,
457 		.irq = a3xx_irq,
458 		.destroy = a3xx_destroy,
459 #if defined(CONFIG_DEBUG_FS) || defined(CONFIG_DEV_COREDUMP)
460 		.show = adreno_show,
461 #endif
462 		.gpu_state_get = a3xx_gpu_state_get,
463 		.gpu_state_put = adreno_gpu_state_put,
464 	},
465 };
466 
467 static const struct msm_gpu_perfcntr perfcntrs[] = {
468 	{ REG_A3XX_SP_PERFCOUNTER6_SELECT, REG_A3XX_RBBM_PERFCTR_SP_6_LO,
469 			SP_ALU_ACTIVE_CYCLES, "ALUACTIVE" },
470 	{ REG_A3XX_SP_PERFCOUNTER7_SELECT, REG_A3XX_RBBM_PERFCTR_SP_7_LO,
471 			SP_FS_FULL_ALU_INSTRUCTIONS, "ALUFULL" },
472 };
473 
474 struct msm_gpu *a3xx_gpu_init(struct drm_device *dev)
475 {
476 	struct a3xx_gpu *a3xx_gpu = NULL;
477 	struct adreno_gpu *adreno_gpu;
478 	struct msm_gpu *gpu;
479 	struct msm_drm_private *priv = dev->dev_private;
480 	struct platform_device *pdev = priv->gpu_pdev;
481 	int ret;
482 
483 	if (!pdev) {
484 		dev_err(dev->dev, "no a3xx device\n");
485 		ret = -ENXIO;
486 		goto fail;
487 	}
488 
489 	a3xx_gpu = kzalloc(sizeof(*a3xx_gpu), GFP_KERNEL);
490 	if (!a3xx_gpu) {
491 		ret = -ENOMEM;
492 		goto fail;
493 	}
494 
495 	adreno_gpu = &a3xx_gpu->base;
496 	gpu = &adreno_gpu->base;
497 
498 	gpu->perfcntrs = perfcntrs;
499 	gpu->num_perfcntrs = ARRAY_SIZE(perfcntrs);
500 
501 	adreno_gpu->registers = a3xx_registers;
502 	adreno_gpu->reg_offsets = a3xx_register_offsets;
503 
504 	ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs, 1);
505 	if (ret)
506 		goto fail;
507 
508 	/* if needed, allocate gmem: */
509 	if (adreno_is_a330(adreno_gpu)) {
510 #ifdef CONFIG_MSM_OCMEM
511 		/* TODO this is different/missing upstream: */
512 		struct ocmem_buf *ocmem_hdl =
513 				ocmem_allocate(OCMEM_GRAPHICS, adreno_gpu->gmem);
514 
515 		a3xx_gpu->ocmem_hdl = ocmem_hdl;
516 		a3xx_gpu->ocmem_base = ocmem_hdl->addr;
517 		adreno_gpu->gmem = ocmem_hdl->len;
518 		DBG("using %dK of OCMEM at 0x%08x", adreno_gpu->gmem / 1024,
519 				a3xx_gpu->ocmem_base);
520 #endif
521 	}
522 
523 	if (!gpu->aspace) {
524 		/* TODO we think it is possible to configure the GPU to
525 		 * restrict access to VRAM carveout.  But the required
526 		 * registers are unknown.  For now just bail out and
527 		 * limp along with just modesetting.  If it turns out
528 		 * to not be possible to restrict access, then we must
529 		 * implement a cmdstream validator.
530 		 */
531 		dev_err(dev->dev, "No memory protection without IOMMU\n");
532 		ret = -ENXIO;
533 		goto fail;
534 	}
535 
536 	return gpu;
537 
538 fail:
539 	if (a3xx_gpu)
540 		a3xx_destroy(&a3xx_gpu->base.base);
541 
542 	return ERR_PTR(ret);
543 }
544