// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2013 Red Hat
 * Author: Rob Clark <robdclark@gmail.com>
 *
 * Copyright (c) 2014 The Linux Foundation. All rights reserved.
 */

#ifdef CONFIG_MSM_OCMEM
#  include <mach/ocmem.h>
#endif

#include "a3xx_gpu.h"

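/*
 * Interrupt sources unmasked at init: CACHE_FLUSH_TS fires when submitted
 * work completes (used to drive retire), while the remaining bits report
 * CP/RBBM/UCHE error conditions so hangs and faults are visible.
 */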
#define A3XX_INT0_MASK \
	(A3XX_INT0_RBBM_AHB_ERROR |        \
	 A3XX_INT0_RBBM_ATB_BUS_OVERFLOW | \
	 A3XX_INT0_CP_T0_PACKET_IN_IB |    \
	 A3XX_INT0_CP_OPCODE_ERROR |       \
	 A3XX_INT0_CP_RESERVED_BIT_ERROR | \
	 A3XX_INT0_CP_HW_FAULT |           \
	 A3XX_INT0_CP_IB1_INT |            \
	 A3XX_INT0_CP_IB2_INT |            \
	 A3XX_INT0_CP_RB_INT |             \
	 A3XX_INT0_CP_REG_PROTECT_FAULT |  \
	 A3XX_INT0_CP_AHB_ERROR_HALT |     \
	 A3XX_INT0_CACHE_FLUSH_TS |        \
	 A3XX_INT0_UCHE_OOB_ACCESS)

extern bool hang_debug;

static void a3xx_dump(struct msm_gpu *gpu);
static bool a3xx_idle(struct msm_gpu *gpu);

static bool a3xx_me_init(struct msm_gpu *gpu)
{
	struct msm_ringbuffer *ring = gpu->rb[0];

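	/*
	 * Emit a type-3 CP_ME_INIT packet with 17 payload dwords to hand the
	 * just-started micro engine its initial state.  The first dword is a
	 * mask of which of the following parameters are valid; the magic
	 * values below mirror the downstream driver's a3xx defaults.
	 */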
	OUT_PKT3(ring, CP_ME_INIT, 17);
	OUT_RING(ring, 0x000003f7);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000080);
	OUT_RING(ring, 0x00000100);
	OUT_RING(ring, 0x00000180);
	OUT_RING(ring, 0x00006600);
	OUT_RING(ring, 0x00000150);
	OUT_RING(ring, 0x0000014e);
	OUT_RING(ring, 0x00000154);
	OUT_RING(ring, 0x00000001);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);

	gpu->funcs->flush(gpu, ring);
	return a3xx_idle(gpu);
}

static int a3xx_hw_init(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a3xx_gpu *a3xx_gpu = to_a3xx_gpu(adreno_gpu);
	uint32_t *ptr, len;
	int i, ret;

	DBG("%s", gpu->name);

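	/*
	 * Per-variant VBIF (GPU bus interface) tuning: request queue depths,
	 * write-request gating, AXI arbitration and out-of-order settings.
	 * The values follow the downstream kgsl driver for each SoC.
	 */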
	if (adreno_is_a305(adreno_gpu)) {
		/* Set up 16 deep read/write request queues: */
		gpu_write(gpu, REG_A3XX_VBIF_IN_RD_LIM_CONF0, 0x10101010);
		gpu_write(gpu, REG_A3XX_VBIF_IN_RD_LIM_CONF1, 0x10101010);
		gpu_write(gpu, REG_A3XX_VBIF_OUT_RD_LIM_CONF0, 0x10101010);
		gpu_write(gpu, REG_A3XX_VBIF_OUT_WR_LIM_CONF0, 0x10101010);
		gpu_write(gpu, REG_A3XX_VBIF_DDR_OUT_MAX_BURST, 0x0000303);
		gpu_write(gpu, REG_A3XX_VBIF_IN_WR_LIM_CONF0, 0x10101010);
		gpu_write(gpu, REG_A3XX_VBIF_IN_WR_LIM_CONF1, 0x10101010);
		/* Enable WR-REQ: */
		gpu_write(gpu, REG_A3XX_VBIF_GATE_OFF_WRREQ_EN, 0x0000ff);
		/* Set up round robin arbitration between both AXI ports: */
		gpu_write(gpu, REG_A3XX_VBIF_ARB_CTL, 0x00000030);
		/* Set up AOOO: */
		gpu_write(gpu, REG_A3XX_VBIF_OUT_AXI_AOOO_EN, 0x0000003c);
		gpu_write(gpu, REG_A3XX_VBIF_OUT_AXI_AOOO, 0x003c003c);
	} else if (adreno_is_a306(adreno_gpu)) {
		gpu_write(gpu, REG_A3XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x0003);
		gpu_write(gpu, REG_A3XX_VBIF_OUT_RD_LIM_CONF0, 0x0000000a);
		gpu_write(gpu, REG_A3XX_VBIF_OUT_WR_LIM_CONF0, 0x0000000a);
	} else if (adreno_is_a320(adreno_gpu)) {
		/* Set up 16 deep read/write request queues: */
		gpu_write(gpu, REG_A3XX_VBIF_IN_RD_LIM_CONF0, 0x10101010);
		gpu_write(gpu, REG_A3XX_VBIF_IN_RD_LIM_CONF1, 0x10101010);
		gpu_write(gpu, REG_A3XX_VBIF_OUT_RD_LIM_CONF0, 0x10101010);
		gpu_write(gpu, REG_A3XX_VBIF_OUT_WR_LIM_CONF0, 0x10101010);
		gpu_write(gpu, REG_A3XX_VBIF_DDR_OUT_MAX_BURST, 0x0000303);
		gpu_write(gpu, REG_A3XX_VBIF_IN_WR_LIM_CONF0, 0x10101010);
		gpu_write(gpu, REG_A3XX_VBIF_IN_WR_LIM_CONF1, 0x10101010);
		/* Enable WR-REQ: */
		gpu_write(gpu, REG_A3XX_VBIF_GATE_OFF_WRREQ_EN, 0x0000ff);
		/* Set up round robin arbitration between both AXI ports: */
		gpu_write(gpu, REG_A3XX_VBIF_ARB_CTL, 0x00000030);
		/* Set up AOOO: */
		gpu_write(gpu, REG_A3XX_VBIF_OUT_AXI_AOOO_EN, 0x0000003c);
		gpu_write(gpu, REG_A3XX_VBIF_OUT_AXI_AOOO, 0x003c003c);
		/* Enable 1K sort: */
		gpu_write(gpu, REG_A3XX_VBIF_ABIT_SORT, 0x000000ff);
		gpu_write(gpu, REG_A3XX_VBIF_ABIT_SORT_CONF, 0x000000a4);

	} else if (adreno_is_a330v2(adreno_gpu)) {
		/*
		 * Most of the VBIF registers on 8974v2 have the correct
		 * values at power on, so we won't modify those if we don't
		 * need to
		 */
		/* Enable 1k sort: */
		gpu_write(gpu, REG_A3XX_VBIF_ABIT_SORT, 0x0001003f);
		gpu_write(gpu, REG_A3XX_VBIF_ABIT_SORT_CONF, 0x000000a4);
		/* Enable WR-REQ: */
		gpu_write(gpu, REG_A3XX_VBIF_GATE_OFF_WRREQ_EN, 0x00003f);
		gpu_write(gpu, REG_A3XX_VBIF_DDR_OUT_MAX_BURST, 0x0000303);
		/* Set up VBIF_ROUND_ROBIN_QOS_ARB: */
		gpu_write(gpu, REG_A3XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x0003);

	} else if (adreno_is_a330(adreno_gpu)) {
		/* Set up 16 deep read/write request queues: */
		gpu_write(gpu, REG_A3XX_VBIF_IN_RD_LIM_CONF0, 0x18181818);
		gpu_write(gpu, REG_A3XX_VBIF_IN_RD_LIM_CONF1, 0x18181818);
		gpu_write(gpu, REG_A3XX_VBIF_OUT_RD_LIM_CONF0, 0x18181818);
		gpu_write(gpu, REG_A3XX_VBIF_OUT_WR_LIM_CONF0, 0x18181818);
		gpu_write(gpu, REG_A3XX_VBIF_DDR_OUT_MAX_BURST, 0x0000303);
		gpu_write(gpu, REG_A3XX_VBIF_IN_WR_LIM_CONF0, 0x18181818);
		gpu_write(gpu, REG_A3XX_VBIF_IN_WR_LIM_CONF1, 0x18181818);
		/* Enable WR-REQ: */
		gpu_write(gpu, REG_A3XX_VBIF_GATE_OFF_WRREQ_EN, 0x00003f);
		/* Set up round robin arbitration between both AXI ports: */
		gpu_write(gpu, REG_A3XX_VBIF_ARB_CTL, 0x00000030);
		/* Set up VBIF_ROUND_ROBIN_QOS_ARB: */
		gpu_write(gpu, REG_A3XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x0001);
		/* Set up AOOO: */
		gpu_write(gpu, REG_A3XX_VBIF_OUT_AXI_AOOO_EN, 0x0000003f);
		gpu_write(gpu, REG_A3XX_VBIF_OUT_AXI_AOOO, 0x003f003f);
		/* Enable 1K sort: */
		gpu_write(gpu, REG_A3XX_VBIF_ABIT_SORT, 0x0001003f);
		gpu_write(gpu, REG_A3XX_VBIF_ABIT_SORT_CONF, 0x000000a4);
		/* Disable VBIF clock gating. This is to enable AXI running
		 * higher frequency than GPU:
		 */
		gpu_write(gpu, REG_A3XX_VBIF_CLKON, 0x00000001);

	} else {
		BUG();
	}

	/* Make all blocks contribute to the GPU BUSY perf counter: */
	gpu_write(gpu, REG_A3XX_RBBM_GPU_BUSY_MASKED, 0xffffffff);

	/* Tune the hysteresis counters for SP and CP idle detection: */
	gpu_write(gpu, REG_A3XX_RBBM_SP_HYST_CNT, 0x10);
	gpu_write(gpu, REG_A3XX_RBBM_WAIT_IDLE_CLOCKS_CTL, 0x10);

	/* Enable the RBBM error reporting bits.  This lets us get
	 * useful information on failure:
	 */
	gpu_write(gpu, REG_A3XX_RBBM_AHB_CTL0, 0x00000001);

	/* Enable AHB error reporting: */
	gpu_write(gpu, REG_A3XX_RBBM_AHB_CTL1, 0xa6ffffff);

	/* Turn on the power counters: */
	gpu_write(gpu, REG_A3XX_RBBM_RBBM_CTL, 0x00030000);

	/* Turn on hang detection - this spews a lot of useful information
	 * into the RBBM registers on a hang:
	 */
	gpu_write(gpu, REG_A3XX_RBBM_INTERFACE_HANG_INT_CTL, 0x00010fff);

	/* Enable 64-byte cacheline size. HW Default is 32-byte (0x000000E0): */
	gpu_write(gpu, REG_A3XX_UCHE_CACHE_MODE_CONTROL_REG, 0x00000001);

	/* Enable Clock gating: */
	if (adreno_is_a306(adreno_gpu))
		gpu_write(gpu, REG_A3XX_RBBM_CLOCK_CTL, 0xaaaaaaaa);
	else if (adreno_is_a320(adreno_gpu))
		gpu_write(gpu, REG_A3XX_RBBM_CLOCK_CTL, 0xbfffffff);
	else if (adreno_is_a330v2(adreno_gpu))
		gpu_write(gpu, REG_A3XX_RBBM_CLOCK_CTL, 0xaaaaaaaa);
	else if (adreno_is_a330(adreno_gpu))
		gpu_write(gpu, REG_A3XX_RBBM_CLOCK_CTL, 0xbffcffff);

	if (adreno_is_a330v2(adreno_gpu))
		gpu_write(gpu, REG_A3XX_RBBM_GPR0_CTL, 0x05515455);
	else if (adreno_is_a330(adreno_gpu))
		gpu_write(gpu, REG_A3XX_RBBM_GPR0_CTL, 0x00000000);

	/* Set the OCMEM base address for A330, etc */
	if (a3xx_gpu->ocmem_hdl) {
		gpu_write(gpu, REG_A3XX_RB_GMEM_BASE_ADDR,
			(unsigned int)(a3xx_gpu->ocmem_base >> 14));
	}

	/* Turn on performance counters: */
	gpu_write(gpu, REG_A3XX_RBBM_PERFCTR_CTL, 0x01);

	/* Enable the perfcntrs that we use.. */
	for (i = 0; i < gpu->num_perfcntrs; i++) {
		const struct msm_gpu_perfcntr *perfcntr = &gpu->perfcntrs[i];
		gpu_write(gpu, perfcntr->select_reg, perfcntr->select_val);
	}

	gpu_write(gpu, REG_A3XX_RBBM_INT_0_MASK, A3XX_INT0_MASK);

	ret = adreno_hw_init(gpu);
	if (ret)
		return ret;

	/* setup access protection: */
	gpu_write(gpu, REG_A3XX_CP_PROTECT_CTRL, 0x00000007);

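	/*
	 * Each CP_PROTECT entry below describes a protected register range:
	 * the low bits hold the base register offset and the upper bits a
	 * power-of-two range size (exact bit layout as in the downstream
	 * kgsl driver).  CP accesses into these ranges raise a
	 * REG_PROTECT_FAULT instead of silently landing.
	 */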
	/* RBBM registers */
	gpu_write(gpu, REG_A3XX_CP_PROTECT(0), 0x63000040);
	gpu_write(gpu, REG_A3XX_CP_PROTECT(1), 0x62000080);
	gpu_write(gpu, REG_A3XX_CP_PROTECT(2), 0x600000cc);
	gpu_write(gpu, REG_A3XX_CP_PROTECT(3), 0x60000108);
	gpu_write(gpu, REG_A3XX_CP_PROTECT(4), 0x64000140);
	gpu_write(gpu, REG_A3XX_CP_PROTECT(5), 0x66000400);

	/* CP registers */
	gpu_write(gpu, REG_A3XX_CP_PROTECT(6), 0x65000700);
	gpu_write(gpu, REG_A3XX_CP_PROTECT(7), 0x610007d8);
	gpu_write(gpu, REG_A3XX_CP_PROTECT(8), 0x620007e0);
	gpu_write(gpu, REG_A3XX_CP_PROTECT(9), 0x61001178);
	gpu_write(gpu, REG_A3XX_CP_PROTECT(10), 0x64001180);

	/* RB registers */
	gpu_write(gpu, REG_A3XX_CP_PROTECT(11), 0x60003300);

	/* VBIF registers */
	gpu_write(gpu, REG_A3XX_CP_PROTECT(12), 0x6b00c000);

	/* NOTE: PM4/micro-engine firmware registers look to be the same
	 * for a2xx and a3xx.. we could possibly push that part down to
	 * adreno_gpu base class.  Or push both PM4 and PFP but
	 * parameterize the pfp ucode addr/data registers..
	 */

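	/*
	 * Both ucode images are streamed in a dword at a time: the write
	 * address register is set to zero once and the data register then
	 * auto-increments through the instruction RAM.  Dword 0 of each
	 * image is skipped (the loops start at i = 1), matching the
	 * downstream loader.
	 */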
	/* Load PM4: */
	ptr = (uint32_t *)(adreno_gpu->fw[ADRENO_FW_PM4]->data);
	len = adreno_gpu->fw[ADRENO_FW_PM4]->size / 4;
	DBG("loading PM4 ucode version: %x", ptr[1]);

	gpu_write(gpu, REG_AXXX_CP_DEBUG,
			AXXX_CP_DEBUG_DYNAMIC_CLK_DISABLE |
			AXXX_CP_DEBUG_MIU_128BIT_WRITE_ENABLE);
	gpu_write(gpu, REG_AXXX_CP_ME_RAM_WADDR, 0);
	for (i = 1; i < len; i++)
		gpu_write(gpu, REG_AXXX_CP_ME_RAM_DATA, ptr[i]);

	/* Load PFP: */
	ptr = (uint32_t *)(adreno_gpu->fw[ADRENO_FW_PFP]->data);
	len = adreno_gpu->fw[ADRENO_FW_PFP]->size / 4;
	DBG("loading PFP ucode version: %x", ptr[5]);

	gpu_write(gpu, REG_A3XX_CP_PFP_UCODE_ADDR, 0);
	for (i = 1; i < len; i++)
		gpu_write(gpu, REG_A3XX_CP_PFP_UCODE_DATA, ptr[i]);

	/* CP ROQ queue sizes (bytes) - RB:16, ST:16, IB1:32, IB2:64 */
	if (adreno_is_a305(adreno_gpu) || adreno_is_a306(adreno_gpu) ||
			adreno_is_a320(adreno_gpu)) {
		gpu_write(gpu, REG_AXXX_CP_QUEUE_THRESHOLDS,
				AXXX_CP_QUEUE_THRESHOLDS_CSQ_IB1_START(2) |
				AXXX_CP_QUEUE_THRESHOLDS_CSQ_IB2_START(6) |
				AXXX_CP_QUEUE_THRESHOLDS_CSQ_ST_START(14));
	} else if (adreno_is_a330(adreno_gpu)) {
		/* NOTE: this (value taken from the downstream android driver)
		 * includes some bits outside of the known bitfields.  But
		 * A330 has this "MERCIU queue" thing too, which might
		 * explain a new bitfield or reshuffling:
		 */
		gpu_write(gpu, REG_AXXX_CP_QUEUE_THRESHOLDS, 0x003e2008);
	}

	/* clear ME_HALT to start micro engine */
	gpu_write(gpu, REG_AXXX_CP_ME_CNTL, 0);

	return a3xx_me_init(gpu) ? 0 : -EINVAL;
}

static void a3xx_recover(struct msm_gpu *gpu)
{
	int i;

	adreno_dump_info(gpu);

	for (i = 0; i < 8; i++) {
		printk("CP_SCRATCH_REG%d: %u\n", i,
			gpu_read(gpu, REG_AXXX_CP_SCRATCH_REG0 + i));
	}

	/* dump registers before resetting gpu, if enabled: */
	if (hang_debug)
		a3xx_dump(gpu);

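	/* Pulse the software reset: assert it, read the register back so the
	 * write has posted, then de-assert before handing off to the common
	 * adreno recover path to re-init the GPU.
	 */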
	gpu_write(gpu, REG_A3XX_RBBM_SW_RESET_CMD, 1);
	gpu_read(gpu, REG_A3XX_RBBM_SW_RESET_CMD);
	gpu_write(gpu, REG_A3XX_RBBM_SW_RESET_CMD, 0);
	adreno_recover(gpu);
}

static void a3xx_destroy(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a3xx_gpu *a3xx_gpu = to_a3xx_gpu(adreno_gpu);

	DBG("%s", gpu->name);

	adreno_gpu_cleanup(adreno_gpu);

#ifdef CONFIG_MSM_OCMEM
	if (a3xx_gpu->ocmem_base)
		ocmem_free(OCMEM_GRAPHICS, a3xx_gpu->ocmem_hdl);
#endif

	kfree(a3xx_gpu);
}

static bool a3xx_idle(struct msm_gpu *gpu)
{
	/* wait for ringbuffer to drain: */
	if (!adreno_idle(gpu, gpu->rb[0]))
		return false;

	/* then wait for GPU to finish: */
	if (spin_until(!(gpu_read(gpu, REG_A3XX_RBBM_STATUS) &
			A3XX_RBBM_STATUS_GPU_BUSY))) {
		DRM_ERROR("%s: timeout waiting for GPU to idle!\n", gpu->name);

		/* TODO maybe we need to reset GPU here to recover from hang? */
		return false;
	}

	return true;
}

static irqreturn_t a3xx_irq(struct msm_gpu *gpu)
{
	uint32_t status;

	status = gpu_read(gpu, REG_A3XX_RBBM_INT_0_STATUS);
	DBG("%s: %08x", gpu->name, status);

	// TODO

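	/* Writing the status bits back to INT_CLEAR_CMD acknowledges them;
	 * retire is then kicked unconditionally since the individual
	 * sources aren't demuxed (yet).
	 */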
	gpu_write(gpu, REG_A3XX_RBBM_INT_CLEAR_CMD, status);

	msm_gpu_retire(gpu);

	return IRQ_HANDLED;
}

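/*
 * Inclusive {start, end} register ranges captured by adreno_dump() /
 * adreno_show() for debugging and GPU state snapshots; the list is
 * terminated by the ~0 sentinel.
 */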
static const unsigned int a3xx_registers[] = {
	0x0000, 0x0002, 0x0010, 0x0012, 0x0018, 0x0018, 0x0020, 0x0027,
	0x0029, 0x002b, 0x002e, 0x0033, 0x0040, 0x0042, 0x0050, 0x005c,
	0x0060, 0x006c, 0x0080, 0x0082, 0x0084, 0x0088, 0x0090, 0x00e5,
	0x00ea, 0x00ed, 0x0100, 0x0100, 0x0110, 0x0123, 0x01c0, 0x01c1,
	0x01c3, 0x01c5, 0x01c7, 0x01c7, 0x01d5, 0x01d9, 0x01dc, 0x01dd,
	0x01ea, 0x01ea, 0x01ee, 0x01f1, 0x01f5, 0x01f5, 0x01fc, 0x01ff,
	0x0440, 0x0440, 0x0443, 0x0443, 0x0445, 0x0445, 0x044d, 0x044f,
	0x0452, 0x0452, 0x0454, 0x046f, 0x047c, 0x047c, 0x047f, 0x047f,
	0x0578, 0x057f, 0x0600, 0x0602, 0x0605, 0x0607, 0x060a, 0x060e,
	0x0612, 0x0614, 0x0c01, 0x0c02, 0x0c06, 0x0c1d, 0x0c3d, 0x0c3f,
	0x0c48, 0x0c4b, 0x0c80, 0x0c80, 0x0c88, 0x0c8b, 0x0ca0, 0x0cb7,
	0x0cc0, 0x0cc1, 0x0cc6, 0x0cc7, 0x0ce4, 0x0ce5, 0x0e00, 0x0e05,
	0x0e0c, 0x0e0c, 0x0e22, 0x0e23, 0x0e41, 0x0e45, 0x0e64, 0x0e65,
	0x0e80, 0x0e82, 0x0e84, 0x0e89, 0x0ea0, 0x0ea1, 0x0ea4, 0x0ea7,
	0x0ec4, 0x0ecb, 0x0ee0, 0x0ee0, 0x0f00, 0x0f01, 0x0f03, 0x0f09,
	0x2040, 0x2040, 0x2044, 0x2044, 0x2048, 0x204d, 0x2068, 0x2069,
	0x206c, 0x206d, 0x2070, 0x2070, 0x2072, 0x2072, 0x2074, 0x2075,
	0x2079, 0x207a, 0x20c0, 0x20d3, 0x20e4, 0x20ef, 0x2100, 0x2109,
	0x210c, 0x210c, 0x210e, 0x210e, 0x2110, 0x2111, 0x2114, 0x2115,
	0x21e4, 0x21e4, 0x21ea, 0x21ea, 0x21ec, 0x21ed, 0x21f0, 0x21f0,
	0x2200, 0x2212, 0x2214, 0x2217, 0x221a, 0x221a, 0x2240, 0x227e,
	0x2280, 0x228b, 0x22c0, 0x22c0, 0x22c4, 0x22ce, 0x22d0, 0x22d8,
	0x22df, 0x22e6, 0x22e8, 0x22e9, 0x22ec, 0x22ec, 0x22f0, 0x22f7,
	0x22ff, 0x22ff, 0x2340, 0x2343, 0x2440, 0x2440, 0x2444, 0x2444,
	0x2448, 0x244d, 0x2468, 0x2469, 0x246c, 0x246d, 0x2470, 0x2470,
	0x2472, 0x2472, 0x2474, 0x2475, 0x2479, 0x247a, 0x24c0, 0x24d3,
	0x24e4, 0x24ef, 0x2500, 0x2509, 0x250c, 0x250c, 0x250e, 0x250e,
	0x2510, 0x2511, 0x2514, 0x2515, 0x25e4, 0x25e4, 0x25ea, 0x25ea,
	0x25ec, 0x25ed, 0x25f0, 0x25f0, 0x2600, 0x2612, 0x2614, 0x2617,
	0x261a, 0x261a, 0x2640, 0x267e, 0x2680, 0x268b, 0x26c0, 0x26c0,
	0x26c4, 0x26ce, 0x26d0, 0x26d8, 0x26df, 0x26e6, 0x26e8, 0x26e9,
	0x26ec, 0x26ec, 0x26f0, 0x26f7, 0x26ff, 0x26ff, 0x2740, 0x2743,
	0x300c, 0x300e, 0x301c, 0x301d, 0x302a, 0x302a, 0x302c, 0x302d,
	0x3030, 0x3031, 0x3034, 0x3036, 0x303c, 0x303c, 0x305e, 0x305f,
	~0   /* sentinel */
};

/* would be nice to not have to duplicate the _show() stuff with printk(): */
static void a3xx_dump(struct msm_gpu *gpu)
{
	printk("status:   %08x\n",
			gpu_read(gpu, REG_A3XX_RBBM_STATUS));
	adreno_dump(gpu);
}

static struct msm_gpu_state *a3xx_gpu_state_get(struct msm_gpu *gpu)
{
	struct msm_gpu_state *state = kzalloc(sizeof(*state), GFP_KERNEL);

	if (!state)
		return ERR_PTR(-ENOMEM);

	adreno_gpu_state_get(gpu, state);

	state->rbbm_status = gpu_read(gpu, REG_A3XX_RBBM_STATUS);

	return state;
}

/* Register offset defines for A3XX */
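/*
 * Mapping from the generic REG_ADRENO_* indices used by common code to
 * a3xx register offsets.  The *_HI entries are skipped: a3xx uses 32-bit
 * addresses, so the 64-bit high halves don't exist here.
 */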
static const unsigned int a3xx_register_offsets[REG_ADRENO_REGISTER_MAX] = {
	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_BASE, REG_AXXX_CP_RB_BASE),
	REG_ADRENO_SKIP(REG_ADRENO_CP_RB_BASE_HI),
	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_RPTR_ADDR, REG_AXXX_CP_RB_RPTR_ADDR),
	REG_ADRENO_SKIP(REG_ADRENO_CP_RB_RPTR_ADDR_HI),
	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_RPTR, REG_AXXX_CP_RB_RPTR),
	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_WPTR, REG_AXXX_CP_RB_WPTR),
	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_CNTL, REG_AXXX_CP_RB_CNTL),
};

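/*
 * Most entry points fall through to the shared adreno_/msm_gpu_ helpers;
 * a3xx only overrides hw_init, recover, irq, destroy and state capture.
 */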
static const struct adreno_gpu_funcs funcs = {
	.base = {
		.get_param = adreno_get_param,
		.hw_init = a3xx_hw_init,
		.pm_suspend = msm_gpu_pm_suspend,
		.pm_resume = msm_gpu_pm_resume,
		.recover = a3xx_recover,
		.submit = adreno_submit,
		.flush = adreno_flush,
		.active_ring = adreno_active_ring,
		.irq = a3xx_irq,
		.destroy = a3xx_destroy,
#if defined(CONFIG_DEBUG_FS) || defined(CONFIG_DEV_COREDUMP)
		.show = adreno_show,
#endif
		.gpu_state_get = a3xx_gpu_state_get,
		.gpu_state_put = adreno_gpu_state_put,
	},
};

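/*
 * Performance counters sampled by the core (e.g. via the msm_perf debugfs
 * code): SP ALU-active cycles and FS full-ALU instructions, using SP
 * counters 6 and 7.
 */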
static const struct msm_gpu_perfcntr perfcntrs[] = {
	{ REG_A3XX_SP_PERFCOUNTER6_SELECT, REG_A3XX_RBBM_PERFCTR_SP_6_LO,
			SP_ALU_ACTIVE_CYCLES, "ALUACTIVE" },
	{ REG_A3XX_SP_PERFCOUNTER7_SELECT, REG_A3XX_RBBM_PERFCTR_SP_7_LO,
			SP_FS_FULL_ALU_INSTRUCTIONS, "ALUFULL" },
};

struct msm_gpu *a3xx_gpu_init(struct drm_device *dev)
{
	struct a3xx_gpu *a3xx_gpu = NULL;
	struct adreno_gpu *adreno_gpu;
	struct msm_gpu *gpu;
	struct msm_drm_private *priv = dev->dev_private;
	struct platform_device *pdev = priv->gpu_pdev;
	int ret;

	if (!pdev) {
		DRM_DEV_ERROR(dev->dev, "no a3xx device\n");
		ret = -ENXIO;
		goto fail;
	}

	a3xx_gpu = kzalloc(sizeof(*a3xx_gpu), GFP_KERNEL);
	if (!a3xx_gpu) {
		ret = -ENOMEM;
		goto fail;
	}

	adreno_gpu = &a3xx_gpu->base;
	gpu = &adreno_gpu->base;

	gpu->perfcntrs = perfcntrs;
	gpu->num_perfcntrs = ARRAY_SIZE(perfcntrs);

	adreno_gpu->registers = a3xx_registers;
	adreno_gpu->reg_offsets = a3xx_register_offsets;

	ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs, 1);
	if (ret)
		goto fail;

	/* if needed, allocate gmem: */
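	/*
	 * On a330 the GMEM is carved out of on-chip OCMEM rather than being
	 * a dedicated block, so allocate it from the OCMEM driver and let
	 * a3xx_hw_init() point RB_GMEM_BASE_ADDR at it.
	 */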
	if (adreno_is_a330(adreno_gpu)) {
#ifdef CONFIG_MSM_OCMEM
		/* TODO this is different/missing upstream: */
		struct ocmem_buf *ocmem_hdl =
				ocmem_allocate(OCMEM_GRAPHICS, adreno_gpu->gmem);

		a3xx_gpu->ocmem_hdl = ocmem_hdl;
		a3xx_gpu->ocmem_base = ocmem_hdl->addr;
		adreno_gpu->gmem = ocmem_hdl->len;
		DBG("using %dK of OCMEM at 0x%08x", adreno_gpu->gmem / 1024,
				a3xx_gpu->ocmem_base);
#endif
	}

	if (!gpu->aspace) {
		/* TODO we think it is possible to configure the GPU to
		 * restrict access to VRAM carveout.  But the required
		 * registers are unknown.  For now just bail out and
		 * limp along with just modesetting.  If it turns out
		 * to not be possible to restrict access, then we must
		 * implement a cmdstream validator.
		 */
		DRM_DEV_ERROR(dev->dev, "No memory protection without IOMMU\n");
		ret = -ENXIO;
		goto fail;
	}

	return gpu;

fail:
	if (a3xx_gpu)
		a3xx_destroy(&a3xx_gpu->base.base);

	return ERR_PTR(ret);
}
531