// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2013 Red Hat
 * Author: Rob Clark <robdclark@gmail.com>
 *
 * Copyright (c) 2014 The Linux Foundation. All rights reserved.
 */

#include "a3xx_gpu.h"

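/*
 * Interrupts unmasked at init: the CP/RBBM error and fault conditions
 * (useful for debugging), plus CACHE_FLUSH_TS, which is used to signal
 * cmdstream completion (retire).
 */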
#define A3XX_INT0_MASK \
	(A3XX_INT0_RBBM_AHB_ERROR |        \
	 A3XX_INT0_RBBM_ATB_BUS_OVERFLOW | \
	 A3XX_INT0_CP_T0_PACKET_IN_IB |    \
	 A3XX_INT0_CP_OPCODE_ERROR |       \
	 A3XX_INT0_CP_RESERVED_BIT_ERROR | \
	 A3XX_INT0_CP_HW_FAULT |           \
	 A3XX_INT0_CP_IB1_INT |            \
	 A3XX_INT0_CP_IB2_INT |            \
	 A3XX_INT0_CP_RB_INT |             \
	 A3XX_INT0_CP_REG_PROTECT_FAULT |  \
	 A3XX_INT0_CP_AHB_ERROR_HALT |     \
	 A3XX_INT0_CACHE_FLUSH_TS |        \
	 A3XX_INT0_UCHE_OOB_ACCESS)

extern bool hang_debug;

static void a3xx_dump(struct msm_gpu *gpu);
static bool a3xx_idle(struct msm_gpu *gpu);

static bool a3xx_me_init(struct msm_gpu *gpu)
{
	struct msm_ringbuffer *ring = gpu->rb[0];

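	/*
	 * CP_ME_INIT hands the micro engine its initial state.  The 17
	 * payload dwords below follow the usual a3xx init sequence; the
	 * leading dword is a bitmask selecting which of the following
	 * fields are valid.
	 */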
	OUT_PKT3(ring, CP_ME_INIT, 17);
	OUT_RING(ring, 0x000003f7);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000080);
	OUT_RING(ring, 0x00000100);
	OUT_RING(ring, 0x00000180);
	OUT_RING(ring, 0x00006600);
	OUT_RING(ring, 0x00000150);
	OUT_RING(ring, 0x0000014e);
	OUT_RING(ring, 0x00000154);
	OUT_RING(ring, 0x00000001);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);

	gpu->funcs->flush(gpu, ring);
	return a3xx_idle(gpu);
}

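/*
 * Bring the GPU up from reset: program the per-SoC VBIF settings, enable
 * error reporting and hang detection, set up register protection, load
 * the PM4 and PFP microcode, and finally start the micro engine.
 */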
static int a3xx_hw_init(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a3xx_gpu *a3xx_gpu = to_a3xx_gpu(adreno_gpu);
	uint32_t *ptr, len;
	int i, ret;

	DBG("%s", gpu->name);

	if (adreno_is_a305(adreno_gpu)) {
		/* Set up 16 deep read/write request queues: */
		gpu_write(gpu, REG_A3XX_VBIF_IN_RD_LIM_CONF0, 0x10101010);
		gpu_write(gpu, REG_A3XX_VBIF_IN_RD_LIM_CONF1, 0x10101010);
		gpu_write(gpu, REG_A3XX_VBIF_OUT_RD_LIM_CONF0, 0x10101010);
		gpu_write(gpu, REG_A3XX_VBIF_OUT_WR_LIM_CONF0, 0x10101010);
		gpu_write(gpu, REG_A3XX_VBIF_DDR_OUT_MAX_BURST, 0x0000303);
		gpu_write(gpu, REG_A3XX_VBIF_IN_WR_LIM_CONF0, 0x10101010);
		gpu_write(gpu, REG_A3XX_VBIF_IN_WR_LIM_CONF1, 0x10101010);
		/* Enable WR-REQ: */
		gpu_write(gpu, REG_A3XX_VBIF_GATE_OFF_WRREQ_EN, 0x0000ff);
		/* Set up round robin arbitration between both AXI ports: */
		gpu_write(gpu, REG_A3XX_VBIF_ARB_CTL, 0x00000030);
		/* Set up AOOO: */
		gpu_write(gpu, REG_A3XX_VBIF_OUT_AXI_AOOO_EN, 0x0000003c);
		gpu_write(gpu, REG_A3XX_VBIF_OUT_AXI_AOOO, 0x003c003c);
	} else if (adreno_is_a306(adreno_gpu)) {
		gpu_write(gpu, REG_A3XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x0003);
		gpu_write(gpu, REG_A3XX_VBIF_OUT_RD_LIM_CONF0, 0x0000000a);
		gpu_write(gpu, REG_A3XX_VBIF_OUT_WR_LIM_CONF0, 0x0000000a);
	} else if (adreno_is_a320(adreno_gpu)) {
		/* Set up 16 deep read/write request queues: */
		gpu_write(gpu, REG_A3XX_VBIF_IN_RD_LIM_CONF0, 0x10101010);
		gpu_write(gpu, REG_A3XX_VBIF_IN_RD_LIM_CONF1, 0x10101010);
		gpu_write(gpu, REG_A3XX_VBIF_OUT_RD_LIM_CONF0, 0x10101010);
		gpu_write(gpu, REG_A3XX_VBIF_OUT_WR_LIM_CONF0, 0x10101010);
		gpu_write(gpu, REG_A3XX_VBIF_DDR_OUT_MAX_BURST, 0x0000303);
		gpu_write(gpu, REG_A3XX_VBIF_IN_WR_LIM_CONF0, 0x10101010);
		gpu_write(gpu, REG_A3XX_VBIF_IN_WR_LIM_CONF1, 0x10101010);
		/* Enable WR-REQ: */
		gpu_write(gpu, REG_A3XX_VBIF_GATE_OFF_WRREQ_EN, 0x0000ff);
		/* Set up round robin arbitration between both AXI ports: */
		gpu_write(gpu, REG_A3XX_VBIF_ARB_CTL, 0x00000030);
		/* Set up AOOO: */
		gpu_write(gpu, REG_A3XX_VBIF_OUT_AXI_AOOO_EN, 0x0000003c);
		gpu_write(gpu, REG_A3XX_VBIF_OUT_AXI_AOOO, 0x003c003c);
		/* Enable 1K sort: */
		gpu_write(gpu, REG_A3XX_VBIF_ABIT_SORT, 0x000000ff);
		gpu_write(gpu, REG_A3XX_VBIF_ABIT_SORT_CONF, 0x000000a4);

	} else if (adreno_is_a330v2(adreno_gpu)) {
		/*
		 * Most of the VBIF registers on 8974v2 have the correct
		 * values at power on, so we won't modify those if we don't
		 * need to
		 */
		/* Enable 1k sort: */
		gpu_write(gpu, REG_A3XX_VBIF_ABIT_SORT, 0x0001003f);
		gpu_write(gpu, REG_A3XX_VBIF_ABIT_SORT_CONF, 0x000000a4);
		/* Enable WR-REQ: */
		gpu_write(gpu, REG_A3XX_VBIF_GATE_OFF_WRREQ_EN, 0x00003f);
		gpu_write(gpu, REG_A3XX_VBIF_DDR_OUT_MAX_BURST, 0x0000303);
		/* Set up VBIF_ROUND_ROBIN_QOS_ARB: */
		gpu_write(gpu, REG_A3XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x0003);

	} else if (adreno_is_a330(adreno_gpu)) {
		/* Set up 16 deep read/write request queues: */
		gpu_write(gpu, REG_A3XX_VBIF_IN_RD_LIM_CONF0, 0x18181818);
		gpu_write(gpu, REG_A3XX_VBIF_IN_RD_LIM_CONF1, 0x18181818);
		gpu_write(gpu, REG_A3XX_VBIF_OUT_RD_LIM_CONF0, 0x18181818);
		gpu_write(gpu, REG_A3XX_VBIF_OUT_WR_LIM_CONF0, 0x18181818);
		gpu_write(gpu, REG_A3XX_VBIF_DDR_OUT_MAX_BURST, 0x0000303);
		gpu_write(gpu, REG_A3XX_VBIF_IN_WR_LIM_CONF0, 0x18181818);
		gpu_write(gpu, REG_A3XX_VBIF_IN_WR_LIM_CONF1, 0x18181818);
		/* Enable WR-REQ: */
		gpu_write(gpu, REG_A3XX_VBIF_GATE_OFF_WRREQ_EN, 0x00003f);
		/* Set up round robin arbitration between both AXI ports: */
		gpu_write(gpu, REG_A3XX_VBIF_ARB_CTL, 0x00000030);
		/* Set up VBIF_ROUND_ROBIN_QOS_ARB: */
		gpu_write(gpu, REG_A3XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x0001);
		/* Set up AOOO: */
		gpu_write(gpu, REG_A3XX_VBIF_OUT_AXI_AOOO_EN, 0x0000003f);
		gpu_write(gpu, REG_A3XX_VBIF_OUT_AXI_AOOO, 0x003f003f);
		/* Enable 1K sort: */
		gpu_write(gpu, REG_A3XX_VBIF_ABIT_SORT, 0x0001003f);
		gpu_write(gpu, REG_A3XX_VBIF_ABIT_SORT_CONF, 0x000000a4);
		/* Disable VBIF clock gating.  This allows AXI to run at a
		 * higher frequency than the GPU:
		 */
		gpu_write(gpu, REG_A3XX_VBIF_CLKON, 0x00000001);

	} else {
		BUG();
	}

	/* Make all blocks contribute to the GPU BUSY perf counter: */
	gpu_write(gpu, REG_A3XX_RBBM_GPU_BUSY_MASKED, 0xffffffff);

	/* Tune the hysteresis counters for SP and CP idle detection: */
	gpu_write(gpu, REG_A3XX_RBBM_SP_HYST_CNT, 0x10);
	gpu_write(gpu, REG_A3XX_RBBM_WAIT_IDLE_CLOCKS_CTL, 0x10);

	/* Enable the RBBM error reporting bits.  This lets us get
	 * useful information on failure:
	 */
	gpu_write(gpu, REG_A3XX_RBBM_AHB_CTL0, 0x00000001);

	/* Enable AHB error reporting: */
	gpu_write(gpu, REG_A3XX_RBBM_AHB_CTL1, 0xa6ffffff);

	/* Turn on the power counters: */
	gpu_write(gpu, REG_A3XX_RBBM_RBBM_CTL, 0x00030000);

	/* Turn on hang detection - this spews a lot of useful information
	 * into the RBBM registers on a hang:
	 */
	gpu_write(gpu, REG_A3XX_RBBM_INTERFACE_HANG_INT_CTL, 0x00010fff);

	/* Enable 64-byte cacheline size. HW Default is 32-byte (0x000000E0): */
	gpu_write(gpu, REG_A3XX_UCHE_CACHE_MODE_CONTROL_REG, 0x00000001);

	/* Enable Clock gating: */
	if (adreno_is_a306(adreno_gpu))
		gpu_write(gpu, REG_A3XX_RBBM_CLOCK_CTL, 0xaaaaaaaa);
	else if (adreno_is_a320(adreno_gpu))
		gpu_write(gpu, REG_A3XX_RBBM_CLOCK_CTL, 0xbfffffff);
	else if (adreno_is_a330v2(adreno_gpu))
		gpu_write(gpu, REG_A3XX_RBBM_CLOCK_CTL, 0xaaaaaaaa);
	else if (adreno_is_a330(adreno_gpu))
		gpu_write(gpu, REG_A3XX_RBBM_CLOCK_CTL, 0xbffcffff);

	if (adreno_is_a330v2(adreno_gpu))
		gpu_write(gpu, REG_A3XX_RBBM_GPR0_CTL, 0x05515455);
	else if (adreno_is_a330(adreno_gpu))
		gpu_write(gpu, REG_A3XX_RBBM_GPR0_CTL, 0x00000000);

	/* Set the OCMEM base address for A330, etc */
	if (a3xx_gpu->ocmem.hdl) {
		gpu_write(gpu, REG_A3XX_RB_GMEM_BASE_ADDR,
			(unsigned int)(a3xx_gpu->ocmem.base >> 14));
	}

	/* Turn on performance counters: */
	gpu_write(gpu, REG_A3XX_RBBM_PERFCTR_CTL, 0x01);

	/* Enable the perfcntrs that we use.. */
	for (i = 0; i < gpu->num_perfcntrs; i++) {
		const struct msm_gpu_perfcntr *perfcntr = &gpu->perfcntrs[i];
		gpu_write(gpu, perfcntr->select_reg, perfcntr->select_val);
	}

	gpu_write(gpu, REG_A3XX_RBBM_INT_0_MASK, A3XX_INT0_MASK);

	ret = adreno_hw_init(gpu);
	if (ret)
		return ret;

	/*
	 * Use the default ringbuffer size and block size but disable the RPTR
	 * shadow
	 */
	gpu_write(gpu, REG_AXXX_CP_RB_CNTL,
		MSM_GPU_RB_CNTL_DEFAULT | AXXX_CP_RB_CNTL_NO_UPDATE);

	/* Set the ringbuffer address */
	gpu_write(gpu, REG_AXXX_CP_RB_BASE, lower_32_bits(gpu->rb[0]->iova));

	/* setup access protection: */
	gpu_write(gpu, REG_A3XX_CP_PROTECT_CTRL, 0x00000007);

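	/*
	 * Each CP_PROTECT entry describes a register window that the
	 * cmdstream is not allowed to touch; violations raise the
	 * CP_REG_PROTECT_FAULT interrupt enabled in A3XX_INT0_MASK.
	 */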
	/* RBBM registers */
	gpu_write(gpu, REG_A3XX_CP_PROTECT(0), 0x63000040);
	gpu_write(gpu, REG_A3XX_CP_PROTECT(1), 0x62000080);
	gpu_write(gpu, REG_A3XX_CP_PROTECT(2), 0x600000cc);
	gpu_write(gpu, REG_A3XX_CP_PROTECT(3), 0x60000108);
	gpu_write(gpu, REG_A3XX_CP_PROTECT(4), 0x64000140);
	gpu_write(gpu, REG_A3XX_CP_PROTECT(5), 0x66000400);

	/* CP registers */
	gpu_write(gpu, REG_A3XX_CP_PROTECT(6), 0x65000700);
	gpu_write(gpu, REG_A3XX_CP_PROTECT(7), 0x610007d8);
	gpu_write(gpu, REG_A3XX_CP_PROTECT(8), 0x620007e0);
	gpu_write(gpu, REG_A3XX_CP_PROTECT(9), 0x61001178);
	gpu_write(gpu, REG_A3XX_CP_PROTECT(10), 0x64001180);

	/* RB registers */
	gpu_write(gpu, REG_A3XX_CP_PROTECT(11), 0x60003300);

	/* VBIF registers */
	gpu_write(gpu, REG_A3XX_CP_PROTECT(12), 0x6b00c000);

	/* NOTE: PM4/micro-engine firmware registers look to be the same
	 * for a2xx and a3xx.. we could possibly push that part down to
	 * adreno_gpu base class.  Or push both PM4 and PFP but
	 * parameterize the pfp ucode addr/data registers..
	 */

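	/*
	 * Note that both ucode loads below start at index 1, skipping the
	 * first dword of each firmware image; that dword appears to be
	 * header data rather than ucode.
	 */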
	/* Load PM4: */
	ptr = (uint32_t *)(adreno_gpu->fw[ADRENO_FW_PM4]->data);
	len = adreno_gpu->fw[ADRENO_FW_PM4]->size / 4;
	DBG("loading PM4 ucode version: %x", ptr[1]);

	gpu_write(gpu, REG_AXXX_CP_DEBUG,
			AXXX_CP_DEBUG_DYNAMIC_CLK_DISABLE |
			AXXX_CP_DEBUG_MIU_128BIT_WRITE_ENABLE);
	gpu_write(gpu, REG_AXXX_CP_ME_RAM_WADDR, 0);
	for (i = 1; i < len; i++)
		gpu_write(gpu, REG_AXXX_CP_ME_RAM_DATA, ptr[i]);

	/* Load PFP: */
	ptr = (uint32_t *)(adreno_gpu->fw[ADRENO_FW_PFP]->data);
	len = adreno_gpu->fw[ADRENO_FW_PFP]->size / 4;
	DBG("loading PFP ucode version: %x", ptr[5]);

	gpu_write(gpu, REG_A3XX_CP_PFP_UCODE_ADDR, 0);
	for (i = 1; i < len; i++)
		gpu_write(gpu, REG_A3XX_CP_PFP_UCODE_DATA, ptr[i]);

	/* CP ROQ queue sizes (bytes) - RB:16, ST:16, IB1:32, IB2:64 */
	if (adreno_is_a305(adreno_gpu) || adreno_is_a306(adreno_gpu) ||
			adreno_is_a320(adreno_gpu)) {
		gpu_write(gpu, REG_AXXX_CP_QUEUE_THRESHOLDS,
				AXXX_CP_QUEUE_THRESHOLDS_CSQ_IB1_START(2) |
				AXXX_CP_QUEUE_THRESHOLDS_CSQ_IB2_START(6) |
				AXXX_CP_QUEUE_THRESHOLDS_CSQ_ST_START(14));
	} else if (adreno_is_a330(adreno_gpu)) {
		/* NOTE: this value (taken from the downstream android driver)
		 * includes some bits outside of the known bitfields.  But
		 * A330 has this "MERCIU queue" thing too, which might
		 * explain a new bitfield or reshuffling:
		 */
		gpu_write(gpu, REG_AXXX_CP_QUEUE_THRESHOLDS, 0x003e2008);
	}

	/* clear ME_HALT to start micro engine */
	gpu_write(gpu, REG_AXXX_CP_ME_CNTL, 0);

	return a3xx_me_init(gpu) ? 0 : -EINVAL;
}

static void a3xx_recover(struct msm_gpu *gpu)
{
	int i;

	adreno_dump_info(gpu);

	for (i = 0; i < 8; i++) {
		printk("CP_SCRATCH_REG%d: %u\n", i,
			gpu_read(gpu, REG_AXXX_CP_SCRATCH_REG0 + i));
	}

	/* dump registers before resetting gpu, if enabled: */
	if (hang_debug)
		a3xx_dump(gpu);

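	/* Pulse the SW reset bit; the read back makes sure the write has
	 * posted before the bit is cleared again:
	 */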
	gpu_write(gpu, REG_A3XX_RBBM_SW_RESET_CMD, 1);
	gpu_read(gpu, REG_A3XX_RBBM_SW_RESET_CMD);
	gpu_write(gpu, REG_A3XX_RBBM_SW_RESET_CMD, 0);
	adreno_recover(gpu);
}

static void a3xx_destroy(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a3xx_gpu *a3xx_gpu = to_a3xx_gpu(adreno_gpu);

	DBG("%s", gpu->name);

	adreno_gpu_cleanup(adreno_gpu);

	adreno_gpu_ocmem_cleanup(&a3xx_gpu->ocmem);

	kfree(a3xx_gpu);
}

static bool a3xx_idle(struct msm_gpu *gpu)
{
	/* wait for ringbuffer to drain: */
	if (!adreno_idle(gpu, gpu->rb[0]))
		return false;

	/* then wait for GPU to finish: */
	if (spin_until(!(gpu_read(gpu, REG_A3XX_RBBM_STATUS) &
			A3XX_RBBM_STATUS_GPU_BUSY))) {
		DRM_ERROR("%s: timeout waiting for GPU to idle!\n", gpu->name);

		/* TODO maybe we need to reset GPU here to recover from hang? */
		return false;
	}

	return true;
}

static irqreturn_t a3xx_irq(struct msm_gpu *gpu)
{
	uint32_t status;

	status = gpu_read(gpu, REG_A3XX_RBBM_INT_0_STATUS);
	DBG("%s: %08x", gpu->name, status);

	// TODO

	gpu_write(gpu, REG_A3XX_RBBM_INT_CLEAR_CMD, status);

	msm_gpu_retire(gpu);

	return IRQ_HANDLED;
}

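/* Pairs of (start, end) register offsets, inclusive, read out for debug
 * dumps and crash state; the list is terminated with a ~0 sentinel.
 */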
static const unsigned int a3xx_registers[] = {
	0x0000, 0x0002, 0x0010, 0x0012, 0x0018, 0x0018, 0x0020, 0x0027,
	0x0029, 0x002b, 0x002e, 0x0033, 0x0040, 0x0042, 0x0050, 0x005c,
	0x0060, 0x006c, 0x0080, 0x0082, 0x0084, 0x0088, 0x0090, 0x00e5,
	0x00ea, 0x00ed, 0x0100, 0x0100, 0x0110, 0x0123, 0x01c0, 0x01c1,
	0x01c3, 0x01c5, 0x01c7, 0x01c7, 0x01d5, 0x01d9, 0x01dc, 0x01dd,
	0x01ea, 0x01ea, 0x01ee, 0x01f1, 0x01f5, 0x01f5, 0x01fc, 0x01ff,
	0x0440, 0x0440, 0x0443, 0x0443, 0x0445, 0x0445, 0x044d, 0x044f,
	0x0452, 0x0452, 0x0454, 0x046f, 0x047c, 0x047c, 0x047f, 0x047f,
	0x0578, 0x057f, 0x0600, 0x0602, 0x0605, 0x0607, 0x060a, 0x060e,
	0x0612, 0x0614, 0x0c01, 0x0c02, 0x0c06, 0x0c1d, 0x0c3d, 0x0c3f,
	0x0c48, 0x0c4b, 0x0c80, 0x0c80, 0x0c88, 0x0c8b, 0x0ca0, 0x0cb7,
	0x0cc0, 0x0cc1, 0x0cc6, 0x0cc7, 0x0ce4, 0x0ce5, 0x0e00, 0x0e05,
	0x0e0c, 0x0e0c, 0x0e22, 0x0e23, 0x0e41, 0x0e45, 0x0e64, 0x0e65,
	0x0e80, 0x0e82, 0x0e84, 0x0e89, 0x0ea0, 0x0ea1, 0x0ea4, 0x0ea7,
	0x0ec4, 0x0ecb, 0x0ee0, 0x0ee0, 0x0f00, 0x0f01, 0x0f03, 0x0f09,
	0x2040, 0x2040, 0x2044, 0x2044, 0x2048, 0x204d, 0x2068, 0x2069,
	0x206c, 0x206d, 0x2070, 0x2070, 0x2072, 0x2072, 0x2074, 0x2075,
	0x2079, 0x207a, 0x20c0, 0x20d3, 0x20e4, 0x20ef, 0x2100, 0x2109,
	0x210c, 0x210c, 0x210e, 0x210e, 0x2110, 0x2111, 0x2114, 0x2115,
	0x21e4, 0x21e4, 0x21ea, 0x21ea, 0x21ec, 0x21ed, 0x21f0, 0x21f0,
	0x2200, 0x2212, 0x2214, 0x2217, 0x221a, 0x221a, 0x2240, 0x227e,
	0x2280, 0x228b, 0x22c0, 0x22c0, 0x22c4, 0x22ce, 0x22d0, 0x22d8,
	0x22df, 0x22e6, 0x22e8, 0x22e9, 0x22ec, 0x22ec, 0x22f0, 0x22f7,
	0x22ff, 0x22ff, 0x2340, 0x2343, 0x2440, 0x2440, 0x2444, 0x2444,
	0x2448, 0x244d, 0x2468, 0x2469, 0x246c, 0x246d, 0x2470, 0x2470,
	0x2472, 0x2472, 0x2474, 0x2475, 0x2479, 0x247a, 0x24c0, 0x24d3,
	0x24e4, 0x24ef, 0x2500, 0x2509, 0x250c, 0x250c, 0x250e, 0x250e,
	0x2510, 0x2511, 0x2514, 0x2515, 0x25e4, 0x25e4, 0x25ea, 0x25ea,
	0x25ec, 0x25ed, 0x25f0, 0x25f0, 0x2600, 0x2612, 0x2614, 0x2617,
	0x261a, 0x261a, 0x2640, 0x267e, 0x2680, 0x268b, 0x26c0, 0x26c0,
	0x26c4, 0x26ce, 0x26d0, 0x26d8, 0x26df, 0x26e6, 0x26e8, 0x26e9,
	0x26ec, 0x26ec, 0x26f0, 0x26f7, 0x26ff, 0x26ff, 0x2740, 0x2743,
	0x300c, 0x300e, 0x301c, 0x301d, 0x302a, 0x302a, 0x302c, 0x302d,
	0x3030, 0x3031, 0x3034, 0x3036, 0x303c, 0x303c, 0x305e, 0x305f,
	~0   /* sentinel */
};

/* would be nice to not have to duplicate the _show() stuff with printk(): */
static void a3xx_dump(struct msm_gpu *gpu)
{
	printk("status:   %08x\n",
			gpu_read(gpu, REG_A3XX_RBBM_STATUS));
	adreno_dump(gpu);
}

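/* Capture a GPU state snapshot (for devcoredump/debugfs): the common
 * adreno helper grabs the ring and register state, and we add the
 * current RBBM_STATUS on top.
 */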
static struct msm_gpu_state *a3xx_gpu_state_get(struct msm_gpu *gpu)
{
	struct msm_gpu_state *state = kzalloc(sizeof(*state), GFP_KERNEL);

	if (!state)
		return ERR_PTR(-ENOMEM);

	adreno_gpu_state_get(gpu, state);

	state->rbbm_status = gpu_read(gpu, REG_A3XX_RBBM_STATUS);

	return state;
}

/* Register offset defines for A3XX */
static const unsigned int a3xx_register_offsets[REG_ADRENO_REGISTER_MAX] = {
	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_BASE, REG_AXXX_CP_RB_BASE),
	REG_ADRENO_SKIP(REG_ADRENO_CP_RB_BASE_HI),
	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_RPTR_ADDR, REG_AXXX_CP_RB_RPTR_ADDR),
	REG_ADRENO_SKIP(REG_ADRENO_CP_RB_RPTR_ADDR_HI),
	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_RPTR, REG_AXXX_CP_RB_RPTR),
	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_WPTR, REG_AXXX_CP_RB_WPTR),
	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_CNTL, REG_AXXX_CP_RB_CNTL),
};

static const struct adreno_gpu_funcs funcs = {
	.base = {
		.get_param = adreno_get_param,
		.hw_init = a3xx_hw_init,
		.pm_suspend = msm_gpu_pm_suspend,
		.pm_resume = msm_gpu_pm_resume,
		.recover = a3xx_recover,
		.submit = adreno_submit,
		.flush = adreno_flush,
		.active_ring = adreno_active_ring,
		.irq = a3xx_irq,
		.destroy = a3xx_destroy,
#if defined(CONFIG_DEBUG_FS) || defined(CONFIG_DEV_COREDUMP)
		.show = adreno_show,
#endif
		.gpu_state_get = a3xx_gpu_state_get,
		.gpu_state_put = adreno_gpu_state_put,
		.create_address_space = adreno_iommu_create_address_space,
	},
};

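/* Performance counters exposed to the msm core, as
 * { select register, LO sample register, select value, name }:
 */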
static const struct msm_gpu_perfcntr perfcntrs[] = {
	{ REG_A3XX_SP_PERFCOUNTER6_SELECT, REG_A3XX_RBBM_PERFCTR_SP_6_LO,
			SP_ALU_ACTIVE_CYCLES, "ALUACTIVE" },
	{ REG_A3XX_SP_PERFCOUNTER7_SELECT, REG_A3XX_RBBM_PERFCTR_SP_7_LO,
			SP_FS_FULL_ALU_INSTRUCTIONS, "ALUFULL" },
};

struct msm_gpu *a3xx_gpu_init(struct drm_device *dev)
{
	struct a3xx_gpu *a3xx_gpu = NULL;
	struct adreno_gpu *adreno_gpu;
	struct msm_gpu *gpu;
	struct msm_drm_private *priv = dev->dev_private;
	struct platform_device *pdev = priv->gpu_pdev;
	int ret;

	if (!pdev) {
		DRM_DEV_ERROR(dev->dev, "no a3xx device\n");
		ret = -ENXIO;
		goto fail;
	}

	a3xx_gpu = kzalloc(sizeof(*a3xx_gpu), GFP_KERNEL);
	if (!a3xx_gpu) {
		ret = -ENOMEM;
		goto fail;
	}

	adreno_gpu = &a3xx_gpu->base;
	gpu = &adreno_gpu->base;

	gpu->perfcntrs = perfcntrs;
	gpu->num_perfcntrs = ARRAY_SIZE(perfcntrs);

	adreno_gpu->registers = a3xx_registers;
	adreno_gpu->reg_offsets = a3xx_register_offsets;

	ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs, 1);
	if (ret)
		goto fail;

	/* if needed, allocate gmem: */
	if (adreno_is_a330(adreno_gpu)) {
		ret = adreno_gpu_ocmem_init(&adreno_gpu->base.pdev->dev,
					    adreno_gpu, &a3xx_gpu->ocmem);
		if (ret)
			goto fail;
	}

	if (!gpu->aspace) {
		/* TODO we think it is possible to configure the GPU to
		 * restrict access to VRAM carveout.  But the required
		 * registers are unknown.  For now just bail out and
		 * limp along with just modesetting.  If it turns out
		 * to not be possible to restrict access, then we must
		 * implement a cmdstream validator.
		 */
		DRM_DEV_ERROR(dev->dev, "No memory protection without IOMMU\n");
		ret = -ENXIO;
		goto fail;
	}

	/*
	 * Set the ICC path to maximum speed for now by multiplying the fastest
	 * frequency by the bus width (8). We'll want to scale this later on to
	 * improve battery life.
	 */
	icc_set_bw(gpu->icc_path, 0, Bps_to_icc(gpu->fast_rate) * 8);
	icc_set_bw(gpu->ocmem_icc_path, 0, Bps_to_icc(gpu->fast_rate) * 8);

	return gpu;

fail:
	if (a3xx_gpu)
		a3xx_destroy(&a3xx_gpu->base.base);

	return ERR_PTR(ret);
}
536