xref: /openbmc/linux/drivers/gpu/drm/msm/adreno/a3xx_gpu.c (revision 9659281c)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Copyright (C) 2013 Red Hat
4  * Author: Rob Clark <robdclark@gmail.com>
5  *
6  * Copyright (c) 2014 The Linux Foundation. All rights reserved.
7  */
8 
9 #include "a3xx_gpu.h"
10 
/*
 * Set of INT0 interrupt bits that a3xx_hw_init() programs into
 * REG_A3XX_RBBM_INT_0_MASK.
 */
#define A3XX_INT0_MASK (			\
	A3XX_INT0_RBBM_AHB_ERROR		| \
	A3XX_INT0_RBBM_ATB_BUS_OVERFLOW		| \
	A3XX_INT0_CP_T0_PACKET_IN_IB		| \
	A3XX_INT0_CP_OPCODE_ERROR		| \
	A3XX_INT0_CP_RESERVED_BIT_ERROR		| \
	A3XX_INT0_CP_HW_FAULT			| \
	A3XX_INT0_CP_IB1_INT			| \
	A3XX_INT0_CP_IB2_INT			| \
	A3XX_INT0_CP_RB_INT			| \
	A3XX_INT0_CP_REG_PROTECT_FAULT		| \
	A3XX_INT0_CP_AHB_ERROR_HALT		| \
	A3XX_INT0_CACHE_FLUSH_TS		| \
	A3XX_INT0_UCHE_OOB_ACCESS)
25 
26 extern bool hang_debug;
27 
28 static void a3xx_dump(struct msm_gpu *gpu);
29 static bool a3xx_idle(struct msm_gpu *gpu);
30 
31 static void a3xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit)
32 {
33 	struct msm_drm_private *priv = gpu->dev->dev_private;
34 	struct msm_ringbuffer *ring = submit->ring;
35 	unsigned int i;
36 
37 	for (i = 0; i < submit->nr_cmds; i++) {
38 		switch (submit->cmd[i].type) {
39 		case MSM_SUBMIT_CMD_IB_TARGET_BUF:
40 			/* ignore IB-targets */
41 			break;
42 		case MSM_SUBMIT_CMD_CTX_RESTORE_BUF:
43 			/* ignore if there has not been a ctx switch: */
44 			if (priv->lastctx == submit->queue->ctx)
45 				break;
46 			fallthrough;
47 		case MSM_SUBMIT_CMD_BUF:
48 			OUT_PKT3(ring, CP_INDIRECT_BUFFER_PFD, 2);
49 			OUT_RING(ring, lower_32_bits(submit->cmd[i].iova));
50 			OUT_RING(ring, submit->cmd[i].size);
51 			OUT_PKT2(ring);
52 			break;
53 		}
54 	}
55 
56 	OUT_PKT0(ring, REG_AXXX_CP_SCRATCH_REG2, 1);
57 	OUT_RING(ring, submit->seqno);
58 
59 	/* Flush HLSQ lazy updates to make sure there is nothing
60 	 * pending for indirect loads after the timestamp has
61 	 * passed:
62 	 */
63 	OUT_PKT3(ring, CP_EVENT_WRITE, 1);
64 	OUT_RING(ring, HLSQ_FLUSH);
65 
66 	/* wait for idle before cache flush/interrupt */
67 	OUT_PKT3(ring, CP_WAIT_FOR_IDLE, 1);
68 	OUT_RING(ring, 0x00000000);
69 
70 	/* BIT(31) of CACHE_FLUSH_TS triggers CACHE_FLUSH_TS IRQ from GPU */
71 	OUT_PKT3(ring, CP_EVENT_WRITE, 3);
72 	OUT_RING(ring, CACHE_FLUSH_TS | BIT(31));
73 	OUT_RING(ring, rbmemptr(ring, fence));
74 	OUT_RING(ring, submit->seqno);
75 
76 #if 0
77 	/* Dummy set-constant to trigger context rollover */
78 	OUT_PKT3(ring, CP_SET_CONSTANT, 2);
79 	OUT_RING(ring, CP_REG(REG_A3XX_HLSQ_CL_KERNEL_GROUP_X_REG));
80 	OUT_RING(ring, 0x00000000);
81 #endif
82 
83 	adreno_flush(gpu, ring, REG_AXXX_CP_RB_WPTR);
84 }
85 
86 static bool a3xx_me_init(struct msm_gpu *gpu)
87 {
88 	struct msm_ringbuffer *ring = gpu->rb[0];
89 
90 	OUT_PKT3(ring, CP_ME_INIT, 17);
91 	OUT_RING(ring, 0x000003f7);
92 	OUT_RING(ring, 0x00000000);
93 	OUT_RING(ring, 0x00000000);
94 	OUT_RING(ring, 0x00000000);
95 	OUT_RING(ring, 0x00000080);
96 	OUT_RING(ring, 0x00000100);
97 	OUT_RING(ring, 0x00000180);
98 	OUT_RING(ring, 0x00006600);
99 	OUT_RING(ring, 0x00000150);
100 	OUT_RING(ring, 0x0000014e);
101 	OUT_RING(ring, 0x00000154);
102 	OUT_RING(ring, 0x00000001);
103 	OUT_RING(ring, 0x00000000);
104 	OUT_RING(ring, 0x00000000);
105 	OUT_RING(ring, 0x00000000);
106 	OUT_RING(ring, 0x00000000);
107 	OUT_RING(ring, 0x00000000);
108 
109 	adreno_flush(gpu, ring, REG_AXXX_CP_RB_WPTR);
110 	return a3xx_idle(gpu);
111 }
112 
113 static int a3xx_hw_init(struct msm_gpu *gpu)
114 {
115 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
116 	struct a3xx_gpu *a3xx_gpu = to_a3xx_gpu(adreno_gpu);
117 	uint32_t *ptr, len;
118 	int i, ret;
119 
120 	DBG("%s", gpu->name);
121 
122 	if (adreno_is_a305(adreno_gpu)) {
123 		/* Set up 16 deep read/write request queues: */
124 		gpu_write(gpu, REG_A3XX_VBIF_IN_RD_LIM_CONF0, 0x10101010);
125 		gpu_write(gpu, REG_A3XX_VBIF_IN_RD_LIM_CONF1, 0x10101010);
126 		gpu_write(gpu, REG_A3XX_VBIF_OUT_RD_LIM_CONF0, 0x10101010);
127 		gpu_write(gpu, REG_A3XX_VBIF_OUT_WR_LIM_CONF0, 0x10101010);
128 		gpu_write(gpu, REG_A3XX_VBIF_DDR_OUT_MAX_BURST, 0x0000303);
129 		gpu_write(gpu, REG_A3XX_VBIF_IN_WR_LIM_CONF0, 0x10101010);
130 		gpu_write(gpu, REG_A3XX_VBIF_IN_WR_LIM_CONF1, 0x10101010);
131 		/* Enable WR-REQ: */
132 		gpu_write(gpu, REG_A3XX_VBIF_GATE_OFF_WRREQ_EN, 0x0000ff);
133 		/* Set up round robin arbitration between both AXI ports: */
134 		gpu_write(gpu, REG_A3XX_VBIF_ARB_CTL, 0x00000030);
135 		/* Set up AOOO: */
136 		gpu_write(gpu, REG_A3XX_VBIF_OUT_AXI_AOOO_EN, 0x0000003c);
137 		gpu_write(gpu, REG_A3XX_VBIF_OUT_AXI_AOOO, 0x003c003c);
138 	} else if (adreno_is_a306(adreno_gpu)) {
139 		gpu_write(gpu, REG_A3XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x0003);
140 		gpu_write(gpu, REG_A3XX_VBIF_OUT_RD_LIM_CONF0, 0x0000000a);
141 		gpu_write(gpu, REG_A3XX_VBIF_OUT_WR_LIM_CONF0, 0x0000000a);
142 	} else if (adreno_is_a320(adreno_gpu)) {
143 		/* Set up 16 deep read/write request queues: */
144 		gpu_write(gpu, REG_A3XX_VBIF_IN_RD_LIM_CONF0, 0x10101010);
145 		gpu_write(gpu, REG_A3XX_VBIF_IN_RD_LIM_CONF1, 0x10101010);
146 		gpu_write(gpu, REG_A3XX_VBIF_OUT_RD_LIM_CONF0, 0x10101010);
147 		gpu_write(gpu, REG_A3XX_VBIF_OUT_WR_LIM_CONF0, 0x10101010);
148 		gpu_write(gpu, REG_A3XX_VBIF_DDR_OUT_MAX_BURST, 0x0000303);
149 		gpu_write(gpu, REG_A3XX_VBIF_IN_WR_LIM_CONF0, 0x10101010);
150 		gpu_write(gpu, REG_A3XX_VBIF_IN_WR_LIM_CONF1, 0x10101010);
151 		/* Enable WR-REQ: */
152 		gpu_write(gpu, REG_A3XX_VBIF_GATE_OFF_WRREQ_EN, 0x0000ff);
153 		/* Set up round robin arbitration between both AXI ports: */
154 		gpu_write(gpu, REG_A3XX_VBIF_ARB_CTL, 0x00000030);
155 		/* Set up AOOO: */
156 		gpu_write(gpu, REG_A3XX_VBIF_OUT_AXI_AOOO_EN, 0x0000003c);
157 		gpu_write(gpu, REG_A3XX_VBIF_OUT_AXI_AOOO, 0x003c003c);
158 		/* Enable 1K sort: */
159 		gpu_write(gpu, REG_A3XX_VBIF_ABIT_SORT, 0x000000ff);
160 		gpu_write(gpu, REG_A3XX_VBIF_ABIT_SORT_CONF, 0x000000a4);
161 
162 	} else if (adreno_is_a330v2(adreno_gpu)) {
163 		/*
164 		 * Most of the VBIF registers on 8974v2 have the correct
165 		 * values at power on, so we won't modify those if we don't
166 		 * need to
167 		 */
168 		/* Enable 1k sort: */
169 		gpu_write(gpu, REG_A3XX_VBIF_ABIT_SORT, 0x0001003f);
170 		gpu_write(gpu, REG_A3XX_VBIF_ABIT_SORT_CONF, 0x000000a4);
171 		/* Enable WR-REQ: */
172 		gpu_write(gpu, REG_A3XX_VBIF_GATE_OFF_WRREQ_EN, 0x00003f);
173 		gpu_write(gpu, REG_A3XX_VBIF_DDR_OUT_MAX_BURST, 0x0000303);
174 		/* Set up VBIF_ROUND_ROBIN_QOS_ARB: */
175 		gpu_write(gpu, REG_A3XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x0003);
176 
177 	} else if (adreno_is_a330(adreno_gpu)) {
178 		/* Set up 16 deep read/write request queues: */
179 		gpu_write(gpu, REG_A3XX_VBIF_IN_RD_LIM_CONF0, 0x18181818);
180 		gpu_write(gpu, REG_A3XX_VBIF_IN_RD_LIM_CONF1, 0x18181818);
181 		gpu_write(gpu, REG_A3XX_VBIF_OUT_RD_LIM_CONF0, 0x18181818);
182 		gpu_write(gpu, REG_A3XX_VBIF_OUT_WR_LIM_CONF0, 0x18181818);
183 		gpu_write(gpu, REG_A3XX_VBIF_DDR_OUT_MAX_BURST, 0x0000303);
184 		gpu_write(gpu, REG_A3XX_VBIF_IN_WR_LIM_CONF0, 0x18181818);
185 		gpu_write(gpu, REG_A3XX_VBIF_IN_WR_LIM_CONF1, 0x18181818);
186 		/* Enable WR-REQ: */
187 		gpu_write(gpu, REG_A3XX_VBIF_GATE_OFF_WRREQ_EN, 0x00003f);
188 		/* Set up round robin arbitration between both AXI ports: */
189 		gpu_write(gpu, REG_A3XX_VBIF_ARB_CTL, 0x00000030);
190 		/* Set up VBIF_ROUND_ROBIN_QOS_ARB: */
191 		gpu_write(gpu, REG_A3XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x0001);
192 		/* Set up AOOO: */
193 		gpu_write(gpu, REG_A3XX_VBIF_OUT_AXI_AOOO_EN, 0x0000003f);
194 		gpu_write(gpu, REG_A3XX_VBIF_OUT_AXI_AOOO, 0x003f003f);
195 		/* Enable 1K sort: */
196 		gpu_write(gpu, REG_A3XX_VBIF_ABIT_SORT, 0x0001003f);
197 		gpu_write(gpu, REG_A3XX_VBIF_ABIT_SORT_CONF, 0x000000a4);
198 		/* Disable VBIF clock gating. This is to enable AXI running
199 		 * higher frequency than GPU:
200 		 */
201 		gpu_write(gpu, REG_A3XX_VBIF_CLKON, 0x00000001);
202 
203 	} else {
204 		BUG();
205 	}
206 
207 	/* Make all blocks contribute to the GPU BUSY perf counter: */
208 	gpu_write(gpu, REG_A3XX_RBBM_GPU_BUSY_MASKED, 0xffffffff);
209 
210 	/* Tune the hystersis counters for SP and CP idle detection: */
211 	gpu_write(gpu, REG_A3XX_RBBM_SP_HYST_CNT, 0x10);
212 	gpu_write(gpu, REG_A3XX_RBBM_WAIT_IDLE_CLOCKS_CTL, 0x10);
213 
214 	/* Enable the RBBM error reporting bits.  This lets us get
215 	 * useful information on failure:
216 	 */
217 	gpu_write(gpu, REG_A3XX_RBBM_AHB_CTL0, 0x00000001);
218 
219 	/* Enable AHB error reporting: */
220 	gpu_write(gpu, REG_A3XX_RBBM_AHB_CTL1, 0xa6ffffff);
221 
222 	/* Turn on the power counters: */
223 	gpu_write(gpu, REG_A3XX_RBBM_RBBM_CTL, 0x00030000);
224 
225 	/* Turn on hang detection - this spews a lot of useful information
226 	 * into the RBBM registers on a hang:
227 	 */
228 	gpu_write(gpu, REG_A3XX_RBBM_INTERFACE_HANG_INT_CTL, 0x00010fff);
229 
230 	/* Enable 64-byte cacheline size. HW Default is 32-byte (0x000000E0): */
231 	gpu_write(gpu, REG_A3XX_UCHE_CACHE_MODE_CONTROL_REG, 0x00000001);
232 
233 	/* Enable Clock gating: */
234 	if (adreno_is_a306(adreno_gpu))
235 		gpu_write(gpu, REG_A3XX_RBBM_CLOCK_CTL, 0xaaaaaaaa);
236 	else if (adreno_is_a320(adreno_gpu))
237 		gpu_write(gpu, REG_A3XX_RBBM_CLOCK_CTL, 0xbfffffff);
238 	else if (adreno_is_a330v2(adreno_gpu))
239 		gpu_write(gpu, REG_A3XX_RBBM_CLOCK_CTL, 0xaaaaaaaa);
240 	else if (adreno_is_a330(adreno_gpu))
241 		gpu_write(gpu, REG_A3XX_RBBM_CLOCK_CTL, 0xbffcffff);
242 
243 	if (adreno_is_a330v2(adreno_gpu))
244 		gpu_write(gpu, REG_A3XX_RBBM_GPR0_CTL, 0x05515455);
245 	else if (adreno_is_a330(adreno_gpu))
246 		gpu_write(gpu, REG_A3XX_RBBM_GPR0_CTL, 0x00000000);
247 
248 	/* Set the OCMEM base address for A330, etc */
249 	if (a3xx_gpu->ocmem.hdl) {
250 		gpu_write(gpu, REG_A3XX_RB_GMEM_BASE_ADDR,
251 			(unsigned int)(a3xx_gpu->ocmem.base >> 14));
252 	}
253 
254 	/* Turn on performance counters: */
255 	gpu_write(gpu, REG_A3XX_RBBM_PERFCTR_CTL, 0x01);
256 
257 	/* Enable the perfcntrs that we use.. */
258 	for (i = 0; i < gpu->num_perfcntrs; i++) {
259 		const struct msm_gpu_perfcntr *perfcntr = &gpu->perfcntrs[i];
260 		gpu_write(gpu, perfcntr->select_reg, perfcntr->select_val);
261 	}
262 
263 	gpu_write(gpu, REG_A3XX_RBBM_INT_0_MASK, A3XX_INT0_MASK);
264 
265 	ret = adreno_hw_init(gpu);
266 	if (ret)
267 		return ret;
268 
269 	/*
270 	 * Use the default ringbuffer size and block size but disable the RPTR
271 	 * shadow
272 	 */
273 	gpu_write(gpu, REG_AXXX_CP_RB_CNTL,
274 		MSM_GPU_RB_CNTL_DEFAULT | AXXX_CP_RB_CNTL_NO_UPDATE);
275 
276 	/* Set the ringbuffer address */
277 	gpu_write(gpu, REG_AXXX_CP_RB_BASE, lower_32_bits(gpu->rb[0]->iova));
278 
279 	/* setup access protection: */
280 	gpu_write(gpu, REG_A3XX_CP_PROTECT_CTRL, 0x00000007);
281 
282 	/* RBBM registers */
283 	gpu_write(gpu, REG_A3XX_CP_PROTECT(0), 0x63000040);
284 	gpu_write(gpu, REG_A3XX_CP_PROTECT(1), 0x62000080);
285 	gpu_write(gpu, REG_A3XX_CP_PROTECT(2), 0x600000cc);
286 	gpu_write(gpu, REG_A3XX_CP_PROTECT(3), 0x60000108);
287 	gpu_write(gpu, REG_A3XX_CP_PROTECT(4), 0x64000140);
288 	gpu_write(gpu, REG_A3XX_CP_PROTECT(5), 0x66000400);
289 
290 	/* CP registers */
291 	gpu_write(gpu, REG_A3XX_CP_PROTECT(6), 0x65000700);
292 	gpu_write(gpu, REG_A3XX_CP_PROTECT(7), 0x610007d8);
293 	gpu_write(gpu, REG_A3XX_CP_PROTECT(8), 0x620007e0);
294 	gpu_write(gpu, REG_A3XX_CP_PROTECT(9), 0x61001178);
295 	gpu_write(gpu, REG_A3XX_CP_PROTECT(10), 0x64001180);
296 
297 	/* RB registers */
298 	gpu_write(gpu, REG_A3XX_CP_PROTECT(11), 0x60003300);
299 
300 	/* VBIF registers */
301 	gpu_write(gpu, REG_A3XX_CP_PROTECT(12), 0x6b00c000);
302 
303 	/* NOTE: PM4/micro-engine firmware registers look to be the same
304 	 * for a2xx and a3xx.. we could possibly push that part down to
305 	 * adreno_gpu base class.  Or push both PM4 and PFP but
306 	 * parameterize the pfp ucode addr/data registers..
307 	 */
308 
309 	/* Load PM4: */
310 	ptr = (uint32_t *)(adreno_gpu->fw[ADRENO_FW_PM4]->data);
311 	len = adreno_gpu->fw[ADRENO_FW_PM4]->size / 4;
312 	DBG("loading PM4 ucode version: %x", ptr[1]);
313 
314 	gpu_write(gpu, REG_AXXX_CP_DEBUG,
315 			AXXX_CP_DEBUG_DYNAMIC_CLK_DISABLE |
316 			AXXX_CP_DEBUG_MIU_128BIT_WRITE_ENABLE);
317 	gpu_write(gpu, REG_AXXX_CP_ME_RAM_WADDR, 0);
318 	for (i = 1; i < len; i++)
319 		gpu_write(gpu, REG_AXXX_CP_ME_RAM_DATA, ptr[i]);
320 
321 	/* Load PFP: */
322 	ptr = (uint32_t *)(adreno_gpu->fw[ADRENO_FW_PFP]->data);
323 	len = adreno_gpu->fw[ADRENO_FW_PFP]->size / 4;
324 	DBG("loading PFP ucode version: %x", ptr[5]);
325 
326 	gpu_write(gpu, REG_A3XX_CP_PFP_UCODE_ADDR, 0);
327 	for (i = 1; i < len; i++)
328 		gpu_write(gpu, REG_A3XX_CP_PFP_UCODE_DATA, ptr[i]);
329 
330 	/* CP ROQ queue sizes (bytes) - RB:16, ST:16, IB1:32, IB2:64 */
331 	if (adreno_is_a305(adreno_gpu) || adreno_is_a306(adreno_gpu) ||
332 			adreno_is_a320(adreno_gpu)) {
333 		gpu_write(gpu, REG_AXXX_CP_QUEUE_THRESHOLDS,
334 				AXXX_CP_QUEUE_THRESHOLDS_CSQ_IB1_START(2) |
335 				AXXX_CP_QUEUE_THRESHOLDS_CSQ_IB2_START(6) |
336 				AXXX_CP_QUEUE_THRESHOLDS_CSQ_ST_START(14));
337 	} else if (adreno_is_a330(adreno_gpu)) {
338 		/* NOTE: this (value take from downstream android driver)
339 		 * includes some bits outside of the known bitfields.  But
340 		 * A330 has this "MERCIU queue" thing too, which might
341 		 * explain a new bitfield or reshuffling:
342 		 */
343 		gpu_write(gpu, REG_AXXX_CP_QUEUE_THRESHOLDS, 0x003e2008);
344 	}
345 
346 	/* clear ME_HALT to start micro engine */
347 	gpu_write(gpu, REG_AXXX_CP_ME_CNTL, 0);
348 
349 	return a3xx_me_init(gpu) ? 0 : -EINVAL;
350 }
351 
352 static void a3xx_recover(struct msm_gpu *gpu)
353 {
354 	int i;
355 
356 	adreno_dump_info(gpu);
357 
358 	for (i = 0; i < 8; i++) {
359 		printk("CP_SCRATCH_REG%d: %u\n", i,
360 			gpu_read(gpu, REG_AXXX_CP_SCRATCH_REG0 + i));
361 	}
362 
363 	/* dump registers before resetting gpu, if enabled: */
364 	if (hang_debug)
365 		a3xx_dump(gpu);
366 
367 	gpu_write(gpu, REG_A3XX_RBBM_SW_RESET_CMD, 1);
368 	gpu_read(gpu, REG_A3XX_RBBM_SW_RESET_CMD);
369 	gpu_write(gpu, REG_A3XX_RBBM_SW_RESET_CMD, 0);
370 	adreno_recover(gpu);
371 }
372 
373 static void a3xx_destroy(struct msm_gpu *gpu)
374 {
375 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
376 	struct a3xx_gpu *a3xx_gpu = to_a3xx_gpu(adreno_gpu);
377 
378 	DBG("%s", gpu->name);
379 
380 	adreno_gpu_cleanup(adreno_gpu);
381 
382 	adreno_gpu_ocmem_cleanup(&a3xx_gpu->ocmem);
383 
384 	kfree(a3xx_gpu);
385 }
386 
387 static bool a3xx_idle(struct msm_gpu *gpu)
388 {
389 	/* wait for ringbuffer to drain: */
390 	if (!adreno_idle(gpu, gpu->rb[0]))
391 		return false;
392 
393 	/* then wait for GPU to finish: */
394 	if (spin_until(!(gpu_read(gpu, REG_A3XX_RBBM_STATUS) &
395 			A3XX_RBBM_STATUS_GPU_BUSY))) {
396 		DRM_ERROR("%s: timeout waiting for GPU to idle!\n", gpu->name);
397 
398 		/* TODO maybe we need to reset GPU here to recover from hang? */
399 		return false;
400 	}
401 
402 	return true;
403 }
404 
405 static irqreturn_t a3xx_irq(struct msm_gpu *gpu)
406 {
407 	uint32_t status;
408 
409 	status = gpu_read(gpu, REG_A3XX_RBBM_INT_0_STATUS);
410 	DBG("%s: %08x", gpu->name, status);
411 
412 	// TODO
413 
414 	gpu_write(gpu, REG_A3XX_RBBM_INT_CLEAR_CMD, status);
415 
416 	msm_gpu_retire(gpu);
417 
418 	return IRQ_HANDLED;
419 }
420 
/*
 * Register offsets handed to the adreno core through adreno_gpu->registers.
 * Entries are laid out two at a time and the list is terminated by ~0.
 * NOTE(review): the pairs look like (first, last) offsets of register ranges
 * to dump - confirm against the consumer in the adreno core.
 */
static const unsigned int a3xx_registers[] = {
	0x0000, 0x0002,   0x0010, 0x0012,
	0x0018, 0x0018,   0x0020, 0x0027,
	0x0029, 0x002b,   0x002e, 0x0033,
	0x0040, 0x0042,   0x0050, 0x005c,
	0x0060, 0x006c,   0x0080, 0x0082,
	0x0084, 0x0088,   0x0090, 0x00e5,
	0x00ea, 0x00ed,   0x0100, 0x0100,
	0x0110, 0x0123,   0x01c0, 0x01c1,
	0x01c3, 0x01c5,   0x01c7, 0x01c7,
	0x01d5, 0x01d9,   0x01dc, 0x01dd,
	0x01ea, 0x01ea,   0x01ee, 0x01f1,
	0x01f5, 0x01f5,   0x01fc, 0x01ff,
	0x0440, 0x0440,   0x0443, 0x0443,
	0x0445, 0x0445,   0x044d, 0x044f,
	0x0452, 0x0452,   0x0454, 0x046f,
	0x047c, 0x047c,   0x047f, 0x047f,
	0x0578, 0x057f,   0x0600, 0x0602,
	0x0605, 0x0607,   0x060a, 0x060e,
	0x0612, 0x0614,   0x0c01, 0x0c02,
	0x0c06, 0x0c1d,   0x0c3d, 0x0c3f,
	0x0c48, 0x0c4b,   0x0c80, 0x0c80,
	0x0c88, 0x0c8b,   0x0ca0, 0x0cb7,
	0x0cc0, 0x0cc1,   0x0cc6, 0x0cc7,
	0x0ce4, 0x0ce5,   0x0e00, 0x0e05,
	0x0e0c, 0x0e0c,   0x0e22, 0x0e23,
	0x0e41, 0x0e45,   0x0e64, 0x0e65,
	0x0e80, 0x0e82,   0x0e84, 0x0e89,
	0x0ea0, 0x0ea1,   0x0ea4, 0x0ea7,
	0x0ec4, 0x0ecb,   0x0ee0, 0x0ee0,
	0x0f00, 0x0f01,   0x0f03, 0x0f09,
	0x2040, 0x2040,   0x2044, 0x2044,
	0x2048, 0x204d,   0x2068, 0x2069,
	0x206c, 0x206d,   0x2070, 0x2070,
	0x2072, 0x2072,   0x2074, 0x2075,
	0x2079, 0x207a,   0x20c0, 0x20d3,
	0x20e4, 0x20ef,   0x2100, 0x2109,
	0x210c, 0x210c,   0x210e, 0x210e,
	0x2110, 0x2111,   0x2114, 0x2115,
	0x21e4, 0x21e4,   0x21ea, 0x21ea,
	0x21ec, 0x21ed,   0x21f0, 0x21f0,
	0x2200, 0x2212,   0x2214, 0x2217,
	0x221a, 0x221a,   0x2240, 0x227e,
	0x2280, 0x228b,   0x22c0, 0x22c0,
	0x22c4, 0x22ce,   0x22d0, 0x22d8,
	0x22df, 0x22e6,   0x22e8, 0x22e9,
	0x22ec, 0x22ec,   0x22f0, 0x22f7,
	0x22ff, 0x22ff,   0x2340, 0x2343,
	0x2440, 0x2440,   0x2444, 0x2444,
	0x2448, 0x244d,   0x2468, 0x2469,
	0x246c, 0x246d,   0x2470, 0x2470,
	0x2472, 0x2472,   0x2474, 0x2475,
	0x2479, 0x247a,   0x24c0, 0x24d3,
	0x24e4, 0x24ef,   0x2500, 0x2509,
	0x250c, 0x250c,   0x250e, 0x250e,
	0x2510, 0x2511,   0x2514, 0x2515,
	0x25e4, 0x25e4,   0x25ea, 0x25ea,
	0x25ec, 0x25ed,   0x25f0, 0x25f0,
	0x2600, 0x2612,   0x2614, 0x2617,
	0x261a, 0x261a,   0x2640, 0x267e,
	0x2680, 0x268b,   0x26c0, 0x26c0,
	0x26c4, 0x26ce,   0x26d0, 0x26d8,
	0x26df, 0x26e6,   0x26e8, 0x26e9,
	0x26ec, 0x26ec,   0x26f0, 0x26f7,
	0x26ff, 0x26ff,   0x2740, 0x2743,
	0x300c, 0x300e,   0x301c, 0x301d,
	0x302a, 0x302a,   0x302c, 0x302d,
	0x3030, 0x3031,   0x3034, 0x3036,
	0x303c, 0x303c,   0x305e, 0x305f,
	~0   /* sentinel */
};
458 
459 /* would be nice to not have to duplicate the _show() stuff with printk(): */
460 static void a3xx_dump(struct msm_gpu *gpu)
461 {
462 	printk("status:   %08x\n",
463 			gpu_read(gpu, REG_A3XX_RBBM_STATUS));
464 	adreno_dump(gpu);
465 }
466 
467 static struct msm_gpu_state *a3xx_gpu_state_get(struct msm_gpu *gpu)
468 {
469 	struct msm_gpu_state *state = kzalloc(sizeof(*state), GFP_KERNEL);
470 
471 	if (!state)
472 		return ERR_PTR(-ENOMEM);
473 
474 	adreno_gpu_state_get(gpu, state);
475 
476 	state->rbbm_status = gpu_read(gpu, REG_A3XX_RBBM_STATUS);
477 
478 	return state;
479 }
480 
481 static u32 a3xx_get_rptr(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
482 {
483 	ring->memptrs->rptr = gpu_read(gpu, REG_AXXX_CP_RB_RPTR);
484 	return ring->memptrs->rptr;
485 }
486 
487 static const struct adreno_gpu_funcs funcs = {
488 	.base = {
489 		.get_param = adreno_get_param,
490 		.hw_init = a3xx_hw_init,
491 		.pm_suspend = msm_gpu_pm_suspend,
492 		.pm_resume = msm_gpu_pm_resume,
493 		.recover = a3xx_recover,
494 		.submit = a3xx_submit,
495 		.active_ring = adreno_active_ring,
496 		.irq = a3xx_irq,
497 		.destroy = a3xx_destroy,
498 #if defined(CONFIG_DEBUG_FS) || defined(CONFIG_DEV_COREDUMP)
499 		.show = adreno_show,
500 #endif
501 		.gpu_state_get = a3xx_gpu_state_get,
502 		.gpu_state_put = adreno_gpu_state_put,
503 		.create_address_space = adreno_iommu_create_address_space,
504 		.get_rptr = a3xx_get_rptr,
505 	},
506 };
507 
508 static const struct msm_gpu_perfcntr perfcntrs[] = {
509 	{ REG_A3XX_SP_PERFCOUNTER6_SELECT, REG_A3XX_RBBM_PERFCTR_SP_6_LO,
510 			SP_ALU_ACTIVE_CYCLES, "ALUACTIVE" },
511 	{ REG_A3XX_SP_PERFCOUNTER7_SELECT, REG_A3XX_RBBM_PERFCTR_SP_7_LO,
512 			SP_FS_FULL_ALU_INSTRUCTIONS, "ALUFULL" },
513 };
514 
515 struct msm_gpu *a3xx_gpu_init(struct drm_device *dev)
516 {
517 	struct a3xx_gpu *a3xx_gpu = NULL;
518 	struct adreno_gpu *adreno_gpu;
519 	struct msm_gpu *gpu;
520 	struct msm_drm_private *priv = dev->dev_private;
521 	struct platform_device *pdev = priv->gpu_pdev;
522 	struct icc_path *ocmem_icc_path;
523 	struct icc_path *icc_path;
524 	int ret;
525 
526 	if (!pdev) {
527 		DRM_DEV_ERROR(dev->dev, "no a3xx device\n");
528 		ret = -ENXIO;
529 		goto fail;
530 	}
531 
532 	a3xx_gpu = kzalloc(sizeof(*a3xx_gpu), GFP_KERNEL);
533 	if (!a3xx_gpu) {
534 		ret = -ENOMEM;
535 		goto fail;
536 	}
537 
538 	adreno_gpu = &a3xx_gpu->base;
539 	gpu = &adreno_gpu->base;
540 
541 	gpu->perfcntrs = perfcntrs;
542 	gpu->num_perfcntrs = ARRAY_SIZE(perfcntrs);
543 
544 	adreno_gpu->registers = a3xx_registers;
545 
546 	ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs, 1);
547 	if (ret)
548 		goto fail;
549 
550 	/* if needed, allocate gmem: */
551 	if (adreno_is_a330(adreno_gpu)) {
552 		ret = adreno_gpu_ocmem_init(&adreno_gpu->base.pdev->dev,
553 					    adreno_gpu, &a3xx_gpu->ocmem);
554 		if (ret)
555 			goto fail;
556 	}
557 
558 	if (!gpu->aspace) {
559 		/* TODO we think it is possible to configure the GPU to
560 		 * restrict access to VRAM carveout.  But the required
561 		 * registers are unknown.  For now just bail out and
562 		 * limp along with just modesetting.  If it turns out
563 		 * to not be possible to restrict access, then we must
564 		 * implement a cmdstream validator.
565 		 */
566 		DRM_DEV_ERROR(dev->dev, "No memory protection without IOMMU\n");
567 		if (!allow_vram_carveout) {
568 			ret = -ENXIO;
569 			goto fail;
570 		}
571 	}
572 
573 	icc_path = devm_of_icc_get(&pdev->dev, "gfx-mem");
574 	ret = IS_ERR(icc_path);
575 	if (ret)
576 		goto fail;
577 
578 	ocmem_icc_path = devm_of_icc_get(&pdev->dev, "ocmem");
579 	ret = IS_ERR(ocmem_icc_path);
580 	if (ret) {
581 		/* allow -ENODATA, ocmem icc is optional */
582 		if (ret != -ENODATA)
583 			goto fail;
584 		ocmem_icc_path = NULL;
585 	}
586 
587 
588 	/*
589 	 * Set the ICC path to maximum speed for now by multiplying the fastest
590 	 * frequency by the bus width (8). We'll want to scale this later on to
591 	 * improve battery life.
592 	 */
593 	icc_set_bw(icc_path, 0, Bps_to_icc(gpu->fast_rate) * 8);
594 	icc_set_bw(ocmem_icc_path, 0, Bps_to_icc(gpu->fast_rate) * 8);
595 
596 	return gpu;
597 
598 fail:
599 	if (a3xx_gpu)
600 		a3xx_destroy(&a3xx_gpu->base.base);
601 
602 	return ERR_PTR(ret);
603 }
604