xref: /openbmc/linux/drivers/gpu/drm/msm/adreno/a4xx_gpu.c (revision 6417f03132a6952cd17ddd8eaddbac92b61b17e0)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /* Copyright (c) 2014 The Linux Foundation. All rights reserved.
3  */
4 #include "a4xx_gpu.h"
5 
/*
 * Set of RBBM interrupt-0 bits the driver enables: a4xx_hw_init() writes
 * this value to REG_A4XX_RBBM_INT_0_MASK.
 */
#define A4XX_INT0_MASK \
	(A4XX_INT0_RBBM_AHB_ERROR |        \
	 A4XX_INT0_RBBM_ATB_BUS_OVERFLOW | \
	 A4XX_INT0_CP_T0_PACKET_IN_IB |    \
	 A4XX_INT0_CP_OPCODE_ERROR |       \
	 A4XX_INT0_CP_RESERVED_BIT_ERROR | \
	 A4XX_INT0_CP_HW_FAULT |           \
	 A4XX_INT0_CP_IB1_INT |            \
	 A4XX_INT0_CP_IB2_INT |            \
	 A4XX_INT0_CP_RB_INT |             \
	 A4XX_INT0_CP_REG_PROTECT_FAULT |  \
	 A4XX_INT0_CP_AHB_ERROR_HALT |     \
	 A4XX_INT0_CACHE_FLUSH_TS |        \
	 A4XX_INT0_UCHE_OOB_ACCESS)
20 
/*
 * Defined outside this file; when set, a4xx_recover() dumps the GPU
 * registers before resetting the GPU.
 */
extern bool hang_debug;
/* Forward declarations: both are defined further down but used earlier. */
static void a4xx_dump(struct msm_gpu *gpu);
static bool a4xx_idle(struct msm_gpu *gpu);
24 
25 static void a4xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit)
26 {
27 	struct msm_drm_private *priv = gpu->dev->dev_private;
28 	struct msm_ringbuffer *ring = submit->ring;
29 	unsigned int i;
30 
31 	for (i = 0; i < submit->nr_cmds; i++) {
32 		switch (submit->cmd[i].type) {
33 		case MSM_SUBMIT_CMD_IB_TARGET_BUF:
34 			/* ignore IB-targets */
35 			break;
36 		case MSM_SUBMIT_CMD_CTX_RESTORE_BUF:
37 			/* ignore if there has not been a ctx switch: */
38 			if (priv->lastctx == submit->queue->ctx)
39 				break;
40 			fallthrough;
41 		case MSM_SUBMIT_CMD_BUF:
42 			OUT_PKT3(ring, CP_INDIRECT_BUFFER_PFE, 2);
43 			OUT_RING(ring, lower_32_bits(submit->cmd[i].iova));
44 			OUT_RING(ring, submit->cmd[i].size);
45 			OUT_PKT2(ring);
46 			break;
47 		}
48 	}
49 
50 	OUT_PKT0(ring, REG_AXXX_CP_SCRATCH_REG2, 1);
51 	OUT_RING(ring, submit->seqno);
52 
53 	/* Flush HLSQ lazy updates to make sure there is nothing
54 	 * pending for indirect loads after the timestamp has
55 	 * passed:
56 	 */
57 	OUT_PKT3(ring, CP_EVENT_WRITE, 1);
58 	OUT_RING(ring, HLSQ_FLUSH);
59 
60 	/* wait for idle before cache flush/interrupt */
61 	OUT_PKT3(ring, CP_WAIT_FOR_IDLE, 1);
62 	OUT_RING(ring, 0x00000000);
63 
64 	/* BIT(31) of CACHE_FLUSH_TS triggers CACHE_FLUSH_TS IRQ from GPU */
65 	OUT_PKT3(ring, CP_EVENT_WRITE, 3);
66 	OUT_RING(ring, CACHE_FLUSH_TS | BIT(31));
67 	OUT_RING(ring, rbmemptr(ring, fence));
68 	OUT_RING(ring, submit->seqno);
69 
70 	adreno_flush(gpu, ring, REG_A4XX_CP_RB_WPTR);
71 }
72 
73 /*
74  * a4xx_enable_hwcg() - Program the clock control registers
75  * @device: The adreno device pointer
76  */
77 static void a4xx_enable_hwcg(struct msm_gpu *gpu)
78 {
79 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
80 	unsigned int i;
81 	for (i = 0; i < 4; i++)
82 		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL_TP(i), 0x02222202);
83 	for (i = 0; i < 4; i++)
84 		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL2_TP(i), 0x00002222);
85 	for (i = 0; i < 4; i++)
86 		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_HYST_TP(i), 0x0E739CE7);
87 	for (i = 0; i < 4; i++)
88 		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_TP(i), 0x00111111);
89 	for (i = 0; i < 4; i++)
90 		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL_SP(i), 0x22222222);
91 	for (i = 0; i < 4; i++)
92 		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL2_SP(i), 0x00222222);
93 	for (i = 0; i < 4; i++)
94 		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_HYST_SP(i), 0x00000104);
95 	for (i = 0; i < 4; i++)
96 		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_SP(i), 0x00000081);
97 	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL_UCHE, 0x22222222);
98 	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL2_UCHE, 0x02222222);
99 	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL3_UCHE, 0x00000000);
100 	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL4_UCHE, 0x00000000);
101 	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_HYST_UCHE, 0x00004444);
102 	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_UCHE, 0x00001112);
103 	for (i = 0; i < 4; i++)
104 		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL_RB(i), 0x22222222);
105 
106 	/* Disable L1 clocking in A420 due to CCU issues with it */
107 	for (i = 0; i < 4; i++) {
108 		if (adreno_is_a420(adreno_gpu)) {
109 			gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL2_RB(i),
110 					0x00002020);
111 		} else {
112 			gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL2_RB(i),
113 					0x00022020);
114 		}
115 	}
116 
117 	/* No CCU for A405 */
118 	if (!adreno_is_a405(adreno_gpu)) {
119 		for (i = 0; i < 4; i++) {
120 			gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL_MARB_CCU(i),
121 					0x00000922);
122 		}
123 
124 		for (i = 0; i < 4; i++) {
125 			gpu_write(gpu, REG_A4XX_RBBM_CLOCK_HYST_RB_MARB_CCU(i),
126 					0x00000000);
127 		}
128 
129 		for (i = 0; i < 4; i++) {
130 			gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_RB_MARB_CCU_L1(i),
131 					0x00000001);
132 		}
133 	}
134 
135 	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_MODE_GPC, 0x02222222);
136 	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_HYST_GPC, 0x04100104);
137 	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_GPC, 0x00022222);
138 	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL_COM_DCOM, 0x00000022);
139 	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_HYST_COM_DCOM, 0x0000010F);
140 	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_COM_DCOM, 0x00000022);
141 	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL_TSE_RAS_RBBM, 0x00222222);
142 	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM, 0x00004104);
143 	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM, 0x00000222);
144 	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL_HLSQ , 0x00000000);
145 	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_HYST_HLSQ, 0x00000000);
146 	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_HLSQ, 0x00220000);
147 	/* Early A430's have a timing issue with SP/TP power collapse;
148 	   disabling HW clock gating prevents it. */
149 	if (adreno_is_a430(adreno_gpu) && adreno_gpu->rev.patchid < 2)
150 		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL, 0);
151 	else
152 		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL, 0xAAAAAAAA);
153 	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL2, 0);
154 }
155 
156 
157 static bool a4xx_me_init(struct msm_gpu *gpu)
158 {
159 	struct msm_ringbuffer *ring = gpu->rb[0];
160 
161 	OUT_PKT3(ring, CP_ME_INIT, 17);
162 	OUT_RING(ring, 0x000003f7);
163 	OUT_RING(ring, 0x00000000);
164 	OUT_RING(ring, 0x00000000);
165 	OUT_RING(ring, 0x00000000);
166 	OUT_RING(ring, 0x00000080);
167 	OUT_RING(ring, 0x00000100);
168 	OUT_RING(ring, 0x00000180);
169 	OUT_RING(ring, 0x00006600);
170 	OUT_RING(ring, 0x00000150);
171 	OUT_RING(ring, 0x0000014e);
172 	OUT_RING(ring, 0x00000154);
173 	OUT_RING(ring, 0x00000001);
174 	OUT_RING(ring, 0x00000000);
175 	OUT_RING(ring, 0x00000000);
176 	OUT_RING(ring, 0x00000000);
177 	OUT_RING(ring, 0x00000000);
178 	OUT_RING(ring, 0x00000000);
179 
180 	adreno_flush(gpu, ring, REG_A4XX_CP_RB_WPTR);
181 	return a4xx_idle(gpu);
182 }
183 
184 static int a4xx_hw_init(struct msm_gpu *gpu)
185 {
186 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
187 	struct a4xx_gpu *a4xx_gpu = to_a4xx_gpu(adreno_gpu);
188 	uint32_t *ptr, len;
189 	int i, ret;
190 
191 	if (adreno_is_a405(adreno_gpu)) {
192 		gpu_write(gpu, REG_A4XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x00000003);
193 	} else if (adreno_is_a420(adreno_gpu)) {
194 		gpu_write(gpu, REG_A4XX_VBIF_ABIT_SORT, 0x0001001F);
195 		gpu_write(gpu, REG_A4XX_VBIF_ABIT_SORT_CONF, 0x000000A4);
196 		gpu_write(gpu, REG_A4XX_VBIF_GATE_OFF_WRREQ_EN, 0x00000001);
197 		gpu_write(gpu, REG_A4XX_VBIF_IN_RD_LIM_CONF0, 0x18181818);
198 		gpu_write(gpu, REG_A4XX_VBIF_IN_RD_LIM_CONF1, 0x00000018);
199 		gpu_write(gpu, REG_A4XX_VBIF_IN_WR_LIM_CONF0, 0x18181818);
200 		gpu_write(gpu, REG_A4XX_VBIF_IN_WR_LIM_CONF1, 0x00000018);
201 		gpu_write(gpu, REG_A4XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x00000003);
202 	} else if (adreno_is_a430(adreno_gpu)) {
203 		gpu_write(gpu, REG_A4XX_VBIF_GATE_OFF_WRREQ_EN, 0x00000001);
204 		gpu_write(gpu, REG_A4XX_VBIF_IN_RD_LIM_CONF0, 0x18181818);
205 		gpu_write(gpu, REG_A4XX_VBIF_IN_RD_LIM_CONF1, 0x00000018);
206 		gpu_write(gpu, REG_A4XX_VBIF_IN_WR_LIM_CONF0, 0x18181818);
207 		gpu_write(gpu, REG_A4XX_VBIF_IN_WR_LIM_CONF1, 0x00000018);
208 		gpu_write(gpu, REG_A4XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x00000003);
209 	} else {
210 		BUG();
211 	}
212 
213 	/* Make all blocks contribute to the GPU BUSY perf counter */
214 	gpu_write(gpu, REG_A4XX_RBBM_GPU_BUSY_MASKED, 0xffffffff);
215 
216 	/* Tune the hystersis counters for SP and CP idle detection */
217 	gpu_write(gpu, REG_A4XX_RBBM_SP_HYST_CNT, 0x10);
218 	gpu_write(gpu, REG_A4XX_RBBM_WAIT_IDLE_CLOCKS_CTL, 0x10);
219 
220 	if (adreno_is_a430(adreno_gpu)) {
221 		gpu_write(gpu, REG_A4XX_RBBM_WAIT_IDLE_CLOCKS_CTL2, 0x30);
222 	}
223 
224 	 /* Enable the RBBM error reporting bits */
225 	gpu_write(gpu, REG_A4XX_RBBM_AHB_CTL0, 0x00000001);
226 
227 	/* Enable AHB error reporting*/
228 	gpu_write(gpu, REG_A4XX_RBBM_AHB_CTL1, 0xa6ffffff);
229 
230 	/* Enable power counters*/
231 	gpu_write(gpu, REG_A4XX_RBBM_RBBM_CTL, 0x00000030);
232 
233 	/*
234 	 * Turn on hang detection - this spews a lot of useful information
235 	 * into the RBBM registers on a hang:
236 	 */
237 	gpu_write(gpu, REG_A4XX_RBBM_INTERFACE_HANG_INT_CTL,
238 			(1 << 30) | 0xFFFF);
239 
240 	gpu_write(gpu, REG_A4XX_RB_GMEM_BASE_ADDR,
241 			(unsigned int)(a4xx_gpu->ocmem.base >> 14));
242 
243 	/* Turn on performance counters: */
244 	gpu_write(gpu, REG_A4XX_RBBM_PERFCTR_CTL, 0x01);
245 
246 	/* use the first CP counter for timestamp queries.. userspace may set
247 	 * this as well but it selects the same counter/countable:
248 	 */
249 	gpu_write(gpu, REG_A4XX_CP_PERFCTR_CP_SEL_0, CP_ALWAYS_COUNT);
250 
251 	if (adreno_is_a430(adreno_gpu))
252 		gpu_write(gpu, REG_A4XX_UCHE_CACHE_WAYS_VFD, 0x07);
253 
254 	/* Disable L2 bypass to avoid UCHE out of bounds errors */
255 	gpu_write(gpu, REG_A4XX_UCHE_TRAP_BASE_LO, 0xffff0000);
256 	gpu_write(gpu, REG_A4XX_UCHE_TRAP_BASE_HI, 0xffff0000);
257 
258 	gpu_write(gpu, REG_A4XX_CP_DEBUG, (1 << 25) |
259 			(adreno_is_a420(adreno_gpu) ? (1 << 29) : 0));
260 
261 	/* On A430 enable SP regfile sleep for power savings */
262 	/* TODO downstream does this for !420, so maybe applies for 405 too? */
263 	if (!adreno_is_a420(adreno_gpu)) {
264 		gpu_write(gpu, REG_A4XX_RBBM_SP_REGFILE_SLEEP_CNTL_0,
265 			0x00000441);
266 		gpu_write(gpu, REG_A4XX_RBBM_SP_REGFILE_SLEEP_CNTL_1,
267 			0x00000441);
268 	}
269 
270 	a4xx_enable_hwcg(gpu);
271 
272 	/*
273 	 * For A420 set RBBM_CLOCK_DELAY_HLSQ.CGC_HLSQ_TP_EARLY_CYC >= 2
274 	 * due to timing issue with HLSQ_TP_CLK_EN
275 	 */
276 	if (adreno_is_a420(adreno_gpu)) {
277 		unsigned int val;
278 		val = gpu_read(gpu, REG_A4XX_RBBM_CLOCK_DELAY_HLSQ);
279 		val &= ~A4XX_CGC_HLSQ_EARLY_CYC__MASK;
280 		val |= 2 << A4XX_CGC_HLSQ_EARLY_CYC__SHIFT;
281 		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_HLSQ, val);
282 	}
283 
284 	/* setup access protection: */
285 	gpu_write(gpu, REG_A4XX_CP_PROTECT_CTRL, 0x00000007);
286 
287 	/* RBBM registers */
288 	gpu_write(gpu, REG_A4XX_CP_PROTECT(0), 0x62000010);
289 	gpu_write(gpu, REG_A4XX_CP_PROTECT(1), 0x63000020);
290 	gpu_write(gpu, REG_A4XX_CP_PROTECT(2), 0x64000040);
291 	gpu_write(gpu, REG_A4XX_CP_PROTECT(3), 0x65000080);
292 	gpu_write(gpu, REG_A4XX_CP_PROTECT(4), 0x66000100);
293 	gpu_write(gpu, REG_A4XX_CP_PROTECT(5), 0x64000200);
294 
295 	/* CP registers */
296 	gpu_write(gpu, REG_A4XX_CP_PROTECT(6), 0x67000800);
297 	gpu_write(gpu, REG_A4XX_CP_PROTECT(7), 0x64001600);
298 
299 
300 	/* RB registers */
301 	gpu_write(gpu, REG_A4XX_CP_PROTECT(8), 0x60003300);
302 
303 	/* HLSQ registers */
304 	gpu_write(gpu, REG_A4XX_CP_PROTECT(9), 0x60003800);
305 
306 	/* VPC registers */
307 	gpu_write(gpu, REG_A4XX_CP_PROTECT(10), 0x61003980);
308 
309 	/* SMMU registers */
310 	gpu_write(gpu, REG_A4XX_CP_PROTECT(11), 0x6e010000);
311 
312 	gpu_write(gpu, REG_A4XX_RBBM_INT_0_MASK, A4XX_INT0_MASK);
313 
314 	ret = adreno_hw_init(gpu);
315 	if (ret)
316 		return ret;
317 
318 	/*
319 	 * Use the default ringbuffer size and block size but disable the RPTR
320 	 * shadow
321 	 */
322 	gpu_write(gpu, REG_A4XX_CP_RB_CNTL,
323 		MSM_GPU_RB_CNTL_DEFAULT | AXXX_CP_RB_CNTL_NO_UPDATE);
324 
325 	/* Set the ringbuffer address */
326 	gpu_write(gpu, REG_A4XX_CP_RB_BASE, lower_32_bits(gpu->rb[0]->iova));
327 
328 	/* Load PM4: */
329 	ptr = (uint32_t *)(adreno_gpu->fw[ADRENO_FW_PM4]->data);
330 	len = adreno_gpu->fw[ADRENO_FW_PM4]->size / 4;
331 	DBG("loading PM4 ucode version: %u", ptr[0]);
332 	gpu_write(gpu, REG_A4XX_CP_ME_RAM_WADDR, 0);
333 	for (i = 1; i < len; i++)
334 		gpu_write(gpu, REG_A4XX_CP_ME_RAM_DATA, ptr[i]);
335 
336 	/* Load PFP: */
337 	ptr = (uint32_t *)(adreno_gpu->fw[ADRENO_FW_PFP]->data);
338 	len = adreno_gpu->fw[ADRENO_FW_PFP]->size / 4;
339 	DBG("loading PFP ucode version: %u", ptr[0]);
340 
341 	gpu_write(gpu, REG_A4XX_CP_PFP_UCODE_ADDR, 0);
342 	for (i = 1; i < len; i++)
343 		gpu_write(gpu, REG_A4XX_CP_PFP_UCODE_DATA, ptr[i]);
344 
345 	/* clear ME_HALT to start micro engine */
346 	gpu_write(gpu, REG_A4XX_CP_ME_CNTL, 0);
347 
348 	return a4xx_me_init(gpu) ? 0 : -EINVAL;
349 }
350 
351 static void a4xx_recover(struct msm_gpu *gpu)
352 {
353 	int i;
354 
355 	adreno_dump_info(gpu);
356 
357 	for (i = 0; i < 8; i++) {
358 		printk("CP_SCRATCH_REG%d: %u\n", i,
359 			gpu_read(gpu, REG_AXXX_CP_SCRATCH_REG0 + i));
360 	}
361 
362 	/* dump registers before resetting gpu, if enabled: */
363 	if (hang_debug)
364 		a4xx_dump(gpu);
365 
366 	gpu_write(gpu, REG_A4XX_RBBM_SW_RESET_CMD, 1);
367 	gpu_read(gpu, REG_A4XX_RBBM_SW_RESET_CMD);
368 	gpu_write(gpu, REG_A4XX_RBBM_SW_RESET_CMD, 0);
369 	adreno_recover(gpu);
370 }
371 
372 static void a4xx_destroy(struct msm_gpu *gpu)
373 {
374 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
375 	struct a4xx_gpu *a4xx_gpu = to_a4xx_gpu(adreno_gpu);
376 
377 	DBG("%s", gpu->name);
378 
379 	adreno_gpu_cleanup(adreno_gpu);
380 
381 	adreno_gpu_ocmem_cleanup(&a4xx_gpu->ocmem);
382 
383 	kfree(a4xx_gpu);
384 }
385 
386 static bool a4xx_idle(struct msm_gpu *gpu)
387 {
388 	/* wait for ringbuffer to drain: */
389 	if (!adreno_idle(gpu, gpu->rb[0]))
390 		return false;
391 
392 	/* then wait for GPU to finish: */
393 	if (spin_until(!(gpu_read(gpu, REG_A4XX_RBBM_STATUS) &
394 					A4XX_RBBM_STATUS_GPU_BUSY))) {
395 		DRM_ERROR("%s: timeout waiting for GPU to idle!\n", gpu->name);
396 		/* TODO maybe we need to reset GPU here to recover from hang? */
397 		return false;
398 	}
399 
400 	return true;
401 }
402 
403 static irqreturn_t a4xx_irq(struct msm_gpu *gpu)
404 {
405 	uint32_t status;
406 
407 	status = gpu_read(gpu, REG_A4XX_RBBM_INT_0_STATUS);
408 	DBG("%s: Int status %08x", gpu->name, status);
409 
410 	if (status & A4XX_INT0_CP_REG_PROTECT_FAULT) {
411 		uint32_t reg = gpu_read(gpu, REG_A4XX_CP_PROTECT_STATUS);
412 		printk("CP | Protected mode error| %s | addr=%x\n",
413 			reg & (1 << 24) ? "WRITE" : "READ",
414 			(reg & 0xFFFFF) >> 2);
415 	}
416 
417 	gpu_write(gpu, REG_A4XX_RBBM_INT_CLEAR_CMD, status);
418 
419 	msm_gpu_retire(gpu);
420 
421 	return IRQ_HANDLED;
422 }
423 
/*
 * Register list used for every A4xx variant except A405 (a4xx_gpu_init()
 * stores it in adreno_gpu->registers).  The list is terminated by a ~0
 * sentinel.
 *
 * NOTE(review): the values look like (first, last) pairs of register
 * offsets describing ranges; the consumer is outside this file, so
 * confirm against the adreno core code.
 */
static const unsigned int a4xx_registers[] = {
	/* RBBM */
	0x0000, 0x0002, 0x0004, 0x0021, 0x0023, 0x0024, 0x0026, 0x0026,
	0x0028, 0x002B, 0x002E, 0x0034, 0x0037, 0x0044, 0x0047, 0x0066,
	0x0068, 0x0095, 0x009C, 0x0170, 0x0174, 0x01AF,
	/* CP */
	0x0200, 0x0233, 0x0240, 0x0250, 0x04C0, 0x04DD, 0x0500, 0x050B,
	0x0578, 0x058F,
	/* VSC */
	0x0C00, 0x0C03, 0x0C08, 0x0C41, 0x0C50, 0x0C51,
	/* GRAS */
	0x0C80, 0x0C81, 0x0C88, 0x0C8F,
	/* RB */
	0x0CC0, 0x0CC0, 0x0CC4, 0x0CD2,
	/* PC */
	0x0D00, 0x0D0C, 0x0D10, 0x0D17, 0x0D20, 0x0D23,
	/* VFD */
	0x0E40, 0x0E4A,
	/* VPC */
	0x0E60, 0x0E61, 0x0E63, 0x0E68,
	/* UCHE */
	0x0E80, 0x0E84, 0x0E88, 0x0E95,
	/* VMIDMT */
	0x1000, 0x1000, 0x1002, 0x1002, 0x1004, 0x1004, 0x1008, 0x100A,
	0x100C, 0x100D, 0x100F, 0x1010, 0x1012, 0x1016, 0x1024, 0x1024,
	0x1027, 0x1027, 0x1100, 0x1100, 0x1102, 0x1102, 0x1104, 0x1104,
	0x1110, 0x1110, 0x1112, 0x1116, 0x1124, 0x1124, 0x1300, 0x1300,
	0x1380, 0x1380,
	/* GRAS CTX 0 */
	0x2000, 0x2004, 0x2008, 0x2067, 0x2070, 0x2078, 0x207B, 0x216E,
	/* PC CTX 0 */
	0x21C0, 0x21C6, 0x21D0, 0x21D0, 0x21D9, 0x21D9, 0x21E5, 0x21E7,
	/* VFD CTX 0 */
	0x2200, 0x2204, 0x2208, 0x22A9,
	/* GRAS CTX 1 */
	0x2400, 0x2404, 0x2408, 0x2467, 0x2470, 0x2478, 0x247B, 0x256E,
	/* PC CTX 1 */
	0x25C0, 0x25C6, 0x25D0, 0x25D0, 0x25D9, 0x25D9, 0x25E5, 0x25E7,
	/* VFD CTX 1 */
	0x2600, 0x2604, 0x2608, 0x26A9,
	/* XPU */
	0x2C00, 0x2C01, 0x2C10, 0x2C10, 0x2C12, 0x2C16, 0x2C1D, 0x2C20,
	0x2C28, 0x2C28, 0x2C30, 0x2C30, 0x2C32, 0x2C36, 0x2C40, 0x2C40,
	0x2C50, 0x2C50, 0x2C52, 0x2C56, 0x2C80, 0x2C80, 0x2C94, 0x2C95,
	/* VBIF */
	/* NOTE(review): entries from 0x4000 upward also sit under this heading - confirm which block they belong to */
	0x3000, 0x3007, 0x300C, 0x3014, 0x3018, 0x301D, 0x3020, 0x3022,
	0x3024, 0x3026, 0x3028, 0x302A, 0x302C, 0x302D, 0x3030, 0x3031,
	0x3034, 0x3036, 0x3038, 0x3038, 0x303C, 0x303D, 0x3040, 0x3040,
	0x3049, 0x3049, 0x3058, 0x3058, 0x305B, 0x3061, 0x3064, 0x3068,
	0x306C, 0x306D, 0x3080, 0x3088, 0x308B, 0x308C, 0x3090, 0x3094,
	0x3098, 0x3098, 0x309C, 0x309C, 0x30C0, 0x30C0, 0x30C8, 0x30C8,
	0x30D0, 0x30D0, 0x30D8, 0x30D8, 0x30E0, 0x30E0, 0x3100, 0x3100,
	0x3108, 0x3108, 0x3110, 0x3110, 0x3118, 0x3118, 0x3120, 0x3120,
	0x3124, 0x3125, 0x3129, 0x3129, 0x3131, 0x3131, 0x330C, 0x330C,
	0x3310, 0x3310, 0x3400, 0x3401, 0x3410, 0x3410, 0x3412, 0x3416,
	0x341D, 0x3420, 0x3428, 0x3428, 0x3430, 0x3430, 0x3432, 0x3436,
	0x3440, 0x3440, 0x3450, 0x3450, 0x3452, 0x3456, 0x3480, 0x3480,
	0x3494, 0x3495, 0x4000, 0x4000, 0x4002, 0x4002, 0x4004, 0x4004,
	0x4008, 0x400A, 0x400C, 0x400D, 0x400F, 0x4012, 0x4014, 0x4016,
	0x401D, 0x401D, 0x4020, 0x4027, 0x4060, 0x4062, 0x4200, 0x4200,
	0x4300, 0x4300, 0x4400, 0x4400, 0x4500, 0x4500, 0x4800, 0x4802,
	0x480F, 0x480F, 0x4811, 0x4811, 0x4813, 0x4813, 0x4815, 0x4816,
	0x482B, 0x482B, 0x4857, 0x4857, 0x4883, 0x4883, 0x48AF, 0x48AF,
	0x48C5, 0x48C5, 0x48E5, 0x48E5, 0x4905, 0x4905, 0x4925, 0x4925,
	0x4945, 0x4945, 0x4950, 0x4950, 0x495B, 0x495B, 0x4980, 0x498E,
	0x4B00, 0x4B00, 0x4C00, 0x4C00, 0x4D00, 0x4D00, 0x4E00, 0x4E00,
	0x4E80, 0x4E80, 0x4F00, 0x4F00, 0x4F08, 0x4F08, 0x4F10, 0x4F10,
	0x4F18, 0x4F18, 0x4F20, 0x4F20, 0x4F30, 0x4F30, 0x4F60, 0x4F60,
	0x4F80, 0x4F81, 0x4F88, 0x4F89, 0x4FEE, 0x4FEE, 0x4FF3, 0x4FF3,
	0x6000, 0x6001, 0x6008, 0x600F, 0x6014, 0x6016, 0x6018, 0x601B,
	0x61FD, 0x61FD, 0x623C, 0x623C, 0x6380, 0x6380, 0x63A0, 0x63A0,
	0x63C0, 0x63C1, 0x63C8, 0x63C9, 0x63D0, 0x63D4, 0x63D6, 0x63D6,
	0x63EE, 0x63EE, 0x6400, 0x6401, 0x6408, 0x640F, 0x6414, 0x6416,
	0x6418, 0x641B, 0x65FD, 0x65FD, 0x663C, 0x663C, 0x6780, 0x6780,
	0x67A0, 0x67A0, 0x67C0, 0x67C1, 0x67C8, 0x67C9, 0x67D0, 0x67D4,
	0x67D6, 0x67D6, 0x67EE, 0x67EE, 0x6800, 0x6801, 0x6808, 0x680F,
	0x6814, 0x6816, 0x6818, 0x681B, 0x69FD, 0x69FD, 0x6A3C, 0x6A3C,
	0x6B80, 0x6B80, 0x6BA0, 0x6BA0, 0x6BC0, 0x6BC1, 0x6BC8, 0x6BC9,
	0x6BD0, 0x6BD4, 0x6BD6, 0x6BD6, 0x6BEE, 0x6BEE,
	~0 /* sentinel */
};
505 
/*
 * Register list used when the GPU is an A405 (a4xx_gpu_init() stores it in
 * adreno_gpu->registers for that variant).  Compared with a4xx_registers
 * it has no VMIDMT or XPU sections and a shorter VBIF section.  The list
 * is terminated by a ~0 sentinel.
 *
 * NOTE(review): the values look like (first, last) pairs of register
 * offsets describing ranges; the consumer is outside this file, so
 * confirm against the adreno core code.
 */
static const unsigned int a405_registers[] = {
	/* RBBM */
	0x0000, 0x0002, 0x0004, 0x0021, 0x0023, 0x0024, 0x0026, 0x0026,
	0x0028, 0x002B, 0x002E, 0x0034, 0x0037, 0x0044, 0x0047, 0x0066,
	0x0068, 0x0095, 0x009C, 0x0170, 0x0174, 0x01AF,
	/* CP */
	0x0200, 0x0233, 0x0240, 0x0250, 0x04C0, 0x04DD, 0x0500, 0x050B,
	0x0578, 0x058F,
	/* VSC */
	0x0C00, 0x0C03, 0x0C08, 0x0C41, 0x0C50, 0x0C51,
	/* GRAS */
	0x0C80, 0x0C81, 0x0C88, 0x0C8F,
	/* RB */
	0x0CC0, 0x0CC0, 0x0CC4, 0x0CD2,
	/* PC */
	0x0D00, 0x0D0C, 0x0D10, 0x0D17, 0x0D20, 0x0D23,
	/* VFD */
	0x0E40, 0x0E4A,
	/* VPC */
	0x0E60, 0x0E61, 0x0E63, 0x0E68,
	/* UCHE */
	0x0E80, 0x0E84, 0x0E88, 0x0E95,
	/* GRAS CTX 0 */
	0x2000, 0x2004, 0x2008, 0x2067, 0x2070, 0x2078, 0x207B, 0x216E,
	/* PC CTX 0 */
	0x21C0, 0x21C6, 0x21D0, 0x21D0, 0x21D9, 0x21D9, 0x21E5, 0x21E7,
	/* VFD CTX 0 */
	0x2200, 0x2204, 0x2208, 0x22A9,
	/* GRAS CTX 1 */
	0x2400, 0x2404, 0x2408, 0x2467, 0x2470, 0x2478, 0x247B, 0x256E,
	/* PC CTX 1 */
	0x25C0, 0x25C6, 0x25D0, 0x25D0, 0x25D9, 0x25D9, 0x25E5, 0x25E7,
	/* VFD CTX 1 */
	0x2600, 0x2604, 0x2608, 0x26A9,
	/* VBIF version 0x20050000*/
	0x3000, 0x3007, 0x302C, 0x302C, 0x3030, 0x3030, 0x3034, 0x3036,
	0x3038, 0x3038, 0x303C, 0x303D, 0x3040, 0x3040, 0x3049, 0x3049,
	0x3058, 0x3058, 0x305B, 0x3061, 0x3064, 0x3068, 0x306C, 0x306D,
	0x3080, 0x3088, 0x308B, 0x308C, 0x3090, 0x3094, 0x3098, 0x3098,
	0x309C, 0x309C, 0x30C0, 0x30C0, 0x30C8, 0x30C8, 0x30D0, 0x30D0,
	0x30D8, 0x30D8, 0x30E0, 0x30E0, 0x3100, 0x3100, 0x3108, 0x3108,
	0x3110, 0x3110, 0x3118, 0x3118, 0x3120, 0x3120, 0x3124, 0x3125,
	0x3129, 0x3129, 0x340C, 0x340C, 0x3410, 0x3410,
	~0 /* sentinel */
};
551 
552 static struct msm_gpu_state *a4xx_gpu_state_get(struct msm_gpu *gpu)
553 {
554 	struct msm_gpu_state *state = kzalloc(sizeof(*state), GFP_KERNEL);
555 
556 	if (!state)
557 		return ERR_PTR(-ENOMEM);
558 
559 	adreno_gpu_state_get(gpu, state);
560 
561 	state->rbbm_status = gpu_read(gpu, REG_A4XX_RBBM_STATUS);
562 
563 	return state;
564 }
565 
566 static void a4xx_dump(struct msm_gpu *gpu)
567 {
568 	printk("status:   %08x\n",
569 			gpu_read(gpu, REG_A4XX_RBBM_STATUS));
570 	adreno_dump(gpu);
571 }
572 
573 static int a4xx_pm_resume(struct msm_gpu *gpu) {
574 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
575 	int ret;
576 
577 	ret = msm_gpu_pm_resume(gpu);
578 	if (ret)
579 		return ret;
580 
581 	if (adreno_is_a430(adreno_gpu)) {
582 		unsigned int reg;
583 		/* Set the default register values; set SW_COLLAPSE to 0 */
584 		gpu_write(gpu, REG_A4XX_RBBM_POWER_CNTL_IP, 0x778000);
585 		do {
586 			udelay(5);
587 			reg = gpu_read(gpu, REG_A4XX_RBBM_POWER_STATUS);
588 		} while (!(reg & A4XX_RBBM_POWER_CNTL_IP_SP_TP_PWR_ON));
589 	}
590 	return 0;
591 }
592 
593 static int a4xx_pm_suspend(struct msm_gpu *gpu) {
594 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
595 	int ret;
596 
597 	ret = msm_gpu_pm_suspend(gpu);
598 	if (ret)
599 		return ret;
600 
601 	if (adreno_is_a430(adreno_gpu)) {
602 		/* Set the default register values; set SW_COLLAPSE to 1 */
603 		gpu_write(gpu, REG_A4XX_RBBM_POWER_CNTL_IP, 0x778001);
604 	}
605 	return 0;
606 }
607 
608 static int a4xx_get_timestamp(struct msm_gpu *gpu, uint64_t *value)
609 {
610 	*value = gpu_read64(gpu, REG_A4XX_RBBM_PERFCTR_CP_0_LO,
611 		REG_A4XX_RBBM_PERFCTR_CP_0_HI);
612 
613 	return 0;
614 }
615 
616 static u32 a4xx_get_rptr(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
617 {
618 	ring->memptrs->rptr = gpu_read(gpu, REG_A4XX_CP_RB_RPTR);
619 	return ring->memptrs->rptr;
620 }
621 
/*
 * Callback table passed to adreno_gpu_init() by a4xx_gpu_init().  It uses
 * the a4xx_* handlers defined in this file where one exists and shared
 * adreno_* helpers for the rest.
 */
static const struct adreno_gpu_funcs funcs = {
	.base = {
		.get_param = adreno_get_param,
		.hw_init = a4xx_hw_init,
		.pm_suspend = a4xx_pm_suspend,
		.pm_resume = a4xx_pm_resume,
		.recover = a4xx_recover,
		.submit = a4xx_submit,
		.active_ring = adreno_active_ring,
		.irq = a4xx_irq,
		.destroy = a4xx_destroy,
		/* .show is only set when debugfs or devcoredump support is configured */
#if defined(CONFIG_DEBUG_FS) || defined(CONFIG_DEV_COREDUMP)
		.show = adreno_show,
#endif
		.gpu_state_get = a4xx_gpu_state_get,
		.gpu_state_put = adreno_gpu_state_put,
		.create_address_space = adreno_iommu_create_address_space,
		.get_rptr = a4xx_get_rptr,
	},
	.get_timestamp = a4xx_get_timestamp,
};
643 
644 struct msm_gpu *a4xx_gpu_init(struct drm_device *dev)
645 {
646 	struct a4xx_gpu *a4xx_gpu = NULL;
647 	struct adreno_gpu *adreno_gpu;
648 	struct msm_gpu *gpu;
649 	struct msm_drm_private *priv = dev->dev_private;
650 	struct platform_device *pdev = priv->gpu_pdev;
651 	struct icc_path *ocmem_icc_path;
652 	struct icc_path *icc_path;
653 	int ret;
654 
655 	if (!pdev) {
656 		DRM_DEV_ERROR(dev->dev, "no a4xx device\n");
657 		ret = -ENXIO;
658 		goto fail;
659 	}
660 
661 	a4xx_gpu = kzalloc(sizeof(*a4xx_gpu), GFP_KERNEL);
662 	if (!a4xx_gpu) {
663 		ret = -ENOMEM;
664 		goto fail;
665 	}
666 
667 	adreno_gpu = &a4xx_gpu->base;
668 	gpu = &adreno_gpu->base;
669 
670 	gpu->perfcntrs = NULL;
671 	gpu->num_perfcntrs = 0;
672 
673 	ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs, 1);
674 	if (ret)
675 		goto fail;
676 
677 	adreno_gpu->registers = adreno_is_a405(adreno_gpu) ? a405_registers :
678 							     a4xx_registers;
679 
680 	/* if needed, allocate gmem: */
681 	ret = adreno_gpu_ocmem_init(dev->dev, adreno_gpu,
682 				    &a4xx_gpu->ocmem);
683 	if (ret)
684 		goto fail;
685 
686 	if (!gpu->aspace) {
687 		/* TODO we think it is possible to configure the GPU to
688 		 * restrict access to VRAM carveout.  But the required
689 		 * registers are unknown.  For now just bail out and
690 		 * limp along with just modesetting.  If it turns out
691 		 * to not be possible to restrict access, then we must
692 		 * implement a cmdstream validator.
693 		 */
694 		DRM_DEV_ERROR(dev->dev, "No memory protection without IOMMU\n");
695 		if (!allow_vram_carveout) {
696 			ret = -ENXIO;
697 			goto fail;
698 		}
699 	}
700 
701 	icc_path = devm_of_icc_get(&pdev->dev, "gfx-mem");
702 	ret = IS_ERR(icc_path);
703 	if (ret)
704 		goto fail;
705 
706 	ocmem_icc_path = devm_of_icc_get(&pdev->dev, "ocmem");
707 	ret = IS_ERR(ocmem_icc_path);
708 	if (ret) {
709 		/* allow -ENODATA, ocmem icc is optional */
710 		if (ret != -ENODATA)
711 			goto fail;
712 		ocmem_icc_path = NULL;
713 	}
714 
715 	/*
716 	 * Set the ICC path to maximum speed for now by multiplying the fastest
717 	 * frequency by the bus width (8). We'll want to scale this later on to
718 	 * improve battery life.
719 	 */
720 	icc_set_bw(icc_path, 0, Bps_to_icc(gpu->fast_rate) * 8);
721 	icc_set_bw(ocmem_icc_path, 0, Bps_to_icc(gpu->fast_rate) * 8);
722 
723 	return gpu;
724 
725 fail:
726 	if (a4xx_gpu)
727 		a4xx_destroy(&a4xx_gpu->base.base);
728 
729 	return ERR_PTR(ret);
730 }
731