/* drivers/gpu/drm/msm/adreno/a4xx_gpu.c (revision 12fbfc4c) */
// SPDX-License-Identifier: GPL-2.0-only
/* Copyright (c) 2014 The Linux Foundation. All rights reserved.
 */
#include "a4xx_gpu.h"

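/*
 * Interrupt sources unmasked via REG_A4XX_RBBM_INT_0_MASK in
 * a4xx_hw_init() and serviced in a4xx_irq().
 */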
#define A4XX_INT0_MASK \
	(A4XX_INT0_RBBM_AHB_ERROR |        \
	 A4XX_INT0_RBBM_ATB_BUS_OVERFLOW | \
	 A4XX_INT0_CP_T0_PACKET_IN_IB |    \
	 A4XX_INT0_CP_OPCODE_ERROR |       \
	 A4XX_INT0_CP_RESERVED_BIT_ERROR | \
	 A4XX_INT0_CP_HW_FAULT |           \
	 A4XX_INT0_CP_IB1_INT |            \
	 A4XX_INT0_CP_IB2_INT |            \
	 A4XX_INT0_CP_RB_INT |             \
	 A4XX_INT0_CP_REG_PROTECT_FAULT |  \
	 A4XX_INT0_CP_AHB_ERROR_HALT |     \
	 A4XX_INT0_CACHE_FLUSH_TS |        \
	 A4XX_INT0_UCHE_OOB_ACCESS)

extern bool hang_debug;
static void a4xx_dump(struct msm_gpu *gpu);
static bool a4xx_idle(struct msm_gpu *gpu);

/*
 * a4xx_enable_hwcg() - Program the clock control registers
 * @gpu: the GPU to program
 */
static void a4xx_enable_hwcg(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	unsigned int i;

	for (i = 0; i < 4; i++)
		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL_TP(i), 0x02222202);
	for (i = 0; i < 4; i++)
		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL2_TP(i), 0x00002222);
	for (i = 0; i < 4; i++)
		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_HYST_TP(i), 0x0E739CE7);
	for (i = 0; i < 4; i++)
		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_TP(i), 0x00111111);
	for (i = 0; i < 4; i++)
		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL_SP(i), 0x22222222);
	for (i = 0; i < 4; i++)
		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL2_SP(i), 0x00222222);
	for (i = 0; i < 4; i++)
		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_HYST_SP(i), 0x00000104);
	for (i = 0; i < 4; i++)
		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_SP(i), 0x00000081);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL_UCHE, 0x22222222);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL2_UCHE, 0x02222222);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL3_UCHE, 0x00000000);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL4_UCHE, 0x00000000);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_HYST_UCHE, 0x00004444);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_UCHE, 0x00001112);
	for (i = 0; i < 4; i++)
		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL_RB(i), 0x22222222);

	/* Disable L1 clocking in A420 due to CCU issues with it */
	for (i = 0; i < 4; i++) {
		if (adreno_is_a420(adreno_gpu)) {
			gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL2_RB(i),
					0x00002020);
		} else {
			gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL2_RB(i),
					0x00022020);
		}
	}

	/* No CCU for A405 */
	if (!adreno_is_a405(adreno_gpu)) {
		for (i = 0; i < 4; i++) {
			gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL_MARB_CCU(i),
					0x00000922);
		}

		for (i = 0; i < 4; i++) {
			gpu_write(gpu, REG_A4XX_RBBM_CLOCK_HYST_RB_MARB_CCU(i),
					0x00000000);
		}

		for (i = 0; i < 4; i++) {
			gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_RB_MARB_CCU_L1(i),
					0x00000001);
		}
	}

	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_MODE_GPC, 0x02222222);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_HYST_GPC, 0x04100104);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_GPC, 0x00022222);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL_COM_DCOM, 0x00000022);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_HYST_COM_DCOM, 0x0000010F);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_COM_DCOM, 0x00000022);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL_TSE_RAS_RBBM, 0x00222222);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM, 0x00004104);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM, 0x00000222);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL_HLSQ, 0x00000000);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_HYST_HLSQ, 0x00000000);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_HLSQ, 0x00220000);
	/*
	 * Early A430s have a timing issue with SP/TP power collapse;
	 * disabling HW clock gating prevents it.
	 */
	if (adreno_is_a430(adreno_gpu) && adreno_gpu->rev.patchid < 2)
		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL, 0);
	else
		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL, 0xAAAAAAAA);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL2, 0);
}

static bool a4xx_me_init(struct msm_gpu *gpu)
{
	struct msm_ringbuffer *ring = gpu->rb[0];

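	/*
	 * One-time CP micro engine init; the 17 payload dwords below are
	 * opaque setup values (presumably mirroring the vendor driver's
	 * init sequence).
	 */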
	OUT_PKT3(ring, CP_ME_INIT, 17);
	OUT_RING(ring, 0x000003f7);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000080);
	OUT_RING(ring, 0x00000100);
	OUT_RING(ring, 0x00000180);
	OUT_RING(ring, 0x00006600);
	OUT_RING(ring, 0x00000150);
	OUT_RING(ring, 0x0000014e);
	OUT_RING(ring, 0x00000154);
	OUT_RING(ring, 0x00000001);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);

	gpu->funcs->flush(gpu, ring);
	return a4xx_idle(gpu);
}

static int a4xx_hw_init(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a4xx_gpu *a4xx_gpu = to_a4xx_gpu(adreno_gpu);
	uint32_t *ptr, len;
	int i, ret;

	if (adreno_is_a405(adreno_gpu)) {
		gpu_write(gpu, REG_A4XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x00000003);
	} else if (adreno_is_a420(adreno_gpu)) {
		gpu_write(gpu, REG_A4XX_VBIF_ABIT_SORT, 0x0001001F);
		gpu_write(gpu, REG_A4XX_VBIF_ABIT_SORT_CONF, 0x000000A4);
		gpu_write(gpu, REG_A4XX_VBIF_GATE_OFF_WRREQ_EN, 0x00000001);
		gpu_write(gpu, REG_A4XX_VBIF_IN_RD_LIM_CONF0, 0x18181818);
		gpu_write(gpu, REG_A4XX_VBIF_IN_RD_LIM_CONF1, 0x00000018);
		gpu_write(gpu, REG_A4XX_VBIF_IN_WR_LIM_CONF0, 0x18181818);
		gpu_write(gpu, REG_A4XX_VBIF_IN_WR_LIM_CONF1, 0x00000018);
		gpu_write(gpu, REG_A4XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x00000003);
	} else if (adreno_is_a430(adreno_gpu)) {
		gpu_write(gpu, REG_A4XX_VBIF_GATE_OFF_WRREQ_EN, 0x00000001);
		gpu_write(gpu, REG_A4XX_VBIF_IN_RD_LIM_CONF0, 0x18181818);
		gpu_write(gpu, REG_A4XX_VBIF_IN_RD_LIM_CONF1, 0x00000018);
		gpu_write(gpu, REG_A4XX_VBIF_IN_WR_LIM_CONF0, 0x18181818);
		gpu_write(gpu, REG_A4XX_VBIF_IN_WR_LIM_CONF1, 0x00000018);
		gpu_write(gpu, REG_A4XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x00000003);
	} else {
		BUG();
	}

	/* Make all blocks contribute to the GPU BUSY perf counter */
	gpu_write(gpu, REG_A4XX_RBBM_GPU_BUSY_MASKED, 0xffffffff);

	/* Tune the hysteresis counters for SP and CP idle detection */
	gpu_write(gpu, REG_A4XX_RBBM_SP_HYST_CNT, 0x10);
	gpu_write(gpu, REG_A4XX_RBBM_WAIT_IDLE_CLOCKS_CTL, 0x10);

	if (adreno_is_a430(adreno_gpu))
		gpu_write(gpu, REG_A4XX_RBBM_WAIT_IDLE_CLOCKS_CTL2, 0x30);

	/* Enable the RBBM error reporting bits */
	gpu_write(gpu, REG_A4XX_RBBM_AHB_CTL0, 0x00000001);

	/* Enable AHB error reporting */
	gpu_write(gpu, REG_A4XX_RBBM_AHB_CTL1, 0xa6ffffff);

	/* Enable power counters */
	gpu_write(gpu, REG_A4XX_RBBM_RBBM_CTL, 0x00000030);

	/*
	 * Turn on hang detection - this spews a lot of useful information
	 * into the RBBM registers on a hang:
	 */
	gpu_write(gpu, REG_A4XX_RBBM_INTERFACE_HANG_INT_CTL,
			(1 << 30) | 0xFFFF);

	gpu_write(gpu, REG_A4XX_RB_GMEM_BASE_ADDR,
			(unsigned int)(a4xx_gpu->ocmem.base >> 14));

	/* Turn on performance counters: */
	gpu_write(gpu, REG_A4XX_RBBM_PERFCTR_CTL, 0x01);

	/* use the first CP counter for timestamp queries.. userspace may set
	 * this as well but it selects the same counter/countable:
	 */
	gpu_write(gpu, REG_A4XX_CP_PERFCTR_CP_SEL_0, CP_ALWAYS_COUNT);

	if (adreno_is_a430(adreno_gpu))
		gpu_write(gpu, REG_A4XX_UCHE_CACHE_WAYS_VFD, 0x07);

	/* Disable L2 bypass to avoid UCHE out of bounds errors */
	gpu_write(gpu, REG_A4XX_UCHE_TRAP_BASE_LO, 0xffff0000);
	gpu_write(gpu, REG_A4XX_UCHE_TRAP_BASE_HI, 0xffff0000);

	gpu_write(gpu, REG_A4XX_CP_DEBUG, (1 << 25) |
			(adreno_is_a420(adreno_gpu) ? (1 << 29) : 0));

	/* On A430 enable SP regfile sleep for power savings */
	/* TODO downstream does this for !420, so maybe applies for 405 too? */
	if (!adreno_is_a420(adreno_gpu)) {
		gpu_write(gpu, REG_A4XX_RBBM_SP_REGFILE_SLEEP_CNTL_0,
			0x00000441);
		gpu_write(gpu, REG_A4XX_RBBM_SP_REGFILE_SLEEP_CNTL_1,
			0x00000441);
	}

	a4xx_enable_hwcg(gpu);

	/*
	 * For A420 set RBBM_CLOCK_DELAY_HLSQ.CGC_HLSQ_TP_EARLY_CYC >= 2
	 * due to timing issue with HLSQ_TP_CLK_EN
	 */
	if (adreno_is_a420(adreno_gpu)) {
		unsigned int val;

		val = gpu_read(gpu, REG_A4XX_RBBM_CLOCK_DELAY_HLSQ);
		val &= ~A4XX_CGC_HLSQ_EARLY_CYC__MASK;
		val |= 2 << A4XX_CGC_HLSQ_EARLY_CYC__SHIFT;
		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_HLSQ, val);
	}

	/* setup access protection: */
	gpu_write(gpu, REG_A4XX_CP_PROTECT_CTRL, 0x00000007);
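	/*
	 * Each CP_PROTECT entry traps CP access to a register range; a
	 * violation raises A4XX_INT0_CP_REG_PROTECT_FAULT, which is
	 * decoded from CP_PROTECT_STATUS in a4xx_irq().
	 */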

	/* RBBM registers */
	gpu_write(gpu, REG_A4XX_CP_PROTECT(0), 0x62000010);
	gpu_write(gpu, REG_A4XX_CP_PROTECT(1), 0x63000020);
	gpu_write(gpu, REG_A4XX_CP_PROTECT(2), 0x64000040);
	gpu_write(gpu, REG_A4XX_CP_PROTECT(3), 0x65000080);
	gpu_write(gpu, REG_A4XX_CP_PROTECT(4), 0x66000100);
	gpu_write(gpu, REG_A4XX_CP_PROTECT(5), 0x64000200);

	/* CP registers */
	gpu_write(gpu, REG_A4XX_CP_PROTECT(6), 0x67000800);
	gpu_write(gpu, REG_A4XX_CP_PROTECT(7), 0x64001600);

	/* RB registers */
	gpu_write(gpu, REG_A4XX_CP_PROTECT(8), 0x60003300);

	/* HLSQ registers */
	gpu_write(gpu, REG_A4XX_CP_PROTECT(9), 0x60003800);

	/* VPC registers */
	gpu_write(gpu, REG_A4XX_CP_PROTECT(10), 0x61003980);

	/* SMMU registers */
	gpu_write(gpu, REG_A4XX_CP_PROTECT(11), 0x6e010000);

	gpu_write(gpu, REG_A4XX_RBBM_INT_0_MASK, A4XX_INT0_MASK);

	ret = adreno_hw_init(gpu);
	if (ret)
		return ret;

	/*
	 * Use the default ringbuffer size and block size but disable the RPTR
	 * shadow
	 */
	gpu_write(gpu, REG_A4XX_CP_RB_CNTL,
		MSM_GPU_RB_CNTL_DEFAULT | AXXX_CP_RB_CNTL_NO_UPDATE);

	/* Set the ringbuffer address */
	gpu_write(gpu, REG_A4XX_CP_RB_BASE, lower_32_bits(gpu->rb[0]->iova));

	/* Load PM4: */
	ptr = (uint32_t *)(adreno_gpu->fw[ADRENO_FW_PM4]->data);
	len = adreno_gpu->fw[ADRENO_FW_PM4]->size / 4;
	DBG("loading PM4 ucode version: %u", ptr[0]);
	gpu_write(gpu, REG_A4XX_CP_ME_RAM_WADDR, 0);
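	/* ptr[0] is the ucode version (logged above); the words to load
	 * start at ptr[1].  The same layout applies to the PFP image below.
	 */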
	for (i = 1; i < len; i++)
		gpu_write(gpu, REG_A4XX_CP_ME_RAM_DATA, ptr[i]);

	/* Load PFP: */
	ptr = (uint32_t *)(adreno_gpu->fw[ADRENO_FW_PFP]->data);
	len = adreno_gpu->fw[ADRENO_FW_PFP]->size / 4;
	DBG("loading PFP ucode version: %u", ptr[0]);

	gpu_write(gpu, REG_A4XX_CP_PFP_UCODE_ADDR, 0);
	for (i = 1; i < len; i++)
		gpu_write(gpu, REG_A4XX_CP_PFP_UCODE_DATA, ptr[i]);

	/* clear ME_HALT to start micro engine */
	gpu_write(gpu, REG_A4XX_CP_ME_CNTL, 0);

	return a4xx_me_init(gpu) ? 0 : -EINVAL;
}

static void a4xx_recover(struct msm_gpu *gpu)
{
	int i;

	adreno_dump_info(gpu);

	for (i = 0; i < 8; i++) {
		printk("CP_SCRATCH_REG%d: %u\n", i,
			gpu_read(gpu, REG_AXXX_CP_SCRATCH_REG0 + i));
	}

	/* dump registers before resetting gpu, if enabled: */
	if (hang_debug)
		a4xx_dump(gpu);

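	/* Soft reset: assert, read back to post the write, then de-assert */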
	gpu_write(gpu, REG_A4XX_RBBM_SW_RESET_CMD, 1);
	gpu_read(gpu, REG_A4XX_RBBM_SW_RESET_CMD);
	gpu_write(gpu, REG_A4XX_RBBM_SW_RESET_CMD, 0);
	adreno_recover(gpu);
}

static void a4xx_destroy(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a4xx_gpu *a4xx_gpu = to_a4xx_gpu(adreno_gpu);

	DBG("%s", gpu->name);

	adreno_gpu_cleanup(adreno_gpu);

	adreno_gpu_ocmem_cleanup(&a4xx_gpu->ocmem);

	kfree(a4xx_gpu);
}

static bool a4xx_idle(struct msm_gpu *gpu)
{
	/* wait for ringbuffer to drain: */
	if (!adreno_idle(gpu, gpu->rb[0]))
		return false;

	/* then wait for GPU to finish: */
	if (spin_until(!(gpu_read(gpu, REG_A4XX_RBBM_STATUS) &
					A4XX_RBBM_STATUS_GPU_BUSY))) {
		DRM_ERROR("%s: timeout waiting for GPU to idle!\n", gpu->name);
		/* TODO maybe we need to reset GPU here to recover from hang? */
		return false;
	}

	return true;
}

static irqreturn_t a4xx_irq(struct msm_gpu *gpu)
{
	uint32_t status;

	status = gpu_read(gpu, REG_A4XX_RBBM_INT_0_STATUS);
	DBG("%s: Int status %08x", gpu->name, status);

	if (status & A4XX_INT0_CP_REG_PROTECT_FAULT) {
		uint32_t reg = gpu_read(gpu, REG_A4XX_CP_PROTECT_STATUS);
		printk("CP | Protected mode error | %s | addr=%x\n",
			reg & (1 << 24) ? "WRITE" : "READ",
			(reg & 0xFFFFF) >> 2);
	}

	gpu_write(gpu, REG_A4XX_RBBM_INT_CLEAR_CMD, status);

	msm_gpu_retire(gpu);

	return IRQ_HANDLED;
}

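/*
 * Snapshot ranges for register dumps: inclusive [start, end] pairs,
 * terminated by a ~0 sentinel.
 */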
static const unsigned int a4xx_registers[] = {
	/* RBBM */
	0x0000, 0x0002, 0x0004, 0x0021, 0x0023, 0x0024, 0x0026, 0x0026,
	0x0028, 0x002B, 0x002E, 0x0034, 0x0037, 0x0044, 0x0047, 0x0066,
	0x0068, 0x0095, 0x009C, 0x0170, 0x0174, 0x01AF,
	/* CP */
	0x0200, 0x0233, 0x0240, 0x0250, 0x04C0, 0x04DD, 0x0500, 0x050B,
	0x0578, 0x058F,
	/* VSC */
	0x0C00, 0x0C03, 0x0C08, 0x0C41, 0x0C50, 0x0C51,
	/* GRAS */
	0x0C80, 0x0C81, 0x0C88, 0x0C8F,
	/* RB */
	0x0CC0, 0x0CC0, 0x0CC4, 0x0CD2,
	/* PC */
	0x0D00, 0x0D0C, 0x0D10, 0x0D17, 0x0D20, 0x0D23,
	/* VFD */
	0x0E40, 0x0E4A,
	/* VPC */
	0x0E60, 0x0E61, 0x0E63, 0x0E68,
	/* UCHE */
	0x0E80, 0x0E84, 0x0E88, 0x0E95,
	/* VMIDMT */
	0x1000, 0x1000, 0x1002, 0x1002, 0x1004, 0x1004, 0x1008, 0x100A,
	0x100C, 0x100D, 0x100F, 0x1010, 0x1012, 0x1016, 0x1024, 0x1024,
	0x1027, 0x1027, 0x1100, 0x1100, 0x1102, 0x1102, 0x1104, 0x1104,
	0x1110, 0x1110, 0x1112, 0x1116, 0x1124, 0x1124, 0x1300, 0x1300,
	0x1380, 0x1380,
	/* GRAS CTX 0 */
	0x2000, 0x2004, 0x2008, 0x2067, 0x2070, 0x2078, 0x207B, 0x216E,
	/* PC CTX 0 */
	0x21C0, 0x21C6, 0x21D0, 0x21D0, 0x21D9, 0x21D9, 0x21E5, 0x21E7,
	/* VFD CTX 0 */
	0x2200, 0x2204, 0x2208, 0x22A9,
	/* GRAS CTX 1 */
	0x2400, 0x2404, 0x2408, 0x2467, 0x2470, 0x2478, 0x247B, 0x256E,
	/* PC CTX 1 */
	0x25C0, 0x25C6, 0x25D0, 0x25D0, 0x25D9, 0x25D9, 0x25E5, 0x25E7,
	/* VFD CTX 1 */
	0x2600, 0x2604, 0x2608, 0x26A9,
	/* XPU */
	0x2C00, 0x2C01, 0x2C10, 0x2C10, 0x2C12, 0x2C16, 0x2C1D, 0x2C20,
	0x2C28, 0x2C28, 0x2C30, 0x2C30, 0x2C32, 0x2C36, 0x2C40, 0x2C40,
	0x2C50, 0x2C50, 0x2C52, 0x2C56, 0x2C80, 0x2C80, 0x2C94, 0x2C95,
	/* VBIF */
	0x3000, 0x3007, 0x300C, 0x3014, 0x3018, 0x301D, 0x3020, 0x3022,
	0x3024, 0x3026, 0x3028, 0x302A, 0x302C, 0x302D, 0x3030, 0x3031,
	0x3034, 0x3036, 0x3038, 0x3038, 0x303C, 0x303D, 0x3040, 0x3040,
	0x3049, 0x3049, 0x3058, 0x3058, 0x305B, 0x3061, 0x3064, 0x3068,
	0x306C, 0x306D, 0x3080, 0x3088, 0x308B, 0x308C, 0x3090, 0x3094,
	0x3098, 0x3098, 0x309C, 0x309C, 0x30C0, 0x30C0, 0x30C8, 0x30C8,
	0x30D0, 0x30D0, 0x30D8, 0x30D8, 0x30E0, 0x30E0, 0x3100, 0x3100,
	0x3108, 0x3108, 0x3110, 0x3110, 0x3118, 0x3118, 0x3120, 0x3120,
	0x3124, 0x3125, 0x3129, 0x3129, 0x3131, 0x3131, 0x330C, 0x330C,
	0x3310, 0x3310, 0x3400, 0x3401, 0x3410, 0x3410, 0x3412, 0x3416,
	0x341D, 0x3420, 0x3428, 0x3428, 0x3430, 0x3430, 0x3432, 0x3436,
	0x3440, 0x3440, 0x3450, 0x3450, 0x3452, 0x3456, 0x3480, 0x3480,
	0x3494, 0x3495, 0x4000, 0x4000, 0x4002, 0x4002, 0x4004, 0x4004,
	0x4008, 0x400A, 0x400C, 0x400D, 0x400F, 0x4012, 0x4014, 0x4016,
	0x401D, 0x401D, 0x4020, 0x4027, 0x4060, 0x4062, 0x4200, 0x4200,
	0x4300, 0x4300, 0x4400, 0x4400, 0x4500, 0x4500, 0x4800, 0x4802,
	0x480F, 0x480F, 0x4811, 0x4811, 0x4813, 0x4813, 0x4815, 0x4816,
	0x482B, 0x482B, 0x4857, 0x4857, 0x4883, 0x4883, 0x48AF, 0x48AF,
	0x48C5, 0x48C5, 0x48E5, 0x48E5, 0x4905, 0x4905, 0x4925, 0x4925,
	0x4945, 0x4945, 0x4950, 0x4950, 0x495B, 0x495B, 0x4980, 0x498E,
	0x4B00, 0x4B00, 0x4C00, 0x4C00, 0x4D00, 0x4D00, 0x4E00, 0x4E00,
	0x4E80, 0x4E80, 0x4F00, 0x4F00, 0x4F08, 0x4F08, 0x4F10, 0x4F10,
	0x4F18, 0x4F18, 0x4F20, 0x4F20, 0x4F30, 0x4F30, 0x4F60, 0x4F60,
	0x4F80, 0x4F81, 0x4F88, 0x4F89, 0x4FEE, 0x4FEE, 0x4FF3, 0x4FF3,
	0x6000, 0x6001, 0x6008, 0x600F, 0x6014, 0x6016, 0x6018, 0x601B,
	0x61FD, 0x61FD, 0x623C, 0x623C, 0x6380, 0x6380, 0x63A0, 0x63A0,
	0x63C0, 0x63C1, 0x63C8, 0x63C9, 0x63D0, 0x63D4, 0x63D6, 0x63D6,
	0x63EE, 0x63EE, 0x6400, 0x6401, 0x6408, 0x640F, 0x6414, 0x6416,
	0x6418, 0x641B, 0x65FD, 0x65FD, 0x663C, 0x663C, 0x6780, 0x6780,
	0x67A0, 0x67A0, 0x67C0, 0x67C1, 0x67C8, 0x67C9, 0x67D0, 0x67D4,
	0x67D6, 0x67D6, 0x67EE, 0x67EE, 0x6800, 0x6801, 0x6808, 0x680F,
	0x6814, 0x6816, 0x6818, 0x681B, 0x69FD, 0x69FD, 0x6A3C, 0x6A3C,
	0x6B80, 0x6B80, 0x6BA0, 0x6BA0, 0x6BC0, 0x6BC1, 0x6BC8, 0x6BC9,
	0x6BD0, 0x6BD4, 0x6BD6, 0x6BD6, 0x6BEE, 0x6BEE,
	~0 /* sentinel */
};

static const unsigned int a405_registers[] = {
	/* RBBM */
	0x0000, 0x0002, 0x0004, 0x0021, 0x0023, 0x0024, 0x0026, 0x0026,
	0x0028, 0x002B, 0x002E, 0x0034, 0x0037, 0x0044, 0x0047, 0x0066,
	0x0068, 0x0095, 0x009C, 0x0170, 0x0174, 0x01AF,
	/* CP */
	0x0200, 0x0233, 0x0240, 0x0250, 0x04C0, 0x04DD, 0x0500, 0x050B,
	0x0578, 0x058F,
	/* VSC */
	0x0C00, 0x0C03, 0x0C08, 0x0C41, 0x0C50, 0x0C51,
	/* GRAS */
	0x0C80, 0x0C81, 0x0C88, 0x0C8F,
	/* RB */
	0x0CC0, 0x0CC0, 0x0CC4, 0x0CD2,
	/* PC */
	0x0D00, 0x0D0C, 0x0D10, 0x0D17, 0x0D20, 0x0D23,
	/* VFD */
	0x0E40, 0x0E4A,
	/* VPC */
	0x0E60, 0x0E61, 0x0E63, 0x0E68,
	/* UCHE */
	0x0E80, 0x0E84, 0x0E88, 0x0E95,
	/* GRAS CTX 0 */
	0x2000, 0x2004, 0x2008, 0x2067, 0x2070, 0x2078, 0x207B, 0x216E,
	/* PC CTX 0 */
	0x21C0, 0x21C6, 0x21D0, 0x21D0, 0x21D9, 0x21D9, 0x21E5, 0x21E7,
	/* VFD CTX 0 */
	0x2200, 0x2204, 0x2208, 0x22A9,
	/* GRAS CTX 1 */
	0x2400, 0x2404, 0x2408, 0x2467, 0x2470, 0x2478, 0x247B, 0x256E,
	/* PC CTX 1 */
	0x25C0, 0x25C6, 0x25D0, 0x25D0, 0x25D9, 0x25D9, 0x25E5, 0x25E7,
	/* VFD CTX 1 */
	0x2600, 0x2604, 0x2608, 0x26A9,
	/* VBIF version 0x20050000 */
	0x3000, 0x3007, 0x302C, 0x302C, 0x3030, 0x3030, 0x3034, 0x3036,
	0x3038, 0x3038, 0x303C, 0x303D, 0x3040, 0x3040, 0x3049, 0x3049,
	0x3058, 0x3058, 0x305B, 0x3061, 0x3064, 0x3068, 0x306C, 0x306D,
	0x3080, 0x3088, 0x308B, 0x308C, 0x3090, 0x3094, 0x3098, 0x3098,
	0x309C, 0x309C, 0x30C0, 0x30C0, 0x30C8, 0x30C8, 0x30D0, 0x30D0,
	0x30D8, 0x30D8, 0x30E0, 0x30E0, 0x3100, 0x3100, 0x3108, 0x3108,
	0x3110, 0x3110, 0x3118, 0x3118, 0x3120, 0x3120, 0x3124, 0x3125,
	0x3129, 0x3129, 0x340C, 0x340C, 0x3410, 0x3410,
	~0 /* sentinel */
};

static struct msm_gpu_state *a4xx_gpu_state_get(struct msm_gpu *gpu)
{
	struct msm_gpu_state *state = kzalloc(sizeof(*state), GFP_KERNEL);

	if (!state)
		return ERR_PTR(-ENOMEM);

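	/* Capture the common adreno state, then add the a4xx RBBM status */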
	adreno_gpu_state_get(gpu, state);

	state->rbbm_status = gpu_read(gpu, REG_A4XX_RBBM_STATUS);

	return state;
}

/* Register offset defines for A4XX, in order of enum adreno_regs */
static const unsigned int a4xx_register_offsets[REG_ADRENO_REGISTER_MAX] = {
	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_BASE, REG_A4XX_CP_RB_BASE),
	REG_ADRENO_SKIP(REG_ADRENO_CP_RB_BASE_HI),
	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_RPTR_ADDR, REG_A4XX_CP_RB_RPTR_ADDR),
	REG_ADRENO_SKIP(REG_ADRENO_CP_RB_RPTR_ADDR_HI),
	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_RPTR, REG_A4XX_CP_RB_RPTR),
	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_WPTR, REG_A4XX_CP_RB_WPTR),
	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_CNTL, REG_A4XX_CP_RB_CNTL),
};

static void a4xx_dump(struct msm_gpu *gpu)
{
	printk("status:   %08x\n",
			gpu_read(gpu, REG_A4XX_RBBM_STATUS));
	adreno_dump(gpu);
}

static int a4xx_pm_resume(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	int ret;

	ret = msm_gpu_pm_resume(gpu);
	if (ret)
		return ret;

	if (adreno_is_a430(adreno_gpu)) {
		unsigned int reg;

		/* Set the default register values; set SW_COLLAPSE to 0 */
		gpu_write(gpu, REG_A4XX_RBBM_POWER_CNTL_IP, 0x778000);
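		/* Poll until the SP/TP power rails report on */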
		do {
			udelay(5);
			reg = gpu_read(gpu, REG_A4XX_RBBM_POWER_STATUS);
		} while (!(reg & A4XX_RBBM_POWER_CNTL_IP_SP_TP_PWR_ON));
	}

	return 0;
}

static int a4xx_pm_suspend(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	int ret;

	ret = msm_gpu_pm_suspend(gpu);
	if (ret)
		return ret;

	if (adreno_is_a430(adreno_gpu)) {
		/* Set the default register values; set SW_COLLAPSE to 1 */
		gpu_write(gpu, REG_A4XX_RBBM_POWER_CNTL_IP, 0x778001);
	}

	return 0;
}

static int a4xx_get_timestamp(struct msm_gpu *gpu, uint64_t *value)
{
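	/* CP counter 0 was set to CP_ALWAYS_COUNT in a4xx_hw_init(), so
	 * the LO/HI pair read here serves as a timestamp source.
	 */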
	*value = gpu_read64(gpu, REG_A4XX_RBBM_PERFCTR_CP_0_LO,
		REG_A4XX_RBBM_PERFCTR_CP_0_HI);

	return 0;
}

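/*
 * a4xx mostly reuses the shared adreno_* helpers; only the hooks with
 * a4xx-specific behavior are overridden below.
 */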
static const struct adreno_gpu_funcs funcs = {
	.base = {
		.get_param = adreno_get_param,
		.hw_init = a4xx_hw_init,
		.pm_suspend = a4xx_pm_suspend,
		.pm_resume = a4xx_pm_resume,
		.recover = a4xx_recover,
		.submit = adreno_submit,
		.flush = adreno_flush,
		.active_ring = adreno_active_ring,
		.irq = a4xx_irq,
		.destroy = a4xx_destroy,
#if defined(CONFIG_DEBUG_FS) || defined(CONFIG_DEV_COREDUMP)
		.show = adreno_show,
#endif
		.gpu_state_get = a4xx_gpu_state_get,
		.gpu_state_put = adreno_gpu_state_put,
		.create_address_space = adreno_iommu_create_address_space,
	},
	.get_timestamp = a4xx_get_timestamp,
};

struct msm_gpu *a4xx_gpu_init(struct drm_device *dev)
{
	struct a4xx_gpu *a4xx_gpu = NULL;
	struct adreno_gpu *adreno_gpu;
	struct msm_gpu *gpu;
	struct msm_drm_private *priv = dev->dev_private;
	struct platform_device *pdev = priv->gpu_pdev;
	int ret;

	if (!pdev) {
		DRM_DEV_ERROR(dev->dev, "no a4xx device\n");
		ret = -ENXIO;
		goto fail;
	}

	a4xx_gpu = kzalloc(sizeof(*a4xx_gpu), GFP_KERNEL);
	if (!a4xx_gpu) {
		ret = -ENOMEM;
		goto fail;
	}

	adreno_gpu = &a4xx_gpu->base;
	gpu = &adreno_gpu->base;

	gpu->perfcntrs = NULL;
	gpu->num_perfcntrs = 0;

	ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs, 1);
	if (ret)
		goto fail;

	adreno_gpu->registers = adreno_is_a405(adreno_gpu) ? a405_registers :
							     a4xx_registers;
	adreno_gpu->reg_offsets = a4xx_register_offsets;

	/* if needed, allocate gmem: */
	if (adreno_is_a4xx(adreno_gpu)) {
		ret = adreno_gpu_ocmem_init(dev->dev, adreno_gpu,
					    &a4xx_gpu->ocmem);
		if (ret)
			goto fail;
	}

	if (!gpu->aspace) {
		/* TODO we think it is possible to configure the GPU to
		 * restrict access to VRAM carveout.  But the required
		 * registers are unknown.  For now just bail out and
		 * limp along with just modesetting.  If it turns out
		 * to not be possible to restrict access, then we must
		 * implement a cmdstream validator.
		 */
		DRM_DEV_ERROR(dev->dev, "No memory protection without IOMMU\n");
		ret = -ENXIO;
		goto fail;
	}

	/*
	 * Set the ICC path to maximum speed for now by multiplying the fastest
	 * frequency by the bus width (8). We'll want to scale this later on to
	 * improve battery life.
	 */
	icc_set_bw(gpu->icc_path, 0, Bps_to_icc(gpu->fast_rate) * 8);
	icc_set_bw(gpu->ocmem_icc_path, 0, Bps_to_icc(gpu->fast_rate) * 8);

	return gpu;

fail:
	if (a4xx_gpu)
		a4xx_destroy(&a4xx_gpu->base.base);

	return ERR_PTR(ret);
}