// SPDX-License-Identifier: GPL-2.0-only
/* Copyright (c) 2014 The Linux Foundation. All rights reserved.
 */
#include "a4xx_gpu.h"

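/*
 * Interrupts unmasked at hw_init time (written to RBBM_INT_0_MASK below):
 * mostly error sources, plus CACHE_FLUSH_TS, which fires when a submit's
 * cache-flush timestamp event lands and drives fence retirement.
 */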
#define A4XX_INT0_MASK \
	(A4XX_INT0_RBBM_AHB_ERROR |        \
	 A4XX_INT0_RBBM_ATB_BUS_OVERFLOW | \
	 A4XX_INT0_CP_T0_PACKET_IN_IB |    \
	 A4XX_INT0_CP_OPCODE_ERROR |       \
	 A4XX_INT0_CP_RESERVED_BIT_ERROR | \
	 A4XX_INT0_CP_HW_FAULT |           \
	 A4XX_INT0_CP_IB1_INT |            \
	 A4XX_INT0_CP_IB2_INT |            \
	 A4XX_INT0_CP_RB_INT |             \
	 A4XX_INT0_CP_REG_PROTECT_FAULT |  \
	 A4XX_INT0_CP_AHB_ERROR_HALT |     \
	 A4XX_INT0_CACHE_FLUSH_TS |        \
	 A4XX_INT0_UCHE_OOB_ACCESS)

extern bool hang_debug;
static void a4xx_dump(struct msm_gpu *gpu);
static bool a4xx_idle(struct msm_gpu *gpu);

/*
 * a4xx_enable_hwcg() - Program the clock control registers
 * @gpu: The GPU pointer
 */
static void a4xx_enable_hwcg(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	unsigned int i;
	for (i = 0; i < 4; i++)
		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL_TP(i), 0x02222202);
	for (i = 0; i < 4; i++)
		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL2_TP(i), 0x00002222);
	for (i = 0; i < 4; i++)
		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_HYST_TP(i), 0x0E739CE7);
	for (i = 0; i < 4; i++)
		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_TP(i), 0x00111111);
	for (i = 0; i < 4; i++)
		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL_SP(i), 0x22222222);
	for (i = 0; i < 4; i++)
		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL2_SP(i), 0x00222222);
	for (i = 0; i < 4; i++)
		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_HYST_SP(i), 0x00000104);
	for (i = 0; i < 4; i++)
		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_SP(i), 0x00000081);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL_UCHE, 0x22222222);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL2_UCHE, 0x02222222);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL3_UCHE, 0x00000000);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL4_UCHE, 0x00000000);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_HYST_UCHE, 0x00004444);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_UCHE, 0x00001112);
	for (i = 0; i < 4; i++)
		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL_RB(i), 0x22222222);

	/* Disable L1 clocking in A420 due to CCU issues with it */
	for (i = 0; i < 4; i++) {
		if (adreno_is_a420(adreno_gpu)) {
			gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL2_RB(i),
					0x00002020);
		} else {
			gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL2_RB(i),
					0x00022020);
		}
	}

	/* No CCU for A405 */
	if (!adreno_is_a405(adreno_gpu)) {
		for (i = 0; i < 4; i++) {
			gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL_MARB_CCU(i),
					0x00000922);
		}

		for (i = 0; i < 4; i++) {
			gpu_write(gpu, REG_A4XX_RBBM_CLOCK_HYST_RB_MARB_CCU(i),
					0x00000000);
		}

		for (i = 0; i < 4; i++) {
			gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_RB_MARB_CCU_L1(i),
					0x00000001);
		}
	}

	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_MODE_GPC, 0x02222222);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_HYST_GPC, 0x04100104);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_GPC, 0x00022222);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL_COM_DCOM, 0x00000022);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_HYST_COM_DCOM, 0x0000010F);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_COM_DCOM, 0x00000022);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL_TSE_RAS_RBBM, 0x00222222);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM, 0x00004104);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM, 0x00000222);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL_HLSQ, 0x00000000);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_HYST_HLSQ, 0x00000000);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_HLSQ, 0x00220000);
	/*
	 * Early A430s have a timing issue with SP/TP power collapse;
	 * disabling HW clock gating prevents it.
	 */
	if (adreno_is_a430(adreno_gpu) && adreno_gpu->rev.patchid < 2)
		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL, 0);
	else
		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL, 0xAAAAAAAA);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL2, 0);
}

static bool a4xx_me_init(struct msm_gpu *gpu)
{
	struct msm_ringbuffer *ring = gpu->rb[0];

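	/*
	 * CP_ME_INIT carries 17 dwords of micro-engine defaults. The values
	 * are opaque, microcode-defined settings; their individual meaning
	 * is not documented in this driver.
	 */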
	OUT_PKT3(ring, CP_ME_INIT, 17);
	OUT_RING(ring, 0x000003f7);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000080);
	OUT_RING(ring, 0x00000100);
	OUT_RING(ring, 0x00000180);
	OUT_RING(ring, 0x00006600);
	OUT_RING(ring, 0x00000150);
	OUT_RING(ring, 0x0000014e);
	OUT_RING(ring, 0x00000154);
	OUT_RING(ring, 0x00000001);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);

	gpu->funcs->flush(gpu, ring);
	return a4xx_idle(gpu);
}

static int a4xx_hw_init(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a4xx_gpu *a4xx_gpu = to_a4xx_gpu(adreno_gpu);
	uint32_t *ptr, len;
	int i, ret;

	if (adreno_is_a405(adreno_gpu)) {
		gpu_write(gpu, REG_A4XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x00000003);
	} else if (adreno_is_a420(adreno_gpu)) {
		gpu_write(gpu, REG_A4XX_VBIF_ABIT_SORT, 0x0001001F);
		gpu_write(gpu, REG_A4XX_VBIF_ABIT_SORT_CONF, 0x000000A4);
		gpu_write(gpu, REG_A4XX_VBIF_GATE_OFF_WRREQ_EN, 0x00000001);
		gpu_write(gpu, REG_A4XX_VBIF_IN_RD_LIM_CONF0, 0x18181818);
		gpu_write(gpu, REG_A4XX_VBIF_IN_RD_LIM_CONF1, 0x00000018);
		gpu_write(gpu, REG_A4XX_VBIF_IN_WR_LIM_CONF0, 0x18181818);
		gpu_write(gpu, REG_A4XX_VBIF_IN_WR_LIM_CONF1, 0x00000018);
		gpu_write(gpu, REG_A4XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x00000003);
	} else if (adreno_is_a430(adreno_gpu)) {
		gpu_write(gpu, REG_A4XX_VBIF_GATE_OFF_WRREQ_EN, 0x00000001);
		gpu_write(gpu, REG_A4XX_VBIF_IN_RD_LIM_CONF0, 0x18181818);
		gpu_write(gpu, REG_A4XX_VBIF_IN_RD_LIM_CONF1, 0x00000018);
		gpu_write(gpu, REG_A4XX_VBIF_IN_WR_LIM_CONF0, 0x18181818);
		gpu_write(gpu, REG_A4XX_VBIF_IN_WR_LIM_CONF1, 0x00000018);
		gpu_write(gpu, REG_A4XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x00000003);
	} else {
		BUG();
	}

	/* Make all blocks contribute to the GPU BUSY perf counter */
	gpu_write(gpu, REG_A4XX_RBBM_GPU_BUSY_MASKED, 0xffffffff);

	/* Tune the hysteresis counters for SP and CP idle detection */
	gpu_write(gpu, REG_A4XX_RBBM_SP_HYST_CNT, 0x10);
	gpu_write(gpu, REG_A4XX_RBBM_WAIT_IDLE_CLOCKS_CTL, 0x10);

	if (adreno_is_a430(adreno_gpu))
		gpu_write(gpu, REG_A4XX_RBBM_WAIT_IDLE_CLOCKS_CTL2, 0x30);

	/* Enable the RBBM error reporting bits */
	gpu_write(gpu, REG_A4XX_RBBM_AHB_CTL0, 0x00000001);

	/* Enable AHB error reporting */
	gpu_write(gpu, REG_A4XX_RBBM_AHB_CTL1, 0xa6ffffff);

	/* Enable power counters */
	gpu_write(gpu, REG_A4XX_RBBM_RBBM_CTL, 0x00000030);

	/*
	 * Turn on hang detection - this spews a lot of useful information
	 * into the RBBM registers on a hang:
	 */
	gpu_write(gpu, REG_A4XX_RBBM_INTERFACE_HANG_INT_CTL,
			(1 << 30) | 0xFFFF);

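	/*
	 * The OCMEM base appears to be programmed in 16 KiB units, hence
	 * the shift by 14 (an inference from the shift value; the register
	 * layout is not documented here).
	 */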
	gpu_write(gpu, REG_A4XX_RB_GMEM_BASE_ADDR,
			(unsigned int)(a4xx_gpu->ocmem.base >> 14));

	/* Turn on performance counters: */
	gpu_write(gpu, REG_A4XX_RBBM_PERFCTR_CTL, 0x01);

	/* use the first CP counter for timestamp queries; userspace may set
	 * this as well, but it selects the same counter/countable:
	 */
	gpu_write(gpu, REG_A4XX_CP_PERFCTR_CP_SEL_0, CP_ALWAYS_COUNT);

	if (adreno_is_a430(adreno_gpu))
		gpu_write(gpu, REG_A4XX_UCHE_CACHE_WAYS_VFD, 0x07);

	/* Disable L2 bypass to avoid UCHE out of bounds errors */
	gpu_write(gpu, REG_A4XX_UCHE_TRAP_BASE_LO, 0xffff0000);
	gpu_write(gpu, REG_A4XX_UCHE_TRAP_BASE_HI, 0xffff0000);

	gpu_write(gpu, REG_A4XX_CP_DEBUG, (1 << 25) |
			(adreno_is_a420(adreno_gpu) ? (1 << 29) : 0));

	/* On A430 enable SP regfile sleep for power savings */
	/* TODO downstream does this for !420, so maybe applies for 405 too? */
	if (!adreno_is_a420(adreno_gpu)) {
		gpu_write(gpu, REG_A4XX_RBBM_SP_REGFILE_SLEEP_CNTL_0,
			0x00000441);
		gpu_write(gpu, REG_A4XX_RBBM_SP_REGFILE_SLEEP_CNTL_1,
			0x00000441);
	}

	a4xx_enable_hwcg(gpu);

	/*
	 * For A420 set RBBM_CLOCK_DELAY_HLSQ.CGC_HLSQ_TP_EARLY_CYC >= 2
	 * due to timing issue with HLSQ_TP_CLK_EN
	 */
	if (adreno_is_a420(adreno_gpu)) {
		unsigned int val;
		val = gpu_read(gpu, REG_A4XX_RBBM_CLOCK_DELAY_HLSQ);
		val &= ~A4XX_CGC_HLSQ_EARLY_CYC__MASK;
		val |= 2 << A4XX_CGC_HLSQ_EARLY_CYC__SHIFT;
		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_HLSQ, val);
	}

	/* setup access protection: */
	gpu_write(gpu, REG_A4XX_CP_PROTECT_CTRL, 0x00000007);

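	/*
	 * Each CP_PROTECT entry below appears to pack one protected register
	 * window into a single word, matching the scheme the a5xx code
	 * expresses with its ADRENO_PROTECT_RW() helper (an inference; a4xx
	 * defines no such macro here):
	 *
	 *   (trap_write << 30) | (trap_read << 29) |
	 *   (ilog2(num_regs) << 24) | base_reg_offset
	 *
	 * e.g. 0x62000010 traps reads and writes to the 4-register window
	 * starting at dword offset 0x10.
	 */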
	/* RBBM registers */
	gpu_write(gpu, REG_A4XX_CP_PROTECT(0), 0x62000010);
	gpu_write(gpu, REG_A4XX_CP_PROTECT(1), 0x63000020);
	gpu_write(gpu, REG_A4XX_CP_PROTECT(2), 0x64000040);
	gpu_write(gpu, REG_A4XX_CP_PROTECT(3), 0x65000080);
	gpu_write(gpu, REG_A4XX_CP_PROTECT(4), 0x66000100);
	gpu_write(gpu, REG_A4XX_CP_PROTECT(5), 0x64000200);

	/* CP registers */
	gpu_write(gpu, REG_A4XX_CP_PROTECT(6), 0x67000800);
	gpu_write(gpu, REG_A4XX_CP_PROTECT(7), 0x64001600);

	/* RB registers */
	gpu_write(gpu, REG_A4XX_CP_PROTECT(8), 0x60003300);

	/* HLSQ registers */
	gpu_write(gpu, REG_A4XX_CP_PROTECT(9), 0x60003800);

	/* VPC registers */
	gpu_write(gpu, REG_A4XX_CP_PROTECT(10), 0x61003980);

	/* SMMU registers */
	gpu_write(gpu, REG_A4XX_CP_PROTECT(11), 0x6e010000);

	gpu_write(gpu, REG_A4XX_RBBM_INT_0_MASK, A4XX_INT0_MASK);

	ret = adreno_hw_init(gpu);
	if (ret)
		return ret;

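	/*
	 * For both microcode images, dword 0 is a version header (printed
	 * via DBG below), so the upload loops start at index 1.
	 */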
	/* Load PM4: */
	ptr = (uint32_t *)(adreno_gpu->fw[ADRENO_FW_PM4]->data);
	len = adreno_gpu->fw[ADRENO_FW_PM4]->size / 4;
	DBG("loading PM4 ucode version: %u", ptr[0]);
	gpu_write(gpu, REG_A4XX_CP_ME_RAM_WADDR, 0);
	for (i = 1; i < len; i++)
		gpu_write(gpu, REG_A4XX_CP_ME_RAM_DATA, ptr[i]);

	/* Load PFP: */
	ptr = (uint32_t *)(adreno_gpu->fw[ADRENO_FW_PFP]->data);
	len = adreno_gpu->fw[ADRENO_FW_PFP]->size / 4;
	DBG("loading PFP ucode version: %u", ptr[0]);

	gpu_write(gpu, REG_A4XX_CP_PFP_UCODE_ADDR, 0);
	for (i = 1; i < len; i++)
		gpu_write(gpu, REG_A4XX_CP_PFP_UCODE_DATA, ptr[i]);

	/* clear ME_HALT to start micro engine */
	gpu_write(gpu, REG_A4XX_CP_ME_CNTL, 0);

	return a4xx_me_init(gpu) ? 0 : -EINVAL;
}

static void a4xx_recover(struct msm_gpu *gpu)
{
	int i;

	adreno_dump_info(gpu);

	for (i = 0; i < 8; i++) {
		printk("CP_SCRATCH_REG%d: %u\n", i,
			gpu_read(gpu, REG_AXXX_CP_SCRATCH_REG0 + i));
	}

	/* dump registers before resetting gpu, if enabled: */
	if (hang_debug)
		a4xx_dump(gpu);

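	/*
	 * Pulse the RBBM soft reset; the read back in between should make
	 * sure the assert has posted before we de-assert it.
	 */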
	gpu_write(gpu, REG_A4XX_RBBM_SW_RESET_CMD, 1);
	gpu_read(gpu, REG_A4XX_RBBM_SW_RESET_CMD);
	gpu_write(gpu, REG_A4XX_RBBM_SW_RESET_CMD, 0);
	adreno_recover(gpu);
}

313 
314 static void a4xx_destroy(struct msm_gpu *gpu)
315 {
316 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
317 	struct a4xx_gpu *a4xx_gpu = to_a4xx_gpu(adreno_gpu);
318 
319 	DBG("%s", gpu->name);
320 
321 	adreno_gpu_cleanup(adreno_gpu);
322 
323 	adreno_gpu_ocmem_cleanup(&a4xx_gpu->ocmem);
324 
325 	kfree(a4xx_gpu);
326 }
327 
328 static bool a4xx_idle(struct msm_gpu *gpu)
329 {
330 	/* wait for ringbuffer to drain: */
331 	if (!adreno_idle(gpu, gpu->rb[0]))
332 		return false;
333 
334 	/* then wait for GPU to finish: */
335 	if (spin_until(!(gpu_read(gpu, REG_A4XX_RBBM_STATUS) &
336 					A4XX_RBBM_STATUS_GPU_BUSY))) {
337 		DRM_ERROR("%s: timeout waiting for GPU to idle!\n", gpu->name);
338 		/* TODO maybe we need to reset GPU here to recover from hang? */
339 		return false;
340 	}
341 
342 	return true;
343 }
344 

static irqreturn_t a4xx_irq(struct msm_gpu *gpu)
{
	uint32_t status;

	status = gpu_read(gpu, REG_A4XX_RBBM_INT_0_STATUS);
	DBG("%s: Int status %08x", gpu->name, status);

	if (status & A4XX_INT0_CP_REG_PROTECT_FAULT) {
		uint32_t reg = gpu_read(gpu, REG_A4XX_CP_PROTECT_STATUS);
		printk("CP | Protected mode error | %s | addr=%x\n",
			reg & (1 << 24) ? "WRITE" : "READ",
			(reg & 0xFFFFF) >> 2);
	}

	gpu_write(gpu, REG_A4XX_RBBM_INT_CLEAR_CMD, status);

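	/*
	 * Kick retire processing unconditionally; msm_gpu_retire() just
	 * schedules the retire worker, so spurious calls are harmless.
	 */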
	msm_gpu_retire(gpu);

	return IRQ_HANDLED;
}

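/*
 * Register ranges for dump/crash-state capture: these tables hold
 * inclusive (start, end) pairs of dword offsets, walked two at a time by
 * the common adreno code and terminated by the ~0 sentinel.
 */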
static const unsigned int a4xx_registers[] = {
	/* RBBM */
	0x0000, 0x0002, 0x0004, 0x0021, 0x0023, 0x0024, 0x0026, 0x0026,
	0x0028, 0x002B, 0x002E, 0x0034, 0x0037, 0x0044, 0x0047, 0x0066,
	0x0068, 0x0095, 0x009C, 0x0170, 0x0174, 0x01AF,
	/* CP */
	0x0200, 0x0233, 0x0240, 0x0250, 0x04C0, 0x04DD, 0x0500, 0x050B,
	0x0578, 0x058F,
	/* VSC */
	0x0C00, 0x0C03, 0x0C08, 0x0C41, 0x0C50, 0x0C51,
	/* GRAS */
	0x0C80, 0x0C81, 0x0C88, 0x0C8F,
	/* RB */
	0x0CC0, 0x0CC0, 0x0CC4, 0x0CD2,
	/* PC */
	0x0D00, 0x0D0C, 0x0D10, 0x0D17, 0x0D20, 0x0D23,
	/* VFD */
	0x0E40, 0x0E4A,
	/* VPC */
	0x0E60, 0x0E61, 0x0E63, 0x0E68,
	/* UCHE */
	0x0E80, 0x0E84, 0x0E88, 0x0E95,
	/* VMIDMT */
	0x1000, 0x1000, 0x1002, 0x1002, 0x1004, 0x1004, 0x1008, 0x100A,
	0x100C, 0x100D, 0x100F, 0x1010, 0x1012, 0x1016, 0x1024, 0x1024,
	0x1027, 0x1027, 0x1100, 0x1100, 0x1102, 0x1102, 0x1104, 0x1104,
	0x1110, 0x1110, 0x1112, 0x1116, 0x1124, 0x1124, 0x1300, 0x1300,
	0x1380, 0x1380,
	/* GRAS CTX 0 */
	0x2000, 0x2004, 0x2008, 0x2067, 0x2070, 0x2078, 0x207B, 0x216E,
	/* PC CTX 0 */
	0x21C0, 0x21C6, 0x21D0, 0x21D0, 0x21D9, 0x21D9, 0x21E5, 0x21E7,
	/* VFD CTX 0 */
	0x2200, 0x2204, 0x2208, 0x22A9,
	/* GRAS CTX 1 */
	0x2400, 0x2404, 0x2408, 0x2467, 0x2470, 0x2478, 0x247B, 0x256E,
	/* PC CTX 1 */
	0x25C0, 0x25C6, 0x25D0, 0x25D0, 0x25D9, 0x25D9, 0x25E5, 0x25E7,
	/* VFD CTX 1 */
	0x2600, 0x2604, 0x2608, 0x26A9,
	/* XPU */
	0x2C00, 0x2C01, 0x2C10, 0x2C10, 0x2C12, 0x2C16, 0x2C1D, 0x2C20,
	0x2C28, 0x2C28, 0x2C30, 0x2C30, 0x2C32, 0x2C36, 0x2C40, 0x2C40,
	0x2C50, 0x2C50, 0x2C52, 0x2C56, 0x2C80, 0x2C80, 0x2C94, 0x2C95,
	/* VBIF */
	0x3000, 0x3007, 0x300C, 0x3014, 0x3018, 0x301D, 0x3020, 0x3022,
	0x3024, 0x3026, 0x3028, 0x302A, 0x302C, 0x302D, 0x3030, 0x3031,
	0x3034, 0x3036, 0x3038, 0x3038, 0x303C, 0x303D, 0x3040, 0x3040,
	0x3049, 0x3049, 0x3058, 0x3058, 0x305B, 0x3061, 0x3064, 0x3068,
	0x306C, 0x306D, 0x3080, 0x3088, 0x308B, 0x308C, 0x3090, 0x3094,
	0x3098, 0x3098, 0x309C, 0x309C, 0x30C0, 0x30C0, 0x30C8, 0x30C8,
	0x30D0, 0x30D0, 0x30D8, 0x30D8, 0x30E0, 0x30E0, 0x3100, 0x3100,
	0x3108, 0x3108, 0x3110, 0x3110, 0x3118, 0x3118, 0x3120, 0x3120,
	0x3124, 0x3125, 0x3129, 0x3129, 0x3131, 0x3131, 0x330C, 0x330C,
	0x3310, 0x3310, 0x3400, 0x3401, 0x3410, 0x3410, 0x3412, 0x3416,
	0x341D, 0x3420, 0x3428, 0x3428, 0x3430, 0x3430, 0x3432, 0x3436,
	0x3440, 0x3440, 0x3450, 0x3450, 0x3452, 0x3456, 0x3480, 0x3480,
	0x3494, 0x3495, 0x4000, 0x4000, 0x4002, 0x4002, 0x4004, 0x4004,
	0x4008, 0x400A, 0x400C, 0x400D, 0x400F, 0x4012, 0x4014, 0x4016,
	0x401D, 0x401D, 0x4020, 0x4027, 0x4060, 0x4062, 0x4200, 0x4200,
	0x4300, 0x4300, 0x4400, 0x4400, 0x4500, 0x4500, 0x4800, 0x4802,
	0x480F, 0x480F, 0x4811, 0x4811, 0x4813, 0x4813, 0x4815, 0x4816,
	0x482B, 0x482B, 0x4857, 0x4857, 0x4883, 0x4883, 0x48AF, 0x48AF,
	0x48C5, 0x48C5, 0x48E5, 0x48E5, 0x4905, 0x4905, 0x4925, 0x4925,
	0x4945, 0x4945, 0x4950, 0x4950, 0x495B, 0x495B, 0x4980, 0x498E,
	0x4B00, 0x4B00, 0x4C00, 0x4C00, 0x4D00, 0x4D00, 0x4E00, 0x4E00,
	0x4E80, 0x4E80, 0x4F00, 0x4F00, 0x4F08, 0x4F08, 0x4F10, 0x4F10,
	0x4F18, 0x4F18, 0x4F20, 0x4F20, 0x4F30, 0x4F30, 0x4F60, 0x4F60,
	0x4F80, 0x4F81, 0x4F88, 0x4F89, 0x4FEE, 0x4FEE, 0x4FF3, 0x4FF3,
	0x6000, 0x6001, 0x6008, 0x600F, 0x6014, 0x6016, 0x6018, 0x601B,
	0x61FD, 0x61FD, 0x623C, 0x623C, 0x6380, 0x6380, 0x63A0, 0x63A0,
	0x63C0, 0x63C1, 0x63C8, 0x63C9, 0x63D0, 0x63D4, 0x63D6, 0x63D6,
	0x63EE, 0x63EE, 0x6400, 0x6401, 0x6408, 0x640F, 0x6414, 0x6416,
	0x6418, 0x641B, 0x65FD, 0x65FD, 0x663C, 0x663C, 0x6780, 0x6780,
	0x67A0, 0x67A0, 0x67C0, 0x67C1, 0x67C8, 0x67C9, 0x67D0, 0x67D4,
	0x67D6, 0x67D6, 0x67EE, 0x67EE, 0x6800, 0x6801, 0x6808, 0x680F,
	0x6814, 0x6816, 0x6818, 0x681B, 0x69FD, 0x69FD, 0x6A3C, 0x6A3C,
	0x6B80, 0x6B80, 0x6BA0, 0x6BA0, 0x6BC0, 0x6BC1, 0x6BC8, 0x6BC9,
	0x6BD0, 0x6BD4, 0x6BD6, 0x6BD6, 0x6BEE, 0x6BEE,
	~0 /* sentinel */
};

static const unsigned int a405_registers[] = {
	/* RBBM */
	0x0000, 0x0002, 0x0004, 0x0021, 0x0023, 0x0024, 0x0026, 0x0026,
	0x0028, 0x002B, 0x002E, 0x0034, 0x0037, 0x0044, 0x0047, 0x0066,
	0x0068, 0x0095, 0x009C, 0x0170, 0x0174, 0x01AF,
	/* CP */
	0x0200, 0x0233, 0x0240, 0x0250, 0x04C0, 0x04DD, 0x0500, 0x050B,
	0x0578, 0x058F,
	/* VSC */
	0x0C00, 0x0C03, 0x0C08, 0x0C41, 0x0C50, 0x0C51,
	/* GRAS */
	0x0C80, 0x0C81, 0x0C88, 0x0C8F,
	/* RB */
	0x0CC0, 0x0CC0, 0x0CC4, 0x0CD2,
	/* PC */
	0x0D00, 0x0D0C, 0x0D10, 0x0D17, 0x0D20, 0x0D23,
	/* VFD */
	0x0E40, 0x0E4A,
	/* VPC */
	0x0E60, 0x0E61, 0x0E63, 0x0E68,
	/* UCHE */
	0x0E80, 0x0E84, 0x0E88, 0x0E95,
	/* GRAS CTX 0 */
	0x2000, 0x2004, 0x2008, 0x2067, 0x2070, 0x2078, 0x207B, 0x216E,
	/* PC CTX 0 */
	0x21C0, 0x21C6, 0x21D0, 0x21D0, 0x21D9, 0x21D9, 0x21E5, 0x21E7,
	/* VFD CTX 0 */
	0x2200, 0x2204, 0x2208, 0x22A9,
	/* GRAS CTX 1 */
	0x2400, 0x2404, 0x2408, 0x2467, 0x2470, 0x2478, 0x247B, 0x256E,
	/* PC CTX 1 */
	0x25C0, 0x25C6, 0x25D0, 0x25D0, 0x25D9, 0x25D9, 0x25E5, 0x25E7,
	/* VFD CTX 1 */
	0x2600, 0x2604, 0x2608, 0x26A9,
	/* VBIF version 0x20050000 */
	0x3000, 0x3007, 0x302C, 0x302C, 0x3030, 0x3030, 0x3034, 0x3036,
	0x3038, 0x3038, 0x303C, 0x303D, 0x3040, 0x3040, 0x3049, 0x3049,
	0x3058, 0x3058, 0x305B, 0x3061, 0x3064, 0x3068, 0x306C, 0x306D,
	0x3080, 0x3088, 0x308B, 0x308C, 0x3090, 0x3094, 0x3098, 0x3098,
	0x309C, 0x309C, 0x30C0, 0x30C0, 0x30C8, 0x30C8, 0x30D0, 0x30D0,
	0x30D8, 0x30D8, 0x30E0, 0x30E0, 0x3100, 0x3100, 0x3108, 0x3108,
	0x3110, 0x3110, 0x3118, 0x3118, 0x3120, 0x3120, 0x3124, 0x3125,
	0x3129, 0x3129, 0x340C, 0x340C, 0x3410, 0x3410,
	~0 /* sentinel */
};

static struct msm_gpu_state *a4xx_gpu_state_get(struct msm_gpu *gpu)
{
	struct msm_gpu_state *state = kzalloc(sizeof(*state), GFP_KERNEL);

	if (!state)
		return ERR_PTR(-ENOMEM);

	adreno_gpu_state_get(gpu, state);

	state->rbbm_status = gpu_read(gpu, REG_A4XX_RBBM_STATUS);

	return state;
}

/* Register offset defines for A4XX, in order of enum adreno_regs */
static const unsigned int a4xx_register_offsets[REG_ADRENO_REGISTER_MAX] = {
	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_BASE, REG_A4XX_CP_RB_BASE),
	REG_ADRENO_SKIP(REG_ADRENO_CP_RB_BASE_HI),
	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_RPTR_ADDR, REG_A4XX_CP_RB_RPTR_ADDR),
	REG_ADRENO_SKIP(REG_ADRENO_CP_RB_RPTR_ADDR_HI),
	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_RPTR, REG_A4XX_CP_RB_RPTR),
	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_WPTR, REG_A4XX_CP_RB_WPTR),
	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_CNTL, REG_A4XX_CP_RB_CNTL),
};

static void a4xx_dump(struct msm_gpu *gpu)
{
	printk("status:   %08x\n",
			gpu_read(gpu, REG_A4XX_RBBM_STATUS));
	adreno_dump(gpu);
}

static int a4xx_pm_resume(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	int ret;

	ret = msm_gpu_pm_resume(gpu);
	if (ret)
		return ret;

	if (adreno_is_a430(adreno_gpu)) {
		unsigned int reg;
		/* Set the default register values; set SW_COLLAPSE to 0 */
		gpu_write(gpu, REG_A4XX_RBBM_POWER_CNTL_IP, 0x778000);
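		/*
		 * Poll until the SP/TP rail reports power-on. Note there is
		 * no timeout here; we rely on the rail always coming up.
		 */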
		do {
			udelay(5);
			reg = gpu_read(gpu, REG_A4XX_RBBM_POWER_STATUS);
		} while (!(reg & A4XX_RBBM_POWER_CNTL_IP_SP_TP_PWR_ON));
	}
	return 0;
}

static int a4xx_pm_suspend(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	int ret;

	ret = msm_gpu_pm_suspend(gpu);
	if (ret)
		return ret;

	if (adreno_is_a430(adreno_gpu)) {
		/* Set the default register values; set SW_COLLAPSE to 1 */
		gpu_write(gpu, REG_A4XX_RBBM_POWER_CNTL_IP, 0x778001);
	}
	return 0;
}

static int a4xx_get_timestamp(struct msm_gpu *gpu, uint64_t *value)
{
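	/*
	 * 64-bit read of CP perf counter 0, which hw_init pointed at
	 * CP_ALWAYS_COUNT, giving a free-running timestamp source.
	 */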
	*value = gpu_read64(gpu, REG_A4XX_RBBM_PERFCTR_CP_0_LO,
		REG_A4XX_RBBM_PERFCTR_CP_0_HI);

	return 0;
}

static const struct adreno_gpu_funcs funcs = {
	.base = {
		.get_param = adreno_get_param,
		.hw_init = a4xx_hw_init,
		.pm_suspend = a4xx_pm_suspend,
		.pm_resume = a4xx_pm_resume,
		.recover = a4xx_recover,
		.submit = adreno_submit,
		.flush = adreno_flush,
		.active_ring = adreno_active_ring,
		.irq = a4xx_irq,
		.destroy = a4xx_destroy,
#if defined(CONFIG_DEBUG_FS) || defined(CONFIG_DEV_COREDUMP)
		.show = adreno_show,
#endif
		.gpu_state_get = a4xx_gpu_state_get,
		.gpu_state_put = adreno_gpu_state_put,
		.create_address_space = adreno_iommu_create_address_space,
	},
	.get_timestamp = a4xx_get_timestamp,
};

struct msm_gpu *a4xx_gpu_init(struct drm_device *dev)
{
	struct a4xx_gpu *a4xx_gpu = NULL;
	struct adreno_gpu *adreno_gpu;
	struct msm_gpu *gpu;
	struct msm_drm_private *priv = dev->dev_private;
	struct platform_device *pdev = priv->gpu_pdev;
	int ret;

	if (!pdev) {
		DRM_DEV_ERROR(dev->dev, "no a4xx device\n");
		ret = -ENXIO;
		goto fail;
	}

	a4xx_gpu = kzalloc(sizeof(*a4xx_gpu), GFP_KERNEL);
	if (!a4xx_gpu) {
		ret = -ENOMEM;
		goto fail;
	}

	adreno_gpu = &a4xx_gpu->base;
	gpu = &adreno_gpu->base;

	gpu->perfcntrs = NULL;
	gpu->num_perfcntrs = 0;

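	/* the final argument is nr_rings; a4xx uses a single ringbuffer */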
	ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs, 1);
	if (ret)
		goto fail;

	adreno_gpu->registers = adreno_is_a405(adreno_gpu) ? a405_registers :
							     a4xx_registers;
	adreno_gpu->reg_offsets = a4xx_register_offsets;

	/* if needed, allocate gmem (OCMEM-backed on a4xx): */
	if (adreno_is_a4xx(adreno_gpu)) {
		ret = adreno_gpu_ocmem_init(dev->dev, adreno_gpu,
					    &a4xx_gpu->ocmem);
		if (ret)
			goto fail;
	}

	if (!gpu->aspace) {
		/* TODO we think it is possible to configure the GPU to
		 * restrict access to VRAM carveout.  But the required
		 * registers are unknown.  For now just bail out and
		 * limp along with just modesetting.  If it turns out
		 * to not be possible to restrict access, then we must
		 * implement a cmdstream validator.
		 */
		DRM_DEV_ERROR(dev->dev, "No memory protection without IOMMU\n");
		ret = -ENXIO;
		goto fail;
	}

	/*
	 * Set the ICC path to maximum speed for now by multiplying the fastest
	 * frequency by the bus width (8). We'll want to scale this later on to
	 * improve battery life.
	 */
	icc_set_bw(gpu->icc_path, 0, Bps_to_icc(gpu->fast_rate) * 8);
	icc_set_bw(gpu->ocmem_icc_path, 0, Bps_to_icc(gpu->fast_rate) * 8);

	return gpu;

fail:
	if (a4xx_gpu)
		a4xx_destroy(&a4xx_gpu->base.base);

	return ERR_PTR(ret);
}