// SPDX-License-Identifier: GPL-2.0-only
/* Copyright (c) 2014 The Linux Foundation. All rights reserved.
 */
#include "a4xx_gpu.h"

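/*
 * Interrupts left unmasked at init: mostly error/fault sources, plus
 * CACHE_FLUSH_TS, which fires on timestamp writeback at the end of a
 * submit and drives retirement.
 */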
#define A4XX_INT0_MASK \
	(A4XX_INT0_RBBM_AHB_ERROR |        \
	 A4XX_INT0_RBBM_ATB_BUS_OVERFLOW | \
	 A4XX_INT0_CP_T0_PACKET_IN_IB |    \
	 A4XX_INT0_CP_OPCODE_ERROR |       \
	 A4XX_INT0_CP_RESERVED_BIT_ERROR | \
	 A4XX_INT0_CP_HW_FAULT |           \
	 A4XX_INT0_CP_IB1_INT |            \
	 A4XX_INT0_CP_IB2_INT |            \
	 A4XX_INT0_CP_RB_INT |             \
	 A4XX_INT0_CP_REG_PROTECT_FAULT |  \
	 A4XX_INT0_CP_AHB_ERROR_HALT |     \
	 A4XX_INT0_CACHE_FLUSH_TS |        \
	 A4XX_INT0_UCHE_OOB_ACCESS)

extern bool hang_debug;
static void a4xx_dump(struct msm_gpu *gpu);
static bool a4xx_idle(struct msm_gpu *gpu);

/*
 * a4xx_enable_hwcg() - Program the clock control registers
 * @gpu: The GPU to program
 */
static void a4xx_enable_hwcg(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	unsigned int i;
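
	/*
	 * Each block gets CTL/CTL2 (gate enables) plus HYST and DELAY
	 * registers; the magic values below are the known-good settings
	 * carried over from the downstream driver rather than documented
	 * fields.
	 */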
	for (i = 0; i < 4; i++)
		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL_TP(i), 0x02222202);
	for (i = 0; i < 4; i++)
		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL2_TP(i), 0x00002222);
	for (i = 0; i < 4; i++)
		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_HYST_TP(i), 0x0E739CE7);
	for (i = 0; i < 4; i++)
		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_TP(i), 0x00111111);
	for (i = 0; i < 4; i++)
		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL_SP(i), 0x22222222);
	for (i = 0; i < 4; i++)
		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL2_SP(i), 0x00222222);
	for (i = 0; i < 4; i++)
		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_HYST_SP(i), 0x00000104);
	for (i = 0; i < 4; i++)
		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_SP(i), 0x00000081);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL_UCHE, 0x22222222);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL2_UCHE, 0x02222222);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL3_UCHE, 0x00000000);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL4_UCHE, 0x00000000);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_HYST_UCHE, 0x00004444);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_UCHE, 0x00001112);
	for (i = 0; i < 4; i++)
		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL_RB(i), 0x22222222);

	/* Disable L1 clocking in A420 due to CCU issues with it */
	for (i = 0; i < 4; i++) {
		if (adreno_is_a420(adreno_gpu)) {
			gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL2_RB(i),
					0x00002020);
		} else {
			gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL2_RB(i),
					0x00022020);
		}
	}

	for (i = 0; i < 4; i++) {
		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL_MARB_CCU(i),
				0x00000922);
	}

	for (i = 0; i < 4; i++) {
		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_HYST_RB_MARB_CCU(i),
				0x00000000);
	}

	for (i = 0; i < 4; i++) {
		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_RB_MARB_CCU_L1(i),
				0x00000001);
	}

	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_MODE_GPC, 0x02222222);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_HYST_GPC, 0x04100104);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_GPC, 0x00022222);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL_COM_DCOM, 0x00000022);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_HYST_COM_DCOM, 0x0000010F);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_COM_DCOM, 0x00000022);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL_TSE_RAS_RBBM, 0x00222222);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM, 0x00004104);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM, 0x00000222);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL_HLSQ, 0x00000000);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_HYST_HLSQ, 0x00000000);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_HLSQ, 0x00220000);
	/*
	 * Early A430s have a timing issue with SP/TP power collapse;
	 * disabling HW clock gating prevents it.
	 */
	if (adreno_is_a430(adreno_gpu) && adreno_gpu->rev.patchid < 2)
		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL, 0);
	else
		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL, 0xAAAAAAAA);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL2, 0);
}

static bool a4xx_me_init(struct msm_gpu *gpu)
{
	struct msm_ringbuffer *ring = gpu->rb[0];

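	/*
	 * Seed the micro engine with its 17-dword CP_ME_INIT payload; the
	 * leading dword appears to be a mask of which ordinals follow, with
	 * the values carried over from the downstream driver.
	 */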
	OUT_PKT3(ring, CP_ME_INIT, 17);
	OUT_RING(ring, 0x000003f7);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000080);
	OUT_RING(ring, 0x00000100);
	OUT_RING(ring, 0x00000180);
	OUT_RING(ring, 0x00006600);
	OUT_RING(ring, 0x00000150);
	OUT_RING(ring, 0x0000014e);
	OUT_RING(ring, 0x00000154);
	OUT_RING(ring, 0x00000001);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);

	gpu->funcs->flush(gpu, ring);
	return a4xx_idle(gpu);
}

static int a4xx_hw_init(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a4xx_gpu *a4xx_gpu = to_a4xx_gpu(adreno_gpu);
	uint32_t *ptr, len;
	int i, ret;

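	/* Per-SoC VBIF (GPU bus interface) tuning: arbitration and in-flight limits */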
	if (adreno_is_a420(adreno_gpu)) {
		gpu_write(gpu, REG_A4XX_VBIF_ABIT_SORT, 0x0001001F);
		gpu_write(gpu, REG_A4XX_VBIF_ABIT_SORT_CONF, 0x000000A4);
		gpu_write(gpu, REG_A4XX_VBIF_GATE_OFF_WRREQ_EN, 0x00000001);
		gpu_write(gpu, REG_A4XX_VBIF_IN_RD_LIM_CONF0, 0x18181818);
		gpu_write(gpu, REG_A4XX_VBIF_IN_RD_LIM_CONF1, 0x00000018);
		gpu_write(gpu, REG_A4XX_VBIF_IN_WR_LIM_CONF0, 0x18181818);
		gpu_write(gpu, REG_A4XX_VBIF_IN_WR_LIM_CONF1, 0x00000018);
		gpu_write(gpu, REG_A4XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x00000003);
	} else if (adreno_is_a430(adreno_gpu)) {
		gpu_write(gpu, REG_A4XX_VBIF_GATE_OFF_WRREQ_EN, 0x00000001);
		gpu_write(gpu, REG_A4XX_VBIF_IN_RD_LIM_CONF0, 0x18181818);
		gpu_write(gpu, REG_A4XX_VBIF_IN_RD_LIM_CONF1, 0x00000018);
		gpu_write(gpu, REG_A4XX_VBIF_IN_WR_LIM_CONF0, 0x18181818);
		gpu_write(gpu, REG_A4XX_VBIF_IN_WR_LIM_CONF1, 0x00000018);
		gpu_write(gpu, REG_A4XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x00000003);
	} else {
		BUG();
	}

	/* Make all blocks contribute to the GPU BUSY perf counter */
	gpu_write(gpu, REG_A4XX_RBBM_GPU_BUSY_MASKED, 0xffffffff);

	/* Tune the hysteresis counters for SP and CP idle detection */
	gpu_write(gpu, REG_A4XX_RBBM_SP_HYST_CNT, 0x10);
	gpu_write(gpu, REG_A4XX_RBBM_WAIT_IDLE_CLOCKS_CTL, 0x10);

	if (adreno_is_a430(adreno_gpu))
		gpu_write(gpu, REG_A4XX_RBBM_WAIT_IDLE_CLOCKS_CTL2, 0x30);

	/* Enable the RBBM error reporting bits */
	gpu_write(gpu, REG_A4XX_RBBM_AHB_CTL0, 0x00000001);

	/* Enable AHB error reporting */
	gpu_write(gpu, REG_A4XX_RBBM_AHB_CTL1, 0xa6ffffff);

	/* Enable power counters */
	gpu_write(gpu, REG_A4XX_RBBM_RBBM_CTL, 0x00000030);

	/*
	 * Turn on hang detection - this spews a lot of useful information
	 * into the RBBM registers on a hang:
	 */
	gpu_write(gpu, REG_A4XX_RBBM_INTERFACE_HANG_INT_CTL,
			(1 << 30) | 0xFFFF);

	gpu_write(gpu, REG_A4XX_RB_GMEM_BASE_ADDR,
			(unsigned int)(a4xx_gpu->ocmem.base >> 14));

	/* Turn on performance counters: */
	gpu_write(gpu, REG_A4XX_RBBM_PERFCTR_CTL, 0x01);

	/*
	 * Use the first CP counter for timestamp queries. Userspace may set
	 * this as well, but it selects the same counter/countable:
	 */
	gpu_write(gpu, REG_A4XX_CP_PERFCTR_CP_SEL_0, CP_ALWAYS_COUNT);

	if (adreno_is_a430(adreno_gpu))
		gpu_write(gpu, REG_A4XX_UCHE_CACHE_WAYS_VFD, 0x07);

	/* Disable L2 bypass to avoid UCHE out of bounds errors */
	gpu_write(gpu, REG_A4XX_UCHE_TRAP_BASE_LO, 0xffff0000);
	gpu_write(gpu, REG_A4XX_UCHE_TRAP_BASE_HI, 0xffff0000);

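	/*
	 * The CP_DEBUG bits set below (bit 25, plus bit 29 on a420) are
	 * undocumented; they mirror the downstream driver's defaults.
	 */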
	gpu_write(gpu, REG_A4XX_CP_DEBUG, (1 << 25) |
			(adreno_is_a420(adreno_gpu) ? (1 << 29) : 0));

	/* On A430 enable SP regfile sleep for power savings */
	/* TODO downstream does this for !420, so maybe applies for 405 too? */
	if (!adreno_is_a420(adreno_gpu)) {
		gpu_write(gpu, REG_A4XX_RBBM_SP_REGFILE_SLEEP_CNTL_0,
			0x00000441);
		gpu_write(gpu, REG_A4XX_RBBM_SP_REGFILE_SLEEP_CNTL_1,
			0x00000441);
	}

	a4xx_enable_hwcg(gpu);

	/*
	 * For A420 set RBBM_CLOCK_DELAY_HLSQ.CGC_HLSQ_TP_EARLY_CYC >= 2
	 * due to timing issue with HLSQ_TP_CLK_EN
	 */
	if (adreno_is_a420(adreno_gpu)) {
		unsigned int val;

		val = gpu_read(gpu, REG_A4XX_RBBM_CLOCK_DELAY_HLSQ);
		val &= ~A4XX_CGC_HLSQ_EARLY_CYC__MASK;
		val |= 2 << A4XX_CGC_HLSQ_EARLY_CYC__SHIFT;
		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_HLSQ, val);
	}

	/* setup access protection: */
	gpu_write(gpu, REG_A4XX_CP_PROTECT_CTRL, 0x00000007);

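	/*
	 * Each CP_PROTECT entry encodes a register base and a power-of-two
	 * range that protected-mode command streams may not touch; the
	 * bitfield layout follows the downstream driver.
	 */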
	/* RBBM registers */
	gpu_write(gpu, REG_A4XX_CP_PROTECT(0), 0x62000010);
	gpu_write(gpu, REG_A4XX_CP_PROTECT(1), 0x63000020);
	gpu_write(gpu, REG_A4XX_CP_PROTECT(2), 0x64000040);
	gpu_write(gpu, REG_A4XX_CP_PROTECT(3), 0x65000080);
	gpu_write(gpu, REG_A4XX_CP_PROTECT(4), 0x66000100);
	gpu_write(gpu, REG_A4XX_CP_PROTECT(5), 0x64000200);

	/* CP registers */
	gpu_write(gpu, REG_A4XX_CP_PROTECT(6), 0x67000800);
	gpu_write(gpu, REG_A4XX_CP_PROTECT(7), 0x64001600);

	/* RB registers */
	gpu_write(gpu, REG_A4XX_CP_PROTECT(8), 0x60003300);

	/* HLSQ registers */
	gpu_write(gpu, REG_A4XX_CP_PROTECT(9), 0x60003800);

	/* VPC registers */
	gpu_write(gpu, REG_A4XX_CP_PROTECT(10), 0x61003980);

	/* SMMU registers */
	gpu_write(gpu, REG_A4XX_CP_PROTECT(11), 0x6e010000);

	gpu_write(gpu, REG_A4XX_RBBM_INT_0_MASK, A4XX_INT0_MASK);

	ret = adreno_hw_init(gpu);
	if (ret)
		return ret;

	/* Load PM4: */
	ptr = (uint32_t *)(adreno_gpu->fw[ADRENO_FW_PM4]->data);
	len = adreno_gpu->fw[ADRENO_FW_PM4]->size / 4;
	DBG("loading PM4 ucode version: %u", ptr[0]);
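	/* dword 0 holds the ucode version, so start the upload at dword 1 */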
	gpu_write(gpu, REG_A4XX_CP_ME_RAM_WADDR, 0);
	for (i = 1; i < len; i++)
		gpu_write(gpu, REG_A4XX_CP_ME_RAM_DATA, ptr[i]);

	/* Load PFP: */
	ptr = (uint32_t *)(adreno_gpu->fw[ADRENO_FW_PFP]->data);
	len = adreno_gpu->fw[ADRENO_FW_PFP]->size / 4;
	DBG("loading PFP ucode version: %u", ptr[0]);

	gpu_write(gpu, REG_A4XX_CP_PFP_UCODE_ADDR, 0);
	for (i = 1; i < len; i++)
		gpu_write(gpu, REG_A4XX_CP_PFP_UCODE_DATA, ptr[i]);

	/* clear ME_HALT to start micro engine */
	gpu_write(gpu, REG_A4XX_CP_ME_CNTL, 0);

	return a4xx_me_init(gpu) ? 0 : -EINVAL;
}

static void a4xx_recover(struct msm_gpu *gpu)
{
	int i;

	adreno_dump_info(gpu);

	for (i = 0; i < 8; i++) {
		printk("CP_SCRATCH_REG%d: %u\n", i,
			gpu_read(gpu, REG_AXXX_CP_SCRATCH_REG0 + i));
	}

	/* dump registers before resetting gpu, if enabled: */
	if (hang_debug)
		a4xx_dump(gpu);

	gpu_write(gpu, REG_A4XX_RBBM_SW_RESET_CMD, 1);
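	/* read back to ensure the reset write has posted before clearing it */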
	gpu_read(gpu, REG_A4XX_RBBM_SW_RESET_CMD);
	gpu_write(gpu, REG_A4XX_RBBM_SW_RESET_CMD, 0);
	adreno_recover(gpu);
}

static void a4xx_destroy(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a4xx_gpu *a4xx_gpu = to_a4xx_gpu(adreno_gpu);

	DBG("%s", gpu->name);

	adreno_gpu_cleanup(adreno_gpu);

	adreno_gpu_ocmem_cleanup(&a4xx_gpu->ocmem);

	kfree(a4xx_gpu);
}

static bool a4xx_idle(struct msm_gpu *gpu)
{
	/* wait for ringbuffer to drain: */
	if (!adreno_idle(gpu, gpu->rb[0]))
		return false;

	/* then wait for GPU to finish: */
	if (spin_until(!(gpu_read(gpu, REG_A4XX_RBBM_STATUS) &
					A4XX_RBBM_STATUS_GPU_BUSY))) {
		DRM_ERROR("%s: timeout waiting for GPU to idle!\n", gpu->name);
		/* TODO maybe we need to reset GPU here to recover from hang? */
		return false;
	}

	return true;
}

static irqreturn_t a4xx_irq(struct msm_gpu *gpu)
{
	uint32_t status;

	status = gpu_read(gpu, REG_A4XX_RBBM_INT_0_STATUS);
	DBG("%s: Int status %08x", gpu->name, status);

	if (status & A4XX_INT0_CP_REG_PROTECT_FAULT) {
		uint32_t reg = gpu_read(gpu, REG_A4XX_CP_PROTECT_STATUS);

		printk("CP | Protected mode error | %s | addr=%x\n",
			reg & (1 << 24) ? "WRITE" : "READ",
			(reg & 0xFFFFF) >> 2);
	}

	gpu_write(gpu, REG_A4XX_RBBM_INT_CLEAR_CMD, status);

	msm_gpu_retire(gpu);

	return IRQ_HANDLED;
}

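/* Pairs of (start, end) register offsets, inclusive, dumped for debug/crash state */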
static const unsigned int a4xx_registers[] = {
	/* RBBM */
	0x0000, 0x0002, 0x0004, 0x0021, 0x0023, 0x0024, 0x0026, 0x0026,
	0x0028, 0x002B, 0x002E, 0x0034, 0x0037, 0x0044, 0x0047, 0x0066,
	0x0068, 0x0095, 0x009C, 0x0170, 0x0174, 0x01AF,
	/* CP */
	0x0200, 0x0233, 0x0240, 0x0250, 0x04C0, 0x04DD, 0x0500, 0x050B,
	0x0578, 0x058F,
	/* VSC */
	0x0C00, 0x0C03, 0x0C08, 0x0C41, 0x0C50, 0x0C51,
	/* GRAS */
	0x0C80, 0x0C81, 0x0C88, 0x0C8F,
	/* RB */
	0x0CC0, 0x0CC0, 0x0CC4, 0x0CD2,
	/* PC */
	0x0D00, 0x0D0C, 0x0D10, 0x0D17, 0x0D20, 0x0D23,
	/* VFD */
	0x0E40, 0x0E4A,
	/* VPC */
	0x0E60, 0x0E61, 0x0E63, 0x0E68,
	/* UCHE */
	0x0E80, 0x0E84, 0x0E88, 0x0E95,
	/* VMIDMT */
	0x1000, 0x1000, 0x1002, 0x1002, 0x1004, 0x1004, 0x1008, 0x100A,
	0x100C, 0x100D, 0x100F, 0x1010, 0x1012, 0x1016, 0x1024, 0x1024,
	0x1027, 0x1027, 0x1100, 0x1100, 0x1102, 0x1102, 0x1104, 0x1104,
	0x1110, 0x1110, 0x1112, 0x1116, 0x1124, 0x1124, 0x1300, 0x1300,
	0x1380, 0x1380,
	/* GRAS CTX 0 */
	0x2000, 0x2004, 0x2008, 0x2067, 0x2070, 0x2078, 0x207B, 0x216E,
	/* PC CTX 0 */
	0x21C0, 0x21C6, 0x21D0, 0x21D0, 0x21D9, 0x21D9, 0x21E5, 0x21E7,
	/* VFD CTX 0 */
	0x2200, 0x2204, 0x2208, 0x22A9,
	/* GRAS CTX 1 */
	0x2400, 0x2404, 0x2408, 0x2467, 0x2470, 0x2478, 0x247B, 0x256E,
	/* PC CTX 1 */
	0x25C0, 0x25C6, 0x25D0, 0x25D0, 0x25D9, 0x25D9, 0x25E5, 0x25E7,
	/* VFD CTX 1 */
	0x2600, 0x2604, 0x2608, 0x26A9,
	/* XPU */
	0x2C00, 0x2C01, 0x2C10, 0x2C10, 0x2C12, 0x2C16, 0x2C1D, 0x2C20,
	0x2C28, 0x2C28, 0x2C30, 0x2C30, 0x2C32, 0x2C36, 0x2C40, 0x2C40,
	0x2C50, 0x2C50, 0x2C52, 0x2C56, 0x2C80, 0x2C80, 0x2C94, 0x2C95,
	/* VBIF */
	0x3000, 0x3007, 0x300C, 0x3014, 0x3018, 0x301D, 0x3020, 0x3022,
	0x3024, 0x3026, 0x3028, 0x302A, 0x302C, 0x302D, 0x3030, 0x3031,
	0x3034, 0x3036, 0x3038, 0x3038, 0x303C, 0x303D, 0x3040, 0x3040,
	0x3049, 0x3049, 0x3058, 0x3058, 0x305B, 0x3061, 0x3064, 0x3068,
	0x306C, 0x306D, 0x3080, 0x3088, 0x308B, 0x308C, 0x3090, 0x3094,
	0x3098, 0x3098, 0x309C, 0x309C, 0x30C0, 0x30C0, 0x30C8, 0x30C8,
	0x30D0, 0x30D0, 0x30D8, 0x30D8, 0x30E0, 0x30E0, 0x3100, 0x3100,
	0x3108, 0x3108, 0x3110, 0x3110, 0x3118, 0x3118, 0x3120, 0x3120,
	0x3124, 0x3125, 0x3129, 0x3129, 0x3131, 0x3131, 0x330C, 0x330C,
	0x3310, 0x3310, 0x3400, 0x3401, 0x3410, 0x3410, 0x3412, 0x3416,
	0x341D, 0x3420, 0x3428, 0x3428, 0x3430, 0x3430, 0x3432, 0x3436,
	0x3440, 0x3440, 0x3450, 0x3450, 0x3452, 0x3456, 0x3480, 0x3480,
	0x3494, 0x3495, 0x4000, 0x4000, 0x4002, 0x4002, 0x4004, 0x4004,
	0x4008, 0x400A, 0x400C, 0x400D, 0x400F, 0x4012, 0x4014, 0x4016,
	0x401D, 0x401D, 0x4020, 0x4027, 0x4060, 0x4062, 0x4200, 0x4200,
	0x4300, 0x4300, 0x4400, 0x4400, 0x4500, 0x4500, 0x4800, 0x4802,
	0x480F, 0x480F, 0x4811, 0x4811, 0x4813, 0x4813, 0x4815, 0x4816,
	0x482B, 0x482B, 0x4857, 0x4857, 0x4883, 0x4883, 0x48AF, 0x48AF,
	0x48C5, 0x48C5, 0x48E5, 0x48E5, 0x4905, 0x4905, 0x4925, 0x4925,
	0x4945, 0x4945, 0x4950, 0x4950, 0x495B, 0x495B, 0x4980, 0x498E,
	0x4B00, 0x4B00, 0x4C00, 0x4C00, 0x4D00, 0x4D00, 0x4E00, 0x4E00,
	0x4E80, 0x4E80, 0x4F00, 0x4F00, 0x4F08, 0x4F08, 0x4F10, 0x4F10,
	0x4F18, 0x4F18, 0x4F20, 0x4F20, 0x4F30, 0x4F30, 0x4F60, 0x4F60,
	0x4F80, 0x4F81, 0x4F88, 0x4F89, 0x4FEE, 0x4FEE, 0x4FF3, 0x4FF3,
	0x6000, 0x6001, 0x6008, 0x600F, 0x6014, 0x6016, 0x6018, 0x601B,
	0x61FD, 0x61FD, 0x623C, 0x623C, 0x6380, 0x6380, 0x63A0, 0x63A0,
	0x63C0, 0x63C1, 0x63C8, 0x63C9, 0x63D0, 0x63D4, 0x63D6, 0x63D6,
	0x63EE, 0x63EE, 0x6400, 0x6401, 0x6408, 0x640F, 0x6414, 0x6416,
	0x6418, 0x641B, 0x65FD, 0x65FD, 0x663C, 0x663C, 0x6780, 0x6780,
	0x67A0, 0x67A0, 0x67C0, 0x67C1, 0x67C8, 0x67C9, 0x67D0, 0x67D4,
	0x67D6, 0x67D6, 0x67EE, 0x67EE, 0x6800, 0x6801, 0x6808, 0x680F,
	0x6814, 0x6816, 0x6818, 0x681B, 0x69FD, 0x69FD, 0x6A3C, 0x6A3C,
	0x6B80, 0x6B80, 0x6BA0, 0x6BA0, 0x6BC0, 0x6BC1, 0x6BC8, 0x6BC9,
	0x6BD0, 0x6BD4, 0x6BD6, 0x6BD6, 0x6BEE, 0x6BEE,
	~0 /* sentinel */
};

static struct msm_gpu_state *a4xx_gpu_state_get(struct msm_gpu *gpu)
{
	struct msm_gpu_state *state = kzalloc(sizeof(*state), GFP_KERNEL);

	if (!state)
		return ERR_PTR(-ENOMEM);

	adreno_gpu_state_get(gpu, state);

	state->rbbm_status = gpu_read(gpu, REG_A4XX_RBBM_STATUS);

	return state;
}

/* Register offset defines for A4XX, in order of enum adreno_regs */
static const unsigned int a4xx_register_offsets[REG_ADRENO_REGISTER_MAX] = {
	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_BASE, REG_A4XX_CP_RB_BASE),
	REG_ADRENO_SKIP(REG_ADRENO_CP_RB_BASE_HI),
	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_RPTR_ADDR, REG_A4XX_CP_RB_RPTR_ADDR),
	REG_ADRENO_SKIP(REG_ADRENO_CP_RB_RPTR_ADDR_HI),
	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_RPTR, REG_A4XX_CP_RB_RPTR),
	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_WPTR, REG_A4XX_CP_RB_WPTR),
	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_CNTL, REG_A4XX_CP_RB_CNTL),
};

static void a4xx_dump(struct msm_gpu *gpu)
{
	printk("status:   %08x\n",
			gpu_read(gpu, REG_A4XX_RBBM_STATUS));
	adreno_dump(gpu);
}

static int a4xx_pm_resume(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	int ret;

	ret = msm_gpu_pm_resume(gpu);
	if (ret)
		return ret;

	if (adreno_is_a430(adreno_gpu)) {
		unsigned int reg;

		/* Set the default register values; set SW_COLLAPSE to 0 */
		gpu_write(gpu, REG_A4XX_RBBM_POWER_CNTL_IP, 0x778000);
		do {
			udelay(5);
			reg = gpu_read(gpu, REG_A4XX_RBBM_POWER_STATUS);
		} while (!(reg & A4XX_RBBM_POWER_CNTL_IP_SP_TP_PWR_ON));
	}
	return 0;
}

static int a4xx_pm_suspend(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	int ret;

	ret = msm_gpu_pm_suspend(gpu);
	if (ret)
		return ret;

	if (adreno_is_a430(adreno_gpu)) {
		/* Set the default register values; set SW_COLLAPSE to 1 */
		gpu_write(gpu, REG_A4XX_RBBM_POWER_CNTL_IP, 0x778001);
	}
	return 0;
}

static int a4xx_get_timestamp(struct msm_gpu *gpu, uint64_t *value)
{
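	/* reads the CP counter that hw_init selected as CP_ALWAYS_COUNT */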
	*value = gpu_read64(gpu, REG_A4XX_RBBM_PERFCTR_CP_0_LO,
		REG_A4XX_RBBM_PERFCTR_CP_0_HI);

	return 0;
}

static const struct adreno_gpu_funcs funcs = {
	.base = {
		.get_param = adreno_get_param,
		.hw_init = a4xx_hw_init,
		.pm_suspend = a4xx_pm_suspend,
		.pm_resume = a4xx_pm_resume,
		.recover = a4xx_recover,
		.submit = adreno_submit,
		.flush = adreno_flush,
		.active_ring = adreno_active_ring,
		.irq = a4xx_irq,
		.destroy = a4xx_destroy,
#if defined(CONFIG_DEBUG_FS) || defined(CONFIG_DEV_COREDUMP)
		.show = adreno_show,
#endif
		.gpu_state_get = a4xx_gpu_state_get,
		.gpu_state_put = adreno_gpu_state_put,
	},
	.get_timestamp = a4xx_get_timestamp,
};

struct msm_gpu *a4xx_gpu_init(struct drm_device *dev)
{
	struct a4xx_gpu *a4xx_gpu = NULL;
	struct adreno_gpu *adreno_gpu;
	struct msm_gpu *gpu;
	struct msm_drm_private *priv = dev->dev_private;
	struct platform_device *pdev = priv->gpu_pdev;
	int ret;

	if (!pdev) {
		DRM_DEV_ERROR(dev->dev, "no a4xx device\n");
		ret = -ENXIO;
		goto fail;
	}

	a4xx_gpu = kzalloc(sizeof(*a4xx_gpu), GFP_KERNEL);
	if (!a4xx_gpu) {
		ret = -ENOMEM;
		goto fail;
	}

	adreno_gpu = &a4xx_gpu->base;
	gpu = &adreno_gpu->base;

	gpu->perfcntrs = NULL;
	gpu->num_perfcntrs = 0;

	adreno_gpu->registers = a4xx_registers;
	adreno_gpu->reg_offsets = a4xx_register_offsets;

	ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs, 1);
	if (ret)
		goto fail;

	/* if needed, allocate ocmem to back the gmem aperture: */
	if (adreno_is_a4xx(adreno_gpu)) {
		ret = adreno_gpu_ocmem_init(dev->dev, adreno_gpu,
					    &a4xx_gpu->ocmem);
		if (ret)
			goto fail;
	}

	if (!gpu->aspace) {
		/* TODO we think it is possible to configure the GPU to
		 * restrict access to VRAM carveout.  But the required
		 * registers are unknown.  For now just bail out and
		 * limp along with just modesetting.  If it turns out
		 * to not be possible to restrict access, then we must
		 * implement a cmdstream validator.
		 */
		DRM_DEV_ERROR(dev->dev, "No memory protection without IOMMU\n");
		ret = -ENXIO;
		goto fail;
	}

	/*
	 * Set the ICC path to maximum speed for now by multiplying the fastest
	 * frequency by the bus width (8). We'll want to scale this later on to
	 * improve battery life.
	 */
	icc_set_bw(gpu->icc_path, 0, Bps_to_icc(gpu->fast_rate) * 8);
	icc_set_bw(gpu->ocmem_icc_path, 0, Bps_to_icc(gpu->fast_rate) * 8);

	return gpu;

fail:
	if (a4xx_gpu)
		a4xx_destroy(&a4xx_gpu->base.base);

	return ERR_PTR(ret);
}