xref: /openbmc/linux/drivers/gpu/drm/msm/adreno/a4xx_gpu.c (revision 4f727ecefefbd180de10e25b3e74c03dce3f1e75)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /* Copyright (c) 2014 The Linux Foundation. All rights reserved.
3  */
4 #include "a4xx_gpu.h"
5 #ifdef CONFIG_MSM_OCMEM
6 #  include <soc/qcom/ocmem.h>
7 #endif
8 
/*
 * Set of RBBM INT0 interrupt bits this driver enables; a4xx_hw_init()
 * writes the value to REG_A4XX_RBBM_INT_0_MASK.
 */
#define A4XX_INT0_MASK					\
	(A4XX_INT0_RBBM_AHB_ERROR			\
	 | A4XX_INT0_RBBM_ATB_BUS_OVERFLOW		\
	 | A4XX_INT0_CP_T0_PACKET_IN_IB		\
	 | A4XX_INT0_CP_OPCODE_ERROR			\
	 | A4XX_INT0_CP_RESERVED_BIT_ERROR		\
	 | A4XX_INT0_CP_HW_FAULT			\
	 | A4XX_INT0_CP_IB1_INT			\
	 | A4XX_INT0_CP_IB2_INT			\
	 | A4XX_INT0_CP_RB_INT				\
	 | A4XX_INT0_CP_REG_PROTECT_FAULT		\
	 | A4XX_INT0_CP_AHB_ERROR_HALT			\
	 | A4XX_INT0_CACHE_FLUSH_TS			\
	 | A4XX_INT0_UCHE_OOB_ACCESS)
23 
/* Forward declarations for helpers that are defined further down. */
static bool a4xx_idle(struct msm_gpu *gpu);
static void a4xx_dump(struct msm_gpu *gpu);

/* Defined outside this file; when set, a4xx_recover() dumps registers. */
extern bool hang_debug;
27 
28 /*
29  * a4xx_enable_hwcg() - Program the clock control registers
30  * @device: The adreno device pointer
31  */
32 static void a4xx_enable_hwcg(struct msm_gpu *gpu)
33 {
34 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
35 	unsigned int i;
36 	for (i = 0; i < 4; i++)
37 		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL_TP(i), 0x02222202);
38 	for (i = 0; i < 4; i++)
39 		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL2_TP(i), 0x00002222);
40 	for (i = 0; i < 4; i++)
41 		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_HYST_TP(i), 0x0E739CE7);
42 	for (i = 0; i < 4; i++)
43 		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_TP(i), 0x00111111);
44 	for (i = 0; i < 4; i++)
45 		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL_SP(i), 0x22222222);
46 	for (i = 0; i < 4; i++)
47 		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL2_SP(i), 0x00222222);
48 	for (i = 0; i < 4; i++)
49 		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_HYST_SP(i), 0x00000104);
50 	for (i = 0; i < 4; i++)
51 		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_SP(i), 0x00000081);
52 	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL_UCHE, 0x22222222);
53 	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL2_UCHE, 0x02222222);
54 	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL3_UCHE, 0x00000000);
55 	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL4_UCHE, 0x00000000);
56 	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_HYST_UCHE, 0x00004444);
57 	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_UCHE, 0x00001112);
58 	for (i = 0; i < 4; i++)
59 		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL_RB(i), 0x22222222);
60 
61 	/* Disable L1 clocking in A420 due to CCU issues with it */
62 	for (i = 0; i < 4; i++) {
63 		if (adreno_is_a420(adreno_gpu)) {
64 			gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL2_RB(i),
65 					0x00002020);
66 		} else {
67 			gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL2_RB(i),
68 					0x00022020);
69 		}
70 	}
71 
72 	for (i = 0; i < 4; i++) {
73 		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL_MARB_CCU(i),
74 				0x00000922);
75 	}
76 
77 	for (i = 0; i < 4; i++) {
78 		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_HYST_RB_MARB_CCU(i),
79 				0x00000000);
80 	}
81 
82 	for (i = 0; i < 4; i++) {
83 		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_RB_MARB_CCU_L1(i),
84 				0x00000001);
85 	}
86 
87 	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_MODE_GPC, 0x02222222);
88 	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_HYST_GPC, 0x04100104);
89 	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_GPC, 0x00022222);
90 	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL_COM_DCOM, 0x00000022);
91 	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_HYST_COM_DCOM, 0x0000010F);
92 	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_COM_DCOM, 0x00000022);
93 	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL_TSE_RAS_RBBM, 0x00222222);
94 	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM, 0x00004104);
95 	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM, 0x00000222);
96 	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL_HLSQ , 0x00000000);
97 	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_HYST_HLSQ, 0x00000000);
98 	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_HLSQ, 0x00220000);
99 	/* Early A430's have a timing issue with SP/TP power collapse;
100 	   disabling HW clock gating prevents it. */
101 	if (adreno_is_a430(adreno_gpu) && adreno_gpu->rev.patchid < 2)
102 		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL, 0);
103 	else
104 		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL, 0xAAAAAAAA);
105 	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL2, 0);
106 }
107 
108 
109 static bool a4xx_me_init(struct msm_gpu *gpu)
110 {
111 	struct msm_ringbuffer *ring = gpu->rb[0];
112 
113 	OUT_PKT3(ring, CP_ME_INIT, 17);
114 	OUT_RING(ring, 0x000003f7);
115 	OUT_RING(ring, 0x00000000);
116 	OUT_RING(ring, 0x00000000);
117 	OUT_RING(ring, 0x00000000);
118 	OUT_RING(ring, 0x00000080);
119 	OUT_RING(ring, 0x00000100);
120 	OUT_RING(ring, 0x00000180);
121 	OUT_RING(ring, 0x00006600);
122 	OUT_RING(ring, 0x00000150);
123 	OUT_RING(ring, 0x0000014e);
124 	OUT_RING(ring, 0x00000154);
125 	OUT_RING(ring, 0x00000001);
126 	OUT_RING(ring, 0x00000000);
127 	OUT_RING(ring, 0x00000000);
128 	OUT_RING(ring, 0x00000000);
129 	OUT_RING(ring, 0x00000000);
130 	OUT_RING(ring, 0x00000000);
131 
132 	gpu->funcs->flush(gpu, ring);
133 	return a4xx_idle(gpu);
134 }
135 
136 static int a4xx_hw_init(struct msm_gpu *gpu)
137 {
138 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
139 	struct a4xx_gpu *a4xx_gpu = to_a4xx_gpu(adreno_gpu);
140 	uint32_t *ptr, len;
141 	int i, ret;
142 
143 	if (adreno_is_a420(adreno_gpu)) {
144 		gpu_write(gpu, REG_A4XX_VBIF_ABIT_SORT, 0x0001001F);
145 		gpu_write(gpu, REG_A4XX_VBIF_ABIT_SORT_CONF, 0x000000A4);
146 		gpu_write(gpu, REG_A4XX_VBIF_GATE_OFF_WRREQ_EN, 0x00000001);
147 		gpu_write(gpu, REG_A4XX_VBIF_IN_RD_LIM_CONF0, 0x18181818);
148 		gpu_write(gpu, REG_A4XX_VBIF_IN_RD_LIM_CONF1, 0x00000018);
149 		gpu_write(gpu, REG_A4XX_VBIF_IN_WR_LIM_CONF0, 0x18181818);
150 		gpu_write(gpu, REG_A4XX_VBIF_IN_WR_LIM_CONF1, 0x00000018);
151 		gpu_write(gpu, REG_A4XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x00000003);
152 	} else if (adreno_is_a430(adreno_gpu)) {
153 		gpu_write(gpu, REG_A4XX_VBIF_GATE_OFF_WRREQ_EN, 0x00000001);
154 		gpu_write(gpu, REG_A4XX_VBIF_IN_RD_LIM_CONF0, 0x18181818);
155 		gpu_write(gpu, REG_A4XX_VBIF_IN_RD_LIM_CONF1, 0x00000018);
156 		gpu_write(gpu, REG_A4XX_VBIF_IN_WR_LIM_CONF0, 0x18181818);
157 		gpu_write(gpu, REG_A4XX_VBIF_IN_WR_LIM_CONF1, 0x00000018);
158 		gpu_write(gpu, REG_A4XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x00000003);
159 	} else {
160 		BUG();
161 	}
162 
163 	/* Make all blocks contribute to the GPU BUSY perf counter */
164 	gpu_write(gpu, REG_A4XX_RBBM_GPU_BUSY_MASKED, 0xffffffff);
165 
166 	/* Tune the hystersis counters for SP and CP idle detection */
167 	gpu_write(gpu, REG_A4XX_RBBM_SP_HYST_CNT, 0x10);
168 	gpu_write(gpu, REG_A4XX_RBBM_WAIT_IDLE_CLOCKS_CTL, 0x10);
169 
170 	if (adreno_is_a430(adreno_gpu)) {
171 		gpu_write(gpu, REG_A4XX_RBBM_WAIT_IDLE_CLOCKS_CTL2, 0x30);
172 	}
173 
174 	 /* Enable the RBBM error reporting bits */
175 	gpu_write(gpu, REG_A4XX_RBBM_AHB_CTL0, 0x00000001);
176 
177 	/* Enable AHB error reporting*/
178 	gpu_write(gpu, REG_A4XX_RBBM_AHB_CTL1, 0xa6ffffff);
179 
180 	/* Enable power counters*/
181 	gpu_write(gpu, REG_A4XX_RBBM_RBBM_CTL, 0x00000030);
182 
183 	/*
184 	 * Turn on hang detection - this spews a lot of useful information
185 	 * into the RBBM registers on a hang:
186 	 */
187 	gpu_write(gpu, REG_A4XX_RBBM_INTERFACE_HANG_INT_CTL,
188 			(1 << 30) | 0xFFFF);
189 
190 	gpu_write(gpu, REG_A4XX_RB_GMEM_BASE_ADDR,
191 			(unsigned int)(a4xx_gpu->ocmem_base >> 14));
192 
193 	/* Turn on performance counters: */
194 	gpu_write(gpu, REG_A4XX_RBBM_PERFCTR_CTL, 0x01);
195 
196 	/* use the first CP counter for timestamp queries.. userspace may set
197 	 * this as well but it selects the same counter/countable:
198 	 */
199 	gpu_write(gpu, REG_A4XX_CP_PERFCTR_CP_SEL_0, CP_ALWAYS_COUNT);
200 
201 	if (adreno_is_a430(adreno_gpu))
202 		gpu_write(gpu, REG_A4XX_UCHE_CACHE_WAYS_VFD, 0x07);
203 
204 	/* Disable L2 bypass to avoid UCHE out of bounds errors */
205 	gpu_write(gpu, REG_A4XX_UCHE_TRAP_BASE_LO, 0xffff0000);
206 	gpu_write(gpu, REG_A4XX_UCHE_TRAP_BASE_HI, 0xffff0000);
207 
208 	gpu_write(gpu, REG_A4XX_CP_DEBUG, (1 << 25) |
209 			(adreno_is_a420(adreno_gpu) ? (1 << 29) : 0));
210 
211 	/* On A430 enable SP regfile sleep for power savings */
212 	/* TODO downstream does this for !420, so maybe applies for 405 too? */
213 	if (!adreno_is_a420(adreno_gpu)) {
214 		gpu_write(gpu, REG_A4XX_RBBM_SP_REGFILE_SLEEP_CNTL_0,
215 			0x00000441);
216 		gpu_write(gpu, REG_A4XX_RBBM_SP_REGFILE_SLEEP_CNTL_1,
217 			0x00000441);
218 	}
219 
220 	a4xx_enable_hwcg(gpu);
221 
222 	/*
223 	 * For A420 set RBBM_CLOCK_DELAY_HLSQ.CGC_HLSQ_TP_EARLY_CYC >= 2
224 	 * due to timing issue with HLSQ_TP_CLK_EN
225 	 */
226 	if (adreno_is_a420(adreno_gpu)) {
227 		unsigned int val;
228 		val = gpu_read(gpu, REG_A4XX_RBBM_CLOCK_DELAY_HLSQ);
229 		val &= ~A4XX_CGC_HLSQ_EARLY_CYC__MASK;
230 		val |= 2 << A4XX_CGC_HLSQ_EARLY_CYC__SHIFT;
231 		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_HLSQ, val);
232 	}
233 
234 	/* setup access protection: */
235 	gpu_write(gpu, REG_A4XX_CP_PROTECT_CTRL, 0x00000007);
236 
237 	/* RBBM registers */
238 	gpu_write(gpu, REG_A4XX_CP_PROTECT(0), 0x62000010);
239 	gpu_write(gpu, REG_A4XX_CP_PROTECT(1), 0x63000020);
240 	gpu_write(gpu, REG_A4XX_CP_PROTECT(2), 0x64000040);
241 	gpu_write(gpu, REG_A4XX_CP_PROTECT(3), 0x65000080);
242 	gpu_write(gpu, REG_A4XX_CP_PROTECT(4), 0x66000100);
243 	gpu_write(gpu, REG_A4XX_CP_PROTECT(5), 0x64000200);
244 
245 	/* CP registers */
246 	gpu_write(gpu, REG_A4XX_CP_PROTECT(6), 0x67000800);
247 	gpu_write(gpu, REG_A4XX_CP_PROTECT(7), 0x64001600);
248 
249 
250 	/* RB registers */
251 	gpu_write(gpu, REG_A4XX_CP_PROTECT(8), 0x60003300);
252 
253 	/* HLSQ registers */
254 	gpu_write(gpu, REG_A4XX_CP_PROTECT(9), 0x60003800);
255 
256 	/* VPC registers */
257 	gpu_write(gpu, REG_A4XX_CP_PROTECT(10), 0x61003980);
258 
259 	/* SMMU registers */
260 	gpu_write(gpu, REG_A4XX_CP_PROTECT(11), 0x6e010000);
261 
262 	gpu_write(gpu, REG_A4XX_RBBM_INT_0_MASK, A4XX_INT0_MASK);
263 
264 	ret = adreno_hw_init(gpu);
265 	if (ret)
266 		return ret;
267 
268 	/* Load PM4: */
269 	ptr = (uint32_t *)(adreno_gpu->fw[ADRENO_FW_PM4]->data);
270 	len = adreno_gpu->fw[ADRENO_FW_PM4]->size / 4;
271 	DBG("loading PM4 ucode version: %u", ptr[0]);
272 	gpu_write(gpu, REG_A4XX_CP_ME_RAM_WADDR, 0);
273 	for (i = 1; i < len; i++)
274 		gpu_write(gpu, REG_A4XX_CP_ME_RAM_DATA, ptr[i]);
275 
276 	/* Load PFP: */
277 	ptr = (uint32_t *)(adreno_gpu->fw[ADRENO_FW_PFP]->data);
278 	len = adreno_gpu->fw[ADRENO_FW_PFP]->size / 4;
279 	DBG("loading PFP ucode version: %u", ptr[0]);
280 
281 	gpu_write(gpu, REG_A4XX_CP_PFP_UCODE_ADDR, 0);
282 	for (i = 1; i < len; i++)
283 		gpu_write(gpu, REG_A4XX_CP_PFP_UCODE_DATA, ptr[i]);
284 
285 	/* clear ME_HALT to start micro engine */
286 	gpu_write(gpu, REG_A4XX_CP_ME_CNTL, 0);
287 
288 	return a4xx_me_init(gpu) ? 0 : -EINVAL;
289 }
290 
291 static void a4xx_recover(struct msm_gpu *gpu)
292 {
293 	int i;
294 
295 	adreno_dump_info(gpu);
296 
297 	for (i = 0; i < 8; i++) {
298 		printk("CP_SCRATCH_REG%d: %u\n", i,
299 			gpu_read(gpu, REG_AXXX_CP_SCRATCH_REG0 + i));
300 	}
301 
302 	/* dump registers before resetting gpu, if enabled: */
303 	if (hang_debug)
304 		a4xx_dump(gpu);
305 
306 	gpu_write(gpu, REG_A4XX_RBBM_SW_RESET_CMD, 1);
307 	gpu_read(gpu, REG_A4XX_RBBM_SW_RESET_CMD);
308 	gpu_write(gpu, REG_A4XX_RBBM_SW_RESET_CMD, 0);
309 	adreno_recover(gpu);
310 }
311 
312 static void a4xx_destroy(struct msm_gpu *gpu)
313 {
314 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
315 	struct a4xx_gpu *a4xx_gpu = to_a4xx_gpu(adreno_gpu);
316 
317 	DBG("%s", gpu->name);
318 
319 	adreno_gpu_cleanup(adreno_gpu);
320 
321 #ifdef CONFIG_MSM_OCMEM
322 	if (a4xx_gpu->ocmem_base)
323 		ocmem_free(OCMEM_GRAPHICS, a4xx_gpu->ocmem_hdl);
324 #endif
325 
326 	kfree(a4xx_gpu);
327 }
328 
329 static bool a4xx_idle(struct msm_gpu *gpu)
330 {
331 	/* wait for ringbuffer to drain: */
332 	if (!adreno_idle(gpu, gpu->rb[0]))
333 		return false;
334 
335 	/* then wait for GPU to finish: */
336 	if (spin_until(!(gpu_read(gpu, REG_A4XX_RBBM_STATUS) &
337 					A4XX_RBBM_STATUS_GPU_BUSY))) {
338 		DRM_ERROR("%s: timeout waiting for GPU to idle!\n", gpu->name);
339 		/* TODO maybe we need to reset GPU here to recover from hang? */
340 		return false;
341 	}
342 
343 	return true;
344 }
345 
346 static irqreturn_t a4xx_irq(struct msm_gpu *gpu)
347 {
348 	uint32_t status;
349 
350 	status = gpu_read(gpu, REG_A4XX_RBBM_INT_0_STATUS);
351 	DBG("%s: Int status %08x", gpu->name, status);
352 
353 	if (status & A4XX_INT0_CP_REG_PROTECT_FAULT) {
354 		uint32_t reg = gpu_read(gpu, REG_A4XX_CP_PROTECT_STATUS);
355 		printk("CP | Protected mode error| %s | addr=%x\n",
356 			reg & (1 << 24) ? "WRITE" : "READ",
357 			(reg & 0xFFFFF) >> 2);
358 	}
359 
360 	gpu_write(gpu, REG_A4XX_RBBM_INT_CLEAR_CMD, status);
361 
362 	msm_gpu_retire(gpu);
363 
364 	return IRQ_HANDLED;
365 }
366 
/*
 * Register list installed as adreno_gpu->registers by a4xx_gpu_init().
 *
 * NOTE(review): entries look like inclusive (first, last) register
 * ranges - they are laid out in twos below on that assumption - and the
 * list ends with a ~0 sentinel.  The consumer lives outside this file;
 * confirm the format there before editing.
 */
static const unsigned int a4xx_registers[] = {
	/* RBBM */
	0x0000, 0x0002,  0x0004, 0x0021,  0x0023, 0x0024,  0x0026, 0x0026,
	0x0028, 0x002B,  0x002E, 0x0034,  0x0037, 0x0044,  0x0047, 0x0066,
	0x0068, 0x0095,  0x009C, 0x0170,  0x0174, 0x01AF,
	/* CP */
	0x0200, 0x0233,  0x0240, 0x0250,  0x04C0, 0x04DD,  0x0500, 0x050B,
	0x0578, 0x058F,
	/* VSC */
	0x0C00, 0x0C03,  0x0C08, 0x0C41,  0x0C50, 0x0C51,
	/* GRAS */
	0x0C80, 0x0C81,  0x0C88, 0x0C8F,
	/* RB */
	0x0CC0, 0x0CC0,  0x0CC4, 0x0CD2,
	/* PC */
	0x0D00, 0x0D0C,  0x0D10, 0x0D17,  0x0D20, 0x0D23,
	/* VFD */
	0x0E40, 0x0E4A,
	/* VPC */
	0x0E60, 0x0E61,  0x0E63, 0x0E68,
	/* UCHE */
	0x0E80, 0x0E84,  0x0E88, 0x0E95,
	/* VMIDMT */
	0x1000, 0x1000,  0x1002, 0x1002,  0x1004, 0x1004,  0x1008, 0x100A,
	0x100C, 0x100D,  0x100F, 0x1010,  0x1012, 0x1016,  0x1024, 0x1024,
	0x1027, 0x1027,  0x1100, 0x1100,  0x1102, 0x1102,  0x1104, 0x1104,
	0x1110, 0x1110,  0x1112, 0x1116,  0x1124, 0x1124,  0x1300, 0x1300,
	0x1380, 0x1380,
	/* GRAS CTX 0 */
	0x2000, 0x2004,  0x2008, 0x2067,  0x2070, 0x2078,  0x207B, 0x216E,
	/* PC CTX 0 */
	0x21C0, 0x21C6,  0x21D0, 0x21D0,  0x21D9, 0x21D9,  0x21E5, 0x21E7,
	/* VFD CTX 0 */
	0x2200, 0x2204,  0x2208, 0x22A9,
	/* GRAS CTX 1 */
	0x2400, 0x2404,  0x2408, 0x2467,  0x2470, 0x2478,  0x247B, 0x256E,
	/* PC CTX 1 */
	0x25C0, 0x25C6,  0x25D0, 0x25D0,  0x25D9, 0x25D9,  0x25E5, 0x25E7,
	/* VFD CTX 1 */
	0x2600, 0x2604,  0x2608, 0x26A9,
	/* XPU */
	0x2C00, 0x2C01,  0x2C10, 0x2C10,  0x2C12, 0x2C16,  0x2C1D, 0x2C20,
	0x2C28, 0x2C28,  0x2C30, 0x2C30,  0x2C32, 0x2C36,  0x2C40, 0x2C40,
	0x2C50, 0x2C50,  0x2C52, 0x2C56,  0x2C80, 0x2C80,  0x2C94, 0x2C95,
	/* VBIF */
	0x3000, 0x3007,  0x300C, 0x3014,  0x3018, 0x301D,  0x3020, 0x3022,
	0x3024, 0x3026,  0x3028, 0x302A,  0x302C, 0x302D,  0x3030, 0x3031,
	0x3034, 0x3036,  0x3038, 0x3038,  0x303C, 0x303D,  0x3040, 0x3040,
	0x3049, 0x3049,  0x3058, 0x3058,  0x305B, 0x3061,  0x3064, 0x3068,
	0x306C, 0x306D,  0x3080, 0x3088,  0x308B, 0x308C,  0x3090, 0x3094,
	0x3098, 0x3098,  0x309C, 0x309C,  0x30C0, 0x30C0,  0x30C8, 0x30C8,
	0x30D0, 0x30D0,  0x30D8, 0x30D8,  0x30E0, 0x30E0,  0x3100, 0x3100,
	0x3108, 0x3108,  0x3110, 0x3110,  0x3118, 0x3118,  0x3120, 0x3120,
	0x3124, 0x3125,  0x3129, 0x3129,  0x3131, 0x3131,  0x330C, 0x330C,
	0x3310, 0x3310,  0x3400, 0x3401,  0x3410, 0x3410,  0x3412, 0x3416,
	0x341D, 0x3420,  0x3428, 0x3428,  0x3430, 0x3430,  0x3432, 0x3436,
	0x3440, 0x3440,  0x3450, 0x3450,  0x3452, 0x3456,  0x3480, 0x3480,
	0x3494, 0x3495,  0x4000, 0x4000,  0x4002, 0x4002,  0x4004, 0x4004,
	0x4008, 0x400A,  0x400C, 0x400D,  0x400F, 0x4012,  0x4014, 0x4016,
	0x401D, 0x401D,  0x4020, 0x4027,  0x4060, 0x4062,  0x4200, 0x4200,
	0x4300, 0x4300,  0x4400, 0x4400,  0x4500, 0x4500,  0x4800, 0x4802,
	0x480F, 0x480F,  0x4811, 0x4811,  0x4813, 0x4813,  0x4815, 0x4816,
	0x482B, 0x482B,  0x4857, 0x4857,  0x4883, 0x4883,  0x48AF, 0x48AF,
	0x48C5, 0x48C5,  0x48E5, 0x48E5,  0x4905, 0x4905,  0x4925, 0x4925,
	0x4945, 0x4945,  0x4950, 0x4950,  0x495B, 0x495B,  0x4980, 0x498E,
	0x4B00, 0x4B00,  0x4C00, 0x4C00,  0x4D00, 0x4D00,  0x4E00, 0x4E00,
	0x4E80, 0x4E80,  0x4F00, 0x4F00,  0x4F08, 0x4F08,  0x4F10, 0x4F10,
	0x4F18, 0x4F18,  0x4F20, 0x4F20,  0x4F30, 0x4F30,  0x4F60, 0x4F60,
	0x4F80, 0x4F81,  0x4F88, 0x4F89,  0x4FEE, 0x4FEE,  0x4FF3, 0x4FF3,
	0x6000, 0x6001,  0x6008, 0x600F,  0x6014, 0x6016,  0x6018, 0x601B,
	0x61FD, 0x61FD,  0x623C, 0x623C,  0x6380, 0x6380,  0x63A0, 0x63A0,
	0x63C0, 0x63C1,  0x63C8, 0x63C9,  0x63D0, 0x63D4,  0x63D6, 0x63D6,
	0x63EE, 0x63EE,  0x6400, 0x6401,  0x6408, 0x640F,  0x6414, 0x6416,
	0x6418, 0x641B,  0x65FD, 0x65FD,  0x663C, 0x663C,  0x6780, 0x6780,
	0x67A0, 0x67A0,  0x67C0, 0x67C1,  0x67C8, 0x67C9,  0x67D0, 0x67D4,
	0x67D6, 0x67D6,  0x67EE, 0x67EE,  0x6800, 0x6801,  0x6808, 0x680F,
	0x6814, 0x6816,  0x6818, 0x681B,  0x69FD, 0x69FD,  0x6A3C, 0x6A3C,
	0x6B80, 0x6B80,  0x6BA0, 0x6BA0,  0x6BC0, 0x6BC1,  0x6BC8, 0x6BC9,
	0x6BD0, 0x6BD4,  0x6BD6, 0x6BD6,  0x6BEE, 0x6BEE,
	~0 /* sentinel */
};
448 
449 static struct msm_gpu_state *a4xx_gpu_state_get(struct msm_gpu *gpu)
450 {
451 	struct msm_gpu_state *state = kzalloc(sizeof(*state), GFP_KERNEL);
452 
453 	if (!state)
454 		return ERR_PTR(-ENOMEM);
455 
456 	adreno_gpu_state_get(gpu, state);
457 
458 	state->rbbm_status = gpu_read(gpu, REG_A4XX_RBBM_STATUS);
459 
460 	return state;
461 }
462 
463 /* Register offset defines for A4XX, in order of enum adreno_regs */
464 static const unsigned int a4xx_register_offsets[REG_ADRENO_REGISTER_MAX] = {
465 	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_BASE, REG_A4XX_CP_RB_BASE),
466 	REG_ADRENO_SKIP(REG_ADRENO_CP_RB_BASE_HI),
467 	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_RPTR_ADDR, REG_A4XX_CP_RB_RPTR_ADDR),
468 	REG_ADRENO_SKIP(REG_ADRENO_CP_RB_RPTR_ADDR_HI),
469 	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_RPTR, REG_A4XX_CP_RB_RPTR),
470 	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_WPTR, REG_A4XX_CP_RB_WPTR),
471 	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_CNTL, REG_A4XX_CP_RB_CNTL),
472 };
473 
474 static void a4xx_dump(struct msm_gpu *gpu)
475 {
476 	printk("status:   %08x\n",
477 			gpu_read(gpu, REG_A4XX_RBBM_STATUS));
478 	adreno_dump(gpu);
479 }
480 
481 static int a4xx_pm_resume(struct msm_gpu *gpu) {
482 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
483 	int ret;
484 
485 	ret = msm_gpu_pm_resume(gpu);
486 	if (ret)
487 		return ret;
488 
489 	if (adreno_is_a430(adreno_gpu)) {
490 		unsigned int reg;
491 		/* Set the default register values; set SW_COLLAPSE to 0 */
492 		gpu_write(gpu, REG_A4XX_RBBM_POWER_CNTL_IP, 0x778000);
493 		do {
494 			udelay(5);
495 			reg = gpu_read(gpu, REG_A4XX_RBBM_POWER_STATUS);
496 		} while (!(reg & A4XX_RBBM_POWER_CNTL_IP_SP_TP_PWR_ON));
497 	}
498 	return 0;
499 }
500 
501 static int a4xx_pm_suspend(struct msm_gpu *gpu) {
502 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
503 	int ret;
504 
505 	ret = msm_gpu_pm_suspend(gpu);
506 	if (ret)
507 		return ret;
508 
509 	if (adreno_is_a430(adreno_gpu)) {
510 		/* Set the default register values; set SW_COLLAPSE to 1 */
511 		gpu_write(gpu, REG_A4XX_RBBM_POWER_CNTL_IP, 0x778001);
512 	}
513 	return 0;
514 }
515 
516 static int a4xx_get_timestamp(struct msm_gpu *gpu, uint64_t *value)
517 {
518 	*value = gpu_read64(gpu, REG_A4XX_RBBM_PERFCTR_CP_0_LO,
519 		REG_A4XX_RBBM_PERFCTR_CP_0_HI);
520 
521 	return 0;
522 }
523 
524 static const struct adreno_gpu_funcs funcs = {
525 	.base = {
526 		.get_param = adreno_get_param,
527 		.hw_init = a4xx_hw_init,
528 		.pm_suspend = a4xx_pm_suspend,
529 		.pm_resume = a4xx_pm_resume,
530 		.recover = a4xx_recover,
531 		.submit = adreno_submit,
532 		.flush = adreno_flush,
533 		.active_ring = adreno_active_ring,
534 		.irq = a4xx_irq,
535 		.destroy = a4xx_destroy,
536 #if defined(CONFIG_DEBUG_FS) || defined(CONFIG_DEV_COREDUMP)
537 		.show = adreno_show,
538 #endif
539 		.gpu_state_get = a4xx_gpu_state_get,
540 		.gpu_state_put = adreno_gpu_state_put,
541 	},
542 	.get_timestamp = a4xx_get_timestamp,
543 };
544 
545 struct msm_gpu *a4xx_gpu_init(struct drm_device *dev)
546 {
547 	struct a4xx_gpu *a4xx_gpu = NULL;
548 	struct adreno_gpu *adreno_gpu;
549 	struct msm_gpu *gpu;
550 	struct msm_drm_private *priv = dev->dev_private;
551 	struct platform_device *pdev = priv->gpu_pdev;
552 	int ret;
553 
554 	if (!pdev) {
555 		DRM_DEV_ERROR(dev->dev, "no a4xx device\n");
556 		ret = -ENXIO;
557 		goto fail;
558 	}
559 
560 	a4xx_gpu = kzalloc(sizeof(*a4xx_gpu), GFP_KERNEL);
561 	if (!a4xx_gpu) {
562 		ret = -ENOMEM;
563 		goto fail;
564 	}
565 
566 	adreno_gpu = &a4xx_gpu->base;
567 	gpu = &adreno_gpu->base;
568 
569 	gpu->perfcntrs = NULL;
570 	gpu->num_perfcntrs = 0;
571 
572 	adreno_gpu->registers = a4xx_registers;
573 	adreno_gpu->reg_offsets = a4xx_register_offsets;
574 
575 	ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs, 1);
576 	if (ret)
577 		goto fail;
578 
579 	/* if needed, allocate gmem: */
580 	if (adreno_is_a4xx(adreno_gpu)) {
581 #ifdef CONFIG_MSM_OCMEM
582 		/* TODO this is different/missing upstream: */
583 		struct ocmem_buf *ocmem_hdl =
584 				ocmem_allocate(OCMEM_GRAPHICS, adreno_gpu->gmem);
585 
586 		a4xx_gpu->ocmem_hdl = ocmem_hdl;
587 		a4xx_gpu->ocmem_base = ocmem_hdl->addr;
588 		adreno_gpu->gmem = ocmem_hdl->len;
589 		DBG("using %dK of OCMEM at 0x%08x", adreno_gpu->gmem / 1024,
590 				a4xx_gpu->ocmem_base);
591 #endif
592 	}
593 
594 	if (!gpu->aspace) {
595 		/* TODO we think it is possible to configure the GPU to
596 		 * restrict access to VRAM carveout.  But the required
597 		 * registers are unknown.  For now just bail out and
598 		 * limp along with just modesetting.  If it turns out
599 		 * to not be possible to restrict access, then we must
600 		 * implement a cmdstream validator.
601 		 */
602 		DRM_DEV_ERROR(dev->dev, "No memory protection without IOMMU\n");
603 		ret = -ENXIO;
604 		goto fail;
605 	}
606 
607 	return gpu;
608 
609 fail:
610 	if (a4xx_gpu)
611 		a4xx_destroy(&a4xx_gpu->base.base);
612 
613 	return ERR_PTR(ret);
614 }
615