xref: /openbmc/linux/drivers/gpu/drm/msm/adreno/a4xx_gpu.c (revision be709d48)
1 /* Copyright (c) 2014 The Linux Foundation. All rights reserved.
2  *
3  * This program is free software; you can redistribute it and/or modify
4  * it under the terms of the GNU General Public License version 2 and
5  * only version 2 as published by the Free Software Foundation.
6  *
7  * This program is distributed in the hope that it will be useful,
8  * but WITHOUT ANY WARRANTY; without even the implied warranty of
9  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10  * GNU General Public License for more details.
11  *
12  */
13 #include "a4xx_gpu.h"
14 #ifdef CONFIG_MSM_OCMEM
15 #  include <soc/qcom/ocmem.h>
16 #endif
17 
/*
 * Interrupt sources that a4xx_hw_init() unmasks by writing this value to
 * RBBM_INT_0_MASK: RBBM and CP error conditions, the CP IB1/IB2/RB
 * interrupts, the cache-flush timestamp and UCHE out-of-bounds accesses.
 */
#define A4XX_INT0_MASK (			\
	A4XX_INT0_RBBM_AHB_ERROR	|	\
	A4XX_INT0_RBBM_ATB_BUS_OVERFLOW	|	\
	A4XX_INT0_CP_T0_PACKET_IN_IB	|	\
	A4XX_INT0_CP_OPCODE_ERROR	|	\
	A4XX_INT0_CP_RESERVED_BIT_ERROR	|	\
	A4XX_INT0_CP_HW_FAULT		|	\
	A4XX_INT0_CP_IB1_INT		|	\
	A4XX_INT0_CP_IB2_INT		|	\
	A4XX_INT0_CP_RB_INT		|	\
	A4XX_INT0_CP_REG_PROTECT_FAULT	|	\
	A4XX_INT0_CP_AHB_ERROR_HALT	|	\
	A4XX_INT0_CACHE_FLUSH_TS	|	\
	A4XX_INT0_UCHE_OOB_ACCESS)
32 
33 extern bool hang_debug;
34 static void a4xx_dump(struct msm_gpu *gpu);
35 static bool a4xx_idle(struct msm_gpu *gpu);
36 
37 /*
38  * a4xx_enable_hwcg() - Program the clock control registers
39  * @device: The adreno device pointer
40  */
41 static void a4xx_enable_hwcg(struct msm_gpu *gpu)
42 {
43 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
44 	unsigned int i;
45 	for (i = 0; i < 4; i++)
46 		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL_TP(i), 0x02222202);
47 	for (i = 0; i < 4; i++)
48 		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL2_TP(i), 0x00002222);
49 	for (i = 0; i < 4; i++)
50 		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_HYST_TP(i), 0x0E739CE7);
51 	for (i = 0; i < 4; i++)
52 		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_TP(i), 0x00111111);
53 	for (i = 0; i < 4; i++)
54 		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL_SP(i), 0x22222222);
55 	for (i = 0; i < 4; i++)
56 		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL2_SP(i), 0x00222222);
57 	for (i = 0; i < 4; i++)
58 		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_HYST_SP(i), 0x00000104);
59 	for (i = 0; i < 4; i++)
60 		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_SP(i), 0x00000081);
61 	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL_UCHE, 0x22222222);
62 	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL2_UCHE, 0x02222222);
63 	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL3_UCHE, 0x00000000);
64 	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL4_UCHE, 0x00000000);
65 	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_HYST_UCHE, 0x00004444);
66 	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_UCHE, 0x00001112);
67 	for (i = 0; i < 4; i++)
68 		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL_RB(i), 0x22222222);
69 
70 	/* Disable L1 clocking in A420 due to CCU issues with it */
71 	for (i = 0; i < 4; i++) {
72 		if (adreno_is_a420(adreno_gpu)) {
73 			gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL2_RB(i),
74 					0x00002020);
75 		} else {
76 			gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL2_RB(i),
77 					0x00022020);
78 		}
79 	}
80 
81 	for (i = 0; i < 4; i++) {
82 		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL_MARB_CCU(i),
83 				0x00000922);
84 	}
85 
86 	for (i = 0; i < 4; i++) {
87 		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_HYST_RB_MARB_CCU(i),
88 				0x00000000);
89 	}
90 
91 	for (i = 0; i < 4; i++) {
92 		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_RB_MARB_CCU_L1(i),
93 				0x00000001);
94 	}
95 
96 	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_MODE_GPC, 0x02222222);
97 	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_HYST_GPC, 0x04100104);
98 	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_GPC, 0x00022222);
99 	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL_COM_DCOM, 0x00000022);
100 	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_HYST_COM_DCOM, 0x0000010F);
101 	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_COM_DCOM, 0x00000022);
102 	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL_TSE_RAS_RBBM, 0x00222222);
103 	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM, 0x00004104);
104 	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM, 0x00000222);
105 	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL_HLSQ , 0x00000000);
106 	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_HYST_HLSQ, 0x00000000);
107 	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_HLSQ, 0x00220000);
108 	/* Early A430's have a timing issue with SP/TP power collapse;
109 	   disabling HW clock gating prevents it. */
110 	if (adreno_is_a430(adreno_gpu) && adreno_gpu->rev.patchid < 2)
111 		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL, 0);
112 	else
113 		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL, 0xAAAAAAAA);
114 	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL2, 0);
115 }
116 
117 
118 static bool a4xx_me_init(struct msm_gpu *gpu)
119 {
120 	struct msm_ringbuffer *ring = gpu->rb[0];
121 
122 	OUT_PKT3(ring, CP_ME_INIT, 17);
123 	OUT_RING(ring, 0x000003f7);
124 	OUT_RING(ring, 0x00000000);
125 	OUT_RING(ring, 0x00000000);
126 	OUT_RING(ring, 0x00000000);
127 	OUT_RING(ring, 0x00000080);
128 	OUT_RING(ring, 0x00000100);
129 	OUT_RING(ring, 0x00000180);
130 	OUT_RING(ring, 0x00006600);
131 	OUT_RING(ring, 0x00000150);
132 	OUT_RING(ring, 0x0000014e);
133 	OUT_RING(ring, 0x00000154);
134 	OUT_RING(ring, 0x00000001);
135 	OUT_RING(ring, 0x00000000);
136 	OUT_RING(ring, 0x00000000);
137 	OUT_RING(ring, 0x00000000);
138 	OUT_RING(ring, 0x00000000);
139 	OUT_RING(ring, 0x00000000);
140 
141 	gpu->funcs->flush(gpu, ring);
142 	return a4xx_idle(gpu);
143 }
144 
145 static int a4xx_hw_init(struct msm_gpu *gpu)
146 {
147 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
148 	struct a4xx_gpu *a4xx_gpu = to_a4xx_gpu(adreno_gpu);
149 	uint32_t *ptr, len;
150 	int i, ret;
151 
152 	if (adreno_is_a420(adreno_gpu)) {
153 		gpu_write(gpu, REG_A4XX_VBIF_ABIT_SORT, 0x0001001F);
154 		gpu_write(gpu, REG_A4XX_VBIF_ABIT_SORT_CONF, 0x000000A4);
155 		gpu_write(gpu, REG_A4XX_VBIF_GATE_OFF_WRREQ_EN, 0x00000001);
156 		gpu_write(gpu, REG_A4XX_VBIF_IN_RD_LIM_CONF0, 0x18181818);
157 		gpu_write(gpu, REG_A4XX_VBIF_IN_RD_LIM_CONF1, 0x00000018);
158 		gpu_write(gpu, REG_A4XX_VBIF_IN_WR_LIM_CONF0, 0x18181818);
159 		gpu_write(gpu, REG_A4XX_VBIF_IN_WR_LIM_CONF1, 0x00000018);
160 		gpu_write(gpu, REG_A4XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x00000003);
161 	} else if (adreno_is_a430(adreno_gpu)) {
162 		gpu_write(gpu, REG_A4XX_VBIF_GATE_OFF_WRREQ_EN, 0x00000001);
163 		gpu_write(gpu, REG_A4XX_VBIF_IN_RD_LIM_CONF0, 0x18181818);
164 		gpu_write(gpu, REG_A4XX_VBIF_IN_RD_LIM_CONF1, 0x00000018);
165 		gpu_write(gpu, REG_A4XX_VBIF_IN_WR_LIM_CONF0, 0x18181818);
166 		gpu_write(gpu, REG_A4XX_VBIF_IN_WR_LIM_CONF1, 0x00000018);
167 		gpu_write(gpu, REG_A4XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x00000003);
168 	} else {
169 		BUG();
170 	}
171 
172 	/* Make all blocks contribute to the GPU BUSY perf counter */
173 	gpu_write(gpu, REG_A4XX_RBBM_GPU_BUSY_MASKED, 0xffffffff);
174 
175 	/* Tune the hystersis counters for SP and CP idle detection */
176 	gpu_write(gpu, REG_A4XX_RBBM_SP_HYST_CNT, 0x10);
177 	gpu_write(gpu, REG_A4XX_RBBM_WAIT_IDLE_CLOCKS_CTL, 0x10);
178 
179 	if (adreno_is_a430(adreno_gpu)) {
180 		gpu_write(gpu, REG_A4XX_RBBM_WAIT_IDLE_CLOCKS_CTL2, 0x30);
181 	}
182 
183 	 /* Enable the RBBM error reporting bits */
184 	gpu_write(gpu, REG_A4XX_RBBM_AHB_CTL0, 0x00000001);
185 
186 	/* Enable AHB error reporting*/
187 	gpu_write(gpu, REG_A4XX_RBBM_AHB_CTL1, 0xa6ffffff);
188 
189 	/* Enable power counters*/
190 	gpu_write(gpu, REG_A4XX_RBBM_RBBM_CTL, 0x00000030);
191 
192 	/*
193 	 * Turn on hang detection - this spews a lot of useful information
194 	 * into the RBBM registers on a hang:
195 	 */
196 	gpu_write(gpu, REG_A4XX_RBBM_INTERFACE_HANG_INT_CTL,
197 			(1 << 30) | 0xFFFF);
198 
199 	gpu_write(gpu, REG_A4XX_RB_GMEM_BASE_ADDR,
200 			(unsigned int)(a4xx_gpu->ocmem_base >> 14));
201 
202 	/* Turn on performance counters: */
203 	gpu_write(gpu, REG_A4XX_RBBM_PERFCTR_CTL, 0x01);
204 
205 	/* use the first CP counter for timestamp queries.. userspace may set
206 	 * this as well but it selects the same counter/countable:
207 	 */
208 	gpu_write(gpu, REG_A4XX_CP_PERFCTR_CP_SEL_0, CP_ALWAYS_COUNT);
209 
210 	if (adreno_is_a430(adreno_gpu))
211 		gpu_write(gpu, REG_A4XX_UCHE_CACHE_WAYS_VFD, 0x07);
212 
213 	/* Disable L2 bypass to avoid UCHE out of bounds errors */
214 	gpu_write(gpu, REG_A4XX_UCHE_TRAP_BASE_LO, 0xffff0000);
215 	gpu_write(gpu, REG_A4XX_UCHE_TRAP_BASE_HI, 0xffff0000);
216 
217 	gpu_write(gpu, REG_A4XX_CP_DEBUG, (1 << 25) |
218 			(adreno_is_a420(adreno_gpu) ? (1 << 29) : 0));
219 
220 	/* On A430 enable SP regfile sleep for power savings */
221 	/* TODO downstream does this for !420, so maybe applies for 405 too? */
222 	if (!adreno_is_a420(adreno_gpu)) {
223 		gpu_write(gpu, REG_A4XX_RBBM_SP_REGFILE_SLEEP_CNTL_0,
224 			0x00000441);
225 		gpu_write(gpu, REG_A4XX_RBBM_SP_REGFILE_SLEEP_CNTL_1,
226 			0x00000441);
227 	}
228 
229 	a4xx_enable_hwcg(gpu);
230 
231 	/*
232 	 * For A420 set RBBM_CLOCK_DELAY_HLSQ.CGC_HLSQ_TP_EARLY_CYC >= 2
233 	 * due to timing issue with HLSQ_TP_CLK_EN
234 	 */
235 	if (adreno_is_a420(adreno_gpu)) {
236 		unsigned int val;
237 		val = gpu_read(gpu, REG_A4XX_RBBM_CLOCK_DELAY_HLSQ);
238 		val &= ~A4XX_CGC_HLSQ_EARLY_CYC__MASK;
239 		val |= 2 << A4XX_CGC_HLSQ_EARLY_CYC__SHIFT;
240 		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_HLSQ, val);
241 	}
242 
243 	/* setup access protection: */
244 	gpu_write(gpu, REG_A4XX_CP_PROTECT_CTRL, 0x00000007);
245 
246 	/* RBBM registers */
247 	gpu_write(gpu, REG_A4XX_CP_PROTECT(0), 0x62000010);
248 	gpu_write(gpu, REG_A4XX_CP_PROTECT(1), 0x63000020);
249 	gpu_write(gpu, REG_A4XX_CP_PROTECT(2), 0x64000040);
250 	gpu_write(gpu, REG_A4XX_CP_PROTECT(3), 0x65000080);
251 	gpu_write(gpu, REG_A4XX_CP_PROTECT(4), 0x66000100);
252 	gpu_write(gpu, REG_A4XX_CP_PROTECT(5), 0x64000200);
253 
254 	/* CP registers */
255 	gpu_write(gpu, REG_A4XX_CP_PROTECT(6), 0x67000800);
256 	gpu_write(gpu, REG_A4XX_CP_PROTECT(7), 0x64001600);
257 
258 
259 	/* RB registers */
260 	gpu_write(gpu, REG_A4XX_CP_PROTECT(8), 0x60003300);
261 
262 	/* HLSQ registers */
263 	gpu_write(gpu, REG_A4XX_CP_PROTECT(9), 0x60003800);
264 
265 	/* VPC registers */
266 	gpu_write(gpu, REG_A4XX_CP_PROTECT(10), 0x61003980);
267 
268 	/* SMMU registers */
269 	gpu_write(gpu, REG_A4XX_CP_PROTECT(11), 0x6e010000);
270 
271 	gpu_write(gpu, REG_A4XX_RBBM_INT_0_MASK, A4XX_INT0_MASK);
272 
273 	ret = adreno_hw_init(gpu);
274 	if (ret)
275 		return ret;
276 
277 	/* Load PM4: */
278 	ptr = (uint32_t *)(adreno_gpu->fw[ADRENO_FW_PM4]->data);
279 	len = adreno_gpu->fw[ADRENO_FW_PM4]->size / 4;
280 	DBG("loading PM4 ucode version: %u", ptr[0]);
281 	gpu_write(gpu, REG_A4XX_CP_ME_RAM_WADDR, 0);
282 	for (i = 1; i < len; i++)
283 		gpu_write(gpu, REG_A4XX_CP_ME_RAM_DATA, ptr[i]);
284 
285 	/* Load PFP: */
286 	ptr = (uint32_t *)(adreno_gpu->fw[ADRENO_FW_PFP]->data);
287 	len = adreno_gpu->fw[ADRENO_FW_PFP]->size / 4;
288 	DBG("loading PFP ucode version: %u", ptr[0]);
289 
290 	gpu_write(gpu, REG_A4XX_CP_PFP_UCODE_ADDR, 0);
291 	for (i = 1; i < len; i++)
292 		gpu_write(gpu, REG_A4XX_CP_PFP_UCODE_DATA, ptr[i]);
293 
294 	/* clear ME_HALT to start micro engine */
295 	gpu_write(gpu, REG_A4XX_CP_ME_CNTL, 0);
296 
297 	return a4xx_me_init(gpu) ? 0 : -EINVAL;
298 }
299 
300 static void a4xx_recover(struct msm_gpu *gpu)
301 {
302 	int i;
303 
304 	adreno_dump_info(gpu);
305 
306 	for (i = 0; i < 8; i++) {
307 		printk("CP_SCRATCH_REG%d: %u\n", i,
308 			gpu_read(gpu, REG_AXXX_CP_SCRATCH_REG0 + i));
309 	}
310 
311 	/* dump registers before resetting gpu, if enabled: */
312 	if (hang_debug)
313 		a4xx_dump(gpu);
314 
315 	gpu_write(gpu, REG_A4XX_RBBM_SW_RESET_CMD, 1);
316 	gpu_read(gpu, REG_A4XX_RBBM_SW_RESET_CMD);
317 	gpu_write(gpu, REG_A4XX_RBBM_SW_RESET_CMD, 0);
318 	adreno_recover(gpu);
319 }
320 
321 static void a4xx_destroy(struct msm_gpu *gpu)
322 {
323 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
324 	struct a4xx_gpu *a4xx_gpu = to_a4xx_gpu(adreno_gpu);
325 
326 	DBG("%s", gpu->name);
327 
328 	adreno_gpu_cleanup(adreno_gpu);
329 
330 #ifdef CONFIG_MSM_OCMEM
331 	if (a4xx_gpu->ocmem_base)
332 		ocmem_free(OCMEM_GRAPHICS, a4xx_gpu->ocmem_hdl);
333 #endif
334 
335 	kfree(a4xx_gpu);
336 }
337 
338 static bool a4xx_idle(struct msm_gpu *gpu)
339 {
340 	/* wait for ringbuffer to drain: */
341 	if (!adreno_idle(gpu, gpu->rb[0]))
342 		return false;
343 
344 	/* then wait for GPU to finish: */
345 	if (spin_until(!(gpu_read(gpu, REG_A4XX_RBBM_STATUS) &
346 					A4XX_RBBM_STATUS_GPU_BUSY))) {
347 		DRM_ERROR("%s: timeout waiting for GPU to idle!\n", gpu->name);
348 		/* TODO maybe we need to reset GPU here to recover from hang? */
349 		return false;
350 	}
351 
352 	return true;
353 }
354 
355 static irqreturn_t a4xx_irq(struct msm_gpu *gpu)
356 {
357 	uint32_t status;
358 
359 	status = gpu_read(gpu, REG_A4XX_RBBM_INT_0_STATUS);
360 	DBG("%s: Int status %08x", gpu->name, status);
361 
362 	if (status & A4XX_INT0_CP_REG_PROTECT_FAULT) {
363 		uint32_t reg = gpu_read(gpu, REG_A4XX_CP_PROTECT_STATUS);
364 		printk("CP | Protected mode error| %s | addr=%x\n",
365 			reg & (1 << 24) ? "WRITE" : "READ",
366 			(reg & 0xFFFFF) >> 2);
367 	}
368 
369 	gpu_write(gpu, REG_A4XX_RBBM_INT_CLEAR_CMD, status);
370 
371 	msm_gpu_retire(gpu);
372 
373 	return IRQ_HANDLED;
374 }
375 
/*
 * Register list for A4xx, installed as adreno_gpu->registers by
 * a4xx_gpu_init().  The list is terminated by a ~0 sentinel.
 *
 * NOTE(review): the entries look like (first, last) inclusive offset pairs
 * (single registers appear as a repeated value), grouped by hardware block
 * as labelled below.  The consumer of this table lives outside this file,
 * so confirm the exact interpretation there.
 */
376 static const unsigned int a4xx_registers[] = {
377 	/* RBBM */
378 	0x0000, 0x0002, 0x0004, 0x0021, 0x0023, 0x0024, 0x0026, 0x0026,
379 	0x0028, 0x002B, 0x002E, 0x0034, 0x0037, 0x0044, 0x0047, 0x0066,
380 	0x0068, 0x0095, 0x009C, 0x0170, 0x0174, 0x01AF,
381 	/* CP */
382 	0x0200, 0x0233, 0x0240, 0x0250, 0x04C0, 0x04DD, 0x0500, 0x050B,
383 	0x0578, 0x058F,
384 	/* VSC */
385 	0x0C00, 0x0C03, 0x0C08, 0x0C41, 0x0C50, 0x0C51,
386 	/* GRAS */
387 	0x0C80, 0x0C81, 0x0C88, 0x0C8F,
388 	/* RB */
389 	0x0CC0, 0x0CC0, 0x0CC4, 0x0CD2,
390 	/* PC */
391 	0x0D00, 0x0D0C, 0x0D10, 0x0D17, 0x0D20, 0x0D23,
392 	/* VFD */
393 	0x0E40, 0x0E4A,
394 	/* VPC */
395 	0x0E60, 0x0E61, 0x0E63, 0x0E68,
396 	/* UCHE */
397 	0x0E80, 0x0E84, 0x0E88, 0x0E95,
398 	/* VMIDMT */
399 	0x1000, 0x1000, 0x1002, 0x1002, 0x1004, 0x1004, 0x1008, 0x100A,
400 	0x100C, 0x100D, 0x100F, 0x1010, 0x1012, 0x1016, 0x1024, 0x1024,
401 	0x1027, 0x1027, 0x1100, 0x1100, 0x1102, 0x1102, 0x1104, 0x1104,
402 	0x1110, 0x1110, 0x1112, 0x1116, 0x1124, 0x1124, 0x1300, 0x1300,
403 	0x1380, 0x1380,
404 	/* GRAS CTX 0 */
405 	0x2000, 0x2004, 0x2008, 0x2067, 0x2070, 0x2078, 0x207B, 0x216E,
406 	/* PC CTX 0 */
407 	0x21C0, 0x21C6, 0x21D0, 0x21D0, 0x21D9, 0x21D9, 0x21E5, 0x21E7,
408 	/* VFD CTX 0 */
409 	0x2200, 0x2204, 0x2208, 0x22A9,
410 	/* GRAS CTX 1 */
411 	0x2400, 0x2404, 0x2408, 0x2467, 0x2470, 0x2478, 0x247B, 0x256E,
412 	/* PC CTX 1 */
413 	0x25C0, 0x25C6, 0x25D0, 0x25D0, 0x25D9, 0x25D9, 0x25E5, 0x25E7,
414 	/* VFD CTX 1 */
415 	0x2600, 0x2604, 0x2608, 0x26A9,
416 	/* XPU */
417 	0x2C00, 0x2C01, 0x2C10, 0x2C10, 0x2C12, 0x2C16, 0x2C1D, 0x2C20,
418 	0x2C28, 0x2C28, 0x2C30, 0x2C30, 0x2C32, 0x2C36, 0x2C40, 0x2C40,
419 	0x2C50, 0x2C50, 0x2C52, 0x2C56, 0x2C80, 0x2C80, 0x2C94, 0x2C95,
420 	/* VBIF */
421 	0x3000, 0x3007, 0x300C, 0x3014, 0x3018, 0x301D, 0x3020, 0x3022,
422 	0x3024, 0x3026, 0x3028, 0x302A, 0x302C, 0x302D, 0x3030, 0x3031,
423 	0x3034, 0x3036, 0x3038, 0x3038, 0x303C, 0x303D, 0x3040, 0x3040,
424 	0x3049, 0x3049, 0x3058, 0x3058, 0x305B, 0x3061, 0x3064, 0x3068,
425 	0x306C, 0x306D, 0x3080, 0x3088, 0x308B, 0x308C, 0x3090, 0x3094,
426 	0x3098, 0x3098, 0x309C, 0x309C, 0x30C0, 0x30C0, 0x30C8, 0x30C8,
427 	0x30D0, 0x30D0, 0x30D8, 0x30D8, 0x30E0, 0x30E0, 0x3100, 0x3100,
428 	0x3108, 0x3108, 0x3110, 0x3110, 0x3118, 0x3118, 0x3120, 0x3120,
429 	0x3124, 0x3125, 0x3129, 0x3129, 0x3131, 0x3131, 0x330C, 0x330C,
430 	0x3310, 0x3310, 0x3400, 0x3401, 0x3410, 0x3410, 0x3412, 0x3416,
431 	0x341D, 0x3420, 0x3428, 0x3428, 0x3430, 0x3430, 0x3432, 0x3436,
432 	0x3440, 0x3440, 0x3450, 0x3450, 0x3452, 0x3456, 0x3480, 0x3480,
433 	0x3494, 0x3495, 0x4000, 0x4000, 0x4002, 0x4002, 0x4004, 0x4004,
434 	0x4008, 0x400A, 0x400C, 0x400D, 0x400F, 0x4012, 0x4014, 0x4016,
435 	0x401D, 0x401D, 0x4020, 0x4027, 0x4060, 0x4062, 0x4200, 0x4200,
436 	0x4300, 0x4300, 0x4400, 0x4400, 0x4500, 0x4500, 0x4800, 0x4802,
437 	0x480F, 0x480F, 0x4811, 0x4811, 0x4813, 0x4813, 0x4815, 0x4816,
438 	0x482B, 0x482B, 0x4857, 0x4857, 0x4883, 0x4883, 0x48AF, 0x48AF,
439 	0x48C5, 0x48C5, 0x48E5, 0x48E5, 0x4905, 0x4905, 0x4925, 0x4925,
440 	0x4945, 0x4945, 0x4950, 0x4950, 0x495B, 0x495B, 0x4980, 0x498E,
441 	0x4B00, 0x4B00, 0x4C00, 0x4C00, 0x4D00, 0x4D00, 0x4E00, 0x4E00,
442 	0x4E80, 0x4E80, 0x4F00, 0x4F00, 0x4F08, 0x4F08, 0x4F10, 0x4F10,
443 	0x4F18, 0x4F18, 0x4F20, 0x4F20, 0x4F30, 0x4F30, 0x4F60, 0x4F60,
444 	0x4F80, 0x4F81, 0x4F88, 0x4F89, 0x4FEE, 0x4FEE, 0x4FF3, 0x4FF3,
445 	0x6000, 0x6001, 0x6008, 0x600F, 0x6014, 0x6016, 0x6018, 0x601B,
446 	0x61FD, 0x61FD, 0x623C, 0x623C, 0x6380, 0x6380, 0x63A0, 0x63A0,
447 	0x63C0, 0x63C1, 0x63C8, 0x63C9, 0x63D0, 0x63D4, 0x63D6, 0x63D6,
448 	0x63EE, 0x63EE, 0x6400, 0x6401, 0x6408, 0x640F, 0x6414, 0x6416,
449 	0x6418, 0x641B, 0x65FD, 0x65FD, 0x663C, 0x663C, 0x6780, 0x6780,
450 	0x67A0, 0x67A0, 0x67C0, 0x67C1, 0x67C8, 0x67C9, 0x67D0, 0x67D4,
451 	0x67D6, 0x67D6, 0x67EE, 0x67EE, 0x6800, 0x6801, 0x6808, 0x680F,
452 	0x6814, 0x6816, 0x6818, 0x681B, 0x69FD, 0x69FD, 0x6A3C, 0x6A3C,
453 	0x6B80, 0x6B80, 0x6BA0, 0x6BA0, 0x6BC0, 0x6BC1, 0x6BC8, 0x6BC9,
454 	0x6BD0, 0x6BD4, 0x6BD6, 0x6BD6, 0x6BEE, 0x6BEE,
455 	~0 /* sentinel */
456 };
457 
458 static struct msm_gpu_state *a4xx_gpu_state_get(struct msm_gpu *gpu)
459 {
460 	struct msm_gpu_state *state = kzalloc(sizeof(*state), GFP_KERNEL);
461 
462 	if (!state)
463 		return ERR_PTR(-ENOMEM);
464 
465 	adreno_gpu_state_get(gpu, state);
466 
467 	state->rbbm_status = gpu_read(gpu, REG_A4XX_RBBM_STATUS);
468 
469 	return state;
470 }
471 
/*
 * Register offset defines for A4XX, in order of enum adreno_regs.
 *
 * Installed as adreno_gpu->reg_offsets by a4xx_gpu_init().  Each
 * REG_ADRENO_DEFINE() entry pairs a generic REG_ADRENO_* name with its
 * REG_A4XX_* counterpart.  The two *_HI entries use REG_ADRENO_SKIP().
 * NOTE(review): presumably that marks the register as not present on A4xx;
 * confirm against the macro definition, which is outside this file.
 */
473 static const unsigned int a4xx_register_offsets[REG_ADRENO_REGISTER_MAX] = {
474 	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_BASE, REG_A4XX_CP_RB_BASE),
475 	REG_ADRENO_SKIP(REG_ADRENO_CP_RB_BASE_HI),
476 	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_RPTR_ADDR, REG_A4XX_CP_RB_RPTR_ADDR),
477 	REG_ADRENO_SKIP(REG_ADRENO_CP_RB_RPTR_ADDR_HI),
478 	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_RPTR, REG_A4XX_CP_RB_RPTR),
479 	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_WPTR, REG_A4XX_CP_RB_WPTR),
480 	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_CNTL, REG_A4XX_CP_RB_CNTL),
481 };
482 
483 static void a4xx_dump(struct msm_gpu *gpu)
484 {
485 	printk("status:   %08x\n",
486 			gpu_read(gpu, REG_A4XX_RBBM_STATUS));
487 	adreno_dump(gpu);
488 }
489 
490 static int a4xx_pm_resume(struct msm_gpu *gpu) {
491 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
492 	int ret;
493 
494 	ret = msm_gpu_pm_resume(gpu);
495 	if (ret)
496 		return ret;
497 
498 	if (adreno_is_a430(adreno_gpu)) {
499 		unsigned int reg;
500 		/* Set the default register values; set SW_COLLAPSE to 0 */
501 		gpu_write(gpu, REG_A4XX_RBBM_POWER_CNTL_IP, 0x778000);
502 		do {
503 			udelay(5);
504 			reg = gpu_read(gpu, REG_A4XX_RBBM_POWER_STATUS);
505 		} while (!(reg & A4XX_RBBM_POWER_CNTL_IP_SP_TP_PWR_ON));
506 	}
507 	return 0;
508 }
509 
510 static int a4xx_pm_suspend(struct msm_gpu *gpu) {
511 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
512 	int ret;
513 
514 	ret = msm_gpu_pm_suspend(gpu);
515 	if (ret)
516 		return ret;
517 
518 	if (adreno_is_a430(adreno_gpu)) {
519 		/* Set the default register values; set SW_COLLAPSE to 1 */
520 		gpu_write(gpu, REG_A4XX_RBBM_POWER_CNTL_IP, 0x778001);
521 	}
522 	return 0;
523 }
524 
525 static int a4xx_get_timestamp(struct msm_gpu *gpu, uint64_t *value)
526 {
527 	*value = gpu_read64(gpu, REG_A4XX_RBBM_PERFCTR_CP_0_LO,
528 		REG_A4XX_RBBM_PERFCTR_CP_0_HI);
529 
530 	return 0;
531 }
532 
533 static const struct adreno_gpu_funcs funcs = {
534 	.base = {
535 		.get_param = adreno_get_param,
536 		.hw_init = a4xx_hw_init,
537 		.pm_suspend = a4xx_pm_suspend,
538 		.pm_resume = a4xx_pm_resume,
539 		.recover = a4xx_recover,
540 		.submit = adreno_submit,
541 		.flush = adreno_flush,
542 		.active_ring = adreno_active_ring,
543 		.irq = a4xx_irq,
544 		.destroy = a4xx_destroy,
545 #if defined(CONFIG_DEBUG_FS) || defined(CONFIG_DEV_COREDUMP)
546 		.show = adreno_show,
547 #endif
548 		.gpu_state_get = a4xx_gpu_state_get,
549 		.gpu_state_put = adreno_gpu_state_put,
550 	},
551 	.get_timestamp = a4xx_get_timestamp,
552 };
553 
554 struct msm_gpu *a4xx_gpu_init(struct drm_device *dev)
555 {
556 	struct a4xx_gpu *a4xx_gpu = NULL;
557 	struct adreno_gpu *adreno_gpu;
558 	struct msm_gpu *gpu;
559 	struct msm_drm_private *priv = dev->dev_private;
560 	struct platform_device *pdev = priv->gpu_pdev;
561 	int ret;
562 
563 	if (!pdev) {
564 		DRM_DEV_ERROR(dev->dev, "no a4xx device\n");
565 		ret = -ENXIO;
566 		goto fail;
567 	}
568 
569 	a4xx_gpu = kzalloc(sizeof(*a4xx_gpu), GFP_KERNEL);
570 	if (!a4xx_gpu) {
571 		ret = -ENOMEM;
572 		goto fail;
573 	}
574 
575 	adreno_gpu = &a4xx_gpu->base;
576 	gpu = &adreno_gpu->base;
577 
578 	gpu->perfcntrs = NULL;
579 	gpu->num_perfcntrs = 0;
580 
581 	adreno_gpu->registers = a4xx_registers;
582 	adreno_gpu->reg_offsets = a4xx_register_offsets;
583 
584 	ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs, 1);
585 	if (ret)
586 		goto fail;
587 
588 	/* if needed, allocate gmem: */
589 	if (adreno_is_a4xx(adreno_gpu)) {
590 #ifdef CONFIG_MSM_OCMEM
591 		/* TODO this is different/missing upstream: */
592 		struct ocmem_buf *ocmem_hdl =
593 				ocmem_allocate(OCMEM_GRAPHICS, adreno_gpu->gmem);
594 
595 		a4xx_gpu->ocmem_hdl = ocmem_hdl;
596 		a4xx_gpu->ocmem_base = ocmem_hdl->addr;
597 		adreno_gpu->gmem = ocmem_hdl->len;
598 		DBG("using %dK of OCMEM at 0x%08x", adreno_gpu->gmem / 1024,
599 				a4xx_gpu->ocmem_base);
600 #endif
601 	}
602 
603 	if (!gpu->aspace) {
604 		/* TODO we think it is possible to configure the GPU to
605 		 * restrict access to VRAM carveout.  But the required
606 		 * registers are unknown.  For now just bail out and
607 		 * limp along with just modesetting.  If it turns out
608 		 * to not be possible to restrict access, then we must
609 		 * implement a cmdstream validator.
610 		 */
611 		DRM_DEV_ERROR(dev->dev, "No memory protection without IOMMU\n");
612 		ret = -ENXIO;
613 		goto fail;
614 	}
615 
616 	return gpu;
617 
618 fail:
619 	if (a4xx_gpu)
620 		a4xx_destroy(&a4xx_gpu->base.base);
621 
622 	return ERR_PTR(ret);
623 }
624