/* Copyright (c) 2014 The Linux Foundation. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 and
 * only version 2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 */
#include "a4xx_gpu.h"
#ifdef CONFIG_MSM_OCMEM
#  include <soc/qcom/ocmem.h>
#endif

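/*
 * IRQ sources handled by a4xx_irq(): error conditions plus the CP ring/IB
 * interrupts.  This set is written to RBBM_INT_0_MASK in a4xx_hw_init().
 */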
#define A4XX_INT0_MASK \
	(A4XX_INT0_RBBM_AHB_ERROR |        \
	 A4XX_INT0_RBBM_ATB_BUS_OVERFLOW | \
	 A4XX_INT0_CP_T0_PACKET_IN_IB |    \
	 A4XX_INT0_CP_OPCODE_ERROR |       \
	 A4XX_INT0_CP_RESERVED_BIT_ERROR | \
	 A4XX_INT0_CP_HW_FAULT |           \
	 A4XX_INT0_CP_IB1_INT |            \
	 A4XX_INT0_CP_IB2_INT |            \
	 A4XX_INT0_CP_RB_INT |             \
	 A4XX_INT0_CP_REG_PROTECT_FAULT |  \
	 A4XX_INT0_CP_AHB_ERROR_HALT |     \
	 A4XX_INT0_UCHE_OOB_ACCESS)

extern bool hang_debug;
static void a4xx_dump(struct msm_gpu *gpu);
static bool a4xx_idle(struct msm_gpu *gpu);

/*
 * a4xx_enable_hwcg() - Program the clock control registers
 * @gpu: The msm gpu pointer
 */
static void a4xx_enable_hwcg(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	unsigned int i;

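	/*
	 * The per-block clock gating control/hysteresis/delay values below
	 * are magic numbers; by all appearances they are the defaults
	 * carried over from the downstream driver (an assumption - they are
	 * not publicly documented).
	 */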
	for (i = 0; i < 4; i++)
		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL_TP(i), 0x02222202);
	for (i = 0; i < 4; i++)
		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL2_TP(i), 0x00002222);
	for (i = 0; i < 4; i++)
		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_HYST_TP(i), 0x0E739CE7);
	for (i = 0; i < 4; i++)
		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_TP(i), 0x00111111);
	for (i = 0; i < 4; i++)
		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL_SP(i), 0x22222222);
	for (i = 0; i < 4; i++)
		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL2_SP(i), 0x00222222);
	for (i = 0; i < 4; i++)
		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_HYST_SP(i), 0x00000104);
	for (i = 0; i < 4; i++)
		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_SP(i), 0x00000081);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL_UCHE, 0x22222222);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL2_UCHE, 0x02222222);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL3_UCHE, 0x00000000);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL4_UCHE, 0x00000000);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_HYST_UCHE, 0x00004444);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_UCHE, 0x00001112);
	for (i = 0; i < 4; i++)
		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL_RB(i), 0x22222222);

	/* Disable L1 clocking in A420 due to CCU issues with it */
	for (i = 0; i < 4; i++) {
		if (adreno_is_a420(adreno_gpu)) {
			gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL2_RB(i),
					0x00002020);
		} else {
			gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL2_RB(i),
					0x00022020);
		}
	}

	for (i = 0; i < 4; i++) {
		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL_MARB_CCU(i),
				0x00000922);
	}

	for (i = 0; i < 4; i++) {
		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_HYST_RB_MARB_CCU(i),
				0x00000000);
	}

	for (i = 0; i < 4; i++) {
		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_RB_MARB_CCU_L1(i),
				0x00000001);
	}

	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_MODE_GPC, 0x02222222);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_HYST_GPC, 0x04100104);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_GPC, 0x00022222);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL_COM_DCOM, 0x00000022);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_HYST_COM_DCOM, 0x0000010F);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_COM_DCOM, 0x00000022);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL_TSE_RAS_RBBM, 0x00222222);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM, 0x00004104);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM, 0x00000222);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL_HLSQ, 0x00000000);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_HYST_HLSQ, 0x00000000);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_HLSQ, 0x00220000);
	/*
	 * Early A430s have a timing issue with SP/TP power collapse;
	 * disabling HW clock gating prevents it.
	 */
	if (adreno_is_a430(adreno_gpu) && adreno_gpu->rev.patchid < 2)
		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL, 0);
	else
		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL, 0xAAAAAAAA);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL2, 0);
}

static bool a4xx_me_init(struct msm_gpu *gpu)
{
	struct msm_ringbuffer *ring = gpu->rb[0];

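	/*
	 * CP_ME_INIT: one-shot microengine initialization.  The 17 payload
	 * dwords are the standard defaults, presumably inherited from the
	 * downstream driver (they are not publicly documented).
	 */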
	OUT_PKT3(ring, CP_ME_INIT, 17);
	OUT_RING(ring, 0x000003f7);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000080);
	OUT_RING(ring, 0x00000100);
	OUT_RING(ring, 0x00000180);
	OUT_RING(ring, 0x00006600);
	OUT_RING(ring, 0x00000150);
	OUT_RING(ring, 0x0000014e);
	OUT_RING(ring, 0x00000154);
	OUT_RING(ring, 0x00000001);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);

	gpu->funcs->flush(gpu, ring);
	return a4xx_idle(gpu);
}

static int a4xx_hw_init(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a4xx_gpu *a4xx_gpu = to_a4xx_gpu(adreno_gpu);
	uint32_t *ptr, len;
	int i, ret;

	if (adreno_is_a420(adreno_gpu)) {
		gpu_write(gpu, REG_A4XX_VBIF_ABIT_SORT, 0x0001001F);
		gpu_write(gpu, REG_A4XX_VBIF_ABIT_SORT_CONF, 0x000000A4);
		gpu_write(gpu, REG_A4XX_VBIF_GATE_OFF_WRREQ_EN, 0x00000001);
		gpu_write(gpu, REG_A4XX_VBIF_IN_RD_LIM_CONF0, 0x18181818);
		gpu_write(gpu, REG_A4XX_VBIF_IN_RD_LIM_CONF1, 0x00000018);
		gpu_write(gpu, REG_A4XX_VBIF_IN_WR_LIM_CONF0, 0x18181818);
		gpu_write(gpu, REG_A4XX_VBIF_IN_WR_LIM_CONF1, 0x00000018);
		gpu_write(gpu, REG_A4XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x00000003);
	} else if (adreno_is_a430(adreno_gpu)) {
		gpu_write(gpu, REG_A4XX_VBIF_GATE_OFF_WRREQ_EN, 0x00000001);
		gpu_write(gpu, REG_A4XX_VBIF_IN_RD_LIM_CONF0, 0x18181818);
		gpu_write(gpu, REG_A4XX_VBIF_IN_RD_LIM_CONF1, 0x00000018);
		gpu_write(gpu, REG_A4XX_VBIF_IN_WR_LIM_CONF0, 0x18181818);
		gpu_write(gpu, REG_A4XX_VBIF_IN_WR_LIM_CONF1, 0x00000018);
		gpu_write(gpu, REG_A4XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x00000003);
	} else {
		BUG();
	}

	/* Make all blocks contribute to the GPU BUSY perf counter */
	gpu_write(gpu, REG_A4XX_RBBM_GPU_BUSY_MASKED, 0xffffffff);

	/* Tune the hysteresis counters for SP and CP idle detection */
	gpu_write(gpu, REG_A4XX_RBBM_SP_HYST_CNT, 0x10);
	gpu_write(gpu, REG_A4XX_RBBM_WAIT_IDLE_CLOCKS_CTL, 0x10);

	if (adreno_is_a430(adreno_gpu))
		gpu_write(gpu, REG_A4XX_RBBM_WAIT_IDLE_CLOCKS_CTL2, 0x30);

	/* Enable the RBBM error reporting bits */
	gpu_write(gpu, REG_A4XX_RBBM_AHB_CTL0, 0x00000001);

	/* Enable AHB error reporting */
	gpu_write(gpu, REG_A4XX_RBBM_AHB_CTL1, 0xa6ffffff);

	/* Enable power counters */
	gpu_write(gpu, REG_A4XX_RBBM_RBBM_CTL, 0x00000030);

	/*
	 * Turn on hang detection - this spews a lot of useful information
	 * into the RBBM registers on a hang:
	 */
	gpu_write(gpu, REG_A4XX_RBBM_INTERFACE_HANG_INT_CTL,
			(1 << 30) | 0xFFFF);

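	/* The GMEM/OCMEM base address is programmed in 16KB units, hence >> 14 */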
	gpu_write(gpu, REG_A4XX_RB_GMEM_BASE_ADDR,
			(unsigned int)(a4xx_gpu->ocmem_base >> 14));

	/* Turn on performance counters: */
	gpu_write(gpu, REG_A4XX_RBBM_PERFCTR_CTL, 0x01);

	/* use the first CP counter for timestamp queries.. userspace may set
	 * this as well but it selects the same counter/countable:
	 */
	gpu_write(gpu, REG_A4XX_CP_PERFCTR_CP_SEL_0, CP_ALWAYS_COUNT);

	if (adreno_is_a430(adreno_gpu))
		gpu_write(gpu, REG_A4XX_UCHE_CACHE_WAYS_VFD, 0x07);

	/* Disable L2 bypass to avoid UCHE out of bounds errors */
	gpu_write(gpu, REG_A4XX_UCHE_TRAP_BASE_LO, 0xffff0000);
	gpu_write(gpu, REG_A4XX_UCHE_TRAP_BASE_HI, 0xffff0000);

	gpu_write(gpu, REG_A4XX_CP_DEBUG, (1 << 25) |
			(adreno_is_a420(adreno_gpu) ? (1 << 29) : 0));

	/* On A430 enable SP regfile sleep for power savings */
	/* TODO downstream does this for !420, so maybe applies for 405 too? */
	if (!adreno_is_a420(adreno_gpu)) {
		gpu_write(gpu, REG_A4XX_RBBM_SP_REGFILE_SLEEP_CNTL_0,
			0x00000441);
		gpu_write(gpu, REG_A4XX_RBBM_SP_REGFILE_SLEEP_CNTL_1,
			0x00000441);
	}

	a4xx_enable_hwcg(gpu);

	/*
	 * For A420 set RBBM_CLOCK_DELAY_HLSQ.CGC_HLSQ_TP_EARLY_CYC >= 2
	 * due to timing issue with HLSQ_TP_CLK_EN
	 */
	if (adreno_is_a420(adreno_gpu)) {
		unsigned int val;

		val = gpu_read(gpu, REG_A4XX_RBBM_CLOCK_DELAY_HLSQ);
		val &= ~A4XX_CGC_HLSQ_EARLY_CYC__MASK;
		val |= 2 << A4XX_CGC_HLSQ_EARLY_CYC__SHIFT;
		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_HLSQ, val);
	}

	/* setup access protection: */
	gpu_write(gpu, REG_A4XX_CP_PROTECT_CTRL, 0x00000007);

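	/*
	 * Each CP_PROTECT entry traps one register range: a base offset plus
	 * an encoded range size and access bits (the exact bit packing here
	 * is assumed to follow the downstream protect tables).
	 */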
	/* RBBM registers */
	gpu_write(gpu, REG_A4XX_CP_PROTECT(0), 0x62000010);
	gpu_write(gpu, REG_A4XX_CP_PROTECT(1), 0x63000020);
	gpu_write(gpu, REG_A4XX_CP_PROTECT(2), 0x64000040);
	gpu_write(gpu, REG_A4XX_CP_PROTECT(3), 0x65000080);
	gpu_write(gpu, REG_A4XX_CP_PROTECT(4), 0x66000100);
	gpu_write(gpu, REG_A4XX_CP_PROTECT(5), 0x64000200);

	/* CP registers */
	gpu_write(gpu, REG_A4XX_CP_PROTECT(6), 0x67000800);
	gpu_write(gpu, REG_A4XX_CP_PROTECT(7), 0x64001600);

	/* RB registers */
	gpu_write(gpu, REG_A4XX_CP_PROTECT(8), 0x60003300);

	/* HLSQ registers */
	gpu_write(gpu, REG_A4XX_CP_PROTECT(9), 0x60003800);

	/* VPC registers */
	gpu_write(gpu, REG_A4XX_CP_PROTECT(10), 0x61003980);

	/* SMMU registers */
	gpu_write(gpu, REG_A4XX_CP_PROTECT(11), 0x6e010000);

	gpu_write(gpu, REG_A4XX_RBBM_INT_0_MASK, A4XX_INT0_MASK);

	ret = adreno_hw_init(gpu);
	if (ret)
		return ret;

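	/*
	 * Upload microcode: dword 0 of each firmware image is its version,
	 * so the actual ucode starts at dword 1.  The RAM/UCODE data ports
	 * auto-increment their address, hence only the start address is set.
	 */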
	/* Load PM4: */
	ptr = (uint32_t *)(adreno_gpu->pm4->data);
	len = adreno_gpu->pm4->size / 4;
	DBG("loading PM4 ucode version: %u", ptr[0]);
	gpu_write(gpu, REG_A4XX_CP_ME_RAM_WADDR, 0);
	for (i = 1; i < len; i++)
		gpu_write(gpu, REG_A4XX_CP_ME_RAM_DATA, ptr[i]);

	/* Load PFP: */
	ptr = (uint32_t *)(adreno_gpu->pfp->data);
	len = adreno_gpu->pfp->size / 4;
	DBG("loading PFP ucode version: %u", ptr[0]);

	gpu_write(gpu, REG_A4XX_CP_PFP_UCODE_ADDR, 0);
	for (i = 1; i < len; i++)
		gpu_write(gpu, REG_A4XX_CP_PFP_UCODE_DATA, ptr[i]);

	/* clear ME_HALT to start micro engine */
	gpu_write(gpu, REG_A4XX_CP_ME_CNTL, 0);

	return a4xx_me_init(gpu) ? 0 : -EINVAL;
}

static void a4xx_recover(struct msm_gpu *gpu)
{
	int i;

	adreno_dump_info(gpu);

	for (i = 0; i < 8; i++) {
		printk("CP_SCRATCH_REG%d: %u\n", i,
			gpu_read(gpu, REG_AXXX_CP_SCRATCH_REG0 + i));
	}

	/* dump registers before resetting gpu, if enabled: */
	if (hang_debug)
		a4xx_dump(gpu);

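	/*
	 * Pulse the soft reset; the read back in between ensures the assert
	 * has posted to the hardware before the bit is cleared again.
	 */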
	gpu_write(gpu, REG_A4XX_RBBM_SW_RESET_CMD, 1);
	gpu_read(gpu, REG_A4XX_RBBM_SW_RESET_CMD);
	gpu_write(gpu, REG_A4XX_RBBM_SW_RESET_CMD, 0);
	adreno_recover(gpu);
}

static void a4xx_destroy(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a4xx_gpu *a4xx_gpu = to_a4xx_gpu(adreno_gpu);

	DBG("%s", gpu->name);

	adreno_gpu_cleanup(adreno_gpu);

#ifdef CONFIG_MSM_OCMEM
	if (a4xx_gpu->ocmem_base)
		ocmem_free(OCMEM_GRAPHICS, a4xx_gpu->ocmem_hdl);
#endif

	kfree(a4xx_gpu);
}

static bool a4xx_idle(struct msm_gpu *gpu)
{
	/* wait for ringbuffer to drain: */
	if (!adreno_idle(gpu, gpu->rb[0]))
		return false;

	/* then wait for GPU to finish: */
	if (spin_until(!(gpu_read(gpu, REG_A4XX_RBBM_STATUS) &
					A4XX_RBBM_STATUS_GPU_BUSY))) {
		DRM_ERROR("%s: timeout waiting for GPU to idle!\n", gpu->name);
		/* TODO maybe we need to reset GPU here to recover from hang? */
		return false;
	}

	return true;
}

static irqreturn_t a4xx_irq(struct msm_gpu *gpu)
{
	uint32_t status;

	status = gpu_read(gpu, REG_A4XX_RBBM_INT_0_STATUS);
	DBG("%s: Int status %08x", gpu->name, status);

	if (status & A4XX_INT0_CP_REG_PROTECT_FAULT) {
		uint32_t reg = gpu_read(gpu, REG_A4XX_CP_PROTECT_STATUS);
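		/*
		 * Bit 24 of PROTECT_STATUS flags a write access; the low
		 * bits hold the faulting address, shifted down here to a
		 * dword register offset.
		 */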
		printk("CP | Protected mode error | %s | addr=%x\n",
			reg & (1 << 24) ? "WRITE" : "READ",
			(reg & 0xFFFFF) >> 2);
	}

	gpu_write(gpu, REG_A4XX_RBBM_INT_CLEAR_CMD, status);

	msm_gpu_retire(gpu);

	return IRQ_HANDLED;
}

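/*
 * (start, end) pairs of register ranges dumped for debugfs/hang output,
 * terminated by the ~0 sentinel.
 */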
static const unsigned int a4xx_registers[] = {
	/* RBBM */
	0x0000, 0x0002, 0x0004, 0x0021, 0x0023, 0x0024, 0x0026, 0x0026,
	0x0028, 0x002B, 0x002E, 0x0034, 0x0037, 0x0044, 0x0047, 0x0066,
	0x0068, 0x0095, 0x009C, 0x0170, 0x0174, 0x01AF,
	/* CP */
	0x0200, 0x0233, 0x0240, 0x0250, 0x04C0, 0x04DD, 0x0500, 0x050B,
	0x0578, 0x058F,
	/* VSC */
	0x0C00, 0x0C03, 0x0C08, 0x0C41, 0x0C50, 0x0C51,
	/* GRAS */
	0x0C80, 0x0C81, 0x0C88, 0x0C8F,
	/* RB */
	0x0CC0, 0x0CC0, 0x0CC4, 0x0CD2,
	/* PC */
	0x0D00, 0x0D0C, 0x0D10, 0x0D17, 0x0D20, 0x0D23,
	/* VFD */
	0x0E40, 0x0E4A,
	/* VPC */
	0x0E60, 0x0E61, 0x0E63, 0x0E68,
	/* UCHE */
	0x0E80, 0x0E84, 0x0E88, 0x0E95,
	/* VMIDMT */
	0x1000, 0x1000, 0x1002, 0x1002, 0x1004, 0x1004, 0x1008, 0x100A,
	0x100C, 0x100D, 0x100F, 0x1010, 0x1012, 0x1016, 0x1024, 0x1024,
	0x1027, 0x1027, 0x1100, 0x1100, 0x1102, 0x1102, 0x1104, 0x1104,
	0x1110, 0x1110, 0x1112, 0x1116, 0x1124, 0x1124, 0x1300, 0x1300,
	0x1380, 0x1380,
	/* GRAS CTX 0 */
	0x2000, 0x2004, 0x2008, 0x2067, 0x2070, 0x2078, 0x207B, 0x216E,
	/* PC CTX 0 */
	0x21C0, 0x21C6, 0x21D0, 0x21D0, 0x21D9, 0x21D9, 0x21E5, 0x21E7,
	/* VFD CTX 0 */
	0x2200, 0x2204, 0x2208, 0x22A9,
	/* GRAS CTX 1 */
	0x2400, 0x2404, 0x2408, 0x2467, 0x2470, 0x2478, 0x247B, 0x256E,
	/* PC CTX 1 */
	0x25C0, 0x25C6, 0x25D0, 0x25D0, 0x25D9, 0x25D9, 0x25E5, 0x25E7,
	/* VFD CTX 1 */
	0x2600, 0x2604, 0x2608, 0x26A9,
	/* XPU */
	0x2C00, 0x2C01, 0x2C10, 0x2C10, 0x2C12, 0x2C16, 0x2C1D, 0x2C20,
	0x2C28, 0x2C28, 0x2C30, 0x2C30, 0x2C32, 0x2C36, 0x2C40, 0x2C40,
	0x2C50, 0x2C50, 0x2C52, 0x2C56, 0x2C80, 0x2C80, 0x2C94, 0x2C95,
	/* VBIF */
	0x3000, 0x3007, 0x300C, 0x3014, 0x3018, 0x301D, 0x3020, 0x3022,
	0x3024, 0x3026, 0x3028, 0x302A, 0x302C, 0x302D, 0x3030, 0x3031,
	0x3034, 0x3036, 0x3038, 0x3038, 0x303C, 0x303D, 0x3040, 0x3040,
	0x3049, 0x3049, 0x3058, 0x3058, 0x305B, 0x3061, 0x3064, 0x3068,
	0x306C, 0x306D, 0x3080, 0x3088, 0x308B, 0x308C, 0x3090, 0x3094,
	0x3098, 0x3098, 0x309C, 0x309C, 0x30C0, 0x30C0, 0x30C8, 0x30C8,
	0x30D0, 0x30D0, 0x30D8, 0x30D8, 0x30E0, 0x30E0, 0x3100, 0x3100,
	0x3108, 0x3108, 0x3110, 0x3110, 0x3118, 0x3118, 0x3120, 0x3120,
	0x3124, 0x3125, 0x3129, 0x3129, 0x3131, 0x3131, 0x330C, 0x330C,
	0x3310, 0x3310, 0x3400, 0x3401, 0x3410, 0x3410, 0x3412, 0x3416,
	0x341D, 0x3420, 0x3428, 0x3428, 0x3430, 0x3430, 0x3432, 0x3436,
	0x3440, 0x3440, 0x3450, 0x3450, 0x3452, 0x3456, 0x3480, 0x3480,
	0x3494, 0x3495, 0x4000, 0x4000, 0x4002, 0x4002, 0x4004, 0x4004,
	0x4008, 0x400A, 0x400C, 0x400D, 0x400F, 0x4012, 0x4014, 0x4016,
	0x401D, 0x401D, 0x4020, 0x4027, 0x4060, 0x4062, 0x4200, 0x4200,
	0x4300, 0x4300, 0x4400, 0x4400, 0x4500, 0x4500, 0x4800, 0x4802,
	0x480F, 0x480F, 0x4811, 0x4811, 0x4813, 0x4813, 0x4815, 0x4816,
	0x482B, 0x482B, 0x4857, 0x4857, 0x4883, 0x4883, 0x48AF, 0x48AF,
	0x48C5, 0x48C5, 0x48E5, 0x48E5, 0x4905, 0x4905, 0x4925, 0x4925,
	0x4945, 0x4945, 0x4950, 0x4950, 0x495B, 0x495B, 0x4980, 0x498E,
	0x4B00, 0x4B00, 0x4C00, 0x4C00, 0x4D00, 0x4D00, 0x4E00, 0x4E00,
	0x4E80, 0x4E80, 0x4F00, 0x4F00, 0x4F08, 0x4F08, 0x4F10, 0x4F10,
	0x4F18, 0x4F18, 0x4F20, 0x4F20, 0x4F30, 0x4F30, 0x4F60, 0x4F60,
	0x4F80, 0x4F81, 0x4F88, 0x4F89, 0x4FEE, 0x4FEE, 0x4FF3, 0x4FF3,
	0x6000, 0x6001, 0x6008, 0x600F, 0x6014, 0x6016, 0x6018, 0x601B,
	0x61FD, 0x61FD, 0x623C, 0x623C, 0x6380, 0x6380, 0x63A0, 0x63A0,
	0x63C0, 0x63C1, 0x63C8, 0x63C9, 0x63D0, 0x63D4, 0x63D6, 0x63D6,
	0x63EE, 0x63EE, 0x6400, 0x6401, 0x6408, 0x640F, 0x6414, 0x6416,
	0x6418, 0x641B, 0x65FD, 0x65FD, 0x663C, 0x663C, 0x6780, 0x6780,
	0x67A0, 0x67A0, 0x67C0, 0x67C1, 0x67C8, 0x67C9, 0x67D0, 0x67D4,
	0x67D6, 0x67D6, 0x67EE, 0x67EE, 0x6800, 0x6801, 0x6808, 0x680F,
	0x6814, 0x6816, 0x6818, 0x681B, 0x69FD, 0x69FD, 0x6A3C, 0x6A3C,
	0x6B80, 0x6B80, 0x6BA0, 0x6BA0, 0x6BC0, 0x6BC1, 0x6BC8, 0x6BC9,
	0x6BD0, 0x6BD4, 0x6BD6, 0x6BD6, 0x6BEE, 0x6BEE,
	~0 /* sentinel */
};

#ifdef CONFIG_DEBUG_FS
static void a4xx_show(struct msm_gpu *gpu, struct seq_file *m)
{
	seq_printf(m, "status:   %08x\n",
			gpu_read(gpu, REG_A4XX_RBBM_STATUS));
	adreno_show(gpu, m);
}
#endif

/* Register offset defines for A4XX, in order of enum adreno_regs */
static const unsigned int a4xx_register_offsets[REG_ADRENO_REGISTER_MAX] = {
	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_BASE, REG_A4XX_CP_RB_BASE),
	REG_ADRENO_SKIP(REG_ADRENO_CP_RB_BASE_HI),
	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_RPTR_ADDR, REG_A4XX_CP_RB_RPTR_ADDR),
	REG_ADRENO_SKIP(REG_ADRENO_CP_RB_RPTR_ADDR_HI),
	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_RPTR, REG_A4XX_CP_RB_RPTR),
	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_WPTR, REG_A4XX_CP_RB_WPTR),
	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_CNTL, REG_A4XX_CP_RB_CNTL),
};

static void a4xx_dump(struct msm_gpu *gpu)
{
	printk("status:   %08x\n",
			gpu_read(gpu, REG_A4XX_RBBM_STATUS));
	adreno_dump(gpu);
}

static int a4xx_pm_resume(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	int ret;

	ret = msm_gpu_pm_resume(gpu);
	if (ret)
		return ret;

	if (adreno_is_a430(adreno_gpu)) {
		unsigned int reg;

		/* Set the default register values; set SW_COLLAPSE to 0 */
		gpu_write(gpu, REG_A4XX_RBBM_POWER_CNTL_IP, 0x778000);
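		/*
		 * Poll until the SP/TP rail reports power-on.  Note there is
		 * no timeout; this assumes the rail always comes back up.
		 */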
		do {
			udelay(5);
			reg = gpu_read(gpu, REG_A4XX_RBBM_POWER_STATUS);
		} while (!(reg & A4XX_RBBM_POWER_CNTL_IP_SP_TP_PWR_ON));
	}
	return 0;
}

static int a4xx_pm_suspend(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	int ret;

	ret = msm_gpu_pm_suspend(gpu);
	if (ret)
		return ret;

	if (adreno_is_a430(adreno_gpu)) {
		/* Set the default register values; set SW_COLLAPSE to 1 */
		gpu_write(gpu, REG_A4XX_RBBM_POWER_CNTL_IP, 0x778001);
	}
	return 0;
}

static int a4xx_get_timestamp(struct msm_gpu *gpu, uint64_t *value)
{
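	/* Read the always-on CP cycle counter selected in a4xx_hw_init() */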
	*value = gpu_read64(gpu, REG_A4XX_RBBM_PERFCTR_CP_0_LO,
		REG_A4XX_RBBM_PERFCTR_CP_0_HI);

	return 0;
}

static const struct adreno_gpu_funcs funcs = {
	.base = {
		.get_param = adreno_get_param,
		.hw_init = a4xx_hw_init,
		.pm_suspend = a4xx_pm_suspend,
		.pm_resume = a4xx_pm_resume,
		.recover = a4xx_recover,
		.submit = adreno_submit,
		.flush = adreno_flush,
		.active_ring = adreno_active_ring,
		.irq = a4xx_irq,
		.destroy = a4xx_destroy,
#ifdef CONFIG_DEBUG_FS
		.show = a4xx_show,
#endif
	},
	.get_timestamp = a4xx_get_timestamp,
};

struct msm_gpu *a4xx_gpu_init(struct drm_device *dev)
{
	struct a4xx_gpu *a4xx_gpu = NULL;
	struct adreno_gpu *adreno_gpu;
	struct msm_gpu *gpu;
	struct msm_drm_private *priv = dev->dev_private;
	struct platform_device *pdev = priv->gpu_pdev;
	int ret;

	if (!pdev) {
		dev_err(dev->dev, "no a4xx device\n");
		ret = -ENXIO;
		goto fail;
	}

	a4xx_gpu = kzalloc(sizeof(*a4xx_gpu), GFP_KERNEL);
	if (!a4xx_gpu) {
		ret = -ENOMEM;
		goto fail;
	}

	adreno_gpu = &a4xx_gpu->base;
	gpu = &adreno_gpu->base;

	gpu->perfcntrs = NULL;
	gpu->num_perfcntrs = 0;

	adreno_gpu->registers = a4xx_registers;
	adreno_gpu->reg_offsets = a4xx_register_offsets;

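	/* a4xx uses a single ringbuffer, hence nr_rings = 1 */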
	ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs, 1);
	if (ret)
		goto fail;

	/* if needed, allocate gmem: */
	if (adreno_is_a4xx(adreno_gpu)) {
#ifdef CONFIG_MSM_OCMEM
		/* TODO this is different/missing upstream: */
		struct ocmem_buf *ocmem_hdl =
				ocmem_allocate(OCMEM_GRAPHICS, adreno_gpu->gmem);

		/*
		 * Guard against allocation failure before dereferencing the
		 * handle (assumes the downstream ocmem_allocate() returns
		 * NULL on failure).
		 */
		if (!ocmem_hdl) {
			ret = -ENOMEM;
			goto fail;
		}

		a4xx_gpu->ocmem_hdl = ocmem_hdl;
		a4xx_gpu->ocmem_base = ocmem_hdl->addr;
		adreno_gpu->gmem = ocmem_hdl->len;
		DBG("using %dK of OCMEM at 0x%08x", adreno_gpu->gmem / 1024,
				a4xx_gpu->ocmem_base);
#endif
	}

	if (!gpu->aspace) {
		/* TODO we think it is possible to configure the GPU to
		 * restrict access to VRAM carveout.  But the required
		 * registers are unknown.  For now just bail out and
		 * limp along with just modesetting.  If it turns out
		 * to not be possible to restrict access, then we must
		 * implement a cmdstream validator.
		 */
		dev_err(dev->dev, "No memory protection without IOMMU\n");
		ret = -ENXIO;
		goto fail;
	}

	return gpu;

fail:
	if (a4xx_gpu)
		a4xx_destroy(&a4xx_gpu->base.base);

	return ERR_PTR(ret);
}
617