xref: /openbmc/linux/drivers/gpu/drm/msm/adreno/a4xx_gpu.c (revision 726bd223)
/* Copyright (c) 2014 The Linux Foundation. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 and
 * only version 2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 */
#include "a4xx_gpu.h"
#ifdef CONFIG_MSM_OCMEM
#  include <soc/qcom/ocmem.h>
#endif

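/*
 * RBBM interrupt sources unmasked at hw_init time; anything not listed
 * here stays masked.  a4xx_irq() acks whatever fires and decodes CP
 * register protection faults.
 */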
#define A4XX_INT0_MASK \
	(A4XX_INT0_RBBM_AHB_ERROR |        \
	 A4XX_INT0_RBBM_ATB_BUS_OVERFLOW | \
	 A4XX_INT0_CP_T0_PACKET_IN_IB |    \
	 A4XX_INT0_CP_OPCODE_ERROR |       \
	 A4XX_INT0_CP_RESERVED_BIT_ERROR | \
	 A4XX_INT0_CP_HW_FAULT |           \
	 A4XX_INT0_CP_IB1_INT |            \
	 A4XX_INT0_CP_IB2_INT |            \
	 A4XX_INT0_CP_RB_INT |             \
	 A4XX_INT0_CP_REG_PROTECT_FAULT |  \
	 A4XX_INT0_CP_AHB_ERROR_HALT |     \
	 A4XX_INT0_UCHE_OOB_ACCESS)

extern bool hang_debug;
static void a4xx_dump(struct msm_gpu *gpu);

/*
 * a4xx_enable_hwcg() - Program the clock control registers
 * @gpu: The GPU to program
 */
static void a4xx_enable_hwcg(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	unsigned int i;
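
	/* Program clock control for each of the four TP and SP instances: */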
	for (i = 0; i < 4; i++)
		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL_TP(i), 0x02222202);
	for (i = 0; i < 4; i++)
		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL2_TP(i), 0x00002222);
	for (i = 0; i < 4; i++)
		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_HYST_TP(i), 0x0E739CE7);
	for (i = 0; i < 4; i++)
		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_TP(i), 0x00111111);
	for (i = 0; i < 4; i++)
		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL_SP(i), 0x22222222);
	for (i = 0; i < 4; i++)
		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL2_SP(i), 0x00222222);
	for (i = 0; i < 4; i++)
		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_HYST_SP(i), 0x00000104);
	for (i = 0; i < 4; i++)
		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_SP(i), 0x00000081);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL_UCHE, 0x22222222);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL2_UCHE, 0x02222222);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL3_UCHE, 0x00000000);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL4_UCHE, 0x00000000);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_HYST_UCHE, 0x00004444);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_UCHE, 0x00001112);
	for (i = 0; i < 4; i++)
		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL_RB(i), 0x22222222);

	/* Disable L1 clocking in A420 due to CCU issues with it */
	for (i = 0; i < 4; i++) {
		if (adreno_is_a420(adreno_gpu)) {
			gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL2_RB(i),
					0x00002020);
		} else {
			gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL2_RB(i),
					0x00022020);
		}
	}

	for (i = 0; i < 4; i++) {
		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL_MARB_CCU(i),
				0x00000922);
	}

	for (i = 0; i < 4; i++) {
		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_HYST_RB_MARB_CCU(i),
				0x00000000);
	}

	for (i = 0; i < 4; i++) {
		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_RB_MARB_CCU_L1(i),
				0x00000001);
	}

	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_MODE_GPC, 0x02222222);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_HYST_GPC, 0x04100104);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_GPC, 0x00022222);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL_COM_DCOM, 0x00000022);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_HYST_COM_DCOM, 0x0000010F);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_COM_DCOM, 0x00000022);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL_TSE_RAS_RBBM, 0x00222222);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM, 0x00004104);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM, 0x00000222);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL_HLSQ, 0x00000000);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_HYST_HLSQ, 0x00000000);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_HLSQ, 0x00220000);

	/*
	 * Early A430s have a timing issue with SP/TP power collapse;
	 * disabling HW clock gating prevents it.
	 */
	if (adreno_is_a430(adreno_gpu) && adreno_gpu->rev.patchid < 2)
		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL, 0);
	else
		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL, 0xAAAAAAAA);
	gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL2, 0);
}

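/*
 * Bring up the CP micro-engine: emit the 17-dword CP_ME_INIT packet,
 * kick the ringbuffer, and wait for the GPU to drain and go idle.
 */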
static bool a4xx_me_init(struct msm_gpu *gpu)
{
	struct msm_ringbuffer *ring = gpu->rb;

	OUT_PKT3(ring, CP_ME_INIT, 17);
	OUT_RING(ring, 0x000003f7);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000080);
	OUT_RING(ring, 0x00000100);
	OUT_RING(ring, 0x00000180);
	OUT_RING(ring, 0x00006600);
	OUT_RING(ring, 0x00000150);
	OUT_RING(ring, 0x0000014e);
	OUT_RING(ring, 0x00000154);
	OUT_RING(ring, 0x00000001);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);

	gpu->funcs->flush(gpu);
	return gpu->funcs->idle(gpu);
}

static int a4xx_hw_init(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a4xx_gpu *a4xx_gpu = to_a4xx_gpu(adreno_gpu);
	uint32_t *ptr, len;
	int i, ret;

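	/* Program VBIF (bus interface) arbitration and rd/wr limits: */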
	if (adreno_is_a420(adreno_gpu)) {
		gpu_write(gpu, REG_A4XX_VBIF_ABIT_SORT, 0x0001001F);
		gpu_write(gpu, REG_A4XX_VBIF_ABIT_SORT_CONF, 0x000000A4);
		gpu_write(gpu, REG_A4XX_VBIF_GATE_OFF_WRREQ_EN, 0x00000001);
		gpu_write(gpu, REG_A4XX_VBIF_IN_RD_LIM_CONF0, 0x18181818);
		gpu_write(gpu, REG_A4XX_VBIF_IN_RD_LIM_CONF1, 0x00000018);
		gpu_write(gpu, REG_A4XX_VBIF_IN_WR_LIM_CONF0, 0x18181818);
		gpu_write(gpu, REG_A4XX_VBIF_IN_WR_LIM_CONF1, 0x00000018);
		gpu_write(gpu, REG_A4XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x00000003);
	} else if (adreno_is_a430(adreno_gpu)) {
		gpu_write(gpu, REG_A4XX_VBIF_GATE_OFF_WRREQ_EN, 0x00000001);
		gpu_write(gpu, REG_A4XX_VBIF_IN_RD_LIM_CONF0, 0x18181818);
		gpu_write(gpu, REG_A4XX_VBIF_IN_RD_LIM_CONF1, 0x00000018);
		gpu_write(gpu, REG_A4XX_VBIF_IN_WR_LIM_CONF0, 0x18181818);
		gpu_write(gpu, REG_A4XX_VBIF_IN_WR_LIM_CONF1, 0x00000018);
		gpu_write(gpu, REG_A4XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x00000003);
	} else {
		BUG();
	}

	/* Make all blocks contribute to the GPU BUSY perf counter */
	gpu_write(gpu, REG_A4XX_RBBM_GPU_BUSY_MASKED, 0xffffffff);

	/* Tune the hysteresis counters for SP and CP idle detection */
	gpu_write(gpu, REG_A4XX_RBBM_SP_HYST_CNT, 0x10);
	gpu_write(gpu, REG_A4XX_RBBM_WAIT_IDLE_CLOCKS_CTL, 0x10);

	if (adreno_is_a430(adreno_gpu))
		gpu_write(gpu, REG_A4XX_RBBM_WAIT_IDLE_CLOCKS_CTL2, 0x30);

	/* Enable the RBBM error reporting bits */
	gpu_write(gpu, REG_A4XX_RBBM_AHB_CTL0, 0x00000001);

	/* Enable AHB error reporting */
	gpu_write(gpu, REG_A4XX_RBBM_AHB_CTL1, 0xa6ffffff);

	/* Enable power counters */
	gpu_write(gpu, REG_A4XX_RBBM_RBBM_CTL, 0x00000030);

	/*
	 * Turn on hang detection - this spews a lot of useful information
	 * into the RBBM registers on a hang:
	 */
	gpu_write(gpu, REG_A4XX_RBBM_INTERFACE_HANG_INT_CTL,
			(1 << 30) | 0xFFFF);

	gpu_write(gpu, REG_A4XX_RB_GMEM_BASE_ADDR,
			(unsigned int)(a4xx_gpu->ocmem_base >> 14));

	/* Turn on performance counters: */
	gpu_write(gpu, REG_A4XX_RBBM_PERFCTR_CTL, 0x01);

	/*
	 * Use the first CP counter for timestamp queries.  Userspace may set
	 * this as well, but it selects the same counter/countable:
	 */
	gpu_write(gpu, REG_A4XX_CP_PERFCTR_CP_SEL_0, CP_ALWAYS_COUNT);

	if (adreno_is_a430(adreno_gpu))
		gpu_write(gpu, REG_A4XX_UCHE_CACHE_WAYS_VFD, 0x07);

	/* Disable L2 bypass to avoid UCHE out of bounds errors */
	gpu_write(gpu, REG_A4XX_UCHE_TRAP_BASE_LO, 0xffff0000);
	gpu_write(gpu, REG_A4XX_UCHE_TRAP_BASE_HI, 0xffff0000);

	gpu_write(gpu, REG_A4XX_CP_DEBUG, (1 << 25) |
			(adreno_is_a420(adreno_gpu) ? (1 << 29) : 0));

	/* On A430 enable SP regfile sleep for power savings */
	/* TODO downstream does this for !420, so maybe applies for 405 too? */
	if (!adreno_is_a420(adreno_gpu)) {
		gpu_write(gpu, REG_A4XX_RBBM_SP_REGFILE_SLEEP_CNTL_0,
			0x00000441);
		gpu_write(gpu, REG_A4XX_RBBM_SP_REGFILE_SLEEP_CNTL_1,
			0x00000441);
	}

	a4xx_enable_hwcg(gpu);

	/*
	 * For A420 set RBBM_CLOCK_DELAY_HLSQ.CGC_HLSQ_TP_EARLY_CYC >= 2
	 * due to timing issue with HLSQ_TP_CLK_EN
	 */
	if (adreno_is_a420(adreno_gpu)) {
		unsigned int val;

		val = gpu_read(gpu, REG_A4XX_RBBM_CLOCK_DELAY_HLSQ);
		val &= ~A4XX_CGC_HLSQ_EARLY_CYC__MASK;
		val |= 2 << A4XX_CGC_HLSQ_EARLY_CYC__SHIFT;
		gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_HLSQ, val);
	}

	/* set up access protection: */
	gpu_write(gpu, REG_A4XX_CP_PROTECT_CTRL, 0x00000007);

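	/*
	 * Each CP_PROTECT entry blocks ringbuffer access to a range of
	 * registers.  Judging from the values below, the low bits hold the
	 * base register offset and a field in the upper bits holds log2 of
	 * the range length plus read/write flags, but treat that layout as
	 * unverified here.
	 */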
	/* RBBM registers */
	gpu_write(gpu, REG_A4XX_CP_PROTECT(0), 0x62000010);
	gpu_write(gpu, REG_A4XX_CP_PROTECT(1), 0x63000020);
	gpu_write(gpu, REG_A4XX_CP_PROTECT(2), 0x64000040);
	gpu_write(gpu, REG_A4XX_CP_PROTECT(3), 0x65000080);
	gpu_write(gpu, REG_A4XX_CP_PROTECT(4), 0x66000100);
	gpu_write(gpu, REG_A4XX_CP_PROTECT(5), 0x64000200);

	/* CP registers */
	gpu_write(gpu, REG_A4XX_CP_PROTECT(6), 0x67000800);
	gpu_write(gpu, REG_A4XX_CP_PROTECT(7), 0x64001600);

	/* RB registers */
	gpu_write(gpu, REG_A4XX_CP_PROTECT(8), 0x60003300);

	/* HLSQ registers */
	gpu_write(gpu, REG_A4XX_CP_PROTECT(9), 0x60003800);

	/* VPC registers */
	gpu_write(gpu, REG_A4XX_CP_PROTECT(10), 0x61003980);

	/* SMMU registers */
	gpu_write(gpu, REG_A4XX_CP_PROTECT(11), 0x6e010000);

	gpu_write(gpu, REG_A4XX_RBBM_INT_0_MASK, A4XX_INT0_MASK);

	ret = adreno_hw_init(gpu);
	if (ret)
		return ret;

	/* Load PM4: */
	ptr = (uint32_t *)(adreno_gpu->pm4->data);
	len = adreno_gpu->pm4->size / 4;
	DBG("loading PM4 ucode version: %u", ptr[0]);
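	/*
	 * ptr[0] is the ucode version dword, so start writing at index 1;
	 * the ME RAM write address auto-increments after each data write:
	 */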
	gpu_write(gpu, REG_A4XX_CP_ME_RAM_WADDR, 0);
	for (i = 1; i < len; i++)
		gpu_write(gpu, REG_A4XX_CP_ME_RAM_DATA, ptr[i]);

	/* Load PFP: */
	ptr = (uint32_t *)(adreno_gpu->pfp->data);
	len = adreno_gpu->pfp->size / 4;
	DBG("loading PFP ucode version: %u", ptr[0]);

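	/* As with PM4, skip the version dword at ptr[0]: */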
	gpu_write(gpu, REG_A4XX_CP_PFP_UCODE_ADDR, 0);
	for (i = 1; i < len; i++)
		gpu_write(gpu, REG_A4XX_CP_PFP_UCODE_DATA, ptr[i]);

	/* clear ME_HALT to start micro engine */
	gpu_write(gpu, REG_A4XX_CP_ME_CNTL, 0);

	return a4xx_me_init(gpu) ? 0 : -EINVAL;
}

static void a4xx_recover(struct msm_gpu *gpu)
{
	int i;

	adreno_dump_info(gpu);

	for (i = 0; i < 8; i++) {
		printk("CP_SCRATCH_REG%d: %u\n", i,
			gpu_read(gpu, REG_AXXX_CP_SCRATCH_REG0 + i));
	}

	/* dump registers before resetting gpu, if enabled: */
	if (hang_debug)
		a4xx_dump(gpu);

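	/*
	 * Pulse the RBBM software reset, reading the register back in
	 * between so the assertion is posted before it is cleared:
	 */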
	gpu_write(gpu, REG_A4XX_RBBM_SW_RESET_CMD, 1);
	gpu_read(gpu, REG_A4XX_RBBM_SW_RESET_CMD);
	gpu_write(gpu, REG_A4XX_RBBM_SW_RESET_CMD, 0);
	adreno_recover(gpu);
}

static void a4xx_destroy(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a4xx_gpu *a4xx_gpu = to_a4xx_gpu(adreno_gpu);

	DBG("%s", gpu->name);

	adreno_gpu_cleanup(adreno_gpu);

#ifdef CONFIG_MSM_OCMEM
	if (a4xx_gpu->ocmem_base)
		ocmem_free(OCMEM_GRAPHICS, a4xx_gpu->ocmem_hdl);
#endif

	kfree(a4xx_gpu);
}

static bool a4xx_idle(struct msm_gpu *gpu)
{
	/* wait for ringbuffer to drain: */
	if (!adreno_idle(gpu))
		return false;

	/* then wait for GPU to finish: */
	if (spin_until(!(gpu_read(gpu, REG_A4XX_RBBM_STATUS) &
					A4XX_RBBM_STATUS_GPU_BUSY))) {
		DRM_ERROR("%s: timeout waiting for GPU to idle!\n", gpu->name);
		/* TODO maybe we need to reset GPU here to recover from hang? */
		return false;
	}

	return true;
}

static irqreturn_t a4xx_irq(struct msm_gpu *gpu)
{
	uint32_t status;

	status = gpu_read(gpu, REG_A4XX_RBBM_INT_0_STATUS);
	DBG("%s: Int status %08x", gpu->name, status);

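	/*
	 * On a protect fault, CP_PROTECT_STATUS reports the offending
	 * access: bit 24 distinguishes writes from reads, and the low bits
	 * hold the byte address, shifted down to a dword register offset:
	 */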
	if (status & A4XX_INT0_CP_REG_PROTECT_FAULT) {
		uint32_t reg = gpu_read(gpu, REG_A4XX_CP_PROTECT_STATUS);

		printk("CP | Protected mode error | %s | addr=%x\n",
			reg & (1 << 24) ? "WRITE" : "READ",
			(reg & 0xFFFFF) >> 2);
	}

	gpu_write(gpu, REG_A4XX_RBBM_INT_CLEAR_CMD, status);

	msm_gpu_retire(gpu);

	return IRQ_HANDLED;
}

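/*
 * Ranges of registers (inclusive start/end pairs, terminated by the ~0
 * sentinel) captured by a4xx_dump() and the debugfs show path:
 */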
static const unsigned int a4xx_registers[] = {
	/* RBBM */
	0x0000, 0x0002, 0x0004, 0x0021, 0x0023, 0x0024, 0x0026, 0x0026,
	0x0028, 0x002B, 0x002E, 0x0034, 0x0037, 0x0044, 0x0047, 0x0066,
	0x0068, 0x0095, 0x009C, 0x0170, 0x0174, 0x01AF,
	/* CP */
	0x0200, 0x0233, 0x0240, 0x0250, 0x04C0, 0x04DD, 0x0500, 0x050B,
	0x0578, 0x058F,
	/* VSC */
	0x0C00, 0x0C03, 0x0C08, 0x0C41, 0x0C50, 0x0C51,
	/* GRAS */
	0x0C80, 0x0C81, 0x0C88, 0x0C8F,
	/* RB */
	0x0CC0, 0x0CC0, 0x0CC4, 0x0CD2,
	/* PC */
	0x0D00, 0x0D0C, 0x0D10, 0x0D17, 0x0D20, 0x0D23,
	/* VFD */
	0x0E40, 0x0E4A,
	/* VPC */
	0x0E60, 0x0E61, 0x0E63, 0x0E68,
	/* UCHE */
	0x0E80, 0x0E84, 0x0E88, 0x0E95,
	/* VMIDMT */
	0x1000, 0x1000, 0x1002, 0x1002, 0x1004, 0x1004, 0x1008, 0x100A,
	0x100C, 0x100D, 0x100F, 0x1010, 0x1012, 0x1016, 0x1024, 0x1024,
	0x1027, 0x1027, 0x1100, 0x1100, 0x1102, 0x1102, 0x1104, 0x1104,
	0x1110, 0x1110, 0x1112, 0x1116, 0x1124, 0x1124, 0x1300, 0x1300,
	0x1380, 0x1380,
	/* GRAS CTX 0 */
	0x2000, 0x2004, 0x2008, 0x2067, 0x2070, 0x2078, 0x207B, 0x216E,
	/* PC CTX 0 */
	0x21C0, 0x21C6, 0x21D0, 0x21D0, 0x21D9, 0x21D9, 0x21E5, 0x21E7,
	/* VFD CTX 0 */
	0x2200, 0x2204, 0x2208, 0x22A9,
	/* GRAS CTX 1 */
	0x2400, 0x2404, 0x2408, 0x2467, 0x2470, 0x2478, 0x247B, 0x256E,
	/* PC CTX 1 */
	0x25C0, 0x25C6, 0x25D0, 0x25D0, 0x25D9, 0x25D9, 0x25E5, 0x25E7,
	/* VFD CTX 1 */
	0x2600, 0x2604, 0x2608, 0x26A9,
	/* XPU */
	0x2C00, 0x2C01, 0x2C10, 0x2C10, 0x2C12, 0x2C16, 0x2C1D, 0x2C20,
	0x2C28, 0x2C28, 0x2C30, 0x2C30, 0x2C32, 0x2C36, 0x2C40, 0x2C40,
	0x2C50, 0x2C50, 0x2C52, 0x2C56, 0x2C80, 0x2C80, 0x2C94, 0x2C95,
	/* VBIF */
	0x3000, 0x3007, 0x300C, 0x3014, 0x3018, 0x301D, 0x3020, 0x3022,
	0x3024, 0x3026, 0x3028, 0x302A, 0x302C, 0x302D, 0x3030, 0x3031,
	0x3034, 0x3036, 0x3038, 0x3038, 0x303C, 0x303D, 0x3040, 0x3040,
	0x3049, 0x3049, 0x3058, 0x3058, 0x305B, 0x3061, 0x3064, 0x3068,
	0x306C, 0x306D, 0x3080, 0x3088, 0x308B, 0x308C, 0x3090, 0x3094,
	0x3098, 0x3098, 0x309C, 0x309C, 0x30C0, 0x30C0, 0x30C8, 0x30C8,
	0x30D0, 0x30D0, 0x30D8, 0x30D8, 0x30E0, 0x30E0, 0x3100, 0x3100,
	0x3108, 0x3108, 0x3110, 0x3110, 0x3118, 0x3118, 0x3120, 0x3120,
	0x3124, 0x3125, 0x3129, 0x3129, 0x3131, 0x3131, 0x330C, 0x330C,
	0x3310, 0x3310, 0x3400, 0x3401, 0x3410, 0x3410, 0x3412, 0x3416,
	0x341D, 0x3420, 0x3428, 0x3428, 0x3430, 0x3430, 0x3432, 0x3436,
	0x3440, 0x3440, 0x3450, 0x3450, 0x3452, 0x3456, 0x3480, 0x3480,
	0x3494, 0x3495, 0x4000, 0x4000, 0x4002, 0x4002, 0x4004, 0x4004,
	0x4008, 0x400A, 0x400C, 0x400D, 0x400F, 0x4012, 0x4014, 0x4016,
	0x401D, 0x401D, 0x4020, 0x4027, 0x4060, 0x4062, 0x4200, 0x4200,
	0x4300, 0x4300, 0x4400, 0x4400, 0x4500, 0x4500, 0x4800, 0x4802,
	0x480F, 0x480F, 0x4811, 0x4811, 0x4813, 0x4813, 0x4815, 0x4816,
	0x482B, 0x482B, 0x4857, 0x4857, 0x4883, 0x4883, 0x48AF, 0x48AF,
	0x48C5, 0x48C5, 0x48E5, 0x48E5, 0x4905, 0x4905, 0x4925, 0x4925,
	0x4945, 0x4945, 0x4950, 0x4950, 0x495B, 0x495B, 0x4980, 0x498E,
	0x4B00, 0x4B00, 0x4C00, 0x4C00, 0x4D00, 0x4D00, 0x4E00, 0x4E00,
	0x4E80, 0x4E80, 0x4F00, 0x4F00, 0x4F08, 0x4F08, 0x4F10, 0x4F10,
	0x4F18, 0x4F18, 0x4F20, 0x4F20, 0x4F30, 0x4F30, 0x4F60, 0x4F60,
	0x4F80, 0x4F81, 0x4F88, 0x4F89, 0x4FEE, 0x4FEE, 0x4FF3, 0x4FF3,
	0x6000, 0x6001, 0x6008, 0x600F, 0x6014, 0x6016, 0x6018, 0x601B,
	0x61FD, 0x61FD, 0x623C, 0x623C, 0x6380, 0x6380, 0x63A0, 0x63A0,
	0x63C0, 0x63C1, 0x63C8, 0x63C9, 0x63D0, 0x63D4, 0x63D6, 0x63D6,
	0x63EE, 0x63EE, 0x6400, 0x6401, 0x6408, 0x640F, 0x6414, 0x6416,
	0x6418, 0x641B, 0x65FD, 0x65FD, 0x663C, 0x663C, 0x6780, 0x6780,
	0x67A0, 0x67A0, 0x67C0, 0x67C1, 0x67C8, 0x67C9, 0x67D0, 0x67D4,
	0x67D6, 0x67D6, 0x67EE, 0x67EE, 0x6800, 0x6801, 0x6808, 0x680F,
	0x6814, 0x6816, 0x6818, 0x681B, 0x69FD, 0x69FD, 0x6A3C, 0x6A3C,
	0x6B80, 0x6B80, 0x6BA0, 0x6BA0, 0x6BC0, 0x6BC1, 0x6BC8, 0x6BC9,
	0x6BD0, 0x6BD4, 0x6BD6, 0x6BD6, 0x6BEE, 0x6BEE,
	~0 /* sentinel */
};

#ifdef CONFIG_DEBUG_FS
static void a4xx_show(struct msm_gpu *gpu, struct seq_file *m)
{
	gpu->funcs->pm_resume(gpu);

	seq_printf(m, "status:   %08x\n",
			gpu_read(gpu, REG_A4XX_RBBM_STATUS));
	gpu->funcs->pm_suspend(gpu);

	adreno_show(gpu, m);
}
#endif

/* Register offset defines for A4XX, in order of enum adreno_regs */
static const unsigned int a4xx_register_offsets[REG_ADRENO_REGISTER_MAX] = {
	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_BASE, REG_A4XX_CP_RB_BASE),
	REG_ADRENO_SKIP(REG_ADRENO_CP_RB_BASE_HI),
	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_RPTR_ADDR, REG_A4XX_CP_RB_RPTR_ADDR),
	REG_ADRENO_SKIP(REG_ADRENO_CP_RB_RPTR_ADDR_HI),
	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_RPTR, REG_A4XX_CP_RB_RPTR),
	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_WPTR, REG_A4XX_CP_RB_WPTR),
	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_CNTL, REG_A4XX_CP_RB_CNTL),
};

static void a4xx_dump(struct msm_gpu *gpu)
{
	printk("status:   %08x\n",
			gpu_read(gpu, REG_A4XX_RBBM_STATUS));
	adreno_dump(gpu);
}

static int a4xx_pm_resume(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	int ret;

	ret = msm_gpu_pm_resume(gpu);
	if (ret)
		return ret;

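	/*
	 * a430 power-collapses SP/TP: clear SW_COLLAPSE and then poll until
	 * the power status register reports the rails back on:
	 */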
	if (adreno_is_a430(adreno_gpu)) {
		unsigned int reg;

		/* Set the default register values; set SW_COLLAPSE to 0 */
		gpu_write(gpu, REG_A4XX_RBBM_POWER_CNTL_IP, 0x778000);
		do {
			udelay(5);
			reg = gpu_read(gpu, REG_A4XX_RBBM_POWER_STATUS);
		} while (!(reg & A4XX_RBBM_POWER_CNTL_IP_SP_TP_PWR_ON));
	}
	return 0;
}

static int a4xx_pm_suspend(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	int ret;

	ret = msm_gpu_pm_suspend(gpu);
	if (ret)
		return ret;

	if (adreno_is_a430(adreno_gpu)) {
		/* Set the default register values; set SW_COLLAPSE to 1 */
		gpu_write(gpu, REG_A4XX_RBBM_POWER_CNTL_IP, 0x778001);
	}
	return 0;
}

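/*
 * Read the 64-bit CP_0 performance counter, which a4xx_hw_init() selects
 * as CP_ALWAYS_COUNT, giving a free-running GPU timestamp:
 */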
static int a4xx_get_timestamp(struct msm_gpu *gpu, uint64_t *value)
{
	*value = gpu_read64(gpu, REG_A4XX_RBBM_PERFCTR_CP_0_LO,
		REG_A4XX_RBBM_PERFCTR_CP_0_HI);

	return 0;
}

static const struct adreno_gpu_funcs funcs = {
	.base = {
		.get_param = adreno_get_param,
		.hw_init = a4xx_hw_init,
		.pm_suspend = a4xx_pm_suspend,
		.pm_resume = a4xx_pm_resume,
		.recover = a4xx_recover,
		.last_fence = adreno_last_fence,
		.submit = adreno_submit,
		.flush = adreno_flush,
		.idle = a4xx_idle,
		.irq = a4xx_irq,
		.destroy = a4xx_destroy,
#ifdef CONFIG_DEBUG_FS
		.show = a4xx_show,
#endif
	},
	.get_timestamp = a4xx_get_timestamp,
};

struct msm_gpu *a4xx_gpu_init(struct drm_device *dev)
{
	struct a4xx_gpu *a4xx_gpu = NULL;
	struct adreno_gpu *adreno_gpu;
	struct msm_gpu *gpu;
	struct msm_drm_private *priv = dev->dev_private;
	struct platform_device *pdev = priv->gpu_pdev;
	int ret;

	if (!pdev) {
		dev_err(dev->dev, "no a4xx device\n");
		ret = -ENXIO;
		goto fail;
	}

	a4xx_gpu = kzalloc(sizeof(*a4xx_gpu), GFP_KERNEL);
	if (!a4xx_gpu) {
		ret = -ENOMEM;
		goto fail;
	}

	adreno_gpu = &a4xx_gpu->base;
	gpu = &adreno_gpu->base;

	a4xx_gpu->pdev = pdev;

	gpu->perfcntrs = NULL;
	gpu->num_perfcntrs = 0;

	adreno_gpu->registers = a4xx_registers;
	adreno_gpu->reg_offsets = a4xx_register_offsets;

	ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs);
	if (ret)
		goto fail;

	/* if needed, allocate gmem: */
	if (adreno_is_a4xx(adreno_gpu)) {
#ifdef CONFIG_MSM_OCMEM
		/* TODO this is different/missing upstream: */
		struct ocmem_buf *ocmem_hdl =
				ocmem_allocate(OCMEM_GRAPHICS, adreno_gpu->gmem);

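		/*
		 * GMEM on a4xx is backed by OCMEM: record the handle and base
		 * address, and adopt whatever size the allocator actually
		 * gave us.  Note the ocmem_allocate() result is used
		 * unchecked here.
		 */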
		a4xx_gpu->ocmem_hdl = ocmem_hdl;
		a4xx_gpu->ocmem_base = ocmem_hdl->addr;
		adreno_gpu->gmem = ocmem_hdl->len;
		DBG("using %dK of OCMEM at 0x%08x", adreno_gpu->gmem / 1024,
				a4xx_gpu->ocmem_base);
#endif
	}

	if (!gpu->aspace) {
		/* TODO we think it is possible to configure the GPU to
		 * restrict access to VRAM carveout.  But the required
		 * registers are unknown.  For now just bail out and
		 * limp along with just modesetting.  If it turns out
		 * to not be possible to restrict access, then we must
		 * implement a cmdstream validator.
		 */
		dev_err(dev->dev, "No memory protection without IOMMU\n");
		ret = -ENXIO;
		goto fail;
	}

	return gpu;

fail:
	if (a4xx_gpu)
		a4xx_destroy(&a4xx_gpu->base.base);

	return ERR_PTR(ret);
}