xref: /openbmc/linux/drivers/gpu/drm/msm/adreno/a3xx_gpu.c (revision bbde9fc1824aab58bc78c084163007dd6c03fe5b)
1 /*
2  * Copyright (C) 2013 Red Hat
3  * Author: Rob Clark <robdclark@gmail.com>
4  *
5  * Copyright (c) 2014 The Linux Foundation. All rights reserved.
6  *
7  * This program is free software; you can redistribute it and/or modify it
8  * under the terms of the GNU General Public License version 2 as published by
9  * the Free Software Foundation.
10  *
11  * This program is distributed in the hope that it will be useful, but WITHOUT
12  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
14  * more details.
15  *
16  * You should have received a copy of the GNU General Public License along with
17  * this program.  If not, see <http://www.gnu.org/licenses/>.
18  */
19 
20 #ifdef CONFIG_MSM_OCMEM
21 #  include <mach/ocmem.h>
22 #endif
23 
24 #include "a3xx_gpu.h"
25 
/*
 * Set of RBBM INT_0 interrupt bits this driver enables; a3xx_hw_init()
 * writes this value to REG_A3XX_RBBM_INT_0_MASK.
 */
#define A3XX_INT0_MASK (                    \
	A3XX_INT0_RBBM_AHB_ERROR          | \
	A3XX_INT0_RBBM_ATB_BUS_OVERFLOW   | \
	A3XX_INT0_CP_T0_PACKET_IN_IB      | \
	A3XX_INT0_CP_OPCODE_ERROR         | \
	A3XX_INT0_CP_RESERVED_BIT_ERROR   | \
	A3XX_INT0_CP_HW_FAULT             | \
	A3XX_INT0_CP_IB1_INT              | \
	A3XX_INT0_CP_IB2_INT              | \
	A3XX_INT0_CP_RB_INT               | \
	A3XX_INT0_CP_REG_PROTECT_FAULT    | \
	A3XX_INT0_CP_AHB_ERROR_HALT       | \
	A3XX_INT0_UCHE_OOB_ACCESS)
39 
/*
 * Defined outside this file.  When true, a3xx_recover() dumps the GPU
 * registers (via a3xx_dump()) before resetting the GPU.
 */
extern bool hang_debug;

/* Forward declaration: a3xx_recover() uses it before its definition below. */
static void a3xx_dump(struct msm_gpu *gpu);
43 
44 static void a3xx_me_init(struct msm_gpu *gpu)
45 {
46 	struct msm_ringbuffer *ring = gpu->rb;
47 
48 	OUT_PKT3(ring, CP_ME_INIT, 17);
49 	OUT_RING(ring, 0x000003f7);
50 	OUT_RING(ring, 0x00000000);
51 	OUT_RING(ring, 0x00000000);
52 	OUT_RING(ring, 0x00000000);
53 	OUT_RING(ring, 0x00000080);
54 	OUT_RING(ring, 0x00000100);
55 	OUT_RING(ring, 0x00000180);
56 	OUT_RING(ring, 0x00006600);
57 	OUT_RING(ring, 0x00000150);
58 	OUT_RING(ring, 0x0000014e);
59 	OUT_RING(ring, 0x00000154);
60 	OUT_RING(ring, 0x00000001);
61 	OUT_RING(ring, 0x00000000);
62 	OUT_RING(ring, 0x00000000);
63 	OUT_RING(ring, 0x00000000);
64 	OUT_RING(ring, 0x00000000);
65 	OUT_RING(ring, 0x00000000);
66 
67 	gpu->funcs->flush(gpu);
68 	gpu->funcs->idle(gpu);
69 }
70 
71 static int a3xx_hw_init(struct msm_gpu *gpu)
72 {
73 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
74 	struct a3xx_gpu *a3xx_gpu = to_a3xx_gpu(adreno_gpu);
75 	uint32_t *ptr, len;
76 	int i, ret;
77 
78 	DBG("%s", gpu->name);
79 
80 	if (adreno_is_a305(adreno_gpu)) {
81 		/* Set up 16 deep read/write request queues: */
82 		gpu_write(gpu, REG_A3XX_VBIF_IN_RD_LIM_CONF0, 0x10101010);
83 		gpu_write(gpu, REG_A3XX_VBIF_IN_RD_LIM_CONF1, 0x10101010);
84 		gpu_write(gpu, REG_A3XX_VBIF_OUT_RD_LIM_CONF0, 0x10101010);
85 		gpu_write(gpu, REG_A3XX_VBIF_OUT_WR_LIM_CONF0, 0x10101010);
86 		gpu_write(gpu, REG_A3XX_VBIF_DDR_OUT_MAX_BURST, 0x0000303);
87 		gpu_write(gpu, REG_A3XX_VBIF_IN_WR_LIM_CONF0, 0x10101010);
88 		gpu_write(gpu, REG_A3XX_VBIF_IN_WR_LIM_CONF1, 0x10101010);
89 		/* Enable WR-REQ: */
90 		gpu_write(gpu, REG_A3XX_VBIF_GATE_OFF_WRREQ_EN, 0x0000ff);
91 		/* Set up round robin arbitration between both AXI ports: */
92 		gpu_write(gpu, REG_A3XX_VBIF_ARB_CTL, 0x00000030);
93 		/* Set up AOOO: */
94 		gpu_write(gpu, REG_A3XX_VBIF_OUT_AXI_AOOO_EN, 0x0000003c);
95 		gpu_write(gpu, REG_A3XX_VBIF_OUT_AXI_AOOO, 0x003c003c);
96 	} else if (adreno_is_a306(adreno_gpu)) {
97 		gpu_write(gpu, REG_A3XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x0003);
98 		gpu_write(gpu, REG_A3XX_VBIF_OUT_RD_LIM_CONF0, 0x0000000a);
99 		gpu_write(gpu, REG_A3XX_VBIF_OUT_WR_LIM_CONF0, 0x0000000a);
100 	} else if (adreno_is_a320(adreno_gpu)) {
101 		/* Set up 16 deep read/write request queues: */
102 		gpu_write(gpu, REG_A3XX_VBIF_IN_RD_LIM_CONF0, 0x10101010);
103 		gpu_write(gpu, REG_A3XX_VBIF_IN_RD_LIM_CONF1, 0x10101010);
104 		gpu_write(gpu, REG_A3XX_VBIF_OUT_RD_LIM_CONF0, 0x10101010);
105 		gpu_write(gpu, REG_A3XX_VBIF_OUT_WR_LIM_CONF0, 0x10101010);
106 		gpu_write(gpu, REG_A3XX_VBIF_DDR_OUT_MAX_BURST, 0x0000303);
107 		gpu_write(gpu, REG_A3XX_VBIF_IN_WR_LIM_CONF0, 0x10101010);
108 		gpu_write(gpu, REG_A3XX_VBIF_IN_WR_LIM_CONF1, 0x10101010);
109 		/* Enable WR-REQ: */
110 		gpu_write(gpu, REG_A3XX_VBIF_GATE_OFF_WRREQ_EN, 0x0000ff);
111 		/* Set up round robin arbitration between both AXI ports: */
112 		gpu_write(gpu, REG_A3XX_VBIF_ARB_CTL, 0x00000030);
113 		/* Set up AOOO: */
114 		gpu_write(gpu, REG_A3XX_VBIF_OUT_AXI_AOOO_EN, 0x0000003c);
115 		gpu_write(gpu, REG_A3XX_VBIF_OUT_AXI_AOOO, 0x003c003c);
116 		/* Enable 1K sort: */
117 		gpu_write(gpu, REG_A3XX_VBIF_ABIT_SORT, 0x000000ff);
118 		gpu_write(gpu, REG_A3XX_VBIF_ABIT_SORT_CONF, 0x000000a4);
119 
120 	} else if (adreno_is_a330v2(adreno_gpu)) {
121 		/*
122 		 * Most of the VBIF registers on 8974v2 have the correct
123 		 * values at power on, so we won't modify those if we don't
124 		 * need to
125 		 */
126 		/* Enable 1k sort: */
127 		gpu_write(gpu, REG_A3XX_VBIF_ABIT_SORT, 0x0001003f);
128 		gpu_write(gpu, REG_A3XX_VBIF_ABIT_SORT_CONF, 0x000000a4);
129 		/* Enable WR-REQ: */
130 		gpu_write(gpu, REG_A3XX_VBIF_GATE_OFF_WRREQ_EN, 0x00003f);
131 		gpu_write(gpu, REG_A3XX_VBIF_DDR_OUT_MAX_BURST, 0x0000303);
132 		/* Set up VBIF_ROUND_ROBIN_QOS_ARB: */
133 		gpu_write(gpu, REG_A3XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x0003);
134 
135 	} else if (adreno_is_a330(adreno_gpu)) {
136 		/* Set up 16 deep read/write request queues: */
137 		gpu_write(gpu, REG_A3XX_VBIF_IN_RD_LIM_CONF0, 0x18181818);
138 		gpu_write(gpu, REG_A3XX_VBIF_IN_RD_LIM_CONF1, 0x18181818);
139 		gpu_write(gpu, REG_A3XX_VBIF_OUT_RD_LIM_CONF0, 0x18181818);
140 		gpu_write(gpu, REG_A3XX_VBIF_OUT_WR_LIM_CONF0, 0x18181818);
141 		gpu_write(gpu, REG_A3XX_VBIF_DDR_OUT_MAX_BURST, 0x0000303);
142 		gpu_write(gpu, REG_A3XX_VBIF_IN_WR_LIM_CONF0, 0x18181818);
143 		gpu_write(gpu, REG_A3XX_VBIF_IN_WR_LIM_CONF1, 0x18181818);
144 		/* Enable WR-REQ: */
145 		gpu_write(gpu, REG_A3XX_VBIF_GATE_OFF_WRREQ_EN, 0x00003f);
146 		/* Set up round robin arbitration between both AXI ports: */
147 		gpu_write(gpu, REG_A3XX_VBIF_ARB_CTL, 0x00000030);
148 		/* Set up VBIF_ROUND_ROBIN_QOS_ARB: */
149 		gpu_write(gpu, REG_A3XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x0001);
150 		/* Set up AOOO: */
151 		gpu_write(gpu, REG_A3XX_VBIF_OUT_AXI_AOOO_EN, 0x0000003f);
152 		gpu_write(gpu, REG_A3XX_VBIF_OUT_AXI_AOOO, 0x003f003f);
153 		/* Enable 1K sort: */
154 		gpu_write(gpu, REG_A3XX_VBIF_ABIT_SORT, 0x0001003f);
155 		gpu_write(gpu, REG_A3XX_VBIF_ABIT_SORT_CONF, 0x000000a4);
156 		/* Disable VBIF clock gating. This is to enable AXI running
157 		 * higher frequency than GPU:
158 		 */
159 		gpu_write(gpu, REG_A3XX_VBIF_CLKON, 0x00000001);
160 
161 	} else {
162 		BUG();
163 	}
164 
165 	/* Make all blocks contribute to the GPU BUSY perf counter: */
166 	gpu_write(gpu, REG_A3XX_RBBM_GPU_BUSY_MASKED, 0xffffffff);
167 
168 	/* Tune the hystersis counters for SP and CP idle detection: */
169 	gpu_write(gpu, REG_A3XX_RBBM_SP_HYST_CNT, 0x10);
170 	gpu_write(gpu, REG_A3XX_RBBM_WAIT_IDLE_CLOCKS_CTL, 0x10);
171 
172 	/* Enable the RBBM error reporting bits.  This lets us get
173 	 * useful information on failure:
174 	 */
175 	gpu_write(gpu, REG_A3XX_RBBM_AHB_CTL0, 0x00000001);
176 
177 	/* Enable AHB error reporting: */
178 	gpu_write(gpu, REG_A3XX_RBBM_AHB_CTL1, 0xa6ffffff);
179 
180 	/* Turn on the power counters: */
181 	gpu_write(gpu, REG_A3XX_RBBM_RBBM_CTL, 0x00030000);
182 
183 	/* Turn on hang detection - this spews a lot of useful information
184 	 * into the RBBM registers on a hang:
185 	 */
186 	gpu_write(gpu, REG_A3XX_RBBM_INTERFACE_HANG_INT_CTL, 0x00010fff);
187 
188 	/* Enable 64-byte cacheline size. HW Default is 32-byte (0x000000E0): */
189 	gpu_write(gpu, REG_A3XX_UCHE_CACHE_MODE_CONTROL_REG, 0x00000001);
190 
191 	/* Enable Clock gating: */
192 	if (adreno_is_a306(adreno_gpu))
193 		gpu_write(gpu, REG_A3XX_RBBM_CLOCK_CTL, 0xaaaaaaaa);
194 	else if (adreno_is_a320(adreno_gpu))
195 		gpu_write(gpu, REG_A3XX_RBBM_CLOCK_CTL, 0xbfffffff);
196 	else if (adreno_is_a330v2(adreno_gpu))
197 		gpu_write(gpu, REG_A3XX_RBBM_CLOCK_CTL, 0xaaaaaaaa);
198 	else if (adreno_is_a330(adreno_gpu))
199 		gpu_write(gpu, REG_A3XX_RBBM_CLOCK_CTL, 0xbffcffff);
200 
201 	if (adreno_is_a330v2(adreno_gpu))
202 		gpu_write(gpu, REG_A3XX_RBBM_GPR0_CTL, 0x05515455);
203 	else if (adreno_is_a330(adreno_gpu))
204 		gpu_write(gpu, REG_A3XX_RBBM_GPR0_CTL, 0x00000000);
205 
206 	/* Set the OCMEM base address for A330, etc */
207 	if (a3xx_gpu->ocmem_hdl) {
208 		gpu_write(gpu, REG_A3XX_RB_GMEM_BASE_ADDR,
209 			(unsigned int)(a3xx_gpu->ocmem_base >> 14));
210 	}
211 
212 	/* Turn on performance counters: */
213 	gpu_write(gpu, REG_A3XX_RBBM_PERFCTR_CTL, 0x01);
214 
215 	/* Enable the perfcntrs that we use.. */
216 	for (i = 0; i < gpu->num_perfcntrs; i++) {
217 		const struct msm_gpu_perfcntr *perfcntr = &gpu->perfcntrs[i];
218 		gpu_write(gpu, perfcntr->select_reg, perfcntr->select_val);
219 	}
220 
221 	gpu_write(gpu, REG_A3XX_RBBM_INT_0_MASK, A3XX_INT0_MASK);
222 
223 	ret = adreno_hw_init(gpu);
224 	if (ret)
225 		return ret;
226 
227 	/* setup access protection: */
228 	gpu_write(gpu, REG_A3XX_CP_PROTECT_CTRL, 0x00000007);
229 
230 	/* RBBM registers */
231 	gpu_write(gpu, REG_A3XX_CP_PROTECT(0), 0x63000040);
232 	gpu_write(gpu, REG_A3XX_CP_PROTECT(1), 0x62000080);
233 	gpu_write(gpu, REG_A3XX_CP_PROTECT(2), 0x600000cc);
234 	gpu_write(gpu, REG_A3XX_CP_PROTECT(3), 0x60000108);
235 	gpu_write(gpu, REG_A3XX_CP_PROTECT(4), 0x64000140);
236 	gpu_write(gpu, REG_A3XX_CP_PROTECT(5), 0x66000400);
237 
238 	/* CP registers */
239 	gpu_write(gpu, REG_A3XX_CP_PROTECT(6), 0x65000700);
240 	gpu_write(gpu, REG_A3XX_CP_PROTECT(7), 0x610007d8);
241 	gpu_write(gpu, REG_A3XX_CP_PROTECT(8), 0x620007e0);
242 	gpu_write(gpu, REG_A3XX_CP_PROTECT(9), 0x61001178);
243 	gpu_write(gpu, REG_A3XX_CP_PROTECT(10), 0x64001180);
244 
245 	/* RB registers */
246 	gpu_write(gpu, REG_A3XX_CP_PROTECT(11), 0x60003300);
247 
248 	/* VBIF registers */
249 	gpu_write(gpu, REG_A3XX_CP_PROTECT(12), 0x6b00c000);
250 
251 	/* NOTE: PM4/micro-engine firmware registers look to be the same
252 	 * for a2xx and a3xx.. we could possibly push that part down to
253 	 * adreno_gpu base class.  Or push both PM4 and PFP but
254 	 * parameterize the pfp ucode addr/data registers..
255 	 */
256 
257 	/* Load PM4: */
258 	ptr = (uint32_t *)(adreno_gpu->pm4->data);
259 	len = adreno_gpu->pm4->size / 4;
260 	DBG("loading PM4 ucode version: %x", ptr[1]);
261 
262 	gpu_write(gpu, REG_AXXX_CP_DEBUG,
263 			AXXX_CP_DEBUG_DYNAMIC_CLK_DISABLE |
264 			AXXX_CP_DEBUG_MIU_128BIT_WRITE_ENABLE);
265 	gpu_write(gpu, REG_AXXX_CP_ME_RAM_WADDR, 0);
266 	for (i = 1; i < len; i++)
267 		gpu_write(gpu, REG_AXXX_CP_ME_RAM_DATA, ptr[i]);
268 
269 	/* Load PFP: */
270 	ptr = (uint32_t *)(adreno_gpu->pfp->data);
271 	len = adreno_gpu->pfp->size / 4;
272 	DBG("loading PFP ucode version: %x", ptr[5]);
273 
274 	gpu_write(gpu, REG_A3XX_CP_PFP_UCODE_ADDR, 0);
275 	for (i = 1; i < len; i++)
276 		gpu_write(gpu, REG_A3XX_CP_PFP_UCODE_DATA, ptr[i]);
277 
278 	/* CP ROQ queue sizes (bytes) - RB:16, ST:16, IB1:32, IB2:64 */
279 	if (adreno_is_a305(adreno_gpu) || adreno_is_a306(adreno_gpu) ||
280 			adreno_is_a320(adreno_gpu)) {
281 		gpu_write(gpu, REG_AXXX_CP_QUEUE_THRESHOLDS,
282 				AXXX_CP_QUEUE_THRESHOLDS_CSQ_IB1_START(2) |
283 				AXXX_CP_QUEUE_THRESHOLDS_CSQ_IB2_START(6) |
284 				AXXX_CP_QUEUE_THRESHOLDS_CSQ_ST_START(14));
285 	} else if (adreno_is_a330(adreno_gpu)) {
286 		/* NOTE: this (value take from downstream android driver)
287 		 * includes some bits outside of the known bitfields.  But
288 		 * A330 has this "MERCIU queue" thing too, which might
289 		 * explain a new bitfield or reshuffling:
290 		 */
291 		gpu_write(gpu, REG_AXXX_CP_QUEUE_THRESHOLDS, 0x003e2008);
292 	}
293 
294 	/* clear ME_HALT to start micro engine */
295 	gpu_write(gpu, REG_AXXX_CP_ME_CNTL, 0);
296 
297 	a3xx_me_init(gpu);
298 
299 	return 0;
300 }
301 
302 static void a3xx_recover(struct msm_gpu *gpu)
303 {
304 	adreno_dump_info(gpu);
305 
306 	/* dump registers before resetting gpu, if enabled: */
307 	if (hang_debug)
308 		a3xx_dump(gpu);
309 
310 	gpu_write(gpu, REG_A3XX_RBBM_SW_RESET_CMD, 1);
311 	gpu_read(gpu, REG_A3XX_RBBM_SW_RESET_CMD);
312 	gpu_write(gpu, REG_A3XX_RBBM_SW_RESET_CMD, 0);
313 	adreno_recover(gpu);
314 }
315 
316 static void a3xx_destroy(struct msm_gpu *gpu)
317 {
318 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
319 	struct a3xx_gpu *a3xx_gpu = to_a3xx_gpu(adreno_gpu);
320 
321 	DBG("%s", gpu->name);
322 
323 	adreno_gpu_cleanup(adreno_gpu);
324 
325 #ifdef CONFIG_MSM_OCMEM
326 	if (a3xx_gpu->ocmem_base)
327 		ocmem_free(OCMEM_GRAPHICS, a3xx_gpu->ocmem_hdl);
328 #endif
329 
330 	kfree(a3xx_gpu);
331 }
332 
333 static void a3xx_idle(struct msm_gpu *gpu)
334 {
335 	/* wait for ringbuffer to drain: */
336 	adreno_idle(gpu);
337 
338 	/* then wait for GPU to finish: */
339 	if (spin_until(!(gpu_read(gpu, REG_A3XX_RBBM_STATUS) &
340 			A3XX_RBBM_STATUS_GPU_BUSY)))
341 		DRM_ERROR("%s: timeout waiting for GPU to idle!\n", gpu->name);
342 
343 	/* TODO maybe we need to reset GPU here to recover from hang? */
344 }
345 
346 static irqreturn_t a3xx_irq(struct msm_gpu *gpu)
347 {
348 	uint32_t status;
349 
350 	status = gpu_read(gpu, REG_A3XX_RBBM_INT_0_STATUS);
351 	DBG("%s: %08x", gpu->name, status);
352 
353 	// TODO
354 
355 	gpu_write(gpu, REG_A3XX_RBBM_INT_CLEAR_CMD, status);
356 
357 	msm_gpu_retire(gpu);
358 
359 	return IRQ_HANDLED;
360 }
361 
/*
 * Register list handed to the adreno core through adreno_gpu->registers
 * (see a3xx_gpu_init()).  The list is terminated by a ~0 sentinel.
 *
 * NOTE(review): the entries read as (first, last) pairs describing
 * register ranges, presumably walked by the adreno show/dump code -
 * confirm against the consumer before editing.
 */
static const unsigned int a3xx_registers[] = {
	0x0000, 0x0002, 0x0010, 0x0012, 0x0018, 0x0018, 0x0020, 0x0027,
	0x0029, 0x002b, 0x002e, 0x0033, 0x0040, 0x0042, 0x0050, 0x005c,
	0x0060, 0x006c, 0x0080, 0x0082, 0x0084, 0x0088, 0x0090, 0x00e5,
	0x00ea, 0x00ed, 0x0100, 0x0100, 0x0110, 0x0123, 0x01c0, 0x01c1,
	0x01c3, 0x01c5, 0x01c7, 0x01c7, 0x01d5, 0x01d9, 0x01dc, 0x01dd,
	0x01ea, 0x01ea, 0x01ee, 0x01f1, 0x01f5, 0x01f5, 0x01fc, 0x01ff,
	0x0440, 0x0440, 0x0443, 0x0443, 0x0445, 0x0445, 0x044d, 0x044f,
	0x0452, 0x0452, 0x0454, 0x046f, 0x047c, 0x047c, 0x047f, 0x047f,
	0x0578, 0x057f, 0x0600, 0x0602, 0x0605, 0x0607, 0x060a, 0x060e,
	0x0612, 0x0614, 0x0c01, 0x0c02, 0x0c06, 0x0c1d, 0x0c3d, 0x0c3f,
	0x0c48, 0x0c4b, 0x0c80, 0x0c80, 0x0c88, 0x0c8b, 0x0ca0, 0x0cb7,
	0x0cc0, 0x0cc1, 0x0cc6, 0x0cc7, 0x0ce4, 0x0ce5, 0x0e00, 0x0e05,
	0x0e0c, 0x0e0c, 0x0e22, 0x0e23, 0x0e41, 0x0e45, 0x0e64, 0x0e65,
	0x0e80, 0x0e82, 0x0e84, 0x0e89, 0x0ea0, 0x0ea1, 0x0ea4, 0x0ea7,
	0x0ec4, 0x0ecb, 0x0ee0, 0x0ee0, 0x0f00, 0x0f01, 0x0f03, 0x0f09,
	0x2040, 0x2040, 0x2044, 0x2044, 0x2048, 0x204d, 0x2068, 0x2069,
	0x206c, 0x206d, 0x2070, 0x2070, 0x2072, 0x2072, 0x2074, 0x2075,
	0x2079, 0x207a, 0x20c0, 0x20d3, 0x20e4, 0x20ef, 0x2100, 0x2109,
	0x210c, 0x210c, 0x210e, 0x210e, 0x2110, 0x2111, 0x2114, 0x2115,
	0x21e4, 0x21e4, 0x21ea, 0x21ea, 0x21ec, 0x21ed, 0x21f0, 0x21f0,
	0x2200, 0x2212, 0x2214, 0x2217, 0x221a, 0x221a, 0x2240, 0x227e,
	0x2280, 0x228b, 0x22c0, 0x22c0, 0x22c4, 0x22ce, 0x22d0, 0x22d8,
	0x22df, 0x22e6, 0x22e8, 0x22e9, 0x22ec, 0x22ec, 0x22f0, 0x22f7,
	0x22ff, 0x22ff, 0x2340, 0x2343, 0x2348, 0x2349, 0x2350, 0x2356,
	0x2360, 0x2360, 0x2440, 0x2440, 0x2444, 0x2444, 0x2448, 0x244d,
	0x2468, 0x2469, 0x246c, 0x246d, 0x2470, 0x2470, 0x2472, 0x2472,
	0x2474, 0x2475, 0x2479, 0x247a, 0x24c0, 0x24d3, 0x24e4, 0x24ef,
	0x2500, 0x2509, 0x250c, 0x250c, 0x250e, 0x250e, 0x2510, 0x2511,
	0x2514, 0x2515, 0x25e4, 0x25e4, 0x25ea, 0x25ea, 0x25ec, 0x25ed,
	0x25f0, 0x25f0, 0x2600, 0x2612, 0x2614, 0x2617, 0x261a, 0x261a,
	0x2640, 0x267e, 0x2680, 0x268b, 0x26c0, 0x26c0, 0x26c4, 0x26ce,
	0x26d0, 0x26d8, 0x26df, 0x26e6, 0x26e8, 0x26e9, 0x26ec, 0x26ec,
	0x26f0, 0x26f7, 0x26ff, 0x26ff, 0x2740, 0x2743, 0x2748, 0x2749,
	0x2750, 0x2756, 0x2760, 0x2760, 0x300c, 0x300e, 0x301c, 0x301d,
	0x302a, 0x302a, 0x302c, 0x302d, 0x3030, 0x3031, 0x3034, 0x3036,
	0x303c, 0x303c, 0x305e, 0x305f,
	~0   /* sentinel */
};
401 
#ifdef CONFIG_DEBUG_FS
/*
 * debugfs hook: print RBBM_STATUS into the seq_file, then let the common
 * adreno_show() add its output.  The status register is read between the
 * pm_resume/pm_suspend hooks.
 */
static void a3xx_show(struct msm_gpu *gpu, struct seq_file *m)
{
	uint32_t rbbm_status;

	gpu->funcs->pm_resume(gpu);
	rbbm_status = gpu_read(gpu, REG_A3XX_RBBM_STATUS);
	seq_printf(m, "status:   %08x\n", rbbm_status);
	gpu->funcs->pm_suspend(gpu);

	adreno_show(gpu, m);
}
#endif
412 
413 /* would be nice to not have to duplicate the _show() stuff with printk(): */
414 static void a3xx_dump(struct msm_gpu *gpu)
415 {
416 	printk("status:   %08x\n",
417 			gpu_read(gpu, REG_A3XX_RBBM_STATUS));
418 	adreno_dump(gpu);
419 }
/*
 * Register offset defines for A3XX.
 *
 * Table with REG_ADRENO_REGISTER_MAX slots that pairs each generic
 * REG_ADRENO_* name with the A3XX (or shared AXXX) register that
 * implements it.  It is installed as adreno_gpu->reg_offsets in
 * a3xx_gpu_init().
 *
 * NOTE(review): REG_ADRENO_DEFINE() is defined outside this file; it is
 * presumably a designated initializer keyed by the generic name - confirm.
 */
static const unsigned int a3xx_register_offsets[REG_ADRENO_REGISTER_MAX] = {
	REG_ADRENO_DEFINE(REG_ADRENO_CP_DEBUG, REG_AXXX_CP_DEBUG),
	REG_ADRENO_DEFINE(REG_ADRENO_CP_ME_RAM_WADDR, REG_AXXX_CP_ME_RAM_WADDR),
	REG_ADRENO_DEFINE(REG_ADRENO_CP_ME_RAM_DATA, REG_AXXX_CP_ME_RAM_DATA),
	REG_ADRENO_DEFINE(REG_ADRENO_CP_PFP_UCODE_DATA,
			REG_A3XX_CP_PFP_UCODE_DATA),
	REG_ADRENO_DEFINE(REG_ADRENO_CP_PFP_UCODE_ADDR,
			REG_A3XX_CP_PFP_UCODE_ADDR),
	REG_ADRENO_DEFINE(REG_ADRENO_CP_WFI_PEND_CTR, REG_A3XX_CP_WFI_PEND_CTR),
	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_BASE, REG_AXXX_CP_RB_BASE),
	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_RPTR_ADDR, REG_AXXX_CP_RB_RPTR_ADDR),
	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_RPTR, REG_AXXX_CP_RB_RPTR),
	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_WPTR, REG_AXXX_CP_RB_WPTR),
	REG_ADRENO_DEFINE(REG_ADRENO_CP_PROTECT_CTRL, REG_A3XX_CP_PROTECT_CTRL),
	REG_ADRENO_DEFINE(REG_ADRENO_CP_ME_CNTL, REG_AXXX_CP_ME_CNTL),
	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_CNTL, REG_AXXX_CP_RB_CNTL),
	REG_ADRENO_DEFINE(REG_ADRENO_CP_IB1_BASE, REG_AXXX_CP_IB1_BASE),
	REG_ADRENO_DEFINE(REG_ADRENO_CP_IB1_BUFSZ, REG_AXXX_CP_IB1_BUFSZ),
	REG_ADRENO_DEFINE(REG_ADRENO_CP_IB2_BASE, REG_AXXX_CP_IB2_BASE),
	REG_ADRENO_DEFINE(REG_ADRENO_CP_IB2_BUFSZ, REG_AXXX_CP_IB2_BUFSZ),
	/* The generic timestamp register is backed by CP scratch register 0: */
	REG_ADRENO_DEFINE(REG_ADRENO_CP_TIMESTAMP, REG_AXXX_CP_SCRATCH_REG0),
	REG_ADRENO_DEFINE(REG_ADRENO_CP_ME_RAM_RADDR, REG_AXXX_CP_ME_RAM_RADDR),
	REG_ADRENO_DEFINE(REG_ADRENO_SCRATCH_ADDR, REG_AXXX_SCRATCH_ADDR),
	REG_ADRENO_DEFINE(REG_ADRENO_SCRATCH_UMSK, REG_AXXX_SCRATCH_UMSK),
	REG_ADRENO_DEFINE(REG_ADRENO_CP_ROQ_ADDR, REG_A3XX_CP_ROQ_ADDR),
	REG_ADRENO_DEFINE(REG_ADRENO_CP_ROQ_DATA, REG_A3XX_CP_ROQ_DATA),
	REG_ADRENO_DEFINE(REG_ADRENO_CP_MERCIU_ADDR, REG_A3XX_CP_MERCIU_ADDR),
	REG_ADRENO_DEFINE(REG_ADRENO_CP_MERCIU_DATA, REG_A3XX_CP_MERCIU_DATA),
	REG_ADRENO_DEFINE(REG_ADRENO_CP_MERCIU_DATA2, REG_A3XX_CP_MERCIU_DATA2),
	REG_ADRENO_DEFINE(REG_ADRENO_CP_MEQ_ADDR, REG_A3XX_CP_MEQ_ADDR),
	REG_ADRENO_DEFINE(REG_ADRENO_CP_MEQ_DATA, REG_A3XX_CP_MEQ_DATA),
	REG_ADRENO_DEFINE(REG_ADRENO_CP_HW_FAULT, REG_A3XX_CP_HW_FAULT),
	REG_ADRENO_DEFINE(REG_ADRENO_CP_PROTECT_STATUS,
			REG_A3XX_CP_PROTECT_STATUS),
	REG_ADRENO_DEFINE(REG_ADRENO_RBBM_STATUS, REG_A3XX_RBBM_STATUS),
	REG_ADRENO_DEFINE(REG_ADRENO_RBBM_PERFCTR_CTL,
			REG_A3XX_RBBM_PERFCTR_CTL),
	REG_ADRENO_DEFINE(REG_ADRENO_RBBM_PERFCTR_LOAD_CMD0,
			REG_A3XX_RBBM_PERFCTR_LOAD_CMD0),
	REG_ADRENO_DEFINE(REG_ADRENO_RBBM_PERFCTR_LOAD_CMD1,
			REG_A3XX_RBBM_PERFCTR_LOAD_CMD1),
	REG_ADRENO_DEFINE(REG_ADRENO_RBBM_PERFCTR_PWR_1_LO,
			REG_A3XX_RBBM_PERFCTR_PWR_1_LO),
	REG_ADRENO_DEFINE(REG_ADRENO_RBBM_INT_0_MASK, REG_A3XX_RBBM_INT_0_MASK),
	REG_ADRENO_DEFINE(REG_ADRENO_RBBM_INT_0_STATUS,
			REG_A3XX_RBBM_INT_0_STATUS),
	REG_ADRENO_DEFINE(REG_ADRENO_RBBM_AHB_ERROR_STATUS,
			REG_A3XX_RBBM_AHB_ERROR_STATUS),
	REG_ADRENO_DEFINE(REG_ADRENO_RBBM_AHB_CMD, REG_A3XX_RBBM_AHB_CMD),
	REG_ADRENO_DEFINE(REG_ADRENO_RBBM_INT_CLEAR_CMD,
			REG_A3XX_RBBM_INT_CLEAR_CMD),
	REG_ADRENO_DEFINE(REG_ADRENO_RBBM_CLOCK_CTL, REG_A3XX_RBBM_CLOCK_CTL),
	REG_ADRENO_DEFINE(REG_ADRENO_VPC_DEBUG_RAM_SEL,
			REG_A3XX_VPC_VPC_DEBUG_RAM_SEL),
	REG_ADRENO_DEFINE(REG_ADRENO_VPC_DEBUG_RAM_READ,
			REG_A3XX_VPC_VPC_DEBUG_RAM_READ),
	REG_ADRENO_DEFINE(REG_ADRENO_VSC_SIZE_ADDRESS,
			REG_A3XX_VSC_SIZE_ADDRESS),
	REG_ADRENO_DEFINE(REG_ADRENO_VFD_CONTROL_0, REG_A3XX_VFD_CONTROL_0),
	REG_ADRENO_DEFINE(REG_ADRENO_VFD_INDEX_MAX, REG_A3XX_VFD_INDEX_MAX),
	REG_ADRENO_DEFINE(REG_ADRENO_SP_VS_PVT_MEM_ADDR_REG,
			REG_A3XX_SP_VS_PVT_MEM_ADDR_REG),
	REG_ADRENO_DEFINE(REG_ADRENO_SP_FS_PVT_MEM_ADDR_REG,
			REG_A3XX_SP_FS_PVT_MEM_ADDR_REG),
	REG_ADRENO_DEFINE(REG_ADRENO_SP_VS_OBJ_START_REG,
			REG_A3XX_SP_VS_OBJ_START_REG),
	REG_ADRENO_DEFINE(REG_ADRENO_SP_FS_OBJ_START_REG,
			REG_A3XX_SP_FS_OBJ_START_REG),
	REG_ADRENO_DEFINE(REG_ADRENO_PA_SC_AA_CONFIG, REG_A3XX_PA_SC_AA_CONFIG),
	REG_ADRENO_DEFINE(REG_ADRENO_RBBM_PM_OVERRIDE2,
			REG_A3XX_RBBM_PM_OVERRIDE2),
	REG_ADRENO_DEFINE(REG_ADRENO_SCRATCH_REG2, REG_AXXX_CP_SCRATCH_REG2),
	REG_ADRENO_DEFINE(REG_ADRENO_SQ_GPR_MANAGEMENT,
			REG_A3XX_SQ_GPR_MANAGEMENT),
	REG_ADRENO_DEFINE(REG_ADRENO_SQ_INST_STORE_MANAGMENT,
			REG_A3XX_SQ_INST_STORE_MANAGMENT),
	REG_ADRENO_DEFINE(REG_ADRENO_TP0_CHICKEN, REG_A3XX_TP0_CHICKEN),
	REG_ADRENO_DEFINE(REG_ADRENO_RBBM_RBBM_CTL, REG_A3XX_RBBM_RBBM_CTL),
	REG_ADRENO_DEFINE(REG_ADRENO_RBBM_SW_RESET_CMD,
			REG_A3XX_RBBM_SW_RESET_CMD),
	REG_ADRENO_DEFINE(REG_ADRENO_UCHE_INVALIDATE0,
			REG_A3XX_UCHE_CACHE_INVALIDATE0_REG),
	REG_ADRENO_DEFINE(REG_ADRENO_RBBM_PERFCTR_LOAD_VALUE_LO,
			REG_A3XX_RBBM_PERFCTR_LOAD_VALUE_LO),
	REG_ADRENO_DEFINE(REG_ADRENO_RBBM_PERFCTR_LOAD_VALUE_HI,
			REG_A3XX_RBBM_PERFCTR_LOAD_VALUE_HI),
};
508 
509 static const struct adreno_gpu_funcs funcs = {
510 	.base = {
511 		.get_param = adreno_get_param,
512 		.hw_init = a3xx_hw_init,
513 		.pm_suspend = msm_gpu_pm_suspend,
514 		.pm_resume = msm_gpu_pm_resume,
515 		.recover = a3xx_recover,
516 		.last_fence = adreno_last_fence,
517 		.submit = adreno_submit,
518 		.flush = adreno_flush,
519 		.idle = a3xx_idle,
520 		.irq = a3xx_irq,
521 		.destroy = a3xx_destroy,
522 #ifdef CONFIG_DEBUG_FS
523 		.show = a3xx_show,
524 #endif
525 	},
526 };
527 
528 static const struct msm_gpu_perfcntr perfcntrs[] = {
529 	{ REG_A3XX_SP_PERFCOUNTER6_SELECT, REG_A3XX_RBBM_PERFCTR_SP_6_LO,
530 			SP_ALU_ACTIVE_CYCLES, "ALUACTIVE" },
531 	{ REG_A3XX_SP_PERFCOUNTER7_SELECT, REG_A3XX_RBBM_PERFCTR_SP_7_LO,
532 			SP_FS_FULL_ALU_INSTRUCTIONS, "ALUFULL" },
533 };
534 
535 struct msm_gpu *a3xx_gpu_init(struct drm_device *dev)
536 {
537 	struct a3xx_gpu *a3xx_gpu = NULL;
538 	struct adreno_gpu *adreno_gpu;
539 	struct msm_gpu *gpu;
540 	struct msm_drm_private *priv = dev->dev_private;
541 	struct platform_device *pdev = priv->gpu_pdev;
542 	int ret;
543 
544 	if (!pdev) {
545 		dev_err(dev->dev, "no a3xx device\n");
546 		ret = -ENXIO;
547 		goto fail;
548 	}
549 
550 	a3xx_gpu = kzalloc(sizeof(*a3xx_gpu), GFP_KERNEL);
551 	if (!a3xx_gpu) {
552 		ret = -ENOMEM;
553 		goto fail;
554 	}
555 
556 	adreno_gpu = &a3xx_gpu->base;
557 	gpu = &adreno_gpu->base;
558 
559 	a3xx_gpu->pdev = pdev;
560 
561 	gpu->perfcntrs = perfcntrs;
562 	gpu->num_perfcntrs = ARRAY_SIZE(perfcntrs);
563 
564 	adreno_gpu->registers = a3xx_registers;
565 	adreno_gpu->reg_offsets = a3xx_register_offsets;
566 
567 	ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs);
568 	if (ret)
569 		goto fail;
570 
571 	/* if needed, allocate gmem: */
572 	if (adreno_is_a330(adreno_gpu)) {
573 #ifdef CONFIG_MSM_OCMEM
574 		/* TODO this is different/missing upstream: */
575 		struct ocmem_buf *ocmem_hdl =
576 				ocmem_allocate(OCMEM_GRAPHICS, adreno_gpu->gmem);
577 
578 		a3xx_gpu->ocmem_hdl = ocmem_hdl;
579 		a3xx_gpu->ocmem_base = ocmem_hdl->addr;
580 		adreno_gpu->gmem = ocmem_hdl->len;
581 		DBG("using %dK of OCMEM at 0x%08x", adreno_gpu->gmem / 1024,
582 				a3xx_gpu->ocmem_base);
583 #endif
584 	}
585 
586 	if (!gpu->mmu) {
587 		/* TODO we think it is possible to configure the GPU to
588 		 * restrict access to VRAM carveout.  But the required
589 		 * registers are unknown.  For now just bail out and
590 		 * limp along with just modesetting.  If it turns out
591 		 * to not be possible to restrict access, then we must
592 		 * implement a cmdstream validator.
593 		 */
594 		dev_err(dev->dev, "No memory protection without IOMMU\n");
595 		ret = -ENXIO;
596 		goto fail;
597 	}
598 
599 	return gpu;
600 
601 fail:
602 	if (a3xx_gpu)
603 		a3xx_destroy(&a3xx_gpu->base.base);
604 
605 	return ERR_PTR(ret);
606 }
607