xref: /openbmc/linux/drivers/gpu/drm/msm/adreno/a3xx_gpu.c (revision 6774def6)
/*
 * Copyright (C) 2013 Red Hat
 * Author: Rob Clark <robdclark@gmail.com>
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 as published by
 * the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program.  If not, see <http://www.gnu.org/licenses/>.
 */

#ifdef CONFIG_MSM_OCMEM
#  include <mach/ocmem.h>
#endif

#include "a3xx_gpu.h"

#define A3XX_INT0_MASK \
	(A3XX_INT0_RBBM_AHB_ERROR |        \
	 A3XX_INT0_RBBM_ATB_BUS_OVERFLOW | \
	 A3XX_INT0_CP_T0_PACKET_IN_IB |    \
	 A3XX_INT0_CP_OPCODE_ERROR |       \
	 A3XX_INT0_CP_RESERVED_BIT_ERROR | \
	 A3XX_INT0_CP_HW_FAULT |           \
	 A3XX_INT0_CP_IB1_INT |            \
	 A3XX_INT0_CP_IB2_INT |            \
	 A3XX_INT0_CP_RB_INT |             \
	 A3XX_INT0_CP_REG_PROTECT_FAULT |  \
	 A3XX_INT0_CP_AHB_ERROR_HALT |     \
	 A3XX_INT0_UCHE_OOB_ACCESS)

extern bool hang_debug;

static void a3xx_dump(struct msm_gpu *gpu);

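/*
 * Bring up the CP micro engine after its firmware has been loaded by
 * sending the CP_ME_INIT packet with 17 payload dwords.  The payload
 * values are opaque initialization constants, presumably carried over
 * from the downstream driver.
 */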
static void a3xx_me_init(struct msm_gpu *gpu)
{
	struct msm_ringbuffer *ring = gpu->rb;

	OUT_PKT3(ring, CP_ME_INIT, 17);
	OUT_RING(ring, 0x000003f7);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000080);
	OUT_RING(ring, 0x00000100);
	OUT_RING(ring, 0x00000180);
	OUT_RING(ring, 0x00006600);
	OUT_RING(ring, 0x00000150);
	OUT_RING(ring, 0x0000014e);
	OUT_RING(ring, 0x00000154);
	OUT_RING(ring, 0x00000001);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);

	gpu->funcs->flush(gpu);
	gpu->funcs->idle(gpu);
}

static int a3xx_hw_init(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a3xx_gpu *a3xx_gpu = to_a3xx_gpu(adreno_gpu);
	uint32_t *ptr, len;
	int i, ret;

	DBG("%s", gpu->name);

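	/*
	 * Per-variant VBIF (GPU bus interface) setup: request queue depths,
	 * write-request gating, AXI arbitration and out-of-order settings.
	 * These are tuning constants that appear to be carried over from the
	 * vendor driver rather than values derived here.
	 */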
	if (adreno_is_a305(adreno_gpu)) {
		/* Set up 16 deep read/write request queues: */
		gpu_write(gpu, REG_A3XX_VBIF_IN_RD_LIM_CONF0, 0x10101010);
		gpu_write(gpu, REG_A3XX_VBIF_IN_RD_LIM_CONF1, 0x10101010);
		gpu_write(gpu, REG_A3XX_VBIF_OUT_RD_LIM_CONF0, 0x10101010);
		gpu_write(gpu, REG_A3XX_VBIF_OUT_WR_LIM_CONF0, 0x10101010);
		gpu_write(gpu, REG_A3XX_VBIF_DDR_OUT_MAX_BURST, 0x0000303);
		gpu_write(gpu, REG_A3XX_VBIF_IN_WR_LIM_CONF0, 0x10101010);
		gpu_write(gpu, REG_A3XX_VBIF_IN_WR_LIM_CONF1, 0x10101010);
		/* Enable WR-REQ: */
		gpu_write(gpu, REG_A3XX_VBIF_GATE_OFF_WRREQ_EN, 0x0000ff);
		/* Set up round robin arbitration between both AXI ports: */
		gpu_write(gpu, REG_A3XX_VBIF_ARB_CTL, 0x00000030);
		/* Set up AOOO: */
		gpu_write(gpu, REG_A3XX_VBIF_OUT_AXI_AOOO_EN, 0x0000003c);
		gpu_write(gpu, REG_A3XX_VBIF_OUT_AXI_AOOO, 0x003c003c);

	} else if (adreno_is_a320(adreno_gpu)) {
		/* Set up 16 deep read/write request queues: */
		gpu_write(gpu, REG_A3XX_VBIF_IN_RD_LIM_CONF0, 0x10101010);
		gpu_write(gpu, REG_A3XX_VBIF_IN_RD_LIM_CONF1, 0x10101010);
		gpu_write(gpu, REG_A3XX_VBIF_OUT_RD_LIM_CONF0, 0x10101010);
		gpu_write(gpu, REG_A3XX_VBIF_OUT_WR_LIM_CONF0, 0x10101010);
		gpu_write(gpu, REG_A3XX_VBIF_DDR_OUT_MAX_BURST, 0x0000303);
		gpu_write(gpu, REG_A3XX_VBIF_IN_WR_LIM_CONF0, 0x10101010);
		gpu_write(gpu, REG_A3XX_VBIF_IN_WR_LIM_CONF1, 0x10101010);
		/* Enable WR-REQ: */
		gpu_write(gpu, REG_A3XX_VBIF_GATE_OFF_WRREQ_EN, 0x0000ff);
		/* Set up round robin arbitration between both AXI ports: */
		gpu_write(gpu, REG_A3XX_VBIF_ARB_CTL, 0x00000030);
		/* Set up AOOO: */
		gpu_write(gpu, REG_A3XX_VBIF_OUT_AXI_AOOO_EN, 0x0000003c);
		gpu_write(gpu, REG_A3XX_VBIF_OUT_AXI_AOOO, 0x003c003c);
		/* Enable 1K sort: */
		gpu_write(gpu, REG_A3XX_VBIF_ABIT_SORT, 0x000000ff);
		gpu_write(gpu, REG_A3XX_VBIF_ABIT_SORT_CONF, 0x000000a4);

	} else if (adreno_is_a330v2(adreno_gpu)) {
		/*
		 * Most of the VBIF registers on 8974v2 have the correct
		 * values at power on, so we won't modify those if we don't
		 * need to
		 */
		/* Enable 1k sort: */
		gpu_write(gpu, REG_A3XX_VBIF_ABIT_SORT, 0x0001003f);
		gpu_write(gpu, REG_A3XX_VBIF_ABIT_SORT_CONF, 0x000000a4);
		/* Enable WR-REQ: */
		gpu_write(gpu, REG_A3XX_VBIF_GATE_OFF_WRREQ_EN, 0x00003f);
		gpu_write(gpu, REG_A3XX_VBIF_DDR_OUT_MAX_BURST, 0x0000303);
		/* Set up VBIF_ROUND_ROBIN_QOS_ARB: */
		gpu_write(gpu, REG_A3XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x0003);

	} else if (adreno_is_a330(adreno_gpu)) {
		/* Set up 16 deep read/write request queues: */
		gpu_write(gpu, REG_A3XX_VBIF_IN_RD_LIM_CONF0, 0x18181818);
		gpu_write(gpu, REG_A3XX_VBIF_IN_RD_LIM_CONF1, 0x18181818);
		gpu_write(gpu, REG_A3XX_VBIF_OUT_RD_LIM_CONF0, 0x18181818);
		gpu_write(gpu, REG_A3XX_VBIF_OUT_WR_LIM_CONF0, 0x18181818);
		gpu_write(gpu, REG_A3XX_VBIF_DDR_OUT_MAX_BURST, 0x0000303);
		gpu_write(gpu, REG_A3XX_VBIF_IN_WR_LIM_CONF0, 0x18181818);
		gpu_write(gpu, REG_A3XX_VBIF_IN_WR_LIM_CONF1, 0x18181818);
		/* Enable WR-REQ: */
		gpu_write(gpu, REG_A3XX_VBIF_GATE_OFF_WRREQ_EN, 0x00003f);
		/* Set up round robin arbitration between both AXI ports: */
		gpu_write(gpu, REG_A3XX_VBIF_ARB_CTL, 0x00000030);
		/* Set up VBIF_ROUND_ROBIN_QOS_ARB: */
		gpu_write(gpu, REG_A3XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x0001);
		/* Set up AOOO: */
		gpu_write(gpu, REG_A3XX_VBIF_OUT_AXI_AOOO_EN, 0x0000003f);
		gpu_write(gpu, REG_A3XX_VBIF_OUT_AXI_AOOO, 0x003f003f);
		/* Enable 1K sort: */
		gpu_write(gpu, REG_A3XX_VBIF_ABIT_SORT, 0x0001003f);
		gpu_write(gpu, REG_A3XX_VBIF_ABIT_SORT_CONF, 0x000000a4);
		/* Disable VBIF clock gating.  This lets the AXI interface
		 * run at a higher frequency than the GPU:
		 */
		gpu_write(gpu, REG_A3XX_VBIF_CLKON, 0x00000001);

	} else {
		BUG();
	}

	/* Make all blocks contribute to the GPU BUSY perf counter: */
	gpu_write(gpu, REG_A3XX_RBBM_GPU_BUSY_MASKED, 0xffffffff);

	/* Tune the hysteresis counters for SP and CP idle detection: */
	gpu_write(gpu, REG_A3XX_RBBM_SP_HYST_CNT, 0x10);
	gpu_write(gpu, REG_A3XX_RBBM_WAIT_IDLE_CLOCKS_CTL, 0x10);

	/* Enable the RBBM error reporting bits.  This lets us get
	 * useful information on failure:
	 */
	gpu_write(gpu, REG_A3XX_RBBM_AHB_CTL0, 0x00000001);

	/* Enable AHB error reporting: */
	gpu_write(gpu, REG_A3XX_RBBM_AHB_CTL1, 0xa6ffffff);

	/* Turn on the power counters: */
	gpu_write(gpu, REG_A3XX_RBBM_RBBM_CTL, 0x00030000);

	/* Turn on hang detection - this spews a lot of useful information
	 * into the RBBM registers on a hang:
	 */
	gpu_write(gpu, REG_A3XX_RBBM_INTERFACE_HANG_INT_CTL, 0x00010fff);

	/* Enable 64-byte cacheline size. HW Default is 32-byte (0x000000E0): */
	gpu_write(gpu, REG_A3XX_UCHE_CACHE_MODE_CONTROL_REG, 0x00000001);

	/* Enable Clock gating: */
	if (adreno_is_a320(adreno_gpu))
		gpu_write(gpu, REG_A3XX_RBBM_CLOCK_CTL, 0xbfffffff);
	else if (adreno_is_a330v2(adreno_gpu))
		gpu_write(gpu, REG_A3XX_RBBM_CLOCK_CTL, 0xaaaaaaaa);
	else if (adreno_is_a330(adreno_gpu))
		gpu_write(gpu, REG_A3XX_RBBM_CLOCK_CTL, 0xbffcffff);

	if (adreno_is_a330v2(adreno_gpu))
		gpu_write(gpu, REG_A3XX_RBBM_GPR0_CTL, 0x05515455);
	else if (adreno_is_a330(adreno_gpu))
		gpu_write(gpu, REG_A3XX_RBBM_GPR0_CTL, 0x00000000);

	/* Set the OCMEM base address for A330, etc */
	if (a3xx_gpu->ocmem_hdl) {
		gpu_write(gpu, REG_A3XX_RB_GMEM_BASE_ADDR,
			(unsigned int)(a3xx_gpu->ocmem_base >> 14));
	}

	/* Turn on performance counters: */
	gpu_write(gpu, REG_A3XX_RBBM_PERFCTR_CTL, 0x01);

	/* Enable the perfcntrs that we use.. */
	for (i = 0; i < gpu->num_perfcntrs; i++) {
		const struct msm_gpu_perfcntr *perfcntr = &gpu->perfcntrs[i];
		gpu_write(gpu, perfcntr->select_reg, perfcntr->select_val);
	}

	gpu_write(gpu, REG_A3XX_RBBM_INT_0_MASK, A3XX_INT0_MASK);

	ret = adreno_hw_init(gpu);
	if (ret)
		return ret;

	/* setup access protection: */
	gpu_write(gpu, REG_A3XX_CP_PROTECT_CTRL, 0x00000007);

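	/*
	 * Each CP_PROTECT entry below appears to pack a protected register
	 * range into one word (base offset in the low bits, range size and
	 * access flags in the upper bits), matching the downstream driver's
	 * raw values; the exact encoding is not spelled out here.
	 */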
	/* RBBM registers */
	gpu_write(gpu, REG_A3XX_CP_PROTECT(0), 0x63000040);
	gpu_write(gpu, REG_A3XX_CP_PROTECT(1), 0x62000080);
	gpu_write(gpu, REG_A3XX_CP_PROTECT(2), 0x600000cc);
	gpu_write(gpu, REG_A3XX_CP_PROTECT(3), 0x60000108);
	gpu_write(gpu, REG_A3XX_CP_PROTECT(4), 0x64000140);
	gpu_write(gpu, REG_A3XX_CP_PROTECT(5), 0x66000400);

	/* CP registers */
	gpu_write(gpu, REG_A3XX_CP_PROTECT(6), 0x65000700);
	gpu_write(gpu, REG_A3XX_CP_PROTECT(7), 0x610007d8);
	gpu_write(gpu, REG_A3XX_CP_PROTECT(8), 0x620007e0);
	gpu_write(gpu, REG_A3XX_CP_PROTECT(9), 0x61001178);
	gpu_write(gpu, REG_A3XX_CP_PROTECT(10), 0x64001180);

	/* RB registers */
	gpu_write(gpu, REG_A3XX_CP_PROTECT(11), 0x60003300);

	/* VBIF registers */
	gpu_write(gpu, REG_A3XX_CP_PROTECT(12), 0x6b00c000);

	/* NOTE: PM4/micro-engine firmware registers look to be the same
	 * for a2xx and a3xx.. we could possibly push that part down to
	 * adreno_gpu base class.  Or push both PM4 and PFP but
	 * parameterize the pfp ucode addr/data registers..
	 */

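	/*
	 * Both ucode images are written one dword at a time through an
	 * addr/data register pair.  The loops start at index 1, skipping the
	 * first dword of each image, which appears to be a header word
	 * rather than an instruction; the version printed below is likewise
	 * read out of the image itself.
	 */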
	/* Load PM4: */
	ptr = (uint32_t *)(adreno_gpu->pm4->data);
	len = adreno_gpu->pm4->size / 4;
	DBG("loading PM4 ucode version: %x", ptr[1]);

	gpu_write(gpu, REG_AXXX_CP_DEBUG,
			AXXX_CP_DEBUG_DYNAMIC_CLK_DISABLE |
			AXXX_CP_DEBUG_MIU_128BIT_WRITE_ENABLE);
	gpu_write(gpu, REG_AXXX_CP_ME_RAM_WADDR, 0);
	for (i = 1; i < len; i++)
		gpu_write(gpu, REG_AXXX_CP_ME_RAM_DATA, ptr[i]);

	/* Load PFP: */
	ptr = (uint32_t *)(adreno_gpu->pfp->data);
	len = adreno_gpu->pfp->size / 4;
	DBG("loading PFP ucode version: %x", ptr[5]);

	gpu_write(gpu, REG_A3XX_CP_PFP_UCODE_ADDR, 0);
	for (i = 1; i < len; i++)
		gpu_write(gpu, REG_A3XX_CP_PFP_UCODE_DATA, ptr[i]);

	/* CP ROQ queue sizes (bytes) - RB:16, ST:16, IB1:32, IB2:64 */
	if (adreno_is_a305(adreno_gpu) || adreno_is_a320(adreno_gpu)) {
		gpu_write(gpu, REG_AXXX_CP_QUEUE_THRESHOLDS,
				AXXX_CP_QUEUE_THRESHOLDS_CSQ_IB1_START(2) |
				AXXX_CP_QUEUE_THRESHOLDS_CSQ_IB2_START(6) |
				AXXX_CP_QUEUE_THRESHOLDS_CSQ_ST_START(14));
	} else if (adreno_is_a330(adreno_gpu)) {
		/* NOTE: this value (taken from the downstream android driver)
		 * includes some bits outside of the known bitfields.  But
		 * A330 has this "MERCIU queue" thing too, which might
		 * explain a new bitfield or reshuffling:
		 */
		gpu_write(gpu, REG_AXXX_CP_QUEUE_THRESHOLDS, 0x003e2008);
	}

	/* clear ME_HALT to start micro engine */
	gpu_write(gpu, REG_AXXX_CP_ME_CNTL, 0);

	a3xx_me_init(gpu);

	return 0;
}

static void a3xx_recover(struct msm_gpu *gpu)
{
	/* dump registers before resetting gpu, if enabled: */
	if (hang_debug)
		a3xx_dump(gpu);
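	/*
	 * Pulse the RBBM software reset.  The read back in between is
	 * presumably there to make sure the write asserting reset has landed
	 * before the bit is cleared again.
	 */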
	gpu_write(gpu, REG_A3XX_RBBM_SW_RESET_CMD, 1);
	gpu_read(gpu, REG_A3XX_RBBM_SW_RESET_CMD);
	gpu_write(gpu, REG_A3XX_RBBM_SW_RESET_CMD, 0);
	adreno_recover(gpu);
}

static void a3xx_destroy(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a3xx_gpu *a3xx_gpu = to_a3xx_gpu(adreno_gpu);

	DBG("%s", gpu->name);

	adreno_gpu_cleanup(adreno_gpu);

#ifdef CONFIG_MSM_OCMEM
	if (a3xx_gpu->ocmem_base)
		ocmem_free(OCMEM_GRAPHICS, a3xx_gpu->ocmem_hdl);
#endif

	kfree(a3xx_gpu);
}

static void a3xx_idle(struct msm_gpu *gpu)
{
	/* wait for ringbuffer to drain: */
	adreno_idle(gpu);

	/* then wait for GPU to finish: */
	if (spin_until(!(gpu_read(gpu, REG_A3XX_RBBM_STATUS) &
			A3XX_RBBM_STATUS_GPU_BUSY)))
		DRM_ERROR("%s: timeout waiting for GPU to idle!\n", gpu->name);

	/* TODO maybe we need to reset GPU here to recover from hang? */
}

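/*
 * Interrupt handler: for now every enabled source gets the same treatment -
 * log the status, ack all pending bits via RBBM_INT_CLEAR_CMD, and kick
 * fence retirement.  Per-source error handling is still a TODO.
 */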
static irqreturn_t a3xx_irq(struct msm_gpu *gpu)
{
	uint32_t status;

	status = gpu_read(gpu, REG_A3XX_RBBM_INT_0_STATUS);
	DBG("%s: %08x", gpu->name, status);

	// TODO

	gpu_write(gpu, REG_A3XX_RBBM_INT_CLEAR_CMD, status);

	msm_gpu_retire(gpu);

	return IRQ_HANDLED;
}

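/*
 * Register ranges included in register dumps, as (first, last) pairs of
 * offsets terminated by a ~0 sentinel; the common adreno_show()/
 * adreno_dump() helpers walk this table.
 */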
static const unsigned int a3xx_registers[] = {
	0x0000, 0x0002, 0x0010, 0x0012, 0x0018, 0x0018, 0x0020, 0x0027,
	0x0029, 0x002b, 0x002e, 0x0033, 0x0040, 0x0042, 0x0050, 0x005c,
	0x0060, 0x006c, 0x0080, 0x0082, 0x0084, 0x0088, 0x0090, 0x00e5,
	0x00ea, 0x00ed, 0x0100, 0x0100, 0x0110, 0x0123, 0x01c0, 0x01c1,
	0x01c3, 0x01c5, 0x01c7, 0x01c7, 0x01d5, 0x01d9, 0x01dc, 0x01dd,
	0x01ea, 0x01ea, 0x01ee, 0x01f1, 0x01f5, 0x01f5, 0x01fc, 0x01ff,
	0x0440, 0x0440, 0x0443, 0x0443, 0x0445, 0x0445, 0x044d, 0x044f,
	0x0452, 0x0452, 0x0454, 0x046f, 0x047c, 0x047c, 0x047f, 0x047f,
	0x0578, 0x057f, 0x0600, 0x0602, 0x0605, 0x0607, 0x060a, 0x060e,
	0x0612, 0x0614, 0x0c01, 0x0c02, 0x0c06, 0x0c1d, 0x0c3d, 0x0c3f,
	0x0c48, 0x0c4b, 0x0c80, 0x0c80, 0x0c88, 0x0c8b, 0x0ca0, 0x0cb7,
	0x0cc0, 0x0cc1, 0x0cc6, 0x0cc7, 0x0ce4, 0x0ce5, 0x0e00, 0x0e05,
	0x0e0c, 0x0e0c, 0x0e22, 0x0e23, 0x0e41, 0x0e45, 0x0e64, 0x0e65,
	0x0e80, 0x0e82, 0x0e84, 0x0e89, 0x0ea0, 0x0ea1, 0x0ea4, 0x0ea7,
	0x0ec4, 0x0ecb, 0x0ee0, 0x0ee0, 0x0f00, 0x0f01, 0x0f03, 0x0f09,
	0x2040, 0x2040, 0x2044, 0x2044, 0x2048, 0x204d, 0x2068, 0x2069,
	0x206c, 0x206d, 0x2070, 0x2070, 0x2072, 0x2072, 0x2074, 0x2075,
	0x2079, 0x207a, 0x20c0, 0x20d3, 0x20e4, 0x20ef, 0x2100, 0x2109,
	0x210c, 0x210c, 0x210e, 0x210e, 0x2110, 0x2111, 0x2114, 0x2115,
	0x21e4, 0x21e4, 0x21ea, 0x21ea, 0x21ec, 0x21ed, 0x21f0, 0x21f0,
	0x2200, 0x2212, 0x2214, 0x2217, 0x221a, 0x221a, 0x2240, 0x227e,
	0x2280, 0x228b, 0x22c0, 0x22c0, 0x22c4, 0x22ce, 0x22d0, 0x22d8,
	0x22df, 0x22e6, 0x22e8, 0x22e9, 0x22ec, 0x22ec, 0x22f0, 0x22f7,
	0x22ff, 0x22ff, 0x2340, 0x2343, 0x2348, 0x2349, 0x2350, 0x2356,
	0x2360, 0x2360, 0x2440, 0x2440, 0x2444, 0x2444, 0x2448, 0x244d,
	0x2468, 0x2469, 0x246c, 0x246d, 0x2470, 0x2470, 0x2472, 0x2472,
	0x2474, 0x2475, 0x2479, 0x247a, 0x24c0, 0x24d3, 0x24e4, 0x24ef,
	0x2500, 0x2509, 0x250c, 0x250c, 0x250e, 0x250e, 0x2510, 0x2511,
	0x2514, 0x2515, 0x25e4, 0x25e4, 0x25ea, 0x25ea, 0x25ec, 0x25ed,
	0x25f0, 0x25f0, 0x2600, 0x2612, 0x2614, 0x2617, 0x261a, 0x261a,
	0x2640, 0x267e, 0x2680, 0x268b, 0x26c0, 0x26c0, 0x26c4, 0x26ce,
	0x26d0, 0x26d8, 0x26df, 0x26e6, 0x26e8, 0x26e9, 0x26ec, 0x26ec,
	0x26f0, 0x26f7, 0x26ff, 0x26ff, 0x2740, 0x2743, 0x2748, 0x2749,
	0x2750, 0x2756, 0x2760, 0x2760, 0x300c, 0x300e, 0x301c, 0x301d,
	0x302a, 0x302a, 0x302c, 0x302d, 0x3030, 0x3031, 0x3034, 0x3036,
	0x303c, 0x303c, 0x305e, 0x305f,
	~0   /* sentinel */
};

#ifdef CONFIG_DEBUG_FS
static void a3xx_show(struct msm_gpu *gpu, struct seq_file *m)
{
	gpu->funcs->pm_resume(gpu);
	seq_printf(m, "status:   %08x\n",
			gpu_read(gpu, REG_A3XX_RBBM_STATUS));
	gpu->funcs->pm_suspend(gpu);
	adreno_show(gpu, m);
}
#endif

/* would be nice to not have to duplicate the _show() stuff with printk(): */
static void a3xx_dump(struct msm_gpu *gpu)
{
	printk("status:   %08x\n",
			gpu_read(gpu, REG_A3XX_RBBM_STATUS));
	adreno_dump(gpu);
}

static const struct adreno_gpu_funcs funcs = {
	.base = {
		.get_param = adreno_get_param,
		.hw_init = a3xx_hw_init,
		.pm_suspend = msm_gpu_pm_suspend,
		.pm_resume = msm_gpu_pm_resume,
		.recover = a3xx_recover,
		.last_fence = adreno_last_fence,
		.submit = adreno_submit,
		.flush = adreno_flush,
		.idle = a3xx_idle,
		.irq = a3xx_irq,
		.destroy = a3xx_destroy,
#ifdef CONFIG_DEBUG_FS
		.show = a3xx_show,
#endif
	},
};

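/*
 * Performance counters exported to the core: each entry gives the select
 * register to program, the LO half of the counter register to sample, the
 * event select value, and a human-readable name.
 */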
static const struct msm_gpu_perfcntr perfcntrs[] = {
	{ REG_A3XX_SP_PERFCOUNTER6_SELECT, REG_A3XX_RBBM_PERFCTR_SP_6_LO,
			SP_ALU_ACTIVE_CYCLES, "ALUACTIVE" },
	{ REG_A3XX_SP_PERFCOUNTER7_SELECT, REG_A3XX_RBBM_PERFCTR_SP_7_LO,
			SP_FS_FULL_ALU_INSTRUCTIONS, "ALUFULL" },
};

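/*
 * Probe-time constructor: allocate the a3xx_gpu wrapper, hook up the perf
 * counters, register dump ranges and adreno_gpu_funcs, and (on a330) carve
 * GMEM out of OCMEM.  Without an IOMMU we refuse to bring the GPU up rather
 * than let it scribble on arbitrary memory.
 */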
struct msm_gpu *a3xx_gpu_init(struct drm_device *dev)
{
	struct a3xx_gpu *a3xx_gpu = NULL;
	struct adreno_gpu *adreno_gpu;
	struct msm_gpu *gpu;
	struct msm_drm_private *priv = dev->dev_private;
	struct platform_device *pdev = priv->gpu_pdev;
	int ret;

	if (!pdev) {
		dev_err(dev->dev, "no a3xx device\n");
		ret = -ENXIO;
		goto fail;
	}

	a3xx_gpu = kzalloc(sizeof(*a3xx_gpu), GFP_KERNEL);
	if (!a3xx_gpu) {
		ret = -ENOMEM;
		goto fail;
	}

	adreno_gpu = &a3xx_gpu->base;
	gpu = &adreno_gpu->base;

	a3xx_gpu->pdev = pdev;

	gpu->perfcntrs = perfcntrs;
	gpu->num_perfcntrs = ARRAY_SIZE(perfcntrs);

	adreno_gpu->registers = a3xx_registers;

	ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs);
	if (ret)
		goto fail;

	/* if needed, allocate gmem: */
	if (adreno_is_a330(adreno_gpu)) {
#ifdef CONFIG_MSM_OCMEM
		/* TODO this is different/missing upstream: */
		struct ocmem_buf *ocmem_hdl =
				ocmem_allocate(OCMEM_GRAPHICS, adreno_gpu->gmem);

		/* assumes a failed allocation comes back as NULL or ERR_PTR: */
		if (IS_ERR_OR_NULL(ocmem_hdl)) {
			ret = -ENOMEM;
			goto fail;
		}

		a3xx_gpu->ocmem_hdl = ocmem_hdl;
		a3xx_gpu->ocmem_base = ocmem_hdl->addr;
		adreno_gpu->gmem = ocmem_hdl->len;
		DBG("using %dK of OCMEM at 0x%08x", adreno_gpu->gmem / 1024,
				a3xx_gpu->ocmem_base);
#endif
	}

	if (!gpu->mmu) {
		/* TODO we think it is possible to configure the GPU to
		 * restrict access to VRAM carveout.  But the required
		 * registers are unknown.  For now just bail out and
		 * limp along with just modesetting.  If it turns out
		 * to not be possible to restrict access, then we must
		 * implement a cmdstream validator.
		 */
		dev_err(dev->dev, "No memory protection without IOMMU\n");
		ret = -ENXIO;
		goto fail;
	}

	return gpu;

fail:
	if (a3xx_gpu)
		a3xx_destroy(&a3xx_gpu->base.base);

	return ERR_PTR(ret);
}