/*
 * Copyright (C) 2013 Red Hat
 * Author: Rob Clark <robdclark@gmail.com>
 *
 * Copyright (c) 2014 The Linux Foundation. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 as published by
 * the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program.  If not, see <http://www.gnu.org/licenses/>.
 */

#ifdef CONFIG_MSM_OCMEM
#  include <mach/ocmem.h>
#endif

#include "a3xx_gpu.h"

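/*
 * Interrupt sources unmasked at hw_init time: mostly error conditions
 * (AHB/ATB errors, CP faults, register protection violations), plus the
 * CP ringbuffer and indirect-buffer interrupts used to retire submits.
 */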
#define A3XX_INT0_MASK \
	(A3XX_INT0_RBBM_AHB_ERROR |        \
	 A3XX_INT0_RBBM_ATB_BUS_OVERFLOW | \
	 A3XX_INT0_CP_T0_PACKET_IN_IB |    \
	 A3XX_INT0_CP_OPCODE_ERROR |       \
	 A3XX_INT0_CP_RESERVED_BIT_ERROR | \
	 A3XX_INT0_CP_HW_FAULT |           \
	 A3XX_INT0_CP_IB1_INT |            \
	 A3XX_INT0_CP_IB2_INT |            \
	 A3XX_INT0_CP_RB_INT |             \
	 A3XX_INT0_CP_REG_PROTECT_FAULT |  \
	 A3XX_INT0_CP_AHB_ERROR_HALT |     \
	 A3XX_INT0_UCHE_OOB_ACCESS)

extern bool hang_debug;

static void a3xx_dump(struct msm_gpu *gpu);

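/*
 * One-time CP micro-engine initialization.  The CP_ME_INIT payload below
 * appears to mirror the downstream driver; the meaning of the individual
 * dwords is not publicly documented.
 */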
static void a3xx_me_init(struct msm_gpu *gpu)
{
	struct msm_ringbuffer *ring = gpu->rb;

	OUT_PKT3(ring, CP_ME_INIT, 17);
	OUT_RING(ring, 0x000003f7);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000080);
	OUT_RING(ring, 0x00000100);
	OUT_RING(ring, 0x00000180);
	OUT_RING(ring, 0x00006600);
	OUT_RING(ring, 0x00000150);
	OUT_RING(ring, 0x0000014e);
	OUT_RING(ring, 0x00000154);
	OUT_RING(ring, 0x00000001);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);

	gpu->funcs->flush(gpu);
	gpu->funcs->idle(gpu);
}

static int a3xx_hw_init(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a3xx_gpu *a3xx_gpu = to_a3xx_gpu(adreno_gpu);
	uint32_t *ptr, len;
	int i, ret;

	DBG("%s", gpu->name);

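	/*
	 * Per-SoC VBIF (GPU/memory bus interface) tuning.  The values
	 * below appear to be taken from the downstream driver for each
	 * chip revision.
	 */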
	if (adreno_is_a305(adreno_gpu)) {
		/* Set up 16 deep read/write request queues: */
		gpu_write(gpu, REG_A3XX_VBIF_IN_RD_LIM_CONF0, 0x10101010);
		gpu_write(gpu, REG_A3XX_VBIF_IN_RD_LIM_CONF1, 0x10101010);
		gpu_write(gpu, REG_A3XX_VBIF_OUT_RD_LIM_CONF0, 0x10101010);
		gpu_write(gpu, REG_A3XX_VBIF_OUT_WR_LIM_CONF0, 0x10101010);
		gpu_write(gpu, REG_A3XX_VBIF_DDR_OUT_MAX_BURST, 0x0000303);
		gpu_write(gpu, REG_A3XX_VBIF_IN_WR_LIM_CONF0, 0x10101010);
		gpu_write(gpu, REG_A3XX_VBIF_IN_WR_LIM_CONF1, 0x10101010);
		/* Enable WR-REQ: */
		gpu_write(gpu, REG_A3XX_VBIF_GATE_OFF_WRREQ_EN, 0x0000ff);
		/* Set up round robin arbitration between both AXI ports: */
		gpu_write(gpu, REG_A3XX_VBIF_ARB_CTL, 0x00000030);
		/* Set up AOOO: */
		gpu_write(gpu, REG_A3XX_VBIF_OUT_AXI_AOOO_EN, 0x0000003c);
		gpu_write(gpu, REG_A3XX_VBIF_OUT_AXI_AOOO, 0x003c003c);

	} else if (adreno_is_a320(adreno_gpu)) {
		/* Set up 16 deep read/write request queues: */
		gpu_write(gpu, REG_A3XX_VBIF_IN_RD_LIM_CONF0, 0x10101010);
		gpu_write(gpu, REG_A3XX_VBIF_IN_RD_LIM_CONF1, 0x10101010);
		gpu_write(gpu, REG_A3XX_VBIF_OUT_RD_LIM_CONF0, 0x10101010);
		gpu_write(gpu, REG_A3XX_VBIF_OUT_WR_LIM_CONF0, 0x10101010);
		gpu_write(gpu, REG_A3XX_VBIF_DDR_OUT_MAX_BURST, 0x0000303);
		gpu_write(gpu, REG_A3XX_VBIF_IN_WR_LIM_CONF0, 0x10101010);
		gpu_write(gpu, REG_A3XX_VBIF_IN_WR_LIM_CONF1, 0x10101010);
		/* Enable WR-REQ: */
		gpu_write(gpu, REG_A3XX_VBIF_GATE_OFF_WRREQ_EN, 0x0000ff);
		/* Set up round robin arbitration between both AXI ports: */
		gpu_write(gpu, REG_A3XX_VBIF_ARB_CTL, 0x00000030);
		/* Set up AOOO: */
		gpu_write(gpu, REG_A3XX_VBIF_OUT_AXI_AOOO_EN, 0x0000003c);
		gpu_write(gpu, REG_A3XX_VBIF_OUT_AXI_AOOO, 0x003c003c);
		/* Enable 1K sort: */
		gpu_write(gpu, REG_A3XX_VBIF_ABIT_SORT, 0x000000ff);
		gpu_write(gpu, REG_A3XX_VBIF_ABIT_SORT_CONF, 0x000000a4);

	} else if (adreno_is_a330v2(adreno_gpu)) {
		/*
		 * Most of the VBIF registers on 8974v2 have the correct
		 * values at power on, so we won't modify those if we don't
		 * need to
		 */
		/* Enable 1k sort: */
		gpu_write(gpu, REG_A3XX_VBIF_ABIT_SORT, 0x0001003f);
		gpu_write(gpu, REG_A3XX_VBIF_ABIT_SORT_CONF, 0x000000a4);
		/* Enable WR-REQ: */
		gpu_write(gpu, REG_A3XX_VBIF_GATE_OFF_WRREQ_EN, 0x00003f);
		gpu_write(gpu, REG_A3XX_VBIF_DDR_OUT_MAX_BURST, 0x0000303);
		/* Set up VBIF_ROUND_ROBIN_QOS_ARB: */
		gpu_write(gpu, REG_A3XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x0003);

	} else if (adreno_is_a330(adreno_gpu)) {
		/* Set up 24 deep read/write request queues: */
		gpu_write(gpu, REG_A3XX_VBIF_IN_RD_LIM_CONF0, 0x18181818);
		gpu_write(gpu, REG_A3XX_VBIF_IN_RD_LIM_CONF1, 0x18181818);
		gpu_write(gpu, REG_A3XX_VBIF_OUT_RD_LIM_CONF0, 0x18181818);
		gpu_write(gpu, REG_A3XX_VBIF_OUT_WR_LIM_CONF0, 0x18181818);
		gpu_write(gpu, REG_A3XX_VBIF_DDR_OUT_MAX_BURST, 0x0000303);
		gpu_write(gpu, REG_A3XX_VBIF_IN_WR_LIM_CONF0, 0x18181818);
		gpu_write(gpu, REG_A3XX_VBIF_IN_WR_LIM_CONF1, 0x18181818);
		/* Enable WR-REQ: */
		gpu_write(gpu, REG_A3XX_VBIF_GATE_OFF_WRREQ_EN, 0x00003f);
		/* Set up round robin arbitration between both AXI ports: */
		gpu_write(gpu, REG_A3XX_VBIF_ARB_CTL, 0x00000030);
		/* Set up VBIF_ROUND_ROBIN_QOS_ARB: */
		gpu_write(gpu, REG_A3XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x0001);
		/* Set up AOOO: */
		gpu_write(gpu, REG_A3XX_VBIF_OUT_AXI_AOOO_EN, 0x0000003f);
		gpu_write(gpu, REG_A3XX_VBIF_OUT_AXI_AOOO, 0x003f003f);
		/* Enable 1K sort: */
		gpu_write(gpu, REG_A3XX_VBIF_ABIT_SORT, 0x0001003f);
		gpu_write(gpu, REG_A3XX_VBIF_ABIT_SORT_CONF, 0x000000a4);
		/* Disable VBIF clock gating.  This allows AXI to run at a
		 * higher frequency than the GPU:
		 */
		gpu_write(gpu, REG_A3XX_VBIF_CLKON, 0x00000001);

	} else {
		BUG();
	}

	/* Make all blocks contribute to the GPU BUSY perf counter: */
	gpu_write(gpu, REG_A3XX_RBBM_GPU_BUSY_MASKED, 0xffffffff);

	/* Tune the hysteresis counters for SP and CP idle detection: */
	gpu_write(gpu, REG_A3XX_RBBM_SP_HYST_CNT, 0x10);
	gpu_write(gpu, REG_A3XX_RBBM_WAIT_IDLE_CLOCKS_CTL, 0x10);

	/* Enable the RBBM error reporting bits.  This lets us get
	 * useful information on failure:
	 */
	gpu_write(gpu, REG_A3XX_RBBM_AHB_CTL0, 0x00000001);

	/* Enable AHB error reporting: */
	gpu_write(gpu, REG_A3XX_RBBM_AHB_CTL1, 0xa6ffffff);

	/* Turn on the power counters: */
	gpu_write(gpu, REG_A3XX_RBBM_RBBM_CTL, 0x00030000);

	/* Turn on hang detection - this spews a lot of useful information
	 * into the RBBM registers on a hang:
	 */
	gpu_write(gpu, REG_A3XX_RBBM_INTERFACE_HANG_INT_CTL, 0x00010fff);

	/* Enable 64-byte cacheline size. HW Default is 32-byte (0x000000E0): */
	gpu_write(gpu, REG_A3XX_UCHE_CACHE_MODE_CONTROL_REG, 0x00000001);

	/* Enable Clock gating: */
	if (adreno_is_a320(adreno_gpu))
		gpu_write(gpu, REG_A3XX_RBBM_CLOCK_CTL, 0xbfffffff);
	else if (adreno_is_a330v2(adreno_gpu))
		gpu_write(gpu, REG_A3XX_RBBM_CLOCK_CTL, 0xaaaaaaaa);
	else if (adreno_is_a330(adreno_gpu))
		gpu_write(gpu, REG_A3XX_RBBM_CLOCK_CTL, 0xbffcffff);

	if (adreno_is_a330v2(adreno_gpu))
		gpu_write(gpu, REG_A3XX_RBBM_GPR0_CTL, 0x05515455);
	else if (adreno_is_a330(adreno_gpu))
		gpu_write(gpu, REG_A3XX_RBBM_GPR0_CTL, 0x00000000);

	/* Set the OCMEM base address for A330, etc */
	if (a3xx_gpu->ocmem_hdl) {
		gpu_write(gpu, REG_A3XX_RB_GMEM_BASE_ADDR,
			(unsigned int)(a3xx_gpu->ocmem_base >> 14));
	}

	/* Turn on performance counters: */
	gpu_write(gpu, REG_A3XX_RBBM_PERFCTR_CTL, 0x01);

	/* Enable the perfcntrs that we use.. */
	for (i = 0; i < gpu->num_perfcntrs; i++) {
		const struct msm_gpu_perfcntr *perfcntr = &gpu->perfcntrs[i];
		gpu_write(gpu, perfcntr->select_reg, perfcntr->select_val);
	}

	gpu_write(gpu, REG_A3XX_RBBM_INT_0_MASK, A3XX_INT0_MASK);

	ret = adreno_hw_init(gpu);
	if (ret)
		return ret;

	/* setup access protection: */
	gpu_write(gpu, REG_A3XX_CP_PROTECT_CTRL, 0x00000007);

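	/*
	 * Each CP_PROTECT entry describes a protected register range.
	 * Going by the downstream driver, the low bits of each value hold
	 * the base register offset and the upper bits encode the range
	 * size and read/write protection flags (exact bit layout assumed
	 * from downstream sources, not documented here).
	 */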
	/* RBBM registers */
	gpu_write(gpu, REG_A3XX_CP_PROTECT(0), 0x63000040);
	gpu_write(gpu, REG_A3XX_CP_PROTECT(1), 0x62000080);
	gpu_write(gpu, REG_A3XX_CP_PROTECT(2), 0x600000cc);
	gpu_write(gpu, REG_A3XX_CP_PROTECT(3), 0x60000108);
	gpu_write(gpu, REG_A3XX_CP_PROTECT(4), 0x64000140);
	gpu_write(gpu, REG_A3XX_CP_PROTECT(5), 0x66000400);

	/* CP registers */
	gpu_write(gpu, REG_A3XX_CP_PROTECT(6), 0x65000700);
	gpu_write(gpu, REG_A3XX_CP_PROTECT(7), 0x610007d8);
	gpu_write(gpu, REG_A3XX_CP_PROTECT(8), 0x620007e0);
	gpu_write(gpu, REG_A3XX_CP_PROTECT(9), 0x61001178);
	gpu_write(gpu, REG_A3XX_CP_PROTECT(10), 0x64001180);

	/* RB registers */
	gpu_write(gpu, REG_A3XX_CP_PROTECT(11), 0x60003300);

	/* VBIF registers */
	gpu_write(gpu, REG_A3XX_CP_PROTECT(12), 0x6b00c000);

	/* NOTE: the PM4/micro-engine firmware registers appear to be the
	 * same for a2xx and a3xx, so we could possibly push that part
	 * down to the adreno_gpu base class.  Or push both PM4 and PFP,
	 * but parameterize the PFP ucode addr/data registers.
	 */

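	/*
	 * Note that both load loops below start at ptr[1], skipping the
	 * first dword of each firmware image; this matches what the
	 * downstream driver does (the first dword is presumably a header
	 * rather than ucode).
	 */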
	/* Load PM4: */
	ptr = (uint32_t *)(adreno_gpu->pm4->data);
	len = adreno_gpu->pm4->size / 4;
	DBG("loading PM4 ucode version: %x", ptr[1]);

	gpu_write(gpu, REG_AXXX_CP_DEBUG,
			AXXX_CP_DEBUG_DYNAMIC_CLK_DISABLE |
			AXXX_CP_DEBUG_MIU_128BIT_WRITE_ENABLE);
	gpu_write(gpu, REG_AXXX_CP_ME_RAM_WADDR, 0);
	for (i = 1; i < len; i++)
		gpu_write(gpu, REG_AXXX_CP_ME_RAM_DATA, ptr[i]);

	/* Load PFP: */
	ptr = (uint32_t *)(adreno_gpu->pfp->data);
	len = adreno_gpu->pfp->size / 4;
	DBG("loading PFP ucode version: %x", ptr[5]);

	gpu_write(gpu, REG_A3XX_CP_PFP_UCODE_ADDR, 0);
	for (i = 1; i < len; i++)
		gpu_write(gpu, REG_A3XX_CP_PFP_UCODE_DATA, ptr[i]);

	/* CP ROQ queue sizes (bytes) - RB:16, ST:16, IB1:32, IB2:64 */
	if (adreno_is_a305(adreno_gpu) || adreno_is_a320(adreno_gpu)) {
		gpu_write(gpu, REG_AXXX_CP_QUEUE_THRESHOLDS,
				AXXX_CP_QUEUE_THRESHOLDS_CSQ_IB1_START(2) |
				AXXX_CP_QUEUE_THRESHOLDS_CSQ_IB2_START(6) |
				AXXX_CP_QUEUE_THRESHOLDS_CSQ_ST_START(14));
	} else if (adreno_is_a330(adreno_gpu)) {
		/* NOTE: this value (taken from the downstream Android
		 * driver) includes some bits outside of the known
		 * bitfields.  But A330 has this "MERCIU queue" thing too,
		 * which might explain a new bitfield or reshuffling:
		 */
		gpu_write(gpu, REG_AXXX_CP_QUEUE_THRESHOLDS, 0x003e2008);
	}

	/* clear ME_HALT to start micro engine */
	gpu_write(gpu, REG_AXXX_CP_ME_CNTL, 0);

	a3xx_me_init(gpu);

	return 0;
}

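/*
 * Pulse RBBM_SW_RESET_CMD to reset the GPU core; the read back is
 * presumably there to ensure the write has posted before the reset bit
 * is cleared again.
 */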
static void a3xx_recover(struct msm_gpu *gpu)
{
	/* dump registers before resetting gpu, if enabled: */
	if (hang_debug)
		a3xx_dump(gpu);
	gpu_write(gpu, REG_A3XX_RBBM_SW_RESET_CMD, 1);
	gpu_read(gpu, REG_A3XX_RBBM_SW_RESET_CMD);
	gpu_write(gpu, REG_A3XX_RBBM_SW_RESET_CMD, 0);
	adreno_recover(gpu);
}

static void a3xx_destroy(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a3xx_gpu *a3xx_gpu = to_a3xx_gpu(adreno_gpu);

	DBG("%s", gpu->name);

	adreno_gpu_cleanup(adreno_gpu);

#ifdef CONFIG_MSM_OCMEM
	if (a3xx_gpu->ocmem_base)
		ocmem_free(OCMEM_GRAPHICS, a3xx_gpu->ocmem_hdl);
#endif

	kfree(a3xx_gpu);
}

static void a3xx_idle(struct msm_gpu *gpu)
{
	/* wait for ringbuffer to drain: */
	adreno_idle(gpu);

	/* then wait for GPU to finish: */
	if (spin_until(!(gpu_read(gpu, REG_A3XX_RBBM_STATUS) &
			A3XX_RBBM_STATUS_GPU_BUSY)))
		DRM_ERROR("%s: timeout waiting for GPU to idle!\n", gpu->name);

	/* TODO maybe we need to reset GPU here to recover from hang? */
}

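/*
 * IRQ handler: log and ack all asserted RBBM interrupt sources, then
 * retire any completed submits.
 */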
static irqreturn_t a3xx_irq(struct msm_gpu *gpu)
{
	uint32_t status;

	status = gpu_read(gpu, REG_A3XX_RBBM_INT_0_STATUS);
	DBG("%s: %08x", gpu->name, status);

	/* TODO */

	gpu_write(gpu, REG_A3XX_RBBM_INT_CLEAR_CMD, status);

	msm_gpu_retire(gpu);

	return IRQ_HANDLED;
}

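/*
 * Register ranges (pairs of start, end offsets) dumped for debugging by
 * adreno_show()/adreno_dump(), terminated by a ~0 sentinel:
 */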
static const unsigned int a3xx_registers[] = {
	0x0000, 0x0002, 0x0010, 0x0012, 0x0018, 0x0018, 0x0020, 0x0027,
	0x0029, 0x002b, 0x002e, 0x0033, 0x0040, 0x0042, 0x0050, 0x005c,
	0x0060, 0x006c, 0x0080, 0x0082, 0x0084, 0x0088, 0x0090, 0x00e5,
	0x00ea, 0x00ed, 0x0100, 0x0100, 0x0110, 0x0123, 0x01c0, 0x01c1,
	0x01c3, 0x01c5, 0x01c7, 0x01c7, 0x01d5, 0x01d9, 0x01dc, 0x01dd,
	0x01ea, 0x01ea, 0x01ee, 0x01f1, 0x01f5, 0x01f5, 0x01fc, 0x01ff,
	0x0440, 0x0440, 0x0443, 0x0443, 0x0445, 0x0445, 0x044d, 0x044f,
	0x0452, 0x0452, 0x0454, 0x046f, 0x047c, 0x047c, 0x047f, 0x047f,
	0x0578, 0x057f, 0x0600, 0x0602, 0x0605, 0x0607, 0x060a, 0x060e,
	0x0612, 0x0614, 0x0c01, 0x0c02, 0x0c06, 0x0c1d, 0x0c3d, 0x0c3f,
	0x0c48, 0x0c4b, 0x0c80, 0x0c80, 0x0c88, 0x0c8b, 0x0ca0, 0x0cb7,
	0x0cc0, 0x0cc1, 0x0cc6, 0x0cc7, 0x0ce4, 0x0ce5, 0x0e00, 0x0e05,
	0x0e0c, 0x0e0c, 0x0e22, 0x0e23, 0x0e41, 0x0e45, 0x0e64, 0x0e65,
	0x0e80, 0x0e82, 0x0e84, 0x0e89, 0x0ea0, 0x0ea1, 0x0ea4, 0x0ea7,
	0x0ec4, 0x0ecb, 0x0ee0, 0x0ee0, 0x0f00, 0x0f01, 0x0f03, 0x0f09,
	0x2040, 0x2040, 0x2044, 0x2044, 0x2048, 0x204d, 0x2068, 0x2069,
	0x206c, 0x206d, 0x2070, 0x2070, 0x2072, 0x2072, 0x2074, 0x2075,
	0x2079, 0x207a, 0x20c0, 0x20d3, 0x20e4, 0x20ef, 0x2100, 0x2109,
	0x210c, 0x210c, 0x210e, 0x210e, 0x2110, 0x2111, 0x2114, 0x2115,
	0x21e4, 0x21e4, 0x21ea, 0x21ea, 0x21ec, 0x21ed, 0x21f0, 0x21f0,
	0x2200, 0x2212, 0x2214, 0x2217, 0x221a, 0x221a, 0x2240, 0x227e,
	0x2280, 0x228b, 0x22c0, 0x22c0, 0x22c4, 0x22ce, 0x22d0, 0x22d8,
	0x22df, 0x22e6, 0x22e8, 0x22e9, 0x22ec, 0x22ec, 0x22f0, 0x22f7,
	0x22ff, 0x22ff, 0x2340, 0x2343, 0x2348, 0x2349, 0x2350, 0x2356,
	0x2360, 0x2360, 0x2440, 0x2440, 0x2444, 0x2444, 0x2448, 0x244d,
	0x2468, 0x2469, 0x246c, 0x246d, 0x2470, 0x2470, 0x2472, 0x2472,
	0x2474, 0x2475, 0x2479, 0x247a, 0x24c0, 0x24d3, 0x24e4, 0x24ef,
	0x2500, 0x2509, 0x250c, 0x250c, 0x250e, 0x250e, 0x2510, 0x2511,
	0x2514, 0x2515, 0x25e4, 0x25e4, 0x25ea, 0x25ea, 0x25ec, 0x25ed,
	0x25f0, 0x25f0, 0x2600, 0x2612, 0x2614, 0x2617, 0x261a, 0x261a,
	0x2640, 0x267e, 0x2680, 0x268b, 0x26c0, 0x26c0, 0x26c4, 0x26ce,
	0x26d0, 0x26d8, 0x26df, 0x26e6, 0x26e8, 0x26e9, 0x26ec, 0x26ec,
	0x26f0, 0x26f7, 0x26ff, 0x26ff, 0x2740, 0x2743, 0x2748, 0x2749,
	0x2750, 0x2756, 0x2760, 0x2760, 0x300c, 0x300e, 0x301c, 0x301d,
	0x302a, 0x302a, 0x302c, 0x302d, 0x3030, 0x3031, 0x3034, 0x3036,
	0x303c, 0x303c, 0x305e, 0x305f,
	~0   /* sentinel */
};

#ifdef CONFIG_DEBUG_FS
static void a3xx_show(struct msm_gpu *gpu, struct seq_file *m)
{
	gpu->funcs->pm_resume(gpu);
	seq_printf(m, "status:   %08x\n",
			gpu_read(gpu, REG_A3XX_RBBM_STATUS));
	gpu->funcs->pm_suspend(gpu);
	adreno_show(gpu, m);
}
#endif

/* would be nice to not have to duplicate the _show() stuff with printk(): */
static void a3xx_dump(struct msm_gpu *gpu)
{
	printk("status:   %08x\n",
			gpu_read(gpu, REG_A3XX_RBBM_STATUS));
	adreno_dump(gpu);
}

/* Register offset defines for A3XX */
static const unsigned int a3xx_register_offsets[REG_ADRENO_REGISTER_MAX] = {
	REG_ADRENO_DEFINE(REG_ADRENO_CP_DEBUG, REG_AXXX_CP_DEBUG),
	REG_ADRENO_DEFINE(REG_ADRENO_CP_ME_RAM_WADDR, REG_AXXX_CP_ME_RAM_WADDR),
	REG_ADRENO_DEFINE(REG_ADRENO_CP_ME_RAM_DATA, REG_AXXX_CP_ME_RAM_DATA),
	REG_ADRENO_DEFINE(REG_ADRENO_CP_PFP_UCODE_DATA,
			REG_A3XX_CP_PFP_UCODE_DATA),
	REG_ADRENO_DEFINE(REG_ADRENO_CP_PFP_UCODE_ADDR,
			REG_A3XX_CP_PFP_UCODE_ADDR),
	REG_ADRENO_DEFINE(REG_ADRENO_CP_WFI_PEND_CTR, REG_A3XX_CP_WFI_PEND_CTR),
	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_BASE, REG_AXXX_CP_RB_BASE),
	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_RPTR_ADDR, REG_AXXX_CP_RB_RPTR_ADDR),
	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_RPTR, REG_AXXX_CP_RB_RPTR),
	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_WPTR, REG_AXXX_CP_RB_WPTR),
	REG_ADRENO_DEFINE(REG_ADRENO_CP_PROTECT_CTRL, REG_A3XX_CP_PROTECT_CTRL),
	REG_ADRENO_DEFINE(REG_ADRENO_CP_ME_CNTL, REG_AXXX_CP_ME_CNTL),
	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_CNTL, REG_AXXX_CP_RB_CNTL),
	REG_ADRENO_DEFINE(REG_ADRENO_CP_IB1_BASE, REG_AXXX_CP_IB1_BASE),
	REG_ADRENO_DEFINE(REG_ADRENO_CP_IB1_BUFSZ, REG_AXXX_CP_IB1_BUFSZ),
	REG_ADRENO_DEFINE(REG_ADRENO_CP_IB2_BASE, REG_AXXX_CP_IB2_BASE),
	REG_ADRENO_DEFINE(REG_ADRENO_CP_IB2_BUFSZ, REG_AXXX_CP_IB2_BUFSZ),
	REG_ADRENO_DEFINE(REG_ADRENO_CP_TIMESTAMP, REG_AXXX_CP_SCRATCH_REG0),
	REG_ADRENO_DEFINE(REG_ADRENO_CP_ME_RAM_RADDR, REG_AXXX_CP_ME_RAM_RADDR),
	REG_ADRENO_DEFINE(REG_ADRENO_SCRATCH_ADDR, REG_AXXX_SCRATCH_ADDR),
	REG_ADRENO_DEFINE(REG_ADRENO_SCRATCH_UMSK, REG_AXXX_SCRATCH_UMSK),
	REG_ADRENO_DEFINE(REG_ADRENO_CP_ROQ_ADDR, REG_A3XX_CP_ROQ_ADDR),
	REG_ADRENO_DEFINE(REG_ADRENO_CP_ROQ_DATA, REG_A3XX_CP_ROQ_DATA),
	REG_ADRENO_DEFINE(REG_ADRENO_CP_MERCIU_ADDR, REG_A3XX_CP_MERCIU_ADDR),
	REG_ADRENO_DEFINE(REG_ADRENO_CP_MERCIU_DATA, REG_A3XX_CP_MERCIU_DATA),
	REG_ADRENO_DEFINE(REG_ADRENO_CP_MERCIU_DATA2, REG_A3XX_CP_MERCIU_DATA2),
	REG_ADRENO_DEFINE(REG_ADRENO_CP_MEQ_ADDR, REG_A3XX_CP_MEQ_ADDR),
	REG_ADRENO_DEFINE(REG_ADRENO_CP_MEQ_DATA, REG_A3XX_CP_MEQ_DATA),
	REG_ADRENO_DEFINE(REG_ADRENO_CP_HW_FAULT, REG_A3XX_CP_HW_FAULT),
	REG_ADRENO_DEFINE(REG_ADRENO_CP_PROTECT_STATUS,
			REG_A3XX_CP_PROTECT_STATUS),
	REG_ADRENO_DEFINE(REG_ADRENO_RBBM_STATUS, REG_A3XX_RBBM_STATUS),
	REG_ADRENO_DEFINE(REG_ADRENO_RBBM_PERFCTR_CTL,
			REG_A3XX_RBBM_PERFCTR_CTL),
	REG_ADRENO_DEFINE(REG_ADRENO_RBBM_PERFCTR_LOAD_CMD0,
			REG_A3XX_RBBM_PERFCTR_LOAD_CMD0),
	REG_ADRENO_DEFINE(REG_ADRENO_RBBM_PERFCTR_LOAD_CMD1,
			REG_A3XX_RBBM_PERFCTR_LOAD_CMD1),
	REG_ADRENO_DEFINE(REG_ADRENO_RBBM_PERFCTR_PWR_1_LO,
			REG_A3XX_RBBM_PERFCTR_PWR_1_LO),
	REG_ADRENO_DEFINE(REG_ADRENO_RBBM_INT_0_MASK, REG_A3XX_RBBM_INT_0_MASK),
	REG_ADRENO_DEFINE(REG_ADRENO_RBBM_INT_0_STATUS,
			REG_A3XX_RBBM_INT_0_STATUS),
	REG_ADRENO_DEFINE(REG_ADRENO_RBBM_AHB_ERROR_STATUS,
			REG_A3XX_RBBM_AHB_ERROR_STATUS),
	REG_ADRENO_DEFINE(REG_ADRENO_RBBM_AHB_CMD, REG_A3XX_RBBM_AHB_CMD),
	REG_ADRENO_DEFINE(REG_ADRENO_RBBM_INT_CLEAR_CMD,
			REG_A3XX_RBBM_INT_CLEAR_CMD),
	REG_ADRENO_DEFINE(REG_ADRENO_RBBM_CLOCK_CTL, REG_A3XX_RBBM_CLOCK_CTL),
	REG_ADRENO_DEFINE(REG_ADRENO_VPC_DEBUG_RAM_SEL,
			REG_A3XX_VPC_VPC_DEBUG_RAM_SEL),
	REG_ADRENO_DEFINE(REG_ADRENO_VPC_DEBUG_RAM_READ,
			REG_A3XX_VPC_VPC_DEBUG_RAM_READ),
	REG_ADRENO_DEFINE(REG_ADRENO_VSC_SIZE_ADDRESS,
			REG_A3XX_VSC_SIZE_ADDRESS),
	REG_ADRENO_DEFINE(REG_ADRENO_VFD_CONTROL_0, REG_A3XX_VFD_CONTROL_0),
	REG_ADRENO_DEFINE(REG_ADRENO_VFD_INDEX_MAX, REG_A3XX_VFD_INDEX_MAX),
	REG_ADRENO_DEFINE(REG_ADRENO_SP_VS_PVT_MEM_ADDR_REG,
			REG_A3XX_SP_VS_PVT_MEM_ADDR_REG),
	REG_ADRENO_DEFINE(REG_ADRENO_SP_FS_PVT_MEM_ADDR_REG,
			REG_A3XX_SP_FS_PVT_MEM_ADDR_REG),
	REG_ADRENO_DEFINE(REG_ADRENO_SP_VS_OBJ_START_REG,
			REG_A3XX_SP_VS_OBJ_START_REG),
	REG_ADRENO_DEFINE(REG_ADRENO_SP_FS_OBJ_START_REG,
			REG_A3XX_SP_FS_OBJ_START_REG),
	REG_ADRENO_DEFINE(REG_ADRENO_PA_SC_AA_CONFIG, REG_A3XX_PA_SC_AA_CONFIG),
	REG_ADRENO_DEFINE(REG_ADRENO_RBBM_PM_OVERRIDE2,
			REG_A3XX_RBBM_PM_OVERRIDE2),
	REG_ADRENO_DEFINE(REG_ADRENO_SCRATCH_REG2, REG_AXXX_CP_SCRATCH_REG2),
	REG_ADRENO_DEFINE(REG_ADRENO_SQ_GPR_MANAGEMENT,
			REG_A3XX_SQ_GPR_MANAGEMENT),
	REG_ADRENO_DEFINE(REG_ADRENO_SQ_INST_STORE_MANAGMENT,
			REG_A3XX_SQ_INST_STORE_MANAGMENT),
	REG_ADRENO_DEFINE(REG_ADRENO_TP0_CHICKEN, REG_A3XX_TP0_CHICKEN),
	REG_ADRENO_DEFINE(REG_ADRENO_RBBM_RBBM_CTL, REG_A3XX_RBBM_RBBM_CTL),
	REG_ADRENO_DEFINE(REG_ADRENO_RBBM_SW_RESET_CMD,
			REG_A3XX_RBBM_SW_RESET_CMD),
	REG_ADRENO_DEFINE(REG_ADRENO_UCHE_INVALIDATE0,
			REG_A3XX_UCHE_CACHE_INVALIDATE0_REG),
	REG_ADRENO_DEFINE(REG_ADRENO_RBBM_PERFCTR_LOAD_VALUE_LO,
			REG_A3XX_RBBM_PERFCTR_LOAD_VALUE_LO),
	REG_ADRENO_DEFINE(REG_ADRENO_RBBM_PERFCTR_LOAD_VALUE_HI,
			REG_A3XX_RBBM_PERFCTR_LOAD_VALUE_HI),
};

static const struct adreno_gpu_funcs funcs = {
	.base = {
		.get_param = adreno_get_param,
		.hw_init = a3xx_hw_init,
		.pm_suspend = msm_gpu_pm_suspend,
		.pm_resume = msm_gpu_pm_resume,
		.recover = a3xx_recover,
		.last_fence = adreno_last_fence,
		.submit = adreno_submit,
		.flush = adreno_flush,
		.idle = a3xx_idle,
		.irq = a3xx_irq,
		.destroy = a3xx_destroy,
#ifdef CONFIG_DEBUG_FS
		.show = a3xx_show,
#endif
	},
};

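/*
 * Performance counters sampled by the msm_gpu profiling code; each entry
 * gives a select register, sample register, select value and a
 * human-readable counter name:
 */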
static const struct msm_gpu_perfcntr perfcntrs[] = {
	{ REG_A3XX_SP_PERFCOUNTER6_SELECT, REG_A3XX_RBBM_PERFCTR_SP_6_LO,
			SP_ALU_ACTIVE_CYCLES, "ALUACTIVE" },
	{ REG_A3XX_SP_PERFCOUNTER7_SELECT, REG_A3XX_RBBM_PERFCTR_SP_7_LO,
			SP_FS_FULL_ALU_INSTRUCTIONS, "ALUFULL" },
};

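/*
 * Construct and initialize the a3xx GPU instance; called from the core
 * msm driver during load.
 */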
struct msm_gpu *a3xx_gpu_init(struct drm_device *dev)
{
	struct a3xx_gpu *a3xx_gpu = NULL;
	struct adreno_gpu *adreno_gpu;
	struct msm_gpu *gpu;
	struct msm_drm_private *priv = dev->dev_private;
	struct platform_device *pdev = priv->gpu_pdev;
	int ret;

	if (!pdev) {
		dev_err(dev->dev, "no a3xx device\n");
		ret = -ENXIO;
		goto fail;
	}

	a3xx_gpu = kzalloc(sizeof(*a3xx_gpu), GFP_KERNEL);
	if (!a3xx_gpu) {
		ret = -ENOMEM;
		goto fail;
	}

	adreno_gpu = &a3xx_gpu->base;
	gpu = &adreno_gpu->base;

	a3xx_gpu->pdev = pdev;

	gpu->perfcntrs = perfcntrs;
	gpu->num_perfcntrs = ARRAY_SIZE(perfcntrs);

	adreno_gpu->registers = a3xx_registers;
	adreno_gpu->reg_offsets = a3xx_register_offsets;

	ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs);
	if (ret)
		goto fail;

	/* if needed, allocate gmem: */
	if (adreno_is_a330(adreno_gpu)) {
#ifdef CONFIG_MSM_OCMEM
		/* TODO this is different/missing upstream: */
		struct ocmem_buf *ocmem_hdl =
				ocmem_allocate(OCMEM_GRAPHICS, adreno_gpu->gmem);

		a3xx_gpu->ocmem_hdl = ocmem_hdl;
		a3xx_gpu->ocmem_base = ocmem_hdl->addr;
		adreno_gpu->gmem = ocmem_hdl->len;
		DBG("using %dK of OCMEM at 0x%08x", adreno_gpu->gmem / 1024,
				a3xx_gpu->ocmem_base);
#endif
	}

	if (!gpu->mmu) {
		/* TODO we think it is possible to configure the GPU to
		 * restrict access to VRAM carveout.  But the required
		 * registers are unknown.  For now just bail out and
		 * limp along with just modesetting.  If it turns out
		 * to not be possible to restrict access, then we must
		 * implement a cmdstream validator.
		 */
		dev_err(dev->dev, "No memory protection without IOMMU\n");
		ret = -ENXIO;
		goto fail;
	}

	return gpu;

fail:
	if (a3xx_gpu)
		a3xx_destroy(&a3xx_gpu->base.base);

	return ERR_PTR(ret);
}
598