/* xref: /openbmc/linux/drivers/gpu/drm/msm/adreno/a5xx_gpu.c (revision abade675e02e1b73da0c20ffaf08fbe309038298) */
1 /* Copyright (c) 2016-2017 The Linux Foundation. All rights reserved.
2  *
3  * This program is free software; you can redistribute it and/or modify
4  * it under the terms of the GNU General Public License version 2 and
5  * only version 2 as published by the Free Software Foundation.
6  *
7  * This program is distributed in the hope that it will be useful,
8  * but WITHOUT ANY WARRANTY; without even the implied warranty of
9  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10  * GNU General Public License for more details.
11  *
12  */
13 
14 #include <linux/kernel.h>
15 #include <linux/types.h>
16 #include <linux/cpumask.h>
17 #include <linux/qcom_scm.h>
18 #include <linux/pm_opp.h>
19 #include <linux/nvmem-consumer.h>
20 #include <linux/slab.h>
21 #include "msm_gem.h"
22 #include "msm_mmu.h"
23 #include "a5xx_gpu.h"
24 
25 extern bool hang_debug;
26 static void a5xx_dump(struct msm_gpu *gpu);
27 
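/* PAS (Peripheral Authentication Service) ID used by the SCM calls that manage the GPU zap shader */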
28 #define GPU_PAS_ID 13
29 
30 static void a5xx_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
31 {
32 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
33 	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
34 	uint32_t wptr;
35 	unsigned long flags;
36 
37 	spin_lock_irqsave(&ring->lock, flags);
38 
39 	/* Copy the shadow to the actual register */
40 	ring->cur = ring->next;
41 
42 	/* Make sure to wrap wptr if we need to */
43 	wptr = get_wptr(ring);
44 
45 	spin_unlock_irqrestore(&ring->lock, flags);
46 
47 	/* Make sure everything is posted before making a decision */
48 	mb();
49 
50 	/* Update HW if this is the current ring and we are not in preempt */
51 	if (a5xx_gpu->cur_ring == ring && !a5xx_in_preempt(a5xx_gpu))
52 		gpu_write(gpu, REG_A5XX_CP_RB_WPTR, wptr);
53 }
54 
55 static void a5xx_submit_in_rb(struct msm_gpu *gpu, struct msm_gem_submit *submit,
56 	struct msm_file_private *ctx)
57 {
58 	struct msm_drm_private *priv = gpu->dev->dev_private;
59 	struct msm_ringbuffer *ring = submit->ring;
60 	struct msm_gem_object *obj;
61 	uint32_t *ptr, dwords;
	unsigned int i, j;
63 
64 	for (i = 0; i < submit->nr_cmds; i++) {
65 		switch (submit->cmd[i].type) {
66 		case MSM_SUBMIT_CMD_IB_TARGET_BUF:
67 			break;
68 		case MSM_SUBMIT_CMD_CTX_RESTORE_BUF:
69 			if (priv->lastctx == ctx)
70 				break;
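			/* fall-thru */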
71 		case MSM_SUBMIT_CMD_BUF:
72 			/* copy commands into RB: */
73 			obj = submit->bos[submit->cmd[i].idx].obj;
74 			dwords = submit->cmd[i].size;
75 
76 			ptr = msm_gem_get_vaddr(&obj->base);
77 
78 			/* _get_vaddr() shouldn't fail at this point,
79 			 * since we've already mapped it once in
80 			 * submit_reloc()
81 			 */
82 			if (WARN_ON(!ptr))
83 				return;
84 
			/* Use a separate index so we don't clobber the outer
			 * command loop counter
			 */
			for (j = 0; j < dwords; j++) {
				/* Normally OUT_PKTn() would wait for
				 * space for the packet.  But since we
				 * just OUT_RING() the whole thing, we
				 * need to call adreno_wait_ring()
				 * ourselves:
				 */
				adreno_wait_ring(ring, 1);
				OUT_RING(ring, ptr[j]);
			}
95 
96 			msm_gem_put_vaddr(&obj->base);
97 
98 			break;
99 		}
100 	}
101 
102 	a5xx_flush(gpu, ring);
103 	a5xx_preempt_trigger(gpu);
104 
105 	/* we might not necessarily have a cmd from userspace to
106 	 * trigger an event to know that submit has completed, so
107 	 * do this manually:
108 	 */
109 	a5xx_idle(gpu, ring);
110 	ring->memptrs->fence = submit->seqno;
111 	msm_gpu_retire(gpu);
112 }
113 
114 static void a5xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit,
115 	struct msm_file_private *ctx)
116 {
117 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
118 	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
119 	struct msm_drm_private *priv = gpu->dev->dev_private;
120 	struct msm_ringbuffer *ring = submit->ring;
121 	unsigned int i, ibs = 0;
122 
123 	if (IS_ENABLED(CONFIG_DRM_MSM_GPU_SUDO) && submit->in_rb) {
124 		priv->lastctx = NULL;
125 		a5xx_submit_in_rb(gpu, submit, ctx);
126 		return;
127 	}
128 
129 	OUT_PKT7(ring, CP_PREEMPT_ENABLE_GLOBAL, 1);
130 	OUT_RING(ring, 0x02);
131 
132 	/* Turn off protected mode to write to special registers */
133 	OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
134 	OUT_RING(ring, 0);
135 
136 	/* Set the save preemption record for the ring/command */
137 	OUT_PKT4(ring, REG_A5XX_CP_CONTEXT_SWITCH_SAVE_ADDR_LO, 2);
138 	OUT_RING(ring, lower_32_bits(a5xx_gpu->preempt_iova[submit->ring->id]));
139 	OUT_RING(ring, upper_32_bits(a5xx_gpu->preempt_iova[submit->ring->id]));
140 
141 	/* Turn back on protected mode */
142 	OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
143 	OUT_RING(ring, 1);
144 
145 	/* Enable local preemption for finegrain preemption */
	OUT_PKT7(ring, CP_PREEMPT_ENABLE_LOCAL, 1);
147 	OUT_RING(ring, 0x02);
148 
149 	/* Allow CP_CONTEXT_SWITCH_YIELD packets in the IB2 */
150 	OUT_PKT7(ring, CP_YIELD_ENABLE, 1);
151 	OUT_RING(ring, 0x02);
152 
153 	/* Submit the commands */
154 	for (i = 0; i < submit->nr_cmds; i++) {
155 		switch (submit->cmd[i].type) {
156 		case MSM_SUBMIT_CMD_IB_TARGET_BUF:
157 			break;
158 		case MSM_SUBMIT_CMD_CTX_RESTORE_BUF:
159 			if (priv->lastctx == ctx)
160 				break;
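			/* fall-thru */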
161 		case MSM_SUBMIT_CMD_BUF:
162 			OUT_PKT7(ring, CP_INDIRECT_BUFFER_PFE, 3);
163 			OUT_RING(ring, lower_32_bits(submit->cmd[i].iova));
164 			OUT_RING(ring, upper_32_bits(submit->cmd[i].iova));
165 			OUT_RING(ring, submit->cmd[i].size);
166 			ibs++;
167 			break;
168 		}
169 	}
170 
171 	/*
172 	 * Write the render mode to NULL (0) to indicate to the CP that the IBs
173 	 * are done rendering - otherwise a lucky preemption would start
174 	 * replaying from the last checkpoint
175 	 */
176 	OUT_PKT7(ring, CP_SET_RENDER_MODE, 5);
177 	OUT_RING(ring, 0);
178 	OUT_RING(ring, 0);
179 	OUT_RING(ring, 0);
180 	OUT_RING(ring, 0);
181 	OUT_RING(ring, 0);
182 
183 	/* Turn off IB level preemptions */
184 	OUT_PKT7(ring, CP_YIELD_ENABLE, 1);
185 	OUT_RING(ring, 0x01);
186 
187 	/* Write the fence to the scratch register */
188 	OUT_PKT4(ring, REG_A5XX_CP_SCRATCH_REG(2), 1);
189 	OUT_RING(ring, submit->seqno);
190 
191 	/*
192 	 * Execute a CACHE_FLUSH_TS event. This will ensure that the
	 * timestamp is written to memory and then trigger the interrupt
194 	 */
195 	OUT_PKT7(ring, CP_EVENT_WRITE, 4);
196 	OUT_RING(ring, CACHE_FLUSH_TS | (1 << 31));
197 	OUT_RING(ring, lower_32_bits(rbmemptr(ring, fence)));
198 	OUT_RING(ring, upper_32_bits(rbmemptr(ring, fence)));
199 	OUT_RING(ring, submit->seqno);
200 
201 	/* Yield the floor on command completion */
202 	OUT_PKT7(ring, CP_CONTEXT_SWITCH_YIELD, 4);
203 	/*
	 * If dword[2:1] are non-zero, they specify an address for the CP to
205 	 * write the value of dword[3] to on preemption complete. Write 0 to
206 	 * skip the write
207 	 */
208 	OUT_RING(ring, 0x00);
209 	OUT_RING(ring, 0x00);
210 	/* Data value - not used if the address above is 0 */
211 	OUT_RING(ring, 0x01);
212 	/* Set bit 0 to trigger an interrupt on preempt complete */
213 	OUT_RING(ring, 0x01);
214 
215 	a5xx_flush(gpu, ring);
216 
217 	/* Check to see if we need to start preemption */
218 	a5xx_preempt_trigger(gpu);
219 }
220 
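/*
 * Hardware clock gating settings, applied to the RBBM clock control
 * registers by a5xx_set_hwcg()
 */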
221 static const struct {
222 	u32 offset;
223 	u32 value;
224 } a5xx_hwcg[] = {
225 	{REG_A5XX_RBBM_CLOCK_CNTL_SP0, 0x02222222},
226 	{REG_A5XX_RBBM_CLOCK_CNTL_SP1, 0x02222222},
227 	{REG_A5XX_RBBM_CLOCK_CNTL_SP2, 0x02222222},
228 	{REG_A5XX_RBBM_CLOCK_CNTL_SP3, 0x02222222},
229 	{REG_A5XX_RBBM_CLOCK_CNTL2_SP0, 0x02222220},
230 	{REG_A5XX_RBBM_CLOCK_CNTL2_SP1, 0x02222220},
231 	{REG_A5XX_RBBM_CLOCK_CNTL2_SP2, 0x02222220},
232 	{REG_A5XX_RBBM_CLOCK_CNTL2_SP3, 0x02222220},
233 	{REG_A5XX_RBBM_CLOCK_HYST_SP0, 0x0000F3CF},
234 	{REG_A5XX_RBBM_CLOCK_HYST_SP1, 0x0000F3CF},
235 	{REG_A5XX_RBBM_CLOCK_HYST_SP2, 0x0000F3CF},
236 	{REG_A5XX_RBBM_CLOCK_HYST_SP3, 0x0000F3CF},
237 	{REG_A5XX_RBBM_CLOCK_DELAY_SP0, 0x00000080},
238 	{REG_A5XX_RBBM_CLOCK_DELAY_SP1, 0x00000080},
239 	{REG_A5XX_RBBM_CLOCK_DELAY_SP2, 0x00000080},
240 	{REG_A5XX_RBBM_CLOCK_DELAY_SP3, 0x00000080},
241 	{REG_A5XX_RBBM_CLOCK_CNTL_TP0, 0x22222222},
242 	{REG_A5XX_RBBM_CLOCK_CNTL_TP1, 0x22222222},
243 	{REG_A5XX_RBBM_CLOCK_CNTL_TP2, 0x22222222},
244 	{REG_A5XX_RBBM_CLOCK_CNTL_TP3, 0x22222222},
245 	{REG_A5XX_RBBM_CLOCK_CNTL2_TP0, 0x22222222},
246 	{REG_A5XX_RBBM_CLOCK_CNTL2_TP1, 0x22222222},
247 	{REG_A5XX_RBBM_CLOCK_CNTL2_TP2, 0x22222222},
248 	{REG_A5XX_RBBM_CLOCK_CNTL2_TP3, 0x22222222},
249 	{REG_A5XX_RBBM_CLOCK_CNTL3_TP0, 0x00002222},
250 	{REG_A5XX_RBBM_CLOCK_CNTL3_TP1, 0x00002222},
251 	{REG_A5XX_RBBM_CLOCK_CNTL3_TP2, 0x00002222},
252 	{REG_A5XX_RBBM_CLOCK_CNTL3_TP3, 0x00002222},
253 	{REG_A5XX_RBBM_CLOCK_HYST_TP0, 0x77777777},
254 	{REG_A5XX_RBBM_CLOCK_HYST_TP1, 0x77777777},
255 	{REG_A5XX_RBBM_CLOCK_HYST_TP2, 0x77777777},
256 	{REG_A5XX_RBBM_CLOCK_HYST_TP3, 0x77777777},
257 	{REG_A5XX_RBBM_CLOCK_HYST2_TP0, 0x77777777},
258 	{REG_A5XX_RBBM_CLOCK_HYST2_TP1, 0x77777777},
259 	{REG_A5XX_RBBM_CLOCK_HYST2_TP2, 0x77777777},
260 	{REG_A5XX_RBBM_CLOCK_HYST2_TP3, 0x77777777},
261 	{REG_A5XX_RBBM_CLOCK_HYST3_TP0, 0x00007777},
262 	{REG_A5XX_RBBM_CLOCK_HYST3_TP1, 0x00007777},
263 	{REG_A5XX_RBBM_CLOCK_HYST3_TP2, 0x00007777},
264 	{REG_A5XX_RBBM_CLOCK_HYST3_TP3, 0x00007777},
265 	{REG_A5XX_RBBM_CLOCK_DELAY_TP0, 0x11111111},
266 	{REG_A5XX_RBBM_CLOCK_DELAY_TP1, 0x11111111},
267 	{REG_A5XX_RBBM_CLOCK_DELAY_TP2, 0x11111111},
268 	{REG_A5XX_RBBM_CLOCK_DELAY_TP3, 0x11111111},
269 	{REG_A5XX_RBBM_CLOCK_DELAY2_TP0, 0x11111111},
270 	{REG_A5XX_RBBM_CLOCK_DELAY2_TP1, 0x11111111},
271 	{REG_A5XX_RBBM_CLOCK_DELAY2_TP2, 0x11111111},
272 	{REG_A5XX_RBBM_CLOCK_DELAY2_TP3, 0x11111111},
273 	{REG_A5XX_RBBM_CLOCK_DELAY3_TP0, 0x00001111},
274 	{REG_A5XX_RBBM_CLOCK_DELAY3_TP1, 0x00001111},
275 	{REG_A5XX_RBBM_CLOCK_DELAY3_TP2, 0x00001111},
276 	{REG_A5XX_RBBM_CLOCK_DELAY3_TP3, 0x00001111},
277 	{REG_A5XX_RBBM_CLOCK_CNTL_UCHE, 0x22222222},
278 	{REG_A5XX_RBBM_CLOCK_CNTL2_UCHE, 0x22222222},
279 	{REG_A5XX_RBBM_CLOCK_CNTL3_UCHE, 0x22222222},
280 	{REG_A5XX_RBBM_CLOCK_CNTL4_UCHE, 0x00222222},
281 	{REG_A5XX_RBBM_CLOCK_HYST_UCHE, 0x00444444},
282 	{REG_A5XX_RBBM_CLOCK_DELAY_UCHE, 0x00000002},
283 	{REG_A5XX_RBBM_CLOCK_CNTL_RB0, 0x22222222},
284 	{REG_A5XX_RBBM_CLOCK_CNTL_RB1, 0x22222222},
285 	{REG_A5XX_RBBM_CLOCK_CNTL_RB2, 0x22222222},
286 	{REG_A5XX_RBBM_CLOCK_CNTL_RB3, 0x22222222},
287 	{REG_A5XX_RBBM_CLOCK_CNTL2_RB0, 0x00222222},
288 	{REG_A5XX_RBBM_CLOCK_CNTL2_RB1, 0x00222222},
289 	{REG_A5XX_RBBM_CLOCK_CNTL2_RB2, 0x00222222},
290 	{REG_A5XX_RBBM_CLOCK_CNTL2_RB3, 0x00222222},
291 	{REG_A5XX_RBBM_CLOCK_CNTL_CCU0, 0x00022220},
292 	{REG_A5XX_RBBM_CLOCK_CNTL_CCU1, 0x00022220},
293 	{REG_A5XX_RBBM_CLOCK_CNTL_CCU2, 0x00022220},
294 	{REG_A5XX_RBBM_CLOCK_CNTL_CCU3, 0x00022220},
295 	{REG_A5XX_RBBM_CLOCK_CNTL_RAC, 0x05522222},
296 	{REG_A5XX_RBBM_CLOCK_CNTL2_RAC, 0x00505555},
297 	{REG_A5XX_RBBM_CLOCK_HYST_RB_CCU0, 0x04040404},
298 	{REG_A5XX_RBBM_CLOCK_HYST_RB_CCU1, 0x04040404},
299 	{REG_A5XX_RBBM_CLOCK_HYST_RB_CCU2, 0x04040404},
300 	{REG_A5XX_RBBM_CLOCK_HYST_RB_CCU3, 0x04040404},
301 	{REG_A5XX_RBBM_CLOCK_HYST_RAC, 0x07444044},
302 	{REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_0, 0x00000002},
303 	{REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_1, 0x00000002},
304 	{REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_2, 0x00000002},
305 	{REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_3, 0x00000002},
306 	{REG_A5XX_RBBM_CLOCK_DELAY_RAC, 0x00010011},
307 	{REG_A5XX_RBBM_CLOCK_CNTL_TSE_RAS_RBBM, 0x04222222},
308 	{REG_A5XX_RBBM_CLOCK_MODE_GPC, 0x02222222},
309 	{REG_A5XX_RBBM_CLOCK_MODE_VFD, 0x00002222},
310 	{REG_A5XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM, 0x00000000},
311 	{REG_A5XX_RBBM_CLOCK_HYST_GPC, 0x04104004},
312 	{REG_A5XX_RBBM_CLOCK_HYST_VFD, 0x00000000},
313 	{REG_A5XX_RBBM_CLOCK_DELAY_HLSQ, 0x00000000},
314 	{REG_A5XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM, 0x00004000},
315 	{REG_A5XX_RBBM_CLOCK_DELAY_GPC, 0x00000200},
316 	{REG_A5XX_RBBM_CLOCK_DELAY_VFD, 0x00002222}
317 };
318 
319 void a5xx_set_hwcg(struct msm_gpu *gpu, bool state)
320 {
321 	unsigned int i;
322 
323 	for (i = 0; i < ARRAY_SIZE(a5xx_hwcg); i++)
324 		gpu_write(gpu, a5xx_hwcg[i].offset,
325 			state ? a5xx_hwcg[i].value : 0);
326 
327 	gpu_write(gpu, REG_A5XX_RBBM_CLOCK_CNTL, state ? 0xAAA8AA00 : 0);
328 	gpu_write(gpu, REG_A5XX_RBBM_ISDB_CNT, state ? 0x182 : 0x180);
329 }
330 
331 static int a5xx_me_init(struct msm_gpu *gpu)
332 {
333 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
334 	struct msm_ringbuffer *ring = gpu->rb[0];
335 
336 	OUT_PKT7(ring, CP_ME_INIT, 8);
337 
338 	OUT_RING(ring, 0x0000002F);
339 
340 	/* Enable multiple hardware contexts */
341 	OUT_RING(ring, 0x00000003);
342 
343 	/* Enable error detection */
344 	OUT_RING(ring, 0x20000000);
345 
346 	/* Don't enable header dump */
347 	OUT_RING(ring, 0x00000000);
348 	OUT_RING(ring, 0x00000000);
349 
350 	/* Specify workarounds for various microcode issues */
351 	if (adreno_is_a530(adreno_gpu)) {
352 		/* Workaround for token end syncs
353 		 * Force a WFI after every direct-render 3D mode draw and every
354 		 * 2D mode 3 draw
355 		 */
356 		OUT_RING(ring, 0x0000000B);
357 	} else {
358 		/* No workarounds enabled */
359 		OUT_RING(ring, 0x00000000);
360 	}
361 
362 	OUT_RING(ring, 0x00000000);
363 	OUT_RING(ring, 0x00000000);
364 
365 	gpu->funcs->flush(gpu, ring);
366 	return a5xx_idle(gpu, ring) ? 0 : -EINVAL;
367 }
368 
369 static int a5xx_preempt_start(struct msm_gpu *gpu)
370 {
371 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
372 	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
373 	struct msm_ringbuffer *ring = gpu->rb[0];
374 
375 	if (gpu->nr_rings == 1)
376 		return 0;
377 
378 	/* Turn off protected mode to write to special registers */
379 	OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
380 	OUT_RING(ring, 0);
381 
382 	/* Set the save preemption record for the ring/command */
383 	OUT_PKT4(ring, REG_A5XX_CP_CONTEXT_SWITCH_SAVE_ADDR_LO, 2);
384 	OUT_RING(ring, lower_32_bits(a5xx_gpu->preempt_iova[ring->id]));
385 	OUT_RING(ring, upper_32_bits(a5xx_gpu->preempt_iova[ring->id]));
386 
387 	/* Turn back on protected mode */
388 	OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
389 	OUT_RING(ring, 1);
390 
391 	OUT_PKT7(ring, CP_PREEMPT_ENABLE_GLOBAL, 1);
392 	OUT_RING(ring, 0x00);
393 
394 	OUT_PKT7(ring, CP_PREEMPT_ENABLE_LOCAL, 1);
395 	OUT_RING(ring, 0x01);
396 
397 	OUT_PKT7(ring, CP_YIELD_ENABLE, 1);
398 	OUT_RING(ring, 0x01);
399 
400 	/* Yield the floor on command completion */
401 	OUT_PKT7(ring, CP_CONTEXT_SWITCH_YIELD, 4);
402 	OUT_RING(ring, 0x00);
403 	OUT_RING(ring, 0x00);
404 	OUT_RING(ring, 0x01);
405 	OUT_RING(ring, 0x01);
406 
407 	gpu->funcs->flush(gpu, ring);
408 
409 	return a5xx_idle(gpu, ring) ? 0 : -EINVAL;
410 }
411 
412 static int a5xx_ucode_init(struct msm_gpu *gpu)
413 {
414 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
415 	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
416 	int ret;
417 
418 	if (!a5xx_gpu->pm4_bo) {
419 		a5xx_gpu->pm4_bo = adreno_fw_create_bo(gpu,
420 			adreno_gpu->fw[ADRENO_FW_PM4], &a5xx_gpu->pm4_iova);
421 
423 		if (IS_ERR(a5xx_gpu->pm4_bo)) {
424 			ret = PTR_ERR(a5xx_gpu->pm4_bo);
425 			a5xx_gpu->pm4_bo = NULL;
426 			DRM_DEV_ERROR(gpu->dev->dev, "could not allocate PM4: %d\n",
427 				ret);
428 			return ret;
429 		}
430 
431 		msm_gem_object_set_name(a5xx_gpu->pm4_bo, "pm4fw");
432 	}
433 
434 	if (!a5xx_gpu->pfp_bo) {
435 		a5xx_gpu->pfp_bo = adreno_fw_create_bo(gpu,
436 			adreno_gpu->fw[ADRENO_FW_PFP], &a5xx_gpu->pfp_iova);
437 
438 		if (IS_ERR(a5xx_gpu->pfp_bo)) {
439 			ret = PTR_ERR(a5xx_gpu->pfp_bo);
440 			a5xx_gpu->pfp_bo = NULL;
441 			DRM_DEV_ERROR(gpu->dev->dev, "could not allocate PFP: %d\n",
442 				ret);
443 			return ret;
444 		}
445 
446 		msm_gem_object_set_name(a5xx_gpu->pfp_bo, "pfpfw");
447 	}
448 
449 	gpu_write64(gpu, REG_A5XX_CP_ME_INSTR_BASE_LO,
450 		REG_A5XX_CP_ME_INSTR_BASE_HI, a5xx_gpu->pm4_iova);
451 
452 	gpu_write64(gpu, REG_A5XX_CP_PFP_INSTR_BASE_LO,
453 		REG_A5XX_CP_PFP_INSTR_BASE_HI, a5xx_gpu->pfp_iova);
454 
455 	return 0;
456 }
457 
458 #define SCM_GPU_ZAP_SHADER_RESUME 0
459 
460 static int a5xx_zap_shader_resume(struct msm_gpu *gpu)
461 {
462 	int ret;
463 
464 	ret = qcom_scm_set_remote_state(SCM_GPU_ZAP_SHADER_RESUME, GPU_PAS_ID);
465 	if (ret)
466 		DRM_ERROR("%s: zap-shader resume failed: %d\n",
467 			gpu->name, ret);
468 
469 	return ret;
470 }
471 
472 static int a5xx_zap_shader_init(struct msm_gpu *gpu)
473 {
474 	static bool loaded;
475 	int ret;
476 
477 	/*
478 	 * If the zap shader is already loaded into memory we just need to kick
479 	 * the remote processor to reinitialize it
480 	 */
481 	if (loaded)
482 		return a5xx_zap_shader_resume(gpu);
483 
484 	ret = adreno_zap_shader_load(gpu, GPU_PAS_ID);
485 
486 	loaded = !ret;
487 	return ret;
488 }
489 
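/* The set of RBBM interrupts that a5xx_irq() knows how to handle */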
490 #define A5XX_INT_MASK (A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR | \
491 	  A5XX_RBBM_INT_0_MASK_RBBM_TRANSFER_TIMEOUT | \
492 	  A5XX_RBBM_INT_0_MASK_RBBM_ME_MS_TIMEOUT | \
493 	  A5XX_RBBM_INT_0_MASK_RBBM_PFP_MS_TIMEOUT | \
494 	  A5XX_RBBM_INT_0_MASK_RBBM_ETS_MS_TIMEOUT | \
495 	  A5XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNC_OVERFLOW | \
496 	  A5XX_RBBM_INT_0_MASK_CP_HW_ERROR | \
497 	  A5XX_RBBM_INT_0_MASK_MISC_HANG_DETECT | \
498 	  A5XX_RBBM_INT_0_MASK_CP_SW | \
499 	  A5XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS | \
500 	  A5XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS | \
501 	  A5XX_RBBM_INT_0_MASK_GPMU_VOLTAGE_DROOP)
502 
503 static int a5xx_hw_init(struct msm_gpu *gpu)
504 {
505 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
506 	int ret;
507 
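	/* Program the VBIF for round robin QoS arbitration */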
508 	gpu_write(gpu, REG_A5XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x00000003);
509 
510 	/* Make all blocks contribute to the GPU BUSY perf counter */
511 	gpu_write(gpu, REG_A5XX_RBBM_PERFCTR_GPU_BUSY_MASKED, 0xFFFFFFFF);
512 
513 	/* Enable RBBM error reporting bits */
514 	gpu_write(gpu, REG_A5XX_RBBM_AHB_CNTL0, 0x00000001);
515 
516 	if (adreno_gpu->info->quirks & ADRENO_QUIRK_FAULT_DETECT_MASK) {
517 		/*
518 		 * Mask out the activity signals from RB1-3 to avoid false
519 		 * positives
520 		 */
521 
522 		gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL11,
523 			0xF0000000);
524 		gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL12,
525 			0xFFFFFFFF);
526 		gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL13,
527 			0xFFFFFFFF);
528 		gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL14,
529 			0xFFFFFFFF);
530 		gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL15,
531 			0xFFFFFFFF);
532 		gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL16,
533 			0xFFFFFFFF);
534 		gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL17,
535 			0xFFFFFFFF);
536 		gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL18,
537 			0xFFFFFFFF);
538 	}
539 
540 	/* Enable fault detection */
541 	gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_INT_CNTL,
542 		(1 << 30) | 0xFFFF);
543 
544 	/* Turn on performance counters */
545 	gpu_write(gpu, REG_A5XX_RBBM_PERFCTR_CNTL, 0x01);
546 
547 	/* Select CP0 to always count cycles */
548 	gpu_write(gpu, REG_A5XX_CP_PERFCTR_CP_SEL_0, PERF_CP_ALWAYS_COUNT);
549 
	/* Point RBBM counter 0 at countable 6 to get the busy status for devfreq */
551 	gpu_write(gpu, REG_A5XX_RBBM_PERFCTR_RBBM_SEL_0, 6);
552 
553 	/* Increase VFD cache access so LRZ and other data gets evicted less */
554 	gpu_write(gpu, REG_A5XX_UCHE_CACHE_WAYS, 0x02);
555 
556 	/* Disable L2 bypass in the UCHE */
557 	gpu_write(gpu, REG_A5XX_UCHE_TRAP_BASE_LO, 0xFFFF0000);
558 	gpu_write(gpu, REG_A5XX_UCHE_TRAP_BASE_HI, 0x0001FFFF);
559 	gpu_write(gpu, REG_A5XX_UCHE_WRITE_THRU_BASE_LO, 0xFFFF0000);
560 	gpu_write(gpu, REG_A5XX_UCHE_WRITE_THRU_BASE_HI, 0x0001FFFF);
561 
	/* Set the GMEM VA range (0x100000 to 0x100000 + gpu->gmem - 1) */
563 	gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MIN_LO, 0x00100000);
564 	gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MIN_HI, 0x00000000);
565 	gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MAX_LO,
566 		0x00100000 + adreno_gpu->gmem - 1);
567 	gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MAX_HI, 0x00000000);
568 
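	/* CP queue and ROQ thresholds - magic values presumably carried over from the vendor driver */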
569 	gpu_write(gpu, REG_A5XX_CP_MEQ_THRESHOLDS, 0x40);
570 	gpu_write(gpu, REG_A5XX_CP_MERCIU_SIZE, 0x40);
571 	gpu_write(gpu, REG_A5XX_CP_ROQ_THRESHOLDS_2, 0x80000060);
572 	gpu_write(gpu, REG_A5XX_CP_ROQ_THRESHOLDS_1, 0x40201B16);
573 
574 	gpu_write(gpu, REG_A5XX_PC_DBG_ECO_CNTL, (0x400 << 11 | 0x300 << 22));
575 
576 	if (adreno_gpu->info->quirks & ADRENO_QUIRK_TWO_PASS_USE_WFI)
577 		gpu_rmw(gpu, REG_A5XX_PC_DBG_ECO_CNTL, 0, (1 << 8));
578 
579 	gpu_write(gpu, REG_A5XX_PC_DBG_ECO_CNTL, 0xc0200100);
580 
581 	/* Enable USE_RETENTION_FLOPS */
582 	gpu_write(gpu, REG_A5XX_CP_CHICKEN_DBG, 0x02000000);
583 
584 	/* Enable ME/PFP split notification */
585 	gpu_write(gpu, REG_A5XX_RBBM_AHB_CNTL1, 0xA6FFFFFF);
586 
587 	/* Enable HWCG */
588 	a5xx_set_hwcg(gpu, true);
589 
590 	gpu_write(gpu, REG_A5XX_RBBM_AHB_CNTL2, 0x0000003F);
591 
592 	/* Set the highest bank bit */
593 	gpu_write(gpu, REG_A5XX_TPL1_MODE_CNTL, 2 << 7);
594 	gpu_write(gpu, REG_A5XX_RB_MODE_CNTL, 2 << 1);
595 
596 	/* Protect registers from the CP */
597 	gpu_write(gpu, REG_A5XX_CP_PROTECT_CNTL, 0x00000007);
598 
599 	/* RBBM */
600 	gpu_write(gpu, REG_A5XX_CP_PROTECT(0), ADRENO_PROTECT_RW(0x04, 4));
601 	gpu_write(gpu, REG_A5XX_CP_PROTECT(1), ADRENO_PROTECT_RW(0x08, 8));
602 	gpu_write(gpu, REG_A5XX_CP_PROTECT(2), ADRENO_PROTECT_RW(0x10, 16));
603 	gpu_write(gpu, REG_A5XX_CP_PROTECT(3), ADRENO_PROTECT_RW(0x20, 32));
604 	gpu_write(gpu, REG_A5XX_CP_PROTECT(4), ADRENO_PROTECT_RW(0x40, 64));
605 	gpu_write(gpu, REG_A5XX_CP_PROTECT(5), ADRENO_PROTECT_RW(0x80, 64));
606 
607 	/* Content protect */
608 	gpu_write(gpu, REG_A5XX_CP_PROTECT(6),
609 		ADRENO_PROTECT_RW(REG_A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_LO,
610 			16));
611 	gpu_write(gpu, REG_A5XX_CP_PROTECT(7),
612 		ADRENO_PROTECT_RW(REG_A5XX_RBBM_SECVID_TRUST_CNTL, 2));
613 
614 	/* CP */
615 	gpu_write(gpu, REG_A5XX_CP_PROTECT(8), ADRENO_PROTECT_RW(0x800, 64));
616 	gpu_write(gpu, REG_A5XX_CP_PROTECT(9), ADRENO_PROTECT_RW(0x840, 8));
617 	gpu_write(gpu, REG_A5XX_CP_PROTECT(10), ADRENO_PROTECT_RW(0x880, 32));
618 	gpu_write(gpu, REG_A5XX_CP_PROTECT(11), ADRENO_PROTECT_RW(0xAA0, 1));
619 
620 	/* RB */
621 	gpu_write(gpu, REG_A5XX_CP_PROTECT(12), ADRENO_PROTECT_RW(0xCC0, 1));
622 	gpu_write(gpu, REG_A5XX_CP_PROTECT(13), ADRENO_PROTECT_RW(0xCF0, 2));
623 
624 	/* VPC */
625 	gpu_write(gpu, REG_A5XX_CP_PROTECT(14), ADRENO_PROTECT_RW(0xE68, 8));
626 	gpu_write(gpu, REG_A5XX_CP_PROTECT(15), ADRENO_PROTECT_RW(0xE70, 4));
627 
628 	/* UCHE */
629 	gpu_write(gpu, REG_A5XX_CP_PROTECT(16), ADRENO_PROTECT_RW(0xE80, 16));
630 
631 	if (adreno_is_a530(adreno_gpu))
632 		gpu_write(gpu, REG_A5XX_CP_PROTECT(17),
633 			ADRENO_PROTECT_RW(0x10000, 0x8000));
634 
635 	gpu_write(gpu, REG_A5XX_RBBM_SECVID_TSB_CNTL, 0);
636 	/*
	 * Disable the trusted memory range - we don't actually support secure
638 	 * memory rendering at this point in time and we don't want to block off
639 	 * part of the virtual memory space.
640 	 */
641 	gpu_write64(gpu, REG_A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_LO,
642 		REG_A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_HI, 0x00000000);
643 	gpu_write(gpu, REG_A5XX_RBBM_SECVID_TSB_TRUSTED_SIZE, 0x00000000);
644 
645 	ret = adreno_hw_init(gpu);
646 	if (ret)
647 		return ret;
648 
649 	a5xx_preempt_hw_init(gpu);
650 
651 	a5xx_gpmu_ucode_init(gpu);
652 
653 	ret = a5xx_ucode_init(gpu);
654 	if (ret)
655 		return ret;
656 
	/* Interrupts were kept masked through the initial bringup; enable the ones we handle */
658 	gpu_write(gpu, REG_A5XX_RBBM_INT_0_MASK, A5XX_INT_MASK);
659 
660 	/* Clear ME_HALT to start the micro engine */
661 	gpu_write(gpu, REG_A5XX_CP_PFP_ME_CNTL, 0);
662 	ret = a5xx_me_init(gpu);
663 	if (ret)
664 		return ret;
665 
666 	ret = a5xx_power_init(gpu);
667 	if (ret)
668 		return ret;
669 
670 	/*
671 	 * Send a pipeline event stat to get misbehaving counters to start
672 	 * ticking correctly
673 	 */
674 	if (adreno_is_a530(adreno_gpu)) {
675 		OUT_PKT7(gpu->rb[0], CP_EVENT_WRITE, 1);
676 		OUT_RING(gpu->rb[0], 0x0F);
677 
678 		gpu->funcs->flush(gpu, gpu->rb[0]);
679 		if (!a5xx_idle(gpu, gpu->rb[0]))
680 			return -EINVAL;
681 	}
682 
683 	/*
684 	 * Try to load a zap shader into the secure world. If successful
685 	 * we can use the CP to switch out of secure mode. If not then we
	 * have no recourse but to try to switch ourselves out manually. If we
687 	 * guessed wrong then access to the RBBM_SECVID_TRUST_CNTL register will
688 	 * be blocked and a permissions violation will soon follow.
689 	 */
690 	ret = a5xx_zap_shader_init(gpu);
691 	if (!ret) {
692 		OUT_PKT7(gpu->rb[0], CP_SET_SECURE_MODE, 1);
693 		OUT_RING(gpu->rb[0], 0x00000000);
694 
695 		gpu->funcs->flush(gpu, gpu->rb[0]);
696 		if (!a5xx_idle(gpu, gpu->rb[0]))
697 			return -EINVAL;
698 	} else {
699 		/* Print a warning so if we die, we know why */
700 		dev_warn_once(gpu->dev->dev,
701 			"Zap shader not enabled - using SECVID_TRUST_CNTL instead\n");
702 		gpu_write(gpu, REG_A5XX_RBBM_SECVID_TRUST_CNTL, 0x0);
703 	}
704 
705 	/* Last step - yield the ringbuffer */
706 	a5xx_preempt_start(gpu);
707 
708 	return 0;
709 }
710 
711 static void a5xx_recover(struct msm_gpu *gpu)
712 {
713 	int i;
714 
715 	adreno_dump_info(gpu);
716 
717 	for (i = 0; i < 8; i++) {
718 		printk("CP_SCRATCH_REG%d: %u\n", i,
719 			gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(i)));
720 	}
721 
722 	if (hang_debug)
723 		a5xx_dump(gpu);
724 
725 	gpu_write(gpu, REG_A5XX_RBBM_SW_RESET_CMD, 1);
726 	gpu_read(gpu, REG_A5XX_RBBM_SW_RESET_CMD);
727 	gpu_write(gpu, REG_A5XX_RBBM_SW_RESET_CMD, 0);
728 	adreno_recover(gpu);
729 }
730 
731 static void a5xx_destroy(struct msm_gpu *gpu)
732 {
733 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
734 	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
735 
736 	DBG("%s", gpu->name);
737 
738 	a5xx_preempt_fini(gpu);
739 
740 	if (a5xx_gpu->pm4_bo) {
741 		msm_gem_unpin_iova(a5xx_gpu->pm4_bo, gpu->aspace);
742 		drm_gem_object_put_unlocked(a5xx_gpu->pm4_bo);
743 	}
744 
745 	if (a5xx_gpu->pfp_bo) {
746 		msm_gem_unpin_iova(a5xx_gpu->pfp_bo, gpu->aspace);
747 		drm_gem_object_put_unlocked(a5xx_gpu->pfp_bo);
748 	}
749 
750 	if (a5xx_gpu->gpmu_bo) {
751 		msm_gem_unpin_iova(a5xx_gpu->gpmu_bo, gpu->aspace);
752 		drm_gem_object_put_unlocked(a5xx_gpu->gpmu_bo);
753 	}
754 
755 	adreno_gpu_cleanup(adreno_gpu);
756 	kfree(a5xx_gpu);
757 }
758 
759 static inline bool _a5xx_check_idle(struct msm_gpu *gpu)
760 {
761 	if (gpu_read(gpu, REG_A5XX_RBBM_STATUS) & ~A5XX_RBBM_STATUS_HI_BUSY)
762 		return false;
763 
764 	/*
765 	 * Nearly every abnormality ends up pausing the GPU and triggering a
766 	 * fault so we can safely just watch for this one interrupt to fire
767 	 */
768 	return !(gpu_read(gpu, REG_A5XX_RBBM_INT_0_STATUS) &
769 		A5XX_RBBM_INT_0_MASK_MISC_HANG_DETECT);
770 }
771 
772 bool a5xx_idle(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
773 {
774 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
775 	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
776 
777 	if (ring != a5xx_gpu->cur_ring) {
778 		WARN(1, "Tried to idle a non-current ringbuffer\n");
779 		return false;
780 	}
781 
782 	/* wait for CP to drain ringbuffer: */
783 	if (!adreno_idle(gpu, ring))
784 		return false;
785 
786 	if (spin_until(_a5xx_check_idle(gpu))) {
787 		DRM_ERROR("%s: %ps: timeout waiting for GPU to idle: status %8.8X irq %8.8X rptr/wptr %d/%d\n",
788 			gpu->name, __builtin_return_address(0),
789 			gpu_read(gpu, REG_A5XX_RBBM_STATUS),
790 			gpu_read(gpu, REG_A5XX_RBBM_INT_0_STATUS),
791 			gpu_read(gpu, REG_A5XX_CP_RB_RPTR),
792 			gpu_read(gpu, REG_A5XX_CP_RB_WPTR));
793 		return false;
794 	}
795 
796 	return true;
797 }
798 
799 static int a5xx_fault_handler(void *arg, unsigned long iova, int flags)
800 {
801 	struct msm_gpu *gpu = arg;
802 	pr_warn_ratelimited("*** gpu fault: iova=%08lx, flags=%d (%u,%u,%u,%u)\n",
803 			iova, flags,
804 			gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(4)),
805 			gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(5)),
806 			gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(6)),
807 			gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(7)));
808 
809 	return -EFAULT;
810 }
811 
812 static void a5xx_cp_err_irq(struct msm_gpu *gpu)
813 {
814 	u32 status = gpu_read(gpu, REG_A5XX_CP_INTERRUPT_STATUS);
815 
816 	if (status & A5XX_CP_INT_CP_OPCODE_ERROR) {
817 		u32 val;
818 
819 		gpu_write(gpu, REG_A5XX_CP_PFP_STAT_ADDR, 0);
820 
821 		/*
822 		 * REG_A5XX_CP_PFP_STAT_DATA is indexed, and we want index 1 so
823 		 * read it twice
824 		 */
825 
826 		gpu_read(gpu, REG_A5XX_CP_PFP_STAT_DATA);
827 		val = gpu_read(gpu, REG_A5XX_CP_PFP_STAT_DATA);
828 
829 		dev_err_ratelimited(gpu->dev->dev, "CP | opcode error | possible opcode=0x%8.8X\n",
830 			val);
831 	}
832 
833 	if (status & A5XX_CP_INT_CP_HW_FAULT_ERROR)
834 		dev_err_ratelimited(gpu->dev->dev, "CP | HW fault | status=0x%8.8X\n",
835 			gpu_read(gpu, REG_A5XX_CP_HW_FAULT));
836 
837 	if (status & A5XX_CP_INT_CP_DMA_ERROR)
838 		dev_err_ratelimited(gpu->dev->dev, "CP | DMA error\n");
839 
840 	if (status & A5XX_CP_INT_CP_REGISTER_PROTECTION_ERROR) {
841 		u32 val = gpu_read(gpu, REG_A5XX_CP_PROTECT_STATUS);
842 
843 		dev_err_ratelimited(gpu->dev->dev,
844 			"CP | protected mode error | %s | addr=0x%8.8X | status=0x%8.8X\n",
845 			val & (1 << 24) ? "WRITE" : "READ",
846 			(val & 0xFFFFF) >> 2, val);
847 	}
848 
849 	if (status & A5XX_CP_INT_CP_AHB_ERROR) {
850 		u32 status = gpu_read(gpu, REG_A5XX_CP_AHB_FAULT);
851 		const char *access[16] = { "reserved", "reserved",
852 			"timestamp lo", "timestamp hi", "pfp read", "pfp write",
853 			"", "", "me read", "me write", "", "", "crashdump read",
854 			"crashdump write" };
855 
856 		dev_err_ratelimited(gpu->dev->dev,
857 			"CP | AHB error | addr=%X access=%s error=%d | status=0x%8.8X\n",
858 			status & 0xFFFFF, access[(status >> 24) & 0xF],
			!!(status & (1 << 31)), status);
860 	}
861 }
862 
863 static void a5xx_rbbm_err_irq(struct msm_gpu *gpu, u32 status)
864 {
865 	if (status & A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR) {
866 		u32 val = gpu_read(gpu, REG_A5XX_RBBM_AHB_ERROR_STATUS);
867 
868 		dev_err_ratelimited(gpu->dev->dev,
869 			"RBBM | AHB bus error | %s | addr=0x%X | ports=0x%X:0x%X\n",
870 			val & (1 << 28) ? "WRITE" : "READ",
871 			(val & 0xFFFFF) >> 2, (val >> 20) & 0x3,
872 			(val >> 24) & 0xF);
873 
874 		/* Clear the error */
875 		gpu_write(gpu, REG_A5XX_RBBM_AHB_CMD, (1 << 4));
876 
877 		/* Clear the interrupt */
878 		gpu_write(gpu, REG_A5XX_RBBM_INT_CLEAR_CMD,
879 			A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR);
880 	}
881 
882 	if (status & A5XX_RBBM_INT_0_MASK_RBBM_TRANSFER_TIMEOUT)
883 		dev_err_ratelimited(gpu->dev->dev, "RBBM | AHB transfer timeout\n");
884 
885 	if (status & A5XX_RBBM_INT_0_MASK_RBBM_ME_MS_TIMEOUT)
886 		dev_err_ratelimited(gpu->dev->dev, "RBBM | ME master split | status=0x%X\n",
887 			gpu_read(gpu, REG_A5XX_RBBM_AHB_ME_SPLIT_STATUS));
888 
889 	if (status & A5XX_RBBM_INT_0_MASK_RBBM_PFP_MS_TIMEOUT)
890 		dev_err_ratelimited(gpu->dev->dev, "RBBM | PFP master split | status=0x%X\n",
891 			gpu_read(gpu, REG_A5XX_RBBM_AHB_PFP_SPLIT_STATUS));
892 
893 	if (status & A5XX_RBBM_INT_0_MASK_RBBM_ETS_MS_TIMEOUT)
894 		dev_err_ratelimited(gpu->dev->dev, "RBBM | ETS master split | status=0x%X\n",
895 			gpu_read(gpu, REG_A5XX_RBBM_AHB_ETS_SPLIT_STATUS));
896 
897 	if (status & A5XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNC_OVERFLOW)
898 		dev_err_ratelimited(gpu->dev->dev, "RBBM | ATB ASYNC overflow\n");
899 
900 	if (status & A5XX_RBBM_INT_0_MASK_RBBM_ATB_BUS_OVERFLOW)
901 		dev_err_ratelimited(gpu->dev->dev, "RBBM | ATB bus overflow\n");
902 }
903 
904 static void a5xx_uche_err_irq(struct msm_gpu *gpu)
905 {
	uint64_t addr = (uint64_t) gpu_read(gpu, REG_A5XX_UCHE_TRAP_LOG_HI) << 32;
907 
908 	addr |= gpu_read(gpu, REG_A5XX_UCHE_TRAP_LOG_LO);
909 
910 	dev_err_ratelimited(gpu->dev->dev, "UCHE | Out of bounds access | addr=0x%llX\n",
911 		addr);
912 }
913 
914 static void a5xx_gpmu_err_irq(struct msm_gpu *gpu)
915 {
916 	dev_err_ratelimited(gpu->dev->dev, "GPMU | voltage droop\n");
917 }
918 
919 static void a5xx_fault_detect_irq(struct msm_gpu *gpu)
920 {
921 	struct drm_device *dev = gpu->dev;
922 	struct msm_drm_private *priv = dev->dev_private;
923 	struct msm_ringbuffer *ring = gpu->funcs->active_ring(gpu);
924 
925 	DRM_DEV_ERROR(dev->dev, "gpu fault ring %d fence %x status %8.8X rb %4.4x/%4.4x ib1 %16.16llX/%4.4x ib2 %16.16llX/%4.4x\n",
926 		ring ? ring->id : -1, ring ? ring->seqno : 0,
927 		gpu_read(gpu, REG_A5XX_RBBM_STATUS),
928 		gpu_read(gpu, REG_A5XX_CP_RB_RPTR),
929 		gpu_read(gpu, REG_A5XX_CP_RB_WPTR),
930 		gpu_read64(gpu, REG_A5XX_CP_IB1_BASE, REG_A5XX_CP_IB1_BASE_HI),
931 		gpu_read(gpu, REG_A5XX_CP_IB1_BUFSZ),
932 		gpu_read64(gpu, REG_A5XX_CP_IB2_BASE, REG_A5XX_CP_IB2_BASE_HI),
933 		gpu_read(gpu, REG_A5XX_CP_IB2_BUFSZ));
934 
935 	/* Turn off the hangcheck timer to keep it from bothering us */
936 	del_timer(&gpu->hangcheck_timer);
937 
938 	queue_work(priv->wq, &gpu->recover_work);
939 }
940 
941 #define RBBM_ERROR_MASK \
942 	(A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR | \
943 	A5XX_RBBM_INT_0_MASK_RBBM_TRANSFER_TIMEOUT | \
944 	A5XX_RBBM_INT_0_MASK_RBBM_ME_MS_TIMEOUT | \
945 	A5XX_RBBM_INT_0_MASK_RBBM_PFP_MS_TIMEOUT | \
946 	A5XX_RBBM_INT_0_MASK_RBBM_ETS_MS_TIMEOUT | \
947 	A5XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNC_OVERFLOW)
948 
949 static irqreturn_t a5xx_irq(struct msm_gpu *gpu)
950 {
951 	u32 status = gpu_read(gpu, REG_A5XX_RBBM_INT_0_STATUS);
952 
953 	/*
954 	 * Clear all the interrupts except RBBM_AHB_ERROR - if we clear it
955 	 * before the source is cleared the interrupt will storm.
956 	 */
957 	gpu_write(gpu, REG_A5XX_RBBM_INT_CLEAR_CMD,
958 		status & ~A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR);
959 
960 	/* Pass status to a5xx_rbbm_err_irq because we've already cleared it */
961 	if (status & RBBM_ERROR_MASK)
962 		a5xx_rbbm_err_irq(gpu, status);
963 
964 	if (status & A5XX_RBBM_INT_0_MASK_CP_HW_ERROR)
965 		a5xx_cp_err_irq(gpu);
966 
967 	if (status & A5XX_RBBM_INT_0_MASK_MISC_HANG_DETECT)
968 		a5xx_fault_detect_irq(gpu);
969 
970 	if (status & A5XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS)
971 		a5xx_uche_err_irq(gpu);
972 
973 	if (status & A5XX_RBBM_INT_0_MASK_GPMU_VOLTAGE_DROOP)
974 		a5xx_gpmu_err_irq(gpu);
975 
976 	if (status & A5XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS) {
977 		a5xx_preempt_trigger(gpu);
978 		msm_gpu_retire(gpu);
979 	}
980 
981 	if (status & A5XX_RBBM_INT_0_MASK_CP_SW)
982 		a5xx_preempt_irq(gpu);
983 
984 	return IRQ_HANDLED;
985 }
986 
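/* Map the generic adreno register enums to their A5XX register offsets */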
987 static const u32 a5xx_register_offsets[REG_ADRENO_REGISTER_MAX] = {
988 	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_BASE, REG_A5XX_CP_RB_BASE),
989 	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_BASE_HI, REG_A5XX_CP_RB_BASE_HI),
990 	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_RPTR_ADDR, REG_A5XX_CP_RB_RPTR_ADDR),
991 	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_RPTR_ADDR_HI,
992 		REG_A5XX_CP_RB_RPTR_ADDR_HI),
993 	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_RPTR, REG_A5XX_CP_RB_RPTR),
994 	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_WPTR, REG_A5XX_CP_RB_WPTR),
995 	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_CNTL, REG_A5XX_CP_RB_CNTL),
996 };
997 
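/* Inclusive start/end pairs of register ranges to dump for debug, terminated by ~0 */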
998 static const u32 a5xx_registers[] = {
999 	0x0000, 0x0002, 0x0004, 0x0020, 0x0022, 0x0026, 0x0029, 0x002B,
1000 	0x002E, 0x0035, 0x0038, 0x0042, 0x0044, 0x0044, 0x0047, 0x0095,
1001 	0x0097, 0x00BB, 0x03A0, 0x0464, 0x0469, 0x046F, 0x04D2, 0x04D3,
1002 	0x04E0, 0x0533, 0x0540, 0x0555, 0x0800, 0x081A, 0x081F, 0x0841,
1003 	0x0860, 0x0860, 0x0880, 0x08A0, 0x0B00, 0x0B12, 0x0B15, 0x0B28,
1004 	0x0B78, 0x0B7F, 0x0BB0, 0x0BBD, 0x0BC0, 0x0BC6, 0x0BD0, 0x0C53,
1005 	0x0C60, 0x0C61, 0x0C80, 0x0C82, 0x0C84, 0x0C85, 0x0C90, 0x0C98,
1006 	0x0CA0, 0x0CA0, 0x0CB0, 0x0CB2, 0x2180, 0x2185, 0x2580, 0x2585,
1007 	0x0CC1, 0x0CC1, 0x0CC4, 0x0CC7, 0x0CCC, 0x0CCC, 0x0CD0, 0x0CD8,
1008 	0x0CE0, 0x0CE5, 0x0CE8, 0x0CE8, 0x0CEC, 0x0CF1, 0x0CFB, 0x0D0E,
1009 	0x2100, 0x211E, 0x2140, 0x2145, 0x2500, 0x251E, 0x2540, 0x2545,
1010 	0x0D10, 0x0D17, 0x0D20, 0x0D23, 0x0D30, 0x0D30, 0x20C0, 0x20C0,
1011 	0x24C0, 0x24C0, 0x0E40, 0x0E43, 0x0E4A, 0x0E4A, 0x0E50, 0x0E57,
1012 	0x0E60, 0x0E7C, 0x0E80, 0x0E8E, 0x0E90, 0x0E96, 0x0EA0, 0x0EA8,
1013 	0x0EB0, 0x0EB2, 0xE140, 0xE147, 0xE150, 0xE187, 0xE1A0, 0xE1A9,
1014 	0xE1B0, 0xE1B6, 0xE1C0, 0xE1C7, 0xE1D0, 0xE1D1, 0xE200, 0xE201,
1015 	0xE210, 0xE21C, 0xE240, 0xE268, 0xE000, 0xE006, 0xE010, 0xE09A,
1016 	0xE0A0, 0xE0A4, 0xE0AA, 0xE0EB, 0xE100, 0xE105, 0xE380, 0xE38F,
1017 	0xE3B0, 0xE3B0, 0xE400, 0xE405, 0xE408, 0xE4E9, 0xE4F0, 0xE4F0,
1018 	0xE280, 0xE280, 0xE282, 0xE2A3, 0xE2A5, 0xE2C2, 0xE940, 0xE947,
1019 	0xE950, 0xE987, 0xE9A0, 0xE9A9, 0xE9B0, 0xE9B6, 0xE9C0, 0xE9C7,
1020 	0xE9D0, 0xE9D1, 0xEA00, 0xEA01, 0xEA10, 0xEA1C, 0xEA40, 0xEA68,
1021 	0xE800, 0xE806, 0xE810, 0xE89A, 0xE8A0, 0xE8A4, 0xE8AA, 0xE8EB,
1022 	0xE900, 0xE905, 0xEB80, 0xEB8F, 0xEBB0, 0xEBB0, 0xEC00, 0xEC05,
1023 	0xEC08, 0xECE9, 0xECF0, 0xECF0, 0xEA80, 0xEA80, 0xEA82, 0xEAA3,
1024 	0xEAA5, 0xEAC2, 0xA800, 0xA800, 0xA820, 0xA828, 0xA840, 0xA87D,
	0xA880, 0xA88D, 0xA890, 0xA8A3, 0xA8D0, 0xA8D8, 0xA8E0, 0xA8F5,
1026 	0xAC60, 0xAC60, ~0,
1027 };
1028 
1029 static void a5xx_dump(struct msm_gpu *gpu)
1030 {
1031 	DRM_DEV_INFO(gpu->dev->dev, "status:   %08x\n",
1032 		gpu_read(gpu, REG_A5XX_RBBM_STATUS));
1033 	adreno_dump(gpu);
1034 }
1035 
1036 static int a5xx_pm_resume(struct msm_gpu *gpu)
1037 {
1038 	int ret;
1039 
1040 	/* Turn on the core power */
1041 	ret = msm_gpu_pm_resume(gpu);
1042 	if (ret)
1043 		return ret;
1044 
	/* Turn on the RBCCU power domain first to limit the chances of voltage droop */
1046 	gpu_write(gpu, REG_A5XX_GPMU_RBCCU_POWER_CNTL, 0x778000);
1047 
1048 	/* Wait 3 usecs before polling */
1049 	udelay(3);
1050 
1051 	ret = spin_usecs(gpu, 20, REG_A5XX_GPMU_RBCCU_PWR_CLK_STATUS,
1052 		(1 << 20), (1 << 20));
1053 	if (ret) {
1054 		DRM_ERROR("%s: timeout waiting for RBCCU GDSC enable: %X\n",
1055 			gpu->name,
1056 			gpu_read(gpu, REG_A5XX_GPMU_RBCCU_PWR_CLK_STATUS));
1057 		return ret;
1058 	}
1059 
1060 	/* Turn on the SP domain */
1061 	gpu_write(gpu, REG_A5XX_GPMU_SP_POWER_CNTL, 0x778000);
1062 	ret = spin_usecs(gpu, 20, REG_A5XX_GPMU_SP_PWR_CLK_STATUS,
1063 		(1 << 20), (1 << 20));
1064 	if (ret)
1065 		DRM_ERROR("%s: timeout waiting for SP GDSC enable\n",
1066 			gpu->name);
1067 
1068 	return ret;
1069 }
1070 
1071 static int a5xx_pm_suspend(struct msm_gpu *gpu)
1072 {
1073 	/* Clear the VBIF pipe before shutting down */
1074 	gpu_write(gpu, REG_A5XX_VBIF_XIN_HALT_CTRL0, 0xF);
1075 	spin_until((gpu_read(gpu, REG_A5XX_VBIF_XIN_HALT_CTRL1) & 0xF) == 0xF);
1076 
1077 	gpu_write(gpu, REG_A5XX_VBIF_XIN_HALT_CTRL0, 0);
1078 
1079 	/*
	 * Reset the VBIF before power collapse to avoid issues with FIFO
1081 	 * entries
1082 	 */
1083 	gpu_write(gpu, REG_A5XX_RBBM_BLOCK_SW_RESET_CMD, 0x003C0000);
1084 	gpu_write(gpu, REG_A5XX_RBBM_BLOCK_SW_RESET_CMD, 0x00000000);
1085 
1086 	return msm_gpu_pm_suspend(gpu);
1087 }
1088 
1089 static int a5xx_get_timestamp(struct msm_gpu *gpu, uint64_t *value)
1090 {
1091 	*value = gpu_read64(gpu, REG_A5XX_RBBM_PERFCTR_CP_0_LO,
1092 		REG_A5XX_RBBM_PERFCTR_CP_0_HI);
1093 
1094 	return 0;
1095 }
1096 
1097 struct a5xx_crashdumper {
1098 	void *ptr;
1099 	struct drm_gem_object *bo;
1100 	u64 iova;
1101 };
1102 
1103 struct a5xx_gpu_state {
1104 	struct msm_gpu_state base;
1105 	u32 *hlsqregs;
1106 };
1107 
1108 static int a5xx_crashdumper_init(struct msm_gpu *gpu,
1109 		struct a5xx_crashdumper *dumper)
1110 {
1111 	dumper->ptr = msm_gem_kernel_new_locked(gpu->dev,
1112 		SZ_1M, MSM_BO_UNCACHED, gpu->aspace,
1113 		&dumper->bo, &dumper->iova);
1114 
1115 	if (!IS_ERR(dumper->ptr))
1116 		msm_gem_object_set_name(dumper->bo, "crashdump");
1117 
1118 	return PTR_ERR_OR_ZERO(dumper->ptr);
1119 }
1120 
1121 static int a5xx_crashdumper_run(struct msm_gpu *gpu,
1122 		struct a5xx_crashdumper *dumper)
1123 {
1124 	u32 val;
1125 
1126 	if (IS_ERR_OR_NULL(dumper->ptr))
1127 		return -EINVAL;
1128 
1129 	gpu_write64(gpu, REG_A5XX_CP_CRASH_SCRIPT_BASE_LO,
1130 		REG_A5XX_CP_CRASH_SCRIPT_BASE_HI, dumper->iova);
1131 
1132 	gpu_write(gpu, REG_A5XX_CP_CRASH_DUMP_CNTL, 1);
1133 
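	/* Wait for the crashdumper to signal completion (presumably bit 2 of CRASH_DUMP_CNTL) */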
1134 	return gpu_poll_timeout(gpu, REG_A5XX_CP_CRASH_DUMP_CNTL, val,
1135 		val & 0x04, 100, 10000);
1136 }
1137 
1138 /*
 * This is a list of the registers that need to be read through the HLSQ
 * aperture by the crashdumper.  These are not normally accessible from
 * the CPU on a secure platform.
1142  */
1143 static const struct {
1144 	u32 type;
1145 	u32 regoffset;
1146 	u32 count;
1147 } a5xx_hlsq_aperture_regs[] = {
	{ 0x35, 0xe00, 0x32 },   /* HLSQ non-context */
1149 	{ 0x31, 0x2080, 0x1 },   /* HLSQ 2D context 0 */
1150 	{ 0x33, 0x2480, 0x1 },   /* HLSQ 2D context 1 */
1151 	{ 0x32, 0xe780, 0x62 },  /* HLSQ 3D context 0 */
1152 	{ 0x34, 0xef80, 0x62 },  /* HLSQ 3D context 1 */
1153 	{ 0x3f, 0x0ec0, 0x40 },  /* SP non-context */
1154 	{ 0x3d, 0x2040, 0x1 },   /* SP 2D context 0 */
1155 	{ 0x3b, 0x2440, 0x1 },   /* SP 2D context 1 */
1156 	{ 0x3e, 0xe580, 0x170 }, /* SP 3D context 0 */
1157 	{ 0x3c, 0xed80, 0x170 }, /* SP 3D context 1 */
1158 	{ 0x3a, 0x0f00, 0x1c },  /* TP non-context */
1159 	{ 0x38, 0x2000, 0xa },   /* TP 2D context 0 */
1160 	{ 0x36, 0x2400, 0xa },   /* TP 2D context 1 */
1161 	{ 0x39, 0xe700, 0x80 },  /* TP 3D context 0 */
1162 	{ 0x37, 0xef00, 0x80 },  /* TP 3D context 1 */
1163 };
1164 
1165 static void a5xx_gpu_state_get_hlsq_regs(struct msm_gpu *gpu,
1166 		struct a5xx_gpu_state *a5xx_state)
1167 {
1168 	struct a5xx_crashdumper dumper = { 0 };
1169 	u32 offset, count = 0;
1170 	u64 *ptr;
1171 	int i;
1172 
1173 	if (a5xx_crashdumper_init(gpu, &dumper))
1174 		return;
1175 
1176 	/* The script will be written at offset 0 */
1177 	ptr = dumper.ptr;
1178 
1179 	/* Start writing the data at offset 256k */
1180 	offset = dumper.iova + (256 * SZ_1K);
1181 
1182 	/* Count how many additional registers to get from the HLSQ aperture */
1183 	for (i = 0; i < ARRAY_SIZE(a5xx_hlsq_aperture_regs); i++)
1184 		count += a5xx_hlsq_aperture_regs[i].count;
1185 
1186 	a5xx_state->hlsqregs = kcalloc(count, sizeof(u32), GFP_KERNEL);
1187 	if (!a5xx_state->hlsqregs)
1188 		return;
1189 
1190 	/* Build the crashdump script */
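	/*
	 * Each script entry is a pair of 64-bit words: the first holds a
	 * value to write (or a target iova to dump into), the second encodes
	 * the register offset in its upper bits along with a write flag and
	 * a dword count
	 */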
1191 	for (i = 0; i < ARRAY_SIZE(a5xx_hlsq_aperture_regs); i++) {
1192 		u32 type = a5xx_hlsq_aperture_regs[i].type;
1193 		u32 c = a5xx_hlsq_aperture_regs[i].count;
1194 
1195 		/* Write the register to select the desired bank */
1196 		*ptr++ = ((u64) type << 8);
1197 		*ptr++ = (((u64) REG_A5XX_HLSQ_DBG_READ_SEL) << 44) |
1198 			(1 << 21) | 1;
1199 
1200 		*ptr++ = offset;
1201 		*ptr++ = (((u64) REG_A5XX_HLSQ_DBG_AHB_READ_APERTURE) << 44)
1202 			| c;
1203 
1204 		offset += c * sizeof(u32);
1205 	}
1206 
1207 	/* Write two zeros to close off the script */
1208 	*ptr++ = 0;
1209 	*ptr++ = 0;
1210 
1211 	if (a5xx_crashdumper_run(gpu, &dumper)) {
1212 		kfree(a5xx_state->hlsqregs);
1213 		msm_gem_kernel_put(dumper.bo, gpu->aspace, true);
1214 		return;
1215 	}
1216 
1217 	/* Copy the data from the crashdumper to the state */
1218 	memcpy(a5xx_state->hlsqregs, dumper.ptr + (256 * SZ_1K),
1219 		count * sizeof(u32));
1220 
1221 	msm_gem_kernel_put(dumper.bo, gpu->aspace, true);
1222 }
1223 
1224 static struct msm_gpu_state *a5xx_gpu_state_get(struct msm_gpu *gpu)
1225 {
1226 	struct a5xx_gpu_state *a5xx_state = kzalloc(sizeof(*a5xx_state),
1227 			GFP_KERNEL);
1228 
1229 	if (!a5xx_state)
1230 		return ERR_PTR(-ENOMEM);
1231 
1232 	/* Temporarily disable hardware clock gating before reading the hw */
1233 	a5xx_set_hwcg(gpu, false);
1234 
1235 	/* First get the generic state from the adreno core */
1236 	adreno_gpu_state_get(gpu, &(a5xx_state->base));
1237 
1238 	a5xx_state->base.rbbm_status = gpu_read(gpu, REG_A5XX_RBBM_STATUS);
1239 
1240 	/* Get the HLSQ regs with the help of the crashdumper */
1241 	a5xx_gpu_state_get_hlsq_regs(gpu, a5xx_state);
1242 
1243 	a5xx_set_hwcg(gpu, true);
1244 
1245 	return &a5xx_state->base;
1246 }
1247 
1248 static void a5xx_gpu_state_destroy(struct kref *kref)
1249 {
1250 	struct msm_gpu_state *state = container_of(kref,
1251 		struct msm_gpu_state, ref);
1252 	struct a5xx_gpu_state *a5xx_state = container_of(state,
1253 		struct a5xx_gpu_state, base);
1254 
1255 	kfree(a5xx_state->hlsqregs);
1256 
1257 	adreno_gpu_state_destroy(state);
1258 	kfree(a5xx_state);
1259 }
1260 
1261 int a5xx_gpu_state_put(struct msm_gpu_state *state)
1262 {
1263 	if (IS_ERR_OR_NULL(state))
1264 		return 1;
1265 
1266 	return kref_put(&state->ref, a5xx_gpu_state_destroy);
1267 }
1268 
1269 
1270 #if defined(CONFIG_DEBUG_FS) || defined(CONFIG_DEV_COREDUMP)
1271 void a5xx_show(struct msm_gpu *gpu, struct msm_gpu_state *state,
1272 		struct drm_printer *p)
1273 {
1274 	int i, j;
1275 	u32 pos = 0;
1276 	struct a5xx_gpu_state *a5xx_state = container_of(state,
1277 		struct a5xx_gpu_state, base);
1278 
1279 	if (IS_ERR_OR_NULL(state))
1280 		return;
1281 
1282 	adreno_show(gpu, state, p);
1283 
1284 	/* Dump the additional a5xx HLSQ registers */
1285 	if (!a5xx_state->hlsqregs)
1286 		return;
1287 
1288 	drm_printf(p, "registers-hlsq:\n");
1289 
1290 	for (i = 0; i < ARRAY_SIZE(a5xx_hlsq_aperture_regs); i++) {
1291 		u32 o = a5xx_hlsq_aperture_regs[i].regoffset;
1292 		u32 c = a5xx_hlsq_aperture_regs[i].count;
1293 
1294 		for (j = 0; j < c; j++, pos++, o++) {
1295 			/*
1296 			 * To keep the crashdump simple we pull the entire range
1297 			 * for each register type but not all of the registers
1298 			 * in the range are valid. Fortunately invalid registers
1299 			 * stick out like a sore thumb with a value of
1300 			 * 0xdeadbeef
1301 			 */
1302 			if (a5xx_state->hlsqregs[pos] == 0xdeadbeef)
1303 				continue;
1304 
1305 			drm_printf(p, "  - { offset: 0x%04x, value: 0x%08x }\n",
1306 				o << 2, a5xx_state->hlsqregs[pos]);
1307 		}
1308 	}
1309 }
1310 #endif
1311 
1312 static struct msm_ringbuffer *a5xx_active_ring(struct msm_gpu *gpu)
1313 {
1314 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1315 	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
1316 
1317 	return a5xx_gpu->cur_ring;
1318 }
1319 
1320 static unsigned long a5xx_gpu_busy(struct msm_gpu *gpu)
1321 {
1322 	u64 busy_cycles, busy_time;
1323 
1324 	busy_cycles = gpu_read64(gpu, REG_A5XX_RBBM_PERFCTR_RBBM_0_LO,
1325 			REG_A5XX_RBBM_PERFCTR_RBBM_0_HI);
1326 
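	/* Convert the cycle delta to microseconds of busy time */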
1327 	busy_time = busy_cycles - gpu->devfreq.busy_cycles;
1328 	do_div(busy_time, clk_get_rate(gpu->core_clk) / 1000000);
1329 
1330 	gpu->devfreq.busy_cycles = busy_cycles;
1331 
1332 	if (WARN_ON(busy_time > ~0LU))
1333 		return ~0LU;
1334 
1335 	return (unsigned long)busy_time;
1336 }
1337 
1338 static const struct adreno_gpu_funcs funcs = {
1339 	.base = {
1340 		.get_param = adreno_get_param,
1341 		.hw_init = a5xx_hw_init,
1342 		.pm_suspend = a5xx_pm_suspend,
1343 		.pm_resume = a5xx_pm_resume,
1344 		.recover = a5xx_recover,
1345 		.submit = a5xx_submit,
1346 		.flush = a5xx_flush,
1347 		.active_ring = a5xx_active_ring,
1348 		.irq = a5xx_irq,
1349 		.destroy = a5xx_destroy,
1350 #if defined(CONFIG_DEBUG_FS) || defined(CONFIG_DEV_COREDUMP)
1351 		.show = a5xx_show,
1352 #endif
1353 #if defined(CONFIG_DEBUG_FS)
1354 		.debugfs_init = a5xx_debugfs_init,
1355 #endif
1356 		.gpu_busy = a5xx_gpu_busy,
1357 		.gpu_state_get = a5xx_gpu_state_get,
1358 		.gpu_state_put = a5xx_gpu_state_put,
1359 	},
1360 	.get_timestamp = a5xx_get_timestamp,
1361 };
1362 
1363 static void check_speed_bin(struct device *dev)
1364 {
1365 	struct nvmem_cell *cell;
	u32 bin, val;
	void *buf;
1367 
1368 	cell = nvmem_cell_get(dev, "speed_bin");
1369 
1370 	/* If a nvmem cell isn't defined, nothing to do */
1371 	if (IS_ERR(cell))
1372 		return;
1373 
	buf = nvmem_cell_read(cell, NULL);
	nvmem_cell_put(cell);

	/* The read returns a buffer we own (or an error); check it and free it */
	if (IS_ERR(buf))
		return;

	bin = *((u32 *) buf);
	kfree(buf);
1376 
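	/* Tell the OPP layer which opp-supported-hw bit corresponds to this speed bin */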
1377 	val = (1 << bin);
1378 
1379 	dev_pm_opp_set_supported_hw(dev, &val, 1);
1380 }
1381 
1382 struct msm_gpu *a5xx_gpu_init(struct drm_device *dev)
1383 {
1384 	struct msm_drm_private *priv = dev->dev_private;
1385 	struct platform_device *pdev = priv->gpu_pdev;
1386 	struct a5xx_gpu *a5xx_gpu = NULL;
1387 	struct adreno_gpu *adreno_gpu;
1388 	struct msm_gpu *gpu;
1389 	int ret;
1390 
1391 	if (!pdev) {
1392 		DRM_DEV_ERROR(dev->dev, "No A5XX device is defined\n");
1393 		return ERR_PTR(-ENXIO);
1394 	}
1395 
1396 	a5xx_gpu = kzalloc(sizeof(*a5xx_gpu), GFP_KERNEL);
1397 	if (!a5xx_gpu)
1398 		return ERR_PTR(-ENOMEM);
1399 
1400 	adreno_gpu = &a5xx_gpu->base;
1401 	gpu = &adreno_gpu->base;
1402 
1403 	adreno_gpu->registers = a5xx_registers;
1404 	adreno_gpu->reg_offsets = a5xx_register_offsets;
1405 
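	/* Default leakage value used by the GPMU limits management (LM) code */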
1406 	a5xx_gpu->lm_leakage = 0x4E001A;
1407 
1408 	check_speed_bin(&pdev->dev);
1409 
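	/* Create the GPU with four ringbuffers so we can preempt between priority levels */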
1410 	ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs, 4);
1411 	if (ret) {
1412 		a5xx_destroy(&(a5xx_gpu->base.base));
1413 		return ERR_PTR(ret);
1414 	}
1415 
1416 	if (gpu->aspace)
1417 		msm_mmu_set_fault_handler(gpu->aspace->mmu, gpu, a5xx_fault_handler);
1418 
1419 	/* Set up the preemption specific bits and pieces for each ringbuffer */
1420 	a5xx_preempt_init(gpu);
1421 
1422 	return gpu;
1423 }
1424