xref: /openbmc/linux/drivers/gpu/drm/msm/adreno/a5xx_gpu.c (revision b8d312aa)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /* Copyright (c) 2016-2017 The Linux Foundation. All rights reserved.
3  */
4 
5 #include <linux/kernel.h>
6 #include <linux/types.h>
7 #include <linux/cpumask.h>
8 #include <linux/qcom_scm.h>
9 #include <linux/pm_opp.h>
10 #include <linux/nvmem-consumer.h>
11 #include <linux/slab.h>
12 #include "msm_gem.h"
13 #include "msm_mmu.h"
14 #include "a5xx_gpu.h"
15 
16 extern bool hang_debug;
17 static void a5xx_dump(struct msm_gpu *gpu);
18 
19 #define GPU_PAS_ID 13
20 
21 static void a5xx_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
22 {
23 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
24 	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
25 	uint32_t wptr;
26 	unsigned long flags;
27 
28 	spin_lock_irqsave(&ring->lock, flags);
29 
30 	/* Copy the shadow to the actual register */
31 	ring->cur = ring->next;
32 
33 	/* Make sure to wrap wptr if we need to */
34 	wptr = get_wptr(ring);
35 
36 	spin_unlock_irqrestore(&ring->lock, flags);
37 
38 	/* Make sure everything is posted before making a decision */
39 	mb();
40 
41 	/* Update HW if this is the current ring and we are not in preempt */
42 	if (a5xx_gpu->cur_ring == ring && !a5xx_in_preempt(a5xx_gpu))
43 		gpu_write(gpu, REG_A5XX_CP_RB_WPTR, wptr);
44 }
45 
46 static void a5xx_submit_in_rb(struct msm_gpu *gpu, struct msm_gem_submit *submit,
47 	struct msm_file_private *ctx)
48 {
49 	struct msm_drm_private *priv = gpu->dev->dev_private;
50 	struct msm_ringbuffer *ring = submit->ring;
51 	struct msm_gem_object *obj;
52 	uint32_t *ptr, dwords;
53 	unsigned int i;
54 
55 	for (i = 0; i < submit->nr_cmds; i++) {
56 		switch (submit->cmd[i].type) {
57 		case MSM_SUBMIT_CMD_IB_TARGET_BUF:
58 			break;
59 		case MSM_SUBMIT_CMD_CTX_RESTORE_BUF:
60 			if (priv->lastctx == ctx)
61 				break;
62 			/* fall-thru */
63 		case MSM_SUBMIT_CMD_BUF:
64 			/* copy commands into RB: */
65 			obj = submit->bos[submit->cmd[i].idx].obj;
66 			dwords = submit->cmd[i].size;
67 
68 			ptr = msm_gem_get_vaddr(&obj->base);
69 
70 			/* _get_vaddr() shouldn't fail at this point,
71 			 * since we've already mapped it once in
72 			 * submit_reloc()
73 			 */
74 			if (WARN_ON(!ptr))
75 				return;
76 
77 			for (i = 0; i < dwords; i++) {
78 				/* normally the OUT_PKTn() would wait
79 				 * for space for the packet.  But since
80 				 * we just OUT_RING() the whole thing,
81 				 * need to call adreno_wait_ring()
82 				 * ourself:
83 				 */
84 				adreno_wait_ring(ring, 1);
85 				OUT_RING(ring, ptr[i]);
86 			}
87 
88 			msm_gem_put_vaddr(&obj->base);
89 
90 			break;
91 		}
92 	}
93 
94 	a5xx_flush(gpu, ring);
95 	a5xx_preempt_trigger(gpu);
96 
97 	/* we might not necessarily have a cmd from userspace to
98 	 * trigger an event to know that submit has completed, so
99 	 * do this manually:
100 	 */
101 	a5xx_idle(gpu, ring);
102 	ring->memptrs->fence = submit->seqno;
103 	msm_gpu_retire(gpu);
104 }
105 
106 static void a5xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit,
107 	struct msm_file_private *ctx)
108 {
109 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
110 	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
111 	struct msm_drm_private *priv = gpu->dev->dev_private;
112 	struct msm_ringbuffer *ring = submit->ring;
113 	unsigned int i, ibs = 0;
114 
115 	if (IS_ENABLED(CONFIG_DRM_MSM_GPU_SUDO) && submit->in_rb) {
116 		priv->lastctx = NULL;
117 		a5xx_submit_in_rb(gpu, submit, ctx);
118 		return;
119 	}
120 
121 	OUT_PKT7(ring, CP_PREEMPT_ENABLE_GLOBAL, 1);
122 	OUT_RING(ring, 0x02);
123 
124 	/* Turn off protected mode to write to special registers */
125 	OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
126 	OUT_RING(ring, 0);
127 
128 	/* Set the save preemption record for the ring/command */
129 	OUT_PKT4(ring, REG_A5XX_CP_CONTEXT_SWITCH_SAVE_ADDR_LO, 2);
130 	OUT_RING(ring, lower_32_bits(a5xx_gpu->preempt_iova[submit->ring->id]));
131 	OUT_RING(ring, upper_32_bits(a5xx_gpu->preempt_iova[submit->ring->id]));
132 
133 	/* Turn back on protected mode */
134 	OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
135 	OUT_RING(ring, 1);
136 
137 	/* Enable local preemption for finegrain preemption */
138 	OUT_PKT7(ring, CP_PREEMPT_ENABLE_GLOBAL, 1);
139 	OUT_RING(ring, 0x02);
140 
141 	/* Allow CP_CONTEXT_SWITCH_YIELD packets in the IB2 */
142 	OUT_PKT7(ring, CP_YIELD_ENABLE, 1);
143 	OUT_RING(ring, 0x02);
144 
145 	/* Submit the commands */
146 	for (i = 0; i < submit->nr_cmds; i++) {
147 		switch (submit->cmd[i].type) {
148 		case MSM_SUBMIT_CMD_IB_TARGET_BUF:
149 			break;
150 		case MSM_SUBMIT_CMD_CTX_RESTORE_BUF:
151 			if (priv->lastctx == ctx)
152 				break;
153 			/* fall-thru */
154 		case MSM_SUBMIT_CMD_BUF:
155 			OUT_PKT7(ring, CP_INDIRECT_BUFFER_PFE, 3);
156 			OUT_RING(ring, lower_32_bits(submit->cmd[i].iova));
157 			OUT_RING(ring, upper_32_bits(submit->cmd[i].iova));
158 			OUT_RING(ring, submit->cmd[i].size);
159 			ibs++;
160 			break;
161 		}
162 	}
163 
164 	/*
165 	 * Write the render mode to NULL (0) to indicate to the CP that the IBs
166 	 * are done rendering - otherwise a lucky preemption would start
167 	 * replaying from the last checkpoint
168 	 */
169 	OUT_PKT7(ring, CP_SET_RENDER_MODE, 5);
170 	OUT_RING(ring, 0);
171 	OUT_RING(ring, 0);
172 	OUT_RING(ring, 0);
173 	OUT_RING(ring, 0);
174 	OUT_RING(ring, 0);
175 
176 	/* Turn off IB level preemptions */
177 	OUT_PKT7(ring, CP_YIELD_ENABLE, 1);
178 	OUT_RING(ring, 0x01);
179 
180 	/* Write the fence to the scratch register */
181 	OUT_PKT4(ring, REG_A5XX_CP_SCRATCH_REG(2), 1);
182 	OUT_RING(ring, submit->seqno);
183 
184 	/*
185 	 * Execute a CACHE_FLUSH_TS event. This will ensure that the
186 	 * timestamp is written to the memory and then triggers the interrupt
187 	 */
188 	OUT_PKT7(ring, CP_EVENT_WRITE, 4);
189 	OUT_RING(ring, CACHE_FLUSH_TS | (1 << 31));
190 	OUT_RING(ring, lower_32_bits(rbmemptr(ring, fence)));
191 	OUT_RING(ring, upper_32_bits(rbmemptr(ring, fence)));
192 	OUT_RING(ring, submit->seqno);
193 
194 	/* Yield the floor on command completion */
195 	OUT_PKT7(ring, CP_CONTEXT_SWITCH_YIELD, 4);
196 	/*
197 	 * If dword[2:1] are non zero, they specify an address for the CP to
198 	 * write the value of dword[3] to on preemption complete. Write 0 to
199 	 * skip the write
200 	 */
201 	OUT_RING(ring, 0x00);
202 	OUT_RING(ring, 0x00);
203 	/* Data value - not used if the address above is 0 */
204 	OUT_RING(ring, 0x01);
205 	/* Set bit 0 to trigger an interrupt on preempt complete */
206 	OUT_RING(ring, 0x01);
207 
208 	a5xx_flush(gpu, ring);
209 
210 	/* Check to see if we need to start preemption */
211 	a5xx_preempt_trigger(gpu);
212 }
213 
/*
 * Hardware clock gating (HWCG) configuration: register/value pairs that
 * a5xx_set_hwcg() programs when enabling clock gating (or zeroes when
 * disabling it).  Values are per-block control, hysteresis and delay
 * settings taken from the downstream programming sequence.
 */
static const struct {
	u32 offset;
	u32 value;
} a5xx_hwcg[] = {
	/* Shader processors (SP0-3) */
	{REG_A5XX_RBBM_CLOCK_CNTL_SP0, 0x02222222},
	{REG_A5XX_RBBM_CLOCK_CNTL_SP1, 0x02222222},
	{REG_A5XX_RBBM_CLOCK_CNTL_SP2, 0x02222222},
	{REG_A5XX_RBBM_CLOCK_CNTL_SP3, 0x02222222},
	{REG_A5XX_RBBM_CLOCK_CNTL2_SP0, 0x02222220},
	{REG_A5XX_RBBM_CLOCK_CNTL2_SP1, 0x02222220},
	{REG_A5XX_RBBM_CLOCK_CNTL2_SP2, 0x02222220},
	{REG_A5XX_RBBM_CLOCK_CNTL2_SP3, 0x02222220},
	{REG_A5XX_RBBM_CLOCK_HYST_SP0, 0x0000F3CF},
	{REG_A5XX_RBBM_CLOCK_HYST_SP1, 0x0000F3CF},
	{REG_A5XX_RBBM_CLOCK_HYST_SP2, 0x0000F3CF},
	{REG_A5XX_RBBM_CLOCK_HYST_SP3, 0x0000F3CF},
	{REG_A5XX_RBBM_CLOCK_DELAY_SP0, 0x00000080},
	{REG_A5XX_RBBM_CLOCK_DELAY_SP1, 0x00000080},
	{REG_A5XX_RBBM_CLOCK_DELAY_SP2, 0x00000080},
	{REG_A5XX_RBBM_CLOCK_DELAY_SP3, 0x00000080},
	/* Texture pipes (TP0-3) */
	{REG_A5XX_RBBM_CLOCK_CNTL_TP0, 0x22222222},
	{REG_A5XX_RBBM_CLOCK_CNTL_TP1, 0x22222222},
	{REG_A5XX_RBBM_CLOCK_CNTL_TP2, 0x22222222},
	{REG_A5XX_RBBM_CLOCK_CNTL_TP3, 0x22222222},
	{REG_A5XX_RBBM_CLOCK_CNTL2_TP0, 0x22222222},
	{REG_A5XX_RBBM_CLOCK_CNTL2_TP1, 0x22222222},
	{REG_A5XX_RBBM_CLOCK_CNTL2_TP2, 0x22222222},
	{REG_A5XX_RBBM_CLOCK_CNTL2_TP3, 0x22222222},
	{REG_A5XX_RBBM_CLOCK_CNTL3_TP0, 0x00002222},
	{REG_A5XX_RBBM_CLOCK_CNTL3_TP1, 0x00002222},
	{REG_A5XX_RBBM_CLOCK_CNTL3_TP2, 0x00002222},
	{REG_A5XX_RBBM_CLOCK_CNTL3_TP3, 0x00002222},
	{REG_A5XX_RBBM_CLOCK_HYST_TP0, 0x77777777},
	{REG_A5XX_RBBM_CLOCK_HYST_TP1, 0x77777777},
	{REG_A5XX_RBBM_CLOCK_HYST_TP2, 0x77777777},
	{REG_A5XX_RBBM_CLOCK_HYST_TP3, 0x77777777},
	{REG_A5XX_RBBM_CLOCK_HYST2_TP0, 0x77777777},
	{REG_A5XX_RBBM_CLOCK_HYST2_TP1, 0x77777777},
	{REG_A5XX_RBBM_CLOCK_HYST2_TP2, 0x77777777},
	{REG_A5XX_RBBM_CLOCK_HYST2_TP3, 0x77777777},
	{REG_A5XX_RBBM_CLOCK_HYST3_TP0, 0x00007777},
	{REG_A5XX_RBBM_CLOCK_HYST3_TP1, 0x00007777},
	{REG_A5XX_RBBM_CLOCK_HYST3_TP2, 0x00007777},
	{REG_A5XX_RBBM_CLOCK_HYST3_TP3, 0x00007777},
	{REG_A5XX_RBBM_CLOCK_DELAY_TP0, 0x11111111},
	{REG_A5XX_RBBM_CLOCK_DELAY_TP1, 0x11111111},
	{REG_A5XX_RBBM_CLOCK_DELAY_TP2, 0x11111111},
	{REG_A5XX_RBBM_CLOCK_DELAY_TP3, 0x11111111},
	{REG_A5XX_RBBM_CLOCK_DELAY2_TP0, 0x11111111},
	{REG_A5XX_RBBM_CLOCK_DELAY2_TP1, 0x11111111},
	{REG_A5XX_RBBM_CLOCK_DELAY2_TP2, 0x11111111},
	{REG_A5XX_RBBM_CLOCK_DELAY2_TP3, 0x11111111},
	{REG_A5XX_RBBM_CLOCK_DELAY3_TP0, 0x00001111},
	{REG_A5XX_RBBM_CLOCK_DELAY3_TP1, 0x00001111},
	{REG_A5XX_RBBM_CLOCK_DELAY3_TP2, 0x00001111},
	{REG_A5XX_RBBM_CLOCK_DELAY3_TP3, 0x00001111},
	/* Unified L2 cache (UCHE) */
	{REG_A5XX_RBBM_CLOCK_CNTL_UCHE, 0x22222222},
	{REG_A5XX_RBBM_CLOCK_CNTL2_UCHE, 0x22222222},
	{REG_A5XX_RBBM_CLOCK_CNTL3_UCHE, 0x22222222},
	{REG_A5XX_RBBM_CLOCK_CNTL4_UCHE, 0x00222222},
	{REG_A5XX_RBBM_CLOCK_HYST_UCHE, 0x00444444},
	{REG_A5XX_RBBM_CLOCK_DELAY_UCHE, 0x00000002},
	/* Render backends (RB0-3) and color cache units (CCU0-3) */
	{REG_A5XX_RBBM_CLOCK_CNTL_RB0, 0x22222222},
	{REG_A5XX_RBBM_CLOCK_CNTL_RB1, 0x22222222},
	{REG_A5XX_RBBM_CLOCK_CNTL_RB2, 0x22222222},
	{REG_A5XX_RBBM_CLOCK_CNTL_RB3, 0x22222222},
	{REG_A5XX_RBBM_CLOCK_CNTL2_RB0, 0x00222222},
	{REG_A5XX_RBBM_CLOCK_CNTL2_RB1, 0x00222222},
	{REG_A5XX_RBBM_CLOCK_CNTL2_RB2, 0x00222222},
	{REG_A5XX_RBBM_CLOCK_CNTL2_RB3, 0x00222222},
	{REG_A5XX_RBBM_CLOCK_CNTL_CCU0, 0x00022220},
	{REG_A5XX_RBBM_CLOCK_CNTL_CCU1, 0x00022220},
	{REG_A5XX_RBBM_CLOCK_CNTL_CCU2, 0x00022220},
	{REG_A5XX_RBBM_CLOCK_CNTL_CCU3, 0x00022220},
	{REG_A5XX_RBBM_CLOCK_CNTL_RAC, 0x05522222},
	{REG_A5XX_RBBM_CLOCK_CNTL2_RAC, 0x00505555},
	{REG_A5XX_RBBM_CLOCK_HYST_RB_CCU0, 0x04040404},
	{REG_A5XX_RBBM_CLOCK_HYST_RB_CCU1, 0x04040404},
	{REG_A5XX_RBBM_CLOCK_HYST_RB_CCU2, 0x04040404},
	{REG_A5XX_RBBM_CLOCK_HYST_RB_CCU3, 0x04040404},
	{REG_A5XX_RBBM_CLOCK_HYST_RAC, 0x07444044},
	{REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_0, 0x00000002},
	{REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_1, 0x00000002},
	{REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_2, 0x00000002},
	{REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_3, 0x00000002},
	{REG_A5XX_RBBM_CLOCK_DELAY_RAC, 0x00010011},
	/* Remaining blocks: TSE/RAS/RBBM, GPC, VFD, HLSQ */
	{REG_A5XX_RBBM_CLOCK_CNTL_TSE_RAS_RBBM, 0x04222222},
	{REG_A5XX_RBBM_CLOCK_MODE_GPC, 0x02222222},
	{REG_A5XX_RBBM_CLOCK_MODE_VFD, 0x00002222},
	{REG_A5XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM, 0x00000000},
	{REG_A5XX_RBBM_CLOCK_HYST_GPC, 0x04104004},
	{REG_A5XX_RBBM_CLOCK_HYST_VFD, 0x00000000},
	{REG_A5XX_RBBM_CLOCK_DELAY_HLSQ, 0x00000000},
	{REG_A5XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM, 0x00004000},
	{REG_A5XX_RBBM_CLOCK_DELAY_GPC, 0x00000200},
	{REG_A5XX_RBBM_CLOCK_DELAY_VFD, 0x00002222}
};
311 
312 void a5xx_set_hwcg(struct msm_gpu *gpu, bool state)
313 {
314 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
315 	unsigned int i;
316 
317 	for (i = 0; i < ARRAY_SIZE(a5xx_hwcg); i++)
318 		gpu_write(gpu, a5xx_hwcg[i].offset,
319 			state ? a5xx_hwcg[i].value : 0);
320 
321 	if (adreno_is_a540(adreno_gpu)) {
322 		gpu_write(gpu, REG_A5XX_RBBM_CLOCK_DELAY_GPMU, state ? 0x00000770 : 0);
323 		gpu_write(gpu, REG_A5XX_RBBM_CLOCK_HYST_GPMU, state ? 0x00000004 : 0);
324 	}
325 
326 	gpu_write(gpu, REG_A5XX_RBBM_CLOCK_CNTL, state ? 0xAAA8AA00 : 0);
327 	gpu_write(gpu, REG_A5XX_RBBM_ISDB_CNT, state ? 0x182 : 0x180);
328 }
329 
/*
 * Emit the CP_ME_INIT packet on ring 0 to configure the microengine,
 * then wait for the GPU to idle.  Returns 0 on success or -EINVAL if
 * the GPU failed to go idle after processing the packet.
 */
static int a5xx_me_init(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct msm_ringbuffer *ring = gpu->rb[0];

	OUT_PKT7(ring, CP_ME_INIT, 8);

	/* Header word selecting which of the following fields are valid
	 * (exact bit meanings per CP microcode - TODO confirm) */
	OUT_RING(ring, 0x0000002F);

	/* Enable multiple hardware contexts */
	OUT_RING(ring, 0x00000003);

	/* Enable error detection */
	OUT_RING(ring, 0x20000000);

	/* Don't enable header dump */
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);

	/* Specify workarounds for various microcode issues */
	if (adreno_is_a530(adreno_gpu)) {
		/* Workaround for token end syncs
		 * Force a WFI after every direct-render 3D mode draw and every
		 * 2D mode 3 draw
		 */
		OUT_RING(ring, 0x0000000B);
	} else {
		/* No workarounds enabled */
		OUT_RING(ring, 0x00000000);
	}

	/* Two reserved/unused trailing dwords */
	OUT_RING(ring, 0x00000000);
	OUT_RING(ring, 0x00000000);

	gpu->funcs->flush(gpu, ring);
	return a5xx_idle(gpu, ring) ? 0 : -EINVAL;
}
367 
/*
 * Prime the preemption machinery from ring 0: point the CP at this ring's
 * preemption save record, enable local preemption/yield, and yield the
 * floor so ring switches can happen.  A no-op (returns 0) when only one
 * ring is configured.  Returns -EINVAL if the GPU fails to idle after
 * processing the packets.
 */
static int a5xx_preempt_start(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
	struct msm_ringbuffer *ring = gpu->rb[0];

	/* Preemption is pointless (and disabled) with a single ring */
	if (gpu->nr_rings == 1)
		return 0;

	/* Turn off protected mode to write to special registers */
	OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
	OUT_RING(ring, 0);

	/* Set the save preemption record for the ring/command */
	OUT_PKT4(ring, REG_A5XX_CP_CONTEXT_SWITCH_SAVE_ADDR_LO, 2);
	OUT_RING(ring, lower_32_bits(a5xx_gpu->preempt_iova[ring->id]));
	OUT_RING(ring, upper_32_bits(a5xx_gpu->preempt_iova[ring->id]));

	/* Turn back on protected mode */
	OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
	OUT_RING(ring, 1);

	/* Global preemption off, local preemption on */
	OUT_PKT7(ring, CP_PREEMPT_ENABLE_GLOBAL, 1);
	OUT_RING(ring, 0x00);

	OUT_PKT7(ring, CP_PREEMPT_ENABLE_LOCAL, 1);
	OUT_RING(ring, 0x01);

	OUT_PKT7(ring, CP_YIELD_ENABLE, 1);
	OUT_RING(ring, 0x01);

	/* Yield the floor on command completion */
	OUT_PKT7(ring, CP_CONTEXT_SWITCH_YIELD, 4);
	/* No address/data write on preemption complete, interrupt on bit 0 */
	OUT_RING(ring, 0x00);
	OUT_RING(ring, 0x00);
	OUT_RING(ring, 0x01);
	OUT_RING(ring, 0x01);

	gpu->funcs->flush(gpu, ring);

	return a5xx_idle(gpu, ring) ? 0 : -EINVAL;
}
410 
411 static int a5xx_ucode_init(struct msm_gpu *gpu)
412 {
413 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
414 	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
415 	int ret;
416 
417 	if (!a5xx_gpu->pm4_bo) {
418 		a5xx_gpu->pm4_bo = adreno_fw_create_bo(gpu,
419 			adreno_gpu->fw[ADRENO_FW_PM4], &a5xx_gpu->pm4_iova);
420 
421 
422 		if (IS_ERR(a5xx_gpu->pm4_bo)) {
423 			ret = PTR_ERR(a5xx_gpu->pm4_bo);
424 			a5xx_gpu->pm4_bo = NULL;
425 			DRM_DEV_ERROR(gpu->dev->dev, "could not allocate PM4: %d\n",
426 				ret);
427 			return ret;
428 		}
429 
430 		msm_gem_object_set_name(a5xx_gpu->pm4_bo, "pm4fw");
431 	}
432 
433 	if (!a5xx_gpu->pfp_bo) {
434 		a5xx_gpu->pfp_bo = adreno_fw_create_bo(gpu,
435 			adreno_gpu->fw[ADRENO_FW_PFP], &a5xx_gpu->pfp_iova);
436 
437 		if (IS_ERR(a5xx_gpu->pfp_bo)) {
438 			ret = PTR_ERR(a5xx_gpu->pfp_bo);
439 			a5xx_gpu->pfp_bo = NULL;
440 			DRM_DEV_ERROR(gpu->dev->dev, "could not allocate PFP: %d\n",
441 				ret);
442 			return ret;
443 		}
444 
445 		msm_gem_object_set_name(a5xx_gpu->pfp_bo, "pfpfw");
446 	}
447 
448 	gpu_write64(gpu, REG_A5XX_CP_ME_INSTR_BASE_LO,
449 		REG_A5XX_CP_ME_INSTR_BASE_HI, a5xx_gpu->pm4_iova);
450 
451 	gpu_write64(gpu, REG_A5XX_CP_PFP_INSTR_BASE_LO,
452 		REG_A5XX_CP_PFP_INSTR_BASE_HI, a5xx_gpu->pfp_iova);
453 
454 	return 0;
455 }
456 
457 #define SCM_GPU_ZAP_SHADER_RESUME 0
458 
459 static int a5xx_zap_shader_resume(struct msm_gpu *gpu)
460 {
461 	int ret;
462 
463 	ret = qcom_scm_set_remote_state(SCM_GPU_ZAP_SHADER_RESUME, GPU_PAS_ID);
464 	if (ret)
465 		DRM_ERROR("%s: zap-shader resume failed: %d\n",
466 			gpu->name, ret);
467 
468 	return ret;
469 }
470 
471 static int a5xx_zap_shader_init(struct msm_gpu *gpu)
472 {
473 	static bool loaded;
474 	int ret;
475 
476 	/*
477 	 * If the zap shader is already loaded into memory we just need to kick
478 	 * the remote processor to reinitialize it
479 	 */
480 	if (loaded)
481 		return a5xx_zap_shader_resume(gpu);
482 
483 	ret = adreno_zap_shader_load(gpu, GPU_PAS_ID);
484 
485 	loaded = !ret;
486 	return ret;
487 }
488 
/*
 * The set of RBBM interrupts the driver cares about: AHB/bus errors and
 * timeouts, CP errors, hang detection, CP software interrupts, cache
 * flush timestamps (submit retire), UCHE out-of-bounds accesses and GPMU
 * voltage droop.
 */
#define A5XX_INT_MASK (A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR | \
	  A5XX_RBBM_INT_0_MASK_RBBM_TRANSFER_TIMEOUT | \
	  A5XX_RBBM_INT_0_MASK_RBBM_ME_MS_TIMEOUT | \
	  A5XX_RBBM_INT_0_MASK_RBBM_PFP_MS_TIMEOUT | \
	  A5XX_RBBM_INT_0_MASK_RBBM_ETS_MS_TIMEOUT | \
	  A5XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNC_OVERFLOW | \
	  A5XX_RBBM_INT_0_MASK_CP_HW_ERROR | \
	  A5XX_RBBM_INT_0_MASK_MISC_HANG_DETECT | \
	  A5XX_RBBM_INT_0_MASK_CP_SW | \
	  A5XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS | \
	  A5XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS | \
	  A5XX_RBBM_INT_0_MASK_GPMU_VOLTAGE_DROOP)
501 
/*
 * Bring the GPU hardware up: configure the VBIF/RBBM/UCHE/CP blocks,
 * install the CP register protection ranges, load microcode, start the
 * microengine and (if possible) load the zap shader so we can leave
 * secure mode via the CP.  Returns 0 on success or a negative errno.
 */
static int a5xx_hw_init(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	int ret;

	gpu_write(gpu, REG_A5XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x00000003);

	if (adreno_is_a540(adreno_gpu))
		gpu_write(gpu, REG_A5XX_VBIF_GATE_OFF_WRREQ_EN, 0x00000009);

	/* Make all blocks contribute to the GPU BUSY perf counter */
	gpu_write(gpu, REG_A5XX_RBBM_PERFCTR_GPU_BUSY_MASKED, 0xFFFFFFFF);

	/* Enable RBBM error reporting bits */
	gpu_write(gpu, REG_A5XX_RBBM_AHB_CNTL0, 0x00000001);

	if (adreno_gpu->info->quirks & ADRENO_QUIRK_FAULT_DETECT_MASK) {
		/*
		 * Mask out the activity signals from RB1-3 to avoid false
		 * positives
		 */

		gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL11,
			0xF0000000);
		gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL12,
			0xFFFFFFFF);
		gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL13,
			0xFFFFFFFF);
		gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL14,
			0xFFFFFFFF);
		gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL15,
			0xFFFFFFFF);
		gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL16,
			0xFFFFFFFF);
		gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL17,
			0xFFFFFFFF);
		gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL18,
			0xFFFFFFFF);
	}

	/* Enable fault detection */
	gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_INT_CNTL,
		(1 << 30) | 0xFFFF);

	/* Turn on performance counters */
	gpu_write(gpu, REG_A5XX_RBBM_PERFCTR_CNTL, 0x01);

	/* Select CP0 to always count cycles */
	gpu_write(gpu, REG_A5XX_CP_PERFCTR_CP_SEL_0, PERF_CP_ALWAYS_COUNT);

	/* Select RBBM0 to countable 6 to get the busy status for devfreq */
	gpu_write(gpu, REG_A5XX_RBBM_PERFCTR_RBBM_SEL_0, 6);

	/* Increase VFD cache access so LRZ and other data gets evicted less */
	gpu_write(gpu, REG_A5XX_UCHE_CACHE_WAYS, 0x02);

	/* Disable L2 bypass in the UCHE */
	gpu_write(gpu, REG_A5XX_UCHE_TRAP_BASE_LO, 0xFFFF0000);
	gpu_write(gpu, REG_A5XX_UCHE_TRAP_BASE_HI, 0x0001FFFF);
	gpu_write(gpu, REG_A5XX_UCHE_WRITE_THRU_BASE_LO, 0xFFFF0000);
	gpu_write(gpu, REG_A5XX_UCHE_WRITE_THRU_BASE_HI, 0x0001FFFF);

	/* Set the GMEM VA range (0 to gpu->gmem) */
	gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MIN_LO, 0x00100000);
	gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MIN_HI, 0x00000000);
	gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MAX_LO,
		0x00100000 + adreno_gpu->gmem - 1);
	gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MAX_HI, 0x00000000);

	/* CP queue thresholds; the MERCIU size is SKU-dependent */
	gpu_write(gpu, REG_A5XX_CP_MEQ_THRESHOLDS, 0x40);
	if (adreno_is_a530(adreno_gpu))
		gpu_write(gpu, REG_A5XX_CP_MERCIU_SIZE, 0x40);
	if (adreno_is_a540(adreno_gpu))
		gpu_write(gpu, REG_A5XX_CP_MERCIU_SIZE, 0x400);
	gpu_write(gpu, REG_A5XX_CP_ROQ_THRESHOLDS_2, 0x80000060);
	gpu_write(gpu, REG_A5XX_CP_ROQ_THRESHOLDS_1, 0x40201B16);

	gpu_write(gpu, REG_A5XX_PC_DBG_ECO_CNTL, (0x400 << 11 | 0x300 << 22));

	if (adreno_gpu->info->quirks & ADRENO_QUIRK_TWO_PASS_USE_WFI)
		gpu_rmw(gpu, REG_A5XX_PC_DBG_ECO_CNTL, 0, (1 << 8));

	/*
	 * NOTE(review): this absolute write clobbers the value (including
	 * the TWO_PASS_USE_WFI bit) programmed to PC_DBG_ECO_CNTL just
	 * above - verify whether these writes should be merged.
	 */
	gpu_write(gpu, REG_A5XX_PC_DBG_ECO_CNTL, 0xc0200100);

	/* Enable USE_RETENTION_FLOPS */
	gpu_write(gpu, REG_A5XX_CP_CHICKEN_DBG, 0x02000000);

	/* Enable ME/PFP split notification */
	gpu_write(gpu, REG_A5XX_RBBM_AHB_CNTL1, 0xA6FFFFFF);

	/* Enable HWCG */
	a5xx_set_hwcg(gpu, true);

	gpu_write(gpu, REG_A5XX_RBBM_AHB_CNTL2, 0x0000003F);

	/* Set the highest bank bit */
	gpu_write(gpu, REG_A5XX_TPL1_MODE_CNTL, 2 << 7);
	gpu_write(gpu, REG_A5XX_RB_MODE_CNTL, 2 << 1);
	if (adreno_is_a540(adreno_gpu))
		gpu_write(gpu, REG_A5XX_UCHE_DBG_ECO_CNTL_2, 2);

	/* Protect registers from the CP */
	gpu_write(gpu, REG_A5XX_CP_PROTECT_CNTL, 0x00000007);

	/* RBBM */
	gpu_write(gpu, REG_A5XX_CP_PROTECT(0), ADRENO_PROTECT_RW(0x04, 4));
	gpu_write(gpu, REG_A5XX_CP_PROTECT(1), ADRENO_PROTECT_RW(0x08, 8));
	gpu_write(gpu, REG_A5XX_CP_PROTECT(2), ADRENO_PROTECT_RW(0x10, 16));
	gpu_write(gpu, REG_A5XX_CP_PROTECT(3), ADRENO_PROTECT_RW(0x20, 32));
	gpu_write(gpu, REG_A5XX_CP_PROTECT(4), ADRENO_PROTECT_RW(0x40, 64));
	gpu_write(gpu, REG_A5XX_CP_PROTECT(5), ADRENO_PROTECT_RW(0x80, 64));

	/* Content protect */
	gpu_write(gpu, REG_A5XX_CP_PROTECT(6),
		ADRENO_PROTECT_RW(REG_A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_LO,
			16));
	gpu_write(gpu, REG_A5XX_CP_PROTECT(7),
		ADRENO_PROTECT_RW(REG_A5XX_RBBM_SECVID_TRUST_CNTL, 2));

	/* CP */
	gpu_write(gpu, REG_A5XX_CP_PROTECT(8), ADRENO_PROTECT_RW(0x800, 64));
	gpu_write(gpu, REG_A5XX_CP_PROTECT(9), ADRENO_PROTECT_RW(0x840, 8));
	gpu_write(gpu, REG_A5XX_CP_PROTECT(10), ADRENO_PROTECT_RW(0x880, 32));
	gpu_write(gpu, REG_A5XX_CP_PROTECT(11), ADRENO_PROTECT_RW(0xAA0, 1));

	/* RB */
	gpu_write(gpu, REG_A5XX_CP_PROTECT(12), ADRENO_PROTECT_RW(0xCC0, 1));
	gpu_write(gpu, REG_A5XX_CP_PROTECT(13), ADRENO_PROTECT_RW(0xCF0, 2));

	/* VPC */
	gpu_write(gpu, REG_A5XX_CP_PROTECT(14), ADRENO_PROTECT_RW(0xE68, 8));
	gpu_write(gpu, REG_A5XX_CP_PROTECT(15), ADRENO_PROTECT_RW(0xE70, 4));

	/* UCHE */
	gpu_write(gpu, REG_A5XX_CP_PROTECT(16), ADRENO_PROTECT_RW(0xE80, 16));

	if (adreno_is_a530(adreno_gpu))
		gpu_write(gpu, REG_A5XX_CP_PROTECT(17),
			ADRENO_PROTECT_RW(0x10000, 0x8000));

	gpu_write(gpu, REG_A5XX_RBBM_SECVID_TSB_CNTL, 0);
	/*
	 * Disable the trusted memory range - we don't actually support secure
	 * memory rendering at this point in time and we don't want to block off
	 * part of the virtual memory space.
	 */
	gpu_write64(gpu, REG_A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_LO,
		REG_A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_HI, 0x00000000);
	gpu_write(gpu, REG_A5XX_RBBM_SECVID_TSB_TRUSTED_SIZE, 0x00000000);

	/* Put the GPU into 64 bit by default */
	gpu_write(gpu, REG_A5XX_CP_ADDR_MODE_CNTL, 0x1);
	gpu_write(gpu, REG_A5XX_VSC_ADDR_MODE_CNTL, 0x1);
	gpu_write(gpu, REG_A5XX_GRAS_ADDR_MODE_CNTL, 0x1);
	gpu_write(gpu, REG_A5XX_RB_ADDR_MODE_CNTL, 0x1);
	gpu_write(gpu, REG_A5XX_PC_ADDR_MODE_CNTL, 0x1);
	gpu_write(gpu, REG_A5XX_HLSQ_ADDR_MODE_CNTL, 0x1);
	gpu_write(gpu, REG_A5XX_VFD_ADDR_MODE_CNTL, 0x1);
	gpu_write(gpu, REG_A5XX_VPC_ADDR_MODE_CNTL, 0x1);
	gpu_write(gpu, REG_A5XX_UCHE_ADDR_MODE_CNTL, 0x1);
	gpu_write(gpu, REG_A5XX_SP_ADDR_MODE_CNTL, 0x1);
	gpu_write(gpu, REG_A5XX_TPL1_ADDR_MODE_CNTL, 0x1);
	gpu_write(gpu, REG_A5XX_RBBM_SECVID_TSB_ADDR_MODE_CNTL, 0x1);

	/*
	 * VPC corner case with local memory load kill leads to corrupt
	 * internal state. Normal Disable does not work for all a5x chips.
	 * So do the following setting to disable it.
	 */
	if (adreno_gpu->info->quirks & ADRENO_QUIRK_LMLOADKILL_DISABLE) {
		gpu_rmw(gpu, REG_A5XX_VPC_DBG_ECO_CNTL, 0, BIT(23));
		gpu_rmw(gpu, REG_A5XX_HLSQ_DBG_ECO_CNTL, BIT(18), 0);
	}

	ret = adreno_hw_init(gpu);
	if (ret)
		return ret;

	a5xx_preempt_hw_init(gpu);

	a5xx_gpmu_ucode_init(gpu);

	ret = a5xx_ucode_init(gpu);
	if (ret)
		return ret;

	/* Program the RBBM interrupt mask to the interrupts we handle */
	gpu_write(gpu, REG_A5XX_RBBM_INT_0_MASK, A5XX_INT_MASK);

	/* Clear ME_HALT to start the micro engine */
	gpu_write(gpu, REG_A5XX_CP_PFP_ME_CNTL, 0);
	ret = a5xx_me_init(gpu);
	if (ret)
		return ret;

	ret = a5xx_power_init(gpu);
	if (ret)
		return ret;

	/*
	 * Send a pipeline event stat to get misbehaving counters to start
	 * ticking correctly
	 */
	if (adreno_is_a530(adreno_gpu)) {
		OUT_PKT7(gpu->rb[0], CP_EVENT_WRITE, 1);
		OUT_RING(gpu->rb[0], 0x0F);

		gpu->funcs->flush(gpu, gpu->rb[0]);
		if (!a5xx_idle(gpu, gpu->rb[0]))
			return -EINVAL;
	}

	/*
	 * Try to load a zap shader into the secure world. If successful
	 * we can use the CP to switch out of secure mode. If not then we
	 * have no resource but to try to switch ourselves out manually. If we
	 * guessed wrong then access to the RBBM_SECVID_TRUST_CNTL register will
	 * be blocked and a permissions violation will soon follow.
	 */
	ret = a5xx_zap_shader_init(gpu);
	if (!ret) {
		OUT_PKT7(gpu->rb[0], CP_SET_SECURE_MODE, 1);
		OUT_RING(gpu->rb[0], 0x00000000);

		gpu->funcs->flush(gpu, gpu->rb[0]);
		if (!a5xx_idle(gpu, gpu->rb[0]))
			return -EINVAL;
	} else {
		/* Print a warning so if we die, we know why */
		dev_warn_once(gpu->dev->dev,
			"Zap shader not enabled - using SECVID_TRUST_CNTL instead\n");
		gpu_write(gpu, REG_A5XX_RBBM_SECVID_TRUST_CNTL, 0x0);
	}

	/* Last step - yield the ringbuffer */
	a5xx_preempt_start(gpu);

	return 0;
}
741 
742 static void a5xx_recover(struct msm_gpu *gpu)
743 {
744 	int i;
745 
746 	adreno_dump_info(gpu);
747 
748 	for (i = 0; i < 8; i++) {
749 		printk("CP_SCRATCH_REG%d: %u\n", i,
750 			gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(i)));
751 	}
752 
753 	if (hang_debug)
754 		a5xx_dump(gpu);
755 
756 	gpu_write(gpu, REG_A5XX_RBBM_SW_RESET_CMD, 1);
757 	gpu_read(gpu, REG_A5XX_RBBM_SW_RESET_CMD);
758 	gpu_write(gpu, REG_A5XX_RBBM_SW_RESET_CMD, 0);
759 	adreno_recover(gpu);
760 }
761 
/*
 * Tear down the a5xx GPU: stop the preemption machinery, release the
 * PM4/PFP/GPMU microcode buffers (unpinning their iovas first), run the
 * common adreno cleanup and free the per-GPU structure.
 */
static void a5xx_destroy(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);

	DBG("%s", gpu->name);

	a5xx_preempt_fini(gpu);

	if (a5xx_gpu->pm4_bo) {
		msm_gem_unpin_iova(a5xx_gpu->pm4_bo, gpu->aspace);
		drm_gem_object_put_unlocked(a5xx_gpu->pm4_bo);
	}

	if (a5xx_gpu->pfp_bo) {
		msm_gem_unpin_iova(a5xx_gpu->pfp_bo, gpu->aspace);
		drm_gem_object_put_unlocked(a5xx_gpu->pfp_bo);
	}

	if (a5xx_gpu->gpmu_bo) {
		msm_gem_unpin_iova(a5xx_gpu->gpmu_bo, gpu->aspace);
		drm_gem_object_put_unlocked(a5xx_gpu->gpmu_bo);
	}

	adreno_gpu_cleanup(adreno_gpu);
	kfree(a5xx_gpu);
}
789 
790 static inline bool _a5xx_check_idle(struct msm_gpu *gpu)
791 {
792 	if (gpu_read(gpu, REG_A5XX_RBBM_STATUS) & ~A5XX_RBBM_STATUS_HI_BUSY)
793 		return false;
794 
795 	/*
796 	 * Nearly every abnormality ends up pausing the GPU and triggering a
797 	 * fault so we can safely just watch for this one interrupt to fire
798 	 */
799 	return !(gpu_read(gpu, REG_A5XX_RBBM_INT_0_STATUS) &
800 		A5XX_RBBM_INT_0_MASK_MISC_HANG_DETECT);
801 }
802 
/*
 * Wait for the given ring to drain and the GPU to go fully idle.
 * Returns true on success; false if called on a non-current ring or if
 * the GPU fails to idle within the spin_until() timeout.
 */
bool a5xx_idle(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);

	/* Idling a ring the CP is not executing makes no sense */
	if (ring != a5xx_gpu->cur_ring) {
		WARN(1, "Tried to idle a non-current ringbuffer\n");
		return false;
	}

	/* wait for CP to drain ringbuffer: */
	if (!adreno_idle(gpu, ring))
		return false;

	if (spin_until(_a5xx_check_idle(gpu))) {
		/* Dump enough state to see why the GPU is stuck */
		DRM_ERROR("%s: %ps: timeout waiting for GPU to idle: status %8.8X irq %8.8X rptr/wptr %d/%d\n",
			gpu->name, __builtin_return_address(0),
			gpu_read(gpu, REG_A5XX_RBBM_STATUS),
			gpu_read(gpu, REG_A5XX_RBBM_INT_0_STATUS),
			gpu_read(gpu, REG_A5XX_CP_RB_RPTR),
			gpu_read(gpu, REG_A5XX_CP_RB_WPTR));
		return false;
	}

	return true;
}
829 
830 static int a5xx_fault_handler(void *arg, unsigned long iova, int flags)
831 {
832 	struct msm_gpu *gpu = arg;
833 	pr_warn_ratelimited("*** gpu fault: iova=%08lx, flags=%d (%u,%u,%u,%u)\n",
834 			iova, flags,
835 			gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(4)),
836 			gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(5)),
837 			gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(6)),
838 			gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(7)));
839 
840 	return -EFAULT;
841 }
842 
/*
 * Decode and log the CP error interrupt: opcode errors, HW faults, DMA
 * errors, protected-register violations and CP AHB errors.  Diagnostic
 * only - no recovery is attempted here.
 */
static void a5xx_cp_err_irq(struct msm_gpu *gpu)
{
	u32 status = gpu_read(gpu, REG_A5XX_CP_INTERRUPT_STATUS);

	if (status & A5XX_CP_INT_CP_OPCODE_ERROR) {
		u32 val;

		/* Reset the PFP stat index so the reads below start at 0 */
		gpu_write(gpu, REG_A5XX_CP_PFP_STAT_ADDR, 0);

		/*
		 * REG_A5XX_CP_PFP_STAT_DATA is indexed, and we want index 1 so
		 * read it twice
		 */

		gpu_read(gpu, REG_A5XX_CP_PFP_STAT_DATA);
		val = gpu_read(gpu, REG_A5XX_CP_PFP_STAT_DATA);

		dev_err_ratelimited(gpu->dev->dev, "CP | opcode error | possible opcode=0x%8.8X\n",
			val);
	}

	if (status & A5XX_CP_INT_CP_HW_FAULT_ERROR)
		dev_err_ratelimited(gpu->dev->dev, "CP | HW fault | status=0x%8.8X\n",
			gpu_read(gpu, REG_A5XX_CP_HW_FAULT));

	if (status & A5XX_CP_INT_CP_DMA_ERROR)
		dev_err_ratelimited(gpu->dev->dev, "CP | DMA error\n");

	if (status & A5XX_CP_INT_CP_REGISTER_PROTECTION_ERROR) {
		u32 val = gpu_read(gpu, REG_A5XX_CP_PROTECT_STATUS);

		/* Bit 24 distinguishes write/read; 0xFFFFF holds the dword address */
		dev_err_ratelimited(gpu->dev->dev,
			"CP | protected mode error | %s | addr=0x%8.8X | status=0x%8.8X\n",
			val & (1 << 24) ? "WRITE" : "READ",
			(val & 0xFFFFF) >> 2, val);
	}

	if (status & A5XX_CP_INT_CP_AHB_ERROR) {
		u32 status = gpu_read(gpu, REG_A5XX_CP_AHB_FAULT);
		/*
		 * NOTE(review): only 14 of 16 entries are initialized, so
		 * access codes 14/15 index a NULL pointer (printk renders
		 * it as "(null)") - consider padding the table.
		 */
		const char *access[16] = { "reserved", "reserved",
			"timestamp lo", "timestamp hi", "pfp read", "pfp write",
			"", "", "me read", "me write", "", "", "crashdump read",
			"crashdump write" };

		dev_err_ratelimited(gpu->dev->dev,
			"CP | AHB error | addr=%X access=%s error=%d | status=0x%8.8X\n",
			status & 0xFFFFF, access[(status >> 24) & 0xF],
			(status & (1 << 31)), status);
	}
}
893 
/*
 * Decode and log RBBM error interrupts (AHB bus errors, master-split
 * timeouts and ATB overflows).  The AHB error is the only one that needs
 * an explicit clear of both the error latch and the interrupt bit.
 */
static void a5xx_rbbm_err_irq(struct msm_gpu *gpu, u32 status)
{
	if (status & A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR) {
		u32 val = gpu_read(gpu, REG_A5XX_RBBM_AHB_ERROR_STATUS);

		/* Bit 28 = write/read, 0xFFFFF = dword address, then port IDs */
		dev_err_ratelimited(gpu->dev->dev,
			"RBBM | AHB bus error | %s | addr=0x%X | ports=0x%X:0x%X\n",
			val & (1 << 28) ? "WRITE" : "READ",
			(val & 0xFFFFF) >> 2, (val >> 20) & 0x3,
			(val >> 24) & 0xF);

		/* Clear the error */
		gpu_write(gpu, REG_A5XX_RBBM_AHB_CMD, (1 << 4));

		/* Clear the interrupt */
		gpu_write(gpu, REG_A5XX_RBBM_INT_CLEAR_CMD,
			A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR);
	}

	if (status & A5XX_RBBM_INT_0_MASK_RBBM_TRANSFER_TIMEOUT)
		dev_err_ratelimited(gpu->dev->dev, "RBBM | AHB transfer timeout\n");

	if (status & A5XX_RBBM_INT_0_MASK_RBBM_ME_MS_TIMEOUT)
		dev_err_ratelimited(gpu->dev->dev, "RBBM | ME master split | status=0x%X\n",
			gpu_read(gpu, REG_A5XX_RBBM_AHB_ME_SPLIT_STATUS));

	if (status & A5XX_RBBM_INT_0_MASK_RBBM_PFP_MS_TIMEOUT)
		dev_err_ratelimited(gpu->dev->dev, "RBBM | PFP master split | status=0x%X\n",
			gpu_read(gpu, REG_A5XX_RBBM_AHB_PFP_SPLIT_STATUS));

	if (status & A5XX_RBBM_INT_0_MASK_RBBM_ETS_MS_TIMEOUT)
		dev_err_ratelimited(gpu->dev->dev, "RBBM | ETS master split | status=0x%X\n",
			gpu_read(gpu, REG_A5XX_RBBM_AHB_ETS_SPLIT_STATUS));

	if (status & A5XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNC_OVERFLOW)
		dev_err_ratelimited(gpu->dev->dev, "RBBM | ATB ASYNC overflow\n");

	if (status & A5XX_RBBM_INT_0_MASK_RBBM_ATB_BUS_OVERFLOW)
		dev_err_ratelimited(gpu->dev->dev, "RBBM | ATB bus overflow\n");
}
934 
935 static void a5xx_uche_err_irq(struct msm_gpu *gpu)
936 {
937 	uint64_t addr = (uint64_t) gpu_read(gpu, REG_A5XX_UCHE_TRAP_LOG_HI);
938 
939 	addr |= gpu_read(gpu, REG_A5XX_UCHE_TRAP_LOG_LO);
940 
941 	dev_err_ratelimited(gpu->dev->dev, "UCHE | Out of bounds access | addr=0x%llX\n",
942 		addr);
943 }
944 
/* GPMU interrupt handler: the only condition reported is a voltage droop */
static void a5xx_gpmu_err_irq(struct msm_gpu *gpu)
{
	dev_err_ratelimited(gpu->dev->dev, "GPMU | voltage droop\n");
}
949 
950 static void a5xx_fault_detect_irq(struct msm_gpu *gpu)
951 {
952 	struct drm_device *dev = gpu->dev;
953 	struct msm_drm_private *priv = dev->dev_private;
954 	struct msm_ringbuffer *ring = gpu->funcs->active_ring(gpu);
955 
956 	DRM_DEV_ERROR(dev->dev, "gpu fault ring %d fence %x status %8.8X rb %4.4x/%4.4x ib1 %16.16llX/%4.4x ib2 %16.16llX/%4.4x\n",
957 		ring ? ring->id : -1, ring ? ring->seqno : 0,
958 		gpu_read(gpu, REG_A5XX_RBBM_STATUS),
959 		gpu_read(gpu, REG_A5XX_CP_RB_RPTR),
960 		gpu_read(gpu, REG_A5XX_CP_RB_WPTR),
961 		gpu_read64(gpu, REG_A5XX_CP_IB1_BASE, REG_A5XX_CP_IB1_BASE_HI),
962 		gpu_read(gpu, REG_A5XX_CP_IB1_BUFSZ),
963 		gpu_read64(gpu, REG_A5XX_CP_IB2_BASE, REG_A5XX_CP_IB2_BASE_HI),
964 		gpu_read(gpu, REG_A5XX_CP_IB2_BUFSZ));
965 
966 	/* Turn off the hangcheck timer to keep it from bothering us */
967 	del_timer(&gpu->hangcheck_timer);
968 
969 	queue_work(priv->wq, &gpu->recover_work);
970 }
971 
/*
 * RBBM error interrupts that are dispatched to a5xx_rbbm_err_irq().
 * NOTE(review): a5xx_rbbm_err_irq() also reports RBBM_ATB_BUS_OVERFLOW,
 * which is not part of this mask - confirm that bit is enabled/raised
 * through another path.
 */
#define RBBM_ERROR_MASK \
	(A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR | \
	A5XX_RBBM_INT_0_MASK_RBBM_TRANSFER_TIMEOUT | \
	A5XX_RBBM_INT_0_MASK_RBBM_ME_MS_TIMEOUT | \
	A5XX_RBBM_INT_0_MASK_RBBM_PFP_MS_TIMEOUT | \
	A5XX_RBBM_INT_0_MASK_RBBM_ETS_MS_TIMEOUT | \
	A5XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNC_OVERFLOW)
979 
/*
 * Top-level a5xx interrupt handler: acknowledge the pending interrupts and
 * dispatch each status bit to its dedicated handler.
 */
static irqreturn_t a5xx_irq(struct msm_gpu *gpu)
{
	u32 status = gpu_read(gpu, REG_A5XX_RBBM_INT_0_STATUS);

	/*
	 * Clear all the interrupts except RBBM_AHB_ERROR - if we clear it
	 * before the source is cleared the interrupt will storm.
	 */
	gpu_write(gpu, REG_A5XX_RBBM_INT_CLEAR_CMD,
		status & ~A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR);

	/* Pass status to a5xx_rbbm_err_irq because we've already cleared it */
	if (status & RBBM_ERROR_MASK)
		a5xx_rbbm_err_irq(gpu, status);

	if (status & A5XX_RBBM_INT_0_MASK_CP_HW_ERROR)
		a5xx_cp_err_irq(gpu);

	if (status & A5XX_RBBM_INT_0_MASK_MISC_HANG_DETECT)
		a5xx_fault_detect_irq(gpu);

	if (status & A5XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS)
		a5xx_uche_err_irq(gpu);

	if (status & A5XX_RBBM_INT_0_MASK_GPMU_VOLTAGE_DROOP)
		a5xx_gpmu_err_irq(gpu);

	/* Cache flush timestamp: a submit completed - retire it and see if a
	 * pending preemption can now be triggered.
	 */
	if (status & A5XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS) {
		a5xx_preempt_trigger(gpu);
		msm_gpu_retire(gpu);
	}

	/* Software interrupt from the CP signals preemption completion */
	if (status & A5XX_RBBM_INT_0_MASK_CP_SW)
		a5xx_preempt_irq(gpu);

	return IRQ_HANDLED;
}
1017 
/*
 * Map the generic adreno register identifiers to their a5xx-specific CP
 * ringbuffer register offsets, for use by the shared adreno core code.
 */
static const u32 a5xx_register_offsets[REG_ADRENO_REGISTER_MAX] = {
	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_BASE, REG_A5XX_CP_RB_BASE),
	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_BASE_HI, REG_A5XX_CP_RB_BASE_HI),
	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_RPTR_ADDR, REG_A5XX_CP_RB_RPTR_ADDR),
	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_RPTR_ADDR_HI,
		REG_A5XX_CP_RB_RPTR_ADDR_HI),
	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_RPTR, REG_A5XX_CP_RB_RPTR),
	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_WPTR, REG_A5XX_CP_RB_WPTR),
	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_CNTL, REG_A5XX_CP_RB_CNTL),
};
1028 
/*
 * Registers captured for debug dumps / GPU state snapshots (wired up via
 * adreno_gpu->registers in a5xx_gpu_init()).  Values appear to be inclusive
 * [start, end] range pairs, terminated by ~0 - confirm against adreno_dump().
 */
static const u32 a5xx_registers[] = {
	0x0000, 0x0002, 0x0004, 0x0020, 0x0022, 0x0026, 0x0029, 0x002B,
	0x002E, 0x0035, 0x0038, 0x0042, 0x0044, 0x0044, 0x0047, 0x0095,
	0x0097, 0x00BB, 0x03A0, 0x0464, 0x0469, 0x046F, 0x04D2, 0x04D3,
	0x04E0, 0x0533, 0x0540, 0x0555, 0x0800, 0x081A, 0x081F, 0x0841,
	0x0860, 0x0860, 0x0880, 0x08A0, 0x0B00, 0x0B12, 0x0B15, 0x0B28,
	0x0B78, 0x0B7F, 0x0BB0, 0x0BBD, 0x0BC0, 0x0BC6, 0x0BD0, 0x0C53,
	0x0C60, 0x0C61, 0x0C80, 0x0C82, 0x0C84, 0x0C85, 0x0C90, 0x0C98,
	0x0CA0, 0x0CA0, 0x0CB0, 0x0CB2, 0x2180, 0x2185, 0x2580, 0x2585,
	0x0CC1, 0x0CC1, 0x0CC4, 0x0CC7, 0x0CCC, 0x0CCC, 0x0CD0, 0x0CD8,
	0x0CE0, 0x0CE5, 0x0CE8, 0x0CE8, 0x0CEC, 0x0CF1, 0x0CFB, 0x0D0E,
	0x2100, 0x211E, 0x2140, 0x2145, 0x2500, 0x251E, 0x2540, 0x2545,
	0x0D10, 0x0D17, 0x0D20, 0x0D23, 0x0D30, 0x0D30, 0x20C0, 0x20C0,
	0x24C0, 0x24C0, 0x0E40, 0x0E43, 0x0E4A, 0x0E4A, 0x0E50, 0x0E57,
	0x0E60, 0x0E7C, 0x0E80, 0x0E8E, 0x0E90, 0x0E96, 0x0EA0, 0x0EA8,
	0x0EB0, 0x0EB2, 0xE140, 0xE147, 0xE150, 0xE187, 0xE1A0, 0xE1A9,
	0xE1B0, 0xE1B6, 0xE1C0, 0xE1C7, 0xE1D0, 0xE1D1, 0xE200, 0xE201,
	0xE210, 0xE21C, 0xE240, 0xE268, 0xE000, 0xE006, 0xE010, 0xE09A,
	0xE0A0, 0xE0A4, 0xE0AA, 0xE0EB, 0xE100, 0xE105, 0xE380, 0xE38F,
	0xE3B0, 0xE3B0, 0xE400, 0xE405, 0xE408, 0xE4E9, 0xE4F0, 0xE4F0,
	0xE280, 0xE280, 0xE282, 0xE2A3, 0xE2A5, 0xE2C2, 0xE940, 0xE947,
	0xE950, 0xE987, 0xE9A0, 0xE9A9, 0xE9B0, 0xE9B6, 0xE9C0, 0xE9C7,
	0xE9D0, 0xE9D1, 0xEA00, 0xEA01, 0xEA10, 0xEA1C, 0xEA40, 0xEA68,
	0xE800, 0xE806, 0xE810, 0xE89A, 0xE8A0, 0xE8A4, 0xE8AA, 0xE8EB,
	0xE900, 0xE905, 0xEB80, 0xEB8F, 0xEBB0, 0xEBB0, 0xEC00, 0xEC05,
	0xEC08, 0xECE9, 0xECF0, 0xECF0, 0xEA80, 0xEA80, 0xEA82, 0xEAA3,
	0xEAA5, 0xEAC2, 0xA800, 0xA800, 0xA820, 0xA828, 0xA840, 0xA87D,
	0XA880, 0xA88D, 0xA890, 0xA8A3, 0xA8D0, 0xA8D8, 0xA8E0, 0xA8F5,
	0xAC60, 0xAC60, ~0,
};
1059 
/* Dump the RBBM status plus the generic adreno register dump (hang debug) */
static void a5xx_dump(struct msm_gpu *gpu)
{
	DRM_DEV_INFO(gpu->dev->dev, "status:   %08x\n",
		gpu_read(gpu, REG_A5XX_RBBM_STATUS));
	adreno_dump(gpu);
}
1066 
/*
 * Power up the GPU: enable the core power, then bring up the RBCCU and SP
 * power domains in that order, polling bit 20 of each domain's clock status
 * register for the GDSC to come up.
 *
 * Returns 0 on success or a negative errno if core resume or either GDSC
 * poll fails.
 */
static int a5xx_pm_resume(struct msm_gpu *gpu)
{
	int ret;

	/* Turn on the core power */
	ret = msm_gpu_pm_resume(gpu);
	if (ret)
		return ret;

	/* Turn on the RBCCU domain first to limit the chances of voltage droop */
	gpu_write(gpu, REG_A5XX_GPMU_RBCCU_POWER_CNTL, 0x778000);

	/* Wait 3 usecs before polling */
	udelay(3);

	ret = spin_usecs(gpu, 20, REG_A5XX_GPMU_RBCCU_PWR_CLK_STATUS,
		(1 << 20), (1 << 20));
	if (ret) {
		DRM_ERROR("%s: timeout waiting for RBCCU GDSC enable: %X\n",
			gpu->name,
			gpu_read(gpu, REG_A5XX_GPMU_RBCCU_PWR_CLK_STATUS));
		return ret;
	}

	/* Turn on the SP domain */
	gpu_write(gpu, REG_A5XX_GPMU_SP_POWER_CNTL, 0x778000);
	ret = spin_usecs(gpu, 20, REG_A5XX_GPMU_SP_PWR_CLK_STATUS,
		(1 << 20), (1 << 20));
	if (ret)
		DRM_ERROR("%s: timeout waiting for SP GDSC enable\n",
			gpu->name);

	return ret;
}
1101 
/*
 * Power down the GPU: drain and halt the VBIF, pulse the VBIF software
 * reset, then hand off to the common suspend path.
 */
static int a5xx_pm_suspend(struct msm_gpu *gpu)
{
	/* Clear the VBIF pipe before shutting down */
	gpu_write(gpu, REG_A5XX_VBIF_XIN_HALT_CTRL0, 0xF);
	spin_until((gpu_read(gpu, REG_A5XX_VBIF_XIN_HALT_CTRL1) & 0xF) == 0xF);

	/* Release the halt request once the pipe is idle */
	gpu_write(gpu, REG_A5XX_VBIF_XIN_HALT_CTRL0, 0);

	/*
	 * Reset the VBIF before power collapse to avoid issue with FIFO
	 * entries
	 */
	gpu_write(gpu, REG_A5XX_RBBM_BLOCK_SW_RESET_CMD, 0x003C0000);
	gpu_write(gpu, REG_A5XX_RBBM_BLOCK_SW_RESET_CMD, 0x00000000);

	return msm_gpu_pm_suspend(gpu);
}
1119 
/*
 * Read the 64-bit CP performance counter pair used as the GPU timestamp.
 * Always succeeds (returns 0) with the value stored in *value.
 */
static int a5xx_get_timestamp(struct msm_gpu *gpu, uint64_t *value)
{
	*value = gpu_read64(gpu, REG_A5XX_RBBM_PERFCTR_CP_0_LO,
		REG_A5XX_RBBM_PERFCTR_CP_0_HI);

	return 0;
}
1127 
/*
 * State for the CP crashdumper: a kernel-mapped GEM buffer that holds both
 * the capture script and the register data the hardware writes back.
 */
struct a5xx_crashdumper {
	void *ptr;			/* kernel (CPU) mapping of the buffer */
	struct drm_gem_object *bo;	/* backing GEM object */
	u64 iova;			/* GPU address of the buffer */
};
1133 
/* a5xx GPU state: the common adreno state plus the HLSQ aperture registers */
struct a5xx_gpu_state {
	struct msm_gpu_state base;
	u32 *hlsqregs;	/* kcalloc'd values captured via the HLSQ aperture */
};
1138 
1139 static int a5xx_crashdumper_init(struct msm_gpu *gpu,
1140 		struct a5xx_crashdumper *dumper)
1141 {
1142 	dumper->ptr = msm_gem_kernel_new_locked(gpu->dev,
1143 		SZ_1M, MSM_BO_UNCACHED, gpu->aspace,
1144 		&dumper->bo, &dumper->iova);
1145 
1146 	if (!IS_ERR(dumper->ptr))
1147 		msm_gem_object_set_name(dumper->bo, "crashdump");
1148 
1149 	return PTR_ERR_OR_ZERO(dumper->ptr);
1150 }
1151 
/*
 * Point the CP at the crashdump script and kick it off, then poll (up to
 * 10ms) for bit 2 of CRASH_DUMP_CNTL - presumably the "done" flag; confirm
 * against the register documentation.
 *
 * Returns 0 on completion, -EINVAL if the dumper buffer was never
 * allocated, or a negative errno on poll timeout.
 */
static int a5xx_crashdumper_run(struct msm_gpu *gpu,
		struct a5xx_crashdumper *dumper)
{
	u32 val;

	if (IS_ERR_OR_NULL(dumper->ptr))
		return -EINVAL;

	gpu_write64(gpu, REG_A5XX_CP_CRASH_SCRIPT_BASE_LO,
		REG_A5XX_CP_CRASH_SCRIPT_BASE_HI, dumper->iova);

	gpu_write(gpu, REG_A5XX_CP_CRASH_DUMP_CNTL, 1);

	return gpu_poll_timeout(gpu, REG_A5XX_CP_CRASH_DUMP_CNTL, val,
		val & 0x04, 100, 10000);
}
1168 
1169 /*
1170  * These are a list of the registers that need to be read through the HLSQ
1171  * aperture through the crashdumper.  These are not nominally accessible from
1172  * the CPU on a secure platform.
1173  */
static const struct {
	u32 type;	/* aperture/bank select value for HLSQ_DBG_READ_SEL */
	u32 regoffset;	/* dword offset of the first register in the bank */
	u32 count;	/* number of consecutive registers to capture */
} a5xx_hlsq_aperture_regs[] = {
	{ 0x35, 0xe00, 0x32 },   /* HLSQ non-context */
	{ 0x31, 0x2080, 0x1 },   /* HLSQ 2D context 0 */
	{ 0x33, 0x2480, 0x1 },   /* HLSQ 2D context 1 */
	{ 0x32, 0xe780, 0x62 },  /* HLSQ 3D context 0 */
	{ 0x34, 0xef80, 0x62 },  /* HLSQ 3D context 1 */
	{ 0x3f, 0x0ec0, 0x40 },  /* SP non-context */
	{ 0x3d, 0x2040, 0x1 },   /* SP 2D context 0 */
	{ 0x3b, 0x2440, 0x1 },   /* SP 2D context 1 */
	{ 0x3e, 0xe580, 0x170 }, /* SP 3D context 0 */
	{ 0x3c, 0xed80, 0x170 }, /* SP 3D context 1 */
	{ 0x3a, 0x0f00, 0x1c },  /* TP non-context */
	{ 0x38, 0x2000, 0xa },   /* TP 2D context 0 */
	{ 0x36, 0x2400, 0xa },   /* TP 2D context 1 */
	{ 0x39, 0xe700, 0x80 },  /* TP 3D context 0 */
	{ 0x37, 0xef00, 0x80 },  /* TP 3D context 1 */
};
1195 
1196 static void a5xx_gpu_state_get_hlsq_regs(struct msm_gpu *gpu,
1197 		struct a5xx_gpu_state *a5xx_state)
1198 {
1199 	struct a5xx_crashdumper dumper = { 0 };
1200 	u32 offset, count = 0;
1201 	u64 *ptr;
1202 	int i;
1203 
1204 	if (a5xx_crashdumper_init(gpu, &dumper))
1205 		return;
1206 
1207 	/* The script will be written at offset 0 */
1208 	ptr = dumper.ptr;
1209 
1210 	/* Start writing the data at offset 256k */
1211 	offset = dumper.iova + (256 * SZ_1K);
1212 
1213 	/* Count how many additional registers to get from the HLSQ aperture */
1214 	for (i = 0; i < ARRAY_SIZE(a5xx_hlsq_aperture_regs); i++)
1215 		count += a5xx_hlsq_aperture_regs[i].count;
1216 
1217 	a5xx_state->hlsqregs = kcalloc(count, sizeof(u32), GFP_KERNEL);
1218 	if (!a5xx_state->hlsqregs)
1219 		return;
1220 
1221 	/* Build the crashdump script */
1222 	for (i = 0; i < ARRAY_SIZE(a5xx_hlsq_aperture_regs); i++) {
1223 		u32 type = a5xx_hlsq_aperture_regs[i].type;
1224 		u32 c = a5xx_hlsq_aperture_regs[i].count;
1225 
1226 		/* Write the register to select the desired bank */
1227 		*ptr++ = ((u64) type << 8);
1228 		*ptr++ = (((u64) REG_A5XX_HLSQ_DBG_READ_SEL) << 44) |
1229 			(1 << 21) | 1;
1230 
1231 		*ptr++ = offset;
1232 		*ptr++ = (((u64) REG_A5XX_HLSQ_DBG_AHB_READ_APERTURE) << 44)
1233 			| c;
1234 
1235 		offset += c * sizeof(u32);
1236 	}
1237 
1238 	/* Write two zeros to close off the script */
1239 	*ptr++ = 0;
1240 	*ptr++ = 0;
1241 
1242 	if (a5xx_crashdumper_run(gpu, &dumper)) {
1243 		kfree(a5xx_state->hlsqregs);
1244 		msm_gem_kernel_put(dumper.bo, gpu->aspace, true);
1245 		return;
1246 	}
1247 
1248 	/* Copy the data from the crashdumper to the state */
1249 	memcpy(a5xx_state->hlsqregs, dumper.ptr + (256 * SZ_1K),
1250 		count * sizeof(u32));
1251 
1252 	msm_gem_kernel_put(dumper.bo, gpu->aspace, true);
1253 }
1254 
/*
 * Capture a snapshot of the GPU state for debugging/coredump.  Returns the
 * embedded common state (the caller releases it via a5xx_gpu_state_put())
 * or ERR_PTR(-ENOMEM).
 */
static struct msm_gpu_state *a5xx_gpu_state_get(struct msm_gpu *gpu)
{
	struct a5xx_gpu_state *a5xx_state = kzalloc(sizeof(*a5xx_state),
			GFP_KERNEL);

	if (!a5xx_state)
		return ERR_PTR(-ENOMEM);

	/* Temporarily disable hardware clock gating before reading the hw */
	a5xx_set_hwcg(gpu, false);

	/* First get the generic state from the adreno core */
	adreno_gpu_state_get(gpu, &(a5xx_state->base));

	a5xx_state->base.rbbm_status = gpu_read(gpu, REG_A5XX_RBBM_STATUS);

	/* Get the HLSQ regs with the help of the crashdumper */
	a5xx_gpu_state_get_hlsq_regs(gpu, a5xx_state);

	/* Re-enable hardware clock gating now that we are done */
	a5xx_set_hwcg(gpu, true);

	return &a5xx_state->base;
}
1278 
/*
 * kref release callback for a5xx GPU states: free the a5xx-specific HLSQ
 * register copy, then the common state, then the container itself.
 */
static void a5xx_gpu_state_destroy(struct kref *kref)
{
	struct msm_gpu_state *state = container_of(kref,
		struct msm_gpu_state, ref);
	struct a5xx_gpu_state *a5xx_state = container_of(state,
		struct a5xx_gpu_state, base);

	kfree(a5xx_state->hlsqregs);

	adreno_gpu_state_destroy(state);
	kfree(a5xx_state);
}
1291 
1292 int a5xx_gpu_state_put(struct msm_gpu_state *state)
1293 {
1294 	if (IS_ERR_OR_NULL(state))
1295 		return 1;
1296 
1297 	return kref_put(&state->ref, a5xx_gpu_state_destroy);
1298 }
1299 
1300 
1301 #if defined(CONFIG_DEBUG_FS) || defined(CONFIG_DEV_COREDUMP)
1302 void a5xx_show(struct msm_gpu *gpu, struct msm_gpu_state *state,
1303 		struct drm_printer *p)
1304 {
1305 	int i, j;
1306 	u32 pos = 0;
1307 	struct a5xx_gpu_state *a5xx_state = container_of(state,
1308 		struct a5xx_gpu_state, base);
1309 
1310 	if (IS_ERR_OR_NULL(state))
1311 		return;
1312 
1313 	adreno_show(gpu, state, p);
1314 
1315 	/* Dump the additional a5xx HLSQ registers */
1316 	if (!a5xx_state->hlsqregs)
1317 		return;
1318 
1319 	drm_printf(p, "registers-hlsq:\n");
1320 
1321 	for (i = 0; i < ARRAY_SIZE(a5xx_hlsq_aperture_regs); i++) {
1322 		u32 o = a5xx_hlsq_aperture_regs[i].regoffset;
1323 		u32 c = a5xx_hlsq_aperture_regs[i].count;
1324 
1325 		for (j = 0; j < c; j++, pos++, o++) {
1326 			/*
1327 			 * To keep the crashdump simple we pull the entire range
1328 			 * for each register type but not all of the registers
1329 			 * in the range are valid. Fortunately invalid registers
1330 			 * stick out like a sore thumb with a value of
1331 			 * 0xdeadbeef
1332 			 */
1333 			if (a5xx_state->hlsqregs[pos] == 0xdeadbeef)
1334 				continue;
1335 
1336 			drm_printf(p, "  - { offset: 0x%04x, value: 0x%08x }\n",
1337 				o << 2, a5xx_state->hlsqregs[pos]);
1338 		}
1339 	}
1340 }
1341 #endif
1342 
1343 static struct msm_ringbuffer *a5xx_active_ring(struct msm_gpu *gpu)
1344 {
1345 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1346 	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
1347 
1348 	return a5xx_gpu->cur_ring;
1349 }
1350 
/*
 * Return the busy time (in microseconds, given core_clk is in Hz) since the
 * last call, derived from the free-running RBBM busy-cycle counter.  Used
 * by devfreq for frequency scaling.
 */
static unsigned long a5xx_gpu_busy(struct msm_gpu *gpu)
{
	u64 busy_cycles, busy_time;

	busy_cycles = gpu_read64(gpu, REG_A5XX_RBBM_PERFCTR_RBBM_0_LO,
			REG_A5XX_RBBM_PERFCTR_RBBM_0_HI);

	/* Convert the cycle delta to microseconds */
	busy_time = busy_cycles - gpu->devfreq.busy_cycles;
	do_div(busy_time, clk_get_rate(gpu->core_clk) / 1000000);

	gpu->devfreq.busy_cycles = busy_cycles;

	/* Saturate rather than truncate if the value overflows unsigned long */
	if (WARN_ON(busy_time > ~0LU))
		return ~0LU;

	return (unsigned long)busy_time;
}
1368 
/* a5xx implementation of the adreno/msm GPU function table */
static const struct adreno_gpu_funcs funcs = {
	.base = {
		.get_param = adreno_get_param,
		.hw_init = a5xx_hw_init,
		.pm_suspend = a5xx_pm_suspend,
		.pm_resume = a5xx_pm_resume,
		.recover = a5xx_recover,
		.submit = a5xx_submit,
		.flush = a5xx_flush,
		.active_ring = a5xx_active_ring,
		.irq = a5xx_irq,
		.destroy = a5xx_destroy,
#if defined(CONFIG_DEBUG_FS) || defined(CONFIG_DEV_COREDUMP)
		.show = a5xx_show,
#endif
#if defined(CONFIG_DEBUG_FS)
		.debugfs_init = a5xx_debugfs_init,
#endif
		.gpu_busy = a5xx_gpu_busy,
		.gpu_state_get = a5xx_gpu_state_get,
		.gpu_state_put = a5xx_gpu_state_put,
	},
	.get_timestamp = a5xx_get_timestamp,
};
1393 
1394 static void check_speed_bin(struct device *dev)
1395 {
1396 	struct nvmem_cell *cell;
1397 	u32 bin, val;
1398 
1399 	cell = nvmem_cell_get(dev, "speed_bin");
1400 
1401 	/* If a nvmem cell isn't defined, nothing to do */
1402 	if (IS_ERR(cell))
1403 		return;
1404 
1405 	bin = *((u32 *) nvmem_cell_read(cell, NULL));
1406 	nvmem_cell_put(cell);
1407 
1408 	val = (1 << bin);
1409 
1410 	dev_pm_opp_set_supported_hw(dev, &val, 1);
1411 }
1412 
/*
 * Allocate and initialize the a5xx GPU instance: wire up the register
 * tables and function pointers, run the common adreno init (with 4 rings),
 * install the MMU fault handler, and set up preemption.
 *
 * Returns the embedded msm_gpu on success or an ERR_PTR on failure; on the
 * adreno_gpu_init() error path the partially-built instance is destroyed.
 */
struct msm_gpu *a5xx_gpu_init(struct drm_device *dev)
{
	struct msm_drm_private *priv = dev->dev_private;
	struct platform_device *pdev = priv->gpu_pdev;
	struct a5xx_gpu *a5xx_gpu = NULL;
	struct adreno_gpu *adreno_gpu;
	struct msm_gpu *gpu;
	int ret;

	if (!pdev) {
		DRM_DEV_ERROR(dev->dev, "No A5XX device is defined\n");
		return ERR_PTR(-ENXIO);
	}

	a5xx_gpu = kzalloc(sizeof(*a5xx_gpu), GFP_KERNEL);
	if (!a5xx_gpu)
		return ERR_PTR(-ENOMEM);

	adreno_gpu = &a5xx_gpu->base;
	gpu = &adreno_gpu->base;

	adreno_gpu->registers = a5xx_registers;
	adreno_gpu->reg_offsets = a5xx_register_offsets;

	/* Default leakage value used by the limits-management code */
	a5xx_gpu->lm_leakage = 0x4E001A;

	/* Constrain the OPP table to the fused speed bin, if present */
	check_speed_bin(&pdev->dev);

	ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs, 4);
	if (ret) {
		a5xx_destroy(&(a5xx_gpu->base.base));
		return ERR_PTR(ret);
	}

	if (gpu->aspace)
		msm_mmu_set_fault_handler(gpu->aspace->mmu, gpu, a5xx_fault_handler);

	/* Set up the preemption specific bits and pieces for each ringbuffer */
	a5xx_preempt_init(gpu);

	return gpu;
}
1455