xref: /openbmc/linux/drivers/gpu/drm/msm/adreno/a5xx_gpu.c (revision 29c37341)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /* Copyright (c) 2016-2017 The Linux Foundation. All rights reserved.
3  */
4 
5 #include <linux/kernel.h>
6 #include <linux/types.h>
7 #include <linux/cpumask.h>
8 #include <linux/qcom_scm.h>
9 #include <linux/pm_opp.h>
10 #include <linux/nvmem-consumer.h>
11 #include <linux/slab.h>
12 #include "msm_gem.h"
13 #include "msm_mmu.h"
14 #include "a5xx_gpu.h"
15 
16 extern bool hang_debug;
17 static void a5xx_dump(struct msm_gpu *gpu);
18 
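/* Peripheral Authentication Service ID used for SCM calls that manage the GPU zap shader */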
19 #define GPU_PAS_ID 13
20 
21 static void a5xx_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
22 {
23 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
24 	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
25 	uint32_t wptr;
26 	unsigned long flags;
27 
28 	spin_lock_irqsave(&ring->lock, flags);
29 
30 	/* Copy the shadow to the actual register */
31 	ring->cur = ring->next;
32 
33 	/* Make sure to wrap wptr if we need to */
34 	wptr = get_wptr(ring);
35 
36 	spin_unlock_irqrestore(&ring->lock, flags);
37 
38 	/* Make sure everything is posted before making a decision */
39 	mb();
40 
41 	/* Update HW if this is the current ring and we are not in preempt */
42 	if (a5xx_gpu->cur_ring == ring && !a5xx_in_preempt(a5xx_gpu))
43 		gpu_write(gpu, REG_A5XX_CP_RB_WPTR, wptr);
44 }
45 
46 static void a5xx_submit_in_rb(struct msm_gpu *gpu, struct msm_gem_submit *submit,
47 	struct msm_file_private *ctx)
48 {
49 	struct msm_drm_private *priv = gpu->dev->dev_private;
50 	struct msm_ringbuffer *ring = submit->ring;
51 	struct msm_gem_object *obj;
52 	uint32_t *ptr, dwords;
53 	unsigned int i;
54 
55 	for (i = 0; i < submit->nr_cmds; i++) {
56 		switch (submit->cmd[i].type) {
57 		case MSM_SUBMIT_CMD_IB_TARGET_BUF:
58 			break;
59 		case MSM_SUBMIT_CMD_CTX_RESTORE_BUF:
60 			if (priv->lastctx == ctx)
61 				break;
62 			/* fall-thru */
63 		case MSM_SUBMIT_CMD_BUF:
64 			/* copy commands into RB: */
65 			obj = submit->bos[submit->cmd[i].idx].obj;
66 			dwords = submit->cmd[i].size;
67 
68 			ptr = msm_gem_get_vaddr(&obj->base);
69 
70 			/* _get_vaddr() shouldn't fail at this point,
71 			 * since we've already mapped it once in
72 			 * submit_reloc()
73 			 */
74 			if (WARN_ON(!ptr))
75 				return;
76 
77 			for (i = 0; i < dwords; i++) {
78 				/* normally the OUT_PKTn() would wait
79 				 * for space for the packet.  But since
80 				 * we just OUT_RING() the whole thing,
81 				 * need to call adreno_wait_ring()
82 				 * we need to call adreno_wait_ring()
83 				 * ourselves:
84 				adreno_wait_ring(ring, 1);
85 				OUT_RING(ring, ptr[i]);
86 			}
87 
88 			msm_gem_put_vaddr(&obj->base);
89 
90 			break;
91 		}
92 	}
93 
94 	a5xx_flush(gpu, ring);
95 	a5xx_preempt_trigger(gpu);
96 
97 	/* we might not necessarily have a cmd from userspace to
98 	 * trigger an event to know that submit has completed, so
99 	 * do this manually:
100 	 */
101 	a5xx_idle(gpu, ring);
102 	ring->memptrs->fence = submit->seqno;
103 	msm_gpu_retire(gpu);
104 }
105 
106 static void a5xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit,
107 	struct msm_file_private *ctx)
108 {
109 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
110 	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
111 	struct msm_drm_private *priv = gpu->dev->dev_private;
112 	struct msm_ringbuffer *ring = submit->ring;
113 	unsigned int i, ibs = 0;
114 
115 	if (IS_ENABLED(CONFIG_DRM_MSM_GPU_SUDO) && submit->in_rb) {
116 		priv->lastctx = NULL;
117 		a5xx_submit_in_rb(gpu, submit, ctx);
118 		return;
119 	}
120 
121 	OUT_PKT7(ring, CP_PREEMPT_ENABLE_GLOBAL, 1);
122 	OUT_RING(ring, 0x02);
123 
124 	/* Turn off protected mode to write to special registers */
125 	OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
126 	OUT_RING(ring, 0);
127 
128 	/* Set the save preemption record for the ring/command */
129 	OUT_PKT4(ring, REG_A5XX_CP_CONTEXT_SWITCH_SAVE_ADDR_LO, 2);
130 	OUT_RING(ring, lower_32_bits(a5xx_gpu->preempt_iova[submit->ring->id]));
131 	OUT_RING(ring, upper_32_bits(a5xx_gpu->preempt_iova[submit->ring->id]));
132 
133 	/* Turn back on protected mode */
134 	OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
135 	OUT_RING(ring, 1);
136 
137 	/* Enable local preemption for finegrain preemption */
138 	OUT_PKT7(ring, CP_PREEMPT_ENABLE_GLOBAL, 1);
139 	OUT_RING(ring, 0x02);
140 
141 	/* Allow CP_CONTEXT_SWITCH_YIELD packets in the IB2 */
142 	OUT_PKT7(ring, CP_YIELD_ENABLE, 1);
143 	OUT_RING(ring, 0x02);
144 
145 	/* Submit the commands */
146 	for (i = 0; i < submit->nr_cmds; i++) {
147 		switch (submit->cmd[i].type) {
148 		case MSM_SUBMIT_CMD_IB_TARGET_BUF:
149 			break;
150 		case MSM_SUBMIT_CMD_CTX_RESTORE_BUF:
151 			if (priv->lastctx == ctx)
152 				break;
153 			/* fall-thru */
154 		case MSM_SUBMIT_CMD_BUF:
155 			OUT_PKT7(ring, CP_INDIRECT_BUFFER_PFE, 3);
156 			OUT_RING(ring, lower_32_bits(submit->cmd[i].iova));
157 			OUT_RING(ring, upper_32_bits(submit->cmd[i].iova));
158 			OUT_RING(ring, submit->cmd[i].size);
159 			ibs++;
160 			break;
161 		}
162 	}
163 
164 	/*
165 	 * Write the render mode to NULL (0) to indicate to the CP that the IBs
166 	 * are done rendering - otherwise a lucky preemption would start
167 	 * replaying from the last checkpoint
168 	 */
169 	OUT_PKT7(ring, CP_SET_RENDER_MODE, 5);
170 	OUT_RING(ring, 0);
171 	OUT_RING(ring, 0);
172 	OUT_RING(ring, 0);
173 	OUT_RING(ring, 0);
174 	OUT_RING(ring, 0);
175 
176 	/* Turn off IB level preemptions */
177 	OUT_PKT7(ring, CP_YIELD_ENABLE, 1);
178 	OUT_RING(ring, 0x01);
179 
180 	/* Write the fence to the scratch register */
181 	OUT_PKT4(ring, REG_A5XX_CP_SCRATCH_REG(2), 1);
182 	OUT_RING(ring, submit->seqno);
183 
184 	/*
185 	 * Execute a CACHE_FLUSH_TS event. This ensures that the
186 	 * timestamp is written to memory and then triggers the interrupt
187 	 */
188 	OUT_PKT7(ring, CP_EVENT_WRITE, 4);
189 	OUT_RING(ring, CP_EVENT_WRITE_0_EVENT(CACHE_FLUSH_TS) |
190 		CP_EVENT_WRITE_0_IRQ);
191 	OUT_RING(ring, lower_32_bits(rbmemptr(ring, fence)));
192 	OUT_RING(ring, upper_32_bits(rbmemptr(ring, fence)));
193 	OUT_RING(ring, submit->seqno);
194 
195 	/* Yield the floor on command completion */
196 	OUT_PKT7(ring, CP_CONTEXT_SWITCH_YIELD, 4);
197 	/*
198 	 * If dword[2:1] are non zero, they specify an address for the CP to
199 	 * write the value of dword[3] to on preemption complete. Write 0 to
200 	 * skip the write
201 	 */
202 	OUT_RING(ring, 0x00);
203 	OUT_RING(ring, 0x00);
204 	/* Data value - not used if the address above is 0 */
205 	OUT_RING(ring, 0x01);
206 	/* Set bit 0 to trigger an interrupt on preempt complete */
207 	OUT_RING(ring, 0x01);
208 
209 	a5xx_flush(gpu, ring);
210 
211 	/* Check to see if we need to start preemption */
212 	a5xx_preempt_trigger(gpu);
213 }
214 
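/*
 * Hardware clock gating (HWCG) register/value pairs, written out (or zeroed)
 * by a5xx_set_hwcg() below.
 */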
215 static const struct {
216 	u32 offset;
217 	u32 value;
218 } a5xx_hwcg[] = {
219 	{REG_A5XX_RBBM_CLOCK_CNTL_SP0, 0x02222222},
220 	{REG_A5XX_RBBM_CLOCK_CNTL_SP1, 0x02222222},
221 	{REG_A5XX_RBBM_CLOCK_CNTL_SP2, 0x02222222},
222 	{REG_A5XX_RBBM_CLOCK_CNTL_SP3, 0x02222222},
223 	{REG_A5XX_RBBM_CLOCK_CNTL2_SP0, 0x02222220},
224 	{REG_A5XX_RBBM_CLOCK_CNTL2_SP1, 0x02222220},
225 	{REG_A5XX_RBBM_CLOCK_CNTL2_SP2, 0x02222220},
226 	{REG_A5XX_RBBM_CLOCK_CNTL2_SP3, 0x02222220},
227 	{REG_A5XX_RBBM_CLOCK_HYST_SP0, 0x0000F3CF},
228 	{REG_A5XX_RBBM_CLOCK_HYST_SP1, 0x0000F3CF},
229 	{REG_A5XX_RBBM_CLOCK_HYST_SP2, 0x0000F3CF},
230 	{REG_A5XX_RBBM_CLOCK_HYST_SP3, 0x0000F3CF},
231 	{REG_A5XX_RBBM_CLOCK_DELAY_SP0, 0x00000080},
232 	{REG_A5XX_RBBM_CLOCK_DELAY_SP1, 0x00000080},
233 	{REG_A5XX_RBBM_CLOCK_DELAY_SP2, 0x00000080},
234 	{REG_A5XX_RBBM_CLOCK_DELAY_SP3, 0x00000080},
235 	{REG_A5XX_RBBM_CLOCK_CNTL_TP0, 0x22222222},
236 	{REG_A5XX_RBBM_CLOCK_CNTL_TP1, 0x22222222},
237 	{REG_A5XX_RBBM_CLOCK_CNTL_TP2, 0x22222222},
238 	{REG_A5XX_RBBM_CLOCK_CNTL_TP3, 0x22222222},
239 	{REG_A5XX_RBBM_CLOCK_CNTL2_TP0, 0x22222222},
240 	{REG_A5XX_RBBM_CLOCK_CNTL2_TP1, 0x22222222},
241 	{REG_A5XX_RBBM_CLOCK_CNTL2_TP2, 0x22222222},
242 	{REG_A5XX_RBBM_CLOCK_CNTL2_TP3, 0x22222222},
243 	{REG_A5XX_RBBM_CLOCK_CNTL3_TP0, 0x00002222},
244 	{REG_A5XX_RBBM_CLOCK_CNTL3_TP1, 0x00002222},
245 	{REG_A5XX_RBBM_CLOCK_CNTL3_TP2, 0x00002222},
246 	{REG_A5XX_RBBM_CLOCK_CNTL3_TP3, 0x00002222},
247 	{REG_A5XX_RBBM_CLOCK_HYST_TP0, 0x77777777},
248 	{REG_A5XX_RBBM_CLOCK_HYST_TP1, 0x77777777},
249 	{REG_A5XX_RBBM_CLOCK_HYST_TP2, 0x77777777},
250 	{REG_A5XX_RBBM_CLOCK_HYST_TP3, 0x77777777},
251 	{REG_A5XX_RBBM_CLOCK_HYST2_TP0, 0x77777777},
252 	{REG_A5XX_RBBM_CLOCK_HYST2_TP1, 0x77777777},
253 	{REG_A5XX_RBBM_CLOCK_HYST2_TP2, 0x77777777},
254 	{REG_A5XX_RBBM_CLOCK_HYST2_TP3, 0x77777777},
255 	{REG_A5XX_RBBM_CLOCK_HYST3_TP0, 0x00007777},
256 	{REG_A5XX_RBBM_CLOCK_HYST3_TP1, 0x00007777},
257 	{REG_A5XX_RBBM_CLOCK_HYST3_TP2, 0x00007777},
258 	{REG_A5XX_RBBM_CLOCK_HYST3_TP3, 0x00007777},
259 	{REG_A5XX_RBBM_CLOCK_DELAY_TP0, 0x11111111},
260 	{REG_A5XX_RBBM_CLOCK_DELAY_TP1, 0x11111111},
261 	{REG_A5XX_RBBM_CLOCK_DELAY_TP2, 0x11111111},
262 	{REG_A5XX_RBBM_CLOCK_DELAY_TP3, 0x11111111},
263 	{REG_A5XX_RBBM_CLOCK_DELAY2_TP0, 0x11111111},
264 	{REG_A5XX_RBBM_CLOCK_DELAY2_TP1, 0x11111111},
265 	{REG_A5XX_RBBM_CLOCK_DELAY2_TP2, 0x11111111},
266 	{REG_A5XX_RBBM_CLOCK_DELAY2_TP3, 0x11111111},
267 	{REG_A5XX_RBBM_CLOCK_DELAY3_TP0, 0x00001111},
268 	{REG_A5XX_RBBM_CLOCK_DELAY3_TP1, 0x00001111},
269 	{REG_A5XX_RBBM_CLOCK_DELAY3_TP2, 0x00001111},
270 	{REG_A5XX_RBBM_CLOCK_DELAY3_TP3, 0x00001111},
271 	{REG_A5XX_RBBM_CLOCK_CNTL_UCHE, 0x22222222},
272 	{REG_A5XX_RBBM_CLOCK_CNTL2_UCHE, 0x22222222},
273 	{REG_A5XX_RBBM_CLOCK_CNTL3_UCHE, 0x22222222},
274 	{REG_A5XX_RBBM_CLOCK_CNTL4_UCHE, 0x00222222},
275 	{REG_A5XX_RBBM_CLOCK_HYST_UCHE, 0x00444444},
276 	{REG_A5XX_RBBM_CLOCK_DELAY_UCHE, 0x00000002},
277 	{REG_A5XX_RBBM_CLOCK_CNTL_RB0, 0x22222222},
278 	{REG_A5XX_RBBM_CLOCK_CNTL_RB1, 0x22222222},
279 	{REG_A5XX_RBBM_CLOCK_CNTL_RB2, 0x22222222},
280 	{REG_A5XX_RBBM_CLOCK_CNTL_RB3, 0x22222222},
281 	{REG_A5XX_RBBM_CLOCK_CNTL2_RB0, 0x00222222},
282 	{REG_A5XX_RBBM_CLOCK_CNTL2_RB1, 0x00222222},
283 	{REG_A5XX_RBBM_CLOCK_CNTL2_RB2, 0x00222222},
284 	{REG_A5XX_RBBM_CLOCK_CNTL2_RB3, 0x00222222},
285 	{REG_A5XX_RBBM_CLOCK_CNTL_CCU0, 0x00022220},
286 	{REG_A5XX_RBBM_CLOCK_CNTL_CCU1, 0x00022220},
287 	{REG_A5XX_RBBM_CLOCK_CNTL_CCU2, 0x00022220},
288 	{REG_A5XX_RBBM_CLOCK_CNTL_CCU3, 0x00022220},
289 	{REG_A5XX_RBBM_CLOCK_CNTL_RAC, 0x05522222},
290 	{REG_A5XX_RBBM_CLOCK_CNTL2_RAC, 0x00505555},
291 	{REG_A5XX_RBBM_CLOCK_HYST_RB_CCU0, 0x04040404},
292 	{REG_A5XX_RBBM_CLOCK_HYST_RB_CCU1, 0x04040404},
293 	{REG_A5XX_RBBM_CLOCK_HYST_RB_CCU2, 0x04040404},
294 	{REG_A5XX_RBBM_CLOCK_HYST_RB_CCU3, 0x04040404},
295 	{REG_A5XX_RBBM_CLOCK_HYST_RAC, 0x07444044},
296 	{REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_0, 0x00000002},
297 	{REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_1, 0x00000002},
298 	{REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_2, 0x00000002},
299 	{REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_3, 0x00000002},
300 	{REG_A5XX_RBBM_CLOCK_DELAY_RAC, 0x00010011},
301 	{REG_A5XX_RBBM_CLOCK_CNTL_TSE_RAS_RBBM, 0x04222222},
302 	{REG_A5XX_RBBM_CLOCK_MODE_GPC, 0x02222222},
303 	{REG_A5XX_RBBM_CLOCK_MODE_VFD, 0x00002222},
304 	{REG_A5XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM, 0x00000000},
305 	{REG_A5XX_RBBM_CLOCK_HYST_GPC, 0x04104004},
306 	{REG_A5XX_RBBM_CLOCK_HYST_VFD, 0x00000000},
307 	{REG_A5XX_RBBM_CLOCK_DELAY_HLSQ, 0x00000000},
308 	{REG_A5XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM, 0x00004000},
309 	{REG_A5XX_RBBM_CLOCK_DELAY_GPC, 0x00000200},
310 	{REG_A5XX_RBBM_CLOCK_DELAY_VFD, 0x00002222}
311 };
312 
313 void a5xx_set_hwcg(struct msm_gpu *gpu, bool state)
314 {
315 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
316 	unsigned int i;
317 
318 	for (i = 0; i < ARRAY_SIZE(a5xx_hwcg); i++)
319 		gpu_write(gpu, a5xx_hwcg[i].offset,
320 			state ? a5xx_hwcg[i].value : 0);
321 
322 	if (adreno_is_a540(adreno_gpu)) {
323 		gpu_write(gpu, REG_A5XX_RBBM_CLOCK_DELAY_GPMU, state ? 0x00000770 : 0);
324 		gpu_write(gpu, REG_A5XX_RBBM_CLOCK_HYST_GPMU, state ? 0x00000004 : 0);
325 	}
326 
327 	gpu_write(gpu, REG_A5XX_RBBM_CLOCK_CNTL, state ? 0xAAA8AA00 : 0);
328 	gpu_write(gpu, REG_A5XX_RBBM_ISDB_CNT, state ? 0x182 : 0x180);
329 }
330 
331 static int a5xx_me_init(struct msm_gpu *gpu)
332 {
333 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
334 	struct msm_ringbuffer *ring = gpu->rb[0];
335 
336 	OUT_PKT7(ring, CP_ME_INIT, 8);
337 
338 	OUT_RING(ring, 0x0000002F);
339 
340 	/* Enable multiple hardware contexts */
341 	OUT_RING(ring, 0x00000003);
342 
343 	/* Enable error detection */
344 	OUT_RING(ring, 0x20000000);
345 
346 	/* Don't enable header dump */
347 	OUT_RING(ring, 0x00000000);
348 	OUT_RING(ring, 0x00000000);
349 
350 	/* Specify workarounds for various microcode issues */
351 	if (adreno_is_a530(adreno_gpu)) {
352 		/* Workaround for token end syncs
353 		 * Force a WFI after every direct-render 3D mode draw and every
354 		 * 2D mode 3 draw
355 		 */
356 		OUT_RING(ring, 0x0000000B);
357 	} else if (adreno_is_a510(adreno_gpu)) {
358 		/* Workaround for token and syncs */
359 		OUT_RING(ring, 0x00000001);
360 	} else {
361 		/* No workarounds enabled */
362 		OUT_RING(ring, 0x00000000);
363 	}
364 
365 	OUT_RING(ring, 0x00000000);
366 	OUT_RING(ring, 0x00000000);
367 
368 	gpu->funcs->flush(gpu, ring);
369 	return a5xx_idle(gpu, ring) ? 0 : -EINVAL;
370 }
371 
372 static int a5xx_preempt_start(struct msm_gpu *gpu)
373 {
374 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
375 	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
376 	struct msm_ringbuffer *ring = gpu->rb[0];
377 
378 	if (gpu->nr_rings == 1)
379 		return 0;
380 
381 	/* Turn off protected mode to write to special registers */
382 	OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
383 	OUT_RING(ring, 0);
384 
385 	/* Set the save preemption record for the ring/command */
386 	OUT_PKT4(ring, REG_A5XX_CP_CONTEXT_SWITCH_SAVE_ADDR_LO, 2);
387 	OUT_RING(ring, lower_32_bits(a5xx_gpu->preempt_iova[ring->id]));
388 	OUT_RING(ring, upper_32_bits(a5xx_gpu->preempt_iova[ring->id]));
389 
390 	/* Turn back on protected mode */
391 	OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
392 	OUT_RING(ring, 1);
393 
394 	OUT_PKT7(ring, CP_PREEMPT_ENABLE_GLOBAL, 1);
395 	OUT_RING(ring, 0x00);
396 
397 	OUT_PKT7(ring, CP_PREEMPT_ENABLE_LOCAL, 1);
398 	OUT_RING(ring, 0x01);
399 
400 	OUT_PKT7(ring, CP_YIELD_ENABLE, 1);
401 	OUT_RING(ring, 0x01);
402 
403 	/* Yield the floor on command completion */
404 	OUT_PKT7(ring, CP_CONTEXT_SWITCH_YIELD, 4);
405 	OUT_RING(ring, 0x00);
406 	OUT_RING(ring, 0x00);
407 	OUT_RING(ring, 0x01);
408 	OUT_RING(ring, 0x01);
409 
410 	gpu->funcs->flush(gpu, ring);
411 
412 	return a5xx_idle(gpu, ring) ? 0 : -EINVAL;
413 }
414 
415 static int a5xx_ucode_init(struct msm_gpu *gpu)
416 {
417 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
418 	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
419 	int ret;
420 
421 	if (!a5xx_gpu->pm4_bo) {
422 		a5xx_gpu->pm4_bo = adreno_fw_create_bo(gpu,
423 			adreno_gpu->fw[ADRENO_FW_PM4], &a5xx_gpu->pm4_iova);
424 
425 
426 		if (IS_ERR(a5xx_gpu->pm4_bo)) {
427 			ret = PTR_ERR(a5xx_gpu->pm4_bo);
428 			a5xx_gpu->pm4_bo = NULL;
429 			DRM_DEV_ERROR(gpu->dev->dev, "could not allocate PM4: %d\n",
430 				ret);
431 			return ret;
432 		}
433 
434 		msm_gem_object_set_name(a5xx_gpu->pm4_bo, "pm4fw");
435 	}
436 
437 	if (!a5xx_gpu->pfp_bo) {
438 		a5xx_gpu->pfp_bo = adreno_fw_create_bo(gpu,
439 			adreno_gpu->fw[ADRENO_FW_PFP], &a5xx_gpu->pfp_iova);
440 
441 		if (IS_ERR(a5xx_gpu->pfp_bo)) {
442 			ret = PTR_ERR(a5xx_gpu->pfp_bo);
443 			a5xx_gpu->pfp_bo = NULL;
444 			DRM_DEV_ERROR(gpu->dev->dev, "could not allocate PFP: %d\n",
445 				ret);
446 			return ret;
447 		}
448 
449 		msm_gem_object_set_name(a5xx_gpu->pfp_bo, "pfpfw");
450 	}
451 
452 	gpu_write64(gpu, REG_A5XX_CP_ME_INSTR_BASE_LO,
453 		REG_A5XX_CP_ME_INSTR_BASE_HI, a5xx_gpu->pm4_iova);
454 
455 	gpu_write64(gpu, REG_A5XX_CP_PFP_INSTR_BASE_LO,
456 		REG_A5XX_CP_PFP_INSTR_BASE_HI, a5xx_gpu->pfp_iova);
457 
458 	return 0;
459 }
460 
461 #define SCM_GPU_ZAP_SHADER_RESUME 0
462 
463 static int a5xx_zap_shader_resume(struct msm_gpu *gpu)
464 {
465 	int ret;
466 
467 	ret = qcom_scm_set_remote_state(SCM_GPU_ZAP_SHADER_RESUME, GPU_PAS_ID);
468 	if (ret)
469 		DRM_ERROR("%s: zap-shader resume failed: %d\n",
470 			gpu->name, ret);
471 
472 	return ret;
473 }
474 
475 static int a5xx_zap_shader_init(struct msm_gpu *gpu)
476 {
477 	static bool loaded;
478 	int ret;
479 
480 	/*
481 	 * If the zap shader is already loaded into memory we just need to kick
482 	 * the remote processor to reinitialize it
483 	 */
484 	if (loaded)
485 		return a5xx_zap_shader_resume(gpu);
486 
487 	ret = adreno_zap_shader_load(gpu, GPU_PAS_ID);
488 
489 	loaded = !ret;
490 	return ret;
491 }
492 
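/* RBBM interrupt sources enabled at runtime via REG_A5XX_RBBM_INT_0_MASK */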
493 #define A5XX_INT_MASK (A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR | \
494 	  A5XX_RBBM_INT_0_MASK_RBBM_TRANSFER_TIMEOUT | \
495 	  A5XX_RBBM_INT_0_MASK_RBBM_ME_MS_TIMEOUT | \
496 	  A5XX_RBBM_INT_0_MASK_RBBM_PFP_MS_TIMEOUT | \
497 	  A5XX_RBBM_INT_0_MASK_RBBM_ETS_MS_TIMEOUT | \
498 	  A5XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNC_OVERFLOW | \
499 	  A5XX_RBBM_INT_0_MASK_CP_HW_ERROR | \
500 	  A5XX_RBBM_INT_0_MASK_MISC_HANG_DETECT | \
501 	  A5XX_RBBM_INT_0_MASK_CP_SW | \
502 	  A5XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS | \
503 	  A5XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS | \
504 	  A5XX_RBBM_INT_0_MASK_GPMU_VOLTAGE_DROOP)
505 
506 static int a5xx_hw_init(struct msm_gpu *gpu)
507 {
508 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
509 	int ret;
510 
511 	gpu_write(gpu, REG_A5XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x00000003);
512 
513 	if (adreno_is_a540(adreno_gpu))
514 		gpu_write(gpu, REG_A5XX_VBIF_GATE_OFF_WRREQ_EN, 0x00000009);
515 
516 	/* Make all blocks contribute to the GPU BUSY perf counter */
517 	gpu_write(gpu, REG_A5XX_RBBM_PERFCTR_GPU_BUSY_MASKED, 0xFFFFFFFF);
518 
519 	/* Enable RBBM error reporting bits */
520 	gpu_write(gpu, REG_A5XX_RBBM_AHB_CNTL0, 0x00000001);
521 
522 	if (adreno_gpu->info->quirks & ADRENO_QUIRK_FAULT_DETECT_MASK) {
523 		/*
524 		 * Mask out the activity signals from RB1-3 to avoid false
525 		 * positives
526 		 */
527 
528 		gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL11,
529 			0xF0000000);
530 		gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL12,
531 			0xFFFFFFFF);
532 		gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL13,
533 			0xFFFFFFFF);
534 		gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL14,
535 			0xFFFFFFFF);
536 		gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL15,
537 			0xFFFFFFFF);
538 		gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL16,
539 			0xFFFFFFFF);
540 		gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL17,
541 			0xFFFFFFFF);
542 		gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL18,
543 			0xFFFFFFFF);
544 	}
545 
546 	/* Enable fault detection */
547 	gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_INT_CNTL,
548 		(1 << 30) | 0xFFFF);
549 
550 	/* Turn on performance counters */
551 	gpu_write(gpu, REG_A5XX_RBBM_PERFCTR_CNTL, 0x01);
552 
553 	/* Select CP0 to always count cycles */
554 	gpu_write(gpu, REG_A5XX_CP_PERFCTR_CP_SEL_0, PERF_CP_ALWAYS_COUNT);
555 
556 	/* Select RBBM0 to countable 6 to get the busy status for devfreq */
557 	gpu_write(gpu, REG_A5XX_RBBM_PERFCTR_RBBM_SEL_0, 6);
558 
559 	/* Increase VFD cache access so LRZ and other data gets evicted less */
560 	gpu_write(gpu, REG_A5XX_UCHE_CACHE_WAYS, 0x02);
561 
562 	/* Disable L2 bypass in the UCHE */
563 	gpu_write(gpu, REG_A5XX_UCHE_TRAP_BASE_LO, 0xFFFF0000);
564 	gpu_write(gpu, REG_A5XX_UCHE_TRAP_BASE_HI, 0x0001FFFF);
565 	gpu_write(gpu, REG_A5XX_UCHE_WRITE_THRU_BASE_LO, 0xFFFF0000);
566 	gpu_write(gpu, REG_A5XX_UCHE_WRITE_THRU_BASE_HI, 0x0001FFFF);
567 
568 	/* Set the GMEM VA range (0 to gpu->gmem) */
569 	gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MIN_LO, 0x00100000);
570 	gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MIN_HI, 0x00000000);
571 	gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MAX_LO,
572 		0x00100000 + adreno_gpu->gmem - 1);
573 	gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MAX_HI, 0x00000000);
574 
575 	if (adreno_is_a510(adreno_gpu)) {
576 		gpu_write(gpu, REG_A5XX_CP_MEQ_THRESHOLDS, 0x20);
577 		gpu_write(gpu, REG_A5XX_CP_MERCIU_SIZE, 0x20);
578 		gpu_write(gpu, REG_A5XX_CP_ROQ_THRESHOLDS_2, 0x40000030);
579 		gpu_write(gpu, REG_A5XX_CP_ROQ_THRESHOLDS_1, 0x20100D0A);
580 		gpu_write(gpu, REG_A5XX_PC_DBG_ECO_CNTL,
581 			  (0x200 << 11 | 0x200 << 22));
582 	} else {
583 		gpu_write(gpu, REG_A5XX_CP_MEQ_THRESHOLDS, 0x40);
584 		if (adreno_is_a530(adreno_gpu))
585 			gpu_write(gpu, REG_A5XX_CP_MERCIU_SIZE, 0x40);
586 		if (adreno_is_a540(adreno_gpu))
587 			gpu_write(gpu, REG_A5XX_CP_MERCIU_SIZE, 0x400);
588 		gpu_write(gpu, REG_A5XX_CP_ROQ_THRESHOLDS_2, 0x80000060);
589 		gpu_write(gpu, REG_A5XX_CP_ROQ_THRESHOLDS_1, 0x40201B16);
590 		gpu_write(gpu, REG_A5XX_PC_DBG_ECO_CNTL,
591 			  (0x400 << 11 | 0x300 << 22));
592 	}
593 
594 	if (adreno_gpu->info->quirks & ADRENO_QUIRK_TWO_PASS_USE_WFI)
595 		gpu_rmw(gpu, REG_A5XX_PC_DBG_ECO_CNTL, 0, (1 << 8));
596 
597 	gpu_write(gpu, REG_A5XX_PC_DBG_ECO_CNTL, 0xc0200100);
598 
599 	/* Enable USE_RETENTION_FLOPS */
600 	gpu_write(gpu, REG_A5XX_CP_CHICKEN_DBG, 0x02000000);
601 
602 	/* Enable ME/PFP split notification */
603 	gpu_write(gpu, REG_A5XX_RBBM_AHB_CNTL1, 0xA6FFFFFF);
604 
605 	/*
606 	 *  In A5x, the CCU can send the context_done event of a particular
607 	 *  context to UCHE, which ultimately reaches the CP even while a valid
608 	 *  transaction of that context is still inside the CCU. This can let
609 	 *  the CP program config registers, which makes the "valid transaction"
610 	 *  inside the CCU be interpreted differently and can cause a GPU fault.
611 	 *  This bug is fixed in the latest A510 revision. To enable the fix,
612 	 *  bit[11] of RB_DBG_ECO_CNTL needs to be set to 0 (the default is 1,
613 	 *  i.e. disabled). For older A510 revisions this bit is unused.
614 	 */
615 	if (adreno_is_a510(adreno_gpu))
616 		gpu_rmw(gpu, REG_A5XX_RB_DBG_ECO_CNTL, (1 << 11), 0);
617 
618 	/* Enable HWCG */
619 	a5xx_set_hwcg(gpu, true);
620 
621 	gpu_write(gpu, REG_A5XX_RBBM_AHB_CNTL2, 0x0000003F);
622 
623 	/* Set the highest bank bit */
624 	gpu_write(gpu, REG_A5XX_TPL1_MODE_CNTL, 2 << 7);
625 	gpu_write(gpu, REG_A5XX_RB_MODE_CNTL, 2 << 1);
626 	if (adreno_is_a540(adreno_gpu))
627 		gpu_write(gpu, REG_A5XX_UCHE_DBG_ECO_CNTL_2, 2);
628 
629 	/* Protect registers from the CP */
630 	gpu_write(gpu, REG_A5XX_CP_PROTECT_CNTL, 0x00000007);
631 
632 	/* RBBM */
633 	gpu_write(gpu, REG_A5XX_CP_PROTECT(0), ADRENO_PROTECT_RW(0x04, 4));
634 	gpu_write(gpu, REG_A5XX_CP_PROTECT(1), ADRENO_PROTECT_RW(0x08, 8));
635 	gpu_write(gpu, REG_A5XX_CP_PROTECT(2), ADRENO_PROTECT_RW(0x10, 16));
636 	gpu_write(gpu, REG_A5XX_CP_PROTECT(3), ADRENO_PROTECT_RW(0x20, 32));
637 	gpu_write(gpu, REG_A5XX_CP_PROTECT(4), ADRENO_PROTECT_RW(0x40, 64));
638 	gpu_write(gpu, REG_A5XX_CP_PROTECT(5), ADRENO_PROTECT_RW(0x80, 64));
639 
640 	/* Content protect */
641 	gpu_write(gpu, REG_A5XX_CP_PROTECT(6),
642 		ADRENO_PROTECT_RW(REG_A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_LO,
643 			16));
644 	gpu_write(gpu, REG_A5XX_CP_PROTECT(7),
645 		ADRENO_PROTECT_RW(REG_A5XX_RBBM_SECVID_TRUST_CNTL, 2));
646 
647 	/* CP */
648 	gpu_write(gpu, REG_A5XX_CP_PROTECT(8), ADRENO_PROTECT_RW(0x800, 64));
649 	gpu_write(gpu, REG_A5XX_CP_PROTECT(9), ADRENO_PROTECT_RW(0x840, 8));
650 	gpu_write(gpu, REG_A5XX_CP_PROTECT(10), ADRENO_PROTECT_RW(0x880, 32));
651 	gpu_write(gpu, REG_A5XX_CP_PROTECT(11), ADRENO_PROTECT_RW(0xAA0, 1));
652 
653 	/* RB */
654 	gpu_write(gpu, REG_A5XX_CP_PROTECT(12), ADRENO_PROTECT_RW(0xCC0, 1));
655 	gpu_write(gpu, REG_A5XX_CP_PROTECT(13), ADRENO_PROTECT_RW(0xCF0, 2));
656 
657 	/* VPC */
658 	gpu_write(gpu, REG_A5XX_CP_PROTECT(14), ADRENO_PROTECT_RW(0xE68, 8));
659 	gpu_write(gpu, REG_A5XX_CP_PROTECT(15), ADRENO_PROTECT_RW(0xE70, 4));
660 
661 	/* UCHE */
662 	gpu_write(gpu, REG_A5XX_CP_PROTECT(16), ADRENO_PROTECT_RW(0xE80, 16));
663 
664 	if (adreno_is_a530(adreno_gpu) || adreno_is_a510(adreno_gpu))
665 		gpu_write(gpu, REG_A5XX_CP_PROTECT(17),
666 			ADRENO_PROTECT_RW(0x10000, 0x8000));
667 
668 	gpu_write(gpu, REG_A5XX_RBBM_SECVID_TSB_CNTL, 0);
669 	/*
670 	 * Disable the trusted memory range - we don't actually support secure
671 	 * memory rendering at this point in time and we don't want to block off
672 	 * part of the virtual memory space.
673 	 */
674 	gpu_write64(gpu, REG_A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_LO,
675 		REG_A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_HI, 0x00000000);
676 	gpu_write(gpu, REG_A5XX_RBBM_SECVID_TSB_TRUSTED_SIZE, 0x00000000);
677 
678 	/* Put the GPU into 64 bit by default */
679 	gpu_write(gpu, REG_A5XX_CP_ADDR_MODE_CNTL, 0x1);
680 	gpu_write(gpu, REG_A5XX_VSC_ADDR_MODE_CNTL, 0x1);
681 	gpu_write(gpu, REG_A5XX_GRAS_ADDR_MODE_CNTL, 0x1);
682 	gpu_write(gpu, REG_A5XX_RB_ADDR_MODE_CNTL, 0x1);
683 	gpu_write(gpu, REG_A5XX_PC_ADDR_MODE_CNTL, 0x1);
684 	gpu_write(gpu, REG_A5XX_HLSQ_ADDR_MODE_CNTL, 0x1);
685 	gpu_write(gpu, REG_A5XX_VFD_ADDR_MODE_CNTL, 0x1);
686 	gpu_write(gpu, REG_A5XX_VPC_ADDR_MODE_CNTL, 0x1);
687 	gpu_write(gpu, REG_A5XX_UCHE_ADDR_MODE_CNTL, 0x1);
688 	gpu_write(gpu, REG_A5XX_SP_ADDR_MODE_CNTL, 0x1);
689 	gpu_write(gpu, REG_A5XX_TPL1_ADDR_MODE_CNTL, 0x1);
690 	gpu_write(gpu, REG_A5XX_RBBM_SECVID_TSB_ADDR_MODE_CNTL, 0x1);
691 
692 	/*
693 	 * A VPC corner case with local memory load kill leads to corrupt
694 	 * internal state. A normal disable does not work for all a5x chips,
695 	 * so use the following setting to disable it.
696 	 */
697 	if (adreno_gpu->info->quirks & ADRENO_QUIRK_LMLOADKILL_DISABLE) {
698 		gpu_rmw(gpu, REG_A5XX_VPC_DBG_ECO_CNTL, 0, BIT(23));
699 		gpu_rmw(gpu, REG_A5XX_HLSQ_DBG_ECO_CNTL, BIT(18), 0);
700 	}
701 
702 	ret = adreno_hw_init(gpu);
703 	if (ret)
704 		return ret;
705 
706 	a5xx_preempt_hw_init(gpu);
707 
708 	if (!adreno_is_a510(adreno_gpu))
709 		a5xx_gpmu_ucode_init(gpu);
710 
711 	ret = a5xx_ucode_init(gpu);
712 	if (ret)
713 		return ret;
714 
715 	/* Disable the interrupts through the initial bringup stage */
716 	gpu_write(gpu, REG_A5XX_RBBM_INT_0_MASK, A5XX_INT_MASK);
717 
718 	/* Clear ME_HALT to start the micro engine */
719 	gpu_write(gpu, REG_A5XX_CP_PFP_ME_CNTL, 0);
720 	ret = a5xx_me_init(gpu);
721 	if (ret)
722 		return ret;
723 
724 	ret = a5xx_power_init(gpu);
725 	if (ret)
726 		return ret;
727 
728 	/*
729 	 * Send a pipeline event stat to get misbehaving counters to start
730 	 * ticking correctly
731 	 */
732 	if (adreno_is_a530(adreno_gpu)) {
733 		OUT_PKT7(gpu->rb[0], CP_EVENT_WRITE, 1);
734 		OUT_RING(gpu->rb[0], CP_EVENT_WRITE_0_EVENT(STAT_EVENT));
735 
736 		gpu->funcs->flush(gpu, gpu->rb[0]);
737 		if (!a5xx_idle(gpu, gpu->rb[0]))
738 			return -EINVAL;
739 	}
740 
741 	/*
742 	 * If the chip that we are using does support loading one, then
743 	 * try to load a zap shader into the secure world. If successful
744 	 * we can use the CP to switch out of secure mode. If not then we
745 	 * have no recourse but to try to switch ourselves out manually. If we
746 	 * guessed wrong then access to the RBBM_SECVID_TRUST_CNTL register will
747 	 * be blocked and a permissions violation will soon follow.
748 	 */
749 	ret = a5xx_zap_shader_init(gpu);
750 	if (!ret) {
751 		OUT_PKT7(gpu->rb[0], CP_SET_SECURE_MODE, 1);
752 		OUT_RING(gpu->rb[0], 0x00000000);
753 
754 		gpu->funcs->flush(gpu, gpu->rb[0]);
755 		if (!a5xx_idle(gpu, gpu->rb[0]))
756 			return -EINVAL;
757 	} else if (ret == -ENODEV) {
758 		/*
759 		 * This device does not use zap shader (but print a warning
760 		 * just in case someone got their dt wrong.. hopefully they
761 		 * have a debug UART to realize the error of their ways...
762 		 * if you mess this up you are about to crash horribly)
763 		 */
764 		dev_warn_once(gpu->dev->dev,
765 			"Zap shader not enabled - using SECVID_TRUST_CNTL instead\n");
766 		gpu_write(gpu, REG_A5XX_RBBM_SECVID_TRUST_CNTL, 0x0);
767 	} else {
768 		return ret;
769 	}
770 
771 	/* Last step - yield the ringbuffer */
772 	a5xx_preempt_start(gpu);
773 
774 	return 0;
775 }
776 
777 static void a5xx_recover(struct msm_gpu *gpu)
778 {
779 	int i;
780 
781 	adreno_dump_info(gpu);
782 
783 	for (i = 0; i < 8; i++) {
784 		printk("CP_SCRATCH_REG%d: %u\n", i,
785 			gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(i)));
786 	}
787 
788 	if (hang_debug)
789 		a5xx_dump(gpu);
790 
791 	gpu_write(gpu, REG_A5XX_RBBM_SW_RESET_CMD, 1);
792 	gpu_read(gpu, REG_A5XX_RBBM_SW_RESET_CMD);
793 	gpu_write(gpu, REG_A5XX_RBBM_SW_RESET_CMD, 0);
794 	adreno_recover(gpu);
795 }
796 
797 static void a5xx_destroy(struct msm_gpu *gpu)
798 {
799 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
800 	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
801 
802 	DBG("%s", gpu->name);
803 
804 	a5xx_preempt_fini(gpu);
805 
806 	if (a5xx_gpu->pm4_bo) {
807 		msm_gem_unpin_iova(a5xx_gpu->pm4_bo, gpu->aspace);
808 		drm_gem_object_put(a5xx_gpu->pm4_bo);
809 	}
810 
811 	if (a5xx_gpu->pfp_bo) {
812 		msm_gem_unpin_iova(a5xx_gpu->pfp_bo, gpu->aspace);
813 		drm_gem_object_put(a5xx_gpu->pfp_bo);
814 	}
815 
816 	if (a5xx_gpu->gpmu_bo) {
817 		msm_gem_unpin_iova(a5xx_gpu->gpmu_bo, gpu->aspace);
818 		drm_gem_object_put(a5xx_gpu->gpmu_bo);
819 	}
820 
821 	adreno_gpu_cleanup(adreno_gpu);
822 	kfree(a5xx_gpu);
823 }
824 
825 static inline bool _a5xx_check_idle(struct msm_gpu *gpu)
826 {
827 	if (gpu_read(gpu, REG_A5XX_RBBM_STATUS) & ~A5XX_RBBM_STATUS_HI_BUSY)
828 		return false;
829 
830 	/*
831 	 * Nearly every abnormality ends up pausing the GPU and triggering a
832 	 * fault so we can safely just watch for this one interrupt to fire
833 	 */
834 	return !(gpu_read(gpu, REG_A5XX_RBBM_INT_0_STATUS) &
835 		A5XX_RBBM_INT_0_MASK_MISC_HANG_DETECT);
836 }
837 
838 bool a5xx_idle(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
839 {
840 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
841 	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
842 
843 	if (ring != a5xx_gpu->cur_ring) {
844 		WARN(1, "Tried to idle a non-current ringbuffer\n");
845 		return false;
846 	}
847 
848 	/* wait for CP to drain ringbuffer: */
849 	if (!adreno_idle(gpu, ring))
850 		return false;
851 
852 	if (spin_until(_a5xx_check_idle(gpu))) {
853 		DRM_ERROR("%s: %ps: timeout waiting for GPU to idle: status %8.8X irq %8.8X rptr/wptr %d/%d\n",
854 			gpu->name, __builtin_return_address(0),
855 			gpu_read(gpu, REG_A5XX_RBBM_STATUS),
856 			gpu_read(gpu, REG_A5XX_RBBM_INT_0_STATUS),
857 			gpu_read(gpu, REG_A5XX_CP_RB_RPTR),
858 			gpu_read(gpu, REG_A5XX_CP_RB_WPTR));
859 		return false;
860 	}
861 
862 	return true;
863 }
864 
865 static int a5xx_fault_handler(void *arg, unsigned long iova, int flags)
866 {
867 	struct msm_gpu *gpu = arg;
868 	pr_warn_ratelimited("*** gpu fault: iova=%08lx, flags=%d (%u,%u,%u,%u)\n",
869 			iova, flags,
870 			gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(4)),
871 			gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(5)),
872 			gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(6)),
873 			gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(7)));
874 
875 	return -EFAULT;
876 }
877 
878 static void a5xx_cp_err_irq(struct msm_gpu *gpu)
879 {
880 	u32 status = gpu_read(gpu, REG_A5XX_CP_INTERRUPT_STATUS);
881 
882 	if (status & A5XX_CP_INT_CP_OPCODE_ERROR) {
883 		u32 val;
884 
885 		gpu_write(gpu, REG_A5XX_CP_PFP_STAT_ADDR, 0);
886 
887 		/*
888 		 * REG_A5XX_CP_PFP_STAT_DATA is indexed, and we want index 1 so
889 		 * read it twice
890 		 */
891 
892 		gpu_read(gpu, REG_A5XX_CP_PFP_STAT_DATA);
893 		val = gpu_read(gpu, REG_A5XX_CP_PFP_STAT_DATA);
894 
895 		dev_err_ratelimited(gpu->dev->dev, "CP | opcode error | possible opcode=0x%8.8X\n",
896 			val);
897 	}
898 
899 	if (status & A5XX_CP_INT_CP_HW_FAULT_ERROR)
900 		dev_err_ratelimited(gpu->dev->dev, "CP | HW fault | status=0x%8.8X\n",
901 			gpu_read(gpu, REG_A5XX_CP_HW_FAULT));
902 
903 	if (status & A5XX_CP_INT_CP_DMA_ERROR)
904 		dev_err_ratelimited(gpu->dev->dev, "CP | DMA error\n");
905 
906 	if (status & A5XX_CP_INT_CP_REGISTER_PROTECTION_ERROR) {
907 		u32 val = gpu_read(gpu, REG_A5XX_CP_PROTECT_STATUS);
908 
909 		dev_err_ratelimited(gpu->dev->dev,
910 			"CP | protected mode error | %s | addr=0x%8.8X | status=0x%8.8X\n",
911 			val & (1 << 24) ? "WRITE" : "READ",
912 			(val & 0xFFFFF) >> 2, val);
913 	}
914 
915 	if (status & A5XX_CP_INT_CP_AHB_ERROR) {
916 		u32 status = gpu_read(gpu, REG_A5XX_CP_AHB_FAULT);
917 		const char *access[16] = { "reserved", "reserved",
918 			"timestamp lo", "timestamp hi", "pfp read", "pfp write",
919 			"", "", "me read", "me write", "", "", "crashdump read",
920 			"crashdump write" };
921 
922 		dev_err_ratelimited(gpu->dev->dev,
923 			"CP | AHB error | addr=%X access=%s error=%d | status=0x%8.8X\n",
924 			status & 0xFFFFF, access[(status >> 24) & 0xF],
925 			(status & (1 << 31)), status);
926 	}
927 }
928 
929 static void a5xx_rbbm_err_irq(struct msm_gpu *gpu, u32 status)
930 {
931 	if (status & A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR) {
932 		u32 val = gpu_read(gpu, REG_A5XX_RBBM_AHB_ERROR_STATUS);
933 
934 		dev_err_ratelimited(gpu->dev->dev,
935 			"RBBM | AHB bus error | %s | addr=0x%X | ports=0x%X:0x%X\n",
936 			val & (1 << 28) ? "WRITE" : "READ",
937 			(val & 0xFFFFF) >> 2, (val >> 20) & 0x3,
938 			(val >> 24) & 0xF);
939 
940 		/* Clear the error */
941 		gpu_write(gpu, REG_A5XX_RBBM_AHB_CMD, (1 << 4));
942 
943 		/* Clear the interrupt */
944 		gpu_write(gpu, REG_A5XX_RBBM_INT_CLEAR_CMD,
945 			A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR);
946 	}
947 
948 	if (status & A5XX_RBBM_INT_0_MASK_RBBM_TRANSFER_TIMEOUT)
949 		dev_err_ratelimited(gpu->dev->dev, "RBBM | AHB transfer timeout\n");
950 
951 	if (status & A5XX_RBBM_INT_0_MASK_RBBM_ME_MS_TIMEOUT)
952 		dev_err_ratelimited(gpu->dev->dev, "RBBM | ME master split | status=0x%X\n",
953 			gpu_read(gpu, REG_A5XX_RBBM_AHB_ME_SPLIT_STATUS));
954 
955 	if (status & A5XX_RBBM_INT_0_MASK_RBBM_PFP_MS_TIMEOUT)
956 		dev_err_ratelimited(gpu->dev->dev, "RBBM | PFP master split | status=0x%X\n",
957 			gpu_read(gpu, REG_A5XX_RBBM_AHB_PFP_SPLIT_STATUS));
958 
959 	if (status & A5XX_RBBM_INT_0_MASK_RBBM_ETS_MS_TIMEOUT)
960 		dev_err_ratelimited(gpu->dev->dev, "RBBM | ETS master split | status=0x%X\n",
961 			gpu_read(gpu, REG_A5XX_RBBM_AHB_ETS_SPLIT_STATUS));
962 
963 	if (status & A5XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNC_OVERFLOW)
964 		dev_err_ratelimited(gpu->dev->dev, "RBBM | ATB ASYNC overflow\n");
965 
966 	if (status & A5XX_RBBM_INT_0_MASK_RBBM_ATB_BUS_OVERFLOW)
967 		dev_err_ratelimited(gpu->dev->dev, "RBBM | ATB bus overflow\n");
968 }
969 
970 static void a5xx_uche_err_irq(struct msm_gpu *gpu)
971 {
972 	uint64_t addr = (uint64_t) gpu_read(gpu, REG_A5XX_UCHE_TRAP_LOG_HI);
973 
974 	addr |= gpu_read(gpu, REG_A5XX_UCHE_TRAP_LOG_LO);
975 
976 	dev_err_ratelimited(gpu->dev->dev, "UCHE | Out of bounds access | addr=0x%llX\n",
977 		addr);
978 }
979 
980 static void a5xx_gpmu_err_irq(struct msm_gpu *gpu)
981 {
982 	dev_err_ratelimited(gpu->dev->dev, "GPMU | voltage droop\n");
983 }
984 
985 static void a5xx_fault_detect_irq(struct msm_gpu *gpu)
986 {
987 	struct drm_device *dev = gpu->dev;
988 	struct msm_drm_private *priv = dev->dev_private;
989 	struct msm_ringbuffer *ring = gpu->funcs->active_ring(gpu);
990 
991 	DRM_DEV_ERROR(dev->dev, "gpu fault ring %d fence %x status %8.8X rb %4.4x/%4.4x ib1 %16.16llX/%4.4x ib2 %16.16llX/%4.4x\n",
992 		ring ? ring->id : -1, ring ? ring->seqno : 0,
993 		gpu_read(gpu, REG_A5XX_RBBM_STATUS),
994 		gpu_read(gpu, REG_A5XX_CP_RB_RPTR),
995 		gpu_read(gpu, REG_A5XX_CP_RB_WPTR),
996 		gpu_read64(gpu, REG_A5XX_CP_IB1_BASE, REG_A5XX_CP_IB1_BASE_HI),
997 		gpu_read(gpu, REG_A5XX_CP_IB1_BUFSZ),
998 		gpu_read64(gpu, REG_A5XX_CP_IB2_BASE, REG_A5XX_CP_IB2_BASE_HI),
999 		gpu_read(gpu, REG_A5XX_CP_IB2_BUFSZ));
1000 
1001 	/* Turn off the hangcheck timer to keep it from bothering us */
1002 	del_timer(&gpu->hangcheck_timer);
1003 
1004 	queue_work(priv->wq, &gpu->recover_work);
1005 }
1006 
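/* RBBM error interrupts decoded and reported by a5xx_rbbm_err_irq() */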
1007 #define RBBM_ERROR_MASK \
1008 	(A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR | \
1009 	A5XX_RBBM_INT_0_MASK_RBBM_TRANSFER_TIMEOUT | \
1010 	A5XX_RBBM_INT_0_MASK_RBBM_ME_MS_TIMEOUT | \
1011 	A5XX_RBBM_INT_0_MASK_RBBM_PFP_MS_TIMEOUT | \
1012 	A5XX_RBBM_INT_0_MASK_RBBM_ETS_MS_TIMEOUT | \
1013 	A5XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNC_OVERFLOW)
1014 
1015 static irqreturn_t a5xx_irq(struct msm_gpu *gpu)
1016 {
1017 	u32 status = gpu_read(gpu, REG_A5XX_RBBM_INT_0_STATUS);
1018 
1019 	/*
1020 	 * Clear all the interrupts except RBBM_AHB_ERROR - if we clear it
1021 	 * before the source is cleared the interrupt will storm.
1022 	 */
1023 	gpu_write(gpu, REG_A5XX_RBBM_INT_CLEAR_CMD,
1024 		status & ~A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR);
1025 
1026 	/* Pass status to a5xx_rbbm_err_irq because we've already cleared it */
1027 	if (status & RBBM_ERROR_MASK)
1028 		a5xx_rbbm_err_irq(gpu, status);
1029 
1030 	if (status & A5XX_RBBM_INT_0_MASK_CP_HW_ERROR)
1031 		a5xx_cp_err_irq(gpu);
1032 
1033 	if (status & A5XX_RBBM_INT_0_MASK_MISC_HANG_DETECT)
1034 		a5xx_fault_detect_irq(gpu);
1035 
1036 	if (status & A5XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS)
1037 		a5xx_uche_err_irq(gpu);
1038 
1039 	if (status & A5XX_RBBM_INT_0_MASK_GPMU_VOLTAGE_DROOP)
1040 		a5xx_gpmu_err_irq(gpu);
1041 
1042 	if (status & A5XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS) {
1043 		a5xx_preempt_trigger(gpu);
1044 		msm_gpu_retire(gpu);
1045 	}
1046 
1047 	if (status & A5XX_RBBM_INT_0_MASK_CP_SW)
1048 		a5xx_preempt_irq(gpu);
1049 
1050 	return IRQ_HANDLED;
1051 }
1052 
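/* Map the generic adreno register names used by the core code to A5XX offsets */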
1053 static const u32 a5xx_register_offsets[REG_ADRENO_REGISTER_MAX] = {
1054 	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_BASE, REG_A5XX_CP_RB_BASE),
1055 	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_BASE_HI, REG_A5XX_CP_RB_BASE_HI),
1056 	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_RPTR_ADDR, REG_A5XX_CP_RB_RPTR_ADDR),
1057 	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_RPTR_ADDR_HI,
1058 		REG_A5XX_CP_RB_RPTR_ADDR_HI),
1059 	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_RPTR, REG_A5XX_CP_RB_RPTR),
1060 	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_WPTR, REG_A5XX_CP_RB_WPTR),
1061 	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_CNTL, REG_A5XX_CP_RB_CNTL),
1062 };
1063 
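/*
 * Start/end pairs of register ranges captured for debugging and crash dumps;
 * the list is terminated with ~0.
 */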
1064 static const u32 a5xx_registers[] = {
1065 	0x0000, 0x0002, 0x0004, 0x0020, 0x0022, 0x0026, 0x0029, 0x002B,
1066 	0x002E, 0x0035, 0x0038, 0x0042, 0x0044, 0x0044, 0x0047, 0x0095,
1067 	0x0097, 0x00BB, 0x03A0, 0x0464, 0x0469, 0x046F, 0x04D2, 0x04D3,
1068 	0x04E0, 0x0533, 0x0540, 0x0555, 0x0800, 0x081A, 0x081F, 0x0841,
1069 	0x0860, 0x0860, 0x0880, 0x08A0, 0x0B00, 0x0B12, 0x0B15, 0x0B28,
1070 	0x0B78, 0x0B7F, 0x0BB0, 0x0BBD, 0x0BC0, 0x0BC6, 0x0BD0, 0x0C53,
1071 	0x0C60, 0x0C61, 0x0C80, 0x0C82, 0x0C84, 0x0C85, 0x0C90, 0x0C98,
1072 	0x0CA0, 0x0CA0, 0x0CB0, 0x0CB2, 0x2180, 0x2185, 0x2580, 0x2585,
1073 	0x0CC1, 0x0CC1, 0x0CC4, 0x0CC7, 0x0CCC, 0x0CCC, 0x0CD0, 0x0CD8,
1074 	0x0CE0, 0x0CE5, 0x0CE8, 0x0CE8, 0x0CEC, 0x0CF1, 0x0CFB, 0x0D0E,
1075 	0x2100, 0x211E, 0x2140, 0x2145, 0x2500, 0x251E, 0x2540, 0x2545,
1076 	0x0D10, 0x0D17, 0x0D20, 0x0D23, 0x0D30, 0x0D30, 0x20C0, 0x20C0,
1077 	0x24C0, 0x24C0, 0x0E40, 0x0E43, 0x0E4A, 0x0E4A, 0x0E50, 0x0E57,
1078 	0x0E60, 0x0E7C, 0x0E80, 0x0E8E, 0x0E90, 0x0E96, 0x0EA0, 0x0EA8,
1079 	0x0EB0, 0x0EB2, 0xE140, 0xE147, 0xE150, 0xE187, 0xE1A0, 0xE1A9,
1080 	0xE1B0, 0xE1B6, 0xE1C0, 0xE1C7, 0xE1D0, 0xE1D1, 0xE200, 0xE201,
1081 	0xE210, 0xE21C, 0xE240, 0xE268, 0xE000, 0xE006, 0xE010, 0xE09A,
1082 	0xE0A0, 0xE0A4, 0xE0AA, 0xE0EB, 0xE100, 0xE105, 0xE380, 0xE38F,
1083 	0xE3B0, 0xE3B0, 0xE400, 0xE405, 0xE408, 0xE4E9, 0xE4F0, 0xE4F0,
1084 	0xE280, 0xE280, 0xE282, 0xE2A3, 0xE2A5, 0xE2C2, 0xE940, 0xE947,
1085 	0xE950, 0xE987, 0xE9A0, 0xE9A9, 0xE9B0, 0xE9B6, 0xE9C0, 0xE9C7,
1086 	0xE9D0, 0xE9D1, 0xEA00, 0xEA01, 0xEA10, 0xEA1C, 0xEA40, 0xEA68,
1087 	0xE800, 0xE806, 0xE810, 0xE89A, 0xE8A0, 0xE8A4, 0xE8AA, 0xE8EB,
1088 	0xE900, 0xE905, 0xEB80, 0xEB8F, 0xEBB0, 0xEBB0, 0xEC00, 0xEC05,
1089 	0xEC08, 0xECE9, 0xECF0, 0xECF0, 0xEA80, 0xEA80, 0xEA82, 0xEAA3,
1090 	0xEAA5, 0xEAC2, 0xA800, 0xA800, 0xA820, 0xA828, 0xA840, 0xA87D,
1091 	0XA880, 0xA88D, 0xA890, 0xA8A3, 0xA8D0, 0xA8D8, 0xA8E0, 0xA8F5,
1092 	0xAC60, 0xAC60, ~0,
1093 };
1094 
1095 static void a5xx_dump(struct msm_gpu *gpu)
1096 {
1097 	DRM_DEV_INFO(gpu->dev->dev, "status:   %08x\n",
1098 		gpu_read(gpu, REG_A5XX_RBBM_STATUS));
1099 	adreno_dump(gpu);
1100 }
1101 
1102 static int a5xx_pm_resume(struct msm_gpu *gpu)
1103 {
1104 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1105 	int ret;
1106 
1107 	/* Turn on the core power */
1108 	ret = msm_gpu_pm_resume(gpu);
1109 	if (ret)
1110 		return ret;
1111 
1112 	if (adreno_is_a510(adreno_gpu)) {
1113 		/* Halt the sp_input_clk at HM level */
1114 		gpu_write(gpu, REG_A5XX_RBBM_CLOCK_CNTL, 0x00000055);
1115 		a5xx_set_hwcg(gpu, true);
1116 		/* Turn on sp_input_clk at HM level */
1117 		gpu_rmw(gpu, REG_A5XX_RBBM_CLOCK_CNTL, 0xff, 0);
1118 		return 0;
1119 	}
1120 
1121 	/* Turn on the RBCCU domain first to limit the chances of voltage droop */
1122 	gpu_write(gpu, REG_A5XX_GPMU_RBCCU_POWER_CNTL, 0x778000);
1123 
1124 	/* Wait 3 usecs before polling */
1125 	udelay(3);
1126 
1127 	ret = spin_usecs(gpu, 20, REG_A5XX_GPMU_RBCCU_PWR_CLK_STATUS,
1128 		(1 << 20), (1 << 20));
1129 	if (ret) {
1130 		DRM_ERROR("%s: timeout waiting for RBCCU GDSC enable: %X\n",
1131 			gpu->name,
1132 			gpu_read(gpu, REG_A5XX_GPMU_RBCCU_PWR_CLK_STATUS));
1133 		return ret;
1134 	}
1135 
1136 	/* Turn on the SP domain */
1137 	gpu_write(gpu, REG_A5XX_GPMU_SP_POWER_CNTL, 0x778000);
1138 	ret = spin_usecs(gpu, 20, REG_A5XX_GPMU_SP_PWR_CLK_STATUS,
1139 		(1 << 20), (1 << 20));
1140 	if (ret)
1141 		DRM_ERROR("%s: timeout waiting for SP GDSC enable\n",
1142 			gpu->name);
1143 
1144 	return ret;
1145 }
1146 
1147 static int a5xx_pm_suspend(struct msm_gpu *gpu)
1148 {
1149 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1150 	u32 mask = 0xf;
1151 
1152 	/* A510 has 3 XIN ports in VBIF */
1153 	if (adreno_is_a510(adreno_gpu))
1154 		mask = 0x7;
1155 
1156 	/* Clear the VBIF pipe before shutting down */
1157 	gpu_write(gpu, REG_A5XX_VBIF_XIN_HALT_CTRL0, mask);
1158 	spin_until((gpu_read(gpu, REG_A5XX_VBIF_XIN_HALT_CTRL1) &
1159 				mask) == mask);
1160 
1161 	gpu_write(gpu, REG_A5XX_VBIF_XIN_HALT_CTRL0, 0);
1162 
1163 	/*
1164 	 * Reset the VBIF before power collapse to avoid issues with FIFO
1165 	 * entries
1166 	 */
1167 	gpu_write(gpu, REG_A5XX_RBBM_BLOCK_SW_RESET_CMD, 0x003C0000);
1168 	gpu_write(gpu, REG_A5XX_RBBM_BLOCK_SW_RESET_CMD, 0x00000000);
1169 
1170 	return msm_gpu_pm_suspend(gpu);
1171 }
1172 
1173 static int a5xx_get_timestamp(struct msm_gpu *gpu, uint64_t *value)
1174 {
1175 	*value = gpu_read64(gpu, REG_A5XX_RBBM_PERFCTR_CP_0_LO,
1176 		REG_A5XX_RBBM_PERFCTR_CP_0_HI);
1177 
1178 	return 0;
1179 }
1180 
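/*
 * State for the CP crashdumper: 'ptr' is the kernel mapping of the scratch
 * buffer and 'iova' is its GPU address; the dump script is written at offset
 * 0 and the captured data lands further into the same buffer.
 */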
1181 struct a5xx_crashdumper {
1182 	void *ptr;
1183 	struct drm_gem_object *bo;
1184 	u64 iova;
1185 };
1186 
1187 struct a5xx_gpu_state {
1188 	struct msm_gpu_state base;
1189 	u32 *hlsqregs;
1190 };
1191 
1192 static int a5xx_crashdumper_init(struct msm_gpu *gpu,
1193 		struct a5xx_crashdumper *dumper)
1194 {
1195 	dumper->ptr = msm_gem_kernel_new_locked(gpu->dev,
1196 		SZ_1M, MSM_BO_UNCACHED, gpu->aspace,
1197 		&dumper->bo, &dumper->iova);
1198 
1199 	if (!IS_ERR(dumper->ptr))
1200 		msm_gem_object_set_name(dumper->bo, "crashdump");
1201 
1202 	return PTR_ERR_OR_ZERO(dumper->ptr);
1203 }
1204 
1205 static int a5xx_crashdumper_run(struct msm_gpu *gpu,
1206 		struct a5xx_crashdumper *dumper)
1207 {
1208 	u32 val;
1209 
1210 	if (IS_ERR_OR_NULL(dumper->ptr))
1211 		return -EINVAL;
1212 
1213 	gpu_write64(gpu, REG_A5XX_CP_CRASH_SCRIPT_BASE_LO,
1214 		REG_A5XX_CP_CRASH_SCRIPT_BASE_HI, dumper->iova);
1215 
1216 	gpu_write(gpu, REG_A5XX_CP_CRASH_DUMP_CNTL, 1);
1217 
1218 	return gpu_poll_timeout(gpu, REG_A5XX_CP_CRASH_DUMP_CNTL, val,
1219 		val & 0x04, 100, 10000);
1220 }
1221 
1222 /*
1223  * This is a list of the registers that need to be read through the HLSQ
1224  * aperture by the crashdumper.  These are not nominally accessible from
1225  * the CPU on a secure platform.
1226  */
1227 static const struct {
1228 	u32 type;
1229 	u32 regoffset;
1230 	u32 count;
1231 } a5xx_hlsq_aperture_regs[] = {
1232 	{ 0x35, 0xe00, 0x32 },   /* HLSQ non-context */
1233 	{ 0x31, 0x2080, 0x1 },   /* HLSQ 2D context 0 */
1234 	{ 0x33, 0x2480, 0x1 },   /* HLSQ 2D context 1 */
1235 	{ 0x32, 0xe780, 0x62 },  /* HLSQ 3D context 0 */
1236 	{ 0x34, 0xef80, 0x62 },  /* HLSQ 3D context 1 */
1237 	{ 0x3f, 0x0ec0, 0x40 },  /* SP non-context */
1238 	{ 0x3d, 0x2040, 0x1 },   /* SP 2D context 0 */
1239 	{ 0x3b, 0x2440, 0x1 },   /* SP 2D context 1 */
1240 	{ 0x3e, 0xe580, 0x170 }, /* SP 3D context 0 */
1241 	{ 0x3c, 0xed80, 0x170 }, /* SP 3D context 1 */
1242 	{ 0x3a, 0x0f00, 0x1c },  /* TP non-context */
1243 	{ 0x38, 0x2000, 0xa },   /* TP 2D context 0 */
1244 	{ 0x36, 0x2400, 0xa },   /* TP 2D context 1 */
1245 	{ 0x39, 0xe700, 0x80 },  /* TP 3D context 0 */
1246 	{ 0x37, 0xef00, 0x80 },  /* TP 3D context 1 */
1247 };
1248 
1249 static void a5xx_gpu_state_get_hlsq_regs(struct msm_gpu *gpu,
1250 		struct a5xx_gpu_state *a5xx_state)
1251 {
1252 	struct a5xx_crashdumper dumper = { 0 };
1253 	u32 offset, count = 0;
1254 	u64 *ptr;
1255 	int i;
1256 
1257 	if (a5xx_crashdumper_init(gpu, &dumper))
1258 		return;
1259 
1260 	/* The script will be written at offset 0 */
1261 	ptr = dumper.ptr;
1262 
1263 	/* Start writing the data at offset 256k */
1264 	offset = dumper.iova + (256 * SZ_1K);
1265 
1266 	/* Count how many additional registers to get from the HLSQ aperture */
1267 	for (i = 0; i < ARRAY_SIZE(a5xx_hlsq_aperture_regs); i++)
1268 		count += a5xx_hlsq_aperture_regs[i].count;
1269 
1270 	a5xx_state->hlsqregs = kcalloc(count, sizeof(u32), GFP_KERNEL);
1271 	if (!a5xx_state->hlsqregs)
1272 		return;
1273 
1274 	/* Build the crashdump script */
1275 	for (i = 0; i < ARRAY_SIZE(a5xx_hlsq_aperture_regs); i++) {
1276 		u32 type = a5xx_hlsq_aperture_regs[i].type;
1277 		u32 c = a5xx_hlsq_aperture_regs[i].count;
1278 
1279 		/* Write the register to select the desired bank */
1280 		*ptr++ = ((u64) type << 8);
1281 		*ptr++ = (((u64) REG_A5XX_HLSQ_DBG_READ_SEL) << 44) |
1282 			(1 << 21) | 1;
1283 
1284 		*ptr++ = offset;
1285 		*ptr++ = (((u64) REG_A5XX_HLSQ_DBG_AHB_READ_APERTURE) << 44)
1286 			| c;
1287 
1288 		offset += c * sizeof(u32);
1289 	}
1290 
1291 	/* Write two zeros to close off the script */
1292 	*ptr++ = 0;
1293 	*ptr++ = 0;
1294 
1295 	if (a5xx_crashdumper_run(gpu, &dumper)) {
1296 		kfree(a5xx_state->hlsqregs);
1297 		msm_gem_kernel_put(dumper.bo, gpu->aspace, true);
1298 		return;
1299 	}
1300 
1301 	/* Copy the data from the crashdumper to the state */
1302 	memcpy(a5xx_state->hlsqregs, dumper.ptr + (256 * SZ_1K),
1303 		count * sizeof(u32));
1304 
1305 	msm_gem_kernel_put(dumper.bo, gpu->aspace, true);
1306 }
1307 
1308 static struct msm_gpu_state *a5xx_gpu_state_get(struct msm_gpu *gpu)
1309 {
1310 	struct a5xx_gpu_state *a5xx_state = kzalloc(sizeof(*a5xx_state),
1311 			GFP_KERNEL);
1312 
1313 	if (!a5xx_state)
1314 		return ERR_PTR(-ENOMEM);
1315 
1316 	/* Temporarily disable hardware clock gating before reading the hw */
1317 	a5xx_set_hwcg(gpu, false);
1318 
1319 	/* First get the generic state from the adreno core */
1320 	adreno_gpu_state_get(gpu, &(a5xx_state->base));
1321 
1322 	a5xx_state->base.rbbm_status = gpu_read(gpu, REG_A5XX_RBBM_STATUS);
1323 
1324 	/* Get the HLSQ regs with the help of the crashdumper */
1325 	a5xx_gpu_state_get_hlsq_regs(gpu, a5xx_state);
1326 
1327 	a5xx_set_hwcg(gpu, true);
1328 
1329 	return &a5xx_state->base;
1330 }
1331 
1332 static void a5xx_gpu_state_destroy(struct kref *kref)
1333 {
1334 	struct msm_gpu_state *state = container_of(kref,
1335 		struct msm_gpu_state, ref);
1336 	struct a5xx_gpu_state *a5xx_state = container_of(state,
1337 		struct a5xx_gpu_state, base);
1338 
1339 	kfree(a5xx_state->hlsqregs);
1340 
1341 	adreno_gpu_state_destroy(state);
1342 	kfree(a5xx_state);
1343 }
1344 
1345 static int a5xx_gpu_state_put(struct msm_gpu_state *state)
1346 {
1347 	if (IS_ERR_OR_NULL(state))
1348 		return 1;
1349 
1350 	return kref_put(&state->ref, a5xx_gpu_state_destroy);
1351 }
1352 
1353 
1354 #if defined(CONFIG_DEBUG_FS) || defined(CONFIG_DEV_COREDUMP)
1355 static void a5xx_show(struct msm_gpu *gpu, struct msm_gpu_state *state,
1356 		      struct drm_printer *p)
1357 {
1358 	int i, j;
1359 	u32 pos = 0;
1360 	struct a5xx_gpu_state *a5xx_state = container_of(state,
1361 		struct a5xx_gpu_state, base);
1362 
1363 	if (IS_ERR_OR_NULL(state))
1364 		return;
1365 
1366 	adreno_show(gpu, state, p);
1367 
1368 	/* Dump the additional a5xx HLSQ registers */
1369 	if (!a5xx_state->hlsqregs)
1370 		return;
1371 
1372 	drm_printf(p, "registers-hlsq:\n");
1373 
1374 	for (i = 0; i < ARRAY_SIZE(a5xx_hlsq_aperture_regs); i++) {
1375 		u32 o = a5xx_hlsq_aperture_regs[i].regoffset;
1376 		u32 c = a5xx_hlsq_aperture_regs[i].count;
1377 
1378 		for (j = 0; j < c; j++, pos++, o++) {
1379 			/*
1380 			 * To keep the crashdump simple we pull the entire range
1381 			 * for each register type but not all of the registers
1382 			 * in the range are valid. Fortunately invalid registers
1383 			 * stick out like a sore thumb with a value of
1384 			 * 0xdeadbeef
1385 			 */
1386 			if (a5xx_state->hlsqregs[pos] == 0xdeadbeef)
1387 				continue;
1388 
1389 			drm_printf(p, "  - { offset: 0x%04x, value: 0x%08x }\n",
1390 				o << 2, a5xx_state->hlsqregs[pos]);
1391 		}
1392 	}
1393 }
1394 #endif
1395 
1396 static struct msm_ringbuffer *a5xx_active_ring(struct msm_gpu *gpu)
1397 {
1398 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1399 	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
1400 
1401 	return a5xx_gpu->cur_ring;
1402 }
1403 
1404 static unsigned long a5xx_gpu_busy(struct msm_gpu *gpu)
1405 {
1406 	u64 busy_cycles, busy_time;
1407 
1408 	/* Only read the gpu busy if the hardware is already active */
1409 	if (pm_runtime_get_if_in_use(&gpu->pdev->dev) == 0)
1410 		return 0;
1411 
1412 	busy_cycles = gpu_read64(gpu, REG_A5XX_RBBM_PERFCTR_RBBM_0_LO,
1413 			REG_A5XX_RBBM_PERFCTR_RBBM_0_HI);
1414 
1415 	busy_time = busy_cycles - gpu->devfreq.busy_cycles;
1416 	do_div(busy_time, clk_get_rate(gpu->core_clk) / 1000000);
1417 
1418 	gpu->devfreq.busy_cycles = busy_cycles;
1419 
1420 	pm_runtime_put(&gpu->pdev->dev);
1421 
1422 	if (WARN_ON(busy_time > ~0LU))
1423 		return ~0LU;
1424 
1425 	return (unsigned long)busy_time;
1426 }
1427 
1428 static const struct adreno_gpu_funcs funcs = {
1429 	.base = {
1430 		.get_param = adreno_get_param,
1431 		.hw_init = a5xx_hw_init,
1432 		.pm_suspend = a5xx_pm_suspend,
1433 		.pm_resume = a5xx_pm_resume,
1434 		.recover = a5xx_recover,
1435 		.submit = a5xx_submit,
1436 		.flush = a5xx_flush,
1437 		.active_ring = a5xx_active_ring,
1438 		.irq = a5xx_irq,
1439 		.destroy = a5xx_destroy,
1440 #if defined(CONFIG_DEBUG_FS) || defined(CONFIG_DEV_COREDUMP)
1441 		.show = a5xx_show,
1442 #endif
1443 #if defined(CONFIG_DEBUG_FS)
1444 		.debugfs_init = a5xx_debugfs_init,
1445 #endif
1446 		.gpu_busy = a5xx_gpu_busy,
1447 		.gpu_state_get = a5xx_gpu_state_get,
1448 		.gpu_state_put = a5xx_gpu_state_put,
1449 		.create_address_space = adreno_iommu_create_address_space,
1450 	},
1451 	.get_timestamp = a5xx_get_timestamp,
1452 };
1453 
1454 static void check_speed_bin(struct device *dev)
1455 {
1456 	struct nvmem_cell *cell;
1457 	u32 val;
1458 
1459 	/*
1460 	 * If the OPP table specifies an opp-supported-hw property then we have
1461 	 * to set something with dev_pm_opp_set_supported_hw() or the table
1462 	 * doesn't get populated, so pick an arbitrary value that should
1463 	 * ensure the default frequencies are selected but not conflict with
1464 	 * any actual bins.
1465 	 */
1466 	val = 0x80;
1467 
1468 	cell = nvmem_cell_get(dev, "speed_bin");
1469 
1470 	if (!IS_ERR(cell)) {
1471 		void *buf = nvmem_cell_read(cell, NULL);
1472 
1473 		if (!IS_ERR(buf)) {
1474 			u8 bin = *((u8 *) buf);
1475 
1476 			val = (1 << bin);
1477 			kfree(buf);
1478 		}
1479 
1480 		nvmem_cell_put(cell);
1481 	}
1482 
1483 	dev_pm_opp_set_supported_hw(dev, &val, 1);
1484 }
1485 
1486 struct msm_gpu *a5xx_gpu_init(struct drm_device *dev)
1487 {
1488 	struct msm_drm_private *priv = dev->dev_private;
1489 	struct platform_device *pdev = priv->gpu_pdev;
1490 	struct a5xx_gpu *a5xx_gpu = NULL;
1491 	struct adreno_gpu *adreno_gpu;
1492 	struct msm_gpu *gpu;
1493 	int ret;
1494 
1495 	if (!pdev) {
1496 		DRM_DEV_ERROR(dev->dev, "No A5XX device is defined\n");
1497 		return ERR_PTR(-ENXIO);
1498 	}
1499 
1500 	a5xx_gpu = kzalloc(sizeof(*a5xx_gpu), GFP_KERNEL);
1501 	if (!a5xx_gpu)
1502 		return ERR_PTR(-ENOMEM);
1503 
1504 	adreno_gpu = &a5xx_gpu->base;
1505 	gpu = &adreno_gpu->base;
1506 
1507 	adreno_gpu->registers = a5xx_registers;
1508 	adreno_gpu->reg_offsets = a5xx_register_offsets;
1509 
1510 	a5xx_gpu->lm_leakage = 0x4E001A;
1511 
1512 	check_speed_bin(&pdev->dev);
1513 
1514 	ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs, 4);
1515 	if (ret) {
1516 		a5xx_destroy(&(a5xx_gpu->base.base));
1517 		return ERR_PTR(ret);
1518 	}
1519 
1520 	if (gpu->aspace)
1521 		msm_mmu_set_fault_handler(gpu->aspace->mmu, gpu, a5xx_fault_handler);
1522 
1523 	/* Set up the preemption specific bits and pieces for each ringbuffer */
1524 	a5xx_preempt_init(gpu);
1525 
1526 	return gpu;
1527 }
1528