xref: /openbmc/linux/drivers/gpu/drm/msm/adreno/a5xx_gpu.c (revision a89aa749ece9c6fee7932163472d2ee0efd6ddd3)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /* Copyright (c) 2016-2017 The Linux Foundation. All rights reserved.
3  */
4 
5 #include <linux/kernel.h>
6 #include <linux/types.h>
7 #include <linux/cpumask.h>
8 #include <linux/qcom_scm.h>
9 #include <linux/pm_opp.h>
10 #include <linux/nvmem-consumer.h>
11 #include <linux/slab.h>
12 #include "msm_gem.h"
13 #include "msm_mmu.h"
14 #include "a5xx_gpu.h"
15 
16 extern bool hang_debug;
17 static void a5xx_dump(struct msm_gpu *gpu);
18 
19 #define GPU_PAS_ID 13
20 
21 static void a5xx_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
22 {
23 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
24 	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
25 	uint32_t wptr;
26 	unsigned long flags;
27 
28 	spin_lock_irqsave(&ring->lock, flags);
29 
30 	/* Copy the shadow to the actual register */
31 	ring->cur = ring->next;
32 
33 	/* Make sure to wrap wptr if we need to */
34 	wptr = get_wptr(ring);
35 
36 	spin_unlock_irqrestore(&ring->lock, flags);
37 
38 	/* Make sure everything is posted before making a decision */
39 	mb();
40 
41 	/* Update HW if this is the current ring and we are not in preempt */
42 	if (a5xx_gpu->cur_ring == ring && !a5xx_in_preempt(a5xx_gpu))
43 		gpu_write(gpu, REG_A5XX_CP_RB_WPTR, wptr);
44 }
45 
46 static void a5xx_submit_in_rb(struct msm_gpu *gpu, struct msm_gem_submit *submit,
47 	struct msm_file_private *ctx)
48 {
49 	struct msm_drm_private *priv = gpu->dev->dev_private;
50 	struct msm_ringbuffer *ring = submit->ring;
51 	struct msm_gem_object *obj;
52 	uint32_t *ptr, dwords;
53 	unsigned int i;
54 
55 	for (i = 0; i < submit->nr_cmds; i++) {
56 		switch (submit->cmd[i].type) {
57 		case MSM_SUBMIT_CMD_IB_TARGET_BUF:
58 			break;
59 		case MSM_SUBMIT_CMD_CTX_RESTORE_BUF:
60 			if (priv->lastctx == ctx)
61 				break;
62 			/* fall-thru */
63 		case MSM_SUBMIT_CMD_BUF:
64 			/* copy commands into RB: */
65 			obj = submit->bos[submit->cmd[i].idx].obj;
66 			dwords = submit->cmd[i].size;
67 
68 			ptr = msm_gem_get_vaddr(&obj->base);
69 
70 			/* _get_vaddr() shouldn't fail at this point,
71 			 * since we've already mapped it once in
72 			 * submit_reloc()
73 			 */
74 			if (WARN_ON(!ptr))
75 				return;
76 
77 			for (i = 0; i < dwords; i++) {
78 				/* normally the OUT_PKTn() would wait
79 				 * for space for the packet.  But since
80 				 * we just OUT_RING() the whole thing,
81 				 * need to call adreno_wait_ring()
82 				 * ourself:
83 				 */
84 				adreno_wait_ring(ring, 1);
85 				OUT_RING(ring, ptr[i]);
86 			}
87 
88 			msm_gem_put_vaddr(&obj->base);
89 
90 			break;
91 		}
92 	}
93 
94 	a5xx_flush(gpu, ring);
95 	a5xx_preempt_trigger(gpu);
96 
97 	/* we might not necessarily have a cmd from userspace to
98 	 * trigger an event to know that submit has completed, so
99 	 * do this manually:
100 	 */
101 	a5xx_idle(gpu, ring);
102 	ring->memptrs->fence = submit->seqno;
103 	msm_gpu_retire(gpu);
104 }
105 
106 static void a5xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit,
107 	struct msm_file_private *ctx)
108 {
109 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
110 	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
111 	struct msm_drm_private *priv = gpu->dev->dev_private;
112 	struct msm_ringbuffer *ring = submit->ring;
113 	unsigned int i, ibs = 0;
114 
115 	if (IS_ENABLED(CONFIG_DRM_MSM_GPU_SUDO) && submit->in_rb) {
116 		priv->lastctx = NULL;
117 		a5xx_submit_in_rb(gpu, submit, ctx);
118 		return;
119 	}
120 
121 	OUT_PKT7(ring, CP_PREEMPT_ENABLE_GLOBAL, 1);
122 	OUT_RING(ring, 0x02);
123 
124 	/* Turn off protected mode to write to special registers */
125 	OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
126 	OUT_RING(ring, 0);
127 
128 	/* Set the save preemption record for the ring/command */
129 	OUT_PKT4(ring, REG_A5XX_CP_CONTEXT_SWITCH_SAVE_ADDR_LO, 2);
130 	OUT_RING(ring, lower_32_bits(a5xx_gpu->preempt_iova[submit->ring->id]));
131 	OUT_RING(ring, upper_32_bits(a5xx_gpu->preempt_iova[submit->ring->id]));
132 
133 	/* Turn back on protected mode */
134 	OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
135 	OUT_RING(ring, 1);
136 
137 	/* Enable local preemption for finegrain preemption */
138 	OUT_PKT7(ring, CP_PREEMPT_ENABLE_GLOBAL, 1);
139 	OUT_RING(ring, 0x02);
140 
141 	/* Allow CP_CONTEXT_SWITCH_YIELD packets in the IB2 */
142 	OUT_PKT7(ring, CP_YIELD_ENABLE, 1);
143 	OUT_RING(ring, 0x02);
144 
145 	/* Submit the commands */
146 	for (i = 0; i < submit->nr_cmds; i++) {
147 		switch (submit->cmd[i].type) {
148 		case MSM_SUBMIT_CMD_IB_TARGET_BUF:
149 			break;
150 		case MSM_SUBMIT_CMD_CTX_RESTORE_BUF:
151 			if (priv->lastctx == ctx)
152 				break;
153 			/* fall-thru */
154 		case MSM_SUBMIT_CMD_BUF:
155 			OUT_PKT7(ring, CP_INDIRECT_BUFFER_PFE, 3);
156 			OUT_RING(ring, lower_32_bits(submit->cmd[i].iova));
157 			OUT_RING(ring, upper_32_bits(submit->cmd[i].iova));
158 			OUT_RING(ring, submit->cmd[i].size);
159 			ibs++;
160 			break;
161 		}
162 	}
163 
164 	/*
165 	 * Write the render mode to NULL (0) to indicate to the CP that the IBs
166 	 * are done rendering - otherwise a lucky preemption would start
167 	 * replaying from the last checkpoint
168 	 */
169 	OUT_PKT7(ring, CP_SET_RENDER_MODE, 5);
170 	OUT_RING(ring, 0);
171 	OUT_RING(ring, 0);
172 	OUT_RING(ring, 0);
173 	OUT_RING(ring, 0);
174 	OUT_RING(ring, 0);
175 
176 	/* Turn off IB level preemptions */
177 	OUT_PKT7(ring, CP_YIELD_ENABLE, 1);
178 	OUT_RING(ring, 0x01);
179 
180 	/* Write the fence to the scratch register */
181 	OUT_PKT4(ring, REG_A5XX_CP_SCRATCH_REG(2), 1);
182 	OUT_RING(ring, submit->seqno);
183 
184 	/*
185 	 * Execute a CACHE_FLUSH_TS event. This will ensure that the
186 	 * timestamp is written to the memory and then triggers the interrupt
187 	 */
188 	OUT_PKT7(ring, CP_EVENT_WRITE, 4);
189 	OUT_RING(ring, CACHE_FLUSH_TS | (1 << 31));
190 	OUT_RING(ring, lower_32_bits(rbmemptr(ring, fence)));
191 	OUT_RING(ring, upper_32_bits(rbmemptr(ring, fence)));
192 	OUT_RING(ring, submit->seqno);
193 
194 	/* Yield the floor on command completion */
195 	OUT_PKT7(ring, CP_CONTEXT_SWITCH_YIELD, 4);
196 	/*
197 	 * If dword[2:1] are non zero, they specify an address for the CP to
198 	 * write the value of dword[3] to on preemption complete. Write 0 to
199 	 * skip the write
200 	 */
201 	OUT_RING(ring, 0x00);
202 	OUT_RING(ring, 0x00);
203 	/* Data value - not used if the address above is 0 */
204 	OUT_RING(ring, 0x01);
205 	/* Set bit 0 to trigger an interrupt on preempt complete */
206 	OUT_RING(ring, 0x01);
207 
208 	a5xx_flush(gpu, ring);
209 
210 	/* Check to see if we need to start preemption */
211 	a5xx_preempt_trigger(gpu);
212 }
213 
214 static const struct {
215 	u32 offset;
216 	u32 value;
217 } a5xx_hwcg[] = {
218 	{REG_A5XX_RBBM_CLOCK_CNTL_SP0, 0x02222222},
219 	{REG_A5XX_RBBM_CLOCK_CNTL_SP1, 0x02222222},
220 	{REG_A5XX_RBBM_CLOCK_CNTL_SP2, 0x02222222},
221 	{REG_A5XX_RBBM_CLOCK_CNTL_SP3, 0x02222222},
222 	{REG_A5XX_RBBM_CLOCK_CNTL2_SP0, 0x02222220},
223 	{REG_A5XX_RBBM_CLOCK_CNTL2_SP1, 0x02222220},
224 	{REG_A5XX_RBBM_CLOCK_CNTL2_SP2, 0x02222220},
225 	{REG_A5XX_RBBM_CLOCK_CNTL2_SP3, 0x02222220},
226 	{REG_A5XX_RBBM_CLOCK_HYST_SP0, 0x0000F3CF},
227 	{REG_A5XX_RBBM_CLOCK_HYST_SP1, 0x0000F3CF},
228 	{REG_A5XX_RBBM_CLOCK_HYST_SP2, 0x0000F3CF},
229 	{REG_A5XX_RBBM_CLOCK_HYST_SP3, 0x0000F3CF},
230 	{REG_A5XX_RBBM_CLOCK_DELAY_SP0, 0x00000080},
231 	{REG_A5XX_RBBM_CLOCK_DELAY_SP1, 0x00000080},
232 	{REG_A5XX_RBBM_CLOCK_DELAY_SP2, 0x00000080},
233 	{REG_A5XX_RBBM_CLOCK_DELAY_SP3, 0x00000080},
234 	{REG_A5XX_RBBM_CLOCK_CNTL_TP0, 0x22222222},
235 	{REG_A5XX_RBBM_CLOCK_CNTL_TP1, 0x22222222},
236 	{REG_A5XX_RBBM_CLOCK_CNTL_TP2, 0x22222222},
237 	{REG_A5XX_RBBM_CLOCK_CNTL_TP3, 0x22222222},
238 	{REG_A5XX_RBBM_CLOCK_CNTL2_TP0, 0x22222222},
239 	{REG_A5XX_RBBM_CLOCK_CNTL2_TP1, 0x22222222},
240 	{REG_A5XX_RBBM_CLOCK_CNTL2_TP2, 0x22222222},
241 	{REG_A5XX_RBBM_CLOCK_CNTL2_TP3, 0x22222222},
242 	{REG_A5XX_RBBM_CLOCK_CNTL3_TP0, 0x00002222},
243 	{REG_A5XX_RBBM_CLOCK_CNTL3_TP1, 0x00002222},
244 	{REG_A5XX_RBBM_CLOCK_CNTL3_TP2, 0x00002222},
245 	{REG_A5XX_RBBM_CLOCK_CNTL3_TP3, 0x00002222},
246 	{REG_A5XX_RBBM_CLOCK_HYST_TP0, 0x77777777},
247 	{REG_A5XX_RBBM_CLOCK_HYST_TP1, 0x77777777},
248 	{REG_A5XX_RBBM_CLOCK_HYST_TP2, 0x77777777},
249 	{REG_A5XX_RBBM_CLOCK_HYST_TP3, 0x77777777},
250 	{REG_A5XX_RBBM_CLOCK_HYST2_TP0, 0x77777777},
251 	{REG_A5XX_RBBM_CLOCK_HYST2_TP1, 0x77777777},
252 	{REG_A5XX_RBBM_CLOCK_HYST2_TP2, 0x77777777},
253 	{REG_A5XX_RBBM_CLOCK_HYST2_TP3, 0x77777777},
254 	{REG_A5XX_RBBM_CLOCK_HYST3_TP0, 0x00007777},
255 	{REG_A5XX_RBBM_CLOCK_HYST3_TP1, 0x00007777},
256 	{REG_A5XX_RBBM_CLOCK_HYST3_TP2, 0x00007777},
257 	{REG_A5XX_RBBM_CLOCK_HYST3_TP3, 0x00007777},
258 	{REG_A5XX_RBBM_CLOCK_DELAY_TP0, 0x11111111},
259 	{REG_A5XX_RBBM_CLOCK_DELAY_TP1, 0x11111111},
260 	{REG_A5XX_RBBM_CLOCK_DELAY_TP2, 0x11111111},
261 	{REG_A5XX_RBBM_CLOCK_DELAY_TP3, 0x11111111},
262 	{REG_A5XX_RBBM_CLOCK_DELAY2_TP0, 0x11111111},
263 	{REG_A5XX_RBBM_CLOCK_DELAY2_TP1, 0x11111111},
264 	{REG_A5XX_RBBM_CLOCK_DELAY2_TP2, 0x11111111},
265 	{REG_A5XX_RBBM_CLOCK_DELAY2_TP3, 0x11111111},
266 	{REG_A5XX_RBBM_CLOCK_DELAY3_TP0, 0x00001111},
267 	{REG_A5XX_RBBM_CLOCK_DELAY3_TP1, 0x00001111},
268 	{REG_A5XX_RBBM_CLOCK_DELAY3_TP2, 0x00001111},
269 	{REG_A5XX_RBBM_CLOCK_DELAY3_TP3, 0x00001111},
270 	{REG_A5XX_RBBM_CLOCK_CNTL_UCHE, 0x22222222},
271 	{REG_A5XX_RBBM_CLOCK_CNTL2_UCHE, 0x22222222},
272 	{REG_A5XX_RBBM_CLOCK_CNTL3_UCHE, 0x22222222},
273 	{REG_A5XX_RBBM_CLOCK_CNTL4_UCHE, 0x00222222},
274 	{REG_A5XX_RBBM_CLOCK_HYST_UCHE, 0x00444444},
275 	{REG_A5XX_RBBM_CLOCK_DELAY_UCHE, 0x00000002},
276 	{REG_A5XX_RBBM_CLOCK_CNTL_RB0, 0x22222222},
277 	{REG_A5XX_RBBM_CLOCK_CNTL_RB1, 0x22222222},
278 	{REG_A5XX_RBBM_CLOCK_CNTL_RB2, 0x22222222},
279 	{REG_A5XX_RBBM_CLOCK_CNTL_RB3, 0x22222222},
280 	{REG_A5XX_RBBM_CLOCK_CNTL2_RB0, 0x00222222},
281 	{REG_A5XX_RBBM_CLOCK_CNTL2_RB1, 0x00222222},
282 	{REG_A5XX_RBBM_CLOCK_CNTL2_RB2, 0x00222222},
283 	{REG_A5XX_RBBM_CLOCK_CNTL2_RB3, 0x00222222},
284 	{REG_A5XX_RBBM_CLOCK_CNTL_CCU0, 0x00022220},
285 	{REG_A5XX_RBBM_CLOCK_CNTL_CCU1, 0x00022220},
286 	{REG_A5XX_RBBM_CLOCK_CNTL_CCU2, 0x00022220},
287 	{REG_A5XX_RBBM_CLOCK_CNTL_CCU3, 0x00022220},
288 	{REG_A5XX_RBBM_CLOCK_CNTL_RAC, 0x05522222},
289 	{REG_A5XX_RBBM_CLOCK_CNTL2_RAC, 0x00505555},
290 	{REG_A5XX_RBBM_CLOCK_HYST_RB_CCU0, 0x04040404},
291 	{REG_A5XX_RBBM_CLOCK_HYST_RB_CCU1, 0x04040404},
292 	{REG_A5XX_RBBM_CLOCK_HYST_RB_CCU2, 0x04040404},
293 	{REG_A5XX_RBBM_CLOCK_HYST_RB_CCU3, 0x04040404},
294 	{REG_A5XX_RBBM_CLOCK_HYST_RAC, 0x07444044},
295 	{REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_0, 0x00000002},
296 	{REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_1, 0x00000002},
297 	{REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_2, 0x00000002},
298 	{REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_3, 0x00000002},
299 	{REG_A5XX_RBBM_CLOCK_DELAY_RAC, 0x00010011},
300 	{REG_A5XX_RBBM_CLOCK_CNTL_TSE_RAS_RBBM, 0x04222222},
301 	{REG_A5XX_RBBM_CLOCK_MODE_GPC, 0x02222222},
302 	{REG_A5XX_RBBM_CLOCK_MODE_VFD, 0x00002222},
303 	{REG_A5XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM, 0x00000000},
304 	{REG_A5XX_RBBM_CLOCK_HYST_GPC, 0x04104004},
305 	{REG_A5XX_RBBM_CLOCK_HYST_VFD, 0x00000000},
306 	{REG_A5XX_RBBM_CLOCK_DELAY_HLSQ, 0x00000000},
307 	{REG_A5XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM, 0x00004000},
308 	{REG_A5XX_RBBM_CLOCK_DELAY_GPC, 0x00000200},
309 	{REG_A5XX_RBBM_CLOCK_DELAY_VFD, 0x00002222}
310 };
311 
312 void a5xx_set_hwcg(struct msm_gpu *gpu, bool state)
313 {
314 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
315 	unsigned int i;
316 
317 	for (i = 0; i < ARRAY_SIZE(a5xx_hwcg); i++)
318 		gpu_write(gpu, a5xx_hwcg[i].offset,
319 			state ? a5xx_hwcg[i].value : 0);
320 
321 	if (adreno_is_a540(adreno_gpu)) {
322 		gpu_write(gpu, REG_A5XX_RBBM_CLOCK_DELAY_GPMU, state ? 0x00000770 : 0);
323 		gpu_write(gpu, REG_A5XX_RBBM_CLOCK_HYST_GPMU, state ? 0x00000004 : 0);
324 	}
325 
326 	gpu_write(gpu, REG_A5XX_RBBM_CLOCK_CNTL, state ? 0xAAA8AA00 : 0);
327 	gpu_write(gpu, REG_A5XX_RBBM_ISDB_CNT, state ? 0x182 : 0x180);
328 }
329 
330 static int a5xx_me_init(struct msm_gpu *gpu)
331 {
332 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
333 	struct msm_ringbuffer *ring = gpu->rb[0];
334 
335 	OUT_PKT7(ring, CP_ME_INIT, 8);
336 
337 	OUT_RING(ring, 0x0000002F);
338 
339 	/* Enable multiple hardware contexts */
340 	OUT_RING(ring, 0x00000003);
341 
342 	/* Enable error detection */
343 	OUT_RING(ring, 0x20000000);
344 
345 	/* Don't enable header dump */
346 	OUT_RING(ring, 0x00000000);
347 	OUT_RING(ring, 0x00000000);
348 
349 	/* Specify workarounds for various microcode issues */
350 	if (adreno_is_a530(adreno_gpu)) {
351 		/* Workaround for token end syncs
352 		 * Force a WFI after every direct-render 3D mode draw and every
353 		 * 2D mode 3 draw
354 		 */
355 		OUT_RING(ring, 0x0000000B);
356 	} else if (adreno_is_a510(adreno_gpu)) {
357 		/* Workaround for token and syncs */
358 		OUT_RING(ring, 0x00000001);
359 	} else {
360 		/* No workarounds enabled */
361 		OUT_RING(ring, 0x00000000);
362 	}
363 
364 	OUT_RING(ring, 0x00000000);
365 	OUT_RING(ring, 0x00000000);
366 
367 	gpu->funcs->flush(gpu, ring);
368 	return a5xx_idle(gpu, ring) ? 0 : -EINVAL;
369 }
370 
371 static int a5xx_preempt_start(struct msm_gpu *gpu)
372 {
373 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
374 	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
375 	struct msm_ringbuffer *ring = gpu->rb[0];
376 
377 	if (gpu->nr_rings == 1)
378 		return 0;
379 
380 	/* Turn off protected mode to write to special registers */
381 	OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
382 	OUT_RING(ring, 0);
383 
384 	/* Set the save preemption record for the ring/command */
385 	OUT_PKT4(ring, REG_A5XX_CP_CONTEXT_SWITCH_SAVE_ADDR_LO, 2);
386 	OUT_RING(ring, lower_32_bits(a5xx_gpu->preempt_iova[ring->id]));
387 	OUT_RING(ring, upper_32_bits(a5xx_gpu->preempt_iova[ring->id]));
388 
389 	/* Turn back on protected mode */
390 	OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
391 	OUT_RING(ring, 1);
392 
393 	OUT_PKT7(ring, CP_PREEMPT_ENABLE_GLOBAL, 1);
394 	OUT_RING(ring, 0x00);
395 
396 	OUT_PKT7(ring, CP_PREEMPT_ENABLE_LOCAL, 1);
397 	OUT_RING(ring, 0x01);
398 
399 	OUT_PKT7(ring, CP_YIELD_ENABLE, 1);
400 	OUT_RING(ring, 0x01);
401 
402 	/* Yield the floor on command completion */
403 	OUT_PKT7(ring, CP_CONTEXT_SWITCH_YIELD, 4);
404 	OUT_RING(ring, 0x00);
405 	OUT_RING(ring, 0x00);
406 	OUT_RING(ring, 0x01);
407 	OUT_RING(ring, 0x01);
408 
409 	gpu->funcs->flush(gpu, ring);
410 
411 	return a5xx_idle(gpu, ring) ? 0 : -EINVAL;
412 }
413 
414 static int a5xx_ucode_init(struct msm_gpu *gpu)
415 {
416 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
417 	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
418 	int ret;
419 
420 	if (!a5xx_gpu->pm4_bo) {
421 		a5xx_gpu->pm4_bo = adreno_fw_create_bo(gpu,
422 			adreno_gpu->fw[ADRENO_FW_PM4], &a5xx_gpu->pm4_iova);
423 
424 
425 		if (IS_ERR(a5xx_gpu->pm4_bo)) {
426 			ret = PTR_ERR(a5xx_gpu->pm4_bo);
427 			a5xx_gpu->pm4_bo = NULL;
428 			DRM_DEV_ERROR(gpu->dev->dev, "could not allocate PM4: %d\n",
429 				ret);
430 			return ret;
431 		}
432 
433 		msm_gem_object_set_name(a5xx_gpu->pm4_bo, "pm4fw");
434 	}
435 
436 	if (!a5xx_gpu->pfp_bo) {
437 		a5xx_gpu->pfp_bo = adreno_fw_create_bo(gpu,
438 			adreno_gpu->fw[ADRENO_FW_PFP], &a5xx_gpu->pfp_iova);
439 
440 		if (IS_ERR(a5xx_gpu->pfp_bo)) {
441 			ret = PTR_ERR(a5xx_gpu->pfp_bo);
442 			a5xx_gpu->pfp_bo = NULL;
443 			DRM_DEV_ERROR(gpu->dev->dev, "could not allocate PFP: %d\n",
444 				ret);
445 			return ret;
446 		}
447 
448 		msm_gem_object_set_name(a5xx_gpu->pfp_bo, "pfpfw");
449 	}
450 
451 	gpu_write64(gpu, REG_A5XX_CP_ME_INSTR_BASE_LO,
452 		REG_A5XX_CP_ME_INSTR_BASE_HI, a5xx_gpu->pm4_iova);
453 
454 	gpu_write64(gpu, REG_A5XX_CP_PFP_INSTR_BASE_LO,
455 		REG_A5XX_CP_PFP_INSTR_BASE_HI, a5xx_gpu->pfp_iova);
456 
457 	return 0;
458 }
459 
460 #define SCM_GPU_ZAP_SHADER_RESUME 0
461 
462 static int a5xx_zap_shader_resume(struct msm_gpu *gpu)
463 {
464 	int ret;
465 
466 	ret = qcom_scm_set_remote_state(SCM_GPU_ZAP_SHADER_RESUME, GPU_PAS_ID);
467 	if (ret)
468 		DRM_ERROR("%s: zap-shader resume failed: %d\n",
469 			gpu->name, ret);
470 
471 	return ret;
472 }
473 
474 static int a5xx_zap_shader_init(struct msm_gpu *gpu)
475 {
476 	static bool loaded;
477 	int ret;
478 
479 	/*
480 	 * If the zap shader is already loaded into memory we just need to kick
481 	 * the remote processor to reinitialize it
482 	 */
483 	if (loaded)
484 		return a5xx_zap_shader_resume(gpu);
485 
486 	ret = adreno_zap_shader_load(gpu, GPU_PAS_ID);
487 
488 	loaded = !ret;
489 	return ret;
490 }
491 
/* RBBM interrupt bits that a5xx_hw_init() writes to RBBM_INT_0_MASK */
#define A5XX_INT_MASK \
	(A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR \
	 | A5XX_RBBM_INT_0_MASK_RBBM_TRANSFER_TIMEOUT \
	 | A5XX_RBBM_INT_0_MASK_RBBM_ME_MS_TIMEOUT \
	 | A5XX_RBBM_INT_0_MASK_RBBM_PFP_MS_TIMEOUT \
	 | A5XX_RBBM_INT_0_MASK_RBBM_ETS_MS_TIMEOUT \
	 | A5XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNC_OVERFLOW \
	 | A5XX_RBBM_INT_0_MASK_CP_HW_ERROR \
	 | A5XX_RBBM_INT_0_MASK_MISC_HANG_DETECT \
	 | A5XX_RBBM_INT_0_MASK_CP_SW \
	 | A5XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS \
	 | A5XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS \
	 | A5XX_RBBM_INT_0_MASK_GPMU_VOLTAGE_DROOP)
504 
505 static int a5xx_hw_init(struct msm_gpu *gpu)
506 {
507 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
508 	int ret;
509 
510 	gpu_write(gpu, REG_A5XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x00000003);
511 
512 	if (adreno_is_a540(adreno_gpu))
513 		gpu_write(gpu, REG_A5XX_VBIF_GATE_OFF_WRREQ_EN, 0x00000009);
514 
515 	/* Make all blocks contribute to the GPU BUSY perf counter */
516 	gpu_write(gpu, REG_A5XX_RBBM_PERFCTR_GPU_BUSY_MASKED, 0xFFFFFFFF);
517 
518 	/* Enable RBBM error reporting bits */
519 	gpu_write(gpu, REG_A5XX_RBBM_AHB_CNTL0, 0x00000001);
520 
521 	if (adreno_gpu->info->quirks & ADRENO_QUIRK_FAULT_DETECT_MASK) {
522 		/*
523 		 * Mask out the activity signals from RB1-3 to avoid false
524 		 * positives
525 		 */
526 
527 		gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL11,
528 			0xF0000000);
529 		gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL12,
530 			0xFFFFFFFF);
531 		gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL13,
532 			0xFFFFFFFF);
533 		gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL14,
534 			0xFFFFFFFF);
535 		gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL15,
536 			0xFFFFFFFF);
537 		gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL16,
538 			0xFFFFFFFF);
539 		gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL17,
540 			0xFFFFFFFF);
541 		gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL18,
542 			0xFFFFFFFF);
543 	}
544 
545 	/* Enable fault detection */
546 	gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_INT_CNTL,
547 		(1 << 30) | 0xFFFF);
548 
549 	/* Turn on performance counters */
550 	gpu_write(gpu, REG_A5XX_RBBM_PERFCTR_CNTL, 0x01);
551 
552 	/* Select CP0 to always count cycles */
553 	gpu_write(gpu, REG_A5XX_CP_PERFCTR_CP_SEL_0, PERF_CP_ALWAYS_COUNT);
554 
555 	/* Select RBBM0 to countable 6 to get the busy status for devfreq */
556 	gpu_write(gpu, REG_A5XX_RBBM_PERFCTR_RBBM_SEL_0, 6);
557 
558 	/* Increase VFD cache access so LRZ and other data gets evicted less */
559 	gpu_write(gpu, REG_A5XX_UCHE_CACHE_WAYS, 0x02);
560 
561 	/* Disable L2 bypass in the UCHE */
562 	gpu_write(gpu, REG_A5XX_UCHE_TRAP_BASE_LO, 0xFFFF0000);
563 	gpu_write(gpu, REG_A5XX_UCHE_TRAP_BASE_HI, 0x0001FFFF);
564 	gpu_write(gpu, REG_A5XX_UCHE_WRITE_THRU_BASE_LO, 0xFFFF0000);
565 	gpu_write(gpu, REG_A5XX_UCHE_WRITE_THRU_BASE_HI, 0x0001FFFF);
566 
567 	/* Set the GMEM VA range (0 to gpu->gmem) */
568 	gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MIN_LO, 0x00100000);
569 	gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MIN_HI, 0x00000000);
570 	gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MAX_LO,
571 		0x00100000 + adreno_gpu->gmem - 1);
572 	gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MAX_HI, 0x00000000);
573 
574 	if (adreno_is_a510(adreno_gpu)) {
575 		gpu_write(gpu, REG_A5XX_CP_MEQ_THRESHOLDS, 0x20);
576 		gpu_write(gpu, REG_A5XX_CP_MERCIU_SIZE, 0x20);
577 		gpu_write(gpu, REG_A5XX_CP_ROQ_THRESHOLDS_2, 0x40000030);
578 		gpu_write(gpu, REG_A5XX_CP_ROQ_THRESHOLDS_1, 0x20100D0A);
579 		gpu_write(gpu, REG_A5XX_PC_DBG_ECO_CNTL,
580 			  (0x200 << 11 | 0x200 << 22));
581 	} else {
582 		gpu_write(gpu, REG_A5XX_CP_MEQ_THRESHOLDS, 0x40);
583 		if (adreno_is_a530(adreno_gpu))
584 			gpu_write(gpu, REG_A5XX_CP_MERCIU_SIZE, 0x40);
585 		if (adreno_is_a540(adreno_gpu))
586 			gpu_write(gpu, REG_A5XX_CP_MERCIU_SIZE, 0x400);
587 		gpu_write(gpu, REG_A5XX_CP_ROQ_THRESHOLDS_2, 0x80000060);
588 		gpu_write(gpu, REG_A5XX_CP_ROQ_THRESHOLDS_1, 0x40201B16);
589 		gpu_write(gpu, REG_A5XX_PC_DBG_ECO_CNTL,
590 			  (0x400 << 11 | 0x300 << 22));
591 	}
592 
593 	if (adreno_gpu->info->quirks & ADRENO_QUIRK_TWO_PASS_USE_WFI)
594 		gpu_rmw(gpu, REG_A5XX_PC_DBG_ECO_CNTL, 0, (1 << 8));
595 
596 	gpu_write(gpu, REG_A5XX_PC_DBG_ECO_CNTL, 0xc0200100);
597 
598 	/* Enable USE_RETENTION_FLOPS */
599 	gpu_write(gpu, REG_A5XX_CP_CHICKEN_DBG, 0x02000000);
600 
601 	/* Enable ME/PFP split notification */
602 	gpu_write(gpu, REG_A5XX_RBBM_AHB_CNTL1, 0xA6FFFFFF);
603 
604 	/*
605 	 *  In A5x, CCU can send context_done event of a particular context to
606 	 *  UCHE which ultimately reaches CP even when there is valid
607 	 *  transaction of that context inside CCU. This can let CP to program
608 	 *  config registers, which will make the "valid transaction" inside
609 	 *  CCU to be interpreted differently. This can cause gpu fault. This
610 	 *  bug is fixed in latest A510 revision. To enable this bug fix -
611 	 *  bit[11] of RB_DBG_ECO_CNTL need to be set to 0, default is 1
612 	 *  (disable). For older A510 version this bit is unused.
613 	 */
614 	if (adreno_is_a510(adreno_gpu))
615 		gpu_rmw(gpu, REG_A5XX_RB_DBG_ECO_CNTL, (1 << 11), 0);
616 
617 	/* Enable HWCG */
618 	a5xx_set_hwcg(gpu, true);
619 
620 	gpu_write(gpu, REG_A5XX_RBBM_AHB_CNTL2, 0x0000003F);
621 
622 	/* Set the highest bank bit */
623 	gpu_write(gpu, REG_A5XX_TPL1_MODE_CNTL, 2 << 7);
624 	gpu_write(gpu, REG_A5XX_RB_MODE_CNTL, 2 << 1);
625 	if (adreno_is_a540(adreno_gpu))
626 		gpu_write(gpu, REG_A5XX_UCHE_DBG_ECO_CNTL_2, 2);
627 
628 	/* Protect registers from the CP */
629 	gpu_write(gpu, REG_A5XX_CP_PROTECT_CNTL, 0x00000007);
630 
631 	/* RBBM */
632 	gpu_write(gpu, REG_A5XX_CP_PROTECT(0), ADRENO_PROTECT_RW(0x04, 4));
633 	gpu_write(gpu, REG_A5XX_CP_PROTECT(1), ADRENO_PROTECT_RW(0x08, 8));
634 	gpu_write(gpu, REG_A5XX_CP_PROTECT(2), ADRENO_PROTECT_RW(0x10, 16));
635 	gpu_write(gpu, REG_A5XX_CP_PROTECT(3), ADRENO_PROTECT_RW(0x20, 32));
636 	gpu_write(gpu, REG_A5XX_CP_PROTECT(4), ADRENO_PROTECT_RW(0x40, 64));
637 	gpu_write(gpu, REG_A5XX_CP_PROTECT(5), ADRENO_PROTECT_RW(0x80, 64));
638 
639 	/* Content protect */
640 	gpu_write(gpu, REG_A5XX_CP_PROTECT(6),
641 		ADRENO_PROTECT_RW(REG_A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_LO,
642 			16));
643 	gpu_write(gpu, REG_A5XX_CP_PROTECT(7),
644 		ADRENO_PROTECT_RW(REG_A5XX_RBBM_SECVID_TRUST_CNTL, 2));
645 
646 	/* CP */
647 	gpu_write(gpu, REG_A5XX_CP_PROTECT(8), ADRENO_PROTECT_RW(0x800, 64));
648 	gpu_write(gpu, REG_A5XX_CP_PROTECT(9), ADRENO_PROTECT_RW(0x840, 8));
649 	gpu_write(gpu, REG_A5XX_CP_PROTECT(10), ADRENO_PROTECT_RW(0x880, 32));
650 	gpu_write(gpu, REG_A5XX_CP_PROTECT(11), ADRENO_PROTECT_RW(0xAA0, 1));
651 
652 	/* RB */
653 	gpu_write(gpu, REG_A5XX_CP_PROTECT(12), ADRENO_PROTECT_RW(0xCC0, 1));
654 	gpu_write(gpu, REG_A5XX_CP_PROTECT(13), ADRENO_PROTECT_RW(0xCF0, 2));
655 
656 	/* VPC */
657 	gpu_write(gpu, REG_A5XX_CP_PROTECT(14), ADRENO_PROTECT_RW(0xE68, 8));
658 	gpu_write(gpu, REG_A5XX_CP_PROTECT(15), ADRENO_PROTECT_RW(0xE70, 4));
659 
660 	/* UCHE */
661 	gpu_write(gpu, REG_A5XX_CP_PROTECT(16), ADRENO_PROTECT_RW(0xE80, 16));
662 
663 	if (adreno_is_a530(adreno_gpu) || adreno_is_a510(adreno_gpu))
664 		gpu_write(gpu, REG_A5XX_CP_PROTECT(17),
665 			ADRENO_PROTECT_RW(0x10000, 0x8000));
666 
667 	gpu_write(gpu, REG_A5XX_RBBM_SECVID_TSB_CNTL, 0);
668 	/*
669 	 * Disable the trusted memory range - we don't actually supported secure
670 	 * memory rendering at this point in time and we don't want to block off
671 	 * part of the virtual memory space.
672 	 */
673 	gpu_write64(gpu, REG_A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_LO,
674 		REG_A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_HI, 0x00000000);
675 	gpu_write(gpu, REG_A5XX_RBBM_SECVID_TSB_TRUSTED_SIZE, 0x00000000);
676 
677 	/* Put the GPU into 64 bit by default */
678 	gpu_write(gpu, REG_A5XX_CP_ADDR_MODE_CNTL, 0x1);
679 	gpu_write(gpu, REG_A5XX_VSC_ADDR_MODE_CNTL, 0x1);
680 	gpu_write(gpu, REG_A5XX_GRAS_ADDR_MODE_CNTL, 0x1);
681 	gpu_write(gpu, REG_A5XX_RB_ADDR_MODE_CNTL, 0x1);
682 	gpu_write(gpu, REG_A5XX_PC_ADDR_MODE_CNTL, 0x1);
683 	gpu_write(gpu, REG_A5XX_HLSQ_ADDR_MODE_CNTL, 0x1);
684 	gpu_write(gpu, REG_A5XX_VFD_ADDR_MODE_CNTL, 0x1);
685 	gpu_write(gpu, REG_A5XX_VPC_ADDR_MODE_CNTL, 0x1);
686 	gpu_write(gpu, REG_A5XX_UCHE_ADDR_MODE_CNTL, 0x1);
687 	gpu_write(gpu, REG_A5XX_SP_ADDR_MODE_CNTL, 0x1);
688 	gpu_write(gpu, REG_A5XX_TPL1_ADDR_MODE_CNTL, 0x1);
689 	gpu_write(gpu, REG_A5XX_RBBM_SECVID_TSB_ADDR_MODE_CNTL, 0x1);
690 
691 	/*
692 	 * VPC corner case with local memory load kill leads to corrupt
693 	 * internal state. Normal Disable does not work for all a5x chips.
694 	 * So do the following setting to disable it.
695 	 */
696 	if (adreno_gpu->info->quirks & ADRENO_QUIRK_LMLOADKILL_DISABLE) {
697 		gpu_rmw(gpu, REG_A5XX_VPC_DBG_ECO_CNTL, 0, BIT(23));
698 		gpu_rmw(gpu, REG_A5XX_HLSQ_DBG_ECO_CNTL, BIT(18), 0);
699 	}
700 
701 	ret = adreno_hw_init(gpu);
702 	if (ret)
703 		return ret;
704 
705 	a5xx_preempt_hw_init(gpu);
706 
707 	if (!adreno_is_a510(adreno_gpu))
708 		a5xx_gpmu_ucode_init(gpu);
709 
710 	ret = a5xx_ucode_init(gpu);
711 	if (ret)
712 		return ret;
713 
714 	/* Disable the interrupts through the initial bringup stage */
715 	gpu_write(gpu, REG_A5XX_RBBM_INT_0_MASK, A5XX_INT_MASK);
716 
717 	/* Clear ME_HALT to start the micro engine */
718 	gpu_write(gpu, REG_A5XX_CP_PFP_ME_CNTL, 0);
719 	ret = a5xx_me_init(gpu);
720 	if (ret)
721 		return ret;
722 
723 	ret = a5xx_power_init(gpu);
724 	if (ret)
725 		return ret;
726 
727 	/*
728 	 * Send a pipeline event stat to get misbehaving counters to start
729 	 * ticking correctly
730 	 */
731 	if (adreno_is_a530(adreno_gpu)) {
732 		OUT_PKT7(gpu->rb[0], CP_EVENT_WRITE, 1);
733 		OUT_RING(gpu->rb[0], 0x0F);
734 
735 		gpu->funcs->flush(gpu, gpu->rb[0]);
736 		if (!a5xx_idle(gpu, gpu->rb[0]))
737 			return -EINVAL;
738 	}
739 
740 	/*
741 	 * If the chip that we are using does support loading one, then
742 	 * try to load a zap shader into the secure world. If successful
743 	 * we can use the CP to switch out of secure mode. If not then we
744 	 * have no resource but to try to switch ourselves out manually. If we
745 	 * guessed wrong then access to the RBBM_SECVID_TRUST_CNTL register will
746 	 * be blocked and a permissions violation will soon follow.
747 	 */
748 	ret = a5xx_zap_shader_init(gpu);
749 	if (!ret) {
750 		OUT_PKT7(gpu->rb[0], CP_SET_SECURE_MODE, 1);
751 		OUT_RING(gpu->rb[0], 0x00000000);
752 
753 		gpu->funcs->flush(gpu, gpu->rb[0]);
754 		if (!a5xx_idle(gpu, gpu->rb[0]))
755 			return -EINVAL;
756 	} else if (ret == -ENODEV) {
757 		/*
758 		 * This device does not use zap shader (but print a warning
759 		 * just in case someone got their dt wrong.. hopefully they
760 		 * have a debug UART to realize the error of their ways...
761 		 * if you mess this up you are about to crash horribly)
762 		 */
763 		dev_warn_once(gpu->dev->dev,
764 			"Zap shader not enabled - using SECVID_TRUST_CNTL instead\n");
765 		gpu_write(gpu, REG_A5XX_RBBM_SECVID_TRUST_CNTL, 0x0);
766 	} else {
767 		return ret;
768 	}
769 
770 	/* Last step - yield the ringbuffer */
771 	a5xx_preempt_start(gpu);
772 
773 	return 0;
774 }
775 
776 static void a5xx_recover(struct msm_gpu *gpu)
777 {
778 	int i;
779 
780 	adreno_dump_info(gpu);
781 
782 	for (i = 0; i < 8; i++) {
783 		printk("CP_SCRATCH_REG%d: %u\n", i,
784 			gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(i)));
785 	}
786 
787 	if (hang_debug)
788 		a5xx_dump(gpu);
789 
790 	gpu_write(gpu, REG_A5XX_RBBM_SW_RESET_CMD, 1);
791 	gpu_read(gpu, REG_A5XX_RBBM_SW_RESET_CMD);
792 	gpu_write(gpu, REG_A5XX_RBBM_SW_RESET_CMD, 0);
793 	adreno_recover(gpu);
794 }
795 
796 static void a5xx_destroy(struct msm_gpu *gpu)
797 {
798 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
799 	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
800 
801 	DBG("%s", gpu->name);
802 
803 	a5xx_preempt_fini(gpu);
804 
805 	if (a5xx_gpu->pm4_bo) {
806 		msm_gem_unpin_iova(a5xx_gpu->pm4_bo, gpu->aspace);
807 		drm_gem_object_put_unlocked(a5xx_gpu->pm4_bo);
808 	}
809 
810 	if (a5xx_gpu->pfp_bo) {
811 		msm_gem_unpin_iova(a5xx_gpu->pfp_bo, gpu->aspace);
812 		drm_gem_object_put_unlocked(a5xx_gpu->pfp_bo);
813 	}
814 
815 	if (a5xx_gpu->gpmu_bo) {
816 		msm_gem_unpin_iova(a5xx_gpu->gpmu_bo, gpu->aspace);
817 		drm_gem_object_put_unlocked(a5xx_gpu->gpmu_bo);
818 	}
819 
820 	adreno_gpu_cleanup(adreno_gpu);
821 	kfree(a5xx_gpu);
822 }
823 
824 static inline bool _a5xx_check_idle(struct msm_gpu *gpu)
825 {
826 	if (gpu_read(gpu, REG_A5XX_RBBM_STATUS) & ~A5XX_RBBM_STATUS_HI_BUSY)
827 		return false;
828 
829 	/*
830 	 * Nearly every abnormality ends up pausing the GPU and triggering a
831 	 * fault so we can safely just watch for this one interrupt to fire
832 	 */
833 	return !(gpu_read(gpu, REG_A5XX_RBBM_INT_0_STATUS) &
834 		A5XX_RBBM_INT_0_MASK_MISC_HANG_DETECT);
835 }
836 
837 bool a5xx_idle(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
838 {
839 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
840 	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
841 
842 	if (ring != a5xx_gpu->cur_ring) {
843 		WARN(1, "Tried to idle a non-current ringbuffer\n");
844 		return false;
845 	}
846 
847 	/* wait for CP to drain ringbuffer: */
848 	if (!adreno_idle(gpu, ring))
849 		return false;
850 
851 	if (spin_until(_a5xx_check_idle(gpu))) {
852 		DRM_ERROR("%s: %ps: timeout waiting for GPU to idle: status %8.8X irq %8.8X rptr/wptr %d/%d\n",
853 			gpu->name, __builtin_return_address(0),
854 			gpu_read(gpu, REG_A5XX_RBBM_STATUS),
855 			gpu_read(gpu, REG_A5XX_RBBM_INT_0_STATUS),
856 			gpu_read(gpu, REG_A5XX_CP_RB_RPTR),
857 			gpu_read(gpu, REG_A5XX_CP_RB_WPTR));
858 		return false;
859 	}
860 
861 	return true;
862 }
863 
864 static int a5xx_fault_handler(void *arg, unsigned long iova, int flags)
865 {
866 	struct msm_gpu *gpu = arg;
867 	pr_warn_ratelimited("*** gpu fault: iova=%08lx, flags=%d (%u,%u,%u,%u)\n",
868 			iova, flags,
869 			gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(4)),
870 			gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(5)),
871 			gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(6)),
872 			gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(7)));
873 
874 	return -EFAULT;
875 }
876 
877 static void a5xx_cp_err_irq(struct msm_gpu *gpu)
878 {
879 	u32 status = gpu_read(gpu, REG_A5XX_CP_INTERRUPT_STATUS);
880 
881 	if (status & A5XX_CP_INT_CP_OPCODE_ERROR) {
882 		u32 val;
883 
884 		gpu_write(gpu, REG_A5XX_CP_PFP_STAT_ADDR, 0);
885 
886 		/*
887 		 * REG_A5XX_CP_PFP_STAT_DATA is indexed, and we want index 1 so
888 		 * read it twice
889 		 */
890 
891 		gpu_read(gpu, REG_A5XX_CP_PFP_STAT_DATA);
892 		val = gpu_read(gpu, REG_A5XX_CP_PFP_STAT_DATA);
893 
894 		dev_err_ratelimited(gpu->dev->dev, "CP | opcode error | possible opcode=0x%8.8X\n",
895 			val);
896 	}
897 
898 	if (status & A5XX_CP_INT_CP_HW_FAULT_ERROR)
899 		dev_err_ratelimited(gpu->dev->dev, "CP | HW fault | status=0x%8.8X\n",
900 			gpu_read(gpu, REG_A5XX_CP_HW_FAULT));
901 
902 	if (status & A5XX_CP_INT_CP_DMA_ERROR)
903 		dev_err_ratelimited(gpu->dev->dev, "CP | DMA error\n");
904 
905 	if (status & A5XX_CP_INT_CP_REGISTER_PROTECTION_ERROR) {
906 		u32 val = gpu_read(gpu, REG_A5XX_CP_PROTECT_STATUS);
907 
908 		dev_err_ratelimited(gpu->dev->dev,
909 			"CP | protected mode error | %s | addr=0x%8.8X | status=0x%8.8X\n",
910 			val & (1 << 24) ? "WRITE" : "READ",
911 			(val & 0xFFFFF) >> 2, val);
912 	}
913 
914 	if (status & A5XX_CP_INT_CP_AHB_ERROR) {
915 		u32 status = gpu_read(gpu, REG_A5XX_CP_AHB_FAULT);
916 		const char *access[16] = { "reserved", "reserved",
917 			"timestamp lo", "timestamp hi", "pfp read", "pfp write",
918 			"", "", "me read", "me write", "", "", "crashdump read",
919 			"crashdump write" };
920 
921 		dev_err_ratelimited(gpu->dev->dev,
922 			"CP | AHB error | addr=%X access=%s error=%d | status=0x%8.8X\n",
923 			status & 0xFFFFF, access[(status >> 24) & 0xF],
924 			(status & (1 << 31)), status);
925 	}
926 }
927 
928 static void a5xx_rbbm_err_irq(struct msm_gpu *gpu, u32 status)
929 {
930 	if (status & A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR) {
931 		u32 val = gpu_read(gpu, REG_A5XX_RBBM_AHB_ERROR_STATUS);
932 
933 		dev_err_ratelimited(gpu->dev->dev,
934 			"RBBM | AHB bus error | %s | addr=0x%X | ports=0x%X:0x%X\n",
935 			val & (1 << 28) ? "WRITE" : "READ",
936 			(val & 0xFFFFF) >> 2, (val >> 20) & 0x3,
937 			(val >> 24) & 0xF);
938 
939 		/* Clear the error */
940 		gpu_write(gpu, REG_A5XX_RBBM_AHB_CMD, (1 << 4));
941 
942 		/* Clear the interrupt */
943 		gpu_write(gpu, REG_A5XX_RBBM_INT_CLEAR_CMD,
944 			A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR);
945 	}
946 
947 	if (status & A5XX_RBBM_INT_0_MASK_RBBM_TRANSFER_TIMEOUT)
948 		dev_err_ratelimited(gpu->dev->dev, "RBBM | AHB transfer timeout\n");
949 
950 	if (status & A5XX_RBBM_INT_0_MASK_RBBM_ME_MS_TIMEOUT)
951 		dev_err_ratelimited(gpu->dev->dev, "RBBM | ME master split | status=0x%X\n",
952 			gpu_read(gpu, REG_A5XX_RBBM_AHB_ME_SPLIT_STATUS));
953 
954 	if (status & A5XX_RBBM_INT_0_MASK_RBBM_PFP_MS_TIMEOUT)
955 		dev_err_ratelimited(gpu->dev->dev, "RBBM | PFP master split | status=0x%X\n",
956 			gpu_read(gpu, REG_A5XX_RBBM_AHB_PFP_SPLIT_STATUS));
957 
958 	if (status & A5XX_RBBM_INT_0_MASK_RBBM_ETS_MS_TIMEOUT)
959 		dev_err_ratelimited(gpu->dev->dev, "RBBM | ETS master split | status=0x%X\n",
960 			gpu_read(gpu, REG_A5XX_RBBM_AHB_ETS_SPLIT_STATUS));
961 
962 	if (status & A5XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNC_OVERFLOW)
963 		dev_err_ratelimited(gpu->dev->dev, "RBBM | ATB ASYNC overflow\n");
964 
965 	if (status & A5XX_RBBM_INT_0_MASK_RBBM_ATB_BUS_OVERFLOW)
966 		dev_err_ratelimited(gpu->dev->dev, "RBBM | ATB bus overflow\n");
967 }
968 
969 static void a5xx_uche_err_irq(struct msm_gpu *gpu)
970 {
971 	uint64_t addr = (uint64_t) gpu_read(gpu, REG_A5XX_UCHE_TRAP_LOG_HI);
972 
973 	addr |= gpu_read(gpu, REG_A5XX_UCHE_TRAP_LOG_LO);
974 
975 	dev_err_ratelimited(gpu->dev->dev, "UCHE | Out of bounds access | addr=0x%llX\n",
976 		addr);
977 }
978 
979 static void a5xx_gpmu_err_irq(struct msm_gpu *gpu)
980 {
981 	dev_err_ratelimited(gpu->dev->dev, "GPMU | voltage droop\n");
982 }
983 
984 static void a5xx_fault_detect_irq(struct msm_gpu *gpu)
985 {
986 	struct drm_device *dev = gpu->dev;
987 	struct msm_drm_private *priv = dev->dev_private;
988 	struct msm_ringbuffer *ring = gpu->funcs->active_ring(gpu);
989 
990 	DRM_DEV_ERROR(dev->dev, "gpu fault ring %d fence %x status %8.8X rb %4.4x/%4.4x ib1 %16.16llX/%4.4x ib2 %16.16llX/%4.4x\n",
991 		ring ? ring->id : -1, ring ? ring->seqno : 0,
992 		gpu_read(gpu, REG_A5XX_RBBM_STATUS),
993 		gpu_read(gpu, REG_A5XX_CP_RB_RPTR),
994 		gpu_read(gpu, REG_A5XX_CP_RB_WPTR),
995 		gpu_read64(gpu, REG_A5XX_CP_IB1_BASE, REG_A5XX_CP_IB1_BASE_HI),
996 		gpu_read(gpu, REG_A5XX_CP_IB1_BUFSZ),
997 		gpu_read64(gpu, REG_A5XX_CP_IB2_BASE, REG_A5XX_CP_IB2_BASE_HI),
998 		gpu_read(gpu, REG_A5XX_CP_IB2_BUFSZ));
999 
1000 	/* Turn off the hangcheck timer to keep it from bothering us */
1001 	del_timer(&gpu->hangcheck_timer);
1002 
1003 	queue_work(priv->wq, &gpu->recover_work);
1004 }
1005 
/*
 * RBBM_INT_0 status bits that make a5xx_irq() call a5xx_rbbm_err_irq().
 *
 * NOTE(review): a5xx_rbbm_err_irq() also decodes
 * A5XX_RBBM_INT_0_MASK_RBBM_ATB_BUS_OVERFLOW, but that bit is not part of
 * this mask, so it is only reported when one of the bits below is set as
 * well - confirm whether that is intentional.
 */
#define RBBM_ERROR_MASK \
	(A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR | \
	A5XX_RBBM_INT_0_MASK_RBBM_TRANSFER_TIMEOUT | \
	A5XX_RBBM_INT_0_MASK_RBBM_ME_MS_TIMEOUT | \
	A5XX_RBBM_INT_0_MASK_RBBM_PFP_MS_TIMEOUT | \
	A5XX_RBBM_INT_0_MASK_RBBM_ETS_MS_TIMEOUT | \
	A5XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNC_OVERFLOW)
1013 
/*
 * Top-level interrupt handler (installed as funcs.base.irq).
 *
 * Samples RBBM_INT_0_STATUS once, acknowledges everything except the AHB
 * error bit, and then dispatches to the per-source handlers based on the
 * sampled value. Always returns IRQ_HANDLED.
 */
static irqreturn_t a5xx_irq(struct msm_gpu *gpu)
{
	u32 status = gpu_read(gpu, REG_A5XX_RBBM_INT_0_STATUS);

	/*
	 * Clear all the interrupts except RBBM_AHB_ERROR - if we clear it
	 * before the source is cleared the interrupt will storm.
	 */
	gpu_write(gpu, REG_A5XX_RBBM_INT_CLEAR_CMD,
		status & ~A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR);

	/* Pass status to a5xx_rbbm_err_irq because we've already cleared it */
	if (status & RBBM_ERROR_MASK)
		a5xx_rbbm_err_irq(gpu, status);

	if (status & A5XX_RBBM_INT_0_MASK_CP_HW_ERROR)
		a5xx_cp_err_irq(gpu);

	if (status & A5XX_RBBM_INT_0_MASK_MISC_HANG_DETECT)
		a5xx_fault_detect_irq(gpu);

	if (status & A5XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS)
		a5xx_uche_err_irq(gpu);

	if (status & A5XX_RBBM_INT_0_MASK_GPMU_VOLTAGE_DROOP)
		a5xx_gpmu_err_irq(gpu);

	/* Timestamp interrupt: kick preemption first, then retire submits */
	if (status & A5XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS) {
		a5xx_preempt_trigger(gpu);
		msm_gpu_retire(gpu);
	}

	if (status & A5XX_RBBM_INT_0_MASK_CP_SW)
		a5xx_preempt_irq(gpu);

	return IRQ_HANDLED;
}
1051 
/*
 * Translation table from the generic REG_ADRENO_* indices to the A5XX
 * register offsets. Installed as adreno_gpu->reg_offsets by a5xx_gpu_init().
 */
static const u32 a5xx_register_offsets[REG_ADRENO_REGISTER_MAX] = {
	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_BASE, REG_A5XX_CP_RB_BASE),
	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_BASE_HI, REG_A5XX_CP_RB_BASE_HI),
	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_RPTR_ADDR, REG_A5XX_CP_RB_RPTR_ADDR),
	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_RPTR_ADDR_HI,
		REG_A5XX_CP_RB_RPTR_ADDR_HI),
	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_RPTR, REG_A5XX_CP_RB_RPTR),
	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_WPTR, REG_A5XX_CP_RB_WPTR),
	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_CNTL, REG_A5XX_CP_RB_CNTL),
};
1062 
/*
 * Register list handed to the adreno core through adreno_gpu->registers
 * (see a5xx_gpu_init()).
 *
 * NOTE(review): the values look like (first, last) register offset pairs
 * terminated by ~0 - confirm against the adreno core code that consumes
 * the list.
 */
static const u32 a5xx_registers[] = {
	0x0000, 0x0002, 0x0004, 0x0020, 0x0022, 0x0026, 0x0029, 0x002B,
	0x002E, 0x0035, 0x0038, 0x0042, 0x0044, 0x0044, 0x0047, 0x0095,
	0x0097, 0x00BB, 0x03A0, 0x0464, 0x0469, 0x046F, 0x04D2, 0x04D3,
	0x04E0, 0x0533, 0x0540, 0x0555, 0x0800, 0x081A, 0x081F, 0x0841,
	0x0860, 0x0860, 0x0880, 0x08A0, 0x0B00, 0x0B12, 0x0B15, 0x0B28,
	0x0B78, 0x0B7F, 0x0BB0, 0x0BBD, 0x0BC0, 0x0BC6, 0x0BD0, 0x0C53,
	0x0C60, 0x0C61, 0x0C80, 0x0C82, 0x0C84, 0x0C85, 0x0C90, 0x0C98,
	0x0CA0, 0x0CA0, 0x0CB0, 0x0CB2, 0x2180, 0x2185, 0x2580, 0x2585,
	0x0CC1, 0x0CC1, 0x0CC4, 0x0CC7, 0x0CCC, 0x0CCC, 0x0CD0, 0x0CD8,
	0x0CE0, 0x0CE5, 0x0CE8, 0x0CE8, 0x0CEC, 0x0CF1, 0x0CFB, 0x0D0E,
	0x2100, 0x211E, 0x2140, 0x2145, 0x2500, 0x251E, 0x2540, 0x2545,
	0x0D10, 0x0D17, 0x0D20, 0x0D23, 0x0D30, 0x0D30, 0x20C0, 0x20C0,
	0x24C0, 0x24C0, 0x0E40, 0x0E43, 0x0E4A, 0x0E4A, 0x0E50, 0x0E57,
	0x0E60, 0x0E7C, 0x0E80, 0x0E8E, 0x0E90, 0x0E96, 0x0EA0, 0x0EA8,
	0x0EB0, 0x0EB2, 0xE140, 0xE147, 0xE150, 0xE187, 0xE1A0, 0xE1A9,
	0xE1B0, 0xE1B6, 0xE1C0, 0xE1C7, 0xE1D0, 0xE1D1, 0xE200, 0xE201,
	0xE210, 0xE21C, 0xE240, 0xE268, 0xE000, 0xE006, 0xE010, 0xE09A,
	0xE0A0, 0xE0A4, 0xE0AA, 0xE0EB, 0xE100, 0xE105, 0xE380, 0xE38F,
	0xE3B0, 0xE3B0, 0xE400, 0xE405, 0xE408, 0xE4E9, 0xE4F0, 0xE4F0,
	0xE280, 0xE280, 0xE282, 0xE2A3, 0xE2A5, 0xE2C2, 0xE940, 0xE947,
	0xE950, 0xE987, 0xE9A0, 0xE9A9, 0xE9B0, 0xE9B6, 0xE9C0, 0xE9C7,
	0xE9D0, 0xE9D1, 0xEA00, 0xEA01, 0xEA10, 0xEA1C, 0xEA40, 0xEA68,
	0xE800, 0xE806, 0xE810, 0xE89A, 0xE8A0, 0xE8A4, 0xE8AA, 0xE8EB,
	0xE900, 0xE905, 0xEB80, 0xEB8F, 0xEBB0, 0xEBB0, 0xEC00, 0xEC05,
	0xEC08, 0xECE9, 0xECF0, 0xECF0, 0xEA80, 0xEA80, 0xEA82, 0xEAA3,
	0xEAA5, 0xEAC2, 0xA800, 0xA800, 0xA820, 0xA828, 0xA840, 0xA87D,
	0XA880, 0xA88D, 0xA890, 0xA8A3, 0xA8D0, 0xA8D8, 0xA8E0, 0xA8F5,
	0xAC60, 0xAC60, ~0,
};
1093 
1094 static void a5xx_dump(struct msm_gpu *gpu)
1095 {
1096 	DRM_DEV_INFO(gpu->dev->dev, "status:   %08x\n",
1097 		gpu_read(gpu, REG_A5XX_RBBM_STATUS));
1098 	adreno_dump(gpu);
1099 }
1100 
/*
 * Power up the GPU (installed as funcs.base.pm_resume).
 *
 * Turns on core power through msm_gpu_pm_resume(). On A510 only the clock
 * gating setup is performed; on other parts the RBCCU and SP power domains
 * are enabled in that order, polling bit 20 of each status register.
 *
 * Returns 0 on success, the msm_gpu_pm_resume() error, or the spin_usecs()
 * result if a power domain does not report enabled in time.
 */
static int a5xx_pm_resume(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	int ret;

	/* Turn on the core power */
	ret = msm_gpu_pm_resume(gpu);
	if (ret)
		return ret;

	if (adreno_is_a510(adreno_gpu)) {
		/* Halt the sp_input_clk at HM level */
		gpu_write(gpu, REG_A5XX_RBBM_CLOCK_CNTL, 0x00000055);
		a5xx_set_hwcg(gpu, true);
		/* Turn on sp_input_clk at HM level */
		gpu_rmw(gpu, REG_A5XX_RBBM_CLOCK_CNTL, 0xff, 0);
		return 0;
	}

	/* Turn the RBCCU domain first to limit the chances of voltage droop */
	gpu_write(gpu, REG_A5XX_GPMU_RBCCU_POWER_CNTL, 0x778000);

	/* Wait 3 usecs before polling */
	udelay(3);

	ret = spin_usecs(gpu, 20, REG_A5XX_GPMU_RBCCU_PWR_CLK_STATUS,
		(1 << 20), (1 << 20));
	if (ret) {
		DRM_ERROR("%s: timeout waiting for RBCCU GDSC enable: %X\n",
			gpu->name,
			gpu_read(gpu, REG_A5XX_GPMU_RBCCU_PWR_CLK_STATUS));
		return ret;
	}

	/* Turn on the SP domain */
	gpu_write(gpu, REG_A5XX_GPMU_SP_POWER_CNTL, 0x778000);
	ret = spin_usecs(gpu, 20, REG_A5XX_GPMU_SP_PWR_CLK_STATUS,
		(1 << 20), (1 << 20));
	if (ret)
		DRM_ERROR("%s: timeout waiting for SP GDSC enable\n",
			gpu->name);

	return ret;
}
1145 
1146 static int a5xx_pm_suspend(struct msm_gpu *gpu)
1147 {
1148 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1149 	u32 mask = 0xf;
1150 
1151 	/* A510 has 3 XIN ports in VBIF */
1152 	if (adreno_is_a510(adreno_gpu))
1153 		mask = 0x7;
1154 
1155 	/* Clear the VBIF pipe before shutting down */
1156 	gpu_write(gpu, REG_A5XX_VBIF_XIN_HALT_CTRL0, mask);
1157 	spin_until((gpu_read(gpu, REG_A5XX_VBIF_XIN_HALT_CTRL1) &
1158 				mask) == mask);
1159 
1160 	gpu_write(gpu, REG_A5XX_VBIF_XIN_HALT_CTRL0, 0);
1161 
1162 	/*
1163 	 * Reset the VBIF before power collapse to avoid issue with FIFO
1164 	 * entries
1165 	 */
1166 	gpu_write(gpu, REG_A5XX_RBBM_BLOCK_SW_RESET_CMD, 0x003C0000);
1167 	gpu_write(gpu, REG_A5XX_RBBM_BLOCK_SW_RESET_CMD, 0x00000000);
1168 
1169 	return msm_gpu_pm_suspend(gpu);
1170 }
1171 
1172 static int a5xx_get_timestamp(struct msm_gpu *gpu, uint64_t *value)
1173 {
1174 	*value = gpu_read64(gpu, REG_A5XX_RBBM_PERFCTR_CP_0_LO,
1175 		REG_A5XX_RBBM_PERFCTR_CP_0_HI);
1176 
1177 	return 0;
1178 }
1179 
/*
 * Scratch buffer shared with the CP crash dumper; filled in by
 * a5xx_crashdumper_init().
 */
struct a5xx_crashdumper {
	/* kernel mapping of the buffer (or an ERR_PTR if allocation failed) */
	void *ptr;
	/* backing GEM object */
	struct drm_gem_object *bo;
	/* GPU address of the buffer in gpu->aspace */
	u64 iova;
};
1185 
/* A5XX crash state: the generic msm state plus the HLSQ aperture dump. */
struct a5xx_gpu_state {
	/* generic state; callers only ever see a pointer to this member */
	struct msm_gpu_state base;
	/*
	 * Registers captured by a5xx_gpu_state_get_hlsq_regs(), laid out in
	 * a5xx_hlsq_aperture_regs[] order; NULL when nothing was captured.
	 * Freed in a5xx_gpu_state_destroy().
	 */
	u32 *hlsqregs;
};
1190 
1191 static int a5xx_crashdumper_init(struct msm_gpu *gpu,
1192 		struct a5xx_crashdumper *dumper)
1193 {
1194 	dumper->ptr = msm_gem_kernel_new_locked(gpu->dev,
1195 		SZ_1M, MSM_BO_UNCACHED, gpu->aspace,
1196 		&dumper->bo, &dumper->iova);
1197 
1198 	if (!IS_ERR(dumper->ptr))
1199 		msm_gem_object_set_name(dumper->bo, "crashdump");
1200 
1201 	return PTR_ERR_OR_ZERO(dumper->ptr);
1202 }
1203 
1204 static int a5xx_crashdumper_run(struct msm_gpu *gpu,
1205 		struct a5xx_crashdumper *dumper)
1206 {
1207 	u32 val;
1208 
1209 	if (IS_ERR_OR_NULL(dumper->ptr))
1210 		return -EINVAL;
1211 
1212 	gpu_write64(gpu, REG_A5XX_CP_CRASH_SCRIPT_BASE_LO,
1213 		REG_A5XX_CP_CRASH_SCRIPT_BASE_HI, dumper->iova);
1214 
1215 	gpu_write(gpu, REG_A5XX_CP_CRASH_DUMP_CNTL, 1);
1216 
1217 	return gpu_poll_timeout(gpu, REG_A5XX_CP_CRASH_DUMP_CNTL, val,
1218 		val & 0x04, 100, 10000);
1219 }
1220 
/*
 * This is the list of register banks that have to be read through the HLSQ
 * aperture with the help of the crashdumper.  These are not nominally
 * accessible from the CPU on a secure platform.
 *
 * type:      bank selector, written (shifted left by 8) to HLSQ_DBG_READ_SEL
 * regoffset: offset of the first register of the bank, used when the
 *            captured values are printed by a5xx_show()
 * count:     number of 32-bit registers read from the aperture for the bank
 */
static const struct {
	u32 type;
	u32 regoffset;
	u32 count;
} a5xx_hlsq_aperture_regs[] = {
	{ 0x35, 0xe00, 0x32 },   /* HLSQ non-context */
	{ 0x31, 0x2080, 0x1 },   /* HLSQ 2D context 0 */
	{ 0x33, 0x2480, 0x1 },   /* HLSQ 2D context 1 */
	{ 0x32, 0xe780, 0x62 },  /* HLSQ 3D context 0 */
	{ 0x34, 0xef80, 0x62 },  /* HLSQ 3D context 1 */
	{ 0x3f, 0x0ec0, 0x40 },  /* SP non-context */
	{ 0x3d, 0x2040, 0x1 },   /* SP 2D context 0 */
	{ 0x3b, 0x2440, 0x1 },   /* SP 2D context 1 */
	{ 0x3e, 0xe580, 0x170 }, /* SP 3D context 0 */
	{ 0x3c, 0xed80, 0x170 }, /* SP 3D context 1 */
	{ 0x3a, 0x0f00, 0x1c },  /* TP non-context */
	{ 0x38, 0x2000, 0xa },   /* TP 2D context 0 */
	{ 0x36, 0x2400, 0xa },   /* TP 2D context 1 */
	{ 0x39, 0xe700, 0x80 },  /* TP 3D context 0 */
	{ 0x37, 0xef00, 0x80 },  /* TP 3D context 1 */
};
1247 
1248 static void a5xx_gpu_state_get_hlsq_regs(struct msm_gpu *gpu,
1249 		struct a5xx_gpu_state *a5xx_state)
1250 {
1251 	struct a5xx_crashdumper dumper = { 0 };
1252 	u32 offset, count = 0;
1253 	u64 *ptr;
1254 	int i;
1255 
1256 	if (a5xx_crashdumper_init(gpu, &dumper))
1257 		return;
1258 
1259 	/* The script will be written at offset 0 */
1260 	ptr = dumper.ptr;
1261 
1262 	/* Start writing the data at offset 256k */
1263 	offset = dumper.iova + (256 * SZ_1K);
1264 
1265 	/* Count how many additional registers to get from the HLSQ aperture */
1266 	for (i = 0; i < ARRAY_SIZE(a5xx_hlsq_aperture_regs); i++)
1267 		count += a5xx_hlsq_aperture_regs[i].count;
1268 
1269 	a5xx_state->hlsqregs = kcalloc(count, sizeof(u32), GFP_KERNEL);
1270 	if (!a5xx_state->hlsqregs)
1271 		return;
1272 
1273 	/* Build the crashdump script */
1274 	for (i = 0; i < ARRAY_SIZE(a5xx_hlsq_aperture_regs); i++) {
1275 		u32 type = a5xx_hlsq_aperture_regs[i].type;
1276 		u32 c = a5xx_hlsq_aperture_regs[i].count;
1277 
1278 		/* Write the register to select the desired bank */
1279 		*ptr++ = ((u64) type << 8);
1280 		*ptr++ = (((u64) REG_A5XX_HLSQ_DBG_READ_SEL) << 44) |
1281 			(1 << 21) | 1;
1282 
1283 		*ptr++ = offset;
1284 		*ptr++ = (((u64) REG_A5XX_HLSQ_DBG_AHB_READ_APERTURE) << 44)
1285 			| c;
1286 
1287 		offset += c * sizeof(u32);
1288 	}
1289 
1290 	/* Write two zeros to close off the script */
1291 	*ptr++ = 0;
1292 	*ptr++ = 0;
1293 
1294 	if (a5xx_crashdumper_run(gpu, &dumper)) {
1295 		kfree(a5xx_state->hlsqregs);
1296 		msm_gem_kernel_put(dumper.bo, gpu->aspace, true);
1297 		return;
1298 	}
1299 
1300 	/* Copy the data from the crashdumper to the state */
1301 	memcpy(a5xx_state->hlsqregs, dumper.ptr + (256 * SZ_1K),
1302 		count * sizeof(u32));
1303 
1304 	msm_gem_kernel_put(dumper.bo, gpu->aspace, true);
1305 }
1306 
1307 static struct msm_gpu_state *a5xx_gpu_state_get(struct msm_gpu *gpu)
1308 {
1309 	struct a5xx_gpu_state *a5xx_state = kzalloc(sizeof(*a5xx_state),
1310 			GFP_KERNEL);
1311 
1312 	if (!a5xx_state)
1313 		return ERR_PTR(-ENOMEM);
1314 
1315 	/* Temporarily disable hardware clock gating before reading the hw */
1316 	a5xx_set_hwcg(gpu, false);
1317 
1318 	/* First get the generic state from the adreno core */
1319 	adreno_gpu_state_get(gpu, &(a5xx_state->base));
1320 
1321 	a5xx_state->base.rbbm_status = gpu_read(gpu, REG_A5XX_RBBM_STATUS);
1322 
1323 	/* Get the HLSQ regs with the help of the crashdumper */
1324 	a5xx_gpu_state_get_hlsq_regs(gpu, a5xx_state);
1325 
1326 	a5xx_set_hwcg(gpu, true);
1327 
1328 	return &a5xx_state->base;
1329 }
1330 
1331 static void a5xx_gpu_state_destroy(struct kref *kref)
1332 {
1333 	struct msm_gpu_state *state = container_of(kref,
1334 		struct msm_gpu_state, ref);
1335 	struct a5xx_gpu_state *a5xx_state = container_of(state,
1336 		struct a5xx_gpu_state, base);
1337 
1338 	kfree(a5xx_state->hlsqregs);
1339 
1340 	adreno_gpu_state_destroy(state);
1341 	kfree(a5xx_state);
1342 }
1343 
1344 static int a5xx_gpu_state_put(struct msm_gpu_state *state)
1345 {
1346 	if (IS_ERR_OR_NULL(state))
1347 		return 1;
1348 
1349 	return kref_put(&state->ref, a5xx_gpu_state_destroy);
1350 }
1351 
1352 
#if defined(CONFIG_DEBUG_FS) || defined(CONFIG_DEV_COREDUMP)
/*
 * Print a captured state (installed as funcs.base.show): the generic adreno
 * part first, followed by the HLSQ aperture registers if any were captured.
 */
static void a5xx_show(struct msm_gpu *gpu, struct msm_gpu_state *state,
		      struct drm_printer *p)
{
	struct a5xx_gpu_state *a5xx_state;
	u32 pos = 0;
	int i, j;

	if (IS_ERR_OR_NULL(state))
		return;

	a5xx_state = container_of(state, struct a5xx_gpu_state, base);

	adreno_show(gpu, state, p);

	/* Nothing more to print without the additional a5xx HLSQ registers */
	if (!a5xx_state->hlsqregs)
		return;

	drm_printf(p, "registers-hlsq:\n");

	for (i = 0; i < ARRAY_SIZE(a5xx_hlsq_aperture_regs); i++) {
		u32 reg = a5xx_hlsq_aperture_regs[i].regoffset;
		u32 nregs = a5xx_hlsq_aperture_regs[i].count;

		for (j = 0; j < nregs; j++, pos++, reg++) {
			u32 value = a5xx_state->hlsqregs[pos];

			/*
			 * The crashdump grabs the whole range of every bank
			 * even though some registers in it do not exist.
			 * Those read back as 0xdeadbeef and are left out.
			 */
			if (value != 0xdeadbeef)
				drm_printf(p, "  - { offset: 0x%04x, value: 0x%08x }\n",
					reg << 2, value);
		}
	}
}
#endif
1394 
1395 static struct msm_ringbuffer *a5xx_active_ring(struct msm_gpu *gpu)
1396 {
1397 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1398 	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
1399 
1400 	return a5xx_gpu->cur_ring;
1401 }
1402 
/*
 * Report GPU busy time since the previous call (installed as
 * funcs.base.gpu_busy).
 *
 * Reads the RBBM_PERFCTR_RBBM_0 counter pair, takes the delta against the
 * value saved in gpu->devfreq.busy_cycles and divides it by the core clock
 * rate expressed in MHz. The result is clamped to ~0LU (with a WARN) if it
 * does not fit in an unsigned long.
 *
 * NOTE(review): the divisor is clk_get_rate() / 1000000, which is zero for
 * any rate below 1 MHz - confirm core_clk always reports a sane rate here.
 */
static unsigned long a5xx_gpu_busy(struct msm_gpu *gpu)
{
	u64 busy_cycles, busy_time;

	busy_cycles = gpu_read64(gpu, REG_A5XX_RBBM_PERFCTR_RBBM_0_LO,
			REG_A5XX_RBBM_PERFCTR_RBBM_0_HI);

	busy_time = busy_cycles - gpu->devfreq.busy_cycles;
	/* do_div() divides busy_time in place */
	do_div(busy_time, clk_get_rate(gpu->core_clk) / 1000000);

	/* Remember the raw counter for the next delta */
	gpu->devfreq.busy_cycles = busy_cycles;

	if (WARN_ON(busy_time > ~0LU))
		return ~0LU;

	return (unsigned long)busy_time;
}
1420 
/*
 * A5XX callback table, passed to adreno_gpu_init() in a5xx_gpu_init().
 * The show and debugfs_init hooks are only present when the matching
 * kernel config options are enabled.
 */
static const struct adreno_gpu_funcs funcs = {
	.base = {
		.get_param = adreno_get_param,
		.hw_init = a5xx_hw_init,
		.pm_suspend = a5xx_pm_suspend,
		.pm_resume = a5xx_pm_resume,
		.recover = a5xx_recover,
		.submit = a5xx_submit,
		.flush = a5xx_flush,
		.active_ring = a5xx_active_ring,
		.irq = a5xx_irq,
		.destroy = a5xx_destroy,
#if defined(CONFIG_DEBUG_FS) || defined(CONFIG_DEV_COREDUMP)
		.show = a5xx_show,
#endif
#if defined(CONFIG_DEBUG_FS)
		.debugfs_init = a5xx_debugfs_init,
#endif
		.gpu_busy = a5xx_gpu_busy,
		.gpu_state_get = a5xx_gpu_state_get,
		.gpu_state_put = a5xx_gpu_state_put,
	},
	.get_timestamp = a5xx_get_timestamp,
};
1445 
1446 static void check_speed_bin(struct device *dev)
1447 {
1448 	struct nvmem_cell *cell;
1449 	u32 val;
1450 
1451 	/*
1452 	 * If the OPP table specifies a opp-supported-hw property then we have
1453 	 * to set something with dev_pm_opp_set_supported_hw() or the table
1454 	 * doesn't get populated so pick an arbitrary value that should
1455 	 * ensure the default frequencies are selected but not conflict with any
1456 	 * actual bins
1457 	 */
1458 	val = 0x80;
1459 
1460 	cell = nvmem_cell_get(dev, "speed_bin");
1461 
1462 	if (!IS_ERR(cell)) {
1463 		void *buf = nvmem_cell_read(cell, NULL);
1464 
1465 		if (!IS_ERR(buf)) {
1466 			u8 bin = *((u8 *) buf);
1467 
1468 			val = (1 << bin);
1469 			kfree(buf);
1470 		}
1471 
1472 		nvmem_cell_put(cell);
1473 	}
1474 
1475 	dev_pm_opp_set_supported_hw(dev, &val, 1);
1476 }
1477 
1478 struct msm_gpu *a5xx_gpu_init(struct drm_device *dev)
1479 {
1480 	struct msm_drm_private *priv = dev->dev_private;
1481 	struct platform_device *pdev = priv->gpu_pdev;
1482 	struct a5xx_gpu *a5xx_gpu = NULL;
1483 	struct adreno_gpu *adreno_gpu;
1484 	struct msm_gpu *gpu;
1485 	int ret;
1486 
1487 	if (!pdev) {
1488 		DRM_DEV_ERROR(dev->dev, "No A5XX device is defined\n");
1489 		return ERR_PTR(-ENXIO);
1490 	}
1491 
1492 	a5xx_gpu = kzalloc(sizeof(*a5xx_gpu), GFP_KERNEL);
1493 	if (!a5xx_gpu)
1494 		return ERR_PTR(-ENOMEM);
1495 
1496 	adreno_gpu = &a5xx_gpu->base;
1497 	gpu = &adreno_gpu->base;
1498 
1499 	adreno_gpu->registers = a5xx_registers;
1500 	adreno_gpu->reg_offsets = a5xx_register_offsets;
1501 
1502 	a5xx_gpu->lm_leakage = 0x4E001A;
1503 
1504 	check_speed_bin(&pdev->dev);
1505 
1506 	ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs, 4);
1507 	if (ret) {
1508 		a5xx_destroy(&(a5xx_gpu->base.base));
1509 		return ERR_PTR(ret);
1510 	}
1511 
1512 	if (gpu->aspace)
1513 		msm_mmu_set_fault_handler(gpu->aspace->mmu, gpu, a5xx_fault_handler);
1514 
1515 	/* Set up the preemption specific bits and pieces for each ringbuffer */
1516 	a5xx_preempt_init(gpu);
1517 
1518 	return gpu;
1519 }
1520