xref: /openbmc/linux/drivers/gpu/drm/msm/adreno/a5xx_gpu.c (revision f97cee494dc92395a668445bcd24d34c89f4ff8c)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /* Copyright (c) 2016-2017 The Linux Foundation. All rights reserved.
3  */
4 
5 #include <linux/kernel.h>
6 #include <linux/types.h>
7 #include <linux/cpumask.h>
8 #include <linux/qcom_scm.h>
9 #include <linux/pm_opp.h>
10 #include <linux/nvmem-consumer.h>
11 #include <linux/slab.h>
12 #include "msm_gem.h"
13 #include "msm_mmu.h"
14 #include "a5xx_gpu.h"
15 
/*
 * Debug switch declared extern here (its definition is not in the visible
 * part of this file); a5xx_recover() consults it before calling a5xx_dump().
 */
extern bool hang_debug;
/* Forward declaration so a5xx_recover() can call a5xx_dump(). */
static void a5xx_dump(struct msm_gpu *gpu);

/*
 * Identifier passed to adreno_zap_shader_load() and
 * qcom_scm_set_remote_state() when loading/resuming the zap shader.
 */
#define GPU_PAS_ID 13
20 
21 static void a5xx_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
22 {
23 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
24 	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
25 	uint32_t wptr;
26 	unsigned long flags;
27 
28 	spin_lock_irqsave(&ring->lock, flags);
29 
30 	/* Copy the shadow to the actual register */
31 	ring->cur = ring->next;
32 
33 	/* Make sure to wrap wptr if we need to */
34 	wptr = get_wptr(ring);
35 
36 	spin_unlock_irqrestore(&ring->lock, flags);
37 
38 	/* Make sure everything is posted before making a decision */
39 	mb();
40 
41 	/* Update HW if this is the current ring and we are not in preempt */
42 	if (a5xx_gpu->cur_ring == ring && !a5xx_in_preempt(a5xx_gpu))
43 		gpu_write(gpu, REG_A5XX_CP_RB_WPTR, wptr);
44 }
45 
46 static void a5xx_submit_in_rb(struct msm_gpu *gpu, struct msm_gem_submit *submit,
47 	struct msm_file_private *ctx)
48 {
49 	struct msm_drm_private *priv = gpu->dev->dev_private;
50 	struct msm_ringbuffer *ring = submit->ring;
51 	struct msm_gem_object *obj;
52 	uint32_t *ptr, dwords;
53 	unsigned int i;
54 
55 	for (i = 0; i < submit->nr_cmds; i++) {
56 		switch (submit->cmd[i].type) {
57 		case MSM_SUBMIT_CMD_IB_TARGET_BUF:
58 			break;
59 		case MSM_SUBMIT_CMD_CTX_RESTORE_BUF:
60 			if (priv->lastctx == ctx)
61 				break;
62 			fallthrough;
63 		case MSM_SUBMIT_CMD_BUF:
64 			/* copy commands into RB: */
65 			obj = submit->bos[submit->cmd[i].idx].obj;
66 			dwords = submit->cmd[i].size;
67 
68 			ptr = msm_gem_get_vaddr(&obj->base);
69 
70 			/* _get_vaddr() shouldn't fail at this point,
71 			 * since we've already mapped it once in
72 			 * submit_reloc()
73 			 */
74 			if (WARN_ON(!ptr))
75 				return;
76 
77 			for (i = 0; i < dwords; i++) {
78 				/* normally the OUT_PKTn() would wait
79 				 * for space for the packet.  But since
80 				 * we just OUT_RING() the whole thing,
81 				 * need to call adreno_wait_ring()
82 				 * ourself:
83 				 */
84 				adreno_wait_ring(ring, 1);
85 				OUT_RING(ring, ptr[i]);
86 			}
87 
88 			msm_gem_put_vaddr(&obj->base);
89 
90 			break;
91 		}
92 	}
93 
94 	a5xx_flush(gpu, ring);
95 	a5xx_preempt_trigger(gpu);
96 
97 	/* we might not necessarily have a cmd from userspace to
98 	 * trigger an event to know that submit has completed, so
99 	 * do this manually:
100 	 */
101 	a5xx_idle(gpu, ring);
102 	ring->memptrs->fence = submit->seqno;
103 	msm_gpu_retire(gpu);
104 }
105 
106 static void a5xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit,
107 	struct msm_file_private *ctx)
108 {
109 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
110 	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
111 	struct msm_drm_private *priv = gpu->dev->dev_private;
112 	struct msm_ringbuffer *ring = submit->ring;
113 	unsigned int i, ibs = 0;
114 
115 	if (IS_ENABLED(CONFIG_DRM_MSM_GPU_SUDO) && submit->in_rb) {
116 		priv->lastctx = NULL;
117 		a5xx_submit_in_rb(gpu, submit, ctx);
118 		return;
119 	}
120 
121 	OUT_PKT7(ring, CP_PREEMPT_ENABLE_GLOBAL, 1);
122 	OUT_RING(ring, 0x02);
123 
124 	/* Turn off protected mode to write to special registers */
125 	OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
126 	OUT_RING(ring, 0);
127 
128 	/* Set the save preemption record for the ring/command */
129 	OUT_PKT4(ring, REG_A5XX_CP_CONTEXT_SWITCH_SAVE_ADDR_LO, 2);
130 	OUT_RING(ring, lower_32_bits(a5xx_gpu->preempt_iova[submit->ring->id]));
131 	OUT_RING(ring, upper_32_bits(a5xx_gpu->preempt_iova[submit->ring->id]));
132 
133 	/* Turn back on protected mode */
134 	OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
135 	OUT_RING(ring, 1);
136 
137 	/* Enable local preemption for finegrain preemption */
138 	OUT_PKT7(ring, CP_PREEMPT_ENABLE_GLOBAL, 1);
139 	OUT_RING(ring, 0x02);
140 
141 	/* Allow CP_CONTEXT_SWITCH_YIELD packets in the IB2 */
142 	OUT_PKT7(ring, CP_YIELD_ENABLE, 1);
143 	OUT_RING(ring, 0x02);
144 
145 	/* Submit the commands */
146 	for (i = 0; i < submit->nr_cmds; i++) {
147 		switch (submit->cmd[i].type) {
148 		case MSM_SUBMIT_CMD_IB_TARGET_BUF:
149 			break;
150 		case MSM_SUBMIT_CMD_CTX_RESTORE_BUF:
151 			if (priv->lastctx == ctx)
152 				break;
153 			fallthrough;
154 		case MSM_SUBMIT_CMD_BUF:
155 			OUT_PKT7(ring, CP_INDIRECT_BUFFER_PFE, 3);
156 			OUT_RING(ring, lower_32_bits(submit->cmd[i].iova));
157 			OUT_RING(ring, upper_32_bits(submit->cmd[i].iova));
158 			OUT_RING(ring, submit->cmd[i].size);
159 			ibs++;
160 			break;
161 		}
162 	}
163 
164 	/*
165 	 * Write the render mode to NULL (0) to indicate to the CP that the IBs
166 	 * are done rendering - otherwise a lucky preemption would start
167 	 * replaying from the last checkpoint
168 	 */
169 	OUT_PKT7(ring, CP_SET_RENDER_MODE, 5);
170 	OUT_RING(ring, 0);
171 	OUT_RING(ring, 0);
172 	OUT_RING(ring, 0);
173 	OUT_RING(ring, 0);
174 	OUT_RING(ring, 0);
175 
176 	/* Turn off IB level preemptions */
177 	OUT_PKT7(ring, CP_YIELD_ENABLE, 1);
178 	OUT_RING(ring, 0x01);
179 
180 	/* Write the fence to the scratch register */
181 	OUT_PKT4(ring, REG_A5XX_CP_SCRATCH_REG(2), 1);
182 	OUT_RING(ring, submit->seqno);
183 
184 	/*
185 	 * Execute a CACHE_FLUSH_TS event. This will ensure that the
186 	 * timestamp is written to the memory and then triggers the interrupt
187 	 */
188 	OUT_PKT7(ring, CP_EVENT_WRITE, 4);
189 	OUT_RING(ring, CP_EVENT_WRITE_0_EVENT(CACHE_FLUSH_TS) |
190 		CP_EVENT_WRITE_0_IRQ);
191 	OUT_RING(ring, lower_32_bits(rbmemptr(ring, fence)));
192 	OUT_RING(ring, upper_32_bits(rbmemptr(ring, fence)));
193 	OUT_RING(ring, submit->seqno);
194 
195 	/* Yield the floor on command completion */
196 	OUT_PKT7(ring, CP_CONTEXT_SWITCH_YIELD, 4);
197 	/*
198 	 * If dword[2:1] are non zero, they specify an address for the CP to
199 	 * write the value of dword[3] to on preemption complete. Write 0 to
200 	 * skip the write
201 	 */
202 	OUT_RING(ring, 0x00);
203 	OUT_RING(ring, 0x00);
204 	/* Data value - not used if the address above is 0 */
205 	OUT_RING(ring, 0x01);
206 	/* Set bit 0 to trigger an interrupt on preempt complete */
207 	OUT_RING(ring, 0x01);
208 
209 	a5xx_flush(gpu, ring);
210 
211 	/* Check to see if we need to start preemption */
212 	a5xx_preempt_trigger(gpu);
213 }
214 
215 static const struct {
216 	u32 offset;
217 	u32 value;
218 } a5xx_hwcg[] = {
219 	{REG_A5XX_RBBM_CLOCK_CNTL_SP0, 0x02222222},
220 	{REG_A5XX_RBBM_CLOCK_CNTL_SP1, 0x02222222},
221 	{REG_A5XX_RBBM_CLOCK_CNTL_SP2, 0x02222222},
222 	{REG_A5XX_RBBM_CLOCK_CNTL_SP3, 0x02222222},
223 	{REG_A5XX_RBBM_CLOCK_CNTL2_SP0, 0x02222220},
224 	{REG_A5XX_RBBM_CLOCK_CNTL2_SP1, 0x02222220},
225 	{REG_A5XX_RBBM_CLOCK_CNTL2_SP2, 0x02222220},
226 	{REG_A5XX_RBBM_CLOCK_CNTL2_SP3, 0x02222220},
227 	{REG_A5XX_RBBM_CLOCK_HYST_SP0, 0x0000F3CF},
228 	{REG_A5XX_RBBM_CLOCK_HYST_SP1, 0x0000F3CF},
229 	{REG_A5XX_RBBM_CLOCK_HYST_SP2, 0x0000F3CF},
230 	{REG_A5XX_RBBM_CLOCK_HYST_SP3, 0x0000F3CF},
231 	{REG_A5XX_RBBM_CLOCK_DELAY_SP0, 0x00000080},
232 	{REG_A5XX_RBBM_CLOCK_DELAY_SP1, 0x00000080},
233 	{REG_A5XX_RBBM_CLOCK_DELAY_SP2, 0x00000080},
234 	{REG_A5XX_RBBM_CLOCK_DELAY_SP3, 0x00000080},
235 	{REG_A5XX_RBBM_CLOCK_CNTL_TP0, 0x22222222},
236 	{REG_A5XX_RBBM_CLOCK_CNTL_TP1, 0x22222222},
237 	{REG_A5XX_RBBM_CLOCK_CNTL_TP2, 0x22222222},
238 	{REG_A5XX_RBBM_CLOCK_CNTL_TP3, 0x22222222},
239 	{REG_A5XX_RBBM_CLOCK_CNTL2_TP0, 0x22222222},
240 	{REG_A5XX_RBBM_CLOCK_CNTL2_TP1, 0x22222222},
241 	{REG_A5XX_RBBM_CLOCK_CNTL2_TP2, 0x22222222},
242 	{REG_A5XX_RBBM_CLOCK_CNTL2_TP3, 0x22222222},
243 	{REG_A5XX_RBBM_CLOCK_CNTL3_TP0, 0x00002222},
244 	{REG_A5XX_RBBM_CLOCK_CNTL3_TP1, 0x00002222},
245 	{REG_A5XX_RBBM_CLOCK_CNTL3_TP2, 0x00002222},
246 	{REG_A5XX_RBBM_CLOCK_CNTL3_TP3, 0x00002222},
247 	{REG_A5XX_RBBM_CLOCK_HYST_TP0, 0x77777777},
248 	{REG_A5XX_RBBM_CLOCK_HYST_TP1, 0x77777777},
249 	{REG_A5XX_RBBM_CLOCK_HYST_TP2, 0x77777777},
250 	{REG_A5XX_RBBM_CLOCK_HYST_TP3, 0x77777777},
251 	{REG_A5XX_RBBM_CLOCK_HYST2_TP0, 0x77777777},
252 	{REG_A5XX_RBBM_CLOCK_HYST2_TP1, 0x77777777},
253 	{REG_A5XX_RBBM_CLOCK_HYST2_TP2, 0x77777777},
254 	{REG_A5XX_RBBM_CLOCK_HYST2_TP3, 0x77777777},
255 	{REG_A5XX_RBBM_CLOCK_HYST3_TP0, 0x00007777},
256 	{REG_A5XX_RBBM_CLOCK_HYST3_TP1, 0x00007777},
257 	{REG_A5XX_RBBM_CLOCK_HYST3_TP2, 0x00007777},
258 	{REG_A5XX_RBBM_CLOCK_HYST3_TP3, 0x00007777},
259 	{REG_A5XX_RBBM_CLOCK_DELAY_TP0, 0x11111111},
260 	{REG_A5XX_RBBM_CLOCK_DELAY_TP1, 0x11111111},
261 	{REG_A5XX_RBBM_CLOCK_DELAY_TP2, 0x11111111},
262 	{REG_A5XX_RBBM_CLOCK_DELAY_TP3, 0x11111111},
263 	{REG_A5XX_RBBM_CLOCK_DELAY2_TP0, 0x11111111},
264 	{REG_A5XX_RBBM_CLOCK_DELAY2_TP1, 0x11111111},
265 	{REG_A5XX_RBBM_CLOCK_DELAY2_TP2, 0x11111111},
266 	{REG_A5XX_RBBM_CLOCK_DELAY2_TP3, 0x11111111},
267 	{REG_A5XX_RBBM_CLOCK_DELAY3_TP0, 0x00001111},
268 	{REG_A5XX_RBBM_CLOCK_DELAY3_TP1, 0x00001111},
269 	{REG_A5XX_RBBM_CLOCK_DELAY3_TP2, 0x00001111},
270 	{REG_A5XX_RBBM_CLOCK_DELAY3_TP3, 0x00001111},
271 	{REG_A5XX_RBBM_CLOCK_CNTL_UCHE, 0x22222222},
272 	{REG_A5XX_RBBM_CLOCK_CNTL2_UCHE, 0x22222222},
273 	{REG_A5XX_RBBM_CLOCK_CNTL3_UCHE, 0x22222222},
274 	{REG_A5XX_RBBM_CLOCK_CNTL4_UCHE, 0x00222222},
275 	{REG_A5XX_RBBM_CLOCK_HYST_UCHE, 0x00444444},
276 	{REG_A5XX_RBBM_CLOCK_DELAY_UCHE, 0x00000002},
277 	{REG_A5XX_RBBM_CLOCK_CNTL_RB0, 0x22222222},
278 	{REG_A5XX_RBBM_CLOCK_CNTL_RB1, 0x22222222},
279 	{REG_A5XX_RBBM_CLOCK_CNTL_RB2, 0x22222222},
280 	{REG_A5XX_RBBM_CLOCK_CNTL_RB3, 0x22222222},
281 	{REG_A5XX_RBBM_CLOCK_CNTL2_RB0, 0x00222222},
282 	{REG_A5XX_RBBM_CLOCK_CNTL2_RB1, 0x00222222},
283 	{REG_A5XX_RBBM_CLOCK_CNTL2_RB2, 0x00222222},
284 	{REG_A5XX_RBBM_CLOCK_CNTL2_RB3, 0x00222222},
285 	{REG_A5XX_RBBM_CLOCK_CNTL_CCU0, 0x00022220},
286 	{REG_A5XX_RBBM_CLOCK_CNTL_CCU1, 0x00022220},
287 	{REG_A5XX_RBBM_CLOCK_CNTL_CCU2, 0x00022220},
288 	{REG_A5XX_RBBM_CLOCK_CNTL_CCU3, 0x00022220},
289 	{REG_A5XX_RBBM_CLOCK_CNTL_RAC, 0x05522222},
290 	{REG_A5XX_RBBM_CLOCK_CNTL2_RAC, 0x00505555},
291 	{REG_A5XX_RBBM_CLOCK_HYST_RB_CCU0, 0x04040404},
292 	{REG_A5XX_RBBM_CLOCK_HYST_RB_CCU1, 0x04040404},
293 	{REG_A5XX_RBBM_CLOCK_HYST_RB_CCU2, 0x04040404},
294 	{REG_A5XX_RBBM_CLOCK_HYST_RB_CCU3, 0x04040404},
295 	{REG_A5XX_RBBM_CLOCK_HYST_RAC, 0x07444044},
296 	{REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_0, 0x00000002},
297 	{REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_1, 0x00000002},
298 	{REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_2, 0x00000002},
299 	{REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_3, 0x00000002},
300 	{REG_A5XX_RBBM_CLOCK_DELAY_RAC, 0x00010011},
301 	{REG_A5XX_RBBM_CLOCK_CNTL_TSE_RAS_RBBM, 0x04222222},
302 	{REG_A5XX_RBBM_CLOCK_MODE_GPC, 0x02222222},
303 	{REG_A5XX_RBBM_CLOCK_MODE_VFD, 0x00002222},
304 	{REG_A5XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM, 0x00000000},
305 	{REG_A5XX_RBBM_CLOCK_HYST_GPC, 0x04104004},
306 	{REG_A5XX_RBBM_CLOCK_HYST_VFD, 0x00000000},
307 	{REG_A5XX_RBBM_CLOCK_DELAY_HLSQ, 0x00000000},
308 	{REG_A5XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM, 0x00004000},
309 	{REG_A5XX_RBBM_CLOCK_DELAY_GPC, 0x00000200},
310 	{REG_A5XX_RBBM_CLOCK_DELAY_VFD, 0x00002222}
311 };
312 
313 void a5xx_set_hwcg(struct msm_gpu *gpu, bool state)
314 {
315 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
316 	unsigned int i;
317 
318 	for (i = 0; i < ARRAY_SIZE(a5xx_hwcg); i++)
319 		gpu_write(gpu, a5xx_hwcg[i].offset,
320 			state ? a5xx_hwcg[i].value : 0);
321 
322 	if (adreno_is_a540(adreno_gpu)) {
323 		gpu_write(gpu, REG_A5XX_RBBM_CLOCK_DELAY_GPMU, state ? 0x00000770 : 0);
324 		gpu_write(gpu, REG_A5XX_RBBM_CLOCK_HYST_GPMU, state ? 0x00000004 : 0);
325 	}
326 
327 	gpu_write(gpu, REG_A5XX_RBBM_CLOCK_CNTL, state ? 0xAAA8AA00 : 0);
328 	gpu_write(gpu, REG_A5XX_RBBM_ISDB_CNT, state ? 0x182 : 0x180);
329 }
330 
331 static int a5xx_me_init(struct msm_gpu *gpu)
332 {
333 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
334 	struct msm_ringbuffer *ring = gpu->rb[0];
335 
336 	OUT_PKT7(ring, CP_ME_INIT, 8);
337 
338 	OUT_RING(ring, 0x0000002F);
339 
340 	/* Enable multiple hardware contexts */
341 	OUT_RING(ring, 0x00000003);
342 
343 	/* Enable error detection */
344 	OUT_RING(ring, 0x20000000);
345 
346 	/* Don't enable header dump */
347 	OUT_RING(ring, 0x00000000);
348 	OUT_RING(ring, 0x00000000);
349 
350 	/* Specify workarounds for various microcode issues */
351 	if (adreno_is_a530(adreno_gpu)) {
352 		/* Workaround for token end syncs
353 		 * Force a WFI after every direct-render 3D mode draw and every
354 		 * 2D mode 3 draw
355 		 */
356 		OUT_RING(ring, 0x0000000B);
357 	} else if (adreno_is_a510(adreno_gpu)) {
358 		/* Workaround for token and syncs */
359 		OUT_RING(ring, 0x00000001);
360 	} else {
361 		/* No workarounds enabled */
362 		OUT_RING(ring, 0x00000000);
363 	}
364 
365 	OUT_RING(ring, 0x00000000);
366 	OUT_RING(ring, 0x00000000);
367 
368 	gpu->funcs->flush(gpu, ring);
369 	return a5xx_idle(gpu, ring) ? 0 : -EINVAL;
370 }
371 
372 static int a5xx_preempt_start(struct msm_gpu *gpu)
373 {
374 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
375 	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
376 	struct msm_ringbuffer *ring = gpu->rb[0];
377 
378 	if (gpu->nr_rings == 1)
379 		return 0;
380 
381 	/* Turn off protected mode to write to special registers */
382 	OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
383 	OUT_RING(ring, 0);
384 
385 	/* Set the save preemption record for the ring/command */
386 	OUT_PKT4(ring, REG_A5XX_CP_CONTEXT_SWITCH_SAVE_ADDR_LO, 2);
387 	OUT_RING(ring, lower_32_bits(a5xx_gpu->preempt_iova[ring->id]));
388 	OUT_RING(ring, upper_32_bits(a5xx_gpu->preempt_iova[ring->id]));
389 
390 	/* Turn back on protected mode */
391 	OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
392 	OUT_RING(ring, 1);
393 
394 	OUT_PKT7(ring, CP_PREEMPT_ENABLE_GLOBAL, 1);
395 	OUT_RING(ring, 0x00);
396 
397 	OUT_PKT7(ring, CP_PREEMPT_ENABLE_LOCAL, 1);
398 	OUT_RING(ring, 0x01);
399 
400 	OUT_PKT7(ring, CP_YIELD_ENABLE, 1);
401 	OUT_RING(ring, 0x01);
402 
403 	/* Yield the floor on command completion */
404 	OUT_PKT7(ring, CP_CONTEXT_SWITCH_YIELD, 4);
405 	OUT_RING(ring, 0x00);
406 	OUT_RING(ring, 0x00);
407 	OUT_RING(ring, 0x01);
408 	OUT_RING(ring, 0x01);
409 
410 	gpu->funcs->flush(gpu, ring);
411 
412 	return a5xx_idle(gpu, ring) ? 0 : -EINVAL;
413 }
414 
415 static int a5xx_ucode_init(struct msm_gpu *gpu)
416 {
417 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
418 	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
419 	int ret;
420 
421 	if (!a5xx_gpu->pm4_bo) {
422 		a5xx_gpu->pm4_bo = adreno_fw_create_bo(gpu,
423 			adreno_gpu->fw[ADRENO_FW_PM4], &a5xx_gpu->pm4_iova);
424 
425 
426 		if (IS_ERR(a5xx_gpu->pm4_bo)) {
427 			ret = PTR_ERR(a5xx_gpu->pm4_bo);
428 			a5xx_gpu->pm4_bo = NULL;
429 			DRM_DEV_ERROR(gpu->dev->dev, "could not allocate PM4: %d\n",
430 				ret);
431 			return ret;
432 		}
433 
434 		msm_gem_object_set_name(a5xx_gpu->pm4_bo, "pm4fw");
435 	}
436 
437 	if (!a5xx_gpu->pfp_bo) {
438 		a5xx_gpu->pfp_bo = adreno_fw_create_bo(gpu,
439 			adreno_gpu->fw[ADRENO_FW_PFP], &a5xx_gpu->pfp_iova);
440 
441 		if (IS_ERR(a5xx_gpu->pfp_bo)) {
442 			ret = PTR_ERR(a5xx_gpu->pfp_bo);
443 			a5xx_gpu->pfp_bo = NULL;
444 			DRM_DEV_ERROR(gpu->dev->dev, "could not allocate PFP: %d\n",
445 				ret);
446 			return ret;
447 		}
448 
449 		msm_gem_object_set_name(a5xx_gpu->pfp_bo, "pfpfw");
450 	}
451 
452 	gpu_write64(gpu, REG_A5XX_CP_ME_INSTR_BASE_LO,
453 		REG_A5XX_CP_ME_INSTR_BASE_HI, a5xx_gpu->pm4_iova);
454 
455 	gpu_write64(gpu, REG_A5XX_CP_PFP_INSTR_BASE_LO,
456 		REG_A5XX_CP_PFP_INSTR_BASE_HI, a5xx_gpu->pfp_iova);
457 
458 	return 0;
459 }
460 
461 #define SCM_GPU_ZAP_SHADER_RESUME 0
462 
463 static int a5xx_zap_shader_resume(struct msm_gpu *gpu)
464 {
465 	int ret;
466 
467 	ret = qcom_scm_set_remote_state(SCM_GPU_ZAP_SHADER_RESUME, GPU_PAS_ID);
468 	if (ret)
469 		DRM_ERROR("%s: zap-shader resume failed: %d\n",
470 			gpu->name, ret);
471 
472 	return ret;
473 }
474 
475 static int a5xx_zap_shader_init(struct msm_gpu *gpu)
476 {
477 	static bool loaded;
478 	int ret;
479 
480 	/*
481 	 * If the zap shader is already loaded into memory we just need to kick
482 	 * the remote processor to reinitialize it
483 	 */
484 	if (loaded)
485 		return a5xx_zap_shader_resume(gpu);
486 
487 	ret = adreno_zap_shader_load(gpu, GPU_PAS_ID);
488 
489 	loaded = !ret;
490 	return ret;
491 }
492 
/*
 * RBBM interrupt sources that a5xx_hw_init() writes to
 * REG_A5XX_RBBM_INT_0_MASK.
 */
#define A5XX_INT_MASK \
	(A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR \
	 | A5XX_RBBM_INT_0_MASK_RBBM_TRANSFER_TIMEOUT \
	 | A5XX_RBBM_INT_0_MASK_RBBM_ME_MS_TIMEOUT \
	 | A5XX_RBBM_INT_0_MASK_RBBM_PFP_MS_TIMEOUT \
	 | A5XX_RBBM_INT_0_MASK_RBBM_ETS_MS_TIMEOUT \
	 | A5XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNC_OVERFLOW \
	 | A5XX_RBBM_INT_0_MASK_CP_HW_ERROR \
	 | A5XX_RBBM_INT_0_MASK_MISC_HANG_DETECT \
	 | A5XX_RBBM_INT_0_MASK_CP_SW \
	 | A5XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS \
	 | A5XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS \
	 | A5XX_RBBM_INT_0_MASK_GPMU_VOLTAGE_DROOP)
505 
506 static int a5xx_hw_init(struct msm_gpu *gpu)
507 {
508 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
509 	int ret;
510 
511 	gpu_write(gpu, REG_A5XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x00000003);
512 
513 	if (adreno_is_a540(adreno_gpu))
514 		gpu_write(gpu, REG_A5XX_VBIF_GATE_OFF_WRREQ_EN, 0x00000009);
515 
516 	/* Make all blocks contribute to the GPU BUSY perf counter */
517 	gpu_write(gpu, REG_A5XX_RBBM_PERFCTR_GPU_BUSY_MASKED, 0xFFFFFFFF);
518 
519 	/* Enable RBBM error reporting bits */
520 	gpu_write(gpu, REG_A5XX_RBBM_AHB_CNTL0, 0x00000001);
521 
522 	if (adreno_gpu->info->quirks & ADRENO_QUIRK_FAULT_DETECT_MASK) {
523 		/*
524 		 * Mask out the activity signals from RB1-3 to avoid false
525 		 * positives
526 		 */
527 
528 		gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL11,
529 			0xF0000000);
530 		gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL12,
531 			0xFFFFFFFF);
532 		gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL13,
533 			0xFFFFFFFF);
534 		gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL14,
535 			0xFFFFFFFF);
536 		gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL15,
537 			0xFFFFFFFF);
538 		gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL16,
539 			0xFFFFFFFF);
540 		gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL17,
541 			0xFFFFFFFF);
542 		gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL18,
543 			0xFFFFFFFF);
544 	}
545 
546 	/* Enable fault detection */
547 	gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_INT_CNTL,
548 		(1 << 30) | 0xFFFF);
549 
550 	/* Turn on performance counters */
551 	gpu_write(gpu, REG_A5XX_RBBM_PERFCTR_CNTL, 0x01);
552 
553 	/* Select CP0 to always count cycles */
554 	gpu_write(gpu, REG_A5XX_CP_PERFCTR_CP_SEL_0, PERF_CP_ALWAYS_COUNT);
555 
556 	/* Select RBBM0 to countable 6 to get the busy status for devfreq */
557 	gpu_write(gpu, REG_A5XX_RBBM_PERFCTR_RBBM_SEL_0, 6);
558 
559 	/* Increase VFD cache access so LRZ and other data gets evicted less */
560 	gpu_write(gpu, REG_A5XX_UCHE_CACHE_WAYS, 0x02);
561 
562 	/* Disable L2 bypass in the UCHE */
563 	gpu_write(gpu, REG_A5XX_UCHE_TRAP_BASE_LO, 0xFFFF0000);
564 	gpu_write(gpu, REG_A5XX_UCHE_TRAP_BASE_HI, 0x0001FFFF);
565 	gpu_write(gpu, REG_A5XX_UCHE_WRITE_THRU_BASE_LO, 0xFFFF0000);
566 	gpu_write(gpu, REG_A5XX_UCHE_WRITE_THRU_BASE_HI, 0x0001FFFF);
567 
568 	/* Set the GMEM VA range (0 to gpu->gmem) */
569 	gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MIN_LO, 0x00100000);
570 	gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MIN_HI, 0x00000000);
571 	gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MAX_LO,
572 		0x00100000 + adreno_gpu->gmem - 1);
573 	gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MAX_HI, 0x00000000);
574 
575 	if (adreno_is_a510(adreno_gpu)) {
576 		gpu_write(gpu, REG_A5XX_CP_MEQ_THRESHOLDS, 0x20);
577 		gpu_write(gpu, REG_A5XX_CP_MERCIU_SIZE, 0x20);
578 		gpu_write(gpu, REG_A5XX_CP_ROQ_THRESHOLDS_2, 0x40000030);
579 		gpu_write(gpu, REG_A5XX_CP_ROQ_THRESHOLDS_1, 0x20100D0A);
580 		gpu_write(gpu, REG_A5XX_PC_DBG_ECO_CNTL,
581 			  (0x200 << 11 | 0x200 << 22));
582 	} else {
583 		gpu_write(gpu, REG_A5XX_CP_MEQ_THRESHOLDS, 0x40);
584 		if (adreno_is_a530(adreno_gpu))
585 			gpu_write(gpu, REG_A5XX_CP_MERCIU_SIZE, 0x40);
586 		if (adreno_is_a540(adreno_gpu))
587 			gpu_write(gpu, REG_A5XX_CP_MERCIU_SIZE, 0x400);
588 		gpu_write(gpu, REG_A5XX_CP_ROQ_THRESHOLDS_2, 0x80000060);
589 		gpu_write(gpu, REG_A5XX_CP_ROQ_THRESHOLDS_1, 0x40201B16);
590 		gpu_write(gpu, REG_A5XX_PC_DBG_ECO_CNTL,
591 			  (0x400 << 11 | 0x300 << 22));
592 	}
593 
594 	if (adreno_gpu->info->quirks & ADRENO_QUIRK_TWO_PASS_USE_WFI)
595 		gpu_rmw(gpu, REG_A5XX_PC_DBG_ECO_CNTL, 0, (1 << 8));
596 
597 	gpu_write(gpu, REG_A5XX_PC_DBG_ECO_CNTL, 0xc0200100);
598 
599 	/* Enable USE_RETENTION_FLOPS */
600 	gpu_write(gpu, REG_A5XX_CP_CHICKEN_DBG, 0x02000000);
601 
602 	/* Enable ME/PFP split notification */
603 	gpu_write(gpu, REG_A5XX_RBBM_AHB_CNTL1, 0xA6FFFFFF);
604 
605 	/*
606 	 *  In A5x, CCU can send context_done event of a particular context to
607 	 *  UCHE which ultimately reaches CP even when there is valid
608 	 *  transaction of that context inside CCU. This can let CP to program
609 	 *  config registers, which will make the "valid transaction" inside
610 	 *  CCU to be interpreted differently. This can cause gpu fault. This
611 	 *  bug is fixed in latest A510 revision. To enable this bug fix -
612 	 *  bit[11] of RB_DBG_ECO_CNTL need to be set to 0, default is 1
613 	 *  (disable). For older A510 version this bit is unused.
614 	 */
615 	if (adreno_is_a510(adreno_gpu))
616 		gpu_rmw(gpu, REG_A5XX_RB_DBG_ECO_CNTL, (1 << 11), 0);
617 
618 	/* Enable HWCG */
619 	a5xx_set_hwcg(gpu, true);
620 
621 	gpu_write(gpu, REG_A5XX_RBBM_AHB_CNTL2, 0x0000003F);
622 
623 	/* Set the highest bank bit */
624 	gpu_write(gpu, REG_A5XX_TPL1_MODE_CNTL, 2 << 7);
625 	gpu_write(gpu, REG_A5XX_RB_MODE_CNTL, 2 << 1);
626 	if (adreno_is_a540(adreno_gpu))
627 		gpu_write(gpu, REG_A5XX_UCHE_DBG_ECO_CNTL_2, 2);
628 
629 	/* Protect registers from the CP */
630 	gpu_write(gpu, REG_A5XX_CP_PROTECT_CNTL, 0x00000007);
631 
632 	/* RBBM */
633 	gpu_write(gpu, REG_A5XX_CP_PROTECT(0), ADRENO_PROTECT_RW(0x04, 4));
634 	gpu_write(gpu, REG_A5XX_CP_PROTECT(1), ADRENO_PROTECT_RW(0x08, 8));
635 	gpu_write(gpu, REG_A5XX_CP_PROTECT(2), ADRENO_PROTECT_RW(0x10, 16));
636 	gpu_write(gpu, REG_A5XX_CP_PROTECT(3), ADRENO_PROTECT_RW(0x20, 32));
637 	gpu_write(gpu, REG_A5XX_CP_PROTECT(4), ADRENO_PROTECT_RW(0x40, 64));
638 	gpu_write(gpu, REG_A5XX_CP_PROTECT(5), ADRENO_PROTECT_RW(0x80, 64));
639 
640 	/* Content protect */
641 	gpu_write(gpu, REG_A5XX_CP_PROTECT(6),
642 		ADRENO_PROTECT_RW(REG_A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_LO,
643 			16));
644 	gpu_write(gpu, REG_A5XX_CP_PROTECT(7),
645 		ADRENO_PROTECT_RW(REG_A5XX_RBBM_SECVID_TRUST_CNTL, 2));
646 
647 	/* CP */
648 	gpu_write(gpu, REG_A5XX_CP_PROTECT(8), ADRENO_PROTECT_RW(0x800, 64));
649 	gpu_write(gpu, REG_A5XX_CP_PROTECT(9), ADRENO_PROTECT_RW(0x840, 8));
650 	gpu_write(gpu, REG_A5XX_CP_PROTECT(10), ADRENO_PROTECT_RW(0x880, 32));
651 	gpu_write(gpu, REG_A5XX_CP_PROTECT(11), ADRENO_PROTECT_RW(0xAA0, 1));
652 
653 	/* RB */
654 	gpu_write(gpu, REG_A5XX_CP_PROTECT(12), ADRENO_PROTECT_RW(0xCC0, 1));
655 	gpu_write(gpu, REG_A5XX_CP_PROTECT(13), ADRENO_PROTECT_RW(0xCF0, 2));
656 
657 	/* VPC */
658 	gpu_write(gpu, REG_A5XX_CP_PROTECT(14), ADRENO_PROTECT_RW(0xE68, 8));
659 	gpu_write(gpu, REG_A5XX_CP_PROTECT(15), ADRENO_PROTECT_RW(0xE70, 4));
660 
661 	/* UCHE */
662 	gpu_write(gpu, REG_A5XX_CP_PROTECT(16), ADRENO_PROTECT_RW(0xE80, 16));
663 
664 	if (adreno_is_a530(adreno_gpu) || adreno_is_a510(adreno_gpu))
665 		gpu_write(gpu, REG_A5XX_CP_PROTECT(17),
666 			ADRENO_PROTECT_RW(0x10000, 0x8000));
667 
668 	gpu_write(gpu, REG_A5XX_RBBM_SECVID_TSB_CNTL, 0);
669 	/*
670 	 * Disable the trusted memory range - we don't actually supported secure
671 	 * memory rendering at this point in time and we don't want to block off
672 	 * part of the virtual memory space.
673 	 */
674 	gpu_write64(gpu, REG_A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_LO,
675 		REG_A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_HI, 0x00000000);
676 	gpu_write(gpu, REG_A5XX_RBBM_SECVID_TSB_TRUSTED_SIZE, 0x00000000);
677 
678 	/* Put the GPU into 64 bit by default */
679 	gpu_write(gpu, REG_A5XX_CP_ADDR_MODE_CNTL, 0x1);
680 	gpu_write(gpu, REG_A5XX_VSC_ADDR_MODE_CNTL, 0x1);
681 	gpu_write(gpu, REG_A5XX_GRAS_ADDR_MODE_CNTL, 0x1);
682 	gpu_write(gpu, REG_A5XX_RB_ADDR_MODE_CNTL, 0x1);
683 	gpu_write(gpu, REG_A5XX_PC_ADDR_MODE_CNTL, 0x1);
684 	gpu_write(gpu, REG_A5XX_HLSQ_ADDR_MODE_CNTL, 0x1);
685 	gpu_write(gpu, REG_A5XX_VFD_ADDR_MODE_CNTL, 0x1);
686 	gpu_write(gpu, REG_A5XX_VPC_ADDR_MODE_CNTL, 0x1);
687 	gpu_write(gpu, REG_A5XX_UCHE_ADDR_MODE_CNTL, 0x1);
688 	gpu_write(gpu, REG_A5XX_SP_ADDR_MODE_CNTL, 0x1);
689 	gpu_write(gpu, REG_A5XX_TPL1_ADDR_MODE_CNTL, 0x1);
690 	gpu_write(gpu, REG_A5XX_RBBM_SECVID_TSB_ADDR_MODE_CNTL, 0x1);
691 
692 	/*
693 	 * VPC corner case with local memory load kill leads to corrupt
694 	 * internal state. Normal Disable does not work for all a5x chips.
695 	 * So do the following setting to disable it.
696 	 */
697 	if (adreno_gpu->info->quirks & ADRENO_QUIRK_LMLOADKILL_DISABLE) {
698 		gpu_rmw(gpu, REG_A5XX_VPC_DBG_ECO_CNTL, 0, BIT(23));
699 		gpu_rmw(gpu, REG_A5XX_HLSQ_DBG_ECO_CNTL, BIT(18), 0);
700 	}
701 
702 	ret = adreno_hw_init(gpu);
703 	if (ret)
704 		return ret;
705 
706 	if (!adreno_is_a510(adreno_gpu))
707 		a5xx_gpmu_ucode_init(gpu);
708 
709 	ret = a5xx_ucode_init(gpu);
710 	if (ret)
711 		return ret;
712 
713 	/* Set the ringbuffer address */
714 	gpu_write64(gpu, REG_A5XX_CP_RB_BASE, REG_A5XX_CP_RB_BASE_HI,
715 		gpu->rb[0]->iova);
716 
717 	gpu_write(gpu, REG_A5XX_CP_RB_CNTL,
718 		MSM_GPU_RB_CNTL_DEFAULT | AXXX_CP_RB_CNTL_NO_UPDATE);
719 
720 	a5xx_preempt_hw_init(gpu);
721 
722 	/* Disable the interrupts through the initial bringup stage */
723 	gpu_write(gpu, REG_A5XX_RBBM_INT_0_MASK, A5XX_INT_MASK);
724 
725 	/* Clear ME_HALT to start the micro engine */
726 	gpu_write(gpu, REG_A5XX_CP_PFP_ME_CNTL, 0);
727 	ret = a5xx_me_init(gpu);
728 	if (ret)
729 		return ret;
730 
731 	ret = a5xx_power_init(gpu);
732 	if (ret)
733 		return ret;
734 
735 	/*
736 	 * Send a pipeline event stat to get misbehaving counters to start
737 	 * ticking correctly
738 	 */
739 	if (adreno_is_a530(adreno_gpu)) {
740 		OUT_PKT7(gpu->rb[0], CP_EVENT_WRITE, 1);
741 		OUT_RING(gpu->rb[0], CP_EVENT_WRITE_0_EVENT(STAT_EVENT));
742 
743 		gpu->funcs->flush(gpu, gpu->rb[0]);
744 		if (!a5xx_idle(gpu, gpu->rb[0]))
745 			return -EINVAL;
746 	}
747 
748 	/*
749 	 * If the chip that we are using does support loading one, then
750 	 * try to load a zap shader into the secure world. If successful
751 	 * we can use the CP to switch out of secure mode. If not then we
752 	 * have no resource but to try to switch ourselves out manually. If we
753 	 * guessed wrong then access to the RBBM_SECVID_TRUST_CNTL register will
754 	 * be blocked and a permissions violation will soon follow.
755 	 */
756 	ret = a5xx_zap_shader_init(gpu);
757 	if (!ret) {
758 		OUT_PKT7(gpu->rb[0], CP_SET_SECURE_MODE, 1);
759 		OUT_RING(gpu->rb[0], 0x00000000);
760 
761 		gpu->funcs->flush(gpu, gpu->rb[0]);
762 		if (!a5xx_idle(gpu, gpu->rb[0]))
763 			return -EINVAL;
764 	} else if (ret == -ENODEV) {
765 		/*
766 		 * This device does not use zap shader (but print a warning
767 		 * just in case someone got their dt wrong.. hopefully they
768 		 * have a debug UART to realize the error of their ways...
769 		 * if you mess this up you are about to crash horribly)
770 		 */
771 		dev_warn_once(gpu->dev->dev,
772 			"Zap shader not enabled - using SECVID_TRUST_CNTL instead\n");
773 		gpu_write(gpu, REG_A5XX_RBBM_SECVID_TRUST_CNTL, 0x0);
774 	} else {
775 		return ret;
776 	}
777 
778 	/* Last step - yield the ringbuffer */
779 	a5xx_preempt_start(gpu);
780 
781 	return 0;
782 }
783 
784 static void a5xx_recover(struct msm_gpu *gpu)
785 {
786 	int i;
787 
788 	adreno_dump_info(gpu);
789 
790 	for (i = 0; i < 8; i++) {
791 		printk("CP_SCRATCH_REG%d: %u\n", i,
792 			gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(i)));
793 	}
794 
795 	if (hang_debug)
796 		a5xx_dump(gpu);
797 
798 	gpu_write(gpu, REG_A5XX_RBBM_SW_RESET_CMD, 1);
799 	gpu_read(gpu, REG_A5XX_RBBM_SW_RESET_CMD);
800 	gpu_write(gpu, REG_A5XX_RBBM_SW_RESET_CMD, 0);
801 	adreno_recover(gpu);
802 }
803 
804 static void a5xx_destroy(struct msm_gpu *gpu)
805 {
806 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
807 	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
808 
809 	DBG("%s", gpu->name);
810 
811 	a5xx_preempt_fini(gpu);
812 
813 	if (a5xx_gpu->pm4_bo) {
814 		msm_gem_unpin_iova(a5xx_gpu->pm4_bo, gpu->aspace);
815 		drm_gem_object_put(a5xx_gpu->pm4_bo);
816 	}
817 
818 	if (a5xx_gpu->pfp_bo) {
819 		msm_gem_unpin_iova(a5xx_gpu->pfp_bo, gpu->aspace);
820 		drm_gem_object_put(a5xx_gpu->pfp_bo);
821 	}
822 
823 	if (a5xx_gpu->gpmu_bo) {
824 		msm_gem_unpin_iova(a5xx_gpu->gpmu_bo, gpu->aspace);
825 		drm_gem_object_put(a5xx_gpu->gpmu_bo);
826 	}
827 
828 	adreno_gpu_cleanup(adreno_gpu);
829 	kfree(a5xx_gpu);
830 }
831 
832 static inline bool _a5xx_check_idle(struct msm_gpu *gpu)
833 {
834 	if (gpu_read(gpu, REG_A5XX_RBBM_STATUS) & ~A5XX_RBBM_STATUS_HI_BUSY)
835 		return false;
836 
837 	/*
838 	 * Nearly every abnormality ends up pausing the GPU and triggering a
839 	 * fault so we can safely just watch for this one interrupt to fire
840 	 */
841 	return !(gpu_read(gpu, REG_A5XX_RBBM_INT_0_STATUS) &
842 		A5XX_RBBM_INT_0_MASK_MISC_HANG_DETECT);
843 }
844 
845 bool a5xx_idle(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
846 {
847 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
848 	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
849 
850 	if (ring != a5xx_gpu->cur_ring) {
851 		WARN(1, "Tried to idle a non-current ringbuffer\n");
852 		return false;
853 	}
854 
855 	/* wait for CP to drain ringbuffer: */
856 	if (!adreno_idle(gpu, ring))
857 		return false;
858 
859 	if (spin_until(_a5xx_check_idle(gpu))) {
860 		DRM_ERROR("%s: %ps: timeout waiting for GPU to idle: status %8.8X irq %8.8X rptr/wptr %d/%d\n",
861 			gpu->name, __builtin_return_address(0),
862 			gpu_read(gpu, REG_A5XX_RBBM_STATUS),
863 			gpu_read(gpu, REG_A5XX_RBBM_INT_0_STATUS),
864 			gpu_read(gpu, REG_A5XX_CP_RB_RPTR),
865 			gpu_read(gpu, REG_A5XX_CP_RB_WPTR));
866 		return false;
867 	}
868 
869 	return true;
870 }
871 
872 static int a5xx_fault_handler(void *arg, unsigned long iova, int flags)
873 {
874 	struct msm_gpu *gpu = arg;
875 	pr_warn_ratelimited("*** gpu fault: iova=%08lx, flags=%d (%u,%u,%u,%u)\n",
876 			iova, flags,
877 			gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(4)),
878 			gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(5)),
879 			gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(6)),
880 			gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(7)));
881 
882 	return -EFAULT;
883 }
884 
885 static void a5xx_cp_err_irq(struct msm_gpu *gpu)
886 {
887 	u32 status = gpu_read(gpu, REG_A5XX_CP_INTERRUPT_STATUS);
888 
889 	if (status & A5XX_CP_INT_CP_OPCODE_ERROR) {
890 		u32 val;
891 
892 		gpu_write(gpu, REG_A5XX_CP_PFP_STAT_ADDR, 0);
893 
894 		/*
895 		 * REG_A5XX_CP_PFP_STAT_DATA is indexed, and we want index 1 so
896 		 * read it twice
897 		 */
898 
899 		gpu_read(gpu, REG_A5XX_CP_PFP_STAT_DATA);
900 		val = gpu_read(gpu, REG_A5XX_CP_PFP_STAT_DATA);
901 
902 		dev_err_ratelimited(gpu->dev->dev, "CP | opcode error | possible opcode=0x%8.8X\n",
903 			val);
904 	}
905 
906 	if (status & A5XX_CP_INT_CP_HW_FAULT_ERROR)
907 		dev_err_ratelimited(gpu->dev->dev, "CP | HW fault | status=0x%8.8X\n",
908 			gpu_read(gpu, REG_A5XX_CP_HW_FAULT));
909 
910 	if (status & A5XX_CP_INT_CP_DMA_ERROR)
911 		dev_err_ratelimited(gpu->dev->dev, "CP | DMA error\n");
912 
913 	if (status & A5XX_CP_INT_CP_REGISTER_PROTECTION_ERROR) {
914 		u32 val = gpu_read(gpu, REG_A5XX_CP_PROTECT_STATUS);
915 
916 		dev_err_ratelimited(gpu->dev->dev,
917 			"CP | protected mode error | %s | addr=0x%8.8X | status=0x%8.8X\n",
918 			val & (1 << 24) ? "WRITE" : "READ",
919 			(val & 0xFFFFF) >> 2, val);
920 	}
921 
922 	if (status & A5XX_CP_INT_CP_AHB_ERROR) {
923 		u32 status = gpu_read(gpu, REG_A5XX_CP_AHB_FAULT);
924 		const char *access[16] = { "reserved", "reserved",
925 			"timestamp lo", "timestamp hi", "pfp read", "pfp write",
926 			"", "", "me read", "me write", "", "", "crashdump read",
927 			"crashdump write" };
928 
929 		dev_err_ratelimited(gpu->dev->dev,
930 			"CP | AHB error | addr=%X access=%s error=%d | status=0x%8.8X\n",
931 			status & 0xFFFFF, access[(status >> 24) & 0xF],
932 			(status & (1 << 31)), status);
933 	}
934 }
935 
936 static void a5xx_rbbm_err_irq(struct msm_gpu *gpu, u32 status)
937 {
938 	if (status & A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR) {
939 		u32 val = gpu_read(gpu, REG_A5XX_RBBM_AHB_ERROR_STATUS);
940 
941 		dev_err_ratelimited(gpu->dev->dev,
942 			"RBBM | AHB bus error | %s | addr=0x%X | ports=0x%X:0x%X\n",
943 			val & (1 << 28) ? "WRITE" : "READ",
944 			(val & 0xFFFFF) >> 2, (val >> 20) & 0x3,
945 			(val >> 24) & 0xF);
946 
947 		/* Clear the error */
948 		gpu_write(gpu, REG_A5XX_RBBM_AHB_CMD, (1 << 4));
949 
950 		/* Clear the interrupt */
951 		gpu_write(gpu, REG_A5XX_RBBM_INT_CLEAR_CMD,
952 			A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR);
953 	}
954 
955 	if (status & A5XX_RBBM_INT_0_MASK_RBBM_TRANSFER_TIMEOUT)
956 		dev_err_ratelimited(gpu->dev->dev, "RBBM | AHB transfer timeout\n");
957 
958 	if (status & A5XX_RBBM_INT_0_MASK_RBBM_ME_MS_TIMEOUT)
959 		dev_err_ratelimited(gpu->dev->dev, "RBBM | ME master split | status=0x%X\n",
960 			gpu_read(gpu, REG_A5XX_RBBM_AHB_ME_SPLIT_STATUS));
961 
962 	if (status & A5XX_RBBM_INT_0_MASK_RBBM_PFP_MS_TIMEOUT)
963 		dev_err_ratelimited(gpu->dev->dev, "RBBM | PFP master split | status=0x%X\n",
964 			gpu_read(gpu, REG_A5XX_RBBM_AHB_PFP_SPLIT_STATUS));
965 
966 	if (status & A5XX_RBBM_INT_0_MASK_RBBM_ETS_MS_TIMEOUT)
967 		dev_err_ratelimited(gpu->dev->dev, "RBBM | ETS master split | status=0x%X\n",
968 			gpu_read(gpu, REG_A5XX_RBBM_AHB_ETS_SPLIT_STATUS));
969 
970 	if (status & A5XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNC_OVERFLOW)
971 		dev_err_ratelimited(gpu->dev->dev, "RBBM | ATB ASYNC overflow\n");
972 
973 	if (status & A5XX_RBBM_INT_0_MASK_RBBM_ATB_BUS_OVERFLOW)
974 		dev_err_ratelimited(gpu->dev->dev, "RBBM | ATB bus overflow\n");
975 }
976 
977 static void a5xx_uche_err_irq(struct msm_gpu *gpu)
978 {
979 	uint64_t addr = (uint64_t) gpu_read(gpu, REG_A5XX_UCHE_TRAP_LOG_HI);
980 
981 	addr |= gpu_read(gpu, REG_A5XX_UCHE_TRAP_LOG_LO);
982 
983 	dev_err_ratelimited(gpu->dev->dev, "UCHE | Out of bounds access | addr=0x%llX\n",
984 		addr);
985 }
986 
987 static void a5xx_gpmu_err_irq(struct msm_gpu *gpu)
988 {
989 	dev_err_ratelimited(gpu->dev->dev, "GPMU | voltage droop\n");
990 }
991 
992 static void a5xx_fault_detect_irq(struct msm_gpu *gpu)
993 {
994 	struct drm_device *dev = gpu->dev;
995 	struct msm_drm_private *priv = dev->dev_private;
996 	struct msm_ringbuffer *ring = gpu->funcs->active_ring(gpu);
997 
998 	DRM_DEV_ERROR(dev->dev, "gpu fault ring %d fence %x status %8.8X rb %4.4x/%4.4x ib1 %16.16llX/%4.4x ib2 %16.16llX/%4.4x\n",
999 		ring ? ring->id : -1, ring ? ring->seqno : 0,
1000 		gpu_read(gpu, REG_A5XX_RBBM_STATUS),
1001 		gpu_read(gpu, REG_A5XX_CP_RB_RPTR),
1002 		gpu_read(gpu, REG_A5XX_CP_RB_WPTR),
1003 		gpu_read64(gpu, REG_A5XX_CP_IB1_BASE, REG_A5XX_CP_IB1_BASE_HI),
1004 		gpu_read(gpu, REG_A5XX_CP_IB1_BUFSZ),
1005 		gpu_read64(gpu, REG_A5XX_CP_IB2_BASE, REG_A5XX_CP_IB2_BASE_HI),
1006 		gpu_read(gpu, REG_A5XX_CP_IB2_BUFSZ));
1007 
1008 	/* Turn off the hangcheck timer to keep it from bothering us */
1009 	del_timer(&gpu->hangcheck_timer);
1010 
1011 	queue_work(priv->wq, &gpu->recover_work);
1012 }
1013 
/*
 * Interrupt status bits that make a5xx_irq() call a5xx_rbbm_err_irq().
 *
 * NOTE(review): a5xx_rbbm_err_irq() also tests
 * A5XX_RBBM_INT_0_MASK_RBBM_ATB_BUS_OVERFLOW, which is not part of this
 * mask - confirm whether that omission is intentional.
 */
#define RBBM_ERROR_MASK ( \
	A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR		| \
	A5XX_RBBM_INT_0_MASK_RBBM_TRANSFER_TIMEOUT	| \
	A5XX_RBBM_INT_0_MASK_RBBM_ME_MS_TIMEOUT		| \
	A5XX_RBBM_INT_0_MASK_RBBM_PFP_MS_TIMEOUT	| \
	A5XX_RBBM_INT_0_MASK_RBBM_ETS_MS_TIMEOUT	| \
	A5XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNC_OVERFLOW)
1021 
1022 static irqreturn_t a5xx_irq(struct msm_gpu *gpu)
1023 {
1024 	u32 status = gpu_read(gpu, REG_A5XX_RBBM_INT_0_STATUS);
1025 
1026 	/*
1027 	 * Clear all the interrupts except RBBM_AHB_ERROR - if we clear it
1028 	 * before the source is cleared the interrupt will storm.
1029 	 */
1030 	gpu_write(gpu, REG_A5XX_RBBM_INT_CLEAR_CMD,
1031 		status & ~A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR);
1032 
1033 	/* Pass status to a5xx_rbbm_err_irq because we've already cleared it */
1034 	if (status & RBBM_ERROR_MASK)
1035 		a5xx_rbbm_err_irq(gpu, status);
1036 
1037 	if (status & A5XX_RBBM_INT_0_MASK_CP_HW_ERROR)
1038 		a5xx_cp_err_irq(gpu);
1039 
1040 	if (status & A5XX_RBBM_INT_0_MASK_MISC_HANG_DETECT)
1041 		a5xx_fault_detect_irq(gpu);
1042 
1043 	if (status & A5XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS)
1044 		a5xx_uche_err_irq(gpu);
1045 
1046 	if (status & A5XX_RBBM_INT_0_MASK_GPMU_VOLTAGE_DROOP)
1047 		a5xx_gpmu_err_irq(gpu);
1048 
1049 	if (status & A5XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS) {
1050 		a5xx_preempt_trigger(gpu);
1051 		msm_gpu_retire(gpu);
1052 	}
1053 
1054 	if (status & A5XX_RBBM_INT_0_MASK_CP_SW)
1055 		a5xx_preempt_irq(gpu);
1056 
1057 	return IRQ_HANDLED;
1058 }
1059 
1060 static const u32 a5xx_register_offsets[REG_ADRENO_REGISTER_MAX] = {
1061 	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_BASE, REG_A5XX_CP_RB_BASE),
1062 	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_BASE_HI, REG_A5XX_CP_RB_BASE_HI),
1063 	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_RPTR_ADDR, REG_A5XX_CP_RB_RPTR_ADDR),
1064 	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_RPTR_ADDR_HI,
1065 		REG_A5XX_CP_RB_RPTR_ADDR_HI),
1066 	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_RPTR, REG_A5XX_CP_RB_RPTR),
1067 	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_WPTR, REG_A5XX_CP_RB_WPTR),
1068 	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_CNTL, REG_A5XX_CP_RB_CNTL),
1069 };
1070 
/*
 * Register offset list handed to the adreno core through
 * adreno_gpu->registers in a5xx_gpu_init(). The list is terminated by ~0.
 *
 * NOTE(review): the values look like consecutive (first, last) pairs of
 * register ranges, but the code that consumes the table is outside this
 * file - confirm against the adreno core before relying on that.
 */
static const u32 a5xx_registers[] = {
	0x0000, 0x0002, 0x0004, 0x0020, 0x0022, 0x0026, 0x0029, 0x002B,
	0x002E, 0x0035, 0x0038, 0x0042, 0x0044, 0x0044, 0x0047, 0x0095,
	0x0097, 0x00BB, 0x03A0, 0x0464, 0x0469, 0x046F, 0x04D2, 0x04D3,
	0x04E0, 0x0533, 0x0540, 0x0555, 0x0800, 0x081A, 0x081F, 0x0841,
	0x0860, 0x0860, 0x0880, 0x08A0, 0x0B00, 0x0B12, 0x0B15, 0x0B28,
	0x0B78, 0x0B7F, 0x0BB0, 0x0BBD, 0x0BC0, 0x0BC6, 0x0BD0, 0x0C53,
	0x0C60, 0x0C61, 0x0C80, 0x0C82, 0x0C84, 0x0C85, 0x0C90, 0x0C98,
	0x0CA0, 0x0CA0, 0x0CB0, 0x0CB2, 0x2180, 0x2185, 0x2580, 0x2585,
	0x0CC1, 0x0CC1, 0x0CC4, 0x0CC7, 0x0CCC, 0x0CCC, 0x0CD0, 0x0CD8,
	0x0CE0, 0x0CE5, 0x0CE8, 0x0CE8, 0x0CEC, 0x0CF1, 0x0CFB, 0x0D0E,
	0x2100, 0x211E, 0x2140, 0x2145, 0x2500, 0x251E, 0x2540, 0x2545,
	0x0D10, 0x0D17, 0x0D20, 0x0D23, 0x0D30, 0x0D30, 0x20C0, 0x20C0,
	0x24C0, 0x24C0, 0x0E40, 0x0E43, 0x0E4A, 0x0E4A, 0x0E50, 0x0E57,
	0x0E60, 0x0E7C, 0x0E80, 0x0E8E, 0x0E90, 0x0E96, 0x0EA0, 0x0EA8,
	0x0EB0, 0x0EB2, 0xE140, 0xE147, 0xE150, 0xE187, 0xE1A0, 0xE1A9,
	0xE1B0, 0xE1B6, 0xE1C0, 0xE1C7, 0xE1D0, 0xE1D1, 0xE200, 0xE201,
	0xE210, 0xE21C, 0xE240, 0xE268, 0xE000, 0xE006, 0xE010, 0xE09A,
	0xE0A0, 0xE0A4, 0xE0AA, 0xE0EB, 0xE100, 0xE105, 0xE380, 0xE38F,
	0xE3B0, 0xE3B0, 0xE400, 0xE405, 0xE408, 0xE4E9, 0xE4F0, 0xE4F0,
	0xE280, 0xE280, 0xE282, 0xE2A3, 0xE2A5, 0xE2C2, 0xE940, 0xE947,
	0xE950, 0xE987, 0xE9A0, 0xE9A9, 0xE9B0, 0xE9B6, 0xE9C0, 0xE9C7,
	0xE9D0, 0xE9D1, 0xEA00, 0xEA01, 0xEA10, 0xEA1C, 0xEA40, 0xEA68,
	0xE800, 0xE806, 0xE810, 0xE89A, 0xE8A0, 0xE8A4, 0xE8AA, 0xE8EB,
	0xE900, 0xE905, 0xEB80, 0xEB8F, 0xEBB0, 0xEBB0, 0xEC00, 0xEC05,
	0xEC08, 0xECE9, 0xECF0, 0xECF0, 0xEA80, 0xEA80, 0xEA82, 0xEAA3,
	0xEAA5, 0xEAC2, 0xA800, 0xA800, 0xA820, 0xA828, 0xA840, 0xA87D,
	0XA880, 0xA88D, 0xA890, 0xA8A3, 0xA8D0, 0xA8D8, 0xA8E0, 0xA8F5,
	0xAC60, 0xAC60, ~0,
};
1101 
1102 static void a5xx_dump(struct msm_gpu *gpu)
1103 {
1104 	DRM_DEV_INFO(gpu->dev->dev, "status:   %08x\n",
1105 		gpu_read(gpu, REG_A5XX_RBBM_STATUS));
1106 	adreno_dump(gpu);
1107 }
1108 
1109 static int a5xx_pm_resume(struct msm_gpu *gpu)
1110 {
1111 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1112 	int ret;
1113 
1114 	/* Turn on the core power */
1115 	ret = msm_gpu_pm_resume(gpu);
1116 	if (ret)
1117 		return ret;
1118 
1119 	if (adreno_is_a510(adreno_gpu)) {
1120 		/* Halt the sp_input_clk at HM level */
1121 		gpu_write(gpu, REG_A5XX_RBBM_CLOCK_CNTL, 0x00000055);
1122 		a5xx_set_hwcg(gpu, true);
1123 		/* Turn on sp_input_clk at HM level */
1124 		gpu_rmw(gpu, REG_A5XX_RBBM_CLOCK_CNTL, 0xff, 0);
1125 		return 0;
1126 	}
1127 
1128 	/* Turn the RBCCU domain first to limit the chances of voltage droop */
1129 	gpu_write(gpu, REG_A5XX_GPMU_RBCCU_POWER_CNTL, 0x778000);
1130 
1131 	/* Wait 3 usecs before polling */
1132 	udelay(3);
1133 
1134 	ret = spin_usecs(gpu, 20, REG_A5XX_GPMU_RBCCU_PWR_CLK_STATUS,
1135 		(1 << 20), (1 << 20));
1136 	if (ret) {
1137 		DRM_ERROR("%s: timeout waiting for RBCCU GDSC enable: %X\n",
1138 			gpu->name,
1139 			gpu_read(gpu, REG_A5XX_GPMU_RBCCU_PWR_CLK_STATUS));
1140 		return ret;
1141 	}
1142 
1143 	/* Turn on the SP domain */
1144 	gpu_write(gpu, REG_A5XX_GPMU_SP_POWER_CNTL, 0x778000);
1145 	ret = spin_usecs(gpu, 20, REG_A5XX_GPMU_SP_PWR_CLK_STATUS,
1146 		(1 << 20), (1 << 20));
1147 	if (ret)
1148 		DRM_ERROR("%s: timeout waiting for SP GDSC enable\n",
1149 			gpu->name);
1150 
1151 	return ret;
1152 }
1153 
1154 static int a5xx_pm_suspend(struct msm_gpu *gpu)
1155 {
1156 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1157 	u32 mask = 0xf;
1158 
1159 	/* A510 has 3 XIN ports in VBIF */
1160 	if (adreno_is_a510(adreno_gpu))
1161 		mask = 0x7;
1162 
1163 	/* Clear the VBIF pipe before shutting down */
1164 	gpu_write(gpu, REG_A5XX_VBIF_XIN_HALT_CTRL0, mask);
1165 	spin_until((gpu_read(gpu, REG_A5XX_VBIF_XIN_HALT_CTRL1) &
1166 				mask) == mask);
1167 
1168 	gpu_write(gpu, REG_A5XX_VBIF_XIN_HALT_CTRL0, 0);
1169 
1170 	/*
1171 	 * Reset the VBIF before power collapse to avoid issue with FIFO
1172 	 * entries
1173 	 */
1174 	gpu_write(gpu, REG_A5XX_RBBM_BLOCK_SW_RESET_CMD, 0x003C0000);
1175 	gpu_write(gpu, REG_A5XX_RBBM_BLOCK_SW_RESET_CMD, 0x00000000);
1176 
1177 	return msm_gpu_pm_suspend(gpu);
1178 }
1179 
1180 static int a5xx_get_timestamp(struct msm_gpu *gpu, uint64_t *value)
1181 {
1182 	*value = gpu_read64(gpu, REG_A5XX_RBBM_PERFCTR_CP_0_LO,
1183 		REG_A5XX_RBBM_PERFCTR_CP_0_HI);
1184 
1185 	return 0;
1186 }
1187 
/*
 * Buffer used to run a CP crashdump script; set up by
 * a5xx_crashdumper_init() from msm_gem_kernel_new_locked().
 */
struct a5xx_crashdumper {
	/* Kernel pointer returned by msm_gem_kernel_new_locked(), or ERR_PTR */
	void *ptr;
	/* Backing GEM object, released with msm_gem_kernel_put() */
	struct drm_gem_object *bo;
	/* GPU address of the buffer; programmed as the crash script base */
	u64 iova;
};
1193 
/* a5xx GPU state snapshot: the generic state plus the HLSQ aperture dump. */
struct a5xx_gpu_state {
	/* Generic state filled in by adreno_gpu_state_get() */
	struct msm_gpu_state base;
	/* kcalloc()'ed copy of the HLSQ aperture registers, or NULL */
	u32 *hlsqregs;
};
1198 
1199 static int a5xx_crashdumper_init(struct msm_gpu *gpu,
1200 		struct a5xx_crashdumper *dumper)
1201 {
1202 	dumper->ptr = msm_gem_kernel_new_locked(gpu->dev,
1203 		SZ_1M, MSM_BO_UNCACHED, gpu->aspace,
1204 		&dumper->bo, &dumper->iova);
1205 
1206 	if (!IS_ERR(dumper->ptr))
1207 		msm_gem_object_set_name(dumper->bo, "crashdump");
1208 
1209 	return PTR_ERR_OR_ZERO(dumper->ptr);
1210 }
1211 
1212 static int a5xx_crashdumper_run(struct msm_gpu *gpu,
1213 		struct a5xx_crashdumper *dumper)
1214 {
1215 	u32 val;
1216 
1217 	if (IS_ERR_OR_NULL(dumper->ptr))
1218 		return -EINVAL;
1219 
1220 	gpu_write64(gpu, REG_A5XX_CP_CRASH_SCRIPT_BASE_LO,
1221 		REG_A5XX_CP_CRASH_SCRIPT_BASE_HI, dumper->iova);
1222 
1223 	gpu_write(gpu, REG_A5XX_CP_CRASH_DUMP_CNTL, 1);
1224 
1225 	return gpu_poll_timeout(gpu, REG_A5XX_CP_CRASH_DUMP_CNTL, val,
1226 		val & 0x04, 100, 10000);
1227 }
1228 
1229 /*
1230  * These are a list of the registers that need to be read through the HLSQ
1231  * aperture through the crashdumper.  These are not nominally accessible from
1232  * the CPU on a secure platform.
1233  */
1234 static const struct {
1235 	u32 type;
1236 	u32 regoffset;
1237 	u32 count;
1238 } a5xx_hlsq_aperture_regs[] = {
1239 	{ 0x35, 0xe00, 0x32 },   /* HSLQ non-context */
1240 	{ 0x31, 0x2080, 0x1 },   /* HLSQ 2D context 0 */
1241 	{ 0x33, 0x2480, 0x1 },   /* HLSQ 2D context 1 */
1242 	{ 0x32, 0xe780, 0x62 },  /* HLSQ 3D context 0 */
1243 	{ 0x34, 0xef80, 0x62 },  /* HLSQ 3D context 1 */
1244 	{ 0x3f, 0x0ec0, 0x40 },  /* SP non-context */
1245 	{ 0x3d, 0x2040, 0x1 },   /* SP 2D context 0 */
1246 	{ 0x3b, 0x2440, 0x1 },   /* SP 2D context 1 */
1247 	{ 0x3e, 0xe580, 0x170 }, /* SP 3D context 0 */
1248 	{ 0x3c, 0xed80, 0x170 }, /* SP 3D context 1 */
1249 	{ 0x3a, 0x0f00, 0x1c },  /* TP non-context */
1250 	{ 0x38, 0x2000, 0xa },   /* TP 2D context 0 */
1251 	{ 0x36, 0x2400, 0xa },   /* TP 2D context 1 */
1252 	{ 0x39, 0xe700, 0x80 },  /* TP 3D context 0 */
1253 	{ 0x37, 0xef00, 0x80 },  /* TP 3D context 1 */
1254 };
1255 
1256 static void a5xx_gpu_state_get_hlsq_regs(struct msm_gpu *gpu,
1257 		struct a5xx_gpu_state *a5xx_state)
1258 {
1259 	struct a5xx_crashdumper dumper = { 0 };
1260 	u32 offset, count = 0;
1261 	u64 *ptr;
1262 	int i;
1263 
1264 	if (a5xx_crashdumper_init(gpu, &dumper))
1265 		return;
1266 
1267 	/* The script will be written at offset 0 */
1268 	ptr = dumper.ptr;
1269 
1270 	/* Start writing the data at offset 256k */
1271 	offset = dumper.iova + (256 * SZ_1K);
1272 
1273 	/* Count how many additional registers to get from the HLSQ aperture */
1274 	for (i = 0; i < ARRAY_SIZE(a5xx_hlsq_aperture_regs); i++)
1275 		count += a5xx_hlsq_aperture_regs[i].count;
1276 
1277 	a5xx_state->hlsqregs = kcalloc(count, sizeof(u32), GFP_KERNEL);
1278 	if (!a5xx_state->hlsqregs)
1279 		return;
1280 
1281 	/* Build the crashdump script */
1282 	for (i = 0; i < ARRAY_SIZE(a5xx_hlsq_aperture_regs); i++) {
1283 		u32 type = a5xx_hlsq_aperture_regs[i].type;
1284 		u32 c = a5xx_hlsq_aperture_regs[i].count;
1285 
1286 		/* Write the register to select the desired bank */
1287 		*ptr++ = ((u64) type << 8);
1288 		*ptr++ = (((u64) REG_A5XX_HLSQ_DBG_READ_SEL) << 44) |
1289 			(1 << 21) | 1;
1290 
1291 		*ptr++ = offset;
1292 		*ptr++ = (((u64) REG_A5XX_HLSQ_DBG_AHB_READ_APERTURE) << 44)
1293 			| c;
1294 
1295 		offset += c * sizeof(u32);
1296 	}
1297 
1298 	/* Write two zeros to close off the script */
1299 	*ptr++ = 0;
1300 	*ptr++ = 0;
1301 
1302 	if (a5xx_crashdumper_run(gpu, &dumper)) {
1303 		kfree(a5xx_state->hlsqregs);
1304 		msm_gem_kernel_put(dumper.bo, gpu->aspace, true);
1305 		return;
1306 	}
1307 
1308 	/* Copy the data from the crashdumper to the state */
1309 	memcpy(a5xx_state->hlsqregs, dumper.ptr + (256 * SZ_1K),
1310 		count * sizeof(u32));
1311 
1312 	msm_gem_kernel_put(dumper.bo, gpu->aspace, true);
1313 }
1314 
1315 static struct msm_gpu_state *a5xx_gpu_state_get(struct msm_gpu *gpu)
1316 {
1317 	struct a5xx_gpu_state *a5xx_state = kzalloc(sizeof(*a5xx_state),
1318 			GFP_KERNEL);
1319 
1320 	if (!a5xx_state)
1321 		return ERR_PTR(-ENOMEM);
1322 
1323 	/* Temporarily disable hardware clock gating before reading the hw */
1324 	a5xx_set_hwcg(gpu, false);
1325 
1326 	/* First get the generic state from the adreno core */
1327 	adreno_gpu_state_get(gpu, &(a5xx_state->base));
1328 
1329 	a5xx_state->base.rbbm_status = gpu_read(gpu, REG_A5XX_RBBM_STATUS);
1330 
1331 	/* Get the HLSQ regs with the help of the crashdumper */
1332 	a5xx_gpu_state_get_hlsq_regs(gpu, a5xx_state);
1333 
1334 	a5xx_set_hwcg(gpu, true);
1335 
1336 	return &a5xx_state->base;
1337 }
1338 
1339 static void a5xx_gpu_state_destroy(struct kref *kref)
1340 {
1341 	struct msm_gpu_state *state = container_of(kref,
1342 		struct msm_gpu_state, ref);
1343 	struct a5xx_gpu_state *a5xx_state = container_of(state,
1344 		struct a5xx_gpu_state, base);
1345 
1346 	kfree(a5xx_state->hlsqregs);
1347 
1348 	adreno_gpu_state_destroy(state);
1349 	kfree(a5xx_state);
1350 }
1351 
1352 static int a5xx_gpu_state_put(struct msm_gpu_state *state)
1353 {
1354 	if (IS_ERR_OR_NULL(state))
1355 		return 1;
1356 
1357 	return kref_put(&state->ref, a5xx_gpu_state_destroy);
1358 }
1359 
1360 
#if defined(CONFIG_DEBUG_FS) || defined(CONFIG_DEV_COREDUMP)
/*
 * Print a GPU state snapshot to @p: the generic adreno state first, then -
 * if they were captured - the HLSQ aperture registers as a
 * "registers-hlsq:" list. Does nothing for a NULL or error-pointer @state.
 */
static void a5xx_show(struct msm_gpu *gpu, struct msm_gpu_state *state,
		      struct drm_printer *p)
{
	struct a5xx_gpu_state *a5xx_state;
	const u32 *regval;
	int bank;

	if (IS_ERR_OR_NULL(state))
		return;

	a5xx_state = container_of(state, struct a5xx_gpu_state, base);

	adreno_show(gpu, state, p);

	/* Dump the additional a5xx HLSQ registers */
	regval = a5xx_state->hlsqregs;
	if (!regval)
		return;

	drm_printf(p, "registers-hlsq:\n");

	/* Values are stored bank after bank, in table order */
	for (bank = 0; bank < ARRAY_SIZE(a5xx_hlsq_aperture_regs); bank++) {
		u32 reg = a5xx_hlsq_aperture_regs[bank].regoffset;
		u32 left = a5xx_hlsq_aperture_regs[bank].count;

		for (; left; left--, reg++, regval++) {
			/*
			 * To keep the crashdump simple we pull the entire range
			 * for each register type but not all of the registers
			 * in the range are valid. Fortunately invalid registers
			 * stick out like a sore thumb with a value of
			 * 0xdeadbeef
			 */
			if (*regval == 0xdeadbeef)
				continue;

			drm_printf(p, "  - { offset: 0x%04x, value: 0x%08x }\n",
				reg << 2, *regval);
		}
	}
}
#endif
1402 
1403 static struct msm_ringbuffer *a5xx_active_ring(struct msm_gpu *gpu)
1404 {
1405 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1406 	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
1407 
1408 	return a5xx_gpu->cur_ring;
1409 }
1410 
1411 static unsigned long a5xx_gpu_busy(struct msm_gpu *gpu)
1412 {
1413 	u64 busy_cycles, busy_time;
1414 
1415 	/* Only read the gpu busy if the hardware is already active */
1416 	if (pm_runtime_get_if_in_use(&gpu->pdev->dev) == 0)
1417 		return 0;
1418 
1419 	busy_cycles = gpu_read64(gpu, REG_A5XX_RBBM_PERFCTR_RBBM_0_LO,
1420 			REG_A5XX_RBBM_PERFCTR_RBBM_0_HI);
1421 
1422 	busy_time = busy_cycles - gpu->devfreq.busy_cycles;
1423 	do_div(busy_time, clk_get_rate(gpu->core_clk) / 1000000);
1424 
1425 	gpu->devfreq.busy_cycles = busy_cycles;
1426 
1427 	pm_runtime_put(&gpu->pdev->dev);
1428 
1429 	if (WARN_ON(busy_time > ~0LU))
1430 		return ~0LU;
1431 
1432 	return (unsigned long)busy_time;
1433 }
1434 
1435 static const struct adreno_gpu_funcs funcs = {
1436 	.base = {
1437 		.get_param = adreno_get_param,
1438 		.hw_init = a5xx_hw_init,
1439 		.pm_suspend = a5xx_pm_suspend,
1440 		.pm_resume = a5xx_pm_resume,
1441 		.recover = a5xx_recover,
1442 		.submit = a5xx_submit,
1443 		.flush = a5xx_flush,
1444 		.active_ring = a5xx_active_ring,
1445 		.irq = a5xx_irq,
1446 		.destroy = a5xx_destroy,
1447 #if defined(CONFIG_DEBUG_FS) || defined(CONFIG_DEV_COREDUMP)
1448 		.show = a5xx_show,
1449 #endif
1450 #if defined(CONFIG_DEBUG_FS)
1451 		.debugfs_init = a5xx_debugfs_init,
1452 #endif
1453 		.gpu_busy = a5xx_gpu_busy,
1454 		.gpu_state_get = a5xx_gpu_state_get,
1455 		.gpu_state_put = a5xx_gpu_state_put,
1456 		.create_address_space = adreno_iommu_create_address_space,
1457 	},
1458 	.get_timestamp = a5xx_get_timestamp,
1459 };
1460 
1461 static void check_speed_bin(struct device *dev)
1462 {
1463 	struct nvmem_cell *cell;
1464 	u32 val;
1465 
1466 	/*
1467 	 * If the OPP table specifies a opp-supported-hw property then we have
1468 	 * to set something with dev_pm_opp_set_supported_hw() or the table
1469 	 * doesn't get populated so pick an arbitrary value that should
1470 	 * ensure the default frequencies are selected but not conflict with any
1471 	 * actual bins
1472 	 */
1473 	val = 0x80;
1474 
1475 	cell = nvmem_cell_get(dev, "speed_bin");
1476 
1477 	if (!IS_ERR(cell)) {
1478 		void *buf = nvmem_cell_read(cell, NULL);
1479 
1480 		if (!IS_ERR(buf)) {
1481 			u8 bin = *((u8 *) buf);
1482 
1483 			val = (1 << bin);
1484 			kfree(buf);
1485 		}
1486 
1487 		nvmem_cell_put(cell);
1488 	}
1489 
1490 	dev_pm_opp_set_supported_hw(dev, &val, 1);
1491 }
1492 
1493 struct msm_gpu *a5xx_gpu_init(struct drm_device *dev)
1494 {
1495 	struct msm_drm_private *priv = dev->dev_private;
1496 	struct platform_device *pdev = priv->gpu_pdev;
1497 	struct a5xx_gpu *a5xx_gpu = NULL;
1498 	struct adreno_gpu *adreno_gpu;
1499 	struct msm_gpu *gpu;
1500 	int ret;
1501 
1502 	if (!pdev) {
1503 		DRM_DEV_ERROR(dev->dev, "No A5XX device is defined\n");
1504 		return ERR_PTR(-ENXIO);
1505 	}
1506 
1507 	a5xx_gpu = kzalloc(sizeof(*a5xx_gpu), GFP_KERNEL);
1508 	if (!a5xx_gpu)
1509 		return ERR_PTR(-ENOMEM);
1510 
1511 	adreno_gpu = &a5xx_gpu->base;
1512 	gpu = &adreno_gpu->base;
1513 
1514 	adreno_gpu->registers = a5xx_registers;
1515 	adreno_gpu->reg_offsets = a5xx_register_offsets;
1516 
1517 	a5xx_gpu->lm_leakage = 0x4E001A;
1518 
1519 	check_speed_bin(&pdev->dev);
1520 
1521 	/* Restricting nr_rings to 1 to temporarily disable preemption */
1522 	ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs, 1);
1523 	if (ret) {
1524 		a5xx_destroy(&(a5xx_gpu->base.base));
1525 		return ERR_PTR(ret);
1526 	}
1527 
1528 	if (gpu->aspace)
1529 		msm_mmu_set_fault_handler(gpu->aspace->mmu, gpu, a5xx_fault_handler);
1530 
1531 	/* Set up the preemption specific bits and pieces for each ringbuffer */
1532 	a5xx_preempt_init(gpu);
1533 
1534 	return gpu;
1535 }
1536