xref: /openbmc/linux/drivers/gpu/drm/msm/adreno/a5xx_gpu.c (revision b9890054)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /* Copyright (c) 2016-2017 The Linux Foundation. All rights reserved.
3  */
4 
5 #include <linux/kernel.h>
6 #include <linux/types.h>
7 #include <linux/cpumask.h>
8 #include <linux/qcom_scm.h>
9 #include <linux/pm_opp.h>
10 #include <linux/nvmem-consumer.h>
11 #include <linux/slab.h>
12 #include "msm_gem.h"
13 #include "msm_mmu.h"
14 #include "a5xx_gpu.h"
15 
16 extern bool hang_debug;
17 static void a5xx_dump(struct msm_gpu *gpu);
18 
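/*
 * Peripheral authentication service ID for the GPU, passed to the SCM calls
 * below that load and resume the zap shader in the secure world.
 */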
19 #define GPU_PAS_ID 13
20 
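/*
 * Publish new commands to the hardware: copy the software wptr into the
 * CP_RB_WPTR register.  The register write is skipped when this is not the
 * current ring or a preemption is in flight; in those cases the preemption
 * code updates the wptr when the ring is switched back in.
 */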
21 static void a5xx_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
22 {
23 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
24 	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
25 	uint32_t wptr;
26 	unsigned long flags;
27 
28 	spin_lock_irqsave(&ring->lock, flags);
29 
30 	/* Copy the shadow to the actual register */
31 	ring->cur = ring->next;
32 
33 	/* Make sure to wrap wptr if we need to */
34 	wptr = get_wptr(ring);
35 
36 	spin_unlock_irqrestore(&ring->lock, flags);
37 
38 	/* Make sure everything is posted before making a decision */
39 	mb();
40 
41 	/* Update HW if this is the current ring and we are not in preempt */
42 	if (a5xx_gpu->cur_ring == ring && !a5xx_in_preempt(a5xx_gpu))
43 		gpu_write(gpu, REG_A5XX_CP_RB_WPTR, wptr);
44 }
45 
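/*
 * Used for CONFIG_DRM_MSM_GPU_SUDO submits: instead of pointing the CP at the
 * userspace command buffers with indirect-buffer packets, copy their contents
 * directly into the kernel ringbuffer.
 */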
46 static void a5xx_submit_in_rb(struct msm_gpu *gpu, struct msm_gem_submit *submit,
47 	struct msm_file_private *ctx)
48 {
49 	struct msm_drm_private *priv = gpu->dev->dev_private;
50 	struct msm_ringbuffer *ring = submit->ring;
51 	struct msm_gem_object *obj;
52 	uint32_t *ptr, dwords;
53 	unsigned int i, j;
54 
55 	for (i = 0; i < submit->nr_cmds; i++) {
56 		switch (submit->cmd[i].type) {
57 		case MSM_SUBMIT_CMD_IB_TARGET_BUF:
58 			break;
59 		case MSM_SUBMIT_CMD_CTX_RESTORE_BUF:
60 			if (priv->lastctx == ctx)
61 				break;
62 			/* fall-thru */
63 		case MSM_SUBMIT_CMD_BUF:
64 			/* copy commands into RB: */
65 			obj = submit->bos[submit->cmd[i].idx].obj;
66 			dwords = submit->cmd[i].size;
67 
68 			ptr = msm_gem_get_vaddr(&obj->base);
69 
70 			/* _get_vaddr() shouldn't fail at this point,
71 			 * since we've already mapped it once in
72 			 * submit_reloc()
73 			 */
74 			if (WARN_ON(!ptr))
75 				return;
76 
77 			for (j = 0; j < dwords; j++) {
78 				/* normally the OUT_PKTn() would wait
79 				 * for space for the packet.  But since
80 				 * we just OUT_RING() the whole thing,
81 				 * need to call adreno_wait_ring()
82 				 * ourself:
83 				 */
84 				adreno_wait_ring(ring, 1);
85 				OUT_RING(ring, ptr[j]);
86 			}
87 
88 			msm_gem_put_vaddr(&obj->base);
89 
90 			break;
91 		}
92 	}
93 
94 	a5xx_flush(gpu, ring);
95 	a5xx_preempt_trigger(gpu);
96 
97 	/* We might not necessarily have a cmd from userspace to
98 	 * trigger an event signalling that the submit has completed,
99 	 * so do this manually:
100 	 */
101 	a5xx_idle(gpu, ring);
102 	ring->memptrs->fence = submit->seqno;
103 	msm_gpu_retire(gpu);
104 }
105 
106 static void a5xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit,
107 	struct msm_file_private *ctx)
108 {
109 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
110 	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
111 	struct msm_drm_private *priv = gpu->dev->dev_private;
112 	struct msm_ringbuffer *ring = submit->ring;
113 	unsigned int i, ibs = 0;
114 
115 	if (IS_ENABLED(CONFIG_DRM_MSM_GPU_SUDO) && submit->in_rb) {
116 		priv->lastctx = NULL;
117 		a5xx_submit_in_rb(gpu, submit, ctx);
118 		return;
119 	}
120 
121 	OUT_PKT7(ring, CP_PREEMPT_ENABLE_GLOBAL, 1);
122 	OUT_RING(ring, 0x02);
123 
124 	/* Turn off protected mode to write to special registers */
125 	OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
126 	OUT_RING(ring, 0);
127 
128 	/* Set the save preemption record for the ring/command */
129 	OUT_PKT4(ring, REG_A5XX_CP_CONTEXT_SWITCH_SAVE_ADDR_LO, 2);
130 	OUT_RING(ring, lower_32_bits(a5xx_gpu->preempt_iova[submit->ring->id]));
131 	OUT_RING(ring, upper_32_bits(a5xx_gpu->preempt_iova[submit->ring->id]));
132 
133 	/* Turn back on protected mode */
134 	OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
135 	OUT_RING(ring, 1);
136 
137 	/* Enable local preemption for fine-grained preemption */
138 	OUT_PKT7(ring, CP_PREEMPT_ENABLE_LOCAL, 1);
139 	OUT_RING(ring, 0x02);
140 
141 	/* Allow CP_CONTEXT_SWITCH_YIELD packets in the IB2 */
142 	OUT_PKT7(ring, CP_YIELD_ENABLE, 1);
143 	OUT_RING(ring, 0x02);
144 
145 	/* Submit the commands */
146 	for (i = 0; i < submit->nr_cmds; i++) {
147 		switch (submit->cmd[i].type) {
148 		case MSM_SUBMIT_CMD_IB_TARGET_BUF:
149 			break;
150 		case MSM_SUBMIT_CMD_CTX_RESTORE_BUF:
151 			if (priv->lastctx == ctx)
152 				break;
153 			/* fall-thru */
154 		case MSM_SUBMIT_CMD_BUF:
155 			OUT_PKT7(ring, CP_INDIRECT_BUFFER_PFE, 3);
156 			OUT_RING(ring, lower_32_bits(submit->cmd[i].iova));
157 			OUT_RING(ring, upper_32_bits(submit->cmd[i].iova));
158 			OUT_RING(ring, submit->cmd[i].size);
159 			ibs++;
160 			break;
161 		}
162 	}
163 
164 	/*
165 	 * Write the render mode to NULL (0) to indicate to the CP that the IBs
166 	 * are done rendering - otherwise a lucky preemption would start
167 	 * replaying from the last checkpoint
168 	 */
169 	OUT_PKT7(ring, CP_SET_RENDER_MODE, 5);
170 	OUT_RING(ring, 0);
171 	OUT_RING(ring, 0);
172 	OUT_RING(ring, 0);
173 	OUT_RING(ring, 0);
174 	OUT_RING(ring, 0);
175 
176 	/* Turn off IB level preemptions */
177 	OUT_PKT7(ring, CP_YIELD_ENABLE, 1);
178 	OUT_RING(ring, 0x01);
179 
180 	/* Write the fence to the scratch register */
181 	OUT_PKT4(ring, REG_A5XX_CP_SCRATCH_REG(2), 1);
182 	OUT_RING(ring, submit->seqno);
183 
184 	/*
185 	 * Execute a CACHE_FLUSH_TS event. This will ensure that the
186 	 * timestamp is written to the memory and then triggers the interrupt
187 	 */
188 	OUT_PKT7(ring, CP_EVENT_WRITE, 4);
189 	OUT_RING(ring, CACHE_FLUSH_TS | (1 << 31));
190 	OUT_RING(ring, lower_32_bits(rbmemptr(ring, fence)));
191 	OUT_RING(ring, upper_32_bits(rbmemptr(ring, fence)));
192 	OUT_RING(ring, submit->seqno);
193 
194 	/* Yield the floor on command completion */
195 	OUT_PKT7(ring, CP_CONTEXT_SWITCH_YIELD, 4);
196 	/*
197 	 * If dword[2:1] are non-zero, they specify an address for the CP to
198 	 * write the value of dword[3] to on preemption complete. Write 0 to
199 	 * skip the write
200 	 */
201 	OUT_RING(ring, 0x00);
202 	OUT_RING(ring, 0x00);
203 	/* Data value - not used if the address above is 0 */
204 	OUT_RING(ring, 0x01);
205 	/* Set bit 0 to trigger an interrupt on preempt complete */
206 	OUT_RING(ring, 0x01);
207 
208 	a5xx_flush(gpu, ring);
209 
210 	/* Check to see if we need to start preemption */
211 	a5xx_preempt_trigger(gpu);
212 }
213 
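/*
 * Per-block hardware clock gating configuration, applied by a5xx_set_hwcg():
 * each entry pairs a register offset with the value to program when clock
 * gating is enabled (0 is written when it is disabled).
 */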
214 static const struct {
215 	u32 offset;
216 	u32 value;
217 } a5xx_hwcg[] = {
218 	{REG_A5XX_RBBM_CLOCK_CNTL_SP0, 0x02222222},
219 	{REG_A5XX_RBBM_CLOCK_CNTL_SP1, 0x02222222},
220 	{REG_A5XX_RBBM_CLOCK_CNTL_SP2, 0x02222222},
221 	{REG_A5XX_RBBM_CLOCK_CNTL_SP3, 0x02222222},
222 	{REG_A5XX_RBBM_CLOCK_CNTL2_SP0, 0x02222220},
223 	{REG_A5XX_RBBM_CLOCK_CNTL2_SP1, 0x02222220},
224 	{REG_A5XX_RBBM_CLOCK_CNTL2_SP2, 0x02222220},
225 	{REG_A5XX_RBBM_CLOCK_CNTL2_SP3, 0x02222220},
226 	{REG_A5XX_RBBM_CLOCK_HYST_SP0, 0x0000F3CF},
227 	{REG_A5XX_RBBM_CLOCK_HYST_SP1, 0x0000F3CF},
228 	{REG_A5XX_RBBM_CLOCK_HYST_SP2, 0x0000F3CF},
229 	{REG_A5XX_RBBM_CLOCK_HYST_SP3, 0x0000F3CF},
230 	{REG_A5XX_RBBM_CLOCK_DELAY_SP0, 0x00000080},
231 	{REG_A5XX_RBBM_CLOCK_DELAY_SP1, 0x00000080},
232 	{REG_A5XX_RBBM_CLOCK_DELAY_SP2, 0x00000080},
233 	{REG_A5XX_RBBM_CLOCK_DELAY_SP3, 0x00000080},
234 	{REG_A5XX_RBBM_CLOCK_CNTL_TP0, 0x22222222},
235 	{REG_A5XX_RBBM_CLOCK_CNTL_TP1, 0x22222222},
236 	{REG_A5XX_RBBM_CLOCK_CNTL_TP2, 0x22222222},
237 	{REG_A5XX_RBBM_CLOCK_CNTL_TP3, 0x22222222},
238 	{REG_A5XX_RBBM_CLOCK_CNTL2_TP0, 0x22222222},
239 	{REG_A5XX_RBBM_CLOCK_CNTL2_TP1, 0x22222222},
240 	{REG_A5XX_RBBM_CLOCK_CNTL2_TP2, 0x22222222},
241 	{REG_A5XX_RBBM_CLOCK_CNTL2_TP3, 0x22222222},
242 	{REG_A5XX_RBBM_CLOCK_CNTL3_TP0, 0x00002222},
243 	{REG_A5XX_RBBM_CLOCK_CNTL3_TP1, 0x00002222},
244 	{REG_A5XX_RBBM_CLOCK_CNTL3_TP2, 0x00002222},
245 	{REG_A5XX_RBBM_CLOCK_CNTL3_TP3, 0x00002222},
246 	{REG_A5XX_RBBM_CLOCK_HYST_TP0, 0x77777777},
247 	{REG_A5XX_RBBM_CLOCK_HYST_TP1, 0x77777777},
248 	{REG_A5XX_RBBM_CLOCK_HYST_TP2, 0x77777777},
249 	{REG_A5XX_RBBM_CLOCK_HYST_TP3, 0x77777777},
250 	{REG_A5XX_RBBM_CLOCK_HYST2_TP0, 0x77777777},
251 	{REG_A5XX_RBBM_CLOCK_HYST2_TP1, 0x77777777},
252 	{REG_A5XX_RBBM_CLOCK_HYST2_TP2, 0x77777777},
253 	{REG_A5XX_RBBM_CLOCK_HYST2_TP3, 0x77777777},
254 	{REG_A5XX_RBBM_CLOCK_HYST3_TP0, 0x00007777},
255 	{REG_A5XX_RBBM_CLOCK_HYST3_TP1, 0x00007777},
256 	{REG_A5XX_RBBM_CLOCK_HYST3_TP2, 0x00007777},
257 	{REG_A5XX_RBBM_CLOCK_HYST3_TP3, 0x00007777},
258 	{REG_A5XX_RBBM_CLOCK_DELAY_TP0, 0x11111111},
259 	{REG_A5XX_RBBM_CLOCK_DELAY_TP1, 0x11111111},
260 	{REG_A5XX_RBBM_CLOCK_DELAY_TP2, 0x11111111},
261 	{REG_A5XX_RBBM_CLOCK_DELAY_TP3, 0x11111111},
262 	{REG_A5XX_RBBM_CLOCK_DELAY2_TP0, 0x11111111},
263 	{REG_A5XX_RBBM_CLOCK_DELAY2_TP1, 0x11111111},
264 	{REG_A5XX_RBBM_CLOCK_DELAY2_TP2, 0x11111111},
265 	{REG_A5XX_RBBM_CLOCK_DELAY2_TP3, 0x11111111},
266 	{REG_A5XX_RBBM_CLOCK_DELAY3_TP0, 0x00001111},
267 	{REG_A5XX_RBBM_CLOCK_DELAY3_TP1, 0x00001111},
268 	{REG_A5XX_RBBM_CLOCK_DELAY3_TP2, 0x00001111},
269 	{REG_A5XX_RBBM_CLOCK_DELAY3_TP3, 0x00001111},
270 	{REG_A5XX_RBBM_CLOCK_CNTL_UCHE, 0x22222222},
271 	{REG_A5XX_RBBM_CLOCK_CNTL2_UCHE, 0x22222222},
272 	{REG_A5XX_RBBM_CLOCK_CNTL3_UCHE, 0x22222222},
273 	{REG_A5XX_RBBM_CLOCK_CNTL4_UCHE, 0x00222222},
274 	{REG_A5XX_RBBM_CLOCK_HYST_UCHE, 0x00444444},
275 	{REG_A5XX_RBBM_CLOCK_DELAY_UCHE, 0x00000002},
276 	{REG_A5XX_RBBM_CLOCK_CNTL_RB0, 0x22222222},
277 	{REG_A5XX_RBBM_CLOCK_CNTL_RB1, 0x22222222},
278 	{REG_A5XX_RBBM_CLOCK_CNTL_RB2, 0x22222222},
279 	{REG_A5XX_RBBM_CLOCK_CNTL_RB3, 0x22222222},
280 	{REG_A5XX_RBBM_CLOCK_CNTL2_RB0, 0x00222222},
281 	{REG_A5XX_RBBM_CLOCK_CNTL2_RB1, 0x00222222},
282 	{REG_A5XX_RBBM_CLOCK_CNTL2_RB2, 0x00222222},
283 	{REG_A5XX_RBBM_CLOCK_CNTL2_RB3, 0x00222222},
284 	{REG_A5XX_RBBM_CLOCK_CNTL_CCU0, 0x00022220},
285 	{REG_A5XX_RBBM_CLOCK_CNTL_CCU1, 0x00022220},
286 	{REG_A5XX_RBBM_CLOCK_CNTL_CCU2, 0x00022220},
287 	{REG_A5XX_RBBM_CLOCK_CNTL_CCU3, 0x00022220},
288 	{REG_A5XX_RBBM_CLOCK_CNTL_RAC, 0x05522222},
289 	{REG_A5XX_RBBM_CLOCK_CNTL2_RAC, 0x00505555},
290 	{REG_A5XX_RBBM_CLOCK_HYST_RB_CCU0, 0x04040404},
291 	{REG_A5XX_RBBM_CLOCK_HYST_RB_CCU1, 0x04040404},
292 	{REG_A5XX_RBBM_CLOCK_HYST_RB_CCU2, 0x04040404},
293 	{REG_A5XX_RBBM_CLOCK_HYST_RB_CCU3, 0x04040404},
294 	{REG_A5XX_RBBM_CLOCK_HYST_RAC, 0x07444044},
295 	{REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_0, 0x00000002},
296 	{REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_1, 0x00000002},
297 	{REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_2, 0x00000002},
298 	{REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_3, 0x00000002},
299 	{REG_A5XX_RBBM_CLOCK_DELAY_RAC, 0x00010011},
300 	{REG_A5XX_RBBM_CLOCK_CNTL_TSE_RAS_RBBM, 0x04222222},
301 	{REG_A5XX_RBBM_CLOCK_MODE_GPC, 0x02222222},
302 	{REG_A5XX_RBBM_CLOCK_MODE_VFD, 0x00002222},
303 	{REG_A5XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM, 0x00000000},
304 	{REG_A5XX_RBBM_CLOCK_HYST_GPC, 0x04104004},
305 	{REG_A5XX_RBBM_CLOCK_HYST_VFD, 0x00000000},
306 	{REG_A5XX_RBBM_CLOCK_DELAY_HLSQ, 0x00000000},
307 	{REG_A5XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM, 0x00004000},
308 	{REG_A5XX_RBBM_CLOCK_DELAY_GPC, 0x00000200},
309 	{REG_A5XX_RBBM_CLOCK_DELAY_VFD, 0x00002222}
310 };
311 
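/*
 * Enable or disable hardware clock gating.  Callers such as
 * a5xx_gpu_state_get() temporarily turn it off so that the hardware registers
 * can be read back reliably, then turn it back on.
 */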
312 void a5xx_set_hwcg(struct msm_gpu *gpu, bool state)
313 {
314 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
315 	unsigned int i;
316 
317 	for (i = 0; i < ARRAY_SIZE(a5xx_hwcg); i++)
318 		gpu_write(gpu, a5xx_hwcg[i].offset,
319 			state ? a5xx_hwcg[i].value : 0);
320 
321 	if (adreno_is_a540(adreno_gpu)) {
322 		gpu_write(gpu, REG_A5XX_RBBM_CLOCK_DELAY_GPMU, state ? 0x00000770 : 0);
323 		gpu_write(gpu, REG_A5XX_RBBM_CLOCK_HYST_GPMU, state ? 0x00000004 : 0);
324 	}
325 
326 	gpu_write(gpu, REG_A5XX_RBBM_CLOCK_CNTL, state ? 0xAAA8AA00 : 0);
327 	gpu_write(gpu, REG_A5XX_RBBM_ISDB_CNT, state ? 0x182 : 0x180);
328 }
329 
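/*
 * Initialize the CP micro engine: the eight CP_ME_INIT dwords below enable
 * multiple hardware contexts and error detection and select per-chip
 * microcode workarounds.
 */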
330 static int a5xx_me_init(struct msm_gpu *gpu)
331 {
332 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
333 	struct msm_ringbuffer *ring = gpu->rb[0];
334 
335 	OUT_PKT7(ring, CP_ME_INIT, 8);
336 
337 	OUT_RING(ring, 0x0000002F);
338 
339 	/* Enable multiple hardware contexts */
340 	OUT_RING(ring, 0x00000003);
341 
342 	/* Enable error detection */
343 	OUT_RING(ring, 0x20000000);
344 
345 	/* Don't enable header dump */
346 	OUT_RING(ring, 0x00000000);
347 	OUT_RING(ring, 0x00000000);
348 
349 	/* Specify workarounds for various microcode issues */
350 	if (adreno_is_a530(adreno_gpu)) {
351 		/* Workaround for token end syncs
352 		 * Force a WFI after every direct-render 3D mode draw and every
353 		 * 2D mode 3 draw
354 		 */
355 		OUT_RING(ring, 0x0000000B);
356 	} else if (adreno_is_a510(adreno_gpu)) {
357 		/* Workaround for token and syncs */
358 		OUT_RING(ring, 0x00000001);
359 	} else {
360 		/* No workarounds enabled */
361 		OUT_RING(ring, 0x00000000);
362 	}
363 
364 	OUT_RING(ring, 0x00000000);
365 	OUT_RING(ring, 0x00000000);
366 
367 	gpu->funcs->flush(gpu, ring);
368 	return a5xx_idle(gpu, ring) ? 0 : -EINVAL;
369 }
370 
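/*
 * Prime ring 0 for preemption: program the CP with this ring's preemption
 * save record and yield the ringbuffer once so the preemption machinery
 * starts from a known state.  Skipped when only one ring is configured,
 * since preemption is never used in that case.
 */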
371 static int a5xx_preempt_start(struct msm_gpu *gpu)
372 {
373 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
374 	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
375 	struct msm_ringbuffer *ring = gpu->rb[0];
376 
377 	if (gpu->nr_rings == 1)
378 		return 0;
379 
380 	/* Turn off protected mode to write to special registers */
381 	OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
382 	OUT_RING(ring, 0);
383 
384 	/* Set the save preemption record for the ring/command */
385 	OUT_PKT4(ring, REG_A5XX_CP_CONTEXT_SWITCH_SAVE_ADDR_LO, 2);
386 	OUT_RING(ring, lower_32_bits(a5xx_gpu->preempt_iova[ring->id]));
387 	OUT_RING(ring, upper_32_bits(a5xx_gpu->preempt_iova[ring->id]));
388 
389 	/* Turn back on protected mode */
390 	OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
391 	OUT_RING(ring, 1);
392 
393 	OUT_PKT7(ring, CP_PREEMPT_ENABLE_GLOBAL, 1);
394 	OUT_RING(ring, 0x00);
395 
396 	OUT_PKT7(ring, CP_PREEMPT_ENABLE_LOCAL, 1);
397 	OUT_RING(ring, 0x01);
398 
399 	OUT_PKT7(ring, CP_YIELD_ENABLE, 1);
400 	OUT_RING(ring, 0x01);
401 
402 	/* Yield the floor on command completion */
403 	OUT_PKT7(ring, CP_CONTEXT_SWITCH_YIELD, 4);
404 	OUT_RING(ring, 0x00);
405 	OUT_RING(ring, 0x00);
406 	OUT_RING(ring, 0x01);
407 	OUT_RING(ring, 0x01);
408 
409 	gpu->funcs->flush(gpu, ring);
410 
411 	return a5xx_idle(gpu, ring) ? 0 : -EINVAL;
412 }
413 
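/*
 * Pin the PM4 and PFP microcode into GPU-addressable buffers (created once
 * and reused across resets) and point the CP instruction base registers at
 * them.
 */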
414 static int a5xx_ucode_init(struct msm_gpu *gpu)
415 {
416 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
417 	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
418 	int ret;
419 
420 	if (!a5xx_gpu->pm4_bo) {
421 		a5xx_gpu->pm4_bo = adreno_fw_create_bo(gpu,
422 			adreno_gpu->fw[ADRENO_FW_PM4], &a5xx_gpu->pm4_iova);
423 
425 		if (IS_ERR(a5xx_gpu->pm4_bo)) {
426 			ret = PTR_ERR(a5xx_gpu->pm4_bo);
427 			a5xx_gpu->pm4_bo = NULL;
428 			DRM_DEV_ERROR(gpu->dev->dev, "could not allocate PM4: %d\n",
429 				ret);
430 			return ret;
431 		}
432 
433 		msm_gem_object_set_name(a5xx_gpu->pm4_bo, "pm4fw");
434 	}
435 
436 	if (!a5xx_gpu->pfp_bo) {
437 		a5xx_gpu->pfp_bo = adreno_fw_create_bo(gpu,
438 			adreno_gpu->fw[ADRENO_FW_PFP], &a5xx_gpu->pfp_iova);
439 
440 		if (IS_ERR(a5xx_gpu->pfp_bo)) {
441 			ret = PTR_ERR(a5xx_gpu->pfp_bo);
442 			a5xx_gpu->pfp_bo = NULL;
443 			DRM_DEV_ERROR(gpu->dev->dev, "could not allocate PFP: %d\n",
444 				ret);
445 			return ret;
446 		}
447 
448 		msm_gem_object_set_name(a5xx_gpu->pfp_bo, "pfpfw");
449 	}
450 
451 	gpu_write64(gpu, REG_A5XX_CP_ME_INSTR_BASE_LO,
452 		REG_A5XX_CP_ME_INSTR_BASE_HI, a5xx_gpu->pm4_iova);
453 
454 	gpu_write64(gpu, REG_A5XX_CP_PFP_INSTR_BASE_LO,
455 		REG_A5XX_CP_PFP_INSTR_BASE_HI, a5xx_gpu->pfp_iova);
456 
457 	return 0;
458 }
459 
460 #define SCM_GPU_ZAP_SHADER_RESUME 0
461 
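/*
 * Ask the secure world, via an SCM call, to re-enable a zap shader that is
 * already resident in memory (see a5xx_zap_shader_init()).
 */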
462 static int a5xx_zap_shader_resume(struct msm_gpu *gpu)
463 {
464 	int ret;
465 
466 	ret = qcom_scm_set_remote_state(SCM_GPU_ZAP_SHADER_RESUME, GPU_PAS_ID);
467 	if (ret)
468 		DRM_ERROR("%s: zap-shader resume failed: %d\n",
469 			gpu->name, ret);
470 
471 	return ret;
472 }
473 
474 static int a5xx_zap_shader_init(struct msm_gpu *gpu)
475 {
476 	static bool loaded;
477 	int ret;
478 
479 	/*
480 	 * If the zap shader is already loaded into memory we just need to kick
481 	 * the remote processor to reinitialize it
482 	 */
483 	if (loaded)
484 		return a5xx_zap_shader_resume(gpu);
485 
486 	ret = adreno_zap_shader_load(gpu, GPU_PAS_ID);
487 
488 	loaded = !ret;
489 	return ret;
490 }
491 
492 #define A5XX_INT_MASK (A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR | \
493 	  A5XX_RBBM_INT_0_MASK_RBBM_TRANSFER_TIMEOUT | \
494 	  A5XX_RBBM_INT_0_MASK_RBBM_ME_MS_TIMEOUT | \
495 	  A5XX_RBBM_INT_0_MASK_RBBM_PFP_MS_TIMEOUT | \
496 	  A5XX_RBBM_INT_0_MASK_RBBM_ETS_MS_TIMEOUT | \
497 	  A5XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNC_OVERFLOW | \
498 	  A5XX_RBBM_INT_0_MASK_CP_HW_ERROR | \
499 	  A5XX_RBBM_INT_0_MASK_MISC_HANG_DETECT | \
500 	  A5XX_RBBM_INT_0_MASK_CP_SW | \
501 	  A5XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS | \
502 	  A5XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS | \
503 	  A5XX_RBBM_INT_0_MASK_GPMU_VOLTAGE_DROOP)
504 
505 static int a5xx_hw_init(struct msm_gpu *gpu)
506 {
507 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
508 	int ret;
509 
510 	gpu_write(gpu, REG_A5XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x00000003);
511 
512 	if (adreno_is_a540(adreno_gpu))
513 		gpu_write(gpu, REG_A5XX_VBIF_GATE_OFF_WRREQ_EN, 0x00000009);
514 
515 	/* Make all blocks contribute to the GPU BUSY perf counter */
516 	gpu_write(gpu, REG_A5XX_RBBM_PERFCTR_GPU_BUSY_MASKED, 0xFFFFFFFF);
517 
518 	/* Enable RBBM error reporting bits */
519 	gpu_write(gpu, REG_A5XX_RBBM_AHB_CNTL0, 0x00000001);
520 
521 	if (adreno_gpu->info->quirks & ADRENO_QUIRK_FAULT_DETECT_MASK) {
522 		/*
523 		 * Mask out the activity signals from RB1-3 to avoid false
524 		 * positives
525 		 */
526 
527 		gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL11,
528 			0xF0000000);
529 		gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL12,
530 			0xFFFFFFFF);
531 		gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL13,
532 			0xFFFFFFFF);
533 		gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL14,
534 			0xFFFFFFFF);
535 		gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL15,
536 			0xFFFFFFFF);
537 		gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL16,
538 			0xFFFFFFFF);
539 		gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL17,
540 			0xFFFFFFFF);
541 		gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL18,
542 			0xFFFFFFFF);
543 	}
544 
545 	/* Enable fault detection */
546 	gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_INT_CNTL,
547 		(1 << 30) | 0xFFFF);
548 
549 	/* Turn on performance counters */
550 	gpu_write(gpu, REG_A5XX_RBBM_PERFCTR_CNTL, 0x01);
551 
552 	/* Select CP0 to always count cycles */
553 	gpu_write(gpu, REG_A5XX_CP_PERFCTR_CP_SEL_0, PERF_CP_ALWAYS_COUNT);
554 
555 	/* Select countable 6 for RBBM perf counter 0 to get the busy status for devfreq */
556 	gpu_write(gpu, REG_A5XX_RBBM_PERFCTR_RBBM_SEL_0, 6);
557 
558 	/* Increase VFD cache access so LRZ and other data gets evicted less */
559 	gpu_write(gpu, REG_A5XX_UCHE_CACHE_WAYS, 0x02);
560 
561 	/* Disable L2 bypass in the UCHE */
562 	gpu_write(gpu, REG_A5XX_UCHE_TRAP_BASE_LO, 0xFFFF0000);
563 	gpu_write(gpu, REG_A5XX_UCHE_TRAP_BASE_HI, 0x0001FFFF);
564 	gpu_write(gpu, REG_A5XX_UCHE_WRITE_THRU_BASE_LO, 0xFFFF0000);
565 	gpu_write(gpu, REG_A5XX_UCHE_WRITE_THRU_BASE_HI, 0x0001FFFF);
566 
567 	/* Set the GMEM VA range (0 to gpu->gmem) */
568 	gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MIN_LO, 0x00100000);
569 	gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MIN_HI, 0x00000000);
570 	gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MAX_LO,
571 		0x00100000 + adreno_gpu->gmem - 1);
572 	gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MAX_HI, 0x00000000);
573 
574 	if (adreno_is_a510(adreno_gpu)) {
575 		gpu_write(gpu, REG_A5XX_CP_MEQ_THRESHOLDS, 0x20);
576 		gpu_write(gpu, REG_A5XX_CP_MERCIU_SIZE, 0x20);
577 		gpu_write(gpu, REG_A5XX_CP_ROQ_THRESHOLDS_2, 0x40000030);
578 		gpu_write(gpu, REG_A5XX_CP_ROQ_THRESHOLDS_1, 0x20100D0A);
579 		gpu_write(gpu, REG_A5XX_PC_DBG_ECO_CNTL,
580 			  (0x200 << 11 | 0x200 << 22));
581 	} else {
582 		gpu_write(gpu, REG_A5XX_CP_MEQ_THRESHOLDS, 0x40);
583 		if (adreno_is_a530(adreno_gpu))
584 			gpu_write(gpu, REG_A5XX_CP_MERCIU_SIZE, 0x40);
585 		if (adreno_is_a540(adreno_gpu))
586 			gpu_write(gpu, REG_A5XX_CP_MERCIU_SIZE, 0x400);
587 		gpu_write(gpu, REG_A5XX_CP_ROQ_THRESHOLDS_2, 0x80000060);
588 		gpu_write(gpu, REG_A5XX_CP_ROQ_THRESHOLDS_1, 0x40201B16);
589 		gpu_write(gpu, REG_A5XX_PC_DBG_ECO_CNTL,
590 			  (0x400 << 11 | 0x300 << 22));
591 	}
592 
593 	if (adreno_gpu->info->quirks & ADRENO_QUIRK_TWO_PASS_USE_WFI)
594 		gpu_rmw(gpu, REG_A5XX_PC_DBG_ECO_CNTL, 0, (1 << 8));
595 
596 	gpu_write(gpu, REG_A5XX_PC_DBG_ECO_CNTL, 0xc0200100);
597 
598 	/* Enable USE_RETENTION_FLOPS */
599 	gpu_write(gpu, REG_A5XX_CP_CHICKEN_DBG, 0x02000000);
600 
601 	/* Enable ME/PFP split notification */
602 	gpu_write(gpu, REG_A5XX_RBBM_AHB_CNTL1, 0xA6FFFFFF);
603 
604 	/*
605 	 *  On A5xx, the CCU can send the context_done event of a particular
606 	 *  context to the UCHE, which ultimately reaches the CP even while a
607 	 *  valid transaction for that context is still inside the CCU. This
608 	 *  can let the CP program config registers, which makes the "valid
609 	 *  transaction" inside the CCU be interpreted differently and can
610 	 *  cause a GPU fault. The bug is fixed in the latest A510 revision;
611 	 *  to enable the fix, set bit[11] of RB_DBG_ECO_CNTL to 0 (default 1,
612 	 *  i.e. disabled). Older A510 revisions do not use this bit.
613 	 */
614 	if (adreno_is_a510(adreno_gpu))
615 		gpu_rmw(gpu, REG_A5XX_RB_DBG_ECO_CNTL, (1 << 11), 0);
616 
617 	/* Enable HWCG */
618 	a5xx_set_hwcg(gpu, true);
619 
620 	gpu_write(gpu, REG_A5XX_RBBM_AHB_CNTL2, 0x0000003F);
621 
622 	/* Set the highest bank bit */
623 	gpu_write(gpu, REG_A5XX_TPL1_MODE_CNTL, 2 << 7);
624 	gpu_write(gpu, REG_A5XX_RB_MODE_CNTL, 2 << 1);
625 	if (adreno_is_a540(adreno_gpu))
626 		gpu_write(gpu, REG_A5XX_UCHE_DBG_ECO_CNTL_2, 2);
627 
628 	/* Protect registers from the CP */
629 	gpu_write(gpu, REG_A5XX_CP_PROTECT_CNTL, 0x00000007);
630 
631 	/* RBBM */
632 	gpu_write(gpu, REG_A5XX_CP_PROTECT(0), ADRENO_PROTECT_RW(0x04, 4));
633 	gpu_write(gpu, REG_A5XX_CP_PROTECT(1), ADRENO_PROTECT_RW(0x08, 8));
634 	gpu_write(gpu, REG_A5XX_CP_PROTECT(2), ADRENO_PROTECT_RW(0x10, 16));
635 	gpu_write(gpu, REG_A5XX_CP_PROTECT(3), ADRENO_PROTECT_RW(0x20, 32));
636 	gpu_write(gpu, REG_A5XX_CP_PROTECT(4), ADRENO_PROTECT_RW(0x40, 64));
637 	gpu_write(gpu, REG_A5XX_CP_PROTECT(5), ADRENO_PROTECT_RW(0x80, 64));
638 
639 	/* Content protect */
640 	gpu_write(gpu, REG_A5XX_CP_PROTECT(6),
641 		ADRENO_PROTECT_RW(REG_A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_LO,
642 			16));
643 	gpu_write(gpu, REG_A5XX_CP_PROTECT(7),
644 		ADRENO_PROTECT_RW(REG_A5XX_RBBM_SECVID_TRUST_CNTL, 2));
645 
646 	/* CP */
647 	gpu_write(gpu, REG_A5XX_CP_PROTECT(8), ADRENO_PROTECT_RW(0x800, 64));
648 	gpu_write(gpu, REG_A5XX_CP_PROTECT(9), ADRENO_PROTECT_RW(0x840, 8));
649 	gpu_write(gpu, REG_A5XX_CP_PROTECT(10), ADRENO_PROTECT_RW(0x880, 32));
650 	gpu_write(gpu, REG_A5XX_CP_PROTECT(11), ADRENO_PROTECT_RW(0xAA0, 1));
651 
652 	/* RB */
653 	gpu_write(gpu, REG_A5XX_CP_PROTECT(12), ADRENO_PROTECT_RW(0xCC0, 1));
654 	gpu_write(gpu, REG_A5XX_CP_PROTECT(13), ADRENO_PROTECT_RW(0xCF0, 2));
655 
656 	/* VPC */
657 	gpu_write(gpu, REG_A5XX_CP_PROTECT(14), ADRENO_PROTECT_RW(0xE68, 8));
658 	gpu_write(gpu, REG_A5XX_CP_PROTECT(15), ADRENO_PROTECT_RW(0xE70, 4));
659 
660 	/* UCHE */
661 	gpu_write(gpu, REG_A5XX_CP_PROTECT(16), ADRENO_PROTECT_RW(0xE80, 16));
662 
663 	if (adreno_is_a530(adreno_gpu) || adreno_is_a510(adreno_gpu))
664 		gpu_write(gpu, REG_A5XX_CP_PROTECT(17),
665 			ADRENO_PROTECT_RW(0x10000, 0x8000));
666 
667 	gpu_write(gpu, REG_A5XX_RBBM_SECVID_TSB_CNTL, 0);
668 	/*
669 	 * Disable the trusted memory range - we don't actually support secure
670 	 * memory rendering at this point in time and we don't want to block off
671 	 * part of the virtual memory space.
672 	 */
673 	gpu_write64(gpu, REG_A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_LO,
674 		REG_A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_HI, 0x00000000);
675 	gpu_write(gpu, REG_A5XX_RBBM_SECVID_TSB_TRUSTED_SIZE, 0x00000000);
676 
677 	/* Put the GPU into 64 bit by default */
678 	gpu_write(gpu, REG_A5XX_CP_ADDR_MODE_CNTL, 0x1);
679 	gpu_write(gpu, REG_A5XX_VSC_ADDR_MODE_CNTL, 0x1);
680 	gpu_write(gpu, REG_A5XX_GRAS_ADDR_MODE_CNTL, 0x1);
681 	gpu_write(gpu, REG_A5XX_RB_ADDR_MODE_CNTL, 0x1);
682 	gpu_write(gpu, REG_A5XX_PC_ADDR_MODE_CNTL, 0x1);
683 	gpu_write(gpu, REG_A5XX_HLSQ_ADDR_MODE_CNTL, 0x1);
684 	gpu_write(gpu, REG_A5XX_VFD_ADDR_MODE_CNTL, 0x1);
685 	gpu_write(gpu, REG_A5XX_VPC_ADDR_MODE_CNTL, 0x1);
686 	gpu_write(gpu, REG_A5XX_UCHE_ADDR_MODE_CNTL, 0x1);
687 	gpu_write(gpu, REG_A5XX_SP_ADDR_MODE_CNTL, 0x1);
688 	gpu_write(gpu, REG_A5XX_TPL1_ADDR_MODE_CNTL, 0x1);
689 	gpu_write(gpu, REG_A5XX_RBBM_SECVID_TSB_ADDR_MODE_CNTL, 0x1);
690 
691 	/*
692 	 * VPC corner case with local memory load kill leads to corrupt
693 	 * internal state. Normal Disable does not work for all a5x chips.
694 	 * So do the following setting to disable it.
695 	 */
696 	if (adreno_gpu->info->quirks & ADRENO_QUIRK_LMLOADKILL_DISABLE) {
697 		gpu_rmw(gpu, REG_A5XX_VPC_DBG_ECO_CNTL, 0, BIT(23));
698 		gpu_rmw(gpu, REG_A5XX_HLSQ_DBG_ECO_CNTL, BIT(18), 0);
699 	}
700 
701 	ret = adreno_hw_init(gpu);
702 	if (ret)
703 		return ret;
704 
705 	a5xx_preempt_hw_init(gpu);
706 
707 	if (!adreno_is_a510(adreno_gpu))
708 		a5xx_gpmu_ucode_init(gpu);
709 
710 	ret = a5xx_ucode_init(gpu);
711 	if (ret)
712 		return ret;
713 
714 	/* Disable the interrupts through the initial bringup stage */
715 	gpu_write(gpu, REG_A5XX_RBBM_INT_0_MASK, A5XX_INT_MASK);
716 
717 	/* Clear ME_HALT to start the micro engine */
718 	gpu_write(gpu, REG_A5XX_CP_PFP_ME_CNTL, 0);
719 	ret = a5xx_me_init(gpu);
720 	if (ret)
721 		return ret;
722 
723 	ret = a5xx_power_init(gpu);
724 	if (ret)
725 		return ret;
726 
727 	/*
728 	 * Send a pipeline event stat to get misbehaving counters to start
729 	 * ticking correctly
730 	 */
731 	if (adreno_is_a530(adreno_gpu)) {
732 		OUT_PKT7(gpu->rb[0], CP_EVENT_WRITE, 1);
733 		OUT_RING(gpu->rb[0], 0x0F);
734 
735 		gpu->funcs->flush(gpu, gpu->rb[0]);
736 		if (!a5xx_idle(gpu, gpu->rb[0]))
737 			return -EINVAL;
738 	}
739 
740 	/*
741 	 * If the chip we are using supports loading a zap shader, try to load
742 	 * one into the secure world. If successful we can use the CP to switch
743 	 * out of secure mode. If not then we have no recourse but to try to
744 	 * switch ourselves out manually. If we guessed wrong then access to
745 	 * the RBBM_SECVID_TRUST_CNTL register will be blocked and a
746 	 * permissions violation will soon follow.
747 	 */
748 	ret = a5xx_zap_shader_init(gpu);
749 	if (!ret) {
750 		OUT_PKT7(gpu->rb[0], CP_SET_SECURE_MODE, 1);
751 		OUT_RING(gpu->rb[0], 0x00000000);
752 
753 		gpu->funcs->flush(gpu, gpu->rb[0]);
754 		if (!a5xx_idle(gpu, gpu->rb[0]))
755 			return -EINVAL;
756 	} else {
757 		/* Print a warning so if we die, we know why */
758 		dev_warn_once(gpu->dev->dev,
759 			"Zap shader not enabled - using SECVID_TRUST_CNTL instead\n");
760 		gpu_write(gpu, REG_A5XX_RBBM_SECVID_TRUST_CNTL, 0x0);
761 	}
762 
763 	/* Last step - yield the ringbuffer */
764 	a5xx_preempt_start(gpu);
765 
766 	return 0;
767 }
768 
769 static void a5xx_recover(struct msm_gpu *gpu)
770 {
771 	int i;
772 
773 	adreno_dump_info(gpu);
774 
775 	for (i = 0; i < 8; i++) {
776 		printk("CP_SCRATCH_REG%d: %u\n", i,
777 			gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(i)));
778 	}
779 
780 	if (hang_debug)
781 		a5xx_dump(gpu);
782 
783 	gpu_write(gpu, REG_A5XX_RBBM_SW_RESET_CMD, 1);
784 	gpu_read(gpu, REG_A5XX_RBBM_SW_RESET_CMD);
785 	gpu_write(gpu, REG_A5XX_RBBM_SW_RESET_CMD, 0);
786 	adreno_recover(gpu);
787 }
788 
789 static void a5xx_destroy(struct msm_gpu *gpu)
790 {
791 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
792 	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
793 
794 	DBG("%s", gpu->name);
795 
796 	a5xx_preempt_fini(gpu);
797 
798 	if (a5xx_gpu->pm4_bo) {
799 		msm_gem_unpin_iova(a5xx_gpu->pm4_bo, gpu->aspace);
800 		drm_gem_object_put_unlocked(a5xx_gpu->pm4_bo);
801 	}
802 
803 	if (a5xx_gpu->pfp_bo) {
804 		msm_gem_unpin_iova(a5xx_gpu->pfp_bo, gpu->aspace);
805 		drm_gem_object_put_unlocked(a5xx_gpu->pfp_bo);
806 	}
807 
808 	if (a5xx_gpu->gpmu_bo) {
809 		msm_gem_unpin_iova(a5xx_gpu->gpmu_bo, gpu->aspace);
810 		drm_gem_object_put_unlocked(a5xx_gpu->gpmu_bo);
811 	}
812 
813 	adreno_gpu_cleanup(adreno_gpu);
814 	kfree(a5xx_gpu);
815 }
816 
817 static inline bool _a5xx_check_idle(struct msm_gpu *gpu)
818 {
819 	if (gpu_read(gpu, REG_A5XX_RBBM_STATUS) & ~A5XX_RBBM_STATUS_HI_BUSY)
820 		return false;
821 
822 	/*
823 	 * Nearly every abnormality ends up pausing the GPU and triggering a
824 	 * fault so we can safely just watch for this one interrupt to fire
825 	 */
826 	return !(gpu_read(gpu, REG_A5XX_RBBM_INT_0_STATUS) &
827 		A5XX_RBBM_INT_0_MASK_MISC_HANG_DETECT);
828 }
829 
830 bool a5xx_idle(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
831 {
832 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
833 	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
834 
835 	if (ring != a5xx_gpu->cur_ring) {
836 		WARN(1, "Tried to idle a non-current ringbuffer\n");
837 		return false;
838 	}
839 
840 	/* wait for CP to drain ringbuffer: */
841 	if (!adreno_idle(gpu, ring))
842 		return false;
843 
844 	if (spin_until(_a5xx_check_idle(gpu))) {
845 		DRM_ERROR("%s: %ps: timeout waiting for GPU to idle: status %8.8X irq %8.8X rptr/wptr %d/%d\n",
846 			gpu->name, __builtin_return_address(0),
847 			gpu_read(gpu, REG_A5XX_RBBM_STATUS),
848 			gpu_read(gpu, REG_A5XX_RBBM_INT_0_STATUS),
849 			gpu_read(gpu, REG_A5XX_CP_RB_RPTR),
850 			gpu_read(gpu, REG_A5XX_CP_RB_WPTR));
851 		return false;
852 	}
853 
854 	return true;
855 }
856 
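/*
 * IOMMU page fault handler: log the faulting iova together with CP scratch
 * registers 4-7 (useful for matching the fault to a submit) and return
 * -EFAULT so the fault is treated as unhandled.
 */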
857 static int a5xx_fault_handler(void *arg, unsigned long iova, int flags)
858 {
859 	struct msm_gpu *gpu = arg;
860 	pr_warn_ratelimited("*** gpu fault: iova=%08lx, flags=%d (%u,%u,%u,%u)\n",
861 			iova, flags,
862 			gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(4)),
863 			gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(5)),
864 			gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(6)),
865 			gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(7)));
866 
867 	return -EFAULT;
868 }
869 
870 static void a5xx_cp_err_irq(struct msm_gpu *gpu)
871 {
872 	u32 status = gpu_read(gpu, REG_A5XX_CP_INTERRUPT_STATUS);
873 
874 	if (status & A5XX_CP_INT_CP_OPCODE_ERROR) {
875 		u32 val;
876 
877 		gpu_write(gpu, REG_A5XX_CP_PFP_STAT_ADDR, 0);
878 
879 		/*
880 		 * REG_A5XX_CP_PFP_STAT_DATA is indexed, and we want index 1 so
881 		 * read it twice
882 		 */
883 
884 		gpu_read(gpu, REG_A5XX_CP_PFP_STAT_DATA);
885 		val = gpu_read(gpu, REG_A5XX_CP_PFP_STAT_DATA);
886 
887 		dev_err_ratelimited(gpu->dev->dev, "CP | opcode error | possible opcode=0x%8.8X\n",
888 			val);
889 	}
890 
891 	if (status & A5XX_CP_INT_CP_HW_FAULT_ERROR)
892 		dev_err_ratelimited(gpu->dev->dev, "CP | HW fault | status=0x%8.8X\n",
893 			gpu_read(gpu, REG_A5XX_CP_HW_FAULT));
894 
895 	if (status & A5XX_CP_INT_CP_DMA_ERROR)
896 		dev_err_ratelimited(gpu->dev->dev, "CP | DMA error\n");
897 
898 	if (status & A5XX_CP_INT_CP_REGISTER_PROTECTION_ERROR) {
899 		u32 val = gpu_read(gpu, REG_A5XX_CP_PROTECT_STATUS);
900 
901 		dev_err_ratelimited(gpu->dev->dev,
902 			"CP | protected mode error | %s | addr=0x%8.8X | status=0x%8.8X\n",
903 			val & (1 << 24) ? "WRITE" : "READ",
904 			(val & 0xFFFFF) >> 2, val);
905 	}
906 
907 	if (status & A5XX_CP_INT_CP_AHB_ERROR) {
908 		u32 status = gpu_read(gpu, REG_A5XX_CP_AHB_FAULT);
909 		const char *access[16] = { "reserved", "reserved",
910 			"timestamp lo", "timestamp hi", "pfp read", "pfp write",
911 			"", "", "me read", "me write", "", "", "crashdump read",
912 			"crashdump write" };
913 
914 		dev_err_ratelimited(gpu->dev->dev,
915 			"CP | AHB error | addr=%X access=%s error=%d | status=0x%8.8X\n",
916 			status & 0xFFFFF, access[(status >> 24) & 0xF],
917 			(status & (1 << 31)), status);
918 	}
919 }
920 
921 static void a5xx_rbbm_err_irq(struct msm_gpu *gpu, u32 status)
922 {
923 	if (status & A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR) {
924 		u32 val = gpu_read(gpu, REG_A5XX_RBBM_AHB_ERROR_STATUS);
925 
926 		dev_err_ratelimited(gpu->dev->dev,
927 			"RBBM | AHB bus error | %s | addr=0x%X | ports=0x%X:0x%X\n",
928 			val & (1 << 28) ? "WRITE" : "READ",
929 			(val & 0xFFFFF) >> 2, (val >> 20) & 0x3,
930 			(val >> 24) & 0xF);
931 
932 		/* Clear the error */
933 		gpu_write(gpu, REG_A5XX_RBBM_AHB_CMD, (1 << 4));
934 
935 		/* Clear the interrupt */
936 		gpu_write(gpu, REG_A5XX_RBBM_INT_CLEAR_CMD,
937 			A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR);
938 	}
939 
940 	if (status & A5XX_RBBM_INT_0_MASK_RBBM_TRANSFER_TIMEOUT)
941 		dev_err_ratelimited(gpu->dev->dev, "RBBM | AHB transfer timeout\n");
942 
943 	if (status & A5XX_RBBM_INT_0_MASK_RBBM_ME_MS_TIMEOUT)
944 		dev_err_ratelimited(gpu->dev->dev, "RBBM | ME master split | status=0x%X\n",
945 			gpu_read(gpu, REG_A5XX_RBBM_AHB_ME_SPLIT_STATUS));
946 
947 	if (status & A5XX_RBBM_INT_0_MASK_RBBM_PFP_MS_TIMEOUT)
948 		dev_err_ratelimited(gpu->dev->dev, "RBBM | PFP master split | status=0x%X\n",
949 			gpu_read(gpu, REG_A5XX_RBBM_AHB_PFP_SPLIT_STATUS));
950 
951 	if (status & A5XX_RBBM_INT_0_MASK_RBBM_ETS_MS_TIMEOUT)
952 		dev_err_ratelimited(gpu->dev->dev, "RBBM | ETS master split | status=0x%X\n",
953 			gpu_read(gpu, REG_A5XX_RBBM_AHB_ETS_SPLIT_STATUS));
954 
955 	if (status & A5XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNC_OVERFLOW)
956 		dev_err_ratelimited(gpu->dev->dev, "RBBM | ATB ASYNC overflow\n");
957 
958 	if (status & A5XX_RBBM_INT_0_MASK_RBBM_ATB_BUS_OVERFLOW)
959 		dev_err_ratelimited(gpu->dev->dev, "RBBM | ATB bus overflow\n");
960 }
961 
962 static void a5xx_uche_err_irq(struct msm_gpu *gpu)
963 {
964 	uint64_t addr = (uint64_t) gpu_read(gpu, REG_A5XX_UCHE_TRAP_LOG_HI);
965 
966 	addr |= gpu_read(gpu, REG_A5XX_UCHE_TRAP_LOG_LO);
967 
968 	dev_err_ratelimited(gpu->dev->dev, "UCHE | Out of bounds access | addr=0x%llX\n",
969 		addr);
970 }
971 
972 static void a5xx_gpmu_err_irq(struct msm_gpu *gpu)
973 {
974 	dev_err_ratelimited(gpu->dev->dev, "GPMU | voltage droop\n");
975 }
976 
977 static void a5xx_fault_detect_irq(struct msm_gpu *gpu)
978 {
979 	struct drm_device *dev = gpu->dev;
980 	struct msm_drm_private *priv = dev->dev_private;
981 	struct msm_ringbuffer *ring = gpu->funcs->active_ring(gpu);
982 
983 	DRM_DEV_ERROR(dev->dev, "gpu fault ring %d fence %x status %8.8X rb %4.4x/%4.4x ib1 %16.16llX/%4.4x ib2 %16.16llX/%4.4x\n",
984 		ring ? ring->id : -1, ring ? ring->seqno : 0,
985 		gpu_read(gpu, REG_A5XX_RBBM_STATUS),
986 		gpu_read(gpu, REG_A5XX_CP_RB_RPTR),
987 		gpu_read(gpu, REG_A5XX_CP_RB_WPTR),
988 		gpu_read64(gpu, REG_A5XX_CP_IB1_BASE, REG_A5XX_CP_IB1_BASE_HI),
989 		gpu_read(gpu, REG_A5XX_CP_IB1_BUFSZ),
990 		gpu_read64(gpu, REG_A5XX_CP_IB2_BASE, REG_A5XX_CP_IB2_BASE_HI),
991 		gpu_read(gpu, REG_A5XX_CP_IB2_BUFSZ));
992 
993 	/* Turn off the hangcheck timer to keep it from bothering us */
994 	del_timer(&gpu->hangcheck_timer);
995 
996 	queue_work(priv->wq, &gpu->recover_work);
997 }
998 
999 #define RBBM_ERROR_MASK \
1000 	(A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR | \
1001 	A5XX_RBBM_INT_0_MASK_RBBM_TRANSFER_TIMEOUT | \
1002 	A5XX_RBBM_INT_0_MASK_RBBM_ME_MS_TIMEOUT | \
1003 	A5XX_RBBM_INT_0_MASK_RBBM_PFP_MS_TIMEOUT | \
1004 	A5XX_RBBM_INT_0_MASK_RBBM_ETS_MS_TIMEOUT | \
1005 	A5XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNC_OVERFLOW)
1006 
1007 static irqreturn_t a5xx_irq(struct msm_gpu *gpu)
1008 {
1009 	u32 status = gpu_read(gpu, REG_A5XX_RBBM_INT_0_STATUS);
1010 
1011 	/*
1012 	 * Clear all the interrupts except RBBM_AHB_ERROR - if we clear it
1013 	 * before the source is cleared the interrupt will storm.
1014 	 */
1015 	gpu_write(gpu, REG_A5XX_RBBM_INT_CLEAR_CMD,
1016 		status & ~A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR);
1017 
1018 	/* Pass status to a5xx_rbbm_err_irq because we've already cleared it */
1019 	if (status & RBBM_ERROR_MASK)
1020 		a5xx_rbbm_err_irq(gpu, status);
1021 
1022 	if (status & A5XX_RBBM_INT_0_MASK_CP_HW_ERROR)
1023 		a5xx_cp_err_irq(gpu);
1024 
1025 	if (status & A5XX_RBBM_INT_0_MASK_MISC_HANG_DETECT)
1026 		a5xx_fault_detect_irq(gpu);
1027 
1028 	if (status & A5XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS)
1029 		a5xx_uche_err_irq(gpu);
1030 
1031 	if (status & A5XX_RBBM_INT_0_MASK_GPMU_VOLTAGE_DROOP)
1032 		a5xx_gpmu_err_irq(gpu);
1033 
1034 	if (status & A5XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS) {
1035 		a5xx_preempt_trigger(gpu);
1036 		msm_gpu_retire(gpu);
1037 	}
1038 
1039 	if (status & A5XX_RBBM_INT_0_MASK_CP_SW)
1040 		a5xx_preempt_irq(gpu);
1041 
1042 	return IRQ_HANDLED;
1043 }
1044 
1045 static const u32 a5xx_register_offsets[REG_ADRENO_REGISTER_MAX] = {
1046 	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_BASE, REG_A5XX_CP_RB_BASE),
1047 	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_BASE_HI, REG_A5XX_CP_RB_BASE_HI),
1048 	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_RPTR_ADDR, REG_A5XX_CP_RB_RPTR_ADDR),
1049 	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_RPTR_ADDR_HI,
1050 		REG_A5XX_CP_RB_RPTR_ADDR_HI),
1051 	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_RPTR, REG_A5XX_CP_RB_RPTR),
1052 	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_WPTR, REG_A5XX_CP_RB_WPTR),
1053 	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_CNTL, REG_A5XX_CP_RB_CNTL),
1054 };
1055 
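/*
 * Register ranges captured for debugfs and crash dumps: pairs of inclusive
 * start/end offsets, terminated by ~0.
 */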
1056 static const u32 a5xx_registers[] = {
1057 	0x0000, 0x0002, 0x0004, 0x0020, 0x0022, 0x0026, 0x0029, 0x002B,
1058 	0x002E, 0x0035, 0x0038, 0x0042, 0x0044, 0x0044, 0x0047, 0x0095,
1059 	0x0097, 0x00BB, 0x03A0, 0x0464, 0x0469, 0x046F, 0x04D2, 0x04D3,
1060 	0x04E0, 0x0533, 0x0540, 0x0555, 0x0800, 0x081A, 0x081F, 0x0841,
1061 	0x0860, 0x0860, 0x0880, 0x08A0, 0x0B00, 0x0B12, 0x0B15, 0x0B28,
1062 	0x0B78, 0x0B7F, 0x0BB0, 0x0BBD, 0x0BC0, 0x0BC6, 0x0BD0, 0x0C53,
1063 	0x0C60, 0x0C61, 0x0C80, 0x0C82, 0x0C84, 0x0C85, 0x0C90, 0x0C98,
1064 	0x0CA0, 0x0CA0, 0x0CB0, 0x0CB2, 0x2180, 0x2185, 0x2580, 0x2585,
1065 	0x0CC1, 0x0CC1, 0x0CC4, 0x0CC7, 0x0CCC, 0x0CCC, 0x0CD0, 0x0CD8,
1066 	0x0CE0, 0x0CE5, 0x0CE8, 0x0CE8, 0x0CEC, 0x0CF1, 0x0CFB, 0x0D0E,
1067 	0x2100, 0x211E, 0x2140, 0x2145, 0x2500, 0x251E, 0x2540, 0x2545,
1068 	0x0D10, 0x0D17, 0x0D20, 0x0D23, 0x0D30, 0x0D30, 0x20C0, 0x20C0,
1069 	0x24C0, 0x24C0, 0x0E40, 0x0E43, 0x0E4A, 0x0E4A, 0x0E50, 0x0E57,
1070 	0x0E60, 0x0E7C, 0x0E80, 0x0E8E, 0x0E90, 0x0E96, 0x0EA0, 0x0EA8,
1071 	0x0EB0, 0x0EB2, 0xE140, 0xE147, 0xE150, 0xE187, 0xE1A0, 0xE1A9,
1072 	0xE1B0, 0xE1B6, 0xE1C0, 0xE1C7, 0xE1D0, 0xE1D1, 0xE200, 0xE201,
1073 	0xE210, 0xE21C, 0xE240, 0xE268, 0xE000, 0xE006, 0xE010, 0xE09A,
1074 	0xE0A0, 0xE0A4, 0xE0AA, 0xE0EB, 0xE100, 0xE105, 0xE380, 0xE38F,
1075 	0xE3B0, 0xE3B0, 0xE400, 0xE405, 0xE408, 0xE4E9, 0xE4F0, 0xE4F0,
1076 	0xE280, 0xE280, 0xE282, 0xE2A3, 0xE2A5, 0xE2C2, 0xE940, 0xE947,
1077 	0xE950, 0xE987, 0xE9A0, 0xE9A9, 0xE9B0, 0xE9B6, 0xE9C0, 0xE9C7,
1078 	0xE9D0, 0xE9D1, 0xEA00, 0xEA01, 0xEA10, 0xEA1C, 0xEA40, 0xEA68,
1079 	0xE800, 0xE806, 0xE810, 0xE89A, 0xE8A0, 0xE8A4, 0xE8AA, 0xE8EB,
1080 	0xE900, 0xE905, 0xEB80, 0xEB8F, 0xEBB0, 0xEBB0, 0xEC00, 0xEC05,
1081 	0xEC08, 0xECE9, 0xECF0, 0xECF0, 0xEA80, 0xEA80, 0xEA82, 0xEAA3,
1082 	0xEAA5, 0xEAC2, 0xA800, 0xA800, 0xA820, 0xA828, 0xA840, 0xA87D,
1083 	0xA880, 0xA88D, 0xA890, 0xA8A3, 0xA8D0, 0xA8D8, 0xA8E0, 0xA8F5,
1084 	0xAC60, 0xAC60, ~0,
1085 };
1086 
1087 static void a5xx_dump(struct msm_gpu *gpu)
1088 {
1089 	DRM_DEV_INFO(gpu->dev->dev, "status:   %08x\n",
1090 		gpu_read(gpu, REG_A5XX_RBBM_STATUS));
1091 	adreno_dump(gpu);
1092 }
1093 
1094 static int a5xx_pm_resume(struct msm_gpu *gpu)
1095 {
1096 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1097 	int ret;
1098 
1099 	/* Turn on the core power */
1100 	ret = msm_gpu_pm_resume(gpu);
1101 	if (ret)
1102 		return ret;
1103 
1104 	if (adreno_is_a510(adreno_gpu)) {
1105 		/* Halt the sp_input_clk at HM level */
1106 		gpu_write(gpu, REG_A5XX_RBBM_CLOCK_CNTL, 0x00000055);
1107 		a5xx_set_hwcg(gpu, true);
1108 		/* Turn on sp_input_clk at HM level */
1109 		gpu_rmw(gpu, REG_A5XX_RBBM_CLOCK_CNTL, 0xff, 0);
1110 		return 0;
1111 	}
1112 
1113 	/* Turn on the RBCCU domain first to limit the chances of voltage droop */
1114 	gpu_write(gpu, REG_A5XX_GPMU_RBCCU_POWER_CNTL, 0x778000);
1115 
1116 	/* Wait 3 usecs before polling */
1117 	udelay(3);
1118 
1119 	ret = spin_usecs(gpu, 20, REG_A5XX_GPMU_RBCCU_PWR_CLK_STATUS,
1120 		(1 << 20), (1 << 20));
1121 	if (ret) {
1122 		DRM_ERROR("%s: timeout waiting for RBCCU GDSC enable: %X\n",
1123 			gpu->name,
1124 			gpu_read(gpu, REG_A5XX_GPMU_RBCCU_PWR_CLK_STATUS));
1125 		return ret;
1126 	}
1127 
1128 	/* Turn on the SP domain */
1129 	gpu_write(gpu, REG_A5XX_GPMU_SP_POWER_CNTL, 0x778000);
1130 	ret = spin_usecs(gpu, 20, REG_A5XX_GPMU_SP_PWR_CLK_STATUS,
1131 		(1 << 20), (1 << 20));
1132 	if (ret)
1133 		DRM_ERROR("%s: timeout waiting for SP GDSC enable\n",
1134 			gpu->name);
1135 
1136 	return ret;
1137 }
1138 
1139 static int a5xx_pm_suspend(struct msm_gpu *gpu)
1140 {
1141 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1142 	u32 mask = 0xf;
1143 
1144 	/* A510 has 3 XIN ports in VBIF */
1145 	if (adreno_is_a510(adreno_gpu))
1146 		mask = 0x7;
1147 
1148 	/* Clear the VBIF pipe before shutting down */
1149 	gpu_write(gpu, REG_A5XX_VBIF_XIN_HALT_CTRL0, mask);
1150 	spin_until((gpu_read(gpu, REG_A5XX_VBIF_XIN_HALT_CTRL1) &
1151 				mask) == mask);
1152 
1153 	gpu_write(gpu, REG_A5XX_VBIF_XIN_HALT_CTRL0, 0);
1154 
1155 	/*
1156 	 * Reset the VBIF before power collapse to avoid issues with FIFO
1157 	 * entries
1158 	 */
1159 	gpu_write(gpu, REG_A5XX_RBBM_BLOCK_SW_RESET_CMD, 0x003C0000);
1160 	gpu_write(gpu, REG_A5XX_RBBM_BLOCK_SW_RESET_CMD, 0x00000000);
1161 
1162 	return msm_gpu_pm_suspend(gpu);
1163 }
1164 
1165 static int a5xx_get_timestamp(struct msm_gpu *gpu, uint64_t *value)
1166 {
1167 	*value = gpu_read64(gpu, REG_A5XX_RBBM_PERFCTR_CP_0_LO,
1168 		REG_A5XX_RBBM_PERFCTR_CP_0_HI);
1169 
1170 	return 0;
1171 }
1172 
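/*
 * State for driving the CP crashdump engine: 'ptr' is the kernel mapping of
 * the scratch buffer, 'bo' the backing GEM object and 'iova' its GPU address.
 * The dump script is written at offset 0 and the captured register data at
 * offset 256K (see a5xx_gpu_state_get_hlsq_regs()).
 */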
1173 struct a5xx_crashdumper {
1174 	void *ptr;
1175 	struct drm_gem_object *bo;
1176 	u64 iova;
1177 };
1178 
1179 struct a5xx_gpu_state {
1180 	struct msm_gpu_state base;
1181 	u32 *hlsqregs;
1182 };
1183 
1184 static int a5xx_crashdumper_init(struct msm_gpu *gpu,
1185 		struct a5xx_crashdumper *dumper)
1186 {
1187 	dumper->ptr = msm_gem_kernel_new_locked(gpu->dev,
1188 		SZ_1M, MSM_BO_UNCACHED, gpu->aspace,
1189 		&dumper->bo, &dumper->iova);
1190 
1191 	if (!IS_ERR(dumper->ptr))
1192 		msm_gem_object_set_name(dumper->bo, "crashdump");
1193 
1194 	return PTR_ERR_OR_ZERO(dumper->ptr);
1195 }
1196 
1197 static int a5xx_crashdumper_run(struct msm_gpu *gpu,
1198 		struct a5xx_crashdumper *dumper)
1199 {
1200 	u32 val;
1201 
1202 	if (IS_ERR_OR_NULL(dumper->ptr))
1203 		return -EINVAL;
1204 
1205 	gpu_write64(gpu, REG_A5XX_CP_CRASH_SCRIPT_BASE_LO,
1206 		REG_A5XX_CP_CRASH_SCRIPT_BASE_HI, dumper->iova);
1207 
1208 	gpu_write(gpu, REG_A5XX_CP_CRASH_DUMP_CNTL, 1);
1209 
1210 	return gpu_poll_timeout(gpu, REG_A5XX_CP_CRASH_DUMP_CNTL, val,
1211 		val & 0x04, 100, 10000);
1212 }
1213 
1214 /*
1215  * A list of the registers that need to be read through the HLSQ aperture
1216  * via the crashdumper.  These are not nominally accessible from the CPU
1217  * on a secure platform.
1218  */
1219 static const struct {
1220 	u32 type;
1221 	u32 regoffset;
1222 	u32 count;
1223 } a5xx_hlsq_aperture_regs[] = {
1224 	{ 0x35, 0xe00, 0x32 },   /* HLSQ non-context */
1225 	{ 0x31, 0x2080, 0x1 },   /* HLSQ 2D context 0 */
1226 	{ 0x33, 0x2480, 0x1 },   /* HLSQ 2D context 1 */
1227 	{ 0x32, 0xe780, 0x62 },  /* HLSQ 3D context 0 */
1228 	{ 0x34, 0xef80, 0x62 },  /* HLSQ 3D context 1 */
1229 	{ 0x3f, 0x0ec0, 0x40 },  /* SP non-context */
1230 	{ 0x3d, 0x2040, 0x1 },   /* SP 2D context 0 */
1231 	{ 0x3b, 0x2440, 0x1 },   /* SP 2D context 1 */
1232 	{ 0x3e, 0xe580, 0x170 }, /* SP 3D context 0 */
1233 	{ 0x3c, 0xed80, 0x170 }, /* SP 3D context 1 */
1234 	{ 0x3a, 0x0f00, 0x1c },  /* TP non-context */
1235 	{ 0x38, 0x2000, 0xa },   /* TP 2D context 0 */
1236 	{ 0x36, 0x2400, 0xa },   /* TP 2D context 1 */
1237 	{ 0x39, 0xe700, 0x80 },  /* TP 3D context 0 */
1238 	{ 0x37, 0xef00, 0x80 },  /* TP 3D context 1 */
1239 };
1240 
1241 static void a5xx_gpu_state_get_hlsq_regs(struct msm_gpu *gpu,
1242 		struct a5xx_gpu_state *a5xx_state)
1243 {
1244 	struct a5xx_crashdumper dumper = { 0 };
1245 	u32 offset, count = 0;
1246 	u64 *ptr;
1247 	int i;
1248 
1249 	if (a5xx_crashdumper_init(gpu, &dumper))
1250 		return;
1251 
1252 	/* The script will be written at offset 0 */
1253 	ptr = dumper.ptr;
1254 
1255 	/* Start writing the data at offset 256k */
1256 	offset = dumper.iova + (256 * SZ_1K);
1257 
1258 	/* Count how many additional registers to get from the HLSQ aperture */
1259 	for (i = 0; i < ARRAY_SIZE(a5xx_hlsq_aperture_regs); i++)
1260 		count += a5xx_hlsq_aperture_regs[i].count;
1261 
1262 	a5xx_state->hlsqregs = kcalloc(count, sizeof(u32), GFP_KERNEL);
1263 	if (!a5xx_state->hlsqregs)
1264 		return;
1265 
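	/*
	 * The crashdump script below is a sequence of 64-bit word pairs.  The
	 * second word of each pair holds the target register offset (shifted
	 * into the upper bits) plus a dword count; with bit 21 set the first
	 * word is a value to write to that register, otherwise the first word
	 * is the destination iova in the dumper BO for a register read.  Two
	 * zero words terminate the script.
	 */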
1266 	/* Build the crashdump script */
1267 	for (i = 0; i < ARRAY_SIZE(a5xx_hlsq_aperture_regs); i++) {
1268 		u32 type = a5xx_hlsq_aperture_regs[i].type;
1269 		u32 c = a5xx_hlsq_aperture_regs[i].count;
1270 
1271 		/* Write the register to select the desired bank */
1272 		*ptr++ = ((u64) type << 8);
1273 		*ptr++ = (((u64) REG_A5XX_HLSQ_DBG_READ_SEL) << 44) |
1274 			(1 << 21) | 1;
1275 
1276 		*ptr++ = offset;
1277 		*ptr++ = (((u64) REG_A5XX_HLSQ_DBG_AHB_READ_APERTURE) << 44)
1278 			| c;
1279 
1280 		offset += c * sizeof(u32);
1281 	}
1282 
1283 	/* Write two zeros to close off the script */
1284 	*ptr++ = 0;
1285 	*ptr++ = 0;
1286 
1287 	if (a5xx_crashdumper_run(gpu, &dumper)) {
1288 		kfree(a5xx_state->hlsqregs);
1289 		msm_gem_kernel_put(dumper.bo, gpu->aspace, true);
1290 		return;
1291 	}
1292 
1293 	/* Copy the data from the crashdumper to the state */
1294 	memcpy(a5xx_state->hlsqregs, dumper.ptr + (256 * SZ_1K),
1295 		count * sizeof(u32));
1296 
1297 	msm_gem_kernel_put(dumper.bo, gpu->aspace, true);
1298 }
1299 
1300 static struct msm_gpu_state *a5xx_gpu_state_get(struct msm_gpu *gpu)
1301 {
1302 	struct a5xx_gpu_state *a5xx_state = kzalloc(sizeof(*a5xx_state),
1303 			GFP_KERNEL);
1304 
1305 	if (!a5xx_state)
1306 		return ERR_PTR(-ENOMEM);
1307 
1308 	/* Temporarily disable hardware clock gating before reading the hw */
1309 	a5xx_set_hwcg(gpu, false);
1310 
1311 	/* First get the generic state from the adreno core */
1312 	adreno_gpu_state_get(gpu, &(a5xx_state->base));
1313 
1314 	a5xx_state->base.rbbm_status = gpu_read(gpu, REG_A5XX_RBBM_STATUS);
1315 
1316 	/* Get the HLSQ regs with the help of the crashdumper */
1317 	a5xx_gpu_state_get_hlsq_regs(gpu, a5xx_state);
1318 
1319 	a5xx_set_hwcg(gpu, true);
1320 
1321 	return &a5xx_state->base;
1322 }
1323 
1324 static void a5xx_gpu_state_destroy(struct kref *kref)
1325 {
1326 	struct msm_gpu_state *state = container_of(kref,
1327 		struct msm_gpu_state, ref);
1328 	struct a5xx_gpu_state *a5xx_state = container_of(state,
1329 		struct a5xx_gpu_state, base);
1330 
1331 	kfree(a5xx_state->hlsqregs);
1332 
1333 	adreno_gpu_state_destroy(state);
1334 	kfree(a5xx_state);
1335 }
1336 
1337 static int a5xx_gpu_state_put(struct msm_gpu_state *state)
1338 {
1339 	if (IS_ERR_OR_NULL(state))
1340 		return 1;
1341 
1342 	return kref_put(&state->ref, a5xx_gpu_state_destroy);
1343 }
1344 
1346 #if defined(CONFIG_DEBUG_FS) || defined(CONFIG_DEV_COREDUMP)
1347 static void a5xx_show(struct msm_gpu *gpu, struct msm_gpu_state *state,
1348 		      struct drm_printer *p)
1349 {
1350 	int i, j;
1351 	u32 pos = 0;
1352 	struct a5xx_gpu_state *a5xx_state = container_of(state,
1353 		struct a5xx_gpu_state, base);
1354 
1355 	if (IS_ERR_OR_NULL(state))
1356 		return;
1357 
1358 	adreno_show(gpu, state, p);
1359 
1360 	/* Dump the additional a5xx HLSQ registers */
1361 	if (!a5xx_state->hlsqregs)
1362 		return;
1363 
1364 	drm_printf(p, "registers-hlsq:\n");
1365 
1366 	for (i = 0; i < ARRAY_SIZE(a5xx_hlsq_aperture_regs); i++) {
1367 		u32 o = a5xx_hlsq_aperture_regs[i].regoffset;
1368 		u32 c = a5xx_hlsq_aperture_regs[i].count;
1369 
1370 		for (j = 0; j < c; j++, pos++, o++) {
1371 			/*
1372 			 * To keep the crashdump simple we pull the entire range
1373 			 * for each register type but not all of the registers
1374 			 * in the range are valid. Fortunately invalid registers
1375 			 * stick out like a sore thumb with a value of
1376 			 * 0xdeadbeef
1377 			 */
1378 			if (a5xx_state->hlsqregs[pos] == 0xdeadbeef)
1379 				continue;
1380 
1381 			drm_printf(p, "  - { offset: 0x%04x, value: 0x%08x }\n",
1382 				o << 2, a5xx_state->hlsqregs[pos]);
1383 		}
1384 	}
1385 }
1386 #endif
1387 
1388 static struct msm_ringbuffer *a5xx_active_ring(struct msm_gpu *gpu)
1389 {
1390 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1391 	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
1392 
1393 	return a5xx_gpu->cur_ring;
1394 }
1395 
1396 static unsigned long a5xx_gpu_busy(struct msm_gpu *gpu)
1397 {
1398 	u64 busy_cycles, busy_time;
1399 
1400 	busy_cycles = gpu_read64(gpu, REG_A5XX_RBBM_PERFCTR_RBBM_0_LO,
1401 			REG_A5XX_RBBM_PERFCTR_RBBM_0_HI);
1402 
1403 	busy_time = busy_cycles - gpu->devfreq.busy_cycles;
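	/* Convert the cycle delta to an approximate busy time in usecs */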
1404 	do_div(busy_time, clk_get_rate(gpu->core_clk) / 1000000);
1405 
1406 	gpu->devfreq.busy_cycles = busy_cycles;
1407 
1408 	if (WARN_ON(busy_time > ~0LU))
1409 		return ~0LU;
1410 
1411 	return (unsigned long)busy_time;
1412 }
1413 
1414 static const struct adreno_gpu_funcs funcs = {
1415 	.base = {
1416 		.get_param = adreno_get_param,
1417 		.hw_init = a5xx_hw_init,
1418 		.pm_suspend = a5xx_pm_suspend,
1419 		.pm_resume = a5xx_pm_resume,
1420 		.recover = a5xx_recover,
1421 		.submit = a5xx_submit,
1422 		.flush = a5xx_flush,
1423 		.active_ring = a5xx_active_ring,
1424 		.irq = a5xx_irq,
1425 		.destroy = a5xx_destroy,
1426 #if defined(CONFIG_DEBUG_FS) || defined(CONFIG_DEV_COREDUMP)
1427 		.show = a5xx_show,
1428 #endif
1429 #if defined(CONFIG_DEBUG_FS)
1430 		.debugfs_init = a5xx_debugfs_init,
1431 #endif
1432 		.gpu_busy = a5xx_gpu_busy,
1433 		.gpu_state_get = a5xx_gpu_state_get,
1434 		.gpu_state_put = a5xx_gpu_state_put,
1435 	},
1436 	.get_timestamp = a5xx_get_timestamp,
1437 };
1438 
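/*
 * If the device tree describes a speed-bin fuse, read it and tell the OPP
 * layer which opp-supported-hw bit applies, so that only frequencies valid
 * for this bin are made available.
 */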
1439 static void check_speed_bin(struct device *dev)
1440 {
1441 	struct nvmem_cell *cell;
1442 	u32 bin, val;
	void *buf;
1443 
1444 	cell = nvmem_cell_get(dev, "speed_bin");
1445 
1446 	/* If an nvmem cell isn't defined, there is nothing to do */
1447 	if (IS_ERR(cell))
1448 		return;
1449 
1450 	buf = nvmem_cell_read(cell, NULL);
1451 	nvmem_cell_put(cell);

	/* Bail out if the fuse could not be read; also avoid leaking the buffer */
	if (IS_ERR(buf))
		return;

	bin = *((u32 *) buf);
	kfree(buf);
1452 
1453 	val = (1 << bin);
1454 
1455 	dev_pm_opp_set_supported_hw(dev, &val, 1);
1456 }
1457 
1458 struct msm_gpu *a5xx_gpu_init(struct drm_device *dev)
1459 {
1460 	struct msm_drm_private *priv = dev->dev_private;
1461 	struct platform_device *pdev = priv->gpu_pdev;
1462 	struct a5xx_gpu *a5xx_gpu = NULL;
1463 	struct adreno_gpu *adreno_gpu;
1464 	struct msm_gpu *gpu;
1465 	int ret;
1466 
1467 	if (!pdev) {
1468 		DRM_DEV_ERROR(dev->dev, "No A5XX device is defined\n");
1469 		return ERR_PTR(-ENXIO);
1470 	}
1471 
1472 	a5xx_gpu = kzalloc(sizeof(*a5xx_gpu), GFP_KERNEL);
1473 	if (!a5xx_gpu)
1474 		return ERR_PTR(-ENOMEM);
1475 
1476 	adreno_gpu = &a5xx_gpu->base;
1477 	gpu = &adreno_gpu->base;
1478 
1479 	adreno_gpu->registers = a5xx_registers;
1480 	adreno_gpu->reg_offsets = a5xx_register_offsets;
1481 
1482 	a5xx_gpu->lm_leakage = 0x4E001A;
1483 
1484 	check_speed_bin(&pdev->dev);
1485 
1486 	ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs, 4);
1487 	if (ret) {
1488 		a5xx_destroy(&(a5xx_gpu->base.base));
1489 		return ERR_PTR(ret);
1490 	}
1491 
1492 	if (gpu->aspace)
1493 		msm_mmu_set_fault_handler(gpu->aspace->mmu, gpu, a5xx_fault_handler);
1494 
1495 	/* Set up the preemption specific bits and pieces for each ringbuffer */
1496 	a5xx_preempt_init(gpu);
1497 
1498 	return gpu;
1499 }
1500