xref: /openbmc/linux/drivers/gpu/drm/msm/adreno/a5xx_gpu.c (revision 22d55f02)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /* Copyright (c) 2016-2017 The Linux Foundation. All rights reserved.
3  */
4 
5 #include <linux/kernel.h>
6 #include <linux/types.h>
7 #include <linux/cpumask.h>
8 #include <linux/qcom_scm.h>
9 #include <linux/pm_opp.h>
10 #include <linux/nvmem-consumer.h>
11 #include <linux/slab.h>
12 #include "msm_gem.h"
13 #include "msm_mmu.h"
14 #include "a5xx_gpu.h"
15 
16 extern bool hang_debug;
17 static void a5xx_dump(struct msm_gpu *gpu);
18 
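/* Peripheral Authentication Service id for the GPU, used for the SCM zap shader calls */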
19 #define GPU_PAS_ID 13
20 
21 static void a5xx_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
22 {
23 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
24 	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
25 	uint32_t wptr;
26 	unsigned long flags;
27 
28 	spin_lock_irqsave(&ring->lock, flags);
29 
30 	/* Copy the shadow to the actual register */
31 	ring->cur = ring->next;
32 
33 	/* Make sure to wrap wptr if we need to */
34 	wptr = get_wptr(ring);
35 
36 	spin_unlock_irqrestore(&ring->lock, flags);
37 
38 	/* Make sure everything is posted before making a decision */
39 	mb();
40 
41 	/* Update HW if this is the current ring and we are not in preempt */
42 	if (a5xx_gpu->cur_ring == ring && !a5xx_in_preempt(a5xx_gpu))
43 		gpu_write(gpu, REG_A5XX_CP_RB_WPTR, wptr);
44 }
45 
46 static void a5xx_submit_in_rb(struct msm_gpu *gpu, struct msm_gem_submit *submit,
47 	struct msm_file_private *ctx)
48 {
49 	struct msm_drm_private *priv = gpu->dev->dev_private;
50 	struct msm_ringbuffer *ring = submit->ring;
51 	struct msm_gem_object *obj;
52 	uint32_t *ptr, dwords;
53 	unsigned int i, j;
54 
55 	for (i = 0; i < submit->nr_cmds; i++) {
56 		switch (submit->cmd[i].type) {
57 		case MSM_SUBMIT_CMD_IB_TARGET_BUF:
58 			break;
59 		case MSM_SUBMIT_CMD_CTX_RESTORE_BUF:
60 			if (priv->lastctx == ctx)
61 				break;
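			/* fall through */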
62 		case MSM_SUBMIT_CMD_BUF:
63 			/* copy commands into RB: */
64 			obj = submit->bos[submit->cmd[i].idx].obj;
65 			dwords = submit->cmd[i].size;
66 
67 			ptr = msm_gem_get_vaddr(&obj->base);
68 
69 			/* _get_vaddr() shouldn't fail at this point,
70 			 * since we've already mapped it once in
71 			 * submit_reloc()
72 			 */
73 			if (WARN_ON(!ptr))
74 				return;
75 
76 			for (j = 0; j < dwords; j++) {
77 				/* Normally OUT_PKTn() would wait
78 				 * for space for the packet.  But since
79 				 * we just OUT_RING() the whole thing,
80 				 * we need to call adreno_wait_ring()
81 				 * ourselves:
82 				 */
83 				adreno_wait_ring(ring, 1);
84 				OUT_RING(ring, ptr[j]);
85 			}
86 
87 			msm_gem_put_vaddr(&obj->base);
88 
89 			break;
90 		}
91 	}
92 
93 	a5xx_flush(gpu, ring);
94 	a5xx_preempt_trigger(gpu);
95 
96 	/* we might not necessarily have a cmd from userspace to
97 	 * trigger an event to know that the submit has completed, so
98 	 * do this manually:
99 	 */
100 	a5xx_idle(gpu, ring);
101 	ring->memptrs->fence = submit->seqno;
102 	msm_gpu_retire(gpu);
103 }
104 
105 static void a5xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit,
106 	struct msm_file_private *ctx)
107 {
108 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
109 	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
110 	struct msm_drm_private *priv = gpu->dev->dev_private;
111 	struct msm_ringbuffer *ring = submit->ring;
112 	unsigned int i, ibs = 0;
113 
114 	if (IS_ENABLED(CONFIG_DRM_MSM_GPU_SUDO) && submit->in_rb) {
115 		priv->lastctx = NULL;
116 		a5xx_submit_in_rb(gpu, submit, ctx);
117 		return;
118 	}
119 
120 	OUT_PKT7(ring, CP_PREEMPT_ENABLE_GLOBAL, 1);
121 	OUT_RING(ring, 0x02);
122 
123 	/* Turn off protected mode to write to special registers */
124 	OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
125 	OUT_RING(ring, 0);
126 
127 	/* Set the save preemption record for the ring/command */
128 	OUT_PKT4(ring, REG_A5XX_CP_CONTEXT_SWITCH_SAVE_ADDR_LO, 2);
129 	OUT_RING(ring, lower_32_bits(a5xx_gpu->preempt_iova[submit->ring->id]));
130 	OUT_RING(ring, upper_32_bits(a5xx_gpu->preempt_iova[submit->ring->id]));
131 
132 	/* Turn back on protected mode */
133 	OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
134 	OUT_RING(ring, 1);
135 
136 	/* Enable local preemption for finegrain preemption */
137 	OUT_PKT7(ring, CP_PREEMPT_ENABLE_GLOBAL, 1);
138 	OUT_RING(ring, 0x02);
139 
140 	/* Allow CP_CONTEXT_SWITCH_YIELD packets in the IB2 */
141 	OUT_PKT7(ring, CP_YIELD_ENABLE, 1);
142 	OUT_RING(ring, 0x02);
143 
144 	/* Submit the commands */
145 	for (i = 0; i < submit->nr_cmds; i++) {
146 		switch (submit->cmd[i].type) {
147 		case MSM_SUBMIT_CMD_IB_TARGET_BUF:
148 			break;
149 		case MSM_SUBMIT_CMD_CTX_RESTORE_BUF:
150 			if (priv->lastctx == ctx)
151 				break;
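			/* fall through */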
152 		case MSM_SUBMIT_CMD_BUF:
153 			OUT_PKT7(ring, CP_INDIRECT_BUFFER_PFE, 3);
154 			OUT_RING(ring, lower_32_bits(submit->cmd[i].iova));
155 			OUT_RING(ring, upper_32_bits(submit->cmd[i].iova));
156 			OUT_RING(ring, submit->cmd[i].size);
157 			ibs++;
158 			break;
159 		}
160 	}
161 
162 	/*
163 	 * Write the render mode to NULL (0) to indicate to the CP that the IBs
164 	 * are done rendering - otherwise a lucky preemption would start
165 	 * replaying from the last checkpoint
166 	 */
167 	OUT_PKT7(ring, CP_SET_RENDER_MODE, 5);
168 	OUT_RING(ring, 0);
169 	OUT_RING(ring, 0);
170 	OUT_RING(ring, 0);
171 	OUT_RING(ring, 0);
172 	OUT_RING(ring, 0);
173 
174 	/* Turn off IB level preemptions */
175 	OUT_PKT7(ring, CP_YIELD_ENABLE, 1);
176 	OUT_RING(ring, 0x01);
177 
178 	/* Write the fence to the scratch register */
179 	OUT_PKT4(ring, REG_A5XX_CP_SCRATCH_REG(2), 1);
180 	OUT_RING(ring, submit->seqno);
181 
182 	/*
183 	 * Execute a CACHE_FLUSH_TS event. This will ensure that the
184 	 * timestamp is written to the memory and then triggers the interrupt
185 	 */
186 	OUT_PKT7(ring, CP_EVENT_WRITE, 4);
187 	OUT_RING(ring, CACHE_FLUSH_TS | (1 << 31));
188 	OUT_RING(ring, lower_32_bits(rbmemptr(ring, fence)));
189 	OUT_RING(ring, upper_32_bits(rbmemptr(ring, fence)));
190 	OUT_RING(ring, submit->seqno);
191 
192 	/* Yield the floor on command completion */
193 	OUT_PKT7(ring, CP_CONTEXT_SWITCH_YIELD, 4);
194 	/*
195 	 * If dword[2:1] are non-zero, they specify an address for the CP to
196 	 * write the value of dword[3] to on preemption complete. Write 0 to
197 	 * skip the write.
198 	 */
199 	OUT_RING(ring, 0x00);
200 	OUT_RING(ring, 0x00);
201 	/* Data value - not used if the address above is 0 */
202 	OUT_RING(ring, 0x01);
203 	/* Set bit 0 to trigger an interrupt on preempt complete */
204 	OUT_RING(ring, 0x01);
205 
206 	a5xx_flush(gpu, ring);
207 
208 	/* Check to see if we need to start preemption */
209 	a5xx_preempt_trigger(gpu);
210 }
211 
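/* Hardware clock gating setup: register/value pairs programmed by a5xx_set_hwcg() */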
212 static const struct {
213 	u32 offset;
214 	u32 value;
215 } a5xx_hwcg[] = {
216 	{REG_A5XX_RBBM_CLOCK_CNTL_SP0, 0x02222222},
217 	{REG_A5XX_RBBM_CLOCK_CNTL_SP1, 0x02222222},
218 	{REG_A5XX_RBBM_CLOCK_CNTL_SP2, 0x02222222},
219 	{REG_A5XX_RBBM_CLOCK_CNTL_SP3, 0x02222222},
220 	{REG_A5XX_RBBM_CLOCK_CNTL2_SP0, 0x02222220},
221 	{REG_A5XX_RBBM_CLOCK_CNTL2_SP1, 0x02222220},
222 	{REG_A5XX_RBBM_CLOCK_CNTL2_SP2, 0x02222220},
223 	{REG_A5XX_RBBM_CLOCK_CNTL2_SP3, 0x02222220},
224 	{REG_A5XX_RBBM_CLOCK_HYST_SP0, 0x0000F3CF},
225 	{REG_A5XX_RBBM_CLOCK_HYST_SP1, 0x0000F3CF},
226 	{REG_A5XX_RBBM_CLOCK_HYST_SP2, 0x0000F3CF},
227 	{REG_A5XX_RBBM_CLOCK_HYST_SP3, 0x0000F3CF},
228 	{REG_A5XX_RBBM_CLOCK_DELAY_SP0, 0x00000080},
229 	{REG_A5XX_RBBM_CLOCK_DELAY_SP1, 0x00000080},
230 	{REG_A5XX_RBBM_CLOCK_DELAY_SP2, 0x00000080},
231 	{REG_A5XX_RBBM_CLOCK_DELAY_SP3, 0x00000080},
232 	{REG_A5XX_RBBM_CLOCK_CNTL_TP0, 0x22222222},
233 	{REG_A5XX_RBBM_CLOCK_CNTL_TP1, 0x22222222},
234 	{REG_A5XX_RBBM_CLOCK_CNTL_TP2, 0x22222222},
235 	{REG_A5XX_RBBM_CLOCK_CNTL_TP3, 0x22222222},
236 	{REG_A5XX_RBBM_CLOCK_CNTL2_TP0, 0x22222222},
237 	{REG_A5XX_RBBM_CLOCK_CNTL2_TP1, 0x22222222},
238 	{REG_A5XX_RBBM_CLOCK_CNTL2_TP2, 0x22222222},
239 	{REG_A5XX_RBBM_CLOCK_CNTL2_TP3, 0x22222222},
240 	{REG_A5XX_RBBM_CLOCK_CNTL3_TP0, 0x00002222},
241 	{REG_A5XX_RBBM_CLOCK_CNTL3_TP1, 0x00002222},
242 	{REG_A5XX_RBBM_CLOCK_CNTL3_TP2, 0x00002222},
243 	{REG_A5XX_RBBM_CLOCK_CNTL3_TP3, 0x00002222},
244 	{REG_A5XX_RBBM_CLOCK_HYST_TP0, 0x77777777},
245 	{REG_A5XX_RBBM_CLOCK_HYST_TP1, 0x77777777},
246 	{REG_A5XX_RBBM_CLOCK_HYST_TP2, 0x77777777},
247 	{REG_A5XX_RBBM_CLOCK_HYST_TP3, 0x77777777},
248 	{REG_A5XX_RBBM_CLOCK_HYST2_TP0, 0x77777777},
249 	{REG_A5XX_RBBM_CLOCK_HYST2_TP1, 0x77777777},
250 	{REG_A5XX_RBBM_CLOCK_HYST2_TP2, 0x77777777},
251 	{REG_A5XX_RBBM_CLOCK_HYST2_TP3, 0x77777777},
252 	{REG_A5XX_RBBM_CLOCK_HYST3_TP0, 0x00007777},
253 	{REG_A5XX_RBBM_CLOCK_HYST3_TP1, 0x00007777},
254 	{REG_A5XX_RBBM_CLOCK_HYST3_TP2, 0x00007777},
255 	{REG_A5XX_RBBM_CLOCK_HYST3_TP3, 0x00007777},
256 	{REG_A5XX_RBBM_CLOCK_DELAY_TP0, 0x11111111},
257 	{REG_A5XX_RBBM_CLOCK_DELAY_TP1, 0x11111111},
258 	{REG_A5XX_RBBM_CLOCK_DELAY_TP2, 0x11111111},
259 	{REG_A5XX_RBBM_CLOCK_DELAY_TP3, 0x11111111},
260 	{REG_A5XX_RBBM_CLOCK_DELAY2_TP0, 0x11111111},
261 	{REG_A5XX_RBBM_CLOCK_DELAY2_TP1, 0x11111111},
262 	{REG_A5XX_RBBM_CLOCK_DELAY2_TP2, 0x11111111},
263 	{REG_A5XX_RBBM_CLOCK_DELAY2_TP3, 0x11111111},
264 	{REG_A5XX_RBBM_CLOCK_DELAY3_TP0, 0x00001111},
265 	{REG_A5XX_RBBM_CLOCK_DELAY3_TP1, 0x00001111},
266 	{REG_A5XX_RBBM_CLOCK_DELAY3_TP2, 0x00001111},
267 	{REG_A5XX_RBBM_CLOCK_DELAY3_TP3, 0x00001111},
268 	{REG_A5XX_RBBM_CLOCK_CNTL_UCHE, 0x22222222},
269 	{REG_A5XX_RBBM_CLOCK_CNTL2_UCHE, 0x22222222},
270 	{REG_A5XX_RBBM_CLOCK_CNTL3_UCHE, 0x22222222},
271 	{REG_A5XX_RBBM_CLOCK_CNTL4_UCHE, 0x00222222},
272 	{REG_A5XX_RBBM_CLOCK_HYST_UCHE, 0x00444444},
273 	{REG_A5XX_RBBM_CLOCK_DELAY_UCHE, 0x00000002},
274 	{REG_A5XX_RBBM_CLOCK_CNTL_RB0, 0x22222222},
275 	{REG_A5XX_RBBM_CLOCK_CNTL_RB1, 0x22222222},
276 	{REG_A5XX_RBBM_CLOCK_CNTL_RB2, 0x22222222},
277 	{REG_A5XX_RBBM_CLOCK_CNTL_RB3, 0x22222222},
278 	{REG_A5XX_RBBM_CLOCK_CNTL2_RB0, 0x00222222},
279 	{REG_A5XX_RBBM_CLOCK_CNTL2_RB1, 0x00222222},
280 	{REG_A5XX_RBBM_CLOCK_CNTL2_RB2, 0x00222222},
281 	{REG_A5XX_RBBM_CLOCK_CNTL2_RB3, 0x00222222},
282 	{REG_A5XX_RBBM_CLOCK_CNTL_CCU0, 0x00022220},
283 	{REG_A5XX_RBBM_CLOCK_CNTL_CCU1, 0x00022220},
284 	{REG_A5XX_RBBM_CLOCK_CNTL_CCU2, 0x00022220},
285 	{REG_A5XX_RBBM_CLOCK_CNTL_CCU3, 0x00022220},
286 	{REG_A5XX_RBBM_CLOCK_CNTL_RAC, 0x05522222},
287 	{REG_A5XX_RBBM_CLOCK_CNTL2_RAC, 0x00505555},
288 	{REG_A5XX_RBBM_CLOCK_HYST_RB_CCU0, 0x04040404},
289 	{REG_A5XX_RBBM_CLOCK_HYST_RB_CCU1, 0x04040404},
290 	{REG_A5XX_RBBM_CLOCK_HYST_RB_CCU2, 0x04040404},
291 	{REG_A5XX_RBBM_CLOCK_HYST_RB_CCU3, 0x04040404},
292 	{REG_A5XX_RBBM_CLOCK_HYST_RAC, 0x07444044},
293 	{REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_0, 0x00000002},
294 	{REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_1, 0x00000002},
295 	{REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_2, 0x00000002},
296 	{REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_3, 0x00000002},
297 	{REG_A5XX_RBBM_CLOCK_DELAY_RAC, 0x00010011},
298 	{REG_A5XX_RBBM_CLOCK_CNTL_TSE_RAS_RBBM, 0x04222222},
299 	{REG_A5XX_RBBM_CLOCK_MODE_GPC, 0x02222222},
300 	{REG_A5XX_RBBM_CLOCK_MODE_VFD, 0x00002222},
301 	{REG_A5XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM, 0x00000000},
302 	{REG_A5XX_RBBM_CLOCK_HYST_GPC, 0x04104004},
303 	{REG_A5XX_RBBM_CLOCK_HYST_VFD, 0x00000000},
304 	{REG_A5XX_RBBM_CLOCK_DELAY_HLSQ, 0x00000000},
305 	{REG_A5XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM, 0x00004000},
306 	{REG_A5XX_RBBM_CLOCK_DELAY_GPC, 0x00000200},
307 	{REG_A5XX_RBBM_CLOCK_DELAY_VFD, 0x00002222}
308 };
309 
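/*
 * Enable or disable hardware clock gating by writing the per-block CGC
 * registers above along with the top level RBBM clock control registers.
 */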
310 void a5xx_set_hwcg(struct msm_gpu *gpu, bool state)
311 {
312 	unsigned int i;
313 
314 	for (i = 0; i < ARRAY_SIZE(a5xx_hwcg); i++)
315 		gpu_write(gpu, a5xx_hwcg[i].offset,
316 			state ? a5xx_hwcg[i].value : 0);
317 
318 	gpu_write(gpu, REG_A5XX_RBBM_CLOCK_CNTL, state ? 0xAAA8AA00 : 0);
319 	gpu_write(gpu, REG_A5XX_RBBM_ISDB_CNT, state ? 0x182 : 0x180);
320 }
321 
322 static int a5xx_me_init(struct msm_gpu *gpu)
323 {
324 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
325 	struct msm_ringbuffer *ring = gpu->rb[0];
326 
327 	OUT_PKT7(ring, CP_ME_INIT, 8);
328 
329 	OUT_RING(ring, 0x0000002F);
330 
331 	/* Enable multiple hardware contexts */
332 	OUT_RING(ring, 0x00000003);
333 
334 	/* Enable error detection */
335 	OUT_RING(ring, 0x20000000);
336 
337 	/* Don't enable header dump */
338 	OUT_RING(ring, 0x00000000);
339 	OUT_RING(ring, 0x00000000);
340 
341 	/* Specify workarounds for various microcode issues */
342 	if (adreno_is_a530(adreno_gpu)) {
343 		/* Workaround for token end syncs
344 		 * Force a WFI after every direct-render 3D mode draw and every
345 		 * 2D mode 3 draw
346 		 */
347 		OUT_RING(ring, 0x0000000B);
348 	} else {
349 		/* No workarounds enabled */
350 		OUT_RING(ring, 0x00000000);
351 	}
352 
353 	OUT_RING(ring, 0x00000000);
354 	OUT_RING(ring, 0x00000000);
355 
356 	gpu->funcs->flush(gpu, ring);
357 	return a5xx_idle(gpu, ring) ? 0 : -EINVAL;
358 }
359 
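/*
 * Prime the preemption machinery from ring 0: program a save record and issue
 * a yield so the CP is in a known preemption-ready state before the first
 * real preempt is triggered.
 */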
360 static int a5xx_preempt_start(struct msm_gpu *gpu)
361 {
362 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
363 	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
364 	struct msm_ringbuffer *ring = gpu->rb[0];
365 
366 	if (gpu->nr_rings == 1)
367 		return 0;
368 
369 	/* Turn off protected mode to write to special registers */
370 	OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
371 	OUT_RING(ring, 0);
372 
373 	/* Set the save preemption record for the ring/command */
374 	OUT_PKT4(ring, REG_A5XX_CP_CONTEXT_SWITCH_SAVE_ADDR_LO, 2);
375 	OUT_RING(ring, lower_32_bits(a5xx_gpu->preempt_iova[ring->id]));
376 	OUT_RING(ring, upper_32_bits(a5xx_gpu->preempt_iova[ring->id]));
377 
378 	/* Turn back on protected mode */
379 	OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
380 	OUT_RING(ring, 1);
381 
382 	OUT_PKT7(ring, CP_PREEMPT_ENABLE_GLOBAL, 1);
383 	OUT_RING(ring, 0x00);
384 
385 	OUT_PKT7(ring, CP_PREEMPT_ENABLE_LOCAL, 1);
386 	OUT_RING(ring, 0x01);
387 
388 	OUT_PKT7(ring, CP_YIELD_ENABLE, 1);
389 	OUT_RING(ring, 0x01);
390 
391 	/* Yield the floor on command completion */
392 	OUT_PKT7(ring, CP_CONTEXT_SWITCH_YIELD, 4);
393 	OUT_RING(ring, 0x00);
394 	OUT_RING(ring, 0x00);
395 	OUT_RING(ring, 0x01);
396 	OUT_RING(ring, 0x01);
397 
398 	gpu->funcs->flush(gpu, ring);
399 
400 	return a5xx_idle(gpu, ring) ? 0 : -EINVAL;
401 }
402 
403 static int a5xx_ucode_init(struct msm_gpu *gpu)
404 {
405 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
406 	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
407 	int ret;
408 
409 	if (!a5xx_gpu->pm4_bo) {
410 		a5xx_gpu->pm4_bo = adreno_fw_create_bo(gpu,
411 			adreno_gpu->fw[ADRENO_FW_PM4], &a5xx_gpu->pm4_iova);
412 
413 
414 		if (IS_ERR(a5xx_gpu->pm4_bo)) {
415 			ret = PTR_ERR(a5xx_gpu->pm4_bo);
416 			a5xx_gpu->pm4_bo = NULL;
417 			DRM_DEV_ERROR(gpu->dev->dev, "could not allocate PM4: %d\n",
418 				ret);
419 			return ret;
420 		}
421 
422 		msm_gem_object_set_name(a5xx_gpu->pm4_bo, "pm4fw");
423 	}
424 
425 	if (!a5xx_gpu->pfp_bo) {
426 		a5xx_gpu->pfp_bo = adreno_fw_create_bo(gpu,
427 			adreno_gpu->fw[ADRENO_FW_PFP], &a5xx_gpu->pfp_iova);
428 
429 		if (IS_ERR(a5xx_gpu->pfp_bo)) {
430 			ret = PTR_ERR(a5xx_gpu->pfp_bo);
431 			a5xx_gpu->pfp_bo = NULL;
432 			DRM_DEV_ERROR(gpu->dev->dev, "could not allocate PFP: %d\n",
433 				ret);
434 			return ret;
435 		}
436 
437 		msm_gem_object_set_name(a5xx_gpu->pfp_bo, "pfpfw");
438 	}
439 
440 	gpu_write64(gpu, REG_A5XX_CP_ME_INSTR_BASE_LO,
441 		REG_A5XX_CP_ME_INSTR_BASE_HI, a5xx_gpu->pm4_iova);
442 
443 	gpu_write64(gpu, REG_A5XX_CP_PFP_INSTR_BASE_LO,
444 		REG_A5XX_CP_PFP_INSTR_BASE_HI, a5xx_gpu->pfp_iova);
445 
446 	return 0;
447 }
448 
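/* Remote state argument asking the secure world to re-enable an already loaded zap shader */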
449 #define SCM_GPU_ZAP_SHADER_RESUME 0
450 
451 static int a5xx_zap_shader_resume(struct msm_gpu *gpu)
452 {
453 	int ret;
454 
455 	ret = qcom_scm_set_remote_state(SCM_GPU_ZAP_SHADER_RESUME, GPU_PAS_ID);
456 	if (ret)
457 		DRM_ERROR("%s: zap-shader resume failed: %d\n",
458 			gpu->name, ret);
459 
460 	return ret;
461 }
462 
463 static int a5xx_zap_shader_init(struct msm_gpu *gpu)
464 {
465 	static bool loaded;
466 	int ret;
467 
468 	/*
469 	 * If the zap shader is already loaded into memory we just need to kick
470 	 * the remote processor to reinitialize it
471 	 */
472 	if (loaded)
473 		return a5xx_zap_shader_resume(gpu);
474 
475 	ret = adreno_zap_shader_load(gpu, GPU_PAS_ID);
476 
477 	loaded = !ret;
478 	return ret;
479 }
480 
481 #define A5XX_INT_MASK (A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR | \
482 	  A5XX_RBBM_INT_0_MASK_RBBM_TRANSFER_TIMEOUT | \
483 	  A5XX_RBBM_INT_0_MASK_RBBM_ME_MS_TIMEOUT | \
484 	  A5XX_RBBM_INT_0_MASK_RBBM_PFP_MS_TIMEOUT | \
485 	  A5XX_RBBM_INT_0_MASK_RBBM_ETS_MS_TIMEOUT | \
486 	  A5XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNC_OVERFLOW | \
487 	  A5XX_RBBM_INT_0_MASK_CP_HW_ERROR | \
488 	  A5XX_RBBM_INT_0_MASK_MISC_HANG_DETECT | \
489 	  A5XX_RBBM_INT_0_MASK_CP_SW | \
490 	  A5XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS | \
491 	  A5XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS | \
492 	  A5XX_RBBM_INT_0_MASK_GPMU_VOLTAGE_DROOP)
493 
494 static int a5xx_hw_init(struct msm_gpu *gpu)
495 {
496 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
497 	int ret;
498 
499 	gpu_write(gpu, REG_A5XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x00000003);
500 
501 	/* Make all blocks contribute to the GPU BUSY perf counter */
502 	gpu_write(gpu, REG_A5XX_RBBM_PERFCTR_GPU_BUSY_MASKED, 0xFFFFFFFF);
503 
504 	/* Enable RBBM error reporting bits */
505 	gpu_write(gpu, REG_A5XX_RBBM_AHB_CNTL0, 0x00000001);
506 
507 	if (adreno_gpu->info->quirks & ADRENO_QUIRK_FAULT_DETECT_MASK) {
508 		/*
509 		 * Mask out the activity signals from RB1-3 to avoid false
510 		 * positives
511 		 */
512 
513 		gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL11,
514 			0xF0000000);
515 		gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL12,
516 			0xFFFFFFFF);
517 		gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL13,
518 			0xFFFFFFFF);
519 		gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL14,
520 			0xFFFFFFFF);
521 		gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL15,
522 			0xFFFFFFFF);
523 		gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL16,
524 			0xFFFFFFFF);
525 		gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL17,
526 			0xFFFFFFFF);
527 		gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL18,
528 			0xFFFFFFFF);
529 	}
530 
531 	/* Enable fault detection */
532 	gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_INT_CNTL,
533 		(1 << 30) | 0xFFFF);
534 
535 	/* Turn on performance counters */
536 	gpu_write(gpu, REG_A5XX_RBBM_PERFCTR_CNTL, 0x01);
537 
538 	/* Select CP0 to always count cycles */
539 	gpu_write(gpu, REG_A5XX_CP_PERFCTR_CP_SEL_0, PERF_CP_ALWAYS_COUNT);
540 
541 	/* Select countable 6 for RBBM counter 0 to get the busy status for devfreq */
542 	gpu_write(gpu, REG_A5XX_RBBM_PERFCTR_RBBM_SEL_0, 6);
543 
544 	/* Increase VFD cache access so LRZ and other data gets evicted less */
545 	gpu_write(gpu, REG_A5XX_UCHE_CACHE_WAYS, 0x02);
546 
547 	/* Disable L2 bypass in the UCHE */
548 	gpu_write(gpu, REG_A5XX_UCHE_TRAP_BASE_LO, 0xFFFF0000);
549 	gpu_write(gpu, REG_A5XX_UCHE_TRAP_BASE_HI, 0x0001FFFF);
550 	gpu_write(gpu, REG_A5XX_UCHE_WRITE_THRU_BASE_LO, 0xFFFF0000);
551 	gpu_write(gpu, REG_A5XX_UCHE_WRITE_THRU_BASE_HI, 0x0001FFFF);
552 
553 	/* Set the GMEM VA range (0x00100000 to 0x00100000 + gpu->gmem - 1) */
554 	gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MIN_LO, 0x00100000);
555 	gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MIN_HI, 0x00000000);
556 	gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MAX_LO,
557 		0x00100000 + adreno_gpu->gmem - 1);
558 	gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MAX_HI, 0x00000000);
559 
560 	gpu_write(gpu, REG_A5XX_CP_MEQ_THRESHOLDS, 0x40);
561 	gpu_write(gpu, REG_A5XX_CP_MERCIU_SIZE, 0x40);
562 	gpu_write(gpu, REG_A5XX_CP_ROQ_THRESHOLDS_2, 0x80000060);
563 	gpu_write(gpu, REG_A5XX_CP_ROQ_THRESHOLDS_1, 0x40201B16);
564 
565 	gpu_write(gpu, REG_A5XX_PC_DBG_ECO_CNTL, (0x400 << 11 | 0x300 << 22));
566 
567 	if (adreno_gpu->info->quirks & ADRENO_QUIRK_TWO_PASS_USE_WFI)
568 		gpu_rmw(gpu, REG_A5XX_PC_DBG_ECO_CNTL, 0, (1 << 8));
569 
570 	gpu_write(gpu, REG_A5XX_PC_DBG_ECO_CNTL, 0xc0200100);
571 
572 	/* Enable USE_RETENTION_FLOPS */
573 	gpu_write(gpu, REG_A5XX_CP_CHICKEN_DBG, 0x02000000);
574 
575 	/* Enable ME/PFP split notification */
576 	gpu_write(gpu, REG_A5XX_RBBM_AHB_CNTL1, 0xA6FFFFFF);
577 
578 	/* Enable HWCG */
579 	a5xx_set_hwcg(gpu, true);
580 
581 	gpu_write(gpu, REG_A5XX_RBBM_AHB_CNTL2, 0x0000003F);
582 
583 	/* Set the highest bank bit */
584 	gpu_write(gpu, REG_A5XX_TPL1_MODE_CNTL, 2 << 7);
585 	gpu_write(gpu, REG_A5XX_RB_MODE_CNTL, 2 << 1);
586 
587 	/* Protect registers from the CP */
588 	gpu_write(gpu, REG_A5XX_CP_PROTECT_CNTL, 0x00000007);
589 
590 	/* RBBM */
591 	gpu_write(gpu, REG_A5XX_CP_PROTECT(0), ADRENO_PROTECT_RW(0x04, 4));
592 	gpu_write(gpu, REG_A5XX_CP_PROTECT(1), ADRENO_PROTECT_RW(0x08, 8));
593 	gpu_write(gpu, REG_A5XX_CP_PROTECT(2), ADRENO_PROTECT_RW(0x10, 16));
594 	gpu_write(gpu, REG_A5XX_CP_PROTECT(3), ADRENO_PROTECT_RW(0x20, 32));
595 	gpu_write(gpu, REG_A5XX_CP_PROTECT(4), ADRENO_PROTECT_RW(0x40, 64));
596 	gpu_write(gpu, REG_A5XX_CP_PROTECT(5), ADRENO_PROTECT_RW(0x80, 64));
597 
598 	/* Content protect */
599 	gpu_write(gpu, REG_A5XX_CP_PROTECT(6),
600 		ADRENO_PROTECT_RW(REG_A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_LO,
601 			16));
602 	gpu_write(gpu, REG_A5XX_CP_PROTECT(7),
603 		ADRENO_PROTECT_RW(REG_A5XX_RBBM_SECVID_TRUST_CNTL, 2));
604 
605 	/* CP */
606 	gpu_write(gpu, REG_A5XX_CP_PROTECT(8), ADRENO_PROTECT_RW(0x800, 64));
607 	gpu_write(gpu, REG_A5XX_CP_PROTECT(9), ADRENO_PROTECT_RW(0x840, 8));
608 	gpu_write(gpu, REG_A5XX_CP_PROTECT(10), ADRENO_PROTECT_RW(0x880, 32));
609 	gpu_write(gpu, REG_A5XX_CP_PROTECT(11), ADRENO_PROTECT_RW(0xAA0, 1));
610 
611 	/* RB */
612 	gpu_write(gpu, REG_A5XX_CP_PROTECT(12), ADRENO_PROTECT_RW(0xCC0, 1));
613 	gpu_write(gpu, REG_A5XX_CP_PROTECT(13), ADRENO_PROTECT_RW(0xCF0, 2));
614 
615 	/* VPC */
616 	gpu_write(gpu, REG_A5XX_CP_PROTECT(14), ADRENO_PROTECT_RW(0xE68, 8));
617 	gpu_write(gpu, REG_A5XX_CP_PROTECT(15), ADRENO_PROTECT_RW(0xE70, 4));
618 
619 	/* UCHE */
620 	gpu_write(gpu, REG_A5XX_CP_PROTECT(16), ADRENO_PROTECT_RW(0xE80, 16));
621 
622 	if (adreno_is_a530(adreno_gpu))
623 		gpu_write(gpu, REG_A5XX_CP_PROTECT(17),
624 			ADRENO_PROTECT_RW(0x10000, 0x8000));
625 
626 	gpu_write(gpu, REG_A5XX_RBBM_SECVID_TSB_CNTL, 0);
627 	/*
628 	 * Disable the trusted memory range - we don't actually support secure
629 	 * memory rendering at this point in time and we don't want to block off
630 	 * part of the virtual memory space.
631 	 */
632 	gpu_write64(gpu, REG_A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_LO,
633 		REG_A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_HI, 0x00000000);
634 	gpu_write(gpu, REG_A5XX_RBBM_SECVID_TSB_TRUSTED_SIZE, 0x00000000);
635 
636 	ret = adreno_hw_init(gpu);
637 	if (ret)
638 		return ret;
639 
640 	a5xx_preempt_hw_init(gpu);
641 
642 	a5xx_gpmu_ucode_init(gpu);
643 
644 	ret = a5xx_ucode_init(gpu);
645 	if (ret)
646 		return ret;
647 
648 	/* Unmask the interrupts we want to handle */
649 	gpu_write(gpu, REG_A5XX_RBBM_INT_0_MASK, A5XX_INT_MASK);
650 
651 	/* Clear ME_HALT to start the micro engine */
652 	gpu_write(gpu, REG_A5XX_CP_PFP_ME_CNTL, 0);
653 	ret = a5xx_me_init(gpu);
654 	if (ret)
655 		return ret;
656 
657 	ret = a5xx_power_init(gpu);
658 	if (ret)
659 		return ret;
660 
661 	/*
662 	 * Send a pipeline event stat to get misbehaving counters to start
663 	 * ticking correctly
664 	 */
665 	if (adreno_is_a530(adreno_gpu)) {
666 		OUT_PKT7(gpu->rb[0], CP_EVENT_WRITE, 1);
667 		OUT_RING(gpu->rb[0], 0x0F);
668 
669 		gpu->funcs->flush(gpu, gpu->rb[0]);
670 		if (!a5xx_idle(gpu, gpu->rb[0]))
671 			return -EINVAL;
672 	}
673 
674 	/*
675 	 * Try to load a zap shader into the secure world. If successful
676 	 * we can use the CP to switch out of secure mode. If not then we
677 	 * have no recourse but to try to switch ourselves out manually. If we
678 	 * guessed wrong then access to the RBBM_SECVID_TRUST_CNTL register will
679 	 * be blocked and a permissions violation will soon follow.
680 	 */
681 	ret = a5xx_zap_shader_init(gpu);
682 	if (!ret) {
683 		OUT_PKT7(gpu->rb[0], CP_SET_SECURE_MODE, 1);
684 		OUT_RING(gpu->rb[0], 0x00000000);
685 
686 		gpu->funcs->flush(gpu, gpu->rb[0]);
687 		if (!a5xx_idle(gpu, gpu->rb[0]))
688 			return -EINVAL;
689 	} else {
690 		/* Print a warning so if we die, we know why */
691 		dev_warn_once(gpu->dev->dev,
692 			"Zap shader not enabled - using SECVID_TRUST_CNTL instead\n");
693 		gpu_write(gpu, REG_A5XX_RBBM_SECVID_TRUST_CNTL, 0x0);
694 	}
695 
696 	/* Last step - yield the ringbuffer */
697 	a5xx_preempt_start(gpu);
698 
699 	return 0;
700 }
701 
702 static void a5xx_recover(struct msm_gpu *gpu)
703 {
704 	int i;
705 
706 	adreno_dump_info(gpu);
707 
708 	for (i = 0; i < 8; i++) {
709 		printk("CP_SCRATCH_REG%d: %u\n", i,
710 			gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(i)));
711 	}
712 
713 	if (hang_debug)
714 		a5xx_dump(gpu);
715 
716 	gpu_write(gpu, REG_A5XX_RBBM_SW_RESET_CMD, 1);
717 	gpu_read(gpu, REG_A5XX_RBBM_SW_RESET_CMD);
718 	gpu_write(gpu, REG_A5XX_RBBM_SW_RESET_CMD, 0);
719 	adreno_recover(gpu);
720 }
721 
722 static void a5xx_destroy(struct msm_gpu *gpu)
723 {
724 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
725 	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
726 
727 	DBG("%s", gpu->name);
728 
729 	a5xx_preempt_fini(gpu);
730 
731 	if (a5xx_gpu->pm4_bo) {
732 		msm_gem_unpin_iova(a5xx_gpu->pm4_bo, gpu->aspace);
733 		drm_gem_object_put_unlocked(a5xx_gpu->pm4_bo);
734 	}
735 
736 	if (a5xx_gpu->pfp_bo) {
737 		msm_gem_unpin_iova(a5xx_gpu->pfp_bo, gpu->aspace);
738 		drm_gem_object_put_unlocked(a5xx_gpu->pfp_bo);
739 	}
740 
741 	if (a5xx_gpu->gpmu_bo) {
742 		msm_gem_unpin_iova(a5xx_gpu->gpmu_bo, gpu->aspace);
743 		drm_gem_object_put_unlocked(a5xx_gpu->gpmu_bo);
744 	}
745 
746 	adreno_gpu_cleanup(adreno_gpu);
747 	kfree(a5xx_gpu);
748 }
749 
750 static inline bool _a5xx_check_idle(struct msm_gpu *gpu)
751 {
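	/* Any busy bit other than the host interface busy bit means the GPU is still working */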
752 	if (gpu_read(gpu, REG_A5XX_RBBM_STATUS) & ~A5XX_RBBM_STATUS_HI_BUSY)
753 		return false;
754 
755 	/*
756 	 * Nearly every abnormality ends up pausing the GPU and triggering a
757 	 * fault so we can safely just watch for this one interrupt to fire
758 	 */
759 	return !(gpu_read(gpu, REG_A5XX_RBBM_INT_0_STATUS) &
760 		A5XX_RBBM_INT_0_MASK_MISC_HANG_DETECT);
761 }
762 
763 bool a5xx_idle(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
764 {
765 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
766 	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
767 
768 	if (ring != a5xx_gpu->cur_ring) {
769 		WARN(1, "Tried to idle a non-current ringbuffer\n");
770 		return false;
771 	}
772 
773 	/* wait for CP to drain ringbuffer: */
774 	if (!adreno_idle(gpu, ring))
775 		return false;
776 
777 	if (spin_until(_a5xx_check_idle(gpu))) {
778 		DRM_ERROR("%s: %ps: timeout waiting for GPU to idle: status %8.8X irq %8.8X rptr/wptr %d/%d\n",
779 			gpu->name, __builtin_return_address(0),
780 			gpu_read(gpu, REG_A5XX_RBBM_STATUS),
781 			gpu_read(gpu, REG_A5XX_RBBM_INT_0_STATUS),
782 			gpu_read(gpu, REG_A5XX_CP_RB_RPTR),
783 			gpu_read(gpu, REG_A5XX_CP_RB_WPTR));
784 		return false;
785 	}
786 
787 	return true;
788 }
789 
790 static int a5xx_fault_handler(void *arg, unsigned long iova, int flags)
791 {
792 	struct msm_gpu *gpu = arg;
793 	pr_warn_ratelimited("*** gpu fault: iova=%08lx, flags=%d (%u,%u,%u,%u)\n",
794 			iova, flags,
795 			gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(4)),
796 			gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(5)),
797 			gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(6)),
798 			gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(7)));
799 
800 	return -EFAULT;
801 }
802 
803 static void a5xx_cp_err_irq(struct msm_gpu *gpu)
804 {
805 	u32 status = gpu_read(gpu, REG_A5XX_CP_INTERRUPT_STATUS);
806 
807 	if (status & A5XX_CP_INT_CP_OPCODE_ERROR) {
808 		u32 val;
809 
810 		gpu_write(gpu, REG_A5XX_CP_PFP_STAT_ADDR, 0);
811 
812 		/*
813 		 * REG_A5XX_CP_PFP_STAT_DATA is indexed, and we want index 1 so
814 		 * read it twice
815 		 */
816 
817 		gpu_read(gpu, REG_A5XX_CP_PFP_STAT_DATA);
818 		val = gpu_read(gpu, REG_A5XX_CP_PFP_STAT_DATA);
819 
820 		dev_err_ratelimited(gpu->dev->dev, "CP | opcode error | possible opcode=0x%8.8X\n",
821 			val);
822 	}
823 
824 	if (status & A5XX_CP_INT_CP_HW_FAULT_ERROR)
825 		dev_err_ratelimited(gpu->dev->dev, "CP | HW fault | status=0x%8.8X\n",
826 			gpu_read(gpu, REG_A5XX_CP_HW_FAULT));
827 
828 	if (status & A5XX_CP_INT_CP_DMA_ERROR)
829 		dev_err_ratelimited(gpu->dev->dev, "CP | DMA error\n");
830 
831 	if (status & A5XX_CP_INT_CP_REGISTER_PROTECTION_ERROR) {
832 		u32 val = gpu_read(gpu, REG_A5XX_CP_PROTECT_STATUS);
833 
834 		dev_err_ratelimited(gpu->dev->dev,
835 			"CP | protected mode error | %s | addr=0x%8.8X | status=0x%8.8X\n",
836 			val & (1 << 24) ? "WRITE" : "READ",
837 			(val & 0xFFFFF) >> 2, val);
838 	}
839 
840 	if (status & A5XX_CP_INT_CP_AHB_ERROR) {
841 		u32 status = gpu_read(gpu, REG_A5XX_CP_AHB_FAULT);
842 		const char *access[16] = { "reserved", "reserved",
843 			"timestamp lo", "timestamp hi", "pfp read", "pfp write",
844 			"", "", "me read", "me write", "", "", "crashdump read",
845 			"crashdump write" };
846 
847 		dev_err_ratelimited(gpu->dev->dev,
848 			"CP | AHB error | addr=%X access=%s error=%d | status=0x%8.8X\n",
849 			status & 0xFFFFF, access[(status >> 24) & 0xF],
850 			(status & (1 << 31)), status);
851 	}
852 }
853 
854 static void a5xx_rbbm_err_irq(struct msm_gpu *gpu, u32 status)
855 {
856 	if (status & A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR) {
857 		u32 val = gpu_read(gpu, REG_A5XX_RBBM_AHB_ERROR_STATUS);
858 
859 		dev_err_ratelimited(gpu->dev->dev,
860 			"RBBM | AHB bus error | %s | addr=0x%X | ports=0x%X:0x%X\n",
861 			val & (1 << 28) ? "WRITE" : "READ",
862 			(val & 0xFFFFF) >> 2, (val >> 20) & 0x3,
863 			(val >> 24) & 0xF);
864 
865 		/* Clear the error */
866 		gpu_write(gpu, REG_A5XX_RBBM_AHB_CMD, (1 << 4));
867 
868 		/* Clear the interrupt */
869 		gpu_write(gpu, REG_A5XX_RBBM_INT_CLEAR_CMD,
870 			A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR);
871 	}
872 
873 	if (status & A5XX_RBBM_INT_0_MASK_RBBM_TRANSFER_TIMEOUT)
874 		dev_err_ratelimited(gpu->dev->dev, "RBBM | AHB transfer timeout\n");
875 
876 	if (status & A5XX_RBBM_INT_0_MASK_RBBM_ME_MS_TIMEOUT)
877 		dev_err_ratelimited(gpu->dev->dev, "RBBM | ME master split | status=0x%X\n",
878 			gpu_read(gpu, REG_A5XX_RBBM_AHB_ME_SPLIT_STATUS));
879 
880 	if (status & A5XX_RBBM_INT_0_MASK_RBBM_PFP_MS_TIMEOUT)
881 		dev_err_ratelimited(gpu->dev->dev, "RBBM | PFP master split | status=0x%X\n",
882 			gpu_read(gpu, REG_A5XX_RBBM_AHB_PFP_SPLIT_STATUS));
883 
884 	if (status & A5XX_RBBM_INT_0_MASK_RBBM_ETS_MS_TIMEOUT)
885 		dev_err_ratelimited(gpu->dev->dev, "RBBM | ETS master split | status=0x%X\n",
886 			gpu_read(gpu, REG_A5XX_RBBM_AHB_ETS_SPLIT_STATUS));
887 
888 	if (status & A5XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNC_OVERFLOW)
889 		dev_err_ratelimited(gpu->dev->dev, "RBBM | ATB ASYNC overflow\n");
890 
891 	if (status & A5XX_RBBM_INT_0_MASK_RBBM_ATB_BUS_OVERFLOW)
892 		dev_err_ratelimited(gpu->dev->dev, "RBBM | ATB bus overflow\n");
893 }
894 
895 static void a5xx_uche_err_irq(struct msm_gpu *gpu)
896 {
897 	uint64_t addr = (uint64_t) gpu_read(gpu, REG_A5XX_UCHE_TRAP_LOG_HI);
898 
899 	addr |= gpu_read(gpu, REG_A5XX_UCHE_TRAP_LOG_LO);
900 
901 	dev_err_ratelimited(gpu->dev->dev, "UCHE | Out of bounds access | addr=0x%llX\n",
902 		addr);
903 }
904 
905 static void a5xx_gpmu_err_irq(struct msm_gpu *gpu)
906 {
907 	dev_err_ratelimited(gpu->dev->dev, "GPMU | voltage droop\n");
908 }
909 
910 static void a5xx_fault_detect_irq(struct msm_gpu *gpu)
911 {
912 	struct drm_device *dev = gpu->dev;
913 	struct msm_drm_private *priv = dev->dev_private;
914 	struct msm_ringbuffer *ring = gpu->funcs->active_ring(gpu);
915 
916 	DRM_DEV_ERROR(dev->dev, "gpu fault ring %d fence %x status %8.8X rb %4.4x/%4.4x ib1 %16.16llX/%4.4x ib2 %16.16llX/%4.4x\n",
917 		ring ? ring->id : -1, ring ? ring->seqno : 0,
918 		gpu_read(gpu, REG_A5XX_RBBM_STATUS),
919 		gpu_read(gpu, REG_A5XX_CP_RB_RPTR),
920 		gpu_read(gpu, REG_A5XX_CP_RB_WPTR),
921 		gpu_read64(gpu, REG_A5XX_CP_IB1_BASE, REG_A5XX_CP_IB1_BASE_HI),
922 		gpu_read(gpu, REG_A5XX_CP_IB1_BUFSZ),
923 		gpu_read64(gpu, REG_A5XX_CP_IB2_BASE, REG_A5XX_CP_IB2_BASE_HI),
924 		gpu_read(gpu, REG_A5XX_CP_IB2_BUFSZ));
925 
926 	/* Turn off the hangcheck timer to keep it from bothering us */
927 	del_timer(&gpu->hangcheck_timer);
928 
929 	queue_work(priv->wq, &gpu->recover_work);
930 }
931 
932 #define RBBM_ERROR_MASK \
933 	(A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR | \
934 	A5XX_RBBM_INT_0_MASK_RBBM_TRANSFER_TIMEOUT | \
935 	A5XX_RBBM_INT_0_MASK_RBBM_ME_MS_TIMEOUT | \
936 	A5XX_RBBM_INT_0_MASK_RBBM_PFP_MS_TIMEOUT | \
937 	A5XX_RBBM_INT_0_MASK_RBBM_ETS_MS_TIMEOUT | \
938 	A5XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNC_OVERFLOW)
939 
940 static irqreturn_t a5xx_irq(struct msm_gpu *gpu)
941 {
942 	u32 status = gpu_read(gpu, REG_A5XX_RBBM_INT_0_STATUS);
943 
944 	/*
945 	 * Clear all the interrupts except RBBM_AHB_ERROR - if we clear it
946 	 * before the source is cleared the interrupt will storm.
947 	 */
948 	gpu_write(gpu, REG_A5XX_RBBM_INT_CLEAR_CMD,
949 		status & ~A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR);
950 
951 	/* Pass status to a5xx_rbbm_err_irq because we've already cleared it */
952 	if (status & RBBM_ERROR_MASK)
953 		a5xx_rbbm_err_irq(gpu, status);
954 
955 	if (status & A5XX_RBBM_INT_0_MASK_CP_HW_ERROR)
956 		a5xx_cp_err_irq(gpu);
957 
958 	if (status & A5XX_RBBM_INT_0_MASK_MISC_HANG_DETECT)
959 		a5xx_fault_detect_irq(gpu);
960 
961 	if (status & A5XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS)
962 		a5xx_uche_err_irq(gpu);
963 
964 	if (status & A5XX_RBBM_INT_0_MASK_GPMU_VOLTAGE_DROOP)
965 		a5xx_gpmu_err_irq(gpu);
966 
967 	if (status & A5XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS) {
968 		a5xx_preempt_trigger(gpu);
969 		msm_gpu_retire(gpu);
970 	}
971 
972 	if (status & A5XX_RBBM_INT_0_MASK_CP_SW)
973 		a5xx_preempt_irq(gpu);
974 
975 	return IRQ_HANDLED;
976 }
977 
978 static const u32 a5xx_register_offsets[REG_ADRENO_REGISTER_MAX] = {
979 	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_BASE, REG_A5XX_CP_RB_BASE),
980 	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_BASE_HI, REG_A5XX_CP_RB_BASE_HI),
981 	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_RPTR_ADDR, REG_A5XX_CP_RB_RPTR_ADDR),
982 	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_RPTR_ADDR_HI,
983 		REG_A5XX_CP_RB_RPTR_ADDR_HI),
984 	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_RPTR, REG_A5XX_CP_RB_RPTR),
985 	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_WPTR, REG_A5XX_CP_RB_WPTR),
986 	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_CNTL, REG_A5XX_CP_RB_CNTL),
987 };
988 
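/* Start/end pairs of register ranges captured for dumps and GPU state snapshots, terminated by ~0 */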
989 static const u32 a5xx_registers[] = {
990 	0x0000, 0x0002, 0x0004, 0x0020, 0x0022, 0x0026, 0x0029, 0x002B,
991 	0x002E, 0x0035, 0x0038, 0x0042, 0x0044, 0x0044, 0x0047, 0x0095,
992 	0x0097, 0x00BB, 0x03A0, 0x0464, 0x0469, 0x046F, 0x04D2, 0x04D3,
993 	0x04E0, 0x0533, 0x0540, 0x0555, 0x0800, 0x081A, 0x081F, 0x0841,
994 	0x0860, 0x0860, 0x0880, 0x08A0, 0x0B00, 0x0B12, 0x0B15, 0x0B28,
995 	0x0B78, 0x0B7F, 0x0BB0, 0x0BBD, 0x0BC0, 0x0BC6, 0x0BD0, 0x0C53,
996 	0x0C60, 0x0C61, 0x0C80, 0x0C82, 0x0C84, 0x0C85, 0x0C90, 0x0C98,
997 	0x0CA0, 0x0CA0, 0x0CB0, 0x0CB2, 0x2180, 0x2185, 0x2580, 0x2585,
998 	0x0CC1, 0x0CC1, 0x0CC4, 0x0CC7, 0x0CCC, 0x0CCC, 0x0CD0, 0x0CD8,
999 	0x0CE0, 0x0CE5, 0x0CE8, 0x0CE8, 0x0CEC, 0x0CF1, 0x0CFB, 0x0D0E,
1000 	0x2100, 0x211E, 0x2140, 0x2145, 0x2500, 0x251E, 0x2540, 0x2545,
1001 	0x0D10, 0x0D17, 0x0D20, 0x0D23, 0x0D30, 0x0D30, 0x20C0, 0x20C0,
1002 	0x24C0, 0x24C0, 0x0E40, 0x0E43, 0x0E4A, 0x0E4A, 0x0E50, 0x0E57,
1003 	0x0E60, 0x0E7C, 0x0E80, 0x0E8E, 0x0E90, 0x0E96, 0x0EA0, 0x0EA8,
1004 	0x0EB0, 0x0EB2, 0xE140, 0xE147, 0xE150, 0xE187, 0xE1A0, 0xE1A9,
1005 	0xE1B0, 0xE1B6, 0xE1C0, 0xE1C7, 0xE1D0, 0xE1D1, 0xE200, 0xE201,
1006 	0xE210, 0xE21C, 0xE240, 0xE268, 0xE000, 0xE006, 0xE010, 0xE09A,
1007 	0xE0A0, 0xE0A4, 0xE0AA, 0xE0EB, 0xE100, 0xE105, 0xE380, 0xE38F,
1008 	0xE3B0, 0xE3B0, 0xE400, 0xE405, 0xE408, 0xE4E9, 0xE4F0, 0xE4F0,
1009 	0xE280, 0xE280, 0xE282, 0xE2A3, 0xE2A5, 0xE2C2, 0xE940, 0xE947,
1010 	0xE950, 0xE987, 0xE9A0, 0xE9A9, 0xE9B0, 0xE9B6, 0xE9C0, 0xE9C7,
1011 	0xE9D0, 0xE9D1, 0xEA00, 0xEA01, 0xEA10, 0xEA1C, 0xEA40, 0xEA68,
1012 	0xE800, 0xE806, 0xE810, 0xE89A, 0xE8A0, 0xE8A4, 0xE8AA, 0xE8EB,
1013 	0xE900, 0xE905, 0xEB80, 0xEB8F, 0xEBB0, 0xEBB0, 0xEC00, 0xEC05,
1014 	0xEC08, 0xECE9, 0xECF0, 0xECF0, 0xEA80, 0xEA80, 0xEA82, 0xEAA3,
1015 	0xEAA5, 0xEAC2, 0xA800, 0xA800, 0xA820, 0xA828, 0xA840, 0xA87D,
1016 	0xA880, 0xA88D, 0xA890, 0xA8A3, 0xA8D0, 0xA8D8, 0xA8E0, 0xA8F5,
1017 	0xAC60, 0xAC60, ~0,
1018 };
1019 
1020 static void a5xx_dump(struct msm_gpu *gpu)
1021 {
1022 	DRM_DEV_INFO(gpu->dev->dev, "status:   %08x\n",
1023 		gpu_read(gpu, REG_A5XX_RBBM_STATUS));
1024 	adreno_dump(gpu);
1025 }
1026 
1027 static int a5xx_pm_resume(struct msm_gpu *gpu)
1028 {
1029 	int ret;
1030 
1031 	/* Turn on the core power */
1032 	ret = msm_gpu_pm_resume(gpu);
1033 	if (ret)
1034 		return ret;
1035 
1036 	/* Turn on the RBCCU power domain first to limit the chances of voltage droop */
1037 	gpu_write(gpu, REG_A5XX_GPMU_RBCCU_POWER_CNTL, 0x778000);
1038 
1039 	/* Wait 3 usecs before polling */
1040 	udelay(3);
1041 
1042 	ret = spin_usecs(gpu, 20, REG_A5XX_GPMU_RBCCU_PWR_CLK_STATUS,
1043 		(1 << 20), (1 << 20));
1044 	if (ret) {
1045 		DRM_ERROR("%s: timeout waiting for RBCCU GDSC enable: %X\n",
1046 			gpu->name,
1047 			gpu_read(gpu, REG_A5XX_GPMU_RBCCU_PWR_CLK_STATUS));
1048 		return ret;
1049 	}
1050 
1051 	/* Turn on the SP domain */
1052 	gpu_write(gpu, REG_A5XX_GPMU_SP_POWER_CNTL, 0x778000);
1053 	ret = spin_usecs(gpu, 20, REG_A5XX_GPMU_SP_PWR_CLK_STATUS,
1054 		(1 << 20), (1 << 20));
1055 	if (ret)
1056 		DRM_ERROR("%s: timeout waiting for SP GDSC enable\n",
1057 			gpu->name);
1058 
1059 	return ret;
1060 }
1061 
1062 static int a5xx_pm_suspend(struct msm_gpu *gpu)
1063 {
1064 	/* Clear the VBIF pipe before shutting down */
1065 	gpu_write(gpu, REG_A5XX_VBIF_XIN_HALT_CTRL0, 0xF);
1066 	spin_until((gpu_read(gpu, REG_A5XX_VBIF_XIN_HALT_CTRL1) & 0xF) == 0xF);
1067 
1068 	gpu_write(gpu, REG_A5XX_VBIF_XIN_HALT_CTRL0, 0);
1069 
1070 	/*
1071 	 * Reset the VBIF before power collapse to avoid issues with FIFO
1072 	 * entries
1073 	 */
1074 	gpu_write(gpu, REG_A5XX_RBBM_BLOCK_SW_RESET_CMD, 0x003C0000);
1075 	gpu_write(gpu, REG_A5XX_RBBM_BLOCK_SW_RESET_CMD, 0x00000000);
1076 
1077 	return msm_gpu_pm_suspend(gpu);
1078 }
1079 
1080 static int a5xx_get_timestamp(struct msm_gpu *gpu, uint64_t *value)
1081 {
1082 	*value = gpu_read64(gpu, REG_A5XX_RBBM_PERFCTR_CP_0_LO,
1083 		REG_A5XX_RBBM_PERFCTR_CP_0_HI);
1084 
1085 	return 0;
1086 }
1087 
1088 struct a5xx_crashdumper {
1089 	void *ptr;
1090 	struct drm_gem_object *bo;
1091 	u64 iova;
1092 };
1093 
1094 struct a5xx_gpu_state {
1095 	struct msm_gpu_state base;
1096 	u32 *hlsqregs;
1097 };
1098 
1099 static int a5xx_crashdumper_init(struct msm_gpu *gpu,
1100 		struct a5xx_crashdumper *dumper)
1101 {
1102 	dumper->ptr = msm_gem_kernel_new_locked(gpu->dev,
1103 		SZ_1M, MSM_BO_UNCACHED, gpu->aspace,
1104 		&dumper->bo, &dumper->iova);
1105 
1106 	if (!IS_ERR(dumper->ptr))
1107 		msm_gem_object_set_name(dumper->bo, "crashdump");
1108 
1109 	return PTR_ERR_OR_ZERO(dumper->ptr);
1110 }
1111 
1112 static int a5xx_crashdumper_run(struct msm_gpu *gpu,
1113 		struct a5xx_crashdumper *dumper)
1114 {
1115 	u32 val;
1116 
1117 	if (IS_ERR_OR_NULL(dumper->ptr))
1118 		return -EINVAL;
1119 
1120 	gpu_write64(gpu, REG_A5XX_CP_CRASH_SCRIPT_BASE_LO,
1121 		REG_A5XX_CP_CRASH_SCRIPT_BASE_HI, dumper->iova);
1122 
1123 	gpu_write(gpu, REG_A5XX_CP_CRASH_DUMP_CNTL, 1);
1124 
1125 	return gpu_poll_timeout(gpu, REG_A5XX_CP_CRASH_DUMP_CNTL, val,
1126 		val & 0x04, 100, 10000);
1127 }
1128 
1129 /*
1130  * This is a list of the registers that need to be read through the HLSQ
1131  * aperture by the crashdumper.  They are not normally accessible from
1132  * the CPU on a secure platform.
1133  */
1134 static const struct {
1135 	u32 type;
1136 	u32 regoffset;
1137 	u32 count;
1138 } a5xx_hlsq_aperture_regs[] = {
1139 	{ 0x35, 0xe00, 0x32 },   /* HLSQ non-context */
1140 	{ 0x31, 0x2080, 0x1 },   /* HLSQ 2D context 0 */
1141 	{ 0x33, 0x2480, 0x1 },   /* HLSQ 2D context 1 */
1142 	{ 0x32, 0xe780, 0x62 },  /* HLSQ 3D context 0 */
1143 	{ 0x34, 0xef80, 0x62 },  /* HLSQ 3D context 1 */
1144 	{ 0x3f, 0x0ec0, 0x40 },  /* SP non-context */
1145 	{ 0x3d, 0x2040, 0x1 },   /* SP 2D context 0 */
1146 	{ 0x3b, 0x2440, 0x1 },   /* SP 2D context 1 */
1147 	{ 0x3e, 0xe580, 0x170 }, /* SP 3D context 0 */
1148 	{ 0x3c, 0xed80, 0x170 }, /* SP 3D context 1 */
1149 	{ 0x3a, 0x0f00, 0x1c },  /* TP non-context */
1150 	{ 0x38, 0x2000, 0xa },   /* TP 2D context 0 */
1151 	{ 0x36, 0x2400, 0xa },   /* TP 2D context 1 */
1152 	{ 0x39, 0xe700, 0x80 },  /* TP 3D context 0 */
1153 	{ 0x37, 0xef00, 0x80 },  /* TP 3D context 1 */
1154 };
1155 
1156 static void a5xx_gpu_state_get_hlsq_regs(struct msm_gpu *gpu,
1157 		struct a5xx_gpu_state *a5xx_state)
1158 {
1159 	struct a5xx_crashdumper dumper = { 0 };
1160 	u32 offset, count = 0;
1161 	u64 *ptr;
1162 	int i;
1163 
1164 	if (a5xx_crashdumper_init(gpu, &dumper))
1165 		return;
1166 
1167 	/* The script will be written at offset 0 */
1168 	ptr = dumper.ptr;
1169 
1170 	/* Start writing the data at offset 256k */
1171 	offset = dumper.iova + (256 * SZ_1K);
1172 
1173 	/* Count how many additional registers to get from the HLSQ aperture */
1174 	for (i = 0; i < ARRAY_SIZE(a5xx_hlsq_aperture_regs); i++)
1175 		count += a5xx_hlsq_aperture_regs[i].count;
1176 
1177 	a5xx_state->hlsqregs = kcalloc(count, sizeof(u32), GFP_KERNEL);
1178 	if (!a5xx_state->hlsqregs)
1179 		return;
1180 
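	/*
	 * Each script entry is a pair of 64-bit words: the first holds the
	 * value to write (or the destination IOVA for a read), the second
	 * packs the register offset into the upper bits, a write flag in
	 * bit 21 and the dword count in the low bits.
	 */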
1181 	/* Build the crashdump script */
1182 	for (i = 0; i < ARRAY_SIZE(a5xx_hlsq_aperture_regs); i++) {
1183 		u32 type = a5xx_hlsq_aperture_regs[i].type;
1184 		u32 c = a5xx_hlsq_aperture_regs[i].count;
1185 
1186 		/* Write the register to select the desired bank */
1187 		*ptr++ = ((u64) type << 8);
1188 		*ptr++ = (((u64) REG_A5XX_HLSQ_DBG_READ_SEL) << 44) |
1189 			(1 << 21) | 1;
1190 
1191 		*ptr++ = offset;
1192 		*ptr++ = (((u64) REG_A5XX_HLSQ_DBG_AHB_READ_APERTURE) << 44)
1193 			| c;
1194 
1195 		offset += c * sizeof(u32);
1196 	}
1197 
1198 	/* Write two zeros to close off the script */
1199 	*ptr++ = 0;
1200 	*ptr++ = 0;
1201 
1202 	if (a5xx_crashdumper_run(gpu, &dumper)) {
1203 		kfree(a5xx_state->hlsqregs);
1204 		msm_gem_kernel_put(dumper.bo, gpu->aspace, true);
1205 		return;
1206 	}
1207 
1208 	/* Copy the data from the crashdumper to the state */
1209 	memcpy(a5xx_state->hlsqregs, dumper.ptr + (256 * SZ_1K),
1210 		count * sizeof(u32));
1211 
1212 	msm_gem_kernel_put(dumper.bo, gpu->aspace, true);
1213 }
1214 
1215 static struct msm_gpu_state *a5xx_gpu_state_get(struct msm_gpu *gpu)
1216 {
1217 	struct a5xx_gpu_state *a5xx_state = kzalloc(sizeof(*a5xx_state),
1218 			GFP_KERNEL);
1219 
1220 	if (!a5xx_state)
1221 		return ERR_PTR(-ENOMEM);
1222 
1223 	/* Temporarily disable hardware clock gating before reading the hw */
1224 	a5xx_set_hwcg(gpu, false);
1225 
1226 	/* First get the generic state from the adreno core */
1227 	adreno_gpu_state_get(gpu, &(a5xx_state->base));
1228 
1229 	a5xx_state->base.rbbm_status = gpu_read(gpu, REG_A5XX_RBBM_STATUS);
1230 
1231 	/* Get the HLSQ regs with the help of the crashdumper */
1232 	a5xx_gpu_state_get_hlsq_regs(gpu, a5xx_state);
1233 
1234 	a5xx_set_hwcg(gpu, true);
1235 
1236 	return &a5xx_state->base;
1237 }
1238 
1239 static void a5xx_gpu_state_destroy(struct kref *kref)
1240 {
1241 	struct msm_gpu_state *state = container_of(kref,
1242 		struct msm_gpu_state, ref);
1243 	struct a5xx_gpu_state *a5xx_state = container_of(state,
1244 		struct a5xx_gpu_state, base);
1245 
1246 	kfree(a5xx_state->hlsqregs);
1247 
1248 	adreno_gpu_state_destroy(state);
1249 	kfree(a5xx_state);
1250 }
1251 
1252 int a5xx_gpu_state_put(struct msm_gpu_state *state)
1253 {
1254 	if (IS_ERR_OR_NULL(state))
1255 		return 1;
1256 
1257 	return kref_put(&state->ref, a5xx_gpu_state_destroy);
1258 }
1259 
1260 
1261 #if defined(CONFIG_DEBUG_FS) || defined(CONFIG_DEV_COREDUMP)
1262 void a5xx_show(struct msm_gpu *gpu, struct msm_gpu_state *state,
1263 		struct drm_printer *p)
1264 {
1265 	int i, j;
1266 	u32 pos = 0;
1267 	struct a5xx_gpu_state *a5xx_state = container_of(state,
1268 		struct a5xx_gpu_state, base);
1269 
1270 	if (IS_ERR_OR_NULL(state))
1271 		return;
1272 
1273 	adreno_show(gpu, state, p);
1274 
1275 	/* Dump the additional a5xx HLSQ registers */
1276 	if (!a5xx_state->hlsqregs)
1277 		return;
1278 
1279 	drm_printf(p, "registers-hlsq:\n");
1280 
1281 	for (i = 0; i < ARRAY_SIZE(a5xx_hlsq_aperture_regs); i++) {
1282 		u32 o = a5xx_hlsq_aperture_regs[i].regoffset;
1283 		u32 c = a5xx_hlsq_aperture_regs[i].count;
1284 
1285 		for (j = 0; j < c; j++, pos++, o++) {
1286 			/*
1287 			 * To keep the crashdump simple we pull the entire range
1288 			 * for each register type but not all of the registers
1289 			 * in the range are valid. Fortunately invalid registers
1290 			 * stick out like a sore thumb with a value of
1291 			 * 0xdeadbeef
1292 			 */
1293 			if (a5xx_state->hlsqregs[pos] == 0xdeadbeef)
1294 				continue;
1295 
1296 			drm_printf(p, "  - { offset: 0x%04x, value: 0x%08x }\n",
1297 				o << 2, a5xx_state->hlsqregs[pos]);
1298 		}
1299 	}
1300 }
1301 #endif
1302 
1303 static struct msm_ringbuffer *a5xx_active_ring(struct msm_gpu *gpu)
1304 {
1305 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1306 	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
1307 
1308 	return a5xx_gpu->cur_ring;
1309 }
1310 
1311 static unsigned long a5xx_gpu_busy(struct msm_gpu *gpu)
1312 {
1313 	u64 busy_cycles, busy_time;
1314 
1315 	busy_cycles = gpu_read64(gpu, REG_A5XX_RBBM_PERFCTR_RBBM_0_LO,
1316 			REG_A5XX_RBBM_PERFCTR_RBBM_0_HI);
1317 
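	/* Convert the cycle delta into microseconds of busy time at the current core clock rate */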
1318 	busy_time = busy_cycles - gpu->devfreq.busy_cycles;
1319 	do_div(busy_time, clk_get_rate(gpu->core_clk) / 1000000);
1320 
1321 	gpu->devfreq.busy_cycles = busy_cycles;
1322 
1323 	if (WARN_ON(busy_time > ~0LU))
1324 		return ~0LU;
1325 
1326 	return (unsigned long)busy_time;
1327 }
1328 
1329 static const struct adreno_gpu_funcs funcs = {
1330 	.base = {
1331 		.get_param = adreno_get_param,
1332 		.hw_init = a5xx_hw_init,
1333 		.pm_suspend = a5xx_pm_suspend,
1334 		.pm_resume = a5xx_pm_resume,
1335 		.recover = a5xx_recover,
1336 		.submit = a5xx_submit,
1337 		.flush = a5xx_flush,
1338 		.active_ring = a5xx_active_ring,
1339 		.irq = a5xx_irq,
1340 		.destroy = a5xx_destroy,
1341 #if defined(CONFIG_DEBUG_FS) || defined(CONFIG_DEV_COREDUMP)
1342 		.show = a5xx_show,
1343 #endif
1344 #if defined(CONFIG_DEBUG_FS)
1345 		.debugfs_init = a5xx_debugfs_init,
1346 #endif
1347 		.gpu_busy = a5xx_gpu_busy,
1348 		.gpu_state_get = a5xx_gpu_state_get,
1349 		.gpu_state_put = a5xx_gpu_state_put,
1350 	},
1351 	.get_timestamp = a5xx_get_timestamp,
1352 };
1353 
1354 static void check_speed_bin(struct device *dev)
1355 {
1356 	struct nvmem_cell *cell;
357 	u32 bin, val;
	void *buf;
1358 
1359 	cell = nvmem_cell_get(dev, "speed_bin");
1360 
361 	/* If an nvmem cell isn't defined, nothing to do */
1362 	if (IS_ERR(cell))
1363 		return;
1364 
365 	buf = nvmem_cell_read(cell, NULL);
366 	nvmem_cell_put(cell);

	/* The read can fail; don't dereference or leak the returned buffer */
	if (IS_ERR(buf))
		return;

	bin = *((u32 *) buf);
	kfree(buf);
1367 
1368 	val = (1 << bin);
1369 
1370 	dev_pm_opp_set_supported_hw(dev, &val, 1);
1371 }
1372 
1373 struct msm_gpu *a5xx_gpu_init(struct drm_device *dev)
1374 {
1375 	struct msm_drm_private *priv = dev->dev_private;
1376 	struct platform_device *pdev = priv->gpu_pdev;
1377 	struct a5xx_gpu *a5xx_gpu = NULL;
1378 	struct adreno_gpu *adreno_gpu;
1379 	struct msm_gpu *gpu;
1380 	int ret;
1381 
1382 	if (!pdev) {
1383 		DRM_DEV_ERROR(dev->dev, "No A5XX device is defined\n");
1384 		return ERR_PTR(-ENXIO);
1385 	}
1386 
1387 	a5xx_gpu = kzalloc(sizeof(*a5xx_gpu), GFP_KERNEL);
1388 	if (!a5xx_gpu)
1389 		return ERR_PTR(-ENOMEM);
1390 
1391 	adreno_gpu = &a5xx_gpu->base;
1392 	gpu = &adreno_gpu->base;
1393 
1394 	adreno_gpu->registers = a5xx_registers;
1395 	adreno_gpu->reg_offsets = a5xx_register_offsets;
1396 
1397 	a5xx_gpu->lm_leakage = 0x4E001A;
1398 
1399 	check_speed_bin(&pdev->dev);
1400 
1401 	ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs, 4);
1402 	if (ret) {
1403 		a5xx_destroy(&(a5xx_gpu->base.base));
1404 		return ERR_PTR(ret);
1405 	}
1406 
1407 	if (gpu->aspace)
1408 		msm_mmu_set_fault_handler(gpu->aspace->mmu, gpu, a5xx_fault_handler);
1409 
1410 	/* Set up the preemption specific bits and pieces for each ringbuffer */
1411 	a5xx_preempt_init(gpu);
1412 
1413 	return gpu;
1414 }
1415