xref: /openbmc/linux/drivers/gpu/drm/msm/adreno/a5xx_gpu.c (revision 95777591)
1 /* Copyright (c) 2016-2017 The Linux Foundation. All rights reserved.
2  *
3  * This program is free software; you can redistribute it and/or modify
4  * it under the terms of the GNU General Public License version 2 and
5  * only version 2 as published by the Free Software Foundation.
6  *
7  * This program is distributed in the hope that it will be useful,
8  * but WITHOUT ANY WARRANTY; without even the implied warranty of
9  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10  * GNU General Public License for more details.
11  *
12  */
13 
14 #include <linux/kernel.h>
15 #include <linux/types.h>
16 #include <linux/cpumask.h>
17 #include <linux/qcom_scm.h>
18 #include <linux/dma-mapping.h>
19 #include <linux/of_address.h>
20 #include <linux/soc/qcom/mdt_loader.h>
21 #include <linux/pm_opp.h>
22 #include <linux/nvmem-consumer.h>
23 #include <linux/slab.h>
24 #include "msm_gem.h"
25 #include "msm_mmu.h"
26 #include "a5xx_gpu.h"
27 
28 extern bool hang_debug;
29 static void a5xx_dump(struct msm_gpu *gpu);
30 
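/*
 * Peripheral authentication service (PAS) id for the GPU, passed to
 * qcom_mdt_load() and qcom_scm_pas_auth_and_reset() below when loading and
 * authenticating the zap shader in the secure world.
 */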
31 #define GPU_PAS_ID 13
32 
33 static int zap_shader_load_mdt(struct msm_gpu *gpu, const char *fwname)
34 {
35 	struct device *dev = &gpu->pdev->dev;
36 	const struct firmware *fw;
37 	struct device_node *np;
38 	struct resource r;
39 	phys_addr_t mem_phys;
40 	ssize_t mem_size;
41 	void *mem_region = NULL;
42 	int ret;
43 
44 	if (!IS_ENABLED(CONFIG_ARCH_QCOM))
45 		return -EINVAL;
46 
47 	np = of_get_child_by_name(dev->of_node, "zap-shader");
48 	if (!np)
49 		return -ENODEV;
50 
51 	np = of_parse_phandle(np, "memory-region", 0);
52 	if (!np)
53 		return -EINVAL;
54 
55 	ret = of_address_to_resource(np, 0, &r);
56 	if (ret)
57 		return ret;
58 
59 	mem_phys = r.start;
60 	mem_size = resource_size(&r);
61 
62 	/* Request the MDT file for the firmware */
63 	fw = adreno_request_fw(to_adreno_gpu(gpu), fwname);
64 	if (IS_ERR(fw)) {
65 		DRM_DEV_ERROR(dev, "Unable to load %s\n", fwname);
66 		return PTR_ERR(fw);
67 	}
68 
69 	/* Figure out how much memory we need */
70 	mem_size = qcom_mdt_get_size(fw);
71 	if (mem_size < 0) {
72 		ret = mem_size;
73 		goto out;
74 	}
75 
76 	/* Allocate memory for the firmware image */
77 	mem_region = memremap(mem_phys, mem_size,  MEMREMAP_WC);
78 	if (!mem_region) {
79 		ret = -ENOMEM;
80 		goto out;
81 	}
82 
83 	/*
84 	 * Load the rest of the MDT
85 	 *
86 	 * Note that we could be dealing with two different paths, since
87 	 * with upstream linux-firmware the file would be in a qcom/
88 	 * subdirectory.  adreno_request_fw() handles this, but qcom_mdt_load()
89 	 * does not.  Since we've already gotten through adreno_request_fw()
90 	 * we know which of the two cases it is:
91 	 */
92 	if (to_adreno_gpu(gpu)->fwloc == FW_LOCATION_LEGACY) {
93 		ret = qcom_mdt_load(dev, fw, fwname, GPU_PAS_ID,
94 				mem_region, mem_phys, mem_size, NULL);
95 	} else {
96 		char *newname;
97 
98 		newname = kasprintf(GFP_KERNEL, "qcom/%s", fwname);
99 
100 		ret = qcom_mdt_load(dev, fw, newname, GPU_PAS_ID,
101 				mem_region, mem_phys, mem_size, NULL);
102 		kfree(newname);
103 	}
104 	if (ret)
105 		goto out;
106 
107 	/* Send the image to the secure world */
108 	ret = qcom_scm_pas_auth_and_reset(GPU_PAS_ID);
109 	if (ret)
110 		DRM_DEV_ERROR(dev, "Unable to authorize the image\n");
111 
112 out:
113 	if (mem_region)
114 		memunmap(mem_region);
115 
116 	release_firmware(fw);
117 
118 	return ret;
119 }
120 
121 static void a5xx_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
122 {
123 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
124 	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
125 	uint32_t wptr;
126 	unsigned long flags;
127 
128 	spin_lock_irqsave(&ring->lock, flags);
129 
130 	/* Copy the shadow to the actual register */
131 	ring->cur = ring->next;
132 
133 	/* Make sure to wrap wptr if we need to */
134 	wptr = get_wptr(ring);
135 
136 	spin_unlock_irqrestore(&ring->lock, flags);
137 
138 	/* Make sure everything is posted before making a decision */
139 	mb();
140 
141 	/* Update HW if this is the current ring and we are not in preempt */
142 	if (a5xx_gpu->cur_ring == ring && !a5xx_in_preempt(a5xx_gpu))
143 		gpu_write(gpu, REG_A5XX_CP_RB_WPTR, wptr);
144 }
145 
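/*
 * Debug submit path used with CONFIG_DRM_MSM_GPU_SUDO: instead of pointing
 * the CP at the userspace command buffers with indirect buffers, copy their
 * contents straight into the ringbuffer.
 */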
146 static void a5xx_submit_in_rb(struct msm_gpu *gpu, struct msm_gem_submit *submit,
147 	struct msm_file_private *ctx)
148 {
149 	struct msm_drm_private *priv = gpu->dev->dev_private;
150 	struct msm_ringbuffer *ring = submit->ring;
151 	struct msm_gem_object *obj;
152 	uint32_t *ptr, dwords;
153 	unsigned int i, j;
154 
155 	for (i = 0; i < submit->nr_cmds; i++) {
156 		switch (submit->cmd[i].type) {
157 		case MSM_SUBMIT_CMD_IB_TARGET_BUF:
158 			break;
159 		case MSM_SUBMIT_CMD_CTX_RESTORE_BUF:
160 			if (priv->lastctx == ctx)
161 				break;
162 		case MSM_SUBMIT_CMD_BUF:
163 			/* copy commands into RB: */
164 			obj = submit->bos[submit->cmd[i].idx].obj;
165 			dwords = submit->cmd[i].size;
166 
167 			ptr = msm_gem_get_vaddr(&obj->base);
168 
169 			/* _get_vaddr() shouldn't fail at this point,
170 			 * since we've already mapped it once in
171 			 * submit_reloc()
172 			 */
173 			if (WARN_ON(!ptr))
174 				return;
175 
176 			for (j = 0; j < dwords; j++) {
177 				/* A separate index keeps the outer cmd loop
178 				 * counter intact.  Normally OUT_PKTn() would
179 				 * wait for space for the packet, but since we
180 				 * just OUT_RING() the whole thing we need to
181 				 * call adreno_wait_ring() ourselves:
182 				 */
183 				adreno_wait_ring(ring, 1);
184 				OUT_RING(ring, ptr[j]);
185 			}
186 
187 			msm_gem_put_vaddr(&obj->base);
188 
189 			break;
190 		}
191 	}
192 
193 	a5xx_flush(gpu, ring);
194 	a5xx_preempt_trigger(gpu);
195 
196 	/* we might not necessarily have a cmd from userspace to
197 	 * trigger an event to know that submit has completed, so
198 	 * do this manually:
199 	 */
200 	a5xx_idle(gpu, ring);
201 	ring->memptrs->fence = submit->seqno;
202 	msm_gpu_retire(gpu);
203 }
204 
205 static void a5xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit,
206 	struct msm_file_private *ctx)
207 {
208 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
209 	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
210 	struct msm_drm_private *priv = gpu->dev->dev_private;
211 	struct msm_ringbuffer *ring = submit->ring;
212 	unsigned int i, ibs = 0;
213 
214 	if (IS_ENABLED(CONFIG_DRM_MSM_GPU_SUDO) && submit->in_rb) {
215 		priv->lastctx = NULL;
216 		a5xx_submit_in_rb(gpu, submit, ctx);
217 		return;
218 	}
219 
220 	OUT_PKT7(ring, CP_PREEMPT_ENABLE_GLOBAL, 1);
221 	OUT_RING(ring, 0x02);
222 
223 	/* Turn off protected mode to write to special registers */
224 	OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
225 	OUT_RING(ring, 0);
226 
227 	/* Set the save preemption record for the ring/command */
228 	OUT_PKT4(ring, REG_A5XX_CP_CONTEXT_SWITCH_SAVE_ADDR_LO, 2);
229 	OUT_RING(ring, lower_32_bits(a5xx_gpu->preempt_iova[submit->ring->id]));
230 	OUT_RING(ring, upper_32_bits(a5xx_gpu->preempt_iova[submit->ring->id]));
231 
232 	/* Turn back on protected mode */
233 	OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
234 	OUT_RING(ring, 1);
235 
236 	/* Enable local preemption for finegrain preemption */
237 	OUT_PKT7(ring, CP_PREEMPT_ENABLE_LOCAL, 1);
238 	OUT_RING(ring, 0x02);
239 
240 	/* Allow CP_CONTEXT_SWITCH_YIELD packets in the IB2 */
241 	OUT_PKT7(ring, CP_YIELD_ENABLE, 1);
242 	OUT_RING(ring, 0x02);
243 
244 	/* Submit the commands */
245 	for (i = 0; i < submit->nr_cmds; i++) {
246 		switch (submit->cmd[i].type) {
247 		case MSM_SUBMIT_CMD_IB_TARGET_BUF:
248 			break;
249 		case MSM_SUBMIT_CMD_CTX_RESTORE_BUF:
250 			if (priv->lastctx == ctx)
251 				break;
252 		case MSM_SUBMIT_CMD_BUF:
253 			OUT_PKT7(ring, CP_INDIRECT_BUFFER_PFE, 3);
254 			OUT_RING(ring, lower_32_bits(submit->cmd[i].iova));
255 			OUT_RING(ring, upper_32_bits(submit->cmd[i].iova));
256 			OUT_RING(ring, submit->cmd[i].size);
257 			ibs++;
258 			break;
259 		}
260 	}
261 
262 	/*
263 	 * Write the render mode to NULL (0) to indicate to the CP that the IBs
264 	 * are done rendering - otherwise a lucky preemption would start
265 	 * replaying from the last checkpoint
266 	 */
267 	OUT_PKT7(ring, CP_SET_RENDER_MODE, 5);
268 	OUT_RING(ring, 0);
269 	OUT_RING(ring, 0);
270 	OUT_RING(ring, 0);
271 	OUT_RING(ring, 0);
272 	OUT_RING(ring, 0);
273 
274 	/* Turn off IB level preemptions */
275 	OUT_PKT7(ring, CP_YIELD_ENABLE, 1);
276 	OUT_RING(ring, 0x01);
277 
278 	/* Write the fence to the scratch register */
279 	OUT_PKT4(ring, REG_A5XX_CP_SCRATCH_REG(2), 1);
280 	OUT_RING(ring, submit->seqno);
281 
282 	/*
283 	 * Execute a CACHE_FLUSH_TS event. This will ensure that the
284 	 * timestamp is written to the memory and then triggers the interrupt
285 	 */
286 	OUT_PKT7(ring, CP_EVENT_WRITE, 4);
287 	OUT_RING(ring, CACHE_FLUSH_TS | (1 << 31));
288 	OUT_RING(ring, lower_32_bits(rbmemptr(ring, fence)));
289 	OUT_RING(ring, upper_32_bits(rbmemptr(ring, fence)));
290 	OUT_RING(ring, submit->seqno);
291 
292 	/* Yield the floor on command completion */
293 	OUT_PKT7(ring, CP_CONTEXT_SWITCH_YIELD, 4);
294 	/*
295 	 * If dword[2:1] are non zero, they specify an address for the CP to
296 	 * write the value of dword[3] to on preemption complete. Write 0 to
297 	 * skip the write
298 	 */
299 	OUT_RING(ring, 0x00);
300 	OUT_RING(ring, 0x00);
301 	/* Data value - not used if the address above is 0 */
302 	OUT_RING(ring, 0x01);
303 	/* Set bit 0 to trigger an interrupt on preempt complete */
304 	OUT_RING(ring, 0x01);
305 
306 	a5xx_flush(gpu, ring);
307 
308 	/* Check to see if we need to start preemption */
309 	a5xx_preempt_trigger(gpu);
310 }
311 
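/*
 * Hardware clock gating (HWCG) register/value pairs.  a5xx_set_hwcg() writes
 * each value when clock gating is enabled and clears the registers to 0 when
 * it is disabled.
 */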
312 static const struct {
313 	u32 offset;
314 	u32 value;
315 } a5xx_hwcg[] = {
316 	{REG_A5XX_RBBM_CLOCK_CNTL_SP0, 0x02222222},
317 	{REG_A5XX_RBBM_CLOCK_CNTL_SP1, 0x02222222},
318 	{REG_A5XX_RBBM_CLOCK_CNTL_SP2, 0x02222222},
319 	{REG_A5XX_RBBM_CLOCK_CNTL_SP3, 0x02222222},
320 	{REG_A5XX_RBBM_CLOCK_CNTL2_SP0, 0x02222220},
321 	{REG_A5XX_RBBM_CLOCK_CNTL2_SP1, 0x02222220},
322 	{REG_A5XX_RBBM_CLOCK_CNTL2_SP2, 0x02222220},
323 	{REG_A5XX_RBBM_CLOCK_CNTL2_SP3, 0x02222220},
324 	{REG_A5XX_RBBM_CLOCK_HYST_SP0, 0x0000F3CF},
325 	{REG_A5XX_RBBM_CLOCK_HYST_SP1, 0x0000F3CF},
326 	{REG_A5XX_RBBM_CLOCK_HYST_SP2, 0x0000F3CF},
327 	{REG_A5XX_RBBM_CLOCK_HYST_SP3, 0x0000F3CF},
328 	{REG_A5XX_RBBM_CLOCK_DELAY_SP0, 0x00000080},
329 	{REG_A5XX_RBBM_CLOCK_DELAY_SP1, 0x00000080},
330 	{REG_A5XX_RBBM_CLOCK_DELAY_SP2, 0x00000080},
331 	{REG_A5XX_RBBM_CLOCK_DELAY_SP3, 0x00000080},
332 	{REG_A5XX_RBBM_CLOCK_CNTL_TP0, 0x22222222},
333 	{REG_A5XX_RBBM_CLOCK_CNTL_TP1, 0x22222222},
334 	{REG_A5XX_RBBM_CLOCK_CNTL_TP2, 0x22222222},
335 	{REG_A5XX_RBBM_CLOCK_CNTL_TP3, 0x22222222},
336 	{REG_A5XX_RBBM_CLOCK_CNTL2_TP0, 0x22222222},
337 	{REG_A5XX_RBBM_CLOCK_CNTL2_TP1, 0x22222222},
338 	{REG_A5XX_RBBM_CLOCK_CNTL2_TP2, 0x22222222},
339 	{REG_A5XX_RBBM_CLOCK_CNTL2_TP3, 0x22222222},
340 	{REG_A5XX_RBBM_CLOCK_CNTL3_TP0, 0x00002222},
341 	{REG_A5XX_RBBM_CLOCK_CNTL3_TP1, 0x00002222},
342 	{REG_A5XX_RBBM_CLOCK_CNTL3_TP2, 0x00002222},
343 	{REG_A5XX_RBBM_CLOCK_CNTL3_TP3, 0x00002222},
344 	{REG_A5XX_RBBM_CLOCK_HYST_TP0, 0x77777777},
345 	{REG_A5XX_RBBM_CLOCK_HYST_TP1, 0x77777777},
346 	{REG_A5XX_RBBM_CLOCK_HYST_TP2, 0x77777777},
347 	{REG_A5XX_RBBM_CLOCK_HYST_TP3, 0x77777777},
348 	{REG_A5XX_RBBM_CLOCK_HYST2_TP0, 0x77777777},
349 	{REG_A5XX_RBBM_CLOCK_HYST2_TP1, 0x77777777},
350 	{REG_A5XX_RBBM_CLOCK_HYST2_TP2, 0x77777777},
351 	{REG_A5XX_RBBM_CLOCK_HYST2_TP3, 0x77777777},
352 	{REG_A5XX_RBBM_CLOCK_HYST3_TP0, 0x00007777},
353 	{REG_A5XX_RBBM_CLOCK_HYST3_TP1, 0x00007777},
354 	{REG_A5XX_RBBM_CLOCK_HYST3_TP2, 0x00007777},
355 	{REG_A5XX_RBBM_CLOCK_HYST3_TP3, 0x00007777},
356 	{REG_A5XX_RBBM_CLOCK_DELAY_TP0, 0x11111111},
357 	{REG_A5XX_RBBM_CLOCK_DELAY_TP1, 0x11111111},
358 	{REG_A5XX_RBBM_CLOCK_DELAY_TP2, 0x11111111},
359 	{REG_A5XX_RBBM_CLOCK_DELAY_TP3, 0x11111111},
360 	{REG_A5XX_RBBM_CLOCK_DELAY2_TP0, 0x11111111},
361 	{REG_A5XX_RBBM_CLOCK_DELAY2_TP1, 0x11111111},
362 	{REG_A5XX_RBBM_CLOCK_DELAY2_TP2, 0x11111111},
363 	{REG_A5XX_RBBM_CLOCK_DELAY2_TP3, 0x11111111},
364 	{REG_A5XX_RBBM_CLOCK_DELAY3_TP0, 0x00001111},
365 	{REG_A5XX_RBBM_CLOCK_DELAY3_TP1, 0x00001111},
366 	{REG_A5XX_RBBM_CLOCK_DELAY3_TP2, 0x00001111},
367 	{REG_A5XX_RBBM_CLOCK_DELAY3_TP3, 0x00001111},
368 	{REG_A5XX_RBBM_CLOCK_CNTL_UCHE, 0x22222222},
369 	{REG_A5XX_RBBM_CLOCK_CNTL2_UCHE, 0x22222222},
370 	{REG_A5XX_RBBM_CLOCK_CNTL3_UCHE, 0x22222222},
371 	{REG_A5XX_RBBM_CLOCK_CNTL4_UCHE, 0x00222222},
372 	{REG_A5XX_RBBM_CLOCK_HYST_UCHE, 0x00444444},
373 	{REG_A5XX_RBBM_CLOCK_DELAY_UCHE, 0x00000002},
374 	{REG_A5XX_RBBM_CLOCK_CNTL_RB0, 0x22222222},
375 	{REG_A5XX_RBBM_CLOCK_CNTL_RB1, 0x22222222},
376 	{REG_A5XX_RBBM_CLOCK_CNTL_RB2, 0x22222222},
377 	{REG_A5XX_RBBM_CLOCK_CNTL_RB3, 0x22222222},
378 	{REG_A5XX_RBBM_CLOCK_CNTL2_RB0, 0x00222222},
379 	{REG_A5XX_RBBM_CLOCK_CNTL2_RB1, 0x00222222},
380 	{REG_A5XX_RBBM_CLOCK_CNTL2_RB2, 0x00222222},
381 	{REG_A5XX_RBBM_CLOCK_CNTL2_RB3, 0x00222222},
382 	{REG_A5XX_RBBM_CLOCK_CNTL_CCU0, 0x00022220},
383 	{REG_A5XX_RBBM_CLOCK_CNTL_CCU1, 0x00022220},
384 	{REG_A5XX_RBBM_CLOCK_CNTL_CCU2, 0x00022220},
385 	{REG_A5XX_RBBM_CLOCK_CNTL_CCU3, 0x00022220},
386 	{REG_A5XX_RBBM_CLOCK_CNTL_RAC, 0x05522222},
387 	{REG_A5XX_RBBM_CLOCK_CNTL2_RAC, 0x00505555},
388 	{REG_A5XX_RBBM_CLOCK_HYST_RB_CCU0, 0x04040404},
389 	{REG_A5XX_RBBM_CLOCK_HYST_RB_CCU1, 0x04040404},
390 	{REG_A5XX_RBBM_CLOCK_HYST_RB_CCU2, 0x04040404},
391 	{REG_A5XX_RBBM_CLOCK_HYST_RB_CCU3, 0x04040404},
392 	{REG_A5XX_RBBM_CLOCK_HYST_RAC, 0x07444044},
393 	{REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_0, 0x00000002},
394 	{REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_1, 0x00000002},
395 	{REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_2, 0x00000002},
396 	{REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_3, 0x00000002},
397 	{REG_A5XX_RBBM_CLOCK_DELAY_RAC, 0x00010011},
398 	{REG_A5XX_RBBM_CLOCK_CNTL_TSE_RAS_RBBM, 0x04222222},
399 	{REG_A5XX_RBBM_CLOCK_MODE_GPC, 0x02222222},
400 	{REG_A5XX_RBBM_CLOCK_MODE_VFD, 0x00002222},
401 	{REG_A5XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM, 0x00000000},
402 	{REG_A5XX_RBBM_CLOCK_HYST_GPC, 0x04104004},
403 	{REG_A5XX_RBBM_CLOCK_HYST_VFD, 0x00000000},
404 	{REG_A5XX_RBBM_CLOCK_DELAY_HLSQ, 0x00000000},
405 	{REG_A5XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM, 0x00004000},
406 	{REG_A5XX_RBBM_CLOCK_DELAY_GPC, 0x00000200},
407 	{REG_A5XX_RBBM_CLOCK_DELAY_VFD, 0x00002222}
408 };
409 
410 void a5xx_set_hwcg(struct msm_gpu *gpu, bool state)
411 {
412 	unsigned int i;
413 
414 	for (i = 0; i < ARRAY_SIZE(a5xx_hwcg); i++)
415 		gpu_write(gpu, a5xx_hwcg[i].offset,
416 			state ? a5xx_hwcg[i].value : 0);
417 
418 	gpu_write(gpu, REG_A5XX_RBBM_CLOCK_CNTL, state ? 0xAAA8AA00 : 0);
419 	gpu_write(gpu, REG_A5XX_RBBM_ISDB_CNT, state ? 0x182 : 0x180);
420 }
421 
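/*
 * Send the CP_ME_INIT packet that initializes the microengine state, then
 * wait for the ring to drain before continuing.
 */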
422 static int a5xx_me_init(struct msm_gpu *gpu)
423 {
424 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
425 	struct msm_ringbuffer *ring = gpu->rb[0];
426 
427 	OUT_PKT7(ring, CP_ME_INIT, 8);
428 
429 	OUT_RING(ring, 0x0000002F);
430 
431 	/* Enable multiple hardware contexts */
432 	OUT_RING(ring, 0x00000003);
433 
434 	/* Enable error detection */
435 	OUT_RING(ring, 0x20000000);
436 
437 	/* Don't enable header dump */
438 	OUT_RING(ring, 0x00000000);
439 	OUT_RING(ring, 0x00000000);
440 
441 	/* Specify workarounds for various microcode issues */
442 	if (adreno_is_a530(adreno_gpu)) {
443 		/* Workaround for token end syncs
444 		 * Force a WFI after every direct-render 3D mode draw and every
445 		 * 2D mode 3 draw
446 		 */
447 		OUT_RING(ring, 0x0000000B);
448 	} else {
449 		/* No workarounds enabled */
450 		OUT_RING(ring, 0x00000000);
451 	}
452 
453 	OUT_RING(ring, 0x00000000);
454 	OUT_RING(ring, 0x00000000);
455 
456 	gpu->funcs->flush(gpu, ring);
457 	return a5xx_idle(gpu, ring) ? 0 : -EINVAL;
458 }
459 
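/*
 * Program ring 0's preemption save record and yield the floor once so the CP
 * starts out in a clean preemption state.  Skipped when only one ring is in
 * use.
 */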
460 static int a5xx_preempt_start(struct msm_gpu *gpu)
461 {
462 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
463 	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
464 	struct msm_ringbuffer *ring = gpu->rb[0];
465 
466 	if (gpu->nr_rings == 1)
467 		return 0;
468 
469 	/* Turn off protected mode to write to special registers */
470 	OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
471 	OUT_RING(ring, 0);
472 
473 	/* Set the save preemption record for the ring/command */
474 	OUT_PKT4(ring, REG_A5XX_CP_CONTEXT_SWITCH_SAVE_ADDR_LO, 2);
475 	OUT_RING(ring, lower_32_bits(a5xx_gpu->preempt_iova[ring->id]));
476 	OUT_RING(ring, upper_32_bits(a5xx_gpu->preempt_iova[ring->id]));
477 
478 	/* Turn back on protected mode */
479 	OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
480 	OUT_RING(ring, 1);
481 
482 	OUT_PKT7(ring, CP_PREEMPT_ENABLE_GLOBAL, 1);
483 	OUT_RING(ring, 0x00);
484 
485 	OUT_PKT7(ring, CP_PREEMPT_ENABLE_LOCAL, 1);
486 	OUT_RING(ring, 0x01);
487 
488 	OUT_PKT7(ring, CP_YIELD_ENABLE, 1);
489 	OUT_RING(ring, 0x01);
490 
491 	/* Yield the floor on command completion */
492 	OUT_PKT7(ring, CP_CONTEXT_SWITCH_YIELD, 4);
493 	OUT_RING(ring, 0x00);
494 	OUT_RING(ring, 0x00);
495 	OUT_RING(ring, 0x01);
496 	OUT_RING(ring, 0x01);
497 
498 	gpu->funcs->flush(gpu, ring);
499 
500 	return a5xx_idle(gpu, ring) ? 0 : -EINVAL;
501 }
502 
503 static int a5xx_ucode_init(struct msm_gpu *gpu)
504 {
505 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
506 	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
507 	int ret;
508 
509 	if (!a5xx_gpu->pm4_bo) {
510 		a5xx_gpu->pm4_bo = adreno_fw_create_bo(gpu,
511 			adreno_gpu->fw[ADRENO_FW_PM4], &a5xx_gpu->pm4_iova);
512 
514 		if (IS_ERR(a5xx_gpu->pm4_bo)) {
515 			ret = PTR_ERR(a5xx_gpu->pm4_bo);
516 			a5xx_gpu->pm4_bo = NULL;
517 			DRM_DEV_ERROR(gpu->dev->dev, "could not allocate PM4: %d\n",
518 				ret);
519 			return ret;
520 		}
521 
522 		msm_gem_object_set_name(a5xx_gpu->pm4_bo, "pm4fw");
523 	}
524 
525 	if (!a5xx_gpu->pfp_bo) {
526 		a5xx_gpu->pfp_bo = adreno_fw_create_bo(gpu,
527 			adreno_gpu->fw[ADRENO_FW_PFP], &a5xx_gpu->pfp_iova);
528 
529 		if (IS_ERR(a5xx_gpu->pfp_bo)) {
530 			ret = PTR_ERR(a5xx_gpu->pfp_bo);
531 			a5xx_gpu->pfp_bo = NULL;
532 			DRM_DEV_ERROR(gpu->dev->dev, "could not allocate PFP: %d\n",
533 				ret);
534 			return ret;
535 		}
536 
537 		msm_gem_object_set_name(a5xx_gpu->pfp_bo, "pfpfw");
538 	}
539 
540 	gpu_write64(gpu, REG_A5XX_CP_ME_INSTR_BASE_LO,
541 		REG_A5XX_CP_ME_INSTR_BASE_HI, a5xx_gpu->pm4_iova);
542 
543 	gpu_write64(gpu, REG_A5XX_CP_PFP_INSTR_BASE_LO,
544 		REG_A5XX_CP_PFP_INSTR_BASE_HI, a5xx_gpu->pfp_iova);
545 
546 	return 0;
547 }
548 
549 #define SCM_GPU_ZAP_SHADER_RESUME 0
550 
551 static int a5xx_zap_shader_resume(struct msm_gpu *gpu)
552 {
553 	int ret;
554 
555 	ret = qcom_scm_set_remote_state(SCM_GPU_ZAP_SHADER_RESUME, GPU_PAS_ID);
556 	if (ret)
557 		DRM_ERROR("%s: zap-shader resume failed: %d\n",
558 			gpu->name, ret);
559 
560 	return ret;
561 }
562 
563 static int a5xx_zap_shader_init(struct msm_gpu *gpu)
564 {
565 	static bool loaded;
566 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
567 	struct platform_device *pdev = gpu->pdev;
568 	int ret;
569 
570 	/*
571 	 * If the zap shader is already loaded into memory we just need to kick
572 	 * the remote processor to reinitialize it
573 	 */
574 	if (loaded)
575 		return a5xx_zap_shader_resume(gpu);
576 
577 	/* We need SCM to be able to load the firmware */
578 	if (!qcom_scm_is_available()) {
579 		DRM_DEV_ERROR(&pdev->dev, "SCM is not available\n");
580 		return -EPROBE_DEFER;
581 	}
582 
583 	/* Each GPU has a target specific zap shader firmware name to use */
584 	if (!adreno_gpu->info->zapfw) {
585 		DRM_DEV_ERROR(&pdev->dev,
586 			"Zap shader firmware file not specified for this target\n");
587 		return -ENODEV;
588 	}
589 
590 	ret = zap_shader_load_mdt(gpu, adreno_gpu->info->zapfw);
591 
592 	loaded = !ret;
593 
594 	return ret;
595 }
596 
597 #define A5XX_INT_MASK (A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR | \
598 	  A5XX_RBBM_INT_0_MASK_RBBM_TRANSFER_TIMEOUT | \
599 	  A5XX_RBBM_INT_0_MASK_RBBM_ME_MS_TIMEOUT | \
600 	  A5XX_RBBM_INT_0_MASK_RBBM_PFP_MS_TIMEOUT | \
601 	  A5XX_RBBM_INT_0_MASK_RBBM_ETS_MS_TIMEOUT | \
602 	  A5XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNC_OVERFLOW | \
603 	  A5XX_RBBM_INT_0_MASK_CP_HW_ERROR | \
604 	  A5XX_RBBM_INT_0_MASK_MISC_HANG_DETECT | \
605 	  A5XX_RBBM_INT_0_MASK_CP_SW | \
606 	  A5XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS | \
607 	  A5XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS | \
608 	  A5XX_RBBM_INT_0_MASK_GPMU_VOLTAGE_DROOP)
609 
610 static int a5xx_hw_init(struct msm_gpu *gpu)
611 {
612 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
613 	int ret;
614 
615 	gpu_write(gpu, REG_A5XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x00000003);
616 
617 	/* Make all blocks contribute to the GPU BUSY perf counter */
618 	gpu_write(gpu, REG_A5XX_RBBM_PERFCTR_GPU_BUSY_MASKED, 0xFFFFFFFF);
619 
620 	/* Enable RBBM error reporting bits */
621 	gpu_write(gpu, REG_A5XX_RBBM_AHB_CNTL0, 0x00000001);
622 
623 	if (adreno_gpu->info->quirks & ADRENO_QUIRK_FAULT_DETECT_MASK) {
624 		/*
625 		 * Mask out the activity signals from RB1-3 to avoid false
626 		 * positives
627 		 */
628 
629 		gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL11,
630 			0xF0000000);
631 		gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL12,
632 			0xFFFFFFFF);
633 		gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL13,
634 			0xFFFFFFFF);
635 		gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL14,
636 			0xFFFFFFFF);
637 		gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL15,
638 			0xFFFFFFFF);
639 		gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL16,
640 			0xFFFFFFFF);
641 		gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL17,
642 			0xFFFFFFFF);
643 		gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL18,
644 			0xFFFFFFFF);
645 	}
646 
647 	/* Enable fault detection */
648 	gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_INT_CNTL,
649 		(1 << 30) | 0xFFFF);
650 
651 	/* Turn on performance counters */
652 	gpu_write(gpu, REG_A5XX_RBBM_PERFCTR_CNTL, 0x01);
653 
654 	/* Select CP0 to always count cycles */
655 	gpu_write(gpu, REG_A5XX_CP_PERFCTR_CP_SEL_0, PERF_CP_ALWAYS_COUNT);
656 
657 	/* Select RBBM0 to countable 6 to get the busy status for devfreq */
658 	gpu_write(gpu, REG_A5XX_RBBM_PERFCTR_RBBM_SEL_0, 6);
659 
660 	/* Increase VFD cache access so LRZ and other data gets evicted less */
661 	gpu_write(gpu, REG_A5XX_UCHE_CACHE_WAYS, 0x02);
662 
663 	/* Disable L2 bypass in the UCHE */
664 	gpu_write(gpu, REG_A5XX_UCHE_TRAP_BASE_LO, 0xFFFF0000);
665 	gpu_write(gpu, REG_A5XX_UCHE_TRAP_BASE_HI, 0x0001FFFF);
666 	gpu_write(gpu, REG_A5XX_UCHE_WRITE_THRU_BASE_LO, 0xFFFF0000);
667 	gpu_write(gpu, REG_A5XX_UCHE_WRITE_THRU_BASE_HI, 0x0001FFFF);
668 
669 	/* Set the GMEM VA range (0 to gpu->gmem) */
670 	gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MIN_LO, 0x00100000);
671 	gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MIN_HI, 0x00000000);
672 	gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MAX_LO,
673 		0x00100000 + adreno_gpu->gmem - 1);
674 	gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MAX_HI, 0x00000000);
675 
676 	gpu_write(gpu, REG_A5XX_CP_MEQ_THRESHOLDS, 0x40);
677 	gpu_write(gpu, REG_A5XX_CP_MERCIU_SIZE, 0x40);
678 	gpu_write(gpu, REG_A5XX_CP_ROQ_THRESHOLDS_2, 0x80000060);
679 	gpu_write(gpu, REG_A5XX_CP_ROQ_THRESHOLDS_1, 0x40201B16);
680 
681 	gpu_write(gpu, REG_A5XX_PC_DBG_ECO_CNTL, (0x400 << 11 | 0x300 << 22));
682 
683 	if (adreno_gpu->info->quirks & ADRENO_QUIRK_TWO_PASS_USE_WFI)
684 		gpu_rmw(gpu, REG_A5XX_PC_DBG_ECO_CNTL, 0, (1 << 8));
685 
686 	gpu_write(gpu, REG_A5XX_PC_DBG_ECO_CNTL, 0xc0200100);
687 
688 	/* Enable USE_RETENTION_FLOPS */
689 	gpu_write(gpu, REG_A5XX_CP_CHICKEN_DBG, 0x02000000);
690 
691 	/* Enable ME/PFP split notification */
692 	gpu_write(gpu, REG_A5XX_RBBM_AHB_CNTL1, 0xA6FFFFFF);
693 
694 	/* Enable HWCG */
695 	a5xx_set_hwcg(gpu, true);
696 
697 	gpu_write(gpu, REG_A5XX_RBBM_AHB_CNTL2, 0x0000003F);
698 
699 	/* Set the highest bank bit */
700 	gpu_write(gpu, REG_A5XX_TPL1_MODE_CNTL, 2 << 7);
701 	gpu_write(gpu, REG_A5XX_RB_MODE_CNTL, 2 << 1);
702 
703 	/* Protect registers from the CP */
704 	gpu_write(gpu, REG_A5XX_CP_PROTECT_CNTL, 0x00000007);
705 
706 	/* RBBM */
707 	gpu_write(gpu, REG_A5XX_CP_PROTECT(0), ADRENO_PROTECT_RW(0x04, 4));
708 	gpu_write(gpu, REG_A5XX_CP_PROTECT(1), ADRENO_PROTECT_RW(0x08, 8));
709 	gpu_write(gpu, REG_A5XX_CP_PROTECT(2), ADRENO_PROTECT_RW(0x10, 16));
710 	gpu_write(gpu, REG_A5XX_CP_PROTECT(3), ADRENO_PROTECT_RW(0x20, 32));
711 	gpu_write(gpu, REG_A5XX_CP_PROTECT(4), ADRENO_PROTECT_RW(0x40, 64));
712 	gpu_write(gpu, REG_A5XX_CP_PROTECT(5), ADRENO_PROTECT_RW(0x80, 64));
713 
714 	/* Content protect */
715 	gpu_write(gpu, REG_A5XX_CP_PROTECT(6),
716 		ADRENO_PROTECT_RW(REG_A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_LO,
717 			16));
718 	gpu_write(gpu, REG_A5XX_CP_PROTECT(7),
719 		ADRENO_PROTECT_RW(REG_A5XX_RBBM_SECVID_TRUST_CNTL, 2));
720 
721 	/* CP */
722 	gpu_write(gpu, REG_A5XX_CP_PROTECT(8), ADRENO_PROTECT_RW(0x800, 64));
723 	gpu_write(gpu, REG_A5XX_CP_PROTECT(9), ADRENO_PROTECT_RW(0x840, 8));
724 	gpu_write(gpu, REG_A5XX_CP_PROTECT(10), ADRENO_PROTECT_RW(0x880, 32));
725 	gpu_write(gpu, REG_A5XX_CP_PROTECT(11), ADRENO_PROTECT_RW(0xAA0, 1));
726 
727 	/* RB */
728 	gpu_write(gpu, REG_A5XX_CP_PROTECT(12), ADRENO_PROTECT_RW(0xCC0, 1));
729 	gpu_write(gpu, REG_A5XX_CP_PROTECT(13), ADRENO_PROTECT_RW(0xCF0, 2));
730 
731 	/* VPC */
732 	gpu_write(gpu, REG_A5XX_CP_PROTECT(14), ADRENO_PROTECT_RW(0xE68, 8));
733 	gpu_write(gpu, REG_A5XX_CP_PROTECT(15), ADRENO_PROTECT_RW(0xE70, 4));
734 
735 	/* UCHE */
736 	gpu_write(gpu, REG_A5XX_CP_PROTECT(16), ADRENO_PROTECT_RW(0xE80, 16));
737 
738 	if (adreno_is_a530(adreno_gpu))
739 		gpu_write(gpu, REG_A5XX_CP_PROTECT(17),
740 			ADRENO_PROTECT_RW(0x10000, 0x8000));
741 
742 	gpu_write(gpu, REG_A5XX_RBBM_SECVID_TSB_CNTL, 0);
743 	/*
744 	 * Disable the trusted memory range - we don't actually support secure
745 	 * memory rendering at this point in time and we don't want to block off
746 	 * part of the virtual memory space.
747 	 */
748 	gpu_write64(gpu, REG_A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_LO,
749 		REG_A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_HI, 0x00000000);
750 	gpu_write(gpu, REG_A5XX_RBBM_SECVID_TSB_TRUSTED_SIZE, 0x00000000);
751 
752 	ret = adreno_hw_init(gpu);
753 	if (ret)
754 		return ret;
755 
756 	a5xx_preempt_hw_init(gpu);
757 
758 	a5xx_gpmu_ucode_init(gpu);
759 
760 	ret = a5xx_ucode_init(gpu);
761 	if (ret)
762 		return ret;
763 
764 	/* Unmask the interrupts that will be handled in a5xx_irq() */
765 	gpu_write(gpu, REG_A5XX_RBBM_INT_0_MASK, A5XX_INT_MASK);
766 
767 	/* Clear ME_HALT to start the micro engine */
768 	gpu_write(gpu, REG_A5XX_CP_PFP_ME_CNTL, 0);
769 	ret = a5xx_me_init(gpu);
770 	if (ret)
771 		return ret;
772 
773 	ret = a5xx_power_init(gpu);
774 	if (ret)
775 		return ret;
776 
777 	/*
778 	 * Send a pipeline event stat to get misbehaving counters to start
779 	 * ticking correctly
780 	 */
781 	if (adreno_is_a530(adreno_gpu)) {
782 		OUT_PKT7(gpu->rb[0], CP_EVENT_WRITE, 1);
783 		OUT_RING(gpu->rb[0], 0x0F);
784 
785 		gpu->funcs->flush(gpu, gpu->rb[0]);
786 		if (!a5xx_idle(gpu, gpu->rb[0]))
787 			return -EINVAL;
788 	}
789 
790 	/*
791 	 * Try to load a zap shader into the secure world. If successful
792 	 * we can use the CP to switch out of secure mode. If not then we
793 	 * have no recourse but to try to switch ourselves out manually. If we
794 	 * guessed wrong then access to the RBBM_SECVID_TRUST_CNTL register will
795 	 * be blocked and a permissions violation will soon follow.
796 	 */
797 	ret = a5xx_zap_shader_init(gpu);
798 	if (!ret) {
799 		OUT_PKT7(gpu->rb[0], CP_SET_SECURE_MODE, 1);
800 		OUT_RING(gpu->rb[0], 0x00000000);
801 
802 		gpu->funcs->flush(gpu, gpu->rb[0]);
803 		if (!a5xx_idle(gpu, gpu->rb[0]))
804 			return -EINVAL;
805 	} else {
806 		/* Print a warning so if we die, we know why */
807 		dev_warn_once(gpu->dev->dev,
808 			"Zap shader not enabled - using SECVID_TRUST_CNTL instead\n");
809 		gpu_write(gpu, REG_A5XX_RBBM_SECVID_TRUST_CNTL, 0x0);
810 	}
811 
812 	/* Last step - yield the ringbuffer */
813 	a5xx_preempt_start(gpu);
814 
815 	return 0;
816 }
817 
818 static void a5xx_recover(struct msm_gpu *gpu)
819 {
820 	int i;
821 
822 	adreno_dump_info(gpu);
823 
824 	for (i = 0; i < 8; i++) {
825 		printk("CP_SCRATCH_REG%d: %u\n", i,
826 			gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(i)));
827 	}
828 
829 	if (hang_debug)
830 		a5xx_dump(gpu);
831 
832 	gpu_write(gpu, REG_A5XX_RBBM_SW_RESET_CMD, 1);
833 	gpu_read(gpu, REG_A5XX_RBBM_SW_RESET_CMD);
834 	gpu_write(gpu, REG_A5XX_RBBM_SW_RESET_CMD, 0);
835 	adreno_recover(gpu);
836 }
837 
838 static void a5xx_destroy(struct msm_gpu *gpu)
839 {
840 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
841 	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
842 
843 	DBG("%s", gpu->name);
844 
845 	a5xx_preempt_fini(gpu);
846 
847 	if (a5xx_gpu->pm4_bo) {
848 		msm_gem_unpin_iova(a5xx_gpu->pm4_bo, gpu->aspace);
849 		drm_gem_object_put_unlocked(a5xx_gpu->pm4_bo);
850 	}
851 
852 	if (a5xx_gpu->pfp_bo) {
853 		msm_gem_unpin_iova(a5xx_gpu->pfp_bo, gpu->aspace);
854 		drm_gem_object_put_unlocked(a5xx_gpu->pfp_bo);
855 	}
856 
857 	if (a5xx_gpu->gpmu_bo) {
858 		msm_gem_unpin_iova(a5xx_gpu->gpmu_bo, gpu->aspace);
859 		drm_gem_object_put_unlocked(a5xx_gpu->gpmu_bo);
860 	}
861 
862 	adreno_gpu_cleanup(adreno_gpu);
863 	kfree(a5xx_gpu);
864 }
865 
866 static inline bool _a5xx_check_idle(struct msm_gpu *gpu)
867 {
868 	if (gpu_read(gpu, REG_A5XX_RBBM_STATUS) & ~A5XX_RBBM_STATUS_HI_BUSY)
869 		return false;
870 
871 	/*
872 	 * Nearly every abnormality ends up pausing the GPU and triggering a
873 	 * fault so we can safely just watch for this one interrupt to fire
874 	 */
875 	return !(gpu_read(gpu, REG_A5XX_RBBM_INT_0_STATUS) &
876 		A5XX_RBBM_INT_0_MASK_MISC_HANG_DETECT);
877 }
878 
879 bool a5xx_idle(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
880 {
881 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
882 	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
883 
884 	if (ring != a5xx_gpu->cur_ring) {
885 		WARN(1, "Tried to idle a non-current ringbuffer\n");
886 		return false;
887 	}
888 
889 	/* wait for CP to drain ringbuffer: */
890 	if (!adreno_idle(gpu, ring))
891 		return false;
892 
893 	if (spin_until(_a5xx_check_idle(gpu))) {
894 		DRM_ERROR("%s: %ps: timeout waiting for GPU to idle: status %8.8X irq %8.8X rptr/wptr %d/%d\n",
895 			gpu->name, __builtin_return_address(0),
896 			gpu_read(gpu, REG_A5XX_RBBM_STATUS),
897 			gpu_read(gpu, REG_A5XX_RBBM_INT_0_STATUS),
898 			gpu_read(gpu, REG_A5XX_CP_RB_RPTR),
899 			gpu_read(gpu, REG_A5XX_CP_RB_WPTR));
900 		return false;
901 	}
902 
903 	return true;
904 }
905 
906 static int a5xx_fault_handler(void *arg, unsigned long iova, int flags)
907 {
908 	struct msm_gpu *gpu = arg;
909 	pr_warn_ratelimited("*** gpu fault: iova=%08lx, flags=%d (%u,%u,%u,%u)\n",
910 			iova, flags,
911 			gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(4)),
912 			gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(5)),
913 			gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(6)),
914 			gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(7)));
915 
916 	return -EFAULT;
917 }
918 
919 static void a5xx_cp_err_irq(struct msm_gpu *gpu)
920 {
921 	u32 status = gpu_read(gpu, REG_A5XX_CP_INTERRUPT_STATUS);
922 
923 	if (status & A5XX_CP_INT_CP_OPCODE_ERROR) {
924 		u32 val;
925 
926 		gpu_write(gpu, REG_A5XX_CP_PFP_STAT_ADDR, 0);
927 
928 		/*
929 		 * REG_A5XX_CP_PFP_STAT_DATA is indexed, and we want index 1 so
930 		 * read it twice
931 		 */
932 
933 		gpu_read(gpu, REG_A5XX_CP_PFP_STAT_DATA);
934 		val = gpu_read(gpu, REG_A5XX_CP_PFP_STAT_DATA);
935 
936 		dev_err_ratelimited(gpu->dev->dev, "CP | opcode error | possible opcode=0x%8.8X\n",
937 			val);
938 	}
939 
940 	if (status & A5XX_CP_INT_CP_HW_FAULT_ERROR)
941 		dev_err_ratelimited(gpu->dev->dev, "CP | HW fault | status=0x%8.8X\n",
942 			gpu_read(gpu, REG_A5XX_CP_HW_FAULT));
943 
944 	if (status & A5XX_CP_INT_CP_DMA_ERROR)
945 		dev_err_ratelimited(gpu->dev->dev, "CP | DMA error\n");
946 
947 	if (status & A5XX_CP_INT_CP_REGISTER_PROTECTION_ERROR) {
948 		u32 val = gpu_read(gpu, REG_A5XX_CP_PROTECT_STATUS);
949 
950 		dev_err_ratelimited(gpu->dev->dev,
951 			"CP | protected mode error | %s | addr=0x%8.8X | status=0x%8.8X\n",
952 			val & (1 << 24) ? "WRITE" : "READ",
953 			(val & 0xFFFFF) >> 2, val);
954 	}
955 
956 	if (status & A5XX_CP_INT_CP_AHB_ERROR) {
957 		u32 status = gpu_read(gpu, REG_A5XX_CP_AHB_FAULT);
958 		const char *access[16] = { "reserved", "reserved",
959 			"timestamp lo", "timestamp hi", "pfp read", "pfp write",
960 			"", "", "me read", "me write", "", "", "crashdump read",
961 			"crashdump write" };
962 
963 		dev_err_ratelimited(gpu->dev->dev,
964 			"CP | AHB error | addr=%X access=%s error=%d | status=0x%8.8X\n",
965 			status & 0xFFFFF, access[(status >> 24) & 0xF],
966 			(status & (1 << 31)), status);
967 	}
968 }
969 
970 static void a5xx_rbbm_err_irq(struct msm_gpu *gpu, u32 status)
971 {
972 	if (status & A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR) {
973 		u32 val = gpu_read(gpu, REG_A5XX_RBBM_AHB_ERROR_STATUS);
974 
975 		dev_err_ratelimited(gpu->dev->dev,
976 			"RBBM | AHB bus error | %s | addr=0x%X | ports=0x%X:0x%X\n",
977 			val & (1 << 28) ? "WRITE" : "READ",
978 			(val & 0xFFFFF) >> 2, (val >> 20) & 0x3,
979 			(val >> 24) & 0xF);
980 
981 		/* Clear the error */
982 		gpu_write(gpu, REG_A5XX_RBBM_AHB_CMD, (1 << 4));
983 
984 		/* Clear the interrupt */
985 		gpu_write(gpu, REG_A5XX_RBBM_INT_CLEAR_CMD,
986 			A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR);
987 	}
988 
989 	if (status & A5XX_RBBM_INT_0_MASK_RBBM_TRANSFER_TIMEOUT)
990 		dev_err_ratelimited(gpu->dev->dev, "RBBM | AHB transfer timeout\n");
991 
992 	if (status & A5XX_RBBM_INT_0_MASK_RBBM_ME_MS_TIMEOUT)
993 		dev_err_ratelimited(gpu->dev->dev, "RBBM | ME master split | status=0x%X\n",
994 			gpu_read(gpu, REG_A5XX_RBBM_AHB_ME_SPLIT_STATUS));
995 
996 	if (status & A5XX_RBBM_INT_0_MASK_RBBM_PFP_MS_TIMEOUT)
997 		dev_err_ratelimited(gpu->dev->dev, "RBBM | PFP master split | status=0x%X\n",
998 			gpu_read(gpu, REG_A5XX_RBBM_AHB_PFP_SPLIT_STATUS));
999 
1000 	if (status & A5XX_RBBM_INT_0_MASK_RBBM_ETS_MS_TIMEOUT)
1001 		dev_err_ratelimited(gpu->dev->dev, "RBBM | ETS master split | status=0x%X\n",
1002 			gpu_read(gpu, REG_A5XX_RBBM_AHB_ETS_SPLIT_STATUS));
1003 
1004 	if (status & A5XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNC_OVERFLOW)
1005 		dev_err_ratelimited(gpu->dev->dev, "RBBM | ATB ASYNC overflow\n");
1006 
1007 	if (status & A5XX_RBBM_INT_0_MASK_RBBM_ATB_BUS_OVERFLOW)
1008 		dev_err_ratelimited(gpu->dev->dev, "RBBM | ATB bus overflow\n");
1009 }
1010 
1011 static void a5xx_uche_err_irq(struct msm_gpu *gpu)
1012 {
1013 	uint64_t addr = (uint64_t) gpu_read(gpu, REG_A5XX_UCHE_TRAP_LOG_HI);
1014 
1015 	addr |= gpu_read(gpu, REG_A5XX_UCHE_TRAP_LOG_LO);
1016 
1017 	dev_err_ratelimited(gpu->dev->dev, "UCHE | Out of bounds access | addr=0x%llX\n",
1018 		addr);
1019 }
1020 
1021 static void a5xx_gpmu_err_irq(struct msm_gpu *gpu)
1022 {
1023 	dev_err_ratelimited(gpu->dev->dev, "GPMU | voltage droop\n");
1024 }
1025 
1026 static void a5xx_fault_detect_irq(struct msm_gpu *gpu)
1027 {
1028 	struct drm_device *dev = gpu->dev;
1029 	struct msm_drm_private *priv = dev->dev_private;
1030 	struct msm_ringbuffer *ring = gpu->funcs->active_ring(gpu);
1031 
1032 	DRM_DEV_ERROR(dev->dev, "gpu fault ring %d fence %x status %8.8X rb %4.4x/%4.4x ib1 %16.16llX/%4.4x ib2 %16.16llX/%4.4x\n",
1033 		ring ? ring->id : -1, ring ? ring->seqno : 0,
1034 		gpu_read(gpu, REG_A5XX_RBBM_STATUS),
1035 		gpu_read(gpu, REG_A5XX_CP_RB_RPTR),
1036 		gpu_read(gpu, REG_A5XX_CP_RB_WPTR),
1037 		gpu_read64(gpu, REG_A5XX_CP_IB1_BASE, REG_A5XX_CP_IB1_BASE_HI),
1038 		gpu_read(gpu, REG_A5XX_CP_IB1_BUFSZ),
1039 		gpu_read64(gpu, REG_A5XX_CP_IB2_BASE, REG_A5XX_CP_IB2_BASE_HI),
1040 		gpu_read(gpu, REG_A5XX_CP_IB2_BUFSZ));
1041 
1042 	/* Turn off the hangcheck timer to keep it from bothering us */
1043 	del_timer(&gpu->hangcheck_timer);
1044 
1045 	queue_work(priv->wq, &gpu->recover_work);
1046 }
1047 
1048 #define RBBM_ERROR_MASK \
1049 	(A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR | \
1050 	A5XX_RBBM_INT_0_MASK_RBBM_TRANSFER_TIMEOUT | \
1051 	A5XX_RBBM_INT_0_MASK_RBBM_ME_MS_TIMEOUT | \
1052 	A5XX_RBBM_INT_0_MASK_RBBM_PFP_MS_TIMEOUT | \
1053 	A5XX_RBBM_INT_0_MASK_RBBM_ETS_MS_TIMEOUT | \
1054 	A5XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNC_OVERFLOW)
1055 
1056 static irqreturn_t a5xx_irq(struct msm_gpu *gpu)
1057 {
1058 	u32 status = gpu_read(gpu, REG_A5XX_RBBM_INT_0_STATUS);
1059 
1060 	/*
1061 	 * Clear all the interrupts except RBBM_AHB_ERROR - if we clear it
1062 	 * before the source is cleared the interrupt will storm.
1063 	 */
1064 	gpu_write(gpu, REG_A5XX_RBBM_INT_CLEAR_CMD,
1065 		status & ~A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR);
1066 
1067 	/* Pass status to a5xx_rbbm_err_irq because we've already cleared it */
1068 	if (status & RBBM_ERROR_MASK)
1069 		a5xx_rbbm_err_irq(gpu, status);
1070 
1071 	if (status & A5XX_RBBM_INT_0_MASK_CP_HW_ERROR)
1072 		a5xx_cp_err_irq(gpu);
1073 
1074 	if (status & A5XX_RBBM_INT_0_MASK_MISC_HANG_DETECT)
1075 		a5xx_fault_detect_irq(gpu);
1076 
1077 	if (status & A5XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS)
1078 		a5xx_uche_err_irq(gpu);
1079 
1080 	if (status & A5XX_RBBM_INT_0_MASK_GPMU_VOLTAGE_DROOP)
1081 		a5xx_gpmu_err_irq(gpu);
1082 
1083 	if (status & A5XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS) {
1084 		a5xx_preempt_trigger(gpu);
1085 		msm_gpu_retire(gpu);
1086 	}
1087 
1088 	if (status & A5XX_RBBM_INT_0_MASK_CP_SW)
1089 		a5xx_preempt_irq(gpu);
1090 
1091 	return IRQ_HANDLED;
1092 }
1093 
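/* Map the generic adreno register names to their A5XX offsets */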
1094 static const u32 a5xx_register_offsets[REG_ADRENO_REGISTER_MAX] = {
1095 	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_BASE, REG_A5XX_CP_RB_BASE),
1096 	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_BASE_HI, REG_A5XX_CP_RB_BASE_HI),
1097 	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_RPTR_ADDR, REG_A5XX_CP_RB_RPTR_ADDR),
1098 	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_RPTR_ADDR_HI,
1099 		REG_A5XX_CP_RB_RPTR_ADDR_HI),
1100 	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_RPTR, REG_A5XX_CP_RB_RPTR),
1101 	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_WPTR, REG_A5XX_CP_RB_WPTR),
1102 	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_CNTL, REG_A5XX_CP_RB_CNTL),
1103 };
1104 
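/*
 * Pairs of (start, end) register offsets that are dumped for debugging and
 * GPU state snapshots, terminated by ~0.
 */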
1105 static const u32 a5xx_registers[] = {
1106 	0x0000, 0x0002, 0x0004, 0x0020, 0x0022, 0x0026, 0x0029, 0x002B,
1107 	0x002E, 0x0035, 0x0038, 0x0042, 0x0044, 0x0044, 0x0047, 0x0095,
1108 	0x0097, 0x00BB, 0x03A0, 0x0464, 0x0469, 0x046F, 0x04D2, 0x04D3,
1109 	0x04E0, 0x0533, 0x0540, 0x0555, 0x0800, 0x081A, 0x081F, 0x0841,
1110 	0x0860, 0x0860, 0x0880, 0x08A0, 0x0B00, 0x0B12, 0x0B15, 0x0B28,
1111 	0x0B78, 0x0B7F, 0x0BB0, 0x0BBD, 0x0BC0, 0x0BC6, 0x0BD0, 0x0C53,
1112 	0x0C60, 0x0C61, 0x0C80, 0x0C82, 0x0C84, 0x0C85, 0x0C90, 0x0C98,
1113 	0x0CA0, 0x0CA0, 0x0CB0, 0x0CB2, 0x2180, 0x2185, 0x2580, 0x2585,
1114 	0x0CC1, 0x0CC1, 0x0CC4, 0x0CC7, 0x0CCC, 0x0CCC, 0x0CD0, 0x0CD8,
1115 	0x0CE0, 0x0CE5, 0x0CE8, 0x0CE8, 0x0CEC, 0x0CF1, 0x0CFB, 0x0D0E,
1116 	0x2100, 0x211E, 0x2140, 0x2145, 0x2500, 0x251E, 0x2540, 0x2545,
1117 	0x0D10, 0x0D17, 0x0D20, 0x0D23, 0x0D30, 0x0D30, 0x20C0, 0x20C0,
1118 	0x24C0, 0x24C0, 0x0E40, 0x0E43, 0x0E4A, 0x0E4A, 0x0E50, 0x0E57,
1119 	0x0E60, 0x0E7C, 0x0E80, 0x0E8E, 0x0E90, 0x0E96, 0x0EA0, 0x0EA8,
1120 	0x0EB0, 0x0EB2, 0xE140, 0xE147, 0xE150, 0xE187, 0xE1A0, 0xE1A9,
1121 	0xE1B0, 0xE1B6, 0xE1C0, 0xE1C7, 0xE1D0, 0xE1D1, 0xE200, 0xE201,
1122 	0xE210, 0xE21C, 0xE240, 0xE268, 0xE000, 0xE006, 0xE010, 0xE09A,
1123 	0xE0A0, 0xE0A4, 0xE0AA, 0xE0EB, 0xE100, 0xE105, 0xE380, 0xE38F,
1124 	0xE3B0, 0xE3B0, 0xE400, 0xE405, 0xE408, 0xE4E9, 0xE4F0, 0xE4F0,
1125 	0xE280, 0xE280, 0xE282, 0xE2A3, 0xE2A5, 0xE2C2, 0xE940, 0xE947,
1126 	0xE950, 0xE987, 0xE9A0, 0xE9A9, 0xE9B0, 0xE9B6, 0xE9C0, 0xE9C7,
1127 	0xE9D0, 0xE9D1, 0xEA00, 0xEA01, 0xEA10, 0xEA1C, 0xEA40, 0xEA68,
1128 	0xE800, 0xE806, 0xE810, 0xE89A, 0xE8A0, 0xE8A4, 0xE8AA, 0xE8EB,
1129 	0xE900, 0xE905, 0xEB80, 0xEB8F, 0xEBB0, 0xEBB0, 0xEC00, 0xEC05,
1130 	0xEC08, 0xECE9, 0xECF0, 0xECF0, 0xEA80, 0xEA80, 0xEA82, 0xEAA3,
1131 	0xEAA5, 0xEAC2, 0xA800, 0xA800, 0xA820, 0xA828, 0xA840, 0xA87D,
1132 	0XA880, 0xA88D, 0xA890, 0xA8A3, 0xA8D0, 0xA8D8, 0xA8E0, 0xA8F5,
1133 	0xAC60, 0xAC60, ~0,
1134 };
1135 
1136 static void a5xx_dump(struct msm_gpu *gpu)
1137 {
1138 	DRM_DEV_INFO(gpu->dev->dev, "status:   %08x\n",
1139 		gpu_read(gpu, REG_A5XX_RBBM_STATUS));
1140 	adreno_dump(gpu);
1141 }
1142 
1143 static int a5xx_pm_resume(struct msm_gpu *gpu)
1144 {
1145 	int ret;
1146 
1147 	/* Turn on the core power */
1148 	ret = msm_gpu_pm_resume(gpu);
1149 	if (ret)
1150 		return ret;
1151 
1152 	/* Turn on the RBCCU domain first to limit the chances of voltage droop */
1153 	gpu_write(gpu, REG_A5XX_GPMU_RBCCU_POWER_CNTL, 0x778000);
1154 
1155 	/* Wait 3 usecs before polling */
1156 	udelay(3);
1157 
1158 	ret = spin_usecs(gpu, 20, REG_A5XX_GPMU_RBCCU_PWR_CLK_STATUS,
1159 		(1 << 20), (1 << 20));
1160 	if (ret) {
1161 		DRM_ERROR("%s: timeout waiting for RBCCU GDSC enable: %X\n",
1162 			gpu->name,
1163 			gpu_read(gpu, REG_A5XX_GPMU_RBCCU_PWR_CLK_STATUS));
1164 		return ret;
1165 	}
1166 
1167 	/* Turn on the SP domain */
1168 	gpu_write(gpu, REG_A5XX_GPMU_SP_POWER_CNTL, 0x778000);
1169 	ret = spin_usecs(gpu, 20, REG_A5XX_GPMU_SP_PWR_CLK_STATUS,
1170 		(1 << 20), (1 << 20));
1171 	if (ret)
1172 		DRM_ERROR("%s: timeout waiting for SP GDSC enable\n",
1173 			gpu->name);
1174 
1175 	return ret;
1176 }
1177 
1178 static int a5xx_pm_suspend(struct msm_gpu *gpu)
1179 {
1180 	/* Clear the VBIF pipe before shutting down */
1181 	gpu_write(gpu, REG_A5XX_VBIF_XIN_HALT_CTRL0, 0xF);
1182 	spin_until((gpu_read(gpu, REG_A5XX_VBIF_XIN_HALT_CTRL1) & 0xF) == 0xF);
1183 
1184 	gpu_write(gpu, REG_A5XX_VBIF_XIN_HALT_CTRL0, 0);
1185 
1186 	/*
1187 	 * Reset the VBIF before power collapse to avoid issue with FIFO
1188 	 * entries
1189 	 */
1190 	gpu_write(gpu, REG_A5XX_RBBM_BLOCK_SW_RESET_CMD, 0x003C0000);
1191 	gpu_write(gpu, REG_A5XX_RBBM_BLOCK_SW_RESET_CMD, 0x00000000);
1192 
1193 	return msm_gpu_pm_suspend(gpu);
1194 }
1195 
1196 static int a5xx_get_timestamp(struct msm_gpu *gpu, uint64_t *value)
1197 {
1198 	*value = gpu_read64(gpu, REG_A5XX_RBBM_PERFCTR_CP_0_LO,
1199 		REG_A5XX_RBBM_PERFCTR_CP_0_HI);
1200 
1201 	return 0;
1202 }
1203 
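/*
 * State for the CP crashdumper: a 1MB kernel buffer that holds the dump
 * script at offset 0 and receives the captured register values, plus the GPU
 * address the CP uses to run the script.
 */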
1204 struct a5xx_crashdumper {
1205 	void *ptr;
1206 	struct drm_gem_object *bo;
1207 	u64 iova;
1208 };
1209 
1210 struct a5xx_gpu_state {
1211 	struct msm_gpu_state base;
1212 	u32 *hlsqregs;
1213 };
1214 
1215 static int a5xx_crashdumper_init(struct msm_gpu *gpu,
1216 		struct a5xx_crashdumper *dumper)
1217 {
1218 	dumper->ptr = msm_gem_kernel_new_locked(gpu->dev,
1219 		SZ_1M, MSM_BO_UNCACHED, gpu->aspace,
1220 		&dumper->bo, &dumper->iova);
1221 
1222 	if (!IS_ERR(dumper->ptr))
1223 		msm_gem_object_set_name(dumper->bo, "crashdump");
1224 
1225 	return PTR_ERR_OR_ZERO(dumper->ptr);
1226 }
1227 
1228 static int a5xx_crashdumper_run(struct msm_gpu *gpu,
1229 		struct a5xx_crashdumper *dumper)
1230 {
1231 	u32 val;
1232 
1233 	if (IS_ERR_OR_NULL(dumper->ptr))
1234 		return -EINVAL;
1235 
1236 	gpu_write64(gpu, REG_A5XX_CP_CRASH_SCRIPT_BASE_LO,
1237 		REG_A5XX_CP_CRASH_SCRIPT_BASE_HI, dumper->iova);
1238 
1239 	gpu_write(gpu, REG_A5XX_CP_CRASH_DUMP_CNTL, 1);
1240 
1241 	return gpu_poll_timeout(gpu, REG_A5XX_CP_CRASH_DUMP_CNTL, val,
1242 		val & 0x04, 100, 10000);
1243 }
1244 
1245 /*
1246  * This is a list of the registers that need to be read through the HLSQ
1247  * aperture by the crashdumper.  These are not nominally accessible from
1248  * the CPU on a secure platform.
1249  */
1250 static const struct {
1251 	u32 type;
1252 	u32 regoffset;
1253 	u32 count;
1254 } a5xx_hlsq_aperture_regs[] = {
1255 	{ 0x35, 0xe00, 0x32 },   /* HLSQ non-context */
1256 	{ 0x31, 0x2080, 0x1 },   /* HLSQ 2D context 0 */
1257 	{ 0x33, 0x2480, 0x1 },   /* HLSQ 2D context 1 */
1258 	{ 0x32, 0xe780, 0x62 },  /* HLSQ 3D context 0 */
1259 	{ 0x34, 0xef80, 0x62 },  /* HLSQ 3D context 1 */
1260 	{ 0x3f, 0x0ec0, 0x40 },  /* SP non-context */
1261 	{ 0x3d, 0x2040, 0x1 },   /* SP 2D context 0 */
1262 	{ 0x3b, 0x2440, 0x1 },   /* SP 2D context 1 */
1263 	{ 0x3e, 0xe580, 0x170 }, /* SP 3D context 0 */
1264 	{ 0x3c, 0xed80, 0x170 }, /* SP 3D context 1 */
1265 	{ 0x3a, 0x0f00, 0x1c },  /* TP non-context */
1266 	{ 0x38, 0x2000, 0xa },   /* TP 2D context 0 */
1267 	{ 0x36, 0x2400, 0xa },   /* TP 2D context 1 */
1268 	{ 0x39, 0xe700, 0x80 },  /* TP 3D context 0 */
1269 	{ 0x37, 0xef00, 0x80 },  /* TP 3D context 1 */
1270 };
1271 
1272 static void a5xx_gpu_state_get_hlsq_regs(struct msm_gpu *gpu,
1273 		struct a5xx_gpu_state *a5xx_state)
1274 {
1275 	struct a5xx_crashdumper dumper = { 0 };
1276 	u32 offset, count = 0;
1277 	u64 *ptr;
1278 	int i;
1279 
1280 	if (a5xx_crashdumper_init(gpu, &dumper))
1281 		return;
1282 
1283 	/* The script will be written at offset 0 */
1284 	ptr = dumper.ptr;
1285 
1286 	/* Start writing the data at offset 256k */
1287 	offset = dumper.iova + (256 * SZ_1K);
1288 
1289 	/* Count how many additional registers to get from the HLSQ aperture */
1290 	for (i = 0; i < ARRAY_SIZE(a5xx_hlsq_aperture_regs); i++)
1291 		count += a5xx_hlsq_aperture_regs[i].count;
1292 
1293 	a5xx_state->hlsqregs = kcalloc(count, sizeof(u32), GFP_KERNEL);
1294 	if (!a5xx_state->hlsqregs)
1295 		return;
1296 
1297 	/* Build the crashdump script */
1298 	for (i = 0; i < ARRAY_SIZE(a5xx_hlsq_aperture_regs); i++) {
1299 		u32 type = a5xx_hlsq_aperture_regs[i].type;
1300 		u32 c = a5xx_hlsq_aperture_regs[i].count;
1301 
1302 		/* Write the register to select the desired bank */
1303 		*ptr++ = ((u64) type << 8);
1304 		*ptr++ = (((u64) REG_A5XX_HLSQ_DBG_READ_SEL) << 44) |
1305 			(1 << 21) | 1;
1306 
1307 		*ptr++ = offset;
1308 		*ptr++ = (((u64) REG_A5XX_HLSQ_DBG_AHB_READ_APERTURE) << 44)
1309 			| c;
1310 
1311 		offset += c * sizeof(u32);
1312 	}
1313 
1314 	/* Write two zeros to close off the script */
1315 	*ptr++ = 0;
1316 	*ptr++ = 0;
1317 
1318 	if (a5xx_crashdumper_run(gpu, &dumper)) {
1319 		kfree(a5xx_state->hlsqregs);
1320 		msm_gem_kernel_put(dumper.bo, gpu->aspace, true);
1321 		return;
1322 	}
1323 
1324 	/* Copy the data from the crashdumper to the state */
1325 	memcpy(a5xx_state->hlsqregs, dumper.ptr + (256 * SZ_1K),
1326 		count * sizeof(u32));
1327 
1328 	msm_gem_kernel_put(dumper.bo, gpu->aspace, true);
1329 }
1330 
1331 static struct msm_gpu_state *a5xx_gpu_state_get(struct msm_gpu *gpu)
1332 {
1333 	struct a5xx_gpu_state *a5xx_state = kzalloc(sizeof(*a5xx_state),
1334 			GFP_KERNEL);
1335 
1336 	if (!a5xx_state)
1337 		return ERR_PTR(-ENOMEM);
1338 
1339 	/* Temporarily disable hardware clock gating before reading the hw */
1340 	a5xx_set_hwcg(gpu, false);
1341 
1342 	/* First get the generic state from the adreno core */
1343 	adreno_gpu_state_get(gpu, &(a5xx_state->base));
1344 
1345 	a5xx_state->base.rbbm_status = gpu_read(gpu, REG_A5XX_RBBM_STATUS);
1346 
1347 	/* Get the HLSQ regs with the help of the crashdumper */
1348 	a5xx_gpu_state_get_hlsq_regs(gpu, a5xx_state);
1349 
1350 	a5xx_set_hwcg(gpu, true);
1351 
1352 	return &a5xx_state->base;
1353 }
1354 
1355 static void a5xx_gpu_state_destroy(struct kref *kref)
1356 {
1357 	struct msm_gpu_state *state = container_of(kref,
1358 		struct msm_gpu_state, ref);
1359 	struct a5xx_gpu_state *a5xx_state = container_of(state,
1360 		struct a5xx_gpu_state, base);
1361 
1362 	kfree(a5xx_state->hlsqregs);
1363 
1364 	adreno_gpu_state_destroy(state);
1365 	kfree(a5xx_state);
1366 }
1367 
1368 int a5xx_gpu_state_put(struct msm_gpu_state *state)
1369 {
1370 	if (IS_ERR_OR_NULL(state))
1371 		return 1;
1372 
1373 	return kref_put(&state->ref, a5xx_gpu_state_destroy);
1374 }
1375 
1376 
1377 #if defined(CONFIG_DEBUG_FS) || defined(CONFIG_DEV_COREDUMP)
1378 void a5xx_show(struct msm_gpu *gpu, struct msm_gpu_state *state,
1379 		struct drm_printer *p)
1380 {
1381 	int i, j;
1382 	u32 pos = 0;
1383 	struct a5xx_gpu_state *a5xx_state = container_of(state,
1384 		struct a5xx_gpu_state, base);
1385 
1386 	if (IS_ERR_OR_NULL(state))
1387 		return;
1388 
1389 	adreno_show(gpu, state, p);
1390 
1391 	/* Dump the additional a5xx HLSQ registers */
1392 	if (!a5xx_state->hlsqregs)
1393 		return;
1394 
1395 	drm_printf(p, "registers-hlsq:\n");
1396 
1397 	for (i = 0; i < ARRAY_SIZE(a5xx_hlsq_aperture_regs); i++) {
1398 		u32 o = a5xx_hlsq_aperture_regs[i].regoffset;
1399 		u32 c = a5xx_hlsq_aperture_regs[i].count;
1400 
1401 		for (j = 0; j < c; j++, pos++, o++) {
1402 			/*
1403 			 * To keep the crashdump simple we pull the entire range
1404 			 * for each register type but not all of the registers
1405 			 * in the range are valid. Fortunately invalid registers
1406 			 * stick out like a sore thumb with a value of
1407 			 * 0xdeadbeef
1408 			 */
1409 			if (a5xx_state->hlsqregs[pos] == 0xdeadbeef)
1410 				continue;
1411 
1412 			drm_printf(p, "  - { offset: 0x%04x, value: 0x%08x }\n",
1413 				o << 2, a5xx_state->hlsqregs[pos]);
1414 		}
1415 	}
1416 }
1417 #endif
1418 
1419 static struct msm_ringbuffer *a5xx_active_ring(struct msm_gpu *gpu)
1420 {
1421 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1422 	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
1423 
1424 	return a5xx_gpu->cur_ring;
1425 }
1426 
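/*
 * Return the number of microseconds the GPU spent busy since the last call,
 * computed from the delta of the RBBM busy cycle counter divided by the core
 * clock rate in MHz.
 */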
1427 static unsigned long a5xx_gpu_busy(struct msm_gpu *gpu)
1428 {
1429 	u64 busy_cycles, busy_time;
1430 
1431 	busy_cycles = gpu_read64(gpu, REG_A5XX_RBBM_PERFCTR_RBBM_0_LO,
1432 			REG_A5XX_RBBM_PERFCTR_RBBM_0_HI);
1433 
1434 	busy_time = busy_cycles - gpu->devfreq.busy_cycles;
1435 	do_div(busy_time, clk_get_rate(gpu->core_clk) / 1000000);
1436 
1437 	gpu->devfreq.busy_cycles = busy_cycles;
1438 
1439 	if (WARN_ON(busy_time > ~0LU))
1440 		return ~0LU;
1441 
1442 	return (unsigned long)busy_time;
1443 }
1444 
1445 static const struct adreno_gpu_funcs funcs = {
1446 	.base = {
1447 		.get_param = adreno_get_param,
1448 		.hw_init = a5xx_hw_init,
1449 		.pm_suspend = a5xx_pm_suspend,
1450 		.pm_resume = a5xx_pm_resume,
1451 		.recover = a5xx_recover,
1452 		.submit = a5xx_submit,
1453 		.flush = a5xx_flush,
1454 		.active_ring = a5xx_active_ring,
1455 		.irq = a5xx_irq,
1456 		.destroy = a5xx_destroy,
1457 #if defined(CONFIG_DEBUG_FS) || defined(CONFIG_DEV_COREDUMP)
1458 		.show = a5xx_show,
1459 #endif
1460 #if defined(CONFIG_DEBUG_FS)
1461 		.debugfs_init = a5xx_debugfs_init,
1462 #endif
1463 		.gpu_busy = a5xx_gpu_busy,
1464 		.gpu_state_get = a5xx_gpu_state_get,
1465 		.gpu_state_put = a5xx_gpu_state_put,
1466 	},
1467 	.get_timestamp = a5xx_get_timestamp,
1468 };
1469 
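/*
 * Read the speed_bin fuse through nvmem (if a cell is defined) and restrict
 * the OPP table to the frequencies supported by this speed bin.
 */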
1470 static void check_speed_bin(struct device *dev)
1471 {
1472 	struct nvmem_cell *cell;
1473 	u32 bin, val;
1474 
1475 	cell = nvmem_cell_get(dev, "speed_bin");
1476 
1477 	/* If an nvmem cell isn't defined, nothing to do */
1478 	if (IS_ERR(cell))
1479 		return;
1480 
1481 	bin = *((u32 *) nvmem_cell_read(cell, NULL));
1482 	nvmem_cell_put(cell);
1483 
1484 	val = (1 << bin);
1485 
1486 	dev_pm_opp_set_supported_hw(dev, &val, 1);
1487 }
1488 
1489 struct msm_gpu *a5xx_gpu_init(struct drm_device *dev)
1490 {
1491 	struct msm_drm_private *priv = dev->dev_private;
1492 	struct platform_device *pdev = priv->gpu_pdev;
1493 	struct a5xx_gpu *a5xx_gpu = NULL;
1494 	struct adreno_gpu *adreno_gpu;
1495 	struct msm_gpu *gpu;
1496 	int ret;
1497 
1498 	if (!pdev) {
1499 		DRM_DEV_ERROR(dev->dev, "No A5XX device is defined\n");
1500 		return ERR_PTR(-ENXIO);
1501 	}
1502 
1503 	a5xx_gpu = kzalloc(sizeof(*a5xx_gpu), GFP_KERNEL);
1504 	if (!a5xx_gpu)
1505 		return ERR_PTR(-ENOMEM);
1506 
1507 	adreno_gpu = &a5xx_gpu->base;
1508 	gpu = &adreno_gpu->base;
1509 
1510 	adreno_gpu->registers = a5xx_registers;
1511 	adreno_gpu->reg_offsets = a5xx_register_offsets;
1512 
1513 	a5xx_gpu->lm_leakage = 0x4E001A;
1514 
1515 	check_speed_bin(&pdev->dev);
1516 
1517 	ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs, 4);
1518 	if (ret) {
1519 		a5xx_destroy(&(a5xx_gpu->base.base));
1520 		return ERR_PTR(ret);
1521 	}
1522 
1523 	if (gpu->aspace)
1524 		msm_mmu_set_fault_handler(gpu->aspace->mmu, gpu, a5xx_fault_handler);
1525 
1526 	/* Set up the preemption specific bits and pieces for each ringbuffer */
1527 	a5xx_preempt_init(gpu);
1528 
1529 	return gpu;
1530 }
1531