xref: /openbmc/linux/drivers/gpu/drm/msm/adreno/a5xx_gpu.c (revision 82e6fdd6)
1 /* Copyright (c) 2016-2017 The Linux Foundation. All rights reserved.
2  *
3  * This program is free software; you can redistribute it and/or modify
4  * it under the terms of the GNU General Public License version 2 and
5  * only version 2 as published by the Free Software Foundation.
6  *
7  * This program is distributed in the hope that it will be useful,
8  * but WITHOUT ANY WARRANTY; without even the implied warranty of
9  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10  * GNU General Public License for more details.
11  *
12  */
13 
14 #include <linux/types.h>
15 #include <linux/cpumask.h>
16 #include <linux/qcom_scm.h>
17 #include <linux/dma-mapping.h>
#include <linux/slab.h>
18 #include <linux/of_address.h>
19 #include <linux/soc/qcom/mdt_loader.h>
20 #include <linux/pm_opp.h>
21 #include <linux/nvmem-consumer.h>
22 #include "msm_gem.h"
23 #include "msm_mmu.h"
24 #include "a5xx_gpu.h"
25 
26 extern bool hang_debug;
27 static void a5xx_dump(struct msm_gpu *gpu);
28 
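/* "Peripheral Authentication Service" ID that identifies the GPU zap-shader image to the secure world */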
29 #define GPU_PAS_ID 13
30 
31 static int zap_shader_load_mdt(struct msm_gpu *gpu, const char *fwname)
32 {
33 	struct device *dev = &gpu->pdev->dev;
34 	const struct firmware *fw;
35 	struct device_node *np, *mem_np;
36 	struct resource r;
37 	phys_addr_t mem_phys;
38 	ssize_t mem_size;
39 	void *mem_region = NULL;
40 	int ret;
41 
42 	if (!IS_ENABLED(CONFIG_ARCH_QCOM))
43 		return -EINVAL;
44 
45 	np = of_get_child_by_name(dev->of_node, "zap-shader");
46 	if (!np)
47 		return -ENODEV;
48 
49 	mem_np = of_parse_phandle(np, "memory-region", 0);
	of_node_put(np);
50 	if (!mem_np)
51 		return -EINVAL;
52 
53 	ret = of_address_to_resource(mem_np, 0, &r);
	of_node_put(mem_np);
54 	if (ret)
55 		return ret;
56 
57 	mem_phys = r.start;
58 	mem_size = resource_size(&r);
59 
60 	/* Request the MDT file for the firmware */
61 	fw = adreno_request_fw(to_adreno_gpu(gpu), fwname);
62 	if (IS_ERR(fw)) {
63 		DRM_DEV_ERROR(dev, "Unable to load %s\n", fwname);
64 		return PTR_ERR(fw);
65 	}
66 
67 	/* Figure out how much memory we need */
68 	mem_size = qcom_mdt_get_size(fw);
69 	if (mem_size < 0) {
70 		ret = mem_size;
71 		goto out;
72 	}
73 
74 	/* Allocate memory for the firmware image */
75 	mem_region = memremap(mem_phys, mem_size, MEMREMAP_WC);
76 	if (!mem_region) {
77 		ret = -ENOMEM;
78 		goto out;
79 	}
80 
81 	/*
82 	 * Load the rest of the MDT
83 	 *
84 	 * Note that we could be dealing with two different paths, since
85 	 * with upstream linux-firmware it would be in a qcom/ subdir.
86 	 * adreno_request_fw() handles this, but qcom_mdt_load() does
87 	 * not.  But since we've already gotten through adreno_request_fw()
88 	 * we know which of the two cases it is:
89 	 */
90 	if (to_adreno_gpu(gpu)->fwloc == FW_LOCATION_LEGACY) {
91 		ret = qcom_mdt_load(dev, fw, fwname, GPU_PAS_ID,
92 				mem_region, mem_phys, mem_size);
93 	} else {
94 		char newname[strlen("qcom/") + strlen(fwname) + 1];
95 
96 		sprintf(newname, "qcom/%s", fwname);
97 
98 		ret = qcom_mdt_load(dev, fw, newname, GPU_PAS_ID,
99 				mem_region, mem_phys, mem_size);
100 	}
101 	if (ret)
102 		goto out;
103 
104 	/* Send the image to the secure world */
105 	ret = qcom_scm_pas_auth_and_reset(GPU_PAS_ID);
106 	if (ret)
107 		DRM_DEV_ERROR(dev, "Unable to authorize the image\n");
108 
109 out:
110 	if (mem_region)
111 		memunmap(mem_region);
112 
113 	release_firmware(fw);
114 
115 	return ret;
116 }
117 
118 static void a5xx_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
119 {
120 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
121 	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
122 	uint32_t wptr;
123 	unsigned long flags;
124 
125 	spin_lock_irqsave(&ring->lock, flags);
126 
127 	/* Copy the shadow to the actual register */
128 	ring->cur = ring->next;
129 
130 	/* Make sure to wrap wptr if we need to */
131 	wptr = get_wptr(ring);
132 
133 	spin_unlock_irqrestore(&ring->lock, flags);
134 
135 	/* Make sure everything is posted before making a decision */
136 	mb();
137 
138 	/* Update HW if this is the current ring and we are not in preempt */
139 	if (a5xx_gpu->cur_ring == ring && !a5xx_in_preempt(a5xx_gpu))
140 		gpu_write(gpu, REG_A5XX_CP_RB_WPTR, wptr);
141 }
142 
143 static void a5xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit,
144 	struct msm_file_private *ctx)
145 {
146 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
147 	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
148 	struct msm_drm_private *priv = gpu->dev->dev_private;
149 	struct msm_ringbuffer *ring = submit->ring;
150 	unsigned int i, ibs = 0;
151 
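	/*
	 * OUT_PKT7() starts a type-7 (opcode) packet and OUT_PKT4() starts a
	 * type-4 (register write) packet; the OUT_RING() calls that follow
	 * supply the payload dwords for the packet.
	 */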
152 	OUT_PKT7(ring, CP_PREEMPT_ENABLE_GLOBAL, 1);
153 	OUT_RING(ring, 0x02);
154 
155 	/* Turn off protected mode to write to special registers */
156 	OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
157 	OUT_RING(ring, 0);
158 
159 	/* Set the save preemption record for the ring/command */
160 	OUT_PKT4(ring, REG_A5XX_CP_CONTEXT_SWITCH_SAVE_ADDR_LO, 2);
161 	OUT_RING(ring, lower_32_bits(a5xx_gpu->preempt_iova[submit->ring->id]));
162 	OUT_RING(ring, upper_32_bits(a5xx_gpu->preempt_iova[submit->ring->id]));
163 
164 	/* Turn back on protected mode */
165 	OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
166 	OUT_RING(ring, 1);
167 
168 	/* Enable local preemption for fine-grained preemption */
169 	OUT_PKT7(ring, CP_PREEMPT_ENABLE_GLOBAL, 1);
170 	OUT_RING(ring, 0x02);
171 
172 	/* Allow CP_CONTEXT_SWITCH_YIELD packets in the IB2 */
173 	OUT_PKT7(ring, CP_YIELD_ENABLE, 1);
174 	OUT_RING(ring, 0x02);
175 
176 	/* Submit the commands */
177 	for (i = 0; i < submit->nr_cmds; i++) {
178 		switch (submit->cmd[i].type) {
179 		case MSM_SUBMIT_CMD_IB_TARGET_BUF:
180 			break;
181 		case MSM_SUBMIT_CMD_CTX_RESTORE_BUF:
182 			if (priv->lastctx == ctx)
183 				break;
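			/* fall through to emit the IB if the context changed */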
184 		case MSM_SUBMIT_CMD_BUF:
185 			OUT_PKT7(ring, CP_INDIRECT_BUFFER_PFE, 3);
186 			OUT_RING(ring, lower_32_bits(submit->cmd[i].iova));
187 			OUT_RING(ring, upper_32_bits(submit->cmd[i].iova));
188 			OUT_RING(ring, submit->cmd[i].size);
189 			ibs++;
190 			break;
191 		}
192 	}
193 
194 	/*
195 	 * Write the render mode to NULL (0) to indicate to the CP that the IBs
196 	 * are done rendering - otherwise a lucky preemption would start
197 	 * replaying from the last checkpoint
198 	 */
199 	OUT_PKT7(ring, CP_SET_RENDER_MODE, 5);
200 	OUT_RING(ring, 0);
201 	OUT_RING(ring, 0);
202 	OUT_RING(ring, 0);
203 	OUT_RING(ring, 0);
204 	OUT_RING(ring, 0);
205 
206 	/* Turn off IB level preemptions */
207 	OUT_PKT7(ring, CP_YIELD_ENABLE, 1);
208 	OUT_RING(ring, 0x01);
209 
210 	/* Write the fence to the scratch register */
211 	OUT_PKT4(ring, REG_A5XX_CP_SCRATCH_REG(2), 1);
212 	OUT_RING(ring, submit->seqno);
213 
214 	/*
215 	 * Execute a CACHE_FLUSH_TS event. This will ensure that the
216 	 * timestamp is written to the memory and then triggers the interrupt
217 	 */
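	/* rbmemptr() resolves to the GPU address of this ring's fence field in the shared memptrs buffer */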
218 	OUT_PKT7(ring, CP_EVENT_WRITE, 4);
219 	OUT_RING(ring, CACHE_FLUSH_TS | (1 << 31));
220 	OUT_RING(ring, lower_32_bits(rbmemptr(ring, fence)));
221 	OUT_RING(ring, upper_32_bits(rbmemptr(ring, fence)));
222 	OUT_RING(ring, submit->seqno);
223 
224 	/* Yield the floor on command completion */
225 	OUT_PKT7(ring, CP_CONTEXT_SWITCH_YIELD, 4);
226 	/*
227 	 * If dword[2:1] are non-zero, they specify an address for the CP to
228 	 * write the value of dword[3] to on preemption complete. Write 0 to
229 	 * skip the write
230 	 */
231 	OUT_RING(ring, 0x00);
232 	OUT_RING(ring, 0x00);
233 	/* Data value - not used if the address above is 0 */
234 	OUT_RING(ring, 0x01);
235 	/* Set bit 0 to trigger an interrupt on preempt complete */
236 	OUT_RING(ring, 0x01);
237 
238 	a5xx_flush(gpu, ring);
239 
240 	/* Check to see if we need to start preemption */
241 	a5xx_preempt_trigger(gpu);
242 }
243 
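/* Hardware clock gating register settings, applied by a5xx_set_hwcg() below */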
244 static const struct {
245 	u32 offset;
246 	u32 value;
247 } a5xx_hwcg[] = {
248 	{REG_A5XX_RBBM_CLOCK_CNTL_SP0, 0x02222222},
249 	{REG_A5XX_RBBM_CLOCK_CNTL_SP1, 0x02222222},
250 	{REG_A5XX_RBBM_CLOCK_CNTL_SP2, 0x02222222},
251 	{REG_A5XX_RBBM_CLOCK_CNTL_SP3, 0x02222222},
252 	{REG_A5XX_RBBM_CLOCK_CNTL2_SP0, 0x02222220},
253 	{REG_A5XX_RBBM_CLOCK_CNTL2_SP1, 0x02222220},
254 	{REG_A5XX_RBBM_CLOCK_CNTL2_SP2, 0x02222220},
255 	{REG_A5XX_RBBM_CLOCK_CNTL2_SP3, 0x02222220},
256 	{REG_A5XX_RBBM_CLOCK_HYST_SP0, 0x0000F3CF},
257 	{REG_A5XX_RBBM_CLOCK_HYST_SP1, 0x0000F3CF},
258 	{REG_A5XX_RBBM_CLOCK_HYST_SP2, 0x0000F3CF},
259 	{REG_A5XX_RBBM_CLOCK_HYST_SP3, 0x0000F3CF},
260 	{REG_A5XX_RBBM_CLOCK_DELAY_SP0, 0x00000080},
261 	{REG_A5XX_RBBM_CLOCK_DELAY_SP1, 0x00000080},
262 	{REG_A5XX_RBBM_CLOCK_DELAY_SP2, 0x00000080},
263 	{REG_A5XX_RBBM_CLOCK_DELAY_SP3, 0x00000080},
264 	{REG_A5XX_RBBM_CLOCK_CNTL_TP0, 0x22222222},
265 	{REG_A5XX_RBBM_CLOCK_CNTL_TP1, 0x22222222},
266 	{REG_A5XX_RBBM_CLOCK_CNTL_TP2, 0x22222222},
267 	{REG_A5XX_RBBM_CLOCK_CNTL_TP3, 0x22222222},
268 	{REG_A5XX_RBBM_CLOCK_CNTL2_TP0, 0x22222222},
269 	{REG_A5XX_RBBM_CLOCK_CNTL2_TP1, 0x22222222},
270 	{REG_A5XX_RBBM_CLOCK_CNTL2_TP2, 0x22222222},
271 	{REG_A5XX_RBBM_CLOCK_CNTL2_TP3, 0x22222222},
272 	{REG_A5XX_RBBM_CLOCK_CNTL3_TP0, 0x00002222},
273 	{REG_A5XX_RBBM_CLOCK_CNTL3_TP1, 0x00002222},
274 	{REG_A5XX_RBBM_CLOCK_CNTL3_TP2, 0x00002222},
275 	{REG_A5XX_RBBM_CLOCK_CNTL3_TP3, 0x00002222},
276 	{REG_A5XX_RBBM_CLOCK_HYST_TP0, 0x77777777},
277 	{REG_A5XX_RBBM_CLOCK_HYST_TP1, 0x77777777},
278 	{REG_A5XX_RBBM_CLOCK_HYST_TP2, 0x77777777},
279 	{REG_A5XX_RBBM_CLOCK_HYST_TP3, 0x77777777},
280 	{REG_A5XX_RBBM_CLOCK_HYST2_TP0, 0x77777777},
281 	{REG_A5XX_RBBM_CLOCK_HYST2_TP1, 0x77777777},
282 	{REG_A5XX_RBBM_CLOCK_HYST2_TP2, 0x77777777},
283 	{REG_A5XX_RBBM_CLOCK_HYST2_TP3, 0x77777777},
284 	{REG_A5XX_RBBM_CLOCK_HYST3_TP0, 0x00007777},
285 	{REG_A5XX_RBBM_CLOCK_HYST3_TP1, 0x00007777},
286 	{REG_A5XX_RBBM_CLOCK_HYST3_TP2, 0x00007777},
287 	{REG_A5XX_RBBM_CLOCK_HYST3_TP3, 0x00007777},
288 	{REG_A5XX_RBBM_CLOCK_DELAY_TP0, 0x11111111},
289 	{REG_A5XX_RBBM_CLOCK_DELAY_TP1, 0x11111111},
290 	{REG_A5XX_RBBM_CLOCK_DELAY_TP2, 0x11111111},
291 	{REG_A5XX_RBBM_CLOCK_DELAY_TP3, 0x11111111},
292 	{REG_A5XX_RBBM_CLOCK_DELAY2_TP0, 0x11111111},
293 	{REG_A5XX_RBBM_CLOCK_DELAY2_TP1, 0x11111111},
294 	{REG_A5XX_RBBM_CLOCK_DELAY2_TP2, 0x11111111},
295 	{REG_A5XX_RBBM_CLOCK_DELAY2_TP3, 0x11111111},
296 	{REG_A5XX_RBBM_CLOCK_DELAY3_TP0, 0x00001111},
297 	{REG_A5XX_RBBM_CLOCK_DELAY3_TP1, 0x00001111},
298 	{REG_A5XX_RBBM_CLOCK_DELAY3_TP2, 0x00001111},
299 	{REG_A5XX_RBBM_CLOCK_DELAY3_TP3, 0x00001111},
300 	{REG_A5XX_RBBM_CLOCK_CNTL_UCHE, 0x22222222},
301 	{REG_A5XX_RBBM_CLOCK_CNTL2_UCHE, 0x22222222},
302 	{REG_A5XX_RBBM_CLOCK_CNTL3_UCHE, 0x22222222},
303 	{REG_A5XX_RBBM_CLOCK_CNTL4_UCHE, 0x00222222},
304 	{REG_A5XX_RBBM_CLOCK_HYST_UCHE, 0x00444444},
305 	{REG_A5XX_RBBM_CLOCK_DELAY_UCHE, 0x00000002},
306 	{REG_A5XX_RBBM_CLOCK_CNTL_RB0, 0x22222222},
307 	{REG_A5XX_RBBM_CLOCK_CNTL_RB1, 0x22222222},
308 	{REG_A5XX_RBBM_CLOCK_CNTL_RB2, 0x22222222},
309 	{REG_A5XX_RBBM_CLOCK_CNTL_RB3, 0x22222222},
310 	{REG_A5XX_RBBM_CLOCK_CNTL2_RB0, 0x00222222},
311 	{REG_A5XX_RBBM_CLOCK_CNTL2_RB1, 0x00222222},
312 	{REG_A5XX_RBBM_CLOCK_CNTL2_RB2, 0x00222222},
313 	{REG_A5XX_RBBM_CLOCK_CNTL2_RB3, 0x00222222},
314 	{REG_A5XX_RBBM_CLOCK_CNTL_CCU0, 0x00022220},
315 	{REG_A5XX_RBBM_CLOCK_CNTL_CCU1, 0x00022220},
316 	{REG_A5XX_RBBM_CLOCK_CNTL_CCU2, 0x00022220},
317 	{REG_A5XX_RBBM_CLOCK_CNTL_CCU3, 0x00022220},
318 	{REG_A5XX_RBBM_CLOCK_CNTL_RAC, 0x05522222},
319 	{REG_A5XX_RBBM_CLOCK_CNTL2_RAC, 0x00505555},
320 	{REG_A5XX_RBBM_CLOCK_HYST_RB_CCU0, 0x04040404},
321 	{REG_A5XX_RBBM_CLOCK_HYST_RB_CCU1, 0x04040404},
322 	{REG_A5XX_RBBM_CLOCK_HYST_RB_CCU2, 0x04040404},
323 	{REG_A5XX_RBBM_CLOCK_HYST_RB_CCU3, 0x04040404},
324 	{REG_A5XX_RBBM_CLOCK_HYST_RAC, 0x07444044},
325 	{REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_0, 0x00000002},
326 	{REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_1, 0x00000002},
327 	{REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_2, 0x00000002},
328 	{REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_3, 0x00000002},
329 	{REG_A5XX_RBBM_CLOCK_DELAY_RAC, 0x00010011},
330 	{REG_A5XX_RBBM_CLOCK_CNTL_TSE_RAS_RBBM, 0x04222222},
331 	{REG_A5XX_RBBM_CLOCK_MODE_GPC, 0x02222222},
332 	{REG_A5XX_RBBM_CLOCK_MODE_VFD, 0x00002222},
333 	{REG_A5XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM, 0x00000000},
334 	{REG_A5XX_RBBM_CLOCK_HYST_GPC, 0x04104004},
335 	{REG_A5XX_RBBM_CLOCK_HYST_VFD, 0x00000000},
336 	{REG_A5XX_RBBM_CLOCK_DELAY_HLSQ, 0x00000000},
337 	{REG_A5XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM, 0x00004000},
338 	{REG_A5XX_RBBM_CLOCK_DELAY_GPC, 0x00000200},
339 	{REG_A5XX_RBBM_CLOCK_DELAY_VFD, 0x00002222}
340 };
341 
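/*
 * Toggle hardware clock gating; this is also used to temporarily disable
 * clock gating while the registers are dumped (see a5xx_show() below).
 */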
342 void a5xx_set_hwcg(struct msm_gpu *gpu, bool state)
343 {
344 	unsigned int i;
345 
346 	for (i = 0; i < ARRAY_SIZE(a5xx_hwcg); i++)
347 		gpu_write(gpu, a5xx_hwcg[i].offset,
348 			state ? a5xx_hwcg[i].value : 0);
349 
350 	gpu_write(gpu, REG_A5XX_RBBM_CLOCK_CNTL, state ? 0xAAA8AA00 : 0);
351 	gpu_write(gpu, REG_A5XX_RBBM_ISDB_CNT, state ? 0x182 : 0x180);
352 }
353 
354 static int a5xx_me_init(struct msm_gpu *gpu)
355 {
356 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
357 	struct msm_ringbuffer *ring = gpu->rb[0];
358 
359 	OUT_PKT7(ring, CP_ME_INIT, 8);
360 
361 	OUT_RING(ring, 0x0000002F);
362 
363 	/* Enable multiple hardware contexts */
364 	OUT_RING(ring, 0x00000003);
365 
366 	/* Enable error detection */
367 	OUT_RING(ring, 0x20000000);
368 
369 	/* Don't enable header dump */
370 	OUT_RING(ring, 0x00000000);
371 	OUT_RING(ring, 0x00000000);
372 
373 	/* Specify workarounds for various microcode issues */
374 	if (adreno_is_a530(adreno_gpu)) {
375 		/* Workaround for token end syncs
376 		 * Force a WFI after every direct-render 3D mode draw and every
377 		 * 2D mode 3 draw
378 		 */
379 		OUT_RING(ring, 0x0000000B);
380 	} else {
381 		/* No workarounds enabled */
382 		OUT_RING(ring, 0x00000000);
383 	}
384 
385 	OUT_RING(ring, 0x00000000);
386 	OUT_RING(ring, 0x00000000);
387 
388 	gpu->funcs->flush(gpu, ring);
389 	return a5xx_idle(gpu, ring) ? 0 : -EINVAL;
390 }
391 
392 static int a5xx_preempt_start(struct msm_gpu *gpu)
393 {
394 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
395 	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
396 	struct msm_ringbuffer *ring = gpu->rb[0];
397 
398 	if (gpu->nr_rings == 1)
399 		return 0;
400 
401 	/* Turn off protected mode to write to special registers */
402 	OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
403 	OUT_RING(ring, 0);
404 
405 	/* Set the save preemption record for the ring/command */
406 	OUT_PKT4(ring, REG_A5XX_CP_CONTEXT_SWITCH_SAVE_ADDR_LO, 2);
407 	OUT_RING(ring, lower_32_bits(a5xx_gpu->preempt_iova[ring->id]));
408 	OUT_RING(ring, upper_32_bits(a5xx_gpu->preempt_iova[ring->id]));
409 
410 	/* Turn back on protected mode */
411 	OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
412 	OUT_RING(ring, 1);
413 
414 	OUT_PKT7(ring, CP_PREEMPT_ENABLE_GLOBAL, 1);
415 	OUT_RING(ring, 0x00);
416 
417 	OUT_PKT7(ring, CP_PREEMPT_ENABLE_LOCAL, 1);
418 	OUT_RING(ring, 0x01);
419 
420 	OUT_PKT7(ring, CP_YIELD_ENABLE, 1);
421 	OUT_RING(ring, 0x01);
422 
423 	/* Yield the floor on command completion */
424 	OUT_PKT7(ring, CP_CONTEXT_SWITCH_YIELD, 4);
425 	OUT_RING(ring, 0x00);
426 	OUT_RING(ring, 0x00);
427 	OUT_RING(ring, 0x01);
428 	OUT_RING(ring, 0x01);
429 
430 	gpu->funcs->flush(gpu, ring);
431 
432 	return a5xx_idle(gpu, ring) ? 0 : -EINVAL;
433 }
434 
435 
436 static struct drm_gem_object *a5xx_ucode_load_bo(struct msm_gpu *gpu,
437 		const struct firmware *fw, u64 *iova)
438 {
439 	struct drm_gem_object *bo;
440 	void *ptr;
441 
442 	ptr = msm_gem_kernel_new_locked(gpu->dev, fw->size - 4,
443 		MSM_BO_UNCACHED | MSM_BO_GPU_READONLY, gpu->aspace, &bo, iova);
444 
445 	if (IS_ERR(ptr))
446 		return ERR_CAST(ptr);
447 
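	/* Skip the first dword of the firmware image - it is a header word, not microcode */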
448 	memcpy(ptr, &fw->data[4], fw->size - 4);
449 
450 	msm_gem_put_vaddr(bo);
451 	return bo;
452 }
453 
454 static int a5xx_ucode_init(struct msm_gpu *gpu)
455 {
456 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
457 	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
458 	int ret;
459 
460 	if (!a5xx_gpu->pm4_bo) {
461 		a5xx_gpu->pm4_bo = a5xx_ucode_load_bo(gpu, adreno_gpu->pm4,
462 			&a5xx_gpu->pm4_iova);
463 
464 		if (IS_ERR(a5xx_gpu->pm4_bo)) {
465 			ret = PTR_ERR(a5xx_gpu->pm4_bo);
466 			a5xx_gpu->pm4_bo = NULL;
467 			dev_err(gpu->dev->dev, "could not allocate PM4: %d\n",
468 				ret);
469 			return ret;
470 		}
471 	}
472 
473 	if (!a5xx_gpu->pfp_bo) {
474 		a5xx_gpu->pfp_bo = a5xx_ucode_load_bo(gpu, adreno_gpu->pfp,
475 			&a5xx_gpu->pfp_iova);
476 
477 		if (IS_ERR(a5xx_gpu->pfp_bo)) {
478 			ret = PTR_ERR(a5xx_gpu->pfp_bo);
479 			a5xx_gpu->pfp_bo = NULL;
480 			dev_err(gpu->dev->dev, "could not allocate PFP: %d\n",
481 				ret);
482 			return ret;
483 		}
484 	}
485 
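	/* gpu_write64() splits each 64-bit IOVA across the _LO/_HI register pair */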
486 	gpu_write64(gpu, REG_A5XX_CP_ME_INSTR_BASE_LO,
487 		REG_A5XX_CP_ME_INSTR_BASE_HI, a5xx_gpu->pm4_iova);
488 
489 	gpu_write64(gpu, REG_A5XX_CP_PFP_INSTR_BASE_LO,
490 		REG_A5XX_CP_PFP_INSTR_BASE_HI, a5xx_gpu->pfp_iova);
491 
492 	return 0;
493 }
494 
495 #define SCM_GPU_ZAP_SHADER_RESUME 0
496 
497 static int a5xx_zap_shader_resume(struct msm_gpu *gpu)
498 {
499 	int ret;
500 
501 	ret = qcom_scm_set_remote_state(SCM_GPU_ZAP_SHADER_RESUME, GPU_PAS_ID);
502 	if (ret)
503 		DRM_ERROR("%s: zap-shader resume failed: %d\n",
504 			gpu->name, ret);
505 
506 	return ret;
507 }
508 
509 static int a5xx_zap_shader_init(struct msm_gpu *gpu)
510 {
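	/*
	 * 'loaded' is static so that the zap image is only loaded into the
	 * secure world once per boot
	 */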
511 	static bool loaded;
512 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
513 	struct platform_device *pdev = gpu->pdev;
514 	int ret;
515 
516 	/*
517 	 * If the zap shader is already loaded into memory we just need to kick
518 	 * the remote processor to reinitialize it
519 	 */
520 	if (loaded)
521 		return a5xx_zap_shader_resume(gpu);
522 
523 	/* We need SCM to be able to load the firmware */
524 	if (!qcom_scm_is_available()) {
525 		DRM_DEV_ERROR(&pdev->dev, "SCM is not available\n");
526 		return -EPROBE_DEFER;
527 	}
528 
529 	/* Each GPU has a target-specific zap shader firmware name to use */
530 	if (!adreno_gpu->info->zapfw) {
531 		DRM_DEV_ERROR(&pdev->dev,
532 			"Zap shader firmware file not specified for this target\n");
533 		return -ENODEV;
534 	}
535 
536 	ret = zap_shader_load_mdt(gpu, adreno_gpu->info->zapfw);
537 
538 	loaded = !ret;
539 
540 	return ret;
541 }
542 
543 #define A5XX_INT_MASK (A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR | \
544 	  A5XX_RBBM_INT_0_MASK_RBBM_TRANSFER_TIMEOUT | \
545 	  A5XX_RBBM_INT_0_MASK_RBBM_ME_MS_TIMEOUT | \
546 	  A5XX_RBBM_INT_0_MASK_RBBM_PFP_MS_TIMEOUT | \
547 	  A5XX_RBBM_INT_0_MASK_RBBM_ETS_MS_TIMEOUT | \
548 	  A5XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNC_OVERFLOW | \
549 	  A5XX_RBBM_INT_0_MASK_CP_HW_ERROR | \
550 	  A5XX_RBBM_INT_0_MASK_MISC_HANG_DETECT | \
551 	  A5XX_RBBM_INT_0_MASK_CP_SW | \
552 	  A5XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS | \
553 	  A5XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS | \
554 	  A5XX_RBBM_INT_0_MASK_GPMU_VOLTAGE_DROOP)
555 
556 static int a5xx_hw_init(struct msm_gpu *gpu)
557 {
558 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
559 	int ret;
560 
561 	gpu_write(gpu, REG_A5XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x00000003);
562 
563 	/* Make all blocks contribute to the GPU BUSY perf counter */
564 	gpu_write(gpu, REG_A5XX_RBBM_PERFCTR_GPU_BUSY_MASKED, 0xFFFFFFFF);
565 
566 	/* Enable RBBM error reporting bits */
567 	gpu_write(gpu, REG_A5XX_RBBM_AHB_CNTL0, 0x00000001);
568 
569 	if (adreno_gpu->info->quirks & ADRENO_QUIRK_FAULT_DETECT_MASK) {
570 		/*
571 		 * Mask out the activity signals from RB1-3 to avoid false
572 		 * positives
573 		 */
574 
575 		gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL11,
576 			0xF0000000);
577 		gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL12,
578 			0xFFFFFFFF);
579 		gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL13,
580 			0xFFFFFFFF);
581 		gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL14,
582 			0xFFFFFFFF);
583 		gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL15,
584 			0xFFFFFFFF);
585 		gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL16,
586 			0xFFFFFFFF);
587 		gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL17,
588 			0xFFFFFFFF);
589 		gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL18,
590 			0xFFFFFFFF);
591 	}
592 
593 	/* Enable fault detection */
594 	gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_INT_CNTL,
595 		(1 << 30) | 0xFFFF);
596 
597 	/* Turn on performance counters */
598 	gpu_write(gpu, REG_A5XX_RBBM_PERFCTR_CNTL, 0x01);
599 
600 	/* Select CP0 to always count cycles */
601 	gpu_write(gpu, REG_A5XX_CP_PERFCTR_CP_SEL_0, PERF_CP_ALWAYS_COUNT);
602 
603 	/* Program RBBM perfcounter 0 with countable 6 so it reports the busy status for devfreq */
604 	gpu_write(gpu, REG_A5XX_RBBM_PERFCTR_RBBM_SEL_0, 6);
605 
606 	/* Increase VFD cache access so LRZ and other data gets evicted less */
607 	gpu_write(gpu, REG_A5XX_UCHE_CACHE_WAYS, 0x02);
608 
609 	/* Disable L2 bypass in the UCHE */
610 	gpu_write(gpu, REG_A5XX_UCHE_TRAP_BASE_LO, 0xFFFF0000);
611 	gpu_write(gpu, REG_A5XX_UCHE_TRAP_BASE_HI, 0x0001FFFF);
612 	gpu_write(gpu, REG_A5XX_UCHE_WRITE_THRU_BASE_LO, 0xFFFF0000);
613 	gpu_write(gpu, REG_A5XX_UCHE_WRITE_THRU_BASE_HI, 0x0001FFFF);
614 
615 	/* Set the GMEM VA range (0x00100000 to 0x00100000 + gpu->gmem) */
616 	gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MIN_LO, 0x00100000);
617 	gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MIN_HI, 0x00000000);
618 	gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MAX_LO,
619 		0x00100000 + adreno_gpu->gmem - 1);
620 	gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MAX_HI, 0x00000000);
621 
622 	gpu_write(gpu, REG_A5XX_CP_MEQ_THRESHOLDS, 0x40);
623 	gpu_write(gpu, REG_A5XX_CP_MERCIU_SIZE, 0x40);
624 	gpu_write(gpu, REG_A5XX_CP_ROQ_THRESHOLDS_2, 0x80000060);
625 	gpu_write(gpu, REG_A5XX_CP_ROQ_THRESHOLDS_1, 0x40201B16);
626 
627 	gpu_write(gpu, REG_A5XX_PC_DBG_ECO_CNTL, (0x400 << 11 | 0x300 << 22));
628 
629 	if (adreno_gpu->info->quirks & ADRENO_QUIRK_TWO_PASS_USE_WFI)
630 		gpu_rmw(gpu, REG_A5XX_PC_DBG_ECO_CNTL, 0, (1 << 8));
631 
632 	gpu_write(gpu, REG_A5XX_PC_DBG_ECO_CNTL, 0xc0200100);
633 
634 	/* Enable USE_RETENTION_FLOPS */
635 	gpu_write(gpu, REG_A5XX_CP_CHICKEN_DBG, 0x02000000);
636 
637 	/* Enable ME/PFP split notification */
638 	gpu_write(gpu, REG_A5XX_RBBM_AHB_CNTL1, 0xA6FFFFFF);
639 
640 	/* Enable HWCG */
641 	a5xx_set_hwcg(gpu, true);
642 
643 	gpu_write(gpu, REG_A5XX_RBBM_AHB_CNTL2, 0x0000003F);
644 
645 	/* Set the highest bank bit */
646 	gpu_write(gpu, REG_A5XX_TPL1_MODE_CNTL, 2 << 7);
647 	gpu_write(gpu, REG_A5XX_RB_MODE_CNTL, 2 << 1);
648 
649 	/* Protect registers from the CP */
650 	gpu_write(gpu, REG_A5XX_CP_PROTECT_CNTL, 0x00000007);
651 
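	/*
	 * Each CP_PROTECT entry covers a range of registers (rounded up to a
	 * power of two) that unprivileged CP packets may not access; violations
	 * show up as the protected mode error handled in a5xx_cp_err_irq()
	 */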
652 	/* RBBM */
653 	gpu_write(gpu, REG_A5XX_CP_PROTECT(0), ADRENO_PROTECT_RW(0x04, 4));
654 	gpu_write(gpu, REG_A5XX_CP_PROTECT(1), ADRENO_PROTECT_RW(0x08, 8));
655 	gpu_write(gpu, REG_A5XX_CP_PROTECT(2), ADRENO_PROTECT_RW(0x10, 16));
656 	gpu_write(gpu, REG_A5XX_CP_PROTECT(3), ADRENO_PROTECT_RW(0x20, 32));
657 	gpu_write(gpu, REG_A5XX_CP_PROTECT(4), ADRENO_PROTECT_RW(0x40, 64));
658 	gpu_write(gpu, REG_A5XX_CP_PROTECT(5), ADRENO_PROTECT_RW(0x80, 64));
659 
660 	/* Content protect */
661 	gpu_write(gpu, REG_A5XX_CP_PROTECT(6),
662 		ADRENO_PROTECT_RW(REG_A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_LO,
663 			16));
664 	gpu_write(gpu, REG_A5XX_CP_PROTECT(7),
665 		ADRENO_PROTECT_RW(REG_A5XX_RBBM_SECVID_TRUST_CNTL, 2));
666 
667 	/* CP */
668 	gpu_write(gpu, REG_A5XX_CP_PROTECT(8), ADRENO_PROTECT_RW(0x800, 64));
669 	gpu_write(gpu, REG_A5XX_CP_PROTECT(9), ADRENO_PROTECT_RW(0x840, 8));
670 	gpu_write(gpu, REG_A5XX_CP_PROTECT(10), ADRENO_PROTECT_RW(0x880, 32));
671 	gpu_write(gpu, REG_A5XX_CP_PROTECT(11), ADRENO_PROTECT_RW(0xAA0, 1));
672 
673 	/* RB */
674 	gpu_write(gpu, REG_A5XX_CP_PROTECT(12), ADRENO_PROTECT_RW(0xCC0, 1));
675 	gpu_write(gpu, REG_A5XX_CP_PROTECT(13), ADRENO_PROTECT_RW(0xCF0, 2));
676 
677 	/* VPC */
678 	gpu_write(gpu, REG_A5XX_CP_PROTECT(14), ADRENO_PROTECT_RW(0xE68, 8));
679 	gpu_write(gpu, REG_A5XX_CP_PROTECT(15), ADRENO_PROTECT_RW(0xE70, 4));
680 
681 	/* UCHE */
682 	gpu_write(gpu, REG_A5XX_CP_PROTECT(16), ADRENO_PROTECT_RW(0xE80, 16));
683 
684 	if (adreno_is_a530(adreno_gpu))
685 		gpu_write(gpu, REG_A5XX_CP_PROTECT(17),
686 			ADRENO_PROTECT_RW(0x10000, 0x8000));
687 
688 	gpu_write(gpu, REG_A5XX_RBBM_SECVID_TSB_CNTL, 0);
689 	/*
690 	 * Disable the trusted memory range - we don't actually support secure
691 	 * memory rendering at this point in time and we don't want to block off
692 	 * part of the virtual memory space.
693 	 */
694 	gpu_write64(gpu, REG_A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_LO,
695 		REG_A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_HI, 0x00000000);
696 	gpu_write(gpu, REG_A5XX_RBBM_SECVID_TSB_TRUSTED_SIZE, 0x00000000);
697 
698 	ret = adreno_hw_init(gpu);
699 	if (ret)
700 		return ret;
701 
702 	a5xx_preempt_hw_init(gpu);
703 
704 	a5xx_gpmu_ucode_init(gpu);
705 
706 	ret = a5xx_ucode_init(gpu);
707 	if (ret)
708 		return ret;
709 
710 	/* Set the RBBM interrupt mask to the interrupts we want to service */
711 	gpu_write(gpu, REG_A5XX_RBBM_INT_0_MASK, A5XX_INT_MASK);
712 
713 	/* Clear ME_HALT to start the micro engine */
714 	gpu_write(gpu, REG_A5XX_CP_PFP_ME_CNTL, 0);
715 	ret = a5xx_me_init(gpu);
716 	if (ret)
717 		return ret;
718 
719 	ret = a5xx_power_init(gpu);
720 	if (ret)
721 		return ret;
722 
723 	/*
724 	 * Send a pipeline event stat to get misbehaving counters to start
725 	 * ticking correctly
726 	 */
727 	if (adreno_is_a530(adreno_gpu)) {
728 		OUT_PKT7(gpu->rb[0], CP_EVENT_WRITE, 1);
729 		OUT_RING(gpu->rb[0], 0x0F);
730 
731 		gpu->funcs->flush(gpu, gpu->rb[0]);
732 		if (!a5xx_idle(gpu, gpu->rb[0]))
733 			return -EINVAL;
734 	}
735 
736 	/*
737 	 * Try to load a zap shader into the secure world. If successful
738 	 * we can use the CP to switch out of secure mode. If not then we
739 	 * have no recourse but to try to switch ourselves out manually. If we
740 	 * guessed wrong then access to the RBBM_SECVID_TRUST_CNTL register will
741 	 * be blocked and a permissions violation will soon follow.
742 	 */
743 	ret = a5xx_zap_shader_init(gpu);
744 	if (!ret) {
745 		OUT_PKT7(gpu->rb[0], CP_SET_SECURE_MODE, 1);
746 		OUT_RING(gpu->rb[0], 0x00000000);
747 
748 		gpu->funcs->flush(gpu, gpu->rb[0]);
749 		if (!a5xx_idle(gpu, gpu->rb[0]))
750 			return -EINVAL;
751 	} else {
752 		/* Print a warning so if we die, we know why */
753 		dev_warn_once(gpu->dev->dev,
754 			"Zap shader not enabled - using SECVID_TRUST_CNTL instead\n");
755 		gpu_write(gpu, REG_A5XX_RBBM_SECVID_TRUST_CNTL, 0x0);
756 	}
757 
758 	/* Last step - yield the ringbuffer */
759 	a5xx_preempt_start(gpu);
760 
761 	return 0;
762 }
763 
764 static void a5xx_recover(struct msm_gpu *gpu)
765 {
766 	int i;
767 
768 	adreno_dump_info(gpu);
769 
770 	for (i = 0; i < 8; i++) {
771 		printk("CP_SCRATCH_REG%d: %u\n", i,
772 			gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(i)));
773 	}
774 
775 	if (hang_debug)
776 		a5xx_dump(gpu);
777 
778 	gpu_write(gpu, REG_A5XX_RBBM_SW_RESET_CMD, 1);
779 	gpu_read(gpu, REG_A5XX_RBBM_SW_RESET_CMD);
780 	gpu_write(gpu, REG_A5XX_RBBM_SW_RESET_CMD, 0);
781 	adreno_recover(gpu);
782 }
783 
784 static void a5xx_destroy(struct msm_gpu *gpu)
785 {
786 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
787 	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
788 
789 	DBG("%s", gpu->name);
790 
791 	a5xx_preempt_fini(gpu);
792 
793 	if (a5xx_gpu->pm4_bo) {
794 		if (a5xx_gpu->pm4_iova)
795 			msm_gem_put_iova(a5xx_gpu->pm4_bo, gpu->aspace);
796 		drm_gem_object_unreference_unlocked(a5xx_gpu->pm4_bo);
797 	}
798 
799 	if (a5xx_gpu->pfp_bo) {
800 		if (a5xx_gpu->pfp_iova)
801 			msm_gem_put_iova(a5xx_gpu->pfp_bo, gpu->aspace);
802 		drm_gem_object_unreference_unlocked(a5xx_gpu->pfp_bo);
803 	}
804 
805 	if (a5xx_gpu->gpmu_bo) {
806 		if (a5xx_gpu->gpmu_iova)
807 			msm_gem_put_iova(a5xx_gpu->gpmu_bo, gpu->aspace);
808 		drm_gem_object_unreference_unlocked(a5xx_gpu->gpmu_bo);
809 	}
810 
811 	adreno_gpu_cleanup(adreno_gpu);
812 	kfree(a5xx_gpu);
813 }
814 
815 static inline bool _a5xx_check_idle(struct msm_gpu *gpu)
816 {
817 	if (gpu_read(gpu, REG_A5XX_RBBM_STATUS) & ~A5XX_RBBM_STATUS_HI_BUSY)
818 		return false;
819 
820 	/*
821 	 * Nearly every abnormality ends up pausing the GPU and triggering a
822 	 * fault so we can safely just watch for this one interrupt to fire
823 	 */
824 	return !(gpu_read(gpu, REG_A5XX_RBBM_INT_0_STATUS) &
825 		A5XX_RBBM_INT_0_MASK_MISC_HANG_DETECT);
826 }
827 
828 bool a5xx_idle(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
829 {
830 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
831 	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
832 
833 	if (ring != a5xx_gpu->cur_ring) {
834 		WARN(1, "Tried to idle a non-current ringbuffer\n");
835 		return false;
836 	}
837 
838 	/* wait for CP to drain ringbuffer: */
839 	if (!adreno_idle(gpu, ring))
840 		return false;
841 
842 	if (spin_until(_a5xx_check_idle(gpu))) {
843 		DRM_ERROR("%s: %ps: timeout waiting for GPU to idle: status %8.8X irq %8.8X rptr/wptr %d/%d\n",
844 			gpu->name, __builtin_return_address(0),
845 			gpu_read(gpu, REG_A5XX_RBBM_STATUS),
846 			gpu_read(gpu, REG_A5XX_RBBM_INT_0_STATUS),
847 			gpu_read(gpu, REG_A5XX_CP_RB_RPTR),
848 			gpu_read(gpu, REG_A5XX_CP_RB_WPTR));
849 		return false;
850 	}
851 
852 	return true;
853 }
854 
855 static int a5xx_fault_handler(void *arg, unsigned long iova, int flags)
856 {
857 	struct msm_gpu *gpu = arg;
858 	pr_warn_ratelimited("*** gpu fault: iova=%08lx, flags=%d (%u,%u,%u,%u)\n",
859 			iova, flags,
860 			gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(4)),
861 			gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(5)),
862 			gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(6)),
863 			gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(7)));
864 
865 	return -EFAULT;
866 }
867 
868 static void a5xx_cp_err_irq(struct msm_gpu *gpu)
869 {
870 	u32 status = gpu_read(gpu, REG_A5XX_CP_INTERRUPT_STATUS);
871 
872 	if (status & A5XX_CP_INT_CP_OPCODE_ERROR) {
873 		u32 val;
874 
875 		gpu_write(gpu, REG_A5XX_CP_PFP_STAT_ADDR, 0);
876 
877 		/*
878 		 * REG_A5XX_CP_PFP_STAT_DATA is indexed, and we want index 1 so
879 		 * read it twice
880 		 */
881 
882 		gpu_read(gpu, REG_A5XX_CP_PFP_STAT_DATA);
883 		val = gpu_read(gpu, REG_A5XX_CP_PFP_STAT_DATA);
884 
885 		dev_err_ratelimited(gpu->dev->dev, "CP | opcode error | possible opcode=0x%8.8X\n",
886 			val);
887 	}
888 
889 	if (status & A5XX_CP_INT_CP_HW_FAULT_ERROR)
890 		dev_err_ratelimited(gpu->dev->dev, "CP | HW fault | status=0x%8.8X\n",
891 			gpu_read(gpu, REG_A5XX_CP_HW_FAULT));
892 
893 	if (status & A5XX_CP_INT_CP_DMA_ERROR)
894 		dev_err_ratelimited(gpu->dev->dev, "CP | DMA error\n");
895 
896 	if (status & A5XX_CP_INT_CP_REGISTER_PROTECTION_ERROR) {
897 		u32 val = gpu_read(gpu, REG_A5XX_CP_PROTECT_STATUS);
898 
899 		dev_err_ratelimited(gpu->dev->dev,
900 			"CP | protected mode error | %s | addr=0x%8.8X | status=0x%8.8X\n",
901 			val & (1 << 24) ? "WRITE" : "READ",
902 			(val & 0xFFFFF) >> 2, val);
903 	}
904 
905 	if (status & A5XX_CP_INT_CP_AHB_ERROR) {
906 		u32 status = gpu_read(gpu, REG_A5XX_CP_AHB_FAULT);
907 		const char *access[16] = { "reserved", "reserved",
908 			"timestamp lo", "timestamp hi", "pfp read", "pfp write",
909 			"", "", "me read", "me write", "", "", "crashdump read",
910 			"crashdump write" };
911 
912 		dev_err_ratelimited(gpu->dev->dev,
913 			"CP | AHB error | addr=%X access=%s error=%d | status=0x%8.8X\n",
914 			status & 0xFFFFF, access[(status >> 24) & 0xF],
915 			(status & (1 << 31)), status);
916 	}
917 }
918 
919 static void a5xx_rbbm_err_irq(struct msm_gpu *gpu, u32 status)
920 {
921 	if (status & A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR) {
922 		u32 val = gpu_read(gpu, REG_A5XX_RBBM_AHB_ERROR_STATUS);
923 
924 		dev_err_ratelimited(gpu->dev->dev,
925 			"RBBM | AHB bus error | %s | addr=0x%X | ports=0x%X:0x%X\n",
926 			val & (1 << 28) ? "WRITE" : "READ",
927 			(val & 0xFFFFF) >> 2, (val >> 20) & 0x3,
928 			(val >> 24) & 0xF);
929 
930 		/* Clear the error */
931 		gpu_write(gpu, REG_A5XX_RBBM_AHB_CMD, (1 << 4));
932 
933 		/* Clear the interrupt */
934 		gpu_write(gpu, REG_A5XX_RBBM_INT_CLEAR_CMD,
935 			A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR);
936 	}
937 
938 	if (status & A5XX_RBBM_INT_0_MASK_RBBM_TRANSFER_TIMEOUT)
939 		dev_err_ratelimited(gpu->dev->dev, "RBBM | AHB transfer timeout\n");
940 
941 	if (status & A5XX_RBBM_INT_0_MASK_RBBM_ME_MS_TIMEOUT)
942 		dev_err_ratelimited(gpu->dev->dev, "RBBM | ME master split | status=0x%X\n",
943 			gpu_read(gpu, REG_A5XX_RBBM_AHB_ME_SPLIT_STATUS));
944 
945 	if (status & A5XX_RBBM_INT_0_MASK_RBBM_PFP_MS_TIMEOUT)
946 		dev_err_ratelimited(gpu->dev->dev, "RBBM | PFP master split | status=0x%X\n",
947 			gpu_read(gpu, REG_A5XX_RBBM_AHB_PFP_SPLIT_STATUS));
948 
949 	if (status & A5XX_RBBM_INT_0_MASK_RBBM_ETS_MS_TIMEOUT)
950 		dev_err_ratelimited(gpu->dev->dev, "RBBM | ETS master split | status=0x%X\n",
951 			gpu_read(gpu, REG_A5XX_RBBM_AHB_ETS_SPLIT_STATUS));
952 
953 	if (status & A5XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNC_OVERFLOW)
954 		dev_err_ratelimited(gpu->dev->dev, "RBBM | ATB ASYNC overflow\n");
955 
956 	if (status & A5XX_RBBM_INT_0_MASK_RBBM_ATB_BUS_OVERFLOW)
957 		dev_err_ratelimited(gpu->dev->dev, "RBBM | ATB bus overflow\n");
958 }
959 
960 static void a5xx_uche_err_irq(struct msm_gpu *gpu)
961 {
962 	uint64_t addr = (uint64_t) gpu_read(gpu, REG_A5XX_UCHE_TRAP_LOG_HI);
963 
964 	addr |= gpu_read(gpu, REG_A5XX_UCHE_TRAP_LOG_LO);
965 
966 	dev_err_ratelimited(gpu->dev->dev, "UCHE | Out of bounds access | addr=0x%llX\n",
967 		addr);
968 }
969 
970 static void a5xx_gpmu_err_irq(struct msm_gpu *gpu)
971 {
972 	dev_err_ratelimited(gpu->dev->dev, "GPMU | voltage droop\n");
973 }
974 
975 static void a5xx_fault_detect_irq(struct msm_gpu *gpu)
976 {
977 	struct drm_device *dev = gpu->dev;
978 	struct msm_drm_private *priv = dev->dev_private;
979 	struct msm_ringbuffer *ring = gpu->funcs->active_ring(gpu);
980 
981 	dev_err(dev->dev, "gpu fault ring %d fence %x status %8.8X rb %4.4x/%4.4x ib1 %16.16llX/%4.4x ib2 %16.16llX/%4.4x\n",
982 		ring ? ring->id : -1, ring ? ring->seqno : 0,
983 		gpu_read(gpu, REG_A5XX_RBBM_STATUS),
984 		gpu_read(gpu, REG_A5XX_CP_RB_RPTR),
985 		gpu_read(gpu, REG_A5XX_CP_RB_WPTR),
986 		gpu_read64(gpu, REG_A5XX_CP_IB1_BASE, REG_A5XX_CP_IB1_BASE_HI),
987 		gpu_read(gpu, REG_A5XX_CP_IB1_BUFSZ),
988 		gpu_read64(gpu, REG_A5XX_CP_IB2_BASE, REG_A5XX_CP_IB2_BASE_HI),
989 		gpu_read(gpu, REG_A5XX_CP_IB2_BUFSZ));
990 
991 	/* Turn off the hangcheck timer to keep it from bothering us */
992 	del_timer(&gpu->hangcheck_timer);
993 
994 	queue_work(priv->wq, &gpu->recover_work);
995 }
996 
997 #define RBBM_ERROR_MASK \
998 	(A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR | \
999 	A5XX_RBBM_INT_0_MASK_RBBM_TRANSFER_TIMEOUT | \
1000 	A5XX_RBBM_INT_0_MASK_RBBM_ME_MS_TIMEOUT | \
1001 	A5XX_RBBM_INT_0_MASK_RBBM_PFP_MS_TIMEOUT | \
1002 	A5XX_RBBM_INT_0_MASK_RBBM_ETS_MS_TIMEOUT | \
1003 	A5XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNC_OVERFLOW)
1004 
1005 static irqreturn_t a5xx_irq(struct msm_gpu *gpu)
1006 {
1007 	u32 status = gpu_read(gpu, REG_A5XX_RBBM_INT_0_STATUS);
1008 
1009 	/*
1010 	 * Clear all the interrupts except RBBM_AHB_ERROR - if we clear it
1011 	 * before the source is cleared the interrupt will storm.
1012 	 */
1013 	gpu_write(gpu, REG_A5XX_RBBM_INT_CLEAR_CMD,
1014 		status & ~A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR);
1015 
1016 	/* Pass status to a5xx_rbbm_err_irq because we've already cleared it */
1017 	if (status & RBBM_ERROR_MASK)
1018 		a5xx_rbbm_err_irq(gpu, status);
1019 
1020 	if (status & A5XX_RBBM_INT_0_MASK_CP_HW_ERROR)
1021 		a5xx_cp_err_irq(gpu);
1022 
1023 	if (status & A5XX_RBBM_INT_0_MASK_MISC_HANG_DETECT)
1024 		a5xx_fault_detect_irq(gpu);
1025 
1026 	if (status & A5XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS)
1027 		a5xx_uche_err_irq(gpu);
1028 
1029 	if (status & A5XX_RBBM_INT_0_MASK_GPMU_VOLTAGE_DROOP)
1030 		a5xx_gpmu_err_irq(gpu);
1031 
1032 	if (status & A5XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS) {
1033 		a5xx_preempt_trigger(gpu);
1034 		msm_gpu_retire(gpu);
1035 	}
1036 
1037 	if (status & A5XX_RBBM_INT_0_MASK_CP_SW)
1038 		a5xx_preempt_irq(gpu);
1039 
1040 	return IRQ_HANDLED;
1041 }
1042 
1043 static const u32 a5xx_register_offsets[REG_ADRENO_REGISTER_MAX] = {
1044 	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_BASE, REG_A5XX_CP_RB_BASE),
1045 	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_BASE_HI, REG_A5XX_CP_RB_BASE_HI),
1046 	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_RPTR_ADDR, REG_A5XX_CP_RB_RPTR_ADDR),
1047 	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_RPTR_ADDR_HI,
1048 		REG_A5XX_CP_RB_RPTR_ADDR_HI),
1049 	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_RPTR, REG_A5XX_CP_RB_RPTR),
1050 	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_WPTR, REG_A5XX_CP_RB_WPTR),
1051 	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_CNTL, REG_A5XX_CP_RB_CNTL),
1052 };
1053 
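/*
 * Inclusive (start, end) pairs of register ranges, terminated by ~0, that are
 * read back by adreno_show()/adreno_dump() when dumping GPU state
 */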
1054 static const u32 a5xx_registers[] = {
1055 	0x0000, 0x0002, 0x0004, 0x0020, 0x0022, 0x0026, 0x0029, 0x002B,
1056 	0x002E, 0x0035, 0x0038, 0x0042, 0x0044, 0x0044, 0x0047, 0x0095,
1057 	0x0097, 0x00BB, 0x03A0, 0x0464, 0x0469, 0x046F, 0x04D2, 0x04D3,
1058 	0x04E0, 0x0533, 0x0540, 0x0555, 0x0800, 0x081A, 0x081F, 0x0841,
1059 	0x0860, 0x0860, 0x0880, 0x08A0, 0x0B00, 0x0B12, 0x0B15, 0x0B28,
1060 	0x0B78, 0x0B7F, 0x0BB0, 0x0BBD, 0x0BC0, 0x0BC6, 0x0BD0, 0x0C53,
1061 	0x0C60, 0x0C61, 0x0C80, 0x0C82, 0x0C84, 0x0C85, 0x0C90, 0x0C98,
1062 	0x0CA0, 0x0CA0, 0x0CB0, 0x0CB2, 0x2180, 0x2185, 0x2580, 0x2585,
1063 	0x0CC1, 0x0CC1, 0x0CC4, 0x0CC7, 0x0CCC, 0x0CCC, 0x0CD0, 0x0CD8,
1064 	0x0CE0, 0x0CE5, 0x0CE8, 0x0CE8, 0x0CEC, 0x0CF1, 0x0CFB, 0x0D0E,
1065 	0x2100, 0x211E, 0x2140, 0x2145, 0x2500, 0x251E, 0x2540, 0x2545,
1066 	0x0D10, 0x0D17, 0x0D20, 0x0D23, 0x0D30, 0x0D30, 0x20C0, 0x20C0,
1067 	0x24C0, 0x24C0, 0x0E40, 0x0E43, 0x0E4A, 0x0E4A, 0x0E50, 0x0E57,
1068 	0x0E60, 0x0E7C, 0x0E80, 0x0E8E, 0x0E90, 0x0E96, 0x0EA0, 0x0EA8,
1069 	0x0EB0, 0x0EB2, 0xE140, 0xE147, 0xE150, 0xE187, 0xE1A0, 0xE1A9,
1070 	0xE1B0, 0xE1B6, 0xE1C0, 0xE1C7, 0xE1D0, 0xE1D1, 0xE200, 0xE201,
1071 	0xE210, 0xE21C, 0xE240, 0xE268, 0xE000, 0xE006, 0xE010, 0xE09A,
1072 	0xE0A0, 0xE0A4, 0xE0AA, 0xE0EB, 0xE100, 0xE105, 0xE380, 0xE38F,
1073 	0xE3B0, 0xE3B0, 0xE400, 0xE405, 0xE408, 0xE4E9, 0xE4F0, 0xE4F0,
1074 	0xE280, 0xE280, 0xE282, 0xE2A3, 0xE2A5, 0xE2C2, 0xE940, 0xE947,
1075 	0xE950, 0xE987, 0xE9A0, 0xE9A9, 0xE9B0, 0xE9B6, 0xE9C0, 0xE9C7,
1076 	0xE9D0, 0xE9D1, 0xEA00, 0xEA01, 0xEA10, 0xEA1C, 0xEA40, 0xEA68,
1077 	0xE800, 0xE806, 0xE810, 0xE89A, 0xE8A0, 0xE8A4, 0xE8AA, 0xE8EB,
1078 	0xE900, 0xE905, 0xEB80, 0xEB8F, 0xEBB0, 0xEBB0, 0xEC00, 0xEC05,
1079 	0xEC08, 0xECE9, 0xECF0, 0xECF0, 0xEA80, 0xEA80, 0xEA82, 0xEAA3,
1080 	0xEAA5, 0xEAC2, 0xA800, 0xA8FF, 0xAC60, 0xAC60, 0xB000, 0xB97F,
1081 	0xB9A0, 0xB9BF, ~0
1082 };
1083 
1084 static void a5xx_dump(struct msm_gpu *gpu)
1085 {
1086 	dev_info(gpu->dev->dev, "status:   %08x\n",
1087 		gpu_read(gpu, REG_A5XX_RBBM_STATUS));
1088 	adreno_dump(gpu);
1089 }
1090 
1091 static int a5xx_pm_resume(struct msm_gpu *gpu)
1092 {
1093 	int ret;
1094 
1095 	/* Turn on the core power */
1096 	ret = msm_gpu_pm_resume(gpu);
1097 	if (ret)
1098 		return ret;
1099 
1100 	/* Turn on the RBCCU domain first to limit the chances of voltage droop */
1101 	gpu_write(gpu, REG_A5XX_GPMU_RBCCU_POWER_CNTL, 0x778000);
1102 
1103 	/* Wait 3 usecs before polling */
1104 	udelay(3);
1105 
1106 	ret = spin_usecs(gpu, 20, REG_A5XX_GPMU_RBCCU_PWR_CLK_STATUS,
1107 		(1 << 20), (1 << 20));
1108 	if (ret) {
1109 		DRM_ERROR("%s: timeout waiting for RBCCU GDSC enable: %X\n",
1110 			gpu->name,
1111 			gpu_read(gpu, REG_A5XX_GPMU_RBCCU_PWR_CLK_STATUS));
1112 		return ret;
1113 	}
1114 
1115 	/* Turn on the SP domain */
1116 	gpu_write(gpu, REG_A5XX_GPMU_SP_POWER_CNTL, 0x778000);
1117 	ret = spin_usecs(gpu, 20, REG_A5XX_GPMU_SP_PWR_CLK_STATUS,
1118 		(1 << 20), (1 << 20));
1119 	if (ret)
1120 		DRM_ERROR("%s: timeout waiting for SP GDSC enable\n",
1121 			gpu->name);
1122 
1123 	return ret;
1124 }
1125 
1126 static int a5xx_pm_suspend(struct msm_gpu *gpu)
1127 {
1128 	/* Clear the VBIF pipe before shutting down */
1129 	gpu_write(gpu, REG_A5XX_VBIF_XIN_HALT_CTRL0, 0xF);
1130 	spin_until((gpu_read(gpu, REG_A5XX_VBIF_XIN_HALT_CTRL1) & 0xF) == 0xF);
1131 
1132 	gpu_write(gpu, REG_A5XX_VBIF_XIN_HALT_CTRL0, 0);
1133 
1134 	/*
1135 	 * Reset the VBIF before power collapse to avoid issues with FIFO
1136 	 * entries
1137 	 */
1138 	gpu_write(gpu, REG_A5XX_RBBM_BLOCK_SW_RESET_CMD, 0x003C0000);
1139 	gpu_write(gpu, REG_A5XX_RBBM_BLOCK_SW_RESET_CMD, 0x00000000);
1140 
1141 	return msm_gpu_pm_suspend(gpu);
1142 }
1143 
1144 static int a5xx_get_timestamp(struct msm_gpu *gpu, uint64_t *value)
1145 {
1146 	*value = gpu_read64(gpu, REG_A5XX_RBBM_PERFCTR_CP_0_LO,
1147 		REG_A5XX_RBBM_PERFCTR_CP_0_HI);
1148 
1149 	return 0;
1150 }
1151 
1152 #ifdef CONFIG_DEBUG_FS
1153 static void a5xx_show(struct msm_gpu *gpu, struct seq_file *m)
1154 {
1155 	seq_printf(m, "status:   %08x\n",
1156 			gpu_read(gpu, REG_A5XX_RBBM_STATUS));
1157 
1158 	/*
1159 	 * Temporarily disable hardware clock gating before going into
1160 	 * adreno_show to avoid issues while reading the registers
1161 	 */
1162 	a5xx_set_hwcg(gpu, false);
1163 	adreno_show(gpu, m);
1164 	a5xx_set_hwcg(gpu, true);
1165 }
1166 #endif
1167 
1168 static struct msm_ringbuffer *a5xx_active_ring(struct msm_gpu *gpu)
1169 {
1170 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1171 	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
1172 
1173 	return a5xx_gpu->cur_ring;
1174 }
1175 
1176 static int a5xx_gpu_busy(struct msm_gpu *gpu, uint64_t *value)
1177 {
1178 	*value = gpu_read64(gpu, REG_A5XX_RBBM_PERFCTR_RBBM_0_LO,
1179 		REG_A5XX_RBBM_PERFCTR_RBBM_0_HI);
1180 
1181 	return 0;
1182 }
1183 
1184 static const struct adreno_gpu_funcs funcs = {
1185 	.base = {
1186 		.get_param = adreno_get_param,
1187 		.hw_init = a5xx_hw_init,
1188 		.pm_suspend = a5xx_pm_suspend,
1189 		.pm_resume = a5xx_pm_resume,
1190 		.recover = a5xx_recover,
1191 		.submit = a5xx_submit,
1192 		.flush = a5xx_flush,
1193 		.active_ring = a5xx_active_ring,
1194 		.irq = a5xx_irq,
1195 		.destroy = a5xx_destroy,
1196 #ifdef CONFIG_DEBUG_FS
1197 		.show = a5xx_show,
1198 #endif
1199 		.gpu_busy = a5xx_gpu_busy,
1200 	},
1201 	.get_timestamp = a5xx_get_timestamp,
1202 };
1203 
1204 static void check_speed_bin(struct device *dev)
1205 {
1206 	struct nvmem_cell *cell;
1207 	u32 bin, val, *buf;
1208 
1209 	cell = nvmem_cell_get(dev, "speed_bin");
1210 
1211 	/* If an nvmem cell isn't defined, there is nothing to do */
1212 	if (IS_ERR(cell))
1213 		return;
1214 
1215 	buf = nvmem_cell_read(cell, NULL);
1216 	nvmem_cell_put(cell);

	/* Don't dereference an ERR_PTR if the cell read failed */
	if (IS_ERR(buf))
		return;

	bin = *buf;
	kfree(buf);
1217 
1218 	val = (1 << bin);
1219 
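	/*
	 * OPP table entries carry an opp-supported-hw bitmask; advertising
	 * (1 << bin) enables only the frequencies valid for this speed bin
	 */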
1220 	dev_pm_opp_set_supported_hw(dev, &val, 1);
1221 }
1222 
1223 struct msm_gpu *a5xx_gpu_init(struct drm_device *dev)
1224 {
1225 	struct msm_drm_private *priv = dev->dev_private;
1226 	struct platform_device *pdev = priv->gpu_pdev;
1227 	struct a5xx_gpu *a5xx_gpu = NULL;
1228 	struct adreno_gpu *adreno_gpu;
1229 	struct msm_gpu *gpu;
1230 	int ret;
1231 
1232 	if (!pdev) {
1233 		dev_err(dev->dev, "No A5XX device is defined\n");
1234 		return ERR_PTR(-ENXIO);
1235 	}
1236 
1237 	a5xx_gpu = kzalloc(sizeof(*a5xx_gpu), GFP_KERNEL);
1238 	if (!a5xx_gpu)
1239 		return ERR_PTR(-ENOMEM);
1240 
1241 	adreno_gpu = &a5xx_gpu->base;
1242 	gpu = &adreno_gpu->base;
1243 
1244 	adreno_gpu->registers = a5xx_registers;
1245 	adreno_gpu->reg_offsets = a5xx_register_offsets;
1246 
1247 	a5xx_gpu->lm_leakage = 0x4E001A;
1248 
1249 	check_speed_bin(&pdev->dev);
1250 
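	/*
	 * Ask for four ringbuffers - with more than one ring the preemption
	 * code can switch between rings of different priority
	 */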
1251 	ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs, 4);
1252 	if (ret) {
1253 		a5xx_destroy(&(a5xx_gpu->base.base));
1254 		return ERR_PTR(ret);
1255 	}
1256 
1257 	if (gpu->aspace)
1258 		msm_mmu_set_fault_handler(gpu->aspace->mmu, gpu, a5xx_fault_handler);
1259 
1260 	/* Set up the preemption specific bits and pieces for each ringbuffer */
1261 	a5xx_preempt_init(gpu);
1262 
1263 	return gpu;
1264 }
1265