xref: /openbmc/linux/drivers/gpu/drm/msm/adreno/a5xx_gpu.c (revision 98ddec80)
1 /* Copyright (c) 2016-2017 The Linux Foundation. All rights reserved.
2  *
3  * This program is free software; you can redistribute it and/or modify
4  * it under the terms of the GNU General Public License version 2 and
5  * only version 2 as published by the Free Software Foundation.
6  *
7  * This program is distributed in the hope that it will be useful,
8  * but WITHOUT ANY WARRANTY; without even the implied warranty of
9  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10  * GNU General Public License for more details.
11  *
12  */
13 
14 #include <linux/types.h>
15 #include <linux/cpumask.h>
16 #include <linux/qcom_scm.h>
17 #include <linux/dma-mapping.h>
18 #include <linux/of_address.h>
19 #include <linux/soc/qcom/mdt_loader.h>
20 #include <linux/pm_opp.h>
21 #include <linux/nvmem-consumer.h>
22 #include "msm_gem.h"
23 #include "msm_mmu.h"
24 #include "a5xx_gpu.h"
25 
26 extern bool hang_debug;
27 static void a5xx_dump(struct msm_gpu *gpu);
28 
29 #define GPU_PAS_ID 13
30 
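/*
 * Load the zap shader firmware (MDT format) into the reserved memory
 * region described by the "zap-shader" DT node and ask the secure world
 * (via SCM) to authenticate and bring it up.
 */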
31 static int zap_shader_load_mdt(struct msm_gpu *gpu, const char *fwname)
32 {
33 	struct device *dev = &gpu->pdev->dev;
34 	const struct firmware *fw;
35 	struct device_node *np;
36 	struct resource r;
37 	phys_addr_t mem_phys;
38 	ssize_t mem_size;
39 	void *mem_region = NULL;
40 	int ret;
41 
42 	if (!IS_ENABLED(CONFIG_ARCH_QCOM))
43 		return -EINVAL;
44 
45 	np = of_get_child_by_name(dev->of_node, "zap-shader");
46 	if (!np)
47 		return -ENODEV;
48 
49 	np = of_parse_phandle(np, "memory-region", 0);
50 	if (!np)
51 		return -EINVAL;
52 
53 	ret = of_address_to_resource(np, 0, &r);
54 	if (ret)
55 		return ret;
56 
57 	mem_phys = r.start;
58 	mem_size = resource_size(&r);
59 
60 	/* Request the MDT file for the firmware */
61 	fw = adreno_request_fw(to_adreno_gpu(gpu), fwname);
62 	if (IS_ERR(fw)) {
63 		DRM_DEV_ERROR(dev, "Unable to load %s\n", fwname);
64 		return PTR_ERR(fw);
65 	}
66 
67 	/* Figure out how much memory we need */
68 	mem_size = qcom_mdt_get_size(fw);
69 	if (mem_size < 0) {
70 		ret = mem_size;
71 		goto out;
72 	}
73 
74 	/* Allocate memory for the firmware image */
75 	mem_region = memremap(mem_phys, mem_size, MEMREMAP_WC);
76 	if (!mem_region) {
77 		ret = -ENOMEM;
78 		goto out;
79 	}
80 
81 	/*
82 	 * Load the rest of the MDT
83 	 *
84 	 * Note that we could be dealing with two different firmware paths,
85 	 * since with upstream linux-firmware the file lives in a qcom/
86 	 * subdirectory.  adreno_request_fw() handles this, but
87 	 * qcom_mdt_load() does not.  Since we have already gone through
88 	 * adreno_request_fw() we know which of the two cases it is:
89 	 */
90 	if (to_adreno_gpu(gpu)->fwloc == FW_LOCATION_LEGACY) {
91 		ret = qcom_mdt_load(dev, fw, fwname, GPU_PAS_ID,
92 				mem_region, mem_phys, mem_size, NULL);
93 	} else {
94 		char newname[strlen("qcom/") + strlen(fwname) + 1];
95 
96 		sprintf(newname, "qcom/%s", fwname);
97 
98 		ret = qcom_mdt_load(dev, fw, newname, GPU_PAS_ID,
99 				mem_region, mem_phys, mem_size, NULL);
100 	}
101 	if (ret)
102 		goto out;
103 
104 	/* Send the image to the secure world */
105 	ret = qcom_scm_pas_auth_and_reset(GPU_PAS_ID);
106 	if (ret)
107 		DRM_DEV_ERROR(dev, "Unable to authorize the image\n");
108 
109 out:
110 	if (mem_region)
111 		memunmap(mem_region);
112 
113 	release_firmware(fw);
114 
115 	return ret;
116 }
117 
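/*
 * Publish the ring write pointer to the hardware.  The wptr is only
 * written if this is the currently active ring and a preemption switch
 * is not in progress.
 */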
118 static void a5xx_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
119 {
120 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
121 	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
122 	uint32_t wptr;
123 	unsigned long flags;
124 
125 	spin_lock_irqsave(&ring->lock, flags);
126 
127 	/* Copy the shadow to the actual register */
128 	ring->cur = ring->next;
129 
130 	/* Make sure to wrap wptr if we need to */
131 	wptr = get_wptr(ring);
132 
133 	spin_unlock_irqrestore(&ring->lock, flags);
134 
135 	/* Make sure everything is posted before making a decision */
136 	mb();
137 
138 	/* Update HW if this is the current ring and we are not in preempt */
139 	if (a5xx_gpu->cur_ring == ring && !a5xx_in_preempt(a5xx_gpu))
140 		gpu_write(gpu, REG_A5XX_CP_RB_WPTR, wptr);
141 }
142 
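/*
 * 'sudo' submit path (CONFIG_DRM_MSM_GPU_SUDO): copy the userspace
 * command stream directly into the ringbuffer instead of executing it
 * from an IB, then idle the GPU and retire the submit by hand.
 */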
143 static void a5xx_submit_in_rb(struct msm_gpu *gpu, struct msm_gem_submit *submit,
144 	struct msm_file_private *ctx)
145 {
146 	struct msm_drm_private *priv = gpu->dev->dev_private;
147 	struct msm_ringbuffer *ring = submit->ring;
148 	struct msm_gem_object *obj;
149 	uint32_t *ptr, dwords;
150 	unsigned int i, j;
151 
152 	for (i = 0; i < submit->nr_cmds; i++) {
153 		switch (submit->cmd[i].type) {
154 		case MSM_SUBMIT_CMD_IB_TARGET_BUF:
155 			break;
156 		case MSM_SUBMIT_CMD_CTX_RESTORE_BUF:
157 			if (priv->lastctx == ctx)
158 				break;
159 		case MSM_SUBMIT_CMD_BUF:
160 			/* copy commands into RB: */
161 			obj = submit->bos[submit->cmd[i].idx].obj;
162 			dwords = submit->cmd[i].size;
163 
164 			ptr = msm_gem_get_vaddr(&obj->base);
165 
166 			/* _get_vaddr() shouldn't fail at this point,
167 			 * since we've already mapped it once in
168 			 * submit_reloc()
169 			 */
170 			if (WARN_ON(!ptr))
171 				return;
172 
173 			for (j = 0; j < dwords; j++) {
174 				/* normally the OUT_PKTn() would wait
175 				 * for space for the packet.  But since
176 				 * we just OUT_RING() the whole thing,
177 				 * need to call adreno_wait_ring()
178 				 * ourself:
179 				 */
180 				adreno_wait_ring(ring, 1);
181 				OUT_RING(ring, ptr[j]);
182 			}
183 
184 			msm_gem_put_vaddr(&obj->base);
185 
186 			break;
187 		}
188 	}
189 
190 	a5xx_flush(gpu, ring);
191 	a5xx_preempt_trigger(gpu);
192 
193 	/* we might not necessarily have a cmd from userspace to
194 	 * trigger an event to know that submit has completed, so
195 	 * do this manually:
196 	 */
197 	a5xx_idle(gpu, ring);
198 	ring->memptrs->fence = submit->seqno;
199 	msm_gpu_retire(gpu);
200 }
201 
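/*
 * Normal submit path: emit the preemption save record and the user IBs
 * into the ringbuffer, write the fence along with a CACHE_FLUSH_TS event
 * to signal completion, and finish with a CONTEXT_SWITCH_YIELD so a
 * pending preemption can take the floor.
 */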
202 static void a5xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit,
203 	struct msm_file_private *ctx)
204 {
205 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
206 	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
207 	struct msm_drm_private *priv = gpu->dev->dev_private;
208 	struct msm_ringbuffer *ring = submit->ring;
209 	unsigned int i, ibs = 0;
210 
211 	if (IS_ENABLED(CONFIG_DRM_MSM_GPU_SUDO) && submit->in_rb) {
212 		priv->lastctx = NULL;
213 		a5xx_submit_in_rb(gpu, submit, ctx);
214 		return;
215 	}
216 
217 	OUT_PKT7(ring, CP_PREEMPT_ENABLE_GLOBAL, 1);
218 	OUT_RING(ring, 0x02);
219 
220 	/* Turn off protected mode to write to special registers */
221 	OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
222 	OUT_RING(ring, 0);
223 
224 	/* Set the save preemption record for the ring/command */
225 	OUT_PKT4(ring, REG_A5XX_CP_CONTEXT_SWITCH_SAVE_ADDR_LO, 2);
226 	OUT_RING(ring, lower_32_bits(a5xx_gpu->preempt_iova[submit->ring->id]));
227 	OUT_RING(ring, upper_32_bits(a5xx_gpu->preempt_iova[submit->ring->id]));
228 
229 	/* Turn back on protected mode */
230 	OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
231 	OUT_RING(ring, 1);
232 
233 	/* Enable local preemption for finegrain preemption */
234 	OUT_PKT7(ring, CP_PREEMPT_ENABLE_GLOBAL, 1);
235 	OUT_RING(ring, 0x02);
236 
237 	/* Allow CP_CONTEXT_SWITCH_YIELD packets in the IB2 */
238 	OUT_PKT7(ring, CP_YIELD_ENABLE, 1);
239 	OUT_RING(ring, 0x02);
240 
241 	/* Submit the commands */
242 	for (i = 0; i < submit->nr_cmds; i++) {
243 		switch (submit->cmd[i].type) {
244 		case MSM_SUBMIT_CMD_IB_TARGET_BUF:
245 			break;
246 		case MSM_SUBMIT_CMD_CTX_RESTORE_BUF:
247 			if (priv->lastctx == ctx)
248 				break;
249 		case MSM_SUBMIT_CMD_BUF:
250 			OUT_PKT7(ring, CP_INDIRECT_BUFFER_PFE, 3);
251 			OUT_RING(ring, lower_32_bits(submit->cmd[i].iova));
252 			OUT_RING(ring, upper_32_bits(submit->cmd[i].iova));
253 			OUT_RING(ring, submit->cmd[i].size);
254 			ibs++;
255 			break;
256 		}
257 	}
258 
259 	/*
260 	 * Write the render mode to NULL (0) to indicate to the CP that the IBs
261 	 * are done rendering - otherwise a lucky preemption would start
262 	 * replaying from the last checkpoint
263 	 */
264 	OUT_PKT7(ring, CP_SET_RENDER_MODE, 5);
265 	OUT_RING(ring, 0);
266 	OUT_RING(ring, 0);
267 	OUT_RING(ring, 0);
268 	OUT_RING(ring, 0);
269 	OUT_RING(ring, 0);
270 
271 	/* Turn off IB level preemptions */
272 	OUT_PKT7(ring, CP_YIELD_ENABLE, 1);
273 	OUT_RING(ring, 0x01);
274 
275 	/* Write the fence to the scratch register */
276 	OUT_PKT4(ring, REG_A5XX_CP_SCRATCH_REG(2), 1);
277 	OUT_RING(ring, submit->seqno);
278 
279 	/*
280 	 * Execute a CACHE_FLUSH_TS event. This will ensure that the
281 	 * timestamp is written to the memory and then triggers the interrupt
282 	 */
283 	OUT_PKT7(ring, CP_EVENT_WRITE, 4);
284 	OUT_RING(ring, CACHE_FLUSH_TS | (1 << 31));
285 	OUT_RING(ring, lower_32_bits(rbmemptr(ring, fence)));
286 	OUT_RING(ring, upper_32_bits(rbmemptr(ring, fence)));
287 	OUT_RING(ring, submit->seqno);
288 
289 	/* Yield the floor on command completion */
290 	OUT_PKT7(ring, CP_CONTEXT_SWITCH_YIELD, 4);
291 	/*
292 	 * If dword[2:1] are non zero, they specify an address for the CP to
293 	 * write the value of dword[3] to on preemption complete. Write 0 to
294 	 * skip the write
295 	 */
296 	OUT_RING(ring, 0x00);
297 	OUT_RING(ring, 0x00);
298 	/* Data value - not used if the address above is 0 */
299 	OUT_RING(ring, 0x01);
300 	/* Set bit 0 to trigger an interrupt on preempt complete */
301 	OUT_RING(ring, 0x01);
302 
303 	a5xx_flush(gpu, ring);
304 
305 	/* Check to see if we need to start preemption */
306 	a5xx_preempt_trigger(gpu);
307 }
308 
309 static const struct {
310 	u32 offset;
311 	u32 value;
312 } a5xx_hwcg[] = {
313 	{REG_A5XX_RBBM_CLOCK_CNTL_SP0, 0x02222222},
314 	{REG_A5XX_RBBM_CLOCK_CNTL_SP1, 0x02222222},
315 	{REG_A5XX_RBBM_CLOCK_CNTL_SP2, 0x02222222},
316 	{REG_A5XX_RBBM_CLOCK_CNTL_SP3, 0x02222222},
317 	{REG_A5XX_RBBM_CLOCK_CNTL2_SP0, 0x02222220},
318 	{REG_A5XX_RBBM_CLOCK_CNTL2_SP1, 0x02222220},
319 	{REG_A5XX_RBBM_CLOCK_CNTL2_SP2, 0x02222220},
320 	{REG_A5XX_RBBM_CLOCK_CNTL2_SP3, 0x02222220},
321 	{REG_A5XX_RBBM_CLOCK_HYST_SP0, 0x0000F3CF},
322 	{REG_A5XX_RBBM_CLOCK_HYST_SP1, 0x0000F3CF},
323 	{REG_A5XX_RBBM_CLOCK_HYST_SP2, 0x0000F3CF},
324 	{REG_A5XX_RBBM_CLOCK_HYST_SP3, 0x0000F3CF},
325 	{REG_A5XX_RBBM_CLOCK_DELAY_SP0, 0x00000080},
326 	{REG_A5XX_RBBM_CLOCK_DELAY_SP1, 0x00000080},
327 	{REG_A5XX_RBBM_CLOCK_DELAY_SP2, 0x00000080},
328 	{REG_A5XX_RBBM_CLOCK_DELAY_SP3, 0x00000080},
329 	{REG_A5XX_RBBM_CLOCK_CNTL_TP0, 0x22222222},
330 	{REG_A5XX_RBBM_CLOCK_CNTL_TP1, 0x22222222},
331 	{REG_A5XX_RBBM_CLOCK_CNTL_TP2, 0x22222222},
332 	{REG_A5XX_RBBM_CLOCK_CNTL_TP3, 0x22222222},
333 	{REG_A5XX_RBBM_CLOCK_CNTL2_TP0, 0x22222222},
334 	{REG_A5XX_RBBM_CLOCK_CNTL2_TP1, 0x22222222},
335 	{REG_A5XX_RBBM_CLOCK_CNTL2_TP2, 0x22222222},
336 	{REG_A5XX_RBBM_CLOCK_CNTL2_TP3, 0x22222222},
337 	{REG_A5XX_RBBM_CLOCK_CNTL3_TP0, 0x00002222},
338 	{REG_A5XX_RBBM_CLOCK_CNTL3_TP1, 0x00002222},
339 	{REG_A5XX_RBBM_CLOCK_CNTL3_TP2, 0x00002222},
340 	{REG_A5XX_RBBM_CLOCK_CNTL3_TP3, 0x00002222},
341 	{REG_A5XX_RBBM_CLOCK_HYST_TP0, 0x77777777},
342 	{REG_A5XX_RBBM_CLOCK_HYST_TP1, 0x77777777},
343 	{REG_A5XX_RBBM_CLOCK_HYST_TP2, 0x77777777},
344 	{REG_A5XX_RBBM_CLOCK_HYST_TP3, 0x77777777},
345 	{REG_A5XX_RBBM_CLOCK_HYST2_TP0, 0x77777777},
346 	{REG_A5XX_RBBM_CLOCK_HYST2_TP1, 0x77777777},
347 	{REG_A5XX_RBBM_CLOCK_HYST2_TP2, 0x77777777},
348 	{REG_A5XX_RBBM_CLOCK_HYST2_TP3, 0x77777777},
349 	{REG_A5XX_RBBM_CLOCK_HYST3_TP0, 0x00007777},
350 	{REG_A5XX_RBBM_CLOCK_HYST3_TP1, 0x00007777},
351 	{REG_A5XX_RBBM_CLOCK_HYST3_TP2, 0x00007777},
352 	{REG_A5XX_RBBM_CLOCK_HYST3_TP3, 0x00007777},
353 	{REG_A5XX_RBBM_CLOCK_DELAY_TP0, 0x11111111},
354 	{REG_A5XX_RBBM_CLOCK_DELAY_TP1, 0x11111111},
355 	{REG_A5XX_RBBM_CLOCK_DELAY_TP2, 0x11111111},
356 	{REG_A5XX_RBBM_CLOCK_DELAY_TP3, 0x11111111},
357 	{REG_A5XX_RBBM_CLOCK_DELAY2_TP0, 0x11111111},
358 	{REG_A5XX_RBBM_CLOCK_DELAY2_TP1, 0x11111111},
359 	{REG_A5XX_RBBM_CLOCK_DELAY2_TP2, 0x11111111},
360 	{REG_A5XX_RBBM_CLOCK_DELAY2_TP3, 0x11111111},
361 	{REG_A5XX_RBBM_CLOCK_DELAY3_TP0, 0x00001111},
362 	{REG_A5XX_RBBM_CLOCK_DELAY3_TP1, 0x00001111},
363 	{REG_A5XX_RBBM_CLOCK_DELAY3_TP2, 0x00001111},
364 	{REG_A5XX_RBBM_CLOCK_DELAY3_TP3, 0x00001111},
365 	{REG_A5XX_RBBM_CLOCK_CNTL_UCHE, 0x22222222},
366 	{REG_A5XX_RBBM_CLOCK_CNTL2_UCHE, 0x22222222},
367 	{REG_A5XX_RBBM_CLOCK_CNTL3_UCHE, 0x22222222},
368 	{REG_A5XX_RBBM_CLOCK_CNTL4_UCHE, 0x00222222},
369 	{REG_A5XX_RBBM_CLOCK_HYST_UCHE, 0x00444444},
370 	{REG_A5XX_RBBM_CLOCK_DELAY_UCHE, 0x00000002},
371 	{REG_A5XX_RBBM_CLOCK_CNTL_RB0, 0x22222222},
372 	{REG_A5XX_RBBM_CLOCK_CNTL_RB1, 0x22222222},
373 	{REG_A5XX_RBBM_CLOCK_CNTL_RB2, 0x22222222},
374 	{REG_A5XX_RBBM_CLOCK_CNTL_RB3, 0x22222222},
375 	{REG_A5XX_RBBM_CLOCK_CNTL2_RB0, 0x00222222},
376 	{REG_A5XX_RBBM_CLOCK_CNTL2_RB1, 0x00222222},
377 	{REG_A5XX_RBBM_CLOCK_CNTL2_RB2, 0x00222222},
378 	{REG_A5XX_RBBM_CLOCK_CNTL2_RB3, 0x00222222},
379 	{REG_A5XX_RBBM_CLOCK_CNTL_CCU0, 0x00022220},
380 	{REG_A5XX_RBBM_CLOCK_CNTL_CCU1, 0x00022220},
381 	{REG_A5XX_RBBM_CLOCK_CNTL_CCU2, 0x00022220},
382 	{REG_A5XX_RBBM_CLOCK_CNTL_CCU3, 0x00022220},
383 	{REG_A5XX_RBBM_CLOCK_CNTL_RAC, 0x05522222},
384 	{REG_A5XX_RBBM_CLOCK_CNTL2_RAC, 0x00505555},
385 	{REG_A5XX_RBBM_CLOCK_HYST_RB_CCU0, 0x04040404},
386 	{REG_A5XX_RBBM_CLOCK_HYST_RB_CCU1, 0x04040404},
387 	{REG_A5XX_RBBM_CLOCK_HYST_RB_CCU2, 0x04040404},
388 	{REG_A5XX_RBBM_CLOCK_HYST_RB_CCU3, 0x04040404},
389 	{REG_A5XX_RBBM_CLOCK_HYST_RAC, 0x07444044},
390 	{REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_0, 0x00000002},
391 	{REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_1, 0x00000002},
392 	{REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_2, 0x00000002},
393 	{REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_3, 0x00000002},
394 	{REG_A5XX_RBBM_CLOCK_DELAY_RAC, 0x00010011},
395 	{REG_A5XX_RBBM_CLOCK_CNTL_TSE_RAS_RBBM, 0x04222222},
396 	{REG_A5XX_RBBM_CLOCK_MODE_GPC, 0x02222222},
397 	{REG_A5XX_RBBM_CLOCK_MODE_VFD, 0x00002222},
398 	{REG_A5XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM, 0x00000000},
399 	{REG_A5XX_RBBM_CLOCK_HYST_GPC, 0x04104004},
400 	{REG_A5XX_RBBM_CLOCK_HYST_VFD, 0x00000000},
401 	{REG_A5XX_RBBM_CLOCK_DELAY_HLSQ, 0x00000000},
402 	{REG_A5XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM, 0x00004000},
403 	{REG_A5XX_RBBM_CLOCK_DELAY_GPC, 0x00000200},
404 	{REG_A5XX_RBBM_CLOCK_DELAY_VFD, 0x00002222}
405 };
406 
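/*
 * Program the hardware clock gating registers from the a5xx_hwcg table
 * above (state == true) or clear them all to disable clock gating.
 */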
407 void a5xx_set_hwcg(struct msm_gpu *gpu, bool state)
408 {
409 	unsigned int i;
410 
411 	for (i = 0; i < ARRAY_SIZE(a5xx_hwcg); i++)
412 		gpu_write(gpu, a5xx_hwcg[i].offset,
413 			state ? a5xx_hwcg[i].value : 0);
414 
415 	gpu_write(gpu, REG_A5XX_RBBM_CLOCK_CNTL, state ? 0xAAA8AA00 : 0);
416 	gpu_write(gpu, REG_A5XX_RBBM_ISDB_CNT, state ? 0x182 : 0x180);
417 }
418 
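/*
 * Send CP_ME_INIT to initialize the CP microcode state (hardware
 * contexts, error detection and per-target workarounds) and wait for
 * the ring to go idle.
 */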
419 static int a5xx_me_init(struct msm_gpu *gpu)
420 {
421 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
422 	struct msm_ringbuffer *ring = gpu->rb[0];
423 
424 	OUT_PKT7(ring, CP_ME_INIT, 8);
425 
426 	OUT_RING(ring, 0x0000002F);
427 
428 	/* Enable multiple hardware contexts */
429 	OUT_RING(ring, 0x00000003);
430 
431 	/* Enable error detection */
432 	OUT_RING(ring, 0x20000000);
433 
434 	/* Don't enable header dump */
435 	OUT_RING(ring, 0x00000000);
436 	OUT_RING(ring, 0x00000000);
437 
438 	/* Specify workarounds for various microcode issues */
439 	if (adreno_is_a530(adreno_gpu)) {
440 		/* Workaround for token end syncs
441 		 * Force a WFI after every direct-render 3D mode draw and every
442 		 * 2D mode 3 draw
443 		 */
444 		OUT_RING(ring, 0x0000000B);
445 	} else {
446 		/* No workarounds enabled */
447 		OUT_RING(ring, 0x00000000);
448 	}
449 
450 	OUT_RING(ring, 0x00000000);
451 	OUT_RING(ring, 0x00000000);
452 
453 	gpu->funcs->flush(gpu, ring);
454 	return a5xx_idle(gpu, ring) ? 0 : -EINVAL;
455 }
456 
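/*
 * Bootstrap preemption on ring 0: program the save record, enable local
 * preemption and yields, and emit an initial yield so the CP starts in a
 * known preemptible state.  A no-op when only one ring is configured.
 */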
457 static int a5xx_preempt_start(struct msm_gpu *gpu)
458 {
459 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
460 	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
461 	struct msm_ringbuffer *ring = gpu->rb[0];
462 
463 	if (gpu->nr_rings == 1)
464 		return 0;
465 
466 	/* Turn off protected mode to write to special registers */
467 	OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
468 	OUT_RING(ring, 0);
469 
470 	/* Set the save preemption record for the ring/command */
471 	OUT_PKT4(ring, REG_A5XX_CP_CONTEXT_SWITCH_SAVE_ADDR_LO, 2);
472 	OUT_RING(ring, lower_32_bits(a5xx_gpu->preempt_iova[ring->id]));
473 	OUT_RING(ring, upper_32_bits(a5xx_gpu->preempt_iova[ring->id]));
474 
475 	/* Turn back on protected mode */
476 	OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
477 	OUT_RING(ring, 1);
478 
479 	OUT_PKT7(ring, CP_PREEMPT_ENABLE_GLOBAL, 1);
480 	OUT_RING(ring, 0x00);
481 
482 	OUT_PKT7(ring, CP_PREEMPT_ENABLE_LOCAL, 1);
483 	OUT_RING(ring, 0x01);
484 
485 	OUT_PKT7(ring, CP_YIELD_ENABLE, 1);
486 	OUT_RING(ring, 0x01);
487 
488 	/* Yield the floor on command completion */
489 	OUT_PKT7(ring, CP_CONTEXT_SWITCH_YIELD, 4);
490 	OUT_RING(ring, 0x00);
491 	OUT_RING(ring, 0x00);
492 	OUT_RING(ring, 0x01);
493 	OUT_RING(ring, 0x01);
494 
495 	gpu->funcs->flush(gpu, ring);
496 
497 	return a5xx_idle(gpu, ring) ? 0 : -EINVAL;
498 }
499 
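/*
 * Create GEM buffers for the PM4 and PFP microcode (on first use) and
 * point the CP instruction base registers at them.
 */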
500 static int a5xx_ucode_init(struct msm_gpu *gpu)
501 {
502 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
503 	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
504 	int ret;
505 
506 	if (!a5xx_gpu->pm4_bo) {
507 		a5xx_gpu->pm4_bo = adreno_fw_create_bo(gpu,
508 			adreno_gpu->fw[ADRENO_FW_PM4], &a5xx_gpu->pm4_iova);
509 
510 		if (IS_ERR(a5xx_gpu->pm4_bo)) {
511 			ret = PTR_ERR(a5xx_gpu->pm4_bo);
512 			a5xx_gpu->pm4_bo = NULL;
513 			dev_err(gpu->dev->dev, "could not allocate PM4: %d\n",
514 				ret);
515 			return ret;
516 		}
517 	}
518 
519 	if (!a5xx_gpu->pfp_bo) {
520 		a5xx_gpu->pfp_bo = adreno_fw_create_bo(gpu,
521 			adreno_gpu->fw[ADRENO_FW_PFP], &a5xx_gpu->pfp_iova);
522 
523 		if (IS_ERR(a5xx_gpu->pfp_bo)) {
524 			ret = PTR_ERR(a5xx_gpu->pfp_bo);
525 			a5xx_gpu->pfp_bo = NULL;
526 			dev_err(gpu->dev->dev, "could not allocate PFP: %d\n",
527 				ret);
528 			return ret;
529 		}
530 	}
531 
532 	gpu_write64(gpu, REG_A5XX_CP_ME_INSTR_BASE_LO,
533 		REG_A5XX_CP_ME_INSTR_BASE_HI, a5xx_gpu->pm4_iova);
534 
535 	gpu_write64(gpu, REG_A5XX_CP_PFP_INSTR_BASE_LO,
536 		REG_A5XX_CP_PFP_INSTR_BASE_HI, a5xx_gpu->pfp_iova);
537 
538 	return 0;
539 }
540 
541 #define SCM_GPU_ZAP_SHADER_RESUME 0
542 
543 static int a5xx_zap_shader_resume(struct msm_gpu *gpu)
544 {
545 	int ret;
546 
547 	ret = qcom_scm_set_remote_state(SCM_GPU_ZAP_SHADER_RESUME, GPU_PAS_ID);
548 	if (ret)
549 		DRM_ERROR("%s: zap-shader resume failed: %d\n",
550 			gpu->name, ret);
551 
552 	return ret;
553 }
554 
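/*
 * Load the zap shader on first use, or just ask the secure world to
 * resume it on subsequent calls.  Requires SCM and a target specific
 * firmware name from the adreno_info table.
 */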
555 static int a5xx_zap_shader_init(struct msm_gpu *gpu)
556 {
557 	static bool loaded;
558 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
559 	struct platform_device *pdev = gpu->pdev;
560 	int ret;
561 
562 	/*
563 	 * If the zap shader is already loaded into memory we just need to kick
564 	 * the remote processor to reinitialize it
565 	 */
566 	if (loaded)
567 		return a5xx_zap_shader_resume(gpu);
568 
569 	/* We need SCM to be able to load the firmware */
570 	if (!qcom_scm_is_available()) {
571 		DRM_DEV_ERROR(&pdev->dev, "SCM is not available\n");
572 		return -EPROBE_DEFER;
573 	}
574 
575 	/* Each GPU has a target specific zap shader firmware name to use */
576 	if (!adreno_gpu->info->zapfw) {
577 		DRM_DEV_ERROR(&pdev->dev,
578 			"Zap shader firmware file not specified for this target\n");
579 		return -ENODEV;
580 	}
581 
582 	ret = zap_shader_load_mdt(gpu, adreno_gpu->info->zapfw);
583 
584 	loaded = !ret;
585 
586 	return ret;
587 }
588 
589 #define A5XX_INT_MASK (A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR | \
590 	  A5XX_RBBM_INT_0_MASK_RBBM_TRANSFER_TIMEOUT | \
591 	  A5XX_RBBM_INT_0_MASK_RBBM_ME_MS_TIMEOUT | \
592 	  A5XX_RBBM_INT_0_MASK_RBBM_PFP_MS_TIMEOUT | \
593 	  A5XX_RBBM_INT_0_MASK_RBBM_ETS_MS_TIMEOUT | \
594 	  A5XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNC_OVERFLOW | \
595 	  A5XX_RBBM_INT_0_MASK_CP_HW_ERROR | \
596 	  A5XX_RBBM_INT_0_MASK_MISC_HANG_DETECT | \
597 	  A5XX_RBBM_INT_0_MASK_CP_SW | \
598 	  A5XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS | \
599 	  A5XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS | \
600 	  A5XX_RBBM_INT_0_MASK_GPMU_VOLTAGE_DROOP)
601 
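/*
 * Bring the GPU up: program the VBIF/UCHE/CP configuration and the CP
 * protection ranges, load the microcode and GPMU firmware, start the
 * micro engine, and drop out of secure mode (via the zap shader when
 * available, otherwise by writing SECVID_TRUST_CNTL directly).
 */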
602 static int a5xx_hw_init(struct msm_gpu *gpu)
603 {
604 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
605 	int ret;
606 
607 	gpu_write(gpu, REG_A5XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x00000003);
608 
609 	/* Make all blocks contribute to the GPU BUSY perf counter */
610 	gpu_write(gpu, REG_A5XX_RBBM_PERFCTR_GPU_BUSY_MASKED, 0xFFFFFFFF);
611 
612 	/* Enable RBBM error reporting bits */
613 	gpu_write(gpu, REG_A5XX_RBBM_AHB_CNTL0, 0x00000001);
614 
615 	if (adreno_gpu->info->quirks & ADRENO_QUIRK_FAULT_DETECT_MASK) {
616 		/*
617 		 * Mask out the activity signals from RB1-3 to avoid false
618 		 * positives
619 		 */
620 
621 		gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL11,
622 			0xF0000000);
623 		gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL12,
624 			0xFFFFFFFF);
625 		gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL13,
626 			0xFFFFFFFF);
627 		gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL14,
628 			0xFFFFFFFF);
629 		gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL15,
630 			0xFFFFFFFF);
631 		gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL16,
632 			0xFFFFFFFF);
633 		gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL17,
634 			0xFFFFFFFF);
635 		gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_MASK_CNTL18,
636 			0xFFFFFFFF);
637 	}
638 
639 	/* Enable fault detection */
640 	gpu_write(gpu, REG_A5XX_RBBM_INTERFACE_HANG_INT_CNTL,
641 		(1 << 30) | 0xFFFF);
642 
643 	/* Turn on performance counters */
644 	gpu_write(gpu, REG_A5XX_RBBM_PERFCTR_CNTL, 0x01);
645 
646 	/* Select CP0 to always count cycles */
647 	gpu_write(gpu, REG_A5XX_CP_PERFCTR_CP_SEL_0, PERF_CP_ALWAYS_COUNT);
648 
649 	/* Select countable 6 on RBBM perf counter 0 to get the busy status for devfreq */
650 	gpu_write(gpu, REG_A5XX_RBBM_PERFCTR_RBBM_SEL_0, 6);
651 
652 	/* Increase VFD cache access so LRZ and other data gets evicted less */
653 	gpu_write(gpu, REG_A5XX_UCHE_CACHE_WAYS, 0x02);
654 
655 	/* Disable L2 bypass in the UCHE */
656 	gpu_write(gpu, REG_A5XX_UCHE_TRAP_BASE_LO, 0xFFFF0000);
657 	gpu_write(gpu, REG_A5XX_UCHE_TRAP_BASE_HI, 0x0001FFFF);
658 	gpu_write(gpu, REG_A5XX_UCHE_WRITE_THRU_BASE_LO, 0xFFFF0000);
659 	gpu_write(gpu, REG_A5XX_UCHE_WRITE_THRU_BASE_HI, 0x0001FFFF);
660 
661 	/* Set the GMEM VA range (0x100000 to 0x100000 + gpu->gmem) */
662 	gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MIN_LO, 0x00100000);
663 	gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MIN_HI, 0x00000000);
664 	gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MAX_LO,
665 		0x00100000 + adreno_gpu->gmem - 1);
666 	gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MAX_HI, 0x00000000);
667 
668 	gpu_write(gpu, REG_A5XX_CP_MEQ_THRESHOLDS, 0x40);
669 	gpu_write(gpu, REG_A5XX_CP_MERCIU_SIZE, 0x40);
670 	gpu_write(gpu, REG_A5XX_CP_ROQ_THRESHOLDS_2, 0x80000060);
671 	gpu_write(gpu, REG_A5XX_CP_ROQ_THRESHOLDS_1, 0x40201B16);
672 
673 	gpu_write(gpu, REG_A5XX_PC_DBG_ECO_CNTL, (0x400 << 11 | 0x300 << 22));
674 
675 	if (adreno_gpu->info->quirks & ADRENO_QUIRK_TWO_PASS_USE_WFI)
676 		gpu_rmw(gpu, REG_A5XX_PC_DBG_ECO_CNTL, 0, (1 << 8));
677 
678 	gpu_write(gpu, REG_A5XX_PC_DBG_ECO_CNTL, 0xc0200100);
679 
680 	/* Enable USE_RETENTION_FLOPS */
681 	gpu_write(gpu, REG_A5XX_CP_CHICKEN_DBG, 0x02000000);
682 
683 	/* Enable ME/PFP split notification */
684 	gpu_write(gpu, REG_A5XX_RBBM_AHB_CNTL1, 0xA6FFFFFF);
685 
686 	/* Enable HWCG */
687 	a5xx_set_hwcg(gpu, true);
688 
689 	gpu_write(gpu, REG_A5XX_RBBM_AHB_CNTL2, 0x0000003F);
690 
691 	/* Set the highest bank bit */
692 	gpu_write(gpu, REG_A5XX_TPL1_MODE_CNTL, 2 << 7);
693 	gpu_write(gpu, REG_A5XX_RB_MODE_CNTL, 2 << 1);
694 
695 	/* Protect registers from the CP */
696 	gpu_write(gpu, REG_A5XX_CP_PROTECT_CNTL, 0x00000007);
697 
698 	/* RBBM */
699 	gpu_write(gpu, REG_A5XX_CP_PROTECT(0), ADRENO_PROTECT_RW(0x04, 4));
700 	gpu_write(gpu, REG_A5XX_CP_PROTECT(1), ADRENO_PROTECT_RW(0x08, 8));
701 	gpu_write(gpu, REG_A5XX_CP_PROTECT(2), ADRENO_PROTECT_RW(0x10, 16));
702 	gpu_write(gpu, REG_A5XX_CP_PROTECT(3), ADRENO_PROTECT_RW(0x20, 32));
703 	gpu_write(gpu, REG_A5XX_CP_PROTECT(4), ADRENO_PROTECT_RW(0x40, 64));
704 	gpu_write(gpu, REG_A5XX_CP_PROTECT(5), ADRENO_PROTECT_RW(0x80, 64));
705 
706 	/* Content protect */
707 	gpu_write(gpu, REG_A5XX_CP_PROTECT(6),
708 		ADRENO_PROTECT_RW(REG_A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_LO,
709 			16));
710 	gpu_write(gpu, REG_A5XX_CP_PROTECT(7),
711 		ADRENO_PROTECT_RW(REG_A5XX_RBBM_SECVID_TRUST_CNTL, 2));
712 
713 	/* CP */
714 	gpu_write(gpu, REG_A5XX_CP_PROTECT(8), ADRENO_PROTECT_RW(0x800, 64));
715 	gpu_write(gpu, REG_A5XX_CP_PROTECT(9), ADRENO_PROTECT_RW(0x840, 8));
716 	gpu_write(gpu, REG_A5XX_CP_PROTECT(10), ADRENO_PROTECT_RW(0x880, 32));
717 	gpu_write(gpu, REG_A5XX_CP_PROTECT(11), ADRENO_PROTECT_RW(0xAA0, 1));
718 
719 	/* RB */
720 	gpu_write(gpu, REG_A5XX_CP_PROTECT(12), ADRENO_PROTECT_RW(0xCC0, 1));
721 	gpu_write(gpu, REG_A5XX_CP_PROTECT(13), ADRENO_PROTECT_RW(0xCF0, 2));
722 
723 	/* VPC */
724 	gpu_write(gpu, REG_A5XX_CP_PROTECT(14), ADRENO_PROTECT_RW(0xE68, 8));
725 	gpu_write(gpu, REG_A5XX_CP_PROTECT(15), ADRENO_PROTECT_RW(0xE70, 4));
726 
727 	/* UCHE */
728 	gpu_write(gpu, REG_A5XX_CP_PROTECT(16), ADRENO_PROTECT_RW(0xE80, 16));
729 
730 	if (adreno_is_a530(adreno_gpu))
731 		gpu_write(gpu, REG_A5XX_CP_PROTECT(17),
732 			ADRENO_PROTECT_RW(0x10000, 0x8000));
733 
734 	gpu_write(gpu, REG_A5XX_RBBM_SECVID_TSB_CNTL, 0);
735 	/*
736 	 * Disable the trusted memory range - we don't actually support secure
737 	 * memory rendering at this point in time and we don't want to block off
738 	 * part of the virtual memory space.
739 	 */
740 	gpu_write64(gpu, REG_A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_LO,
741 		REG_A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_HI, 0x00000000);
742 	gpu_write(gpu, REG_A5XX_RBBM_SECVID_TSB_TRUSTED_SIZE, 0x00000000);
743 
744 	ret = adreno_hw_init(gpu);
745 	if (ret)
746 		return ret;
747 
748 	a5xx_preempt_hw_init(gpu);
749 
750 	a5xx_gpmu_ucode_init(gpu);
751 
752 	ret = a5xx_ucode_init(gpu);
753 	if (ret)
754 		return ret;
755 
756 	/* Disable the interrupts through the initial bringup stage */
757 	gpu_write(gpu, REG_A5XX_RBBM_INT_0_MASK, A5XX_INT_MASK);
758 
759 	/* Clear ME_HALT to start the micro engine */
760 	gpu_write(gpu, REG_A5XX_CP_PFP_ME_CNTL, 0);
761 	ret = a5xx_me_init(gpu);
762 	if (ret)
763 		return ret;
764 
765 	ret = a5xx_power_init(gpu);
766 	if (ret)
767 		return ret;
768 
769 	/*
770 	 * Send a pipeline event stat to get misbehaving counters to start
771 	 * ticking correctly
772 	 */
773 	if (adreno_is_a530(adreno_gpu)) {
774 		OUT_PKT7(gpu->rb[0], CP_EVENT_WRITE, 1);
775 		OUT_RING(gpu->rb[0], 0x0F);
776 
777 		gpu->funcs->flush(gpu, gpu->rb[0]);
778 		if (!a5xx_idle(gpu, gpu->rb[0]))
779 			return -EINVAL;
780 	}
781 
782 	/*
783 	 * Try to load a zap shader into the secure world. If successful
784 	 * we can use the CP to switch out of secure mode. If not then we
785 	 * have no resource but to try to switch ourselves out manually. If we
786 	 * guessed wrong then access to the RBBM_SECVID_TRUST_CNTL register will
787 	 * be blocked and a permissions violation will soon follow.
788 	 */
789 	ret = a5xx_zap_shader_init(gpu);
790 	if (!ret) {
791 		OUT_PKT7(gpu->rb[0], CP_SET_SECURE_MODE, 1);
792 		OUT_RING(gpu->rb[0], 0x00000000);
793 
794 		gpu->funcs->flush(gpu, gpu->rb[0]);
795 		if (!a5xx_idle(gpu, gpu->rb[0]))
796 			return -EINVAL;
797 	} else {
798 		/* Print a warning so if we die, we know why */
799 		dev_warn_once(gpu->dev->dev,
800 			"Zap shader not enabled - using SECVID_TRUST_CNTL instead\n");
801 		gpu_write(gpu, REG_A5XX_RBBM_SECVID_TRUST_CNTL, 0x0);
802 	}
803 
804 	/* Last step - yield the ringbuffer */
805 	a5xx_preempt_start(gpu);
806 
807 	return 0;
808 }
809 
810 static void a5xx_recover(struct msm_gpu *gpu)
811 {
812 	int i;
813 
814 	adreno_dump_info(gpu);
815 
816 	for (i = 0; i < 8; i++) {
817 		printk("CP_SCRATCH_REG%d: %u\n", i,
818 			gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(i)));
819 	}
820 
821 	if (hang_debug)
822 		a5xx_dump(gpu);
823 
824 	gpu_write(gpu, REG_A5XX_RBBM_SW_RESET_CMD, 1);
825 	gpu_read(gpu, REG_A5XX_RBBM_SW_RESET_CMD);
826 	gpu_write(gpu, REG_A5XX_RBBM_SW_RESET_CMD, 0);
827 	adreno_recover(gpu);
828 }
829 
830 static void a5xx_destroy(struct msm_gpu *gpu)
831 {
832 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
833 	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
834 
835 	DBG("%s", gpu->name);
836 
837 	a5xx_preempt_fini(gpu);
838 
839 	if (a5xx_gpu->pm4_bo) {
840 		if (a5xx_gpu->pm4_iova)
841 			msm_gem_put_iova(a5xx_gpu->pm4_bo, gpu->aspace);
842 		drm_gem_object_put_unlocked(a5xx_gpu->pm4_bo);
843 	}
844 
845 	if (a5xx_gpu->pfp_bo) {
846 		if (a5xx_gpu->pfp_iova)
847 			msm_gem_put_iova(a5xx_gpu->pfp_bo, gpu->aspace);
848 		drm_gem_object_put_unlocked(a5xx_gpu->pfp_bo);
849 	}
850 
851 	if (a5xx_gpu->gpmu_bo) {
852 		if (a5xx_gpu->gpmu_iova)
853 			msm_gem_put_iova(a5xx_gpu->gpmu_bo, gpu->aspace);
854 		drm_gem_object_put_unlocked(a5xx_gpu->gpmu_bo);
855 	}
856 
857 	adreno_gpu_cleanup(adreno_gpu);
858 	kfree(a5xx_gpu);
859 }
860 
861 static inline bool _a5xx_check_idle(struct msm_gpu *gpu)
862 {
863 	if (gpu_read(gpu, REG_A5XX_RBBM_STATUS) & ~A5XX_RBBM_STATUS_HI_BUSY)
864 		return false;
865 
866 	/*
867 	 * Nearly every abnormality ends up pausing the GPU and triggering a
868 	 * fault so we can safely just watch for this one interrupt to fire
869 	 */
870 	return !(gpu_read(gpu, REG_A5XX_RBBM_INT_0_STATUS) &
871 		A5XX_RBBM_INT_0_MASK_MISC_HANG_DETECT);
872 }
873 
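/*
 * Wait for the CP to drain the given ring and for the GPU to report
 * idle.  Only valid for the currently active ring.
 */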
874 bool a5xx_idle(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
875 {
876 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
877 	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
878 
879 	if (ring != a5xx_gpu->cur_ring) {
880 		WARN(1, "Tried to idle a non-current ringbuffer\n");
881 		return false;
882 	}
883 
884 	/* wait for CP to drain ringbuffer: */
885 	if (!adreno_idle(gpu, ring))
886 		return false;
887 
888 	if (spin_until(_a5xx_check_idle(gpu))) {
889 		DRM_ERROR("%s: %ps: timeout waiting for GPU to idle: status %8.8X irq %8.8X rptr/wptr %d/%d\n",
890 			gpu->name, __builtin_return_address(0),
891 			gpu_read(gpu, REG_A5XX_RBBM_STATUS),
892 			gpu_read(gpu, REG_A5XX_RBBM_INT_0_STATUS),
893 			gpu_read(gpu, REG_A5XX_CP_RB_RPTR),
894 			gpu_read(gpu, REG_A5XX_CP_RB_WPTR));
895 		return false;
896 	}
897 
898 	return true;
899 }
900 
901 static int a5xx_fault_handler(void *arg, unsigned long iova, int flags)
902 {
903 	struct msm_gpu *gpu = arg;
904 	pr_warn_ratelimited("*** gpu fault: iova=%08lx, flags=%d (%u,%u,%u,%u)\n",
905 			iova, flags,
906 			gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(4)),
907 			gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(5)),
908 			gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(6)),
909 			gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(7)));
910 
911 	return -EFAULT;
912 }
913 
914 static void a5xx_cp_err_irq(struct msm_gpu *gpu)
915 {
916 	u32 status = gpu_read(gpu, REG_A5XX_CP_INTERRUPT_STATUS);
917 
918 	if (status & A5XX_CP_INT_CP_OPCODE_ERROR) {
919 		u32 val;
920 
921 		gpu_write(gpu, REG_A5XX_CP_PFP_STAT_ADDR, 0);
922 
923 		/*
924 		 * REG_A5XX_CP_PFP_STAT_DATA is indexed, and we want index 1 so
925 		 * read it twice
926 		 */
927 
928 		gpu_read(gpu, REG_A5XX_CP_PFP_STAT_DATA);
929 		val = gpu_read(gpu, REG_A5XX_CP_PFP_STAT_DATA);
930 
931 		dev_err_ratelimited(gpu->dev->dev, "CP | opcode error | possible opcode=0x%8.8X\n",
932 			val);
933 	}
934 
935 	if (status & A5XX_CP_INT_CP_HW_FAULT_ERROR)
936 		dev_err_ratelimited(gpu->dev->dev, "CP | HW fault | status=0x%8.8X\n",
937 			gpu_read(gpu, REG_A5XX_CP_HW_FAULT));
938 
939 	if (status & A5XX_CP_INT_CP_DMA_ERROR)
940 		dev_err_ratelimited(gpu->dev->dev, "CP | DMA error\n");
941 
942 	if (status & A5XX_CP_INT_CP_REGISTER_PROTECTION_ERROR) {
943 		u32 val = gpu_read(gpu, REG_A5XX_CP_PROTECT_STATUS);
944 
945 		dev_err_ratelimited(gpu->dev->dev,
946 			"CP | protected mode error | %s | addr=0x%8.8X | status=0x%8.8X\n",
947 			val & (1 << 24) ? "WRITE" : "READ",
948 			(val & 0xFFFFF) >> 2, val);
949 	}
950 
951 	if (status & A5XX_CP_INT_CP_AHB_ERROR) {
952 		u32 status = gpu_read(gpu, REG_A5XX_CP_AHB_FAULT);
953 		const char *access[16] = { "reserved", "reserved",
954 			"timestamp lo", "timestamp hi", "pfp read", "pfp write",
955 			"", "", "me read", "me write", "", "", "crashdump read",
956 			"crashdump write" };
957 
958 		dev_err_ratelimited(gpu->dev->dev,
959 			"CP | AHB error | addr=%X access=%s error=%d | status=0x%8.8X\n",
960 			status & 0xFFFFF, access[(status >> 24) & 0xF],
961 			(status & (1 << 31)), status);
962 	}
963 }
964 
965 static void a5xx_rbbm_err_irq(struct msm_gpu *gpu, u32 status)
966 {
967 	if (status & A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR) {
968 		u32 val = gpu_read(gpu, REG_A5XX_RBBM_AHB_ERROR_STATUS);
969 
970 		dev_err_ratelimited(gpu->dev->dev,
971 			"RBBM | AHB bus error | %s | addr=0x%X | ports=0x%X:0x%X\n",
972 			val & (1 << 28) ? "WRITE" : "READ",
973 			(val & 0xFFFFF) >> 2, (val >> 20) & 0x3,
974 			(val >> 24) & 0xF);
975 
976 		/* Clear the error */
977 		gpu_write(gpu, REG_A5XX_RBBM_AHB_CMD, (1 << 4));
978 
979 		/* Clear the interrupt */
980 		gpu_write(gpu, REG_A5XX_RBBM_INT_CLEAR_CMD,
981 			A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR);
982 	}
983 
984 	if (status & A5XX_RBBM_INT_0_MASK_RBBM_TRANSFER_TIMEOUT)
985 		dev_err_ratelimited(gpu->dev->dev, "RBBM | AHB transfer timeout\n");
986 
987 	if (status & A5XX_RBBM_INT_0_MASK_RBBM_ME_MS_TIMEOUT)
988 		dev_err_ratelimited(gpu->dev->dev, "RBBM | ME master split | status=0x%X\n",
989 			gpu_read(gpu, REG_A5XX_RBBM_AHB_ME_SPLIT_STATUS));
990 
991 	if (status & A5XX_RBBM_INT_0_MASK_RBBM_PFP_MS_TIMEOUT)
992 		dev_err_ratelimited(gpu->dev->dev, "RBBM | PFP master split | status=0x%X\n",
993 			gpu_read(gpu, REG_A5XX_RBBM_AHB_PFP_SPLIT_STATUS));
994 
995 	if (status & A5XX_RBBM_INT_0_MASK_RBBM_ETS_MS_TIMEOUT)
996 		dev_err_ratelimited(gpu->dev->dev, "RBBM | ETS master split | status=0x%X\n",
997 			gpu_read(gpu, REG_A5XX_RBBM_AHB_ETS_SPLIT_STATUS));
998 
999 	if (status & A5XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNC_OVERFLOW)
1000 		dev_err_ratelimited(gpu->dev->dev, "RBBM | ATB ASYNC overflow\n");
1001 
1002 	if (status & A5XX_RBBM_INT_0_MASK_RBBM_ATB_BUS_OVERFLOW)
1003 		dev_err_ratelimited(gpu->dev->dev, "RBBM | ATB bus overflow\n");
1004 }
1005 
1006 static void a5xx_uche_err_irq(struct msm_gpu *gpu)
1007 {
1008 	uint64_t addr = (uint64_t) gpu_read(gpu, REG_A5XX_UCHE_TRAP_LOG_HI);
1009 
1010 	addr |= gpu_read(gpu, REG_A5XX_UCHE_TRAP_LOG_LO);
1011 
1012 	dev_err_ratelimited(gpu->dev->dev, "UCHE | Out of bounds access | addr=0x%llX\n",
1013 		addr);
1014 }
1015 
1016 static void a5xx_gpmu_err_irq(struct msm_gpu *gpu)
1017 {
1018 	dev_err_ratelimited(gpu->dev->dev, "GPMU | voltage droop\n");
1019 }
1020 
1021 static void a5xx_fault_detect_irq(struct msm_gpu *gpu)
1022 {
1023 	struct drm_device *dev = gpu->dev;
1024 	struct msm_drm_private *priv = dev->dev_private;
1025 	struct msm_ringbuffer *ring = gpu->funcs->active_ring(gpu);
1026 
1027 	dev_err(dev->dev, "gpu fault ring %d fence %x status %8.8X rb %4.4x/%4.4x ib1 %16.16llX/%4.4x ib2 %16.16llX/%4.4x\n",
1028 		ring ? ring->id : -1, ring ? ring->seqno : 0,
1029 		gpu_read(gpu, REG_A5XX_RBBM_STATUS),
1030 		gpu_read(gpu, REG_A5XX_CP_RB_RPTR),
1031 		gpu_read(gpu, REG_A5XX_CP_RB_WPTR),
1032 		gpu_read64(gpu, REG_A5XX_CP_IB1_BASE, REG_A5XX_CP_IB1_BASE_HI),
1033 		gpu_read(gpu, REG_A5XX_CP_IB1_BUFSZ),
1034 		gpu_read64(gpu, REG_A5XX_CP_IB2_BASE, REG_A5XX_CP_IB2_BASE_HI),
1035 		gpu_read(gpu, REG_A5XX_CP_IB2_BUFSZ));
1036 
1037 	/* Turn off the hangcheck timer to keep it from bothering us */
1038 	del_timer(&gpu->hangcheck_timer);
1039 
1040 	queue_work(priv->wq, &gpu->recover_work);
1041 }
1042 
1043 #define RBBM_ERROR_MASK \
1044 	(A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR | \
1045 	A5XX_RBBM_INT_0_MASK_RBBM_TRANSFER_TIMEOUT | \
1046 	A5XX_RBBM_INT_0_MASK_RBBM_ME_MS_TIMEOUT | \
1047 	A5XX_RBBM_INT_0_MASK_RBBM_PFP_MS_TIMEOUT | \
1048 	A5XX_RBBM_INT_0_MASK_RBBM_ETS_MS_TIMEOUT | \
1049 	A5XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNC_OVERFLOW)
1050 
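/*
 * Top level IRQ handler: ack everything except RBBM_AHB_ERROR (cleared
 * in a5xx_rbbm_err_irq once the source is cleared), then dispatch to the
 * per-block error handlers, retire completed submits on CACHE_FLUSH_TS
 * and handle preemption completion.
 */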
1051 static irqreturn_t a5xx_irq(struct msm_gpu *gpu)
1052 {
1053 	u32 status = gpu_read(gpu, REG_A5XX_RBBM_INT_0_STATUS);
1054 
1055 	/*
1056 	 * Clear all the interrupts except RBBM_AHB_ERROR - if we clear it
1057 	 * before the source is cleared the interrupt will storm.
1058 	 */
1059 	gpu_write(gpu, REG_A5XX_RBBM_INT_CLEAR_CMD,
1060 		status & ~A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR);
1061 
1062 	/* Pass status to a5xx_rbbm_err_irq because we've already cleared it */
1063 	if (status & RBBM_ERROR_MASK)
1064 		a5xx_rbbm_err_irq(gpu, status);
1065 
1066 	if (status & A5XX_RBBM_INT_0_MASK_CP_HW_ERROR)
1067 		a5xx_cp_err_irq(gpu);
1068 
1069 	if (status & A5XX_RBBM_INT_0_MASK_MISC_HANG_DETECT)
1070 		a5xx_fault_detect_irq(gpu);
1071 
1072 	if (status & A5XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS)
1073 		a5xx_uche_err_irq(gpu);
1074 
1075 	if (status & A5XX_RBBM_INT_0_MASK_GPMU_VOLTAGE_DROOP)
1076 		a5xx_gpmu_err_irq(gpu);
1077 
1078 	if (status & A5XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS) {
1079 		a5xx_preempt_trigger(gpu);
1080 		msm_gpu_retire(gpu);
1081 	}
1082 
1083 	if (status & A5XX_RBBM_INT_0_MASK_CP_SW)
1084 		a5xx_preempt_irq(gpu);
1085 
1086 	return IRQ_HANDLED;
1087 }
1088 
1089 static const u32 a5xx_register_offsets[REG_ADRENO_REGISTER_MAX] = {
1090 	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_BASE, REG_A5XX_CP_RB_BASE),
1091 	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_BASE_HI, REG_A5XX_CP_RB_BASE_HI),
1092 	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_RPTR_ADDR, REG_A5XX_CP_RB_RPTR_ADDR),
1093 	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_RPTR_ADDR_HI,
1094 		REG_A5XX_CP_RB_RPTR_ADDR_HI),
1095 	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_RPTR, REG_A5XX_CP_RB_RPTR),
1096 	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_WPTR, REG_A5XX_CP_RB_WPTR),
1097 	REG_ADRENO_DEFINE(REG_ADRENO_CP_RB_CNTL, REG_A5XX_CP_RB_CNTL),
1098 };
1099 
1100 static const u32 a5xx_registers[] = {
1101 	0x0000, 0x0002, 0x0004, 0x0020, 0x0022, 0x0026, 0x0029, 0x002B,
1102 	0x002E, 0x0035, 0x0038, 0x0042, 0x0044, 0x0044, 0x0047, 0x0095,
1103 	0x0097, 0x00BB, 0x03A0, 0x0464, 0x0469, 0x046F, 0x04D2, 0x04D3,
1104 	0x04E0, 0x0533, 0x0540, 0x0555, 0x0800, 0x081A, 0x081F, 0x0841,
1105 	0x0860, 0x0860, 0x0880, 0x08A0, 0x0B00, 0x0B12, 0x0B15, 0x0B28,
1106 	0x0B78, 0x0B7F, 0x0BB0, 0x0BBD, 0x0BC0, 0x0BC6, 0x0BD0, 0x0C53,
1107 	0x0C60, 0x0C61, 0x0C80, 0x0C82, 0x0C84, 0x0C85, 0x0C90, 0x0C98,
1108 	0x0CA0, 0x0CA0, 0x0CB0, 0x0CB2, 0x2180, 0x2185, 0x2580, 0x2585,
1109 	0x0CC1, 0x0CC1, 0x0CC4, 0x0CC7, 0x0CCC, 0x0CCC, 0x0CD0, 0x0CD8,
1110 	0x0CE0, 0x0CE5, 0x0CE8, 0x0CE8, 0x0CEC, 0x0CF1, 0x0CFB, 0x0D0E,
1111 	0x2100, 0x211E, 0x2140, 0x2145, 0x2500, 0x251E, 0x2540, 0x2545,
1112 	0x0D10, 0x0D17, 0x0D20, 0x0D23, 0x0D30, 0x0D30, 0x20C0, 0x20C0,
1113 	0x24C0, 0x24C0, 0x0E40, 0x0E43, 0x0E4A, 0x0E4A, 0x0E50, 0x0E57,
1114 	0x0E60, 0x0E7C, 0x0E80, 0x0E8E, 0x0E90, 0x0E96, 0x0EA0, 0x0EA8,
1115 	0x0EB0, 0x0EB2, 0xE140, 0xE147, 0xE150, 0xE187, 0xE1A0, 0xE1A9,
1116 	0xE1B0, 0xE1B6, 0xE1C0, 0xE1C7, 0xE1D0, 0xE1D1, 0xE200, 0xE201,
1117 	0xE210, 0xE21C, 0xE240, 0xE268, 0xE000, 0xE006, 0xE010, 0xE09A,
1118 	0xE0A0, 0xE0A4, 0xE0AA, 0xE0EB, 0xE100, 0xE105, 0xE380, 0xE38F,
1119 	0xE3B0, 0xE3B0, 0xE400, 0xE405, 0xE408, 0xE4E9, 0xE4F0, 0xE4F0,
1120 	0xE280, 0xE280, 0xE282, 0xE2A3, 0xE2A5, 0xE2C2, 0xE940, 0xE947,
1121 	0xE950, 0xE987, 0xE9A0, 0xE9A9, 0xE9B0, 0xE9B6, 0xE9C0, 0xE9C7,
1122 	0xE9D0, 0xE9D1, 0xEA00, 0xEA01, 0xEA10, 0xEA1C, 0xEA40, 0xEA68,
1123 	0xE800, 0xE806, 0xE810, 0xE89A, 0xE8A0, 0xE8A4, 0xE8AA, 0xE8EB,
1124 	0xE900, 0xE905, 0xEB80, 0xEB8F, 0xEBB0, 0xEBB0, 0xEC00, 0xEC05,
1125 	0xEC08, 0xECE9, 0xECF0, 0xECF0, 0xEA80, 0xEA80, 0xEA82, 0xEAA3,
1126 	0xEAA5, 0xEAC2, 0xA800, 0xA8FF, 0xAC60, 0xAC60, 0xB000, 0xB97F,
1127 	0xB9A0, 0xB9BF, ~0
1128 };
1129 
1130 static void a5xx_dump(struct msm_gpu *gpu)
1131 {
1132 	dev_info(gpu->dev->dev, "status:   %08x\n",
1133 		gpu_read(gpu, REG_A5XX_RBBM_STATUS));
1134 	adreno_dump(gpu);
1135 }
1136 
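/*
 * Power up the GPU: resume core power and clocks, then turn on the RBCCU
 * and SP power domains, polling for their GDSC enable bits.
 */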
1137 static int a5xx_pm_resume(struct msm_gpu *gpu)
1138 {
1139 	int ret;
1140 
1141 	/* Turn on the core power */
1142 	ret = msm_gpu_pm_resume(gpu);
1143 	if (ret)
1144 		return ret;
1145 
1146 	/* Turn on the RBCCU power domain first to limit the chances of voltage droop */
1147 	gpu_write(gpu, REG_A5XX_GPMU_RBCCU_POWER_CNTL, 0x778000);
1148 
1149 	/* Wait 3 usecs before polling */
1150 	udelay(3);
1151 
1152 	ret = spin_usecs(gpu, 20, REG_A5XX_GPMU_RBCCU_PWR_CLK_STATUS,
1153 		(1 << 20), (1 << 20));
1154 	if (ret) {
1155 		DRM_ERROR("%s: timeout waiting for RBCCU GDSC enable: %X\n",
1156 			gpu->name,
1157 			gpu_read(gpu, REG_A5XX_GPMU_RBCCU_PWR_CLK_STATUS));
1158 		return ret;
1159 	}
1160 
1161 	/* Turn on the SP domain */
1162 	gpu_write(gpu, REG_A5XX_GPMU_SP_POWER_CNTL, 0x778000);
1163 	ret = spin_usecs(gpu, 20, REG_A5XX_GPMU_SP_PWR_CLK_STATUS,
1164 		(1 << 20), (1 << 20));
1165 	if (ret)
1166 		DRM_ERROR("%s: timeout waiting for SP GDSC enable\n",
1167 			gpu->name);
1168 
1169 	return ret;
1170 }
1171 
1172 static int a5xx_pm_suspend(struct msm_gpu *gpu)
1173 {
1174 	/* Clear the VBIF pipe before shutting down */
1175 	gpu_write(gpu, REG_A5XX_VBIF_XIN_HALT_CTRL0, 0xF);
1176 	spin_until((gpu_read(gpu, REG_A5XX_VBIF_XIN_HALT_CTRL1) & 0xF) == 0xF);
1177 
1178 	gpu_write(gpu, REG_A5XX_VBIF_XIN_HALT_CTRL0, 0);
1179 
1180 	/*
1181 	 * Reset the VBIF before power collapse to avoid issue with FIFO
1182 	 * entries
1183 	 */
1184 	gpu_write(gpu, REG_A5XX_RBBM_BLOCK_SW_RESET_CMD, 0x003C0000);
1185 	gpu_write(gpu, REG_A5XX_RBBM_BLOCK_SW_RESET_CMD, 0x00000000);
1186 
1187 	return msm_gpu_pm_suspend(gpu);
1188 }
1189 
1190 static int a5xx_get_timestamp(struct msm_gpu *gpu, uint64_t *value)
1191 {
1192 	*value = gpu_read64(gpu, REG_A5XX_RBBM_PERFCTR_CP_0_LO,
1193 		REG_A5XX_RBBM_PERFCTR_CP_0_HI);
1194 
1195 	return 0;
1196 }
1197 
1198 #ifdef CONFIG_DEBUG_FS
1199 static void a5xx_show(struct msm_gpu *gpu, struct seq_file *m)
1200 {
1201 	seq_printf(m, "status:   %08x\n",
1202 			gpu_read(gpu, REG_A5XX_RBBM_STATUS));
1203 
1204 	/*
1205 	 * Temporarily disable hardware clock gating before going into
1206 	 * adreno_show to avoid issues while reading the registers
1207 	 */
1208 	a5xx_set_hwcg(gpu, false);
1209 	adreno_show(gpu, m);
1210 	a5xx_set_hwcg(gpu, true);
1211 }
1212 #endif
1213 
1214 static struct msm_ringbuffer *a5xx_active_ring(struct msm_gpu *gpu)
1215 {
1216 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
1217 	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
1218 
1219 	return a5xx_gpu->cur_ring;
1220 }
1221 
1222 static int a5xx_gpu_busy(struct msm_gpu *gpu, uint64_t *value)
1223 {
1224 	*value = gpu_read64(gpu, REG_A5XX_RBBM_PERFCTR_RBBM_0_LO,
1225 		REG_A5XX_RBBM_PERFCTR_RBBM_0_HI);
1226 
1227 	return 0;
1228 }
1229 
1230 static const struct adreno_gpu_funcs funcs = {
1231 	.base = {
1232 		.get_param = adreno_get_param,
1233 		.hw_init = a5xx_hw_init,
1234 		.pm_suspend = a5xx_pm_suspend,
1235 		.pm_resume = a5xx_pm_resume,
1236 		.recover = a5xx_recover,
1237 		.submit = a5xx_submit,
1238 		.flush = a5xx_flush,
1239 		.active_ring = a5xx_active_ring,
1240 		.irq = a5xx_irq,
1241 		.destroy = a5xx_destroy,
1242 #ifdef CONFIG_DEBUG_FS
1243 		.show = a5xx_show,
1244 		.debugfs_init = a5xx_debugfs_init,
1245 #endif
1246 		.gpu_busy = a5xx_gpu_busy,
1247 	},
1248 	.get_timestamp = a5xx_get_timestamp,
1249 };
1250 
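/*
 * Read the GPU speed bin from an nvmem cell (if one is described in DT)
 * and tell the OPP layer which frequency table entries are supported.
 */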
1251 static void check_speed_bin(struct device *dev)
1252 {
1253 	struct nvmem_cell *cell;
1254 	u32 bin, val;
1255 
1256 	cell = nvmem_cell_get(dev, "speed_bin");
1257 
1258 	/* If an nvmem cell isn't defined, there is nothing to do */
1259 	if (IS_ERR(cell))
1260 		return;
1261 
1262 	bin = *((u32 *) nvmem_cell_read(cell, NULL));
1263 	nvmem_cell_put(cell);
1264 
1265 	val = (1 << bin);
1266 
1267 	dev_pm_opp_set_supported_hw(dev, &val, 1);
1268 }
1269 
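/*
 * Probe-time constructor: allocate the a5xx_gpu wrapper, initialize the
 * adreno/msm_gpu layers with four rings, hook up the IOMMU fault handler
 * and set up the preemption state for each ring.
 */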
1270 struct msm_gpu *a5xx_gpu_init(struct drm_device *dev)
1271 {
1272 	struct msm_drm_private *priv = dev->dev_private;
1273 	struct platform_device *pdev = priv->gpu_pdev;
1274 	struct a5xx_gpu *a5xx_gpu = NULL;
1275 	struct adreno_gpu *adreno_gpu;
1276 	struct msm_gpu *gpu;
1277 	int ret;
1278 
1279 	if (!pdev) {
1280 		dev_err(dev->dev, "No A5XX device is defined\n");
1281 		return ERR_PTR(-ENXIO);
1282 	}
1283 
1284 	a5xx_gpu = kzalloc(sizeof(*a5xx_gpu), GFP_KERNEL);
1285 	if (!a5xx_gpu)
1286 		return ERR_PTR(-ENOMEM);
1287 
1288 	adreno_gpu = &a5xx_gpu->base;
1289 	gpu = &adreno_gpu->base;
1290 
1291 	adreno_gpu->registers = a5xx_registers;
1292 	adreno_gpu->reg_offsets = a5xx_register_offsets;
1293 
1294 	a5xx_gpu->lm_leakage = 0x4E001A;
1295 
1296 	check_speed_bin(&pdev->dev);
1297 
1298 	ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs, 4);
1299 	if (ret) {
1300 		a5xx_destroy(&(a5xx_gpu->base.base));
1301 		return ERR_PTR(ret);
1302 	}
1303 
1304 	if (gpu->aspace)
1305 		msm_mmu_set_fault_handler(gpu->aspace->mmu, gpu, a5xx_fault_handler);
1306 
1307 	/* Set up the preemption specific bits and pieces for each ringbuffer */
1308 	a5xx_preempt_init(gpu);
1309 
1310 	return gpu;
1311 }
1312