/*
 * Copyright (C) 2013 Red Hat
 * Author: Rob Clark <robdclark@gmail.com>
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 as published by
 * the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program.  If not, see <http://www.gnu.org/licenses/>.
 */

#include "adreno_gpu.h"
#include "msm_gem.h"
#include "msm_mmu.h"

#define RB_SIZE    SZ_32K
#define RB_BLKSIZE 16

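/* Handle the MSM_PARAM_* queries from userspace: GPU id, GMEM size, and
 * the chip-id packed as core.major.minor.patchid:
 */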
int adreno_get_param(struct msm_gpu *gpu, uint32_t param, uint64_t *value)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);

	switch (param) {
	case MSM_PARAM_GPU_ID:
		*value = adreno_gpu->info->revn;
		return 0;
	case MSM_PARAM_GMEM_SIZE:
		*value = adreno_gpu->gmem;
		return 0;
	case MSM_PARAM_CHIP_ID:
		*value = adreno_gpu->rev.patchid |
				(adreno_gpu->rev.minor << 8) |
				(adreno_gpu->rev.major << 16) |
				(adreno_gpu->rev.core << 24);
		return 0;
	default:
		DBG("%s: invalid param: %u", gpu->name, param);
		return -EINVAL;
	}
}

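/* rbmemptr() gives the GPU (iova) address of a field within the shared
 * adreno_rbmemptrs buffer, eg. rbmemptr(adreno_gpu, fence) for the fence
 * writeback location:
 */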
#define rbmemptr(adreno_gpu, member)  \
	((adreno_gpu)->memptrs_iova + offsetof(struct adreno_rbmemptrs, member))

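/* Common hw init: map the ringbuffer and program the CP ringbuffer
 * size/address, plus the rptr and scratch/fence writeback addresses:
 */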
int adreno_hw_init(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	int ret;

	DBG("%s", gpu->name);

	ret = msm_gem_get_iova(gpu->rb->bo, gpu->id, &gpu->rb_iova);
	if (ret) {
		gpu->rb_iova = 0;
		dev_err(gpu->dev->dev, "could not map ringbuffer: %d\n", ret);
		return ret;
	}

	/* Setup REG_CP_RB_CNTL: */
	gpu_write(gpu, REG_AXXX_CP_RB_CNTL,
			/* size is log2(quad-words): */
			AXXX_CP_RB_CNTL_BUFSZ(ilog2(gpu->rb->size / 8)) |
			AXXX_CP_RB_CNTL_BLKSZ(ilog2(RB_BLKSIZE / 8)));

	/* Setup ringbuffer address: */
	gpu_write(gpu, REG_AXXX_CP_RB_BASE, gpu->rb_iova);
	gpu_write(gpu, REG_AXXX_CP_RB_RPTR_ADDR, rbmemptr(adreno_gpu, rptr));

	/* Setup scratch/timestamp: */
	gpu_write(gpu, REG_AXXX_SCRATCH_ADDR, rbmemptr(adreno_gpu, fence));

	gpu_write(gpu, REG_AXXX_SCRATCH_UMSK, 0x1);

	return 0;
}

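/* Current write pointer, as a dword offset from the start of the ring: */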
static uint32_t get_wptr(struct msm_ringbuffer *ring)
{
	return ring->cur - ring->start;
}

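/* Last fence seqno the GPU has written back (see the CACHE_FLUSH_TS event
 * emitted in adreno_submit()):
 */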
uint32_t adreno_last_fence(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	return adreno_gpu->memptrs->fence;
}

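/* Recover from a hang: power down, reset the ring and the writeback state
 * (marking anything still pending as completed), then power back up and
 * re-run hw init:
 */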
void adreno_recover(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct drm_device *dev = gpu->dev;
	int ret;

	gpu->funcs->pm_suspend(gpu);

	/* reset ringbuffer: */
	gpu->rb->cur = gpu->rb->start;

	/* mark everything submitted so far as completed, ie. just
	 * discard anything that was still pending:
	 */
	adreno_gpu->memptrs->fence = gpu->submitted_fence;
	adreno_gpu->memptrs->rptr  = 0;
	adreno_gpu->memptrs->wptr  = 0;

	gpu->funcs->pm_resume(gpu);
	ret = gpu->funcs->hw_init(gpu);
	if (ret) {
		dev_err(dev->dev, "gpu hw init failed: %d\n", ret);
		/* not much more we can do if re-init fails.. */
	}
}

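/* Write a submit into the ringbuffer: an indirect-buffer packet per cmd,
 * followed by the fence writeback (CACHE_FLUSH_TS) and a CP interrupt to
 * signal completion:
 */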
int adreno_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit,
		struct msm_file_private *ctx)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct msm_drm_private *priv = gpu->dev->dev_private;
	struct msm_ringbuffer *ring = gpu->rb;
	unsigned i, ibs = 0;

	for (i = 0; i < submit->nr_cmds; i++) {
		switch (submit->cmd[i].type) {
		case MSM_SUBMIT_CMD_IB_TARGET_BUF:
			/* ignore IB-targets */
			break;
		case MSM_SUBMIT_CMD_CTX_RESTORE_BUF:
			/* ignore if there has not been a ctx switch: */
			if (priv->lastctx == ctx)
				break;
			/* fall-thru */
		case MSM_SUBMIT_CMD_BUF:
			OUT_PKT3(ring, CP_INDIRECT_BUFFER_PFD, 2);
			OUT_RING(ring, submit->cmd[i].iova);
			OUT_RING(ring, submit->cmd[i].size);
			ibs++;
			break;
		}
	}

	/* on a320, at least, we seem to need to pad things out to an
	 * even number of qwords to avoid an issue with the CP hanging
	 * on wrap-around:
	 */
	if (ibs % 2)
		OUT_PKT2(ring);

	OUT_PKT0(ring, REG_AXXX_CP_SCRATCH_REG2, 1);
	OUT_RING(ring, submit->fence);

	if (adreno_is_a3xx(adreno_gpu)) {
		/* Flush HLSQ lazy updates to make sure there is nothing
		 * pending for indirect loads after the timestamp has
		 * passed:
		 */
		OUT_PKT3(ring, CP_EVENT_WRITE, 1);
		OUT_RING(ring, HLSQ_FLUSH);

		OUT_PKT3(ring, CP_WAIT_FOR_IDLE, 1);
		OUT_RING(ring, 0x00000000);
	}

	OUT_PKT3(ring, CP_EVENT_WRITE, 3);
	OUT_RING(ring, CACHE_FLUSH_TS);
	OUT_RING(ring, rbmemptr(adreno_gpu, fence));
	OUT_RING(ring, submit->fence);

	/* we could maybe be clever and only CP_COND_EXEC the interrupt: */
	OUT_PKT3(ring, CP_INTERRUPT, 1);
	OUT_RING(ring, 0x80000000);

#if 0
	if (adreno_is_a3xx(adreno_gpu)) {
		/* Dummy set-constant to trigger context rollover */
		OUT_PKT3(ring, CP_SET_CONSTANT, 2);
		OUT_RING(ring, CP_REG(REG_A3XX_HLSQ_CL_KERNEL_GROUP_X_REG));
		OUT_RING(ring, 0x00000000);
	}
#endif

	gpu->funcs->flush(gpu);

	return 0;
}

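/* Kick the CP: make sure ring writes have reached memory, then update WPTR: */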
void adreno_flush(struct msm_gpu *gpu)
{
	uint32_t wptr = get_wptr(gpu->rb);

	/* ensure writes to ringbuffer have hit system memory: */
	mb();

	gpu_write(gpu, REG_AXXX_CP_RB_WPTR, wptr);
}

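/* Spin until the CP has consumed everything currently in the ringbuffer: */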
void adreno_idle(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	uint32_t wptr = get_wptr(gpu->rb);

	/* wait for CP to drain ringbuffer: */
	if (spin_until(adreno_gpu->memptrs->rptr == wptr))
		DRM_ERROR("%s: timeout waiting to drain ringbuffer!\n", gpu->name);

	/* TODO maybe we need to reset GPU here to recover from hang? */
}

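/* debugfs show: dump revision, fence status, ring pointers, and a register
 * dump in a form that demsm can parse:
 */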
#ifdef CONFIG_DEBUG_FS
void adreno_show(struct msm_gpu *gpu, struct seq_file *m)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	int i;

	seq_printf(m, "revision: %d (%d.%d.%d.%d)\n",
			adreno_gpu->info->revn, adreno_gpu->rev.core,
			adreno_gpu->rev.major, adreno_gpu->rev.minor,
			adreno_gpu->rev.patchid);

	seq_printf(m, "fence:    %d/%d\n", adreno_gpu->memptrs->fence,
			gpu->submitted_fence);
	seq_printf(m, "rptr:     %d\n", adreno_gpu->memptrs->rptr);
	seq_printf(m, "wptr:     %d\n", adreno_gpu->memptrs->wptr);
	seq_printf(m, "rb wptr:  %d\n", get_wptr(gpu->rb));

	gpu->funcs->pm_resume(gpu);

	/* dump these out in a form that can be parsed by demsm: */
	seq_printf(m, "IO:region %s 00000000 00020000\n", gpu->name);
	for (i = 0; adreno_gpu->registers[i] != ~0; i += 2) {
		uint32_t start = adreno_gpu->registers[i];
		uint32_t end   = adreno_gpu->registers[i+1];
		uint32_t addr;

		for (addr = start; addr <= end; addr++) {
			uint32_t val = gpu_read(gpu, addr);
			seq_printf(m, "IO:R %08x %08x\n", addr<<2, val);
		}
	}

	gpu->funcs->pm_suspend(gpu);
}
#endif

/* would be nice to not have to duplicate the _show() stuff with printk(): */
void adreno_dump(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	int i;

	printk("revision: %d (%d.%d.%d.%d)\n",
			adreno_gpu->info->revn, adreno_gpu->rev.core,
			adreno_gpu->rev.major, adreno_gpu->rev.minor,
			adreno_gpu->rev.patchid);

	printk("fence:    %d/%d\n", adreno_gpu->memptrs->fence,
			gpu->submitted_fence);
	printk("rptr:     %d\n", adreno_gpu->memptrs->rptr);
	printk("wptr:     %d\n", adreno_gpu->memptrs->wptr);
	printk("rb wptr:  %d\n", get_wptr(gpu->rb));

	/* dump these out in a form that can be parsed by demsm: */
	printk("IO:region %s 00000000 00020000\n", gpu->name);
	for (i = 0; adreno_gpu->registers[i] != ~0; i += 2) {
		uint32_t start = adreno_gpu->registers[i];
		uint32_t end   = adreno_gpu->registers[i+1];
		uint32_t addr;

		for (addr = start; addr <= end; addr++) {
			uint32_t val = gpu_read(gpu, addr);
			printk("IO:R %08x %08x\n", addr<<2, val);
		}
	}
}

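/* Free space in the ring, in dwords, always leaving one slot unused so that
 * rptr == wptr unambiguously means "empty":
 */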
static uint32_t ring_freewords(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	uint32_t size = gpu->rb->size / 4;
	uint32_t wptr = get_wptr(gpu->rb);
	uint32_t rptr = adreno_gpu->memptrs->rptr;
	return (rptr + (size - 1) - wptr) % size;
}

void adreno_wait_ring(struct msm_gpu *gpu, uint32_t ndwords)
{
	if (spin_until(ring_freewords(gpu) >= ndwords))
		DRM_ERROR("%s: timeout waiting for ringbuffer space\n", gpu->name);
}

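/* IOMMU port names attached in adreno_gpu_init() below: */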
static const char *iommu_ports[] = {
		"gfx3d_user", "gfx3d_priv",
		"gfx3d1_user", "gfx3d1_priv",
};

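/* Common per-generation init: pull clock/bus rates from the platform config,
 * load PM4/PFP firmware, init the base msm_gpu, attach the IOMMU, and
 * allocate the shared memptrs buffer:
 */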
int adreno_gpu_init(struct drm_device *drm, struct platform_device *pdev,
		struct adreno_gpu *adreno_gpu, const struct adreno_gpu_funcs *funcs)
{
	struct adreno_platform_config *config = pdev->dev.platform_data;
	struct msm_gpu *gpu = &adreno_gpu->base;
	struct msm_mmu *mmu;
	int ret;

	adreno_gpu->funcs = funcs;
	adreno_gpu->info = adreno_info(config->rev);
	adreno_gpu->gmem = adreno_gpu->info->gmem;
	adreno_gpu->revn = adreno_gpu->info->revn;
	adreno_gpu->rev = config->rev;

	gpu->fast_rate = config->fast_rate;
	gpu->slow_rate = config->slow_rate;
	gpu->bus_freq  = config->bus_freq;
#ifdef CONFIG_MSM_BUS_SCALING
	gpu->bus_scale_table = config->bus_scale_table;
#endif

	DBG("fast_rate=%u, slow_rate=%u, bus_freq=%u",
			gpu->fast_rate, gpu->slow_rate, gpu->bus_freq);

	ret = request_firmware(&adreno_gpu->pm4, adreno_gpu->info->pm4fw, drm->dev);
	if (ret) {
		dev_err(drm->dev, "failed to load %s PM4 firmware: %d\n",
				adreno_gpu->info->pm4fw, ret);
		return ret;
	}

	ret = request_firmware(&adreno_gpu->pfp, adreno_gpu->info->pfpfw, drm->dev);
	if (ret) {
		dev_err(drm->dev, "failed to load %s PFP firmware: %d\n",
				adreno_gpu->info->pfpfw, ret);
		return ret;
	}

	ret = msm_gpu_init(drm, pdev, &adreno_gpu->base, &funcs->base,
			adreno_gpu->info->name, "kgsl_3d0_reg_memory", "kgsl_3d0_irq",
			RB_SIZE);
	if (ret)
		return ret;

	mmu = gpu->mmu;
	if (mmu) {
		ret = mmu->funcs->attach(mmu, iommu_ports,
				ARRAY_SIZE(iommu_ports));
		if (ret)
			return ret;
	}

	mutex_lock(&drm->struct_mutex);
	adreno_gpu->memptrs_bo = msm_gem_new(drm, sizeof(*adreno_gpu->memptrs),
			MSM_BO_UNCACHED);
	mutex_unlock(&drm->struct_mutex);
	if (IS_ERR(adreno_gpu->memptrs_bo)) {
		ret = PTR_ERR(adreno_gpu->memptrs_bo);
		adreno_gpu->memptrs_bo = NULL;
		dev_err(drm->dev, "could not allocate memptrs: %d\n", ret);
		return ret;
	}

	adreno_gpu->memptrs = msm_gem_vaddr(adreno_gpu->memptrs_bo);
	if (!adreno_gpu->memptrs) {
		dev_err(drm->dev, "could not vmap memptrs\n");
		return -ENOMEM;
	}

	ret = msm_gem_get_iova(adreno_gpu->memptrs_bo, gpu->id,
			&adreno_gpu->memptrs_iova);
	if (ret) {
		dev_err(drm->dev, "could not map memptrs: %d\n", ret);
		return ret;
	}

	return 0;
}

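/* Undo adreno_gpu_init(): drop the memptrs buffer, firmware, and the base gpu: */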
void adreno_gpu_cleanup(struct adreno_gpu *gpu)
{
	if (gpu->memptrs_bo) {
		if (gpu->memptrs_iova)
			msm_gem_put_iova(gpu->memptrs_bo, gpu->base.id);
		drm_gem_object_unreference(gpu->memptrs_bo);
	}
	if (gpu->pm4)
		release_firmware(gpu->pm4);
	if (gpu->pfp)
		release_firmware(gpu->pfp);
	msm_gpu_cleanup(&gpu->base);
}