1 // SPDX-License-Identifier: GPL-2.0
2 /* Copyright 2019 Collabora Ltd */
3 
4 #include <drm/drm_file.h>
5 #include <drm/drm_gem_shmem_helper.h>
6 #include <drm/panfrost_drm.h>
7 #include <linux/completion.h>
8 #include <linux/iopoll.h>
9 #include <linux/pm_runtime.h>
10 #include <linux/slab.h>
11 #include <linux/uaccess.h>
12 
13 #include "panfrost_device.h"
14 #include "panfrost_features.h"
15 #include "panfrost_gem.h"
16 #include "panfrost_issues.h"
17 #include "panfrost_job.h"
18 #include "panfrost_mmu.h"
19 #include "panfrost_regs.h"
20 
/* Number of counters held by a single counter block. */
#define COUNTERS_PER_BLOCK		64
/* Size of one counter in the dump buffer, in bytes. */
#define BYTES_PER_COUNTER		4
/* Number of counter blocks dumped per core group (V4 buffer layout). */
#define BLOCKS_PER_COREGROUP		8
/*
 * NOTE(review): not referenced by the code visible in this file; presumably
 * the number of shader-core blocks in a V4 core group - confirm before use.
 */
#define V4_SHADERS_PER_COREGROUP	4
25 
26 struct panfrost_perfcnt {
27 	struct panfrost_gem_object *bo;
28 	size_t bosize;
29 	void *buf;
30 	struct panfrost_file_priv *user;
31 	struct mutex lock;
32 	struct completion dump_comp;
33 };
34 
35 void panfrost_perfcnt_clean_cache_done(struct panfrost_device *pfdev)
36 {
37 	complete(&pfdev->perfcnt->dump_comp);
38 }
39 
40 void panfrost_perfcnt_sample_done(struct panfrost_device *pfdev)
41 {
42 	gpu_write(pfdev, GPU_CMD, GPU_CMD_CLEAN_CACHES);
43 }
44 
45 static int panfrost_perfcnt_dump_locked(struct panfrost_device *pfdev)
46 {
47 	u64 gpuva;
48 	int ret;
49 
50 	reinit_completion(&pfdev->perfcnt->dump_comp);
51 	gpuva = pfdev->perfcnt->bo->node.start << PAGE_SHIFT;
52 	gpu_write(pfdev, GPU_PERFCNT_BASE_LO, gpuva);
53 	gpu_write(pfdev, GPU_PERFCNT_BASE_HI, gpuva >> 32);
54 	gpu_write(pfdev, GPU_INT_CLEAR,
55 		  GPU_IRQ_CLEAN_CACHES_COMPLETED |
56 		  GPU_IRQ_PERFCNT_SAMPLE_COMPLETED);
57 	gpu_write(pfdev, GPU_CMD, GPU_CMD_PERFCNT_SAMPLE);
58 	ret = wait_for_completion_interruptible_timeout(&pfdev->perfcnt->dump_comp,
59 							msecs_to_jiffies(1000));
60 	if (!ret)
61 		ret = -ETIMEDOUT;
62 	else if (ret > 0)
63 		ret = 0;
64 
65 	return ret;
66 }
67 
68 static int panfrost_perfcnt_enable_locked(struct panfrost_device *pfdev,
69 					  struct panfrost_file_priv *user,
70 					  unsigned int counterset)
71 {
72 	struct panfrost_perfcnt *perfcnt = pfdev->perfcnt;
73 	struct drm_gem_shmem_object *bo;
74 	u32 cfg;
75 	int ret;
76 
77 	if (user == perfcnt->user)
78 		return 0;
79 	else if (perfcnt->user)
80 		return -EBUSY;
81 
82 	ret = pm_runtime_get_sync(pfdev->dev);
83 	if (ret < 0)
84 		return ret;
85 
86 	bo = drm_gem_shmem_create(pfdev->ddev, perfcnt->bosize);
87 	if (IS_ERR(bo))
88 		return PTR_ERR(bo);
89 
90 	perfcnt->bo = to_panfrost_bo(&bo->base);
91 
92 	/* Map the perfcnt buf in the address space attached to file_priv. */
93 	ret = panfrost_mmu_map(perfcnt->bo);
94 	if (ret)
95 		goto err_put_bo;
96 
97 	perfcnt->buf = drm_gem_shmem_vmap(&bo->base);
98 	if (IS_ERR(perfcnt->buf)) {
99 		ret = PTR_ERR(perfcnt->buf);
100 		goto err_put_bo;
101 	}
102 
103 	/*
104 	 * Invalidate the cache and clear the counters to start from a fresh
105 	 * state.
106 	 */
107 	reinit_completion(&pfdev->perfcnt->dump_comp);
108 	gpu_write(pfdev, GPU_INT_CLEAR,
109 		  GPU_IRQ_CLEAN_CACHES_COMPLETED |
110 		  GPU_IRQ_PERFCNT_SAMPLE_COMPLETED);
111 	gpu_write(pfdev, GPU_CMD, GPU_CMD_PERFCNT_CLEAR);
112 	gpu_write(pfdev, GPU_CMD, GPU_CMD_CLEAN_INV_CACHES);
113 	ret = wait_for_completion_timeout(&pfdev->perfcnt->dump_comp,
114 					  msecs_to_jiffies(1000));
115 	if (!ret) {
116 		ret = -ETIMEDOUT;
117 		goto err_vunmap;
118 	}
119 
120 	perfcnt->user = user;
121 
122 	/*
123 	 * Always use address space 0 for now.
124 	 * FIXME: this needs to be updated when we start using different
125 	 * address space.
126 	 */
127 	cfg = GPU_PERFCNT_CFG_AS(0) |
128 	      GPU_PERFCNT_CFG_MODE(GPU_PERFCNT_CFG_MODE_MANUAL);
129 
130 	/*
131 	 * Bifrost GPUs have 2 set of counters, but we're only interested by
132 	 * the first one for now.
133 	 */
134 	if (panfrost_model_is_bifrost(pfdev))
135 		cfg |= GPU_PERFCNT_CFG_SETSEL(counterset);
136 
137 	gpu_write(pfdev, GPU_PRFCNT_JM_EN, 0xffffffff);
138 	gpu_write(pfdev, GPU_PRFCNT_SHADER_EN, 0xffffffff);
139 	gpu_write(pfdev, GPU_PRFCNT_MMU_L2_EN, 0xffffffff);
140 
141 	/*
142 	 * Due to PRLAM-8186 we need to disable the Tiler before we enable HW
143 	 * counters.
144 	 */
145 	if (panfrost_has_hw_issue(pfdev, HW_ISSUE_8186))
146 		gpu_write(pfdev, GPU_PRFCNT_TILER_EN, 0);
147 	else
148 		gpu_write(pfdev, GPU_PRFCNT_TILER_EN, 0xffffffff);
149 
150 	gpu_write(pfdev, GPU_PERFCNT_CFG, cfg);
151 
152 	if (panfrost_has_hw_issue(pfdev, HW_ISSUE_8186))
153 		gpu_write(pfdev, GPU_PRFCNT_TILER_EN, 0xffffffff);
154 
155 	return 0;
156 
157 err_vunmap:
158 	drm_gem_shmem_vunmap(&perfcnt->bo->base.base, perfcnt->buf);
159 err_put_bo:
160 	drm_gem_object_put_unlocked(&bo->base);
161 	return ret;
162 }
163 
164 static int panfrost_perfcnt_disable_locked(struct panfrost_device *pfdev,
165 					   struct panfrost_file_priv *user)
166 {
167 	struct panfrost_perfcnt *perfcnt = pfdev->perfcnt;
168 
169 	if (user != perfcnt->user)
170 		return -EINVAL;
171 
172 	gpu_write(pfdev, GPU_PRFCNT_JM_EN, 0x0);
173 	gpu_write(pfdev, GPU_PRFCNT_SHADER_EN, 0x0);
174 	gpu_write(pfdev, GPU_PRFCNT_MMU_L2_EN, 0x0);
175 	gpu_write(pfdev, GPU_PRFCNT_TILER_EN, 0);
176 	gpu_write(pfdev, GPU_PERFCNT_CFG,
177 		  GPU_PERFCNT_CFG_MODE(GPU_PERFCNT_CFG_MODE_OFF));
178 
179 	perfcnt->user = NULL;
180 	drm_gem_shmem_vunmap(&perfcnt->bo->base.base, perfcnt->buf);
181 	perfcnt->buf = NULL;
182 	drm_gem_object_put_unlocked(&perfcnt->bo->base.base);
183 	perfcnt->bo = NULL;
184 	pm_runtime_mark_last_busy(pfdev->dev);
185 	pm_runtime_put_autosuspend(pfdev->dev);
186 
187 	return 0;
188 }
189 
190 int panfrost_ioctl_perfcnt_enable(struct drm_device *dev, void *data,
191 				  struct drm_file *file_priv)
192 {
193 	struct panfrost_file_priv *pfile = file_priv->driver_priv;
194 	struct panfrost_device *pfdev = dev->dev_private;
195 	struct panfrost_perfcnt *perfcnt = pfdev->perfcnt;
196 	struct drm_panfrost_perfcnt_enable *req = data;
197 	int ret;
198 
199 	ret = panfrost_unstable_ioctl_check();
200 	if (ret)
201 		return ret;
202 
203 	/* Only Bifrost GPUs have 2 set of counters. */
204 	if (req->counterset > (panfrost_model_is_bifrost(pfdev) ? 1 : 0))
205 		return -EINVAL;
206 
207 	mutex_lock(&perfcnt->lock);
208 	if (req->enable)
209 		ret = panfrost_perfcnt_enable_locked(pfdev, pfile,
210 						     req->counterset);
211 	else
212 		ret = panfrost_perfcnt_disable_locked(pfdev, pfile);
213 	mutex_unlock(&perfcnt->lock);
214 
215 	return ret;
216 }
217 
218 int panfrost_ioctl_perfcnt_dump(struct drm_device *dev, void *data,
219 				struct drm_file *file_priv)
220 {
221 	struct panfrost_device *pfdev = dev->dev_private;
222 	struct panfrost_perfcnt *perfcnt = pfdev->perfcnt;
223 	struct drm_panfrost_perfcnt_dump *req = data;
224 	void __user *user_ptr = (void __user *)(uintptr_t)req->buf_ptr;
225 	int ret;
226 
227 	ret = panfrost_unstable_ioctl_check();
228 	if (ret)
229 		return ret;
230 
231 	mutex_lock(&perfcnt->lock);
232 	if (perfcnt->user != file_priv->driver_priv) {
233 		ret = -EINVAL;
234 		goto out;
235 	}
236 
237 	ret = panfrost_perfcnt_dump_locked(pfdev);
238 	if (ret)
239 		goto out;
240 
241 	if (copy_to_user(user_ptr, perfcnt->buf, perfcnt->bosize))
242 		ret = -EFAULT;
243 
244 out:
245 	mutex_unlock(&perfcnt->lock);
246 
247 	return ret;
248 }
249 
250 void panfrost_perfcnt_close(struct panfrost_file_priv *pfile)
251 {
252 	struct panfrost_device *pfdev = pfile->pfdev;
253 	struct panfrost_perfcnt *perfcnt = pfdev->perfcnt;
254 
255 	pm_runtime_get_sync(pfdev->dev);
256 	mutex_lock(&perfcnt->lock);
257 	if (perfcnt->user == pfile)
258 		panfrost_perfcnt_disable_locked(pfdev, pfile);
259 	mutex_unlock(&perfcnt->lock);
260 	pm_runtime_mark_last_busy(pfdev->dev);
261 	pm_runtime_put_autosuspend(pfdev->dev);
262 }
263 
264 int panfrost_perfcnt_init(struct panfrost_device *pfdev)
265 {
266 	struct panfrost_perfcnt *perfcnt;
267 	size_t size;
268 
269 	if (panfrost_has_hw_feature(pfdev, HW_FEATURE_V4)) {
270 		unsigned int ncoregroups;
271 
272 		ncoregroups = hweight64(pfdev->features.l2_present);
273 		size = ncoregroups * BLOCKS_PER_COREGROUP *
274 		       COUNTERS_PER_BLOCK * BYTES_PER_COUNTER;
275 	} else {
276 		unsigned int nl2c, ncores;
277 
278 		/*
279 		 * TODO: define a macro to extract the number of l2 caches from
280 		 * mem_features.
281 		 */
282 		nl2c = ((pfdev->features.mem_features >> 8) & GENMASK(3, 0)) + 1;
283 
284 		/*
285 		 * shader_present might be sparse, but the counters layout
286 		 * forces to dump unused regions too, hence the fls64() call
287 		 * instead of hweight64().
288 		 */
289 		ncores = fls64(pfdev->features.shader_present);
290 
291 		/*
292 		 * There's always one JM and one Tiler block, hence the '+ 2'
293 		 * here.
294 		 */
295 		size = (nl2c + ncores + 2) *
296 		       COUNTERS_PER_BLOCK * BYTES_PER_COUNTER;
297 	}
298 
299 	perfcnt = devm_kzalloc(pfdev->dev, sizeof(*perfcnt), GFP_KERNEL);
300 	if (!perfcnt)
301 		return -ENOMEM;
302 
303 	perfcnt->bosize = size;
304 
305 	/* Start with everything disabled. */
306 	gpu_write(pfdev, GPU_PERFCNT_CFG,
307 		  GPU_PERFCNT_CFG_MODE(GPU_PERFCNT_CFG_MODE_OFF));
308 	gpu_write(pfdev, GPU_PRFCNT_JM_EN, 0);
309 	gpu_write(pfdev, GPU_PRFCNT_SHADER_EN, 0);
310 	gpu_write(pfdev, GPU_PRFCNT_MMU_L2_EN, 0);
311 	gpu_write(pfdev, GPU_PRFCNT_TILER_EN, 0);
312 
313 	init_completion(&perfcnt->dump_comp);
314 	mutex_init(&perfcnt->lock);
315 	pfdev->perfcnt = perfcnt;
316 
317 	return 0;
318 }
319 
320 void panfrost_perfcnt_fini(struct panfrost_device *pfdev)
321 {
322 	/* Disable everything before leaving. */
323 	gpu_write(pfdev, GPU_PERFCNT_CFG,
324 		  GPU_PERFCNT_CFG_MODE(GPU_PERFCNT_CFG_MODE_OFF));
325 	gpu_write(pfdev, GPU_PRFCNT_JM_EN, 0);
326 	gpu_write(pfdev, GPU_PRFCNT_SHADER_EN, 0);
327 	gpu_write(pfdev, GPU_PRFCNT_MMU_L2_EN, 0);
328 	gpu_write(pfdev, GPU_PRFCNT_TILER_EN, 0);
329 }
330