xref: /openbmc/linux/drivers/gpu/drm/i915/gt/selftest_engine_cs.c (revision de8c12110a130337c8e7e7b8250de0580e644dee)
/*
 * SPDX-License-Identifier: GPL-2.0
 *
 * Copyright © 2018 Intel Corporation
 */

#include <linux/sort.h>

#include "intel_gpu_commands.h"
#include "intel_gt_pm.h"
#include "intel_rps.h"

#include "i915_selftest.h"
#include "selftests/igt_flush_test.h"

#define COUNT 5

static int cmp_u32(const void *A, const void *B)
{
	const u32 *a = A, *b = B;

	return *a - *b;
}

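/*
 * perf_begin/perf_end bracket a measurement pass: hold a GT wakeref so the
 * device stays powered, and register a fake waiter with RPS (bumping
 * rps.num_waiters and flushing the RPS worker) so the GPU is waitboosted to
 * maximum frequency and held there while the samples are taken. perf_end
 * drops both references and flushes any outstanding requests via
 * igt_flush_test().
 */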
static void perf_begin(struct intel_gt *gt)
{
	intel_gt_pm_get(gt);

	/* Boost gpufreq to max [waitboost] and keep it fixed */
	atomic_inc(&gt->rps.num_waiters);
	schedule_work(&gt->rps.work);
	flush_work(&gt->rps.work);
}

static int perf_end(struct intel_gt *gt)
{
	atomic_dec(&gt->rps.num_waiters);
	intel_gt_pm_put(gt);

	return igt_flush_test(gt->i915);
}

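/*
 * write_timestamp() emits an MI_STORE_REGISTER_MEM into the request's ring
 * to copy the engine's free-running CS_TIMESTAMP register into the given
 * dword slot of the request's timeline HWSP, addressed through the GGTT.
 * On gen8+ the command carries a 64-bit address, so the DWord length field
 * is one larger (hence cmd++) and the final zero dword is the upper half of
 * the address; on earlier gens that trailing zero executes as an MI_NOOP.
 */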
static int write_timestamp(struct i915_request *rq, int slot)
{
	u32 cmd;
	u32 *cs;

	cs = intel_ring_begin(rq, 4);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	cmd = MI_STORE_REGISTER_MEM | MI_USE_GGTT;
	if (INTEL_GEN(rq->engine->i915) >= 8)
		cmd++;
	*cs++ = cmd;
	*cs++ = i915_mmio_reg_offset(RING_TIMESTAMP(rq->engine->mmio_base));
	*cs++ = i915_request_timeline(rq)->hwsp_offset + slot * sizeof(u32);
	*cs++ = 0;

	intel_ring_advance(rq, cs);

	return 0;
}

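/*
 * create_empty_batch() builds the smallest possible batch buffer: a single
 * page whose first dword is MI_BATCH_BUFFER_END, mapped write-back, flushed
 * and pinned into the context's address space. Jumping into it measures the
 * bare cost of MI_BB_START dispatch and return.
 */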
static struct i915_vma *create_empty_batch(struct intel_context *ce)
{
	struct drm_i915_gem_object *obj;
	struct i915_vma *vma;
	u32 *cs;
	int err;

	obj = i915_gem_object_create_internal(ce->engine->i915, PAGE_SIZE);
	if (IS_ERR(obj))
		return ERR_CAST(obj);

	cs = i915_gem_object_pin_map(obj, I915_MAP_WB);
	if (IS_ERR(cs)) {
		err = PTR_ERR(cs);
		goto err_put;
	}

	cs[0] = MI_BATCH_BUFFER_END;

	i915_gem_object_flush_map(obj);

	vma = i915_vma_instance(obj, ce->vm, NULL);
	if (IS_ERR(vma)) {
		err = PTR_ERR(vma);
		goto err_unpin;
	}

	err = i915_vma_pin(vma, 0, 0, PIN_USER);
	if (err)
		goto err_unpin;

	i915_gem_object_unpin_map(obj);
	return vma;

err_unpin:
	i915_gem_object_unpin_map(obj);
err_put:
	i915_gem_object_put(obj);
	return ERR_PTR(err);
}

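/*
 * trifilter() reduces the COUNT (5) samples to a single value: sort them,
 * discard the minimum and maximum, and return a weighted average that
 * counts the median twice, i.e. (a[1] + 2 * a[2] + a[3]) / 4. For example,
 * samples {9, 1, 5, 7, 3} sort to {1, 3, 5, 7, 9} and filter to
 * (3 + 2 * 5 + 7) / 4 = 5.
 */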
static u32 trifilter(u32 *a)
{
	u64 sum;

	sort(a, COUNT, sizeof(*a), cmp_u32, NULL);

	sum = mul_u32_u32(a[2], 2);
	sum += a[1];
	sum += a[3];

	return sum >> 2;
}

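/*
 * perf_mi_bb_start() measures the cost of launching a batch buffer. Each
 * iteration writes CS_TIMESTAMP into HWSP slot 2, starts the empty batch,
 * then writes CS_TIMESTAMP into slot 3; the difference between the two
 * slots is the number of timestamp cycles spent entering and leaving the
 * batch. COUNT samples per engine are reduced with trifilter() and the
 * result reported via pr_info().
 */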
static int perf_mi_bb_start(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	int err = 0;

	if (INTEL_GEN(gt->i915) < 7) /* for per-engine CS_TIMESTAMP */
		return 0;

	perf_begin(gt);
	for_each_engine(engine, gt, id) {
		struct intel_context *ce = engine->kernel_context;
		struct i915_vma *batch;
		u32 cycles[COUNT];
		int i;

		intel_engine_pm_get(engine);

		batch = create_empty_batch(ce);
		if (IS_ERR(batch)) {
			err = PTR_ERR(batch);
			intel_engine_pm_put(engine);
			break;
		}

		err = i915_vma_sync(batch);
		if (err) {
			intel_engine_pm_put(engine);
			i915_vma_put(batch);
			break;
		}

		for (i = 0; i < ARRAY_SIZE(cycles); i++) {
			struct i915_request *rq;

			rq = i915_request_create(ce);
			if (IS_ERR(rq)) {
				err = PTR_ERR(rq);
				break;
			}

			err = write_timestamp(rq, 2);
			if (err)
				goto out;

			err = rq->engine->emit_bb_start(rq,
							batch->node.start, 8,
							0);
			if (err)
				goto out;

			err = write_timestamp(rq, 3);
			if (err)
				goto out;

out:
			i915_request_get(rq);
			i915_request_add(rq);

			if (i915_request_wait(rq, 0, HZ / 5) < 0)
				err = -EIO;
			i915_request_put(rq);
			if (err)
				break;

			cycles[i] = rq->hwsp_seqno[3] - rq->hwsp_seqno[2];
		}
		i915_vma_put(batch);
		intel_engine_pm_put(engine);
		if (err)
			break;

		pr_info("%s: MI_BB_START cycles: %u\n",
			engine->name, trifilter(cycles));
	}
	if (perf_end(gt))
		err = -EIO;

	return err;
}

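/*
 * create_nop_batch() builds a 64KiB batch in which every dword is zero
 * (each executing as MI_NOOP) except the last, which is
 * MI_BATCH_BUFFER_END: roughly 16k no-ops to be run back to back.
 */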
static struct i915_vma *create_nop_batch(struct intel_context *ce)
{
	struct drm_i915_gem_object *obj;
	struct i915_vma *vma;
	u32 *cs;
	int err;

	obj = i915_gem_object_create_internal(ce->engine->i915, SZ_64K);
	if (IS_ERR(obj))
		return ERR_CAST(obj);

	cs = i915_gem_object_pin_map(obj, I915_MAP_WB);
	if (IS_ERR(cs)) {
		err = PTR_ERR(cs);
		goto err_put;
	}

	memset(cs, 0, SZ_64K);
	cs[SZ_64K / sizeof(*cs) - 1] = MI_BATCH_BUFFER_END;

	i915_gem_object_flush_map(obj);

	vma = i915_vma_instance(obj, ce->vm, NULL);
	if (IS_ERR(vma)) {
		err = PTR_ERR(vma);
		goto err_unpin;
	}

	err = i915_vma_pin(vma, 0, 0, PIN_USER);
	if (err)
		goto err_unpin;

	i915_gem_object_unpin_map(obj);
	return vma;

err_unpin:
	i915_gem_object_unpin_map(obj);
err_put:
	i915_gem_object_put(obj);
	return ERR_PTR(err);
}

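/*
 * perf_mi_noop() measures the marginal cost of executing MI_NOOPs. Each
 * iteration timestamps (slots 2 and 3) around the empty batch and then
 * (slots 3 and 4) around the 64KiB nop batch; subtracting the former delta
 * from the latter cancels the fixed MI_BB_START overhead, leaving only the
 * cycles spent parsing the ~16k MI_NOOPs.
 */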
static int perf_mi_noop(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	int err = 0;

	if (INTEL_GEN(gt->i915) < 7) /* for per-engine CS_TIMESTAMP */
		return 0;

	perf_begin(gt);
	for_each_engine(engine, gt, id) {
		struct intel_context *ce = engine->kernel_context;
		struct i915_vma *base, *nop;
		u32 cycles[COUNT];
		int i;

		intel_engine_pm_get(engine);

		base = create_empty_batch(ce);
		if (IS_ERR(base)) {
			err = PTR_ERR(base);
			intel_engine_pm_put(engine);
			break;
		}

		err = i915_vma_sync(base);
		if (err) {
			i915_vma_put(base);
			intel_engine_pm_put(engine);
			break;
		}

		nop = create_nop_batch(ce);
		if (IS_ERR(nop)) {
			err = PTR_ERR(nop);
			i915_vma_put(base);
			intel_engine_pm_put(engine);
			break;
		}

		err = i915_vma_sync(nop);
		if (err) {
			i915_vma_put(nop);
			i915_vma_put(base);
			intel_engine_pm_put(engine);
			break;
		}

		for (i = 0; i < ARRAY_SIZE(cycles); i++) {
			struct i915_request *rq;

			rq = i915_request_create(ce);
			if (IS_ERR(rq)) {
				err = PTR_ERR(rq);
				break;
			}

			err = write_timestamp(rq, 2);
			if (err)
				goto out;

			err = rq->engine->emit_bb_start(rq,
							base->node.start, 8,
							0);
			if (err)
				goto out;

			err = write_timestamp(rq, 3);
			if (err)
				goto out;

			err = rq->engine->emit_bb_start(rq,
							nop->node.start,
							nop->node.size,
							0);
			if (err)
				goto out;

			err = write_timestamp(rq, 4);
			if (err)
				goto out;

out:
			i915_request_get(rq);
			i915_request_add(rq);

			if (i915_request_wait(rq, 0, HZ / 5) < 0)
				err = -EIO;
			i915_request_put(rq);
			if (err)
				break;

			cycles[i] =
				(rq->hwsp_seqno[4] - rq->hwsp_seqno[3]) -
				(rq->hwsp_seqno[3] - rq->hwsp_seqno[2]);
		}
		i915_vma_put(nop);
		i915_vma_put(base);
		intel_engine_pm_put(engine);
		if (err)
			break;

		pr_info("%s: 16K MI_NOOP cycles: %u\n",
			engine->name, trifilter(cycles));
	}
	if (perf_end(gt))
		err = -EIO;

	return err;
}

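/*
 * Entry point for the perf selftests: runs perf_mi_bb_start and perf_mi_noop
 * across every engine of the GT, unless the GPU is already wedged.
 */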
int intel_engine_cs_perf_selftests(struct drm_i915_private *i915)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(perf_mi_bb_start),
		SUBTEST(perf_mi_noop),
	};

	if (intel_gt_is_wedged(&i915->gt))
		return 0;

	return intel_gt_live_subtests(tests, &i915->gt);
}

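/*
 * intel_mmio_bases_check() is a mock selftest (no hardware required) that
 * validates the static intel_engines[] table: for each engine, the
 * mmio_bases[] entries must be listed in strictly decreasing gen order,
 * each with a non-zero MMIO base, until the gen == 0 terminator.
 * intel_engines[], struct engine_info and MAX_MMIO_BASES are private to
 * intel_engine_cs.c, which #includes this selftest file when
 * CONFIG_DRM_I915_SELFTEST is enabled.
 */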
static int intel_mmio_bases_check(void *arg)
{
	int i, j;

	for (i = 0; i < ARRAY_SIZE(intel_engines); i++) {
		const struct engine_info *info = &intel_engines[i];
		u8 prev = U8_MAX;

		for (j = 0; j < MAX_MMIO_BASES; j++) {
			u8 gen = info->mmio_bases[j].gen;
			u32 base = info->mmio_bases[j].base;

			if (gen >= prev) {
				pr_err("%s(%s, class:%d, instance:%d): mmio base for gen %x is before the one for gen %x\n",
				       __func__,
				       intel_engine_class_repr(info->class),
				       info->class, info->instance,
				       prev, gen);
				return -EINVAL;
			}

			if (gen == 0)
				break;

			if (!base) {
				pr_err("%s(%s, class:%d, instance:%d): invalid mmio base (%x) for gen %x at entry %u\n",
				       __func__,
				       intel_engine_class_repr(info->class),
				       info->class, info->instance,
				       base, gen, j);
				return -EINVAL;
			}

			prev = gen;
		}

		pr_debug("%s: min gen supported for %s%d is %d\n",
			 __func__,
			 intel_engine_class_repr(info->class),
			 info->instance,
			 prev);
	}

	return 0;
}

int intel_engine_cs_mock_selftests(void)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(intel_mmio_bases_check),
	};

	return i915_subtests(tests, NULL);
}