/*
 * SPDX-License-Identifier: GPL-2.0
 *
 * Copyright © 2018 Intel Corporation
 */

#include <linux/sort.h>

#include "intel_gt_pm.h"
#include "intel_rps.h"

#include "i915_selftest.h"
#include "selftests/igt_flush_test.h"

#define COUNT 5

static int cmp_u32(const void *A, const void *B)
{
	const u32 *a = A, *b = B;

	/* Compare directly; a u32 difference may not fit in the int result */
	if (*a < *b)
		return -1;
	if (*a > *b)
		return 1;
	return 0;
}

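/*
 * Bracket the measurements with a GT wakeref and an artificial waitboost:
 * bumping rps.num_waiters and kicking the RPS worker keeps the GPU clock
 * pinned at its boost frequency, so the CS_TIMESTAMP deltas are comparable
 * across iterations. perf_end() drops the boost and flushes outstanding
 * work, reporting any error left behind.
 */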
static void perf_begin(struct intel_gt *gt)
{
	intel_gt_pm_get(gt);

	/* Boost gpufreq to max [waitboost] and keep it fixed */
	atomic_inc(&gt->rps.num_waiters);
	schedule_work(&gt->rps.work);
	flush_work(&gt->rps.work);
}

static int perf_end(struct intel_gt *gt)
{
	atomic_dec(&gt->rps.num_waiters);
	intel_gt_pm_put(gt);

	return igt_flush_test(gt->i915);
}

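/*
 * Emit an MI_STORE_REGISTER_MEM that copies the engine's CS_TIMESTAMP
 * (RING_TIMESTAMP) register into a spare dword of the request's timeline
 * HWSP, so the CPU can read it back via rq->hwsp_seqno[slot] once the
 * request has executed.
 */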
static int write_timestamp(struct i915_request *rq, int slot)
{
	u32 cmd;
	u32 *cs;

	cs = intel_ring_begin(rq, 4);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	cmd = MI_STORE_REGISTER_MEM | MI_USE_GGTT;
	if (INTEL_GEN(rq->engine->i915) >= 8)
		cmd++; /* gen8+ takes a 64b address, one extra dword of payload */
	*cs++ = cmd;
	*cs++ = i915_mmio_reg_offset(RING_TIMESTAMP(rq->engine->mmio_base));
	*cs++ = i915_request_timeline(rq)->hwsp_offset + slot * sizeof(u32);
	*cs++ = 0;

	intel_ring_advance(rq, cs);

	return 0;
}

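/*
 * Build a minimal batch containing only MI_BATCH_BUFFER_END. It is used to
 * measure the fixed cost of dispatching a batch, and as the baseline for the
 * MI_NOOP measurement.
 */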
static struct i915_vma *create_empty_batch(struct intel_context *ce)
{
	struct drm_i915_gem_object *obj;
	struct i915_vma *vma;
	u32 *cs;
	int err;

	obj = i915_gem_object_create_internal(ce->engine->i915, PAGE_SIZE);
	if (IS_ERR(obj))
		return ERR_CAST(obj);

	cs = i915_gem_object_pin_map(obj, I915_MAP_WB);
	if (IS_ERR(cs)) {
		err = PTR_ERR(cs);
		goto err_put;
	}

	cs[0] = MI_BATCH_BUFFER_END;

	i915_gem_object_flush_map(obj);

	vma = i915_vma_instance(obj, ce->vm, NULL);
	if (IS_ERR(vma)) {
		err = PTR_ERR(vma);
		goto err_unpin;
	}

	err = i915_vma_pin(vma, 0, 0, PIN_USER);
	if (err)
		goto err_unpin;

	i915_gem_object_unpin_map(obj);
	return vma;

err_unpin:
	i915_gem_object_unpin_map(obj);
err_put:
	i915_gem_object_put(obj);
	return ERR_PTR(err);
}

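/*
 * Reduce the COUNT samples to a single value: sort them and take a weighted
 * average of the middle three, (a[1] + 2*a[2] + a[3]) / 4, discarding the
 * smallest and largest outliers.
 */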
static u32 trifilter(u32 *a)
{
	u64 sum;

	sort(a, COUNT, sizeof(*a), cmp_u32, NULL);

	sum = mul_u32_u32(a[2], 2);
	sum += a[1];
	sum += a[3];

	return sum >> 2;
}

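/*
 * For each engine, submit requests that sample CS_TIMESTAMP immediately
 * before and after dispatching an empty batch, and report the filtered
 * cycle count as the overhead of MI_BB_START.
 */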
static int perf_mi_bb_start(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	int err = 0;

	if (INTEL_GEN(gt->i915) < 7) /* for per-engine CS_TIMESTAMP */
		return 0;

	perf_begin(gt);
	for_each_engine(engine, gt, id) {
		struct intel_context *ce = engine->kernel_context;
		struct i915_vma *batch;
		u32 cycles[COUNT];
		int i;

		intel_engine_pm_get(engine);

		batch = create_empty_batch(ce);
		if (IS_ERR(batch)) {
			err = PTR_ERR(batch);
			intel_engine_pm_put(engine);
			break;
		}

		err = i915_vma_sync(batch);
		if (err) {
			intel_engine_pm_put(engine);
			i915_vma_put(batch);
			break;
		}

		for (i = 0; i < ARRAY_SIZE(cycles); i++) {
			struct i915_request *rq;

			rq = i915_request_create(ce);
			if (IS_ERR(rq)) {
				err = PTR_ERR(rq);
				break;
			}

			err = write_timestamp(rq, 2);
			if (err)
				goto out;

			err = rq->engine->emit_bb_start(rq,
							batch->node.start, 8,
							0);
			if (err)
				goto out;

			err = write_timestamp(rq, 3);
			if (err)
				goto out;

out:
			i915_request_get(rq);
			i915_request_add(rq);

			if (i915_request_wait(rq, 0, HZ / 5) < 0)
				err = -EIO;

			/* Read the timestamps while we still hold our reference */
			if (!err)
				cycles[i] = rq->hwsp_seqno[3] - rq->hwsp_seqno[2];

			i915_request_put(rq);
			if (err)
				break;
		}
		i915_vma_put(batch);
		intel_engine_pm_put(engine);
		if (err)
			break;

		pr_info("%s: MI_BB_START cycles: %u\n",
			engine->name, trifilter(cycles));
	}
	if (perf_end(gt))
		err = -EIO;

	return err;
}

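/*
 * Build a 64KiB batch of MI_NOOP (zero-filled dwords) terminated by
 * MI_BATCH_BUFFER_END, used to measure the cost of executing a long run
 * of no-ops.
 */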
static struct i915_vma *create_nop_batch(struct intel_context *ce)
{
	struct drm_i915_gem_object *obj;
	struct i915_vma *vma;
	u32 *cs;
	int err;

	obj = i915_gem_object_create_internal(ce->engine->i915, SZ_64K);
	if (IS_ERR(obj))
		return ERR_CAST(obj);

	cs = i915_gem_object_pin_map(obj, I915_MAP_WB);
	if (IS_ERR(cs)) {
		err = PTR_ERR(cs);
		goto err_put;
	}

	memset(cs, 0, SZ_64K);
	cs[SZ_64K / sizeof(*cs) - 1] = MI_BATCH_BUFFER_END;

	i915_gem_object_flush_map(obj);

	vma = i915_vma_instance(obj, ce->vm, NULL);
	if (IS_ERR(vma)) {
		err = PTR_ERR(vma);
		goto err_unpin;
	}

	err = i915_vma_pin(vma, 0, 0, PIN_USER);
	if (err)
		goto err_unpin;

	i915_gem_object_unpin_map(obj);
	return vma;

err_unpin:
	i915_gem_object_unpin_map(obj);
err_put:
	i915_gem_object_put(obj);
	return ERR_PTR(err);
}

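/*
 * Time both the empty batch and the 64KiB MI_NOOP batch within a single
 * request, then subtract the empty-batch baseline to isolate the cycles
 * spent executing ~16k MI_NOOPs.
 */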
static int perf_mi_noop(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	int err = 0;

	if (INTEL_GEN(gt->i915) < 7) /* for per-engine CS_TIMESTAMP */
		return 0;

	perf_begin(gt);
	for_each_engine(engine, gt, id) {
		struct intel_context *ce = engine->kernel_context;
		struct i915_vma *base, *nop;
		u32 cycles[COUNT];
		int i;

		intel_engine_pm_get(engine);

		base = create_empty_batch(ce);
		if (IS_ERR(base)) {
			err = PTR_ERR(base);
			intel_engine_pm_put(engine);
			break;
		}

		err = i915_vma_sync(base);
		if (err) {
			i915_vma_put(base);
			intel_engine_pm_put(engine);
			break;
		}

		nop = create_nop_batch(ce);
		if (IS_ERR(nop)) {
			err = PTR_ERR(nop);
			i915_vma_put(base);
			intel_engine_pm_put(engine);
			break;
		}

		err = i915_vma_sync(nop);
		if (err) {
			i915_vma_put(nop);
			i915_vma_put(base);
			intel_engine_pm_put(engine);
			break;
		}

		for (i = 0; i < ARRAY_SIZE(cycles); i++) {
			struct i915_request *rq;

			rq = i915_request_create(ce);
			if (IS_ERR(rq)) {
				err = PTR_ERR(rq);
				break;
			}

			err = write_timestamp(rq, 2);
			if (err)
				goto out;

			err = rq->engine->emit_bb_start(rq,
							base->node.start, 8,
							0);
			if (err)
				goto out;

			err = write_timestamp(rq, 3);
			if (err)
				goto out;

			err = rq->engine->emit_bb_start(rq,
							nop->node.start,
							nop->node.size,
							0);
			if (err)
				goto out;

			err = write_timestamp(rq, 4);
			if (err)
				goto out;

out:
			i915_request_get(rq);
			i915_request_add(rq);

			if (i915_request_wait(rq, 0, HZ / 5) < 0)
				err = -EIO;

			/* Read the timestamps while we still hold our reference */
			if (!err)
				cycles[i] =
					(rq->hwsp_seqno[4] - rq->hwsp_seqno[3]) -
					(rq->hwsp_seqno[3] - rq->hwsp_seqno[2]);

			i915_request_put(rq);
			if (err)
				break;
		}
		i915_vma_put(nop);
		i915_vma_put(base);
		intel_engine_pm_put(engine);
		if (err)
			break;

		pr_info("%s: 16K MI_NOOP cycles: %u\n",
			engine->name, trifilter(cycles));
	}
	if (perf_end(gt))
		err = -EIO;

	return err;
}

int intel_engine_cs_perf_selftests(struct drm_i915_private *i915)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(perf_mi_bb_start),
		SUBTEST(perf_mi_noop),
	};

	if (intel_gt_is_wedged(&i915->gt))
		return 0;

	return intel_gt_live_subtests(tests, &i915->gt);
}

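/*
 * Mock check over the intel_engines[] tables: each engine's mmio_bases[]
 * list must be sorted by gen in descending order and terminated by a
 * gen == 0 sentinel, and every listed gen must have a non-zero mmio base.
 */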
static int intel_mmio_bases_check(void *arg)
{
	int i, j;

	for (i = 0; i < ARRAY_SIZE(intel_engines); i++) {
		const struct engine_info *info = &intel_engines[i];
		u8 prev = U8_MAX;

		for (j = 0; j < MAX_MMIO_BASES; j++) {
			u8 gen = info->mmio_bases[j].gen;
			u32 base = info->mmio_bases[j].base;

			if (gen >= prev) {
				pr_err("%s(%s, class:%d, instance:%d): mmio base for gen %x is before the one for gen %x\n",
				       __func__,
				       intel_engine_class_repr(info->class),
				       info->class, info->instance,
				       prev, gen);
				return -EINVAL;
			}

			if (gen == 0)
				break;

			if (!base) {
				pr_err("%s(%s, class:%d, instance:%d): invalid mmio base (%x) for gen %x at entry %u\n",
				       __func__,
				       intel_engine_class_repr(info->class),
				       info->class, info->instance,
				       base, gen, j);
				return -EINVAL;
			}

			prev = gen;
		}

		pr_debug("%s: min gen supported for %s%d is %d\n",
			 __func__,
			 intel_engine_class_repr(info->class),
			 info->instance,
			 prev);
	}

	return 0;
}

int intel_engine_cs_mock_selftests(void)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(intel_mmio_bases_check),
	};

	return i915_subtests(tests, NULL);
}