// SPDX-License-Identifier: MIT
/*
 * Copyright © 2020 Intel Corporation
 */

#include <linux/sort.h>

#include "selftests/i915_random.h"

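/*
 * Transfer sizes to exercise: small and huge pages, sizes chosen to
 * straddle the CHUNK_SZ window that the migration code carves transfers
 * into (CHUNK_SZ comes from the including intel_migrate.c), and a large
 * multi-chunk transfer.
 */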
static const unsigned int sizes[] = {
	SZ_4K,
	SZ_64K,
	SZ_2M,
	CHUNK_SZ - SZ_4K,
	CHUNK_SZ,
	CHUNK_SZ + SZ_4K,
	SZ_64M,
};

static struct drm_i915_gem_object *
create_lmem_or_internal(struct drm_i915_private *i915, size_t size)
{
	struct drm_i915_gem_object *obj;

	obj = i915_gem_object_create_lmem(i915, size, 0);
	if (!IS_ERR(obj))
		return obj;

	return i915_gem_object_create_internal(i915, size);
}

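/*
 * Fill @src with an ascending dword pattern and @dst with its complement,
 * call @fn to copy src into dst, then verify one randomly chosen dword in
 * every page of the result.
 */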
static int copy(struct intel_migrate *migrate,
		int (*fn)(struct intel_migrate *migrate,
			  struct i915_gem_ww_ctx *ww,
			  struct drm_i915_gem_object *src,
			  struct drm_i915_gem_object *dst,
			  struct i915_request **out),
		u32 sz, struct rnd_state *prng)
{
	struct drm_i915_private *i915 = migrate->context->engine->i915;
	struct drm_i915_gem_object *src, *dst;
	struct i915_request *rq;
	struct i915_gem_ww_ctx ww;
	u32 *vaddr;
	int err = 0;
	int i;

	src = create_lmem_or_internal(i915, sz);
	if (IS_ERR(src))
		return 0; /* allocation failure is not a test failure; skip */

	sz = src->base.size; /* the object may be bigger than requested */
	dst = i915_gem_object_create_internal(i915, sz);
	if (IS_ERR(dst))
		goto err_free_src; /* err is still 0; skip, as above */

	for_i915_gem_ww(&ww, err, true) {
		err = i915_gem_object_lock(src, &ww);
		if (err)
			continue;

		err = i915_gem_object_lock(dst, &ww);
		if (err)
			continue;

		vaddr = i915_gem_object_pin_map(src, I915_MAP_WC);
		if (IS_ERR(vaddr)) {
			err = PTR_ERR(vaddr);
			continue;
		}

		for (i = 0; i < sz / sizeof(u32); i++)
			vaddr[i] = i;
		i915_gem_object_flush_map(src);

		vaddr = i915_gem_object_pin_map(dst, I915_MAP_WC);
		if (IS_ERR(vaddr)) {
			err = PTR_ERR(vaddr);
			goto unpin_src;
		}

		for (i = 0; i < sz / sizeof(u32); i++)
			vaddr[i] = ~i;
		i915_gem_object_flush_map(dst);

		err = fn(migrate, &ww, src, dst, &rq);
		if (!err)
			continue;

		if (err != -EDEADLK && err != -EINTR && err != -ERESTARTSYS)
			pr_err("%ps failed, size: %u\n", fn, sz);
		if (rq) {
			i915_request_wait(rq, 0, HZ);
			i915_request_put(rq);
		}
		i915_gem_object_unpin_map(dst);
unpin_src:
		i915_gem_object_unpin_map(src);
	}
	if (err)
		goto err_out;

	if (rq) {
		if (i915_request_wait(rq, 0, HZ) < 0) {
			pr_err("%ps timed out, size: %u\n", fn, sz);
			err = -ETIME;
		}
		i915_request_put(rq);
	}

	for (i = 0; !err && i < sz / PAGE_SIZE; i++) {
		int x = i * 1024 + i915_prandom_u32_max_state(1024, prng);

		if (vaddr[x] != x) {
			pr_err("%ps failed, size: %u, offset: %zu\n",
			       fn, sz, x * sizeof(u32));
			igt_hexdump(vaddr + i * 1024, 4096);
			err = -EINVAL;
		}
	}

	i915_gem_object_unpin_map(dst);
	i915_gem_object_unpin_map(src);

err_out:
	i915_gem_object_put(dst);
err_free_src:
	i915_gem_object_put(src);

	return err;
}

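/*
 * Poison the object with a non-zero dword pattern, call @fn to clear it
 * to the dword value @sz, then verify one randomly chosen dword in every
 * page of the result.
 */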
static int clear(struct intel_migrate *migrate,
		 int (*fn)(struct intel_migrate *migrate,
			   struct i915_gem_ww_ctx *ww,
			   struct drm_i915_gem_object *obj,
			   u32 value,
			   struct i915_request **out),
		 u32 sz, struct rnd_state *prng)
{
	struct drm_i915_private *i915 = migrate->context->engine->i915;
	struct drm_i915_gem_object *obj;
	struct i915_request *rq;
	struct i915_gem_ww_ctx ww;
	u32 *vaddr;
	int err = 0;
	int i;

	obj = create_lmem_or_internal(i915, sz);
	if (IS_ERR(obj))
		return 0;

	for_i915_gem_ww(&ww, err, true) {
		err = i915_gem_object_lock(obj, &ww);
		if (err)
			continue;

		vaddr = i915_gem_object_pin_map(obj, I915_MAP_WC);
		if (IS_ERR(vaddr)) {
			err = PTR_ERR(vaddr);
			continue;
		}

		for (i = 0; i < sz / sizeof(u32); i++)
			vaddr[i] = ~i;
		i915_gem_object_flush_map(obj);

		err = fn(migrate, &ww, obj, sz, &rq);
		if (!err)
			continue;

		if (err != -EDEADLK && err != -EINTR && err != -ERESTARTSYS)
			pr_err("%ps failed, size: %u\n", fn, sz);
		if (rq) {
			i915_request_wait(rq, 0, HZ);
			i915_request_put(rq);
		}
		i915_gem_object_unpin_map(obj);
	}
	if (err)
		goto err_out;

	if (rq) {
		if (i915_request_wait(rq, 0, HZ) < 0) {
			pr_err("%ps timed out, size: %u\n", fn, sz);
			err = -ETIME;
		}
		i915_request_put(rq);
	}

	for (i = 0; !err && i < sz / PAGE_SIZE; i++) {
		int x = i * 1024 + i915_prandom_u32_max_state(1024, prng);

		if (vaddr[x] != sz) {
			pr_err("%ps failed, size: %u, offset: %zu\n",
			       fn, sz, x * sizeof(u32));
			igt_hexdump(vaddr + i * 1024, 4096);
			err = -EINVAL;
		}
	}

	i915_gem_object_unpin_map(obj);
err_out:
	i915_gem_object_put(obj);

	return err;
}

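/*
 * Each operation is driven through both entry points: intel_migrate_*(),
 * which takes the ww context, and intel_context_migrate_*(), which
 * submits directly on the global migrate context.
 */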
static int __migrate_copy(struct intel_migrate *migrate,
			  struct i915_gem_ww_ctx *ww,
			  struct drm_i915_gem_object *src,
			  struct drm_i915_gem_object *dst,
			  struct i915_request **out)
{
	return intel_migrate_copy(migrate, ww, NULL,
				  src->mm.pages->sgl, src->cache_level,
				  i915_gem_object_is_lmem(src),
				  dst->mm.pages->sgl, dst->cache_level,
				  i915_gem_object_is_lmem(dst),
				  out);
}

static int __global_copy(struct intel_migrate *migrate,
			 struct i915_gem_ww_ctx *ww,
			 struct drm_i915_gem_object *src,
			 struct drm_i915_gem_object *dst,
			 struct i915_request **out)
{
	return intel_context_migrate_copy(migrate->context, NULL,
					  src->mm.pages->sgl, src->cache_level,
					  i915_gem_object_is_lmem(src),
					  dst->mm.pages->sgl, dst->cache_level,
					  i915_gem_object_is_lmem(dst),
					  out);
}

static int
migrate_copy(struct intel_migrate *migrate, u32 sz, struct rnd_state *prng)
{
	return copy(migrate, __migrate_copy, sz, prng);
}

static int
global_copy(struct intel_migrate *migrate, u32 sz, struct rnd_state *prng)
{
	return copy(migrate, __global_copy, sz, prng);
}

static int __migrate_clear(struct intel_migrate *migrate,
			   struct i915_gem_ww_ctx *ww,
			   struct drm_i915_gem_object *obj,
			   u32 value,
			   struct i915_request **out)
{
	return intel_migrate_clear(migrate, ww, NULL,
				   obj->mm.pages->sgl,
				   obj->cache_level,
				   i915_gem_object_is_lmem(obj),
				   value, out);
}

static int __global_clear(struct intel_migrate *migrate,
			  struct i915_gem_ww_ctx *ww,
			  struct drm_i915_gem_object *obj,
			  u32 value,
			  struct i915_request **out)
{
	return intel_context_migrate_clear(migrate->context, NULL,
					   obj->mm.pages->sgl,
					   obj->cache_level,
					   i915_gem_object_is_lmem(obj),
					   value, out);
}

static int
migrate_clear(struct intel_migrate *migrate, u32 sz, struct rnd_state *prng)
{
	return clear(migrate, __migrate_clear, sz, prng);
}

static int
global_clear(struct intel_migrate *migrate, u32 sz, struct rnd_state *prng)
{
	return clear(migrate, __global_clear, sz, prng);
}

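/*
 * Live tests: walk the size list with both submission paths, draining
 * freed objects between iterations.
 */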
static int live_migrate_copy(void *arg)
{
	struct intel_migrate *migrate = arg;
	struct drm_i915_private *i915 = migrate->context->engine->i915;
	I915_RND_STATE(prng);
	int i;

	for (i = 0; i < ARRAY_SIZE(sizes); i++) {
		int err;

		err = migrate_copy(migrate, sizes[i], &prng);
		if (err == 0)
			err = global_copy(migrate, sizes[i], &prng);
		i915_gem_drain_freed_objects(i915);
		if (err)
			return err;
	}

	return 0;
}

static int live_migrate_clear(void *arg)
{
	struct intel_migrate *migrate = arg;
	struct drm_i915_private *i915 = migrate->context->engine->i915;
	I915_RND_STATE(prng);
	int i;

	for (i = 0; i < ARRAY_SIZE(sizes); i++) {
		int err;

		err = migrate_clear(migrate, sizes[i], &prng);
		if (err == 0)
			err = global_clear(migrate, sizes[i], &prng);

		i915_gem_drain_freed_objects(i915);
		if (err)
			return err;
	}

	return 0;
}

struct threaded_migrate {
	struct intel_migrate *migrate;
	struct task_struct *tsk;
	struct rnd_state prng;
};

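/*
 * Run @fn concurrently from one kthread per online CPU, plus one, each
 * with its own PRNG stream, and report the first error returned by any
 * thread. @flags is currently unused.
 */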
static int threaded_migrate(struct intel_migrate *migrate,
			    int (*fn)(void *arg),
			    unsigned int flags)
{
	const unsigned int n_cpus = num_online_cpus() + 1;
	struct threaded_migrate *thread;
	I915_RND_STATE(prng);
	unsigned int i;
	int err = 0;

	thread = kcalloc(n_cpus, sizeof(*thread), GFP_KERNEL);
	if (!thread)
		return 0;

	for (i = 0; i < n_cpus; ++i) {
		struct task_struct *tsk;

		thread[i].migrate = migrate;
		thread[i].prng =
			I915_RND_STATE_INITIALIZER(prandom_u32_state(&prng));

		tsk = kthread_run(fn, &thread[i], "igt-%d", i);
		if (IS_ERR(tsk)) {
			err = PTR_ERR(tsk);
			break;
		}

		get_task_struct(tsk);
		thread[i].tsk = tsk;
	}

	msleep(10); /* start all threads before we kthread_stop() */

	for (i = 0; i < n_cpus; ++i) {
		struct task_struct *tsk = thread[i].tsk;
		int status;

		if (IS_ERR_OR_NULL(tsk))
			continue;

		status = kthread_stop(tsk);
		if (status && !err)
			err = status;

		put_task_struct(tsk);
	}

	kfree(thread);
	return err;
}

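/*
 * The threaded variants use 2 * CHUNK_SZ objects so that each operation
 * must be split across chunk boundaries while the threads compete for
 * the migration engine.
 */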
static int __thread_migrate_copy(void *arg)
{
	struct threaded_migrate *tm = arg;

	return migrate_copy(tm->migrate, 2 * CHUNK_SZ, &tm->prng);
}

static int thread_migrate_copy(void *arg)
{
	return threaded_migrate(arg, __thread_migrate_copy, 0);
}

static int __thread_global_copy(void *arg)
{
	struct threaded_migrate *tm = arg;

	return global_copy(tm->migrate, 2 * CHUNK_SZ, &tm->prng);
}

static int thread_global_copy(void *arg)
{
	return threaded_migrate(arg, __thread_global_copy, 0);
}

static int __thread_migrate_clear(void *arg)
{
	struct threaded_migrate *tm = arg;

	return migrate_clear(tm->migrate, 2 * CHUNK_SZ, &tm->prng);
}

static int __thread_global_clear(void *arg)
{
	struct threaded_migrate *tm = arg;

	return global_clear(tm->migrate, 2 * CHUNK_SZ, &tm->prng);
}

static int thread_migrate_clear(void *arg)
{
	return threaded_migrate(arg, __thread_migrate_clear, 0);
}

static int thread_global_clear(void *arg)
{
	return threaded_migrate(arg, __thread_global_clear, 0);
}

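/*
 * The live selftests need the GT's migration context; if the platform
 * never created one, there is nothing to test.
 */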
int intel_migrate_live_selftests(struct drm_i915_private *i915)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(live_migrate_copy),
		SUBTEST(live_migrate_clear),
		SUBTEST(thread_migrate_copy),
		SUBTEST(thread_migrate_clear),
		SUBTEST(thread_global_copy),
		SUBTEST(thread_global_clear),
	};
	struct intel_gt *gt = to_gt(i915);

	if (!gt->migrate.context)
		return 0;

	return i915_subtests(tests, &gt->migrate);
}

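/*
 * Allocate an object (preferring lmem when @try_lmem is set, falling back
 * to internal memory), then lock it and pin its pages so its sg list can
 * be handed straight to the migration routines. The caller is responsible
 * for unlocking and putting the object.
 */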
static struct drm_i915_gem_object *
create_init_lmem_internal(struct intel_gt *gt, size_t sz, bool try_lmem)
{
	struct drm_i915_gem_object *obj = NULL;
	int err;

	if (try_lmem)
		obj = i915_gem_object_create_lmem(gt->i915, sz, 0);

	if (IS_ERR_OR_NULL(obj)) {
		obj = i915_gem_object_create_internal(gt->i915, sz);
		if (IS_ERR(obj))
			return obj;
	}

	/* The object is new and unshared, so the trylock is expected to succeed */
	i915_gem_object_trylock(obj, NULL);
	err = i915_gem_object_pin_pages(obj);
	if (err) {
		i915_gem_object_unlock(obj);
		i915_gem_object_put(obj);
		return ERR_PTR(err);
	}

	return obj;
}

static int wrap_ktime_compare(const void *A, const void *B)
{
	const ktime_t *a = A, *b = B;

	return ktime_compare(*a, *b);
}

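/*
 * Time ARRAY_SIZE(t) clear passes over the same backing store and report
 * a bandwidth estimate based on the middle samples.
 */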
static int __perf_clear_blt(struct intel_context *ce,
			    struct scatterlist *sg,
			    enum i915_cache_level cache_level,
			    bool is_lmem,
			    size_t sz)
{
	ktime_t t[5];
	int pass;
	int err = 0;

	for (pass = 0; pass < ARRAY_SIZE(t); pass++) {
		struct i915_request *rq;
		ktime_t t0, t1;

		t0 = ktime_get();

		err = intel_context_migrate_clear(ce, NULL, sg, cache_level,
						  is_lmem, 0, &rq);
		if (rq) {
			if (i915_request_wait(rq, 0, MAX_SCHEDULE_TIMEOUT) < 0)
				err = -EIO;
			i915_request_put(rq);
		}
		if (err)
			break;

		t1 = ktime_get();
		t[pass] = ktime_sub(t1, t0);
	}
	if (err)
		return err;

	sort(t, ARRAY_SIZE(t), sizeof(*t), wrap_ktime_compare, NULL);
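	/*
	 * With the five samples sorted, discard the fastest and slowest
	 * and take a weighted average of the middle three: four passes'
	 * worth of bytes over t[1] + 2 * t[2] + t[3] ns, scaled to MiB/s.
	 */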
	pr_info("%s: %zd KiB fill: %lld MiB/s\n",
		ce->engine->name, sz >> 10,
		div64_u64(mul_u32_u32(4 * sz,
				      1000 * 1000 * 1000),
			  t[1] + 2 * t[2] + t[3]) >> 20);
	return 0;
}

static int perf_clear_blt(void *arg)
{
	struct intel_gt *gt = arg;
	static const unsigned long sizes[] = {
		SZ_4K,
		SZ_64K,
		SZ_2M,
		SZ_64M
	};
	int i;

	for (i = 0; i < ARRAY_SIZE(sizes); i++) {
		struct drm_i915_gem_object *dst;
		int err;

		dst = create_init_lmem_internal(gt, sizes[i], true);
		if (IS_ERR(dst))
			return PTR_ERR(dst);

		err = __perf_clear_blt(gt->migrate.context,
				       dst->mm.pages->sgl,
				       I915_CACHE_NONE,
				       i915_gem_object_is_lmem(dst),
				       sizes[i]);

		i915_gem_object_unlock(dst);
		i915_gem_object_put(dst);
		if (err)
			return err;
	}

	return 0;
}

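/* As __perf_clear_blt(), but timing copies between two sg lists. */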
static int __perf_copy_blt(struct intel_context *ce,
			   struct scatterlist *src,
			   enum i915_cache_level src_cache_level,
			   bool src_is_lmem,
			   struct scatterlist *dst,
			   enum i915_cache_level dst_cache_level,
			   bool dst_is_lmem,
			   size_t sz)
{
	ktime_t t[5];
	int pass;
	int err = 0;

	for (pass = 0; pass < ARRAY_SIZE(t); pass++) {
		struct i915_request *rq;
		ktime_t t0, t1;

		t0 = ktime_get();

		err = intel_context_migrate_copy(ce, NULL,
						 src, src_cache_level,
						 src_is_lmem,
						 dst, dst_cache_level,
						 dst_is_lmem,
						 &rq);
		if (rq) {
			if (i915_request_wait(rq, 0, MAX_SCHEDULE_TIMEOUT) < 0)
				err = -EIO;
			i915_request_put(rq);
		}
		if (err)
			break;

		t1 = ktime_get();
		t[pass] = ktime_sub(t1, t0);
	}
	if (err)
		return err;

	sort(t, ARRAY_SIZE(t), sizeof(*t), wrap_ktime_compare, NULL);
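	/* Same trimmed, weighted average as in __perf_clear_blt(). */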
	pr_info("%s: %zd KiB copy: %lld MiB/s\n",
		ce->engine->name, sz >> 10,
		div64_u64(mul_u32_u32(4 * sz,
				      1000 * 1000 * 1000),
			  t[1] + 2 * t[2] + t[3]) >> 20);
	return 0;
}

static int perf_copy_blt(void *arg)
{
	struct intel_gt *gt = arg;
	static const unsigned long sizes[] = {
		SZ_4K,
		SZ_64K,
		SZ_2M,
		SZ_64M
	};
	int i;

	for (i = 0; i < ARRAY_SIZE(sizes); i++) {
		struct drm_i915_gem_object *src, *dst;
		int err;

		src = create_init_lmem_internal(gt, sizes[i], true);
		if (IS_ERR(src))
			return PTR_ERR(src);

		dst = create_init_lmem_internal(gt, sizes[i], false);
		if (IS_ERR(dst)) {
			err = PTR_ERR(dst);
			goto err_src;
		}

		err = __perf_copy_blt(gt->migrate.context,
				      src->mm.pages->sgl,
				      I915_CACHE_NONE,
				      i915_gem_object_is_lmem(src),
				      dst->mm.pages->sgl,
				      I915_CACHE_NONE,
				      i915_gem_object_is_lmem(dst),
				      sizes[i]);

		i915_gem_object_unlock(dst);
		i915_gem_object_put(dst);
err_src:
		i915_gem_object_unlock(src);
		i915_gem_object_put(src);
		if (err)
			return err;
	}

	return 0;
}

int intel_migrate_perf_selftests(struct drm_i915_private *i915)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(perf_clear_blt),
		SUBTEST(perf_copy_blt),
	};
	struct intel_gt *gt = to_gt(i915);

	if (intel_gt_is_wedged(gt))
		return 0;

	if (!gt->migrate.context)
		return 0;

	return intel_gt_live_subtests(tests, gt);
}