// SPDX-License-Identifier: MIT
/*
 * Copyright © 2020 Intel Corporation
 */

#include <linux/sort.h>

#include "selftests/i915_random.h"

/*
 * Transfer sizes to exercise: small, large, and straddling the
 * CHUNK_SZ chunking boundary.
 */
static const unsigned int sizes[] = {
	SZ_4K,
	SZ_64K,
	SZ_2M,
	CHUNK_SZ - SZ_4K,
	CHUNK_SZ,
	CHUNK_SZ + SZ_4K,
	SZ_64M,
};

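/*
 * Prefer a local-memory (lmem) object so the blitter really moves data
 * to/from device memory; fall back to an internal (smem) object on
 * platforms without lmem.
 */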
static struct drm_i915_gem_object *
create_lmem_or_internal(struct drm_i915_private *i915, size_t size)
{
	struct drm_i915_gem_object *obj;

	obj = i915_gem_object_create_lmem(i915, size, 0);
	if (!IS_ERR(obj))
		return obj;

	return i915_gem_object_create_internal(i915, size);
}

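/*
 * Generic copy test: fill the source with an ascending dword pattern and
 * the destination with its complement, submit the copy via @fn under a ww
 * transaction, then verify that the pattern reached the destination.
 */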
static int copy(struct intel_migrate *migrate,
		int (*fn)(struct intel_migrate *migrate,
			  struct i915_gem_ww_ctx *ww,
			  struct drm_i915_gem_object *src,
			  struct drm_i915_gem_object *dst,
			  struct i915_request **out),
		u32 sz, struct rnd_state *prng)
{
	struct drm_i915_private *i915 = migrate->context->engine->i915;
	struct drm_i915_gem_object *src, *dst;
	struct i915_request *rq;
	struct i915_gem_ww_ctx ww;
	u32 *vaddr;
	int err = 0;
	int i;

	src = create_lmem_or_internal(i915, sz);
	if (IS_ERR(src))
		return 0;

	dst = i915_gem_object_create_internal(i915, sz);
	if (IS_ERR(dst))
		goto err_free_src;

	for_i915_gem_ww(&ww, err, true) {
		err = i915_gem_object_lock(src, &ww);
		if (err)
			continue;

		err = i915_gem_object_lock(dst, &ww);
		if (err)
			continue;

		vaddr = i915_gem_object_pin_map(src, I915_MAP_WC);
		if (IS_ERR(vaddr)) {
			err = PTR_ERR(vaddr);
			continue;
		}

		for (i = 0; i < sz / sizeof(u32); i++)
			vaddr[i] = i;
		i915_gem_object_flush_map(src);

		vaddr = i915_gem_object_pin_map(dst, I915_MAP_WC);
		if (IS_ERR(vaddr)) {
			err = PTR_ERR(vaddr);
			goto unpin_src;
		}

		for (i = 0; i < sz / sizeof(u32); i++)
			vaddr[i] = ~i;
		i915_gem_object_flush_map(dst);

		err = fn(migrate, &ww, src, dst, &rq);
		if (!err)
			continue;

		if (err != -EDEADLK && err != -EINTR && err != -ERESTARTSYS)
			pr_err("%ps failed, size: %u\n", fn, sz);
		if (rq) {
			i915_request_wait(rq, 0, HZ);
			i915_request_put(rq);
		}
		i915_gem_object_unpin_map(dst);
unpin_src:
		i915_gem_object_unpin_map(src);
	}
	if (err)
		goto err_out;

	if (rq) {
		if (i915_request_wait(rq, 0, HZ) < 0) {
			pr_err("%ps timed out, size: %u\n", fn, sz);
			err = -ETIME;
		}
		i915_request_put(rq);
	}

	/* Sample one random dword in every page of the destination. */
	for (i = 0; !err && i < sz / PAGE_SIZE; i++) {
		int x = i * 1024 + i915_prandom_u32_max_state(1024, prng);

		if (vaddr[x] != x) {
			pr_err("%ps failed, size: %u, offset: %zu\n",
			       fn, sz, x * sizeof(u32));
			igt_hexdump(vaddr + i * 1024, 4096);
			err = -EINVAL;
		}
	}

	i915_gem_object_unpin_map(dst);
	i915_gem_object_unpin_map(src);

err_out:
	i915_gem_object_put(dst);
err_free_src:
	i915_gem_object_put(src);

	return err;
}

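/*
 * Generic clear test: poison the object with a non-zero pattern, ask @fn
 * to fill it with the value @sz, then verify the fill value landed in
 * every page.
 */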
static int clear(struct intel_migrate *migrate,
		 int (*fn)(struct intel_migrate *migrate,
			   struct i915_gem_ww_ctx *ww,
			   struct drm_i915_gem_object *obj,
			   u32 value,
			   struct i915_request **out),
		 u32 sz, struct rnd_state *prng)
{
	struct drm_i915_private *i915 = migrate->context->engine->i915;
	struct drm_i915_gem_object *obj;
	struct i915_request *rq;
	struct i915_gem_ww_ctx ww;
	u32 *vaddr;
	int err = 0;
	int i;

	obj = create_lmem_or_internal(i915, sz);
	if (IS_ERR(obj))
		return 0;

	for_i915_gem_ww(&ww, err, true) {
		err = i915_gem_object_lock(obj, &ww);
		if (err)
			continue;

		vaddr = i915_gem_object_pin_map(obj, I915_MAP_WC);
		if (IS_ERR(vaddr)) {
			err = PTR_ERR(vaddr);
			continue;
		}

		for (i = 0; i < sz / sizeof(u32); i++)
			vaddr[i] = ~i;
		i915_gem_object_flush_map(obj);

		err = fn(migrate, &ww, obj, sz, &rq);
		if (!err)
			continue;

		if (err != -EDEADLK && err != -EINTR && err != -ERESTARTSYS)
			pr_err("%ps failed, size: %u\n", fn, sz);
		if (rq) {
			i915_request_wait(rq, 0, HZ);
			i915_request_put(rq);
		}
		i915_gem_object_unpin_map(obj);
	}
	if (err)
		goto err_out;

	if (rq) {
		if (i915_request_wait(rq, 0, HZ) < 0) {
			pr_err("%ps timed out, size: %u\n", fn, sz);
			err = -ETIME;
		}
		i915_request_put(rq);
	}

	/* Sample one random dword per page; each must hold the fill value (sz). */
	for (i = 0; !err && i < sz / PAGE_SIZE; i++) {
		int x = i * 1024 + i915_prandom_u32_max_state(1024, prng);

		if (vaddr[x] != sz) {
			pr_err("%ps failed, size: %u, offset: %zu\n",
			       fn, sz, x * sizeof(u32));
			igt_hexdump(vaddr + i * 1024, 4096);
			err = -EINVAL;
		}
	}

	i915_gem_object_unpin_map(obj);
err_out:
	i915_gem_object_put(obj);

	return err;
}

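/*
 * Each operation is exercised through both entry points: the ww-aware
 * intel_migrate_copy()/intel_migrate_clear() wrappers, and the "global"
 * intel_context_migrate_copy()/clear() variants that are handed the GT
 * migration context directly.
 */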
static int __migrate_copy(struct intel_migrate *migrate,
			  struct i915_gem_ww_ctx *ww,
			  struct drm_i915_gem_object *src,
			  struct drm_i915_gem_object *dst,
			  struct i915_request **out)
{
	return intel_migrate_copy(migrate, ww, NULL,
				  src->mm.pages->sgl, src->cache_level,
				  i915_gem_object_is_lmem(src),
				  dst->mm.pages->sgl, dst->cache_level,
				  i915_gem_object_is_lmem(dst),
				  out);
}

static int __global_copy(struct intel_migrate *migrate,
			 struct i915_gem_ww_ctx *ww,
			 struct drm_i915_gem_object *src,
			 struct drm_i915_gem_object *dst,
			 struct i915_request **out)
{
	return intel_context_migrate_copy(migrate->context, NULL,
					  src->mm.pages->sgl, src->cache_level,
					  i915_gem_object_is_lmem(src),
					  dst->mm.pages->sgl, dst->cache_level,
					  i915_gem_object_is_lmem(dst),
					  out);
}

static int
migrate_copy(struct intel_migrate *migrate, u32 sz, struct rnd_state *prng)
{
	return copy(migrate, __migrate_copy, sz, prng);
}

static int
global_copy(struct intel_migrate *migrate, u32 sz, struct rnd_state *prng)
{
	return copy(migrate, __global_copy, sz, prng);
}

static int __migrate_clear(struct intel_migrate *migrate,
			   struct i915_gem_ww_ctx *ww,
			   struct drm_i915_gem_object *obj,
			   u32 value,
			   struct i915_request **out)
{
	return intel_migrate_clear(migrate, ww, NULL,
				   obj->mm.pages->sgl,
				   obj->cache_level,
				   i915_gem_object_is_lmem(obj),
				   value, out);
}

static int __global_clear(struct intel_migrate *migrate,
			  struct i915_gem_ww_ctx *ww,
			  struct drm_i915_gem_object *obj,
			  u32 value,
			  struct i915_request **out)
{
	return intel_context_migrate_clear(migrate->context, NULL,
					   obj->mm.pages->sgl,
					   obj->cache_level,
					   i915_gem_object_is_lmem(obj),
					   value, out);
}

static int
migrate_clear(struct intel_migrate *migrate, u32 sz, struct rnd_state *prng)
{
	return clear(migrate, __migrate_clear, sz, prng);
}

static int
global_clear(struct intel_migrate *migrate, u32 sz, struct rnd_state *prng)
{
	return clear(migrate, __global_clear, sz, prng);
}

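/*
 * Walk the table of interesting transfer sizes, exercising both the
 * ww-based and global paths for each, and drain freed objects between
 * iterations to keep memory usage bounded.
 */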
static int live_migrate_copy(void *arg)
{
	struct intel_migrate *migrate = arg;
	struct drm_i915_private *i915 = migrate->context->engine->i915;
	I915_RND_STATE(prng);
	int i;

	for (i = 0; i < ARRAY_SIZE(sizes); i++) {
		int err;

		err = migrate_copy(migrate, sizes[i], &prng);
		if (err == 0)
			err = global_copy(migrate, sizes[i], &prng);
		i915_gem_drain_freed_objects(i915);
		if (err)
			return err;
	}

	return 0;
}

static int live_migrate_clear(void *arg)
{
	struct intel_migrate *migrate = arg;
	struct drm_i915_private *i915 = migrate->context->engine->i915;
	I915_RND_STATE(prng);
	int i;

	for (i = 0; i < ARRAY_SIZE(sizes); i++) {
		int err;

		err = migrate_clear(migrate, sizes[i], &prng);
		if (err == 0)
			err = global_clear(migrate, sizes[i], &prng);

		i915_gem_drain_freed_objects(i915);
		if (err)
			return err;
	}

	return 0;
}

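/*
 * Threaded stress: one kthread per online CPU (plus one spare) runs the
 * same migration subtest concurrently against the single migration
 * context; each thread gets its own PRNG stream derived from the parent
 * state.
 */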
struct threaded_migrate {
	struct intel_migrate *migrate;
	struct task_struct *tsk;
	struct rnd_state prng;
};

static int threaded_migrate(struct intel_migrate *migrate,
			    int (*fn)(void *arg),
			    unsigned int flags)
{
	const unsigned int n_cpus = num_online_cpus() + 1;
	struct threaded_migrate *thread;
	I915_RND_STATE(prng);
	unsigned int i;
	int err = 0;

	thread = kcalloc(n_cpus, sizeof(*thread), GFP_KERNEL);
	if (!thread)
		return 0;

	for (i = 0; i < n_cpus; ++i) {
		struct task_struct *tsk;

		thread[i].migrate = migrate;
		thread[i].prng =
			I915_RND_STATE_INITIALIZER(prandom_u32_state(&prng));

		tsk = kthread_run(fn, &thread[i], "igt-%d", i);
		if (IS_ERR(tsk)) {
			err = PTR_ERR(tsk);
			break;
		}

		get_task_struct(tsk);
		thread[i].tsk = tsk;
	}

	msleep(10); /* start all threads before we kthread_stop() */

	for (i = 0; i < n_cpus; ++i) {
		struct task_struct *tsk = thread[i].tsk;
		int status;

		if (IS_ERR_OR_NULL(tsk))
			continue;

		status = kthread_stop(tsk);
		if (status && !err)
			err = status;

		put_task_struct(tsk);
	}

	kfree(thread);
	return err;
}

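/*
 * The threaded subtests use 2 * CHUNK_SZ so that each transfer spans more
 * than one CHUNK_SZ-sized chunk of blitter work.
 */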
static int __thread_migrate_copy(void *arg)
{
	struct threaded_migrate *tm = arg;

	return migrate_copy(tm->migrate, 2 * CHUNK_SZ, &tm->prng);
}

static int thread_migrate_copy(void *arg)
{
	return threaded_migrate(arg, __thread_migrate_copy, 0);
}

static int __thread_global_copy(void *arg)
{
	struct threaded_migrate *tm = arg;

	return global_copy(tm->migrate, 2 * CHUNK_SZ, &tm->prng);
}

static int thread_global_copy(void *arg)
{
	return threaded_migrate(arg, __thread_global_copy, 0);
}

static int __thread_migrate_clear(void *arg)
{
	struct threaded_migrate *tm = arg;

	return migrate_clear(tm->migrate, 2 * CHUNK_SZ, &tm->prng);
}

static int __thread_global_clear(void *arg)
{
	struct threaded_migrate *tm = arg;

	return global_clear(tm->migrate, 2 * CHUNK_SZ, &tm->prng);
}

static int thread_migrate_clear(void *arg)
{
	return threaded_migrate(arg, __thread_migrate_clear, 0);
}

static int thread_global_clear(void *arg)
{
	return threaded_migrate(arg, __thread_global_clear, 0);
}

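/*
 * Entry point for the live selftests; skipped entirely when the device
 * has no migration context.
 */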
int intel_migrate_live_selftests(struct drm_i915_private *i915)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(live_migrate_copy),
		SUBTEST(live_migrate_clear),
		SUBTEST(thread_migrate_copy),
		SUBTEST(thread_migrate_clear),
		SUBTEST(thread_global_copy),
		SUBTEST(thread_global_clear),
	};
	struct intel_gt *gt = &i915->gt;

	if (!gt->migrate.context)
		return 0;

	return i915_subtests(tests, &gt->migrate);
}

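/*
 * Create an object for the perf tests: optionally try lmem first, fall
 * back to internal memory, then lock the object and pin its backing pages
 * so its sg_table can be handed straight to the migration routines.
 */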
static struct drm_i915_gem_object *
create_init_lmem_internal(struct intel_gt *gt, size_t sz, bool try_lmem)
{
	struct drm_i915_gem_object *obj = NULL;
	int err;

	if (try_lmem)
		obj = i915_gem_object_create_lmem(gt->i915, sz, 0);

	if (IS_ERR_OR_NULL(obj)) {
		obj = i915_gem_object_create_internal(gt->i915, sz);
		if (IS_ERR(obj))
			return obj;
	}

	/* Freshly created and unshared, so the trylock is expected to succeed. */
	i915_gem_object_trylock(obj);
	err = i915_gem_object_pin_pages(obj);
	if (err) {
		i915_gem_object_unlock(obj);
		i915_gem_object_put(obj);
		return ERR_PTR(err);
	}

	return obj;
}

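/* ktime_compare() adaptor so sort() can order the sampled timings. */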
static int wrap_ktime_compare(const void *A, const void *B)
{
	const ktime_t *a = A, *b = B;

	return ktime_compare(*a, *b);
}

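/*
 * Time several blitter clears of the same buffer and report an effective
 * fill bandwidth; see the throughput calculation below for how the
 * samples are combined.
 */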
static int __perf_clear_blt(struct intel_context *ce,
			    struct scatterlist *sg,
			    enum i915_cache_level cache_level,
			    bool is_lmem,
			    size_t sz)
{
	ktime_t t[5];
	int pass;
	int err = 0;

	for (pass = 0; pass < ARRAY_SIZE(t); pass++) {
		struct i915_request *rq;
		ktime_t t0, t1;

		t0 = ktime_get();

		err = intel_context_migrate_clear(ce, NULL, sg, cache_level,
						  is_lmem, 0, &rq);
		if (rq) {
			if (i915_request_wait(rq, 0, MAX_SCHEDULE_TIMEOUT) < 0)
				err = -EIO;
			i915_request_put(rq);
		}
		if (err)
			break;

		t1 = ktime_get();
		t[pass] = ktime_sub(t1, t0);
	}
	if (err)
		return err;

	/*
	 * Discard the fastest and slowest pass and weight the median twice:
	 * 4 passes worth of bytes over t[1] + 2 * t[2] + t[3] nanoseconds,
	 * reported in MiB/s.
	 */
	sort(t, ARRAY_SIZE(t), sizeof(*t), wrap_ktime_compare, NULL);
	pr_info("%s: %zd KiB fill: %lld MiB/s\n",
		ce->engine->name, sz >> 10,
		div64_u64(mul_u32_u32(4 * sz,
				      1000 * 1000 * 1000),
			  t[1] + 2 * t[2] + t[3]) >> 20);
	return 0;
}

static int perf_clear_blt(void *arg)
{
	struct intel_gt *gt = arg;
	static const unsigned long sizes[] = {
		SZ_4K,
		SZ_64K,
		SZ_2M,
		SZ_64M
	};
	int i;

	for (i = 0; i < ARRAY_SIZE(sizes); i++) {
		struct drm_i915_gem_object *dst;
		int err;

		dst = create_init_lmem_internal(gt, sizes[i], true);
		if (IS_ERR(dst))
			return PTR_ERR(dst);

		err = __perf_clear_blt(gt->migrate.context,
				       dst->mm.pages->sgl,
				       I915_CACHE_NONE,
				       i915_gem_object_is_lmem(dst),
				       sizes[i]);

		i915_gem_object_unlock(dst);
		i915_gem_object_put(dst);
		if (err)
			return err;
	}

	return 0;
}

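/*
 * Same timing loop as __perf_clear_blt(), but for a copy between two
 * buffers; reports an effective copy bandwidth.
 */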
static int __perf_copy_blt(struct intel_context *ce,
			   struct scatterlist *src,
			   enum i915_cache_level src_cache_level,
			   bool src_is_lmem,
			   struct scatterlist *dst,
			   enum i915_cache_level dst_cache_level,
			   bool dst_is_lmem,
			   size_t sz)
{
	ktime_t t[5];
	int pass;
	int err = 0;

	for (pass = 0; pass < ARRAY_SIZE(t); pass++) {
		struct i915_request *rq;
		ktime_t t0, t1;

		t0 = ktime_get();

		err = intel_context_migrate_copy(ce, NULL,
						 src, src_cache_level,
						 src_is_lmem,
						 dst, dst_cache_level,
						 dst_is_lmem,
						 &rq);
		if (rq) {
			if (i915_request_wait(rq, 0, MAX_SCHEDULE_TIMEOUT) < 0)
				err = -EIO;
			i915_request_put(rq);
		}
		if (err)
			break;

		t1 = ktime_get();
		t[pass] = ktime_sub(t1, t0);
	}
	if (err)
		return err;

	/* Same trimmed, median-weighted average as in __perf_clear_blt(). */
	sort(t, ARRAY_SIZE(t), sizeof(*t), wrap_ktime_compare, NULL);
	pr_info("%s: %zd KiB copy: %lld MiB/s\n",
		ce->engine->name, sz >> 10,
		div64_u64(mul_u32_u32(4 * sz,
				      1000 * 1000 * 1000),
			  t[1] + 2 * t[2] + t[3]) >> 20);
	return 0;
}

static int perf_copy_blt(void *arg)
{
	struct intel_gt *gt = arg;
	static const unsigned long sizes[] = {
		SZ_4K,
		SZ_64K,
		SZ_2M,
		SZ_64M
	};
	int i;

	for (i = 0; i < ARRAY_SIZE(sizes); i++) {
		struct drm_i915_gem_object *src, *dst;
		int err;

		src = create_init_lmem_internal(gt, sizes[i], true);
		if (IS_ERR(src))
			return PTR_ERR(src);

		dst = create_init_lmem_internal(gt, sizes[i], false);
		if (IS_ERR(dst)) {
			err = PTR_ERR(dst);
			goto err_src;
		}

		err = __perf_copy_blt(gt->migrate.context,
				      src->mm.pages->sgl,
				      I915_CACHE_NONE,
				      i915_gem_object_is_lmem(src),
				      dst->mm.pages->sgl,
				      I915_CACHE_NONE,
				      i915_gem_object_is_lmem(dst),
				      sizes[i]);

		i915_gem_object_unlock(dst);
		i915_gem_object_put(dst);
err_src:
		i915_gem_object_unlock(src);
		i915_gem_object_put(src);
		if (err)
			return err;
	}

	return 0;
}

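/*
 * Entry point for the perf selftests; skipped when the GT is wedged or
 * there is no migration context.
 */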
int intel_migrate_perf_selftests(struct drm_i915_private *i915)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(perf_clear_blt),
		SUBTEST(perf_copy_blt),
	};
	struct intel_gt *gt = &i915->gt;

	if (intel_gt_is_wedged(gt))
		return 0;

	if (!gt->migrate.context)
		return 0;

	return intel_gt_live_subtests(tests, gt);
}