// SPDX-License-Identifier: MIT
/*
 * Copyright © 2020 Intel Corporation
 */

#include <linux/sort.h>

#include "gem/i915_gem_internal.h"

#include "selftests/i915_random.h"

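/*
 * The sizes below deliberately straddle the CHUNK_SZ boundary so that
 * single-chunk, exact-chunk and multi-chunk blits are all exercised.
 * CHUNK_SZ itself is not defined here: as is usual for i915 selftests,
 * this file is presumably #included from the unit it exercises.
 */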
static const unsigned int sizes[] = {
	SZ_4K,
	SZ_64K,
	SZ_2M,
	CHUNK_SZ - SZ_4K,
	CHUNK_SZ,
	CHUNK_SZ + SZ_4K,
	SZ_64M,
};

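/*
 * Prefer an object in device-local memory (lmem) so that the blitter is
 * exercised against its primary migration target; fall back to an
 * internal (system memory) object on platforms without lmem.
 */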
static struct drm_i915_gem_object *
create_lmem_or_internal(struct drm_i915_private *i915, size_t size)
{
	struct drm_i915_gem_object *obj;

	obj = i915_gem_object_create_lmem(i915, size, 0);
	if (!IS_ERR(obj))
		return obj;

	return i915_gem_object_create_internal(i915, size);
}

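/*
 * Fill @src with an ascending dword pattern and @dst with its complement,
 * blit from @src to @dst via @fn, then verify the result by sampling one
 * randomly chosen dword in every page of @dst. Allocation failures are
 * treated as a skip (return 0) rather than a test failure.
 */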
static int copy(struct intel_migrate *migrate,
		int (*fn)(struct intel_migrate *migrate,
			  struct i915_gem_ww_ctx *ww,
			  struct drm_i915_gem_object *src,
			  struct drm_i915_gem_object *dst,
			  struct i915_request **out),
		u32 sz, struct rnd_state *prng)
{
	struct drm_i915_private *i915 = migrate->context->engine->i915;
	struct drm_i915_gem_object *src, *dst;
	struct i915_request *rq;
	struct i915_gem_ww_ctx ww;
	u32 *vaddr;
	int err = 0;
	int i;

	src = create_lmem_or_internal(i915, sz);
	if (IS_ERR(src))
		return 0;

	sz = src->base.size;
	dst = i915_gem_object_create_internal(i915, sz);
	if (IS_ERR(dst))
		goto err_free_src;

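	/*
	 * for_i915_gem_ww() re-runs the body after backing off whenever a
	 * lock attempt fails with -EDEADLK; completing the body with any
	 * other value in err (including 0) terminates the loop.
	 */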
	for_i915_gem_ww(&ww, err, true) {
		err = i915_gem_object_lock(src, &ww);
		if (err)
			continue;

		err = i915_gem_object_lock(dst, &ww);
		if (err)
			continue;

		vaddr = i915_gem_object_pin_map(src, I915_MAP_WC);
		if (IS_ERR(vaddr)) {
			err = PTR_ERR(vaddr);
			continue;
		}

		for (i = 0; i < sz / sizeof(u32); i++)
			vaddr[i] = i;
		i915_gem_object_flush_map(src);

		vaddr = i915_gem_object_pin_map(dst, I915_MAP_WC);
		if (IS_ERR(vaddr)) {
			err = PTR_ERR(vaddr);
			goto unpin_src;
		}

		for (i = 0; i < sz / sizeof(u32); i++)
			vaddr[i] = ~i;
		i915_gem_object_flush_map(dst);

		err = fn(migrate, &ww, src, dst, &rq);
		if (!err)
			continue;

		if (err != -EDEADLK && err != -EINTR && err != -ERESTARTSYS)
			pr_err("%ps failed, size: %u\n", fn, sz);
		if (rq) {
			i915_request_wait(rq, 0, HZ);
			i915_request_put(rq);
		}
		i915_gem_object_unpin_map(dst);
unpin_src:
		i915_gem_object_unpin_map(src);
	}
	if (err)
		goto err_out;

	if (rq) {
		if (i915_request_wait(rq, 0, HZ) < 0) {
			pr_err("%ps timed out, size: %u\n", fn, sz);
			err = -ETIME;
		}
		i915_request_put(rq);
	}

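	/*
	 * Spot-check one random dword per page: a 4K page holds 1024 u32s,
	 * each of which must still carry the ascending pattern written to
	 * the source.
	 */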
	for (i = 0; !err && i < sz / PAGE_SIZE; i++) {
		int x = i * 1024 + i915_prandom_u32_max_state(1024, prng);

		if (vaddr[x] != x) {
			pr_err("%ps failed, size: %u, offset: %zu\n",
			       fn, sz, x * sizeof(u32));
			igt_hexdump(vaddr + i * 1024, 4096);
			err = -EINVAL;
		}
	}

	i915_gem_object_unpin_map(dst);
	i915_gem_object_unpin_map(src);

err_out:
	i915_gem_object_put(dst);
err_free_src:
	i915_gem_object_put(src);

	return err;
}

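/*
 * The clear counterpart of copy() above: prime the object with ~i, clear
 * it via @fn to the value @sz (the size doubles as the fill value), then
 * sample one random dword per page to confirm the clear landed.
 */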
static int clear(struct intel_migrate *migrate,
		 int (*fn)(struct intel_migrate *migrate,
			   struct i915_gem_ww_ctx *ww,
			   struct drm_i915_gem_object *obj,
			   u32 value,
			   struct i915_request **out),
		 u32 sz, struct rnd_state *prng)
{
	struct drm_i915_private *i915 = migrate->context->engine->i915;
	struct drm_i915_gem_object *obj;
	struct i915_request *rq;
	struct i915_gem_ww_ctx ww;
	u32 *vaddr;
	int err = 0;
	int i;

	obj = create_lmem_or_internal(i915, sz);
	if (IS_ERR(obj))
		return 0;

	for_i915_gem_ww(&ww, err, true) {
		err = i915_gem_object_lock(obj, &ww);
		if (err)
			continue;

		vaddr = i915_gem_object_pin_map(obj, I915_MAP_WC);
		if (IS_ERR(vaddr)) {
			err = PTR_ERR(vaddr);
			continue;
		}

		for (i = 0; i < sz / sizeof(u32); i++)
			vaddr[i] = ~i;
		i915_gem_object_flush_map(obj);

		err = fn(migrate, &ww, obj, sz, &rq);
		if (!err)
			continue;

		if (err != -EDEADLK && err != -EINTR && err != -ERESTARTSYS)
			pr_err("%ps failed, size: %u\n", fn, sz);
		if (rq) {
			i915_request_wait(rq, 0, HZ);
			i915_request_put(rq);
		}
		i915_gem_object_unpin_map(obj);
	}
	if (err)
		goto err_out;

	if (rq) {
		if (i915_request_wait(rq, 0, HZ) < 0) {
			pr_err("%ps timed out, size: %u\n", fn, sz);
			err = -ETIME;
		}
		i915_request_put(rq);
	}

	for (i = 0; !err && i < sz / PAGE_SIZE; i++) {
		int x = i * 1024 + i915_prandom_u32_max_state(1024, prng);

		if (vaddr[x] != sz) {
			pr_err("%ps failed, size: %u, offset: %zu\n",
			       fn, sz, x * sizeof(u32));
			igt_hexdump(vaddr + i * 1024, 4096);
			err = -EINVAL;
		}
	}

	i915_gem_object_unpin_map(obj);
err_out:
	i915_gem_object_put(obj);

	return err;
}

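/*
 * Each operation is tested through two entry points: the __migrate_*
 * wrappers use the ww-aware intel_migrate_copy()/intel_migrate_clear(),
 * while the __global_* wrappers drive the GT's shared migration context
 * directly via intel_context_migrate_copy()/intel_context_migrate_clear().
 */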
static int __migrate_copy(struct intel_migrate *migrate,
			  struct i915_gem_ww_ctx *ww,
			  struct drm_i915_gem_object *src,
			  struct drm_i915_gem_object *dst,
			  struct i915_request **out)
{
	return intel_migrate_copy(migrate, ww, NULL,
				  src->mm.pages->sgl, src->cache_level,
				  i915_gem_object_is_lmem(src),
				  dst->mm.pages->sgl, dst->cache_level,
				  i915_gem_object_is_lmem(dst),
				  out);
}

static int __global_copy(struct intel_migrate *migrate,
			 struct i915_gem_ww_ctx *ww,
			 struct drm_i915_gem_object *src,
			 struct drm_i915_gem_object *dst,
			 struct i915_request **out)
{
	return intel_context_migrate_copy(migrate->context, NULL,
					  src->mm.pages->sgl, src->cache_level,
					  i915_gem_object_is_lmem(src),
					  dst->mm.pages->sgl, dst->cache_level,
					  i915_gem_object_is_lmem(dst),
					  out);
}

static int
migrate_copy(struct intel_migrate *migrate, u32 sz, struct rnd_state *prng)
{
	return copy(migrate, __migrate_copy, sz, prng);
}

static int
global_copy(struct intel_migrate *migrate, u32 sz, struct rnd_state *prng)
{
	return copy(migrate, __global_copy, sz, prng);
}

static int __migrate_clear(struct intel_migrate *migrate,
			   struct i915_gem_ww_ctx *ww,
			   struct drm_i915_gem_object *obj,
			   u32 value,
			   struct i915_request **out)
{
	return intel_migrate_clear(migrate, ww, NULL,
				   obj->mm.pages->sgl,
				   obj->cache_level,
				   i915_gem_object_is_lmem(obj),
				   value, out);
}

static int __global_clear(struct intel_migrate *migrate,
			  struct i915_gem_ww_ctx *ww,
			  struct drm_i915_gem_object *obj,
			  u32 value,
			  struct i915_request **out)
{
	return intel_context_migrate_clear(migrate->context, NULL,
					   obj->mm.pages->sgl,
					   obj->cache_level,
					   i915_gem_object_is_lmem(obj),
					   value, out);
}

static int
migrate_clear(struct intel_migrate *migrate, u32 sz, struct rnd_state *prng)
{
	return clear(migrate, __migrate_clear, sz, prng);
}

static int
global_clear(struct intel_migrate *migrate, u32 sz, struct rnd_state *prng)
{
	return clear(migrate, __global_clear, sz, prng);
}

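/* Walk the size table once per flavour, draining frees between sizes. */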
static int live_migrate_copy(void *arg)
{
	struct intel_migrate *migrate = arg;
	struct drm_i915_private *i915 = migrate->context->engine->i915;
	I915_RND_STATE(prng);
	int i;

	for (i = 0; i < ARRAY_SIZE(sizes); i++) {
		int err;

		err = migrate_copy(migrate, sizes[i], &prng);
		if (err == 0)
			err = global_copy(migrate, sizes[i], &prng);
		i915_gem_drain_freed_objects(i915);
		if (err)
			return err;
	}

	return 0;
}

static int live_migrate_clear(void *arg)
{
	struct intel_migrate *migrate = arg;
	struct drm_i915_private *i915 = migrate->context->engine->i915;
	I915_RND_STATE(prng);
	int i;

	for (i = 0; i < ARRAY_SIZE(sizes); i++) {
		int err;

		err = migrate_clear(migrate, sizes[i], &prng);
		if (err == 0)
			err = global_clear(migrate, sizes[i], &prng);

		i915_gem_drain_freed_objects(i915);
		if (err)
			return err;
	}

	return 0;
}

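/*
 * Exercise the migration routines concurrently: one kthread per online
 * CPU plus one, each with an independent PRNG stream. kthread_stop()
 * returns the thread function's exit code, so the first failure seen is
 * propagated to the caller.
 */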
struct threaded_migrate {
	struct intel_migrate *migrate;
	struct task_struct *tsk;
	struct rnd_state prng;
};

static int threaded_migrate(struct intel_migrate *migrate,
			    int (*fn)(void *arg),
			    unsigned int flags)
{
	const unsigned int n_cpus = num_online_cpus() + 1;
	struct threaded_migrate *thread;
	I915_RND_STATE(prng);
	unsigned int i;
	int err = 0;

	thread = kcalloc(n_cpus, sizeof(*thread), GFP_KERNEL);
	if (!thread)
		return 0;

	for (i = 0; i < n_cpus; ++i) {
		struct task_struct *tsk;

		thread[i].migrate = migrate;
		thread[i].prng =
			I915_RND_STATE_INITIALIZER(prandom_u32_state(&prng));

		tsk = kthread_run(fn, &thread[i], "igt-%d", i);
		if (IS_ERR(tsk)) {
			err = PTR_ERR(tsk);
			break;
		}

		get_task_struct(tsk);
		thread[i].tsk = tsk;
	}

	msleep(10); /* start all threads before we kthread_stop() */

	for (i = 0; i < n_cpus; ++i) {
		struct task_struct *tsk = thread[i].tsk;
		int status;

		if (IS_ERR_OR_NULL(tsk))
			continue;

		status = kthread_stop(tsk);
		if (status && !err)
			err = status;

		put_task_struct(tsk);
	}

	kfree(thread);
	return err;
}

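/*
 * Each worker operates on 2 * CHUNK_SZ, so every request has to be split
 * across chunk boundaries while several such requests are in flight.
 */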
static int __thread_migrate_copy(void *arg)
{
	struct threaded_migrate *tm = arg;

	return migrate_copy(tm->migrate, 2 * CHUNK_SZ, &tm->prng);
}

static int thread_migrate_copy(void *arg)
{
	return threaded_migrate(arg, __thread_migrate_copy, 0);
}

static int __thread_global_copy(void *arg)
{
	struct threaded_migrate *tm = arg;

	return global_copy(tm->migrate, 2 * CHUNK_SZ, &tm->prng);
}

static int thread_global_copy(void *arg)
{
	return threaded_migrate(arg, __thread_global_copy, 0);
}

static int __thread_migrate_clear(void *arg)
{
	struct threaded_migrate *tm = arg;

	return migrate_clear(tm->migrate, 2 * CHUNK_SZ, &tm->prng);
}

static int __thread_global_clear(void *arg)
{
	struct threaded_migrate *tm = arg;

	return global_clear(tm->migrate, 2 * CHUNK_SZ, &tm->prng);
}

static int thread_migrate_clear(void *arg)
{
	return threaded_migrate(arg, __thread_migrate_clear, 0);
}

static int thread_global_clear(void *arg)
{
	return threaded_migrate(arg, __thread_global_clear, 0);
}

int intel_migrate_live_selftests(struct drm_i915_private *i915)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(live_migrate_copy),
		SUBTEST(live_migrate_clear),
		SUBTEST(thread_migrate_copy),
		SUBTEST(thread_migrate_clear),
		SUBTEST(thread_global_copy),
		SUBTEST(thread_global_clear),
	};
	struct intel_gt *gt = to_gt(i915);

	if (!gt->migrate.context)
		return 0;

	return i915_subtests(tests, &gt->migrate);
}

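/*
 * Allocate and pin a (preferably lmem-backed) object for the perf tests.
 * On success the object is returned locked with its pages pinned; the
 * caller is responsible for unlocking and putting it.
 */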
static struct drm_i915_gem_object *
create_init_lmem_internal(struct intel_gt *gt, size_t sz, bool try_lmem)
{
	struct drm_i915_gem_object *obj = NULL;
	int err;

	if (try_lmem)
		obj = i915_gem_object_create_lmem(gt->i915, sz, 0);

	if (IS_ERR_OR_NULL(obj)) {
		obj = i915_gem_object_create_internal(gt->i915, sz);
		if (IS_ERR(obj))
			return obj;
	}

	i915_gem_object_trylock(obj, NULL);
	err = i915_gem_object_pin_pages(obj);
	if (err) {
		i915_gem_object_unlock(obj);
		i915_gem_object_put(obj);
		return ERR_PTR(err);
	}

	return obj;
}

static int wrap_ktime_compare(const void *A, const void *B)
{
	const ktime_t *a = A, *b = B;

	return ktime_compare(*a, *b);
}

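/*
 * Time ARRAY_SIZE(t) passes, sort the results, then discard the fastest
 * and slowest: throughput is reported as 4 * sz bytes moved in
 * t[1] + 2 * t[2] + t[3] nanoseconds, scaled to MiB/s (the >> 20).
 */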
static int __perf_clear_blt(struct intel_context *ce,
			    struct scatterlist *sg,
			    enum i915_cache_level cache_level,
			    bool is_lmem,
			    size_t sz)
{
	ktime_t t[5];
	int pass;
	int err = 0;

	for (pass = 0; pass < ARRAY_SIZE(t); pass++) {
		struct i915_request *rq;
		ktime_t t0, t1;

		t0 = ktime_get();

		err = intel_context_migrate_clear(ce, NULL, sg, cache_level,
						  is_lmem, 0, &rq);
		if (rq) {
			if (i915_request_wait(rq, 0, MAX_SCHEDULE_TIMEOUT) < 0)
				err = -EIO;
			i915_request_put(rq);
		}
		if (err)
			break;

		t1 = ktime_get();
		t[pass] = ktime_sub(t1, t0);
	}
	if (err)
		return err;

	sort(t, ARRAY_SIZE(t), sizeof(*t), wrap_ktime_compare, NULL);
	pr_info("%s: %zd KiB fill: %lld MiB/s\n",
		ce->engine->name, sz >> 10,
		div64_u64(mul_u32_u32(4 * sz,
				      1000 * 1000 * 1000),
			  t[1] + 2 * t[2] + t[3]) >> 20);
	return 0;
}

static int perf_clear_blt(void *arg)
{
	struct intel_gt *gt = arg;
	static const unsigned long sizes[] = {
		SZ_4K,
		SZ_64K,
		SZ_2M,
		SZ_64M
	};
	int i;

	for (i = 0; i < ARRAY_SIZE(sizes); i++) {
		struct drm_i915_gem_object *dst;
		int err;

		dst = create_init_lmem_internal(gt, sizes[i], true);
		if (IS_ERR(dst))
			return PTR_ERR(dst);

		err = __perf_clear_blt(gt->migrate.context,
				       dst->mm.pages->sgl,
				       I915_CACHE_NONE,
				       i915_gem_object_is_lmem(dst),
				       sizes[i]);

		i915_gem_object_unlock(dst);
		i915_gem_object_put(dst);
		if (err)
			return err;
	}

	return 0;
}

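/* Identical five-pass, trimmed timing scheme to __perf_clear_blt(). */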
static int __perf_copy_blt(struct intel_context *ce,
			   struct scatterlist *src,
			   enum i915_cache_level src_cache_level,
			   bool src_is_lmem,
			   struct scatterlist *dst,
			   enum i915_cache_level dst_cache_level,
			   bool dst_is_lmem,
			   size_t sz)
{
	ktime_t t[5];
	int pass;
	int err = 0;

	for (pass = 0; pass < ARRAY_SIZE(t); pass++) {
		struct i915_request *rq;
		ktime_t t0, t1;

		t0 = ktime_get();

		err = intel_context_migrate_copy(ce, NULL,
						 src, src_cache_level,
						 src_is_lmem,
						 dst, dst_cache_level,
						 dst_is_lmem,
						 &rq);
		if (rq) {
			if (i915_request_wait(rq, 0, MAX_SCHEDULE_TIMEOUT) < 0)
				err = -EIO;
			i915_request_put(rq);
		}
		if (err)
			break;

		t1 = ktime_get();
		t[pass] = ktime_sub(t1, t0);
	}
	if (err)
		return err;

	sort(t, ARRAY_SIZE(t), sizeof(*t), wrap_ktime_compare, NULL);
	pr_info("%s: %zd KiB copy: %lld MiB/s\n",
		ce->engine->name, sz >> 10,
		div64_u64(mul_u32_u32(4 * sz,
				      1000 * 1000 * 1000),
			  t[1] + 2 * t[2] + t[3]) >> 20);
	return 0;
}

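/*
 * Copy from a (preferably lmem-backed) source into a system memory
 * destination, so lmem -> smem bandwidth is what gets measured where
 * lmem is available.
 */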
static int perf_copy_blt(void *arg)
{
	struct intel_gt *gt = arg;
	static const unsigned long sizes[] = {
		SZ_4K,
		SZ_64K,
		SZ_2M,
		SZ_64M
	};
	int i;

	for (i = 0; i < ARRAY_SIZE(sizes); i++) {
		struct drm_i915_gem_object *src, *dst;
		int err;

		src = create_init_lmem_internal(gt, sizes[i], true);
		if (IS_ERR(src))
			return PTR_ERR(src);

		dst = create_init_lmem_internal(gt, sizes[i], false);
		if (IS_ERR(dst)) {
			err = PTR_ERR(dst);
			goto err_src;
		}

		err = __perf_copy_blt(gt->migrate.context,
				      src->mm.pages->sgl,
				      I915_CACHE_NONE,
				      i915_gem_object_is_lmem(src),
				      dst->mm.pages->sgl,
				      I915_CACHE_NONE,
				      i915_gem_object_is_lmem(dst),
				      sizes[i]);

		i915_gem_object_unlock(dst);
		i915_gem_object_put(dst);
err_src:
		i915_gem_object_unlock(src);
		i915_gem_object_put(src);
		if (err)
			return err;
	}

	return 0;
}

int intel_migrate_perf_selftests(struct drm_i915_private *i915)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(perf_clear_blt),
		SUBTEST(perf_copy_blt),
	};
	struct intel_gt *gt = to_gt(i915);

	if (intel_gt_is_wedged(gt))
		return 0;

	if (!gt->migrate.context)
		return 0;

	return intel_gt_live_subtests(tests, gt);
}