// SPDX-License-Identifier: MIT
/*
 * Copyright © 2020 Intel Corporation
 */

#include <linux/sort.h>

#include "gem/i915_gem_internal.h"

#include "selftests/i915_random.h"

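/*
 * Note: as with the other gt selftests, this file is presumably built by
 * being #included from the code under test (intel_migrate.c), which is
 * where CHUNK_SZ, struct sgt_dma, sg_sgt(), emit_pte(), emit_no_arbitration()
 * and emit_copy_ccs() are expected to come from.
 */

/*
 * Transfer sizes to exercise: a few straightforward sizes plus sizes just
 * below, at and just above the CHUNK_SZ window used by the migration code,
 * so the chunking boundary is covered.
 */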
static const unsigned int sizes[] = {
	SZ_4K,
	SZ_64K,
	SZ_2M,
	CHUNK_SZ - SZ_4K,
	CHUNK_SZ,
	CHUNK_SZ + SZ_4K,
	SZ_64M,
};

static struct drm_i915_gem_object *
create_lmem_or_internal(struct drm_i915_private *i915, size_t size)
{
	struct drm_i915_gem_object *obj;

	obj = i915_gem_object_create_lmem(i915, size, 0);
	if (!IS_ERR(obj))
		return obj;

	return i915_gem_object_create_internal(i915, size);
}

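/*
 * copy() - fill @src with an ascending pattern and @dst with its complement,
 * run the copy routine under test via @fn inside a ww transaction (retrying
 * on -EDEADLK), then spot-check one randomly chosen u32 per page of the
 * destination mapping.
 */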
static int copy(struct intel_migrate *migrate,
		int (*fn)(struct intel_migrate *migrate,
			  struct i915_gem_ww_ctx *ww,
			  struct drm_i915_gem_object *src,
			  struct drm_i915_gem_object *dst,
			  struct i915_request **out),
		u32 sz, struct rnd_state *prng)
{
	struct drm_i915_private *i915 = migrate->context->engine->i915;
	struct drm_i915_gem_object *src, *dst;
	struct i915_request *rq;
	struct i915_gem_ww_ctx ww;
	u32 *vaddr;
	int err = 0;
	int i;

	src = create_lmem_or_internal(i915, sz);
	if (IS_ERR(src))
		return 0;

	sz = src->base.size;
	dst = i915_gem_object_create_internal(i915, sz);
	if (IS_ERR(dst))
		goto err_free_src;

	for_i915_gem_ww(&ww, err, true) {
		err = i915_gem_object_lock(src, &ww);
		if (err)
			continue;

		err = i915_gem_object_lock(dst, &ww);
		if (err)
			continue;

		vaddr = i915_gem_object_pin_map(src, I915_MAP_WC);
		if (IS_ERR(vaddr)) {
			err = PTR_ERR(vaddr);
			continue;
		}

		for (i = 0; i < sz / sizeof(u32); i++)
			vaddr[i] = i;
		i915_gem_object_flush_map(src);

		vaddr = i915_gem_object_pin_map(dst, I915_MAP_WC);
		if (IS_ERR(vaddr)) {
			err = PTR_ERR(vaddr);
			goto unpin_src;
		}

		for (i = 0; i < sz / sizeof(u32); i++)
			vaddr[i] = ~i;
		i915_gem_object_flush_map(dst);

		err = fn(migrate, &ww, src, dst, &rq);
		if (!err)
			continue;

		if (err != -EDEADLK && err != -EINTR && err != -ERESTARTSYS)
			pr_err("%ps failed, size: %u\n", fn, sz);
		if (rq) {
			i915_request_wait(rq, 0, HZ);
			i915_request_put(rq);
		}
		i915_gem_object_unpin_map(dst);
unpin_src:
		i915_gem_object_unpin_map(src);
	}
	if (err)
		goto err_out;

	if (rq) {
		if (i915_request_wait(rq, 0, HZ) < 0) {
			pr_err("%ps timed out, size: %u\n", fn, sz);
			err = -ETIME;
		}
		i915_request_put(rq);
	}

	for (i = 0; !err && i < sz / PAGE_SIZE; i++) {
		int x = i * 1024 + i915_prandom_u32_max_state(1024, prng);

		if (vaddr[x] != x) {
			pr_err("%ps failed, size: %u, offset: %zu\n",
			       fn, sz, x * sizeof(u32));
			igt_hexdump(vaddr + i * 1024, 4096);
			err = -EINVAL;
		}
	}

	i915_gem_object_unpin_map(dst);
	i915_gem_object_unpin_map(src);

err_out:
	i915_gem_object_put(dst);
err_free_src:
	i915_gem_object_put(src);

	return err;
}

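/*
 * Selftest-local CCS transfer: walk the backing store in CHUNK_SZ windows,
 * binding each window with emit_pte() and then copying between the main
 * surface and its compression control surface with emit_copy_ccs().
 * @write_to_ccs selects the direction (main -> CCS when true, CCS -> main
 * when false) by swapping the DIRECT/INDIRECT access roles.
 */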
static int intel_context_copy_ccs(struct intel_context *ce,
				  const struct i915_deps *deps,
				  struct scatterlist *sg,
				  enum i915_cache_level cache_level,
				  bool write_to_ccs,
				  struct i915_request **out)
{
	u8 src_access = write_to_ccs ? DIRECT_ACCESS : INDIRECT_ACCESS;
	u8 dst_access = write_to_ccs ? INDIRECT_ACCESS : DIRECT_ACCESS;
	struct sgt_dma it = sg_sgt(sg);
	struct i915_request *rq;
	u32 offset;
	int err;

	GEM_BUG_ON(ce->vm != ce->engine->gt->migrate.context->vm);
	*out = NULL;

	GEM_BUG_ON(ce->ring->size < SZ_64K);

	offset = 0;
	if (HAS_64K_PAGES(ce->engine->i915))
		offset = CHUNK_SZ;

	do {
		int len;

		rq = i915_request_create(ce);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			goto out_ce;
		}

		if (deps) {
			err = i915_request_await_deps(rq, deps);
			if (err)
				goto out_rq;

			if (rq->engine->emit_init_breadcrumb) {
				err = rq->engine->emit_init_breadcrumb(rq);
				if (err)
					goto out_rq;
			}

			deps = NULL;
		}

		/* The PTE updates + copy must not be interrupted. */
		err = emit_no_arbitration(rq);
		if (err)
			goto out_rq;

		len = emit_pte(rq, &it, cache_level, true, offset, CHUNK_SZ);
		if (len <= 0) {
			err = len;
			goto out_rq;
		}

		err = rq->engine->emit_flush(rq, EMIT_INVALIDATE);
		if (err)
			goto out_rq;

		err = emit_copy_ccs(rq, offset, dst_access,
				    offset, src_access, len);
		if (err)
			goto out_rq;

		err = rq->engine->emit_flush(rq, EMIT_INVALIDATE);

		/* Arbitration is re-enabled between requests. */
out_rq:
		if (*out)
			i915_request_put(*out);
		*out = i915_request_get(rq);
		i915_request_add(rq);
		if (err || !it.sg || !sg_dma_len(it.sg))
			break;

		cond_resched();
	} while (1);

out_ce:
	return err;
}

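/*
 * Pin a migration context under the caller's ww transaction (preferring a
 * fresh context, falling back to the global one) and run the CCS transfer
 * above on it.
 */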
static int
intel_migrate_ccs_copy(struct intel_migrate *m,
		       struct i915_gem_ww_ctx *ww,
		       const struct i915_deps *deps,
		       struct scatterlist *sg,
		       enum i915_cache_level cache_level,
		       bool write_to_ccs,
		       struct i915_request **out)
{
	struct intel_context *ce;
	int err;

	*out = NULL;
	if (!m->context)
		return -ENODEV;

	ce = intel_migrate_create_context(m);
	if (IS_ERR(ce))
		ce = intel_context_get(m->context);
	GEM_BUG_ON(IS_ERR(ce));

	err = intel_context_pin_ww(ce, ww);
	if (err)
		goto out;

	err = intel_context_copy_ccs(ce, deps, sg, cache_level,
				     write_to_ccs, out);

	intel_context_unpin(ce);
out:
	intel_context_put(ce);
	return err;
}

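/*
 * clear() - scribble over the object, run the clear routine under test via
 * @fn and verify one random u32 per page afterwards.  On FLAT_CCS parts with
 * the object in lmem, additionally dirty the compression control surface
 * before a clear-to-zero and read it back afterwards to check that the clear
 * also zeroed the CCS metadata.
 */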
static int clear(struct intel_migrate *migrate,
		 int (*fn)(struct intel_migrate *migrate,
			   struct i915_gem_ww_ctx *ww,
			   struct drm_i915_gem_object *obj,
			   u32 value,
			   struct i915_request **out),
		 u32 sz, struct rnd_state *prng)
{
	struct drm_i915_private *i915 = migrate->context->engine->i915;
	struct drm_i915_gem_object *obj;
	struct i915_request *rq = NULL;
	struct i915_gem_ww_ctx ww;
	u32 *vaddr, val = 0;
	bool ccs_cap = false;
	int err = 0;
	int i;

	obj = create_lmem_or_internal(i915, sz);
	if (IS_ERR(obj))
		return 0;

	/* Consider the rounded up memory too */
	sz = obj->base.size;

	if (HAS_FLAT_CCS(i915) && i915_gem_object_is_lmem(obj))
		ccs_cap = true;

	for_i915_gem_ww(&ww, err, true) {
		int ccs_bytes, ccs_bytes_per_chunk;

		err = i915_gem_object_lock(obj, &ww);
		if (err)
			continue;

		vaddr = i915_gem_object_pin_map(obj, I915_MAP_WC);
		if (IS_ERR(vaddr)) {
			err = PTR_ERR(vaddr);
			continue;
		}

		for (i = 0; i < sz / sizeof(u32); i++)
			vaddr[i] = ~i;
		i915_gem_object_flush_map(obj);

		if (ccs_cap && !val) {
			/* Write the obj data into ccs surface */
			err = intel_migrate_ccs_copy(migrate, &ww, NULL,
						     obj->mm.pages->sgl,
						     obj->cache_level,
						     true, &rq);
			if (rq && !err) {
				if (i915_request_wait(rq, 0, HZ) < 0) {
					pr_err("%ps timed out, size: %u\n",
					       fn, sz);
					err = -ETIME;
				}
				i915_request_put(rq);
				rq = NULL;
			}
			if (err)
				continue;
		}

		err = fn(migrate, &ww, obj, val, &rq);
		if (rq && !err) {
			if (i915_request_wait(rq, 0, HZ) < 0) {
				pr_err("%ps timed out, size: %u\n", fn, sz);
				err = -ETIME;
			}
			i915_request_put(rq);
			rq = NULL;
		}
		if (err)
			continue;

		i915_gem_object_flush_map(obj);

		/* Verify the set/clear of the obj mem */
		for (i = 0; !err && i < sz / PAGE_SIZE; i++) {
			int x = i * 1024 +
				i915_prandom_u32_max_state(1024, prng);

			if (vaddr[x] != val) {
				pr_err("%ps failed, (%u != %u), offset: %zu\n",
				       fn, vaddr[x], val, x * sizeof(u32));
				igt_hexdump(vaddr + i * 1024, 4096);
				err = -EINVAL;
			}
		}
		if (err)
			continue;

		if (ccs_cap && !val) {
			for (i = 0; i < sz / sizeof(u32); i++)
				vaddr[i] = ~i;
			i915_gem_object_flush_map(obj);

			err = intel_migrate_ccs_copy(migrate, &ww, NULL,
						     obj->mm.pages->sgl,
						     obj->cache_level,
						     false, &rq);
			if (rq && !err) {
				if (i915_request_wait(rq, 0, HZ) < 0) {
					pr_err("%ps timed out, size: %u\n",
					       fn, sz);
					err = -ETIME;
				}
				i915_request_put(rq);
				rq = NULL;
			}
			if (err)
				continue;

			ccs_bytes = GET_CCS_BYTES(i915, sz);
			ccs_bytes_per_chunk = GET_CCS_BYTES(i915, CHUNK_SZ);
			i915_gem_object_flush_map(obj);

			for (i = 0; !err && i < DIV_ROUND_UP(ccs_bytes, PAGE_SIZE); i++) {
				int offset = ((i * PAGE_SIZE) /
					ccs_bytes_per_chunk) * CHUNK_SZ / sizeof(u32);
				int ccs_bytes_left = (ccs_bytes - i * PAGE_SIZE) / sizeof(u32);
				int x = i915_prandom_u32_max_state(min_t(int, 1024,
									 ccs_bytes_left), prng);

				if (vaddr[offset + x]) {
					pr_err("%ps ccs clearing failed, offset: %ld/%d\n",
					       fn, i * PAGE_SIZE + x * sizeof(u32), ccs_bytes);
					igt_hexdump(vaddr + offset,
						    min_t(int, 4096,
							  ccs_bytes_left * sizeof(u32)));
					err = -EINVAL;
				}
			}

			if (err)
				continue;
		}
		i915_gem_object_unpin_map(obj);
	}

	if (err) {
		if (err != -EDEADLK && err != -EINTR && err != -ERESTARTSYS)
			pr_err("%ps failed, size: %u\n", fn, sz);
		if (rq && err != -EINVAL) {
			i915_request_wait(rq, 0, HZ);
			i915_request_put(rq);
		}

		i915_gem_object_unpin_map(obj);
	}

	i915_gem_object_put(obj);
	return err;
}

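/*
 * Thin bindings so the same copy()/clear() harness can drive both entry
 * points: the ww-aware intel_migrate_* wrappers and the raw
 * intel_context_migrate_* calls on the global migration context.
 */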
static int __migrate_copy(struct intel_migrate *migrate,
			  struct i915_gem_ww_ctx *ww,
			  struct drm_i915_gem_object *src,
			  struct drm_i915_gem_object *dst,
			  struct i915_request **out)
{
	return intel_migrate_copy(migrate, ww, NULL,
				  src->mm.pages->sgl, src->cache_level,
				  i915_gem_object_is_lmem(src),
				  dst->mm.pages->sgl, dst->cache_level,
				  i915_gem_object_is_lmem(dst),
				  out);
}

static int __global_copy(struct intel_migrate *migrate,
			 struct i915_gem_ww_ctx *ww,
			 struct drm_i915_gem_object *src,
			 struct drm_i915_gem_object *dst,
			 struct i915_request **out)
{
	return intel_context_migrate_copy(migrate->context, NULL,
					  src->mm.pages->sgl, src->cache_level,
					  i915_gem_object_is_lmem(src),
					  dst->mm.pages->sgl, dst->cache_level,
					  i915_gem_object_is_lmem(dst),
					  out);
}

static int
migrate_copy(struct intel_migrate *migrate, u32 sz, struct rnd_state *prng)
{
	return copy(migrate, __migrate_copy, sz, prng);
}

static int
global_copy(struct intel_migrate *migrate, u32 sz, struct rnd_state *prng)
{
	return copy(migrate, __global_copy, sz, prng);
}

static int __migrate_clear(struct intel_migrate *migrate,
			   struct i915_gem_ww_ctx *ww,
			   struct drm_i915_gem_object *obj,
			   u32 value,
			   struct i915_request **out)
{
	return intel_migrate_clear(migrate, ww, NULL,
				   obj->mm.pages->sgl,
				   obj->cache_level,
				   i915_gem_object_is_lmem(obj),
				   value, out);
}

static int __global_clear(struct intel_migrate *migrate,
			  struct i915_gem_ww_ctx *ww,
			  struct drm_i915_gem_object *obj,
			  u32 value,
			  struct i915_request **out)
{
	return intel_context_migrate_clear(migrate->context, NULL,
					   obj->mm.pages->sgl,
					   obj->cache_level,
					   i915_gem_object_is_lmem(obj),
					   value, out);
}

static int
migrate_clear(struct intel_migrate *migrate, u32 sz, struct rnd_state *prng)
{
	return clear(migrate, __migrate_clear, sz, prng);
}

static int
global_clear(struct intel_migrate *migrate, u32 sz, struct rnd_state *prng)
{
	return clear(migrate, __global_clear, sz, prng);
}

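/*
 * Live tests: walk the sizes[] table through both entry points, draining
 * freed objects between iterations.
 */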
static int live_migrate_copy(void *arg)
{
	struct intel_migrate *migrate = arg;
	struct drm_i915_private *i915 = migrate->context->engine->i915;
	I915_RND_STATE(prng);
	int i;

	for (i = 0; i < ARRAY_SIZE(sizes); i++) {
		int err;

		err = migrate_copy(migrate, sizes[i], &prng);
		if (err == 0)
			err = global_copy(migrate, sizes[i], &prng);
		i915_gem_drain_freed_objects(i915);
		if (err)
			return err;
	}

	return 0;
}

static int live_migrate_clear(void *arg)
{
	struct intel_migrate *migrate = arg;
	struct drm_i915_private *i915 = migrate->context->engine->i915;
	I915_RND_STATE(prng);
	int i;

	for (i = 0; i < ARRAY_SIZE(sizes); i++) {
		int err;

		err = migrate_clear(migrate, sizes[i], &prng);
		if (err == 0)
			err = global_clear(migrate, sizes[i], &prng);

		i915_gem_drain_freed_objects(i915);
		if (err)
			return err;
	}

	return 0;
}

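/*
 * Threaded stress: spawn one kthread per online CPU (plus one), each with
 * its own PRNG stream, all banging on the same migration context at once;
 * the thread callbacks below each run a 2 * CHUNK_SZ transfer.
 */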
struct threaded_migrate {
	struct intel_migrate *migrate;
	struct task_struct *tsk;
	struct rnd_state prng;
};

static int threaded_migrate(struct intel_migrate *migrate,
			    int (*fn)(void *arg),
			    unsigned int flags)
{
	const unsigned int n_cpus = num_online_cpus() + 1;
	struct threaded_migrate *thread;
	I915_RND_STATE(prng);
	unsigned int i;
	int err = 0;

	thread = kcalloc(n_cpus, sizeof(*thread), GFP_KERNEL);
	if (!thread)
		return 0;

	for (i = 0; i < n_cpus; ++i) {
		struct task_struct *tsk;

		thread[i].migrate = migrate;
		thread[i].prng =
			I915_RND_STATE_INITIALIZER(prandom_u32_state(&prng));

		tsk = kthread_run(fn, &thread[i], "igt-%d", i);
		if (IS_ERR(tsk)) {
			err = PTR_ERR(tsk);
			break;
		}

		get_task_struct(tsk);
		thread[i].tsk = tsk;
	}

	msleep(10); /* start all threads before we kthread_stop() */

	for (i = 0; i < n_cpus; ++i) {
		struct task_struct *tsk = thread[i].tsk;
		int status;

		if (IS_ERR_OR_NULL(tsk))
			continue;

		status = kthread_stop(tsk);
		if (status && !err)
			err = status;

		put_task_struct(tsk);
	}

	kfree(thread);
	return err;
}

static int __thread_migrate_copy(void *arg)
{
	struct threaded_migrate *tm = arg;

	return migrate_copy(tm->migrate, 2 * CHUNK_SZ, &tm->prng);
}

static int thread_migrate_copy(void *arg)
{
	return threaded_migrate(arg, __thread_migrate_copy, 0);
}

static int __thread_global_copy(void *arg)
{
	struct threaded_migrate *tm = arg;

	return global_copy(tm->migrate, 2 * CHUNK_SZ, &tm->prng);
}

static int thread_global_copy(void *arg)
{
	return threaded_migrate(arg, __thread_global_copy, 0);
}

static int __thread_migrate_clear(void *arg)
{
	struct threaded_migrate *tm = arg;

	return migrate_clear(tm->migrate, 2 * CHUNK_SZ, &tm->prng);
}

static int __thread_global_clear(void *arg)
{
	struct threaded_migrate *tm = arg;

	return global_clear(tm->migrate, 2 * CHUNK_SZ, &tm->prng);
}

static int thread_migrate_clear(void *arg)
{
	return threaded_migrate(arg, __thread_migrate_clear, 0);
}

static int thread_global_clear(void *arg)
{
	return threaded_migrate(arg, __thread_global_clear, 0);
}

int intel_migrate_live_selftests(struct drm_i915_private *i915)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(live_migrate_copy),
		SUBTEST(live_migrate_clear),
		SUBTEST(thread_migrate_copy),
		SUBTEST(thread_migrate_clear),
		SUBTEST(thread_global_copy),
		SUBTEST(thread_global_clear),
	};
	struct intel_gt *gt = to_gt(i915);

	if (!gt->migrate.context)
		return 0;

	return i915_subtests(tests, &gt->migrate);
}

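/*
 * Helper for the perf tests below: create an object (preferring lmem when
 * asked), lock it and pin its pages so the benchmark can feed the sg list
 * straight into the migration routines.
 */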
static struct drm_i915_gem_object *
create_init_lmem_internal(struct intel_gt *gt, size_t sz, bool try_lmem)
{
	struct drm_i915_gem_object *obj = NULL;
	int err;

	if (try_lmem)
		obj = i915_gem_object_create_lmem(gt->i915, sz, 0);

	if (IS_ERR_OR_NULL(obj)) {
		obj = i915_gem_object_create_internal(gt->i915, sz);
		if (IS_ERR(obj))
			return obj;
	}

	i915_gem_object_trylock(obj, NULL);
	err = i915_gem_object_pin_pages(obj);
	if (err) {
		i915_gem_object_unlock(obj);
		i915_gem_object_put(obj);
		return ERR_PTR(err);
	}

	return obj;
}

static int wrap_ktime_compare(const void *A, const void *B)
{
	const ktime_t *a = A, *b = B;

	return ktime_compare(*a, *b);
}

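/*
 * Time ARRAY_SIZE(t) clear passes, sort the samples and report a throughput
 * based on the middle samples only (t[1] + 2 * t[2] + t[3] covers 4 * sz
 * bytes), which discards the fastest and slowest outliers.
 */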
static int __perf_clear_blt(struct intel_context *ce,
			    struct scatterlist *sg,
			    enum i915_cache_level cache_level,
			    bool is_lmem,
			    size_t sz)
{
	ktime_t t[5];
	int pass;
	int err = 0;

	for (pass = 0; pass < ARRAY_SIZE(t); pass++) {
		struct i915_request *rq;
		ktime_t t0, t1;

		t0 = ktime_get();

		err = intel_context_migrate_clear(ce, NULL, sg, cache_level,
						  is_lmem, 0, &rq);
		if (rq) {
			if (i915_request_wait(rq, 0, MAX_SCHEDULE_TIMEOUT) < 0)
				err = -EIO;
			i915_request_put(rq);
		}
		if (err)
			break;

		t1 = ktime_get();
		t[pass] = ktime_sub(t1, t0);
	}
	if (err)
		return err;

	sort(t, ARRAY_SIZE(t), sizeof(*t), wrap_ktime_compare, NULL);
	pr_info("%s: %zd KiB fill: %lld MiB/s\n",
		ce->engine->name, sz >> 10,
		div64_u64(mul_u32_u32(4 * sz,
				      1000 * 1000 * 1000),
			  t[1] + 2 * t[2] + t[3]) >> 20);
	return 0;
}

static int perf_clear_blt(void *arg)
{
	struct intel_gt *gt = arg;
	static const unsigned long sizes[] = {
		SZ_4K,
		SZ_64K,
		SZ_2M,
		SZ_64M
	};
	int i;

	for (i = 0; i < ARRAY_SIZE(sizes); i++) {
		struct drm_i915_gem_object *dst;
		int err;

		dst = create_init_lmem_internal(gt, sizes[i], true);
		if (IS_ERR(dst))
			return PTR_ERR(dst);

		err = __perf_clear_blt(gt->migrate.context,
				       dst->mm.pages->sgl,
				       I915_CACHE_NONE,
				       i915_gem_object_is_lmem(dst),
				       sizes[i]);

		i915_gem_object_unlock(dst);
		i915_gem_object_put(dst);
		if (err)
			return err;
	}

	return 0;
}

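/*
 * Same five-pass measurement as __perf_clear_blt(), applied to copies;
 * perf_copy_blt() below pairs a (preferably lmem) source with a system
 * memory destination.
 */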
static int __perf_copy_blt(struct intel_context *ce,
			   struct scatterlist *src,
			   enum i915_cache_level src_cache_level,
			   bool src_is_lmem,
			   struct scatterlist *dst,
			   enum i915_cache_level dst_cache_level,
			   bool dst_is_lmem,
			   size_t sz)
{
	ktime_t t[5];
	int pass;
	int err = 0;

	for (pass = 0; pass < ARRAY_SIZE(t); pass++) {
		struct i915_request *rq;
		ktime_t t0, t1;

		t0 = ktime_get();

		err = intel_context_migrate_copy(ce, NULL,
						 src, src_cache_level,
						 src_is_lmem,
						 dst, dst_cache_level,
						 dst_is_lmem,
						 &rq);
		if (rq) {
			if (i915_request_wait(rq, 0, MAX_SCHEDULE_TIMEOUT) < 0)
				err = -EIO;
			i915_request_put(rq);
		}
		if (err)
			break;

		t1 = ktime_get();
		t[pass] = ktime_sub(t1, t0);
	}
	if (err)
		return err;

	sort(t, ARRAY_SIZE(t), sizeof(*t), wrap_ktime_compare, NULL);
	pr_info("%s: %zd KiB copy: %lld MiB/s\n",
		ce->engine->name, sz >> 10,
		div64_u64(mul_u32_u32(4 * sz,
				      1000 * 1000 * 1000),
			  t[1] + 2 * t[2] + t[3]) >> 20);
	return 0;
}

static int perf_copy_blt(void *arg)
{
	struct intel_gt *gt = arg;
	static const unsigned long sizes[] = {
		SZ_4K,
		SZ_64K,
		SZ_2M,
		SZ_64M
	};
	int i;

	for (i = 0; i < ARRAY_SIZE(sizes); i++) {
		struct drm_i915_gem_object *src, *dst;
		size_t sz;
		int err;

		src = create_init_lmem_internal(gt, sizes[i], true);
		if (IS_ERR(src))
			return PTR_ERR(src);

		sz = src->base.size;
		dst = create_init_lmem_internal(gt, sz, false);
		if (IS_ERR(dst)) {
			err = PTR_ERR(dst);
			goto err_src;
		}

		err = __perf_copy_blt(gt->migrate.context,
				      src->mm.pages->sgl,
				      I915_CACHE_NONE,
				      i915_gem_object_is_lmem(src),
				      dst->mm.pages->sgl,
				      I915_CACHE_NONE,
				      i915_gem_object_is_lmem(dst),
				      sz);

		i915_gem_object_unlock(dst);
		i915_gem_object_put(dst);
err_src:
		i915_gem_object_unlock(src);
		i915_gem_object_put(src);
		if (err)
			return err;
	}

	return 0;
}

int intel_migrate_perf_selftests(struct drm_i915_private *i915)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(perf_clear_blt),
		SUBTEST(perf_copy_blt),
	};
	struct intel_gt *gt = to_gt(i915);

	if (intel_gt_is_wedged(gt))
		return 0;

	if (!gt->migrate.context)
		return 0;

	return intel_gt_live_subtests(tests, gt);
}