xref: /openbmc/linux/drivers/gpu/drm/i915/gt/selftest_migrate.c (revision 32bc7297d855608fcb13af62a95739a079b4f8e2)
1 // SPDX-License-Identifier: MIT
2 /*
3  * Copyright © 2020 Intel Corporation
4  */
5 
6 #include <linux/sort.h>
7 
8 #include "gem/i915_gem_internal.h"
9 #include "gem/i915_gem_lmem.h"
10 
11 #include "selftests/igt_spinner.h"
12 #include "selftests/i915_random.h"
13 
/*
 * Object sizes (in bytes) walked by live_migrate_copy() and
 * live_migrate_clear(). The list brackets CHUNK_SZ (defined elsewhere) with
 * one entry 4 KiB below it, one exactly at it and one 4 KiB above it.
 */
static const unsigned int sizes[] = {
	SZ_4K,
	SZ_64K,
	SZ_2M,
	CHUNK_SZ - SZ_4K,
	CHUNK_SZ,
	CHUNK_SZ + SZ_4K,
	SZ_64M,
};
23 
24 static struct drm_i915_gem_object *
25 create_lmem_or_internal(struct drm_i915_private *i915, size_t size)
26 {
27 	struct drm_i915_gem_object *obj;
28 
29 	obj = i915_gem_object_create_lmem(i915, size, 0);
30 	if (!IS_ERR(obj))
31 		return obj;
32 
33 	return i915_gem_object_create_internal(i915, size);
34 }
35 
36 static int copy(struct intel_migrate *migrate,
37 		int (*fn)(struct intel_migrate *migrate,
38 			  struct i915_gem_ww_ctx *ww,
39 			  struct drm_i915_gem_object *src,
40 			  struct drm_i915_gem_object *dst,
41 			  struct i915_request **out),
42 		u32 sz, struct rnd_state *prng)
43 {
44 	struct drm_i915_private *i915 = migrate->context->engine->i915;
45 	struct drm_i915_gem_object *src, *dst;
46 	struct i915_request *rq;
47 	struct i915_gem_ww_ctx ww;
48 	u32 *vaddr;
49 	int err = 0;
50 	int i;
51 
52 	src = create_lmem_or_internal(i915, sz);
53 	if (IS_ERR(src))
54 		return 0;
55 
56 	sz = src->base.size;
57 	dst = i915_gem_object_create_internal(i915, sz);
58 	if (IS_ERR(dst))
59 		goto err_free_src;
60 
61 	for_i915_gem_ww(&ww, err, true) {
62 		err = i915_gem_object_lock(src, &ww);
63 		if (err)
64 			continue;
65 
66 		err = i915_gem_object_lock(dst, &ww);
67 		if (err)
68 			continue;
69 
70 		vaddr = i915_gem_object_pin_map(src, I915_MAP_WC);
71 		if (IS_ERR(vaddr)) {
72 			err = PTR_ERR(vaddr);
73 			continue;
74 		}
75 
76 		for (i = 0; i < sz / sizeof(u32); i++)
77 			vaddr[i] = i;
78 		i915_gem_object_flush_map(src);
79 
80 		vaddr = i915_gem_object_pin_map(dst, I915_MAP_WC);
81 		if (IS_ERR(vaddr)) {
82 			err = PTR_ERR(vaddr);
83 			goto unpin_src;
84 		}
85 
86 		for (i = 0; i < sz / sizeof(u32); i++)
87 			vaddr[i] = ~i;
88 		i915_gem_object_flush_map(dst);
89 
90 		err = fn(migrate, &ww, src, dst, &rq);
91 		if (!err)
92 			continue;
93 
94 		if (err != -EDEADLK && err != -EINTR && err != -ERESTARTSYS)
95 			pr_err("%ps failed, size: %u\n", fn, sz);
96 		if (rq) {
97 			i915_request_wait(rq, 0, HZ);
98 			i915_request_put(rq);
99 		}
100 		i915_gem_object_unpin_map(dst);
101 unpin_src:
102 		i915_gem_object_unpin_map(src);
103 	}
104 	if (err)
105 		goto err_out;
106 
107 	if (rq) {
108 		if (i915_request_wait(rq, 0, HZ) < 0) {
109 			pr_err("%ps timed out, size: %u\n", fn, sz);
110 			err = -ETIME;
111 		}
112 		i915_request_put(rq);
113 	}
114 
115 	for (i = 0; !err && i < sz / PAGE_SIZE; i++) {
116 		int x = i * 1024 + i915_prandom_u32_max_state(1024, prng);
117 
118 		if (vaddr[x] != x) {
119 			pr_err("%ps failed, size: %u, offset: %zu\n",
120 			       fn, sz, x * sizeof(u32));
121 			igt_hexdump(vaddr + i * 1024, 4096);
122 			err = -EINVAL;
123 		}
124 	}
125 
126 	i915_gem_object_unpin_map(dst);
127 	i915_gem_object_unpin_map(src);
128 
129 err_out:
130 	i915_gem_object_put(dst);
131 err_free_src:
132 	i915_gem_object_put(src);
133 
134 	return err;
135 }
136 
137 static int intel_context_copy_ccs(struct intel_context *ce,
138 				  const struct i915_deps *deps,
139 				  struct scatterlist *sg,
140 				  unsigned int pat_index,
141 				  bool write_to_ccs,
142 				  struct i915_request **out)
143 {
144 	u8 src_access = write_to_ccs ? DIRECT_ACCESS : INDIRECT_ACCESS;
145 	u8 dst_access = write_to_ccs ? INDIRECT_ACCESS : DIRECT_ACCESS;
146 	struct sgt_dma it = sg_sgt(sg);
147 	struct i915_request *rq;
148 	u32 offset;
149 	int err;
150 
151 	GEM_BUG_ON(ce->vm != ce->engine->gt->migrate.context->vm);
152 	*out = NULL;
153 
154 	GEM_BUG_ON(ce->ring->size < SZ_64K);
155 
156 	offset = 0;
157 	if (HAS_64K_PAGES(ce->engine->i915))
158 		offset = CHUNK_SZ;
159 
160 	do {
161 		int len;
162 
163 		rq = i915_request_create(ce);
164 		if (IS_ERR(rq)) {
165 			err = PTR_ERR(rq);
166 			goto out_ce;
167 		}
168 
169 		if (deps) {
170 			err = i915_request_await_deps(rq, deps);
171 			if (err)
172 				goto out_rq;
173 
174 			if (rq->engine->emit_init_breadcrumb) {
175 				err = rq->engine->emit_init_breadcrumb(rq);
176 				if (err)
177 					goto out_rq;
178 			}
179 
180 			deps = NULL;
181 		}
182 
183 		/* The PTE updates + clear must not be interrupted. */
184 		err = emit_no_arbitration(rq);
185 		if (err)
186 			goto out_rq;
187 
188 		len = emit_pte(rq, &it, pat_index, true, offset, CHUNK_SZ);
189 		if (len <= 0) {
190 			err = len;
191 			goto out_rq;
192 		}
193 
194 		err = rq->engine->emit_flush(rq, EMIT_INVALIDATE);
195 		if (err)
196 			goto out_rq;
197 
198 		err = emit_copy_ccs(rq, offset, dst_access,
199 				    offset, src_access, len);
200 		if (err)
201 			goto out_rq;
202 
203 		err = rq->engine->emit_flush(rq, EMIT_INVALIDATE);
204 
205 		/* Arbitration is re-enabled between requests. */
206 out_rq:
207 		if (*out)
208 			i915_request_put(*out);
209 		*out = i915_request_get(rq);
210 		i915_request_add(rq);
211 		if (err || !it.sg || !sg_dma_len(it.sg))
212 			break;
213 
214 		cond_resched();
215 	} while (1);
216 
217 out_ce:
218 	return err;
219 }
220 
221 static int
222 intel_migrate_ccs_copy(struct intel_migrate *m,
223 		       struct i915_gem_ww_ctx *ww,
224 		       const struct i915_deps *deps,
225 		       struct scatterlist *sg,
226 		       unsigned int pat_index,
227 		       bool write_to_ccs,
228 		       struct i915_request **out)
229 {
230 	struct intel_context *ce;
231 	int err;
232 
233 	*out = NULL;
234 	if (!m->context)
235 		return -ENODEV;
236 
237 	ce = intel_migrate_create_context(m);
238 	if (IS_ERR(ce))
239 		ce = intel_context_get(m->context);
240 	GEM_BUG_ON(IS_ERR(ce));
241 
242 	err = intel_context_pin_ww(ce, ww);
243 	if (err)
244 		goto out;
245 
246 	err = intel_context_copy_ccs(ce, deps, sg, pat_index,
247 				     write_to_ccs, out);
248 
249 	intel_context_unpin(ce);
250 out:
251 	intel_context_put(ce);
252 	return err;
253 }
254 
255 static int clear(struct intel_migrate *migrate,
256 		 int (*fn)(struct intel_migrate *migrate,
257 			   struct i915_gem_ww_ctx *ww,
258 			   struct drm_i915_gem_object *obj,
259 			   u32 value,
260 			   struct i915_request **out),
261 		 u32 sz, struct rnd_state *prng)
262 {
263 	struct drm_i915_private *i915 = migrate->context->engine->i915;
264 	struct drm_i915_gem_object *obj;
265 	struct i915_request *rq;
266 	struct i915_gem_ww_ctx ww;
267 	u32 *vaddr, val = 0;
268 	bool ccs_cap = false;
269 	int err = 0;
270 	int i;
271 
272 	obj = create_lmem_or_internal(i915, sz);
273 	if (IS_ERR(obj))
274 		return 0;
275 
276 	/* Consider the rounded up memory too */
277 	sz = obj->base.size;
278 
279 	if (HAS_FLAT_CCS(i915) && i915_gem_object_is_lmem(obj))
280 		ccs_cap = true;
281 
282 	for_i915_gem_ww(&ww, err, true) {
283 		int ccs_bytes, ccs_bytes_per_chunk;
284 
285 		err = i915_gem_object_lock(obj, &ww);
286 		if (err)
287 			continue;
288 
289 		vaddr = i915_gem_object_pin_map(obj, I915_MAP_WC);
290 		if (IS_ERR(vaddr)) {
291 			err = PTR_ERR(vaddr);
292 			continue;
293 		}
294 
295 		for (i = 0; i < sz / sizeof(u32); i++)
296 			vaddr[i] = ~i;
297 		i915_gem_object_flush_map(obj);
298 
299 		if (ccs_cap && !val) {
300 			/* Write the obj data into ccs surface */
301 			err = intel_migrate_ccs_copy(migrate, &ww, NULL,
302 						     obj->mm.pages->sgl,
303 						     obj->pat_index,
304 						     true, &rq);
305 			if (rq && !err) {
306 				if (i915_request_wait(rq, 0, HZ) < 0) {
307 					pr_err("%ps timed out, size: %u\n",
308 					       fn, sz);
309 					err = -ETIME;
310 				}
311 				i915_request_put(rq);
312 				rq = NULL;
313 			}
314 			if (err)
315 				continue;
316 		}
317 
318 		err = fn(migrate, &ww, obj, val, &rq);
319 		if (rq && !err) {
320 			if (i915_request_wait(rq, 0, HZ) < 0) {
321 				pr_err("%ps timed out, size: %u\n", fn, sz);
322 				err = -ETIME;
323 			}
324 			i915_request_put(rq);
325 			rq = NULL;
326 		}
327 		if (err)
328 			continue;
329 
330 		i915_gem_object_flush_map(obj);
331 
332 		/* Verify the set/clear of the obj mem */
333 		for (i = 0; !err && i < sz / PAGE_SIZE; i++) {
334 			int x = i * 1024 +
335 				i915_prandom_u32_max_state(1024, prng);
336 
337 			if (vaddr[x] != val) {
338 				pr_err("%ps failed, (%u != %u), offset: %zu\n",
339 				       fn, vaddr[x], val,  x * sizeof(u32));
340 				igt_hexdump(vaddr + i * 1024, 4096);
341 				err = -EINVAL;
342 			}
343 		}
344 		if (err)
345 			continue;
346 
347 		if (ccs_cap && !val) {
348 			for (i = 0; i < sz / sizeof(u32); i++)
349 				vaddr[i] = ~i;
350 			i915_gem_object_flush_map(obj);
351 
352 			err = intel_migrate_ccs_copy(migrate, &ww, NULL,
353 						     obj->mm.pages->sgl,
354 						     obj->pat_index,
355 						     false, &rq);
356 			if (rq && !err) {
357 				if (i915_request_wait(rq, 0, HZ) < 0) {
358 					pr_err("%ps timed out, size: %u\n",
359 					       fn, sz);
360 					err = -ETIME;
361 				}
362 				i915_request_put(rq);
363 				rq = NULL;
364 			}
365 			if (err)
366 				continue;
367 
368 			ccs_bytes = GET_CCS_BYTES(i915, sz);
369 			ccs_bytes_per_chunk = GET_CCS_BYTES(i915, CHUNK_SZ);
370 			i915_gem_object_flush_map(obj);
371 
372 			for (i = 0; !err && i < DIV_ROUND_UP(ccs_bytes, PAGE_SIZE); i++) {
373 				int offset = ((i * PAGE_SIZE)  /
374 					ccs_bytes_per_chunk) * CHUNK_SZ / sizeof(u32);
375 				int ccs_bytes_left = (ccs_bytes - i * PAGE_SIZE) / sizeof(u32);
376 				int x = i915_prandom_u32_max_state(min_t(int, 1024,
377 									 ccs_bytes_left), prng);
378 
379 				if (vaddr[offset + x]) {
380 					pr_err("%ps ccs clearing failed, offset: %ld/%d\n",
381 					       fn, i * PAGE_SIZE + x * sizeof(u32), ccs_bytes);
382 					igt_hexdump(vaddr + offset,
383 						    min_t(int, 4096,
384 							  ccs_bytes_left * sizeof(u32)));
385 					err = -EINVAL;
386 				}
387 			}
388 
389 			if (err)
390 				continue;
391 		}
392 		i915_gem_object_unpin_map(obj);
393 	}
394 
395 	if (err) {
396 		if (err != -EDEADLK && err != -EINTR && err != -ERESTARTSYS)
397 			pr_err("%ps failed, size: %u\n", fn, sz);
398 		if (rq && err != -EINVAL) {
399 			i915_request_wait(rq, 0, HZ);
400 			i915_request_put(rq);
401 		}
402 
403 		i915_gem_object_unpin_map(obj);
404 	}
405 
406 	i915_gem_object_put(obj);
407 	return err;
408 }
409 
410 static int __migrate_copy(struct intel_migrate *migrate,
411 			  struct i915_gem_ww_ctx *ww,
412 			  struct drm_i915_gem_object *src,
413 			  struct drm_i915_gem_object *dst,
414 			  struct i915_request **out)
415 {
416 	return intel_migrate_copy(migrate, ww, NULL,
417 				  src->mm.pages->sgl, src->pat_index,
418 				  i915_gem_object_is_lmem(src),
419 				  dst->mm.pages->sgl, dst->pat_index,
420 				  i915_gem_object_is_lmem(dst),
421 				  out);
422 }
423 
424 static int __global_copy(struct intel_migrate *migrate,
425 			 struct i915_gem_ww_ctx *ww,
426 			 struct drm_i915_gem_object *src,
427 			 struct drm_i915_gem_object *dst,
428 			 struct i915_request **out)
429 {
430 	return intel_context_migrate_copy(migrate->context, NULL,
431 					  src->mm.pages->sgl, src->pat_index,
432 					  i915_gem_object_is_lmem(src),
433 					  dst->mm.pages->sgl, dst->pat_index,
434 					  i915_gem_object_is_lmem(dst),
435 					  out);
436 }
437 
438 static int
439 migrate_copy(struct intel_migrate *migrate, u32 sz, struct rnd_state *prng)
440 {
441 	return copy(migrate, __migrate_copy, sz, prng);
442 }
443 
444 static int
445 global_copy(struct intel_migrate *migrate, u32 sz, struct rnd_state *prng)
446 {
447 	return copy(migrate, __global_copy, sz, prng);
448 }
449 
450 static int __migrate_clear(struct intel_migrate *migrate,
451 			   struct i915_gem_ww_ctx *ww,
452 			   struct drm_i915_gem_object *obj,
453 			   u32 value,
454 			   struct i915_request **out)
455 {
456 	return intel_migrate_clear(migrate, ww, NULL,
457 				   obj->mm.pages->sgl,
458 				   obj->pat_index,
459 				   i915_gem_object_is_lmem(obj),
460 				   value, out);
461 }
462 
463 static int __global_clear(struct intel_migrate *migrate,
464 			  struct i915_gem_ww_ctx *ww,
465 			  struct drm_i915_gem_object *obj,
466 			  u32 value,
467 			  struct i915_request **out)
468 {
469 	return intel_context_migrate_clear(migrate->context, NULL,
470 					   obj->mm.pages->sgl,
471 					   obj->pat_index,
472 					   i915_gem_object_is_lmem(obj),
473 					   value, out);
474 }
475 
476 static int
477 migrate_clear(struct intel_migrate *migrate, u32 sz, struct rnd_state *prng)
478 {
479 	return clear(migrate, __migrate_clear, sz, prng);
480 }
481 
482 static int
483 global_clear(struct intel_migrate *migrate, u32 sz, struct rnd_state *prng)
484 {
485 	return clear(migrate, __global_clear, sz, prng);
486 }
487 
488 static int live_migrate_copy(void *arg)
489 {
490 	struct intel_gt *gt = arg;
491 	struct intel_migrate *migrate = &gt->migrate;
492 	struct drm_i915_private *i915 = migrate->context->engine->i915;
493 	I915_RND_STATE(prng);
494 	int i;
495 
496 	for (i = 0; i < ARRAY_SIZE(sizes); i++) {
497 		int err;
498 
499 		err = migrate_copy(migrate, sizes[i], &prng);
500 		if (err == 0)
501 			err = global_copy(migrate, sizes[i], &prng);
502 		i915_gem_drain_freed_objects(i915);
503 		if (err)
504 			return err;
505 	}
506 
507 	return 0;
508 }
509 
510 static int live_migrate_clear(void *arg)
511 {
512 	struct intel_gt *gt = arg;
513 	struct intel_migrate *migrate = &gt->migrate;
514 	struct drm_i915_private *i915 = migrate->context->engine->i915;
515 	I915_RND_STATE(prng);
516 	int i;
517 
518 	for (i = 0; i < ARRAY_SIZE(sizes); i++) {
519 		int err;
520 
521 		err = migrate_clear(migrate, sizes[i], &prng);
522 		if (err == 0)
523 			err = global_clear(migrate, sizes[i], &prng);
524 
525 		i915_gem_drain_freed_objects(i915);
526 		if (err)
527 			return err;
528 	}
529 
530 	return 0;
531 }
532 
/*
 * Pairs a spinner with the timer that ends it: spinner_kill() recovers this
 * structure from its timer argument and calls igt_spinner_end() on @spin.
 */
struct spinner_timer {
	struct timer_list timer;
	struct igt_spinner spin;
};
537 
538 static void spinner_kill(struct timer_list *timer)
539 {
540 	struct spinner_timer *st = from_timer(st, timer, timer);
541 
542 	igt_spinner_end(&st->spin);
543 	pr_info("%s\n", __func__);
544 }
545 
546 static int live_emit_pte_full_ring(void *arg)
547 {
548 	struct intel_gt *gt = arg;
549 	struct intel_migrate *migrate = &gt->migrate;
550 	struct drm_i915_private *i915 = migrate->context->engine->i915;
551 	struct drm_i915_gem_object *obj;
552 	struct intel_context *ce;
553 	struct i915_request *rq, *prev;
554 	struct spinner_timer st;
555 	struct sgt_dma it;
556 	int len, sz, err;
557 	u32 *cs;
558 
559 	/*
560 	 * Simple regression test to check that we don't trample the
561 	 * rq->reserved_space when returning from emit_pte(), if the ring is
562 	 * nearly full.
563 	 */
564 
565 	if (igt_spinner_init(&st.spin, to_gt(i915)))
566 		return -ENOMEM;
567 
568 	obj = i915_gem_object_create_internal(i915, 2 * PAGE_SIZE);
569 	if (IS_ERR(obj)) {
570 		err = PTR_ERR(obj);
571 		goto out_spinner;
572 	}
573 
574 	err = i915_gem_object_pin_pages_unlocked(obj);
575 	if (err)
576 		goto out_obj;
577 
578 	ce = intel_migrate_create_context(migrate);
579 	if (IS_ERR(ce)) {
580 		err = PTR_ERR(ce);
581 		goto out_obj;
582 	}
583 
584 	ce->ring_size = SZ_4K; /* Not too big */
585 
586 	err = intel_context_pin(ce);
587 	if (err)
588 		goto out_put;
589 
590 	rq = igt_spinner_create_request(&st.spin, ce, MI_ARB_CHECK);
591 	if (IS_ERR(rq)) {
592 		err = PTR_ERR(rq);
593 		goto out_unpin;
594 	}
595 
596 	i915_request_add(rq);
597 	if (!igt_wait_for_spinner(&st.spin, rq)) {
598 		err = -EIO;
599 		goto out_unpin;
600 	}
601 
602 	/*
603 	 * Fill the rest of the ring leaving I915_EMIT_PTE_NUM_DWORDS +
604 	 * ring->reserved_space at the end. To actually emit the PTEs we require
605 	 * slightly more than I915_EMIT_PTE_NUM_DWORDS, since our object size is
606 	 * greater than PAGE_SIZE. The correct behaviour is to wait for more
607 	 * ring space in emit_pte(), otherwise we trample on the reserved_space
608 	 * resulting in crashes when later submitting the rq.
609 	 */
610 
611 	prev = NULL;
612 	do {
613 		if (prev)
614 			i915_request_add(rq);
615 
616 		rq = i915_request_create(ce);
617 		if (IS_ERR(rq)) {
618 			err = PTR_ERR(rq);
619 			goto out_unpin;
620 		}
621 
622 		sz = (rq->ring->space - rq->reserved_space) / sizeof(u32) -
623 			I915_EMIT_PTE_NUM_DWORDS;
624 		sz = min_t(u32, sz, (SZ_1K - rq->reserved_space) / sizeof(u32) -
625 			   I915_EMIT_PTE_NUM_DWORDS);
626 		cs = intel_ring_begin(rq, sz);
627 		if (IS_ERR(cs)) {
628 			err = PTR_ERR(cs);
629 			goto out_rq;
630 		}
631 
632 		memset32(cs, MI_NOOP, sz);
633 		cs += sz;
634 		intel_ring_advance(rq, cs);
635 
636 		pr_info("%s emit=%u sz=%d\n", __func__, rq->ring->emit, sz);
637 
638 		prev = rq;
639 	} while (rq->ring->space > (rq->reserved_space +
640 				    I915_EMIT_PTE_NUM_DWORDS * sizeof(u32)));
641 
642 	timer_setup_on_stack(&st.timer, spinner_kill, 0);
643 	mod_timer(&st.timer, jiffies + 2 * HZ);
644 
645 	/*
646 	 * This should wait for the spinner to be killed, otherwise we should go
647 	 * down in flames when doing i915_request_add().
648 	 */
649 	pr_info("%s emite_pte ring space=%u\n", __func__, rq->ring->space);
650 	it = sg_sgt(obj->mm.pages->sgl);
651 	len = emit_pte(rq, &it, obj->pat_index, false, 0, CHUNK_SZ);
652 	if (!len) {
653 		err = -EINVAL;
654 		goto out_rq;
655 	}
656 	if (len < 0) {
657 		err = len;
658 		goto out_rq;
659 	}
660 
661 out_rq:
662 	i915_request_add(rq); /* GEM_BUG_ON(rq->reserved_space > ring->space)? */
663 	del_timer_sync(&st.timer);
664 	destroy_timer_on_stack(&st.timer);
665 out_unpin:
666 	intel_context_unpin(ce);
667 out_put:
668 	intel_context_put(ce);
669 out_obj:
670 	i915_gem_object_put(obj);
671 out_spinner:
672 	igt_spinner_fini(&st.spin);
673 	return err;
674 }
675 
/*
 * Per-thread state for the threaded subtests: the migrate instance under
 * test, the kthread created for it, and a PRNG state private to that thread
 * (seeded by the threaded_migrate() function).
 */
struct threaded_migrate {
	struct intel_migrate *migrate;
	struct task_struct *tsk;
	struct rnd_state prng;
};
681 
682 static int threaded_migrate(struct intel_migrate *migrate,
683 			    int (*fn)(void *arg),
684 			    unsigned int flags)
685 {
686 	const unsigned int n_cpus = num_online_cpus() + 1;
687 	struct threaded_migrate *thread;
688 	I915_RND_STATE(prng);
689 	unsigned int i;
690 	int err = 0;
691 
692 	thread = kcalloc(n_cpus, sizeof(*thread), GFP_KERNEL);
693 	if (!thread)
694 		return 0;
695 
696 	for (i = 0; i < n_cpus; ++i) {
697 		struct task_struct *tsk;
698 
699 		thread[i].migrate = migrate;
700 		thread[i].prng =
701 			I915_RND_STATE_INITIALIZER(prandom_u32_state(&prng));
702 
703 		tsk = kthread_run(fn, &thread[i], "igt-%d", i);
704 		if (IS_ERR(tsk)) {
705 			err = PTR_ERR(tsk);
706 			break;
707 		}
708 
709 		get_task_struct(tsk);
710 		thread[i].tsk = tsk;
711 	}
712 
713 	msleep(10); /* start all threads before we kthread_stop() */
714 
715 	for (i = 0; i < n_cpus; ++i) {
716 		struct task_struct *tsk = thread[i].tsk;
717 		int status;
718 
719 		if (IS_ERR_OR_NULL(tsk))
720 			continue;
721 
722 		status = kthread_stop(tsk);
723 		if (status && !err)
724 			err = status;
725 
726 		put_task_struct(tsk);
727 	}
728 
729 	kfree(thread);
730 	return err;
731 }
732 
733 static int __thread_migrate_copy(void *arg)
734 {
735 	struct threaded_migrate *tm = arg;
736 
737 	return migrate_copy(tm->migrate, 2 * CHUNK_SZ, &tm->prng);
738 }
739 
740 static int thread_migrate_copy(void *arg)
741 {
742 	struct intel_gt *gt = arg;
743 	struct intel_migrate *migrate = &gt->migrate;
744 
745 	return threaded_migrate(migrate, __thread_migrate_copy, 0);
746 }
747 
748 static int __thread_global_copy(void *arg)
749 {
750 	struct threaded_migrate *tm = arg;
751 
752 	return global_copy(tm->migrate, 2 * CHUNK_SZ, &tm->prng);
753 }
754 
755 static int thread_global_copy(void *arg)
756 {
757 	struct intel_gt *gt = arg;
758 	struct intel_migrate *migrate = &gt->migrate;
759 
760 	return threaded_migrate(migrate, __thread_global_copy, 0);
761 }
762 
763 static int __thread_migrate_clear(void *arg)
764 {
765 	struct threaded_migrate *tm = arg;
766 
767 	return migrate_clear(tm->migrate, 2 * CHUNK_SZ, &tm->prng);
768 }
769 
770 static int __thread_global_clear(void *arg)
771 {
772 	struct threaded_migrate *tm = arg;
773 
774 	return global_clear(tm->migrate, 2 * CHUNK_SZ, &tm->prng);
775 }
776 
777 static int thread_migrate_clear(void *arg)
778 {
779 	struct intel_gt *gt = arg;
780 	struct intel_migrate *migrate = &gt->migrate;
781 
782 	return threaded_migrate(migrate, __thread_migrate_clear, 0);
783 }
784 
785 static int thread_global_clear(void *arg)
786 {
787 	struct intel_gt *gt = arg;
788 	struct intel_migrate *migrate = &gt->migrate;
789 
790 	return threaded_migrate(migrate, __thread_global_clear, 0);
791 }
792 
793 int intel_migrate_live_selftests(struct drm_i915_private *i915)
794 {
795 	static const struct i915_subtest tests[] = {
796 		SUBTEST(live_migrate_copy),
797 		SUBTEST(live_migrate_clear),
798 		SUBTEST(live_emit_pte_full_ring),
799 		SUBTEST(thread_migrate_copy),
800 		SUBTEST(thread_migrate_clear),
801 		SUBTEST(thread_global_copy),
802 		SUBTEST(thread_global_clear),
803 	};
804 	struct intel_gt *gt = to_gt(i915);
805 
806 	if (!gt->migrate.context)
807 		return 0;
808 
809 	return intel_gt_live_subtests(tests, gt);
810 }
811 
812 static struct drm_i915_gem_object *
813 create_init_lmem_internal(struct intel_gt *gt, size_t sz, bool try_lmem)
814 {
815 	struct drm_i915_gem_object *obj = NULL;
816 	int err;
817 
818 	if (try_lmem)
819 		obj = i915_gem_object_create_lmem(gt->i915, sz, 0);
820 
821 	if (IS_ERR_OR_NULL(obj)) {
822 		obj = i915_gem_object_create_internal(gt->i915, sz);
823 		if (IS_ERR(obj))
824 			return obj;
825 	}
826 
827 	i915_gem_object_trylock(obj, NULL);
828 	err = i915_gem_object_pin_pages(obj);
829 	if (err) {
830 		i915_gem_object_unlock(obj);
831 		i915_gem_object_put(obj);
832 		return ERR_PTR(err);
833 	}
834 
835 	return obj;
836 }
837 
838 static int wrap_ktime_compare(const void *A, const void *B)
839 {
840 	const ktime_t *a = A, *b = B;
841 
842 	return ktime_compare(*a, *b);
843 }
844 
845 static int __perf_clear_blt(struct intel_context *ce,
846 			    struct scatterlist *sg,
847 			    unsigned int pat_index,
848 			    bool is_lmem,
849 			    size_t sz)
850 {
851 	ktime_t t[5];
852 	int pass;
853 	int err = 0;
854 
855 	for (pass = 0; pass < ARRAY_SIZE(t); pass++) {
856 		struct i915_request *rq;
857 		ktime_t t0, t1;
858 
859 		t0 = ktime_get();
860 
861 		err = intel_context_migrate_clear(ce, NULL, sg, pat_index,
862 						  is_lmem, 0, &rq);
863 		if (rq) {
864 			if (i915_request_wait(rq, 0, MAX_SCHEDULE_TIMEOUT) < 0)
865 				err = -EIO;
866 			i915_request_put(rq);
867 		}
868 		if (err)
869 			break;
870 
871 		t1 = ktime_get();
872 		t[pass] = ktime_sub(t1, t0);
873 	}
874 	if (err)
875 		return err;
876 
877 	sort(t, ARRAY_SIZE(t), sizeof(*t), wrap_ktime_compare, NULL);
878 	pr_info("%s: %zd KiB fill: %lld MiB/s\n",
879 		ce->engine->name, sz >> 10,
880 		div64_u64(mul_u32_u32(4 * sz,
881 				      1000 * 1000 * 1000),
882 			  t[1] + 2 * t[2] + t[3]) >> 20);
883 	return 0;
884 }
885 
886 static int perf_clear_blt(void *arg)
887 {
888 	struct intel_gt *gt = arg;
889 	static const unsigned long sizes[] = {
890 		SZ_4K,
891 		SZ_64K,
892 		SZ_2M,
893 		SZ_64M
894 	};
895 	int i;
896 
897 	for (i = 0; i < ARRAY_SIZE(sizes); i++) {
898 		struct drm_i915_gem_object *dst;
899 		int err;
900 
901 		dst = create_init_lmem_internal(gt, sizes[i], true);
902 		if (IS_ERR(dst))
903 			return PTR_ERR(dst);
904 
905 		err = __perf_clear_blt(gt->migrate.context,
906 				       dst->mm.pages->sgl,
907 				       i915_gem_get_pat_index(gt->i915,
908 							      I915_CACHE_NONE),
909 				       i915_gem_object_is_lmem(dst),
910 				       sizes[i]);
911 
912 		i915_gem_object_unlock(dst);
913 		i915_gem_object_put(dst);
914 		if (err)
915 			return err;
916 	}
917 
918 	return 0;
919 }
920 
921 static int __perf_copy_blt(struct intel_context *ce,
922 			   struct scatterlist *src,
923 			   unsigned int src_pat_index,
924 			   bool src_is_lmem,
925 			   struct scatterlist *dst,
926 			   unsigned int dst_pat_index,
927 			   bool dst_is_lmem,
928 			   size_t sz)
929 {
930 	ktime_t t[5];
931 	int pass;
932 	int err = 0;
933 
934 	for (pass = 0; pass < ARRAY_SIZE(t); pass++) {
935 		struct i915_request *rq;
936 		ktime_t t0, t1;
937 
938 		t0 = ktime_get();
939 
940 		err = intel_context_migrate_copy(ce, NULL,
941 						 src, src_pat_index,
942 						 src_is_lmem,
943 						 dst, dst_pat_index,
944 						 dst_is_lmem,
945 						 &rq);
946 		if (rq) {
947 			if (i915_request_wait(rq, 0, MAX_SCHEDULE_TIMEOUT) < 0)
948 				err = -EIO;
949 			i915_request_put(rq);
950 		}
951 		if (err)
952 			break;
953 
954 		t1 = ktime_get();
955 		t[pass] = ktime_sub(t1, t0);
956 	}
957 	if (err)
958 		return err;
959 
960 	sort(t, ARRAY_SIZE(t), sizeof(*t), wrap_ktime_compare, NULL);
961 	pr_info("%s: %zd KiB copy: %lld MiB/s\n",
962 		ce->engine->name, sz >> 10,
963 		div64_u64(mul_u32_u32(4 * sz,
964 				      1000 * 1000 * 1000),
965 			  t[1] + 2 * t[2] + t[3]) >> 20);
966 	return 0;
967 }
968 
969 static int perf_copy_blt(void *arg)
970 {
971 	struct intel_gt *gt = arg;
972 	static const unsigned long sizes[] = {
973 		SZ_4K,
974 		SZ_64K,
975 		SZ_2M,
976 		SZ_64M
977 	};
978 	int i;
979 
980 	for (i = 0; i < ARRAY_SIZE(sizes); i++) {
981 		struct drm_i915_gem_object *src, *dst;
982 		size_t sz;
983 		int err;
984 
985 		src = create_init_lmem_internal(gt, sizes[i], true);
986 		if (IS_ERR(src))
987 			return PTR_ERR(src);
988 
989 		sz = src->base.size;
990 		dst = create_init_lmem_internal(gt, sz, false);
991 		if (IS_ERR(dst)) {
992 			err = PTR_ERR(dst);
993 			goto err_src;
994 		}
995 
996 		err = __perf_copy_blt(gt->migrate.context,
997 				      src->mm.pages->sgl,
998 				      i915_gem_get_pat_index(gt->i915,
999 							     I915_CACHE_NONE),
1000 				      i915_gem_object_is_lmem(src),
1001 				      dst->mm.pages->sgl,
1002 				      i915_gem_get_pat_index(gt->i915,
1003 							     I915_CACHE_NONE),
1004 				      i915_gem_object_is_lmem(dst),
1005 				      sz);
1006 
1007 		i915_gem_object_unlock(dst);
1008 		i915_gem_object_put(dst);
1009 err_src:
1010 		i915_gem_object_unlock(src);
1011 		i915_gem_object_put(src);
1012 		if (err)
1013 			return err;
1014 	}
1015 
1016 	return 0;
1017 }
1018 
1019 int intel_migrate_perf_selftests(struct drm_i915_private *i915)
1020 {
1021 	static const struct i915_subtest tests[] = {
1022 		SUBTEST(perf_clear_blt),
1023 		SUBTEST(perf_copy_blt),
1024 	};
1025 	struct intel_gt *gt = to_gt(i915);
1026 
1027 	if (intel_gt_is_wedged(gt))
1028 		return 0;
1029 
1030 	if (!gt->migrate.context)
1031 		return 0;
1032 
1033 	return intel_gt_live_subtests(tests, gt);
1034 }
1035