/*
 * Copyright © 2016 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 */

#include <linux/prime_numbers.h>
#include <linux/pm_qos.h>

#include "gem/i915_gem_pm.h"
#include "gem/selftests/mock_context.h"

#include "gt/intel_engine_pm.h"
#include "gt/intel_engine_user.h"
#include "gt/intel_gt.h"

#include "i915_random.h"
#include "i915_selftest.h"
#include "igt_live_test.h"
#include "igt_spinner.h"
#include "lib_sw_fence.h"

#include "mock_drm.h"
#include "mock_gem_device.h"

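/* Count the engines exposed to userspace via the uabi engine list. */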
static unsigned int num_uabi_engines(struct drm_i915_private *i915)
{
	struct intel_engine_cs *engine;
	unsigned int count;

	count = 0;
	for_each_uabi_engine(engine, i915)
		count++;

	return count;
}

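/* Convenience lookup of the first user-visible render engine (rcs0). */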
static struct intel_engine_cs *rcs0(struct drm_i915_private *i915)
{
	return intel_engine_lookup_user(i915, I915_ENGINE_CLASS_RENDER, 0);
}

static int igt_add_request(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct i915_request *request;

	/* Basic preliminary test to create a request and let it loose! */

	request = mock_request(rcs0(i915)->kernel_context, HZ / 10);
	if (!request)
		return -ENOMEM;

	i915_request_add(request);

	return 0;
}

static int igt_wait_request(void *arg)
{
	const long T = HZ / 4;
	struct drm_i915_private *i915 = arg;
	struct i915_request *request;
	int err = -EINVAL;

	/* Submit a request, then wait upon it */

	request = mock_request(rcs0(i915)->kernel_context, T);
	if (!request)
		return -ENOMEM;

	i915_request_get(request);

	if (i915_request_wait(request, 0, 0) != -ETIME) {
		pr_err("request wait (busy query) succeeded (expected timeout before submit!)\n");
		goto out_request;
	}

	if (i915_request_wait(request, 0, T) != -ETIME) {
		pr_err("request wait succeeded (expected timeout before submit!)\n");
		goto out_request;
	}

	if (i915_request_completed(request)) {
		pr_err("request completed before submit!!\n");
		goto out_request;
	}

	i915_request_add(request);

	if (i915_request_wait(request, 0, 0) != -ETIME) {
		pr_err("request wait (busy query) succeeded (expected timeout after submit!)\n");
		goto out_request;
	}

	if (i915_request_completed(request)) {
		pr_err("request completed immediately!\n");
		goto out_request;
	}

	if (i915_request_wait(request, 0, T / 2) != -ETIME) {
		pr_err("request wait succeeded (expected timeout!)\n");
		goto out_request;
	}

	if (i915_request_wait(request, 0, T) == -ETIME) {
		pr_err("request wait timed out!\n");
		goto out_request;
	}

	if (!i915_request_completed(request)) {
		pr_err("request not complete after waiting!\n");
		goto out_request;
	}

	if (i915_request_wait(request, 0, T) == -ETIME) {
		pr_err("request wait timed out when already complete!\n");
		goto out_request;
	}

	err = 0;
out_request:
	i915_request_put(request);
	mock_device_flush(i915);
	return err;
}

static int igt_fence_wait(void *arg)
{
	const long T = HZ / 4;
	struct drm_i915_private *i915 = arg;
	struct i915_request *request;
	int err = -EINVAL;

	/* Submit a request, treat it as a fence and wait upon it */

	request = mock_request(rcs0(i915)->kernel_context, T);
	if (!request)
		return -ENOMEM;

	if (dma_fence_wait_timeout(&request->fence, false, T) != -ETIME) {
		pr_err("fence wait success before submit (expected timeout)!\n");
		goto out;
	}

	i915_request_add(request);

	if (dma_fence_is_signaled(&request->fence)) {
		pr_err("fence signaled immediately!\n");
		goto out;
	}

	if (dma_fence_wait_timeout(&request->fence, false, T / 2) != -ETIME) {
		pr_err("fence wait success after submit (expected timeout)!\n");
		goto out;
	}

	if (dma_fence_wait_timeout(&request->fence, false, T) <= 0) {
		pr_err("fence wait timed out (expected success)!\n");
		goto out;
	}

	if (!dma_fence_is_signaled(&request->fence)) {
		pr_err("fence unsignaled after waiting!\n");
		goto out;
	}

	if (dma_fence_wait_timeout(&request->fence, false, T) <= 0) {
		pr_err("fence wait timed out when complete (expected success)!\n");
		goto out;
	}

	err = 0;
out:
	mock_device_flush(i915);
	return err;
}

static int igt_request_rewind(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct i915_request *request, *vip;
	struct i915_gem_context *ctx[2];
	struct intel_context *ce;
	int err = -EINVAL;

	ctx[0] = mock_context(i915, "A");

	ce = i915_gem_context_get_engine(ctx[0], RCS0);
	GEM_BUG_ON(IS_ERR(ce));
	request = mock_request(ce, 2 * HZ);
	intel_context_put(ce);
	if (!request) {
		err = -ENOMEM;
		goto err_context_0;
	}

	i915_request_get(request);
	i915_request_add(request);

	ctx[1] = mock_context(i915, "B");

	ce = i915_gem_context_get_engine(ctx[1], RCS0);
	GEM_BUG_ON(IS_ERR(ce));
	vip = mock_request(ce, 0);
	intel_context_put(ce);
	if (!vip) {
		err = -ENOMEM;
		goto err_context_1;
	}

	/* Simulate preemption by manual reordering */
	if (!mock_cancel_request(request)) {
		pr_err("failed to cancel request (already executed)!\n");
		i915_request_add(vip);
		goto err_context_1;
	}
	i915_request_get(vip);
	i915_request_add(vip);
	rcu_read_lock();
	request->engine->submit_request(request);
	rcu_read_unlock();

	if (i915_request_wait(vip, 0, HZ) == -ETIME) {
		pr_err("timed out waiting for high priority request\n");
		goto err;
	}

	if (i915_request_completed(request)) {
		pr_err("low priority request already completed\n");
		goto err;
	}

	err = 0;
err:
	i915_request_put(vip);
err_context_1:
	mock_context_close(ctx[1]);
	i915_request_put(request);
err_context_0:
	mock_context_close(ctx[0]);
	mock_device_flush(i915);
	return err;
}

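/*
 * Parameters and results for a single breadcrumbs smoketest worker:
 * each kthread picks contexts at random, submits batches of up to
 * max_batch requests and waits for them to signal, accumulating the
 * totals in num_waits/num_fences.
 */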
struct smoketest {
	struct intel_engine_cs *engine;
	struct i915_gem_context **contexts;
	atomic_long_t num_waits, num_fences;
	int ncontexts, max_batch;
	struct i915_request *(*request_alloc)(struct intel_context *ce);
};

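/*
 * Request constructors for the smoketest: the mock flavour builds a
 * software-only request, while the live flavour emits a real request
 * on the given context.
 */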
static struct i915_request *
__mock_request_alloc(struct intel_context *ce)
{
	return mock_request(ce, 0);
}

static struct i915_request *
__live_request_alloc(struct intel_context *ce)
{
	return intel_context_create_request(ce);
}

static int __igt_breadcrumbs_smoketest(void *arg)
{
	struct smoketest *t = arg;
	const unsigned int max_batch = min(t->ncontexts, t->max_batch) - 1;
	const unsigned int total = 4 * t->ncontexts + 1;
	unsigned int num_waits = 0, num_fences = 0;
	struct i915_request **requests;
	I915_RND_STATE(prng);
	unsigned int *order;
	int err = 0;

	/*
	 * A very simple test to catch the most egregious of list handling bugs.
	 *
	 * At its heart, we simply create oodles of requests running across
	 * multiple kthreads and enable signaling on them, for the sole purpose
	 * of stressing our breadcrumb handling. The only inspection we do is
	 * that the fences were marked as signaled.
	 */

	requests = kcalloc(total, sizeof(*requests), GFP_KERNEL);
	if (!requests)
		return -ENOMEM;

	order = i915_random_order(total, &prng);
	if (!order) {
		err = -ENOMEM;
		goto out_requests;
	}

	while (!kthread_should_stop()) {
		struct i915_sw_fence *submit, *wait;
		unsigned int n, count;

		submit = heap_fence_create(GFP_KERNEL);
		if (!submit) {
			err = -ENOMEM;
			break;
		}

		wait = heap_fence_create(GFP_KERNEL);
		if (!wait) {
			i915_sw_fence_commit(submit);
			heap_fence_put(submit);
			err = -ENOMEM;
			break;
		}

		i915_random_reorder(order, total, &prng);
		count = 1 + i915_prandom_u32_max_state(max_batch, &prng);

		for (n = 0; n < count; n++) {
			struct i915_gem_context *ctx =
				t->contexts[order[n] % t->ncontexts];
			struct i915_request *rq;
			struct intel_context *ce;

			ce = i915_gem_context_get_engine(ctx, t->engine->legacy_idx);
			GEM_BUG_ON(IS_ERR(ce));
			rq = t->request_alloc(ce);
			intel_context_put(ce);
			if (IS_ERR(rq)) {
				err = PTR_ERR(rq);
				count = n;
				break;
			}

			err = i915_sw_fence_await_sw_fence_gfp(&rq->submit,
							       submit,
							       GFP_KERNEL);

			requests[n] = i915_request_get(rq);
			i915_request_add(rq);

			if (err >= 0)
				err = i915_sw_fence_await_dma_fence(wait,
								    &rq->fence,
								    0,
								    GFP_KERNEL);

			if (err < 0) {
				i915_request_put(rq);
				count = n;
				break;
			}
		}

		i915_sw_fence_commit(submit);
		i915_sw_fence_commit(wait);

		if (!wait_event_timeout(wait->wait,
					i915_sw_fence_done(wait),
					5 * HZ)) {
			struct i915_request *rq = requests[count - 1];

			pr_err("waiting for %d/%d fences (last %llx:%lld) on %s timed out!\n",
			       atomic_read(&wait->pending), count,
			       rq->fence.context, rq->fence.seqno,
			       t->engine->name);
			GEM_TRACE_DUMP();

			intel_gt_set_wedged(t->engine->gt);
			GEM_BUG_ON(!i915_request_completed(rq));
			i915_sw_fence_wait(wait);
			err = -EIO;
		}

		for (n = 0; n < count; n++) {
			struct i915_request *rq = requests[n];

			if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
				      &rq->fence.flags)) {
				pr_err("%llu:%llu was not signaled!\n",
				       rq->fence.context, rq->fence.seqno);
				err = -EINVAL;
			}

			i915_request_put(rq);
		}

		heap_fence_put(wait);
		heap_fence_put(submit);

		if (err < 0)
			break;

		num_fences += count;
		num_waits++;

		cond_resched();
	}

	atomic_long_add(num_fences, &t->num_fences);
	atomic_long_add(num_waits, &t->num_waits);

	kfree(order);
out_requests:
	kfree(requests);
	return err;
}

static int mock_breadcrumbs_smoketest(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct smoketest t = {
		.engine = rcs0(i915),
		.ncontexts = 1024,
		.max_batch = 1024,
		.request_alloc = __mock_request_alloc
	};
	unsigned int ncpus = num_online_cpus();
	struct task_struct **threads;
	unsigned int n;
	int ret = 0;

	/*
	 * Smoketest our breadcrumb/signal handling for requests across multiple
	 * threads. A very simple test to only catch the most egregious of bugs.
	 * See __igt_breadcrumbs_smoketest();
	 */

	threads = kcalloc(ncpus, sizeof(*threads), GFP_KERNEL);
	if (!threads)
		return -ENOMEM;

	t.contexts = kcalloc(t.ncontexts, sizeof(*t.contexts), GFP_KERNEL);
	if (!t.contexts) {
		ret = -ENOMEM;
		goto out_threads;
	}

	for (n = 0; n < t.ncontexts; n++) {
		t.contexts[n] = mock_context(t.engine->i915, "mock");
		if (!t.contexts[n]) {
			ret = -ENOMEM;
			goto out_contexts;
		}
	}

	for (n = 0; n < ncpus; n++) {
		threads[n] = kthread_run(__igt_breadcrumbs_smoketest,
					 &t, "igt/%d", n);
		if (IS_ERR(threads[n])) {
			ret = PTR_ERR(threads[n]);
			ncpus = n;
			break;
		}

		get_task_struct(threads[n]);
	}

	yield(); /* start all threads before we begin */
	msleep(jiffies_to_msecs(i915_selftest.timeout_jiffies));

	for (n = 0; n < ncpus; n++) {
		int err;

		err = kthread_stop(threads[n]);
		if (err < 0 && !ret)
			ret = err;

		put_task_struct(threads[n]);
	}
	pr_info("Completed %lu waits for %lu fences across %d cpus\n",
		atomic_long_read(&t.num_waits),
		atomic_long_read(&t.num_fences),
		ncpus);

out_contexts:
	for (n = 0; n < t.ncontexts; n++) {
		if (!t.contexts[n])
			break;
		mock_context_close(t.contexts[n]);
	}
	kfree(t.contexts);
out_threads:
	kfree(threads);
	return ret;
}

int i915_request_mock_selftests(void)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(igt_add_request),
		SUBTEST(igt_wait_request),
		SUBTEST(igt_fence_wait),
		SUBTEST(igt_request_rewind),
		SUBTEST(mock_breadcrumbs_smoketest),
	};
	struct drm_i915_private *i915;
	intel_wakeref_t wakeref;
	int err = 0;

	i915 = mock_gem_device();
	if (!i915)
		return -ENOMEM;

	with_intel_runtime_pm(&i915->runtime_pm, wakeref)
		err = i915_subtests(tests, i915);

	drm_dev_put(&i915->drm);

	return err;
}

static int live_nop_request(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct intel_engine_cs *engine;
	struct igt_live_test t;
	int err = -ENODEV;

	/*
	 * Submit various sized batches of empty requests, to each engine
	 * (individually), and wait for the batch to complete. We can check
	 * the overhead of submitting requests to the hardware.
	 */

	for_each_uabi_engine(engine, i915) {
		unsigned long n, prime;
		IGT_TIMEOUT(end_time);
		ktime_t times[2] = {};

		err = igt_live_test_begin(&t, i915, __func__, engine->name);
		if (err)
			return err;

		intel_engine_pm_get(engine);
		for_each_prime_number_from(prime, 1, 8192) {
			struct i915_request *request = NULL;

			times[1] = ktime_get_raw();

			for (n = 0; n < prime; n++) {
				i915_request_put(request);
				request = i915_request_create(engine->kernel_context);
				if (IS_ERR(request))
					return PTR_ERR(request);

				/*
				 * This space is left intentionally blank.
				 *
				 * We do not actually want to perform any
				 * action with this request, we just want
				 * to measure the latency in allocation
				 * and submission of our breadcrumbs -
				 * ensuring that the bare request is sufficient
				 * for the system to work (i.e. proper HEAD
				 * tracking of the rings, interrupt handling,
				 * etc). It also gives us the lowest bounds
				 * for latency.
				 */

				i915_request_get(request);
				i915_request_add(request);
			}
			i915_request_wait(request, 0, MAX_SCHEDULE_TIMEOUT);
			i915_request_put(request);

			times[1] = ktime_sub(ktime_get_raw(), times[1]);
			if (prime == 1)
				times[0] = times[1];

			if (__igt_timeout(end_time, NULL))
				break;
		}
		intel_engine_pm_put(engine);

		err = igt_live_test_end(&t);
		if (err)
			return err;

		pr_info("Request latencies on %s: 1 = %lluns, %lu = %lluns\n",
			engine->name,
			ktime_to_ns(times[0]),
			prime, div64_u64(ktime_to_ns(times[1]), prime));
	}

	return err;
}

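/*
 * Build the smallest possible batch: a single MI_BATCH_BUFFER_END,
 * pinned into the global GTT so that any engine can execute it.
 */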
static struct i915_vma *empty_batch(struct drm_i915_private *i915)
{
	struct drm_i915_gem_object *obj;
	struct i915_vma *vma;
	u32 *cmd;
	int err;

	obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
	if (IS_ERR(obj))
		return ERR_CAST(obj);

	cmd = i915_gem_object_pin_map(obj, I915_MAP_WB);
	if (IS_ERR(cmd)) {
		err = PTR_ERR(cmd);
		goto err;
	}

	*cmd = MI_BATCH_BUFFER_END;

	__i915_gem_object_flush_map(obj, 0, 64);
	i915_gem_object_unpin_map(obj);

	intel_gt_chipset_flush(&i915->gt);

	vma = i915_vma_instance(obj, &i915->ggtt.vm, NULL);
	if (IS_ERR(vma)) {
		err = PTR_ERR(vma);
		goto err;
	}

	err = i915_vma_pin(vma, 0, 0, PIN_USER | PIN_GLOBAL);
	if (err)
		goto err;

	/* Force the wait now to avoid including it in the benchmark */
	err = i915_vma_sync(vma);
	if (err)
		goto err_pin;

	return vma;

err_pin:
	i915_vma_unpin(vma);
err:
	i915_gem_object_put(obj);
	return ERR_PTR(err);
}

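/*
 * Submit the empty batch on the engine's kernel context and return the
 * request (with a reference held) so the caller can wait upon it.
 */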
static struct i915_request *
empty_request(struct intel_engine_cs *engine,
	      struct i915_vma *batch)
{
	struct i915_request *request;
	int err;

	request = i915_request_create(engine->kernel_context);
	if (IS_ERR(request))
		return request;

	err = engine->emit_bb_start(request,
				    batch->node.start,
				    batch->node.size,
				    I915_DISPATCH_SECURE);
	if (err)
		goto out_request;

	i915_request_get(request);
out_request:
	i915_request_add(request);
	return err ? ERR_PTR(err) : request;
}

static int live_empty_request(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct intel_engine_cs *engine;
	struct igt_live_test t;
	struct i915_vma *batch;
	int err = 0;

	/*
	 * Submit various sized batches of empty requests, to each engine
	 * (individually), and wait for the batch to complete. We can check
	 * the overhead of submitting requests to the hardware.
	 */

	batch = empty_batch(i915);
	if (IS_ERR(batch))
		return PTR_ERR(batch);

	for_each_uabi_engine(engine, i915) {
		IGT_TIMEOUT(end_time);
		struct i915_request *request;
		unsigned long n, prime;
		ktime_t times[2] = {};

		err = igt_live_test_begin(&t, i915, __func__, engine->name);
		if (err)
			goto out_batch;

		intel_engine_pm_get(engine);

		/* Warmup / preload */
		request = empty_request(engine, batch);
		if (IS_ERR(request)) {
			err = PTR_ERR(request);
			intel_engine_pm_put(engine);
			goto out_batch;
		}
		i915_request_wait(request, 0, MAX_SCHEDULE_TIMEOUT);

		for_each_prime_number_from(prime, 1, 8192) {
			times[1] = ktime_get_raw();

			for (n = 0; n < prime; n++) {
				i915_request_put(request);
				request = empty_request(engine, batch);
				if (IS_ERR(request)) {
					err = PTR_ERR(request);
					intel_engine_pm_put(engine);
					goto out_batch;
				}
			}
			i915_request_wait(request, 0, MAX_SCHEDULE_TIMEOUT);

			times[1] = ktime_sub(ktime_get_raw(), times[1]);
			if (prime == 1)
				times[0] = times[1];

			if (__igt_timeout(end_time, NULL))
				break;
		}
		i915_request_put(request);
		intel_engine_pm_put(engine);

		err = igt_live_test_end(&t);
		if (err)
			goto out_batch;

		pr_info("Batch latencies on %s: 1 = %lluns, %lu = %lluns\n",
			engine->name,
			ktime_to_ns(times[0]),
			prime, div64_u64(ktime_to_ns(times[1]), prime));
	}

out_batch:
	i915_vma_unpin(batch);
	i915_vma_put(batch);
	return err;
}

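/*
 * Build a self-referential batch: MI_BATCH_BUFFER_START pointing back
 * at the start of the same buffer, followed by a MI_BATCH_BUFFER_END
 * that is only reached on an error path. Requests executing this batch
 * therefore spin until recursive_batch_resolve() rewrites the first
 * dword to MI_BATCH_BUFFER_END.
 */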
static struct i915_vma *recursive_batch(struct drm_i915_private *i915)
{
	struct drm_i915_gem_object *obj;
	const int gen = INTEL_GEN(i915);
	struct i915_vma *vma;
	u32 *cmd;
	int err;

	obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
	if (IS_ERR(obj))
		return ERR_CAST(obj);

	vma = i915_vma_instance(obj, i915->gt.vm, NULL);
	if (IS_ERR(vma)) {
		err = PTR_ERR(vma);
		goto err;
	}

	err = i915_vma_pin(vma, 0, 0, PIN_USER);
	if (err)
		goto err;

	cmd = i915_gem_object_pin_map(obj, I915_MAP_WC);
	if (IS_ERR(cmd)) {
		err = PTR_ERR(cmd);
		goto err;
	}

	if (gen >= 8) {
		*cmd++ = MI_BATCH_BUFFER_START | 1 << 8 | 1;
		*cmd++ = lower_32_bits(vma->node.start);
		*cmd++ = upper_32_bits(vma->node.start);
	} else if (gen >= 6) {
		*cmd++ = MI_BATCH_BUFFER_START | 1 << 8;
		*cmd++ = lower_32_bits(vma->node.start);
	} else {
		*cmd++ = MI_BATCH_BUFFER_START | MI_BATCH_GTT;
		*cmd++ = lower_32_bits(vma->node.start);
	}
	*cmd++ = MI_BATCH_BUFFER_END; /* terminate early in case of error */

	__i915_gem_object_flush_map(obj, 0, 64);
	i915_gem_object_unpin_map(obj);

	intel_gt_chipset_flush(&i915->gt);

	return vma;

err:
	i915_gem_object_put(obj);
	return ERR_PTR(err);
}

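/*
 * Break the infinite loop in a recursive batch by rewriting its first
 * dword to MI_BATCH_BUFFER_END, allowing any requests spinning on it
 * to complete.
 */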
static int recursive_batch_resolve(struct i915_vma *batch)
{
	u32 *cmd;

	cmd = i915_gem_object_pin_map(batch->obj, I915_MAP_WC);
	if (IS_ERR(cmd))
		return PTR_ERR(cmd);

	*cmd = MI_BATCH_BUFFER_END;

	__i915_gem_object_flush_map(batch->obj, 0, sizeof(*cmd));
	i915_gem_object_unpin_map(batch->obj);

	intel_gt_chipset_flush(batch->vm->gt);

	return 0;
}

static int live_all_engines(void *arg)
{
	struct drm_i915_private *i915 = arg;
	const unsigned int nengines = num_uabi_engines(i915);
	struct intel_engine_cs *engine;
	struct i915_request **request;
	struct igt_live_test t;
	struct i915_vma *batch;
	unsigned int idx;
	int err;

	/*
	 * Check we can submit requests to all engines simultaneously. We
	 * send a recursive batch to each engine - checking that we don't
	 * block doing so, and that they don't complete too soon.
	 */

	request = kcalloc(nengines, sizeof(*request), GFP_KERNEL);
	if (!request)
		return -ENOMEM;

	err = igt_live_test_begin(&t, i915, __func__, "");
	if (err)
		goto out_free;

	batch = recursive_batch(i915);
	if (IS_ERR(batch)) {
		err = PTR_ERR(batch);
		pr_err("%s: Unable to create batch, err=%d\n", __func__, err);
		goto out_free;
	}

	idx = 0;
	for_each_uabi_engine(engine, i915) {
		request[idx] = intel_engine_create_kernel_request(engine);
		if (IS_ERR(request[idx])) {
			err = PTR_ERR(request[idx]);
			pr_err("%s: Request allocation failed with err=%d\n",
			       __func__, err);
			goto out_request;
		}

		i915_vma_lock(batch);
		err = i915_request_await_object(request[idx], batch->obj, 0);
		if (err == 0)
			err = i915_vma_move_to_active(batch, request[idx], 0);
		i915_vma_unlock(batch);
		GEM_BUG_ON(err);

		err = engine->emit_bb_start(request[idx],
					    batch->node.start,
					    batch->node.size,
					    0);
		GEM_BUG_ON(err);
		request[idx]->batch = batch;

		i915_request_get(request[idx]);
		i915_request_add(request[idx]);
		idx++;
	}

	idx = 0;
	for_each_uabi_engine(engine, i915) {
		if (i915_request_completed(request[idx])) {
			pr_err("%s(%s): request completed too early!\n",
			       __func__, engine->name);
			err = -EINVAL;
			goto out_request;
		}
		idx++;
	}

	err = recursive_batch_resolve(batch);
	if (err) {
		pr_err("%s: failed to resolve batch, err=%d\n", __func__, err);
		goto out_request;
	}

	idx = 0;
	for_each_uabi_engine(engine, i915) {
		long timeout;

		timeout = i915_request_wait(request[idx], 0,
					    MAX_SCHEDULE_TIMEOUT);
		if (timeout < 0) {
			err = timeout;
			pr_err("%s: error waiting for request on %s, err=%d\n",
			       __func__, engine->name, err);
			goto out_request;
		}

		GEM_BUG_ON(!i915_request_completed(request[idx]));
		i915_request_put(request[idx]);
		request[idx] = NULL;
		idx++;
	}

	err = igt_live_test_end(&t);

out_request:
	idx = 0;
	for_each_uabi_engine(engine, i915) {
		if (request[idx])
			i915_request_put(request[idx]);
		idx++;
	}
	i915_vma_unpin(batch);
	i915_vma_put(batch);
out_free:
	kfree(request);
	return err;
}

static int live_sequential_engines(void *arg)
{
	struct drm_i915_private *i915 = arg;
	const unsigned int nengines = num_uabi_engines(i915);
	struct i915_request **request;
	struct i915_request *prev = NULL;
	struct intel_engine_cs *engine;
	struct igt_live_test t;
	unsigned int idx;
	int err;

	/*
	 * Check we can submit requests to all engines sequentially, such
	 * that each successive request waits for the earlier ones. This
	 * tests that we don't execute requests out of order, even though
	 * they are running on independent engines.
	 */

	request = kcalloc(nengines, sizeof(*request), GFP_KERNEL);
	if (!request)
		return -ENOMEM;

	err = igt_live_test_begin(&t, i915, __func__, "");
	if (err)
		goto out_free;

	idx = 0;
	for_each_uabi_engine(engine, i915) {
		struct i915_vma *batch;

		batch = recursive_batch(i915);
		if (IS_ERR(batch)) {
			err = PTR_ERR(batch);
			pr_err("%s: Unable to create batch for %s, err=%d\n",
			       __func__, engine->name, err);
			goto out_free;
		}

		request[idx] = intel_engine_create_kernel_request(engine);
		if (IS_ERR(request[idx])) {
			err = PTR_ERR(request[idx]);
			pr_err("%s: Request allocation failed for %s with err=%d\n",
			       __func__, engine->name, err);
			goto out_request;
		}

		if (prev) {
			err = i915_request_await_dma_fence(request[idx],
							   &prev->fence);
			if (err) {
				i915_request_add(request[idx]);
				pr_err("%s: Request await failed for %s with err=%d\n",
				       __func__, engine->name, err);
				goto out_request;
			}
		}

		i915_vma_lock(batch);
		err = i915_request_await_object(request[idx],
						batch->obj, false);
		if (err == 0)
			err = i915_vma_move_to_active(batch, request[idx], 0);
		i915_vma_unlock(batch);
		GEM_BUG_ON(err);

		err = engine->emit_bb_start(request[idx],
					    batch->node.start,
					    batch->node.size,
					    0);
		GEM_BUG_ON(err);
		request[idx]->batch = batch;

		i915_request_get(request[idx]);
		i915_request_add(request[idx]);

		prev = request[idx];
		idx++;
	}

	idx = 0;
	for_each_uabi_engine(engine, i915) {
		long timeout;

		if (i915_request_completed(request[idx])) {
			pr_err("%s(%s): request completed too early!\n",
			       __func__, engine->name);
			err = -EINVAL;
			goto out_request;
		}

		err = recursive_batch_resolve(request[idx]->batch);
		if (err) {
			pr_err("%s: failed to resolve batch, err=%d\n",
			       __func__, err);
			goto out_request;
		}

		timeout = i915_request_wait(request[idx], 0,
					    MAX_SCHEDULE_TIMEOUT);
		if (timeout < 0) {
			err = timeout;
			pr_err("%s: error waiting for request on %s, err=%d\n",
			       __func__, engine->name, err);
			goto out_request;
		}

		GEM_BUG_ON(!i915_request_completed(request[idx]));
		idx++;
	}

	err = igt_live_test_end(&t);

out_request:
	idx = 0;
	for_each_uabi_engine(engine, i915) {
		u32 *cmd;

		if (!request[idx])
			break;

		cmd = i915_gem_object_pin_map(request[idx]->batch->obj,
					      I915_MAP_WC);
		if (!IS_ERR(cmd)) {
			*cmd = MI_BATCH_BUFFER_END;

			__i915_gem_object_flush_map(request[idx]->batch->obj,
						    0, sizeof(*cmd));
			i915_gem_object_unpin_map(request[idx]->batch->obj);

			intel_gt_chipset_flush(engine->gt);
		}

		i915_vma_put(request[idx]->batch);
		i915_request_put(request[idx]);
		idx++;
	}
out_free:
	kfree(request);
	return err;
}

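/*
 * Per-engine worker: submit one request at a time on the kernel context
 * and wait for it to complete before issuing the next, counting how
 * many synchronous submissions fit before the timeout.
 */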
static int __live_parallel_engine1(void *arg)
{
	struct intel_engine_cs *engine = arg;
	IGT_TIMEOUT(end_time);
	unsigned long count;
	int err = 0;

	count = 0;
	intel_engine_pm_get(engine);
	do {
		struct i915_request *rq;

		rq = i915_request_create(engine->kernel_context);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			break;
		}

		i915_request_get(rq);
		i915_request_add(rq);

		err = 0;
		if (i915_request_wait(rq, 0, HZ / 5) < 0)
			err = -ETIME;
		i915_request_put(rq);
		if (err)
			break;

		count++;
	} while (!__igt_timeout(end_time, NULL));
	intel_engine_pm_put(engine);

	pr_info("%s: %lu request + sync\n", engine->name, count);
	return err;
}

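/*
 * Per-engine worker: submit requests back-to-back without waiting,
 * measuring raw submission throughput until the timeout expires.
 */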
static int __live_parallel_engineN(void *arg)
{
	struct intel_engine_cs *engine = arg;
	IGT_TIMEOUT(end_time);
	unsigned long count;
	int err = 0;

	count = 0;
	intel_engine_pm_get(engine);
	do {
		struct i915_request *rq;

		rq = i915_request_create(engine->kernel_context);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			break;
		}

		i915_request_add(rq);
		count++;
	} while (!__igt_timeout(end_time, NULL));
	intel_engine_pm_put(engine);

	pr_info("%s: %lu requests\n", engine->name, count);
	return err;
}

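/*
 * Simple barrier between the parallel spinners: each worker decrements
 * i915->selftest.counter and the last to arrive wakes the others
 * (wake_all), while wait_for_all sleeps until the counter reaches zero
 * or the selftest timeout expires.
 */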
static bool wake_all(struct drm_i915_private *i915)
{
	if (atomic_dec_and_test(&i915->selftest.counter)) {
		wake_up_var(&i915->selftest.counter);
		return true;
	}

	return false;
}

static int wait_for_all(struct drm_i915_private *i915)
{
	if (wake_all(i915))
		return 0;

	if (wait_var_event_timeout(&i915->selftest.counter,
				   !atomic_read(&i915->selftest.counter),
				   i915_selftest.timeout_jiffies))
		return 0;

	return -ETIME;
}

static int __live_parallel_spin(void *arg)
{
	struct intel_engine_cs *engine = arg;
	struct igt_spinner spin;
	struct i915_request *rq;
	int err = 0;

	/*
	 * Create a spinner running for eternity on each engine. If a second
	 * spinner is incorrectly placed on the same engine, it will not be
	 * able to start in time.
	 */

	if (igt_spinner_init(&spin, engine->gt)) {
		wake_all(engine->i915);
		return -ENOMEM;
	}

	intel_engine_pm_get(engine);
	rq = igt_spinner_create_request(&spin,
					engine->kernel_context,
					MI_NOOP); /* no preemption */
	intel_engine_pm_put(engine);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		if (err == -ENODEV)
			err = 0;
		wake_all(engine->i915);
		goto out_spin;
	}

	i915_request_get(rq);
	i915_request_add(rq);
	if (igt_wait_for_spinner(&spin, rq)) {
		/* Occupy this engine for the whole test */
		err = wait_for_all(engine->i915);
	} else {
		pr_err("Failed to start spinner on %s\n", engine->name);
		err = -EINVAL;
	}
	igt_spinner_end(&spin);

	if (err == 0 && i915_request_wait(rq, 0, HZ / 5) < 0)
		err = -EIO;
	i915_request_put(rq);

out_spin:
	igt_spinner_fini(&spin);
	return err;
}

static int live_parallel_engines(void *arg)
{
	struct drm_i915_private *i915 = arg;
	static int (* const func[])(void *arg) = {
		__live_parallel_engine1,
		__live_parallel_engineN,
		__live_parallel_spin,
		NULL,
	};
	const unsigned int nengines = num_uabi_engines(i915);
	struct intel_engine_cs *engine;
	int (* const *fn)(void *arg);
	struct task_struct **tsk;
	int err = 0;

	/*
	 * Check we can submit requests to all engines concurrently. This
	 * tests that we load up the system maximally.
	 */

	tsk = kcalloc(nengines, sizeof(*tsk), GFP_KERNEL);
	if (!tsk)
		return -ENOMEM;

	for (fn = func; !err && *fn; fn++) {
		char name[KSYM_NAME_LEN];
		struct igt_live_test t;
		unsigned int idx;

		snprintf(name, sizeof(name), "%ps", *fn);
		err = igt_live_test_begin(&t, i915, __func__, name);
		if (err)
			break;

		atomic_set(&i915->selftest.counter, nengines);

		idx = 0;
		for_each_uabi_engine(engine, i915) {
			tsk[idx] = kthread_run(*fn, engine,
					       "igt/parallel:%s",
					       engine->name);
			if (IS_ERR(tsk[idx])) {
				err = PTR_ERR(tsk[idx]);
				break;
			}
			get_task_struct(tsk[idx++]);
		}

		yield(); /* start all threads before we kthread_stop() */

		idx = 0;
		for_each_uabi_engine(engine, i915) {
			int status;

			if (IS_ERR(tsk[idx]))
				break;

			status = kthread_stop(tsk[idx]);
			if (status && !err)
				err = status;

			put_task_struct(tsk[idx++]);
		}

		if (igt_live_test_end(&t))
			err = -EIO;
	}

	kfree(tsk);
	return err;
}

static int
max_batches(struct i915_gem_context *ctx, struct intel_engine_cs *engine)
{
	struct i915_request *rq;
	int ret;

	/*
	 * Before execlists, all contexts share the same ringbuffer. With
	 * execlists, each context/engine has a separate ringbuffer and
	 * for the purposes of this test, inexhaustible.
	 *
	 * For the global ringbuffer though, we have to be very careful
	 * that we do not wrap while preventing the execution of requests
	 * with an unsignaled fence.
	 */
	if (HAS_EXECLISTS(ctx->i915))
		return INT_MAX;

	rq = igt_request_alloc(ctx, engine);
	if (IS_ERR(rq)) {
		ret = PTR_ERR(rq);
	} else {
		int sz;

		ret = rq->ring->size - rq->reserved_space;
		i915_request_add(rq);

		sz = rq->ring->emit - rq->head;
		if (sz < 0)
			sz += rq->ring->size;
		ret /= sz;
		ret /= 2; /* leave half spare, in case of emergency! */
	}

	return ret;
}

static int live_breadcrumbs_smoketest(void *arg)
{
	struct drm_i915_private *i915 = arg;
	const unsigned int nengines = num_uabi_engines(i915);
	const unsigned int ncpus = num_online_cpus();
	unsigned long num_waits, num_fences;
	struct intel_engine_cs *engine;
	struct task_struct **threads;
	struct igt_live_test live;
	intel_wakeref_t wakeref;
	struct smoketest *smoke;
	unsigned int n, idx;
	struct file *file;
	int ret = 0;

	/*
	 * Smoketest our breadcrumb/signal handling for requests across multiple
	 * threads. A very simple test to only catch the most egregious of bugs.
	 * See __igt_breadcrumbs_smoketest();
	 *
	 * On real hardware this time.
	 */

	wakeref = intel_runtime_pm_get(&i915->runtime_pm);

	file = mock_file(i915);
	if (IS_ERR(file)) {
		ret = PTR_ERR(file);
		goto out_rpm;
	}

	smoke = kcalloc(nengines, sizeof(*smoke), GFP_KERNEL);
	if (!smoke) {
		ret = -ENOMEM;
		goto out_file;
	}

	threads = kcalloc(ncpus * nengines, sizeof(*threads), GFP_KERNEL);
	if (!threads) {
		ret = -ENOMEM;
		goto out_smoke;
	}

	smoke[0].request_alloc = __live_request_alloc;
	smoke[0].ncontexts = 64;
	smoke[0].contexts = kcalloc(smoke[0].ncontexts,
				    sizeof(*smoke[0].contexts),
				    GFP_KERNEL);
	if (!smoke[0].contexts) {
		ret = -ENOMEM;
		goto out_threads;
	}

	for (n = 0; n < smoke[0].ncontexts; n++) {
		smoke[0].contexts[n] = live_context(i915, file);
		if (!smoke[0].contexts[n]) {
			ret = -ENOMEM;
			goto out_contexts;
		}
	}

	ret = igt_live_test_begin(&live, i915, __func__, "");
	if (ret)
		goto out_contexts;

	idx = 0;
	for_each_uabi_engine(engine, i915) {
		smoke[idx] = smoke[0];
		smoke[idx].engine = engine;
		smoke[idx].max_batch =
			max_batches(smoke[0].contexts[0], engine);
		if (smoke[idx].max_batch < 0) {
			ret = smoke[idx].max_batch;
			goto out_flush;
		}
		/* One ring interleaved between requests from all cpus */
		smoke[idx].max_batch /= num_online_cpus() + 1;
		pr_debug("Limiting batches to %d requests on %s\n",
			 smoke[idx].max_batch, engine->name);

		for (n = 0; n < ncpus; n++) {
			struct task_struct *tsk;

			tsk = kthread_run(__igt_breadcrumbs_smoketest,
					  &smoke[idx], "igt/%d.%d", idx, n);
			if (IS_ERR(tsk)) {
				ret = PTR_ERR(tsk);
				goto out_flush;
			}

			get_task_struct(tsk);
			threads[idx * ncpus + n] = tsk;
		}

		idx++;
	}

	yield(); /* start all threads before we begin */
	msleep(jiffies_to_msecs(i915_selftest.timeout_jiffies));

out_flush:
	idx = 0;
	num_waits = 0;
	num_fences = 0;
	for_each_uabi_engine(engine, i915) {
		for (n = 0; n < ncpus; n++) {
			struct task_struct *tsk = threads[idx * ncpus + n];
			int err;

			if (!tsk)
				continue;

			err = kthread_stop(tsk);
			if (err < 0 && !ret)
				ret = err;

			put_task_struct(tsk);
		}

		num_waits += atomic_long_read(&smoke[idx].num_waits);
		num_fences += atomic_long_read(&smoke[idx].num_fences);
		idx++;
	}
	pr_info("Completed %lu waits for %lu fences across %d engines and %d cpus\n",
		num_waits, num_fences, RUNTIME_INFO(i915)->num_engines, ncpus);

	ret = igt_live_test_end(&live) ?: ret;
out_contexts:
	kfree(smoke[0].contexts);
out_threads:
	kfree(threads);
out_smoke:
	kfree(smoke);
out_file:
	fput(file);
out_rpm:
	intel_runtime_pm_put(&i915->runtime_pm, wakeref);

	return ret;
}

int i915_request_live_selftests(struct drm_i915_private *i915)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(live_nop_request),
		SUBTEST(live_all_engines),
		SUBTEST(live_sequential_engines),
		SUBTEST(live_parallel_engines),
		SUBTEST(live_empty_request),
		SUBTEST(live_breadcrumbs_smoketest),
	};

	if (intel_gt_is_wedged(&i915->gt))
		return 0;

	return i915_subtests(tests, i915);
}

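/*
 * Flush the context by submitting a request on the engine's kernel
 * context that awaits the context's last request, then wait for it
 * (and for the engine to idle) before the caller reads back the
 * busy/runtime statistics.
 */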
static int switch_to_kernel_sync(struct intel_context *ce, int err)
{
	struct i915_request *rq;
	struct dma_fence *fence;

	rq = intel_engine_create_kernel_request(ce->engine);
	if (IS_ERR(rq))
		return PTR_ERR(rq);

	fence = i915_active_fence_get(&ce->timeline->last_request);
	if (fence) {
		i915_request_await_dma_fence(rq, fence);
		dma_fence_put(fence);
	}

	rq = i915_request_get(rq);
	i915_request_add(rq);
	if (i915_request_wait(rq, 0, HZ / 2) < 0 && !err)
		err = -ETIME;
	i915_request_put(rq);

	while (!err && !intel_engine_is_idle(ce->engine))
		intel_engine_flush_submission(ce->engine);

	return err;
}

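/*
 * Results collected for each engine during the perf selftests: number
 * of requests submitted, elapsed wall time, busy time reported by the
 * engine (if supported) and the context's total runtime.
 */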
struct perf_stats {
	struct intel_engine_cs *engine;
	unsigned long count;
	ktime_t time;
	ktime_t busy;
	u64 runtime;
};

struct perf_series {
	struct drm_i915_private *i915;
	unsigned int nengines;
	struct intel_context *ce[];
};

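/*
 * Series workloads, cycling round-robin over the pinned contexts:
 * s_sync0 waits for each request before moving on, s_sync1 keeps one
 * request in flight (waiting on the previous submission), and s_many
 * submits without ever waiting.
 */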
static int s_sync0(void *arg)
{
	struct perf_series *ps = arg;
	IGT_TIMEOUT(end_time);
	unsigned int idx = 0;
	int err = 0;

	GEM_BUG_ON(!ps->nengines);
	do {
		struct i915_request *rq;

		rq = i915_request_create(ps->ce[idx]);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			break;
		}

		i915_request_get(rq);
		i915_request_add(rq);

		if (i915_request_wait(rq, 0, HZ / 5) < 0)
			err = -ETIME;
		i915_request_put(rq);
		if (err)
			break;

		if (++idx == ps->nengines)
			idx = 0;
	} while (!__igt_timeout(end_time, NULL));

	return err;
}

static int s_sync1(void *arg)
{
	struct perf_series *ps = arg;
	struct i915_request *prev = NULL;
	IGT_TIMEOUT(end_time);
	unsigned int idx = 0;
	int err = 0;

	GEM_BUG_ON(!ps->nengines);
	do {
		struct i915_request *rq;

		rq = i915_request_create(ps->ce[idx]);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			break;
		}

		i915_request_get(rq);
		i915_request_add(rq);

		if (prev && i915_request_wait(prev, 0, HZ / 5) < 0)
			err = -ETIME;
		i915_request_put(prev);
		prev = rq;
		if (err)
			break;

		if (++idx == ps->nengines)
			idx = 0;
	} while (!__igt_timeout(end_time, NULL));
	i915_request_put(prev);

	return err;
}

static int s_many(void *arg)
{
	struct perf_series *ps = arg;
	IGT_TIMEOUT(end_time);
	unsigned int idx = 0;

	GEM_BUG_ON(!ps->nengines);
	do {
		struct i915_request *rq;

		rq = i915_request_create(ps->ce[idx]);
		if (IS_ERR(rq))
			return PTR_ERR(rq);

		i915_request_add(rq);

		if (++idx == ps->nengines)
			idx = 0;
	} while (!__igt_timeout(end_time, NULL));

	return 0;
}

static int perf_series_engines(void *arg)
{
	struct drm_i915_private *i915 = arg;
	static int (* const func[])(void *arg) = {
		s_sync0,
		s_sync1,
		s_many,
		NULL,
	};
	const unsigned int nengines = num_uabi_engines(i915);
	struct intel_engine_cs *engine;
	int (* const *fn)(void *arg);
	struct pm_qos_request qos;
	struct perf_stats *stats;
	struct perf_series *ps;
	unsigned int idx;
	int err = 0;

	stats = kcalloc(nengines, sizeof(*stats), GFP_KERNEL);
	if (!stats)
		return -ENOMEM;

	ps = kzalloc(struct_size(ps, ce, nengines), GFP_KERNEL);
	if (!ps) {
		kfree(stats);
		return -ENOMEM;
	}

	cpu_latency_qos_add_request(&qos, 0); /* disable cstates */

	ps->i915 = i915;
	ps->nengines = nengines;

	idx = 0;
	for_each_uabi_engine(engine, i915) {
		struct intel_context *ce;

		ce = intel_context_create(engine);
		if (IS_ERR(ce)) {
			err = PTR_ERR(ce);
			goto out;
		}

		err = intel_context_pin(ce);
		if (err) {
			intel_context_put(ce);
			goto out;
		}

		ps->ce[idx++] = ce;
	}
	GEM_BUG_ON(idx != ps->nengines);

	for (fn = func; *fn && !err; fn++) {
		char name[KSYM_NAME_LEN];
		struct igt_live_test t;

		snprintf(name, sizeof(name), "%ps", *fn);
		err = igt_live_test_begin(&t, i915, __func__, name);
		if (err)
			break;

		for (idx = 0; idx < nengines; idx++) {
			struct perf_stats *p =
				memset(&stats[idx], 0, sizeof(stats[idx]));
			struct intel_context *ce = ps->ce[idx];

			p->engine = ps->ce[idx]->engine;
			intel_engine_pm_get(p->engine);

			if (intel_engine_supports_stats(p->engine))
				p->busy = intel_engine_get_busy_time(p->engine) + 1;
			p->runtime = -intel_context_get_total_runtime_ns(ce);
			p->time = ktime_get();
		}

		err = (*fn)(ps);
		if (igt_live_test_end(&t))
			err = -EIO;

		for (idx = 0; idx < nengines; idx++) {
			struct perf_stats *p = &stats[idx];
			struct intel_context *ce = ps->ce[idx];
			int integer, decimal;
			u64 busy, dt;

			p->time = ktime_sub(ktime_get(), p->time);
			if (p->busy) {
				p->busy = ktime_sub(intel_engine_get_busy_time(p->engine),
						    p->busy - 1);
			}

			err = switch_to_kernel_sync(ce, err);
			p->runtime += intel_context_get_total_runtime_ns(ce);
			intel_engine_pm_put(p->engine);

			busy = 100 * ktime_to_ns(p->busy);
			dt = ktime_to_ns(p->time);
			if (dt) {
				integer = div64_u64(busy, dt);
				busy -= integer * dt;
				decimal = div64_u64(100 * busy, dt);
			} else {
				integer = 0;
				decimal = 0;
			}

			pr_info("%s %5s: { seqno:%d, busy:%d.%02d%%, runtime:%lldms, walltime:%lldms }\n",
				name, p->engine->name, ce->timeline->seqno,
				integer, decimal,
				div_u64(p->runtime, 1000 * 1000),
				div_u64(ktime_to_ns(p->time), 1000 * 1000));
		}
	}

out:
	for (idx = 0; idx < nengines; idx++) {
		if (IS_ERR_OR_NULL(ps->ce[idx]))
			break;

		intel_context_unpin(ps->ce[idx]);
		intel_context_put(ps->ce[idx]);
	}
	kfree(ps);

	cpu_latency_qos_remove_request(&qos);
	kfree(stats);
	return err;
}

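/*
 * Parallel workloads, one kthread per engine on its own context:
 * p_sync0 waits for each request in turn, p_sync1 keeps one request in
 * flight, and p_many submits without waiting, each recording its own
 * perf_stats.
 */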
static int p_sync0(void *arg)
{
	struct perf_stats *p = arg;
	struct intel_engine_cs *engine = p->engine;
	struct intel_context *ce;
	IGT_TIMEOUT(end_time);
	unsigned long count;
	bool busy;
	int err = 0;

	ce = intel_context_create(engine);
	if (IS_ERR(ce))
		return PTR_ERR(ce);

	err = intel_context_pin(ce);
	if (err) {
		intel_context_put(ce);
		return err;
	}

	busy = false;
	if (intel_engine_supports_stats(engine)) {
		p->busy = intel_engine_get_busy_time(engine);
		busy = true;
	}

	p->time = ktime_get();
	count = 0;
	do {
		struct i915_request *rq;

		rq = i915_request_create(ce);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			break;
		}

		i915_request_get(rq);
		i915_request_add(rq);

		err = 0;
		if (i915_request_wait(rq, 0, HZ / 5) < 0)
			err = -ETIME;
		i915_request_put(rq);
		if (err)
			break;

		count++;
	} while (!__igt_timeout(end_time, NULL));
	p->time = ktime_sub(ktime_get(), p->time);

	if (busy) {
		p->busy = ktime_sub(intel_engine_get_busy_time(engine),
				    p->busy);
	}

	err = switch_to_kernel_sync(ce, err);
	p->runtime = intel_context_get_total_runtime_ns(ce);
	p->count = count;

	intel_context_unpin(ce);
	intel_context_put(ce);
	return err;
}

static int p_sync1(void *arg)
{
	struct perf_stats *p = arg;
	struct intel_engine_cs *engine = p->engine;
	struct i915_request *prev = NULL;
	struct intel_context *ce;
	IGT_TIMEOUT(end_time);
	unsigned long count;
	bool busy;
	int err = 0;

	ce = intel_context_create(engine);
	if (IS_ERR(ce))
		return PTR_ERR(ce);

	err = intel_context_pin(ce);
	if (err) {
		intel_context_put(ce);
		return err;
	}

	busy = false;
	if (intel_engine_supports_stats(engine)) {
		p->busy = intel_engine_get_busy_time(engine);
		busy = true;
	}

	p->time = ktime_get();
	count = 0;
	do {
		struct i915_request *rq;

		rq = i915_request_create(ce);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			break;
		}

		i915_request_get(rq);
		i915_request_add(rq);

		err = 0;
		if (prev && i915_request_wait(prev, 0, HZ / 5) < 0)
			err = -ETIME;
		i915_request_put(prev);
		prev = rq;
		if (err)
			break;

		count++;
	} while (!__igt_timeout(end_time, NULL));
	i915_request_put(prev);
	p->time = ktime_sub(ktime_get(), p->time);

	if (busy) {
		p->busy = ktime_sub(intel_engine_get_busy_time(engine),
				    p->busy);
	}

	err = switch_to_kernel_sync(ce, err);
	p->runtime = intel_context_get_total_runtime_ns(ce);
	p->count = count;

	intel_context_unpin(ce);
	intel_context_put(ce);
	return err;
}

static int p_many(void *arg)
{
	struct perf_stats *p = arg;
	struct intel_engine_cs *engine = p->engine;
	struct intel_context *ce;
	IGT_TIMEOUT(end_time);
	unsigned long count;
	int err = 0;
	bool busy;

	ce = intel_context_create(engine);
	if (IS_ERR(ce))
		return PTR_ERR(ce);

	err = intel_context_pin(ce);
	if (err) {
		intel_context_put(ce);
		return err;
	}

	busy = false;
	if (intel_engine_supports_stats(engine)) {
		p->busy = intel_engine_get_busy_time(engine);
		busy = true;
	}

	count = 0;
	p->time = ktime_get();
	do {
		struct i915_request *rq;

		rq = i915_request_create(ce);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			break;
		}

		i915_request_add(rq);
		count++;
	} while (!__igt_timeout(end_time, NULL));
	p->time = ktime_sub(ktime_get(), p->time);

	if (busy) {
		p->busy = ktime_sub(intel_engine_get_busy_time(engine),
				    p->busy);
	}

	err = switch_to_kernel_sync(ce, err);
	p->runtime = intel_context_get_total_runtime_ns(ce);
	p->count = count;

	intel_context_unpin(ce);
	intel_context_put(ce);
	return err;
}

static int perf_parallel_engines(void *arg)
{
	struct drm_i915_private *i915 = arg;
	static int (* const func[])(void *arg) = {
		p_sync0,
		p_sync1,
		p_many,
		NULL,
	};
	const unsigned int nengines = num_uabi_engines(i915);
	struct intel_engine_cs *engine;
	int (* const *fn)(void *arg);
	struct pm_qos_request qos;
	struct {
		struct perf_stats p;
		struct task_struct *tsk;
	} *engines;
	int err = 0;

	engines = kcalloc(nengines, sizeof(*engines), GFP_KERNEL);
	if (!engines)
		return -ENOMEM;

	cpu_latency_qos_add_request(&qos, 0);

	for (fn = func; *fn; fn++) {
		char name[KSYM_NAME_LEN];
		struct igt_live_test t;
		unsigned int idx;

		snprintf(name, sizeof(name), "%ps", *fn);
		err = igt_live_test_begin(&t, i915, __func__, name);
		if (err)
			break;

		atomic_set(&i915->selftest.counter, nengines);

		idx = 0;
		for_each_uabi_engine(engine, i915) {
			intel_engine_pm_get(engine);

			memset(&engines[idx].p, 0, sizeof(engines[idx].p));
			engines[idx].p.engine = engine;

			engines[idx].tsk = kthread_run(*fn, &engines[idx].p,
						       "igt:%s", engine->name);
			if (IS_ERR(engines[idx].tsk)) {
				err = PTR_ERR(engines[idx].tsk);
				intel_engine_pm_put(engine);
				break;
			}
			get_task_struct(engines[idx++].tsk);
		}

		yield(); /* start all threads before we kthread_stop() */

		idx = 0;
		for_each_uabi_engine(engine, i915) {
			int status;

			if (IS_ERR(engines[idx].tsk))
				break;

			status = kthread_stop(engines[idx].tsk);
			if (status && !err)
				err = status;

			intel_engine_pm_put(engine);
			put_task_struct(engines[idx++].tsk);
		}

		if (igt_live_test_end(&t))
			err = -EIO;
		if (err)
			break;

		idx = 0;
		for_each_uabi_engine(engine, i915) {
			struct perf_stats *p = &engines[idx].p;
			u64 busy = 100 * ktime_to_ns(p->busy);
			u64 dt = ktime_to_ns(p->time);
			int integer, decimal;

			if (dt) {
				integer = div64_u64(busy, dt);
				busy -= integer * dt;
				decimal = div64_u64(100 * busy, dt);
			} else {
				integer = 0;
				decimal = 0;
			}

			GEM_BUG_ON(engine != p->engine);
			pr_info("%s %5s: { count:%lu, busy:%d.%02d%%, runtime:%lldms, walltime:%lldms }\n",
				name, engine->name, p->count, integer, decimal,
				div_u64(p->runtime, 1000 * 1000),
				div_u64(ktime_to_ns(p->time), 1000 * 1000));
			idx++;
		}
	}

	cpu_latency_qos_remove_request(&qos);
	kfree(engines);
	return err;
}

int i915_request_perf_selftests(struct drm_i915_private *i915)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(perf_series_engines),
		SUBTEST(perf_parallel_engines),
	};

	if (intel_gt_is_wedged(&i915->gt))
		return 0;

	return i915_subtests(tests, i915);
}