/*
 * Copyright © 2016 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 */

#include <linux/prime_numbers.h>

#include "gem/i915_gem_pm.h"
#include "gem/selftests/mock_context.h"

#include "gt/intel_gt.h"

#include "i915_random.h"
#include "i915_selftest.h"
#include "igt_live_test.h"
#include "lib_sw_fence.h"

#include "mock_drm.h"
#include "mock_gem_device.h"

static int igt_add_request(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct i915_request *request;
	int err = -ENOMEM;

	/* Basic preliminary test to create a request and let it loose! */

	mutex_lock(&i915->drm.struct_mutex);
	request = mock_request(i915->engine[RCS0]->kernel_context, HZ / 10);
	if (!request)
		goto out_unlock;

	i915_request_add(request);

	err = 0;
out_unlock:
	mutex_unlock(&i915->drm.struct_mutex);
	return err;
}

static int igt_wait_request(void *arg)
{
	const long T = HZ / 4;
	struct drm_i915_private *i915 = arg;
	struct i915_request *request;
	int err = -EINVAL;

	/* Submit a request, then wait upon it */

	mutex_lock(&i915->drm.struct_mutex);
	request = mock_request(i915->engine[RCS0]->kernel_context, T);
	if (!request) {
		err = -ENOMEM;
		goto out_unlock;
	}
	i915_request_get(request);

	if (i915_request_wait(request, 0, 0) != -ETIME) {
		pr_err("request wait (busy query) succeeded (expected timeout before submit!)\n");
		goto out_request;
	}

	if (i915_request_wait(request, 0, T) != -ETIME) {
		pr_err("request wait succeeded (expected timeout before submit!)\n");
		goto out_request;
	}

	if (i915_request_completed(request)) {
		pr_err("request completed before submit!!\n");
		goto out_request;
	}

	i915_request_add(request);

	if (i915_request_wait(request, 0, 0) != -ETIME) {
		pr_err("request wait (busy query) succeeded (expected timeout after submit!)\n");
		goto out_request;
	}

	if (i915_request_completed(request)) {
		pr_err("request completed immediately!\n");
		goto out_request;
	}

	if (i915_request_wait(request, 0, T / 2) != -ETIME) {
		pr_err("request wait succeeded (expected timeout!)\n");
		goto out_request;
	}

	if (i915_request_wait(request, 0, T) == -ETIME) {
		pr_err("request wait timed out!\n");
		goto out_request;
	}

	if (!i915_request_completed(request)) {
		pr_err("request not complete after waiting!\n");
		goto out_request;
	}

	if (i915_request_wait(request, 0, T) == -ETIME) {
		pr_err("request wait timed out when already complete!\n");
		goto out_request;
	}

	err = 0;
out_request:
	i915_request_put(request);
out_unlock:
	mock_device_flush(i915);
	mutex_unlock(&i915->drm.struct_mutex);
	return err;
}

static int igt_fence_wait(void *arg)
{
	const long T = HZ / 4;
	struct drm_i915_private *i915 = arg;
	struct i915_request *request;
	int err = -EINVAL;

	/* Submit a request, treat it as a fence and wait upon it */

	mutex_lock(&i915->drm.struct_mutex);
	request = mock_request(i915->engine[RCS0]->kernel_context, T);
	if (!request) {
		err = -ENOMEM;
		goto out_locked;
	}

	if (dma_fence_wait_timeout(&request->fence, false, T) != -ETIME) {
		pr_err("fence wait success before submit (expected timeout)!\n");
		goto out_locked;
	}

	i915_request_add(request);
	mutex_unlock(&i915->drm.struct_mutex);

	if (dma_fence_is_signaled(&request->fence)) {
		pr_err("fence signaled immediately!\n");
		goto out_device;
	}

	if (dma_fence_wait_timeout(&request->fence, false, T / 2) != -ETIME) {
		pr_err("fence wait success after submit (expected timeout)!\n");
		goto out_device;
	}

	if (dma_fence_wait_timeout(&request->fence, false, T) <= 0) {
		pr_err("fence wait timed out (expected success)!\n");
		goto out_device;
	}

	if (!dma_fence_is_signaled(&request->fence)) {
		pr_err("fence unsignaled after waiting!\n");
		goto out_device;
	}

	if (dma_fence_wait_timeout(&request->fence, false, T) <= 0) {
		pr_err("fence wait timed out when complete (expected success)!\n");
		goto out_device;
	}

	err = 0;
out_device:
	mutex_lock(&i915->drm.struct_mutex);
out_locked:
	mock_device_flush(i915);
	mutex_unlock(&i915->drm.struct_mutex);
	return err;
}

static int igt_request_rewind(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct i915_request *request, *vip;
	struct i915_gem_context *ctx[2];
	struct intel_context *ce;
	int err = -EINVAL;

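	/*
	 * Queue a slow request, then slip a "vip" request in front of it by
	 * cancelling and resubmitting, and check the vip completes first.
	 */
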
	mutex_lock(&i915->drm.struct_mutex);
	ctx[0] = mock_context(i915, "A");
	ce = i915_gem_context_get_engine(ctx[0], RCS0);
	GEM_BUG_ON(IS_ERR(ce));
	request = mock_request(ce, 2 * HZ);
	intel_context_put(ce);
	if (!request) {
		err = -ENOMEM;
		goto err_context_0;
	}

	i915_request_get(request);
	i915_request_add(request);

	ctx[1] = mock_context(i915, "B");
	ce = i915_gem_context_get_engine(ctx[1], RCS0);
	GEM_BUG_ON(IS_ERR(ce));
	vip = mock_request(ce, 0);
	intel_context_put(ce);
	if (!vip) {
		err = -ENOMEM;
		goto err_context_1;
	}

	/* Simulate preemption by manual reordering */
	if (!mock_cancel_request(request)) {
		pr_err("failed to cancel request (already executed)!\n");
		i915_request_add(vip);
		goto err_context_1;
	}
	i915_request_get(vip);
	i915_request_add(vip);
	rcu_read_lock();
	request->engine->submit_request(request);
	rcu_read_unlock();

	mutex_unlock(&i915->drm.struct_mutex);

	if (i915_request_wait(vip, 0, HZ) == -ETIME) {
		pr_err("timed out waiting for high priority request\n");
		goto err;
	}

	if (i915_request_completed(request)) {
		pr_err("low priority request already completed\n");
		goto err;
	}

	err = 0;
err:
	i915_request_put(vip);
	mutex_lock(&i915->drm.struct_mutex);
err_context_1:
	mock_context_close(ctx[1]);
	i915_request_put(request);
err_context_0:
	mock_context_close(ctx[0]);
	mock_device_flush(i915);
	mutex_unlock(&i915->drm.struct_mutex);
	return err;
}

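/*
 * Shared parameters for the breadcrumbs smoketests: each kthread picks
 * contexts at random, queues up to max_batch requests per iteration and
 * tallies how many requests (fences) it has waited upon.
 */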
struct smoketest {
	struct intel_engine_cs *engine;
	struct i915_gem_context **contexts;
	atomic_long_t num_waits, num_fences;
	int ncontexts, max_batch;
	struct i915_request *(*request_alloc)(struct intel_context *ce);
};

static struct i915_request *
__mock_request_alloc(struct intel_context *ce)
{
	return mock_request(ce, 0);
}

static struct i915_request *
__live_request_alloc(struct intel_context *ce)
{
	return intel_context_create_request(ce);
}

static int __igt_breadcrumbs_smoketest(void *arg)
{
	struct smoketest *t = arg;
	struct mutex * const BKL = &t->engine->i915->drm.struct_mutex;
	const unsigned int max_batch = min(t->ncontexts, t->max_batch) - 1;
	const unsigned int total = 4 * t->ncontexts + 1;
	unsigned int num_waits = 0, num_fences = 0;
	struct i915_request **requests;
	I915_RND_STATE(prng);
	unsigned int *order;
	int err = 0;

	/*
	 * A very simple test to catch the most egregious of list handling bugs.
	 *
	 * At its heart, we simply create oodles of requests running across
	 * multiple kthreads and enable signaling on them, for the sole purpose
	 * of stressing our breadcrumb handling. The only inspection we do is
	 * that the fences were marked as signaled.
	 */

	requests = kmalloc_array(total, sizeof(*requests), GFP_KERNEL);
	if (!requests)
		return -ENOMEM;

	order = i915_random_order(total, &prng);
	if (!order) {
		err = -ENOMEM;
		goto out_requests;
	}

	while (!kthread_should_stop()) {
		struct i915_sw_fence *submit, *wait;
		unsigned int n, count;

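		/*
		 * Each pass holds a random batch of requests back behind a
		 * 'submit' fence, releases them all at once and then waits
		 * for every breadcrumb via the composite 'wait' fence.
		 */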
		submit = heap_fence_create(GFP_KERNEL);
		if (!submit) {
			err = -ENOMEM;
			break;
		}

		wait = heap_fence_create(GFP_KERNEL);
		if (!wait) {
			i915_sw_fence_commit(submit);
			heap_fence_put(submit);
			err = -ENOMEM;
			break;
		}

		i915_random_reorder(order, total, &prng);
		count = 1 + i915_prandom_u32_max_state(max_batch, &prng);

		for (n = 0; n < count; n++) {
			struct i915_gem_context *ctx =
				t->contexts[order[n] % t->ncontexts];
			struct i915_request *rq;
			struct intel_context *ce;

			mutex_lock(BKL);

			ce = i915_gem_context_get_engine(ctx, t->engine->legacy_idx);
			GEM_BUG_ON(IS_ERR(ce));
			rq = t->request_alloc(ce);
			intel_context_put(ce);
			if (IS_ERR(rq)) {
				mutex_unlock(BKL);
				err = PTR_ERR(rq);
				count = n;
				break;
			}

			err = i915_sw_fence_await_sw_fence_gfp(&rq->submit,
							       submit,
							       GFP_KERNEL);

			requests[n] = i915_request_get(rq);
			i915_request_add(rq);

			mutex_unlock(BKL);

			if (err >= 0)
				err = i915_sw_fence_await_dma_fence(wait,
								    &rq->fence,
								    0,
								    GFP_KERNEL);

			if (err < 0) {
				i915_request_put(rq);
				count = n;
				break;
			}
		}

		i915_sw_fence_commit(submit);
		i915_sw_fence_commit(wait);

		if (!wait_event_timeout(wait->wait,
					i915_sw_fence_done(wait),
					5 * HZ)) {
			struct i915_request *rq = requests[count - 1];

			pr_err("waiting for %d/%d fences (last %llx:%lld) on %s timed out!\n",
			       atomic_read(&wait->pending), count,
			       rq->fence.context, rq->fence.seqno,
			       t->engine->name);
			GEM_TRACE_DUMP();

			intel_gt_set_wedged(t->engine->gt);
			GEM_BUG_ON(!i915_request_completed(rq));
			i915_sw_fence_wait(wait);
			err = -EIO;
		}

		for (n = 0; n < count; n++) {
			struct i915_request *rq = requests[n];

			if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
				      &rq->fence.flags)) {
				pr_err("%llu:%llu was not signaled!\n",
				       rq->fence.context, rq->fence.seqno);
				err = -EINVAL;
			}

			i915_request_put(rq);
		}

		heap_fence_put(wait);
		heap_fence_put(submit);

		if (err < 0)
			break;

		num_fences += count;
		num_waits++;

		cond_resched();
	}

	atomic_long_add(num_fences, &t->num_fences);
	atomic_long_add(num_waits, &t->num_waits);

	kfree(order);
out_requests:
	kfree(requests);
	return err;
}

static int mock_breadcrumbs_smoketest(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct smoketest t = {
		.engine = i915->engine[RCS0],
		.ncontexts = 1024,
		.max_batch = 1024,
		.request_alloc = __mock_request_alloc
	};
	unsigned int ncpus = num_online_cpus();
	struct task_struct **threads;
	unsigned int n;
	int ret = 0;

	/*
	 * Smoketest our breadcrumb/signal handling for requests across multiple
	 * threads. A very simple test to only catch the most egregious of bugs.
	 * See __igt_breadcrumbs_smoketest();
	 */

	threads = kmalloc_array(ncpus, sizeof(*threads), GFP_KERNEL);
	if (!threads)
		return -ENOMEM;

	t.contexts =
		kmalloc_array(t.ncontexts, sizeof(*t.contexts), GFP_KERNEL);
	if (!t.contexts) {
		ret = -ENOMEM;
		goto out_threads;
	}

	mutex_lock(&t.engine->i915->drm.struct_mutex);
	for (n = 0; n < t.ncontexts; n++) {
		t.contexts[n] = mock_context(t.engine->i915, "mock");
		if (!t.contexts[n]) {
			ret = -ENOMEM;
			goto out_contexts;
		}
	}
	mutex_unlock(&t.engine->i915->drm.struct_mutex);

	for (n = 0; n < ncpus; n++) {
		threads[n] = kthread_run(__igt_breadcrumbs_smoketest,
					 &t, "igt/%d", n);
		if (IS_ERR(threads[n])) {
			ret = PTR_ERR(threads[n]);
			ncpus = n;
			break;
		}

		get_task_struct(threads[n]);
	}

	msleep(jiffies_to_msecs(i915_selftest.timeout_jiffies));

	for (n = 0; n < ncpus; n++) {
		int err;

		err = kthread_stop(threads[n]);
		if (err < 0 && !ret)
			ret = err;

		put_task_struct(threads[n]);
	}
	pr_info("Completed %lu waits for %lu fences across %d cpus\n",
		atomic_long_read(&t.num_waits),
		atomic_long_read(&t.num_fences),
		ncpus);

	mutex_lock(&t.engine->i915->drm.struct_mutex);
out_contexts:
	for (n = 0; n < t.ncontexts; n++) {
		if (!t.contexts[n])
			break;
		mock_context_close(t.contexts[n]);
	}
	mutex_unlock(&t.engine->i915->drm.struct_mutex);
	kfree(t.contexts);
out_threads:
	kfree(threads);

	return ret;
}

int i915_request_mock_selftests(void)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(igt_add_request),
		SUBTEST(igt_wait_request),
		SUBTEST(igt_fence_wait),
		SUBTEST(igt_request_rewind),
		SUBTEST(mock_breadcrumbs_smoketest),
	};
	struct drm_i915_private *i915;
	intel_wakeref_t wakeref;
	int err = 0;

	i915 = mock_gem_device();
	if (!i915)
		return -ENOMEM;

	with_intel_runtime_pm(&i915->runtime_pm, wakeref)
		err = i915_subtests(tests, i915);

	drm_dev_put(&i915->drm);

	return err;
}

static int live_nop_request(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct intel_engine_cs *engine;
	intel_wakeref_t wakeref;
	struct igt_live_test t;
	unsigned int id;
	int err = -ENODEV;

	/* Submit various sized batches of empty requests, to each engine
	 * (individually), and wait for the batch to complete. We can check
	 * the overhead of submitting requests to the hardware.
	 */

	mutex_lock(&i915->drm.struct_mutex);
	wakeref = intel_runtime_pm_get(&i915->runtime_pm);

	for_each_engine(engine, i915, id) {
		struct i915_request *request = NULL;
		unsigned long n, prime;
		IGT_TIMEOUT(end_time);
		ktime_t times[2] = {};

		err = igt_live_test_begin(&t, i915, __func__, engine->name);
		if (err)
			goto out_unlock;

		for_each_prime_number_from(prime, 1, 8192) {
			times[1] = ktime_get_raw();

			for (n = 0; n < prime; n++) {
				request = i915_request_create(engine->kernel_context);
				if (IS_ERR(request)) {
					err = PTR_ERR(request);
					goto out_unlock;
				}

				/* This space is left intentionally blank.
				 *
				 * We do not actually want to perform any
				 * action with this request, we just want
				 * to measure the latency in allocation
				 * and submission of our breadcrumbs -
				 * ensuring that the bare request is sufficient
				 * for the system to work (i.e. proper HEAD
				 * tracking of the rings, interrupt handling,
				 * etc). It also gives us the lowest bounds
				 * for latency.
				 */

				i915_request_add(request);
			}
			i915_request_wait(request, 0, MAX_SCHEDULE_TIMEOUT);

			times[1] = ktime_sub(ktime_get_raw(), times[1]);
			if (prime == 1)
				times[0] = times[1];

			if (__igt_timeout(end_time, NULL))
				break;
		}

		err = igt_live_test_end(&t);
		if (err)
			goto out_unlock;

		pr_info("Request latencies on %s: 1 = %lluns, %lu = %lluns\n",
			engine->name,
			ktime_to_ns(times[0]),
			prime, div64_u64(ktime_to_ns(times[1]), prime));
	}

out_unlock:
	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
	mutex_unlock(&i915->drm.struct_mutex);
	return err;
}

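/*
 * Create a batch buffer containing only MI_BATCH_BUFFER_END, pinned into
 * the global GTT so it can be executed from any engine.
 */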
static struct i915_vma *empty_batch(struct drm_i915_private *i915)
{
	struct drm_i915_gem_object *obj;
	struct i915_vma *vma;
	u32 *cmd;
	int err;

	obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
	if (IS_ERR(obj))
		return ERR_CAST(obj);

	cmd = i915_gem_object_pin_map(obj, I915_MAP_WB);
	if (IS_ERR(cmd)) {
		err = PTR_ERR(cmd);
		goto err;
	}

	*cmd = MI_BATCH_BUFFER_END;

	__i915_gem_object_flush_map(obj, 0, 64);
	i915_gem_object_unpin_map(obj);

	intel_gt_chipset_flush(&i915->gt);

	vma = i915_vma_instance(obj, &i915->ggtt.vm, NULL);
	if (IS_ERR(vma)) {
		err = PTR_ERR(vma);
		goto err;
	}

	err = i915_vma_pin(vma, 0, 0, PIN_USER | PIN_GLOBAL);
	if (err)
		goto err;

	return vma;

err:
	i915_gem_object_put(obj);
	return ERR_PTR(err);
}

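/* Build and submit a request that does nothing but run the empty batch. */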
static struct i915_request *
empty_request(struct intel_engine_cs *engine,
	      struct i915_vma *batch)
{
	struct i915_request *request;
	int err;

	request = i915_request_create(engine->kernel_context);
	if (IS_ERR(request))
		return request;

	err = engine->emit_bb_start(request,
				    batch->node.start,
				    batch->node.size,
				    I915_DISPATCH_SECURE);
	if (err)
		goto out_request;

out_request:
	i915_request_add(request);
	return err ? ERR_PTR(err) : request;
}

static int live_empty_request(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct intel_engine_cs *engine;
	intel_wakeref_t wakeref;
	struct igt_live_test t;
	struct i915_vma *batch;
	unsigned int id;
	int err = 0;

	/* Submit various sized batches of empty requests, to each engine
	 * (individually), and wait for the batch to complete. We can check
	 * the overhead of submitting requests to the hardware.
	 */

	mutex_lock(&i915->drm.struct_mutex);
	wakeref = intel_runtime_pm_get(&i915->runtime_pm);

	batch = empty_batch(i915);
	if (IS_ERR(batch)) {
		err = PTR_ERR(batch);
		goto out_unlock;
	}

	for_each_engine(engine, i915, id) {
		IGT_TIMEOUT(end_time);
		struct i915_request *request;
		unsigned long n, prime;
		ktime_t times[2] = {};

		err = igt_live_test_begin(&t, i915, __func__, engine->name);
		if (err)
			goto out_batch;

		/* Warmup / preload */
		request = empty_request(engine, batch);
		if (IS_ERR(request)) {
			err = PTR_ERR(request);
			goto out_batch;
		}
		i915_request_wait(request, 0, MAX_SCHEDULE_TIMEOUT);

		for_each_prime_number_from(prime, 1, 8192) {
			times[1] = ktime_get_raw();

			for (n = 0; n < prime; n++) {
				request = empty_request(engine, batch);
				if (IS_ERR(request)) {
					err = PTR_ERR(request);
					goto out_batch;
				}
			}
			i915_request_wait(request, 0, MAX_SCHEDULE_TIMEOUT);

			times[1] = ktime_sub(ktime_get_raw(), times[1]);
			if (prime == 1)
				times[0] = times[1];

			if (__igt_timeout(end_time, NULL))
				break;
		}

		err = igt_live_test_end(&t);
		if (err)
			goto out_batch;

		pr_info("Batch latencies on %s: 1 = %lluns, %lu = %lluns\n",
			engine->name,
			ktime_to_ns(times[0]),
			prime, div64_u64(ktime_to_ns(times[1]), prime));
	}

out_batch:
	i915_vma_unpin(batch);
	i915_vma_put(batch);
out_unlock:
	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
	mutex_unlock(&i915->drm.struct_mutex);
	return err;
}

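/*
 * Create a batch whose first instruction jumps back to its own start, so
 * it spins on the GPU until recursive_batch_resolve() rewrites that jump
 * into MI_BATCH_BUFFER_END.
 */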
static struct i915_vma *recursive_batch(struct drm_i915_private *i915)
{
	struct i915_gem_context *ctx = i915->kernel_context;
	struct i915_address_space *vm = ctx->vm ?: &i915->ggtt.vm;
	struct drm_i915_gem_object *obj;
	const int gen = INTEL_GEN(i915);
	struct i915_vma *vma;
	u32 *cmd;
	int err;

	obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
	if (IS_ERR(obj))
		return ERR_CAST(obj);

	vma = i915_vma_instance(obj, vm, NULL);
	if (IS_ERR(vma)) {
		err = PTR_ERR(vma);
		goto err;
	}

	err = i915_vma_pin(vma, 0, 0, PIN_USER);
	if (err)
		goto err;

	cmd = i915_gem_object_pin_map(obj, I915_MAP_WC);
	if (IS_ERR(cmd)) {
		err = PTR_ERR(cmd);
		goto err;
	}

	if (gen >= 8) {
		*cmd++ = MI_BATCH_BUFFER_START | 1 << 8 | 1;
		*cmd++ = lower_32_bits(vma->node.start);
		*cmd++ = upper_32_bits(vma->node.start);
	} else if (gen >= 6) {
		*cmd++ = MI_BATCH_BUFFER_START | 1 << 8;
		*cmd++ = lower_32_bits(vma->node.start);
	} else {
		*cmd++ = MI_BATCH_BUFFER_START | MI_BATCH_GTT;
		*cmd++ = lower_32_bits(vma->node.start);
	}
	*cmd++ = MI_BATCH_BUFFER_END; /* terminate early in case of error */

	__i915_gem_object_flush_map(obj, 0, 64);
	i915_gem_object_unpin_map(obj);

	intel_gt_chipset_flush(&i915->gt);

	return vma;

err:
	i915_gem_object_put(obj);
	return ERR_PTR(err);
}

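/* Break the self-referencing loop by overwriting its first dword. */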
static int recursive_batch_resolve(struct i915_vma *batch)
{
	u32 *cmd;

	cmd = i915_gem_object_pin_map(batch->obj, I915_MAP_WC);
	if (IS_ERR(cmd))
		return PTR_ERR(cmd);

	*cmd = MI_BATCH_BUFFER_END;
	intel_gt_chipset_flush(batch->vm->gt);

	i915_gem_object_unpin_map(batch->obj);

	return 0;
}

static int live_all_engines(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct intel_engine_cs *engine;
	struct i915_request *request[I915_NUM_ENGINES];
	intel_wakeref_t wakeref;
	struct igt_live_test t;
	struct i915_vma *batch;
	unsigned int id;
	int err;

	/* Check we can submit requests to all engines simultaneously. We
	 * send a recursive batch to each engine - checking that we don't
	 * block doing so, and that they don't complete too soon.
	 */

	mutex_lock(&i915->drm.struct_mutex);
	wakeref = intel_runtime_pm_get(&i915->runtime_pm);

	err = igt_live_test_begin(&t, i915, __func__, "");
	if (err)
		goto out_unlock;

	batch = recursive_batch(i915);
	if (IS_ERR(batch)) {
		err = PTR_ERR(batch);
		pr_err("%s: Unable to create batch, err=%d\n", __func__, err);
		goto out_unlock;
	}

	for_each_engine(engine, i915, id) {
		request[id] = i915_request_create(engine->kernel_context);
		if (IS_ERR(request[id])) {
			err = PTR_ERR(request[id]);
			pr_err("%s: Request allocation failed with err=%d\n",
			       __func__, err);
			goto out_request;
		}

		err = engine->emit_bb_start(request[id],
					    batch->node.start,
					    batch->node.size,
					    0);
		GEM_BUG_ON(err);
		request[id]->batch = batch;

		i915_vma_lock(batch);
		err = i915_request_await_object(request[id], batch->obj, 0);
		if (err == 0)
			err = i915_vma_move_to_active(batch, request[id], 0);
		i915_vma_unlock(batch);
		GEM_BUG_ON(err);

		i915_request_get(request[id]);
		i915_request_add(request[id]);
	}

	for_each_engine(engine, i915, id) {
		if (i915_request_completed(request[id])) {
			pr_err("%s(%s): request completed too early!\n",
			       __func__, engine->name);
			err = -EINVAL;
			goto out_request;
		}
	}

	err = recursive_batch_resolve(batch);
	if (err) {
		pr_err("%s: failed to resolve batch, err=%d\n", __func__, err);
		goto out_request;
	}

	for_each_engine(engine, i915, id) {
		long timeout;

		timeout = i915_request_wait(request[id], 0,
					    MAX_SCHEDULE_TIMEOUT);
		if (timeout < 0) {
			err = timeout;
			pr_err("%s: error waiting for request on %s, err=%d\n",
			       __func__, engine->name, err);
			goto out_request;
		}

		GEM_BUG_ON(!i915_request_completed(request[id]));
		i915_request_put(request[id]);
		request[id] = NULL;
	}

	err = igt_live_test_end(&t);

out_request:
	for_each_engine(engine, i915, id)
		if (request[id])
			i915_request_put(request[id]);
	i915_vma_unpin(batch);
	i915_vma_put(batch);
out_unlock:
	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
	mutex_unlock(&i915->drm.struct_mutex);
	return err;
}

static int live_sequential_engines(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct i915_request *request[I915_NUM_ENGINES] = {};
	struct i915_request *prev = NULL;
	struct intel_engine_cs *engine;
	intel_wakeref_t wakeref;
	struct igt_live_test t;
	unsigned int id;
	int err;

	/* Check we can submit requests to all engines sequentially, such
	 * that each successive request waits for the earlier ones. This
	 * tests that we don't execute requests out of order, even though
	 * they are running on independent engines.
	 */

	mutex_lock(&i915->drm.struct_mutex);
	wakeref = intel_runtime_pm_get(&i915->runtime_pm);

	err = igt_live_test_begin(&t, i915, __func__, "");
	if (err)
		goto out_unlock;

	for_each_engine(engine, i915, id) {
		struct i915_vma *batch;

		batch = recursive_batch(i915);
		if (IS_ERR(batch)) {
			err = PTR_ERR(batch);
			pr_err("%s: Unable to create batch for %s, err=%d\n",
			       __func__, engine->name, err);
			goto out_unlock;
		}

		request[id] = i915_request_create(engine->kernel_context);
		if (IS_ERR(request[id])) {
			err = PTR_ERR(request[id]);
			pr_err("%s: Request allocation failed for %s with err=%d\n",
			       __func__, engine->name, err);
			goto out_request;
		}

		if (prev) {
			err = i915_request_await_dma_fence(request[id],
							   &prev->fence);
			if (err) {
				i915_request_add(request[id]);
				pr_err("%s: Request await failed for %s with err=%d\n",
				       __func__, engine->name, err);
				goto out_request;
			}
		}

		err = engine->emit_bb_start(request[id],
					    batch->node.start,
					    batch->node.size,
					    0);
		GEM_BUG_ON(err);
		request[id]->batch = batch;

		i915_vma_lock(batch);
		err = i915_request_await_object(request[id], batch->obj, false);
		if (err == 0)
			err = i915_vma_move_to_active(batch, request[id], 0);
		i915_vma_unlock(batch);
		GEM_BUG_ON(err);

		i915_request_get(request[id]);
		i915_request_add(request[id]);

		prev = request[id];
	}

	for_each_engine(engine, i915, id) {
		long timeout;

		if (i915_request_completed(request[id])) {
			pr_err("%s(%s): request completed too early!\n",
			       __func__, engine->name);
			err = -EINVAL;
			goto out_request;
		}

		err = recursive_batch_resolve(request[id]->batch);
		if (err) {
			pr_err("%s: failed to resolve batch, err=%d\n",
			       __func__, err);
			goto out_request;
		}

		timeout = i915_request_wait(request[id], 0,
					    MAX_SCHEDULE_TIMEOUT);
		if (timeout < 0) {
			err = timeout;
			pr_err("%s: error waiting for request on %s, err=%d\n",
			       __func__, engine->name, err);
			goto out_request;
		}

		GEM_BUG_ON(!i915_request_completed(request[id]));
	}

	err = igt_live_test_end(&t);

out_request:
	for_each_engine(engine, i915, id) {
		u32 *cmd;

		if (!request[id])
			break;

		cmd = i915_gem_object_pin_map(request[id]->batch->obj,
					      I915_MAP_WC);
		if (!IS_ERR(cmd)) {
			*cmd = MI_BATCH_BUFFER_END;
			intel_gt_chipset_flush(engine->gt);

			i915_gem_object_unpin_map(request[id]->batch->obj);
		}

		i915_vma_put(request[id]->batch);
		i915_request_put(request[id]);
	}
out_unlock:
	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
	mutex_unlock(&i915->drm.struct_mutex);
	return err;
}

static int
max_batches(struct i915_gem_context *ctx, struct intel_engine_cs *engine)
{
	struct i915_request *rq;
	int ret;

	/*
	 * Before execlists, all contexts share the same ringbuffer. With
	 * execlists, each context/engine has a separate ringbuffer and
	 * for the purposes of this test, inexhaustible.
	 *
	 * For the global ringbuffer though, we have to be very careful
	 * that we do not wrap while preventing the execution of requests
	 * with an unsignaled fence.
	 */
	if (HAS_EXECLISTS(ctx->i915))
		return INT_MAX;

	rq = igt_request_alloc(ctx, engine);
	if (IS_ERR(rq)) {
		ret = PTR_ERR(rq);
	} else {
		int sz;

		ret = rq->ring->size - rq->reserved_space;
		i915_request_add(rq);

		sz = rq->ring->emit - rq->head;
		if (sz < 0)
			sz += rq->ring->size;
		ret /= sz;
		ret /= 2; /* leave half spare, in case of emergency! */
	}

	return ret;
}

static int live_breadcrumbs_smoketest(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct smoketest t[I915_NUM_ENGINES];
	unsigned int ncpus = num_online_cpus();
	unsigned long num_waits, num_fences;
	struct intel_engine_cs *engine;
	struct task_struct **threads;
	struct igt_live_test live;
	enum intel_engine_id id;
	intel_wakeref_t wakeref;
	struct drm_file *file;
	unsigned int n;
	int ret = 0;

	/*
	 * Smoketest our breadcrumb/signal handling for requests across multiple
	 * threads. A very simple test to only catch the most egregious of bugs.
	 * See __igt_breadcrumbs_smoketest();
	 *
	 * On real hardware this time.
	 */

	wakeref = intel_runtime_pm_get(&i915->runtime_pm);

	file = mock_file(i915);
	if (IS_ERR(file)) {
		ret = PTR_ERR(file);
		goto out_rpm;
	}

	threads = kcalloc(ncpus * I915_NUM_ENGINES,
			  sizeof(*threads),
			  GFP_KERNEL);
	if (!threads) {
		ret = -ENOMEM;
		goto out_file;
	}

	memset(&t[0], 0, sizeof(t[0]));
	t[0].request_alloc = __live_request_alloc;
	t[0].ncontexts = 64;
	t[0].contexts = kmalloc_array(t[0].ncontexts,
				      sizeof(*t[0].contexts),
				      GFP_KERNEL);
	if (!t[0].contexts) {
		ret = -ENOMEM;
		goto out_threads;
	}

	mutex_lock(&i915->drm.struct_mutex);
	for (n = 0; n < t[0].ncontexts; n++) {
		t[0].contexts[n] = live_context(i915, file);
		if (!t[0].contexts[n]) {
			ret = -ENOMEM;
			goto out_contexts;
		}
	}

	ret = igt_live_test_begin(&live, i915, __func__, "");
	if (ret)
		goto out_contexts;

	for_each_engine(engine, i915, id) {
		t[id] = t[0];
		t[id].engine = engine;
		t[id].max_batch = max_batches(t[0].contexts[0], engine);
		if (t[id].max_batch < 0) {
			ret = t[id].max_batch;
			mutex_unlock(&i915->drm.struct_mutex);
			goto out_flush;
		}
		/* One ring interleaved between requests from all cpus */
		t[id].max_batch /= num_online_cpus() + 1;
		pr_debug("Limiting batches to %d requests on %s\n",
			 t[id].max_batch, engine->name);

		for (n = 0; n < ncpus; n++) {
			struct task_struct *tsk;

			tsk = kthread_run(__igt_breadcrumbs_smoketest,
					  &t[id], "igt/%d.%d", id, n);
			if (IS_ERR(tsk)) {
				ret = PTR_ERR(tsk);
				mutex_unlock(&i915->drm.struct_mutex);
				goto out_flush;
			}

			get_task_struct(tsk);
			threads[id * ncpus + n] = tsk;
		}
	}
	mutex_unlock(&i915->drm.struct_mutex);

	msleep(jiffies_to_msecs(i915_selftest.timeout_jiffies));

out_flush:
	num_waits = 0;
	num_fences = 0;
	for_each_engine(engine, i915, id) {
		for (n = 0; n < ncpus; n++) {
			struct task_struct *tsk = threads[id * ncpus + n];
			int err;

			if (!tsk)
				continue;

			err = kthread_stop(tsk);
			if (err < 0 && !ret)
				ret = err;

			put_task_struct(tsk);
		}

		num_waits += atomic_long_read(&t[id].num_waits);
		num_fences += atomic_long_read(&t[id].num_fences);
	}
	pr_info("Completed %lu waits for %lu fences across %d engines and %d cpus\n",
		num_waits, num_fences, RUNTIME_INFO(i915)->num_engines, ncpus);

	mutex_lock(&i915->drm.struct_mutex);
	ret = igt_live_test_end(&live) ?: ret;
out_contexts:
	mutex_unlock(&i915->drm.struct_mutex);
	kfree(t[0].contexts);
out_threads:
	kfree(threads);
out_file:
	mock_file_free(i915, file);
out_rpm:
	intel_runtime_pm_put(&i915->runtime_pm, wakeref);

	return ret;
}

int i915_request_live_selftests(struct drm_i915_private *i915)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(live_nop_request),
		SUBTEST(live_all_engines),
		SUBTEST(live_sequential_engines),
		SUBTEST(live_empty_request),
		SUBTEST(live_breadcrumbs_smoketest),
	};

	if (intel_gt_is_wedged(&i915->gt))
		return 0;

	return i915_subtests(tests, i915);
}