/*
 * Copyright © 2016 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 */

#include <linux/prime_numbers.h>

#include "gem/i915_gem_pm.h"
#include "gem/selftests/mock_context.h"

#include "gt/intel_gt.h"

#include "i915_random.h"
#include "i915_selftest.h"
#include "igt_live_test.h"
#include "lib_sw_fence.h"

#include "mock_drm.h"
#include "mock_gem_device.h"

static int igt_add_request(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct i915_request *request;
	int err = -ENOMEM;

	/* Basic preliminary test to create a request and let it loose! */

	mutex_lock(&i915->drm.struct_mutex);
	request = mock_request(i915->engine[RCS0],
			       i915->kernel_context,
			       HZ / 10);
	if (!request)
		goto out_unlock;

	i915_request_add(request);

	err = 0;
out_unlock:
	mutex_unlock(&i915->drm.struct_mutex);
	return err;
}

static int igt_wait_request(void *arg)
{
	const long T = HZ / 4;
	struct drm_i915_private *i915 = arg;
	struct i915_request *request;
	int err = -EINVAL;

	/* Submit a request, then wait upon it */

	mutex_lock(&i915->drm.struct_mutex);
	request = mock_request(i915->engine[RCS0], i915->kernel_context, T);
	if (!request) {
		err = -ENOMEM;
		goto out_unlock;
	}
	i915_request_get(request);

	if (i915_request_wait(request, 0, 0) != -ETIME) {
		pr_err("request wait (busy query) succeeded (expected timeout before submit!)\n");
		goto out_request;
	}

	if (i915_request_wait(request, 0, T) != -ETIME) {
		pr_err("request wait succeeded (expected timeout before submit!)\n");
		goto out_request;
	}

	if (i915_request_completed(request)) {
		pr_err("request completed before submit!!\n");
		goto out_request;
	}

	i915_request_add(request);

	if (i915_request_wait(request, 0, 0) != -ETIME) {
		pr_err("request wait (busy query) succeeded (expected timeout after submit!)\n");
		goto out_request;
	}

	if (i915_request_completed(request)) {
		pr_err("request completed immediately!\n");
		goto out_request;
	}

	if (i915_request_wait(request, 0, T / 2) != -ETIME) {
		pr_err("request wait succeeded (expected timeout!)\n");
		goto out_request;
	}

	if (i915_request_wait(request, 0, T) == -ETIME) {
		pr_err("request wait timed out!\n");
		goto out_request;
	}

	if (!i915_request_completed(request)) {
		pr_err("request not complete after waiting!\n");
		goto out_request;
	}

	if (i915_request_wait(request, 0, T) == -ETIME) {
		pr_err("request wait timed out when already complete!\n");
		goto out_request;
	}

	err = 0;
out_request:
	i915_request_put(request);
out_unlock:
	mock_device_flush(i915);
	mutex_unlock(&i915->drm.struct_mutex);
	return err;
}

static int igt_fence_wait(void *arg)
{
	const long T = HZ / 4;
	struct drm_i915_private *i915 = arg;
	struct i915_request *request;
	int err = -EINVAL;

	/* Submit a request, treat it as a fence and wait upon it */

	mutex_lock(&i915->drm.struct_mutex);
	request = mock_request(i915->engine[RCS0], i915->kernel_context, T);
	if (!request) {
		err = -ENOMEM;
		goto out_locked;
	}

	if (dma_fence_wait_timeout(&request->fence, false, T) != -ETIME) {
		pr_err("fence wait success before submit (expected timeout)!\n");
		goto out_locked;
	}

	i915_request_add(request);
	mutex_unlock(&i915->drm.struct_mutex);

	if (dma_fence_is_signaled(&request->fence)) {
		pr_err("fence signaled immediately!\n");
		goto out_device;
	}

	if (dma_fence_wait_timeout(&request->fence, false, T / 2) != -ETIME) {
		pr_err("fence wait success after submit (expected timeout)!\n");
		goto out_device;
	}

	if (dma_fence_wait_timeout(&request->fence, false, T) <= 0) {
		pr_err("fence wait timed out (expected success)!\n");
		goto out_device;
	}

	if (!dma_fence_is_signaled(&request->fence)) {
		pr_err("fence unsignaled after waiting!\n");
		goto out_device;
	}

	if (dma_fence_wait_timeout(&request->fence, false, T) <= 0) {
		pr_err("fence wait timed out when complete (expected success)!\n");
		goto out_device;
	}

	err = 0;
out_device:
	mutex_lock(&i915->drm.struct_mutex);
out_locked:
	mock_device_flush(i915);
	mutex_unlock(&i915->drm.struct_mutex);
	return err;
}

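/*
 * Submit a slow request, then jump the queue with a second "vip" request by
 * cancelling the first, submitting the vip, and only then resubmitting the
 * original. The vip must complete while the slow request is still
 * outstanding, showing that the reordered submission is honoured.
 */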
static int igt_request_rewind(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct i915_request *request, *vip;
	struct i915_gem_context *ctx[2];
	int err = -EINVAL;

	mutex_lock(&i915->drm.struct_mutex);
	ctx[0] = mock_context(i915, "A");
	request = mock_request(i915->engine[RCS0], ctx[0], 2 * HZ);
	if (!request) {
		err = -ENOMEM;
		goto err_context_0;
	}

	i915_request_get(request);
	i915_request_add(request);

	ctx[1] = mock_context(i915, "B");
	vip = mock_request(i915->engine[RCS0], ctx[1], 0);
	if (!vip) {
		err = -ENOMEM;
		goto err_context_1;
	}

	/* Simulate preemption by manual reordering */
	if (!mock_cancel_request(request)) {
		pr_err("failed to cancel request (already executed)!\n");
		i915_request_add(vip);
		goto err_context_1;
	}
	i915_request_get(vip);
	i915_request_add(vip);
	rcu_read_lock();
	request->engine->submit_request(request);
	rcu_read_unlock();

	mutex_unlock(&i915->drm.struct_mutex);

	if (i915_request_wait(vip, 0, HZ) == -ETIME) {
		pr_err("timed out waiting for high priority request\n");
		goto err;
	}

	if (i915_request_completed(request)) {
		pr_err("low priority request already completed\n");
		goto err;
	}

	err = 0;
err:
	i915_request_put(vip);
	mutex_lock(&i915->drm.struct_mutex);
err_context_1:
	mock_context_close(ctx[1]);
	i915_request_put(request);
err_context_0:
	mock_context_close(ctx[0]);
	mock_device_flush(i915);
	mutex_unlock(&i915->drm.struct_mutex);
	return err;
}

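/*
 * Shared state for the breadcrumbs smoketest: the engine under test, a pool
 * of contexts to allocate requests from, a request_alloc() callback (mock or
 * live), and counters accumulated by each kthread for the final report.
 */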
struct smoketest {
	struct intel_engine_cs *engine;
	struct i915_gem_context **contexts;
	atomic_long_t num_waits, num_fences;
	int ncontexts, max_batch;
	struct i915_request *(*request_alloc)(struct i915_gem_context *,
					      struct intel_engine_cs *);
};

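/*
 * request_alloc() callbacks for struct smoketest: the mock variant builds a
 * software-only request for the mock device, while the live variant
 * allocates a real request against the hardware engine.
 */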
static struct i915_request *
__mock_request_alloc(struct i915_gem_context *ctx,
		     struct intel_engine_cs *engine)
{
	return mock_request(engine, ctx, 0);
}

static struct i915_request *
__live_request_alloc(struct i915_gem_context *ctx,
		     struct intel_engine_cs *engine)
{
	return igt_request_alloc(ctx, engine);
}

static int __igt_breadcrumbs_smoketest(void *arg)
{
	struct smoketest *t = arg;
	struct mutex * const BKL = &t->engine->i915->drm.struct_mutex;
	const unsigned int max_batch = min(t->ncontexts, t->max_batch) - 1;
	const unsigned int total = 4 * t->ncontexts + 1;
	unsigned int num_waits = 0, num_fences = 0;
	struct i915_request **requests;
	I915_RND_STATE(prng);
	unsigned int *order;
	int err = 0;

	/*
	 * A very simple test to catch the most egregious of list handling bugs.
	 *
	 * At its heart, we simply create oodles of requests running across
	 * multiple kthreads and enable signaling on them, for the sole purpose
	 * of stressing our breadcrumb handling. The only inspection we do is
	 * that the fences were marked as signaled.
	 */

	requests = kmalloc_array(total, sizeof(*requests), GFP_KERNEL);
	if (!requests)
		return -ENOMEM;

	order = i915_random_order(total, &prng);
	if (!order) {
		err = -ENOMEM;
		goto out_requests;
	}

	while (!kthread_should_stop()) {
		struct i915_sw_fence *submit, *wait;
		unsigned int n, count;

		submit = heap_fence_create(GFP_KERNEL);
		if (!submit) {
			err = -ENOMEM;
			break;
		}

		wait = heap_fence_create(GFP_KERNEL);
		if (!wait) {
			i915_sw_fence_commit(submit);
			heap_fence_put(submit);
			err = -ENOMEM;
			break;
		}

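		/*
		 * Each batch of requests is gated on the "submit" fence, so
		 * none is executed before the whole batch has been queued,
		 * and the "wait" fence completes only once every request in
		 * the batch has signaled.
		 */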
		i915_random_reorder(order, total, &prng);
		count = 1 + i915_prandom_u32_max_state(max_batch, &prng);

		for (n = 0; n < count; n++) {
			struct i915_gem_context *ctx =
				t->contexts[order[n] % t->ncontexts];
			struct i915_request *rq;

			mutex_lock(BKL);

			rq = t->request_alloc(ctx, t->engine);
			if (IS_ERR(rq)) {
				mutex_unlock(BKL);
				err = PTR_ERR(rq);
				count = n;
				break;
			}

			err = i915_sw_fence_await_sw_fence_gfp(&rq->submit,
							       submit,
							       GFP_KERNEL);

			requests[n] = i915_request_get(rq);
			i915_request_add(rq);

			mutex_unlock(BKL);

			if (err >= 0)
				err = i915_sw_fence_await_dma_fence(wait,
								    &rq->fence,
								    0,
								    GFP_KERNEL);

			if (err < 0) {
				i915_request_put(rq);
				count = n;
				break;
			}
		}

		i915_sw_fence_commit(submit);
		i915_sw_fence_commit(wait);

		if (!wait_event_timeout(wait->wait,
					i915_sw_fence_done(wait),
					5 * HZ)) {
			struct i915_request *rq = requests[count - 1];

			pr_err("waiting for %d/%d fences (last %llx:%lld) on %s timed out!\n",
			       atomic_read(&wait->pending), count,
			       rq->fence.context, rq->fence.seqno,
			       t->engine->name);
			GEM_TRACE_DUMP();

			intel_gt_set_wedged(t->engine->gt);
			GEM_BUG_ON(!i915_request_completed(rq));
			i915_sw_fence_wait(wait);
			err = -EIO;
		}

		for (n = 0; n < count; n++) {
			struct i915_request *rq = requests[n];

			if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
				      &rq->fence.flags)) {
				pr_err("%llu:%llu was not signaled!\n",
				       rq->fence.context, rq->fence.seqno);
				err = -EINVAL;
			}

			i915_request_put(rq);
		}

		heap_fence_put(wait);
		heap_fence_put(submit);

		if (err < 0)
			break;

		num_fences += count;
		num_waits++;

		cond_resched();
	}

	atomic_long_add(num_fences, &t->num_fences);
	atomic_long_add(num_waits, &t->num_waits);

	kfree(order);
out_requests:
	kfree(requests);
	return err;
}

static int mock_breadcrumbs_smoketest(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct smoketest t = {
		.engine = i915->engine[RCS0],
		.ncontexts = 1024,
		.max_batch = 1024,
		.request_alloc = __mock_request_alloc
	};
	unsigned int ncpus = num_online_cpus();
	struct task_struct **threads;
	unsigned int n;
	int ret = 0;

	/*
	 * Smoketest our breadcrumb/signal handling for requests across multiple
	 * threads. A very simple test to only catch the most egregious of bugs.
	 * See __igt_breadcrumbs_smoketest();
	 */

	threads = kmalloc_array(ncpus, sizeof(*threads), GFP_KERNEL);
	if (!threads)
		return -ENOMEM;

	t.contexts =
		kmalloc_array(t.ncontexts, sizeof(*t.contexts), GFP_KERNEL);
	if (!t.contexts) {
		ret = -ENOMEM;
		goto out_threads;
	}

	mutex_lock(&t.engine->i915->drm.struct_mutex);
	for (n = 0; n < t.ncontexts; n++) {
		t.contexts[n] = mock_context(t.engine->i915, "mock");
		if (!t.contexts[n]) {
			ret = -ENOMEM;
			goto out_contexts;
		}
	}
	mutex_unlock(&t.engine->i915->drm.struct_mutex);

	for (n = 0; n < ncpus; n++) {
		threads[n] = kthread_run(__igt_breadcrumbs_smoketest,
					 &t, "igt/%d", n);
		if (IS_ERR(threads[n])) {
			ret = PTR_ERR(threads[n]);
			ncpus = n;
			break;
		}

		get_task_struct(threads[n]);
	}

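	/* Let the worker threads run for the selftest timeout, then reap them. */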
	msleep(jiffies_to_msecs(i915_selftest.timeout_jiffies));

	for (n = 0; n < ncpus; n++) {
		int err;

		err = kthread_stop(threads[n]);
		if (err < 0 && !ret)
			ret = err;

		put_task_struct(threads[n]);
	}
	pr_info("Completed %lu waits for %lu fences across %d cpus\n",
		atomic_long_read(&t.num_waits),
		atomic_long_read(&t.num_fences),
		ncpus);

	mutex_lock(&t.engine->i915->drm.struct_mutex);
out_contexts:
	for (n = 0; n < t.ncontexts; n++) {
		if (!t.contexts[n])
			break;
		mock_context_close(t.contexts[n]);
	}
	mutex_unlock(&t.engine->i915->drm.struct_mutex);
	kfree(t.contexts);
out_threads:
	kfree(threads);

	return ret;
}

int i915_request_mock_selftests(void)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(igt_add_request),
		SUBTEST(igt_wait_request),
		SUBTEST(igt_fence_wait),
		SUBTEST(igt_request_rewind),
		SUBTEST(mock_breadcrumbs_smoketest),
	};
	struct drm_i915_private *i915;
	intel_wakeref_t wakeref;
	int err = 0;

	i915 = mock_gem_device();
	if (!i915)
		return -ENOMEM;

	with_intel_runtime_pm(&i915->runtime_pm, wakeref)
		err = i915_subtests(tests, i915);

	drm_dev_put(&i915->drm);

	return err;
}

static int live_nop_request(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct intel_engine_cs *engine;
	intel_wakeref_t wakeref;
	struct igt_live_test t;
	unsigned int id;
	int err = -ENODEV;

	/* Submit various sized batches of empty requests, to each engine
	 * (individually), and wait for the batch to complete. We can check
	 * the overhead of submitting requests to the hardware.
	 */

	mutex_lock(&i915->drm.struct_mutex);
	wakeref = intel_runtime_pm_get(&i915->runtime_pm);

	for_each_engine(engine, i915, id) {
		struct i915_request *request = NULL;
		unsigned long n, prime;
		IGT_TIMEOUT(end_time);
		ktime_t times[2] = {};

		err = igt_live_test_begin(&t, i915, __func__, engine->name);
		if (err)
			goto out_unlock;

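		/*
		 * Step the batch size through the primes up to 8192, sampling
		 * a wide spread of submission depths within the timeout.
		 */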
		for_each_prime_number_from(prime, 1, 8192) {
			times[1] = ktime_get_raw();

			for (n = 0; n < prime; n++) {
				request = i915_request_create(engine->kernel_context);
				if (IS_ERR(request)) {
					err = PTR_ERR(request);
					goto out_unlock;
				}

				/* This space is left intentionally blank.
				 *
				 * We do not actually want to perform any
				 * action with this request, we just want
				 * to measure the latency in allocation
				 * and submission of our breadcrumbs -
				 * ensuring that the bare request is sufficient
				 * for the system to work (i.e. proper HEAD
				 * tracking of the rings, interrupt handling,
				 * etc). It also gives us the lowest bounds
				 * for latency.
				 */

				i915_request_add(request);
			}
			i915_request_wait(request, 0, MAX_SCHEDULE_TIMEOUT);

			times[1] = ktime_sub(ktime_get_raw(), times[1]);
			if (prime == 1)
				times[0] = times[1];

			if (__igt_timeout(end_time, NULL))
				break;
		}

		err = igt_live_test_end(&t);
		if (err)
			goto out_unlock;

		pr_info("Request latencies on %s: 1 = %lluns, %lu = %lluns\n",
			engine->name,
			ktime_to_ns(times[0]),
			prime, div64_u64(ktime_to_ns(times[1]), prime));
	}

out_unlock:
	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
	mutex_unlock(&i915->drm.struct_mutex);
	return err;
}

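/*
 * Build a single-page batch containing just MI_BATCH_BUFFER_END, pinned into
 * the global GTT so that any engine can execute it as a no-op batch.
 */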
static struct i915_vma *empty_batch(struct drm_i915_private *i915)
{
	struct drm_i915_gem_object *obj;
	struct i915_vma *vma;
	u32 *cmd;
	int err;

	obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
	if (IS_ERR(obj))
		return ERR_CAST(obj);

	cmd = i915_gem_object_pin_map(obj, I915_MAP_WB);
	if (IS_ERR(cmd)) {
		err = PTR_ERR(cmd);
		goto err;
	}

	*cmd = MI_BATCH_BUFFER_END;

	__i915_gem_object_flush_map(obj, 0, 64);
	i915_gem_object_unpin_map(obj);

	intel_gt_chipset_flush(&i915->gt);

	vma = i915_vma_instance(obj, &i915->ggtt.vm, NULL);
	if (IS_ERR(vma)) {
		err = PTR_ERR(vma);
		goto err;
	}

	err = i915_vma_pin(vma, 0, 0, PIN_USER | PIN_GLOBAL);
	if (err)
		goto err;

	return vma;

err:
	i915_gem_object_put(obj);
	return ERR_PTR(err);
}

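/*
 * Submit the empty batch on the engine's kernel context and return the
 * request so the caller can wait for it.
 */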
static struct i915_request *
empty_request(struct intel_engine_cs *engine,
	      struct i915_vma *batch)
{
	struct i915_request *request;
	int err;

	request = i915_request_create(engine->kernel_context);
	if (IS_ERR(request))
		return request;

	err = engine->emit_bb_start(request,
				    batch->node.start,
				    batch->node.size,
				    I915_DISPATCH_SECURE);
	if (err)
		goto out_request;

out_request:
	i915_request_add(request);
	return err ? ERR_PTR(err) : request;
}

static int live_empty_request(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct intel_engine_cs *engine;
	intel_wakeref_t wakeref;
	struct igt_live_test t;
	struct i915_vma *batch;
	unsigned int id;
	int err = 0;

	/* Submit various sized batches of empty requests, to each engine
	 * (individually), and wait for the batch to complete. We can check
	 * the overhead of submitting requests to the hardware.
	 */

	mutex_lock(&i915->drm.struct_mutex);
	wakeref = intel_runtime_pm_get(&i915->runtime_pm);

	batch = empty_batch(i915);
	if (IS_ERR(batch)) {
		err = PTR_ERR(batch);
		goto out_unlock;
	}

	for_each_engine(engine, i915, id) {
		IGT_TIMEOUT(end_time);
		struct i915_request *request;
		unsigned long n, prime;
		ktime_t times[2] = {};

		err = igt_live_test_begin(&t, i915, __func__, engine->name);
		if (err)
			goto out_batch;

		/* Warmup / preload */
		request = empty_request(engine, batch);
		if (IS_ERR(request)) {
			err = PTR_ERR(request);
			goto out_batch;
		}
		i915_request_wait(request, 0, MAX_SCHEDULE_TIMEOUT);

		for_each_prime_number_from(prime, 1, 8192) {
			times[1] = ktime_get_raw();

			for (n = 0; n < prime; n++) {
				request = empty_request(engine, batch);
				if (IS_ERR(request)) {
					err = PTR_ERR(request);
					goto out_batch;
				}
			}
			i915_request_wait(request, 0, MAX_SCHEDULE_TIMEOUT);

			times[1] = ktime_sub(ktime_get_raw(), times[1]);
			if (prime == 1)
				times[0] = times[1];

			if (__igt_timeout(end_time, NULL))
				break;
		}

		err = igt_live_test_end(&t);
		if (err)
			goto out_batch;

		pr_info("Batch latencies on %s: 1 = %lluns, %lu = %lluns\n",
			engine->name,
			ktime_to_ns(times[0]),
			prime, div64_u64(ktime_to_ns(times[1]), prime));
	}

out_batch:
	i915_vma_unpin(batch);
	i915_vma_put(batch);
out_unlock:
	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
	mutex_unlock(&i915->drm.struct_mutex);
	return err;
}

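/*
 * Build a batch that branches back to its own start, i.e. an infinite loop
 * that spins on the GPU until recursive_batch_resolve() (or an error path)
 * overwrites the first dword with MI_BATCH_BUFFER_END.
 */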
static struct i915_vma *recursive_batch(struct drm_i915_private *i915)
{
	struct i915_gem_context *ctx = i915->kernel_context;
	struct i915_address_space *vm = ctx->vm ?: &i915->ggtt.vm;
	struct drm_i915_gem_object *obj;
	const int gen = INTEL_GEN(i915);
	struct i915_vma *vma;
	u32 *cmd;
	int err;

	obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
	if (IS_ERR(obj))
		return ERR_CAST(obj);

	vma = i915_vma_instance(obj, vm, NULL);
	if (IS_ERR(vma)) {
		err = PTR_ERR(vma);
		goto err;
	}

	err = i915_vma_pin(vma, 0, 0, PIN_USER);
	if (err)
		goto err;

	cmd = i915_gem_object_pin_map(obj, I915_MAP_WC);
	if (IS_ERR(cmd)) {
		err = PTR_ERR(cmd);
		goto err;
	}

	if (gen >= 8) {
		*cmd++ = MI_BATCH_BUFFER_START | 1 << 8 | 1;
		*cmd++ = lower_32_bits(vma->node.start);
		*cmd++ = upper_32_bits(vma->node.start);
	} else if (gen >= 6) {
		*cmd++ = MI_BATCH_BUFFER_START | 1 << 8;
		*cmd++ = lower_32_bits(vma->node.start);
	} else {
		*cmd++ = MI_BATCH_BUFFER_START | MI_BATCH_GTT;
		*cmd++ = lower_32_bits(vma->node.start);
	}
	*cmd++ = MI_BATCH_BUFFER_END; /* terminate early in case of error */

	__i915_gem_object_flush_map(obj, 0, 64);
	i915_gem_object_unpin_map(obj);

	intel_gt_chipset_flush(&i915->gt);

	return vma;

err:
	i915_gem_object_put(obj);
	return ERR_PTR(err);
}

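/*
 * Terminate a recursive_batch() loop by rewriting its first instruction to
 * MI_BATCH_BUFFER_END and flushing the write so the GPU sees it.
 */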
static int recursive_batch_resolve(struct i915_vma *batch)
{
	u32 *cmd;

	cmd = i915_gem_object_pin_map(batch->obj, I915_MAP_WC);
	if (IS_ERR(cmd))
		return PTR_ERR(cmd);

	*cmd = MI_BATCH_BUFFER_END;
	intel_gt_chipset_flush(batch->vm->gt);

	i915_gem_object_unpin_map(batch->obj);

	return 0;
}

static int live_all_engines(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct intel_engine_cs *engine;
	struct i915_request *request[I915_NUM_ENGINES];
	intel_wakeref_t wakeref;
	struct igt_live_test t;
	struct i915_vma *batch;
	unsigned int id;
	int err;

	/* Check we can submit requests to all engines simultaneously. We
	 * send a recursive batch to each engine - checking that we don't
	 * block doing so, and that they don't complete too soon.
	 */

	mutex_lock(&i915->drm.struct_mutex);
	wakeref = intel_runtime_pm_get(&i915->runtime_pm);

	err = igt_live_test_begin(&t, i915, __func__, "");
	if (err)
		goto out_unlock;

	batch = recursive_batch(i915);
	if (IS_ERR(batch)) {
		err = PTR_ERR(batch);
		pr_err("%s: Unable to create batch, err=%d\n", __func__, err);
		goto out_unlock;
	}

	for_each_engine(engine, i915, id) {
		request[id] = i915_request_create(engine->kernel_context);
		if (IS_ERR(request[id])) {
			err = PTR_ERR(request[id]);
			pr_err("%s: Request allocation failed with err=%d\n",
			       __func__, err);
			goto out_request;
		}

		err = engine->emit_bb_start(request[id],
					    batch->node.start,
					    batch->node.size,
					    0);
		GEM_BUG_ON(err);
		request[id]->batch = batch;

		i915_vma_lock(batch);
		err = i915_vma_move_to_active(batch, request[id], 0);
		i915_vma_unlock(batch);
		GEM_BUG_ON(err);

		i915_request_get(request[id]);
		i915_request_add(request[id]);
	}

	for_each_engine(engine, i915, id) {
		if (i915_request_completed(request[id])) {
			pr_err("%s(%s): request completed too early!\n",
			       __func__, engine->name);
			err = -EINVAL;
			goto out_request;
		}
	}

	err = recursive_batch_resolve(batch);
	if (err) {
		pr_err("%s: failed to resolve batch, err=%d\n", __func__, err);
		goto out_request;
	}

	for_each_engine(engine, i915, id) {
		long timeout;

		timeout = i915_request_wait(request[id], 0,
					    MAX_SCHEDULE_TIMEOUT);
		if (timeout < 0) {
			err = timeout;
			pr_err("%s: error waiting for request on %s, err=%d\n",
			       __func__, engine->name, err);
			goto out_request;
		}

		GEM_BUG_ON(!i915_request_completed(request[id]));
		i915_request_put(request[id]);
		request[id] = NULL;
	}

	err = igt_live_test_end(&t);

out_request:
	for_each_engine(engine, i915, id)
		if (request[id])
			i915_request_put(request[id]);
	i915_vma_unpin(batch);
	i915_vma_put(batch);
out_unlock:
	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
	mutex_unlock(&i915->drm.struct_mutex);
	return err;
}

static int live_sequential_engines(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct i915_request *request[I915_NUM_ENGINES] = {};
	struct i915_request *prev = NULL;
	struct intel_engine_cs *engine;
	intel_wakeref_t wakeref;
	struct igt_live_test t;
	unsigned int id;
	int err;

	/* Check we can submit requests to all engines sequentially, such
	 * that each successive request waits for the earlier ones. This
	 * tests that we don't execute requests out of order, even though
	 * they are running on independent engines.
	 */

	mutex_lock(&i915->drm.struct_mutex);
	wakeref = intel_runtime_pm_get(&i915->runtime_pm);

	err = igt_live_test_begin(&t, i915, __func__, "");
	if (err)
		goto out_unlock;

	for_each_engine(engine, i915, id) {
		struct i915_vma *batch;

		batch = recursive_batch(i915);
		if (IS_ERR(batch)) {
			err = PTR_ERR(batch);
			pr_err("%s: Unable to create batch for %s, err=%d\n",
			       __func__, engine->name, err);
			goto out_unlock;
		}

		request[id] = i915_request_create(engine->kernel_context);
		if (IS_ERR(request[id])) {
			err = PTR_ERR(request[id]);
			pr_err("%s: Request allocation failed for %s with err=%d\n",
			       __func__, engine->name, err);
			goto out_request;
		}

		if (prev) {
			err = i915_request_await_dma_fence(request[id],
							   &prev->fence);
			if (err) {
				i915_request_add(request[id]);
				pr_err("%s: Request await failed for %s with err=%d\n",
				       __func__, engine->name, err);
				goto out_request;
			}
		}

		err = engine->emit_bb_start(request[id],
					    batch->node.start,
					    batch->node.size,
					    0);
		GEM_BUG_ON(err);
		request[id]->batch = batch;

		i915_vma_lock(batch);
		err = i915_vma_move_to_active(batch, request[id], 0);
		i915_vma_unlock(batch);
		GEM_BUG_ON(err);

		i915_request_get(request[id]);
		i915_request_add(request[id]);

		prev = request[id];
	}

	for_each_engine(engine, i915, id) {
		long timeout;

		if (i915_request_completed(request[id])) {
			pr_err("%s(%s): request completed too early!\n",
			       __func__, engine->name);
			err = -EINVAL;
			goto out_request;
		}

		err = recursive_batch_resolve(request[id]->batch);
		if (err) {
			pr_err("%s: failed to resolve batch, err=%d\n",
			       __func__, err);
			goto out_request;
		}

		timeout = i915_request_wait(request[id], 0,
					    MAX_SCHEDULE_TIMEOUT);
		if (timeout < 0) {
			err = timeout;
			pr_err("%s: error waiting for request on %s, err=%d\n",
			       __func__, engine->name, err);
			goto out_request;
		}

		GEM_BUG_ON(!i915_request_completed(request[id]));
	}

	err = igt_live_test_end(&t);

out_request:
	for_each_engine(engine, i915, id) {
		u32 *cmd;

		if (!request[id])
			break;

		cmd = i915_gem_object_pin_map(request[id]->batch->obj,
					      I915_MAP_WC);
		if (!IS_ERR(cmd)) {
			*cmd = MI_BATCH_BUFFER_END;
			intel_gt_chipset_flush(engine->gt);

			i915_gem_object_unpin_map(request[id]->batch->obj);
		}

		i915_vma_put(request[id]->batch);
		i915_request_put(request[id]);
	}
out_unlock:
	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
	mutex_unlock(&i915->drm.struct_mutex);
	return err;
}

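/*
 * Estimate how many nop requests we can queue on this engine without risking
 * wrapping the legacy global ringbuffer; execlists rings are treated as
 * effectively inexhaustible.
 */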
static int
max_batches(struct i915_gem_context *ctx, struct intel_engine_cs *engine)
{
	struct i915_request *rq;
	int ret;

	/*
	 * Before execlists, all contexts share the same ringbuffer. With
	 * execlists, each context/engine has a separate ringbuffer and
	 * for the purposes of this test, inexhaustible.
	 *
	 * For the global ringbuffer though, we have to be very careful
	 * that we do not wrap while preventing the execution of requests
	 * with an unsignaled fence.
	 */
	if (HAS_EXECLISTS(ctx->i915))
		return INT_MAX;

	rq = igt_request_alloc(ctx, engine);
	if (IS_ERR(rq)) {
		ret = PTR_ERR(rq);
	} else {
		int sz;

		ret = rq->ring->size - rq->reserved_space;
		i915_request_add(rq);

		sz = rq->ring->emit - rq->head;
		if (sz < 0)
			sz += rq->ring->size;
		ret /= sz;
		ret /= 2; /* leave half spare, in case of emergency! */
	}

	return ret;
}

static int live_breadcrumbs_smoketest(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct smoketest t[I915_NUM_ENGINES];
	unsigned int ncpus = num_online_cpus();
	unsigned long num_waits, num_fences;
	struct intel_engine_cs *engine;
	struct task_struct **threads;
	struct igt_live_test live;
	enum intel_engine_id id;
	intel_wakeref_t wakeref;
	struct drm_file *file;
	unsigned int n;
	int ret = 0;

	/*
	 * Smoketest our breadcrumb/signal handling for requests across multiple
	 * threads. A very simple test to only catch the most egregious of bugs.
	 * See __igt_breadcrumbs_smoketest();
	 *
	 * On real hardware this time.
	 */

	wakeref = intel_runtime_pm_get(&i915->runtime_pm);

	file = mock_file(i915);
	if (IS_ERR(file)) {
		ret = PTR_ERR(file);
		goto out_rpm;
	}

	threads = kcalloc(ncpus * I915_NUM_ENGINES,
			  sizeof(*threads),
			  GFP_KERNEL);
	if (!threads) {
		ret = -ENOMEM;
		goto out_file;
	}

	memset(&t[0], 0, sizeof(t[0]));
	t[0].request_alloc = __live_request_alloc;
	t[0].ncontexts = 64;
	t[0].contexts = kmalloc_array(t[0].ncontexts,
				      sizeof(*t[0].contexts),
				      GFP_KERNEL);
	if (!t[0].contexts) {
		ret = -ENOMEM;
		goto out_threads;
	}

	mutex_lock(&i915->drm.struct_mutex);
	for (n = 0; n < t[0].ncontexts; n++) {
		t[0].contexts[n] = live_context(i915, file);
		if (!t[0].contexts[n]) {
			ret = -ENOMEM;
			goto out_contexts;
		}
	}

	ret = igt_live_test_begin(&live, i915, __func__, "");
	if (ret)
		goto out_contexts;

	for_each_engine(engine, i915, id) {
		t[id] = t[0];
		t[id].engine = engine;
		t[id].max_batch = max_batches(t[0].contexts[0], engine);
		if (t[id].max_batch < 0) {
			ret = t[id].max_batch;
			mutex_unlock(&i915->drm.struct_mutex);
			goto out_flush;
		}
		/* One ring interleaved between requests from all cpus */
		t[id].max_batch /= num_online_cpus() + 1;
		pr_debug("Limiting batches to %d requests on %s\n",
			 t[id].max_batch, engine->name);

		for (n = 0; n < ncpus; n++) {
			struct task_struct *tsk;

			tsk = kthread_run(__igt_breadcrumbs_smoketest,
					  &t[id], "igt/%d.%d", id, n);
			if (IS_ERR(tsk)) {
				ret = PTR_ERR(tsk);
				mutex_unlock(&i915->drm.struct_mutex);
				goto out_flush;
			}

			get_task_struct(tsk);
			threads[id * ncpus + n] = tsk;
		}
	}
	mutex_unlock(&i915->drm.struct_mutex);

	msleep(jiffies_to_msecs(i915_selftest.timeout_jiffies));

out_flush:
	num_waits = 0;
	num_fences = 0;
	for_each_engine(engine, i915, id) {
		for (n = 0; n < ncpus; n++) {
			struct task_struct *tsk = threads[id * ncpus + n];
			int err;

			if (!tsk)
				continue;

			err = kthread_stop(tsk);
			if (err < 0 && !ret)
				ret = err;

			put_task_struct(tsk);
		}

		num_waits += atomic_long_read(&t[id].num_waits);
		num_fences += atomic_long_read(&t[id].num_fences);
	}
	pr_info("Completed %lu waits for %lu fences across %d engines and %d cpus\n",
		num_waits, num_fences, RUNTIME_INFO(i915)->num_engines, ncpus);

	mutex_lock(&i915->drm.struct_mutex);
	ret = igt_live_test_end(&live) ?: ret;
out_contexts:
	mutex_unlock(&i915->drm.struct_mutex);
	kfree(t[0].contexts);
out_threads:
	kfree(threads);
out_file:
	mock_file_free(i915, file);
out_rpm:
	intel_runtime_pm_put(&i915->runtime_pm, wakeref);

	return ret;
}

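/* Entry point for the live request selftests; skipped if the GT is already wedged. */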
int i915_request_live_selftests(struct drm_i915_private *i915)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(live_nop_request),
		SUBTEST(live_all_engines),
		SUBTEST(live_sequential_engines),
		SUBTEST(live_empty_request),
		SUBTEST(live_breadcrumbs_smoketest),
	};

	if (intel_gt_is_wedged(&i915->gt))
		return 0;

	return i915_subtests(tests, i915);
}