1 /*
2  * Copyright © 2016 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  *
23  */
24 
25 #include <linux/prime_numbers.h>
26 
27 #include "../i915_selftest.h"
28 #include "i915_random.h"
29 #include "igt_live_test.h"
30 #include "lib_sw_fence.h"
31 
32 #include "mock_context.h"
33 #include "mock_drm.h"
34 #include "mock_gem_device.h"
35 
36 static int igt_add_request(void *arg)
37 {
38 	struct drm_i915_private *i915 = arg;
39 	struct i915_request *request;
40 	int err = -ENOMEM;
41 
42 	/* Basic preliminary test to create a request and let it loose! */
43 
44 	mutex_lock(&i915->drm.struct_mutex);
45 	request = mock_request(i915->engine[RCS],
46 			       i915->kernel_context,
47 			       HZ / 10);
48 	if (!request)
49 		goto out_unlock;
50 
51 	i915_request_add(request);
52 
53 	err = 0;
54 out_unlock:
55 	mutex_unlock(&i915->drm.struct_mutex);
56 	return err;
57 }
58 
59 static int igt_wait_request(void *arg)
60 {
61 	const long T = HZ / 4;
62 	struct drm_i915_private *i915 = arg;
63 	struct i915_request *request;
64 	int err = -EINVAL;
65 
66 	/* Submit a request, then wait upon it */
67 
68 	mutex_lock(&i915->drm.struct_mutex);
69 	request = mock_request(i915->engine[RCS], i915->kernel_context, T);
70 	if (!request) {
71 		err = -ENOMEM;
72 		goto out_unlock;
73 	}
74 
75 	if (i915_request_wait(request, I915_WAIT_LOCKED, 0) != -ETIME) {
76 		pr_err("request wait (busy query) succeeded (expected timeout before submit!)\n");
77 		goto out_unlock;
78 	}
79 
80 	if (i915_request_wait(request, I915_WAIT_LOCKED, T) != -ETIME) {
81 		pr_err("request wait succeeded (expected timeout before submit!)\n");
82 		goto out_unlock;
83 	}
84 
85 	if (i915_request_completed(request)) {
86 		pr_err("request completed before submit!!\n");
87 		goto out_unlock;
88 	}
89 
90 	i915_request_add(request);
91 
92 	if (i915_request_wait(request, I915_WAIT_LOCKED, 0) != -ETIME) {
93 		pr_err("request wait (busy query) succeeded (expected timeout after submit!)\n");
94 		goto out_unlock;
95 	}
96 
97 	if (i915_request_completed(request)) {
98 		pr_err("request completed immediately!\n");
99 		goto out_unlock;
100 	}
101 
102 	if (i915_request_wait(request, I915_WAIT_LOCKED, T / 2) != -ETIME) {
103 		pr_err("request wait succeeded (expected timeout!)\n");
104 		goto out_unlock;
105 	}
106 
107 	if (i915_request_wait(request, I915_WAIT_LOCKED, T) == -ETIME) {
108 		pr_err("request wait timed out!\n");
109 		goto out_unlock;
110 	}
111 
112 	if (!i915_request_completed(request)) {
113 		pr_err("request not complete after waiting!\n");
114 		goto out_unlock;
115 	}
116 
117 	if (i915_request_wait(request, I915_WAIT_LOCKED, T) == -ETIME) {
118 		pr_err("request wait timed out when already complete!\n");
119 		goto out_unlock;
120 	}
121 
122 	err = 0;
123 out_unlock:
124 	mock_device_flush(i915);
125 	mutex_unlock(&i915->drm.struct_mutex);
126 	return err;
127 }
128 
129 static int igt_fence_wait(void *arg)
130 {
131 	const long T = HZ / 4;
132 	struct drm_i915_private *i915 = arg;
133 	struct i915_request *request;
134 	int err = -EINVAL;
135 
136 	/* Submit a request, treat it as a fence and wait upon it */
137 
138 	mutex_lock(&i915->drm.struct_mutex);
139 	request = mock_request(i915->engine[RCS], i915->kernel_context, T);
140 	if (!request) {
141 		err = -ENOMEM;
142 		goto out_locked;
143 	}
	mutex_unlock(&i915->drm.struct_mutex); /* safe as we are the only user */
145 
146 	if (dma_fence_wait_timeout(&request->fence, false, T) != -ETIME) {
147 		pr_err("fence wait success before submit (expected timeout)!\n");
148 		goto out_device;
149 	}
150 
151 	mutex_lock(&i915->drm.struct_mutex);
152 	i915_request_add(request);
153 	mutex_unlock(&i915->drm.struct_mutex);
154 
155 	if (dma_fence_is_signaled(&request->fence)) {
156 		pr_err("fence signaled immediately!\n");
157 		goto out_device;
158 	}
159 
160 	if (dma_fence_wait_timeout(&request->fence, false, T / 2) != -ETIME) {
161 		pr_err("fence wait success after submit (expected timeout)!\n");
162 		goto out_device;
163 	}
164 
165 	if (dma_fence_wait_timeout(&request->fence, false, T) <= 0) {
166 		pr_err("fence wait timed out (expected success)!\n");
167 		goto out_device;
168 	}
169 
170 	if (!dma_fence_is_signaled(&request->fence)) {
171 		pr_err("fence unsignaled after waiting!\n");
172 		goto out_device;
173 	}
174 
175 	if (dma_fence_wait_timeout(&request->fence, false, T) <= 0) {
176 		pr_err("fence wait timed out when complete (expected success)!\n");
177 		goto out_device;
178 	}
179 
180 	err = 0;
181 out_device:
182 	mutex_lock(&i915->drm.struct_mutex);
183 out_locked:
184 	mock_device_flush(i915);
185 	mutex_unlock(&i915->drm.struct_mutex);
186 	return err;
187 }
188 
189 static int igt_request_rewind(void *arg)
190 {
191 	struct drm_i915_private *i915 = arg;
192 	struct i915_request *request, *vip;
193 	struct i915_gem_context *ctx[2];
194 	int err = -EINVAL;
195 
196 	mutex_lock(&i915->drm.struct_mutex);
197 	ctx[0] = mock_context(i915, "A");
198 	request = mock_request(i915->engine[RCS], ctx[0], 2 * HZ);
199 	if (!request) {
200 		err = -ENOMEM;
201 		goto err_context_0;
202 	}
203 
204 	i915_request_get(request);
205 	i915_request_add(request);
206 
207 	ctx[1] = mock_context(i915, "B");
208 	vip = mock_request(i915->engine[RCS], ctx[1], 0);
209 	if (!vip) {
210 		err = -ENOMEM;
211 		goto err_context_1;
212 	}
213 
214 	/* Simulate preemption by manual reordering */
215 	if (!mock_cancel_request(request)) {
216 		pr_err("failed to cancel request (already executed)!\n");
217 		i915_request_add(vip);
218 		goto err_context_1;
219 	}
220 	i915_request_get(vip);
221 	i915_request_add(vip);
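	/* Resubmit the cancelled request so that it now queues up behind vip */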
222 	rcu_read_lock();
223 	request->engine->submit_request(request);
224 	rcu_read_unlock();
225 
226 	mutex_unlock(&i915->drm.struct_mutex);
227 
228 	if (i915_request_wait(vip, 0, HZ) == -ETIME) {
229 		pr_err("timed out waiting for high priority request, vip.seqno=%d, current seqno=%d\n",
230 		       vip->global_seqno, intel_engine_get_seqno(i915->engine[RCS]));
231 		goto err;
232 	}
233 
234 	if (i915_request_completed(request)) {
235 		pr_err("low priority request already completed\n");
236 		goto err;
237 	}
238 
239 	err = 0;
240 err:
241 	i915_request_put(vip);
242 	mutex_lock(&i915->drm.struct_mutex);
243 err_context_1:
244 	mock_context_close(ctx[1]);
245 	i915_request_put(request);
246 err_context_0:
247 	mock_context_close(ctx[0]);
248 	mock_device_flush(i915);
249 	mutex_unlock(&i915->drm.struct_mutex);
250 	return err;
251 }
252 
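/*
 * Shared state for the breadcrumbs smoketests below: the engine and pool of
 * contexts to use, per-run counters accumulated by the worker threads, and a
 * request_alloc() hook so the same loop can drive either mock or live
 * requests.
 */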
253 struct smoketest {
254 	struct intel_engine_cs *engine;
255 	struct i915_gem_context **contexts;
256 	atomic_long_t num_waits, num_fences;
257 	int ncontexts, max_batch;
258 	struct i915_request *(*request_alloc)(struct i915_gem_context *,
259 					      struct intel_engine_cs *);
260 };
261 
262 static struct i915_request *
263 __mock_request_alloc(struct i915_gem_context *ctx,
264 		     struct intel_engine_cs *engine)
265 {
266 	return mock_request(engine, ctx, 0);
267 }
268 
269 static struct i915_request *
270 __live_request_alloc(struct i915_gem_context *ctx,
271 		     struct intel_engine_cs *engine)
272 {
273 	return i915_request_alloc(engine, ctx);
274 }
275 
276 static int __igt_breadcrumbs_smoketest(void *arg)
277 {
278 	struct smoketest *t = arg;
279 	struct mutex * const BKL = &t->engine->i915->drm.struct_mutex;
280 	const unsigned int max_batch = min(t->ncontexts, t->max_batch) - 1;
281 	const unsigned int total = 4 * t->ncontexts + 1;
282 	unsigned int num_waits = 0, num_fences = 0;
283 	struct i915_request **requests;
284 	I915_RND_STATE(prng);
285 	unsigned int *order;
286 	int err = 0;
287 
288 	/*
289 	 * A very simple test to catch the most egregious of list handling bugs.
290 	 *
291 	 * At its heart, we simply create oodles of requests running across
292 	 * multiple kthreads and enable signaling on them, for the sole purpose
293 	 * of stressing our breadcrumb handling. The only inspection we do is
294 	 * that the fences were marked as signaled.
295 	 */
296 
297 	requests = kmalloc_array(total, sizeof(*requests), GFP_KERNEL);
298 	if (!requests)
299 		return -ENOMEM;
300 
301 	order = i915_random_order(total, &prng);
302 	if (!order) {
303 		err = -ENOMEM;
304 		goto out_requests;
305 	}
306 
307 	while (!kthread_should_stop()) {
308 		struct i915_sw_fence *submit, *wait;
309 		unsigned int n, count;
310 
311 		submit = heap_fence_create(GFP_KERNEL);
312 		if (!submit) {
313 			err = -ENOMEM;
314 			break;
315 		}
316 
317 		wait = heap_fence_create(GFP_KERNEL);
318 		if (!wait) {
319 			i915_sw_fence_commit(submit);
320 			heap_fence_put(submit);
			err = -ENOMEM;
322 			break;
323 		}
324 
325 		i915_random_reorder(order, total, &prng);
326 		count = 1 + i915_prandom_u32_max_state(max_batch, &prng);
327 
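		/*
		 * Build a random-sized batch of requests, each gated on the
		 * unsignaled 'submit' fence and feeding into the composite
		 * 'wait' fence, so that the whole batch can be released and
		 * then awaited in one go.
		 */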
328 		for (n = 0; n < count; n++) {
329 			struct i915_gem_context *ctx =
330 				t->contexts[order[n] % t->ncontexts];
331 			struct i915_request *rq;
332 
333 			mutex_lock(BKL);
334 
335 			rq = t->request_alloc(ctx, t->engine);
336 			if (IS_ERR(rq)) {
337 				mutex_unlock(BKL);
338 				err = PTR_ERR(rq);
339 				count = n;
340 				break;
341 			}
342 
343 			err = i915_sw_fence_await_sw_fence_gfp(&rq->submit,
344 							       submit,
345 							       GFP_KERNEL);
346 
347 			requests[n] = i915_request_get(rq);
348 			i915_request_add(rq);
349 
350 			mutex_unlock(BKL);
351 
352 			if (err >= 0)
353 				err = i915_sw_fence_await_dma_fence(wait,
354 								    &rq->fence,
355 								    0,
356 								    GFP_KERNEL);
357 
358 			if (err < 0) {
359 				i915_request_put(rq);
360 				count = n;
361 				break;
362 			}
363 		}
364 
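		/* Release the queued requests, then wait for them all to complete */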
365 		i915_sw_fence_commit(submit);
366 		i915_sw_fence_commit(wait);
367 
368 		if (!wait_event_timeout(wait->wait,
369 					i915_sw_fence_done(wait),
370 					HZ / 2)) {
371 			struct i915_request *rq = requests[count - 1];
372 
373 			pr_err("waiting for %d fences (last %llx:%lld) on %s timed out!\n",
374 			       count,
375 			       rq->fence.context, rq->fence.seqno,
376 			       t->engine->name);
377 			i915_gem_set_wedged(t->engine->i915);
378 			GEM_BUG_ON(!i915_request_completed(rq));
379 			i915_sw_fence_wait(wait);
380 			err = -EIO;
381 		}
382 
383 		for (n = 0; n < count; n++) {
384 			struct i915_request *rq = requests[n];
385 
386 			if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
387 				      &rq->fence.flags)) {
388 				pr_err("%llu:%llu was not signaled!\n",
389 				       rq->fence.context, rq->fence.seqno);
390 				err = -EINVAL;
391 			}
392 
393 			i915_request_put(rq);
394 		}
395 
396 		heap_fence_put(wait);
397 		heap_fence_put(submit);
398 
399 		if (err < 0)
400 			break;
401 
402 		num_fences += count;
403 		num_waits++;
404 
405 		cond_resched();
406 	}
407 
408 	atomic_long_add(num_fences, &t->num_fences);
409 	atomic_long_add(num_waits, &t->num_waits);
410 
411 	kfree(order);
412 out_requests:
413 	kfree(requests);
414 	return err;
415 }
416 
417 static int mock_breadcrumbs_smoketest(void *arg)
418 {
419 	struct drm_i915_private *i915 = arg;
420 	struct smoketest t = {
421 		.engine = i915->engine[RCS],
422 		.ncontexts = 1024,
423 		.max_batch = 1024,
424 		.request_alloc = __mock_request_alloc
425 	};
426 	unsigned int ncpus = num_online_cpus();
427 	struct task_struct **threads;
428 	unsigned int n;
429 	int ret = 0;
430 
431 	/*
432 	 * Smoketest our breadcrumb/signal handling for requests across multiple
	 * threads. A very simple test to catch only the most egregious of bugs.
434 	 * See __igt_breadcrumbs_smoketest();
435 	 */
436 
437 	threads = kmalloc_array(ncpus, sizeof(*threads), GFP_KERNEL);
438 	if (!threads)
439 		return -ENOMEM;
440 
441 	t.contexts =
442 		kmalloc_array(t.ncontexts, sizeof(*t.contexts), GFP_KERNEL);
443 	if (!t.contexts) {
444 		ret = -ENOMEM;
445 		goto out_threads;
446 	}
447 
448 	mutex_lock(&t.engine->i915->drm.struct_mutex);
449 	for (n = 0; n < t.ncontexts; n++) {
450 		t.contexts[n] = mock_context(t.engine->i915, "mock");
451 		if (!t.contexts[n]) {
452 			ret = -ENOMEM;
453 			goto out_contexts;
454 		}
455 	}
456 	mutex_unlock(&t.engine->i915->drm.struct_mutex);
457 
458 	for (n = 0; n < ncpus; n++) {
459 		threads[n] = kthread_run(__igt_breadcrumbs_smoketest,
460 					 &t, "igt/%d", n);
461 		if (IS_ERR(threads[n])) {
462 			ret = PTR_ERR(threads[n]);
463 			ncpus = n;
464 			break;
465 		}
466 
467 		get_task_struct(threads[n]);
468 	}
469 
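	/* Let the worker threads hammer away for the selftest timeout */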
470 	msleep(jiffies_to_msecs(i915_selftest.timeout_jiffies));
471 
472 	for (n = 0; n < ncpus; n++) {
473 		int err;
474 
475 		err = kthread_stop(threads[n]);
476 		if (err < 0 && !ret)
477 			ret = err;
478 
479 		put_task_struct(threads[n]);
480 	}
	pr_info("Completed %lu waits for %lu fences across %d cpus\n",
482 		atomic_long_read(&t.num_waits),
483 		atomic_long_read(&t.num_fences),
484 		ncpus);
485 
486 	mutex_lock(&t.engine->i915->drm.struct_mutex);
487 out_contexts:
488 	for (n = 0; n < t.ncontexts; n++) {
489 		if (!t.contexts[n])
490 			break;
491 		mock_context_close(t.contexts[n]);
492 	}
493 	mutex_unlock(&t.engine->i915->drm.struct_mutex);
494 	kfree(t.contexts);
495 out_threads:
496 	kfree(threads);
497 
498 	return ret;
499 }
500 
501 int i915_request_mock_selftests(void)
502 {
503 	static const struct i915_subtest tests[] = {
504 		SUBTEST(igt_add_request),
505 		SUBTEST(igt_wait_request),
506 		SUBTEST(igt_fence_wait),
507 		SUBTEST(igt_request_rewind),
508 		SUBTEST(mock_breadcrumbs_smoketest),
509 	};
510 	struct drm_i915_private *i915;
511 	intel_wakeref_t wakeref;
512 	int err = 0;
513 
514 	i915 = mock_gem_device();
515 	if (!i915)
516 		return -ENOMEM;
517 
518 	with_intel_runtime_pm(i915, wakeref)
519 		err = i915_subtests(tests, i915);
520 
521 	drm_dev_put(&i915->drm);
522 
523 	return err;
524 }
525 
526 static int live_nop_request(void *arg)
527 {
528 	struct drm_i915_private *i915 = arg;
529 	struct intel_engine_cs *engine;
530 	intel_wakeref_t wakeref;
531 	struct igt_live_test t;
532 	unsigned int id;
533 	int err = -ENODEV;
534 
	/* Submit various-sized sequences of nop requests to each engine
	 * (individually), and wait for them to complete. We can check
537 	 * the overhead of submitting requests to the hardware.
538 	 */
539 
540 	mutex_lock(&i915->drm.struct_mutex);
541 	wakeref = intel_runtime_pm_get(i915);
542 
543 	for_each_engine(engine, i915, id) {
544 		struct i915_request *request = NULL;
545 		unsigned long n, prime;
546 		IGT_TIMEOUT(end_time);
547 		ktime_t times[2] = {};
548 
549 		err = igt_live_test_begin(&t, i915, __func__, engine->name);
550 		if (err)
551 			goto out_unlock;
552 
553 		for_each_prime_number_from(prime, 1, 8192) {
554 			times[1] = ktime_get_raw();
555 
556 			for (n = 0; n < prime; n++) {
557 				request = i915_request_alloc(engine,
558 							     i915->kernel_context);
559 				if (IS_ERR(request)) {
560 					err = PTR_ERR(request);
561 					goto out_unlock;
562 				}
563 
564 				/* This space is left intentionally blank.
565 				 *
566 				 * We do not actually want to perform any
567 				 * action with this request, we just want
568 				 * to measure the latency in allocation
569 				 * and submission of our breadcrumbs -
570 				 * ensuring that the bare request is sufficient
571 				 * for the system to work (i.e. proper HEAD
572 				 * tracking of the rings, interrupt handling,
573 				 * etc). It also gives us the lowest bounds
574 				 * for latency.
575 				 */
576 
577 				i915_request_add(request);
578 			}
579 			i915_request_wait(request,
580 					  I915_WAIT_LOCKED,
581 					  MAX_SCHEDULE_TIMEOUT);
582 
583 			times[1] = ktime_sub(ktime_get_raw(), times[1]);
584 			if (prime == 1)
585 				times[0] = times[1];
586 
587 			if (__igt_timeout(end_time, NULL))
588 				break;
589 		}
590 
591 		err = igt_live_test_end(&t);
592 		if (err)
593 			goto out_unlock;
594 
595 		pr_info("Request latencies on %s: 1 = %lluns, %lu = %lluns\n",
596 			engine->name,
597 			ktime_to_ns(times[0]),
598 			prime, div64_u64(ktime_to_ns(times[1]), prime));
599 	}
600 
601 out_unlock:
602 	intel_runtime_pm_put(i915, wakeref);
603 	mutex_unlock(&i915->drm.struct_mutex);
604 	return err;
605 }
606 
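/*
 * Build a one-page batch containing just MI_BATCH_BUFFER_END, pinned into the
 * global GTT so that it can be executed by any engine.
 */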
607 static struct i915_vma *empty_batch(struct drm_i915_private *i915)
608 {
609 	struct drm_i915_gem_object *obj;
610 	struct i915_vma *vma;
611 	u32 *cmd;
612 	int err;
613 
614 	obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
615 	if (IS_ERR(obj))
616 		return ERR_CAST(obj);
617 
618 	cmd = i915_gem_object_pin_map(obj, I915_MAP_WB);
619 	if (IS_ERR(cmd)) {
620 		err = PTR_ERR(cmd);
621 		goto err;
622 	}
623 
624 	*cmd = MI_BATCH_BUFFER_END;
625 	i915_gem_chipset_flush(i915);
626 
627 	i915_gem_object_unpin_map(obj);
628 
629 	err = i915_gem_object_set_to_gtt_domain(obj, false);
630 	if (err)
631 		goto err;
632 
633 	vma = i915_vma_instance(obj, &i915->ggtt.vm, NULL);
634 	if (IS_ERR(vma)) {
635 		err = PTR_ERR(vma);
636 		goto err;
637 	}
638 
639 	err = i915_vma_pin(vma, 0, 0, PIN_USER | PIN_GLOBAL);
640 	if (err)
641 		goto err;
642 
643 	return vma;
644 
645 err:
646 	i915_gem_object_put(obj);
647 	return ERR_PTR(err);
648 }
649 
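/*
 * Submit the empty batch on the kernel context. Note that the request is
 * always added, even if emit_bb_start() fails, so that it is retired cleanly.
 */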
650 static struct i915_request *
651 empty_request(struct intel_engine_cs *engine,
652 	      struct i915_vma *batch)
653 {
654 	struct i915_request *request;
655 	int err;
656 
657 	request = i915_request_alloc(engine, engine->i915->kernel_context);
658 	if (IS_ERR(request))
659 		return request;
660 
661 	err = engine->emit_bb_start(request,
662 				    batch->node.start,
663 				    batch->node.size,
664 				    I915_DISPATCH_SECURE);
665 	if (err)
666 		goto out_request;
667 
668 out_request:
669 	i915_request_add(request);
670 	return err ? ERR_PTR(err) : request;
671 }
672 
673 static int live_empty_request(void *arg)
674 {
675 	struct drm_i915_private *i915 = arg;
676 	struct intel_engine_cs *engine;
677 	intel_wakeref_t wakeref;
678 	struct igt_live_test t;
679 	struct i915_vma *batch;
680 	unsigned int id;
681 	int err = 0;
682 
	/* Submit various-sized batches of empty requests, to each engine
684 	 * (individually), and wait for the batch to complete. We can check
685 	 * the overhead of submitting requests to the hardware.
686 	 */
687 
688 	mutex_lock(&i915->drm.struct_mutex);
689 	wakeref = intel_runtime_pm_get(i915);
690 
691 	batch = empty_batch(i915);
692 	if (IS_ERR(batch)) {
693 		err = PTR_ERR(batch);
694 		goto out_unlock;
695 	}
696 
697 	for_each_engine(engine, i915, id) {
698 		IGT_TIMEOUT(end_time);
699 		struct i915_request *request;
700 		unsigned long n, prime;
701 		ktime_t times[2] = {};
702 
703 		err = igt_live_test_begin(&t, i915, __func__, engine->name);
704 		if (err)
705 			goto out_batch;
706 
707 		/* Warmup / preload */
708 		request = empty_request(engine, batch);
709 		if (IS_ERR(request)) {
710 			err = PTR_ERR(request);
711 			goto out_batch;
712 		}
713 		i915_request_wait(request,
714 				  I915_WAIT_LOCKED,
715 				  MAX_SCHEDULE_TIMEOUT);
716 
717 		for_each_prime_number_from(prime, 1, 8192) {
718 			times[1] = ktime_get_raw();
719 
720 			for (n = 0; n < prime; n++) {
721 				request = empty_request(engine, batch);
722 				if (IS_ERR(request)) {
723 					err = PTR_ERR(request);
724 					goto out_batch;
725 				}
726 			}
727 			i915_request_wait(request,
728 					  I915_WAIT_LOCKED,
729 					  MAX_SCHEDULE_TIMEOUT);
730 
731 			times[1] = ktime_sub(ktime_get_raw(), times[1]);
732 			if (prime == 1)
733 				times[0] = times[1];
734 
735 			if (__igt_timeout(end_time, NULL))
736 				break;
737 		}
738 
739 		err = igt_live_test_end(&t);
740 		if (err)
741 			goto out_batch;
742 
743 		pr_info("Batch latencies on %s: 1 = %lluns, %lu = %lluns\n",
744 			engine->name,
745 			ktime_to_ns(times[0]),
746 			prime, div64_u64(ktime_to_ns(times[1]), prime));
747 	}
748 
749 out_batch:
750 	i915_vma_unpin(batch);
751 	i915_vma_put(batch);
752 out_unlock:
753 	intel_runtime_pm_put(i915, wakeref);
754 	mutex_unlock(&i915->drm.struct_mutex);
755 	return err;
756 }
757 
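/*
 * Build a batch that branches straight back to its own start, spinning on the
 * GPU until recursive_batch_resolve() rewrites its first instruction to
 * MI_BATCH_BUFFER_END.
 */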
758 static struct i915_vma *recursive_batch(struct drm_i915_private *i915)
759 {
760 	struct i915_gem_context *ctx = i915->kernel_context;
761 	struct i915_address_space *vm =
762 		ctx->ppgtt ? &ctx->ppgtt->vm : &i915->ggtt.vm;
763 	struct drm_i915_gem_object *obj;
764 	const int gen = INTEL_GEN(i915);
765 	struct i915_vma *vma;
766 	u32 *cmd;
767 	int err;
768 
769 	obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
770 	if (IS_ERR(obj))
771 		return ERR_CAST(obj);
772 
773 	vma = i915_vma_instance(obj, vm, NULL);
774 	if (IS_ERR(vma)) {
775 		err = PTR_ERR(vma);
776 		goto err;
777 	}
778 
779 	err = i915_vma_pin(vma, 0, 0, PIN_USER);
780 	if (err)
781 		goto err;
782 
783 	err = i915_gem_object_set_to_wc_domain(obj, true);
784 	if (err)
785 		goto err;
786 
787 	cmd = i915_gem_object_pin_map(obj, I915_MAP_WC);
788 	if (IS_ERR(cmd)) {
789 		err = PTR_ERR(cmd);
790 		goto err;
791 	}
792 
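	/*
	 * Emit a MI_BATCH_BUFFER_START pointing back at ourselves, using the
	 * addressing mode appropriate for this generation.
	 */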
793 	if (gen >= 8) {
794 		*cmd++ = MI_BATCH_BUFFER_START | 1 << 8 | 1;
795 		*cmd++ = lower_32_bits(vma->node.start);
796 		*cmd++ = upper_32_bits(vma->node.start);
797 	} else if (gen >= 6) {
798 		*cmd++ = MI_BATCH_BUFFER_START | 1 << 8;
799 		*cmd++ = lower_32_bits(vma->node.start);
800 	} else {
801 		*cmd++ = MI_BATCH_BUFFER_START | MI_BATCH_GTT;
802 		*cmd++ = lower_32_bits(vma->node.start);
803 	}
804 	*cmd++ = MI_BATCH_BUFFER_END; /* terminate early in case of error */
805 	i915_gem_chipset_flush(i915);
806 
807 	i915_gem_object_unpin_map(obj);
808 
809 	return vma;
810 
811 err:
812 	i915_gem_object_put(obj);
813 	return ERR_PTR(err);
814 }
815 
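/* Break the self-referencing batch by overwriting its first instruction */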
816 static int recursive_batch_resolve(struct i915_vma *batch)
817 {
818 	u32 *cmd;
819 
820 	cmd = i915_gem_object_pin_map(batch->obj, I915_MAP_WC);
821 	if (IS_ERR(cmd))
822 		return PTR_ERR(cmd);
823 
824 	*cmd = MI_BATCH_BUFFER_END;
825 	i915_gem_chipset_flush(batch->vm->i915);
826 
827 	i915_gem_object_unpin_map(batch->obj);
828 
829 	return 0;
830 }
831 
832 static int live_all_engines(void *arg)
833 {
834 	struct drm_i915_private *i915 = arg;
835 	struct intel_engine_cs *engine;
836 	struct i915_request *request[I915_NUM_ENGINES];
837 	intel_wakeref_t wakeref;
838 	struct igt_live_test t;
839 	struct i915_vma *batch;
840 	unsigned int id;
841 	int err;
842 
843 	/* Check we can submit requests to all engines simultaneously. We
844 	 * send a recursive batch to each engine - checking that we don't
845 	 * block doing so, and that they don't complete too soon.
846 	 */
847 
848 	mutex_lock(&i915->drm.struct_mutex);
849 	wakeref = intel_runtime_pm_get(i915);
850 
851 	err = igt_live_test_begin(&t, i915, __func__, "");
852 	if (err)
853 		goto out_unlock;
854 
855 	batch = recursive_batch(i915);
856 	if (IS_ERR(batch)) {
857 		err = PTR_ERR(batch);
858 		pr_err("%s: Unable to create batch, err=%d\n", __func__, err);
859 		goto out_unlock;
860 	}
861 
862 	for_each_engine(engine, i915, id) {
863 		request[id] = i915_request_alloc(engine, i915->kernel_context);
864 		if (IS_ERR(request[id])) {
865 			err = PTR_ERR(request[id]);
866 			pr_err("%s: Request allocation failed with err=%d\n",
867 			       __func__, err);
868 			goto out_request;
869 		}
870 
871 		err = engine->emit_bb_start(request[id],
872 					    batch->node.start,
873 					    batch->node.size,
874 					    0);
875 		GEM_BUG_ON(err);
876 		request[id]->batch = batch;
877 
878 		if (!i915_gem_object_has_active_reference(batch->obj)) {
879 			i915_gem_object_get(batch->obj);
880 			i915_gem_object_set_active_reference(batch->obj);
881 		}
882 
883 		err = i915_vma_move_to_active(batch, request[id], 0);
884 		GEM_BUG_ON(err);
885 
886 		i915_request_get(request[id]);
887 		i915_request_add(request[id]);
888 	}
889 
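	/*
	 * With every engine spinning on the recursive batch, none of the
	 * requests should have been able to complete yet.
	 */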
890 	for_each_engine(engine, i915, id) {
891 		if (i915_request_completed(request[id])) {
892 			pr_err("%s(%s): request completed too early!\n",
893 			       __func__, engine->name);
894 			err = -EINVAL;
895 			goto out_request;
896 		}
897 	}
898 
899 	err = recursive_batch_resolve(batch);
900 	if (err) {
901 		pr_err("%s: failed to resolve batch, err=%d\n", __func__, err);
902 		goto out_request;
903 	}
904 
905 	for_each_engine(engine, i915, id) {
906 		long timeout;
907 
908 		timeout = i915_request_wait(request[id],
909 					    I915_WAIT_LOCKED,
910 					    MAX_SCHEDULE_TIMEOUT);
911 		if (timeout < 0) {
912 			err = timeout;
913 			pr_err("%s: error waiting for request on %s, err=%d\n",
914 			       __func__, engine->name, err);
915 			goto out_request;
916 		}
917 
918 		GEM_BUG_ON(!i915_request_completed(request[id]));
919 		i915_request_put(request[id]);
920 		request[id] = NULL;
921 	}
922 
923 	err = igt_live_test_end(&t);
924 
925 out_request:
926 	for_each_engine(engine, i915, id)
927 		if (request[id])
928 			i915_request_put(request[id]);
929 	i915_vma_unpin(batch);
930 	i915_vma_put(batch);
931 out_unlock:
932 	intel_runtime_pm_put(i915, wakeref);
933 	mutex_unlock(&i915->drm.struct_mutex);
934 	return err;
935 }
936 
937 static int live_sequential_engines(void *arg)
938 {
939 	struct drm_i915_private *i915 = arg;
940 	struct i915_request *request[I915_NUM_ENGINES] = {};
941 	struct i915_request *prev = NULL;
942 	struct intel_engine_cs *engine;
943 	intel_wakeref_t wakeref;
944 	struct igt_live_test t;
945 	unsigned int id;
946 	int err;
947 
948 	/* Check we can submit requests to all engines sequentially, such
949 	 * that each successive request waits for the earlier ones. This
950 	 * tests that we don't execute requests out of order, even though
951 	 * they are running on independent engines.
952 	 */
953 
954 	mutex_lock(&i915->drm.struct_mutex);
955 	wakeref = intel_runtime_pm_get(i915);
956 
957 	err = igt_live_test_begin(&t, i915, __func__, "");
958 	if (err)
959 		goto out_unlock;
960 
961 	for_each_engine(engine, i915, id) {
962 		struct i915_vma *batch;
963 
964 		batch = recursive_batch(i915);
965 		if (IS_ERR(batch)) {
966 			err = PTR_ERR(batch);
967 			pr_err("%s: Unable to create batch for %s, err=%d\n",
968 			       __func__, engine->name, err);
969 			goto out_unlock;
970 		}
971 
972 		request[id] = i915_request_alloc(engine, i915->kernel_context);
973 		if (IS_ERR(request[id])) {
974 			err = PTR_ERR(request[id]);
975 			pr_err("%s: Request allocation failed for %s with err=%d\n",
976 			       __func__, engine->name, err);
977 			goto out_request;
978 		}
979 
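		/*
		 * Chain each request to the previous engine's request so that
		 * they can only execute in submission order.
		 */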
980 		if (prev) {
981 			err = i915_request_await_dma_fence(request[id],
982 							   &prev->fence);
983 			if (err) {
984 				i915_request_add(request[id]);
985 				pr_err("%s: Request await failed for %s with err=%d\n",
986 				       __func__, engine->name, err);
987 				goto out_request;
988 			}
989 		}
990 
991 		err = engine->emit_bb_start(request[id],
992 					    batch->node.start,
993 					    batch->node.size,
994 					    0);
995 		GEM_BUG_ON(err);
996 		request[id]->batch = batch;
997 
998 		err = i915_vma_move_to_active(batch, request[id], 0);
999 		GEM_BUG_ON(err);
1000 
1001 		i915_gem_object_set_active_reference(batch->obj);
1002 		i915_vma_get(batch);
1003 
1004 		i915_request_get(request[id]);
1005 		i915_request_add(request[id]);
1006 
1007 		prev = request[id];
1008 	}
1009 
1010 	for_each_engine(engine, i915, id) {
1011 		long timeout;
1012 
1013 		if (i915_request_completed(request[id])) {
1014 			pr_err("%s(%s): request completed too early!\n",
1015 			       __func__, engine->name);
1016 			err = -EINVAL;
1017 			goto out_request;
1018 		}
1019 
1020 		err = recursive_batch_resolve(request[id]->batch);
1021 		if (err) {
1022 			pr_err("%s: failed to resolve batch, err=%d\n",
1023 			       __func__, err);
1024 			goto out_request;
1025 		}
1026 
1027 		timeout = i915_request_wait(request[id],
1028 					    I915_WAIT_LOCKED,
1029 					    MAX_SCHEDULE_TIMEOUT);
1030 		if (timeout < 0) {
1031 			err = timeout;
1032 			pr_err("%s: error waiting for request on %s, err=%d\n",
1033 			       __func__, engine->name, err);
1034 			goto out_request;
1035 		}
1036 
1037 		GEM_BUG_ON(!i915_request_completed(request[id]));
1038 	}
1039 
1040 	err = igt_live_test_end(&t);
1041 
1042 out_request:
1043 	for_each_engine(engine, i915, id) {
1044 		u32 *cmd;
1045 
1046 		if (!request[id])
1047 			break;
1048 
1049 		cmd = i915_gem_object_pin_map(request[id]->batch->obj,
1050 					      I915_MAP_WC);
1051 		if (!IS_ERR(cmd)) {
1052 			*cmd = MI_BATCH_BUFFER_END;
1053 			i915_gem_chipset_flush(i915);
1054 
1055 			i915_gem_object_unpin_map(request[id]->batch->obj);
1056 		}
1057 
1058 		i915_vma_put(request[id]->batch);
1059 		i915_request_put(request[id]);
1060 	}
1061 out_unlock:
1062 	intel_runtime_pm_put(i915, wakeref);
1063 	mutex_unlock(&i915->drm.struct_mutex);
1064 	return err;
1065 }
1066 
1067 static int
1068 max_batches(struct i915_gem_context *ctx, struct intel_engine_cs *engine)
1069 {
1070 	struct i915_request *rq;
1071 	int ret;
1072 
1073 	/*
1074 	 * Before execlists, all contexts share the same ringbuffer. With
	 * execlists, each context/engine has a separate ringbuffer which,
	 * for the purposes of this test, is effectively inexhaustible.
1077 	 *
1078 	 * For the global ringbuffer though, we have to be very careful
1079 	 * that we do not wrap while preventing the execution of requests
	 * with an unsignaled fence.
1081 	 */
1082 	if (HAS_EXECLISTS(ctx->i915))
1083 		return INT_MAX;
1084 
1085 	rq = i915_request_alloc(engine, ctx);
1086 	if (IS_ERR(rq)) {
1087 		ret = PTR_ERR(rq);
1088 	} else {
1089 		int sz;
1090 
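		/*
		 * Estimate how many requests fit in the ring: the usable ring
		 * space divided by the size of this sample request.
		 */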
1091 		ret = rq->ring->size - rq->reserved_space;
1092 		i915_request_add(rq);
1093 
1094 		sz = rq->ring->emit - rq->head;
1095 		if (sz < 0)
1096 			sz += rq->ring->size;
1097 		ret /= sz;
1098 		ret /= 2; /* leave half spare, in case of emergency! */
1099 	}
1100 
1101 	return ret;
1102 }
1103 
1104 static int live_breadcrumbs_smoketest(void *arg)
1105 {
1106 	struct drm_i915_private *i915 = arg;
1107 	struct smoketest t[I915_NUM_ENGINES];
1108 	unsigned int ncpus = num_online_cpus();
1109 	unsigned long num_waits, num_fences;
1110 	struct intel_engine_cs *engine;
1111 	struct task_struct **threads;
1112 	struct igt_live_test live;
1113 	enum intel_engine_id id;
1114 	intel_wakeref_t wakeref;
1115 	struct drm_file *file;
1116 	unsigned int n;
1117 	int ret = 0;
1118 
1119 	/*
1120 	 * Smoketest our breadcrumb/signal handling for requests across multiple
	 * threads. A very simple test to catch only the most egregious of bugs.
1122 	 * See __igt_breadcrumbs_smoketest();
1123 	 *
1124 	 * On real hardware this time.
1125 	 */
1126 
1127 	wakeref = intel_runtime_pm_get(i915);
1128 
1129 	file = mock_file(i915);
1130 	if (IS_ERR(file)) {
1131 		ret = PTR_ERR(file);
1132 		goto out_rpm;
1133 	}
1134 
1135 	threads = kcalloc(ncpus * I915_NUM_ENGINES,
1136 			  sizeof(*threads),
1137 			  GFP_KERNEL);
1138 	if (!threads) {
1139 		ret = -ENOMEM;
1140 		goto out_file;
1141 	}
1142 
1143 	memset(&t[0], 0, sizeof(t[0]));
1144 	t[0].request_alloc = __live_request_alloc;
1145 	t[0].ncontexts = 64;
1146 	t[0].contexts = kmalloc_array(t[0].ncontexts,
1147 				      sizeof(*t[0].contexts),
1148 				      GFP_KERNEL);
1149 	if (!t[0].contexts) {
1150 		ret = -ENOMEM;
1151 		goto out_threads;
1152 	}
1153 
1154 	mutex_lock(&i915->drm.struct_mutex);
1155 	for (n = 0; n < t[0].ncontexts; n++) {
1156 		t[0].contexts[n] = live_context(i915, file);
1157 		if (!t[0].contexts[n]) {
1158 			ret = -ENOMEM;
1159 			goto out_contexts;
1160 		}
1161 	}
1162 
1163 	ret = igt_live_test_begin(&live, i915, __func__, "");
1164 	if (ret)
1165 		goto out_contexts;
1166 
1167 	for_each_engine(engine, i915, id) {
1168 		t[id] = t[0];
1169 		t[id].engine = engine;
1170 		t[id].max_batch = max_batches(t[0].contexts[0], engine);
1171 		if (t[id].max_batch < 0) {
1172 			ret = t[id].max_batch;
1173 			mutex_unlock(&i915->drm.struct_mutex);
1174 			goto out_flush;
1175 		}
1176 		/* One ring interleaved between requests from all cpus */
1177 		t[id].max_batch /= num_online_cpus() + 1;
1178 		pr_debug("Limiting batches to %d requests on %s\n",
1179 			 t[id].max_batch, engine->name);
1180 
1181 		for (n = 0; n < ncpus; n++) {
1182 			struct task_struct *tsk;
1183 
1184 			tsk = kthread_run(__igt_breadcrumbs_smoketest,
1185 					  &t[id], "igt/%d.%d", id, n);
1186 			if (IS_ERR(tsk)) {
1187 				ret = PTR_ERR(tsk);
1188 				mutex_unlock(&i915->drm.struct_mutex);
1189 				goto out_flush;
1190 			}
1191 
1192 			get_task_struct(tsk);
1193 			threads[id * ncpus + n] = tsk;
1194 		}
1195 	}
1196 	mutex_unlock(&i915->drm.struct_mutex);
1197 
1198 	msleep(jiffies_to_msecs(i915_selftest.timeout_jiffies));
1199 
1200 out_flush:
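	/*
	 * Reached both after the timed run and on setup errors: stop whichever
	 * worker threads were started and accumulate their results.
	 */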
1201 	num_waits = 0;
1202 	num_fences = 0;
1203 	for_each_engine(engine, i915, id) {
1204 		for (n = 0; n < ncpus; n++) {
1205 			struct task_struct *tsk = threads[id * ncpus + n];
1206 			int err;
1207 
1208 			if (!tsk)
1209 				continue;
1210 
1211 			err = kthread_stop(tsk);
1212 			if (err < 0 && !ret)
1213 				ret = err;
1214 
1215 			put_task_struct(tsk);
1216 		}
1217 
1218 		num_waits += atomic_long_read(&t[id].num_waits);
1219 		num_fences += atomic_long_read(&t[id].num_fences);
1220 	}
1221 	pr_info("Completed %lu waits for %lu fences across %d engines and %d cpus\n",
1222 		num_waits, num_fences, RUNTIME_INFO(i915)->num_rings, ncpus);
1223 
1224 	mutex_lock(&i915->drm.struct_mutex);
1225 	ret = igt_live_test_end(&live) ?: ret;
1226 out_contexts:
1227 	mutex_unlock(&i915->drm.struct_mutex);
1228 	kfree(t[0].contexts);
1229 out_threads:
1230 	kfree(threads);
1231 out_file:
1232 	mock_file_free(i915, file);
1233 out_rpm:
1234 	intel_runtime_pm_put(i915, wakeref);
1235 
1236 	return ret;
1237 }
1238 
1239 int i915_request_live_selftests(struct drm_i915_private *i915)
1240 {
1241 	static const struct i915_subtest tests[] = {
1242 		SUBTEST(live_nop_request),
1243 		SUBTEST(live_all_engines),
1244 		SUBTEST(live_sequential_engines),
1245 		SUBTEST(live_empty_request),
1246 		SUBTEST(live_breadcrumbs_smoketest),
1247 	};
1248 
1249 	if (i915_terminally_wedged(&i915->gpu_error))
1250 		return 0;
1251 
1252 	return i915_subtests(tests, i915);
1253 }
1254