/*
 * Copyright © 2016 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 */

#include <linux/prime_numbers.h>

#include "../i915_selftest.h"
#include "i915_random.h"
#include "igt_live_test.h"
#include "lib_sw_fence.h"

#include "mock_context.h"
#include "mock_drm.h"
#include "mock_gem_device.h"

static int igt_add_request(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct i915_request *request;
	int err = -ENOMEM;

	/* Basic preliminary test to create a request and let it loose! */

	mutex_lock(&i915->drm.struct_mutex);
	request = mock_request(i915->engine[RCS0],
			       i915->kernel_context,
			       HZ / 10);
	if (!request)
		goto out_unlock;

	i915_request_add(request);

	err = 0;
out_unlock:
	mutex_unlock(&i915->drm.struct_mutex);
	return err;
}

static int igt_wait_request(void *arg)
{
	const long T = HZ / 4;
	struct drm_i915_private *i915 = arg;
	struct i915_request *request;
	int err = -EINVAL;

	/* Submit a request, then wait upon it */

	mutex_lock(&i915->drm.struct_mutex);
	request = mock_request(i915->engine[RCS0], i915->kernel_context, T);
	if (!request) {
		err = -ENOMEM;
		goto out_unlock;
	}

	if (i915_request_wait(request, I915_WAIT_LOCKED, 0) != -ETIME) {
		pr_err("request wait (busy query) succeeded (expected timeout before submit!)\n");
		goto out_unlock;
	}

	if (i915_request_wait(request, I915_WAIT_LOCKED, T) != -ETIME) {
		pr_err("request wait succeeded (expected timeout before submit!)\n");
		goto out_unlock;
	}

	if (i915_request_completed(request)) {
		pr_err("request completed before submit!!\n");
		goto out_unlock;
	}

	i915_request_add(request);

	if (i915_request_wait(request, I915_WAIT_LOCKED, 0) != -ETIME) {
		pr_err("request wait (busy query) succeeded (expected timeout after submit!)\n");
		goto out_unlock;
	}

	if (i915_request_completed(request)) {
		pr_err("request completed immediately!\n");
		goto out_unlock;
	}

	if (i915_request_wait(request, I915_WAIT_LOCKED, T / 2) != -ETIME) {
		pr_err("request wait succeeded (expected timeout!)\n");
		goto out_unlock;
	}

	if (i915_request_wait(request, I915_WAIT_LOCKED, T) == -ETIME) {
		pr_err("request wait timed out!\n");
		goto out_unlock;
	}

	if (!i915_request_completed(request)) {
		pr_err("request not complete after waiting!\n");
		goto out_unlock;
	}

	if (i915_request_wait(request, I915_WAIT_LOCKED, T) == -ETIME) {
		pr_err("request wait timed out when already complete!\n");
		goto out_unlock;
	}

	err = 0;
out_unlock:
	mock_device_flush(i915);
	mutex_unlock(&i915->drm.struct_mutex);
	return err;
}

static int igt_fence_wait(void *arg)
{
	const long T = HZ / 4;
	struct drm_i915_private *i915 = arg;
	struct i915_request *request;
	int err = -EINVAL;

	/* Submit a request, treat it as a fence and wait upon it */

	mutex_lock(&i915->drm.struct_mutex);
	request = mock_request(i915->engine[RCS0], i915->kernel_context, T);
	if (!request) {
		err = -ENOMEM;
		goto out_locked;
	}

	if (dma_fence_wait_timeout(&request->fence, false, T) != -ETIME) {
		pr_err("fence wait success before submit (expected timeout)!\n");
		goto out_locked;
	}

	i915_request_add(request);
	mutex_unlock(&i915->drm.struct_mutex);

	if (dma_fence_is_signaled(&request->fence)) {
		pr_err("fence signaled immediately!\n");
		goto out_device;
	}

	if (dma_fence_wait_timeout(&request->fence, false, T / 2) != -ETIME) {
		pr_err("fence wait success after submit (expected timeout)!\n");
		goto out_device;
	}

	if (dma_fence_wait_timeout(&request->fence, false, T) <= 0) {
		pr_err("fence wait timed out (expected success)!\n");
		goto out_device;
	}

	if (!dma_fence_is_signaled(&request->fence)) {
		pr_err("fence unsignaled after waiting!\n");
		goto out_device;
	}

	if (dma_fence_wait_timeout(&request->fence, false, T) <= 0) {
		pr_err("fence wait timed out when complete (expected success)!\n");
		goto out_device;
	}

	err = 0;
out_device:
	mutex_lock(&i915->drm.struct_mutex);
out_locked:
	mock_device_flush(i915);
	mutex_unlock(&i915->drm.struct_mutex);
	return err;
}

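/*
 * Simulate a preemption event: submit a slow request, cancel it before it
 * executes, submit a high priority (vip) request in its place, then
 * resubmit the original. The vip request must complete first, while the
 * rewound request is still outstanding.
 */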
static int igt_request_rewind(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct i915_request *request, *vip;
	struct i915_gem_context *ctx[2];
	int err = -EINVAL;

	mutex_lock(&i915->drm.struct_mutex);
	ctx[0] = mock_context(i915, "A");
	request = mock_request(i915->engine[RCS0], ctx[0], 2 * HZ);
	if (!request) {
		err = -ENOMEM;
		goto err_context_0;
	}

	i915_request_get(request);
	i915_request_add(request);

	ctx[1] = mock_context(i915, "B");
	vip = mock_request(i915->engine[RCS0], ctx[1], 0);
	if (!vip) {
		err = -ENOMEM;
		goto err_context_1;
	}

	/* Simulate preemption by manual reordering */
	if (!mock_cancel_request(request)) {
		pr_err("failed to cancel request (already executed)!\n");
		i915_request_add(vip);
		goto err_context_1;
	}
	i915_request_get(vip);
	i915_request_add(vip);
	rcu_read_lock();
	request->engine->submit_request(request);
	rcu_read_unlock();

	mutex_unlock(&i915->drm.struct_mutex);

	if (i915_request_wait(vip, 0, HZ) == -ETIME) {
		pr_err("timed out waiting for high priority request\n");
		goto err;
	}

	if (i915_request_completed(request)) {
		pr_err("low priority request already completed\n");
		goto err;
	}

	err = 0;
err:
	i915_request_put(vip);
	mutex_lock(&i915->drm.struct_mutex);
err_context_1:
	mock_context_close(ctx[1]);
	i915_request_put(request);
err_context_0:
	mock_context_close(ctx[0]);
	mock_device_flush(i915);
	mutex_unlock(&i915->drm.struct_mutex);
	return err;
}

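/*
 * Shared state for the breadcrumbs smoketests: each kthread picks random
 * contexts from @contexts, builds batches of up to @max_batch requests on
 * @engine via @request_alloc and tallies its work in @num_waits/@num_fences.
 */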
struct smoketest {
	struct intel_engine_cs *engine;
	struct i915_gem_context **contexts;
	atomic_long_t num_waits, num_fences;
	int ncontexts, max_batch;
	struct i915_request *(*request_alloc)(struct i915_gem_context *,
					      struct intel_engine_cs *);
};

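/*
 * Request constructors for the smoketest: a delay-less mock request for the
 * mock device, or a real request when running on live hardware.
 */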
static struct i915_request *
__mock_request_alloc(struct i915_gem_context *ctx,
		     struct intel_engine_cs *engine)
{
	return mock_request(engine, ctx, 0);
}

static struct i915_request *
__live_request_alloc(struct i915_gem_context *ctx,
		     struct intel_engine_cs *engine)
{
	return i915_request_alloc(engine, ctx);
}

static int __igt_breadcrumbs_smoketest(void *arg)
{
	struct smoketest *t = arg;
	struct mutex * const BKL = &t->engine->i915->drm.struct_mutex;
	const unsigned int max_batch = min(t->ncontexts, t->max_batch) - 1;
	const unsigned int total = 4 * t->ncontexts + 1;
	unsigned int num_waits = 0, num_fences = 0;
	struct i915_request **requests;
	I915_RND_STATE(prng);
	unsigned int *order;
	int err = 0;

	/*
	 * A very simple test to catch the most egregious of list handling bugs.
	 *
	 * At its heart, we simply create oodles of requests running across
	 * multiple kthreads and enable signaling on them, for the sole purpose
	 * of stressing our breadcrumb handling. The only inspection we do is
	 * that the fences were marked as signaled.
	 */

	requests = kmalloc_array(total, sizeof(*requests), GFP_KERNEL);
	if (!requests)
		return -ENOMEM;

	order = i915_random_order(total, &prng);
	if (!order) {
		err = -ENOMEM;
		goto out_requests;
	}

	while (!kthread_should_stop()) {
		struct i915_sw_fence *submit, *wait;
		unsigned int n, count;

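		/*
		 * Each pass gates a random-sized batch of requests on a
		 * single submit fence and collects their completion on a
		 * single wait fence, so signaling is enabled en masse.
		 */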
		submit = heap_fence_create(GFP_KERNEL);
		if (!submit) {
			err = -ENOMEM;
			break;
		}

		wait = heap_fence_create(GFP_KERNEL);
		if (!wait) {
			i915_sw_fence_commit(submit);
			heap_fence_put(submit);
			err = -ENOMEM;
			break;
		}

		i915_random_reorder(order, total, &prng);
		count = 1 + i915_prandom_u32_max_state(max_batch, &prng);

		for (n = 0; n < count; n++) {
			struct i915_gem_context *ctx =
				t->contexts[order[n] % t->ncontexts];
			struct i915_request *rq;

			mutex_lock(BKL);

			rq = t->request_alloc(ctx, t->engine);
			if (IS_ERR(rq)) {
				mutex_unlock(BKL);
				err = PTR_ERR(rq);
				count = n;
				break;
			}

			err = i915_sw_fence_await_sw_fence_gfp(&rq->submit,
							       submit,
							       GFP_KERNEL);

			requests[n] = i915_request_get(rq);
			i915_request_add(rq);

			mutex_unlock(BKL);

			if (err >= 0)
				err = i915_sw_fence_await_dma_fence(wait,
								    &rq->fence,
								    0,
								    GFP_KERNEL);

			if (err < 0) {
				i915_request_put(rq);
				count = n;
				break;
			}
		}

		i915_sw_fence_commit(submit);
		i915_sw_fence_commit(wait);

		if (!wait_event_timeout(wait->wait,
					i915_sw_fence_done(wait),
					HZ / 2)) {
			struct i915_request *rq = requests[count - 1];

			pr_err("waiting for %d fences (last %llx:%lld) on %s timed out!\n",
			       count,
			       rq->fence.context, rq->fence.seqno,
			       t->engine->name);
			i915_gem_set_wedged(t->engine->i915);
			GEM_BUG_ON(!i915_request_completed(rq));
			i915_sw_fence_wait(wait);
			err = -EIO;
		}

		for (n = 0; n < count; n++) {
			struct i915_request *rq = requests[n];

			if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
				      &rq->fence.flags)) {
				pr_err("%llu:%llu was not signaled!\n",
				       rq->fence.context, rq->fence.seqno);
				err = -EINVAL;
			}

			i915_request_put(rq);
		}

		heap_fence_put(wait);
		heap_fence_put(submit);

		if (err < 0)
			break;

		num_fences += count;
		num_waits++;

		cond_resched();
	}

	atomic_long_add(num_fences, &t->num_fences);
	atomic_long_add(num_waits, &t->num_waits);

	kfree(order);
out_requests:
	kfree(requests);
	return err;
}

static int mock_breadcrumbs_smoketest(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct smoketest t = {
		.engine = i915->engine[RCS0],
		.ncontexts = 1024,
		.max_batch = 1024,
		.request_alloc = __mock_request_alloc
	};
	unsigned int ncpus = num_online_cpus();
	struct task_struct **threads;
	unsigned int n;
	int ret = 0;

	/*
	 * Smoketest our breadcrumb/signal handling for requests across multiple
	 * threads. A very simple test to only catch the most egregious of bugs.
	 * See __igt_breadcrumbs_smoketest();
	 */

	threads = kmalloc_array(ncpus, sizeof(*threads), GFP_KERNEL);
	if (!threads)
		return -ENOMEM;

	t.contexts =
		kmalloc_array(t.ncontexts, sizeof(*t.contexts), GFP_KERNEL);
	if (!t.contexts) {
		ret = -ENOMEM;
		goto out_threads;
	}

	mutex_lock(&t.engine->i915->drm.struct_mutex);
	for (n = 0; n < t.ncontexts; n++) {
		t.contexts[n] = mock_context(t.engine->i915, "mock");
		if (!t.contexts[n]) {
			ret = -ENOMEM;
			goto out_contexts;
		}
	}
	mutex_unlock(&t.engine->i915->drm.struct_mutex);

	for (n = 0; n < ncpus; n++) {
		threads[n] = kthread_run(__igt_breadcrumbs_smoketest,
					 &t, "igt/%d", n);
		if (IS_ERR(threads[n])) {
			ret = PTR_ERR(threads[n]);
			ncpus = n;
			break;
		}

		get_task_struct(threads[n]);
	}

	msleep(jiffies_to_msecs(i915_selftest.timeout_jiffies));

	for (n = 0; n < ncpus; n++) {
		int err;

		err = kthread_stop(threads[n]);
		if (err < 0 && !ret)
			ret = err;

		put_task_struct(threads[n]);
	}
	pr_info("Completed %lu waits for %lu fences across %d cpus\n",
		atomic_long_read(&t.num_waits),
		atomic_long_read(&t.num_fences),
		ncpus);

	mutex_lock(&t.engine->i915->drm.struct_mutex);
out_contexts:
	for (n = 0; n < t.ncontexts; n++) {
		if (!t.contexts[n])
			break;
		mock_context_close(t.contexts[n]);
	}
	mutex_unlock(&t.engine->i915->drm.struct_mutex);
	kfree(t.contexts);
out_threads:
	kfree(threads);

	return ret;
}

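/*
 * Entry point for the request selftests run against a mock GEM device,
 * i.e. without touching real hardware.
 */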
int i915_request_mock_selftests(void)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(igt_add_request),
		SUBTEST(igt_wait_request),
		SUBTEST(igt_fence_wait),
		SUBTEST(igt_request_rewind),
		SUBTEST(mock_breadcrumbs_smoketest),
	};
	struct drm_i915_private *i915;
	intel_wakeref_t wakeref;
	int err = 0;

	i915 = mock_gem_device();
	if (!i915)
		return -ENOMEM;

	with_intel_runtime_pm(i915, wakeref)
		err = i915_subtests(tests, i915);

	drm_dev_put(&i915->drm);

	return err;
}

static int live_nop_request(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct intel_engine_cs *engine;
	intel_wakeref_t wakeref;
	struct igt_live_test t;
	unsigned int id;
	int err = -ENODEV;

	/* Submit various sized batches of empty requests, to each engine
	 * (individually), and wait for the batch to complete. We can check
	 * the overhead of submitting requests to the hardware.
	 */

	mutex_lock(&i915->drm.struct_mutex);
	wakeref = intel_runtime_pm_get(i915);

	for_each_engine(engine, i915, id) {
		struct i915_request *request = NULL;
		unsigned long n, prime;
		IGT_TIMEOUT(end_time);
		ktime_t times[2] = {};

		err = igt_live_test_begin(&t, i915, __func__, engine->name);
		if (err)
			goto out_unlock;

		for_each_prime_number_from(prime, 1, 8192) {
			times[1] = ktime_get_raw();

			for (n = 0; n < prime; n++) {
				request = i915_request_alloc(engine,
							     i915->kernel_context);
				if (IS_ERR(request)) {
					err = PTR_ERR(request);
					goto out_unlock;
				}

				/* This space is left intentionally blank.
				 *
				 * We do not actually want to perform any
				 * action with this request, we just want
				 * to measure the latency in allocation
				 * and submission of our breadcrumbs -
				 * ensuring that the bare request is sufficient
				 * for the system to work (i.e. proper HEAD
				 * tracking of the rings, interrupt handling,
				 * etc). It also gives us the lowest bounds
				 * for latency.
				 */

				i915_request_add(request);
			}
			i915_request_wait(request,
					  I915_WAIT_LOCKED,
					  MAX_SCHEDULE_TIMEOUT);

			times[1] = ktime_sub(ktime_get_raw(), times[1]);
			if (prime == 1)
				times[0] = times[1];

			if (__igt_timeout(end_time, NULL))
				break;
		}

		err = igt_live_test_end(&t);
		if (err)
			goto out_unlock;

		pr_info("Request latencies on %s: 1 = %lluns, %lu = %lluns\n",
			engine->name,
			ktime_to_ns(times[0]),
			prime, div64_u64(ktime_to_ns(times[1]), prime));
	}

out_unlock:
	intel_runtime_pm_put(i915, wakeref);
	mutex_unlock(&i915->drm.struct_mutex);
	return err;
}

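/*
 * Create a one-page batch containing just MI_BATCH_BUFFER_END, pinned
 * into the global GTT so it can be executed from any engine.
 */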
static struct i915_vma *empty_batch(struct drm_i915_private *i915)
{
	struct drm_i915_gem_object *obj;
	struct i915_vma *vma;
	u32 *cmd;
	int err;

	obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
	if (IS_ERR(obj))
		return ERR_CAST(obj);

	cmd = i915_gem_object_pin_map(obj, I915_MAP_WB);
	if (IS_ERR(cmd)) {
		err = PTR_ERR(cmd);
		goto err;
	}

	*cmd = MI_BATCH_BUFFER_END;

	__i915_gem_object_flush_map(obj, 0, 64);
	i915_gem_object_unpin_map(obj);

	i915_gem_chipset_flush(i915);

	vma = i915_vma_instance(obj, &i915->ggtt.vm, NULL);
	if (IS_ERR(vma)) {
		err = PTR_ERR(vma);
		goto err;
	}

	err = i915_vma_pin(vma, 0, 0, PIN_USER | PIN_GLOBAL);
	if (err)
		goto err;

	return vma;

err:
	i915_gem_object_put(obj);
	return ERR_PTR(err);
}

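/*
 * Submit the empty batch on the kernel context and hand the request back
 * to the caller for latency measurements.
 */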
static struct i915_request *
empty_request(struct intel_engine_cs *engine,
	      struct i915_vma *batch)
{
	struct i915_request *request;
	int err;

	request = i915_request_alloc(engine, engine->i915->kernel_context);
	if (IS_ERR(request))
		return request;

	err = engine->emit_bb_start(request,
				    batch->node.start,
				    batch->node.size,
				    I915_DISPATCH_SECURE);
	if (err)
		goto out_request;

out_request:
	i915_request_add(request);
	return err ? ERR_PTR(err) : request;
}

static int live_empty_request(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct intel_engine_cs *engine;
	intel_wakeref_t wakeref;
	struct igt_live_test t;
	struct i915_vma *batch;
	unsigned int id;
	int err = 0;

	/* Submit various sized batches of empty requests, to each engine
	 * (individually), and wait for the batch to complete. We can check
	 * the overhead of submitting requests to the hardware.
	 */

	mutex_lock(&i915->drm.struct_mutex);
	wakeref = intel_runtime_pm_get(i915);

	batch = empty_batch(i915);
	if (IS_ERR(batch)) {
		err = PTR_ERR(batch);
		goto out_unlock;
	}

	for_each_engine(engine, i915, id) {
		IGT_TIMEOUT(end_time);
		struct i915_request *request;
		unsigned long n, prime;
		ktime_t times[2] = {};

		err = igt_live_test_begin(&t, i915, __func__, engine->name);
		if (err)
			goto out_batch;

		/* Warmup / preload */
		request = empty_request(engine, batch);
		if (IS_ERR(request)) {
			err = PTR_ERR(request);
			goto out_batch;
		}
		i915_request_wait(request,
				  I915_WAIT_LOCKED,
				  MAX_SCHEDULE_TIMEOUT);

		for_each_prime_number_from(prime, 1, 8192) {
			times[1] = ktime_get_raw();

			for (n = 0; n < prime; n++) {
				request = empty_request(engine, batch);
				if (IS_ERR(request)) {
					err = PTR_ERR(request);
					goto out_batch;
				}
			}
			i915_request_wait(request,
					  I915_WAIT_LOCKED,
					  MAX_SCHEDULE_TIMEOUT);

			times[1] = ktime_sub(ktime_get_raw(), times[1]);
			if (prime == 1)
				times[0] = times[1];

			if (__igt_timeout(end_time, NULL))
				break;
		}

		err = igt_live_test_end(&t);
		if (err)
			goto out_batch;

		pr_info("Batch latencies on %s: 1 = %lluns, %lu = %lluns\n",
			engine->name,
			ktime_to_ns(times[0]),
			prime, div64_u64(ktime_to_ns(times[1]), prime));
	}

out_batch:
	i915_vma_unpin(batch);
	i915_vma_put(batch);
out_unlock:
	intel_runtime_pm_put(i915, wakeref);
	mutex_unlock(&i915->drm.struct_mutex);
	return err;
}

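/*
 * Create a batch whose first instruction jumps back to its own start, so
 * any request executing it spins forever until the batch is rewritten by
 * recursive_batch_resolve().
 */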
static struct i915_vma *recursive_batch(struct drm_i915_private *i915)
{
	struct i915_gem_context *ctx = i915->kernel_context;
	struct i915_address_space *vm =
		ctx->ppgtt ? &ctx->ppgtt->vm : &i915->ggtt.vm;
	struct drm_i915_gem_object *obj;
	const int gen = INTEL_GEN(i915);
	struct i915_vma *vma;
	u32 *cmd;
	int err;

	obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
	if (IS_ERR(obj))
		return ERR_CAST(obj);

	vma = i915_vma_instance(obj, vm, NULL);
	if (IS_ERR(vma)) {
		err = PTR_ERR(vma);
		goto err;
	}

	err = i915_vma_pin(vma, 0, 0, PIN_USER);
	if (err)
		goto err;

	cmd = i915_gem_object_pin_map(obj, I915_MAP_WC);
	if (IS_ERR(cmd)) {
		err = PTR_ERR(cmd);
		goto err;
	}

	if (gen >= 8) {
		*cmd++ = MI_BATCH_BUFFER_START | 1 << 8 | 1;
		*cmd++ = lower_32_bits(vma->node.start);
		*cmd++ = upper_32_bits(vma->node.start);
	} else if (gen >= 6) {
		*cmd++ = MI_BATCH_BUFFER_START | 1 << 8;
		*cmd++ = lower_32_bits(vma->node.start);
	} else {
		*cmd++ = MI_BATCH_BUFFER_START | MI_BATCH_GTT;
		*cmd++ = lower_32_bits(vma->node.start);
	}
	*cmd++ = MI_BATCH_BUFFER_END; /* terminate early in case of error */

	__i915_gem_object_flush_map(obj, 0, 64);
	i915_gem_object_unpin_map(obj);

	i915_gem_chipset_flush(i915);

	return vma;

err:
	i915_gem_object_put(obj);
	return ERR_PTR(err);
}

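/*
 * Overwrite the self-referencing jump with MI_BATCH_BUFFER_END, allowing
 * the spinning batch and every request built upon it to complete.
 */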
static int recursive_batch_resolve(struct i915_vma *batch)
{
	u32 *cmd;

	cmd = i915_gem_object_pin_map(batch->obj, I915_MAP_WC);
	if (IS_ERR(cmd))
		return PTR_ERR(cmd);

	*cmd = MI_BATCH_BUFFER_END;
	i915_gem_chipset_flush(batch->vm->i915);

	i915_gem_object_unpin_map(batch->obj);

	return 0;
}

static int live_all_engines(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct intel_engine_cs *engine;
	struct i915_request *request[I915_NUM_ENGINES];
	intel_wakeref_t wakeref;
	struct igt_live_test t;
	struct i915_vma *batch;
	unsigned int id;
	int err;

	/* Check we can submit requests to all engines simultaneously. We
	 * send a recursive batch to each engine - checking that we don't
	 * block doing so, and that they don't complete too soon.
	 */

	mutex_lock(&i915->drm.struct_mutex);
	wakeref = intel_runtime_pm_get(i915);

	err = igt_live_test_begin(&t, i915, __func__, "");
	if (err)
		goto out_unlock;

	batch = recursive_batch(i915);
	if (IS_ERR(batch)) {
		err = PTR_ERR(batch);
		pr_err("%s: Unable to create batch, err=%d\n", __func__, err);
		goto out_unlock;
	}

	for_each_engine(engine, i915, id) {
		request[id] = i915_request_alloc(engine, i915->kernel_context);
		if (IS_ERR(request[id])) {
			err = PTR_ERR(request[id]);
			pr_err("%s: Request allocation failed with err=%d\n",
			       __func__, err);
			goto out_request;
		}

		err = engine->emit_bb_start(request[id],
					    batch->node.start,
					    batch->node.size,
					    0);
		GEM_BUG_ON(err);
		request[id]->batch = batch;

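		/* Keep the batch object alive while it remains active on the GPU */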
		if (!i915_gem_object_has_active_reference(batch->obj)) {
			i915_gem_object_get(batch->obj);
			i915_gem_object_set_active_reference(batch->obj);
		}

		err = i915_vma_move_to_active(batch, request[id], 0);
		GEM_BUG_ON(err);

		i915_request_get(request[id]);
		i915_request_add(request[id]);
	}

	for_each_engine(engine, i915, id) {
		if (i915_request_completed(request[id])) {
			pr_err("%s(%s): request completed too early!\n",
			       __func__, engine->name);
			err = -EINVAL;
			goto out_request;
		}
	}

	err = recursive_batch_resolve(batch);
	if (err) {
		pr_err("%s: failed to resolve batch, err=%d\n", __func__, err);
		goto out_request;
	}

	for_each_engine(engine, i915, id) {
		long timeout;

		timeout = i915_request_wait(request[id],
					    I915_WAIT_LOCKED,
					    MAX_SCHEDULE_TIMEOUT);
		if (timeout < 0) {
			err = timeout;
			pr_err("%s: error waiting for request on %s, err=%d\n",
			       __func__, engine->name, err);
			goto out_request;
		}

		GEM_BUG_ON(!i915_request_completed(request[id]));
		i915_request_put(request[id]);
		request[id] = NULL;
	}

	err = igt_live_test_end(&t);

out_request:
	for_each_engine(engine, i915, id)
		if (request[id])
			i915_request_put(request[id]);
	i915_vma_unpin(batch);
	i915_vma_put(batch);
out_unlock:
	intel_runtime_pm_put(i915, wakeref);
	mutex_unlock(&i915->drm.struct_mutex);
	return err;
}

static int live_sequential_engines(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct i915_request *request[I915_NUM_ENGINES] = {};
	struct i915_request *prev = NULL;
	struct intel_engine_cs *engine;
	intel_wakeref_t wakeref;
	struct igt_live_test t;
	unsigned int id;
	int err;

	/* Check we can submit requests to all engines sequentially, such
	 * that each successive request waits for the earlier ones. This
	 * tests that we don't execute requests out of order, even though
	 * they are running on independent engines.
	 */

	mutex_lock(&i915->drm.struct_mutex);
	wakeref = intel_runtime_pm_get(i915);

	err = igt_live_test_begin(&t, i915, __func__, "");
	if (err)
		goto out_unlock;

	for_each_engine(engine, i915, id) {
		struct i915_vma *batch;

		batch = recursive_batch(i915);
		if (IS_ERR(batch)) {
			err = PTR_ERR(batch);
			pr_err("%s: Unable to create batch for %s, err=%d\n",
			       __func__, engine->name, err);
			goto out_unlock;
		}

		request[id] = i915_request_alloc(engine, i915->kernel_context);
		if (IS_ERR(request[id])) {
			err = PTR_ERR(request[id]);
			pr_err("%s: Request allocation failed for %s with err=%d\n",
			       __func__, engine->name, err);
			goto out_request;
		}

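		/* Queue this request to run after the previous engine's request */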
		if (prev) {
			err = i915_request_await_dma_fence(request[id],
							   &prev->fence);
			if (err) {
				i915_request_add(request[id]);
				pr_err("%s: Request await failed for %s with err=%d\n",
				       __func__, engine->name, err);
				goto out_request;
			}
		}

		err = engine->emit_bb_start(request[id],
					    batch->node.start,
					    batch->node.size,
					    0);
		GEM_BUG_ON(err);
		request[id]->batch = batch;

		err = i915_vma_move_to_active(batch, request[id], 0);
		GEM_BUG_ON(err);

		i915_gem_object_set_active_reference(batch->obj);
		i915_vma_get(batch);

		i915_request_get(request[id]);
		i915_request_add(request[id]);

		prev = request[id];
	}

	for_each_engine(engine, i915, id) {
		long timeout;

		if (i915_request_completed(request[id])) {
			pr_err("%s(%s): request completed too early!\n",
			       __func__, engine->name);
			err = -EINVAL;
			goto out_request;
		}

		err = recursive_batch_resolve(request[id]->batch);
		if (err) {
			pr_err("%s: failed to resolve batch, err=%d\n",
			       __func__, err);
			goto out_request;
		}

		timeout = i915_request_wait(request[id],
					    I915_WAIT_LOCKED,
					    MAX_SCHEDULE_TIMEOUT);
		if (timeout < 0) {
			err = timeout;
			pr_err("%s: error waiting for request on %s, err=%d\n",
			       __func__, engine->name, err);
			goto out_request;
		}

		GEM_BUG_ON(!i915_request_completed(request[id]));
	}

	err = igt_live_test_end(&t);

out_request:
	for_each_engine(engine, i915, id) {
		u32 *cmd;

		if (!request[id])
			break;

		cmd = i915_gem_object_pin_map(request[id]->batch->obj,
					      I915_MAP_WC);
		if (!IS_ERR(cmd)) {
			*cmd = MI_BATCH_BUFFER_END;
			i915_gem_chipset_flush(i915);

			i915_gem_object_unpin_map(request[id]->batch->obj);
		}

		i915_vma_put(request[id]->batch);
		i915_request_put(request[id]);
	}
out_unlock:
	intel_runtime_pm_put(i915, wakeref);
	mutex_unlock(&i915->drm.struct_mutex);
	return err;
}

static int
max_batches(struct i915_gem_context *ctx, struct intel_engine_cs *engine)
{
	struct i915_request *rq;
	int ret;

	/*
	 * Before execlists, all contexts share the same ringbuffer. With
	 * execlists, each context/engine has a separate ringbuffer and
	 * for the purposes of this test, inexhaustible.
	 *
	 * For the global ringbuffer though, we have to be very careful
	 * that we do not wrap while preventing the execution of requests
	 * with an unsignaled fence.
	 */
	if (HAS_EXECLISTS(ctx->i915))
		return INT_MAX;

	rq = i915_request_alloc(engine, ctx);
	if (IS_ERR(rq)) {
		ret = PTR_ERR(rq);
	} else {
		int sz;

		ret = rq->ring->size - rq->reserved_space;
		i915_request_add(rq);

		sz = rq->ring->emit - rq->head;
		if (sz < 0)
			sz += rq->ring->size;
		ret /= sz;
		ret /= 2; /* leave half spare, in case of emergency! */
	}

	return ret;
}

static int live_breadcrumbs_smoketest(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct smoketest t[I915_NUM_ENGINES];
	unsigned int ncpus = num_online_cpus();
	unsigned long num_waits, num_fences;
	struct intel_engine_cs *engine;
	struct task_struct **threads;
	struct igt_live_test live;
	enum intel_engine_id id;
	intel_wakeref_t wakeref;
	struct drm_file *file;
	unsigned int n;
	int ret = 0;

	/*
	 * Smoketest our breadcrumb/signal handling for requests across multiple
	 * threads. A very simple test to only catch the most egregious of bugs.
	 * See __igt_breadcrumbs_smoketest();
	 *
	 * On real hardware this time.
	 */

	wakeref = intel_runtime_pm_get(i915);

	file = mock_file(i915);
	if (IS_ERR(file)) {
		ret = PTR_ERR(file);
		goto out_rpm;
	}

	threads = kcalloc(ncpus * I915_NUM_ENGINES,
			  sizeof(*threads),
			  GFP_KERNEL);
	if (!threads) {
		ret = -ENOMEM;
		goto out_file;
	}

	memset(&t[0], 0, sizeof(t[0]));
	t[0].request_alloc = __live_request_alloc;
	t[0].ncontexts = 64;
	t[0].contexts = kmalloc_array(t[0].ncontexts,
				      sizeof(*t[0].contexts),
				      GFP_KERNEL);
	if (!t[0].contexts) {
		ret = -ENOMEM;
		goto out_threads;
	}

	mutex_lock(&i915->drm.struct_mutex);
	for (n = 0; n < t[0].ncontexts; n++) {
		t[0].contexts[n] = live_context(i915, file);
		if (!t[0].contexts[n]) {
			ret = -ENOMEM;
			goto out_contexts;
		}
	}

	ret = igt_live_test_begin(&live, i915, __func__, "");
	if (ret)
		goto out_contexts;

	for_each_engine(engine, i915, id) {
		t[id] = t[0];
		t[id].engine = engine;
		t[id].max_batch = max_batches(t[0].contexts[0], engine);
		if (t[id].max_batch < 0) {
			ret = t[id].max_batch;
			mutex_unlock(&i915->drm.struct_mutex);
			goto out_flush;
		}
		/* One ring interleaved between requests from all cpus */
		t[id].max_batch /= num_online_cpus() + 1;
		pr_debug("Limiting batches to %d requests on %s\n",
			 t[id].max_batch, engine->name);

		for (n = 0; n < ncpus; n++) {
			struct task_struct *tsk;

			tsk = kthread_run(__igt_breadcrumbs_smoketest,
					  &t[id], "igt/%d.%d", id, n);
			if (IS_ERR(tsk)) {
				ret = PTR_ERR(tsk);
				mutex_unlock(&i915->drm.struct_mutex);
				goto out_flush;
			}

			get_task_struct(tsk);
			threads[id * ncpus + n] = tsk;
		}
	}
	mutex_unlock(&i915->drm.struct_mutex);

	msleep(jiffies_to_msecs(i915_selftest.timeout_jiffies));

out_flush:
	num_waits = 0;
	num_fences = 0;
	for_each_engine(engine, i915, id) {
		for (n = 0; n < ncpus; n++) {
			struct task_struct *tsk = threads[id * ncpus + n];
			int err;

			if (!tsk)
				continue;

			err = kthread_stop(tsk);
			if (err < 0 && !ret)
				ret = err;

			put_task_struct(tsk);
		}

		num_waits += atomic_long_read(&t[id].num_waits);
		num_fences += atomic_long_read(&t[id].num_fences);
	}
	pr_info("Completed %lu waits for %lu fences across %d engines and %d cpus\n",
		num_waits, num_fences, RUNTIME_INFO(i915)->num_engines, ncpus);

	mutex_lock(&i915->drm.struct_mutex);
	ret = igt_live_test_end(&live) ?: ret;
out_contexts:
	mutex_unlock(&i915->drm.struct_mutex);
	kfree(t[0].contexts);
out_threads:
	kfree(threads);
out_file:
	mock_file_free(i915, file);
out_rpm:
	intel_runtime_pm_put(i915, wakeref);

	return ret;
}

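/*
 * Entry point for the request selftests run against real hardware; skipped
 * entirely if the GPU is already terminally wedged.
 */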
int i915_request_live_selftests(struct drm_i915_private *i915)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(live_nop_request),
		SUBTEST(live_all_engines),
		SUBTEST(live_sequential_engines),
		SUBTEST(live_empty_request),
		SUBTEST(live_breadcrumbs_smoketest),
	};

	if (i915_terminally_wedged(i915))
		return 0;

	return i915_subtests(tests, i915);
}