1 /*
2  * Copyright © 2016 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  *
23  */
24 
25 #include <linux/prime_numbers.h>
26 
27 #include "../i915_selftest.h"
28 #include "i915_random.h"
29 #include "igt_live_test.h"
30 #include "lib_sw_fence.h"
31 
32 #include "mock_context.h"
33 #include "mock_drm.h"
34 #include "mock_gem_device.h"
35 
36 static int igt_add_request(void *arg)
37 {
38 	struct drm_i915_private *i915 = arg;
39 	struct i915_request *request;
40 	int err = -ENOMEM;
41 
42 	/* Basic preliminary test to create a request and let it loose! */
43 
44 	mutex_lock(&i915->drm.struct_mutex);
45 	request = mock_request(i915->engine[RCS0],
46 			       i915->kernel_context,
47 			       HZ / 10);
48 	if (!request)
49 		goto out_unlock;
50 
51 	i915_request_add(request);
52 
53 	err = 0;
54 out_unlock:
55 	mutex_unlock(&i915->drm.struct_mutex);
56 	return err;
57 }
58 
59 static int igt_wait_request(void *arg)
60 {
61 	const long T = HZ / 4;
62 	struct drm_i915_private *i915 = arg;
63 	struct i915_request *request;
64 	int err = -EINVAL;
65 
66 	/* Submit a request, then wait upon it */
67 
68 	mutex_lock(&i915->drm.struct_mutex);
69 	request = mock_request(i915->engine[RCS0], i915->kernel_context, T);
70 	if (!request) {
71 		err = -ENOMEM;
72 		goto out_unlock;
73 	}
74 
75 	if (i915_request_wait(request, I915_WAIT_LOCKED, 0) != -ETIME) {
76 		pr_err("request wait (busy query) succeeded (expected timeout before submit!)\n");
77 		goto out_unlock;
78 	}
79 
80 	if (i915_request_wait(request, I915_WAIT_LOCKED, T) != -ETIME) {
81 		pr_err("request wait succeeded (expected timeout before submit!)\n");
82 		goto out_unlock;
83 	}
84 
85 	if (i915_request_completed(request)) {
86 		pr_err("request completed before submit!!\n");
87 		goto out_unlock;
88 	}
89 
90 	i915_request_add(request);
91 
92 	if (i915_request_wait(request, I915_WAIT_LOCKED, 0) != -ETIME) {
93 		pr_err("request wait (busy query) succeeded (expected timeout after submit!)\n");
94 		goto out_unlock;
95 	}
96 
97 	if (i915_request_completed(request)) {
98 		pr_err("request completed immediately!\n");
99 		goto out_unlock;
100 	}
101 
102 	if (i915_request_wait(request, I915_WAIT_LOCKED, T / 2) != -ETIME) {
103 		pr_err("request wait succeeded (expected timeout!)\n");
104 		goto out_unlock;
105 	}
106 
107 	if (i915_request_wait(request, I915_WAIT_LOCKED, T) == -ETIME) {
108 		pr_err("request wait timed out!\n");
109 		goto out_unlock;
110 	}
111 
112 	if (!i915_request_completed(request)) {
113 		pr_err("request not complete after waiting!\n");
114 		goto out_unlock;
115 	}
116 
117 	if (i915_request_wait(request, I915_WAIT_LOCKED, T) == -ETIME) {
118 		pr_err("request wait timed out when already complete!\n");
119 		goto out_unlock;
120 	}
121 
122 	err = 0;
123 out_unlock:
124 	mock_device_flush(i915);
125 	mutex_unlock(&i915->drm.struct_mutex);
126 	return err;
127 }
128 
129 static int igt_fence_wait(void *arg)
130 {
131 	const long T = HZ / 4;
132 	struct drm_i915_private *i915 = arg;
133 	struct i915_request *request;
134 	int err = -EINVAL;
135 
136 	/* Submit a request, treat it as a fence and wait upon it */
137 
138 	mutex_lock(&i915->drm.struct_mutex);
139 	request = mock_request(i915->engine[RCS0], i915->kernel_context, T);
140 	if (!request) {
141 		err = -ENOMEM;
142 		goto out_locked;
143 	}
144 
145 	if (dma_fence_wait_timeout(&request->fence, false, T) != -ETIME) {
146 		pr_err("fence wait success before submit (expected timeout)!\n");
147 		goto out_locked;
148 	}
149 
150 	i915_request_add(request);
151 	mutex_unlock(&i915->drm.struct_mutex);
152 
153 	if (dma_fence_is_signaled(&request->fence)) {
154 		pr_err("fence signaled immediately!\n");
155 		goto out_device;
156 	}
157 
158 	if (dma_fence_wait_timeout(&request->fence, false, T / 2) != -ETIME) {
159 		pr_err("fence wait success after submit (expected timeout)!\n");
160 		goto out_device;
161 	}
162 
163 	if (dma_fence_wait_timeout(&request->fence, false, T) <= 0) {
164 		pr_err("fence wait timed out (expected success)!\n");
165 		goto out_device;
166 	}
167 
168 	if (!dma_fence_is_signaled(&request->fence)) {
169 		pr_err("fence unsignaled after waiting!\n");
170 		goto out_device;
171 	}
172 
173 	if (dma_fence_wait_timeout(&request->fence, false, T) <= 0) {
174 		pr_err("fence wait timed out when complete (expected success)!\n");
175 		goto out_device;
176 	}
177 
178 	err = 0;
179 out_device:
180 	mutex_lock(&i915->drm.struct_mutex);
181 out_locked:
182 	mock_device_flush(i915);
183 	mutex_unlock(&i915->drm.struct_mutex);
184 	return err;
185 }
186 
187 static int igt_request_rewind(void *arg)
188 {
189 	struct drm_i915_private *i915 = arg;
190 	struct i915_request *request, *vip;
191 	struct i915_gem_context *ctx[2];
192 	int err = -EINVAL;
193 
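	/* Check that requests can be reordered by hand (simulating preemption):
	 * submit a slow request, cancel it before it executes, add a "vip"
	 * request from a second context, then resubmit the original and verify
	 * the vip request completes first while the original is still busy.
	 */
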
194 	mutex_lock(&i915->drm.struct_mutex);
195 	ctx[0] = mock_context(i915, "A");
196 	request = mock_request(i915->engine[RCS0], ctx[0], 2 * HZ);
197 	if (!request) {
198 		err = -ENOMEM;
199 		goto err_context_0;
200 	}
201 
202 	i915_request_get(request);
203 	i915_request_add(request);
204 
205 	ctx[1] = mock_context(i915, "B");
206 	vip = mock_request(i915->engine[RCS0], ctx[1], 0);
207 	if (!vip) {
208 		err = -ENOMEM;
209 		goto err_context_1;
210 	}
211 
212 	/* Simulate preemption by manual reordering */
213 	if (!mock_cancel_request(request)) {
214 		pr_err("failed to cancel request (already executed)!\n");
215 		i915_request_add(vip);
216 		goto err_context_1;
217 	}
218 	i915_request_get(vip);
219 	i915_request_add(vip);
220 	rcu_read_lock();
221 	request->engine->submit_request(request);
222 	rcu_read_unlock();
223 
224 	mutex_unlock(&i915->drm.struct_mutex);
225 
226 	if (i915_request_wait(vip, 0, HZ) == -ETIME) {
227 		pr_err("timed out waiting for high priority request\n");
228 		goto err;
229 	}
230 
231 	if (i915_request_completed(request)) {
232 		pr_err("low priority request already completed\n");
233 		goto err;
234 	}
235 
236 	err = 0;
237 err:
238 	i915_request_put(vip);
239 	mutex_lock(&i915->drm.struct_mutex);
240 err_context_1:
241 	mock_context_close(ctx[1]);
242 	i915_request_put(request);
243 err_context_0:
244 	mock_context_close(ctx[0]);
245 	mock_device_flush(i915);
246 	mutex_unlock(&i915->drm.struct_mutex);
247 	return err;
248 }
249 
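/*
 * Parameters and result counters shared with the breadcrumbs smoketest
 * threads: the engine under test, a pool of contexts to build requests
 * from, the request constructor to use (mock or live), an upper bound on
 * the number of requests per pass, and running totals of how many waits
 * and fences the threads completed.
 */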
250 struct smoketest {
251 	struct intel_engine_cs *engine;
252 	struct i915_gem_context **contexts;
253 	atomic_long_t num_waits, num_fences;
254 	int ncontexts, max_batch;
255 	struct i915_request *(*request_alloc)(struct i915_gem_context *,
256 					      struct intel_engine_cs *);
257 };
258 
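/*
 * The smoketest builds its requests through one of these callbacks, using
 * either mock requests (no hardware) or real requests on a live device.
 */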
259 static struct i915_request *
260 __mock_request_alloc(struct i915_gem_context *ctx,
261 		     struct intel_engine_cs *engine)
262 {
263 	return mock_request(engine, ctx, 0);
264 }
265 
266 static struct i915_request *
267 __live_request_alloc(struct i915_gem_context *ctx,
268 		     struct intel_engine_cs *engine)
269 {
270 	return igt_request_alloc(ctx, engine);
271 }
272 
273 static int __igt_breadcrumbs_smoketest(void *arg)
274 {
275 	struct smoketest *t = arg;
276 	struct mutex * const BKL = &t->engine->i915->drm.struct_mutex;
277 	const unsigned int max_batch = min(t->ncontexts, t->max_batch) - 1;
278 	const unsigned int total = 4 * t->ncontexts + 1;
279 	unsigned int num_waits = 0, num_fences = 0;
280 	struct i915_request **requests;
281 	I915_RND_STATE(prng);
282 	unsigned int *order;
283 	int err = 0;
284 
285 	/*
286 	 * A very simple test to catch the most egregious of list handling bugs.
287 	 *
288 	 * At its heart, we simply create oodles of requests running across
289 	 * multiple kthreads and enable signaling on them, for the sole purpose
290 	 * of stressing our breadcrumb handling. The only inspection we do is
291 	 * that the fences were marked as signaled.
292 	 */
293 
294 	requests = kmalloc_array(total, sizeof(*requests), GFP_KERNEL);
295 	if (!requests)
296 		return -ENOMEM;
297 
298 	order = i915_random_order(total, &prng);
299 	if (!order) {
300 		err = -ENOMEM;
301 		goto out_requests;
302 	}
303 
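	/*
	 * Each pass below builds a randomly sized batch of requests, all
	 * gated behind a single submit fence, collects their completion into
	 * one wait fence, and then checks that every request was signaled.
	 */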
304 	while (!kthread_should_stop()) {
305 		struct i915_sw_fence *submit, *wait;
306 		unsigned int n, count;
307 
308 		submit = heap_fence_create(GFP_KERNEL);
309 		if (!submit) {
310 			err = -ENOMEM;
311 			break;
312 		}
313 
314 		wait = heap_fence_create(GFP_KERNEL);
315 		if (!wait) {
316 			i915_sw_fence_commit(submit);
317 			heap_fence_put(submit);
			err = -ENOMEM;
319 			break;
320 		}
321 
322 		i915_random_reorder(order, total, &prng);
323 		count = 1 + i915_prandom_u32_max_state(max_batch, &prng);
324 
325 		for (n = 0; n < count; n++) {
326 			struct i915_gem_context *ctx =
327 				t->contexts[order[n] % t->ncontexts];
328 			struct i915_request *rq;
329 
330 			mutex_lock(BKL);
331 
332 			rq = t->request_alloc(ctx, t->engine);
333 			if (IS_ERR(rq)) {
334 				mutex_unlock(BKL);
335 				err = PTR_ERR(rq);
336 				count = n;
337 				break;
338 			}
339 
340 			err = i915_sw_fence_await_sw_fence_gfp(&rq->submit,
341 							       submit,
342 							       GFP_KERNEL);
343 
344 			requests[n] = i915_request_get(rq);
345 			i915_request_add(rq);
346 
347 			mutex_unlock(BKL);
348 
349 			if (err >= 0)
350 				err = i915_sw_fence_await_dma_fence(wait,
351 								    &rq->fence,
352 								    0,
353 								    GFP_KERNEL);
354 
355 			if (err < 0) {
356 				i915_request_put(rq);
357 				count = n;
358 				break;
359 			}
360 		}
361 
362 		i915_sw_fence_commit(submit);
363 		i915_sw_fence_commit(wait);
364 
365 		if (!wait_event_timeout(wait->wait,
366 					i915_sw_fence_done(wait),
367 					HZ / 2)) {
368 			struct i915_request *rq = requests[count - 1];
369 
370 			pr_err("waiting for %d fences (last %llx:%lld) on %s timed out!\n",
371 			       count,
372 			       rq->fence.context, rq->fence.seqno,
373 			       t->engine->name);
374 			i915_gem_set_wedged(t->engine->i915);
375 			GEM_BUG_ON(!i915_request_completed(rq));
376 			i915_sw_fence_wait(wait);
377 			err = -EIO;
378 		}
379 
380 		for (n = 0; n < count; n++) {
381 			struct i915_request *rq = requests[n];
382 
383 			if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
384 				      &rq->fence.flags)) {
385 				pr_err("%llu:%llu was not signaled!\n",
386 				       rq->fence.context, rq->fence.seqno);
387 				err = -EINVAL;
388 			}
389 
390 			i915_request_put(rq);
391 		}
392 
393 		heap_fence_put(wait);
394 		heap_fence_put(submit);
395 
396 		if (err < 0)
397 			break;
398 
399 		num_fences += count;
400 		num_waits++;
401 
402 		cond_resched();
403 	}
404 
405 	atomic_long_add(num_fences, &t->num_fences);
406 	atomic_long_add(num_waits, &t->num_waits);
407 
408 	kfree(order);
409 out_requests:
410 	kfree(requests);
411 	return err;
412 }
413 
414 static int mock_breadcrumbs_smoketest(void *arg)
415 {
416 	struct drm_i915_private *i915 = arg;
417 	struct smoketest t = {
418 		.engine = i915->engine[RCS0],
419 		.ncontexts = 1024,
420 		.max_batch = 1024,
421 		.request_alloc = __mock_request_alloc
422 	};
423 	unsigned int ncpus = num_online_cpus();
424 	struct task_struct **threads;
425 	unsigned int n;
426 	int ret = 0;
427 
428 	/*
429 	 * Smoketest our breadcrumb/signal handling for requests across multiple
430 	 * threads. A very simple test to only catch the most egregious of bugs.
431 	 * See __igt_breadcrumbs_smoketest();
432 	 */
433 
434 	threads = kmalloc_array(ncpus, sizeof(*threads), GFP_KERNEL);
435 	if (!threads)
436 		return -ENOMEM;
437 
438 	t.contexts =
439 		kmalloc_array(t.ncontexts, sizeof(*t.contexts), GFP_KERNEL);
440 	if (!t.contexts) {
441 		ret = -ENOMEM;
442 		goto out_threads;
443 	}
444 
445 	mutex_lock(&t.engine->i915->drm.struct_mutex);
446 	for (n = 0; n < t.ncontexts; n++) {
447 		t.contexts[n] = mock_context(t.engine->i915, "mock");
448 		if (!t.contexts[n]) {
449 			ret = -ENOMEM;
450 			goto out_contexts;
451 		}
452 	}
453 	mutex_unlock(&t.engine->i915->drm.struct_mutex);
454 
455 	for (n = 0; n < ncpus; n++) {
456 		threads[n] = kthread_run(__igt_breadcrumbs_smoketest,
457 					 &t, "igt/%d", n);
458 		if (IS_ERR(threads[n])) {
459 			ret = PTR_ERR(threads[n]);
460 			ncpus = n;
461 			break;
462 		}
463 
464 		get_task_struct(threads[n]);
465 	}
466 
467 	msleep(jiffies_to_msecs(i915_selftest.timeout_jiffies));
468 
469 	for (n = 0; n < ncpus; n++) {
470 		int err;
471 
472 		err = kthread_stop(threads[n]);
473 		if (err < 0 && !ret)
474 			ret = err;
475 
476 		put_task_struct(threads[n]);
477 	}
	pr_info("Completed %lu waits for %lu fences across %d cpus\n",
479 		atomic_long_read(&t.num_waits),
480 		atomic_long_read(&t.num_fences),
481 		ncpus);
482 
483 	mutex_lock(&t.engine->i915->drm.struct_mutex);
484 out_contexts:
485 	for (n = 0; n < t.ncontexts; n++) {
486 		if (!t.contexts[n])
487 			break;
488 		mock_context_close(t.contexts[n]);
489 	}
490 	mutex_unlock(&t.engine->i915->drm.struct_mutex);
491 	kfree(t.contexts);
492 out_threads:
493 	kfree(threads);
494 
495 	return ret;
496 }
497 
498 int i915_request_mock_selftests(void)
499 {
500 	static const struct i915_subtest tests[] = {
501 		SUBTEST(igt_add_request),
502 		SUBTEST(igt_wait_request),
503 		SUBTEST(igt_fence_wait),
504 		SUBTEST(igt_request_rewind),
505 		SUBTEST(mock_breadcrumbs_smoketest),
506 	};
507 	struct drm_i915_private *i915;
508 	intel_wakeref_t wakeref;
509 	int err = 0;
510 
511 	i915 = mock_gem_device();
512 	if (!i915)
513 		return -ENOMEM;
514 
515 	with_intel_runtime_pm(i915, wakeref)
516 		err = i915_subtests(tests, i915);
517 
518 	drm_dev_put(&i915->drm);
519 
520 	return err;
521 }
522 
523 static int live_nop_request(void *arg)
524 {
525 	struct drm_i915_private *i915 = arg;
526 	struct intel_engine_cs *engine;
527 	intel_wakeref_t wakeref;
528 	struct igt_live_test t;
529 	unsigned int id;
530 	int err = -ENODEV;
531 
	/* Submit various sized sets of nop requests (carrying no batch buffer)
	 * to each engine individually, and wait for them to complete. This
	 * measures the bare overhead of allocating and submitting requests
	 * to the hardware.
	 */
536 
537 	mutex_lock(&i915->drm.struct_mutex);
538 	wakeref = intel_runtime_pm_get(i915);
539 
540 	for_each_engine(engine, i915, id) {
541 		struct i915_request *request = NULL;
542 		unsigned long n, prime;
543 		IGT_TIMEOUT(end_time);
544 		ktime_t times[2] = {};
545 
546 		err = igt_live_test_begin(&t, i915, __func__, engine->name);
547 		if (err)
548 			goto out_unlock;
549 
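		/*
		 * Time how long it takes to build and retire increasingly
		 * large sets of requests; the prime == 1 pass records the
		 * baseline latency for a single request.
		 */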
550 		for_each_prime_number_from(prime, 1, 8192) {
551 			times[1] = ktime_get_raw();
552 
553 			for (n = 0; n < prime; n++) {
554 				request = i915_request_create(engine->kernel_context);
555 				if (IS_ERR(request)) {
556 					err = PTR_ERR(request);
557 					goto out_unlock;
558 				}
559 
560 				/* This space is left intentionally blank.
561 				 *
562 				 * We do not actually want to perform any
563 				 * action with this request, we just want
564 				 * to measure the latency in allocation
565 				 * and submission of our breadcrumbs -
566 				 * ensuring that the bare request is sufficient
567 				 * for the system to work (i.e. proper HEAD
568 				 * tracking of the rings, interrupt handling,
569 				 * etc). It also gives us the lowest bounds
570 				 * for latency.
571 				 */
572 
573 				i915_request_add(request);
574 			}
575 			i915_request_wait(request,
576 					  I915_WAIT_LOCKED,
577 					  MAX_SCHEDULE_TIMEOUT);
578 
579 			times[1] = ktime_sub(ktime_get_raw(), times[1]);
580 			if (prime == 1)
581 				times[0] = times[1];
582 
583 			if (__igt_timeout(end_time, NULL))
584 				break;
585 		}
586 
587 		err = igt_live_test_end(&t);
588 		if (err)
589 			goto out_unlock;
590 
591 		pr_info("Request latencies on %s: 1 = %lluns, %lu = %lluns\n",
592 			engine->name,
593 			ktime_to_ns(times[0]),
594 			prime, div64_u64(ktime_to_ns(times[1]), prime));
595 	}
596 
597 out_unlock:
598 	intel_runtime_pm_put(i915, wakeref);
599 	mutex_unlock(&i915->drm.struct_mutex);
600 	return err;
601 }
602 
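/*
 * Build a single-page batch containing just MI_BATCH_BUFFER_END, pinned
 * into the global GTT, for measuring the overhead of submitting a minimal
 * batch buffer.
 */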
603 static struct i915_vma *empty_batch(struct drm_i915_private *i915)
604 {
605 	struct drm_i915_gem_object *obj;
606 	struct i915_vma *vma;
607 	u32 *cmd;
608 	int err;
609 
610 	obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
611 	if (IS_ERR(obj))
612 		return ERR_CAST(obj);
613 
614 	cmd = i915_gem_object_pin_map(obj, I915_MAP_WB);
615 	if (IS_ERR(cmd)) {
616 		err = PTR_ERR(cmd);
617 		goto err;
618 	}
619 
620 	*cmd = MI_BATCH_BUFFER_END;
621 
622 	__i915_gem_object_flush_map(obj, 0, 64);
623 	i915_gem_object_unpin_map(obj);
624 
625 	i915_gem_chipset_flush(i915);
626 
627 	vma = i915_vma_instance(obj, &i915->ggtt.vm, NULL);
628 	if (IS_ERR(vma)) {
629 		err = PTR_ERR(vma);
630 		goto err;
631 	}
632 
633 	err = i915_vma_pin(vma, 0, 0, PIN_USER | PIN_GLOBAL);
634 	if (err)
635 		goto err;
636 
637 	return vma;
638 
639 err:
640 	i915_gem_object_put(obj);
641 	return ERR_PTR(err);
642 }
643 
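/*
 * Submit the empty batch on the engine's kernel context; the request does
 * no work beyond executing the single MI_BATCH_BUFFER_END.
 */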
644 static struct i915_request *
645 empty_request(struct intel_engine_cs *engine,
646 	      struct i915_vma *batch)
647 {
648 	struct i915_request *request;
649 	int err;
650 
651 	request = i915_request_create(engine->kernel_context);
652 	if (IS_ERR(request))
653 		return request;
654 
655 	err = engine->emit_bb_start(request,
656 				    batch->node.start,
657 				    batch->node.size,
658 				    I915_DISPATCH_SECURE);
659 	if (err)
660 		goto out_request;
661 
662 out_request:
663 	i915_request_add(request);
664 	return err ? ERR_PTR(err) : request;
665 }
666 
667 static int live_empty_request(void *arg)
668 {
669 	struct drm_i915_private *i915 = arg;
670 	struct intel_engine_cs *engine;
671 	intel_wakeref_t wakeref;
672 	struct igt_live_test t;
673 	struct i915_vma *batch;
674 	unsigned int id;
675 	int err = 0;
676 
677 	/* Submit various sized batches of empty requests, to each engine
678 	 * (individually), and wait for the batch to complete. We can check
679 	 * the overhead of submitting requests to the hardware.
680 	 */
681 
682 	mutex_lock(&i915->drm.struct_mutex);
683 	wakeref = intel_runtime_pm_get(i915);
684 
685 	batch = empty_batch(i915);
686 	if (IS_ERR(batch)) {
687 		err = PTR_ERR(batch);
688 		goto out_unlock;
689 	}
690 
691 	for_each_engine(engine, i915, id) {
692 		IGT_TIMEOUT(end_time);
693 		struct i915_request *request;
694 		unsigned long n, prime;
695 		ktime_t times[2] = {};
696 
697 		err = igt_live_test_begin(&t, i915, __func__, engine->name);
698 		if (err)
699 			goto out_batch;
700 
701 		/* Warmup / preload */
702 		request = empty_request(engine, batch);
703 		if (IS_ERR(request)) {
704 			err = PTR_ERR(request);
705 			goto out_batch;
706 		}
707 		i915_request_wait(request,
708 				  I915_WAIT_LOCKED,
709 				  MAX_SCHEDULE_TIMEOUT);
710 
711 		for_each_prime_number_from(prime, 1, 8192) {
712 			times[1] = ktime_get_raw();
713 
714 			for (n = 0; n < prime; n++) {
715 				request = empty_request(engine, batch);
716 				if (IS_ERR(request)) {
717 					err = PTR_ERR(request);
718 					goto out_batch;
719 				}
720 			}
721 			i915_request_wait(request,
722 					  I915_WAIT_LOCKED,
723 					  MAX_SCHEDULE_TIMEOUT);
724 
725 			times[1] = ktime_sub(ktime_get_raw(), times[1]);
726 			if (prime == 1)
727 				times[0] = times[1];
728 
729 			if (__igt_timeout(end_time, NULL))
730 				break;
731 		}
732 
733 		err = igt_live_test_end(&t);
734 		if (err)
735 			goto out_batch;
736 
737 		pr_info("Batch latencies on %s: 1 = %lluns, %lu = %lluns\n",
738 			engine->name,
739 			ktime_to_ns(times[0]),
740 			prime, div64_u64(ktime_to_ns(times[1]), prime));
741 	}
742 
743 out_batch:
744 	i915_vma_unpin(batch);
745 	i915_vma_put(batch);
746 out_unlock:
747 	intel_runtime_pm_put(i915, wakeref);
748 	mutex_unlock(&i915->drm.struct_mutex);
749 	return err;
750 }
751 
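/*
 * Build a batch whose first instruction is an MI_BATCH_BUFFER_START looping
 * back to its own start, so any request executing it spins on the GPU until
 * recursive_batch_resolve() rewrites that first dword to
 * MI_BATCH_BUFFER_END.
 */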
752 static struct i915_vma *recursive_batch(struct drm_i915_private *i915)
753 {
754 	struct i915_gem_context *ctx = i915->kernel_context;
755 	struct i915_address_space *vm =
756 		ctx->ppgtt ? &ctx->ppgtt->vm : &i915->ggtt.vm;
757 	struct drm_i915_gem_object *obj;
758 	const int gen = INTEL_GEN(i915);
759 	struct i915_vma *vma;
760 	u32 *cmd;
761 	int err;
762 
763 	obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
764 	if (IS_ERR(obj))
765 		return ERR_CAST(obj);
766 
767 	vma = i915_vma_instance(obj, vm, NULL);
768 	if (IS_ERR(vma)) {
769 		err = PTR_ERR(vma);
770 		goto err;
771 	}
772 
773 	err = i915_vma_pin(vma, 0, 0, PIN_USER);
774 	if (err)
775 		goto err;
776 
777 	cmd = i915_gem_object_pin_map(obj, I915_MAP_WC);
778 	if (IS_ERR(cmd)) {
779 		err = PTR_ERR(cmd);
780 		goto err;
781 	}
782 
783 	if (gen >= 8) {
784 		*cmd++ = MI_BATCH_BUFFER_START | 1 << 8 | 1;
785 		*cmd++ = lower_32_bits(vma->node.start);
786 		*cmd++ = upper_32_bits(vma->node.start);
787 	} else if (gen >= 6) {
788 		*cmd++ = MI_BATCH_BUFFER_START | 1 << 8;
789 		*cmd++ = lower_32_bits(vma->node.start);
790 	} else {
791 		*cmd++ = MI_BATCH_BUFFER_START | MI_BATCH_GTT;
792 		*cmd++ = lower_32_bits(vma->node.start);
793 	}
794 	*cmd++ = MI_BATCH_BUFFER_END; /* terminate early in case of error */
795 
796 	__i915_gem_object_flush_map(obj, 0, 64);
797 	i915_gem_object_unpin_map(obj);
798 
799 	i915_gem_chipset_flush(i915);
800 
801 	return vma;
802 
803 err:
804 	i915_gem_object_put(obj);
805 	return ERR_PTR(err);
806 }
807 
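/*
 * Terminate a recursive batch by overwriting its self-referencing jump with
 * MI_BATCH_BUFFER_END, allowing any requests spinning on it to complete.
 */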
808 static int recursive_batch_resolve(struct i915_vma *batch)
809 {
810 	u32 *cmd;
811 
812 	cmd = i915_gem_object_pin_map(batch->obj, I915_MAP_WC);
813 	if (IS_ERR(cmd))
814 		return PTR_ERR(cmd);
815 
816 	*cmd = MI_BATCH_BUFFER_END;
817 	i915_gem_chipset_flush(batch->vm->i915);
818 
819 	i915_gem_object_unpin_map(batch->obj);
820 
821 	return 0;
822 }
823 
824 static int live_all_engines(void *arg)
825 {
826 	struct drm_i915_private *i915 = arg;
827 	struct intel_engine_cs *engine;
828 	struct i915_request *request[I915_NUM_ENGINES];
829 	intel_wakeref_t wakeref;
830 	struct igt_live_test t;
831 	struct i915_vma *batch;
832 	unsigned int id;
833 	int err;
834 
835 	/* Check we can submit requests to all engines simultaneously. We
836 	 * send a recursive batch to each engine - checking that we don't
837 	 * block doing so, and that they don't complete too soon.
838 	 */
839 
840 	mutex_lock(&i915->drm.struct_mutex);
841 	wakeref = intel_runtime_pm_get(i915);
842 
843 	err = igt_live_test_begin(&t, i915, __func__, "");
844 	if (err)
845 		goto out_unlock;
846 
847 	batch = recursive_batch(i915);
848 	if (IS_ERR(batch)) {
849 		err = PTR_ERR(batch);
850 		pr_err("%s: Unable to create batch, err=%d\n", __func__, err);
851 		goto out_unlock;
852 	}
853 
854 	for_each_engine(engine, i915, id) {
855 		request[id] = i915_request_create(engine->kernel_context);
856 		if (IS_ERR(request[id])) {
857 			err = PTR_ERR(request[id]);
858 			pr_err("%s: Request allocation failed with err=%d\n",
859 			       __func__, err);
860 			goto out_request;
861 		}
862 
863 		err = engine->emit_bb_start(request[id],
864 					    batch->node.start,
865 					    batch->node.size,
866 					    0);
867 		GEM_BUG_ON(err);
868 		request[id]->batch = batch;
869 
870 		if (!i915_gem_object_has_active_reference(batch->obj)) {
871 			i915_gem_object_get(batch->obj);
872 			i915_gem_object_set_active_reference(batch->obj);
873 		}
874 
875 		err = i915_vma_move_to_active(batch, request[id], 0);
876 		GEM_BUG_ON(err);
877 
878 		i915_request_get(request[id]);
879 		i915_request_add(request[id]);
880 	}
881 
882 	for_each_engine(engine, i915, id) {
883 		if (i915_request_completed(request[id])) {
884 			pr_err("%s(%s): request completed too early!\n",
885 			       __func__, engine->name);
886 			err = -EINVAL;
887 			goto out_request;
888 		}
889 	}
890 
891 	err = recursive_batch_resolve(batch);
892 	if (err) {
893 		pr_err("%s: failed to resolve batch, err=%d\n", __func__, err);
894 		goto out_request;
895 	}
896 
897 	for_each_engine(engine, i915, id) {
898 		long timeout;
899 
900 		timeout = i915_request_wait(request[id],
901 					    I915_WAIT_LOCKED,
902 					    MAX_SCHEDULE_TIMEOUT);
903 		if (timeout < 0) {
904 			err = timeout;
905 			pr_err("%s: error waiting for request on %s, err=%d\n",
906 			       __func__, engine->name, err);
907 			goto out_request;
908 		}
909 
910 		GEM_BUG_ON(!i915_request_completed(request[id]));
911 		i915_request_put(request[id]);
912 		request[id] = NULL;
913 	}
914 
915 	err = igt_live_test_end(&t);
916 
917 out_request:
918 	for_each_engine(engine, i915, id)
919 		if (request[id])
920 			i915_request_put(request[id]);
921 	i915_vma_unpin(batch);
922 	i915_vma_put(batch);
923 out_unlock:
924 	intel_runtime_pm_put(i915, wakeref);
925 	mutex_unlock(&i915->drm.struct_mutex);
926 	return err;
927 }
928 
929 static int live_sequential_engines(void *arg)
930 {
931 	struct drm_i915_private *i915 = arg;
932 	struct i915_request *request[I915_NUM_ENGINES] = {};
933 	struct i915_request *prev = NULL;
934 	struct intel_engine_cs *engine;
935 	intel_wakeref_t wakeref;
936 	struct igt_live_test t;
937 	unsigned int id;
938 	int err;
939 
940 	/* Check we can submit requests to all engines sequentially, such
941 	 * that each successive request waits for the earlier ones. This
942 	 * tests that we don't execute requests out of order, even though
943 	 * they are running on independent engines.
944 	 */
945 
946 	mutex_lock(&i915->drm.struct_mutex);
947 	wakeref = intel_runtime_pm_get(i915);
948 
949 	err = igt_live_test_begin(&t, i915, __func__, "");
950 	if (err)
951 		goto out_unlock;
952 
953 	for_each_engine(engine, i915, id) {
954 		struct i915_vma *batch;
955 
956 		batch = recursive_batch(i915);
957 		if (IS_ERR(batch)) {
958 			err = PTR_ERR(batch);
959 			pr_err("%s: Unable to create batch for %s, err=%d\n",
960 			       __func__, engine->name, err);
961 			goto out_unlock;
962 		}
963 
964 		request[id] = i915_request_create(engine->kernel_context);
965 		if (IS_ERR(request[id])) {
966 			err = PTR_ERR(request[id]);
967 			pr_err("%s: Request allocation failed for %s with err=%d\n",
968 			       __func__, engine->name, err);
969 			goto out_request;
970 		}
971 
972 		if (prev) {
973 			err = i915_request_await_dma_fence(request[id],
974 							   &prev->fence);
975 			if (err) {
976 				i915_request_add(request[id]);
977 				pr_err("%s: Request await failed for %s with err=%d\n",
978 				       __func__, engine->name, err);
979 				goto out_request;
980 			}
981 		}
982 
983 		err = engine->emit_bb_start(request[id],
984 					    batch->node.start,
985 					    batch->node.size,
986 					    0);
987 		GEM_BUG_ON(err);
988 		request[id]->batch = batch;
989 
990 		err = i915_vma_move_to_active(batch, request[id], 0);
991 		GEM_BUG_ON(err);
992 
993 		i915_gem_object_set_active_reference(batch->obj);
994 		i915_vma_get(batch);
995 
996 		i915_request_get(request[id]);
997 		i915_request_add(request[id]);
998 
999 		prev = request[id];
1000 	}
1001 
1002 	for_each_engine(engine, i915, id) {
1003 		long timeout;
1004 
1005 		if (i915_request_completed(request[id])) {
1006 			pr_err("%s(%s): request completed too early!\n",
1007 			       __func__, engine->name);
1008 			err = -EINVAL;
1009 			goto out_request;
1010 		}
1011 
1012 		err = recursive_batch_resolve(request[id]->batch);
1013 		if (err) {
1014 			pr_err("%s: failed to resolve batch, err=%d\n",
1015 			       __func__, err);
1016 			goto out_request;
1017 		}
1018 
1019 		timeout = i915_request_wait(request[id],
1020 					    I915_WAIT_LOCKED,
1021 					    MAX_SCHEDULE_TIMEOUT);
1022 		if (timeout < 0) {
1023 			err = timeout;
1024 			pr_err("%s: error waiting for request on %s, err=%d\n",
1025 			       __func__, engine->name, err);
1026 			goto out_request;
1027 		}
1028 
1029 		GEM_BUG_ON(!i915_request_completed(request[id]));
1030 	}
1031 
1032 	err = igt_live_test_end(&t);
1033 
1034 out_request:
1035 	for_each_engine(engine, i915, id) {
1036 		u32 *cmd;
1037 
1038 		if (!request[id])
1039 			break;
1040 
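		/*
		 * The batch may still be spinning if we bailed out early, so
		 * terminate it with MI_BATCH_BUFFER_END before dropping our
		 * references.
		 */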
1041 		cmd = i915_gem_object_pin_map(request[id]->batch->obj,
1042 					      I915_MAP_WC);
1043 		if (!IS_ERR(cmd)) {
1044 			*cmd = MI_BATCH_BUFFER_END;
1045 			i915_gem_chipset_flush(i915);
1046 
1047 			i915_gem_object_unpin_map(request[id]->batch->obj);
1048 		}
1049 
1050 		i915_vma_put(request[id]->batch);
1051 		i915_request_put(request[id]);
1052 	}
1053 out_unlock:
1054 	intel_runtime_pm_put(i915, wakeref);
1055 	mutex_unlock(&i915->drm.struct_mutex);
1056 	return err;
1057 }
1058 
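/*
 * Estimate how many requests we can queue on this engine/context while
 * holding them back with an unsignaled fence, without risking a wrap of
 * the legacy shared ringbuffer (roughly half the ring, measured in
 * single-request-sized steps).
 */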
1059 static int
1060 max_batches(struct i915_gem_context *ctx, struct intel_engine_cs *engine)
1061 {
1062 	struct i915_request *rq;
1063 	int ret;
1064 
1065 	/*
1066 	 * Before execlists, all contexts share the same ringbuffer. With
1067 	 * execlists, each context/engine has a separate ringbuffer and
1068 	 * for the purposes of this test, inexhaustible.
1069 	 *
1070 	 * For the global ringbuffer though, we have to be very careful
1071 	 * that we do not wrap while preventing the execution of requests
	 * with an unsignaled fence.
1073 	 */
1074 	if (HAS_EXECLISTS(ctx->i915))
1075 		return INT_MAX;
1076 
1077 	rq = igt_request_alloc(ctx, engine);
1078 	if (IS_ERR(rq)) {
1079 		ret = PTR_ERR(rq);
1080 	} else {
1081 		int sz;
1082 
1083 		ret = rq->ring->size - rq->reserved_space;
1084 		i915_request_add(rq);
1085 
1086 		sz = rq->ring->emit - rq->head;
1087 		if (sz < 0)
1088 			sz += rq->ring->size;
1089 		ret /= sz;
1090 		ret /= 2; /* leave half spare, in case of emergency! */
1091 	}
1092 
1093 	return ret;
1094 }
1095 
1096 static int live_breadcrumbs_smoketest(void *arg)
1097 {
1098 	struct drm_i915_private *i915 = arg;
1099 	struct smoketest t[I915_NUM_ENGINES];
1100 	unsigned int ncpus = num_online_cpus();
1101 	unsigned long num_waits, num_fences;
1102 	struct intel_engine_cs *engine;
1103 	struct task_struct **threads;
1104 	struct igt_live_test live;
1105 	enum intel_engine_id id;
1106 	intel_wakeref_t wakeref;
1107 	struct drm_file *file;
1108 	unsigned int n;
1109 	int ret = 0;
1110 
1111 	/*
1112 	 * Smoketest our breadcrumb/signal handling for requests across multiple
1113 	 * threads. A very simple test to only catch the most egregious of bugs.
1114 	 * See __igt_breadcrumbs_smoketest();
1115 	 *
1116 	 * On real hardware this time.
1117 	 */
1118 
1119 	wakeref = intel_runtime_pm_get(i915);
1120 
1121 	file = mock_file(i915);
1122 	if (IS_ERR(file)) {
1123 		ret = PTR_ERR(file);
1124 		goto out_rpm;
1125 	}
1126 
1127 	threads = kcalloc(ncpus * I915_NUM_ENGINES,
1128 			  sizeof(*threads),
1129 			  GFP_KERNEL);
1130 	if (!threads) {
1131 		ret = -ENOMEM;
1132 		goto out_file;
1133 	}
1134 
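	/*
	 * t[0] acts as a template; the contexts and request constructor set
	 * up here are copied into each engine's slot before the threads are
	 * spawned.
	 */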
1135 	memset(&t[0], 0, sizeof(t[0]));
1136 	t[0].request_alloc = __live_request_alloc;
1137 	t[0].ncontexts = 64;
1138 	t[0].contexts = kmalloc_array(t[0].ncontexts,
1139 				      sizeof(*t[0].contexts),
1140 				      GFP_KERNEL);
1141 	if (!t[0].contexts) {
1142 		ret = -ENOMEM;
1143 		goto out_threads;
1144 	}
1145 
1146 	mutex_lock(&i915->drm.struct_mutex);
1147 	for (n = 0; n < t[0].ncontexts; n++) {
1148 		t[0].contexts[n] = live_context(i915, file);
1149 		if (!t[0].contexts[n]) {
1150 			ret = -ENOMEM;
1151 			goto out_contexts;
1152 		}
1153 	}
1154 
1155 	ret = igt_live_test_begin(&live, i915, __func__, "");
1156 	if (ret)
1157 		goto out_contexts;
1158 
1159 	for_each_engine(engine, i915, id) {
1160 		t[id] = t[0];
1161 		t[id].engine = engine;
1162 		t[id].max_batch = max_batches(t[0].contexts[0], engine);
1163 		if (t[id].max_batch < 0) {
1164 			ret = t[id].max_batch;
1165 			mutex_unlock(&i915->drm.struct_mutex);
1166 			goto out_flush;
1167 		}
1168 		/* One ring interleaved between requests from all cpus */
1169 		t[id].max_batch /= num_online_cpus() + 1;
1170 		pr_debug("Limiting batches to %d requests on %s\n",
1171 			 t[id].max_batch, engine->name);
1172 
1173 		for (n = 0; n < ncpus; n++) {
1174 			struct task_struct *tsk;
1175 
1176 			tsk = kthread_run(__igt_breadcrumbs_smoketest,
1177 					  &t[id], "igt/%d.%d", id, n);
1178 			if (IS_ERR(tsk)) {
1179 				ret = PTR_ERR(tsk);
1180 				mutex_unlock(&i915->drm.struct_mutex);
1181 				goto out_flush;
1182 			}
1183 
1184 			get_task_struct(tsk);
1185 			threads[id * ncpus + n] = tsk;
1186 		}
1187 	}
1188 	mutex_unlock(&i915->drm.struct_mutex);
1189 
1190 	msleep(jiffies_to_msecs(i915_selftest.timeout_jiffies));
1191 
1192 out_flush:
1193 	num_waits = 0;
1194 	num_fences = 0;
1195 	for_each_engine(engine, i915, id) {
1196 		for (n = 0; n < ncpus; n++) {
1197 			struct task_struct *tsk = threads[id * ncpus + n];
1198 			int err;
1199 
1200 			if (!tsk)
1201 				continue;
1202 
1203 			err = kthread_stop(tsk);
1204 			if (err < 0 && !ret)
1205 				ret = err;
1206 
1207 			put_task_struct(tsk);
1208 		}
1209 
1210 		num_waits += atomic_long_read(&t[id].num_waits);
1211 		num_fences += atomic_long_read(&t[id].num_fences);
1212 	}
1213 	pr_info("Completed %lu waits for %lu fences across %d engines and %d cpus\n",
1214 		num_waits, num_fences, RUNTIME_INFO(i915)->num_engines, ncpus);
1215 
1216 	mutex_lock(&i915->drm.struct_mutex);
1217 	ret = igt_live_test_end(&live) ?: ret;
1218 out_contexts:
1219 	mutex_unlock(&i915->drm.struct_mutex);
1220 	kfree(t[0].contexts);
1221 out_threads:
1222 	kfree(threads);
1223 out_file:
1224 	mock_file_free(i915, file);
1225 out_rpm:
1226 	intel_runtime_pm_put(i915, wakeref);
1227 
1228 	return ret;
1229 }
1230 
1231 int i915_request_live_selftests(struct drm_i915_private *i915)
1232 {
1233 	static const struct i915_subtest tests[] = {
1234 		SUBTEST(live_nop_request),
1235 		SUBTEST(live_all_engines),
1236 		SUBTEST(live_sequential_engines),
1237 		SUBTEST(live_empty_request),
1238 		SUBTEST(live_breadcrumbs_smoketest),
1239 	};
1240 
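	/* If the GPU is terminally wedged, skip the live request tests. */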
1241 	if (i915_terminally_wedged(i915))
1242 		return 0;
1243 
1244 	return i915_subtests(tests, i915);
1245 }
1246