/*
 * Copyright © 2016 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 */

#include <linux/prime_numbers.h>

#include "gem/i915_gem_pm.h"
#include "gem/selftests/mock_context.h"

#include "i915_random.h"
#include "i915_selftest.h"
#include "igt_live_test.h"
#include "lib_sw_fence.h"

#include "mock_drm.h"
#include "mock_gem_device.h"

static int igt_add_request(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct i915_request *request;
	int err = -ENOMEM;

	/* Basic preliminary test to create a request and let it loose! */

	mutex_lock(&i915->drm.struct_mutex);
	request = mock_request(i915->engine[RCS0],
			       i915->kernel_context,
			       HZ / 10);
	if (!request)
		goto out_unlock;

	i915_request_add(request);

	err = 0;
out_unlock:
	mutex_unlock(&i915->drm.struct_mutex);
	return err;
}

static int igt_wait_request(void *arg)
{
	const long T = HZ / 4;
	struct drm_i915_private *i915 = arg;
	struct i915_request *request;
	int err = -EINVAL;

	/* Submit a request, then wait upon it */

	mutex_lock(&i915->drm.struct_mutex);
	request = mock_request(i915->engine[RCS0], i915->kernel_context, T);
	if (!request) {
		err = -ENOMEM;
		goto out_unlock;
	}

	if (i915_request_wait(request, 0, 0) != -ETIME) {
		pr_err("request wait (busy query) succeeded (expected timeout before submit!)\n");
		goto out_unlock;
	}

	if (i915_request_wait(request, 0, T) != -ETIME) {
		pr_err("request wait succeeded (expected timeout before submit!)\n");
		goto out_unlock;
	}

	if (i915_request_completed(request)) {
		pr_err("request completed before submit!!\n");
		goto out_unlock;
	}

	i915_request_add(request);

	if (i915_request_wait(request, 0, 0) != -ETIME) {
		pr_err("request wait (busy query) succeeded (expected timeout after submit!)\n");
		goto out_unlock;
	}

	if (i915_request_completed(request)) {
		pr_err("request completed immediately!\n");
		goto out_unlock;
	}

	if (i915_request_wait(request, 0, T / 2) != -ETIME) {
		pr_err("request wait succeeded (expected timeout!)\n");
		goto out_unlock;
	}

	if (i915_request_wait(request, 0, T) == -ETIME) {
		pr_err("request wait timed out!\n");
		goto out_unlock;
	}

	if (!i915_request_completed(request)) {
		pr_err("request not complete after waiting!\n");
		goto out_unlock;
	}

	if (i915_request_wait(request, 0, T) == -ETIME) {
		pr_err("request wait timed out when already complete!\n");
		goto out_unlock;
	}

	err = 0;
out_unlock:
	mock_device_flush(i915);
	mutex_unlock(&i915->drm.struct_mutex);
	return err;
}

static int igt_fence_wait(void *arg)
{
	const long T = HZ / 4;
	struct drm_i915_private *i915 = arg;
	struct i915_request *request;
	int err = -EINVAL;

	/* Submit a request, treat it as a fence and wait upon it */

	mutex_lock(&i915->drm.struct_mutex);
	request = mock_request(i915->engine[RCS0], i915->kernel_context, T);
	if (!request) {
		err = -ENOMEM;
		goto out_locked;
	}

	if (dma_fence_wait_timeout(&request->fence, false, T) != -ETIME) {
		pr_err("fence wait success before submit (expected timeout)!\n");
		goto out_locked;
	}

	i915_request_add(request);
	mutex_unlock(&i915->drm.struct_mutex);

	if (dma_fence_is_signaled(&request->fence)) {
		pr_err("fence signaled immediately!\n");
		goto out_device;
	}

	if (dma_fence_wait_timeout(&request->fence, false, T / 2) != -ETIME) {
		pr_err("fence wait success after submit (expected timeout)!\n");
		goto out_device;
	}

	if (dma_fence_wait_timeout(&request->fence, false, T) <= 0) {
		pr_err("fence wait timed out (expected success)!\n");
		goto out_device;
	}

	if (!dma_fence_is_signaled(&request->fence)) {
		pr_err("fence unsignaled after waiting!\n");
		goto out_device;
	}

	if (dma_fence_wait_timeout(&request->fence, false, T) <= 0) {
		pr_err("fence wait timed out when complete (expected success)!\n");
		goto out_device;
	}

	err = 0;
out_device:
	mutex_lock(&i915->drm.struct_mutex);
out_locked:
	mock_device_flush(i915);
	mutex_unlock(&i915->drm.struct_mutex);
	return err;
}

static int igt_request_rewind(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct i915_request *request, *vip;
	struct i915_gem_context *ctx[2];
	int err = -EINVAL;

	mutex_lock(&i915->drm.struct_mutex);
	ctx[0] = mock_context(i915, "A");
	request = mock_request(i915->engine[RCS0], ctx[0], 2 * HZ);
	if (!request) {
		err = -ENOMEM;
		goto err_context_0;
	}

	i915_request_get(request);
	i915_request_add(request);

	ctx[1] = mock_context(i915, "B");
	vip = mock_request(i915->engine[RCS0], ctx[1], 0);
	if (!vip) {
		err = -ENOMEM;
		goto err_context_1;
	}

	/* Simulate preemption by manual reordering */
	if (!mock_cancel_request(request)) {
		pr_err("failed to cancel request (already executed)!\n");
		i915_request_add(vip);
		goto err_context_1;
	}
	i915_request_get(vip);
	i915_request_add(vip);
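	/* Requeue the cancelled request behind the VIP so it executes second */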
	rcu_read_lock();
	request->engine->submit_request(request);
	rcu_read_unlock();

	mutex_unlock(&i915->drm.struct_mutex);

	if (i915_request_wait(vip, 0, HZ) == -ETIME) {
		pr_err("timed out waiting for high priority request\n");
		goto err;
	}

	if (i915_request_completed(request)) {
		pr_err("low priority request already completed\n");
		goto err;
	}

	err = 0;
err:
	i915_request_put(vip);
	mutex_lock(&i915->drm.struct_mutex);
err_context_1:
	mock_context_close(ctx[1]);
	i915_request_put(request);
err_context_0:
	mock_context_close(ctx[0]);
	mock_device_flush(i915);
	mutex_unlock(&i915->drm.struct_mutex);
	return err;
}

struct smoketest {
	struct intel_engine_cs *engine;
	struct i915_gem_context **contexts;
	atomic_long_t num_waits, num_fences;
	int ncontexts, max_batch;
	struct i915_request *(*request_alloc)(struct i915_gem_context *,
					      struct intel_engine_cs *);
};

static struct i915_request *
__mock_request_alloc(struct i915_gem_context *ctx,
		     struct intel_engine_cs *engine)
{
	return mock_request(engine, ctx, 0);
}

static struct i915_request *
__live_request_alloc(struct i915_gem_context *ctx,
		     struct intel_engine_cs *engine)
{
	return igt_request_alloc(ctx, engine);
}

static int __igt_breadcrumbs_smoketest(void *arg)
{
	struct smoketest *t = arg;
	struct mutex * const BKL = &t->engine->i915->drm.struct_mutex;
	const unsigned int max_batch = min(t->ncontexts, t->max_batch) - 1;
	const unsigned int total = 4 * t->ncontexts + 1;
	unsigned int num_waits = 0, num_fences = 0;
	struct i915_request **requests;
	I915_RND_STATE(prng);
	unsigned int *order;
	int err = 0;

	/*
	 * A very simple test to catch the most egregious of list handling bugs.
	 *
	 * At its heart, we simply create oodles of requests running across
	 * multiple kthreads and enable signaling on them, for the sole purpose
	 * of stressing our breadcrumb handling. The only inspection we do is
	 * that the fences were marked as signaled.
	 */

	requests = kmalloc_array(total, sizeof(*requests), GFP_KERNEL);
	if (!requests)
		return -ENOMEM;

	order = i915_random_order(total, &prng);
	if (!order) {
		err = -ENOMEM;
		goto out_requests;
	}
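	/*
	 * Each pass submits a randomised batch of requests, all gated on a
	 * single submit fence, and collects them under a composite wait fence
	 * so that we can wait for every breadcrumb to be signaled.
	 */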
	while (!kthread_should_stop()) {
		struct i915_sw_fence *submit, *wait;
		unsigned int n, count;

		submit = heap_fence_create(GFP_KERNEL);
		if (!submit) {
			err = -ENOMEM;
			break;
		}

		wait = heap_fence_create(GFP_KERNEL);
		if (!wait) {
			i915_sw_fence_commit(submit);
			heap_fence_put(submit);
			err = -ENOMEM;
			break;
		}

		i915_random_reorder(order, total, &prng);
		count = 1 + i915_prandom_u32_max_state(max_batch, &prng);

		for (n = 0; n < count; n++) {
			struct i915_gem_context *ctx =
				t->contexts[order[n] % t->ncontexts];
			struct i915_request *rq;

			mutex_lock(BKL);

			rq = t->request_alloc(ctx, t->engine);
			if (IS_ERR(rq)) {
				mutex_unlock(BKL);
				err = PTR_ERR(rq);
				count = n;
				break;
			}

			err = i915_sw_fence_await_sw_fence_gfp(&rq->submit,
							       submit,
							       GFP_KERNEL);

			requests[n] = i915_request_get(rq);
			i915_request_add(rq);

			mutex_unlock(BKL);

			if (err >= 0)
				err = i915_sw_fence_await_dma_fence(wait,
								    &rq->fence,
								    0,
								    GFP_KERNEL);

			if (err < 0) {
				i915_request_put(rq);
				count = n;
				break;
			}
		}

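		/* Release the submit fence, allowing the whole batch to execute */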
		i915_sw_fence_commit(submit);
		i915_sw_fence_commit(wait);

		if (!wait_event_timeout(wait->wait,
					i915_sw_fence_done(wait),
					HZ / 2)) {
			struct i915_request *rq = requests[count - 1];

			pr_err("waiting for %d fences (last %llx:%lld) on %s timed out!\n",
			       count,
			       rq->fence.context, rq->fence.seqno,
			       t->engine->name);
			i915_gem_set_wedged(t->engine->i915);
			GEM_BUG_ON(!i915_request_completed(rq));
			i915_sw_fence_wait(wait);
			err = -EIO;
		}

		for (n = 0; n < count; n++) {
			struct i915_request *rq = requests[n];

			if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
				      &rq->fence.flags)) {
				pr_err("%llu:%llu was not signaled!\n",
				       rq->fence.context, rq->fence.seqno);
				err = -EINVAL;
			}

			i915_request_put(rq);
		}

		heap_fence_put(wait);
		heap_fence_put(submit);

		if (err < 0)
			break;

		num_fences += count;
		num_waits++;

		cond_resched();
	}

	atomic_long_add(num_fences, &t->num_fences);
	atomic_long_add(num_waits, &t->num_waits);

	kfree(order);
out_requests:
	kfree(requests);
	return err;
}

static int mock_breadcrumbs_smoketest(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct smoketest t = {
		.engine = i915->engine[RCS0],
		.ncontexts = 1024,
		.max_batch = 1024,
		.request_alloc = __mock_request_alloc
	};
	unsigned int ncpus = num_online_cpus();
	struct task_struct **threads;
	unsigned int n;
	int ret = 0;

	/*
	 * Smoketest our breadcrumb/signal handling for requests across multiple
	 * threads. A very simple test to only catch the most egregious of bugs.
	 * See __igt_breadcrumbs_smoketest();
	 */

	threads = kmalloc_array(ncpus, sizeof(*threads), GFP_KERNEL);
	if (!threads)
		return -ENOMEM;

	t.contexts =
		kmalloc_array(t.ncontexts, sizeof(*t.contexts), GFP_KERNEL);
	if (!t.contexts) {
		ret = -ENOMEM;
		goto out_threads;
	}

	mutex_lock(&t.engine->i915->drm.struct_mutex);
	for (n = 0; n < t.ncontexts; n++) {
		t.contexts[n] = mock_context(t.engine->i915, "mock");
		if (!t.contexts[n]) {
			ret = -ENOMEM;
			goto out_contexts;
		}
	}
	mutex_unlock(&t.engine->i915->drm.struct_mutex);

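	/* Spawn one smoketest thread per online cpu */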
	for (n = 0; n < ncpus; n++) {
		threads[n] = kthread_run(__igt_breadcrumbs_smoketest,
					 &t, "igt/%d", n);
		if (IS_ERR(threads[n])) {
			ret = PTR_ERR(threads[n]);
			ncpus = n;
			break;
		}

		get_task_struct(threads[n]);
	}

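	/* Let the threads run for the selftest timeout, then reap them */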
	msleep(jiffies_to_msecs(i915_selftest.timeout_jiffies));

	for (n = 0; n < ncpus; n++) {
		int err;

		err = kthread_stop(threads[n]);
		if (err < 0 && !ret)
			ret = err;

		put_task_struct(threads[n]);
	}
	pr_info("Completed %lu waits for %lu fences across %d cpus\n",
		atomic_long_read(&t.num_waits),
		atomic_long_read(&t.num_fences),
		ncpus);

	mutex_lock(&t.engine->i915->drm.struct_mutex);
out_contexts:
	for (n = 0; n < t.ncontexts; n++) {
		if (!t.contexts[n])
			break;
		mock_context_close(t.contexts[n]);
	}
	mutex_unlock(&t.engine->i915->drm.struct_mutex);
	kfree(t.contexts);
out_threads:
	kfree(threads);

	return ret;
}

int i915_request_mock_selftests(void)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(igt_add_request),
		SUBTEST(igt_wait_request),
		SUBTEST(igt_fence_wait),
		SUBTEST(igt_request_rewind),
		SUBTEST(mock_breadcrumbs_smoketest),
	};
	struct drm_i915_private *i915;
	intel_wakeref_t wakeref;
	int err = 0;

	i915 = mock_gem_device();
	if (!i915)
		return -ENOMEM;

	with_intel_runtime_pm(&i915->runtime_pm, wakeref)
		err = i915_subtests(tests, i915);

	drm_dev_put(&i915->drm);

	return err;
}

static int live_nop_request(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct intel_engine_cs *engine;
	intel_wakeref_t wakeref;
	struct igt_live_test t;
	unsigned int id;
	int err = -ENODEV;

	/* Submit various sized batches of empty requests, to each engine
	 * (individually), and wait for the batch to complete. We can check
	 * the overhead of submitting requests to the hardware.
	 */

	mutex_lock(&i915->drm.struct_mutex);
	wakeref = intel_runtime_pm_get(&i915->runtime_pm);

	for_each_engine(engine, i915, id) {
		struct i915_request *request = NULL;
		unsigned long n, prime;
		IGT_TIMEOUT(end_time);
		ktime_t times[2] = {};

		err = igt_live_test_begin(&t, i915, __func__, engine->name);
		if (err)
			goto out_unlock;

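		/*
		 * Submit an increasing (prime) number of nop requests and
		 * measure the time until the last one completes.
		 */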
		for_each_prime_number_from(prime, 1, 8192) {
			times[1] = ktime_get_raw();

			for (n = 0; n < prime; n++) {
				request = i915_request_create(engine->kernel_context);
				if (IS_ERR(request)) {
					err = PTR_ERR(request);
					goto out_unlock;
				}

				/* This space is left intentionally blank.
				 *
				 * We do not actually want to perform any
				 * action with this request, we just want
				 * to measure the latency in allocation
				 * and submission of our breadcrumbs -
				 * ensuring that the bare request is sufficient
				 * for the system to work (i.e. proper HEAD
				 * tracking of the rings, interrupt handling,
				 * etc). It also gives us the lowest bounds
				 * for latency.
				 */

				i915_request_add(request);
			}
			i915_request_wait(request, 0, MAX_SCHEDULE_TIMEOUT);

			times[1] = ktime_sub(ktime_get_raw(), times[1]);
			if (prime == 1)
				times[0] = times[1];

			if (__igt_timeout(end_time, NULL))
				break;
		}

		err = igt_live_test_end(&t);
		if (err)
			goto out_unlock;

		pr_info("Request latencies on %s: 1 = %lluns, %lu = %lluns\n",
			engine->name,
			ktime_to_ns(times[0]),
			prime, div64_u64(ktime_to_ns(times[1]), prime));
	}

out_unlock:
	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
	mutex_unlock(&i915->drm.struct_mutex);
	return err;
}

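/*
 * Create a one-page batch containing just MI_BATCH_BUFFER_END, pinned into
 * the global GTT so it can be dispatched on any engine.
 */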
static struct i915_vma *empty_batch(struct drm_i915_private *i915)
{
	struct drm_i915_gem_object *obj;
	struct i915_vma *vma;
	u32 *cmd;
	int err;

	obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
	if (IS_ERR(obj))
		return ERR_CAST(obj);

	cmd = i915_gem_object_pin_map(obj, I915_MAP_WB);
	if (IS_ERR(cmd)) {
		err = PTR_ERR(cmd);
		goto err;
	}

	*cmd = MI_BATCH_BUFFER_END;

	__i915_gem_object_flush_map(obj, 0, 64);
	i915_gem_object_unpin_map(obj);

	i915_gem_chipset_flush(i915);

	vma = i915_vma_instance(obj, &i915->ggtt.vm, NULL);
	if (IS_ERR(vma)) {
		err = PTR_ERR(vma);
		goto err;
	}

	err = i915_vma_pin(vma, 0, 0, PIN_USER | PIN_GLOBAL);
	if (err)
		goto err;

	return vma;

err:
	i915_gem_object_put(obj);
	return ERR_PTR(err);
}

static struct i915_request *
empty_request(struct intel_engine_cs *engine,
	      struct i915_vma *batch)
{
	struct i915_request *request;
	int err;

	request = i915_request_create(engine->kernel_context);
	if (IS_ERR(request))
		return request;

	err = engine->emit_bb_start(request,
				    batch->node.start,
				    batch->node.size,
				    I915_DISPATCH_SECURE);
	if (err)
		goto out_request;

out_request:
	i915_request_add(request);
	return err ? ERR_PTR(err) : request;
}


static int live_empty_request(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct intel_engine_cs *engine;
	intel_wakeref_t wakeref;
	struct igt_live_test t;
	struct i915_vma *batch;
	unsigned int id;
	int err = 0;

	/* Submit various sized batches of empty requests, to each engine
	 * (individually), and wait for the batch to complete. We can check
	 * the overhead of submitting requests to the hardware.
	 */

	mutex_lock(&i915->drm.struct_mutex);
	wakeref = intel_runtime_pm_get(&i915->runtime_pm);

	batch = empty_batch(i915);
	if (IS_ERR(batch)) {
		err = PTR_ERR(batch);
		goto out_unlock;
	}

	for_each_engine(engine, i915, id) {
		IGT_TIMEOUT(end_time);
		struct i915_request *request;
		unsigned long n, prime;
		ktime_t times[2] = {};

		err = igt_live_test_begin(&t, i915, __func__, engine->name);
		if (err)
			goto out_batch;

		/* Warmup / preload */
		request = empty_request(engine, batch);
		if (IS_ERR(request)) {
			err = PTR_ERR(request);
			goto out_batch;
		}
		i915_request_wait(request, 0, MAX_SCHEDULE_TIMEOUT);

		for_each_prime_number_from(prime, 1, 8192) {
			times[1] = ktime_get_raw();

			for (n = 0; n < prime; n++) {
				request = empty_request(engine, batch);
				if (IS_ERR(request)) {
					err = PTR_ERR(request);
					goto out_batch;
				}
			}
			i915_request_wait(request, 0, MAX_SCHEDULE_TIMEOUT);

			times[1] = ktime_sub(ktime_get_raw(), times[1]);
			if (prime == 1)
				times[0] = times[1];

			if (__igt_timeout(end_time, NULL))
				break;
		}

		err = igt_live_test_end(&t);
		if (err)
			goto out_batch;

		pr_info("Batch latencies on %s: 1 = %lluns, %lu = %lluns\n",
			engine->name,
			ktime_to_ns(times[0]),
			prime, div64_u64(ktime_to_ns(times[1]), prime));
	}

out_batch:
	i915_vma_unpin(batch);
	i915_vma_put(batch);
out_unlock:
	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
	mutex_unlock(&i915->drm.struct_mutex);
	return err;
}

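/*
 * Create a batch whose first instruction jumps back to its own start, so it
 * spins on the GPU until recursive_batch_resolve() overwrites that first
 * dword with MI_BATCH_BUFFER_END.
 */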
static struct i915_vma *recursive_batch(struct drm_i915_private *i915)
{
	struct i915_gem_context *ctx = i915->kernel_context;
	struct i915_address_space *vm = ctx->vm ?: &i915->ggtt.vm;
	struct drm_i915_gem_object *obj;
	const int gen = INTEL_GEN(i915);
	struct i915_vma *vma;
	u32 *cmd;
	int err;

	obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
	if (IS_ERR(obj))
		return ERR_CAST(obj);

	vma = i915_vma_instance(obj, vm, NULL);
	if (IS_ERR(vma)) {
		err = PTR_ERR(vma);
		goto err;
	}

	err = i915_vma_pin(vma, 0, 0, PIN_USER);
	if (err)
		goto err;

	cmd = i915_gem_object_pin_map(obj, I915_MAP_WC);
	if (IS_ERR(cmd)) {
		err = PTR_ERR(cmd);
		goto err;
	}

	if (gen >= 8) {
		*cmd++ = MI_BATCH_BUFFER_START | 1 << 8 | 1;
		*cmd++ = lower_32_bits(vma->node.start);
		*cmd++ = upper_32_bits(vma->node.start);
	} else if (gen >= 6) {
		*cmd++ = MI_BATCH_BUFFER_START | 1 << 8;
		*cmd++ = lower_32_bits(vma->node.start);
	} else {
		*cmd++ = MI_BATCH_BUFFER_START | MI_BATCH_GTT;
		*cmd++ = lower_32_bits(vma->node.start);
	}
	*cmd++ = MI_BATCH_BUFFER_END; /* terminate early in case of error */

	__i915_gem_object_flush_map(obj, 0, 64);
	i915_gem_object_unpin_map(obj);

	i915_gem_chipset_flush(i915);

	return vma;

err:
	i915_gem_object_put(obj);
	return ERR_PTR(err);
}

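/* Terminate a spinning recursive batch by rewriting its first dword */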
static int recursive_batch_resolve(struct i915_vma *batch)
{
	u32 *cmd;

	cmd = i915_gem_object_pin_map(batch->obj, I915_MAP_WC);
	if (IS_ERR(cmd))
		return PTR_ERR(cmd);

	*cmd = MI_BATCH_BUFFER_END;
	i915_gem_chipset_flush(batch->vm->i915);

	i915_gem_object_unpin_map(batch->obj);

	return 0;
}

static int live_all_engines(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct intel_engine_cs *engine;
	struct i915_request *request[I915_NUM_ENGINES] = {};
	intel_wakeref_t wakeref;
	struct igt_live_test t;
	struct i915_vma *batch;
	unsigned int id;
	int err;

	/* Check we can submit requests to all engines simultaneously. We
	 * send a recursive batch to each engine - checking that we don't
	 * block doing so, and that they don't complete too soon.
	 */

	mutex_lock(&i915->drm.struct_mutex);
	wakeref = intel_runtime_pm_get(&i915->runtime_pm);

	err = igt_live_test_begin(&t, i915, __func__, "");
	if (err)
		goto out_unlock;

	batch = recursive_batch(i915);
	if (IS_ERR(batch)) {
		err = PTR_ERR(batch);
		pr_err("%s: Unable to create batch, err=%d\n", __func__, err);
		goto out_unlock;
	}

	for_each_engine(engine, i915, id) {
		request[id] = i915_request_create(engine->kernel_context);
		if (IS_ERR(request[id])) {
			err = PTR_ERR(request[id]);
			request[id] = NULL;
			pr_err("%s: Request allocation failed with err=%d\n",
			       __func__, err);
			goto out_request;
		}

		err = engine->emit_bb_start(request[id],
					    batch->node.start,
					    batch->node.size,
					    0);
		GEM_BUG_ON(err);
		request[id]->batch = batch;

		i915_vma_lock(batch);
		err = i915_vma_move_to_active(batch, request[id], 0);
		i915_vma_unlock(batch);
		GEM_BUG_ON(err);

		i915_request_get(request[id]);
		i915_request_add(request[id]);
	}

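	/* While the shared batch is still spinning, no request may complete */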
	for_each_engine(engine, i915, id) {
		if (i915_request_completed(request[id])) {
			pr_err("%s(%s): request completed too early!\n",
			       __func__, engine->name);
			err = -EINVAL;
			goto out_request;
		}
	}

	err = recursive_batch_resolve(batch);
	if (err) {
		pr_err("%s: failed to resolve batch, err=%d\n", __func__, err);
		goto out_request;
	}

	for_each_engine(engine, i915, id) {
		long timeout;

		timeout = i915_request_wait(request[id], 0,
					    MAX_SCHEDULE_TIMEOUT);
		if (timeout < 0) {
			err = timeout;
			pr_err("%s: error waiting for request on %s, err=%d\n",
			       __func__, engine->name, err);
			goto out_request;
		}

		GEM_BUG_ON(!i915_request_completed(request[id]));
		i915_request_put(request[id]);
		request[id] = NULL;
	}

	err = igt_live_test_end(&t);

out_request:
	for_each_engine(engine, i915, id)
		if (request[id])
			i915_request_put(request[id]);
	i915_vma_unpin(batch);
	i915_vma_put(batch);
out_unlock:
	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
	mutex_unlock(&i915->drm.struct_mutex);
	return err;
}

static int live_sequential_engines(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct i915_request *request[I915_NUM_ENGINES] = {};
	struct i915_request *prev = NULL;
	struct intel_engine_cs *engine;
	intel_wakeref_t wakeref;
	struct igt_live_test t;
	unsigned int id;
	int err;

	/* Check we can submit requests to all engines sequentially, such
	 * that each successive request waits for the earlier ones. This
	 * tests that we don't execute requests out of order, even though
	 * they are running on independent engines.
	 */

	mutex_lock(&i915->drm.struct_mutex);
	wakeref = intel_runtime_pm_get(&i915->runtime_pm);

	err = igt_live_test_begin(&t, i915, __func__, "");
	if (err)
		goto out_unlock;

	for_each_engine(engine, i915, id) {
		struct i915_vma *batch;

		batch = recursive_batch(i915);
		if (IS_ERR(batch)) {
			err = PTR_ERR(batch);
			pr_err("%s: Unable to create batch for %s, err=%d\n",
			       __func__, engine->name, err);
			goto out_unlock;
		}

		request[id] = i915_request_create(engine->kernel_context);
		if (IS_ERR(request[id])) {
			err = PTR_ERR(request[id]);
			request[id] = NULL;
			pr_err("%s: Request allocation failed for %s with err=%d\n",
			       __func__, engine->name, err);
			goto out_request;
		}

		if (prev) {
			err = i915_request_await_dma_fence(request[id],
							   &prev->fence);
			if (err) {
				i915_request_add(request[id]);
				pr_err("%s: Request await failed for %s with err=%d\n",
				       __func__, engine->name, err);
				goto out_request;
			}
		}

		err = engine->emit_bb_start(request[id],
					    batch->node.start,
					    batch->node.size,
					    0);
		GEM_BUG_ON(err);
		request[id]->batch = batch;

		i915_vma_lock(batch);
		err = i915_vma_move_to_active(batch, request[id], 0);
		i915_vma_unlock(batch);
		GEM_BUG_ON(err);

		i915_request_get(request[id]);
		i915_request_add(request[id]);

		prev = request[id];
	}

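	/*
	 * Resolve each batch in submission order and wait for its request;
	 * no request should have completed before its own batch was
	 * terminated.
	 */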
	for_each_engine(engine, i915, id) {
		long timeout;

		if (i915_request_completed(request[id])) {
			pr_err("%s(%s): request completed too early!\n",
			       __func__, engine->name);
			err = -EINVAL;
			goto out_request;
		}

		err = recursive_batch_resolve(request[id]->batch);
		if (err) {
			pr_err("%s: failed to resolve batch, err=%d\n",
			       __func__, err);
			goto out_request;
		}

		timeout = i915_request_wait(request[id], 0,
					    MAX_SCHEDULE_TIMEOUT);
		if (timeout < 0) {
			err = timeout;
			pr_err("%s: error waiting for request on %s, err=%d\n",
			       __func__, engine->name, err);
			goto out_request;
		}

		GEM_BUG_ON(!i915_request_completed(request[id]));
	}

	err = igt_live_test_end(&t);

out_request:
	for_each_engine(engine, i915, id) {
		u32 *cmd;

		if (!request[id])
			break;

		cmd = i915_gem_object_pin_map(request[id]->batch->obj,
					      I915_MAP_WC);
		if (!IS_ERR(cmd)) {
			*cmd = MI_BATCH_BUFFER_END;
			i915_gem_chipset_flush(i915);

			i915_gem_object_unpin_map(request[id]->batch->obj);
		}

		i915_vma_put(request[id]->batch);
		i915_request_put(request[id]);
	}
out_unlock:
	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
	mutex_unlock(&i915->drm.struct_mutex);
	return err;
}

static int
max_batches(struct i915_gem_context *ctx, struct intel_engine_cs *engine)
{
	struct i915_request *rq;
	int ret;

	/*
	 * Before execlists, all contexts share the same ringbuffer. With
	 * execlists, each context/engine has a separate ringbuffer and
	 * for the purposes of this test, inexhaustible.
	 *
	 * For the global ringbuffer though, we have to be very careful
	 * that we do not wrap while preventing the execution of requests
	 * with an unsignaled fence.
	 */
	if (HAS_EXECLISTS(ctx->i915))
		return INT_MAX;

	rq = igt_request_alloc(ctx, engine);
	if (IS_ERR(rq)) {
		ret = PTR_ERR(rq);
	} else {
		int sz;

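		/*
		 * Estimate how many requests fit in the ring: the free space
		 * divided by the footprint of the request we just emitted,
		 * then halved to leave room for error.
		 */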
		ret = rq->ring->size - rq->reserved_space;
		i915_request_add(rq);

		sz = rq->ring->emit - rq->head;
		if (sz < 0)
			sz += rq->ring->size;
		ret /= sz;
		ret /= 2; /* leave half spare, in case of emergency! */
	}

	return ret;
}

static int live_breadcrumbs_smoketest(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct smoketest t[I915_NUM_ENGINES];
	unsigned int ncpus = num_online_cpus();
	unsigned long num_waits, num_fences;
	struct intel_engine_cs *engine;
	struct task_struct **threads;
	struct igt_live_test live;
	enum intel_engine_id id;
	intel_wakeref_t wakeref;
	struct drm_file *file;
	unsigned int n;
	int ret = 0;

	/*
	 * Smoketest our breadcrumb/signal handling for requests across multiple
	 * threads. A very simple test to only catch the most egregious of bugs.
	 * See __igt_breadcrumbs_smoketest();
	 *
	 * On real hardware this time.
	 */

	wakeref = intel_runtime_pm_get(&i915->runtime_pm);

	file = mock_file(i915);
	if (IS_ERR(file)) {
		ret = PTR_ERR(file);
		goto out_rpm;
	}

	threads = kcalloc(ncpus * I915_NUM_ENGINES,
			  sizeof(*threads),
			  GFP_KERNEL);
	if (!threads) {
		ret = -ENOMEM;
		goto out_file;
	}

	memset(&t[0], 0, sizeof(t[0]));
	t[0].request_alloc = __live_request_alloc;
	t[0].ncontexts = 64;
	t[0].contexts = kmalloc_array(t[0].ncontexts,
				      sizeof(*t[0].contexts),
				      GFP_KERNEL);
	if (!t[0].contexts) {
		ret = -ENOMEM;
		goto out_threads;
	}

	mutex_lock(&i915->drm.struct_mutex);
	for (n = 0; n < t[0].ncontexts; n++) {
		t[0].contexts[n] = live_context(i915, file);
		if (!t[0].contexts[n]) {
			ret = -ENOMEM;
			goto out_contexts;
		}
	}

	ret = igt_live_test_begin(&live, i915, __func__, "");
	if (ret)
		goto out_contexts;

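	/*
	 * Clone the template smoketest for each engine and spawn one worker
	 * thread per cpu for each of them.
	 */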
	for_each_engine(engine, i915, id) {
		t[id] = t[0];
		t[id].engine = engine;
		t[id].max_batch = max_batches(t[0].contexts[0], engine);
		if (t[id].max_batch < 0) {
			ret = t[id].max_batch;
			mutex_unlock(&i915->drm.struct_mutex);
			goto out_flush;
		}
		/* One ring interleaved between requests from all cpus */
		t[id].max_batch /= num_online_cpus() + 1;
		pr_debug("Limiting batches to %d requests on %s\n",
			 t[id].max_batch, engine->name);

		for (n = 0; n < ncpus; n++) {
			struct task_struct *tsk;

			tsk = kthread_run(__igt_breadcrumbs_smoketest,
					  &t[id], "igt/%d.%d", id, n);
			if (IS_ERR(tsk)) {
				ret = PTR_ERR(tsk);
				mutex_unlock(&i915->drm.struct_mutex);
				goto out_flush;
			}

			get_task_struct(tsk);
			threads[id * ncpus + n] = tsk;
		}
	}
	mutex_unlock(&i915->drm.struct_mutex);

	msleep(jiffies_to_msecs(i915_selftest.timeout_jiffies));

out_flush:
	num_waits = 0;
	num_fences = 0;
	for_each_engine(engine, i915, id) {
		for (n = 0; n < ncpus; n++) {
			struct task_struct *tsk = threads[id * ncpus + n];
			int err;

			if (!tsk)
				continue;

			err = kthread_stop(tsk);
			if (err < 0 && !ret)
				ret = err;

			put_task_struct(tsk);
		}

		num_waits += atomic_long_read(&t[id].num_waits);
		num_fences += atomic_long_read(&t[id].num_fences);
	}
	pr_info("Completed %lu waits for %lu fences across %d engines and %d cpus\n",
		num_waits, num_fences, RUNTIME_INFO(i915)->num_engines, ncpus);

	mutex_lock(&i915->drm.struct_mutex);
	ret = igt_live_test_end(&live) ?: ret;
out_contexts:
	mutex_unlock(&i915->drm.struct_mutex);
	kfree(t[0].contexts);
out_threads:
	kfree(threads);
out_file:
	mock_file_free(i915, file);
out_rpm:
	intel_runtime_pm_put(&i915->runtime_pm, wakeref);

	return ret;
}

int i915_request_live_selftests(struct drm_i915_private *i915)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(live_nop_request),
		SUBTEST(live_all_engines),
		SUBTEST(live_sequential_engines),
		SUBTEST(live_empty_request),
		SUBTEST(live_breadcrumbs_smoketest),
	};

	if (i915_terminally_wedged(i915))
		return 0;

	return i915_subtests(tests, i915);
}