1 /*
2  * Copyright © 2016 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  *
23  */
24 
25 #include <linux/prime_numbers.h>
26 
27 #include "gem/i915_gem_pm.h"
28 #include "gem/selftests/mock_context.h"
29 
30 #include "gt/intel_gt.h"
31 
32 #include "i915_random.h"
33 #include "i915_selftest.h"
34 #include "igt_live_test.h"
35 #include "lib_sw_fence.h"
36 
37 #include "mock_drm.h"
38 #include "mock_gem_device.h"
39 
40 static int igt_add_request(void *arg)
41 {
42 	struct drm_i915_private *i915 = arg;
43 	struct i915_request *request;
44 
45 	/* Basic preliminary test to create a request and let it loose! */
46 
47 	request = mock_request(i915->engine[RCS0]->kernel_context, HZ / 10);
48 	if (!request)
49 		return -ENOMEM;
50 
51 	i915_request_add(request);
52 
53 	return 0;
54 }
55 
56 static int igt_wait_request(void *arg)
57 {
58 	const long T = HZ / 4;
59 	struct drm_i915_private *i915 = arg;
60 	struct i915_request *request;
61 	int err = -EINVAL;
62 
63 	/* Submit a request, then wait upon it */
64 
65 	request = mock_request(i915->engine[RCS0]->kernel_context, T);
66 	if (!request)
67 		return -ENOMEM;
68 
69 	i915_request_get(request);
70 
71 	if (i915_request_wait(request, 0, 0) != -ETIME) {
72 		pr_err("request wait (busy query) succeeded (expected timeout before submit!)\n");
73 		goto out_request;
74 	}
75 
76 	if (i915_request_wait(request, 0, T) != -ETIME) {
77 		pr_err("request wait succeeded (expected timeout before submit!)\n");
78 		goto out_request;
79 	}
80 
81 	if (i915_request_completed(request)) {
82 		pr_err("request completed before submit!!\n");
83 		goto out_request;
84 	}
85 
86 	i915_request_add(request);
87 
88 	if (i915_request_wait(request, 0, 0) != -ETIME) {
89 		pr_err("request wait (busy query) succeeded (expected timeout after submit!)\n");
90 		goto out_request;
91 	}
92 
93 	if (i915_request_completed(request)) {
94 		pr_err("request completed immediately!\n");
95 		goto out_request;
96 	}
97 
98 	if (i915_request_wait(request, 0, T / 2) != -ETIME) {
99 		pr_err("request wait succeeded (expected timeout!)\n");
100 		goto out_request;
101 	}
102 
103 	if (i915_request_wait(request, 0, T) == -ETIME) {
104 		pr_err("request wait timed out!\n");
105 		goto out_request;
106 	}
107 
108 	if (!i915_request_completed(request)) {
109 		pr_err("request not complete after waiting!\n");
110 		goto out_request;
111 	}
112 
113 	if (i915_request_wait(request, 0, T) == -ETIME) {
114 		pr_err("request wait timed out when already complete!\n");
115 		goto out_request;
116 	}
117 
118 	err = 0;
119 out_request:
120 	i915_request_put(request);
121 	mock_device_flush(i915);
122 	return err;
123 }
124 
125 static int igt_fence_wait(void *arg)
126 {
127 	const long T = HZ / 4;
128 	struct drm_i915_private *i915 = arg;
129 	struct i915_request *request;
130 	int err = -EINVAL;
131 
132 	/* Submit a request, treat it as a fence and wait upon it */
133 
134 	request = mock_request(i915->engine[RCS0]->kernel_context, T);
135 	if (!request)
136 		return -ENOMEM;
137 
138 	if (dma_fence_wait_timeout(&request->fence, false, T) != -ETIME) {
139 		pr_err("fence wait success before submit (expected timeout)!\n");
140 		goto out;
141 	}
142 
143 	i915_request_add(request);
144 
145 	if (dma_fence_is_signaled(&request->fence)) {
146 		pr_err("fence signaled immediately!\n");
147 		goto out;
148 	}
149 
150 	if (dma_fence_wait_timeout(&request->fence, false, T / 2) != -ETIME) {
151 		pr_err("fence wait success after submit (expected timeout)!\n");
152 		goto out;
153 	}
154 
155 	if (dma_fence_wait_timeout(&request->fence, false, T) <= 0) {
156 		pr_err("fence wait timed out (expected success)!\n");
157 		goto out;
158 	}
159 
160 	if (!dma_fence_is_signaled(&request->fence)) {
161 		pr_err("fence unsignaled after waiting!\n");
162 		goto out;
163 	}
164 
165 	if (dma_fence_wait_timeout(&request->fence, false, T) <= 0) {
166 		pr_err("fence wait timed out when complete (expected success)!\n");
167 		goto out;
168 	}
169 
170 	err = 0;
171 out:
172 	mock_device_flush(i915);
173 	return err;
174 }
175 
176 static int igt_request_rewind(void *arg)
177 {
178 	struct drm_i915_private *i915 = arg;
179 	struct i915_request *request, *vip;
180 	struct i915_gem_context *ctx[2];
181 	struct intel_context *ce;
182 	int err = -EINVAL;
183 
184 	ctx[0] = mock_context(i915, "A");
185 
186 	ce = i915_gem_context_get_engine(ctx[0], RCS0);
187 	GEM_BUG_ON(IS_ERR(ce));
188 	request = mock_request(ce, 2 * HZ);
189 	intel_context_put(ce);
190 	if (!request) {
191 		err = -ENOMEM;
192 		goto err_context_0;
193 	}
194 
195 	i915_request_get(request);
196 	i915_request_add(request);
197 
198 	ctx[1] = mock_context(i915, "B");
199 
200 	ce = i915_gem_context_get_engine(ctx[1], RCS0);
201 	GEM_BUG_ON(IS_ERR(ce));
202 	vip = mock_request(ce, 0);
203 	intel_context_put(ce);
204 	if (!vip) {
205 		err = -ENOMEM;
206 		goto err_context_1;
207 	}
208 
209 	/* Simulate preemption by manual reordering */
210 	if (!mock_cancel_request(request)) {
211 		pr_err("failed to cancel request (already executed)!\n");
212 		i915_request_add(vip);
213 		goto err_context_1;
214 	}
215 	i915_request_get(vip);
216 	i915_request_add(vip);
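	/*
	 * The cancelled request is resubmitted only after the vip has been
	 * queued, so the vip should execute first despite being created
	 * second.
	 */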
217 	rcu_read_lock();
218 	request->engine->submit_request(request);
	rcu_read_unlock();

222 	if (i915_request_wait(vip, 0, HZ) == -ETIME) {
223 		pr_err("timed out waiting for high priority request\n");
224 		goto err;
225 	}
226 
227 	if (i915_request_completed(request)) {
228 		pr_err("low priority request already completed\n");
229 		goto err;
230 	}
231 
232 	err = 0;
233 err:
234 	i915_request_put(vip);
235 err_context_1:
236 	mock_context_close(ctx[1]);
237 	i915_request_put(request);
238 err_context_0:
239 	mock_context_close(ctx[0]);
240 	mock_device_flush(i915);
241 	return err;
242 }
243 
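/*
 * Shared state for the breadcrumbs smoketests: each worker thread picks
 * contexts from @contexts, allocates up to @max_batch requests per pass
 * via @request_alloc, and accumulates its totals into @num_waits and
 * @num_fences.
 */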
244 struct smoketest {
245 	struct intel_engine_cs *engine;
246 	struct i915_gem_context **contexts;
247 	atomic_long_t num_waits, num_fences;
248 	int ncontexts, max_batch;
249 	struct i915_request *(*request_alloc)(struct intel_context *ce);
250 };
251 
252 static struct i915_request *
253 __mock_request_alloc(struct intel_context *ce)
254 {
255 	return mock_request(ce, 0);
256 }
257 
258 static struct i915_request *
259 __live_request_alloc(struct intel_context *ce)
260 {
261 	return intel_context_create_request(ce);
262 }
263 
264 static int __igt_breadcrumbs_smoketest(void *arg)
265 {
266 	struct smoketest *t = arg;
267 	const unsigned int max_batch = min(t->ncontexts, t->max_batch) - 1;
268 	const unsigned int total = 4 * t->ncontexts + 1;
269 	unsigned int num_waits = 0, num_fences = 0;
270 	struct i915_request **requests;
271 	I915_RND_STATE(prng);
272 	unsigned int *order;
273 	int err = 0;
274 
275 	/*
276 	 * A very simple test to catch the most egregious of list handling bugs.
277 	 *
278 	 * At its heart, we simply create oodles of requests running across
279 	 * multiple kthreads and enable signaling on them, for the sole purpose
280 	 * of stressing our breadcrumb handling. The only inspection we do is
281 	 * that the fences were marked as signaled.
282 	 */
283 
284 	requests = kmalloc_array(total, sizeof(*requests), GFP_KERNEL);
285 	if (!requests)
286 		return -ENOMEM;
287 
288 	order = i915_random_order(total, &prng);
289 	if (!order) {
290 		err = -ENOMEM;
291 		goto out_requests;
292 	}
293 
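	/*
	 * Each pass plugs a random batch of requests behind a submit fence,
	 * releases them all at once, and then waits on a composite fence
	 * that completes only when every request has signaled.
	 */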
294 	while (!kthread_should_stop()) {
295 		struct i915_sw_fence *submit, *wait;
296 		unsigned int n, count;
297 
298 		submit = heap_fence_create(GFP_KERNEL);
299 		if (!submit) {
300 			err = -ENOMEM;
301 			break;
302 		}
303 
304 		wait = heap_fence_create(GFP_KERNEL);
305 		if (!wait) {
306 			i915_sw_fence_commit(submit);
307 			heap_fence_put(submit);
			err = -ENOMEM;
309 			break;
310 		}
311 
312 		i915_random_reorder(order, total, &prng);
313 		count = 1 + i915_prandom_u32_max_state(max_batch, &prng);
314 
315 		for (n = 0; n < count; n++) {
316 			struct i915_gem_context *ctx =
317 				t->contexts[order[n] % t->ncontexts];
318 			struct i915_request *rq;
319 			struct intel_context *ce;
320 
321 			ce = i915_gem_context_get_engine(ctx, t->engine->legacy_idx);
322 			GEM_BUG_ON(IS_ERR(ce));
323 			rq = t->request_alloc(ce);
324 			intel_context_put(ce);
325 			if (IS_ERR(rq)) {
326 				err = PTR_ERR(rq);
327 				count = n;
328 				break;
329 			}
330 
331 			err = i915_sw_fence_await_sw_fence_gfp(&rq->submit,
332 							       submit,
333 							       GFP_KERNEL);
334 
335 			requests[n] = i915_request_get(rq);
336 			i915_request_add(rq);
337 
338 			if (err >= 0)
339 				err = i915_sw_fence_await_dma_fence(wait,
340 								    &rq->fence,
341 								    0,
342 								    GFP_KERNEL);
343 
344 			if (err < 0) {
345 				i915_request_put(rq);
346 				count = n;
347 				break;
348 			}
349 		}
350 
351 		i915_sw_fence_commit(submit);
352 		i915_sw_fence_commit(wait);
353 
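		/*
		 * If the fences have not signaled within 5s, assume the GPU
		 * is hung and wedge it so the outstanding requests are
		 * force-completed before reporting -EIO.
		 */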
354 		if (!wait_event_timeout(wait->wait,
355 					i915_sw_fence_done(wait),
356 					5 * HZ)) {
357 			struct i915_request *rq = requests[count - 1];
358 
359 			pr_err("waiting for %d/%d fences (last %llx:%lld) on %s timed out!\n",
360 			       atomic_read(&wait->pending), count,
361 			       rq->fence.context, rq->fence.seqno,
362 			       t->engine->name);
363 			GEM_TRACE_DUMP();
364 
365 			intel_gt_set_wedged(t->engine->gt);
366 			GEM_BUG_ON(!i915_request_completed(rq));
367 			i915_sw_fence_wait(wait);
368 			err = -EIO;
369 		}
370 
371 		for (n = 0; n < count; n++) {
372 			struct i915_request *rq = requests[n];
373 
374 			if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
375 				      &rq->fence.flags)) {
376 				pr_err("%llu:%llu was not signaled!\n",
377 				       rq->fence.context, rq->fence.seqno);
378 				err = -EINVAL;
379 			}
380 
381 			i915_request_put(rq);
382 		}
383 
384 		heap_fence_put(wait);
385 		heap_fence_put(submit);
386 
387 		if (err < 0)
388 			break;
389 
390 		num_fences += count;
391 		num_waits++;
392 
393 		cond_resched();
394 	}
395 
396 	atomic_long_add(num_fences, &t->num_fences);
397 	atomic_long_add(num_waits, &t->num_waits);
398 
399 	kfree(order);
400 out_requests:
401 	kfree(requests);
402 	return err;
403 }
404 
405 static int mock_breadcrumbs_smoketest(void *arg)
406 {
407 	struct drm_i915_private *i915 = arg;
408 	struct smoketest t = {
409 		.engine = i915->engine[RCS0],
410 		.ncontexts = 1024,
411 		.max_batch = 1024,
412 		.request_alloc = __mock_request_alloc
413 	};
414 	unsigned int ncpus = num_online_cpus();
415 	struct task_struct **threads;
416 	unsigned int n;
417 	int ret = 0;
418 
419 	/*
420 	 * Smoketest our breadcrumb/signal handling for requests across multiple
421 	 * threads. A very simple test to only catch the most egregious of bugs.
422 	 * See __igt_breadcrumbs_smoketest();
423 	 */
424 
425 	threads = kmalloc_array(ncpus, sizeof(*threads), GFP_KERNEL);
426 	if (!threads)
427 		return -ENOMEM;
428 
429 	t.contexts =
430 		kmalloc_array(t.ncontexts, sizeof(*t.contexts), GFP_KERNEL);
431 	if (!t.contexts) {
432 		ret = -ENOMEM;
433 		goto out_threads;
434 	}
435 
436 	for (n = 0; n < t.ncontexts; n++) {
437 		t.contexts[n] = mock_context(t.engine->i915, "mock");
438 		if (!t.contexts[n]) {
439 			ret = -ENOMEM;
440 			goto out_contexts;
441 		}
442 	}
443 
444 	for (n = 0; n < ncpus; n++) {
445 		threads[n] = kthread_run(__igt_breadcrumbs_smoketest,
446 					 &t, "igt/%d", n);
447 		if (IS_ERR(threads[n])) {
448 			ret = PTR_ERR(threads[n]);
449 			ncpus = n;
450 			break;
451 		}
452 
453 		get_task_struct(threads[n]);
454 	}
455 
456 	msleep(jiffies_to_msecs(i915_selftest.timeout_jiffies));
457 
458 	for (n = 0; n < ncpus; n++) {
459 		int err;
460 
461 		err = kthread_stop(threads[n]);
462 		if (err < 0 && !ret)
463 			ret = err;
464 
465 		put_task_struct(threads[n]);
466 	}
	pr_info("Completed %lu waits for %lu fences across %d cpus\n",
468 		atomic_long_read(&t.num_waits),
469 		atomic_long_read(&t.num_fences),
470 		ncpus);
471 
472 out_contexts:
473 	for (n = 0; n < t.ncontexts; n++) {
474 		if (!t.contexts[n])
475 			break;
476 		mock_context_close(t.contexts[n]);
477 	}
478 	kfree(t.contexts);
479 out_threads:
480 	kfree(threads);
481 	return ret;
482 }
483 
484 int i915_request_mock_selftests(void)
485 {
486 	static const struct i915_subtest tests[] = {
487 		SUBTEST(igt_add_request),
488 		SUBTEST(igt_wait_request),
489 		SUBTEST(igt_fence_wait),
490 		SUBTEST(igt_request_rewind),
491 		SUBTEST(mock_breadcrumbs_smoketest),
492 	};
493 	struct drm_i915_private *i915;
494 	intel_wakeref_t wakeref;
495 	int err = 0;
496 
497 	i915 = mock_gem_device();
498 	if (!i915)
499 		return -ENOMEM;
500 
501 	with_intel_runtime_pm(&i915->runtime_pm, wakeref)
502 		err = i915_subtests(tests, i915);
503 
504 	drm_dev_put(&i915->drm);
505 
506 	return err;
507 }
508 
509 static int live_nop_request(void *arg)
510 {
511 	struct drm_i915_private *i915 = arg;
512 	struct intel_engine_cs *engine;
513 	struct igt_live_test t;
514 	unsigned int id;
515 	int err = -ENODEV;
516 
517 	/* Submit various sized batches of empty requests, to each engine
518 	 * (individually), and wait for the batch to complete. We can check
519 	 * the overhead of submitting requests to the hardware.
520 	 */
521 
522 	for_each_engine(engine, i915, id) {
523 		unsigned long n, prime;
524 		IGT_TIMEOUT(end_time);
525 		ktime_t times[2] = {};
526 
527 		err = igt_live_test_begin(&t, i915, __func__, engine->name);
528 		if (err)
529 			return err;
530 
531 		for_each_prime_number_from(prime, 1, 8192) {
532 			struct i915_request *request = NULL;
533 
534 			times[1] = ktime_get_raw();
535 
536 			for (n = 0; n < prime; n++) {
537 				i915_request_put(request);
538 				request = i915_request_create(engine->kernel_context);
539 				if (IS_ERR(request))
540 					return PTR_ERR(request);
541 
542 				/* This space is left intentionally blank.
543 				 *
544 				 * We do not actually want to perform any
545 				 * action with this request, we just want
546 				 * to measure the latency in allocation
547 				 * and submission of our breadcrumbs -
548 				 * ensuring that the bare request is sufficient
549 				 * for the system to work (i.e. proper HEAD
550 				 * tracking of the rings, interrupt handling,
551 				 * etc). It also gives us the lowest bounds
552 				 * for latency.
553 				 */
554 
555 				i915_request_get(request);
556 				i915_request_add(request);
557 			}
558 			i915_request_wait(request, 0, MAX_SCHEDULE_TIMEOUT);
559 			i915_request_put(request);
560 
561 			times[1] = ktime_sub(ktime_get_raw(), times[1]);
562 			if (prime == 1)
563 				times[0] = times[1];
564 
565 			if (__igt_timeout(end_time, NULL))
566 				break;
567 		}
568 
569 		err = igt_live_test_end(&t);
570 		if (err)
571 			return err;
572 
573 		pr_info("Request latencies on %s: 1 = %lluns, %lu = %lluns\n",
574 			engine->name,
575 			ktime_to_ns(times[0]),
576 			prime, div64_u64(ktime_to_ns(times[1]), prime));
577 	}
578 
579 	return err;
580 }
581 
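/*
 * Build a one page batch containing only MI_BATCH_BUFFER_END, pinned into
 * the global GTT so the same batch can be reused for every request.
 */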
582 static struct i915_vma *empty_batch(struct drm_i915_private *i915)
583 {
584 	struct drm_i915_gem_object *obj;
585 	struct i915_vma *vma;
586 	u32 *cmd;
587 	int err;
588 
589 	obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
590 	if (IS_ERR(obj))
591 		return ERR_CAST(obj);
592 
593 	cmd = i915_gem_object_pin_map(obj, I915_MAP_WB);
594 	if (IS_ERR(cmd)) {
595 		err = PTR_ERR(cmd);
596 		goto err;
597 	}
598 
599 	*cmd = MI_BATCH_BUFFER_END;
600 
601 	__i915_gem_object_flush_map(obj, 0, 64);
602 	i915_gem_object_unpin_map(obj);
603 
604 	intel_gt_chipset_flush(&i915->gt);
605 
606 	vma = i915_vma_instance(obj, &i915->ggtt.vm, NULL);
607 	if (IS_ERR(vma)) {
608 		err = PTR_ERR(vma);
609 		goto err;
610 	}
611 
612 	err = i915_vma_pin(vma, 0, 0, PIN_USER | PIN_GLOBAL);
613 	if (err)
614 		goto err;
615 
	/* Force the wait now to avoid including it in the benchmark */
617 	err = i915_vma_sync(vma);
618 	if (err)
619 		goto err_pin;
620 
621 	return vma;
622 
623 err_pin:
624 	i915_vma_unpin(vma);
625 err:
626 	i915_gem_object_put(obj);
627 	return ERR_PTR(err);
628 }
629 
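/* Submit the empty batch on @engine and return a referenced request. */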
630 static struct i915_request *
631 empty_request(struct intel_engine_cs *engine,
632 	      struct i915_vma *batch)
633 {
634 	struct i915_request *request;
635 	int err;
636 
637 	request = i915_request_create(engine->kernel_context);
638 	if (IS_ERR(request))
639 		return request;
640 
641 	err = engine->emit_bb_start(request,
642 				    batch->node.start,
643 				    batch->node.size,
644 				    I915_DISPATCH_SECURE);
645 	if (err)
646 		goto out_request;
647 
648 	i915_request_get(request);
649 out_request:
650 	i915_request_add(request);
651 	return err ? ERR_PTR(err) : request;
652 }
653 
654 static int live_empty_request(void *arg)
655 {
656 	struct drm_i915_private *i915 = arg;
657 	struct intel_engine_cs *engine;
658 	struct igt_live_test t;
659 	struct i915_vma *batch;
660 	unsigned int id;
661 	int err = 0;
662 
663 	/* Submit various sized batches of empty requests, to each engine
664 	 * (individually), and wait for the batch to complete. We can check
665 	 * the overhead of submitting requests to the hardware.
666 	 */
667 
668 	batch = empty_batch(i915);
669 	if (IS_ERR(batch))
670 		return PTR_ERR(batch);
671 
672 	for_each_engine(engine, i915, id) {
673 		IGT_TIMEOUT(end_time);
674 		struct i915_request *request;
675 		unsigned long n, prime;
676 		ktime_t times[2] = {};
677 
678 		err = igt_live_test_begin(&t, i915, __func__, engine->name);
679 		if (err)
680 			goto out_batch;
681 
682 		/* Warmup / preload */
683 		request = empty_request(engine, batch);
684 		if (IS_ERR(request)) {
685 			err = PTR_ERR(request);
686 			goto out_batch;
687 		}
688 		i915_request_wait(request, 0, MAX_SCHEDULE_TIMEOUT);
689 
690 		for_each_prime_number_from(prime, 1, 8192) {
691 			times[1] = ktime_get_raw();
692 
693 			for (n = 0; n < prime; n++) {
694 				i915_request_put(request);
695 				request = empty_request(engine, batch);
696 				if (IS_ERR(request)) {
697 					err = PTR_ERR(request);
698 					goto out_batch;
699 				}
700 			}
701 			i915_request_wait(request, 0, MAX_SCHEDULE_TIMEOUT);
702 
703 			times[1] = ktime_sub(ktime_get_raw(), times[1]);
704 			if (prime == 1)
705 				times[0] = times[1];
706 
707 			if (__igt_timeout(end_time, NULL))
708 				break;
709 		}
710 		i915_request_put(request);
711 
712 		err = igt_live_test_end(&t);
713 		if (err)
714 			goto out_batch;
715 
716 		pr_info("Batch latencies on %s: 1 = %lluns, %lu = %lluns\n",
717 			engine->name,
718 			ktime_to_ns(times[0]),
719 			prime, div64_u64(ktime_to_ns(times[1]), prime));
720 	}
721 
722 out_batch:
723 	i915_vma_unpin(batch);
724 	i915_vma_put(batch);
725 	return err;
726 }
727 
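/*
 * Create a batch that jumps back to its own start with
 * MI_BATCH_BUFFER_START, so any request executing it spins on the GPU
 * until the loop is replaced with MI_BATCH_BUFFER_END by
 * recursive_batch_resolve().
 */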
728 static struct i915_vma *recursive_batch(struct drm_i915_private *i915)
729 {
730 	struct i915_gem_context *ctx = i915->kernel_context;
731 	struct drm_i915_gem_object *obj;
732 	const int gen = INTEL_GEN(i915);
733 	struct i915_address_space *vm;
734 	struct i915_vma *vma;
735 	u32 *cmd;
736 	int err;
737 
738 	obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
739 	if (IS_ERR(obj))
740 		return ERR_CAST(obj);
741 
742 	vm = i915_gem_context_get_vm_rcu(ctx);
743 	vma = i915_vma_instance(obj, vm, NULL);
744 	i915_vm_put(vm);
745 	if (IS_ERR(vma)) {
746 		err = PTR_ERR(vma);
747 		goto err;
748 	}
749 
750 	err = i915_vma_pin(vma, 0, 0, PIN_USER);
751 	if (err)
752 		goto err;
753 
754 	cmd = i915_gem_object_pin_map(obj, I915_MAP_WC);
755 	if (IS_ERR(cmd)) {
756 		err = PTR_ERR(cmd);
757 		goto err;
758 	}
759 
760 	if (gen >= 8) {
761 		*cmd++ = MI_BATCH_BUFFER_START | 1 << 8 | 1;
762 		*cmd++ = lower_32_bits(vma->node.start);
763 		*cmd++ = upper_32_bits(vma->node.start);
764 	} else if (gen >= 6) {
765 		*cmd++ = MI_BATCH_BUFFER_START | 1 << 8;
766 		*cmd++ = lower_32_bits(vma->node.start);
767 	} else {
768 		*cmd++ = MI_BATCH_BUFFER_START | MI_BATCH_GTT;
769 		*cmd++ = lower_32_bits(vma->node.start);
770 	}
771 	*cmd++ = MI_BATCH_BUFFER_END; /* terminate early in case of error */
772 
773 	__i915_gem_object_flush_map(obj, 0, 64);
774 	i915_gem_object_unpin_map(obj);
775 
776 	intel_gt_chipset_flush(&i915->gt);
777 
778 	return vma;
779 
780 err:
781 	i915_gem_object_put(obj);
782 	return ERR_PTR(err);
783 }
784 
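/*
 * Rewrite the first dword of a recursive batch to MI_BATCH_BUFFER_END so
 * that the spinning requests can run to completion.
 */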
785 static int recursive_batch_resolve(struct i915_vma *batch)
786 {
787 	u32 *cmd;
788 
789 	cmd = i915_gem_object_pin_map(batch->obj, I915_MAP_WC);
790 	if (IS_ERR(cmd))
791 		return PTR_ERR(cmd);
792 
793 	*cmd = MI_BATCH_BUFFER_END;
794 	intel_gt_chipset_flush(batch->vm->gt);
795 
796 	i915_gem_object_unpin_map(batch->obj);
797 
798 	return 0;
799 }
800 
801 static int live_all_engines(void *arg)
802 {
803 	struct drm_i915_private *i915 = arg;
804 	struct intel_engine_cs *engine;
	struct i915_request *request[I915_NUM_ENGINES] = {};
806 	struct igt_live_test t;
807 	struct i915_vma *batch;
808 	unsigned int id;
809 	int err;
810 
811 	/* Check we can submit requests to all engines simultaneously. We
812 	 * send a recursive batch to each engine - checking that we don't
813 	 * block doing so, and that they don't complete too soon.
814 	 */
815 
816 	err = igt_live_test_begin(&t, i915, __func__, "");
817 	if (err)
818 		return err;
819 
820 	batch = recursive_batch(i915);
821 	if (IS_ERR(batch)) {
822 		err = PTR_ERR(batch);
823 		pr_err("%s: Unable to create batch, err=%d\n", __func__, err);
824 		return err;
825 	}
826 
827 	for_each_engine(engine, i915, id) {
828 		request[id] = i915_request_create(engine->kernel_context);
829 		if (IS_ERR(request[id])) {
830 			err = PTR_ERR(request[id]);
831 			pr_err("%s: Request allocation failed with err=%d\n",
832 			       __func__, err);
833 			goto out_request;
834 		}
835 
836 		err = engine->emit_bb_start(request[id],
837 					    batch->node.start,
838 					    batch->node.size,
839 					    0);
840 		GEM_BUG_ON(err);
841 		request[id]->batch = batch;
842 
843 		i915_vma_lock(batch);
844 		err = i915_request_await_object(request[id], batch->obj, 0);
845 		if (err == 0)
846 			err = i915_vma_move_to_active(batch, request[id], 0);
847 		i915_vma_unlock(batch);
848 		GEM_BUG_ON(err);
849 
850 		i915_request_get(request[id]);
851 		i915_request_add(request[id]);
852 	}
853 
854 	for_each_engine(engine, i915, id) {
855 		if (i915_request_completed(request[id])) {
856 			pr_err("%s(%s): request completed too early!\n",
857 			       __func__, engine->name);
858 			err = -EINVAL;
859 			goto out_request;
860 		}
861 	}
862 
863 	err = recursive_batch_resolve(batch);
864 	if (err) {
865 		pr_err("%s: failed to resolve batch, err=%d\n", __func__, err);
866 		goto out_request;
867 	}
868 
869 	for_each_engine(engine, i915, id) {
870 		long timeout;
871 
872 		timeout = i915_request_wait(request[id], 0,
873 					    MAX_SCHEDULE_TIMEOUT);
874 		if (timeout < 0) {
875 			err = timeout;
876 			pr_err("%s: error waiting for request on %s, err=%d\n",
877 			       __func__, engine->name, err);
878 			goto out_request;
879 		}
880 
881 		GEM_BUG_ON(!i915_request_completed(request[id]));
882 		i915_request_put(request[id]);
883 		request[id] = NULL;
884 	}
885 
886 	err = igt_live_test_end(&t);
887 
888 out_request:
889 	for_each_engine(engine, i915, id)
890 		if (request[id])
891 			i915_request_put(request[id]);
892 	i915_vma_unpin(batch);
893 	i915_vma_put(batch);
894 	return err;
895 }
896 
897 static int live_sequential_engines(void *arg)
898 {
899 	struct drm_i915_private *i915 = arg;
900 	struct i915_request *request[I915_NUM_ENGINES] = {};
901 	struct i915_request *prev = NULL;
902 	struct intel_engine_cs *engine;
903 	struct igt_live_test t;
904 	unsigned int id;
905 	int err;
906 
907 	/* Check we can submit requests to all engines sequentially, such
908 	 * that each successive request waits for the earlier ones. This
909 	 * tests that we don't execute requests out of order, even though
910 	 * they are running on independent engines.
911 	 */
912 
913 	err = igt_live_test_begin(&t, i915, __func__, "");
914 	if (err)
915 		return err;
916 
917 	for_each_engine(engine, i915, id) {
918 		struct i915_vma *batch;
919 
920 		batch = recursive_batch(i915);
921 		if (IS_ERR(batch)) {
922 			err = PTR_ERR(batch);
923 			pr_err("%s: Unable to create batch for %s, err=%d\n",
924 			       __func__, engine->name, err);
925 			return err;
926 		}
927 
928 		request[id] = i915_request_create(engine->kernel_context);
929 		if (IS_ERR(request[id])) {
930 			err = PTR_ERR(request[id]);
931 			pr_err("%s: Request allocation failed for %s with err=%d\n",
932 			       __func__, engine->name, err);
933 			goto out_request;
934 		}
935 
936 		if (prev) {
937 			err = i915_request_await_dma_fence(request[id],
938 							   &prev->fence);
939 			if (err) {
940 				i915_request_add(request[id]);
941 				pr_err("%s: Request await failed for %s with err=%d\n",
942 				       __func__, engine->name, err);
943 				goto out_request;
944 			}
945 		}
946 
947 		err = engine->emit_bb_start(request[id],
948 					    batch->node.start,
949 					    batch->node.size,
950 					    0);
951 		GEM_BUG_ON(err);
952 		request[id]->batch = batch;
953 
954 		i915_vma_lock(batch);
955 		err = i915_request_await_object(request[id], batch->obj, false);
956 		if (err == 0)
957 			err = i915_vma_move_to_active(batch, request[id], 0);
958 		i915_vma_unlock(batch);
959 		GEM_BUG_ON(err);
960 
961 		i915_request_get(request[id]);
962 		i915_request_add(request[id]);
963 
964 		prev = request[id];
965 	}
966 
967 	for_each_engine(engine, i915, id) {
968 		long timeout;
969 
970 		if (i915_request_completed(request[id])) {
971 			pr_err("%s(%s): request completed too early!\n",
972 			       __func__, engine->name);
973 			err = -EINVAL;
974 			goto out_request;
975 		}
976 
977 		err = recursive_batch_resolve(request[id]->batch);
978 		if (err) {
979 			pr_err("%s: failed to resolve batch, err=%d\n",
980 			       __func__, err);
981 			goto out_request;
982 		}
983 
984 		timeout = i915_request_wait(request[id], 0,
985 					    MAX_SCHEDULE_TIMEOUT);
986 		if (timeout < 0) {
987 			err = timeout;
988 			pr_err("%s: error waiting for request on %s, err=%d\n",
989 			       __func__, engine->name, err);
990 			goto out_request;
991 		}
992 
993 		GEM_BUG_ON(!i915_request_completed(request[id]));
994 	}
995 
996 	err = igt_live_test_end(&t);
997 
998 out_request:
999 	for_each_engine(engine, i915, id) {
1000 		u32 *cmd;
1001 
1002 		if (!request[id])
1003 			break;
1004 
1005 		cmd = i915_gem_object_pin_map(request[id]->batch->obj,
1006 					      I915_MAP_WC);
1007 		if (!IS_ERR(cmd)) {
1008 			*cmd = MI_BATCH_BUFFER_END;
1009 			intel_gt_chipset_flush(engine->gt);
1010 
1011 			i915_gem_object_unpin_map(request[id]->batch->obj);
1012 		}
1013 
1014 		i915_vma_put(request[id]->batch);
1015 		i915_request_put(request[id]);
1016 	}
1017 	return err;
1018 }
1019 
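/*
 * Worker: submit a single nop request and synchronously wait for it
 * before submitting the next, measuring request + sync throughput on one
 * engine.
 */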
1020 static int __live_parallel_engine1(void *arg)
1021 {
1022 	struct intel_engine_cs *engine = arg;
1023 	IGT_TIMEOUT(end_time);
1024 	unsigned long count;
1025 
1026 	count = 0;
1027 	do {
1028 		struct i915_request *rq;
1029 		int err;
1030 
1031 		rq = i915_request_create(engine->kernel_context);
1032 		if (IS_ERR(rq))
1033 			return PTR_ERR(rq);
1034 
1035 		i915_request_get(rq);
1036 		i915_request_add(rq);
1037 
1038 		err = 0;
1039 		if (i915_request_wait(rq, 0, HZ / 5) < 0)
1040 			err = -ETIME;
1041 		i915_request_put(rq);
1042 		if (err)
1043 			return err;
1044 
1045 		count++;
1046 	} while (!__igt_timeout(end_time, NULL));
1047 
1048 	pr_info("%s: %lu request + sync\n", engine->name, count);
1049 	return 0;
1050 }
1051 
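/*
 * Worker: submit nop requests back-to-back without ever waiting,
 * measuring raw submission throughput on one engine.
 */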
1052 static int __live_parallel_engineN(void *arg)
1053 {
1054 	struct intel_engine_cs *engine = arg;
1055 	IGT_TIMEOUT(end_time);
1056 	unsigned long count;
1057 
1058 	count = 0;
1059 	do {
1060 		struct i915_request *rq;
1061 
1062 		rq = i915_request_create(engine->kernel_context);
1063 		if (IS_ERR(rq))
1064 			return PTR_ERR(rq);
1065 
1066 		i915_request_add(rq);
1067 		count++;
1068 	} while (!__igt_timeout(end_time, NULL));
1069 
1070 	pr_info("%s: %lu requests\n", engine->name, count);
1071 	return 0;
1072 }
1073 
1074 static int live_parallel_engines(void *arg)
1075 {
1076 	struct drm_i915_private *i915 = arg;
1077 	static int (* const func[])(void *arg) = {
1078 		__live_parallel_engine1,
1079 		__live_parallel_engineN,
1080 		NULL,
1081 	};
1082 	struct intel_engine_cs *engine;
1083 	enum intel_engine_id id;
1084 	int (* const *fn)(void *arg);
1085 	int err = 0;
1086 
1087 	/*
1088 	 * Check we can submit requests to all engines concurrently. This
1089 	 * tests that we load up the system maximally.
1090 	 */
1091 
1092 	for (fn = func; !err && *fn; fn++) {
1093 		struct task_struct *tsk[I915_NUM_ENGINES] = {};
1094 		struct igt_live_test t;
1095 
1096 		err = igt_live_test_begin(&t, i915, __func__, "");
1097 		if (err)
1098 			break;
1099 
1100 		for_each_engine(engine, i915, id) {
1101 			tsk[id] = kthread_run(*fn, engine,
1102 					      "igt/parallel:%s",
1103 					      engine->name);
1104 			if (IS_ERR(tsk[id])) {
1105 				err = PTR_ERR(tsk[id]);
1106 				break;
1107 			}
1108 			get_task_struct(tsk[id]);
1109 		}
1110 
1111 		for_each_engine(engine, i915, id) {
1112 			int status;
1113 
1114 			if (IS_ERR_OR_NULL(tsk[id]))
1115 				continue;
1116 
1117 			status = kthread_stop(tsk[id]);
1118 			if (status && !err)
1119 				err = status;
1120 
1121 			put_task_struct(tsk[id]);
1122 		}
1123 
1124 		if (igt_live_test_end(&t))
1125 			err = -EIO;
1126 	}
1127 
1128 	return err;
1129 }
1130 
1131 static int
1132 max_batches(struct i915_gem_context *ctx, struct intel_engine_cs *engine)
1133 {
1134 	struct i915_request *rq;
1135 	int ret;
1136 
1137 	/*
1138 	 * Before execlists, all contexts share the same ringbuffer. With
1139 	 * execlists, each context/engine has a separate ringbuffer and
1140 	 * for the purposes of this test, inexhaustible.
1141 	 *
1142 	 * For the global ringbuffer though, we have to be very careful
1143 	 * that we do not wrap while preventing the execution of requests
	 * with an unsignaled fence.
1145 	 */
1146 	if (HAS_EXECLISTS(ctx->i915))
1147 		return INT_MAX;
1148 
1149 	rq = igt_request_alloc(ctx, engine);
1150 	if (IS_ERR(rq)) {
1151 		ret = PTR_ERR(rq);
1152 	} else {
1153 		int sz;
1154 
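		/*
		 * Estimate how many copies of this request fit in the
		 * usable ring space before wrapping.
		 */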
1155 		ret = rq->ring->size - rq->reserved_space;
1156 		i915_request_add(rq);
1157 
1158 		sz = rq->ring->emit - rq->head;
1159 		if (sz < 0)
1160 			sz += rq->ring->size;
1161 		ret /= sz;
1162 		ret /= 2; /* leave half spare, in case of emergency! */
1163 	}
1164 
1165 	return ret;
1166 }
1167 
1168 static int live_breadcrumbs_smoketest(void *arg)
1169 {
1170 	struct drm_i915_private *i915 = arg;
1171 	struct smoketest t[I915_NUM_ENGINES];
1172 	unsigned int ncpus = num_online_cpus();
1173 	unsigned long num_waits, num_fences;
1174 	struct intel_engine_cs *engine;
1175 	struct task_struct **threads;
1176 	struct igt_live_test live;
1177 	enum intel_engine_id id;
1178 	intel_wakeref_t wakeref;
1179 	struct drm_file *file;
1180 	unsigned int n;
1181 	int ret = 0;
1182 
1183 	/*
1184 	 * Smoketest our breadcrumb/signal handling for requests across multiple
1185 	 * threads. A very simple test to only catch the most egregious of bugs.
1186 	 * See __igt_breadcrumbs_smoketest();
1187 	 *
1188 	 * On real hardware this time.
1189 	 */
1190 
1191 	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
1192 
1193 	file = mock_file(i915);
1194 	if (IS_ERR(file)) {
1195 		ret = PTR_ERR(file);
1196 		goto out_rpm;
1197 	}
1198 
1199 	threads = kcalloc(ncpus * I915_NUM_ENGINES,
1200 			  sizeof(*threads),
1201 			  GFP_KERNEL);
1202 	if (!threads) {
1203 		ret = -ENOMEM;
1204 		goto out_file;
1205 	}
1206 
1207 	memset(&t[0], 0, sizeof(t[0]));
1208 	t[0].request_alloc = __live_request_alloc;
1209 	t[0].ncontexts = 64;
1210 	t[0].contexts = kmalloc_array(t[0].ncontexts,
1211 				      sizeof(*t[0].contexts),
1212 				      GFP_KERNEL);
1213 	if (!t[0].contexts) {
1214 		ret = -ENOMEM;
1215 		goto out_threads;
1216 	}
1217 
1218 	for (n = 0; n < t[0].ncontexts; n++) {
1219 		t[0].contexts[n] = live_context(i915, file);
1220 		if (!t[0].contexts[n]) {
1221 			ret = -ENOMEM;
1222 			goto out_contexts;
1223 		}
1224 	}
1225 
1226 	ret = igt_live_test_begin(&live, i915, __func__, "");
1227 	if (ret)
1228 		goto out_contexts;
1229 
1230 	for_each_engine(engine, i915, id) {
1231 		t[id] = t[0];
1232 		t[id].engine = engine;
1233 		t[id].max_batch = max_batches(t[0].contexts[0], engine);
1234 		if (t[id].max_batch < 0) {
1235 			ret = t[id].max_batch;
1236 			goto out_flush;
1237 		}
1238 		/* One ring interleaved between requests from all cpus */
1239 		t[id].max_batch /= num_online_cpus() + 1;
1240 		pr_debug("Limiting batches to %d requests on %s\n",
1241 			 t[id].max_batch, engine->name);
1242 
1243 		for (n = 0; n < ncpus; n++) {
1244 			struct task_struct *tsk;
1245 
1246 			tsk = kthread_run(__igt_breadcrumbs_smoketest,
1247 					  &t[id], "igt/%d.%d", id, n);
1248 			if (IS_ERR(tsk)) {
1249 				ret = PTR_ERR(tsk);
1250 				goto out_flush;
1251 			}
1252 
1253 			get_task_struct(tsk);
1254 			threads[id * ncpus + n] = tsk;
1255 		}
1256 	}
1257 
1258 	msleep(jiffies_to_msecs(i915_selftest.timeout_jiffies));
1259 
1260 out_flush:
1261 	num_waits = 0;
1262 	num_fences = 0;
1263 	for_each_engine(engine, i915, id) {
1264 		for (n = 0; n < ncpus; n++) {
1265 			struct task_struct *tsk = threads[id * ncpus + n];
1266 			int err;
1267 
1268 			if (!tsk)
1269 				continue;
1270 
1271 			err = kthread_stop(tsk);
1272 			if (err < 0 && !ret)
1273 				ret = err;
1274 
1275 			put_task_struct(tsk);
1276 		}
1277 
1278 		num_waits += atomic_long_read(&t[id].num_waits);
1279 		num_fences += atomic_long_read(&t[id].num_fences);
1280 	}
1281 	pr_info("Completed %lu waits for %lu fences across %d engines and %d cpus\n",
1282 		num_waits, num_fences, RUNTIME_INFO(i915)->num_engines, ncpus);
1283 
1284 	ret = igt_live_test_end(&live) ?: ret;
1285 out_contexts:
1286 	kfree(t[0].contexts);
1287 out_threads:
1288 	kfree(threads);
1289 out_file:
1290 	mock_file_free(i915, file);
1291 out_rpm:
1292 	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
1293 
1294 	return ret;
1295 }
1296 
1297 int i915_request_live_selftests(struct drm_i915_private *i915)
1298 {
1299 	static const struct i915_subtest tests[] = {
1300 		SUBTEST(live_nop_request),
1301 		SUBTEST(live_all_engines),
1302 		SUBTEST(live_sequential_engines),
1303 		SUBTEST(live_parallel_engines),
1304 		SUBTEST(live_empty_request),
1305 		SUBTEST(live_breadcrumbs_smoketest),
1306 	};
1307 
1308 	if (intel_gt_is_wedged(&i915->gt))
1309 		return 0;
1310 
1311 	return i915_subtests(tests, i915);
1312 }
1313