1 /*
2  * Copyright © 2016 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  *
23  */
24 
25 #include <linux/prime_numbers.h>
26 
27 #include "gem/i915_gem_pm.h"
28 #include "gem/selftests/mock_context.h"
29 
30 #include "gt/intel_engine_pm.h"
31 #include "gt/intel_gt.h"
32 
33 #include "i915_random.h"
34 #include "i915_selftest.h"
35 #include "igt_live_test.h"
36 #include "igt_spinner.h"
37 #include "lib_sw_fence.h"
38 
39 #include "mock_drm.h"
40 #include "mock_gem_device.h"
41 
42 static unsigned int num_uabi_engines(struct drm_i915_private *i915)
43 {
44 	struct intel_engine_cs *engine;
45 	unsigned int count;
46 
47 	count = 0;
48 	for_each_uabi_engine(engine, i915)
49 		count++;
50 
51 	return count;
52 }
53 
54 static int igt_add_request(void *arg)
55 {
56 	struct drm_i915_private *i915 = arg;
57 	struct i915_request *request;
58 
59 	/* Basic preliminary test to create a request and let it loose! */
60 
61 	request = mock_request(i915->engine[RCS0]->kernel_context, HZ / 10);
62 	if (!request)
63 		return -ENOMEM;
64 
65 	i915_request_add(request);
66 
67 	return 0;
68 }
69 
70 static int igt_wait_request(void *arg)
71 {
72 	const long T = HZ / 4;
73 	struct drm_i915_private *i915 = arg;
74 	struct i915_request *request;
75 	int err = -EINVAL;
76 
77 	/* Submit a request, then wait upon it */
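	/*
	 * mock_request() arms the mock backend to complete this request about
	 * T jiffies after it is submitted, so waits shorter than T are
	 * expected to time out while waits of at least T should succeed.
	 */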
78 
79 	request = mock_request(i915->engine[RCS0]->kernel_context, T);
80 	if (!request)
81 		return -ENOMEM;
82 
83 	i915_request_get(request);
84 
85 	if (i915_request_wait(request, 0, 0) != -ETIME) {
86 		pr_err("request wait (busy query) succeeded (expected timeout before submit!)\n");
87 		goto out_request;
88 	}
89 
90 	if (i915_request_wait(request, 0, T) != -ETIME) {
91 		pr_err("request wait succeeded (expected timeout before submit!)\n");
92 		goto out_request;
93 	}
94 
95 	if (i915_request_completed(request)) {
96 		pr_err("request completed before submit!!\n");
97 		goto out_request;
98 	}
99 
100 	i915_request_add(request);
101 
102 	if (i915_request_wait(request, 0, 0) != -ETIME) {
103 		pr_err("request wait (busy query) succeeded (expected timeout after submit!)\n");
104 		goto out_request;
105 	}
106 
107 	if (i915_request_completed(request)) {
108 		pr_err("request completed immediately!\n");
109 		goto out_request;
110 	}
111 
112 	if (i915_request_wait(request, 0, T / 2) != -ETIME) {
113 		pr_err("request wait succeeded (expected timeout!)\n");
114 		goto out_request;
115 	}
116 
117 	if (i915_request_wait(request, 0, T) == -ETIME) {
118 		pr_err("request wait timed out!\n");
119 		goto out_request;
120 	}
121 
122 	if (!i915_request_completed(request)) {
123 		pr_err("request not complete after waiting!\n");
124 		goto out_request;
125 	}
126 
127 	if (i915_request_wait(request, 0, T) == -ETIME) {
128 		pr_err("request wait timed out when already complete!\n");
129 		goto out_request;
130 	}
131 
132 	err = 0;
133 out_request:
134 	i915_request_put(request);
135 	mock_device_flush(i915);
136 	return err;
137 }
138 
139 static int igt_fence_wait(void *arg)
140 {
141 	const long T = HZ / 4;
142 	struct drm_i915_private *i915 = arg;
143 	struct i915_request *request;
144 	int err = -EINVAL;
145 
146 	/* Submit a request, treat it as a fence and wait upon it */
147 
148 	request = mock_request(i915->engine[RCS0]->kernel_context, T);
149 	if (!request)
150 		return -ENOMEM;
151 
152 	if (dma_fence_wait_timeout(&request->fence, false, T) != -ETIME) {
153 		pr_err("fence wait success before submit (expected timeout)!\n");
154 		goto out;
155 	}
156 
157 	i915_request_add(request);
158 
159 	if (dma_fence_is_signaled(&request->fence)) {
160 		pr_err("fence signaled immediately!\n");
161 		goto out;
162 	}
163 
164 	if (dma_fence_wait_timeout(&request->fence, false, T / 2) != -ETIME) {
165 		pr_err("fence wait success after submit (expected timeout)!\n");
166 		goto out;
167 	}
168 
169 	if (dma_fence_wait_timeout(&request->fence, false, T) <= 0) {
170 		pr_err("fence wait timed out (expected success)!\n");
171 		goto out;
172 	}
173 
174 	if (!dma_fence_is_signaled(&request->fence)) {
175 		pr_err("fence unsignaled after waiting!\n");
176 		goto out;
177 	}
178 
179 	if (dma_fence_wait_timeout(&request->fence, false, T) <= 0) {
180 		pr_err("fence wait timed out when complete (expected success)!\n");
181 		goto out;
182 	}
183 
184 	err = 0;
185 out:
186 	mock_device_flush(i915);
187 	return err;
188 }
189 
190 static int igt_request_rewind(void *arg)
191 {
192 	struct drm_i915_private *i915 = arg;
193 	struct i915_request *request, *vip;
194 	struct i915_gem_context *ctx[2];
195 	struct intel_context *ce;
196 	int err = -EINVAL;
197 
198 	ctx[0] = mock_context(i915, "A");
199 
200 	ce = i915_gem_context_get_engine(ctx[0], RCS0);
201 	GEM_BUG_ON(IS_ERR(ce));
202 	request = mock_request(ce, 2 * HZ);
203 	intel_context_put(ce);
204 	if (!request) {
205 		err = -ENOMEM;
206 		goto err_context_0;
207 	}
208 
209 	i915_request_get(request);
210 	i915_request_add(request);
211 
212 	ctx[1] = mock_context(i915, "B");
213 
214 	ce = i915_gem_context_get_engine(ctx[1], RCS0);
215 	GEM_BUG_ON(IS_ERR(ce));
216 	vip = mock_request(ce, 0);
217 	intel_context_put(ce);
218 	if (!vip) {
219 		err = -ENOMEM;
220 		goto err_context_1;
221 	}
222 
223 	/* Simulate preemption by manual reordering */
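	/*
	 * mock_cancel_request() pulls the not-yet-executed request back off
	 * the mock engine; once vip has been queued we resubmit it directly,
	 * so it now runs behind the "high priority" request even though it
	 * was created first.
	 */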
224 	if (!mock_cancel_request(request)) {
225 		pr_err("failed to cancel request (already executed)!\n");
226 		i915_request_add(vip);
227 		goto err_context_1;
228 	}
229 	i915_request_get(vip);
230 	i915_request_add(vip);
231 	rcu_read_lock();
232 	request->engine->submit_request(request);
233 	rcu_read_unlock();
234 
236 	if (i915_request_wait(vip, 0, HZ) == -ETIME) {
237 		pr_err("timed out waiting for high priority request\n");
238 		goto err;
239 	}
240 
241 	if (i915_request_completed(request)) {
242 		pr_err("low priority request already completed\n");
243 		goto err;
244 	}
245 
246 	err = 0;
247 err:
248 	i915_request_put(vip);
249 err_context_1:
250 	mock_context_close(ctx[1]);
251 	i915_request_put(request);
252 err_context_0:
253 	mock_context_close(ctx[0]);
254 	mock_device_flush(i915);
255 	return err;
256 }
257 
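/*
 * Parameters shared by each smoketest thread: the engine under test, a pool
 * of contexts to build requests from, an upper bound on the batch size,
 * counters recording how much work was done, and a request_alloc hook so the
 * same loop can drive either the mock backend or real hardware.
 */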
258 struct smoketest {
259 	struct intel_engine_cs *engine;
260 	struct i915_gem_context **contexts;
261 	atomic_long_t num_waits, num_fences;
262 	int ncontexts, max_batch;
263 	struct i915_request *(*request_alloc)(struct intel_context *ce);
264 };
265 
266 static struct i915_request *
267 __mock_request_alloc(struct intel_context *ce)
268 {
269 	return mock_request(ce, 0);
270 }
271 
272 static struct i915_request *
273 __live_request_alloc(struct intel_context *ce)
274 {
275 	return intel_context_create_request(ce);
276 }
277 
278 static int __igt_breadcrumbs_smoketest(void *arg)
279 {
280 	struct smoketest *t = arg;
281 	const unsigned int max_batch = min(t->ncontexts, t->max_batch) - 1;
282 	const unsigned int total = 4 * t->ncontexts + 1;
283 	unsigned int num_waits = 0, num_fences = 0;
284 	struct i915_request **requests;
285 	I915_RND_STATE(prng);
286 	unsigned int *order;
287 	int err = 0;
288 
289 	/*
290 	 * A very simple test to catch the most egregious of list handling bugs.
291 	 *
292 	 * At its heart, we simply create oodles of requests running across
293 	 * multiple kthreads and enable signaling on them, for the sole purpose
294 	 * of stressing our breadcrumb handling. The only inspection we do is
295 	 * that the fences were marked as signaled.
296 	 */
297 
298 	requests = kcalloc(total, sizeof(*requests), GFP_KERNEL);
299 	if (!requests)
300 		return -ENOMEM;
301 
302 	order = i915_random_order(total, &prng);
303 	if (!order) {
304 		err = -ENOMEM;
305 		goto out_requests;
306 	}
307 
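	/*
	 * Each pass builds two sw_fences: every request's submit fence waits
	 * on 'submit' (so nothing executes until we commit it), while 'wait'
	 * accumulates every request's dma-fence so we can block until the
	 * whole batch has signaled.
	 */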
308 	while (!kthread_should_stop()) {
309 		struct i915_sw_fence *submit, *wait;
310 		unsigned int n, count;
311 
312 		submit = heap_fence_create(GFP_KERNEL);
313 		if (!submit) {
314 			err = -ENOMEM;
315 			break;
316 		}
317 
318 		wait = heap_fence_create(GFP_KERNEL);
319 		if (!wait) {
320 			i915_sw_fence_commit(submit);
321 			heap_fence_put(submit);
			err = -ENOMEM;
323 			break;
324 		}
325 
326 		i915_random_reorder(order, total, &prng);
327 		count = 1 + i915_prandom_u32_max_state(max_batch, &prng);
328 
329 		for (n = 0; n < count; n++) {
330 			struct i915_gem_context *ctx =
331 				t->contexts[order[n] % t->ncontexts];
332 			struct i915_request *rq;
333 			struct intel_context *ce;
334 
335 			ce = i915_gem_context_get_engine(ctx, t->engine->legacy_idx);
336 			GEM_BUG_ON(IS_ERR(ce));
337 			rq = t->request_alloc(ce);
338 			intel_context_put(ce);
339 			if (IS_ERR(rq)) {
340 				err = PTR_ERR(rq);
341 				count = n;
342 				break;
343 			}
344 
345 			err = i915_sw_fence_await_sw_fence_gfp(&rq->submit,
346 							       submit,
347 							       GFP_KERNEL);
348 
349 			requests[n] = i915_request_get(rq);
350 			i915_request_add(rq);
351 
352 			if (err >= 0)
353 				err = i915_sw_fence_await_dma_fence(wait,
354 								    &rq->fence,
355 								    0,
356 								    GFP_KERNEL);
357 
358 			if (err < 0) {
359 				i915_request_put(rq);
360 				count = n;
361 				break;
362 			}
363 		}
364 
365 		i915_sw_fence_commit(submit);
366 		i915_sw_fence_commit(wait);
367 
368 		if (!wait_event_timeout(wait->wait,
369 					i915_sw_fence_done(wait),
370 					5 * HZ)) {
371 			struct i915_request *rq = requests[count - 1];
372 
373 			pr_err("waiting for %d/%d fences (last %llx:%lld) on %s timed out!\n",
374 			       atomic_read(&wait->pending), count,
375 			       rq->fence.context, rq->fence.seqno,
376 			       t->engine->name);
377 			GEM_TRACE_DUMP();
378 
379 			intel_gt_set_wedged(t->engine->gt);
380 			GEM_BUG_ON(!i915_request_completed(rq));
381 			i915_sw_fence_wait(wait);
382 			err = -EIO;
383 		}
384 
385 		for (n = 0; n < count; n++) {
386 			struct i915_request *rq = requests[n];
387 
388 			if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
389 				      &rq->fence.flags)) {
390 				pr_err("%llu:%llu was not signaled!\n",
391 				       rq->fence.context, rq->fence.seqno);
392 				err = -EINVAL;
393 			}
394 
395 			i915_request_put(rq);
396 		}
397 
398 		heap_fence_put(wait);
399 		heap_fence_put(submit);
400 
401 		if (err < 0)
402 			break;
403 
404 		num_fences += count;
405 		num_waits++;
406 
407 		cond_resched();
408 	}
409 
410 	atomic_long_add(num_fences, &t->num_fences);
411 	atomic_long_add(num_waits, &t->num_waits);
412 
413 	kfree(order);
414 out_requests:
415 	kfree(requests);
416 	return err;
417 }
418 
419 static int mock_breadcrumbs_smoketest(void *arg)
420 {
421 	struct drm_i915_private *i915 = arg;
422 	struct smoketest t = {
423 		.engine = i915->engine[RCS0],
424 		.ncontexts = 1024,
425 		.max_batch = 1024,
426 		.request_alloc = __mock_request_alloc
427 	};
428 	unsigned int ncpus = num_online_cpus();
429 	struct task_struct **threads;
430 	unsigned int n;
431 	int ret = 0;
432 
433 	/*
434 	 * Smoketest our breadcrumb/signal handling for requests across multiple
435 	 * threads. A very simple test to only catch the most egregious of bugs.
436 	 * See __igt_breadcrumbs_smoketest();
437 	 */
438 
439 	threads = kcalloc(ncpus, sizeof(*threads), GFP_KERNEL);
440 	if (!threads)
441 		return -ENOMEM;
442 
443 	t.contexts = kcalloc(t.ncontexts, sizeof(*t.contexts), GFP_KERNEL);
444 	if (!t.contexts) {
445 		ret = -ENOMEM;
446 		goto out_threads;
447 	}
448 
449 	for (n = 0; n < t.ncontexts; n++) {
450 		t.contexts[n] = mock_context(t.engine->i915, "mock");
451 		if (!t.contexts[n]) {
452 			ret = -ENOMEM;
453 			goto out_contexts;
454 		}
455 	}
456 
457 	for (n = 0; n < ncpus; n++) {
458 		threads[n] = kthread_run(__igt_breadcrumbs_smoketest,
459 					 &t, "igt/%d", n);
460 		if (IS_ERR(threads[n])) {
461 			ret = PTR_ERR(threads[n]);
462 			ncpus = n;
463 			break;
464 		}
465 
466 		get_task_struct(threads[n]);
467 	}
468 
469 	yield(); /* start all threads before we begin */
470 	msleep(jiffies_to_msecs(i915_selftest.timeout_jiffies));
471 
472 	for (n = 0; n < ncpus; n++) {
473 		int err;
474 
475 		err = kthread_stop(threads[n]);
476 		if (err < 0 && !ret)
477 			ret = err;
478 
479 		put_task_struct(threads[n]);
480 	}
	pr_info("Completed %lu waits for %lu fences across %d cpus\n",
482 		atomic_long_read(&t.num_waits),
483 		atomic_long_read(&t.num_fences),
484 		ncpus);
485 
486 out_contexts:
487 	for (n = 0; n < t.ncontexts; n++) {
488 		if (!t.contexts[n])
489 			break;
490 		mock_context_close(t.contexts[n]);
491 	}
492 	kfree(t.contexts);
493 out_threads:
494 	kfree(threads);
495 	return ret;
496 }
497 
498 int i915_request_mock_selftests(void)
499 {
500 	static const struct i915_subtest tests[] = {
501 		SUBTEST(igt_add_request),
502 		SUBTEST(igt_wait_request),
503 		SUBTEST(igt_fence_wait),
504 		SUBTEST(igt_request_rewind),
505 		SUBTEST(mock_breadcrumbs_smoketest),
506 	};
507 	struct drm_i915_private *i915;
508 	intel_wakeref_t wakeref;
509 	int err = 0;
510 
511 	i915 = mock_gem_device();
512 	if (!i915)
513 		return -ENOMEM;
514 
515 	with_intel_runtime_pm(&i915->runtime_pm, wakeref)
516 		err = i915_subtests(tests, i915);
517 
518 	drm_dev_put(&i915->drm);
519 
520 	return err;
521 }
522 
523 static int live_nop_request(void *arg)
524 {
525 	struct drm_i915_private *i915 = arg;
526 	struct intel_engine_cs *engine;
527 	struct igt_live_test t;
528 	int err = -ENODEV;
529 
530 	/*
531 	 * Submit various sized batches of empty requests, to each engine
532 	 * (individually), and wait for the batch to complete. We can check
533 	 * the overhead of submitting requests to the hardware.
534 	 */
535 
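	/*
	 * times[0] captures the single-request latency (prime == 1) as a
	 * baseline, while times[1] ends up holding the wall time for the
	 * largest batch completed before the timeout, which we report as an
	 * amortised per-request cost.
	 */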
536 	for_each_uabi_engine(engine, i915) {
537 		unsigned long n, prime;
538 		IGT_TIMEOUT(end_time);
539 		ktime_t times[2] = {};
540 
541 		err = igt_live_test_begin(&t, i915, __func__, engine->name);
542 		if (err)
543 			return err;
544 
545 		intel_engine_pm_get(engine);
546 		for_each_prime_number_from(prime, 1, 8192) {
547 			struct i915_request *request = NULL;
548 
549 			times[1] = ktime_get_raw();
550 
551 			for (n = 0; n < prime; n++) {
552 				i915_request_put(request);
553 				request = i915_request_create(engine->kernel_context);
554 				if (IS_ERR(request))
555 					return PTR_ERR(request);
556 
557 				/*
558 				 * This space is left intentionally blank.
559 				 *
560 				 * We do not actually want to perform any
561 				 * action with this request, we just want
562 				 * to measure the latency in allocation
563 				 * and submission of our breadcrumbs -
564 				 * ensuring that the bare request is sufficient
565 				 * for the system to work (i.e. proper HEAD
566 				 * tracking of the rings, interrupt handling,
567 				 * etc). It also gives us the lowest bounds
568 				 * for latency.
569 				 */
570 
571 				i915_request_get(request);
572 				i915_request_add(request);
573 			}
574 			i915_request_wait(request, 0, MAX_SCHEDULE_TIMEOUT);
575 			i915_request_put(request);
576 
577 			times[1] = ktime_sub(ktime_get_raw(), times[1]);
578 			if (prime == 1)
579 				times[0] = times[1];
580 
581 			if (__igt_timeout(end_time, NULL))
582 				break;
583 		}
584 		intel_engine_pm_put(engine);
585 
586 		err = igt_live_test_end(&t);
587 		if (err)
588 			return err;
589 
590 		pr_info("Request latencies on %s: 1 = %lluns, %lu = %lluns\n",
591 			engine->name,
592 			ktime_to_ns(times[0]),
593 			prime, div64_u64(ktime_to_ns(times[1]), prime));
594 	}
595 
596 	return err;
597 }
598 
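/*
 * empty_batch() pins a one-page batch containing just MI_BATCH_BUFFER_END
 * into the global GTT, so that empty_request() can measure the cost of
 * dispatching a batch which does no work at all.
 */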
599 static struct i915_vma *empty_batch(struct drm_i915_private *i915)
600 {
601 	struct drm_i915_gem_object *obj;
602 	struct i915_vma *vma;
603 	u32 *cmd;
604 	int err;
605 
606 	obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
607 	if (IS_ERR(obj))
608 		return ERR_CAST(obj);
609 
610 	cmd = i915_gem_object_pin_map(obj, I915_MAP_WB);
611 	if (IS_ERR(cmd)) {
612 		err = PTR_ERR(cmd);
613 		goto err;
614 	}
615 
616 	*cmd = MI_BATCH_BUFFER_END;
617 
618 	__i915_gem_object_flush_map(obj, 0, 64);
619 	i915_gem_object_unpin_map(obj);
620 
621 	intel_gt_chipset_flush(&i915->gt);
622 
623 	vma = i915_vma_instance(obj, &i915->ggtt.vm, NULL);
624 	if (IS_ERR(vma)) {
625 		err = PTR_ERR(vma);
626 		goto err;
627 	}
628 
629 	err = i915_vma_pin(vma, 0, 0, PIN_USER | PIN_GLOBAL);
630 	if (err)
631 		goto err;
632 
	/* Force the wait now to avoid including it in the benchmark */
634 	err = i915_vma_sync(vma);
635 	if (err)
636 		goto err_pin;
637 
638 	return vma;
639 
640 err_pin:
641 	i915_vma_unpin(vma);
642 err:
643 	i915_gem_object_put(obj);
644 	return ERR_PTR(err);
645 }
646 
647 static struct i915_request *
648 empty_request(struct intel_engine_cs *engine,
649 	      struct i915_vma *batch)
650 {
651 	struct i915_request *request;
652 	int err;
653 
654 	request = i915_request_create(engine->kernel_context);
655 	if (IS_ERR(request))
656 		return request;
657 
658 	err = engine->emit_bb_start(request,
659 				    batch->node.start,
660 				    batch->node.size,
661 				    I915_DISPATCH_SECURE);
662 	if (err)
663 		goto out_request;
664 
665 	i915_request_get(request);
666 out_request:
667 	i915_request_add(request);
668 	return err ? ERR_PTR(err) : request;
669 }
670 
671 static int live_empty_request(void *arg)
672 {
673 	struct drm_i915_private *i915 = arg;
674 	struct intel_engine_cs *engine;
675 	struct igt_live_test t;
676 	struct i915_vma *batch;
677 	int err = 0;
678 
679 	/*
680 	 * Submit various sized batches of empty requests, to each engine
681 	 * (individually), and wait for the batch to complete. We can check
682 	 * the overhead of submitting requests to the hardware.
683 	 */
684 
685 	batch = empty_batch(i915);
686 	if (IS_ERR(batch))
687 		return PTR_ERR(batch);
688 
689 	for_each_uabi_engine(engine, i915) {
690 		IGT_TIMEOUT(end_time);
691 		struct i915_request *request;
692 		unsigned long n, prime;
693 		ktime_t times[2] = {};
694 
695 		err = igt_live_test_begin(&t, i915, __func__, engine->name);
696 		if (err)
697 			goto out_batch;
698 
699 		intel_engine_pm_get(engine);
700 
701 		/* Warmup / preload */
702 		request = empty_request(engine, batch);
703 		if (IS_ERR(request)) {
704 			err = PTR_ERR(request);
705 			intel_engine_pm_put(engine);
706 			goto out_batch;
707 		}
708 		i915_request_wait(request, 0, MAX_SCHEDULE_TIMEOUT);
709 
710 		for_each_prime_number_from(prime, 1, 8192) {
711 			times[1] = ktime_get_raw();
712 
713 			for (n = 0; n < prime; n++) {
714 				i915_request_put(request);
715 				request = empty_request(engine, batch);
716 				if (IS_ERR(request)) {
717 					err = PTR_ERR(request);
718 					intel_engine_pm_put(engine);
719 					goto out_batch;
720 				}
721 			}
722 			i915_request_wait(request, 0, MAX_SCHEDULE_TIMEOUT);
723 
724 			times[1] = ktime_sub(ktime_get_raw(), times[1]);
725 			if (prime == 1)
726 				times[0] = times[1];
727 
728 			if (__igt_timeout(end_time, NULL))
729 				break;
730 		}
731 		i915_request_put(request);
732 		intel_engine_pm_put(engine);
733 
734 		err = igt_live_test_end(&t);
735 		if (err)
736 			goto out_batch;
737 
738 		pr_info("Batch latencies on %s: 1 = %lluns, %lu = %lluns\n",
739 			engine->name,
740 			ktime_to_ns(times[0]),
741 			prime, div64_u64(ktime_to_ns(times[1]), prime));
742 	}
743 
744 out_batch:
745 	i915_vma_unpin(batch);
746 	i915_vma_put(batch);
747 	return err;
748 }
749 
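/*
 * recursive_batch() builds a batch whose first instruction is an
 * MI_BATCH_BUFFER_START pointing back at itself, so any request executing it
 * keeps the GPU busy until recursive_batch_resolve() rewrites that first
 * dword to MI_BATCH_BUFFER_END.
 */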
750 static struct i915_vma *recursive_batch(struct drm_i915_private *i915)
751 {
752 	struct drm_i915_gem_object *obj;
753 	const int gen = INTEL_GEN(i915);
754 	struct i915_vma *vma;
755 	u32 *cmd;
756 	int err;
757 
758 	obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
759 	if (IS_ERR(obj))
760 		return ERR_CAST(obj);
761 
762 	vma = i915_vma_instance(obj, i915->gt.vm, NULL);
763 	if (IS_ERR(vma)) {
764 		err = PTR_ERR(vma);
765 		goto err;
766 	}
767 
768 	err = i915_vma_pin(vma, 0, 0, PIN_USER);
769 	if (err)
770 		goto err;
771 
772 	cmd = i915_gem_object_pin_map(obj, I915_MAP_WC);
773 	if (IS_ERR(cmd)) {
774 		err = PTR_ERR(cmd);
775 		goto err;
776 	}
777 
778 	if (gen >= 8) {
779 		*cmd++ = MI_BATCH_BUFFER_START | 1 << 8 | 1;
780 		*cmd++ = lower_32_bits(vma->node.start);
781 		*cmd++ = upper_32_bits(vma->node.start);
782 	} else if (gen >= 6) {
783 		*cmd++ = MI_BATCH_BUFFER_START | 1 << 8;
784 		*cmd++ = lower_32_bits(vma->node.start);
785 	} else {
786 		*cmd++ = MI_BATCH_BUFFER_START | MI_BATCH_GTT;
787 		*cmd++ = lower_32_bits(vma->node.start);
788 	}
789 	*cmd++ = MI_BATCH_BUFFER_END; /* terminate early in case of error */
790 
791 	__i915_gem_object_flush_map(obj, 0, 64);
792 	i915_gem_object_unpin_map(obj);
793 
794 	intel_gt_chipset_flush(&i915->gt);
795 
796 	return vma;
797 
798 err:
799 	i915_gem_object_put(obj);
800 	return ERR_PTR(err);
801 }
802 
803 static int recursive_batch_resolve(struct i915_vma *batch)
804 {
805 	u32 *cmd;
806 
807 	cmd = i915_gem_object_pin_map(batch->obj, I915_MAP_WC);
808 	if (IS_ERR(cmd))
809 		return PTR_ERR(cmd);
810 
811 	*cmd = MI_BATCH_BUFFER_END;
812 	intel_gt_chipset_flush(batch->vm->gt);
813 
814 	i915_gem_object_unpin_map(batch->obj);
815 
816 	return 0;
817 }
818 
819 static int live_all_engines(void *arg)
820 {
821 	struct drm_i915_private *i915 = arg;
822 	const unsigned int nengines = num_uabi_engines(i915);
823 	struct intel_engine_cs *engine;
824 	struct i915_request **request;
825 	struct igt_live_test t;
826 	struct i915_vma *batch;
827 	unsigned int idx;
828 	int err;
829 
830 	/*
831 	 * Check we can submit requests to all engines simultaneously. We
832 	 * send a recursive batch to each engine - checking that we don't
833 	 * block doing so, and that they don't complete too soon.
834 	 */
835 
836 	request = kcalloc(nengines, sizeof(*request), GFP_KERNEL);
837 	if (!request)
838 		return -ENOMEM;
839 
840 	err = igt_live_test_begin(&t, i915, __func__, "");
841 	if (err)
842 		goto out_free;
843 
844 	batch = recursive_batch(i915);
845 	if (IS_ERR(batch)) {
846 		err = PTR_ERR(batch);
847 		pr_err("%s: Unable to create batch, err=%d\n", __func__, err);
848 		goto out_free;
849 	}
850 
851 	idx = 0;
852 	for_each_uabi_engine(engine, i915) {
853 		request[idx] = intel_engine_create_kernel_request(engine);
854 		if (IS_ERR(request[idx])) {
855 			err = PTR_ERR(request[idx]);
856 			pr_err("%s: Request allocation failed with err=%d\n",
857 			       __func__, err);
858 			goto out_request;
859 		}
860 
861 		err = engine->emit_bb_start(request[idx],
862 					    batch->node.start,
863 					    batch->node.size,
864 					    0);
865 		GEM_BUG_ON(err);
866 		request[idx]->batch = batch;
867 
868 		i915_vma_lock(batch);
869 		err = i915_request_await_object(request[idx], batch->obj, 0);
870 		if (err == 0)
871 			err = i915_vma_move_to_active(batch, request[idx], 0);
872 		i915_vma_unlock(batch);
873 		GEM_BUG_ON(err);
874 
875 		i915_request_get(request[idx]);
876 		i915_request_add(request[idx]);
877 		idx++;
878 	}
879 
880 	idx = 0;
881 	for_each_uabi_engine(engine, i915) {
882 		if (i915_request_completed(request[idx])) {
883 			pr_err("%s(%s): request completed too early!\n",
884 			       __func__, engine->name);
885 			err = -EINVAL;
886 			goto out_request;
887 		}
888 		idx++;
889 	}
890 
891 	err = recursive_batch_resolve(batch);
892 	if (err) {
893 		pr_err("%s: failed to resolve batch, err=%d\n", __func__, err);
894 		goto out_request;
895 	}
896 
897 	idx = 0;
898 	for_each_uabi_engine(engine, i915) {
899 		long timeout;
900 
901 		timeout = i915_request_wait(request[idx], 0,
902 					    MAX_SCHEDULE_TIMEOUT);
903 		if (timeout < 0) {
904 			err = timeout;
905 			pr_err("%s: error waiting for request on %s, err=%d\n",
906 			       __func__, engine->name, err);
907 			goto out_request;
908 		}
909 
910 		GEM_BUG_ON(!i915_request_completed(request[idx]));
911 		i915_request_put(request[idx]);
912 		request[idx] = NULL;
913 		idx++;
914 	}
915 
916 	err = igt_live_test_end(&t);
917 
918 out_request:
919 	idx = 0;
920 	for_each_uabi_engine(engine, i915) {
921 		if (request[idx])
922 			i915_request_put(request[idx]);
923 		idx++;
924 	}
925 	i915_vma_unpin(batch);
926 	i915_vma_put(batch);
927 out_free:
928 	kfree(request);
929 	return err;
930 }
931 
932 static int live_sequential_engines(void *arg)
933 {
934 	struct drm_i915_private *i915 = arg;
935 	const unsigned int nengines = num_uabi_engines(i915);
936 	struct i915_request **request;
937 	struct i915_request *prev = NULL;
938 	struct intel_engine_cs *engine;
939 	struct igt_live_test t;
940 	unsigned int idx;
941 	int err;
942 
943 	/*
944 	 * Check we can submit requests to all engines sequentially, such
945 	 * that each successive request waits for the earlier ones. This
946 	 * tests that we don't execute requests out of order, even though
947 	 * they are running on independent engines.
948 	 */
949 
950 	request = kcalloc(nengines, sizeof(*request), GFP_KERNEL);
951 	if (!request)
952 		return -ENOMEM;
953 
954 	err = igt_live_test_begin(&t, i915, __func__, "");
955 	if (err)
956 		goto out_free;
957 
958 	idx = 0;
959 	for_each_uabi_engine(engine, i915) {
960 		struct i915_vma *batch;
961 
962 		batch = recursive_batch(i915);
963 		if (IS_ERR(batch)) {
964 			err = PTR_ERR(batch);
965 			pr_err("%s: Unable to create batch for %s, err=%d\n",
966 			       __func__, engine->name, err);
967 			goto out_free;
968 		}
969 
970 		request[idx] = intel_engine_create_kernel_request(engine);
971 		if (IS_ERR(request[idx])) {
972 			err = PTR_ERR(request[idx]);
973 			pr_err("%s: Request allocation failed for %s with err=%d\n",
974 			       __func__, engine->name, err);
975 			goto out_request;
976 		}
977 
978 		if (prev) {
979 			err = i915_request_await_dma_fence(request[idx],
980 							   &prev->fence);
981 			if (err) {
982 				i915_request_add(request[idx]);
983 				pr_err("%s: Request await failed for %s with err=%d\n",
984 				       __func__, engine->name, err);
985 				goto out_request;
986 			}
987 		}
988 
989 		err = engine->emit_bb_start(request[idx],
990 					    batch->node.start,
991 					    batch->node.size,
992 					    0);
993 		GEM_BUG_ON(err);
994 		request[idx]->batch = batch;
995 
996 		i915_vma_lock(batch);
997 		err = i915_request_await_object(request[idx],
998 						batch->obj, false);
999 		if (err == 0)
1000 			err = i915_vma_move_to_active(batch, request[idx], 0);
1001 		i915_vma_unlock(batch);
1002 		GEM_BUG_ON(err);
1003 
1004 		i915_request_get(request[idx]);
1005 		i915_request_add(request[idx]);
1006 
1007 		prev = request[idx];
1008 		idx++;
1009 	}
1010 
1011 	idx = 0;
1012 	for_each_uabi_engine(engine, i915) {
1013 		long timeout;
1014 
1015 		if (i915_request_completed(request[idx])) {
1016 			pr_err("%s(%s): request completed too early!\n",
1017 			       __func__, engine->name);
1018 			err = -EINVAL;
1019 			goto out_request;
1020 		}
1021 
1022 		err = recursive_batch_resolve(request[idx]->batch);
1023 		if (err) {
1024 			pr_err("%s: failed to resolve batch, err=%d\n",
1025 			       __func__, err);
1026 			goto out_request;
1027 		}
1028 
1029 		timeout = i915_request_wait(request[idx], 0,
1030 					    MAX_SCHEDULE_TIMEOUT);
1031 		if (timeout < 0) {
1032 			err = timeout;
1033 			pr_err("%s: error waiting for request on %s, err=%d\n",
1034 			       __func__, engine->name, err);
1035 			goto out_request;
1036 		}
1037 
1038 		GEM_BUG_ON(!i915_request_completed(request[idx]));
1039 		idx++;
1040 	}
1041 
1042 	err = igt_live_test_end(&t);
1043 
1044 out_request:
1045 	idx = 0;
1046 	for_each_uabi_engine(engine, i915) {
1047 		u32 *cmd;
1048 
1049 		if (!request[idx])
1050 			break;
1051 
1052 		cmd = i915_gem_object_pin_map(request[idx]->batch->obj,
1053 					      I915_MAP_WC);
1054 		if (!IS_ERR(cmd)) {
1055 			*cmd = MI_BATCH_BUFFER_END;
1056 			intel_gt_chipset_flush(engine->gt);
1057 
1058 			i915_gem_object_unpin_map(request[idx]->batch->obj);
1059 		}
1060 
1061 		i915_vma_put(request[idx]->batch);
1062 		i915_request_put(request[idx]);
1063 		idx++;
1064 	}
1065 out_free:
1066 	kfree(request);
1067 	return err;
1068 }
1069 
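/*
 * The parallel workers below exercise different submission patterns:
 * engine1 submits and synchronously waits on one request at a time, engineN
 * submits without waiting to measure raw submission throughput, and the spin
 * variant parks a spinner on every engine to check they really run
 * concurrently.
 */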
1070 static int __live_parallel_engine1(void *arg)
1071 {
1072 	struct intel_engine_cs *engine = arg;
1073 	IGT_TIMEOUT(end_time);
1074 	unsigned long count;
1075 	int err = 0;
1076 
1077 	count = 0;
1078 	intel_engine_pm_get(engine);
1079 	do {
1080 		struct i915_request *rq;
1081 
1082 		rq = i915_request_create(engine->kernel_context);
1083 		if (IS_ERR(rq)) {
1084 			err = PTR_ERR(rq);
1085 			break;
1086 		}
1087 
1088 		i915_request_get(rq);
1089 		i915_request_add(rq);
1090 
1091 		err = 0;
1092 		if (i915_request_wait(rq, 0, HZ / 5) < 0)
1093 			err = -ETIME;
1094 		i915_request_put(rq);
1095 		if (err)
1096 			break;
1097 
1098 		count++;
1099 	} while (!__igt_timeout(end_time, NULL));
1100 	intel_engine_pm_put(engine);
1101 
1102 	pr_info("%s: %lu request + sync\n", engine->name, count);
1103 	return err;
1104 }
1105 
1106 static int __live_parallel_engineN(void *arg)
1107 {
1108 	struct intel_engine_cs *engine = arg;
1109 	IGT_TIMEOUT(end_time);
1110 	unsigned long count;
1111 	int err = 0;
1112 
1113 	count = 0;
1114 	intel_engine_pm_get(engine);
1115 	do {
1116 		struct i915_request *rq;
1117 
1118 		rq = i915_request_create(engine->kernel_context);
1119 		if (IS_ERR(rq)) {
1120 			err = PTR_ERR(rq);
1121 			break;
1122 		}
1123 
1124 		i915_request_add(rq);
1125 		count++;
1126 	} while (!__igt_timeout(end_time, NULL));
1127 	intel_engine_pm_put(engine);
1128 
1129 	pr_info("%s: %lu requests\n", engine->name, count);
1130 	return err;
1131 }
1132 
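/*
 * i915->selftest.counter is used as a simple barrier: live_parallel_engines()
 * preloads it with the number of engines, each spinner decrements it once it
 * is running, and the last one to arrive wakes everyone blocked in
 * wait_for_all().
 */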
1133 static bool wake_all(struct drm_i915_private *i915)
1134 {
1135 	if (atomic_dec_and_test(&i915->selftest.counter)) {
1136 		wake_up_var(&i915->selftest.counter);
1137 		return true;
1138 	}
1139 
1140 	return false;
1141 }
1142 
1143 static int wait_for_all(struct drm_i915_private *i915)
1144 {
1145 	if (wake_all(i915))
1146 		return 0;
1147 
1148 	if (wait_var_event_timeout(&i915->selftest.counter,
1149 				   !atomic_read(&i915->selftest.counter),
1150 				   i915_selftest.timeout_jiffies))
1151 		return 0;
1152 
1153 	return -ETIME;
1154 }
1155 
1156 static int __live_parallel_spin(void *arg)
1157 {
1158 	struct intel_engine_cs *engine = arg;
1159 	struct igt_spinner spin;
1160 	struct i915_request *rq;
1161 	int err = 0;
1162 
1163 	/*
1164 	 * Create a spinner running for eternity on each engine. If a second
1165 	 * spinner is incorrectly placed on the same engine, it will not be
1166 	 * able to start in time.
1167 	 */
1168 
1169 	if (igt_spinner_init(&spin, engine->gt)) {
1170 		wake_all(engine->i915);
1171 		return -ENOMEM;
1172 	}
1173 
1174 	intel_engine_pm_get(engine);
1175 	rq = igt_spinner_create_request(&spin,
1176 					engine->kernel_context,
1177 					MI_NOOP); /* no preemption */
1178 	intel_engine_pm_put(engine);
1179 	if (IS_ERR(rq)) {
1180 		err = PTR_ERR(rq);
1181 		if (err == -ENODEV)
1182 			err = 0;
1183 		wake_all(engine->i915);
1184 		goto out_spin;
1185 	}
1186 
1187 	i915_request_get(rq);
1188 	i915_request_add(rq);
1189 	if (igt_wait_for_spinner(&spin, rq)) {
1190 		/* Occupy this engine for the whole test */
1191 		err = wait_for_all(engine->i915);
1192 	} else {
1193 		pr_err("Failed to start spinner on %s\n", engine->name);
1194 		err = -EINVAL;
1195 	}
1196 	igt_spinner_end(&spin);
1197 
1198 	if (err == 0 && i915_request_wait(rq, 0, HZ / 5) < 0)
1199 		err = -EIO;
1200 	i915_request_put(rq);
1201 
1202 out_spin:
1203 	igt_spinner_fini(&spin);
1204 	return err;
1205 }
1206 
1207 static int live_parallel_engines(void *arg)
1208 {
1209 	struct drm_i915_private *i915 = arg;
1210 	static int (* const func[])(void *arg) = {
1211 		__live_parallel_engine1,
1212 		__live_parallel_engineN,
1213 		__live_parallel_spin,
1214 		NULL,
1215 	};
1216 	const unsigned int nengines = num_uabi_engines(i915);
1217 	struct intel_engine_cs *engine;
1218 	int (* const *fn)(void *arg);
1219 	struct task_struct **tsk;
1220 	int err = 0;
1221 
1222 	/*
1223 	 * Check we can submit requests to all engines concurrently. This
1224 	 * tests that we load up the system maximally.
1225 	 */
1226 
1227 	tsk = kcalloc(nengines, sizeof(*tsk), GFP_KERNEL);
1228 	if (!tsk)
1229 		return -ENOMEM;
1230 
1231 	for (fn = func; !err && *fn; fn++) {
1232 		char name[KSYM_NAME_LEN];
1233 		struct igt_live_test t;
1234 		unsigned int idx;
1235 
1236 		snprintf(name, sizeof(name), "%pS", fn);
1237 		err = igt_live_test_begin(&t, i915, __func__, name);
1238 		if (err)
1239 			break;
1240 
1241 		atomic_set(&i915->selftest.counter, nengines);
1242 
1243 		idx = 0;
1244 		for_each_uabi_engine(engine, i915) {
1245 			tsk[idx] = kthread_run(*fn, engine,
1246 					       "igt/parallel:%s",
1247 					       engine->name);
1248 			if (IS_ERR(tsk[idx])) {
1249 				err = PTR_ERR(tsk[idx]);
1250 				break;
1251 			}
1252 			get_task_struct(tsk[idx++]);
1253 		}
1254 
1255 		yield(); /* start all threads before we kthread_stop() */
1256 
1257 		idx = 0;
1258 		for_each_uabi_engine(engine, i915) {
1259 			int status;
1260 
1261 			if (IS_ERR(tsk[idx]))
1262 				break;
1263 
1264 			status = kthread_stop(tsk[idx]);
1265 			if (status && !err)
1266 				err = status;
1267 
1268 			put_task_struct(tsk[idx++]);
1269 		}
1270 
1271 		if (igt_live_test_end(&t))
1272 			err = -EIO;
1273 	}
1274 
1275 	kfree(tsk);
1276 	return err;
1277 }
1278 
1279 static int
1280 max_batches(struct i915_gem_context *ctx, struct intel_engine_cs *engine)
1281 {
1282 	struct i915_request *rq;
1283 	int ret;
1284 
1285 	/*
1286 	 * Before execlists, all contexts share the same ringbuffer. With
1287 	 * execlists, each context/engine has a separate ringbuffer and
1288 	 * for the purposes of this test, inexhaustible.
1289 	 *
1290 	 * For the global ringbuffer though, we have to be very careful
1291 	 * that we do not wrap while preventing the execution of requests
	 * with an unsignaled fence.
1293 	 */
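	/*
	 * As a rough illustration of the legacy path below: with, say, a
	 * 16 KiB ring and a few hundred bytes emitted per nop request, the
	 * ring size / request size ratio gives on the order of tens of
	 * requests, which we then halve for emergency headroom. The real
	 * figures are measured at runtime; treat these numbers as
	 * illustrative only.
	 */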
1294 	if (HAS_EXECLISTS(ctx->i915))
1295 		return INT_MAX;
1296 
1297 	rq = igt_request_alloc(ctx, engine);
1298 	if (IS_ERR(rq)) {
1299 		ret = PTR_ERR(rq);
1300 	} else {
1301 		int sz;
1302 
1303 		ret = rq->ring->size - rq->reserved_space;
1304 		i915_request_add(rq);
1305 
1306 		sz = rq->ring->emit - rq->head;
1307 		if (sz < 0)
1308 			sz += rq->ring->size;
1309 		ret /= sz;
1310 		ret /= 2; /* leave half spare, in case of emergency! */
1311 	}
1312 
1313 	return ret;
1314 }
1315 
1316 static int live_breadcrumbs_smoketest(void *arg)
1317 {
1318 	struct drm_i915_private *i915 = arg;
1319 	const unsigned int nengines = num_uabi_engines(i915);
1320 	const unsigned int ncpus = num_online_cpus();
1321 	unsigned long num_waits, num_fences;
1322 	struct intel_engine_cs *engine;
1323 	struct task_struct **threads;
1324 	struct igt_live_test live;
1325 	intel_wakeref_t wakeref;
1326 	struct smoketest *smoke;
1327 	unsigned int n, idx;
1328 	struct file *file;
1329 	int ret = 0;
1330 
1331 	/*
1332 	 * Smoketest our breadcrumb/signal handling for requests across multiple
1333 	 * threads. A very simple test to only catch the most egregious of bugs.
1334 	 * See __igt_breadcrumbs_smoketest();
1335 	 *
1336 	 * On real hardware this time.
1337 	 */
1338 
1339 	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
1340 
1341 	file = mock_file(i915);
1342 	if (IS_ERR(file)) {
1343 		ret = PTR_ERR(file);
1344 		goto out_rpm;
1345 	}
1346 
1347 	smoke = kcalloc(nengines, sizeof(*smoke), GFP_KERNEL);
1348 	if (!smoke) {
1349 		ret = -ENOMEM;
1350 		goto out_file;
1351 	}
1352 
1353 	threads = kcalloc(ncpus * nengines, sizeof(*threads), GFP_KERNEL);
1354 	if (!threads) {
1355 		ret = -ENOMEM;
1356 		goto out_smoke;
1357 	}
1358 
1359 	smoke[0].request_alloc = __live_request_alloc;
1360 	smoke[0].ncontexts = 64;
1361 	smoke[0].contexts = kcalloc(smoke[0].ncontexts,
1362 				    sizeof(*smoke[0].contexts),
1363 				    GFP_KERNEL);
1364 	if (!smoke[0].contexts) {
1365 		ret = -ENOMEM;
1366 		goto out_threads;
1367 	}
1368 
1369 	for (n = 0; n < smoke[0].ncontexts; n++) {
1370 		smoke[0].contexts[n] = live_context(i915, file);
1371 		if (!smoke[0].contexts[n]) {
1372 			ret = -ENOMEM;
1373 			goto out_contexts;
1374 		}
1375 	}
1376 
1377 	ret = igt_live_test_begin(&live, i915, __func__, "");
1378 	if (ret)
1379 		goto out_contexts;
1380 
1381 	idx = 0;
1382 	for_each_uabi_engine(engine, i915) {
1383 		smoke[idx] = smoke[0];
1384 		smoke[idx].engine = engine;
1385 		smoke[idx].max_batch =
1386 			max_batches(smoke[0].contexts[0], engine);
1387 		if (smoke[idx].max_batch < 0) {
1388 			ret = smoke[idx].max_batch;
1389 			goto out_flush;
1390 		}
1391 		/* One ring interleaved between requests from all cpus */
1392 		smoke[idx].max_batch /= num_online_cpus() + 1;
1393 		pr_debug("Limiting batches to %d requests on %s\n",
1394 			 smoke[idx].max_batch, engine->name);
1395 
1396 		for (n = 0; n < ncpus; n++) {
1397 			struct task_struct *tsk;
1398 
1399 			tsk = kthread_run(__igt_breadcrumbs_smoketest,
1400 					  &smoke[idx], "igt/%d.%d", idx, n);
1401 			if (IS_ERR(tsk)) {
1402 				ret = PTR_ERR(tsk);
1403 				goto out_flush;
1404 			}
1405 
1406 			get_task_struct(tsk);
1407 			threads[idx * ncpus + n] = tsk;
1408 		}
1409 
1410 		idx++;
1411 	}
1412 
1413 	yield(); /* start all threads before we begin */
1414 	msleep(jiffies_to_msecs(i915_selftest.timeout_jiffies));
1415 
1416 out_flush:
1417 	idx = 0;
1418 	num_waits = 0;
1419 	num_fences = 0;
1420 	for_each_uabi_engine(engine, i915) {
1421 		for (n = 0; n < ncpus; n++) {
1422 			struct task_struct *tsk = threads[idx * ncpus + n];
1423 			int err;
1424 
1425 			if (!tsk)
1426 				continue;
1427 
1428 			err = kthread_stop(tsk);
1429 			if (err < 0 && !ret)
1430 				ret = err;
1431 
1432 			put_task_struct(tsk);
1433 		}
1434 
1435 		num_waits += atomic_long_read(&smoke[idx].num_waits);
1436 		num_fences += atomic_long_read(&smoke[idx].num_fences);
1437 		idx++;
1438 	}
1439 	pr_info("Completed %lu waits for %lu fences across %d engines and %d cpus\n",
1440 		num_waits, num_fences, RUNTIME_INFO(i915)->num_engines, ncpus);
1441 
1442 	ret = igt_live_test_end(&live) ?: ret;
1443 out_contexts:
1444 	kfree(smoke[0].contexts);
1445 out_threads:
1446 	kfree(threads);
1447 out_smoke:
1448 	kfree(smoke);
1449 out_file:
1450 	fput(file);
1451 out_rpm:
1452 	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
1453 
1454 	return ret;
1455 }
1456 
1457 int i915_request_live_selftests(struct drm_i915_private *i915)
1458 {
1459 	static const struct i915_subtest tests[] = {
1460 		SUBTEST(live_nop_request),
1461 		SUBTEST(live_all_engines),
1462 		SUBTEST(live_sequential_engines),
1463 		SUBTEST(live_parallel_engines),
1464 		SUBTEST(live_empty_request),
1465 		SUBTEST(live_breadcrumbs_smoketest),
1466 	};
1467 
1468 	if (intel_gt_is_wedged(&i915->gt))
1469 		return 0;
1470 
1471 	return i915_subtests(tests, i915);
1472 }
1473