/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2017 Intel Corporation
 */

#include <linux/prime_numbers.h>

#include "gem/i915_gem_pm.h"
#include "gt/intel_gt.h"
#include "gt/intel_gt_requests.h"
#include "gt/intel_reset.h"
#include "i915_selftest.h"

#include "gem/selftests/igt_gem_utils.h"
#include "selftests/i915_random.h"
#include "selftests/igt_flush_test.h"
#include "selftests/igt_live_test.h"
#include "selftests/igt_reset.h"
#include "selftests/igt_spinner.h"
#include "selftests/mock_drm.h"
#include "selftests/mock_gem_device.h"

#include "huge_gem_object.h"
#include "igt_gem_utils.h"

#define DW_PER_PAGE (PAGE_SIZE / sizeof(u32))

static int live_nop_switch(void *arg)
{
	const unsigned int nctx = 1024;
	struct drm_i915_private *i915 = arg;
	struct intel_engine_cs *engine;
	struct i915_gem_context **ctx;
	struct igt_live_test t;
	struct drm_file *file;
	unsigned long n;
	int err = -ENODEV;

	/*
	 * Create as many contexts as we can feasibly get away with
	 * and check we can switch between them rapidly.
	 *
	 * Serves as a very simple stress test for submission and HW switching
	 * between contexts.
	 */

	if (!DRIVER_CAPS(i915)->has_logical_contexts)
		return 0;

	file = mock_file(i915);
	if (IS_ERR(file))
		return PTR_ERR(file);

	ctx = kcalloc(nctx, sizeof(*ctx), GFP_KERNEL);
	if (!ctx) {
		err = -ENOMEM;
		goto out_file;
	}

	for (n = 0; n < nctx; n++) {
		ctx[n] = live_context(i915, file);
		if (IS_ERR(ctx[n])) {
			err = PTR_ERR(ctx[n]);
			goto out_file;
		}
	}

	for_each_uabi_engine(engine, i915) {
		struct i915_request *rq;
		unsigned long end_time, prime;
		ktime_t times[2] = {};

		times[0] = ktime_get_raw();
		for (n = 0; n < nctx; n++) {
			rq = igt_request_alloc(ctx[n], engine);
			if (IS_ERR(rq)) {
				err = PTR_ERR(rq);
				goto out_file;
			}
			i915_request_add(rq);
		}
		if (i915_request_wait(rq, 0, HZ / 5) < 0) {
			pr_err("Failed to populate %d contexts\n", nctx);
			intel_gt_set_wedged(&i915->gt);
			err = -EIO;
			goto out_file;
		}

		times[1] = ktime_get_raw();

		pr_info("Populated %d contexts on %s in %lluns\n",
			nctx, engine->name, ktime_to_ns(times[1] - times[0]));

		err = igt_live_test_begin(&t, i915, __func__, engine->name);
		if (err)
			goto out_file;

		end_time = jiffies + i915_selftest.timeout_jiffies;
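		/*
		 * Submit ever larger batches of empty requests, using
		 * prime batch lengths (presumably to avoid aliasing with
		 * any periodic behaviour), and report the batch time
		 * divided by (prime - 1) as the per-switch latency.
		 */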
		for_each_prime_number_from(prime, 2, 8192) {
			times[1] = ktime_get_raw();

			for (n = 0; n < prime; n++) {
				rq = igt_request_alloc(ctx[n % nctx], engine);
				if (IS_ERR(rq)) {
					err = PTR_ERR(rq);
					goto out_file;
				}

				/*
				 * This space is left intentionally blank.
				 *
				 * We do not actually want to perform any
				 * action with this request, we just want
				 * to measure the latency in allocation
				 * and submission of our breadcrumbs -
				 * ensuring that the bare request is sufficient
				 * for the system to work (i.e. proper HEAD
				 * tracking of the rings, interrupt handling,
				 * etc). It also gives us the lowest bounds
				 * for latency.
				 */

				i915_request_add(rq);
			}
			if (i915_request_wait(rq, 0, HZ / 5) < 0) {
				pr_err("Switching between %lu contexts timed out\n",
				       prime);
				intel_gt_set_wedged(&i915->gt);
				break;
			}

			times[1] = ktime_sub(ktime_get_raw(), times[1]);
			if (prime == 2)
				times[0] = times[1];

			if (__igt_timeout(end_time, NULL))
				break;
		}

		err = igt_live_test_end(&t);
		if (err)
			goto out_file;

		pr_info("Switch latencies on %s: 1 = %lluns, %lu = %lluns\n",
			engine->name,
			ktime_to_ns(times[0]),
			prime - 1, div64_u64(ktime_to_ns(times[1]), prime - 1));
	}

out_file:
	mock_file_free(i915, file);
	return err;
}

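/*
 * Each worker owns two pinned contexts (ce[0], ce[1]) on the same engine
 * and runs as its own kthread, so that context switches can be driven on
 * every engine in parallel.
 */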
struct parallel_switch {
	struct task_struct *tsk;
	struct intel_context *ce[2];
};

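/*
 * Synchronous variant: submit one request on each context in turn and
 * wait for the chained pair to complete before starting the next round.
 */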
static int __live_parallel_switch1(void *data)
{
	struct parallel_switch *arg = data;
	IGT_TIMEOUT(end_time);
	unsigned long count;

	count = 0;
	do {
		struct i915_request *rq = NULL;
		int err, n;

		err = 0;
		for (n = 0; !err && n < ARRAY_SIZE(arg->ce); n++) {
			struct i915_request *prev = rq;

			rq = i915_request_create(arg->ce[n]);
			if (IS_ERR(rq)) {
				i915_request_put(prev);
				return PTR_ERR(rq);
			}

			i915_request_get(rq);
			if (prev) {
				err = i915_request_await_dma_fence(rq, &prev->fence);
				i915_request_put(prev);
			}

			i915_request_add(rq);
		}
		if (i915_request_wait(rq, 0, HZ / 5) < 0)
			err = -ETIME;
		i915_request_put(rq);
		if (err)
			return err;

		count++;
	} while (!__igt_timeout(end_time, NULL));

	pr_info("%s: %lu switches (sync)\n", arg->ce[0]->engine->name, count);
	return 0;
}

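/*
 * Pipelined variant: keep submitting chained requests across both
 * contexts without waiting for completion; only a reference to the most
 * recent request is kept and dropped once the timeout expires.
 */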
static int __live_parallel_switchN(void *data)
{
	struct parallel_switch *arg = data;
	struct i915_request *rq = NULL;
	IGT_TIMEOUT(end_time);
	unsigned long count;
	int n;

	count = 0;
	do {
		for (n = 0; n < ARRAY_SIZE(arg->ce); n++) {
			struct i915_request *prev = rq;
			int err = 0;

			rq = i915_request_create(arg->ce[n]);
			if (IS_ERR(rq)) {
				i915_request_put(prev);
				return PTR_ERR(rq);
			}

			i915_request_get(rq);
			if (prev) {
				err = i915_request_await_dma_fence(rq, &prev->fence);
				i915_request_put(prev);
			}

			i915_request_add(rq);
			if (err) {
				i915_request_put(rq);
				return err;
			}
		}

		count++;
	} while (!__igt_timeout(end_time, NULL));
	i915_request_put(rq);

	pr_info("%s: %lu switches (many)\n", arg->ce[0]->engine->name, count);
	return 0;
}

static int live_parallel_switch(void *arg)
{
	struct drm_i915_private *i915 = arg;
	static int (* const func[])(void *arg) = {
		__live_parallel_switch1,
		__live_parallel_switchN,
		NULL,
	};
	struct parallel_switch *data = NULL;
	struct i915_gem_engines *engines;
	struct i915_gem_engines_iter it;
	int (* const *fn)(void *arg);
	struct i915_gem_context *ctx;
	struct intel_context *ce;
	struct drm_file *file;
	int n, m, count;
	int err = 0;

	/*
	 * Check we can process switches on all engines simultaneously.
	 */

	if (!DRIVER_CAPS(i915)->has_logical_contexts)
		return 0;

	file = mock_file(i915);
	if (IS_ERR(file))
		return PTR_ERR(file);

	ctx = live_context(i915, file);
	if (IS_ERR(ctx)) {
		err = PTR_ERR(ctx);
		goto out_file;
	}

	engines = i915_gem_context_lock_engines(ctx);
	count = engines->num_engines;

	data = kcalloc(count, sizeof(*data), GFP_KERNEL);
	if (!data) {
		i915_gem_context_unlock_engines(ctx);
		err = -ENOMEM;
		goto out_file;
	}

	m = 0; /* Use the first context as our template for the engines */
	for_each_gem_engine(ce, engines, it) {
		err = intel_context_pin(ce);
		if (err) {
			i915_gem_context_unlock_engines(ctx);
			goto out;
		}
		data[m++].ce[0] = intel_context_get(ce);
	}
	i915_gem_context_unlock_engines(ctx);

	/* Clone the same set of engines into the other contexts */
	for (n = 1; n < ARRAY_SIZE(data->ce); n++) {
		ctx = live_context(i915, file);
		if (IS_ERR(ctx)) {
			err = PTR_ERR(ctx);
			goto out;
		}

		for (m = 0; m < count; m++) {
			if (!data[m].ce[0])
				continue;

			ce = intel_context_create(ctx, data[m].ce[0]->engine);
			if (IS_ERR(ce)) {
				err = PTR_ERR(ce);
				goto out;
			}

			err = intel_context_pin(ce);
			if (err) {
				intel_context_put(ce);
				goto out;
			}

			data[m].ce[n] = ce;
		}
	}

	for (fn = func; !err && *fn; fn++) {
		struct igt_live_test t;
		int n;

		err = igt_live_test_begin(&t, i915, __func__, "");
		if (err)
			break;

		for (n = 0; n < count; n++) {
			if (!data[n].ce[0])
				continue;

			data[n].tsk = kthread_run(*fn, &data[n],
						  "igt/parallel:%s",
						  data[n].ce[0]->engine->name);
			if (IS_ERR(data[n].tsk)) {
				err = PTR_ERR(data[n].tsk);
				break;
			}
			get_task_struct(data[n].tsk);
		}

		yield(); /* start all threads before we kthread_stop() */

		for (n = 0; n < count; n++) {
			int status;

			if (IS_ERR_OR_NULL(data[n].tsk))
				continue;

			status = kthread_stop(data[n].tsk);
			if (status && !err)
				err = status;

			put_task_struct(data[n].tsk);
			data[n].tsk = NULL;
		}

		if (igt_live_test_end(&t))
			err = -EIO;
	}

out:
	for (n = 0; n < count; n++) {
		for (m = 0; m < ARRAY_SIZE(data->ce); m++) {
			if (!data[n].ce[m])
				continue;

			intel_context_unpin(data[n].ce[m]);
			intel_context_put(data[n].ce[m]);
		}
	}
	kfree(data);
out_file:
	mock_file_free(i915, file);
	return err;
}

static unsigned long real_page_count(struct drm_i915_gem_object *obj)
{
	return huge_gem_object_phys_size(obj) >> PAGE_SHIFT;
}

static unsigned long fake_page_count(struct drm_i915_gem_object *obj)
{
	return huge_gem_object_dma_size(obj) >> PAGE_SHIFT;
}

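/*
 * Fill pass 'dw': bind the object into the context's VM and, via
 * igt_gpu_fill_dw(), have the GPU store the value 'dw' into the dw'th
 * dword of each real page; see the comment below for how the huge
 * object remaps its backing store.
 */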
static int gpu_fill(struct intel_context *ce,
		    struct drm_i915_gem_object *obj,
		    unsigned int dw)
{
	struct i915_vma *vma;
	int err;

	GEM_BUG_ON(obj->base.size > ce->vm->total);
	GEM_BUG_ON(!intel_engine_can_store_dword(ce->engine));

	vma = i915_vma_instance(obj, ce->vm, NULL);
	if (IS_ERR(vma))
		return PTR_ERR(vma);

	err = i915_vma_pin(vma, 0, 0, PIN_HIGH | PIN_USER);
	if (err)
		return err;

	/*
	 * Within the GTT the huge object maps every page onto
	 * its 1024 real pages (using phys_pfn = dma_pfn % 1024).
	 * We set the nth dword within the page using the nth
	 * mapping via the GTT - this should exercise the GTT mapping
	 * whilst checking that each context provides a unique view
	 * into the object.
	 */
	err = igt_gpu_fill_dw(ce, vma,
			      (dw * real_page_count(obj)) << PAGE_SHIFT |
			      (dw * sizeof(u32)),
			      real_page_count(obj),
			      dw);
	i915_vma_unpin(vma);

	return err;
}

static int cpu_fill(struct drm_i915_gem_object *obj, u32 value)
{
	const bool has_llc = HAS_LLC(to_i915(obj->base.dev));
	unsigned int n, m, need_flush;
	int err;

	err = i915_gem_object_prepare_write(obj, &need_flush);
	if (err)
		return err;

	for (n = 0; n < real_page_count(obj); n++) {
		u32 *map;

		map = kmap_atomic(i915_gem_object_get_page(obj, n));
		for (m = 0; m < DW_PER_PAGE; m++)
			map[m] = value;
		if (!has_llc)
			drm_clflush_virt_range(map, PAGE_SIZE);
		kunmap_atomic(map);
	}

	i915_gem_object_finish_access(obj);
	obj->read_domains = I915_GEM_DOMAIN_GTT | I915_GEM_DOMAIN_CPU;
	obj->write_domain = 0;
	return 0;
}

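/*
 * Read the object back on the CPU: the first 'max' dwords of every page
 * must contain the values written by gpu_fill() (dword n == n), and the
 * remaining dwords must still hold the STACK_MAGIC poison written by
 * cpu_fill().
 */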
static noinline int cpu_check(struct drm_i915_gem_object *obj,
			      unsigned int idx, unsigned int max)
{
	unsigned int n, m, needs_flush;
	int err;

	err = i915_gem_object_prepare_read(obj, &needs_flush);
	if (err)
		return err;

	for (n = 0; n < real_page_count(obj); n++) {
		u32 *map;

		map = kmap_atomic(i915_gem_object_get_page(obj, n));
		if (needs_flush & CLFLUSH_BEFORE)
			drm_clflush_virt_range(map, PAGE_SIZE);

		for (m = 0; m < max; m++) {
			if (map[m] != m) {
				pr_err("%pS: Invalid value at object %d page %d/%ld, offset %d/%d: found %x expected %x\n",
				       __builtin_return_address(0), idx,
				       n, real_page_count(obj), m, max,
				       map[m], m);
				err = -EINVAL;
				goto out_unmap;
			}
		}

		for (; m < DW_PER_PAGE; m++) {
			if (map[m] != STACK_MAGIC) {
				pr_err("%pS: Invalid value at object %d page %d, offset %d: found %x expected %x (uninitialised)\n",
				       __builtin_return_address(0), idx, n, m,
				       map[m], STACK_MAGIC);
				err = -EINVAL;
				goto out_unmap;
			}
		}

out_unmap:
		kunmap_atomic(map);
		if (err)
			break;
	}

	i915_gem_object_finish_access(obj);
	return err;
}

static int file_add_object(struct drm_file *file,
			    struct drm_i915_gem_object *obj)
{
	int err;

	GEM_BUG_ON(obj->base.handle_count);

	/* tie the object to the drm_file for easy reaping */
	err = idr_alloc(&file->object_idr, &obj->base, 1, 0, GFP_KERNEL);
	if (err < 0)
		return err;

	i915_gem_object_get(obj);
	obj->base.handle_count++;
	return 0;
}

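/*
 * Test objects are huge_gem_objects: their reported size may be up to
 * half of the vm, but they are backed by only DW_PER_PAGE real pages.
 * The backing store is poisoned with STACK_MAGIC so that cpu_check()
 * can tell written dwords apart from untouched ones.
 */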
static struct drm_i915_gem_object *
create_test_object(struct i915_address_space *vm,
		   struct drm_file *file,
		   struct list_head *objects)
{
	struct drm_i915_gem_object *obj;
	u64 size;
	int err;

	/* Keep in GEM's good graces */
	intel_gt_retire_requests(vm->gt);

	size = min(vm->total / 2, 1024ull * DW_PER_PAGE * PAGE_SIZE);
	size = round_down(size, DW_PER_PAGE * PAGE_SIZE);

	obj = huge_gem_object(vm->i915, DW_PER_PAGE * PAGE_SIZE, size);
	if (IS_ERR(obj))
		return obj;

	err = file_add_object(file, obj);
	i915_gem_object_put(obj);
	if (err)
		return ERR_PTR(err);

	err = cpu_fill(obj, STACK_MAGIC);
	if (err) {
		pr_err("Failed to fill object with cpu, err=%d\n",
		       err);
		return ERR_PTR(err);
	}

	list_add_tail(&obj->st_link, objects);
	return obj;
}

static unsigned long max_dwords(struct drm_i915_gem_object *obj)
{
	unsigned long npages = fake_page_count(obj);

	GEM_BUG_ON(!IS_ALIGNED(npages, DW_PER_PAGE));
	return npages / DW_PER_PAGE;
}

static void throttle_release(struct i915_request **q, int count)
{
	int i;

	for (i = 0; i < count; i++) {
		if (IS_ERR_OR_NULL(q[i]))
			continue;

		i915_request_put(fetch_and_zero(&q[i]));
	}
}

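/*
 * Keep a small ring of outstanding requests: before adding a new request
 * on the context, wait for the oldest in the queue so the selftest cannot
 * run arbitrarily far ahead of the GPU and exhaust memory with unretired
 * requests.
 */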
static int throttle(struct intel_context *ce,
		    struct i915_request **q, int count)
{
	int i;

	if (!IS_ERR_OR_NULL(q[0])) {
		if (i915_request_wait(q[0],
				      I915_WAIT_INTERRUPTIBLE,
				      MAX_SCHEDULE_TIMEOUT) < 0)
			return -EINTR;

		i915_request_put(q[0]);
	}

	for (i = 0; i < count - 1; i++)
		q[i] = q[i + 1];

	q[i] = intel_context_create_request(ce);
	if (IS_ERR(q[i]))
		return PTR_ERR(q[i]);

	i915_request_get(q[i]);
	i915_request_add(q[i]);

	return 0;
}

static int igt_ctx_exec(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct intel_engine_cs *engine;
	int err = -ENODEV;

	/*
	 * Create a few different contexts (with different mm) and write
	 * through each ctx/mm using the GPU, making sure those writes end
	 * up in the expected pages of our obj.
	 */

	if (!DRIVER_CAPS(i915)->has_logical_contexts)
		return 0;

	for_each_uabi_engine(engine, i915) {
		struct drm_i915_gem_object *obj = NULL;
		unsigned long ncontexts, ndwords, dw;
		struct i915_request *tq[5] = {};
		struct igt_live_test t;
		struct drm_file *file;
		IGT_TIMEOUT(end_time);
		LIST_HEAD(objects);

		if (!intel_engine_can_store_dword(engine))
			continue;

		if (!engine->context_size)
			continue; /* No logical context support in HW */

		file = mock_file(i915);
		if (IS_ERR(file))
			return PTR_ERR(file);

		err = igt_live_test_begin(&t, i915, __func__, engine->name);
		if (err)
			goto out_file;

		ncontexts = 0;
		ndwords = 0;
		dw = 0;
		while (!time_after(jiffies, end_time)) {
			struct i915_gem_context *ctx;
			struct intel_context *ce;

			ctx = kernel_context(i915);
			if (IS_ERR(ctx)) {
				err = PTR_ERR(ctx);
				goto out_file;
			}

			ce = i915_gem_context_get_engine(ctx, engine->legacy_idx);
			GEM_BUG_ON(IS_ERR(ce));

			if (!obj) {
				obj = create_test_object(ce->vm, file, &objects);
				if (IS_ERR(obj)) {
					err = PTR_ERR(obj);
					intel_context_put(ce);
					kernel_context_close(ctx);
					goto out_file;
				}
			}

			err = gpu_fill(ce, obj, dw);
			if (err) {
				pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) [full-ppgtt? %s], err=%d\n",
				       ndwords, dw, max_dwords(obj),
				       engine->name,
				       yesno(!!rcu_access_pointer(ctx->vm)),
				       err);
				intel_context_put(ce);
				kernel_context_close(ctx);
				goto out_file;
			}

			err = throttle(ce, tq, ARRAY_SIZE(tq));
			if (err) {
				intel_context_put(ce);
				kernel_context_close(ctx);
				goto out_file;
			}

			if (++dw == max_dwords(obj)) {
				obj = NULL;
				dw = 0;
			}

			ndwords++;
			ncontexts++;

			intel_context_put(ce);
			kernel_context_close(ctx);
		}

		pr_info("Submitted %lu contexts to %s, filling %lu dwords\n",
			ncontexts, engine->name, ndwords);

		ncontexts = dw = 0;
		list_for_each_entry(obj, &objects, st_link) {
			unsigned int rem =
				min_t(unsigned int, ndwords - dw, max_dwords(obj));

			err = cpu_check(obj, ncontexts++, rem);
			if (err)
				break;

			dw += rem;
		}

out_file:
		throttle_release(tq, ARRAY_SIZE(tq));
		if (igt_live_test_end(&t))
			err = -EIO;

		mock_file_free(i915, file);
		if (err)
			return err;

		i915_gem_drain_freed_objects(i915);
	}

	return 0;
}

static int igt_shared_ctx_exec(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct i915_request *tq[5] = {};
	struct i915_gem_context *parent;
	struct intel_engine_cs *engine;
	struct igt_live_test t;
	struct drm_file *file;
	int err = 0;

	/*
	 * Create a few different contexts with the same mm and write
	 * through each ctx using the GPU, making sure those writes end
	 * up in the expected pages of our obj.
	 */
	if (!DRIVER_CAPS(i915)->has_logical_contexts)
		return 0;

	file = mock_file(i915);
	if (IS_ERR(file))
		return PTR_ERR(file);

	parent = live_context(i915, file);
	if (IS_ERR(parent)) {
		err = PTR_ERR(parent);
		goto out_file;
	}

	if (!parent->vm) { /* not full-ppgtt; nothing to share */
		err = 0;
		goto out_file;
	}

	err = igt_live_test_begin(&t, i915, __func__, "");
	if (err)
		goto out_file;

	for_each_uabi_engine(engine, i915) {
		unsigned long ncontexts, ndwords, dw;
		struct drm_i915_gem_object *obj = NULL;
		IGT_TIMEOUT(end_time);
		LIST_HEAD(objects);

		if (!intel_engine_can_store_dword(engine))
			continue;

		dw = 0;
		ndwords = 0;
		ncontexts = 0;
		while (!time_after(jiffies, end_time)) {
			struct i915_gem_context *ctx;
			struct intel_context *ce;

			ctx = kernel_context(i915);
			if (IS_ERR(ctx)) {
				err = PTR_ERR(ctx);
				goto out_test;
			}

			mutex_lock(&ctx->mutex);
			__assign_ppgtt(ctx, parent->vm);
			mutex_unlock(&ctx->mutex);

			ce = i915_gem_context_get_engine(ctx, engine->legacy_idx);
			GEM_BUG_ON(IS_ERR(ce));

			if (!obj) {
				obj = create_test_object(parent->vm, file, &objects);
				if (IS_ERR(obj)) {
					err = PTR_ERR(obj);
					intel_context_put(ce);
					kernel_context_close(ctx);
					goto out_test;
				}
			}

			err = gpu_fill(ce, obj, dw);
			if (err) {
				pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) [full-ppgtt? %s], err=%d\n",
				       ndwords, dw, max_dwords(obj),
				       engine->name,
				       yesno(!!rcu_access_pointer(ctx->vm)),
				       err);
				intel_context_put(ce);
				kernel_context_close(ctx);
				goto out_test;
			}

			err = throttle(ce, tq, ARRAY_SIZE(tq));
			if (err) {
				intel_context_put(ce);
				kernel_context_close(ctx);
				goto out_test;
			}

			if (++dw == max_dwords(obj)) {
				obj = NULL;
				dw = 0;
			}

			ndwords++;
			ncontexts++;

			intel_context_put(ce);
			kernel_context_close(ctx);
		}
		pr_info("Submitted %lu contexts to %s, filling %lu dwords\n",
			ncontexts, engine->name, ndwords);

		ncontexts = dw = 0;
		list_for_each_entry(obj, &objects, st_link) {
			unsigned int rem =
				min_t(unsigned int, ndwords - dw, max_dwords(obj));

			err = cpu_check(obj, ncontexts++, rem);
			if (err)
				goto out_test;

			dw += rem;
		}

		i915_gem_drain_freed_objects(i915);
	}
out_test:
	throttle_release(tq, ARRAY_SIZE(tq));
	if (igt_live_test_end(&t))
		err = -EIO;
out_file:
	mock_file_free(i915, file);
	return err;
}

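/*
 * Build a one-page batch that stores GEN8_R_PWR_CLK_STATE (the RPCS
 * register) into the start of the supplied vma, so that the slice
 * configuration the context actually ran with can be read back by the
 * CPU afterwards.
 */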
static struct i915_vma *rpcs_query_batch(struct i915_vma *vma)
{
	struct drm_i915_gem_object *obj;
	u32 *cmd;
	int err;

	if (INTEL_GEN(vma->vm->i915) < 8)
		return ERR_PTR(-EINVAL);

	obj = i915_gem_object_create_internal(vma->vm->i915, PAGE_SIZE);
	if (IS_ERR(obj))
		return ERR_CAST(obj);

	cmd = i915_gem_object_pin_map(obj, I915_MAP_WB);
	if (IS_ERR(cmd)) {
		err = PTR_ERR(cmd);
		goto err;
	}

	*cmd++ = MI_STORE_REGISTER_MEM_GEN8;
	*cmd++ = i915_mmio_reg_offset(GEN8_R_PWR_CLK_STATE);
	*cmd++ = lower_32_bits(vma->node.start);
	*cmd++ = upper_32_bits(vma->node.start);
	*cmd = MI_BATCH_BUFFER_END;

	__i915_gem_object_flush_map(obj, 0, 64);
	i915_gem_object_unpin_map(obj);

	intel_gt_chipset_flush(vma->vm->gt);

	vma = i915_vma_instance(obj, vma->vm, NULL);
	if (IS_ERR(vma)) {
		err = PTR_ERR(vma);
		goto err;
	}

	err = i915_vma_pin(vma, 0, 0, PIN_USER);
	if (err)
		goto err;

	return vma;

err:
	i915_gem_object_put(obj);
	return ERR_PTR(err);
}

static int
emit_rpcs_query(struct drm_i915_gem_object *obj,
		struct intel_context *ce,
		struct i915_request **rq_out)
{
	struct i915_request *rq;
	struct i915_vma *batch;
	struct i915_vma *vma;
	int err;

	GEM_BUG_ON(!intel_engine_can_store_dword(ce->engine));

	vma = i915_vma_instance(obj, ce->vm, NULL);
	if (IS_ERR(vma))
		return PTR_ERR(vma);

	i915_gem_object_lock(obj);
	err = i915_gem_object_set_to_gtt_domain(obj, false);
	i915_gem_object_unlock(obj);
	if (err)
		return err;

	err = i915_vma_pin(vma, 0, 0, PIN_USER);
	if (err)
		return err;

	batch = rpcs_query_batch(vma);
	if (IS_ERR(batch)) {
		err = PTR_ERR(batch);
		goto err_vma;
	}

	rq = i915_request_create(ce);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto err_batch;
	}

	err = rq->engine->emit_bb_start(rq,
					batch->node.start, batch->node.size,
					0);
	if (err)
		goto err_request;

	i915_vma_lock(batch);
	err = i915_request_await_object(rq, batch->obj, false);
	if (err == 0)
		err = i915_vma_move_to_active(batch, rq, 0);
	i915_vma_unlock(batch);
	if (err)
		goto skip_request;

	i915_vma_lock(vma);
	err = i915_request_await_object(rq, vma->obj, true);
	if (err == 0)
		err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
	i915_vma_unlock(vma);
	if (err)
		goto skip_request;

	i915_vma_unpin_and_release(&batch, 0);
	i915_vma_unpin(vma);

	*rq_out = i915_request_get(rq);

	i915_request_add(rq);

	return 0;

skip_request:
	i915_request_skip(rq, err);
err_request:
	i915_request_add(rq);
err_batch:
	i915_vma_unpin_and_release(&batch, 0);
err_vma:
	i915_vma_unpin(vma);

	return err;
}

#define TEST_IDLE	BIT(0)
#define TEST_BUSY	BIT(1)
#define TEST_RESET	BIT(2)

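/*
 * For the TEST_BUSY/TEST_RESET phases, start a spinner on the context so
 * that the subsequent sseu reconfiguration is applied while the engine is
 * demonstrably busy (or about to be reset).
 */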
static int
__sseu_prepare(const char *name,
	       unsigned int flags,
	       struct intel_context *ce,
	       struct igt_spinner **spin)
{
	struct i915_request *rq;
	int ret;

	*spin = NULL;
	if (!(flags & (TEST_BUSY | TEST_RESET)))
		return 0;

	*spin = kzalloc(sizeof(**spin), GFP_KERNEL);
	if (!*spin)
		return -ENOMEM;

	ret = igt_spinner_init(*spin, ce->engine->gt);
	if (ret)
		goto err_free;

	rq = igt_spinner_create_request(*spin, ce, MI_NOOP);
	if (IS_ERR(rq)) {
		ret = PTR_ERR(rq);
		goto err_fini;
	}

	i915_request_add(rq);

	if (!igt_wait_for_spinner(*spin, rq)) {
		pr_err("%s: Spinner failed to start!\n", name);
		ret = -ETIMEDOUT;
		goto err_end;
	}

	return 0;

err_end:
	igt_spinner_end(*spin);
err_fini:
	igt_spinner_fini(*spin);
err_free:
	kfree(fetch_and_zero(spin));
	return ret;
}

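/*
 * Run an RPCS query on the given context, decode the slice count from the
 * result and return it (or a negative error), also passing the raw RPCS
 * value back for diagnostics.
 */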
static int
__read_slice_count(struct intel_context *ce,
		   struct drm_i915_gem_object *obj,
		   struct igt_spinner *spin,
		   u32 *rpcs)
{
	struct i915_request *rq = NULL;
	u32 s_mask, s_shift;
	unsigned int cnt;
	u32 *buf, val;
	long ret;

	ret = emit_rpcs_query(obj, ce, &rq);
	if (ret)
		return ret;

	if (spin)
		igt_spinner_end(spin);

	ret = i915_request_wait(rq, 0, MAX_SCHEDULE_TIMEOUT);
	i915_request_put(rq);
	if (ret < 0)
		return ret;

	buf = i915_gem_object_pin_map(obj, I915_MAP_WB);
	if (IS_ERR(buf)) {
		ret = PTR_ERR(buf);
		return ret;
	}

	if (INTEL_GEN(ce->engine->i915) >= 11) {
		s_mask = GEN11_RPCS_S_CNT_MASK;
		s_shift = GEN11_RPCS_S_CNT_SHIFT;
	} else {
		s_mask = GEN8_RPCS_S_CNT_MASK;
		s_shift = GEN8_RPCS_S_CNT_SHIFT;
	}

	val = *buf;
	cnt = (val & s_mask) >> s_shift;
	*rpcs = val;

	i915_gem_object_unpin_map(obj);

	return cnt;
}

static int
__check_rpcs(const char *name, u32 rpcs, int slices, unsigned int expected,
	     const char *prefix, const char *suffix)
{
	if (slices == expected)
		return 0;

	if (slices < 0) {
		pr_err("%s: %s read slice count failed with %d%s\n",
		       name, prefix, slices, suffix);
		return slices;
	}

	pr_err("%s: %s slice count %d is not %u%s\n",
	       name, prefix, slices, expected, suffix);

	pr_info("RPCS=0x%x; %u%sx%u%s\n",
		rpcs, slices,
		(rpcs & GEN8_RPCS_S_CNT_ENABLE) ? "*" : "",
		(rpcs & GEN8_RPCS_SS_CNT_MASK) >> GEN8_RPCS_SS_CNT_SHIFT,
		(rpcs & GEN8_RPCS_SS_CNT_ENABLE) ? "*" : "");

	return -EINVAL;
}

static int
__sseu_finish(const char *name,
	      unsigned int flags,
	      struct intel_context *ce,
	      struct drm_i915_gem_object *obj,
	      unsigned int expected,
	      struct igt_spinner *spin)
{
	unsigned int slices = hweight32(ce->engine->sseu.slice_mask);
	u32 rpcs = 0;
	int ret = 0;

	if (flags & TEST_RESET) {
		ret = intel_engine_reset(ce->engine, "sseu");
		if (ret)
			goto out;
	}

	ret = __read_slice_count(ce, obj,
				 flags & TEST_RESET ? NULL : spin, &rpcs);
	ret = __check_rpcs(name, rpcs, ret, expected, "Context", "!");
	if (ret)
		goto out;

	ret = __read_slice_count(ce->engine->kernel_context, obj, NULL, &rpcs);
	ret = __check_rpcs(name, rpcs, ret, slices, "Kernel context", "!");

out:
	if (spin)
		igt_spinner_end(spin);

	if ((flags & TEST_IDLE) && ret == 0) {
		ret = intel_gt_wait_for_idle(ce->engine->gt,
					     MAX_SCHEDULE_TIMEOUT);
		if (ret)
			return ret;

		ret = __read_slice_count(ce, obj, NULL, &rpcs);
		ret = __check_rpcs(name, rpcs, ret, expected,
				   "Context", " after idle!");
	}

	return ret;
}

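/*
 * Apply the requested sseu configuration to the context and then verify,
 * via __sseu_finish(), that both the reconfigured context and the kernel
 * context report the expected slice counts.
 */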
static int
__sseu_test(const char *name,
	    unsigned int flags,
	    struct intel_context *ce,
	    struct drm_i915_gem_object *obj,
	    struct intel_sseu sseu)
{
	struct igt_spinner *spin = NULL;
	int ret;

	ret = __sseu_prepare(name, flags, ce, &spin);
	if (ret)
		return ret;

	ret = intel_context_reconfigure_sseu(ce, sseu);
	if (ret)
		goto out_spin;

	ret = __sseu_finish(name, flags, ce, obj,
			    hweight32(sseu.slice_mask), spin);

out_spin:
	if (spin) {
		igt_spinner_end(spin);
		igt_spinner_fini(spin);
		kfree(spin);
	}
	return ret;
}

static int
__igt_ctx_sseu(struct drm_i915_private *i915,
	       const char *name,
	       unsigned int flags)
{
	struct drm_i915_gem_object *obj;
	int inst = 0;
	int ret = 0;

	if (INTEL_GEN(i915) < 9 || !RUNTIME_INFO(i915)->sseu.has_slice_pg)
		return 0;

	if (flags & TEST_RESET)
		igt_global_reset_lock(&i915->gt);

	obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
	if (IS_ERR(obj)) {
		ret = PTR_ERR(obj);
		goto out_unlock;
	}

	do {
		struct intel_engine_cs *engine;
		struct intel_context *ce;
		struct intel_sseu pg_sseu;

		engine = intel_engine_lookup_user(i915,
						  I915_ENGINE_CLASS_RENDER,
						  inst++);
		if (!engine)
			break;

		if (hweight32(engine->sseu.slice_mask) < 2)
			continue;

		/*
		 * Gen11 VME friendly power-gated configuration with
		 * half enabled sub-slices.
		 */
		pg_sseu = engine->sseu;
		pg_sseu.slice_mask = 1;
		pg_sseu.subslice_mask =
			~(~0 << (hweight32(engine->sseu.subslice_mask) / 2));

		pr_info("%s: SSEU subtest '%s', flags=%x, def_slices=%u, pg_slices=%u\n",
			engine->name, name, flags,
			hweight32(engine->sseu.slice_mask),
			hweight32(pg_sseu.slice_mask));

		ce = intel_context_create(engine->kernel_context->gem_context,
					  engine);
		if (IS_ERR(ce)) {
			ret = PTR_ERR(ce);
			goto out_put;
		}

		ret = intel_context_pin(ce);
		if (ret)
			goto out_ce;

		/* First set the default mask. */
		ret = __sseu_test(name, flags, ce, obj, engine->sseu);
		if (ret)
			goto out_unpin;

		/* Then set a power-gated configuration. */
		ret = __sseu_test(name, flags, ce, obj, pg_sseu);
		if (ret)
			goto out_unpin;

		/* Back to defaults. */
		ret = __sseu_test(name, flags, ce, obj, engine->sseu);
		if (ret)
			goto out_unpin;

		/* One last power-gated configuration for the road. */
		ret = __sseu_test(name, flags, ce, obj, pg_sseu);
		if (ret)
			goto out_unpin;

out_unpin:
		intel_context_unpin(ce);
out_ce:
		intel_context_put(ce);
	} while (!ret);

	if (igt_flush_test(i915))
		ret = -EIO;

out_put:
	i915_gem_object_put(obj);

out_unlock:
	if (flags & TEST_RESET)
		igt_global_reset_unlock(&i915->gt);

	if (ret)
		pr_err("%s: Failed with %d!\n", name, ret);

	return ret;
}

static int igt_ctx_sseu(void *arg)
{
	struct {
		const char *name;
		unsigned int flags;
	} *phase, phases[] = {
		{ .name = "basic", .flags = 0 },
		{ .name = "idle", .flags = TEST_IDLE },
		{ .name = "busy", .flags = TEST_BUSY },
		{ .name = "busy-reset", .flags = TEST_BUSY | TEST_RESET },
		{ .name = "busy-idle", .flags = TEST_BUSY | TEST_IDLE },
		{ .name = "reset-idle", .flags = TEST_RESET | TEST_IDLE },
	};
	unsigned int i;
	int ret = 0;

	for (i = 0, phase = phases; ret == 0 && i < ARRAY_SIZE(phases);
	     i++, phase++)
		ret = __igt_ctx_sseu(arg, phase->name, phase->flags);

	return ret;
}

static int igt_ctx_readonly(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct drm_i915_gem_object *obj = NULL;
	struct i915_request *tq[5] = {};
	struct i915_address_space *vm;
	struct i915_gem_context *ctx;
	unsigned long idx, ndwords, dw;
	struct igt_live_test t;
	struct drm_file *file;
	I915_RND_STATE(prng);
	IGT_TIMEOUT(end_time);
	LIST_HEAD(objects);
	int err = -ENODEV;

	/*
	 * Create a few read-only objects (with the occasional writable object)
	 * and try to write into these objects, checking that the GPU discards
	 * any write to a read-only object.
	 */

	file = mock_file(i915);
	if (IS_ERR(file))
		return PTR_ERR(file);

	err = igt_live_test_begin(&t, i915, __func__, "");
	if (err)
		goto out_file;

	ctx = live_context(i915, file);
	if (IS_ERR(ctx)) {
		err = PTR_ERR(ctx);
		goto out_file;
	}

	rcu_read_lock();
	vm = rcu_dereference(ctx->vm) ?: &i915->ggtt.alias->vm;
	if (!vm || !vm->has_read_only) {
		rcu_read_unlock();
		err = 0;
		goto out_file;
	}
	rcu_read_unlock();

	ndwords = 0;
	dw = 0;
	while (!time_after(jiffies, end_time)) {
		struct i915_gem_engines_iter it;
		struct intel_context *ce;

		for_each_gem_engine(ce,
				    i915_gem_context_lock_engines(ctx), it) {
			if (!intel_engine_can_store_dword(ce->engine))
				continue;

			if (!obj) {
				obj = create_test_object(ce->vm, file, &objects);
				if (IS_ERR(obj)) {
					err = PTR_ERR(obj);
					i915_gem_context_unlock_engines(ctx);
					goto out_file;
				}

				if (prandom_u32_state(&prng) & 1)
					i915_gem_object_set_readonly(obj);
			}

			err = gpu_fill(ce, obj, dw);
			if (err) {
				pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) [full-ppgtt? %s], err=%d\n",
				       ndwords, dw, max_dwords(obj),
				       ce->engine->name,
				       yesno(!!rcu_access_pointer(ctx->vm)),
				       err);
				i915_gem_context_unlock_engines(ctx);
				goto out_file;
			}

			err = throttle(ce, tq, ARRAY_SIZE(tq));
			if (err) {
				i915_gem_context_unlock_engines(ctx);
				goto out_file;
			}

			if (++dw == max_dwords(obj)) {
				obj = NULL;
				dw = 0;
			}
			ndwords++;
		}
		i915_gem_context_unlock_engines(ctx);
	}
	pr_info("Submitted %lu dwords (across %u engines)\n",
		ndwords, RUNTIME_INFO(i915)->num_engines);

	dw = 0;
	idx = 0;
	list_for_each_entry(obj, &objects, st_link) {
		unsigned int rem =
			min_t(unsigned int, ndwords - dw, max_dwords(obj));
		unsigned int num_writes;

		num_writes = rem;
		if (i915_gem_object_is_readonly(obj))
			num_writes = 0;

		err = cpu_check(obj, idx++, num_writes);
		if (err)
			break;

		dw += rem;
	}

out_file:
	throttle_release(tq, ARRAY_SIZE(tq));
	if (igt_live_test_end(&t))
		err = -EIO;

	mock_file_free(i915, file);
	return err;
}

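/*
 * Make sure the chosen scratch offset does not land on top of anything
 * already bound in the VM (such as our own batch buffer), which would
 * invalidate the isolation check.
 */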
static int check_scratch(struct i915_address_space *vm, u64 offset)
{
	struct drm_mm_node *node =
		__drm_mm_interval_first(&vm->mm,
					offset, offset + sizeof(u32) - 1);
	if (!node || node->start > offset)
		return 0;

	GEM_BUG_ON(offset >= node->start + node->size);

	pr_err("Target offset 0x%08x_%08x overlaps with a node in the mm!\n",
	       upper_32_bits(offset), lower_32_bits(offset));
	return -EINVAL;
}

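/*
 * Build and submit a small batch in ctx that uses MI_STORE_DWORD_IMM to
 * write 'value' to 'offset' within that context's address space.
 */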
static int write_to_scratch(struct i915_gem_context *ctx,
			    struct intel_engine_cs *engine,
			    u64 offset, u32 value)
{
	struct drm_i915_private *i915 = ctx->i915;
	struct drm_i915_gem_object *obj;
	struct i915_address_space *vm;
	struct i915_request *rq;
	struct i915_vma *vma;
	u32 *cmd;
	int err;

	GEM_BUG_ON(offset < I915_GTT_PAGE_SIZE);

	obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
	if (IS_ERR(obj))
		return PTR_ERR(obj);

	cmd = i915_gem_object_pin_map(obj, I915_MAP_WB);
	if (IS_ERR(cmd)) {
		err = PTR_ERR(cmd);
		goto err;
	}

	*cmd++ = MI_STORE_DWORD_IMM_GEN4;
	if (INTEL_GEN(i915) >= 8) {
		*cmd++ = lower_32_bits(offset);
		*cmd++ = upper_32_bits(offset);
	} else {
		*cmd++ = 0;
		*cmd++ = offset;
	}
	*cmd++ = value;
	*cmd = MI_BATCH_BUFFER_END;
	__i915_gem_object_flush_map(obj, 0, 64);
	i915_gem_object_unpin_map(obj);

	intel_gt_chipset_flush(engine->gt);

	vm = i915_gem_context_get_vm_rcu(ctx);
	vma = i915_vma_instance(obj, vm, NULL);
	if (IS_ERR(vma)) {
		err = PTR_ERR(vma);
		goto err_vm;
	}

	err = i915_vma_pin(vma, 0, 0, PIN_USER | PIN_OFFSET_FIXED);
	if (err)
		goto err_vm;

	err = check_scratch(vm, offset);
	if (err)
		goto err_unpin;

	rq = igt_request_alloc(ctx, engine);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto err_unpin;
	}

	err = engine->emit_bb_start(rq, vma->node.start, vma->node.size, 0);
	if (err)
		goto err_request;

	i915_vma_lock(vma);
	err = i915_request_await_object(rq, vma->obj, false);
	if (err == 0)
		err = i915_vma_move_to_active(vma, rq, 0);
	i915_vma_unlock(vma);
	if (err)
		goto skip_request;

	i915_vma_unpin_and_release(&vma, 0);

	i915_request_add(rq);

	i915_vm_put(vm);
	return 0;

skip_request:
	i915_request_skip(rq, err);
err_request:
	i915_request_add(rq);
err_unpin:
	i915_vma_unpin(vma);
err_vm:
	i915_vm_put(vm);
err:
	i915_gem_object_put(obj);
	return err;
}

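/*
 * Read back 'offset' from within ctx's address space: load the dword into
 * a GPR (MI_LOAD_REGISTER_MEM) and then store the register into the batch
 * object itself (MI_STORE_REGISTER_MEM), where the CPU can retrieve it
 * once the request completes.
 */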
static int read_from_scratch(struct i915_gem_context *ctx,
			     struct intel_engine_cs *engine,
			     u64 offset, u32 *value)
{
	struct drm_i915_private *i915 = ctx->i915;
	struct drm_i915_gem_object *obj;
	struct i915_address_space *vm;
	const u32 RCS_GPR0 = 0x2600; /* not all engines have their own GPR! */
	const u32 result = 0x100;
	struct i915_request *rq;
	struct i915_vma *vma;
	u32 *cmd;
	int err;

	GEM_BUG_ON(offset < I915_GTT_PAGE_SIZE);

	obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
	if (IS_ERR(obj))
		return PTR_ERR(obj);

	cmd = i915_gem_object_pin_map(obj, I915_MAP_WB);
	if (IS_ERR(cmd)) {
		err = PTR_ERR(cmd);
		goto err;
	}

	memset(cmd, POISON_INUSE, PAGE_SIZE);
	if (INTEL_GEN(i915) >= 8) {
		*cmd++ = MI_LOAD_REGISTER_MEM_GEN8;
		*cmd++ = RCS_GPR0;
		*cmd++ = lower_32_bits(offset);
		*cmd++ = upper_32_bits(offset);
		*cmd++ = MI_STORE_REGISTER_MEM_GEN8;
		*cmd++ = RCS_GPR0;
		*cmd++ = result;
		*cmd++ = 0;
	} else {
		*cmd++ = MI_LOAD_REGISTER_MEM;
		*cmd++ = RCS_GPR0;
		*cmd++ = offset;
		*cmd++ = MI_STORE_REGISTER_MEM;
		*cmd++ = RCS_GPR0;
		*cmd++ = result;
	}
	*cmd = MI_BATCH_BUFFER_END;

	i915_gem_object_flush_map(obj);
	i915_gem_object_unpin_map(obj);

	intel_gt_chipset_flush(engine->gt);

	vm = i915_gem_context_get_vm_rcu(ctx);
	vma = i915_vma_instance(obj, vm, NULL);
	if (IS_ERR(vma)) {
		err = PTR_ERR(vma);
		goto err_vm;
	}

	err = i915_vma_pin(vma, 0, 0, PIN_USER | PIN_OFFSET_FIXED);
	if (err)
		goto err_vm;

	err = check_scratch(vm, offset);
	if (err)
		goto err_unpin;

	rq = igt_request_alloc(ctx, engine);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto err_unpin;
	}

	err = engine->emit_bb_start(rq, vma->node.start, vma->node.size, 0);
	if (err)
		goto err_request;

	i915_vma_lock(vma);
	err = i915_request_await_object(rq, vma->obj, true);
	if (err == 0)
		err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
	i915_vma_unlock(vma);
	if (err)
		goto skip_request;

	i915_vma_unpin(vma);
	i915_vma_close(vma);

	i915_request_add(rq);

	i915_gem_object_lock(obj);
	err = i915_gem_object_set_to_cpu_domain(obj, false);
	i915_gem_object_unlock(obj);
	if (err)
		goto err_vm;

	cmd = i915_gem_object_pin_map(obj, I915_MAP_WB);
	if (IS_ERR(cmd)) {
		err = PTR_ERR(cmd);
		goto err_vm;
	}

	*value = cmd[result / sizeof(*cmd)];
	i915_gem_object_unpin_map(obj);
	i915_gem_object_put(obj);
	i915_vm_put(vm);

	return 0;

skip_request:
	i915_request_skip(rq, err);
err_request:
	i915_request_add(rq);
err_unpin:
	i915_vma_unpin(vma);
err_vm:
	i915_vm_put(vm);
err:
	i915_gem_object_put(obj);
	return err;
}

static int igt_vm_isolation(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct i915_gem_context *ctx_a, *ctx_b;
	struct intel_engine_cs *engine;
	struct igt_live_test t;
	struct drm_file *file;
	I915_RND_STATE(prng);
	unsigned long count;
	u64 vm_total;
	int err;

	if (INTEL_GEN(i915) < 7)
		return 0;

	/*
	 * The simple goal here is that a write into one context is not
	 * observed in a second (separate page tables and scratch).
	 */

	file = mock_file(i915);
	if (IS_ERR(file))
		return PTR_ERR(file);

	err = igt_live_test_begin(&t, i915, __func__, "");
	if (err)
		goto out_file;

	ctx_a = live_context(i915, file);
	if (IS_ERR(ctx_a)) {
		err = PTR_ERR(ctx_a);
		goto out_file;
	}

	ctx_b = live_context(i915, file);
	if (IS_ERR(ctx_b)) {
		err = PTR_ERR(ctx_b);
		goto out_file;
	}

	/* We can only test vm isolation if the vms are distinct */
	if (ctx_a->vm == ctx_b->vm)
		goto out_file;

	vm_total = ctx_a->vm->total;
	GEM_BUG_ON(ctx_b->vm->total != vm_total);
	vm_total -= I915_GTT_PAGE_SIZE;

	count = 0;
	for_each_uabi_engine(engine, i915) {
		IGT_TIMEOUT(end_time);
		unsigned long this = 0;

		if (!intel_engine_can_store_dword(engine))
			continue;

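		/*
		 * Write through ctx_a at a random dword-aligned offset
		 * (kept above the first page) and then read the same
		 * offset back through ctx_b; with distinct VMs the read
		 * must come back as zero.
		 */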
		while (!__igt_timeout(end_time, NULL)) {
			u32 value = 0xc5c5c5c5;
			u64 offset;

			div64_u64_rem(i915_prandom_u64_state(&prng),
				      vm_total, &offset);
			offset = round_down(offset, alignof_dword);
			offset += I915_GTT_PAGE_SIZE;

			err = write_to_scratch(ctx_a, engine,
					       offset, 0xdeadbeef);
			if (err == 0)
				err = read_from_scratch(ctx_b, engine,
							offset, &value);
			if (err)
				goto out_file;

			if (value) {
				pr_err("%s: Read %08x from scratch (offset 0x%08x_%08x), after %lu reads!\n",
				       engine->name, value,
				       upper_32_bits(offset),
				       lower_32_bits(offset),
				       this);
				err = -EINVAL;
				goto out_file;
			}

			this++;
		}
		count += this;
	}
	pr_info("Checked %lu scratch offsets across %d engines\n",
		count, RUNTIME_INFO(i915)->num_engines);

out_file:
	if (igt_live_test_end(&t))
		err = -EIO;
	mock_file_free(i915, file);
	return err;
}

static bool skip_unused_engines(struct intel_context *ce, void *data)
{
	return !ce->state;
}

static void mock_barrier_task(void *data)
{
	unsigned int *counter = data;

	++*counter;
}

static int mock_context_barrier(void *arg)
{
#undef pr_fmt
#define pr_fmt(x) "context_barrier_task():" # x
	struct drm_i915_private *i915 = arg;
	struct i915_gem_context *ctx;
	struct i915_request *rq;
	unsigned int counter;
	int err;

	/*
	 * The context barrier provides us with a callback after it emits
	 * a request; useful for retiring old state after loading new.
	 */

	ctx = mock_context(i915, "mock");
	if (!ctx)
		return -ENOMEM;

	counter = 0;
	err = context_barrier_task(ctx, 0,
				   NULL, NULL, mock_barrier_task, &counter);
	if (err) {
		pr_err("Failed at line %d, err=%d\n", __LINE__, err);
		goto out;
	}
	if (counter == 0) {
		pr_err("Did not retire immediately with 0 engines\n");
		err = -EINVAL;
		goto out;
	}

	counter = 0;
	err = context_barrier_task(ctx, ALL_ENGINES,
				   skip_unused_engines,
				   NULL,
				   mock_barrier_task,
				   &counter);
	if (err) {
		pr_err("Failed at line %d, err=%d\n", __LINE__, err);
		goto out;
	}
	if (counter == 0) {
		pr_err("Did not retire immediately for all unused engines\n");
		err = -EINVAL;
		goto out;
	}

	rq = igt_request_alloc(ctx, i915->engine[RCS0]);
	if (IS_ERR(rq)) {
		pr_err("Request allocation failed!\n");
		err = PTR_ERR(rq);
		goto out;
	}
	i915_request_add(rq);

	counter = 0;
	context_barrier_inject_fault = BIT(RCS0);
	err = context_barrier_task(ctx, ALL_ENGINES,
				   NULL, NULL, mock_barrier_task, &counter);
	context_barrier_inject_fault = 0;
	if (err == -ENXIO)
		err = 0;
	else
		pr_err("Did not hit fault injection!\n");
	if (counter != 0) {
		pr_err("Invoked callback on error!\n");
		err = -EIO;
	}
	if (err)
		goto out;

	counter = 0;
	err = context_barrier_task(ctx, ALL_ENGINES,
				   skip_unused_engines,
				   NULL,
				   mock_barrier_task,
				   &counter);
	if (err) {
		pr_err("Failed at line %d, err=%d\n", __LINE__, err);
		goto out;
	}
	mock_device_flush(i915);
	if (counter == 0) {
		pr_err("Did not retire on each active engine\n");
		err = -EINVAL;
		goto out;
	}

out:
	mock_context_close(ctx);
	return err;
#undef pr_fmt
#define pr_fmt(x) x
}

int i915_gem_context_mock_selftests(void)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(mock_context_barrier),
	};
	struct drm_i915_private *i915;
	int err;

	i915 = mock_gem_device();
	if (!i915)
		return -ENOMEM;

	err = i915_subtests(tests, i915);

	drm_dev_put(&i915->drm);
	return err;
}

int i915_gem_context_live_selftests(struct drm_i915_private *i915)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(live_nop_switch),
		SUBTEST(live_parallel_switch),
		SUBTEST(igt_ctx_exec),
		SUBTEST(igt_ctx_readonly),
		SUBTEST(igt_ctx_sseu),
		SUBTEST(igt_shared_ctx_exec),
		SUBTEST(igt_vm_isolation),
	};

	if (intel_gt_is_wedged(&i915->gt))
		return 0;

	return i915_live_subtests(tests, i915);
}