1 /*
2  * SPDX-License-Identifier: MIT
3  *
4  * Copyright © 2017 Intel Corporation
5  */
6 
7 #include <linux/prime_numbers.h>
8 #include <linux/string_helpers.h>
9 
10 #include "gem/i915_gem_internal.h"
11 #include "gem/i915_gem_pm.h"
12 #include "gt/intel_engine_pm.h"
13 #include "gt/intel_engine_regs.h"
14 #include "gt/intel_gt.h"
15 #include "gt/intel_gt_requests.h"
16 #include "gt/intel_reset.h"
17 #include "i915_selftest.h"
18 
19 #include "gem/selftests/igt_gem_utils.h"
20 #include "selftests/i915_random.h"
21 #include "selftests/igt_flush_test.h"
22 #include "selftests/igt_live_test.h"
23 #include "selftests/igt_reset.h"
24 #include "selftests/igt_spinner.h"
25 #include "selftests/mock_drm.h"
26 #include "selftests/mock_gem_device.h"
27 
28 #include "huge_gem_object.h"
29 #include "igt_gem_utils.h"
30 
31 #define DW_PER_PAGE (PAGE_SIZE / sizeof(u32))
32 
33 static int live_nop_switch(void *arg)
34 {
35 	const unsigned int nctx = 1024;
36 	struct drm_i915_private *i915 = arg;
37 	struct intel_engine_cs *engine;
38 	struct i915_gem_context **ctx;
39 	struct igt_live_test t;
40 	struct file *file;
41 	unsigned long n;
42 	int err = -ENODEV;
43 
44 	/*
45 	 * Create as many contexts as we can feasibly get away with
46 	 * and check we can switch between them rapidly.
47 	 *
	 * Serves as a very simple stress test for submission and HW switching
49 	 * between contexts.
50 	 */
51 
52 	if (!DRIVER_CAPS(i915)->has_logical_contexts)
53 		return 0;
54 
55 	file = mock_file(i915);
56 	if (IS_ERR(file))
57 		return PTR_ERR(file);
58 
59 	ctx = kcalloc(nctx, sizeof(*ctx), GFP_KERNEL);
60 	if (!ctx) {
61 		err = -ENOMEM;
62 		goto out_file;
63 	}
64 
65 	for (n = 0; n < nctx; n++) {
66 		ctx[n] = live_context(i915, file);
67 		if (IS_ERR(ctx[n])) {
68 			err = PTR_ERR(ctx[n]);
69 			goto out_file;
70 		}
71 	}
72 
73 	for_each_uabi_engine(engine, i915) {
74 		struct i915_request *rq = NULL;
75 		unsigned long end_time, prime;
76 		ktime_t times[2] = {};
77 
78 		times[0] = ktime_get_raw();
79 		for (n = 0; n < nctx; n++) {
80 			struct i915_request *this;
81 
82 			this = igt_request_alloc(ctx[n], engine);
83 			if (IS_ERR(this)) {
84 				err = PTR_ERR(this);
85 				goto out_file;
86 			}
87 			if (rq) {
88 				i915_request_await_dma_fence(this, &rq->fence);
89 				i915_request_put(rq);
90 			}
91 			rq = i915_request_get(this);
92 			i915_request_add(this);
93 		}
94 		if (i915_request_wait(rq, 0, 10 * HZ) < 0) {
			pr_err("Failed to populate %d contexts\n", nctx);
96 			intel_gt_set_wedged(to_gt(i915));
97 			i915_request_put(rq);
98 			err = -EIO;
99 			goto out_file;
100 		}
101 		i915_request_put(rq);
102 
103 		times[1] = ktime_get_raw();
104 
105 		pr_info("Populated %d contexts on %s in %lluns\n",
106 			nctx, engine->name, ktime_to_ns(times[1] - times[0]));
107 
108 		err = igt_live_test_begin(&t, i915, __func__, engine->name);
109 		if (err)
110 			goto out_file;
111 
112 		end_time = jiffies + i915_selftest.timeout_jiffies;
113 		for_each_prime_number_from(prime, 2, 8192) {
114 			times[1] = ktime_get_raw();
115 
116 			rq = NULL;
117 			for (n = 0; n < prime; n++) {
118 				struct i915_request *this;
119 
120 				this = igt_request_alloc(ctx[n % nctx], engine);
121 				if (IS_ERR(this)) {
122 					err = PTR_ERR(this);
123 					goto out_file;
124 				}
125 
126 				if (rq) { /* Force submission order */
127 					i915_request_await_dma_fence(this, &rq->fence);
128 					i915_request_put(rq);
129 				}
130 
131 				/*
132 				 * This space is left intentionally blank.
133 				 *
134 				 * We do not actually want to perform any
				 * action with this request; we just want
136 				 * to measure the latency in allocation
137 				 * and submission of our breadcrumbs -
138 				 * ensuring that the bare request is sufficient
139 				 * for the system to work (i.e. proper HEAD
140 				 * tracking of the rings, interrupt handling,
				 * etc). It also gives us a lower bound on
				 * the latency.
143 				 */
144 
145 				rq = i915_request_get(this);
146 				i915_request_add(this);
147 			}
148 			GEM_BUG_ON(!rq);
149 			if (i915_request_wait(rq, 0, HZ / 5) < 0) {
150 				pr_err("Switching between %ld contexts timed out\n",
151 				       prime);
152 				intel_gt_set_wedged(to_gt(i915));
153 				i915_request_put(rq);
154 				break;
155 			}
156 			i915_request_put(rq);
157 
158 			times[1] = ktime_sub(ktime_get_raw(), times[1]);
159 			if (prime == 2)
160 				times[0] = times[1];
161 
162 			if (__igt_timeout(end_time, NULL))
163 				break;
164 		}
165 
166 		err = igt_live_test_end(&t);
167 		if (err)
168 			goto out_file;
169 
170 		pr_info("Switch latencies on %s: 1 = %lluns, %lu = %lluns\n",
171 			engine->name,
172 			ktime_to_ns(times[0]),
173 			prime - 1, div64_u64(ktime_to_ns(times[1]), prime - 1));
174 	}
175 
176 out_file:
177 	fput(file);
178 	return err;
179 }
180 
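/*
 * Per-engine state for live_parallel_switch: a kthread worker bouncing
 * requests between two pinned contexts on the same engine, recording the
 * first error it hits in @result.
 */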
181 struct parallel_switch {
182 	struct kthread_worker *worker;
183 	struct kthread_work work;
184 	struct intel_context *ce[2];
185 	int result;
186 };
187 
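/*
 * Synchronous flavour: build a short chain of requests, one per context,
 * and wait for it to complete before starting the next iteration.
 */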
188 static void __live_parallel_switch1(struct kthread_work *work)
189 {
190 	struct parallel_switch *arg =
191 		container_of(work, typeof(*arg), work);
192 	IGT_TIMEOUT(end_time);
193 	unsigned long count;
194 
195 	count = 0;
196 	arg->result = 0;
197 	do {
198 		struct i915_request *rq = NULL;
199 		int n;
200 
201 		for (n = 0; !arg->result && n < ARRAY_SIZE(arg->ce); n++) {
202 			struct i915_request *prev = rq;
203 
204 			rq = i915_request_create(arg->ce[n]);
205 			if (IS_ERR(rq)) {
206 				i915_request_put(prev);
207 				arg->result = PTR_ERR(rq);
208 				break;
209 			}
210 
211 			i915_request_get(rq);
212 			if (prev) {
213 				arg->result =
214 					i915_request_await_dma_fence(rq,
215 								     &prev->fence);
216 				i915_request_put(prev);
217 			}
218 
219 			i915_request_add(rq);
220 		}
221 
222 		if (IS_ERR_OR_NULL(rq))
223 			break;
224 
225 		if (i915_request_wait(rq, 0, HZ) < 0)
226 			arg->result = -ETIME;
227 
228 		i915_request_put(rq);
229 
230 		count++;
231 	} while (!arg->result && !__igt_timeout(end_time, NULL));
232 
233 	pr_info("%s: %lu switches (sync) <%d>\n",
234 		arg->ce[0]->engine->name, count, arg->result);
235 }
236 
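/*
 * Asynchronous flavour: keep extending one long chain of context switches
 * without ever waiting, until the timeout expires.
 */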
237 static void __live_parallel_switchN(struct kthread_work *work)
238 {
239 	struct parallel_switch *arg =
240 		container_of(work, typeof(*arg), work);
241 	struct i915_request *rq = NULL;
242 	IGT_TIMEOUT(end_time);
243 	unsigned long count;
244 	int n;
245 
246 	count = 0;
247 	arg->result = 0;
248 	do {
249 		for (n = 0; !arg->result && n < ARRAY_SIZE(arg->ce); n++) {
250 			struct i915_request *prev = rq;
251 
252 			rq = i915_request_create(arg->ce[n]);
253 			if (IS_ERR(rq)) {
254 				i915_request_put(prev);
255 				arg->result = PTR_ERR(rq);
256 				break;
257 			}
258 
259 			i915_request_get(rq);
260 			if (prev) {
261 				arg->result =
262 					i915_request_await_dma_fence(rq,
263 								     &prev->fence);
264 				i915_request_put(prev);
265 			}
266 
267 			i915_request_add(rq);
268 		}
269 
270 		count++;
271 	} while (!arg->result && !__igt_timeout(end_time, NULL));
272 
273 	if (!IS_ERR_OR_NULL(rq))
274 		i915_request_put(rq);
275 
276 	pr_info("%s: %lu switches (many) <%d>\n",
277 		arg->ce[0]->engine->name, count, arg->result);
278 }
279 
280 static int live_parallel_switch(void *arg)
281 {
282 	struct drm_i915_private *i915 = arg;
283 	static void (* const func[])(struct kthread_work *) = {
284 		__live_parallel_switch1,
285 		__live_parallel_switchN,
286 		NULL,
287 	};
288 	struct parallel_switch *data = NULL;
289 	struct i915_gem_engines *engines;
290 	struct i915_gem_engines_iter it;
291 	void (* const *fn)(struct kthread_work *);
292 	struct i915_gem_context *ctx;
293 	struct intel_context *ce;
294 	struct file *file;
295 	int n, m, count;
296 	int err = 0;
297 
298 	/*
299 	 * Check we can process switches on all engines simultaneously.
300 	 */
301 
302 	if (!DRIVER_CAPS(i915)->has_logical_contexts)
303 		return 0;
304 
305 	file = mock_file(i915);
306 	if (IS_ERR(file))
307 		return PTR_ERR(file);
308 
309 	ctx = live_context(i915, file);
310 	if (IS_ERR(ctx)) {
311 		err = PTR_ERR(ctx);
312 		goto out_file;
313 	}
314 
315 	engines = i915_gem_context_lock_engines(ctx);
316 	count = engines->num_engines;
317 
318 	data = kcalloc(count, sizeof(*data), GFP_KERNEL);
319 	if (!data) {
320 		i915_gem_context_unlock_engines(ctx);
321 		err = -ENOMEM;
322 		goto out_file;
323 	}
324 
325 	m = 0; /* Use the first context as our template for the engines */
326 	for_each_gem_engine(ce, engines, it) {
327 		err = intel_context_pin(ce);
328 		if (err) {
329 			i915_gem_context_unlock_engines(ctx);
330 			goto out;
331 		}
332 		data[m++].ce[0] = intel_context_get(ce);
333 	}
334 	i915_gem_context_unlock_engines(ctx);
335 
336 	/* Clone the same set of engines into the other contexts */
337 	for (n = 1; n < ARRAY_SIZE(data->ce); n++) {
338 		ctx = live_context(i915, file);
339 		if (IS_ERR(ctx)) {
340 			err = PTR_ERR(ctx);
341 			goto out;
342 		}
343 
344 		for (m = 0; m < count; m++) {
345 			if (!data[m].ce[0])
346 				continue;
347 
348 			ce = intel_context_create(data[m].ce[0]->engine);
			if (IS_ERR(ce)) {
				err = PTR_ERR(ce);
				goto out;
			}
351 
352 			err = intel_context_pin(ce);
353 			if (err) {
354 				intel_context_put(ce);
355 				goto out;
356 			}
357 
358 			data[m].ce[n] = ce;
359 		}
360 	}
361 
362 	for (n = 0; n < count; n++) {
363 		struct kthread_worker *worker;
364 
365 		if (!data[n].ce[0])
366 			continue;
367 
368 		worker = kthread_create_worker(0, "igt/parallel:%s",
369 					       data[n].ce[0]->engine->name);
		if (IS_ERR(worker)) {
			err = PTR_ERR(worker);
			goto out;
		}
372 
373 		data[n].worker = worker;
374 	}
375 
376 	for (fn = func; !err && *fn; fn++) {
377 		struct igt_live_test t;
378 
379 		err = igt_live_test_begin(&t, i915, __func__, "");
380 		if (err)
381 			break;
382 
383 		for (n = 0; n < count; n++) {
384 			if (!data[n].ce[0])
385 				continue;
386 
387 			data[n].result = 0;
388 			kthread_init_work(&data[n].work, *fn);
389 			kthread_queue_work(data[n].worker, &data[n].work);
390 		}
391 
392 		for (n = 0; n < count; n++) {
393 			if (data[n].ce[0]) {
394 				kthread_flush_work(&data[n].work);
395 				if (data[n].result && !err)
396 					err = data[n].result;
397 			}
398 		}
399 
400 		if (igt_live_test_end(&t))
401 			err = -EIO;
402 	}
403 
404 out:
405 	for (n = 0; n < count; n++) {
406 		for (m = 0; m < ARRAY_SIZE(data->ce); m++) {
407 			if (!data[n].ce[m])
408 				continue;
409 
410 			intel_context_unpin(data[n].ce[m]);
411 			intel_context_put(data[n].ce[m]);
412 		}
413 
414 		if (data[n].worker)
415 			kthread_destroy_worker(data[n].worker);
416 	}
417 	kfree(data);
418 out_file:
419 	fput(file);
420 	return err;
421 }
422 
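/*
 * The huge_gem_object is backed by fewer physical pages than its virtual
 * size: real_page_count() reports the physical backing store, while
 * fake_page_count() reports the size presented to the GTT.
 */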
423 static unsigned long real_page_count(struct drm_i915_gem_object *obj)
424 {
425 	return huge_gem_object_phys_size(obj) >> PAGE_SHIFT;
426 }
427 
428 static unsigned long fake_page_count(struct drm_i915_gem_object *obj)
429 {
430 	return huge_gem_object_dma_size(obj) >> PAGE_SHIFT;
431 }
432 
433 static int gpu_fill(struct intel_context *ce,
434 		    struct drm_i915_gem_object *obj,
435 		    unsigned int dw)
436 {
437 	struct i915_vma *vma;
438 	int err;
439 
440 	GEM_BUG_ON(obj->base.size > ce->vm->total);
441 	GEM_BUG_ON(!intel_engine_can_store_dword(ce->engine));
442 
443 	vma = i915_vma_instance(obj, ce->vm, NULL);
444 	if (IS_ERR(vma))
445 		return PTR_ERR(vma);
446 
447 	err = i915_vma_pin(vma, 0, 0, PIN_HIGH | PIN_USER);
448 	if (err)
449 		return err;
450 
451 	/*
	 * Within the GTT the huge object maps every page onto
453 	 * its 1024 real pages (using phys_pfn = dma_pfn % 1024).
454 	 * We set the nth dword within the page using the nth
455 	 * mapping via the GTT - this should exercise the GTT mapping
456 	 * whilst checking that each context provides a unique view
457 	 * into the object.
458 	 */
459 	err = igt_gpu_fill_dw(ce, vma,
460 			      (dw * real_page_count(obj)) << PAGE_SHIFT |
461 			      (dw * sizeof(u32)),
462 			      real_page_count(obj),
463 			      dw);
464 	i915_vma_unpin(vma);
465 
466 	return err;
467 }
468 
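/* Fill every backing page with @value from the CPU, flushing on !llc. */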
469 static int cpu_fill(struct drm_i915_gem_object *obj, u32 value)
470 {
471 	const bool has_llc = HAS_LLC(to_i915(obj->base.dev));
472 	unsigned int n, m, need_flush;
473 	int err;
474 
475 	i915_gem_object_lock(obj, NULL);
476 	err = i915_gem_object_prepare_write(obj, &need_flush);
477 	if (err)
478 		goto out;
479 
480 	for (n = 0; n < real_page_count(obj); n++) {
481 		u32 *map;
482 
483 		map = kmap_atomic(i915_gem_object_get_page(obj, n));
484 		for (m = 0; m < DW_PER_PAGE; m++)
485 			map[m] = value;
486 		if (!has_llc)
487 			drm_clflush_virt_range(map, PAGE_SIZE);
488 		kunmap_atomic(map);
489 	}
490 
491 	i915_gem_object_finish_access(obj);
492 	obj->read_domains = I915_GEM_DOMAIN_GTT | I915_GEM_DOMAIN_CPU;
493 	obj->write_domain = 0;
494 out:
495 	i915_gem_object_unlock(obj);
496 	return err;
497 }
498 
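/*
 * Readback check: the first @max dwords of each backing page must hold
 * their own index (as written by gpu_fill), and the remainder must still
 * contain the STACK_MAGIC poison from cpu_fill().
 */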
499 static noinline int cpu_check(struct drm_i915_gem_object *obj,
500 			      unsigned int idx, unsigned int max)
501 {
502 	unsigned int n, m, needs_flush;
503 	int err;
504 
505 	i915_gem_object_lock(obj, NULL);
506 	err = i915_gem_object_prepare_read(obj, &needs_flush);
507 	if (err)
508 		goto out_unlock;
509 
510 	for (n = 0; n < real_page_count(obj); n++) {
511 		u32 *map;
512 
513 		map = kmap_atomic(i915_gem_object_get_page(obj, n));
514 		if (needs_flush & CLFLUSH_BEFORE)
515 			drm_clflush_virt_range(map, PAGE_SIZE);
516 
517 		for (m = 0; m < max; m++) {
518 			if (map[m] != m) {
				pr_err("%pS: Invalid value at object %d page %d/%lu, offset %d/%d: found %x expected %x\n",
520 				       __builtin_return_address(0), idx,
521 				       n, real_page_count(obj), m, max,
522 				       map[m], m);
523 				err = -EINVAL;
524 				goto out_unmap;
525 			}
526 		}
527 
528 		for (; m < DW_PER_PAGE; m++) {
529 			if (map[m] != STACK_MAGIC) {
530 				pr_err("%pS: Invalid value at object %d page %d, offset %d: found %x expected %x (uninitialised)\n",
531 				       __builtin_return_address(0), idx, n, m,
532 				       map[m], STACK_MAGIC);
533 				err = -EINVAL;
534 				goto out_unmap;
535 			}
536 		}
537 
538 out_unmap:
539 		kunmap_atomic(map);
540 		if (err)
541 			break;
542 	}
543 
544 	i915_gem_object_finish_access(obj);
545 out_unlock:
546 	i915_gem_object_unlock(obj);
547 	return err;
548 }
549 
550 static int file_add_object(struct file *file, struct drm_i915_gem_object *obj)
551 {
552 	int err;
553 
554 	GEM_BUG_ON(obj->base.handle_count);
555 
556 	/* tie the object to the drm_file for easy reaping */
557 	err = idr_alloc(&to_drm_file(file)->object_idr,
558 			&obj->base, 1, 0, GFP_KERNEL);
559 	if (err < 0)
560 		return err;
561 
562 	i915_gem_object_get(obj);
563 	obj->base.handle_count++;
564 	return 0;
565 }
566 
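/*
 * Create a huge object in @vm, tie it to the mock drm_file so it is
 * reaped on file close, and poison it with STACK_MAGIC so cpu_check()
 * can tell untouched dwords from GPU writes.
 */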
567 static struct drm_i915_gem_object *
568 create_test_object(struct i915_address_space *vm,
569 		   struct file *file,
570 		   struct list_head *objects)
571 {
572 	struct drm_i915_gem_object *obj;
573 	u64 size;
574 	int err;
575 
576 	/* Keep in GEM's good graces */
577 	intel_gt_retire_requests(vm->gt);
578 
579 	size = min(vm->total / 2, 1024ull * DW_PER_PAGE * PAGE_SIZE);
580 	size = round_down(size, DW_PER_PAGE * PAGE_SIZE);
581 
582 	obj = huge_gem_object(vm->i915, DW_PER_PAGE * PAGE_SIZE, size);
583 	if (IS_ERR(obj))
584 		return obj;
585 
586 	err = file_add_object(file, obj);
587 	i915_gem_object_put(obj);
588 	if (err)
589 		return ERR_PTR(err);
590 
591 	err = cpu_fill(obj, STACK_MAGIC);
592 	if (err) {
593 		pr_err("Failed to fill object with cpu, err=%d\n",
594 		       err);
595 		return ERR_PTR(err);
596 	}
597 
598 	list_add_tail(&obj->st_link, objects);
599 	return obj;
600 }
601 
602 static unsigned long max_dwords(struct drm_i915_gem_object *obj)
603 {
604 	unsigned long npages = fake_page_count(obj);
605 
606 	GEM_BUG_ON(!IS_ALIGNED(npages, DW_PER_PAGE));
607 	return npages / DW_PER_PAGE;
608 }
609 
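/*
 * throttle() maintains a small window of outstanding requests per context:
 * wait for the oldest, shuffle the queue down and append a fresh request,
 * so the timed submission loops never build an unbounded backlog.
 * throttle_release() drops whatever references remain in the window.
 */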
610 static void throttle_release(struct i915_request **q, int count)
611 {
612 	int i;
613 
614 	for (i = 0; i < count; i++) {
615 		if (IS_ERR_OR_NULL(q[i]))
616 			continue;
617 
618 		i915_request_put(fetch_and_zero(&q[i]));
619 	}
620 }
621 
622 static int throttle(struct intel_context *ce,
623 		    struct i915_request **q, int count)
624 {
625 	int i;
626 
627 	if (!IS_ERR_OR_NULL(q[0])) {
628 		if (i915_request_wait(q[0],
629 				      I915_WAIT_INTERRUPTIBLE,
630 				      MAX_SCHEDULE_TIMEOUT) < 0)
631 			return -EINTR;
632 
633 		i915_request_put(q[0]);
634 	}
635 
636 	for (i = 0; i < count - 1; i++)
637 		q[i] = q[i + 1];
638 
639 	q[i] = intel_context_create_request(ce);
640 	if (IS_ERR(q[i]))
641 		return PTR_ERR(q[i]);
642 
643 	i915_request_get(q[i]);
644 	i915_request_add(q[i]);
645 
646 	return 0;
647 }
648 
649 static int igt_ctx_exec(void *arg)
650 {
651 	struct drm_i915_private *i915 = arg;
652 	struct intel_engine_cs *engine;
653 	int err = -ENODEV;
654 
655 	/*
656 	 * Create a few different contexts (with different mm) and write
	 * through each ctx/mm using the GPU, making sure those writes end
658 	 * up in the expected pages of our obj.
659 	 */
660 
661 	if (!DRIVER_CAPS(i915)->has_logical_contexts)
662 		return 0;
663 
664 	for_each_uabi_engine(engine, i915) {
665 		struct drm_i915_gem_object *obj = NULL;
666 		unsigned long ncontexts, ndwords, dw;
667 		struct i915_request *tq[5] = {};
668 		struct igt_live_test t;
669 		IGT_TIMEOUT(end_time);
670 		LIST_HEAD(objects);
671 		struct file *file;
672 
673 		if (!intel_engine_can_store_dword(engine))
674 			continue;
675 
676 		if (!engine->context_size)
677 			continue; /* No logical context support in HW */
678 
679 		file = mock_file(i915);
680 		if (IS_ERR(file))
681 			return PTR_ERR(file);
682 
683 		err = igt_live_test_begin(&t, i915, __func__, engine->name);
684 		if (err)
685 			goto out_file;
686 
687 		ncontexts = 0;
688 		ndwords = 0;
689 		dw = 0;
690 		while (!time_after(jiffies, end_time)) {
691 			struct i915_gem_context *ctx;
692 			struct intel_context *ce;
693 
694 			ctx = kernel_context(i915, NULL);
695 			if (IS_ERR(ctx)) {
696 				err = PTR_ERR(ctx);
697 				goto out_file;
698 			}
699 
700 			ce = i915_gem_context_get_engine(ctx, engine->legacy_idx);
701 			GEM_BUG_ON(IS_ERR(ce));
702 
703 			if (!obj) {
704 				obj = create_test_object(ce->vm, file, &objects);
705 				if (IS_ERR(obj)) {
706 					err = PTR_ERR(obj);
707 					intel_context_put(ce);
708 					kernel_context_close(ctx);
709 					goto out_file;
710 				}
711 			}
712 
713 			err = gpu_fill(ce, obj, dw);
714 			if (err) {
715 				pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) [full-ppgtt? %s], err=%d\n",
716 				       ndwords, dw, max_dwords(obj),
717 				       engine->name,
718 				       str_yes_no(i915_gem_context_has_full_ppgtt(ctx)),
719 				       err);
720 				intel_context_put(ce);
721 				kernel_context_close(ctx);
722 				goto out_file;
723 			}
724 
725 			err = throttle(ce, tq, ARRAY_SIZE(tq));
726 			if (err) {
727 				intel_context_put(ce);
728 				kernel_context_close(ctx);
729 				goto out_file;
730 			}
731 
732 			if (++dw == max_dwords(obj)) {
733 				obj = NULL;
734 				dw = 0;
735 			}
736 
737 			ndwords++;
738 			ncontexts++;
739 
740 			intel_context_put(ce);
741 			kernel_context_close(ctx);
742 		}
743 
744 		pr_info("Submitted %lu contexts to %s, filling %lu dwords\n",
745 			ncontexts, engine->name, ndwords);
746 
747 		ncontexts = dw = 0;
748 		list_for_each_entry(obj, &objects, st_link) {
749 			unsigned int rem =
750 				min_t(unsigned int, ndwords - dw, max_dwords(obj));
751 
752 			err = cpu_check(obj, ncontexts++, rem);
753 			if (err)
754 				break;
755 
756 			dw += rem;
757 		}
758 
759 out_file:
760 		throttle_release(tq, ARRAY_SIZE(tq));
761 		if (igt_live_test_end(&t))
762 			err = -EIO;
763 
764 		fput(file);
765 		if (err)
766 			return err;
767 
768 		i915_gem_drain_freed_objects(i915);
769 	}
770 
771 	return 0;
772 }
773 
774 static int igt_shared_ctx_exec(void *arg)
775 {
776 	struct drm_i915_private *i915 = arg;
777 	struct i915_request *tq[5] = {};
778 	struct i915_gem_context *parent;
779 	struct intel_engine_cs *engine;
780 	struct igt_live_test t;
781 	struct file *file;
782 	int err = 0;
783 
784 	/*
785 	 * Create a few different contexts with the same mm and write
	 * through each ctx using the GPU, making sure those writes end
787 	 * up in the expected pages of our obj.
788 	 */
789 	if (!DRIVER_CAPS(i915)->has_logical_contexts)
790 		return 0;
791 
792 	file = mock_file(i915);
793 	if (IS_ERR(file))
794 		return PTR_ERR(file);
795 
796 	parent = live_context(i915, file);
797 	if (IS_ERR(parent)) {
798 		err = PTR_ERR(parent);
799 		goto out_file;
800 	}
801 
802 	if (!parent->vm) { /* not full-ppgtt; nothing to share */
803 		err = 0;
804 		goto out_file;
805 	}
806 
807 	err = igt_live_test_begin(&t, i915, __func__, "");
808 	if (err)
809 		goto out_file;
810 
811 	for_each_uabi_engine(engine, i915) {
812 		unsigned long ncontexts, ndwords, dw;
813 		struct drm_i915_gem_object *obj = NULL;
814 		IGT_TIMEOUT(end_time);
815 		LIST_HEAD(objects);
816 
817 		if (!intel_engine_can_store_dword(engine))
818 			continue;
819 
820 		dw = 0;
821 		ndwords = 0;
822 		ncontexts = 0;
823 		while (!time_after(jiffies, end_time)) {
824 			struct i915_gem_context *ctx;
825 			struct intel_context *ce;
826 
827 			ctx = kernel_context(i915, parent->vm);
828 			if (IS_ERR(ctx)) {
829 				err = PTR_ERR(ctx);
830 				goto out_test;
831 			}
832 
833 			ce = i915_gem_context_get_engine(ctx, engine->legacy_idx);
834 			GEM_BUG_ON(IS_ERR(ce));
835 
836 			if (!obj) {
837 				obj = create_test_object(parent->vm,
838 							 file, &objects);
839 				if (IS_ERR(obj)) {
840 					err = PTR_ERR(obj);
841 					intel_context_put(ce);
842 					kernel_context_close(ctx);
843 					goto out_test;
844 				}
845 			}
846 
847 			err = gpu_fill(ce, obj, dw);
848 			if (err) {
849 				pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) [full-ppgtt? %s], err=%d\n",
850 				       ndwords, dw, max_dwords(obj),
851 				       engine->name,
852 				       str_yes_no(i915_gem_context_has_full_ppgtt(ctx)),
853 				       err);
854 				intel_context_put(ce);
855 				kernel_context_close(ctx);
856 				goto out_test;
857 			}
858 
859 			err = throttle(ce, tq, ARRAY_SIZE(tq));
860 			if (err) {
861 				intel_context_put(ce);
862 				kernel_context_close(ctx);
863 				goto out_test;
864 			}
865 
866 			if (++dw == max_dwords(obj)) {
867 				obj = NULL;
868 				dw = 0;
869 			}
870 
871 			ndwords++;
872 			ncontexts++;
873 
874 			intel_context_put(ce);
875 			kernel_context_close(ctx);
876 		}
877 		pr_info("Submitted %lu contexts to %s, filling %lu dwords\n",
878 			ncontexts, engine->name, ndwords);
879 
880 		ncontexts = dw = 0;
881 		list_for_each_entry(obj, &objects, st_link) {
882 			unsigned int rem =
883 				min_t(unsigned int, ndwords - dw, max_dwords(obj));
884 
885 			err = cpu_check(obj, ncontexts++, rem);
886 			if (err)
887 				goto out_test;
888 
889 			dw += rem;
890 		}
891 
892 		i915_gem_drain_freed_objects(i915);
893 	}
894 out_test:
895 	throttle_release(tq, ARRAY_SIZE(tq));
896 	if (igt_live_test_end(&t))
897 		err = -EIO;
898 out_file:
899 	fput(file);
900 	return err;
901 }
902 
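/*
 * Build a one-shot batch that stores R_PWR_CLK_STATE for @engine into the
 * result buffer, capturing the slice/subslice configuration the context
 * actually ran with.
 */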
903 static int rpcs_query_batch(struct drm_i915_gem_object *rpcs,
904 			    struct i915_vma *vma,
905 			    struct intel_engine_cs *engine)
906 {
907 	u32 *cmd;
908 
909 	GEM_BUG_ON(GRAPHICS_VER(vma->vm->i915) < 8);
910 
911 	cmd = i915_gem_object_pin_map(rpcs, I915_MAP_WB);
912 	if (IS_ERR(cmd))
913 		return PTR_ERR(cmd);
914 
915 	*cmd++ = MI_STORE_REGISTER_MEM_GEN8;
916 	*cmd++ = i915_mmio_reg_offset(GEN8_R_PWR_CLK_STATE(engine->mmio_base));
917 	*cmd++ = lower_32_bits(vma->node.start);
918 	*cmd++ = upper_32_bits(vma->node.start);
919 	*cmd = MI_BATCH_BUFFER_END;
920 
921 	__i915_gem_object_flush_map(rpcs, 0, 64);
922 	i915_gem_object_unpin_map(rpcs);
923 
924 	intel_gt_chipset_flush(vma->vm->gt);
925 
926 	return 0;
927 }
928 
929 static int
930 emit_rpcs_query(struct drm_i915_gem_object *obj,
931 		struct intel_context *ce,
932 		struct i915_request **rq_out)
933 {
934 	struct drm_i915_private *i915 = to_i915(obj->base.dev);
935 	struct i915_request *rq;
936 	struct i915_gem_ww_ctx ww;
937 	struct i915_vma *batch;
938 	struct i915_vma *vma;
939 	struct drm_i915_gem_object *rpcs;
940 	int err;
941 
942 	GEM_BUG_ON(!intel_engine_can_store_dword(ce->engine));
943 
944 	if (GRAPHICS_VER(i915) < 8)
945 		return -EINVAL;
946 
947 	vma = i915_vma_instance(obj, ce->vm, NULL);
948 	if (IS_ERR(vma))
949 		return PTR_ERR(vma);
950 
951 	rpcs = i915_gem_object_create_internal(i915, PAGE_SIZE);
952 	if (IS_ERR(rpcs))
953 		return PTR_ERR(rpcs);
954 
955 	batch = i915_vma_instance(rpcs, ce->vm, NULL);
956 	if (IS_ERR(batch)) {
957 		err = PTR_ERR(batch);
958 		goto err_put;
959 	}
960 
961 	i915_gem_ww_ctx_init(&ww, false);
962 retry:
963 	err = i915_gem_object_lock(obj, &ww);
964 	if (!err)
965 		err = i915_gem_object_lock(rpcs, &ww);
966 	if (!err)
967 		err = i915_gem_object_set_to_gtt_domain(obj, false);
968 	if (!err)
969 		err = i915_vma_pin_ww(vma, &ww, 0, 0, PIN_USER);
970 	if (err)
971 		goto err_put;
972 
973 	err = i915_vma_pin_ww(batch, &ww, 0, 0, PIN_USER);
974 	if (err)
975 		goto err_vma;
976 
977 	err = rpcs_query_batch(rpcs, vma, ce->engine);
978 	if (err)
979 		goto err_batch;
980 
981 	rq = i915_request_create(ce);
982 	if (IS_ERR(rq)) {
983 		err = PTR_ERR(rq);
984 		goto err_batch;
985 	}
986 
987 	err = i915_request_await_object(rq, batch->obj, false);
988 	if (err == 0)
989 		err = i915_vma_move_to_active(batch, rq, 0);
990 	if (err)
991 		goto skip_request;
992 
993 	err = i915_request_await_object(rq, vma->obj, true);
994 	if (err == 0)
995 		err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
996 	if (err)
997 		goto skip_request;
998 
999 	if (rq->engine->emit_init_breadcrumb) {
1000 		err = rq->engine->emit_init_breadcrumb(rq);
1001 		if (err)
1002 			goto skip_request;
1003 	}
1004 
1005 	err = rq->engine->emit_bb_start(rq,
1006 					batch->node.start, batch->node.size,
1007 					0);
1008 	if (err)
1009 		goto skip_request;
1010 
1011 	*rq_out = i915_request_get(rq);
1012 
1013 skip_request:
1014 	if (err)
1015 		i915_request_set_error_once(rq, err);
1016 	i915_request_add(rq);
1017 err_batch:
1018 	i915_vma_unpin(batch);
1019 err_vma:
1020 	i915_vma_unpin(vma);
1021 err_put:
1022 	if (err == -EDEADLK) {
1023 		err = i915_gem_ww_ctx_backoff(&ww);
1024 		if (!err)
1025 			goto retry;
1026 	}
1027 	i915_gem_ww_ctx_fini(&ww);
1028 	i915_gem_object_put(rpcs);
1029 	return err;
1030 }
1031 
1032 #define TEST_IDLE	BIT(0)
1033 #define TEST_BUSY	BIT(1)
1034 #define TEST_RESET	BIT(2)
1035 
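/*
 * For the BUSY/RESET variants, park a spinner on the context so the SSEU
 * reconfiguration is applied while the engine is busy.
 */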
1036 static int
1037 __sseu_prepare(const char *name,
1038 	       unsigned int flags,
1039 	       struct intel_context *ce,
1040 	       struct igt_spinner **spin)
1041 {
1042 	struct i915_request *rq;
1043 	int ret;
1044 
1045 	*spin = NULL;
1046 	if (!(flags & (TEST_BUSY | TEST_RESET)))
1047 		return 0;
1048 
1049 	*spin = kzalloc(sizeof(**spin), GFP_KERNEL);
1050 	if (!*spin)
1051 		return -ENOMEM;
1052 
1053 	ret = igt_spinner_init(*spin, ce->engine->gt);
1054 	if (ret)
1055 		goto err_free;
1056 
1057 	rq = igt_spinner_create_request(*spin, ce, MI_NOOP);
1058 	if (IS_ERR(rq)) {
1059 		ret = PTR_ERR(rq);
1060 		goto err_fini;
1061 	}
1062 
1063 	i915_request_add(rq);
1064 
1065 	if (!igt_wait_for_spinner(*spin, rq)) {
1066 		pr_err("%s: Spinner failed to start!\n", name);
1067 		ret = -ETIMEDOUT;
1068 		goto err_end;
1069 	}
1070 
1071 	return 0;
1072 
1073 err_end:
1074 	igt_spinner_end(*spin);
1075 err_fini:
1076 	igt_spinner_fini(*spin);
1077 err_free:
1078 	kfree(fetch_and_zero(spin));
1079 	return ret;
1080 }
1081 
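/*
 * Submit the RPCS query on @ce and decode the enabled slice count from
 * the returned R_PWR_CLK_STATE value.
 */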
1082 static int
1083 __read_slice_count(struct intel_context *ce,
1084 		   struct drm_i915_gem_object *obj,
1085 		   struct igt_spinner *spin,
1086 		   u32 *rpcs)
1087 {
1088 	struct i915_request *rq = NULL;
1089 	u32 s_mask, s_shift;
1090 	unsigned int cnt;
1091 	u32 *buf, val;
1092 	long ret;
1093 
1094 	ret = emit_rpcs_query(obj, ce, &rq);
1095 	if (ret)
1096 		return ret;
1097 
1098 	if (spin)
1099 		igt_spinner_end(spin);
1100 
1101 	ret = i915_request_wait(rq, 0, MAX_SCHEDULE_TIMEOUT);
1102 	i915_request_put(rq);
1103 	if (ret < 0)
1104 		return ret;
1105 
1106 	buf = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WB);
1107 	if (IS_ERR(buf)) {
1108 		ret = PTR_ERR(buf);
1109 		return ret;
1110 	}
1111 
1112 	if (GRAPHICS_VER(ce->engine->i915) >= 11) {
1113 		s_mask = GEN11_RPCS_S_CNT_MASK;
1114 		s_shift = GEN11_RPCS_S_CNT_SHIFT;
1115 	} else {
1116 		s_mask = GEN8_RPCS_S_CNT_MASK;
1117 		s_shift = GEN8_RPCS_S_CNT_SHIFT;
1118 	}
1119 
1120 	val = *buf;
1121 	cnt = (val & s_mask) >> s_shift;
1122 	*rpcs = val;
1123 
1124 	i915_gem_object_unpin_map(obj);
1125 
1126 	return cnt;
1127 }
1128 
1129 static int
1130 __check_rpcs(const char *name, u32 rpcs, int slices, unsigned int expected,
1131 	     const char *prefix, const char *suffix)
1132 {
1133 	if (slices == expected)
1134 		return 0;
1135 
1136 	if (slices < 0) {
1137 		pr_err("%s: %s read slice count failed with %d%s\n",
1138 		       name, prefix, slices, suffix);
1139 		return slices;
1140 	}
1141 
1142 	pr_err("%s: %s slice count %d is not %u%s\n",
1143 	       name, prefix, slices, expected, suffix);
1144 
1145 	pr_info("RPCS=0x%x; %u%sx%u%s\n",
1146 		rpcs, slices,
1147 		(rpcs & GEN8_RPCS_S_CNT_ENABLE) ? "*" : "",
1148 		(rpcs & GEN8_RPCS_SS_CNT_MASK) >> GEN8_RPCS_SS_CNT_SHIFT,
1149 		(rpcs & GEN8_RPCS_SS_CNT_ENABLE) ? "*" : "");
1150 
1151 	return -EINVAL;
1152 }
1153 
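/*
 * After an optional engine reset, check that the context reports the
 * requested slice count while the kernel context retains the full
 * configuration; with TEST_IDLE, re-check once the GPU has idled.
 */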
1154 static int
1155 __sseu_finish(const char *name,
1156 	      unsigned int flags,
1157 	      struct intel_context *ce,
1158 	      struct drm_i915_gem_object *obj,
1159 	      unsigned int expected,
1160 	      struct igt_spinner *spin)
1161 {
1162 	unsigned int slices = hweight32(ce->engine->sseu.slice_mask);
1163 	u32 rpcs = 0;
1164 	int ret = 0;
1165 
1166 	if (flags & TEST_RESET) {
1167 		ret = intel_engine_reset(ce->engine, "sseu");
1168 		if (ret)
1169 			goto out;
1170 	}
1171 
1172 	ret = __read_slice_count(ce, obj,
1173 				 flags & TEST_RESET ? NULL : spin, &rpcs);
1174 	ret = __check_rpcs(name, rpcs, ret, expected, "Context", "!");
1175 	if (ret)
1176 		goto out;
1177 
1178 	ret = __read_slice_count(ce->engine->kernel_context, obj, NULL, &rpcs);
1179 	ret = __check_rpcs(name, rpcs, ret, slices, "Kernel context", "!");
1180 
1181 out:
1182 	if (spin)
1183 		igt_spinner_end(spin);
1184 
1185 	if ((flags & TEST_IDLE) && ret == 0) {
1186 		ret = igt_flush_test(ce->engine->i915);
1187 		if (ret)
1188 			return ret;
1189 
1190 		ret = __read_slice_count(ce, obj, NULL, &rpcs);
1191 		ret = __check_rpcs(name, rpcs, ret, expected,
1192 				   "Context", " after idle!");
1193 	}
1194 
1195 	return ret;
1196 }
1197 
1198 static int
1199 __sseu_test(const char *name,
1200 	    unsigned int flags,
1201 	    struct intel_context *ce,
1202 	    struct drm_i915_gem_object *obj,
1203 	    struct intel_sseu sseu)
1204 {
1205 	struct igt_spinner *spin = NULL;
1206 	int ret;
1207 
1208 	intel_engine_pm_get(ce->engine);
1209 
1210 	ret = __sseu_prepare(name, flags, ce, &spin);
1211 	if (ret)
1212 		goto out_pm;
1213 
1214 	ret = intel_context_reconfigure_sseu(ce, sseu);
1215 	if (ret)
1216 		goto out_spin;
1217 
1218 	ret = __sseu_finish(name, flags, ce, obj,
1219 			    hweight32(sseu.slice_mask), spin);
1220 
1221 out_spin:
1222 	if (spin) {
1223 		igt_spinner_end(spin);
1224 		igt_spinner_fini(spin);
1225 		kfree(spin);
1226 	}
1227 out_pm:
1228 	intel_engine_pm_put(ce->engine);
1229 	return ret;
1230 }
1231 
1232 static int
1233 __igt_ctx_sseu(struct drm_i915_private *i915,
1234 	       const char *name,
1235 	       unsigned int flags)
1236 {
1237 	struct drm_i915_gem_object *obj;
1238 	int inst = 0;
1239 	int ret = 0;
1240 
1241 	if (GRAPHICS_VER(i915) < 9)
1242 		return 0;
1243 
1244 	if (flags & TEST_RESET)
1245 		igt_global_reset_lock(to_gt(i915));
1246 
1247 	obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
1248 	if (IS_ERR(obj)) {
1249 		ret = PTR_ERR(obj);
1250 		goto out_unlock;
1251 	}
1252 
1253 	do {
1254 		struct intel_engine_cs *engine;
1255 		struct intel_context *ce;
1256 		struct intel_sseu pg_sseu;
1257 
1258 		engine = intel_engine_lookup_user(i915,
1259 						  I915_ENGINE_CLASS_RENDER,
1260 						  inst++);
1261 		if (!engine)
1262 			break;
1263 
1264 		if (hweight32(engine->sseu.slice_mask) < 2)
1265 			continue;
1266 
1267 		if (!engine->gt->info.sseu.has_slice_pg)
1268 			continue;
1269 
1270 		/*
		 * Gen11 VME-friendly power-gated configuration with
		 * half of the sub-slices enabled.
1273 		 */
1274 		pg_sseu = engine->sseu;
1275 		pg_sseu.slice_mask = 1;
1276 		pg_sseu.subslice_mask =
1277 			~(~0 << (hweight32(engine->sseu.subslice_mask) / 2));
1278 
1279 		pr_info("%s: SSEU subtest '%s', flags=%x, def_slices=%u, pg_slices=%u\n",
1280 			engine->name, name, flags,
1281 			hweight32(engine->sseu.slice_mask),
1282 			hweight32(pg_sseu.slice_mask));
1283 
1284 		ce = intel_context_create(engine);
1285 		if (IS_ERR(ce)) {
1286 			ret = PTR_ERR(ce);
1287 			goto out_put;
1288 		}
1289 
1290 		ret = intel_context_pin(ce);
1291 		if (ret)
1292 			goto out_ce;
1293 
1294 		/* First set the default mask. */
1295 		ret = __sseu_test(name, flags, ce, obj, engine->sseu);
1296 		if (ret)
1297 			goto out_unpin;
1298 
1299 		/* Then set a power-gated configuration. */
1300 		ret = __sseu_test(name, flags, ce, obj, pg_sseu);
1301 		if (ret)
1302 			goto out_unpin;
1303 
1304 		/* Back to defaults. */
1305 		ret = __sseu_test(name, flags, ce, obj, engine->sseu);
1306 		if (ret)
1307 			goto out_unpin;
1308 
1309 		/* One last power-gated configuration for the road. */
1310 		ret = __sseu_test(name, flags, ce, obj, pg_sseu);
1311 		if (ret)
1312 			goto out_unpin;
1313 
1314 out_unpin:
1315 		intel_context_unpin(ce);
1316 out_ce:
1317 		intel_context_put(ce);
1318 	} while (!ret);
1319 
1320 	if (igt_flush_test(i915))
1321 		ret = -EIO;
1322 
1323 out_put:
1324 	i915_gem_object_put(obj);
1325 
1326 out_unlock:
1327 	if (flags & TEST_RESET)
1328 		igt_global_reset_unlock(to_gt(i915));
1329 
1330 	if (ret)
1331 		pr_err("%s: Failed with %d!\n", name, ret);
1332 
1333 	return ret;
1334 }
1335 
1336 static int igt_ctx_sseu(void *arg)
1337 {
1338 	struct {
1339 		const char *name;
1340 		unsigned int flags;
1341 	} *phase, phases[] = {
1342 		{ .name = "basic", .flags = 0 },
1343 		{ .name = "idle", .flags = TEST_IDLE },
1344 		{ .name = "busy", .flags = TEST_BUSY },
1345 		{ .name = "busy-reset", .flags = TEST_BUSY | TEST_RESET },
1346 		{ .name = "busy-idle", .flags = TEST_BUSY | TEST_IDLE },
1347 		{ .name = "reset-idle", .flags = TEST_RESET | TEST_IDLE },
1348 	};
1349 	unsigned int i;
1350 	int ret = 0;
1351 
1352 	for (i = 0, phase = phases; ret == 0 && i < ARRAY_SIZE(phases);
1353 	     i++, phase++)
1354 		ret = __igt_ctx_sseu(arg, phase->name, phase->flags);
1355 
1356 	return ret;
1357 }
1358 
1359 static int igt_ctx_readonly(void *arg)
1360 {
1361 	struct drm_i915_private *i915 = arg;
1362 	unsigned long idx, ndwords, dw, num_engines;
1363 	struct drm_i915_gem_object *obj = NULL;
1364 	struct i915_request *tq[5] = {};
1365 	struct i915_gem_engines_iter it;
1366 	struct i915_address_space *vm;
1367 	struct i915_gem_context *ctx;
1368 	struct intel_context *ce;
1369 	struct igt_live_test t;
1370 	I915_RND_STATE(prng);
1371 	IGT_TIMEOUT(end_time);
1372 	LIST_HEAD(objects);
1373 	struct file *file;
1374 	int err = -ENODEV;
1375 
1376 	/*
1377 	 * Create a few read-only objects (with the occasional writable object)
	 * and try to write into these objects, checking that the GPU discards
1379 	 * any write to a read-only object.
1380 	 */
1381 
1382 	file = mock_file(i915);
1383 	if (IS_ERR(file))
1384 		return PTR_ERR(file);
1385 
1386 	err = igt_live_test_begin(&t, i915, __func__, "");
1387 	if (err)
1388 		goto out_file;
1389 
1390 	ctx = live_context(i915, file);
1391 	if (IS_ERR(ctx)) {
1392 		err = PTR_ERR(ctx);
1393 		goto out_file;
1394 	}
1395 
1396 	vm = ctx->vm ?: &to_gt(i915)->ggtt->alias->vm;
1397 	if (!vm || !vm->has_read_only) {
1398 		err = 0;
1399 		goto out_file;
1400 	}
1401 
1402 	num_engines = 0;
1403 	for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it)
1404 		if (intel_engine_can_store_dword(ce->engine))
1405 			num_engines++;
1406 	i915_gem_context_unlock_engines(ctx);
1407 
1408 	ndwords = 0;
1409 	dw = 0;
1410 	while (!time_after(jiffies, end_time)) {
1411 		for_each_gem_engine(ce,
1412 				    i915_gem_context_lock_engines(ctx), it) {
1413 			if (!intel_engine_can_store_dword(ce->engine))
1414 				continue;
1415 
1416 			if (!obj) {
1417 				obj = create_test_object(ce->vm, file, &objects);
1418 				if (IS_ERR(obj)) {
1419 					err = PTR_ERR(obj);
1420 					i915_gem_context_unlock_engines(ctx);
1421 					goto out_file;
1422 				}
1423 
1424 				if (prandom_u32_state(&prng) & 1)
1425 					i915_gem_object_set_readonly(obj);
1426 			}
1427 
1428 			err = gpu_fill(ce, obj, dw);
1429 			if (err) {
1430 				pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) [full-ppgtt? %s], err=%d\n",
1431 				       ndwords, dw, max_dwords(obj),
1432 				       ce->engine->name,
1433 				       str_yes_no(i915_gem_context_has_full_ppgtt(ctx)),
1434 				       err);
1435 				i915_gem_context_unlock_engines(ctx);
1436 				goto out_file;
1437 			}
1438 
1439 			err = throttle(ce, tq, ARRAY_SIZE(tq));
1440 			if (err) {
1441 				i915_gem_context_unlock_engines(ctx);
1442 				goto out_file;
1443 			}
1444 
1445 			if (++dw == max_dwords(obj)) {
1446 				obj = NULL;
1447 				dw = 0;
1448 			}
1449 			ndwords++;
1450 		}
1451 		i915_gem_context_unlock_engines(ctx);
1452 	}
1453 	pr_info("Submitted %lu dwords (across %lu engines)\n",
1454 		ndwords, num_engines);
1455 
1456 	dw = 0;
1457 	idx = 0;
1458 	list_for_each_entry(obj, &objects, st_link) {
1459 		unsigned int rem =
1460 			min_t(unsigned int, ndwords - dw, max_dwords(obj));
1461 		unsigned int num_writes;
1462 
1463 		num_writes = rem;
1464 		if (i915_gem_object_is_readonly(obj))
1465 			num_writes = 0;
1466 
1467 		err = cpu_check(obj, idx++, num_writes);
1468 		if (err)
1469 			break;
1470 
1471 		dw += rem;
1472 	}
1473 
1474 out_file:
1475 	throttle_release(tq, ARRAY_SIZE(tq));
1476 	if (igt_live_test_end(&t))
1477 		err = -EIO;
1478 
1479 	fput(file);
1480 	return err;
1481 }
1482 
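/*
 * Confirm nothing is mapped at @offset in the VM, so that the GPU access
 * below is backed only by the scratch page.
 */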
1483 static int check_scratch(struct i915_address_space *vm, u64 offset)
1484 {
1485 	struct drm_mm_node *node;
1486 
1487 	mutex_lock(&vm->mutex);
1488 	node = __drm_mm_interval_first(&vm->mm,
1489 				       offset, offset + sizeof(u32) - 1);
1490 	mutex_unlock(&vm->mutex);
1491 	if (!node || node->start > offset)
1492 		return 0;
1493 
1494 	GEM_BUG_ON(offset >= node->start + node->size);
1495 
1496 	pr_err("Target offset 0x%08x_%08x overlaps with a node in the mm!\n",
1497 	       upper_32_bits(offset), lower_32_bits(offset));
1498 	return -EINVAL;
1499 }
1500 
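/*
 * From @ctx, emit a MI_STORE_DWORD_IMM writing @value to @offset in the
 * context's address space.
 */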
1501 static int write_to_scratch(struct i915_gem_context *ctx,
1502 			    struct intel_engine_cs *engine,
1503 			    struct drm_i915_gem_object *obj,
1504 			    u64 offset, u32 value)
1505 {
1506 	struct drm_i915_private *i915 = ctx->i915;
1507 	struct i915_address_space *vm;
1508 	struct i915_request *rq;
1509 	struct i915_vma *vma;
1510 	u32 *cmd;
1511 	int err;
1512 
1513 	GEM_BUG_ON(offset < I915_GTT_PAGE_SIZE);
1514 
1515 	err = check_scratch(ctx->vm, offset);
1516 	if (err)
1517 		return err;
1518 
1519 	cmd = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WB);
1520 	if (IS_ERR(cmd))
1521 		return PTR_ERR(cmd);
1522 
1523 	*cmd++ = MI_STORE_DWORD_IMM_GEN4;
1524 	if (GRAPHICS_VER(i915) >= 8) {
1525 		*cmd++ = lower_32_bits(offset);
1526 		*cmd++ = upper_32_bits(offset);
1527 	} else {
1528 		*cmd++ = 0;
1529 		*cmd++ = offset;
1530 	}
1531 	*cmd++ = value;
1532 	*cmd = MI_BATCH_BUFFER_END;
1533 	__i915_gem_object_flush_map(obj, 0, 64);
1534 	i915_gem_object_unpin_map(obj);
1535 
1536 	intel_gt_chipset_flush(engine->gt);
1537 
1538 	vm = i915_gem_context_get_eb_vm(ctx);
1539 	vma = i915_vma_instance(obj, vm, NULL);
1540 	if (IS_ERR(vma)) {
1541 		err = PTR_ERR(vma);
1542 		goto out_vm;
1543 	}
1544 
1545 	err = i915_vma_pin(vma, 0, 0, PIN_USER | PIN_OFFSET_FIXED);
1546 	if (err)
1547 		goto out_vm;
1548 
1549 	rq = igt_request_alloc(ctx, engine);
1550 	if (IS_ERR(rq)) {
1551 		err = PTR_ERR(rq);
1552 		goto err_unpin;
1553 	}
1554 
1555 	i915_vma_lock(vma);
1556 	err = i915_request_await_object(rq, vma->obj, false);
1557 	if (err == 0)
1558 		err = i915_vma_move_to_active(vma, rq, 0);
1559 	i915_vma_unlock(vma);
1560 	if (err)
1561 		goto skip_request;
1562 
1563 	if (rq->engine->emit_init_breadcrumb) {
1564 		err = rq->engine->emit_init_breadcrumb(rq);
1565 		if (err)
1566 			goto skip_request;
1567 	}
1568 
1569 	err = engine->emit_bb_start(rq, vma->node.start, vma->node.size, 0);
1570 	if (err)
1571 		goto skip_request;
1572 
1573 	i915_vma_unpin(vma);
1574 
1575 	i915_request_add(rq);
1576 
1577 	goto out_vm;
1578 skip_request:
1579 	i915_request_set_error_once(rq, err);
1580 	i915_request_add(rq);
1581 err_unpin:
1582 	i915_vma_unpin(vma);
1583 out_vm:
1584 	i915_vm_put(vm);
1585 
1586 	if (!err)
1587 		err = i915_gem_object_wait(obj, 0, MAX_SCHEDULE_TIMEOUT);
1588 
1589 	return err;
1590 }
1591 
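/*
 * From @ctx, read back the dword at @offset: on gen8+ via an engine GPR,
 * on older platforms via a privileged batch in the GGTT, returning the
 * result through @value.
 */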
1592 static int read_from_scratch(struct i915_gem_context *ctx,
1593 			     struct intel_engine_cs *engine,
1594 			     struct drm_i915_gem_object *obj,
1595 			     u64 offset, u32 *value)
1596 {
1597 	struct drm_i915_private *i915 = ctx->i915;
1598 	struct i915_address_space *vm;
1599 	const u32 result = 0x100;
1600 	struct i915_request *rq;
1601 	struct i915_vma *vma;
1602 	unsigned int flags;
1603 	u32 *cmd;
1604 	int err;
1605 
1606 	GEM_BUG_ON(offset < I915_GTT_PAGE_SIZE);
1607 
1608 	err = check_scratch(ctx->vm, offset);
1609 	if (err)
1610 		return err;
1611 
1612 	if (GRAPHICS_VER(i915) >= 8) {
1613 		const u32 GPR0 = engine->mmio_base + 0x600;
1614 
1615 		vm = i915_gem_context_get_eb_vm(ctx);
1616 		vma = i915_vma_instance(obj, vm, NULL);
1617 		if (IS_ERR(vma)) {
1618 			err = PTR_ERR(vma);
1619 			goto out_vm;
1620 		}
1621 
1622 		err = i915_vma_pin(vma, 0, 0, PIN_USER | PIN_OFFSET_FIXED);
1623 		if (err)
1624 			goto out_vm;
1625 
1626 		cmd = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WB);
1627 		if (IS_ERR(cmd)) {
1628 			err = PTR_ERR(cmd);
1629 			goto err_unpin;
1630 		}
1631 
1632 		memset(cmd, POISON_INUSE, PAGE_SIZE);
1633 		*cmd++ = MI_LOAD_REGISTER_MEM_GEN8;
1634 		*cmd++ = GPR0;
1635 		*cmd++ = lower_32_bits(offset);
1636 		*cmd++ = upper_32_bits(offset);
1637 		*cmd++ = MI_STORE_REGISTER_MEM_GEN8;
1638 		*cmd++ = GPR0;
1639 		*cmd++ = result;
1640 		*cmd++ = 0;
1641 		*cmd = MI_BATCH_BUFFER_END;
1642 
1643 		i915_gem_object_flush_map(obj);
1644 		i915_gem_object_unpin_map(obj);
1645 
1646 		flags = 0;
1647 	} else {
1648 		const u32 reg = engine->mmio_base + 0x420;
1649 
1650 		/* hsw: register access even to 3DPRIM! is protected */
1651 		vm = i915_vm_get(&engine->gt->ggtt->vm);
1652 		vma = i915_vma_instance(obj, vm, NULL);
1653 		if (IS_ERR(vma)) {
1654 			err = PTR_ERR(vma);
1655 			goto out_vm;
1656 		}
1657 
1658 		err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
1659 		if (err)
1660 			goto out_vm;
1661 
1662 		cmd = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WB);
1663 		if (IS_ERR(cmd)) {
1664 			err = PTR_ERR(cmd);
1665 			goto err_unpin;
1666 		}
1667 
1668 		memset(cmd, POISON_INUSE, PAGE_SIZE);
1669 		*cmd++ = MI_LOAD_REGISTER_MEM;
1670 		*cmd++ = reg;
1671 		*cmd++ = offset;
1672 		*cmd++ = MI_STORE_REGISTER_MEM | MI_USE_GGTT;
1673 		*cmd++ = reg;
1674 		*cmd++ = vma->node.start + result;
1675 		*cmd = MI_BATCH_BUFFER_END;
1676 
1677 		i915_gem_object_flush_map(obj);
1678 		i915_gem_object_unpin_map(obj);
1679 
1680 		flags = I915_DISPATCH_SECURE;
1681 	}
1682 
1683 	intel_gt_chipset_flush(engine->gt);
1684 
1685 	rq = igt_request_alloc(ctx, engine);
1686 	if (IS_ERR(rq)) {
1687 		err = PTR_ERR(rq);
1688 		goto err_unpin;
1689 	}
1690 
1691 	i915_vma_lock(vma);
1692 	err = i915_request_await_object(rq, vma->obj, true);
1693 	if (err == 0)
1694 		err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
1695 	i915_vma_unlock(vma);
1696 	if (err)
1697 		goto skip_request;
1698 
1699 	if (rq->engine->emit_init_breadcrumb) {
1700 		err = rq->engine->emit_init_breadcrumb(rq);
1701 		if (err)
1702 			goto skip_request;
1703 	}
1704 
1705 	err = engine->emit_bb_start(rq, vma->node.start, vma->node.size, flags);
1706 	if (err)
1707 		goto skip_request;
1708 
1709 	i915_vma_unpin(vma);
1710 
1711 	i915_request_add(rq);
1712 
1713 	i915_gem_object_lock(obj, NULL);
1714 	err = i915_gem_object_set_to_cpu_domain(obj, false);
1715 	i915_gem_object_unlock(obj);
1716 	if (err)
1717 		goto out_vm;
1718 
1719 	cmd = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WB);
1720 	if (IS_ERR(cmd)) {
1721 		err = PTR_ERR(cmd);
1722 		goto out_vm;
1723 	}
1724 
1725 	*value = cmd[result / sizeof(*cmd)];
1726 	i915_gem_object_unpin_map(obj);
1727 
1728 	goto out_vm;
1729 skip_request:
1730 	i915_request_set_error_once(rq, err);
1731 	i915_request_add(rq);
1732 err_unpin:
1733 	i915_vma_unpin(vma);
1734 out_vm:
1735 	i915_vm_put(vm);
1736 
1737 	if (!err)
1738 		err = i915_gem_object_wait(obj, 0, MAX_SCHEDULE_TIMEOUT);
1739 
1740 	return err;
1741 }
1742 
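/*
 * Sample the scratch page of @ctx's VM so we know what a read from an
 * unmapped, isolated address should return.
 */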
1743 static int check_scratch_page(struct i915_gem_context *ctx, u32 *out)
1744 {
1745 	struct i915_address_space *vm;
1746 	u32 *vaddr;
1747 	int err = 0;
1748 
1749 	vm = ctx->vm;
1750 	if (!vm)
1751 		return -ENODEV;
1752 
1753 	if (!vm->scratch[0]) {
1754 		pr_err("No scratch page!\n");
1755 		return -EINVAL;
1756 	}
1757 
1758 	vaddr = __px_vaddr(vm->scratch[0]);
1759 
1760 	memcpy(out, vaddr, sizeof(*out));
1761 	if (memchr_inv(vaddr, *out, PAGE_SIZE)) {
1762 		pr_err("Inconsistent initial state of scratch page!\n");
1763 		err = -EINVAL;
1764 	}
1765 
1766 	return err;
1767 }
1768 
1769 static int igt_vm_isolation(void *arg)
1770 {
1771 	struct drm_i915_private *i915 = arg;
1772 	struct i915_gem_context *ctx_a, *ctx_b;
1773 	struct drm_i915_gem_object *obj_a, *obj_b;
1774 	unsigned long num_engines, count;
1775 	struct intel_engine_cs *engine;
1776 	struct igt_live_test t;
1777 	I915_RND_STATE(prng);
1778 	struct file *file;
1779 	u64 vm_total;
1780 	u32 expected;
1781 	int err;
1782 
1783 	if (GRAPHICS_VER(i915) < 7)
1784 		return 0;
1785 
1786 	/*
1787 	 * The simple goal here is that a write into one context is not
1788 	 * observed in a second (separate page tables and scratch).
1789 	 */
1790 
1791 	file = mock_file(i915);
1792 	if (IS_ERR(file))
1793 		return PTR_ERR(file);
1794 
1795 	err = igt_live_test_begin(&t, i915, __func__, "");
1796 	if (err)
1797 		goto out_file;
1798 
1799 	ctx_a = live_context(i915, file);
1800 	if (IS_ERR(ctx_a)) {
1801 		err = PTR_ERR(ctx_a);
1802 		goto out_file;
1803 	}
1804 
1805 	ctx_b = live_context(i915, file);
1806 	if (IS_ERR(ctx_b)) {
1807 		err = PTR_ERR(ctx_b);
1808 		goto out_file;
1809 	}
1810 
	/* We can only test vm isolation if the vms are distinct */
1812 	if (ctx_a->vm == ctx_b->vm)
1813 		goto out_file;
1814 
1815 	/* Read the initial state of the scratch page */
1816 	err = check_scratch_page(ctx_a, &expected);
1817 	if (err)
1818 		goto out_file;
1819 
1820 	err = check_scratch_page(ctx_b, &expected);
1821 	if (err)
1822 		goto out_file;
1823 
1824 	vm_total = ctx_a->vm->total;
1825 	GEM_BUG_ON(ctx_b->vm->total != vm_total);
1826 
1827 	obj_a = i915_gem_object_create_internal(i915, PAGE_SIZE);
1828 	if (IS_ERR(obj_a)) {
1829 		err = PTR_ERR(obj_a);
1830 		goto out_file;
1831 	}
1832 
1833 	obj_b = i915_gem_object_create_internal(i915, PAGE_SIZE);
1834 	if (IS_ERR(obj_b)) {
1835 		err = PTR_ERR(obj_b);
1836 		goto put_a;
1837 	}
1838 
1839 	count = 0;
1840 	num_engines = 0;
1841 	for_each_uabi_engine(engine, i915) {
1842 		IGT_TIMEOUT(end_time);
1843 		unsigned long this = 0;
1844 
1845 		if (!intel_engine_can_store_dword(engine))
1846 			continue;
1847 
1848 		/* Not all engines have their own GPR! */
1849 		if (GRAPHICS_VER(i915) < 8 && engine->class != RENDER_CLASS)
1850 			continue;
1851 
1852 		while (!__igt_timeout(end_time, NULL)) {
1853 			u32 value = 0xc5c5c5c5;
1854 			u64 offset;
1855 
1856 			/* Leave enough space at offset 0 for the batch */
1857 			offset = igt_random_offset(&prng,
1858 						   I915_GTT_PAGE_SIZE, vm_total,
1859 						   sizeof(u32), alignof_dword);
1860 
1861 			err = write_to_scratch(ctx_a, engine, obj_a,
1862 					       offset, 0xdeadbeef);
1863 			if (err == 0)
1864 				err = read_from_scratch(ctx_b, engine, obj_b,
1865 							offset, &value);
1866 			if (err)
1867 				goto put_b;
1868 
1869 			if (value != expected) {
1870 				pr_err("%s: Read %08x from scratch (offset 0x%08x_%08x), after %lu reads!\n",
1871 				       engine->name, value,
1872 				       upper_32_bits(offset),
1873 				       lower_32_bits(offset),
1874 				       this);
1875 				err = -EINVAL;
1876 				goto put_b;
1877 			}
1878 
1879 			this++;
1880 		}
1881 		count += this;
1882 		num_engines++;
1883 	}
1884 	pr_info("Checked %lu scratch offsets across %lu engines\n",
1885 		count, num_engines);
1886 
1887 put_b:
1888 	i915_gem_object_put(obj_b);
1889 put_a:
1890 	i915_gem_object_put(obj_a);
1891 out_file:
1892 	if (igt_live_test_end(&t))
1893 		err = -EIO;
1894 	fput(file);
1895 	return err;
1896 }
1897 
1898 int i915_gem_context_live_selftests(struct drm_i915_private *i915)
1899 {
1900 	static const struct i915_subtest tests[] = {
1901 		SUBTEST(live_nop_switch),
1902 		SUBTEST(live_parallel_switch),
1903 		SUBTEST(igt_ctx_exec),
1904 		SUBTEST(igt_ctx_readonly),
1905 		SUBTEST(igt_ctx_sseu),
1906 		SUBTEST(igt_shared_ctx_exec),
1907 		SUBTEST(igt_vm_isolation),
1908 	};
1909 
1910 	if (intel_gt_is_wedged(to_gt(i915)))
1911 		return 0;
1912 
1913 	return i915_live_subtests(tests, i915);
1914 }
1915