xref: /openbmc/linux/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c (revision 583f12a80dfb7997d59a42e8642019695f5aa15a)
1 /*
2  * SPDX-License-Identifier: MIT
3  *
4  * Copyright © 2017 Intel Corporation
5  */
6 
7 #include <linux/prime_numbers.h>
8 #include <linux/string_helpers.h>
9 
10 #include "gem/i915_gem_internal.h"
11 #include "gem/i915_gem_pm.h"
12 #include "gt/intel_engine_pm.h"
13 #include "gt/intel_engine_regs.h"
14 #include "gt/intel_gt.h"
15 #include "gt/intel_gt_requests.h"
16 #include "gt/intel_reset.h"
17 #include "i915_selftest.h"
18 
19 #include "gem/selftests/igt_gem_utils.h"
20 #include "selftests/i915_random.h"
21 #include "selftests/igt_flush_test.h"
22 #include "selftests/igt_live_test.h"
23 #include "selftests/igt_reset.h"
24 #include "selftests/igt_spinner.h"
25 #include "selftests/mock_drm.h"
26 #include "selftests/mock_gem_device.h"
27 
28 #include "huge_gem_object.h"
29 #include "igt_gem_utils.h"
30 
/* Number of u32 dwords that fit into a single page. */
#define DW_PER_PAGE (PAGE_SIZE / sizeof(u32))
32 
33 static int live_nop_switch(void *arg)
34 {
35 	const unsigned int nctx = 1024;
36 	struct drm_i915_private *i915 = arg;
37 	struct intel_engine_cs *engine;
38 	struct i915_gem_context **ctx;
39 	struct igt_live_test t;
40 	struct file *file;
41 	unsigned long n;
42 	int err = -ENODEV;
43 
44 	/*
45 	 * Create as many contexts as we can feasibly get away with
46 	 * and check we can switch between them rapidly.
47 	 *
48 	 * Serves as very simple stress test for submission and HW switching
49 	 * between contexts.
50 	 */
51 
52 	if (!DRIVER_CAPS(i915)->has_logical_contexts)
53 		return 0;
54 
55 	file = mock_file(i915);
56 	if (IS_ERR(file))
57 		return PTR_ERR(file);
58 
59 	ctx = kcalloc(nctx, sizeof(*ctx), GFP_KERNEL);
60 	if (!ctx) {
61 		err = -ENOMEM;
62 		goto out_file;
63 	}
64 
65 	for (n = 0; n < nctx; n++) {
66 		ctx[n] = live_context(i915, file);
67 		if (IS_ERR(ctx[n])) {
68 			err = PTR_ERR(ctx[n]);
69 			goto out_ctx;
70 		}
71 	}
72 
73 	for_each_uabi_engine(engine, i915) {
74 		struct i915_request *rq = NULL;
75 		unsigned long end_time, prime;
76 		ktime_t times[2] = {};
77 
78 		times[0] = ktime_get_raw();
79 		for (n = 0; n < nctx; n++) {
80 			struct i915_request *this;
81 
82 			this = igt_request_alloc(ctx[n], engine);
83 			if (IS_ERR(this)) {
84 				err = PTR_ERR(this);
85 				goto out_ctx;
86 			}
87 			if (rq) {
88 				i915_request_await_dma_fence(this, &rq->fence);
89 				i915_request_put(rq);
90 			}
91 			rq = i915_request_get(this);
92 			i915_request_add(this);
93 		}
94 		if (i915_request_wait(rq, 0, 10 * HZ) < 0) {
95 			pr_err("Failed to populated %d contexts\n", nctx);
96 			intel_gt_set_wedged(engine->gt);
97 			i915_request_put(rq);
98 			err = -EIO;
99 			goto out_ctx;
100 		}
101 		i915_request_put(rq);
102 
103 		times[1] = ktime_get_raw();
104 
105 		pr_info("Populated %d contexts on %s in %lluns\n",
106 			nctx, engine->name, ktime_to_ns(times[1] - times[0]));
107 
108 		err = igt_live_test_begin(&t, i915, __func__, engine->name);
109 		if (err)
110 			goto out_ctx;
111 
112 		end_time = jiffies + i915_selftest.timeout_jiffies;
113 		for_each_prime_number_from(prime, 2, 8192) {
114 			times[1] = ktime_get_raw();
115 
116 			rq = NULL;
117 			for (n = 0; n < prime; n++) {
118 				struct i915_request *this;
119 
120 				this = igt_request_alloc(ctx[n % nctx], engine);
121 				if (IS_ERR(this)) {
122 					err = PTR_ERR(this);
123 					goto out_ctx;
124 				}
125 
126 				if (rq) { /* Force submission order */
127 					i915_request_await_dma_fence(this, &rq->fence);
128 					i915_request_put(rq);
129 				}
130 
131 				/*
132 				 * This space is left intentionally blank.
133 				 *
134 				 * We do not actually want to perform any
135 				 * action with this request, we just want
136 				 * to measure the latency in allocation
137 				 * and submission of our breadcrumbs -
138 				 * ensuring that the bare request is sufficient
139 				 * for the system to work (i.e. proper HEAD
140 				 * tracking of the rings, interrupt handling,
141 				 * etc). It also gives us the lowest bounds
142 				 * for latency.
143 				 */
144 
145 				rq = i915_request_get(this);
146 				i915_request_add(this);
147 			}
148 			GEM_BUG_ON(!rq);
149 			if (i915_request_wait(rq, 0, HZ / 5) < 0) {
150 				pr_err("Switching between %ld contexts timed out\n",
151 				       prime);
152 				intel_gt_set_wedged(engine->gt);
153 				i915_request_put(rq);
154 				break;
155 			}
156 			i915_request_put(rq);
157 
158 			times[1] = ktime_sub(ktime_get_raw(), times[1]);
159 			if (prime == 2)
160 				times[0] = times[1];
161 
162 			if (__igt_timeout(end_time, NULL))
163 				break;
164 		}
165 
166 		err = igt_live_test_end(&t);
167 		if (err)
168 			goto out_ctx;
169 
170 		pr_info("Switch latencies on %s: 1 = %lluns, %lu = %lluns\n",
171 			engine->name,
172 			ktime_to_ns(times[0]),
173 			prime - 1, div64_u64(ktime_to_ns(times[1]), prime - 1));
174 	}
175 
176 out_ctx:
177 	kfree(ctx);
178 out_file:
179 	fput(file);
180 	return err;
181 }
182 
183 struct parallel_switch {
184 	struct kthread_worker *worker;
185 	struct kthread_work work;
186 	struct intel_context *ce[2];
187 	int result;
188 };
189 
190 static void __live_parallel_switch1(struct kthread_work *work)
191 {
192 	struct parallel_switch *arg =
193 		container_of(work, typeof(*arg), work);
194 	IGT_TIMEOUT(end_time);
195 	unsigned long count;
196 
197 	count = 0;
198 	arg->result = 0;
199 	do {
200 		struct i915_request *rq = NULL;
201 		int n;
202 
203 		for (n = 0; !arg->result && n < ARRAY_SIZE(arg->ce); n++) {
204 			struct i915_request *prev = rq;
205 
206 			rq = i915_request_create(arg->ce[n]);
207 			if (IS_ERR(rq)) {
208 				i915_request_put(prev);
209 				arg->result = PTR_ERR(rq);
210 				break;
211 			}
212 
213 			i915_request_get(rq);
214 			if (prev) {
215 				arg->result =
216 					i915_request_await_dma_fence(rq,
217 								     &prev->fence);
218 				i915_request_put(prev);
219 			}
220 
221 			i915_request_add(rq);
222 		}
223 
224 		if (IS_ERR_OR_NULL(rq))
225 			break;
226 
227 		if (i915_request_wait(rq, 0, HZ) < 0)
228 			arg->result = -ETIME;
229 
230 		i915_request_put(rq);
231 
232 		count++;
233 	} while (!arg->result && !__igt_timeout(end_time, NULL));
234 
235 	pr_info("%s: %lu switches (sync) <%d>\n",
236 		arg->ce[0]->engine->name, count, arg->result);
237 }
238 
239 static void __live_parallel_switchN(struct kthread_work *work)
240 {
241 	struct parallel_switch *arg =
242 		container_of(work, typeof(*arg), work);
243 	struct i915_request *rq = NULL;
244 	IGT_TIMEOUT(end_time);
245 	unsigned long count;
246 	int n;
247 
248 	count = 0;
249 	arg->result = 0;
250 	do {
251 		for (n = 0; !arg->result && n < ARRAY_SIZE(arg->ce); n++) {
252 			struct i915_request *prev = rq;
253 
254 			rq = i915_request_create(arg->ce[n]);
255 			if (IS_ERR(rq)) {
256 				i915_request_put(prev);
257 				arg->result = PTR_ERR(rq);
258 				break;
259 			}
260 
261 			i915_request_get(rq);
262 			if (prev) {
263 				arg->result =
264 					i915_request_await_dma_fence(rq,
265 								     &prev->fence);
266 				i915_request_put(prev);
267 			}
268 
269 			i915_request_add(rq);
270 		}
271 
272 		count++;
273 	} while (!arg->result && !__igt_timeout(end_time, NULL));
274 
275 	if (!IS_ERR_OR_NULL(rq))
276 		i915_request_put(rq);
277 
278 	pr_info("%s: %lu switches (many) <%d>\n",
279 		arg->ce[0]->engine->name, count, arg->result);
280 }
281 
282 static int live_parallel_switch(void *arg)
283 {
284 	struct drm_i915_private *i915 = arg;
285 	static void (* const func[])(struct kthread_work *) = {
286 		__live_parallel_switch1,
287 		__live_parallel_switchN,
288 		NULL,
289 	};
290 	struct parallel_switch *data = NULL;
291 	struct i915_gem_engines *engines;
292 	struct i915_gem_engines_iter it;
293 	void (* const *fn)(struct kthread_work *);
294 	struct i915_gem_context *ctx;
295 	struct intel_context *ce;
296 	struct file *file;
297 	int n, m, count;
298 	int err = 0;
299 
300 	/*
301 	 * Check we can process switches on all engines simultaneously.
302 	 */
303 
304 	if (!DRIVER_CAPS(i915)->has_logical_contexts)
305 		return 0;
306 
307 	file = mock_file(i915);
308 	if (IS_ERR(file))
309 		return PTR_ERR(file);
310 
311 	ctx = live_context(i915, file);
312 	if (IS_ERR(ctx)) {
313 		err = PTR_ERR(ctx);
314 		goto out_file;
315 	}
316 
317 	engines = i915_gem_context_lock_engines(ctx);
318 	count = engines->num_engines;
319 
320 	data = kcalloc(count, sizeof(*data), GFP_KERNEL);
321 	if (!data) {
322 		i915_gem_context_unlock_engines(ctx);
323 		err = -ENOMEM;
324 		goto out_file;
325 	}
326 
327 	m = 0; /* Use the first context as our template for the engines */
328 	for_each_gem_engine(ce, engines, it) {
329 		err = intel_context_pin(ce);
330 		if (err) {
331 			i915_gem_context_unlock_engines(ctx);
332 			goto out;
333 		}
334 		data[m++].ce[0] = intel_context_get(ce);
335 	}
336 	i915_gem_context_unlock_engines(ctx);
337 
338 	/* Clone the same set of engines into the other contexts */
339 	for (n = 1; n < ARRAY_SIZE(data->ce); n++) {
340 		ctx = live_context(i915, file);
341 		if (IS_ERR(ctx)) {
342 			err = PTR_ERR(ctx);
343 			goto out;
344 		}
345 
346 		for (m = 0; m < count; m++) {
347 			if (!data[m].ce[0])
348 				continue;
349 
350 			ce = intel_context_create(data[m].ce[0]->engine);
351 			if (IS_ERR(ce))
352 				goto out;
353 
354 			err = intel_context_pin(ce);
355 			if (err) {
356 				intel_context_put(ce);
357 				goto out;
358 			}
359 
360 			data[m].ce[n] = ce;
361 		}
362 	}
363 
364 	for (n = 0; n < count; n++) {
365 		struct kthread_worker *worker;
366 
367 		if (!data[n].ce[0])
368 			continue;
369 
370 		worker = kthread_create_worker(0, "igt/parallel:%s",
371 					       data[n].ce[0]->engine->name);
372 		if (IS_ERR(worker))
373 			goto out;
374 
375 		data[n].worker = worker;
376 	}
377 
378 	for (fn = func; !err && *fn; fn++) {
379 		struct igt_live_test t;
380 
381 		err = igt_live_test_begin(&t, i915, __func__, "");
382 		if (err)
383 			break;
384 
385 		for (n = 0; n < count; n++) {
386 			if (!data[n].ce[0])
387 				continue;
388 
389 			data[n].result = 0;
390 			kthread_init_work(&data[n].work, *fn);
391 			kthread_queue_work(data[n].worker, &data[n].work);
392 		}
393 
394 		for (n = 0; n < count; n++) {
395 			if (data[n].ce[0]) {
396 				kthread_flush_work(&data[n].work);
397 				if (data[n].result && !err)
398 					err = data[n].result;
399 			}
400 		}
401 
402 		if (igt_live_test_end(&t))
403 			err = -EIO;
404 	}
405 
406 out:
407 	for (n = 0; n < count; n++) {
408 		for (m = 0; m < ARRAY_SIZE(data->ce); m++) {
409 			if (!data[n].ce[m])
410 				continue;
411 
412 			intel_context_unpin(data[n].ce[m]);
413 			intel_context_put(data[n].ce[m]);
414 		}
415 
416 		if (data[n].worker)
417 			kthread_destroy_worker(data[n].worker);
418 	}
419 	kfree(data);
420 out_file:
421 	fput(file);
422 	return err;
423 }
424 
425 static unsigned long real_page_count(struct drm_i915_gem_object *obj)
426 {
427 	return huge_gem_object_phys_size(obj) >> PAGE_SHIFT;
428 }
429 
430 static unsigned long fake_page_count(struct drm_i915_gem_object *obj)
431 {
432 	return huge_gem_object_dma_size(obj) >> PAGE_SHIFT;
433 }
434 
435 static int gpu_fill(struct intel_context *ce,
436 		    struct drm_i915_gem_object *obj,
437 		    unsigned int dw)
438 {
439 	struct i915_vma *vma;
440 	int err;
441 
442 	GEM_BUG_ON(obj->base.size > ce->vm->total);
443 	GEM_BUG_ON(!intel_engine_can_store_dword(ce->engine));
444 
445 	vma = i915_vma_instance(obj, ce->vm, NULL);
446 	if (IS_ERR(vma))
447 		return PTR_ERR(vma);
448 
449 	err = i915_vma_pin(vma, 0, 0, PIN_HIGH | PIN_USER);
450 	if (err)
451 		return err;
452 
453 	/*
454 	 * Within the GTT the huge objects maps every page onto
455 	 * its 1024 real pages (using phys_pfn = dma_pfn % 1024).
456 	 * We set the nth dword within the page using the nth
457 	 * mapping via the GTT - this should exercise the GTT mapping
458 	 * whilst checking that each context provides a unique view
459 	 * into the object.
460 	 */
461 	err = igt_gpu_fill_dw(ce, vma,
462 			      (dw * real_page_count(obj)) << PAGE_SHIFT |
463 			      (dw * sizeof(u32)),
464 			      real_page_count(obj),
465 			      dw);
466 	i915_vma_unpin(vma);
467 
468 	return err;
469 }
470 
471 static int cpu_fill(struct drm_i915_gem_object *obj, u32 value)
472 {
473 	const bool has_llc = HAS_LLC(to_i915(obj->base.dev));
474 	unsigned int need_flush;
475 	unsigned long n, m;
476 	int err;
477 
478 	i915_gem_object_lock(obj, NULL);
479 	err = i915_gem_object_prepare_write(obj, &need_flush);
480 	if (err)
481 		goto out;
482 
483 	for (n = 0; n < real_page_count(obj); n++) {
484 		u32 *map;
485 
486 		map = kmap_atomic(i915_gem_object_get_page(obj, n));
487 		for (m = 0; m < DW_PER_PAGE; m++)
488 			map[m] = value;
489 		if (!has_llc)
490 			drm_clflush_virt_range(map, PAGE_SIZE);
491 		kunmap_atomic(map);
492 	}
493 
494 	i915_gem_object_finish_access(obj);
495 	obj->read_domains = I915_GEM_DOMAIN_GTT | I915_GEM_DOMAIN_CPU;
496 	obj->write_domain = 0;
497 out:
498 	i915_gem_object_unlock(obj);
499 	return err;
500 }
501 
502 static noinline int cpu_check(struct drm_i915_gem_object *obj,
503 			      unsigned int idx, unsigned int max)
504 {
505 	unsigned int needs_flush;
506 	unsigned long n;
507 	int err;
508 
509 	i915_gem_object_lock(obj, NULL);
510 	err = i915_gem_object_prepare_read(obj, &needs_flush);
511 	if (err)
512 		goto out_unlock;
513 
514 	for (n = 0; n < real_page_count(obj); n++) {
515 		u32 *map, m;
516 
517 		map = kmap_atomic(i915_gem_object_get_page(obj, n));
518 		if (needs_flush & CLFLUSH_BEFORE)
519 			drm_clflush_virt_range(map, PAGE_SIZE);
520 
521 		for (m = 0; m < max; m++) {
522 			if (map[m] != m) {
523 				pr_err("%pS: Invalid value at object %d page %ld/%ld, offset %d/%d: found %x expected %x\n",
524 				       __builtin_return_address(0), idx,
525 				       n, real_page_count(obj), m, max,
526 				       map[m], m);
527 				err = -EINVAL;
528 				goto out_unmap;
529 			}
530 		}
531 
532 		for (; m < DW_PER_PAGE; m++) {
533 			if (map[m] != STACK_MAGIC) {
534 				pr_err("%pS: Invalid value at object %d page %ld, offset %d: found %x expected %x (uninitialised)\n",
535 				       __builtin_return_address(0), idx, n, m,
536 				       map[m], STACK_MAGIC);
537 				err = -EINVAL;
538 				goto out_unmap;
539 			}
540 		}
541 
542 out_unmap:
543 		kunmap_atomic(map);
544 		if (err)
545 			break;
546 	}
547 
548 	i915_gem_object_finish_access(obj);
549 out_unlock:
550 	i915_gem_object_unlock(obj);
551 	return err;
552 }
553 
554 static int file_add_object(struct file *file, struct drm_i915_gem_object *obj)
555 {
556 	int err;
557 
558 	GEM_BUG_ON(obj->base.handle_count);
559 
560 	/* tie the object to the drm_file for easy reaping */
561 	err = idr_alloc(&to_drm_file(file)->object_idr,
562 			&obj->base, 1, 0, GFP_KERNEL);
563 	if (err < 0)
564 		return err;
565 
566 	i915_gem_object_get(obj);
567 	obj->base.handle_count++;
568 	return 0;
569 }
570 
571 static struct drm_i915_gem_object *
572 create_test_object(struct i915_address_space *vm,
573 		   struct file *file,
574 		   struct list_head *objects)
575 {
576 	struct drm_i915_gem_object *obj;
577 	u64 size;
578 	int err;
579 
580 	/* Keep in GEM's good graces */
581 	intel_gt_retire_requests(vm->gt);
582 
583 	size = min(vm->total / 2, 1024ull * DW_PER_PAGE * PAGE_SIZE);
584 	size = round_down(size, DW_PER_PAGE * PAGE_SIZE);
585 
586 	obj = huge_gem_object(vm->i915, DW_PER_PAGE * PAGE_SIZE, size);
587 	if (IS_ERR(obj))
588 		return obj;
589 
590 	err = file_add_object(file, obj);
591 	i915_gem_object_put(obj);
592 	if (err)
593 		return ERR_PTR(err);
594 
595 	err = cpu_fill(obj, STACK_MAGIC);
596 	if (err) {
597 		pr_err("Failed to fill object with cpu, err=%d\n",
598 		       err);
599 		return ERR_PTR(err);
600 	}
601 
602 	list_add_tail(&obj->st_link, objects);
603 	return obj;
604 }
605 
606 static unsigned long max_dwords(struct drm_i915_gem_object *obj)
607 {
608 	unsigned long npages = fake_page_count(obj);
609 
610 	GEM_BUG_ON(!IS_ALIGNED(npages, DW_PER_PAGE));
611 	return npages / DW_PER_PAGE;
612 }
613 
/* Drop the reference on, and clear, every valid request in q[0..count). */
static void throttle_release(struct i915_request **q, int count)
{
	int i = 0;

	while (i < count) {
		struct i915_request **slot = &q[i++];

		if (!IS_ERR_OR_NULL(*slot))
			i915_request_put(fetch_and_zero(slot));
	}
}
625 
626 static int throttle(struct intel_context *ce,
627 		    struct i915_request **q, int count)
628 {
629 	int i;
630 
631 	if (!IS_ERR_OR_NULL(q[0])) {
632 		if (i915_request_wait(q[0],
633 				      I915_WAIT_INTERRUPTIBLE,
634 				      MAX_SCHEDULE_TIMEOUT) < 0)
635 			return -EINTR;
636 
637 		i915_request_put(q[0]);
638 	}
639 
640 	for (i = 0; i < count - 1; i++)
641 		q[i] = q[i + 1];
642 
643 	q[i] = intel_context_create_request(ce);
644 	if (IS_ERR(q[i]))
645 		return PTR_ERR(q[i]);
646 
647 	i915_request_get(q[i]);
648 	i915_request_add(q[i]);
649 
650 	return 0;
651 }
652 
653 static int igt_ctx_exec(void *arg)
654 {
655 	struct drm_i915_private *i915 = arg;
656 	struct intel_engine_cs *engine;
657 	int err = -ENODEV;
658 
659 	/*
660 	 * Create a few different contexts (with different mm) and write
661 	 * through each ctx/mm using the GPU making sure those writes end
662 	 * up in the expected pages of our obj.
663 	 */
664 
665 	if (!DRIVER_CAPS(i915)->has_logical_contexts)
666 		return 0;
667 
668 	for_each_uabi_engine(engine, i915) {
669 		struct drm_i915_gem_object *obj = NULL;
670 		unsigned long ncontexts, ndwords, dw;
671 		struct i915_request *tq[5] = {};
672 		struct igt_live_test t;
673 		IGT_TIMEOUT(end_time);
674 		LIST_HEAD(objects);
675 		struct file *file;
676 
677 		if (!intel_engine_can_store_dword(engine))
678 			continue;
679 
680 		if (!engine->context_size)
681 			continue; /* No logical context support in HW */
682 
683 		file = mock_file(i915);
684 		if (IS_ERR(file))
685 			return PTR_ERR(file);
686 
687 		err = igt_live_test_begin(&t, i915, __func__, engine->name);
688 		if (err)
689 			goto out_file;
690 
691 		ncontexts = 0;
692 		ndwords = 0;
693 		dw = 0;
694 		while (!time_after(jiffies, end_time)) {
695 			struct i915_gem_context *ctx;
696 			struct intel_context *ce;
697 
698 			ctx = kernel_context(i915, NULL);
699 			if (IS_ERR(ctx)) {
700 				err = PTR_ERR(ctx);
701 				goto out_file;
702 			}
703 
704 			ce = i915_gem_context_get_engine(ctx, engine->legacy_idx);
705 			GEM_BUG_ON(IS_ERR(ce));
706 
707 			if (!obj) {
708 				obj = create_test_object(ce->vm, file, &objects);
709 				if (IS_ERR(obj)) {
710 					err = PTR_ERR(obj);
711 					intel_context_put(ce);
712 					kernel_context_close(ctx);
713 					goto out_file;
714 				}
715 			}
716 
717 			err = gpu_fill(ce, obj, dw);
718 			if (err) {
719 				pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) [full-ppgtt? %s], err=%d\n",
720 				       ndwords, dw, max_dwords(obj),
721 				       engine->name,
722 				       str_yes_no(i915_gem_context_has_full_ppgtt(ctx)),
723 				       err);
724 				intel_context_put(ce);
725 				kernel_context_close(ctx);
726 				goto out_file;
727 			}
728 
729 			err = throttle(ce, tq, ARRAY_SIZE(tq));
730 			if (err) {
731 				intel_context_put(ce);
732 				kernel_context_close(ctx);
733 				goto out_file;
734 			}
735 
736 			if (++dw == max_dwords(obj)) {
737 				obj = NULL;
738 				dw = 0;
739 			}
740 
741 			ndwords++;
742 			ncontexts++;
743 
744 			intel_context_put(ce);
745 			kernel_context_close(ctx);
746 		}
747 
748 		pr_info("Submitted %lu contexts to %s, filling %lu dwords\n",
749 			ncontexts, engine->name, ndwords);
750 
751 		ncontexts = dw = 0;
752 		list_for_each_entry(obj, &objects, st_link) {
753 			unsigned int rem =
754 				min_t(unsigned int, ndwords - dw, max_dwords(obj));
755 
756 			err = cpu_check(obj, ncontexts++, rem);
757 			if (err)
758 				break;
759 
760 			dw += rem;
761 		}
762 
763 out_file:
764 		throttle_release(tq, ARRAY_SIZE(tq));
765 		if (igt_live_test_end(&t))
766 			err = -EIO;
767 
768 		fput(file);
769 		if (err)
770 			return err;
771 
772 		i915_gem_drain_freed_objects(i915);
773 	}
774 
775 	return 0;
776 }
777 
778 static int igt_shared_ctx_exec(void *arg)
779 {
780 	struct drm_i915_private *i915 = arg;
781 	struct i915_request *tq[5] = {};
782 	struct i915_gem_context *parent;
783 	struct intel_engine_cs *engine;
784 	struct igt_live_test t;
785 	struct file *file;
786 	int err = 0;
787 
788 	/*
789 	 * Create a few different contexts with the same mm and write
790 	 * through each ctx using the GPU making sure those writes end
791 	 * up in the expected pages of our obj.
792 	 */
793 	if (!DRIVER_CAPS(i915)->has_logical_contexts)
794 		return 0;
795 
796 	file = mock_file(i915);
797 	if (IS_ERR(file))
798 		return PTR_ERR(file);
799 
800 	parent = live_context(i915, file);
801 	if (IS_ERR(parent)) {
802 		err = PTR_ERR(parent);
803 		goto out_file;
804 	}
805 
806 	if (!parent->vm) { /* not full-ppgtt; nothing to share */
807 		err = 0;
808 		goto out_file;
809 	}
810 
811 	err = igt_live_test_begin(&t, i915, __func__, "");
812 	if (err)
813 		goto out_file;
814 
815 	for_each_uabi_engine(engine, i915) {
816 		unsigned long ncontexts, ndwords, dw;
817 		struct drm_i915_gem_object *obj = NULL;
818 		IGT_TIMEOUT(end_time);
819 		LIST_HEAD(objects);
820 
821 		if (!intel_engine_can_store_dword(engine))
822 			continue;
823 
824 		dw = 0;
825 		ndwords = 0;
826 		ncontexts = 0;
827 		while (!time_after(jiffies, end_time)) {
828 			struct i915_gem_context *ctx;
829 			struct intel_context *ce;
830 
831 			ctx = kernel_context(i915, parent->vm);
832 			if (IS_ERR(ctx)) {
833 				err = PTR_ERR(ctx);
834 				goto out_test;
835 			}
836 
837 			ce = i915_gem_context_get_engine(ctx, engine->legacy_idx);
838 			GEM_BUG_ON(IS_ERR(ce));
839 
840 			if (!obj) {
841 				obj = create_test_object(parent->vm,
842 							 file, &objects);
843 				if (IS_ERR(obj)) {
844 					err = PTR_ERR(obj);
845 					intel_context_put(ce);
846 					kernel_context_close(ctx);
847 					goto out_test;
848 				}
849 			}
850 
851 			err = gpu_fill(ce, obj, dw);
852 			if (err) {
853 				pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) [full-ppgtt? %s], err=%d\n",
854 				       ndwords, dw, max_dwords(obj),
855 				       engine->name,
856 				       str_yes_no(i915_gem_context_has_full_ppgtt(ctx)),
857 				       err);
858 				intel_context_put(ce);
859 				kernel_context_close(ctx);
860 				goto out_test;
861 			}
862 
863 			err = throttle(ce, tq, ARRAY_SIZE(tq));
864 			if (err) {
865 				intel_context_put(ce);
866 				kernel_context_close(ctx);
867 				goto out_test;
868 			}
869 
870 			if (++dw == max_dwords(obj)) {
871 				obj = NULL;
872 				dw = 0;
873 			}
874 
875 			ndwords++;
876 			ncontexts++;
877 
878 			intel_context_put(ce);
879 			kernel_context_close(ctx);
880 		}
881 		pr_info("Submitted %lu contexts to %s, filling %lu dwords\n",
882 			ncontexts, engine->name, ndwords);
883 
884 		ncontexts = dw = 0;
885 		list_for_each_entry(obj, &objects, st_link) {
886 			unsigned int rem =
887 				min_t(unsigned int, ndwords - dw, max_dwords(obj));
888 
889 			err = cpu_check(obj, ncontexts++, rem);
890 			if (err)
891 				goto out_test;
892 
893 			dw += rem;
894 		}
895 
896 		i915_gem_drain_freed_objects(i915);
897 	}
898 out_test:
899 	throttle_release(tq, ARRAY_SIZE(tq));
900 	if (igt_live_test_end(&t))
901 		err = -EIO;
902 out_file:
903 	fput(file);
904 	return err;
905 }
906 
907 static int rpcs_query_batch(struct drm_i915_gem_object *rpcs,
908 			    struct i915_vma *vma,
909 			    struct intel_engine_cs *engine)
910 {
911 	u32 *cmd;
912 
913 	GEM_BUG_ON(GRAPHICS_VER(vma->vm->i915) < 8);
914 
915 	cmd = i915_gem_object_pin_map(rpcs, I915_MAP_WB);
916 	if (IS_ERR(cmd))
917 		return PTR_ERR(cmd);
918 
919 	*cmd++ = MI_STORE_REGISTER_MEM_GEN8;
920 	*cmd++ = i915_mmio_reg_offset(GEN8_R_PWR_CLK_STATE(engine->mmio_base));
921 	*cmd++ = lower_32_bits(i915_vma_offset(vma));
922 	*cmd++ = upper_32_bits(i915_vma_offset(vma));
923 	*cmd = MI_BATCH_BUFFER_END;
924 
925 	__i915_gem_object_flush_map(rpcs, 0, 64);
926 	i915_gem_object_unpin_map(rpcs);
927 
928 	intel_gt_chipset_flush(vma->vm->gt);
929 
930 	return 0;
931 }
932 
933 static int
934 emit_rpcs_query(struct drm_i915_gem_object *obj,
935 		struct intel_context *ce,
936 		struct i915_request **rq_out)
937 {
938 	struct drm_i915_private *i915 = to_i915(obj->base.dev);
939 	struct i915_request *rq;
940 	struct i915_gem_ww_ctx ww;
941 	struct i915_vma *batch;
942 	struct i915_vma *vma;
943 	struct drm_i915_gem_object *rpcs;
944 	int err;
945 
946 	GEM_BUG_ON(!intel_engine_can_store_dword(ce->engine));
947 
948 	if (GRAPHICS_VER(i915) < 8)
949 		return -EINVAL;
950 
951 	vma = i915_vma_instance(obj, ce->vm, NULL);
952 	if (IS_ERR(vma))
953 		return PTR_ERR(vma);
954 
955 	rpcs = i915_gem_object_create_internal(i915, PAGE_SIZE);
956 	if (IS_ERR(rpcs))
957 		return PTR_ERR(rpcs);
958 
959 	batch = i915_vma_instance(rpcs, ce->vm, NULL);
960 	if (IS_ERR(batch)) {
961 		err = PTR_ERR(batch);
962 		goto err_put;
963 	}
964 
965 	i915_gem_ww_ctx_init(&ww, false);
966 retry:
967 	err = i915_gem_object_lock(obj, &ww);
968 	if (!err)
969 		err = i915_gem_object_lock(rpcs, &ww);
970 	if (!err)
971 		err = i915_gem_object_set_to_gtt_domain(obj, false);
972 	if (!err)
973 		err = i915_vma_pin_ww(vma, &ww, 0, 0, PIN_USER);
974 	if (err)
975 		goto err_put;
976 
977 	err = i915_vma_pin_ww(batch, &ww, 0, 0, PIN_USER);
978 	if (err)
979 		goto err_vma;
980 
981 	err = rpcs_query_batch(rpcs, vma, ce->engine);
982 	if (err)
983 		goto err_batch;
984 
985 	rq = i915_request_create(ce);
986 	if (IS_ERR(rq)) {
987 		err = PTR_ERR(rq);
988 		goto err_batch;
989 	}
990 
991 	err = i915_vma_move_to_active(batch, rq, 0);
992 	if (err)
993 		goto skip_request;
994 
995 	err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
996 	if (err)
997 		goto skip_request;
998 
999 	if (rq->engine->emit_init_breadcrumb) {
1000 		err = rq->engine->emit_init_breadcrumb(rq);
1001 		if (err)
1002 			goto skip_request;
1003 	}
1004 
1005 	err = rq->engine->emit_bb_start(rq,
1006 					i915_vma_offset(batch),
1007 					i915_vma_size(batch),
1008 					0);
1009 	if (err)
1010 		goto skip_request;
1011 
1012 	*rq_out = i915_request_get(rq);
1013 
1014 skip_request:
1015 	if (err)
1016 		i915_request_set_error_once(rq, err);
1017 	i915_request_add(rq);
1018 err_batch:
1019 	i915_vma_unpin(batch);
1020 err_vma:
1021 	i915_vma_unpin(vma);
1022 err_put:
1023 	if (err == -EDEADLK) {
1024 		err = i915_gem_ww_ctx_backoff(&ww);
1025 		if (!err)
1026 			goto retry;
1027 	}
1028 	i915_gem_ww_ctx_fini(&ww);
1029 	i915_gem_object_put(rpcs);
1030 	return err;
1031 }
1032 
/*
 * Flags selecting the SSEU subtest variant:
 *  TEST_IDLE  - additionally re-read the slice count after flushing
 *  TEST_BUSY  - keep a spinner running on the context during the test
 *  TEST_RESET - as TEST_BUSY, plus reset the engine before reading
 */
#define TEST_IDLE	BIT(0)
#define TEST_BUSY	BIT(1)
#define TEST_RESET	BIT(2)
1036 
1037 static int
1038 __sseu_prepare(const char *name,
1039 	       unsigned int flags,
1040 	       struct intel_context *ce,
1041 	       struct igt_spinner **spin)
1042 {
1043 	struct i915_request *rq;
1044 	int ret;
1045 
1046 	*spin = NULL;
1047 	if (!(flags & (TEST_BUSY | TEST_RESET)))
1048 		return 0;
1049 
1050 	*spin = kzalloc(sizeof(**spin), GFP_KERNEL);
1051 	if (!*spin)
1052 		return -ENOMEM;
1053 
1054 	ret = igt_spinner_init(*spin, ce->engine->gt);
1055 	if (ret)
1056 		goto err_free;
1057 
1058 	rq = igt_spinner_create_request(*spin, ce, MI_NOOP);
1059 	if (IS_ERR(rq)) {
1060 		ret = PTR_ERR(rq);
1061 		goto err_fini;
1062 	}
1063 
1064 	i915_request_add(rq);
1065 
1066 	if (!igt_wait_for_spinner(*spin, rq)) {
1067 		pr_err("%s: Spinner failed to start!\n", name);
1068 		ret = -ETIMEDOUT;
1069 		goto err_end;
1070 	}
1071 
1072 	return 0;
1073 
1074 err_end:
1075 	igt_spinner_end(*spin);
1076 err_fini:
1077 	igt_spinner_fini(*spin);
1078 err_free:
1079 	kfree(fetch_and_zero(spin));
1080 	return ret;
1081 }
1082 
1083 static int
1084 __read_slice_count(struct intel_context *ce,
1085 		   struct drm_i915_gem_object *obj,
1086 		   struct igt_spinner *spin,
1087 		   u32 *rpcs)
1088 {
1089 	struct i915_request *rq = NULL;
1090 	u32 s_mask, s_shift;
1091 	unsigned int cnt;
1092 	u32 *buf, val;
1093 	long ret;
1094 
1095 	ret = emit_rpcs_query(obj, ce, &rq);
1096 	if (ret)
1097 		return ret;
1098 
1099 	if (spin)
1100 		igt_spinner_end(spin);
1101 
1102 	ret = i915_request_wait(rq, 0, MAX_SCHEDULE_TIMEOUT);
1103 	i915_request_put(rq);
1104 	if (ret < 0)
1105 		return ret;
1106 
1107 	buf = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WB);
1108 	if (IS_ERR(buf)) {
1109 		ret = PTR_ERR(buf);
1110 		return ret;
1111 	}
1112 
1113 	if (GRAPHICS_VER(ce->engine->i915) >= 11) {
1114 		s_mask = GEN11_RPCS_S_CNT_MASK;
1115 		s_shift = GEN11_RPCS_S_CNT_SHIFT;
1116 	} else {
1117 		s_mask = GEN8_RPCS_S_CNT_MASK;
1118 		s_shift = GEN8_RPCS_S_CNT_SHIFT;
1119 	}
1120 
1121 	val = *buf;
1122 	cnt = (val & s_mask) >> s_shift;
1123 	*rpcs = val;
1124 
1125 	i915_gem_object_unpin_map(obj);
1126 
1127 	return cnt;
1128 }
1129 
1130 static int
1131 __check_rpcs(const char *name, u32 rpcs, int slices, unsigned int expected,
1132 	     const char *prefix, const char *suffix)
1133 {
1134 	if (slices == expected)
1135 		return 0;
1136 
1137 	if (slices < 0) {
1138 		pr_err("%s: %s read slice count failed with %d%s\n",
1139 		       name, prefix, slices, suffix);
1140 		return slices;
1141 	}
1142 
1143 	pr_err("%s: %s slice count %d is not %u%s\n",
1144 	       name, prefix, slices, expected, suffix);
1145 
1146 	pr_info("RPCS=0x%x; %u%sx%u%s\n",
1147 		rpcs, slices,
1148 		(rpcs & GEN8_RPCS_S_CNT_ENABLE) ? "*" : "",
1149 		(rpcs & GEN8_RPCS_SS_CNT_MASK) >> GEN8_RPCS_SS_CNT_SHIFT,
1150 		(rpcs & GEN8_RPCS_SS_CNT_ENABLE) ? "*" : "");
1151 
1152 	return -EINVAL;
1153 }
1154 
1155 static int
1156 __sseu_finish(const char *name,
1157 	      unsigned int flags,
1158 	      struct intel_context *ce,
1159 	      struct drm_i915_gem_object *obj,
1160 	      unsigned int expected,
1161 	      struct igt_spinner *spin)
1162 {
1163 	unsigned int slices = hweight32(ce->engine->sseu.slice_mask);
1164 	u32 rpcs = 0;
1165 	int ret = 0;
1166 
1167 	if (flags & TEST_RESET) {
1168 		ret = intel_engine_reset(ce->engine, "sseu");
1169 		if (ret)
1170 			goto out;
1171 	}
1172 
1173 	ret = __read_slice_count(ce, obj,
1174 				 flags & TEST_RESET ? NULL : spin, &rpcs);
1175 	ret = __check_rpcs(name, rpcs, ret, expected, "Context", "!");
1176 	if (ret)
1177 		goto out;
1178 
1179 	ret = __read_slice_count(ce->engine->kernel_context, obj, NULL, &rpcs);
1180 	ret = __check_rpcs(name, rpcs, ret, slices, "Kernel context", "!");
1181 
1182 out:
1183 	if (spin)
1184 		igt_spinner_end(spin);
1185 
1186 	if ((flags & TEST_IDLE) && ret == 0) {
1187 		ret = igt_flush_test(ce->engine->i915);
1188 		if (ret)
1189 			return ret;
1190 
1191 		ret = __read_slice_count(ce, obj, NULL, &rpcs);
1192 		ret = __check_rpcs(name, rpcs, ret, expected,
1193 				   "Context", " after idle!");
1194 	}
1195 
1196 	return ret;
1197 }
1198 
1199 static int
1200 __sseu_test(const char *name,
1201 	    unsigned int flags,
1202 	    struct intel_context *ce,
1203 	    struct drm_i915_gem_object *obj,
1204 	    struct intel_sseu sseu)
1205 {
1206 	struct igt_spinner *spin = NULL;
1207 	int ret;
1208 
1209 	intel_engine_pm_get(ce->engine);
1210 
1211 	ret = __sseu_prepare(name, flags, ce, &spin);
1212 	if (ret)
1213 		goto out_pm;
1214 
1215 	ret = intel_context_reconfigure_sseu(ce, sseu);
1216 	if (ret)
1217 		goto out_spin;
1218 
1219 	ret = __sseu_finish(name, flags, ce, obj,
1220 			    hweight32(sseu.slice_mask), spin);
1221 
1222 out_spin:
1223 	if (spin) {
1224 		igt_spinner_end(spin);
1225 		igt_spinner_fini(spin);
1226 		kfree(spin);
1227 	}
1228 out_pm:
1229 	intel_engine_pm_put(ce->engine);
1230 	return ret;
1231 }
1232 
1233 static int
1234 __igt_ctx_sseu(struct drm_i915_private *i915,
1235 	       const char *name,
1236 	       unsigned int flags)
1237 {
1238 	struct drm_i915_gem_object *obj;
1239 	int inst = 0;
1240 	int ret = 0;
1241 
1242 	if (GRAPHICS_VER(i915) < 9)
1243 		return 0;
1244 
1245 	if (flags & TEST_RESET)
1246 		igt_global_reset_lock(to_gt(i915));
1247 
1248 	obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
1249 	if (IS_ERR(obj)) {
1250 		ret = PTR_ERR(obj);
1251 		goto out_unlock;
1252 	}
1253 
1254 	do {
1255 		struct intel_engine_cs *engine;
1256 		struct intel_context *ce;
1257 		struct intel_sseu pg_sseu;
1258 
1259 		engine = intel_engine_lookup_user(i915,
1260 						  I915_ENGINE_CLASS_RENDER,
1261 						  inst++);
1262 		if (!engine)
1263 			break;
1264 
1265 		if (hweight32(engine->sseu.slice_mask) < 2)
1266 			continue;
1267 
1268 		if (!engine->gt->info.sseu.has_slice_pg)
1269 			continue;
1270 
1271 		/*
1272 		 * Gen11 VME friendly power-gated configuration with
1273 		 * half enabled sub-slices.
1274 		 */
1275 		pg_sseu = engine->sseu;
1276 		pg_sseu.slice_mask = 1;
1277 		pg_sseu.subslice_mask =
1278 			~(~0 << (hweight32(engine->sseu.subslice_mask) / 2));
1279 
1280 		pr_info("%s: SSEU subtest '%s', flags=%x, def_slices=%u, pg_slices=%u\n",
1281 			engine->name, name, flags,
1282 			hweight32(engine->sseu.slice_mask),
1283 			hweight32(pg_sseu.slice_mask));
1284 
1285 		ce = intel_context_create(engine);
1286 		if (IS_ERR(ce)) {
1287 			ret = PTR_ERR(ce);
1288 			goto out_put;
1289 		}
1290 
1291 		ret = intel_context_pin(ce);
1292 		if (ret)
1293 			goto out_ce;
1294 
1295 		/* First set the default mask. */
1296 		ret = __sseu_test(name, flags, ce, obj, engine->sseu);
1297 		if (ret)
1298 			goto out_unpin;
1299 
1300 		/* Then set a power-gated configuration. */
1301 		ret = __sseu_test(name, flags, ce, obj, pg_sseu);
1302 		if (ret)
1303 			goto out_unpin;
1304 
1305 		/* Back to defaults. */
1306 		ret = __sseu_test(name, flags, ce, obj, engine->sseu);
1307 		if (ret)
1308 			goto out_unpin;
1309 
1310 		/* One last power-gated configuration for the road. */
1311 		ret = __sseu_test(name, flags, ce, obj, pg_sseu);
1312 		if (ret)
1313 			goto out_unpin;
1314 
1315 out_unpin:
1316 		intel_context_unpin(ce);
1317 out_ce:
1318 		intel_context_put(ce);
1319 	} while (!ret);
1320 
1321 	if (igt_flush_test(i915))
1322 		ret = -EIO;
1323 
1324 out_put:
1325 	i915_gem_object_put(obj);
1326 
1327 out_unlock:
1328 	if (flags & TEST_RESET)
1329 		igt_global_reset_unlock(to_gt(i915));
1330 
1331 	if (ret)
1332 		pr_err("%s: Failed with %d!\n", name, ret);
1333 
1334 	return ret;
1335 }
1336 
1337 static int igt_ctx_sseu(void *arg)
1338 {
1339 	struct {
1340 		const char *name;
1341 		unsigned int flags;
1342 	} *phase, phases[] = {
1343 		{ .name = "basic", .flags = 0 },
1344 		{ .name = "idle", .flags = TEST_IDLE },
1345 		{ .name = "busy", .flags = TEST_BUSY },
1346 		{ .name = "busy-reset", .flags = TEST_BUSY | TEST_RESET },
1347 		{ .name = "busy-idle", .flags = TEST_BUSY | TEST_IDLE },
1348 		{ .name = "reset-idle", .flags = TEST_RESET | TEST_IDLE },
1349 	};
1350 	unsigned int i;
1351 	int ret = 0;
1352 
1353 	for (i = 0, phase = phases; ret == 0 && i < ARRAY_SIZE(phases);
1354 	     i++, phase++)
1355 		ret = __igt_ctx_sseu(arg, phase->name, phase->flags);
1356 
1357 	return ret;
1358 }
1359 
1360 static int igt_ctx_readonly(void *arg)
1361 {
1362 	struct drm_i915_private *i915 = arg;
1363 	unsigned long idx, ndwords, dw, num_engines;
1364 	struct drm_i915_gem_object *obj = NULL;
1365 	struct i915_request *tq[5] = {};
1366 	struct i915_gem_engines_iter it;
1367 	struct i915_address_space *vm;
1368 	struct i915_gem_context *ctx;
1369 	struct intel_context *ce;
1370 	struct igt_live_test t;
1371 	I915_RND_STATE(prng);
1372 	IGT_TIMEOUT(end_time);
1373 	LIST_HEAD(objects);
1374 	struct file *file;
1375 	int err = -ENODEV;
1376 
1377 	/*
1378 	 * Create a few read-only objects (with the occasional writable object)
1379 	 * and try to write into these object checking that the GPU discards
1380 	 * any write to a read-only object.
1381 	 */
1382 
1383 	file = mock_file(i915);
1384 	if (IS_ERR(file))
1385 		return PTR_ERR(file);
1386 
1387 	err = igt_live_test_begin(&t, i915, __func__, "");
1388 	if (err)
1389 		goto out_file;
1390 
1391 	ctx = live_context(i915, file);
1392 	if (IS_ERR(ctx)) {
1393 		err = PTR_ERR(ctx);
1394 		goto out_file;
1395 	}
1396 
1397 	vm = ctx->vm ?: &to_gt(i915)->ggtt->alias->vm;
1398 	if (!vm || !vm->has_read_only) {
1399 		err = 0;
1400 		goto out_file;
1401 	}
1402 
1403 	num_engines = 0;
1404 	for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it)
1405 		if (intel_engine_can_store_dword(ce->engine))
1406 			num_engines++;
1407 	i915_gem_context_unlock_engines(ctx);
1408 
1409 	ndwords = 0;
1410 	dw = 0;
1411 	while (!time_after(jiffies, end_time)) {
1412 		for_each_gem_engine(ce,
1413 				    i915_gem_context_lock_engines(ctx), it) {
1414 			if (!intel_engine_can_store_dword(ce->engine))
1415 				continue;
1416 
1417 			if (!obj) {
1418 				obj = create_test_object(ce->vm, file, &objects);
1419 				if (IS_ERR(obj)) {
1420 					err = PTR_ERR(obj);
1421 					i915_gem_context_unlock_engines(ctx);
1422 					goto out_file;
1423 				}
1424 
1425 				if (prandom_u32_state(&prng) & 1)
1426 					i915_gem_object_set_readonly(obj);
1427 			}
1428 
1429 			err = gpu_fill(ce, obj, dw);
1430 			if (err) {
1431 				pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) [full-ppgtt? %s], err=%d\n",
1432 				       ndwords, dw, max_dwords(obj),
1433 				       ce->engine->name,
1434 				       str_yes_no(i915_gem_context_has_full_ppgtt(ctx)),
1435 				       err);
1436 				i915_gem_context_unlock_engines(ctx);
1437 				goto out_file;
1438 			}
1439 
1440 			err = throttle(ce, tq, ARRAY_SIZE(tq));
1441 			if (err) {
1442 				i915_gem_context_unlock_engines(ctx);
1443 				goto out_file;
1444 			}
1445 
1446 			if (++dw == max_dwords(obj)) {
1447 				obj = NULL;
1448 				dw = 0;
1449 			}
1450 			ndwords++;
1451 		}
1452 		i915_gem_context_unlock_engines(ctx);
1453 	}
1454 	pr_info("Submitted %lu dwords (across %lu engines)\n",
1455 		ndwords, num_engines);
1456 
1457 	dw = 0;
1458 	idx = 0;
1459 	list_for_each_entry(obj, &objects, st_link) {
1460 		unsigned int rem =
1461 			min_t(unsigned int, ndwords - dw, max_dwords(obj));
1462 		unsigned int num_writes;
1463 
1464 		num_writes = rem;
1465 		if (i915_gem_object_is_readonly(obj))
1466 			num_writes = 0;
1467 
1468 		err = cpu_check(obj, idx++, num_writes);
1469 		if (err)
1470 			break;
1471 
1472 		dw += rem;
1473 	}
1474 
1475 out_file:
1476 	throttle_release(tq, ARRAY_SIZE(tq));
1477 	if (igt_live_test_end(&t))
1478 		err = -EIO;
1479 
1480 	fput(file);
1481 	return err;
1482 }
1483 
1484 static int check_scratch(struct i915_address_space *vm, u64 offset)
1485 {
1486 	struct drm_mm_node *node;
1487 
1488 	mutex_lock(&vm->mutex);
1489 	node = __drm_mm_interval_first(&vm->mm,
1490 				       offset, offset + sizeof(u32) - 1);
1491 	mutex_unlock(&vm->mutex);
1492 	if (!node || node->start > offset)
1493 		return 0;
1494 
1495 	GEM_BUG_ON(offset >= node->start + node->size);
1496 
1497 	pr_err("Target offset 0x%08x_%08x overlaps with a node in the mm!\n",
1498 	       upper_32_bits(offset), lower_32_bits(offset));
1499 	return -EINVAL;
1500 }
1501 
1502 static int write_to_scratch(struct i915_gem_context *ctx,
1503 			    struct intel_engine_cs *engine,
1504 			    struct drm_i915_gem_object *obj,
1505 			    u64 offset, u32 value)
1506 {
1507 	struct drm_i915_private *i915 = ctx->i915;
1508 	struct i915_address_space *vm;
1509 	struct i915_request *rq;
1510 	struct i915_vma *vma;
1511 	u32 *cmd;
1512 	int err;
1513 
1514 	GEM_BUG_ON(offset < I915_GTT_PAGE_SIZE);
1515 
1516 	err = check_scratch(ctx->vm, offset);
1517 	if (err)
1518 		return err;
1519 
1520 	cmd = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WB);
1521 	if (IS_ERR(cmd))
1522 		return PTR_ERR(cmd);
1523 
1524 	*cmd++ = MI_STORE_DWORD_IMM_GEN4;
1525 	if (GRAPHICS_VER(i915) >= 8) {
1526 		*cmd++ = lower_32_bits(offset);
1527 		*cmd++ = upper_32_bits(offset);
1528 	} else {
1529 		*cmd++ = 0;
1530 		*cmd++ = offset;
1531 	}
1532 	*cmd++ = value;
1533 	*cmd = MI_BATCH_BUFFER_END;
1534 	__i915_gem_object_flush_map(obj, 0, 64);
1535 	i915_gem_object_unpin_map(obj);
1536 
1537 	intel_gt_chipset_flush(engine->gt);
1538 
1539 	vm = i915_gem_context_get_eb_vm(ctx);
1540 	vma = i915_vma_instance(obj, vm, NULL);
1541 	if (IS_ERR(vma)) {
1542 		err = PTR_ERR(vma);
1543 		goto out_vm;
1544 	}
1545 
1546 	err = i915_vma_pin(vma, 0, 0, PIN_USER | PIN_OFFSET_FIXED);
1547 	if (err)
1548 		goto out_vm;
1549 
1550 	rq = igt_request_alloc(ctx, engine);
1551 	if (IS_ERR(rq)) {
1552 		err = PTR_ERR(rq);
1553 		goto err_unpin;
1554 	}
1555 
1556 	err = igt_vma_move_to_active_unlocked(vma, rq, 0);
1557 	if (err)
1558 		goto skip_request;
1559 
1560 	if (rq->engine->emit_init_breadcrumb) {
1561 		err = rq->engine->emit_init_breadcrumb(rq);
1562 		if (err)
1563 			goto skip_request;
1564 	}
1565 
1566 	err = engine->emit_bb_start(rq, i915_vma_offset(vma),
1567 				    i915_vma_size(vma), 0);
1568 	if (err)
1569 		goto skip_request;
1570 
1571 	i915_vma_unpin(vma);
1572 
1573 	i915_request_add(rq);
1574 
1575 	goto out_vm;
1576 skip_request:
1577 	i915_request_set_error_once(rq, err);
1578 	i915_request_add(rq);
1579 err_unpin:
1580 	i915_vma_unpin(vma);
1581 out_vm:
1582 	i915_vm_put(vm);
1583 
1584 	if (!err)
1585 		err = i915_gem_object_wait(obj, 0, MAX_SCHEDULE_TIMEOUT);
1586 
1587 	return err;
1588 }
1589 
1590 static int read_from_scratch(struct i915_gem_context *ctx,
1591 			     struct intel_engine_cs *engine,
1592 			     struct drm_i915_gem_object *obj,
1593 			     u64 offset, u32 *value)
1594 {
1595 	struct drm_i915_private *i915 = ctx->i915;
1596 	struct i915_address_space *vm;
1597 	const u32 result = 0x100;
1598 	struct i915_request *rq;
1599 	struct i915_vma *vma;
1600 	unsigned int flags;
1601 	u32 *cmd;
1602 	int err;
1603 
1604 	GEM_BUG_ON(offset < I915_GTT_PAGE_SIZE);
1605 
1606 	err = check_scratch(ctx->vm, offset);
1607 	if (err)
1608 		return err;
1609 
1610 	if (GRAPHICS_VER(i915) >= 8) {
1611 		const u32 GPR0 = engine->mmio_base + 0x600;
1612 
1613 		vm = i915_gem_context_get_eb_vm(ctx);
1614 		vma = i915_vma_instance(obj, vm, NULL);
1615 		if (IS_ERR(vma)) {
1616 			err = PTR_ERR(vma);
1617 			goto out_vm;
1618 		}
1619 
1620 		err = i915_vma_pin(vma, 0, 0, PIN_USER | PIN_OFFSET_FIXED);
1621 		if (err)
1622 			goto out_vm;
1623 
1624 		cmd = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WB);
1625 		if (IS_ERR(cmd)) {
1626 			err = PTR_ERR(cmd);
1627 			goto err_unpin;
1628 		}
1629 
1630 		memset(cmd, POISON_INUSE, PAGE_SIZE);
1631 		*cmd++ = MI_LOAD_REGISTER_MEM_GEN8;
1632 		*cmd++ = GPR0;
1633 		*cmd++ = lower_32_bits(offset);
1634 		*cmd++ = upper_32_bits(offset);
1635 		*cmd++ = MI_STORE_REGISTER_MEM_GEN8;
1636 		*cmd++ = GPR0;
1637 		*cmd++ = result;
1638 		*cmd++ = 0;
1639 		*cmd = MI_BATCH_BUFFER_END;
1640 
1641 		i915_gem_object_flush_map(obj);
1642 		i915_gem_object_unpin_map(obj);
1643 
1644 		flags = 0;
1645 	} else {
1646 		const u32 reg = engine->mmio_base + 0x420;
1647 
1648 		/* hsw: register access even to 3DPRIM! is protected */
1649 		vm = i915_vm_get(&engine->gt->ggtt->vm);
1650 		vma = i915_vma_instance(obj, vm, NULL);
1651 		if (IS_ERR(vma)) {
1652 			err = PTR_ERR(vma);
1653 			goto out_vm;
1654 		}
1655 
1656 		err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
1657 		if (err)
1658 			goto out_vm;
1659 
1660 		cmd = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WB);
1661 		if (IS_ERR(cmd)) {
1662 			err = PTR_ERR(cmd);
1663 			goto err_unpin;
1664 		}
1665 
1666 		memset(cmd, POISON_INUSE, PAGE_SIZE);
1667 		*cmd++ = MI_LOAD_REGISTER_MEM;
1668 		*cmd++ = reg;
1669 		*cmd++ = offset;
1670 		*cmd++ = MI_STORE_REGISTER_MEM | MI_USE_GGTT;
1671 		*cmd++ = reg;
1672 		*cmd++ = i915_vma_offset(vma) + result;
1673 		*cmd = MI_BATCH_BUFFER_END;
1674 
1675 		i915_gem_object_flush_map(obj);
1676 		i915_gem_object_unpin_map(obj);
1677 
1678 		flags = I915_DISPATCH_SECURE;
1679 	}
1680 
1681 	intel_gt_chipset_flush(engine->gt);
1682 
1683 	rq = igt_request_alloc(ctx, engine);
1684 	if (IS_ERR(rq)) {
1685 		err = PTR_ERR(rq);
1686 		goto err_unpin;
1687 	}
1688 
1689 	err = igt_vma_move_to_active_unlocked(vma, rq, EXEC_OBJECT_WRITE);
1690 	if (err)
1691 		goto skip_request;
1692 
1693 	if (rq->engine->emit_init_breadcrumb) {
1694 		err = rq->engine->emit_init_breadcrumb(rq);
1695 		if (err)
1696 			goto skip_request;
1697 	}
1698 
1699 	err = engine->emit_bb_start(rq, i915_vma_offset(vma),
1700 				    i915_vma_size(vma), flags);
1701 	if (err)
1702 		goto skip_request;
1703 
1704 	i915_vma_unpin(vma);
1705 
1706 	i915_request_add(rq);
1707 
1708 	i915_gem_object_lock(obj, NULL);
1709 	err = i915_gem_object_set_to_cpu_domain(obj, false);
1710 	i915_gem_object_unlock(obj);
1711 	if (err)
1712 		goto out_vm;
1713 
1714 	cmd = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WB);
1715 	if (IS_ERR(cmd)) {
1716 		err = PTR_ERR(cmd);
1717 		goto out_vm;
1718 	}
1719 
1720 	*value = cmd[result / sizeof(*cmd)];
1721 	i915_gem_object_unpin_map(obj);
1722 
1723 	goto out_vm;
1724 skip_request:
1725 	i915_request_set_error_once(rq, err);
1726 	i915_request_add(rq);
1727 err_unpin:
1728 	i915_vma_unpin(vma);
1729 out_vm:
1730 	i915_vm_put(vm);
1731 
1732 	if (!err)
1733 		err = i915_gem_object_wait(obj, 0, MAX_SCHEDULE_TIMEOUT);
1734 
1735 	return err;
1736 }
1737 
1738 static int check_scratch_page(struct i915_gem_context *ctx, u32 *out)
1739 {
1740 	struct i915_address_space *vm;
1741 	u32 *vaddr;
1742 	int err = 0;
1743 
1744 	vm = ctx->vm;
1745 	if (!vm)
1746 		return -ENODEV;
1747 
1748 	if (!vm->scratch[0]) {
1749 		pr_err("No scratch page!\n");
1750 		return -EINVAL;
1751 	}
1752 
1753 	vaddr = __px_vaddr(vm->scratch[0]);
1754 
1755 	memcpy(out, vaddr, sizeof(*out));
1756 	if (memchr_inv(vaddr, *out, PAGE_SIZE)) {
1757 		pr_err("Inconsistent initial state of scratch page!\n");
1758 		err = -EINVAL;
1759 	}
1760 
1761 	return err;
1762 }
1763 
1764 static int igt_vm_isolation(void *arg)
1765 {
1766 	struct drm_i915_private *i915 = arg;
1767 	struct i915_gem_context *ctx_a, *ctx_b;
1768 	struct drm_i915_gem_object *obj_a, *obj_b;
1769 	unsigned long num_engines, count;
1770 	struct intel_engine_cs *engine;
1771 	struct igt_live_test t;
1772 	I915_RND_STATE(prng);
1773 	struct file *file;
1774 	u64 vm_total;
1775 	u32 expected;
1776 	int err;
1777 
1778 	if (GRAPHICS_VER(i915) < 7)
1779 		return 0;
1780 
1781 	/*
1782 	 * The simple goal here is that a write into one context is not
1783 	 * observed in a second (separate page tables and scratch).
1784 	 */
1785 
1786 	file = mock_file(i915);
1787 	if (IS_ERR(file))
1788 		return PTR_ERR(file);
1789 
1790 	err = igt_live_test_begin(&t, i915, __func__, "");
1791 	if (err)
1792 		goto out_file;
1793 
1794 	ctx_a = live_context(i915, file);
1795 	if (IS_ERR(ctx_a)) {
1796 		err = PTR_ERR(ctx_a);
1797 		goto out_file;
1798 	}
1799 
1800 	ctx_b = live_context(i915, file);
1801 	if (IS_ERR(ctx_b)) {
1802 		err = PTR_ERR(ctx_b);
1803 		goto out_file;
1804 	}
1805 
1806 	/* We can only test vm isolation, if the vm are distinct */
1807 	if (ctx_a->vm == ctx_b->vm)
1808 		goto out_file;
1809 
1810 	/* Read the initial state of the scratch page */
1811 	err = check_scratch_page(ctx_a, &expected);
1812 	if (err)
1813 		goto out_file;
1814 
1815 	err = check_scratch_page(ctx_b, &expected);
1816 	if (err)
1817 		goto out_file;
1818 
1819 	vm_total = ctx_a->vm->total;
1820 	GEM_BUG_ON(ctx_b->vm->total != vm_total);
1821 
1822 	obj_a = i915_gem_object_create_internal(i915, PAGE_SIZE);
1823 	if (IS_ERR(obj_a)) {
1824 		err = PTR_ERR(obj_a);
1825 		goto out_file;
1826 	}
1827 
1828 	obj_b = i915_gem_object_create_internal(i915, PAGE_SIZE);
1829 	if (IS_ERR(obj_b)) {
1830 		err = PTR_ERR(obj_b);
1831 		goto put_a;
1832 	}
1833 
1834 	count = 0;
1835 	num_engines = 0;
1836 	for_each_uabi_engine(engine, i915) {
1837 		IGT_TIMEOUT(end_time);
1838 		unsigned long this = 0;
1839 
1840 		if (!intel_engine_can_store_dword(engine))
1841 			continue;
1842 
1843 		/* Not all engines have their own GPR! */
1844 		if (GRAPHICS_VER(i915) < 8 && engine->class != RENDER_CLASS)
1845 			continue;
1846 
1847 		while (!__igt_timeout(end_time, NULL)) {
1848 			u32 value = 0xc5c5c5c5;
1849 			u64 offset;
1850 
1851 			/* Leave enough space at offset 0 for the batch */
1852 			offset = igt_random_offset(&prng,
1853 						   I915_GTT_PAGE_SIZE, vm_total,
1854 						   sizeof(u32), alignof_dword);
1855 
1856 			err = write_to_scratch(ctx_a, engine, obj_a,
1857 					       offset, 0xdeadbeef);
1858 			if (err == 0)
1859 				err = read_from_scratch(ctx_b, engine, obj_b,
1860 							offset, &value);
1861 			if (err)
1862 				goto put_b;
1863 
1864 			if (value != expected) {
1865 				pr_err("%s: Read %08x from scratch (offset 0x%08x_%08x), after %lu reads!\n",
1866 				       engine->name, value,
1867 				       upper_32_bits(offset),
1868 				       lower_32_bits(offset),
1869 				       this);
1870 				err = -EINVAL;
1871 				goto put_b;
1872 			}
1873 
1874 			this++;
1875 		}
1876 		count += this;
1877 		num_engines++;
1878 	}
1879 	pr_info("Checked %lu scratch offsets across %lu engines\n",
1880 		count, num_engines);
1881 
1882 put_b:
1883 	i915_gem_object_put(obj_b);
1884 put_a:
1885 	i915_gem_object_put(obj_a);
1886 out_file:
1887 	if (igt_live_test_end(&t))
1888 		err = -EIO;
1889 	fput(file);
1890 	return err;
1891 }
1892 
1893 int i915_gem_context_live_selftests(struct drm_i915_private *i915)
1894 {
1895 	static const struct i915_subtest tests[] = {
1896 		SUBTEST(live_nop_switch),
1897 		SUBTEST(live_parallel_switch),
1898 		SUBTEST(igt_ctx_exec),
1899 		SUBTEST(igt_ctx_readonly),
1900 		SUBTEST(igt_ctx_sseu),
1901 		SUBTEST(igt_shared_ctx_exec),
1902 		SUBTEST(igt_vm_isolation),
1903 	};
1904 
1905 	if (intel_gt_is_wedged(to_gt(i915)))
1906 		return 0;
1907 
1908 	return i915_live_subtests(tests, i915);
1909 }
1910