/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2017 Intel Corporation
 */

#include <linux/prime_numbers.h>

#include "gem/i915_gem_pm.h"
#include "gt/intel_engine_pm.h"
#include "gt/intel_gt.h"
#include "gt/intel_gt_requests.h"
#include "gt/intel_reset.h"
#include "i915_selftest.h"

#include "gem/selftests/igt_gem_utils.h"
#include "selftests/i915_random.h"
#include "selftests/igt_flush_test.h"
#include "selftests/igt_live_test.h"
#include "selftests/igt_reset.h"
#include "selftests/igt_spinner.h"
#include "selftests/mock_drm.h"
#include "selftests/mock_gem_device.h"

#include "huge_gem_object.h"
#include "igt_gem_utils.h"

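/*
 * Number of u32 slots in one page: with the common 4KiB page size this is
 * 4096 / 4 = 1024. The test objects below are sized in multiples of
 * DW_PER_PAGE * PAGE_SIZE so that each dword of a fill pass lands in a
 * distinct page of the object.
 */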
#define DW_PER_PAGE (PAGE_SIZE / sizeof(u32))

static int live_nop_switch(void *arg)
{
	const unsigned int nctx = 1024;
	struct drm_i915_private *i915 = arg;
	struct intel_engine_cs *engine;
	struct i915_gem_context **ctx;
	struct igt_live_test t;
	struct file *file;
	unsigned long n;
	int err = -ENODEV;

	/*
	 * Create as many contexts as we can feasibly get away with
	 * and check we can switch between them rapidly.
	 *
	 * Serves as a very simple stress test for submission and HW switching
	 * between contexts.
	 */
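	/*
	 * Note: the timed loop below batches requests in prime-numbered
	 * counts (for_each_prime_number_from). The presumed intent,
	 * mirroring other selftests that iterate over primes, is to keep
	 * the averaged per-switch cost from lining up with any
	 * power-of-two periodicity in the submission path.
	 */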

	if (!DRIVER_CAPS(i915)->has_logical_contexts)
		return 0;

	file = mock_file(i915);
	if (IS_ERR(file))
		return PTR_ERR(file);

	ctx = kcalloc(nctx, sizeof(*ctx), GFP_KERNEL);
	if (!ctx) {
		err = -ENOMEM;
		goto out_file;
	}

	for (n = 0; n < nctx; n++) {
		ctx[n] = live_context(i915, file);
		if (IS_ERR(ctx[n])) {
			err = PTR_ERR(ctx[n]);
			goto out_file;
		}
	}

	for_each_uabi_engine(engine, i915) {
		struct i915_request *rq = NULL;
		unsigned long end_time, prime;
		ktime_t times[2] = {};

		times[0] = ktime_get_raw();
		for (n = 0; n < nctx; n++) {
			struct i915_request *this;

			this = igt_request_alloc(ctx[n], engine);
			if (IS_ERR(this)) {
				err = PTR_ERR(this);
				goto out_file;
			}
			if (rq) {
				i915_request_await_dma_fence(this, &rq->fence);
				i915_request_put(rq);
			}
			rq = i915_request_get(this);
			i915_request_add(this);
		}
		if (i915_request_wait(rq, 0, 10 * HZ) < 0) {
			pr_err("Failed to populate %d contexts\n", nctx);
			intel_gt_set_wedged(to_gt(i915));
			i915_request_put(rq);
			err = -EIO;
			goto out_file;
		}
		i915_request_put(rq);

		times[1] = ktime_get_raw();

		pr_info("Populated %d contexts on %s in %lluns\n",
			nctx, engine->name, ktime_to_ns(times[1] - times[0]));

		err = igt_live_test_begin(&t, i915, __func__, engine->name);
		if (err)
			goto out_file;

		end_time = jiffies + i915_selftest.timeout_jiffies;
		for_each_prime_number_from(prime, 2, 8192) {
			times[1] = ktime_get_raw();

			rq = NULL;
			for (n = 0; n < prime; n++) {
				struct i915_request *this;

				this = igt_request_alloc(ctx[n % nctx], engine);
				if (IS_ERR(this)) {
					err = PTR_ERR(this);
					goto out_file;
				}

				if (rq) { /* Force submission order */
					i915_request_await_dma_fence(this, &rq->fence);
					i915_request_put(rq);
				}

				/*
				 * This space is left intentionally blank.
				 *
				 * We do not actually want to perform any
				 * action with this request, we just want
				 * to measure the latency in allocation
				 * and submission of our breadcrumbs -
				 * ensuring that the bare request is sufficient
				 * for the system to work (i.e. proper HEAD
				 * tracking of the rings, interrupt handling,
				 * etc). It also gives us the lowest bound
				 * for latency.
				 */

				rq = i915_request_get(this);
				i915_request_add(this);
			}
			GEM_BUG_ON(!rq);
			if (i915_request_wait(rq, 0, HZ / 5) < 0) {
				pr_err("Switching between %ld contexts timed out\n",
				       prime);
				intel_gt_set_wedged(to_gt(i915));
				i915_request_put(rq);
				break;
			}
			i915_request_put(rq);

			times[1] = ktime_sub(ktime_get_raw(), times[1]);
			if (prime == 2)
				times[0] = times[1];

			if (__igt_timeout(end_time, NULL))
				break;
		}

		err = igt_live_test_end(&t);
		if (err)
			goto out_file;

		pr_info("Switch latencies on %s: 1 = %lluns, %lu = %lluns\n",
			engine->name,
			ktime_to_ns(times[0]),
			prime - 1, div64_u64(ktime_to_ns(times[1]), prime - 1));
	}

out_file:
	fput(file);
	return err;
}

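/*
 * One worker per engine: @tsk runs the switch loop, while @ce holds two
 * pinned contexts on the same engine between which requests ping-pong.
 */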
struct parallel_switch {
	struct task_struct *tsk;
	struct intel_context *ce[2];
};

static int __live_parallel_switch1(void *data)
{
	struct parallel_switch *arg = data;
	IGT_TIMEOUT(end_time);
	unsigned long count;

	count = 0;
	do {
		struct i915_request *rq = NULL;
		int err, n;

		err = 0;
		for (n = 0; !err && n < ARRAY_SIZE(arg->ce); n++) {
			struct i915_request *prev = rq;

			rq = i915_request_create(arg->ce[n]);
			if (IS_ERR(rq)) {
				i915_request_put(prev);
				return PTR_ERR(rq);
			}

			i915_request_get(rq);
			if (prev) {
				err = i915_request_await_dma_fence(rq, &prev->fence);
				i915_request_put(prev);
			}

			i915_request_add(rq);
		}
		if (i915_request_wait(rq, 0, HZ / 5) < 0)
			err = -ETIME;
		i915_request_put(rq);
		if (err)
			return err;

		count++;
	} while (!__igt_timeout(end_time, NULL));

	pr_info("%s: %lu switches (sync)\n", arg->ce[0]->engine->name, count);
	return 0;
}

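/*
 * As __live_parallel_switch1, but without waiting for each pair to
 * complete: requests are queued back-to-back, measuring the raw
 * submission throughput of the context switches.
 */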
static int __live_parallel_switchN(void *data)
{
	struct parallel_switch *arg = data;
	struct i915_request *rq = NULL;
	IGT_TIMEOUT(end_time);
	unsigned long count;
	int n;

	count = 0;
	do {
		for (n = 0; n < ARRAY_SIZE(arg->ce); n++) {
			struct i915_request *prev = rq;
			int err = 0;

			rq = i915_request_create(arg->ce[n]);
			if (IS_ERR(rq)) {
				i915_request_put(prev);
				return PTR_ERR(rq);
			}

			i915_request_get(rq);
			if (prev) {
				err = i915_request_await_dma_fence(rq, &prev->fence);
				i915_request_put(prev);
			}

			i915_request_add(rq);
			if (err) {
				i915_request_put(rq);
				return err;
			}
		}

		count++;
	} while (!__igt_timeout(end_time, NULL));
	i915_request_put(rq);

	pr_info("%s: %lu switches (many)\n", arg->ce[0]->engine->name, count);
	return 0;
}

static int live_parallel_switch(void *arg)
{
	struct drm_i915_private *i915 = arg;
	static int (* const func[])(void *arg) = {
		__live_parallel_switch1,
		__live_parallel_switchN,
		NULL,
	};
	struct parallel_switch *data = NULL;
	struct i915_gem_engines *engines;
	struct i915_gem_engines_iter it;
	int (* const *fn)(void *arg);
	struct i915_gem_context *ctx;
	struct intel_context *ce;
	struct file *file;
	int n, m, count;
	int err = 0;

	/*
	 * Check we can process switches on all engines simultaneously.
	 */

	if (!DRIVER_CAPS(i915)->has_logical_contexts)
		return 0;

	file = mock_file(i915);
	if (IS_ERR(file))
		return PTR_ERR(file);

	ctx = live_context(i915, file);
	if (IS_ERR(ctx)) {
		err = PTR_ERR(ctx);
		goto out_file;
	}

	engines = i915_gem_context_lock_engines(ctx);
	count = engines->num_engines;

	data = kcalloc(count, sizeof(*data), GFP_KERNEL);
	if (!data) {
		i915_gem_context_unlock_engines(ctx);
		err = -ENOMEM;
		goto out_file;
	}

	m = 0; /* Use the first context as our template for the engines */
	for_each_gem_engine(ce, engines, it) {
		err = intel_context_pin(ce);
		if (err) {
			i915_gem_context_unlock_engines(ctx);
			goto out;
		}
		data[m++].ce[0] = intel_context_get(ce);
	}
	i915_gem_context_unlock_engines(ctx);

	/* Clone the same set of engines into the other contexts */
	for (n = 1; n < ARRAY_SIZE(data->ce); n++) {
		ctx = live_context(i915, file);
		if (IS_ERR(ctx)) {
			err = PTR_ERR(ctx);
			goto out;
		}

		for (m = 0; m < count; m++) {
			if (!data[m].ce[0])
				continue;

			ce = intel_context_create(data[m].ce[0]->engine);
			if (IS_ERR(ce)) {
				err = PTR_ERR(ce);
				goto out;
			}

			err = intel_context_pin(ce);
			if (err) {
				intel_context_put(ce);
				goto out;
			}

			data[m].ce[n] = ce;
		}
	}

	for (fn = func; !err && *fn; fn++) {
		struct igt_live_test t;
		int n;

		err = igt_live_test_begin(&t, i915, __func__, "");
		if (err)
			break;

		for (n = 0; n < count; n++) {
			if (!data[n].ce[0])
				continue;

			data[n].tsk = kthread_run(*fn, &data[n],
						  "igt/parallel:%s",
						  data[n].ce[0]->engine->name);
			if (IS_ERR(data[n].tsk)) {
				err = PTR_ERR(data[n].tsk);
				break;
			}
			get_task_struct(data[n].tsk);
		}

		yield(); /* start all threads before we kthread_stop() */

		for (n = 0; n < count; n++) {
			int status;

			if (IS_ERR_OR_NULL(data[n].tsk))
				continue;

			status = kthread_stop(data[n].tsk);
			if (status && !err)
				err = status;

			put_task_struct(data[n].tsk);
			data[n].tsk = NULL;
		}

		if (igt_live_test_end(&t))
			err = -EIO;
	}

out:
	for (n = 0; n < count; n++) {
		for (m = 0; m < ARRAY_SIZE(data->ce); m++) {
			if (!data[n].ce[m])
				continue;

			intel_context_unpin(data[n].ce[m]);
			intel_context_put(data[n].ce[m]);
		}
	}
	kfree(data);
out_file:
	fput(file);
	return err;
}

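/*
 * The test objects are built with huge_gem_object(): a small physical
 * backing store aliased repeatedly across a much larger virtual size.
 * "real" counts the physical pages, "fake" the dma (GTT-visible) pages.
 */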
static unsigned long real_page_count(struct drm_i915_gem_object *obj)
{
	return huge_gem_object_phys_size(obj) >> PAGE_SHIFT;
}

static unsigned long fake_page_count(struct drm_i915_gem_object *obj)
{
	return huge_gem_object_dma_size(obj) >> PAGE_SHIFT;
}

static int gpu_fill(struct intel_context *ce,
		    struct drm_i915_gem_object *obj,
		    unsigned int dw)
{
	struct i915_vma *vma;
	int err;

	GEM_BUG_ON(obj->base.size > ce->vm->total);
	GEM_BUG_ON(!intel_engine_can_store_dword(ce->engine));

	vma = i915_vma_instance(obj, ce->vm, NULL);
	if (IS_ERR(vma))
		return PTR_ERR(vma);

	err = i915_vma_pin(vma, 0, 0, PIN_HIGH | PIN_USER);
	if (err)
		return err;

	/*
	 * Within the GTT the huge object maps every page onto
	 * its 1024 real pages (using phys_pfn = dma_pfn % 1024).
	 * We set the nth dword within the page using the nth
	 * mapping via the GTT - this should exercise the GTT mapping
	 * whilst checking that each context provides a unique view
	 * into the object.
	 */
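	/*
	 * A worked example of the offset arithmetic below (assuming 4KiB
	 * pages and 1024 real pages): for dw == 2, the base is
	 * (2 * 1024) << 12 == 8MiB into the fake view with an in-page
	 * offset of 2 * 4 == 8 bytes, so dword 2 of each real page is
	 * written through the third 1024-page alias.
	 */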
	err = igt_gpu_fill_dw(ce, vma,
			      (dw * real_page_count(obj)) << PAGE_SHIFT |
			      (dw * sizeof(u32)),
			      real_page_count(obj),
			      dw);
	i915_vma_unpin(vma);

	return err;
}

static int cpu_fill(struct drm_i915_gem_object *obj, u32 value)
{
	const bool has_llc = HAS_LLC(to_i915(obj->base.dev));
	unsigned int n, m, need_flush;
	int err;

	i915_gem_object_lock(obj, NULL);
	err = i915_gem_object_prepare_write(obj, &need_flush);
	if (err)
		goto out;

	for (n = 0; n < real_page_count(obj); n++) {
		u32 *map;

		map = kmap_atomic(i915_gem_object_get_page(obj, n));
		for (m = 0; m < DW_PER_PAGE; m++)
			map[m] = value;
		if (!has_llc)
			drm_clflush_virt_range(map, PAGE_SIZE);
		kunmap_atomic(map);
	}

	i915_gem_object_finish_access(obj);
	obj->read_domains = I915_GEM_DOMAIN_GTT | I915_GEM_DOMAIN_CPU;
	obj->write_domain = 0;
out:
	i915_gem_object_unlock(obj);
	return err;
}

static noinline int cpu_check(struct drm_i915_gem_object *obj,
			      unsigned int idx, unsigned int max)
{
	unsigned int n, m, needs_flush;
	int err;

	i915_gem_object_lock(obj, NULL);
	err = i915_gem_object_prepare_read(obj, &needs_flush);
	if (err)
		goto out_unlock;

	for (n = 0; n < real_page_count(obj); n++) {
		u32 *map;

		map = kmap_atomic(i915_gem_object_get_page(obj, n));
		if (needs_flush & CLFLUSH_BEFORE)
			drm_clflush_virt_range(map, PAGE_SIZE);

		for (m = 0; m < max; m++) {
			if (map[m] != m) {
				pr_err("%pS: Invalid value at object %d page %d/%ld, offset %d/%d: found %x expected %x\n",
				       __builtin_return_address(0), idx,
				       n, real_page_count(obj), m, max,
				       map[m], m);
				err = -EINVAL;
				goto out_unmap;
			}
		}

		for (; m < DW_PER_PAGE; m++) {
			if (map[m] != STACK_MAGIC) {
				pr_err("%pS: Invalid value at object %d page %d, offset %d: found %x expected %x (uninitialised)\n",
				       __builtin_return_address(0), idx, n, m,
				       map[m], STACK_MAGIC);
				err = -EINVAL;
				goto out_unmap;
			}
		}

out_unmap:
		kunmap_atomic(map);
		if (err)
			break;
	}

	i915_gem_object_finish_access(obj);
out_unlock:
	i915_gem_object_unlock(obj);
	return err;
}

static int file_add_object(struct file *file, struct drm_i915_gem_object *obj)
{
	int err;

	GEM_BUG_ON(obj->base.handle_count);

	/* tie the object to the drm_file for easy reaping */
	err = idr_alloc(&to_drm_file(file)->object_idr,
			&obj->base, 1, 0, GFP_KERNEL);
	if (err < 0)
		return err;

	i915_gem_object_get(obj);
	obj->base.handle_count++;
	return 0;
}

static struct drm_i915_gem_object *
create_test_object(struct i915_address_space *vm,
		   struct file *file,
		   struct list_head *objects)
{
	struct drm_i915_gem_object *obj;
	u64 size;
	int err;

	/* Keep in GEM's good graces */
	intel_gt_retire_requests(vm->gt);

	size = min(vm->total / 2, 1024ull * DW_PER_PAGE * PAGE_SIZE);
	size = round_down(size, DW_PER_PAGE * PAGE_SIZE);

	obj = huge_gem_object(vm->i915, DW_PER_PAGE * PAGE_SIZE, size);
	if (IS_ERR(obj))
		return obj;

	err = file_add_object(file, obj);
	i915_gem_object_put(obj);
	if (err)
		return ERR_PTR(err);

	err = cpu_fill(obj, STACK_MAGIC);
	if (err) {
		pr_err("Failed to fill object with cpu, err=%d\n",
		       err);
		return ERR_PTR(err);
	}

	list_add_tail(&obj->st_link, objects);
	return obj;
}

static unsigned long max_dwords(struct drm_i915_gem_object *obj)
{
	unsigned long npages = fake_page_count(obj);

	GEM_BUG_ON(!IS_ALIGNED(npages, DW_PER_PAGE));
	return npages / DW_PER_PAGE;
}

static void throttle_release(struct i915_request **q, int count)
{
	int i;

	for (i = 0; i < count; i++) {
		if (IS_ERR_OR_NULL(q[i]))
			continue;

		i915_request_put(fetch_and_zero(&q[i]));
	}
}

static int throttle(struct intel_context *ce,
		    struct i915_request **q, int count)
{
	int i;

	if (!IS_ERR_OR_NULL(q[0])) {
		if (i915_request_wait(q[0],
				      I915_WAIT_INTERRUPTIBLE,
				      MAX_SCHEDULE_TIMEOUT) < 0)
			return -EINTR;

		i915_request_put(q[0]);
	}

	for (i = 0; i < count - 1; i++)
		q[i] = q[i + 1];

	q[i] = intel_context_create_request(ce);
	if (IS_ERR(q[i]))
		return PTR_ERR(q[i]);

	i915_request_get(q[i]);
	i915_request_add(q[i]);

	return 0;
}
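
/*
 * Typical usage, a sketch mirroring the tests below: keep a small ring of
 * in-flight requests so the submission loop cannot run arbitrarily far
 * ahead of the GPU. Each throttle() call waits on the oldest request in
 * the queue and then queues a fresh one; throttle_release() drops any
 * references still held at the end:
 *
 *	struct i915_request *tq[5] = {};
 *	...
 *	err = throttle(ce, tq, ARRAY_SIZE(tq));
 *	...
 *	throttle_release(tq, ARRAY_SIZE(tq));
 */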

static int igt_ctx_exec(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct intel_engine_cs *engine;
	int err = -ENODEV;

	/*
	 * Create a few different contexts (with different mm) and write
	 * through each ctx/mm using the GPU, making sure those writes end
	 * up in the expected pages of our obj.
	 */

	if (!DRIVER_CAPS(i915)->has_logical_contexts)
		return 0;

	for_each_uabi_engine(engine, i915) {
		struct drm_i915_gem_object *obj = NULL;
		unsigned long ncontexts, ndwords, dw;
		struct i915_request *tq[5] = {};
		struct igt_live_test t;
		IGT_TIMEOUT(end_time);
		LIST_HEAD(objects);
		struct file *file;

		if (!intel_engine_can_store_dword(engine))
			continue;

		if (!engine->context_size)
			continue; /* No logical context support in HW */

		file = mock_file(i915);
		if (IS_ERR(file))
			return PTR_ERR(file);

		err = igt_live_test_begin(&t, i915, __func__, engine->name);
		if (err)
			goto out_file;

		ncontexts = 0;
		ndwords = 0;
		dw = 0;
		while (!time_after(jiffies, end_time)) {
			struct i915_gem_context *ctx;
			struct intel_context *ce;

			ctx = kernel_context(i915, NULL);
			if (IS_ERR(ctx)) {
				err = PTR_ERR(ctx);
				goto out_file;
			}

			ce = i915_gem_context_get_engine(ctx, engine->legacy_idx);
			GEM_BUG_ON(IS_ERR(ce));

			if (!obj) {
				obj = create_test_object(ce->vm, file, &objects);
				if (IS_ERR(obj)) {
					err = PTR_ERR(obj);
					intel_context_put(ce);
					kernel_context_close(ctx);
					goto out_file;
				}
			}

			err = gpu_fill(ce, obj, dw);
			if (err) {
				pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) [full-ppgtt? %s], err=%d\n",
				       ndwords, dw, max_dwords(obj),
				       engine->name,
				       yesno(i915_gem_context_has_full_ppgtt(ctx)),
				       err);
				intel_context_put(ce);
				kernel_context_close(ctx);
				goto out_file;
			}

			err = throttle(ce, tq, ARRAY_SIZE(tq));
			if (err) {
				intel_context_put(ce);
				kernel_context_close(ctx);
				goto out_file;
			}

			if (++dw == max_dwords(obj)) {
				obj = NULL;
				dw = 0;
			}

			ndwords++;
			ncontexts++;

			intel_context_put(ce);
			kernel_context_close(ctx);
		}

		pr_info("Submitted %lu contexts to %s, filling %lu dwords\n",
			ncontexts, engine->name, ndwords);

		ncontexts = dw = 0;
		list_for_each_entry(obj, &objects, st_link) {
			unsigned int rem =
				min_t(unsigned int, ndwords - dw, max_dwords(obj));

			err = cpu_check(obj, ncontexts++, rem);
			if (err)
				break;

			dw += rem;
		}

out_file:
		throttle_release(tq, ARRAY_SIZE(tq));
		if (igt_live_test_end(&t))
			err = -EIO;

		fput(file);
		if (err)
			return err;

		i915_gem_drain_freed_objects(i915);
	}

	return 0;
}

static int igt_shared_ctx_exec(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct i915_request *tq[5] = {};
	struct i915_gem_context *parent;
	struct intel_engine_cs *engine;
	struct igt_live_test t;
	struct file *file;
	int err = 0;

	/*
	 * Create a few different contexts with the same mm and write
	 * through each ctx using the GPU, making sure those writes end
	 * up in the expected pages of our obj.
	 */
	if (!DRIVER_CAPS(i915)->has_logical_contexts)
		return 0;

	file = mock_file(i915);
	if (IS_ERR(file))
		return PTR_ERR(file);

	parent = live_context(i915, file);
	if (IS_ERR(parent)) {
		err = PTR_ERR(parent);
		goto out_file;
	}

	if (!parent->vm) { /* not full-ppgtt; nothing to share */
		err = 0;
		goto out_file;
	}

	err = igt_live_test_begin(&t, i915, __func__, "");
	if (err)
		goto out_file;

	for_each_uabi_engine(engine, i915) {
		unsigned long ncontexts, ndwords, dw;
		struct drm_i915_gem_object *obj = NULL;
		IGT_TIMEOUT(end_time);
		LIST_HEAD(objects);

		if (!intel_engine_can_store_dword(engine))
			continue;

		dw = 0;
		ndwords = 0;
		ncontexts = 0;
		while (!time_after(jiffies, end_time)) {
			struct i915_gem_context *ctx;
			struct intel_context *ce;

			ctx = kernel_context(i915, parent->vm);
			if (IS_ERR(ctx)) {
				err = PTR_ERR(ctx);
				goto out_test;
			}

			ce = i915_gem_context_get_engine(ctx, engine->legacy_idx);
			GEM_BUG_ON(IS_ERR(ce));

			if (!obj) {
				obj = create_test_object(parent->vm,
							 file, &objects);
				if (IS_ERR(obj)) {
					err = PTR_ERR(obj);
					intel_context_put(ce);
					kernel_context_close(ctx);
					goto out_test;
				}
			}

			err = gpu_fill(ce, obj, dw);
			if (err) {
				pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) [full-ppgtt? %s], err=%d\n",
				       ndwords, dw, max_dwords(obj),
				       engine->name,
				       yesno(i915_gem_context_has_full_ppgtt(ctx)),
				       err);
				intel_context_put(ce);
				kernel_context_close(ctx);
				goto out_test;
			}

			err = throttle(ce, tq, ARRAY_SIZE(tq));
			if (err) {
				intel_context_put(ce);
				kernel_context_close(ctx);
				goto out_test;
			}

			if (++dw == max_dwords(obj)) {
				obj = NULL;
				dw = 0;
			}

			ndwords++;
			ncontexts++;

			intel_context_put(ce);
			kernel_context_close(ctx);
		}
		pr_info("Submitted %lu contexts to %s, filling %lu dwords\n",
			ncontexts, engine->name, ndwords);

		ncontexts = dw = 0;
		list_for_each_entry(obj, &objects, st_link) {
			unsigned int rem =
				min_t(unsigned int, ndwords - dw, max_dwords(obj));

			err = cpu_check(obj, ncontexts++, rem);
			if (err)
				goto out_test;

			dw += rem;
		}

		i915_gem_drain_freed_objects(i915);
	}
out_test:
	throttle_release(tq, ARRAY_SIZE(tq));
	if (igt_live_test_end(&t))
		err = -EIO;
out_file:
	fput(file);
	return err;
}

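/*
 * Build a small batch that copies GEN8_R_PWR_CLK_STATE into the target
 * vma via MI_STORE_REGISTER_MEM, so that the slice/subslice configuration
 * the context actually ran with can be read back from memory afterwards.
 */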
static int rpcs_query_batch(struct drm_i915_gem_object *rpcs, struct i915_vma *vma)
{
	u32 *cmd;

	GEM_BUG_ON(GRAPHICS_VER(vma->vm->i915) < 8);

	cmd = i915_gem_object_pin_map(rpcs, I915_MAP_WB);
	if (IS_ERR(cmd))
		return PTR_ERR(cmd);

	*cmd++ = MI_STORE_REGISTER_MEM_GEN8;
	*cmd++ = i915_mmio_reg_offset(GEN8_R_PWR_CLK_STATE);
	*cmd++ = lower_32_bits(vma->node.start);
	*cmd++ = upper_32_bits(vma->node.start);
	*cmd = MI_BATCH_BUFFER_END;

	__i915_gem_object_flush_map(rpcs, 0, 64);
	i915_gem_object_unpin_map(rpcs);

	intel_gt_chipset_flush(vma->vm->gt);

	return 0;
}

static int
emit_rpcs_query(struct drm_i915_gem_object *obj,
		struct intel_context *ce,
		struct i915_request **rq_out)
{
	struct drm_i915_private *i915 = to_i915(obj->base.dev);
	struct i915_request *rq;
	struct i915_gem_ww_ctx ww;
	struct i915_vma *batch;
	struct i915_vma *vma;
	struct drm_i915_gem_object *rpcs;
	int err;

	GEM_BUG_ON(!intel_engine_can_store_dword(ce->engine));

	if (GRAPHICS_VER(i915) < 8)
		return -EINVAL;

	vma = i915_vma_instance(obj, ce->vm, NULL);
	if (IS_ERR(vma))
		return PTR_ERR(vma);

	rpcs = i915_gem_object_create_internal(i915, PAGE_SIZE);
	if (IS_ERR(rpcs))
		return PTR_ERR(rpcs);

	batch = i915_vma_instance(rpcs, ce->vm, NULL);
	if (IS_ERR(batch)) {
		err = PTR_ERR(batch);
		goto err_put;
	}

	i915_gem_ww_ctx_init(&ww, false);
retry:
	err = i915_gem_object_lock(obj, &ww);
	if (!err)
		err = i915_gem_object_lock(rpcs, &ww);
	if (!err)
		err = i915_gem_object_set_to_gtt_domain(obj, false);
	if (!err)
		err = i915_vma_pin_ww(vma, &ww, 0, 0, PIN_USER);
	if (err)
		goto err_put;

	err = i915_vma_pin_ww(batch, &ww, 0, 0, PIN_USER);
	if (err)
		goto err_vma;

	err = rpcs_query_batch(rpcs, vma);
	if (err)
		goto err_batch;

	rq = i915_request_create(ce);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto err_batch;
	}

	err = i915_request_await_object(rq, batch->obj, false);
	if (err == 0)
		err = i915_vma_move_to_active(batch, rq, 0);
	if (err)
		goto skip_request;

	err = i915_request_await_object(rq, vma->obj, true);
	if (err == 0)
		err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
	if (err)
		goto skip_request;

	if (rq->engine->emit_init_breadcrumb) {
		err = rq->engine->emit_init_breadcrumb(rq);
		if (err)
			goto skip_request;
	}

	err = rq->engine->emit_bb_start(rq,
					batch->node.start, batch->node.size,
					0);
	if (err)
		goto skip_request;

	*rq_out = i915_request_get(rq);

skip_request:
	if (err)
		i915_request_set_error_once(rq, err);
	i915_request_add(rq);
err_batch:
	i915_vma_unpin(batch);
err_vma:
	i915_vma_unpin(vma);
err_put:
	if (err == -EDEADLK) {
		err = i915_gem_ww_ctx_backoff(&ww);
		if (!err)
			goto retry;
	}
	i915_gem_ww_ctx_fini(&ww);
	i915_gem_object_put(rpcs);
	return err;
}

#define TEST_IDLE	BIT(0)
#define TEST_BUSY	BIT(1)
#define TEST_RESET	BIT(2)
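
/*
 * Flag semantics, as used below: TEST_BUSY runs the RPCS query behind a
 * spinner, TEST_RESET additionally resets the engine while the spinner
 * is active, and TEST_IDLE re-reads the configuration after flushing
 * and idling the GPU.
 */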

static int
__sseu_prepare(const char *name,
	       unsigned int flags,
	       struct intel_context *ce,
	       struct igt_spinner **spin)
{
	struct i915_request *rq;
	int ret;

	*spin = NULL;
	if (!(flags & (TEST_BUSY | TEST_RESET)))
		return 0;

	*spin = kzalloc(sizeof(**spin), GFP_KERNEL);
	if (!*spin)
		return -ENOMEM;

	ret = igt_spinner_init(*spin, ce->engine->gt);
	if (ret)
		goto err_free;

	rq = igt_spinner_create_request(*spin, ce, MI_NOOP);
	if (IS_ERR(rq)) {
		ret = PTR_ERR(rq);
		goto err_fini;
	}

	i915_request_add(rq);

	if (!igt_wait_for_spinner(*spin, rq)) {
		pr_err("%s: Spinner failed to start!\n", name);
		ret = -ETIMEDOUT;
		goto err_end;
	}

	return 0;

err_end:
	igt_spinner_end(*spin);
err_fini:
	igt_spinner_fini(*spin);
err_free:
	kfree(fetch_and_zero(spin));
	return ret;
}

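/*
 * Submit an RPCS query on @ce and decode the slice count from the S_CNT
 * field of the value written back; returns the count (or a negative
 * error code) and stores the raw RPCS value in @rpcs.
 */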
static int
__read_slice_count(struct intel_context *ce,
		   struct drm_i915_gem_object *obj,
		   struct igt_spinner *spin,
		   u32 *rpcs)
{
	struct i915_request *rq = NULL;
	u32 s_mask, s_shift;
	unsigned int cnt;
	u32 *buf, val;
	long ret;

	ret = emit_rpcs_query(obj, ce, &rq);
	if (ret)
		return ret;

	if (spin)
		igt_spinner_end(spin);

	ret = i915_request_wait(rq, 0, MAX_SCHEDULE_TIMEOUT);
	i915_request_put(rq);
	if (ret < 0)
		return ret;

	buf = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WB);
	if (IS_ERR(buf)) {
		ret = PTR_ERR(buf);
		return ret;
	}

	if (GRAPHICS_VER(ce->engine->i915) >= 11) {
		s_mask = GEN11_RPCS_S_CNT_MASK;
		s_shift = GEN11_RPCS_S_CNT_SHIFT;
	} else {
		s_mask = GEN8_RPCS_S_CNT_MASK;
		s_shift = GEN8_RPCS_S_CNT_SHIFT;
	}

	val = *buf;
	cnt = (val & s_mask) >> s_shift;
	*rpcs = val;

	i915_gem_object_unpin_map(obj);

	return cnt;
}

static int
__check_rpcs(const char *name, u32 rpcs, int slices, unsigned int expected,
	     const char *prefix, const char *suffix)
{
	if (slices == expected)
		return 0;

	if (slices < 0) {
		pr_err("%s: %s read slice count failed with %d%s\n",
		       name, prefix, slices, suffix);
		return slices;
	}

	pr_err("%s: %s slice count %d is not %u%s\n",
	       name, prefix, slices, expected, suffix);

	pr_info("RPCS=0x%x; %u%sx%u%s\n",
		rpcs, slices,
		(rpcs & GEN8_RPCS_S_CNT_ENABLE) ? "*" : "",
		(rpcs & GEN8_RPCS_SS_CNT_MASK) >> GEN8_RPCS_SS_CNT_SHIFT,
		(rpcs & GEN8_RPCS_SS_CNT_ENABLE) ? "*" : "");

	return -EINVAL;
}

static int
__sseu_finish(const char *name,
	      unsigned int flags,
	      struct intel_context *ce,
	      struct drm_i915_gem_object *obj,
	      unsigned int expected,
	      struct igt_spinner *spin)
{
	unsigned int slices = hweight32(ce->engine->sseu.slice_mask);
	u32 rpcs = 0;
	int ret = 0;

	if (flags & TEST_RESET) {
		ret = intel_engine_reset(ce->engine, "sseu");
		if (ret)
			goto out;
	}

	ret = __read_slice_count(ce, obj,
				 flags & TEST_RESET ? NULL : spin, &rpcs);
	ret = __check_rpcs(name, rpcs, ret, expected, "Context", "!");
	if (ret)
		goto out;

	ret = __read_slice_count(ce->engine->kernel_context, obj, NULL, &rpcs);
	ret = __check_rpcs(name, rpcs, ret, slices, "Kernel context", "!");

out:
	if (spin)
		igt_spinner_end(spin);

	if ((flags & TEST_IDLE) && ret == 0) {
		ret = igt_flush_test(ce->engine->i915);
		if (ret)
			return ret;

		ret = __read_slice_count(ce, obj, NULL, &rpcs);
		ret = __check_rpcs(name, rpcs, ret, expected,
				   "Context", " after idle!");
	}

	return ret;
}

static int
__sseu_test(const char *name,
	    unsigned int flags,
	    struct intel_context *ce,
	    struct drm_i915_gem_object *obj,
	    struct intel_sseu sseu)
{
	struct igt_spinner *spin = NULL;
	int ret;

	intel_engine_pm_get(ce->engine);

	ret = __sseu_prepare(name, flags, ce, &spin);
	if (ret)
		goto out_pm;

	ret = intel_context_reconfigure_sseu(ce, sseu);
	if (ret)
		goto out_spin;

	ret = __sseu_finish(name, flags, ce, obj,
			    hweight32(sseu.slice_mask), spin);

out_spin:
	if (spin) {
		igt_spinner_end(spin);
		igt_spinner_fini(spin);
		kfree(spin);
	}
out_pm:
	intel_engine_pm_put(ce->engine);
	return ret;
}

static int
__igt_ctx_sseu(struct drm_i915_private *i915,
	       const char *name,
	       unsigned int flags)
{
	struct drm_i915_gem_object *obj;
	int inst = 0;
	int ret = 0;

	if (GRAPHICS_VER(i915) < 9)
		return 0;

	if (flags & TEST_RESET)
		igt_global_reset_lock(to_gt(i915));

	obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
	if (IS_ERR(obj)) {
		ret = PTR_ERR(obj);
		goto out_unlock;
	}

	do {
		struct intel_engine_cs *engine;
		struct intel_context *ce;
		struct intel_sseu pg_sseu;

		engine = intel_engine_lookup_user(i915,
						  I915_ENGINE_CLASS_RENDER,
						  inst++);
		if (!engine)
			break;

		if (hweight32(engine->sseu.slice_mask) < 2)
			continue;

		if (!engine->gt->info.sseu.has_slice_pg)
			continue;

		/*
		 * Gen11 VME-friendly power-gated configuration with
		 * half of the sub-slices enabled.
		 */
		pg_sseu = engine->sseu;
		pg_sseu.slice_mask = 1;
		pg_sseu.subslice_mask =
			~(~0 << (hweight32(engine->sseu.subslice_mask) / 2));

		pr_info("%s: SSEU subtest '%s', flags=%x, def_slices=%u, pg_slices=%u\n",
			engine->name, name, flags,
			hweight32(engine->sseu.slice_mask),
			hweight32(pg_sseu.slice_mask));

		ce = intel_context_create(engine);
		if (IS_ERR(ce)) {
			ret = PTR_ERR(ce);
			goto out_put;
		}

		ret = intel_context_pin(ce);
		if (ret)
			goto out_ce;

		/* First set the default mask. */
		ret = __sseu_test(name, flags, ce, obj, engine->sseu);
		if (ret)
			goto out_unpin;

		/* Then set a power-gated configuration. */
		ret = __sseu_test(name, flags, ce, obj, pg_sseu);
		if (ret)
			goto out_unpin;

		/* Back to defaults. */
		ret = __sseu_test(name, flags, ce, obj, engine->sseu);
		if (ret)
			goto out_unpin;

		/* One last power-gated configuration for the road. */
		ret = __sseu_test(name, flags, ce, obj, pg_sseu);
		if (ret)
			goto out_unpin;

out_unpin:
		intel_context_unpin(ce);
out_ce:
		intel_context_put(ce);
	} while (!ret);

	if (igt_flush_test(i915))
		ret = -EIO;

out_put:
	i915_gem_object_put(obj);

out_unlock:
	if (flags & TEST_RESET)
		igt_global_reset_unlock(to_gt(i915));

	if (ret)
		pr_err("%s: Failed with %d!\n", name, ret);

	return ret;
}

static int igt_ctx_sseu(void *arg)
{
	struct {
		const char *name;
		unsigned int flags;
	} *phase, phases[] = {
		{ .name = "basic", .flags = 0 },
		{ .name = "idle", .flags = TEST_IDLE },
		{ .name = "busy", .flags = TEST_BUSY },
		{ .name = "busy-reset", .flags = TEST_BUSY | TEST_RESET },
		{ .name = "busy-idle", .flags = TEST_BUSY | TEST_IDLE },
		{ .name = "reset-idle", .flags = TEST_RESET | TEST_IDLE },
	};
	unsigned int i;
	int ret = 0;

	for (i = 0, phase = phases; ret == 0 && i < ARRAY_SIZE(phases);
	     i++, phase++)
		ret = __igt_ctx_sseu(arg, phase->name, phase->flags);

	return ret;
}

static int igt_ctx_readonly(void *arg)
{
	struct drm_i915_private *i915 = arg;
	unsigned long idx, ndwords, dw, num_engines;
	struct drm_i915_gem_object *obj = NULL;
	struct i915_request *tq[5] = {};
	struct i915_gem_engines_iter it;
	struct i915_address_space *vm;
	struct i915_gem_context *ctx;
	struct intel_context *ce;
	struct igt_live_test t;
	I915_RND_STATE(prng);
	IGT_TIMEOUT(end_time);
	LIST_HEAD(objects);
	struct file *file;
	int err = -ENODEV;

	/*
	 * Create a few read-only objects (with the occasional writable
	 * object) and try to write into these objects, checking that the
	 * GPU discards any write to a read-only object.
	 */

	file = mock_file(i915);
	if (IS_ERR(file))
		return PTR_ERR(file);

	err = igt_live_test_begin(&t, i915, __func__, "");
	if (err)
		goto out_file;

	ctx = live_context(i915, file);
	if (IS_ERR(ctx)) {
		err = PTR_ERR(ctx);
		goto out_file;
	}

	vm = ctx->vm ?: &i915->ggtt.alias->vm;
	if (!vm || !vm->has_read_only) {
		err = 0;
		goto out_file;
	}

	num_engines = 0;
	for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it)
		if (intel_engine_can_store_dword(ce->engine))
			num_engines++;
	i915_gem_context_unlock_engines(ctx);

	ndwords = 0;
	dw = 0;
	while (!time_after(jiffies, end_time)) {
		for_each_gem_engine(ce,
				    i915_gem_context_lock_engines(ctx), it) {
			if (!intel_engine_can_store_dword(ce->engine))
				continue;

			if (!obj) {
				obj = create_test_object(ce->vm, file, &objects);
				if (IS_ERR(obj)) {
					err = PTR_ERR(obj);
					i915_gem_context_unlock_engines(ctx);
					goto out_file;
				}

				if (prandom_u32_state(&prng) & 1)
					i915_gem_object_set_readonly(obj);
			}

			err = gpu_fill(ce, obj, dw);
			if (err) {
				pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) [full-ppgtt? %s], err=%d\n",
				       ndwords, dw, max_dwords(obj),
				       ce->engine->name,
				       yesno(i915_gem_context_has_full_ppgtt(ctx)),
				       err);
				i915_gem_context_unlock_engines(ctx);
				goto out_file;
			}

			err = throttle(ce, tq, ARRAY_SIZE(tq));
			if (err) {
				i915_gem_context_unlock_engines(ctx);
				goto out_file;
			}

			if (++dw == max_dwords(obj)) {
				obj = NULL;
				dw = 0;
			}
			ndwords++;
		}
		i915_gem_context_unlock_engines(ctx);
	}
	pr_info("Submitted %lu dwords (across %lu engines)\n",
		ndwords, num_engines);

	dw = 0;
	idx = 0;
	list_for_each_entry(obj, &objects, st_link) {
		unsigned int rem =
			min_t(unsigned int, ndwords - dw, max_dwords(obj));
		unsigned int num_writes;

		num_writes = rem;
		if (i915_gem_object_is_readonly(obj))
			num_writes = 0;

		err = cpu_check(obj, idx++, num_writes);
		if (err)
			break;

		dw += rem;
	}

out_file:
	throttle_release(tq, ARRAY_SIZE(tq));
	if (igt_live_test_end(&t))
		err = -EIO;

	fput(file);
	return err;
}

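/*
 * The scratch offsets exercised below must not be backed by any real
 * vma: only then does a GPU access through one context's page tables
 * hit that vm's scratch page, which is what igt_vm_isolation compares
 * across contexts.
 */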
static int check_scratch(struct i915_address_space *vm, u64 offset)
{
	struct drm_mm_node *node;

	mutex_lock(&vm->mutex);
	node = __drm_mm_interval_first(&vm->mm,
				       offset, offset + sizeof(u32) - 1);
	mutex_unlock(&vm->mutex);
	if (!node || node->start > offset)
		return 0;

	GEM_BUG_ON(offset >= node->start + node->size);

	pr_err("Target offset 0x%08x_%08x overlaps with a node in the mm!\n",
	       upper_32_bits(offset), lower_32_bits(offset));
	return -EINVAL;
}

static int write_to_scratch(struct i915_gem_context *ctx,
			    struct intel_engine_cs *engine,
			    struct drm_i915_gem_object *obj,
			    u64 offset, u32 value)
{
	struct drm_i915_private *i915 = ctx->i915;
	struct i915_address_space *vm;
	struct i915_request *rq;
	struct i915_vma *vma;
	u32 *cmd;
	int err;

	GEM_BUG_ON(offset < I915_GTT_PAGE_SIZE);

	err = check_scratch(ctx->vm, offset);
	if (err)
		return err;

	cmd = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WB);
	if (IS_ERR(cmd))
		return PTR_ERR(cmd);

	*cmd++ = MI_STORE_DWORD_IMM_GEN4;
	if (GRAPHICS_VER(i915) >= 8) {
		*cmd++ = lower_32_bits(offset);
		*cmd++ = upper_32_bits(offset);
	} else {
		*cmd++ = 0;
		*cmd++ = offset;
	}
	*cmd++ = value;
	*cmd = MI_BATCH_BUFFER_END;
	__i915_gem_object_flush_map(obj, 0, 64);
	i915_gem_object_unpin_map(obj);

	intel_gt_chipset_flush(engine->gt);

	vm = i915_gem_context_get_eb_vm(ctx);
	vma = i915_vma_instance(obj, vm, NULL);
	if (IS_ERR(vma)) {
		err = PTR_ERR(vma);
		goto out_vm;
	}

	err = i915_vma_pin(vma, 0, 0, PIN_USER | PIN_OFFSET_FIXED);
	if (err)
		goto out_vm;

	rq = igt_request_alloc(ctx, engine);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto err_unpin;
	}

	i915_vma_lock(vma);
	err = i915_request_await_object(rq, vma->obj, false);
	if (err == 0)
		err = i915_vma_move_to_active(vma, rq, 0);
	i915_vma_unlock(vma);
	if (err)
		goto skip_request;

	if (rq->engine->emit_init_breadcrumb) {
		err = rq->engine->emit_init_breadcrumb(rq);
		if (err)
			goto skip_request;
	}

	err = engine->emit_bb_start(rq, vma->node.start, vma->node.size, 0);
	if (err)
		goto skip_request;

	i915_vma_unpin(vma);

	i915_request_add(rq);

	goto out_vm;
skip_request:
	i915_request_set_error_once(rq, err);
	i915_request_add(rq);
err_unpin:
	i915_vma_unpin(vma);
out_vm:
	i915_vm_put(vm);

	if (!err)
		err = i915_gem_object_wait(obj, 0, MAX_SCHEDULE_TIMEOUT);

	return err;
}

static int read_from_scratch(struct i915_gem_context *ctx,
			     struct intel_engine_cs *engine,
			     struct drm_i915_gem_object *obj,
			     u64 offset, u32 *value)
{
	struct drm_i915_private *i915 = ctx->i915;
	struct i915_address_space *vm;
	const u32 result = 0x100;
	struct i915_request *rq;
	struct i915_vma *vma;
	unsigned int flags;
	u32 *cmd;
	int err;

	GEM_BUG_ON(offset < I915_GTT_PAGE_SIZE);

	err = check_scratch(ctx->vm, offset);
	if (err)
		return err;

	if (GRAPHICS_VER(i915) >= 8) {
		const u32 GPR0 = engine->mmio_base + 0x600;

		vm = i915_gem_context_get_eb_vm(ctx);
		vma = i915_vma_instance(obj, vm, NULL);
		if (IS_ERR(vma)) {
			err = PTR_ERR(vma);
			goto out_vm;
		}

		err = i915_vma_pin(vma, 0, 0, PIN_USER | PIN_OFFSET_FIXED);
		if (err)
			goto out_vm;

		cmd = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WB);
		if (IS_ERR(cmd)) {
			err = PTR_ERR(cmd);
			goto err_unpin;
		}

		memset(cmd, POISON_INUSE, PAGE_SIZE);
		*cmd++ = MI_LOAD_REGISTER_MEM_GEN8;
		*cmd++ = GPR0;
		*cmd++ = lower_32_bits(offset);
		*cmd++ = upper_32_bits(offset);
		*cmd++ = MI_STORE_REGISTER_MEM_GEN8;
		*cmd++ = GPR0;
		*cmd++ = result;
		*cmd++ = 0;
		*cmd = MI_BATCH_BUFFER_END;

		i915_gem_object_flush_map(obj);
		i915_gem_object_unpin_map(obj);

		flags = 0;
	} else {
		const u32 reg = engine->mmio_base + 0x420;

		/* hsw: register access (even to 3DPRIM!) is protected */
		vm = i915_vm_get(&engine->gt->ggtt->vm);
		vma = i915_vma_instance(obj, vm, NULL);
		if (IS_ERR(vma)) {
			err = PTR_ERR(vma);
			goto out_vm;
		}

		err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
		if (err)
			goto out_vm;

		cmd = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WB);
		if (IS_ERR(cmd)) {
			err = PTR_ERR(cmd);
			goto err_unpin;
		}

		memset(cmd, POISON_INUSE, PAGE_SIZE);
		*cmd++ = MI_LOAD_REGISTER_MEM;
		*cmd++ = reg;
		*cmd++ = offset;
		*cmd++ = MI_STORE_REGISTER_MEM | MI_USE_GGTT;
		*cmd++ = reg;
		*cmd++ = vma->node.start + result;
		*cmd = MI_BATCH_BUFFER_END;

		i915_gem_object_flush_map(obj);
		i915_gem_object_unpin_map(obj);

		flags = I915_DISPATCH_SECURE;
	}

	intel_gt_chipset_flush(engine->gt);

	rq = igt_request_alloc(ctx, engine);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto err_unpin;
	}

	i915_vma_lock(vma);
	err = i915_request_await_object(rq, vma->obj, true);
	if (err == 0)
		err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
	i915_vma_unlock(vma);
	if (err)
		goto skip_request;

	if (rq->engine->emit_init_breadcrumb) {
		err = rq->engine->emit_init_breadcrumb(rq);
		if (err)
			goto skip_request;
	}

	err = engine->emit_bb_start(rq, vma->node.start, vma->node.size, flags);
	if (err)
		goto skip_request;

	i915_vma_unpin(vma);

	i915_request_add(rq);

	i915_gem_object_lock(obj, NULL);
	err = i915_gem_object_set_to_cpu_domain(obj, false);
	i915_gem_object_unlock(obj);
	if (err)
		goto out_vm;

	cmd = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WB);
	if (IS_ERR(cmd)) {
		err = PTR_ERR(cmd);
		goto out_vm;
	}

	*value = cmd[result / sizeof(*cmd)];
	i915_gem_object_unpin_map(obj);

	goto out_vm;
skip_request:
	i915_request_set_error_once(rq, err);
	i915_request_add(rq);
err_unpin:
	i915_vma_unpin(vma);
out_vm:
	i915_vm_put(vm);

	if (!err)
		err = i915_gem_object_wait(obj, 0, MAX_SCHEDULE_TIMEOUT);

	return err;
}

static int check_scratch_page(struct i915_gem_context *ctx, u32 *out)
{
	struct i915_address_space *vm;
	u32 *vaddr;
	int err = 0;

	vm = ctx->vm;
	if (!vm)
		return -ENODEV;

	if (!vm->scratch[0]) {
		pr_err("No scratch page!\n");
		return -EINVAL;
	}

	vaddr = __px_vaddr(vm->scratch[0]);

	memcpy(out, vaddr, sizeof(*out));
	if (memchr_inv(vaddr, *out, PAGE_SIZE)) {
		pr_err("Inconsistent initial state of scratch page!\n");
		err = -EINVAL;
	}

	return err;
}

static int igt_vm_isolation(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct i915_gem_context *ctx_a, *ctx_b;
	struct drm_i915_gem_object *obj_a, *obj_b;
	unsigned long num_engines, count;
	struct intel_engine_cs *engine;
	struct igt_live_test t;
	I915_RND_STATE(prng);
	struct file *file;
	u64 vm_total;
	u32 expected;
	int err;

	if (GRAPHICS_VER(i915) < 7)
		return 0;

	/*
	 * The simple goal here is that a write into one context is not
	 * observed in a second (separate page tables and scratch).
	 */

	file = mock_file(i915);
	if (IS_ERR(file))
		return PTR_ERR(file);

	err = igt_live_test_begin(&t, i915, __func__, "");
	if (err)
		goto out_file;

	ctx_a = live_context(i915, file);
	if (IS_ERR(ctx_a)) {
		err = PTR_ERR(ctx_a);
		goto out_file;
	}

	ctx_b = live_context(i915, file);
	if (IS_ERR(ctx_b)) {
		err = PTR_ERR(ctx_b);
		goto out_file;
	}

	/* We can only test vm isolation if the vms are distinct */
	if (ctx_a->vm == ctx_b->vm)
		goto out_file;

	/* Read the initial state of the scratch page */
	err = check_scratch_page(ctx_a, &expected);
	if (err)
		goto out_file;

	err = check_scratch_page(ctx_b, &expected);
	if (err)
		goto out_file;

	vm_total = ctx_a->vm->total;
	GEM_BUG_ON(ctx_b->vm->total != vm_total);

	obj_a = i915_gem_object_create_internal(i915, PAGE_SIZE);
	if (IS_ERR(obj_a)) {
		err = PTR_ERR(obj_a);
		goto out_file;
	}

	obj_b = i915_gem_object_create_internal(i915, PAGE_SIZE);
	if (IS_ERR(obj_b)) {
		err = PTR_ERR(obj_b);
		goto put_a;
	}

	count = 0;
	num_engines = 0;
	for_each_uabi_engine(engine, i915) {
		IGT_TIMEOUT(end_time);
		unsigned long this = 0;

		if (!intel_engine_can_store_dword(engine))
			continue;

		/* Not all engines have their own GPR! */
		if (GRAPHICS_VER(i915) < 8 && engine->class != RENDER_CLASS)
			continue;

		while (!__igt_timeout(end_time, NULL)) {
			u32 value = 0xc5c5c5c5;
			u64 offset;

			/* Leave enough space at offset 0 for the batch */
			offset = igt_random_offset(&prng,
						   I915_GTT_PAGE_SIZE, vm_total,
						   sizeof(u32), alignof_dword);

			err = write_to_scratch(ctx_a, engine, obj_a,
					       offset, 0xdeadbeef);
			if (err == 0)
				err = read_from_scratch(ctx_b, engine, obj_b,
							offset, &value);
			if (err)
				goto put_b;

			if (value != expected) {
				pr_err("%s: Read %08x from scratch (offset 0x%08x_%08x), after %lu reads!\n",
				       engine->name, value,
				       upper_32_bits(offset),
				       lower_32_bits(offset),
				       this);
				err = -EINVAL;
				goto put_b;
			}

			this++;
		}
		count += this;
		num_engines++;
	}
	pr_info("Checked %lu scratch offsets across %lu engines\n",
		count, num_engines);

put_b:
	i915_gem_object_put(obj_b);
put_a:
	i915_gem_object_put(obj_a);
out_file:
	if (igt_live_test_end(&t))
		err = -EIO;
	fput(file);
	return err;
}

int i915_gem_context_live_selftests(struct drm_i915_private *i915)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(live_nop_switch),
		SUBTEST(live_parallel_switch),
		SUBTEST(igt_ctx_exec),
		SUBTEST(igt_ctx_readonly),
		SUBTEST(igt_ctx_sseu),
		SUBTEST(igt_shared_ctx_exec),
		SUBTEST(igt_vm_isolation),
	};

	if (intel_gt_is_wedged(to_gt(i915)))
		return 0;

	return i915_live_subtests(tests, i915);
}