// SPDX-License-Identifier: MIT
/*
 * Copyright © 2022 Intel Corporation
 */

#include "i915_selftest.h"

#include "gem/i915_gem_internal.h"
#include "gem/i915_gem_region.h"

#include "gen8_engine_cs.h"
#include "i915_gem_ww.h"
#include "intel_engine_regs.h"
#include "intel_gpu_commands.h"
#include "intel_context.h"
#include "intel_gt.h"
#include "intel_ring.h"

#include "selftests/igt_flush_test.h"
#include "selftests/i915_random.h"

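/*
 * Write a single quadword into the backing store of @vma through its CPU
 * mapping, using GPU address @addr to locate the offset within the object.
 */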
static void vma_set_qw(struct i915_vma *vma, u64 addr, u64 val)
{
	GEM_BUG_ON(addr < i915_vma_offset(vma));
	GEM_BUG_ON(addr > i915_vma_offset(vma) + i915_vma_size(vma) - sizeof(val));
	memset64(page_mask_bits(vma->obj->mm.mapping) +
		 (addr - i915_vma_offset(vma)), val, 1);
}

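/*
 * Build a self-looping batch that samples @addr with
 * MI_CONDITIONAL_BATCH_BUFFER_END. When @vb differs from @va, the sampled
 * PTE is then rewritten to point at @vb and @tlbinv is called; the loop
 * only terminates once the GPU observes vb's zeroed backing store, i.e.
 * once the stale translation has been evicted from the TLB.
 */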
static int
pte_tlbinv(struct intel_context *ce,
	   struct i915_vma *va,
	   struct i915_vma *vb,
	   u64 align,
	   void (*tlbinv)(struct i915_address_space *vm, u64 addr, u64 length),
	   u64 length,
	   struct rnd_state *prng)
{
	struct drm_i915_gem_object *batch;
	struct drm_mm_node vb_node;
	struct i915_request *rq;
	struct i915_vma *vma;
	u64 addr;
	int err;
	u32 *cs;

	batch = i915_gem_object_create_internal(ce->vm->i915, 4096);
	if (IS_ERR(batch))
		return PTR_ERR(batch);

	vma = i915_vma_instance(batch, ce->vm, NULL);
	if (IS_ERR(vma)) {
		err = PTR_ERR(vma);
		goto out;
	}

	err = i915_vma_pin(vma, 0, 0, PIN_USER);
	if (err)
		goto out;

	/* Pin va at a random but aligned offset after vma */
	addr = round_up(vma->node.start + vma->node.size, align);
	/* MI_CONDITIONAL_BATCH_BUFFER_END limits the address to 48b */
	addr = igt_random_offset(prng, addr, min(ce->vm->total, BIT_ULL(48)),
				 va->size, align);
	err = i915_vma_pin(va, 0, 0, addr | PIN_OFFSET_FIXED | PIN_USER);
	if (err) {
		pr_err("Cannot pin at %llx+%llx\n", addr, va->size);
		goto out;
	}
	GEM_BUG_ON(i915_vma_offset(va) != addr);
	if (vb != va) {
		vb_node = vb->node;
		vb->node = va->node; /* overwrites the _same_ PTE */
	}

	/*
	 * Now choose a random dword within the 1st pinned page.
	 *
	 * SZ_64K pages on dg1 require that the whole PT be marked as
	 * containing 64KiB entries. So we make sure that the vma covers the
	 * whole PT, despite being randomly aligned to 64KiB, and restrict
	 * our sampling to the 2MiB PT within which we know we will be using
	 * 64KiB pages.
	 */
	if (align == SZ_64K)
		addr = round_up(addr, SZ_2M);
	addr = igt_random_offset(prng, addr, addr + align, 8, 8);

	if (va != vb)
		pr_info("%s(%s): Sampling %llx, with alignment %llx, using PTE size %x (phys %x, sg %x), invalidate:%llx+%llx\n",
			ce->engine->name, va->obj->mm.region->name ?: "smem",
			addr, align, va->resource->page_sizes_gtt,
			va->page_sizes.phys, va->page_sizes.sg,
			addr & -length, length);

	cs = i915_gem_object_pin_map_unlocked(batch, I915_MAP_WC);
	if (IS_ERR(cs)) {
		err = PTR_ERR(cs);
		goto out_va;
	}

	*cs++ = MI_NOOP; /* for later termination */
	/*
	 * Sample the target to see if we spot the updated backing store.
	 * Gen8 VCS compares the immediate value with the bitwise-and of two
	 * consecutive DWORDs pointed to by addr; other gens/engines compare
	 * the value with the single DWORD pointed to by addr. Moreover we
	 * want to exercise DWORD-sized invalidations. The values below have
	 * been chosen to satisfy all of these requirements.
	 */
	*cs++ = MI_CONDITIONAL_BATCH_BUFFER_END | MI_DO_COMPARE | 2;
	*cs++ = 0; /* break if *addr == 0 */
	*cs++ = lower_32_bits(addr);
	*cs++ = upper_32_bits(addr);
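	/*
	 * va reads back as all-ones so the conditional keeps looping; vb
	 * reads back as zero, so the batch ends as soon as the GPU observes
	 * vb's pages through the rewritten PTE.
	 */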
	vma_set_qw(va, addr, -1);
	vma_set_qw(vb, addr, 0);

	/* Keep sampling until we get bored */
	*cs++ = MI_BATCH_BUFFER_START | BIT(8) | 1;
	*cs++ = lower_32_bits(i915_vma_offset(vma));
	*cs++ = upper_32_bits(i915_vma_offset(vma));

	i915_gem_object_flush_map(batch);

	rq = i915_request_create(ce);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto out_va;
	}

	err = rq->engine->emit_bb_start(rq, i915_vma_offset(vma), 0, 0);
	if (err) {
		i915_request_add(rq);
		goto out_va;
	}

	i915_request_get(rq);
	i915_request_add(rq);

	/* Short sleep to sanity check that the batch is spinning before we begin */
	msleep(10);
	if (va == vb) {
		if (!i915_request_completed(rq)) {
			pr_err("%s(%s): Semaphore sanitycheck failed %llx, with alignment %llx, using PTE size %x (phys %x, sg %x)\n",
			       ce->engine->name, va->obj->mm.region->name ?: "smem",
			       addr, align, va->resource->page_sizes_gtt,
			       va->page_sizes.phys, va->page_sizes.sg);
			err = -EIO;
		}
	} else if (!i915_request_completed(rq)) {
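		/*
		 * Describe vb's backing pages over va's GTT range so that
		 * insert_entries() below rewrites the very same PTEs to
		 * point at B instead of A.
		 */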
		struct i915_vma_resource vb_res = {
			.bi.pages = vb->obj->mm.pages,
			.bi.page_sizes = vb->obj->mm.page_sizes,
			.start = i915_vma_offset(vb),
			.vma_size = i915_vma_size(vb)
		};
		unsigned int pte_flags = 0;

		/* Flip the PTE between A and B */
		if (i915_gem_object_is_lmem(vb->obj))
			pte_flags |= PTE_LM;
		ce->vm->insert_entries(ce->vm, &vb_res, 0, pte_flags);

		/* Flush the PTE update to concurrent HW */
		tlbinv(ce->vm, addr & -length, length);

		if (wait_for(i915_request_completed(rq), HZ / 2)) {
			pr_err("%s: Request did not complete; the COND_BBE did not read the updated PTE\n",
			       ce->engine->name);
			err = -EINVAL;
		}
	} else {
		pr_err("Spinner ended unexpectedly\n");
		err = -EIO;
	}
	i915_request_put(rq);

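	/*
	 * Replace the leading MI_NOOP with MI_BATCH_BUFFER_END so that any
	 * still-spinning batch terminates before we release it.
	 */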
	cs = page_mask_bits(batch->mm.mapping);
	*cs = MI_BATCH_BUFFER_END;
	wmb();

out_va:
	if (vb != va)
		vb->node = vb_node;
	i915_vma_unpin(va);
	if (i915_vma_unbind_unlocked(va))
		err = -EIO;
out:
	i915_gem_object_put(batch);
	return err;
}

static struct drm_i915_gem_object *create_lmem(struct intel_gt *gt)
{
	/*
	 * Allocating a contiguous block of the largest possible size allows
	 * us to exercise every supported page size.
	 */
	return i915_gem_object_create_lmem(gt->i915, SZ_1G, I915_BO_ALLOC_CONTIGUOUS);
}

static struct drm_i915_gem_object *create_smem(struct intel_gt *gt)
{
	/*
	 * SZ_64K pages require covering the whole 2M PT (gen8 to tgl/dg1).
	 * While that does not require the whole 2M block to be contiguous,
	 * it is easier to make it so, since we need contiguity for SZ_2M
	 * pages anyway. Since we randomly offset the start of the vma, we
	 * need a 4M object so that there is a 2M range within it that is
	 * suitable for SZ_64K PTEs.
	 */
	return i915_gem_object_create_internal(gt->i915, SZ_4M);
}

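/*
 * Run pte_tlbinv() on every engine, for every supported page size and a
 * range of invalidation lengths, using backing objects allocated by
 * @create_fn and the invalidation flavour implemented by @tlbinv.
 */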
static int
mem_tlbinv(struct intel_gt *gt,
	   struct drm_i915_gem_object *(*create_fn)(struct intel_gt *),
	   void (*tlbinv)(struct i915_address_space *vm, u64 addr, u64 length))
{
	unsigned int ppgtt_size = RUNTIME_INFO(gt->i915)->ppgtt_size;
	struct intel_engine_cs *engine;
	struct drm_i915_gem_object *A, *B;
	struct i915_ppgtt *ppgtt;
	struct i915_vma *va, *vb;
	enum intel_engine_id id;
	I915_RND_STATE(prng);
	void *vaddr;
	int err;

	/*
	 * Check that a TLB invalidate is able to revoke an active page. We
	 * load a page into a spinning COND_BBE loop and then remap that page
	 * to a new physical address. The old translation is retained in the
	 * TLB cache, keeping the loop spinning, until we issue an
	 * invalidate.
	 */

	A = create_fn(gt);
	if (IS_ERR(A))
		return PTR_ERR(A);

	vaddr = i915_gem_object_pin_map_unlocked(A, I915_MAP_WC);
	if (IS_ERR(vaddr)) {
		err = PTR_ERR(vaddr);
		goto out_a;
	}

	B = create_fn(gt);
	if (IS_ERR(B)) {
		err = PTR_ERR(B);
		goto out_a;
	}

	vaddr = i915_gem_object_pin_map_unlocked(B, I915_MAP_WC);
	if (IS_ERR(vaddr)) {
		err = PTR_ERR(vaddr);
		goto out_b;
	}

	GEM_BUG_ON(A->base.size != B->base.size);
	if ((A->mm.page_sizes.phys | B->mm.page_sizes.phys) & (A->base.size - 1))
		pr_warn("Failed to allocate contiguous pages for size %zx\n",
			A->base.size);

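	/* Use a private ppgtt so the test fully controls the PTEs it rewrites */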
	ppgtt = i915_ppgtt_create(gt, 0);
	if (IS_ERR(ppgtt)) {
		err = PTR_ERR(ppgtt);
		goto out_b;
	}

	va = i915_vma_instance(A, &ppgtt->vm, NULL);
	if (IS_ERR(va)) {
		err = PTR_ERR(va);
		goto out_vm;
	}

	vb = i915_vma_instance(B, &ppgtt->vm, NULL);
	if (IS_ERR(vb)) {
		err = PTR_ERR(vb);
		goto out_vm;
	}

	err = 0;
	for_each_engine(engine, gt, id) {
		struct i915_gem_ww_ctx ww;
		struct intel_context *ce;
		int bit;

		ce = intel_context_create(engine);
		if (IS_ERR(ce)) {
			err = PTR_ERR(ce);
			break;
		}

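		/* Swap the context over to our private ppgtt before pinning */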
		i915_vm_put(ce->vm);
		ce->vm = i915_vm_get(&ppgtt->vm);

		for_i915_gem_ww(&ww, err, true)
			err = intel_context_pin_ww(ce, &ww);
		if (err)
			goto err_put;

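		/*
		 * Walk every supported page size (used as the vma alignment)
		 * and, for each, power-of-two invalidation lengths up to the
		 * full ppgtt span.
		 */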
		for_each_set_bit(bit,
				 (unsigned long *)&RUNTIME_INFO(gt->i915)->page_sizes,
				 BITS_PER_TYPE(RUNTIME_INFO(gt->i915)->page_sizes)) {
			unsigned int len;

			if (BIT_ULL(bit) < i915_vm_obj_min_alignment(va->vm, va->obj))
				continue;

			/* sanity check the semaphore wake up */
			err = pte_tlbinv(ce, va, va,
					 BIT_ULL(bit),
					 NULL, SZ_4K,
					 &prng);
			if (err)
				goto err_unpin;

			for (len = 2; len <= ppgtt_size; len = min(2 * len, ppgtt_size)) {
				err = pte_tlbinv(ce, va, vb,
						 BIT_ULL(bit),
						 tlbinv,
						 BIT_ULL(len),
						 &prng);
				if (err)
					goto err_unpin;
				if (len == ppgtt_size)
					break;
			}
		}
err_unpin:
		intel_context_unpin(ce);
err_put:
		intel_context_put(ce);
		if (err)
			break;
	}

	if (igt_flush_test(gt->i915))
		err = -EIO;

out_vm:
	i915_vm_put(&ppgtt->vm);
out_b:
	i915_gem_object_put(B);
out_a:
	i915_gem_object_put(A);
	return err;
}

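/*
 * Invalidate the entire GT TLB, ignoring the sampled address range. The
 * seqno is ORed with 1 so that the invalidation is treated as still
 * outstanding rather than already satisfied.
 */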
static void tlbinv_full(struct i915_address_space *vm, u64 addr, u64 length)
{
	intel_gt_invalidate_tlb(vm->gt, intel_gt_tlb_seqno(vm->gt) | 1);
}

static int invalidate_full(void *arg)
{
	struct intel_gt *gt = arg;
	int err;

	if (GRAPHICS_VER(gt->i915) < 8)
		return 0; /* TLB invalidate not implemented */

	err = mem_tlbinv(gt, create_smem, tlbinv_full);
	if (err == 0)
		err = mem_tlbinv(gt, create_lmem, tlbinv_full);
	if (err == -ENODEV || err == -ENXIO)
		err = 0;

	return err;
}

int intel_tlb_live_selftests(struct drm_i915_private *i915)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(invalidate_full),
	};
	struct intel_gt *gt;
	unsigned int i;

	for_each_gt(gt, i915, i) {
		int err;

		if (intel_gt_is_wedged(gt))
			continue;

		err = intel_gt_live_subtests(tests, gt);
		if (err)
			return err;
	}

	return 0;
}