/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2017 Intel Corporation
 */

#include <linux/prime_numbers.h>

#include "gt/intel_engine_pm.h"
#include "gt/intel_gt.h"
#include "gt/intel_gt_pm.h"
#include "gt/intel_ring.h"

#include "i915_selftest.h"
#include "selftests/i915_random.h"

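/* The object under test and the engine used for the GPU write path. */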
struct context {
	struct drm_i915_gem_object *obj;
	struct intel_engine_cs *engine;
};

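/*
 * CPU (kmap) access path: read/write a single dword through a kernel
 * mapping of the backing page, applying the clflushes requested by
 * i915_gem_object_prepare_write()/_read().
 */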
static int cpu_set(struct context *ctx, unsigned long offset, u32 v)
{
	unsigned int needs_clflush;
	struct page *page;
	void *map;
	u32 *cpu;
	int err;

	i915_gem_object_lock(ctx->obj, NULL);
	err = i915_gem_object_prepare_write(ctx->obj, &needs_clflush);
	if (err)
		goto out;

	page = i915_gem_object_get_page(ctx->obj, offset >> PAGE_SHIFT);
	map = kmap_atomic(page);
	cpu = map + offset_in_page(offset);

	if (needs_clflush & CLFLUSH_BEFORE)
		drm_clflush_virt_range(cpu, sizeof(*cpu));

	*cpu = v;

	if (needs_clflush & CLFLUSH_AFTER)
		drm_clflush_virt_range(cpu, sizeof(*cpu));

	kunmap_atomic(map);
	i915_gem_object_finish_access(ctx->obj);

out:
	i915_gem_object_unlock(ctx->obj);
	return err;
}

static int cpu_get(struct context *ctx, unsigned long offset, u32 *v)
{
	unsigned int needs_clflush;
	struct page *page;
	void *map;
	u32 *cpu;
	int err;

	i915_gem_object_lock(ctx->obj, NULL);
	err = i915_gem_object_prepare_read(ctx->obj, &needs_clflush);
	if (err)
		goto out;

	page = i915_gem_object_get_page(ctx->obj, offset >> PAGE_SHIFT);
	map = kmap_atomic(page);
	cpu = map + offset_in_page(offset);

	if (needs_clflush & CLFLUSH_BEFORE)
		drm_clflush_virt_range(cpu, sizeof(*cpu));

	*v = *cpu;

	kunmap_atomic(map);
	i915_gem_object_finish_access(ctx->obj);

out:
	i915_gem_object_unlock(ctx->obj);
	return err;
}

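/*
 * GGTT access path: move the object to the GTT domain, pin it into the
 * mappable aperture and read/write the dword through an ioremapped view.
 */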
static int gtt_set(struct context *ctx, unsigned long offset, u32 v)
{
	struct i915_vma *vma;
	u32 __iomem *map;
	int err = 0;

	i915_gem_object_lock(ctx->obj, NULL);
	err = i915_gem_object_set_to_gtt_domain(ctx->obj, true);
	i915_gem_object_unlock(ctx->obj);
	if (err)
		return err;

	vma = i915_gem_object_ggtt_pin(ctx->obj, NULL, 0, 0, PIN_MAPPABLE);
	if (IS_ERR(vma))
		return PTR_ERR(vma);

	intel_gt_pm_get(vma->vm->gt);

	map = i915_vma_pin_iomap(vma);
	i915_vma_unpin(vma);
	if (IS_ERR(map)) {
		err = PTR_ERR(map);
		goto out_rpm;
	}

	iowrite32(v, &map[offset / sizeof(*map)]);
	i915_vma_unpin_iomap(vma);

out_rpm:
	intel_gt_pm_put(vma->vm->gt);
	return err;
}

static int gtt_get(struct context *ctx, unsigned long offset, u32 *v)
{
	struct i915_vma *vma;
	u32 __iomem *map;
	int err = 0;

	i915_gem_object_lock(ctx->obj, NULL);
	err = i915_gem_object_set_to_gtt_domain(ctx->obj, false);
	i915_gem_object_unlock(ctx->obj);
	if (err)
		return err;

	vma = i915_gem_object_ggtt_pin(ctx->obj, NULL, 0, 0, PIN_MAPPABLE);
	if (IS_ERR(vma))
		return PTR_ERR(vma);

	intel_gt_pm_get(vma->vm->gt);

	map = i915_vma_pin_iomap(vma);
	i915_vma_unpin(vma);
	if (IS_ERR(map)) {
		err = PTR_ERR(map);
		goto out_rpm;
	}

	*v = ioread32(&map[offset / sizeof(*map)]);
	i915_vma_unpin_iomap(vma);

out_rpm:
	intel_gt_pm_put(vma->vm->gt);
	return err;
}

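/*
 * WC access path: read/write the dword through a write-combining CPU
 * vmap of the object, flushing the mapping after writes.
 */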
static int wc_set(struct context *ctx, unsigned long offset, u32 v)
{
	u32 *map;
	int err;

	i915_gem_object_lock(ctx->obj, NULL);
	err = i915_gem_object_set_to_wc_domain(ctx->obj, true);
	i915_gem_object_unlock(ctx->obj);
	if (err)
		return err;

	map = i915_gem_object_pin_map(ctx->obj, I915_MAP_WC);
	if (IS_ERR(map))
		return PTR_ERR(map);

	map[offset / sizeof(*map)] = v;

	__i915_gem_object_flush_map(ctx->obj, offset, sizeof(*map));
	i915_gem_object_unpin_map(ctx->obj);

	return 0;
}

static int wc_get(struct context *ctx, unsigned long offset, u32 *v)
{
	u32 *map;
	int err;

	i915_gem_object_lock(ctx->obj, NULL);
	err = i915_gem_object_set_to_wc_domain(ctx->obj, false);
	i915_gem_object_unlock(ctx->obj);
	if (err)
		return err;

	map = i915_gem_object_pin_map(ctx->obj, I915_MAP_WC);
	if (IS_ERR(map))
		return PTR_ERR(map);

	*v = map[offset / sizeof(*map)];
	i915_gem_object_unpin_map(ctx->obj);

	return 0;
}

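/*
 * GPU access path: emit an MI_STORE_DWORD_IMM on the chosen engine via a
 * kernel request so that the GPU itself performs the write.
 */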
static int gpu_set(struct context *ctx, unsigned long offset, u32 v)
{
	struct i915_request *rq;
	struct i915_vma *vma;
	u32 *cs;
	int err;

	i915_gem_object_lock(ctx->obj, NULL);
	err = i915_gem_object_set_to_gtt_domain(ctx->obj, true);
	if (err)
		goto out_unlock;

	vma = i915_gem_object_ggtt_pin(ctx->obj, NULL, 0, 0, 0);
	if (IS_ERR(vma)) {
		err = PTR_ERR(vma);
		goto out_unlock;
	}

	rq = intel_engine_create_kernel_request(ctx->engine);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto out_unpin;
	}

	cs = intel_ring_begin(rq, 4);
	if (IS_ERR(cs)) {
		err = PTR_ERR(cs);
		goto out_rq;
	}

	if (INTEL_GEN(ctx->engine->i915) >= 8) {
		*cs++ = MI_STORE_DWORD_IMM_GEN4 | 1 << 22;
		*cs++ = lower_32_bits(i915_ggtt_offset(vma) + offset);
		*cs++ = upper_32_bits(i915_ggtt_offset(vma) + offset);
		*cs++ = v;
	} else if (INTEL_GEN(ctx->engine->i915) >= 4) {
		*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
		*cs++ = 0;
		*cs++ = i915_ggtt_offset(vma) + offset;
		*cs++ = v;
	} else {
		*cs++ = MI_STORE_DWORD_IMM | MI_MEM_VIRTUAL;
		*cs++ = i915_ggtt_offset(vma) + offset;
		*cs++ = v;
		*cs++ = MI_NOOP;
	}
	intel_ring_advance(rq, cs);

	err = i915_request_await_object(rq, vma->obj, true);
	if (err == 0)
		err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);

out_rq:
	i915_request_add(rq);
out_unpin:
	i915_vma_unpin(vma);
out_unlock:
	i915_gem_object_unlock(ctx->obj);

	return err;
}

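/*
 * Validity checks used to skip access modes that cannot work on the
 * current device (e.g. no fence registers for the GGTT path, or an
 * engine that cannot emit MI_STORE_DWORD_IMM) or when the GT is wedged.
 */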
static bool always_valid(struct context *ctx)
{
	return true;
}

static bool needs_fence_registers(struct context *ctx)
{
	struct intel_gt *gt = ctx->engine->gt;

	if (intel_gt_is_wedged(gt))
		return false;

	return gt->ggtt->num_fences;
}

static bool needs_mi_store_dword(struct context *ctx)
{
	if (intel_gt_is_wedged(ctx->engine->gt))
		return false;

	return intel_engine_can_store_dword(ctx->engine);
}

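/*
 * The table of access modes exercised below: each entry provides a
 * setter, an optional getter and a predicate saying whether the mode is
 * usable on this device.
 */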
static const struct igt_coherency_mode {
	const char *name;
	int (*set)(struct context *ctx, unsigned long offset, u32 v);
	int (*get)(struct context *ctx, unsigned long offset, u32 *v);
	bool (*valid)(struct context *ctx);
} igt_coherency_mode[] = {
	{ "cpu", cpu_set, cpu_get, always_valid },
	{ "gtt", gtt_set, gtt_get, needs_fence_registers },
	{ "wc", wc_set, wc_get, always_valid },
	{ "gpu", gpu_set, NULL, needs_mi_store_dword },
	{ },
};

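/* Pick one uabi engine at random to spread coverage across runs. */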
static struct intel_engine_cs *
random_engine(struct drm_i915_private *i915, struct rnd_state *prng)
{
	struct intel_engine_cs *engine;
	unsigned int count;

	count = 0;
	for_each_uabi_engine(engine, i915)
		count++;

	count = i915_prandom_u32_max_state(count, prng);
	for_each_uabi_engine(engine, i915)
		if (count-- == 0)
			return engine;

	return NULL;
}

static int igt_gem_coherency(void *arg)
{
	const unsigned int ncachelines = PAGE_SIZE/64;
	struct drm_i915_private *i915 = arg;
	const struct igt_coherency_mode *read, *write, *over;
	unsigned long count, n;
	u32 *offsets, *values;
	I915_RND_STATE(prng);
	struct context ctx;
	int err = 0;

	/*
	 * We repeatedly write, overwrite and read from a sequence of
	 * cachelines in order to try and detect incoherency (unflushed writes
	 * from either the CPU or GPU). Each setter/getter uses our cache
	 * domain API which should prevent incoherency.
	 */

	offsets = kmalloc_array(ncachelines, 2*sizeof(u32), GFP_KERNEL);
	if (!offsets)
		return -ENOMEM;
	for (count = 0; count < ncachelines; count++)
		offsets[count] = count * 64 + 4 * (count % 16);

	values = offsets + ncachelines;

	ctx.engine = random_engine(i915, &prng);
	if (!ctx.engine) {
		err = -ENODEV;
		goto out_free;
	}
	pr_info("%s: using %s\n", __func__, ctx.engine->name);
	intel_engine_pm_get(ctx.engine);

	for (over = igt_coherency_mode; over->name; over++) {
		if (!over->set)
			continue;

		if (!over->valid(&ctx))
			continue;

		for (write = igt_coherency_mode; write->name; write++) {
			if (!write->set)
				continue;

			if (!write->valid(&ctx))
				continue;

			for (read = igt_coherency_mode; read->name; read++) {
				if (!read->get)
					continue;

				if (!read->valid(&ctx))
					continue;

				for_each_prime_number_from(count, 1, ncachelines) {
					ctx.obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
					if (IS_ERR(ctx.obj)) {
						err = PTR_ERR(ctx.obj);
						goto out_pm;
					}

					i915_random_reorder(offsets, ncachelines, &prng);
					for (n = 0; n < count; n++)
						values[n] = prandom_u32_state(&prng);

					for (n = 0; n < count; n++) {
						err = over->set(&ctx, offsets[n], ~values[n]);
						if (err) {
							pr_err("Failed to set stale value[%ld/%ld] in object using %s, err=%d\n",
							       n, count, over->name, err);
							goto put_object;
						}
					}

					for (n = 0; n < count; n++) {
						err = write->set(&ctx, offsets[n], values[n]);
						if (err) {
							pr_err("Failed to set value[%ld/%ld] in object using %s, err=%d\n",
							       n, count, write->name, err);
							goto put_object;
						}
					}

					for (n = 0; n < count; n++) {
						u32 found;

						err = read->get(&ctx, offsets[n], &found);
						if (err) {
							pr_err("Failed to get value[%ld/%ld] in object using %s, err=%d\n",
							       n, count, read->name, err);
							goto put_object;
						}

						if (found != values[n]) {
							pr_err("Value[%ld/%ld] mismatch, (overwrite with %s) wrote [%s] %x read [%s] %x (inverse %x), at offset %x\n",
							       n, count, over->name,
							       write->name, values[n],
							       read->name, found,
							       ~values[n], offsets[n]);
							err = -EINVAL;
							goto put_object;
						}
					}

					i915_gem_object_put(ctx.obj);
				}
			}
		}
	}
out_pm:
	intel_engine_pm_put(ctx.engine);
out_free:
	kfree(offsets);
	return err;

put_object:
	i915_gem_object_put(ctx.obj);
	goto out_pm;
}

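/* Entry point for the i915 live selftest harness. */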
int i915_gem_coherency_live_selftests(struct drm_i915_private *i915)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(igt_gem_coherency),
	};

	return i915_subtests(tests, i915);
}