// SPDX-License-Identifier: MIT
/*
 * Copyright © 2019 Intel Corporation
 */

#include "i915_selftest.h"

#include "gt/intel_context.h"
#include "gt/intel_engine_user.h"
#include "gt/intel_gt.h"
#include "gt/intel_gpu_commands.h"
#include "gem/i915_gem_lmem.h"

#include "selftests/igt_flush_test.h"
#include "selftests/mock_drm.h"
#include "selftests/i915_random.h"
#include "huge_gem_object.h"
#include "mock_context.h"

enum client_tiling {
	CLIENT_TILING_LINEAR,
	CLIENT_TILING_X,
	CLIENT_TILING_Y,
	CLIENT_NUM_TILING_TYPES
};

#define WIDTH 512
#define HEIGHT 32

struct blit_buffer {
	struct i915_vma *vma;
	u32 start_val;
	enum client_tiling tiling;
};

struct tiled_blits {
	struct intel_context *ce;
	struct blit_buffer buffers[3];
	struct blit_buffer scratch;
	struct i915_vma *batch;
	u64 hole;
	u32 width;
	u32 height;
};

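/*
 * Build a batch that copies src to dst with the blitter: program
 * BCS_SWCTRL for any Y-tiled surface, flush, then emit an
 * XY_SRC_COPY_BLT with the pitches adjusted for tiled surfaces.
 */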
static int prepare_blit(const struct tiled_blits *t,
			struct blit_buffer *dst,
			struct blit_buffer *src,
			struct drm_i915_gem_object *batch)
{
	const int ver = GRAPHICS_VER(to_i915(batch->base.dev));
	bool use_64b_reloc = ver >= 8;
	u32 src_pitch, dst_pitch;
	u32 cmd, *cs;

	cs = i915_gem_object_pin_map_unlocked(batch, I915_MAP_WC);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	*cs++ = MI_LOAD_REGISTER_IMM(1);
	*cs++ = i915_mmio_reg_offset(BCS_SWCTRL);
	cmd = (BCS_SRC_Y | BCS_DST_Y) << 16;
	if (src->tiling == CLIENT_TILING_Y)
		cmd |= BCS_SRC_Y;
	if (dst->tiling == CLIENT_TILING_Y)
		cmd |= BCS_DST_Y;
	*cs++ = cmd;

	cmd = MI_FLUSH_DW;
	if (ver >= 8)
		cmd++;
	*cs++ = cmd;
	*cs++ = 0;
	*cs++ = 0;
	*cs++ = 0;

	cmd = XY_SRC_COPY_BLT_CMD | BLT_WRITE_RGBA | (8 - 2);
	if (ver >= 8)
		cmd += 2;

	src_pitch = t->width * 4;
	if (src->tiling) {
		cmd |= XY_SRC_COPY_BLT_SRC_TILED;
		src_pitch /= 4;
	}

	dst_pitch = t->width * 4;
	if (dst->tiling) {
		cmd |= XY_SRC_COPY_BLT_DST_TILED;
		dst_pitch /= 4;
	}

	*cs++ = cmd;
	*cs++ = BLT_DEPTH_32 | BLT_ROP_SRC_COPY | dst_pitch;
	*cs++ = 0;
	*cs++ = t->height << 16 | t->width;
	*cs++ = lower_32_bits(dst->vma->node.start);
	if (use_64b_reloc)
		*cs++ = upper_32_bits(dst->vma->node.start);
	*cs++ = 0;
	*cs++ = src_pitch;
	*cs++ = lower_32_bits(src->vma->node.start);
	if (use_64b_reloc)
		*cs++ = upper_32_bits(src->vma->node.start);

	*cs++ = MI_BATCH_BUFFER_END;

	i915_gem_object_flush_map(batch);
	i915_gem_object_unpin_map(batch);

	return 0;
}

static void tiled_blits_destroy_buffers(struct tiled_blits *t)
{
	int i;

	for (i = 0; i < ARRAY_SIZE(t->buffers); i++)
		i915_vma_put(t->buffers[i].vma);

	i915_vma_put(t->scratch.vma);
	i915_vma_put(t->batch);
}

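/* Create an object in lmem or smem and wrap it in a vma for the context's VM */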
static struct i915_vma *
__create_vma(struct tiled_blits *t, size_t size, bool lmem)
{
	struct drm_i915_private *i915 = t->ce->vm->i915;
	struct drm_i915_gem_object *obj;
	struct i915_vma *vma;

	if (lmem)
		obj = i915_gem_object_create_lmem(i915, size, 0);
	else
		obj = i915_gem_object_create_shmem(i915, size);
	if (IS_ERR(obj))
		return ERR_CAST(obj);

	vma = i915_vma_instance(obj, t->ce->vm, NULL);
	if (IS_ERR(vma))
		i915_gem_object_put(obj);

	return vma;
}

static struct i915_vma *create_vma(struct tiled_blits *t, bool lmem)
{
	return __create_vma(t, PAGE_ALIGN(t->width * t->height * 4), lmem);
}

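/*
 * Allocate the batch, the scratch buffer and the blit targets, placing
 * every other target in lmem where available and giving each target a
 * randomly chosen tiling mode.
 */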
static int tiled_blits_create_buffers(struct tiled_blits *t,
				      int width, int height,
				      struct rnd_state *prng)
{
	struct drm_i915_private *i915 = t->ce->engine->i915;
	int i;

	t->width = width;
	t->height = height;

	t->batch = __create_vma(t, PAGE_SIZE, false);
	if (IS_ERR(t->batch))
		return PTR_ERR(t->batch);

	t->scratch.vma = create_vma(t, false);
	if (IS_ERR(t->scratch.vma)) {
		i915_vma_put(t->batch);
		return PTR_ERR(t->scratch.vma);
	}

	for (i = 0; i < ARRAY_SIZE(t->buffers); i++) {
		struct i915_vma *vma;

		vma = create_vma(t, HAS_LMEM(i915) && i % 2);
		if (IS_ERR(vma)) {
			tiled_blits_destroy_buffers(t);
			return PTR_ERR(vma);
		}

		t->buffers[i].vma = vma;
		t->buffers[i].tiling =
			i915_prandom_u32_max_state(CLIENT_TILING_Y + 1, prng);
	}

	return 0;
}

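/* Fill scratch with an incrementing pattern starting from val */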
static void fill_scratch(struct tiled_blits *t, u32 *vaddr, u32 val)
{
	int i;

	t->scratch.start_val = val;
	for (i = 0; i < t->width * t->height; i++)
		vaddr[i] = val++;

	i915_gem_object_flush_map(t->scratch.vma->obj);
}

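/* Move the selected address bit down to bit 6 for the swizzle XOR */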
static u64 swizzle_bit(unsigned int bit, u64 offset)
{
	return (offset & BIT_ULL(bit)) >> (bit - 6);
}

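/*
 * Convert a linear byte offset into the offset of the same pixel within
 * an X- or Y-tiled surface, applying the platform's bit-6 swizzle, so
 * that the CPU can find the dword the blitter wrote.
 */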
static u64 tiled_offset(const struct intel_gt *gt,
			u64 v,
			unsigned int stride,
			enum client_tiling tiling)
{
	unsigned int swizzle;
	u64 x, y;

	if (tiling == CLIENT_TILING_LINEAR)
		return v;

	y = div64_u64_rem(v, stride, &x);

	if (tiling == CLIENT_TILING_X) {
		v = div64_u64_rem(y, 8, &y) * stride * 8;
		v += y * 512;
		v += div64_u64_rem(x, 512, &x) << 12;
		v += x;

		swizzle = gt->ggtt->bit_6_swizzle_x;
	} else {
		const unsigned int ytile_span = 16;
		const unsigned int ytile_height = 512;

		v = div64_u64_rem(y, 32, &y) * stride * 32;
		v += y * ytile_span;
		v += div64_u64_rem(x, ytile_span, &x) * ytile_height;
		v += x;

		swizzle = gt->ggtt->bit_6_swizzle_y;
	}

	switch (swizzle) {
	case I915_BIT_6_SWIZZLE_9:
		v ^= swizzle_bit(9, v);
		break;
	case I915_BIT_6_SWIZZLE_9_10:
		v ^= swizzle_bit(9, v) ^ swizzle_bit(10, v);
		break;
	case I915_BIT_6_SWIZZLE_9_11:
		v ^= swizzle_bit(9, v) ^ swizzle_bit(11, v);
		break;
	case I915_BIT_6_SWIZZLE_9_10_11:
		v ^= swizzle_bit(9, v) ^ swizzle_bit(10, v) ^ swizzle_bit(11, v);
		break;
	}

	return v;
}

static const char *repr_tiling(enum client_tiling tiling)
{
	switch (tiling) {
	case CLIENT_TILING_LINEAR: return "linear";
	case CLIENT_TILING_X: return "X";
	case CLIENT_TILING_Y: return "Y";
	default: return "unknown";
	}
}

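/*
 * Check the first dword and one randomly chosen pixel of the buffer,
 * detiling the latter in software, against the expected fill pattern.
 */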
static int verify_buffer(const struct tiled_blits *t,
			 struct blit_buffer *buf,
			 struct rnd_state *prng)
{
	const u32 *vaddr;
	int ret = 0;
	int x, y, p;

	x = i915_prandom_u32_max_state(t->width, prng);
	y = i915_prandom_u32_max_state(t->height, prng);
	p = y * t->width + x;

	vaddr = i915_gem_object_pin_map_unlocked(buf->vma->obj, I915_MAP_WC);
	if (IS_ERR(vaddr))
		return PTR_ERR(vaddr);

	if (vaddr[0] != buf->start_val) {
		ret = -EINVAL;
	} else {
		u64 v = tiled_offset(buf->vma->vm->gt,
				     p * 4, t->width * 4,
				     buf->tiling);

		if (vaddr[v / sizeof(*vaddr)] != buf->start_val + p)
			ret = -EINVAL;
	}
	if (ret) {
		pr_err("Invalid %s tiling detected at (%d, %d), start_val %x\n",
		       repr_tiling(buf->tiling),
		       x, y, buf->start_val);
		igt_hexdump(vaddr, 4096);
	}

	i915_gem_object_unpin_map(buf->vma->obj);
	return ret;
}

static int move_to_active(struct i915_vma *vma,
			  struct i915_request *rq,
			  unsigned int flags)
{
	int err;

	i915_vma_lock(vma);
	err = i915_request_await_object(rq, vma->obj, false);
	if (err == 0)
		err = i915_vma_move_to_active(vma, rq, flags);
	i915_vma_unlock(vma);

	return err;
}

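/* Pin the vma at the exact GTT address requested, rebinding if required */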
static int pin_buffer(struct i915_vma *vma, u64 addr)
{
	int err;

	if (drm_mm_node_allocated(&vma->node) && vma->node.start != addr) {
		err = i915_vma_unbind(vma);
		if (err)
			return err;
	}

	err = i915_vma_pin(vma, 0, 0, PIN_USER | PIN_OFFSET_FIXED | addr);
	if (err)
		return err;

	return 0;
}

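/*
 * Pin src and dst at the requested GTT addresses, rebuild the copy batch
 * for this placement, then submit it and wait briefly for completion.
 */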
static int
tiled_blit(struct tiled_blits *t,
	   struct blit_buffer *dst, u64 dst_addr,
	   struct blit_buffer *src, u64 src_addr)
{
	struct i915_request *rq;
	int err;

	err = pin_buffer(src->vma, src_addr);
	if (err) {
		pr_err("Cannot pin src @ %llx\n", src_addr);
		return err;
	}

	err = pin_buffer(dst->vma, dst_addr);
	if (err) {
		pr_err("Cannot pin dst @ %llx\n", dst_addr);
		goto err_src;
	}

	err = i915_vma_pin(t->batch, 0, 0, PIN_USER | PIN_HIGH);
	if (err) {
		pr_err("cannot pin batch\n");
		goto err_dst;
	}

	err = prepare_blit(t, dst, src, t->batch->obj);
	if (err)
		goto err_bb;

	rq = intel_context_create_request(t->ce);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto err_bb;
	}

	err = move_to_active(t->batch, rq, 0);
	if (!err)
		err = move_to_active(src->vma, rq, 0);
	if (!err)
		err = move_to_active(dst->vma, rq, 0);
	if (!err)
		err = rq->engine->emit_bb_start(rq,
						t->batch->node.start,
						t->batch->node.size,
						0);
	i915_request_get(rq);
	i915_request_add(rq);
	if (i915_request_wait(rq, 0, HZ / 2) < 0)
		err = -ETIME;
	i915_request_put(rq);

	dst->start_val = src->start_val;
err_bb:
	i915_vma_unpin(t->batch);
err_dst:
	i915_vma_unpin(dst->vma);
err_src:
	i915_vma_unpin(src->vma);
	return err;
}

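/*
 * Reserve, and immediately release, a suitably sized hole in the
 * context's VM so that the fixed-offset pinning performed by the test
 * has a known free range to work within.
 */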
static struct tiled_blits *
tiled_blits_create(struct intel_engine_cs *engine, struct rnd_state *prng)
{
	struct drm_mm_node hole;
	struct tiled_blits *t;
	u64 hole_size;
	int err;

	t = kzalloc(sizeof(*t), GFP_KERNEL);
	if (!t)
		return ERR_PTR(-ENOMEM);

	t->ce = intel_context_create(engine);
	if (IS_ERR(t->ce)) {
		err = PTR_ERR(t->ce);
		goto err_free;
	}

	hole_size = 2 * PAGE_ALIGN(WIDTH * HEIGHT * 4);
	hole_size *= 2; /* room to maneuver */
	hole_size += 2 * I915_GTT_MIN_ALIGNMENT;

	mutex_lock(&t->ce->vm->mutex);
	memset(&hole, 0, sizeof(hole));
	err = drm_mm_insert_node_in_range(&t->ce->vm->mm, &hole,
					  hole_size, 0, I915_COLOR_UNEVICTABLE,
					  0, U64_MAX,
					  DRM_MM_INSERT_BEST);
	if (!err)
		drm_mm_remove_node(&hole);
	mutex_unlock(&t->ce->vm->mutex);
	if (err) {
		err = -ENODEV;
		goto err_put;
	}

	t->hole = hole.start + I915_GTT_MIN_ALIGNMENT;
	pr_info("Using hole at %llx\n", t->hole);

	err = tiled_blits_create_buffers(t, WIDTH, HEIGHT, prng);
	if (err)
		goto err_put;

	return t;

err_put:
	intel_context_put(t->ce);
err_free:
	kfree(t);
	return ERR_PTR(err);
}

static void tiled_blits_destroy(struct tiled_blits *t)
{
	tiled_blits_destroy_buffers(t);

	intel_context_put(t->ce);
	kfree(t);
}

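/*
 * Prime each target by blitting a fresh scratch pattern into it and
 * verifying the detiled result.
 */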
static int tiled_blits_prepare(struct tiled_blits *t,
			       struct rnd_state *prng)
{
	u64 offset = PAGE_ALIGN(t->width * t->height * 4);
	u32 *map;
	int err;
	int i;

	map = i915_gem_object_pin_map_unlocked(t->scratch.vma->obj, I915_MAP_WC);
	if (IS_ERR(map))
		return PTR_ERR(map);

	/* Use scratch to fill objects */
	for (i = 0; i < ARRAY_SIZE(t->buffers); i++) {
		fill_scratch(t, map, prandom_u32_state(prng));
		GEM_BUG_ON(verify_buffer(t, &t->scratch, prng));

		err = tiled_blit(t,
				 &t->buffers[i], t->hole + offset,
				 &t->scratch, t->hole);
		if (err == 0)
			err = verify_buffer(t, &t->buffers[i], prng);
		if (err) {
			pr_err("Failed to create buffer %d\n", i);
			break;
		}
	}

	i915_gem_object_unpin_map(t->scratch.vma->obj);
	return err;
}

static int tiled_blits_bounce(struct tiled_blits *t, struct rnd_state *prng)
{
	u64 offset =
		round_up(t->width * t->height * 4, 2 * I915_GTT_MIN_ALIGNMENT);
	int err;

	/* We want to check position-invariant tiling across GTT eviction */

	err = tiled_blit(t,
			 &t->buffers[1], t->hole + offset / 2,
			 &t->buffers[0], t->hole + 2 * offset);
	if (err)
		return err;

	/* Reposition so that we overlap the old addresses, and slightly off */
	err = tiled_blit(t,
			 &t->buffers[2], t->hole + I915_GTT_MIN_ALIGNMENT,
			 &t->buffers[1], t->hole + 3 * offset / 2);
	if (err)
		return err;

	err = verify_buffer(t, &t->buffers[2], prng);
	if (err)
		return err;

	return 0;
}

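/*
 * Exercise tiled blits on a single engine: create the working set,
 * prime each buffer, then bounce the buffers between GTT addresses.
 */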
static int __igt_client_tiled_blits(struct intel_engine_cs *engine,
				    struct rnd_state *prng)
{
	struct tiled_blits *t;
	int err;

	t = tiled_blits_create(engine, prng);
	if (IS_ERR(t))
		return PTR_ERR(t);

	err = tiled_blits_prepare(t, prng);
	if (err)
		goto out;

	err = tiled_blits_bounce(t, prng);
	if (err)
		goto out;

out:
	tiled_blits_destroy(t);
	return err;
}

static bool has_bit17_swizzle(int sw)
{
	return (sw == I915_BIT_6_SWIZZLE_9_10_17 ||
		sw == I915_BIT_6_SWIZZLE_9_17);
}

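/*
 * Bit-17 swizzling depends on the physical page, which the software
 * detiling above cannot account for, so skip such machines (and those
 * that need their swizzled pages pinned).
 */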
static bool bad_swizzling(struct drm_i915_private *i915)
{
	struct i915_ggtt *ggtt = &i915->ggtt;

	if (i915->quirks & QUIRK_PIN_SWIZZLED_PAGES)
		return true;

	if (has_bit17_swizzle(ggtt->bit_6_swizzle_x) ||
	    has_bit17_swizzle(ggtt->bit_6_swizzle_y))
		return true;

	return false;
}

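/*
 * Run the tiled blit test on every user-accessible copy engine, skipping
 * platforms without explicit BLT tiling controls or with unusable
 * swizzling.
 */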
static int igt_client_tiled_blits(void *arg)
{
	struct drm_i915_private *i915 = arg;
	I915_RND_STATE(prng);
	int inst = 0;

	/* Test requires explicit BLT tiling controls */
	if (GRAPHICS_VER(i915) < 4)
		return 0;

	if (bad_swizzling(i915)) /* Requires sane (sub-page) swizzling */
		return 0;

	do {
		struct intel_engine_cs *engine;
		int err;

		engine = intel_engine_lookup_user(i915,
						  I915_ENGINE_CLASS_COPY,
						  inst++);
		if (!engine)
			return 0;

		err = __igt_client_tiled_blits(engine, &prng);
		if (err == -ENODEV)
			err = 0;
		if (err)
			return err;
	} while (1);
}

int i915_gem_client_blt_live_selftests(struct drm_i915_private *i915)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(igt_client_tiled_blits),
	};

	if (intel_gt_is_wedged(to_gt(i915)))
		return 0;

	return i915_live_subtests(tests, i915);
}