/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2016 Intel Corporation
 */

#include <linux/prime_numbers.h>

#include "gt/intel_gt.h"
#include "gt/intel_gt_pm.h"
#include "huge_gem_object.h"
#include "i915_selftest.h"
#include "selftests/igt_flush_test.h"
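/* Geometry and swizzle parameters for one tiling layout under test. */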
struct tile {
	unsigned int width;
	unsigned int height;
	unsigned int stride;
	unsigned int size;
	unsigned int tiling;
	unsigned int swizzle;
};
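/*
 * Extract the selected address bit and shift it down to bit 6; the hardware
 * swizzles bit 6 of an address by XORing it with bits 9/10/11.
 */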
static u64 swizzle_bit(unsigned int bit, u64 offset)
{
	return (offset & BIT_ULL(bit)) >> (bit - 6);
}
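/*
 * Map a byte offset as seen through the fenced (linear) GTT view onto the
 * offset of the same byte within the tiled, swizzled backing storage, so we
 * can predict which backing page a GGTT write should land in.
 */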
static u64 tiled_offset(const struct tile *tile, u64 v)
{
	u64 x, y;

	if (tile->tiling == I915_TILING_NONE)
		return v;

	y = div64_u64_rem(v, tile->stride, &x);
	v = div64_u64_rem(y, tile->height, &y) * tile->stride * tile->height;

	if (tile->tiling == I915_TILING_X) {
		v += y * tile->width;
		v += div64_u64_rem(x, tile->width, &x) << tile->size;
		v += x;
	} else if (tile->width == 128) {
		const unsigned int ytile_span = 16;
		const unsigned int ytile_height = 512;

		v += y * ytile_span;
		v += div64_u64_rem(x, ytile_span, &x) * ytile_height;
		v += x;
	} else {
		const unsigned int ytile_span = 32;
		const unsigned int ytile_height = 256;

		v += y * ytile_span;
		v += div64_u64_rem(x, ytile_span, &x) * ytile_height;
		v += x;
	}

	switch (tile->swizzle) {
	case I915_BIT_6_SWIZZLE_9:
		v ^= swizzle_bit(9, v);
		break;
	case I915_BIT_6_SWIZZLE_9_10:
		v ^= swizzle_bit(9, v) ^ swizzle_bit(10, v);
		break;
	case I915_BIT_6_SWIZZLE_9_11:
		v ^= swizzle_bit(9, v) ^ swizzle_bit(11, v);
		break;
	case I915_BIT_6_SWIZZLE_9_10_11:
		v ^= swizzle_bit(9, v) ^ swizzle_bit(10, v) ^ swizzle_bit(11, v);
		break;
	}

	return v;
}
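/*
 * Write a distinct value through each partial GGTT view of the object and
 * verify, by detiling the offset on the CPU, that it landed in the expected
 * backing page.
 */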
static int check_partial_mapping(struct drm_i915_gem_object *obj,
				 const struct tile *tile,
				 unsigned long end_time)
{
	const unsigned int nreal = obj->scratch / PAGE_SIZE;
	const unsigned long npages = obj->base.size / PAGE_SIZE;
	struct i915_vma *vma;
	unsigned long page;
	int err;

	if (igt_timeout(end_time,
			"%s: timed out before tiling=%d stride=%d\n",
			__func__, tile->tiling, tile->stride))
		return -EINTR;

	err = i915_gem_object_set_tiling(obj, tile->tiling, tile->stride);
	if (err) {
		pr_err("Failed to set tiling mode=%u, stride=%u, err=%d\n",
		       tile->tiling, tile->stride, err);
		return err;
	}

	GEM_BUG_ON(i915_gem_object_get_tiling(obj) != tile->tiling);
	GEM_BUG_ON(i915_gem_object_get_stride(obj) != tile->stride);

	i915_gem_object_lock(obj);
	err = i915_gem_object_set_to_gtt_domain(obj, true);
	i915_gem_object_unlock(obj);
	if (err) {
		pr_err("Failed to flush to GTT write domain; err=%d\n", err);
		return err;
	}
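	/*
	 * Sample a prime-spaced selection of pages across the huge object;
	 * only a small window around each page is mapped at a time, so the
	 * object never has to fit in the aperture as a whole.
	 */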
	for_each_prime_number_from(page, 1, npages) {
		struct i915_ggtt_view view =
			compute_partial_view(obj, page, MIN_CHUNK_PAGES);
		u32 __iomem *io;
		struct page *p;
		unsigned int n;
		u64 offset;
		u32 *cpu;

		GEM_BUG_ON(view.partial.size > nreal);
		cond_resched();

		vma = i915_gem_object_ggtt_pin(obj, &view, 0, 0, PIN_MAPPABLE);
		if (IS_ERR(vma)) {
			pr_err("Failed to pin partial view: offset=%lu; err=%d\n",
			       page, (int)PTR_ERR(vma));
			return PTR_ERR(vma);
		}

		n = page - view.partial.offset;
		GEM_BUG_ON(n >= view.partial.size);

		io = i915_vma_pin_iomap(vma);
		i915_vma_unpin(vma);
		if (IS_ERR(io)) {
			pr_err("Failed to iomap partial view: offset=%lu; err=%d\n",
			       page, (int)PTR_ERR(io));
			return PTR_ERR(io);
		}

		iowrite32(page, io + n * PAGE_SIZE / sizeof(*io));
		i915_vma_unpin_iomap(vma);

		offset = tiled_offset(tile, page << PAGE_SHIFT);
		if (offset >= obj->base.size)
			continue;
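		/* Make sure the GGTT write has landed in memory before the CPU readback. */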
		intel_gt_flush_ggtt_writes(&to_i915(obj->base.dev)->gt);

		p = i915_gem_object_get_page(obj, offset >> PAGE_SHIFT);
		cpu = kmap(p) + offset_in_page(offset);
		drm_clflush_virt_range(cpu, sizeof(*cpu));
		if (*cpu != (u32)page) {
			pr_err("Partial view for %lu [%u] (offset=%llu, size=%u [%llu, row size %u], fence=%d, tiling=%d, stride=%d) misalignment, expected write to page (%llu + %u [0x%llx]) of 0x%x, found 0x%x\n",
			       page, n,
			       view.partial.offset,
			       view.partial.size,
			       vma->size >> PAGE_SHIFT,
			       tile->tiling ? tile_row_pages(obj) : 0,
			       vma->fence ? vma->fence->id : -1, tile->tiling, tile->stride,
			       offset >> PAGE_SHIFT,
			       (unsigned int)offset_in_page(offset),
			       offset,
			       (u32)page, *cpu);
			err = -EINVAL;
		}
		*cpu = 0;
		drm_clflush_virt_range(cpu, sizeof(*cpu));
		kunmap(p);
		if (err)
			return err;

		i915_vma_destroy(vma);
	}

	return 0;
}
static int igt_partial_tiling(void *arg)
{
	const unsigned int nreal = 1 << 12; /* largest tile row x2 */
	struct drm_i915_private *i915 = arg;
	struct drm_i915_gem_object *obj;
	intel_wakeref_t wakeref;
	int tiling;
	int err;

	/* We want to check the page mapping and fencing of a large object
	 * mmapped through the GTT. The object we create is larger than can
	 * possibly be mmapped as a whole, and so we must use partial GGTT vmas.
	 * We then check that a write through each partial GGTT vma ends up
	 * in the right set of pages within the object, and with the expected
	 * tiling, which we verify by manual swizzling.
	 */

	obj = huge_gem_object(i915,
			      nreal << PAGE_SHIFT,
			      (1 + next_prime_number(i915->ggtt.vm.total >> PAGE_SHIFT)) << PAGE_SHIFT);
	if (IS_ERR(obj))
		return PTR_ERR(obj);

	err = i915_gem_object_pin_pages(obj);
	if (err) {
		pr_err("Failed to allocate %u pages (%lu total), err=%d\n",
		       nreal, obj->base.size / PAGE_SIZE, err);
		goto out;
	}

	mutex_lock(&i915->drm.struct_mutex);
	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
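	/* Start with the simple case: no tiling and no swizzle. */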
	if (1) {
		IGT_TIMEOUT(end);
		struct tile tile;

		tile.height = 1;
		tile.width = 1;
		tile.size = 0;
		tile.stride = 0;
		tile.swizzle = I915_BIT_6_SWIZZLE_NONE;
		tile.tiling = I915_TILING_NONE;

		err = check_partial_mapping(obj, &tile, end);
		if (err && err != -EINTR)
			goto out_unlock;
	}
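	/* Then walk X and Y tiling with the platform's swizzle mode over a spread of strides. */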
	for (tiling = I915_TILING_X; tiling <= I915_TILING_Y; tiling++) {
		IGT_TIMEOUT(end);
		unsigned int max_pitch;
		unsigned int pitch;
		struct tile tile;

		if (i915->quirks & QUIRK_PIN_SWIZZLED_PAGES)
			/*
			 * The swizzling pattern is actually unknown as it
			 * varies based on physical address of each page.
			 * See i915_gem_detect_bit_6_swizzle().
			 */
			break;

		tile.tiling = tiling;
		switch (tiling) {
		case I915_TILING_X:
			tile.swizzle = i915->mm.bit_6_swizzle_x;
			break;
		case I915_TILING_Y:
			tile.swizzle = i915->mm.bit_6_swizzle_y;
			break;
		}

		GEM_BUG_ON(tile.swizzle == I915_BIT_6_SWIZZLE_UNKNOWN);
		if (tile.swizzle == I915_BIT_6_SWIZZLE_9_17 ||
		    tile.swizzle == I915_BIT_6_SWIZZLE_9_10_17)
			continue;

		if (INTEL_GEN(i915) <= 2) {
			tile.height = 16;
			tile.width = 128;
			tile.size = 11;
		} else if (tile.tiling == I915_TILING_Y &&
			   HAS_128_BYTE_Y_TILING(i915)) {
			tile.height = 32;
			tile.width = 128;
			tile.size = 12;
		} else {
			tile.height = 8;
			tile.width = 512;
			tile.size = 12;
		}
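		/* Derive the largest pitch (in tile widths) the fence registers can describe. */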
		if (INTEL_GEN(i915) < 4)
			max_pitch = 8192 / tile.width;
		else if (INTEL_GEN(i915) < 7)
			max_pitch = 128 * I965_FENCE_MAX_PITCH_VAL / tile.width;
		else
			max_pitch = 128 * GEN7_FENCE_MAX_PITCH_VAL / tile.width;
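		/*
		 * Test power-of-two pitches from the maximum down, plus the
		 * neighbouring off-by-one strides that only gen4+ fences can
		 * express.
		 */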
		for (pitch = max_pitch; pitch; pitch >>= 1) {
			tile.stride = tile.width * pitch;
			err = check_partial_mapping(obj, &tile, end);
			if (err == -EINTR)
				goto next_tiling;
			if (err)
				goto out_unlock;

			if (pitch > 2 && INTEL_GEN(i915) >= 4) {
				tile.stride = tile.width * (pitch - 1);
				err = check_partial_mapping(obj, &tile, end);
				if (err == -EINTR)
					goto next_tiling;
				if (err)
					goto out_unlock;
			}

			if (pitch < max_pitch && INTEL_GEN(i915) >= 4) {
				tile.stride = tile.width * (pitch + 1);
				err = check_partial_mapping(obj, &tile, end);
				if (err == -EINTR)
					goto next_tiling;
				if (err)
					goto out_unlock;
			}
		}
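		/*
		 * Older fences only support power-of-two pitches; on gen4+
		 * also sample a prime selection of arbitrary pitches.
		 */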
		if (INTEL_GEN(i915) >= 4) {
			for_each_prime_number(pitch, max_pitch) {
				tile.stride = tile.width * pitch;
				err = check_partial_mapping(obj, &tile, end);
				if (err == -EINTR)
					goto next_tiling;
				if (err)
					goto out_unlock;
			}
		}

next_tiling: ;
	}

out_unlock:
	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
	mutex_unlock(&i915->drm.struct_mutex);
	i915_gem_object_unpin_pages(obj);
out:
	i915_gem_object_put(obj);
	return err;
}
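/*
 * Submit a write from every engine against the object and then drop our
 * reference, leaving the object alive only through its active requests.
 */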
static int make_obj_busy(struct drm_i915_gem_object *obj)
{
	struct drm_i915_private *i915 = to_i915(obj->base.dev);
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	struct i915_vma *vma;
	int err;

	vma = i915_vma_instance(obj, &i915->ggtt.vm, NULL);
	if (IS_ERR(vma))
		return PTR_ERR(vma);

	err = i915_vma_pin(vma, 0, 0, PIN_USER);
	if (err)
		return err;

	for_each_engine(engine, i915, id) {
		struct i915_request *rq;

		rq = i915_request_create(engine->kernel_context);
		if (IS_ERR(rq)) {
			i915_vma_unpin(vma);
			return PTR_ERR(rq);
		}

		i915_vma_lock(vma);
		err = i915_request_await_object(rq, vma->obj, true);
		if (err == 0)
			err = i915_vma_move_to_active(vma, rq,
						      EXEC_OBJECT_WRITE);
		i915_vma_unlock(vma);

		i915_request_add(rq);
	}

	i915_vma_unpin(vma);
	i915_gem_object_put(obj); /* leave it only alive via its active ref */

	return err;
}
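/*
 * Check that creating a mmap offset for a freshly allocated object of the
 * given size fails (or succeeds) with the expected error code.
 */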
static bool assert_mmap_offset(struct drm_i915_private *i915,
			       unsigned long size,
			       int expected)
{
	struct drm_i915_gem_object *obj;
	int err;

	obj = i915_gem_object_create_internal(i915, size);
	if (IS_ERR(obj)) /* don't return a negative errno from a bool */
		return false;

	err = create_mmap_offset(obj);
	i915_gem_object_put(obj);

	return err == expected;
}
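/*
 * Park the shrinker and the retire/idle workers so that background reclaim
 * cannot interfere while we deliberately exhaust the mmap offset space.
 */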
static void disable_retire_worker(struct drm_i915_private *i915)
{
	i915_gem_driver_unregister__shrinker(i915);

	intel_gt_pm_get(&i915->gt);

	cancel_delayed_work_sync(&i915->gem.retire_work);
	flush_work(&i915->gem.idle_work);
}

static void restore_retire_worker(struct drm_i915_private *i915)
{
	intel_gt_pm_put(&i915->gt);

	mutex_lock(&i915->drm.struct_mutex);
	igt_flush_test(i915, I915_WAIT_LOCKED);
	mutex_unlock(&i915->drm.struct_mutex);

	i915_gem_driver_register__shrinker(i915);
}
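/* Serialise our direct edits of the vma offset manager's address space. */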
static void mmap_offset_lock(struct drm_i915_private *i915)
	__acquires(&i915->drm.vma_offset_manager->vm_lock)
{
	write_lock(&i915->drm.vma_offset_manager->vm_lock);
}

static void mmap_offset_unlock(struct drm_i915_private *i915)
	__releases(&i915->drm.vma_offset_manager->vm_lock)
{
	write_unlock(&i915->drm.vma_offset_manager->vm_lock);
}
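/*
 * Trim the mmap offset space down to a single page and exercise the failure
 * and reclaim paths around it.
 */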
static int igt_mmap_offset_exhaustion(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct drm_mm *mm = &i915->drm.vma_offset_manager->vm_addr_space_mm;
	struct drm_i915_gem_object *obj;
	struct drm_mm_node resv, *hole;
	u64 hole_start, hole_end;
	int loop, err;

	/* Disable background reaper */
	disable_retire_worker(i915);
	GEM_BUG_ON(!i915->gt.awake);

	/* Trim the device mmap space to only a page */
	memset(&resv, 0, sizeof(resv));
	drm_mm_for_each_hole(hole, mm, hole_start, hole_end) {
		resv.start = hole_start;
		resv.size = hole_end - hole_start - 1; /* PAGE_SIZE units */
		mmap_offset_lock(i915);
		err = drm_mm_reserve_node(mm, &resv);
		mmap_offset_unlock(i915);
		if (err) {
			pr_err("Failed to trim VMA manager, err=%d\n", err);
			goto out_park;
		}
		break;
	}

	/* Just fits! */
	if (!assert_mmap_offset(i915, PAGE_SIZE, 0)) {
		pr_err("Unable to insert object into single page hole\n");
		err = -EINVAL;
		goto out;
	}

	/* Too large */
	if (!assert_mmap_offset(i915, 2 * PAGE_SIZE, -ENOSPC)) {
		pr_err("Unexpectedly succeeded in inserting too large object into single page hole\n");
		err = -EINVAL;
		goto out;
	}

	/* Fill the hole, further allocation attempts should then fail */
	obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
	if (IS_ERR(obj)) {
		err = PTR_ERR(obj);
		goto out;
	}

	err = create_mmap_offset(obj);
	if (err) {
		pr_err("Unable to insert object into reclaimed hole\n");
		goto err_obj;
	}

	if (!assert_mmap_offset(i915, PAGE_SIZE, -ENOSPC)) {
		pr_err("Unexpectedly succeeded in inserting object into no holes!\n");
		err = -EINVAL;
		goto err_obj;
	}

	i915_gem_object_put(obj);
	/* Now fill with busy dead objects that we expect to reap */
	for (loop = 0; loop < 3; loop++) {
		if (intel_gt_is_wedged(&i915->gt))
			break;

		obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
		if (IS_ERR(obj)) {
			err = PTR_ERR(obj);
			goto out;
		}

		mutex_lock(&i915->drm.struct_mutex);
		err = make_obj_busy(obj);
		mutex_unlock(&i915->drm.struct_mutex);
		if (err) {
			pr_err("[loop %d] Failed to busy the object\n", loop);
			goto err_obj;
		}
	}

out:
	mmap_offset_lock(i915);
	drm_mm_remove_node(&resv);
	mmap_offset_unlock(i915);
out_park:
	restore_retire_worker(i915);
	return err;
err_obj:
	i915_gem_object_put(obj);
	goto out;
}
int i915_gem_mman_live_selftests(struct drm_i915_private *i915)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(igt_partial_tiling),
		SUBTEST(igt_mmap_offset_exhaustion),
	};

	return i915_subtests(tests, i915);
}