/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2016 Intel Corporation
 */

#include <linux/prime_numbers.h>

#include "gt/intel_gt.h"
#include "gt/intel_gt_pm.h"
#include "huge_gem_object.h"
#include "i915_selftest.h"
#include "selftests/igt_flush_test.h"

/* Parameters describing one fence-tiling configuration under test */
struct tile {
	unsigned int width;
	unsigned int height;
	unsigned int stride;
	unsigned int size;
	unsigned int tiling;
	unsigned int swizzle;
};

/*
 * Extract physical address bit @bit and shift it down to bit 6, the
 * bit that is XORed into the address by the swizzle patterns below.
 */
static u64 swizzle_bit(unsigned int bit, u64 offset)
{
	return (offset & BIT_ULL(bit)) >> (bit - 6);
}

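/*
 * Worked example (illustration only): with I915_BIT_6_SWIZZLE_9_10,
 * bit 6 of the tiled offset is XORed with bits 9 and 10. For
 * v = 0x200 only bit 9 is set, so bit 6 flips: 0x200 ^ 0x40 = 0x240.
 * For v = 0x600 both bits 9 and 10 are set and cancel out, leaving
 * v unchanged.
 */
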
/*
 * Convert a byte offset as seen through the fenced (linear) GTT view
 * into the byte offset within the object's backing pages where the
 * access actually lands, applying the tile layout and then the bit-6
 * swizzle pattern.
 */
static u64 tiled_offset(const struct tile *tile, u64 v)
{
	u64 x, y;

	if (tile->tiling == I915_TILING_NONE)
		return v;

	y = div64_u64_rem(v, tile->stride, &x);
	v = div64_u64_rem(y, tile->height, &y) * tile->stride * tile->height;

	if (tile->tiling == I915_TILING_X) {
		v += y * tile->width;
		v += div64_u64_rem(x, tile->width, &x) << tile->size;
		v += x;
	} else if (tile->width == 128) {
		const unsigned int ytile_span = 16;
		const unsigned int ytile_height = 512;

		v += y * ytile_span;
		v += div64_u64_rem(x, ytile_span, &x) * ytile_height;
		v += x;
	} else {
		const unsigned int ytile_span = 32;
		const unsigned int ytile_height = 256;

		v += y * ytile_span;
		v += div64_u64_rem(x, ytile_span, &x) * ytile_height;
		v += x;
	}

	switch (tile->swizzle) {
	case I915_BIT_6_SWIZZLE_9:
		v ^= swizzle_bit(9, v);
		break;
	case I915_BIT_6_SWIZZLE_9_10:
		v ^= swizzle_bit(9, v) ^ swizzle_bit(10, v);
		break;
	case I915_BIT_6_SWIZZLE_9_11:
		v ^= swizzle_bit(9, v) ^ swizzle_bit(11, v);
		break;
	case I915_BIT_6_SWIZZLE_9_10_11:
		v ^= swizzle_bit(9, v) ^ swizzle_bit(10, v) ^ swizzle_bit(11, v);
		break;
	}

	return v;
}

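/*
 * Worked example (illustration only): gen4 X-tiling with
 * tile.width = 512, tile.height = 8, tile.size = 12 (4096-byte tiles)
 * and tile.stride = 2048. For a linear offset v = 5000:
 *   row y = 5000 / 2048 = 2, column x = 5000 % 2048 = 904;
 *   the tile row is y / 8 = 0, so v restarts at 0 with y = 2;
 *   v += 2 * 512           -> 1024 (row within the tile)
 *   v += (904 / 512) << 12 -> 5120 (tile number 1 along the row)
 *   v += 904 % 512         -> 5512 (byte within the tile row)
 * so the access lands at byte 5512 of the object, before swizzling.
 */
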
/*
 * Write through each partial GGTT vma of the object and check that the
 * write lands in the expected backing page, using tiled_offset() to
 * detile by hand.
 */
static int check_partial_mapping(struct drm_i915_gem_object *obj,
				 const struct tile *tile,
				 unsigned long end_time)
{
	const unsigned int nreal = obj->scratch / PAGE_SIZE;
	const unsigned long npages = obj->base.size / PAGE_SIZE;
	struct i915_vma *vma;
	unsigned long page;
	int err;

	if (igt_timeout(end_time,
			"%s: timed out before tiling=%d stride=%d\n",
			__func__, tile->tiling, tile->stride))
		return -EINTR;

	err = i915_gem_object_set_tiling(obj, tile->tiling, tile->stride);
	if (err) {
		pr_err("Failed to set tiling mode=%u, stride=%u, err=%d\n",
		       tile->tiling, tile->stride, err);
		return err;
	}

	GEM_BUG_ON(i915_gem_object_get_tiling(obj) != tile->tiling);
	GEM_BUG_ON(i915_gem_object_get_stride(obj) != tile->stride);

	i915_gem_object_lock(obj);
	err = i915_gem_object_set_to_gtt_domain(obj, true);
	i915_gem_object_unlock(obj);
	if (err) {
		pr_err("Failed to flush to GTT write domain; err=%d\n", err);
		return err;
	}

	for_each_prime_number_from(page, 1, npages) {
		struct i915_ggtt_view view =
			compute_partial_view(obj, page, MIN_CHUNK_PAGES);
		u32 __iomem *io;
		struct page *p;
		unsigned int n;
		u64 offset;
		u32 *cpu;

		GEM_BUG_ON(view.partial.size > nreal);
		cond_resched();

		vma = i915_gem_object_ggtt_pin(obj, &view, 0, 0, PIN_MAPPABLE);
		if (IS_ERR(vma)) {
			pr_err("Failed to pin partial view: offset=%lu; err=%d\n",
			       page, (int)PTR_ERR(vma));
			return PTR_ERR(vma);
		}

		n = page - view.partial.offset;
		GEM_BUG_ON(n >= view.partial.size);

		io = i915_vma_pin_iomap(vma);
		i915_vma_unpin(vma);
		if (IS_ERR(io)) {
			pr_err("Failed to iomap partial view: offset=%lu; err=%d\n",
			       page, (int)PTR_ERR(io));
			return PTR_ERR(io);
		}

		/* Tag the first dword of the target page within the view */
		iowrite32(page, io + n * PAGE_SIZE / sizeof(*io));
		i915_vma_unpin_iomap(vma);

		offset = tiled_offset(tile, page << PAGE_SHIFT);
		if (offset >= obj->base.size)
			continue;

		intel_gt_flush_ggtt_writes(&to_i915(obj->base.dev)->gt);

		p = i915_gem_object_get_page(obj, offset >> PAGE_SHIFT);
		cpu = kmap(p) + offset_in_page(offset);
		drm_clflush_virt_range(cpu, sizeof(*cpu));
		if (*cpu != (u32)page) {
			pr_err("Partial view for %lu [%u] (offset=%llu, size=%u [%llu, row size %u], fence=%d, tiling=%d, stride=%d) misalignment, expected write to page (%llu + %u [0x%llx]) of 0x%x, found 0x%x\n",
			       page, n,
			       view.partial.offset,
			       view.partial.size,
			       vma->size >> PAGE_SHIFT,
			       tile->tiling ? tile_row_pages(obj) : 0,
			       vma->fence ? vma->fence->id : -1, tile->tiling, tile->stride,
			       offset >> PAGE_SHIFT,
			       (unsigned int)offset_in_page(offset),
			       offset,
			       (u32)page, *cpu);
			err = -EINVAL;
		}
		*cpu = 0;
		drm_clflush_virt_range(cpu, sizeof(*cpu));
		kunmap(p);
		if (err)
			return err;

		i915_vma_destroy(vma);
	}

	return 0;
}

static int igt_partial_tiling(void *arg)
{
	const unsigned int nreal = 1 << 12; /* largest tile row x2 */
	struct drm_i915_private *i915 = arg;
	struct drm_i915_gem_object *obj;
	intel_wakeref_t wakeref;
	int tiling;
	int err;

	/*
	 * We want to check the page mapping and fencing of a large object
	 * mmapped through the GTT. The object we create is larger than can
	 * possibly be mmapped as a whole, and so we must use partial GGTT
	 * vma. We then check that a write through each partial GGTT vma
	 * ends up in the right set of pages within the object, and with
	 * the expected tiling, which we verify by manual swizzling.
	 */

	obj = huge_gem_object(i915,
			      nreal << PAGE_SHIFT,
			      (1 + next_prime_number(i915->ggtt.vm.total >> PAGE_SHIFT)) << PAGE_SHIFT);
	if (IS_ERR(obj))
		return PTR_ERR(obj);

	err = i915_gem_object_pin_pages(obj);
	if (err) {
		pr_err("Failed to allocate %u pages (%lu total), err=%d\n",
		       nreal, obj->base.size / PAGE_SIZE, err);
		goto out;
	}

	mutex_lock(&i915->drm.struct_mutex);
	wakeref = intel_runtime_pm_get(&i915->runtime_pm);

	if (1) {
		IGT_TIMEOUT(end);
		struct tile tile;

		tile.height = 1;
		tile.width = 1;
		tile.size = 0;
		tile.stride = 0;
		tile.swizzle = I915_BIT_6_SWIZZLE_NONE;
		tile.tiling = I915_TILING_NONE;

		err = check_partial_mapping(obj, &tile, end);
		if (err && err != -EINTR)
			goto out_unlock;
	}

	for (tiling = I915_TILING_X; tiling <= I915_TILING_Y; tiling++) {
		IGT_TIMEOUT(end);
		unsigned int max_pitch;
		unsigned int pitch;
		struct tile tile;

		if (i915->quirks & QUIRK_PIN_SWIZZLED_PAGES)
			/*
			 * The swizzling pattern is actually unknown as it
			 * varies based on physical address of each page.
			 * See i915_gem_detect_bit_6_swizzle().
			 */
			break;

		tile.tiling = tiling;
		switch (tiling) {
		case I915_TILING_X:
			tile.swizzle = i915->mm.bit_6_swizzle_x;
			break;
		case I915_TILING_Y:
			tile.swizzle = i915->mm.bit_6_swizzle_y;
			break;
		}

		GEM_BUG_ON(tile.swizzle == I915_BIT_6_SWIZZLE_UNKNOWN);
		if (tile.swizzle == I915_BIT_6_SWIZZLE_9_17 ||
		    tile.swizzle == I915_BIT_6_SWIZZLE_9_10_17)
			continue;

		if (INTEL_GEN(i915) <= 2) {
			tile.height = 16;
			tile.width = 128;
			tile.size = 11;
		} else if (tile.tiling == I915_TILING_Y &&
			   HAS_128_BYTE_Y_TILING(i915)) {
			tile.height = 32;
			tile.width = 128;
			tile.size = 12;
		} else {
			tile.height = 8;
			tile.width = 512;
			tile.size = 12;
		}

		if (INTEL_GEN(i915) < 4)
			max_pitch = 8192 / tile.width;
		else if (INTEL_GEN(i915) < 7)
			max_pitch = 128 * I965_FENCE_MAX_PITCH_VAL / tile.width;
		else
			max_pitch = 128 * GEN7_FENCE_MAX_PITCH_VAL / tile.width;

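		/*
		 * For illustration: on gen3 X-tiling, tile.width = 512 and
		 * max_pitch = 8192 / 512 = 16, so the loop below walks
		 * strides of 8192, 4096, ..., 512 bytes.
		 */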
		for (pitch = max_pitch; pitch; pitch >>= 1) {
			tile.stride = tile.width * pitch;
			err = check_partial_mapping(obj, &tile, end);
			if (err == -EINTR)
				goto next_tiling;
			if (err)
				goto out_unlock;

			if (pitch > 2 && INTEL_GEN(i915) >= 4) {
				tile.stride = tile.width * (pitch - 1);
				err = check_partial_mapping(obj, &tile, end);
				if (err == -EINTR)
					goto next_tiling;
				if (err)
					goto out_unlock;
			}

			if (pitch < max_pitch && INTEL_GEN(i915) >= 4) {
				tile.stride = tile.width * (pitch + 1);
				err = check_partial_mapping(obj, &tile, end);
				if (err == -EINTR)
					goto next_tiling;
				if (err)
					goto out_unlock;
			}
		}

		if (INTEL_GEN(i915) >= 4) {
			for_each_prime_number(pitch, max_pitch) {
				tile.stride = tile.width * pitch;
				err = check_partial_mapping(obj, &tile, end);
				if (err == -EINTR)
					goto next_tiling;
				if (err)
					goto out_unlock;
			}
		}

next_tiling: ;
	}

out_unlock:
	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
	mutex_unlock(&i915->drm.struct_mutex);
	i915_gem_object_unpin_pages(obj);
out:
	i915_gem_object_put(obj);
	return err;
}

/*
 * Mark the object active on every engine, then drop our reference so
 * that it stays alive only by virtue of those outstanding requests.
 */
static int make_obj_busy(struct drm_i915_gem_object *obj)
{
	struct drm_i915_private *i915 = to_i915(obj->base.dev);
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	struct i915_vma *vma;
	int err;

	vma = i915_vma_instance(obj, &i915->ggtt.vm, NULL);
	if (IS_ERR(vma))
		return PTR_ERR(vma);

	err = i915_vma_pin(vma, 0, 0, PIN_USER);
	if (err)
		return err;

	for_each_engine(engine, i915, id) {
		struct i915_request *rq;

		rq = i915_request_create(engine->kernel_context);
		if (IS_ERR(rq)) {
			i915_vma_unpin(vma);
			return PTR_ERR(rq);
		}

		i915_vma_lock(vma);
		err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
		i915_vma_unlock(vma);

		i915_request_add(rq);
	}

	i915_vma_unpin(vma);
	i915_gem_object_put(obj); /* leave it only alive via its active ref */

	return err;
}

static bool assert_mmap_offset(struct drm_i915_private *i915,
			       unsigned long size,
			       int expected)
{
	struct drm_i915_gem_object *obj;
	int err;

	obj = i915_gem_object_create_internal(i915, size);
	if (IS_ERR(obj))
		return false; /* creation failed; we cannot run the check */

	err = create_mmap_offset(obj);
	i915_gem_object_put(obj);

	return err == expected;
}
382 
383 static void disable_retire_worker(struct drm_i915_private *i915)
384 {
385 	i915_gem_shrinker_unregister(i915);
386 
387 	intel_gt_pm_get(&i915->gt);
388 
389 	cancel_delayed_work_sync(&i915->gem.retire_work);
390 	flush_work(&i915->gem.idle_work);
391 }
392 
393 static void restore_retire_worker(struct drm_i915_private *i915)
394 {
395 	intel_gt_pm_put(&i915->gt);
396 
397 	mutex_lock(&i915->drm.struct_mutex);
398 	igt_flush_test(i915, I915_WAIT_LOCKED);
399 	mutex_unlock(&i915->drm.struct_mutex);
400 
401 	i915_gem_shrinker_register(i915);
402 }
403 
404 static void mmap_offset_lock(struct drm_i915_private *i915)
405 	__acquires(&i915->drm.vma_offset_manager->vm_lock)
406 {
407 	write_lock(&i915->drm.vma_offset_manager->vm_lock);
408 }
409 
410 static void mmap_offset_unlock(struct drm_i915_private *i915)
411 	__releases(&i915->drm.vma_offset_manager->vm_lock)
412 {
413 	write_unlock(&i915->drm.vma_offset_manager->vm_lock);
414 }

static int igt_mmap_offset_exhaustion(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct drm_mm *mm = &i915->drm.vma_offset_manager->vm_addr_space_mm;
	struct drm_i915_gem_object *obj;
	struct drm_mm_node resv, *hole;
	u64 hole_start, hole_end;
	int loop, err = 0;

	/* Disable background reaper */
	disable_retire_worker(i915);
	GEM_BUG_ON(!i915->gt.awake);

	/* Trim the device mmap space to only a page */
	memset(&resv, 0, sizeof(resv));
	drm_mm_for_each_hole(hole, mm, hole_start, hole_end) {
		resv.start = hole_start;
		resv.size = hole_end - hole_start - 1; /* PAGE_SIZE units */
		mmap_offset_lock(i915);
		err = drm_mm_reserve_node(mm, &resv);
		mmap_offset_unlock(i915);
		if (err) {
			pr_err("Failed to trim VMA manager, err=%d\n", err);
			goto out_park;
		}
		break;
	}

	/* Just fits! */
	if (!assert_mmap_offset(i915, PAGE_SIZE, 0)) {
		pr_err("Unable to insert object into single page hole\n");
		err = -EINVAL;
		goto out;
	}

	/* Too large */
	if (!assert_mmap_offset(i915, 2 * PAGE_SIZE, -ENOSPC)) {
		pr_err("Unexpectedly succeeded in inserting too large object into single page hole\n");
		err = -EINVAL;
		goto out;
	}

	/* Fill the hole, further allocation attempts should then fail */
	obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
	if (IS_ERR(obj)) {
		err = PTR_ERR(obj);
		goto out;
	}

	err = create_mmap_offset(obj);
	if (err) {
		pr_err("Unable to insert object into reclaimed hole\n");
		goto err_obj;
	}

	if (!assert_mmap_offset(i915, PAGE_SIZE, -ENOSPC)) {
		pr_err("Unexpectedly succeeded in inserting object into no holes!\n");
		err = -EINVAL;
		goto err_obj;
	}

	i915_gem_object_put(obj);

	/* Now fill with busy dead objects that we expect to reap */
	for (loop = 0; loop < 3; loop++) {
		if (intel_gt_is_wedged(&i915->gt))
			break;

		obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
		if (IS_ERR(obj)) {
			err = PTR_ERR(obj);
			goto out;
		}

		mutex_lock(&i915->drm.struct_mutex);
		err = make_obj_busy(obj);
		mutex_unlock(&i915->drm.struct_mutex);
		if (err) {
			pr_err("[loop %d] Failed to busy the object\n", loop);
			goto err_obj;
		}
	}

out:
	mmap_offset_lock(i915);
	drm_mm_remove_node(&resv);
	mmap_offset_unlock(i915);
out_park:
	restore_retire_worker(i915);
	return err;
err_obj:
	i915_gem_object_put(obj);
	goto out;
}

int i915_gem_mman_live_selftests(struct drm_i915_private *i915)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(igt_partial_tiling),
		SUBTEST(igt_mmap_offset_exhaustion),
	};

	return i915_subtests(tests, i915);
}