/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2016 Intel Corporation
 */

#include <linux/prime_numbers.h>

#include "gt/intel_gt_pm.h"
#include "huge_gem_object.h"
#include "i915_selftest.h"
#include "selftests/igt_flush_test.h"

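/*
 * Parameters for one tiling layout under test: tile width in bytes and
 * height in rows, the object stride in bytes, log2 of the tile size in
 * bytes, and the I915_TILING_* / I915_BIT_6_SWIZZLE_* modes.
 */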
struct tile {
	unsigned int width;
	unsigned int height;
	unsigned int stride;
	unsigned int size;
	unsigned int tiling;
	unsigned int swizzle;
};

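/* Extract the selected address bit and shift it down to bit 6 for XOR'ing. */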
static u64 swizzle_bit(unsigned int bit, u64 offset)
{
	return (offset & BIT_ULL(bit)) >> (bit - 6);
}

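/*
 * Map a byte offset in the fenced (linear) GTT view onto the offset within
 * the object's backing store where the access actually lands, replicating
 * the hardware detiling and bit-6 swizzling for the given tile layout.
 */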
static u64 tiled_offset(const struct tile *tile, u64 v)
{
	u64 x, y;

	if (tile->tiling == I915_TILING_NONE)
		return v;

	y = div64_u64_rem(v, tile->stride, &x);
	v = div64_u64_rem(y, tile->height, &y) * tile->stride * tile->height;

	if (tile->tiling == I915_TILING_X) {
		v += y * tile->width;
		v += div64_u64_rem(x, tile->width, &x) << tile->size;
		v += x;
	} else if (tile->width == 128) {
		const unsigned int ytile_span = 16;
		const unsigned int ytile_height = 512;

		v += y * ytile_span;
		v += div64_u64_rem(x, ytile_span, &x) * ytile_height;
		v += x;
	} else {
		const unsigned int ytile_span = 32;
		const unsigned int ytile_height = 256;

		v += y * ytile_span;
		v += div64_u64_rem(x, ytile_span, &x) * ytile_height;
		v += x;
	}

	switch (tile->swizzle) {
	case I915_BIT_6_SWIZZLE_9:
		v ^= swizzle_bit(9, v);
		break;
	case I915_BIT_6_SWIZZLE_9_10:
		v ^= swizzle_bit(9, v) ^ swizzle_bit(10, v);
		break;
	case I915_BIT_6_SWIZZLE_9_11:
		v ^= swizzle_bit(9, v) ^ swizzle_bit(11, v);
		break;
	case I915_BIT_6_SWIZZLE_9_10_11:
		v ^= swizzle_bit(9, v) ^ swizzle_bit(10, v) ^ swizzle_bit(11, v);
		break;
	}

	return v;
}

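/*
 * For a sampling of pages in the object, bind a minimal partial GGTT view
 * around each one, write the page index through the fenced iomap, and read
 * it back through the CPU (detiling with tiled_offset()) to verify the
 * write landed in the expected backing page.
 */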
static int check_partial_mapping(struct drm_i915_gem_object *obj,
				 const struct tile *tile,
				 unsigned long end_time)
{
	const unsigned int nreal = obj->scratch / PAGE_SIZE;
	const unsigned long npages = obj->base.size / PAGE_SIZE;
	struct i915_vma *vma;
	unsigned long page;
	int err;

	if (igt_timeout(end_time,
			"%s: timed out before tiling=%d stride=%d\n",
			__func__, tile->tiling, tile->stride))
		return -EINTR;

	err = i915_gem_object_set_tiling(obj, tile->tiling, tile->stride);
	if (err) {
		pr_err("Failed to set tiling mode=%u, stride=%u, err=%d\n",
		       tile->tiling, tile->stride, err);
		return err;
	}

	GEM_BUG_ON(i915_gem_object_get_tiling(obj) != tile->tiling);
	GEM_BUG_ON(i915_gem_object_get_stride(obj) != tile->stride);

	i915_gem_object_lock(obj);
	err = i915_gem_object_set_to_gtt_domain(obj, true);
	i915_gem_object_unlock(obj);
	if (err) {
		pr_err("Failed to flush to GTT write domain; err=%d\n", err);
		return err;
	}

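	/*
	 * Sample the object at prime-numbered pages so that a wide spread
	 * of partial-view offsets is exercised without touching every page
	 * of a potentially enormous object.
	 */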
	for_each_prime_number_from(page, 1, npages) {
		struct i915_ggtt_view view =
			compute_partial_view(obj, page, MIN_CHUNK_PAGES);
		u32 __iomem *io;
		struct page *p;
		unsigned int n;
		u64 offset;
		u32 *cpu;

		GEM_BUG_ON(view.partial.size > nreal);
		cond_resched();

		vma = i915_gem_object_ggtt_pin(obj, &view, 0, 0, PIN_MAPPABLE);
		if (IS_ERR(vma)) {
			pr_err("Failed to pin partial view: offset=%lu; err=%d\n",
			       page, (int)PTR_ERR(vma));
			return PTR_ERR(vma);
		}

		n = page - view.partial.offset;
		GEM_BUG_ON(n >= view.partial.size);

		io = i915_vma_pin_iomap(vma);
		i915_vma_unpin(vma);
		if (IS_ERR(io)) {
			pr_err("Failed to iomap partial view: offset=%lu; err=%d\n",
			       page, (int)PTR_ERR(io));
			return PTR_ERR(io);
		}

		iowrite32(page, io + n * PAGE_SIZE / sizeof(*io));
		i915_vma_unpin_iomap(vma);

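		/*
		 * Read the value back through the CPU, detiling the offset
		 * by hand, to check that the fenced GTT write landed where
		 * the tiling and swizzle parameters predict.
		 */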
		offset = tiled_offset(tile, page << PAGE_SHIFT);
		if (offset >= obj->base.size)
			continue;

		i915_gem_flush_ggtt_writes(to_i915(obj->base.dev));

		p = i915_gem_object_get_page(obj, offset >> PAGE_SHIFT);
		cpu = kmap(p) + offset_in_page(offset);
		drm_clflush_virt_range(cpu, sizeof(*cpu));
		if (*cpu != (u32)page) {
			pr_err("Partial view for %lu [%u] (offset=%llu, size=%u [%llu, row size %u], fence=%d, tiling=%d, stride=%d) misalignment, expected write to page (%llu + %u [0x%llx]) of 0x%x, found 0x%x\n",
			       page, n,
			       view.partial.offset,
			       view.partial.size,
			       vma->size >> PAGE_SHIFT,
			       tile->tiling ? tile_row_pages(obj) : 0,
			       vma->fence ? vma->fence->id : -1, tile->tiling, tile->stride,
			       offset >> PAGE_SHIFT,
			       (unsigned int)offset_in_page(offset),
			       offset,
			       (u32)page, *cpu);
			err = -EINVAL;
		}
		*cpu = 0;
		drm_clflush_virt_range(cpu, sizeof(*cpu));
		kunmap(p);
		if (err)
			return err;

		i915_vma_destroy(vma);
	}

	return 0;
}

static int igt_partial_tiling(void *arg)
{
	const unsigned int nreal = 1 << 12; /* largest tile row x2 */
	struct drm_i915_private *i915 = arg;
	struct drm_i915_gem_object *obj;
	intel_wakeref_t wakeref;
	int tiling;
	int err;

	/* We want to check the page mapping and fencing of a large object
	 * mmapped through the GTT. The object we create is larger than can
	 * possibly be mmapped as a whole, and so we must use partial GGTT
	 * vmas. We then check that a write through each partial GGTT vma
	 * ends up in the right set of pages within the object, and with the
	 * expected tiling, which we verify by manual swizzling.
	 */

	obj = huge_gem_object(i915,
			      nreal << PAGE_SHIFT,
			      (1 + next_prime_number(i915->ggtt.vm.total >> PAGE_SHIFT)) << PAGE_SHIFT);
	if (IS_ERR(obj))
		return PTR_ERR(obj);

	err = i915_gem_object_pin_pages(obj);
	if (err) {
		pr_err("Failed to allocate %u pages (%lu total), err=%d\n",
		       nreal, obj->base.size / PAGE_SIZE, err);
		goto out;
	}

	mutex_lock(&i915->drm.struct_mutex);
	wakeref = intel_runtime_pm_get(&i915->runtime_pm);

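	/* Start with a linear (untiled) view as a sanity check. */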
	if (1) {
		IGT_TIMEOUT(end);
		struct tile tile;

		tile.height = 1;
		tile.width = 1;
		tile.size = 0;
		tile.stride = 0;
		tile.swizzle = I915_BIT_6_SWIZZLE_NONE;
		tile.tiling = I915_TILING_NONE;

		err = check_partial_mapping(obj, &tile, end);
		if (err && err != -EINTR)
			goto out_unlock;
	}

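	/*
	 * Exercise X and Y tiling with the swizzle modes reported by the
	 * hardware, skipping layouts we cannot predict from the CPU.
	 */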
	for (tiling = I915_TILING_X; tiling <= I915_TILING_Y; tiling++) {
		IGT_TIMEOUT(end);
		unsigned int max_pitch;
		unsigned int pitch;
		struct tile tile;

		if (i915->quirks & QUIRK_PIN_SWIZZLED_PAGES)
			/*
			 * The swizzling pattern is actually unknown as it
			 * varies based on physical address of each page.
			 * See i915_gem_detect_bit_6_swizzle().
			 */
			break;

		tile.tiling = tiling;
		switch (tiling) {
		case I915_TILING_X:
			tile.swizzle = i915->mm.bit_6_swizzle_x;
			break;
		case I915_TILING_Y:
			tile.swizzle = i915->mm.bit_6_swizzle_y;
			break;
		}

		GEM_BUG_ON(tile.swizzle == I915_BIT_6_SWIZZLE_UNKNOWN);
		if (tile.swizzle == I915_BIT_6_SWIZZLE_9_17 ||
		    tile.swizzle == I915_BIT_6_SWIZZLE_9_10_17)
			continue;

		if (INTEL_GEN(i915) <= 2) {
			tile.height = 16;
			tile.width = 128;
			tile.size = 11;
		} else if (tile.tiling == I915_TILING_Y &&
			   HAS_128_BYTE_Y_TILING(i915)) {
			tile.height = 32;
			tile.width = 128;
			tile.size = 12;
		} else {
			tile.height = 8;
			tile.width = 512;
			tile.size = 12;
		}

		if (INTEL_GEN(i915) < 4)
			max_pitch = 8192 / tile.width;
		else if (INTEL_GEN(i915) < 7)
			max_pitch = 128 * I965_FENCE_MAX_PITCH_VAL / tile.width;
		else
			max_pitch = 128 * GEN7_FENCE_MAX_PITCH_VAL / tile.width;

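		/*
		 * Walk the pitch down from its maximum, halving each step,
		 * and on gen4+ also probe the off-by-one neighbours of each
		 * pitch.
		 */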
		for (pitch = max_pitch; pitch; pitch >>= 1) {
			tile.stride = tile.width * pitch;
			err = check_partial_mapping(obj, &tile, end);
			if (err == -EINTR)
				goto next_tiling;
			if (err)
				goto out_unlock;

			if (pitch > 2 && INTEL_GEN(i915) >= 4) {
				tile.stride = tile.width * (pitch - 1);
				err = check_partial_mapping(obj, &tile, end);
				if (err == -EINTR)
					goto next_tiling;
				if (err)
					goto out_unlock;
			}

			if (pitch < max_pitch && INTEL_GEN(i915) >= 4) {
				tile.stride = tile.width * (pitch + 1);
				err = check_partial_mapping(obj, &tile, end);
				if (err == -EINTR)
					goto next_tiling;
				if (err)
					goto out_unlock;
			}
		}

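		/*
		 * On gen4+ also sweep prime-numbered pitches to catch
		 * strides that are not tidy powers of two.
		 */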
		if (INTEL_GEN(i915) >= 4) {
			for_each_prime_number(pitch, max_pitch) {
				tile.stride = tile.width * pitch;
				err = check_partial_mapping(obj, &tile, end);
				if (err == -EINTR)
					goto next_tiling;
				if (err)
					goto out_unlock;
			}
		}

next_tiling: ;
	}

out_unlock:
	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
	mutex_unlock(&i915->drm.struct_mutex);
	i915_gem_object_unpin_pages(obj);
out:
	i915_gem_object_put(obj);
	return err;
}

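/*
 * Submit a dummy request against the object on the render engine so that it
 * stays alive only via its active reference; the caller's reference is
 * consumed here.
 */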
static int make_obj_busy(struct drm_i915_gem_object *obj)
{
	struct drm_i915_private *i915 = to_i915(obj->base.dev);
	struct i915_request *rq;
	struct i915_vma *vma;
	int err;

	vma = i915_vma_instance(obj, &i915->ggtt.vm, NULL);
	if (IS_ERR(vma))
		return PTR_ERR(vma);

	err = i915_vma_pin(vma, 0, 0, PIN_USER);
	if (err)
		return err;

	rq = i915_request_create(i915->engine[RCS0]->kernel_context);
	if (IS_ERR(rq)) {
		i915_vma_unpin(vma);
		return PTR_ERR(rq);
	}

	i915_vma_lock(vma);
	err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
	i915_vma_unlock(vma);

	i915_request_add(rq);

	i915_vma_unpin(vma);
	i915_gem_object_put(obj); /* leave it only alive via its active ref */

	return err;
}

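/*
 * Create a temporary object of the given size and check that reserving a
 * mmap offset for it returns the expected error code.
 */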
static bool assert_mmap_offset(struct drm_i915_private *i915,
			       unsigned long size,
			       int expected)
{
	struct drm_i915_gem_object *obj;
	int err;

	obj = i915_gem_object_create_internal(i915, size);
	if (IS_ERR(obj))
		return false; /* creation failure is not the result under test */

	err = create_mmap_offset(obj);
	i915_gem_object_put(obj);

	return err == expected;
}

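/*
 * Keep the GT awake and prevent the shrinker and the retire/idle workers
 * from running behind the test's back; restore_retire_worker() undoes this.
 */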
static void disable_retire_worker(struct drm_i915_private *i915)
{
	i915_gem_shrinker_unregister(i915);

	intel_gt_pm_get(i915);

	cancel_delayed_work_sync(&i915->gem.retire_work);
	flush_work(&i915->gem.idle_work);
}

static void restore_retire_worker(struct drm_i915_private *i915)
{
	intel_gt_pm_put(i915);

	mutex_lock(&i915->drm.struct_mutex);
	igt_flush_test(i915, I915_WAIT_LOCKED);
	mutex_unlock(&i915->drm.struct_mutex);

	i915_gem_shrinker_register(i915);
}

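/*
 * Shrink the mmap offset (fault) address space down to a single page, check
 * that allocations succeed or fail as expected, and verify that busy but
 * otherwise dead objects can still be reaped to make room for new offsets.
 */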
static int igt_mmap_offset_exhaustion(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct drm_mm *mm = &i915->drm.vma_offset_manager->vm_addr_space_mm;
	struct drm_i915_gem_object *obj;
	struct drm_mm_node resv, *hole;
	u64 hole_start, hole_end;
	int loop, err;

	/* Disable background reaper */
	disable_retire_worker(i915);
	GEM_BUG_ON(!i915->gt.awake);

	/* Trim the device mmap space to only a page */
	memset(&resv, 0, sizeof(resv));
	drm_mm_for_each_hole(hole, mm, hole_start, hole_end) {
		resv.start = hole_start;
		resv.size = hole_end - hole_start - 1; /* PAGE_SIZE units */
		err = drm_mm_reserve_node(mm, &resv);
		if (err) {
			pr_err("Failed to trim VMA manager, err=%d\n", err);
			goto out_park;
		}
		break;
	}

	/* Just fits! */
	if (!assert_mmap_offset(i915, PAGE_SIZE, 0)) {
		pr_err("Unable to insert object into single page hole\n");
		err = -EINVAL;
		goto out;
	}

	/* Too large */
	if (!assert_mmap_offset(i915, 2 * PAGE_SIZE, -ENOSPC)) {
		pr_err("Unexpectedly succeeded in inserting too large object into single page hole\n");
		err = -EINVAL;
		goto out;
	}

	/* Fill the hole, further allocation attempts should then fail */
	obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
	if (IS_ERR(obj)) {
		err = PTR_ERR(obj);
		goto out;
	}

	err = create_mmap_offset(obj);
	if (err) {
		pr_err("Unable to insert object into reclaimed hole\n");
		goto err_obj;
	}

	if (!assert_mmap_offset(i915, PAGE_SIZE, -ENOSPC)) {
		pr_err("Unexpectedly succeeded in inserting object into no holes!\n");
		err = -EINVAL;
		goto err_obj;
	}

	i915_gem_object_put(obj);

	/* Now fill with busy dead objects that we expect to reap */
	for (loop = 0; loop < 3; loop++) {
		if (i915_terminally_wedged(i915))
			break;

		obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
		if (IS_ERR(obj)) {
			err = PTR_ERR(obj);
			goto out;
		}

		mutex_lock(&i915->drm.struct_mutex);
		err = make_obj_busy(obj);
		mutex_unlock(&i915->drm.struct_mutex);
		if (err) {
			pr_err("[loop %d] Failed to busy the object\n", loop);
			goto err_obj;
		}

		/* NB we rely on the _active_ reference to access obj now */
		GEM_BUG_ON(!i915_gem_object_is_active(obj));
		err = create_mmap_offset(obj);
		if (err) {
			pr_err("[loop %d] create_mmap_offset failed with err=%d\n",
			       loop, err);
			goto out;
		}
	}

out:
	drm_mm_remove_node(&resv);
out_park:
	restore_retire_worker(i915);
	return err;
err_obj:
	i915_gem_object_put(obj);
	goto out;
}

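/* Live selftest entry point, invoked by the i915 selftest harness. */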
int i915_gem_mman_live_selftests(struct drm_i915_private *i915)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(igt_partial_tiling),
		SUBTEST(igt_mmap_offset_exhaustion),
	};

	return i915_subtests(tests, i915);
}