xref: /openbmc/linux/drivers/gpu/drm/i915/gem/selftests/huge_pages.c (revision f43e47c090dc7fe32d5410d8740c3a004eb2676f)
1 /*
2  * SPDX-License-Identifier: MIT
3  *
4  * Copyright © 2017 Intel Corporation
5  */
6 
7 #include <linux/prime_numbers.h>
8 #include <linux/string_helpers.h>
9 #include <linux/swap.h>
10 
11 #include "i915_selftest.h"
12 
13 #include "gem/i915_gem_internal.h"
14 #include "gem/i915_gem_lmem.h"
15 #include "gem/i915_gem_pm.h"
16 #include "gem/i915_gem_region.h"
17 
18 #include "gt/intel_gt.h"
19 
20 #include "igt_gem_utils.h"
21 #include "mock_context.h"
22 
23 #include "selftests/mock_drm.h"
24 #include "selftests/mock_gem_device.h"
25 #include "selftests/mock_region.h"
26 #include "selftests/i915_random.h"
27 
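/*
 * Create a live GEM context for the huge page tests. If the context brings
 * its own VM, set the selftest-only scrub_64K flag on it so that clearing
 * 64K page-table ranges leaves well-defined scratch entries behind.
 */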
28 static struct i915_gem_context *hugepage_ctx(struct drm_i915_private *i915,
29 					     struct file *file)
30 {
31 	struct i915_gem_context *ctx = live_context(i915, file);
32 	struct i915_address_space *vm;
33 
34 	if (IS_ERR(ctx))
35 		return ctx;
36 
37 	vm = ctx->vm;
38 	if (vm)
39 		WRITE_ONCE(vm->scrub_64K, true);
40 
41 	return ctx;
42 }
43 
44 static const unsigned int page_sizes[] = {
45 	I915_GTT_PAGE_SIZE_2M,
46 	I915_GTT_PAGE_SIZE_64K,
47 	I915_GTT_PAGE_SIZE_4K,
48 };
49 
50 static unsigned int get_largest_page_size(struct drm_i915_private *i915,
51 					  u64 rem)
52 {
53 	int i;
54 
55 	for (i = 0; i < ARRAY_SIZE(page_sizes); ++i) {
56 		unsigned int page_size = page_sizes[i];
57 
58 		if (HAS_PAGE_SIZES(i915, page_size) && rem >= page_size)
59 			return page_size;
60 	}
61 
62 	return 0;
63 }
64 
65 static void huge_pages_free_pages(struct sg_table *st)
66 {
67 	struct scatterlist *sg;
68 
69 	for (sg = st->sgl; sg; sg = __sg_next(sg)) {
70 		if (sg_page(sg))
71 			__free_pages(sg_page(sg), get_order(sg->length));
72 	}
73 
74 	sg_free_table(st);
75 	kfree(st);
76 }
77 
78 static int get_huge_pages(struct drm_i915_gem_object *obj)
79 {
80 #define GFP (GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY)
81 	unsigned int page_mask = obj->mm.page_mask;
82 	struct sg_table *st;
83 	struct scatterlist *sg;
84 	unsigned int sg_page_sizes;
85 	u64 rem;
86 
87 	st = kmalloc(sizeof(*st), GFP);
88 	if (!st)
89 		return -ENOMEM;
90 
91 	if (sg_alloc_table(st, obj->base.size >> PAGE_SHIFT, GFP)) {
92 		kfree(st);
93 		return -ENOMEM;
94 	}
95 
96 	rem = obj->base.size;
97 	sg = st->sgl;
98 	st->nents = 0;
99 	sg_page_sizes = 0;
100 
101 	/*
102 	 * Our goal here is simple: we want to greedily fill the object from
103 	 * largest to smallest page-size, while ensuring that we use *every*
104 	 * page-size as per the given page-mask.
105 	 */
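	/*
	 * E.g. with page_mask = 2M | 64K | 4K the inner loop below stops while
	 * there is still room for one chunk of every smaller size remaining in
	 * the mask, so the resulting sg list holds at least one chunk of each
	 * requested page size (enforced by the GEM_BUG_ON further down), laid
	 * out from largest to smallest.
	 */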
106 	do {
107 		unsigned int bit = ilog2(page_mask);
108 		unsigned int page_size = BIT(bit);
109 		int order = get_order(page_size);
110 
111 		do {
112 			struct page *page;
113 
114 			GEM_BUG_ON(order >= MAX_ORDER);
115 			page = alloc_pages(GFP | __GFP_ZERO, order);
116 			if (!page)
117 				goto err;
118 
119 			sg_set_page(sg, page, page_size, 0);
120 			sg_page_sizes |= page_size;
121 			st->nents++;
122 
123 			rem -= page_size;
124 			if (!rem) {
125 				sg_mark_end(sg);
126 				break;
127 			}
128 
129 			sg = __sg_next(sg);
130 		} while ((rem - ((page_size-1) & page_mask)) >= page_size);
131 
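		/*
		 * Clear the bit we just filled with so the next pass picks the
		 * next smaller page size from the mask.
		 */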
132 		page_mask &= (page_size-1);
133 	} while (page_mask);
134 
135 	if (i915_gem_gtt_prepare_pages(obj, st))
136 		goto err;
137 
138 	GEM_BUG_ON(sg_page_sizes != obj->mm.page_mask);
139 	__i915_gem_object_set_pages(obj, st, sg_page_sizes);
140 
141 	return 0;
142 
143 err:
144 	sg_set_page(sg, NULL, 0, 0);
145 	sg_mark_end(sg);
146 	huge_pages_free_pages(st);
147 
148 	return -ENOMEM;
149 }
150 
151 static void put_huge_pages(struct drm_i915_gem_object *obj,
152 			   struct sg_table *pages)
153 {
154 	i915_gem_gtt_finish_pages(obj, pages);
155 	huge_pages_free_pages(pages);
156 
157 	obj->mm.dirty = false;
158 
159 	__start_cpu_write(obj);
160 }
161 
162 static const struct drm_i915_gem_object_ops huge_page_ops = {
163 	.name = "huge-gem",
164 	.flags = I915_GEM_OBJECT_IS_SHRINKABLE,
165 	.get_pages = get_huge_pages,
166 	.put_pages = put_huge_pages,
167 };
168 
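/*
 * Create an internal test object whose backing store is built by
 * get_huge_pages() according to the given page_mask; the object is marked
 * volatile since its contents never need to be preserved.
 */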
169 static struct drm_i915_gem_object *
170 huge_pages_object(struct drm_i915_private *i915,
171 		  u64 size,
172 		  unsigned int page_mask)
173 {
174 	static struct lock_class_key lock_class;
175 	struct drm_i915_gem_object *obj;
176 	unsigned int cache_level;
177 
178 	GEM_BUG_ON(!size);
179 	GEM_BUG_ON(!IS_ALIGNED(size, BIT(__ffs(page_mask))));
180 
181 	if (size >> PAGE_SHIFT > INT_MAX)
182 		return ERR_PTR(-E2BIG);
183 
184 	if (overflows_type(size, obj->base.size))
185 		return ERR_PTR(-E2BIG);
186 
187 	obj = i915_gem_object_alloc();
188 	if (!obj)
189 		return ERR_PTR(-ENOMEM);
190 
191 	drm_gem_private_object_init(&i915->drm, &obj->base, size);
192 	i915_gem_object_init(obj, &huge_page_ops, &lock_class, 0);
193 	obj->mem_flags |= I915_BO_FLAG_STRUCT_PAGE;
194 	i915_gem_object_set_volatile(obj);
195 
196 	obj->write_domain = I915_GEM_DOMAIN_CPU;
197 	obj->read_domains = I915_GEM_DOMAIN_CPU;
198 
199 	cache_level = HAS_LLC(i915) ? I915_CACHE_LLC : I915_CACHE_NONE;
200 	i915_gem_object_set_cache_coherency(obj, cache_level);
201 
202 	obj->mm.page_mask = page_mask;
203 
204 	return obj;
205 }
206 
207 static int fake_get_huge_pages(struct drm_i915_gem_object *obj)
208 {
209 	struct drm_i915_private *i915 = to_i915(obj->base.dev);
210 	const u64 max_len = rounddown_pow_of_two(UINT_MAX);
211 	struct sg_table *st;
212 	struct scatterlist *sg;
213 	unsigned int sg_page_sizes;
214 	u64 rem;
215 
216 	st = kmalloc(sizeof(*st), GFP);
217 	if (!st)
218 		return -ENOMEM;
219 
220 	if (sg_alloc_table(st, obj->base.size >> PAGE_SHIFT, GFP)) {
221 		kfree(st);
222 		return -ENOMEM;
223 	}
224 
225 	/* Use optimal page-sized chunks to fill in the sg table */
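	/*
	 * These are mock pages with no real backing memory: sg_dma_len() just
	 * carries the chunk length and sg_dma_address() is set to the chunk's
	 * page size (trivially aligned to itself), which is enough for the
	 * page-size bookkeeping to be exercised without allocating anything.
	 */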
226 	rem = obj->base.size;
227 	sg = st->sgl;
228 	st->nents = 0;
229 	sg_page_sizes = 0;
230 	do {
231 		unsigned int page_size = get_largest_page_size(i915, rem);
232 		unsigned int len = min(page_size * div_u64(rem, page_size),
233 				       max_len);
234 
235 		GEM_BUG_ON(!page_size);
236 
237 		sg->offset = 0;
238 		sg->length = len;
239 		sg_dma_len(sg) = len;
240 		sg_dma_address(sg) = page_size;
241 
242 		sg_page_sizes |= len;
243 
244 		st->nents++;
245 
246 		rem -= len;
247 		if (!rem) {
248 			sg_mark_end(sg);
249 			break;
250 		}
251 
252 		sg = sg_next(sg);
253 	} while (1);
254 
255 	i915_sg_trim(st);
256 
257 	__i915_gem_object_set_pages(obj, st, sg_page_sizes);
258 
259 	return 0;
260 }
261 
262 static int fake_get_huge_pages_single(struct drm_i915_gem_object *obj)
263 {
264 	struct drm_i915_private *i915 = to_i915(obj->base.dev);
265 	struct sg_table *st;
266 	struct scatterlist *sg;
267 	unsigned int page_size;
268 
269 	st = kmalloc(sizeof(*st), GFP);
270 	if (!st)
271 		return -ENOMEM;
272 
273 	if (sg_alloc_table(st, 1, GFP)) {
274 		kfree(st);
275 		return -ENOMEM;
276 	}
277 
278 	sg = st->sgl;
279 	st->nents = 1;
280 
281 	page_size = get_largest_page_size(i915, obj->base.size);
282 	GEM_BUG_ON(!page_size);
283 
284 	sg->offset = 0;
285 	sg->length = obj->base.size;
286 	sg_dma_len(sg) = obj->base.size;
287 	sg_dma_address(sg) = page_size;
288 
289 	__i915_gem_object_set_pages(obj, st, sg->length);
290 
291 	return 0;
292 #undef GFP
293 }
294 
295 static void fake_free_huge_pages(struct drm_i915_gem_object *obj,
296 				 struct sg_table *pages)
297 {
298 	sg_free_table(pages);
299 	kfree(pages);
300 }
301 
302 static void fake_put_huge_pages(struct drm_i915_gem_object *obj,
303 				struct sg_table *pages)
304 {
305 	fake_free_huge_pages(obj, pages);
306 	obj->mm.dirty = false;
307 }
308 
309 static const struct drm_i915_gem_object_ops fake_ops = {
310 	.name = "fake-gem",
311 	.flags = I915_GEM_OBJECT_IS_SHRINKABLE,
312 	.get_pages = fake_get_huge_pages,
313 	.put_pages = fake_put_huge_pages,
314 };
315 
316 static const struct drm_i915_gem_object_ops fake_ops_single = {
317 	.name = "fake-gem",
318 	.flags = I915_GEM_OBJECT_IS_SHRINKABLE,
319 	.get_pages = fake_get_huge_pages_single,
320 	.put_pages = fake_put_huge_pages,
321 };
322 
323 static struct drm_i915_gem_object *
324 fake_huge_pages_object(struct drm_i915_private *i915, u64 size, bool single)
325 {
326 	static struct lock_class_key lock_class;
327 	struct drm_i915_gem_object *obj;
328 
329 	GEM_BUG_ON(!size);
330 	GEM_BUG_ON(!IS_ALIGNED(size, I915_GTT_PAGE_SIZE));
331 
332 	if (size >> PAGE_SHIFT > UINT_MAX)
333 		return ERR_PTR(-E2BIG);
334 
335 	if (overflows_type(size, obj->base.size))
336 		return ERR_PTR(-E2BIG);
337 
338 	obj = i915_gem_object_alloc();
339 	if (!obj)
340 		return ERR_PTR(-ENOMEM);
341 
342 	drm_gem_private_object_init(&i915->drm, &obj->base, size);
343 
344 	if (single)
345 		i915_gem_object_init(obj, &fake_ops_single, &lock_class, 0);
346 	else
347 		i915_gem_object_init(obj, &fake_ops, &lock_class, 0);
348 
349 	i915_gem_object_set_volatile(obj);
350 
351 	obj->write_domain = I915_GEM_DOMAIN_CPU;
352 	obj->read_domains = I915_GEM_DOMAIN_CPU;
353 	obj->cache_level = I915_CACHE_NONE;
354 
355 	return obj;
356 }
357 
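/*
 * Verify that a bound vma reports sane page sizes: the sg and GTT page sizes
 * must be ones the device supports, the vma must agree with its object on the
 * physical and sg page sizes, and for LMEM a 2M-aligned binding backed by 2M
 * sg chunks must actually be mapped with 2M GTT pages.
 */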
358 static int igt_check_page_sizes(struct i915_vma *vma)
359 {
360 	struct drm_i915_private *i915 = vma->vm->i915;
361 	unsigned int supported = RUNTIME_INFO(i915)->page_sizes;
362 	struct drm_i915_gem_object *obj = vma->obj;
363 	int err;
364 
365 	/* We have to wait for the async bind to complete before our asserts */
366 	err = i915_vma_sync(vma);
367 	if (err)
368 		return err;
369 
370 	if (!HAS_PAGE_SIZES(i915, vma->page_sizes.sg)) {
371 		pr_err("unsupported page_sizes.sg=%u, supported=%u\n",
372 		       vma->page_sizes.sg & ~supported, supported);
373 		err = -EINVAL;
374 	}
375 
376 	if (!HAS_PAGE_SIZES(i915, vma->resource->page_sizes_gtt)) {
377 		pr_err("unsupported page_sizes.gtt=%u, supported=%u\n",
378 		       vma->resource->page_sizes_gtt & ~supported, supported);
379 		err = -EINVAL;
380 	}
381 
382 	if (vma->page_sizes.phys != obj->mm.page_sizes.phys) {
383 		pr_err("vma->page_sizes.phys(%u) != obj->mm.page_sizes.phys(%u)\n",
384 		       vma->page_sizes.phys, obj->mm.page_sizes.phys);
385 		err = -EINVAL;
386 	}
387 
388 	if (vma->page_sizes.sg != obj->mm.page_sizes.sg) {
389 		pr_err("vma->page_sizes.sg(%u) != obj->mm.page_sizes.sg(%u)\n",
390 		       vma->page_sizes.sg, obj->mm.page_sizes.sg);
391 		err = -EINVAL;
392 	}
393 
394 	/*
395 	 * The dma-api is like a box of chocolates when it comes to the
396 	 * alignment of dma addresses; however, for LMEM we have total control
397 	 * and so can guarantee alignment. Likewise, when we allocate our blocks
398 	 * they should appear in descending order, and if we know that we align
399 	 * to the largest page size for the GTT address, we should be able to
400 	 * assert that if we see 2M physical pages then we should also get 2M
401 	 * GTT pages. If we don't then something might be wrong in our
402 	 * construction of the backing pages.
403 	 *
404 	 * Maintaining alignment is required to utilise huge pages in the ppGTT.
405 	 */
406 	if (i915_gem_object_is_lmem(obj) &&
407 	    IS_ALIGNED(vma->node.start, SZ_2M) &&
408 	    vma->page_sizes.sg & SZ_2M &&
409 	    vma->resource->page_sizes_gtt < SZ_2M) {
410 		pr_err("gtt pages mismatch for LMEM, expected 2M GTT pages, sg(%u), gtt(%u)\n",
411 		       vma->page_sizes.sg, vma->resource->page_sizes_gtt);
412 		err = -EINVAL;
413 	}
414 
415 	return err;
416 }
417 
418 static int igt_mock_exhaust_device_supported_pages(void *arg)
419 {
420 	struct i915_ppgtt *ppgtt = arg;
421 	struct drm_i915_private *i915 = ppgtt->vm.i915;
422 	unsigned int saved_mask = RUNTIME_INFO(i915)->page_sizes;
423 	struct drm_i915_gem_object *obj;
424 	struct i915_vma *vma;
425 	int i, j, single;
426 	int err;
427 
428 	/*
429 	 * Sanity check creating objects with every valid page support
430 	 * combination for our mock device.
431 	 */
432 
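	/*
	 * Each value of i picks a different non-empty subset of page_sizes[];
	 * 4K is always added on top since the ppGTT itself needs it.
	 */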
433 	for (i = 1; i < BIT(ARRAY_SIZE(page_sizes)); i++) {
434 		unsigned int combination = SZ_4K; /* Required for ppGTT */
435 
436 		for (j = 0; j < ARRAY_SIZE(page_sizes); j++) {
437 			if (i & BIT(j))
438 				combination |= page_sizes[j];
439 		}
440 
441 		RUNTIME_INFO(i915)->page_sizes = combination;
442 
443 		for (single = 0; single <= 1; ++single) {
444 			obj = fake_huge_pages_object(i915, combination, !!single);
445 			if (IS_ERR(obj)) {
446 				err = PTR_ERR(obj);
447 				goto out_device;
448 			}
449 
450 			if (obj->base.size != combination) {
451 				pr_err("obj->base.size=%zu, expected=%u\n",
452 				       obj->base.size, combination);
453 				err = -EINVAL;
454 				goto out_put;
455 			}
456 
457 			vma = i915_vma_instance(obj, &ppgtt->vm, NULL);
458 			if (IS_ERR(vma)) {
459 				err = PTR_ERR(vma);
460 				goto out_put;
461 			}
462 
463 			err = i915_vma_pin(vma, 0, 0, PIN_USER);
464 			if (err)
465 				goto out_put;
466 
467 			err = igt_check_page_sizes(vma);
468 
469 			if (vma->page_sizes.sg != combination) {
470 				pr_err("page_sizes.sg=%u, expected=%u\n",
471 				       vma->page_sizes.sg, combination);
472 				err = -EINVAL;
473 			}
474 
475 			i915_vma_unpin(vma);
476 			i915_gem_object_put(obj);
477 
478 			if (err)
479 				goto out_device;
480 		}
481 	}
482 
483 	goto out_device;
484 
485 out_put:
486 	i915_gem_object_put(obj);
487 out_device:
488 	RUNTIME_INFO(i915)->page_sizes = saved_mask;
489 
490 	return err;
491 }
492 
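/*
 * For every page size the mock device claims to support, allocate a single
 * page of that size from a mock memory region (with and without
 * I915_BO_ALLOC_CONTIGUOUS) and check that both the dma address alignment and
 * the resulting GTT page size match.
 */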
493 static int igt_mock_memory_region_huge_pages(void *arg)
494 {
495 	const unsigned int flags[] = { 0, I915_BO_ALLOC_CONTIGUOUS };
496 	struct i915_ppgtt *ppgtt = arg;
497 	struct drm_i915_private *i915 = ppgtt->vm.i915;
498 	unsigned long supported = RUNTIME_INFO(i915)->page_sizes;
499 	struct intel_memory_region *mem;
500 	struct drm_i915_gem_object *obj;
501 	struct i915_vma *vma;
502 	int bit;
503 	int err = 0;
504 
505 	mem = mock_region_create(i915, 0, SZ_2G, I915_GTT_PAGE_SIZE_4K, 0, 0);
506 	if (IS_ERR(mem)) {
507 		pr_err("%s failed to create memory region\n", __func__);
508 		return PTR_ERR(mem);
509 	}
510 
511 	for_each_set_bit(bit, &supported, ilog2(I915_GTT_MAX_PAGE_SIZE) + 1) {
512 		unsigned int page_size = BIT(bit);
513 		resource_size_t phys;
514 		int i;
515 
516 		for (i = 0; i < ARRAY_SIZE(flags); ++i) {
517 			obj = i915_gem_object_create_region(mem,
518 							    page_size, page_size,
519 							    flags[i]);
520 			if (IS_ERR(obj)) {
521 				err = PTR_ERR(obj);
522 				goto out_region;
523 			}
524 
525 			vma = i915_vma_instance(obj, &ppgtt->vm, NULL);
526 			if (IS_ERR(vma)) {
527 				err = PTR_ERR(vma);
528 				goto out_put;
529 			}
530 
531 			err = i915_vma_pin(vma, 0, 0, PIN_USER);
532 			if (err)
533 				goto out_put;
534 
535 			err = igt_check_page_sizes(vma);
536 			if (err)
537 				goto out_unpin;
538 
539 			phys = i915_gem_object_get_dma_address(obj, 0);
540 			if (!IS_ALIGNED(phys, page_size)) {
541 				pr_err("%s addr misaligned(%pa) page_size=%u\n",
542 				       __func__, &phys, page_size);
543 				err = -EINVAL;
544 				goto out_unpin;
545 			}
546 
547 			if (vma->resource->page_sizes_gtt != page_size) {
548 				pr_err("%s page_sizes.gtt=%u, expected=%u\n",
549 				       __func__, vma->resource->page_sizes_gtt,
550 				       page_size);
551 				err = -EINVAL;
552 				goto out_unpin;
553 			}
554 
555 			i915_vma_unpin(vma);
556 			__i915_gem_object_put_pages(obj);
557 			i915_gem_object_put(obj);
558 		}
559 	}
560 
561 	goto out_region;
562 
563 out_unpin:
564 	i915_vma_unpin(vma);
565 out_put:
566 	i915_gem_object_put(obj);
567 out_region:
568 	intel_memory_region_destroy(mem);
569 	return err;
570 }
571 
572 static int igt_mock_ppgtt_misaligned_dma(void *arg)
573 {
574 	struct i915_ppgtt *ppgtt = arg;
575 	struct drm_i915_private *i915 = ppgtt->vm.i915;
576 	unsigned long supported = RUNTIME_INFO(i915)->page_sizes;
577 	struct drm_i915_gem_object *obj;
578 	int bit;
579 	int err;
580 
581 	/*
582 	 * Sanity check dma misalignment for huge pages -- the dma addresses we
583 	 * insert into the paging structures need to always respect the page
584 	 * size alignment.
585 	 */
586 
587 	bit = ilog2(I915_GTT_PAGE_SIZE_64K);
588 
589 	for_each_set_bit_from(bit, &supported,
590 			      ilog2(I915_GTT_MAX_PAGE_SIZE) + 1) {
591 		IGT_TIMEOUT(end_time);
592 		unsigned int page_size = BIT(bit);
593 		unsigned int flags = PIN_USER | PIN_OFFSET_FIXED;
594 		unsigned int offset;
595 		unsigned int size =
596 			round_up(page_size, I915_GTT_PAGE_SIZE_2M) << 1;
597 		struct i915_vma *vma;
598 
599 		obj = fake_huge_pages_object(i915, size, true);
600 		if (IS_ERR(obj))
601 			return PTR_ERR(obj);
602 
603 		if (obj->base.size != size) {
604 			pr_err("obj->base.size=%zu, expected=%u\n",
605 			       obj->base.size, size);
606 			err = -EINVAL;
607 			goto out_put;
608 		}
609 
610 		err = i915_gem_object_pin_pages_unlocked(obj);
611 		if (err)
612 			goto out_put;
613 
614 		/* Force the page size for this object */
615 		obj->mm.page_sizes.sg = page_size;
616 
617 		vma = i915_vma_instance(obj, &ppgtt->vm, NULL);
618 		if (IS_ERR(vma)) {
619 			err = PTR_ERR(vma);
620 			goto out_unpin;
621 		}
622 
623 		err = i915_vma_pin(vma, 0, 0, flags);
624 		if (err)
625 			goto out_unpin;
626 
627 
628 		err = igt_check_page_sizes(vma);
629 
630 		if (vma->resource->page_sizes_gtt != page_size) {
631 			pr_err("page_sizes.gtt=%u, expected %u\n",
632 			       vma->resource->page_sizes_gtt, page_size);
633 			err = -EINVAL;
634 		}
635 
636 		i915_vma_unpin(vma);
637 
638 		if (err)
639 			goto out_unpin;
640 
641 		/*
642 		 * Try all the other valid offsets until the next
643 		 * boundary -- should always fall back to using 4K
644 		 * pages.
645 		 */
646 		for (offset = 4096; offset < page_size; offset += 4096) {
647 			err = i915_vma_unbind_unlocked(vma);
648 			if (err)
649 				goto out_unpin;
650 
651 			err = i915_vma_pin(vma, 0, 0, flags | offset);
652 			if (err)
653 				goto out_unpin;
654 
655 			err = igt_check_page_sizes(vma);
656 
657 			if (vma->resource->page_sizes_gtt != I915_GTT_PAGE_SIZE_4K) {
658 				pr_err("page_sizes.gtt=%u, expected %llu\n",
659 				       vma->resource->page_sizes_gtt,
660 				       I915_GTT_PAGE_SIZE_4K);
661 				err = -EINVAL;
662 			}
663 
664 			i915_vma_unpin(vma);
665 
666 			if (err)
667 				goto out_unpin;
668 
669 			if (igt_timeout(end_time,
670 					"%s timed out at offset %x with page-size %x\n",
671 					__func__, offset, page_size))
672 				break;
673 		}
674 
675 		i915_gem_object_lock(obj, NULL);
676 		i915_gem_object_unpin_pages(obj);
677 		__i915_gem_object_put_pages(obj);
678 		i915_gem_object_unlock(obj);
679 		i915_gem_object_put(obj);
680 	}
681 
682 	return 0;
683 
684 out_unpin:
685 	i915_gem_object_lock(obj, NULL);
686 	i915_gem_object_unpin_pages(obj);
687 	i915_gem_object_unlock(obj);
688 out_put:
689 	i915_gem_object_put(obj);
690 
691 	return err;
692 }
693 
694 static void close_object_list(struct list_head *objects,
695 			      struct i915_ppgtt *ppgtt)
696 {
697 	struct drm_i915_gem_object *obj, *on;
698 
699 	list_for_each_entry_safe(obj, on, objects, st_link) {
700 		list_del(&obj->st_link);
701 		i915_gem_object_lock(obj, NULL);
702 		i915_gem_object_unpin_pages(obj);
703 		__i915_gem_object_put_pages(obj);
704 		i915_gem_object_unlock(obj);
705 		i915_gem_object_put(obj);
706 	}
707 }
708 
709 static int igt_mock_ppgtt_huge_fill(void *arg)
710 {
711 	struct i915_ppgtt *ppgtt = arg;
712 	struct drm_i915_private *i915 = ppgtt->vm.i915;
713 	unsigned long max_pages = ppgtt->vm.total >> PAGE_SHIFT;
714 	unsigned long page_num;
715 	bool single = false;
716 	LIST_HEAD(objects);
717 	IGT_TIMEOUT(end_time);
718 	int err = -ENODEV;
719 
720 	for_each_prime_number_from(page_num, 1, max_pages) {
721 		struct drm_i915_gem_object *obj;
722 		u64 size = page_num << PAGE_SHIFT;
723 		struct i915_vma *vma;
724 		unsigned int expected_gtt = 0;
725 		int i;
726 
727 		obj = fake_huge_pages_object(i915, size, single);
728 		if (IS_ERR(obj)) {
729 			err = PTR_ERR(obj);
730 			break;
731 		}
732 
733 		if (obj->base.size != size) {
734 			pr_err("obj->base.size=%zd, expected=%llu\n",
735 			       obj->base.size, size);
736 			i915_gem_object_put(obj);
737 			err = -EINVAL;
738 			break;
739 		}
740 
741 		err = i915_gem_object_pin_pages_unlocked(obj);
742 		if (err) {
743 			i915_gem_object_put(obj);
744 			break;
745 		}
746 
747 		list_add(&obj->st_link, &objects);
748 
749 		vma = i915_vma_instance(obj, &ppgtt->vm, NULL);
750 		if (IS_ERR(vma)) {
751 			err = PTR_ERR(vma);
752 			break;
753 		}
754 
755 		err = i915_vma_pin(vma, 0, 0, PIN_USER);
756 		if (err)
757 			break;
758 
759 		err = igt_check_page_sizes(vma);
760 		if (err) {
761 			i915_vma_unpin(vma);
762 			break;
763 		}
764 
765 		/*
766 		 * Figure out the expected gtt page size knowing that we go from
767 		 * largest to smallest page size sg chunks, and that we align to
768 		 * the largest page size.
769 		 */
770 		for (i = 0; i < ARRAY_SIZE(page_sizes); ++i) {
771 			unsigned int page_size = page_sizes[i];
772 
773 			if (HAS_PAGE_SIZES(i915, page_size) &&
774 			    size >= page_size) {
775 				expected_gtt |= page_size;
776 				size &= page_size-1;
777 			}
778 		}
779 
780 		GEM_BUG_ON(!expected_gtt);
781 		GEM_BUG_ON(size);
782 
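		/*
		 * Any 64K chunk ends up sharing its page table with the 4K
		 * tail here, so it has to be inserted with 4K GTT entries.
		 */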
783 		if (expected_gtt & I915_GTT_PAGE_SIZE_4K)
784 			expected_gtt &= ~I915_GTT_PAGE_SIZE_64K;
785 
786 		i915_vma_unpin(vma);
787 
788 		if (vma->page_sizes.sg & I915_GTT_PAGE_SIZE_64K) {
789 			if (!IS_ALIGNED(vma->node.start,
790 					I915_GTT_PAGE_SIZE_2M)) {
791 				pr_err("node.start(%llx) not aligned to 2M\n",
792 				       vma->node.start);
793 				err = -EINVAL;
794 				break;
795 			}
796 
797 			if (!IS_ALIGNED(vma->node.size,
798 					I915_GTT_PAGE_SIZE_2M)) {
799 				pr_err("node.size(%llx) not aligned to 2M\n",
800 				       vma->node.size);
801 				err = -EINVAL;
802 				break;
803 			}
804 		}
805 
806 		if (vma->resource->page_sizes_gtt != expected_gtt) {
807 			pr_err("gtt=%u, expected=%u, size=%zd, single=%s\n",
808 			       vma->resource->page_sizes_gtt, expected_gtt,
809 			       obj->base.size, str_yes_no(!!single));
810 			err = -EINVAL;
811 			break;
812 		}
813 
814 		if (igt_timeout(end_time,
815 				"%s timed out at size %zd\n",
816 				__func__, obj->base.size))
817 			break;
818 
819 		single = !single;
820 	}
821 
822 	close_object_list(&objects, ppgtt);
823 
824 	if (err == -ENOMEM || err == -ENOSPC)
825 		err = 0;
826 
827 	return err;
828 }
829 
830 static int igt_mock_ppgtt_64K(void *arg)
831 {
832 	struct i915_ppgtt *ppgtt = arg;
833 	struct drm_i915_private *i915 = ppgtt->vm.i915;
834 	struct drm_i915_gem_object *obj;
835 	const struct object_info {
836 		unsigned int size;
837 		unsigned int gtt;
838 		unsigned int offset;
839 	} objects[] = {
840 		/* Cases with forced padding/alignment */
841 		{
842 			.size = SZ_64K,
843 			.gtt = I915_GTT_PAGE_SIZE_64K,
844 			.offset = 0,
845 		},
846 		{
847 			.size = SZ_64K + SZ_4K,
848 			.gtt = I915_GTT_PAGE_SIZE_4K,
849 			.offset = 0,
850 		},
851 		{
852 			.size = SZ_64K - SZ_4K,
853 			.gtt = I915_GTT_PAGE_SIZE_4K,
854 			.offset = 0,
855 		},
856 		{
857 			.size = SZ_2M,
858 			.gtt = I915_GTT_PAGE_SIZE_64K,
859 			.offset = 0,
860 		},
861 		{
862 			.size = SZ_2M - SZ_4K,
863 			.gtt = I915_GTT_PAGE_SIZE_4K,
864 			.offset = 0,
865 		},
866 		{
867 			.size = SZ_2M + SZ_4K,
868 			.gtt = I915_GTT_PAGE_SIZE_64K | I915_GTT_PAGE_SIZE_4K,
869 			.offset = 0,
870 		},
871 		{
872 			.size = SZ_2M + SZ_64K,
873 			.gtt = I915_GTT_PAGE_SIZE_64K,
874 			.offset = 0,
875 		},
876 		{
877 			.size = SZ_2M - SZ_64K,
878 			.gtt = I915_GTT_PAGE_SIZE_64K,
879 			.offset = 0,
880 		},
881 		/* Try without any forced padding/alignment */
882 		{
883 			.size = SZ_64K,
884 			.offset = SZ_2M,
885 			.gtt = I915_GTT_PAGE_SIZE_4K,
886 		},
887 		{
888 			.size = SZ_128K,
889 			.offset = SZ_2M - SZ_64K,
890 			.gtt = I915_GTT_PAGE_SIZE_4K,
891 		},
892 	};
893 	struct i915_vma *vma;
894 	int i, single;
895 	int err;
896 
897 	/*
898 	 * Sanity check some of the trickiness with 64K pages -- either we can
899 	 * safely mark the whole page-table (2M block) as 64K, or we have to
900 	 * always fall back to 4K.
901 	 */
902 
903 	if (!HAS_PAGE_SIZES(i915, I915_GTT_PAGE_SIZE_64K))
904 		return 0;
905 
906 	for (i = 0; i < ARRAY_SIZE(objects); ++i) {
907 		unsigned int size = objects[i].size;
908 		unsigned int expected_gtt = objects[i].gtt;
909 		unsigned int offset = objects[i].offset;
910 		unsigned int flags = PIN_USER;
911 
912 		for (single = 0; single <= 1; single++) {
913 			obj = fake_huge_pages_object(i915, size, !!single);
914 			if (IS_ERR(obj))
915 				return PTR_ERR(obj);
916 
917 			err = i915_gem_object_pin_pages_unlocked(obj);
918 			if (err)
919 				goto out_object_put;
920 
921 			/*
922 			 * Disable 2M pages -- We only want to use 64K/4K pages
923 			 * for this test.
924 			 */
925 			obj->mm.page_sizes.sg &= ~I915_GTT_PAGE_SIZE_2M;
926 
927 			vma = i915_vma_instance(obj, &ppgtt->vm, NULL);
928 			if (IS_ERR(vma)) {
929 				err = PTR_ERR(vma);
930 				goto out_object_unpin;
931 			}
932 
933 			if (offset)
934 				flags |= PIN_OFFSET_FIXED | offset;
935 
936 			err = i915_vma_pin(vma, 0, 0, flags);
937 			if (err)
938 				goto out_object_unpin;
939 
940 			err = igt_check_page_sizes(vma);
941 			if (err)
942 				goto out_vma_unpin;
943 
944 			if (!offset && vma->page_sizes.sg & I915_GTT_PAGE_SIZE_64K) {
945 				if (!IS_ALIGNED(vma->node.start,
946 						I915_GTT_PAGE_SIZE_2M)) {
947 					pr_err("node.start(%llx) not aligned to 2M\n",
948 					       vma->node.start);
949 					err = -EINVAL;
950 					goto out_vma_unpin;
951 				}
952 
953 				if (!IS_ALIGNED(vma->node.size,
954 						I915_GTT_PAGE_SIZE_2M)) {
955 					pr_err("node.size(%llx) not aligned to 2M\n",
956 					       vma->node.size);
957 					err = -EINVAL;
958 					goto out_vma_unpin;
959 				}
960 			}
961 
962 			if (vma->resource->page_sizes_gtt != expected_gtt) {
963 				pr_err("gtt=%u, expected=%u, i=%d, single=%s\n",
964 				       vma->resource->page_sizes_gtt,
965 				       expected_gtt, i, str_yes_no(!!single));
966 				err = -EINVAL;
967 				goto out_vma_unpin;
968 			}
969 
970 			i915_vma_unpin(vma);
971 			i915_gem_object_lock(obj, NULL);
972 			i915_gem_object_unpin_pages(obj);
973 			__i915_gem_object_put_pages(obj);
974 			i915_gem_object_unlock(obj);
975 			i915_gem_object_put(obj);
976 
977 			i915_gem_drain_freed_objects(i915);
978 		}
979 	}
980 
981 	return 0;
982 
983 out_vma_unpin:
984 	i915_vma_unpin(vma);
985 out_object_unpin:
986 	i915_gem_object_lock(obj, NULL);
987 	i915_gem_object_unpin_pages(obj);
988 	i915_gem_object_unlock(obj);
989 out_object_put:
990 	i915_gem_object_put(obj);
991 
992 	return err;
993 }
994 
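/*
 * Use the GPU to store val into the given dword of every page backing the
 * vma, so that a later CPU read-back can confirm the pages really landed
 * where we expect them.
 */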
995 static int gpu_write(struct intel_context *ce,
996 		     struct i915_vma *vma,
997 		     u32 dw,
998 		     u32 val)
999 {
1000 	int err;
1001 
1002 	i915_gem_object_lock(vma->obj, NULL);
1003 	err = i915_gem_object_set_to_gtt_domain(vma->obj, true);
1004 	i915_gem_object_unlock(vma->obj);
1005 	if (err)
1006 		return err;
1007 
1008 	return igt_gpu_fill_dw(ce, vma, dw * sizeof(u32),
1009 			       vma->size >> PAGE_SHIFT, val);
1010 }
1011 
1012 static int
1013 __cpu_check_shmem(struct drm_i915_gem_object *obj, u32 dword, u32 val)
1014 {
1015 	unsigned int needs_flush;
1016 	unsigned long n;
1017 	int err;
1018 
1019 	i915_gem_object_lock(obj, NULL);
1020 	err = i915_gem_object_prepare_read(obj, &needs_flush);
1021 	if (err)
1022 		goto err_unlock;
1023 
1024 	for (n = 0; n < obj->base.size >> PAGE_SHIFT; ++n) {
1025 		u32 *ptr = kmap_atomic(i915_gem_object_get_page(obj, n));
1026 
1027 		if (needs_flush & CLFLUSH_BEFORE)
1028 			drm_clflush_virt_range(ptr, PAGE_SIZE);
1029 
1030 		if (ptr[dword] != val) {
1031 			pr_err("n=%lu ptr[%u]=%u, val=%u\n",
1032 			       n, dword, ptr[dword], val);
1033 			kunmap_atomic(ptr);
1034 			err = -EINVAL;
1035 			break;
1036 		}
1037 
1038 		kunmap_atomic(ptr);
1039 	}
1040 
1041 	i915_gem_object_finish_access(obj);
1042 err_unlock:
1043 	i915_gem_object_unlock(obj);
1044 
1045 	return err;
1046 }
1047 
1048 static int __cpu_check_vmap(struct drm_i915_gem_object *obj, u32 dword, u32 val)
1049 {
1050 	unsigned long n = obj->base.size >> PAGE_SHIFT;
1051 	u32 *ptr;
1052 	int err;
1053 
1054 	err = i915_gem_object_wait(obj, 0, MAX_SCHEDULE_TIMEOUT);
1055 	if (err)
1056 		return err;
1057 
1058 	ptr = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WC);
1059 	if (IS_ERR(ptr))
1060 		return PTR_ERR(ptr);
1061 
1062 	ptr += dword;
1063 	while (n--) {
1064 		if (*ptr != val) {
1065 			pr_err("base[%u]=%08x, val=%08x\n",
1066 			       dword, *ptr, val);
1067 			err = -EINVAL;
1068 			break;
1069 		}
1070 
1071 		ptr += PAGE_SIZE / sizeof(*ptr);
1072 	}
1073 
1074 	i915_gem_object_unpin_map(obj);
1075 	return err;
1076 }
1077 
1078 static int cpu_check(struct drm_i915_gem_object *obj, u32 dword, u32 val)
1079 {
1080 	if (i915_gem_object_has_struct_page(obj))
1081 		return __cpu_check_shmem(obj, dword, val);
1082 	else
1083 		return __cpu_check_vmap(obj, dword, val);
1084 }
1085 
1086 static int __igt_write_huge(struct intel_context *ce,
1087 			    struct drm_i915_gem_object *obj,
1088 			    u64 size, u64 offset,
1089 			    u32 dword, u32 val)
1090 {
1091 	unsigned int flags = PIN_USER | PIN_OFFSET_FIXED;
1092 	struct i915_vma *vma;
1093 	int err;
1094 
1095 	vma = i915_vma_instance(obj, ce->vm, NULL);
1096 	if (IS_ERR(vma))
1097 		return PTR_ERR(vma);
1098 
1099 	err = i915_vma_pin(vma, size, 0, flags | offset);
1100 	if (err) {
1101 		/*
1102 		 * The ggtt may have some pages reserved so
1103 		 * refrain from erroring out.
1104 		 */
1105 		if (err == -ENOSPC && i915_is_ggtt(ce->vm))
1106 			err = 0;
1107 
1108 		return err;
1109 	}
1110 
1111 	err = igt_check_page_sizes(vma);
1112 	if (err)
1113 		goto out_vma_unpin;
1114 
1115 	err = gpu_write(ce, vma, dword, val);
1116 	if (err) {
1117 		pr_err("gpu-write failed at offset=%llx\n", offset);
1118 		goto out_vma_unpin;
1119 	}
1120 
1121 	err = cpu_check(obj, dword, val);
1122 	if (err) {
1123 		pr_err("cpu-check failed at offset=%llx\n", offset);
1124 		goto out_vma_unpin;
1125 	}
1126 
1127 out_vma_unpin:
1128 	i915_vma_unpin(vma);
1129 	return err;
1130 }
1131 
1132 static int igt_write_huge(struct drm_i915_private *i915,
1133 			  struct drm_i915_gem_object *obj)
1134 {
1135 	struct i915_gem_engines *engines;
1136 	struct i915_gem_engines_iter it;
1137 	struct intel_context *ce;
1138 	I915_RND_STATE(prng);
1139 	IGT_TIMEOUT(end_time);
1140 	unsigned int max_page_size;
1141 	unsigned int count;
1142 	struct i915_gem_context *ctx;
1143 	struct file *file;
1144 	u64 max;
1145 	u64 num;
1146 	u64 size;
1147 	int *order;
1148 	int i, n;
1149 	int err = 0;
1150 
1151 	file = mock_file(i915);
1152 	if (IS_ERR(file))
1153 		return PTR_ERR(file);
1154 
1155 	ctx = hugepage_ctx(i915, file);
1156 	if (IS_ERR(ctx)) {
1157 		err = PTR_ERR(ctx);
1158 		goto out;
1159 	}
1160 
1161 	GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
1162 
1163 	size = obj->base.size;
1164 	if (obj->mm.page_sizes.sg & I915_GTT_PAGE_SIZE_64K &&
1165 	    !HAS_64K_PAGES(i915))
1166 		size = round_up(size, I915_GTT_PAGE_SIZE_2M);
1167 
1168 	n = 0;
1169 	count = 0;
1170 	max = U64_MAX;
1171 	for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) {
1172 		count++;
1173 		if (!intel_engine_can_store_dword(ce->engine))
1174 			continue;
1175 
1176 		max = min(max, ce->vm->total);
1177 		n++;
1178 	}
1179 	i915_gem_context_unlock_engines(ctx);
1180 	if (!n)
1181 		goto out;
1182 
1183 	/*
1184 	 * To keep things interesting when alternating between engines in our
1185 	 * randomized order, let's also make feeding to the same engine a few
1186 	 * times in succession a possibility by enlarging the permutation array.
1187 	 */
1188 	order = i915_random_order(count * count, &prng);
1189 	if (!order)
1190 		return -ENOMEM;
1191 
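	/*
	 * Express max as the number of max_page_size slots that still leave
	 * room for the object itself, so every offset tried below stays within
	 * the smallest VM we found above.
	 */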
1192 	max_page_size = rounddown_pow_of_two(obj->mm.page_sizes.sg);
1193 	max = div_u64(max - size, max_page_size);
1194 
1195 	/*
1196 	 * Try various offsets in an ascending/descending fashion until we
1197 	 * time out -- we want to avoid issues hidden by effectively always using
1198 	 * offset = 0.
1199 	 */
1200 	i = 0;
1201 	engines = i915_gem_context_lock_engines(ctx);
1202 	for_each_prime_number_from(num, 0, max) {
1203 		u64 offset_low = num * max_page_size;
1204 		u64 offset_high = (max - num) * max_page_size;
1205 		u32 dword = offset_in_page(num) / 4;
1206 		struct intel_context *ce;
1207 
1208 		ce = engines->engines[order[i] % engines->num_engines];
1209 		i = (i + 1) % (count * count);
1210 		if (!ce || !intel_engine_can_store_dword(ce->engine))
1211 			continue;
1212 
1213 		/*
1214 		 * In order to utilize 64K pages we need to both pad the vma
1215 		 * size and ensure the vma offset is at the start of the pt
1216 		 * boundary; however, to improve coverage we opt for testing both
1217 		 * aligned and unaligned offsets.
1218 		 *
1219 		 * With PS64 this is no longer the case, but to ensure we
1220 		 * sometimes get the compact layout for smaller objects, apply
1221 		 * the round_up anyway.
1222 		 */
1223 		if (obj->mm.page_sizes.sg & I915_GTT_PAGE_SIZE_64K)
1224 			offset_low = round_down(offset_low,
1225 						I915_GTT_PAGE_SIZE_2M);
1226 
1227 		err = __igt_write_huge(ce, obj, size, offset_low,
1228 				       dword, num + 1);
1229 		if (err)
1230 			break;
1231 
1232 		err = __igt_write_huge(ce, obj, size, offset_high,
1233 				       dword, num + 1);
1234 		if (err)
1235 			break;
1236 
1237 		if (igt_timeout(end_time,
1238 				"%s timed out on %s, offset_low=%llx offset_high=%llx, max_page_size=%x\n",
1239 				__func__, ce->engine->name, offset_low, offset_high,
1240 				max_page_size))
1241 			break;
1242 	}
1243 	i915_gem_context_unlock_engines(ctx);
1244 
1245 	kfree(order);
1246 
1247 out:
1248 	fput(file);
1249 	return err;
1250 }
1251 
1252 typedef struct drm_i915_gem_object *
1253 (*igt_create_fn)(struct drm_i915_private *i915, u32 size, u32 flags);
1254 
1255 static inline bool igt_can_allocate_thp(struct drm_i915_private *i915)
1256 {
1257 	return i915->mm.gemfs && has_transparent_hugepage();
1258 }
1259 
1260 static struct drm_i915_gem_object *
1261 igt_create_shmem(struct drm_i915_private *i915, u32 size, u32 flags)
1262 {
1263 	if (!igt_can_allocate_thp(i915)) {
1264 		pr_info("%s missing THP support, skipping\n", __func__);
1265 		return ERR_PTR(-ENODEV);
1266 	}
1267 
1268 	return i915_gem_object_create_shmem(i915, size);
1269 }
1270 
1271 static struct drm_i915_gem_object *
1272 igt_create_internal(struct drm_i915_private *i915, u32 size, u32 flags)
1273 {
1274 	return i915_gem_object_create_internal(i915, size);
1275 }
1276 
1277 static struct drm_i915_gem_object *
1278 igt_create_system(struct drm_i915_private *i915, u32 size, u32 flags)
1279 {
1280 	return huge_pages_object(i915, size, size);
1281 }
1282 
1283 static struct drm_i915_gem_object *
1284 igt_create_local(struct drm_i915_private *i915, u32 size, u32 flags)
1285 {
1286 	return i915_gem_object_create_lmem(i915, size, flags);
1287 }
1288 
1289 static u32 igt_random_size(struct rnd_state *prng,
1290 			   u32 min_page_size,
1291 			   u32 max_page_size)
1292 {
1293 	u64 mask;
1294 	u32 size;
1295 
1296 	GEM_BUG_ON(!is_power_of_2(min_page_size));
1297 	GEM_BUG_ON(!is_power_of_2(max_page_size));
1298 	GEM_BUG_ON(min_page_size < PAGE_SIZE);
1299 	GEM_BUG_ON(min_page_size > max_page_size);
1300 
1301 	mask = ((max_page_size << 1ULL) - 1) & PAGE_MASK;
1302 	size = prandom_u32_state(prng) & mask;
1303 	if (size < min_page_size)
1304 		size |= min_page_size;
1305 
1306 	return size;
1307 }
1308 
1309 static int igt_ppgtt_smoke_huge(void *arg)
1310 {
1311 	struct drm_i915_private *i915 = arg;
1312 	struct drm_i915_gem_object *obj;
1313 	I915_RND_STATE(prng);
1314 	struct {
1315 		igt_create_fn fn;
1316 		u32 min;
1317 		u32 max;
1318 	} backends[] = {
1319 		{ igt_create_internal, SZ_64K, SZ_2M,  },
1320 		{ igt_create_shmem,    SZ_64K, SZ_32M, },
1321 		{ igt_create_local,    SZ_64K, SZ_1G,  },
1322 	};
1323 	int err;
1324 	int i;
1325 
1326 	/*
1327 	 * Sanity check that the HW uses huge pages correctly through our
1328 	 * various backends -- ensure that our writes land in the right place.
1329 	 */
1330 
1331 	for (i = 0; i < ARRAY_SIZE(backends); ++i) {
1332 		u32 min = backends[i].min;
1333 		u32 max = backends[i].max;
1334 		u32 size = max;
1335 
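		/*
		 * If the backend rejects the randomised size (-E2BIG), or we
		 * cannot pin that many pages, halve the size and retry rather
		 * than failing the whole subtest.
		 */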
1336 try_again:
1337 		size = igt_random_size(&prng, min, rounddown_pow_of_two(size));
1338 
1339 		obj = backends[i].fn(i915, size, 0);
1340 		if (IS_ERR(obj)) {
1341 			err = PTR_ERR(obj);
1342 			if (err == -E2BIG) {
1343 				size >>= 1;
1344 				goto try_again;
1345 			} else if (err == -ENODEV) {
1346 				err = 0;
1347 				continue;
1348 			}
1349 
1350 			return err;
1351 		}
1352 
1353 		err = i915_gem_object_pin_pages_unlocked(obj);
1354 		if (err) {
1355 			if (err == -ENXIO || err == -E2BIG || err == -ENOMEM) {
1356 				i915_gem_object_put(obj);
1357 				size >>= 1;
1358 				goto try_again;
1359 			}
1360 			goto out_put;
1361 		}
1362 
1363 		if (obj->mm.page_sizes.phys < min) {
1364 			pr_info("%s unable to allocate huge-page(s) with size=%u, i=%d\n",
1365 				__func__, size, i);
1366 			err = -ENOMEM;
1367 			goto out_unpin;
1368 		}
1369 
1370 		err = igt_write_huge(i915, obj);
1371 		if (err) {
1372 			pr_err("%s write-huge failed with size=%u, i=%d\n",
1373 			       __func__, size, i);
1374 		}
1375 out_unpin:
1376 		i915_gem_object_lock(obj, NULL);
1377 		i915_gem_object_unpin_pages(obj);
1378 		__i915_gem_object_put_pages(obj);
1379 		i915_gem_object_unlock(obj);
1380 out_put:
1381 		i915_gem_object_put(obj);
1382 
1383 		if (err == -ENOMEM || err == -ENXIO)
1384 			err = 0;
1385 
1386 		if (err)
1387 			break;
1388 
1389 		cond_resched();
1390 	}
1391 
1392 	return err;
1393 }
1394 
1395 static int igt_ppgtt_sanity_check(void *arg)
1396 {
1397 	struct drm_i915_private *i915 = arg;
1398 	unsigned int supported = RUNTIME_INFO(i915)->page_sizes;
1399 	struct {
1400 		igt_create_fn fn;
1401 		unsigned int flags;
1402 	} backends[] = {
1403 		{ igt_create_system, 0,                        },
1404 		{ igt_create_local,  0,                        },
1405 		{ igt_create_local,  I915_BO_ALLOC_CONTIGUOUS, },
1406 	};
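	/*
	 * Each combo pairs an object size with the sg page-size mask that gets
	 * forced onto the object before binding, poking directly at the
	 * different HW fallback paths.
	 */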
1407 	struct {
1408 		u32 size;
1409 		u32 pages;
1410 	} combos[] = {
1411 		{ SZ_64K,		SZ_64K		},
1412 		{ SZ_2M,		SZ_2M		},
1413 		{ SZ_2M,		SZ_64K		},
1414 		{ SZ_2M - SZ_64K,	SZ_64K		},
1415 		{ SZ_2M - SZ_4K,	SZ_64K | SZ_4K	},
1416 		{ SZ_2M + SZ_4K,	SZ_64K | SZ_4K	},
1417 		{ SZ_2M + SZ_4K,	SZ_2M  | SZ_4K	},
1418 		{ SZ_2M + SZ_64K,	SZ_2M  | SZ_64K },
1419 		{ SZ_2M + SZ_64K,	SZ_64K		},
1420 	};
1421 	int i, j;
1422 	int err;
1423 
1424 	if (supported == I915_GTT_PAGE_SIZE_4K)
1425 		return 0;
1426 
1427 	/*
1428 	 * Sanity check that the HW behaves with a limited set of combinations.
1429 	 * We already have a bunch of randomised testing, which should give us
1430 	 * a decent amount of variation between runs; however, we should keep
1431 	 * this to limit the chances of introducing a temporary regression by
1432 	 * testing the most obvious cases that might make something blow up.
1433 	 */
1434 
1435 	for (i = 0; i < ARRAY_SIZE(backends); ++i) {
1436 		for (j = 0; j < ARRAY_SIZE(combos); ++j) {
1437 			struct drm_i915_gem_object *obj;
1438 			u32 size = combos[j].size;
1439 			u32 pages = combos[j].pages;
1440 
1441 			obj = backends[i].fn(i915, size, backends[i].flags);
1442 			if (IS_ERR(obj)) {
1443 				err = PTR_ERR(obj);
1444 				if (err == -ENODEV) {
1445 					pr_info("Device lacks local memory, skipping\n");
1446 					err = 0;
1447 					break;
1448 				}
1449 
1450 				return err;
1451 			}
1452 
1453 			err = i915_gem_object_pin_pages_unlocked(obj);
1454 			if (err) {
1455 				i915_gem_object_put(obj);
1456 				goto out;
1457 			}
1458 
1459 			GEM_BUG_ON(pages > obj->base.size);
1460 			pages = pages & supported;
1461 
1462 			if (pages)
1463 				obj->mm.page_sizes.sg = pages;
1464 
1465 			err = igt_write_huge(i915, obj);
1466 
1467 			i915_gem_object_lock(obj, NULL);
1468 			i915_gem_object_unpin_pages(obj);
1469 			__i915_gem_object_put_pages(obj);
1470 			i915_gem_object_unlock(obj);
1471 			i915_gem_object_put(obj);
1472 
1473 			if (err) {
1474 				pr_err("%s write-huge failed with size=%u pages=%u i=%d, j=%d\n",
1475 				       __func__, size, pages, i, j);
1476 				goto out;
1477 			}
1478 		}
1479 
1480 		cond_resched();
1481 	}
1482 
1483 out:
1484 	if (err == -ENOMEM)
1485 		err = 0;
1486 
1487 	return err;
1488 }
1489 
1490 static int igt_ppgtt_compact(void *arg)
1491 {
1492 	struct drm_i915_private *i915 = arg;
1493 	struct drm_i915_gem_object *obj;
1494 	int err;
1495 
1496 	/*
1497 	 * Simple test to catch issues with compact 64K pages -- the pt is
1498 	 * compacted to 256B, which gives us 32 entries per pt. However, since the
1499 	 * backing page for the pt is 4K, any extra entries we might incorrectly
1500 	 * write out should be ignored by the HW. If we ever hit such a case, this
1501 	 * test should catch it since some of our writes would land in scratch.
1502 	 */
1503 
1504 	if (!HAS_64K_PAGES(i915)) {
1505 		pr_info("device lacks compact 64K page support, skipping\n");
1506 		return 0;
1507 	}
1508 
1509 	if (!HAS_LMEM(i915)) {
1510 		pr_info("device lacks LMEM support, skipping\n");
1511 		return 0;
1512 	}
1513 
1514 	/* We want the range to cover multiple page-table boundaries. */
1515 	obj = i915_gem_object_create_lmem(i915, SZ_4M, 0);
1516 	if (IS_ERR(obj))
1517 		return PTR_ERR(obj);
1518 
1519 	err = i915_gem_object_pin_pages_unlocked(obj);
1520 	if (err)
1521 		goto out_put;
1522 
1523 	if (obj->mm.page_sizes.phys < I915_GTT_PAGE_SIZE_64K) {
1524 		pr_info("LMEM compact unable to allocate huge-page(s)\n");
1525 		goto out_unpin;
1526 	}
1527 
1528 	/*
1529 	 * Disable 2M GTT pages by forcing the page-size to 64K for the GTT
1530 	 * insertion.
1531 	 */
1532 	obj->mm.page_sizes.sg = I915_GTT_PAGE_SIZE_64K;
1533 
1534 	err = igt_write_huge(i915, obj);
1535 	if (err)
1536 		pr_err("LMEM compact write-huge failed\n");
1537 
1538 out_unpin:
1539 	i915_gem_object_unpin_pages(obj);
1540 out_put:
1541 	i915_gem_object_put(obj);
1542 
1543 	if (err == -ENOMEM)
1544 		err = 0;
1545 
1546 	return err;
1547 }
1548 
1549 static int igt_ppgtt_mixed(void *arg)
1550 {
1551 	struct drm_i915_private *i915 = arg;
1552 	const unsigned long flags = PIN_OFFSET_FIXED | PIN_USER;
1553 	struct drm_i915_gem_object *obj, *on;
1554 	struct i915_gem_engines *engines;
1555 	struct i915_gem_engines_iter it;
1556 	struct i915_address_space *vm;
1557 	struct i915_gem_context *ctx;
1558 	struct intel_context *ce;
1559 	struct file *file;
1560 	I915_RND_STATE(prng);
1561 	LIST_HEAD(objects);
1562 	struct intel_memory_region *mr;
1563 	struct i915_vma *vma;
1564 	unsigned int count;
1565 	u32 i, addr;
1566 	int *order;
1567 	int n, err;
1568 
1569 	/*
1570 	 * Sanity check mixing 4K and 64K pages within the same page-table via
1571 	 * the new PS64 TLB hint.
1572 	 */
1573 
1574 	if (!HAS_64K_PAGES(i915)) {
1575 		pr_info("device lacks PS64, skipping\n");
1576 		return 0;
1577 	}
1578 
1579 	file = mock_file(i915);
1580 	if (IS_ERR(file))
1581 		return PTR_ERR(file);
1582 
1583 	ctx = hugepage_ctx(i915, file);
1584 	if (IS_ERR(ctx)) {
1585 		err = PTR_ERR(ctx);
1586 		goto out;
1587 	}
1588 	vm = i915_gem_context_get_eb_vm(ctx);
1589 
1590 	i = 0;
1591 	addr = 0;
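	/*
	 * Pack randomly sized objects back to back into the VM, alternating
	 * between LMEM and SMEM, until roughly 16M of address space is filled.
	 */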
1592 	do {
1593 		u32 sz;
1594 
1595 		sz = i915_prandom_u32_max_state(SZ_4M, &prng);
1596 		sz = max_t(u32, sz, SZ_4K);
1597 
1598 		mr = i915->mm.regions[INTEL_REGION_LMEM_0];
1599 		if (i & 1)
1600 			mr = i915->mm.regions[INTEL_REGION_SMEM];
1601 
1602 		obj = i915_gem_object_create_region(mr, sz, 0, 0);
1603 		if (IS_ERR(obj)) {
1604 			err = PTR_ERR(obj);
1605 			goto out_vm;
1606 		}
1607 
1608 		list_add_tail(&obj->st_link, &objects);
1609 
1610 		vma = i915_vma_instance(obj, vm, NULL);
1611 		if (IS_ERR(vma)) {
1612 			err = PTR_ERR(vma);
1613 			goto err_put;
1614 		}
1615 
1616 		addr = round_up(addr, mr->min_page_size);
1617 		err = i915_vma_pin(vma, 0, 0, addr | flags);
1618 		if (err)
1619 			goto err_put;
1620 
1621 		if (mr->type == INTEL_MEMORY_LOCAL &&
1622 		    (vma->resource->page_sizes_gtt & I915_GTT_PAGE_SIZE_4K)) {
1623 			err = -EINVAL;
1624 			goto err_put;
1625 		}
1626 
1627 		addr += obj->base.size;
1628 		i++;
1629 	} while (addr <= SZ_16M);
1630 
1631 	n = 0;
1632 	count = 0;
1633 	for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) {
1634 		count++;
1635 		if (!intel_engine_can_store_dword(ce->engine))
1636 			continue;
1637 
1638 		n++;
1639 	}
1640 	i915_gem_context_unlock_engines(ctx);
1641 	if (!n)
1642 		goto err_put;
1643 
1644 	order = i915_random_order(count * count, &prng);
1645 	if (!order) {
1646 		err = -ENOMEM;
1647 		goto err_put;
1648 	}
1649 
1650 	i = 0;
1651 	addr = 0;
1652 	engines = i915_gem_context_lock_engines(ctx);
1653 	list_for_each_entry(obj, &objects, st_link) {
1654 		u32 rnd = i915_prandom_u32_max_state(UINT_MAX, &prng);
1655 
1656 		addr = round_up(addr, obj->mm.region->min_page_size);
1657 
1658 		ce = engines->engines[order[i] % engines->num_engines];
1659 		i = (i + 1) % (count * count);
1660 		if (!ce || !intel_engine_can_store_dword(ce->engine))
1661 			continue;
1662 
1663 		err = __igt_write_huge(ce, obj, obj->base.size, addr, 0, rnd);
1664 		if (err)
1665 			break;
1666 
1667 		err = __igt_write_huge(ce, obj, obj->base.size, addr,
1668 				       offset_in_page(rnd) / sizeof(u32), rnd + 1);
1669 		if (err)
1670 			break;
1671 
1672 		err = __igt_write_huge(ce, obj, obj->base.size, addr,
1673 				       (PAGE_SIZE / sizeof(u32)) - 1,
1674 				       rnd + 2);
1675 		if (err)
1676 			break;
1677 
1678 		addr += obj->base.size;
1679 
1680 		cond_resched();
1681 	}
1682 
1683 	i915_gem_context_unlock_engines(ctx);
1684 	kfree(order);
1685 err_put:
1686 	list_for_each_entry_safe(obj, on, &objects, st_link) {
1687 		list_del(&obj->st_link);
1688 		i915_gem_object_put(obj);
1689 	}
1690 out_vm:
1691 	i915_vm_put(vm);
1692 out:
1693 	fput(file);
1694 	return err;
1695 }
1696 
1697 static int igt_tmpfs_fallback(void *arg)
1698 {
1699 	struct drm_i915_private *i915 = arg;
1700 	struct i915_address_space *vm;
1701 	struct i915_gem_context *ctx;
1702 	struct vfsmount *gemfs = i915->mm.gemfs;
1703 	struct drm_i915_gem_object *obj;
1704 	struct i915_vma *vma;
1705 	struct file *file;
1706 	u32 *vaddr;
1707 	int err = 0;
1708 
1709 	file = mock_file(i915);
1710 	if (IS_ERR(file))
1711 		return PTR_ERR(file);
1712 
1713 	ctx = hugepage_ctx(i915, file);
1714 	if (IS_ERR(ctx)) {
1715 		err = PTR_ERR(ctx);
1716 		goto out;
1717 	}
1718 	vm = i915_gem_context_get_eb_vm(ctx);
1719 
1720 	/*
1721 	 * Make sure that we don't burst into a ball of flames upon falling back
1722 	 * to tmpfs, which we rely on if, on the off chance, we encounter a failure
1723 	 * when setting up gemfs.
1724 	 */
1725 
1726 	i915->mm.gemfs = NULL;
1727 
1728 	obj = i915_gem_object_create_shmem(i915, PAGE_SIZE);
1729 	if (IS_ERR(obj)) {
1730 		err = PTR_ERR(obj);
1731 		goto out_restore;
1732 	}
1733 
1734 	vaddr = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WB);
1735 	if (IS_ERR(vaddr)) {
1736 		err = PTR_ERR(vaddr);
1737 		goto out_put;
1738 	}
1739 	*vaddr = 0xdeadbeaf;
1740 
1741 	__i915_gem_object_flush_map(obj, 0, 64);
1742 	i915_gem_object_unpin_map(obj);
1743 
1744 	vma = i915_vma_instance(obj, vm, NULL);
1745 	if (IS_ERR(vma)) {
1746 		err = PTR_ERR(vma);
1747 		goto out_put;
1748 	}
1749 
1750 	err = i915_vma_pin(vma, 0, 0, PIN_USER);
1751 	if (err)
1752 		goto out_put;
1753 
1754 	err = igt_check_page_sizes(vma);
1755 
1756 	i915_vma_unpin(vma);
1757 out_put:
1758 	i915_gem_object_put(obj);
1759 out_restore:
1760 	i915->mm.gemfs = gemfs;
1761 
1762 	i915_vm_put(vm);
1763 out:
1764 	fput(file);
1765 	return err;
1766 }
1767 
1768 static int igt_shrink_thp(void *arg)
1769 {
1770 	struct drm_i915_private *i915 = arg;
1771 	struct i915_address_space *vm;
1772 	struct i915_gem_context *ctx;
1773 	struct drm_i915_gem_object *obj;
1774 	struct i915_gem_engines_iter it;
1775 	struct intel_context *ce;
1776 	struct i915_vma *vma;
1777 	struct file *file;
1778 	unsigned int flags = PIN_USER;
1779 	unsigned int n;
1780 	intel_wakeref_t wf;
1781 	bool should_swap;
1782 	int err;
1783 
1784 	if (!igt_can_allocate_thp(i915)) {
1785 		pr_info("missing THP support, skipping\n");
1786 		return 0;
1787 	}
1788 
1789 	file = mock_file(i915);
1790 	if (IS_ERR(file))
1791 		return PTR_ERR(file);
1792 
1793 	ctx = hugepage_ctx(i915, file);
1794 	if (IS_ERR(ctx)) {
1795 		err = PTR_ERR(ctx);
1796 		goto out;
1797 	}
1798 	vm = i915_gem_context_get_eb_vm(ctx);
1799 
1800 	/*
1801 	 * Sanity check shrinking a huge-paged object -- make sure nothing blows
1802 	 * up.
1803 	 */
1804 
1805 	obj = i915_gem_object_create_shmem(i915, SZ_2M);
1806 	if (IS_ERR(obj)) {
1807 		err = PTR_ERR(obj);
1808 		goto out_vm;
1809 	}
1810 
1811 	vma = i915_vma_instance(obj, vm, NULL);
1812 	if (IS_ERR(vma)) {
1813 		err = PTR_ERR(vma);
1814 		goto out_put;
1815 	}
1816 
1817 	wf = intel_runtime_pm_get(&i915->runtime_pm); /* active shrink */
1818 
1819 	err = i915_vma_pin(vma, 0, 0, flags);
1820 	if (err)
1821 		goto out_wf;
1822 
1823 	if (obj->mm.page_sizes.phys < I915_GTT_PAGE_SIZE_2M) {
1824 		pr_info("failed to allocate THP, finishing test early\n");
1825 		goto out_unpin;
1826 	}
1827 
1828 	err = igt_check_page_sizes(vma);
1829 	if (err)
1830 		goto out_unpin;
1831 
1832 	n = 0;
1833 
1834 	for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) {
1835 		if (!intel_engine_can_store_dword(ce->engine))
1836 			continue;
1837 
1838 		err = gpu_write(ce, vma, n++, 0xdeadbeaf);
1839 		if (err)
1840 			break;
1841 	}
1842 	i915_gem_context_unlock_engines(ctx);
1843 	/*
1844 	 * Nuke everything *before* we unpin the pages so we can be reasonably
1845 	 * sure that, when we later check get_nr_swap_pages(), some random
1846 	 * leftover object doesn't steal the remaining swap space.
1847 	 */
1848 	i915_gem_shrink(NULL, i915, -1UL, NULL,
1849 			I915_SHRINK_BOUND |
1850 			I915_SHRINK_UNBOUND |
1851 			I915_SHRINK_ACTIVE);
1852 	i915_vma_unpin(vma);
1853 	if (err)
1854 		goto out_put;
1855 
1856 	/*
1857 	 * Now that the pages are *unpinned* shrinking should invoke
1858 	 * shmem to truncate our pages, if we have available swap.
1859 	 */
1860 	should_swap = get_nr_swap_pages() > 0;
1861 	i915_gem_shrink(NULL, i915, -1UL, NULL,
1862 			I915_SHRINK_BOUND |
1863 			I915_SHRINK_UNBOUND |
1864 			I915_SHRINK_ACTIVE |
1865 			I915_SHRINK_WRITEBACK);
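	/*
	 * With swap available the object should now have been swapped out
	 * entirely, leaving no pages and no page-size bits behind; without
	 * swap it must still be fully resident.
	 */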
1866 	if (should_swap == i915_gem_object_has_pages(obj)) {
1867 		pr_err("unexpected pages mismatch, should_swap=%s\n",
1868 		       str_yes_no(should_swap));
1869 		err = -EINVAL;
1870 		goto out_put;
1871 	}
1872 
1873 	if (should_swap == (obj->mm.page_sizes.sg || obj->mm.page_sizes.phys)) {
1874 		pr_err("unexpected residual page-size bits, should_swap=%s\n",
1875 		       str_yes_no(should_swap));
1876 		err = -EINVAL;
1877 		goto out_put;
1878 	}
1879 
1880 	err = i915_vma_pin(vma, 0, 0, flags);
1881 	if (err)
1882 		goto out_put;
1883 
1884 	while (n--) {
1885 		err = cpu_check(obj, n, 0xdeadbeaf);
1886 		if (err)
1887 			break;
1888 	}
1889 
1890 out_unpin:
1891 	i915_vma_unpin(vma);
1892 out_wf:
1893 	intel_runtime_pm_put(&i915->runtime_pm, wf);
1894 out_put:
1895 	i915_gem_object_put(obj);
1896 out_vm:
1897 	i915_vm_put(vm);
1898 out:
1899 	fput(file);
1900 	return err;
1901 }
1902 
1903 int i915_gem_huge_page_mock_selftests(void)
1904 {
1905 	static const struct i915_subtest tests[] = {
1906 		SUBTEST(igt_mock_exhaust_device_supported_pages),
1907 		SUBTEST(igt_mock_memory_region_huge_pages),
1908 		SUBTEST(igt_mock_ppgtt_misaligned_dma),
1909 		SUBTEST(igt_mock_ppgtt_huge_fill),
1910 		SUBTEST(igt_mock_ppgtt_64K),
1911 	};
1912 	struct drm_i915_private *dev_priv;
1913 	struct i915_ppgtt *ppgtt;
1914 	int err;
1915 
1916 	dev_priv = mock_gem_device();
1917 	if (!dev_priv)
1918 		return -ENOMEM;
1919 
1920 	/* Pretend to be a device which supports the 48b PPGTT */
1921 	RUNTIME_INFO(dev_priv)->ppgtt_type = INTEL_PPGTT_FULL;
1922 	RUNTIME_INFO(dev_priv)->ppgtt_size = 48;
1923 
1924 	ppgtt = i915_ppgtt_create(to_gt(dev_priv), 0);
1925 	if (IS_ERR(ppgtt)) {
1926 		err = PTR_ERR(ppgtt);
1927 		goto out_unlock;
1928 	}
1929 
1930 	if (!i915_vm_is_4lvl(&ppgtt->vm)) {
1931 		pr_err("failed to create 48b PPGTT\n");
1932 		err = -EINVAL;
1933 		goto out_put;
1934 	}
1935 
1936 	/* If we ever hit this then it's time to mock the 64K scratch */
1937 	if (!i915_vm_has_scratch_64K(&ppgtt->vm)) {
1938 		pr_err("PPGTT missing 64K scratch page\n");
1939 		err = -EINVAL;
1940 		goto out_put;
1941 	}
1942 
1943 	err = i915_subtests(tests, ppgtt);
1944 
1945 out_put:
1946 	i915_vm_put(&ppgtt->vm);
1947 out_unlock:
1948 	mock_destroy_device(dev_priv);
1949 	return err;
1950 }
1951 
1952 int i915_gem_huge_page_live_selftests(struct drm_i915_private *i915)
1953 {
1954 	static const struct i915_subtest tests[] = {
1955 		SUBTEST(igt_shrink_thp),
1956 		SUBTEST(igt_tmpfs_fallback),
1957 		SUBTEST(igt_ppgtt_smoke_huge),
1958 		SUBTEST(igt_ppgtt_sanity_check),
1959 		SUBTEST(igt_ppgtt_compact),
1960 		SUBTEST(igt_ppgtt_mixed),
1961 	};
1962 
1963 	if (!HAS_PPGTT(i915)) {
1964 		pr_info("PPGTT not supported, skipping live-selftests\n");
1965 		return 0;
1966 	}
1967 
1968 	if (intel_gt_is_wedged(to_gt(i915)))
1969 		return 0;
1970 
1971 	return i915_live_subtests(tests, i915);
1972 }
1973