/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2017 Intel Corporation
 */

#include <linux/prime_numbers.h>
#include <linux/string_helpers.h>
#include <linux/swap.h>

#include "i915_selftest.h"

#include "gem/i915_gem_internal.h"
#include "gem/i915_gem_lmem.h"
#include "gem/i915_gem_pm.h"
#include "gem/i915_gem_region.h"

#include "gt/intel_gt.h"

#include "igt_gem_utils.h"
#include "mock_context.h"

#include "selftests/mock_drm.h"
#include "selftests/mock_gem_device.h"
#include "selftests/mock_region.h"
#include "selftests/i915_random.h"

static struct i915_gem_context *hugepage_ctx(struct drm_i915_private *i915,
					     struct file *file)
{
	struct i915_gem_context *ctx = live_context(i915, file);
	struct i915_address_space *vm;

	if (IS_ERR(ctx))
		return ctx;

	vm = ctx->vm;
	if (vm)
		WRITE_ONCE(vm->scrub_64K, true);

	return ctx;
}

static const unsigned int page_sizes[] = {
	I915_GTT_PAGE_SIZE_2M,
	I915_GTT_PAGE_SIZE_64K,
	I915_GTT_PAGE_SIZE_4K,
};

static unsigned int get_largest_page_size(struct drm_i915_private *i915,
					  u64 rem)
{
	int i;

	for (i = 0; i < ARRAY_SIZE(page_sizes); ++i) {
		unsigned int page_size = page_sizes[i];

		if (HAS_PAGE_SIZES(i915, page_size) && rem >= page_size)
			return page_size;
	}

	return 0;
}

static void huge_pages_free_pages(struct sg_table *st)
{
	struct scatterlist *sg;

	for (sg = st->sgl; sg; sg = __sg_next(sg)) {
		if (sg_page(sg))
			__free_pages(sg_page(sg), get_order(sg->length));
	}

	sg_free_table(st);
	kfree(st);
}

static int get_huge_pages(struct drm_i915_gem_object *obj)
{
#define GFP (GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY)
	unsigned int page_mask = obj->mm.page_mask;
	struct sg_table *st;
	struct scatterlist *sg;
	unsigned int sg_page_sizes;
	u64 rem;

	/* restricted by sg_alloc_table */
	if (overflows_type(obj->base.size >> PAGE_SHIFT, unsigned int))
		return -E2BIG;

	st = kmalloc(sizeof(*st), GFP);
	if (!st)
		return -ENOMEM;

	if (sg_alloc_table(st, obj->base.size >> PAGE_SHIFT, GFP)) {
		kfree(st);
		return -ENOMEM;
	}

	rem = obj->base.size;
	sg = st->sgl;
	st->nents = 0;
	sg_page_sizes = 0;

	/*
	 * Our goal here is simple, we want to greedily fill the object from
	 * largest to smallest page-size, while ensuring that we use *every*
	 * page-size as per the given page-mask.
109 */ 110 do { 111 unsigned int bit = ilog2(page_mask); 112 unsigned int page_size = BIT(bit); 113 int order = get_order(page_size); 114 115 do { 116 struct page *page; 117 118 GEM_BUG_ON(order > MAX_ORDER); 119 page = alloc_pages(GFP | __GFP_ZERO, order); 120 if (!page) 121 goto err; 122 123 sg_set_page(sg, page, page_size, 0); 124 sg_page_sizes |= page_size; 125 st->nents++; 126 127 rem -= page_size; 128 if (!rem) { 129 sg_mark_end(sg); 130 break; 131 } 132 133 sg = __sg_next(sg); 134 } while ((rem - ((page_size-1) & page_mask)) >= page_size); 135 136 page_mask &= (page_size-1); 137 } while (page_mask); 138 139 if (i915_gem_gtt_prepare_pages(obj, st)) 140 goto err; 141 142 GEM_BUG_ON(sg_page_sizes != obj->mm.page_mask); 143 __i915_gem_object_set_pages(obj, st); 144 145 return 0; 146 147 err: 148 sg_set_page(sg, NULL, 0, 0); 149 sg_mark_end(sg); 150 huge_pages_free_pages(st); 151 152 return -ENOMEM; 153 } 154 155 static void put_huge_pages(struct drm_i915_gem_object *obj, 156 struct sg_table *pages) 157 { 158 i915_gem_gtt_finish_pages(obj, pages); 159 huge_pages_free_pages(pages); 160 161 obj->mm.dirty = false; 162 163 __start_cpu_write(obj); 164 } 165 166 static const struct drm_i915_gem_object_ops huge_page_ops = { 167 .name = "huge-gem", 168 .flags = I915_GEM_OBJECT_IS_SHRINKABLE, 169 .get_pages = get_huge_pages, 170 .put_pages = put_huge_pages, 171 }; 172 173 static struct drm_i915_gem_object * 174 huge_pages_object(struct drm_i915_private *i915, 175 u64 size, 176 unsigned int page_mask) 177 { 178 static struct lock_class_key lock_class; 179 struct drm_i915_gem_object *obj; 180 unsigned int cache_level; 181 182 GEM_BUG_ON(!size); 183 GEM_BUG_ON(!IS_ALIGNED(size, BIT(__ffs(page_mask)))); 184 185 if (size >> PAGE_SHIFT > INT_MAX) 186 return ERR_PTR(-E2BIG); 187 188 if (overflows_type(size, obj->base.size)) 189 return ERR_PTR(-E2BIG); 190 191 obj = i915_gem_object_alloc(); 192 if (!obj) 193 return ERR_PTR(-ENOMEM); 194 195 drm_gem_private_object_init(&i915->drm, &obj->base, size); 196 i915_gem_object_init(obj, &huge_page_ops, &lock_class, 0); 197 obj->mem_flags |= I915_BO_FLAG_STRUCT_PAGE; 198 i915_gem_object_set_volatile(obj); 199 200 obj->write_domain = I915_GEM_DOMAIN_CPU; 201 obj->read_domains = I915_GEM_DOMAIN_CPU; 202 203 cache_level = HAS_LLC(i915) ? 
I915_CACHE_LLC : I915_CACHE_NONE; 204 i915_gem_object_set_cache_coherency(obj, cache_level); 205 206 obj->mm.page_mask = page_mask; 207 208 return obj; 209 } 210 211 static int fake_get_huge_pages(struct drm_i915_gem_object *obj) 212 { 213 struct drm_i915_private *i915 = to_i915(obj->base.dev); 214 const u64 max_len = rounddown_pow_of_two(UINT_MAX); 215 struct sg_table *st; 216 struct scatterlist *sg; 217 u64 rem; 218 219 /* restricted by sg_alloc_table */ 220 if (overflows_type(obj->base.size >> PAGE_SHIFT, unsigned int)) 221 return -E2BIG; 222 223 st = kmalloc(sizeof(*st), GFP); 224 if (!st) 225 return -ENOMEM; 226 227 if (sg_alloc_table(st, obj->base.size >> PAGE_SHIFT, GFP)) { 228 kfree(st); 229 return -ENOMEM; 230 } 231 232 /* Use optimal page sized chunks to fill in the sg table */ 233 rem = obj->base.size; 234 sg = st->sgl; 235 st->nents = 0; 236 do { 237 unsigned int page_size = get_largest_page_size(i915, rem); 238 unsigned int len = min(page_size * div_u64(rem, page_size), 239 max_len); 240 241 GEM_BUG_ON(!page_size); 242 243 sg->offset = 0; 244 sg->length = len; 245 sg_dma_len(sg) = len; 246 sg_dma_address(sg) = page_size; 247 248 st->nents++; 249 250 rem -= len; 251 if (!rem) { 252 sg_mark_end(sg); 253 break; 254 } 255 256 sg = sg_next(sg); 257 } while (1); 258 259 i915_sg_trim(st); 260 261 __i915_gem_object_set_pages(obj, st); 262 263 return 0; 264 } 265 266 static int fake_get_huge_pages_single(struct drm_i915_gem_object *obj) 267 { 268 struct drm_i915_private *i915 = to_i915(obj->base.dev); 269 struct sg_table *st; 270 struct scatterlist *sg; 271 unsigned int page_size; 272 273 st = kmalloc(sizeof(*st), GFP); 274 if (!st) 275 return -ENOMEM; 276 277 if (sg_alloc_table(st, 1, GFP)) { 278 kfree(st); 279 return -ENOMEM; 280 } 281 282 sg = st->sgl; 283 st->nents = 1; 284 285 page_size = get_largest_page_size(i915, obj->base.size); 286 GEM_BUG_ON(!page_size); 287 288 sg->offset = 0; 289 sg->length = obj->base.size; 290 sg_dma_len(sg) = obj->base.size; 291 sg_dma_address(sg) = page_size; 292 293 __i915_gem_object_set_pages(obj, st); 294 295 return 0; 296 #undef GFP 297 } 298 299 static void fake_free_huge_pages(struct drm_i915_gem_object *obj, 300 struct sg_table *pages) 301 { 302 sg_free_table(pages); 303 kfree(pages); 304 } 305 306 static void fake_put_huge_pages(struct drm_i915_gem_object *obj, 307 struct sg_table *pages) 308 { 309 fake_free_huge_pages(obj, pages); 310 obj->mm.dirty = false; 311 } 312 313 static const struct drm_i915_gem_object_ops fake_ops = { 314 .name = "fake-gem", 315 .flags = I915_GEM_OBJECT_IS_SHRINKABLE, 316 .get_pages = fake_get_huge_pages, 317 .put_pages = fake_put_huge_pages, 318 }; 319 320 static const struct drm_i915_gem_object_ops fake_ops_single = { 321 .name = "fake-gem", 322 .flags = I915_GEM_OBJECT_IS_SHRINKABLE, 323 .get_pages = fake_get_huge_pages_single, 324 .put_pages = fake_put_huge_pages, 325 }; 326 327 static struct drm_i915_gem_object * 328 fake_huge_pages_object(struct drm_i915_private *i915, u64 size, bool single) 329 { 330 static struct lock_class_key lock_class; 331 struct drm_i915_gem_object *obj; 332 333 GEM_BUG_ON(!size); 334 GEM_BUG_ON(!IS_ALIGNED(size, I915_GTT_PAGE_SIZE)); 335 336 if (size >> PAGE_SHIFT > UINT_MAX) 337 return ERR_PTR(-E2BIG); 338 339 if (overflows_type(size, obj->base.size)) 340 return ERR_PTR(-E2BIG); 341 342 obj = i915_gem_object_alloc(); 343 if (!obj) 344 return ERR_PTR(-ENOMEM); 345 346 drm_gem_private_object_init(&i915->drm, &obj->base, size); 347 348 if (single) 349 i915_gem_object_init(obj, 
					      &fake_ops_single, &lock_class, 0);
	else
		i915_gem_object_init(obj, &fake_ops, &lock_class, 0);

	i915_gem_object_set_volatile(obj);

	obj->write_domain = I915_GEM_DOMAIN_CPU;
	obj->read_domains = I915_GEM_DOMAIN_CPU;
	obj->pat_index = i915_gem_get_pat_index(i915, I915_CACHE_NONE);

	return obj;
}

static int igt_check_page_sizes(struct i915_vma *vma)
{
	struct drm_i915_private *i915 = vma->vm->i915;
	unsigned int supported = RUNTIME_INFO(i915)->page_sizes;
	struct drm_i915_gem_object *obj = vma->obj;
	int err;

	/* We have to wait for the async bind to complete before our asserts */
	err = i915_vma_sync(vma);
	if (err)
		return err;

	if (!HAS_PAGE_SIZES(i915, vma->page_sizes.sg)) {
		pr_err("unsupported page_sizes.sg=%u, supported=%u\n",
		       vma->page_sizes.sg & ~supported, supported);
		err = -EINVAL;
	}

	if (!HAS_PAGE_SIZES(i915, vma->resource->page_sizes_gtt)) {
		pr_err("unsupported page_sizes.gtt=%u, supported=%u\n",
		       vma->resource->page_sizes_gtt & ~supported, supported);
		err = -EINVAL;
	}

	if (vma->page_sizes.phys != obj->mm.page_sizes.phys) {
		pr_err("vma->page_sizes.phys(%u) != obj->mm.page_sizes.phys(%u)\n",
		       vma->page_sizes.phys, obj->mm.page_sizes.phys);
		err = -EINVAL;
	}

	if (vma->page_sizes.sg != obj->mm.page_sizes.sg) {
		pr_err("vma->page_sizes.sg(%u) != obj->mm.page_sizes.sg(%u)\n",
		       vma->page_sizes.sg, obj->mm.page_sizes.sg);
		err = -EINVAL;
	}

	/*
	 * The dma-api is like a box of chocolates when it comes to the
	 * alignment of dma addresses, however for LMEM we have total control
	 * and so can guarantee alignment, likewise when we allocate our blocks
	 * they should appear in descending order, and if we know that we align
	 * to the largest page size for the GTT address, we should be able to
	 * assert that if we see 2M physical pages then we should also get 2M
	 * GTT pages. If we don't then something might be wrong in our
	 * construction of the backing pages.
	 *
	 * Maintaining alignment is required to utilise huge pages in the ppGTT.
	 */
	if (i915_gem_object_is_lmem(obj) &&
	    IS_ALIGNED(i915_vma_offset(vma), SZ_2M) &&
	    vma->page_sizes.sg & SZ_2M &&
	    vma->resource->page_sizes_gtt < SZ_2M) {
		pr_err("gtt pages mismatch for LMEM, expected 2M GTT pages, sg(%u), gtt(%u)\n",
		       vma->page_sizes.sg, vma->resource->page_sizes_gtt);
		err = -EINVAL;
	}

	return err;
}

static int igt_mock_exhaust_device_supported_pages(void *arg)
{
	struct i915_ppgtt *ppgtt = arg;
	struct drm_i915_private *i915 = ppgtt->vm.i915;
	unsigned int saved_mask = RUNTIME_INFO(i915)->page_sizes;
	struct drm_i915_gem_object *obj;
	struct i915_vma *vma;
	int i, j, single;
	int err;

	/*
	 * Sanity check creating objects with every valid page support
	 * combination for our mock device.
435 */ 436 437 for (i = 1; i < BIT(ARRAY_SIZE(page_sizes)); i++) { 438 unsigned int combination = SZ_4K; /* Required for ppGTT */ 439 440 for (j = 0; j < ARRAY_SIZE(page_sizes); j++) { 441 if (i & BIT(j)) 442 combination |= page_sizes[j]; 443 } 444 445 RUNTIME_INFO(i915)->page_sizes = combination; 446 447 for (single = 0; single <= 1; ++single) { 448 obj = fake_huge_pages_object(i915, combination, !!single); 449 if (IS_ERR(obj)) { 450 err = PTR_ERR(obj); 451 goto out_device; 452 } 453 454 if (obj->base.size != combination) { 455 pr_err("obj->base.size=%zu, expected=%u\n", 456 obj->base.size, combination); 457 err = -EINVAL; 458 goto out_put; 459 } 460 461 vma = i915_vma_instance(obj, &ppgtt->vm, NULL); 462 if (IS_ERR(vma)) { 463 err = PTR_ERR(vma); 464 goto out_put; 465 } 466 467 err = i915_vma_pin(vma, 0, 0, PIN_USER); 468 if (err) 469 goto out_put; 470 471 err = igt_check_page_sizes(vma); 472 473 if (vma->page_sizes.sg != combination) { 474 pr_err("page_sizes.sg=%u, expected=%u\n", 475 vma->page_sizes.sg, combination); 476 err = -EINVAL; 477 } 478 479 i915_vma_unpin(vma); 480 i915_gem_object_put(obj); 481 482 if (err) 483 goto out_device; 484 } 485 } 486 487 goto out_device; 488 489 out_put: 490 i915_gem_object_put(obj); 491 out_device: 492 RUNTIME_INFO(i915)->page_sizes = saved_mask; 493 494 return err; 495 } 496 497 static int igt_mock_memory_region_huge_pages(void *arg) 498 { 499 const unsigned int flags[] = { 0, I915_BO_ALLOC_CONTIGUOUS }; 500 struct i915_ppgtt *ppgtt = arg; 501 struct drm_i915_private *i915 = ppgtt->vm.i915; 502 unsigned long supported = RUNTIME_INFO(i915)->page_sizes; 503 struct intel_memory_region *mem; 504 struct drm_i915_gem_object *obj; 505 struct i915_vma *vma; 506 int bit; 507 int err = 0; 508 509 mem = mock_region_create(i915, 0, SZ_2G, I915_GTT_PAGE_SIZE_4K, 0, 0); 510 if (IS_ERR(mem)) { 511 pr_err("%s failed to create memory region\n", __func__); 512 return PTR_ERR(mem); 513 } 514 515 for_each_set_bit(bit, &supported, ilog2(I915_GTT_MAX_PAGE_SIZE) + 1) { 516 unsigned int page_size = BIT(bit); 517 resource_size_t phys; 518 int i; 519 520 for (i = 0; i < ARRAY_SIZE(flags); ++i) { 521 obj = i915_gem_object_create_region(mem, 522 page_size, page_size, 523 flags[i]); 524 if (IS_ERR(obj)) { 525 err = PTR_ERR(obj); 526 goto out_region; 527 } 528 529 vma = i915_vma_instance(obj, &ppgtt->vm, NULL); 530 if (IS_ERR(vma)) { 531 err = PTR_ERR(vma); 532 goto out_put; 533 } 534 535 err = i915_vma_pin(vma, 0, 0, PIN_USER); 536 if (err) 537 goto out_put; 538 539 err = igt_check_page_sizes(vma); 540 if (err) 541 goto out_unpin; 542 543 phys = i915_gem_object_get_dma_address(obj, 0); 544 if (!IS_ALIGNED(phys, page_size)) { 545 pr_err("%s addr misaligned(%pa) page_size=%u\n", 546 __func__, &phys, page_size); 547 err = -EINVAL; 548 goto out_unpin; 549 } 550 551 if (vma->resource->page_sizes_gtt != page_size) { 552 pr_err("%s page_sizes.gtt=%u, expected=%u\n", 553 __func__, vma->resource->page_sizes_gtt, 554 page_size); 555 err = -EINVAL; 556 goto out_unpin; 557 } 558 559 i915_vma_unpin(vma); 560 __i915_gem_object_put_pages(obj); 561 i915_gem_object_put(obj); 562 } 563 } 564 565 goto out_region; 566 567 out_unpin: 568 i915_vma_unpin(vma); 569 out_put: 570 i915_gem_object_put(obj); 571 out_region: 572 intel_memory_region_destroy(mem); 573 return err; 574 } 575 576 static int igt_mock_ppgtt_misaligned_dma(void *arg) 577 { 578 struct i915_ppgtt *ppgtt = arg; 579 struct drm_i915_private *i915 = ppgtt->vm.i915; 580 unsigned long supported = RUNTIME_INFO(i915)->page_sizes; 581 struct 
drm_i915_gem_object *obj; 582 int bit; 583 int err; 584 585 /* 586 * Sanity check dma misalignment for huge pages -- the dma addresses we 587 * insert into the paging structures need to always respect the page 588 * size alignment. 589 */ 590 591 bit = ilog2(I915_GTT_PAGE_SIZE_64K); 592 593 for_each_set_bit_from(bit, &supported, 594 ilog2(I915_GTT_MAX_PAGE_SIZE) + 1) { 595 IGT_TIMEOUT(end_time); 596 unsigned int page_size = BIT(bit); 597 unsigned int flags = PIN_USER | PIN_OFFSET_FIXED; 598 unsigned int offset; 599 unsigned int size = 600 round_up(page_size, I915_GTT_PAGE_SIZE_2M) << 1; 601 struct i915_vma *vma; 602 603 obj = fake_huge_pages_object(i915, size, true); 604 if (IS_ERR(obj)) 605 return PTR_ERR(obj); 606 607 if (obj->base.size != size) { 608 pr_err("obj->base.size=%zu, expected=%u\n", 609 obj->base.size, size); 610 err = -EINVAL; 611 goto out_put; 612 } 613 614 err = i915_gem_object_pin_pages_unlocked(obj); 615 if (err) 616 goto out_put; 617 618 /* Force the page size for this object */ 619 obj->mm.page_sizes.sg = page_size; 620 621 vma = i915_vma_instance(obj, &ppgtt->vm, NULL); 622 if (IS_ERR(vma)) { 623 err = PTR_ERR(vma); 624 goto out_unpin; 625 } 626 627 err = i915_vma_pin(vma, 0, 0, flags); 628 if (err) 629 goto out_unpin; 630 631 632 err = igt_check_page_sizes(vma); 633 634 if (vma->resource->page_sizes_gtt != page_size) { 635 pr_err("page_sizes.gtt=%u, expected %u\n", 636 vma->resource->page_sizes_gtt, page_size); 637 err = -EINVAL; 638 } 639 640 i915_vma_unpin(vma); 641 642 if (err) 643 goto out_unpin; 644 645 /* 646 * Try all the other valid offsets until the next 647 * boundary -- should always fall back to using 4K 648 * pages. 649 */ 650 for (offset = 4096; offset < page_size; offset += 4096) { 651 err = i915_vma_unbind_unlocked(vma); 652 if (err) 653 goto out_unpin; 654 655 err = i915_vma_pin(vma, 0, 0, flags | offset); 656 if (err) 657 goto out_unpin; 658 659 err = igt_check_page_sizes(vma); 660 661 if (vma->resource->page_sizes_gtt != I915_GTT_PAGE_SIZE_4K) { 662 pr_err("page_sizes.gtt=%u, expected %llu\n", 663 vma->resource->page_sizes_gtt, 664 I915_GTT_PAGE_SIZE_4K); 665 err = -EINVAL; 666 } 667 668 i915_vma_unpin(vma); 669 670 if (err) 671 goto out_unpin; 672 673 if (igt_timeout(end_time, 674 "%s timed out at offset %x with page-size %x\n", 675 __func__, offset, page_size)) 676 break; 677 } 678 679 i915_gem_object_lock(obj, NULL); 680 i915_gem_object_unpin_pages(obj); 681 __i915_gem_object_put_pages(obj); 682 i915_gem_object_unlock(obj); 683 i915_gem_object_put(obj); 684 } 685 686 return 0; 687 688 out_unpin: 689 i915_gem_object_lock(obj, NULL); 690 i915_gem_object_unpin_pages(obj); 691 i915_gem_object_unlock(obj); 692 out_put: 693 i915_gem_object_put(obj); 694 695 return err; 696 } 697 698 static void close_object_list(struct list_head *objects) 699 { 700 struct drm_i915_gem_object *obj, *on; 701 702 list_for_each_entry_safe(obj, on, objects, st_link) { 703 list_del(&obj->st_link); 704 i915_gem_object_lock(obj, NULL); 705 i915_gem_object_unpin_pages(obj); 706 __i915_gem_object_put_pages(obj); 707 i915_gem_object_unlock(obj); 708 i915_gem_object_put(obj); 709 } 710 } 711 712 static int igt_ppgtt_huge_fill(void *arg) 713 { 714 struct drm_i915_private *i915 = arg; 715 unsigned int supported = RUNTIME_INFO(i915)->page_sizes; 716 bool has_pte64 = GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50); 717 struct i915_address_space *vm; 718 struct i915_gem_context *ctx; 719 unsigned long max_pages; 720 unsigned long page_num; 721 struct file *file; 722 bool single = false; 723 
LIST_HEAD(objects); 724 IGT_TIMEOUT(end_time); 725 int err = -ENODEV; 726 727 if (supported == I915_GTT_PAGE_SIZE_4K) 728 return 0; 729 730 file = mock_file(i915); 731 if (IS_ERR(file)) 732 return PTR_ERR(file); 733 734 ctx = hugepage_ctx(i915, file); 735 if (IS_ERR(ctx)) { 736 err = PTR_ERR(ctx); 737 goto out; 738 } 739 vm = i915_gem_context_get_eb_vm(ctx); 740 max_pages = vm->total >> PAGE_SHIFT; 741 742 for_each_prime_number_from(page_num, 1, max_pages) { 743 struct drm_i915_gem_object *obj; 744 u64 size = page_num << PAGE_SHIFT; 745 struct i915_vma *vma; 746 unsigned int expected_gtt = 0; 747 int i; 748 749 obj = fake_huge_pages_object(i915, size, single); 750 if (IS_ERR(obj)) { 751 err = PTR_ERR(obj); 752 break; 753 } 754 755 if (obj->base.size != size) { 756 pr_err("obj->base.size=%zd, expected=%llu\n", 757 obj->base.size, size); 758 i915_gem_object_put(obj); 759 err = -EINVAL; 760 break; 761 } 762 763 err = i915_gem_object_pin_pages_unlocked(obj); 764 if (err) { 765 i915_gem_object_put(obj); 766 break; 767 } 768 769 list_add(&obj->st_link, &objects); 770 771 vma = i915_vma_instance(obj, vm, NULL); 772 if (IS_ERR(vma)) { 773 err = PTR_ERR(vma); 774 break; 775 } 776 777 /* vma start must be aligned to BIT(21) to allow 2M PTEs */ 778 err = i915_vma_pin(vma, 0, BIT(21), PIN_USER); 779 if (err) 780 break; 781 782 err = igt_check_page_sizes(vma); 783 if (err) { 784 i915_vma_unpin(vma); 785 break; 786 } 787 788 /* 789 * Figure out the expected gtt page size knowing that we go from 790 * largest to smallest page size sg chunks, and that we align to 791 * the largest page size. 792 */ 793 for (i = 0; i < ARRAY_SIZE(page_sizes); ++i) { 794 unsigned int page_size = page_sizes[i]; 795 796 if (HAS_PAGE_SIZES(i915, page_size) && 797 size >= page_size) { 798 expected_gtt |= page_size; 799 size &= page_size-1; 800 } 801 } 802 803 GEM_BUG_ON(!expected_gtt); 804 GEM_BUG_ON(size); 805 806 if (!has_pte64 && (obj->base.size < I915_GTT_PAGE_SIZE_2M || 807 expected_gtt & I915_GTT_PAGE_SIZE_2M)) 808 expected_gtt &= ~I915_GTT_PAGE_SIZE_64K; 809 810 i915_vma_unpin(vma); 811 812 if (!has_pte64 && vma->page_sizes.sg & I915_GTT_PAGE_SIZE_64K) { 813 if (!IS_ALIGNED(vma->node.start, 814 I915_GTT_PAGE_SIZE_2M)) { 815 pr_err("node.start(%llx) not aligned to 2M\n", 816 vma->node.start); 817 err = -EINVAL; 818 break; 819 } 820 821 if (!IS_ALIGNED(vma->node.size, 822 I915_GTT_PAGE_SIZE_2M)) { 823 pr_err("node.size(%llx) not aligned to 2M\n", 824 vma->node.size); 825 err = -EINVAL; 826 break; 827 } 828 } 829 830 if (vma->resource->page_sizes_gtt != expected_gtt) { 831 pr_err("gtt=%#x, expected=%#x, size=0x%zx, single=%s\n", 832 vma->resource->page_sizes_gtt, expected_gtt, 833 obj->base.size, str_yes_no(!!single)); 834 err = -EINVAL; 835 break; 836 } 837 838 if (igt_timeout(end_time, 839 "%s timed out at size %zd\n", 840 __func__, obj->base.size)) 841 break; 842 843 single = !single; 844 } 845 846 close_object_list(&objects); 847 848 if (err == -ENOMEM || err == -ENOSPC) 849 err = 0; 850 851 i915_vm_put(vm); 852 out: 853 fput(file); 854 return err; 855 } 856 857 static int igt_ppgtt_64K(void *arg) 858 { 859 struct drm_i915_private *i915 = arg; 860 bool has_pte64 = GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50); 861 struct drm_i915_gem_object *obj; 862 struct i915_address_space *vm; 863 struct i915_gem_context *ctx; 864 struct file *file; 865 const struct object_info { 866 unsigned int size; 867 unsigned int gtt; 868 unsigned int offset; 869 } objects[] = { 870 /* Cases with forced padding/alignment */ 871 { 872 .size = 
SZ_64K,
			.gtt = I915_GTT_PAGE_SIZE_64K,
			.offset = 0,
		},
		{
			.size = SZ_64K + SZ_4K,
			.gtt = I915_GTT_PAGE_SIZE_4K,
			.offset = 0,
		},
		{
			.size = SZ_64K - SZ_4K,
			.gtt = I915_GTT_PAGE_SIZE_4K,
			.offset = 0,
		},
		{
			.size = SZ_2M,
			.gtt = I915_GTT_PAGE_SIZE_64K,
			.offset = 0,
		},
		{
			.size = SZ_2M - SZ_4K,
			.gtt = I915_GTT_PAGE_SIZE_4K,
			.offset = 0,
		},
		{
			.size = SZ_2M + SZ_4K,
			.gtt = I915_GTT_PAGE_SIZE_64K | I915_GTT_PAGE_SIZE_4K,
			.offset = 0,
		},
		{
			.size = SZ_2M + SZ_64K,
			.gtt = I915_GTT_PAGE_SIZE_64K,
			.offset = 0,
		},
		{
			.size = SZ_2M - SZ_64K,
			.gtt = I915_GTT_PAGE_SIZE_64K,
			.offset = 0,
		},
		/* Try without any forced padding/alignment */
		{
			.size = SZ_64K,
			.offset = SZ_2M,
			.gtt = I915_GTT_PAGE_SIZE_4K,
		},
		{
			.size = SZ_128K,
			.offset = SZ_2M - SZ_64K,
			.gtt = I915_GTT_PAGE_SIZE_4K,
		},
	};
	struct i915_vma *vma;
	int i, single;
	int err;

	/*
	 * Sanity check some of the trickiness with 64K pages -- either we can
	 * safely mark the whole page-table(2M block) as 64K, or we have to
	 * always fall back to 4K.
	 */

	if (!HAS_PAGE_SIZES(i915, I915_GTT_PAGE_SIZE_64K))
		return 0;

	file = mock_file(i915);
	if (IS_ERR(file))
		return PTR_ERR(file);

	ctx = hugepage_ctx(i915, file);
	if (IS_ERR(ctx)) {
		err = PTR_ERR(ctx);
		goto out;
	}
	vm = i915_gem_context_get_eb_vm(ctx);

	for (i = 0; i < ARRAY_SIZE(objects); ++i) {
		unsigned int size = objects[i].size;
		unsigned int expected_gtt = objects[i].gtt;
		unsigned int offset = objects[i].offset;
		unsigned int flags = PIN_USER;

		/*
		 * For modern GTT models, the requirements for marking a page-table
		 * as 64K have been relaxed. Account for this.
		 */
		if (has_pte64) {
			expected_gtt = 0;
			if (size >= SZ_64K)
				expected_gtt |= I915_GTT_PAGE_SIZE_64K;
			if (size & (SZ_64K - 1))
				expected_gtt |= I915_GTT_PAGE_SIZE_4K;
		}

		for (single = 0; single <= 1; single++) {
			obj = fake_huge_pages_object(i915, size, !!single);
			if (IS_ERR(obj)) {
				err = PTR_ERR(obj);
				goto out_vm;
			}

			err = i915_gem_object_pin_pages_unlocked(obj);
			if (err)
				goto out_object_put;

			/*
			 * Disable 2M pages -- We only want to use 64K/4K pages
			 * for this test.
979 */ 980 obj->mm.page_sizes.sg &= ~I915_GTT_PAGE_SIZE_2M; 981 982 vma = i915_vma_instance(obj, vm, NULL); 983 if (IS_ERR(vma)) { 984 err = PTR_ERR(vma); 985 goto out_object_unpin; 986 } 987 988 if (offset) 989 flags |= PIN_OFFSET_FIXED | offset; 990 991 err = i915_vma_pin(vma, 0, 0, flags); 992 if (err) 993 goto out_object_unpin; 994 995 err = igt_check_page_sizes(vma); 996 if (err) 997 goto out_vma_unpin; 998 999 if (!has_pte64 && !offset && 1000 vma->page_sizes.sg & I915_GTT_PAGE_SIZE_64K) { 1001 if (!IS_ALIGNED(vma->node.start, 1002 I915_GTT_PAGE_SIZE_2M)) { 1003 pr_err("node.start(%llx) not aligned to 2M\n", 1004 vma->node.start); 1005 err = -EINVAL; 1006 goto out_vma_unpin; 1007 } 1008 1009 if (!IS_ALIGNED(vma->node.size, 1010 I915_GTT_PAGE_SIZE_2M)) { 1011 pr_err("node.size(%llx) not aligned to 2M\n", 1012 vma->node.size); 1013 err = -EINVAL; 1014 goto out_vma_unpin; 1015 } 1016 } 1017 1018 if (vma->resource->page_sizes_gtt != expected_gtt) { 1019 pr_err("gtt=%#x, expected=%#x, i=%d, single=%s offset=%#x size=%#x\n", 1020 vma->resource->page_sizes_gtt, 1021 expected_gtt, i, str_yes_no(!!single), 1022 offset, size); 1023 err = -EINVAL; 1024 goto out_vma_unpin; 1025 } 1026 1027 i915_vma_unpin(vma); 1028 i915_gem_object_lock(obj, NULL); 1029 i915_gem_object_unpin_pages(obj); 1030 __i915_gem_object_put_pages(obj); 1031 i915_gem_object_unlock(obj); 1032 i915_gem_object_put(obj); 1033 1034 i915_gem_drain_freed_objects(i915); 1035 } 1036 } 1037 1038 goto out_vm; 1039 1040 out_vma_unpin: 1041 i915_vma_unpin(vma); 1042 out_object_unpin: 1043 i915_gem_object_lock(obj, NULL); 1044 i915_gem_object_unpin_pages(obj); 1045 i915_gem_object_unlock(obj); 1046 out_object_put: 1047 i915_gem_object_put(obj); 1048 out_vm: 1049 i915_vm_put(vm); 1050 out: 1051 fput(file); 1052 return err; 1053 } 1054 1055 static int gpu_write(struct intel_context *ce, 1056 struct i915_vma *vma, 1057 u32 dw, 1058 u32 val) 1059 { 1060 int err; 1061 1062 i915_gem_object_lock(vma->obj, NULL); 1063 err = i915_gem_object_set_to_gtt_domain(vma->obj, true); 1064 i915_gem_object_unlock(vma->obj); 1065 if (err) 1066 return err; 1067 1068 return igt_gpu_fill_dw(ce, vma, dw * sizeof(u32), 1069 vma->size >> PAGE_SHIFT, val); 1070 } 1071 1072 static int 1073 __cpu_check_shmem(struct drm_i915_gem_object *obj, u32 dword, u32 val) 1074 { 1075 unsigned int needs_flush; 1076 unsigned long n; 1077 int err; 1078 1079 i915_gem_object_lock(obj, NULL); 1080 err = i915_gem_object_prepare_read(obj, &needs_flush); 1081 if (err) 1082 goto err_unlock; 1083 1084 for (n = 0; n < obj->base.size >> PAGE_SHIFT; ++n) { 1085 u32 *ptr = kmap_atomic(i915_gem_object_get_page(obj, n)); 1086 1087 if (needs_flush & CLFLUSH_BEFORE) 1088 drm_clflush_virt_range(ptr, PAGE_SIZE); 1089 1090 if (ptr[dword] != val) { 1091 pr_err("n=%lu ptr[%u]=%u, val=%u\n", 1092 n, dword, ptr[dword], val); 1093 kunmap_atomic(ptr); 1094 err = -EINVAL; 1095 break; 1096 } 1097 1098 kunmap_atomic(ptr); 1099 } 1100 1101 i915_gem_object_finish_access(obj); 1102 err_unlock: 1103 i915_gem_object_unlock(obj); 1104 1105 return err; 1106 } 1107 1108 static int __cpu_check_vmap(struct drm_i915_gem_object *obj, u32 dword, u32 val) 1109 { 1110 unsigned long n = obj->base.size >> PAGE_SHIFT; 1111 u32 *ptr; 1112 int err; 1113 1114 err = i915_gem_object_wait(obj, 0, MAX_SCHEDULE_TIMEOUT); 1115 if (err) 1116 return err; 1117 1118 ptr = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WC); 1119 if (IS_ERR(ptr)) 1120 return PTR_ERR(ptr); 1121 1122 ptr += dword; 1123 while (n--) { 1124 if (*ptr != val) { 1125 
pr_err("base[%u]=%08x, val=%08x\n", 1126 dword, *ptr, val); 1127 err = -EINVAL; 1128 break; 1129 } 1130 1131 ptr += PAGE_SIZE / sizeof(*ptr); 1132 } 1133 1134 i915_gem_object_unpin_map(obj); 1135 return err; 1136 } 1137 1138 static int cpu_check(struct drm_i915_gem_object *obj, u32 dword, u32 val) 1139 { 1140 if (i915_gem_object_has_struct_page(obj)) 1141 return __cpu_check_shmem(obj, dword, val); 1142 else 1143 return __cpu_check_vmap(obj, dword, val); 1144 } 1145 1146 static int __igt_write_huge(struct intel_context *ce, 1147 struct drm_i915_gem_object *obj, 1148 u64 size, u64 offset, 1149 u32 dword, u32 val) 1150 { 1151 unsigned int flags = PIN_USER | PIN_OFFSET_FIXED; 1152 struct i915_vma *vma; 1153 int err; 1154 1155 vma = i915_vma_instance(obj, ce->vm, NULL); 1156 if (IS_ERR(vma)) 1157 return PTR_ERR(vma); 1158 1159 err = i915_vma_pin(vma, size, 0, flags | offset); 1160 if (err) { 1161 /* 1162 * The ggtt may have some pages reserved so 1163 * refrain from erroring out. 1164 */ 1165 if (err == -ENOSPC && i915_is_ggtt(ce->vm)) 1166 err = 0; 1167 1168 return err; 1169 } 1170 1171 err = igt_check_page_sizes(vma); 1172 if (err) 1173 goto out_vma_unpin; 1174 1175 err = gpu_write(ce, vma, dword, val); 1176 if (err) { 1177 pr_err("gpu-write failed at offset=%llx\n", offset); 1178 goto out_vma_unpin; 1179 } 1180 1181 err = cpu_check(obj, dword, val); 1182 if (err) { 1183 pr_err("cpu-check failed at offset=%llx\n", offset); 1184 goto out_vma_unpin; 1185 } 1186 1187 out_vma_unpin: 1188 i915_vma_unpin(vma); 1189 return err; 1190 } 1191 1192 static int igt_write_huge(struct drm_i915_private *i915, 1193 struct drm_i915_gem_object *obj) 1194 { 1195 struct i915_gem_engines *engines; 1196 struct i915_gem_engines_iter it; 1197 struct intel_context *ce; 1198 I915_RND_STATE(prng); 1199 IGT_TIMEOUT(end_time); 1200 unsigned int max_page_size; 1201 unsigned int count; 1202 struct i915_gem_context *ctx; 1203 struct file *file; 1204 u64 max; 1205 u64 num; 1206 u64 size; 1207 int *order; 1208 int i, n; 1209 int err = 0; 1210 1211 file = mock_file(i915); 1212 if (IS_ERR(file)) 1213 return PTR_ERR(file); 1214 1215 ctx = hugepage_ctx(i915, file); 1216 if (IS_ERR(ctx)) { 1217 err = PTR_ERR(ctx); 1218 goto out; 1219 } 1220 1221 GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj)); 1222 1223 size = obj->base.size; 1224 if (obj->mm.page_sizes.sg & I915_GTT_PAGE_SIZE_64K && 1225 !HAS_64K_PAGES(i915)) 1226 size = round_up(size, I915_GTT_PAGE_SIZE_2M); 1227 1228 n = 0; 1229 count = 0; 1230 max = U64_MAX; 1231 for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) { 1232 count++; 1233 if (!intel_engine_can_store_dword(ce->engine)) 1234 continue; 1235 1236 max = min(max, ce->vm->total); 1237 n++; 1238 } 1239 i915_gem_context_unlock_engines(ctx); 1240 if (!n) 1241 goto out; 1242 1243 /* 1244 * To keep things interesting when alternating between engines in our 1245 * randomized order, lets also make feeding to the same engine a few 1246 * times in succession a possibility by enlarging the permutation array. 1247 */ 1248 order = i915_random_order(count * count, &prng); 1249 if (!order) 1250 return -ENOMEM; 1251 1252 max_page_size = rounddown_pow_of_two(obj->mm.page_sizes.sg); 1253 max = div_u64(max - size, max_page_size); 1254 1255 /* 1256 * Try various offsets in an ascending/descending fashion until we 1257 * timeout -- we want to avoid issues hidden by effectively always using 1258 * offset = 0. 
1259 */ 1260 i = 0; 1261 engines = i915_gem_context_lock_engines(ctx); 1262 for_each_prime_number_from(num, 0, max) { 1263 u64 offset_low = num * max_page_size; 1264 u64 offset_high = (max - num) * max_page_size; 1265 u32 dword = offset_in_page(num) / 4; 1266 struct intel_context *ce; 1267 1268 ce = engines->engines[order[i] % engines->num_engines]; 1269 i = (i + 1) % (count * count); 1270 if (!ce || !intel_engine_can_store_dword(ce->engine)) 1271 continue; 1272 1273 /* 1274 * In order to utilize 64K pages we need to both pad the vma 1275 * size and ensure the vma offset is at the start of the pt 1276 * boundary, however to improve coverage we opt for testing both 1277 * aligned and unaligned offsets. 1278 * 1279 * With PS64 this is no longer the case, but to ensure we 1280 * sometimes get the compact layout for smaller objects, apply 1281 * the round_up anyway. 1282 */ 1283 if (obj->mm.page_sizes.sg & I915_GTT_PAGE_SIZE_64K) 1284 offset_low = round_down(offset_low, 1285 I915_GTT_PAGE_SIZE_2M); 1286 1287 err = __igt_write_huge(ce, obj, size, offset_low, 1288 dword, num + 1); 1289 if (err) 1290 break; 1291 1292 err = __igt_write_huge(ce, obj, size, offset_high, 1293 dword, num + 1); 1294 if (err) 1295 break; 1296 1297 if (igt_timeout(end_time, 1298 "%s timed out on %s, offset_low=%llx offset_high=%llx, max_page_size=%x\n", 1299 __func__, ce->engine->name, offset_low, offset_high, 1300 max_page_size)) 1301 break; 1302 } 1303 i915_gem_context_unlock_engines(ctx); 1304 1305 kfree(order); 1306 1307 out: 1308 fput(file); 1309 return err; 1310 } 1311 1312 typedef struct drm_i915_gem_object * 1313 (*igt_create_fn)(struct drm_i915_private *i915, u32 size, u32 flags); 1314 1315 static inline bool igt_can_allocate_thp(struct drm_i915_private *i915) 1316 { 1317 return i915->mm.gemfs && has_transparent_hugepage(); 1318 } 1319 1320 static struct drm_i915_gem_object * 1321 igt_create_shmem(struct drm_i915_private *i915, u32 size, u32 flags) 1322 { 1323 if (!igt_can_allocate_thp(i915)) { 1324 pr_info("%s missing THP support, skipping\n", __func__); 1325 return ERR_PTR(-ENODEV); 1326 } 1327 1328 return i915_gem_object_create_shmem(i915, size); 1329 } 1330 1331 static struct drm_i915_gem_object * 1332 igt_create_internal(struct drm_i915_private *i915, u32 size, u32 flags) 1333 { 1334 return i915_gem_object_create_internal(i915, size); 1335 } 1336 1337 static struct drm_i915_gem_object * 1338 igt_create_system(struct drm_i915_private *i915, u32 size, u32 flags) 1339 { 1340 return huge_pages_object(i915, size, size); 1341 } 1342 1343 static struct drm_i915_gem_object * 1344 igt_create_local(struct drm_i915_private *i915, u32 size, u32 flags) 1345 { 1346 return i915_gem_object_create_lmem(i915, size, flags); 1347 } 1348 1349 static u32 igt_random_size(struct rnd_state *prng, 1350 u32 min_page_size, 1351 u32 max_page_size) 1352 { 1353 u64 mask; 1354 u32 size; 1355 1356 GEM_BUG_ON(!is_power_of_2(min_page_size)); 1357 GEM_BUG_ON(!is_power_of_2(max_page_size)); 1358 GEM_BUG_ON(min_page_size < PAGE_SIZE); 1359 GEM_BUG_ON(min_page_size > max_page_size); 1360 1361 mask = ((max_page_size << 1ULL) - 1) & PAGE_MASK; 1362 size = prandom_u32_state(prng) & mask; 1363 if (size < min_page_size) 1364 size |= min_page_size; 1365 1366 return size; 1367 } 1368 1369 static int igt_ppgtt_smoke_huge(void *arg) 1370 { 1371 struct drm_i915_private *i915 = arg; 1372 struct drm_i915_gem_object *obj; 1373 I915_RND_STATE(prng); 1374 struct { 1375 igt_create_fn fn; 1376 u32 min; 1377 u32 max; 1378 } backends[] = { 1379 { 
igt_create_internal, SZ_64K, SZ_2M, }, 1380 { igt_create_shmem, SZ_64K, SZ_32M, }, 1381 { igt_create_local, SZ_64K, SZ_1G, }, 1382 }; 1383 int err; 1384 int i; 1385 1386 /* 1387 * Sanity check that the HW uses huge pages correctly through our 1388 * various backends -- ensure that our writes land in the right place. 1389 */ 1390 1391 for (i = 0; i < ARRAY_SIZE(backends); ++i) { 1392 u32 min = backends[i].min; 1393 u32 max = backends[i].max; 1394 u32 size = max; 1395 1396 try_again: 1397 size = igt_random_size(&prng, min, rounddown_pow_of_two(size)); 1398 1399 obj = backends[i].fn(i915, size, 0); 1400 if (IS_ERR(obj)) { 1401 err = PTR_ERR(obj); 1402 if (err == -E2BIG) { 1403 size >>= 1; 1404 goto try_again; 1405 } else if (err == -ENODEV) { 1406 err = 0; 1407 continue; 1408 } 1409 1410 return err; 1411 } 1412 1413 err = i915_gem_object_pin_pages_unlocked(obj); 1414 if (err) { 1415 if (err == -ENXIO || err == -E2BIG || err == -ENOMEM) { 1416 i915_gem_object_put(obj); 1417 size >>= 1; 1418 goto try_again; 1419 } 1420 goto out_put; 1421 } 1422 1423 if (obj->mm.page_sizes.phys < min) { 1424 pr_info("%s unable to allocate huge-page(s) with size=%u, i=%d\n", 1425 __func__, size, i); 1426 err = -ENOMEM; 1427 goto out_unpin; 1428 } 1429 1430 err = igt_write_huge(i915, obj); 1431 if (err) { 1432 pr_err("%s write-huge failed with size=%u, i=%d\n", 1433 __func__, size, i); 1434 } 1435 out_unpin: 1436 i915_gem_object_lock(obj, NULL); 1437 i915_gem_object_unpin_pages(obj); 1438 __i915_gem_object_put_pages(obj); 1439 i915_gem_object_unlock(obj); 1440 out_put: 1441 i915_gem_object_put(obj); 1442 1443 if (err == -ENOMEM || err == -ENXIO) 1444 err = 0; 1445 1446 if (err) 1447 break; 1448 1449 cond_resched(); 1450 } 1451 1452 return err; 1453 } 1454 1455 static int igt_ppgtt_sanity_check(void *arg) 1456 { 1457 struct drm_i915_private *i915 = arg; 1458 unsigned int supported = RUNTIME_INFO(i915)->page_sizes; 1459 struct { 1460 igt_create_fn fn; 1461 unsigned int flags; 1462 } backends[] = { 1463 { igt_create_system, 0, }, 1464 { igt_create_local, 0, }, 1465 { igt_create_local, I915_BO_ALLOC_CONTIGUOUS, }, 1466 }; 1467 struct { 1468 u32 size; 1469 u32 pages; 1470 } combos[] = { 1471 { SZ_64K, SZ_64K }, 1472 { SZ_2M, SZ_2M }, 1473 { SZ_2M, SZ_64K }, 1474 { SZ_2M - SZ_64K, SZ_64K }, 1475 { SZ_2M - SZ_4K, SZ_64K | SZ_4K }, 1476 { SZ_2M + SZ_4K, SZ_64K | SZ_4K }, 1477 { SZ_2M + SZ_4K, SZ_2M | SZ_4K }, 1478 { SZ_2M + SZ_64K, SZ_2M | SZ_64K }, 1479 { SZ_2M + SZ_64K, SZ_64K }, 1480 }; 1481 int i, j; 1482 int err; 1483 1484 if (supported == I915_GTT_PAGE_SIZE_4K) 1485 return 0; 1486 1487 /* 1488 * Sanity check that the HW behaves with a limited set of combinations. 1489 * We already have a bunch of randomised testing, which should give us 1490 * a decent amount of variation between runs, however we should keep 1491 * this to limit the chances of introducing a temporary regression, by 1492 * testing the most obvious cases that might make something blow up. 
1493 */ 1494 1495 for (i = 0; i < ARRAY_SIZE(backends); ++i) { 1496 for (j = 0; j < ARRAY_SIZE(combos); ++j) { 1497 struct drm_i915_gem_object *obj; 1498 u32 size = combos[j].size; 1499 u32 pages = combos[j].pages; 1500 1501 obj = backends[i].fn(i915, size, backends[i].flags); 1502 if (IS_ERR(obj)) { 1503 err = PTR_ERR(obj); 1504 if (err == -ENODEV) { 1505 pr_info("Device lacks local memory, skipping\n"); 1506 err = 0; 1507 break; 1508 } 1509 1510 return err; 1511 } 1512 1513 err = i915_gem_object_pin_pages_unlocked(obj); 1514 if (err) { 1515 i915_gem_object_put(obj); 1516 goto out; 1517 } 1518 1519 GEM_BUG_ON(pages > obj->base.size); 1520 pages = pages & supported; 1521 1522 if (pages) 1523 obj->mm.page_sizes.sg = pages; 1524 1525 err = igt_write_huge(i915, obj); 1526 1527 i915_gem_object_lock(obj, NULL); 1528 i915_gem_object_unpin_pages(obj); 1529 __i915_gem_object_put_pages(obj); 1530 i915_gem_object_unlock(obj); 1531 i915_gem_object_put(obj); 1532 1533 if (err) { 1534 pr_err("%s write-huge failed with size=%u pages=%u i=%d, j=%d\n", 1535 __func__, size, pages, i, j); 1536 goto out; 1537 } 1538 } 1539 1540 cond_resched(); 1541 } 1542 1543 out: 1544 if (err == -ENOMEM) 1545 err = 0; 1546 1547 return err; 1548 } 1549 1550 static int igt_ppgtt_compact(void *arg) 1551 { 1552 struct drm_i915_private *i915 = arg; 1553 struct drm_i915_gem_object *obj; 1554 int err; 1555 1556 /* 1557 * Simple test to catch issues with compact 64K pages -- since the pt is 1558 * compacted to 256B that gives us 32 entries per pt, however since the 1559 * backing page for the pt is 4K, any extra entries we might incorrectly 1560 * write out should be ignored by the HW. If ever hit such a case this 1561 * test should catch it since some of our writes would land in scratch. 1562 */ 1563 1564 if (!HAS_64K_PAGES(i915)) { 1565 pr_info("device lacks compact 64K page support, skipping\n"); 1566 return 0; 1567 } 1568 1569 if (!HAS_LMEM(i915)) { 1570 pr_info("device lacks LMEM support, skipping\n"); 1571 return 0; 1572 } 1573 1574 /* We want the range to cover multiple page-table boundaries. */ 1575 obj = i915_gem_object_create_lmem(i915, SZ_4M, 0); 1576 if (IS_ERR(obj)) 1577 return PTR_ERR(obj); 1578 1579 err = i915_gem_object_pin_pages_unlocked(obj); 1580 if (err) 1581 goto out_put; 1582 1583 if (obj->mm.page_sizes.phys < I915_GTT_PAGE_SIZE_64K) { 1584 pr_info("LMEM compact unable to allocate huge-page(s)\n"); 1585 goto out_unpin; 1586 } 1587 1588 /* 1589 * Disable 2M GTT pages by forcing the page-size to 64K for the GTT 1590 * insertion. 
1591 */ 1592 obj->mm.page_sizes.sg = I915_GTT_PAGE_SIZE_64K; 1593 1594 err = igt_write_huge(i915, obj); 1595 if (err) 1596 pr_err("LMEM compact write-huge failed\n"); 1597 1598 out_unpin: 1599 i915_gem_object_unpin_pages(obj); 1600 out_put: 1601 i915_gem_object_put(obj); 1602 1603 if (err == -ENOMEM) 1604 err = 0; 1605 1606 return err; 1607 } 1608 1609 static int igt_ppgtt_mixed(void *arg) 1610 { 1611 struct drm_i915_private *i915 = arg; 1612 const unsigned long flags = PIN_OFFSET_FIXED | PIN_USER; 1613 struct drm_i915_gem_object *obj, *on; 1614 struct i915_gem_engines *engines; 1615 struct i915_gem_engines_iter it; 1616 struct i915_address_space *vm; 1617 struct i915_gem_context *ctx; 1618 struct intel_context *ce; 1619 struct file *file; 1620 I915_RND_STATE(prng); 1621 LIST_HEAD(objects); 1622 struct intel_memory_region *mr; 1623 struct i915_vma *vma; 1624 unsigned int count; 1625 u32 i, addr; 1626 int *order; 1627 int n, err; 1628 1629 /* 1630 * Sanity check mixing 4K and 64K pages within the same page-table via 1631 * the new PS64 TLB hint. 1632 */ 1633 1634 if (!HAS_64K_PAGES(i915)) { 1635 pr_info("device lacks PS64, skipping\n"); 1636 return 0; 1637 } 1638 1639 file = mock_file(i915); 1640 if (IS_ERR(file)) 1641 return PTR_ERR(file); 1642 1643 ctx = hugepage_ctx(i915, file); 1644 if (IS_ERR(ctx)) { 1645 err = PTR_ERR(ctx); 1646 goto out; 1647 } 1648 vm = i915_gem_context_get_eb_vm(ctx); 1649 1650 i = 0; 1651 addr = 0; 1652 do { 1653 u32 sz; 1654 1655 sz = i915_prandom_u32_max_state(SZ_4M, &prng); 1656 sz = max_t(u32, sz, SZ_4K); 1657 1658 mr = i915->mm.regions[INTEL_REGION_LMEM_0]; 1659 if (i & 1) 1660 mr = i915->mm.regions[INTEL_REGION_SMEM]; 1661 1662 obj = i915_gem_object_create_region(mr, sz, 0, 0); 1663 if (IS_ERR(obj)) { 1664 err = PTR_ERR(obj); 1665 goto out_vm; 1666 } 1667 1668 list_add_tail(&obj->st_link, &objects); 1669 1670 vma = i915_vma_instance(obj, vm, NULL); 1671 if (IS_ERR(vma)) { 1672 err = PTR_ERR(vma); 1673 goto err_put; 1674 } 1675 1676 addr = round_up(addr, mr->min_page_size); 1677 err = i915_vma_pin(vma, 0, 0, addr | flags); 1678 if (err) 1679 goto err_put; 1680 1681 if (mr->type == INTEL_MEMORY_LOCAL && 1682 (vma->resource->page_sizes_gtt & I915_GTT_PAGE_SIZE_4K)) { 1683 err = -EINVAL; 1684 goto err_put; 1685 } 1686 1687 addr += obj->base.size; 1688 i++; 1689 } while (addr <= SZ_16M); 1690 1691 n = 0; 1692 count = 0; 1693 for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) { 1694 count++; 1695 if (!intel_engine_can_store_dword(ce->engine)) 1696 continue; 1697 1698 n++; 1699 } 1700 i915_gem_context_unlock_engines(ctx); 1701 if (!n) 1702 goto err_put; 1703 1704 order = i915_random_order(count * count, &prng); 1705 if (!order) { 1706 err = -ENOMEM; 1707 goto err_put; 1708 } 1709 1710 i = 0; 1711 addr = 0; 1712 engines = i915_gem_context_lock_engines(ctx); 1713 list_for_each_entry(obj, &objects, st_link) { 1714 u32 rnd = i915_prandom_u32_max_state(UINT_MAX, &prng); 1715 1716 addr = round_up(addr, obj->mm.region->min_page_size); 1717 1718 ce = engines->engines[order[i] % engines->num_engines]; 1719 i = (i + 1) % (count * count); 1720 if (!ce || !intel_engine_can_store_dword(ce->engine)) 1721 continue; 1722 1723 err = __igt_write_huge(ce, obj, obj->base.size, addr, 0, rnd); 1724 if (err) 1725 break; 1726 1727 err = __igt_write_huge(ce, obj, obj->base.size, addr, 1728 offset_in_page(rnd) / sizeof(u32), rnd + 1); 1729 if (err) 1730 break; 1731 1732 err = __igt_write_huge(ce, obj, obj->base.size, addr, 1733 (PAGE_SIZE / sizeof(u32)) - 1, 1734 rnd + 2); 1735 
			if (err)
				break;

		addr += obj->base.size;

		cond_resched();
	}

	i915_gem_context_unlock_engines(ctx);
	kfree(order);
err_put:
	list_for_each_entry_safe(obj, on, &objects, st_link) {
		list_del(&obj->st_link);
		i915_gem_object_put(obj);
	}
out_vm:
	i915_vm_put(vm);
out:
	fput(file);
	return err;
}

static int igt_tmpfs_fallback(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct i915_address_space *vm;
	struct i915_gem_context *ctx;
	struct vfsmount *gemfs = i915->mm.gemfs;
	struct drm_i915_gem_object *obj;
	struct i915_vma *vma;
	struct file *file;
	u32 *vaddr;
	int err = 0;

	file = mock_file(i915);
	if (IS_ERR(file))
		return PTR_ERR(file);

	ctx = hugepage_ctx(i915, file);
	if (IS_ERR(ctx)) {
		err = PTR_ERR(ctx);
		goto out;
	}
	vm = i915_gem_context_get_eb_vm(ctx);

	/*
	 * Make sure that we don't burst into a ball of flames upon falling back
	 * to tmpfs, which we rely on, on the off-chance we encounter a failure
	 * when setting up gemfs.
	 */

	i915->mm.gemfs = NULL;

	obj = i915_gem_object_create_shmem(i915, PAGE_SIZE);
	if (IS_ERR(obj)) {
		err = PTR_ERR(obj);
		goto out_restore;
	}

	vaddr = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WB);
	if (IS_ERR(vaddr)) {
		err = PTR_ERR(vaddr);
		goto out_put;
	}
	*vaddr = 0xdeadbeaf;

	__i915_gem_object_flush_map(obj, 0, 64);
	i915_gem_object_unpin_map(obj);

	vma = i915_vma_instance(obj, vm, NULL);
	if (IS_ERR(vma)) {
		err = PTR_ERR(vma);
		goto out_put;
	}

	err = i915_vma_pin(vma, 0, 0, PIN_USER);
	if (err)
		goto out_put;

	err = igt_check_page_sizes(vma);

	i915_vma_unpin(vma);
out_put:
	i915_gem_object_put(obj);
out_restore:
	i915->mm.gemfs = gemfs;

	i915_vm_put(vm);
out:
	fput(file);
	return err;
}

static int igt_shrink_thp(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct i915_address_space *vm;
	struct i915_gem_context *ctx;
	struct drm_i915_gem_object *obj;
	struct i915_gem_engines_iter it;
	struct intel_context *ce;
	struct i915_vma *vma;
	struct file *file;
	unsigned int flags = PIN_USER;
	unsigned int n;
	intel_wakeref_t wf;
	bool should_swap;
	int err;

	if (!igt_can_allocate_thp(i915)) {
		pr_info("missing THP support, skipping\n");
		return 0;
	}

	file = mock_file(i915);
	if (IS_ERR(file))
		return PTR_ERR(file);

	ctx = hugepage_ctx(i915, file);
	if (IS_ERR(ctx)) {
		err = PTR_ERR(ctx);
		goto out;
	}
	vm = i915_gem_context_get_eb_vm(ctx);

	/*
	 * Sanity check shrinking huge-paged object -- make sure nothing blows
	 * up.
	 */

	obj = i915_gem_object_create_shmem(i915, SZ_2M);
	if (IS_ERR(obj)) {
		err = PTR_ERR(obj);
		goto out_vm;
	}

	vma = i915_vma_instance(obj, vm, NULL);
	if (IS_ERR(vma)) {
		err = PTR_ERR(vma);
		goto out_put;
	}

	wf = intel_runtime_pm_get(&i915->runtime_pm); /* active shrink */

	err = i915_vma_pin(vma, 0, 0, flags);
	if (err)
		goto out_wf;

	if (obj->mm.page_sizes.phys < I915_GTT_PAGE_SIZE_2M) {
		pr_info("failed to allocate THP, finishing test early\n");
		goto out_unpin;
	}

	err = igt_check_page_sizes(vma);
	if (err)
		goto out_unpin;

	n = 0;

	for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) {
		if (!intel_engine_can_store_dword(ce->engine))
			continue;

		err = gpu_write(ce, vma, n++, 0xdeadbeaf);
		if (err)
			break;
	}
	i915_gem_context_unlock_engines(ctx);
	/*
	 * Nuke everything *before* we unpin the pages so we can be reasonably
	 * sure that when later checking get_nr_swap_pages() some random
	 * leftover object doesn't steal the remaining swap space.
	 */
	i915_gem_shrink(NULL, i915, -1UL, NULL,
			I915_SHRINK_BOUND |
			I915_SHRINK_UNBOUND |
			I915_SHRINK_ACTIVE);
	i915_vma_unpin(vma);
	if (err)
		goto out_wf;

	/*
	 * Now that the pages are *unpinned* shrinking should invoke
	 * shmem to truncate our pages, if we have available swap.
	 */
	should_swap = get_nr_swap_pages() > 0;
	i915_gem_shrink(NULL, i915, -1UL, NULL,
			I915_SHRINK_BOUND |
			I915_SHRINK_UNBOUND |
			I915_SHRINK_ACTIVE |
			I915_SHRINK_WRITEBACK);
	if (should_swap == i915_gem_object_has_pages(obj)) {
		pr_err("unexpected pages mismatch, should_swap=%s\n",
		       str_yes_no(should_swap));
		err = -EINVAL;
		goto out_wf;
	}

	if (should_swap == (obj->mm.page_sizes.sg || obj->mm.page_sizes.phys)) {
		pr_err("unexpected residual page-size bits, should_swap=%s\n",
		       str_yes_no(should_swap));
		err = -EINVAL;
		goto out_wf;
	}

	err = i915_vma_pin(vma, 0, 0, flags);
	if (err)
		goto out_wf;

	while (n--) {
		err = cpu_check(obj, n, 0xdeadbeaf);
		if (err)
			break;
	}

out_unpin:
	i915_vma_unpin(vma);
out_wf:
	intel_runtime_pm_put(&i915->runtime_pm, wf);
out_put:
	i915_gem_object_put(obj);
out_vm:
	i915_vm_put(vm);
out:
	fput(file);
	return err;
}

int i915_gem_huge_page_mock_selftests(void)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(igt_mock_exhaust_device_supported_pages),
		SUBTEST(igt_mock_memory_region_huge_pages),
		SUBTEST(igt_mock_ppgtt_misaligned_dma),
	};
	struct drm_i915_private *dev_priv;
	struct i915_ppgtt *ppgtt;
	int err;

	dev_priv = mock_gem_device();
	if (!dev_priv)
		return -ENOMEM;

	/* Pretend to be a device which supports the 48b PPGTT */
	RUNTIME_INFO(dev_priv)->ppgtt_type = INTEL_PPGTT_FULL;
	RUNTIME_INFO(dev_priv)->ppgtt_size = 48;

	ppgtt = i915_ppgtt_create(to_gt(dev_priv), 0);
	if (IS_ERR(ppgtt)) {
		err = PTR_ERR(ppgtt);
		goto out_unlock;
	}

	if (!i915_vm_is_4lvl(&ppgtt->vm)) {
		pr_err("failed to create 48b PPGTT\n");
		err = -EINVAL;
		goto out_put;
	}

	/* If we ever hit this then it's time to mock the 64K scratch */
	if (!i915_vm_has_scratch_64K(&ppgtt->vm)) {
		pr_err("PPGTT missing 64K scratch page\n");
		err = -EINVAL;
		goto out_put;
	}

	err = i915_subtests(tests, ppgtt);

out_put:
	i915_vm_put(&ppgtt->vm);
out_unlock:
	mock_destroy_device(dev_priv);
	return err;
}

int i915_gem_huge_page_live_selftests(struct drm_i915_private *i915)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(igt_shrink_thp),
		SUBTEST(igt_tmpfs_fallback),
		SUBTEST(igt_ppgtt_smoke_huge),
		SUBTEST(igt_ppgtt_sanity_check),
		SUBTEST(igt_ppgtt_compact),
		SUBTEST(igt_ppgtt_mixed),
		SUBTEST(igt_ppgtt_huge_fill),
		SUBTEST(igt_ppgtt_64K),
	};

	if (!HAS_PPGTT(i915)) {
		pr_info("PPGTT not supported, skipping live-selftests\n");
		return 0;
	}

	if (intel_gt_is_wedged(to_gt(i915)))
		return 0;

	return i915_live_subtests(tests, i915);
}