/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2016 Intel Corporation
 */

#include <linux/prime_numbers.h>

#include "gt/intel_gt.h"
#include "gt/intel_gt_pm.h"
#include "huge_gem_object.h"
#include "i915_selftest.h"
#include "selftests/i915_random.h"
#include "selftests/igt_flush_test.h"

struct tile {
	unsigned int width;
	unsigned int height;
	unsigned int stride;
	unsigned int size;
	unsigned int tiling;
	unsigned int swizzle;
};

static u64 swizzle_bit(unsigned int bit, u64 offset)
{
	return (offset & BIT_ULL(bit)) >> (bit - 6);
}

static u64 tiled_offset(const struct tile *tile, u64 v)
{
	u64 x, y;

	if (tile->tiling == I915_TILING_NONE)
		return v;

	y = div64_u64_rem(v, tile->stride, &x);
	v = div64_u64_rem(y, tile->height, &y) * tile->stride * tile->height;

	if (tile->tiling == I915_TILING_X) {
		v += y * tile->width;
		v += div64_u64_rem(x, tile->width, &x) << tile->size;
		v += x;
	} else if (tile->width == 128) {
		const unsigned int ytile_span = 16;
		const unsigned int ytile_height = 512;

		v += y * ytile_span;
		v += div64_u64_rem(x, ytile_span, &x) * ytile_height;
		v += x;
	} else {
		const unsigned int ytile_span = 32;
		const unsigned int ytile_height = 256;

		v += y * ytile_span;
		v += div64_u64_rem(x, ytile_span, &x) * ytile_height;
		v += x;
	}

	switch (tile->swizzle) {
	case I915_BIT_6_SWIZZLE_9:
		v ^= swizzle_bit(9, v);
		break;
	case I915_BIT_6_SWIZZLE_9_10:
		v ^= swizzle_bit(9, v) ^ swizzle_bit(10, v);
		break;
	case I915_BIT_6_SWIZZLE_9_11:
		v ^= swizzle_bit(9, v) ^ swizzle_bit(11, v);
		break;
	case I915_BIT_6_SWIZZLE_9_10_11:
		v ^= swizzle_bit(9, v) ^ swizzle_bit(10, v) ^ swizzle_bit(11, v);
		break;
	}

	return v;
}

static int check_partial_mapping(struct drm_i915_gem_object *obj,
				 const struct tile *tile,
				 struct rnd_state *prng)
{
	const unsigned long npages = obj->base.size / PAGE_SIZE;
	struct i915_ggtt_view view;
	struct i915_vma *vma;
	unsigned long page;
	u32 __iomem *io;
	struct page *p;
	unsigned int n;
	u64 offset;
	u32 *cpu;
	int err;

	err = i915_gem_object_set_tiling(obj, tile->tiling, tile->stride);
	if (err) {
		pr_err("Failed to set tiling mode=%u, stride=%u, err=%d\n",
		       tile->tiling, tile->stride, err);
		return err;
	}

	GEM_BUG_ON(i915_gem_object_get_tiling(obj) != tile->tiling);
	GEM_BUG_ON(i915_gem_object_get_stride(obj) != tile->stride);

	i915_gem_object_lock(obj);
	err = i915_gem_object_set_to_gtt_domain(obj, true);
	i915_gem_object_unlock(obj);
	if (err) {
		pr_err("Failed to flush to GTT write domain; err=%d\n", err);
		return err;
	}

	page = i915_prandom_u32_max_state(npages, prng);
	view = compute_partial_view(obj, page, MIN_CHUNK_PAGES);

	vma = i915_gem_object_ggtt_pin(obj, &view, 0, 0, PIN_MAPPABLE);
	if (IS_ERR(vma)) {
		pr_err("Failed to pin partial view: offset=%lu; err=%d\n",
		       page, (int)PTR_ERR(vma));
		return PTR_ERR(vma);
	}

	n = page - view.partial.offset;
	GEM_BUG_ON(n >= view.partial.size);

	io = i915_vma_pin_iomap(vma);
	i915_vma_unpin(vma);
	if (IS_ERR(io)) {
		pr_err("Failed to iomap partial view: offset=%lu; err=%d\n",
		       page, (int)PTR_ERR(io));
		err = PTR_ERR(io);
		goto out;
	}

	iowrite32(page, io + n * PAGE_SIZE / sizeof(*io));
	i915_vma_unpin_iomap(vma);

	offset = tiled_offset(tile, page << PAGE_SHIFT);
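	/*
	 * The expected tiled location of this write can land beyond the end
	 * of the object, in which case there is no backing page for us to
	 * kmap and verify; skip the readback for such offsets.
	 */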
	if (offset >= obj->base.size)
		goto out;

	intel_gt_flush_ggtt_writes(&to_i915(obj->base.dev)->gt);

	p = i915_gem_object_get_page(obj, offset >> PAGE_SHIFT);
	cpu = kmap(p) + offset_in_page(offset);
	drm_clflush_virt_range(cpu, sizeof(*cpu));
	if (*cpu != (u32)page) {
		pr_err("Partial view for %lu [%u] (offset=%llu, size=%u [%llu, row size %u], fence=%d, tiling=%d, stride=%d) misalignment, expected write to page (%llu + %u [0x%llx]) of 0x%x, found 0x%x\n",
		       page, n,
		       view.partial.offset,
		       view.partial.size,
		       vma->size >> PAGE_SHIFT,
		       tile->tiling ? tile_row_pages(obj) : 0,
		       vma->fence ? vma->fence->id : -1, tile->tiling, tile->stride,
		       offset >> PAGE_SHIFT,
		       (unsigned int)offset_in_page(offset),
		       offset,
		       (u32)page, *cpu);
		err = -EINVAL;
	}
	*cpu = 0;
	drm_clflush_virt_range(cpu, sizeof(*cpu));
	kunmap(p);

out:
	i915_vma_destroy(vma);
	return err;
}

static int check_partial_mappings(struct drm_i915_gem_object *obj,
				  const struct tile *tile,
				  unsigned long end_time)
{
	const unsigned int nreal = obj->scratch / PAGE_SIZE;
	const unsigned long npages = obj->base.size / PAGE_SIZE;
	struct i915_vma *vma;
	unsigned long page;
	int err;

	err = i915_gem_object_set_tiling(obj, tile->tiling, tile->stride);
	if (err) {
		pr_err("Failed to set tiling mode=%u, stride=%u, err=%d\n",
		       tile->tiling, tile->stride, err);
		return err;
	}

	GEM_BUG_ON(i915_gem_object_get_tiling(obj) != tile->tiling);
	GEM_BUG_ON(i915_gem_object_get_stride(obj) != tile->stride);

	i915_gem_object_lock(obj);
	err = i915_gem_object_set_to_gtt_domain(obj, true);
	i915_gem_object_unlock(obj);
	if (err) {
		pr_err("Failed to flush to GTT write domain; err=%d\n", err);
		return err;
	}

	for_each_prime_number_from(page, 1, npages) {
		struct i915_ggtt_view view =
			compute_partial_view(obj, page, MIN_CHUNK_PAGES);
		u32 __iomem *io;
		struct page *p;
		unsigned int n;
		u64 offset;
		u32 *cpu;

		GEM_BUG_ON(view.partial.size > nreal);
		cond_resched();

		vma = i915_gem_object_ggtt_pin(obj, &view, 0, 0, PIN_MAPPABLE);
		if (IS_ERR(vma)) {
			pr_err("Failed to pin partial view: offset=%lu; err=%d\n",
			       page, (int)PTR_ERR(vma));
			return PTR_ERR(vma);
		}

		n = page - view.partial.offset;
		GEM_BUG_ON(n >= view.partial.size);

		io = i915_vma_pin_iomap(vma);
		i915_vma_unpin(vma);
		if (IS_ERR(io)) {
			pr_err("Failed to iomap partial view: offset=%lu; err=%d\n",
			       page, (int)PTR_ERR(io));
			return PTR_ERR(io);
		}

		iowrite32(page, io + n * PAGE_SIZE / sizeof(*io));
		i915_vma_unpin_iomap(vma);

		offset = tiled_offset(tile, page << PAGE_SHIFT);
		if (offset >= obj->base.size)
			continue;

		intel_gt_flush_ggtt_writes(&to_i915(obj->base.dev)->gt);

		p = i915_gem_object_get_page(obj, offset >> PAGE_SHIFT);
		cpu = kmap(p) + offset_in_page(offset);
		drm_clflush_virt_range(cpu, sizeof(*cpu));
		if (*cpu != (u32)page) {
			pr_err("Partial view for %lu [%u] (offset=%llu, size=%u [%llu, row size %u], fence=%d, tiling=%d, stride=%d) misalignment, expected write to page (%llu + %u [0x%llx]) of 0x%x, found 0x%x\n",
			       page, n,
			       view.partial.offset,
			       view.partial.size,
			       vma->size >> PAGE_SHIFT,
			       tile->tiling ? tile_row_pages(obj) : 0,
			       vma->fence ? vma->fence->id : -1, tile->tiling, tile->stride,
			       offset >> PAGE_SHIFT,
			       (unsigned int)offset_in_page(offset),
			       offset,
			       (u32)page, *cpu);
			err = -EINVAL;
		}
		*cpu = 0;
		drm_clflush_virt_range(cpu, sizeof(*cpu));
		kunmap(p);
		if (err)
			return err;

		i915_vma_destroy(vma);

		if (igt_timeout(end_time,
				"%s: timed out after tiling=%d stride=%d\n",
				__func__, tile->tiling, tile->stride))
			return -EINTR;
	}

	return 0;
}

static unsigned int
setup_tile_size(struct tile *tile, struct drm_i915_private *i915)
{
	if (INTEL_GEN(i915) <= 2) {
		tile->height = 16;
		tile->width = 128;
		tile->size = 11;
	} else if (tile->tiling == I915_TILING_Y &&
		   HAS_128_BYTE_Y_TILING(i915)) {
		tile->height = 32;
		tile->width = 128;
		tile->size = 12;
	} else {
		tile->height = 8;
		tile->width = 512;
		tile->size = 12;
	}

	if (INTEL_GEN(i915) < 4)
		return 8192 / tile->width;
	else if (INTEL_GEN(i915) < 7)
		return 128 * I965_FENCE_MAX_PITCH_VAL / tile->width;
	else
		return 128 * GEN7_FENCE_MAX_PITCH_VAL / tile->width;
}

static int igt_partial_tiling(void *arg)
{
	const unsigned int nreal = 1 << 12; /* largest tile row x2 */
	struct drm_i915_private *i915 = arg;
	struct drm_i915_gem_object *obj;
	intel_wakeref_t wakeref;
	int tiling;
	int err;

	/* We want to check the page mapping and fencing of a large object
	 * mmapped through the GTT. The object we create is larger than can
	 * possibly be mmapped as a whole, and so we must use partial GGTT vma.
	 * We then check that a write through each partial GGTT vma ends up
	 * in the right set of pages within the object, and with the expected
	 * tiling, which we verify by manual swizzling.
	 */

	obj = huge_gem_object(i915,
			      nreal << PAGE_SHIFT,
			      (1 + next_prime_number(i915->ggtt.vm.total >> PAGE_SHIFT)) << PAGE_SHIFT);
	if (IS_ERR(obj))
		return PTR_ERR(obj);

	err = i915_gem_object_pin_pages(obj);
	if (err) {
		pr_err("Failed to allocate %u pages (%lu total), err=%d\n",
		       nreal, obj->base.size / PAGE_SIZE, err);
		goto out;
	}

	wakeref = intel_runtime_pm_get(&i915->runtime_pm);

	if (1) {
		IGT_TIMEOUT(end);
		struct tile tile;

		tile.height = 1;
		tile.width = 1;
		tile.size = 0;
		tile.stride = 0;
		tile.swizzle = I915_BIT_6_SWIZZLE_NONE;
		tile.tiling = I915_TILING_NONE;

		err = check_partial_mappings(obj, &tile, end);
		if (err && err != -EINTR)
			goto out_unlock;
	}

	for (tiling = I915_TILING_X; tiling <= I915_TILING_Y; tiling++) {
		IGT_TIMEOUT(end);
		unsigned int max_pitch;
		unsigned int pitch;
		struct tile tile;

		if (i915->quirks & QUIRK_PIN_SWIZZLED_PAGES)
			/*
			 * The swizzling pattern is actually unknown as it
			 * varies based on physical address of each page.
			 * See i915_gem_detect_bit_6_swizzle().
			 */
			break;

		tile.tiling = tiling;
		switch (tiling) {
		case I915_TILING_X:
			tile.swizzle = i915->mm.bit_6_swizzle_x;
			break;
		case I915_TILING_Y:
			tile.swizzle = i915->mm.bit_6_swizzle_y;
			break;
		}

		GEM_BUG_ON(tile.swizzle == I915_BIT_6_SWIZZLE_UNKNOWN);
		if (tile.swizzle == I915_BIT_6_SWIZZLE_9_17 ||
		    tile.swizzle == I915_BIT_6_SWIZZLE_9_10_17)
			continue;

		max_pitch = setup_tile_size(&tile, i915);

		for (pitch = max_pitch; pitch; pitch >>= 1) {
			tile.stride = tile.width * pitch;
			err = check_partial_mappings(obj, &tile, end);
			if (err == -EINTR)
				goto next_tiling;
			if (err)
				goto out_unlock;

			if (pitch > 2 && INTEL_GEN(i915) >= 4) {
				tile.stride = tile.width * (pitch - 1);
				err = check_partial_mappings(obj, &tile, end);
				if (err == -EINTR)
					goto next_tiling;
				if (err)
					goto out_unlock;
			}

			if (pitch < max_pitch && INTEL_GEN(i915) >= 4) {
				tile.stride = tile.width * (pitch + 1);
				err = check_partial_mappings(obj, &tile, end);
				if (err == -EINTR)
					goto next_tiling;
				if (err)
					goto out_unlock;
			}
		}

		if (INTEL_GEN(i915) >= 4) {
			for_each_prime_number(pitch, max_pitch) {
				tile.stride = tile.width * pitch;
				err = check_partial_mappings(obj, &tile, end);
				if (err == -EINTR)
					goto next_tiling;
				if (err)
					goto out_unlock;
			}
		}

next_tiling: ;
	}

out_unlock:
	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
	i915_gem_object_unpin_pages(obj);
out:
	i915_gem_object_put(obj);
	return err;
}

static int igt_smoke_tiling(void *arg)
{
	const unsigned int nreal = 1 << 12; /* largest tile row x2 */
	struct drm_i915_private *i915 = arg;
	struct drm_i915_gem_object *obj;
	intel_wakeref_t wakeref;
	I915_RND_STATE(prng);
	unsigned long count;
	IGT_TIMEOUT(end);
	int err;

	/*
	 * igt_partial_tiling() does an exhaustive check of partial tiling
	 * chunking, but will undoubtedly run out of time. Here, we do a
	 * randomised search and hope over many runs of 1s with different
	 * seeds we will do a thorough check.
	 *
	 * Remember to look at the st_seed if we see a flip-flop in BAT!
	 */

	if (i915->quirks & QUIRK_PIN_SWIZZLED_PAGES)
		return 0;

	obj = huge_gem_object(i915,
			      nreal << PAGE_SHIFT,
			      (1 + next_prime_number(i915->ggtt.vm.total >> PAGE_SHIFT)) << PAGE_SHIFT);
	if (IS_ERR(obj))
		return PTR_ERR(obj);

	err = i915_gem_object_pin_pages(obj);
	if (err) {
		pr_err("Failed to allocate %u pages (%lu total), err=%d\n",
		       nreal, obj->base.size / PAGE_SIZE, err);
		goto out;
	}

	wakeref = intel_runtime_pm_get(&i915->runtime_pm);

	count = 0;
	do {
		struct tile tile;

		tile.tiling =
			i915_prandom_u32_max_state(I915_TILING_Y + 1, &prng);
		switch (tile.tiling) {
		case I915_TILING_NONE:
			tile.height = 1;
			tile.width = 1;
			tile.size = 0;
			tile.stride = 0;
			tile.swizzle = I915_BIT_6_SWIZZLE_NONE;
			break;

		case I915_TILING_X:
			tile.swizzle = i915->mm.bit_6_swizzle_x;
			break;
		case I915_TILING_Y:
			tile.swizzle = i915->mm.bit_6_swizzle_y;
			break;
		}

		if (tile.swizzle == I915_BIT_6_SWIZZLE_9_17 ||
		    tile.swizzle == I915_BIT_6_SWIZZLE_9_10_17)
			continue;

		if (tile.tiling != I915_TILING_NONE) {
			unsigned int max_pitch = setup_tile_size(&tile, i915);

			tile.stride =
				i915_prandom_u32_max_state(max_pitch, &prng);
			tile.stride = (1 + tile.stride) * tile.width;
			if (INTEL_GEN(i915) < 4)
				tile.stride = rounddown_pow_of_two(tile.stride);
		}

		err = check_partial_mapping(obj, &tile, &prng);
		if (err)
			break;

		count++;
	} while (!__igt_timeout(end, NULL));

	pr_info("%s: Completed %lu trials\n", __func__, count);

	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
	i915_gem_object_unpin_pages(obj);
out:
	i915_gem_object_put(obj);
	return err;
}

static int make_obj_busy(struct drm_i915_gem_object *obj)
{
	struct drm_i915_private *i915 = to_i915(obj->base.dev);
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	struct i915_vma *vma;
	int err;

	vma = i915_vma_instance(obj, &i915->ggtt.vm, NULL);
	if (IS_ERR(vma))
		return PTR_ERR(vma);

	err = i915_vma_pin(vma, 0, 0, PIN_USER);
	if (err)
		return err;

	for_each_engine(engine, i915, id) {
		struct i915_request *rq;

		rq = i915_request_create(engine->kernel_context);
		if (IS_ERR(rq)) {
			i915_vma_unpin(vma);
			return PTR_ERR(rq);
		}

		i915_vma_lock(vma);
		err = i915_request_await_object(rq, vma->obj, true);
		if (err == 0)
			err = i915_vma_move_to_active(vma, rq,
						      EXEC_OBJECT_WRITE);
		i915_vma_unlock(vma);

		i915_request_add(rq);
	}

	i915_vma_unpin(vma);
	i915_gem_object_put(obj); /* leave it only alive via its active ref */

	return err;
}

static bool assert_mmap_offset(struct drm_i915_private *i915,
			       unsigned long size,
			       int expected)
{
	struct drm_i915_gem_object *obj;
	int err;

	obj = i915_gem_object_create_internal(i915, size);
	if (IS_ERR(obj))
		return PTR_ERR(obj);

	err = create_mmap_offset(obj);
	i915_gem_object_put(obj);

	return err == expected;
}

static void disable_retire_worker(struct drm_i915_private *i915)
{
	i915_gem_driver_unregister__shrinker(i915);
	intel_gt_pm_get(&i915->gt);
	cancel_delayed_work_sync(&i915->gt.requests.retire_work);
}

static void restore_retire_worker(struct drm_i915_private *i915)
{
	igt_flush_test(i915);
	intel_gt_pm_put(&i915->gt);
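	/*
	 * With the extra GT wakeref taken in disable_retire_worker()
	 * released, it is safe to let the shrinker run again.
	 */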
	i915_gem_driver_register__shrinker(i915);
}

static void mmap_offset_lock(struct drm_i915_private *i915)
	__acquires(&i915->drm.vma_offset_manager->vm_lock)
{
	write_lock(&i915->drm.vma_offset_manager->vm_lock);
}

static void mmap_offset_unlock(struct drm_i915_private *i915)
	__releases(&i915->drm.vma_offset_manager->vm_lock)
{
	write_unlock(&i915->drm.vma_offset_manager->vm_lock);
}

static int igt_mmap_offset_exhaustion(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct drm_mm *mm = &i915->drm.vma_offset_manager->vm_addr_space_mm;
	struct drm_i915_gem_object *obj;
	struct drm_mm_node resv, *hole;
	u64 hole_start, hole_end;
	int loop, err;

	/* Disable background reaper */
	disable_retire_worker(i915);
	GEM_BUG_ON(!i915->gt.awake);

	/* Trim the device mmap space to only a page */
	memset(&resv, 0, sizeof(resv));
	drm_mm_for_each_hole(hole, mm, hole_start, hole_end) {
		resv.start = hole_start;
		resv.size = hole_end - hole_start - 1; /* PAGE_SIZE units */
		mmap_offset_lock(i915);
		err = drm_mm_reserve_node(mm, &resv);
		mmap_offset_unlock(i915);
		if (err) {
			pr_err("Failed to trim VMA manager, err=%d\n", err);
			goto out_park;
		}
		break;
	}

	/* Just fits! */
	if (!assert_mmap_offset(i915, PAGE_SIZE, 0)) {
		pr_err("Unable to insert object into single page hole\n");
		err = -EINVAL;
		goto out;
	}

	/* Too large */
	if (!assert_mmap_offset(i915, 2 * PAGE_SIZE, -ENOSPC)) {
		pr_err("Unexpectedly succeeded in inserting too large object into single page hole\n");
		err = -EINVAL;
		goto out;
	}

	/* Fill the hole, further allocation attempts should then fail */
	obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
	if (IS_ERR(obj)) {
		err = PTR_ERR(obj);
		goto out;
	}

	err = create_mmap_offset(obj);
	if (err) {
		pr_err("Unable to insert object into reclaimed hole\n");
		goto err_obj;
	}

	if (!assert_mmap_offset(i915, PAGE_SIZE, -ENOSPC)) {
		pr_err("Unexpectedly succeeded in inserting object into no holes!\n");
		err = -EINVAL;
		goto err_obj;
	}

	i915_gem_object_put(obj);

	/* Now fill with busy dead objects that we expect to reap */
	for (loop = 0; loop < 3; loop++) {
		if (intel_gt_is_wedged(&i915->gt))
			break;

		obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
		if (IS_ERR(obj)) {
			err = PTR_ERR(obj);
			goto out;
		}

		err = make_obj_busy(obj);
		if (err) {
			pr_err("[loop %d] Failed to busy the object\n", loop);
			goto err_obj;
		}
	}

out:
	mmap_offset_lock(i915);
	drm_mm_remove_node(&resv);
	mmap_offset_unlock(i915);
out_park:
	restore_retire_worker(i915);
	return err;
err_obj:
	i915_gem_object_put(obj);
	goto out;
}

int i915_gem_mman_live_selftests(struct drm_i915_private *i915)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(igt_partial_tiling),
		SUBTEST(igt_smoke_tiling),
		SUBTEST(igt_mmap_offset_exhaustion),
	};

	return i915_subtests(tests, i915);
}