// SPDX-License-Identifier: MIT
/*
 * Copyright © 2019 Intel Corporation
 */

#include "i915_selftest.h"

#include "gt/intel_engine_user.h"
#include "gt/intel_gt.h"
#include "gt/intel_gpu_commands.h"
#include "gem/i915_gem_lmem.h"

#include "selftests/igt_flush_test.h"
#include "selftests/mock_drm.h"
#include "selftests/i915_random.h"
#include "huge_gem_object.h"
#include "mock_context.h"

static int __igt_client_fill(struct intel_engine_cs *engine)
{
	struct intel_context *ce = engine->kernel_context;
	struct drm_i915_gem_object *obj;
	I915_RND_STATE(prng);
	IGT_TIMEOUT(end);
	u32 *vaddr;
	int err = 0;

	intel_engine_pm_get(engine);
	do {
		const u32 max_block_size = S16_MAX * PAGE_SIZE;
		u32 sz = min_t(u64, ce->vm->total >> 4,
			       prandom_u32_state(&prng));
		u32 phys_sz = sz % (max_block_size + 1);
		u32 val = prandom_u32_state(&prng);
		u32 i;

		sz = round_up(sz, PAGE_SIZE);
		phys_sz = round_up(phys_sz, PAGE_SIZE);

		pr_debug("%s with phys_sz=%x, sz=%x, val=%x\n", __func__,
			 phys_sz, sz, val);

		obj = huge_gem_object(engine->i915, phys_sz, sz);
		if (IS_ERR(obj)) {
			err = PTR_ERR(obj);
			goto err_flush;
		}

		vaddr = i915_gem_object_pin_map(obj, I915_MAP_WB);
		if (IS_ERR(vaddr)) {
			err = PTR_ERR(vaddr);
			goto err_put;
		}

		/*
		 * XXX: The goal is to move this to get_pages, so try to dirty
		 * the CPU cache first to check that we do the required clflush
		 * before scheduling the blt for !llc platforms. This matches
		 * some version of reality where at get_pages the pages
		 * themselves may not yet be coherent with the GPU (swap-in).
		 * If we are missing the flush then we should see the stale
		 * cache values after we do the set_to_cpu_domain and pick it
		 * up as a test failure.
		 */
		memset32(vaddr, val ^ 0xdeadbeaf,
			 huge_gem_object_phys_size(obj) / sizeof(u32));

		if (!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE))
			obj->cache_dirty = true;

		err = i915_gem_schedule_fill_pages_blt(obj, ce, obj->mm.pages,
						       &obj->mm.page_sizes,
						       val);
		if (err)
			goto err_unpin;

		i915_gem_object_lock(obj, NULL);
		err = i915_gem_object_set_to_cpu_domain(obj, false);
		i915_gem_object_unlock(obj);
		if (err)
			goto err_unpin;

		for (i = 0; i < huge_gem_object_phys_size(obj) / sizeof(u32); ++i) {
			if (vaddr[i] != val) {
				pr_err("vaddr[%u]=%x, expected=%x\n", i,
				       vaddr[i], val);
				err = -EINVAL;
				goto err_unpin;
			}
		}

		i915_gem_object_unpin_map(obj);
		i915_gem_object_put(obj);
	} while (!time_after(jiffies, end));

	goto err_flush;

err_unpin:
	i915_gem_object_unpin_map(obj);
err_put:
	i915_gem_object_put(obj);
err_flush:
	if (err == -ENOMEM)
		err = 0;
	intel_engine_pm_put(engine);

	return err;
}
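/*
 * Walk every user-visible copy engine and run the fill test on each.
 * The object sizes above are randomised, so a transient -ENOMEM from
 * an oversized allocation is treated as benign rather than a failure.
 */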
static int igt_client_fill(void *arg)
{
	int inst = 0;

	do {
		struct intel_engine_cs *engine;
		int err;

		engine = intel_engine_lookup_user(arg,
						  I915_ENGINE_CLASS_COPY,
						  inst++);
		if (!engine)
			return 0;

		err = __igt_client_fill(engine);
		if (err == -ENOMEM)
			err = 0;
		if (err)
			return err;
	} while (1);
}

#define WIDTH 512
#define HEIGHT 32

struct blit_buffer {
	struct i915_vma *vma;
	u32 start_val;
	u32 tiling;
};

struct tiled_blits {
	struct intel_context *ce;
	struct blit_buffer buffers[3];
	struct blit_buffer scratch;
	struct i915_vma *batch;
	u64 hole;
	u32 width;
	u32 height;
};
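/*
 * Build a one-shot batch for a single blit: LRI to BCS_SWCTRL to
 * select Y-major addressing for the source/destination as required,
 * MI_FLUSH_DW so the new tiling mode takes effect, then an
 * XY_SRC_COPY_BLT. Note that for tiled surfaces the pitch fields are
 * programmed in dwords rather than bytes.
 */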
static int prepare_blit(const struct tiled_blits *t,
			struct blit_buffer *dst,
			struct blit_buffer *src,
			struct drm_i915_gem_object *batch)
{
	const int gen = INTEL_GEN(to_i915(batch->base.dev));
	bool use_64b_reloc = gen >= 8;
	u32 src_pitch, dst_pitch;
	u32 cmd, *cs;

	cs = i915_gem_object_pin_map(batch, I915_MAP_WC);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	*cs++ = MI_LOAD_REGISTER_IMM(1);
	*cs++ = i915_mmio_reg_offset(BCS_SWCTRL);
	cmd = (BCS_SRC_Y | BCS_DST_Y) << 16;
	if (src->tiling == I915_TILING_Y)
		cmd |= BCS_SRC_Y;
	if (dst->tiling == I915_TILING_Y)
		cmd |= BCS_DST_Y;
	*cs++ = cmd;

	cmd = MI_FLUSH_DW;
	if (gen >= 8)
		cmd++;
	*cs++ = cmd;
	*cs++ = 0;
	*cs++ = 0;
	*cs++ = 0;

	cmd = XY_SRC_COPY_BLT_CMD | BLT_WRITE_RGBA | (8 - 2);
	if (gen >= 8)
		cmd += 2;

	src_pitch = t->width * 4;
	if (src->tiling) {
		cmd |= XY_SRC_COPY_BLT_SRC_TILED;
		src_pitch /= 4;
	}

	dst_pitch = t->width * 4;
	if (dst->tiling) {
		cmd |= XY_SRC_COPY_BLT_DST_TILED;
		dst_pitch /= 4;
	}

	*cs++ = cmd;
	*cs++ = BLT_DEPTH_32 | BLT_ROP_SRC_COPY | dst_pitch;
	*cs++ = 0;
	*cs++ = t->height << 16 | t->width;
	*cs++ = lower_32_bits(dst->vma->node.start);
	if (use_64b_reloc)
		*cs++ = upper_32_bits(dst->vma->node.start);
	*cs++ = 0;
	*cs++ = src_pitch;
	*cs++ = lower_32_bits(src->vma->node.start);
	if (use_64b_reloc)
		*cs++ = upper_32_bits(src->vma->node.start);

	*cs++ = MI_BATCH_BUFFER_END;

	i915_gem_object_flush_map(batch);
	i915_gem_object_unpin_map(batch);

	return 0;
}

static void tiled_blits_destroy_buffers(struct tiled_blits *t)
{
	int i;

	for (i = 0; i < ARRAY_SIZE(t->buffers); i++)
		i915_vma_put(t->buffers[i].vma);

	i915_vma_put(t->scratch.vma);
	i915_vma_put(t->batch);
}

static struct i915_vma *
__create_vma(struct tiled_blits *t, size_t size, bool lmem)
{
	struct drm_i915_private *i915 = t->ce->vm->i915;
	struct drm_i915_gem_object *obj;
	struct i915_vma *vma;

	if (lmem)
		obj = i915_gem_object_create_lmem(i915, size, 0);
	else
		obj = i915_gem_object_create_shmem(i915, size);
	if (IS_ERR(obj))
		return ERR_CAST(obj);

	vma = i915_vma_instance(obj, t->ce->vm, NULL);
	if (IS_ERR(vma))
		i915_gem_object_put(obj);

	return vma;
}

static struct i915_vma *create_vma(struct tiled_blits *t, bool lmem)
{
	return __create_vma(t, PAGE_ALIGN(t->width * t->height * 4), lmem);
}

static int tiled_blits_create_buffers(struct tiled_blits *t,
				      int width, int height,
				      struct rnd_state *prng)
{
	struct drm_i915_private *i915 = t->ce->engine->i915;
	int i;

	t->width = width;
	t->height = height;

	t->batch = __create_vma(t, PAGE_SIZE, false);
	if (IS_ERR(t->batch))
		return PTR_ERR(t->batch);

	t->scratch.vma = create_vma(t, false);
	if (IS_ERR(t->scratch.vma)) {
		i915_vma_put(t->batch);
		return PTR_ERR(t->scratch.vma);
	}

	for (i = 0; i < ARRAY_SIZE(t->buffers); i++) {
		struct i915_vma *vma;

		vma = create_vma(t, HAS_LMEM(i915) && i % 2);
		if (IS_ERR(vma)) {
			tiled_blits_destroy_buffers(t);
			return PTR_ERR(vma);
		}

		t->buffers[i].vma = vma;
		t->buffers[i].tiling =
			i915_prandom_u32_max_state(I915_TILING_Y + 1, prng);
	}

	return 0;
}

static void fill_scratch(struct tiled_blits *t, u32 *vaddr, u32 val)
{
	int i;

	t->scratch.start_val = val;
	for (i = 0; i < t->width * t->height; i++)
		vaddr[i] = val++;

	i915_gem_object_flush_map(t->scratch.vma->obj);
}

static u64 swizzle_bit(unsigned int bit, u64 offset)
{
	return (offset & BIT_ULL(bit)) >> (bit - 6);
}

static u64 tiled_offset(const struct intel_gt *gt,
			u64 v,
			unsigned int stride,
			unsigned int tiling)
{
	unsigned int swizzle;
	u64 x, y;

	if (tiling == I915_TILING_NONE)
		return v;

	y = div64_u64_rem(v, stride, &x);

	if (tiling == I915_TILING_X) {
		v = div64_u64_rem(y, 8, &y) * stride * 8;
		v += y * 512;
		v += div64_u64_rem(x, 512, &x) << 12;
		v += x;

		swizzle = gt->ggtt->bit_6_swizzle_x;
	} else {
		const unsigned int ytile_span = 16;
		const unsigned int ytile_height = 512;

		v = div64_u64_rem(y, 32, &y) * stride * 32;
		v += y * ytile_span;
		v += div64_u64_rem(x, ytile_span, &x) * ytile_height;
		v += x;

		swizzle = gt->ggtt->bit_6_swizzle_y;
	}

	switch (swizzle) {
	case I915_BIT_6_SWIZZLE_9:
		v ^= swizzle_bit(9, v);
		break;
	case I915_BIT_6_SWIZZLE_9_10:
		v ^= swizzle_bit(9, v) ^ swizzle_bit(10, v);
		break;
	case I915_BIT_6_SWIZZLE_9_11:
		v ^= swizzle_bit(9, v) ^ swizzle_bit(11, v);
		break;
	case I915_BIT_6_SWIZZLE_9_10_11:
		v ^= swizzle_bit(9, v) ^ swizzle_bit(10, v) ^ swizzle_bit(11, v);
		break;
	}

	return v;
}

static const char *repr_tiling(int tiling)
{
	switch (tiling) {
	case I915_TILING_NONE: return "linear";
	case I915_TILING_X: return "X";
	case I915_TILING_Y: return "Y";
	default: return "unknown";
	}
}
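/*
 * Spot-check a blitted buffer: read back the first pixel plus one
 * randomly chosen pixel, using tiled_offset() above to locate where
 * the tiled (and bit-6 swizzled) layout placed them in the
 * CPU-visible mapping. As an illustration of the X-tiled case (before
 * swizzling), with a 2048 byte stride the linear byte offset
 * (x, y) = (520, 10), i.e. v = 21000, maps to
 * (10 / 8) * 2048 * 8 + (10 % 8) * 512 + (520 / 512) * 4096 + (520 % 512)
 * = 16384 + 1024 + 4096 + 8 = 21512.
 */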
static int verify_buffer(const struct tiled_blits *t,
			 struct blit_buffer *buf,
			 struct rnd_state *prng)
{
	const u32 *vaddr;
	int ret = 0;
	int x, y, p;

	x = i915_prandom_u32_max_state(t->width, prng);
	y = i915_prandom_u32_max_state(t->height, prng);
	p = y * t->width + x;

	vaddr = i915_gem_object_pin_map(buf->vma->obj, I915_MAP_WC);
	if (IS_ERR(vaddr))
		return PTR_ERR(vaddr);

	if (vaddr[0] != buf->start_val) {
		ret = -EINVAL;
	} else {
		u64 v = tiled_offset(buf->vma->vm->gt,
				     p * 4, t->width * 4,
				     buf->tiling);

		if (vaddr[v / sizeof(*vaddr)] != buf->start_val + p)
			ret = -EINVAL;
	}
	if (ret) {
		pr_err("Invalid %s tiling detected at (%d, %d), start_val %x\n",
		       repr_tiling(buf->tiling),
		       x, y, buf->start_val);
		igt_hexdump(vaddr, 4096);
	}

	i915_gem_object_unpin_map(buf->vma->obj);
	return ret;
}

static int move_to_active(struct i915_vma *vma,
			  struct i915_request *rq,
			  unsigned int flags)
{
	int err;

	i915_vma_lock(vma);
	err = i915_request_await_object(rq, vma->obj, false);
	if (err == 0)
		err = i915_vma_move_to_active(vma, rq, flags);
	i915_vma_unlock(vma);

	return err;
}

static int pin_buffer(struct i915_vma *vma, u64 addr)
{
	int err;

	if (drm_mm_node_allocated(&vma->node) && vma->node.start != addr) {
		err = i915_vma_unbind(vma);
		if (err)
			return err;
	}

	err = i915_vma_pin(vma, 0, 0, PIN_USER | PIN_OFFSET_FIXED | addr);
	if (err)
		return err;

	return 0;
}

static int
tiled_blit(struct tiled_blits *t,
	   struct blit_buffer *dst, u64 dst_addr,
	   struct blit_buffer *src, u64 src_addr)
{
	struct i915_request *rq;
	int err;

	err = pin_buffer(src->vma, src_addr);
	if (err) {
		pr_err("Cannot pin src @ %llx\n", src_addr);
		return err;
	}

	err = pin_buffer(dst->vma, dst_addr);
	if (err) {
		pr_err("Cannot pin dst @ %llx\n", dst_addr);
		goto err_src;
	}

	err = i915_vma_pin(t->batch, 0, 0, PIN_USER | PIN_HIGH);
	if (err) {
		pr_err("cannot pin batch\n");
		goto err_dst;
	}

	err = prepare_blit(t, dst, src, t->batch->obj);
	if (err)
		goto err_bb;

	rq = intel_context_create_request(t->ce);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto err_bb;
	}

	err = move_to_active(t->batch, rq, 0);
	if (!err)
		err = move_to_active(src->vma, rq, 0);
	if (!err)
		err = move_to_active(dst->vma, rq, 0);
	if (!err)
		err = rq->engine->emit_bb_start(rq,
						t->batch->node.start,
						t->batch->node.size,
						0);
	i915_request_get(rq);
	i915_request_add(rq);
	if (i915_request_wait(rq, 0, HZ / 2) < 0)
		err = -ETIME;
	i915_request_put(rq);

	dst->start_val = src->start_val;
err_bb:
	i915_vma_unpin(t->batch);
err_dst:
	i915_vma_unpin(dst->vma);
err_src:
	i915_vma_unpin(src->vma);
	return err;
}
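/*
 * Probe the context's GTT for a free range large enough to replay the
 * blits at explicitly chosen offsets. The node itself is dropped
 * again and only its start address is kept, as tiled_blit() pins each
 * buffer with PIN_OFFSET_FIXED relative to t->hole.
 */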
static struct tiled_blits *
tiled_blits_create(struct intel_engine_cs *engine, struct rnd_state *prng)
{
	struct drm_mm_node hole;
	struct tiled_blits *t;
	u64 hole_size;
	int err;

	t = kzalloc(sizeof(*t), GFP_KERNEL);
	if (!t)
		return ERR_PTR(-ENOMEM);

	t->ce = intel_context_create(engine);
	if (IS_ERR(t->ce)) {
		err = PTR_ERR(t->ce);
		goto err_free;
	}

	hole_size = 2 * PAGE_ALIGN(WIDTH * HEIGHT * 4);
	hole_size *= 2; /* room to maneuver */
	hole_size += 2 * I915_GTT_MIN_ALIGNMENT;

	mutex_lock(&t->ce->vm->mutex);
	memset(&hole, 0, sizeof(hole));
	err = drm_mm_insert_node_in_range(&t->ce->vm->mm, &hole,
					  hole_size, 0, I915_COLOR_UNEVICTABLE,
					  0, U64_MAX,
					  DRM_MM_INSERT_BEST);
	if (!err)
		drm_mm_remove_node(&hole);
	mutex_unlock(&t->ce->vm->mutex);
	if (err) {
		err = -ENODEV;
		goto err_put;
	}

	t->hole = hole.start + I915_GTT_MIN_ALIGNMENT;
	pr_info("Using hole at %llx\n", t->hole);

	err = tiled_blits_create_buffers(t, WIDTH, HEIGHT, prng);
	if (err)
		goto err_put;

	return t;

err_put:
	intel_context_put(t->ce);
err_free:
	kfree(t);
	return ERR_PTR(err);
}

static void tiled_blits_destroy(struct tiled_blits *t)
{
	tiled_blits_destroy_buffers(t);

	intel_context_put(t->ce);
	kfree(t);
}

static int tiled_blits_prepare(struct tiled_blits *t,
			       struct rnd_state *prng)
{
	u64 offset = PAGE_ALIGN(t->width * t->height * 4);
	u32 *map;
	int err;
	int i;

	map = i915_gem_object_pin_map(t->scratch.vma->obj, I915_MAP_WC);
	if (IS_ERR(map))
		return PTR_ERR(map);

	/* Use scratch to fill objects */
	for (i = 0; i < ARRAY_SIZE(t->buffers); i++) {
		fill_scratch(t, map, prandom_u32_state(prng));
		GEM_BUG_ON(verify_buffer(t, &t->scratch, prng));

		err = tiled_blit(t,
				 &t->buffers[i], t->hole + offset,
				 &t->scratch, t->hole);
		if (err == 0)
			err = verify_buffer(t, &t->buffers[i], prng);
		if (err) {
			pr_err("Failed to create buffer %d\n", i);
			break;
		}
	}

	i915_gem_object_unpin_map(t->scratch.vma->obj);
	return err;
}

static int tiled_blits_bounce(struct tiled_blits *t, struct rnd_state *prng)
{
	u64 offset =
		round_up(t->width * t->height * 4, 2 * I915_GTT_MIN_ALIGNMENT);
	int err;

	/* We want to check position invariant tiling across GTT eviction */

	err = tiled_blit(t,
			 &t->buffers[1], t->hole + offset / 2,
			 &t->buffers[0], t->hole + 2 * offset);
	if (err)
		return err;

	/* Reposition so that we overlap the old addresses, and slightly off */
	err = tiled_blit(t,
			 &t->buffers[2], t->hole + I915_GTT_MIN_ALIGNMENT,
			 &t->buffers[1], t->hole + 3 * offset / 2);
	if (err)
		return err;

	err = verify_buffer(t, &t->buffers[2], prng);
	if (err)
		return err;

	return 0;
}

static int __igt_client_tiled_blits(struct intel_engine_cs *engine,
				    struct rnd_state *prng)
{
	struct tiled_blits *t;
	int err;

	t = tiled_blits_create(engine, prng);
	if (IS_ERR(t))
		return PTR_ERR(t);

	err = tiled_blits_prepare(t, prng);
	if (err)
		goto out;

	err = tiled_blits_bounce(t, prng);
	if (err)
		goto out;

out:
	tiled_blits_destroy(t);
	return err;
}
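/*
 * Bit-17 swizzle modes depend on the physical address of each page,
 * which the CPU-side detiling in tiled_offset() cannot predict, so
 * such machines are skipped (as is anything quirked to pin swizzled
 * pages).
 */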
static bool has_bit17_swizzle(int sw)
{
	return (sw == I915_BIT_6_SWIZZLE_9_10_17 ||
		sw == I915_BIT_6_SWIZZLE_9_17);
}

static bool bad_swizzling(struct drm_i915_private *i915)
{
	struct i915_ggtt *ggtt = &i915->ggtt;

	if (i915->quirks & QUIRK_PIN_SWIZZLED_PAGES)
		return true;

	if (has_bit17_swizzle(ggtt->bit_6_swizzle_x) ||
	    has_bit17_swizzle(ggtt->bit_6_swizzle_y))
		return true;

	return false;
}

static int igt_client_tiled_blits(void *arg)
{
	struct drm_i915_private *i915 = arg;
	I915_RND_STATE(prng);
	int inst = 0;

	/* Test requires explicit BLT tiling controls */
	if (INTEL_GEN(i915) < 4)
		return 0;

	if (bad_swizzling(i915)) /* Requires sane (sub-page) swizzling */
		return 0;

	do {
		struct intel_engine_cs *engine;
		int err;

		engine = intel_engine_lookup_user(i915,
						  I915_ENGINE_CLASS_COPY,
						  inst++);
		if (!engine)
			return 0;

		err = __igt_client_tiled_blits(engine, &prng);
		if (err == -ENODEV)
			err = 0;
		if (err)
			return err;
	} while (1);
}

int i915_gem_client_blt_live_selftests(struct drm_i915_private *i915)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(igt_client_fill),
		SUBTEST(igt_client_tiled_blits),
	};

	if (intel_gt_is_wedged(&i915->gt))
		return 0;

	return i915_live_subtests(tests, i915);
}