// SPDX-License-Identifier: MIT
/*
 * Copyright © 2019 Intel Corporation
 */

#include "i915_selftest.h"

#include "gt/intel_engine_pm.h"
#include "gt/intel_engine_user.h"
#include "gt/intel_gt.h"
#include "gt/intel_gpu_commands.h"
#include "gem/i915_gem_lmem.h"

#include "selftests/igt_flush_test.h"
#include "selftests/mock_drm.h"
#include "selftests/i915_random.h"
#include "huge_gem_object.h"
#include "mock_context.h"

static int __igt_client_fill(struct intel_engine_cs *engine)
{
	struct intel_context *ce = engine->kernel_context;
	struct drm_i915_gem_object *obj;
	struct rnd_state prng;
	IGT_TIMEOUT(end);
	u32 *vaddr;
	int err = 0;

	prandom_seed_state(&prng, i915_selftest.random_seed);

	intel_engine_pm_get(engine);
	do {
		const u32 max_block_size = S16_MAX * PAGE_SIZE;
		u32 sz = min_t(u64, ce->vm->total >> 4, prandom_u32_state(&prng));
		u32 phys_sz = sz % (max_block_size + 1);
		u32 val = prandom_u32_state(&prng);
		u32 i;

		sz = round_up(sz, PAGE_SIZE);
		phys_sz = round_up(phys_sz, PAGE_SIZE);

		pr_debug("%s with phys_sz=%x, sz=%x, val=%x\n", __func__,
			 phys_sz, sz, val);

		obj = huge_gem_object(engine->i915, phys_sz, sz);
		if (IS_ERR(obj)) {
			err = PTR_ERR(obj);
			goto err_flush;
		}

		vaddr = i915_gem_object_pin_map(obj, I915_MAP_WB);
		if (IS_ERR(vaddr)) {
			err = PTR_ERR(vaddr);
			goto err_put;
		}

		/*
		 * XXX: The goal is to move this to get_pages, so try to
		 * dirty the CPU cache first to check that we do the required
		 * clflush before scheduling the blt for !llc platforms. This
		 * matches some version of reality where at get_pages the
		 * pages themselves may not yet be coherent with the GPU
		 * (swap-in). If we are missing the flush then we should see
		 * the stale cache values after we do the set_to_cpu_domain
		 * and pick it up as a test failure.
		 */
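		/*
		 * The poison below (val ^ 0xdeadbeaf) can never equal val,
		 * so any stale cacheline that survives a missing clflush is
		 * guaranteed to show up as a mismatch in the readback loop.
		 */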
		memset32(vaddr, val ^ 0xdeadbeaf,
			 huge_gem_object_phys_size(obj) / sizeof(u32));

		if (!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE))
			obj->cache_dirty = true;

		err = i915_gem_schedule_fill_pages_blt(obj, ce, obj->mm.pages,
						       &obj->mm.page_sizes,
						       val);
		if (err)
			goto err_unpin;

		i915_gem_object_lock(obj);
		err = i915_gem_object_set_to_cpu_domain(obj, false);
		i915_gem_object_unlock(obj);
		if (err)
			goto err_unpin;

		for (i = 0; i < huge_gem_object_phys_size(obj) / sizeof(u32); ++i) {
			if (vaddr[i] != val) {
				pr_err("vaddr[%u]=%x, expected=%x\n", i,
				       vaddr[i], val);
				err = -EINVAL;
				goto err_unpin;
			}
		}

		i915_gem_object_unpin_map(obj);
		i915_gem_object_put(obj);
	} while (!time_after(jiffies, end));

	goto err_flush;

err_unpin:
	i915_gem_object_unpin_map(obj);
err_put:
	i915_gem_object_put(obj);
err_flush:
	if (err == -ENOMEM)
		err = 0;
	intel_engine_pm_put(engine);

	return err;
}

static int igt_client_fill(void *arg)
{
	int inst = 0;

	do {
		struct intel_engine_cs *engine;
		int err;

		engine = intel_engine_lookup_user(arg,
						  I915_ENGINE_CLASS_COPY,
						  inst++);
		if (!engine)
			return 0;

		err = __igt_client_fill(engine);
		if (err == -ENOMEM)
			err = 0;
		if (err)
			return err;
	} while (1);
}

#define WIDTH 512
#define HEIGHT 32

struct blit_buffer {
	struct i915_vma *vma;
	u32 start_val;
	u32 tiling;
};

struct tiled_blits {
	struct intel_context *ce;
	struct blit_buffer buffers[3];
	struct blit_buffer scratch;
	struct i915_vma *batch;
	u64 hole;
	u32 width;
	u32 height;
};

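/*
 * Build the batch for a single blit: an MI_LOAD_REGISTER_IMM to BCS_SWCTRL
 * to select Y-major mode on the blitter where requested, an MI_FLUSH_DW so
 * the register write lands before the copy, then the XY_SRC_COPY_BLT
 * itself. Tiled surfaces take their pitch in dwords rather than bytes,
 * hence the /4 on the pitch below.
 */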
static int prepare_blit(const struct tiled_blits *t,
			struct blit_buffer *dst,
			struct blit_buffer *src,
			struct drm_i915_gem_object *batch)
{
	const int gen = INTEL_GEN(to_i915(batch->base.dev));
	bool use_64b_reloc = gen >= 8;
	u32 src_pitch, dst_pitch;
	u32 cmd, *cs;

	cs = i915_gem_object_pin_map(batch, I915_MAP_WC);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	*cs++ = MI_LOAD_REGISTER_IMM(1);
	*cs++ = i915_mmio_reg_offset(BCS_SWCTRL);
	cmd = (BCS_SRC_Y | BCS_DST_Y) << 16;
	if (src->tiling == I915_TILING_Y)
		cmd |= BCS_SRC_Y;
	if (dst->tiling == I915_TILING_Y)
		cmd |= BCS_DST_Y;
	*cs++ = cmd;

	cmd = MI_FLUSH_DW;
	if (gen >= 8)
		cmd++;
	*cs++ = cmd;
	*cs++ = 0;
	*cs++ = 0;
	*cs++ = 0;

	cmd = XY_SRC_COPY_BLT_CMD | BLT_WRITE_RGBA | (8 - 2);
	if (gen >= 8)
		cmd += 2;

	src_pitch = t->width * 4;
	if (src->tiling) {
		cmd |= XY_SRC_COPY_BLT_SRC_TILED;
		src_pitch /= 4;
	}

	dst_pitch = t->width * 4;
	if (dst->tiling) {
		cmd |= XY_SRC_COPY_BLT_DST_TILED;
		dst_pitch /= 4;
	}

	*cs++ = cmd;
	*cs++ = BLT_DEPTH_32 | BLT_ROP_SRC_COPY | dst_pitch;
	*cs++ = 0;
	*cs++ = t->height << 16 | t->width;
	*cs++ = lower_32_bits(dst->vma->node.start);
	if (use_64b_reloc)
		*cs++ = upper_32_bits(dst->vma->node.start);
	*cs++ = 0;
	*cs++ = src_pitch;
	*cs++ = lower_32_bits(src->vma->node.start);
	if (use_64b_reloc)
		*cs++ = upper_32_bits(src->vma->node.start);

	*cs++ = MI_BATCH_BUFFER_END;

	i915_gem_object_flush_map(batch);
	i915_gem_object_unpin_map(batch);

	return 0;
}

static void tiled_blits_destroy_buffers(struct tiled_blits *t)
{
	int i;

	for (i = 0; i < ARRAY_SIZE(t->buffers); i++)
		i915_vma_put(t->buffers[i].vma);

	i915_vma_put(t->scratch.vma);
	i915_vma_put(t->batch);
}

static struct i915_vma *
__create_vma(struct tiled_blits *t, size_t size, bool lmem)
{
	struct drm_i915_private *i915 = t->ce->vm->i915;
	struct drm_i915_gem_object *obj;
	struct i915_vma *vma;

	if (lmem)
		obj = i915_gem_object_create_lmem(i915, size, 0);
	else
		obj = i915_gem_object_create_shmem(i915, size);
	if (IS_ERR(obj))
		return ERR_CAST(obj);

	vma = i915_vma_instance(obj, t->ce->vm, NULL);
	if (IS_ERR(vma))
		i915_gem_object_put(obj);

	return vma;
}

static struct i915_vma *create_vma(struct tiled_blits *t, bool lmem)
{
	return __create_vma(t, PAGE_ALIGN(t->width * t->height * 4), lmem);
}

static int tiled_blits_create_buffers(struct tiled_blits *t,
				      int width, int height,
				      struct rnd_state *prng)
{
	struct drm_i915_private *i915 = t->ce->engine->i915;
	int i;

	t->width = width;
	t->height = height;

	t->batch = __create_vma(t, PAGE_SIZE, false);
	if (IS_ERR(t->batch))
		return PTR_ERR(t->batch);

	t->scratch.vma = create_vma(t, false);
	if (IS_ERR(t->scratch.vma)) {
		i915_vma_put(t->batch);
		return PTR_ERR(t->scratch.vma);
	}

	for (i = 0; i < ARRAY_SIZE(t->buffers); i++) {
		struct i915_vma *vma;

		vma = create_vma(t, HAS_LMEM(i915) && i % 2);
		if (IS_ERR(vma)) {
			tiled_blits_destroy_buffers(t);
			return PTR_ERR(vma);
		}

		t->buffers[i].vma = vma;
		t->buffers[i].tiling =
			i915_prandom_u32_max_state(I915_TILING_Y + 1, prng);
	}

	return 0;
}

static void fill_scratch(struct tiled_blits *t, u32 *vaddr, u32 val)
{
	int i;

	t->scratch.start_val = val;
	for (i = 0; i < t->width * t->height; i++)
		vaddr[i] = val++;

	i915_gem_object_flush_map(t->scratch.vma->obj);
}

static u64 swizzle_bit(unsigned int bit, u64 offset)
{
	return (offset & BIT_ULL(bit)) >> (bit - 6);
}

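/*
 * CPU-side reference for where a linear (x, y) byte lands within a tiled
 * surface. X-tiles are 512-byte rows x 8 rows per 4K tile, laid out
 * row-major; Y-tiles use 16-byte spans x 32 rows. For example, with
 * X-tiling and a 2048-byte stride, the byte at x=600, y=3 resolves to
 * 3 * 512 + (600 / 512) * 4096 + (600 % 512) = 5720. Bit-6 swizzling is
 * then applied on top to mirror what the hardware does.
 */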
static u64 tiled_offset(const struct intel_gt *gt,
			u64 v,
			unsigned int stride,
			unsigned int tiling)
{
	unsigned int swizzle;
	u64 x, y;

	if (tiling == I915_TILING_NONE)
		return v;

	y = div64_u64_rem(v, stride, &x);

	if (tiling == I915_TILING_X) {
		v = div64_u64_rem(y, 8, &y) * stride * 8;
		v += y * 512;
		v += div64_u64_rem(x, 512, &x) << 12;
		v += x;

		swizzle = gt->ggtt->bit_6_swizzle_x;
	} else {
		const unsigned int ytile_span = 16;
		const unsigned int ytile_height = 512;

		v = div64_u64_rem(y, 32, &y) * stride * 32;
		v += y * ytile_span;
		v += div64_u64_rem(x, ytile_span, &x) * ytile_height;
		v += x;

		swizzle = gt->ggtt->bit_6_swizzle_y;
	}

	switch (swizzle) {
	case I915_BIT_6_SWIZZLE_9:
		v ^= swizzle_bit(9, v);
		break;
	case I915_BIT_6_SWIZZLE_9_10:
		v ^= swizzle_bit(9, v) ^ swizzle_bit(10, v);
		break;
	case I915_BIT_6_SWIZZLE_9_11:
		v ^= swizzle_bit(9, v) ^ swizzle_bit(11, v);
		break;
	case I915_BIT_6_SWIZZLE_9_10_11:
		v ^= swizzle_bit(9, v) ^ swizzle_bit(10, v) ^ swizzle_bit(11, v);
		break;
	}

	return v;
}

static const char *repr_tiling(int tiling)
{
	switch (tiling) {
	case I915_TILING_NONE: return "linear";
	case I915_TILING_X: return "X";
	case I915_TILING_Y: return "Y";
	default: return "unknown";
	}
}

static int
verify_buffer(const struct tiled_blits *t,
	      struct blit_buffer *buf,
	      struct rnd_state *prng)
{
	const u32 *vaddr;
	int ret = 0;
	int x, y, p;

	x = i915_prandom_u32_max_state(t->width, prng);
	y = i915_prandom_u32_max_state(t->height, prng);
	p = y * t->width + x;

	vaddr = i915_gem_object_pin_map(buf->vma->obj, I915_MAP_WC);
	if (IS_ERR(vaddr))
		return PTR_ERR(vaddr);

	if (vaddr[0] != buf->start_val) {
		ret = -EINVAL;
	} else {
		u64 v = tiled_offset(buf->vma->vm->gt,
				     p * 4, t->width * 4,
				     buf->tiling);

		if (vaddr[v / sizeof(*vaddr)] != buf->start_val + p)
			ret = -EINVAL;
	}
	if (ret) {
		pr_err("Invalid %s tiling detected at (%d, %d), start_val %x\n",
		       repr_tiling(buf->tiling),
		       x, y, buf->start_val);
		igt_hexdump(vaddr, 4096);
	}

	i915_gem_object_unpin_map(buf->vma->obj);
	return ret;
}

static int move_to_active(struct i915_vma *vma,
			  struct i915_request *rq,
			  unsigned int flags)
{
	int err;

	i915_vma_lock(vma);
	err = i915_request_await_object(rq, vma->obj, false);
	if (err == 0)
		err = i915_vma_move_to_active(vma, rq, flags);
	i915_vma_unlock(vma);

	return err;
}

static int pin_buffer(struct i915_vma *vma, u64 addr)
{
	int err;

	if (drm_mm_node_allocated(&vma->node) && vma->node.start != addr) {
		err = i915_vma_unbind(vma);
		if (err)
			return err;
	}

	err = i915_vma_pin(vma, 0, 0, PIN_USER | PIN_OFFSET_FIXED | addr);
	if (err)
		return err;

	return 0;
}

static int
tiled_blit(struct tiled_blits *t,
	   struct blit_buffer *dst, u64 dst_addr,
	   struct blit_buffer *src, u64 src_addr)
{
	struct i915_request *rq;
	int err;

	err = pin_buffer(src->vma, src_addr);
	if (err) {
		pr_err("Cannot pin src @ %llx\n", src_addr);
		return err;
	}

	err = pin_buffer(dst->vma, dst_addr);
	if (err) {
		pr_err("Cannot pin dst @ %llx\n", dst_addr);
		goto err_src;
	}

	err = i915_vma_pin(t->batch, 0, 0, PIN_USER | PIN_HIGH);
	if (err) {
		pr_err("cannot pin batch\n");
		goto err_dst;
	}

	err = prepare_blit(t, dst, src, t->batch->obj);
	if (err)
		goto err_bb;

	rq = intel_context_create_request(t->ce);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto err_bb;
	}

	err = move_to_active(t->batch, rq, 0);
	if (!err)
		err = move_to_active(src->vma, rq, 0);
	if (!err)
		err = move_to_active(dst->vma, rq, 0);
	if (!err)
		err = rq->engine->emit_bb_start(rq,
						t->batch->node.start,
						t->batch->node.size,
						0);
	i915_request_get(rq);
	i915_request_add(rq);
	if (i915_request_wait(rq, 0, HZ / 2) < 0)
		err = -ETIME;
	i915_request_put(rq);

	dst->start_val = src->start_val;
err_bb:
	i915_vma_unpin(t->batch);
err_dst:
	i915_vma_unpin(dst->vma);
err_src:
	i915_vma_unpin(src->vma);
	return err;
}

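/*
 * Find a free block in the context's VM by briefly inserting and then
 * removing a drm_mm node; the test then pins its buffers at fixed offsets
 * inside that range, with I915_GTT_MIN_ALIGNMENT of slack on either side.
 */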
static struct tiled_blits *
tiled_blits_create(struct intel_engine_cs *engine, struct rnd_state *prng)
{
	struct drm_mm_node hole;
	struct tiled_blits *t;
	u64 hole_size;
	int err;

	t = kzalloc(sizeof(*t), GFP_KERNEL);
	if (!t)
		return ERR_PTR(-ENOMEM);

	t->ce = intel_context_create(engine);
	if (IS_ERR(t->ce)) {
		err = PTR_ERR(t->ce);
		goto err_free;
	}

	hole_size = 2 * PAGE_ALIGN(WIDTH * HEIGHT * 4);
	hole_size *= 2; /* room to maneuver */
	hole_size += 2 * I915_GTT_MIN_ALIGNMENT;

	mutex_lock(&t->ce->vm->mutex);
	memset(&hole, 0, sizeof(hole));
	err = drm_mm_insert_node_in_range(&t->ce->vm->mm, &hole,
					  hole_size, 0, I915_COLOR_UNEVICTABLE,
					  0, U64_MAX,
					  DRM_MM_INSERT_BEST);
	if (!err)
		drm_mm_remove_node(&hole);
	mutex_unlock(&t->ce->vm->mutex);
	if (err) {
		err = -ENODEV;
		goto err_put;
	}

	t->hole = hole.start + I915_GTT_MIN_ALIGNMENT;
	pr_info("Using hole at %llx\n", t->hole);

	err = tiled_blits_create_buffers(t, WIDTH, HEIGHT, prng);
	if (err)
		goto err_put;

	return t;

err_put:
	intel_context_put(t->ce);
err_free:
	kfree(t);
	return ERR_PTR(err);
}

static void tiled_blits_destroy(struct tiled_blits *t)
{
	tiled_blits_destroy_buffers(t);

	intel_context_put(t->ce);
	kfree(t);
}

static int tiled_blits_prepare(struct tiled_blits *t,
			       struct rnd_state *prng)
{
	u64 offset = PAGE_ALIGN(t->width * t->height * 4);
	u32 *map;
	int err;
	int i;

	map = i915_gem_object_pin_map(t->scratch.vma->obj, I915_MAP_WC);
	if (IS_ERR(map))
		return PTR_ERR(map);

	/* Use scratch to fill objects */
	for (i = 0; i < ARRAY_SIZE(t->buffers); i++) {
		fill_scratch(t, map, prandom_u32_state(prng));
		GEM_BUG_ON(verify_buffer(t, &t->scratch, prng));

		err = tiled_blit(t,
				 &t->buffers[i], t->hole + offset,
				 &t->scratch, t->hole);
		if (err == 0)
			err = verify_buffer(t, &t->buffers[i], prng);
		if (err) {
			pr_err("Failed to create buffer %d\n", i);
			break;
		}
	}

	i915_gem_object_unpin_map(t->scratch.vma->obj);
	return err;
}

static int tiled_blits_bounce(struct tiled_blits *t, struct rnd_state *prng)
{
	u64 offset =
		round_up(t->width * t->height * 4, 2 * I915_GTT_MIN_ALIGNMENT);
	int err;

	/* We want to check position invariant tiling across GTT eviction */

	err = tiled_blit(t,
			 &t->buffers[1], t->hole + offset / 2,
			 &t->buffers[0], t->hole + 2 * offset);
	if (err)
		return err;

	/* Reposition so that we overlap the old addresses, and slightly off */
	err = tiled_blit(t,
			 &t->buffers[2], t->hole + I915_GTT_MIN_ALIGNMENT,
			 &t->buffers[1], t->hole + 3 * offset / 2);
	if (err)
		return err;

	err = verify_buffer(t, &t->buffers[2], prng);
	if (err)
		return err;

	return 0;
}

static int __igt_client_tiled_blits(struct intel_engine_cs *engine,
				    struct rnd_state *prng)
{
	struct tiled_blits *t;
	int err;

	t = tiled_blits_create(engine, prng);
	if (IS_ERR(t))
		return PTR_ERR(t);

	err = tiled_blits_prepare(t, prng);
	if (err)
		goto out;

	err = tiled_blits_bounce(t, prng);
	if (err)
		goto out;

out:
	tiled_blits_destroy(t);
	return err;
}

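/*
 * Bit-17 swizzling (9_10_17 / 9_17) keys off a physical address bit that
 * the GTT-offset based tiled_offset() reference above cannot reproduce,
 * so such machines are excluded from the test.
 */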
static bool has_bit17_swizzle(int sw)
{
	return (sw == I915_BIT_6_SWIZZLE_9_10_17 ||
		sw == I915_BIT_6_SWIZZLE_9_17);
}

static bool bad_swizzling(struct drm_i915_private *i915)
{
	struct i915_ggtt *ggtt = &i915->ggtt;

	if (i915->quirks & QUIRK_PIN_SWIZZLED_PAGES)
		return true;

	if (has_bit17_swizzle(ggtt->bit_6_swizzle_x) ||
	    has_bit17_swizzle(ggtt->bit_6_swizzle_y))
		return true;

	return false;
}

static int igt_client_tiled_blits(void *arg)
{
	struct drm_i915_private *i915 = arg;
	I915_RND_STATE(prng);
	int inst = 0;

	/* Test requires explicit BLT tiling controls */
	if (INTEL_GEN(i915) < 4)
		return 0;

	if (bad_swizzling(i915)) /* Requires sane (sub-page) swizzling */
		return 0;

	do {
		struct intel_engine_cs *engine;
		int err;

		engine = intel_engine_lookup_user(i915,
						  I915_ENGINE_CLASS_COPY,
						  inst++);
		if (!engine)
			return 0;

		err = __igt_client_tiled_blits(engine, &prng);
		if (err == -ENODEV)
			err = 0;
		if (err)
			return err;
	} while (1);
}

int i915_gem_client_blt_live_selftests(struct drm_i915_private *i915)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(igt_client_fill),
		SUBTEST(igt_client_tiled_blits),
	};

	if (intel_gt_is_wedged(&i915->gt))
		return 0;

	if (!HAS_ENGINE(i915, BCS0))
		return 0;

	return i915_live_subtests(tests, i915);
}