// SPDX-License-Identifier: MIT
/*
 * Copyright © 2020 Intel Corporation
 */

#include <linux/sort.h>

#include "gem/i915_gem_internal.h"
#include "gem/i915_gem_lmem.h"

#include "selftests/igt_spinner.h"
#include "selftests/i915_random.h"

static const unsigned int sizes[] = {
	SZ_4K,
	SZ_64K,
	SZ_2M,
	CHUNK_SZ - SZ_4K,
	CHUNK_SZ,
	CHUNK_SZ + SZ_4K,
	SZ_64M,
};

static struct drm_i915_gem_object *
create_lmem_or_internal(struct drm_i915_private *i915, size_t size)
{
	struct drm_i915_gem_object *obj;

	obj = i915_gem_object_create_lmem(i915, size, 0);
	if (!IS_ERR(obj))
		return obj;

	return i915_gem_object_create_internal(i915, size);
}

static int copy(struct intel_migrate *migrate,
		int (*fn)(struct intel_migrate *migrate,
			  struct i915_gem_ww_ctx *ww,
			  struct drm_i915_gem_object *src,
			  struct drm_i915_gem_object *dst,
			  struct i915_request **out),
		u32 sz, struct rnd_state *prng)
{
	struct drm_i915_private *i915 = migrate->context->engine->i915;
	struct drm_i915_gem_object *src, *dst;
	struct i915_request *rq;
	struct i915_gem_ww_ctx ww;
	u32 *vaddr;
	int err = 0;
	int i;

	src = create_lmem_or_internal(i915, sz);
	if (IS_ERR(src))
		return 0;

	sz = src->base.size;
	dst = i915_gem_object_create_internal(i915, sz);
	if (IS_ERR(dst))
		goto err_free_src;

	for_i915_gem_ww(&ww, err, true) {
		err = i915_gem_object_lock(src, &ww);
		if (err)
			continue;

		err = i915_gem_object_lock(dst, &ww);
		if (err)
			continue;

		vaddr = i915_gem_object_pin_map(src, I915_MAP_WC);
		if (IS_ERR(vaddr)) {
			err = PTR_ERR(vaddr);
			continue;
		}

		for (i = 0; i < sz / sizeof(u32); i++)
			vaddr[i] = i;
		i915_gem_object_flush_map(src);

		vaddr = i915_gem_object_pin_map(dst, I915_MAP_WC);
		if (IS_ERR(vaddr)) {
			err = PTR_ERR(vaddr);
			goto unpin_src;
		}

		for (i = 0; i < sz / sizeof(u32); i++)
			vaddr[i] = ~i;
		i915_gem_object_flush_map(dst);

		err = fn(migrate, &ww, src, dst, &rq);
		if (!err)
			continue;

		if (err != -EDEADLK && err != -EINTR && err != -ERESTARTSYS)
			pr_err("%ps failed, size: %u\n", fn, sz);
		if (rq) {
			i915_request_wait(rq, 0, HZ);
			i915_request_put(rq);
		}
		i915_gem_object_unpin_map(dst);
unpin_src:
		i915_gem_object_unpin_map(src);
	}
	if (err)
		goto err_out;

	if (rq) {
		if (i915_request_wait(rq, 0, HZ) < 0) {
			pr_err("%ps timed out, size: %u\n", fn, sz);
			err = -ETIME;
		}
		i915_request_put(rq);
	}

	for (i = 0; !err && i < sz / PAGE_SIZE; i++) {
		int x = i * 1024 + i915_prandom_u32_max_state(1024, prng);

		if (vaddr[x] != x) {
			pr_err("%ps failed, size: %u, offset: %zu\n",
			       fn, sz, x * sizeof(u32));
			igt_hexdump(vaddr + i * 1024, 4096);
			err = -EINVAL;
		}
	}

	i915_gem_object_unpin_map(dst);
	i915_gem_object_unpin_map(src);

err_out:
	i915_gem_object_put(dst);
err_free_src:
	i915_gem_object_put(src);

	return err;
}
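
/*
 * Helpers for exercising the flat-CCS (compression control surface) paths.
 * intel_context_copy_ccs() builds blitter requests that move data between an
 * object's main memory and its CCS state, one CHUNK_SZ window at a time:
 * DIRECT_ACCESS addresses the ordinary memory and INDIRECT_ACCESS the CCS,
 * so @write_to_ccs selects the direction of the transfer.
 */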
static int intel_context_copy_ccs(struct intel_context *ce,
				  const struct i915_deps *deps,
				  struct scatterlist *sg,
				  unsigned int pat_index,
				  bool write_to_ccs,
				  struct i915_request **out)
{
	u8 src_access = write_to_ccs ? DIRECT_ACCESS : INDIRECT_ACCESS;
	u8 dst_access = write_to_ccs ? INDIRECT_ACCESS : DIRECT_ACCESS;
	struct sgt_dma it = sg_sgt(sg);
	struct i915_request *rq;
	u32 offset;
	int err;

	GEM_BUG_ON(ce->vm != ce->engine->gt->migrate.context->vm);
	*out = NULL;

	GEM_BUG_ON(ce->ring->size < SZ_64K);

	offset = 0;
	if (HAS_64K_PAGES(ce->engine->i915))
		offset = CHUNK_SZ;

	do {
		int len;

		rq = i915_request_create(ce);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			goto out_ce;
		}

		if (deps) {
			err = i915_request_await_deps(rq, deps);
			if (err)
				goto out_rq;

			if (rq->engine->emit_init_breadcrumb) {
				err = rq->engine->emit_init_breadcrumb(rq);
				if (err)
					goto out_rq;
			}

			deps = NULL;
		}

		/* The PTE updates + clear must not be interrupted. */
		err = emit_no_arbitration(rq);
		if (err)
			goto out_rq;

		len = emit_pte(rq, &it, pat_index, true, offset, CHUNK_SZ);
		if (len <= 0) {
			err = len;
			goto out_rq;
		}

		err = rq->engine->emit_flush(rq, EMIT_INVALIDATE);
		if (err)
			goto out_rq;

		err = emit_copy_ccs(rq, offset, dst_access,
				    offset, src_access, len);
		if (err)
			goto out_rq;

		err = rq->engine->emit_flush(rq, EMIT_INVALIDATE);

		/* Arbitration is re-enabled between requests. */
out_rq:
		if (*out)
			i915_request_put(*out);
		*out = i915_request_get(rq);
		i915_request_add(rq);
		if (err || !it.sg || !sg_dma_len(it.sg))
			break;

		cond_resched();
	} while (1);

out_ce:
	return err;
}

static int
intel_migrate_ccs_copy(struct intel_migrate *m,
		       struct i915_gem_ww_ctx *ww,
		       const struct i915_deps *deps,
		       struct scatterlist *sg,
		       unsigned int pat_index,
		       bool write_to_ccs,
		       struct i915_request **out)
{
	struct intel_context *ce;
	int err;

	*out = NULL;
	if (!m->context)
		return -ENODEV;

	ce = intel_migrate_create_context(m);
	if (IS_ERR(ce))
		ce = intel_context_get(m->context);
	GEM_BUG_ON(IS_ERR(ce));

	err = intel_context_pin_ww(ce, ww);
	if (err)
		goto out;

	err = intel_context_copy_ccs(ce, deps, sg, pat_index,
				     write_to_ccs, out);

	intel_context_unpin(ce);
out:
	intel_context_put(ce);
	return err;
}
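
/*
 * clear() fills an object with a known pattern, asks @fn to clear it to
 * @val, and then samples one dword per page to verify the result. On
 * platforms with flat CCS it additionally seeds the CCS state beforehand
 * and reads it back afterwards, expecting the clear of main memory to have
 * left the compression metadata zeroed as well.
 */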
static int clear(struct intel_migrate *migrate,
		 int (*fn)(struct intel_migrate *migrate,
			   struct i915_gem_ww_ctx *ww,
			   struct drm_i915_gem_object *obj,
			   u32 value,
			   struct i915_request **out),
		 u32 sz, struct rnd_state *prng)
{
	struct drm_i915_private *i915 = migrate->context->engine->i915;
	struct drm_i915_gem_object *obj;
	struct i915_request *rq;
	struct i915_gem_ww_ctx ww;
	u32 *vaddr, val = 0;
	bool ccs_cap = false;
	int err = 0;
	int i;

	obj = create_lmem_or_internal(i915, sz);
	if (IS_ERR(obj))
		return 0;

	/* Consider the rounded up memory too */
	sz = obj->base.size;

	if (HAS_FLAT_CCS(i915) && i915_gem_object_is_lmem(obj))
		ccs_cap = true;

	for_i915_gem_ww(&ww, err, true) {
		int ccs_bytes, ccs_bytes_per_chunk;

		err = i915_gem_object_lock(obj, &ww);
		if (err)
			continue;

		vaddr = i915_gem_object_pin_map(obj, I915_MAP_WC);
		if (IS_ERR(vaddr)) {
			err = PTR_ERR(vaddr);
			continue;
		}

		for (i = 0; i < sz / sizeof(u32); i++)
			vaddr[i] = ~i;
		i915_gem_object_flush_map(obj);

		if (ccs_cap && !val) {
			/* Write the obj data into ccs surface */
			err = intel_migrate_ccs_copy(migrate, &ww, NULL,
						     obj->mm.pages->sgl,
						     obj->pat_index,
						     true, &rq);
			if (rq && !err) {
				if (i915_request_wait(rq, 0, HZ) < 0) {
					pr_err("%ps timed out, size: %u\n",
					       fn, sz);
					err = -ETIME;
				}
				i915_request_put(rq);
				rq = NULL;
			}
			if (err)
				continue;
		}

		err = fn(migrate, &ww, obj, val, &rq);
		if (rq && !err) {
			if (i915_request_wait(rq, 0, HZ) < 0) {
				pr_err("%ps timed out, size: %u\n", fn, sz);
				err = -ETIME;
			}
			i915_request_put(rq);
			rq = NULL;
		}
		if (err)
			continue;

		i915_gem_object_flush_map(obj);

		/* Verify the set/clear of the obj mem */
		for (i = 0; !err && i < sz / PAGE_SIZE; i++) {
			int x = i * 1024 +
				i915_prandom_u32_max_state(1024, prng);

			if (vaddr[x] != val) {
				pr_err("%ps failed, (%u != %u), offset: %zu\n",
				       fn, vaddr[x], val, x * sizeof(u32));
				igt_hexdump(vaddr + i * 1024, 4096);
				err = -EINVAL;
			}
		}
		if (err)
			continue;

		if (ccs_cap && !val) {
			for (i = 0; i < sz / sizeof(u32); i++)
				vaddr[i] = ~i;
			i915_gem_object_flush_map(obj);

			err = intel_migrate_ccs_copy(migrate, &ww, NULL,
						     obj->mm.pages->sgl,
						     obj->pat_index,
						     false, &rq);
			if (rq && !err) {
				if (i915_request_wait(rq, 0, HZ) < 0) {
					pr_err("%ps timed out, size: %u\n",
					       fn, sz);
					err = -ETIME;
				}
				i915_request_put(rq);
				rq = NULL;
			}
			if (err)
				continue;

			ccs_bytes = GET_CCS_BYTES(i915, sz);
			ccs_bytes_per_chunk = GET_CCS_BYTES(i915, CHUNK_SZ);
			i915_gem_object_flush_map(obj);

			for (i = 0; !err && i < DIV_ROUND_UP(ccs_bytes, PAGE_SIZE); i++) {
				int offset = ((i * PAGE_SIZE) /
					      ccs_bytes_per_chunk) * CHUNK_SZ / sizeof(u32);
				int ccs_bytes_left = (ccs_bytes - i * PAGE_SIZE) / sizeof(u32);
				int x = i915_prandom_u32_max_state(min_t(int, 1024,
									 ccs_bytes_left), prng);

				if (vaddr[offset + x]) {
					pr_err("%ps ccs clearing failed, offset: %ld/%d\n",
					       fn, i * PAGE_SIZE + x * sizeof(u32), ccs_bytes);
					igt_hexdump(vaddr + offset,
						    min_t(int, 4096,
							  ccs_bytes_left * sizeof(u32)));
					err = -EINVAL;
				}
			}

			if (err)
				continue;
		}
		i915_gem_object_unpin_map(obj);
	}

	if (err) {
		if (err != -EDEADLK && err != -EINTR && err != -ERESTARTSYS)
			pr_err("%ps failed, size: %u\n", fn, sz);
		if (rq && err != -EINVAL) {
			i915_request_wait(rq, 0, HZ);
			i915_request_put(rq);
		}

		i915_gem_object_unpin_map(obj);
	}

	i915_gem_object_put(obj);
	return err;
}
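
/*
 * Each copy/clear test comes in two flavours: the migrate_* variants go
 * through the intel_migrate_*() entry points, which pin a context under the
 * caller's ww lock, while the global_* variants submit directly on the GT's
 * shared migration context via intel_context_migrate_*().
 */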
static int __migrate_copy(struct intel_migrate *migrate,
			  struct i915_gem_ww_ctx *ww,
			  struct drm_i915_gem_object *src,
			  struct drm_i915_gem_object *dst,
			  struct i915_request **out)
{
	return intel_migrate_copy(migrate, ww, NULL,
				  src->mm.pages->sgl, src->pat_index,
				  i915_gem_object_is_lmem(src),
				  dst->mm.pages->sgl, dst->pat_index,
				  i915_gem_object_is_lmem(dst),
				  out);
}

static int __global_copy(struct intel_migrate *migrate,
			 struct i915_gem_ww_ctx *ww,
			 struct drm_i915_gem_object *src,
			 struct drm_i915_gem_object *dst,
			 struct i915_request **out)
{
	return intel_context_migrate_copy(migrate->context, NULL,
					  src->mm.pages->sgl, src->pat_index,
					  i915_gem_object_is_lmem(src),
					  dst->mm.pages->sgl, dst->pat_index,
					  i915_gem_object_is_lmem(dst),
					  out);
}

static int
migrate_copy(struct intel_migrate *migrate, u32 sz, struct rnd_state *prng)
{
	return copy(migrate, __migrate_copy, sz, prng);
}

static int
global_copy(struct intel_migrate *migrate, u32 sz, struct rnd_state *prng)
{
	return copy(migrate, __global_copy, sz, prng);
}

static int __migrate_clear(struct intel_migrate *migrate,
			   struct i915_gem_ww_ctx *ww,
			   struct drm_i915_gem_object *obj,
			   u32 value,
			   struct i915_request **out)
{
	return intel_migrate_clear(migrate, ww, NULL,
				   obj->mm.pages->sgl,
				   obj->pat_index,
				   i915_gem_object_is_lmem(obj),
				   value, out);
}

static int __global_clear(struct intel_migrate *migrate,
			  struct i915_gem_ww_ctx *ww,
			  struct drm_i915_gem_object *obj,
			  u32 value,
			  struct i915_request **out)
{
	return intel_context_migrate_clear(migrate->context, NULL,
					   obj->mm.pages->sgl,
					   obj->pat_index,
					   i915_gem_object_is_lmem(obj),
					   value, out);
}

static int
migrate_clear(struct intel_migrate *migrate, u32 sz, struct rnd_state *prng)
{
	return clear(migrate, __migrate_clear, sz, prng);
}

static int
global_clear(struct intel_migrate *migrate, u32 sz, struct rnd_state *prng)
{
	return clear(migrate, __global_clear, sz, prng);
}

static int live_migrate_copy(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_migrate *migrate = &gt->migrate;
	struct drm_i915_private *i915 = migrate->context->engine->i915;
	I915_RND_STATE(prng);
	int i;

	for (i = 0; i < ARRAY_SIZE(sizes); i++) {
		int err;

		err = migrate_copy(migrate, sizes[i], &prng);
		if (err == 0)
			err = global_copy(migrate, sizes[i], &prng);
		i915_gem_drain_freed_objects(i915);
		if (err)
			return err;
	}

	return 0;
}

static int live_migrate_clear(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_migrate *migrate = &gt->migrate;
	struct drm_i915_private *i915 = migrate->context->engine->i915;
	I915_RND_STATE(prng);
	int i;

	for (i = 0; i < ARRAY_SIZE(sizes); i++) {
		int err;

		err = migrate_clear(migrate, sizes[i], &prng);
		if (err == 0)
			err = global_clear(migrate, sizes[i], &prng);

		i915_gem_drain_freed_objects(i915);
		if (err)
			return err;
	}

	return 0;
}

struct spinner_timer {
	struct timer_list timer;
	struct igt_spinner spin;
};

static void spinner_kill(struct timer_list *timer)
{
	struct spinner_timer *st = from_timer(st, timer, timer);

	igt_spinner_end(&st->spin);
	pr_info("%s\n", __func__);
}

static int live_emit_pte_full_ring(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_migrate *migrate = &gt->migrate;
	struct drm_i915_private *i915 = migrate->context->engine->i915;
	struct drm_i915_gem_object *obj;
	struct intel_context *ce;
	struct i915_request *rq, *prev;
	struct spinner_timer st;
	struct sgt_dma it;
	int len, sz, err;
	u32 *cs;

	/*
	 * Simple regression test to check that we don't trample the
	 * rq->reserved_space when returning from emit_pte(), if the ring is
	 * nearly full.
	 */

	if (igt_spinner_init(&st.spin, to_gt(i915)))
		return -ENOMEM;

	obj = i915_gem_object_create_internal(i915, 2 * PAGE_SIZE);
	if (IS_ERR(obj)) {
		err = PTR_ERR(obj);
		goto out_spinner;
	}

	err = i915_gem_object_pin_pages_unlocked(obj);
	if (err)
		goto out_obj;

	ce = intel_migrate_create_context(migrate);
	if (IS_ERR(ce)) {
		err = PTR_ERR(ce);
		goto out_obj;
	}

	ce->ring_size = SZ_4K; /* Not too big */

	err = intel_context_pin(ce);
	if (err)
		goto out_put;

	rq = igt_spinner_create_request(&st.spin, ce, MI_ARB_CHECK);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto out_unpin;
	}

	i915_request_add(rq);
	if (!igt_wait_for_spinner(&st.spin, rq)) {
		err = -EIO;
		goto out_unpin;
	}

	/*
	 * Fill the rest of the ring leaving I915_EMIT_PTE_NUM_DWORDS +
	 * ring->reserved_space at the end. To actually emit the PTEs we require
	 * slightly more than I915_EMIT_PTE_NUM_DWORDS, since our object size is
	 * greater than PAGE_SIZE. The correct behaviour is to wait for more
	 * ring space in emit_pte(), otherwise we trample on the reserved_space
	 * resulting in crashes when later submitting the rq.
	 */

	prev = NULL;
	do {
		if (prev)
			i915_request_add(rq);

		rq = i915_request_create(ce);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			goto out_unpin;
		}

		sz = (rq->ring->space - rq->reserved_space) / sizeof(u32) -
			I915_EMIT_PTE_NUM_DWORDS;
		sz = min_t(u32, sz, (SZ_1K - rq->reserved_space) / sizeof(u32) -
			   I915_EMIT_PTE_NUM_DWORDS);
		cs = intel_ring_begin(rq, sz);
		if (IS_ERR(cs)) {
			err = PTR_ERR(cs);
			goto out_rq;
		}

		memset32(cs, MI_NOOP, sz);
		cs += sz;
		intel_ring_advance(rq, cs);

		pr_info("%s emit=%u sz=%d\n", __func__, rq->ring->emit, sz);

		prev = rq;
	} while (rq->ring->space > (rq->reserved_space +
				    I915_EMIT_PTE_NUM_DWORDS * sizeof(u32)));

	timer_setup_on_stack(&st.timer, spinner_kill, 0);
	mod_timer(&st.timer, jiffies + 2 * HZ);

	/*
	 * This should wait for the spinner to be killed, otherwise we should go
	 * down in flames when doing i915_request_add().
	 */
	pr_info("%s emit_pte ring space=%u\n", __func__, rq->ring->space);
	it = sg_sgt(obj->mm.pages->sgl);
	len = emit_pte(rq, &it, obj->pat_index, false, 0, CHUNK_SZ);
	if (!len) {
		err = -EINVAL;
		goto out_rq;
	}
	if (len < 0) {
		err = len;
		goto out_rq;
	}

out_rq:
	i915_request_add(rq); /* GEM_BUG_ON(rq->reserved_space > ring->space)? */
	del_timer_sync(&st.timer);
	destroy_timer_on_stack(&st.timer);
out_unpin:
	intel_context_unpin(ce);
out_put:
	intel_context_put(ce);
out_obj:
	i915_gem_object_put(obj);
out_spinner:
	igt_spinner_fini(&st.spin);
	return err;
}
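
/*
 * The thread_* tests run the same copy/clear exercises concurrently from one
 * kthread per online CPU (plus one) to stress parallel submission on the
 * migration context.
 */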
struct threaded_migrate {
	struct intel_migrate *migrate;
	struct task_struct *tsk;
	struct rnd_state prng;
};

static int threaded_migrate(struct intel_migrate *migrate,
			    int (*fn)(void *arg),
			    unsigned int flags)
{
	const unsigned int n_cpus = num_online_cpus() + 1;
	struct threaded_migrate *thread;
	I915_RND_STATE(prng);
	unsigned int i;
	int err = 0;

	thread = kcalloc(n_cpus, sizeof(*thread), GFP_KERNEL);
	if (!thread)
		return 0;

	for (i = 0; i < n_cpus; ++i) {
		struct task_struct *tsk;

		thread[i].migrate = migrate;
		thread[i].prng =
			I915_RND_STATE_INITIALIZER(prandom_u32_state(&prng));

		tsk = kthread_run(fn, &thread[i], "igt-%d", i);
		if (IS_ERR(tsk)) {
			err = PTR_ERR(tsk);
			break;
		}

		get_task_struct(tsk);
		thread[i].tsk = tsk;
	}

	msleep(10); /* start all threads before we kthread_stop() */

	for (i = 0; i < n_cpus; ++i) {
		struct task_struct *tsk = thread[i].tsk;
		int status;

		if (IS_ERR_OR_NULL(tsk))
			continue;

		status = kthread_stop_put(tsk);
		if (status && !err)
			err = status;
	}

	kfree(thread);
	return err;
}

static int __thread_migrate_copy(void *arg)
{
	struct threaded_migrate *tm = arg;

	return migrate_copy(tm->migrate, 2 * CHUNK_SZ, &tm->prng);
}

static int thread_migrate_copy(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_migrate *migrate = &gt->migrate;

	return threaded_migrate(migrate, __thread_migrate_copy, 0);
}

static int __thread_global_copy(void *arg)
{
	struct threaded_migrate *tm = arg;

	return global_copy(tm->migrate, 2 * CHUNK_SZ, &tm->prng);
}

static int thread_global_copy(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_migrate *migrate = &gt->migrate;

	return threaded_migrate(migrate, __thread_global_copy, 0);
}

static int __thread_migrate_clear(void *arg)
{
	struct threaded_migrate *tm = arg;

	return migrate_clear(tm->migrate, 2 * CHUNK_SZ, &tm->prng);
}

static int __thread_global_clear(void *arg)
{
	struct threaded_migrate *tm = arg;

	return global_clear(tm->migrate, 2 * CHUNK_SZ, &tm->prng);
}

static int thread_migrate_clear(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_migrate *migrate = &gt->migrate;

	return threaded_migrate(migrate, __thread_migrate_clear, 0);
}

static int thread_global_clear(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_migrate *migrate = &gt->migrate;

	return threaded_migrate(migrate, __thread_global_clear, 0);
}

int intel_migrate_live_selftests(struct drm_i915_private *i915)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(live_migrate_copy),
		SUBTEST(live_migrate_clear),
		SUBTEST(live_emit_pte_full_ring),
		SUBTEST(thread_migrate_copy),
		SUBTEST(thread_migrate_clear),
		SUBTEST(thread_global_copy),
		SUBTEST(thread_global_clear),
	};
	struct intel_gt *gt = to_gt(i915);

	if (!gt->migrate.context)
		return 0;

	return intel_gt_live_subtests(tests, gt);
}
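
/*
 * Performance selftests: time a handful of clear and copy blits over a range
 * of sizes and report an estimated throughput derived from the middle samples
 * of each run.
 */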
static struct drm_i915_gem_object *
create_init_lmem_internal(struct intel_gt *gt, size_t sz, bool try_lmem)
{
	struct drm_i915_gem_object *obj = NULL;
	int err;

	if (try_lmem)
		obj = i915_gem_object_create_lmem(gt->i915, sz, 0);

	if (IS_ERR_OR_NULL(obj)) {
		obj = i915_gem_object_create_internal(gt->i915, sz);
		if (IS_ERR(obj))
			return obj;
	}

	i915_gem_object_trylock(obj, NULL);
	err = i915_gem_object_pin_pages(obj);
	if (err) {
		i915_gem_object_unlock(obj);
		i915_gem_object_put(obj);
		return ERR_PTR(err);
	}

	return obj;
}

static int wrap_ktime_compare(const void *A, const void *B)
{
	const ktime_t *a = A, *b = B;

	return ktime_compare(*a, *b);
}

static int __perf_clear_blt(struct intel_context *ce,
			    struct scatterlist *sg,
			    unsigned int pat_index,
			    bool is_lmem,
			    size_t sz)
{
	ktime_t t[5];
	int pass;
	int err = 0;

	for (pass = 0; pass < ARRAY_SIZE(t); pass++) {
		struct i915_request *rq;
		ktime_t t0, t1;

		t0 = ktime_get();

		err = intel_context_migrate_clear(ce, NULL, sg, pat_index,
						  is_lmem, 0, &rq);
		if (rq) {
			if (i915_request_wait(rq, 0, MAX_SCHEDULE_TIMEOUT) < 0)
				err = -EIO;
			i915_request_put(rq);
		}
		if (err)
			break;

		t1 = ktime_get();
		t[pass] = ktime_sub(t1, t0);
	}
	if (err)
		return err;

	sort(t, ARRAY_SIZE(t), sizeof(*t), wrap_ktime_compare, NULL);
	pr_info("%s: %zd KiB fill: %lld MiB/s\n",
		ce->engine->name, sz >> 10,
		div64_u64(mul_u32_u32(4 * sz,
				      1000 * 1000 * 1000),
			  t[1] + 2 * t[2] + t[3]) >> 20);
	return 0;
}

static int perf_clear_blt(void *arg)
{
	struct intel_gt *gt = arg;
	static const unsigned long sizes[] = {
		SZ_4K,
		SZ_64K,
		SZ_2M,
		SZ_64M
	};
	int i;

	for (i = 0; i < ARRAY_SIZE(sizes); i++) {
		struct drm_i915_gem_object *dst;
		int err;

		dst = create_init_lmem_internal(gt, sizes[i], true);
		if (IS_ERR(dst))
			return PTR_ERR(dst);

		err = __perf_clear_blt(gt->migrate.context,
				       dst->mm.pages->sgl,
				       i915_gem_get_pat_index(gt->i915,
							      I915_CACHE_NONE),
				       i915_gem_object_is_lmem(dst),
				       sizes[i]);

		i915_gem_object_unlock(dst);
		i915_gem_object_put(dst);
		if (err)
			return err;
	}

	return 0;
}

static int __perf_copy_blt(struct intel_context *ce,
			   struct scatterlist *src,
			   unsigned int src_pat_index,
			   bool src_is_lmem,
			   struct scatterlist *dst,
			   unsigned int dst_pat_index,
			   bool dst_is_lmem,
			   size_t sz)
{
	ktime_t t[5];
	int pass;
	int err = 0;

	for (pass = 0; pass < ARRAY_SIZE(t); pass++) {
		struct i915_request *rq;
		ktime_t t0, t1;

		t0 = ktime_get();

		err = intel_context_migrate_copy(ce, NULL,
						 src, src_pat_index,
						 src_is_lmem,
						 dst, dst_pat_index,
						 dst_is_lmem,
						 &rq);
		if (rq) {
			if (i915_request_wait(rq, 0, MAX_SCHEDULE_TIMEOUT) < 0)
				err = -EIO;
			i915_request_put(rq);
		}
		if (err)
			break;

		t1 = ktime_get();
		t[pass] = ktime_sub(t1, t0);
	}
	if (err)
		return err;

	sort(t, ARRAY_SIZE(t), sizeof(*t), wrap_ktime_compare, NULL);
	pr_info("%s: %zd KiB copy: %lld MiB/s\n",
		ce->engine->name, sz >> 10,
		div64_u64(mul_u32_u32(4 * sz,
				      1000 * 1000 * 1000),
			  t[1] + 2 * t[2] + t[3]) >> 20);
	return 0;
}
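
/*
 * Copies are timed from an lmem object (when lmem is available) into a
 * system memory object of the same size.
 */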
static int perf_copy_blt(void *arg)
{
	struct intel_gt *gt = arg;
	static const unsigned long sizes[] = {
		SZ_4K,
		SZ_64K,
		SZ_2M,
		SZ_64M
	};
	int i;

	for (i = 0; i < ARRAY_SIZE(sizes); i++) {
		struct drm_i915_gem_object *src, *dst;
		size_t sz;
		int err;

		src = create_init_lmem_internal(gt, sizes[i], true);
		if (IS_ERR(src))
			return PTR_ERR(src);

		sz = src->base.size;
		dst = create_init_lmem_internal(gt, sz, false);
		if (IS_ERR(dst)) {
			err = PTR_ERR(dst);
			goto err_src;
		}

		err = __perf_copy_blt(gt->migrate.context,
				      src->mm.pages->sgl,
				      i915_gem_get_pat_index(gt->i915,
							     I915_CACHE_NONE),
				      i915_gem_object_is_lmem(src),
				      dst->mm.pages->sgl,
				      i915_gem_get_pat_index(gt->i915,
							     I915_CACHE_NONE),
				      i915_gem_object_is_lmem(dst),
				      sz);

		i915_gem_object_unlock(dst);
		i915_gem_object_put(dst);
err_src:
		i915_gem_object_unlock(src);
		i915_gem_object_put(src);
		if (err)
			return err;
	}

	return 0;
}

int intel_migrate_perf_selftests(struct drm_i915_private *i915)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(perf_clear_blt),
		SUBTEST(perf_copy_blt),
	};
	struct intel_gt *gt = to_gt(i915);

	if (intel_gt_is_wedged(gt))
		return 0;

	if (!gt->migrate.context)
		return 0;

	return intel_gt_live_subtests(tests, gt);
}