1 // SPDX-License-Identifier: MIT 2 /* 3 * Copyright © 2020 Intel Corporation 4 */ 5 6 #include <linux/sort.h> 7 8 #include "gem/i915_gem_internal.h" 9 #include "gem/i915_gem_lmem.h" 10 11 #include "selftests/igt_spinner.h" 12 #include "selftests/i915_random.h" 13 14 static const unsigned int sizes[] = { 15 SZ_4K, 16 SZ_64K, 17 SZ_2M, 18 CHUNK_SZ - SZ_4K, 19 CHUNK_SZ, 20 CHUNK_SZ + SZ_4K, 21 SZ_64M, 22 }; 23 24 static struct drm_i915_gem_object * 25 create_lmem_or_internal(struct drm_i915_private *i915, size_t size) 26 { 27 struct drm_i915_gem_object *obj; 28 29 obj = i915_gem_object_create_lmem(i915, size, 0); 30 if (!IS_ERR(obj)) 31 return obj; 32 33 return i915_gem_object_create_internal(i915, size); 34 } 35 36 static int copy(struct intel_migrate *migrate, 37 int (*fn)(struct intel_migrate *migrate, 38 struct i915_gem_ww_ctx *ww, 39 struct drm_i915_gem_object *src, 40 struct drm_i915_gem_object *dst, 41 struct i915_request **out), 42 u32 sz, struct rnd_state *prng) 43 { 44 struct drm_i915_private *i915 = migrate->context->engine->i915; 45 struct drm_i915_gem_object *src, *dst; 46 struct i915_request *rq; 47 struct i915_gem_ww_ctx ww; 48 u32 *vaddr; 49 int err = 0; 50 int i; 51 52 src = create_lmem_or_internal(i915, sz); 53 if (IS_ERR(src)) 54 return 0; 55 56 sz = src->base.size; 57 dst = i915_gem_object_create_internal(i915, sz); 58 if (IS_ERR(dst)) 59 goto err_free_src; 60 61 for_i915_gem_ww(&ww, err, true) { 62 err = i915_gem_object_lock(src, &ww); 63 if (err) 64 continue; 65 66 err = i915_gem_object_lock(dst, &ww); 67 if (err) 68 continue; 69 70 vaddr = i915_gem_object_pin_map(src, I915_MAP_WC); 71 if (IS_ERR(vaddr)) { 72 err = PTR_ERR(vaddr); 73 continue; 74 } 75 76 for (i = 0; i < sz / sizeof(u32); i++) 77 vaddr[i] = i; 78 i915_gem_object_flush_map(src); 79 80 vaddr = i915_gem_object_pin_map(dst, I915_MAP_WC); 81 if (IS_ERR(vaddr)) { 82 err = PTR_ERR(vaddr); 83 goto unpin_src; 84 } 85 86 for (i = 0; i < sz / sizeof(u32); i++) 87 vaddr[i] = ~i; 88 i915_gem_object_flush_map(dst); 89 90 err = fn(migrate, &ww, src, dst, &rq); 91 if (!err) 92 continue; 93 94 if (err != -EDEADLK && err != -EINTR && err != -ERESTARTSYS) 95 pr_err("%ps failed, size: %u\n", fn, sz); 96 if (rq) { 97 i915_request_wait(rq, 0, HZ); 98 i915_request_put(rq); 99 } 100 i915_gem_object_unpin_map(dst); 101 unpin_src: 102 i915_gem_object_unpin_map(src); 103 } 104 if (err) 105 goto err_out; 106 107 if (rq) { 108 if (i915_request_wait(rq, 0, HZ) < 0) { 109 pr_err("%ps timed out, size: %u\n", fn, sz); 110 err = -ETIME; 111 } 112 i915_request_put(rq); 113 } 114 115 for (i = 0; !err && i < sz / PAGE_SIZE; i++) { 116 int x = i * 1024 + i915_prandom_u32_max_state(1024, prng); 117 118 if (vaddr[x] != x) { 119 pr_err("%ps failed, size: %u, offset: %zu\n", 120 fn, sz, x * sizeof(u32)); 121 igt_hexdump(vaddr + i * 1024, 4096); 122 err = -EINVAL; 123 } 124 } 125 126 i915_gem_object_unpin_map(dst); 127 i915_gem_object_unpin_map(src); 128 129 err_out: 130 i915_gem_object_put(dst); 131 err_free_src: 132 i915_gem_object_put(src); 133 134 return err; 135 } 136 137 static int intel_context_copy_ccs(struct intel_context *ce, 138 const struct i915_deps *deps, 139 struct scatterlist *sg, 140 unsigned int pat_index, 141 bool write_to_ccs, 142 struct i915_request **out) 143 { 144 u8 src_access = write_to_ccs ? DIRECT_ACCESS : INDIRECT_ACCESS; 145 u8 dst_access = write_to_ccs ? INDIRECT_ACCESS : DIRECT_ACCESS; 146 struct sgt_dma it = sg_sgt(sg); 147 struct i915_request *rq; 148 u32 offset; 149 int err; 150 151 GEM_BUG_ON(ce->vm != ce->engine->gt->migrate.context->vm); 152 *out = NULL; 153 154 GEM_BUG_ON(ce->ring->size < SZ_64K); 155 156 offset = 0; 157 if (HAS_64K_PAGES(ce->engine->i915)) 158 offset = CHUNK_SZ; 159 160 do { 161 int len; 162 163 rq = i915_request_create(ce); 164 if (IS_ERR(rq)) { 165 err = PTR_ERR(rq); 166 goto out_ce; 167 } 168 169 if (deps) { 170 err = i915_request_await_deps(rq, deps); 171 if (err) 172 goto out_rq; 173 174 if (rq->engine->emit_init_breadcrumb) { 175 err = rq->engine->emit_init_breadcrumb(rq); 176 if (err) 177 goto out_rq; 178 } 179 180 deps = NULL; 181 } 182 183 /* The PTE updates + clear must not be interrupted. */ 184 err = emit_no_arbitration(rq); 185 if (err) 186 goto out_rq; 187 188 len = emit_pte(rq, &it, pat_index, true, offset, CHUNK_SZ); 189 if (len <= 0) { 190 err = len; 191 goto out_rq; 192 } 193 194 err = rq->engine->emit_flush(rq, EMIT_INVALIDATE); 195 if (err) 196 goto out_rq; 197 198 err = emit_copy_ccs(rq, offset, dst_access, 199 offset, src_access, len); 200 if (err) 201 goto out_rq; 202 203 err = rq->engine->emit_flush(rq, EMIT_INVALIDATE); 204 205 /* Arbitration is re-enabled between requests. */ 206 out_rq: 207 if (*out) 208 i915_request_put(*out); 209 *out = i915_request_get(rq); 210 i915_request_add(rq); 211 if (err || !it.sg || !sg_dma_len(it.sg)) 212 break; 213 214 cond_resched(); 215 } while (1); 216 217 out_ce: 218 return err; 219 } 220 221 static int 222 intel_migrate_ccs_copy(struct intel_migrate *m, 223 struct i915_gem_ww_ctx *ww, 224 const struct i915_deps *deps, 225 struct scatterlist *sg, 226 unsigned int pat_index, 227 bool write_to_ccs, 228 struct i915_request **out) 229 { 230 struct intel_context *ce; 231 int err; 232 233 *out = NULL; 234 if (!m->context) 235 return -ENODEV; 236 237 ce = intel_migrate_create_context(m); 238 if (IS_ERR(ce)) 239 ce = intel_context_get(m->context); 240 GEM_BUG_ON(IS_ERR(ce)); 241 242 err = intel_context_pin_ww(ce, ww); 243 if (err) 244 goto out; 245 246 err = intel_context_copy_ccs(ce, deps, sg, pat_index, 247 write_to_ccs, out); 248 249 intel_context_unpin(ce); 250 out: 251 intel_context_put(ce); 252 return err; 253 } 254 255 static int clear(struct intel_migrate *migrate, 256 int (*fn)(struct intel_migrate *migrate, 257 struct i915_gem_ww_ctx *ww, 258 struct drm_i915_gem_object *obj, 259 u32 value, 260 struct i915_request **out), 261 u32 sz, struct rnd_state *prng) 262 { 263 struct drm_i915_private *i915 = migrate->context->engine->i915; 264 struct drm_i915_gem_object *obj; 265 struct i915_request *rq; 266 struct i915_gem_ww_ctx ww; 267 u32 *vaddr, val = 0; 268 bool ccs_cap = false; 269 int err = 0; 270 int i; 271 272 obj = create_lmem_or_internal(i915, sz); 273 if (IS_ERR(obj)) 274 return 0; 275 276 /* Consider the rounded up memory too */ 277 sz = obj->base.size; 278 279 if (HAS_FLAT_CCS(i915) && i915_gem_object_is_lmem(obj)) 280 ccs_cap = true; 281 282 for_i915_gem_ww(&ww, err, true) { 283 int ccs_bytes, ccs_bytes_per_chunk; 284 285 err = i915_gem_object_lock(obj, &ww); 286 if (err) 287 continue; 288 289 vaddr = i915_gem_object_pin_map(obj, I915_MAP_WC); 290 if (IS_ERR(vaddr)) { 291 err = PTR_ERR(vaddr); 292 continue; 293 } 294 295 for (i = 0; i < sz / sizeof(u32); i++) 296 vaddr[i] = ~i; 297 i915_gem_object_flush_map(obj); 298 299 if (ccs_cap && !val) { 300 /* Write the obj data into ccs surface */ 301 err = intel_migrate_ccs_copy(migrate, &ww, NULL, 302 obj->mm.pages->sgl, 303 obj->pat_index, 304 true, &rq); 305 if (rq && !err) { 306 if (i915_request_wait(rq, 0, HZ) < 0) { 307 pr_err("%ps timed out, size: %u\n", 308 fn, sz); 309 err = -ETIME; 310 } 311 i915_request_put(rq); 312 rq = NULL; 313 } 314 if (err) 315 continue; 316 } 317 318 err = fn(migrate, &ww, obj, val, &rq); 319 if (rq && !err) { 320 if (i915_request_wait(rq, 0, HZ) < 0) { 321 pr_err("%ps timed out, size: %u\n", fn, sz); 322 err = -ETIME; 323 } 324 i915_request_put(rq); 325 rq = NULL; 326 } 327 if (err) 328 continue; 329 330 i915_gem_object_flush_map(obj); 331 332 /* Verify the set/clear of the obj mem */ 333 for (i = 0; !err && i < sz / PAGE_SIZE; i++) { 334 int x = i * 1024 + 335 i915_prandom_u32_max_state(1024, prng); 336 337 if (vaddr[x] != val) { 338 pr_err("%ps failed, (%u != %u), offset: %zu\n", 339 fn, vaddr[x], val, x * sizeof(u32)); 340 igt_hexdump(vaddr + i * 1024, 4096); 341 err = -EINVAL; 342 } 343 } 344 if (err) 345 continue; 346 347 if (ccs_cap && !val) { 348 for (i = 0; i < sz / sizeof(u32); i++) 349 vaddr[i] = ~i; 350 i915_gem_object_flush_map(obj); 351 352 err = intel_migrate_ccs_copy(migrate, &ww, NULL, 353 obj->mm.pages->sgl, 354 obj->pat_index, 355 false, &rq); 356 if (rq && !err) { 357 if (i915_request_wait(rq, 0, HZ) < 0) { 358 pr_err("%ps timed out, size: %u\n", 359 fn, sz); 360 err = -ETIME; 361 } 362 i915_request_put(rq); 363 rq = NULL; 364 } 365 if (err) 366 continue; 367 368 ccs_bytes = GET_CCS_BYTES(i915, sz); 369 ccs_bytes_per_chunk = GET_CCS_BYTES(i915, CHUNK_SZ); 370 i915_gem_object_flush_map(obj); 371 372 for (i = 0; !err && i < DIV_ROUND_UP(ccs_bytes, PAGE_SIZE); i++) { 373 int offset = ((i * PAGE_SIZE) / 374 ccs_bytes_per_chunk) * CHUNK_SZ / sizeof(u32); 375 int ccs_bytes_left = (ccs_bytes - i * PAGE_SIZE) / sizeof(u32); 376 int x = i915_prandom_u32_max_state(min_t(int, 1024, 377 ccs_bytes_left), prng); 378 379 if (vaddr[offset + x]) { 380 pr_err("%ps ccs clearing failed, offset: %ld/%d\n", 381 fn, i * PAGE_SIZE + x * sizeof(u32), ccs_bytes); 382 igt_hexdump(vaddr + offset, 383 min_t(int, 4096, 384 ccs_bytes_left * sizeof(u32))); 385 err = -EINVAL; 386 } 387 } 388 389 if (err) 390 continue; 391 } 392 i915_gem_object_unpin_map(obj); 393 } 394 395 if (err) { 396 if (err != -EDEADLK && err != -EINTR && err != -ERESTARTSYS) 397 pr_err("%ps failed, size: %u\n", fn, sz); 398 if (rq && err != -EINVAL) { 399 i915_request_wait(rq, 0, HZ); 400 i915_request_put(rq); 401 } 402 403 i915_gem_object_unpin_map(obj); 404 } 405 406 i915_gem_object_put(obj); 407 return err; 408 } 409 410 static int __migrate_copy(struct intel_migrate *migrate, 411 struct i915_gem_ww_ctx *ww, 412 struct drm_i915_gem_object *src, 413 struct drm_i915_gem_object *dst, 414 struct i915_request **out) 415 { 416 return intel_migrate_copy(migrate, ww, NULL, 417 src->mm.pages->sgl, src->pat_index, 418 i915_gem_object_is_lmem(src), 419 dst->mm.pages->sgl, dst->pat_index, 420 i915_gem_object_is_lmem(dst), 421 out); 422 } 423 424 static int __global_copy(struct intel_migrate *migrate, 425 struct i915_gem_ww_ctx *ww, 426 struct drm_i915_gem_object *src, 427 struct drm_i915_gem_object *dst, 428 struct i915_request **out) 429 { 430 return intel_context_migrate_copy(migrate->context, NULL, 431 src->mm.pages->sgl, src->pat_index, 432 i915_gem_object_is_lmem(src), 433 dst->mm.pages->sgl, dst->pat_index, 434 i915_gem_object_is_lmem(dst), 435 out); 436 } 437 438 static int 439 migrate_copy(struct intel_migrate *migrate, u32 sz, struct rnd_state *prng) 440 { 441 return copy(migrate, __migrate_copy, sz, prng); 442 } 443 444 static int 445 global_copy(struct intel_migrate *migrate, u32 sz, struct rnd_state *prng) 446 { 447 return copy(migrate, __global_copy, sz, prng); 448 } 449 450 static int __migrate_clear(struct intel_migrate *migrate, 451 struct i915_gem_ww_ctx *ww, 452 struct drm_i915_gem_object *obj, 453 u32 value, 454 struct i915_request **out) 455 { 456 return intel_migrate_clear(migrate, ww, NULL, 457 obj->mm.pages->sgl, 458 obj->pat_index, 459 i915_gem_object_is_lmem(obj), 460 value, out); 461 } 462 463 static int __global_clear(struct intel_migrate *migrate, 464 struct i915_gem_ww_ctx *ww, 465 struct drm_i915_gem_object *obj, 466 u32 value, 467 struct i915_request **out) 468 { 469 return intel_context_migrate_clear(migrate->context, NULL, 470 obj->mm.pages->sgl, 471 obj->pat_index, 472 i915_gem_object_is_lmem(obj), 473 value, out); 474 } 475 476 static int 477 migrate_clear(struct intel_migrate *migrate, u32 sz, struct rnd_state *prng) 478 { 479 return clear(migrate, __migrate_clear, sz, prng); 480 } 481 482 static int 483 global_clear(struct intel_migrate *migrate, u32 sz, struct rnd_state *prng) 484 { 485 return clear(migrate, __global_clear, sz, prng); 486 } 487 488 static int live_migrate_copy(void *arg) 489 { 490 struct intel_gt *gt = arg; 491 struct intel_migrate *migrate = >->migrate; 492 struct drm_i915_private *i915 = migrate->context->engine->i915; 493 I915_RND_STATE(prng); 494 int i; 495 496 for (i = 0; i < ARRAY_SIZE(sizes); i++) { 497 int err; 498 499 err = migrate_copy(migrate, sizes[i], &prng); 500 if (err == 0) 501 err = global_copy(migrate, sizes[i], &prng); 502 i915_gem_drain_freed_objects(i915); 503 if (err) 504 return err; 505 } 506 507 return 0; 508 } 509 510 static int live_migrate_clear(void *arg) 511 { 512 struct intel_gt *gt = arg; 513 struct intel_migrate *migrate = >->migrate; 514 struct drm_i915_private *i915 = migrate->context->engine->i915; 515 I915_RND_STATE(prng); 516 int i; 517 518 for (i = 0; i < ARRAY_SIZE(sizes); i++) { 519 int err; 520 521 err = migrate_clear(migrate, sizes[i], &prng); 522 if (err == 0) 523 err = global_clear(migrate, sizes[i], &prng); 524 525 i915_gem_drain_freed_objects(i915); 526 if (err) 527 return err; 528 } 529 530 return 0; 531 } 532 533 struct spinner_timer { 534 struct timer_list timer; 535 struct igt_spinner spin; 536 }; 537 538 static void spinner_kill(struct timer_list *timer) 539 { 540 struct spinner_timer *st = from_timer(st, timer, timer); 541 542 igt_spinner_end(&st->spin); 543 pr_info("%s\n", __func__); 544 } 545 546 static int live_emit_pte_full_ring(void *arg) 547 { 548 struct intel_gt *gt = arg; 549 struct intel_migrate *migrate = >->migrate; 550 struct drm_i915_private *i915 = migrate->context->engine->i915; 551 struct drm_i915_gem_object *obj; 552 struct intel_context *ce; 553 struct i915_request *rq, *prev; 554 struct spinner_timer st; 555 struct sgt_dma it; 556 int len, sz, err; 557 u32 *cs; 558 559 /* 560 * Simple regression test to check that we don't trample the 561 * rq->reserved_space when returning from emit_pte(), if the ring is 562 * nearly full. 563 */ 564 565 if (igt_spinner_init(&st.spin, to_gt(i915))) 566 return -ENOMEM; 567 568 obj = i915_gem_object_create_internal(i915, 2 * PAGE_SIZE); 569 if (IS_ERR(obj)) { 570 err = PTR_ERR(obj); 571 goto out_spinner; 572 } 573 574 err = i915_gem_object_pin_pages_unlocked(obj); 575 if (err) 576 goto out_obj; 577 578 ce = intel_migrate_create_context(migrate); 579 if (IS_ERR(ce)) { 580 err = PTR_ERR(ce); 581 goto out_obj; 582 } 583 584 ce->ring_size = SZ_4K; /* Not too big */ 585 586 err = intel_context_pin(ce); 587 if (err) 588 goto out_put; 589 590 rq = igt_spinner_create_request(&st.spin, ce, MI_ARB_CHECK); 591 if (IS_ERR(rq)) { 592 err = PTR_ERR(rq); 593 goto out_unpin; 594 } 595 596 i915_request_add(rq); 597 if (!igt_wait_for_spinner(&st.spin, rq)) { 598 err = -EIO; 599 goto out_unpin; 600 } 601 602 /* 603 * Fill the rest of the ring leaving I915_EMIT_PTE_NUM_DWORDS + 604 * ring->reserved_space at the end. To actually emit the PTEs we require 605 * slightly more than I915_EMIT_PTE_NUM_DWORDS, since our object size is 606 * greater than PAGE_SIZE. The correct behaviour is to wait for more 607 * ring space in emit_pte(), otherwise we trample on the reserved_space 608 * resulting in crashes when later submitting the rq. 609 */ 610 611 prev = NULL; 612 do { 613 if (prev) 614 i915_request_add(rq); 615 616 rq = i915_request_create(ce); 617 if (IS_ERR(rq)) { 618 err = PTR_ERR(rq); 619 goto out_unpin; 620 } 621 622 sz = (rq->ring->space - rq->reserved_space) / sizeof(u32) - 623 I915_EMIT_PTE_NUM_DWORDS; 624 sz = min_t(u32, sz, (SZ_1K - rq->reserved_space) / sizeof(u32) - 625 I915_EMIT_PTE_NUM_DWORDS); 626 cs = intel_ring_begin(rq, sz); 627 if (IS_ERR(cs)) { 628 err = PTR_ERR(cs); 629 goto out_rq; 630 } 631 632 memset32(cs, MI_NOOP, sz); 633 cs += sz; 634 intel_ring_advance(rq, cs); 635 636 pr_info("%s emit=%u sz=%d\n", __func__, rq->ring->emit, sz); 637 638 prev = rq; 639 } while (rq->ring->space > (rq->reserved_space + 640 I915_EMIT_PTE_NUM_DWORDS * sizeof(u32))); 641 642 timer_setup_on_stack(&st.timer, spinner_kill, 0); 643 mod_timer(&st.timer, jiffies + 2 * HZ); 644 645 /* 646 * This should wait for the spinner to be killed, otherwise we should go 647 * down in flames when doing i915_request_add(). 648 */ 649 pr_info("%s emite_pte ring space=%u\n", __func__, rq->ring->space); 650 it = sg_sgt(obj->mm.pages->sgl); 651 len = emit_pte(rq, &it, obj->pat_index, false, 0, CHUNK_SZ); 652 if (!len) { 653 err = -EINVAL; 654 goto out_rq; 655 } 656 if (len < 0) { 657 err = len; 658 goto out_rq; 659 } 660 661 out_rq: 662 i915_request_add(rq); /* GEM_BUG_ON(rq->reserved_space > ring->space)? */ 663 del_timer_sync(&st.timer); 664 destroy_timer_on_stack(&st.timer); 665 out_unpin: 666 intel_context_unpin(ce); 667 out_put: 668 intel_context_put(ce); 669 out_obj: 670 i915_gem_object_put(obj); 671 out_spinner: 672 igt_spinner_fini(&st.spin); 673 return err; 674 } 675 676 struct threaded_migrate { 677 struct intel_migrate *migrate; 678 struct task_struct *tsk; 679 struct rnd_state prng; 680 }; 681 682 static int threaded_migrate(struct intel_migrate *migrate, 683 int (*fn)(void *arg), 684 unsigned int flags) 685 { 686 const unsigned int n_cpus = num_online_cpus() + 1; 687 struct threaded_migrate *thread; 688 I915_RND_STATE(prng); 689 unsigned int i; 690 int err = 0; 691 692 thread = kcalloc(n_cpus, sizeof(*thread), GFP_KERNEL); 693 if (!thread) 694 return 0; 695 696 for (i = 0; i < n_cpus; ++i) { 697 struct task_struct *tsk; 698 699 thread[i].migrate = migrate; 700 thread[i].prng = 701 I915_RND_STATE_INITIALIZER(prandom_u32_state(&prng)); 702 703 tsk = kthread_run(fn, &thread[i], "igt-%d", i); 704 if (IS_ERR(tsk)) { 705 err = PTR_ERR(tsk); 706 break; 707 } 708 709 get_task_struct(tsk); 710 thread[i].tsk = tsk; 711 } 712 713 msleep(10); /* start all threads before we kthread_stop() */ 714 715 for (i = 0; i < n_cpus; ++i) { 716 struct task_struct *tsk = thread[i].tsk; 717 int status; 718 719 if (IS_ERR_OR_NULL(tsk)) 720 continue; 721 722 status = kthread_stop(tsk); 723 if (status && !err) 724 err = status; 725 726 put_task_struct(tsk); 727 } 728 729 kfree(thread); 730 return err; 731 } 732 733 static int __thread_migrate_copy(void *arg) 734 { 735 struct threaded_migrate *tm = arg; 736 737 return migrate_copy(tm->migrate, 2 * CHUNK_SZ, &tm->prng); 738 } 739 740 static int thread_migrate_copy(void *arg) 741 { 742 struct intel_gt *gt = arg; 743 struct intel_migrate *migrate = >->migrate; 744 745 return threaded_migrate(migrate, __thread_migrate_copy, 0); 746 } 747 748 static int __thread_global_copy(void *arg) 749 { 750 struct threaded_migrate *tm = arg; 751 752 return global_copy(tm->migrate, 2 * CHUNK_SZ, &tm->prng); 753 } 754 755 static int thread_global_copy(void *arg) 756 { 757 struct intel_gt *gt = arg; 758 struct intel_migrate *migrate = >->migrate; 759 760 return threaded_migrate(migrate, __thread_global_copy, 0); 761 } 762 763 static int __thread_migrate_clear(void *arg) 764 { 765 struct threaded_migrate *tm = arg; 766 767 return migrate_clear(tm->migrate, 2 * CHUNK_SZ, &tm->prng); 768 } 769 770 static int __thread_global_clear(void *arg) 771 { 772 struct threaded_migrate *tm = arg; 773 774 return global_clear(tm->migrate, 2 * CHUNK_SZ, &tm->prng); 775 } 776 777 static int thread_migrate_clear(void *arg) 778 { 779 struct intel_gt *gt = arg; 780 struct intel_migrate *migrate = >->migrate; 781 782 return threaded_migrate(migrate, __thread_migrate_clear, 0); 783 } 784 785 static int thread_global_clear(void *arg) 786 { 787 struct intel_gt *gt = arg; 788 struct intel_migrate *migrate = >->migrate; 789 790 return threaded_migrate(migrate, __thread_global_clear, 0); 791 } 792 793 int intel_migrate_live_selftests(struct drm_i915_private *i915) 794 { 795 static const struct i915_subtest tests[] = { 796 SUBTEST(live_migrate_copy), 797 SUBTEST(live_migrate_clear), 798 SUBTEST(live_emit_pte_full_ring), 799 SUBTEST(thread_migrate_copy), 800 SUBTEST(thread_migrate_clear), 801 SUBTEST(thread_global_copy), 802 SUBTEST(thread_global_clear), 803 }; 804 struct intel_gt *gt = to_gt(i915); 805 806 if (!gt->migrate.context) 807 return 0; 808 809 return intel_gt_live_subtests(tests, gt); 810 } 811 812 static struct drm_i915_gem_object * 813 create_init_lmem_internal(struct intel_gt *gt, size_t sz, bool try_lmem) 814 { 815 struct drm_i915_gem_object *obj = NULL; 816 int err; 817 818 if (try_lmem) 819 obj = i915_gem_object_create_lmem(gt->i915, sz, 0); 820 821 if (IS_ERR_OR_NULL(obj)) { 822 obj = i915_gem_object_create_internal(gt->i915, sz); 823 if (IS_ERR(obj)) 824 return obj; 825 } 826 827 i915_gem_object_trylock(obj, NULL); 828 err = i915_gem_object_pin_pages(obj); 829 if (err) { 830 i915_gem_object_unlock(obj); 831 i915_gem_object_put(obj); 832 return ERR_PTR(err); 833 } 834 835 return obj; 836 } 837 838 static int wrap_ktime_compare(const void *A, const void *B) 839 { 840 const ktime_t *a = A, *b = B; 841 842 return ktime_compare(*a, *b); 843 } 844 845 static int __perf_clear_blt(struct intel_context *ce, 846 struct scatterlist *sg, 847 unsigned int pat_index, 848 bool is_lmem, 849 size_t sz) 850 { 851 ktime_t t[5]; 852 int pass; 853 int err = 0; 854 855 for (pass = 0; pass < ARRAY_SIZE(t); pass++) { 856 struct i915_request *rq; 857 ktime_t t0, t1; 858 859 t0 = ktime_get(); 860 861 err = intel_context_migrate_clear(ce, NULL, sg, pat_index, 862 is_lmem, 0, &rq); 863 if (rq) { 864 if (i915_request_wait(rq, 0, MAX_SCHEDULE_TIMEOUT) < 0) 865 err = -EIO; 866 i915_request_put(rq); 867 } 868 if (err) 869 break; 870 871 t1 = ktime_get(); 872 t[pass] = ktime_sub(t1, t0); 873 } 874 if (err) 875 return err; 876 877 sort(t, ARRAY_SIZE(t), sizeof(*t), wrap_ktime_compare, NULL); 878 pr_info("%s: %zd KiB fill: %lld MiB/s\n", 879 ce->engine->name, sz >> 10, 880 div64_u64(mul_u32_u32(4 * sz, 881 1000 * 1000 * 1000), 882 t[1] + 2 * t[2] + t[3]) >> 20); 883 return 0; 884 } 885 886 static int perf_clear_blt(void *arg) 887 { 888 struct intel_gt *gt = arg; 889 static const unsigned long sizes[] = { 890 SZ_4K, 891 SZ_64K, 892 SZ_2M, 893 SZ_64M 894 }; 895 int i; 896 897 for (i = 0; i < ARRAY_SIZE(sizes); i++) { 898 struct drm_i915_gem_object *dst; 899 int err; 900 901 dst = create_init_lmem_internal(gt, sizes[i], true); 902 if (IS_ERR(dst)) 903 return PTR_ERR(dst); 904 905 err = __perf_clear_blt(gt->migrate.context, 906 dst->mm.pages->sgl, 907 i915_gem_get_pat_index(gt->i915, 908 I915_CACHE_NONE), 909 i915_gem_object_is_lmem(dst), 910 sizes[i]); 911 912 i915_gem_object_unlock(dst); 913 i915_gem_object_put(dst); 914 if (err) 915 return err; 916 } 917 918 return 0; 919 } 920 921 static int __perf_copy_blt(struct intel_context *ce, 922 struct scatterlist *src, 923 unsigned int src_pat_index, 924 bool src_is_lmem, 925 struct scatterlist *dst, 926 unsigned int dst_pat_index, 927 bool dst_is_lmem, 928 size_t sz) 929 { 930 ktime_t t[5]; 931 int pass; 932 int err = 0; 933 934 for (pass = 0; pass < ARRAY_SIZE(t); pass++) { 935 struct i915_request *rq; 936 ktime_t t0, t1; 937 938 t0 = ktime_get(); 939 940 err = intel_context_migrate_copy(ce, NULL, 941 src, src_pat_index, 942 src_is_lmem, 943 dst, dst_pat_index, 944 dst_is_lmem, 945 &rq); 946 if (rq) { 947 if (i915_request_wait(rq, 0, MAX_SCHEDULE_TIMEOUT) < 0) 948 err = -EIO; 949 i915_request_put(rq); 950 } 951 if (err) 952 break; 953 954 t1 = ktime_get(); 955 t[pass] = ktime_sub(t1, t0); 956 } 957 if (err) 958 return err; 959 960 sort(t, ARRAY_SIZE(t), sizeof(*t), wrap_ktime_compare, NULL); 961 pr_info("%s: %zd KiB copy: %lld MiB/s\n", 962 ce->engine->name, sz >> 10, 963 div64_u64(mul_u32_u32(4 * sz, 964 1000 * 1000 * 1000), 965 t[1] + 2 * t[2] + t[3]) >> 20); 966 return 0; 967 } 968 969 static int perf_copy_blt(void *arg) 970 { 971 struct intel_gt *gt = arg; 972 static const unsigned long sizes[] = { 973 SZ_4K, 974 SZ_64K, 975 SZ_2M, 976 SZ_64M 977 }; 978 int i; 979 980 for (i = 0; i < ARRAY_SIZE(sizes); i++) { 981 struct drm_i915_gem_object *src, *dst; 982 size_t sz; 983 int err; 984 985 src = create_init_lmem_internal(gt, sizes[i], true); 986 if (IS_ERR(src)) 987 return PTR_ERR(src); 988 989 sz = src->base.size; 990 dst = create_init_lmem_internal(gt, sz, false); 991 if (IS_ERR(dst)) { 992 err = PTR_ERR(dst); 993 goto err_src; 994 } 995 996 err = __perf_copy_blt(gt->migrate.context, 997 src->mm.pages->sgl, 998 i915_gem_get_pat_index(gt->i915, 999 I915_CACHE_NONE), 1000 i915_gem_object_is_lmem(src), 1001 dst->mm.pages->sgl, 1002 i915_gem_get_pat_index(gt->i915, 1003 I915_CACHE_NONE), 1004 i915_gem_object_is_lmem(dst), 1005 sz); 1006 1007 i915_gem_object_unlock(dst); 1008 i915_gem_object_put(dst); 1009 err_src: 1010 i915_gem_object_unlock(src); 1011 i915_gem_object_put(src); 1012 if (err) 1013 return err; 1014 } 1015 1016 return 0; 1017 } 1018 1019 int intel_migrate_perf_selftests(struct drm_i915_private *i915) 1020 { 1021 static const struct i915_subtest tests[] = { 1022 SUBTEST(perf_clear_blt), 1023 SUBTEST(perf_copy_blt), 1024 }; 1025 struct intel_gt *gt = to_gt(i915); 1026 1027 if (intel_gt_is_wedged(gt)) 1028 return 0; 1029 1030 if (!gt->migrate.context) 1031 return 0; 1032 1033 return intel_gt_live_subtests(tests, gt); 1034 } 1035