// SPDX-License-Identifier: MIT
/*
 * Copyright © 2020 Intel Corporation
 */

#include <linux/sort.h>

#include "gem/i915_gem_internal.h"

#include "selftests/i915_random.h"

static const unsigned int sizes[] = {
	SZ_4K,
	SZ_64K,
	SZ_2M,
	CHUNK_SZ - SZ_4K,
	CHUNK_SZ,
	CHUNK_SZ + SZ_4K,
	SZ_64M,
};

static struct drm_i915_gem_object *
create_lmem_or_internal(struct drm_i915_private *i915, size_t size)
{
	struct drm_i915_gem_object *obj;

	obj = i915_gem_object_create_lmem(i915, size, 0);
	if (!IS_ERR(obj))
		return obj;

	return i915_gem_object_create_internal(i915, size);
}

static int copy(struct intel_migrate *migrate,
		int (*fn)(struct intel_migrate *migrate,
			  struct i915_gem_ww_ctx *ww,
			  struct drm_i915_gem_object *src,
			  struct drm_i915_gem_object *dst,
			  struct i915_request **out),
		u32 sz, struct rnd_state *prng)
{
	struct drm_i915_private *i915 = migrate->context->engine->i915;
	struct drm_i915_gem_object *src, *dst;
	struct i915_request *rq;
	struct i915_gem_ww_ctx ww;
	u32 *vaddr;
	int err = 0;
	int i;

	src = create_lmem_or_internal(i915, sz);
	if (IS_ERR(src))
		return 0;

	sz = src->base.size;
	dst = i915_gem_object_create_internal(i915, sz);
	if (IS_ERR(dst))
		goto err_free_src;

	for_i915_gem_ww(&ww, err, true) {
		err = i915_gem_object_lock(src, &ww);
		if (err)
			continue;

		err = i915_gem_object_lock(dst, &ww);
		if (err)
			continue;

		vaddr = i915_gem_object_pin_map(src, I915_MAP_WC);
		if (IS_ERR(vaddr)) {
			err = PTR_ERR(vaddr);
			continue;
		}

		for (i = 0; i < sz / sizeof(u32); i++)
			vaddr[i] = i;
		i915_gem_object_flush_map(src);

		vaddr = i915_gem_object_pin_map(dst, I915_MAP_WC);
		if (IS_ERR(vaddr)) {
			err = PTR_ERR(vaddr);
			goto unpin_src;
		}

		for (i = 0; i < sz / sizeof(u32); i++)
			vaddr[i] = ~i;
		i915_gem_object_flush_map(dst);

		err = fn(migrate, &ww, src, dst, &rq);
		if (!err)
			continue;

		if (err != -EDEADLK && err != -EINTR && err != -ERESTARTSYS)
			pr_err("%ps failed, size: %u\n", fn, sz);
		if (rq) {
			i915_request_wait(rq, 0, HZ);
			i915_request_put(rq);
		}
		i915_gem_object_unpin_map(dst);
unpin_src:
		i915_gem_object_unpin_map(src);
	}
	if (err)
		goto err_out;

	if (rq) {
		if (i915_request_wait(rq, 0, HZ) < 0) {
			pr_err("%ps timed out, size: %u\n", fn, sz);
			err = -ETIME;
		}
		i915_request_put(rq);
	}

	for (i = 0; !err && i < sz / PAGE_SIZE; i++) {
		int x = i * 1024 + i915_prandom_u32_max_state(1024, prng);

		if (vaddr[x] != x) {
			pr_err("%ps failed, size: %u, offset: %zu\n",
			       fn, sz, x * sizeof(u32));
			igt_hexdump(vaddr + i * 1024, 4096);
			err = -EINVAL;
		}
	}

	i915_gem_object_unpin_map(dst);
	i915_gem_object_unpin_map(src);

err_out:
	i915_gem_object_put(dst);
err_free_src:
	i915_gem_object_put(src);

	return err;
}
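
/*
 * copy() above seeds the source with ascending dwords and the destination
 * with their complement, then spot-checks one randomly chosen dword per
 * page of the destination after the blit. clear() below is the fill-side
 * counterpart: the object is seeded with ~i, fn() is asked to clear it to
 * the value 'sz', and the same per-page spot check expects that value.
 * -EDEADLK restarts the ww loop; -EINTR/-ERESTARTSYS are not logged as
 * hard failures.
 */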
static int clear(struct intel_migrate *migrate,
		 int (*fn)(struct intel_migrate *migrate,
			   struct i915_gem_ww_ctx *ww,
			   struct drm_i915_gem_object *obj,
			   u32 value,
			   struct i915_request **out),
		 u32 sz, struct rnd_state *prng)
{
	struct drm_i915_private *i915 = migrate->context->engine->i915;
	struct drm_i915_gem_object *obj;
	struct i915_request *rq;
	struct i915_gem_ww_ctx ww;
	u32 *vaddr;
	int err = 0;
	int i;

	obj = create_lmem_or_internal(i915, sz);
	if (IS_ERR(obj))
		return 0;

	for_i915_gem_ww(&ww, err, true) {
		err = i915_gem_object_lock(obj, &ww);
		if (err)
			continue;

		vaddr = i915_gem_object_pin_map(obj, I915_MAP_WC);
		if (IS_ERR(vaddr)) {
			err = PTR_ERR(vaddr);
			continue;
		}

		for (i = 0; i < sz / sizeof(u32); i++)
			vaddr[i] = ~i;
		i915_gem_object_flush_map(obj);

		err = fn(migrate, &ww, obj, sz, &rq);
		if (!err)
			continue;

		if (err != -EDEADLK && err != -EINTR && err != -ERESTARTSYS)
			pr_err("%ps failed, size: %u\n", fn, sz);
		if (rq) {
			i915_request_wait(rq, 0, HZ);
			i915_request_put(rq);
		}
		i915_gem_object_unpin_map(obj);
	}
	if (err)
		goto err_out;

	if (rq) {
		if (i915_request_wait(rq, 0, HZ) < 0) {
			pr_err("%ps timed out, size: %u\n", fn, sz);
			err = -ETIME;
		}
		i915_request_put(rq);
	}

	for (i = 0; !err && i < sz / PAGE_SIZE; i++) {
		int x = i * 1024 + i915_prandom_u32_max_state(1024, prng);

		if (vaddr[x] != sz) {
			pr_err("%ps failed, size: %u, offset: %zu\n",
			       fn, sz, x * sizeof(u32));
			igt_hexdump(vaddr + i * 1024, 4096);
			err = -EINVAL;
		}
	}

	i915_gem_object_unpin_map(obj);
err_out:
	i915_gem_object_put(obj);

	return err;
}

static int __migrate_copy(struct intel_migrate *migrate,
			  struct i915_gem_ww_ctx *ww,
			  struct drm_i915_gem_object *src,
			  struct drm_i915_gem_object *dst,
			  struct i915_request **out)
{
	return intel_migrate_copy(migrate, ww, NULL,
				  src->mm.pages->sgl, src->cache_level,
				  i915_gem_object_is_lmem(src),
				  dst->mm.pages->sgl, dst->cache_level,
				  i915_gem_object_is_lmem(dst),
				  out);
}

static int __global_copy(struct intel_migrate *migrate,
			 struct i915_gem_ww_ctx *ww,
			 struct drm_i915_gem_object *src,
			 struct drm_i915_gem_object *dst,
			 struct i915_request **out)
{
	return intel_context_migrate_copy(migrate->context, NULL,
					  src->mm.pages->sgl, src->cache_level,
					  i915_gem_object_is_lmem(src),
					  dst->mm.pages->sgl, dst->cache_level,
					  i915_gem_object_is_lmem(dst),
					  out);
}

static int
migrate_copy(struct intel_migrate *migrate, u32 sz, struct rnd_state *prng)
{
	return copy(migrate, __migrate_copy, sz, prng);
}

static int
global_copy(struct intel_migrate *migrate, u32 sz, struct rnd_state *prng)
{
	return copy(migrate, __global_copy, sz, prng);
}

static int __migrate_clear(struct intel_migrate *migrate,
			   struct i915_gem_ww_ctx *ww,
			   struct drm_i915_gem_object *obj,
			   u32 value,
			   struct i915_request **out)
{
	return intel_migrate_clear(migrate, ww, NULL,
				   obj->mm.pages->sgl,
				   obj->cache_level,
				   i915_gem_object_is_lmem(obj),
				   value, out);
}

static int __global_clear(struct intel_migrate *migrate,
			  struct i915_gem_ww_ctx *ww,
			  struct drm_i915_gem_object *obj,
			  u32 value,
			  struct i915_request **out)
{
	return intel_context_migrate_clear(migrate->context, NULL,
					   obj->mm.pages->sgl,
					   obj->cache_level,
					   i915_gem_object_is_lmem(obj),
					   value, out);
}

static int
migrate_clear(struct intel_migrate *migrate, u32 sz, struct rnd_state *prng)
{
	return clear(migrate, __migrate_clear, sz, prng);
}

static int
global_clear(struct intel_migrate *migrate, u32 sz, struct rnd_state *prng)
{
	return clear(migrate, __global_clear, sz, prng);
}
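
/*
 * The live_* subtests walk the sizes[] table above, which straddles
 * CHUNK_SZ (one page below, equal to, and one page above it) so that both
 * the single-chunk and multi-chunk emission paths are covered. Each size
 * is run through the ww-locked intel_migrate_* entry points first and
 * then the global intel_context_migrate_* ones, with
 * i915_gem_drain_freed_objects() called between iterations.
 */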
static int live_migrate_copy(void *arg)
{
	struct intel_migrate *migrate = arg;
	struct drm_i915_private *i915 = migrate->context->engine->i915;
	I915_RND_STATE(prng);
	int i;

	for (i = 0; i < ARRAY_SIZE(sizes); i++) {
		int err;

		err = migrate_copy(migrate, sizes[i], &prng);
		if (err == 0)
			err = global_copy(migrate, sizes[i], &prng);
		i915_gem_drain_freed_objects(i915);
		if (err)
			return err;
	}

	return 0;
}

static int live_migrate_clear(void *arg)
{
	struct intel_migrate *migrate = arg;
	struct drm_i915_private *i915 = migrate->context->engine->i915;
	I915_RND_STATE(prng);
	int i;

	for (i = 0; i < ARRAY_SIZE(sizes); i++) {
		int err;

		err = migrate_clear(migrate, sizes[i], &prng);
		if (err == 0)
			err = global_clear(migrate, sizes[i], &prng);

		i915_gem_drain_freed_objects(i915);
		if (err)
			return err;
	}

	return 0;
}

struct threaded_migrate {
	struct intel_migrate *migrate;
	struct task_struct *tsk;
	struct rnd_state prng;
};

static int threaded_migrate(struct intel_migrate *migrate,
			    int (*fn)(void *arg),
			    unsigned int flags)
{
	const unsigned int n_cpus = num_online_cpus() + 1;
	struct threaded_migrate *thread;
	I915_RND_STATE(prng);
	unsigned int i;
	int err = 0;

	thread = kcalloc(n_cpus, sizeof(*thread), GFP_KERNEL);
	if (!thread)
		return 0;

	for (i = 0; i < n_cpus; ++i) {
		struct task_struct *tsk;

		thread[i].migrate = migrate;
		thread[i].prng =
			I915_RND_STATE_INITIALIZER(prandom_u32_state(&prng));

		tsk = kthread_run(fn, &thread[i], "igt-%d", i);
		if (IS_ERR(tsk)) {
			err = PTR_ERR(tsk);
			break;
		}

		get_task_struct(tsk);
		thread[i].tsk = tsk;
	}

	msleep(10); /* start all threads before we kthread_stop() */

	for (i = 0; i < n_cpus; ++i) {
		struct task_struct *tsk = thread[i].tsk;
		int status;

		if (IS_ERR_OR_NULL(tsk))
			continue;

		status = kthread_stop(tsk);
		if (status && !err)
			err = status;

		put_task_struct(tsk);
	}

	kfree(thread);
	return err;
}

static int __thread_migrate_copy(void *arg)
{
	struct threaded_migrate *tm = arg;

	return migrate_copy(tm->migrate, 2 * CHUNK_SZ, &tm->prng);
}

static int thread_migrate_copy(void *arg)
{
	return threaded_migrate(arg, __thread_migrate_copy, 0);
}

static int __thread_global_copy(void *arg)
{
	struct threaded_migrate *tm = arg;

	return global_copy(tm->migrate, 2 * CHUNK_SZ, &tm->prng);
}

static int thread_global_copy(void *arg)
{
	return threaded_migrate(arg, __thread_global_copy, 0);
}

static int __thread_migrate_clear(void *arg)
{
	struct threaded_migrate *tm = arg;

	return migrate_clear(tm->migrate, 2 * CHUNK_SZ, &tm->prng);
}

static int __thread_global_clear(void *arg)
{
	struct threaded_migrate *tm = arg;

	return global_clear(tm->migrate, 2 * CHUNK_SZ, &tm->prng);
}

static int thread_migrate_clear(void *arg)
{
	return threaded_migrate(arg, __thread_migrate_clear, 0);
}

static int thread_global_clear(void *arg)
{
	return threaded_migrate(arg, __thread_global_clear, 0);
}
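
/*
 * Live selftest entry point: the single-threaded live_* tests run first,
 * followed by the threaded variants. Each threaded subtest spawns
 * num_online_cpus() + 1 kthreads, each copying or clearing its own
 * 2 * CHUNK_SZ object with a per-thread PRNG stream, so several
 * multi-chunk migrations are in flight on the migrate context at once;
 * kthread_stop() collects each thread's return value and the first
 * non-zero status is reported. CHUNK_SZ itself is expected to come from
 * intel_migrate.c, which includes this file for the selftest build.
 * The whole suite is skipped when the GT has no migration context.
 */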
int intel_migrate_live_selftests(struct drm_i915_private *i915)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(live_migrate_copy),
		SUBTEST(live_migrate_clear),
		SUBTEST(thread_migrate_copy),
		SUBTEST(thread_migrate_clear),
		SUBTEST(thread_global_copy),
		SUBTEST(thread_global_clear),
	};
	struct intel_gt *gt = to_gt(i915);

	if (!gt->migrate.context)
		return 0;

	return i915_subtests(tests, &gt->migrate);
}

static struct drm_i915_gem_object *
create_init_lmem_internal(struct intel_gt *gt, size_t sz, bool try_lmem)
{
	struct drm_i915_gem_object *obj = NULL;
	int err;

	if (try_lmem)
		obj = i915_gem_object_create_lmem(gt->i915, sz, 0);

	if (IS_ERR_OR_NULL(obj)) {
		obj = i915_gem_object_create_internal(gt->i915, sz);
		if (IS_ERR(obj))
			return obj;
	}

	i915_gem_object_trylock(obj, NULL);
	err = i915_gem_object_pin_pages(obj);
	if (err) {
		i915_gem_object_unlock(obj);
		i915_gem_object_put(obj);
		return ERR_PTR(err);
	}

	return obj;
}

static int wrap_ktime_compare(const void *A, const void *B)
{
	const ktime_t *a = A, *b = B;

	return ktime_compare(*a, *b);
}

static int __perf_clear_blt(struct intel_context *ce,
			    struct scatterlist *sg,
			    enum i915_cache_level cache_level,
			    bool is_lmem,
			    size_t sz)
{
	ktime_t t[5];
	int pass;
	int err = 0;

	for (pass = 0; pass < ARRAY_SIZE(t); pass++) {
		struct i915_request *rq;
		ktime_t t0, t1;

		t0 = ktime_get();

		err = intel_context_migrate_clear(ce, NULL, sg, cache_level,
						  is_lmem, 0, &rq);
		if (rq) {
			if (i915_request_wait(rq, 0, MAX_SCHEDULE_TIMEOUT) < 0)
				err = -EIO;
			i915_request_put(rq);
		}
		if (err)
			break;

		t1 = ktime_get();
		t[pass] = ktime_sub(t1, t0);
	}
	if (err)
		return err;

	sort(t, ARRAY_SIZE(t), sizeof(*t), wrap_ktime_compare, NULL);
	pr_info("%s: %zd KiB fill: %lld MiB/s\n",
		ce->engine->name, sz >> 10,
		div64_u64(mul_u32_u32(4 * sz, 1000 * 1000 * 1000),
			  t[1] + 2 * t[2] + t[3]) >> 20);
	return 0;
}

static int perf_clear_blt(void *arg)
{
	struct intel_gt *gt = arg;
	static const unsigned long sizes[] = {
		SZ_4K,
		SZ_64K,
		SZ_2M,
		SZ_64M
	};
	int i;

	for (i = 0; i < ARRAY_SIZE(sizes); i++) {
		struct drm_i915_gem_object *dst;
		int err;

		dst = create_init_lmem_internal(gt, sizes[i], true);
		if (IS_ERR(dst))
			return PTR_ERR(dst);

		err = __perf_clear_blt(gt->migrate.context,
				       dst->mm.pages->sgl,
				       I915_CACHE_NONE,
				       i915_gem_object_is_lmem(dst),
				       sizes[i]);

		i915_gem_object_unlock(dst);
		i915_gem_object_put(dst);
		if (err)
			return err;
	}

	return 0;
}
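
/*
 * Both __perf_clear_blt() above and __perf_copy_blt() below time five
 * passes and report a bandwidth derived from the middle of the sorted
 * samples: the weighted mean (t[1] + 2 * t[2] + t[3]) / 4 discards the
 * fastest and slowest pass. With sz in bytes and the timings in ns,
 * 4 * sz * 1e9 / (t[1] + 2 * t[2] + t[3]) gives bytes per second, and the
 * final >> 20 converts that to MiB/s.
 */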
static int __perf_copy_blt(struct intel_context *ce,
			   struct scatterlist *src,
			   enum i915_cache_level src_cache_level,
			   bool src_is_lmem,
			   struct scatterlist *dst,
			   enum i915_cache_level dst_cache_level,
			   bool dst_is_lmem,
			   size_t sz)
{
	ktime_t t[5];
	int pass;
	int err = 0;

	for (pass = 0; pass < ARRAY_SIZE(t); pass++) {
		struct i915_request *rq;
		ktime_t t0, t1;

		t0 = ktime_get();

		err = intel_context_migrate_copy(ce, NULL,
						 src, src_cache_level,
						 src_is_lmem,
						 dst, dst_cache_level,
						 dst_is_lmem,
						 &rq);
		if (rq) {
			if (i915_request_wait(rq, 0, MAX_SCHEDULE_TIMEOUT) < 0)
				err = -EIO;
			i915_request_put(rq);
		}
		if (err)
			break;

		t1 = ktime_get();
		t[pass] = ktime_sub(t1, t0);
	}
	if (err)
		return err;

	sort(t, ARRAY_SIZE(t), sizeof(*t), wrap_ktime_compare, NULL);
	pr_info("%s: %zd KiB copy: %lld MiB/s\n",
		ce->engine->name, sz >> 10,
		div64_u64(mul_u32_u32(4 * sz, 1000 * 1000 * 1000),
			  t[1] + 2 * t[2] + t[3]) >> 20);
	return 0;
}

static int perf_copy_blt(void *arg)
{
	struct intel_gt *gt = arg;
	static const unsigned long sizes[] = {
		SZ_4K,
		SZ_64K,
		SZ_2M,
		SZ_64M
	};
	int i;

	for (i = 0; i < ARRAY_SIZE(sizes); i++) {
		struct drm_i915_gem_object *src, *dst;
		int err;

		src = create_init_lmem_internal(gt, sizes[i], true);
		if (IS_ERR(src))
			return PTR_ERR(src);

		dst = create_init_lmem_internal(gt, sizes[i], false);
		if (IS_ERR(dst)) {
			err = PTR_ERR(dst);
			goto err_src;
		}

		err = __perf_copy_blt(gt->migrate.context,
				      src->mm.pages->sgl,
				      I915_CACHE_NONE,
				      i915_gem_object_is_lmem(src),
				      dst->mm.pages->sgl,
				      I915_CACHE_NONE,
				      i915_gem_object_is_lmem(dst),
				      sizes[i]);

		i915_gem_object_unlock(dst);
		i915_gem_object_put(dst);
err_src:
		i915_gem_object_unlock(src);
		i915_gem_object_put(src);
		if (err)
			return err;
	}

	return 0;
}

int intel_migrate_perf_selftests(struct drm_i915_private *i915)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(perf_clear_blt),
		SUBTEST(perf_copy_blt),
	};
	struct intel_gt *gt = to_gt(i915);

	if (intel_gt_is_wedged(gt))
		return 0;

	if (!gt->migrate.context)
		return 0;

	return intel_gt_live_subtests(tests, gt);
}