// SPDX-License-Identifier: MIT
/*
 * Copyright © 2020 Intel Corporation
 */

#include <linux/sort.h>

#include "selftests/i915_random.h"

static const unsigned int sizes[] = {
	SZ_4K,
	SZ_64K,
	SZ_2M,
	CHUNK_SZ - SZ_4K,
	CHUNK_SZ,
	CHUNK_SZ + SZ_4K,
	SZ_64M,
};

static struct drm_i915_gem_object *
create_lmem_or_internal(struct drm_i915_private *i915, size_t size)
{
	struct drm_i915_gem_object *obj;

	obj = i915_gem_object_create_lmem(i915, size, 0);
	if (!IS_ERR(obj))
		return obj;

	return i915_gem_object_create_internal(i915, size);
}

static int copy(struct intel_migrate *migrate,
		int (*fn)(struct intel_migrate *migrate,
			  struct i915_gem_ww_ctx *ww,
			  struct drm_i915_gem_object *src,
			  struct drm_i915_gem_object *dst,
			  struct i915_request **out),
		u32 sz, struct rnd_state *prng)
{
	struct drm_i915_private *i915 = migrate->context->engine->i915;
	struct drm_i915_gem_object *src, *dst;
	struct i915_request *rq;
	struct i915_gem_ww_ctx ww;
	u32 *vaddr;
	int err = 0;
	int i;

	src = create_lmem_or_internal(i915, sz);
	if (IS_ERR(src))
		return 0;

	dst = i915_gem_object_create_internal(i915, sz);
	if (IS_ERR(dst))
		goto err_free_src;

	for_i915_gem_ww(&ww, err, true) {
		err = i915_gem_object_lock(src, &ww);
		if (err)
			continue;

		err = i915_gem_object_lock(dst, &ww);
		if (err)
			continue;

		vaddr = i915_gem_object_pin_map(src, I915_MAP_WC);
		if (IS_ERR(vaddr)) {
			err = PTR_ERR(vaddr);
			continue;
		}

		for (i = 0; i < sz / sizeof(u32); i++)
			vaddr[i] = i;
		i915_gem_object_flush_map(src);

		vaddr = i915_gem_object_pin_map(dst, I915_MAP_WC);
		if (IS_ERR(vaddr)) {
			err = PTR_ERR(vaddr);
			goto unpin_src;
		}

		for (i = 0; i < sz / sizeof(u32); i++)
			vaddr[i] = ~i;
		i915_gem_object_flush_map(dst);

		err = fn(migrate, &ww, src, dst, &rq);
		if (!err)
			continue;

		if (err != -EDEADLK && err != -EINTR && err != -ERESTARTSYS)
			pr_err("%ps failed, size: %u\n", fn, sz);
		if (rq) {
			i915_request_wait(rq, 0, HZ);
			i915_request_put(rq);
		}
		i915_gem_object_unpin_map(dst);
unpin_src:
		i915_gem_object_unpin_map(src);
	}
	if (err)
		goto err_out;

	if (rq) {
		if (i915_request_wait(rq, 0, HZ) < 0) {
			pr_err("%ps timed out, size: %u\n", fn, sz);
			err = -ETIME;
		}
		i915_request_put(rq);
	}

	for (i = 0; !err && i < sz / PAGE_SIZE; i++) {
		int x = i * 1024 + i915_prandom_u32_max_state(1024, prng);

		if (vaddr[x] != x) {
			pr_err("%ps failed, size: %u, offset: %zu\n",
			       fn, sz, x * sizeof(u32));
			igt_hexdump(vaddr + i * 1024, 4096);
			err = -EINVAL;
		}
	}

	i915_gem_object_unpin_map(dst);
	i915_gem_object_unpin_map(src);

err_out:
	i915_gem_object_put(dst);
err_free_src:
	i915_gem_object_put(src);

	return err;
}
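
/*
 * clear() mirrors copy() above: seed the object with a non-zero pattern,
 * issue the blitter clear through @fn under a ww transaction, then sample
 * one random dword per page to check the requested value landed. Note that
 * clear() passes @sz itself as the fill value to @fn, so verification below
 * compares against @sz rather than zero.
 */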
static int clear(struct intel_migrate *migrate,
		 int (*fn)(struct intel_migrate *migrate,
			   struct i915_gem_ww_ctx *ww,
			   struct drm_i915_gem_object *obj,
			   u32 value,
			   struct i915_request **out),
		 u32 sz, struct rnd_state *prng)
{
	struct drm_i915_private *i915 = migrate->context->engine->i915;
	struct drm_i915_gem_object *obj;
	struct i915_request *rq;
	struct i915_gem_ww_ctx ww;
	u32 *vaddr;
	int err = 0;
	int i;

	obj = create_lmem_or_internal(i915, sz);
	if (IS_ERR(obj))
		return 0;

	for_i915_gem_ww(&ww, err, true) {
		err = i915_gem_object_lock(obj, &ww);
		if (err)
			continue;

		vaddr = i915_gem_object_pin_map(obj, I915_MAP_WC);
		if (IS_ERR(vaddr)) {
			err = PTR_ERR(vaddr);
			continue;
		}

		for (i = 0; i < sz / sizeof(u32); i++)
			vaddr[i] = ~i;
		i915_gem_object_flush_map(obj);

		err = fn(migrate, &ww, obj, sz, &rq);
		if (!err)
			continue;

		if (err != -EDEADLK && err != -EINTR && err != -ERESTARTSYS)
			pr_err("%ps failed, size: %u\n", fn, sz);
		if (rq) {
			i915_request_wait(rq, 0, HZ);
			i915_request_put(rq);
		}
		i915_gem_object_unpin_map(obj);
	}
	if (err)
		goto err_out;

	if (rq) {
		if (i915_request_wait(rq, 0, HZ) < 0) {
			pr_err("%ps timed out, size: %u\n", fn, sz);
			err = -ETIME;
		}
		i915_request_put(rq);
	}

	for (i = 0; !err && i < sz / PAGE_SIZE; i++) {
		int x = i * 1024 + i915_prandom_u32_max_state(1024, prng);

		if (vaddr[x] != sz) {
			pr_err("%ps failed, size: %u, offset: %zu\n",
			       fn, sz, x * sizeof(u32));
			igt_hexdump(vaddr + i * 1024, 4096);
			err = -EINVAL;
		}
	}

	i915_gem_object_unpin_map(obj);
err_out:
	i915_gem_object_put(obj);

	return err;
}

static int __migrate_copy(struct intel_migrate *migrate,
			  struct i915_gem_ww_ctx *ww,
			  struct drm_i915_gem_object *src,
			  struct drm_i915_gem_object *dst,
			  struct i915_request **out)
{
	return intel_migrate_copy(migrate, ww, NULL,
				  src->mm.pages->sgl, src->cache_level,
				  i915_gem_object_is_lmem(src),
				  dst->mm.pages->sgl, dst->cache_level,
				  i915_gem_object_is_lmem(dst),
				  out);
}

static int __global_copy(struct intel_migrate *migrate,
			 struct i915_gem_ww_ctx *ww,
			 struct drm_i915_gem_object *src,
			 struct drm_i915_gem_object *dst,
			 struct i915_request **out)
{
	return intel_context_migrate_copy(migrate->context, NULL,
					  src->mm.pages->sgl, src->cache_level,
					  i915_gem_object_is_lmem(src),
					  dst->mm.pages->sgl, dst->cache_level,
					  i915_gem_object_is_lmem(dst),
					  out);
}

static int
migrate_copy(struct intel_migrate *migrate, u32 sz, struct rnd_state *prng)
{
	return copy(migrate, __migrate_copy, sz, prng);
}

static int
global_copy(struct intel_migrate *migrate, u32 sz, struct rnd_state *prng)
{
	return copy(migrate, __global_copy, sz, prng);
}

static int __migrate_clear(struct intel_migrate *migrate,
			   struct i915_gem_ww_ctx *ww,
			   struct drm_i915_gem_object *obj,
			   u32 value,
			   struct i915_request **out)
{
	return intel_migrate_clear(migrate, ww, NULL,
				   obj->mm.pages->sgl,
				   obj->cache_level,
				   i915_gem_object_is_lmem(obj),
				   value, out);
}

static int __global_clear(struct intel_migrate *migrate,
			  struct i915_gem_ww_ctx *ww,
			  struct drm_i915_gem_object *obj,
			  u32 value,
			  struct i915_request **out)
{
	return intel_context_migrate_clear(migrate->context, NULL,
					   obj->mm.pages->sgl,
					   obj->cache_level,
					   i915_gem_object_is_lmem(obj),
					   value, out);
}

static int
migrate_clear(struct intel_migrate *migrate, u32 sz, struct rnd_state *prng)
{
	return clear(migrate, __migrate_clear, sz, prng);
}

static int
global_clear(struct intel_migrate *migrate, u32 sz, struct rnd_state *prng)
{
	return clear(migrate, __global_clear, sz, prng);
}
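
/*
 * The live_* subtests walk the sizes[] table, exercising first the ww-aware
 * intel_migrate_* wrappers and then the raw intel_context_migrate_* entry
 * points, draining freed objects between iterations so each size starts from
 * a clean slate.
 */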
static int live_migrate_copy(void *arg)
{
	struct intel_migrate *migrate = arg;
	struct drm_i915_private *i915 = migrate->context->engine->i915;
	I915_RND_STATE(prng);
	int i;

	for (i = 0; i < ARRAY_SIZE(sizes); i++) {
		int err;

		err = migrate_copy(migrate, sizes[i], &prng);
		if (err == 0)
			err = global_copy(migrate, sizes[i], &prng);
		i915_gem_drain_freed_objects(i915);
		if (err)
			return err;
	}

	return 0;
}

static int live_migrate_clear(void *arg)
{
	struct intel_migrate *migrate = arg;
	struct drm_i915_private *i915 = migrate->context->engine->i915;
	I915_RND_STATE(prng);
	int i;

	for (i = 0; i < ARRAY_SIZE(sizes); i++) {
		int err;

		err = migrate_clear(migrate, sizes[i], &prng);
		if (err == 0)
			err = global_clear(migrate, sizes[i], &prng);

		i915_gem_drain_freed_objects(i915);
		if (err)
			return err;
	}

	return 0;
}

struct threaded_migrate {
	struct intel_migrate *migrate;
	struct task_struct *tsk;
	struct rnd_state prng;
};

static int threaded_migrate(struct intel_migrate *migrate,
			    int (*fn)(void *arg),
			    unsigned int flags)
{
	const unsigned int n_cpus = num_online_cpus() + 1;
	struct threaded_migrate *thread;
	I915_RND_STATE(prng);
	unsigned int i;
	int err = 0;

	thread = kcalloc(n_cpus, sizeof(*thread), GFP_KERNEL);
	if (!thread)
		return 0;

	for (i = 0; i < n_cpus; ++i) {
		struct task_struct *tsk;

		thread[i].migrate = migrate;
		thread[i].prng =
			I915_RND_STATE_INITIALIZER(prandom_u32_state(&prng));

		tsk = kthread_run(fn, &thread[i], "igt-%d", i);
		if (IS_ERR(tsk)) {
			err = PTR_ERR(tsk);
			break;
		}

		get_task_struct(tsk);
		thread[i].tsk = tsk;
	}

	msleep(10); /* start all threads before we kthread_stop() */

	for (i = 0; i < n_cpus; ++i) {
		struct task_struct *tsk = thread[i].tsk;
		int status;

		if (IS_ERR_OR_NULL(tsk))
			continue;

		status = kthread_stop(tsk);
		if (status && !err)
			err = status;

		put_task_struct(tsk);
	}

	kfree(thread);
	return err;
}

static int __thread_migrate_copy(void *arg)
{
	struct threaded_migrate *tm = arg;

	return migrate_copy(tm->migrate, 2 * CHUNK_SZ, &tm->prng);
}

static int thread_migrate_copy(void *arg)
{
	return threaded_migrate(arg, __thread_migrate_copy, 0);
}

static int __thread_global_copy(void *arg)
{
	struct threaded_migrate *tm = arg;

	return global_copy(tm->migrate, 2 * CHUNK_SZ, &tm->prng);
}

static int thread_global_copy(void *arg)
{
	return threaded_migrate(arg, __thread_global_copy, 0);
}

static int __thread_migrate_clear(void *arg)
{
	struct threaded_migrate *tm = arg;

	return migrate_clear(tm->migrate, 2 * CHUNK_SZ, &tm->prng);
}

static int __thread_global_clear(void *arg)
{
	struct threaded_migrate *tm = arg;

	return global_clear(tm->migrate, 2 * CHUNK_SZ, &tm->prng);
}

static int thread_migrate_clear(void *arg)
{
	return threaded_migrate(arg, __thread_migrate_clear, 0);
}

static int thread_global_clear(void *arg)
{
	return threaded_migrate(arg, __thread_global_clear, 0);
}
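
/*
 * The thread_* variants above spawn one kthread per online CPU plus one,
 * each performing a single 2 * CHUNK_SZ transfer with a per-thread PRNG, so
 * that multi-chunk requests from different threads overlap on the shared
 * migration context. All subtests are skipped when the GT has no migration
 * context (for example when no suitable copy engine is available).
 */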
int intel_migrate_live_selftests(struct drm_i915_private *i915)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(live_migrate_copy),
		SUBTEST(live_migrate_clear),
		SUBTEST(thread_migrate_copy),
		SUBTEST(thread_migrate_clear),
		SUBTEST(thread_global_copy),
		SUBTEST(thread_global_clear),
	};
	struct intel_gt *gt = &i915->gt;

	if (!gt->migrate.context)
		return 0;

	return i915_subtests(tests, &gt->migrate);
}

static struct drm_i915_gem_object *
create_init_lmem_internal(struct intel_gt *gt, size_t sz, bool try_lmem)
{
	struct drm_i915_gem_object *obj = NULL;
	int err;

	if (try_lmem)
		obj = i915_gem_object_create_lmem(gt->i915, sz, 0);

	if (IS_ERR_OR_NULL(obj)) {
		obj = i915_gem_object_create_internal(gt->i915, sz);
		if (IS_ERR(obj))
			return obj;
	}

	i915_gem_object_trylock(obj);
	err = i915_gem_object_pin_pages(obj);
	if (err) {
		i915_gem_object_unlock(obj);
		i915_gem_object_put(obj);
		return ERR_PTR(err);
	}

	return obj;
}

static int wrap_ktime_compare(const void *A, const void *B)
{
	const ktime_t *a = A, *b = B;

	return ktime_compare(*a, *b);
}
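
/*
 * The perf helpers below time five passes each, sort the samples and report
 * a weighted average of the middle three: throughput in bytes/s is
 * 4 * sz * NSEC_PER_SEC / (t[1] + 2 * t[2] + t[3]), since the weights sum to
 * 4, and the final >> 20 converts that to MiB/s for the log line.
 */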
static int __perf_clear_blt(struct intel_context *ce,
			    struct scatterlist *sg,
			    enum i915_cache_level cache_level,
			    bool is_lmem,
			    size_t sz)
{
	ktime_t t[5];
	int pass;
	int err = 0;

	for (pass = 0; pass < ARRAY_SIZE(t); pass++) {
		struct i915_request *rq;
		ktime_t t0, t1;

		t0 = ktime_get();

		err = intel_context_migrate_clear(ce, NULL, sg, cache_level,
						  is_lmem, 0, &rq);
		if (rq) {
			if (i915_request_wait(rq, 0, MAX_SCHEDULE_TIMEOUT) < 0)
				err = -EIO;
			i915_request_put(rq);
		}
		if (err)
			break;

		t1 = ktime_get();
		t[pass] = ktime_sub(t1, t0);
	}
	if (err)
		return err;

	sort(t, ARRAY_SIZE(t), sizeof(*t), wrap_ktime_compare, NULL);
	pr_info("%s: %zd KiB fill: %lld MiB/s\n",
		ce->engine->name, sz >> 10,
		div64_u64(mul_u32_u32(4 * sz, 1000 * 1000 * 1000),
			  t[1] + 2 * t[2] + t[3]) >> 20);
	return 0;
}

static int perf_clear_blt(void *arg)
{
	struct intel_gt *gt = arg;
	static const unsigned long sizes[] = {
		SZ_4K,
		SZ_64K,
		SZ_2M,
		SZ_64M
	};
	int i;

	for (i = 0; i < ARRAY_SIZE(sizes); i++) {
		struct drm_i915_gem_object *dst;
		int err;

		dst = create_init_lmem_internal(gt, sizes[i], true);
		if (IS_ERR(dst))
			return PTR_ERR(dst);

		err = __perf_clear_blt(gt->migrate.context,
				       dst->mm.pages->sgl,
				       I915_CACHE_NONE,
				       i915_gem_object_is_lmem(dst),
				       sizes[i]);

		i915_gem_object_unlock(dst);
		i915_gem_object_put(dst);
		if (err)
			return err;
	}

	return 0;
}

static int __perf_copy_blt(struct intel_context *ce,
			   struct scatterlist *src,
			   enum i915_cache_level src_cache_level,
			   bool src_is_lmem,
			   struct scatterlist *dst,
			   enum i915_cache_level dst_cache_level,
			   bool dst_is_lmem,
			   size_t sz)
{
	ktime_t t[5];
	int pass;
	int err = 0;

	for (pass = 0; pass < ARRAY_SIZE(t); pass++) {
		struct i915_request *rq;
		ktime_t t0, t1;

		t0 = ktime_get();

		err = intel_context_migrate_copy(ce, NULL,
						 src, src_cache_level,
						 src_is_lmem,
						 dst, dst_cache_level,
						 dst_is_lmem,
						 &rq);
		if (rq) {
			if (i915_request_wait(rq, 0, MAX_SCHEDULE_TIMEOUT) < 0)
				err = -EIO;
			i915_request_put(rq);
		}
		if (err)
			break;

		t1 = ktime_get();
		t[pass] = ktime_sub(t1, t0);
	}
	if (err)
		return err;

	sort(t, ARRAY_SIZE(t), sizeof(*t), wrap_ktime_compare, NULL);
	pr_info("%s: %zd KiB copy: %lld MiB/s\n",
		ce->engine->name, sz >> 10,
		div64_u64(mul_u32_u32(4 * sz, 1000 * 1000 * 1000),
			  t[1] + 2 * t[2] + t[3]) >> 20);
	return 0;
}

static int perf_copy_blt(void *arg)
{
	struct intel_gt *gt = arg;
	static const unsigned long sizes[] = {
		SZ_4K,
		SZ_64K,
		SZ_2M,
		SZ_64M
	};
	int i;

	for (i = 0; i < ARRAY_SIZE(sizes); i++) {
		struct drm_i915_gem_object *src, *dst;
		int err;

		src = create_init_lmem_internal(gt, sizes[i], true);
		if (IS_ERR(src))
			return PTR_ERR(src);

		dst = create_init_lmem_internal(gt, sizes[i], false);
		if (IS_ERR(dst)) {
			err = PTR_ERR(dst);
			goto err_src;
		}

		err = __perf_copy_blt(gt->migrate.context,
				      src->mm.pages->sgl,
				      I915_CACHE_NONE,
				      i915_gem_object_is_lmem(src),
				      dst->mm.pages->sgl,
				      I915_CACHE_NONE,
				      i915_gem_object_is_lmem(dst),
				      sizes[i]);

		i915_gem_object_unlock(dst);
		i915_gem_object_put(dst);
err_src:
		i915_gem_object_unlock(src);
		i915_gem_object_put(src);
		if (err)
			return err;
	}

	return 0;
}

int intel_migrate_perf_selftests(struct drm_i915_private *i915)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(perf_clear_blt),
		SUBTEST(perf_copy_blt),
	};
	struct intel_gt *gt = &i915->gt;

	if (intel_gt_is_wedged(gt))
		return 0;

	if (!gt->migrate.context)
		return 0;

	return intel_gt_live_subtests(tests, gt);
}