// SPDX-License-Identifier: MIT
/*
 * Copyright © 2020 Intel Corporation
 */

#include <linux/sort.h>

#include "selftests/i915_random.h"

static const unsigned int sizes[] = {
	SZ_4K,
	SZ_64K,
	SZ_2M,
	CHUNK_SZ - SZ_4K,
	CHUNK_SZ,
	CHUNK_SZ + SZ_4K,
	SZ_64M,
};

static struct drm_i915_gem_object *
create_lmem_or_internal(struct drm_i915_private *i915, size_t size)
{
	struct drm_i915_gem_object *obj;

	obj = i915_gem_object_create_lmem(i915, size, 0);
	if (!IS_ERR(obj))
		return obj;

	return i915_gem_object_create_internal(i915, size);
}

/*
 * Fill @src with an ascending dword pattern and @dst with its inverse,
 * submit the copy through @fn, then verify one randomly chosen dword in
 * every page of the destination against the source pattern.
 */
static int copy(struct intel_migrate *migrate,
		int (*fn)(struct intel_migrate *migrate,
			  struct i915_gem_ww_ctx *ww,
			  struct drm_i915_gem_object *src,
			  struct drm_i915_gem_object *dst,
			  struct i915_request **out),
		u32 sz, struct rnd_state *prng)
{
	struct drm_i915_private *i915 = migrate->context->engine->i915;
	struct drm_i915_gem_object *src, *dst;
	struct i915_request *rq;
	struct i915_gem_ww_ctx ww;
	u32 *vaddr;
	int err = 0;
	int i;

	src = create_lmem_or_internal(i915, sz);
	if (IS_ERR(src))
		return 0;

	sz = src->base.size;
	dst = i915_gem_object_create_internal(i915, sz);
	if (IS_ERR(dst))
		goto err_free_src;

	for_i915_gem_ww(&ww, err, true) {
		err = i915_gem_object_lock(src, &ww);
		if (err)
			continue;

		err = i915_gem_object_lock(dst, &ww);
		if (err)
			continue;

		vaddr = i915_gem_object_pin_map(src, I915_MAP_WC);
		if (IS_ERR(vaddr)) {
			err = PTR_ERR(vaddr);
			continue;
		}

		for (i = 0; i < sz / sizeof(u32); i++)
			vaddr[i] = i;
		i915_gem_object_flush_map(src);

		vaddr = i915_gem_object_pin_map(dst, I915_MAP_WC);
		if (IS_ERR(vaddr)) {
			err = PTR_ERR(vaddr);
			goto unpin_src;
		}

		for (i = 0; i < sz / sizeof(u32); i++)
			vaddr[i] = ~i;
		i915_gem_object_flush_map(dst);

		err = fn(migrate, &ww, src, dst, &rq);
		if (!err)
			continue;

		if (err != -EDEADLK && err != -EINTR && err != -ERESTARTSYS)
			pr_err("%ps failed, size: %u\n", fn, sz);
		if (rq) {
			i915_request_wait(rq, 0, HZ);
			i915_request_put(rq);
		}
		i915_gem_object_unpin_map(dst);
unpin_src:
		i915_gem_object_unpin_map(src);
	}
	if (err)
		goto err_out;

	if (rq) {
		if (i915_request_wait(rq, 0, HZ) < 0) {
			pr_err("%ps timed out, size: %u\n", fn, sz);
			err = -ETIME;
		}
		i915_request_put(rq);
	}

	for (i = 0; !err && i < sz / PAGE_SIZE; i++) {
		int x = i * 1024 + i915_prandom_u32_max_state(1024, prng);

		if (vaddr[x] != x) {
			pr_err("%ps failed, size: %u, offset: %zu\n",
			       fn, sz, x * sizeof(u32));
			igt_hexdump(vaddr + i * 1024, 4096);
			err = -EINVAL;
		}
	}

	i915_gem_object_unpin_map(dst);
	i915_gem_object_unpin_map(src);

err_out:
	i915_gem_object_put(dst);
err_free_src:
	i915_gem_object_put(src);

	return err;
}

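/*
 * As with copy() above, but for the clear paths: the object is filled
 * with an inverted pattern, cleared to the value @sz through @fn, and
 * then one randomly chosen dword per page is checked against that value.
 */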
static int clear(struct intel_migrate *migrate,
		 int (*fn)(struct intel_migrate *migrate,
			   struct i915_gem_ww_ctx *ww,
			   struct drm_i915_gem_object *obj,
			   u32 value,
			   struct i915_request **out),
		 u32 sz, struct rnd_state *prng)
{
	struct drm_i915_private *i915 = migrate->context->engine->i915;
	struct drm_i915_gem_object *obj;
	struct i915_request *rq;
	struct i915_gem_ww_ctx ww;
	u32 *vaddr;
	int err = 0;
	int i;

	obj = create_lmem_or_internal(i915, sz);
	if (IS_ERR(obj))
		return 0;

	for_i915_gem_ww(&ww, err, true) {
		err = i915_gem_object_lock(obj, &ww);
		if (err)
			continue;

		vaddr = i915_gem_object_pin_map(obj, I915_MAP_WC);
		if (IS_ERR(vaddr)) {
			err = PTR_ERR(vaddr);
			continue;
		}

		for (i = 0; i < sz / sizeof(u32); i++)
			vaddr[i] = ~i;
		i915_gem_object_flush_map(obj);

		err = fn(migrate, &ww, obj, sz, &rq);
		if (!err)
			continue;

		if (err != -EDEADLK && err != -EINTR && err != -ERESTARTSYS)
			pr_err("%ps failed, size: %u\n", fn, sz);
		if (rq) {
			i915_request_wait(rq, 0, HZ);
			i915_request_put(rq);
		}
		i915_gem_object_unpin_map(obj);
	}
	if (err)
		goto err_out;

	if (rq) {
		if (i915_request_wait(rq, 0, HZ) < 0) {
			pr_err("%ps timed out, size: %u\n", fn, sz);
			err = -ETIME;
		}
		i915_request_put(rq);
	}

	for (i = 0; !err && i < sz / PAGE_SIZE; i++) {
		int x = i * 1024 + i915_prandom_u32_max_state(1024, prng);

		if (vaddr[x] != sz) {
			pr_err("%ps failed, size: %u, offset: %zu\n",
			       fn, sz, x * sizeof(u32));
			igt_hexdump(vaddr + i * 1024, 4096);
			err = -EINVAL;
		}
	}

	i915_gem_object_unpin_map(obj);
err_out:
	i915_gem_object_put(obj);

	return err;
}

static int __migrate_copy(struct intel_migrate *migrate,
			  struct i915_gem_ww_ctx *ww,
			  struct drm_i915_gem_object *src,
			  struct drm_i915_gem_object *dst,
			  struct i915_request **out)
{
	return intel_migrate_copy(migrate, ww, NULL,
				  src->mm.pages->sgl, src->cache_level,
				  i915_gem_object_is_lmem(src),
				  dst->mm.pages->sgl, dst->cache_level,
				  i915_gem_object_is_lmem(dst),
				  out);
}

static int __global_copy(struct intel_migrate *migrate,
			 struct i915_gem_ww_ctx *ww,
			 struct drm_i915_gem_object *src,
			 struct drm_i915_gem_object *dst,
			 struct i915_request **out)
{
	return intel_context_migrate_copy(migrate->context, NULL,
					  src->mm.pages->sgl, src->cache_level,
					  i915_gem_object_is_lmem(src),
					  dst->mm.pages->sgl, dst->cache_level,
					  i915_gem_object_is_lmem(dst),
					  out);
}

static int
migrate_copy(struct intel_migrate *migrate, u32 sz, struct rnd_state *prng)
{
	return copy(migrate, __migrate_copy, sz, prng);
}

static int
global_copy(struct intel_migrate *migrate, u32 sz, struct rnd_state *prng)
{
	return copy(migrate, __global_copy, sz, prng);
}

static int __migrate_clear(struct intel_migrate *migrate,
			   struct i915_gem_ww_ctx *ww,
			   struct drm_i915_gem_object *obj,
			   u32 value,
			   struct i915_request **out)
{
	return intel_migrate_clear(migrate, ww, NULL,
				   obj->mm.pages->sgl,
				   obj->cache_level,
				   i915_gem_object_is_lmem(obj),
				   value, out);
}

static int __global_clear(struct intel_migrate *migrate,
			  struct i915_gem_ww_ctx *ww,
			  struct drm_i915_gem_object *obj,
			  u32 value,
			  struct i915_request **out)
{
	return intel_context_migrate_clear(migrate->context, NULL,
					   obj->mm.pages->sgl,
					   obj->cache_level,
					   i915_gem_object_is_lmem(obj),
					   value, out);
}

static int
migrate_clear(struct intel_migrate *migrate, u32 sz, struct rnd_state *prng)
{
	return clear(migrate, __migrate_clear, sz, prng);
}

static int
global_clear(struct intel_migrate *migrate, u32 sz, struct rnd_state *prng)
{
	return clear(migrate, __global_clear, sz, prng);
}

static int live_migrate_copy(void *arg)
{
	struct intel_migrate *migrate = arg;
	struct drm_i915_private *i915 = migrate->context->engine->i915;
	I915_RND_STATE(prng);
	int i;

	for (i = 0; i < ARRAY_SIZE(sizes); i++) {
		int err;

		err = migrate_copy(migrate, sizes[i], &prng);
		if (err == 0)
			err = global_copy(migrate, sizes[i], &prng);
		i915_gem_drain_freed_objects(i915);
		if (err)
			return err;
	}

	return 0;
}

static int live_migrate_clear(void *arg)
{
	struct intel_migrate *migrate = arg;
	struct drm_i915_private *i915 = migrate->context->engine->i915;
	I915_RND_STATE(prng);
	int i;

	for (i = 0; i < ARRAY_SIZE(sizes); i++) {
		int err;

		err = migrate_clear(migrate, sizes[i], &prng);
		if (err == 0)
			err = global_clear(migrate, sizes[i], &prng);

		i915_gem_drain_freed_objects(i915);
		if (err)
			return err;
	}

	return 0;
}

struct threaded_migrate {
	struct intel_migrate *migrate;
	struct task_struct *tsk;
	struct rnd_state prng;
};

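/*
 * Run @fn concurrently on one kthread per online CPU, plus one extra, to
 * exercise the migration paths under contention. Each thread gets its own
 * PRNG state, and the first error status seen is returned.
 */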
static int threaded_migrate(struct intel_migrate *migrate,
			    int (*fn)(void *arg),
			    unsigned int flags)
{
	const unsigned int n_cpus = num_online_cpus() + 1;
	struct threaded_migrate *thread;
	I915_RND_STATE(prng);
	unsigned int i;
	int err = 0;

	thread = kcalloc(n_cpus, sizeof(*thread), GFP_KERNEL);
	if (!thread)
		return 0;

	for (i = 0; i < n_cpus; ++i) {
		struct task_struct *tsk;

		thread[i].migrate = migrate;
		thread[i].prng =
			I915_RND_STATE_INITIALIZER(prandom_u32_state(&prng));

		tsk = kthread_run(fn, &thread[i], "igt-%d", i);
		if (IS_ERR(tsk)) {
			err = PTR_ERR(tsk);
			break;
		}

		get_task_struct(tsk);
		thread[i].tsk = tsk;
	}

	msleep(10); /* start all threads before we kthread_stop() */

	for (i = 0; i < n_cpus; ++i) {
		struct task_struct *tsk = thread[i].tsk;
		int status;

		if (IS_ERR_OR_NULL(tsk))
			continue;

		status = kthread_stop(tsk);
		if (status && !err)
			err = status;

		put_task_struct(tsk);
	}

	kfree(thread);
	return err;
}

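/*
 * The threaded subtests operate on 2 * CHUNK_SZ objects, so each
 * operation spans more than a single CHUNK_SZ-sized chunk.
 */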
static int __thread_migrate_copy(void *arg)
{
	struct threaded_migrate *tm = arg;

	return migrate_copy(tm->migrate, 2 * CHUNK_SZ, &tm->prng);
}

static int thread_migrate_copy(void *arg)
{
	return threaded_migrate(arg, __thread_migrate_copy, 0);
}

static int __thread_global_copy(void *arg)
{
	struct threaded_migrate *tm = arg;

	return global_copy(tm->migrate, 2 * CHUNK_SZ, &tm->prng);
}

static int thread_global_copy(void *arg)
{
	return threaded_migrate(arg, __thread_global_copy, 0);
}

static int __thread_migrate_clear(void *arg)
{
	struct threaded_migrate *tm = arg;

	return migrate_clear(tm->migrate, 2 * CHUNK_SZ, &tm->prng);
}

static int __thread_global_clear(void *arg)
{
	struct threaded_migrate *tm = arg;

	return global_clear(tm->migrate, 2 * CHUNK_SZ, &tm->prng);
}

static int thread_migrate_clear(void *arg)
{
	return threaded_migrate(arg, __thread_migrate_clear, 0);
}

static int thread_global_clear(void *arg)
{
	return threaded_migrate(arg, __thread_global_clear, 0);
}

int intel_migrate_live_selftests(struct drm_i915_private *i915)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(live_migrate_copy),
		SUBTEST(live_migrate_clear),
		SUBTEST(thread_migrate_copy),
		SUBTEST(thread_migrate_clear),
		SUBTEST(thread_global_copy),
		SUBTEST(thread_global_clear),
	};
	struct intel_gt *gt = to_gt(i915);

	if (!gt->migrate.context)
		return 0;

	return i915_subtests(tests, &gt->migrate);
}

static struct drm_i915_gem_object *
create_init_lmem_internal(struct intel_gt *gt, size_t sz, bool try_lmem)
{
	struct drm_i915_gem_object *obj = NULL;
	int err;

	if (try_lmem)
		obj = i915_gem_object_create_lmem(gt->i915, sz, 0);

	if (IS_ERR_OR_NULL(obj)) {
		obj = i915_gem_object_create_internal(gt->i915, sz);
		if (IS_ERR(obj))
			return obj;
	}

	i915_gem_object_trylock(obj, NULL);
	err = i915_gem_object_pin_pages(obj);
	if (err) {
		i915_gem_object_unlock(obj);
		i915_gem_object_put(obj);
		return ERR_PTR(err);
	}

	return obj;
}

static int wrap_ktime_compare(const void *A, const void *B)
{
	const ktime_t *a = A, *b = B;

	return ktime_compare(*a, *b);
}

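/*
 * The perf subtests time each blit five times, sort the samples and
 * report a throughput derived from the middle of the distribution,
 * ignoring the fastest and slowest pass.
 */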
static int __perf_clear_blt(struct intel_context *ce,
			    struct scatterlist *sg,
			    enum i915_cache_level cache_level,
			    bool is_lmem,
			    size_t sz)
{
	ktime_t t[5];
	int pass;
	int err = 0;

	for (pass = 0; pass < ARRAY_SIZE(t); pass++) {
		struct i915_request *rq;
		ktime_t t0, t1;

		t0 = ktime_get();

		err = intel_context_migrate_clear(ce, NULL, sg, cache_level,
						  is_lmem, 0, &rq);
		if (rq) {
			if (i915_request_wait(rq, 0, MAX_SCHEDULE_TIMEOUT) < 0)
				err = -EIO;
			i915_request_put(rq);
		}
		if (err)
			break;

		t1 = ktime_get();
		t[pass] = ktime_sub(t1, t0);
	}
	if (err)
		return err;

	sort(t, ARRAY_SIZE(t), sizeof(*t), wrap_ktime_compare, NULL);
	pr_info("%s: %zd KiB fill: %lld MiB/s\n",
		ce->engine->name, sz >> 10,
		div64_u64(mul_u32_u32(4 * sz,
				      1000 * 1000 * 1000),
			  t[1] + 2 * t[2] + t[3]) >> 20);
	return 0;
}

static int perf_clear_blt(void *arg)
{
	struct intel_gt *gt = arg;
	static const unsigned long sizes[] = {
		SZ_4K,
		SZ_64K,
		SZ_2M,
		SZ_64M
	};
	int i;

	for (i = 0; i < ARRAY_SIZE(sizes); i++) {
		struct drm_i915_gem_object *dst;
		int err;

		dst = create_init_lmem_internal(gt, sizes[i], true);
		if (IS_ERR(dst))
			return PTR_ERR(dst);

		err = __perf_clear_blt(gt->migrate.context,
				       dst->mm.pages->sgl,
				       I915_CACHE_NONE,
				       i915_gem_object_is_lmem(dst),
				       sizes[i]);

		i915_gem_object_unlock(dst);
		i915_gem_object_put(dst);
		if (err)
			return err;
	}

	return 0;
}

static int __perf_copy_blt(struct intel_context *ce,
			   struct scatterlist *src,
			   enum i915_cache_level src_cache_level,
			   bool src_is_lmem,
			   struct scatterlist *dst,
			   enum i915_cache_level dst_cache_level,
			   bool dst_is_lmem,
			   size_t sz)
{
	ktime_t t[5];
	int pass;
	int err = 0;

	for (pass = 0; pass < ARRAY_SIZE(t); pass++) {
		struct i915_request *rq;
		ktime_t t0, t1;

		t0 = ktime_get();

		err = intel_context_migrate_copy(ce, NULL,
						 src, src_cache_level,
						 src_is_lmem,
						 dst, dst_cache_level,
						 dst_is_lmem,
						 &rq);
		if (rq) {
			if (i915_request_wait(rq, 0, MAX_SCHEDULE_TIMEOUT) < 0)
				err = -EIO;
			i915_request_put(rq);
		}
		if (err)
			break;

		t1 = ktime_get();
		t[pass] = ktime_sub(t1, t0);
	}
	if (err)
		return err;

	sort(t, ARRAY_SIZE(t), sizeof(*t), wrap_ktime_compare, NULL);
	pr_info("%s: %zd KiB copy: %lld MiB/s\n",
		ce->engine->name, sz >> 10,
		div64_u64(mul_u32_u32(4 * sz,
				      1000 * 1000 * 1000),
			  t[1] + 2 * t[2] + t[3]) >> 20);
	return 0;
}

static int perf_copy_blt(void *arg)
{
	struct intel_gt *gt = arg;
	static const unsigned long sizes[] = {
		SZ_4K,
		SZ_64K,
		SZ_2M,
		SZ_64M
	};
	int i;

	for (i = 0; i < ARRAY_SIZE(sizes); i++) {
		struct drm_i915_gem_object *src, *dst;
		int err;

		src = create_init_lmem_internal(gt, sizes[i], true);
		if (IS_ERR(src))
			return PTR_ERR(src);

		dst = create_init_lmem_internal(gt, sizes[i], false);
		if (IS_ERR(dst)) {
			err = PTR_ERR(dst);
			goto err_src;
		}

		err = __perf_copy_blt(gt->migrate.context,
				      src->mm.pages->sgl,
				      I915_CACHE_NONE,
				      i915_gem_object_is_lmem(src),
				      dst->mm.pages->sgl,
				      I915_CACHE_NONE,
				      i915_gem_object_is_lmem(dst),
				      sizes[i]);

		i915_gem_object_unlock(dst);
		i915_gem_object_put(dst);
err_src:
		i915_gem_object_unlock(src);
		i915_gem_object_put(src);
		if (err)
			return err;
	}

	return 0;
}

int intel_migrate_perf_selftests(struct drm_i915_private *i915)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(perf_clear_blt),
		SUBTEST(perf_copy_blt),
	};
	struct intel_gt *gt = to_gt(i915);

	if (intel_gt_is_wedged(gt))
		return 0;

	if (!gt->migrate.context)
		return 0;

	return intel_gt_live_subtests(tests, gt);
}