1 /* 2 * Copyright © 2016 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 * 23 */ 24 25 #include <linux/prime_numbers.h> 26 #include <linux/pm_qos.h> 27 #include <linux/sort.h> 28 29 #include "gem/i915_gem_pm.h" 30 #include "gem/selftests/mock_context.h" 31 32 #include "gt/intel_engine_heartbeat.h" 33 #include "gt/intel_engine_pm.h" 34 #include "gt/intel_engine_user.h" 35 #include "gt/intel_gt.h" 36 #include "gt/intel_gt_clock_utils.h" 37 #include "gt/intel_gt_requests.h" 38 #include "gt/selftest_engine_heartbeat.h" 39 40 #include "i915_random.h" 41 #include "i915_selftest.h" 42 #include "igt_flush_test.h" 43 #include "igt_live_test.h" 44 #include "igt_spinner.h" 45 #include "lib_sw_fence.h" 46 47 #include "mock_drm.h" 48 #include "mock_gem_device.h" 49 50 static unsigned int num_uabi_engines(struct drm_i915_private *i915) 51 { 52 struct intel_engine_cs *engine; 53 unsigned int count; 54 55 count = 0; 56 for_each_uabi_engine(engine, i915) 57 count++; 58 59 return count; 60 } 61 62 static struct intel_engine_cs *rcs0(struct drm_i915_private *i915) 63 { 64 return intel_engine_lookup_user(i915, I915_ENGINE_CLASS_RENDER, 0); 65 } 66 67 static int igt_add_request(void *arg) 68 { 69 struct drm_i915_private *i915 = arg; 70 struct i915_request *request; 71 72 /* Basic preliminary test to create a request and let it loose! 
*/ 73 74 request = mock_request(rcs0(i915)->kernel_context, HZ / 10); 75 if (!request) 76 return -ENOMEM; 77 78 i915_request_add(request); 79 80 return 0; 81 } 82 83 static int igt_wait_request(void *arg) 84 { 85 const long T = HZ / 4; 86 struct drm_i915_private *i915 = arg; 87 struct i915_request *request; 88 int err = -EINVAL; 89 90 /* Submit a request, then wait upon it */ 91 92 request = mock_request(rcs0(i915)->kernel_context, T); 93 if (!request) 94 return -ENOMEM; 95 96 i915_request_get(request); 97 98 if (i915_request_wait(request, 0, 0) != -ETIME) { 99 pr_err("request wait (busy query) succeeded (expected timeout before submit!)\n"); 100 goto out_request; 101 } 102 103 if (i915_request_wait(request, 0, T) != -ETIME) { 104 pr_err("request wait succeeded (expected timeout before submit!)\n"); 105 goto out_request; 106 } 107 108 if (i915_request_completed(request)) { 109 pr_err("request completed before submit!!\n"); 110 goto out_request; 111 } 112 113 i915_request_add(request); 114 115 if (i915_request_wait(request, 0, 0) != -ETIME) { 116 pr_err("request wait (busy query) succeeded (expected timeout after submit!)\n"); 117 goto out_request; 118 } 119 120 if (i915_request_completed(request)) { 121 pr_err("request completed immediately!\n"); 122 goto out_request; 123 } 124 125 if (i915_request_wait(request, 0, T / 2) != -ETIME) { 126 pr_err("request wait succeeded (expected timeout!)\n"); 127 goto out_request; 128 } 129 130 if (i915_request_wait(request, 0, T) == -ETIME) { 131 pr_err("request wait timed out!\n"); 132 goto out_request; 133 } 134 135 if (!i915_request_completed(request)) { 136 pr_err("request not complete after waiting!\n"); 137 goto out_request; 138 } 139 140 if (i915_request_wait(request, 0, T) == -ETIME) { 141 pr_err("request wait timed out when already complete!\n"); 142 goto out_request; 143 } 144 145 err = 0; 146 out_request: 147 i915_request_put(request); 148 mock_device_flush(i915); 149 return err; 150 } 151 152 static int igt_fence_wait(void *arg) 153 { 154 const long T = HZ / 4; 155 struct drm_i915_private *i915 = arg; 156 struct i915_request *request; 157 int err = -EINVAL; 158 159 /* Submit a request, treat it as a fence and wait upon it */ 160 161 request = mock_request(rcs0(i915)->kernel_context, T); 162 if (!request) 163 return -ENOMEM; 164 165 if (dma_fence_wait_timeout(&request->fence, false, T) != -ETIME) { 166 pr_err("fence wait success before submit (expected timeout)!\n"); 167 goto out; 168 } 169 170 i915_request_add(request); 171 172 if (dma_fence_is_signaled(&request->fence)) { 173 pr_err("fence signaled immediately!\n"); 174 goto out; 175 } 176 177 if (dma_fence_wait_timeout(&request->fence, false, T / 2) != -ETIME) { 178 pr_err("fence wait success after submit (expected timeout)!\n"); 179 goto out; 180 } 181 182 if (dma_fence_wait_timeout(&request->fence, false, T) <= 0) { 183 pr_err("fence wait timed out (expected success)!\n"); 184 goto out; 185 } 186 187 if (!dma_fence_is_signaled(&request->fence)) { 188 pr_err("fence unsignaled after waiting!\n"); 189 goto out; 190 } 191 192 if (dma_fence_wait_timeout(&request->fence, false, T) <= 0) { 193 pr_err("fence wait timed out when complete (expected success)!\n"); 194 goto out; 195 } 196 197 err = 0; 198 out: 199 mock_device_flush(i915); 200 return err; 201 } 202 203 static int igt_request_rewind(void *arg) 204 { 205 struct drm_i915_private *i915 = arg; 206 struct i915_request *request, *vip; 207 struct i915_gem_context *ctx[2]; 208 struct intel_context *ce; 209 int err = -EINVAL; 210 211 ctx[0] = 
mock_context(i915, "A"); 212 if (!ctx[0]) { 213 err = -ENOMEM; 214 goto err_ctx_0; 215 } 216 217 ce = i915_gem_context_get_engine(ctx[0], RCS0); 218 GEM_BUG_ON(IS_ERR(ce)); 219 request = mock_request(ce, 2 * HZ); 220 intel_context_put(ce); 221 if (!request) { 222 err = -ENOMEM; 223 goto err_context_0; 224 } 225 226 i915_request_get(request); 227 i915_request_add(request); 228 229 ctx[1] = mock_context(i915, "B"); 230 if (!ctx[1]) { 231 err = -ENOMEM; 232 goto err_ctx_1; 233 } 234 235 ce = i915_gem_context_get_engine(ctx[1], RCS0); 236 GEM_BUG_ON(IS_ERR(ce)); 237 vip = mock_request(ce, 0); 238 intel_context_put(ce); 239 if (!vip) { 240 err = -ENOMEM; 241 goto err_context_1; 242 } 243 244 /* Simulate preemption by manual reordering */ 245 if (!mock_cancel_request(request)) { 246 pr_err("failed to cancel request (already executed)!\n"); 247 i915_request_add(vip); 248 goto err_context_1; 249 } 250 i915_request_get(vip); 251 i915_request_add(vip); 252 rcu_read_lock(); 253 request->engine->submit_request(request); 254 rcu_read_unlock(); 255 256 257 if (i915_request_wait(vip, 0, HZ) == -ETIME) { 258 pr_err("timed out waiting for high priority request\n"); 259 goto err; 260 } 261 262 if (i915_request_completed(request)) { 263 pr_err("low priority request already completed\n"); 264 goto err; 265 } 266 267 err = 0; 268 err: 269 i915_request_put(vip); 270 err_context_1: 271 mock_context_close(ctx[1]); 272 err_ctx_1: 273 i915_request_put(request); 274 err_context_0: 275 mock_context_close(ctx[0]); 276 err_ctx_0: 277 mock_device_flush(i915); 278 return err; 279 } 280 281 struct smoketest { 282 struct intel_engine_cs *engine; 283 struct i915_gem_context **contexts; 284 atomic_long_t num_waits, num_fences; 285 int ncontexts, max_batch; 286 struct i915_request *(*request_alloc)(struct intel_context *ce); 287 }; 288 289 static struct i915_request * 290 __mock_request_alloc(struct intel_context *ce) 291 { 292 return mock_request(ce, 0); 293 } 294 295 static struct i915_request * 296 __live_request_alloc(struct intel_context *ce) 297 { 298 return intel_context_create_request(ce); 299 } 300 301 static int __igt_breadcrumbs_smoketest(void *arg) 302 { 303 struct smoketest *t = arg; 304 const unsigned int max_batch = min(t->ncontexts, t->max_batch) - 1; 305 const unsigned int total = 4 * t->ncontexts + 1; 306 unsigned int num_waits = 0, num_fences = 0; 307 struct i915_request **requests; 308 I915_RND_STATE(prng); 309 unsigned int *order; 310 int err = 0; 311 312 /* 313 * A very simple test to catch the most egregious of list handling bugs. 314 * 315 * At its heart, we simply create oodles of requests running across 316 * multiple kthreads and enable signaling on them, for the sole purpose 317 * of stressing our breadcrumb handling. The only inspection we do is 318 * that the fences were marked as signaled. 
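	 *
	 * Each pass below gates a randomly sized, randomly ordered batch of
	 * requests on a single submit fence, collects them under one wait
	 * fence, and then checks that every request was marked as signaled
	 * once that wait fence has fired.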
319 */ 320 321 requests = kcalloc(total, sizeof(*requests), GFP_KERNEL); 322 if (!requests) 323 return -ENOMEM; 324 325 order = i915_random_order(total, &prng); 326 if (!order) { 327 err = -ENOMEM; 328 goto out_requests; 329 } 330 331 while (!kthread_should_stop()) { 332 struct i915_sw_fence *submit, *wait; 333 unsigned int n, count; 334 335 submit = heap_fence_create(GFP_KERNEL); 336 if (!submit) { 337 err = -ENOMEM; 338 break; 339 } 340 341 wait = heap_fence_create(GFP_KERNEL); 342 if (!wait) { 343 i915_sw_fence_commit(submit); 344 heap_fence_put(submit); 345 err = -ENOMEM; 346 break; 347 } 348 349 i915_random_reorder(order, total, &prng); 350 count = 1 + i915_prandom_u32_max_state(max_batch, &prng); 351 352 for (n = 0; n < count; n++) { 353 struct i915_gem_context *ctx = 354 t->contexts[order[n] % t->ncontexts]; 355 struct i915_request *rq; 356 struct intel_context *ce; 357 358 ce = i915_gem_context_get_engine(ctx, t->engine->legacy_idx); 359 GEM_BUG_ON(IS_ERR(ce)); 360 rq = t->request_alloc(ce); 361 intel_context_put(ce); 362 if (IS_ERR(rq)) { 363 err = PTR_ERR(rq); 364 count = n; 365 break; 366 } 367 368 err = i915_sw_fence_await_sw_fence_gfp(&rq->submit, 369 submit, 370 GFP_KERNEL); 371 372 requests[n] = i915_request_get(rq); 373 i915_request_add(rq); 374 375 if (err >= 0) 376 err = i915_sw_fence_await_dma_fence(wait, 377 &rq->fence, 378 0, 379 GFP_KERNEL); 380 381 if (err < 0) { 382 i915_request_put(rq); 383 count = n; 384 break; 385 } 386 } 387 388 i915_sw_fence_commit(submit); 389 i915_sw_fence_commit(wait); 390 391 if (!wait_event_timeout(wait->wait, 392 i915_sw_fence_done(wait), 393 5 * HZ)) { 394 struct i915_request *rq = requests[count - 1]; 395 396 pr_err("waiting for %d/%d fences (last %llx:%lld) on %s timed out!\n", 397 atomic_read(&wait->pending), count, 398 rq->fence.context, rq->fence.seqno, 399 t->engine->name); 400 GEM_TRACE_DUMP(); 401 402 intel_gt_set_wedged(t->engine->gt); 403 GEM_BUG_ON(!i915_request_completed(rq)); 404 i915_sw_fence_wait(wait); 405 err = -EIO; 406 } 407 408 for (n = 0; n < count; n++) { 409 struct i915_request *rq = requests[n]; 410 411 if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, 412 &rq->fence.flags)) { 413 pr_err("%llu:%llu was not signaled!\n", 414 rq->fence.context, rq->fence.seqno); 415 err = -EINVAL; 416 } 417 418 i915_request_put(rq); 419 } 420 421 heap_fence_put(wait); 422 heap_fence_put(submit); 423 424 if (err < 0) 425 break; 426 427 num_fences += count; 428 num_waits++; 429 430 cond_resched(); 431 } 432 433 atomic_long_add(num_fences, &t->num_fences); 434 atomic_long_add(num_waits, &t->num_waits); 435 436 kfree(order); 437 out_requests: 438 kfree(requests); 439 return err; 440 } 441 442 static int mock_breadcrumbs_smoketest(void *arg) 443 { 444 struct drm_i915_private *i915 = arg; 445 struct smoketest t = { 446 .engine = rcs0(i915), 447 .ncontexts = 1024, 448 .max_batch = 1024, 449 .request_alloc = __mock_request_alloc 450 }; 451 unsigned int ncpus = num_online_cpus(); 452 struct task_struct **threads; 453 unsigned int n; 454 int ret = 0; 455 456 /* 457 * Smoketest our breadcrumb/signal handling for requests across multiple 458 * threads. A very simple test to only catch the most egregious of bugs. 
	 * See __igt_breadcrumbs_smoketest();
	 */

	threads = kcalloc(ncpus, sizeof(*threads), GFP_KERNEL);
	if (!threads)
		return -ENOMEM;

	t.contexts = kcalloc(t.ncontexts, sizeof(*t.contexts), GFP_KERNEL);
	if (!t.contexts) {
		ret = -ENOMEM;
		goto out_threads;
	}

	for (n = 0; n < t.ncontexts; n++) {
		t.contexts[n] = mock_context(t.engine->i915, "mock");
		if (!t.contexts[n]) {
			ret = -ENOMEM;
			goto out_contexts;
		}
	}

	for (n = 0; n < ncpus; n++) {
		threads[n] = kthread_run(__igt_breadcrumbs_smoketest,
					 &t, "igt/%d", n);
		if (IS_ERR(threads[n])) {
			ret = PTR_ERR(threads[n]);
			ncpus = n;
			break;
		}

		get_task_struct(threads[n]);
	}

	yield(); /* start all threads before we begin */
	msleep(jiffies_to_msecs(i915_selftest.timeout_jiffies));

	for (n = 0; n < ncpus; n++) {
		int err;

		err = kthread_stop(threads[n]);
		if (err < 0 && !ret)
			ret = err;

		put_task_struct(threads[n]);
	}
	pr_info("Completed %lu waits for %lu fences across %d cpus\n",
		atomic_long_read(&t.num_waits),
		atomic_long_read(&t.num_fences),
		ncpus);

out_contexts:
	for (n = 0; n < t.ncontexts; n++) {
		if (!t.contexts[n])
			break;
		mock_context_close(t.contexts[n]);
	}
	kfree(t.contexts);
out_threads:
	kfree(threads);
	return ret;
}

int i915_request_mock_selftests(void)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(igt_add_request),
		SUBTEST(igt_wait_request),
		SUBTEST(igt_fence_wait),
		SUBTEST(igt_request_rewind),
		SUBTEST(mock_breadcrumbs_smoketest),
	};
	struct drm_i915_private *i915;
	intel_wakeref_t wakeref;
	int err = 0;

	i915 = mock_gem_device();
	if (!i915)
		return -ENOMEM;

	with_intel_runtime_pm(&i915->runtime_pm, wakeref)
		err = i915_subtests(tests, i915);

	mock_destroy_device(i915);

	return err;
}

static int live_nop_request(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct intel_engine_cs *engine;
	struct igt_live_test t;
	int err = -ENODEV;

	/*
	 * Submit various sized batches of empty requests, to each engine
	 * (individually), and wait for the batch to complete. We can check
	 * the overhead of submitting requests to the hardware.
	 */

	for_each_uabi_engine(engine, i915) {
		unsigned long n, prime;
		IGT_TIMEOUT(end_time);
		ktime_t times[2] = {};

		err = igt_live_test_begin(&t, i915, __func__, engine->name);
		if (err)
			return err;

		intel_engine_pm_get(engine);
		for_each_prime_number_from(prime, 1, 8192) {
			struct i915_request *request = NULL;

			times[1] = ktime_get_raw();

			for (n = 0; n < prime; n++) {
				i915_request_put(request);
				request = i915_request_create(engine->kernel_context);
				if (IS_ERR(request))
					return PTR_ERR(request);

				/*
				 * This space is left intentionally blank.
				 *
				 * We do not actually want to perform any
				 * action with this request, we just want
				 * to measure the latency in allocation
				 * and submission of our breadcrumbs -
				 * ensuring that the bare request is sufficient
				 * for the system to work (i.e. proper HEAD
				 * tracking of the rings, interrupt handling,
				 * etc). It also gives us the lowest bounds
				 * for latency.
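				 *
				 * The outer loop then reports both the
				 * latency of a single request and the
				 * amortised cost per request over a
				 * prime-sized batch.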
592 */ 593 594 i915_request_get(request); 595 i915_request_add(request); 596 } 597 i915_request_wait(request, 0, MAX_SCHEDULE_TIMEOUT); 598 i915_request_put(request); 599 600 times[1] = ktime_sub(ktime_get_raw(), times[1]); 601 if (prime == 1) 602 times[0] = times[1]; 603 604 if (__igt_timeout(end_time, NULL)) 605 break; 606 } 607 intel_engine_pm_put(engine); 608 609 err = igt_live_test_end(&t); 610 if (err) 611 return err; 612 613 pr_info("Request latencies on %s: 1 = %lluns, %lu = %lluns\n", 614 engine->name, 615 ktime_to_ns(times[0]), 616 prime, div64_u64(ktime_to_ns(times[1]), prime)); 617 } 618 619 return err; 620 } 621 622 static int __cancel_inactive(struct intel_engine_cs *engine) 623 { 624 struct intel_context *ce; 625 struct igt_spinner spin; 626 struct i915_request *rq; 627 int err = 0; 628 629 if (igt_spinner_init(&spin, engine->gt)) 630 return -ENOMEM; 631 632 ce = intel_context_create(engine); 633 if (IS_ERR(ce)) { 634 err = PTR_ERR(ce); 635 goto out_spin; 636 } 637 638 rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK); 639 if (IS_ERR(rq)) { 640 err = PTR_ERR(rq); 641 goto out_ce; 642 } 643 644 pr_debug("%s: Cancelling inactive request\n", engine->name); 645 i915_request_cancel(rq, -EINTR); 646 i915_request_get(rq); 647 i915_request_add(rq); 648 649 if (i915_request_wait(rq, 0, HZ / 5) < 0) { 650 struct drm_printer p = drm_info_printer(engine->i915->drm.dev); 651 652 pr_err("%s: Failed to cancel inactive request\n", engine->name); 653 intel_engine_dump(engine, &p, "%s\n", engine->name); 654 err = -ETIME; 655 goto out_rq; 656 } 657 658 if (rq->fence.error != -EINTR) { 659 pr_err("%s: fence not cancelled (%u)\n", 660 engine->name, rq->fence.error); 661 err = -EINVAL; 662 } 663 664 out_rq: 665 i915_request_put(rq); 666 out_ce: 667 intel_context_put(ce); 668 out_spin: 669 igt_spinner_fini(&spin); 670 if (err) 671 pr_err("%s: %s error %d\n", __func__, engine->name, err); 672 return err; 673 } 674 675 static int __cancel_active(struct intel_engine_cs *engine) 676 { 677 struct intel_context *ce; 678 struct igt_spinner spin; 679 struct i915_request *rq; 680 int err = 0; 681 682 if (igt_spinner_init(&spin, engine->gt)) 683 return -ENOMEM; 684 685 ce = intel_context_create(engine); 686 if (IS_ERR(ce)) { 687 err = PTR_ERR(ce); 688 goto out_spin; 689 } 690 691 rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK); 692 if (IS_ERR(rq)) { 693 err = PTR_ERR(rq); 694 goto out_ce; 695 } 696 697 pr_debug("%s: Cancelling active request\n", engine->name); 698 i915_request_get(rq); 699 i915_request_add(rq); 700 if (!igt_wait_for_spinner(&spin, rq)) { 701 struct drm_printer p = drm_info_printer(engine->i915->drm.dev); 702 703 pr_err("Failed to start spinner on %s\n", engine->name); 704 intel_engine_dump(engine, &p, "%s\n", engine->name); 705 err = -ETIME; 706 goto out_rq; 707 } 708 i915_request_cancel(rq, -EINTR); 709 710 if (i915_request_wait(rq, 0, HZ / 5) < 0) { 711 struct drm_printer p = drm_info_printer(engine->i915->drm.dev); 712 713 pr_err("%s: Failed to cancel active request\n", engine->name); 714 intel_engine_dump(engine, &p, "%s\n", engine->name); 715 err = -ETIME; 716 goto out_rq; 717 } 718 719 if (rq->fence.error != -EINTR) { 720 pr_err("%s: fence not cancelled (%u)\n", 721 engine->name, rq->fence.error); 722 err = -EINVAL; 723 } 724 725 out_rq: 726 i915_request_put(rq); 727 out_ce: 728 intel_context_put(ce); 729 out_spin: 730 igt_spinner_fini(&spin); 731 if (err) 732 pr_err("%s: %s error %d\n", __func__, engine->name, err); 733 return err; 734 } 735 736 static int 
__cancel_completed(struct intel_engine_cs *engine) 737 { 738 struct intel_context *ce; 739 struct igt_spinner spin; 740 struct i915_request *rq; 741 int err = 0; 742 743 if (igt_spinner_init(&spin, engine->gt)) 744 return -ENOMEM; 745 746 ce = intel_context_create(engine); 747 if (IS_ERR(ce)) { 748 err = PTR_ERR(ce); 749 goto out_spin; 750 } 751 752 rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK); 753 if (IS_ERR(rq)) { 754 err = PTR_ERR(rq); 755 goto out_ce; 756 } 757 igt_spinner_end(&spin); 758 i915_request_get(rq); 759 i915_request_add(rq); 760 761 if (i915_request_wait(rq, 0, HZ / 5) < 0) { 762 err = -ETIME; 763 goto out_rq; 764 } 765 766 pr_debug("%s: Cancelling completed request\n", engine->name); 767 i915_request_cancel(rq, -EINTR); 768 if (rq->fence.error) { 769 pr_err("%s: fence not cancelled (%u)\n", 770 engine->name, rq->fence.error); 771 err = -EINVAL; 772 } 773 774 out_rq: 775 i915_request_put(rq); 776 out_ce: 777 intel_context_put(ce); 778 out_spin: 779 igt_spinner_fini(&spin); 780 if (err) 781 pr_err("%s: %s error %d\n", __func__, engine->name, err); 782 return err; 783 } 784 785 static int live_cancel_request(void *arg) 786 { 787 struct drm_i915_private *i915 = arg; 788 struct intel_engine_cs *engine; 789 790 /* 791 * Check cancellation of requests. We expect to be able to immediately 792 * cancel active requests, even if they are currently on the GPU. 793 */ 794 795 for_each_uabi_engine(engine, i915) { 796 struct igt_live_test t; 797 int err, err2; 798 799 if (!intel_engine_has_preemption(engine)) 800 continue; 801 802 err = igt_live_test_begin(&t, i915, __func__, engine->name); 803 if (err) 804 return err; 805 806 err = __cancel_inactive(engine); 807 if (err == 0) 808 err = __cancel_active(engine); 809 if (err == 0) 810 err = __cancel_completed(engine); 811 812 err2 = igt_live_test_end(&t); 813 if (err) 814 return err; 815 if (err2) 816 return err2; 817 } 818 819 return 0; 820 } 821 822 static struct i915_vma *empty_batch(struct drm_i915_private *i915) 823 { 824 struct drm_i915_gem_object *obj; 825 struct i915_vma *vma; 826 u32 *cmd; 827 int err; 828 829 obj = i915_gem_object_create_internal(i915, PAGE_SIZE); 830 if (IS_ERR(obj)) 831 return ERR_CAST(obj); 832 833 cmd = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WB); 834 if (IS_ERR(cmd)) { 835 err = PTR_ERR(cmd); 836 goto err; 837 } 838 839 *cmd = MI_BATCH_BUFFER_END; 840 841 __i915_gem_object_flush_map(obj, 0, 64); 842 i915_gem_object_unpin_map(obj); 843 844 intel_gt_chipset_flush(to_gt(i915)); 845 846 vma = i915_vma_instance(obj, &i915->ggtt.vm, NULL); 847 if (IS_ERR(vma)) { 848 err = PTR_ERR(vma); 849 goto err; 850 } 851 852 err = i915_vma_pin(vma, 0, 0, PIN_USER | PIN_GLOBAL); 853 if (err) 854 goto err; 855 856 /* Force the wait wait now to avoid including it in the benchmark */ 857 err = i915_vma_sync(vma); 858 if (err) 859 goto err_pin; 860 861 return vma; 862 863 err_pin: 864 i915_vma_unpin(vma); 865 err: 866 i915_gem_object_put(obj); 867 return ERR_PTR(err); 868 } 869 870 static struct i915_request * 871 empty_request(struct intel_engine_cs *engine, 872 struct i915_vma *batch) 873 { 874 struct i915_request *request; 875 int err; 876 877 request = i915_request_create(engine->kernel_context); 878 if (IS_ERR(request)) 879 return request; 880 881 err = engine->emit_bb_start(request, 882 batch->node.start, 883 batch->node.size, 884 I915_DISPATCH_SECURE); 885 if (err) 886 goto out_request; 887 888 i915_request_get(request); 889 out_request: 890 i915_request_add(request); 891 return err ? 
ERR_PTR(err) : request; 892 } 893 894 static int live_empty_request(void *arg) 895 { 896 struct drm_i915_private *i915 = arg; 897 struct intel_engine_cs *engine; 898 struct igt_live_test t; 899 struct i915_vma *batch; 900 int err = 0; 901 902 /* 903 * Submit various sized batches of empty requests, to each engine 904 * (individually), and wait for the batch to complete. We can check 905 * the overhead of submitting requests to the hardware. 906 */ 907 908 batch = empty_batch(i915); 909 if (IS_ERR(batch)) 910 return PTR_ERR(batch); 911 912 for_each_uabi_engine(engine, i915) { 913 IGT_TIMEOUT(end_time); 914 struct i915_request *request; 915 unsigned long n, prime; 916 ktime_t times[2] = {}; 917 918 err = igt_live_test_begin(&t, i915, __func__, engine->name); 919 if (err) 920 goto out_batch; 921 922 intel_engine_pm_get(engine); 923 924 /* Warmup / preload */ 925 request = empty_request(engine, batch); 926 if (IS_ERR(request)) { 927 err = PTR_ERR(request); 928 intel_engine_pm_put(engine); 929 goto out_batch; 930 } 931 i915_request_wait(request, 0, MAX_SCHEDULE_TIMEOUT); 932 933 for_each_prime_number_from(prime, 1, 8192) { 934 times[1] = ktime_get_raw(); 935 936 for (n = 0; n < prime; n++) { 937 i915_request_put(request); 938 request = empty_request(engine, batch); 939 if (IS_ERR(request)) { 940 err = PTR_ERR(request); 941 intel_engine_pm_put(engine); 942 goto out_batch; 943 } 944 } 945 i915_request_wait(request, 0, MAX_SCHEDULE_TIMEOUT); 946 947 times[1] = ktime_sub(ktime_get_raw(), times[1]); 948 if (prime == 1) 949 times[0] = times[1]; 950 951 if (__igt_timeout(end_time, NULL)) 952 break; 953 } 954 i915_request_put(request); 955 intel_engine_pm_put(engine); 956 957 err = igt_live_test_end(&t); 958 if (err) 959 goto out_batch; 960 961 pr_info("Batch latencies on %s: 1 = %lluns, %lu = %lluns\n", 962 engine->name, 963 ktime_to_ns(times[0]), 964 prime, div64_u64(ktime_to_ns(times[1]), prime)); 965 } 966 967 out_batch: 968 i915_vma_unpin(batch); 969 i915_vma_put(batch); 970 return err; 971 } 972 973 static struct i915_vma *recursive_batch(struct drm_i915_private *i915) 974 { 975 struct drm_i915_gem_object *obj; 976 const int ver = GRAPHICS_VER(i915); 977 struct i915_vma *vma; 978 u32 *cmd; 979 int err; 980 981 obj = i915_gem_object_create_internal(i915, PAGE_SIZE); 982 if (IS_ERR(obj)) 983 return ERR_CAST(obj); 984 985 vma = i915_vma_instance(obj, to_gt(i915)->vm, NULL); 986 if (IS_ERR(vma)) { 987 err = PTR_ERR(vma); 988 goto err; 989 } 990 991 err = i915_vma_pin(vma, 0, 0, PIN_USER); 992 if (err) 993 goto err; 994 995 cmd = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WC); 996 if (IS_ERR(cmd)) { 997 err = PTR_ERR(cmd); 998 goto err; 999 } 1000 1001 if (ver >= 8) { 1002 *cmd++ = MI_BATCH_BUFFER_START | 1 << 8 | 1; 1003 *cmd++ = lower_32_bits(vma->node.start); 1004 *cmd++ = upper_32_bits(vma->node.start); 1005 } else if (ver >= 6) { 1006 *cmd++ = MI_BATCH_BUFFER_START | 1 << 8; 1007 *cmd++ = lower_32_bits(vma->node.start); 1008 } else { 1009 *cmd++ = MI_BATCH_BUFFER_START | MI_BATCH_GTT; 1010 *cmd++ = lower_32_bits(vma->node.start); 1011 } 1012 *cmd++ = MI_BATCH_BUFFER_END; /* terminate early in case of error */ 1013 1014 __i915_gem_object_flush_map(obj, 0, 64); 1015 i915_gem_object_unpin_map(obj); 1016 1017 intel_gt_chipset_flush(to_gt(i915)); 1018 1019 return vma; 1020 1021 err: 1022 i915_gem_object_put(obj); 1023 return ERR_PTR(err); 1024 } 1025 1026 static int recursive_batch_resolve(struct i915_vma *batch) 1027 { 1028 u32 *cmd; 1029 1030 cmd = i915_gem_object_pin_map_unlocked(batch->obj, 
I915_MAP_WC); 1031 if (IS_ERR(cmd)) 1032 return PTR_ERR(cmd); 1033 1034 *cmd = MI_BATCH_BUFFER_END; 1035 1036 __i915_gem_object_flush_map(batch->obj, 0, sizeof(*cmd)); 1037 i915_gem_object_unpin_map(batch->obj); 1038 1039 intel_gt_chipset_flush(batch->vm->gt); 1040 1041 return 0; 1042 } 1043 1044 static int live_all_engines(void *arg) 1045 { 1046 struct drm_i915_private *i915 = arg; 1047 const unsigned int nengines = num_uabi_engines(i915); 1048 struct intel_engine_cs *engine; 1049 struct i915_request **request; 1050 struct igt_live_test t; 1051 struct i915_vma *batch; 1052 unsigned int idx; 1053 int err; 1054 1055 /* 1056 * Check we can submit requests to all engines simultaneously. We 1057 * send a recursive batch to each engine - checking that we don't 1058 * block doing so, and that they don't complete too soon. 1059 */ 1060 1061 request = kcalloc(nengines, sizeof(*request), GFP_KERNEL); 1062 if (!request) 1063 return -ENOMEM; 1064 1065 err = igt_live_test_begin(&t, i915, __func__, ""); 1066 if (err) 1067 goto out_free; 1068 1069 batch = recursive_batch(i915); 1070 if (IS_ERR(batch)) { 1071 err = PTR_ERR(batch); 1072 pr_err("%s: Unable to create batch, err=%d\n", __func__, err); 1073 goto out_free; 1074 } 1075 1076 i915_vma_lock(batch); 1077 1078 idx = 0; 1079 for_each_uabi_engine(engine, i915) { 1080 request[idx] = intel_engine_create_kernel_request(engine); 1081 if (IS_ERR(request[idx])) { 1082 err = PTR_ERR(request[idx]); 1083 pr_err("%s: Request allocation failed with err=%d\n", 1084 __func__, err); 1085 goto out_request; 1086 } 1087 1088 err = i915_request_await_object(request[idx], batch->obj, 0); 1089 if (err == 0) 1090 err = i915_vma_move_to_active(batch, request[idx], 0); 1091 GEM_BUG_ON(err); 1092 1093 err = engine->emit_bb_start(request[idx], 1094 batch->node.start, 1095 batch->node.size, 1096 0); 1097 GEM_BUG_ON(err); 1098 request[idx]->batch = batch; 1099 1100 i915_request_get(request[idx]); 1101 i915_request_add(request[idx]); 1102 idx++; 1103 } 1104 1105 i915_vma_unlock(batch); 1106 1107 idx = 0; 1108 for_each_uabi_engine(engine, i915) { 1109 if (i915_request_completed(request[idx])) { 1110 pr_err("%s(%s): request completed too early!\n", 1111 __func__, engine->name); 1112 err = -EINVAL; 1113 goto out_request; 1114 } 1115 idx++; 1116 } 1117 1118 err = recursive_batch_resolve(batch); 1119 if (err) { 1120 pr_err("%s: failed to resolve batch, err=%d\n", __func__, err); 1121 goto out_request; 1122 } 1123 1124 idx = 0; 1125 for_each_uabi_engine(engine, i915) { 1126 long timeout; 1127 1128 timeout = i915_request_wait(request[idx], 0, 1129 MAX_SCHEDULE_TIMEOUT); 1130 if (timeout < 0) { 1131 err = timeout; 1132 pr_err("%s: error waiting for request on %s, err=%d\n", 1133 __func__, engine->name, err); 1134 goto out_request; 1135 } 1136 1137 GEM_BUG_ON(!i915_request_completed(request[idx])); 1138 i915_request_put(request[idx]); 1139 request[idx] = NULL; 1140 idx++; 1141 } 1142 1143 err = igt_live_test_end(&t); 1144 1145 out_request: 1146 idx = 0; 1147 for_each_uabi_engine(engine, i915) { 1148 if (request[idx]) 1149 i915_request_put(request[idx]); 1150 idx++; 1151 } 1152 i915_vma_unpin(batch); 1153 i915_vma_put(batch); 1154 out_free: 1155 kfree(request); 1156 return err; 1157 } 1158 1159 static int live_sequential_engines(void *arg) 1160 { 1161 struct drm_i915_private *i915 = arg; 1162 const unsigned int nengines = num_uabi_engines(i915); 1163 struct i915_request **request; 1164 struct i915_request *prev = NULL; 1165 struct intel_engine_cs *engine; 1166 struct igt_live_test t; 1167 
unsigned int idx; 1168 int err; 1169 1170 /* 1171 * Check we can submit requests to all engines sequentially, such 1172 * that each successive request waits for the earlier ones. This 1173 * tests that we don't execute requests out of order, even though 1174 * they are running on independent engines. 1175 */ 1176 1177 request = kcalloc(nengines, sizeof(*request), GFP_KERNEL); 1178 if (!request) 1179 return -ENOMEM; 1180 1181 err = igt_live_test_begin(&t, i915, __func__, ""); 1182 if (err) 1183 goto out_free; 1184 1185 idx = 0; 1186 for_each_uabi_engine(engine, i915) { 1187 struct i915_vma *batch; 1188 1189 batch = recursive_batch(i915); 1190 if (IS_ERR(batch)) { 1191 err = PTR_ERR(batch); 1192 pr_err("%s: Unable to create batch for %s, err=%d\n", 1193 __func__, engine->name, err); 1194 goto out_free; 1195 } 1196 1197 i915_vma_lock(batch); 1198 request[idx] = intel_engine_create_kernel_request(engine); 1199 if (IS_ERR(request[idx])) { 1200 err = PTR_ERR(request[idx]); 1201 pr_err("%s: Request allocation failed for %s with err=%d\n", 1202 __func__, engine->name, err); 1203 goto out_unlock; 1204 } 1205 1206 if (prev) { 1207 err = i915_request_await_dma_fence(request[idx], 1208 &prev->fence); 1209 if (err) { 1210 i915_request_add(request[idx]); 1211 pr_err("%s: Request await failed for %s with err=%d\n", 1212 __func__, engine->name, err); 1213 goto out_unlock; 1214 } 1215 } 1216 1217 err = i915_request_await_object(request[idx], 1218 batch->obj, false); 1219 if (err == 0) 1220 err = i915_vma_move_to_active(batch, request[idx], 0); 1221 GEM_BUG_ON(err); 1222 1223 err = engine->emit_bb_start(request[idx], 1224 batch->node.start, 1225 batch->node.size, 1226 0); 1227 GEM_BUG_ON(err); 1228 request[idx]->batch = batch; 1229 1230 i915_request_get(request[idx]); 1231 i915_request_add(request[idx]); 1232 1233 prev = request[idx]; 1234 idx++; 1235 1236 out_unlock: 1237 i915_vma_unlock(batch); 1238 if (err) 1239 goto out_request; 1240 } 1241 1242 idx = 0; 1243 for_each_uabi_engine(engine, i915) { 1244 long timeout; 1245 1246 if (i915_request_completed(request[idx])) { 1247 pr_err("%s(%s): request completed too early!\n", 1248 __func__, engine->name); 1249 err = -EINVAL; 1250 goto out_request; 1251 } 1252 1253 err = recursive_batch_resolve(request[idx]->batch); 1254 if (err) { 1255 pr_err("%s: failed to resolve batch, err=%d\n", 1256 __func__, err); 1257 goto out_request; 1258 } 1259 1260 timeout = i915_request_wait(request[idx], 0, 1261 MAX_SCHEDULE_TIMEOUT); 1262 if (timeout < 0) { 1263 err = timeout; 1264 pr_err("%s: error waiting for request on %s, err=%d\n", 1265 __func__, engine->name, err); 1266 goto out_request; 1267 } 1268 1269 GEM_BUG_ON(!i915_request_completed(request[idx])); 1270 idx++; 1271 } 1272 1273 err = igt_live_test_end(&t); 1274 1275 out_request: 1276 idx = 0; 1277 for_each_uabi_engine(engine, i915) { 1278 u32 *cmd; 1279 1280 if (!request[idx]) 1281 break; 1282 1283 cmd = i915_gem_object_pin_map_unlocked(request[idx]->batch->obj, 1284 I915_MAP_WC); 1285 if (!IS_ERR(cmd)) { 1286 *cmd = MI_BATCH_BUFFER_END; 1287 1288 __i915_gem_object_flush_map(request[idx]->batch->obj, 1289 0, sizeof(*cmd)); 1290 i915_gem_object_unpin_map(request[idx]->batch->obj); 1291 1292 intel_gt_chipset_flush(engine->gt); 1293 } 1294 1295 i915_vma_put(request[idx]->batch); 1296 i915_request_put(request[idx]); 1297 idx++; 1298 } 1299 out_free: 1300 kfree(request); 1301 return err; 1302 } 1303 1304 static int __live_parallel_engine1(void *arg) 1305 { 1306 struct intel_engine_cs *engine = arg; 1307 
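	/* Submit a single request at a time and synchronously wait for each. */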
IGT_TIMEOUT(end_time); 1308 unsigned long count; 1309 int err = 0; 1310 1311 count = 0; 1312 intel_engine_pm_get(engine); 1313 do { 1314 struct i915_request *rq; 1315 1316 rq = i915_request_create(engine->kernel_context); 1317 if (IS_ERR(rq)) { 1318 err = PTR_ERR(rq); 1319 break; 1320 } 1321 1322 i915_request_get(rq); 1323 i915_request_add(rq); 1324 1325 err = 0; 1326 if (i915_request_wait(rq, 0, HZ) < 0) 1327 err = -ETIME; 1328 i915_request_put(rq); 1329 if (err) 1330 break; 1331 1332 count++; 1333 } while (!__igt_timeout(end_time, NULL)); 1334 intel_engine_pm_put(engine); 1335 1336 pr_info("%s: %lu request + sync\n", engine->name, count); 1337 return err; 1338 } 1339 1340 static int __live_parallel_engineN(void *arg) 1341 { 1342 struct intel_engine_cs *engine = arg; 1343 IGT_TIMEOUT(end_time); 1344 unsigned long count; 1345 int err = 0; 1346 1347 count = 0; 1348 intel_engine_pm_get(engine); 1349 do { 1350 struct i915_request *rq; 1351 1352 rq = i915_request_create(engine->kernel_context); 1353 if (IS_ERR(rq)) { 1354 err = PTR_ERR(rq); 1355 break; 1356 } 1357 1358 i915_request_add(rq); 1359 count++; 1360 } while (!__igt_timeout(end_time, NULL)); 1361 intel_engine_pm_put(engine); 1362 1363 pr_info("%s: %lu requests\n", engine->name, count); 1364 return err; 1365 } 1366 1367 static bool wake_all(struct drm_i915_private *i915) 1368 { 1369 if (atomic_dec_and_test(&i915->selftest.counter)) { 1370 wake_up_var(&i915->selftest.counter); 1371 return true; 1372 } 1373 1374 return false; 1375 } 1376 1377 static int wait_for_all(struct drm_i915_private *i915) 1378 { 1379 if (wake_all(i915)) 1380 return 0; 1381 1382 if (wait_var_event_timeout(&i915->selftest.counter, 1383 !atomic_read(&i915->selftest.counter), 1384 i915_selftest.timeout_jiffies)) 1385 return 0; 1386 1387 return -ETIME; 1388 } 1389 1390 static int __live_parallel_spin(void *arg) 1391 { 1392 struct intel_engine_cs *engine = arg; 1393 struct igt_spinner spin; 1394 struct i915_request *rq; 1395 int err = 0; 1396 1397 /* 1398 * Create a spinner running for eternity on each engine. If a second 1399 * spinner is incorrectly placed on the same engine, it will not be 1400 * able to start in time. 
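	 * Each thread parks in wait_for_all() with its spinner still running,
	 * only winding down once every engine has reported that its own
	 * spinner managed to start.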
1401 */ 1402 1403 if (igt_spinner_init(&spin, engine->gt)) { 1404 wake_all(engine->i915); 1405 return -ENOMEM; 1406 } 1407 1408 intel_engine_pm_get(engine); 1409 rq = igt_spinner_create_request(&spin, 1410 engine->kernel_context, 1411 MI_NOOP); /* no preemption */ 1412 intel_engine_pm_put(engine); 1413 if (IS_ERR(rq)) { 1414 err = PTR_ERR(rq); 1415 if (err == -ENODEV) 1416 err = 0; 1417 wake_all(engine->i915); 1418 goto out_spin; 1419 } 1420 1421 i915_request_get(rq); 1422 i915_request_add(rq); 1423 if (igt_wait_for_spinner(&spin, rq)) { 1424 /* Occupy this engine for the whole test */ 1425 err = wait_for_all(engine->i915); 1426 } else { 1427 pr_err("Failed to start spinner on %s\n", engine->name); 1428 err = -EINVAL; 1429 } 1430 igt_spinner_end(&spin); 1431 1432 if (err == 0 && i915_request_wait(rq, 0, HZ) < 0) 1433 err = -EIO; 1434 i915_request_put(rq); 1435 1436 out_spin: 1437 igt_spinner_fini(&spin); 1438 return err; 1439 } 1440 1441 static int live_parallel_engines(void *arg) 1442 { 1443 struct drm_i915_private *i915 = arg; 1444 static int (* const func[])(void *arg) = { 1445 __live_parallel_engine1, 1446 __live_parallel_engineN, 1447 __live_parallel_spin, 1448 NULL, 1449 }; 1450 const unsigned int nengines = num_uabi_engines(i915); 1451 struct intel_engine_cs *engine; 1452 int (* const *fn)(void *arg); 1453 struct task_struct **tsk; 1454 int err = 0; 1455 1456 /* 1457 * Check we can submit requests to all engines concurrently. This 1458 * tests that we load up the system maximally. 1459 */ 1460 1461 tsk = kcalloc(nengines, sizeof(*tsk), GFP_KERNEL); 1462 if (!tsk) 1463 return -ENOMEM; 1464 1465 for (fn = func; !err && *fn; fn++) { 1466 char name[KSYM_NAME_LEN]; 1467 struct igt_live_test t; 1468 unsigned int idx; 1469 1470 snprintf(name, sizeof(name), "%ps", *fn); 1471 err = igt_live_test_begin(&t, i915, __func__, name); 1472 if (err) 1473 break; 1474 1475 atomic_set(&i915->selftest.counter, nengines); 1476 1477 idx = 0; 1478 for_each_uabi_engine(engine, i915) { 1479 tsk[idx] = kthread_run(*fn, engine, 1480 "igt/parallel:%s", 1481 engine->name); 1482 if (IS_ERR(tsk[idx])) { 1483 err = PTR_ERR(tsk[idx]); 1484 break; 1485 } 1486 get_task_struct(tsk[idx++]); 1487 } 1488 1489 yield(); /* start all threads before we kthread_stop() */ 1490 1491 idx = 0; 1492 for_each_uabi_engine(engine, i915) { 1493 int status; 1494 1495 if (IS_ERR(tsk[idx])) 1496 break; 1497 1498 status = kthread_stop(tsk[idx]); 1499 if (status && !err) 1500 err = status; 1501 1502 put_task_struct(tsk[idx++]); 1503 } 1504 1505 if (igt_live_test_end(&t)) 1506 err = -EIO; 1507 } 1508 1509 kfree(tsk); 1510 return err; 1511 } 1512 1513 static int 1514 max_batches(struct i915_gem_context *ctx, struct intel_engine_cs *engine) 1515 { 1516 struct i915_request *rq; 1517 int ret; 1518 1519 /* 1520 * Before execlists, all contexts share the same ringbuffer. With 1521 * execlists, each context/engine has a separate ringbuffer and 1522 * for the purposes of this test, inexhaustible. 1523 * 1524 * For the global ringbuffer though, we have to be very careful 1525 * that we do not wrap while preventing the execution of requests 1526 * with a unsignaled fence. 
1527 */ 1528 if (HAS_EXECLISTS(ctx->i915)) 1529 return INT_MAX; 1530 1531 rq = igt_request_alloc(ctx, engine); 1532 if (IS_ERR(rq)) { 1533 ret = PTR_ERR(rq); 1534 } else { 1535 int sz; 1536 1537 ret = rq->ring->size - rq->reserved_space; 1538 i915_request_add(rq); 1539 1540 sz = rq->ring->emit - rq->head; 1541 if (sz < 0) 1542 sz += rq->ring->size; 1543 ret /= sz; 1544 ret /= 2; /* leave half spare, in case of emergency! */ 1545 } 1546 1547 return ret; 1548 } 1549 1550 static int live_breadcrumbs_smoketest(void *arg) 1551 { 1552 struct drm_i915_private *i915 = arg; 1553 const unsigned int nengines = num_uabi_engines(i915); 1554 const unsigned int ncpus = num_online_cpus(); 1555 unsigned long num_waits, num_fences; 1556 struct intel_engine_cs *engine; 1557 struct task_struct **threads; 1558 struct igt_live_test live; 1559 intel_wakeref_t wakeref; 1560 struct smoketest *smoke; 1561 unsigned int n, idx; 1562 struct file *file; 1563 int ret = 0; 1564 1565 /* 1566 * Smoketest our breadcrumb/signal handling for requests across multiple 1567 * threads. A very simple test to only catch the most egregious of bugs. 1568 * See __igt_breadcrumbs_smoketest(); 1569 * 1570 * On real hardware this time. 1571 */ 1572 1573 wakeref = intel_runtime_pm_get(&i915->runtime_pm); 1574 1575 file = mock_file(i915); 1576 if (IS_ERR(file)) { 1577 ret = PTR_ERR(file); 1578 goto out_rpm; 1579 } 1580 1581 smoke = kcalloc(nengines, sizeof(*smoke), GFP_KERNEL); 1582 if (!smoke) { 1583 ret = -ENOMEM; 1584 goto out_file; 1585 } 1586 1587 threads = kcalloc(ncpus * nengines, sizeof(*threads), GFP_KERNEL); 1588 if (!threads) { 1589 ret = -ENOMEM; 1590 goto out_smoke; 1591 } 1592 1593 smoke[0].request_alloc = __live_request_alloc; 1594 smoke[0].ncontexts = 64; 1595 smoke[0].contexts = kcalloc(smoke[0].ncontexts, 1596 sizeof(*smoke[0].contexts), 1597 GFP_KERNEL); 1598 if (!smoke[0].contexts) { 1599 ret = -ENOMEM; 1600 goto out_threads; 1601 } 1602 1603 for (n = 0; n < smoke[0].ncontexts; n++) { 1604 smoke[0].contexts[n] = live_context(i915, file); 1605 if (IS_ERR(smoke[0].contexts[n])) { 1606 ret = PTR_ERR(smoke[0].contexts[n]); 1607 goto out_contexts; 1608 } 1609 } 1610 1611 ret = igt_live_test_begin(&live, i915, __func__, ""); 1612 if (ret) 1613 goto out_contexts; 1614 1615 idx = 0; 1616 for_each_uabi_engine(engine, i915) { 1617 smoke[idx] = smoke[0]; 1618 smoke[idx].engine = engine; 1619 smoke[idx].max_batch = 1620 max_batches(smoke[0].contexts[0], engine); 1621 if (smoke[idx].max_batch < 0) { 1622 ret = smoke[idx].max_batch; 1623 goto out_flush; 1624 } 1625 /* One ring interleaved between requests from all cpus */ 1626 smoke[idx].max_batch /= num_online_cpus() + 1; 1627 pr_debug("Limiting batches to %d requests on %s\n", 1628 smoke[idx].max_batch, engine->name); 1629 1630 for (n = 0; n < ncpus; n++) { 1631 struct task_struct *tsk; 1632 1633 tsk = kthread_run(__igt_breadcrumbs_smoketest, 1634 &smoke[idx], "igt/%d.%d", idx, n); 1635 if (IS_ERR(tsk)) { 1636 ret = PTR_ERR(tsk); 1637 goto out_flush; 1638 } 1639 1640 get_task_struct(tsk); 1641 threads[idx * ncpus + n] = tsk; 1642 } 1643 1644 idx++; 1645 } 1646 1647 yield(); /* start all threads before we begin */ 1648 msleep(jiffies_to_msecs(i915_selftest.timeout_jiffies)); 1649 1650 out_flush: 1651 idx = 0; 1652 num_waits = 0; 1653 num_fences = 0; 1654 for_each_uabi_engine(engine, i915) { 1655 for (n = 0; n < ncpus; n++) { 1656 struct task_struct *tsk = threads[idx * ncpus + n]; 1657 int err; 1658 1659 if (!tsk) 1660 continue; 1661 1662 err = kthread_stop(tsk); 1663 if (err < 0 && 
!ret) 1664 ret = err; 1665 1666 put_task_struct(tsk); 1667 } 1668 1669 num_waits += atomic_long_read(&smoke[idx].num_waits); 1670 num_fences += atomic_long_read(&smoke[idx].num_fences); 1671 idx++; 1672 } 1673 pr_info("Completed %lu waits for %lu fences across %d engines and %d cpus\n", 1674 num_waits, num_fences, idx, ncpus); 1675 1676 ret = igt_live_test_end(&live) ?: ret; 1677 out_contexts: 1678 kfree(smoke[0].contexts); 1679 out_threads: 1680 kfree(threads); 1681 out_smoke: 1682 kfree(smoke); 1683 out_file: 1684 fput(file); 1685 out_rpm: 1686 intel_runtime_pm_put(&i915->runtime_pm, wakeref); 1687 1688 return ret; 1689 } 1690 1691 int i915_request_live_selftests(struct drm_i915_private *i915) 1692 { 1693 static const struct i915_subtest tests[] = { 1694 SUBTEST(live_nop_request), 1695 SUBTEST(live_all_engines), 1696 SUBTEST(live_sequential_engines), 1697 SUBTEST(live_parallel_engines), 1698 SUBTEST(live_empty_request), 1699 SUBTEST(live_cancel_request), 1700 SUBTEST(live_breadcrumbs_smoketest), 1701 }; 1702 1703 if (intel_gt_is_wedged(to_gt(i915))) 1704 return 0; 1705 1706 return i915_subtests(tests, i915); 1707 } 1708 1709 static int switch_to_kernel_sync(struct intel_context *ce, int err) 1710 { 1711 struct i915_request *rq; 1712 struct dma_fence *fence; 1713 1714 rq = intel_engine_create_kernel_request(ce->engine); 1715 if (IS_ERR(rq)) 1716 return PTR_ERR(rq); 1717 1718 fence = i915_active_fence_get(&ce->timeline->last_request); 1719 if (fence) { 1720 i915_request_await_dma_fence(rq, fence); 1721 dma_fence_put(fence); 1722 } 1723 1724 rq = i915_request_get(rq); 1725 i915_request_add(rq); 1726 if (i915_request_wait(rq, 0, HZ / 2) < 0 && !err) 1727 err = -ETIME; 1728 i915_request_put(rq); 1729 1730 while (!err && !intel_engine_is_idle(ce->engine)) 1731 intel_engine_flush_submission(ce->engine); 1732 1733 return err; 1734 } 1735 1736 struct perf_stats { 1737 struct intel_engine_cs *engine; 1738 unsigned long count; 1739 ktime_t time; 1740 ktime_t busy; 1741 u64 runtime; 1742 }; 1743 1744 struct perf_series { 1745 struct drm_i915_private *i915; 1746 unsigned int nengines; 1747 struct intel_context *ce[]; 1748 }; 1749 1750 static int cmp_u32(const void *A, const void *B) 1751 { 1752 const u32 *a = A, *b = B; 1753 1754 return *a - *b; 1755 } 1756 1757 static u32 trifilter(u32 *a) 1758 { 1759 u64 sum; 1760 1761 #define TF_COUNT 5 1762 sort(a, TF_COUNT, sizeof(*a), cmp_u32, NULL); 1763 1764 sum = mul_u32_u32(a[2], 2); 1765 sum += a[1]; 1766 sum += a[3]; 1767 1768 GEM_BUG_ON(sum > U32_MAX); 1769 return sum; 1770 #define TF_BIAS 2 1771 } 1772 1773 static u64 cycles_to_ns(struct intel_engine_cs *engine, u32 cycles) 1774 { 1775 u64 ns = intel_gt_clock_interval_to_ns(engine->gt, cycles); 1776 1777 return DIV_ROUND_CLOSEST(ns, 1 << TF_BIAS); 1778 } 1779 1780 static u32 *emit_timestamp_store(u32 *cs, struct intel_context *ce, u32 offset) 1781 { 1782 *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT; 1783 *cs++ = i915_mmio_reg_offset(RING_TIMESTAMP((ce->engine->mmio_base))); 1784 *cs++ = offset; 1785 *cs++ = 0; 1786 1787 return cs; 1788 } 1789 1790 static u32 *emit_store_dw(u32 *cs, u32 offset, u32 value) 1791 { 1792 *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; 1793 *cs++ = offset; 1794 *cs++ = 0; 1795 *cs++ = value; 1796 1797 return cs; 1798 } 1799 1800 static u32 *emit_semaphore_poll(u32 *cs, u32 mode, u32 value, u32 offset) 1801 { 1802 *cs++ = MI_SEMAPHORE_WAIT | 1803 MI_SEMAPHORE_GLOBAL_GTT | 1804 MI_SEMAPHORE_POLL | 1805 mode; 1806 *cs++ = value; 1807 *cs++ = offset; 1808 *cs++ = 0; 1809 1810 
return cs; 1811 } 1812 1813 static u32 *emit_semaphore_poll_until(u32 *cs, u32 offset, u32 value) 1814 { 1815 return emit_semaphore_poll(cs, MI_SEMAPHORE_SAD_EQ_SDD, value, offset); 1816 } 1817 1818 static void semaphore_set(u32 *sema, u32 value) 1819 { 1820 WRITE_ONCE(*sema, value); 1821 wmb(); /* flush the update to the cache, and beyond */ 1822 } 1823 1824 static u32 *hwsp_scratch(const struct intel_context *ce) 1825 { 1826 return memset32(ce->engine->status_page.addr + 1000, 0, 21); 1827 } 1828 1829 static u32 hwsp_offset(const struct intel_context *ce, u32 *dw) 1830 { 1831 return (i915_ggtt_offset(ce->engine->status_page.vma) + 1832 offset_in_page(dw)); 1833 } 1834 1835 static int measure_semaphore_response(struct intel_context *ce) 1836 { 1837 u32 *sema = hwsp_scratch(ce); 1838 const u32 offset = hwsp_offset(ce, sema); 1839 u32 elapsed[TF_COUNT], cycles; 1840 struct i915_request *rq; 1841 u32 *cs; 1842 int err; 1843 int i; 1844 1845 /* 1846 * Measure how many cycles it takes for the HW to detect the change 1847 * in a semaphore value. 1848 * 1849 * A: read CS_TIMESTAMP from CPU 1850 * poke semaphore 1851 * B: read CS_TIMESTAMP on GPU 1852 * 1853 * Semaphore latency: B - A 1854 */ 1855 1856 semaphore_set(sema, -1); 1857 1858 rq = i915_request_create(ce); 1859 if (IS_ERR(rq)) 1860 return PTR_ERR(rq); 1861 1862 cs = intel_ring_begin(rq, 4 + 12 * ARRAY_SIZE(elapsed)); 1863 if (IS_ERR(cs)) { 1864 i915_request_add(rq); 1865 err = PTR_ERR(cs); 1866 goto err; 1867 } 1868 1869 cs = emit_store_dw(cs, offset, 0); 1870 for (i = 1; i <= ARRAY_SIZE(elapsed); i++) { 1871 cs = emit_semaphore_poll_until(cs, offset, i); 1872 cs = emit_timestamp_store(cs, ce, offset + i * sizeof(u32)); 1873 cs = emit_store_dw(cs, offset, 0); 1874 } 1875 1876 intel_ring_advance(rq, cs); 1877 i915_request_add(rq); 1878 1879 if (wait_for(READ_ONCE(*sema) == 0, 50)) { 1880 err = -EIO; 1881 goto err; 1882 } 1883 1884 for (i = 1; i <= ARRAY_SIZE(elapsed); i++) { 1885 preempt_disable(); 1886 cycles = ENGINE_READ_FW(ce->engine, RING_TIMESTAMP); 1887 semaphore_set(sema, i); 1888 preempt_enable(); 1889 1890 if (wait_for(READ_ONCE(*sema) == 0, 50)) { 1891 err = -EIO; 1892 goto err; 1893 } 1894 1895 elapsed[i - 1] = sema[i] - cycles; 1896 } 1897 1898 cycles = trifilter(elapsed); 1899 pr_info("%s: semaphore response %d cycles, %lluns\n", 1900 ce->engine->name, cycles >> TF_BIAS, 1901 cycles_to_ns(ce->engine, cycles)); 1902 1903 return intel_gt_wait_for_idle(ce->engine->gt, HZ); 1904 1905 err: 1906 intel_gt_set_wedged(ce->engine->gt); 1907 return err; 1908 } 1909 1910 static int measure_idle_dispatch(struct intel_context *ce) 1911 { 1912 u32 *sema = hwsp_scratch(ce); 1913 const u32 offset = hwsp_offset(ce, sema); 1914 u32 elapsed[TF_COUNT], cycles; 1915 u32 *cs; 1916 int err; 1917 int i; 1918 1919 /* 1920 * Measure how long it takes for us to submit a request while the 1921 * engine is idle, but is resting in our context. 
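	 * Each sample pairs a CPU read of RING_TIMESTAMP taken just before
	 * i915_request_add() with the timestamp the request then writes into
	 * the scratch slot.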
1922 * 1923 * A: read CS_TIMESTAMP from CPU 1924 * submit request 1925 * B: read CS_TIMESTAMP on GPU 1926 * 1927 * Submission latency: B - A 1928 */ 1929 1930 for (i = 0; i < ARRAY_SIZE(elapsed); i++) { 1931 struct i915_request *rq; 1932 1933 err = intel_gt_wait_for_idle(ce->engine->gt, HZ / 2); 1934 if (err) 1935 return err; 1936 1937 rq = i915_request_create(ce); 1938 if (IS_ERR(rq)) { 1939 err = PTR_ERR(rq); 1940 goto err; 1941 } 1942 1943 cs = intel_ring_begin(rq, 4); 1944 if (IS_ERR(cs)) { 1945 i915_request_add(rq); 1946 err = PTR_ERR(cs); 1947 goto err; 1948 } 1949 1950 cs = emit_timestamp_store(cs, ce, offset + i * sizeof(u32)); 1951 1952 intel_ring_advance(rq, cs); 1953 1954 preempt_disable(); 1955 local_bh_disable(); 1956 elapsed[i] = ENGINE_READ_FW(ce->engine, RING_TIMESTAMP); 1957 i915_request_add(rq); 1958 local_bh_enable(); 1959 preempt_enable(); 1960 } 1961 1962 err = intel_gt_wait_for_idle(ce->engine->gt, HZ / 2); 1963 if (err) 1964 goto err; 1965 1966 for (i = 0; i < ARRAY_SIZE(elapsed); i++) 1967 elapsed[i] = sema[i] - elapsed[i]; 1968 1969 cycles = trifilter(elapsed); 1970 pr_info("%s: idle dispatch latency %d cycles, %lluns\n", 1971 ce->engine->name, cycles >> TF_BIAS, 1972 cycles_to_ns(ce->engine, cycles)); 1973 1974 return intel_gt_wait_for_idle(ce->engine->gt, HZ); 1975 1976 err: 1977 intel_gt_set_wedged(ce->engine->gt); 1978 return err; 1979 } 1980 1981 static int measure_busy_dispatch(struct intel_context *ce) 1982 { 1983 u32 *sema = hwsp_scratch(ce); 1984 const u32 offset = hwsp_offset(ce, sema); 1985 u32 elapsed[TF_COUNT + 1], cycles; 1986 u32 *cs; 1987 int err; 1988 int i; 1989 1990 /* 1991 * Measure how long it takes for us to submit a request while the 1992 * engine is busy, polling on a semaphore in our context. With 1993 * direct submission, this will include the cost of a lite restore. 
1994 * 1995 * A: read CS_TIMESTAMP from CPU 1996 * submit request 1997 * B: read CS_TIMESTAMP on GPU 1998 * 1999 * Submission latency: B - A 2000 */ 2001 2002 for (i = 1; i <= ARRAY_SIZE(elapsed); i++) { 2003 struct i915_request *rq; 2004 2005 rq = i915_request_create(ce); 2006 if (IS_ERR(rq)) { 2007 err = PTR_ERR(rq); 2008 goto err; 2009 } 2010 2011 cs = intel_ring_begin(rq, 12); 2012 if (IS_ERR(cs)) { 2013 i915_request_add(rq); 2014 err = PTR_ERR(cs); 2015 goto err; 2016 } 2017 2018 cs = emit_store_dw(cs, offset + i * sizeof(u32), -1); 2019 cs = emit_semaphore_poll_until(cs, offset, i); 2020 cs = emit_timestamp_store(cs, ce, offset + i * sizeof(u32)); 2021 2022 intel_ring_advance(rq, cs); 2023 2024 if (i > 1 && wait_for(READ_ONCE(sema[i - 1]), 500)) { 2025 err = -EIO; 2026 goto err; 2027 } 2028 2029 preempt_disable(); 2030 local_bh_disable(); 2031 elapsed[i - 1] = ENGINE_READ_FW(ce->engine, RING_TIMESTAMP); 2032 i915_request_add(rq); 2033 local_bh_enable(); 2034 semaphore_set(sema, i - 1); 2035 preempt_enable(); 2036 } 2037 2038 wait_for(READ_ONCE(sema[i - 1]), 500); 2039 semaphore_set(sema, i - 1); 2040 2041 for (i = 1; i <= TF_COUNT; i++) { 2042 GEM_BUG_ON(sema[i] == -1); 2043 elapsed[i - 1] = sema[i] - elapsed[i]; 2044 } 2045 2046 cycles = trifilter(elapsed); 2047 pr_info("%s: busy dispatch latency %d cycles, %lluns\n", 2048 ce->engine->name, cycles >> TF_BIAS, 2049 cycles_to_ns(ce->engine, cycles)); 2050 2051 return intel_gt_wait_for_idle(ce->engine->gt, HZ); 2052 2053 err: 2054 intel_gt_set_wedged(ce->engine->gt); 2055 return err; 2056 } 2057 2058 static int plug(struct intel_engine_cs *engine, u32 *sema, u32 mode, int value) 2059 { 2060 const u32 offset = 2061 i915_ggtt_offset(engine->status_page.vma) + 2062 offset_in_page(sema); 2063 struct i915_request *rq; 2064 u32 *cs; 2065 2066 rq = i915_request_create(engine->kernel_context); 2067 if (IS_ERR(rq)) 2068 return PTR_ERR(rq); 2069 2070 cs = intel_ring_begin(rq, 4); 2071 if (IS_ERR(cs)) { 2072 i915_request_add(rq); 2073 return PTR_ERR(cs); 2074 } 2075 2076 cs = emit_semaphore_poll(cs, mode, value, offset); 2077 2078 intel_ring_advance(rq, cs); 2079 i915_request_add(rq); 2080 2081 return 0; 2082 } 2083 2084 static int measure_inter_request(struct intel_context *ce) 2085 { 2086 u32 *sema = hwsp_scratch(ce); 2087 const u32 offset = hwsp_offset(ce, sema); 2088 u32 elapsed[TF_COUNT + 1], cycles; 2089 struct i915_sw_fence *submit; 2090 int i, err; 2091 2092 /* 2093 * Measure how long it takes to advance from one request into the 2094 * next. Between each request we flush the GPU caches to memory, 2095 * update the breadcrumbs, and then invalidate those caches. 2096 * We queue up all the requests to be submitted in one batch so 2097 * it should be one set of contiguous measurements. 
2098 * 2099 * A: read CS_TIMESTAMP on GPU 2100 * advance request 2101 * B: read CS_TIMESTAMP on GPU 2102 * 2103 * Request latency: B - A 2104 */ 2105 2106 err = plug(ce->engine, sema, MI_SEMAPHORE_SAD_NEQ_SDD, 0); 2107 if (err) 2108 return err; 2109 2110 submit = heap_fence_create(GFP_KERNEL); 2111 if (!submit) { 2112 semaphore_set(sema, 1); 2113 return -ENOMEM; 2114 } 2115 2116 intel_engine_flush_submission(ce->engine); 2117 for (i = 1; i <= ARRAY_SIZE(elapsed); i++) { 2118 struct i915_request *rq; 2119 u32 *cs; 2120 2121 rq = i915_request_create(ce); 2122 if (IS_ERR(rq)) { 2123 err = PTR_ERR(rq); 2124 goto err_submit; 2125 } 2126 2127 err = i915_sw_fence_await_sw_fence_gfp(&rq->submit, 2128 submit, 2129 GFP_KERNEL); 2130 if (err < 0) { 2131 i915_request_add(rq); 2132 goto err_submit; 2133 } 2134 2135 cs = intel_ring_begin(rq, 4); 2136 if (IS_ERR(cs)) { 2137 i915_request_add(rq); 2138 err = PTR_ERR(cs); 2139 goto err_submit; 2140 } 2141 2142 cs = emit_timestamp_store(cs, ce, offset + i * sizeof(u32)); 2143 2144 intel_ring_advance(rq, cs); 2145 i915_request_add(rq); 2146 } 2147 i915_sw_fence_commit(submit); 2148 intel_engine_flush_submission(ce->engine); 2149 heap_fence_put(submit); 2150 2151 semaphore_set(sema, 1); 2152 err = intel_gt_wait_for_idle(ce->engine->gt, HZ / 2); 2153 if (err) 2154 goto err; 2155 2156 for (i = 1; i <= TF_COUNT; i++) 2157 elapsed[i - 1] = sema[i + 1] - sema[i]; 2158 2159 cycles = trifilter(elapsed); 2160 pr_info("%s: inter-request latency %d cycles, %lluns\n", 2161 ce->engine->name, cycles >> TF_BIAS, 2162 cycles_to_ns(ce->engine, cycles)); 2163 2164 return intel_gt_wait_for_idle(ce->engine->gt, HZ); 2165 2166 err_submit: 2167 i915_sw_fence_commit(submit); 2168 heap_fence_put(submit); 2169 semaphore_set(sema, 1); 2170 err: 2171 intel_gt_set_wedged(ce->engine->gt); 2172 return err; 2173 } 2174 2175 static int measure_context_switch(struct intel_context *ce) 2176 { 2177 u32 *sema = hwsp_scratch(ce); 2178 const u32 offset = hwsp_offset(ce, sema); 2179 struct i915_request *fence = NULL; 2180 u32 elapsed[TF_COUNT + 1], cycles; 2181 int i, j, err; 2182 u32 *cs; 2183 2184 /* 2185 * Measure how long it takes to advance from one request in one 2186 * context to a request in another context. This allows us to 2187 * measure how long the context save/restore take, along with all 2188 * the inter-context setup we require. 
2189 * 2190 * A: read CS_TIMESTAMP on GPU 2191 * switch context 2192 * B: read CS_TIMESTAMP on GPU 2193 * 2194 * Context switch latency: B - A 2195 */ 2196 2197 err = plug(ce->engine, sema, MI_SEMAPHORE_SAD_NEQ_SDD, 0); 2198 if (err) 2199 return err; 2200 2201 for (i = 1; i <= ARRAY_SIZE(elapsed); i++) { 2202 struct intel_context *arr[] = { 2203 ce, ce->engine->kernel_context 2204 }; 2205 u32 addr = offset + ARRAY_SIZE(arr) * i * sizeof(u32); 2206 2207 for (j = 0; j < ARRAY_SIZE(arr); j++) { 2208 struct i915_request *rq; 2209 2210 rq = i915_request_create(arr[j]); 2211 if (IS_ERR(rq)) { 2212 err = PTR_ERR(rq); 2213 goto err_fence; 2214 } 2215 2216 if (fence) { 2217 err = i915_request_await_dma_fence(rq, 2218 &fence->fence); 2219 if (err) { 2220 i915_request_add(rq); 2221 goto err_fence; 2222 } 2223 } 2224 2225 cs = intel_ring_begin(rq, 4); 2226 if (IS_ERR(cs)) { 2227 i915_request_add(rq); 2228 err = PTR_ERR(cs); 2229 goto err_fence; 2230 } 2231 2232 cs = emit_timestamp_store(cs, ce, addr); 2233 addr += sizeof(u32); 2234 2235 intel_ring_advance(rq, cs); 2236 2237 i915_request_put(fence); 2238 fence = i915_request_get(rq); 2239 2240 i915_request_add(rq); 2241 } 2242 } 2243 i915_request_put(fence); 2244 intel_engine_flush_submission(ce->engine); 2245 2246 semaphore_set(sema, 1); 2247 err = intel_gt_wait_for_idle(ce->engine->gt, HZ / 2); 2248 if (err) 2249 goto err; 2250 2251 for (i = 1; i <= TF_COUNT; i++) 2252 elapsed[i - 1] = sema[2 * i + 2] - sema[2 * i + 1]; 2253 2254 cycles = trifilter(elapsed); 2255 pr_info("%s: context switch latency %d cycles, %lluns\n", 2256 ce->engine->name, cycles >> TF_BIAS, 2257 cycles_to_ns(ce->engine, cycles)); 2258 2259 return intel_gt_wait_for_idle(ce->engine->gt, HZ); 2260 2261 err_fence: 2262 i915_request_put(fence); 2263 semaphore_set(sema, 1); 2264 err: 2265 intel_gt_set_wedged(ce->engine->gt); 2266 return err; 2267 } 2268 2269 static int measure_preemption(struct intel_context *ce) 2270 { 2271 u32 *sema = hwsp_scratch(ce); 2272 const u32 offset = hwsp_offset(ce, sema); 2273 u32 elapsed[TF_COUNT], cycles; 2274 u32 *cs; 2275 int err; 2276 int i; 2277 2278 /* 2279 * We measure two latencies while triggering preemption. The first 2280 * latency is how long it takes for us to submit a preempting request. 2281 * The second latency is how it takes for us to return from the 2282 * preemption back to the original context. 
2283 * 2284 * A: read CS_TIMESTAMP from CPU 2285 * submit preemption 2286 * B: read CS_TIMESTAMP on GPU (in preempting context) 2287 * context switch 2288 * C: read CS_TIMESTAMP on GPU (in original context) 2289 * 2290 * Preemption dispatch latency: B - A 2291 * Preemption switch latency: C - B 2292 */ 2293 2294 if (!intel_engine_has_preemption(ce->engine)) 2295 return 0; 2296 2297 for (i = 1; i <= ARRAY_SIZE(elapsed); i++) { 2298 u32 addr = offset + 2 * i * sizeof(u32); 2299 struct i915_request *rq; 2300 2301 rq = i915_request_create(ce); 2302 if (IS_ERR(rq)) { 2303 err = PTR_ERR(rq); 2304 goto err; 2305 } 2306 2307 cs = intel_ring_begin(rq, 12); 2308 if (IS_ERR(cs)) { 2309 i915_request_add(rq); 2310 err = PTR_ERR(cs); 2311 goto err; 2312 } 2313 2314 cs = emit_store_dw(cs, addr, -1); 2315 cs = emit_semaphore_poll_until(cs, offset, i); 2316 cs = emit_timestamp_store(cs, ce, addr + sizeof(u32)); 2317 2318 intel_ring_advance(rq, cs); 2319 i915_request_add(rq); 2320 2321 if (wait_for(READ_ONCE(sema[2 * i]) == -1, 500)) { 2322 err = -EIO; 2323 goto err; 2324 } 2325 2326 rq = i915_request_create(ce->engine->kernel_context); 2327 if (IS_ERR(rq)) { 2328 err = PTR_ERR(rq); 2329 goto err; 2330 } 2331 2332 cs = intel_ring_begin(rq, 8); 2333 if (IS_ERR(cs)) { 2334 i915_request_add(rq); 2335 err = PTR_ERR(cs); 2336 goto err; 2337 } 2338 2339 cs = emit_timestamp_store(cs, ce, addr); 2340 cs = emit_store_dw(cs, offset, i); 2341 2342 intel_ring_advance(rq, cs); 2343 rq->sched.attr.priority = I915_PRIORITY_BARRIER; 2344 2345 elapsed[i - 1] = ENGINE_READ_FW(ce->engine, RING_TIMESTAMP); 2346 i915_request_add(rq); 2347 } 2348 2349 if (wait_for(READ_ONCE(sema[2 * i - 2]) != -1, 500)) { 2350 err = -EIO; 2351 goto err; 2352 } 2353 2354 for (i = 1; i <= TF_COUNT; i++) 2355 elapsed[i - 1] = sema[2 * i + 0] - elapsed[i - 1]; 2356 2357 cycles = trifilter(elapsed); 2358 pr_info("%s: preemption dispatch latency %d cycles, %lluns\n", 2359 ce->engine->name, cycles >> TF_BIAS, 2360 cycles_to_ns(ce->engine, cycles)); 2361 2362 for (i = 1; i <= TF_COUNT; i++) 2363 elapsed[i - 1] = sema[2 * i + 1] - sema[2 * i + 0]; 2364 2365 cycles = trifilter(elapsed); 2366 pr_info("%s: preemption switch latency %d cycles, %lluns\n", 2367 ce->engine->name, cycles >> TF_BIAS, 2368 cycles_to_ns(ce->engine, cycles)); 2369 2370 return intel_gt_wait_for_idle(ce->engine->gt, HZ); 2371 2372 err: 2373 intel_gt_set_wedged(ce->engine->gt); 2374 return err; 2375 } 2376 2377 struct signal_cb { 2378 struct dma_fence_cb base; 2379 bool seen; 2380 }; 2381 2382 static void signal_cb(struct dma_fence *fence, struct dma_fence_cb *cb) 2383 { 2384 struct signal_cb *s = container_of(cb, typeof(*s), base); 2385 2386 smp_store_mb(s->seen, true); /* be safe, be strong */ 2387 } 2388 2389 static int measure_completion(struct intel_context *ce) 2390 { 2391 u32 *sema = hwsp_scratch(ce); 2392 const u32 offset = hwsp_offset(ce, sema); 2393 u32 elapsed[TF_COUNT], cycles; 2394 u32 *cs; 2395 int err; 2396 int i; 2397 2398 /* 2399 * Measure how long it takes for the signal (interrupt) to be 2400 * sent from the GPU to be processed by the CPU. 
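* (B is sampled from RING_TIMESTAMP on the CPU once the dma-fence callback has fired; preemption is disabled around the busy-wait on the callback so the CPU-side read happens with as little scheduling delay as possible.)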
2401 * 2402 * A: read CS_TIMESTAMP on GPU 2403 * signal 2404 * B: read CS_TIMESTAMP from CPU 2405 * 2406 * Completion latency: B - A 2407 */ 2408 2409 for (i = 1; i <= ARRAY_SIZE(elapsed); i++) { 2410 struct signal_cb cb = { .seen = false }; 2411 struct i915_request *rq; 2412 2413 rq = i915_request_create(ce); 2414 if (IS_ERR(rq)) { 2415 err = PTR_ERR(rq); 2416 goto err; 2417 } 2418 2419 cs = intel_ring_begin(rq, 12); 2420 if (IS_ERR(cs)) { 2421 i915_request_add(rq); 2422 err = PTR_ERR(cs); 2423 goto err; 2424 } 2425 2426 cs = emit_store_dw(cs, offset + i * sizeof(u32), -1); 2427 cs = emit_semaphore_poll_until(cs, offset, i); 2428 cs = emit_timestamp_store(cs, ce, offset + i * sizeof(u32)); 2429 2430 intel_ring_advance(rq, cs); 2431 2432 dma_fence_add_callback(&rq->fence, &cb.base, signal_cb); 2433 i915_request_add(rq); 2434 2435 intel_engine_flush_submission(ce->engine); 2436 if (wait_for(READ_ONCE(sema[i]) == -1, 50)) { 2437 err = -EIO; 2438 goto err; 2439 } 2440 2441 preempt_disable(); 2442 semaphore_set(sema, i); 2443 while (!READ_ONCE(cb.seen)) 2444 cpu_relax(); 2445 2446 elapsed[i - 1] = ENGINE_READ_FW(ce->engine, RING_TIMESTAMP); 2447 preempt_enable(); 2448 } 2449 2450 err = intel_gt_wait_for_idle(ce->engine->gt, HZ / 2); 2451 if (err) 2452 goto err; 2453 2454 for (i = 0; i < ARRAY_SIZE(elapsed); i++) { 2455 GEM_BUG_ON(sema[i + 1] == -1); 2456 elapsed[i] = elapsed[i] - sema[i + 1]; 2457 } 2458 2459 cycles = trifilter(elapsed); 2460 pr_info("%s: completion latency %d cycles, %lluns\n", 2461 ce->engine->name, cycles >> TF_BIAS, 2462 cycles_to_ns(ce->engine, cycles)); 2463 2464 return intel_gt_wait_for_idle(ce->engine->gt, HZ); 2465 2466 err: 2467 intel_gt_set_wedged(ce->engine->gt); 2468 return err; 2469 } 2470 2471 static void rps_pin(struct intel_gt *gt) 2472 { 2473 /* Pin the frequency to max */ 2474 atomic_inc(&gt->rps.num_waiters); 2475 intel_uncore_forcewake_get(gt->uncore, FORCEWAKE_ALL); 2476 2477 mutex_lock(&gt->rps.lock); 2478 intel_rps_set(&gt->rps, gt->rps.max_freq); 2479 mutex_unlock(&gt->rps.lock); 2480 } 2481 2482 static void rps_unpin(struct intel_gt *gt) 2483 { 2484 intel_uncore_forcewake_put(gt->uncore, FORCEWAKE_ALL); 2485 atomic_dec(&gt->rps.num_waiters); 2486 } 2487 2488 static int perf_request_latency(void *arg) 2489 { 2490 struct drm_i915_private *i915 = arg; 2491 struct intel_engine_cs *engine; 2492 struct pm_qos_request qos; 2493 int err = 0; 2494 2495 if (GRAPHICS_VER(i915) < 8) /* per-engine CS timestamp, semaphores */ 2496 return 0; 2497 2498 cpu_latency_qos_add_request(&qos, 0); /* disable cstates */ 2499 2500 for_each_uabi_engine(engine, i915) { 2501 struct intel_context *ce; 2502 2503 ce = intel_context_create(engine); 2504 if (IS_ERR(ce)) { 2505 err = PTR_ERR(ce); 2506 goto out; 2507 } 2508 2509 err = intel_context_pin(ce); 2510 if (err) { 2511 intel_context_put(ce); 2512 goto out; 2513 } 2514 2515 st_engine_heartbeat_disable(engine); 2516 rps_pin(engine->gt); 2517 2518 if (err == 0) 2519 err = measure_semaphore_response(ce); 2520 if (err == 0) 2521 err = measure_idle_dispatch(ce); 2522 if (err == 0) 2523 err = measure_busy_dispatch(ce); 2524 if (err == 0) 2525 err = measure_inter_request(ce); 2526 if (err == 0) 2527 err = measure_context_switch(ce); 2528 if (err == 0) 2529 err = measure_preemption(ce); 2530 if (err == 0) 2531 err = measure_completion(ce); 2532 2533 rps_unpin(engine->gt); 2534 st_engine_heartbeat_enable(engine); 2535 2536 intel_context_unpin(ce); 2537 intel_context_put(ce); 2538 if (err) 2539 goto out; 2540 } 2541 2542 out: 2543 if
(igt_flush_test(i915)) 2544 err = -EIO; 2545 2546 cpu_latency_qos_remove_request(&qos); 2547 return err; 2548 } 2549 2550 static int s_sync0(void *arg) 2551 { 2552 struct perf_series *ps = arg; 2553 IGT_TIMEOUT(end_time); 2554 unsigned int idx = 0; 2555 int err = 0; 2556 2557 GEM_BUG_ON(!ps->nengines); 2558 do { 2559 struct i915_request *rq; 2560 2561 rq = i915_request_create(ps->ce[idx]); 2562 if (IS_ERR(rq)) { 2563 err = PTR_ERR(rq); 2564 break; 2565 } 2566 2567 i915_request_get(rq); 2568 i915_request_add(rq); 2569 2570 if (i915_request_wait(rq, 0, HZ / 5) < 0) 2571 err = -ETIME; 2572 i915_request_put(rq); 2573 if (err) 2574 break; 2575 2576 if (++idx == ps->nengines) 2577 idx = 0; 2578 } while (!__igt_timeout(end_time, NULL)); 2579 2580 return err; 2581 } 2582 2583 static int s_sync1(void *arg) 2584 { 2585 struct perf_series *ps = arg; 2586 struct i915_request *prev = NULL; 2587 IGT_TIMEOUT(end_time); 2588 unsigned int idx = 0; 2589 int err = 0; 2590 2591 GEM_BUG_ON(!ps->nengines); 2592 do { 2593 struct i915_request *rq; 2594 2595 rq = i915_request_create(ps->ce[idx]); 2596 if (IS_ERR(rq)) { 2597 err = PTR_ERR(rq); 2598 break; 2599 } 2600 2601 i915_request_get(rq); 2602 i915_request_add(rq); 2603 2604 if (prev && i915_request_wait(prev, 0, HZ / 5) < 0) 2605 err = -ETIME; 2606 i915_request_put(prev); 2607 prev = rq; 2608 if (err) 2609 break; 2610 2611 if (++idx == ps->nengines) 2612 idx = 0; 2613 } while (!__igt_timeout(end_time, NULL)); 2614 i915_request_put(prev); 2615 2616 return err; 2617 } 2618 2619 static int s_many(void *arg) 2620 { 2621 struct perf_series *ps = arg; 2622 IGT_TIMEOUT(end_time); 2623 unsigned int idx = 0; 2624 2625 GEM_BUG_ON(!ps->nengines); 2626 do { 2627 struct i915_request *rq; 2628 2629 rq = i915_request_create(ps->ce[idx]); 2630 if (IS_ERR(rq)) 2631 return PTR_ERR(rq); 2632 2633 i915_request_add(rq); 2634 2635 if (++idx == ps->nengines) 2636 idx = 0; 2637 } while (!__igt_timeout(end_time, NULL)); 2638 2639 return 0; 2640 } 2641 2642 static int perf_series_engines(void *arg) 2643 { 2644 struct drm_i915_private *i915 = arg; 2645 static int (* const func[])(void *arg) = { 2646 s_sync0, 2647 s_sync1, 2648 s_many, 2649 NULL, 2650 }; 2651 const unsigned int nengines = num_uabi_engines(i915); 2652 struct intel_engine_cs *engine; 2653 int (* const *fn)(void *arg); 2654 struct pm_qos_request qos; 2655 struct perf_stats *stats; 2656 struct perf_series *ps; 2657 unsigned int idx; 2658 int err = 0; 2659 2660 stats = kcalloc(nengines, sizeof(*stats), GFP_KERNEL); 2661 if (!stats) 2662 return -ENOMEM; 2663 2664 ps = kzalloc(struct_size(ps, ce, nengines), GFP_KERNEL); 2665 if (!ps) { 2666 kfree(stats); 2667 return -ENOMEM; 2668 } 2669 2670 cpu_latency_qos_add_request(&qos, 0); /* disable cstates */ 2671 2672 ps->i915 = i915; 2673 ps->nengines = nengines; 2674 2675 idx = 0; 2676 for_each_uabi_engine(engine, i915) { 2677 struct intel_context *ce; 2678 2679 ce = intel_context_create(engine); 2680 if (IS_ERR(ce)) { 2681 err = PTR_ERR(ce); 2682 goto out; 2683 } 2684 2685 err = intel_context_pin(ce); 2686 if (err) { 2687 intel_context_put(ce); 2688 goto out; 2689 } 2690 2691 ps->ce[idx++] = ce; 2692 } 2693 GEM_BUG_ON(idx != ps->nengines); 2694 2695 for (fn = func; *fn && !err; fn++) { 2696 char name[KSYM_NAME_LEN]; 2697 struct igt_live_test t; 2698 2699 snprintf(name, sizeof(name), "%ps", *fn); 2700 err = igt_live_test_begin(&t, i915, __func__, name); 2701 if (err) 2702 break; 2703 2704 for (idx = 0; idx < nengines; idx++) { 2705 struct perf_stats *p = 2706 memset(&stats[idx], 0, 
sizeof(stats[idx])); 2707 struct intel_context *ce = ps->ce[idx]; 2708 2709 p->engine = ps->ce[idx]->engine; 2710 intel_engine_pm_get(p->engine); 2711 2712 if (intel_engine_supports_stats(p->engine)) 2713 p->busy = intel_engine_get_busy_time(p->engine, 2714 &p->time) + 1; 2715 else 2716 p->time = ktime_get(); 2717 p->runtime = -intel_context_get_total_runtime_ns(ce); 2718 } 2719 2720 err = (*fn)(ps); 2721 if (igt_live_test_end(&t)) 2722 err = -EIO; 2723 2724 for (idx = 0; idx < nengines; idx++) { 2725 struct perf_stats *p = &stats[idx]; 2726 struct intel_context *ce = ps->ce[idx]; 2727 int integer, decimal; 2728 u64 busy, dt, now; 2729 2730 if (p->busy) 2731 p->busy = ktime_sub(intel_engine_get_busy_time(p->engine, 2732 &now), 2733 p->busy - 1); 2734 else 2735 now = ktime_get(); 2736 p->time = ktime_sub(now, p->time); 2737 2738 err = switch_to_kernel_sync(ce, err); 2739 p->runtime += intel_context_get_total_runtime_ns(ce); 2740 intel_engine_pm_put(p->engine); 2741 2742 busy = 100 * ktime_to_ns(p->busy); 2743 dt = ktime_to_ns(p->time); 2744 if (dt) { 2745 integer = div64_u64(busy, dt); 2746 busy -= integer * dt; 2747 decimal = div64_u64(100 * busy, dt); 2748 } else { 2749 integer = 0; 2750 decimal = 0; 2751 } 2752 2753 pr_info("%s %5s: { seqno:%d, busy:%d.%02d%%, runtime:%lldms, walltime:%lldms }\n", 2754 name, p->engine->name, ce->timeline->seqno, 2755 integer, decimal, 2756 div_u64(p->runtime, 1000 * 1000), 2757 div_u64(ktime_to_ns(p->time), 1000 * 1000)); 2758 } 2759 } 2760 2761 out: 2762 for (idx = 0; idx < nengines; idx++) { 2763 if (IS_ERR_OR_NULL(ps->ce[idx])) 2764 break; 2765 2766 intel_context_unpin(ps->ce[idx]); 2767 intel_context_put(ps->ce[idx]); 2768 } 2769 kfree(ps); 2770 2771 cpu_latency_qos_remove_request(&qos); 2772 kfree(stats); 2773 return err; 2774 } 2775 2776 static int p_sync0(void *arg) 2777 { 2778 struct perf_stats *p = arg; 2779 struct intel_engine_cs *engine = p->engine; 2780 struct intel_context *ce; 2781 IGT_TIMEOUT(end_time); 2782 unsigned long count; 2783 bool busy; 2784 int err = 0; 2785 2786 ce = intel_context_create(engine); 2787 if (IS_ERR(ce)) 2788 return PTR_ERR(ce); 2789 2790 err = intel_context_pin(ce); 2791 if (err) { 2792 intel_context_put(ce); 2793 return err; 2794 } 2795 2796 if (intel_engine_supports_stats(engine)) { 2797 p->busy = intel_engine_get_busy_time(engine, &p->time); 2798 busy = true; 2799 } else { 2800 p->time = ktime_get(); 2801 busy = false; 2802 } 2803 2804 count = 0; 2805 do { 2806 struct i915_request *rq; 2807 2808 rq = i915_request_create(ce); 2809 if (IS_ERR(rq)) { 2810 err = PTR_ERR(rq); 2811 break; 2812 } 2813 2814 i915_request_get(rq); 2815 i915_request_add(rq); 2816 2817 err = 0; 2818 if (i915_request_wait(rq, 0, HZ) < 0) 2819 err = -ETIME; 2820 i915_request_put(rq); 2821 if (err) 2822 break; 2823 2824 count++; 2825 } while (!__igt_timeout(end_time, NULL)); 2826 2827 if (busy) { 2828 ktime_t now; 2829 2830 p->busy = ktime_sub(intel_engine_get_busy_time(engine, &now), 2831 p->busy); 2832 p->time = ktime_sub(now, p->time); 2833 } else { 2834 p->time = ktime_sub(ktime_get(), p->time); 2835 } 2836 2837 err = switch_to_kernel_sync(ce, err); 2838 p->runtime = intel_context_get_total_runtime_ns(ce); 2839 p->count = count; 2840 2841 intel_context_unpin(ce); 2842 intel_context_put(ce); 2843 return err; 2844 } 2845 2846 static int p_sync1(void *arg) 2847 { 2848 struct perf_stats *p = arg; 2849 struct intel_engine_cs *engine = p->engine; 2850 struct i915_request *prev = NULL; 2851 struct intel_context *ce; 2852 IGT_TIMEOUT(end_time); 
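/* Like p_sync0, but wait on the previously submitted request rather than the one just queued, keeping one extra request in flight to cover the latency of the wait itself. */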
2853 unsigned long count; 2854 bool busy; 2855 int err = 0; 2856 2857 ce = intel_context_create(engine); 2858 if (IS_ERR(ce)) 2859 return PTR_ERR(ce); 2860 2861 err = intel_context_pin(ce); 2862 if (err) { 2863 intel_context_put(ce); 2864 return err; 2865 } 2866 2867 if (intel_engine_supports_stats(engine)) { 2868 p->busy = intel_engine_get_busy_time(engine, &p->time); 2869 busy = true; 2870 } else { 2871 p->time = ktime_get(); 2872 busy = false; 2873 } 2874 2875 count = 0; 2876 do { 2877 struct i915_request *rq; 2878 2879 rq = i915_request_create(ce); 2880 if (IS_ERR(rq)) { 2881 err = PTR_ERR(rq); 2882 break; 2883 } 2884 2885 i915_request_get(rq); 2886 i915_request_add(rq); 2887 2888 err = 0; 2889 if (prev && i915_request_wait(prev, 0, HZ) < 0) 2890 err = -ETIME; 2891 i915_request_put(prev); 2892 prev = rq; 2893 if (err) 2894 break; 2895 2896 count++; 2897 } while (!__igt_timeout(end_time, NULL)); 2898 i915_request_put(prev); 2899 2900 if (busy) { 2901 ktime_t now; 2902 2903 p->busy = ktime_sub(intel_engine_get_busy_time(engine, &now), 2904 p->busy); 2905 p->time = ktime_sub(now, p->time); 2906 } else { 2907 p->time = ktime_sub(ktime_get(), p->time); 2908 } 2909 2910 err = switch_to_kernel_sync(ce, err); 2911 p->runtime = intel_context_get_total_runtime_ns(ce); 2912 p->count = count; 2913 2914 intel_context_unpin(ce); 2915 intel_context_put(ce); 2916 return err; 2917 } 2918 2919 static int p_many(void *arg) 2920 { 2921 struct perf_stats *p = arg; 2922 struct intel_engine_cs *engine = p->engine; 2923 struct intel_context *ce; 2924 IGT_TIMEOUT(end_time); 2925 unsigned long count; 2926 int err = 0; 2927 bool busy; 2928 2929 ce = intel_context_create(engine); 2930 if (IS_ERR(ce)) 2931 return PTR_ERR(ce); 2932 2933 err = intel_context_pin(ce); 2934 if (err) { 2935 intel_context_put(ce); 2936 return err; 2937 } 2938 2939 if (intel_engine_supports_stats(engine)) { 2940 p->busy = intel_engine_get_busy_time(engine, &p->time); 2941 busy = true; 2942 } else { 2943 p->time = ktime_get(); 2944 busy = false; 2945 } 2946 2947 count = 0; 2948 do { 2949 struct i915_request *rq; 2950 2951 rq = i915_request_create(ce); 2952 if (IS_ERR(rq)) { 2953 err = PTR_ERR(rq); 2954 break; 2955 } 2956 2957 i915_request_add(rq); 2958 count++; 2959 } while (!__igt_timeout(end_time, NULL)); 2960 2961 if (busy) { 2962 ktime_t now; 2963 2964 p->busy = ktime_sub(intel_engine_get_busy_time(engine, &now), 2965 p->busy); 2966 p->time = ktime_sub(now, p->time); 2967 } else { 2968 p->time = ktime_sub(ktime_get(), p->time); 2969 } 2970 2971 err = switch_to_kernel_sync(ce, err); 2972 p->runtime = intel_context_get_total_runtime_ns(ce); 2973 p->count = count; 2974 2975 intel_context_unpin(ce); 2976 intel_context_put(ce); 2977 return err; 2978 } 2979 2980 static int perf_parallel_engines(void *arg) 2981 { 2982 struct drm_i915_private *i915 = arg; 2983 static int (* const func[])(void *arg) = { 2984 p_sync0, 2985 p_sync1, 2986 p_many, 2987 NULL, 2988 }; 2989 const unsigned int nengines = num_uabi_engines(i915); 2990 struct intel_engine_cs *engine; 2991 int (* const *fn)(void *arg); 2992 struct pm_qos_request qos; 2993 struct { 2994 struct perf_stats p; 2995 struct task_struct *tsk; 2996 } *engines; 2997 int err = 0; 2998 2999 engines = kcalloc(nengines, sizeof(*engines), GFP_KERNEL); 3000 if (!engines) 3001 return -ENOMEM; 3002 3003 cpu_latency_qos_add_request(&qos, 0); 3004 3005 for (fn = func; *fn; fn++) { 3006 char name[KSYM_NAME_LEN]; 3007 struct igt_live_test t; 3008 unsigned int idx; 3009 3010 snprintf(name, sizeof(name), "%ps", 
*fn); 3011 err = igt_live_test_begin(&t, i915, __func__, name); 3012 if (err) 3013 break; 3014 3015 atomic_set(&i915->selftest.counter, nengines); 3016 3017 idx = 0; 3018 for_each_uabi_engine(engine, i915) { 3019 intel_engine_pm_get(engine); 3020 3021 memset(&engines[idx].p, 0, sizeof(engines[idx].p)); 3022 engines[idx].p.engine = engine; 3023 3024 engines[idx].tsk = kthread_run(*fn, &engines[idx].p, 3025 "igt:%s", engine->name); 3026 if (IS_ERR(engines[idx].tsk)) { 3027 err = PTR_ERR(engines[idx].tsk); 3028 intel_engine_pm_put(engine); 3029 break; 3030 } 3031 get_task_struct(engines[idx++].tsk); 3032 } 3033 3034 yield(); /* start all threads before we kthread_stop() */ 3035 3036 idx = 0; 3037 for_each_uabi_engine(engine, i915) { 3038 int status; 3039 3040 if (IS_ERR(engines[idx].tsk)) 3041 break; 3042 3043 status = kthread_stop(engines[idx].tsk); 3044 if (status && !err) 3045 err = status; 3046 3047 intel_engine_pm_put(engine); 3048 put_task_struct(engines[idx++].tsk); 3049 } 3050 3051 if (igt_live_test_end(&t)) 3052 err = -EIO; 3053 if (err) 3054 break; 3055 3056 idx = 0; 3057 for_each_uabi_engine(engine, i915) { 3058 struct perf_stats *p = &engines[idx].p; 3059 u64 busy = 100 * ktime_to_ns(p->busy); 3060 u64 dt = ktime_to_ns(p->time); 3061 int integer, decimal; 3062 3063 if (dt) { 3064 integer = div64_u64(busy, dt); 3065 busy -= integer * dt; 3066 decimal = div64_u64(100 * busy, dt); 3067 } else { 3068 integer = 0; 3069 decimal = 0; 3070 } 3071 3072 GEM_BUG_ON(engine != p->engine); 3073 pr_info("%s %5s: { count:%lu, busy:%d.%02d%%, runtime:%lldms, walltime:%lldms }\n", 3074 name, engine->name, p->count, integer, decimal, 3075 div_u64(p->runtime, 1000 * 1000), 3076 div_u64(ktime_to_ns(p->time), 1000 * 1000)); 3077 idx++; 3078 } 3079 } 3080 3081 cpu_latency_qos_remove_request(&qos); 3082 kfree(engines); 3083 return err; 3084 } 3085 3086 int i915_request_perf_selftests(struct drm_i915_private *i915) 3087 { 3088 static const struct i915_subtest tests[] = { 3089 SUBTEST(perf_request_latency), 3090 SUBTEST(perf_series_engines), 3091 SUBTEST(perf_parallel_engines), 3092 }; 3093 3094 if (intel_gt_is_wedged(to_gt(i915))) 3095 return 0; 3096 3097 return i915_subtests(tests, i915); 3098 } 3099
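/* Usage sketch, assuming the standard i915 selftest module parameters: the subtests registered above are not called directly but are picked up by the selftest harness, e.g. by loading the driver with something like "modprobe i915 perf_selftests=-1" (or via the corresponding igt wrapper). */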