1 /* 2 * Copyright © 2016 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 * 23 */ 24 25 #include <linux/prime_numbers.h> 26 #include <linux/pm_qos.h> 27 #include <linux/sort.h> 28 29 #include "gem/i915_gem_pm.h" 30 #include "gem/selftests/mock_context.h" 31 32 #include "gt/intel_engine_heartbeat.h" 33 #include "gt/intel_engine_pm.h" 34 #include "gt/intel_engine_user.h" 35 #include "gt/intel_gt.h" 36 #include "gt/intel_gt_clock_utils.h" 37 #include "gt/intel_gt_requests.h" 38 #include "gt/selftest_engine_heartbeat.h" 39 40 #include "i915_random.h" 41 #include "i915_selftest.h" 42 #include "igt_flush_test.h" 43 #include "igt_live_test.h" 44 #include "igt_spinner.h" 45 #include "lib_sw_fence.h" 46 47 #include "mock_drm.h" 48 #include "mock_gem_device.h" 49 50 static unsigned int num_uabi_engines(struct drm_i915_private *i915) 51 { 52 struct intel_engine_cs *engine; 53 unsigned int count; 54 55 count = 0; 56 for_each_uabi_engine(engine, i915) 57 count++; 58 59 return count; 60 } 61 62 static struct intel_engine_cs *rcs0(struct drm_i915_private *i915) 63 { 64 return intel_engine_lookup_user(i915, I915_ENGINE_CLASS_RENDER, 0); 65 } 66 67 static int igt_add_request(void *arg) 68 { 69 struct drm_i915_private *i915 = arg; 70 struct i915_request *request; 71 72 /* Basic preliminary test to create a request and let it loose! 
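	 * A failure here means even the simplest mock request cannot be built and queued.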
*/ 73 74 request = mock_request(rcs0(i915)->kernel_context, HZ / 10); 75 if (!request) 76 return -ENOMEM; 77 78 i915_request_add(request); 79 80 return 0; 81 } 82 83 static int igt_wait_request(void *arg) 84 { 85 const long T = HZ / 4; 86 struct drm_i915_private *i915 = arg; 87 struct i915_request *request; 88 int err = -EINVAL; 89 90 /* Submit a request, then wait upon it */ 91 92 request = mock_request(rcs0(i915)->kernel_context, T); 93 if (!request) 94 return -ENOMEM; 95 96 i915_request_get(request); 97 98 if (i915_request_wait(request, 0, 0) != -ETIME) { 99 pr_err("request wait (busy query) succeeded (expected timeout before submit!)\n"); 100 goto out_request; 101 } 102 103 if (i915_request_wait(request, 0, T) != -ETIME) { 104 pr_err("request wait succeeded (expected timeout before submit!)\n"); 105 goto out_request; 106 } 107 108 if (i915_request_completed(request)) { 109 pr_err("request completed before submit!!\n"); 110 goto out_request; 111 } 112 113 i915_request_add(request); 114 115 if (i915_request_wait(request, 0, 0) != -ETIME) { 116 pr_err("request wait (busy query) succeeded (expected timeout after submit!)\n"); 117 goto out_request; 118 } 119 120 if (i915_request_completed(request)) { 121 pr_err("request completed immediately!\n"); 122 goto out_request; 123 } 124 125 if (i915_request_wait(request, 0, T / 2) != -ETIME) { 126 pr_err("request wait succeeded (expected timeout!)\n"); 127 goto out_request; 128 } 129 130 if (i915_request_wait(request, 0, T) == -ETIME) { 131 pr_err("request wait timed out!\n"); 132 goto out_request; 133 } 134 135 if (!i915_request_completed(request)) { 136 pr_err("request not complete after waiting!\n"); 137 goto out_request; 138 } 139 140 if (i915_request_wait(request, 0, T) == -ETIME) { 141 pr_err("request wait timed out when already complete!\n"); 142 goto out_request; 143 } 144 145 err = 0; 146 out_request: 147 i915_request_put(request); 148 mock_device_flush(i915); 149 return err; 150 } 151 152 static int igt_fence_wait(void *arg) 153 { 154 const long T = HZ / 4; 155 struct drm_i915_private *i915 = arg; 156 struct i915_request *request; 157 int err = -EINVAL; 158 159 /* Submit a request, treat it as a fence and wait upon it */ 160 161 request = mock_request(rcs0(i915)->kernel_context, T); 162 if (!request) 163 return -ENOMEM; 164 165 if (dma_fence_wait_timeout(&request->fence, false, T) != -ETIME) { 166 pr_err("fence wait success before submit (expected timeout)!\n"); 167 goto out; 168 } 169 170 i915_request_add(request); 171 172 if (dma_fence_is_signaled(&request->fence)) { 173 pr_err("fence signaled immediately!\n"); 174 goto out; 175 } 176 177 if (dma_fence_wait_timeout(&request->fence, false, T / 2) != -ETIME) { 178 pr_err("fence wait success after submit (expected timeout)!\n"); 179 goto out; 180 } 181 182 if (dma_fence_wait_timeout(&request->fence, false, T) <= 0) { 183 pr_err("fence wait timed out (expected success)!\n"); 184 goto out; 185 } 186 187 if (!dma_fence_is_signaled(&request->fence)) { 188 pr_err("fence unsignaled after waiting!\n"); 189 goto out; 190 } 191 192 if (dma_fence_wait_timeout(&request->fence, false, T) <= 0) { 193 pr_err("fence wait timed out when complete (expected success)!\n"); 194 goto out; 195 } 196 197 err = 0; 198 out: 199 mock_device_flush(i915); 200 return err; 201 } 202 203 static int igt_request_rewind(void *arg) 204 { 205 struct drm_i915_private *i915 = arg; 206 struct i915_request *request, *vip; 207 struct i915_gem_context *ctx[2]; 208 struct intel_context *ce; 209 int err = -EINVAL; 210 211 ctx[0] = 
mock_context(i915, "A"); 212 213 ce = i915_gem_context_get_engine(ctx[0], RCS0); 214 GEM_BUG_ON(IS_ERR(ce)); 215 request = mock_request(ce, 2 * HZ); 216 intel_context_put(ce); 217 if (!request) { 218 err = -ENOMEM; 219 goto err_context_0; 220 } 221 222 i915_request_get(request); 223 i915_request_add(request); 224 225 ctx[1] = mock_context(i915, "B"); 226 227 ce = i915_gem_context_get_engine(ctx[1], RCS0); 228 GEM_BUG_ON(IS_ERR(ce)); 229 vip = mock_request(ce, 0); 230 intel_context_put(ce); 231 if (!vip) { 232 err = -ENOMEM; 233 goto err_context_1; 234 } 235 236 /* Simulate preemption by manual reordering */ 237 if (!mock_cancel_request(request)) { 238 pr_err("failed to cancel request (already executed)!\n"); 239 i915_request_add(vip); 240 goto err_context_1; 241 } 242 i915_request_get(vip); 243 i915_request_add(vip); 244 rcu_read_lock(); 245 request->engine->submit_request(request); 246 rcu_read_unlock(); 247 248 249 if (i915_request_wait(vip, 0, HZ) == -ETIME) { 250 pr_err("timed out waiting for high priority request\n"); 251 goto err; 252 } 253 254 if (i915_request_completed(request)) { 255 pr_err("low priority request already completed\n"); 256 goto err; 257 } 258 259 err = 0; 260 err: 261 i915_request_put(vip); 262 err_context_1: 263 mock_context_close(ctx[1]); 264 i915_request_put(request); 265 err_context_0: 266 mock_context_close(ctx[0]); 267 mock_device_flush(i915); 268 return err; 269 } 270 271 struct smoketest { 272 struct intel_engine_cs *engine; 273 struct i915_gem_context **contexts; 274 atomic_long_t num_waits, num_fences; 275 int ncontexts, max_batch; 276 struct i915_request *(*request_alloc)(struct intel_context *ce); 277 }; 278 279 static struct i915_request * 280 __mock_request_alloc(struct intel_context *ce) 281 { 282 return mock_request(ce, 0); 283 } 284 285 static struct i915_request * 286 __live_request_alloc(struct intel_context *ce) 287 { 288 return intel_context_create_request(ce); 289 } 290 291 static int __igt_breadcrumbs_smoketest(void *arg) 292 { 293 struct smoketest *t = arg; 294 const unsigned int max_batch = min(t->ncontexts, t->max_batch) - 1; 295 const unsigned int total = 4 * t->ncontexts + 1; 296 unsigned int num_waits = 0, num_fences = 0; 297 struct i915_request **requests; 298 I915_RND_STATE(prng); 299 unsigned int *order; 300 int err = 0; 301 302 /* 303 * A very simple test to catch the most egregious of list handling bugs. 304 * 305 * At its heart, we simply create oodles of requests running across 306 * multiple kthreads and enable signaling on them, for the sole purpose 307 * of stressing our breadcrumb handling. The only inspection we do is 308 * that the fences were marked as signaled. 
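	 * (checked only after the composite wait fence for the batch has fired).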
309 */ 310 311 requests = kcalloc(total, sizeof(*requests), GFP_KERNEL); 312 if (!requests) 313 return -ENOMEM; 314 315 order = i915_random_order(total, &prng); 316 if (!order) { 317 err = -ENOMEM; 318 goto out_requests; 319 } 320 321 while (!kthread_should_stop()) { 322 struct i915_sw_fence *submit, *wait; 323 unsigned int n, count; 324 325 submit = heap_fence_create(GFP_KERNEL); 326 if (!submit) { 327 err = -ENOMEM; 328 break; 329 } 330 331 wait = heap_fence_create(GFP_KERNEL); 332 if (!wait) { 333 i915_sw_fence_commit(submit); 334 heap_fence_put(submit); 335 err = -ENOMEM; 336 break; 337 } 338 339 i915_random_reorder(order, total, &prng); 340 count = 1 + i915_prandom_u32_max_state(max_batch, &prng); 341 342 for (n = 0; n < count; n++) { 343 struct i915_gem_context *ctx = 344 t->contexts[order[n] % t->ncontexts]; 345 struct i915_request *rq; 346 struct intel_context *ce; 347 348 ce = i915_gem_context_get_engine(ctx, t->engine->legacy_idx); 349 GEM_BUG_ON(IS_ERR(ce)); 350 rq = t->request_alloc(ce); 351 intel_context_put(ce); 352 if (IS_ERR(rq)) { 353 err = PTR_ERR(rq); 354 count = n; 355 break; 356 } 357 358 err = i915_sw_fence_await_sw_fence_gfp(&rq->submit, 359 submit, 360 GFP_KERNEL); 361 362 requests[n] = i915_request_get(rq); 363 i915_request_add(rq); 364 365 if (err >= 0) 366 err = i915_sw_fence_await_dma_fence(wait, 367 &rq->fence, 368 0, 369 GFP_KERNEL); 370 371 if (err < 0) { 372 i915_request_put(rq); 373 count = n; 374 break; 375 } 376 } 377 378 i915_sw_fence_commit(submit); 379 i915_sw_fence_commit(wait); 380 381 if (!wait_event_timeout(wait->wait, 382 i915_sw_fence_done(wait), 383 5 * HZ)) { 384 struct i915_request *rq = requests[count - 1]; 385 386 pr_err("waiting for %d/%d fences (last %llx:%lld) on %s timed out!\n", 387 atomic_read(&wait->pending), count, 388 rq->fence.context, rq->fence.seqno, 389 t->engine->name); 390 GEM_TRACE_DUMP(); 391 392 intel_gt_set_wedged(t->engine->gt); 393 GEM_BUG_ON(!i915_request_completed(rq)); 394 i915_sw_fence_wait(wait); 395 err = -EIO; 396 } 397 398 for (n = 0; n < count; n++) { 399 struct i915_request *rq = requests[n]; 400 401 if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, 402 &rq->fence.flags)) { 403 pr_err("%llu:%llu was not signaled!\n", 404 rq->fence.context, rq->fence.seqno); 405 err = -EINVAL; 406 } 407 408 i915_request_put(rq); 409 } 410 411 heap_fence_put(wait); 412 heap_fence_put(submit); 413 414 if (err < 0) 415 break; 416 417 num_fences += count; 418 num_waits++; 419 420 cond_resched(); 421 } 422 423 atomic_long_add(num_fences, &t->num_fences); 424 atomic_long_add(num_waits, &t->num_waits); 425 426 kfree(order); 427 out_requests: 428 kfree(requests); 429 return err; 430 } 431 432 static int mock_breadcrumbs_smoketest(void *arg) 433 { 434 struct drm_i915_private *i915 = arg; 435 struct smoketest t = { 436 .engine = rcs0(i915), 437 .ncontexts = 1024, 438 .max_batch = 1024, 439 .request_alloc = __mock_request_alloc 440 }; 441 unsigned int ncpus = num_online_cpus(); 442 struct task_struct **threads; 443 unsigned int n; 444 int ret = 0; 445 446 /* 447 * Smoketest our breadcrumb/signal handling for requests across multiple 448 * threads. A very simple test to only catch the most egregious of bugs. 
 * See __igt_breadcrumbs_smoketest();
	 */

	threads = kcalloc(ncpus, sizeof(*threads), GFP_KERNEL);
	if (!threads)
		return -ENOMEM;

	t.contexts = kcalloc(t.ncontexts, sizeof(*t.contexts), GFP_KERNEL);
	if (!t.contexts) {
		ret = -ENOMEM;
		goto out_threads;
	}

	for (n = 0; n < t.ncontexts; n++) {
		t.contexts[n] = mock_context(t.engine->i915, "mock");
		if (!t.contexts[n]) {
			ret = -ENOMEM;
			goto out_contexts;
		}
	}

	for (n = 0; n < ncpus; n++) {
		threads[n] = kthread_run(__igt_breadcrumbs_smoketest,
					 &t, "igt/%d", n);
		if (IS_ERR(threads[n])) {
			ret = PTR_ERR(threads[n]);
			ncpus = n;
			break;
		}

		get_task_struct(threads[n]);
	}

	yield(); /* start all threads before we begin */
	msleep(jiffies_to_msecs(i915_selftest.timeout_jiffies));

	for (n = 0; n < ncpus; n++) {
		int err;

		err = kthread_stop(threads[n]);
		if (err < 0 && !ret)
			ret = err;

		put_task_struct(threads[n]);
	}
	pr_info("Completed %lu waits for %lu fences across %d cpus\n",
		atomic_long_read(&t.num_waits),
		atomic_long_read(&t.num_fences),
		ncpus);

out_contexts:
	for (n = 0; n < t.ncontexts; n++) {
		if (!t.contexts[n])
			break;
		mock_context_close(t.contexts[n]);
	}
	kfree(t.contexts);
out_threads:
	kfree(threads);
	return ret;
}

int i915_request_mock_selftests(void)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(igt_add_request),
		SUBTEST(igt_wait_request),
		SUBTEST(igt_fence_wait),
		SUBTEST(igt_request_rewind),
		SUBTEST(mock_breadcrumbs_smoketest),
	};
	struct drm_i915_private *i915;
	intel_wakeref_t wakeref;
	int err = 0;

	i915 = mock_gem_device();
	if (!i915)
		return -ENOMEM;

	with_intel_runtime_pm(&i915->runtime_pm, wakeref)
		err = i915_subtests(tests, i915);

	mock_destroy_device(i915);

	return err;
}

static int live_nop_request(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct intel_engine_cs *engine;
	struct igt_live_test t;
	int err = -ENODEV;

	/*
	 * Submit various sized batches of empty requests, to each engine
	 * (individually), and wait for the batch to complete. We can check
	 * the overhead of submitting requests to the hardware.
	 */

	for_each_uabi_engine(engine, i915) {
		unsigned long n, prime;
		IGT_TIMEOUT(end_time);
		ktime_t times[2] = {};

		err = igt_live_test_begin(&t, i915, __func__, engine->name);
		if (err)
			return err;

		intel_engine_pm_get(engine);
		for_each_prime_number_from(prime, 1, 8192) {
			struct i915_request *request = NULL;

			times[1] = ktime_get_raw();

			for (n = 0; n < prime; n++) {
				i915_request_put(request);
				request = i915_request_create(engine->kernel_context);
				if (IS_ERR(request))
					return PTR_ERR(request);

				/*
				 * This space is left intentionally blank.
				 *
				 * We do not actually want to perform any
				 * action with this request, we just want
				 * to measure the latency in allocation
				 * and submission of our breadcrumbs -
				 * ensuring that the bare request is sufficient
				 * for the system to work (i.e. proper HEAD
				 * tracking of the rings, interrupt handling,
				 * etc). It also gives us the lowest bounds
				 * for latency.
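				 * (Batch sizes step through the primes up to
				 * 8192, giving us a spread of queue depths to
				 * sample.)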
582 */ 583 584 i915_request_get(request); 585 i915_request_add(request); 586 } 587 i915_request_wait(request, 0, MAX_SCHEDULE_TIMEOUT); 588 i915_request_put(request); 589 590 times[1] = ktime_sub(ktime_get_raw(), times[1]); 591 if (prime == 1) 592 times[0] = times[1]; 593 594 if (__igt_timeout(end_time, NULL)) 595 break; 596 } 597 intel_engine_pm_put(engine); 598 599 err = igt_live_test_end(&t); 600 if (err) 601 return err; 602 603 pr_info("Request latencies on %s: 1 = %lluns, %lu = %lluns\n", 604 engine->name, 605 ktime_to_ns(times[0]), 606 prime, div64_u64(ktime_to_ns(times[1]), prime)); 607 } 608 609 return err; 610 } 611 612 static int __cancel_inactive(struct intel_engine_cs *engine) 613 { 614 struct intel_context *ce; 615 struct igt_spinner spin; 616 struct i915_request *rq; 617 int err = 0; 618 619 if (igt_spinner_init(&spin, engine->gt)) 620 return -ENOMEM; 621 622 ce = intel_context_create(engine); 623 if (IS_ERR(ce)) { 624 err = PTR_ERR(ce); 625 goto out_spin; 626 } 627 628 rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK); 629 if (IS_ERR(rq)) { 630 err = PTR_ERR(rq); 631 goto out_ce; 632 } 633 634 pr_debug("%s: Cancelling inactive request\n", engine->name); 635 i915_request_cancel(rq, -EINTR); 636 i915_request_get(rq); 637 i915_request_add(rq); 638 639 if (i915_request_wait(rq, 0, HZ / 5) < 0) { 640 struct drm_printer p = drm_info_printer(engine->i915->drm.dev); 641 642 pr_err("%s: Failed to cancel inactive request\n", engine->name); 643 intel_engine_dump(engine, &p, "%s\n", engine->name); 644 err = -ETIME; 645 goto out_rq; 646 } 647 648 if (rq->fence.error != -EINTR) { 649 pr_err("%s: fence not cancelled (%u)\n", 650 engine->name, rq->fence.error); 651 err = -EINVAL; 652 } 653 654 out_rq: 655 i915_request_put(rq); 656 out_ce: 657 intel_context_put(ce); 658 out_spin: 659 igt_spinner_fini(&spin); 660 if (err) 661 pr_err("%s: %s error %d\n", __func__, engine->name, err); 662 return err; 663 } 664 665 static int __cancel_active(struct intel_engine_cs *engine) 666 { 667 struct intel_context *ce; 668 struct igt_spinner spin; 669 struct i915_request *rq; 670 int err = 0; 671 672 if (igt_spinner_init(&spin, engine->gt)) 673 return -ENOMEM; 674 675 ce = intel_context_create(engine); 676 if (IS_ERR(ce)) { 677 err = PTR_ERR(ce); 678 goto out_spin; 679 } 680 681 rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK); 682 if (IS_ERR(rq)) { 683 err = PTR_ERR(rq); 684 goto out_ce; 685 } 686 687 pr_debug("%s: Cancelling active request\n", engine->name); 688 i915_request_get(rq); 689 i915_request_add(rq); 690 if (!igt_wait_for_spinner(&spin, rq)) { 691 struct drm_printer p = drm_info_printer(engine->i915->drm.dev); 692 693 pr_err("Failed to start spinner on %s\n", engine->name); 694 intel_engine_dump(engine, &p, "%s\n", engine->name); 695 err = -ETIME; 696 goto out_rq; 697 } 698 i915_request_cancel(rq, -EINTR); 699 700 if (i915_request_wait(rq, 0, HZ / 5) < 0) { 701 struct drm_printer p = drm_info_printer(engine->i915->drm.dev); 702 703 pr_err("%s: Failed to cancel active request\n", engine->name); 704 intel_engine_dump(engine, &p, "%s\n", engine->name); 705 err = -ETIME; 706 goto out_rq; 707 } 708 709 if (rq->fence.error != -EINTR) { 710 pr_err("%s: fence not cancelled (%u)\n", 711 engine->name, rq->fence.error); 712 err = -EINVAL; 713 } 714 715 out_rq: 716 i915_request_put(rq); 717 out_ce: 718 intel_context_put(ce); 719 out_spin: 720 igt_spinner_fini(&spin); 721 if (err) 722 pr_err("%s: %s error %d\n", __func__, engine->name, err); 723 return err; 724 } 725 726 static int 
__cancel_completed(struct intel_engine_cs *engine)
{
	struct intel_context *ce;
	struct igt_spinner spin;
	struct i915_request *rq;
	int err = 0;

	if (igt_spinner_init(&spin, engine->gt))
		return -ENOMEM;

	ce = intel_context_create(engine);
	if (IS_ERR(ce)) {
		err = PTR_ERR(ce);
		goto out_spin;
	}

	rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto out_ce;
	}
	igt_spinner_end(&spin);
	i915_request_get(rq);
	i915_request_add(rq);

	if (i915_request_wait(rq, 0, HZ / 5) < 0) {
		err = -ETIME;
		goto out_rq;
	}

	pr_debug("%s: Cancelling completed request\n", engine->name);
	i915_request_cancel(rq, -EINTR);
	if (rq->fence.error) {
		pr_err("%s: fence not cancelled (%u)\n",
		       engine->name, rq->fence.error);
		err = -EINVAL;
	}

out_rq:
	i915_request_put(rq);
out_ce:
	intel_context_put(ce);
out_spin:
	igt_spinner_fini(&spin);
	if (err)
		pr_err("%s: %s error %d\n", __func__, engine->name, err);
	return err;
}

static int live_cancel_request(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct intel_engine_cs *engine;

	/*
	 * Check cancellation of requests. We expect to be able to immediately
	 * cancel active requests, even if they are currently on the GPU.
	 */

	for_each_uabi_engine(engine, i915) {
		struct igt_live_test t;
		int err, err2;

		if (!intel_engine_has_preemption(engine))
			continue;

		err = igt_live_test_begin(&t, i915, __func__, engine->name);
		if (err)
			return err;

		err = __cancel_inactive(engine);
		if (err == 0)
			err = __cancel_active(engine);
		if (err == 0)
			err = __cancel_completed(engine);

		err2 = igt_live_test_end(&t);
		if (err)
			return err;
		if (err2)
			return err2;
	}

	return 0;
}

static struct i915_vma *empty_batch(struct drm_i915_private *i915)
{
	struct drm_i915_gem_object *obj;
	struct i915_vma *vma;
	u32 *cmd;
	int err;

	obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
	if (IS_ERR(obj))
		return ERR_CAST(obj);

	cmd = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WB);
	if (IS_ERR(cmd)) {
		err = PTR_ERR(cmd);
		goto err;
	}

	*cmd = MI_BATCH_BUFFER_END;

	__i915_gem_object_flush_map(obj, 0, 64);
	i915_gem_object_unpin_map(obj);

	intel_gt_chipset_flush(&i915->gt);

	vma = i915_vma_instance(obj, &i915->ggtt.vm, NULL);
	if (IS_ERR(vma)) {
		err = PTR_ERR(vma);
		goto err;
	}

	err = i915_vma_pin(vma, 0, 0, PIN_USER | PIN_GLOBAL);
	if (err)
		goto err;

	/* Force the wait now to avoid including it in the benchmark */
	err = i915_vma_sync(vma);
	if (err)
		goto err_pin;

	return vma;

err_pin:
	i915_vma_unpin(vma);
err:
	i915_gem_object_put(obj);
	return ERR_PTR(err);
}

static struct i915_request *
empty_request(struct intel_engine_cs *engine,
	      struct i915_vma *batch)
{
	struct i915_request *request;
	int err;

	request = i915_request_create(engine->kernel_context);
	if (IS_ERR(request))
		return request;

	err = engine->emit_bb_start(request,
				    batch->node.start,
				    batch->node.size,
				    I915_DISPATCH_SECURE);
	if (err)
		goto out_request;

	i915_request_get(request);
out_request:
	i915_request_add(request);
	return err ?
ERR_PTR(err) : request; 882 } 883 884 static int live_empty_request(void *arg) 885 { 886 struct drm_i915_private *i915 = arg; 887 struct intel_engine_cs *engine; 888 struct igt_live_test t; 889 struct i915_vma *batch; 890 int err = 0; 891 892 /* 893 * Submit various sized batches of empty requests, to each engine 894 * (individually), and wait for the batch to complete. We can check 895 * the overhead of submitting requests to the hardware. 896 */ 897 898 batch = empty_batch(i915); 899 if (IS_ERR(batch)) 900 return PTR_ERR(batch); 901 902 for_each_uabi_engine(engine, i915) { 903 IGT_TIMEOUT(end_time); 904 struct i915_request *request; 905 unsigned long n, prime; 906 ktime_t times[2] = {}; 907 908 err = igt_live_test_begin(&t, i915, __func__, engine->name); 909 if (err) 910 goto out_batch; 911 912 intel_engine_pm_get(engine); 913 914 /* Warmup / preload */ 915 request = empty_request(engine, batch); 916 if (IS_ERR(request)) { 917 err = PTR_ERR(request); 918 intel_engine_pm_put(engine); 919 goto out_batch; 920 } 921 i915_request_wait(request, 0, MAX_SCHEDULE_TIMEOUT); 922 923 for_each_prime_number_from(prime, 1, 8192) { 924 times[1] = ktime_get_raw(); 925 926 for (n = 0; n < prime; n++) { 927 i915_request_put(request); 928 request = empty_request(engine, batch); 929 if (IS_ERR(request)) { 930 err = PTR_ERR(request); 931 intel_engine_pm_put(engine); 932 goto out_batch; 933 } 934 } 935 i915_request_wait(request, 0, MAX_SCHEDULE_TIMEOUT); 936 937 times[1] = ktime_sub(ktime_get_raw(), times[1]); 938 if (prime == 1) 939 times[0] = times[1]; 940 941 if (__igt_timeout(end_time, NULL)) 942 break; 943 } 944 i915_request_put(request); 945 intel_engine_pm_put(engine); 946 947 err = igt_live_test_end(&t); 948 if (err) 949 goto out_batch; 950 951 pr_info("Batch latencies on %s: 1 = %lluns, %lu = %lluns\n", 952 engine->name, 953 ktime_to_ns(times[0]), 954 prime, div64_u64(ktime_to_ns(times[1]), prime)); 955 } 956 957 out_batch: 958 i915_vma_unpin(batch); 959 i915_vma_put(batch); 960 return err; 961 } 962 963 static struct i915_vma *recursive_batch(struct drm_i915_private *i915) 964 { 965 struct drm_i915_gem_object *obj; 966 const int ver = GRAPHICS_VER(i915); 967 struct i915_vma *vma; 968 u32 *cmd; 969 int err; 970 971 obj = i915_gem_object_create_internal(i915, PAGE_SIZE); 972 if (IS_ERR(obj)) 973 return ERR_CAST(obj); 974 975 vma = i915_vma_instance(obj, i915->gt.vm, NULL); 976 if (IS_ERR(vma)) { 977 err = PTR_ERR(vma); 978 goto err; 979 } 980 981 err = i915_vma_pin(vma, 0, 0, PIN_USER); 982 if (err) 983 goto err; 984 985 cmd = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WC); 986 if (IS_ERR(cmd)) { 987 err = PTR_ERR(cmd); 988 goto err; 989 } 990 991 if (ver >= 8) { 992 *cmd++ = MI_BATCH_BUFFER_START | 1 << 8 | 1; 993 *cmd++ = lower_32_bits(vma->node.start); 994 *cmd++ = upper_32_bits(vma->node.start); 995 } else if (ver >= 6) { 996 *cmd++ = MI_BATCH_BUFFER_START | 1 << 8; 997 *cmd++ = lower_32_bits(vma->node.start); 998 } else { 999 *cmd++ = MI_BATCH_BUFFER_START | MI_BATCH_GTT; 1000 *cmd++ = lower_32_bits(vma->node.start); 1001 } 1002 *cmd++ = MI_BATCH_BUFFER_END; /* terminate early in case of error */ 1003 1004 __i915_gem_object_flush_map(obj, 0, 64); 1005 i915_gem_object_unpin_map(obj); 1006 1007 intel_gt_chipset_flush(&i915->gt); 1008 1009 return vma; 1010 1011 err: 1012 i915_gem_object_put(obj); 1013 return ERR_PTR(err); 1014 } 1015 1016 static int recursive_batch_resolve(struct i915_vma *batch) 1017 { 1018 u32 *cmd; 1019 1020 cmd = i915_gem_object_pin_map_unlocked(batch->obj, I915_MAP_WC); 1021 
if (IS_ERR(cmd)) 1022 return PTR_ERR(cmd); 1023 1024 *cmd = MI_BATCH_BUFFER_END; 1025 1026 __i915_gem_object_flush_map(batch->obj, 0, sizeof(*cmd)); 1027 i915_gem_object_unpin_map(batch->obj); 1028 1029 intel_gt_chipset_flush(batch->vm->gt); 1030 1031 return 0; 1032 } 1033 1034 static int live_all_engines(void *arg) 1035 { 1036 struct drm_i915_private *i915 = arg; 1037 const unsigned int nengines = num_uabi_engines(i915); 1038 struct intel_engine_cs *engine; 1039 struct i915_request **request; 1040 struct igt_live_test t; 1041 struct i915_vma *batch; 1042 unsigned int idx; 1043 int err; 1044 1045 /* 1046 * Check we can submit requests to all engines simultaneously. We 1047 * send a recursive batch to each engine - checking that we don't 1048 * block doing so, and that they don't complete too soon. 1049 */ 1050 1051 request = kcalloc(nengines, sizeof(*request), GFP_KERNEL); 1052 if (!request) 1053 return -ENOMEM; 1054 1055 err = igt_live_test_begin(&t, i915, __func__, ""); 1056 if (err) 1057 goto out_free; 1058 1059 batch = recursive_batch(i915); 1060 if (IS_ERR(batch)) { 1061 err = PTR_ERR(batch); 1062 pr_err("%s: Unable to create batch, err=%d\n", __func__, err); 1063 goto out_free; 1064 } 1065 1066 i915_vma_lock(batch); 1067 1068 idx = 0; 1069 for_each_uabi_engine(engine, i915) { 1070 request[idx] = intel_engine_create_kernel_request(engine); 1071 if (IS_ERR(request[idx])) { 1072 err = PTR_ERR(request[idx]); 1073 pr_err("%s: Request allocation failed with err=%d\n", 1074 __func__, err); 1075 goto out_request; 1076 } 1077 1078 err = i915_request_await_object(request[idx], batch->obj, 0); 1079 if (err == 0) 1080 err = i915_vma_move_to_active(batch, request[idx], 0); 1081 GEM_BUG_ON(err); 1082 1083 err = engine->emit_bb_start(request[idx], 1084 batch->node.start, 1085 batch->node.size, 1086 0); 1087 GEM_BUG_ON(err); 1088 request[idx]->batch = batch; 1089 1090 i915_request_get(request[idx]); 1091 i915_request_add(request[idx]); 1092 idx++; 1093 } 1094 1095 i915_vma_unlock(batch); 1096 1097 idx = 0; 1098 for_each_uabi_engine(engine, i915) { 1099 if (i915_request_completed(request[idx])) { 1100 pr_err("%s(%s): request completed too early!\n", 1101 __func__, engine->name); 1102 err = -EINVAL; 1103 goto out_request; 1104 } 1105 idx++; 1106 } 1107 1108 err = recursive_batch_resolve(batch); 1109 if (err) { 1110 pr_err("%s: failed to resolve batch, err=%d\n", __func__, err); 1111 goto out_request; 1112 } 1113 1114 idx = 0; 1115 for_each_uabi_engine(engine, i915) { 1116 long timeout; 1117 1118 timeout = i915_request_wait(request[idx], 0, 1119 MAX_SCHEDULE_TIMEOUT); 1120 if (timeout < 0) { 1121 err = timeout; 1122 pr_err("%s: error waiting for request on %s, err=%d\n", 1123 __func__, engine->name, err); 1124 goto out_request; 1125 } 1126 1127 GEM_BUG_ON(!i915_request_completed(request[idx])); 1128 i915_request_put(request[idx]); 1129 request[idx] = NULL; 1130 idx++; 1131 } 1132 1133 err = igt_live_test_end(&t); 1134 1135 out_request: 1136 idx = 0; 1137 for_each_uabi_engine(engine, i915) { 1138 if (request[idx]) 1139 i915_request_put(request[idx]); 1140 idx++; 1141 } 1142 i915_vma_unpin(batch); 1143 i915_vma_put(batch); 1144 out_free: 1145 kfree(request); 1146 return err; 1147 } 1148 1149 static int live_sequential_engines(void *arg) 1150 { 1151 struct drm_i915_private *i915 = arg; 1152 const unsigned int nengines = num_uabi_engines(i915); 1153 struct i915_request **request; 1154 struct i915_request *prev = NULL; 1155 struct intel_engine_cs *engine; 1156 struct igt_live_test t; 1157 unsigned int idx; 
1158 int err; 1159 1160 /* 1161 * Check we can submit requests to all engines sequentially, such 1162 * that each successive request waits for the earlier ones. This 1163 * tests that we don't execute requests out of order, even though 1164 * they are running on independent engines. 1165 */ 1166 1167 request = kcalloc(nengines, sizeof(*request), GFP_KERNEL); 1168 if (!request) 1169 return -ENOMEM; 1170 1171 err = igt_live_test_begin(&t, i915, __func__, ""); 1172 if (err) 1173 goto out_free; 1174 1175 idx = 0; 1176 for_each_uabi_engine(engine, i915) { 1177 struct i915_vma *batch; 1178 1179 batch = recursive_batch(i915); 1180 if (IS_ERR(batch)) { 1181 err = PTR_ERR(batch); 1182 pr_err("%s: Unable to create batch for %s, err=%d\n", 1183 __func__, engine->name, err); 1184 goto out_free; 1185 } 1186 1187 i915_vma_lock(batch); 1188 request[idx] = intel_engine_create_kernel_request(engine); 1189 if (IS_ERR(request[idx])) { 1190 err = PTR_ERR(request[idx]); 1191 pr_err("%s: Request allocation failed for %s with err=%d\n", 1192 __func__, engine->name, err); 1193 goto out_unlock; 1194 } 1195 1196 if (prev) { 1197 err = i915_request_await_dma_fence(request[idx], 1198 &prev->fence); 1199 if (err) { 1200 i915_request_add(request[idx]); 1201 pr_err("%s: Request await failed for %s with err=%d\n", 1202 __func__, engine->name, err); 1203 goto out_unlock; 1204 } 1205 } 1206 1207 err = i915_request_await_object(request[idx], 1208 batch->obj, false); 1209 if (err == 0) 1210 err = i915_vma_move_to_active(batch, request[idx], 0); 1211 GEM_BUG_ON(err); 1212 1213 err = engine->emit_bb_start(request[idx], 1214 batch->node.start, 1215 batch->node.size, 1216 0); 1217 GEM_BUG_ON(err); 1218 request[idx]->batch = batch; 1219 1220 i915_request_get(request[idx]); 1221 i915_request_add(request[idx]); 1222 1223 prev = request[idx]; 1224 idx++; 1225 1226 out_unlock: 1227 i915_vma_unlock(batch); 1228 if (err) 1229 goto out_request; 1230 } 1231 1232 idx = 0; 1233 for_each_uabi_engine(engine, i915) { 1234 long timeout; 1235 1236 if (i915_request_completed(request[idx])) { 1237 pr_err("%s(%s): request completed too early!\n", 1238 __func__, engine->name); 1239 err = -EINVAL; 1240 goto out_request; 1241 } 1242 1243 err = recursive_batch_resolve(request[idx]->batch); 1244 if (err) { 1245 pr_err("%s: failed to resolve batch, err=%d\n", 1246 __func__, err); 1247 goto out_request; 1248 } 1249 1250 timeout = i915_request_wait(request[idx], 0, 1251 MAX_SCHEDULE_TIMEOUT); 1252 if (timeout < 0) { 1253 err = timeout; 1254 pr_err("%s: error waiting for request on %s, err=%d\n", 1255 __func__, engine->name, err); 1256 goto out_request; 1257 } 1258 1259 GEM_BUG_ON(!i915_request_completed(request[idx])); 1260 idx++; 1261 } 1262 1263 err = igt_live_test_end(&t); 1264 1265 out_request: 1266 idx = 0; 1267 for_each_uabi_engine(engine, i915) { 1268 u32 *cmd; 1269 1270 if (!request[idx]) 1271 break; 1272 1273 cmd = i915_gem_object_pin_map_unlocked(request[idx]->batch->obj, 1274 I915_MAP_WC); 1275 if (!IS_ERR(cmd)) { 1276 *cmd = MI_BATCH_BUFFER_END; 1277 1278 __i915_gem_object_flush_map(request[idx]->batch->obj, 1279 0, sizeof(*cmd)); 1280 i915_gem_object_unpin_map(request[idx]->batch->obj); 1281 1282 intel_gt_chipset_flush(engine->gt); 1283 } 1284 1285 i915_vma_put(request[idx]->batch); 1286 i915_request_put(request[idx]); 1287 idx++; 1288 } 1289 out_free: 1290 kfree(request); 1291 return err; 1292 } 1293 1294 static int __live_parallel_engine1(void *arg) 1295 { 1296 struct intel_engine_cs *engine = arg; 1297 IGT_TIMEOUT(end_time); 1298 unsigned 
long count; 1299 int err = 0; 1300 1301 count = 0; 1302 intel_engine_pm_get(engine); 1303 do { 1304 struct i915_request *rq; 1305 1306 rq = i915_request_create(engine->kernel_context); 1307 if (IS_ERR(rq)) { 1308 err = PTR_ERR(rq); 1309 break; 1310 } 1311 1312 i915_request_get(rq); 1313 i915_request_add(rq); 1314 1315 err = 0; 1316 if (i915_request_wait(rq, 0, HZ / 5) < 0) 1317 err = -ETIME; 1318 i915_request_put(rq); 1319 if (err) 1320 break; 1321 1322 count++; 1323 } while (!__igt_timeout(end_time, NULL)); 1324 intel_engine_pm_put(engine); 1325 1326 pr_info("%s: %lu request + sync\n", engine->name, count); 1327 return err; 1328 } 1329 1330 static int __live_parallel_engineN(void *arg) 1331 { 1332 struct intel_engine_cs *engine = arg; 1333 IGT_TIMEOUT(end_time); 1334 unsigned long count; 1335 int err = 0; 1336 1337 count = 0; 1338 intel_engine_pm_get(engine); 1339 do { 1340 struct i915_request *rq; 1341 1342 rq = i915_request_create(engine->kernel_context); 1343 if (IS_ERR(rq)) { 1344 err = PTR_ERR(rq); 1345 break; 1346 } 1347 1348 i915_request_add(rq); 1349 count++; 1350 } while (!__igt_timeout(end_time, NULL)); 1351 intel_engine_pm_put(engine); 1352 1353 pr_info("%s: %lu requests\n", engine->name, count); 1354 return err; 1355 } 1356 1357 static bool wake_all(struct drm_i915_private *i915) 1358 { 1359 if (atomic_dec_and_test(&i915->selftest.counter)) { 1360 wake_up_var(&i915->selftest.counter); 1361 return true; 1362 } 1363 1364 return false; 1365 } 1366 1367 static int wait_for_all(struct drm_i915_private *i915) 1368 { 1369 if (wake_all(i915)) 1370 return 0; 1371 1372 if (wait_var_event_timeout(&i915->selftest.counter, 1373 !atomic_read(&i915->selftest.counter), 1374 i915_selftest.timeout_jiffies)) 1375 return 0; 1376 1377 return -ETIME; 1378 } 1379 1380 static int __live_parallel_spin(void *arg) 1381 { 1382 struct intel_engine_cs *engine = arg; 1383 struct igt_spinner spin; 1384 struct i915_request *rq; 1385 int err = 0; 1386 1387 /* 1388 * Create a spinner running for eternity on each engine. If a second 1389 * spinner is incorrectly placed on the same engine, it will not be 1390 * able to start in time. 
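	 * Each spinner therefore occupies its engine until every engine has checked in via wait_for_all().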
1391 */ 1392 1393 if (igt_spinner_init(&spin, engine->gt)) { 1394 wake_all(engine->i915); 1395 return -ENOMEM; 1396 } 1397 1398 intel_engine_pm_get(engine); 1399 rq = igt_spinner_create_request(&spin, 1400 engine->kernel_context, 1401 MI_NOOP); /* no preemption */ 1402 intel_engine_pm_put(engine); 1403 if (IS_ERR(rq)) { 1404 err = PTR_ERR(rq); 1405 if (err == -ENODEV) 1406 err = 0; 1407 wake_all(engine->i915); 1408 goto out_spin; 1409 } 1410 1411 i915_request_get(rq); 1412 i915_request_add(rq); 1413 if (igt_wait_for_spinner(&spin, rq)) { 1414 /* Occupy this engine for the whole test */ 1415 err = wait_for_all(engine->i915); 1416 } else { 1417 pr_err("Failed to start spinner on %s\n", engine->name); 1418 err = -EINVAL; 1419 } 1420 igt_spinner_end(&spin); 1421 1422 if (err == 0 && i915_request_wait(rq, 0, HZ / 5) < 0) 1423 err = -EIO; 1424 i915_request_put(rq); 1425 1426 out_spin: 1427 igt_spinner_fini(&spin); 1428 return err; 1429 } 1430 1431 static int live_parallel_engines(void *arg) 1432 { 1433 struct drm_i915_private *i915 = arg; 1434 static int (* const func[])(void *arg) = { 1435 __live_parallel_engine1, 1436 __live_parallel_engineN, 1437 __live_parallel_spin, 1438 NULL, 1439 }; 1440 const unsigned int nengines = num_uabi_engines(i915); 1441 struct intel_engine_cs *engine; 1442 int (* const *fn)(void *arg); 1443 struct task_struct **tsk; 1444 int err = 0; 1445 1446 /* 1447 * Check we can submit requests to all engines concurrently. This 1448 * tests that we load up the system maximally. 1449 */ 1450 1451 tsk = kcalloc(nengines, sizeof(*tsk), GFP_KERNEL); 1452 if (!tsk) 1453 return -ENOMEM; 1454 1455 for (fn = func; !err && *fn; fn++) { 1456 char name[KSYM_NAME_LEN]; 1457 struct igt_live_test t; 1458 unsigned int idx; 1459 1460 snprintf(name, sizeof(name), "%ps", *fn); 1461 err = igt_live_test_begin(&t, i915, __func__, name); 1462 if (err) 1463 break; 1464 1465 atomic_set(&i915->selftest.counter, nengines); 1466 1467 idx = 0; 1468 for_each_uabi_engine(engine, i915) { 1469 tsk[idx] = kthread_run(*fn, engine, 1470 "igt/parallel:%s", 1471 engine->name); 1472 if (IS_ERR(tsk[idx])) { 1473 err = PTR_ERR(tsk[idx]); 1474 break; 1475 } 1476 get_task_struct(tsk[idx++]); 1477 } 1478 1479 yield(); /* start all threads before we kthread_stop() */ 1480 1481 idx = 0; 1482 for_each_uabi_engine(engine, i915) { 1483 int status; 1484 1485 if (IS_ERR(tsk[idx])) 1486 break; 1487 1488 status = kthread_stop(tsk[idx]); 1489 if (status && !err) 1490 err = status; 1491 1492 put_task_struct(tsk[idx++]); 1493 } 1494 1495 if (igt_live_test_end(&t)) 1496 err = -EIO; 1497 } 1498 1499 kfree(tsk); 1500 return err; 1501 } 1502 1503 static int 1504 max_batches(struct i915_gem_context *ctx, struct intel_engine_cs *engine) 1505 { 1506 struct i915_request *rq; 1507 int ret; 1508 1509 /* 1510 * Before execlists, all contexts share the same ringbuffer. With 1511 * execlists, each context/engine has a separate ringbuffer and 1512 * for the purposes of this test, inexhaustible. 1513 * 1514 * For the global ringbuffer though, we have to be very careful 1515 * that we do not wrap while preventing the execution of requests 1516 * with a unsignaled fence. 
1517 */ 1518 if (HAS_EXECLISTS(ctx->i915)) 1519 return INT_MAX; 1520 1521 rq = igt_request_alloc(ctx, engine); 1522 if (IS_ERR(rq)) { 1523 ret = PTR_ERR(rq); 1524 } else { 1525 int sz; 1526 1527 ret = rq->ring->size - rq->reserved_space; 1528 i915_request_add(rq); 1529 1530 sz = rq->ring->emit - rq->head; 1531 if (sz < 0) 1532 sz += rq->ring->size; 1533 ret /= sz; 1534 ret /= 2; /* leave half spare, in case of emergency! */ 1535 } 1536 1537 return ret; 1538 } 1539 1540 static int live_breadcrumbs_smoketest(void *arg) 1541 { 1542 struct drm_i915_private *i915 = arg; 1543 const unsigned int nengines = num_uabi_engines(i915); 1544 const unsigned int ncpus = num_online_cpus(); 1545 unsigned long num_waits, num_fences; 1546 struct intel_engine_cs *engine; 1547 struct task_struct **threads; 1548 struct igt_live_test live; 1549 intel_wakeref_t wakeref; 1550 struct smoketest *smoke; 1551 unsigned int n, idx; 1552 struct file *file; 1553 int ret = 0; 1554 1555 /* 1556 * Smoketest our breadcrumb/signal handling for requests across multiple 1557 * threads. A very simple test to only catch the most egregious of bugs. 1558 * See __igt_breadcrumbs_smoketest(); 1559 * 1560 * On real hardware this time. 1561 */ 1562 1563 wakeref = intel_runtime_pm_get(&i915->runtime_pm); 1564 1565 file = mock_file(i915); 1566 if (IS_ERR(file)) { 1567 ret = PTR_ERR(file); 1568 goto out_rpm; 1569 } 1570 1571 smoke = kcalloc(nengines, sizeof(*smoke), GFP_KERNEL); 1572 if (!smoke) { 1573 ret = -ENOMEM; 1574 goto out_file; 1575 } 1576 1577 threads = kcalloc(ncpus * nengines, sizeof(*threads), GFP_KERNEL); 1578 if (!threads) { 1579 ret = -ENOMEM; 1580 goto out_smoke; 1581 } 1582 1583 smoke[0].request_alloc = __live_request_alloc; 1584 smoke[0].ncontexts = 64; 1585 smoke[0].contexts = kcalloc(smoke[0].ncontexts, 1586 sizeof(*smoke[0].contexts), 1587 GFP_KERNEL); 1588 if (!smoke[0].contexts) { 1589 ret = -ENOMEM; 1590 goto out_threads; 1591 } 1592 1593 for (n = 0; n < smoke[0].ncontexts; n++) { 1594 smoke[0].contexts[n] = live_context(i915, file); 1595 if (IS_ERR(smoke[0].contexts[n])) { 1596 ret = PTR_ERR(smoke[0].contexts[n]); 1597 goto out_contexts; 1598 } 1599 } 1600 1601 ret = igt_live_test_begin(&live, i915, __func__, ""); 1602 if (ret) 1603 goto out_contexts; 1604 1605 idx = 0; 1606 for_each_uabi_engine(engine, i915) { 1607 smoke[idx] = smoke[0]; 1608 smoke[idx].engine = engine; 1609 smoke[idx].max_batch = 1610 max_batches(smoke[0].contexts[0], engine); 1611 if (smoke[idx].max_batch < 0) { 1612 ret = smoke[idx].max_batch; 1613 goto out_flush; 1614 } 1615 /* One ring interleaved between requests from all cpus */ 1616 smoke[idx].max_batch /= num_online_cpus() + 1; 1617 pr_debug("Limiting batches to %d requests on %s\n", 1618 smoke[idx].max_batch, engine->name); 1619 1620 for (n = 0; n < ncpus; n++) { 1621 struct task_struct *tsk; 1622 1623 tsk = kthread_run(__igt_breadcrumbs_smoketest, 1624 &smoke[idx], "igt/%d.%d", idx, n); 1625 if (IS_ERR(tsk)) { 1626 ret = PTR_ERR(tsk); 1627 goto out_flush; 1628 } 1629 1630 get_task_struct(tsk); 1631 threads[idx * ncpus + n] = tsk; 1632 } 1633 1634 idx++; 1635 } 1636 1637 yield(); /* start all threads before we begin */ 1638 msleep(jiffies_to_msecs(i915_selftest.timeout_jiffies)); 1639 1640 out_flush: 1641 idx = 0; 1642 num_waits = 0; 1643 num_fences = 0; 1644 for_each_uabi_engine(engine, i915) { 1645 for (n = 0; n < ncpus; n++) { 1646 struct task_struct *tsk = threads[idx * ncpus + n]; 1647 int err; 1648 1649 if (!tsk) 1650 continue; 1651 1652 err = kthread_stop(tsk); 1653 if (err < 0 && 
!ret) 1654 ret = err; 1655 1656 put_task_struct(tsk); 1657 } 1658 1659 num_waits += atomic_long_read(&smoke[idx].num_waits); 1660 num_fences += atomic_long_read(&smoke[idx].num_fences); 1661 idx++; 1662 } 1663 pr_info("Completed %lu waits for %lu fences across %d engines and %d cpus\n", 1664 num_waits, num_fences, idx, ncpus); 1665 1666 ret = igt_live_test_end(&live) ?: ret; 1667 out_contexts: 1668 kfree(smoke[0].contexts); 1669 out_threads: 1670 kfree(threads); 1671 out_smoke: 1672 kfree(smoke); 1673 out_file: 1674 fput(file); 1675 out_rpm: 1676 intel_runtime_pm_put(&i915->runtime_pm, wakeref); 1677 1678 return ret; 1679 } 1680 1681 int i915_request_live_selftests(struct drm_i915_private *i915) 1682 { 1683 static const struct i915_subtest tests[] = { 1684 SUBTEST(live_nop_request), 1685 SUBTEST(live_all_engines), 1686 SUBTEST(live_sequential_engines), 1687 SUBTEST(live_parallel_engines), 1688 SUBTEST(live_empty_request), 1689 SUBTEST(live_cancel_request), 1690 SUBTEST(live_breadcrumbs_smoketest), 1691 }; 1692 1693 if (intel_gt_is_wedged(&i915->gt)) 1694 return 0; 1695 1696 return i915_subtests(tests, i915); 1697 } 1698 1699 static int switch_to_kernel_sync(struct intel_context *ce, int err) 1700 { 1701 struct i915_request *rq; 1702 struct dma_fence *fence; 1703 1704 rq = intel_engine_create_kernel_request(ce->engine); 1705 if (IS_ERR(rq)) 1706 return PTR_ERR(rq); 1707 1708 fence = i915_active_fence_get(&ce->timeline->last_request); 1709 if (fence) { 1710 i915_request_await_dma_fence(rq, fence); 1711 dma_fence_put(fence); 1712 } 1713 1714 rq = i915_request_get(rq); 1715 i915_request_add(rq); 1716 if (i915_request_wait(rq, 0, HZ / 2) < 0 && !err) 1717 err = -ETIME; 1718 i915_request_put(rq); 1719 1720 while (!err && !intel_engine_is_idle(ce->engine)) 1721 intel_engine_flush_submission(ce->engine); 1722 1723 return err; 1724 } 1725 1726 struct perf_stats { 1727 struct intel_engine_cs *engine; 1728 unsigned long count; 1729 ktime_t time; 1730 ktime_t busy; 1731 u64 runtime; 1732 }; 1733 1734 struct perf_series { 1735 struct drm_i915_private *i915; 1736 unsigned int nengines; 1737 struct intel_context *ce[]; 1738 }; 1739 1740 static int cmp_u32(const void *A, const void *B) 1741 { 1742 const u32 *a = A, *b = B; 1743 1744 return *a - *b; 1745 } 1746 1747 static u32 trifilter(u32 *a) 1748 { 1749 u64 sum; 1750 1751 #define TF_COUNT 5 1752 sort(a, TF_COUNT, sizeof(*a), cmp_u32, NULL); 1753 1754 sum = mul_u32_u32(a[2], 2); 1755 sum += a[1]; 1756 sum += a[3]; 1757 1758 GEM_BUG_ON(sum > U32_MAX); 1759 return sum; 1760 #define TF_BIAS 2 1761 } 1762 1763 static u64 cycles_to_ns(struct intel_engine_cs *engine, u32 cycles) 1764 { 1765 u64 ns = intel_gt_clock_interval_to_ns(engine->gt, cycles); 1766 1767 return DIV_ROUND_CLOSEST(ns, 1 << TF_BIAS); 1768 } 1769 1770 static u32 *emit_timestamp_store(u32 *cs, struct intel_context *ce, u32 offset) 1771 { 1772 *cs++ = MI_STORE_REGISTER_MEM_GEN8 | MI_USE_GGTT; 1773 *cs++ = i915_mmio_reg_offset(RING_TIMESTAMP((ce->engine->mmio_base))); 1774 *cs++ = offset; 1775 *cs++ = 0; 1776 1777 return cs; 1778 } 1779 1780 static u32 *emit_store_dw(u32 *cs, u32 offset, u32 value) 1781 { 1782 *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; 1783 *cs++ = offset; 1784 *cs++ = 0; 1785 *cs++ = value; 1786 1787 return cs; 1788 } 1789 1790 static u32 *emit_semaphore_poll(u32 *cs, u32 mode, u32 value, u32 offset) 1791 { 1792 *cs++ = MI_SEMAPHORE_WAIT | 1793 MI_SEMAPHORE_GLOBAL_GTT | 1794 MI_SEMAPHORE_POLL | 1795 mode; 1796 *cs++ = value; 1797 *cs++ = offset; 1798 *cs++ = 0; 1799 1800 
return cs; 1801 } 1802 1803 static u32 *emit_semaphore_poll_until(u32 *cs, u32 offset, u32 value) 1804 { 1805 return emit_semaphore_poll(cs, MI_SEMAPHORE_SAD_EQ_SDD, value, offset); 1806 } 1807 1808 static void semaphore_set(u32 *sema, u32 value) 1809 { 1810 WRITE_ONCE(*sema, value); 1811 wmb(); /* flush the update to the cache, and beyond */ 1812 } 1813 1814 static u32 *hwsp_scratch(const struct intel_context *ce) 1815 { 1816 return memset32(ce->engine->status_page.addr + 1000, 0, 21); 1817 } 1818 1819 static u32 hwsp_offset(const struct intel_context *ce, u32 *dw) 1820 { 1821 return (i915_ggtt_offset(ce->engine->status_page.vma) + 1822 offset_in_page(dw)); 1823 } 1824 1825 static int measure_semaphore_response(struct intel_context *ce) 1826 { 1827 u32 *sema = hwsp_scratch(ce); 1828 const u32 offset = hwsp_offset(ce, sema); 1829 u32 elapsed[TF_COUNT], cycles; 1830 struct i915_request *rq; 1831 u32 *cs; 1832 int err; 1833 int i; 1834 1835 /* 1836 * Measure how many cycles it takes for the HW to detect the change 1837 * in a semaphore value. 1838 * 1839 * A: read CS_TIMESTAMP from CPU 1840 * poke semaphore 1841 * B: read CS_TIMESTAMP on GPU 1842 * 1843 * Semaphore latency: B - A 1844 */ 1845 1846 semaphore_set(sema, -1); 1847 1848 rq = i915_request_create(ce); 1849 if (IS_ERR(rq)) 1850 return PTR_ERR(rq); 1851 1852 cs = intel_ring_begin(rq, 4 + 12 * ARRAY_SIZE(elapsed)); 1853 if (IS_ERR(cs)) { 1854 i915_request_add(rq); 1855 err = PTR_ERR(cs); 1856 goto err; 1857 } 1858 1859 cs = emit_store_dw(cs, offset, 0); 1860 for (i = 1; i <= ARRAY_SIZE(elapsed); i++) { 1861 cs = emit_semaphore_poll_until(cs, offset, i); 1862 cs = emit_timestamp_store(cs, ce, offset + i * sizeof(u32)); 1863 cs = emit_store_dw(cs, offset, 0); 1864 } 1865 1866 intel_ring_advance(rq, cs); 1867 i915_request_add(rq); 1868 1869 if (wait_for(READ_ONCE(*sema) == 0, 50)) { 1870 err = -EIO; 1871 goto err; 1872 } 1873 1874 for (i = 1; i <= ARRAY_SIZE(elapsed); i++) { 1875 preempt_disable(); 1876 cycles = ENGINE_READ_FW(ce->engine, RING_TIMESTAMP); 1877 semaphore_set(sema, i); 1878 preempt_enable(); 1879 1880 if (wait_for(READ_ONCE(*sema) == 0, 50)) { 1881 err = -EIO; 1882 goto err; 1883 } 1884 1885 elapsed[i - 1] = sema[i] - cycles; 1886 } 1887 1888 cycles = trifilter(elapsed); 1889 pr_info("%s: semaphore response %d cycles, %lluns\n", 1890 ce->engine->name, cycles >> TF_BIAS, 1891 cycles_to_ns(ce->engine, cycles)); 1892 1893 return intel_gt_wait_for_idle(ce->engine->gt, HZ); 1894 1895 err: 1896 intel_gt_set_wedged(ce->engine->gt); 1897 return err; 1898 } 1899 1900 static int measure_idle_dispatch(struct intel_context *ce) 1901 { 1902 u32 *sema = hwsp_scratch(ce); 1903 const u32 offset = hwsp_offset(ce, sema); 1904 u32 elapsed[TF_COUNT], cycles; 1905 u32 *cs; 1906 int err; 1907 int i; 1908 1909 /* 1910 * Measure how long it takes for us to submit a request while the 1911 * engine is idle, but is resting in our context. 
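	 * (The context is already pinned and resident, so only the submission path itself is timed.)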
1912 * 1913 * A: read CS_TIMESTAMP from CPU 1914 * submit request 1915 * B: read CS_TIMESTAMP on GPU 1916 * 1917 * Submission latency: B - A 1918 */ 1919 1920 for (i = 0; i < ARRAY_SIZE(elapsed); i++) { 1921 struct i915_request *rq; 1922 1923 err = intel_gt_wait_for_idle(ce->engine->gt, HZ / 2); 1924 if (err) 1925 return err; 1926 1927 rq = i915_request_create(ce); 1928 if (IS_ERR(rq)) { 1929 err = PTR_ERR(rq); 1930 goto err; 1931 } 1932 1933 cs = intel_ring_begin(rq, 4); 1934 if (IS_ERR(cs)) { 1935 i915_request_add(rq); 1936 err = PTR_ERR(cs); 1937 goto err; 1938 } 1939 1940 cs = emit_timestamp_store(cs, ce, offset + i * sizeof(u32)); 1941 1942 intel_ring_advance(rq, cs); 1943 1944 preempt_disable(); 1945 local_bh_disable(); 1946 elapsed[i] = ENGINE_READ_FW(ce->engine, RING_TIMESTAMP); 1947 i915_request_add(rq); 1948 local_bh_enable(); 1949 preempt_enable(); 1950 } 1951 1952 err = intel_gt_wait_for_idle(ce->engine->gt, HZ / 2); 1953 if (err) 1954 goto err; 1955 1956 for (i = 0; i < ARRAY_SIZE(elapsed); i++) 1957 elapsed[i] = sema[i] - elapsed[i]; 1958 1959 cycles = trifilter(elapsed); 1960 pr_info("%s: idle dispatch latency %d cycles, %lluns\n", 1961 ce->engine->name, cycles >> TF_BIAS, 1962 cycles_to_ns(ce->engine, cycles)); 1963 1964 return intel_gt_wait_for_idle(ce->engine->gt, HZ); 1965 1966 err: 1967 intel_gt_set_wedged(ce->engine->gt); 1968 return err; 1969 } 1970 1971 static int measure_busy_dispatch(struct intel_context *ce) 1972 { 1973 u32 *sema = hwsp_scratch(ce); 1974 const u32 offset = hwsp_offset(ce, sema); 1975 u32 elapsed[TF_COUNT + 1], cycles; 1976 u32 *cs; 1977 int err; 1978 int i; 1979 1980 /* 1981 * Measure how long it takes for us to submit a request while the 1982 * engine is busy, polling on a semaphore in our context. With 1983 * direct submission, this will include the cost of a lite restore. 
1984 * 1985 * A: read CS_TIMESTAMP from CPU 1986 * submit request 1987 * B: read CS_TIMESTAMP on GPU 1988 * 1989 * Submission latency: B - A 1990 */ 1991 1992 for (i = 1; i <= ARRAY_SIZE(elapsed); i++) { 1993 struct i915_request *rq; 1994 1995 rq = i915_request_create(ce); 1996 if (IS_ERR(rq)) { 1997 err = PTR_ERR(rq); 1998 goto err; 1999 } 2000 2001 cs = intel_ring_begin(rq, 12); 2002 if (IS_ERR(cs)) { 2003 i915_request_add(rq); 2004 err = PTR_ERR(cs); 2005 goto err; 2006 } 2007 2008 cs = emit_store_dw(cs, offset + i * sizeof(u32), -1); 2009 cs = emit_semaphore_poll_until(cs, offset, i); 2010 cs = emit_timestamp_store(cs, ce, offset + i * sizeof(u32)); 2011 2012 intel_ring_advance(rq, cs); 2013 2014 if (i > 1 && wait_for(READ_ONCE(sema[i - 1]), 500)) { 2015 err = -EIO; 2016 goto err; 2017 } 2018 2019 preempt_disable(); 2020 local_bh_disable(); 2021 elapsed[i - 1] = ENGINE_READ_FW(ce->engine, RING_TIMESTAMP); 2022 i915_request_add(rq); 2023 local_bh_enable(); 2024 semaphore_set(sema, i - 1); 2025 preempt_enable(); 2026 } 2027 2028 wait_for(READ_ONCE(sema[i - 1]), 500); 2029 semaphore_set(sema, i - 1); 2030 2031 for (i = 1; i <= TF_COUNT; i++) { 2032 GEM_BUG_ON(sema[i] == -1); 2033 elapsed[i - 1] = sema[i] - elapsed[i]; 2034 } 2035 2036 cycles = trifilter(elapsed); 2037 pr_info("%s: busy dispatch latency %d cycles, %lluns\n", 2038 ce->engine->name, cycles >> TF_BIAS, 2039 cycles_to_ns(ce->engine, cycles)); 2040 2041 return intel_gt_wait_for_idle(ce->engine->gt, HZ); 2042 2043 err: 2044 intel_gt_set_wedged(ce->engine->gt); 2045 return err; 2046 } 2047 2048 static int plug(struct intel_engine_cs *engine, u32 *sema, u32 mode, int value) 2049 { 2050 const u32 offset = 2051 i915_ggtt_offset(engine->status_page.vma) + 2052 offset_in_page(sema); 2053 struct i915_request *rq; 2054 u32 *cs; 2055 2056 rq = i915_request_create(engine->kernel_context); 2057 if (IS_ERR(rq)) 2058 return PTR_ERR(rq); 2059 2060 cs = intel_ring_begin(rq, 4); 2061 if (IS_ERR(cs)) { 2062 i915_request_add(rq); 2063 return PTR_ERR(cs); 2064 } 2065 2066 cs = emit_semaphore_poll(cs, mode, value, offset); 2067 2068 intel_ring_advance(rq, cs); 2069 i915_request_add(rq); 2070 2071 return 0; 2072 } 2073 2074 static int measure_inter_request(struct intel_context *ce) 2075 { 2076 u32 *sema = hwsp_scratch(ce); 2077 const u32 offset = hwsp_offset(ce, sema); 2078 u32 elapsed[TF_COUNT + 1], cycles; 2079 struct i915_sw_fence *submit; 2080 int i, err; 2081 2082 /* 2083 * Measure how long it takes to advance from one request into the 2084 * next. Between each request we flush the GPU caches to memory, 2085 * update the breadcrumbs, and then invalidate those caches. 2086 * We queue up all the requests to be submitted in one batch so 2087 * it should be one set of contiguous measurements. 
2088 * 2089 * A: read CS_TIMESTAMP on GPU 2090 * advance request 2091 * B: read CS_TIMESTAMP on GPU 2092 * 2093 * Request latency: B - A 2094 */ 2095 2096 err = plug(ce->engine, sema, MI_SEMAPHORE_SAD_NEQ_SDD, 0); 2097 if (err) 2098 return err; 2099 2100 submit = heap_fence_create(GFP_KERNEL); 2101 if (!submit) { 2102 semaphore_set(sema, 1); 2103 return -ENOMEM; 2104 } 2105 2106 intel_engine_flush_submission(ce->engine); 2107 for (i = 1; i <= ARRAY_SIZE(elapsed); i++) { 2108 struct i915_request *rq; 2109 u32 *cs; 2110 2111 rq = i915_request_create(ce); 2112 if (IS_ERR(rq)) { 2113 err = PTR_ERR(rq); 2114 goto err_submit; 2115 } 2116 2117 err = i915_sw_fence_await_sw_fence_gfp(&rq->submit, 2118 submit, 2119 GFP_KERNEL); 2120 if (err < 0) { 2121 i915_request_add(rq); 2122 goto err_submit; 2123 } 2124 2125 cs = intel_ring_begin(rq, 4); 2126 if (IS_ERR(cs)) { 2127 i915_request_add(rq); 2128 err = PTR_ERR(cs); 2129 goto err_submit; 2130 } 2131 2132 cs = emit_timestamp_store(cs, ce, offset + i * sizeof(u32)); 2133 2134 intel_ring_advance(rq, cs); 2135 i915_request_add(rq); 2136 } 2137 i915_sw_fence_commit(submit); 2138 intel_engine_flush_submission(ce->engine); 2139 heap_fence_put(submit); 2140 2141 semaphore_set(sema, 1); 2142 err = intel_gt_wait_for_idle(ce->engine->gt, HZ / 2); 2143 if (err) 2144 goto err; 2145 2146 for (i = 1; i <= TF_COUNT; i++) 2147 elapsed[i - 1] = sema[i + 1] - sema[i]; 2148 2149 cycles = trifilter(elapsed); 2150 pr_info("%s: inter-request latency %d cycles, %lluns\n", 2151 ce->engine->name, cycles >> TF_BIAS, 2152 cycles_to_ns(ce->engine, cycles)); 2153 2154 return intel_gt_wait_for_idle(ce->engine->gt, HZ); 2155 2156 err_submit: 2157 i915_sw_fence_commit(submit); 2158 heap_fence_put(submit); 2159 semaphore_set(sema, 1); 2160 err: 2161 intel_gt_set_wedged(ce->engine->gt); 2162 return err; 2163 } 2164 2165 static int measure_context_switch(struct intel_context *ce) 2166 { 2167 u32 *sema = hwsp_scratch(ce); 2168 const u32 offset = hwsp_offset(ce, sema); 2169 struct i915_request *fence = NULL; 2170 u32 elapsed[TF_COUNT + 1], cycles; 2171 int i, j, err; 2172 u32 *cs; 2173 2174 /* 2175 * Measure how long it takes to advance from one request in one 2176 * context to a request in another context. This allows us to 2177 * measure how long the context save/restore take, along with all 2178 * the inter-context setup we require. 
2179 * 2180 * A: read CS_TIMESTAMP on GPU 2181 * switch context 2182 * B: read CS_TIMESTAMP on GPU 2183 * 2184 * Context switch latency: B - A 2185 */ 2186 2187 err = plug(ce->engine, sema, MI_SEMAPHORE_SAD_NEQ_SDD, 0); 2188 if (err) 2189 return err; 2190 2191 for (i = 1; i <= ARRAY_SIZE(elapsed); i++) { 2192 struct intel_context *arr[] = { 2193 ce, ce->engine->kernel_context 2194 }; 2195 u32 addr = offset + ARRAY_SIZE(arr) * i * sizeof(u32); 2196 2197 for (j = 0; j < ARRAY_SIZE(arr); j++) { 2198 struct i915_request *rq; 2199 2200 rq = i915_request_create(arr[j]); 2201 if (IS_ERR(rq)) { 2202 err = PTR_ERR(rq); 2203 goto err_fence; 2204 } 2205 2206 if (fence) { 2207 err = i915_request_await_dma_fence(rq, 2208 &fence->fence); 2209 if (err) { 2210 i915_request_add(rq); 2211 goto err_fence; 2212 } 2213 } 2214 2215 cs = intel_ring_begin(rq, 4); 2216 if (IS_ERR(cs)) { 2217 i915_request_add(rq); 2218 err = PTR_ERR(cs); 2219 goto err_fence; 2220 } 2221 2222 cs = emit_timestamp_store(cs, ce, addr); 2223 addr += sizeof(u32); 2224 2225 intel_ring_advance(rq, cs); 2226 2227 i915_request_put(fence); 2228 fence = i915_request_get(rq); 2229 2230 i915_request_add(rq); 2231 } 2232 } 2233 i915_request_put(fence); 2234 intel_engine_flush_submission(ce->engine); 2235 2236 semaphore_set(sema, 1); 2237 err = intel_gt_wait_for_idle(ce->engine->gt, HZ / 2); 2238 if (err) 2239 goto err; 2240 2241 for (i = 1; i <= TF_COUNT; i++) 2242 elapsed[i - 1] = sema[2 * i + 2] - sema[2 * i + 1]; 2243 2244 cycles = trifilter(elapsed); 2245 pr_info("%s: context switch latency %d cycles, %lluns\n", 2246 ce->engine->name, cycles >> TF_BIAS, 2247 cycles_to_ns(ce->engine, cycles)); 2248 2249 return intel_gt_wait_for_idle(ce->engine->gt, HZ); 2250 2251 err_fence: 2252 i915_request_put(fence); 2253 semaphore_set(sema, 1); 2254 err: 2255 intel_gt_set_wedged(ce->engine->gt); 2256 return err; 2257 } 2258 2259 static int measure_preemption(struct intel_context *ce) 2260 { 2261 u32 *sema = hwsp_scratch(ce); 2262 const u32 offset = hwsp_offset(ce, sema); 2263 u32 elapsed[TF_COUNT], cycles; 2264 u32 *cs; 2265 int err; 2266 int i; 2267 2268 /* 2269 * We measure two latencies while triggering preemption. The first 2270 * latency is how long it takes for us to submit a preempting request. 2271 * The second latency is how it takes for us to return from the 2272 * preemption back to the original context. 

static int measure_preemption(struct intel_context *ce)
{
	u32 *sema = hwsp_scratch(ce);
	const u32 offset = hwsp_offset(ce, sema);
	u32 elapsed[TF_COUNT], cycles;
	u32 *cs;
	int err;
	int i;

	/*
	 * We measure two latencies while triggering preemption. The first
	 * latency is how long it takes for us to submit a preempting request.
	 * The second latency is how long it takes for us to return from the
	 * preemption back to the original context.
	 *
	 * A: read CS_TIMESTAMP from CPU
	 * submit preemption
	 * B: read CS_TIMESTAMP on GPU (in preempting context)
	 * context switch
	 * C: read CS_TIMESTAMP on GPU (in original context)
	 *
	 * Preemption dispatch latency: B - A
	 * Preemption switch latency: C - B
	 */

	if (!intel_engine_has_preemption(ce->engine))
		return 0;

	for (i = 1; i <= ARRAY_SIZE(elapsed); i++) {
		u32 addr = offset + 2 * i * sizeof(u32);
		struct i915_request *rq;

		rq = i915_request_create(ce);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			goto err;
		}

		cs = intel_ring_begin(rq, 12);
		if (IS_ERR(cs)) {
			i915_request_add(rq);
			err = PTR_ERR(cs);
			goto err;
		}

		cs = emit_store_dw(cs, addr, -1);
		cs = emit_semaphore_poll_until(cs, offset, i);
		cs = emit_timestamp_store(cs, ce, addr + sizeof(u32));

		intel_ring_advance(rq, cs);
		i915_request_add(rq);

		if (wait_for(READ_ONCE(sema[2 * i]) == -1, 500)) {
			err = -EIO;
			goto err;
		}

		rq = i915_request_create(ce->engine->kernel_context);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			goto err;
		}

		cs = intel_ring_begin(rq, 8);
		if (IS_ERR(cs)) {
			i915_request_add(rq);
			err = PTR_ERR(cs);
			goto err;
		}

		cs = emit_timestamp_store(cs, ce, addr);
		cs = emit_store_dw(cs, offset, i);

		intel_ring_advance(rq, cs);
		rq->sched.attr.priority = I915_PRIORITY_BARRIER;

		elapsed[i - 1] = ENGINE_READ_FW(ce->engine, RING_TIMESTAMP);
		i915_request_add(rq);
	}

	if (wait_for(READ_ONCE(sema[2 * i - 2]) != -1, 500)) {
		err = -EIO;
		goto err;
	}

	for (i = 1; i <= TF_COUNT; i++)
		elapsed[i - 1] = sema[2 * i + 0] - elapsed[i - 1];

	cycles = trifilter(elapsed);
	pr_info("%s: preemption dispatch latency %d cycles, %lluns\n",
		ce->engine->name, cycles >> TF_BIAS,
		cycles_to_ns(ce->engine, cycles));

	for (i = 1; i <= TF_COUNT; i++)
		elapsed[i - 1] = sema[2 * i + 1] - sema[2 * i + 0];

	cycles = trifilter(elapsed);
	pr_info("%s: preemption switch latency %d cycles, %lluns\n",
		ce->engine->name, cycles >> TF_BIAS,
		cycles_to_ns(ce->engine, cycles));

	return intel_gt_wait_for_idle(ce->engine->gt, HZ);

err:
	intel_gt_set_wedged(ce->engine->gt);
	return err;
}

struct signal_cb {
	struct dma_fence_cb base;
	bool seen;
};

static void signal_cb(struct dma_fence *fence, struct dma_fence_cb *cb)
{
	struct signal_cb *s = container_of(cb, typeof(*s), base);

	smp_store_mb(s->seen, true); /* be safe, be strong */
}
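
/*
 * smp_store_mb() stores the flag and then issues a full memory barrier,
 * so the busy-wait in measure_completion() below observes cb.seen as soon
 * as the fence callback has run. The waiter polls with preemption
 * disabled and only then samples RING_TIMESTAMP from the CPU, keeping the
 * gap between interrupt delivery and the CPU-side timestamp small.
 */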

static int measure_completion(struct intel_context *ce)
{
	u32 *sema = hwsp_scratch(ce);
	const u32 offset = hwsp_offset(ce, sema);
	u32 elapsed[TF_COUNT], cycles;
	u32 *cs;
	int err;
	int i;

	/*
	 * Measure how long it takes for the signal (interrupt) to be
	 * sent from the GPU and processed by the CPU.
	 *
	 * A: read CS_TIMESTAMP on GPU
	 * signal
	 * B: read CS_TIMESTAMP from CPU
	 *
	 * Completion latency: B - A
	 */

	for (i = 1; i <= ARRAY_SIZE(elapsed); i++) {
		struct signal_cb cb = { .seen = false };
		struct i915_request *rq;

		rq = i915_request_create(ce);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			goto err;
		}

		cs = intel_ring_begin(rq, 12);
		if (IS_ERR(cs)) {
			i915_request_add(rq);
			err = PTR_ERR(cs);
			goto err;
		}

		cs = emit_store_dw(cs, offset + i * sizeof(u32), -1);
		cs = emit_semaphore_poll_until(cs, offset, i);
		cs = emit_timestamp_store(cs, ce, offset + i * sizeof(u32));

		intel_ring_advance(rq, cs);

		dma_fence_add_callback(&rq->fence, &cb.base, signal_cb);
		i915_request_add(rq);

		intel_engine_flush_submission(ce->engine);
		if (wait_for(READ_ONCE(sema[i]) == -1, 50)) {
			err = -EIO;
			goto err;
		}

		preempt_disable();
		semaphore_set(sema, i);
		while (!READ_ONCE(cb.seen))
			cpu_relax();

		elapsed[i - 1] = ENGINE_READ_FW(ce->engine, RING_TIMESTAMP);
		preempt_enable();
	}

	err = intel_gt_wait_for_idle(ce->engine->gt, HZ / 2);
	if (err)
		goto err;

	for (i = 0; i < ARRAY_SIZE(elapsed); i++) {
		GEM_BUG_ON(sema[i + 1] == -1);
		elapsed[i] = elapsed[i] - sema[i + 1];
	}

	cycles = trifilter(elapsed);
	pr_info("%s: completion latency %d cycles, %lluns\n",
		ce->engine->name, cycles >> TF_BIAS,
		cycles_to_ns(ce->engine, cycles));

	return intel_gt_wait_for_idle(ce->engine->gt, HZ);

err:
	intel_gt_set_wedged(ce->engine->gt);
	return err;
}

static void rps_pin(struct intel_gt *gt)
{
	/* Pin the frequency to max */
	atomic_inc(&gt->rps.num_waiters);
	intel_uncore_forcewake_get(gt->uncore, FORCEWAKE_ALL);

	mutex_lock(&gt->rps.lock);
	intel_rps_set(&gt->rps, gt->rps.max_freq);
	mutex_unlock(&gt->rps.lock);
}

static void rps_unpin(struct intel_gt *gt)
{
	intel_uncore_forcewake_put(gt->uncore, FORCEWAKE_ALL);
	atomic_dec(&gt->rps.num_waiters);
}
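
/*
 * The latency measurements are bracketed by st_engine_heartbeat_disable()
 * and rps_pin() in perf_request_latency() below: the heartbeat is paused
 * so that no background kernel_context requests are interleaved with the
 * probes, and the GPU frequency is held at maximum so that run-to-run
 * variance from frequency ramping does not pollute the sampled cycle
 * counts.
 */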

static int perf_request_latency(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct intel_engine_cs *engine;
	struct pm_qos_request qos;
	int err = 0;

	if (GRAPHICS_VER(i915) < 8) /* per-engine CS timestamp, semaphores */
		return 0;

	cpu_latency_qos_add_request(&qos, 0); /* disable cstates */

	for_each_uabi_engine(engine, i915) {
		struct intel_context *ce;

		ce = intel_context_create(engine);
		if (IS_ERR(ce)) {
			err = PTR_ERR(ce);
			goto out;
		}

		err = intel_context_pin(ce);
		if (err) {
			intel_context_put(ce);
			goto out;
		}

		st_engine_heartbeat_disable(engine);
		rps_pin(engine->gt);

		if (err == 0)
			err = measure_semaphore_response(ce);
		if (err == 0)
			err = measure_idle_dispatch(ce);
		if (err == 0)
			err = measure_busy_dispatch(ce);
		if (err == 0)
			err = measure_inter_request(ce);
		if (err == 0)
			err = measure_context_switch(ce);
		if (err == 0)
			err = measure_preemption(ce);
		if (err == 0)
			err = measure_completion(ce);

		rps_unpin(engine->gt);
		st_engine_heartbeat_enable(engine);

		intel_context_unpin(ce);
		intel_context_put(ce);
		if (err)
			goto out;
	}

out:
	if (igt_flush_test(i915))
		err = -EIO;

	cpu_latency_qos_remove_request(&qos);
	return err;
}

static int s_sync0(void *arg)
{
	struct perf_series *ps = arg;
	IGT_TIMEOUT(end_time);
	unsigned int idx = 0;
	int err = 0;

	GEM_BUG_ON(!ps->nengines);
	do {
		struct i915_request *rq;

		rq = i915_request_create(ps->ce[idx]);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			break;
		}

		i915_request_get(rq);
		i915_request_add(rq);

		if (i915_request_wait(rq, 0, HZ / 5) < 0)
			err = -ETIME;
		i915_request_put(rq);
		if (err)
			break;

		if (++idx == ps->nengines)
			idx = 0;
	} while (!__igt_timeout(end_time, NULL));

	return err;
}

static int s_sync1(void *arg)
{
	struct perf_series *ps = arg;
	struct i915_request *prev = NULL;
	IGT_TIMEOUT(end_time);
	unsigned int idx = 0;
	int err = 0;

	GEM_BUG_ON(!ps->nengines);
	do {
		struct i915_request *rq;

		rq = i915_request_create(ps->ce[idx]);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			break;
		}

		i915_request_get(rq);
		i915_request_add(rq);

		if (prev && i915_request_wait(prev, 0, HZ / 5) < 0)
			err = -ETIME;
		i915_request_put(prev);
		prev = rq;
		if (err)
			break;

		if (++idx == ps->nengines)
			idx = 0;
	} while (!__igt_timeout(end_time, NULL));
	i915_request_put(prev);

	return err;
}

static int s_many(void *arg)
{
	struct perf_series *ps = arg;
	IGT_TIMEOUT(end_time);
	unsigned int idx = 0;

	GEM_BUG_ON(!ps->nengines);
	do {
		struct i915_request *rq;

		rq = i915_request_create(ps->ce[idx]);
		if (IS_ERR(rq))
			return PTR_ERR(rq);

		i915_request_add(rq);

		if (++idx == ps->nengines)
			idx = 0;
	} while (!__igt_timeout(end_time, NULL));

	return 0;
}
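
/*
 * perf_series_engines() below drives a single flow of requests round-robin
 * across every uabi engine using three submission strategies: s_sync0()
 * waits for each request before issuing the next, s_sync1() waits on the
 * previous request so that one request stays in flight, and s_many()
 * simply submits without waiting. The busy percentage it reports is
 * computed with integer arithmetic only; for example (hypothetical
 * numbers), a busy time of 1,234,567ns over a 2,000,000ns wall-clock
 * interval yields integer = 61 and decimal = 72, printed as "busy:61.72%".
 */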

static int perf_series_engines(void *arg)
{
	struct drm_i915_private *i915 = arg;
	static int (* const func[])(void *arg) = {
		s_sync0,
		s_sync1,
		s_many,
		NULL,
	};
	const unsigned int nengines = num_uabi_engines(i915);
	struct intel_engine_cs *engine;
	int (* const *fn)(void *arg);
	struct pm_qos_request qos;
	struct perf_stats *stats;
	struct perf_series *ps;
	unsigned int idx;
	int err = 0;

	stats = kcalloc(nengines, sizeof(*stats), GFP_KERNEL);
	if (!stats)
		return -ENOMEM;

	ps = kzalloc(struct_size(ps, ce, nengines), GFP_KERNEL);
	if (!ps) {
		kfree(stats);
		return -ENOMEM;
	}

	cpu_latency_qos_add_request(&qos, 0); /* disable cstates */

	ps->i915 = i915;
	ps->nengines = nengines;

	idx = 0;
	for_each_uabi_engine(engine, i915) {
		struct intel_context *ce;

		ce = intel_context_create(engine);
		if (IS_ERR(ce)) {
			err = PTR_ERR(ce);
			goto out;
		}

		err = intel_context_pin(ce);
		if (err) {
			intel_context_put(ce);
			goto out;
		}

		ps->ce[idx++] = ce;
	}
	GEM_BUG_ON(idx != ps->nengines);

	for (fn = func; *fn && !err; fn++) {
		char name[KSYM_NAME_LEN];
		struct igt_live_test t;

		snprintf(name, sizeof(name), "%ps", *fn);
		err = igt_live_test_begin(&t, i915, __func__, name);
		if (err)
			break;

		for (idx = 0; idx < nengines; idx++) {
			struct perf_stats *p =
				memset(&stats[idx], 0, sizeof(stats[idx]));
			struct intel_context *ce = ps->ce[idx];

			p->engine = ps->ce[idx]->engine;
			intel_engine_pm_get(p->engine);

			if (intel_engine_supports_stats(p->engine))
				p->busy = intel_engine_get_busy_time(p->engine,
								     &p->time) + 1;
			else
				p->time = ktime_get();
			p->runtime = -intel_context_get_total_runtime_ns(ce);
		}

		err = (*fn)(ps);
		if (igt_live_test_end(&t))
			err = -EIO;

		for (idx = 0; idx < nengines; idx++) {
			struct perf_stats *p = &stats[idx];
			struct intel_context *ce = ps->ce[idx];
			int integer, decimal;
			u64 busy, dt, now;

			if (p->busy)
				p->busy = ktime_sub(intel_engine_get_busy_time(p->engine,
									       &now),
						    p->busy - 1);
			else
				now = ktime_get();
			p->time = ktime_sub(now, p->time);

			err = switch_to_kernel_sync(ce, err);
			p->runtime += intel_context_get_total_runtime_ns(ce);
			intel_engine_pm_put(p->engine);

			busy = 100 * ktime_to_ns(p->busy);
			dt = ktime_to_ns(p->time);
			if (dt) {
				integer = div64_u64(busy, dt);
				busy -= integer * dt;
				decimal = div64_u64(100 * busy, dt);
			} else {
				integer = 0;
				decimal = 0;
			}

			pr_info("%s %5s: { seqno:%d, busy:%d.%02d%%, runtime:%lldms, walltime:%lldms }\n",
				name, p->engine->name, ce->timeline->seqno,
				integer, decimal,
				div_u64(p->runtime, 1000 * 1000),
				div_u64(ktime_to_ns(p->time), 1000 * 1000));
		}
	}

out:
	for (idx = 0; idx < nengines; idx++) {
		if (IS_ERR_OR_NULL(ps->ce[idx]))
			break;

		intel_context_unpin(ps->ce[idx]);
		intel_context_put(ps->ce[idx]);
	}
	kfree(ps);

	cpu_latency_qos_remove_request(&qos);
	kfree(stats);
	return err;
}

static int p_sync0(void *arg)
{
	struct perf_stats *p = arg;
	struct intel_engine_cs *engine = p->engine;
	struct intel_context *ce;
	IGT_TIMEOUT(end_time);
	unsigned long count;
	bool busy;
	int err = 0;

	ce = intel_context_create(engine);
	if (IS_ERR(ce))
		return PTR_ERR(ce);

	err = intel_context_pin(ce);
	if (err) {
		intel_context_put(ce);
		return err;
	}

	if (intel_engine_supports_stats(engine)) {
		p->busy = intel_engine_get_busy_time(engine, &p->time);
		busy = true;
	} else {
		p->time = ktime_get();
		busy = false;
	}

	count = 0;
	do {
		struct i915_request *rq;

		rq = i915_request_create(ce);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			break;
		}

		i915_request_get(rq);
		i915_request_add(rq);

		err = 0;
		if (i915_request_wait(rq, 0, HZ / 5) < 0)
			err = -ETIME;
		i915_request_put(rq);
		if (err)
			break;

		count++;
	} while (!__igt_timeout(end_time, NULL));

	if (busy) {
		ktime_t now;

		p->busy = ktime_sub(intel_engine_get_busy_time(engine, &now),
				    p->busy);
		p->time = ktime_sub(now, p->time);
	} else {
		p->time = ktime_sub(ktime_get(), p->time);
	}

	err = switch_to_kernel_sync(ce, err);
	p->runtime = intel_context_get_total_runtime_ns(ce);
	p->count = count;

	intel_context_unpin(ce);
	intel_context_put(ce);
	return err;
}
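
/*
 * p_sync0() above and p_sync1()/p_many() below are the per-engine variants
 * run from separate kthreads by perf_parallel_engines(): p_sync0 waits for
 * every request before submitting another, p_sync1 allows one request to
 * be in flight while waiting on its predecessor, and p_many submits
 * unthrottled. Comparing their request counts over the same wall-clock
 * budget shows, roughly, how much throughput is gained by keeping the
 * engine's queue from draining between requests.
 */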

static int p_sync1(void *arg)
{
	struct perf_stats *p = arg;
	struct intel_engine_cs *engine = p->engine;
	struct i915_request *prev = NULL;
	struct intel_context *ce;
	IGT_TIMEOUT(end_time);
	unsigned long count;
	bool busy;
	int err = 0;

	ce = intel_context_create(engine);
	if (IS_ERR(ce))
		return PTR_ERR(ce);

	err = intel_context_pin(ce);
	if (err) {
		intel_context_put(ce);
		return err;
	}

	if (intel_engine_supports_stats(engine)) {
		p->busy = intel_engine_get_busy_time(engine, &p->time);
		busy = true;
	} else {
		p->time = ktime_get();
		busy = false;
	}

	count = 0;
	do {
		struct i915_request *rq;

		rq = i915_request_create(ce);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			break;
		}

		i915_request_get(rq);
		i915_request_add(rq);

		err = 0;
		if (prev && i915_request_wait(prev, 0, HZ / 5) < 0)
			err = -ETIME;
		i915_request_put(prev);
		prev = rq;
		if (err)
			break;

		count++;
	} while (!__igt_timeout(end_time, NULL));
	i915_request_put(prev);

	if (busy) {
		ktime_t now;

		p->busy = ktime_sub(intel_engine_get_busy_time(engine, &now),
				    p->busy);
		p->time = ktime_sub(now, p->time);
	} else {
		p->time = ktime_sub(ktime_get(), p->time);
	}

	err = switch_to_kernel_sync(ce, err);
	p->runtime = intel_context_get_total_runtime_ns(ce);
	p->count = count;

	intel_context_unpin(ce);
	intel_context_put(ce);
	return err;
}

static int p_many(void *arg)
{
	struct perf_stats *p = arg;
	struct intel_engine_cs *engine = p->engine;
	struct intel_context *ce;
	IGT_TIMEOUT(end_time);
	unsigned long count;
	int err = 0;
	bool busy;

	ce = intel_context_create(engine);
	if (IS_ERR(ce))
		return PTR_ERR(ce);

	err = intel_context_pin(ce);
	if (err) {
		intel_context_put(ce);
		return err;
	}

	if (intel_engine_supports_stats(engine)) {
		p->busy = intel_engine_get_busy_time(engine, &p->time);
		busy = true;
	} else {
		p->time = ktime_get();
		busy = false;
	}

	count = 0;
	do {
		struct i915_request *rq;

		rq = i915_request_create(ce);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			break;
		}

		i915_request_add(rq);
		count++;
	} while (!__igt_timeout(end_time, NULL));

	if (busy) {
		ktime_t now;

		p->busy = ktime_sub(intel_engine_get_busy_time(engine, &now),
				    p->busy);
		p->time = ktime_sub(now, p->time);
	} else {
		p->time = ktime_sub(ktime_get(), p->time);
	}

	err = switch_to_kernel_sync(ce, err);
	p->runtime = intel_context_get_total_runtime_ns(ce);
	p->count = count;

	intel_context_unpin(ce);
	intel_context_put(ce);
	return err;
}

static int perf_parallel_engines(void *arg)
{
	struct drm_i915_private *i915 = arg;
	static int (* const func[])(void *arg) = {
		p_sync0,
		p_sync1,
		p_many,
		NULL,
	};
	const unsigned int nengines = num_uabi_engines(i915);
	struct intel_engine_cs *engine;
	int (* const *fn)(void *arg);
	struct pm_qos_request qos;
	struct {
		struct perf_stats p;
		struct task_struct *tsk;
	} *engines;
	int err = 0;

	engines = kcalloc(nengines, sizeof(*engines), GFP_KERNEL);
	if (!engines)
		return -ENOMEM;

	cpu_latency_qos_add_request(&qos, 0);

	for (fn = func; *fn; fn++) {
		char name[KSYM_NAME_LEN];
		struct igt_live_test t;
		unsigned int idx;

		snprintf(name, sizeof(name), "%ps", *fn);
		err = igt_live_test_begin(&t, i915, __func__, name);
		if (err)
			break;

		atomic_set(&i915->selftest.counter, nengines);

		idx = 0;
		for_each_uabi_engine(engine, i915) {
			intel_engine_pm_get(engine);

			memset(&engines[idx].p, 0, sizeof(engines[idx].p));
			engines[idx].p.engine = engine;

			engines[idx].tsk = kthread_run(*fn, &engines[idx].p,
						       "igt:%s", engine->name);
			if (IS_ERR(engines[idx].tsk)) {
				err = PTR_ERR(engines[idx].tsk);
				intel_engine_pm_put(engine);
				break;
			}
			get_task_struct(engines[idx++].tsk);
		}

		yield(); /* start all threads before we kthread_stop() */

		idx = 0;
		for_each_uabi_engine(engine, i915) {
			int status;

			if (IS_ERR(engines[idx].tsk))
				break;

			status = kthread_stop(engines[idx].tsk);
			if (status && !err)
				err = status;

			intel_engine_pm_put(engine);
			put_task_struct(engines[idx++].tsk);
		}

		if (igt_live_test_end(&t))
			err = -EIO;
		if (err)
			break;

		idx = 0;
		for_each_uabi_engine(engine, i915) {
			struct perf_stats *p = &engines[idx].p;
			u64 busy = 100 * ktime_to_ns(p->busy);
			u64 dt = ktime_to_ns(p->time);
			int integer, decimal;

			if (dt) {
				integer = div64_u64(busy, dt);
				busy -= integer * dt;
				decimal = div64_u64(100 * busy, dt);
			} else {
				integer = 0;
				decimal = 0;
			}

			GEM_BUG_ON(engine != p->engine);
			pr_info("%s %5s: { count:%lu, busy:%d.%02d%%, runtime:%lldms, walltime:%lldms }\n",
				name, engine->name, p->count, integer, decimal,
				div_u64(p->runtime, 1000 * 1000),
				div_u64(ktime_to_ns(p->time), 1000 * 1000));
			idx++;
		}
	}

	cpu_latency_qos_remove_request(&qos);
	kfree(engines);
	return err;
}

int i915_request_perf_selftests(struct drm_i915_private *i915)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(perf_request_latency),
		SUBTEST(perf_series_engines),
		SUBTEST(perf_parallel_engines),
	};

	if (intel_gt_is_wedged(&i915->gt))
		return 0;

	return i915_subtests(tests, i915);
}