/*
 * Copyright © 2016 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 */

#include <linux/prime_numbers.h>
#include <linux/pm_qos.h>

#include "gem/i915_gem_pm.h"
#include "gem/selftests/mock_context.h"

#include "gt/intel_engine_pm.h"
#include "gt/intel_engine_user.h"
#include "gt/intel_gt.h"

#include "i915_random.h"
#include "i915_selftest.h"
#include "igt_live_test.h"
#include "igt_spinner.h"
#include "lib_sw_fence.h"

#include "mock_drm.h"
#include "mock_gem_device.h"

static unsigned int num_uabi_engines(struct drm_i915_private *i915)
{
	struct intel_engine_cs *engine;
	unsigned int count;

	count = 0;
	for_each_uabi_engine(engine, i915)
		count++;

	return count;
}

static struct intel_engine_cs *rcs0(struct drm_i915_private *i915)
{
	return intel_engine_lookup_user(i915, I915_ENGINE_CLASS_RENDER, 0);
}

static int igt_add_request(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct i915_request *request;

	/* Basic preliminary test to create a request and let it loose! */

	request = mock_request(rcs0(i915)->kernel_context, HZ / 10);
	if (!request)
		return -ENOMEM;

	i915_request_add(request);

	return 0;
}

static int igt_wait_request(void *arg)
{
	const long T = HZ / 4;
	struct drm_i915_private *i915 = arg;
	struct i915_request *request;
	int err = -EINVAL;

	/* Submit a request, then wait upon it */

	request = mock_request(rcs0(i915)->kernel_context, T);
	if (!request)
		return -ENOMEM;

	i915_request_get(request);

	if (i915_request_wait(request, 0, 0) != -ETIME) {
		pr_err("request wait (busy query) succeeded (expected timeout before submit!)\n");
		goto out_request;
	}

	if (i915_request_wait(request, 0, T) != -ETIME) {
		pr_err("request wait succeeded (expected timeout before submit!)\n");
		goto out_request;
	}

	if (i915_request_completed(request)) {
		pr_err("request completed before submit!!\n");
		goto out_request;
	}

	i915_request_add(request);

	if (i915_request_wait(request, 0, 0) != -ETIME) {
		pr_err("request wait (busy query) succeeded (expected timeout after submit!)\n");
		goto out_request;
	}

	if (i915_request_completed(request)) {
		pr_err("request completed immediately!\n");
		goto out_request;
	}

	if (i915_request_wait(request, 0, T / 2) != -ETIME) {
		pr_err("request wait succeeded (expected timeout!)\n");
		goto out_request;
	}

	if (i915_request_wait(request, 0, T) == -ETIME) {
		pr_err("request wait timed out!\n");
		goto out_request;
	}

	if (!i915_request_completed(request)) {
		pr_err("request not complete after waiting!\n");
		goto out_request;
	}

	if (i915_request_wait(request, 0, T) == -ETIME) {
		pr_err("request wait timed out when already complete!\n");
		goto out_request;
	}

	err = 0;
out_request:
	i915_request_put(request);
	mock_device_flush(i915);
	return err;
}

static int igt_fence_wait(void *arg)
{
	const long T = HZ / 4;
	struct drm_i915_private *i915 = arg;
	struct i915_request *request;
	int err = -EINVAL;

	/* Submit a request, treat it as a fence and wait upon it */

	request = mock_request(rcs0(i915)->kernel_context, T);
	if (!request)
		return -ENOMEM;

	if (dma_fence_wait_timeout(&request->fence, false, T) != -ETIME) {
		pr_err("fence wait success before submit (expected timeout)!\n");
		goto out;
	}

	i915_request_add(request);

	if (dma_fence_is_signaled(&request->fence)) {
		pr_err("fence signaled immediately!\n");
		goto out;
	}

	if (dma_fence_wait_timeout(&request->fence, false, T / 2) != -ETIME) {
		pr_err("fence wait success after submit (expected timeout)!\n");
		goto out;
	}

	if (dma_fence_wait_timeout(&request->fence, false, T) <= 0) {
		pr_err("fence wait timed out (expected success)!\n");
		goto out;
	}

	if (!dma_fence_is_signaled(&request->fence)) {
		pr_err("fence unsignaled after waiting!\n");
		goto out;
	}

	if (dma_fence_wait_timeout(&request->fence, false, T) <= 0) {
		pr_err("fence wait timed out when complete (expected success)!\n");
		goto out;
	}

	err = 0;
out:
	mock_device_flush(i915);
	return err;
}

static int igt_request_rewind(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct i915_request *request, *vip;
	struct i915_gem_context *ctx[2];
	struct intel_context *ce;
	int err = -EINVAL;

	ctx[0] = mock_context(i915, "A");

	ce = i915_gem_context_get_engine(ctx[0], RCS0);
	GEM_BUG_ON(IS_ERR(ce));
	request = mock_request(ce, 2 * HZ);
	intel_context_put(ce);
	if (!request) {
		err = -ENOMEM;
		goto err_context_0;
	}

	i915_request_get(request);
	i915_request_add(request);

	ctx[1] = mock_context(i915, "B");

	ce = i915_gem_context_get_engine(ctx[1], RCS0);
	GEM_BUG_ON(IS_ERR(ce));
	vip = mock_request(ce, 0);
	intel_context_put(ce);
	if (!vip) {
		err = -ENOMEM;
		goto err_context_1;
	}

	/* Simulate preemption by manual reordering */
	if (!mock_cancel_request(request)) {
		pr_err("failed to cancel request (already executed)!\n");
		i915_request_add(vip);
		goto err_context_1;
	}
	i915_request_get(vip);
	i915_request_add(vip);
	rcu_read_lock();
	request->engine->submit_request(request);
	rcu_read_unlock();

	if (i915_request_wait(vip, 0, HZ) == -ETIME) {
		pr_err("timed out waiting for high priority request\n");
		goto err;
	}

	if (i915_request_completed(request)) {
		pr_err("low priority request already completed\n");
		goto err;
	}

	err = 0;
err:
	i915_request_put(vip);
err_context_1:
	mock_context_close(ctx[1]);
	i915_request_put(request);
err_context_0:
	mock_context_close(ctx[0]);
	mock_device_flush(i915);
	return err;
}

struct smoketest {
	struct intel_engine_cs *engine;
	struct i915_gem_context **contexts;
	atomic_long_t num_waits, num_fences;
	int ncontexts, max_batch;
	struct i915_request *(*request_alloc)(struct intel_context *ce);
};

static struct i915_request *
__mock_request_alloc(struct intel_context *ce)
{
	return mock_request(ce, 0);
}

static struct i915_request *
__live_request_alloc(struct intel_context *ce)
{
	return intel_context_create_request(ce);
}

static int __igt_breadcrumbs_smoketest(void *arg)
{
	struct smoketest *t = arg;
	const unsigned int max_batch = min(t->ncontexts, t->max_batch) - 1;
	const unsigned int total = 4 * t->ncontexts + 1;
	unsigned int num_waits = 0, num_fences = 0;
	struct i915_request **requests;
	I915_RND_STATE(prng);
	unsigned int *order;
	int err = 0;

	/*
	 * A very simple test to catch the most egregious of list handling bugs.
	 *
	 * At its heart, we simply create oodles of requests running across
	 * multiple kthreads and enable signaling on them, for the sole purpose
	 * of stressing our breadcrumb handling. The only inspection we do is
	 * that the fences were marked as signaled.
	 */
303 */ 304 305 requests = kcalloc(total, sizeof(*requests), GFP_KERNEL); 306 if (!requests) 307 return -ENOMEM; 308 309 order = i915_random_order(total, &prng); 310 if (!order) { 311 err = -ENOMEM; 312 goto out_requests; 313 } 314 315 while (!kthread_should_stop()) { 316 struct i915_sw_fence *submit, *wait; 317 unsigned int n, count; 318 319 submit = heap_fence_create(GFP_KERNEL); 320 if (!submit) { 321 err = -ENOMEM; 322 break; 323 } 324 325 wait = heap_fence_create(GFP_KERNEL); 326 if (!wait) { 327 i915_sw_fence_commit(submit); 328 heap_fence_put(submit); 329 err = ENOMEM; 330 break; 331 } 332 333 i915_random_reorder(order, total, &prng); 334 count = 1 + i915_prandom_u32_max_state(max_batch, &prng); 335 336 for (n = 0; n < count; n++) { 337 struct i915_gem_context *ctx = 338 t->contexts[order[n] % t->ncontexts]; 339 struct i915_request *rq; 340 struct intel_context *ce; 341 342 ce = i915_gem_context_get_engine(ctx, t->engine->legacy_idx); 343 GEM_BUG_ON(IS_ERR(ce)); 344 rq = t->request_alloc(ce); 345 intel_context_put(ce); 346 if (IS_ERR(rq)) { 347 err = PTR_ERR(rq); 348 count = n; 349 break; 350 } 351 352 err = i915_sw_fence_await_sw_fence_gfp(&rq->submit, 353 submit, 354 GFP_KERNEL); 355 356 requests[n] = i915_request_get(rq); 357 i915_request_add(rq); 358 359 if (err >= 0) 360 err = i915_sw_fence_await_dma_fence(wait, 361 &rq->fence, 362 0, 363 GFP_KERNEL); 364 365 if (err < 0) { 366 i915_request_put(rq); 367 count = n; 368 break; 369 } 370 } 371 372 i915_sw_fence_commit(submit); 373 i915_sw_fence_commit(wait); 374 375 if (!wait_event_timeout(wait->wait, 376 i915_sw_fence_done(wait), 377 5 * HZ)) { 378 struct i915_request *rq = requests[count - 1]; 379 380 pr_err("waiting for %d/%d fences (last %llx:%lld) on %s timed out!\n", 381 atomic_read(&wait->pending), count, 382 rq->fence.context, rq->fence.seqno, 383 t->engine->name); 384 GEM_TRACE_DUMP(); 385 386 intel_gt_set_wedged(t->engine->gt); 387 GEM_BUG_ON(!i915_request_completed(rq)); 388 i915_sw_fence_wait(wait); 389 err = -EIO; 390 } 391 392 for (n = 0; n < count; n++) { 393 struct i915_request *rq = requests[n]; 394 395 if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, 396 &rq->fence.flags)) { 397 pr_err("%llu:%llu was not signaled!\n", 398 rq->fence.context, rq->fence.seqno); 399 err = -EINVAL; 400 } 401 402 i915_request_put(rq); 403 } 404 405 heap_fence_put(wait); 406 heap_fence_put(submit); 407 408 if (err < 0) 409 break; 410 411 num_fences += count; 412 num_waits++; 413 414 cond_resched(); 415 } 416 417 atomic_long_add(num_fences, &t->num_fences); 418 atomic_long_add(num_waits, &t->num_waits); 419 420 kfree(order); 421 out_requests: 422 kfree(requests); 423 return err; 424 } 425 426 static int mock_breadcrumbs_smoketest(void *arg) 427 { 428 struct drm_i915_private *i915 = arg; 429 struct smoketest t = { 430 .engine = rcs0(i915), 431 .ncontexts = 1024, 432 .max_batch = 1024, 433 .request_alloc = __mock_request_alloc 434 }; 435 unsigned int ncpus = num_online_cpus(); 436 struct task_struct **threads; 437 unsigned int n; 438 int ret = 0; 439 440 /* 441 * Smoketest our breadcrumb/signal handling for requests across multiple 442 * threads. A very simple test to only catch the most egregious of bugs. 
	 * See __igt_breadcrumbs_smoketest();
	 */

	threads = kcalloc(ncpus, sizeof(*threads), GFP_KERNEL);
	if (!threads)
		return -ENOMEM;

	t.contexts = kcalloc(t.ncontexts, sizeof(*t.contexts), GFP_KERNEL);
	if (!t.contexts) {
		ret = -ENOMEM;
		goto out_threads;
	}

	for (n = 0; n < t.ncontexts; n++) {
		t.contexts[n] = mock_context(t.engine->i915, "mock");
		if (!t.contexts[n]) {
			ret = -ENOMEM;
			goto out_contexts;
		}
	}

	for (n = 0; n < ncpus; n++) {
		threads[n] = kthread_run(__igt_breadcrumbs_smoketest,
					 &t, "igt/%d", n);
		if (IS_ERR(threads[n])) {
			ret = PTR_ERR(threads[n]);
			ncpus = n;
			break;
		}

		get_task_struct(threads[n]);
	}

	yield(); /* start all threads before we begin */
	msleep(jiffies_to_msecs(i915_selftest.timeout_jiffies));

	for (n = 0; n < ncpus; n++) {
		int err;

		err = kthread_stop(threads[n]);
		if (err < 0 && !ret)
			ret = err;

		put_task_struct(threads[n]);
	}
	pr_info("Completed %lu waits for %lu fences across %d cpus\n",
		atomic_long_read(&t.num_waits),
		atomic_long_read(&t.num_fences),
		ncpus);

out_contexts:
	for (n = 0; n < t.ncontexts; n++) {
		if (!t.contexts[n])
			break;
		mock_context_close(t.contexts[n]);
	}
	kfree(t.contexts);
out_threads:
	kfree(threads);
	return ret;
}

int i915_request_mock_selftests(void)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(igt_add_request),
		SUBTEST(igt_wait_request),
		SUBTEST(igt_fence_wait),
		SUBTEST(igt_request_rewind),
		SUBTEST(mock_breadcrumbs_smoketest),
	};
	struct drm_i915_private *i915;
	intel_wakeref_t wakeref;
	int err = 0;

	i915 = mock_gem_device();
	if (!i915)
		return -ENOMEM;

	with_intel_runtime_pm(&i915->runtime_pm, wakeref)
		err = i915_subtests(tests, i915);

	drm_dev_put(&i915->drm);

	return err;
}

static int live_nop_request(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct intel_engine_cs *engine;
	struct igt_live_test t;
	int err = -ENODEV;

	/*
	 * Submit various sized batches of empty requests, to each engine
	 * (individually), and wait for the batch to complete. We can check
	 * the overhead of submitting requests to the hardware.
	 */

	for_each_uabi_engine(engine, i915) {
		unsigned long n, prime;
		IGT_TIMEOUT(end_time);
		ktime_t times[2] = {};

		err = igt_live_test_begin(&t, i915, __func__, engine->name);
		if (err)
			return err;

		intel_engine_pm_get(engine);
		for_each_prime_number_from(prime, 1, 8192) {
			struct i915_request *request = NULL;

			times[1] = ktime_get_raw();

			for (n = 0; n < prime; n++) {
				i915_request_put(request);
				request = i915_request_create(engine->kernel_context);
				if (IS_ERR(request))
					return PTR_ERR(request);

				/*
				 * This space is left intentionally blank.
				 *
				 * We do not actually want to perform any
				 * action with this request, we just want
				 * to measure the latency in allocation
				 * and submission of our breadcrumbs -
				 * ensuring that the bare request is sufficient
				 * for the system to work (i.e. proper HEAD
				 * tracking of the rings, interrupt handling,
				 * etc). It also gives us the lowest bounds
				 * for latency.
				 */
576 */ 577 578 i915_request_get(request); 579 i915_request_add(request); 580 } 581 i915_request_wait(request, 0, MAX_SCHEDULE_TIMEOUT); 582 i915_request_put(request); 583 584 times[1] = ktime_sub(ktime_get_raw(), times[1]); 585 if (prime == 1) 586 times[0] = times[1]; 587 588 if (__igt_timeout(end_time, NULL)) 589 break; 590 } 591 intel_engine_pm_put(engine); 592 593 err = igt_live_test_end(&t); 594 if (err) 595 return err; 596 597 pr_info("Request latencies on %s: 1 = %lluns, %lu = %lluns\n", 598 engine->name, 599 ktime_to_ns(times[0]), 600 prime, div64_u64(ktime_to_ns(times[1]), prime)); 601 } 602 603 return err; 604 } 605 606 static struct i915_vma *empty_batch(struct drm_i915_private *i915) 607 { 608 struct drm_i915_gem_object *obj; 609 struct i915_vma *vma; 610 u32 *cmd; 611 int err; 612 613 obj = i915_gem_object_create_internal(i915, PAGE_SIZE); 614 if (IS_ERR(obj)) 615 return ERR_CAST(obj); 616 617 cmd = i915_gem_object_pin_map(obj, I915_MAP_WB); 618 if (IS_ERR(cmd)) { 619 err = PTR_ERR(cmd); 620 goto err; 621 } 622 623 *cmd = MI_BATCH_BUFFER_END; 624 625 __i915_gem_object_flush_map(obj, 0, 64); 626 i915_gem_object_unpin_map(obj); 627 628 intel_gt_chipset_flush(&i915->gt); 629 630 vma = i915_vma_instance(obj, &i915->ggtt.vm, NULL); 631 if (IS_ERR(vma)) { 632 err = PTR_ERR(vma); 633 goto err; 634 } 635 636 err = i915_vma_pin(vma, 0, 0, PIN_USER | PIN_GLOBAL); 637 if (err) 638 goto err; 639 640 /* Force the wait wait now to avoid including it in the benchmark */ 641 err = i915_vma_sync(vma); 642 if (err) 643 goto err_pin; 644 645 return vma; 646 647 err_pin: 648 i915_vma_unpin(vma); 649 err: 650 i915_gem_object_put(obj); 651 return ERR_PTR(err); 652 } 653 654 static struct i915_request * 655 empty_request(struct intel_engine_cs *engine, 656 struct i915_vma *batch) 657 { 658 struct i915_request *request; 659 int err; 660 661 request = i915_request_create(engine->kernel_context); 662 if (IS_ERR(request)) 663 return request; 664 665 err = engine->emit_bb_start(request, 666 batch->node.start, 667 batch->node.size, 668 I915_DISPATCH_SECURE); 669 if (err) 670 goto out_request; 671 672 i915_request_get(request); 673 out_request: 674 i915_request_add(request); 675 return err ? ERR_PTR(err) : request; 676 } 677 678 static int live_empty_request(void *arg) 679 { 680 struct drm_i915_private *i915 = arg; 681 struct intel_engine_cs *engine; 682 struct igt_live_test t; 683 struct i915_vma *batch; 684 int err = 0; 685 686 /* 687 * Submit various sized batches of empty requests, to each engine 688 * (individually), and wait for the batch to complete. We can check 689 * the overhead of submitting requests to the hardware. 
690 */ 691 692 batch = empty_batch(i915); 693 if (IS_ERR(batch)) 694 return PTR_ERR(batch); 695 696 for_each_uabi_engine(engine, i915) { 697 IGT_TIMEOUT(end_time); 698 struct i915_request *request; 699 unsigned long n, prime; 700 ktime_t times[2] = {}; 701 702 err = igt_live_test_begin(&t, i915, __func__, engine->name); 703 if (err) 704 goto out_batch; 705 706 intel_engine_pm_get(engine); 707 708 /* Warmup / preload */ 709 request = empty_request(engine, batch); 710 if (IS_ERR(request)) { 711 err = PTR_ERR(request); 712 intel_engine_pm_put(engine); 713 goto out_batch; 714 } 715 i915_request_wait(request, 0, MAX_SCHEDULE_TIMEOUT); 716 717 for_each_prime_number_from(prime, 1, 8192) { 718 times[1] = ktime_get_raw(); 719 720 for (n = 0; n < prime; n++) { 721 i915_request_put(request); 722 request = empty_request(engine, batch); 723 if (IS_ERR(request)) { 724 err = PTR_ERR(request); 725 intel_engine_pm_put(engine); 726 goto out_batch; 727 } 728 } 729 i915_request_wait(request, 0, MAX_SCHEDULE_TIMEOUT); 730 731 times[1] = ktime_sub(ktime_get_raw(), times[1]); 732 if (prime == 1) 733 times[0] = times[1]; 734 735 if (__igt_timeout(end_time, NULL)) 736 break; 737 } 738 i915_request_put(request); 739 intel_engine_pm_put(engine); 740 741 err = igt_live_test_end(&t); 742 if (err) 743 goto out_batch; 744 745 pr_info("Batch latencies on %s: 1 = %lluns, %lu = %lluns\n", 746 engine->name, 747 ktime_to_ns(times[0]), 748 prime, div64_u64(ktime_to_ns(times[1]), prime)); 749 } 750 751 out_batch: 752 i915_vma_unpin(batch); 753 i915_vma_put(batch); 754 return err; 755 } 756 757 static struct i915_vma *recursive_batch(struct drm_i915_private *i915) 758 { 759 struct drm_i915_gem_object *obj; 760 const int gen = INTEL_GEN(i915); 761 struct i915_vma *vma; 762 u32 *cmd; 763 int err; 764 765 obj = i915_gem_object_create_internal(i915, PAGE_SIZE); 766 if (IS_ERR(obj)) 767 return ERR_CAST(obj); 768 769 vma = i915_vma_instance(obj, i915->gt.vm, NULL); 770 if (IS_ERR(vma)) { 771 err = PTR_ERR(vma); 772 goto err; 773 } 774 775 err = i915_vma_pin(vma, 0, 0, PIN_USER); 776 if (err) 777 goto err; 778 779 cmd = i915_gem_object_pin_map(obj, I915_MAP_WC); 780 if (IS_ERR(cmd)) { 781 err = PTR_ERR(cmd); 782 goto err; 783 } 784 785 if (gen >= 8) { 786 *cmd++ = MI_BATCH_BUFFER_START | 1 << 8 | 1; 787 *cmd++ = lower_32_bits(vma->node.start); 788 *cmd++ = upper_32_bits(vma->node.start); 789 } else if (gen >= 6) { 790 *cmd++ = MI_BATCH_BUFFER_START | 1 << 8; 791 *cmd++ = lower_32_bits(vma->node.start); 792 } else { 793 *cmd++ = MI_BATCH_BUFFER_START | MI_BATCH_GTT; 794 *cmd++ = lower_32_bits(vma->node.start); 795 } 796 *cmd++ = MI_BATCH_BUFFER_END; /* terminate early in case of error */ 797 798 __i915_gem_object_flush_map(obj, 0, 64); 799 i915_gem_object_unpin_map(obj); 800 801 intel_gt_chipset_flush(&i915->gt); 802 803 return vma; 804 805 err: 806 i915_gem_object_put(obj); 807 return ERR_PTR(err); 808 } 809 810 static int recursive_batch_resolve(struct i915_vma *batch) 811 { 812 u32 *cmd; 813 814 cmd = i915_gem_object_pin_map(batch->obj, I915_MAP_WC); 815 if (IS_ERR(cmd)) 816 return PTR_ERR(cmd); 817 818 *cmd = MI_BATCH_BUFFER_END; 819 820 __i915_gem_object_flush_map(batch->obj, 0, sizeof(*cmd)); 821 i915_gem_object_unpin_map(batch->obj); 822 823 intel_gt_chipset_flush(batch->vm->gt); 824 825 return 0; 826 } 827 828 static int live_all_engines(void *arg) 829 { 830 struct drm_i915_private *i915 = arg; 831 const unsigned int nengines = num_uabi_engines(i915); 832 struct intel_engine_cs *engine; 833 struct i915_request **request; 834 
	struct igt_live_test t;
	struct i915_vma *batch;
	unsigned int idx;
	int err;

	/*
	 * Check we can submit requests to all engines simultaneously. We
	 * send a recursive batch to each engine - checking that we don't
	 * block doing so, and that they don't complete too soon.
	 */

	request = kcalloc(nengines, sizeof(*request), GFP_KERNEL);
	if (!request)
		return -ENOMEM;

	err = igt_live_test_begin(&t, i915, __func__, "");
	if (err)
		goto out_free;

	batch = recursive_batch(i915);
	if (IS_ERR(batch)) {
		err = PTR_ERR(batch);
		pr_err("%s: Unable to create batch, err=%d\n", __func__, err);
		goto out_free;
	}

	idx = 0;
	for_each_uabi_engine(engine, i915) {
		request[idx] = intel_engine_create_kernel_request(engine);
		if (IS_ERR(request[idx])) {
			err = PTR_ERR(request[idx]);
			pr_err("%s: Request allocation failed with err=%d\n",
			       __func__, err);
			goto out_request;
		}

		i915_vma_lock(batch);
		err = i915_request_await_object(request[idx], batch->obj, 0);
		if (err == 0)
			err = i915_vma_move_to_active(batch, request[idx], 0);
		i915_vma_unlock(batch);
		GEM_BUG_ON(err);

		err = engine->emit_bb_start(request[idx],
					    batch->node.start,
					    batch->node.size,
					    0);
		GEM_BUG_ON(err);
		request[idx]->batch = batch;

		i915_request_get(request[idx]);
		i915_request_add(request[idx]);
		idx++;
	}

	idx = 0;
	for_each_uabi_engine(engine, i915) {
		if (i915_request_completed(request[idx])) {
			pr_err("%s(%s): request completed too early!\n",
			       __func__, engine->name);
			err = -EINVAL;
			goto out_request;
		}
		idx++;
	}

	err = recursive_batch_resolve(batch);
	if (err) {
		pr_err("%s: failed to resolve batch, err=%d\n", __func__, err);
		goto out_request;
	}

	idx = 0;
	for_each_uabi_engine(engine, i915) {
		long timeout;

		timeout = i915_request_wait(request[idx], 0,
					    MAX_SCHEDULE_TIMEOUT);
		if (timeout < 0) {
			err = timeout;
			pr_err("%s: error waiting for request on %s, err=%d\n",
			       __func__, engine->name, err);
			goto out_request;
		}

		GEM_BUG_ON(!i915_request_completed(request[idx]));
		i915_request_put(request[idx]);
		request[idx] = NULL;
		idx++;
	}

	err = igt_live_test_end(&t);

out_request:
	idx = 0;
	for_each_uabi_engine(engine, i915) {
		if (request[idx])
			i915_request_put(request[idx]);
		idx++;
	}
	i915_vma_unpin(batch);
	i915_vma_put(batch);
out_free:
	kfree(request);
	return err;
}

static int live_sequential_engines(void *arg)
{
	struct drm_i915_private *i915 = arg;
	const unsigned int nengines = num_uabi_engines(i915);
	struct i915_request **request;
	struct i915_request *prev = NULL;
	struct intel_engine_cs *engine;
	struct igt_live_test t;
	unsigned int idx;
	int err;

	/*
	 * Check we can submit requests to all engines sequentially, such
	 * that each successive request waits for the earlier ones. This
	 * tests that we don't execute requests out of order, even though
	 * they are running on independent engines.
	 */
957 */ 958 959 request = kcalloc(nengines, sizeof(*request), GFP_KERNEL); 960 if (!request) 961 return -ENOMEM; 962 963 err = igt_live_test_begin(&t, i915, __func__, ""); 964 if (err) 965 goto out_free; 966 967 idx = 0; 968 for_each_uabi_engine(engine, i915) { 969 struct i915_vma *batch; 970 971 batch = recursive_batch(i915); 972 if (IS_ERR(batch)) { 973 err = PTR_ERR(batch); 974 pr_err("%s: Unable to create batch for %s, err=%d\n", 975 __func__, engine->name, err); 976 goto out_free; 977 } 978 979 request[idx] = intel_engine_create_kernel_request(engine); 980 if (IS_ERR(request[idx])) { 981 err = PTR_ERR(request[idx]); 982 pr_err("%s: Request allocation failed for %s with err=%d\n", 983 __func__, engine->name, err); 984 goto out_request; 985 } 986 987 if (prev) { 988 err = i915_request_await_dma_fence(request[idx], 989 &prev->fence); 990 if (err) { 991 i915_request_add(request[idx]); 992 pr_err("%s: Request await failed for %s with err=%d\n", 993 __func__, engine->name, err); 994 goto out_request; 995 } 996 } 997 998 i915_vma_lock(batch); 999 err = i915_request_await_object(request[idx], 1000 batch->obj, false); 1001 if (err == 0) 1002 err = i915_vma_move_to_active(batch, request[idx], 0); 1003 i915_vma_unlock(batch); 1004 GEM_BUG_ON(err); 1005 1006 err = engine->emit_bb_start(request[idx], 1007 batch->node.start, 1008 batch->node.size, 1009 0); 1010 GEM_BUG_ON(err); 1011 request[idx]->batch = batch; 1012 1013 i915_request_get(request[idx]); 1014 i915_request_add(request[idx]); 1015 1016 prev = request[idx]; 1017 idx++; 1018 } 1019 1020 idx = 0; 1021 for_each_uabi_engine(engine, i915) { 1022 long timeout; 1023 1024 if (i915_request_completed(request[idx])) { 1025 pr_err("%s(%s): request completed too early!\n", 1026 __func__, engine->name); 1027 err = -EINVAL; 1028 goto out_request; 1029 } 1030 1031 err = recursive_batch_resolve(request[idx]->batch); 1032 if (err) { 1033 pr_err("%s: failed to resolve batch, err=%d\n", 1034 __func__, err); 1035 goto out_request; 1036 } 1037 1038 timeout = i915_request_wait(request[idx], 0, 1039 MAX_SCHEDULE_TIMEOUT); 1040 if (timeout < 0) { 1041 err = timeout; 1042 pr_err("%s: error waiting for request on %s, err=%d\n", 1043 __func__, engine->name, err); 1044 goto out_request; 1045 } 1046 1047 GEM_BUG_ON(!i915_request_completed(request[idx])); 1048 idx++; 1049 } 1050 1051 err = igt_live_test_end(&t); 1052 1053 out_request: 1054 idx = 0; 1055 for_each_uabi_engine(engine, i915) { 1056 u32 *cmd; 1057 1058 if (!request[idx]) 1059 break; 1060 1061 cmd = i915_gem_object_pin_map(request[idx]->batch->obj, 1062 I915_MAP_WC); 1063 if (!IS_ERR(cmd)) { 1064 *cmd = MI_BATCH_BUFFER_END; 1065 1066 __i915_gem_object_flush_map(request[idx]->batch->obj, 1067 0, sizeof(*cmd)); 1068 i915_gem_object_unpin_map(request[idx]->batch->obj); 1069 1070 intel_gt_chipset_flush(engine->gt); 1071 } 1072 1073 i915_vma_put(request[idx]->batch); 1074 i915_request_put(request[idx]); 1075 idx++; 1076 } 1077 out_free: 1078 kfree(request); 1079 return err; 1080 } 1081 1082 static int __live_parallel_engine1(void *arg) 1083 { 1084 struct intel_engine_cs *engine = arg; 1085 IGT_TIMEOUT(end_time); 1086 unsigned long count; 1087 int err = 0; 1088 1089 count = 0; 1090 intel_engine_pm_get(engine); 1091 do { 1092 struct i915_request *rq; 1093 1094 rq = i915_request_create(engine->kernel_context); 1095 if (IS_ERR(rq)) { 1096 err = PTR_ERR(rq); 1097 break; 1098 } 1099 1100 i915_request_get(rq); 1101 i915_request_add(rq); 1102 1103 err = 0; 1104 if (i915_request_wait(rq, 0, HZ / 5) < 0) 1105 err = 
		i915_request_put(rq);
		if (err)
			break;

		count++;
	} while (!__igt_timeout(end_time, NULL));
	intel_engine_pm_put(engine);

	pr_info("%s: %lu request + sync\n", engine->name, count);
	return err;
}

static int __live_parallel_engineN(void *arg)
{
	struct intel_engine_cs *engine = arg;
	IGT_TIMEOUT(end_time);
	unsigned long count;
	int err = 0;

	count = 0;
	intel_engine_pm_get(engine);
	do {
		struct i915_request *rq;

		rq = i915_request_create(engine->kernel_context);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			break;
		}

		i915_request_add(rq);
		count++;
	} while (!__igt_timeout(end_time, NULL));
	intel_engine_pm_put(engine);

	pr_info("%s: %lu requests\n", engine->name, count);
	return err;
}

static bool wake_all(struct drm_i915_private *i915)
{
	if (atomic_dec_and_test(&i915->selftest.counter)) {
		wake_up_var(&i915->selftest.counter);
		return true;
	}

	return false;
}

static int wait_for_all(struct drm_i915_private *i915)
{
	if (wake_all(i915))
		return 0;

	if (wait_var_event_timeout(&i915->selftest.counter,
				   !atomic_read(&i915->selftest.counter),
				   i915_selftest.timeout_jiffies))
		return 0;

	return -ETIME;
}

static int __live_parallel_spin(void *arg)
{
	struct intel_engine_cs *engine = arg;
	struct igt_spinner spin;
	struct i915_request *rq;
	int err = 0;

	/*
	 * Create a spinner running for eternity on each engine. If a second
	 * spinner is incorrectly placed on the same engine, it will not be
	 * able to start in time.
	 */

	if (igt_spinner_init(&spin, engine->gt)) {
		wake_all(engine->i915);
		return -ENOMEM;
	}

	intel_engine_pm_get(engine);
	rq = igt_spinner_create_request(&spin,
					engine->kernel_context,
					MI_NOOP); /* no preemption */
	intel_engine_pm_put(engine);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		if (err == -ENODEV)
			err = 0;
		wake_all(engine->i915);
		goto out_spin;
	}

	i915_request_get(rq);
	i915_request_add(rq);
	if (igt_wait_for_spinner(&spin, rq)) {
		/* Occupy this engine for the whole test */
		err = wait_for_all(engine->i915);
	} else {
		pr_err("Failed to start spinner on %s\n", engine->name);
		err = -EINVAL;
	}
	igt_spinner_end(&spin);

	if (err == 0 && i915_request_wait(rq, 0, HZ / 5) < 0)
		err = -EIO;
	i915_request_put(rq);

out_spin:
	igt_spinner_fini(&spin);
	return err;
}

static int live_parallel_engines(void *arg)
{
	struct drm_i915_private *i915 = arg;
	static int (* const func[])(void *arg) = {
		__live_parallel_engine1,
		__live_parallel_engineN,
		__live_parallel_spin,
		NULL,
	};
	const unsigned int nengines = num_uabi_engines(i915);
	struct intel_engine_cs *engine;
	int (* const *fn)(void *arg);
	struct task_struct **tsk;
	int err = 0;

	/*
	 * Check we can submit requests to all engines concurrently. This
	 * tests that we load up the system maximally.
	 */
1237 */ 1238 1239 tsk = kcalloc(nengines, sizeof(*tsk), GFP_KERNEL); 1240 if (!tsk) 1241 return -ENOMEM; 1242 1243 for (fn = func; !err && *fn; fn++) { 1244 char name[KSYM_NAME_LEN]; 1245 struct igt_live_test t; 1246 unsigned int idx; 1247 1248 snprintf(name, sizeof(name), "%ps", *fn); 1249 err = igt_live_test_begin(&t, i915, __func__, name); 1250 if (err) 1251 break; 1252 1253 atomic_set(&i915->selftest.counter, nengines); 1254 1255 idx = 0; 1256 for_each_uabi_engine(engine, i915) { 1257 tsk[idx] = kthread_run(*fn, engine, 1258 "igt/parallel:%s", 1259 engine->name); 1260 if (IS_ERR(tsk[idx])) { 1261 err = PTR_ERR(tsk[idx]); 1262 break; 1263 } 1264 get_task_struct(tsk[idx++]); 1265 } 1266 1267 yield(); /* start all threads before we kthread_stop() */ 1268 1269 idx = 0; 1270 for_each_uabi_engine(engine, i915) { 1271 int status; 1272 1273 if (IS_ERR(tsk[idx])) 1274 break; 1275 1276 status = kthread_stop(tsk[idx]); 1277 if (status && !err) 1278 err = status; 1279 1280 put_task_struct(tsk[idx++]); 1281 } 1282 1283 if (igt_live_test_end(&t)) 1284 err = -EIO; 1285 } 1286 1287 kfree(tsk); 1288 return err; 1289 } 1290 1291 static int 1292 max_batches(struct i915_gem_context *ctx, struct intel_engine_cs *engine) 1293 { 1294 struct i915_request *rq; 1295 int ret; 1296 1297 /* 1298 * Before execlists, all contexts share the same ringbuffer. With 1299 * execlists, each context/engine has a separate ringbuffer and 1300 * for the purposes of this test, inexhaustible. 1301 * 1302 * For the global ringbuffer though, we have to be very careful 1303 * that we do not wrap while preventing the execution of requests 1304 * with a unsignaled fence. 1305 */ 1306 if (HAS_EXECLISTS(ctx->i915)) 1307 return INT_MAX; 1308 1309 rq = igt_request_alloc(ctx, engine); 1310 if (IS_ERR(rq)) { 1311 ret = PTR_ERR(rq); 1312 } else { 1313 int sz; 1314 1315 ret = rq->ring->size - rq->reserved_space; 1316 i915_request_add(rq); 1317 1318 sz = rq->ring->emit - rq->head; 1319 if (sz < 0) 1320 sz += rq->ring->size; 1321 ret /= sz; 1322 ret /= 2; /* leave half spare, in case of emergency! */ 1323 } 1324 1325 return ret; 1326 } 1327 1328 static int live_breadcrumbs_smoketest(void *arg) 1329 { 1330 struct drm_i915_private *i915 = arg; 1331 const unsigned int nengines = num_uabi_engines(i915); 1332 const unsigned int ncpus = num_online_cpus(); 1333 unsigned long num_waits, num_fences; 1334 struct intel_engine_cs *engine; 1335 struct task_struct **threads; 1336 struct igt_live_test live; 1337 intel_wakeref_t wakeref; 1338 struct smoketest *smoke; 1339 unsigned int n, idx; 1340 struct file *file; 1341 int ret = 0; 1342 1343 /* 1344 * Smoketest our breadcrumb/signal handling for requests across multiple 1345 * threads. A very simple test to only catch the most egregious of bugs. 1346 * See __igt_breadcrumbs_smoketest(); 1347 * 1348 * On real hardware this time. 
1349 */ 1350 1351 wakeref = intel_runtime_pm_get(&i915->runtime_pm); 1352 1353 file = mock_file(i915); 1354 if (IS_ERR(file)) { 1355 ret = PTR_ERR(file); 1356 goto out_rpm; 1357 } 1358 1359 smoke = kcalloc(nengines, sizeof(*smoke), GFP_KERNEL); 1360 if (!smoke) { 1361 ret = -ENOMEM; 1362 goto out_file; 1363 } 1364 1365 threads = kcalloc(ncpus * nengines, sizeof(*threads), GFP_KERNEL); 1366 if (!threads) { 1367 ret = -ENOMEM; 1368 goto out_smoke; 1369 } 1370 1371 smoke[0].request_alloc = __live_request_alloc; 1372 smoke[0].ncontexts = 64; 1373 smoke[0].contexts = kcalloc(smoke[0].ncontexts, 1374 sizeof(*smoke[0].contexts), 1375 GFP_KERNEL); 1376 if (!smoke[0].contexts) { 1377 ret = -ENOMEM; 1378 goto out_threads; 1379 } 1380 1381 for (n = 0; n < smoke[0].ncontexts; n++) { 1382 smoke[0].contexts[n] = live_context(i915, file); 1383 if (!smoke[0].contexts[n]) { 1384 ret = -ENOMEM; 1385 goto out_contexts; 1386 } 1387 } 1388 1389 ret = igt_live_test_begin(&live, i915, __func__, ""); 1390 if (ret) 1391 goto out_contexts; 1392 1393 idx = 0; 1394 for_each_uabi_engine(engine, i915) { 1395 smoke[idx] = smoke[0]; 1396 smoke[idx].engine = engine; 1397 smoke[idx].max_batch = 1398 max_batches(smoke[0].contexts[0], engine); 1399 if (smoke[idx].max_batch < 0) { 1400 ret = smoke[idx].max_batch; 1401 goto out_flush; 1402 } 1403 /* One ring interleaved between requests from all cpus */ 1404 smoke[idx].max_batch /= num_online_cpus() + 1; 1405 pr_debug("Limiting batches to %d requests on %s\n", 1406 smoke[idx].max_batch, engine->name); 1407 1408 for (n = 0; n < ncpus; n++) { 1409 struct task_struct *tsk; 1410 1411 tsk = kthread_run(__igt_breadcrumbs_smoketest, 1412 &smoke[idx], "igt/%d.%d", idx, n); 1413 if (IS_ERR(tsk)) { 1414 ret = PTR_ERR(tsk); 1415 goto out_flush; 1416 } 1417 1418 get_task_struct(tsk); 1419 threads[idx * ncpus + n] = tsk; 1420 } 1421 1422 idx++; 1423 } 1424 1425 yield(); /* start all threads before we begin */ 1426 msleep(jiffies_to_msecs(i915_selftest.timeout_jiffies)); 1427 1428 out_flush: 1429 idx = 0; 1430 num_waits = 0; 1431 num_fences = 0; 1432 for_each_uabi_engine(engine, i915) { 1433 for (n = 0; n < ncpus; n++) { 1434 struct task_struct *tsk = threads[idx * ncpus + n]; 1435 int err; 1436 1437 if (!tsk) 1438 continue; 1439 1440 err = kthread_stop(tsk); 1441 if (err < 0 && !ret) 1442 ret = err; 1443 1444 put_task_struct(tsk); 1445 } 1446 1447 num_waits += atomic_long_read(&smoke[idx].num_waits); 1448 num_fences += atomic_long_read(&smoke[idx].num_fences); 1449 idx++; 1450 } 1451 pr_info("Completed %lu waits for %lu fences across %d engines and %d cpus\n", 1452 num_waits, num_fences, RUNTIME_INFO(i915)->num_engines, ncpus); 1453 1454 ret = igt_live_test_end(&live) ?: ret; 1455 out_contexts: 1456 kfree(smoke[0].contexts); 1457 out_threads: 1458 kfree(threads); 1459 out_smoke: 1460 kfree(smoke); 1461 out_file: 1462 fput(file); 1463 out_rpm: 1464 intel_runtime_pm_put(&i915->runtime_pm, wakeref); 1465 1466 return ret; 1467 } 1468 1469 int i915_request_live_selftests(struct drm_i915_private *i915) 1470 { 1471 static const struct i915_subtest tests[] = { 1472 SUBTEST(live_nop_request), 1473 SUBTEST(live_all_engines), 1474 SUBTEST(live_sequential_engines), 1475 SUBTEST(live_parallel_engines), 1476 SUBTEST(live_empty_request), 1477 SUBTEST(live_breadcrumbs_smoketest), 1478 }; 1479 1480 if (intel_gt_is_wedged(&i915->gt)) 1481 return 0; 1482 1483 return i915_subtests(tests, i915); 1484 } 1485 1486 static int switch_to_kernel_sync(struct intel_context *ce, int err) 1487 { 1488 struct i915_request 
	struct dma_fence *fence;

	rq = intel_engine_create_kernel_request(ce->engine);
	if (IS_ERR(rq))
		return PTR_ERR(rq);

	fence = i915_active_fence_get(&ce->timeline->last_request);
	if (fence) {
		i915_request_await_dma_fence(rq, fence);
		dma_fence_put(fence);
	}

	rq = i915_request_get(rq);
	i915_request_add(rq);
	if (i915_request_wait(rq, 0, HZ / 2) < 0 && !err)
		err = -ETIME;
	i915_request_put(rq);

	while (!err && !intel_engine_is_idle(ce->engine))
		intel_engine_flush_submission(ce->engine);

	return err;
}

struct perf_stats {
	struct intel_engine_cs *engine;
	unsigned long count;
	ktime_t time;
	ktime_t busy;
	u64 runtime;
};

struct perf_series {
	struct drm_i915_private *i915;
	unsigned int nengines;
	struct intel_context *ce[];
};

static int s_sync0(void *arg)
{
	struct perf_series *ps = arg;
	IGT_TIMEOUT(end_time);
	unsigned int idx = 0;
	int err = 0;

	GEM_BUG_ON(!ps->nengines);
	do {
		struct i915_request *rq;

		rq = i915_request_create(ps->ce[idx]);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			break;
		}

		i915_request_get(rq);
		i915_request_add(rq);

		if (i915_request_wait(rq, 0, HZ / 5) < 0)
			err = -ETIME;
		i915_request_put(rq);
		if (err)
			break;

		if (++idx == ps->nengines)
			idx = 0;
	} while (!__igt_timeout(end_time, NULL));

	return err;
}

static int s_sync1(void *arg)
{
	struct perf_series *ps = arg;
	struct i915_request *prev = NULL;
	IGT_TIMEOUT(end_time);
	unsigned int idx = 0;
	int err = 0;

	GEM_BUG_ON(!ps->nengines);
	do {
		struct i915_request *rq;

		rq = i915_request_create(ps->ce[idx]);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			break;
		}

		i915_request_get(rq);
		i915_request_add(rq);

		if (prev && i915_request_wait(prev, 0, HZ / 5) < 0)
			err = -ETIME;
		i915_request_put(prev);
		prev = rq;
		if (err)
			break;

		if (++idx == ps->nengines)
			idx = 0;
	} while (!__igt_timeout(end_time, NULL));
	i915_request_put(prev);

	return err;
}

static int s_many(void *arg)
{
	struct perf_series *ps = arg;
	IGT_TIMEOUT(end_time);
	unsigned int idx = 0;

	GEM_BUG_ON(!ps->nengines);
	do {
		struct i915_request *rq;

		rq = i915_request_create(ps->ce[idx]);
		if (IS_ERR(rq))
			return PTR_ERR(rq);

		i915_request_add(rq);

		if (++idx == ps->nengines)
			idx = 0;
	} while (!__igt_timeout(end_time, NULL));

	return 0;
}

static int perf_series_engines(void *arg)
{
	struct drm_i915_private *i915 = arg;
	static int (* const func[])(void *arg) = {
		s_sync0,
		s_sync1,
		s_many,
		NULL,
	};
	const unsigned int nengines = num_uabi_engines(i915);
	struct intel_engine_cs *engine;
	int (* const *fn)(void *arg);
	struct pm_qos_request qos;
	struct perf_stats *stats;
	struct perf_series *ps;
	unsigned int idx;
	int err = 0;

	stats = kcalloc(nengines, sizeof(*stats), GFP_KERNEL);
	if (!stats)
		return -ENOMEM;

	ps = kzalloc(struct_size(ps, ce, nengines), GFP_KERNEL);
	if (!ps) {
		kfree(stats);
		return -ENOMEM;
	}

	cpu_latency_qos_add_request(&qos, 0); /* disable cstates */

	ps->i915 = i915;
	ps->nengines = nengines;

	idx = 0;
	for_each_uabi_engine(engine, i915) {
		struct intel_context *ce;

		ce = intel_context_create(engine);
		if (IS_ERR(ce)) {
			err = PTR_ERR(ce);
			goto out;
		}

		err = intel_context_pin(ce);
		if (err) {
			intel_context_put(ce);
			goto out;
		}

		ps->ce[idx++] = ce;
	}
	GEM_BUG_ON(idx != ps->nengines);

	for (fn = func; *fn && !err; fn++) {
		char name[KSYM_NAME_LEN];
		struct igt_live_test t;

		snprintf(name, sizeof(name), "%ps", *fn);
		err = igt_live_test_begin(&t, i915, __func__, name);
		if (err)
			break;

		for (idx = 0; idx < nengines; idx++) {
			struct perf_stats *p =
				memset(&stats[idx], 0, sizeof(stats[idx]));
			struct intel_context *ce = ps->ce[idx];

			p->engine = ps->ce[idx]->engine;
			intel_engine_pm_get(p->engine);

			/* Bias by +1 so a zero busy-time still marks stats as supported */
			if (intel_engine_supports_stats(p->engine))
				p->busy = intel_engine_get_busy_time(p->engine) + 1;
			p->runtime = -intel_context_get_total_runtime_ns(ce);
			p->time = ktime_get();
		}

		err = (*fn)(ps);
		if (igt_live_test_end(&t))
			err = -EIO;

		for (idx = 0; idx < nengines; idx++) {
			struct perf_stats *p = &stats[idx];
			struct intel_context *ce = ps->ce[idx];
			int integer, decimal;
			u64 busy, dt;

			p->time = ktime_sub(ktime_get(), p->time);
			if (p->busy) {
				p->busy = ktime_sub(intel_engine_get_busy_time(p->engine),
						    p->busy - 1);
			}

			err = switch_to_kernel_sync(ce, err);
			p->runtime += intel_context_get_total_runtime_ns(ce);
			intel_engine_pm_put(p->engine);

			busy = 100 * ktime_to_ns(p->busy);
			dt = ktime_to_ns(p->time);
			if (dt) {
				integer = div64_u64(busy, dt);
				busy -= integer * dt;
				decimal = div64_u64(100 * busy, dt);
			} else {
				integer = 0;
				decimal = 0;
			}

			pr_info("%s %5s: { seqno:%d, busy:%d.%02d%%, runtime:%lldms, walltime:%lldms }\n",
				name, p->engine->name, ce->timeline->seqno,
				integer, decimal,
				div_u64(p->runtime, 1000 * 1000),
				div_u64(ktime_to_ns(p->time), 1000 * 1000));
		}
	}

out:
	for (idx = 0; idx < nengines; idx++) {
		if (IS_ERR_OR_NULL(ps->ce[idx]))
			break;

		intel_context_unpin(ps->ce[idx]);
		intel_context_put(ps->ce[idx]);
	}
	kfree(ps);

	cpu_latency_qos_remove_request(&qos);
	kfree(stats);
	return err;
}

static int p_sync0(void *arg)
{
	struct perf_stats *p = arg;
	struct intel_engine_cs *engine = p->engine;
	struct intel_context *ce;
	IGT_TIMEOUT(end_time);
	unsigned long count;
	bool busy;
	int err = 0;

	ce = intel_context_create(engine);
	if (IS_ERR(ce))
		return PTR_ERR(ce);

	err = intel_context_pin(ce);
	if (err) {
		intel_context_put(ce);
		return err;
	}

	busy = false;
	if (intel_engine_supports_stats(engine)) {
		p->busy = intel_engine_get_busy_time(engine);
		busy = true;
	}

	p->time = ktime_get();
	count = 0;
	do {
		struct i915_request *rq;

		rq = i915_request_create(ce);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			break;
		}

		i915_request_get(rq);
		i915_request_add(rq);

		err = 0;
		if (i915_request_wait(rq, 0, HZ / 5) < 0)
			err = -ETIME;
		i915_request_put(rq);
		if (err)
			break;

		count++;
	} while (!__igt_timeout(end_time, NULL));
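	/* Request loop done: fold the start stamps into elapsed wall-clock/busy deltas below. */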
	p->time = ktime_sub(ktime_get(), p->time);

	if (busy) {
		p->busy = ktime_sub(intel_engine_get_busy_time(engine),
				    p->busy);
	}

	err = switch_to_kernel_sync(ce, err);
	p->runtime = intel_context_get_total_runtime_ns(ce);
	p->count = count;

	intel_context_unpin(ce);
	intel_context_put(ce);
	return err;
}

static int p_sync1(void *arg)
{
	struct perf_stats *p = arg;
	struct intel_engine_cs *engine = p->engine;
	struct i915_request *prev = NULL;
	struct intel_context *ce;
	IGT_TIMEOUT(end_time);
	unsigned long count;
	bool busy;
	int err = 0;

	ce = intel_context_create(engine);
	if (IS_ERR(ce))
		return PTR_ERR(ce);

	err = intel_context_pin(ce);
	if (err) {
		intel_context_put(ce);
		return err;
	}

	busy = false;
	if (intel_engine_supports_stats(engine)) {
		p->busy = intel_engine_get_busy_time(engine);
		busy = true;
	}

	p->time = ktime_get();
	count = 0;
	do {
		struct i915_request *rq;

		rq = i915_request_create(ce);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			break;
		}

		i915_request_get(rq);
		i915_request_add(rq);

		err = 0;
		if (prev && i915_request_wait(prev, 0, HZ / 5) < 0)
			err = -ETIME;
		i915_request_put(prev);
		prev = rq;
		if (err)
			break;

		count++;
	} while (!__igt_timeout(end_time, NULL));
	i915_request_put(prev);
	p->time = ktime_sub(ktime_get(), p->time);

	if (busy) {
		p->busy = ktime_sub(intel_engine_get_busy_time(engine),
				    p->busy);
	}

	err = switch_to_kernel_sync(ce, err);
	p->runtime = intel_context_get_total_runtime_ns(ce);
	p->count = count;

	intel_context_unpin(ce);
	intel_context_put(ce);
	return err;
}

static int p_many(void *arg)
{
	struct perf_stats *p = arg;
	struct intel_engine_cs *engine = p->engine;
	struct intel_context *ce;
	IGT_TIMEOUT(end_time);
	unsigned long count;
	int err = 0;
	bool busy;

	ce = intel_context_create(engine);
	if (IS_ERR(ce))
		return PTR_ERR(ce);

	err = intel_context_pin(ce);
	if (err) {
		intel_context_put(ce);
		return err;
	}

	busy = false;
	if (intel_engine_supports_stats(engine)) {
		p->busy = intel_engine_get_busy_time(engine);
		busy = true;
	}

	count = 0;
	p->time = ktime_get();
	do {
		struct i915_request *rq;

		rq = i915_request_create(ce);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			break;
		}

		i915_request_add(rq);
		count++;
	} while (!__igt_timeout(end_time, NULL));
	p->time = ktime_sub(ktime_get(), p->time);

	if (busy) {
		p->busy = ktime_sub(intel_engine_get_busy_time(engine),
				    p->busy);
	}

	err = switch_to_kernel_sync(ce, err);
	p->runtime = intel_context_get_total_runtime_ns(ce);
	p->count = count;

	intel_context_unpin(ce);
	intel_context_put(ce);
	return err;
}

static int perf_parallel_engines(void *arg)
{
	struct drm_i915_private *i915 = arg;
	static int (* const func[])(void *arg) = {
		p_sync0,
		p_sync1,
		p_many,
		NULL,
	};
	const unsigned int nengines = num_uabi_engines(i915);
	struct intel_engine_cs *engine;
	int (* const *fn)(void *arg);
	struct pm_qos_request qos;
	struct {
		struct perf_stats p;
		struct task_struct *tsk;
	} *engines;
	int err = 0;

	engines = kcalloc(nengines, sizeof(*engines), GFP_KERNEL);
	if (!engines)
		return -ENOMEM;

	cpu_latency_qos_add_request(&qos, 0);

	for (fn = func; *fn; fn++) {
		char name[KSYM_NAME_LEN];
		struct igt_live_test t;
		unsigned int idx;

		snprintf(name, sizeof(name), "%ps", *fn);
		err = igt_live_test_begin(&t, i915, __func__, name);
		if (err)
			break;

		atomic_set(&i915->selftest.counter, nengines);

		idx = 0;
		for_each_uabi_engine(engine, i915) {
			intel_engine_pm_get(engine);

			memset(&engines[idx].p, 0, sizeof(engines[idx].p));
			engines[idx].p.engine = engine;

			engines[idx].tsk = kthread_run(*fn, &engines[idx].p,
						       "igt:%s", engine->name);
			if (IS_ERR(engines[idx].tsk)) {
				err = PTR_ERR(engines[idx].tsk);
				intel_engine_pm_put(engine);
				break;
			}
			get_task_struct(engines[idx++].tsk);
		}

		yield(); /* start all threads before we kthread_stop() */

		idx = 0;
		for_each_uabi_engine(engine, i915) {
			int status;

			if (IS_ERR(engines[idx].tsk))
				break;

			status = kthread_stop(engines[idx].tsk);
			if (status && !err)
				err = status;

			intel_engine_pm_put(engine);
			put_task_struct(engines[idx++].tsk);
		}

		if (igt_live_test_end(&t))
			err = -EIO;
		if (err)
			break;

		idx = 0;
		for_each_uabi_engine(engine, i915) {
			struct perf_stats *p = &engines[idx].p;
			u64 busy = 100 * ktime_to_ns(p->busy);
			u64 dt = ktime_to_ns(p->time);
			int integer, decimal;

			if (dt) {
				integer = div64_u64(busy, dt);
				busy -= integer * dt;
				decimal = div64_u64(100 * busy, dt);
			} else {
				integer = 0;
				decimal = 0;
			}

			GEM_BUG_ON(engine != p->engine);
			pr_info("%s %5s: { count:%lu, busy:%d.%02d%%, runtime:%lldms, walltime:%lldms }\n",
				name, engine->name, p->count, integer, decimal,
				div_u64(p->runtime, 1000 * 1000),
				div_u64(ktime_to_ns(p->time), 1000 * 1000));
			idx++;
		}
	}

	cpu_latency_qos_remove_request(&qos);
	kfree(engines);
	return err;
}

int i915_request_perf_selftests(struct drm_i915_private *i915)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(perf_series_engines),
		SUBTEST(perf_parallel_engines),
	};

	if (intel_gt_is_wedged(&i915->gt))
		return 0;

	return i915_subtests(tests, i915);
}