1 /* 2 * Copyright © 2016 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 * 23 */ 24 25 #include <linux/prime_numbers.h> 26 27 #include "gem/i915_gem_pm.h" 28 #include "gem/selftests/mock_context.h" 29 30 #include "gt/intel_engine_pm.h" 31 #include "gt/intel_gt.h" 32 33 #include "i915_random.h" 34 #include "i915_selftest.h" 35 #include "igt_live_test.h" 36 #include "igt_spinner.h" 37 #include "lib_sw_fence.h" 38 39 #include "mock_drm.h" 40 #include "mock_gem_device.h" 41 42 static unsigned int num_uabi_engines(struct drm_i915_private *i915) 43 { 44 struct intel_engine_cs *engine; 45 unsigned int count; 46 47 count = 0; 48 for_each_uabi_engine(engine, i915) 49 count++; 50 51 return count; 52 } 53 54 static int igt_add_request(void *arg) 55 { 56 struct drm_i915_private *i915 = arg; 57 struct i915_request *request; 58 59 /* Basic preliminary test to create a request and let it loose! */ 60 61 request = mock_request(i915->engine[RCS0]->kernel_context, HZ / 10); 62 if (!request) 63 return -ENOMEM; 64 65 i915_request_add(request); 66 67 return 0; 68 } 69 70 static int igt_wait_request(void *arg) 71 { 72 const long T = HZ / 4; 73 struct drm_i915_private *i915 = arg; 74 struct i915_request *request; 75 int err = -EINVAL; 76 77 /* Submit a request, then wait upon it */ 78 79 request = mock_request(i915->engine[RCS0]->kernel_context, T); 80 if (!request) 81 return -ENOMEM; 82 83 i915_request_get(request); 84 85 if (i915_request_wait(request, 0, 0) != -ETIME) { 86 pr_err("request wait (busy query) succeeded (expected timeout before submit!)\n"); 87 goto out_request; 88 } 89 90 if (i915_request_wait(request, 0, T) != -ETIME) { 91 pr_err("request wait succeeded (expected timeout before submit!)\n"); 92 goto out_request; 93 } 94 95 if (i915_request_completed(request)) { 96 pr_err("request completed before submit!!\n"); 97 goto out_request; 98 } 99 100 i915_request_add(request); 101 102 if (i915_request_wait(request, 0, 0) != -ETIME) { 103 pr_err("request wait (busy query) succeeded (expected timeout after submit!)\n"); 104 goto out_request; 105 } 106 107 if (i915_request_completed(request)) { 108 pr_err("request completed immediately!\n"); 109 goto out_request; 110 } 111 112 if (i915_request_wait(request, 0, T / 2) != -ETIME) { 113 pr_err("request wait succeeded (expected timeout!)\n"); 114 goto out_request; 115 } 116 117 if (i915_request_wait(request, 0, T) == -ETIME) { 118 pr_err("request wait timed out!\n"); 119 goto out_request; 120 } 121 122 if (!i915_request_completed(request)) { 123 pr_err("request not complete after waiting!\n"); 124 goto out_request; 125 } 126 127 if (i915_request_wait(request, 0, T) == -ETIME) { 128 pr_err("request wait timed out when already complete!\n"); 129 goto out_request; 130 } 131 132 err = 0; 133 out_request: 134 i915_request_put(request); 135 mock_device_flush(i915); 136 return err; 137 } 138 139 static int igt_fence_wait(void *arg) 140 { 141 const long T = HZ / 4; 142 struct drm_i915_private *i915 = arg; 143 struct i915_request *request; 144 int err = -EINVAL; 145 146 /* Submit a request, treat it as a fence and wait upon it */ 147 148 request = mock_request(i915->engine[RCS0]->kernel_context, T); 149 if (!request) 150 return -ENOMEM; 151 152 if (dma_fence_wait_timeout(&request->fence, false, T) != -ETIME) { 153 pr_err("fence wait success before submit (expected timeout)!\n"); 154 goto out; 155 } 156 157 i915_request_add(request); 158 159 if (dma_fence_is_signaled(&request->fence)) { 160 pr_err("fence signaled immediately!\n"); 161 goto out; 162 } 163 164 if (dma_fence_wait_timeout(&request->fence, false, T / 2) != -ETIME) { 165 pr_err("fence wait success after submit (expected timeout)!\n"); 166 goto out; 167 } 168 169 if (dma_fence_wait_timeout(&request->fence, false, T) <= 0) { 170 pr_err("fence wait timed out (expected success)!\n"); 171 goto out; 172 } 173 174 if (!dma_fence_is_signaled(&request->fence)) { 175 pr_err("fence unsignaled after waiting!\n"); 176 goto out; 177 } 178 179 if (dma_fence_wait_timeout(&request->fence, false, T) <= 0) { 180 pr_err("fence wait timed out when complete (expected success)!\n"); 181 goto out; 182 } 183 184 err = 0; 185 out: 186 mock_device_flush(i915); 187 return err; 188 } 189 190 static int igt_request_rewind(void *arg) 191 { 192 struct drm_i915_private *i915 = arg; 193 struct i915_request *request, *vip; 194 struct i915_gem_context *ctx[2]; 195 struct intel_context *ce; 196 int err = -EINVAL; 197 198 ctx[0] = mock_context(i915, "A"); 199 200 ce = i915_gem_context_get_engine(ctx[0], RCS0); 201 GEM_BUG_ON(IS_ERR(ce)); 202 request = mock_request(ce, 2 * HZ); 203 intel_context_put(ce); 204 if (!request) { 205 err = -ENOMEM; 206 goto err_context_0; 207 } 208 209 i915_request_get(request); 210 i915_request_add(request); 211 212 ctx[1] = mock_context(i915, "B"); 213 214 ce = i915_gem_context_get_engine(ctx[1], RCS0); 215 GEM_BUG_ON(IS_ERR(ce)); 216 vip = mock_request(ce, 0); 217 intel_context_put(ce); 218 if (!vip) { 219 err = -ENOMEM; 220 goto err_context_1; 221 } 222 223 /* Simulate preemption by manual reordering */ 224 if (!mock_cancel_request(request)) { 225 pr_err("failed to cancel request (already executed)!\n"); 226 i915_request_add(vip); 227 goto err_context_1; 228 } 229 i915_request_get(vip); 230 i915_request_add(vip); 231 rcu_read_lock(); 232 request->engine->submit_request(request); 233 rcu_read_unlock(); 234 235 236 if (i915_request_wait(vip, 0, HZ) == -ETIME) { 237 pr_err("timed out waiting for high priority request\n"); 238 goto err; 239 } 240 241 if (i915_request_completed(request)) { 242 pr_err("low priority request already completed\n"); 243 goto err; 244 } 245 246 err = 0; 247 err: 248 i915_request_put(vip); 249 err_context_1: 250 mock_context_close(ctx[1]); 251 i915_request_put(request); 252 err_context_0: 253 mock_context_close(ctx[0]); 254 mock_device_flush(i915); 255 return err; 256 } 257 258 struct smoketest { 259 struct intel_engine_cs *engine; 260 struct i915_gem_context **contexts; 261 atomic_long_t num_waits, num_fences; 262 int ncontexts, max_batch; 263 struct i915_request *(*request_alloc)(struct intel_context *ce); 264 }; 265 266 static struct i915_request * 267 __mock_request_alloc(struct intel_context *ce) 268 { 269 return mock_request(ce, 0); 270 } 271 272 static struct i915_request * 273 __live_request_alloc(struct intel_context *ce) 274 { 275 return intel_context_create_request(ce); 276 } 277 278 static int __igt_breadcrumbs_smoketest(void *arg) 279 { 280 struct smoketest *t = arg; 281 const unsigned int max_batch = min(t->ncontexts, t->max_batch) - 1; 282 const unsigned int total = 4 * t->ncontexts + 1; 283 unsigned int num_waits = 0, num_fences = 0; 284 struct i915_request **requests; 285 I915_RND_STATE(prng); 286 unsigned int *order; 287 int err = 0; 288 289 /* 290 * A very simple test to catch the most egregious of list handling bugs. 291 * 292 * At its heart, we simply create oodles of requests running across 293 * multiple kthreads and enable signaling on them, for the sole purpose 294 * of stressing our breadcrumb handling. The only inspection we do is 295 * that the fences were marked as signaled. 296 */ 297 298 requests = kcalloc(total, sizeof(*requests), GFP_KERNEL); 299 if (!requests) 300 return -ENOMEM; 301 302 order = i915_random_order(total, &prng); 303 if (!order) { 304 err = -ENOMEM; 305 goto out_requests; 306 } 307 308 while (!kthread_should_stop()) { 309 struct i915_sw_fence *submit, *wait; 310 unsigned int n, count; 311 312 submit = heap_fence_create(GFP_KERNEL); 313 if (!submit) { 314 err = -ENOMEM; 315 break; 316 } 317 318 wait = heap_fence_create(GFP_KERNEL); 319 if (!wait) { 320 i915_sw_fence_commit(submit); 321 heap_fence_put(submit); 322 err = ENOMEM; 323 break; 324 } 325 326 i915_random_reorder(order, total, &prng); 327 count = 1 + i915_prandom_u32_max_state(max_batch, &prng); 328 329 for (n = 0; n < count; n++) { 330 struct i915_gem_context *ctx = 331 t->contexts[order[n] % t->ncontexts]; 332 struct i915_request *rq; 333 struct intel_context *ce; 334 335 ce = i915_gem_context_get_engine(ctx, t->engine->legacy_idx); 336 GEM_BUG_ON(IS_ERR(ce)); 337 rq = t->request_alloc(ce); 338 intel_context_put(ce); 339 if (IS_ERR(rq)) { 340 err = PTR_ERR(rq); 341 count = n; 342 break; 343 } 344 345 err = i915_sw_fence_await_sw_fence_gfp(&rq->submit, 346 submit, 347 GFP_KERNEL); 348 349 requests[n] = i915_request_get(rq); 350 i915_request_add(rq); 351 352 if (err >= 0) 353 err = i915_sw_fence_await_dma_fence(wait, 354 &rq->fence, 355 0, 356 GFP_KERNEL); 357 358 if (err < 0) { 359 i915_request_put(rq); 360 count = n; 361 break; 362 } 363 } 364 365 i915_sw_fence_commit(submit); 366 i915_sw_fence_commit(wait); 367 368 if (!wait_event_timeout(wait->wait, 369 i915_sw_fence_done(wait), 370 5 * HZ)) { 371 struct i915_request *rq = requests[count - 1]; 372 373 pr_err("waiting for %d/%d fences (last %llx:%lld) on %s timed out!\n", 374 atomic_read(&wait->pending), count, 375 rq->fence.context, rq->fence.seqno, 376 t->engine->name); 377 GEM_TRACE_DUMP(); 378 379 intel_gt_set_wedged(t->engine->gt); 380 GEM_BUG_ON(!i915_request_completed(rq)); 381 i915_sw_fence_wait(wait); 382 err = -EIO; 383 } 384 385 for (n = 0; n < count; n++) { 386 struct i915_request *rq = requests[n]; 387 388 if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, 389 &rq->fence.flags)) { 390 pr_err("%llu:%llu was not signaled!\n", 391 rq->fence.context, rq->fence.seqno); 392 err = -EINVAL; 393 } 394 395 i915_request_put(rq); 396 } 397 398 heap_fence_put(wait); 399 heap_fence_put(submit); 400 401 if (err < 0) 402 break; 403 404 num_fences += count; 405 num_waits++; 406 407 cond_resched(); 408 } 409 410 atomic_long_add(num_fences, &t->num_fences); 411 atomic_long_add(num_waits, &t->num_waits); 412 413 kfree(order); 414 out_requests: 415 kfree(requests); 416 return err; 417 } 418 419 static int mock_breadcrumbs_smoketest(void *arg) 420 { 421 struct drm_i915_private *i915 = arg; 422 struct smoketest t = { 423 .engine = i915->engine[RCS0], 424 .ncontexts = 1024, 425 .max_batch = 1024, 426 .request_alloc = __mock_request_alloc 427 }; 428 unsigned int ncpus = num_online_cpus(); 429 struct task_struct **threads; 430 unsigned int n; 431 int ret = 0; 432 433 /* 434 * Smoketest our breadcrumb/signal handling for requests across multiple 435 * threads. A very simple test to only catch the most egregious of bugs. 436 * See __igt_breadcrumbs_smoketest(); 437 */ 438 439 threads = kcalloc(ncpus, sizeof(*threads), GFP_KERNEL); 440 if (!threads) 441 return -ENOMEM; 442 443 t.contexts = kcalloc(t.ncontexts, sizeof(*t.contexts), GFP_KERNEL); 444 if (!t.contexts) { 445 ret = -ENOMEM; 446 goto out_threads; 447 } 448 449 for (n = 0; n < t.ncontexts; n++) { 450 t.contexts[n] = mock_context(t.engine->i915, "mock"); 451 if (!t.contexts[n]) { 452 ret = -ENOMEM; 453 goto out_contexts; 454 } 455 } 456 457 for (n = 0; n < ncpus; n++) { 458 threads[n] = kthread_run(__igt_breadcrumbs_smoketest, 459 &t, "igt/%d", n); 460 if (IS_ERR(threads[n])) { 461 ret = PTR_ERR(threads[n]); 462 ncpus = n; 463 break; 464 } 465 466 get_task_struct(threads[n]); 467 } 468 469 yield(); /* start all threads before we begin */ 470 msleep(jiffies_to_msecs(i915_selftest.timeout_jiffies)); 471 472 for (n = 0; n < ncpus; n++) { 473 int err; 474 475 err = kthread_stop(threads[n]); 476 if (err < 0 && !ret) 477 ret = err; 478 479 put_task_struct(threads[n]); 480 } 481 pr_info("Completed %lu waits for %lu fence across %d cpus\n", 482 atomic_long_read(&t.num_waits), 483 atomic_long_read(&t.num_fences), 484 ncpus); 485 486 out_contexts: 487 for (n = 0; n < t.ncontexts; n++) { 488 if (!t.contexts[n]) 489 break; 490 mock_context_close(t.contexts[n]); 491 } 492 kfree(t.contexts); 493 out_threads: 494 kfree(threads); 495 return ret; 496 } 497 498 int i915_request_mock_selftests(void) 499 { 500 static const struct i915_subtest tests[] = { 501 SUBTEST(igt_add_request), 502 SUBTEST(igt_wait_request), 503 SUBTEST(igt_fence_wait), 504 SUBTEST(igt_request_rewind), 505 SUBTEST(mock_breadcrumbs_smoketest), 506 }; 507 struct drm_i915_private *i915; 508 intel_wakeref_t wakeref; 509 int err = 0; 510 511 i915 = mock_gem_device(); 512 if (!i915) 513 return -ENOMEM; 514 515 with_intel_runtime_pm(&i915->runtime_pm, wakeref) 516 err = i915_subtests(tests, i915); 517 518 drm_dev_put(&i915->drm); 519 520 return err; 521 } 522 523 static int live_nop_request(void *arg) 524 { 525 struct drm_i915_private *i915 = arg; 526 struct intel_engine_cs *engine; 527 struct igt_live_test t; 528 int err = -ENODEV; 529 530 /* 531 * Submit various sized batches of empty requests, to each engine 532 * (individually), and wait for the batch to complete. We can check 533 * the overhead of submitting requests to the hardware. 534 */ 535 536 for_each_uabi_engine(engine, i915) { 537 unsigned long n, prime; 538 IGT_TIMEOUT(end_time); 539 ktime_t times[2] = {}; 540 541 err = igt_live_test_begin(&t, i915, __func__, engine->name); 542 if (err) 543 return err; 544 545 intel_engine_pm_get(engine); 546 for_each_prime_number_from(prime, 1, 8192) { 547 struct i915_request *request = NULL; 548 549 times[1] = ktime_get_raw(); 550 551 for (n = 0; n < prime; n++) { 552 i915_request_put(request); 553 request = i915_request_create(engine->kernel_context); 554 if (IS_ERR(request)) 555 return PTR_ERR(request); 556 557 /* 558 * This space is left intentionally blank. 559 * 560 * We do not actually want to perform any 561 * action with this request, we just want 562 * to measure the latency in allocation 563 * and submission of our breadcrumbs - 564 * ensuring that the bare request is sufficient 565 * for the system to work (i.e. proper HEAD 566 * tracking of the rings, interrupt handling, 567 * etc). It also gives us the lowest bounds 568 * for latency. 569 */ 570 571 i915_request_get(request); 572 i915_request_add(request); 573 } 574 i915_request_wait(request, 0, MAX_SCHEDULE_TIMEOUT); 575 i915_request_put(request); 576 577 times[1] = ktime_sub(ktime_get_raw(), times[1]); 578 if (prime == 1) 579 times[0] = times[1]; 580 581 if (__igt_timeout(end_time, NULL)) 582 break; 583 } 584 intel_engine_pm_put(engine); 585 586 err = igt_live_test_end(&t); 587 if (err) 588 return err; 589 590 pr_info("Request latencies on %s: 1 = %lluns, %lu = %lluns\n", 591 engine->name, 592 ktime_to_ns(times[0]), 593 prime, div64_u64(ktime_to_ns(times[1]), prime)); 594 } 595 596 return err; 597 } 598 599 static struct i915_vma *empty_batch(struct drm_i915_private *i915) 600 { 601 struct drm_i915_gem_object *obj; 602 struct i915_vma *vma; 603 u32 *cmd; 604 int err; 605 606 obj = i915_gem_object_create_internal(i915, PAGE_SIZE); 607 if (IS_ERR(obj)) 608 return ERR_CAST(obj); 609 610 cmd = i915_gem_object_pin_map(obj, I915_MAP_WB); 611 if (IS_ERR(cmd)) { 612 err = PTR_ERR(cmd); 613 goto err; 614 } 615 616 *cmd = MI_BATCH_BUFFER_END; 617 618 __i915_gem_object_flush_map(obj, 0, 64); 619 i915_gem_object_unpin_map(obj); 620 621 intel_gt_chipset_flush(&i915->gt); 622 623 vma = i915_vma_instance(obj, &i915->ggtt.vm, NULL); 624 if (IS_ERR(vma)) { 625 err = PTR_ERR(vma); 626 goto err; 627 } 628 629 err = i915_vma_pin(vma, 0, 0, PIN_USER | PIN_GLOBAL); 630 if (err) 631 goto err; 632 633 /* Force the wait wait now to avoid including it in the benchmark */ 634 err = i915_vma_sync(vma); 635 if (err) 636 goto err_pin; 637 638 return vma; 639 640 err_pin: 641 i915_vma_unpin(vma); 642 err: 643 i915_gem_object_put(obj); 644 return ERR_PTR(err); 645 } 646 647 static struct i915_request * 648 empty_request(struct intel_engine_cs *engine, 649 struct i915_vma *batch) 650 { 651 struct i915_request *request; 652 int err; 653 654 request = i915_request_create(engine->kernel_context); 655 if (IS_ERR(request)) 656 return request; 657 658 err = engine->emit_bb_start(request, 659 batch->node.start, 660 batch->node.size, 661 I915_DISPATCH_SECURE); 662 if (err) 663 goto out_request; 664 665 i915_request_get(request); 666 out_request: 667 i915_request_add(request); 668 return err ? ERR_PTR(err) : request; 669 } 670 671 static int live_empty_request(void *arg) 672 { 673 struct drm_i915_private *i915 = arg; 674 struct intel_engine_cs *engine; 675 struct igt_live_test t; 676 struct i915_vma *batch; 677 int err = 0; 678 679 /* 680 * Submit various sized batches of empty requests, to each engine 681 * (individually), and wait for the batch to complete. We can check 682 * the overhead of submitting requests to the hardware. 683 */ 684 685 batch = empty_batch(i915); 686 if (IS_ERR(batch)) 687 return PTR_ERR(batch); 688 689 for_each_uabi_engine(engine, i915) { 690 IGT_TIMEOUT(end_time); 691 struct i915_request *request; 692 unsigned long n, prime; 693 ktime_t times[2] = {}; 694 695 err = igt_live_test_begin(&t, i915, __func__, engine->name); 696 if (err) 697 goto out_batch; 698 699 intel_engine_pm_get(engine); 700 701 /* Warmup / preload */ 702 request = empty_request(engine, batch); 703 if (IS_ERR(request)) { 704 err = PTR_ERR(request); 705 intel_engine_pm_put(engine); 706 goto out_batch; 707 } 708 i915_request_wait(request, 0, MAX_SCHEDULE_TIMEOUT); 709 710 for_each_prime_number_from(prime, 1, 8192) { 711 times[1] = ktime_get_raw(); 712 713 for (n = 0; n < prime; n++) { 714 i915_request_put(request); 715 request = empty_request(engine, batch); 716 if (IS_ERR(request)) { 717 err = PTR_ERR(request); 718 intel_engine_pm_put(engine); 719 goto out_batch; 720 } 721 } 722 i915_request_wait(request, 0, MAX_SCHEDULE_TIMEOUT); 723 724 times[1] = ktime_sub(ktime_get_raw(), times[1]); 725 if (prime == 1) 726 times[0] = times[1]; 727 728 if (__igt_timeout(end_time, NULL)) 729 break; 730 } 731 i915_request_put(request); 732 intel_engine_pm_put(engine); 733 734 err = igt_live_test_end(&t); 735 if (err) 736 goto out_batch; 737 738 pr_info("Batch latencies on %s: 1 = %lluns, %lu = %lluns\n", 739 engine->name, 740 ktime_to_ns(times[0]), 741 prime, div64_u64(ktime_to_ns(times[1]), prime)); 742 } 743 744 out_batch: 745 i915_vma_unpin(batch); 746 i915_vma_put(batch); 747 return err; 748 } 749 750 static struct i915_vma *recursive_batch(struct drm_i915_private *i915) 751 { 752 struct drm_i915_gem_object *obj; 753 const int gen = INTEL_GEN(i915); 754 struct i915_vma *vma; 755 u32 *cmd; 756 int err; 757 758 obj = i915_gem_object_create_internal(i915, PAGE_SIZE); 759 if (IS_ERR(obj)) 760 return ERR_CAST(obj); 761 762 vma = i915_vma_instance(obj, i915->gt.vm, NULL); 763 if (IS_ERR(vma)) { 764 err = PTR_ERR(vma); 765 goto err; 766 } 767 768 err = i915_vma_pin(vma, 0, 0, PIN_USER); 769 if (err) 770 goto err; 771 772 cmd = i915_gem_object_pin_map(obj, I915_MAP_WC); 773 if (IS_ERR(cmd)) { 774 err = PTR_ERR(cmd); 775 goto err; 776 } 777 778 if (gen >= 8) { 779 *cmd++ = MI_BATCH_BUFFER_START | 1 << 8 | 1; 780 *cmd++ = lower_32_bits(vma->node.start); 781 *cmd++ = upper_32_bits(vma->node.start); 782 } else if (gen >= 6) { 783 *cmd++ = MI_BATCH_BUFFER_START | 1 << 8; 784 *cmd++ = lower_32_bits(vma->node.start); 785 } else { 786 *cmd++ = MI_BATCH_BUFFER_START | MI_BATCH_GTT; 787 *cmd++ = lower_32_bits(vma->node.start); 788 } 789 *cmd++ = MI_BATCH_BUFFER_END; /* terminate early in case of error */ 790 791 __i915_gem_object_flush_map(obj, 0, 64); 792 i915_gem_object_unpin_map(obj); 793 794 intel_gt_chipset_flush(&i915->gt); 795 796 return vma; 797 798 err: 799 i915_gem_object_put(obj); 800 return ERR_PTR(err); 801 } 802 803 static int recursive_batch_resolve(struct i915_vma *batch) 804 { 805 u32 *cmd; 806 807 cmd = i915_gem_object_pin_map(batch->obj, I915_MAP_WC); 808 if (IS_ERR(cmd)) 809 return PTR_ERR(cmd); 810 811 *cmd = MI_BATCH_BUFFER_END; 812 intel_gt_chipset_flush(batch->vm->gt); 813 814 i915_gem_object_unpin_map(batch->obj); 815 816 return 0; 817 } 818 819 static int live_all_engines(void *arg) 820 { 821 struct drm_i915_private *i915 = arg; 822 const unsigned int nengines = num_uabi_engines(i915); 823 struct intel_engine_cs *engine; 824 struct i915_request **request; 825 struct igt_live_test t; 826 struct i915_vma *batch; 827 unsigned int idx; 828 int err; 829 830 /* 831 * Check we can submit requests to all engines simultaneously. We 832 * send a recursive batch to each engine - checking that we don't 833 * block doing so, and that they don't complete too soon. 834 */ 835 836 request = kcalloc(nengines, sizeof(*request), GFP_KERNEL); 837 if (!request) 838 return -ENOMEM; 839 840 err = igt_live_test_begin(&t, i915, __func__, ""); 841 if (err) 842 goto out_free; 843 844 batch = recursive_batch(i915); 845 if (IS_ERR(batch)) { 846 err = PTR_ERR(batch); 847 pr_err("%s: Unable to create batch, err=%d\n", __func__, err); 848 goto out_free; 849 } 850 851 idx = 0; 852 for_each_uabi_engine(engine, i915) { 853 request[idx] = intel_engine_create_kernel_request(engine); 854 if (IS_ERR(request[idx])) { 855 err = PTR_ERR(request[idx]); 856 pr_err("%s: Request allocation failed with err=%d\n", 857 __func__, err); 858 goto out_request; 859 } 860 861 err = engine->emit_bb_start(request[idx], 862 batch->node.start, 863 batch->node.size, 864 0); 865 GEM_BUG_ON(err); 866 request[idx]->batch = batch; 867 868 i915_vma_lock(batch); 869 err = i915_request_await_object(request[idx], batch->obj, 0); 870 if (err == 0) 871 err = i915_vma_move_to_active(batch, request[idx], 0); 872 i915_vma_unlock(batch); 873 GEM_BUG_ON(err); 874 875 i915_request_get(request[idx]); 876 i915_request_add(request[idx]); 877 idx++; 878 } 879 880 idx = 0; 881 for_each_uabi_engine(engine, i915) { 882 if (i915_request_completed(request[idx])) { 883 pr_err("%s(%s): request completed too early!\n", 884 __func__, engine->name); 885 err = -EINVAL; 886 goto out_request; 887 } 888 idx++; 889 } 890 891 err = recursive_batch_resolve(batch); 892 if (err) { 893 pr_err("%s: failed to resolve batch, err=%d\n", __func__, err); 894 goto out_request; 895 } 896 897 idx = 0; 898 for_each_uabi_engine(engine, i915) { 899 long timeout; 900 901 timeout = i915_request_wait(request[idx], 0, 902 MAX_SCHEDULE_TIMEOUT); 903 if (timeout < 0) { 904 err = timeout; 905 pr_err("%s: error waiting for request on %s, err=%d\n", 906 __func__, engine->name, err); 907 goto out_request; 908 } 909 910 GEM_BUG_ON(!i915_request_completed(request[idx])); 911 i915_request_put(request[idx]); 912 request[idx] = NULL; 913 idx++; 914 } 915 916 err = igt_live_test_end(&t); 917 918 out_request: 919 idx = 0; 920 for_each_uabi_engine(engine, i915) { 921 if (request[idx]) 922 i915_request_put(request[idx]); 923 idx++; 924 } 925 i915_vma_unpin(batch); 926 i915_vma_put(batch); 927 out_free: 928 kfree(request); 929 return err; 930 } 931 932 static int live_sequential_engines(void *arg) 933 { 934 struct drm_i915_private *i915 = arg; 935 const unsigned int nengines = num_uabi_engines(i915); 936 struct i915_request **request; 937 struct i915_request *prev = NULL; 938 struct intel_engine_cs *engine; 939 struct igt_live_test t; 940 unsigned int idx; 941 int err; 942 943 /* 944 * Check we can submit requests to all engines sequentially, such 945 * that each successive request waits for the earlier ones. This 946 * tests that we don't execute requests out of order, even though 947 * they are running on independent engines. 948 */ 949 950 request = kcalloc(nengines, sizeof(*request), GFP_KERNEL); 951 if (!request) 952 return -ENOMEM; 953 954 err = igt_live_test_begin(&t, i915, __func__, ""); 955 if (err) 956 goto out_free; 957 958 idx = 0; 959 for_each_uabi_engine(engine, i915) { 960 struct i915_vma *batch; 961 962 batch = recursive_batch(i915); 963 if (IS_ERR(batch)) { 964 err = PTR_ERR(batch); 965 pr_err("%s: Unable to create batch for %s, err=%d\n", 966 __func__, engine->name, err); 967 goto out_free; 968 } 969 970 request[idx] = intel_engine_create_kernel_request(engine); 971 if (IS_ERR(request[idx])) { 972 err = PTR_ERR(request[idx]); 973 pr_err("%s: Request allocation failed for %s with err=%d\n", 974 __func__, engine->name, err); 975 goto out_request; 976 } 977 978 if (prev) { 979 err = i915_request_await_dma_fence(request[idx], 980 &prev->fence); 981 if (err) { 982 i915_request_add(request[idx]); 983 pr_err("%s: Request await failed for %s with err=%d\n", 984 __func__, engine->name, err); 985 goto out_request; 986 } 987 } 988 989 err = engine->emit_bb_start(request[idx], 990 batch->node.start, 991 batch->node.size, 992 0); 993 GEM_BUG_ON(err); 994 request[idx]->batch = batch; 995 996 i915_vma_lock(batch); 997 err = i915_request_await_object(request[idx], 998 batch->obj, false); 999 if (err == 0) 1000 err = i915_vma_move_to_active(batch, request[idx], 0); 1001 i915_vma_unlock(batch); 1002 GEM_BUG_ON(err); 1003 1004 i915_request_get(request[idx]); 1005 i915_request_add(request[idx]); 1006 1007 prev = request[idx]; 1008 idx++; 1009 } 1010 1011 idx = 0; 1012 for_each_uabi_engine(engine, i915) { 1013 long timeout; 1014 1015 if (i915_request_completed(request[idx])) { 1016 pr_err("%s(%s): request completed too early!\n", 1017 __func__, engine->name); 1018 err = -EINVAL; 1019 goto out_request; 1020 } 1021 1022 err = recursive_batch_resolve(request[idx]->batch); 1023 if (err) { 1024 pr_err("%s: failed to resolve batch, err=%d\n", 1025 __func__, err); 1026 goto out_request; 1027 } 1028 1029 timeout = i915_request_wait(request[idx], 0, 1030 MAX_SCHEDULE_TIMEOUT); 1031 if (timeout < 0) { 1032 err = timeout; 1033 pr_err("%s: error waiting for request on %s, err=%d\n", 1034 __func__, engine->name, err); 1035 goto out_request; 1036 } 1037 1038 GEM_BUG_ON(!i915_request_completed(request[idx])); 1039 idx++; 1040 } 1041 1042 err = igt_live_test_end(&t); 1043 1044 out_request: 1045 idx = 0; 1046 for_each_uabi_engine(engine, i915) { 1047 u32 *cmd; 1048 1049 if (!request[idx]) 1050 break; 1051 1052 cmd = i915_gem_object_pin_map(request[idx]->batch->obj, 1053 I915_MAP_WC); 1054 if (!IS_ERR(cmd)) { 1055 *cmd = MI_BATCH_BUFFER_END; 1056 intel_gt_chipset_flush(engine->gt); 1057 1058 i915_gem_object_unpin_map(request[idx]->batch->obj); 1059 } 1060 1061 i915_vma_put(request[idx]->batch); 1062 i915_request_put(request[idx]); 1063 idx++; 1064 } 1065 out_free: 1066 kfree(request); 1067 return err; 1068 } 1069 1070 static int __live_parallel_engine1(void *arg) 1071 { 1072 struct intel_engine_cs *engine = arg; 1073 IGT_TIMEOUT(end_time); 1074 unsigned long count; 1075 int err = 0; 1076 1077 count = 0; 1078 intel_engine_pm_get(engine); 1079 do { 1080 struct i915_request *rq; 1081 1082 rq = i915_request_create(engine->kernel_context); 1083 if (IS_ERR(rq)) { 1084 err = PTR_ERR(rq); 1085 break; 1086 } 1087 1088 i915_request_get(rq); 1089 i915_request_add(rq); 1090 1091 err = 0; 1092 if (i915_request_wait(rq, 0, HZ / 5) < 0) 1093 err = -ETIME; 1094 i915_request_put(rq); 1095 if (err) 1096 break; 1097 1098 count++; 1099 } while (!__igt_timeout(end_time, NULL)); 1100 intel_engine_pm_put(engine); 1101 1102 pr_info("%s: %lu request + sync\n", engine->name, count); 1103 return err; 1104 } 1105 1106 static int __live_parallel_engineN(void *arg) 1107 { 1108 struct intel_engine_cs *engine = arg; 1109 IGT_TIMEOUT(end_time); 1110 unsigned long count; 1111 int err = 0; 1112 1113 count = 0; 1114 intel_engine_pm_get(engine); 1115 do { 1116 struct i915_request *rq; 1117 1118 rq = i915_request_create(engine->kernel_context); 1119 if (IS_ERR(rq)) { 1120 err = PTR_ERR(rq); 1121 break; 1122 } 1123 1124 i915_request_add(rq); 1125 count++; 1126 } while (!__igt_timeout(end_time, NULL)); 1127 intel_engine_pm_put(engine); 1128 1129 pr_info("%s: %lu requests\n", engine->name, count); 1130 return err; 1131 } 1132 1133 static bool wake_all(struct drm_i915_private *i915) 1134 { 1135 if (atomic_dec_and_test(&i915->selftest.counter)) { 1136 wake_up_var(&i915->selftest.counter); 1137 return true; 1138 } 1139 1140 return false; 1141 } 1142 1143 static int wait_for_all(struct drm_i915_private *i915) 1144 { 1145 if (wake_all(i915)) 1146 return 0; 1147 1148 if (wait_var_event_timeout(&i915->selftest.counter, 1149 !atomic_read(&i915->selftest.counter), 1150 i915_selftest.timeout_jiffies)) 1151 return 0; 1152 1153 return -ETIME; 1154 } 1155 1156 static int __live_parallel_spin(void *arg) 1157 { 1158 struct intel_engine_cs *engine = arg; 1159 struct igt_spinner spin; 1160 struct i915_request *rq; 1161 int err = 0; 1162 1163 /* 1164 * Create a spinner running for eternity on each engine. If a second 1165 * spinner is incorrectly placed on the same engine, it will not be 1166 * able to start in time. 1167 */ 1168 1169 if (igt_spinner_init(&spin, engine->gt)) { 1170 wake_all(engine->i915); 1171 return -ENOMEM; 1172 } 1173 1174 intel_engine_pm_get(engine); 1175 rq = igt_spinner_create_request(&spin, 1176 engine->kernel_context, 1177 MI_NOOP); /* no preemption */ 1178 intel_engine_pm_put(engine); 1179 if (IS_ERR(rq)) { 1180 err = PTR_ERR(rq); 1181 if (err == -ENODEV) 1182 err = 0; 1183 wake_all(engine->i915); 1184 goto out_spin; 1185 } 1186 1187 i915_request_get(rq); 1188 i915_request_add(rq); 1189 if (igt_wait_for_spinner(&spin, rq)) { 1190 /* Occupy this engine for the whole test */ 1191 err = wait_for_all(engine->i915); 1192 } else { 1193 pr_err("Failed to start spinner on %s\n", engine->name); 1194 err = -EINVAL; 1195 } 1196 igt_spinner_end(&spin); 1197 1198 if (err == 0 && i915_request_wait(rq, 0, HZ / 5) < 0) 1199 err = -EIO; 1200 i915_request_put(rq); 1201 1202 out_spin: 1203 igt_spinner_fini(&spin); 1204 return err; 1205 } 1206 1207 static int live_parallel_engines(void *arg) 1208 { 1209 struct drm_i915_private *i915 = arg; 1210 static int (* const func[])(void *arg) = { 1211 __live_parallel_engine1, 1212 __live_parallel_engineN, 1213 __live_parallel_spin, 1214 NULL, 1215 }; 1216 const unsigned int nengines = num_uabi_engines(i915); 1217 struct intel_engine_cs *engine; 1218 int (* const *fn)(void *arg); 1219 struct task_struct **tsk; 1220 int err = 0; 1221 1222 /* 1223 * Check we can submit requests to all engines concurrently. This 1224 * tests that we load up the system maximally. 1225 */ 1226 1227 tsk = kcalloc(nengines, sizeof(*tsk), GFP_KERNEL); 1228 if (!tsk) 1229 return -ENOMEM; 1230 1231 for (fn = func; !err && *fn; fn++) { 1232 char name[KSYM_NAME_LEN]; 1233 struct igt_live_test t; 1234 unsigned int idx; 1235 1236 snprintf(name, sizeof(name), "%pS", fn); 1237 err = igt_live_test_begin(&t, i915, __func__, name); 1238 if (err) 1239 break; 1240 1241 atomic_set(&i915->selftest.counter, nengines); 1242 1243 idx = 0; 1244 for_each_uabi_engine(engine, i915) { 1245 tsk[idx] = kthread_run(*fn, engine, 1246 "igt/parallel:%s", 1247 engine->name); 1248 if (IS_ERR(tsk[idx])) { 1249 err = PTR_ERR(tsk[idx]); 1250 break; 1251 } 1252 get_task_struct(tsk[idx++]); 1253 } 1254 1255 yield(); /* start all threads before we kthread_stop() */ 1256 1257 idx = 0; 1258 for_each_uabi_engine(engine, i915) { 1259 int status; 1260 1261 if (IS_ERR(tsk[idx])) 1262 break; 1263 1264 status = kthread_stop(tsk[idx]); 1265 if (status && !err) 1266 err = status; 1267 1268 put_task_struct(tsk[idx++]); 1269 } 1270 1271 if (igt_live_test_end(&t)) 1272 err = -EIO; 1273 } 1274 1275 kfree(tsk); 1276 return err; 1277 } 1278 1279 static int 1280 max_batches(struct i915_gem_context *ctx, struct intel_engine_cs *engine) 1281 { 1282 struct i915_request *rq; 1283 int ret; 1284 1285 /* 1286 * Before execlists, all contexts share the same ringbuffer. With 1287 * execlists, each context/engine has a separate ringbuffer and 1288 * for the purposes of this test, inexhaustible. 1289 * 1290 * For the global ringbuffer though, we have to be very careful 1291 * that we do not wrap while preventing the execution of requests 1292 * with a unsignaled fence. 1293 */ 1294 if (HAS_EXECLISTS(ctx->i915)) 1295 return INT_MAX; 1296 1297 rq = igt_request_alloc(ctx, engine); 1298 if (IS_ERR(rq)) { 1299 ret = PTR_ERR(rq); 1300 } else { 1301 int sz; 1302 1303 ret = rq->ring->size - rq->reserved_space; 1304 i915_request_add(rq); 1305 1306 sz = rq->ring->emit - rq->head; 1307 if (sz < 0) 1308 sz += rq->ring->size; 1309 ret /= sz; 1310 ret /= 2; /* leave half spare, in case of emergency! */ 1311 } 1312 1313 return ret; 1314 } 1315 1316 static int live_breadcrumbs_smoketest(void *arg) 1317 { 1318 struct drm_i915_private *i915 = arg; 1319 const unsigned int nengines = num_uabi_engines(i915); 1320 const unsigned int ncpus = num_online_cpus(); 1321 unsigned long num_waits, num_fences; 1322 struct intel_engine_cs *engine; 1323 struct task_struct **threads; 1324 struct igt_live_test live; 1325 intel_wakeref_t wakeref; 1326 struct smoketest *smoke; 1327 unsigned int n, idx; 1328 struct file *file; 1329 int ret = 0; 1330 1331 /* 1332 * Smoketest our breadcrumb/signal handling for requests across multiple 1333 * threads. A very simple test to only catch the most egregious of bugs. 1334 * See __igt_breadcrumbs_smoketest(); 1335 * 1336 * On real hardware this time. 1337 */ 1338 1339 wakeref = intel_runtime_pm_get(&i915->runtime_pm); 1340 1341 file = mock_file(i915); 1342 if (IS_ERR(file)) { 1343 ret = PTR_ERR(file); 1344 goto out_rpm; 1345 } 1346 1347 smoke = kcalloc(nengines, sizeof(*smoke), GFP_KERNEL); 1348 if (!smoke) { 1349 ret = -ENOMEM; 1350 goto out_file; 1351 } 1352 1353 threads = kcalloc(ncpus * nengines, sizeof(*threads), GFP_KERNEL); 1354 if (!threads) { 1355 ret = -ENOMEM; 1356 goto out_smoke; 1357 } 1358 1359 smoke[0].request_alloc = __live_request_alloc; 1360 smoke[0].ncontexts = 64; 1361 smoke[0].contexts = kcalloc(smoke[0].ncontexts, 1362 sizeof(*smoke[0].contexts), 1363 GFP_KERNEL); 1364 if (!smoke[0].contexts) { 1365 ret = -ENOMEM; 1366 goto out_threads; 1367 } 1368 1369 for (n = 0; n < smoke[0].ncontexts; n++) { 1370 smoke[0].contexts[n] = live_context(i915, file); 1371 if (!smoke[0].contexts[n]) { 1372 ret = -ENOMEM; 1373 goto out_contexts; 1374 } 1375 } 1376 1377 ret = igt_live_test_begin(&live, i915, __func__, ""); 1378 if (ret) 1379 goto out_contexts; 1380 1381 idx = 0; 1382 for_each_uabi_engine(engine, i915) { 1383 smoke[idx] = smoke[0]; 1384 smoke[idx].engine = engine; 1385 smoke[idx].max_batch = 1386 max_batches(smoke[0].contexts[0], engine); 1387 if (smoke[idx].max_batch < 0) { 1388 ret = smoke[idx].max_batch; 1389 goto out_flush; 1390 } 1391 /* One ring interleaved between requests from all cpus */ 1392 smoke[idx].max_batch /= num_online_cpus() + 1; 1393 pr_debug("Limiting batches to %d requests on %s\n", 1394 smoke[idx].max_batch, engine->name); 1395 1396 for (n = 0; n < ncpus; n++) { 1397 struct task_struct *tsk; 1398 1399 tsk = kthread_run(__igt_breadcrumbs_smoketest, 1400 &smoke[idx], "igt/%d.%d", idx, n); 1401 if (IS_ERR(tsk)) { 1402 ret = PTR_ERR(tsk); 1403 goto out_flush; 1404 } 1405 1406 get_task_struct(tsk); 1407 threads[idx * ncpus + n] = tsk; 1408 } 1409 1410 idx++; 1411 } 1412 1413 yield(); /* start all threads before we begin */ 1414 msleep(jiffies_to_msecs(i915_selftest.timeout_jiffies)); 1415 1416 out_flush: 1417 idx = 0; 1418 num_waits = 0; 1419 num_fences = 0; 1420 for_each_uabi_engine(engine, i915) { 1421 for (n = 0; n < ncpus; n++) { 1422 struct task_struct *tsk = threads[idx * ncpus + n]; 1423 int err; 1424 1425 if (!tsk) 1426 continue; 1427 1428 err = kthread_stop(tsk); 1429 if (err < 0 && !ret) 1430 ret = err; 1431 1432 put_task_struct(tsk); 1433 } 1434 1435 num_waits += atomic_long_read(&smoke[idx].num_waits); 1436 num_fences += atomic_long_read(&smoke[idx].num_fences); 1437 idx++; 1438 } 1439 pr_info("Completed %lu waits for %lu fences across %d engines and %d cpus\n", 1440 num_waits, num_fences, RUNTIME_INFO(i915)->num_engines, ncpus); 1441 1442 ret = igt_live_test_end(&live) ?: ret; 1443 out_contexts: 1444 kfree(smoke[0].contexts); 1445 out_threads: 1446 kfree(threads); 1447 out_smoke: 1448 kfree(smoke); 1449 out_file: 1450 fput(file); 1451 out_rpm: 1452 intel_runtime_pm_put(&i915->runtime_pm, wakeref); 1453 1454 return ret; 1455 } 1456 1457 int i915_request_live_selftests(struct drm_i915_private *i915) 1458 { 1459 static const struct i915_subtest tests[] = { 1460 SUBTEST(live_nop_request), 1461 SUBTEST(live_all_engines), 1462 SUBTEST(live_sequential_engines), 1463 SUBTEST(live_parallel_engines), 1464 SUBTEST(live_empty_request), 1465 SUBTEST(live_breadcrumbs_smoketest), 1466 }; 1467 1468 if (intel_gt_is_wedged(&i915->gt)) 1469 return 0; 1470 1471 return i915_subtests(tests, i915); 1472 } 1473