/*
 * Copyright © 2016 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 */

#include <linux/prime_numbers.h>

#include "gem/i915_gem_pm.h"
#include "gem/selftests/mock_context.h"

#include "gt/intel_gt.h"

#include "i915_random.h"
#include "i915_selftest.h"
#include "igt_live_test.h"
#include "lib_sw_fence.h"

#include "mock_drm.h"
#include "mock_gem_device.h"

static unsigned int num_uabi_engines(struct drm_i915_private *i915)
{
	struct intel_engine_cs *engine;
	unsigned int count;

	count = 0;
	for_each_uabi_engine(engine, i915)
		count++;

	return count;
}

static int igt_add_request(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct i915_request *request;

	/* Basic preliminary test to create a request and let it loose! */

	request = mock_request(i915->engine[RCS0]->kernel_context, HZ / 10);
	if (!request)
		return -ENOMEM;

	i915_request_add(request);

	return 0;
}

static int igt_wait_request(void *arg)
{
	const long T = HZ / 4;
	struct drm_i915_private *i915 = arg;
	struct i915_request *request;
	int err = -EINVAL;

	/* Submit a request, then wait upon it */

	request = mock_request(i915->engine[RCS0]->kernel_context, T);
	if (!request)
		return -ENOMEM;

	i915_request_get(request);

	if (i915_request_wait(request, 0, 0) != -ETIME) {
		pr_err("request wait (busy query) succeeded (expected timeout before submit!)\n");
		goto out_request;
	}

	if (i915_request_wait(request, 0, T) != -ETIME) {
		pr_err("request wait succeeded (expected timeout before submit!)\n");
		goto out_request;
	}

	if (i915_request_completed(request)) {
		pr_err("request completed before submit!!\n");
		goto out_request;
	}

	i915_request_add(request);

	if (i915_request_wait(request, 0, 0) != -ETIME) {
		pr_err("request wait (busy query) succeeded (expected timeout after submit!)\n");
		goto out_request;
	}

	if (i915_request_completed(request)) {
		pr_err("request completed immediately!\n");
		goto out_request;
	}

	if (i915_request_wait(request, 0, T / 2) != -ETIME) {
		pr_err("request wait succeeded (expected timeout!)\n");
		goto out_request;
	}

	if (i915_request_wait(request, 0, T) == -ETIME) {
		pr_err("request wait timed out!\n");
		goto out_request;
	}

	if (!i915_request_completed(request)) {
		pr_err("request not complete after waiting!\n");
		goto out_request;
	}

	if (i915_request_wait(request, 0, T) == -ETIME) {
		pr_err("request wait timed out when already complete!\n");
		goto out_request;
	}

	err = 0;
out_request:
	i915_request_put(request);
	mock_device_flush(i915);
	return err;
}

static int igt_fence_wait(void *arg)
{
	const long T = HZ / 4;
	struct drm_i915_private *i915 = arg;
	struct i915_request *request;
	int err = -EINVAL;

	/* Submit a request, treat it as a fence and wait upon it */

	request = mock_request(i915->engine[RCS0]->kernel_context, T);
	if (!request)
		return -ENOMEM;

	if (dma_fence_wait_timeout(&request->fence, false, T) != -ETIME) {
		pr_err("fence wait success before submit (expected timeout)!\n");
		goto out;
	}

	i915_request_add(request);

	if (dma_fence_is_signaled(&request->fence)) {
		pr_err("fence signaled immediately!\n");
		goto out;
	}

	if (dma_fence_wait_timeout(&request->fence, false, T / 2) != -ETIME) {
		pr_err("fence wait success after submit (expected timeout)!\n");
		goto out;
	}

	if (dma_fence_wait_timeout(&request->fence, false, T) <= 0) {
		pr_err("fence wait timed out (expected success)!\n");
		goto out;
	}

	if (!dma_fence_is_signaled(&request->fence)) {
		pr_err("fence unsignaled after waiting!\n");
		goto out;
	}

	if (dma_fence_wait_timeout(&request->fence, false, T) <= 0) {
		pr_err("fence wait timed out when complete (expected success)!\n");
		goto out;
	}

	err = 0;
out:
	mock_device_flush(i915);
	return err;
}

static int igt_request_rewind(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct i915_request *request, *vip;
	struct i915_gem_context *ctx[2];
	struct intel_context *ce;
	int err = -EINVAL;

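	/*
	 * Create a low priority request on context A and a "vip" request on
	 * context B, then manually reorder their submission to mimic
	 * preemption on the mock engine: the vip request must complete
	 * first, while the earlier request remains outstanding.
	 */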
	ctx[0] = mock_context(i915, "A");

	ce = i915_gem_context_get_engine(ctx[0], RCS0);
	GEM_BUG_ON(IS_ERR(ce));
	request = mock_request(ce, 2 * HZ);
	intel_context_put(ce);
	if (!request) {
		err = -ENOMEM;
		goto err_context_0;
	}

	i915_request_get(request);
	i915_request_add(request);

	ctx[1] = mock_context(i915, "B");

	ce = i915_gem_context_get_engine(ctx[1], RCS0);
	GEM_BUG_ON(IS_ERR(ce));
	vip = mock_request(ce, 0);
	intel_context_put(ce);
	if (!vip) {
		err = -ENOMEM;
		goto err_context_1;
	}

	/* Simulate preemption by manual reordering */
	if (!mock_cancel_request(request)) {
		pr_err("failed to cancel request (already executed)!\n");
		i915_request_add(vip);
		goto err_context_1;
	}
	i915_request_get(vip);
	i915_request_add(vip);
	rcu_read_lock();
	request->engine->submit_request(request);
	rcu_read_unlock();

	if (i915_request_wait(vip, 0, HZ) == -ETIME) {
		pr_err("timed out waiting for high priority request\n");
		goto err;
	}

	if (i915_request_completed(request)) {
		pr_err("low priority request already completed\n");
		goto err;
	}

	err = 0;
err:
	i915_request_put(vip);
err_context_1:
	mock_context_close(ctx[1]);
	i915_request_put(request);
err_context_0:
	mock_context_close(ctx[0]);
	mock_device_flush(i915);
	return err;
}

struct smoketest {
	struct intel_engine_cs *engine;
	struct i915_gem_context **contexts;
	atomic_long_t num_waits, num_fences;
	int ncontexts, max_batch;
	struct i915_request *(*request_alloc)(struct intel_context *ce);
};

static struct i915_request *
__mock_request_alloc(struct intel_context *ce)
{
	return mock_request(ce, 0);
}

static struct i915_request *
__live_request_alloc(struct intel_context *ce)
{
	return intel_context_create_request(ce);
}

static int __igt_breadcrumbs_smoketest(void *arg)
{
	struct smoketest *t = arg;
	const unsigned int max_batch = min(t->ncontexts, t->max_batch) - 1;
	const unsigned int total = 4 * t->ncontexts + 1;
	unsigned int num_waits = 0, num_fences = 0;
	struct i915_request **requests;
	I915_RND_STATE(prng);
	unsigned int *order;
	int err = 0;

	/*
	 * A very simple test to catch the most egregious of list handling bugs.
	 *
	 * At its heart, we simply create oodles of requests running across
	 * multiple kthreads and enable signaling on them, for the sole purpose
	 * of stressing our breadcrumb handling. The only inspection we do is
	 * that the fences were marked as signaled.
	 */

	requests = kcalloc(total, sizeof(*requests), GFP_KERNEL);
	if (!requests)
		return -ENOMEM;

	order = i915_random_order(total, &prng);
	if (!order) {
		err = -ENOMEM;
		goto out_requests;
	}

	while (!kthread_should_stop()) {
		struct i915_sw_fence *submit, *wait;
		unsigned int n, count;

		submit = heap_fence_create(GFP_KERNEL);
		if (!submit) {
			err = -ENOMEM;
			break;
		}

		wait = heap_fence_create(GFP_KERNEL);
		if (!wait) {
			i915_sw_fence_commit(submit);
			heap_fence_put(submit);
			err = -ENOMEM;
			break;
		}

		i915_random_reorder(order, total, &prng);
		count = 1 + i915_prandom_u32_max_state(max_batch, &prng);

		for (n = 0; n < count; n++) {
			struct i915_gem_context *ctx =
				t->contexts[order[n] % t->ncontexts];
			struct i915_request *rq;
			struct intel_context *ce;

			ce = i915_gem_context_get_engine(ctx, t->engine->legacy_idx);
			GEM_BUG_ON(IS_ERR(ce));
			rq = t->request_alloc(ce);
			intel_context_put(ce);
			if (IS_ERR(rq)) {
				err = PTR_ERR(rq);
				count = n;
				break;
			}

			err = i915_sw_fence_await_sw_fence_gfp(&rq->submit,
							       submit,
							       GFP_KERNEL);

			requests[n] = i915_request_get(rq);
			i915_request_add(rq);

			if (err >= 0)
				err = i915_sw_fence_await_dma_fence(wait,
								    &rq->fence,
								    0,
								    GFP_KERNEL);

			if (err < 0) {
				i915_request_put(rq);
				count = n;
				break;
			}
		}

		i915_sw_fence_commit(submit);
		i915_sw_fence_commit(wait);

		if (!wait_event_timeout(wait->wait,
					i915_sw_fence_done(wait),
					5 * HZ)) {
			struct i915_request *rq = requests[count - 1];

			pr_err("waiting for %d/%d fences (last %llx:%lld) on %s timed out!\n",
			       atomic_read(&wait->pending), count,
			       rq->fence.context, rq->fence.seqno,
			       t->engine->name);
			GEM_TRACE_DUMP();

			intel_gt_set_wedged(t->engine->gt);
			GEM_BUG_ON(!i915_request_completed(rq));
			i915_sw_fence_wait(wait);
			err = -EIO;
		}

		for (n = 0; n < count; n++) {
			struct i915_request *rq = requests[n];

			if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
				      &rq->fence.flags)) {
				pr_err("%llu:%llu was not signaled!\n",
				       rq->fence.context, rq->fence.seqno);
				err = -EINVAL;
			}

			i915_request_put(rq);
		}

		heap_fence_put(wait);
		heap_fence_put(submit);

		if (err < 0)
			break;

		num_fences += count;
		num_waits++;

		cond_resched();
	}

	atomic_long_add(num_fences, &t->num_fences);
	atomic_long_add(num_waits, &t->num_waits);

	kfree(order);
out_requests:
	kfree(requests);
	return err;
}

static int mock_breadcrumbs_smoketest(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct smoketest t = {
		.engine = i915->engine[RCS0],
		.ncontexts = 1024,
		.max_batch = 1024,
		.request_alloc = __mock_request_alloc
	};
	unsigned int ncpus = num_online_cpus();
	struct task_struct **threads;
	unsigned int n;
	int ret = 0;

	/*
	 * Smoketest our breadcrumb/signal handling for requests across multiple
	 * threads. A very simple test to only catch the most egregious of bugs.
	 * See __igt_breadcrumbs_smoketest();
	 */

	threads = kcalloc(ncpus, sizeof(*threads), GFP_KERNEL);
	if (!threads)
		return -ENOMEM;

	t.contexts = kcalloc(t.ncontexts, sizeof(*t.contexts), GFP_KERNEL);
	if (!t.contexts) {
		ret = -ENOMEM;
		goto out_threads;
	}

	for (n = 0; n < t.ncontexts; n++) {
		t.contexts[n] = mock_context(t.engine->i915, "mock");
		if (!t.contexts[n]) {
			ret = -ENOMEM;
			goto out_contexts;
		}
	}

	for (n = 0; n < ncpus; n++) {
		threads[n] = kthread_run(__igt_breadcrumbs_smoketest,
					 &t, "igt/%d", n);
		if (IS_ERR(threads[n])) {
			ret = PTR_ERR(threads[n]);
			ncpus = n;
			break;
		}

		get_task_struct(threads[n]);
	}

	yield(); /* start all threads before we begin */
	msleep(jiffies_to_msecs(i915_selftest.timeout_jiffies));

	for (n = 0; n < ncpus; n++) {
		int err;

		err = kthread_stop(threads[n]);
		if (err < 0 && !ret)
			ret = err;

		put_task_struct(threads[n]);
	}
	pr_info("Completed %lu waits for %lu fences across %d cpus\n",
		atomic_long_read(&t.num_waits),
		atomic_long_read(&t.num_fences),
		ncpus);

out_contexts:
	for (n = 0; n < t.ncontexts; n++) {
		if (!t.contexts[n])
			break;
		mock_context_close(t.contexts[n]);
	}
	kfree(t.contexts);
out_threads:
	kfree(threads);
	return ret;
}

int i915_request_mock_selftests(void)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(igt_add_request),
		SUBTEST(igt_wait_request),
		SUBTEST(igt_fence_wait),
		SUBTEST(igt_request_rewind),
		SUBTEST(mock_breadcrumbs_smoketest),
	};
	struct drm_i915_private *i915;
	intel_wakeref_t wakeref;
	int err = 0;

	i915 = mock_gem_device();
	if (!i915)
		return -ENOMEM;

	with_intel_runtime_pm(&i915->runtime_pm, wakeref)
		err = i915_subtests(tests, i915);

	drm_dev_put(&i915->drm);

	return err;
}

static int live_nop_request(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct intel_engine_cs *engine;
	struct igt_live_test t;
	int err = -ENODEV;

	/*
	 * Submit various sized batches of empty requests, to each engine
	 * (individually), and wait for the batch to complete. We can check
	 * the overhead of submitting requests to the hardware.
	 */

	for_each_uabi_engine(engine, i915) {
		unsigned long n, prime;
		IGT_TIMEOUT(end_time);
		ktime_t times[2] = {};

		err = igt_live_test_begin(&t, i915, __func__, engine->name);
		if (err)
			return err;

		for_each_prime_number_from(prime, 1, 8192) {
			struct i915_request *request = NULL;

			times[1] = ktime_get_raw();

			for (n = 0; n < prime; n++) {
				i915_request_put(request);
				request = i915_request_create(engine->kernel_context);
				if (IS_ERR(request))
					return PTR_ERR(request);

				/*
				 * This space is left intentionally blank.
				 *
				 * We do not actually want to perform any
				 * action with this request, we just want
				 * to measure the latency in allocation
				 * and submission of our breadcrumbs -
				 * ensuring that the bare request is sufficient
				 * for the system to work (i.e. proper HEAD
				 * tracking of the rings, interrupt handling,
				 * etc). It also gives us the lowest bounds
				 * for latency.
				 */

				i915_request_get(request);
				i915_request_add(request);
			}
			i915_request_wait(request, 0, MAX_SCHEDULE_TIMEOUT);
			i915_request_put(request);

			times[1] = ktime_sub(ktime_get_raw(), times[1]);
			if (prime == 1)
				times[0] = times[1];

			if (__igt_timeout(end_time, NULL))
				break;
		}

		err = igt_live_test_end(&t);
		if (err)
			return err;

		pr_info("Request latencies on %s: 1 = %lluns, %lu = %lluns\n",
			engine->name,
			ktime_to_ns(times[0]),
			prime, div64_u64(ktime_to_ns(times[1]), prime));
	}

	return err;
}

static struct i915_vma *empty_batch(struct drm_i915_private *i915)
{
	struct drm_i915_gem_object *obj;
	struct i915_vma *vma;
	u32 *cmd;
	int err;

	obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
	if (IS_ERR(obj))
		return ERR_CAST(obj);

	cmd = i915_gem_object_pin_map(obj, I915_MAP_WB);
	if (IS_ERR(cmd)) {
		err = PTR_ERR(cmd);
		goto err;
	}

	*cmd = MI_BATCH_BUFFER_END;

	__i915_gem_object_flush_map(obj, 0, 64);
	i915_gem_object_unpin_map(obj);

	intel_gt_chipset_flush(&i915->gt);

	vma = i915_vma_instance(obj, &i915->ggtt.vm, NULL);
	if (IS_ERR(vma)) {
		err = PTR_ERR(vma);
		goto err;
	}

	err = i915_vma_pin(vma, 0, 0, PIN_USER | PIN_GLOBAL);
	if (err)
		goto err;

	/* Force the wait now to avoid including it in the benchmark */
	err = i915_vma_sync(vma);
	if (err)
		goto err_pin;

	return vma;

err_pin:
	i915_vma_unpin(vma);
err:
	i915_gem_object_put(obj);
	return ERR_PTR(err);
}

static struct i915_request *
empty_request(struct intel_engine_cs *engine,
	      struct i915_vma *batch)
{
	struct i915_request *request;
	int err;

	request = i915_request_create(engine->kernel_context);
	if (IS_ERR(request))
		return request;

	err = engine->emit_bb_start(request,
				    batch->node.start,
				    batch->node.size,
				    I915_DISPATCH_SECURE);
	if (err)
		goto out_request;

	i915_request_get(request);
out_request:
	i915_request_add(request);
	return err ? ERR_PTR(err) : request;
}

static int live_empty_request(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct intel_engine_cs *engine;
	struct igt_live_test t;
	struct i915_vma *batch;
	int err = 0;

	/*
	 * Submit various sized batches of empty requests, to each engine
	 * (individually), and wait for the batch to complete. We can check
	 * the overhead of submitting requests to the hardware.
	 */

	batch = empty_batch(i915);
	if (IS_ERR(batch))
		return PTR_ERR(batch);

	for_each_uabi_engine(engine, i915) {
		IGT_TIMEOUT(end_time);
		struct i915_request *request;
		unsigned long n, prime;
		ktime_t times[2] = {};

		err = igt_live_test_begin(&t, i915, __func__, engine->name);
		if (err)
			goto out_batch;

		/* Warmup / preload */
		request = empty_request(engine, batch);
		if (IS_ERR(request)) {
			err = PTR_ERR(request);
			goto out_batch;
		}
		i915_request_wait(request, 0, MAX_SCHEDULE_TIMEOUT);

		for_each_prime_number_from(prime, 1, 8192) {
			times[1] = ktime_get_raw();

			for (n = 0; n < prime; n++) {
				i915_request_put(request);
				request = empty_request(engine, batch);
				if (IS_ERR(request)) {
					err = PTR_ERR(request);
					goto out_batch;
				}
			}
			i915_request_wait(request, 0, MAX_SCHEDULE_TIMEOUT);

			times[1] = ktime_sub(ktime_get_raw(), times[1]);
			if (prime == 1)
				times[0] = times[1];

			if (__igt_timeout(end_time, NULL))
				break;
		}
		i915_request_put(request);

		err = igt_live_test_end(&t);
		if (err)
			goto out_batch;

		pr_info("Batch latencies on %s: 1 = %lluns, %lu = %lluns\n",
			engine->name,
			ktime_to_ns(times[0]),
			prime, div64_u64(ktime_to_ns(times[1]), prime));
	}

out_batch:
	i915_vma_unpin(batch);
	i915_vma_put(batch);
	return err;
}

/*
 * The batch jumps back to its own start, so it runs on the GPU until
 * recursive_batch_resolve() rewrites its first dword with
 * MI_BATCH_BUFFER_END.
 */
static struct i915_vma *recursive_batch(struct drm_i915_private *i915)
{
	struct i915_gem_context *ctx = i915->kernel_context;
	struct drm_i915_gem_object *obj;
	const int gen = INTEL_GEN(i915);
	struct i915_address_space *vm;
	struct i915_vma *vma;
	u32 *cmd;
	int err;

	obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
	if (IS_ERR(obj))
		return ERR_CAST(obj);

	vm = i915_gem_context_get_vm_rcu(ctx);
	vma = i915_vma_instance(obj, vm, NULL);
	i915_vm_put(vm);
	if (IS_ERR(vma)) {
		err = PTR_ERR(vma);
		goto err;
	}

	err = i915_vma_pin(vma, 0, 0, PIN_USER);
	if (err)
		goto err;

	cmd = i915_gem_object_pin_map(obj, I915_MAP_WC);
	if (IS_ERR(cmd)) {
		err = PTR_ERR(cmd);
		goto err;
	}

	if (gen >= 8) {
		*cmd++ = MI_BATCH_BUFFER_START | 1 << 8 | 1;
		*cmd++ = lower_32_bits(vma->node.start);
		*cmd++ = upper_32_bits(vma->node.start);
	} else if (gen >= 6) {
		*cmd++ = MI_BATCH_BUFFER_START | 1 << 8;
		*cmd++ = lower_32_bits(vma->node.start);
	} else {
		*cmd++ = MI_BATCH_BUFFER_START | MI_BATCH_GTT;
		*cmd++ = lower_32_bits(vma->node.start);
	}
	*cmd++ = MI_BATCH_BUFFER_END; /* terminate early in case of error */

	__i915_gem_object_flush_map(obj, 0, 64);
	i915_gem_object_unpin_map(obj);

	intel_gt_chipset_flush(&i915->gt);

	return vma;

err:
	i915_gem_object_put(obj);
	return ERR_PTR(err);
}

static int recursive_batch_resolve(struct i915_vma *batch)
{
	u32 *cmd;

	cmd = i915_gem_object_pin_map(batch->obj, I915_MAP_WC);
	if (IS_ERR(cmd))
		return PTR_ERR(cmd);

	*cmd = MI_BATCH_BUFFER_END;
	intel_gt_chipset_flush(batch->vm->gt);

	i915_gem_object_unpin_map(batch->obj);

	return 0;
}

static int live_all_engines(void *arg)
{
	struct drm_i915_private *i915 = arg;
	const unsigned int nengines = num_uabi_engines(i915);
	struct intel_engine_cs *engine;
	struct i915_request **request;
	struct igt_live_test t;
	struct i915_vma *batch;
	unsigned int idx;
	int err;

	/*
	 * Check we can submit requests to all engines simultaneously. We
	 * send a recursive batch to each engine - checking that we don't
	 * block doing so, and that they don't complete too soon.
	 */

	request = kcalloc(nengines, sizeof(*request), GFP_KERNEL);
	if (!request)
		return -ENOMEM;

	err = igt_live_test_begin(&t, i915, __func__, "");
	if (err)
		goto out_free;

	batch = recursive_batch(i915);
	if (IS_ERR(batch)) {
		err = PTR_ERR(batch);
		pr_err("%s: Unable to create batch, err=%d\n", __func__, err);
		goto out_free;
	}

	idx = 0;
	for_each_uabi_engine(engine, i915) {
		request[idx] = i915_request_create(engine->kernel_context);
		if (IS_ERR(request[idx])) {
			err = PTR_ERR(request[idx]);
			pr_err("%s: Request allocation failed with err=%d\n",
			       __func__, err);
			goto out_request;
		}

		err = engine->emit_bb_start(request[idx],
					    batch->node.start,
					    batch->node.size,
					    0);
		GEM_BUG_ON(err);
		request[idx]->batch = batch;

		i915_vma_lock(batch);
		err = i915_request_await_object(request[idx], batch->obj, 0);
		if (err == 0)
			err = i915_vma_move_to_active(batch, request[idx], 0);
		i915_vma_unlock(batch);
		GEM_BUG_ON(err);

		i915_request_get(request[idx]);
		i915_request_add(request[idx]);
		idx++;
	}

	idx = 0;
	for_each_uabi_engine(engine, i915) {
		if (i915_request_completed(request[idx])) {
			pr_err("%s(%s): request completed too early!\n",
			       __func__, engine->name);
			err = -EINVAL;
			goto out_request;
		}
		idx++;
	}

	err = recursive_batch_resolve(batch);
	if (err) {
		pr_err("%s: failed to resolve batch, err=%d\n", __func__, err);
		goto out_request;
	}

	idx = 0;
	for_each_uabi_engine(engine, i915) {
		long timeout;

		timeout = i915_request_wait(request[idx], 0,
					    MAX_SCHEDULE_TIMEOUT);
		if (timeout < 0) {
			err = timeout;
			pr_err("%s: error waiting for request on %s, err=%d\n",
			       __func__, engine->name, err);
			goto out_request;
		}

		GEM_BUG_ON(!i915_request_completed(request[idx]));
		i915_request_put(request[idx]);
		request[idx] = NULL;
		idx++;
	}

	err = igt_live_test_end(&t);

out_request:
	idx = 0;
	for_each_uabi_engine(engine, i915) {
		if (request[idx])
			i915_request_put(request[idx]);
		idx++;
	}
	i915_vma_unpin(batch);
	i915_vma_put(batch);
out_free:
	kfree(request);
	return err;
}

static int live_sequential_engines(void *arg)
{
	struct drm_i915_private *i915 = arg;
	const unsigned int nengines = num_uabi_engines(i915);
	struct i915_request **request;
	struct i915_request *prev = NULL;
	struct intel_engine_cs *engine;
	struct igt_live_test t;
	unsigned int idx;
	int err;

	/*
	 * Check we can submit requests to all engines sequentially, such
	 * that each successive request waits for the earlier ones. This
	 * tests that we don't execute requests out of order, even though
	 * they are running on independent engines.
	 */

	request = kcalloc(nengines, sizeof(*request), GFP_KERNEL);
	if (!request)
		return -ENOMEM;

	err = igt_live_test_begin(&t, i915, __func__, "");
	if (err)
		goto out_free;

	idx = 0;
	for_each_uabi_engine(engine, i915) {
		struct i915_vma *batch;

		batch = recursive_batch(i915);
		if (IS_ERR(batch)) {
			err = PTR_ERR(batch);
			pr_err("%s: Unable to create batch for %s, err=%d\n",
			       __func__, engine->name, err);
			goto out_free;
		}

		request[idx] = i915_request_create(engine->kernel_context);
		if (IS_ERR(request[idx])) {
			err = PTR_ERR(request[idx]);
			pr_err("%s: Request allocation failed for %s with err=%d\n",
			       __func__, engine->name, err);
			goto out_request;
		}

		if (prev) {
			err = i915_request_await_dma_fence(request[idx],
							   &prev->fence);
			if (err) {
				i915_request_add(request[idx]);
				pr_err("%s: Request await failed for %s with err=%d\n",
				       __func__, engine->name, err);
				goto out_request;
			}
		}

		err = engine->emit_bb_start(request[idx],
					    batch->node.start,
					    batch->node.size,
					    0);
		GEM_BUG_ON(err);
		request[idx]->batch = batch;

		i915_vma_lock(batch);
		err = i915_request_await_object(request[idx],
						batch->obj, false);
		if (err == 0)
			err = i915_vma_move_to_active(batch, request[idx], 0);
		i915_vma_unlock(batch);
		GEM_BUG_ON(err);

		i915_request_get(request[idx]);
		i915_request_add(request[idx]);

		prev = request[idx];
		idx++;
	}

	idx = 0;
	for_each_uabi_engine(engine, i915) {
		long timeout;

		if (i915_request_completed(request[idx])) {
			pr_err("%s(%s): request completed too early!\n",
			       __func__, engine->name);
			err = -EINVAL;
			goto out_request;
		}

		err = recursive_batch_resolve(request[idx]->batch);
		if (err) {
			pr_err("%s: failed to resolve batch, err=%d\n",
			       __func__, err);
			goto out_request;
		}

		timeout = i915_request_wait(request[idx], 0,
					    MAX_SCHEDULE_TIMEOUT);
		if (timeout < 0) {
			err = timeout;
			pr_err("%s: error waiting for request on %s, err=%d\n",
			       __func__, engine->name, err);
			goto out_request;
		}

		GEM_BUG_ON(!i915_request_completed(request[idx]));
		idx++;
	}

	err = igt_live_test_end(&t);

out_request:
	idx = 0;
	for_each_uabi_engine(engine, i915) {
		u32 *cmd;

		if (!request[idx])
			break;

		cmd = i915_gem_object_pin_map(request[idx]->batch->obj,
					      I915_MAP_WC);
		if (!IS_ERR(cmd)) {
			*cmd = MI_BATCH_BUFFER_END;
			intel_gt_chipset_flush(engine->gt);

			i915_gem_object_unpin_map(request[idx]->batch->obj);
		}

		i915_vma_put(request[idx]->batch);
		i915_request_put(request[idx]);
		idx++;
	}
out_free:
	kfree(request);
	return err;
}

static int __live_parallel_engine1(void *arg)
{
	struct intel_engine_cs *engine = arg;
	IGT_TIMEOUT(end_time);
	unsigned long count;

	count = 0;
	do {
		struct i915_request *rq;
		int err;

		rq = i915_request_create(engine->kernel_context);
		if (IS_ERR(rq))
			return PTR_ERR(rq);

		i915_request_get(rq);
		i915_request_add(rq);

		err = 0;
		if (i915_request_wait(rq, 0, HZ / 5) < 0)
			err = -ETIME;
		i915_request_put(rq);
		if (err)
			return err;

		count++;
	} while (!__igt_timeout(end_time, NULL));

	pr_info("%s: %lu request + sync\n", engine->name, count);
	return 0;
}

static int __live_parallel_engineN(void *arg)
{
	struct intel_engine_cs *engine = arg;
	IGT_TIMEOUT(end_time);
	unsigned long count;

	count = 0;
	do {
		struct i915_request *rq;

		rq = i915_request_create(engine->kernel_context);
		if (IS_ERR(rq))
			return PTR_ERR(rq);

		i915_request_add(rq);
		count++;
	} while (!__igt_timeout(end_time, NULL));

	pr_info("%s: %lu requests\n", engine->name, count);
	return 0;
}

static int live_parallel_engines(void *arg)
{
	struct drm_i915_private *i915 = arg;
	static int (* const func[])(void *arg) = {
		__live_parallel_engine1,
		__live_parallel_engineN,
		NULL,
	};
	const unsigned int nengines = num_uabi_engines(i915);
	struct intel_engine_cs *engine;
	int (* const *fn)(void *arg);
	struct task_struct **tsk;
	int err = 0;

	/*
	 * Check we can submit requests to all engines concurrently. This
	 * tests that we load up the system maximally.
	 */

	tsk = kcalloc(nengines, sizeof(*tsk), GFP_KERNEL);
	if (!tsk)
		return -ENOMEM;

	for (fn = func; !err && *fn; fn++) {
		struct igt_live_test t;
		unsigned int idx;

		err = igt_live_test_begin(&t, i915, __func__, "");
		if (err)
			break;

		idx = 0;
		for_each_uabi_engine(engine, i915) {
			tsk[idx] = kthread_run(*fn, engine,
					       "igt/parallel:%s",
					       engine->name);
			if (IS_ERR(tsk[idx])) {
				err = PTR_ERR(tsk[idx]);
				break;
			}
			get_task_struct(tsk[idx++]);
		}

		yield(); /* start all threads before we kthread_stop() */

		idx = 0;
		for_each_uabi_engine(engine, i915) {
			int status;

			if (IS_ERR(tsk[idx]))
				break;

			status = kthread_stop(tsk[idx]);
			if (status && !err)
				err = status;

			put_task_struct(tsk[idx++]);
		}

		if (igt_live_test_end(&t))
			err = -EIO;
	}

	kfree(tsk);
	return err;
}

static int
max_batches(struct i915_gem_context *ctx, struct intel_engine_cs *engine)
{
	struct i915_request *rq;
	int ret;

	/*
	 * Before execlists, all contexts share the same ringbuffer. With
	 * execlists, each context/engine has a separate ringbuffer and
	 * for the purposes of this test, inexhaustible.
	 *
	 * For the global ringbuffer though, we have to be very careful
	 * that we do not wrap while preventing the execution of requests
	 * with an unsignaled fence.
	 */
	if (HAS_EXECLISTS(ctx->i915))
		return INT_MAX;

	rq = igt_request_alloc(ctx, engine);
	if (IS_ERR(rq)) {
		ret = PTR_ERR(rq);
	} else {
		int sz;

		ret = rq->ring->size - rq->reserved_space;
		i915_request_add(rq);

		sz = rq->ring->emit - rq->head;
		if (sz < 0)
			sz += rq->ring->size;
		ret /= sz;
		ret /= 2; /* leave half spare, in case of emergency! */
	}

	return ret;
}

static int live_breadcrumbs_smoketest(void *arg)
{
	struct drm_i915_private *i915 = arg;
	const unsigned int nengines = num_uabi_engines(i915);
	const unsigned int ncpus = num_online_cpus();
	unsigned long num_waits, num_fences;
	struct intel_engine_cs *engine;
	struct task_struct **threads;
	struct igt_live_test live;
	intel_wakeref_t wakeref;
	struct drm_file *file;
	struct smoketest *smoke;
	unsigned int n, idx;
	int ret = 0;

	/*
	 * Smoketest our breadcrumb/signal handling for requests across multiple
	 * threads. A very simple test to only catch the most egregious of bugs.
	 * See __igt_breadcrumbs_smoketest();
	 *
	 * On real hardware this time.
	 */

	wakeref = intel_runtime_pm_get(&i915->runtime_pm);

	file = mock_file(i915);
	if (IS_ERR(file)) {
		ret = PTR_ERR(file);
		goto out_rpm;
	}

	smoke = kcalloc(nengines, sizeof(*smoke), GFP_KERNEL);
	if (!smoke) {
		ret = -ENOMEM;
		goto out_file;
	}

	threads = kcalloc(ncpus * nengines, sizeof(*threads), GFP_KERNEL);
	if (!threads) {
		ret = -ENOMEM;
		goto out_smoke;
	}

	smoke[0].request_alloc = __live_request_alloc;
	smoke[0].ncontexts = 64;
	smoke[0].contexts = kcalloc(smoke[0].ncontexts,
				    sizeof(*smoke[0].contexts),
				    GFP_KERNEL);
	if (!smoke[0].contexts) {
		ret = -ENOMEM;
		goto out_threads;
	}

	for (n = 0; n < smoke[0].ncontexts; n++) {
		smoke[0].contexts[n] = live_context(i915, file);
		if (!smoke[0].contexts[n]) {
			ret = -ENOMEM;
			goto out_contexts;
		}
	}

	ret = igt_live_test_begin(&live, i915, __func__, "");
	if (ret)
		goto out_contexts;

	idx = 0;
	for_each_uabi_engine(engine, i915) {
		smoke[idx] = smoke[0];
		smoke[idx].engine = engine;
		smoke[idx].max_batch =
			max_batches(smoke[0].contexts[0], engine);
		if (smoke[idx].max_batch < 0) {
			ret = smoke[idx].max_batch;
			goto out_flush;
		}
		/* One ring interleaved between requests from all cpus */
		smoke[idx].max_batch /= num_online_cpus() + 1;
		pr_debug("Limiting batches to %d requests on %s\n",
			 smoke[idx].max_batch, engine->name);

		for (n = 0; n < ncpus; n++) {
			struct task_struct *tsk;

			tsk = kthread_run(__igt_breadcrumbs_smoketest,
					  &smoke[idx], "igt/%d.%d", idx, n);
			if (IS_ERR(tsk)) {
				ret = PTR_ERR(tsk);
				goto out_flush;
			}

			get_task_struct(tsk);
			threads[idx * ncpus + n] = tsk;
		}

		idx++;
	}

	yield(); /* start all threads before we begin */
	msleep(jiffies_to_msecs(i915_selftest.timeout_jiffies));

out_flush:
	idx = 0;
	num_waits = 0;
	num_fences = 0;
	for_each_uabi_engine(engine, i915) {
		for (n = 0; n < ncpus; n++) {
			struct task_struct *tsk = threads[idx * ncpus + n];
			int err;

			if (!tsk)
				continue;

			err = kthread_stop(tsk);
			if (err < 0 && !ret)
				ret = err;

			put_task_struct(tsk);
		}

		num_waits += atomic_long_read(&smoke[idx].num_waits);
		num_fences += atomic_long_read(&smoke[idx].num_fences);
		idx++;
	}
	pr_info("Completed %lu waits for %lu fences across %d engines and %d cpus\n",
		num_waits, num_fences, RUNTIME_INFO(i915)->num_engines, ncpus);

	ret = igt_live_test_end(&live) ?: ret;
out_contexts:
	kfree(smoke[0].contexts);
out_threads:
	kfree(threads);
out_smoke:
	kfree(smoke);
out_file:
	mock_file_free(i915, file);
out_rpm:
	intel_runtime_pm_put(&i915->runtime_pm, wakeref);

	return ret;
}

int i915_request_live_selftests(struct drm_i915_private *i915)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(live_nop_request),
		SUBTEST(live_all_engines),
		SUBTEST(live_sequential_engines),
		SUBTEST(live_parallel_engines),
		SUBTEST(live_empty_request),
		SUBTEST(live_breadcrumbs_smoketest),
	};

	if (intel_gt_is_wedged(&i915->gt))
		return 0;

	return i915_subtests(tests, i915);
}