/*
 * Copyright © 2016 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 */

#include <linux/prime_numbers.h>

#include "gem/i915_gem_pm.h"
#include "gem/selftests/mock_context.h"

#include "gt/intel_gt.h"

#include "i915_random.h"
#include "i915_selftest.h"
#include "igt_live_test.h"
#include "lib_sw_fence.h"

#include "mock_drm.h"
#include "mock_gem_device.h"

static int igt_add_request(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct i915_request *request;

	/* Basic preliminary test to create a request and let it loose! */

	request = mock_request(i915->engine[RCS0]->kernel_context, HZ / 10);
	if (!request)
		return -ENOMEM;

	i915_request_add(request);

	return 0;
}

static int igt_wait_request(void *arg)
{
	const long T = HZ / 4;
	struct drm_i915_private *i915 = arg;
	struct i915_request *request;
	int err = -EINVAL;

	/* Submit a request, then wait upon it */

	request = mock_request(i915->engine[RCS0]->kernel_context, T);
	if (!request)
		return -ENOMEM;

	i915_request_get(request);

	if (i915_request_wait(request, 0, 0) != -ETIME) {
		pr_err("request wait (busy query) succeeded (expected timeout before submit!)\n");
		goto out_request;
	}

	if (i915_request_wait(request, 0, T) != -ETIME) {
		pr_err("request wait succeeded (expected timeout before submit!)\n");
		goto out_request;
	}

	if (i915_request_completed(request)) {
		pr_err("request completed before submit!!\n");
		goto out_request;
	}

	i915_request_add(request);

	if (i915_request_wait(request, 0, 0) != -ETIME) {
		pr_err("request wait (busy query) succeeded (expected timeout after submit!)\n");
		goto out_request;
	}

	if (i915_request_completed(request)) {
		pr_err("request completed immediately!\n");
		goto out_request;
	}

	if (i915_request_wait(request, 0, T / 2) != -ETIME) {
		pr_err("request wait succeeded (expected timeout!)\n");
		goto out_request;
	}

	if (i915_request_wait(request, 0, T) == -ETIME) {
		pr_err("request wait timed out!\n");
		goto out_request;
	}

	if (!i915_request_completed(request)) {
		pr_err("request not complete after waiting!\n");
		goto out_request;
	}

	if (i915_request_wait(request, 0, T) == -ETIME) {
		pr_err("request wait timed out when already complete!\n");
		goto out_request;
	}
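
	/*
	 * The checks above rely on the i915_request_wait() convention of
	 * returning -ETIME while the request is still busy after the
	 * timeout expires, and the remaining jiffies (>= 0) once it has
	 * completed, so a zero-jiffy timeout doubles as a non-blocking
	 * busy query.
	 */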

	err = 0;
out_request:
	i915_request_put(request);
	mock_device_flush(i915);
	return err;
}

static int igt_fence_wait(void *arg)
{
	const long T = HZ / 4;
	struct drm_i915_private *i915 = arg;
	struct i915_request *request;
	int err = -EINVAL;

	/* Submit a request, treat it as a fence and wait upon it */

	request = mock_request(i915->engine[RCS0]->kernel_context, T);
	if (!request)
		return -ENOMEM;

	if (dma_fence_wait_timeout(&request->fence, false, T) != -ETIME) {
		pr_err("fence wait success before submit (expected timeout)!\n");
		goto out;
	}

	i915_request_add(request);

	if (dma_fence_is_signaled(&request->fence)) {
		pr_err("fence signaled immediately!\n");
		goto out;
	}

	if (dma_fence_wait_timeout(&request->fence, false, T / 2) != -ETIME) {
		pr_err("fence wait success after submit (expected timeout)!\n");
		goto out;
	}

	if (dma_fence_wait_timeout(&request->fence, false, T) <= 0) {
		pr_err("fence wait timed out (expected success)!\n");
		goto out;
	}

	if (!dma_fence_is_signaled(&request->fence)) {
		pr_err("fence unsignaled after waiting!\n");
		goto out;
	}

	if (dma_fence_wait_timeout(&request->fence, false, T) <= 0) {
		pr_err("fence wait timed out when complete (expected success)!\n");
		goto out;
	}

	err = 0;
out:
	mock_device_flush(i915);
	return err;
}

static int igt_request_rewind(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct i915_request *request, *vip;
	struct i915_gem_context *ctx[2];
	struct intel_context *ce;
	int err = -EINVAL;

	ctx[0] = mock_context(i915, "A");

	ce = i915_gem_context_get_engine(ctx[0], RCS0);
	GEM_BUG_ON(IS_ERR(ce));
	request = mock_request(ce, 2 * HZ);
	intel_context_put(ce);
	if (!request) {
		err = -ENOMEM;
		goto err_context_0;
	}

	i915_request_get(request);
	i915_request_add(request);

	ctx[1] = mock_context(i915, "B");

	ce = i915_gem_context_get_engine(ctx[1], RCS0);
	GEM_BUG_ON(IS_ERR(ce));
	vip = mock_request(ce, 0);
	intel_context_put(ce);
	if (!vip) {
		err = -ENOMEM;
		goto err_context_1;
	}

	/* Simulate preemption by manual reordering */
	if (!mock_cancel_request(request)) {
		pr_err("failed to cancel request (already executed)!\n");
		i915_request_add(vip);
		goto err_context_1;
	}
	i915_request_get(vip);
	i915_request_add(vip);
	rcu_read_lock();
	request->engine->submit_request(request);
	rcu_read_unlock();

	if (i915_request_wait(vip, 0, HZ) == -ETIME) {
		pr_err("timed out waiting for high priority request\n");
		goto err;
	}

	if (i915_request_completed(request)) {
		pr_err("low priority request already completed\n");
		goto err;
	}

	err = 0;
err:
	i915_request_put(vip);
err_context_1:
	mock_context_close(ctx[1]);
	i915_request_put(request);
err_context_0:
	mock_context_close(ctx[0]);
	mock_device_flush(i915);
	return err;
}

struct smoketest {
	struct intel_engine_cs *engine;
	struct i915_gem_context **contexts;
	atomic_long_t num_waits, num_fences;
	int ncontexts, max_batch;
	struct i915_request *(*request_alloc)(struct intel_context *ce);
};

static struct i915_request *
__mock_request_alloc(struct intel_context *ce)
{
	return mock_request(ce, 0);
}

static struct i915_request *
__live_request_alloc(struct intel_context *ce)
{
	return intel_context_create_request(ce);
}

static int __igt_breadcrumbs_smoketest(void *arg)
{
	struct smoketest *t = arg;
	const unsigned int max_batch = min(t->ncontexts, t->max_batch) - 1;
	const unsigned int total = 4 * t->ncontexts + 1;
	unsigned int num_waits = 0, num_fences = 0;
	struct i915_request **requests;
	I915_RND_STATE(prng);
	unsigned int *order;
	int err = 0;

	/*
	 * A very simple test to catch the most egregious of list handling bugs.
	 *
	 * At its heart, we simply create oodles of requests running across
	 * multiple kthreads and enable signaling on them, for the sole purpose
	 * of stressing our breadcrumb handling. The only inspection we do is
	 * that the fences were marked as signaled.
	 */

	requests = kmalloc_array(total, sizeof(*requests), GFP_KERNEL);
	if (!requests)
		return -ENOMEM;

	order = i915_random_order(total, &prng);
	if (!order) {
		err = -ENOMEM;
		goto out_requests;
	}

	while (!kthread_should_stop()) {
		struct i915_sw_fence *submit, *wait;
		unsigned int n, count;

		submit = heap_fence_create(GFP_KERNEL);
		if (!submit) {
			err = -ENOMEM;
			break;
		}

		wait = heap_fence_create(GFP_KERNEL);
		if (!wait) {
			i915_sw_fence_commit(submit);
			heap_fence_put(submit);
			err = -ENOMEM;
			break;
		}

		i915_random_reorder(order, total, &prng);
		count = 1 + i915_prandom_u32_max_state(max_batch, &prng);

		for (n = 0; n < count; n++) {
			struct i915_gem_context *ctx =
				t->contexts[order[n] % t->ncontexts];
			struct i915_request *rq;
			struct intel_context *ce;

			ce = i915_gem_context_get_engine(ctx, t->engine->legacy_idx);
			GEM_BUG_ON(IS_ERR(ce));
			rq = t->request_alloc(ce);
			intel_context_put(ce);
			if (IS_ERR(rq)) {
				err = PTR_ERR(rq);
				count = n;
				break;
			}

			err = i915_sw_fence_await_sw_fence_gfp(&rq->submit,
							       submit,
							       GFP_KERNEL);

			requests[n] = i915_request_get(rq);
			i915_request_add(rq);

			if (err >= 0)
				err = i915_sw_fence_await_dma_fence(wait,
								    &rq->fence,
								    0,
								    GFP_KERNEL);

			if (err < 0) {
				i915_request_put(rq);
				count = n;
				break;
			}
		}

		i915_sw_fence_commit(submit);
		i915_sw_fence_commit(wait);

		if (!wait_event_timeout(wait->wait,
					i915_sw_fence_done(wait),
					5 * HZ)) {
			struct i915_request *rq = requests[count - 1];

			pr_err("waiting for %d/%d fences (last %llx:%lld) on %s timed out!\n",
			       atomic_read(&wait->pending), count,
			       rq->fence.context, rq->fence.seqno,
			       t->engine->name);
			GEM_TRACE_DUMP();

			intel_gt_set_wedged(t->engine->gt);
			GEM_BUG_ON(!i915_request_completed(rq));
			i915_sw_fence_wait(wait);
			err = -EIO;
		}

		for (n = 0; n < count; n++) {
			struct i915_request *rq = requests[n];

			if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
				      &rq->fence.flags)) {
				pr_err("%llu:%llu was not signaled!\n",
				       rq->fence.context, rq->fence.seqno);
				err = -EINVAL;
			}

			i915_request_put(rq);
		}

		heap_fence_put(wait);
		heap_fence_put(submit);

		if (err < 0)
			break;

		num_fences += count;
		num_waits++;

		cond_resched();
	}

	atomic_long_add(num_fences, &t->num_fences);
	atomic_long_add(num_waits, &t->num_waits);

	kfree(order);
out_requests:
	kfree(requests);
	return err;
}

static int
mock_breadcrumbs_smoketest(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct smoketest t = {
		.engine = i915->engine[RCS0],
		.ncontexts = 1024,
		.max_batch = 1024,
		.request_alloc = __mock_request_alloc
	};
	unsigned int ncpus = num_online_cpus();
	struct task_struct **threads;
	unsigned int n;
	int ret = 0;

	/*
	 * Smoketest our breadcrumb/signal handling for requests across multiple
	 * threads. A very simple test to only catch the most egregious of bugs.
	 * See __igt_breadcrumbs_smoketest();
	 */

	threads = kmalloc_array(ncpus, sizeof(*threads), GFP_KERNEL);
	if (!threads)
		return -ENOMEM;

	t.contexts =
		kmalloc_array(t.ncontexts, sizeof(*t.contexts), GFP_KERNEL);
	if (!t.contexts) {
		ret = -ENOMEM;
		goto out_threads;
	}

	for (n = 0; n < t.ncontexts; n++) {
		t.contexts[n] = mock_context(t.engine->i915, "mock");
		if (!t.contexts[n]) {
			ret = -ENOMEM;
			goto out_contexts;
		}
	}

	for (n = 0; n < ncpus; n++) {
		threads[n] = kthread_run(__igt_breadcrumbs_smoketest,
					 &t, "igt/%d", n);
		if (IS_ERR(threads[n])) {
			ret = PTR_ERR(threads[n]);
			ncpus = n;
			break;
		}

		get_task_struct(threads[n]);
	}

	msleep(jiffies_to_msecs(i915_selftest.timeout_jiffies));

	for (n = 0; n < ncpus; n++) {
		int err;

		err = kthread_stop(threads[n]);
		if (err < 0 && !ret)
			ret = err;

		put_task_struct(threads[n]);
	}
	pr_info("Completed %lu waits for %lu fences across %d cpus\n",
		atomic_long_read(&t.num_waits),
		atomic_long_read(&t.num_fences),
		ncpus);

out_contexts:
	for (n = 0; n < t.ncontexts; n++) {
		if (!t.contexts[n])
			break;
		mock_context_close(t.contexts[n]);
	}
	kfree(t.contexts);
out_threads:
	kfree(threads);
	return ret;
}

int i915_request_mock_selftests(void)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(igt_add_request),
		SUBTEST(igt_wait_request),
		SUBTEST(igt_fence_wait),
		SUBTEST(igt_request_rewind),
		SUBTEST(mock_breadcrumbs_smoketest),
	};
	struct drm_i915_private *i915;
	intel_wakeref_t wakeref;
	int err = 0;

	i915 = mock_gem_device();
	if (!i915)
		return -ENOMEM;

	with_intel_runtime_pm(&i915->runtime_pm, wakeref)
		err = i915_subtests(tests, i915);

	drm_dev_put(&i915->drm);

	return err;
}

static int live_nop_request(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct intel_engine_cs *engine;
	struct igt_live_test t;
	unsigned int id;
	int err = -ENODEV;

	/* Submit various sized batches of empty requests, to each engine
	 * (individually), and wait for the batch to complete. We can check
	 * the overhead of submitting requests to the hardware.
	 */

	for_each_engine(engine, i915, id) {
		unsigned long n, prime;
		IGT_TIMEOUT(end_time);
		ktime_t times[2] = {};

		err = igt_live_test_begin(&t, i915, __func__, engine->name);
		if (err)
			return err;

		for_each_prime_number_from(prime, 1, 8192) {
			struct i915_request *request = NULL;

			times[1] = ktime_get_raw();

			for (n = 0; n < prime; n++) {
				i915_request_put(request);
				request = i915_request_create(engine->kernel_context);
				if (IS_ERR(request))
					return PTR_ERR(request);

				/* This space is left intentionally blank.
				 *
				 * We do not actually want to perform any
				 * action with this request, we just want
				 * to measure the latency in allocation
				 * and submission of our breadcrumbs -
				 * ensuring that the bare request is sufficient
				 * for the system to work (i.e. proper HEAD
				 * tracking of the rings, interrupt handling,
				 * etc). It also gives us the lowest bounds
				 * for latency.
				 */

				i915_request_get(request);
				i915_request_add(request);
			}
			i915_request_wait(request, 0, MAX_SCHEDULE_TIMEOUT);
			i915_request_put(request);

			times[1] = ktime_sub(ktime_get_raw(), times[1]);
			if (prime == 1)
				times[0] = times[1];

			if (__igt_timeout(end_time, NULL))
				break;
		}

		err = igt_live_test_end(&t);
		if (err)
			return err;

		pr_info("Request latencies on %s: 1 = %lluns, %lu = %lluns\n",
			engine->name,
			ktime_to_ns(times[0]),
			prime, div64_u64(ktime_to_ns(times[1]), prime));
	}

	return err;
}

static struct i915_vma *empty_batch(struct drm_i915_private *i915)
{
	struct drm_i915_gem_object *obj;
	struct i915_vma *vma;
	u32 *cmd;
	int err;

	obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
	if (IS_ERR(obj))
		return ERR_CAST(obj);

	cmd = i915_gem_object_pin_map(obj, I915_MAP_WB);
	if (IS_ERR(cmd)) {
		err = PTR_ERR(cmd);
		goto err;
	}

	*cmd = MI_BATCH_BUFFER_END;

	__i915_gem_object_flush_map(obj, 0, 64);
	i915_gem_object_unpin_map(obj);

	intel_gt_chipset_flush(&i915->gt);

	vma = i915_vma_instance(obj, &i915->ggtt.vm, NULL);
	if (IS_ERR(vma)) {
		err = PTR_ERR(vma);
		goto err;
	}

	err = i915_vma_pin(vma, 0, 0, PIN_USER | PIN_GLOBAL);
	if (err)
		goto err;

	/* Force the wait now to avoid including it in the benchmark */
	err = i915_vma_sync(vma);
	if (err)
		goto err_pin;

	return vma;

err_pin:
	i915_vma_unpin(vma);
err:
	i915_gem_object_put(obj);
	return ERR_PTR(err);
}

static struct i915_request *
empty_request(struct intel_engine_cs *engine,
	      struct i915_vma *batch)
{
	struct i915_request *request;
	int err;

	request = i915_request_create(engine->kernel_context);
	if (IS_ERR(request))
		return request;

	err = engine->emit_bb_start(request,
				    batch->node.start,
				    batch->node.size,
				    I915_DISPATCH_SECURE);
	if (err)
		goto out_request;

	i915_request_get(request);
out_request:
	i915_request_add(request);
	return err ? ERR_PTR(err) : request;
}

static int live_empty_request(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct intel_engine_cs *engine;
	struct igt_live_test t;
	struct i915_vma *batch;
	unsigned int id;
	int err = 0;

	/* Submit various sized batches of empty requests, to each engine
	 * (individually), and wait for the batch to complete. We can check
	 * the overhead of submitting requests to the hardware.
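	 *
	 * Unlike live_nop_request(), each request here also dispatches a
	 * trivial batch containing just MI_BATCH_BUFFER_END via
	 * emit_bb_start(), so the measured latency includes batch dispatch
	 * rather than only breadcrumb bookkeeping.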
	 */

	batch = empty_batch(i915);
	if (IS_ERR(batch))
		return PTR_ERR(batch);

	for_each_engine(engine, i915, id) {
		IGT_TIMEOUT(end_time);
		struct i915_request *request;
		unsigned long n, prime;
		ktime_t times[2] = {};

		err = igt_live_test_begin(&t, i915, __func__, engine->name);
		if (err)
			goto out_batch;

		/* Warmup / preload */
		request = empty_request(engine, batch);
		if (IS_ERR(request)) {
			err = PTR_ERR(request);
			goto out_batch;
		}
		i915_request_wait(request, 0, MAX_SCHEDULE_TIMEOUT);

		for_each_prime_number_from(prime, 1, 8192) {
			times[1] = ktime_get_raw();

			for (n = 0; n < prime; n++) {
				i915_request_put(request);
				request = empty_request(engine, batch);
				if (IS_ERR(request)) {
					err = PTR_ERR(request);
					goto out_batch;
				}
			}
			i915_request_wait(request, 0, MAX_SCHEDULE_TIMEOUT);

			times[1] = ktime_sub(ktime_get_raw(), times[1]);
			if (prime == 1)
				times[0] = times[1];

			if (__igt_timeout(end_time, NULL))
				break;
		}
		i915_request_put(request);

		err = igt_live_test_end(&t);
		if (err)
			goto out_batch;

		pr_info("Batch latencies on %s: 1 = %lluns, %lu = %lluns\n",
			engine->name,
			ktime_to_ns(times[0]),
			prime, div64_u64(ktime_to_ns(times[1]), prime));
	}

out_batch:
	i915_vma_unpin(batch);
	i915_vma_put(batch);
	return err;
}

static struct i915_vma *recursive_batch(struct drm_i915_private *i915)
{
	struct i915_gem_context *ctx = i915->kernel_context;
	struct drm_i915_gem_object *obj;
	const int gen = INTEL_GEN(i915);
	struct i915_address_space *vm;
	struct i915_vma *vma;
	u32 *cmd;
	int err;

	obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
	if (IS_ERR(obj))
		return ERR_CAST(obj);

	vm = i915_gem_context_get_vm_rcu(ctx);
	vma = i915_vma_instance(obj, vm, NULL);
	i915_vm_put(vm);
	if (IS_ERR(vma)) {
		err = PTR_ERR(vma);
		goto err;
	}

	err = i915_vma_pin(vma, 0, 0, PIN_USER);
	if (err)
		goto err;

	cmd = i915_gem_object_pin_map(obj, I915_MAP_WC);
	if (IS_ERR(cmd)) {
		err = PTR_ERR(cmd);
		goto err;
	}

	if (gen >= 8) {
		*cmd++ = MI_BATCH_BUFFER_START | 1 << 8 | 1;
		*cmd++ = lower_32_bits(vma->node.start);
		*cmd++ = upper_32_bits(vma->node.start);
	} else if (gen >= 6) {
		*cmd++ = MI_BATCH_BUFFER_START | 1 << 8;
		*cmd++ = lower_32_bits(vma->node.start);
	} else {
		*cmd++ = MI_BATCH_BUFFER_START | MI_BATCH_GTT;
		*cmd++ = lower_32_bits(vma->node.start);
	}
	*cmd++ = MI_BATCH_BUFFER_END; /* terminate early in case of error */

	__i915_gem_object_flush_map(obj, 0, 64);
	i915_gem_object_unpin_map(obj);

	intel_gt_chipset_flush(&i915->gt);

	return vma;

err:
	i915_gem_object_put(obj);
	return ERR_PTR(err);
}

static int recursive_batch_resolve(struct i915_vma *batch)
{
	u32 *cmd;

	cmd = i915_gem_object_pin_map(batch->obj, I915_MAP_WC);
	if (IS_ERR(cmd))
		return PTR_ERR(cmd);

	*cmd = MI_BATCH_BUFFER_END;
	intel_gt_chipset_flush(batch->vm->gt);

	i915_gem_object_unpin_map(batch->obj);

	return 0;
}

static int live_all_engines(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct intel_engine_cs *engine;
	struct i915_request *request[I915_NUM_ENGINES];
	struct igt_live_test t;
	struct i915_vma *batch;
	unsigned int id;
	int err;

	/*
	 * Check we can submit requests to all engines simultaneously. We
	 * send a recursive batch to each engine - checking that we don't
	 * block doing so, and that they don't complete too soon.
	 */

	err = igt_live_test_begin(&t, i915, __func__, "");
	if (err)
		return err;

	batch = recursive_batch(i915);
	if (IS_ERR(batch)) {
		err = PTR_ERR(batch);
		pr_err("%s: Unable to create batch, err=%d\n", __func__, err);
		return err;
	}

	for_each_engine(engine, i915, id) {
		request[id] = i915_request_create(engine->kernel_context);
		if (IS_ERR(request[id])) {
			err = PTR_ERR(request[id]);
			pr_err("%s: Request allocation failed with err=%d\n",
			       __func__, err);
			goto out_request;
		}

		err = engine->emit_bb_start(request[id],
					    batch->node.start,
					    batch->node.size,
					    0);
		GEM_BUG_ON(err);
		request[id]->batch = batch;

		i915_vma_lock(batch);
		err = i915_request_await_object(request[id], batch->obj, 0);
		if (err == 0)
			err = i915_vma_move_to_active(batch, request[id], 0);
		i915_vma_unlock(batch);
		GEM_BUG_ON(err);

		i915_request_get(request[id]);
		i915_request_add(request[id]);
	}

	for_each_engine(engine, i915, id) {
		if (i915_request_completed(request[id])) {
			pr_err("%s(%s): request completed too early!\n",
			       __func__, engine->name);
			err = -EINVAL;
			goto out_request;
		}
	}

	err = recursive_batch_resolve(batch);
	if (err) {
		pr_err("%s: failed to resolve batch, err=%d\n", __func__, err);
		goto out_request;
	}

	for_each_engine(engine, i915, id) {
		long timeout;

		timeout = i915_request_wait(request[id], 0,
					    MAX_SCHEDULE_TIMEOUT);
		if (timeout < 0) {
			err = timeout;
			pr_err("%s: error waiting for request on %s, err=%d\n",
			       __func__, engine->name, err);
			goto out_request;
		}

		GEM_BUG_ON(!i915_request_completed(request[id]));
		i915_request_put(request[id]);
		request[id] = NULL;
	}

	err = igt_live_test_end(&t);

out_request:
	for_each_engine(engine, i915, id)
		if (request[id])
			i915_request_put(request[id]);
	i915_vma_unpin(batch);
	i915_vma_put(batch);
	return err;
}

static int live_sequential_engines(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct i915_request *request[I915_NUM_ENGINES] = {};
	struct i915_request *prev = NULL;
	struct intel_engine_cs *engine;
	struct igt_live_test t;
	unsigned int id;
	int err;

	/* Check we can submit requests to all engines sequentially, such
	 * that each successive request waits for the earlier ones. This
	 * tests that we don't execute requests out of order, even though
	 * they are running on independent engines.
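	 *
	 * Each request runs the self-referencing recursive batch, so it
	 * spins until recursive_batch_resolve() terminates it, while the
	 * ordering itself is enforced with i915_request_await_dma_fence()
	 * on the previous request's fence.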
	 */

	err = igt_live_test_begin(&t, i915, __func__, "");
	if (err)
		return err;

	for_each_engine(engine, i915, id) {
		struct i915_vma *batch;

		batch = recursive_batch(i915);
		if (IS_ERR(batch)) {
			err = PTR_ERR(batch);
			pr_err("%s: Unable to create batch for %s, err=%d\n",
			       __func__, engine->name, err);
			return err;
		}

		request[id] = i915_request_create(engine->kernel_context);
		if (IS_ERR(request[id])) {
			err = PTR_ERR(request[id]);
			pr_err("%s: Request allocation failed for %s with err=%d\n",
			       __func__, engine->name, err);
			goto out_request;
		}

		if (prev) {
			err = i915_request_await_dma_fence(request[id],
							   &prev->fence);
			if (err) {
				i915_request_add(request[id]);
				pr_err("%s: Request await failed for %s with err=%d\n",
				       __func__, engine->name, err);
				goto out_request;
			}
		}

		err = engine->emit_bb_start(request[id],
					    batch->node.start,
					    batch->node.size,
					    0);
		GEM_BUG_ON(err);
		request[id]->batch = batch;

		i915_vma_lock(batch);
		err = i915_request_await_object(request[id], batch->obj, false);
		if (err == 0)
			err = i915_vma_move_to_active(batch, request[id], 0);
		i915_vma_unlock(batch);
		GEM_BUG_ON(err);

		i915_request_get(request[id]);
		i915_request_add(request[id]);

		prev = request[id];
	}

	for_each_engine(engine, i915, id) {
		long timeout;

		if (i915_request_completed(request[id])) {
			pr_err("%s(%s): request completed too early!\n",
			       __func__, engine->name);
			err = -EINVAL;
			goto out_request;
		}

		err = recursive_batch_resolve(request[id]->batch);
		if (err) {
			pr_err("%s: failed to resolve batch, err=%d\n",
			       __func__, err);
			goto out_request;
		}

		timeout = i915_request_wait(request[id], 0,
					    MAX_SCHEDULE_TIMEOUT);
		if (timeout < 0) {
			err = timeout;
			pr_err("%s: error waiting for request on %s, err=%d\n",
			       __func__, engine->name, err);
			goto out_request;
		}

		GEM_BUG_ON(!i915_request_completed(request[id]));
	}

	err = igt_live_test_end(&t);

out_request:
	for_each_engine(engine, i915, id) {
		u32 *cmd;

		if (!request[id])
			break;

		cmd = i915_gem_object_pin_map(request[id]->batch->obj,
					      I915_MAP_WC);
		if (!IS_ERR(cmd)) {
			*cmd = MI_BATCH_BUFFER_END;
			intel_gt_chipset_flush(engine->gt);

			i915_gem_object_unpin_map(request[id]->batch->obj);
		}

		i915_vma_put(request[id]->batch);
		i915_request_put(request[id]);
	}
	return err;
}

static int __live_parallel_engine1(void *arg)
{
	struct intel_engine_cs *engine = arg;
	IGT_TIMEOUT(end_time);
	unsigned long count;

	count = 0;
	do {
		struct i915_request *rq;
		int err;

		rq = i915_request_create(engine->kernel_context);
		if (IS_ERR(rq))
			return PTR_ERR(rq);

		i915_request_get(rq);
		i915_request_add(rq);

		err = 0;
		if (i915_request_wait(rq, 0, HZ / 5) < 0)
			err = -ETIME;
		i915_request_put(rq);
		if (err)
			return err;

		count++;
	} while (!__igt_timeout(end_time, NULL));

	pr_info("%s: %lu request + sync\n", engine->name, count);
	return 0;
}

static int __live_parallel_engineN(void *arg)
{
	struct intel_engine_cs *engine = arg;
	IGT_TIMEOUT(end_time);
	unsigned long count;

	count = 0;
	do {
		struct i915_request *rq;

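		/*
		 * Submit only, never waiting; unlike
		 * __live_parallel_engine1() this measures raw request
		 * submission throughput on each engine.
		 */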
		rq = i915_request_create(engine->kernel_context);
		if (IS_ERR(rq))
			return PTR_ERR(rq);

		i915_request_add(rq);
		count++;
	} while (!__igt_timeout(end_time, NULL));

	pr_info("%s: %lu requests\n", engine->name, count);
	return 0;
}

static int live_parallel_engines(void *arg)
{
	struct drm_i915_private *i915 = arg;
	static int (* const func[])(void *arg) = {
		__live_parallel_engine1,
		__live_parallel_engineN,
		NULL,
	};
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	int (* const *fn)(void *arg);
	int err = 0;

	/*
	 * Check we can submit requests to all engines concurrently. This
	 * tests that we load up the system maximally.
	 */

	for (fn = func; !err && *fn; fn++) {
		struct task_struct *tsk[I915_NUM_ENGINES] = {};
		struct igt_live_test t;

		err = igt_live_test_begin(&t, i915, __func__, "");
		if (err)
			break;

		for_each_engine(engine, i915, id) {
			tsk[id] = kthread_run(*fn, engine,
					      "igt/parallel:%s",
					      engine->name);
			if (IS_ERR(tsk[id])) {
				err = PTR_ERR(tsk[id]);
				break;
			}
			get_task_struct(tsk[id]);
		}

		for_each_engine(engine, i915, id) {
			int status;

			if (IS_ERR_OR_NULL(tsk[id]))
				continue;

			status = kthread_stop(tsk[id]);
			if (status && !err)
				err = status;

			put_task_struct(tsk[id]);
		}

		if (igt_live_test_end(&t))
			err = -EIO;
	}

	return err;
}

static int
max_batches(struct i915_gem_context *ctx, struct intel_engine_cs *engine)
{
	struct i915_request *rq;
	int ret;

	/*
	 * Before execlists, all contexts share the same ringbuffer. With
	 * execlists, each context/engine has a separate ringbuffer and
	 * for the purposes of this test, inexhaustible.
	 *
	 * For the global ringbuffer though, we have to be very careful
	 * that we do not wrap while preventing the execution of requests
	 * with an unsignaled fence.
	 */
	if (HAS_EXECLISTS(ctx->i915))
		return INT_MAX;

	rq = igt_request_alloc(ctx, engine);
	if (IS_ERR(rq)) {
		ret = PTR_ERR(rq);
	} else {
		int sz;

		ret = rq->ring->size - rq->reserved_space;
		i915_request_add(rq);

		sz = rq->ring->emit - rq->head;
		if (sz < 0)
			sz += rq->ring->size;
		ret /= sz;
		ret /= 2; /* leave half spare, in case of emergency! */
	}

	return ret;
}

static int live_breadcrumbs_smoketest(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct smoketest t[I915_NUM_ENGINES];
	unsigned int ncpus = num_online_cpus();
	unsigned long num_waits, num_fences;
	struct intel_engine_cs *engine;
	struct task_struct **threads;
	struct igt_live_test live;
	enum intel_engine_id id;
	intel_wakeref_t wakeref;
	struct drm_file *file;
	unsigned int n;
	int ret = 0;

	/*
	 * Smoketest our breadcrumb/signal handling for requests across multiple
	 * threads. A very simple test to only catch the most egregious of bugs.
	 * See __igt_breadcrumbs_smoketest();
	 *
	 * On real hardware this time.
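	 *
	 * One kthread is spawned per engine per online cpu, all sharing the
	 * same set of 64 live contexts, with the batch size capped by
	 * max_batches() so that legacy ringbuffer submission cannot wrap.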
	 */

	wakeref = intel_runtime_pm_get(&i915->runtime_pm);

	file = mock_file(i915);
	if (IS_ERR(file)) {
		ret = PTR_ERR(file);
		goto out_rpm;
	}

	threads = kcalloc(ncpus * I915_NUM_ENGINES,
			  sizeof(*threads),
			  GFP_KERNEL);
	if (!threads) {
		ret = -ENOMEM;
		goto out_file;
	}

	memset(&t[0], 0, sizeof(t[0]));
	t[0].request_alloc = __live_request_alloc;
	t[0].ncontexts = 64;
	t[0].contexts = kmalloc_array(t[0].ncontexts,
				      sizeof(*t[0].contexts),
				      GFP_KERNEL);
	if (!t[0].contexts) {
		ret = -ENOMEM;
		goto out_threads;
	}

	for (n = 0; n < t[0].ncontexts; n++) {
		t[0].contexts[n] = live_context(i915, file);
		if (!t[0].contexts[n]) {
			ret = -ENOMEM;
			goto out_contexts;
		}
	}

	ret = igt_live_test_begin(&live, i915, __func__, "");
	if (ret)
		goto out_contexts;

	for_each_engine(engine, i915, id) {
		t[id] = t[0];
		t[id].engine = engine;
		t[id].max_batch = max_batches(t[0].contexts[0], engine);
		if (t[id].max_batch < 0) {
			ret = t[id].max_batch;
			goto out_flush;
		}
		/* One ring interleaved between requests from all cpus */
		t[id].max_batch /= num_online_cpus() + 1;
		pr_debug("Limiting batches to %d requests on %s\n",
			 t[id].max_batch, engine->name);

		for (n = 0; n < ncpus; n++) {
			struct task_struct *tsk;

			tsk = kthread_run(__igt_breadcrumbs_smoketest,
					  &t[id], "igt/%d.%d", id, n);
			if (IS_ERR(tsk)) {
				ret = PTR_ERR(tsk);
				goto out_flush;
			}

			get_task_struct(tsk);
			threads[id * ncpus + n] = tsk;
		}
	}

	msleep(jiffies_to_msecs(i915_selftest.timeout_jiffies));

out_flush:
	num_waits = 0;
	num_fences = 0;
	for_each_engine(engine, i915, id) {
		for (n = 0; n < ncpus; n++) {
			struct task_struct *tsk = threads[id * ncpus + n];
			int err;

			if (!tsk)
				continue;

			err = kthread_stop(tsk);
			if (err < 0 && !ret)
				ret = err;

			put_task_struct(tsk);
		}

		num_waits += atomic_long_read(&t[id].num_waits);
		num_fences += atomic_long_read(&t[id].num_fences);
	}
	pr_info("Completed %lu waits for %lu fences across %d engines and %d cpus\n",
		num_waits, num_fences, RUNTIME_INFO(i915)->num_engines, ncpus);

	ret = igt_live_test_end(&live) ?: ret;
out_contexts:
	kfree(t[0].contexts);
out_threads:
	kfree(threads);
out_file:
	mock_file_free(i915, file);
out_rpm:
	intel_runtime_pm_put(&i915->runtime_pm, wakeref);

	return ret;
}

int i915_request_live_selftests(struct drm_i915_private *i915)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(live_nop_request),
		SUBTEST(live_all_engines),
		SUBTEST(live_sequential_engines),
		SUBTEST(live_parallel_engines),
		SUBTEST(live_empty_request),
		SUBTEST(live_breadcrumbs_smoketest),
	};

	if (intel_gt_is_wedged(&i915->gt))
		return 0;

	return i915_subtests(tests, i915);
}
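
/*
 * Usage sketch (illustrative, not part of the selftests themselves): both
 * suites above are driven by the i915 selftest framework and are typically
 * selected via module parameters at load time, e.g.
 *
 *   modprobe i915 mock_selftests=-1	# request the full mock suite
 *   modprobe i915 live_selftests=-1	# request the live suite on real HW
 *
 * where -1 is conventionally used to ask for all tests; a selftest failure
 * is reported and aborts the driver load so that CI notices.
 */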