/*
 * Copyright © 2016 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 */

#include <linux/prime_numbers.h>

#include "gem/i915_gem_pm.h"
#include "gem/selftests/mock_context.h"

#include "gt/intel_gt.h"

#include "i915_random.h"
#include "i915_selftest.h"
#include "igt_live_test.h"
#include "lib_sw_fence.h"

#include "mock_drm.h"
#include "mock_gem_device.h"

static int igt_add_request(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct i915_request *request;
	int err = -ENOMEM;

	/* Basic preliminary test to create a request and let it loose! */

	mutex_lock(&i915->drm.struct_mutex);
	request = mock_request(i915->engine[RCS0]->kernel_context, HZ / 10);
	if (!request)
		goto out_unlock;

	i915_request_add(request);

	err = 0;
out_unlock:
	mutex_unlock(&i915->drm.struct_mutex);
	return err;
}

static int igt_wait_request(void *arg)
{
	const long T = HZ / 4;
	struct drm_i915_private *i915 = arg;
	struct i915_request *request;
	int err = -EINVAL;

	/* Submit a request, then wait upon it */

	mutex_lock(&i915->drm.struct_mutex);
	request = mock_request(i915->engine[RCS0]->kernel_context, T);
	if (!request) {
		err = -ENOMEM;
		goto out_unlock;
	}
	i915_request_get(request);

	if (i915_request_wait(request, 0, 0) != -ETIME) {
		pr_err("request wait (busy query) succeeded (expected timeout before submit!)\n");
		goto out_request;
	}

	if (i915_request_wait(request, 0, T) != -ETIME) {
		pr_err("request wait succeeded (expected timeout before submit!)\n");
		goto out_request;
	}

	if (i915_request_completed(request)) {
		pr_err("request completed before submit!!\n");
		goto out_request;
	}

	i915_request_add(request);

	if (i915_request_wait(request, 0, 0) != -ETIME) {
		pr_err("request wait (busy query) succeeded (expected timeout after submit!)\n");
		goto out_request;
	}

	if (i915_request_completed(request)) {
		pr_err("request completed immediately!\n");
		goto out_request;
	}

	if (i915_request_wait(request, 0, T / 2) != -ETIME) {
		pr_err("request wait succeeded (expected timeout!)\n");
		goto out_request;
	}

	if (i915_request_wait(request, 0, T) == -ETIME) {
		pr_err("request wait timed out!\n");
		goto out_request;
	}

	if (!i915_request_completed(request)) {
		pr_err("request not complete after waiting!\n");
		goto out_request;
	}

	if (i915_request_wait(request, 0, T) == -ETIME) {
		pr_err("request wait timed out when already complete!\n");
		goto out_request;
	}

	err = 0;
out_request:
	i915_request_put(request);
out_unlock:
	mock_device_flush(i915);
	mutex_unlock(&i915->drm.struct_mutex);
	return err;
}

static int igt_fence_wait(void *arg)
{
	const long T = HZ / 4;
	struct drm_i915_private *i915 = arg;
	struct i915_request *request;
	int err = -EINVAL;

	/* Submit a request, treat it as a fence and wait upon it */

	mutex_lock(&i915->drm.struct_mutex);
	request = mock_request(i915->engine[RCS0]->kernel_context, T);
	if (!request) {
		err = -ENOMEM;
		goto out_locked;
	}

	if (dma_fence_wait_timeout(&request->fence, false, T) != -ETIME) {
		pr_err("fence wait success before submit (expected timeout)!\n");
		goto out_locked;
	}

	i915_request_add(request);
	mutex_unlock(&i915->drm.struct_mutex);

	if (dma_fence_is_signaled(&request->fence)) {
		pr_err("fence signaled immediately!\n");
		goto out_device;
	}

	if (dma_fence_wait_timeout(&request->fence, false, T / 2) != -ETIME) {
		pr_err("fence wait success after submit (expected timeout)!\n");
		goto out_device;
	}

	if (dma_fence_wait_timeout(&request->fence, false, T) <= 0) {
		pr_err("fence wait timed out (expected success)!\n");
		goto out_device;
	}

	if (!dma_fence_is_signaled(&request->fence)) {
		pr_err("fence unsignaled after waiting!\n");
		goto out_device;
	}

	if (dma_fence_wait_timeout(&request->fence, false, T) <= 0) {
		pr_err("fence wait timed out when complete (expected success)!\n");
		goto out_device;
	}

	err = 0;
out_device:
	mutex_lock(&i915->drm.struct_mutex);
out_locked:
	mock_device_flush(i915);
	mutex_unlock(&i915->drm.struct_mutex);
	return err;
}
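
/*
 * Descriptive comment added for clarity (derived from the code below):
 * check that execution order is not strictly FIFO across contexts. A
 * long-delay request on context A is cancelled, a zero-delay "vip" request
 * on context B is queued, and the original request is then resubmitted
 * behind it; the vip request must complete while the original is still
 * pending.
 */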
static int igt_request_rewind(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct i915_request *request, *vip;
	struct i915_gem_context *ctx[2];
	struct intel_context *ce;
	int err = -EINVAL;

	mutex_lock(&i915->drm.struct_mutex);
	ctx[0] = mock_context(i915, "A");
	ce = i915_gem_context_get_engine(ctx[0], RCS0);
	GEM_BUG_ON(IS_ERR(ce));
	request = mock_request(ce, 2 * HZ);
	intel_context_put(ce);
	if (!request) {
		err = -ENOMEM;
		goto err_context_0;
	}

	i915_request_get(request);
	i915_request_add(request);

	ctx[1] = mock_context(i915, "B");
	ce = i915_gem_context_get_engine(ctx[1], RCS0);
	GEM_BUG_ON(IS_ERR(ce));
	vip = mock_request(ce, 0);
	intel_context_put(ce);
	if (!vip) {
		err = -ENOMEM;
		goto err_context_1;
	}

	/* Simulate preemption by manual reordering */
	if (!mock_cancel_request(request)) {
		pr_err("failed to cancel request (already executed)!\n");
		i915_request_add(vip);
		goto err_context_1;
	}
	i915_request_get(vip);
	i915_request_add(vip);
	rcu_read_lock();
	request->engine->submit_request(request);
	rcu_read_unlock();

	mutex_unlock(&i915->drm.struct_mutex);

	if (i915_request_wait(vip, 0, HZ) == -ETIME) {
		pr_err("timed out waiting for high priority request\n");
		goto err;
	}

	if (i915_request_completed(request)) {
		pr_err("low priority request already completed\n");
		goto err;
	}

	err = 0;
err:
	i915_request_put(vip);
	mutex_lock(&i915->drm.struct_mutex);
err_context_1:
	mock_context_close(ctx[1]);
	i915_request_put(request);
err_context_0:
	mock_context_close(ctx[0]);
	mock_device_flush(i915);
	mutex_unlock(&i915->drm.struct_mutex);
	return err;
}

struct smoketest {
	struct intel_engine_cs *engine;
	struct i915_gem_context **contexts;
	atomic_long_t num_waits, num_fences;
	int ncontexts, max_batch;
	struct i915_request *(*request_alloc)(struct intel_context *ce);
};

static struct i915_request *
__mock_request_alloc(struct intel_context *ce)
{
	return mock_request(ce, 0);
}

static struct i915_request *
__live_request_alloc(struct intel_context *ce)
{
	return intel_context_create_request(ce);
}

static int __igt_breadcrumbs_smoketest(void *arg)
{
	struct smoketest *t = arg;
	struct mutex * const BKL = &t->engine->i915->drm.struct_mutex;
	const unsigned int max_batch = min(t->ncontexts, t->max_batch) - 1;
	const unsigned int total = 4 * t->ncontexts + 1;
	unsigned int num_waits = 0, num_fences = 0;
	struct i915_request **requests;
	I915_RND_STATE(prng);
	unsigned int *order;
	int err = 0;

	/*
	 * A very simple test to catch the most egregious of list handling bugs.
	 *
	 * At its heart, we simply create oodles of requests running across
	 * multiple kthreads and enable signaling on them, for the sole purpose
	 * of stressing our breadcrumb handling. The only inspection we do is
	 * that the fences were marked as signaled.
	 */

	requests = kmalloc_array(total, sizeof(*requests), GFP_KERNEL);
	if (!requests)
		return -ENOMEM;

	order = i915_random_order(total, &prng);
	if (!order) {
		err = -ENOMEM;
		goto out_requests;
	}

	while (!kthread_should_stop()) {
		struct i915_sw_fence *submit, *wait;
		unsigned int n, count;

		submit = heap_fence_create(GFP_KERNEL);
		if (!submit) {
			err = -ENOMEM;
			break;
		}

		wait = heap_fence_create(GFP_KERNEL);
		if (!wait) {
			i915_sw_fence_commit(submit);
			heap_fence_put(submit);
			err = -ENOMEM;
			break;
		}

		i915_random_reorder(order, total, &prng);
		count = 1 + i915_prandom_u32_max_state(max_batch, &prng);

		for (n = 0; n < count; n++) {
			struct i915_gem_context *ctx =
				t->contexts[order[n] % t->ncontexts];
			struct i915_request *rq;
			struct intel_context *ce;

			mutex_lock(BKL);

			ce = i915_gem_context_get_engine(ctx, t->engine->legacy_idx);
			GEM_BUG_ON(IS_ERR(ce));
			rq = t->request_alloc(ce);
			intel_context_put(ce);
			if (IS_ERR(rq)) {
				mutex_unlock(BKL);
				err = PTR_ERR(rq);
				count = n;
				break;
			}

			err = i915_sw_fence_await_sw_fence_gfp(&rq->submit,
							       submit,
							       GFP_KERNEL);

			requests[n] = i915_request_get(rq);
			i915_request_add(rq);

			mutex_unlock(BKL);

			if (err >= 0)
				err = i915_sw_fence_await_dma_fence(wait,
								    &rq->fence,
								    0,
								    GFP_KERNEL);

			if (err < 0) {
				i915_request_put(rq);
				count = n;
				break;
			}
		}

		i915_sw_fence_commit(submit);
		i915_sw_fence_commit(wait);

		if (!wait_event_timeout(wait->wait,
					i915_sw_fence_done(wait),
					5 * HZ)) {
			struct i915_request *rq = requests[count - 1];

			pr_err("waiting for %d/%d fences (last %llx:%lld) on %s timed out!\n",
			       atomic_read(&wait->pending), count,
			       rq->fence.context, rq->fence.seqno,
			       t->engine->name);
			GEM_TRACE_DUMP();

			intel_gt_set_wedged(t->engine->gt);
			GEM_BUG_ON(!i915_request_completed(rq));
			i915_sw_fence_wait(wait);
			err = -EIO;
		}

		for (n = 0; n < count; n++) {
			struct i915_request *rq = requests[n];

			if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
				      &rq->fence.flags)) {
				pr_err("%llu:%llu was not signaled!\n",
				       rq->fence.context, rq->fence.seqno);
				err = -EINVAL;
			}

			i915_request_put(rq);
		}

		heap_fence_put(wait);
		heap_fence_put(submit);

		if (err < 0)
			break;

		num_fences += count;
		num_waits++;

		cond_resched();
	}

	atomic_long_add(num_fences, &t->num_fences);
	atomic_long_add(num_waits, &t->num_waits);

	kfree(order);
out_requests:
	kfree(requests);
	return err;
}

static int mock_breadcrumbs_smoketest(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct smoketest t = {
		.engine = i915->engine[RCS0],
		.ncontexts = 1024,
		.max_batch = 1024,
		.request_alloc = __mock_request_alloc
	};
	unsigned int ncpus = num_online_cpus();
	struct task_struct **threads;
	unsigned int n;
	int ret = 0;

	/*
	 * Smoketest our breadcrumb/signal handling for requests across multiple
	 * threads. A very simple test to only catch the most egregious of bugs.
	 * See __igt_breadcrumbs_smoketest();
	 */

	threads = kmalloc_array(ncpus, sizeof(*threads), GFP_KERNEL);
	if (!threads)
		return -ENOMEM;

	t.contexts =
		kmalloc_array(t.ncontexts, sizeof(*t.contexts), GFP_KERNEL);
	if (!t.contexts) {
		ret = -ENOMEM;
		goto out_threads;
	}

	mutex_lock(&t.engine->i915->drm.struct_mutex);
	for (n = 0; n < t.ncontexts; n++) {
		t.contexts[n] = mock_context(t.engine->i915, "mock");
		if (!t.contexts[n]) {
			ret = -ENOMEM;
			goto out_contexts;
		}
	}
	mutex_unlock(&t.engine->i915->drm.struct_mutex);

	for (n = 0; n < ncpus; n++) {
		threads[n] = kthread_run(__igt_breadcrumbs_smoketest,
					 &t, "igt/%d", n);
		if (IS_ERR(threads[n])) {
			ret = PTR_ERR(threads[n]);
			ncpus = n;
			break;
		}

		get_task_struct(threads[n]);
	}

	msleep(jiffies_to_msecs(i915_selftest.timeout_jiffies));

	for (n = 0; n < ncpus; n++) {
		int err;

		err = kthread_stop(threads[n]);
		if (err < 0 && !ret)
			ret = err;

		put_task_struct(threads[n]);
	}
	pr_info("Completed %lu waits for %lu fences across %d cpus\n",
		atomic_long_read(&t.num_waits),
		atomic_long_read(&t.num_fences),
		ncpus);

	mutex_lock(&t.engine->i915->drm.struct_mutex);
out_contexts:
	for (n = 0; n < t.ncontexts; n++) {
		if (!t.contexts[n])
			break;
		mock_context_close(t.contexts[n]);
	}
	mutex_unlock(&t.engine->i915->drm.struct_mutex);
	kfree(t.contexts);
out_threads:
	kfree(threads);

	return ret;
}
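
/*
 * Descriptive comment added for clarity (an assumption based on the mock
 * helpers used above): these subtests run against a mock GEM device with no
 * real hardware, so request "execution" is emulated by the mock engine
 * rather than submitted to a GPU.
 */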
int i915_request_mock_selftests(void)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(igt_add_request),
		SUBTEST(igt_wait_request),
		SUBTEST(igt_fence_wait),
		SUBTEST(igt_request_rewind),
		SUBTEST(mock_breadcrumbs_smoketest),
	};
	struct drm_i915_private *i915;
	intel_wakeref_t wakeref;
	int err = 0;

	i915 = mock_gem_device();
	if (!i915)
		return -ENOMEM;

	with_intel_runtime_pm(&i915->runtime_pm, wakeref)
		err = i915_subtests(tests, i915);

	drm_dev_put(&i915->drm);

	return err;
}

static int live_nop_request(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct intel_engine_cs *engine;
	intel_wakeref_t wakeref;
	struct igt_live_test t;
	unsigned int id;
	int err = -ENODEV;

	/* Submit various sized batches of empty requests, to each engine
	 * (individually), and wait for the batch to complete. We can check
	 * the overhead of submitting requests to the hardware.
	 */

	mutex_lock(&i915->drm.struct_mutex);
	wakeref = intel_runtime_pm_get(&i915->runtime_pm);

	for_each_engine(engine, i915, id) {
		struct i915_request *request = NULL;
		unsigned long n, prime;
		IGT_TIMEOUT(end_time);
		ktime_t times[2] = {};

		err = igt_live_test_begin(&t, i915, __func__, engine->name);
		if (err)
			goto out_unlock;

		for_each_prime_number_from(prime, 1, 8192) {
			times[1] = ktime_get_raw();

			for (n = 0; n < prime; n++) {
				request = i915_request_create(engine->kernel_context);
				if (IS_ERR(request)) {
					err = PTR_ERR(request);
					goto out_unlock;
				}

				/* This space is left intentionally blank.
				 *
				 * We do not actually want to perform any
				 * action with this request, we just want
				 * to measure the latency in allocation
				 * and submission of our breadcrumbs -
				 * ensuring that the bare request is sufficient
				 * for the system to work (i.e. proper HEAD
				 * tracking of the rings, interrupt handling,
				 * etc). It also gives us the lowest bounds
				 * for latency.
				 */

				i915_request_add(request);
			}
			i915_request_wait(request, 0, MAX_SCHEDULE_TIMEOUT);

			times[1] = ktime_sub(ktime_get_raw(), times[1]);
			if (prime == 1)
				times[0] = times[1];

			if (__igt_timeout(end_time, NULL))
				break;
		}

		err = igt_live_test_end(&t);
		if (err)
			goto out_unlock;

		pr_info("Request latencies on %s: 1 = %lluns, %lu = %lluns\n",
			engine->name,
			ktime_to_ns(times[0]),
			prime, div64_u64(ktime_to_ns(times[1]), prime));
	}

out_unlock:
	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
	mutex_unlock(&i915->drm.struct_mutex);
	return err;
}
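
/*
 * Descriptive comment added for clarity (derived from the code below):
 * build a one-page batch containing a single MI_BATCH_BUFFER_END and pin it
 * into the global GTT; empty_request() then submits it on an engine's
 * kernel context to measure bare submission overhead.
 */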
static struct i915_vma *empty_batch(struct drm_i915_private *i915)
{
	struct drm_i915_gem_object *obj;
	struct i915_vma *vma;
	u32 *cmd;
	int err;

	obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
	if (IS_ERR(obj))
		return ERR_CAST(obj);

	cmd = i915_gem_object_pin_map(obj, I915_MAP_WB);
	if (IS_ERR(cmd)) {
		err = PTR_ERR(cmd);
		goto err;
	}

	*cmd = MI_BATCH_BUFFER_END;

	__i915_gem_object_flush_map(obj, 0, 64);
	i915_gem_object_unpin_map(obj);

	intel_gt_chipset_flush(&i915->gt);

	vma = i915_vma_instance(obj, &i915->ggtt.vm, NULL);
	if (IS_ERR(vma)) {
		err = PTR_ERR(vma);
		goto err;
	}

	err = i915_vma_pin(vma, 0, 0, PIN_USER | PIN_GLOBAL);
	if (err)
		goto err;

	return vma;

err:
	i915_gem_object_put(obj);
	return ERR_PTR(err);
}

static struct i915_request *
empty_request(struct intel_engine_cs *engine,
	      struct i915_vma *batch)
{
	struct i915_request *request;
	int err;

	request = i915_request_create(engine->kernel_context);
	if (IS_ERR(request))
		return request;

	err = engine->emit_bb_start(request,
				    batch->node.start,
				    batch->node.size,
				    I915_DISPATCH_SECURE);
	if (err)
		goto out_request;

out_request:
	i915_request_add(request);
	return err ? ERR_PTR(err) : request;
}

static int live_empty_request(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct intel_engine_cs *engine;
	intel_wakeref_t wakeref;
	struct igt_live_test t;
	struct i915_vma *batch;
	unsigned int id;
	int err = 0;

	/* Submit various sized batches of empty requests, to each engine
	 * (individually), and wait for the batch to complete. We can check
	 * the overhead of submitting requests to the hardware.
	 */

	mutex_lock(&i915->drm.struct_mutex);
	wakeref = intel_runtime_pm_get(&i915->runtime_pm);

	batch = empty_batch(i915);
	if (IS_ERR(batch)) {
		err = PTR_ERR(batch);
		goto out_unlock;
	}

	for_each_engine(engine, i915, id) {
		IGT_TIMEOUT(end_time);
		struct i915_request *request;
		unsigned long n, prime;
		ktime_t times[2] = {};

		err = igt_live_test_begin(&t, i915, __func__, engine->name);
		if (err)
			goto out_batch;

		/* Warmup / preload */
		request = empty_request(engine, batch);
		if (IS_ERR(request)) {
			err = PTR_ERR(request);
			goto out_batch;
		}
		i915_request_wait(request, 0, MAX_SCHEDULE_TIMEOUT);

		for_each_prime_number_from(prime, 1, 8192) {
			times[1] = ktime_get_raw();

			for (n = 0; n < prime; n++) {
				request = empty_request(engine, batch);
				if (IS_ERR(request)) {
					err = PTR_ERR(request);
					goto out_batch;
				}
			}
			i915_request_wait(request, 0, MAX_SCHEDULE_TIMEOUT);

			times[1] = ktime_sub(ktime_get_raw(), times[1]);
			if (prime == 1)
				times[0] = times[1];

			if (__igt_timeout(end_time, NULL))
				break;
		}

		err = igt_live_test_end(&t);
		if (err)
			goto out_batch;

		pr_info("Batch latencies on %s: 1 = %lluns, %lu = %lluns\n",
			engine->name,
			ktime_to_ns(times[0]),
			prime, div64_u64(ktime_to_ns(times[1]), prime));
	}

out_batch:
	i915_vma_unpin(batch);
	i915_vma_put(batch);
out_unlock:
	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
	mutex_unlock(&i915->drm.struct_mutex);
	return err;
}
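
/*
 * Descriptive comment added for clarity (derived from the code below):
 * build a batch whose first instruction is an MI_BATCH_BUFFER_START pointing
 * back at itself, so a request executing it keeps looping until
 * recursive_batch_resolve() overwrites that first dword with
 * MI_BATCH_BUFFER_END; the trailing terminator is only a safety net.
 */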
static struct i915_vma *recursive_batch(struct drm_i915_private *i915)
{
	struct i915_gem_context *ctx = i915->kernel_context;
	struct i915_address_space *vm = ctx->vm ?: &i915->ggtt.vm;
	struct drm_i915_gem_object *obj;
	const int gen = INTEL_GEN(i915);
	struct i915_vma *vma;
	u32 *cmd;
	int err;

	obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
	if (IS_ERR(obj))
		return ERR_CAST(obj);

	vma = i915_vma_instance(obj, vm, NULL);
	if (IS_ERR(vma)) {
		err = PTR_ERR(vma);
		goto err;
	}

	err = i915_vma_pin(vma, 0, 0, PIN_USER);
	if (err)
		goto err;

	cmd = i915_gem_object_pin_map(obj, I915_MAP_WC);
	if (IS_ERR(cmd)) {
		err = PTR_ERR(cmd);
		goto err;
	}

	if (gen >= 8) {
		*cmd++ = MI_BATCH_BUFFER_START | 1 << 8 | 1;
		*cmd++ = lower_32_bits(vma->node.start);
		*cmd++ = upper_32_bits(vma->node.start);
	} else if (gen >= 6) {
		*cmd++ = MI_BATCH_BUFFER_START | 1 << 8;
		*cmd++ = lower_32_bits(vma->node.start);
	} else {
		*cmd++ = MI_BATCH_BUFFER_START | MI_BATCH_GTT;
		*cmd++ = lower_32_bits(vma->node.start);
	}
	*cmd++ = MI_BATCH_BUFFER_END; /* terminate early in case of error */

	__i915_gem_object_flush_map(obj, 0, 64);
	i915_gem_object_unpin_map(obj);

	intel_gt_chipset_flush(&i915->gt);

	return vma;

err:
	i915_gem_object_put(obj);
	return ERR_PTR(err);
}

static int recursive_batch_resolve(struct i915_vma *batch)
{
	u32 *cmd;

	cmd = i915_gem_object_pin_map(batch->obj, I915_MAP_WC);
	if (IS_ERR(cmd))
		return PTR_ERR(cmd);

	*cmd = MI_BATCH_BUFFER_END;
	intel_gt_chipset_flush(batch->vm->gt);

	i915_gem_object_unpin_map(batch->obj);

	return 0;
}

static int live_all_engines(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct intel_engine_cs *engine;
	struct i915_request *request[I915_NUM_ENGINES];
	intel_wakeref_t wakeref;
	struct igt_live_test t;
	struct i915_vma *batch;
	unsigned int id;
	int err;

	/* Check we can submit requests to all engines simultaneously. We
	 * send a recursive batch to each engine - checking that we don't
	 * block doing so, and that they don't complete too soon.
	 */

	mutex_lock(&i915->drm.struct_mutex);
	wakeref = intel_runtime_pm_get(&i915->runtime_pm);

	err = igt_live_test_begin(&t, i915, __func__, "");
	if (err)
		goto out_unlock;

	batch = recursive_batch(i915);
	if (IS_ERR(batch)) {
		err = PTR_ERR(batch);
		pr_err("%s: Unable to create batch, err=%d\n", __func__, err);
		goto out_unlock;
	}

	for_each_engine(engine, i915, id) {
		request[id] = i915_request_create(engine->kernel_context);
		if (IS_ERR(request[id])) {
			err = PTR_ERR(request[id]);
			pr_err("%s: Request allocation failed with err=%d\n",
			       __func__, err);
			goto out_request;
		}

		err = engine->emit_bb_start(request[id],
					    batch->node.start,
					    batch->node.size,
					    0);
		GEM_BUG_ON(err);
		request[id]->batch = batch;

		i915_vma_lock(batch);
		err = i915_request_await_object(request[id], batch->obj, 0);
		if (err == 0)
			err = i915_vma_move_to_active(batch, request[id], 0);
		i915_vma_unlock(batch);
		GEM_BUG_ON(err);

		i915_request_get(request[id]);
		i915_request_add(request[id]);
	}

	for_each_engine(engine, i915, id) {
		if (i915_request_completed(request[id])) {
			pr_err("%s(%s): request completed too early!\n",
			       __func__, engine->name);
			err = -EINVAL;
			goto out_request;
		}
	}

	err = recursive_batch_resolve(batch);
	if (err) {
		pr_err("%s: failed to resolve batch, err=%d\n", __func__, err);
		goto out_request;
	}

	for_each_engine(engine, i915, id) {
		long timeout;

		timeout = i915_request_wait(request[id], 0,
					    MAX_SCHEDULE_TIMEOUT);
		if (timeout < 0) {
			err = timeout;
			pr_err("%s: error waiting for request on %s, err=%d\n",
			       __func__, engine->name, err);
			goto out_request;
		}

		GEM_BUG_ON(!i915_request_completed(request[id]));
		i915_request_put(request[id]);
		request[id] = NULL;
	}

	err = igt_live_test_end(&t);

out_request:
	for_each_engine(engine, i915, id)
		if (request[id])
			i915_request_put(request[id]);
	i915_vma_unpin(batch);
	i915_vma_put(batch);
out_unlock:
	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
	mutex_unlock(&i915->drm.struct_mutex);
	return err;
}

static int live_sequential_engines(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct i915_request *request[I915_NUM_ENGINES] = {};
	struct i915_request *prev = NULL;
	struct intel_engine_cs *engine;
	intel_wakeref_t wakeref;
	struct igt_live_test t;
	unsigned int id;
	int err;

	/* Check we can submit requests to all engines sequentially, such
	 * that each successive request waits for the earlier ones. This
	 * tests that we don't execute requests out of order, even though
	 * they are running on independent engines.
	 */

	mutex_lock(&i915->drm.struct_mutex);
	wakeref = intel_runtime_pm_get(&i915->runtime_pm);

	err = igt_live_test_begin(&t, i915, __func__, "");
	if (err)
		goto out_unlock;

	for_each_engine(engine, i915, id) {
		struct i915_vma *batch;

		batch = recursive_batch(i915);
		if (IS_ERR(batch)) {
			err = PTR_ERR(batch);
			pr_err("%s: Unable to create batch for %s, err=%d\n",
			       __func__, engine->name, err);
			goto out_unlock;
		}

		request[id] = i915_request_create(engine->kernel_context);
		if (IS_ERR(request[id])) {
			err = PTR_ERR(request[id]);
			pr_err("%s: Request allocation failed for %s with err=%d\n",
			       __func__, engine->name, err);
			goto out_request;
		}

		if (prev) {
			err = i915_request_await_dma_fence(request[id],
							   &prev->fence);
			if (err) {
				i915_request_add(request[id]);
				pr_err("%s: Request await failed for %s with err=%d\n",
				       __func__, engine->name, err);
				goto out_request;
			}
		}

		err = engine->emit_bb_start(request[id],
					    batch->node.start,
					    batch->node.size,
					    0);
		GEM_BUG_ON(err);
		request[id]->batch = batch;

		i915_vma_lock(batch);
		err = i915_request_await_object(request[id], batch->obj, false);
		if (err == 0)
			err = i915_vma_move_to_active(batch, request[id], 0);
		i915_vma_unlock(batch);
		GEM_BUG_ON(err);

		i915_request_get(request[id]);
		i915_request_add(request[id]);

		prev = request[id];
	}

	for_each_engine(engine, i915, id) {
		long timeout;

		if (i915_request_completed(request[id])) {
			pr_err("%s(%s): request completed too early!\n",
			       __func__, engine->name);
			err = -EINVAL;
			goto out_request;
		}

		err = recursive_batch_resolve(request[id]->batch);
		if (err) {
			pr_err("%s: failed to resolve batch, err=%d\n",
			       __func__, err);
			goto out_request;
		}

		timeout = i915_request_wait(request[id], 0,
					    MAX_SCHEDULE_TIMEOUT);
		if (timeout < 0) {
			err = timeout;
			pr_err("%s: error waiting for request on %s, err=%d\n",
			       __func__, engine->name, err);
			goto out_request;
		}

		GEM_BUG_ON(!i915_request_completed(request[id]));
	}

	err = igt_live_test_end(&t);

out_request:
	for_each_engine(engine, i915, id) {
		u32 *cmd;

		if (!request[id])
			break;

		cmd = i915_gem_object_pin_map(request[id]->batch->obj,
					      I915_MAP_WC);
		if (!IS_ERR(cmd)) {
			*cmd = MI_BATCH_BUFFER_END;
			intel_gt_chipset_flush(engine->gt);

			i915_gem_object_unpin_map(request[id]->batch->obj);
		}

		i915_vma_put(request[id]->batch);
		i915_request_put(request[id]);
	}
out_unlock:
	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
	mutex_unlock(&i915->drm.struct_mutex);
	return err;
}
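
/*
 * Descriptive comment added for clarity (derived from the code below):
 * estimate how many requests we can queue on @engine for @ctx before the
 * ring would wrap - effectively unlimited under execlists, otherwise
 * roughly half the remaining ring space divided by the footprint of one
 * request.
 */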
static int
max_batches(struct i915_gem_context *ctx, struct intel_engine_cs *engine)
{
	struct i915_request *rq;
	int ret;

	/*
	 * Before execlists, all contexts share the same ringbuffer. With
	 * execlists, each context/engine has a separate ringbuffer and
	 * for the purposes of this test, inexhaustible.
	 *
	 * For the global ringbuffer though, we have to be very careful
	 * that we do not wrap while preventing the execution of requests
	 * with an unsignaled fence.
	 */
	if (HAS_EXECLISTS(ctx->i915))
		return INT_MAX;

	rq = igt_request_alloc(ctx, engine);
	if (IS_ERR(rq)) {
		ret = PTR_ERR(rq);
	} else {
		int sz;

		ret = rq->ring->size - rq->reserved_space;
		i915_request_add(rq);

		sz = rq->ring->emit - rq->head;
		if (sz < 0)
			sz += rq->ring->size;
		ret /= sz;
		ret /= 2; /* leave half spare, in case of emergency! */
	}

	return ret;
}

static int live_breadcrumbs_smoketest(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct smoketest t[I915_NUM_ENGINES];
	unsigned int ncpus = num_online_cpus();
	unsigned long num_waits, num_fences;
	struct intel_engine_cs *engine;
	struct task_struct **threads;
	struct igt_live_test live;
	enum intel_engine_id id;
	intel_wakeref_t wakeref;
	struct drm_file *file;
	unsigned int n;
	int ret = 0;

	/*
	 * Smoketest our breadcrumb/signal handling for requests across multiple
	 * threads. A very simple test to only catch the most egregious of bugs.
	 * See __igt_breadcrumbs_smoketest();
	 *
	 * On real hardware this time.
	 */

	wakeref = intel_runtime_pm_get(&i915->runtime_pm);

	file = mock_file(i915);
	if (IS_ERR(file)) {
		ret = PTR_ERR(file);
		goto out_rpm;
	}

	threads = kcalloc(ncpus * I915_NUM_ENGINES,
			  sizeof(*threads),
			  GFP_KERNEL);
	if (!threads) {
		ret = -ENOMEM;
		goto out_file;
	}

	memset(&t[0], 0, sizeof(t[0]));
	t[0].request_alloc = __live_request_alloc;
	t[0].ncontexts = 64;
	t[0].contexts = kmalloc_array(t[0].ncontexts,
				      sizeof(*t[0].contexts),
				      GFP_KERNEL);
	if (!t[0].contexts) {
		ret = -ENOMEM;
		goto out_threads;
	}

	mutex_lock(&i915->drm.struct_mutex);
	for (n = 0; n < t[0].ncontexts; n++) {
		t[0].contexts[n] = live_context(i915, file);
		if (!t[0].contexts[n]) {
			ret = -ENOMEM;
			goto out_contexts;
		}
	}

	ret = igt_live_test_begin(&live, i915, __func__, "");
	if (ret)
		goto out_contexts;

	for_each_engine(engine, i915, id) {
		t[id] = t[0];
		t[id].engine = engine;
		t[id].max_batch = max_batches(t[0].contexts[0], engine);
		if (t[id].max_batch < 0) {
			ret = t[id].max_batch;
			mutex_unlock(&i915->drm.struct_mutex);
			goto out_flush;
		}
		/* One ring interleaved between requests from all cpus */
		t[id].max_batch /= num_online_cpus() + 1;
		pr_debug("Limiting batches to %d requests on %s\n",
			 t[id].max_batch, engine->name);

		for (n = 0; n < ncpus; n++) {
			struct task_struct *tsk;

			tsk = kthread_run(__igt_breadcrumbs_smoketest,
					  &t[id], "igt/%d.%d", id, n);
			if (IS_ERR(tsk)) {
				ret = PTR_ERR(tsk);
				mutex_unlock(&i915->drm.struct_mutex);
				goto out_flush;
			}

			get_task_struct(tsk);
			threads[id * ncpus + n] = tsk;
		}
	}
	mutex_unlock(&i915->drm.struct_mutex);

	msleep(jiffies_to_msecs(i915_selftest.timeout_jiffies));

out_flush:
	num_waits = 0;
	num_fences = 0;
	for_each_engine(engine, i915, id) {
		for (n = 0; n < ncpus; n++) {
			struct task_struct *tsk = threads[id * ncpus + n];
			int err;

			if (!tsk)
				continue;

			err = kthread_stop(tsk);
			if (err < 0 && !ret)
				ret = err;

			put_task_struct(tsk);
		}

		num_waits += atomic_long_read(&t[id].num_waits);
		num_fences += atomic_long_read(&t[id].num_fences);
	}
	pr_info("Completed %lu waits for %lu fences across %d engines and %d cpus\n",
		num_waits, num_fences, RUNTIME_INFO(i915)->num_engines, ncpus);

	mutex_lock(&i915->drm.struct_mutex);
	ret = igt_live_test_end(&live) ?: ret;
out_contexts:
	mutex_unlock(&i915->drm.struct_mutex);
	kfree(t[0].contexts);
out_threads:
	kfree(threads);
out_file:
	mock_file_free(i915, file);
out_rpm:
	intel_runtime_pm_put(&i915->runtime_pm, wakeref);

	return ret;
}

int i915_request_live_selftests(struct drm_i915_private *i915)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(live_nop_request),
		SUBTEST(live_all_engines),
		SUBTEST(live_sequential_engines),
		SUBTEST(live_empty_request),
		SUBTEST(live_breadcrumbs_smoketest),
	};

	if (intel_gt_is_wedged(&i915->gt))
		return 0;

	return i915_subtests(tests, i915);
}