/*
 * Copyright © 2016 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 */

#include <linux/prime_numbers.h>

#include "gem/i915_gem_pm.h"
#include "gem/selftests/mock_context.h"

#include "gt/intel_gt.h"

#include "i915_random.h"
#include "i915_selftest.h"
#include "igt_live_test.h"
#include "lib_sw_fence.h"

#include "mock_drm.h"
#include "mock_gem_device.h"

static int igt_add_request(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct i915_request *request;
	int err = -ENOMEM;

	/* Basic preliminary test to create a request and let it loose! */

	mutex_lock(&i915->drm.struct_mutex);
	request = mock_request(i915->engine[RCS0],
			       i915->kernel_context,
			       HZ / 10);
	if (!request)
		goto out_unlock;

	i915_request_add(request);

	err = 0;
out_unlock:
	mutex_unlock(&i915->drm.struct_mutex);
	return err;
}

static int igt_wait_request(void *arg)
{
	const long T = HZ / 4;
	struct drm_i915_private *i915 = arg;
	struct i915_request *request;
	int err = -EINVAL;

	/* Submit a request, then wait upon it */

	mutex_lock(&i915->drm.struct_mutex);
	request = mock_request(i915->engine[RCS0], i915->kernel_context, T);
	if (!request) {
		err = -ENOMEM;
		goto out_unlock;
	}
	i915_request_get(request);

	if (i915_request_wait(request, 0, 0) != -ETIME) {
		pr_err("request wait (busy query) succeeded (expected timeout before submit!)\n");
		goto out_request;
	}

	if (i915_request_wait(request, 0, T) != -ETIME) {
		pr_err("request wait succeeded (expected timeout before submit!)\n");
		goto out_request;
	}

	if (i915_request_completed(request)) {
		pr_err("request completed before submit!!\n");
		goto out_request;
	}

	i915_request_add(request);

	if (i915_request_wait(request, 0, 0) != -ETIME) {
		pr_err("request wait (busy query) succeeded (expected timeout after submit!)\n");
		goto out_request;
	}

	if (i915_request_completed(request)) {
		pr_err("request completed immediately!\n");
		goto out_request;
	}

	if (i915_request_wait(request, 0, T / 2) != -ETIME) {
		pr_err("request wait succeeded (expected timeout!)\n");
		goto out_request;
	}

	if (i915_request_wait(request, 0, T) == -ETIME) {
		pr_err("request wait timed out!\n");
		goto out_request;
	}

	if (!i915_request_completed(request)) {
		pr_err("request not complete after waiting!\n");
		goto out_request;
	}

	if (i915_request_wait(request, 0, T) == -ETIME) {
		pr_err("request wait timed out when already complete!\n");
		goto out_request;
	}

	err = 0;
out_request:
	i915_request_put(request);
out_unlock:
	mock_device_flush(i915);
	mutex_unlock(&i915->drm.struct_mutex);
	return err;
}

static int igt_fence_wait(void *arg)
{
	const long T = HZ / 4;
	struct drm_i915_private *i915 = arg;
	struct i915_request *request;
	int err = -EINVAL;

	/* Submit a request, treat it as a fence and wait upon it */

	mutex_lock(&i915->drm.struct_mutex);
	request = mock_request(i915->engine[RCS0], i915->kernel_context, T);
	if (!request) {
		err = -ENOMEM;
		goto out_locked;
	}

	if (dma_fence_wait_timeout(&request->fence, false, T) != -ETIME) {
		pr_err("fence wait success before submit (expected timeout)!\n");
		goto out_locked;
	}

	i915_request_add(request);
	mutex_unlock(&i915->drm.struct_mutex);

	if (dma_fence_is_signaled(&request->fence)) {
		pr_err("fence signaled immediately!\n");
		goto out_device;
	}

	if (dma_fence_wait_timeout(&request->fence, false, T / 2) != -ETIME) {
		pr_err("fence wait success after submit (expected timeout)!\n");
		goto out_device;
	}

	if (dma_fence_wait_timeout(&request->fence, false, T) <= 0) {
		pr_err("fence wait timed out (expected success)!\n");
		goto out_device;
	}

	if (!dma_fence_is_signaled(&request->fence)) {
		pr_err("fence unsignaled after waiting!\n");
		goto out_device;
	}

	if (dma_fence_wait_timeout(&request->fence, false, T) <= 0) {
		pr_err("fence wait timed out when complete (expected success)!\n");
		goto out_device;
	}

	err = 0;
out_device:
	mutex_lock(&i915->drm.struct_mutex);
out_locked:
	mock_device_flush(i915);
	mutex_unlock(&i915->drm.struct_mutex);
	return err;
}

static int igt_request_rewind(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct i915_request *request, *vip;
	struct i915_gem_context *ctx[2];
	int err = -EINVAL;

	mutex_lock(&i915->drm.struct_mutex);
	ctx[0] = mock_context(i915, "A");
	request = mock_request(i915->engine[RCS0], ctx[0], 2 * HZ);
	if (!request) {
		err = -ENOMEM;
		goto err_context_0;
	}

	i915_request_get(request);
	i915_request_add(request);

	ctx[1] = mock_context(i915, "B");
	vip = mock_request(i915->engine[RCS0], ctx[1], 0);
	if (!vip) {
		err = -ENOMEM;
		goto err_context_1;
	}

	/* Simulate preemption by manual reordering */
	if (!mock_cancel_request(request)) {
		pr_err("failed to cancel request (already executed)!\n");
		i915_request_add(vip);
		goto err_context_1;
	}
	i915_request_get(vip);
	i915_request_add(vip);
	rcu_read_lock();
	request->engine->submit_request(request);
	rcu_read_unlock();

	mutex_unlock(&i915->drm.struct_mutex);

	if (i915_request_wait(vip, 0, HZ) == -ETIME) {
		pr_err("timed out waiting for high priority request\n");
		goto err;
	}

	if (i915_request_completed(request)) {
		pr_err("low priority request already completed\n");
		goto err;
	}

	err = 0;
err:
	i915_request_put(vip);
	mutex_lock(&i915->drm.struct_mutex);
err_context_1:
	mock_context_close(ctx[1]);
	i915_request_put(request);
err_context_0:
	mock_context_close(ctx[0]);
	mock_device_flush(i915);
	mutex_unlock(&i915->drm.struct_mutex);
	return err;
}
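/*
 * Shared parameters for the breadcrumb smoketests: each worker thread picks
 * contexts from @contexts, allocates up to @max_batch requests per iteration
 * on @engine via @request_alloc (the mock or live flavour below), and
 * accounts its progress in @num_waits/@num_fences.
 */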
struct smoketest {
	struct intel_engine_cs *engine;
	struct i915_gem_context **contexts;
	atomic_long_t num_waits, num_fences;
	int ncontexts, max_batch;
	struct i915_request *(*request_alloc)(struct i915_gem_context *,
					      struct intel_engine_cs *);
};

static struct i915_request *
__mock_request_alloc(struct i915_gem_context *ctx,
		     struct intel_engine_cs *engine)
{
	return mock_request(engine, ctx, 0);
}

static struct i915_request *
__live_request_alloc(struct i915_gem_context *ctx,
		     struct intel_engine_cs *engine)
{
	return igt_request_alloc(ctx, engine);
}

static int __igt_breadcrumbs_smoketest(void *arg)
{
	struct smoketest *t = arg;
	struct mutex * const BKL = &t->engine->i915->drm.struct_mutex;
	const unsigned int max_batch = min(t->ncontexts, t->max_batch) - 1;
	const unsigned int total = 4 * t->ncontexts + 1;
	unsigned int num_waits = 0, num_fences = 0;
	struct i915_request **requests;
	I915_RND_STATE(prng);
	unsigned int *order;
	int err = 0;

	/*
	 * A very simple test to catch the most egregious of list handling bugs.
	 *
	 * At its heart, we simply create oodles of requests running across
	 * multiple kthreads and enable signaling on them, for the sole purpose
	 * of stressing our breadcrumb handling. The only inspection we do is
	 * that the fences were marked as signaled.
	 */

	requests = kmalloc_array(total, sizeof(*requests), GFP_KERNEL);
	if (!requests)
		return -ENOMEM;

	order = i915_random_order(total, &prng);
	if (!order) {
		err = -ENOMEM;
		goto out_requests;
	}

	while (!kthread_should_stop()) {
		struct i915_sw_fence *submit, *wait;
		unsigned int n, count;

		submit = heap_fence_create(GFP_KERNEL);
		if (!submit) {
			err = -ENOMEM;
			break;
		}

		wait = heap_fence_create(GFP_KERNEL);
		if (!wait) {
			i915_sw_fence_commit(submit);
			heap_fence_put(submit);
			err = -ENOMEM;
			break;
		}

		i915_random_reorder(order, total, &prng);
		count = 1 + i915_prandom_u32_max_state(max_batch, &prng);

		for (n = 0; n < count; n++) {
			struct i915_gem_context *ctx =
				t->contexts[order[n] % t->ncontexts];
			struct i915_request *rq;

			mutex_lock(BKL);

			rq = t->request_alloc(ctx, t->engine);
			if (IS_ERR(rq)) {
				mutex_unlock(BKL);
				err = PTR_ERR(rq);
				count = n;
				break;
			}

			err = i915_sw_fence_await_sw_fence_gfp(&rq->submit,
							       submit,
							       GFP_KERNEL);

			requests[n] = i915_request_get(rq);
			i915_request_add(rq);

			mutex_unlock(BKL);

			if (err >= 0)
				err = i915_sw_fence_await_dma_fence(wait,
								    &rq->fence,
								    0,
								    GFP_KERNEL);

			if (err < 0) {
				i915_request_put(rq);
				count = n;
				break;
			}
		}

		i915_sw_fence_commit(submit);
		i915_sw_fence_commit(wait);

		if (!wait_event_timeout(wait->wait,
					i915_sw_fence_done(wait),
					5 * HZ)) {
			struct i915_request *rq = requests[count - 1];

			pr_err("waiting for %d/%d fences (last %llx:%lld) on %s timed out!\n",
			       atomic_read(&wait->pending), count,
			       rq->fence.context, rq->fence.seqno,
			       t->engine->name);
			GEM_TRACE_DUMP();

			intel_gt_set_wedged(t->engine->gt);
			GEM_BUG_ON(!i915_request_completed(rq));
			i915_sw_fence_wait(wait);
			err = -EIO;
		}

		for (n = 0; n < count; n++) {
			struct i915_request *rq = requests[n];

			if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
				      &rq->fence.flags)) {
				pr_err("%llu:%llu was not signaled!\n",
				       rq->fence.context, rq->fence.seqno);
				err = -EINVAL;
			}

			i915_request_put(rq);
		}

		heap_fence_put(wait);
		heap_fence_put(submit);

		if (err < 0)
			break;

		num_fences += count;
		num_waits++;

		cond_resched();
	}

	atomic_long_add(num_fences, &t->num_fences);
	atomic_long_add(num_waits, &t->num_waits);

	kfree(order);
out_requests:
	kfree(requests);
	return err;
}

static int mock_breadcrumbs_smoketest(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct smoketest t = {
		.engine = i915->engine[RCS0],
		.ncontexts = 1024,
		.max_batch = 1024,
		.request_alloc = __mock_request_alloc
	};
	unsigned int ncpus = num_online_cpus();
	struct task_struct **threads;
	unsigned int n;
	int ret = 0;

	/*
	 * Smoketest our breadcrumb/signal handling for requests across multiple
	 * threads. A very simple test to only catch the most egregious of bugs.
	 * See __igt_breadcrumbs_smoketest();
	 */

	threads = kmalloc_array(ncpus, sizeof(*threads), GFP_KERNEL);
	if (!threads)
		return -ENOMEM;

	t.contexts =
		kmalloc_array(t.ncontexts, sizeof(*t.contexts), GFP_KERNEL);
	if (!t.contexts) {
		ret = -ENOMEM;
		goto out_threads;
	}

	mutex_lock(&t.engine->i915->drm.struct_mutex);
	for (n = 0; n < t.ncontexts; n++) {
		t.contexts[n] = mock_context(t.engine->i915, "mock");
		if (!t.contexts[n]) {
			ret = -ENOMEM;
			goto out_contexts;
		}
	}
	mutex_unlock(&t.engine->i915->drm.struct_mutex);

	for (n = 0; n < ncpus; n++) {
		threads[n] = kthread_run(__igt_breadcrumbs_smoketest,
					 &t, "igt/%d", n);
		if (IS_ERR(threads[n])) {
			ret = PTR_ERR(threads[n]);
			ncpus = n;
			break;
		}

		get_task_struct(threads[n]);
	}

	msleep(jiffies_to_msecs(i915_selftest.timeout_jiffies));

	for (n = 0; n < ncpus; n++) {
		int err;

		err = kthread_stop(threads[n]);
		if (err < 0 && !ret)
			ret = err;

		put_task_struct(threads[n]);
	}
	pr_info("Completed %lu waits for %lu fences across %d cpus\n",
		atomic_long_read(&t.num_waits),
		atomic_long_read(&t.num_fences),
		ncpus);

	mutex_lock(&t.engine->i915->drm.struct_mutex);
out_contexts:
	for (n = 0; n < t.ncontexts; n++) {
		if (!t.contexts[n])
			break;
		mock_context_close(t.contexts[n]);
	}
	mutex_unlock(&t.engine->i915->drm.struct_mutex);
	kfree(t.contexts);
out_threads:
	kfree(threads);

	return ret;
}

int i915_request_mock_selftests(void)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(igt_add_request),
		SUBTEST(igt_wait_request),
		SUBTEST(igt_fence_wait),
		SUBTEST(igt_request_rewind),
		SUBTEST(mock_breadcrumbs_smoketest),
	};
	struct drm_i915_private *i915;
	intel_wakeref_t wakeref;
	int err = 0;

	i915 = mock_gem_device();
	if (!i915)
		return -ENOMEM;

	with_intel_runtime_pm(&i915->runtime_pm, wakeref)
		err = i915_subtests(tests, i915);

	drm_dev_put(&i915->drm);

	return err;
}
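/*
 * The remaining subtests exercise real hardware rather than the mock GEM
 * device; they are registered via i915_request_live_selftests() at the
 * bottom of this file.
 */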
static int live_nop_request(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct intel_engine_cs *engine;
	intel_wakeref_t wakeref;
	struct igt_live_test t;
	unsigned int id;
	int err = -ENODEV;

	/* Submit various sized batches of empty requests, to each engine
	 * (individually), and wait for the batch to complete. We can check
	 * the overhead of submitting requests to the hardware.
	 */

	mutex_lock(&i915->drm.struct_mutex);
	wakeref = intel_runtime_pm_get(&i915->runtime_pm);

	for_each_engine(engine, i915, id) {
		struct i915_request *request = NULL;
		unsigned long n, prime;
		IGT_TIMEOUT(end_time);
		ktime_t times[2] = {};

		err = igt_live_test_begin(&t, i915, __func__, engine->name);
		if (err)
			goto out_unlock;

		for_each_prime_number_from(prime, 1, 8192) {
			times[1] = ktime_get_raw();

			for (n = 0; n < prime; n++) {
				request = i915_request_create(engine->kernel_context);
				if (IS_ERR(request)) {
					err = PTR_ERR(request);
					goto out_unlock;
				}

				/* This space is left intentionally blank.
				 *
				 * We do not actually want to perform any
				 * action with this request, we just want
				 * to measure the latency in allocation
				 * and submission of our breadcrumbs -
				 * ensuring that the bare request is sufficient
				 * for the system to work (i.e. proper HEAD
				 * tracking of the rings, interrupt handling,
				 * etc). It also gives us the lowest bounds
				 * for latency.
				 */

				i915_request_add(request);
			}
			i915_request_wait(request, 0, MAX_SCHEDULE_TIMEOUT);

			times[1] = ktime_sub(ktime_get_raw(), times[1]);
			if (prime == 1)
				times[0] = times[1];

			if (__igt_timeout(end_time, NULL))
				break;
		}

		err = igt_live_test_end(&t);
		if (err)
			goto out_unlock;

		pr_info("Request latencies on %s: 1 = %lluns, %lu = %lluns\n",
			engine->name,
			ktime_to_ns(times[0]),
			prime, div64_u64(ktime_to_ns(times[1]), prime));
	}

out_unlock:
	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
	mutex_unlock(&i915->drm.struct_mutex);
	return err;
}

static struct i915_vma *empty_batch(struct drm_i915_private *i915)
{
	struct drm_i915_gem_object *obj;
	struct i915_vma *vma;
	u32 *cmd;
	int err;

	obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
	if (IS_ERR(obj))
		return ERR_CAST(obj);

	cmd = i915_gem_object_pin_map(obj, I915_MAP_WB);
	if (IS_ERR(cmd)) {
		err = PTR_ERR(cmd);
		goto err;
	}

	*cmd = MI_BATCH_BUFFER_END;

	__i915_gem_object_flush_map(obj, 0, 64);
	i915_gem_object_unpin_map(obj);

	intel_gt_chipset_flush(&i915->gt);

	vma = i915_vma_instance(obj, &i915->ggtt.vm, NULL);
	if (IS_ERR(vma)) {
		err = PTR_ERR(vma);
		goto err;
	}

	err = i915_vma_pin(vma, 0, 0, PIN_USER | PIN_GLOBAL);
	if (err)
		goto err;

	return vma;

err:
	i915_gem_object_put(obj);
	return ERR_PTR(err);
}

static struct i915_request *
empty_request(struct intel_engine_cs *engine,
	      struct i915_vma *batch)
{
	struct i915_request *request;
	int err;

	request = i915_request_create(engine->kernel_context);
	if (IS_ERR(request))
		return request;

	err = engine->emit_bb_start(request,
				    batch->node.start,
				    batch->node.size,
				    I915_DISPATCH_SECURE);
	if (err)
		goto out_request;

out_request:
	i915_request_add(request);
	return err ? ERR_PTR(err) : request;
}
static int live_empty_request(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct intel_engine_cs *engine;
	intel_wakeref_t wakeref;
	struct igt_live_test t;
	struct i915_vma *batch;
	unsigned int id;
	int err = 0;

	/* Submit various sized batches of empty requests, to each engine
	 * (individually), and wait for the batch to complete. We can check
	 * the overhead of submitting requests to the hardware.
	 */

	mutex_lock(&i915->drm.struct_mutex);
	wakeref = intel_runtime_pm_get(&i915->runtime_pm);

	batch = empty_batch(i915);
	if (IS_ERR(batch)) {
		err = PTR_ERR(batch);
		goto out_unlock;
	}

	for_each_engine(engine, i915, id) {
		IGT_TIMEOUT(end_time);
		struct i915_request *request;
		unsigned long n, prime;
		ktime_t times[2] = {};

		err = igt_live_test_begin(&t, i915, __func__, engine->name);
		if (err)
			goto out_batch;

		/* Warmup / preload */
		request = empty_request(engine, batch);
		if (IS_ERR(request)) {
			err = PTR_ERR(request);
			goto out_batch;
		}
		i915_request_wait(request, 0, MAX_SCHEDULE_TIMEOUT);

		for_each_prime_number_from(prime, 1, 8192) {
			times[1] = ktime_get_raw();

			for (n = 0; n < prime; n++) {
				request = empty_request(engine, batch);
				if (IS_ERR(request)) {
					err = PTR_ERR(request);
					goto out_batch;
				}
			}
			i915_request_wait(request, 0, MAX_SCHEDULE_TIMEOUT);

			times[1] = ktime_sub(ktime_get_raw(), times[1]);
			if (prime == 1)
				times[0] = times[1];

			if (__igt_timeout(end_time, NULL))
				break;
		}

		err = igt_live_test_end(&t);
		if (err)
			goto out_batch;

		pr_info("Batch latencies on %s: 1 = %lluns, %lu = %lluns\n",
			engine->name,
			ktime_to_ns(times[0]),
			prime, div64_u64(ktime_to_ns(times[1]), prime));
	}

out_batch:
	i915_vma_unpin(batch);
	i915_vma_put(batch);
out_unlock:
	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
	mutex_unlock(&i915->drm.struct_mutex);
	return err;
}

static struct i915_vma *recursive_batch(struct drm_i915_private *i915)
{
	struct i915_gem_context *ctx = i915->kernel_context;
	struct i915_address_space *vm = ctx->vm ?: &i915->ggtt.vm;
	struct drm_i915_gem_object *obj;
	const int gen = INTEL_GEN(i915);
	struct i915_vma *vma;
	u32 *cmd;
	int err;

	obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
	if (IS_ERR(obj))
		return ERR_CAST(obj);

	vma = i915_vma_instance(obj, vm, NULL);
	if (IS_ERR(vma)) {
		err = PTR_ERR(vma);
		goto err;
	}

	err = i915_vma_pin(vma, 0, 0, PIN_USER);
	if (err)
		goto err;

	cmd = i915_gem_object_pin_map(obj, I915_MAP_WC);
	if (IS_ERR(cmd)) {
		err = PTR_ERR(cmd);
		goto err;
	}

	if (gen >= 8) {
		*cmd++ = MI_BATCH_BUFFER_START | 1 << 8 | 1;
		*cmd++ = lower_32_bits(vma->node.start);
		*cmd++ = upper_32_bits(vma->node.start);
	} else if (gen >= 6) {
		*cmd++ = MI_BATCH_BUFFER_START | 1 << 8;
		*cmd++ = lower_32_bits(vma->node.start);
	} else {
		*cmd++ = MI_BATCH_BUFFER_START | MI_BATCH_GTT;
		*cmd++ = lower_32_bits(vma->node.start);
	}
	*cmd++ = MI_BATCH_BUFFER_END; /* terminate early in case of error */

	__i915_gem_object_flush_map(obj, 0, 64);
	i915_gem_object_unpin_map(obj);

	intel_gt_chipset_flush(&i915->gt);

	return vma;

err:
	i915_gem_object_put(obj);
	return ERR_PTR(err);
}
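/*
 * The batch built by recursive_batch() begins with a MI_BATCH_BUFFER_START
 * that jumps back to its own start, so it keeps the request busy until we
 * rewrite that first dword with MI_BATCH_BUFFER_END here.
 */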
static int recursive_batch_resolve(struct i915_vma *batch)
{
	u32 *cmd;

	cmd = i915_gem_object_pin_map(batch->obj, I915_MAP_WC);
	if (IS_ERR(cmd))
		return PTR_ERR(cmd);

	*cmd = MI_BATCH_BUFFER_END;
	intel_gt_chipset_flush(batch->vm->gt);

	i915_gem_object_unpin_map(batch->obj);

	return 0;
}

static int live_all_engines(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct intel_engine_cs *engine;
	struct i915_request *request[I915_NUM_ENGINES];
	intel_wakeref_t wakeref;
	struct igt_live_test t;
	struct i915_vma *batch;
	unsigned int id;
	int err;

	/* Check we can submit requests to all engines simultaneously. We
	 * send a recursive batch to each engine - checking that we don't
	 * block doing so, and that they don't complete too soon.
	 */

	mutex_lock(&i915->drm.struct_mutex);
	wakeref = intel_runtime_pm_get(&i915->runtime_pm);

	err = igt_live_test_begin(&t, i915, __func__, "");
	if (err)
		goto out_unlock;

	batch = recursive_batch(i915);
	if (IS_ERR(batch)) {
		err = PTR_ERR(batch);
		pr_err("%s: Unable to create batch, err=%d\n", __func__, err);
		goto out_unlock;
	}

	for_each_engine(engine, i915, id) {
		request[id] = i915_request_create(engine->kernel_context);
		if (IS_ERR(request[id])) {
			err = PTR_ERR(request[id]);
			pr_err("%s: Request allocation failed with err=%d\n",
			       __func__, err);
			goto out_request;
		}

		err = engine->emit_bb_start(request[id],
					    batch->node.start,
					    batch->node.size,
					    0);
		GEM_BUG_ON(err);
		request[id]->batch = batch;

		i915_vma_lock(batch);
		err = i915_vma_move_to_active(batch, request[id], 0);
		i915_vma_unlock(batch);
		GEM_BUG_ON(err);

		i915_request_get(request[id]);
		i915_request_add(request[id]);
	}

	for_each_engine(engine, i915, id) {
		if (i915_request_completed(request[id])) {
			pr_err("%s(%s): request completed too early!\n",
			       __func__, engine->name);
			err = -EINVAL;
			goto out_request;
		}
	}

	err = recursive_batch_resolve(batch);
	if (err) {
		pr_err("%s: failed to resolve batch, err=%d\n", __func__, err);
		goto out_request;
	}

	for_each_engine(engine, i915, id) {
		long timeout;

		timeout = i915_request_wait(request[id], 0,
					    MAX_SCHEDULE_TIMEOUT);
		if (timeout < 0) {
			err = timeout;
			pr_err("%s: error waiting for request on %s, err=%d\n",
			       __func__, engine->name, err);
			goto out_request;
		}

		GEM_BUG_ON(!i915_request_completed(request[id]));
		i915_request_put(request[id]);
		request[id] = NULL;
	}

	err = igt_live_test_end(&t);

out_request:
	for_each_engine(engine, i915, id)
		if (request[id])
			i915_request_put(request[id]);
	i915_vma_unpin(batch);
	i915_vma_put(batch);
out_unlock:
	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
	mutex_unlock(&i915->drm.struct_mutex);
	return err;
}

static int live_sequential_engines(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct i915_request *request[I915_NUM_ENGINES] = {};
	struct i915_request *prev = NULL;
	struct intel_engine_cs *engine;
	intel_wakeref_t wakeref;
	struct igt_live_test t;
	unsigned int id;
	int err;

	/* Check we can submit requests to all engines sequentially, such
	 * that each successive request waits for the earlier ones. This
	 * tests that we don't execute requests out of order, even though
	 * they are running on independent engines.
	 */

	mutex_lock(&i915->drm.struct_mutex);
	wakeref = intel_runtime_pm_get(&i915->runtime_pm);

	err = igt_live_test_begin(&t, i915, __func__, "");
	if (err)
		goto out_unlock;

	for_each_engine(engine, i915, id) {
		struct i915_vma *batch;

		batch = recursive_batch(i915);
		if (IS_ERR(batch)) {
			err = PTR_ERR(batch);
			pr_err("%s: Unable to create batch for %s, err=%d\n",
			       __func__, engine->name, err);
			goto out_unlock;
		}

		request[id] = i915_request_create(engine->kernel_context);
		if (IS_ERR(request[id])) {
			err = PTR_ERR(request[id]);
			pr_err("%s: Request allocation failed for %s with err=%d\n",
			       __func__, engine->name, err);
			goto out_request;
		}

		if (prev) {
			err = i915_request_await_dma_fence(request[id],
							   &prev->fence);
			if (err) {
				i915_request_add(request[id]);
				pr_err("%s: Request await failed for %s with err=%d\n",
				       __func__, engine->name, err);
				goto out_request;
			}
		}

		err = engine->emit_bb_start(request[id],
					    batch->node.start,
					    batch->node.size,
					    0);
		GEM_BUG_ON(err);
		request[id]->batch = batch;

		i915_vma_lock(batch);
		err = i915_vma_move_to_active(batch, request[id], 0);
		i915_vma_unlock(batch);
		GEM_BUG_ON(err);

		i915_request_get(request[id]);
		i915_request_add(request[id]);

		prev = request[id];
	}

	for_each_engine(engine, i915, id) {
		long timeout;

		if (i915_request_completed(request[id])) {
			pr_err("%s(%s): request completed too early!\n",
			       __func__, engine->name);
			err = -EINVAL;
			goto out_request;
		}

		err = recursive_batch_resolve(request[id]->batch);
		if (err) {
			pr_err("%s: failed to resolve batch, err=%d\n",
			       __func__, err);
			goto out_request;
		}

		timeout = i915_request_wait(request[id], 0,
					    MAX_SCHEDULE_TIMEOUT);
		if (timeout < 0) {
			err = timeout;
			pr_err("%s: error waiting for request on %s, err=%d\n",
			       __func__, engine->name, err);
			goto out_request;
		}

		GEM_BUG_ON(!i915_request_completed(request[id]));
	}

	err = igt_live_test_end(&t);

out_request:
	for_each_engine(engine, i915, id) {
		u32 *cmd;

		if (!request[id])
			break;

		cmd = i915_gem_object_pin_map(request[id]->batch->obj,
					      I915_MAP_WC);
		if (!IS_ERR(cmd)) {
			*cmd = MI_BATCH_BUFFER_END;
			intel_gt_chipset_flush(engine->gt);

			i915_gem_object_unpin_map(request[id]->batch->obj);
		}

		i915_vma_put(request[id]->batch);
		i915_request_put(request[id]);
	}
out_unlock:
	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
	mutex_unlock(&i915->drm.struct_mutex);
	return err;
}

static int
max_batches(struct i915_gem_context *ctx, struct intel_engine_cs *engine)
{
	struct i915_request *rq;
	int ret;

	/*
	 * Before execlists, all contexts share the same ringbuffer. With
	 * execlists, each context/engine has a separate ringbuffer and
	 * for the purposes of this test, inexhaustible.
	 *
	 * For the global ringbuffer though, we have to be very careful
	 * that we do not wrap while preventing the execution of requests
	 * with an unsignaled fence.
	 */
	if (HAS_EXECLISTS(ctx->i915))
		return INT_MAX;

	rq = igt_request_alloc(ctx, engine);
	if (IS_ERR(rq)) {
		ret = PTR_ERR(rq);
	} else {
		int sz;

		ret = rq->ring->size - rq->reserved_space;
		i915_request_add(rq);

		sz = rq->ring->emit - rq->head;
		if (sz < 0)
			sz += rq->ring->size;
		ret /= sz;
		ret /= 2; /* leave half spare, in case of emergency! */
	}

	return ret;
}

static int live_breadcrumbs_smoketest(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct smoketest t[I915_NUM_ENGINES];
	unsigned int ncpus = num_online_cpus();
	unsigned long num_waits, num_fences;
	struct intel_engine_cs *engine;
	struct task_struct **threads;
	struct igt_live_test live;
	enum intel_engine_id id;
	intel_wakeref_t wakeref;
	struct drm_file *file;
	unsigned int n;
	int ret = 0;

	/*
	 * Smoketest our breadcrumb/signal handling for requests across multiple
	 * threads. A very simple test to only catch the most egregious of bugs.
	 * See __igt_breadcrumbs_smoketest();
	 *
	 * On real hardware this time.
	 */

	wakeref = intel_runtime_pm_get(&i915->runtime_pm);

	file = mock_file(i915);
	if (IS_ERR(file)) {
		ret = PTR_ERR(file);
		goto out_rpm;
	}

	threads = kcalloc(ncpus * I915_NUM_ENGINES,
			  sizeof(*threads),
			  GFP_KERNEL);
	if (!threads) {
		ret = -ENOMEM;
		goto out_file;
	}

	memset(&t[0], 0, sizeof(t[0]));
	t[0].request_alloc = __live_request_alloc;
	t[0].ncontexts = 64;
	t[0].contexts = kmalloc_array(t[0].ncontexts,
				      sizeof(*t[0].contexts),
				      GFP_KERNEL);
	if (!t[0].contexts) {
		ret = -ENOMEM;
		goto out_threads;
	}

	mutex_lock(&i915->drm.struct_mutex);
	for (n = 0; n < t[0].ncontexts; n++) {
		t[0].contexts[n] = live_context(i915, file);
		if (!t[0].contexts[n]) {
			ret = -ENOMEM;
			goto out_contexts;
		}
	}

	ret = igt_live_test_begin(&live, i915, __func__, "");
	if (ret)
		goto out_contexts;

	for_each_engine(engine, i915, id) {
		t[id] = t[0];
		t[id].engine = engine;
		t[id].max_batch = max_batches(t[0].contexts[0], engine);
		if (t[id].max_batch < 0) {
			ret = t[id].max_batch;
			mutex_unlock(&i915->drm.struct_mutex);
			goto out_flush;
		}
		/* One ring interleaved between requests from all cpus */
		t[id].max_batch /= num_online_cpus() + 1;
		pr_debug("Limiting batches to %d requests on %s\n",
			 t[id].max_batch, engine->name);

		for (n = 0; n < ncpus; n++) {
			struct task_struct *tsk;

			tsk = kthread_run(__igt_breadcrumbs_smoketest,
					  &t[id], "igt/%d.%d", id, n);
			if (IS_ERR(tsk)) {
				ret = PTR_ERR(tsk);
				mutex_unlock(&i915->drm.struct_mutex);
				goto out_flush;
			}

			get_task_struct(tsk);
			threads[id * ncpus + n] = tsk;
		}
	}
	mutex_unlock(&i915->drm.struct_mutex);

	msleep(jiffies_to_msecs(i915_selftest.timeout_jiffies));

out_flush:
	num_waits = 0;
	num_fences = 0;
	for_each_engine(engine, i915, id) {
		for (n = 0; n < ncpus; n++) {
			struct task_struct *tsk = threads[id * ncpus + n];
			int err;

			if (!tsk)
				continue;

			err = kthread_stop(tsk);
			if (err < 0 && !ret)
				ret = err;

			put_task_struct(tsk);
		}

		num_waits += atomic_long_read(&t[id].num_waits);
		num_fences += atomic_long_read(&t[id].num_fences);
	}
	pr_info("Completed %lu waits for %lu fences across %d engines and %d cpus\n",
		num_waits, num_fences, RUNTIME_INFO(i915)->num_engines, ncpus);

	mutex_lock(&i915->drm.struct_mutex);
	ret = igt_live_test_end(&live) ?: ret;
out_contexts:
	mutex_unlock(&i915->drm.struct_mutex);
	kfree(t[0].contexts);
out_threads:
	kfree(threads);
out_file:
	mock_file_free(i915, file);
out_rpm:
	intel_runtime_pm_put(&i915->runtime_pm, wakeref);

	return ret;
}

int i915_request_live_selftests(struct drm_i915_private *i915)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(live_nop_request),
		SUBTEST(live_all_engines),
		SUBTEST(live_sequential_engines),
		SUBTEST(live_empty_request),
		SUBTEST(live_breadcrumbs_smoketest),
	};

	if (intel_gt_is_wedged(&i915->gt))
		return 0;

	return i915_subtests(tests, i915);
}