/*
 * Copyright © 2016 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 */

#include <linux/prime_numbers.h>

#include "../i915_selftest.h"
#include "i915_random.h"
#include "igt_live_test.h"
#include "lib_sw_fence.h"

#include "mock_context.h"
#include "mock_drm.h"
#include "mock_gem_device.h"

static int igt_add_request(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct i915_request *request;
	int err = -ENOMEM;

	/* Basic preliminary test to create a request and let it loose! */

	mutex_lock(&i915->drm.struct_mutex);
	request = mock_request(i915->engine[RCS0],
			       i915->kernel_context,
			       HZ / 10);
	if (!request)
		goto out_unlock;

	i915_request_add(request);

	err = 0;
out_unlock:
	mutex_unlock(&i915->drm.struct_mutex);
	return err;
}

static int igt_wait_request(void *arg)
{
	const long T = HZ / 4;
	struct drm_i915_private *i915 = arg;
	struct i915_request *request;
	int err = -EINVAL;

	/* Submit a request, then wait upon it */

	mutex_lock(&i915->drm.struct_mutex);
	request = mock_request(i915->engine[RCS0], i915->kernel_context, T);
	if (!request) {
		err = -ENOMEM;
		goto out_unlock;
	}

	if (i915_request_wait(request, I915_WAIT_LOCKED, 0) != -ETIME) {
		pr_err("request wait (busy query) succeeded (expected timeout before submit!)\n");
		goto out_unlock;
	}

	if (i915_request_wait(request, I915_WAIT_LOCKED, T) != -ETIME) {
		pr_err("request wait succeeded (expected timeout before submit!)\n");
		goto out_unlock;
	}

	if (i915_request_completed(request)) {
		pr_err("request completed before submit!!\n");
		goto out_unlock;
	}

	i915_request_add(request);

	if (i915_request_wait(request, I915_WAIT_LOCKED, 0) != -ETIME) {
		pr_err("request wait (busy query) succeeded (expected timeout after submit!)\n");
		goto out_unlock;
	}

	if (i915_request_completed(request)) {
		pr_err("request completed immediately!\n");
		goto out_unlock;
	}

	if (i915_request_wait(request, I915_WAIT_LOCKED, T / 2) != -ETIME) {
		pr_err("request wait succeeded (expected timeout!)\n");
		goto out_unlock;
	}

	if (i915_request_wait(request, I915_WAIT_LOCKED, T) == -ETIME) {
		pr_err("request wait timed out!\n");
		goto out_unlock;
	}

	if (!i915_request_completed(request)) {
		pr_err("request not complete after waiting!\n");
		goto out_unlock;
	}

	if (i915_request_wait(request, I915_WAIT_LOCKED, T) == -ETIME) {
		pr_err("request wait timed out when already complete!\n");
		goto out_unlock;
	}

	err = 0;
out_unlock:
	mock_device_flush(i915);
	mutex_unlock(&i915->drm.struct_mutex);
	return err;
}

static int igt_fence_wait(void *arg)
{
	const long T = HZ / 4;
	struct drm_i915_private *i915 = arg;
	struct i915_request *request;
	int err = -EINVAL;

	/* Submit a request, treat it as a fence and wait upon it */

	mutex_lock(&i915->drm.struct_mutex);
	request = mock_request(i915->engine[RCS0], i915->kernel_context, T);
	if (!request) {
		err = -ENOMEM;
		goto out_locked;
	}

	if (dma_fence_wait_timeout(&request->fence, false, T) != -ETIME) {
		pr_err("fence wait success before submit (expected timeout)!\n");
		goto out_locked;
	}

	i915_request_add(request);
	mutex_unlock(&i915->drm.struct_mutex);

	if (dma_fence_is_signaled(&request->fence)) {
		pr_err("fence signaled immediately!\n");
		goto out_device;
	}

	if (dma_fence_wait_timeout(&request->fence, false, T / 2) != -ETIME) {
		pr_err("fence wait success after submit (expected timeout)!\n");
		goto out_device;
	}

	if (dma_fence_wait_timeout(&request->fence, false, T) <= 0) {
		pr_err("fence wait timed out (expected success)!\n");
		goto out_device;
	}

	if (!dma_fence_is_signaled(&request->fence)) {
		pr_err("fence unsignaled after waiting!\n");
		goto out_device;
	}

	if (dma_fence_wait_timeout(&request->fence, false, T) <= 0) {
		pr_err("fence wait timed out when complete (expected success)!\n");
		goto out_device;
	}

	err = 0;
out_device:
	mutex_lock(&i915->drm.struct_mutex);
out_locked:
	mock_device_flush(i915);
	mutex_unlock(&i915->drm.struct_mutex);
	return err;
}

static int igt_request_rewind(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct i915_request *request, *vip;
	struct i915_gem_context *ctx[2];
	int err = -EINVAL;

	/*
	 * Check that a later "vip" request can overtake an earlier, slower
	 * request by manually reordering the submission queue, and that the
	 * vip completes while the original request is still pending.
	 */

	mutex_lock(&i915->drm.struct_mutex);
	ctx[0] = mock_context(i915, "A");
	request = mock_request(i915->engine[RCS0], ctx[0], 2 * HZ);
	if (!request) {
		err = -ENOMEM;
		goto err_context_0;
	}

	i915_request_get(request);
	i915_request_add(request);

	ctx[1] = mock_context(i915, "B");
	vip = mock_request(i915->engine[RCS0], ctx[1], 0);
	if (!vip) {
		err = -ENOMEM;
		goto err_context_1;
	}

	/* Simulate preemption by manual reordering */
	if (!mock_cancel_request(request)) {
		pr_err("failed to cancel request (already executed)!\n");
		i915_request_add(vip);
		goto err_context_1;
	}
	i915_request_get(vip);
	i915_request_add(vip);
	rcu_read_lock();
	request->engine->submit_request(request);
	rcu_read_unlock();

	mutex_unlock(&i915->drm.struct_mutex);

	if (i915_request_wait(vip, 0, HZ) == -ETIME) {
		pr_err("timed out waiting for high priority request\n");
		goto err;
	}

	if (i915_request_completed(request)) {
		pr_err("low priority request already completed\n");
		goto err;
	}

	err = 0;
err:
	i915_request_put(vip);
	mutex_lock(&i915->drm.struct_mutex);
err_context_1:
	mock_context_close(ctx[1]);
	i915_request_put(request);
err_context_0:
	mock_context_close(ctx[0]);
	mock_device_flush(i915);
	mutex_unlock(&i915->drm.struct_mutex);
	return err;
}

struct smoketest {
	struct intel_engine_cs *engine;
	struct i915_gem_context **contexts;
	atomic_long_t num_waits, num_fences;
	int ncontexts, max_batch;
	struct i915_request *(*request_alloc)(struct i915_gem_context *,
					       struct intel_engine_cs *);
};

static struct i915_request *
__mock_request_alloc(struct i915_gem_context *ctx,
		     struct intel_engine_cs *engine)
{
	return mock_request(engine, ctx, 0);
}

static struct i915_request *
__live_request_alloc(struct i915_gem_context *ctx,
		     struct intel_engine_cs *engine)
{
	return i915_request_alloc(engine, ctx);
}

static int __igt_breadcrumbs_smoketest(void *arg)
{
	struct smoketest *t = arg;
	struct mutex * const BKL = &t->engine->i915->drm.struct_mutex;
	const unsigned int max_batch = min(t->ncontexts, t->max_batch) - 1;
	const unsigned int total = 4 * t->ncontexts + 1;
	unsigned int num_waits = 0, num_fences = 0;
	struct i915_request **requests;
	I915_RND_STATE(prng);
	unsigned int *order;
	int err = 0;

	/*
	 * A very simple test to catch the most egregious of list handling bugs.
	 *
	 * At its heart, we simply create oodles of requests running across
	 * multiple kthreads and enable signaling on them, for the sole purpose
	 * of stressing our breadcrumb handling. The only inspection we do is
	 * that the fences were marked as signaled.
	 */

	requests = kmalloc_array(total, sizeof(*requests), GFP_KERNEL);
	if (!requests)
		return -ENOMEM;

	order = i915_random_order(total, &prng);
	if (!order) {
		err = -ENOMEM;
		goto out_requests;
	}

	while (!kthread_should_stop()) {
		struct i915_sw_fence *submit, *wait;
		unsigned int n, count;

		submit = heap_fence_create(GFP_KERNEL);
		if (!submit) {
			err = -ENOMEM;
			break;
		}

		wait = heap_fence_create(GFP_KERNEL);
		if (!wait) {
			i915_sw_fence_commit(submit);
			heap_fence_put(submit);
			err = -ENOMEM;
			break;
		}

		i915_random_reorder(order, total, &prng);
		count = 1 + i915_prandom_u32_max_state(max_batch, &prng);

		for (n = 0; n < count; n++) {
			struct i915_gem_context *ctx =
				t->contexts[order[n] % t->ncontexts];
			struct i915_request *rq;

			mutex_lock(BKL);

			rq = t->request_alloc(ctx, t->engine);
			if (IS_ERR(rq)) {
				mutex_unlock(BKL);
				err = PTR_ERR(rq);
				count = n;
				break;
			}

			err = i915_sw_fence_await_sw_fence_gfp(&rq->submit,
							       submit,
							       GFP_KERNEL);

			requests[n] = i915_request_get(rq);
			i915_request_add(rq);

			mutex_unlock(BKL);

			if (err >= 0)
				err = i915_sw_fence_await_dma_fence(wait,
								    &rq->fence,
								    0,
								    GFP_KERNEL);

			if (err < 0) {
				i915_request_put(rq);
				count = n;
				break;
			}
		}

		i915_sw_fence_commit(submit);
		i915_sw_fence_commit(wait);

		if (!wait_event_timeout(wait->wait,
					i915_sw_fence_done(wait),
					HZ / 2)) {
			struct i915_request *rq = requests[count - 1];

			pr_err("waiting for %d fences (last %llx:%lld) on %s timed out!\n",
			       count,
			       rq->fence.context, rq->fence.seqno,
			       t->engine->name);
			i915_gem_set_wedged(t->engine->i915);
			GEM_BUG_ON(!i915_request_completed(rq));
			i915_sw_fence_wait(wait);
			err = -EIO;
		}

		for (n = 0; n < count; n++) {
			struct i915_request *rq = requests[n];

			if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
				      &rq->fence.flags)) {
				pr_err("%llu:%llu was not signaled!\n",
				       rq->fence.context, rq->fence.seqno);
				err = -EINVAL;
			}

			i915_request_put(rq);
		}

		heap_fence_put(wait);
		heap_fence_put(submit);

		if (err < 0)
			break;

		num_fences += count;
		num_waits++;

		cond_resched();
	}

	atomic_long_add(num_fences, &t->num_fences);
	atomic_long_add(num_waits, &t->num_waits);

	kfree(order);
out_requests:
	kfree(requests);
	return err;
}

static int mock_breadcrumbs_smoketest(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct smoketest t = {
		.engine = i915->engine[RCS0],
		.ncontexts = 1024,
		.max_batch = 1024,
		.request_alloc = __mock_request_alloc
	};
	unsigned int ncpus = num_online_cpus();
	struct task_struct **threads;
	unsigned int n;
	int ret = 0;

	/*
	 * Smoketest our breadcrumb/signal handling for requests across multiple
	 * threads. A very simple test to only catch the most egregious of bugs.
	 * See __igt_breadcrumbs_smoketest();
	 */

	threads = kmalloc_array(ncpus, sizeof(*threads), GFP_KERNEL);
	if (!threads)
		return -ENOMEM;

	t.contexts =
		kmalloc_array(t.ncontexts, sizeof(*t.contexts), GFP_KERNEL);
	if (!t.contexts) {
		ret = -ENOMEM;
		goto out_threads;
	}

	mutex_lock(&t.engine->i915->drm.struct_mutex);
	for (n = 0; n < t.ncontexts; n++) {
		t.contexts[n] = mock_context(t.engine->i915, "mock");
		if (!t.contexts[n]) {
			ret = -ENOMEM;
			goto out_contexts;
		}
	}
	mutex_unlock(&t.engine->i915->drm.struct_mutex);

	for (n = 0; n < ncpus; n++) {
		threads[n] = kthread_run(__igt_breadcrumbs_smoketest,
					 &t, "igt/%d", n);
		if (IS_ERR(threads[n])) {
			ret = PTR_ERR(threads[n]);
			ncpus = n;
			break;
		}

		get_task_struct(threads[n]);
	}

	msleep(jiffies_to_msecs(i915_selftest.timeout_jiffies));

	for (n = 0; n < ncpus; n++) {
		int err;

		err = kthread_stop(threads[n]);
		if (err < 0 && !ret)
			ret = err;

		put_task_struct(threads[n]);
	}
	pr_info("Completed %lu waits for %lu fences across %d cpus\n",
		atomic_long_read(&t.num_waits),
		atomic_long_read(&t.num_fences),
		ncpus);

	mutex_lock(&t.engine->i915->drm.struct_mutex);
out_contexts:
	for (n = 0; n < t.ncontexts; n++) {
		if (!t.contexts[n])
			break;
		mock_context_close(t.contexts[n]);
	}
	mutex_unlock(&t.engine->i915->drm.struct_mutex);
	kfree(t.contexts);
out_threads:
	kfree(threads);

	return ret;
}

int i915_request_mock_selftests(void)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(igt_add_request),
		SUBTEST(igt_wait_request),
		SUBTEST(igt_fence_wait),
		SUBTEST(igt_request_rewind),
		SUBTEST(mock_breadcrumbs_smoketest),
	};
	struct drm_i915_private *i915;
	intel_wakeref_t wakeref;
	int err = 0;

	i915 = mock_gem_device();
	if (!i915)
		return -ENOMEM;

	with_intel_runtime_pm(i915, wakeref)
		err = i915_subtests(tests, i915);

	drm_dev_put(&i915->drm);

	return err;
}

static int live_nop_request(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct intel_engine_cs *engine;
	intel_wakeref_t wakeref;
	struct igt_live_test t;
	unsigned int id;
	int err = -ENODEV;

	/* Submit various sized batches of empty requests, to each engine
	 * (individually), and wait for the batch to complete. We can check
	 * the overhead of submitting requests to the hardware.
	 */

	mutex_lock(&i915->drm.struct_mutex);
	wakeref = intel_runtime_pm_get(i915);

	for_each_engine(engine, i915, id) {
		struct i915_request *request = NULL;
		unsigned long n, prime;
		IGT_TIMEOUT(end_time);
		ktime_t times[2] = {};

		err = igt_live_test_begin(&t, i915, __func__, engine->name);
		if (err)
			goto out_unlock;

		for_each_prime_number_from(prime, 1, 8192) {
			times[1] = ktime_get_raw();

			for (n = 0; n < prime; n++) {
				request = i915_request_alloc(engine,
							     i915->kernel_context);
				if (IS_ERR(request)) {
					err = PTR_ERR(request);
					goto out_unlock;
				}

				/* This space is left intentionally blank.
				 *
				 * We do not actually want to perform any
				 * action with this request, we just want
				 * to measure the latency in allocation
				 * and submission of our breadcrumbs -
				 * ensuring that the bare request is sufficient
				 * for the system to work (i.e. proper HEAD
				 * tracking of the rings, interrupt handling,
				 * etc). It also gives us the lowest bounds
				 * for latency.
				 */

				i915_request_add(request);
			}
			i915_request_wait(request,
					  I915_WAIT_LOCKED,
					  MAX_SCHEDULE_TIMEOUT);

			times[1] = ktime_sub(ktime_get_raw(), times[1]);
			if (prime == 1)
				times[0] = times[1];

			if (__igt_timeout(end_time, NULL))
				break;
		}

		err = igt_live_test_end(&t);
		if (err)
			goto out_unlock;

		pr_info("Request latencies on %s: 1 = %lluns, %lu = %lluns\n",
			engine->name,
			ktime_to_ns(times[0]),
			prime, div64_u64(ktime_to_ns(times[1]), prime));
	}

out_unlock:
	intel_runtime_pm_put(i915, wakeref);
	mutex_unlock(&i915->drm.struct_mutex);
	return err;
}

/* A one-page batch containing only MI_BATCH_BUFFER_END, pinned in the GGTT */
static struct i915_vma *empty_batch(struct drm_i915_private *i915)
{
	struct drm_i915_gem_object *obj;
	struct i915_vma *vma;
	u32 *cmd;
	int err;

	obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
	if (IS_ERR(obj))
		return ERR_CAST(obj);

	cmd = i915_gem_object_pin_map(obj, I915_MAP_WB);
	if (IS_ERR(cmd)) {
		err = PTR_ERR(cmd);
		goto err;
	}

	*cmd = MI_BATCH_BUFFER_END;

	__i915_gem_object_flush_map(obj, 0, 64);
	i915_gem_object_unpin_map(obj);

	i915_gem_chipset_flush(i915);

	vma = i915_vma_instance(obj, &i915->ggtt.vm, NULL);
	if (IS_ERR(vma)) {
		err = PTR_ERR(vma);
		goto err;
	}

	err = i915_vma_pin(vma, 0, 0, PIN_USER | PIN_GLOBAL);
	if (err)
		goto err;

	return vma;

err:
	i915_gem_object_put(obj);
	return ERR_PTR(err);
}

static struct i915_request *
empty_request(struct intel_engine_cs *engine,
	      struct i915_vma *batch)
{
	struct i915_request *request;
	int err;

	request = i915_request_alloc(engine, engine->i915->kernel_context);
	if (IS_ERR(request))
		return request;

	err = engine->emit_bb_start(request,
				    batch->node.start,
				    batch->node.size,
				    I915_DISPATCH_SECURE);
	if (err)
		goto out_request;

out_request:
	i915_request_add(request);
	return err ? ERR_PTR(err) : request;
}

static int live_empty_request(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct intel_engine_cs *engine;
	intel_wakeref_t wakeref;
	struct igt_live_test t;
	struct i915_vma *batch;
	unsigned int id;
	int err = 0;

	/* Submit various sized batches of empty requests, to each engine
	 * (individually), and wait for the batch to complete. We can check
	 * the overhead of submitting requests to the hardware.
	 */

	mutex_lock(&i915->drm.struct_mutex);
	wakeref = intel_runtime_pm_get(i915);

	batch = empty_batch(i915);
	if (IS_ERR(batch)) {
		err = PTR_ERR(batch);
		goto out_unlock;
	}

	for_each_engine(engine, i915, id) {
		IGT_TIMEOUT(end_time);
		struct i915_request *request;
		unsigned long n, prime;
		ktime_t times[2] = {};

		err = igt_live_test_begin(&t, i915, __func__, engine->name);
		if (err)
			goto out_batch;

		/* Warmup / preload */
		request = empty_request(engine, batch);
		if (IS_ERR(request)) {
			err = PTR_ERR(request);
			goto out_batch;
		}
		i915_request_wait(request,
				  I915_WAIT_LOCKED,
				  MAX_SCHEDULE_TIMEOUT);

		for_each_prime_number_from(prime, 1, 8192) {
			times[1] = ktime_get_raw();

			for (n = 0; n < prime; n++) {
				request = empty_request(engine, batch);
				if (IS_ERR(request)) {
					err = PTR_ERR(request);
					goto out_batch;
				}
			}
			i915_request_wait(request,
					  I915_WAIT_LOCKED,
					  MAX_SCHEDULE_TIMEOUT);

			times[1] = ktime_sub(ktime_get_raw(), times[1]);
			if (prime == 1)
				times[0] = times[1];

			if (__igt_timeout(end_time, NULL))
				break;
		}

		err = igt_live_test_end(&t);
		if (err)
			goto out_batch;

		pr_info("Batch latencies on %s: 1 = %lluns, %lu = %lluns\n",
			engine->name,
			ktime_to_ns(times[0]),
			prime, div64_u64(ktime_to_ns(times[1]), prime));
	}

out_batch:
	i915_vma_unpin(batch);
	i915_vma_put(batch);
out_unlock:
	intel_runtime_pm_put(i915, wakeref);
	mutex_unlock(&i915->drm.struct_mutex);
	return err;
}

/*
 * A batch whose first instruction jumps back to its own start, so it spins
 * on the GPU until that jump is overwritten with MI_BATCH_BUFFER_END.
 */
static struct i915_vma *recursive_batch(struct drm_i915_private *i915)
{
	struct i915_gem_context *ctx = i915->kernel_context;
	struct i915_address_space *vm =
		ctx->ppgtt ? &ctx->ppgtt->vm : &i915->ggtt.vm;
	struct drm_i915_gem_object *obj;
	const int gen = INTEL_GEN(i915);
	struct i915_vma *vma;
	u32 *cmd;
	int err;

	obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
	if (IS_ERR(obj))
		return ERR_CAST(obj);

	vma = i915_vma_instance(obj, vm, NULL);
	if (IS_ERR(vma)) {
		err = PTR_ERR(vma);
		goto err;
	}

	err = i915_vma_pin(vma, 0, 0, PIN_USER);
	if (err)
		goto err;

	cmd = i915_gem_object_pin_map(obj, I915_MAP_WC);
	if (IS_ERR(cmd)) {
		err = PTR_ERR(cmd);
		goto err;
	}

	if (gen >= 8) {
		*cmd++ = MI_BATCH_BUFFER_START | 1 << 8 | 1;
		*cmd++ = lower_32_bits(vma->node.start);
		*cmd++ = upper_32_bits(vma->node.start);
	} else if (gen >= 6) {
		*cmd++ = MI_BATCH_BUFFER_START | 1 << 8;
		*cmd++ = lower_32_bits(vma->node.start);
	} else {
		*cmd++ = MI_BATCH_BUFFER_START | MI_BATCH_GTT;
		*cmd++ = lower_32_bits(vma->node.start);
	}
	*cmd++ = MI_BATCH_BUFFER_END; /* terminate early in case of error */

	__i915_gem_object_flush_map(obj, 0, 64);
	i915_gem_object_unpin_map(obj);

	i915_gem_chipset_flush(i915);

	return vma;

err:
	i915_gem_object_put(obj);
	return ERR_PTR(err);
}

/* Overwrite the self-referencing jump so the spinning batch terminates */
static int recursive_batch_resolve(struct i915_vma *batch)
{
	u32 *cmd;

	cmd = i915_gem_object_pin_map(batch->obj, I915_MAP_WC);
	if (IS_ERR(cmd))
		return PTR_ERR(cmd);

	*cmd = MI_BATCH_BUFFER_END;
	i915_gem_chipset_flush(batch->vm->i915);

	i915_gem_object_unpin_map(batch->obj);

	return 0;
}

static int live_all_engines(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct intel_engine_cs *engine;
	struct i915_request *request[I915_NUM_ENGINES];
	intel_wakeref_t wakeref;
	struct igt_live_test t;
	struct i915_vma *batch;
	unsigned int id;
	int err;

	/* Check we can submit requests to all engines simultaneously. We
	 * send a recursive batch to each engine - checking that we don't
	 * block doing so, and that they don't complete too soon.
	 */

	mutex_lock(&i915->drm.struct_mutex);
	wakeref = intel_runtime_pm_get(i915);

	err = igt_live_test_begin(&t, i915, __func__, "");
	if (err)
		goto out_unlock;

	batch = recursive_batch(i915);
	if (IS_ERR(batch)) {
		err = PTR_ERR(batch);
		pr_err("%s: Unable to create batch, err=%d\n", __func__, err);
		goto out_unlock;
	}

	for_each_engine(engine, i915, id) {
		request[id] = i915_request_alloc(engine, i915->kernel_context);
		if (IS_ERR(request[id])) {
			err = PTR_ERR(request[id]);
			pr_err("%s: Request allocation failed with err=%d\n",
			       __func__, err);
			goto out_request;
		}

		err = engine->emit_bb_start(request[id],
					    batch->node.start,
					    batch->node.size,
					    0);
		GEM_BUG_ON(err);
		request[id]->batch = batch;

		if (!i915_gem_object_has_active_reference(batch->obj)) {
			i915_gem_object_get(batch->obj);
			i915_gem_object_set_active_reference(batch->obj);
		}

		err = i915_vma_move_to_active(batch, request[id], 0);
		GEM_BUG_ON(err);

		i915_request_get(request[id]);
		i915_request_add(request[id]);
	}

	for_each_engine(engine, i915, id) {
		if (i915_request_completed(request[id])) {
			pr_err("%s(%s): request completed too early!\n",
			       __func__, engine->name);
			err = -EINVAL;
			goto out_request;
		}
	}

	err = recursive_batch_resolve(batch);
	if (err) {
		pr_err("%s: failed to resolve batch, err=%d\n", __func__, err);
		goto out_request;
	}

	for_each_engine(engine, i915, id) {
		long timeout;

		timeout = i915_request_wait(request[id],
					    I915_WAIT_LOCKED,
					    MAX_SCHEDULE_TIMEOUT);
		if (timeout < 0) {
			err = timeout;
			pr_err("%s: error waiting for request on %s, err=%d\n",
			       __func__, engine->name, err);
			goto out_request;
		}

		GEM_BUG_ON(!i915_request_completed(request[id]));
		i915_request_put(request[id]);
		request[id] = NULL;
	}

	err = igt_live_test_end(&t);

out_request:
	for_each_engine(engine, i915, id)
		if (request[id])
			i915_request_put(request[id]);
	i915_vma_unpin(batch);
	i915_vma_put(batch);
out_unlock:
	intel_runtime_pm_put(i915, wakeref);
	mutex_unlock(&i915->drm.struct_mutex);
	return err;
}

static int live_sequential_engines(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct i915_request *request[I915_NUM_ENGINES] = {};
	struct i915_request *prev = NULL;
	struct intel_engine_cs *engine;
	intel_wakeref_t wakeref;
	struct igt_live_test t;
	unsigned int id;
	int err;

	/* Check we can submit requests to all engines sequentially, such
	 * that each successive request waits for the earlier ones. This
	 * tests that we don't execute requests out of order, even though
	 * they are running on independent engines.
	 */

	mutex_lock(&i915->drm.struct_mutex);
	wakeref = intel_runtime_pm_get(i915);

	err = igt_live_test_begin(&t, i915, __func__, "");
	if (err)
		goto out_unlock;

	for_each_engine(engine, i915, id) {
		struct i915_vma *batch;

		batch = recursive_batch(i915);
		if (IS_ERR(batch)) {
			err = PTR_ERR(batch);
			pr_err("%s: Unable to create batch for %s, err=%d\n",
			       __func__, engine->name, err);
			goto out_unlock;
		}

		request[id] = i915_request_alloc(engine, i915->kernel_context);
		if (IS_ERR(request[id])) {
			err = PTR_ERR(request[id]);
			pr_err("%s: Request allocation failed for %s with err=%d\n",
			       __func__, engine->name, err);
			goto out_request;
		}

		if (prev) {
			err = i915_request_await_dma_fence(request[id],
							   &prev->fence);
			if (err) {
				i915_request_add(request[id]);
				pr_err("%s: Request await failed for %s with err=%d\n",
				       __func__, engine->name, err);
				goto out_request;
			}
		}

		err = engine->emit_bb_start(request[id],
					    batch->node.start,
					    batch->node.size,
					    0);
		GEM_BUG_ON(err);
		request[id]->batch = batch;

		err = i915_vma_move_to_active(batch, request[id], 0);
		GEM_BUG_ON(err);

		i915_gem_object_set_active_reference(batch->obj);
		i915_vma_get(batch);

		i915_request_get(request[id]);
		i915_request_add(request[id]);

		prev = request[id];
	}

	for_each_engine(engine, i915, id) {
		long timeout;

		if (i915_request_completed(request[id])) {
			pr_err("%s(%s): request completed too early!\n",
			       __func__, engine->name);
			err = -EINVAL;
			goto out_request;
		}

		err = recursive_batch_resolve(request[id]->batch);
		if (err) {
			pr_err("%s: failed to resolve batch, err=%d\n",
			       __func__, err);
			goto out_request;
		}

		timeout = i915_request_wait(request[id],
					    I915_WAIT_LOCKED,
					    MAX_SCHEDULE_TIMEOUT);
		if (timeout < 0) {
			err = timeout;
			pr_err("%s: error waiting for request on %s, err=%d\n",
			       __func__, engine->name, err);
			goto out_request;
		}

		GEM_BUG_ON(!i915_request_completed(request[id]));
	}

	err = igt_live_test_end(&t);

out_request:
	for_each_engine(engine, i915, id) {
		u32 *cmd;

		if (!request[id])
			break;

		cmd = i915_gem_object_pin_map(request[id]->batch->obj,
					      I915_MAP_WC);
		if (!IS_ERR(cmd)) {
			*cmd = MI_BATCH_BUFFER_END;
			i915_gem_chipset_flush(i915);

			i915_gem_object_unpin_map(request[id]->batch->obj);
		}

		i915_vma_put(request[id]->batch);
		i915_request_put(request[id]);
	}
out_unlock:
	intel_runtime_pm_put(i915, wakeref);
	mutex_unlock(&i915->drm.struct_mutex);
	return err;
}

static int
max_batches(struct i915_gem_context *ctx, struct intel_engine_cs *engine)
{
	struct i915_request *rq;
	int ret;

	/*
	 * Before execlists, all contexts share the same ringbuffer. With
	 * execlists, each context/engine has a separate ringbuffer and
	 * for the purposes of this test, inexhaustible.
	 *
	 * For the global ringbuffer though, we have to be very careful
	 * that we do not wrap while preventing the execution of requests
	 * with an unsignaled fence.
	 */
	if (HAS_EXECLISTS(ctx->i915))
		return INT_MAX;

	rq = i915_request_alloc(engine, ctx);
	if (IS_ERR(rq)) {
		ret = PTR_ERR(rq);
	} else {
		int sz;

		ret = rq->ring->size - rq->reserved_space;
		i915_request_add(rq);

		sz = rq->ring->emit - rq->head;
		if (sz < 0)
			sz += rq->ring->size;
		ret /= sz;
		ret /= 2; /* leave half spare, in case of emergency! */
	}

	return ret;
}

static int live_breadcrumbs_smoketest(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct smoketest t[I915_NUM_ENGINES];
	unsigned int ncpus = num_online_cpus();
	unsigned long num_waits, num_fences;
	struct intel_engine_cs *engine;
	struct task_struct **threads;
	struct igt_live_test live;
	enum intel_engine_id id;
	intel_wakeref_t wakeref;
	struct drm_file *file;
	unsigned int n;
	int ret = 0;

	/*
	 * Smoketest our breadcrumb/signal handling for requests across multiple
	 * threads. A very simple test to only catch the most egregious of bugs.
	 * See __igt_breadcrumbs_smoketest();
	 *
	 * On real hardware this time.
	 */

	wakeref = intel_runtime_pm_get(i915);

	file = mock_file(i915);
	if (IS_ERR(file)) {
		ret = PTR_ERR(file);
		goto out_rpm;
	}

	threads = kcalloc(ncpus * I915_NUM_ENGINES,
			  sizeof(*threads),
			  GFP_KERNEL);
	if (!threads) {
		ret = -ENOMEM;
		goto out_file;
	}

	memset(&t[0], 0, sizeof(t[0]));
	t[0].request_alloc = __live_request_alloc;
	t[0].ncontexts = 64;
	t[0].contexts = kmalloc_array(t[0].ncontexts,
				      sizeof(*t[0].contexts),
				      GFP_KERNEL);
	if (!t[0].contexts) {
		ret = -ENOMEM;
		goto out_threads;
	}

	mutex_lock(&i915->drm.struct_mutex);
	for (n = 0; n < t[0].ncontexts; n++) {
		t[0].contexts[n] = live_context(i915, file);
		if (!t[0].contexts[n]) {
			ret = -ENOMEM;
			goto out_contexts;
		}
	}

	ret = igt_live_test_begin(&live, i915, __func__, "");
	if (ret)
		goto out_contexts;

	for_each_engine(engine, i915, id) {
		t[id] = t[0];
		t[id].engine = engine;
		t[id].max_batch = max_batches(t[0].contexts[0], engine);
		if (t[id].max_batch < 0) {
			ret = t[id].max_batch;
			mutex_unlock(&i915->drm.struct_mutex);
			goto out_flush;
		}
		/* One ring interleaved between requests from all cpus */
		t[id].max_batch /= num_online_cpus() + 1;
		pr_debug("Limiting batches to %d requests on %s\n",
			 t[id].max_batch, engine->name);

		for (n = 0; n < ncpus; n++) {
			struct task_struct *tsk;

			tsk = kthread_run(__igt_breadcrumbs_smoketest,
					  &t[id], "igt/%d.%d", id, n);
			if (IS_ERR(tsk)) {
				ret = PTR_ERR(tsk);
				mutex_unlock(&i915->drm.struct_mutex);
				goto out_flush;
			}

			get_task_struct(tsk);
			threads[id * ncpus + n] = tsk;
		}
	}
	mutex_unlock(&i915->drm.struct_mutex);

	msleep(jiffies_to_msecs(i915_selftest.timeout_jiffies));

out_flush:
	num_waits = 0;
	num_fences = 0;
	for_each_engine(engine, i915, id) {
		for (n = 0; n < ncpus; n++) {
			struct task_struct *tsk = threads[id * ncpus + n];
			int err;

			if (!tsk)
				continue;

			err = kthread_stop(tsk);
			if (err < 0 && !ret)
				ret = err;

			put_task_struct(tsk);
		}

		num_waits += atomic_long_read(&t[id].num_waits);
		num_fences += atomic_long_read(&t[id].num_fences);
	}
	pr_info("Completed %lu waits for %lu fences across %d engines and %d cpus\n",
		num_waits, num_fences, RUNTIME_INFO(i915)->num_engines, ncpus);

	mutex_lock(&i915->drm.struct_mutex);
	ret = igt_live_test_end(&live) ?: ret;
out_contexts:
	mutex_unlock(&i915->drm.struct_mutex);
	kfree(t[0].contexts);
out_threads:
	kfree(threads);
out_file:
	mock_file_free(i915, file);
out_rpm:
	intel_runtime_pm_put(i915, wakeref);

	return ret;
}

int i915_request_live_selftests(struct drm_i915_private *i915)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(live_nop_request),
		SUBTEST(live_all_engines),
		SUBTEST(live_sequential_engines),
		SUBTEST(live_empty_request),
		SUBTEST(live_breadcrumbs_smoketest),
	};

	if (i915_terminally_wedged(i915))
		return 0;

	return i915_subtests(tests, i915);
}