/*
 * Copyright © 2016 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 */

#include <linux/prime_numbers.h>

#include "../i915_selftest.h"
#include "i915_random.h"
#include "igt_live_test.h"
#include "lib_sw_fence.h"

#include "mock_context.h"
#include "mock_drm.h"
#include "mock_gem_device.h"

static int igt_add_request(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct i915_request *request;
	int err = -ENOMEM;

	/* Basic preliminary test to create a request and let it loose! */

	mutex_lock(&i915->drm.struct_mutex);
	request = mock_request(i915->engine[RCS0],
			       i915->kernel_context,
			       HZ / 10);
	if (!request)
		goto out_unlock;

	i915_request_add(request);

	err = 0;
out_unlock:
	mutex_unlock(&i915->drm.struct_mutex);
	return err;
}

static int igt_wait_request(void *arg)
{
	const long T = HZ / 4;
	struct drm_i915_private *i915 = arg;
	struct i915_request *request;
	int err = -EINVAL;

	/* Submit a request, then wait upon it */

	mutex_lock(&i915->drm.struct_mutex);
	request = mock_request(i915->engine[RCS0], i915->kernel_context, T);
	if (!request) {
		err = -ENOMEM;
		goto out_unlock;
	}

	if (i915_request_wait(request, I915_WAIT_LOCKED, 0) != -ETIME) {
		pr_err("request wait (busy query) succeeded (expected timeout before submit!)\n");
		goto out_unlock;
	}

	if (i915_request_wait(request, I915_WAIT_LOCKED, T) != -ETIME) {
		pr_err("request wait succeeded (expected timeout before submit!)\n");
		goto out_unlock;
	}

	if (i915_request_completed(request)) {
		pr_err("request completed before submit!!\n");
		goto out_unlock;
	}

	i915_request_add(request);

	if (i915_request_wait(request, I915_WAIT_LOCKED, 0) != -ETIME) {
		pr_err("request wait (busy query) succeeded (expected timeout after submit!)\n");
		goto out_unlock;
	}

	if (i915_request_completed(request)) {
		pr_err("request completed immediately!\n");
		goto out_unlock;
	}

	if (i915_request_wait(request, I915_WAIT_LOCKED, T / 2) != -ETIME) {
		pr_err("request wait succeeded (expected timeout!)\n");
		goto out_unlock;
	}

	if (i915_request_wait(request, I915_WAIT_LOCKED, T) == -ETIME) {
		pr_err("request wait timed out!\n");
		goto out_unlock;
	}

	if (!i915_request_completed(request)) {
		pr_err("request not complete after waiting!\n");
waiting!\n"); 114 goto out_unlock; 115 } 116 117 if (i915_request_wait(request, I915_WAIT_LOCKED, T) == -ETIME) { 118 pr_err("request wait timed out when already complete!\n"); 119 goto out_unlock; 120 } 121 122 err = 0; 123 out_unlock: 124 mock_device_flush(i915); 125 mutex_unlock(&i915->drm.struct_mutex); 126 return err; 127 } 128 129 static int igt_fence_wait(void *arg) 130 { 131 const long T = HZ / 4; 132 struct drm_i915_private *i915 = arg; 133 struct i915_request *request; 134 int err = -EINVAL; 135 136 /* Submit a request, treat it as a fence and wait upon it */ 137 138 mutex_lock(&i915->drm.struct_mutex); 139 request = mock_request(i915->engine[RCS0], i915->kernel_context, T); 140 if (!request) { 141 err = -ENOMEM; 142 goto out_locked; 143 } 144 145 if (dma_fence_wait_timeout(&request->fence, false, T) != -ETIME) { 146 pr_err("fence wait success before submit (expected timeout)!\n"); 147 goto out_locked; 148 } 149 150 i915_request_add(request); 151 mutex_unlock(&i915->drm.struct_mutex); 152 153 if (dma_fence_is_signaled(&request->fence)) { 154 pr_err("fence signaled immediately!\n"); 155 goto out_device; 156 } 157 158 if (dma_fence_wait_timeout(&request->fence, false, T / 2) != -ETIME) { 159 pr_err("fence wait success after submit (expected timeout)!\n"); 160 goto out_device; 161 } 162 163 if (dma_fence_wait_timeout(&request->fence, false, T) <= 0) { 164 pr_err("fence wait timed out (expected success)!\n"); 165 goto out_device; 166 } 167 168 if (!dma_fence_is_signaled(&request->fence)) { 169 pr_err("fence unsignaled after waiting!\n"); 170 goto out_device; 171 } 172 173 if (dma_fence_wait_timeout(&request->fence, false, T) <= 0) { 174 pr_err("fence wait timed out when complete (expected success)!\n"); 175 goto out_device; 176 } 177 178 err = 0; 179 out_device: 180 mutex_lock(&i915->drm.struct_mutex); 181 out_locked: 182 mock_device_flush(i915); 183 mutex_unlock(&i915->drm.struct_mutex); 184 return err; 185 } 186 187 static int igt_request_rewind(void *arg) 188 { 189 struct drm_i915_private *i915 = arg; 190 struct i915_request *request, *vip; 191 struct i915_gem_context *ctx[2]; 192 int err = -EINVAL; 193 194 mutex_lock(&i915->drm.struct_mutex); 195 ctx[0] = mock_context(i915, "A"); 196 request = mock_request(i915->engine[RCS0], ctx[0], 2 * HZ); 197 if (!request) { 198 err = -ENOMEM; 199 goto err_context_0; 200 } 201 202 i915_request_get(request); 203 i915_request_add(request); 204 205 ctx[1] = mock_context(i915, "B"); 206 vip = mock_request(i915->engine[RCS0], ctx[1], 0); 207 if (!vip) { 208 err = -ENOMEM; 209 goto err_context_1; 210 } 211 212 /* Simulate preemption by manual reordering */ 213 if (!mock_cancel_request(request)) { 214 pr_err("failed to cancel request (already executed)!\n"); 215 i915_request_add(vip); 216 goto err_context_1; 217 } 218 i915_request_get(vip); 219 i915_request_add(vip); 220 rcu_read_lock(); 221 request->engine->submit_request(request); 222 rcu_read_unlock(); 223 224 mutex_unlock(&i915->drm.struct_mutex); 225 226 if (i915_request_wait(vip, 0, HZ) == -ETIME) { 227 pr_err("timed out waiting for high priority request\n"); 228 goto err; 229 } 230 231 if (i915_request_completed(request)) { 232 pr_err("low priority request already completed\n"); 233 goto err; 234 } 235 236 err = 0; 237 err: 238 i915_request_put(vip); 239 mutex_lock(&i915->drm.struct_mutex); 240 err_context_1: 241 mock_context_close(ctx[1]); 242 i915_request_put(request); 243 err_context_0: 244 mock_context_close(ctx[0]); 245 mock_device_flush(i915); 246 mutex_unlock(&i915->drm.struct_mutex); 
	return err;
}

struct smoketest {
	struct intel_engine_cs *engine;
	struct i915_gem_context **contexts;
	atomic_long_t num_waits, num_fences;
	int ncontexts, max_batch;
	struct i915_request *(*request_alloc)(struct i915_gem_context *,
					      struct intel_engine_cs *);
};

static struct i915_request *
__mock_request_alloc(struct i915_gem_context *ctx,
		     struct intel_engine_cs *engine)
{
	return mock_request(engine, ctx, 0);
}

static struct i915_request *
__live_request_alloc(struct i915_gem_context *ctx,
		     struct intel_engine_cs *engine)
{
	return igt_request_alloc(ctx, engine);
}

static int __igt_breadcrumbs_smoketest(void *arg)
{
	struct smoketest *t = arg;
	struct mutex * const BKL = &t->engine->i915->drm.struct_mutex;
	const unsigned int max_batch = min(t->ncontexts, t->max_batch) - 1;
	const unsigned int total = 4 * t->ncontexts + 1;
	unsigned int num_waits = 0, num_fences = 0;
	struct i915_request **requests;
	I915_RND_STATE(prng);
	unsigned int *order;
	int err = 0;

	/*
	 * A very simple test to catch the most egregious of list handling bugs.
	 *
	 * At its heart, we simply create oodles of requests running across
	 * multiple kthreads and enable signaling on them, for the sole purpose
	 * of stressing our breadcrumb handling. The only inspection we do is
	 * that the fences were marked as signaled.
	 */

	requests = kmalloc_array(total, sizeof(*requests), GFP_KERNEL);
	if (!requests)
		return -ENOMEM;

	order = i915_random_order(total, &prng);
	if (!order) {
		err = -ENOMEM;
		goto out_requests;
	}

	while (!kthread_should_stop()) {
		struct i915_sw_fence *submit, *wait;
		unsigned int n, count;

		submit = heap_fence_create(GFP_KERNEL);
		if (!submit) {
			err = -ENOMEM;
			break;
		}

		wait = heap_fence_create(GFP_KERNEL);
		if (!wait) {
			i915_sw_fence_commit(submit);
			heap_fence_put(submit);
			err = -ENOMEM;
			break;
		}

		i915_random_reorder(order, total, &prng);
		count = 1 + i915_prandom_u32_max_state(max_batch, &prng);

		for (n = 0; n < count; n++) {
			struct i915_gem_context *ctx =
				t->contexts[order[n] % t->ncontexts];
			struct i915_request *rq;

			mutex_lock(BKL);

			rq = t->request_alloc(ctx, t->engine);
			if (IS_ERR(rq)) {
				mutex_unlock(BKL);
				err = PTR_ERR(rq);
				count = n;
				break;
			}

			err = i915_sw_fence_await_sw_fence_gfp(&rq->submit,
							       submit,
							       GFP_KERNEL);

			requests[n] = i915_request_get(rq);
			i915_request_add(rq);

			mutex_unlock(BKL);

			if (err >= 0)
				err = i915_sw_fence_await_dma_fence(wait,
								    &rq->fence,
								    0,
								    GFP_KERNEL);

			if (err < 0) {
				i915_request_put(rq);
				count = n;
				break;
			}
		}

		i915_sw_fence_commit(submit);
		i915_sw_fence_commit(wait);

		if (!wait_event_timeout(wait->wait,
					i915_sw_fence_done(wait),
					HZ / 2)) {
			struct i915_request *rq = requests[count - 1];

			pr_err("waiting for %d fences (last %llx:%lld) on %s timed out!\n",
			       count,
			       rq->fence.context, rq->fence.seqno,
			       t->engine->name);
			i915_gem_set_wedged(t->engine->i915);
			GEM_BUG_ON(!i915_request_completed(rq));
			i915_sw_fence_wait(wait);
			err = -EIO;
		}

		for (n = 0; n < count; n++) {
			struct i915_request *rq = requests[n];

			if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
				      &rq->fence.flags)) {
pr_err("%llu:%llu was not signaled!\n", 386 rq->fence.context, rq->fence.seqno); 387 err = -EINVAL; 388 } 389 390 i915_request_put(rq); 391 } 392 393 heap_fence_put(wait); 394 heap_fence_put(submit); 395 396 if (err < 0) 397 break; 398 399 num_fences += count; 400 num_waits++; 401 402 cond_resched(); 403 } 404 405 atomic_long_add(num_fences, &t->num_fences); 406 atomic_long_add(num_waits, &t->num_waits); 407 408 kfree(order); 409 out_requests: 410 kfree(requests); 411 return err; 412 } 413 414 static int mock_breadcrumbs_smoketest(void *arg) 415 { 416 struct drm_i915_private *i915 = arg; 417 struct smoketest t = { 418 .engine = i915->engine[RCS0], 419 .ncontexts = 1024, 420 .max_batch = 1024, 421 .request_alloc = __mock_request_alloc 422 }; 423 unsigned int ncpus = num_online_cpus(); 424 struct task_struct **threads; 425 unsigned int n; 426 int ret = 0; 427 428 /* 429 * Smoketest our breadcrumb/signal handling for requests across multiple 430 * threads. A very simple test to only catch the most egregious of bugs. 431 * See __igt_breadcrumbs_smoketest(); 432 */ 433 434 threads = kmalloc_array(ncpus, sizeof(*threads), GFP_KERNEL); 435 if (!threads) 436 return -ENOMEM; 437 438 t.contexts = 439 kmalloc_array(t.ncontexts, sizeof(*t.contexts), GFP_KERNEL); 440 if (!t.contexts) { 441 ret = -ENOMEM; 442 goto out_threads; 443 } 444 445 mutex_lock(&t.engine->i915->drm.struct_mutex); 446 for (n = 0; n < t.ncontexts; n++) { 447 t.contexts[n] = mock_context(t.engine->i915, "mock"); 448 if (!t.contexts[n]) { 449 ret = -ENOMEM; 450 goto out_contexts; 451 } 452 } 453 mutex_unlock(&t.engine->i915->drm.struct_mutex); 454 455 for (n = 0; n < ncpus; n++) { 456 threads[n] = kthread_run(__igt_breadcrumbs_smoketest, 457 &t, "igt/%d", n); 458 if (IS_ERR(threads[n])) { 459 ret = PTR_ERR(threads[n]); 460 ncpus = n; 461 break; 462 } 463 464 get_task_struct(threads[n]); 465 } 466 467 msleep(jiffies_to_msecs(i915_selftest.timeout_jiffies)); 468 469 for (n = 0; n < ncpus; n++) { 470 int err; 471 472 err = kthread_stop(threads[n]); 473 if (err < 0 && !ret) 474 ret = err; 475 476 put_task_struct(threads[n]); 477 } 478 pr_info("Completed %lu waits for %lu fence across %d cpus\n", 479 atomic_long_read(&t.num_waits), 480 atomic_long_read(&t.num_fences), 481 ncpus); 482 483 mutex_lock(&t.engine->i915->drm.struct_mutex); 484 out_contexts: 485 for (n = 0; n < t.ncontexts; n++) { 486 if (!t.contexts[n]) 487 break; 488 mock_context_close(t.contexts[n]); 489 } 490 mutex_unlock(&t.engine->i915->drm.struct_mutex); 491 kfree(t.contexts); 492 out_threads: 493 kfree(threads); 494 495 return ret; 496 } 497 498 int i915_request_mock_selftests(void) 499 { 500 static const struct i915_subtest tests[] = { 501 SUBTEST(igt_add_request), 502 SUBTEST(igt_wait_request), 503 SUBTEST(igt_fence_wait), 504 SUBTEST(igt_request_rewind), 505 SUBTEST(mock_breadcrumbs_smoketest), 506 }; 507 struct drm_i915_private *i915; 508 intel_wakeref_t wakeref; 509 int err = 0; 510 511 i915 = mock_gem_device(); 512 if (!i915) 513 return -ENOMEM; 514 515 with_intel_runtime_pm(i915, wakeref) 516 err = i915_subtests(tests, i915); 517 518 drm_dev_put(&i915->drm); 519 520 return err; 521 } 522 523 static int live_nop_request(void *arg) 524 { 525 struct drm_i915_private *i915 = arg; 526 struct intel_engine_cs *engine; 527 intel_wakeref_t wakeref; 528 struct igt_live_test t; 529 unsigned int id; 530 int err = -ENODEV; 531 532 /* Submit various sized batches of empty requests, to each engine 533 * (individually), and wait for the batch to complete. 
	 * the overhead of submitting requests to the hardware.
	 */

	mutex_lock(&i915->drm.struct_mutex);
	wakeref = intel_runtime_pm_get(i915);

	for_each_engine(engine, i915, id) {
		struct i915_request *request = NULL;
		unsigned long n, prime;
		IGT_TIMEOUT(end_time);
		ktime_t times[2] = {};

		err = igt_live_test_begin(&t, i915, __func__, engine->name);
		if (err)
			goto out_unlock;

		for_each_prime_number_from(prime, 1, 8192) {
			times[1] = ktime_get_raw();

			for (n = 0; n < prime; n++) {
				request = i915_request_create(engine->kernel_context);
				if (IS_ERR(request)) {
					err = PTR_ERR(request);
					goto out_unlock;
				}

				/* This space is left intentionally blank.
				 *
				 * We do not actually want to perform any
				 * action with this request, we just want
				 * to measure the latency in allocation
				 * and submission of our breadcrumbs -
				 * ensuring that the bare request is sufficient
				 * for the system to work (i.e. proper HEAD
				 * tracking of the rings, interrupt handling,
				 * etc). It also gives us the lowest bounds
				 * for latency.
				 */

				i915_request_add(request);
			}
			i915_request_wait(request,
					  I915_WAIT_LOCKED,
					  MAX_SCHEDULE_TIMEOUT);

			times[1] = ktime_sub(ktime_get_raw(), times[1]);
			if (prime == 1)
				times[0] = times[1];

			if (__igt_timeout(end_time, NULL))
				break;
		}

		err = igt_live_test_end(&t);
		if (err)
			goto out_unlock;

		pr_info("Request latencies on %s: 1 = %lluns, %lu = %lluns\n",
			engine->name,
			ktime_to_ns(times[0]),
			prime, div64_u64(ktime_to_ns(times[1]), prime));
	}

out_unlock:
	intel_runtime_pm_put(i915, wakeref);
	mutex_unlock(&i915->drm.struct_mutex);
	return err;
}

static struct i915_vma *empty_batch(struct drm_i915_private *i915)
{
	struct drm_i915_gem_object *obj;
	struct i915_vma *vma;
	u32 *cmd;
	int err;

	obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
	if (IS_ERR(obj))
		return ERR_CAST(obj);

	cmd = i915_gem_object_pin_map(obj, I915_MAP_WB);
	if (IS_ERR(cmd)) {
		err = PTR_ERR(cmd);
		goto err;
	}

	*cmd = MI_BATCH_BUFFER_END;

	__i915_gem_object_flush_map(obj, 0, 64);
	i915_gem_object_unpin_map(obj);

	i915_gem_chipset_flush(i915);

	vma = i915_vma_instance(obj, &i915->ggtt.vm, NULL);
	if (IS_ERR(vma)) {
		err = PTR_ERR(vma);
		goto err;
	}

	err = i915_vma_pin(vma, 0, 0, PIN_USER | PIN_GLOBAL);
	if (err)
		goto err;

	return vma;

err:
	i915_gem_object_put(obj);
	return ERR_PTR(err);
}

static struct i915_request *
empty_request(struct intel_engine_cs *engine,
	      struct i915_vma *batch)
{
	struct i915_request *request;
	int err;

	request = i915_request_create(engine->kernel_context);
	if (IS_ERR(request))
		return request;

	err = engine->emit_bb_start(request,
				    batch->node.start,
				    batch->node.size,
				    I915_DISPATCH_SECURE);
	if (err)
		goto out_request;

out_request:
	i915_request_add(request);
	return err ? ERR_PTR(err) : request;
}

static int live_empty_request(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct intel_engine_cs *engine;
	intel_wakeref_t wakeref;
	struct igt_live_test t;
	struct i915_vma *batch;
	unsigned int id;
	int err = 0;

	/* Submit various sized batches of empty requests, to each engine
	 * (individually), and wait for the batch to complete. We can check
	 * the overhead of submitting requests to the hardware.
	 */

	mutex_lock(&i915->drm.struct_mutex);
	wakeref = intel_runtime_pm_get(i915);

	batch = empty_batch(i915);
	if (IS_ERR(batch)) {
		err = PTR_ERR(batch);
		goto out_unlock;
	}

	for_each_engine(engine, i915, id) {
		IGT_TIMEOUT(end_time);
		struct i915_request *request;
		unsigned long n, prime;
		ktime_t times[2] = {};

		err = igt_live_test_begin(&t, i915, __func__, engine->name);
		if (err)
			goto out_batch;

		/* Warmup / preload */
		request = empty_request(engine, batch);
		if (IS_ERR(request)) {
			err = PTR_ERR(request);
			goto out_batch;
		}
		i915_request_wait(request,
				  I915_WAIT_LOCKED,
				  MAX_SCHEDULE_TIMEOUT);

		for_each_prime_number_from(prime, 1, 8192) {
			times[1] = ktime_get_raw();

			for (n = 0; n < prime; n++) {
				request = empty_request(engine, batch);
				if (IS_ERR(request)) {
					err = PTR_ERR(request);
					goto out_batch;
				}
			}
			i915_request_wait(request,
					  I915_WAIT_LOCKED,
					  MAX_SCHEDULE_TIMEOUT);

			times[1] = ktime_sub(ktime_get_raw(), times[1]);
			if (prime == 1)
				times[0] = times[1];

			if (__igt_timeout(end_time, NULL))
				break;
		}

		err = igt_live_test_end(&t);
		if (err)
			goto out_batch;

		pr_info("Batch latencies on %s: 1 = %lluns, %lu = %lluns\n",
			engine->name,
			ktime_to_ns(times[0]),
			prime, div64_u64(ktime_to_ns(times[1]), prime));
	}

out_batch:
	i915_vma_unpin(batch);
	i915_vma_put(batch);
out_unlock:
	intel_runtime_pm_put(i915, wakeref);
	mutex_unlock(&i915->drm.struct_mutex);
	return err;
}

/*
 * Build a batch whose first instruction is an MI_BATCH_BUFFER_START pointing
 * back at itself, so it spins on the GPU until recursive_batch_resolve()
 * rewrites that first dword to MI_BATCH_BUFFER_END.
 */
static struct i915_vma *recursive_batch(struct drm_i915_private *i915)
{
	struct i915_gem_context *ctx = i915->kernel_context;
	struct i915_address_space *vm =
		ctx->ppgtt ? &ctx->ppgtt->vm : &i915->ggtt.vm;
&ctx->ppgtt->vm : &i915->ggtt.vm; 757 struct drm_i915_gem_object *obj; 758 const int gen = INTEL_GEN(i915); 759 struct i915_vma *vma; 760 u32 *cmd; 761 int err; 762 763 obj = i915_gem_object_create_internal(i915, PAGE_SIZE); 764 if (IS_ERR(obj)) 765 return ERR_CAST(obj); 766 767 vma = i915_vma_instance(obj, vm, NULL); 768 if (IS_ERR(vma)) { 769 err = PTR_ERR(vma); 770 goto err; 771 } 772 773 err = i915_vma_pin(vma, 0, 0, PIN_USER); 774 if (err) 775 goto err; 776 777 cmd = i915_gem_object_pin_map(obj, I915_MAP_WC); 778 if (IS_ERR(cmd)) { 779 err = PTR_ERR(cmd); 780 goto err; 781 } 782 783 if (gen >= 8) { 784 *cmd++ = MI_BATCH_BUFFER_START | 1 << 8 | 1; 785 *cmd++ = lower_32_bits(vma->node.start); 786 *cmd++ = upper_32_bits(vma->node.start); 787 } else if (gen >= 6) { 788 *cmd++ = MI_BATCH_BUFFER_START | 1 << 8; 789 *cmd++ = lower_32_bits(vma->node.start); 790 } else { 791 *cmd++ = MI_BATCH_BUFFER_START | MI_BATCH_GTT; 792 *cmd++ = lower_32_bits(vma->node.start); 793 } 794 *cmd++ = MI_BATCH_BUFFER_END; /* terminate early in case of error */ 795 796 __i915_gem_object_flush_map(obj, 0, 64); 797 i915_gem_object_unpin_map(obj); 798 799 i915_gem_chipset_flush(i915); 800 801 return vma; 802 803 err: 804 i915_gem_object_put(obj); 805 return ERR_PTR(err); 806 } 807 808 static int recursive_batch_resolve(struct i915_vma *batch) 809 { 810 u32 *cmd; 811 812 cmd = i915_gem_object_pin_map(batch->obj, I915_MAP_WC); 813 if (IS_ERR(cmd)) 814 return PTR_ERR(cmd); 815 816 *cmd = MI_BATCH_BUFFER_END; 817 i915_gem_chipset_flush(batch->vm->i915); 818 819 i915_gem_object_unpin_map(batch->obj); 820 821 return 0; 822 } 823 824 static int live_all_engines(void *arg) 825 { 826 struct drm_i915_private *i915 = arg; 827 struct intel_engine_cs *engine; 828 struct i915_request *request[I915_NUM_ENGINES]; 829 intel_wakeref_t wakeref; 830 struct igt_live_test t; 831 struct i915_vma *batch; 832 unsigned int id; 833 int err; 834 835 /* Check we can submit requests to all engines simultaneously. We 836 * send a recursive batch to each engine - checking that we don't 837 * block doing so, and that they don't complete too soon. 
	 */

	mutex_lock(&i915->drm.struct_mutex);
	wakeref = intel_runtime_pm_get(i915);

	err = igt_live_test_begin(&t, i915, __func__, "");
	if (err)
		goto out_unlock;

	batch = recursive_batch(i915);
	if (IS_ERR(batch)) {
		err = PTR_ERR(batch);
		pr_err("%s: Unable to create batch, err=%d\n", __func__, err);
		goto out_unlock;
	}

	for_each_engine(engine, i915, id) {
		request[id] = i915_request_create(engine->kernel_context);
		if (IS_ERR(request[id])) {
			err = PTR_ERR(request[id]);
			pr_err("%s: Request allocation failed with err=%d\n",
			       __func__, err);
			goto out_request;
		}

		err = engine->emit_bb_start(request[id],
					    batch->node.start,
					    batch->node.size,
					    0);
		GEM_BUG_ON(err);
		request[id]->batch = batch;

		if (!i915_gem_object_has_active_reference(batch->obj)) {
			i915_gem_object_get(batch->obj);
			i915_gem_object_set_active_reference(batch->obj);
		}

		err = i915_vma_move_to_active(batch, request[id], 0);
		GEM_BUG_ON(err);

		i915_request_get(request[id]);
		i915_request_add(request[id]);
	}

	for_each_engine(engine, i915, id) {
		if (i915_request_completed(request[id])) {
			pr_err("%s(%s): request completed too early!\n",
			       __func__, engine->name);
			err = -EINVAL;
			goto out_request;
		}
	}

	err = recursive_batch_resolve(batch);
	if (err) {
		pr_err("%s: failed to resolve batch, err=%d\n", __func__, err);
		goto out_request;
	}

	for_each_engine(engine, i915, id) {
		long timeout;

		timeout = i915_request_wait(request[id],
					    I915_WAIT_LOCKED,
					    MAX_SCHEDULE_TIMEOUT);
		if (timeout < 0) {
			err = timeout;
			pr_err("%s: error waiting for request on %s, err=%d\n",
			       __func__, engine->name, err);
			goto out_request;
		}

		GEM_BUG_ON(!i915_request_completed(request[id]));
		i915_request_put(request[id]);
		request[id] = NULL;
	}

	err = igt_live_test_end(&t);

out_request:
	for_each_engine(engine, i915, id)
		if (request[id])
			i915_request_put(request[id]);
	i915_vma_unpin(batch);
	i915_vma_put(batch);
out_unlock:
	intel_runtime_pm_put(i915, wakeref);
	mutex_unlock(&i915->drm.struct_mutex);
	return err;
}

static int live_sequential_engines(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct i915_request *request[I915_NUM_ENGINES] = {};
	struct i915_request *prev = NULL;
	struct intel_engine_cs *engine;
	intel_wakeref_t wakeref;
	struct igt_live_test t;
	unsigned int id;
	int err;

	/* Check we can submit requests to all engines sequentially, such
	 * that each successive request waits for the earlier ones. This
	 * tests that we don't execute requests out of order, even though
	 * they are running on independent engines.
	 */

	mutex_lock(&i915->drm.struct_mutex);
	wakeref = intel_runtime_pm_get(i915);

	err = igt_live_test_begin(&t, i915, __func__, "");
	if (err)
		goto out_unlock;

	for_each_engine(engine, i915, id) {
		struct i915_vma *batch;

		batch = recursive_batch(i915);
		if (IS_ERR(batch)) {
			err = PTR_ERR(batch);
			pr_err("%s: Unable to create batch for %s, err=%d\n",
			       __func__, engine->name, err);
			goto out_unlock;
		}

		request[id] = i915_request_create(engine->kernel_context);
		if (IS_ERR(request[id])) {
			err = PTR_ERR(request[id]);
			pr_err("%s: Request allocation failed for %s with err=%d\n",
			       __func__, engine->name, err);
			goto out_request;
		}

		if (prev) {
			err = i915_request_await_dma_fence(request[id],
							   &prev->fence);
			if (err) {
				i915_request_add(request[id]);
				pr_err("%s: Request await failed for %s with err=%d\n",
				       __func__, engine->name, err);
				goto out_request;
			}
		}

		err = engine->emit_bb_start(request[id],
					    batch->node.start,
					    batch->node.size,
					    0);
		GEM_BUG_ON(err);
		request[id]->batch = batch;

		err = i915_vma_move_to_active(batch, request[id], 0);
		GEM_BUG_ON(err);

		i915_gem_object_set_active_reference(batch->obj);
		i915_vma_get(batch);

		i915_request_get(request[id]);
		i915_request_add(request[id]);

		prev = request[id];
	}

	for_each_engine(engine, i915, id) {
		long timeout;

		if (i915_request_completed(request[id])) {
			pr_err("%s(%s): request completed too early!\n",
			       __func__, engine->name);
			err = -EINVAL;
			goto out_request;
		}

		err = recursive_batch_resolve(request[id]->batch);
		if (err) {
			pr_err("%s: failed to resolve batch, err=%d\n",
			       __func__, err);
			goto out_request;
		}

		timeout = i915_request_wait(request[id],
					    I915_WAIT_LOCKED,
					    MAX_SCHEDULE_TIMEOUT);
		if (timeout < 0) {
			err = timeout;
			pr_err("%s: error waiting for request on %s, err=%d\n",
			       __func__, engine->name, err);
			goto out_request;
		}

		GEM_BUG_ON(!i915_request_completed(request[id]));
	}

	err = igt_live_test_end(&t);

out_request:
	for_each_engine(engine, i915, id) {
		u32 *cmd;

		if (!request[id])
			break;

		cmd = i915_gem_object_pin_map(request[id]->batch->obj,
					      I915_MAP_WC);
		if (!IS_ERR(cmd)) {
			*cmd = MI_BATCH_BUFFER_END;
			i915_gem_chipset_flush(i915);

			i915_gem_object_unpin_map(request[id]->batch->obj);
		}

		i915_vma_put(request[id]->batch);
		i915_request_put(request[id]);
	}
out_unlock:
	intel_runtime_pm_put(i915, wakeref);
	mutex_unlock(&i915->drm.struct_mutex);
	return err;
}

static int
max_batches(struct i915_gem_context *ctx, struct intel_engine_cs *engine)
{
	struct i915_request *rq;
	int ret;

	/*
	 * Before execlists, all contexts share the same ringbuffer. With
	 * execlists, each context/engine has a separate ringbuffer and
	 * for the purposes of this test, inexhaustible.
	 *
	 * For the global ringbuffer though, we have to be very careful
	 * that we do not wrap while preventing the execution of requests
	 * with an unsignaled fence.
	 */
	if (HAS_EXECLISTS(ctx->i915))
		return INT_MAX;

	rq = igt_request_alloc(ctx, engine);
	if (IS_ERR(rq)) {
		ret = PTR_ERR(rq);
	} else {
		int sz;

		ret = rq->ring->size - rq->reserved_space;
		i915_request_add(rq);

		sz = rq->ring->emit - rq->head;
		if (sz < 0)
			sz += rq->ring->size;
		ret /= sz;
		ret /= 2; /* leave half spare, in case of emergency! */
	}

	return ret;
}

static int live_breadcrumbs_smoketest(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct smoketest t[I915_NUM_ENGINES];
	unsigned int ncpus = num_online_cpus();
	unsigned long num_waits, num_fences;
	struct intel_engine_cs *engine;
	struct task_struct **threads;
	struct igt_live_test live;
	enum intel_engine_id id;
	intel_wakeref_t wakeref;
	struct drm_file *file;
	unsigned int n;
	int ret = 0;

	/*
	 * Smoketest our breadcrumb/signal handling for requests across multiple
	 * threads. A very simple test to only catch the most egregious of bugs.
	 * See __igt_breadcrumbs_smoketest();
	 *
	 * On real hardware this time.
	 */

	wakeref = intel_runtime_pm_get(i915);

	file = mock_file(i915);
	if (IS_ERR(file)) {
		ret = PTR_ERR(file);
		goto out_rpm;
	}

	threads = kcalloc(ncpus * I915_NUM_ENGINES,
			  sizeof(*threads),
			  GFP_KERNEL);
	if (!threads) {
		ret = -ENOMEM;
		goto out_file;
	}

	memset(&t[0], 0, sizeof(t[0]));
	t[0].request_alloc = __live_request_alloc;
	t[0].ncontexts = 64;
	t[0].contexts = kmalloc_array(t[0].ncontexts,
				      sizeof(*t[0].contexts),
				      GFP_KERNEL);
	if (!t[0].contexts) {
		ret = -ENOMEM;
		goto out_threads;
	}

	mutex_lock(&i915->drm.struct_mutex);
	for (n = 0; n < t[0].ncontexts; n++) {
		t[0].contexts[n] = live_context(i915, file);
		if (!t[0].contexts[n]) {
			ret = -ENOMEM;
			goto out_contexts;
		}
	}

	ret = igt_live_test_begin(&live, i915, __func__, "");
	if (ret)
		goto out_contexts;

	for_each_engine(engine, i915, id) {
		t[id] = t[0];
		t[id].engine = engine;
		t[id].max_batch = max_batches(t[0].contexts[0], engine);
		if (t[id].max_batch < 0) {
			ret = t[id].max_batch;
			mutex_unlock(&i915->drm.struct_mutex);
			goto out_flush;
		}
		/* One ring interleaved between requests from all cpus */
		t[id].max_batch /= num_online_cpus() + 1;
		pr_debug("Limiting batches to %d requests on %s\n",
			 t[id].max_batch, engine->name);

		for (n = 0; n < ncpus; n++) {
			struct task_struct *tsk;

			tsk = kthread_run(__igt_breadcrumbs_smoketest,
					  &t[id], "igt/%d.%d", id, n);
			if (IS_ERR(tsk)) {
				ret = PTR_ERR(tsk);
				mutex_unlock(&i915->drm.struct_mutex);
				goto out_flush;
			}

			get_task_struct(tsk);
			threads[id * ncpus + n] = tsk;
		}
	}
	mutex_unlock(&i915->drm.struct_mutex);

	msleep(jiffies_to_msecs(i915_selftest.timeout_jiffies));

out_flush:
	num_waits = 0;
	num_fences = 0;
	for_each_engine(engine, i915, id) {
		for (n = 0; n < ncpus; n++) {
			struct task_struct *tsk = threads[id * ncpus + n];
			int err;

			if (!tsk)
				continue;

			err = kthread_stop(tsk);
			if (err < 0 && !ret)
				ret = err;

			put_task_struct(tsk);
		}

		num_waits += atomic_long_read(&t[id].num_waits);
		num_fences += atomic_long_read(&t[id].num_fences);
atomic_long_read(&t[id].num_fences); 1212 } 1213 pr_info("Completed %lu waits for %lu fences across %d engines and %d cpus\n", 1214 num_waits, num_fences, RUNTIME_INFO(i915)->num_engines, ncpus); 1215 1216 mutex_lock(&i915->drm.struct_mutex); 1217 ret = igt_live_test_end(&live) ?: ret; 1218 out_contexts: 1219 mutex_unlock(&i915->drm.struct_mutex); 1220 kfree(t[0].contexts); 1221 out_threads: 1222 kfree(threads); 1223 out_file: 1224 mock_file_free(i915, file); 1225 out_rpm: 1226 intel_runtime_pm_put(i915, wakeref); 1227 1228 return ret; 1229 } 1230 1231 int i915_request_live_selftests(struct drm_i915_private *i915) 1232 { 1233 static const struct i915_subtest tests[] = { 1234 SUBTEST(live_nop_request), 1235 SUBTEST(live_all_engines), 1236 SUBTEST(live_sequential_engines), 1237 SUBTEST(live_empty_request), 1238 SUBTEST(live_breadcrumbs_smoketest), 1239 }; 1240 1241 if (i915_terminally_wedged(i915)) 1242 return 0; 1243 1244 return i915_subtests(tests, i915); 1245 } 1246