/*
 * Copyright © 2016 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 */

#include <linux/prime_numbers.h>

#include "gem/i915_gem_pm.h"
#include "gem/selftests/mock_context.h"

#include "i915_random.h"
#include "i915_selftest.h"
#include "igt_live_test.h"
#include "lib_sw_fence.h"

#include "mock_drm.h"
#include "mock_gem_device.h"

static int igt_add_request(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct i915_request *request;
	int err = -ENOMEM;

	/* Basic preliminary test to create a request and let it loose! */

	mutex_lock(&i915->drm.struct_mutex);
	request = mock_request(i915->engine[RCS0],
			       i915->kernel_context,
			       HZ / 10);
	if (!request)
		goto out_unlock;

	i915_request_add(request);

	err = 0;
out_unlock:
	mutex_unlock(&i915->drm.struct_mutex);
	return err;
}

static int igt_wait_request(void *arg)
{
	const long T = HZ / 4;
	struct drm_i915_private *i915 = arg;
	struct i915_request *request;
	int err = -EINVAL;

	/* Submit a request, then wait upon it */

	mutex_lock(&i915->drm.struct_mutex);
	request = mock_request(i915->engine[RCS0], i915->kernel_context, T);
	if (!request) {
		err = -ENOMEM;
		goto out_unlock;
	}

	if (i915_request_wait(request, 0, 0) != -ETIME) {
		pr_err("request wait (busy query) succeeded (expected timeout before submit!)\n");
		goto out_unlock;
	}

	if (i915_request_wait(request, 0, T) != -ETIME) {
		pr_err("request wait succeeded (expected timeout before submit!)\n");
		goto out_unlock;
	}

	if (i915_request_completed(request)) {
		pr_err("request completed before submit!!\n");
		goto out_unlock;
	}

	i915_request_add(request);

	if (i915_request_wait(request, 0, 0) != -ETIME) {
		pr_err("request wait (busy query) succeeded (expected timeout after submit!)\n");
		goto out_unlock;
	}

	if (i915_request_completed(request)) {
		pr_err("request completed immediately!\n");
		goto out_unlock;
	}

	if (i915_request_wait(request, 0, T / 2) != -ETIME) {
		pr_err("request wait succeeded (expected timeout!)\n");
		goto out_unlock;
	}

	if (i915_request_wait(request, 0, T) == -ETIME) {
		pr_err("request wait timed out!\n");
		goto out_unlock;
	}

	if (!i915_request_completed(request)) {
		pr_err("request not complete after waiting!\n");
		goto out_unlock;
	}

	if (i915_request_wait(request, 0, T) == -ETIME) {
		pr_err("request wait timed out when already complete!\n");
		goto out_unlock;
	}

	err = 0;
out_unlock:
	mock_device_flush(i915);
	mutex_unlock(&i915->drm.struct_mutex);
	return err;
}

static int igt_fence_wait(void *arg)
{
	const long T = HZ / 4;
	struct drm_i915_private *i915 = arg;
	struct i915_request *request;
	int err = -EINVAL;

	/* Submit a request, treat it as a fence and wait upon it */

	mutex_lock(&i915->drm.struct_mutex);
	request = mock_request(i915->engine[RCS0], i915->kernel_context, T);
	if (!request) {
		err = -ENOMEM;
		goto out_locked;
	}

	if (dma_fence_wait_timeout(&request->fence, false, T) != -ETIME) {
		pr_err("fence wait success before submit (expected timeout)!\n");
		goto out_locked;
	}

	i915_request_add(request);
	mutex_unlock(&i915->drm.struct_mutex);

	if (dma_fence_is_signaled(&request->fence)) {
		pr_err("fence signaled immediately!\n");
		goto out_device;
	}

	if (dma_fence_wait_timeout(&request->fence, false, T / 2) != -ETIME) {
		pr_err("fence wait success after submit (expected timeout)!\n");
		goto out_device;
	}

	if (dma_fence_wait_timeout(&request->fence, false, T) <= 0) {
		pr_err("fence wait timed out (expected success)!\n");
		goto out_device;
	}

	if (!dma_fence_is_signaled(&request->fence)) {
		pr_err("fence unsignaled after waiting!\n");
		goto out_device;
	}

	if (dma_fence_wait_timeout(&request->fence, false, T) <= 0) {
		pr_err("fence wait timed out when complete (expected success)!\n");
		goto out_device;
	}

	err = 0;
out_device:
	mutex_lock(&i915->drm.struct_mutex);
out_locked:
	mock_device_flush(i915);
	mutex_unlock(&i915->drm.struct_mutex);
	return err;
}

static int igt_request_rewind(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct i915_request *request, *vip;
	struct i915_gem_context *ctx[2];
	int err = -EINVAL;

	mutex_lock(&i915->drm.struct_mutex);
	ctx[0] = mock_context(i915, "A");
	request = mock_request(i915->engine[RCS0], ctx[0], 2 * HZ);
	if (!request) {
		err = -ENOMEM;
		goto err_context_0;
	}

	i915_request_get(request);
	i915_request_add(request);

	ctx[1] = mock_context(i915, "B");
	vip = mock_request(i915->engine[RCS0], ctx[1], 0);
	if (!vip) {
		err = -ENOMEM;
		goto err_context_1;
	}

	/* Simulate preemption by manual reordering */
	if (!mock_cancel_request(request)) {
		pr_err("failed to cancel request (already executed)!\n");
		i915_request_add(vip);
		goto err_context_1;
	}
	i915_request_get(vip);
	i915_request_add(vip);
	rcu_read_lock();
	request->engine->submit_request(request);
	rcu_read_unlock();

	mutex_unlock(&i915->drm.struct_mutex);

	if (i915_request_wait(vip, 0, HZ) == -ETIME) {
		pr_err("timed out waiting for high priority request\n");
		goto err;
	}

	if (i915_request_completed(request)) {
		pr_err("low priority request already completed\n");
		goto err;
	}

	err = 0;
err:
	i915_request_put(vip);
	mutex_lock(&i915->drm.struct_mutex);
err_context_1:
	mock_context_close(ctx[1]);
	i915_request_put(request);
err_context_0:
	mock_context_close(ctx[0]);
	mock_device_flush(i915);
	mutex_unlock(&i915->drm.struct_mutex);
	return err;
}

struct smoketest {
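	/*
	 * Test parameters shared with each smoketest kthread; num_waits
	 * and num_fences accumulate the per-thread totals reported by
	 * __igt_breadcrumbs_smoketest().
	 */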
	struct intel_engine_cs *engine;
	struct i915_gem_context **contexts;
	atomic_long_t num_waits, num_fences;
	int ncontexts, max_batch;
	struct i915_request *(*request_alloc)(struct i915_gem_context *,
					      struct intel_engine_cs *);
};

static struct i915_request *
__mock_request_alloc(struct i915_gem_context *ctx,
		     struct intel_engine_cs *engine)
{
	return mock_request(engine, ctx, 0);
}

static struct i915_request *
__live_request_alloc(struct i915_gem_context *ctx,
		     struct intel_engine_cs *engine)
{
	return igt_request_alloc(ctx, engine);
}

static int __igt_breadcrumbs_smoketest(void *arg)
{
	struct smoketest *t = arg;
	struct mutex * const BKL = &t->engine->i915->drm.struct_mutex;
	const unsigned int max_batch = min(t->ncontexts, t->max_batch) - 1;
	const unsigned int total = 4 * t->ncontexts + 1;
	unsigned int num_waits = 0, num_fences = 0;
	struct i915_request **requests;
	I915_RND_STATE(prng);
	unsigned int *order;
	int err = 0;

	/*
	 * A very simple test to catch the most egregious of list handling bugs.
	 *
	 * At its heart, we simply create oodles of requests running across
	 * multiple kthreads and enable signaling on them, for the sole purpose
	 * of stressing our breadcrumb handling. The only inspection we do is
	 * that the fences were marked as signaled.
	 */

	requests = kmalloc_array(total, sizeof(*requests), GFP_KERNEL);
	if (!requests)
		return -ENOMEM;

	order = i915_random_order(total, &prng);
	if (!order) {
		err = -ENOMEM;
		goto out_requests;
	}

	while (!kthread_should_stop()) {
		struct i915_sw_fence *submit, *wait;
		unsigned int n, count;

		submit = heap_fence_create(GFP_KERNEL);
		if (!submit) {
			err = -ENOMEM;
			break;
		}

		wait = heap_fence_create(GFP_KERNEL);
		if (!wait) {
			i915_sw_fence_commit(submit);
			heap_fence_put(submit);
			err = -ENOMEM;
			break;
		}

		i915_random_reorder(order, total, &prng);
		count = 1 + i915_prandom_u32_max_state(max_batch, &prng);

		for (n = 0; n < count; n++) {
			struct i915_gem_context *ctx =
				t->contexts[order[n] % t->ncontexts];
			struct i915_request *rq;

			mutex_lock(BKL);

			rq = t->request_alloc(ctx, t->engine);
			if (IS_ERR(rq)) {
				mutex_unlock(BKL);
				err = PTR_ERR(rq);
				count = n;
				break;
			}

			err = i915_sw_fence_await_sw_fence_gfp(&rq->submit,
							       submit,
							       GFP_KERNEL);

			requests[n] = i915_request_get(rq);
			i915_request_add(rq);

			mutex_unlock(BKL);

			if (err >= 0)
				err = i915_sw_fence_await_dma_fence(wait,
								    &rq->fence,
								    0,
								    GFP_KERNEL);

			if (err < 0) {
				i915_request_put(rq);
				count = n;
				break;
			}
		}

		i915_sw_fence_commit(submit);
		i915_sw_fence_commit(wait);

		if (!wait_event_timeout(wait->wait,
					i915_sw_fence_done(wait),
					HZ / 2)) {
			struct i915_request *rq = requests[count - 1];

			pr_err("waiting for %d fences (last %llx:%lld) on %s timed out!\n",
			       count,
			       rq->fence.context, rq->fence.seqno,
			       t->engine->name);
			i915_gem_set_wedged(t->engine->i915);
			GEM_BUG_ON(!i915_request_completed(rq));
			i915_sw_fence_wait(wait);
			err = -EIO;
		}

		for (n = 0; n < count; n++) {
			struct i915_request *rq = requests[n];

			if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
				      &rq->fence.flags)) {
				pr_err("%llu:%llu was not signaled!\n",
				       rq->fence.context, rq->fence.seqno);
				err = -EINVAL;
			}

			i915_request_put(rq);
		}

		heap_fence_put(wait);
		heap_fence_put(submit);

		if (err < 0)
			break;

		num_fences += count;
		num_waits++;

		cond_resched();
	}

	atomic_long_add(num_fences, &t->num_fences);
	atomic_long_add(num_waits, &t->num_waits);

	kfree(order);
out_requests:
	kfree(requests);
	return err;
}

static int mock_breadcrumbs_smoketest(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct smoketest t = {
		.engine = i915->engine[RCS0],
		.ncontexts = 1024,
		.max_batch = 1024,
		.request_alloc = __mock_request_alloc
	};
	unsigned int ncpus = num_online_cpus();
	struct task_struct **threads;
	unsigned int n;
	int ret = 0;

	/*
	 * Smoketest our breadcrumb/signal handling for requests across multiple
	 * threads. A very simple test to only catch the most egregious of bugs.
	 * See __igt_breadcrumbs_smoketest();
	 */

	threads = kmalloc_array(ncpus, sizeof(*threads), GFP_KERNEL);
	if (!threads)
		return -ENOMEM;

	t.contexts =
		kmalloc_array(t.ncontexts, sizeof(*t.contexts), GFP_KERNEL);
	if (!t.contexts) {
		ret = -ENOMEM;
		goto out_threads;
	}

	mutex_lock(&t.engine->i915->drm.struct_mutex);
	for (n = 0; n < t.ncontexts; n++) {
		t.contexts[n] = mock_context(t.engine->i915, "mock");
		if (!t.contexts[n]) {
			ret = -ENOMEM;
			goto out_contexts;
		}
	}
	mutex_unlock(&t.engine->i915->drm.struct_mutex);

	for (n = 0; n < ncpus; n++) {
		threads[n] = kthread_run(__igt_breadcrumbs_smoketest,
					 &t, "igt/%d", n);
		if (IS_ERR(threads[n])) {
			ret = PTR_ERR(threads[n]);
			ncpus = n;
			break;
		}

		get_task_struct(threads[n]);
	}

	msleep(jiffies_to_msecs(i915_selftest.timeout_jiffies));

	for (n = 0; n < ncpus; n++) {
		int err;

		err = kthread_stop(threads[n]);
		if (err < 0 && !ret)
			ret = err;

		put_task_struct(threads[n]);
	}
	pr_info("Completed %lu waits for %lu fences across %d cpus\n",
		atomic_long_read(&t.num_waits),
		atomic_long_read(&t.num_fences),
		ncpus);

	mutex_lock(&t.engine->i915->drm.struct_mutex);
out_contexts:
	for (n = 0; n < t.ncontexts; n++) {
		if (!t.contexts[n])
			break;
		mock_context_close(t.contexts[n]);
	}
	mutex_unlock(&t.engine->i915->drm.struct_mutex);
	kfree(t.contexts);
out_threads:
	kfree(threads);

	return ret;
}

int i915_request_mock_selftests(void)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(igt_add_request),
		SUBTEST(igt_wait_request),
		SUBTEST(igt_fence_wait),
		SUBTEST(igt_request_rewind),
		SUBTEST(mock_breadcrumbs_smoketest),
	};
	struct drm_i915_private *i915;
	intel_wakeref_t wakeref;
	int err = 0;

	i915 = mock_gem_device();
	if (!i915)
		return -ENOMEM;

	with_intel_runtime_pm(&i915->runtime_pm, wakeref)
		err = i915_subtests(tests, i915);

	drm_dev_put(&i915->drm);

	return err;
}

static int live_nop_request(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct intel_engine_cs *engine;
	intel_wakeref_t wakeref;
	struct igt_live_test t;
	unsigned int id;
	int err = -ENODEV;

	/* Submit various sized batches of empty requests, to each engine
	 * (individually), and wait for the batch to complete. We can check
	 * the overhead of submitting requests to the hardware.
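	 *
	 * Batch sizes step through the primes, so we report both the latency
	 * of a single request (prime == 1) and the amortised cost per request
	 * for the largest batch completed before the timeout.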
	 */

	mutex_lock(&i915->drm.struct_mutex);
	wakeref = intel_runtime_pm_get(&i915->runtime_pm);

	for_each_engine(engine, i915, id) {
		struct i915_request *request = NULL;
		unsigned long n, prime;
		IGT_TIMEOUT(end_time);
		ktime_t times[2] = {};

		err = igt_live_test_begin(&t, i915, __func__, engine->name);
		if (err)
			goto out_unlock;

		for_each_prime_number_from(prime, 1, 8192) {
			times[1] = ktime_get_raw();

			for (n = 0; n < prime; n++) {
				request = i915_request_create(engine->kernel_context);
				if (IS_ERR(request)) {
					err = PTR_ERR(request);
					goto out_unlock;
				}

				/* This space is left intentionally blank.
				 *
				 * We do not actually want to perform any
				 * action with this request, we just want
				 * to measure the latency in allocation
				 * and submission of our breadcrumbs -
				 * ensuring that the bare request is sufficient
				 * for the system to work (i.e. proper HEAD
				 * tracking of the rings, interrupt handling,
				 * etc). It also gives us the lowest bounds
				 * for latency.
				 */

				i915_request_add(request);
			}
			i915_request_wait(request, 0, MAX_SCHEDULE_TIMEOUT);

			times[1] = ktime_sub(ktime_get_raw(), times[1]);
			if (prime == 1)
				times[0] = times[1];

			if (__igt_timeout(end_time, NULL))
				break;
		}

		err = igt_live_test_end(&t);
		if (err)
			goto out_unlock;

		pr_info("Request latencies on %s: 1 = %lluns, %lu = %lluns\n",
			engine->name,
			ktime_to_ns(times[0]),
			prime, div64_u64(ktime_to_ns(times[1]), prime));
	}

out_unlock:
	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
	mutex_unlock(&i915->drm.struct_mutex);
	return err;
}

static struct i915_vma *empty_batch(struct drm_i915_private *i915)
{
	struct drm_i915_gem_object *obj;
	struct i915_vma *vma;
	u32 *cmd;
	int err;

	obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
	if (IS_ERR(obj))
		return ERR_CAST(obj);

	cmd = i915_gem_object_pin_map(obj, I915_MAP_WB);
	if (IS_ERR(cmd)) {
		err = PTR_ERR(cmd);
		goto err;
	}

	*cmd = MI_BATCH_BUFFER_END;

	__i915_gem_object_flush_map(obj, 0, 64);
	i915_gem_object_unpin_map(obj);

	i915_gem_chipset_flush(i915);

	vma = i915_vma_instance(obj, &i915->ggtt.vm, NULL);
	if (IS_ERR(vma)) {
		err = PTR_ERR(vma);
		goto err;
	}

	err = i915_vma_pin(vma, 0, 0, PIN_USER | PIN_GLOBAL);
	if (err)
		goto err;

	return vma;

err:
	i915_gem_object_put(obj);
	return ERR_PTR(err);
}

static struct i915_request *
empty_request(struct intel_engine_cs *engine,
	      struct i915_vma *batch)
{
	struct i915_request *request;
	int err;

	request = i915_request_create(engine->kernel_context);
	if (IS_ERR(request))
		return request;

	err = engine->emit_bb_start(request,
				    batch->node.start,
				    batch->node.size,
				    I915_DISPATCH_SECURE);
	if (err)
		goto out_request;

out_request:
	i915_request_add(request);
	return err ? ERR_PTR(err) : request;
}

static int live_empty_request(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct intel_engine_cs *engine;
	intel_wakeref_t wakeref;
	struct igt_live_test t;
	struct i915_vma *batch;
	unsigned int id;
	int err = 0;

	/* Submit various sized batches of empty requests, to each engine
	 * (individually), and wait for the batch to complete. We can check
	 * the overhead of submitting requests to the hardware.
	 */

	mutex_lock(&i915->drm.struct_mutex);
	wakeref = intel_runtime_pm_get(&i915->runtime_pm);

	batch = empty_batch(i915);
	if (IS_ERR(batch)) {
		err = PTR_ERR(batch);
		goto out_unlock;
	}

	for_each_engine(engine, i915, id) {
		IGT_TIMEOUT(end_time);
		struct i915_request *request;
		unsigned long n, prime;
		ktime_t times[2] = {};

		err = igt_live_test_begin(&t, i915, __func__, engine->name);
		if (err)
			goto out_batch;

		/* Warmup / preload */
		request = empty_request(engine, batch);
		if (IS_ERR(request)) {
			err = PTR_ERR(request);
			goto out_batch;
		}
		i915_request_wait(request, 0, MAX_SCHEDULE_TIMEOUT);

		for_each_prime_number_from(prime, 1, 8192) {
			times[1] = ktime_get_raw();

			for (n = 0; n < prime; n++) {
				request = empty_request(engine, batch);
				if (IS_ERR(request)) {
					err = PTR_ERR(request);
					goto out_batch;
				}
			}
			i915_request_wait(request, 0, MAX_SCHEDULE_TIMEOUT);

			times[1] = ktime_sub(ktime_get_raw(), times[1]);
			if (prime == 1)
				times[0] = times[1];

			if (__igt_timeout(end_time, NULL))
				break;
		}

		err = igt_live_test_end(&t);
		if (err)
			goto out_batch;

		pr_info("Batch latencies on %s: 1 = %lluns, %lu = %lluns\n",
			engine->name,
			ktime_to_ns(times[0]),
			prime, div64_u64(ktime_to_ns(times[1]), prime));
	}

out_batch:
	i915_vma_unpin(batch);
	i915_vma_put(batch);
out_unlock:
	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
	mutex_unlock(&i915->drm.struct_mutex);
	return err;
}

static struct i915_vma *recursive_batch(struct drm_i915_private *i915)
{
	struct i915_gem_context *ctx = i915->kernel_context;
	struct i915_address_space *vm = ctx->vm ?: &i915->ggtt.vm;
	struct drm_i915_gem_object *obj;
	const int gen = INTEL_GEN(i915);
	struct i915_vma *vma;
	u32 *cmd;
	int err;

	obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
	if (IS_ERR(obj))
		return ERR_CAST(obj);

	vma = i915_vma_instance(obj, vm, NULL);
	if (IS_ERR(vma)) {
		err = PTR_ERR(vma);
		goto err;
	}

	err = i915_vma_pin(vma, 0, 0, PIN_USER);
	if (err)
		goto err;

	cmd = i915_gem_object_pin_map(obj, I915_MAP_WC);
	if (IS_ERR(cmd)) {
		err = PTR_ERR(cmd);
		goto err;
	}

	if (gen >= 8) {
		*cmd++ = MI_BATCH_BUFFER_START | 1 << 8 | 1;
		*cmd++ = lower_32_bits(vma->node.start);
		*cmd++ = upper_32_bits(vma->node.start);
	} else if (gen >= 6) {
		*cmd++ = MI_BATCH_BUFFER_START | 1 << 8;
		*cmd++ = lower_32_bits(vma->node.start);
	} else {
		*cmd++ = MI_BATCH_BUFFER_START | MI_BATCH_GTT;
		*cmd++ = lower_32_bits(vma->node.start);
	}
	*cmd++ = MI_BATCH_BUFFER_END; /* terminate early in case of error */

	__i915_gem_object_flush_map(obj, 0, 64);
	i915_gem_object_unpin_map(obj);

	i915_gem_chipset_flush(i915);

	return vma;

err:
	i915_gem_object_put(obj);
	return ERR_PTR(err);
}

static int recursive_batch_resolve(struct i915_vma *batch)
{
	u32 *cmd;

	cmd = i915_gem_object_pin_map(batch->obj, I915_MAP_WC);
	if (IS_ERR(cmd))
		return PTR_ERR(cmd);

	*cmd = MI_BATCH_BUFFER_END;
	i915_gem_chipset_flush(batch->vm->i915);

	i915_gem_object_unpin_map(batch->obj);

	return 0;
}

static int live_all_engines(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct intel_engine_cs *engine;
	struct i915_request *request[I915_NUM_ENGINES] = {};
	intel_wakeref_t wakeref;
	struct igt_live_test t;
	struct i915_vma *batch;
	unsigned int id;
	int err;

	/* Check we can submit requests to all engines simultaneously. We
	 * send a recursive batch to each engine - checking that we don't
	 * block doing so, and that they don't complete too soon.
	 */

	mutex_lock(&i915->drm.struct_mutex);
	wakeref = intel_runtime_pm_get(&i915->runtime_pm);

	err = igt_live_test_begin(&t, i915, __func__, "");
	if (err)
		goto out_unlock;

	batch = recursive_batch(i915);
	if (IS_ERR(batch)) {
		err = PTR_ERR(batch);
		pr_err("%s: Unable to create batch, err=%d\n", __func__, err);
		goto out_unlock;
	}

	for_each_engine(engine, i915, id) {
		request[id] = i915_request_create(engine->kernel_context);
		if (IS_ERR(request[id])) {
			err = PTR_ERR(request[id]);
			pr_err("%s: Request allocation failed with err=%d\n",
			       __func__, err);
			goto out_request;
		}

		err = engine->emit_bb_start(request[id],
					    batch->node.start,
					    batch->node.size,
					    0);
		GEM_BUG_ON(err);
		request[id]->batch = batch;

		i915_vma_lock(batch);
		err = i915_vma_move_to_active(batch, request[id], 0);
		i915_vma_unlock(batch);
		GEM_BUG_ON(err);

		i915_request_get(request[id]);
		i915_request_add(request[id]);
	}

	for_each_engine(engine, i915, id) {
		if (i915_request_completed(request[id])) {
			pr_err("%s(%s): request completed too early!\n",
			       __func__, engine->name);
			err = -EINVAL;
			goto out_request;
		}
	}

	err = recursive_batch_resolve(batch);
	if (err) {
		pr_err("%s: failed to resolve batch, err=%d\n", __func__, err);
		goto out_request;
	}

	for_each_engine(engine, i915, id) {
		long timeout;

		timeout = i915_request_wait(request[id], 0,
					    MAX_SCHEDULE_TIMEOUT);
		if (timeout < 0) {
			err = timeout;
			pr_err("%s: error waiting for request on %s, err=%d\n",
			       __func__, engine->name, err);
			goto out_request;
		}

		GEM_BUG_ON(!i915_request_completed(request[id]));
		i915_request_put(request[id]);
		request[id] = NULL;
	}

	err = igt_live_test_end(&t);

out_request:
	for_each_engine(engine, i915, id)
		if (request[id])
			i915_request_put(request[id]);
	i915_vma_unpin(batch);
	i915_vma_put(batch);
out_unlock:
	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
	mutex_unlock(&i915->drm.struct_mutex);
	return err;
}

static int live_sequential_engines(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct i915_request *request[I915_NUM_ENGINES] = {};
	struct i915_request *prev = NULL;
	struct intel_engine_cs *engine;
	intel_wakeref_t wakeref;
	struct igt_live_test t;
	unsigned int id;
	int err;

	/* Check we can submit requests to all engines sequentially, such
	 * that each successive request waits for the earlier ones. This
	 * tests that we don't execute requests out of order, even though
	 * they are running on independent engines.
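	 *
	 * Each request is chained to its predecessor on the previous engine
	 * via i915_request_await_dma_fence(), so completion must ripple
	 * through in submission order.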
	 */

	mutex_lock(&i915->drm.struct_mutex);
	wakeref = intel_runtime_pm_get(&i915->runtime_pm);

	err = igt_live_test_begin(&t, i915, __func__, "");
	if (err)
		goto out_unlock;

	for_each_engine(engine, i915, id) {
		struct i915_vma *batch;

		batch = recursive_batch(i915);
		if (IS_ERR(batch)) {
			err = PTR_ERR(batch);
			pr_err("%s: Unable to create batch for %s, err=%d\n",
			       __func__, engine->name, err);
			goto out_unlock;
		}

		request[id] = i915_request_create(engine->kernel_context);
		if (IS_ERR(request[id])) {
			err = PTR_ERR(request[id]);
			pr_err("%s: Request allocation failed for %s with err=%d\n",
			       __func__, engine->name, err);
			goto out_request;
		}

		if (prev) {
			err = i915_request_await_dma_fence(request[id],
							   &prev->fence);
			if (err) {
				i915_request_add(request[id]);
				pr_err("%s: Request await failed for %s with err=%d\n",
				       __func__, engine->name, err);
				goto out_request;
			}
		}

		err = engine->emit_bb_start(request[id],
					    batch->node.start,
					    batch->node.size,
					    0);
		GEM_BUG_ON(err);
		request[id]->batch = batch;

		i915_vma_lock(batch);
		err = i915_vma_move_to_active(batch, request[id], 0);
		i915_vma_unlock(batch);
		GEM_BUG_ON(err);

		i915_request_get(request[id]);
		i915_request_add(request[id]);

		prev = request[id];
	}

	for_each_engine(engine, i915, id) {
		long timeout;

		if (i915_request_completed(request[id])) {
			pr_err("%s(%s): request completed too early!\n",
			       __func__, engine->name);
			err = -EINVAL;
			goto out_request;
		}

		err = recursive_batch_resolve(request[id]->batch);
		if (err) {
			pr_err("%s: failed to resolve batch, err=%d\n",
			       __func__, err);
			goto out_request;
		}

		timeout = i915_request_wait(request[id], 0,
					    MAX_SCHEDULE_TIMEOUT);
		if (timeout < 0) {
			err = timeout;
			pr_err("%s: error waiting for request on %s, err=%d\n",
			       __func__, engine->name, err);
			goto out_request;
		}

		GEM_BUG_ON(!i915_request_completed(request[id]));
	}

	err = igt_live_test_end(&t);

out_request:
	for_each_engine(engine, i915, id) {
		u32 *cmd;

		if (!request[id])
			break;

		cmd = i915_gem_object_pin_map(request[id]->batch->obj,
					      I915_MAP_WC);
		if (!IS_ERR(cmd)) {
			*cmd = MI_BATCH_BUFFER_END;
			i915_gem_chipset_flush(i915);

			i915_gem_object_unpin_map(request[id]->batch->obj);
		}

		i915_vma_put(request[id]->batch);
		i915_request_put(request[id]);
	}
out_unlock:
	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
	mutex_unlock(&i915->drm.struct_mutex);
	return err;
}

static int
max_batches(struct i915_gem_context *ctx, struct intel_engine_cs *engine)
{
	struct i915_request *rq;
	int ret;

	/*
	 * Before execlists, all contexts share the same ringbuffer. With
	 * execlists, each context/engine has a separate ringbuffer and
	 * for the purposes of this test, inexhaustible.
	 *
	 * For the global ringbuffer though, we have to be very careful
	 * that we do not wrap while preventing the execution of requests
	 * with an unsignaled fence.
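	 *
	 * To that end, measure the ring space consumed by a single nop
	 * request and plan to fill at most half of the space remaining.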
	 */
	if (HAS_EXECLISTS(ctx->i915))
		return INT_MAX;

	rq = igt_request_alloc(ctx, engine);
	if (IS_ERR(rq)) {
		ret = PTR_ERR(rq);
	} else {
		int sz;

		ret = rq->ring->size - rq->reserved_space;
		i915_request_add(rq);

		sz = rq->ring->emit - rq->head;
		if (sz < 0)
			sz += rq->ring->size;
		ret /= sz;
		ret /= 2; /* leave half spare, in case of emergency! */
	}

	return ret;
}

static int live_breadcrumbs_smoketest(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct smoketest t[I915_NUM_ENGINES];
	unsigned int ncpus = num_online_cpus();
	unsigned long num_waits, num_fences;
	struct intel_engine_cs *engine;
	struct task_struct **threads;
	struct igt_live_test live;
	enum intel_engine_id id;
	intel_wakeref_t wakeref;
	struct drm_file *file;
	unsigned int n;
	int ret = 0;

	/*
	 * Smoketest our breadcrumb/signal handling for requests across multiple
	 * threads. A very simple test to only catch the most egregious of bugs.
	 * See __igt_breadcrumbs_smoketest();
	 *
	 * On real hardware this time.
	 */

	wakeref = intel_runtime_pm_get(&i915->runtime_pm);

	file = mock_file(i915);
	if (IS_ERR(file)) {
		ret = PTR_ERR(file);
		goto out_rpm;
	}

	threads = kcalloc(ncpus * I915_NUM_ENGINES,
			  sizeof(*threads),
			  GFP_KERNEL);
	if (!threads) {
		ret = -ENOMEM;
		goto out_file;
	}

	memset(&t[0], 0, sizeof(t[0]));
	t[0].request_alloc = __live_request_alloc;
	t[0].ncontexts = 64;
	t[0].contexts = kmalloc_array(t[0].ncontexts,
				      sizeof(*t[0].contexts),
				      GFP_KERNEL);
	if (!t[0].contexts) {
		ret = -ENOMEM;
		goto out_threads;
	}

	mutex_lock(&i915->drm.struct_mutex);
	for (n = 0; n < t[0].ncontexts; n++) {
		t[0].contexts[n] = live_context(i915, file);
		if (!t[0].contexts[n]) {
			ret = -ENOMEM;
			goto out_contexts;
		}
	}

	ret = igt_live_test_begin(&live, i915, __func__, "");
	if (ret)
		goto out_contexts;

	for_each_engine(engine, i915, id) {
		t[id] = t[0];
		t[id].engine = engine;
		t[id].max_batch = max_batches(t[0].contexts[0], engine);
		if (t[id].max_batch < 0) {
			ret = t[id].max_batch;
			mutex_unlock(&i915->drm.struct_mutex);
			goto out_flush;
		}
		/* One ring interleaved between requests from all cpus */
		t[id].max_batch /= num_online_cpus() + 1;
		pr_debug("Limiting batches to %d requests on %s\n",
			 t[id].max_batch, engine->name);

		for (n = 0; n < ncpus; n++) {
			struct task_struct *tsk;

			tsk = kthread_run(__igt_breadcrumbs_smoketest,
					  &t[id], "igt/%d.%d", id, n);
			if (IS_ERR(tsk)) {
				ret = PTR_ERR(tsk);
				mutex_unlock(&i915->drm.struct_mutex);
				goto out_flush;
			}

			get_task_struct(tsk);
			threads[id * ncpus + n] = tsk;
		}
	}
	mutex_unlock(&i915->drm.struct_mutex);

	msleep(jiffies_to_msecs(i915_selftest.timeout_jiffies));

out_flush:
	num_waits = 0;
	num_fences = 0;
	for_each_engine(engine, i915, id) {
		for (n = 0; n < ncpus; n++) {
			struct task_struct *tsk = threads[id * ncpus + n];
			int err;

			if (!tsk)
				continue;

			err = kthread_stop(tsk);
			if (err < 0 && !ret)
				ret = err;

			put_task_struct(tsk);
		}

		num_waits += atomic_long_read(&t[id].num_waits);
		num_fences += atomic_long_read(&t[id].num_fences);
	}
	pr_info("Completed %lu waits for %lu fences across %d engines and %d cpus\n",
		num_waits, num_fences, RUNTIME_INFO(i915)->num_engines, ncpus);

	mutex_lock(&i915->drm.struct_mutex);
	ret = igt_live_test_end(&live) ?: ret;
out_contexts:
	mutex_unlock(&i915->drm.struct_mutex);
	kfree(t[0].contexts);
out_threads:
	kfree(threads);
out_file:
	mock_file_free(i915, file);
out_rpm:
	intel_runtime_pm_put(&i915->runtime_pm, wakeref);

	return ret;
}

int i915_request_live_selftests(struct drm_i915_private *i915)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(live_nop_request),
		SUBTEST(live_all_engines),
		SUBTEST(live_sequential_engines),
		SUBTEST(live_empty_request),
		SUBTEST(live_breadcrumbs_smoketest),
	};

	if (i915_terminally_wedged(i915))
		return 0;

	return i915_subtests(tests, i915);
}