/*
 * Copyright © 2016 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 */

#include <linux/prime_numbers.h>

#include "../i915_selftest.h"
#include "i915_random.h"
#include "igt_live_test.h"
#include "lib_sw_fence.h"

#include "mock_context.h"
#include "mock_drm.h"
#include "mock_gem_device.h"

static int igt_add_request(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct i915_request *request;
	int err = -ENOMEM;

	/* Basic preliminary test to create a request and let it loose! */

	mutex_lock(&i915->drm.struct_mutex);
	request = mock_request(i915->engine[RCS],
			       i915->kernel_context,
			       HZ / 10);
	if (!request)
		goto out_unlock;

	i915_request_add(request);

	err = 0;
out_unlock:
	mutex_unlock(&i915->drm.struct_mutex);
	return err;
}

static int igt_wait_request(void *arg)
{
	const long T = HZ / 4;
	struct drm_i915_private *i915 = arg;
	struct i915_request *request;
	int err = -EINVAL;

	/* Submit a request, then wait upon it */

	mutex_lock(&i915->drm.struct_mutex);
	request = mock_request(i915->engine[RCS], i915->kernel_context, T);
	if (!request) {
		err = -ENOMEM;
		goto out_unlock;
	}

	if (i915_request_wait(request, I915_WAIT_LOCKED, 0) != -ETIME) {
		pr_err("request wait (busy query) succeeded (expected timeout before submit!)\n");
		goto out_unlock;
	}

	if (i915_request_wait(request, I915_WAIT_LOCKED, T) != -ETIME) {
		pr_err("request wait succeeded (expected timeout before submit!)\n");
		goto out_unlock;
	}

	if (i915_request_completed(request)) {
		pr_err("request completed before submit!!\n");
		goto out_unlock;
	}

	i915_request_add(request);

	if (i915_request_wait(request, I915_WAIT_LOCKED, 0) != -ETIME) {
		pr_err("request wait (busy query) succeeded (expected timeout after submit!)\n");
		goto out_unlock;
	}

	if (i915_request_completed(request)) {
		pr_err("request completed immediately!\n");
		goto out_unlock;
	}

	if (i915_request_wait(request, I915_WAIT_LOCKED, T / 2) != -ETIME) {
		pr_err("request wait succeeded (expected timeout!)\n");
		goto out_unlock;
	}

	if (i915_request_wait(request, I915_WAIT_LOCKED, T) == -ETIME) {
		pr_err("request wait timed out!\n");
		goto out_unlock;
	}

	if (!i915_request_completed(request)) {
		pr_err("request not complete after waiting!\n");
		goto out_unlock;
	}

	if (i915_request_wait(request, I915_WAIT_LOCKED, T) == -ETIME) {
		pr_err("request wait timed out when already complete!\n");
		goto out_unlock;
	}

	err = 0;
out_unlock:
	mock_device_flush(i915);
	mutex_unlock(&i915->drm.struct_mutex);
	return err;
}

static int igt_fence_wait(void *arg)
{
	const long T = HZ / 4;
	struct drm_i915_private *i915 = arg;
	struct i915_request *request;
	int err = -EINVAL;

	/* Submit a request, treat it as a fence and wait upon it */

	mutex_lock(&i915->drm.struct_mutex);
	request = mock_request(i915->engine[RCS], i915->kernel_context, T);
	if (!request) {
		err = -ENOMEM;
		goto out_locked;
	}
	mutex_unlock(&i915->drm.struct_mutex); /* safe as we are single user */

	if (dma_fence_wait_timeout(&request->fence, false, T) != -ETIME) {
		pr_err("fence wait success before submit (expected timeout)!\n");
		goto out_device;
	}

	mutex_lock(&i915->drm.struct_mutex);
	i915_request_add(request);
	mutex_unlock(&i915->drm.struct_mutex);

	if (dma_fence_is_signaled(&request->fence)) {
		pr_err("fence signaled immediately!\n");
		goto out_device;
	}

	if (dma_fence_wait_timeout(&request->fence, false, T / 2) != -ETIME) {
		pr_err("fence wait success after submit (expected timeout)!\n");
		goto out_device;
	}

	if (dma_fence_wait_timeout(&request->fence, false, T) <= 0) {
		pr_err("fence wait timed out (expected success)!\n");
		goto out_device;
	}

	if (!dma_fence_is_signaled(&request->fence)) {
		pr_err("fence unsignaled after waiting!\n");
		goto out_device;
	}

	if (dma_fence_wait_timeout(&request->fence, false, T) <= 0) {
		pr_err("fence wait timed out when complete (expected success)!\n");
		goto out_device;
	}

	err = 0;
out_device:
	mutex_lock(&i915->drm.struct_mutex);
out_locked:
	mock_device_flush(i915);
	mutex_unlock(&i915->drm.struct_mutex);
	return err;
}

static int igt_request_rewind(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct i915_request *request, *vip;
	struct i915_gem_context *ctx[2];
	int err = -EINVAL;

	mutex_lock(&i915->drm.struct_mutex);
	ctx[0] = mock_context(i915, "A");
	request = mock_request(i915->engine[RCS], ctx[0], 2 * HZ);
	if (!request) {
		err = -ENOMEM;
		goto err_context_0;
	}

	i915_request_get(request);
	i915_request_add(request);

	ctx[1] = mock_context(i915, "B");
	vip = mock_request(i915->engine[RCS], ctx[1], 0);
	if (!vip) {
		err = -ENOMEM;
		goto err_context_1;
	}

	/* Simulate preemption by manual reordering */
	if (!mock_cancel_request(request)) {
		pr_err("failed to cancel request (already executed)!\n");
		i915_request_add(vip);
		goto err_context_1;
	}
	i915_request_get(vip);
	i915_request_add(vip);
	rcu_read_lock();
	request->engine->submit_request(request);
	rcu_read_unlock();

	mutex_unlock(&i915->drm.struct_mutex);

	if (i915_request_wait(vip, 0, HZ) == -ETIME) {
		pr_err("timed out waiting for high priority request, vip.seqno=%d, current seqno=%d\n",
		       vip->global_seqno, intel_engine_get_seqno(i915->engine[RCS]));
		goto err;
	}

	if (i915_request_completed(request)) {
		pr_err("low priority request already completed\n");
		goto err;
	}

	err = 0;
err:
	i915_request_put(vip);
	mutex_lock(&i915->drm.struct_mutex);
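	/* Unwind: drop our remaining references and close both contexts. */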
err_context_1:
	mock_context_close(ctx[1]);
	i915_request_put(request);
err_context_0:
	mock_context_close(ctx[0]);
	mock_device_flush(i915);
	mutex_unlock(&i915->drm.struct_mutex);
	return err;
}

struct smoketest {
	struct intel_engine_cs *engine;
	struct i915_gem_context **contexts;
	atomic_long_t num_waits, num_fences;
	int ncontexts, max_batch;
	struct i915_request *(*request_alloc)(struct i915_gem_context *,
					      struct intel_engine_cs *);
};

static struct i915_request *
__mock_request_alloc(struct i915_gem_context *ctx,
		     struct intel_engine_cs *engine)
{
	return mock_request(engine, ctx, 0);
}

static struct i915_request *
__live_request_alloc(struct i915_gem_context *ctx,
		     struct intel_engine_cs *engine)
{
	return i915_request_alloc(engine, ctx);
}

static int __igt_breadcrumbs_smoketest(void *arg)
{
	struct smoketest *t = arg;
	struct mutex * const BKL = &t->engine->i915->drm.struct_mutex;
	const unsigned int max_batch = min(t->ncontexts, t->max_batch) - 1;
	const unsigned int total = 4 * t->ncontexts + 1;
	unsigned int num_waits = 0, num_fences = 0;
	struct i915_request **requests;
	I915_RND_STATE(prng);
	unsigned int *order;
	int err = 0;

	/*
	 * A very simple test to catch the most egregious of list handling bugs.
	 *
	 * At its heart, we simply create oodles of requests running across
	 * multiple kthreads and enable signaling on them, for the sole purpose
	 * of stressing our breadcrumb handling. The only inspection we do is
	 * that the fences were marked as signaled.
	 */

	requests = kmalloc_array(total, sizeof(*requests), GFP_KERNEL);
	if (!requests)
		return -ENOMEM;

	order = i915_random_order(total, &prng);
	if (!order) {
		err = -ENOMEM;
		goto out_requests;
	}

	while (!kthread_should_stop()) {
		struct i915_sw_fence *submit, *wait;
		unsigned int n, count;

		submit = heap_fence_create(GFP_KERNEL);
		if (!submit) {
			err = -ENOMEM;
			break;
		}

		wait = heap_fence_create(GFP_KERNEL);
		if (!wait) {
			i915_sw_fence_commit(submit);
			heap_fence_put(submit);
			err = -ENOMEM;
			break;
		}

		i915_random_reorder(order, total, &prng);
		count = 1 + i915_prandom_u32_max_state(max_batch, &prng);

		for (n = 0; n < count; n++) {
			struct i915_gem_context *ctx =
				t->contexts[order[n] % t->ncontexts];
			struct i915_request *rq;

			mutex_lock(BKL);

			rq = t->request_alloc(ctx, t->engine);
			if (IS_ERR(rq)) {
				mutex_unlock(BKL);
				err = PTR_ERR(rq);
				count = n;
				break;
			}

			err = i915_sw_fence_await_sw_fence_gfp(&rq->submit,
							       submit,
							       GFP_KERNEL);

			requests[n] = i915_request_get(rq);
			i915_request_add(rq);

			mutex_unlock(BKL);

			if (err >= 0)
				err = i915_sw_fence_await_dma_fence(wait,
								    &rq->fence,
								    0,
								    GFP_KERNEL);

			if (err < 0) {
				i915_request_put(rq);
				count = n;
				break;
			}
		}

		i915_sw_fence_commit(submit);
		i915_sw_fence_commit(wait);

		if (!wait_event_timeout(wait->wait,
					i915_sw_fence_done(wait),
					HZ / 2)) {
			struct i915_request *rq = requests[count - 1];

			pr_err("waiting for %d fences (last %llx:%lld) on %s timed out!\n",
			       count,
			       rq->fence.context, rq->fence.seqno,
			       t->engine->name);
			i915_gem_set_wedged(t->engine->i915);
			GEM_BUG_ON(!i915_request_completed(rq));
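			/*
			 * Wedging the device cancels all in-flight requests,
			 * so the background wait fence is expected to be
			 * signaled and this flush should not block for long.
			 */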
			i915_sw_fence_wait(wait);
			err = -EIO;
		}

		for (n = 0; n < count; n++) {
			struct i915_request *rq = requests[n];

			if (!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
				      &rq->fence.flags)) {
				pr_err("%llu:%llu was not signaled!\n",
				       rq->fence.context, rq->fence.seqno);
				err = -EINVAL;
			}

			i915_request_put(rq);
		}

		heap_fence_put(wait);
		heap_fence_put(submit);

		if (err < 0)
			break;

		num_fences += count;
		num_waits++;

		cond_resched();
	}

	atomic_long_add(num_fences, &t->num_fences);
	atomic_long_add(num_waits, &t->num_waits);

	kfree(order);
out_requests:
	kfree(requests);
	return err;
}

static int mock_breadcrumbs_smoketest(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct smoketest t = {
		.engine = i915->engine[RCS],
		.ncontexts = 1024,
		.max_batch = 1024,
		.request_alloc = __mock_request_alloc
	};
	unsigned int ncpus = num_online_cpus();
	struct task_struct **threads;
	unsigned int n;
	int ret = 0;

	/*
	 * Smoketest our breadcrumb/signal handling for requests across multiple
	 * threads. A very simple test to only catch the most egregious of bugs.
	 * See __igt_breadcrumbs_smoketest();
	 */

	threads = kmalloc_array(ncpus, sizeof(*threads), GFP_KERNEL);
	if (!threads)
		return -ENOMEM;

	t.contexts =
		kmalloc_array(t.ncontexts, sizeof(*t.contexts), GFP_KERNEL);
	if (!t.contexts) {
		ret = -ENOMEM;
		goto out_threads;
	}

	mutex_lock(&t.engine->i915->drm.struct_mutex);
	for (n = 0; n < t.ncontexts; n++) {
		t.contexts[n] = mock_context(t.engine->i915, "mock");
		if (!t.contexts[n]) {
			ret = -ENOMEM;
			goto out_contexts;
		}
	}
	mutex_unlock(&t.engine->i915->drm.struct_mutex);

	for (n = 0; n < ncpus; n++) {
		threads[n] = kthread_run(__igt_breadcrumbs_smoketest,
					 &t, "igt/%d", n);
		if (IS_ERR(threads[n])) {
			ret = PTR_ERR(threads[n]);
			ncpus = n;
			break;
		}

		get_task_struct(threads[n]);
	}

	msleep(jiffies_to_msecs(i915_selftest.timeout_jiffies));

	for (n = 0; n < ncpus; n++) {
		int err;

		err = kthread_stop(threads[n]);
		if (err < 0 && !ret)
			ret = err;

		put_task_struct(threads[n]);
	}
	pr_info("Completed %lu waits for %lu fences across %d cpus\n",
		atomic_long_read(&t.num_waits),
		atomic_long_read(&t.num_fences),
		ncpus);

	mutex_lock(&t.engine->i915->drm.struct_mutex);
out_contexts:
	for (n = 0; n < t.ncontexts; n++) {
		if (!t.contexts[n])
			break;
		mock_context_close(t.contexts[n]);
	}
	mutex_unlock(&t.engine->i915->drm.struct_mutex);
	kfree(t.contexts);
out_threads:
	kfree(threads);

	return ret;
}

int i915_request_mock_selftests(void)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(igt_add_request),
		SUBTEST(igt_wait_request),
		SUBTEST(igt_fence_wait),
		SUBTEST(igt_request_rewind),
		SUBTEST(mock_breadcrumbs_smoketest),
	};
	struct drm_i915_private *i915;
	intel_wakeref_t wakeref;
	int err = 0;

	i915 = mock_gem_device();
	if (!i915)
		return -ENOMEM;

	with_intel_runtime_pm(i915, wakeref)
		err = i915_subtests(tests, i915);

	drm_dev_put(&i915->drm);

	return err;
}

static int live_nop_request(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct intel_engine_cs *engine;
	intel_wakeref_t wakeref;
	struct igt_live_test t;
	unsigned int id;
	int err = -ENODEV;

	/* Submit various sized batches of empty requests, to each engine
	 * (individually), and wait for the batch to complete. We can check
	 * the overhead of submitting requests to the hardware.
	 */

	mutex_lock(&i915->drm.struct_mutex);
	wakeref = intel_runtime_pm_get(i915);

	for_each_engine(engine, i915, id) {
		struct i915_request *request = NULL;
		unsigned long n, prime;
		IGT_TIMEOUT(end_time);
		ktime_t times[2] = {};

		err = igt_live_test_begin(&t, i915, __func__, engine->name);
		if (err)
			goto out_unlock;

		for_each_prime_number_from(prime, 1, 8192) {
			times[1] = ktime_get_raw();

			for (n = 0; n < prime; n++) {
				request = i915_request_alloc(engine,
							     i915->kernel_context);
				if (IS_ERR(request)) {
					err = PTR_ERR(request);
					goto out_unlock;
				}

				/* This space is left intentionally blank.
				 *
				 * We do not actually want to perform any
				 * action with this request, we just want
				 * to measure the latency in allocation
				 * and submission of our breadcrumbs -
				 * ensuring that the bare request is sufficient
				 * for the system to work (i.e. proper HEAD
				 * tracking of the rings, interrupt handling,
				 * etc). It also gives us the lowest bounds
				 * for latency.
				 */

				i915_request_add(request);
			}
			i915_request_wait(request,
					  I915_WAIT_LOCKED,
					  MAX_SCHEDULE_TIMEOUT);

			times[1] = ktime_sub(ktime_get_raw(), times[1]);
			if (prime == 1)
				times[0] = times[1];

			if (__igt_timeout(end_time, NULL))
				break;
		}

		err = igt_live_test_end(&t);
		if (err)
			goto out_unlock;

		pr_info("Request latencies on %s: 1 = %lluns, %lu = %lluns\n",
			engine->name,
			ktime_to_ns(times[0]),
			prime, div64_u64(ktime_to_ns(times[1]), prime));
	}

out_unlock:
	intel_runtime_pm_put(i915, wakeref);
	mutex_unlock(&i915->drm.struct_mutex);
	return err;
}

static struct i915_vma *empty_batch(struct drm_i915_private *i915)
{
	struct drm_i915_gem_object *obj;
	struct i915_vma *vma;
	u32 *cmd;
	int err;

	obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
	if (IS_ERR(obj))
		return ERR_CAST(obj);

	cmd = i915_gem_object_pin_map(obj, I915_MAP_WB);
	if (IS_ERR(cmd)) {
		err = PTR_ERR(cmd);
		goto err;
	}

	*cmd = MI_BATCH_BUFFER_END;
	i915_gem_chipset_flush(i915);

	i915_gem_object_unpin_map(obj);

	err = i915_gem_object_set_to_gtt_domain(obj, false);
	if (err)
		goto err;

	vma = i915_vma_instance(obj, &i915->ggtt.vm, NULL);
	if (IS_ERR(vma)) {
		err = PTR_ERR(vma);
		goto err;
	}

	err = i915_vma_pin(vma, 0, 0, PIN_USER | PIN_GLOBAL);
	if (err)
		goto err;

	return vma;

err:
	i915_gem_object_put(obj);
	return ERR_PTR(err);
}

static struct i915_request *
empty_request(struct intel_engine_cs *engine,
	      struct i915_vma *batch)
{
	struct i915_request *request;
	int err;

	request = i915_request_alloc(engine, engine->i915->kernel_context);
	if (IS_ERR(request))
		return request;

	err = engine->emit_bb_start(request,
				    batch->node.start,
				    batch->node.size,
				    I915_DISPATCH_SECURE);
	if (err)
		goto out_request;

out_request:
	i915_request_add(request);
	return err ? ERR_PTR(err) : request;
}

static int live_empty_request(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct intel_engine_cs *engine;
	intel_wakeref_t wakeref;
	struct igt_live_test t;
	struct i915_vma *batch;
	unsigned int id;
	int err = 0;

	/* Submit various sized batches of empty requests, to each engine
	 * (individually), and wait for the batch to complete. We can check
	 * the overhead of submitting requests to the hardware.
	 */

	mutex_lock(&i915->drm.struct_mutex);
	wakeref = intel_runtime_pm_get(i915);

	batch = empty_batch(i915);
	if (IS_ERR(batch)) {
		err = PTR_ERR(batch);
		goto out_unlock;
	}

	for_each_engine(engine, i915, id) {
		IGT_TIMEOUT(end_time);
		struct i915_request *request;
		unsigned long n, prime;
		ktime_t times[2] = {};

		err = igt_live_test_begin(&t, i915, __func__, engine->name);
		if (err)
			goto out_batch;

		/* Warmup / preload */
		request = empty_request(engine, batch);
		if (IS_ERR(request)) {
			err = PTR_ERR(request);
			goto out_batch;
		}
		i915_request_wait(request,
				  I915_WAIT_LOCKED,
				  MAX_SCHEDULE_TIMEOUT);

		for_each_prime_number_from(prime, 1, 8192) {
			times[1] = ktime_get_raw();

			for (n = 0; n < prime; n++) {
				request = empty_request(engine, batch);
				if (IS_ERR(request)) {
					err = PTR_ERR(request);
					goto out_batch;
				}
			}
			i915_request_wait(request,
					  I915_WAIT_LOCKED,
					  MAX_SCHEDULE_TIMEOUT);

			times[1] = ktime_sub(ktime_get_raw(), times[1]);
			if (prime == 1)
				times[0] = times[1];

			if (__igt_timeout(end_time, NULL))
				break;
		}

		err = igt_live_test_end(&t);
		if (err)
			goto out_batch;

		pr_info("Batch latencies on %s: 1 = %lluns, %lu = %lluns\n",
			engine->name,
			ktime_to_ns(times[0]),
			prime, div64_u64(ktime_to_ns(times[1]), prime));
	}

out_batch:
	i915_vma_unpin(batch);
	i915_vma_put(batch);
out_unlock:
	intel_runtime_pm_put(i915, wakeref);
	mutex_unlock(&i915->drm.struct_mutex);
	return err;
}

static struct i915_vma *recursive_batch(struct drm_i915_private *i915)
{
	struct i915_gem_context *ctx = i915->kernel_context;
	struct i915_address_space *vm =
		ctx->ppgtt ? &ctx->ppgtt->vm : &i915->ggtt.vm;
	struct drm_i915_gem_object *obj;
	const int gen = INTEL_GEN(i915);
	struct i915_vma *vma;
	u32 *cmd;
	int err;

	obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
	if (IS_ERR(obj))
		return ERR_CAST(obj);

	vma = i915_vma_instance(obj, vm, NULL);
	if (IS_ERR(vma)) {
		err = PTR_ERR(vma);
		goto err;
	}

	err = i915_vma_pin(vma, 0, 0, PIN_USER);
	if (err)
		goto err;

	err = i915_gem_object_set_to_wc_domain(obj, true);
	if (err)
		goto err;

	cmd = i915_gem_object_pin_map(obj, I915_MAP_WC);
	if (IS_ERR(cmd)) {
		err = PTR_ERR(cmd);
		goto err;
	}

	if (gen >= 8) {
		*cmd++ = MI_BATCH_BUFFER_START | 1 << 8 | 1;
		*cmd++ = lower_32_bits(vma->node.start);
		*cmd++ = upper_32_bits(vma->node.start);
	} else if (gen >= 6) {
		*cmd++ = MI_BATCH_BUFFER_START | 1 << 8;
		*cmd++ = lower_32_bits(vma->node.start);
	} else {
		*cmd++ = MI_BATCH_BUFFER_START | MI_BATCH_GTT;
		*cmd++ = lower_32_bits(vma->node.start);
	}
	*cmd++ = MI_BATCH_BUFFER_END; /* terminate early in case of error */
	i915_gem_chipset_flush(i915);

	i915_gem_object_unpin_map(obj);

	return vma;

err:
	i915_gem_object_put(obj);
	return ERR_PTR(err);
}

static int recursive_batch_resolve(struct i915_vma *batch)
{
	u32 *cmd;

	cmd = i915_gem_object_pin_map(batch->obj, I915_MAP_WC);
	if (IS_ERR(cmd))
		return PTR_ERR(cmd);

	*cmd = MI_BATCH_BUFFER_END;
	i915_gem_chipset_flush(batch->vm->i915);

	i915_gem_object_unpin_map(batch->obj);

	return 0;
}

static int live_all_engines(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct intel_engine_cs *engine;
	struct i915_request *request[I915_NUM_ENGINES];
	intel_wakeref_t wakeref;
	struct igt_live_test t;
	struct i915_vma *batch;
	unsigned int id;
	int err;

	/* Check we can submit requests to all engines simultaneously. We
	 * send a recursive batch to each engine - checking that we don't
	 * block doing so, and that they don't complete too soon.
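	 *
	 * Each batch loops back to its own start (see recursive_batch()),
	 * keeping its engine busy until recursive_batch_resolve() rewrites
	 * the first dword to MI_BATCH_BUFFER_END, at which point every
	 * request is free to complete.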
	 */

	mutex_lock(&i915->drm.struct_mutex);
	wakeref = intel_runtime_pm_get(i915);

	err = igt_live_test_begin(&t, i915, __func__, "");
	if (err)
		goto out_unlock;

	batch = recursive_batch(i915);
	if (IS_ERR(batch)) {
		err = PTR_ERR(batch);
		pr_err("%s: Unable to create batch, err=%d\n", __func__, err);
		goto out_unlock;
	}

	for_each_engine(engine, i915, id) {
		request[id] = i915_request_alloc(engine, i915->kernel_context);
		if (IS_ERR(request[id])) {
			err = PTR_ERR(request[id]);
			pr_err("%s: Request allocation failed with err=%d\n",
			       __func__, err);
			goto out_request;
		}

		err = engine->emit_bb_start(request[id],
					    batch->node.start,
					    batch->node.size,
					    0);
		GEM_BUG_ON(err);
		request[id]->batch = batch;

		if (!i915_gem_object_has_active_reference(batch->obj)) {
			i915_gem_object_get(batch->obj);
			i915_gem_object_set_active_reference(batch->obj);
		}

		err = i915_vma_move_to_active(batch, request[id], 0);
		GEM_BUG_ON(err);

		i915_request_get(request[id]);
		i915_request_add(request[id]);
	}

	for_each_engine(engine, i915, id) {
		if (i915_request_completed(request[id])) {
			pr_err("%s(%s): request completed too early!\n",
			       __func__, engine->name);
			err = -EINVAL;
			goto out_request;
		}
	}

	err = recursive_batch_resolve(batch);
	if (err) {
		pr_err("%s: failed to resolve batch, err=%d\n", __func__, err);
		goto out_request;
	}

	for_each_engine(engine, i915, id) {
		long timeout;

		timeout = i915_request_wait(request[id],
					    I915_WAIT_LOCKED,
					    MAX_SCHEDULE_TIMEOUT);
		if (timeout < 0) {
			err = timeout;
			pr_err("%s: error waiting for request on %s, err=%d\n",
			       __func__, engine->name, err);
			goto out_request;
		}

		GEM_BUG_ON(!i915_request_completed(request[id]));
		i915_request_put(request[id]);
		request[id] = NULL;
	}

	err = igt_live_test_end(&t);

out_request:
	for_each_engine(engine, i915, id)
		if (request[id])
			i915_request_put(request[id]);
	i915_vma_unpin(batch);
	i915_vma_put(batch);
out_unlock:
	intel_runtime_pm_put(i915, wakeref);
	mutex_unlock(&i915->drm.struct_mutex);
	return err;
}

static int live_sequential_engines(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct i915_request *request[I915_NUM_ENGINES] = {};
	struct i915_request *prev = NULL;
	struct intel_engine_cs *engine;
	intel_wakeref_t wakeref;
	struct igt_live_test t;
	unsigned int id;
	int err;

	/* Check we can submit requests to all engines sequentially, such
	 * that each successive request waits for the earlier ones. This
	 * tests that we don't execute requests out of order, even though
	 * they are running on independent engines.
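	 *
	 * Each request runs the self-spinning batch from recursive_batch()
	 * and awaits the previous engine's fence, so no batch may begin
	 * until all of its predecessors have completed.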
	 */

	mutex_lock(&i915->drm.struct_mutex);
	wakeref = intel_runtime_pm_get(i915);

	err = igt_live_test_begin(&t, i915, __func__, "");
	if (err)
		goto out_unlock;

	for_each_engine(engine, i915, id) {
		struct i915_vma *batch;

		batch = recursive_batch(i915);
		if (IS_ERR(batch)) {
			err = PTR_ERR(batch);
			pr_err("%s: Unable to create batch for %s, err=%d\n",
			       __func__, engine->name, err);
			goto out_unlock;
		}

		request[id] = i915_request_alloc(engine, i915->kernel_context);
		if (IS_ERR(request[id])) {
			err = PTR_ERR(request[id]);
			pr_err("%s: Request allocation failed for %s with err=%d\n",
			       __func__, engine->name, err);
			goto out_request;
		}

		if (prev) {
			err = i915_request_await_dma_fence(request[id],
							   &prev->fence);
			if (err) {
				i915_request_add(request[id]);
				pr_err("%s: Request await failed for %s with err=%d\n",
				       __func__, engine->name, err);
				goto out_request;
			}
		}

		err = engine->emit_bb_start(request[id],
					    batch->node.start,
					    batch->node.size,
					    0);
		GEM_BUG_ON(err);
		request[id]->batch = batch;

		err = i915_vma_move_to_active(batch, request[id], 0);
		GEM_BUG_ON(err);

		i915_gem_object_set_active_reference(batch->obj);
		i915_vma_get(batch);

		i915_request_get(request[id]);
		i915_request_add(request[id]);

		prev = request[id];
	}

	for_each_engine(engine, i915, id) {
		long timeout;

		if (i915_request_completed(request[id])) {
			pr_err("%s(%s): request completed too early!\n",
			       __func__, engine->name);
			err = -EINVAL;
			goto out_request;
		}

		err = recursive_batch_resolve(request[id]->batch);
		if (err) {
			pr_err("%s: failed to resolve batch, err=%d\n",
			       __func__, err);
			goto out_request;
		}

		timeout = i915_request_wait(request[id],
					    I915_WAIT_LOCKED,
					    MAX_SCHEDULE_TIMEOUT);
		if (timeout < 0) {
			err = timeout;
			pr_err("%s: error waiting for request on %s, err=%d\n",
			       __func__, engine->name, err);
			goto out_request;
		}

		GEM_BUG_ON(!i915_request_completed(request[id]));
	}

	err = igt_live_test_end(&t);

out_request:
	for_each_engine(engine, i915, id) {
		u32 *cmd;

		if (!request[id])
			break;

		cmd = i915_gem_object_pin_map(request[id]->batch->obj,
					      I915_MAP_WC);
		if (!IS_ERR(cmd)) {
			*cmd = MI_BATCH_BUFFER_END;
			i915_gem_chipset_flush(i915);

			i915_gem_object_unpin_map(request[id]->batch->obj);
		}

		i915_vma_put(request[id]->batch);
		i915_request_put(request[id]);
	}
out_unlock:
	intel_runtime_pm_put(i915, wakeref);
	mutex_unlock(&i915->drm.struct_mutex);
	return err;
}

static int
max_batches(struct i915_gem_context *ctx, struct intel_engine_cs *engine)
{
	struct i915_request *rq;
	int ret;

	/*
	 * Before execlists, all contexts share the same ringbuffer. With
	 * execlists, each context/engine has a separate ringbuffer and
	 * for the purposes of this test, inexhaustible.
	 *
	 * For the global ringbuffer though, we have to be very careful
	 * that we do not wrap while preventing the execution of requests
	 * with an unsignaled fence.
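	 *
	 * So measure the space consumed by a single nop request and use
	 * that to estimate how many requests fit in the ring, keeping
	 * half of the estimate spare for emergencies.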
	 */
	if (HAS_EXECLISTS(ctx->i915))
		return INT_MAX;

	rq = i915_request_alloc(engine, ctx);
	if (IS_ERR(rq)) {
		ret = PTR_ERR(rq);
	} else {
		int sz;

		ret = rq->ring->size - rq->reserved_space;
		i915_request_add(rq);

		sz = rq->ring->emit - rq->head;
		if (sz < 0)
			sz += rq->ring->size;
		ret /= sz;
		ret /= 2; /* leave half spare, in case of emergency! */
	}

	return ret;
}

static int live_breadcrumbs_smoketest(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct smoketest t[I915_NUM_ENGINES];
	unsigned int ncpus = num_online_cpus();
	unsigned long num_waits, num_fences;
	struct intel_engine_cs *engine;
	struct task_struct **threads;
	struct igt_live_test live;
	enum intel_engine_id id;
	intel_wakeref_t wakeref;
	struct drm_file *file;
	unsigned int n;
	int ret = 0;

	/*
	 * Smoketest our breadcrumb/signal handling for requests across multiple
	 * threads. A very simple test to only catch the most egregious of bugs.
	 * See __igt_breadcrumbs_smoketest();
	 *
	 * On real hardware this time.
	 */

	wakeref = intel_runtime_pm_get(i915);

	file = mock_file(i915);
	if (IS_ERR(file)) {
		ret = PTR_ERR(file);
		goto out_rpm;
	}

	threads = kcalloc(ncpus * I915_NUM_ENGINES,
			  sizeof(*threads),
			  GFP_KERNEL);
	if (!threads) {
		ret = -ENOMEM;
		goto out_file;
	}

	memset(&t[0], 0, sizeof(t[0]));
	t[0].request_alloc = __live_request_alloc;
	t[0].ncontexts = 64;
	t[0].contexts = kmalloc_array(t[0].ncontexts,
				      sizeof(*t[0].contexts),
				      GFP_KERNEL);
	if (!t[0].contexts) {
		ret = -ENOMEM;
		goto out_threads;
	}

	mutex_lock(&i915->drm.struct_mutex);
	for (n = 0; n < t[0].ncontexts; n++) {
		t[0].contexts[n] = live_context(i915, file);
		if (!t[0].contexts[n]) {
			ret = -ENOMEM;
			goto out_contexts;
		}
	}

	ret = igt_live_test_begin(&live, i915, __func__, "");
	if (ret)
		goto out_contexts;

	for_each_engine(engine, i915, id) {
		t[id] = t[0];
		t[id].engine = engine;
		t[id].max_batch = max_batches(t[0].contexts[0], engine);
		if (t[id].max_batch < 0) {
			ret = t[id].max_batch;
			mutex_unlock(&i915->drm.struct_mutex);
			goto out_flush;
		}
		/* One ring interleaved between requests from all cpus */
		t[id].max_batch /= num_online_cpus() + 1;
		pr_debug("Limiting batches to %d requests on %s\n",
			 t[id].max_batch, engine->name);

		for (n = 0; n < ncpus; n++) {
			struct task_struct *tsk;

			tsk = kthread_run(__igt_breadcrumbs_smoketest,
					  &t[id], "igt/%d.%d", id, n);
			if (IS_ERR(tsk)) {
				ret = PTR_ERR(tsk);
				mutex_unlock(&i915->drm.struct_mutex);
				goto out_flush;
			}

			get_task_struct(tsk);
			threads[id * ncpus + n] = tsk;
		}
	}
	mutex_unlock(&i915->drm.struct_mutex);

	msleep(jiffies_to_msecs(i915_selftest.timeout_jiffies));

out_flush:
	num_waits = 0;
	num_fences = 0;
	for_each_engine(engine, i915, id) {
		for (n = 0; n < ncpus; n++) {
			struct task_struct *tsk = threads[id * ncpus + n];
			int err;

			if (!tsk)
				continue;

			err = kthread_stop(tsk);
			if (err < 0 && !ret)
				ret = err;

			put_task_struct(tsk);
		}

		num_waits += atomic_long_read(&t[id].num_waits);
		num_fences += atomic_long_read(&t[id].num_fences);
	}
	pr_info("Completed %lu waits for %lu fences across %d engines and %d cpus\n",
		num_waits, num_fences, RUNTIME_INFO(i915)->num_rings, ncpus);

	mutex_lock(&i915->drm.struct_mutex);
	ret = igt_live_test_end(&live) ?: ret;
out_contexts:
	mutex_unlock(&i915->drm.struct_mutex);
	kfree(t[0].contexts);
out_threads:
	kfree(threads);
out_file:
	mock_file_free(i915, file);
out_rpm:
	intel_runtime_pm_put(i915, wakeref);

	return ret;
}

int i915_request_live_selftests(struct drm_i915_private *i915)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(live_nop_request),
		SUBTEST(live_all_engines),
		SUBTEST(live_sequential_engines),
		SUBTEST(live_empty_request),
		SUBTEST(live_breadcrumbs_smoketest),
	};

	if (i915_terminally_wedged(&i915->gpu_error))
		return 0;

	return i915_subtests(tests, i915);
}