/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2017 Intel Corporation
 */

#include <linux/prime_numbers.h>

#include "gem/i915_gem_pm.h"
#include "gt/intel_gt.h"
#include "gt/intel_gt_requests.h"
#include "gt/intel_reset.h"
#include "i915_selftest.h"

#include "gem/selftests/igt_gem_utils.h"
#include "selftests/i915_random.h"
#include "selftests/igt_flush_test.h"
#include "selftests/igt_live_test.h"
#include "selftests/igt_reset.h"
#include "selftests/igt_spinner.h"
#include "selftests/mock_drm.h"
#include "selftests/mock_gem_device.h"

#include "huge_gem_object.h"
#include "igt_gem_utils.h"

#define DW_PER_PAGE (PAGE_SIZE / sizeof(u32))

static int live_nop_switch(void *arg)
{
	const unsigned int nctx = 1024;
	struct drm_i915_private *i915 = arg;
	struct intel_engine_cs *engine;
	struct i915_gem_context **ctx;
	struct igt_live_test t;
	struct drm_file *file;
	unsigned long n;
	int err = -ENODEV;

	/*
	 * Create as many contexts as we can feasibly get away with
	 * and check we can switch between them rapidly.
	 *
	 * Serves as a very simple stress test for submission and HW switching
	 * between contexts.
	 */

	if (!DRIVER_CAPS(i915)->has_logical_contexts)
		return 0;

	file = mock_file(i915);
	if (IS_ERR(file))
		return PTR_ERR(file);

	ctx = kcalloc(nctx, sizeof(*ctx), GFP_KERNEL);
	if (!ctx) {
		err = -ENOMEM;
		goto out_file;
	}

	for (n = 0; n < nctx; n++) {
		ctx[n] = live_context(i915, file);
		if (IS_ERR(ctx[n])) {
			err = PTR_ERR(ctx[n]);
			goto out_file;
		}
	}

	for_each_uabi_engine(engine, i915) {
		struct i915_request *rq;
		unsigned long end_time, prime;
		ktime_t times[2] = {};

		times[0] = ktime_get_raw();
		for (n = 0; n < nctx; n++) {
			rq = igt_request_alloc(ctx[n], engine);
			if (IS_ERR(rq)) {
				err = PTR_ERR(rq);
				goto out_file;
			}
			i915_request_add(rq);
		}
		if (i915_request_wait(rq, 0, HZ / 5) < 0) {
			pr_err("Failed to populate %d contexts\n", nctx);
			intel_gt_set_wedged(&i915->gt);
			err = -EIO;
			goto out_file;
		}

		times[1] = ktime_get_raw();

		pr_info("Populated %d contexts on %s in %lluns\n",
			nctx, engine->name, ktime_to_ns(times[1] - times[0]));

		err = igt_live_test_begin(&t, i915, __func__, engine->name);
		if (err)
			goto out_file;

		end_time = jiffies + i915_selftest.timeout_jiffies;
		for_each_prime_number_from(prime, 2, 8192) {
			times[1] = ktime_get_raw();

			for (n = 0; n < prime; n++) {
				rq = igt_request_alloc(ctx[n % nctx], engine);
				if (IS_ERR(rq)) {
					err = PTR_ERR(rq);
					goto out_file;
				}

				/*
				 * This space is left intentionally blank.
				 *
				 * We do not actually want to perform any
				 * action with this request, we just want
				 * to measure the latency in allocation
				 * and submission of our breadcrumbs -
				 * ensuring that the bare request is sufficient
				 * for the system to work (i.e. proper HEAD
				 * tracking of the rings, interrupt handling,
				 * etc). It also gives us the lowest bounds
				 * for latency.
				 */

				i915_request_add(rq);
			}
			if (i915_request_wait(rq, 0, HZ / 5) < 0) {
				pr_err("Switching between %lu contexts timed out\n",
				       prime);
				intel_gt_set_wedged(&i915->gt);
				break;
			}

			times[1] = ktime_sub(ktime_get_raw(), times[1]);
			if (prime == 2)
				times[0] = times[1];

			if (__igt_timeout(end_time, NULL))
				break;
		}

		err = igt_live_test_end(&t);
		if (err)
			goto out_file;

		pr_info("Switch latencies on %s: 1 = %lluns, %lu = %lluns\n",
			engine->name,
			ktime_to_ns(times[0]),
			prime - 1, div64_u64(ktime_to_ns(times[1]), prime - 1));
	}

out_file:
	mock_file_free(i915, file);
	return err;
}

struct parallel_switch {
	struct task_struct *tsk;
	struct intel_context *ce[2];
};

static int __live_parallel_switch1(void *data)
{
	struct parallel_switch *arg = data;
	IGT_TIMEOUT(end_time);
	unsigned long count;

	count = 0;
	do {
		struct i915_request *rq = NULL;
		int err, n;

		err = 0;
		for (n = 0; !err && n < ARRAY_SIZE(arg->ce); n++) {
			struct i915_request *prev = rq;

			rq = i915_request_create(arg->ce[n]);
			if (IS_ERR(rq)) {
				i915_request_put(prev);
				return PTR_ERR(rq);
			}

			i915_request_get(rq);
			if (prev) {
				err = i915_request_await_dma_fence(rq, &prev->fence);
				i915_request_put(prev);
			}

			i915_request_add(rq);
		}
		if (i915_request_wait(rq, 0, HZ / 5) < 0)
			err = -ETIME;
		i915_request_put(rq);
		if (err)
			return err;

		count++;
	} while (!__igt_timeout(end_time, NULL));

	pr_info("%s: %lu switches (sync)\n", arg->ce[0]->engine->name, count);
	return 0;
}

static int __live_parallel_switchN(void *data)
{
	struct parallel_switch *arg = data;
	struct i915_request *rq = NULL;
	IGT_TIMEOUT(end_time);
	unsigned long count;
	int n;

	count = 0;
	do {
		for (n = 0; n < ARRAY_SIZE(arg->ce); n++) {
			struct i915_request *prev = rq;
			int err = 0;

			rq = i915_request_create(arg->ce[n]);
			if (IS_ERR(rq)) {
				i915_request_put(prev);
				return PTR_ERR(rq);
			}

			i915_request_get(rq);
			if (prev) {
				err = i915_request_await_dma_fence(rq, &prev->fence);
				i915_request_put(prev);
			}

			i915_request_add(rq);
			if (err) {
				i915_request_put(rq);
				return err;
			}
		}

		count++;
	} while (!__igt_timeout(end_time, NULL));
	i915_request_put(rq);

	pr_info("%s: %lu switches (many)\n", arg->ce[0]->engine->name, count);
	return 0;
}

static int live_parallel_switch(void *arg)
{
	struct drm_i915_private *i915 = arg;
	static int (* const func[])(void *arg) = {
		__live_parallel_switch1,
		__live_parallel_switchN,
		NULL,
	};
	struct parallel_switch *data = NULL;
	struct i915_gem_engines *engines;
	struct i915_gem_engines_iter it;
	int (* const *fn)(void *arg);
	struct i915_gem_context *ctx;
	struct intel_context *ce;
	struct drm_file *file;
	int n, m, count;
	int err = 0;

	/*
	 * Check we can process switches on all engines simultaneously.
	 */

	if (!DRIVER_CAPS(i915)->has_logical_contexts)
		return 0;

	file = mock_file(i915);
	if (IS_ERR(file))
		return PTR_ERR(file);

	ctx = live_context(i915, file);
	if (IS_ERR(ctx)) {
		err = PTR_ERR(ctx);
		goto out_file;
	}

	engines = i915_gem_context_lock_engines(ctx);
	count = engines->num_engines;

	data = kcalloc(count, sizeof(*data), GFP_KERNEL);
	if (!data) {
		i915_gem_context_unlock_engines(ctx);
		err = -ENOMEM;
		goto out_file;
	}

	m = 0; /* Use the first context as our template for the engines */
	for_each_gem_engine(ce, engines, it) {
		err = intel_context_pin(ce);
		if (err) {
			i915_gem_context_unlock_engines(ctx);
			goto out;
		}
		data[m++].ce[0] = intel_context_get(ce);
	}
	i915_gem_context_unlock_engines(ctx);

	/* Clone the same set of engines into the other contexts */
	for (n = 1; n < ARRAY_SIZE(data->ce); n++) {
		ctx = live_context(i915, file);
		if (IS_ERR(ctx)) {
			err = PTR_ERR(ctx);
			goto out;
		}

		for (m = 0; m < count; m++) {
			if (!data[m].ce[0])
				continue;

			ce = intel_context_create(ctx, data[m].ce[0]->engine);
			if (IS_ERR(ce)) {
				err = PTR_ERR(ce);
				goto out;
			}

			err = intel_context_pin(ce);
			if (err) {
				intel_context_put(ce);
				goto out;
			}

			data[m].ce[n] = ce;
		}
	}

	for (fn = func; !err && *fn; fn++) {
		struct igt_live_test t;
		int n;

		err = igt_live_test_begin(&t, i915, __func__, "");
		if (err)
			break;

		for (n = 0; n < count; n++) {
			if (!data[n].ce[0])
				continue;

			data[n].tsk = kthread_run(*fn, &data[n],
						  "igt/parallel:%s",
						  data[n].ce[0]->engine->name);
			if (IS_ERR(data[n].tsk)) {
				err = PTR_ERR(data[n].tsk);
				break;
			}
			get_task_struct(data[n].tsk);
		}

		yield(); /* start all threads before we kthread_stop() */

		for (n = 0; n < count; n++) {
			int status;

			if (IS_ERR_OR_NULL(data[n].tsk))
				continue;

			status = kthread_stop(data[n].tsk);
			if (status && !err)
				err = status;

			put_task_struct(data[n].tsk);
			data[n].tsk = NULL;
		}

		if (igt_live_test_end(&t))
			err = -EIO;
	}

out:
	for (n = 0; n < count; n++) {
		for (m = 0; m < ARRAY_SIZE(data->ce); m++) {
			if (!data[n].ce[m])
				continue;

			intel_context_unpin(data[n].ce[m]);
			intel_context_put(data[n].ce[m]);
		}
	}
	kfree(data);
out_file:
	mock_file_free(i915, file);
	return err;
}

static unsigned long real_page_count(struct drm_i915_gem_object *obj)
{
	return huge_gem_object_phys_size(obj) >> PAGE_SHIFT;
}

static unsigned long fake_page_count(struct drm_i915_gem_object *obj)
{
	return huge_gem_object_dma_size(obj) >> PAGE_SHIFT;
}

static int gpu_fill(struct intel_context *ce,
		    struct drm_i915_gem_object *obj,
		    unsigned int dw)
{
	struct i915_vma *vma;
	int err;

	GEM_BUG_ON(obj->base.size > ce->vm->total);
	GEM_BUG_ON(!intel_engine_can_store_dword(ce->engine));

	vma = i915_vma_instance(obj, ce->vm, NULL);
	if (IS_ERR(vma))
		return PTR_ERR(vma);

	err = i915_vma_pin(vma, 0, 0, PIN_HIGH | PIN_USER);
	if (err)
		return err;

	/*
	 * Within the GTT the huge object maps every page onto
	 * its 1024 real pages (using phys_pfn = dma_pfn % 1024).
	 * We set the nth dword within the page using the nth
	 * mapping via the GTT - this should exercise the GTT mapping
	 * whilst checking that each context provides a unique view
	 * into the object.
	 */
	err = igt_gpu_fill_dw(ce, vma,
			      (dw * real_page_count(obj)) << PAGE_SHIFT |
			      (dw * sizeof(u32)),
			      real_page_count(obj),
			      dw);
	i915_vma_unpin(vma);

	return err;
}

static int cpu_fill(struct drm_i915_gem_object *obj, u32 value)
{
	const bool has_llc = HAS_LLC(to_i915(obj->base.dev));
	unsigned int n, m, need_flush;
	int err;

	err = i915_gem_object_prepare_write(obj, &need_flush);
	if (err)
		return err;

	for (n = 0; n < real_page_count(obj); n++) {
		u32 *map;

		map = kmap_atomic(i915_gem_object_get_page(obj, n));
		for (m = 0; m < DW_PER_PAGE; m++)
			map[m] = value;
		if (!has_llc)
			drm_clflush_virt_range(map, PAGE_SIZE);
		kunmap_atomic(map);
	}

	i915_gem_object_finish_access(obj);
	obj->read_domains = I915_GEM_DOMAIN_GTT | I915_GEM_DOMAIN_CPU;
	obj->write_domain = 0;
	return 0;
}

static noinline int cpu_check(struct drm_i915_gem_object *obj,
			      unsigned int idx, unsigned int max)
{
	unsigned int n, m, needs_flush;
	int err;

	err = i915_gem_object_prepare_read(obj, &needs_flush);
	if (err)
		return err;

	for (n = 0; n < real_page_count(obj); n++) {
		u32 *map;

		map = kmap_atomic(i915_gem_object_get_page(obj, n));
		if (needs_flush & CLFLUSH_BEFORE)
			drm_clflush_virt_range(map, PAGE_SIZE);

		for (m = 0; m < max; m++) {
			if (map[m] != m) {
				pr_err("%pS: Invalid value at object %d page %d/%ld, offset %d/%d: found %x expected %x\n",
				       __builtin_return_address(0), idx,
				       n, real_page_count(obj), m, max,
				       map[m], m);
				err = -EINVAL;
				goto out_unmap;
			}
		}

		for (; m < DW_PER_PAGE; m++) {
			if (map[m] != STACK_MAGIC) {
				pr_err("%pS: Invalid value at object %d page %d, offset %d: found %x expected %x (uninitialised)\n",
				       __builtin_return_address(0), idx, n, m,
				       map[m], STACK_MAGIC);
				err = -EINVAL;
				goto out_unmap;
			}
		}

out_unmap:
		kunmap_atomic(map);
		if (err)
			break;
	}

	i915_gem_object_finish_access(obj);
	return err;
}

static int file_add_object(struct drm_file *file,
			   struct drm_i915_gem_object *obj)
{
	int err;

	GEM_BUG_ON(obj->base.handle_count);

	/* tie the object to the drm_file for easy reaping */
	err = idr_alloc(&file->object_idr, &obj->base, 1, 0, GFP_KERNEL);
	if (err < 0)
		return err;

	i915_gem_object_get(obj);
	obj->base.handle_count++;
	return 0;
}

static struct drm_i915_gem_object *
create_test_object(struct i915_address_space *vm,
		   struct drm_file *file,
		   struct list_head *objects)
{
	struct drm_i915_gem_object *obj;
	u64 size;
	int err;

	/* Keep in GEM's good graces */
	intel_gt_retire_requests(vm->gt);

	size = min(vm->total / 2, 1024ull * DW_PER_PAGE * PAGE_SIZE);
	size = round_down(size, DW_PER_PAGE * PAGE_SIZE);

	obj = huge_gem_object(vm->i915, DW_PER_PAGE * PAGE_SIZE, size);
	if (IS_ERR(obj))
		return obj;

	err = file_add_object(file, obj);
	i915_gem_object_put(obj);
	if (err)
		return ERR_PTR(err);

	err = cpu_fill(obj, STACK_MAGIC);
	if (err) {
		pr_err("Failed to fill object with cpu, err=%d\n",
		       err);
		return ERR_PTR(err);
	}

	list_add_tail(&obj->st_link, objects);
	return obj;
}
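
/*
 * Number of gpu_fill() passes that fit within the object's GTT view; the
 * tests below move on to a fresh object once this many dwords have been
 * written through it.
 */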
static unsigned long max_dwords(struct drm_i915_gem_object *obj)
{
	unsigned long npages = fake_page_count(obj);

	GEM_BUG_ON(!IS_ALIGNED(npages, DW_PER_PAGE));
	return npages / DW_PER_PAGE;
}

static void throttle_release(struct i915_request **q, int count)
{
	int i;

	for (i = 0; i < count; i++) {
		if (IS_ERR_OR_NULL(q[i]))
			continue;

		i915_request_put(fetch_and_zero(&q[i]));
	}
}

/*
 * Keep a small ring of in-flight requests: wait for the oldest to complete
 * before queueing a fresh request on @ce, so each test only ever has a
 * bounded amount of work outstanding.
 */
static int throttle(struct intel_context *ce,
		    struct i915_request **q, int count)
{
	int i;

	if (!IS_ERR_OR_NULL(q[0])) {
		if (i915_request_wait(q[0],
				      I915_WAIT_INTERRUPTIBLE,
				      MAX_SCHEDULE_TIMEOUT) < 0)
			return -EINTR;

		i915_request_put(q[0]);
	}

	for (i = 0; i < count - 1; i++)
		q[i] = q[i + 1];

	q[i] = intel_context_create_request(ce);
	if (IS_ERR(q[i]))
		return PTR_ERR(q[i]);

	i915_request_get(q[i]);
	i915_request_add(q[i]);

	return 0;
}

static int igt_ctx_exec(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct intel_engine_cs *engine;
	int err = -ENODEV;

	/*
	 * Create a few different contexts (with different mm) and write
	 * through each ctx/mm using the GPU making sure those writes end
	 * up in the expected pages of our obj.
	 */

	if (!DRIVER_CAPS(i915)->has_logical_contexts)
		return 0;

	for_each_uabi_engine(engine, i915) {
		struct drm_i915_gem_object *obj = NULL;
		unsigned long ncontexts, ndwords, dw;
		struct i915_request *tq[5] = {};
		struct igt_live_test t;
		struct drm_file *file;
		IGT_TIMEOUT(end_time);
		LIST_HEAD(objects);

		if (!intel_engine_can_store_dword(engine))
			continue;

		if (!engine->context_size)
			continue; /* No logical context support in HW */

		file = mock_file(i915);
		if (IS_ERR(file))
			return PTR_ERR(file);

		err = igt_live_test_begin(&t, i915, __func__, engine->name);
		if (err)
			goto out_file;

		ncontexts = 0;
		ndwords = 0;
		dw = 0;
		while (!time_after(jiffies, end_time)) {
			struct i915_gem_context *ctx;
			struct intel_context *ce;

			ctx = kernel_context(i915);
			if (IS_ERR(ctx)) {
				err = PTR_ERR(ctx);
				goto out_file;
			}

			ce = i915_gem_context_get_engine(ctx, engine->legacy_idx);
			GEM_BUG_ON(IS_ERR(ce));

			if (!obj) {
				obj = create_test_object(ce->vm, file, &objects);
				if (IS_ERR(obj)) {
					err = PTR_ERR(obj);
					intel_context_put(ce);
					kernel_context_close(ctx);
					goto out_file;
				}
			}

			err = gpu_fill(ce, obj, dw);
			if (err) {
				pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) [full-ppgtt? %s], err=%d\n",
				       ndwords, dw, max_dwords(obj),
				       engine->name,
				       yesno(!!rcu_access_pointer(ctx->vm)),
				       err);
				intel_context_put(ce);
				kernel_context_close(ctx);
				goto out_file;
			}

			err = throttle(ce, tq, ARRAY_SIZE(tq));
			if (err) {
				intel_context_put(ce);
				kernel_context_close(ctx);
				goto out_file;
			}

			if (++dw == max_dwords(obj)) {
				obj = NULL;
				dw = 0;
			}

			ndwords++;
			ncontexts++;

			intel_context_put(ce);
			kernel_context_close(ctx);
		}

		pr_info("Submitted %lu contexts to %s, filling %lu dwords\n",
			ncontexts, engine->name, ndwords);

		ncontexts = dw = 0;
		list_for_each_entry(obj, &objects, st_link) {
			unsigned int rem =
				min_t(unsigned int, ndwords - dw, max_dwords(obj));

			err = cpu_check(obj, ncontexts++, rem);
			if (err)
				break;

			dw += rem;
		}

out_file:
		throttle_release(tq, ARRAY_SIZE(tq));
		if (igt_live_test_end(&t))
			err = -EIO;

		mock_file_free(i915, file);
		if (err)
			return err;

		i915_gem_drain_freed_objects(i915);
	}

	return 0;
}

static int igt_shared_ctx_exec(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct i915_request *tq[5] = {};
	struct i915_gem_context *parent;
	struct intel_engine_cs *engine;
	struct igt_live_test t;
	struct drm_file *file;
	int err = 0;

	/*
	 * Create a few different contexts with the same mm and write
	 * through each ctx using the GPU making sure those writes end
	 * up in the expected pages of our obj.
	 */
	if (!DRIVER_CAPS(i915)->has_logical_contexts)
		return 0;

	file = mock_file(i915);
	if (IS_ERR(file))
		return PTR_ERR(file);

	parent = live_context(i915, file);
	if (IS_ERR(parent)) {
		err = PTR_ERR(parent);
		goto out_file;
	}

	if (!parent->vm) { /* not full-ppgtt; nothing to share */
		err = 0;
		goto out_file;
	}

	err = igt_live_test_begin(&t, i915, __func__, "");
	if (err)
		goto out_file;

	for_each_uabi_engine(engine, i915) {
		unsigned long ncontexts, ndwords, dw;
		struct drm_i915_gem_object *obj = NULL;
		IGT_TIMEOUT(end_time);
		LIST_HEAD(objects);

		if (!intel_engine_can_store_dword(engine))
			continue;

		dw = 0;
		ndwords = 0;
		ncontexts = 0;
		while (!time_after(jiffies, end_time)) {
			struct i915_gem_context *ctx;
			struct intel_context *ce;

			ctx = kernel_context(i915);
			if (IS_ERR(ctx)) {
				err = PTR_ERR(ctx);
				goto out_test;
			}

			mutex_lock(&ctx->mutex);
			__assign_ppgtt(ctx, parent->vm);
			mutex_unlock(&ctx->mutex);

			ce = i915_gem_context_get_engine(ctx, engine->legacy_idx);
			GEM_BUG_ON(IS_ERR(ce));

			if (!obj) {
				obj = create_test_object(parent->vm, file, &objects);
				if (IS_ERR(obj)) {
					err = PTR_ERR(obj);
					intel_context_put(ce);
					kernel_context_close(ctx);
					goto out_test;
				}
			}

			err = gpu_fill(ce, obj, dw);
			if (err) {
				pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) [full-ppgtt? %s], err=%d\n",
				       ndwords, dw, max_dwords(obj),
				       engine->name,
				       yesno(!!rcu_access_pointer(ctx->vm)),
				       err);
				intel_context_put(ce);
				kernel_context_close(ctx);
				goto out_test;
			}

			err = throttle(ce, tq, ARRAY_SIZE(tq));
			if (err) {
				intel_context_put(ce);
				kernel_context_close(ctx);
				goto out_test;
			}

			if (++dw == max_dwords(obj)) {
				obj = NULL;
				dw = 0;
			}

			ndwords++;
			ncontexts++;

			intel_context_put(ce);
			kernel_context_close(ctx);
		}
		pr_info("Submitted %lu contexts to %s, filling %lu dwords\n",
			ncontexts, engine->name, ndwords);

		ncontexts = dw = 0;
		list_for_each_entry(obj, &objects, st_link) {
			unsigned int rem =
				min_t(unsigned int, ndwords - dw, max_dwords(obj));

			err = cpu_check(obj, ncontexts++, rem);
			if (err)
				goto out_test;

			dw += rem;
		}

		i915_gem_drain_freed_objects(i915);
	}
out_test:
	throttle_release(tq, ARRAY_SIZE(tq));
	if (igt_live_test_end(&t))
		err = -EIO;
out_file:
	mock_file_free(i915, file);
	return err;
}

static struct i915_vma *rpcs_query_batch(struct i915_vma *vma)
{
	struct drm_i915_gem_object *obj;
	u32 *cmd;
	int err;

	if (INTEL_GEN(vma->vm->i915) < 8)
		return ERR_PTR(-EINVAL);

	obj = i915_gem_object_create_internal(vma->vm->i915, PAGE_SIZE);
	if (IS_ERR(obj))
		return ERR_CAST(obj);

	cmd = i915_gem_object_pin_map(obj, I915_MAP_WB);
	if (IS_ERR(cmd)) {
		err = PTR_ERR(cmd);
		goto err;
	}

	*cmd++ = MI_STORE_REGISTER_MEM_GEN8;
	*cmd++ = i915_mmio_reg_offset(GEN8_R_PWR_CLK_STATE);
	*cmd++ = lower_32_bits(vma->node.start);
	*cmd++ = upper_32_bits(vma->node.start);
	*cmd = MI_BATCH_BUFFER_END;

	__i915_gem_object_flush_map(obj, 0, 64);
	i915_gem_object_unpin_map(obj);

	intel_gt_chipset_flush(vma->vm->gt);

	vma = i915_vma_instance(obj, vma->vm, NULL);
	if (IS_ERR(vma)) {
		err = PTR_ERR(vma);
		goto err;
	}

	err = i915_vma_pin(vma, 0, 0, PIN_USER);
	if (err)
		goto err;

	return vma;

err:
	i915_gem_object_put(obj);
	return ERR_PTR(err);
}

static int
emit_rpcs_query(struct drm_i915_gem_object *obj,
		struct intel_context *ce,
		struct i915_request **rq_out)
{
	struct i915_request *rq;
	struct i915_vma *batch;
	struct i915_vma *vma;
	int err;

	GEM_BUG_ON(!intel_engine_can_store_dword(ce->engine));

	vma = i915_vma_instance(obj, ce->vm, NULL);
	if (IS_ERR(vma))
		return PTR_ERR(vma);

	i915_gem_object_lock(obj);
	err = i915_gem_object_set_to_gtt_domain(obj, false);
	i915_gem_object_unlock(obj);
	if (err)
		return err;

	err = i915_vma_pin(vma, 0, 0, PIN_USER);
	if (err)
		return err;

	batch = rpcs_query_batch(vma);
	if (IS_ERR(batch)) {
		err = PTR_ERR(batch);
		goto err_vma;
	}

	rq = i915_request_create(ce);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto err_batch;
	}

	err = rq->engine->emit_bb_start(rq,
					batch->node.start, batch->node.size,
					0);
	if (err)
		goto err_request;

	i915_vma_lock(batch);
	err = i915_request_await_object(rq, batch->obj, false);
	if (err == 0)
		err = i915_vma_move_to_active(batch, rq, 0);
	i915_vma_unlock(batch);
	if (err)
		goto skip_request;

	i915_vma_lock(vma);
	err = i915_request_await_object(rq, vma->obj, true);
	if (err == 0)
		err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
	i915_vma_unlock(vma);
	if (err)
		goto skip_request;

	i915_vma_unpin_and_release(&batch, 0);
	i915_vma_unpin(vma);

	*rq_out = i915_request_get(rq);

	i915_request_add(rq);

	return 0;

skip_request:
	i915_request_skip(rq, err);
err_request:
	i915_request_add(rq);
err_batch:
	i915_vma_unpin_and_release(&batch, 0);
err_vma:
	i915_vma_unpin(vma);

	return err;
}

#define TEST_IDLE BIT(0)
#define TEST_BUSY BIT(1)
#define TEST_RESET BIT(2)
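
/*
 * For the BUSY/RESET phases, keep the target context occupied with a
 * spinner so that the SSEU reconfiguration which follows is applied while
 * the context has work in flight.
 */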
static int
__sseu_prepare(const char *name,
	       unsigned int flags,
	       struct intel_context *ce,
	       struct igt_spinner **spin)
{
	struct i915_request *rq;
	int ret;

	*spin = NULL;
	if (!(flags & (TEST_BUSY | TEST_RESET)))
		return 0;

	*spin = kzalloc(sizeof(**spin), GFP_KERNEL);
	if (!*spin)
		return -ENOMEM;

	ret = igt_spinner_init(*spin, ce->engine->gt);
	if (ret)
		goto err_free;

	rq = igt_spinner_create_request(*spin, ce, MI_NOOP);
	if (IS_ERR(rq)) {
		ret = PTR_ERR(rq);
		goto err_fini;
	}

	i915_request_add(rq);

	if (!igt_wait_for_spinner(*spin, rq)) {
		pr_err("%s: Spinner failed to start!\n", name);
		ret = -ETIMEDOUT;
		goto err_end;
	}

	return 0;

err_end:
	igt_spinner_end(*spin);
err_fini:
	igt_spinner_fini(*spin);
err_free:
	kfree(fetch_and_zero(spin));
	return ret;
}

static int
__read_slice_count(struct intel_context *ce,
		   struct drm_i915_gem_object *obj,
		   struct igt_spinner *spin,
		   u32 *rpcs)
{
	struct i915_request *rq = NULL;
	u32 s_mask, s_shift;
	unsigned int cnt;
	u32 *buf, val;
	long ret;

	ret = emit_rpcs_query(obj, ce, &rq);
	if (ret)
		return ret;

	if (spin)
		igt_spinner_end(spin);

	ret = i915_request_wait(rq, 0, MAX_SCHEDULE_TIMEOUT);
	i915_request_put(rq);
	if (ret < 0)
		return ret;

	buf = i915_gem_object_pin_map(obj, I915_MAP_WB);
	if (IS_ERR(buf)) {
		ret = PTR_ERR(buf);
		return ret;
	}

	if (INTEL_GEN(ce->engine->i915) >= 11) {
		s_mask = GEN11_RPCS_S_CNT_MASK;
		s_shift = GEN11_RPCS_S_CNT_SHIFT;
	} else {
		s_mask = GEN8_RPCS_S_CNT_MASK;
		s_shift = GEN8_RPCS_S_CNT_SHIFT;
	}

	val = *buf;
	cnt = (val & s_mask) >> s_shift;
	*rpcs = val;

	i915_gem_object_unpin_map(obj);

	return cnt;
}

static int
__check_rpcs(const char *name, u32 rpcs, int slices, unsigned int expected,
	     const char *prefix, const char *suffix)
{
	if (slices == expected)
		return 0;

	if (slices < 0) {
		pr_err("%s: %s read slice count failed with %d%s\n",
		       name, prefix, slices, suffix);
		return slices;
	}

	pr_err("%s: %s slice count %d is not %u%s\n",
	       name, prefix, slices, expected, suffix);

	pr_info("RPCS=0x%x; %u%sx%u%s\n",
		rpcs, slices,
		(rpcs & GEN8_RPCS_S_CNT_ENABLE) ? "*" : "",
		(rpcs & GEN8_RPCS_SS_CNT_MASK) >> GEN8_RPCS_SS_CNT_SHIFT,
		(rpcs & GEN8_RPCS_SS_CNT_ENABLE) ? "*" : "");
"*" : ""); 1107 1108 return -EINVAL; 1109 } 1110 1111 static int 1112 __sseu_finish(const char *name, 1113 unsigned int flags, 1114 struct intel_context *ce, 1115 struct drm_i915_gem_object *obj, 1116 unsigned int expected, 1117 struct igt_spinner *spin) 1118 { 1119 unsigned int slices = hweight32(ce->engine->sseu.slice_mask); 1120 u32 rpcs = 0; 1121 int ret = 0; 1122 1123 if (flags & TEST_RESET) { 1124 ret = intel_engine_reset(ce->engine, "sseu"); 1125 if (ret) 1126 goto out; 1127 } 1128 1129 ret = __read_slice_count(ce, obj, 1130 flags & TEST_RESET ? NULL : spin, &rpcs); 1131 ret = __check_rpcs(name, rpcs, ret, expected, "Context", "!"); 1132 if (ret) 1133 goto out; 1134 1135 ret = __read_slice_count(ce->engine->kernel_context, obj, NULL, &rpcs); 1136 ret = __check_rpcs(name, rpcs, ret, slices, "Kernel context", "!"); 1137 1138 out: 1139 if (spin) 1140 igt_spinner_end(spin); 1141 1142 if ((flags & TEST_IDLE) && ret == 0) { 1143 ret = intel_gt_wait_for_idle(ce->engine->gt, 1144 MAX_SCHEDULE_TIMEOUT); 1145 if (ret) 1146 return ret; 1147 1148 ret = __read_slice_count(ce, obj, NULL, &rpcs); 1149 ret = __check_rpcs(name, rpcs, ret, expected, 1150 "Context", " after idle!"); 1151 } 1152 1153 return ret; 1154 } 1155 1156 static int 1157 __sseu_test(const char *name, 1158 unsigned int flags, 1159 struct intel_context *ce, 1160 struct drm_i915_gem_object *obj, 1161 struct intel_sseu sseu) 1162 { 1163 struct igt_spinner *spin = NULL; 1164 int ret; 1165 1166 ret = __sseu_prepare(name, flags, ce, &spin); 1167 if (ret) 1168 return ret; 1169 1170 ret = intel_context_reconfigure_sseu(ce, sseu); 1171 if (ret) 1172 goto out_spin; 1173 1174 ret = __sseu_finish(name, flags, ce, obj, 1175 hweight32(sseu.slice_mask), spin); 1176 1177 out_spin: 1178 if (spin) { 1179 igt_spinner_end(spin); 1180 igt_spinner_fini(spin); 1181 kfree(spin); 1182 } 1183 return ret; 1184 } 1185 1186 static int 1187 __igt_ctx_sseu(struct drm_i915_private *i915, 1188 const char *name, 1189 unsigned int flags) 1190 { 1191 struct drm_i915_gem_object *obj; 1192 int inst = 0; 1193 int ret = 0; 1194 1195 if (INTEL_GEN(i915) < 9 || !RUNTIME_INFO(i915)->sseu.has_slice_pg) 1196 return 0; 1197 1198 if (flags & TEST_RESET) 1199 igt_global_reset_lock(&i915->gt); 1200 1201 obj = i915_gem_object_create_internal(i915, PAGE_SIZE); 1202 if (IS_ERR(obj)) { 1203 ret = PTR_ERR(obj); 1204 goto out_unlock; 1205 } 1206 1207 do { 1208 struct intel_engine_cs *engine; 1209 struct intel_context *ce; 1210 struct intel_sseu pg_sseu; 1211 1212 engine = intel_engine_lookup_user(i915, 1213 I915_ENGINE_CLASS_RENDER, 1214 inst++); 1215 if (!engine) 1216 break; 1217 1218 if (hweight32(engine->sseu.slice_mask) < 2) 1219 continue; 1220 1221 /* 1222 * Gen11 VME friendly power-gated configuration with 1223 * half enabled sub-slices. 1224 */ 1225 pg_sseu = engine->sseu; 1226 pg_sseu.slice_mask = 1; 1227 pg_sseu.subslice_mask = 1228 ~(~0 << (hweight32(engine->sseu.subslice_mask) / 2)); 1229 1230 pr_info("%s: SSEU subtest '%s', flags=%x, def_slices=%u, pg_slices=%u\n", 1231 engine->name, name, flags, 1232 hweight32(engine->sseu.slice_mask), 1233 hweight32(pg_sseu.slice_mask)); 1234 1235 ce = intel_context_create(engine->kernel_context->gem_context, 1236 engine); 1237 if (IS_ERR(ce)) { 1238 ret = PTR_ERR(ce); 1239 goto out_put; 1240 } 1241 1242 ret = intel_context_pin(ce); 1243 if (ret) 1244 goto out_ce; 1245 1246 /* First set the default mask. 
		ret = __sseu_test(name, flags, ce, obj, engine->sseu);
		if (ret)
			goto out_unpin;

		/* Then set a power-gated configuration. */
		ret = __sseu_test(name, flags, ce, obj, pg_sseu);
		if (ret)
			goto out_unpin;

		/* Back to defaults. */
		ret = __sseu_test(name, flags, ce, obj, engine->sseu);
		if (ret)
			goto out_unpin;

		/* One last power-gated configuration for the road. */
		ret = __sseu_test(name, flags, ce, obj, pg_sseu);
		if (ret)
			goto out_unpin;

out_unpin:
		intel_context_unpin(ce);
out_ce:
		intel_context_put(ce);
	} while (!ret);

	if (igt_flush_test(i915))
		ret = -EIO;

out_put:
	i915_gem_object_put(obj);

out_unlock:
	if (flags & TEST_RESET)
		igt_global_reset_unlock(&i915->gt);

	if (ret)
		pr_err("%s: Failed with %d!\n", name, ret);

	return ret;
}

static int igt_ctx_sseu(void *arg)
{
	struct {
		const char *name;
		unsigned int flags;
	} *phase, phases[] = {
		{ .name = "basic", .flags = 0 },
		{ .name = "idle", .flags = TEST_IDLE },
		{ .name = "busy", .flags = TEST_BUSY },
		{ .name = "busy-reset", .flags = TEST_BUSY | TEST_RESET },
		{ .name = "busy-idle", .flags = TEST_BUSY | TEST_IDLE },
		{ .name = "reset-idle", .flags = TEST_RESET | TEST_IDLE },
	};
	unsigned int i;
	int ret = 0;

	for (i = 0, phase = phases; ret == 0 && i < ARRAY_SIZE(phases);
	     i++, phase++)
		ret = __igt_ctx_sseu(arg, phase->name, phase->flags);

	return ret;
}

static int igt_ctx_readonly(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct drm_i915_gem_object *obj = NULL;
	struct i915_request *tq[5] = {};
	struct i915_address_space *vm;
	struct i915_gem_context *ctx;
	unsigned long idx, ndwords, dw;
	struct igt_live_test t;
	struct drm_file *file;
	I915_RND_STATE(prng);
	IGT_TIMEOUT(end_time);
	LIST_HEAD(objects);
	int err = -ENODEV;

	/*
	 * Create a few read-only objects (with the occasional writable object)
	 * and try to write into these objects, checking that the GPU discards
	 * any write to a read-only object.
	 */

	file = mock_file(i915);
	if (IS_ERR(file))
		return PTR_ERR(file);

	err = igt_live_test_begin(&t, i915, __func__, "");
	if (err)
		goto out_file;

	ctx = live_context(i915, file);
	if (IS_ERR(ctx)) {
		err = PTR_ERR(ctx);
		goto out_file;
	}

	rcu_read_lock();
	vm = rcu_dereference(ctx->vm) ?: &i915->ggtt.alias->vm;
	if (!vm || !vm->has_read_only) {
		rcu_read_unlock();
		err = 0;
		goto out_file;
	}
	rcu_read_unlock();

	ndwords = 0;
	dw = 0;
	while (!time_after(jiffies, end_time)) {
		struct i915_gem_engines_iter it;
		struct intel_context *ce;

		for_each_gem_engine(ce,
				    i915_gem_context_lock_engines(ctx), it) {
			if (!intel_engine_can_store_dword(ce->engine))
				continue;

			if (!obj) {
				obj = create_test_object(ce->vm, file, &objects);
				if (IS_ERR(obj)) {
					err = PTR_ERR(obj);
					i915_gem_context_unlock_engines(ctx);
					goto out_file;
				}

				if (prandom_u32_state(&prng) & 1)
					i915_gem_object_set_readonly(obj);
			}

			err = gpu_fill(ce, obj, dw);
			if (err) {
				pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) [full-ppgtt? %s], err=%d\n",
				       ndwords, dw, max_dwords(obj),
				       ce->engine->name,
				       yesno(!!rcu_access_pointer(ctx->vm)),
				       err);
				i915_gem_context_unlock_engines(ctx);
				goto out_file;
			}

			err = throttle(ce, tq, ARRAY_SIZE(tq));
			if (err) {
				i915_gem_context_unlock_engines(ctx);
				goto out_file;
			}

			if (++dw == max_dwords(obj)) {
				obj = NULL;
				dw = 0;
			}
			ndwords++;
		}
		i915_gem_context_unlock_engines(ctx);
	}
	pr_info("Submitted %lu dwords (across %u engines)\n",
		ndwords, RUNTIME_INFO(i915)->num_engines);

	dw = 0;
	idx = 0;
	list_for_each_entry(obj, &objects, st_link) {
		unsigned int rem =
			min_t(unsigned int, ndwords - dw, max_dwords(obj));
		unsigned int num_writes;

		num_writes = rem;
		if (i915_gem_object_is_readonly(obj))
			num_writes = 0;

		err = cpu_check(obj, idx++, num_writes);
		if (err)
			break;

		dw += rem;
	}

out_file:
	throttle_release(tq, ARRAY_SIZE(tq));
	if (igt_live_test_end(&t))
		err = -EIO;

	mock_file_free(i915, file);
	return err;
}

static int check_scratch(struct i915_address_space *vm, u64 offset)
{
	struct drm_mm_node *node =
		__drm_mm_interval_first(&vm->mm,
					offset, offset + sizeof(u32) - 1);
	if (!node || node->start > offset)
		return 0;

	GEM_BUG_ON(offset >= node->start + node->size);

	pr_err("Target offset 0x%08x_%08x overlaps with a node in the mm!\n",
	       upper_32_bits(offset), lower_32_bits(offset));
	return -EINVAL;
}

static int write_to_scratch(struct i915_gem_context *ctx,
			    struct intel_engine_cs *engine,
			    u64 offset, u32 value)
{
	struct drm_i915_private *i915 = ctx->i915;
	struct drm_i915_gem_object *obj;
	struct i915_address_space *vm;
	struct i915_request *rq;
	struct i915_vma *vma;
	u32 *cmd;
	int err;

	GEM_BUG_ON(offset < I915_GTT_PAGE_SIZE);

	obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
	if (IS_ERR(obj))
		return PTR_ERR(obj);

	cmd = i915_gem_object_pin_map(obj, I915_MAP_WB);
	if (IS_ERR(cmd)) {
		err = PTR_ERR(cmd);
		goto err;
	}

	*cmd++ = MI_STORE_DWORD_IMM_GEN4;
	if (INTEL_GEN(i915) >= 8) {
		*cmd++ = lower_32_bits(offset);
		*cmd++ = upper_32_bits(offset);
	} else {
		*cmd++ = 0;
		*cmd++ = offset;
	}
	*cmd++ = value;
	*cmd = MI_BATCH_BUFFER_END;
	__i915_gem_object_flush_map(obj, 0, 64);
	i915_gem_object_unpin_map(obj);

	intel_gt_chipset_flush(engine->gt);

	vm = i915_gem_context_get_vm_rcu(ctx);
	vma = i915_vma_instance(obj, vm, NULL);
	if (IS_ERR(vma)) {
		err = PTR_ERR(vma);
		goto err_vm;
	}

	err = i915_vma_pin(vma, 0, 0, PIN_USER | PIN_OFFSET_FIXED);
	if (err)
		goto err_vm;

	err = check_scratch(vm, offset);
	if (err)
		goto err_unpin;

	rq = igt_request_alloc(ctx, engine);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto err_unpin;
	}

	err = engine->emit_bb_start(rq, vma->node.start, vma->node.size, 0);
	if (err)
		goto err_request;

	i915_vma_lock(vma);
	err = i915_request_await_object(rq, vma->obj, false);
	if (err == 0)
		err = i915_vma_move_to_active(vma, rq, 0);
	i915_vma_unlock(vma);
	if (err)
		goto skip_request;

	i915_vma_unpin_and_release(&vma, 0);

	i915_request_add(rq);

	i915_vm_put(vm);
	return 0;

skip_request:
	i915_request_skip(rq, err);
err_request:
	i915_request_add(rq);
err_unpin:
	i915_vma_unpin(vma);
err_vm:
	i915_vm_put(vm);
err:
	i915_gem_object_put(obj);
	return err;
}
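
/*
 * Read back the dword at @offset in @ctx's address space by bouncing it
 * through a general-purpose register (MI_LOAD_REGISTER_MEM followed by
 * MI_STORE_REGISTER_MEM into the batch object), then fetching the result
 * from the CPU.
 */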
static int read_from_scratch(struct i915_gem_context *ctx,
			     struct intel_engine_cs *engine,
			     u64 offset, u32 *value)
{
	struct drm_i915_private *i915 = ctx->i915;
	struct drm_i915_gem_object *obj;
	struct i915_address_space *vm;
	const u32 RCS_GPR0 = 0x2600; /* not all engines have their own GPR! */
	const u32 result = 0x100;
	struct i915_request *rq;
	struct i915_vma *vma;
	u32 *cmd;
	int err;

	GEM_BUG_ON(offset < I915_GTT_PAGE_SIZE);

	obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
	if (IS_ERR(obj))
		return PTR_ERR(obj);

	cmd = i915_gem_object_pin_map(obj, I915_MAP_WB);
	if (IS_ERR(cmd)) {
		err = PTR_ERR(cmd);
		goto err;
	}

	memset(cmd, POISON_INUSE, PAGE_SIZE);
	if (INTEL_GEN(i915) >= 8) {
		*cmd++ = MI_LOAD_REGISTER_MEM_GEN8;
		*cmd++ = RCS_GPR0;
		*cmd++ = lower_32_bits(offset);
		*cmd++ = upper_32_bits(offset);
		*cmd++ = MI_STORE_REGISTER_MEM_GEN8;
		*cmd++ = RCS_GPR0;
		*cmd++ = result;
		*cmd++ = 0;
	} else {
		*cmd++ = MI_LOAD_REGISTER_MEM;
		*cmd++ = RCS_GPR0;
		*cmd++ = offset;
		*cmd++ = MI_STORE_REGISTER_MEM;
		*cmd++ = RCS_GPR0;
		*cmd++ = result;
	}
	*cmd = MI_BATCH_BUFFER_END;

	i915_gem_object_flush_map(obj);
	i915_gem_object_unpin_map(obj);

	intel_gt_chipset_flush(engine->gt);

	vm = i915_gem_context_get_vm_rcu(ctx);
	vma = i915_vma_instance(obj, vm, NULL);
	if (IS_ERR(vma)) {
		err = PTR_ERR(vma);
		goto err_vm;
	}

	err = i915_vma_pin(vma, 0, 0, PIN_USER | PIN_OFFSET_FIXED);
	if (err)
		goto err_vm;

	err = check_scratch(vm, offset);
	if (err)
		goto err_unpin;

	rq = igt_request_alloc(ctx, engine);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto err_unpin;
	}

	err = engine->emit_bb_start(rq, vma->node.start, vma->node.size, 0);
	if (err)
		goto err_request;

	i915_vma_lock(vma);
	err = i915_request_await_object(rq, vma->obj, true);
	if (err == 0)
		err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
	i915_vma_unlock(vma);
	if (err)
		goto skip_request;

	i915_vma_unpin(vma);
	i915_vma_close(vma);

	i915_request_add(rq);

	i915_gem_object_lock(obj);
	err = i915_gem_object_set_to_cpu_domain(obj, false);
	i915_gem_object_unlock(obj);
	if (err)
		goto err_vm;

	cmd = i915_gem_object_pin_map(obj, I915_MAP_WB);
	if (IS_ERR(cmd)) {
		err = PTR_ERR(cmd);
		goto err_vm;
	}

	*value = cmd[result / sizeof(*cmd)];
	i915_gem_object_unpin_map(obj);
	i915_gem_object_put(obj);

	i915_vm_put(vm);
	return 0;

skip_request:
	i915_request_skip(rq, err);
err_request:
	i915_request_add(rq);
err_unpin:
	i915_vma_unpin(vma);
err_vm:
	i915_vm_put(vm);
err:
	i915_gem_object_put(obj);
	return err;
}

static int igt_vm_isolation(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct i915_gem_context *ctx_a, *ctx_b;
	struct intel_engine_cs *engine;
	struct igt_live_test t;
	struct drm_file *file;
	I915_RND_STATE(prng);
	unsigned long count;
	u64 vm_total;
	int err;

	if (INTEL_GEN(i915) < 7)
		return 0;

	/*
	 * The simple goal here is that a write into one context is not
	 * observed in a second (separate page tables and scratch).
	 */

	file = mock_file(i915);
	if (IS_ERR(file))
		return PTR_ERR(file);

	err = igt_live_test_begin(&t, i915, __func__, "");
	if (err)
		goto out_file;

	ctx_a = live_context(i915, file);
	if (IS_ERR(ctx_a)) {
		err = PTR_ERR(ctx_a);
		goto out_file;
	}

	ctx_b = live_context(i915, file);
	if (IS_ERR(ctx_b)) {
		err = PTR_ERR(ctx_b);
		goto out_file;
	}

	/* We can only test vm isolation if the vms are distinct */
	if (ctx_a->vm == ctx_b->vm)
		goto out_file;

	vm_total = ctx_a->vm->total;
	GEM_BUG_ON(ctx_b->vm->total != vm_total);
	vm_total -= I915_GTT_PAGE_SIZE;

	count = 0;
	for_each_uabi_engine(engine, i915) {
		IGT_TIMEOUT(end_time);
		unsigned long this = 0;

		if (!intel_engine_can_store_dword(engine))
			continue;

		while (!__igt_timeout(end_time, NULL)) {
			u32 value = 0xc5c5c5c5;
			u64 offset;

			div64_u64_rem(i915_prandom_u64_state(&prng),
				      vm_total, &offset);
			offset = round_down(offset, alignof_dword);
			offset += I915_GTT_PAGE_SIZE;

			err = write_to_scratch(ctx_a, engine,
					       offset, 0xdeadbeef);
			if (err == 0)
				err = read_from_scratch(ctx_b, engine,
							offset, &value);
			if (err)
				goto out_file;

			if (value) {
				pr_err("%s: Read %08x from scratch (offset 0x%08x_%08x), after %lu reads!\n",
				       engine->name, value,
				       upper_32_bits(offset),
				       lower_32_bits(offset),
				       this);
				err = -EINVAL;
				goto out_file;
			}

			this++;
		}
		count += this;
	}
	pr_info("Checked %lu scratch offsets across %d engines\n",
		count, RUNTIME_INFO(i915)->num_engines);

out_file:
	if (igt_live_test_end(&t))
		err = -EIO;
	mock_file_free(i915, file);
	return err;
}

static bool skip_unused_engines(struct intel_context *ce, void *data)
{
	return !ce->state;
}

static void mock_barrier_task(void *data)
{
	unsigned int *counter = data;

	++*counter;
}

static int mock_context_barrier(void *arg)
{
#undef pr_fmt
#define pr_fmt(x) "context_barrier_task():" # x
	struct drm_i915_private *i915 = arg;
	struct i915_gem_context *ctx;
	struct i915_request *rq;
	unsigned int counter;
	int err;

	/*
	 * The context barrier provides us with a callback after it emits
	 * a request; useful for retiring old state after loading new.
	 */
	ctx = mock_context(i915, "mock");
	if (!ctx)
		return -ENOMEM;

	counter = 0;
	err = context_barrier_task(ctx, 0,
				   NULL, NULL, mock_barrier_task, &counter);
	if (err) {
		pr_err("Failed at line %d, err=%d\n", __LINE__, err);
		goto out;
	}
	if (counter == 0) {
		pr_err("Did not retire immediately with 0 engines\n");
		err = -EINVAL;
		goto out;
	}

	counter = 0;
	err = context_barrier_task(ctx, ALL_ENGINES,
				   skip_unused_engines,
				   NULL,
				   mock_barrier_task,
				   &counter);
	if (err) {
		pr_err("Failed at line %d, err=%d\n", __LINE__, err);
		goto out;
	}
	if (counter == 0) {
		pr_err("Did not retire immediately for all unused engines\n");
		err = -EINVAL;
		goto out;
	}

	rq = igt_request_alloc(ctx, i915->engine[RCS0]);
	if (IS_ERR(rq)) {
		pr_err("Request allocation failed!\n");
		err = PTR_ERR(rq);
		goto out;
	}
	i915_request_add(rq);

	counter = 0;
	context_barrier_inject_fault = BIT(RCS0);
	err = context_barrier_task(ctx, ALL_ENGINES,
				   NULL, NULL, mock_barrier_task, &counter);
	context_barrier_inject_fault = 0;
	if (err == -ENXIO)
		err = 0;
	else
		pr_err("Did not hit fault injection!\n");
	if (counter != 0) {
		pr_err("Invoked callback on error!\n");
		err = -EIO;
	}
	if (err)
		goto out;

	counter = 0;
	err = context_barrier_task(ctx, ALL_ENGINES,
				   skip_unused_engines,
				   NULL,
				   mock_barrier_task,
				   &counter);
	if (err) {
		pr_err("Failed at line %d, err=%d\n", __LINE__, err);
		goto out;
	}
	mock_device_flush(i915);
	if (counter == 0) {
		pr_err("Did not retire on each active engine\n");
		err = -EINVAL;
		goto out;
	}

out:
	mock_context_close(ctx);
	return err;
#undef pr_fmt
#define pr_fmt(x) x
}

int i915_gem_context_mock_selftests(void)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(mock_context_barrier),
	};
	struct drm_i915_private *i915;
	int err;

	i915 = mock_gem_device();
	if (!i915)
		return -ENOMEM;

	err = i915_subtests(tests, i915);

	drm_dev_put(&i915->drm);
	return err;
}

int i915_gem_context_live_selftests(struct drm_i915_private *i915)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(live_nop_switch),
		SUBTEST(live_parallel_switch),
		SUBTEST(igt_ctx_exec),
		SUBTEST(igt_ctx_readonly),
		SUBTEST(igt_ctx_sseu),
		SUBTEST(igt_shared_ctx_exec),
		SUBTEST(igt_vm_isolation),
	};

	if (intel_gt_is_wedged(&i915->gt))
		return 0;

	return i915_live_subtests(tests, i915);
}