/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2017 Intel Corporation
 */

#include <linux/prime_numbers.h>

#include "gem/i915_gem_pm.h"
#include "gt/intel_gt.h"
#include "gt/intel_gt_requests.h"
#include "gt/intel_reset.h"
#include "i915_selftest.h"

#include "gem/selftests/igt_gem_utils.h"
#include "selftests/i915_random.h"
#include "selftests/igt_flush_test.h"
#include "selftests/igt_live_test.h"
#include "selftests/igt_reset.h"
#include "selftests/igt_spinner.h"
#include "selftests/mock_drm.h"
#include "selftests/mock_gem_device.h"

#include "huge_gem_object.h"
#include "igt_gem_utils.h"

#define DW_PER_PAGE (PAGE_SIZE / sizeof(u32))

static int live_nop_switch(void *arg)
{
	const unsigned int nctx = 1024;
	struct drm_i915_private *i915 = arg;
	struct intel_engine_cs *engine;
	struct i915_gem_context **ctx;
	enum intel_engine_id id;
	struct igt_live_test t;
	struct drm_file *file;
	unsigned long n;
	int err = -ENODEV;

	/*
	 * Create as many contexts as we can feasibly get away with
	 * and check we can switch between them rapidly.
	 *
	 * Serves as a very simple stress test for submission and HW switching
	 * between contexts.
	 */

	if (!DRIVER_CAPS(i915)->has_logical_contexts)
		return 0;

	file = mock_file(i915);
	if (IS_ERR(file))
		return PTR_ERR(file);

	ctx = kcalloc(nctx, sizeof(*ctx), GFP_KERNEL);
	if (!ctx) {
		err = -ENOMEM;
		goto out_file;
	}

	for (n = 0; n < nctx; n++) {
		ctx[n] = live_context(i915, file);
		if (IS_ERR(ctx[n])) {
			err = PTR_ERR(ctx[n]);
			goto out_file;
		}
	}

	for_each_engine(engine, i915, id) {
		struct i915_request *rq;
		unsigned long end_time, prime;
		ktime_t times[2] = {};

		times[0] = ktime_get_raw();
		for (n = 0; n < nctx; n++) {
			rq = igt_request_alloc(ctx[n], engine);
			if (IS_ERR(rq)) {
				err = PTR_ERR(rq);
				goto out_file;
			}
			i915_request_add(rq);
		}
		if (i915_request_wait(rq, 0, HZ / 5) < 0) {
			pr_err("Failed to populate %d contexts\n", nctx);
			intel_gt_set_wedged(&i915->gt);
			err = -EIO;
			goto out_file;
		}

		times[1] = ktime_get_raw();

		pr_info("Populated %d contexts on %s in %lluns\n",
			nctx, engine->name, ktime_to_ns(times[1] - times[0]));

		err = igt_live_test_begin(&t, i915, __func__, engine->name);
		if (err)
			goto out_file;

		end_time = jiffies + i915_selftest.timeout_jiffies;
		for_each_prime_number_from(prime, 2, 8192) {
			times[1] = ktime_get_raw();

			for (n = 0; n < prime; n++) {
				rq = igt_request_alloc(ctx[n % nctx], engine);
				if (IS_ERR(rq)) {
					err = PTR_ERR(rq);
					goto out_file;
				}

				/*
				 * This space is left intentionally blank.
				 *
				 * We do not actually want to perform any
				 * action with this request, we just want
				 * to measure the latency in allocation
				 * and submission of our breadcrumbs -
				 * ensuring that the bare request is sufficient
				 * for the system to work (i.e. proper HEAD
				 * tracking of the rings, interrupt handling,
				 * etc). It also gives us the lowest bounds
				 * for latency.
				 */

				i915_request_add(rq);
			}
			if (i915_request_wait(rq, 0, HZ / 5) < 0) {
				pr_err("Switching between %ld contexts timed out\n",
				       prime);
				intel_gt_set_wedged(&i915->gt);
				break;
			}

			times[1] = ktime_sub(ktime_get_raw(), times[1]);
			if (prime == 2)
				times[0] = times[1];

			if (__igt_timeout(end_time, NULL))
				break;
		}

		err = igt_live_test_end(&t);
		if (err)
			goto out_file;

		pr_info("Switch latencies on %s: 1 = %lluns, %lu = %lluns\n",
			engine->name,
			ktime_to_ns(times[0]),
			prime - 1, div64_u64(ktime_to_ns(times[1]), prime - 1));
	}

out_file:
	mock_file_free(i915, file);
	return err;
}

struct parallel_switch {
	struct task_struct *tsk;
	struct intel_context *ce[2];
};

static int __live_parallel_switch1(void *data)
{
	struct parallel_switch *arg = data;
	IGT_TIMEOUT(end_time);
	unsigned long count;

	count = 0;
	do {
		struct i915_request *rq = NULL;
		int err, n;

		for (n = 0; n < ARRAY_SIZE(arg->ce); n++) {
			i915_request_put(rq);

			rq = i915_request_create(arg->ce[n]);
			if (IS_ERR(rq))
				return PTR_ERR(rq);

			i915_request_get(rq);
			i915_request_add(rq);
		}

		err = 0;
		if (i915_request_wait(rq, 0, HZ / 5) < 0)
			err = -ETIME;
		i915_request_put(rq);
		if (err)
			return err;

		count++;
	} while (!__igt_timeout(end_time, NULL));

	pr_info("%s: %lu switches (sync)\n", arg->ce[0]->engine->name, count);
	return 0;
}

static int __live_parallel_switchN(void *data)
{
	struct parallel_switch *arg = data;
	IGT_TIMEOUT(end_time);
	unsigned long count;
	int n;

	count = 0;
	do {
		for (n = 0; n < ARRAY_SIZE(arg->ce); n++) {
			struct i915_request *rq;

			rq = i915_request_create(arg->ce[n]);
			if (IS_ERR(rq))
				return PTR_ERR(rq);

			i915_request_add(rq);
		}

		count++;
	} while (!__igt_timeout(end_time, NULL));

	pr_info("%s: %lu switches (many)\n", arg->ce[0]->engine->name, count);
	return 0;
}

static int live_parallel_switch(void *arg)
{
	struct drm_i915_private *i915 = arg;
	static int (* const func[])(void *arg) = {
		__live_parallel_switch1,
		__live_parallel_switchN,
		NULL,
	};
	struct parallel_switch *data = NULL;
	struct i915_gem_engines *engines;
	struct i915_gem_engines_iter it;
	int (* const *fn)(void *arg);
	struct i915_gem_context *ctx;
	struct intel_context *ce;
	struct drm_file *file;
	int n, m, count;
	int err = 0;

	/*
	 * Check we can process switches on all engines simultaneously.
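	 *
	 * One kthread is spawned per engine: __live_parallel_switch1 waits
	 * for the tail request of each pair before looping again (sync),
	 * while __live_parallel_switchN keeps submitting without waiting
	 * (many).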
	 */

	if (!DRIVER_CAPS(i915)->has_logical_contexts)
		return 0;

	file = mock_file(i915);
	if (IS_ERR(file))
		return PTR_ERR(file);

	ctx = live_context(i915, file);
	if (IS_ERR(ctx)) {
		err = PTR_ERR(ctx);
		goto out_file;
	}

	engines = i915_gem_context_lock_engines(ctx);
	count = engines->num_engines;

	data = kcalloc(count, sizeof(*data), GFP_KERNEL);
	if (!data) {
		i915_gem_context_unlock_engines(ctx);
		err = -ENOMEM;
		goto out_file;
	}

	m = 0; /* Use the first context as our template for the engines */
	for_each_gem_engine(ce, engines, it) {
		err = intel_context_pin(ce);
		if (err) {
			i915_gem_context_unlock_engines(ctx);
			goto out;
		}
		data[m++].ce[0] = intel_context_get(ce);
	}
	i915_gem_context_unlock_engines(ctx);

	/* Clone the same set of engines into the other contexts */
	for (n = 1; n < ARRAY_SIZE(data->ce); n++) {
		ctx = live_context(i915, file);
		if (IS_ERR(ctx)) {
			err = PTR_ERR(ctx);
			goto out;
		}

		for (m = 0; m < count; m++) {
			if (!data[m].ce[0])
				continue;

			ce = intel_context_create(ctx, data[m].ce[0]->engine);
			if (IS_ERR(ce)) {
				err = PTR_ERR(ce);
				goto out;
			}

			err = intel_context_pin(ce);
			if (err) {
				intel_context_put(ce);
				goto out;
			}

			data[m].ce[n] = ce;
		}
	}

	for (fn = func; !err && *fn; fn++) {
		struct igt_live_test t;
		int n;

		err = igt_live_test_begin(&t, i915, __func__, "");
		if (err)
			break;

		for (n = 0; n < count; n++) {
			if (!data[n].ce[0])
				continue;

			data[n].tsk = kthread_run(*fn, &data[n],
						  "igt/parallel:%s",
						  data[n].ce[0]->engine->name);
			if (IS_ERR(data[n].tsk)) {
				err = PTR_ERR(data[n].tsk);
				break;
			}
			get_task_struct(data[n].tsk);
		}

		for (n = 0; n < count; n++) {
			int status;

			if (IS_ERR_OR_NULL(data[n].tsk))
				continue;

			status = kthread_stop(data[n].tsk);
			if (status && !err)
				err = status;

			put_task_struct(data[n].tsk);
			data[n].tsk = NULL;
		}

		if (igt_live_test_end(&t))
			err = -EIO;
	}

out:
	for (n = 0; n < count; n++) {
		for (m = 0; m < ARRAY_SIZE(data->ce); m++) {
			if (!data[n].ce[m])
				continue;

			intel_context_unpin(data[n].ce[m]);
			intel_context_put(data[n].ce[m]);
		}
	}
	kfree(data);
out_file:
	mock_file_free(i915, file);
	return err;
}

static unsigned long real_page_count(struct drm_i915_gem_object *obj)
{
	return huge_gem_object_phys_size(obj) >> PAGE_SHIFT;
}

static unsigned long fake_page_count(struct drm_i915_gem_object *obj)
{
	return huge_gem_object_dma_size(obj) >> PAGE_SHIFT;
}

static int gpu_fill(struct intel_context *ce,
		    struct drm_i915_gem_object *obj,
		    unsigned int dw)
{
	struct i915_vma *vma;
	int err;

	GEM_BUG_ON(obj->base.size > ce->vm->total);
	GEM_BUG_ON(!intel_engine_can_store_dword(ce->engine));

	vma = i915_vma_instance(obj, ce->vm, NULL);
	if (IS_ERR(vma))
		return PTR_ERR(vma);

	err = i915_vma_pin(vma, 0, 0, PIN_HIGH | PIN_USER);
	if (err)
		return err;

	/*
	 * Within the GTT the huge object maps every page onto
	 * its 1024 real pages (using phys_pfn = dma_pfn % 1024).
	 * We set the nth dword within the page using the nth
	 * mapping via the GTT - this should exercise the GTT mapping
	 * whilst checking that each context provides a unique view
	 * into the object.
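	 *
	 * For example, pass dw == 3 writes the value 3 into dword 3 of
	 * every physical page, through the run of fake pages starting at
	 * page 3 * real_page_count(obj) of the GTT view.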
	 */
	err = igt_gpu_fill_dw(ce, vma,
			      (dw * real_page_count(obj)) << PAGE_SHIFT |
			      (dw * sizeof(u32)),
			      real_page_count(obj),
			      dw);
	i915_vma_unpin(vma);

	return err;
}

static int cpu_fill(struct drm_i915_gem_object *obj, u32 value)
{
	const bool has_llc = HAS_LLC(to_i915(obj->base.dev));
	unsigned int n, m, need_flush;
	int err;

	err = i915_gem_object_prepare_write(obj, &need_flush);
	if (err)
		return err;

	for (n = 0; n < real_page_count(obj); n++) {
		u32 *map;

		map = kmap_atomic(i915_gem_object_get_page(obj, n));
		for (m = 0; m < DW_PER_PAGE; m++)
			map[m] = value;
		if (!has_llc)
			drm_clflush_virt_range(map, PAGE_SIZE);
		kunmap_atomic(map);
	}

	i915_gem_object_finish_access(obj);
	obj->read_domains = I915_GEM_DOMAIN_GTT | I915_GEM_DOMAIN_CPU;
	obj->write_domain = 0;
	return 0;
}

static noinline int cpu_check(struct drm_i915_gem_object *obj,
			      unsigned int idx, unsigned int max)
{
	unsigned int n, m, needs_flush;
	int err;

	err = i915_gem_object_prepare_read(obj, &needs_flush);
	if (err)
		return err;

	for (n = 0; n < real_page_count(obj); n++) {
		u32 *map;

		map = kmap_atomic(i915_gem_object_get_page(obj, n));
		if (needs_flush & CLFLUSH_BEFORE)
			drm_clflush_virt_range(map, PAGE_SIZE);

		for (m = 0; m < max; m++) {
			if (map[m] != m) {
				pr_err("%pS: Invalid value at object %d page %d/%ld, offset %d/%d: found %x expected %x\n",
				       __builtin_return_address(0), idx,
				       n, real_page_count(obj), m, max,
				       map[m], m);
				err = -EINVAL;
				goto out_unmap;
			}
		}

		for (; m < DW_PER_PAGE; m++) {
			if (map[m] != STACK_MAGIC) {
				pr_err("%pS: Invalid value at object %d page %d, offset %d: found %x expected %x (uninitialised)\n",
				       __builtin_return_address(0), idx, n, m,
				       map[m], STACK_MAGIC);
				err = -EINVAL;
				goto out_unmap;
			}
		}

out_unmap:
		kunmap_atomic(map);
		if (err)
			break;
	}

	i915_gem_object_finish_access(obj);
	return err;
}

static int file_add_object(struct drm_file *file,
			   struct drm_i915_gem_object *obj)
{
	int err;

	GEM_BUG_ON(obj->base.handle_count);

	/* tie the object to the drm_file for easy reaping */
	err = idr_alloc(&file->object_idr, &obj->base, 1, 0, GFP_KERNEL);
	if (err < 0)
		return err;

	i915_gem_object_get(obj);
	obj->base.handle_count++;
	return 0;
}

static struct drm_i915_gem_object *
create_test_object(struct i915_address_space *vm,
		   struct drm_file *file,
		   struct list_head *objects)
{
	struct drm_i915_gem_object *obj;
	u64 size;
	int err;

	/* Keep in GEM's good graces */
	intel_gt_retire_requests(vm->gt);

	size = min(vm->total / 2, 1024ull * DW_PER_PAGE * PAGE_SIZE);
	size = round_down(size, DW_PER_PAGE * PAGE_SIZE);

	obj = huge_gem_object(vm->i915, DW_PER_PAGE * PAGE_SIZE, size);
	if (IS_ERR(obj))
		return obj;

	err = file_add_object(file, obj);
	i915_gem_object_put(obj);
	if (err)
		return ERR_PTR(err);

	err = cpu_fill(obj, STACK_MAGIC);
	if (err) {
		pr_err("Failed to fill object with cpu, err=%d\n",
		       err);
		return ERR_PTR(err);
	}

	list_add_tail(&obj->st_link, objects);
	return obj;
}

static unsigned long max_dwords(struct drm_i915_gem_object *obj)
{
	unsigned long npages = fake_page_count(obj);

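	/*
	 * Each gpu_fill() pass writes a single dword into every real page
	 * via its own run of fake pages, so the object can record
	 * npages / DW_PER_PAGE distinct passes before wrapping.
	 */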
	GEM_BUG_ON(!IS_ALIGNED(npages, DW_PER_PAGE));
	return npages / DW_PER_PAGE;
}

static void throttle_release(struct i915_request **q, int count)
{
	int i;

	for (i = 0; i < count; i++) {
		if (IS_ERR_OR_NULL(q[i]))
			continue;

		i915_request_put(fetch_and_zero(&q[i]));
	}
}

/*
 * Keep a small sliding window of requests in flight: wait for the oldest,
 * then replace it with a fresh request on @ce, so the tests never run too
 * far ahead of the GPU.
 */
static int throttle(struct intel_context *ce,
		    struct i915_request **q, int count)
{
	int i;

	if (!IS_ERR_OR_NULL(q[0])) {
		if (i915_request_wait(q[0],
				      I915_WAIT_INTERRUPTIBLE,
				      MAX_SCHEDULE_TIMEOUT) < 0)
			return -EINTR;

		i915_request_put(q[0]);
	}

	for (i = 0; i < count - 1; i++)
		q[i] = q[i + 1];

	q[i] = intel_context_create_request(ce);
	if (IS_ERR(q[i]))
		return PTR_ERR(q[i]);

	i915_request_get(q[i]);
	i915_request_add(q[i]);

	return 0;
}

static int igt_ctx_exec(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	int err = -ENODEV;

	/*
	 * Create a few different contexts (with different mm) and write
	 * through each ctx/mm using the GPU making sure those writes end
	 * up in the expected pages of our obj.
	 */

	if (!DRIVER_CAPS(i915)->has_logical_contexts)
		return 0;

	for_each_engine(engine, i915, id) {
		struct drm_i915_gem_object *obj = NULL;
		unsigned long ncontexts, ndwords, dw;
		struct i915_request *tq[5] = {};
		struct igt_live_test t;
		struct drm_file *file;
		IGT_TIMEOUT(end_time);
		LIST_HEAD(objects);

		if (!intel_engine_can_store_dword(engine))
			continue;

		if (!engine->context_size)
			continue; /* No logical context support in HW */

		file = mock_file(i915);
		if (IS_ERR(file))
			return PTR_ERR(file);

		err = igt_live_test_begin(&t, i915, __func__, engine->name);
		if (err)
			goto out_file;

		ncontexts = 0;
		ndwords = 0;
		dw = 0;
		while (!time_after(jiffies, end_time)) {
			struct i915_gem_context *ctx;
			struct intel_context *ce;

			ctx = kernel_context(i915);
			if (IS_ERR(ctx)) {
				err = PTR_ERR(ctx);
				goto out_file;
			}

			ce = i915_gem_context_get_engine(ctx, engine->legacy_idx);
			GEM_BUG_ON(IS_ERR(ce));

			if (!obj) {
				obj = create_test_object(ce->vm, file, &objects);
				if (IS_ERR(obj)) {
					err = PTR_ERR(obj);
					intel_context_put(ce);
					kernel_context_close(ctx);
					goto out_file;
				}
			}

			err = gpu_fill(ce, obj, dw);
			if (err) {
				pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) [full-ppgtt? %s], err=%d\n",
				       ndwords, dw, max_dwords(obj),
				       engine->name,
				       yesno(!!rcu_access_pointer(ctx->vm)),
				       err);
				intel_context_put(ce);
				kernel_context_close(ctx);
				goto out_file;
			}

			err = throttle(ce, tq, ARRAY_SIZE(tq));
			if (err) {
				intel_context_put(ce);
				kernel_context_close(ctx);
				goto out_file;
			}

			if (++dw == max_dwords(obj)) {
				obj = NULL;
				dw = 0;
			}

			ndwords++;
			ncontexts++;

			intel_context_put(ce);
			kernel_context_close(ctx);
		}

		pr_info("Submitted %lu contexts to %s, filling %lu dwords\n",
			ncontexts, engine->name, ndwords);

		ncontexts = dw = 0;
		list_for_each_entry(obj, &objects, st_link) {
			unsigned int rem =
				min_t(unsigned int, ndwords - dw, max_dwords(obj));

			err = cpu_check(obj, ncontexts++, rem);
			if (err)
				break;

			dw += rem;
		}

out_file:
		throttle_release(tq, ARRAY_SIZE(tq));
		if (igt_live_test_end(&t))
			err = -EIO;

		mock_file_free(i915, file);
		if (err)
			return err;

		i915_gem_drain_freed_objects(i915);
	}

	return 0;
}

static int igt_shared_ctx_exec(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct i915_request *tq[5] = {};
	struct i915_gem_context *parent;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	struct igt_live_test t;
	struct drm_file *file;
	int err = 0;

	/*
	 * Create a few different contexts with the same mm and write
	 * through each ctx using the GPU making sure those writes end
	 * up in the expected pages of our obj.
	 */
	if (!DRIVER_CAPS(i915)->has_logical_contexts)
		return 0;

	file = mock_file(i915);
	if (IS_ERR(file))
		return PTR_ERR(file);

	parent = live_context(i915, file);
	if (IS_ERR(parent)) {
		err = PTR_ERR(parent);
		goto out_file;
	}

	if (!parent->vm) { /* not full-ppgtt; nothing to share */
		err = 0;
		goto out_file;
	}

	err = igt_live_test_begin(&t, i915, __func__, "");
	if (err)
		goto out_file;

	for_each_engine(engine, i915, id) {
		unsigned long ncontexts, ndwords, dw;
		struct drm_i915_gem_object *obj = NULL;
		IGT_TIMEOUT(end_time);
		LIST_HEAD(objects);

		if (!intel_engine_can_store_dword(engine))
			continue;

		dw = 0;
		ndwords = 0;
		ncontexts = 0;
		while (!time_after(jiffies, end_time)) {
			struct i915_gem_context *ctx;
			struct intel_context *ce;

			ctx = kernel_context(i915);
			if (IS_ERR(ctx)) {
				err = PTR_ERR(ctx);
				goto out_test;
			}

			mutex_lock(&ctx->mutex);
			__assign_ppgtt(ctx, parent->vm);
			mutex_unlock(&ctx->mutex);

			ce = i915_gem_context_get_engine(ctx, engine->legacy_idx);
			GEM_BUG_ON(IS_ERR(ce));

			if (!obj) {
				obj = create_test_object(parent->vm, file, &objects);
				if (IS_ERR(obj)) {
					err = PTR_ERR(obj);
					intel_context_put(ce);
					kernel_context_close(ctx);
					goto out_test;
				}
			}

			err = gpu_fill(ce, obj, dw);
			if (err) {
				pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) [full-ppgtt? %s], err=%d\n",
				       ndwords, dw, max_dwords(obj),
				       engine->name,
				       yesno(!!rcu_access_pointer(ctx->vm)),
				       err);
				intel_context_put(ce);
				kernel_context_close(ctx);
				goto out_test;
			}

			err = throttle(ce, tq, ARRAY_SIZE(tq));
			if (err) {
				intel_context_put(ce);
				kernel_context_close(ctx);
				goto out_test;
			}

			if (++dw == max_dwords(obj)) {
				obj = NULL;
				dw = 0;
			}

			ndwords++;
			ncontexts++;

			intel_context_put(ce);
			kernel_context_close(ctx);
		}
		pr_info("Submitted %lu contexts to %s, filling %lu dwords\n",
			ncontexts, engine->name, ndwords);

		ncontexts = dw = 0;
		list_for_each_entry(obj, &objects, st_link) {
			unsigned int rem =
				min_t(unsigned int, ndwords - dw, max_dwords(obj));

			err = cpu_check(obj, ncontexts++, rem);
			if (err)
				goto out_test;

			dw += rem;
		}

		i915_gem_drain_freed_objects(i915);
	}
out_test:
	throttle_release(tq, ARRAY_SIZE(tq));
	if (igt_live_test_end(&t))
		err = -EIO;
out_file:
	mock_file_free(i915, file);
	return err;
}

static struct i915_vma *rpcs_query_batch(struct i915_vma *vma)
{
	struct drm_i915_gem_object *obj;
	u32 *cmd;
	int err;

	if (INTEL_GEN(vma->vm->i915) < 8)
		return ERR_PTR(-EINVAL);

	obj = i915_gem_object_create_internal(vma->vm->i915, PAGE_SIZE);
	if (IS_ERR(obj))
		return ERR_CAST(obj);

	cmd = i915_gem_object_pin_map(obj, I915_MAP_WB);
	if (IS_ERR(cmd)) {
		err = PTR_ERR(cmd);
		goto err;
	}

	*cmd++ = MI_STORE_REGISTER_MEM_GEN8;
	*cmd++ = i915_mmio_reg_offset(GEN8_R_PWR_CLK_STATE);
	*cmd++ = lower_32_bits(vma->node.start);
	*cmd++ = upper_32_bits(vma->node.start);
	*cmd = MI_BATCH_BUFFER_END;

	__i915_gem_object_flush_map(obj, 0, 64);
	i915_gem_object_unpin_map(obj);

	intel_gt_chipset_flush(vma->vm->gt);

	vma = i915_vma_instance(obj, vma->vm, NULL);
	if (IS_ERR(vma)) {
		err = PTR_ERR(vma);
		goto err;
	}

	err = i915_vma_pin(vma, 0, 0, PIN_USER);
	if (err)
		goto err;

	return vma;

err:
	i915_gem_object_put(obj);
	return ERR_PTR(err);
}

static int
emit_rpcs_query(struct drm_i915_gem_object *obj,
		struct intel_context *ce,
		struct i915_request **rq_out)
{
	struct i915_request *rq;
	struct i915_vma *batch;
	struct i915_vma *vma;
	int err;

	GEM_BUG_ON(!intel_engine_can_store_dword(ce->engine));

	vma = i915_vma_instance(obj, ce->vm, NULL);
	if (IS_ERR(vma))
		return PTR_ERR(vma);

	i915_gem_object_lock(obj);
	err = i915_gem_object_set_to_gtt_domain(obj, false);
	i915_gem_object_unlock(obj);
	if (err)
		return err;

	err = i915_vma_pin(vma, 0, 0, PIN_USER);
	if (err)
		return err;

	batch = rpcs_query_batch(vma);
	if (IS_ERR(batch)) {
		err = PTR_ERR(batch);
		goto err_vma;
	}

	rq = i915_request_create(ce);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto err_batch;
	}

	err = rq->engine->emit_bb_start(rq,
					batch->node.start, batch->node.size,
					0);
	if (err)
		goto err_request;

	i915_vma_lock(batch);
	err = i915_request_await_object(rq, batch->obj, false);
	if (err == 0)
		err = i915_vma_move_to_active(batch, rq, 0);
	i915_vma_unlock(batch);
	if (err)
		goto skip_request;

	i915_vma_lock(vma);
	err = i915_request_await_object(rq, vma->obj, true);
	if (err == 0)
		err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
	i915_vma_unlock(vma);
	if (err)
		goto skip_request;

	i915_vma_unpin_and_release(&batch, 0);
	i915_vma_unpin(vma);

	*rq_out = i915_request_get(rq);

	i915_request_add(rq);

	return 0;

skip_request:
	i915_request_skip(rq, err);
err_request:
	i915_request_add(rq);
err_batch:
	i915_vma_unpin_and_release(&batch, 0);
err_vma:
	i915_vma_unpin(vma);

	return err;
}

#define TEST_IDLE BIT(0)
#define TEST_BUSY BIT(1)
#define TEST_RESET BIT(2)

static int
__sseu_prepare(const char *name,
	       unsigned int flags,
	       struct intel_context *ce,
	       struct igt_spinner **spin)
{
	struct i915_request *rq;
	int ret;

	*spin = NULL;
	if (!(flags & (TEST_BUSY | TEST_RESET)))
		return 0;

	*spin = kzalloc(sizeof(**spin), GFP_KERNEL);
	if (!*spin)
		return -ENOMEM;

	ret = igt_spinner_init(*spin, ce->engine->gt);
	if (ret)
		goto err_free;

	rq = igt_spinner_create_request(*spin, ce, MI_NOOP);
	if (IS_ERR(rq)) {
		ret = PTR_ERR(rq);
		goto err_fini;
	}

	i915_request_add(rq);

	if (!igt_wait_for_spinner(*spin, rq)) {
		pr_err("%s: Spinner failed to start!\n", name);
		ret = -ETIMEDOUT;
		goto err_end;
	}

	return 0;

err_end:
	igt_spinner_end(*spin);
err_fini:
	igt_spinner_fini(*spin);
err_free:
	kfree(fetch_and_zero(spin));
	return ret;
}

static int
__read_slice_count(struct intel_context *ce,
		   struct drm_i915_gem_object *obj,
		   struct igt_spinner *spin,
		   u32 *rpcs)
{
	struct i915_request *rq = NULL;
	u32 s_mask, s_shift;
	unsigned int cnt;
	u32 *buf, val;
	long ret;

	ret = emit_rpcs_query(obj, ce, &rq);
	if (ret)
		return ret;

	if (spin)
		igt_spinner_end(spin);

	ret = i915_request_wait(rq, 0, MAX_SCHEDULE_TIMEOUT);
	i915_request_put(rq);
	if (ret < 0)
		return ret;

	buf = i915_gem_object_pin_map(obj, I915_MAP_WB);
	if (IS_ERR(buf)) {
		ret = PTR_ERR(buf);
		return ret;
	}

	if (INTEL_GEN(ce->engine->i915) >= 11) {
		s_mask = GEN11_RPCS_S_CNT_MASK;
		s_shift = GEN11_RPCS_S_CNT_SHIFT;
	} else {
		s_mask = GEN8_RPCS_S_CNT_MASK;
		s_shift = GEN8_RPCS_S_CNT_SHIFT;
	}

	val = *buf;
	cnt = (val & s_mask) >> s_shift;
	*rpcs = val;

	i915_gem_object_unpin_map(obj);

	return cnt;
}

static int
__check_rpcs(const char *name, u32 rpcs, int slices, unsigned int expected,
	     const char *prefix, const char *suffix)
{
	if (slices == expected)
		return 0;

	if (slices < 0) {
		pr_err("%s: %s read slice count failed with %d%s\n",
		       name, prefix, slices, suffix);
		return slices;
	}

	pr_err("%s: %s slice count %d is not %u%s\n",
	       name, prefix, slices, expected, suffix);

	pr_info("RPCS=0x%x; %u%sx%u%s\n",
		rpcs, slices,
		(rpcs & GEN8_RPCS_S_CNT_ENABLE) ? "*" : "",
		(rpcs & GEN8_RPCS_SS_CNT_MASK) >> GEN8_RPCS_SS_CNT_SHIFT,
		(rpcs & GEN8_RPCS_SS_CNT_ENABLE) ? "*" : "");

	return -EINVAL;
}

static int
__sseu_finish(const char *name,
	      unsigned int flags,
	      struct intel_context *ce,
	      struct drm_i915_gem_object *obj,
	      unsigned int expected,
	      struct igt_spinner *spin)
{
	unsigned int slices = hweight32(ce->engine->sseu.slice_mask);
	u32 rpcs = 0;
	int ret = 0;

	if (flags & TEST_RESET) {
		ret = intel_engine_reset(ce->engine, "sseu");
		if (ret)
			goto out;
	}

	ret = __read_slice_count(ce, obj,
				 flags & TEST_RESET ? NULL : spin, &rpcs);
	ret = __check_rpcs(name, rpcs, ret, expected, "Context", "!");
	if (ret)
		goto out;

	ret = __read_slice_count(ce->engine->kernel_context, obj, NULL, &rpcs);
	ret = __check_rpcs(name, rpcs, ret, slices, "Kernel context", "!");

out:
	if (spin)
		igt_spinner_end(spin);

	if ((flags & TEST_IDLE) && ret == 0) {
		ret = intel_gt_wait_for_idle(ce->engine->gt,
					     MAX_SCHEDULE_TIMEOUT);
		if (ret)
			return ret;

		ret = __read_slice_count(ce, obj, NULL, &rpcs);
		ret = __check_rpcs(name, rpcs, ret, expected,
				   "Context", " after idle!");
	}

	return ret;
}

static int
__sseu_test(const char *name,
	    unsigned int flags,
	    struct intel_context *ce,
	    struct drm_i915_gem_object *obj,
	    struct intel_sseu sseu)
{
	struct igt_spinner *spin = NULL;
	int ret;

	ret = __sseu_prepare(name, flags, ce, &spin);
	if (ret)
		return ret;

	ret = intel_context_reconfigure_sseu(ce, sseu);
	if (ret)
		goto out_spin;

	ret = __sseu_finish(name, flags, ce, obj,
			    hweight32(sseu.slice_mask), spin);

out_spin:
	if (spin) {
		igt_spinner_end(spin);
		igt_spinner_fini(spin);
		kfree(spin);
	}
	return ret;
}

static int
__igt_ctx_sseu(struct drm_i915_private *i915,
	       const char *name,
	       unsigned int flags)
{
	struct intel_engine_cs *engine = i915->engine[RCS0];
	struct drm_i915_gem_object *obj;
	struct i915_gem_context *ctx;
	struct intel_context *ce;
	struct intel_sseu pg_sseu;
	struct drm_file *file;
	int ret;

	if (INTEL_GEN(i915) < 9 || !engine)
		return 0;

	if (!RUNTIME_INFO(i915)->sseu.has_slice_pg)
		return 0;

	if (hweight32(engine->sseu.slice_mask) < 2)
		return 0;

	/*
	 * Gen11 VME friendly power-gated configuration with half enabled
	 * sub-slices.
	 */
	pg_sseu = engine->sseu;
	pg_sseu.slice_mask = 1;
	pg_sseu.subslice_mask =
		~(~0 << (hweight32(engine->sseu.subslice_mask) / 2));

	pr_info("SSEU subtest '%s', flags=%x, def_slices=%u, pg_slices=%u\n",
		name, flags, hweight32(engine->sseu.slice_mask),
		hweight32(pg_sseu.slice_mask));

	file = mock_file(i915);
	if (IS_ERR(file))
		return PTR_ERR(file);

	if (flags & TEST_RESET)
		igt_global_reset_lock(&i915->gt);

	ctx = live_context(i915, file);
	if (IS_ERR(ctx)) {
		ret = PTR_ERR(ctx);
		goto out_unlock;
	}
	i915_gem_context_clear_bannable(ctx); /* to reset and beyond! */

	obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
	if (IS_ERR(obj)) {
		ret = PTR_ERR(obj);
		goto out_unlock;
	}

	ce = i915_gem_context_get_engine(ctx, RCS0);
	if (IS_ERR(ce)) {
		ret = PTR_ERR(ce);
		goto out_put;
	}

	ret = intel_context_pin(ce);
	if (ret)
		goto out_context;

	/* First set the default mask. */
	ret = __sseu_test(name, flags, ce, obj, engine->sseu);
	if (ret)
		goto out_fail;

	/* Then set a power-gated configuration. */
	ret = __sseu_test(name, flags, ce, obj, pg_sseu);
	if (ret)
		goto out_fail;

	/* Back to defaults. */
	ret = __sseu_test(name, flags, ce, obj, engine->sseu);
	if (ret)
		goto out_fail;

	/* One last power-gated configuration for the road. */
	ret = __sseu_test(name, flags, ce, obj, pg_sseu);
	if (ret)
		goto out_fail;

out_fail:
	if (igt_flush_test(i915))
		ret = -EIO;

	intel_context_unpin(ce);
out_context:
	intel_context_put(ce);
out_put:
	i915_gem_object_put(obj);

out_unlock:
	if (flags & TEST_RESET)
		igt_global_reset_unlock(&i915->gt);

	mock_file_free(i915, file);

	if (ret)
		pr_err("%s: Failed with %d!\n", name, ret);

	return ret;
}

static int igt_ctx_sseu(void *arg)
{
	struct {
		const char *name;
		unsigned int flags;
	} *phase, phases[] = {
		{ .name = "basic", .flags = 0 },
		{ .name = "idle", .flags = TEST_IDLE },
		{ .name = "busy", .flags = TEST_BUSY },
		{ .name = "busy-reset", .flags = TEST_BUSY | TEST_RESET },
		{ .name = "busy-idle", .flags = TEST_BUSY | TEST_IDLE },
		{ .name = "reset-idle", .flags = TEST_RESET | TEST_IDLE },
	};
	unsigned int i;
	int ret = 0;

	for (i = 0, phase = phases; ret == 0 && i < ARRAY_SIZE(phases);
	     i++, phase++)
		ret = __igt_ctx_sseu(arg, phase->name, phase->flags);

	return ret;
}

static int igt_ctx_readonly(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct drm_i915_gem_object *obj = NULL;
	struct i915_request *tq[5] = {};
	struct i915_address_space *vm;
	struct i915_gem_context *ctx;
	unsigned long idx, ndwords, dw;
	struct igt_live_test t;
	struct drm_file *file;
	I915_RND_STATE(prng);
	IGT_TIMEOUT(end_time);
	LIST_HEAD(objects);
	int err = -ENODEV;

	/*
	 * Create a few read-only objects (with the occasional writable object)
	 * and try to write into these objects, checking that the GPU discards
	 * any write to a read-only object.
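	 *
	 * Roughly half of the objects are flagged read-only at random; for
	 * those, the CPU check below expects to find only the STACK_MAGIC
	 * background pattern, i.e. no successful GPU writes at all.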
	 */

	file = mock_file(i915);
	if (IS_ERR(file))
		return PTR_ERR(file);

	err = igt_live_test_begin(&t, i915, __func__, "");
	if (err)
		goto out_file;

	ctx = live_context(i915, file);
	if (IS_ERR(ctx)) {
		err = PTR_ERR(ctx);
		goto out_file;
	}

	rcu_read_lock();
	vm = rcu_dereference(ctx->vm) ?: &i915->ggtt.alias->vm;
	if (!vm || !vm->has_read_only) {
		rcu_read_unlock();
		err = 0;
		goto out_file;
	}
	rcu_read_unlock();

	ndwords = 0;
	dw = 0;
	while (!time_after(jiffies, end_time)) {
		struct i915_gem_engines_iter it;
		struct intel_context *ce;

		for_each_gem_engine(ce,
				    i915_gem_context_lock_engines(ctx), it) {
			if (!intel_engine_can_store_dword(ce->engine))
				continue;

			if (!obj) {
				obj = create_test_object(ce->vm, file, &objects);
				if (IS_ERR(obj)) {
					err = PTR_ERR(obj);
					i915_gem_context_unlock_engines(ctx);
					goto out_file;
				}

				if (prandom_u32_state(&prng) & 1)
					i915_gem_object_set_readonly(obj);
			}

			err = gpu_fill(ce, obj, dw);
			if (err) {
				pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) [full-ppgtt? %s], err=%d\n",
				       ndwords, dw, max_dwords(obj),
				       ce->engine->name,
				       yesno(!!rcu_access_pointer(ctx->vm)),
				       err);
				i915_gem_context_unlock_engines(ctx);
				goto out_file;
			}

			err = throttle(ce, tq, ARRAY_SIZE(tq));
			if (err) {
				i915_gem_context_unlock_engines(ctx);
				goto out_file;
			}

			if (++dw == max_dwords(obj)) {
				obj = NULL;
				dw = 0;
			}
			ndwords++;
		}
		i915_gem_context_unlock_engines(ctx);
	}
	pr_info("Submitted %lu dwords (across %u engines)\n",
		ndwords, RUNTIME_INFO(i915)->num_engines);

	dw = 0;
	idx = 0;
	list_for_each_entry(obj, &objects, st_link) {
		unsigned int rem =
			min_t(unsigned int, ndwords - dw, max_dwords(obj));
		unsigned int num_writes;

		num_writes = rem;
		if (i915_gem_object_is_readonly(obj))
			num_writes = 0;

		err = cpu_check(obj, idx++, num_writes);
		if (err)
			break;

		dw += rem;
	}

out_file:
	throttle_release(tq, ARRAY_SIZE(tq));
	if (igt_live_test_end(&t))
		err = -EIO;

	mock_file_free(i915, file);
	return err;
}

static int check_scratch(struct i915_address_space *vm, u64 offset)
{
	struct drm_mm_node *node =
		__drm_mm_interval_first(&vm->mm,
					offset, offset + sizeof(u32) - 1);
	if (!node || node->start > offset)
		return 0;

	GEM_BUG_ON(offset >= node->start + node->size);

	pr_err("Target offset 0x%08x_%08x overlaps with a node in the mm!\n",
	       upper_32_bits(offset), lower_32_bits(offset));
	return -EINVAL;
}

static int write_to_scratch(struct i915_gem_context *ctx,
			    struct intel_engine_cs *engine,
			    u64 offset, u32 value)
{
	struct drm_i915_private *i915 = ctx->i915;
	struct drm_i915_gem_object *obj;
	struct i915_address_space *vm;
	struct i915_request *rq;
	struct i915_vma *vma;
	u32 *cmd;
	int err;

	GEM_BUG_ON(offset < I915_GTT_PAGE_SIZE);

	obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
	if (IS_ERR(obj))
		return PTR_ERR(obj);

	cmd = i915_gem_object_pin_map(obj, I915_MAP_WB);
	if (IS_ERR(cmd)) {
		err = PTR_ERR(cmd);
		goto err;
	}

	*cmd++ = MI_STORE_DWORD_IMM_GEN4;
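	/*
	 * Emit the target address: a full 64b address on gen8+, otherwise
	 * a zeroed dword followed by the 32b offset.
	 */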
	if (INTEL_GEN(i915) >= 8) {
		*cmd++ = lower_32_bits(offset);
		*cmd++ = upper_32_bits(offset);
	} else {
		*cmd++ = 0;
		*cmd++ = offset;
	}
	*cmd++ = value;
	*cmd = MI_BATCH_BUFFER_END;
	__i915_gem_object_flush_map(obj, 0, 64);
	i915_gem_object_unpin_map(obj);

	intel_gt_chipset_flush(engine->gt);

	vm = i915_gem_context_get_vm_rcu(ctx);
	vma = i915_vma_instance(obj, vm, NULL);
	if (IS_ERR(vma)) {
		err = PTR_ERR(vma);
		goto err_vm;
	}

	err = i915_vma_pin(vma, 0, 0, PIN_USER | PIN_OFFSET_FIXED);
	if (err)
		goto err_vm;

	err = check_scratch(vm, offset);
	if (err)
		goto err_unpin;

	rq = igt_request_alloc(ctx, engine);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto err_unpin;
	}

	err = engine->emit_bb_start(rq, vma->node.start, vma->node.size, 0);
	if (err)
		goto err_request;

	i915_vma_lock(vma);
	err = i915_request_await_object(rq, vma->obj, false);
	if (err == 0)
		err = i915_vma_move_to_active(vma, rq, 0);
	i915_vma_unlock(vma);
	if (err)
		goto skip_request;

	i915_vma_unpin_and_release(&vma, 0);

	i915_request_add(rq);

	i915_vm_put(vm);
	return 0;

skip_request:
	i915_request_skip(rq, err);
err_request:
	i915_request_add(rq);
err_unpin:
	i915_vma_unpin(vma);
err_vm:
	i915_vm_put(vm);
err:
	i915_gem_object_put(obj);
	return err;
}

static int read_from_scratch(struct i915_gem_context *ctx,
			     struct intel_engine_cs *engine,
			     u64 offset, u32 *value)
{
	struct drm_i915_private *i915 = ctx->i915;
	struct drm_i915_gem_object *obj;
	struct i915_address_space *vm;
	const u32 RCS_GPR0 = 0x2600; /* not all engines have their own GPR! */
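	/*
	 * The batch below is pinned at GTT offset 0 (PIN_OFFSET_FIXED with
	 * no offset), so 'result' is both the address the GPR is stored to
	 * and the byte offset of the readback within the object.
	 */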
	const u32 result = 0x100;
	struct i915_request *rq;
	struct i915_vma *vma;
	u32 *cmd;
	int err;

	GEM_BUG_ON(offset < I915_GTT_PAGE_SIZE);

	obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
	if (IS_ERR(obj))
		return PTR_ERR(obj);

	cmd = i915_gem_object_pin_map(obj, I915_MAP_WB);
	if (IS_ERR(cmd)) {
		err = PTR_ERR(cmd);
		goto err;
	}

	memset(cmd, POISON_INUSE, PAGE_SIZE);
	if (INTEL_GEN(i915) >= 8) {
		*cmd++ = MI_LOAD_REGISTER_MEM_GEN8;
		*cmd++ = RCS_GPR0;
		*cmd++ = lower_32_bits(offset);
		*cmd++ = upper_32_bits(offset);
		*cmd++ = MI_STORE_REGISTER_MEM_GEN8;
		*cmd++ = RCS_GPR0;
		*cmd++ = result;
		*cmd++ = 0;
	} else {
		*cmd++ = MI_LOAD_REGISTER_MEM;
		*cmd++ = RCS_GPR0;
		*cmd++ = offset;
		*cmd++ = MI_STORE_REGISTER_MEM;
		*cmd++ = RCS_GPR0;
		*cmd++ = result;
	}
	*cmd = MI_BATCH_BUFFER_END;

	i915_gem_object_flush_map(obj);
	i915_gem_object_unpin_map(obj);

	intel_gt_chipset_flush(engine->gt);

	vm = i915_gem_context_get_vm_rcu(ctx);
	vma = i915_vma_instance(obj, vm, NULL);
	if (IS_ERR(vma)) {
		err = PTR_ERR(vma);
		goto err_vm;
	}

	err = i915_vma_pin(vma, 0, 0, PIN_USER | PIN_OFFSET_FIXED);
	if (err)
		goto err_vm;

	err = check_scratch(vm, offset);
	if (err)
		goto err_unpin;

	rq = igt_request_alloc(ctx, engine);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto err_unpin;
	}

	err = engine->emit_bb_start(rq, vma->node.start, vma->node.size, 0);
	if (err)
		goto err_request;

	i915_vma_lock(vma);
	err = i915_request_await_object(rq, vma->obj, true);
	if (err == 0)
		err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
	i915_vma_unlock(vma);
	if (err)
		goto skip_request;

	i915_vma_unpin(vma);
	i915_vma_close(vma);

	i915_request_add(rq);

	i915_gem_object_lock(obj);
	err = i915_gem_object_set_to_cpu_domain(obj, false);
	i915_gem_object_unlock(obj);
	if (err)
		goto err_vm;

	cmd = i915_gem_object_pin_map(obj, I915_MAP_WB);
	if (IS_ERR(cmd)) {
		err = PTR_ERR(cmd);
		goto err_vm;
	}

	*value = cmd[result / sizeof(*cmd)];
	i915_gem_object_unpin_map(obj);
	i915_gem_object_put(obj);

	i915_vm_put(vm); /* drop the reference from i915_gem_context_get_vm_rcu() */
	return 0;

skip_request:
	i915_request_skip(rq, err);
err_request:
	i915_request_add(rq);
err_unpin:
	i915_vma_unpin(vma);
err_vm:
	i915_vm_put(vm);
err:
	i915_gem_object_put(obj);
	return err;
}

static int igt_vm_isolation(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct i915_gem_context *ctx_a, *ctx_b;
	struct intel_engine_cs *engine;
	struct igt_live_test t;
	struct drm_file *file;
	I915_RND_STATE(prng);
	unsigned long count;
	unsigned int id;
	u64 vm_total;
	int err;

	if (INTEL_GEN(i915) < 7)
		return 0;

	/*
	 * The simple goal here is that a write into one context is not
	 * observed in a second (separate page tables and scratch).
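	 *
	 * We write a dword into a randomly chosen offset of ctx_a's ppGTT
	 * and then read the same offset back through ctx_b; with separate
	 * page tables ctx_b should only ever read back zero (its scratch).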
	 */

	file = mock_file(i915);
	if (IS_ERR(file))
		return PTR_ERR(file);

	err = igt_live_test_begin(&t, i915, __func__, "");
	if (err)
		goto out_file;

	ctx_a = live_context(i915, file);
	if (IS_ERR(ctx_a)) {
		err = PTR_ERR(ctx_a);
		goto out_file;
	}

	ctx_b = live_context(i915, file);
	if (IS_ERR(ctx_b)) {
		err = PTR_ERR(ctx_b);
		goto out_file;
	}

	/* We can only test vm isolation if the vms are distinct */
	if (ctx_a->vm == ctx_b->vm)
		goto out_file;

	vm_total = ctx_a->vm->total;
	GEM_BUG_ON(ctx_b->vm->total != vm_total);
	vm_total -= I915_GTT_PAGE_SIZE;

	count = 0;
	for_each_engine(engine, i915, id) {
		IGT_TIMEOUT(end_time);
		unsigned long this = 0;

		if (!intel_engine_can_store_dword(engine))
			continue;

		while (!__igt_timeout(end_time, NULL)) {
			u32 value = 0xc5c5c5c5;
			u64 offset;

			div64_u64_rem(i915_prandom_u64_state(&prng),
				      vm_total, &offset);
			offset = round_down(offset, alignof_dword);
			offset += I915_GTT_PAGE_SIZE;

			err = write_to_scratch(ctx_a, engine,
					       offset, 0xdeadbeef);
			if (err == 0)
				err = read_from_scratch(ctx_b, engine,
							offset, &value);
			if (err)
				goto out_file;

			if (value) {
				pr_err("%s: Read %08x from scratch (offset 0x%08x_%08x), after %lu reads!\n",
				       engine->name, value,
				       upper_32_bits(offset),
				       lower_32_bits(offset),
				       this);
				err = -EINVAL;
				goto out_file;
			}

			this++;
		}
		count += this;
	}
	pr_info("Checked %lu scratch offsets across %d engines\n",
		count, RUNTIME_INFO(i915)->num_engines);

out_file:
	if (igt_live_test_end(&t))
		err = -EIO;
	mock_file_free(i915, file);
	return err;
}

static bool skip_unused_engines(struct intel_context *ce, void *data)
{
	return !ce->state;
}

static void mock_barrier_task(void *data)
{
	unsigned int *counter = data;

	++*counter;
}

static int mock_context_barrier(void *arg)
{
#undef pr_fmt
#define pr_fmt(x) "context_barrier_task():" # x
	struct drm_i915_private *i915 = arg;
	struct i915_gem_context *ctx;
	struct i915_request *rq;
	unsigned int counter;
	int err;

	/*
	 * The context barrier provides us with a callback after it emits
	 * a request; useful for retiring old state after loading new.
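	 *
	 * The phases below check the degenerate cases (no engines, idle
	 * engines), that fault injection never invokes the callback, and
	 * that the callback does fire once the active engine is flushed.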
	 */

	ctx = mock_context(i915, "mock");
	if (!ctx)
		return -ENOMEM;

	counter = 0;
	err = context_barrier_task(ctx, 0,
				   NULL, NULL, mock_barrier_task, &counter);
	if (err) {
		pr_err("Failed at line %d, err=%d\n", __LINE__, err);
		goto out;
	}
	if (counter == 0) {
		pr_err("Did not retire immediately with 0 engines\n");
		err = -EINVAL;
		goto out;
	}

	counter = 0;
	err = context_barrier_task(ctx, ALL_ENGINES,
				   skip_unused_engines,
				   NULL,
				   mock_barrier_task,
				   &counter);
	if (err) {
		pr_err("Failed at line %d, err=%d\n", __LINE__, err);
		goto out;
	}
	if (counter == 0) {
		pr_err("Did not retire immediately for all unused engines\n");
		err = -EINVAL;
		goto out;
	}

	rq = igt_request_alloc(ctx, i915->engine[RCS0]);
	if (IS_ERR(rq)) {
		pr_err("Request allocation failed!\n");
		err = PTR_ERR(rq);
		goto out;
	}
	i915_request_add(rq);

	counter = 0;
	context_barrier_inject_fault = BIT(RCS0);
	err = context_barrier_task(ctx, ALL_ENGINES,
				   NULL, NULL, mock_barrier_task, &counter);
	context_barrier_inject_fault = 0;
	if (err == -ENXIO)
		err = 0;
	else
		pr_err("Did not hit fault injection!\n");
	if (counter != 0) {
		pr_err("Invoked callback on error!\n");
		err = -EIO;
	}
	if (err)
		goto out;

	counter = 0;
	err = context_barrier_task(ctx, ALL_ENGINES,
				   skip_unused_engines,
				   NULL,
				   mock_barrier_task,
				   &counter);
	if (err) {
		pr_err("Failed at line %d, err=%d\n", __LINE__, err);
		goto out;
	}
	mock_device_flush(i915);
	if (counter == 0) {
		pr_err("Did not retire on each active engine\n");
		err = -EINVAL;
		goto out;
	}

out:
	mock_context_close(ctx);
	return err;
#undef pr_fmt
#define pr_fmt(x) x
}

int i915_gem_context_mock_selftests(void)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(mock_context_barrier),
	};
	struct drm_i915_private *i915;
	int err;

	i915 = mock_gem_device();
	if (!i915)
		return -ENOMEM;

	err = i915_subtests(tests, i915);

	drm_dev_put(&i915->drm);
	return err;
}

int i915_gem_context_live_selftests(struct drm_i915_private *i915)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(live_nop_switch),
		SUBTEST(live_parallel_switch),
		SUBTEST(igt_ctx_exec),
		SUBTEST(igt_ctx_readonly),
		SUBTEST(igt_ctx_sseu),
		SUBTEST(igt_shared_ctx_exec),
		SUBTEST(igt_vm_isolation),
	};

	if (intel_gt_is_wedged(&i915->gt))
		return 0;

	return i915_live_subtests(tests, i915);
}