/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2017 Intel Corporation
 */

#include <linux/prime_numbers.h>

#include "gem/i915_gem_pm.h"
#include "gt/intel_engine_pm.h"
#include "gt/intel_gt.h"
#include "gt/intel_gt_requests.h"
#include "gt/intel_reset.h"
#include "i915_selftest.h"

#include "gem/selftests/igt_gem_utils.h"
#include "selftests/i915_random.h"
#include "selftests/igt_flush_test.h"
#include "selftests/igt_live_test.h"
#include "selftests/igt_reset.h"
#include "selftests/igt_spinner.h"
#include "selftests/mock_drm.h"
#include "selftests/mock_gem_device.h"

#include "huge_gem_object.h"
#include "igt_gem_utils.h"

#define DW_PER_PAGE (PAGE_SIZE / sizeof(u32))

static int live_nop_switch(void *arg)
{
	const unsigned int nctx = 1024;
	struct drm_i915_private *i915 = arg;
	struct intel_engine_cs *engine;
	struct i915_gem_context **ctx;
	struct igt_live_test t;
	struct file *file;
	unsigned long n;
	int err = -ENODEV;

	/*
	 * Create as many contexts as we can feasibly get away with
	 * and check we can switch between them rapidly.
	 *
	 * Serves as a very simple stress test for submission and HW switching
	 * between contexts.
	 */

	if (!DRIVER_CAPS(i915)->has_logical_contexts)
		return 0;

	file = mock_file(i915);
	if (IS_ERR(file))
		return PTR_ERR(file);

	ctx = kcalloc(nctx, sizeof(*ctx), GFP_KERNEL);
	if (!ctx) {
		err = -ENOMEM;
		goto out_file;
	}

	for (n = 0; n < nctx; n++) {
		ctx[n] = live_context(i915, file);
		if (IS_ERR(ctx[n])) {
			err = PTR_ERR(ctx[n]);
			goto out_file;
		}
	}

	for_each_uabi_engine(engine, i915) {
		struct i915_request *rq = NULL;
		unsigned long end_time, prime;
		ktime_t times[2] = {};

		times[0] = ktime_get_raw();
		for (n = 0; n < nctx; n++) {
			struct i915_request *this;

			this = igt_request_alloc(ctx[n], engine);
			if (IS_ERR(this)) {
				err = PTR_ERR(this);
				goto out_file;
			}
			if (rq) {
				i915_request_await_dma_fence(this, &rq->fence);
				i915_request_put(rq);
			}
			rq = i915_request_get(this);
			i915_request_add(this);
		}
		if (i915_request_wait(rq, 0, HZ) < 0) {
			pr_err("Failed to populate %d contexts\n", nctx);
			intel_gt_set_wedged(&i915->gt);
			i915_request_put(rq);
			err = -EIO;
			goto out_file;
		}
		i915_request_put(rq);

		times[1] = ktime_get_raw();

		pr_info("Populated %d contexts on %s in %lluns\n",
			nctx, engine->name, ktime_to_ns(times[1] - times[0]));

		err = igt_live_test_begin(&t, i915, __func__, engine->name);
		if (err)
			goto out_file;

		end_time = jiffies + i915_selftest.timeout_jiffies;
		for_each_prime_number_from(prime, 2, 8192) {
			times[1] = ktime_get_raw();

			rq = NULL;
			for (n = 0; n < prime; n++) {
				struct i915_request *this;

				this = igt_request_alloc(ctx[n % nctx], engine);
				if (IS_ERR(this)) {
					err = PTR_ERR(this);
					goto out_file;
				}

				if (rq) { /* Force submission order */
					i915_request_await_dma_fence(this, &rq->fence);
					i915_request_put(rq);
				}

				/*
				 * This space is left intentionally blank.
				 *
				 * We do not actually want to perform any
				 * action with this request; we just want
				 * to measure the latency in allocation
				 * and submission of our breadcrumbs -
				 * ensuring that the bare request is sufficient
				 * for the system to work (i.e. proper HEAD
				 * tracking of the rings, interrupt handling,
				 * etc). It also gives us the lowest bound
				 * for latency.
				 */

				rq = i915_request_get(this);
				i915_request_add(this);
			}
			GEM_BUG_ON(!rq);
			if (i915_request_wait(rq, 0, HZ / 5) < 0) {
				pr_err("Switching between %ld contexts timed out\n",
				       prime);
				intel_gt_set_wedged(&i915->gt);
				i915_request_put(rq);
				break;
			}
			i915_request_put(rq);

			times[1] = ktime_sub(ktime_get_raw(), times[1]);
			if (prime == 2)
				times[0] = times[1];

			if (__igt_timeout(end_time, NULL))
				break;
		}

		err = igt_live_test_end(&t);
		if (err)
			goto out_file;

		pr_info("Switch latencies on %s: 1 = %lluns, %lu = %lluns\n",
			engine->name,
			ktime_to_ns(times[0]),
			prime - 1, div64_u64(ktime_to_ns(times[1]), prime - 1));
	}

out_file:
	fput(file);
	return err;
}

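/*
 * Note on the latency report from live_nop_switch(): times[0] is taken
 * from the smallest batch (prime == 2) and is printed as the single
 * switch latency, while times[1] holds the duration of the last completed
 * batch and is divided by (prime - 1) to give an amortised per-switch
 * figure.
 */
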
struct parallel_switch {
	struct task_struct *tsk;
	struct intel_context *ce[2];
};

static int __live_parallel_switch1(void *data)
{
	struct parallel_switch *arg = data;
	IGT_TIMEOUT(end_time);
	unsigned long count;

	count = 0;
	do {
		struct i915_request *rq = NULL;
		int err, n;

		err = 0;
		for (n = 0; !err && n < ARRAY_SIZE(arg->ce); n++) {
			struct i915_request *prev = rq;

			rq = i915_request_create(arg->ce[n]);
			if (IS_ERR(rq)) {
				i915_request_put(prev);
				return PTR_ERR(rq);
			}

			i915_request_get(rq);
			if (prev) {
				err = i915_request_await_dma_fence(rq, &prev->fence);
				i915_request_put(prev);
			}

			i915_request_add(rq);
		}
		if (i915_request_wait(rq, 0, HZ / 5) < 0)
			err = -ETIME;
		i915_request_put(rq);
		if (err)
			return err;

		count++;
	} while (!__igt_timeout(end_time, NULL));

	pr_info("%s: %lu switches (sync)\n", arg->ce[0]->engine->name, count);
	return 0;
}

static int __live_parallel_switchN(void *data)
{
	struct parallel_switch *arg = data;
	struct i915_request *rq = NULL;
	IGT_TIMEOUT(end_time);
	unsigned long count;
	int n;

	count = 0;
	do {
		for (n = 0; n < ARRAY_SIZE(arg->ce); n++) {
			struct i915_request *prev = rq;
			int err = 0;

			rq = i915_request_create(arg->ce[n]);
			if (IS_ERR(rq)) {
				i915_request_put(prev);
				return PTR_ERR(rq);
			}

			i915_request_get(rq);
			if (prev) {
				err = i915_request_await_dma_fence(rq, &prev->fence);
				i915_request_put(prev);
			}

			i915_request_add(rq);
			if (err) {
				i915_request_put(rq);
				return err;
			}
		}

		count++;
	} while (!__igt_timeout(end_time, NULL));
	i915_request_put(rq);

	pr_info("%s: %lu switches (many)\n", arg->ce[0]->engine->name, count);
	return 0;
}

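/*
 * The two workers above differ only in how eagerly they synchronise:
 * __live_parallel_switch1() waits for each pair of back-to-back requests
 * before starting the next iteration (a strict submit/wait cycle), while
 * __live_parallel_switchN() keeps appending requests to a single long
 * dependency chain without waiting, only dropping its reference to the
 * final request once the timeout expires. live_parallel_switch() below
 * runs one instance of each worker per engine, concurrently, in kthreads.
 */
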
static int live_parallel_switch(void *arg)
{
	struct drm_i915_private *i915 = arg;
	static int (* const func[])(void *arg) = {
		__live_parallel_switch1,
		__live_parallel_switchN,
		NULL,
	};
	struct parallel_switch *data = NULL;
	struct i915_gem_engines *engines;
	struct i915_gem_engines_iter it;
	int (* const *fn)(void *arg);
	struct i915_gem_context *ctx;
	struct intel_context *ce;
	struct file *file;
	int n, m, count;
	int err = 0;

	/*
	 * Check we can process switches on all engines simultaneously.
	 */

	if (!DRIVER_CAPS(i915)->has_logical_contexts)
		return 0;

	file = mock_file(i915);
	if (IS_ERR(file))
		return PTR_ERR(file);

	ctx = live_context(i915, file);
	if (IS_ERR(ctx)) {
		err = PTR_ERR(ctx);
		goto out_file;
	}

	engines = i915_gem_context_lock_engines(ctx);
	count = engines->num_engines;

	data = kcalloc(count, sizeof(*data), GFP_KERNEL);
	if (!data) {
		i915_gem_context_unlock_engines(ctx);
		err = -ENOMEM;
		goto out_file;
	}

	m = 0; /* Use the first context as our template for the engines */
	for_each_gem_engine(ce, engines, it) {
		err = intel_context_pin(ce);
		if (err) {
			i915_gem_context_unlock_engines(ctx);
			goto out;
		}
		data[m++].ce[0] = intel_context_get(ce);
	}
	i915_gem_context_unlock_engines(ctx);

	/* Clone the same set of engines into the other contexts */
	for (n = 1; n < ARRAY_SIZE(data->ce); n++) {
		ctx = live_context(i915, file);
		if (IS_ERR(ctx)) {
			err = PTR_ERR(ctx);
			goto out;
		}

		for (m = 0; m < count; m++) {
			if (!data[m].ce[0])
				continue;

			ce = intel_context_create(data[m].ce[0]->engine);
			if (IS_ERR(ce)) {
				err = PTR_ERR(ce);
				goto out;
			}

			err = intel_context_pin(ce);
			if (err) {
				intel_context_put(ce);
				goto out;
			}

			data[m].ce[n] = ce;
		}
	}

	for (fn = func; !err && *fn; fn++) {
		struct igt_live_test t;
		int n;

		err = igt_live_test_begin(&t, i915, __func__, "");
		if (err)
			break;

		for (n = 0; n < count; n++) {
			if (!data[n].ce[0])
				continue;

			data[n].tsk = kthread_run(*fn, &data[n],
						  "igt/parallel:%s",
						  data[n].ce[0]->engine->name);
			if (IS_ERR(data[n].tsk)) {
				err = PTR_ERR(data[n].tsk);
				break;
			}
			get_task_struct(data[n].tsk);
		}

		yield(); /* start all threads before we kthread_stop() */

		for (n = 0; n < count; n++) {
			int status;

			if (IS_ERR_OR_NULL(data[n].tsk))
				continue;

			status = kthread_stop(data[n].tsk);
			if (status && !err)
				err = status;

			put_task_struct(data[n].tsk);
			data[n].tsk = NULL;
		}

		if (igt_live_test_end(&t))
			err = -EIO;
	}

out:
	for (n = 0; n < count; n++) {
		for (m = 0; m < ARRAY_SIZE(data->ce); m++) {
			if (!data[n].ce[m])
				continue;

			intel_context_unpin(data[n].ce[m]);
			intel_context_put(data[n].ce[m]);
		}
	}
	kfree(data);
out_file:
	fput(file);
	return err;
}

static unsigned long real_page_count(struct drm_i915_gem_object *obj)
{
	return huge_gem_object_phys_size(obj) >> PAGE_SHIFT;
}

static unsigned long fake_page_count(struct drm_i915_gem_object *obj)
{
	return huge_gem_object_dma_size(obj) >> PAGE_SHIFT;
}

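/*
 * The test objects below are huge_gem_objects: they appear much larger in
 * the GTT than the physical storage behind them. real_page_count() is
 * derived from the physical backing size and fake_page_count() from the
 * dma (GTT-visible) size, so the fill and check helpers can iterate over
 * just the real pages while the batches address the full fake range.
 */
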
static int gpu_fill(struct intel_context *ce,
		    struct drm_i915_gem_object *obj,
		    unsigned int dw)
{
	struct i915_vma *vma;
	int err;

	GEM_BUG_ON(obj->base.size > ce->vm->total);
	GEM_BUG_ON(!intel_engine_can_store_dword(ce->engine));

	vma = i915_vma_instance(obj, ce->vm, NULL);
	if (IS_ERR(vma))
		return PTR_ERR(vma);

	err = i915_vma_pin(vma, 0, 0, PIN_HIGH | PIN_USER);
	if (err)
		return err;

	/*
	 * Within the GTT the huge object maps every page onto
	 * its 1024 real pages (using phys_pfn = dma_pfn % 1024).
	 * We set the nth dword within the page using the nth
	 * mapping via the GTT - this should exercise the GTT mapping
	 * whilst checking that each context provides a unique view
	 * into the object.
	 */
	err = igt_gpu_fill_dw(ce, vma,
			      (dw * real_page_count(obj)) << PAGE_SHIFT |
			      (dw * sizeof(u32)),
			      real_page_count(obj),
			      dw);
	i915_vma_unpin(vma);

	return err;
}

static int cpu_fill(struct drm_i915_gem_object *obj, u32 value)
{
	const bool has_llc = HAS_LLC(to_i915(obj->base.dev));
	unsigned int n, m, need_flush;
	int err;

	i915_gem_object_lock(obj, NULL);
	err = i915_gem_object_prepare_write(obj, &need_flush);
	if (err)
		goto out;

	for (n = 0; n < real_page_count(obj); n++) {
		u32 *map;

		map = kmap_atomic(i915_gem_object_get_page(obj, n));
		for (m = 0; m < DW_PER_PAGE; m++)
			map[m] = value;
		if (!has_llc)
			drm_clflush_virt_range(map, PAGE_SIZE);
		kunmap_atomic(map);
	}

	i915_gem_object_finish_access(obj);
	obj->read_domains = I915_GEM_DOMAIN_GTT | I915_GEM_DOMAIN_CPU;
	obj->write_domain = 0;
out:
	i915_gem_object_unlock(obj);
	return err;
}

static noinline int cpu_check(struct drm_i915_gem_object *obj,
			      unsigned int idx, unsigned int max)
{
	unsigned int n, m, needs_flush;
	int err;

	i915_gem_object_lock(obj, NULL);
	err = i915_gem_object_prepare_read(obj, &needs_flush);
	if (err)
		goto out_unlock;

	for (n = 0; n < real_page_count(obj); n++) {
		u32 *map;

		map = kmap_atomic(i915_gem_object_get_page(obj, n));
		if (needs_flush & CLFLUSH_BEFORE)
			drm_clflush_virt_range(map, PAGE_SIZE);

		for (m = 0; m < max; m++) {
			if (map[m] != m) {
				pr_err("%pS: Invalid value at object %d page %d/%ld, offset %d/%d: found %x expected %x\n",
				       __builtin_return_address(0), idx,
				       n, real_page_count(obj), m, max,
				       map[m], m);
				err = -EINVAL;
				goto out_unmap;
			}
		}

		for (; m < DW_PER_PAGE; m++) {
			if (map[m] != STACK_MAGIC) {
				pr_err("%pS: Invalid value at object %d page %d, offset %d: found %x expected %x (uninitialised)\n",
				       __builtin_return_address(0), idx, n, m,
				       map[m], STACK_MAGIC);
				err = -EINVAL;
				goto out_unmap;
			}
		}

out_unmap:
		kunmap_atomic(map);
		if (err)
			break;
	}

	i915_gem_object_finish_access(obj);
out_unlock:
	i915_gem_object_unlock(obj);
	return err;
}

static int file_add_object(struct file *file, struct drm_i915_gem_object *obj)
{
	int err;

	GEM_BUG_ON(obj->base.handle_count);

	/* tie the object to the drm_file for easy reaping */
	err = idr_alloc(&to_drm_file(file)->object_idr,
			&obj->base, 1, 0, GFP_KERNEL);
	if (err < 0)
		return err;

	i915_gem_object_get(obj);
	obj->base.handle_count++;
	return 0;
}

static struct drm_i915_gem_object *
create_test_object(struct i915_address_space *vm,
		   struct file *file,
		   struct list_head *objects)
{
	struct drm_i915_gem_object *obj;
	u64 size;
	int err;

	/* Keep in GEM's good graces */
	intel_gt_retire_requests(vm->gt);

	size = min(vm->total / 2, 1024ull * DW_PER_PAGE * PAGE_SIZE);
	size = round_down(size, DW_PER_PAGE * PAGE_SIZE);

	obj = huge_gem_object(vm->i915, DW_PER_PAGE * PAGE_SIZE, size);
	if (IS_ERR(obj))
		return obj;

	err = file_add_object(file, obj);
	i915_gem_object_put(obj);
	if (err)
		return ERR_PTR(err);

	err = cpu_fill(obj, STACK_MAGIC);
	if (err) {
		pr_err("Failed to fill object with cpu, err=%d\n",
		       err);
		return ERR_PTR(err);
	}

	list_add_tail(&obj->st_link, objects);
	return obj;
}

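/*
 * Each test object is a whole multiple of DW_PER_PAGE * PAGE_SIZE and is
 * pre-filled with STACK_MAGIC by the CPU, so cpu_check() can distinguish a
 * dword legitimately written by the GPU (it must hold its dword index)
 * from one that should never have been touched (it must still read back
 * as STACK_MAGIC).
 */
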
static unsigned long max_dwords(struct drm_i915_gem_object *obj)
{
	unsigned long npages = fake_page_count(obj);

	GEM_BUG_ON(!IS_ALIGNED(npages, DW_PER_PAGE));
	return npages / DW_PER_PAGE;
}

static void throttle_release(struct i915_request **q, int count)
{
	int i;

	for (i = 0; i < count; i++) {
		if (IS_ERR_OR_NULL(q[i]))
			continue;

		i915_request_put(fetch_and_zero(&q[i]));
	}
}

static int throttle(struct intel_context *ce,
		    struct i915_request **q, int count)
{
	int i;

	if (!IS_ERR_OR_NULL(q[0])) {
		if (i915_request_wait(q[0],
				      I915_WAIT_INTERRUPTIBLE,
				      MAX_SCHEDULE_TIMEOUT) < 0)
			return -EINTR;

		i915_request_put(q[0]);
	}

	for (i = 0; i < count - 1; i++)
		q[i] = q[i + 1];

	q[i] = intel_context_create_request(ce);
	if (IS_ERR(q[i]))
		return PTR_ERR(q[i]);

	i915_request_get(q[i]);
	i915_request_add(q[i]);

	return 0;
}

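/*
 * throttle() maintains a small sliding window of in-flight requests (the
 * tq[] arrays below hold 5): it waits for the oldest entry, shifts the
 * queue down and appends a fresh request on the given context, which
 * bounds how far the submission loops can run ahead of the hardware.
 */
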
static int igt_ctx_exec(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct intel_engine_cs *engine;
	int err = -ENODEV;

	/*
	 * Create a few different contexts (with different mm) and write
	 * through each ctx/mm using the GPU, making sure those writes end
	 * up in the expected pages of our obj.
	 */

	if (!DRIVER_CAPS(i915)->has_logical_contexts)
		return 0;

	for_each_uabi_engine(engine, i915) {
		struct drm_i915_gem_object *obj = NULL;
		unsigned long ncontexts, ndwords, dw;
		struct i915_request *tq[5] = {};
		struct igt_live_test t;
		IGT_TIMEOUT(end_time);
		LIST_HEAD(objects);
		struct file *file;

		if (!intel_engine_can_store_dword(engine))
			continue;

		if (!engine->context_size)
			continue; /* No logical context support in HW */

		file = mock_file(i915);
		if (IS_ERR(file))
			return PTR_ERR(file);

		err = igt_live_test_begin(&t, i915, __func__, engine->name);
		if (err)
			goto out_file;

		ncontexts = 0;
		ndwords = 0;
		dw = 0;
		while (!time_after(jiffies, end_time)) {
			struct i915_gem_context *ctx;
			struct intel_context *ce;

			ctx = kernel_context(i915, NULL);
			if (IS_ERR(ctx)) {
				err = PTR_ERR(ctx);
				goto out_file;
			}

			ce = i915_gem_context_get_engine(ctx, engine->legacy_idx);
			GEM_BUG_ON(IS_ERR(ce));

			if (!obj) {
				obj = create_test_object(ce->vm, file, &objects);
				if (IS_ERR(obj)) {
					err = PTR_ERR(obj);
					intel_context_put(ce);
					kernel_context_close(ctx);
					goto out_file;
				}
			}

			err = gpu_fill(ce, obj, dw);
			if (err) {
				pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) [full-ppgtt? %s], err=%d\n",
				       ndwords, dw, max_dwords(obj),
				       engine->name,
				       yesno(i915_gem_context_has_full_ppgtt(ctx)),
				       err);
				intel_context_put(ce);
				kernel_context_close(ctx);
				goto out_file;
			}

			err = throttle(ce, tq, ARRAY_SIZE(tq));
			if (err) {
				intel_context_put(ce);
				kernel_context_close(ctx);
				goto out_file;
			}

			if (++dw == max_dwords(obj)) {
				obj = NULL;
				dw = 0;
			}

			ndwords++;
			ncontexts++;

			intel_context_put(ce);
			kernel_context_close(ctx);
		}

		pr_info("Submitted %lu contexts to %s, filling %lu dwords\n",
			ncontexts, engine->name, ndwords);

		ncontexts = dw = 0;
		list_for_each_entry(obj, &objects, st_link) {
			unsigned int rem =
				min_t(unsigned int, ndwords - dw, max_dwords(obj));

			err = cpu_check(obj, ncontexts++, rem);
			if (err)
				break;

			dw += rem;
		}

out_file:
		throttle_release(tq, ARRAY_SIZE(tq));
		if (igt_live_test_end(&t))
			err = -EIO;

		fput(file);
		if (err)
			return err;

		i915_gem_drain_freed_objects(i915);
	}

	return 0;
}

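/*
 * Addressing scheme shared by the exec tests: the n'th submission against
 * an object writes the value n into dword n of every real page (see
 * gpu_fill()), so after ndwords submissions cpu_check() expects the first
 * ndwords dwords of each page to count up from 0 and the remainder to be
 * untouched.
 */
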
static int igt_shared_ctx_exec(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct i915_request *tq[5] = {};
	struct i915_gem_context *parent;
	struct intel_engine_cs *engine;
	struct igt_live_test t;
	struct file *file;
	int err = 0;

	/*
	 * Create a few different contexts with the same mm and write
	 * through each ctx using the GPU, making sure those writes end
	 * up in the expected pages of our obj.
	 */
	if (!DRIVER_CAPS(i915)->has_logical_contexts)
		return 0;

	file = mock_file(i915);
	if (IS_ERR(file))
		return PTR_ERR(file);

	parent = live_context(i915, file);
	if (IS_ERR(parent)) {
		err = PTR_ERR(parent);
		goto out_file;
	}

	if (!parent->vm) { /* not full-ppgtt; nothing to share */
		err = 0;
		goto out_file;
	}

	err = igt_live_test_begin(&t, i915, __func__, "");
	if (err)
		goto out_file;

	for_each_uabi_engine(engine, i915) {
		unsigned long ncontexts, ndwords, dw;
		struct drm_i915_gem_object *obj = NULL;
		IGT_TIMEOUT(end_time);
		LIST_HEAD(objects);

		if (!intel_engine_can_store_dword(engine))
			continue;

		dw = 0;
		ndwords = 0;
		ncontexts = 0;
		while (!time_after(jiffies, end_time)) {
			struct i915_gem_context *ctx;
			struct intel_context *ce;

			ctx = kernel_context(i915, parent->vm);
			if (IS_ERR(ctx)) {
				err = PTR_ERR(ctx);
				goto out_test;
			}

			ce = i915_gem_context_get_engine(ctx, engine->legacy_idx);
			GEM_BUG_ON(IS_ERR(ce));

			if (!obj) {
				obj = create_test_object(parent->vm,
							 file, &objects);
				if (IS_ERR(obj)) {
					err = PTR_ERR(obj);
					intel_context_put(ce);
					kernel_context_close(ctx);
					goto out_test;
				}
			}

			err = gpu_fill(ce, obj, dw);
			if (err) {
				pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) [full-ppgtt? %s], err=%d\n",
				       ndwords, dw, max_dwords(obj),
				       engine->name,
				       yesno(i915_gem_context_has_full_ppgtt(ctx)),
				       err);
				intel_context_put(ce);
				kernel_context_close(ctx);
				goto out_test;
			}

			err = throttle(ce, tq, ARRAY_SIZE(tq));
			if (err) {
				intel_context_put(ce);
				kernel_context_close(ctx);
				goto out_test;
			}

			if (++dw == max_dwords(obj)) {
				obj = NULL;
				dw = 0;
			}

			ndwords++;
			ncontexts++;

			intel_context_put(ce);
			kernel_context_close(ctx);
		}
		pr_info("Submitted %lu contexts to %s, filling %lu dwords\n",
			ncontexts, engine->name, ndwords);

		ncontexts = dw = 0;
		list_for_each_entry(obj, &objects, st_link) {
			unsigned int rem =
				min_t(unsigned int, ndwords - dw, max_dwords(obj));

			err = cpu_check(obj, ncontexts++, rem);
			if (err)
				goto out_test;

			dw += rem;
		}

		i915_gem_drain_freed_objects(i915);
	}
out_test:
	throttle_release(tq, ARRAY_SIZE(tq));
	if (igt_live_test_end(&t))
		err = -EIO;
out_file:
	fput(file);
	return err;
}

static int rpcs_query_batch(struct drm_i915_gem_object *rpcs, struct i915_vma *vma)
{
	u32 *cmd;

	GEM_BUG_ON(GRAPHICS_VER(vma->vm->i915) < 8);

	cmd = i915_gem_object_pin_map(rpcs, I915_MAP_WB);
	if (IS_ERR(cmd))
		return PTR_ERR(cmd);

	*cmd++ = MI_STORE_REGISTER_MEM_GEN8;
	*cmd++ = i915_mmio_reg_offset(GEN8_R_PWR_CLK_STATE);
	*cmd++ = lower_32_bits(vma->node.start);
	*cmd++ = upper_32_bits(vma->node.start);
	*cmd = MI_BATCH_BUFFER_END;

	__i915_gem_object_flush_map(rpcs, 0, 64);
	i915_gem_object_unpin_map(rpcs);

	intel_gt_chipset_flush(vma->vm->gt);

	return 0;
}

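/*
 * The batch above is the entire SSEU readback: a single
 * MI_STORE_REGISTER_MEM of R_PWR_CLK_STATE into the target buffer,
 * terminated by MI_BATCH_BUFFER_END. emit_rpcs_query() wraps it in a
 * request on the context under test, and __read_slice_count() later
 * decodes the stored RPCS value (gen8 vs gen11 field layout) into a slice
 * count for __check_rpcs() to compare against expectations.
 */
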
static int
emit_rpcs_query(struct drm_i915_gem_object *obj,
		struct intel_context *ce,
		struct i915_request **rq_out)
{
	struct drm_i915_private *i915 = to_i915(obj->base.dev);
	struct i915_request *rq;
	struct i915_gem_ww_ctx ww;
	struct i915_vma *batch;
	struct i915_vma *vma;
	struct drm_i915_gem_object *rpcs;
	int err;

	GEM_BUG_ON(!intel_engine_can_store_dword(ce->engine));

	if (GRAPHICS_VER(i915) < 8)
		return -EINVAL;

	vma = i915_vma_instance(obj, ce->vm, NULL);
	if (IS_ERR(vma))
		return PTR_ERR(vma);

	rpcs = i915_gem_object_create_internal(i915, PAGE_SIZE);
	if (IS_ERR(rpcs))
		return PTR_ERR(rpcs);

	batch = i915_vma_instance(rpcs, ce->vm, NULL);
	if (IS_ERR(batch)) {
		err = PTR_ERR(batch);
		goto err_put;
	}

	i915_gem_ww_ctx_init(&ww, false);
retry:
	err = i915_gem_object_lock(obj, &ww);
	if (!err)
		err = i915_gem_object_lock(rpcs, &ww);
	if (!err)
		err = i915_gem_object_set_to_gtt_domain(obj, false);
	if (!err)
		err = i915_vma_pin_ww(vma, &ww, 0, 0, PIN_USER);
	if (err)
		goto err_put;

	err = i915_vma_pin_ww(batch, &ww, 0, 0, PIN_USER);
	if (err)
		goto err_vma;

	err = rpcs_query_batch(rpcs, vma);
	if (err)
		goto err_batch;

	rq = i915_request_create(ce);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto err_batch;
	}

	err = i915_request_await_object(rq, batch->obj, false);
	if (err == 0)
		err = i915_vma_move_to_active(batch, rq, 0);
	if (err)
		goto skip_request;

	err = i915_request_await_object(rq, vma->obj, true);
	if (err == 0)
		err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
	if (err)
		goto skip_request;

	if (rq->engine->emit_init_breadcrumb) {
		err = rq->engine->emit_init_breadcrumb(rq);
		if (err)
			goto skip_request;
	}

	err = rq->engine->emit_bb_start(rq,
					batch->node.start, batch->node.size,
					0);
	if (err)
		goto skip_request;

	*rq_out = i915_request_get(rq);

skip_request:
	if (err)
		i915_request_set_error_once(rq, err);
	i915_request_add(rq);
err_batch:
	i915_vma_unpin(batch);
err_vma:
	i915_vma_unpin(vma);
err_put:
	if (err == -EDEADLK) {
		err = i915_gem_ww_ctx_backoff(&ww);
		if (!err)
			goto retry;
	}
	i915_gem_ww_ctx_fini(&ww);
	i915_gem_object_put(rpcs);
	return err;
}

#define TEST_IDLE	BIT(0)
#define TEST_BUSY	BIT(1)
#define TEST_RESET	BIT(2)

static int
__sseu_prepare(const char *name,
	       unsigned int flags,
	       struct intel_context *ce,
	       struct igt_spinner **spin)
{
	struct i915_request *rq;
	int ret;

	*spin = NULL;
	if (!(flags & (TEST_BUSY | TEST_RESET)))
		return 0;

	*spin = kzalloc(sizeof(**spin), GFP_KERNEL);
	if (!*spin)
		return -ENOMEM;

	ret = igt_spinner_init(*spin, ce->engine->gt);
	if (ret)
		goto err_free;

	rq = igt_spinner_create_request(*spin, ce, MI_NOOP);
	if (IS_ERR(rq)) {
		ret = PTR_ERR(rq);
		goto err_fini;
	}

	i915_request_add(rq);

	if (!igt_wait_for_spinner(*spin, rq)) {
		pr_err("%s: Spinner failed to start!\n", name);
		ret = -ETIMEDOUT;
		goto err_end;
	}

	return 0;

err_end:
	igt_spinner_end(*spin);
err_fini:
	igt_spinner_fini(*spin);
err_free:
	kfree(fetch_and_zero(spin));
	return ret;
}

static int
__read_slice_count(struct intel_context *ce,
		   struct drm_i915_gem_object *obj,
		   struct igt_spinner *spin,
		   u32 *rpcs)
{
	struct i915_request *rq = NULL;
	u32 s_mask, s_shift;
	unsigned int cnt;
	u32 *buf, val;
	long ret;

	ret = emit_rpcs_query(obj, ce, &rq);
	if (ret)
		return ret;

	if (spin)
		igt_spinner_end(spin);

	ret = i915_request_wait(rq, 0, MAX_SCHEDULE_TIMEOUT);
	i915_request_put(rq);
	if (ret < 0)
		return ret;

	buf = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WB);
	if (IS_ERR(buf)) {
		ret = PTR_ERR(buf);
		return ret;
	}

	if (GRAPHICS_VER(ce->engine->i915) >= 11) {
		s_mask = GEN11_RPCS_S_CNT_MASK;
		s_shift = GEN11_RPCS_S_CNT_SHIFT;
	} else {
		s_mask = GEN8_RPCS_S_CNT_MASK;
		s_shift = GEN8_RPCS_S_CNT_SHIFT;
	}

	val = *buf;
	cnt = (val & s_mask) >> s_shift;
	*rpcs = val;

	i915_gem_object_unpin_map(obj);

	return cnt;
}

static int
__check_rpcs(const char *name, u32 rpcs, int slices, unsigned int expected,
	     const char *prefix, const char *suffix)
{
	if (slices == expected)
		return 0;

	if (slices < 0) {
		pr_err("%s: %s read slice count failed with %d%s\n",
		       name, prefix, slices, suffix);
		return slices;
	}

	pr_err("%s: %s slice count %d is not %u%s\n",
	       name, prefix, slices, expected, suffix);

	pr_info("RPCS=0x%x; %u%sx%u%s\n",
		rpcs, slices,
		(rpcs & GEN8_RPCS_S_CNT_ENABLE) ? "*" : "",
		(rpcs & GEN8_RPCS_SS_CNT_MASK) >> GEN8_RPCS_SS_CNT_SHIFT,
		(rpcs & GEN8_RPCS_SS_CNT_ENABLE) ? "*" : "");

	return -EINVAL;
}

static int
__sseu_finish(const char *name,
	      unsigned int flags,
	      struct intel_context *ce,
	      struct drm_i915_gem_object *obj,
	      unsigned int expected,
	      struct igt_spinner *spin)
{
	unsigned int slices = hweight32(ce->engine->sseu.slice_mask);
	u32 rpcs = 0;
	int ret = 0;

	if (flags & TEST_RESET) {
		ret = intel_engine_reset(ce->engine, "sseu");
		if (ret)
			goto out;
	}

	ret = __read_slice_count(ce, obj,
				 flags & TEST_RESET ? NULL : spin, &rpcs);
	ret = __check_rpcs(name, rpcs, ret, expected, "Context", "!");
	if (ret)
		goto out;

	ret = __read_slice_count(ce->engine->kernel_context, obj, NULL, &rpcs);
	ret = __check_rpcs(name, rpcs, ret, slices, "Kernel context", "!");

out:
	if (spin)
		igt_spinner_end(spin);

	if ((flags & TEST_IDLE) && ret == 0) {
		ret = igt_flush_test(ce->engine->i915);
		if (ret)
			return ret;

		ret = __read_slice_count(ce, obj, NULL, &rpcs);
		ret = __check_rpcs(name, rpcs, ret, expected,
				   "Context", " after idle!");
	}

	return ret;
}

static int
__sseu_test(const char *name,
	    unsigned int flags,
	    struct intel_context *ce,
	    struct drm_i915_gem_object *obj,
	    struct intel_sseu sseu)
{
	struct igt_spinner *spin = NULL;
	int ret;

	intel_engine_pm_get(ce->engine);

	ret = __sseu_prepare(name, flags, ce, &spin);
	if (ret)
		goto out_pm;

	ret = intel_context_reconfigure_sseu(ce, sseu);
	if (ret)
		goto out_spin;

	ret = __sseu_finish(name, flags, ce, obj,
			    hweight32(sseu.slice_mask), spin);

out_spin:
	if (spin) {
		igt_spinner_end(spin);
		igt_spinner_fini(spin);
		kfree(spin);
	}
out_pm:
	intel_engine_pm_put(ce->engine);
	return ret;
}

static int
__igt_ctx_sseu(struct drm_i915_private *i915,
	       const char *name,
	       unsigned int flags)
{
	struct drm_i915_gem_object *obj;
	int inst = 0;
	int ret = 0;

	if (GRAPHICS_VER(i915) < 9)
		return 0;

	if (flags & TEST_RESET)
		igt_global_reset_lock(&i915->gt);

	obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
	if (IS_ERR(obj)) {
		ret = PTR_ERR(obj);
		goto out_unlock;
	}

	do {
		struct intel_engine_cs *engine;
		struct intel_context *ce;
		struct intel_sseu pg_sseu;

		engine = intel_engine_lookup_user(i915,
						  I915_ENGINE_CLASS_RENDER,
						  inst++);
		if (!engine)
			break;

		if (hweight32(engine->sseu.slice_mask) < 2)
			continue;

		if (!engine->gt->info.sseu.has_slice_pg)
			continue;

		/*
		 * Gen11 VME-friendly power-gated configuration with
		 * half of the sub-slices enabled.
		 */
		pg_sseu = engine->sseu;
		pg_sseu.slice_mask = 1;
		pg_sseu.subslice_mask =
			~(~0 << (hweight32(engine->sseu.subslice_mask) / 2));

		pr_info("%s: SSEU subtest '%s', flags=%x, def_slices=%u, pg_slices=%u\n",
			engine->name, name, flags,
			hweight32(engine->sseu.slice_mask),
			hweight32(pg_sseu.slice_mask));

		ce = intel_context_create(engine);
		if (IS_ERR(ce)) {
			ret = PTR_ERR(ce);
			goto out_put;
		}

		ret = intel_context_pin(ce);
		if (ret)
			goto out_ce;

		/* First set the default mask. */
		ret = __sseu_test(name, flags, ce, obj, engine->sseu);
		if (ret)
			goto out_unpin;

		/* Then set a power-gated configuration. */
		ret = __sseu_test(name, flags, ce, obj, pg_sseu);
		if (ret)
			goto out_unpin;

		/* Back to defaults. */
		ret = __sseu_test(name, flags, ce, obj, engine->sseu);
		if (ret)
			goto out_unpin;

		/* One last power-gated configuration for the road. */
		ret = __sseu_test(name, flags, ce, obj, pg_sseu);
		if (ret)
			goto out_unpin;

out_unpin:
		intel_context_unpin(ce);
out_ce:
		intel_context_put(ce);
	} while (!ret);

	if (igt_flush_test(i915))
		ret = -EIO;

out_put:
	i915_gem_object_put(obj);

out_unlock:
	if (flags & TEST_RESET)
		igt_global_reset_unlock(&i915->gt);

	if (ret)
		pr_err("%s: Failed with %d!\n", name, ret);

	return ret;
}

static int igt_ctx_sseu(void *arg)
{
	struct {
		const char *name;
		unsigned int flags;
	} *phase, phases[] = {
		{ .name = "basic", .flags = 0 },
		{ .name = "idle", .flags = TEST_IDLE },
		{ .name = "busy", .flags = TEST_BUSY },
		{ .name = "busy-reset", .flags = TEST_BUSY | TEST_RESET },
		{ .name = "busy-idle", .flags = TEST_BUSY | TEST_IDLE },
		{ .name = "reset-idle", .flags = TEST_RESET | TEST_IDLE },
	};
	unsigned int i;
	int ret = 0;

	for (i = 0, phase = phases; ret == 0 && i < ARRAY_SIZE(phases);
	     i++, phase++)
		ret = __igt_ctx_sseu(arg, phase->name, phase->flags);

	return ret;
}

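/*
 * Together with __igt_ctx_sseu() above, each phase walks every render
 * engine that supports slice power-gating through default -> power-gated
 * -> default -> power-gated SSEU configurations, optionally while busy
 * with a spinner, across an engine reset, or followed by an idle flush,
 * as selected by the TEST_* flags.
 */

/*
 * For the read-only test below, roughly half of the test objects are
 * randomly marked read-only before being targeted by gpu_fill(); for
 * those objects cpu_check() is told to expect zero successful writes,
 * i.e. every dword must still hold the STACK_MAGIC fill.
 */
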
static int igt_ctx_readonly(void *arg)
{
	struct drm_i915_private *i915 = arg;
	unsigned long idx, ndwords, dw, num_engines;
	struct drm_i915_gem_object *obj = NULL;
	struct i915_request *tq[5] = {};
	struct i915_gem_engines_iter it;
	struct i915_address_space *vm;
	struct i915_gem_context *ctx;
	struct intel_context *ce;
	struct igt_live_test t;
	I915_RND_STATE(prng);
	IGT_TIMEOUT(end_time);
	LIST_HEAD(objects);
	struct file *file;
	int err = -ENODEV;

	/*
	 * Create a few read-only objects (with the occasional writable object)
	 * and try to write into these objects, checking that the GPU discards
	 * any write to a read-only object.
	 */

	file = mock_file(i915);
	if (IS_ERR(file))
		return PTR_ERR(file);

	err = igt_live_test_begin(&t, i915, __func__, "");
	if (err)
		goto out_file;

	ctx = live_context(i915, file);
	if (IS_ERR(ctx)) {
		err = PTR_ERR(ctx);
		goto out_file;
	}

	vm = ctx->vm ?: &i915->ggtt.alias->vm;
	if (!vm || !vm->has_read_only) {
		err = 0;
		goto out_file;
	}

	num_engines = 0;
	for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it)
		if (intel_engine_can_store_dword(ce->engine))
			num_engines++;
	i915_gem_context_unlock_engines(ctx);

	ndwords = 0;
	dw = 0;
	while (!time_after(jiffies, end_time)) {
		for_each_gem_engine(ce,
				    i915_gem_context_lock_engines(ctx), it) {
			if (!intel_engine_can_store_dword(ce->engine))
				continue;

			if (!obj) {
				obj = create_test_object(ce->vm, file, &objects);
				if (IS_ERR(obj)) {
					err = PTR_ERR(obj);
					i915_gem_context_unlock_engines(ctx);
					goto out_file;
				}

				if (prandom_u32_state(&prng) & 1)
					i915_gem_object_set_readonly(obj);
			}

			err = gpu_fill(ce, obj, dw);
			if (err) {
				pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) [full-ppgtt? %s], err=%d\n",
				       ndwords, dw, max_dwords(obj),
				       ce->engine->name,
				       yesno(i915_gem_context_has_full_ppgtt(ctx)),
				       err);
				i915_gem_context_unlock_engines(ctx);
				goto out_file;
			}

			err = throttle(ce, tq, ARRAY_SIZE(tq));
			if (err) {
				i915_gem_context_unlock_engines(ctx);
				goto out_file;
			}

			if (++dw == max_dwords(obj)) {
				obj = NULL;
				dw = 0;
			}
			ndwords++;
		}
		i915_gem_context_unlock_engines(ctx);
	}
	pr_info("Submitted %lu dwords (across %lu engines)\n",
		ndwords, num_engines);

	dw = 0;
	idx = 0;
	list_for_each_entry(obj, &objects, st_link) {
		unsigned int rem =
			min_t(unsigned int, ndwords - dw, max_dwords(obj));
		unsigned int num_writes;

		num_writes = rem;
		if (i915_gem_object_is_readonly(obj))
			num_writes = 0;

		err = cpu_check(obj, idx++, num_writes);
		if (err)
			break;

		dw += rem;
	}

out_file:
	throttle_release(tq, ARRAY_SIZE(tq));
	if (igt_live_test_end(&t))
		err = -EIO;

	fput(file);
	return err;
}

static int check_scratch(struct i915_address_space *vm, u64 offset)
{
	struct drm_mm_node *node;

	mutex_lock(&vm->mutex);
	node = __drm_mm_interval_first(&vm->mm,
				       offset, offset + sizeof(u32) - 1);
	mutex_unlock(&vm->mutex);
	if (!node || node->start > offset)
		return 0;

	GEM_BUG_ON(offset >= node->start + node->size);

	pr_err("Target offset 0x%08x_%08x overlaps with a node in the mm!\n",
	       upper_32_bits(offset), lower_32_bits(offset));
	return -EINVAL;
}

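/*
 * The vm-isolation helpers below build tiny batches by hand:
 * write_to_scratch() emits an MI_STORE_DWORD_IMM of a known value to an
 * arbitrary ppGTT offset in one context, while read_from_scratch() bounces
 * the same offset through a register (GPR0 on gen8+, a privileged register
 * via a secure GGTT batch on older parts) into a result buffer for the CPU
 * to inspect. check_scratch() first verifies nothing else is bound at the
 * chosen offset so a stray hit cannot be mistaken for a real mapping.
 */
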
static int write_to_scratch(struct i915_gem_context *ctx,
			    struct intel_engine_cs *engine,
			    u64 offset, u32 value)
{
	struct drm_i915_private *i915 = ctx->i915;
	struct drm_i915_gem_object *obj;
	struct i915_address_space *vm;
	struct i915_request *rq;
	struct i915_vma *vma;
	u32 *cmd;
	int err;

	GEM_BUG_ON(offset < I915_GTT_PAGE_SIZE);

	err = check_scratch(ctx->vm, offset);
	if (err)
		return err;

	obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
	if (IS_ERR(obj))
		return PTR_ERR(obj);

	cmd = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WB);
	if (IS_ERR(cmd)) {
		err = PTR_ERR(cmd);
		goto out;
	}

	*cmd++ = MI_STORE_DWORD_IMM_GEN4;
	if (GRAPHICS_VER(i915) >= 8) {
		*cmd++ = lower_32_bits(offset);
		*cmd++ = upper_32_bits(offset);
	} else {
		*cmd++ = 0;
		*cmd++ = offset;
	}
	*cmd++ = value;
	*cmd = MI_BATCH_BUFFER_END;
	__i915_gem_object_flush_map(obj, 0, 64);
	i915_gem_object_unpin_map(obj);

	intel_gt_chipset_flush(engine->gt);

	vm = i915_gem_context_get_eb_vm(ctx);
	vma = i915_vma_instance(obj, vm, NULL);
	if (IS_ERR(vma)) {
		err = PTR_ERR(vma);
		goto out_vm;
	}

	err = i915_vma_pin(vma, 0, 0, PIN_USER | PIN_OFFSET_FIXED);
	if (err)
		goto out_vm;

	rq = igt_request_alloc(ctx, engine);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto err_unpin;
	}

	i915_vma_lock(vma);
	err = i915_request_await_object(rq, vma->obj, false);
	if (err == 0)
		err = i915_vma_move_to_active(vma, rq, 0);
	i915_vma_unlock(vma);
	if (err)
		goto skip_request;

	if (rq->engine->emit_init_breadcrumb) {
		err = rq->engine->emit_init_breadcrumb(rq);
		if (err)
			goto skip_request;
	}

	err = engine->emit_bb_start(rq, vma->node.start, vma->node.size, 0);
	if (err)
		goto skip_request;

	i915_vma_unpin(vma);

	i915_request_add(rq);

	goto out_vm;
skip_request:
	i915_request_set_error_once(rq, err);
	i915_request_add(rq);
err_unpin:
	i915_vma_unpin(vma);
out_vm:
	i915_vm_put(vm);
out:
	i915_gem_object_put(obj);
	return err;
}

static int read_from_scratch(struct i915_gem_context *ctx,
			     struct intel_engine_cs *engine,
			     u64 offset, u32 *value)
{
	struct drm_i915_private *i915 = ctx->i915;
	struct drm_i915_gem_object *obj;
	struct i915_address_space *vm;
	const u32 result = 0x100;
	struct i915_request *rq;
	struct i915_vma *vma;
	unsigned int flags;
	u32 *cmd;
	int err;

	GEM_BUG_ON(offset < I915_GTT_PAGE_SIZE);

	err = check_scratch(ctx->vm, offset);
	if (err)
		return err;

	obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
	if (IS_ERR(obj))
		return PTR_ERR(obj);

	if (GRAPHICS_VER(i915) >= 8) {
		const u32 GPR0 = engine->mmio_base + 0x600;

		vm = i915_gem_context_get_eb_vm(ctx);
		vma = i915_vma_instance(obj, vm, NULL);
		if (IS_ERR(vma)) {
			err = PTR_ERR(vma);
			goto out_vm;
		}

		err = i915_vma_pin(vma, 0, 0, PIN_USER | PIN_OFFSET_FIXED);
		if (err)
			goto out_vm;

		cmd = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WB);
		if (IS_ERR(cmd)) {
			err = PTR_ERR(cmd);
			goto out;
		}

		memset(cmd, POISON_INUSE, PAGE_SIZE);
		*cmd++ = MI_LOAD_REGISTER_MEM_GEN8;
		*cmd++ = GPR0;
		*cmd++ = lower_32_bits(offset);
		*cmd++ = upper_32_bits(offset);
		*cmd++ = MI_STORE_REGISTER_MEM_GEN8;
		*cmd++ = GPR0;
		*cmd++ = result;
		*cmd++ = 0;
		*cmd = MI_BATCH_BUFFER_END;

		i915_gem_object_flush_map(obj);
		i915_gem_object_unpin_map(obj);

		flags = 0;
	} else {
		const u32 reg = engine->mmio_base + 0x420;

		/* hsw: register access even to 3DPRIM! is protected */
		vm = i915_vm_get(&engine->gt->ggtt->vm);
		vma = i915_vma_instance(obj, vm, NULL);
		if (IS_ERR(vma)) {
			err = PTR_ERR(vma);
			goto out_vm;
		}

		err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
		if (err)
			goto out_vm;

		cmd = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WB);
		if (IS_ERR(cmd)) {
			err = PTR_ERR(cmd);
			goto out;
		}

		memset(cmd, POISON_INUSE, PAGE_SIZE);
		*cmd++ = MI_LOAD_REGISTER_MEM;
		*cmd++ = reg;
		*cmd++ = offset;
		*cmd++ = MI_STORE_REGISTER_MEM | MI_USE_GGTT;
		*cmd++ = reg;
		*cmd++ = vma->node.start + result;
		*cmd = MI_BATCH_BUFFER_END;

		i915_gem_object_flush_map(obj);
		i915_gem_object_unpin_map(obj);

		flags = I915_DISPATCH_SECURE;
	}

	intel_gt_chipset_flush(engine->gt);

	rq = igt_request_alloc(ctx, engine);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto err_unpin;
	}

	i915_vma_lock(vma);
	err = i915_request_await_object(rq, vma->obj, true);
	if (err == 0)
		err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
	i915_vma_unlock(vma);
	if (err)
		goto skip_request;

	if (rq->engine->emit_init_breadcrumb) {
		err = rq->engine->emit_init_breadcrumb(rq);
		if (err)
			goto skip_request;
	}

	err = engine->emit_bb_start(rq, vma->node.start, vma->node.size, flags);
	if (err)
		goto skip_request;

	i915_vma_unpin(vma);

	i915_request_add(rq);

	i915_gem_object_lock(obj, NULL);
	err = i915_gem_object_set_to_cpu_domain(obj, false);
	i915_gem_object_unlock(obj);
	if (err)
		goto out_vm;

	cmd = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WB);
	if (IS_ERR(cmd)) {
		err = PTR_ERR(cmd);
		goto out_vm;
	}

	*value = cmd[result / sizeof(*cmd)];
	i915_gem_object_unpin_map(obj);

	goto out_vm;
skip_request:
	i915_request_set_error_once(rq, err);
	i915_request_add(rq);
err_unpin:
	i915_vma_unpin(vma);
out_vm:
	i915_vm_put(vm);
out:
	i915_gem_object_put(obj);
	return err;
}

static int check_scratch_page(struct i915_gem_context *ctx, u32 *out)
{
	struct i915_address_space *vm;
	u32 *vaddr;
	int err = 0;

	vm = ctx->vm;
	if (!vm)
		return -ENODEV;

	if (!vm->scratch[0]) {
		pr_err("No scratch page!\n");
		return -EINVAL;
	}

	vaddr = __px_vaddr(vm->scratch[0]);

	memcpy(out, vaddr, sizeof(*out));
	if (memchr_inv(vaddr, *out, PAGE_SIZE)) {
		pr_err("Inconsistent initial state of scratch page!\n");
		err = -EINVAL;
	}

	return err;
}

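/*
 * igt_vm_isolation() pairs the two helpers: write 0xdeadbeef to a random
 * dword-aligned offset in ctx_a's address space, then read the same offset
 * back through ctx_b. With distinct page tables the read should only ever
 * observe the (uniform) scratch-page value sampled at the start, never the
 * data written by the other context.
 */
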
static int igt_vm_isolation(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct i915_gem_context *ctx_a, *ctx_b;
	unsigned long num_engines, count;
	struct intel_engine_cs *engine;
	struct igt_live_test t;
	I915_RND_STATE(prng);
	struct file *file;
	u64 vm_total;
	u32 expected;
	int err;

	if (GRAPHICS_VER(i915) < 7)
		return 0;

	/*
	 * The simple goal here is that a write into one context is not
	 * observed in a second (separate page tables and scratch).
	 */

	file = mock_file(i915);
	if (IS_ERR(file))
		return PTR_ERR(file);

	err = igt_live_test_begin(&t, i915, __func__, "");
	if (err)
		goto out_file;

	ctx_a = live_context(i915, file);
	if (IS_ERR(ctx_a)) {
		err = PTR_ERR(ctx_a);
		goto out_file;
	}

	ctx_b = live_context(i915, file);
	if (IS_ERR(ctx_b)) {
		err = PTR_ERR(ctx_b);
		goto out_file;
	}

	/* We can only test vm isolation if the vms are distinct */
	if (ctx_a->vm == ctx_b->vm)
		goto out_file;

	/* Read the initial state of the scratch page */
	err = check_scratch_page(ctx_a, &expected);
	if (err)
		goto out_file;

	err = check_scratch_page(ctx_b, &expected);
	if (err)
		goto out_file;

	vm_total = ctx_a->vm->total;
	GEM_BUG_ON(ctx_b->vm->total != vm_total);

	count = 0;
	num_engines = 0;
	for_each_uabi_engine(engine, i915) {
		IGT_TIMEOUT(end_time);
		unsigned long this = 0;

		if (!intel_engine_can_store_dword(engine))
			continue;

		/* Not all engines have their own GPR! */
		if (GRAPHICS_VER(i915) < 8 && engine->class != RENDER_CLASS)
			continue;

		while (!__igt_timeout(end_time, NULL)) {
			u32 value = 0xc5c5c5c5;
			u64 offset;

			/* Leave enough space at offset 0 for the batch */
			offset = igt_random_offset(&prng,
						   I915_GTT_PAGE_SIZE, vm_total,
						   sizeof(u32), alignof_dword);

			err = write_to_scratch(ctx_a, engine,
					       offset, 0xdeadbeef);
			if (err == 0)
				err = read_from_scratch(ctx_b, engine,
							offset, &value);
			if (err)
				goto out_file;

			if (value != expected) {
				pr_err("%s: Read %08x from scratch (offset 0x%08x_%08x), after %lu reads!\n",
				       engine->name, value,
				       upper_32_bits(offset),
				       lower_32_bits(offset),
				       this);
				err = -EINVAL;
				goto out_file;
			}

			this++;
		}
		count += this;
		num_engines++;
	}
	pr_info("Checked %lu scratch offsets across %lu engines\n",
		count, num_engines);

out_file:
	if (igt_live_test_end(&t))
		err = -EIO;
	fput(file);
	return err;
}

int i915_gem_context_live_selftests(struct drm_i915_private *i915)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(live_nop_switch),
		SUBTEST(live_parallel_switch),
		SUBTEST(igt_ctx_exec),
		SUBTEST(igt_ctx_readonly),
		SUBTEST(igt_ctx_sseu),
		SUBTEST(igt_shared_ctx_exec),
		SUBTEST(igt_vm_isolation),
	};

	if (intel_gt_is_wedged(&i915->gt))
		return 0;

	return i915_live_subtests(tests, i915);
}