/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2017 Intel Corporation
 */

#include <linux/prime_numbers.h>

#include "gem/i915_gem_internal.h"
#include "gem/i915_gem_pm.h"
#include "gt/intel_engine_pm.h"
#include "gt/intel_engine_regs.h"
#include "gt/intel_gt.h"
#include "gt/intel_gt_requests.h"
#include "gt/intel_reset.h"
#include "i915_selftest.h"

#include "gem/selftests/igt_gem_utils.h"
#include "selftests/i915_random.h"
#include "selftests/igt_flush_test.h"
#include "selftests/igt_live_test.h"
#include "selftests/igt_reset.h"
#include "selftests/igt_spinner.h"
#include "selftests/mock_drm.h"
#include "selftests/mock_gem_device.h"

#include "huge_gem_object.h"
#include "igt_gem_utils.h"

#define DW_PER_PAGE (PAGE_SIZE / sizeof(u32))

static int live_nop_switch(void *arg)
{
	const unsigned int nctx = 1024;
	struct drm_i915_private *i915 = arg;
	struct intel_engine_cs *engine;
	struct i915_gem_context **ctx;
	struct igt_live_test t;
	struct file *file;
	unsigned long n;
	int err = -ENODEV;

	/*
	 * Create as many contexts as we can feasibly get away with
	 * and check we can switch between them rapidly.
	 *
	 * Serves as a very simple stress test for submission and HW switching
	 * between contexts.
	 */

	if (!DRIVER_CAPS(i915)->has_logical_contexts)
		return 0;

	file = mock_file(i915);
	if (IS_ERR(file))
		return PTR_ERR(file);

	ctx = kcalloc(nctx, sizeof(*ctx), GFP_KERNEL);
	if (!ctx) {
		err = -ENOMEM;
		goto out_file;
	}

	for (n = 0; n < nctx; n++) {
		ctx[n] = live_context(i915, file);
		if (IS_ERR(ctx[n])) {
			err = PTR_ERR(ctx[n]);
			goto out_file;
		}
	}

	for_each_uabi_engine(engine, i915) {
		struct i915_request *rq = NULL;
		unsigned long end_time, prime;
		ktime_t times[2] = {};

		times[0] = ktime_get_raw();
		for (n = 0; n < nctx; n++) {
			struct i915_request *this;

			this = igt_request_alloc(ctx[n], engine);
			if (IS_ERR(this)) {
				err = PTR_ERR(this);
				goto out_file;
			}
			if (rq) {
				i915_request_await_dma_fence(this, &rq->fence);
				i915_request_put(rq);
			}
			rq = i915_request_get(this);
			i915_request_add(this);
		}
		if (i915_request_wait(rq, 0, 10 * HZ) < 0) {
			pr_err("Failed to populate %d contexts\n", nctx);
			intel_gt_set_wedged(to_gt(i915));
			i915_request_put(rq);
			err = -EIO;
			goto out_file;
		}
		i915_request_put(rq);

		times[1] = ktime_get_raw();

		pr_info("Populated %d contexts on %s in %lluns\n",
			nctx, engine->name, ktime_to_ns(times[1] - times[0]));

		err = igt_live_test_begin(&t, i915, __func__, engine->name);
		if (err)
			goto out_file;

		end_time = jiffies + i915_selftest.timeout_jiffies;
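
		/*
		 * Time a chain of 'prime' empty requests spread across the
		 * contexts. Stepping through prime counts gives a spread of
		 * queue depths that, presumably, avoids resonating with any
		 * power-of-two behaviour in the submission path.
		 */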
		for_each_prime_number_from(prime, 2, 8192) {
			times[1] = ktime_get_raw();

			rq = NULL;
			for (n = 0; n < prime; n++) {
				struct i915_request *this;

				this = igt_request_alloc(ctx[n % nctx], engine);
				if (IS_ERR(this)) {
					err = PTR_ERR(this);
					goto out_file;
				}

				if (rq) { /* Force submission order */
					i915_request_await_dma_fence(this, &rq->fence);
					i915_request_put(rq);
				}

				/*
				 * This space is left intentionally blank.
				 *
				 * We do not actually want to perform any
				 * action with this request, we just want
				 * to measure the latency in allocation
				 * and submission of our breadcrumbs -
				 * ensuring that the bare request is sufficient
				 * for the system to work (i.e. proper HEAD
				 * tracking of the rings, interrupt handling,
				 * etc). It also gives us the lowest bounds
				 * for latency.
				 */

				rq = i915_request_get(this);
				i915_request_add(this);
			}
			GEM_BUG_ON(!rq);
			if (i915_request_wait(rq, 0, HZ / 5) < 0) {
				pr_err("Switching between %ld contexts timed out\n",
				       prime);
				intel_gt_set_wedged(to_gt(i915));
				i915_request_put(rq);
				break;
			}
			i915_request_put(rq);

			times[1] = ktime_sub(ktime_get_raw(), times[1]);
			if (prime == 2)
				times[0] = times[1];

			if (__igt_timeout(end_time, NULL))
				break;
		}

		err = igt_live_test_end(&t);
		if (err)
			goto out_file;

		pr_info("Switch latencies on %s: 1 = %lluns, %lu = %lluns\n",
			engine->name,
			ktime_to_ns(times[0]),
			prime - 1, div64_u64(ktime_to_ns(times[1]), prime - 1));
	}

out_file:
	fput(file);
	return err;
}

struct parallel_switch {
	struct task_struct *tsk;
	struct intel_context *ce[2];
};

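/*
 * Two variants of the per-engine worker: __live_parallel_switch1 submits one
 * request on each of its two contexts and waits for the pair to complete
 * before counting a switch, while __live_parallel_switchN keeps requests in
 * flight without waiting on each iteration.
 */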
static int __live_parallel_switch1(void *data)
{
	struct parallel_switch *arg = data;
	IGT_TIMEOUT(end_time);
	unsigned long count;

	count = 0;
	do {
		struct i915_request *rq = NULL;
		int err, n;

		err = 0;
		for (n = 0; !err && n < ARRAY_SIZE(arg->ce); n++) {
			struct i915_request *prev = rq;

			rq = i915_request_create(arg->ce[n]);
			if (IS_ERR(rq)) {
				i915_request_put(prev);
				return PTR_ERR(rq);
			}

			i915_request_get(rq);
			if (prev) {
				err = i915_request_await_dma_fence(rq, &prev->fence);
				i915_request_put(prev);
			}

			i915_request_add(rq);
		}
		if (i915_request_wait(rq, 0, HZ / 5) < 0)
			err = -ETIME;
		i915_request_put(rq);
		if (err)
			return err;

		count++;
	} while (!__igt_timeout(end_time, NULL));

	pr_info("%s: %lu switches (sync)\n", arg->ce[0]->engine->name, count);
	return 0;
}

static int __live_parallel_switchN(void *data)
{
	struct parallel_switch *arg = data;
	struct i915_request *rq = NULL;
	IGT_TIMEOUT(end_time);
	unsigned long count;
	int n;

	count = 0;
	do {
		for (n = 0; n < ARRAY_SIZE(arg->ce); n++) {
			struct i915_request *prev = rq;
			int err = 0;

			rq = i915_request_create(arg->ce[n]);
			if (IS_ERR(rq)) {
				i915_request_put(prev);
				return PTR_ERR(rq);
			}

			i915_request_get(rq);
			if (prev) {
				err = i915_request_await_dma_fence(rq, &prev->fence);
				i915_request_put(prev);
			}

			i915_request_add(rq);
			if (err) {
				i915_request_put(rq);
				return err;
			}
		}

		count++;
	} while (!__igt_timeout(end_time, NULL));
	i915_request_put(rq);

	pr_info("%s: %lu switches (many)\n", arg->ce[0]->engine->name, count);
	return 0;
}

static int live_parallel_switch(void *arg)
{
	struct drm_i915_private *i915 = arg;
	static int (* const func[])(void *arg) = {
		__live_parallel_switch1,
		__live_parallel_switchN,
		NULL,
	};
	struct parallel_switch *data = NULL;
	struct i915_gem_engines *engines;
	struct i915_gem_engines_iter it;
	int (* const *fn)(void *arg);
	struct i915_gem_context *ctx;
	struct intel_context *ce;
	struct file *file;
	int n, m, count;
	int err = 0;

	/*
	 * Check we can process switches on all engines simultaneously.
	 */

	if (!DRIVER_CAPS(i915)->has_logical_contexts)
		return 0;

	file = mock_file(i915);
	if (IS_ERR(file))
		return PTR_ERR(file);

	ctx = live_context(i915, file);
	if (IS_ERR(ctx)) {
		err = PTR_ERR(ctx);
		goto out_file;
	}

	engines = i915_gem_context_lock_engines(ctx);
	count = engines->num_engines;

	data = kcalloc(count, sizeof(*data), GFP_KERNEL);
	if (!data) {
		i915_gem_context_unlock_engines(ctx);
		err = -ENOMEM;
		goto out_file;
	}

	m = 0; /* Use the first context as our template for the engines */
	for_each_gem_engine(ce, engines, it) {
		err = intel_context_pin(ce);
		if (err) {
			i915_gem_context_unlock_engines(ctx);
			goto out;
		}
		data[m++].ce[0] = intel_context_get(ce);
	}
	i915_gem_context_unlock_engines(ctx);

	/* Clone the same set of engines into the other contexts */
	for (n = 1; n < ARRAY_SIZE(data->ce); n++) {
		ctx = live_context(i915, file);
		if (IS_ERR(ctx)) {
			err = PTR_ERR(ctx);
			goto out;
		}

		for (m = 0; m < count; m++) {
			if (!data[m].ce[0])
				continue;

			ce = intel_context_create(data[m].ce[0]->engine);
			if (IS_ERR(ce)) {
				err = PTR_ERR(ce);
				goto out;
			}

			err = intel_context_pin(ce);
			if (err) {
				intel_context_put(ce);
				goto out;
			}

			data[m].ce[n] = ce;
		}
	}

	for (fn = func; !err && *fn; fn++) {
		struct igt_live_test t;
		int n;

		err = igt_live_test_begin(&t, i915, __func__, "");
		if (err)
			break;

		for (n = 0; n < count; n++) {
			if (!data[n].ce[0])
				continue;

			data[n].tsk = kthread_run(*fn, &data[n],
						  "igt/parallel:%s",
						  data[n].ce[0]->engine->name);
			if (IS_ERR(data[n].tsk)) {
				err = PTR_ERR(data[n].tsk);
				break;
			}
			get_task_struct(data[n].tsk);
		}

		yield(); /* start all threads before we kthread_stop() */

		for (n = 0; n < count; n++) {
			int status;

			if (IS_ERR_OR_NULL(data[n].tsk))
				continue;

			status = kthread_stop(data[n].tsk);
			if (status && !err)
				err = status;

			put_task_struct(data[n].tsk);
			data[n].tsk = NULL;
		}

		if (igt_live_test_end(&t))
			err = -EIO;
	}

out:
	for (n = 0; n < count; n++) {
		for (m = 0; m < ARRAY_SIZE(data->ce); m++) {
			if (!data[n].ce[m])
				continue;

			intel_context_unpin(data[n].ce[m]);
			intel_context_put(data[n].ce[m]);
		}
	}
	kfree(data);
out_file:
	fput(file);
	return err;
}

static unsigned long real_page_count(struct drm_i915_gem_object *obj)
{
	return huge_gem_object_phys_size(obj) >> PAGE_SHIFT;
}

static unsigned long fake_page_count(struct drm_i915_gem_object *obj)
{
	return huge_gem_object_dma_size(obj) >> PAGE_SHIFT;
}

static int gpu_fill(struct intel_context *ce,
		    struct drm_i915_gem_object *obj,
		    unsigned int dw)
{
	struct i915_vma *vma;
	int err;

	GEM_BUG_ON(obj->base.size > ce->vm->total);
	GEM_BUG_ON(!intel_engine_can_store_dword(ce->engine));

	vma = i915_vma_instance(obj, ce->vm, NULL);
	if (IS_ERR(vma))
		return PTR_ERR(vma);

	err = i915_vma_pin(vma, 0, 0, PIN_HIGH | PIN_USER);
	if (err)
		return err;

	/*
	 * Within the GTT the huge object maps every page onto
	 * its 1024 real pages (using phys_pfn = dma_pfn % 1024).
	 * We set the nth dword within the page using the nth
	 * mapping via the GTT - this should exercise the GTT mapping
	 * whilst checking that each context provides a unique view
	 * into the object.
	 */
	err = igt_gpu_fill_dw(ce, vma,
			      (dw * real_page_count(obj)) << PAGE_SHIFT |
			      (dw * sizeof(u32)),
			      real_page_count(obj),
			      dw);
	i915_vma_unpin(vma);

	return err;
}

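/*
 * Fill every dword of the object's backing store from the CPU with the given
 * value, flushing the cachelines on non-LLC platforms so the GPU sees it.
 */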
static int cpu_fill(struct drm_i915_gem_object *obj, u32 value)
{
	const bool has_llc = HAS_LLC(to_i915(obj->base.dev));
	unsigned int n, m, need_flush;
	int err;

	i915_gem_object_lock(obj, NULL);
	err = i915_gem_object_prepare_write(obj, &need_flush);
	if (err)
		goto out;

	for (n = 0; n < real_page_count(obj); n++) {
		u32 *map;

		map = kmap_atomic(i915_gem_object_get_page(obj, n));
		for (m = 0; m < DW_PER_PAGE; m++)
			map[m] = value;
		if (!has_llc)
			drm_clflush_virt_range(map, PAGE_SIZE);
		kunmap_atomic(map);
	}

	i915_gem_object_finish_access(obj);
	obj->read_domains = I915_GEM_DOMAIN_GTT | I915_GEM_DOMAIN_CPU;
	obj->write_domain = 0;
out:
	i915_gem_object_unlock(obj);
	return err;
}

static noinline int cpu_check(struct drm_i915_gem_object *obj,
			      unsigned int idx, unsigned int max)
{
	unsigned int n, m, needs_flush;
	int err;

	i915_gem_object_lock(obj, NULL);
	err = i915_gem_object_prepare_read(obj, &needs_flush);
	if (err)
		goto out_unlock;

	for (n = 0; n < real_page_count(obj); n++) {
		u32 *map;

		map = kmap_atomic(i915_gem_object_get_page(obj, n));
		if (needs_flush & CLFLUSH_BEFORE)
			drm_clflush_virt_range(map, PAGE_SIZE);

		for (m = 0; m < max; m++) {
			if (map[m] != m) {
				pr_err("%pS: Invalid value at object %d page %d/%ld, offset %d/%d: found %x expected %x\n",
				       __builtin_return_address(0), idx,
				       n, real_page_count(obj), m, max,
				       map[m], m);
				err = -EINVAL;
				goto out_unmap;
			}
		}

		for (; m < DW_PER_PAGE; m++) {
			if (map[m] != STACK_MAGIC) {
				pr_err("%pS: Invalid value at object %d page %d, offset %d: found %x expected %x (uninitialised)\n",
				       __builtin_return_address(0), idx, n, m,
				       map[m], STACK_MAGIC);
				err = -EINVAL;
				goto out_unmap;
			}
		}

out_unmap:
		kunmap_atomic(map);
		if (err)
			break;
	}

	i915_gem_object_finish_access(obj);
out_unlock:
	i915_gem_object_unlock(obj);
	return err;
}

static int file_add_object(struct file *file, struct drm_i915_gem_object *obj)
{
	int err;

	GEM_BUG_ON(obj->base.handle_count);

	/* tie the object to the drm_file for easy reaping */
	err = idr_alloc(&to_drm_file(file)->object_idr,
			&obj->base, 1, 0, GFP_KERNEL);
	if (err < 0)
		return err;

	i915_gem_object_get(obj);
	obj->base.handle_count++;
	return 0;
}

static struct drm_i915_gem_object *
create_test_object(struct i915_address_space *vm,
		   struct file *file,
		   struct list_head *objects)
{
	struct drm_i915_gem_object *obj;
	u64 size;
	int err;

	/* Keep in GEM's good graces */
	intel_gt_retire_requests(vm->gt);

	size = min(vm->total / 2, 1024ull * DW_PER_PAGE * PAGE_SIZE);
	size = round_down(size, DW_PER_PAGE * PAGE_SIZE);

	obj = huge_gem_object(vm->i915, DW_PER_PAGE * PAGE_SIZE, size);
	if (IS_ERR(obj))
		return obj;

	err = file_add_object(file, obj);
	i915_gem_object_put(obj);
	if (err)
		return ERR_PTR(err);

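	/*
	 * Poison the object with STACK_MAGIC so that cpu_check() can later
	 * distinguish dwords written by the GPU from untouched ones.
	 */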
	err = cpu_fill(obj, STACK_MAGIC);
	if (err) {
		pr_err("Failed to fill object with cpu, err=%d\n",
		       err);
		return ERR_PTR(err);
	}

	list_add_tail(&obj->st_link, objects);
	return obj;
}

static unsigned long max_dwords(struct drm_i915_gem_object *obj)
{
	unsigned long npages = fake_page_count(obj);

	GEM_BUG_ON(!IS_ALIGNED(npages, DW_PER_PAGE));
	return npages / DW_PER_PAGE;
}

static void throttle_release(struct i915_request **q, int count)
{
	int i;

	for (i = 0; i < count; i++) {
		if (IS_ERR_OR_NULL(q[i]))
			continue;

		i915_request_put(fetch_and_zero(&q[i]));
	}
}

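/*
 * Bound the number of outstanding requests: wait for the oldest entry in the
 * queue to complete, shuffle the array down and append a fresh request on
 * the given context, so the test never runs unboundedly far ahead of the HW.
 */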
static int throttle(struct intel_context *ce,
		    struct i915_request **q, int count)
{
	int i;

	if (!IS_ERR_OR_NULL(q[0])) {
		if (i915_request_wait(q[0],
				      I915_WAIT_INTERRUPTIBLE,
				      MAX_SCHEDULE_TIMEOUT) < 0)
			return -EINTR;

		i915_request_put(q[0]);
	}

	for (i = 0; i < count - 1; i++)
		q[i] = q[i + 1];

	q[i] = intel_context_create_request(ce);
	if (IS_ERR(q[i]))
		return PTR_ERR(q[i]);

	i915_request_get(q[i]);
	i915_request_add(q[i]);

	return 0;
}

static int igt_ctx_exec(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct intel_engine_cs *engine;
	int err = -ENODEV;

	/*
	 * Create a few different contexts (with different mm) and write
	 * through each ctx/mm using the GPU making sure those writes end
	 * up in the expected pages of our obj.
	 */

	if (!DRIVER_CAPS(i915)->has_logical_contexts)
		return 0;

	for_each_uabi_engine(engine, i915) {
		struct drm_i915_gem_object *obj = NULL;
		unsigned long ncontexts, ndwords, dw;
		struct i915_request *tq[5] = {};
		struct igt_live_test t;
		IGT_TIMEOUT(end_time);
		LIST_HEAD(objects);
		struct file *file;

		if (!intel_engine_can_store_dword(engine))
			continue;

		if (!engine->context_size)
			continue; /* No logical context support in HW */

		file = mock_file(i915);
		if (IS_ERR(file))
			return PTR_ERR(file);

		err = igt_live_test_begin(&t, i915, __func__, engine->name);
		if (err)
			goto out_file;

		ncontexts = 0;
		ndwords = 0;
		dw = 0;
		while (!time_after(jiffies, end_time)) {
			struct i915_gem_context *ctx;
			struct intel_context *ce;

			ctx = kernel_context(i915, NULL);
			if (IS_ERR(ctx)) {
				err = PTR_ERR(ctx);
				goto out_file;
			}

			ce = i915_gem_context_get_engine(ctx, engine->legacy_idx);
			GEM_BUG_ON(IS_ERR(ce));

			if (!obj) {
				obj = create_test_object(ce->vm, file, &objects);
				if (IS_ERR(obj)) {
					err = PTR_ERR(obj);
					intel_context_put(ce);
					kernel_context_close(ctx);
					goto out_file;
				}
			}

			err = gpu_fill(ce, obj, dw);
			if (err) {
				pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) [full-ppgtt? %s], err=%d\n",
				       ndwords, dw, max_dwords(obj),
				       engine->name,
				       yesno(i915_gem_context_has_full_ppgtt(ctx)),
				       err);
				intel_context_put(ce);
				kernel_context_close(ctx);
				goto out_file;
			}

			err = throttle(ce, tq, ARRAY_SIZE(tq));
			if (err) {
				intel_context_put(ce);
				kernel_context_close(ctx);
				goto out_file;
			}

			if (++dw == max_dwords(obj)) {
				obj = NULL;
				dw = 0;
			}

			ndwords++;
			ncontexts++;

			intel_context_put(ce);
			kernel_context_close(ctx);
		}

		pr_info("Submitted %lu contexts to %s, filling %lu dwords\n",
			ncontexts, engine->name, ndwords);

		ncontexts = dw = 0;
		list_for_each_entry(obj, &objects, st_link) {
			unsigned int rem =
				min_t(unsigned int, ndwords - dw, max_dwords(obj));

			err = cpu_check(obj, ncontexts++, rem);
			if (err)
				break;

			dw += rem;
		}

out_file:
		throttle_release(tq, ARRAY_SIZE(tq));
		if (igt_live_test_end(&t))
			err = -EIO;

		fput(file);
		if (err)
			return err;

		i915_gem_drain_freed_objects(i915);
	}

	return 0;
}

static int igt_shared_ctx_exec(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct i915_request *tq[5] = {};
	struct i915_gem_context *parent;
	struct intel_engine_cs *engine;
	struct igt_live_test t;
	struct file *file;
	int err = 0;

	/*
	 * Create a few different contexts with the same mm and write
	 * through each ctx using the GPU making sure those writes end
	 * up in the expected pages of our obj.
	 */
	if (!DRIVER_CAPS(i915)->has_logical_contexts)
		return 0;

	file = mock_file(i915);
	if (IS_ERR(file))
		return PTR_ERR(file);

	parent = live_context(i915, file);
	if (IS_ERR(parent)) {
		err = PTR_ERR(parent);
		goto out_file;
	}

	if (!parent->vm) { /* not full-ppgtt; nothing to share */
		err = 0;
		goto out_file;
	}

	err = igt_live_test_begin(&t, i915, __func__, "");
	if (err)
		goto out_file;

	for_each_uabi_engine(engine, i915) {
		unsigned long ncontexts, ndwords, dw;
		struct drm_i915_gem_object *obj = NULL;
		IGT_TIMEOUT(end_time);
		LIST_HEAD(objects);

		if (!intel_engine_can_store_dword(engine))
			continue;

		dw = 0;
		ndwords = 0;
		ncontexts = 0;
		while (!time_after(jiffies, end_time)) {
			struct i915_gem_context *ctx;
			struct intel_context *ce;

			ctx = kernel_context(i915, parent->vm);
			if (IS_ERR(ctx)) {
				err = PTR_ERR(ctx);
				goto out_test;
			}

			ce = i915_gem_context_get_engine(ctx, engine->legacy_idx);
			GEM_BUG_ON(IS_ERR(ce));

			if (!obj) {
				obj = create_test_object(parent->vm,
							 file, &objects);
				if (IS_ERR(obj)) {
					err = PTR_ERR(obj);
					intel_context_put(ce);
					kernel_context_close(ctx);
					goto out_test;
				}
			}

			err = gpu_fill(ce, obj, dw);
			if (err) {
				pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) [full-ppgtt? %s], err=%d\n",
				       ndwords, dw, max_dwords(obj),
				       engine->name,
				       yesno(i915_gem_context_has_full_ppgtt(ctx)),
				       err);
				intel_context_put(ce);
				kernel_context_close(ctx);
				goto out_test;
			}

			err = throttle(ce, tq, ARRAY_SIZE(tq));
			if (err) {
				intel_context_put(ce);
				kernel_context_close(ctx);
				goto out_test;
			}

			if (++dw == max_dwords(obj)) {
				obj = NULL;
				dw = 0;
			}

			ndwords++;
			ncontexts++;

			intel_context_put(ce);
			kernel_context_close(ctx);
		}
		pr_info("Submitted %lu contexts to %s, filling %lu dwords\n",
			ncontexts, engine->name, ndwords);

		ncontexts = dw = 0;
		list_for_each_entry(obj, &objects, st_link) {
			unsigned int rem =
				min_t(unsigned int, ndwords - dw, max_dwords(obj));

			err = cpu_check(obj, ncontexts++, rem);
			if (err)
				goto out_test;

			dw += rem;
		}

		i915_gem_drain_freed_objects(i915);
	}
out_test:
	throttle_release(tq, ARRAY_SIZE(tq));
	if (igt_live_test_end(&t))
		err = -EIO;
out_file:
	fput(file);
	return err;
}

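/*
 * Build a small batch that reads R_PWR_CLK_STATE from the render engine and
 * stores it into the first dword of the target vma, followed by a
 * batch-buffer end.
 */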
static int rpcs_query_batch(struct drm_i915_gem_object *rpcs, struct i915_vma *vma)
{
	u32 *cmd;

	GEM_BUG_ON(GRAPHICS_VER(vma->vm->i915) < 8);

	cmd = i915_gem_object_pin_map(rpcs, I915_MAP_WB);
	if (IS_ERR(cmd))
		return PTR_ERR(cmd);

	*cmd++ = MI_STORE_REGISTER_MEM_GEN8;
	*cmd++ = i915_mmio_reg_offset(GEN8_R_PWR_CLK_STATE(RENDER_RING_BASE));
	*cmd++ = lower_32_bits(vma->node.start);
	*cmd++ = upper_32_bits(vma->node.start);
	*cmd = MI_BATCH_BUFFER_END;

	__i915_gem_object_flush_map(rpcs, 0, 64);
	i915_gem_object_unpin_map(rpcs);

	intel_gt_chipset_flush(vma->vm->gt);

	return 0;
}

static int
emit_rpcs_query(struct drm_i915_gem_object *obj,
		struct intel_context *ce,
		struct i915_request **rq_out)
{
	struct drm_i915_private *i915 = to_i915(obj->base.dev);
	struct i915_request *rq;
	struct i915_gem_ww_ctx ww;
	struct i915_vma *batch;
	struct i915_vma *vma;
	struct drm_i915_gem_object *rpcs;
	int err;

	GEM_BUG_ON(!intel_engine_can_store_dword(ce->engine));

	if (GRAPHICS_VER(i915) < 8)
		return -EINVAL;

	vma = i915_vma_instance(obj, ce->vm, NULL);
	if (IS_ERR(vma))
		return PTR_ERR(vma);

	rpcs = i915_gem_object_create_internal(i915, PAGE_SIZE);
	if (IS_ERR(rpcs))
		return PTR_ERR(rpcs);

	batch = i915_vma_instance(rpcs, ce->vm, NULL);
	if (IS_ERR(batch)) {
		err = PTR_ERR(batch);
		goto err_put;
	}

	i915_gem_ww_ctx_init(&ww, false);
retry:
	err = i915_gem_object_lock(obj, &ww);
	if (!err)
		err = i915_gem_object_lock(rpcs, &ww);
	if (!err)
		err = i915_gem_object_set_to_gtt_domain(obj, false);
	if (!err)
		err = i915_vma_pin_ww(vma, &ww, 0, 0, PIN_USER);
	if (err)
		goto err_put;

	err = i915_vma_pin_ww(batch, &ww, 0, 0, PIN_USER);
	if (err)
		goto err_vma;

	err = rpcs_query_batch(rpcs, vma);
	if (err)
		goto err_batch;

	rq = i915_request_create(ce);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto err_batch;
	}

	err = i915_request_await_object(rq, batch->obj, false);
	if (err == 0)
		err = i915_vma_move_to_active(batch, rq, 0);
	if (err)
		goto skip_request;

	err = i915_request_await_object(rq, vma->obj, true);
	if (err == 0)
		err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
	if (err)
		goto skip_request;

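	/*
	 * Some submission backends require an initial breadcrumb to be
	 * emitted before the user payload, e.g. to mark where request
	 * tracking of the batch begins.
	 */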
	if (rq->engine->emit_init_breadcrumb) {
		err = rq->engine->emit_init_breadcrumb(rq);
		if (err)
			goto skip_request;
	}

	err = rq->engine->emit_bb_start(rq,
					batch->node.start, batch->node.size,
					0);
	if (err)
		goto skip_request;

	*rq_out = i915_request_get(rq);

skip_request:
	if (err)
		i915_request_set_error_once(rq, err);
	i915_request_add(rq);
err_batch:
	i915_vma_unpin(batch);
err_vma:
	i915_vma_unpin(vma);
err_put:
	if (err == -EDEADLK) {
		err = i915_gem_ww_ctx_backoff(&ww);
		if (!err)
			goto retry;
	}
	i915_gem_ww_ctx_fini(&ww);
	i915_gem_object_put(rpcs);
	return err;
}

#define TEST_IDLE	BIT(0)
#define TEST_BUSY	BIT(1)
#define TEST_RESET	BIT(2)

static int
__sseu_prepare(const char *name,
	       unsigned int flags,
	       struct intel_context *ce,
	       struct igt_spinner **spin)
{
	struct i915_request *rq;
	int ret;

	*spin = NULL;
	if (!(flags & (TEST_BUSY | TEST_RESET)))
		return 0;

	*spin = kzalloc(sizeof(**spin), GFP_KERNEL);
	if (!*spin)
		return -ENOMEM;

	ret = igt_spinner_init(*spin, ce->engine->gt);
	if (ret)
		goto err_free;

	rq = igt_spinner_create_request(*spin, ce, MI_NOOP);
	if (IS_ERR(rq)) {
		ret = PTR_ERR(rq);
		goto err_fini;
	}

	i915_request_add(rq);

	if (!igt_wait_for_spinner(*spin, rq)) {
		pr_err("%s: Spinner failed to start!\n", name);
		ret = -ETIMEDOUT;
		goto err_end;
	}

	return 0;

err_end:
	igt_spinner_end(*spin);
err_fini:
	igt_spinner_fini(*spin);
err_free:
	kfree(fetch_and_zero(spin));
	return ret;
}

static int
__read_slice_count(struct intel_context *ce,
		   struct drm_i915_gem_object *obj,
		   struct igt_spinner *spin,
		   u32 *rpcs)
{
	struct i915_request *rq = NULL;
	u32 s_mask, s_shift;
	unsigned int cnt;
	u32 *buf, val;
	long ret;

	ret = emit_rpcs_query(obj, ce, &rq);
	if (ret)
		return ret;

	if (spin)
		igt_spinner_end(spin);

	ret = i915_request_wait(rq, 0, MAX_SCHEDULE_TIMEOUT);
	i915_request_put(rq);
	if (ret < 0)
		return ret;

	buf = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WB);
	if (IS_ERR(buf)) {
		ret = PTR_ERR(buf);
		return ret;
	}

	if (GRAPHICS_VER(ce->engine->i915) >= 11) {
		s_mask = GEN11_RPCS_S_CNT_MASK;
		s_shift = GEN11_RPCS_S_CNT_SHIFT;
	} else {
		s_mask = GEN8_RPCS_S_CNT_MASK;
		s_shift = GEN8_RPCS_S_CNT_SHIFT;
	}

	val = *buf;
	cnt = (val & s_mask) >> s_shift;
	*rpcs = val;

	i915_gem_object_unpin_map(obj);

	return cnt;
}

static int
__check_rpcs(const char *name, u32 rpcs, int slices, unsigned int expected,
	     const char *prefix, const char *suffix)
{
	if (slices == expected)
		return 0;

	if (slices < 0) {
		pr_err("%s: %s read slice count failed with %d%s\n",
		       name, prefix, slices, suffix);
		return slices;
	}

	pr_err("%s: %s slice count %d is not %u%s\n",
	       name, prefix, slices, expected, suffix);

	pr_info("RPCS=0x%x; %u%sx%u%s\n",
		rpcs, slices,
		(rpcs & GEN8_RPCS_S_CNT_ENABLE) ? "*" : "",
		(rpcs & GEN8_RPCS_SS_CNT_MASK) >> GEN8_RPCS_SS_CNT_SHIFT,
		(rpcs & GEN8_RPCS_SS_CNT_ENABLE) ? "*" : "");

	return -EINVAL;
}

static int
__sseu_finish(const char *name,
	      unsigned int flags,
	      struct intel_context *ce,
	      struct drm_i915_gem_object *obj,
	      unsigned int expected,
	      struct igt_spinner *spin)
{
	unsigned int slices = hweight32(ce->engine->sseu.slice_mask);
	u32 rpcs = 0;
	int ret = 0;

	if (flags & TEST_RESET) {
		ret = intel_engine_reset(ce->engine, "sseu");
		if (ret)
			goto out;
	}

	ret = __read_slice_count(ce, obj,
				 flags & TEST_RESET ? NULL : spin, &rpcs);
	ret = __check_rpcs(name, rpcs, ret, expected, "Context", "!");
	if (ret)
		goto out;

	ret = __read_slice_count(ce->engine->kernel_context, obj, NULL, &rpcs);
	ret = __check_rpcs(name, rpcs, ret, slices, "Kernel context", "!");

out:
	if (spin)
		igt_spinner_end(spin);

	if ((flags & TEST_IDLE) && ret == 0) {
		ret = igt_flush_test(ce->engine->i915);
		if (ret)
			return ret;

		ret = __read_slice_count(ce, obj, NULL, &rpcs);
		ret = __check_rpcs(name, rpcs, ret, expected,
				   "Context", " after idle!");
	}

	return ret;
}

static int
__sseu_test(const char *name,
	    unsigned int flags,
	    struct intel_context *ce,
	    struct drm_i915_gem_object *obj,
	    struct intel_sseu sseu)
{
	struct igt_spinner *spin = NULL;
	int ret;

	intel_engine_pm_get(ce->engine);

	ret = __sseu_prepare(name, flags, ce, &spin);
	if (ret)
		goto out_pm;

	ret = intel_context_reconfigure_sseu(ce, sseu);
	if (ret)
		goto out_spin;

	ret = __sseu_finish(name, flags, ce, obj,
			    hweight32(sseu.slice_mask), spin);

out_spin:
	if (spin) {
		igt_spinner_end(spin);
		igt_spinner_fini(spin);
		kfree(spin);
	}
out_pm:
	intel_engine_pm_put(ce->engine);
	return ret;
}

static int
__igt_ctx_sseu(struct drm_i915_private *i915,
	       const char *name,
	       unsigned int flags)
{
	struct drm_i915_gem_object *obj;
	int inst = 0;
	int ret = 0;

	if (GRAPHICS_VER(i915) < 9)
		return 0;

	if (flags & TEST_RESET)
		igt_global_reset_lock(to_gt(i915));

	obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
	if (IS_ERR(obj)) {
		ret = PTR_ERR(obj);
		goto out_unlock;
	}

	do {
		struct intel_engine_cs *engine;
		struct intel_context *ce;
		struct intel_sseu pg_sseu;

		engine = intel_engine_lookup_user(i915,
						  I915_ENGINE_CLASS_RENDER,
						  inst++);
		if (!engine)
			break;

		if (hweight32(engine->sseu.slice_mask) < 2)
			continue;

		if (!engine->gt->info.sseu.has_slice_pg)
			continue;

		/*
		 * Gen11 VME friendly power-gated configuration with
		 * half enabled sub-slices.
		 */
		pg_sseu = engine->sseu;
		pg_sseu.slice_mask = 1;
		pg_sseu.subslice_mask =
			~(~0 << (hweight32(engine->sseu.subslice_mask) / 2));

		pr_info("%s: SSEU subtest '%s', flags=%x, def_slices=%u, pg_slices=%u\n",
			engine->name, name, flags,
			hweight32(engine->sseu.slice_mask),
			hweight32(pg_sseu.slice_mask));

		ce = intel_context_create(engine);
		if (IS_ERR(ce)) {
			ret = PTR_ERR(ce);
			goto out_put;
		}

		ret = intel_context_pin(ce);
		if (ret)
			goto out_ce;

		/* First set the default mask. */
		ret = __sseu_test(name, flags, ce, obj, engine->sseu);
		if (ret)
			goto out_unpin;

		/* Then set a power-gated configuration. */
		ret = __sseu_test(name, flags, ce, obj, pg_sseu);
		if (ret)
			goto out_unpin;

		/* Back to defaults. */
		ret = __sseu_test(name, flags, ce, obj, engine->sseu);
		if (ret)
			goto out_unpin;

		/* One last power-gated configuration for the road. */
		ret = __sseu_test(name, flags, ce, obj, pg_sseu);
		if (ret)
			goto out_unpin;

out_unpin:
		intel_context_unpin(ce);
out_ce:
		intel_context_put(ce);
	} while (!ret);

	if (igt_flush_test(i915))
		ret = -EIO;

out_put:
	i915_gem_object_put(obj);

out_unlock:
	if (flags & TEST_RESET)
		igt_global_reset_unlock(to_gt(i915));

	if (ret)
		pr_err("%s: Failed with %d!\n", name, ret);

	return ret;
}

static int igt_ctx_sseu(void *arg)
{
	struct {
		const char *name;
		unsigned int flags;
	} *phase, phases[] = {
		{ .name = "basic", .flags = 0 },
		{ .name = "idle", .flags = TEST_IDLE },
		{ .name = "busy", .flags = TEST_BUSY },
		{ .name = "busy-reset", .flags = TEST_BUSY | TEST_RESET },
		{ .name = "busy-idle", .flags = TEST_BUSY | TEST_IDLE },
		{ .name = "reset-idle", .flags = TEST_RESET | TEST_IDLE },
	};
	unsigned int i;
	int ret = 0;

	for (i = 0, phase = phases; ret == 0 && i < ARRAY_SIZE(phases);
	     i++, phase++)
		ret = __igt_ctx_sseu(arg, phase->name, phase->flags);

	return ret;
}

static int igt_ctx_readonly(void *arg)
{
	struct drm_i915_private *i915 = arg;
	unsigned long idx, ndwords, dw, num_engines;
	struct drm_i915_gem_object *obj = NULL;
	struct i915_request *tq[5] = {};
	struct i915_gem_engines_iter it;
	struct i915_address_space *vm;
	struct i915_gem_context *ctx;
	struct intel_context *ce;
	struct igt_live_test t;
	I915_RND_STATE(prng);
	IGT_TIMEOUT(end_time);
	LIST_HEAD(objects);
	struct file *file;
	int err = -ENODEV;

	/*
	 * Create a few read-only objects (with the occasional writable object)
	 * and try to write into these objects, checking that the GPU discards
	 * any write to a read-only object.
	 */

	file = mock_file(i915);
	if (IS_ERR(file))
		return PTR_ERR(file);

	err = igt_live_test_begin(&t, i915, __func__, "");
	if (err)
		goto out_file;

	ctx = live_context(i915, file);
	if (IS_ERR(ctx)) {
		err = PTR_ERR(ctx);
		goto out_file;
	}

	vm = ctx->vm ?: &to_gt(i915)->ggtt->alias->vm;
	if (!vm || !vm->has_read_only) {
		err = 0;
		goto out_file;
	}

	num_engines = 0;
	for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it)
		if (intel_engine_can_store_dword(ce->engine))
			num_engines++;
	i915_gem_context_unlock_engines(ctx);

	ndwords = 0;
	dw = 0;
	while (!time_after(jiffies, end_time)) {
		for_each_gem_engine(ce,
				    i915_gem_context_lock_engines(ctx), it) {
			if (!intel_engine_can_store_dword(ce->engine))
				continue;

			if (!obj) {
				obj = create_test_object(ce->vm, file, &objects);
				if (IS_ERR(obj)) {
					err = PTR_ERR(obj);
					i915_gem_context_unlock_engines(ctx);
					goto out_file;
				}

				if (prandom_u32_state(&prng) & 1)
					i915_gem_object_set_readonly(obj);
			}

			err = gpu_fill(ce, obj, dw);
			if (err) {
				pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) [full-ppgtt? %s], err=%d\n",
				       ndwords, dw, max_dwords(obj),
				       ce->engine->name,
				       yesno(i915_gem_context_has_full_ppgtt(ctx)),
				       err);
				i915_gem_context_unlock_engines(ctx);
				goto out_file;
			}

			err = throttle(ce, tq, ARRAY_SIZE(tq));
			if (err) {
				i915_gem_context_unlock_engines(ctx);
				goto out_file;
			}

			if (++dw == max_dwords(obj)) {
				obj = NULL;
				dw = 0;
			}
			ndwords++;
		}
		i915_gem_context_unlock_engines(ctx);
	}
	pr_info("Submitted %lu dwords (across %lu engines)\n",
		ndwords, num_engines);

	dw = 0;
	idx = 0;
	list_for_each_entry(obj, &objects, st_link) {
		unsigned int rem =
			min_t(unsigned int, ndwords - dw, max_dwords(obj));
		unsigned int num_writes;

		num_writes = rem;
		if (i915_gem_object_is_readonly(obj))
			num_writes = 0;

		err = cpu_check(obj, idx++, num_writes);
		if (err)
			break;

		dw += rem;
	}

out_file:
	throttle_release(tq, ARRAY_SIZE(tq));
	if (igt_live_test_end(&t))
		err = -EIO;

	fput(file);
	return err;
}

static int check_scratch(struct i915_address_space *vm, u64 offset)
{
	struct drm_mm_node *node;

	mutex_lock(&vm->mutex);
	node = __drm_mm_interval_first(&vm->mm,
				       offset, offset + sizeof(u32) - 1);
	mutex_unlock(&vm->mutex);
	if (!node || node->start > offset)
		return 0;

	GEM_BUG_ON(offset >= node->start + node->size);

	pr_err("Target offset 0x%08x_%08x overlaps with a node in the mm!\n",
	       upper_32_bits(offset), lower_32_bits(offset));
	return -EINVAL;
}

static int write_to_scratch(struct i915_gem_context *ctx,
			    struct intel_engine_cs *engine,
			    struct drm_i915_gem_object *obj,
			    u64 offset, u32 value)
{
	struct drm_i915_private *i915 = ctx->i915;
	struct i915_address_space *vm;
	struct i915_request *rq;
	struct i915_vma *vma;
	u32 *cmd;
	int err;

	GEM_BUG_ON(offset < I915_GTT_PAGE_SIZE);

	err = check_scratch(ctx->vm, offset);
	if (err)
		return err;

	cmd = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WB);
	if (IS_ERR(cmd))
		return PTR_ERR(cmd);
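
	/*
	 * Emit a single dword store to the target address, followed by a
	 * batch-buffer end; pre-gen8 MI_STORE_DWORD_IMM takes a zero dword
	 * and then the 32-bit offset instead of a 64-bit address.
	 */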
	*cmd++ = MI_STORE_DWORD_IMM_GEN4;
	if (GRAPHICS_VER(i915) >= 8) {
		*cmd++ = lower_32_bits(offset);
		*cmd++ = upper_32_bits(offset);
	} else {
		*cmd++ = 0;
		*cmd++ = offset;
	}
	*cmd++ = value;
	*cmd = MI_BATCH_BUFFER_END;
	__i915_gem_object_flush_map(obj, 0, 64);
	i915_gem_object_unpin_map(obj);

	intel_gt_chipset_flush(engine->gt);

	vm = i915_gem_context_get_eb_vm(ctx);
	vma = i915_vma_instance(obj, vm, NULL);
	if (IS_ERR(vma)) {
		err = PTR_ERR(vma);
		goto out_vm;
	}

	err = i915_vma_pin(vma, 0, 0, PIN_USER | PIN_OFFSET_FIXED);
	if (err)
		goto out_vm;

	rq = igt_request_alloc(ctx, engine);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto err_unpin;
	}

	i915_vma_lock(vma);
	err = i915_request_await_object(rq, vma->obj, false);
	if (err == 0)
		err = i915_vma_move_to_active(vma, rq, 0);
	i915_vma_unlock(vma);
	if (err)
		goto skip_request;

	if (rq->engine->emit_init_breadcrumb) {
		err = rq->engine->emit_init_breadcrumb(rq);
		if (err)
			goto skip_request;
	}

	err = engine->emit_bb_start(rq, vma->node.start, vma->node.size, 0);
	if (err)
		goto skip_request;

	i915_vma_unpin(vma);

	i915_request_add(rq);

	goto out_vm;
skip_request:
	i915_request_set_error_once(rq, err);
	i915_request_add(rq);
err_unpin:
	i915_vma_unpin(vma);
out_vm:
	i915_vm_put(vm);

	if (!err)
		err = i915_gem_object_wait(obj, 0, MAX_SCHEDULE_TIMEOUT);

	return err;
}

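/*
 * Read a dword back from the given address: on gen8+ load it into GPR0 and
 * store GPR0 into the readback object; on older platforms (e.g. hsw) issue a
 * privileged batch from the GGTT instead, as register access from a normal
 * user batch is protected.
 */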
static int read_from_scratch(struct i915_gem_context *ctx,
			     struct intel_engine_cs *engine,
			     struct drm_i915_gem_object *obj,
			     u64 offset, u32 *value)
{
	struct drm_i915_private *i915 = ctx->i915;
	struct i915_address_space *vm;
	const u32 result = 0x100;
	struct i915_request *rq;
	struct i915_vma *vma;
	unsigned int flags;
	u32 *cmd;
	int err;

	GEM_BUG_ON(offset < I915_GTT_PAGE_SIZE);

	err = check_scratch(ctx->vm, offset);
	if (err)
		return err;

	if (GRAPHICS_VER(i915) >= 8) {
		const u32 GPR0 = engine->mmio_base + 0x600;

		vm = i915_gem_context_get_eb_vm(ctx);
		vma = i915_vma_instance(obj, vm, NULL);
		if (IS_ERR(vma)) {
			err = PTR_ERR(vma);
			goto out_vm;
		}

		err = i915_vma_pin(vma, 0, 0, PIN_USER | PIN_OFFSET_FIXED);
		if (err)
			goto out_vm;

		cmd = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WB);
		if (IS_ERR(cmd)) {
			err = PTR_ERR(cmd);
			goto err_unpin;
		}

		memset(cmd, POISON_INUSE, PAGE_SIZE);
		*cmd++ = MI_LOAD_REGISTER_MEM_GEN8;
		*cmd++ = GPR0;
		*cmd++ = lower_32_bits(offset);
		*cmd++ = upper_32_bits(offset);
		*cmd++ = MI_STORE_REGISTER_MEM_GEN8;
		*cmd++ = GPR0;
		*cmd++ = result;
		*cmd++ = 0;
		*cmd = MI_BATCH_BUFFER_END;

		i915_gem_object_flush_map(obj);
		i915_gem_object_unpin_map(obj);

		flags = 0;
	} else {
		const u32 reg = engine->mmio_base + 0x420;

		/* hsw: register access even to 3DPRIM! is protected */
		vm = i915_vm_get(&engine->gt->ggtt->vm);
		vma = i915_vma_instance(obj, vm, NULL);
		if (IS_ERR(vma)) {
			err = PTR_ERR(vma);
			goto out_vm;
		}

		err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
		if (err)
			goto out_vm;

		cmd = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WB);
		if (IS_ERR(cmd)) {
			err = PTR_ERR(cmd);
			goto err_unpin;
		}

		memset(cmd, POISON_INUSE, PAGE_SIZE);
		*cmd++ = MI_LOAD_REGISTER_MEM;
		*cmd++ = reg;
		*cmd++ = offset;
		*cmd++ = MI_STORE_REGISTER_MEM | MI_USE_GGTT;
		*cmd++ = reg;
		*cmd++ = vma->node.start + result;
		*cmd = MI_BATCH_BUFFER_END;

		i915_gem_object_flush_map(obj);
		i915_gem_object_unpin_map(obj);

		flags = I915_DISPATCH_SECURE;
	}

	intel_gt_chipset_flush(engine->gt);

	rq = igt_request_alloc(ctx, engine);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto err_unpin;
	}

	i915_vma_lock(vma);
	err = i915_request_await_object(rq, vma->obj, true);
	if (err == 0)
		err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
	i915_vma_unlock(vma);
	if (err)
		goto skip_request;

	if (rq->engine->emit_init_breadcrumb) {
		err = rq->engine->emit_init_breadcrumb(rq);
		if (err)
			goto skip_request;
	}

	err = engine->emit_bb_start(rq, vma->node.start, vma->node.size, flags);
	if (err)
		goto skip_request;

	i915_vma_unpin(vma);

	i915_request_add(rq);

	i915_gem_object_lock(obj, NULL);
	err = i915_gem_object_set_to_cpu_domain(obj, false);
	i915_gem_object_unlock(obj);
	if (err)
		goto out_vm;

	cmd = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WB);
	if (IS_ERR(cmd)) {
		err = PTR_ERR(cmd);
		goto out_vm;
	}

	*value = cmd[result / sizeof(*cmd)];
	i915_gem_object_unpin_map(obj);

	goto out_vm;
skip_request:
	i915_request_set_error_once(rq, err);
	i915_request_add(rq);
err_unpin:
	i915_vma_unpin(vma);
out_vm:
	i915_vm_put(vm);

	if (!err)
		err = i915_gem_object_wait(obj, 0, MAX_SCHEDULE_TIMEOUT);

	return err;
}

static int check_scratch_page(struct i915_gem_context *ctx, u32 *out)
{
	struct i915_address_space *vm;
	u32 *vaddr;
	int err = 0;

	vm = ctx->vm;
	if (!vm)
		return -ENODEV;

	if (!vm->scratch[0]) {
		pr_err("No scratch page!\n");
		return -EINVAL;
	}

	vaddr = __px_vaddr(vm->scratch[0]);

	memcpy(out, vaddr, sizeof(*out));
	if (memchr_inv(vaddr, *out, PAGE_SIZE)) {
		pr_err("Inconsistent initial state of scratch page!\n");
		err = -EINVAL;
	}

	return err;
}

static int igt_vm_isolation(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct i915_gem_context *ctx_a, *ctx_b;
	struct drm_i915_gem_object *obj_a, *obj_b;
	unsigned long num_engines, count;
	struct intel_engine_cs *engine;
	struct igt_live_test t;
	I915_RND_STATE(prng);
	struct file *file;
	u64 vm_total;
	u32 expected;
	int err;

	if (GRAPHICS_VER(i915) < 7)
		return 0;

	/*
	 * The simple goal here is that a write into one context is not
	 * observed in a second (separate page tables and scratch).
	 */

	file = mock_file(i915);
	if (IS_ERR(file))
		return PTR_ERR(file);

	err = igt_live_test_begin(&t, i915, __func__, "");
	if (err)
		goto out_file;

	ctx_a = live_context(i915, file);
	if (IS_ERR(ctx_a)) {
		err = PTR_ERR(ctx_a);
		goto out_file;
	}

	ctx_b = live_context(i915, file);
	if (IS_ERR(ctx_b)) {
		err = PTR_ERR(ctx_b);
		goto out_file;
	}

	/* We can only test vm isolation if the vms are distinct */
	if (ctx_a->vm == ctx_b->vm)
		goto out_file;

	/* Read the initial state of the scratch page */
	err = check_scratch_page(ctx_a, &expected);
	if (err)
		goto out_file;

	err = check_scratch_page(ctx_b, &expected);
	if (err)
		goto out_file;

	vm_total = ctx_a->vm->total;
	GEM_BUG_ON(ctx_b->vm->total != vm_total);

	obj_a = i915_gem_object_create_internal(i915, PAGE_SIZE);
	if (IS_ERR(obj_a)) {
		err = PTR_ERR(obj_a);
		goto out_file;
	}

	obj_b = i915_gem_object_create_internal(i915, PAGE_SIZE);
	if (IS_ERR(obj_b)) {
		err = PTR_ERR(obj_b);
		goto put_a;
	}

	count = 0;
	num_engines = 0;
	for_each_uabi_engine(engine, i915) {
		IGT_TIMEOUT(end_time);
		unsigned long this = 0;

		if (!intel_engine_can_store_dword(engine))
			continue;

		/* Not all engines have their own GPR! */
		if (GRAPHICS_VER(i915) < 8 && engine->class != RENDER_CLASS)
			continue;

		while (!__igt_timeout(end_time, NULL)) {
			u32 value = 0xc5c5c5c5;
			u64 offset;

			/* Leave enough space at offset 0 for the batch */
			offset = igt_random_offset(&prng,
						   I915_GTT_PAGE_SIZE, vm_total,
						   sizeof(u32), alignof_dword);

			err = write_to_scratch(ctx_a, engine, obj_a,
					       offset, 0xdeadbeef);
			if (err == 0)
				err = read_from_scratch(ctx_b, engine, obj_b,
							offset, &value);
			if (err)
				goto put_b;

			if (value != expected) {
				pr_err("%s: Read %08x from scratch (offset 0x%08x_%08x), after %lu reads!\n",
				       engine->name, value,
				       upper_32_bits(offset),
				       lower_32_bits(offset),
				       this);
				err = -EINVAL;
				goto put_b;
			}

			this++;
		}
		count += this;
		num_engines++;
	}
	pr_info("Checked %lu scratch offsets across %lu engines\n",
		count, num_engines);

put_b:
	i915_gem_object_put(obj_b);
put_a:
	i915_gem_object_put(obj_a);
out_file:
	if (igt_live_test_end(&t))
		err = -EIO;
	fput(file);
	return err;
}

int i915_gem_context_live_selftests(struct drm_i915_private *i915)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(live_nop_switch),
		SUBTEST(live_parallel_switch),
		SUBTEST(igt_ctx_exec),
		SUBTEST(igt_ctx_readonly),
		SUBTEST(igt_ctx_sseu),
		SUBTEST(igt_shared_ctx_exec),
		SUBTEST(igt_vm_isolation),
	};

	if (intel_gt_is_wedged(to_gt(i915)))
		return 0;

	return i915_live_subtests(tests, i915);
}