1 /* 2 * SPDX-License-Identifier: MIT 3 * 4 * Copyright © 2017 Intel Corporation 5 */ 6 7 #include <linux/prime_numbers.h> 8 9 #include "gem/i915_gem_internal.h" 10 #include "gem/i915_gem_pm.h" 11 #include "gt/intel_engine_pm.h" 12 #include "gt/intel_engine_regs.h" 13 #include "gt/intel_gt.h" 14 #include "gt/intel_gt_requests.h" 15 #include "gt/intel_reset.h" 16 #include "i915_selftest.h" 17 18 #include "gem/selftests/igt_gem_utils.h" 19 #include "selftests/i915_random.h" 20 #include "selftests/igt_flush_test.h" 21 #include "selftests/igt_live_test.h" 22 #include "selftests/igt_reset.h" 23 #include "selftests/igt_spinner.h" 24 #include "selftests/mock_drm.h" 25 #include "selftests/mock_gem_device.h" 26 27 #include "huge_gem_object.h" 28 #include "igt_gem_utils.h" 29 30 #define DW_PER_PAGE (PAGE_SIZE / sizeof(u32)) 31 32 static int live_nop_switch(void *arg) 33 { 34 const unsigned int nctx = 1024; 35 struct drm_i915_private *i915 = arg; 36 struct intel_engine_cs *engine; 37 struct i915_gem_context **ctx; 38 struct igt_live_test t; 39 struct file *file; 40 unsigned long n; 41 int err = -ENODEV; 42 43 /* 44 * Create as many contexts as we can feasibly get away with 45 * and check we can switch between them rapidly. 46 * 47 * Serves as very simple stress test for submission and HW switching 48 * between contexts. 49 */ 50 51 if (!DRIVER_CAPS(i915)->has_logical_contexts) 52 return 0; 53 54 file = mock_file(i915); 55 if (IS_ERR(file)) 56 return PTR_ERR(file); 57 58 ctx = kcalloc(nctx, sizeof(*ctx), GFP_KERNEL); 59 if (!ctx) { 60 err = -ENOMEM; 61 goto out_file; 62 } 63 64 for (n = 0; n < nctx; n++) { 65 ctx[n] = live_context(i915, file); 66 if (IS_ERR(ctx[n])) { 67 err = PTR_ERR(ctx[n]); 68 goto out_file; 69 } 70 } 71 72 for_each_uabi_engine(engine, i915) { 73 struct i915_request *rq = NULL; 74 unsigned long end_time, prime; 75 ktime_t times[2] = {}; 76 77 times[0] = ktime_get_raw(); 78 for (n = 0; n < nctx; n++) { 79 struct i915_request *this; 80 81 this = igt_request_alloc(ctx[n], engine); 82 if (IS_ERR(this)) { 83 err = PTR_ERR(this); 84 goto out_file; 85 } 86 if (rq) { 87 i915_request_await_dma_fence(this, &rq->fence); 88 i915_request_put(rq); 89 } 90 rq = i915_request_get(this); 91 i915_request_add(this); 92 } 93 if (i915_request_wait(rq, 0, 10 * HZ) < 0) { 94 pr_err("Failed to populated %d contexts\n", nctx); 95 intel_gt_set_wedged(to_gt(i915)); 96 i915_request_put(rq); 97 err = -EIO; 98 goto out_file; 99 } 100 i915_request_put(rq); 101 102 times[1] = ktime_get_raw(); 103 104 pr_info("Populated %d contexts on %s in %lluns\n", 105 nctx, engine->name, ktime_to_ns(times[1] - times[0])); 106 107 err = igt_live_test_begin(&t, i915, __func__, engine->name); 108 if (err) 109 goto out_file; 110 111 end_time = jiffies + i915_selftest.timeout_jiffies; 112 for_each_prime_number_from(prime, 2, 8192) { 113 times[1] = ktime_get_raw(); 114 115 rq = NULL; 116 for (n = 0; n < prime; n++) { 117 struct i915_request *this; 118 119 this = igt_request_alloc(ctx[n % nctx], engine); 120 if (IS_ERR(this)) { 121 err = PTR_ERR(this); 122 goto out_file; 123 } 124 125 if (rq) { /* Force submission order */ 126 i915_request_await_dma_fence(this, &rq->fence); 127 i915_request_put(rq); 128 } 129 130 /* 131 * This space is left intentionally blank. 132 * 133 * We do not actually want to perform any 134 * action with this request, we just want 135 * to measure the latency in allocation 136 * and submission of our breadcrumbs - 137 * ensuring that the bare request is sufficient 138 * for the system to work (i.e. proper HEAD 139 * tracking of the rings, interrupt handling, 140 * etc). It also gives us the lowest bounds 141 * for latency. 142 */ 143 144 rq = i915_request_get(this); 145 i915_request_add(this); 146 } 147 GEM_BUG_ON(!rq); 148 if (i915_request_wait(rq, 0, HZ / 5) < 0) { 149 pr_err("Switching between %ld contexts timed out\n", 150 prime); 151 intel_gt_set_wedged(to_gt(i915)); 152 i915_request_put(rq); 153 break; 154 } 155 i915_request_put(rq); 156 157 times[1] = ktime_sub(ktime_get_raw(), times[1]); 158 if (prime == 2) 159 times[0] = times[1]; 160 161 if (__igt_timeout(end_time, NULL)) 162 break; 163 } 164 165 err = igt_live_test_end(&t); 166 if (err) 167 goto out_file; 168 169 pr_info("Switch latencies on %s: 1 = %lluns, %lu = %lluns\n", 170 engine->name, 171 ktime_to_ns(times[0]), 172 prime - 1, div64_u64(ktime_to_ns(times[1]), prime - 1)); 173 } 174 175 out_file: 176 fput(file); 177 return err; 178 } 179 180 struct parallel_switch { 181 struct task_struct *tsk; 182 struct intel_context *ce[2]; 183 }; 184 185 static int __live_parallel_switch1(void *data) 186 { 187 struct parallel_switch *arg = data; 188 IGT_TIMEOUT(end_time); 189 unsigned long count; 190 191 count = 0; 192 do { 193 struct i915_request *rq = NULL; 194 int err, n; 195 196 err = 0; 197 for (n = 0; !err && n < ARRAY_SIZE(arg->ce); n++) { 198 struct i915_request *prev = rq; 199 200 rq = i915_request_create(arg->ce[n]); 201 if (IS_ERR(rq)) { 202 i915_request_put(prev); 203 return PTR_ERR(rq); 204 } 205 206 i915_request_get(rq); 207 if (prev) { 208 err = i915_request_await_dma_fence(rq, &prev->fence); 209 i915_request_put(prev); 210 } 211 212 i915_request_add(rq); 213 } 214 if (i915_request_wait(rq, 0, HZ / 5) < 0) 215 err = -ETIME; 216 i915_request_put(rq); 217 if (err) 218 return err; 219 220 count++; 221 } while (!__igt_timeout(end_time, NULL)); 222 223 pr_info("%s: %lu switches (sync)\n", arg->ce[0]->engine->name, count); 224 return 0; 225 } 226 227 static int __live_parallel_switchN(void *data) 228 { 229 struct parallel_switch *arg = data; 230 struct i915_request *rq = NULL; 231 IGT_TIMEOUT(end_time); 232 unsigned long count; 233 int n; 234 235 count = 0; 236 do { 237 for (n = 0; n < ARRAY_SIZE(arg->ce); n++) { 238 struct i915_request *prev = rq; 239 int err = 0; 240 241 rq = i915_request_create(arg->ce[n]); 242 if (IS_ERR(rq)) { 243 i915_request_put(prev); 244 return PTR_ERR(rq); 245 } 246 247 i915_request_get(rq); 248 if (prev) { 249 err = i915_request_await_dma_fence(rq, &prev->fence); 250 i915_request_put(prev); 251 } 252 253 i915_request_add(rq); 254 if (err) { 255 i915_request_put(rq); 256 return err; 257 } 258 } 259 260 count++; 261 } while (!__igt_timeout(end_time, NULL)); 262 i915_request_put(rq); 263 264 pr_info("%s: %lu switches (many)\n", arg->ce[0]->engine->name, count); 265 return 0; 266 } 267 268 static int live_parallel_switch(void *arg) 269 { 270 struct drm_i915_private *i915 = arg; 271 static int (* const func[])(void *arg) = { 272 __live_parallel_switch1, 273 __live_parallel_switchN, 274 NULL, 275 }; 276 struct parallel_switch *data = NULL; 277 struct i915_gem_engines *engines; 278 struct i915_gem_engines_iter it; 279 int (* const *fn)(void *arg); 280 struct i915_gem_context *ctx; 281 struct intel_context *ce; 282 struct file *file; 283 int n, m, count; 284 int err = 0; 285 286 /* 287 * Check we can process switches on all engines simultaneously. 288 */ 289 290 if (!DRIVER_CAPS(i915)->has_logical_contexts) 291 return 0; 292 293 file = mock_file(i915); 294 if (IS_ERR(file)) 295 return PTR_ERR(file); 296 297 ctx = live_context(i915, file); 298 if (IS_ERR(ctx)) { 299 err = PTR_ERR(ctx); 300 goto out_file; 301 } 302 303 engines = i915_gem_context_lock_engines(ctx); 304 count = engines->num_engines; 305 306 data = kcalloc(count, sizeof(*data), GFP_KERNEL); 307 if (!data) { 308 i915_gem_context_unlock_engines(ctx); 309 err = -ENOMEM; 310 goto out_file; 311 } 312 313 m = 0; /* Use the first context as our template for the engines */ 314 for_each_gem_engine(ce, engines, it) { 315 err = intel_context_pin(ce); 316 if (err) { 317 i915_gem_context_unlock_engines(ctx); 318 goto out; 319 } 320 data[m++].ce[0] = intel_context_get(ce); 321 } 322 i915_gem_context_unlock_engines(ctx); 323 324 /* Clone the same set of engines into the other contexts */ 325 for (n = 1; n < ARRAY_SIZE(data->ce); n++) { 326 ctx = live_context(i915, file); 327 if (IS_ERR(ctx)) { 328 err = PTR_ERR(ctx); 329 goto out; 330 } 331 332 for (m = 0; m < count; m++) { 333 if (!data[m].ce[0]) 334 continue; 335 336 ce = intel_context_create(data[m].ce[0]->engine); 337 if (IS_ERR(ce)) 338 goto out; 339 340 err = intel_context_pin(ce); 341 if (err) { 342 intel_context_put(ce); 343 goto out; 344 } 345 346 data[m].ce[n] = ce; 347 } 348 } 349 350 for (fn = func; !err && *fn; fn++) { 351 struct igt_live_test t; 352 int n; 353 354 err = igt_live_test_begin(&t, i915, __func__, ""); 355 if (err) 356 break; 357 358 for (n = 0; n < count; n++) { 359 if (!data[n].ce[0]) 360 continue; 361 362 data[n].tsk = kthread_run(*fn, &data[n], 363 "igt/parallel:%s", 364 data[n].ce[0]->engine->name); 365 if (IS_ERR(data[n].tsk)) { 366 err = PTR_ERR(data[n].tsk); 367 break; 368 } 369 get_task_struct(data[n].tsk); 370 } 371 372 yield(); /* start all threads before we kthread_stop() */ 373 374 for (n = 0; n < count; n++) { 375 int status; 376 377 if (IS_ERR_OR_NULL(data[n].tsk)) 378 continue; 379 380 status = kthread_stop(data[n].tsk); 381 if (status && !err) 382 err = status; 383 384 put_task_struct(data[n].tsk); 385 data[n].tsk = NULL; 386 } 387 388 if (igt_live_test_end(&t)) 389 err = -EIO; 390 } 391 392 out: 393 for (n = 0; n < count; n++) { 394 for (m = 0; m < ARRAY_SIZE(data->ce); m++) { 395 if (!data[n].ce[m]) 396 continue; 397 398 intel_context_unpin(data[n].ce[m]); 399 intel_context_put(data[n].ce[m]); 400 } 401 } 402 kfree(data); 403 out_file: 404 fput(file); 405 return err; 406 } 407 408 static unsigned long real_page_count(struct drm_i915_gem_object *obj) 409 { 410 return huge_gem_object_phys_size(obj) >> PAGE_SHIFT; 411 } 412 413 static unsigned long fake_page_count(struct drm_i915_gem_object *obj) 414 { 415 return huge_gem_object_dma_size(obj) >> PAGE_SHIFT; 416 } 417 418 static int gpu_fill(struct intel_context *ce, 419 struct drm_i915_gem_object *obj, 420 unsigned int dw) 421 { 422 struct i915_vma *vma; 423 int err; 424 425 GEM_BUG_ON(obj->base.size > ce->vm->total); 426 GEM_BUG_ON(!intel_engine_can_store_dword(ce->engine)); 427 428 vma = i915_vma_instance(obj, ce->vm, NULL); 429 if (IS_ERR(vma)) 430 return PTR_ERR(vma); 431 432 err = i915_vma_pin(vma, 0, 0, PIN_HIGH | PIN_USER); 433 if (err) 434 return err; 435 436 /* 437 * Within the GTT the huge objects maps every page onto 438 * its 1024 real pages (using phys_pfn = dma_pfn % 1024). 439 * We set the nth dword within the page using the nth 440 * mapping via the GTT - this should exercise the GTT mapping 441 * whilst checking that each context provides a unique view 442 * into the object. 443 */ 444 err = igt_gpu_fill_dw(ce, vma, 445 (dw * real_page_count(obj)) << PAGE_SHIFT | 446 (dw * sizeof(u32)), 447 real_page_count(obj), 448 dw); 449 i915_vma_unpin(vma); 450 451 return err; 452 } 453 454 static int cpu_fill(struct drm_i915_gem_object *obj, u32 value) 455 { 456 const bool has_llc = HAS_LLC(to_i915(obj->base.dev)); 457 unsigned int n, m, need_flush; 458 int err; 459 460 i915_gem_object_lock(obj, NULL); 461 err = i915_gem_object_prepare_write(obj, &need_flush); 462 if (err) 463 goto out; 464 465 for (n = 0; n < real_page_count(obj); n++) { 466 u32 *map; 467 468 map = kmap_atomic(i915_gem_object_get_page(obj, n)); 469 for (m = 0; m < DW_PER_PAGE; m++) 470 map[m] = value; 471 if (!has_llc) 472 drm_clflush_virt_range(map, PAGE_SIZE); 473 kunmap_atomic(map); 474 } 475 476 i915_gem_object_finish_access(obj); 477 obj->read_domains = I915_GEM_DOMAIN_GTT | I915_GEM_DOMAIN_CPU; 478 obj->write_domain = 0; 479 out: 480 i915_gem_object_unlock(obj); 481 return err; 482 } 483 484 static noinline int cpu_check(struct drm_i915_gem_object *obj, 485 unsigned int idx, unsigned int max) 486 { 487 unsigned int n, m, needs_flush; 488 int err; 489 490 i915_gem_object_lock(obj, NULL); 491 err = i915_gem_object_prepare_read(obj, &needs_flush); 492 if (err) 493 goto out_unlock; 494 495 for (n = 0; n < real_page_count(obj); n++) { 496 u32 *map; 497 498 map = kmap_atomic(i915_gem_object_get_page(obj, n)); 499 if (needs_flush & CLFLUSH_BEFORE) 500 drm_clflush_virt_range(map, PAGE_SIZE); 501 502 for (m = 0; m < max; m++) { 503 if (map[m] != m) { 504 pr_err("%pS: Invalid value at object %d page %d/%ld, offset %d/%d: found %x expected %x\n", 505 __builtin_return_address(0), idx, 506 n, real_page_count(obj), m, max, 507 map[m], m); 508 err = -EINVAL; 509 goto out_unmap; 510 } 511 } 512 513 for (; m < DW_PER_PAGE; m++) { 514 if (map[m] != STACK_MAGIC) { 515 pr_err("%pS: Invalid value at object %d page %d, offset %d: found %x expected %x (uninitialised)\n", 516 __builtin_return_address(0), idx, n, m, 517 map[m], STACK_MAGIC); 518 err = -EINVAL; 519 goto out_unmap; 520 } 521 } 522 523 out_unmap: 524 kunmap_atomic(map); 525 if (err) 526 break; 527 } 528 529 i915_gem_object_finish_access(obj); 530 out_unlock: 531 i915_gem_object_unlock(obj); 532 return err; 533 } 534 535 static int file_add_object(struct file *file, struct drm_i915_gem_object *obj) 536 { 537 int err; 538 539 GEM_BUG_ON(obj->base.handle_count); 540 541 /* tie the object to the drm_file for easy reaping */ 542 err = idr_alloc(&to_drm_file(file)->object_idr, 543 &obj->base, 1, 0, GFP_KERNEL); 544 if (err < 0) 545 return err; 546 547 i915_gem_object_get(obj); 548 obj->base.handle_count++; 549 return 0; 550 } 551 552 static struct drm_i915_gem_object * 553 create_test_object(struct i915_address_space *vm, 554 struct file *file, 555 struct list_head *objects) 556 { 557 struct drm_i915_gem_object *obj; 558 u64 size; 559 int err; 560 561 /* Keep in GEM's good graces */ 562 intel_gt_retire_requests(vm->gt); 563 564 size = min(vm->total / 2, 1024ull * DW_PER_PAGE * PAGE_SIZE); 565 size = round_down(size, DW_PER_PAGE * PAGE_SIZE); 566 567 obj = huge_gem_object(vm->i915, DW_PER_PAGE * PAGE_SIZE, size); 568 if (IS_ERR(obj)) 569 return obj; 570 571 err = file_add_object(file, obj); 572 i915_gem_object_put(obj); 573 if (err) 574 return ERR_PTR(err); 575 576 err = cpu_fill(obj, STACK_MAGIC); 577 if (err) { 578 pr_err("Failed to fill object with cpu, err=%d\n", 579 err); 580 return ERR_PTR(err); 581 } 582 583 list_add_tail(&obj->st_link, objects); 584 return obj; 585 } 586 587 static unsigned long max_dwords(struct drm_i915_gem_object *obj) 588 { 589 unsigned long npages = fake_page_count(obj); 590 591 GEM_BUG_ON(!IS_ALIGNED(npages, DW_PER_PAGE)); 592 return npages / DW_PER_PAGE; 593 } 594 595 static void throttle_release(struct i915_request **q, int count) 596 { 597 int i; 598 599 for (i = 0; i < count; i++) { 600 if (IS_ERR_OR_NULL(q[i])) 601 continue; 602 603 i915_request_put(fetch_and_zero(&q[i])); 604 } 605 } 606 607 static int throttle(struct intel_context *ce, 608 struct i915_request **q, int count) 609 { 610 int i; 611 612 if (!IS_ERR_OR_NULL(q[0])) { 613 if (i915_request_wait(q[0], 614 I915_WAIT_INTERRUPTIBLE, 615 MAX_SCHEDULE_TIMEOUT) < 0) 616 return -EINTR; 617 618 i915_request_put(q[0]); 619 } 620 621 for (i = 0; i < count - 1; i++) 622 q[i] = q[i + 1]; 623 624 q[i] = intel_context_create_request(ce); 625 if (IS_ERR(q[i])) 626 return PTR_ERR(q[i]); 627 628 i915_request_get(q[i]); 629 i915_request_add(q[i]); 630 631 return 0; 632 } 633 634 static int igt_ctx_exec(void *arg) 635 { 636 struct drm_i915_private *i915 = arg; 637 struct intel_engine_cs *engine; 638 int err = -ENODEV; 639 640 /* 641 * Create a few different contexts (with different mm) and write 642 * through each ctx/mm using the GPU making sure those writes end 643 * up in the expected pages of our obj. 644 */ 645 646 if (!DRIVER_CAPS(i915)->has_logical_contexts) 647 return 0; 648 649 for_each_uabi_engine(engine, i915) { 650 struct drm_i915_gem_object *obj = NULL; 651 unsigned long ncontexts, ndwords, dw; 652 struct i915_request *tq[5] = {}; 653 struct igt_live_test t; 654 IGT_TIMEOUT(end_time); 655 LIST_HEAD(objects); 656 struct file *file; 657 658 if (!intel_engine_can_store_dword(engine)) 659 continue; 660 661 if (!engine->context_size) 662 continue; /* No logical context support in HW */ 663 664 file = mock_file(i915); 665 if (IS_ERR(file)) 666 return PTR_ERR(file); 667 668 err = igt_live_test_begin(&t, i915, __func__, engine->name); 669 if (err) 670 goto out_file; 671 672 ncontexts = 0; 673 ndwords = 0; 674 dw = 0; 675 while (!time_after(jiffies, end_time)) { 676 struct i915_gem_context *ctx; 677 struct intel_context *ce; 678 679 ctx = kernel_context(i915, NULL); 680 if (IS_ERR(ctx)) { 681 err = PTR_ERR(ctx); 682 goto out_file; 683 } 684 685 ce = i915_gem_context_get_engine(ctx, engine->legacy_idx); 686 GEM_BUG_ON(IS_ERR(ce)); 687 688 if (!obj) { 689 obj = create_test_object(ce->vm, file, &objects); 690 if (IS_ERR(obj)) { 691 err = PTR_ERR(obj); 692 intel_context_put(ce); 693 kernel_context_close(ctx); 694 goto out_file; 695 } 696 } 697 698 err = gpu_fill(ce, obj, dw); 699 if (err) { 700 pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) [full-ppgtt? %s], err=%d\n", 701 ndwords, dw, max_dwords(obj), 702 engine->name, 703 yesno(i915_gem_context_has_full_ppgtt(ctx)), 704 err); 705 intel_context_put(ce); 706 kernel_context_close(ctx); 707 goto out_file; 708 } 709 710 err = throttle(ce, tq, ARRAY_SIZE(tq)); 711 if (err) { 712 intel_context_put(ce); 713 kernel_context_close(ctx); 714 goto out_file; 715 } 716 717 if (++dw == max_dwords(obj)) { 718 obj = NULL; 719 dw = 0; 720 } 721 722 ndwords++; 723 ncontexts++; 724 725 intel_context_put(ce); 726 kernel_context_close(ctx); 727 } 728 729 pr_info("Submitted %lu contexts to %s, filling %lu dwords\n", 730 ncontexts, engine->name, ndwords); 731 732 ncontexts = dw = 0; 733 list_for_each_entry(obj, &objects, st_link) { 734 unsigned int rem = 735 min_t(unsigned int, ndwords - dw, max_dwords(obj)); 736 737 err = cpu_check(obj, ncontexts++, rem); 738 if (err) 739 break; 740 741 dw += rem; 742 } 743 744 out_file: 745 throttle_release(tq, ARRAY_SIZE(tq)); 746 if (igt_live_test_end(&t)) 747 err = -EIO; 748 749 fput(file); 750 if (err) 751 return err; 752 753 i915_gem_drain_freed_objects(i915); 754 } 755 756 return 0; 757 } 758 759 static int igt_shared_ctx_exec(void *arg) 760 { 761 struct drm_i915_private *i915 = arg; 762 struct i915_request *tq[5] = {}; 763 struct i915_gem_context *parent; 764 struct intel_engine_cs *engine; 765 struct igt_live_test t; 766 struct file *file; 767 int err = 0; 768 769 /* 770 * Create a few different contexts with the same mm and write 771 * through each ctx using the GPU making sure those writes end 772 * up in the expected pages of our obj. 773 */ 774 if (!DRIVER_CAPS(i915)->has_logical_contexts) 775 return 0; 776 777 file = mock_file(i915); 778 if (IS_ERR(file)) 779 return PTR_ERR(file); 780 781 parent = live_context(i915, file); 782 if (IS_ERR(parent)) { 783 err = PTR_ERR(parent); 784 goto out_file; 785 } 786 787 if (!parent->vm) { /* not full-ppgtt; nothing to share */ 788 err = 0; 789 goto out_file; 790 } 791 792 err = igt_live_test_begin(&t, i915, __func__, ""); 793 if (err) 794 goto out_file; 795 796 for_each_uabi_engine(engine, i915) { 797 unsigned long ncontexts, ndwords, dw; 798 struct drm_i915_gem_object *obj = NULL; 799 IGT_TIMEOUT(end_time); 800 LIST_HEAD(objects); 801 802 if (!intel_engine_can_store_dword(engine)) 803 continue; 804 805 dw = 0; 806 ndwords = 0; 807 ncontexts = 0; 808 while (!time_after(jiffies, end_time)) { 809 struct i915_gem_context *ctx; 810 struct intel_context *ce; 811 812 ctx = kernel_context(i915, parent->vm); 813 if (IS_ERR(ctx)) { 814 err = PTR_ERR(ctx); 815 goto out_test; 816 } 817 818 ce = i915_gem_context_get_engine(ctx, engine->legacy_idx); 819 GEM_BUG_ON(IS_ERR(ce)); 820 821 if (!obj) { 822 obj = create_test_object(parent->vm, 823 file, &objects); 824 if (IS_ERR(obj)) { 825 err = PTR_ERR(obj); 826 intel_context_put(ce); 827 kernel_context_close(ctx); 828 goto out_test; 829 } 830 } 831 832 err = gpu_fill(ce, obj, dw); 833 if (err) { 834 pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) [full-ppgtt? %s], err=%d\n", 835 ndwords, dw, max_dwords(obj), 836 engine->name, 837 yesno(i915_gem_context_has_full_ppgtt(ctx)), 838 err); 839 intel_context_put(ce); 840 kernel_context_close(ctx); 841 goto out_test; 842 } 843 844 err = throttle(ce, tq, ARRAY_SIZE(tq)); 845 if (err) { 846 intel_context_put(ce); 847 kernel_context_close(ctx); 848 goto out_test; 849 } 850 851 if (++dw == max_dwords(obj)) { 852 obj = NULL; 853 dw = 0; 854 } 855 856 ndwords++; 857 ncontexts++; 858 859 intel_context_put(ce); 860 kernel_context_close(ctx); 861 } 862 pr_info("Submitted %lu contexts to %s, filling %lu dwords\n", 863 ncontexts, engine->name, ndwords); 864 865 ncontexts = dw = 0; 866 list_for_each_entry(obj, &objects, st_link) { 867 unsigned int rem = 868 min_t(unsigned int, ndwords - dw, max_dwords(obj)); 869 870 err = cpu_check(obj, ncontexts++, rem); 871 if (err) 872 goto out_test; 873 874 dw += rem; 875 } 876 877 i915_gem_drain_freed_objects(i915); 878 } 879 out_test: 880 throttle_release(tq, ARRAY_SIZE(tq)); 881 if (igt_live_test_end(&t)) 882 err = -EIO; 883 out_file: 884 fput(file); 885 return err; 886 } 887 888 static int rpcs_query_batch(struct drm_i915_gem_object *rpcs, 889 struct i915_vma *vma, 890 struct intel_engine_cs *engine) 891 { 892 u32 *cmd; 893 894 GEM_BUG_ON(GRAPHICS_VER(vma->vm->i915) < 8); 895 896 cmd = i915_gem_object_pin_map(rpcs, I915_MAP_WB); 897 if (IS_ERR(cmd)) 898 return PTR_ERR(cmd); 899 900 *cmd++ = MI_STORE_REGISTER_MEM_GEN8; 901 *cmd++ = i915_mmio_reg_offset(GEN8_R_PWR_CLK_STATE(engine->mmio_base)); 902 *cmd++ = lower_32_bits(vma->node.start); 903 *cmd++ = upper_32_bits(vma->node.start); 904 *cmd = MI_BATCH_BUFFER_END; 905 906 __i915_gem_object_flush_map(rpcs, 0, 64); 907 i915_gem_object_unpin_map(rpcs); 908 909 intel_gt_chipset_flush(vma->vm->gt); 910 911 return 0; 912 } 913 914 static int 915 emit_rpcs_query(struct drm_i915_gem_object *obj, 916 struct intel_context *ce, 917 struct i915_request **rq_out) 918 { 919 struct drm_i915_private *i915 = to_i915(obj->base.dev); 920 struct i915_request *rq; 921 struct i915_gem_ww_ctx ww; 922 struct i915_vma *batch; 923 struct i915_vma *vma; 924 struct drm_i915_gem_object *rpcs; 925 int err; 926 927 GEM_BUG_ON(!intel_engine_can_store_dword(ce->engine)); 928 929 if (GRAPHICS_VER(i915) < 8) 930 return -EINVAL; 931 932 vma = i915_vma_instance(obj, ce->vm, NULL); 933 if (IS_ERR(vma)) 934 return PTR_ERR(vma); 935 936 rpcs = i915_gem_object_create_internal(i915, PAGE_SIZE); 937 if (IS_ERR(rpcs)) 938 return PTR_ERR(rpcs); 939 940 batch = i915_vma_instance(rpcs, ce->vm, NULL); 941 if (IS_ERR(batch)) { 942 err = PTR_ERR(batch); 943 goto err_put; 944 } 945 946 i915_gem_ww_ctx_init(&ww, false); 947 retry: 948 err = i915_gem_object_lock(obj, &ww); 949 if (!err) 950 err = i915_gem_object_lock(rpcs, &ww); 951 if (!err) 952 err = i915_gem_object_set_to_gtt_domain(obj, false); 953 if (!err) 954 err = i915_vma_pin_ww(vma, &ww, 0, 0, PIN_USER); 955 if (err) 956 goto err_put; 957 958 err = i915_vma_pin_ww(batch, &ww, 0, 0, PIN_USER); 959 if (err) 960 goto err_vma; 961 962 err = rpcs_query_batch(rpcs, vma, ce->engine); 963 if (err) 964 goto err_batch; 965 966 rq = i915_request_create(ce); 967 if (IS_ERR(rq)) { 968 err = PTR_ERR(rq); 969 goto err_batch; 970 } 971 972 err = i915_request_await_object(rq, batch->obj, false); 973 if (err == 0) 974 err = i915_vma_move_to_active(batch, rq, 0); 975 if (err) 976 goto skip_request; 977 978 err = i915_request_await_object(rq, vma->obj, true); 979 if (err == 0) 980 err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE); 981 if (err) 982 goto skip_request; 983 984 if (rq->engine->emit_init_breadcrumb) { 985 err = rq->engine->emit_init_breadcrumb(rq); 986 if (err) 987 goto skip_request; 988 } 989 990 err = rq->engine->emit_bb_start(rq, 991 batch->node.start, batch->node.size, 992 0); 993 if (err) 994 goto skip_request; 995 996 *rq_out = i915_request_get(rq); 997 998 skip_request: 999 if (err) 1000 i915_request_set_error_once(rq, err); 1001 i915_request_add(rq); 1002 err_batch: 1003 i915_vma_unpin(batch); 1004 err_vma: 1005 i915_vma_unpin(vma); 1006 err_put: 1007 if (err == -EDEADLK) { 1008 err = i915_gem_ww_ctx_backoff(&ww); 1009 if (!err) 1010 goto retry; 1011 } 1012 i915_gem_ww_ctx_fini(&ww); 1013 i915_gem_object_put(rpcs); 1014 return err; 1015 } 1016 1017 #define TEST_IDLE BIT(0) 1018 #define TEST_BUSY BIT(1) 1019 #define TEST_RESET BIT(2) 1020 1021 static int 1022 __sseu_prepare(const char *name, 1023 unsigned int flags, 1024 struct intel_context *ce, 1025 struct igt_spinner **spin) 1026 { 1027 struct i915_request *rq; 1028 int ret; 1029 1030 *spin = NULL; 1031 if (!(flags & (TEST_BUSY | TEST_RESET))) 1032 return 0; 1033 1034 *spin = kzalloc(sizeof(**spin), GFP_KERNEL); 1035 if (!*spin) 1036 return -ENOMEM; 1037 1038 ret = igt_spinner_init(*spin, ce->engine->gt); 1039 if (ret) 1040 goto err_free; 1041 1042 rq = igt_spinner_create_request(*spin, ce, MI_NOOP); 1043 if (IS_ERR(rq)) { 1044 ret = PTR_ERR(rq); 1045 goto err_fini; 1046 } 1047 1048 i915_request_add(rq); 1049 1050 if (!igt_wait_for_spinner(*spin, rq)) { 1051 pr_err("%s: Spinner failed to start!\n", name); 1052 ret = -ETIMEDOUT; 1053 goto err_end; 1054 } 1055 1056 return 0; 1057 1058 err_end: 1059 igt_spinner_end(*spin); 1060 err_fini: 1061 igt_spinner_fini(*spin); 1062 err_free: 1063 kfree(fetch_and_zero(spin)); 1064 return ret; 1065 } 1066 1067 static int 1068 __read_slice_count(struct intel_context *ce, 1069 struct drm_i915_gem_object *obj, 1070 struct igt_spinner *spin, 1071 u32 *rpcs) 1072 { 1073 struct i915_request *rq = NULL; 1074 u32 s_mask, s_shift; 1075 unsigned int cnt; 1076 u32 *buf, val; 1077 long ret; 1078 1079 ret = emit_rpcs_query(obj, ce, &rq); 1080 if (ret) 1081 return ret; 1082 1083 if (spin) 1084 igt_spinner_end(spin); 1085 1086 ret = i915_request_wait(rq, 0, MAX_SCHEDULE_TIMEOUT); 1087 i915_request_put(rq); 1088 if (ret < 0) 1089 return ret; 1090 1091 buf = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WB); 1092 if (IS_ERR(buf)) { 1093 ret = PTR_ERR(buf); 1094 return ret; 1095 } 1096 1097 if (GRAPHICS_VER(ce->engine->i915) >= 11) { 1098 s_mask = GEN11_RPCS_S_CNT_MASK; 1099 s_shift = GEN11_RPCS_S_CNT_SHIFT; 1100 } else { 1101 s_mask = GEN8_RPCS_S_CNT_MASK; 1102 s_shift = GEN8_RPCS_S_CNT_SHIFT; 1103 } 1104 1105 val = *buf; 1106 cnt = (val & s_mask) >> s_shift; 1107 *rpcs = val; 1108 1109 i915_gem_object_unpin_map(obj); 1110 1111 return cnt; 1112 } 1113 1114 static int 1115 __check_rpcs(const char *name, u32 rpcs, int slices, unsigned int expected, 1116 const char *prefix, const char *suffix) 1117 { 1118 if (slices == expected) 1119 return 0; 1120 1121 if (slices < 0) { 1122 pr_err("%s: %s read slice count failed with %d%s\n", 1123 name, prefix, slices, suffix); 1124 return slices; 1125 } 1126 1127 pr_err("%s: %s slice count %d is not %u%s\n", 1128 name, prefix, slices, expected, suffix); 1129 1130 pr_info("RPCS=0x%x; %u%sx%u%s\n", 1131 rpcs, slices, 1132 (rpcs & GEN8_RPCS_S_CNT_ENABLE) ? "*" : "", 1133 (rpcs & GEN8_RPCS_SS_CNT_MASK) >> GEN8_RPCS_SS_CNT_SHIFT, 1134 (rpcs & GEN8_RPCS_SS_CNT_ENABLE) ? "*" : ""); 1135 1136 return -EINVAL; 1137 } 1138 1139 static int 1140 __sseu_finish(const char *name, 1141 unsigned int flags, 1142 struct intel_context *ce, 1143 struct drm_i915_gem_object *obj, 1144 unsigned int expected, 1145 struct igt_spinner *spin) 1146 { 1147 unsigned int slices = hweight32(ce->engine->sseu.slice_mask); 1148 u32 rpcs = 0; 1149 int ret = 0; 1150 1151 if (flags & TEST_RESET) { 1152 ret = intel_engine_reset(ce->engine, "sseu"); 1153 if (ret) 1154 goto out; 1155 } 1156 1157 ret = __read_slice_count(ce, obj, 1158 flags & TEST_RESET ? NULL : spin, &rpcs); 1159 ret = __check_rpcs(name, rpcs, ret, expected, "Context", "!"); 1160 if (ret) 1161 goto out; 1162 1163 ret = __read_slice_count(ce->engine->kernel_context, obj, NULL, &rpcs); 1164 ret = __check_rpcs(name, rpcs, ret, slices, "Kernel context", "!"); 1165 1166 out: 1167 if (spin) 1168 igt_spinner_end(spin); 1169 1170 if ((flags & TEST_IDLE) && ret == 0) { 1171 ret = igt_flush_test(ce->engine->i915); 1172 if (ret) 1173 return ret; 1174 1175 ret = __read_slice_count(ce, obj, NULL, &rpcs); 1176 ret = __check_rpcs(name, rpcs, ret, expected, 1177 "Context", " after idle!"); 1178 } 1179 1180 return ret; 1181 } 1182 1183 static int 1184 __sseu_test(const char *name, 1185 unsigned int flags, 1186 struct intel_context *ce, 1187 struct drm_i915_gem_object *obj, 1188 struct intel_sseu sseu) 1189 { 1190 struct igt_spinner *spin = NULL; 1191 int ret; 1192 1193 intel_engine_pm_get(ce->engine); 1194 1195 ret = __sseu_prepare(name, flags, ce, &spin); 1196 if (ret) 1197 goto out_pm; 1198 1199 ret = intel_context_reconfigure_sseu(ce, sseu); 1200 if (ret) 1201 goto out_spin; 1202 1203 ret = __sseu_finish(name, flags, ce, obj, 1204 hweight32(sseu.slice_mask), spin); 1205 1206 out_spin: 1207 if (spin) { 1208 igt_spinner_end(spin); 1209 igt_spinner_fini(spin); 1210 kfree(spin); 1211 } 1212 out_pm: 1213 intel_engine_pm_put(ce->engine); 1214 return ret; 1215 } 1216 1217 static int 1218 __igt_ctx_sseu(struct drm_i915_private *i915, 1219 const char *name, 1220 unsigned int flags) 1221 { 1222 struct drm_i915_gem_object *obj; 1223 int inst = 0; 1224 int ret = 0; 1225 1226 if (GRAPHICS_VER(i915) < 9) 1227 return 0; 1228 1229 if (flags & TEST_RESET) 1230 igt_global_reset_lock(to_gt(i915)); 1231 1232 obj = i915_gem_object_create_internal(i915, PAGE_SIZE); 1233 if (IS_ERR(obj)) { 1234 ret = PTR_ERR(obj); 1235 goto out_unlock; 1236 } 1237 1238 do { 1239 struct intel_engine_cs *engine; 1240 struct intel_context *ce; 1241 struct intel_sseu pg_sseu; 1242 1243 engine = intel_engine_lookup_user(i915, 1244 I915_ENGINE_CLASS_RENDER, 1245 inst++); 1246 if (!engine) 1247 break; 1248 1249 if (hweight32(engine->sseu.slice_mask) < 2) 1250 continue; 1251 1252 if (!engine->gt->info.sseu.has_slice_pg) 1253 continue; 1254 1255 /* 1256 * Gen11 VME friendly power-gated configuration with 1257 * half enabled sub-slices. 1258 */ 1259 pg_sseu = engine->sseu; 1260 pg_sseu.slice_mask = 1; 1261 pg_sseu.subslice_mask = 1262 ~(~0 << (hweight32(engine->sseu.subslice_mask) / 2)); 1263 1264 pr_info("%s: SSEU subtest '%s', flags=%x, def_slices=%u, pg_slices=%u\n", 1265 engine->name, name, flags, 1266 hweight32(engine->sseu.slice_mask), 1267 hweight32(pg_sseu.slice_mask)); 1268 1269 ce = intel_context_create(engine); 1270 if (IS_ERR(ce)) { 1271 ret = PTR_ERR(ce); 1272 goto out_put; 1273 } 1274 1275 ret = intel_context_pin(ce); 1276 if (ret) 1277 goto out_ce; 1278 1279 /* First set the default mask. */ 1280 ret = __sseu_test(name, flags, ce, obj, engine->sseu); 1281 if (ret) 1282 goto out_unpin; 1283 1284 /* Then set a power-gated configuration. */ 1285 ret = __sseu_test(name, flags, ce, obj, pg_sseu); 1286 if (ret) 1287 goto out_unpin; 1288 1289 /* Back to defaults. */ 1290 ret = __sseu_test(name, flags, ce, obj, engine->sseu); 1291 if (ret) 1292 goto out_unpin; 1293 1294 /* One last power-gated configuration for the road. */ 1295 ret = __sseu_test(name, flags, ce, obj, pg_sseu); 1296 if (ret) 1297 goto out_unpin; 1298 1299 out_unpin: 1300 intel_context_unpin(ce); 1301 out_ce: 1302 intel_context_put(ce); 1303 } while (!ret); 1304 1305 if (igt_flush_test(i915)) 1306 ret = -EIO; 1307 1308 out_put: 1309 i915_gem_object_put(obj); 1310 1311 out_unlock: 1312 if (flags & TEST_RESET) 1313 igt_global_reset_unlock(to_gt(i915)); 1314 1315 if (ret) 1316 pr_err("%s: Failed with %d!\n", name, ret); 1317 1318 return ret; 1319 } 1320 1321 static int igt_ctx_sseu(void *arg) 1322 { 1323 struct { 1324 const char *name; 1325 unsigned int flags; 1326 } *phase, phases[] = { 1327 { .name = "basic", .flags = 0 }, 1328 { .name = "idle", .flags = TEST_IDLE }, 1329 { .name = "busy", .flags = TEST_BUSY }, 1330 { .name = "busy-reset", .flags = TEST_BUSY | TEST_RESET }, 1331 { .name = "busy-idle", .flags = TEST_BUSY | TEST_IDLE }, 1332 { .name = "reset-idle", .flags = TEST_RESET | TEST_IDLE }, 1333 }; 1334 unsigned int i; 1335 int ret = 0; 1336 1337 for (i = 0, phase = phases; ret == 0 && i < ARRAY_SIZE(phases); 1338 i++, phase++) 1339 ret = __igt_ctx_sseu(arg, phase->name, phase->flags); 1340 1341 return ret; 1342 } 1343 1344 static int igt_ctx_readonly(void *arg) 1345 { 1346 struct drm_i915_private *i915 = arg; 1347 unsigned long idx, ndwords, dw, num_engines; 1348 struct drm_i915_gem_object *obj = NULL; 1349 struct i915_request *tq[5] = {}; 1350 struct i915_gem_engines_iter it; 1351 struct i915_address_space *vm; 1352 struct i915_gem_context *ctx; 1353 struct intel_context *ce; 1354 struct igt_live_test t; 1355 I915_RND_STATE(prng); 1356 IGT_TIMEOUT(end_time); 1357 LIST_HEAD(objects); 1358 struct file *file; 1359 int err = -ENODEV; 1360 1361 /* 1362 * Create a few read-only objects (with the occasional writable object) 1363 * and try to write into these object checking that the GPU discards 1364 * any write to a read-only object. 1365 */ 1366 1367 file = mock_file(i915); 1368 if (IS_ERR(file)) 1369 return PTR_ERR(file); 1370 1371 err = igt_live_test_begin(&t, i915, __func__, ""); 1372 if (err) 1373 goto out_file; 1374 1375 ctx = live_context(i915, file); 1376 if (IS_ERR(ctx)) { 1377 err = PTR_ERR(ctx); 1378 goto out_file; 1379 } 1380 1381 vm = ctx->vm ?: &to_gt(i915)->ggtt->alias->vm; 1382 if (!vm || !vm->has_read_only) { 1383 err = 0; 1384 goto out_file; 1385 } 1386 1387 num_engines = 0; 1388 for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) 1389 if (intel_engine_can_store_dword(ce->engine)) 1390 num_engines++; 1391 i915_gem_context_unlock_engines(ctx); 1392 1393 ndwords = 0; 1394 dw = 0; 1395 while (!time_after(jiffies, end_time)) { 1396 for_each_gem_engine(ce, 1397 i915_gem_context_lock_engines(ctx), it) { 1398 if (!intel_engine_can_store_dword(ce->engine)) 1399 continue; 1400 1401 if (!obj) { 1402 obj = create_test_object(ce->vm, file, &objects); 1403 if (IS_ERR(obj)) { 1404 err = PTR_ERR(obj); 1405 i915_gem_context_unlock_engines(ctx); 1406 goto out_file; 1407 } 1408 1409 if (prandom_u32_state(&prng) & 1) 1410 i915_gem_object_set_readonly(obj); 1411 } 1412 1413 err = gpu_fill(ce, obj, dw); 1414 if (err) { 1415 pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) [full-ppgtt? %s], err=%d\n", 1416 ndwords, dw, max_dwords(obj), 1417 ce->engine->name, 1418 yesno(i915_gem_context_has_full_ppgtt(ctx)), 1419 err); 1420 i915_gem_context_unlock_engines(ctx); 1421 goto out_file; 1422 } 1423 1424 err = throttle(ce, tq, ARRAY_SIZE(tq)); 1425 if (err) { 1426 i915_gem_context_unlock_engines(ctx); 1427 goto out_file; 1428 } 1429 1430 if (++dw == max_dwords(obj)) { 1431 obj = NULL; 1432 dw = 0; 1433 } 1434 ndwords++; 1435 } 1436 i915_gem_context_unlock_engines(ctx); 1437 } 1438 pr_info("Submitted %lu dwords (across %lu engines)\n", 1439 ndwords, num_engines); 1440 1441 dw = 0; 1442 idx = 0; 1443 list_for_each_entry(obj, &objects, st_link) { 1444 unsigned int rem = 1445 min_t(unsigned int, ndwords - dw, max_dwords(obj)); 1446 unsigned int num_writes; 1447 1448 num_writes = rem; 1449 if (i915_gem_object_is_readonly(obj)) 1450 num_writes = 0; 1451 1452 err = cpu_check(obj, idx++, num_writes); 1453 if (err) 1454 break; 1455 1456 dw += rem; 1457 } 1458 1459 out_file: 1460 throttle_release(tq, ARRAY_SIZE(tq)); 1461 if (igt_live_test_end(&t)) 1462 err = -EIO; 1463 1464 fput(file); 1465 return err; 1466 } 1467 1468 static int check_scratch(struct i915_address_space *vm, u64 offset) 1469 { 1470 struct drm_mm_node *node; 1471 1472 mutex_lock(&vm->mutex); 1473 node = __drm_mm_interval_first(&vm->mm, 1474 offset, offset + sizeof(u32) - 1); 1475 mutex_unlock(&vm->mutex); 1476 if (!node || node->start > offset) 1477 return 0; 1478 1479 GEM_BUG_ON(offset >= node->start + node->size); 1480 1481 pr_err("Target offset 0x%08x_%08x overlaps with a node in the mm!\n", 1482 upper_32_bits(offset), lower_32_bits(offset)); 1483 return -EINVAL; 1484 } 1485 1486 static int write_to_scratch(struct i915_gem_context *ctx, 1487 struct intel_engine_cs *engine, 1488 struct drm_i915_gem_object *obj, 1489 u64 offset, u32 value) 1490 { 1491 struct drm_i915_private *i915 = ctx->i915; 1492 struct i915_address_space *vm; 1493 struct i915_request *rq; 1494 struct i915_vma *vma; 1495 u32 *cmd; 1496 int err; 1497 1498 GEM_BUG_ON(offset < I915_GTT_PAGE_SIZE); 1499 1500 err = check_scratch(ctx->vm, offset); 1501 if (err) 1502 return err; 1503 1504 cmd = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WB); 1505 if (IS_ERR(cmd)) 1506 return PTR_ERR(cmd); 1507 1508 *cmd++ = MI_STORE_DWORD_IMM_GEN4; 1509 if (GRAPHICS_VER(i915) >= 8) { 1510 *cmd++ = lower_32_bits(offset); 1511 *cmd++ = upper_32_bits(offset); 1512 } else { 1513 *cmd++ = 0; 1514 *cmd++ = offset; 1515 } 1516 *cmd++ = value; 1517 *cmd = MI_BATCH_BUFFER_END; 1518 __i915_gem_object_flush_map(obj, 0, 64); 1519 i915_gem_object_unpin_map(obj); 1520 1521 intel_gt_chipset_flush(engine->gt); 1522 1523 vm = i915_gem_context_get_eb_vm(ctx); 1524 vma = i915_vma_instance(obj, vm, NULL); 1525 if (IS_ERR(vma)) { 1526 err = PTR_ERR(vma); 1527 goto out_vm; 1528 } 1529 1530 err = i915_vma_pin(vma, 0, 0, PIN_USER | PIN_OFFSET_FIXED); 1531 if (err) 1532 goto out_vm; 1533 1534 rq = igt_request_alloc(ctx, engine); 1535 if (IS_ERR(rq)) { 1536 err = PTR_ERR(rq); 1537 goto err_unpin; 1538 } 1539 1540 i915_vma_lock(vma); 1541 err = i915_request_await_object(rq, vma->obj, false); 1542 if (err == 0) 1543 err = i915_vma_move_to_active(vma, rq, 0); 1544 i915_vma_unlock(vma); 1545 if (err) 1546 goto skip_request; 1547 1548 if (rq->engine->emit_init_breadcrumb) { 1549 err = rq->engine->emit_init_breadcrumb(rq); 1550 if (err) 1551 goto skip_request; 1552 } 1553 1554 err = engine->emit_bb_start(rq, vma->node.start, vma->node.size, 0); 1555 if (err) 1556 goto skip_request; 1557 1558 i915_vma_unpin(vma); 1559 1560 i915_request_add(rq); 1561 1562 goto out_vm; 1563 skip_request: 1564 i915_request_set_error_once(rq, err); 1565 i915_request_add(rq); 1566 err_unpin: 1567 i915_vma_unpin(vma); 1568 out_vm: 1569 i915_vm_put(vm); 1570 1571 if (!err) 1572 err = i915_gem_object_wait(obj, 0, MAX_SCHEDULE_TIMEOUT); 1573 1574 return err; 1575 } 1576 1577 static int read_from_scratch(struct i915_gem_context *ctx, 1578 struct intel_engine_cs *engine, 1579 struct drm_i915_gem_object *obj, 1580 u64 offset, u32 *value) 1581 { 1582 struct drm_i915_private *i915 = ctx->i915; 1583 struct i915_address_space *vm; 1584 const u32 result = 0x100; 1585 struct i915_request *rq; 1586 struct i915_vma *vma; 1587 unsigned int flags; 1588 u32 *cmd; 1589 int err; 1590 1591 GEM_BUG_ON(offset < I915_GTT_PAGE_SIZE); 1592 1593 err = check_scratch(ctx->vm, offset); 1594 if (err) 1595 return err; 1596 1597 if (GRAPHICS_VER(i915) >= 8) { 1598 const u32 GPR0 = engine->mmio_base + 0x600; 1599 1600 vm = i915_gem_context_get_eb_vm(ctx); 1601 vma = i915_vma_instance(obj, vm, NULL); 1602 if (IS_ERR(vma)) { 1603 err = PTR_ERR(vma); 1604 goto out_vm; 1605 } 1606 1607 err = i915_vma_pin(vma, 0, 0, PIN_USER | PIN_OFFSET_FIXED); 1608 if (err) 1609 goto out_vm; 1610 1611 cmd = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WB); 1612 if (IS_ERR(cmd)) { 1613 err = PTR_ERR(cmd); 1614 goto err_unpin; 1615 } 1616 1617 memset(cmd, POISON_INUSE, PAGE_SIZE); 1618 *cmd++ = MI_LOAD_REGISTER_MEM_GEN8; 1619 *cmd++ = GPR0; 1620 *cmd++ = lower_32_bits(offset); 1621 *cmd++ = upper_32_bits(offset); 1622 *cmd++ = MI_STORE_REGISTER_MEM_GEN8; 1623 *cmd++ = GPR0; 1624 *cmd++ = result; 1625 *cmd++ = 0; 1626 *cmd = MI_BATCH_BUFFER_END; 1627 1628 i915_gem_object_flush_map(obj); 1629 i915_gem_object_unpin_map(obj); 1630 1631 flags = 0; 1632 } else { 1633 const u32 reg = engine->mmio_base + 0x420; 1634 1635 /* hsw: register access even to 3DPRIM! is protected */ 1636 vm = i915_vm_get(&engine->gt->ggtt->vm); 1637 vma = i915_vma_instance(obj, vm, NULL); 1638 if (IS_ERR(vma)) { 1639 err = PTR_ERR(vma); 1640 goto out_vm; 1641 } 1642 1643 err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL); 1644 if (err) 1645 goto out_vm; 1646 1647 cmd = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WB); 1648 if (IS_ERR(cmd)) { 1649 err = PTR_ERR(cmd); 1650 goto err_unpin; 1651 } 1652 1653 memset(cmd, POISON_INUSE, PAGE_SIZE); 1654 *cmd++ = MI_LOAD_REGISTER_MEM; 1655 *cmd++ = reg; 1656 *cmd++ = offset; 1657 *cmd++ = MI_STORE_REGISTER_MEM | MI_USE_GGTT; 1658 *cmd++ = reg; 1659 *cmd++ = vma->node.start + result; 1660 *cmd = MI_BATCH_BUFFER_END; 1661 1662 i915_gem_object_flush_map(obj); 1663 i915_gem_object_unpin_map(obj); 1664 1665 flags = I915_DISPATCH_SECURE; 1666 } 1667 1668 intel_gt_chipset_flush(engine->gt); 1669 1670 rq = igt_request_alloc(ctx, engine); 1671 if (IS_ERR(rq)) { 1672 err = PTR_ERR(rq); 1673 goto err_unpin; 1674 } 1675 1676 i915_vma_lock(vma); 1677 err = i915_request_await_object(rq, vma->obj, true); 1678 if (err == 0) 1679 err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE); 1680 i915_vma_unlock(vma); 1681 if (err) 1682 goto skip_request; 1683 1684 if (rq->engine->emit_init_breadcrumb) { 1685 err = rq->engine->emit_init_breadcrumb(rq); 1686 if (err) 1687 goto skip_request; 1688 } 1689 1690 err = engine->emit_bb_start(rq, vma->node.start, vma->node.size, flags); 1691 if (err) 1692 goto skip_request; 1693 1694 i915_vma_unpin(vma); 1695 1696 i915_request_add(rq); 1697 1698 i915_gem_object_lock(obj, NULL); 1699 err = i915_gem_object_set_to_cpu_domain(obj, false); 1700 i915_gem_object_unlock(obj); 1701 if (err) 1702 goto out_vm; 1703 1704 cmd = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WB); 1705 if (IS_ERR(cmd)) { 1706 err = PTR_ERR(cmd); 1707 goto out_vm; 1708 } 1709 1710 *value = cmd[result / sizeof(*cmd)]; 1711 i915_gem_object_unpin_map(obj); 1712 1713 goto out_vm; 1714 skip_request: 1715 i915_request_set_error_once(rq, err); 1716 i915_request_add(rq); 1717 err_unpin: 1718 i915_vma_unpin(vma); 1719 out_vm: 1720 i915_vm_put(vm); 1721 1722 if (!err) 1723 err = i915_gem_object_wait(obj, 0, MAX_SCHEDULE_TIMEOUT); 1724 1725 return err; 1726 } 1727 1728 static int check_scratch_page(struct i915_gem_context *ctx, u32 *out) 1729 { 1730 struct i915_address_space *vm; 1731 u32 *vaddr; 1732 int err = 0; 1733 1734 vm = ctx->vm; 1735 if (!vm) 1736 return -ENODEV; 1737 1738 if (!vm->scratch[0]) { 1739 pr_err("No scratch page!\n"); 1740 return -EINVAL; 1741 } 1742 1743 vaddr = __px_vaddr(vm->scratch[0]); 1744 1745 memcpy(out, vaddr, sizeof(*out)); 1746 if (memchr_inv(vaddr, *out, PAGE_SIZE)) { 1747 pr_err("Inconsistent initial state of scratch page!\n"); 1748 err = -EINVAL; 1749 } 1750 1751 return err; 1752 } 1753 1754 static int igt_vm_isolation(void *arg) 1755 { 1756 struct drm_i915_private *i915 = arg; 1757 struct i915_gem_context *ctx_a, *ctx_b; 1758 struct drm_i915_gem_object *obj_a, *obj_b; 1759 unsigned long num_engines, count; 1760 struct intel_engine_cs *engine; 1761 struct igt_live_test t; 1762 I915_RND_STATE(prng); 1763 struct file *file; 1764 u64 vm_total; 1765 u32 expected; 1766 int err; 1767 1768 if (GRAPHICS_VER(i915) < 7) 1769 return 0; 1770 1771 /* 1772 * The simple goal here is that a write into one context is not 1773 * observed in a second (separate page tables and scratch). 1774 */ 1775 1776 file = mock_file(i915); 1777 if (IS_ERR(file)) 1778 return PTR_ERR(file); 1779 1780 err = igt_live_test_begin(&t, i915, __func__, ""); 1781 if (err) 1782 goto out_file; 1783 1784 ctx_a = live_context(i915, file); 1785 if (IS_ERR(ctx_a)) { 1786 err = PTR_ERR(ctx_a); 1787 goto out_file; 1788 } 1789 1790 ctx_b = live_context(i915, file); 1791 if (IS_ERR(ctx_b)) { 1792 err = PTR_ERR(ctx_b); 1793 goto out_file; 1794 } 1795 1796 /* We can only test vm isolation, if the vm are distinct */ 1797 if (ctx_a->vm == ctx_b->vm) 1798 goto out_file; 1799 1800 /* Read the initial state of the scratch page */ 1801 err = check_scratch_page(ctx_a, &expected); 1802 if (err) 1803 goto out_file; 1804 1805 err = check_scratch_page(ctx_b, &expected); 1806 if (err) 1807 goto out_file; 1808 1809 vm_total = ctx_a->vm->total; 1810 GEM_BUG_ON(ctx_b->vm->total != vm_total); 1811 1812 obj_a = i915_gem_object_create_internal(i915, PAGE_SIZE); 1813 if (IS_ERR(obj_a)) { 1814 err = PTR_ERR(obj_a); 1815 goto out_file; 1816 } 1817 1818 obj_b = i915_gem_object_create_internal(i915, PAGE_SIZE); 1819 if (IS_ERR(obj_b)) { 1820 err = PTR_ERR(obj_b); 1821 goto put_a; 1822 } 1823 1824 count = 0; 1825 num_engines = 0; 1826 for_each_uabi_engine(engine, i915) { 1827 IGT_TIMEOUT(end_time); 1828 unsigned long this = 0; 1829 1830 if (!intel_engine_can_store_dword(engine)) 1831 continue; 1832 1833 /* Not all engines have their own GPR! */ 1834 if (GRAPHICS_VER(i915) < 8 && engine->class != RENDER_CLASS) 1835 continue; 1836 1837 while (!__igt_timeout(end_time, NULL)) { 1838 u32 value = 0xc5c5c5c5; 1839 u64 offset; 1840 1841 /* Leave enough space at offset 0 for the batch */ 1842 offset = igt_random_offset(&prng, 1843 I915_GTT_PAGE_SIZE, vm_total, 1844 sizeof(u32), alignof_dword); 1845 1846 err = write_to_scratch(ctx_a, engine, obj_a, 1847 offset, 0xdeadbeef); 1848 if (err == 0) 1849 err = read_from_scratch(ctx_b, engine, obj_b, 1850 offset, &value); 1851 if (err) 1852 goto put_b; 1853 1854 if (value != expected) { 1855 pr_err("%s: Read %08x from scratch (offset 0x%08x_%08x), after %lu reads!\n", 1856 engine->name, value, 1857 upper_32_bits(offset), 1858 lower_32_bits(offset), 1859 this); 1860 err = -EINVAL; 1861 goto put_b; 1862 } 1863 1864 this++; 1865 } 1866 count += this; 1867 num_engines++; 1868 } 1869 pr_info("Checked %lu scratch offsets across %lu engines\n", 1870 count, num_engines); 1871 1872 put_b: 1873 i915_gem_object_put(obj_b); 1874 put_a: 1875 i915_gem_object_put(obj_a); 1876 out_file: 1877 if (igt_live_test_end(&t)) 1878 err = -EIO; 1879 fput(file); 1880 return err; 1881 } 1882 1883 int i915_gem_context_live_selftests(struct drm_i915_private *i915) 1884 { 1885 static const struct i915_subtest tests[] = { 1886 SUBTEST(live_nop_switch), 1887 SUBTEST(live_parallel_switch), 1888 SUBTEST(igt_ctx_exec), 1889 SUBTEST(igt_ctx_readonly), 1890 SUBTEST(igt_ctx_sseu), 1891 SUBTEST(igt_shared_ctx_exec), 1892 SUBTEST(igt_vm_isolation), 1893 }; 1894 1895 if (intel_gt_is_wedged(to_gt(i915))) 1896 return 0; 1897 1898 return i915_live_subtests(tests, i915); 1899 } 1900