1 /* 2 * SPDX-License-Identifier: MIT 3 * 4 * Copyright © 2017 Intel Corporation 5 */ 6 7 #include <linux/prime_numbers.h> 8 #include <linux/string_helpers.h> 9 10 #include "gem/i915_gem_internal.h" 11 #include "gem/i915_gem_pm.h" 12 #include "gt/intel_engine_pm.h" 13 #include "gt/intel_engine_regs.h" 14 #include "gt/intel_gt.h" 15 #include "gt/intel_gt_requests.h" 16 #include "gt/intel_reset.h" 17 #include "i915_selftest.h" 18 19 #include "gem/selftests/igt_gem_utils.h" 20 #include "selftests/i915_random.h" 21 #include "selftests/igt_flush_test.h" 22 #include "selftests/igt_live_test.h" 23 #include "selftests/igt_reset.h" 24 #include "selftests/igt_spinner.h" 25 #include "selftests/mock_drm.h" 26 #include "selftests/mock_gem_device.h" 27 28 #include "huge_gem_object.h" 29 #include "igt_gem_utils.h" 30 31 #define DW_PER_PAGE (PAGE_SIZE / sizeof(u32)) 32 33 static int live_nop_switch(void *arg) 34 { 35 const unsigned int nctx = 1024; 36 struct drm_i915_private *i915 = arg; 37 struct intel_engine_cs *engine; 38 struct i915_gem_context **ctx; 39 struct igt_live_test t; 40 struct file *file; 41 unsigned long n; 42 int err = -ENODEV; 43 44 /* 45 * Create as many contexts as we can feasibly get away with 46 * and check we can switch between them rapidly. 47 * 48 * Serves as very simple stress test for submission and HW switching 49 * between contexts. 50 */ 51 52 if (!DRIVER_CAPS(i915)->has_logical_contexts) 53 return 0; 54 55 file = mock_file(i915); 56 if (IS_ERR(file)) 57 return PTR_ERR(file); 58 59 ctx = kcalloc(nctx, sizeof(*ctx), GFP_KERNEL); 60 if (!ctx) { 61 err = -ENOMEM; 62 goto out_file; 63 } 64 65 for (n = 0; n < nctx; n++) { 66 ctx[n] = live_context(i915, file); 67 if (IS_ERR(ctx[n])) { 68 err = PTR_ERR(ctx[n]); 69 goto out_file; 70 } 71 } 72 73 for_each_uabi_engine(engine, i915) { 74 struct i915_request *rq = NULL; 75 unsigned long end_time, prime; 76 ktime_t times[2] = {}; 77 78 times[0] = ktime_get_raw(); 79 for (n = 0; n < nctx; n++) { 80 struct i915_request *this; 81 82 this = igt_request_alloc(ctx[n], engine); 83 if (IS_ERR(this)) { 84 err = PTR_ERR(this); 85 goto out_file; 86 } 87 if (rq) { 88 i915_request_await_dma_fence(this, &rq->fence); 89 i915_request_put(rq); 90 } 91 rq = i915_request_get(this); 92 i915_request_add(this); 93 } 94 if (i915_request_wait(rq, 0, 10 * HZ) < 0) { 95 pr_err("Failed to populated %d contexts\n", nctx); 96 intel_gt_set_wedged(to_gt(i915)); 97 i915_request_put(rq); 98 err = -EIO; 99 goto out_file; 100 } 101 i915_request_put(rq); 102 103 times[1] = ktime_get_raw(); 104 105 pr_info("Populated %d contexts on %s in %lluns\n", 106 nctx, engine->name, ktime_to_ns(times[1] - times[0])); 107 108 err = igt_live_test_begin(&t, i915, __func__, engine->name); 109 if (err) 110 goto out_file; 111 112 end_time = jiffies + i915_selftest.timeout_jiffies; 113 for_each_prime_number_from(prime, 2, 8192) { 114 times[1] = ktime_get_raw(); 115 116 rq = NULL; 117 for (n = 0; n < prime; n++) { 118 struct i915_request *this; 119 120 this = igt_request_alloc(ctx[n % nctx], engine); 121 if (IS_ERR(this)) { 122 err = PTR_ERR(this); 123 goto out_file; 124 } 125 126 if (rq) { /* Force submission order */ 127 i915_request_await_dma_fence(this, &rq->fence); 128 i915_request_put(rq); 129 } 130 131 /* 132 * This space is left intentionally blank. 133 * 134 * We do not actually want to perform any 135 * action with this request, we just want 136 * to measure the latency in allocation 137 * and submission of our breadcrumbs - 138 * ensuring that the bare request is sufficient 139 * for the system to work (i.e. proper HEAD 140 * tracking of the rings, interrupt handling, 141 * etc). It also gives us the lowest bounds 142 * for latency. 143 */ 144 145 rq = i915_request_get(this); 146 i915_request_add(this); 147 } 148 GEM_BUG_ON(!rq); 149 if (i915_request_wait(rq, 0, HZ / 5) < 0) { 150 pr_err("Switching between %ld contexts timed out\n", 151 prime); 152 intel_gt_set_wedged(to_gt(i915)); 153 i915_request_put(rq); 154 break; 155 } 156 i915_request_put(rq); 157 158 times[1] = ktime_sub(ktime_get_raw(), times[1]); 159 if (prime == 2) 160 times[0] = times[1]; 161 162 if (__igt_timeout(end_time, NULL)) 163 break; 164 } 165 166 err = igt_live_test_end(&t); 167 if (err) 168 goto out_file; 169 170 pr_info("Switch latencies on %s: 1 = %lluns, %lu = %lluns\n", 171 engine->name, 172 ktime_to_ns(times[0]), 173 prime - 1, div64_u64(ktime_to_ns(times[1]), prime - 1)); 174 } 175 176 out_file: 177 fput(file); 178 return err; 179 } 180 181 struct parallel_switch { 182 struct kthread_worker *worker; 183 struct kthread_work work; 184 struct intel_context *ce[2]; 185 int result; 186 }; 187 188 static void __live_parallel_switch1(struct kthread_work *work) 189 { 190 struct parallel_switch *arg = 191 container_of(work, typeof(*arg), work); 192 IGT_TIMEOUT(end_time); 193 unsigned long count; 194 195 count = 0; 196 arg->result = 0; 197 do { 198 struct i915_request *rq = NULL; 199 int n; 200 201 for (n = 0; !arg->result && n < ARRAY_SIZE(arg->ce); n++) { 202 struct i915_request *prev = rq; 203 204 rq = i915_request_create(arg->ce[n]); 205 if (IS_ERR(rq)) { 206 i915_request_put(prev); 207 arg->result = PTR_ERR(rq); 208 break; 209 } 210 211 i915_request_get(rq); 212 if (prev) { 213 arg->result = 214 i915_request_await_dma_fence(rq, 215 &prev->fence); 216 i915_request_put(prev); 217 } 218 219 i915_request_add(rq); 220 } 221 222 if (IS_ERR_OR_NULL(rq)) 223 break; 224 225 if (i915_request_wait(rq, 0, HZ) < 0) 226 arg->result = -ETIME; 227 228 i915_request_put(rq); 229 230 count++; 231 } while (!arg->result && !__igt_timeout(end_time, NULL)); 232 233 pr_info("%s: %lu switches (sync) <%d>\n", 234 arg->ce[0]->engine->name, count, arg->result); 235 } 236 237 static void __live_parallel_switchN(struct kthread_work *work) 238 { 239 struct parallel_switch *arg = 240 container_of(work, typeof(*arg), work); 241 struct i915_request *rq = NULL; 242 IGT_TIMEOUT(end_time); 243 unsigned long count; 244 int n; 245 246 count = 0; 247 arg->result = 0; 248 do { 249 for (n = 0; !arg->result && n < ARRAY_SIZE(arg->ce); n++) { 250 struct i915_request *prev = rq; 251 252 rq = i915_request_create(arg->ce[n]); 253 if (IS_ERR(rq)) { 254 i915_request_put(prev); 255 arg->result = PTR_ERR(rq); 256 break; 257 } 258 259 i915_request_get(rq); 260 if (prev) { 261 arg->result = 262 i915_request_await_dma_fence(rq, 263 &prev->fence); 264 i915_request_put(prev); 265 } 266 267 i915_request_add(rq); 268 } 269 270 count++; 271 } while (!arg->result && !__igt_timeout(end_time, NULL)); 272 273 if (!IS_ERR_OR_NULL(rq)) 274 i915_request_put(rq); 275 276 pr_info("%s: %lu switches (many) <%d>\n", 277 arg->ce[0]->engine->name, count, arg->result); 278 } 279 280 static int live_parallel_switch(void *arg) 281 { 282 struct drm_i915_private *i915 = arg; 283 static void (* const func[])(struct kthread_work *) = { 284 __live_parallel_switch1, 285 __live_parallel_switchN, 286 NULL, 287 }; 288 struct parallel_switch *data = NULL; 289 struct i915_gem_engines *engines; 290 struct i915_gem_engines_iter it; 291 void (* const *fn)(struct kthread_work *); 292 struct i915_gem_context *ctx; 293 struct intel_context *ce; 294 struct file *file; 295 int n, m, count; 296 int err = 0; 297 298 /* 299 * Check we can process switches on all engines simultaneously. 300 */ 301 302 if (!DRIVER_CAPS(i915)->has_logical_contexts) 303 return 0; 304 305 file = mock_file(i915); 306 if (IS_ERR(file)) 307 return PTR_ERR(file); 308 309 ctx = live_context(i915, file); 310 if (IS_ERR(ctx)) { 311 err = PTR_ERR(ctx); 312 goto out_file; 313 } 314 315 engines = i915_gem_context_lock_engines(ctx); 316 count = engines->num_engines; 317 318 data = kcalloc(count, sizeof(*data), GFP_KERNEL); 319 if (!data) { 320 i915_gem_context_unlock_engines(ctx); 321 err = -ENOMEM; 322 goto out_file; 323 } 324 325 m = 0; /* Use the first context as our template for the engines */ 326 for_each_gem_engine(ce, engines, it) { 327 err = intel_context_pin(ce); 328 if (err) { 329 i915_gem_context_unlock_engines(ctx); 330 goto out; 331 } 332 data[m++].ce[0] = intel_context_get(ce); 333 } 334 i915_gem_context_unlock_engines(ctx); 335 336 /* Clone the same set of engines into the other contexts */ 337 for (n = 1; n < ARRAY_SIZE(data->ce); n++) { 338 ctx = live_context(i915, file); 339 if (IS_ERR(ctx)) { 340 err = PTR_ERR(ctx); 341 goto out; 342 } 343 344 for (m = 0; m < count; m++) { 345 if (!data[m].ce[0]) 346 continue; 347 348 ce = intel_context_create(data[m].ce[0]->engine); 349 if (IS_ERR(ce)) { 350 err = PTR_ERR(ce); 351 goto out; 352 } 353 354 err = intel_context_pin(ce); 355 if (err) { 356 intel_context_put(ce); 357 goto out; 358 } 359 360 data[m].ce[n] = ce; 361 } 362 } 363 364 for (n = 0; n < count; n++) { 365 struct kthread_worker *worker; 366 367 if (!data[n].ce[0]) 368 continue; 369 370 worker = kthread_create_worker(0, "igt/parallel:%s", 371 data[n].ce[0]->engine->name); 372 if (IS_ERR(worker)) { 373 err = PTR_ERR(worker); 374 goto out; 375 } 376 377 data[n].worker = worker; 378 } 379 380 for (fn = func; !err && *fn; fn++) { 381 struct igt_live_test t; 382 383 err = igt_live_test_begin(&t, i915, __func__, ""); 384 if (err) 385 break; 386 387 for (n = 0; n < count; n++) { 388 if (!data[n].ce[0]) 389 continue; 390 391 data[n].result = 0; 392 kthread_init_work(&data[n].work, *fn); 393 kthread_queue_work(data[n].worker, &data[n].work); 394 } 395 396 for (n = 0; n < count; n++) { 397 if (data[n].ce[0]) { 398 kthread_flush_work(&data[n].work); 399 if (data[n].result && !err) 400 err = data[n].result; 401 } 402 } 403 404 if (igt_live_test_end(&t)) { 405 err = err ?: -EIO; 406 break; 407 } 408 } 409 410 out: 411 for (n = 0; n < count; n++) { 412 for (m = 0; m < ARRAY_SIZE(data->ce); m++) { 413 if (!data[n].ce[m]) 414 continue; 415 416 intel_context_unpin(data[n].ce[m]); 417 intel_context_put(data[n].ce[m]); 418 } 419 420 if (data[n].worker) 421 kthread_destroy_worker(data[n].worker); 422 } 423 kfree(data); 424 out_file: 425 fput(file); 426 return err; 427 } 428 429 static unsigned long real_page_count(struct drm_i915_gem_object *obj) 430 { 431 return huge_gem_object_phys_size(obj) >> PAGE_SHIFT; 432 } 433 434 static unsigned long fake_page_count(struct drm_i915_gem_object *obj) 435 { 436 return huge_gem_object_dma_size(obj) >> PAGE_SHIFT; 437 } 438 439 static int gpu_fill(struct intel_context *ce, 440 struct drm_i915_gem_object *obj, 441 unsigned int dw) 442 { 443 struct i915_vma *vma; 444 int err; 445 446 GEM_BUG_ON(obj->base.size > ce->vm->total); 447 GEM_BUG_ON(!intel_engine_can_store_dword(ce->engine)); 448 449 vma = i915_vma_instance(obj, ce->vm, NULL); 450 if (IS_ERR(vma)) 451 return PTR_ERR(vma); 452 453 err = i915_vma_pin(vma, 0, 0, PIN_HIGH | PIN_USER); 454 if (err) 455 return err; 456 457 /* 458 * Within the GTT the huge objects maps every page onto 459 * its 1024 real pages (using phys_pfn = dma_pfn % 1024). 460 * We set the nth dword within the page using the nth 461 * mapping via the GTT - this should exercise the GTT mapping 462 * whilst checking that each context provides a unique view 463 * into the object. 464 */ 465 err = igt_gpu_fill_dw(ce, vma, 466 (dw * real_page_count(obj)) << PAGE_SHIFT | 467 (dw * sizeof(u32)), 468 real_page_count(obj), 469 dw); 470 i915_vma_unpin(vma); 471 472 return err; 473 } 474 475 static int cpu_fill(struct drm_i915_gem_object *obj, u32 value) 476 { 477 const bool has_llc = HAS_LLC(to_i915(obj->base.dev)); 478 unsigned int need_flush; 479 unsigned long n, m; 480 int err; 481 482 i915_gem_object_lock(obj, NULL); 483 err = i915_gem_object_prepare_write(obj, &need_flush); 484 if (err) 485 goto out; 486 487 for (n = 0; n < real_page_count(obj); n++) { 488 u32 *map; 489 490 map = kmap_atomic(i915_gem_object_get_page(obj, n)); 491 for (m = 0; m < DW_PER_PAGE; m++) 492 map[m] = value; 493 if (!has_llc) 494 drm_clflush_virt_range(map, PAGE_SIZE); 495 kunmap_atomic(map); 496 } 497 498 i915_gem_object_finish_access(obj); 499 obj->read_domains = I915_GEM_DOMAIN_GTT | I915_GEM_DOMAIN_CPU; 500 obj->write_domain = 0; 501 out: 502 i915_gem_object_unlock(obj); 503 return err; 504 } 505 506 static noinline int cpu_check(struct drm_i915_gem_object *obj, 507 unsigned int idx, unsigned int max) 508 { 509 unsigned int needs_flush; 510 unsigned long n; 511 int err; 512 513 i915_gem_object_lock(obj, NULL); 514 err = i915_gem_object_prepare_read(obj, &needs_flush); 515 if (err) 516 goto out_unlock; 517 518 for (n = 0; n < real_page_count(obj); n++) { 519 u32 *map, m; 520 521 map = kmap_atomic(i915_gem_object_get_page(obj, n)); 522 if (needs_flush & CLFLUSH_BEFORE) 523 drm_clflush_virt_range(map, PAGE_SIZE); 524 525 for (m = 0; m < max; m++) { 526 if (map[m] != m) { 527 pr_err("%pS: Invalid value at object %d page %ld/%ld, offset %d/%d: found %x expected %x\n", 528 __builtin_return_address(0), idx, 529 n, real_page_count(obj), m, max, 530 map[m], m); 531 err = -EINVAL; 532 goto out_unmap; 533 } 534 } 535 536 for (; m < DW_PER_PAGE; m++) { 537 if (map[m] != STACK_MAGIC) { 538 pr_err("%pS: Invalid value at object %d page %ld, offset %d: found %x expected %x (uninitialised)\n", 539 __builtin_return_address(0), idx, n, m, 540 map[m], STACK_MAGIC); 541 err = -EINVAL; 542 goto out_unmap; 543 } 544 } 545 546 out_unmap: 547 kunmap_atomic(map); 548 if (err) 549 break; 550 } 551 552 i915_gem_object_finish_access(obj); 553 out_unlock: 554 i915_gem_object_unlock(obj); 555 return err; 556 } 557 558 static int file_add_object(struct file *file, struct drm_i915_gem_object *obj) 559 { 560 int err; 561 562 GEM_BUG_ON(obj->base.handle_count); 563 564 /* tie the object to the drm_file for easy reaping */ 565 err = idr_alloc(&to_drm_file(file)->object_idr, 566 &obj->base, 1, 0, GFP_KERNEL); 567 if (err < 0) 568 return err; 569 570 i915_gem_object_get(obj); 571 obj->base.handle_count++; 572 return 0; 573 } 574 575 static struct drm_i915_gem_object * 576 create_test_object(struct i915_address_space *vm, 577 struct file *file, 578 struct list_head *objects) 579 { 580 struct drm_i915_gem_object *obj; 581 u64 size; 582 int err; 583 584 /* Keep in GEM's good graces */ 585 intel_gt_retire_requests(vm->gt); 586 587 size = min(vm->total / 2, 1024ull * DW_PER_PAGE * PAGE_SIZE); 588 size = round_down(size, DW_PER_PAGE * PAGE_SIZE); 589 590 obj = huge_gem_object(vm->i915, DW_PER_PAGE * PAGE_SIZE, size); 591 if (IS_ERR(obj)) 592 return obj; 593 594 err = file_add_object(file, obj); 595 i915_gem_object_put(obj); 596 if (err) 597 return ERR_PTR(err); 598 599 err = cpu_fill(obj, STACK_MAGIC); 600 if (err) { 601 pr_err("Failed to fill object with cpu, err=%d\n", 602 err); 603 return ERR_PTR(err); 604 } 605 606 list_add_tail(&obj->st_link, objects); 607 return obj; 608 } 609 610 static unsigned long max_dwords(struct drm_i915_gem_object *obj) 611 { 612 unsigned long npages = fake_page_count(obj); 613 614 GEM_BUG_ON(!IS_ALIGNED(npages, DW_PER_PAGE)); 615 return npages / DW_PER_PAGE; 616 } 617 618 static void throttle_release(struct i915_request **q, int count) 619 { 620 int i; 621 622 for (i = 0; i < count; i++) { 623 if (IS_ERR_OR_NULL(q[i])) 624 continue; 625 626 i915_request_put(fetch_and_zero(&q[i])); 627 } 628 } 629 630 static int throttle(struct intel_context *ce, 631 struct i915_request **q, int count) 632 { 633 int i; 634 635 if (!IS_ERR_OR_NULL(q[0])) { 636 if (i915_request_wait(q[0], 637 I915_WAIT_INTERRUPTIBLE, 638 MAX_SCHEDULE_TIMEOUT) < 0) 639 return -EINTR; 640 641 i915_request_put(q[0]); 642 } 643 644 for (i = 0; i < count - 1; i++) 645 q[i] = q[i + 1]; 646 647 q[i] = intel_context_create_request(ce); 648 if (IS_ERR(q[i])) 649 return PTR_ERR(q[i]); 650 651 i915_request_get(q[i]); 652 i915_request_add(q[i]); 653 654 return 0; 655 } 656 657 static int igt_ctx_exec(void *arg) 658 { 659 struct drm_i915_private *i915 = arg; 660 struct intel_engine_cs *engine; 661 int err = -ENODEV; 662 663 /* 664 * Create a few different contexts (with different mm) and write 665 * through each ctx/mm using the GPU making sure those writes end 666 * up in the expected pages of our obj. 667 */ 668 669 if (!DRIVER_CAPS(i915)->has_logical_contexts) 670 return 0; 671 672 for_each_uabi_engine(engine, i915) { 673 struct drm_i915_gem_object *obj = NULL; 674 unsigned long ncontexts, ndwords, dw; 675 struct i915_request *tq[5] = {}; 676 struct igt_live_test t; 677 IGT_TIMEOUT(end_time); 678 LIST_HEAD(objects); 679 struct file *file; 680 681 if (!intel_engine_can_store_dword(engine)) 682 continue; 683 684 if (!engine->context_size) 685 continue; /* No logical context support in HW */ 686 687 file = mock_file(i915); 688 if (IS_ERR(file)) 689 return PTR_ERR(file); 690 691 err = igt_live_test_begin(&t, i915, __func__, engine->name); 692 if (err) 693 goto out_file; 694 695 ncontexts = 0; 696 ndwords = 0; 697 dw = 0; 698 while (!time_after(jiffies, end_time)) { 699 struct i915_gem_context *ctx; 700 struct intel_context *ce; 701 702 ctx = kernel_context(i915, NULL); 703 if (IS_ERR(ctx)) { 704 err = PTR_ERR(ctx); 705 goto out_file; 706 } 707 708 ce = i915_gem_context_get_engine(ctx, engine->legacy_idx); 709 GEM_BUG_ON(IS_ERR(ce)); 710 711 if (!obj) { 712 obj = create_test_object(ce->vm, file, &objects); 713 if (IS_ERR(obj)) { 714 err = PTR_ERR(obj); 715 intel_context_put(ce); 716 kernel_context_close(ctx); 717 goto out_file; 718 } 719 } 720 721 err = gpu_fill(ce, obj, dw); 722 if (err) { 723 pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) [full-ppgtt? %s], err=%d\n", 724 ndwords, dw, max_dwords(obj), 725 engine->name, 726 str_yes_no(i915_gem_context_has_full_ppgtt(ctx)), 727 err); 728 intel_context_put(ce); 729 kernel_context_close(ctx); 730 goto out_file; 731 } 732 733 err = throttle(ce, tq, ARRAY_SIZE(tq)); 734 if (err) { 735 intel_context_put(ce); 736 kernel_context_close(ctx); 737 goto out_file; 738 } 739 740 if (++dw == max_dwords(obj)) { 741 obj = NULL; 742 dw = 0; 743 } 744 745 ndwords++; 746 ncontexts++; 747 748 intel_context_put(ce); 749 kernel_context_close(ctx); 750 } 751 752 pr_info("Submitted %lu contexts to %s, filling %lu dwords\n", 753 ncontexts, engine->name, ndwords); 754 755 ncontexts = dw = 0; 756 list_for_each_entry(obj, &objects, st_link) { 757 unsigned int rem = 758 min_t(unsigned int, ndwords - dw, max_dwords(obj)); 759 760 err = cpu_check(obj, ncontexts++, rem); 761 if (err) 762 break; 763 764 dw += rem; 765 } 766 767 out_file: 768 throttle_release(tq, ARRAY_SIZE(tq)); 769 if (igt_live_test_end(&t)) 770 err = -EIO; 771 772 fput(file); 773 if (err) 774 return err; 775 776 i915_gem_drain_freed_objects(i915); 777 } 778 779 return 0; 780 } 781 782 static int igt_shared_ctx_exec(void *arg) 783 { 784 struct drm_i915_private *i915 = arg; 785 struct i915_request *tq[5] = {}; 786 struct i915_gem_context *parent; 787 struct intel_engine_cs *engine; 788 struct igt_live_test t; 789 struct file *file; 790 int err = 0; 791 792 /* 793 * Create a few different contexts with the same mm and write 794 * through each ctx using the GPU making sure those writes end 795 * up in the expected pages of our obj. 796 */ 797 if (!DRIVER_CAPS(i915)->has_logical_contexts) 798 return 0; 799 800 file = mock_file(i915); 801 if (IS_ERR(file)) 802 return PTR_ERR(file); 803 804 parent = live_context(i915, file); 805 if (IS_ERR(parent)) { 806 err = PTR_ERR(parent); 807 goto out_file; 808 } 809 810 if (!parent->vm) { /* not full-ppgtt; nothing to share */ 811 err = 0; 812 goto out_file; 813 } 814 815 err = igt_live_test_begin(&t, i915, __func__, ""); 816 if (err) 817 goto out_file; 818 819 for_each_uabi_engine(engine, i915) { 820 unsigned long ncontexts, ndwords, dw; 821 struct drm_i915_gem_object *obj = NULL; 822 IGT_TIMEOUT(end_time); 823 LIST_HEAD(objects); 824 825 if (!intel_engine_can_store_dword(engine)) 826 continue; 827 828 dw = 0; 829 ndwords = 0; 830 ncontexts = 0; 831 while (!time_after(jiffies, end_time)) { 832 struct i915_gem_context *ctx; 833 struct intel_context *ce; 834 835 ctx = kernel_context(i915, parent->vm); 836 if (IS_ERR(ctx)) { 837 err = PTR_ERR(ctx); 838 goto out_test; 839 } 840 841 ce = i915_gem_context_get_engine(ctx, engine->legacy_idx); 842 GEM_BUG_ON(IS_ERR(ce)); 843 844 if (!obj) { 845 obj = create_test_object(parent->vm, 846 file, &objects); 847 if (IS_ERR(obj)) { 848 err = PTR_ERR(obj); 849 intel_context_put(ce); 850 kernel_context_close(ctx); 851 goto out_test; 852 } 853 } 854 855 err = gpu_fill(ce, obj, dw); 856 if (err) { 857 pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) [full-ppgtt? %s], err=%d\n", 858 ndwords, dw, max_dwords(obj), 859 engine->name, 860 str_yes_no(i915_gem_context_has_full_ppgtt(ctx)), 861 err); 862 intel_context_put(ce); 863 kernel_context_close(ctx); 864 goto out_test; 865 } 866 867 err = throttle(ce, tq, ARRAY_SIZE(tq)); 868 if (err) { 869 intel_context_put(ce); 870 kernel_context_close(ctx); 871 goto out_test; 872 } 873 874 if (++dw == max_dwords(obj)) { 875 obj = NULL; 876 dw = 0; 877 } 878 879 ndwords++; 880 ncontexts++; 881 882 intel_context_put(ce); 883 kernel_context_close(ctx); 884 } 885 pr_info("Submitted %lu contexts to %s, filling %lu dwords\n", 886 ncontexts, engine->name, ndwords); 887 888 ncontexts = dw = 0; 889 list_for_each_entry(obj, &objects, st_link) { 890 unsigned int rem = 891 min_t(unsigned int, ndwords - dw, max_dwords(obj)); 892 893 err = cpu_check(obj, ncontexts++, rem); 894 if (err) 895 goto out_test; 896 897 dw += rem; 898 } 899 900 i915_gem_drain_freed_objects(i915); 901 } 902 out_test: 903 throttle_release(tq, ARRAY_SIZE(tq)); 904 if (igt_live_test_end(&t)) 905 err = -EIO; 906 out_file: 907 fput(file); 908 return err; 909 } 910 911 static int rpcs_query_batch(struct drm_i915_gem_object *rpcs, 912 struct i915_vma *vma, 913 struct intel_engine_cs *engine) 914 { 915 u32 *cmd; 916 917 GEM_BUG_ON(GRAPHICS_VER(vma->vm->i915) < 8); 918 919 cmd = i915_gem_object_pin_map(rpcs, I915_MAP_WB); 920 if (IS_ERR(cmd)) 921 return PTR_ERR(cmd); 922 923 *cmd++ = MI_STORE_REGISTER_MEM_GEN8; 924 *cmd++ = i915_mmio_reg_offset(GEN8_R_PWR_CLK_STATE(engine->mmio_base)); 925 *cmd++ = lower_32_bits(i915_vma_offset(vma)); 926 *cmd++ = upper_32_bits(i915_vma_offset(vma)); 927 *cmd = MI_BATCH_BUFFER_END; 928 929 __i915_gem_object_flush_map(rpcs, 0, 64); 930 i915_gem_object_unpin_map(rpcs); 931 932 intel_gt_chipset_flush(vma->vm->gt); 933 934 return 0; 935 } 936 937 static int 938 emit_rpcs_query(struct drm_i915_gem_object *obj, 939 struct intel_context *ce, 940 struct i915_request **rq_out) 941 { 942 struct drm_i915_private *i915 = to_i915(obj->base.dev); 943 struct i915_request *rq; 944 struct i915_gem_ww_ctx ww; 945 struct i915_vma *batch; 946 struct i915_vma *vma; 947 struct drm_i915_gem_object *rpcs; 948 int err; 949 950 GEM_BUG_ON(!intel_engine_can_store_dword(ce->engine)); 951 952 if (GRAPHICS_VER(i915) < 8) 953 return -EINVAL; 954 955 vma = i915_vma_instance(obj, ce->vm, NULL); 956 if (IS_ERR(vma)) 957 return PTR_ERR(vma); 958 959 rpcs = i915_gem_object_create_internal(i915, PAGE_SIZE); 960 if (IS_ERR(rpcs)) 961 return PTR_ERR(rpcs); 962 963 batch = i915_vma_instance(rpcs, ce->vm, NULL); 964 if (IS_ERR(batch)) { 965 err = PTR_ERR(batch); 966 goto err_put; 967 } 968 969 i915_gem_ww_ctx_init(&ww, false); 970 retry: 971 err = i915_gem_object_lock(obj, &ww); 972 if (!err) 973 err = i915_gem_object_lock(rpcs, &ww); 974 if (!err) 975 err = i915_gem_object_set_to_gtt_domain(obj, false); 976 if (!err) 977 err = i915_vma_pin_ww(vma, &ww, 0, 0, PIN_USER); 978 if (err) 979 goto err_put; 980 981 err = i915_vma_pin_ww(batch, &ww, 0, 0, PIN_USER); 982 if (err) 983 goto err_vma; 984 985 err = rpcs_query_batch(rpcs, vma, ce->engine); 986 if (err) 987 goto err_batch; 988 989 rq = i915_request_create(ce); 990 if (IS_ERR(rq)) { 991 err = PTR_ERR(rq); 992 goto err_batch; 993 } 994 995 err = i915_vma_move_to_active(batch, rq, 0); 996 if (err) 997 goto skip_request; 998 999 err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE); 1000 if (err) 1001 goto skip_request; 1002 1003 if (rq->engine->emit_init_breadcrumb) { 1004 err = rq->engine->emit_init_breadcrumb(rq); 1005 if (err) 1006 goto skip_request; 1007 } 1008 1009 err = rq->engine->emit_bb_start(rq, 1010 i915_vma_offset(batch), 1011 i915_vma_size(batch), 1012 0); 1013 if (err) 1014 goto skip_request; 1015 1016 *rq_out = i915_request_get(rq); 1017 1018 skip_request: 1019 if (err) 1020 i915_request_set_error_once(rq, err); 1021 i915_request_add(rq); 1022 err_batch: 1023 i915_vma_unpin(batch); 1024 err_vma: 1025 i915_vma_unpin(vma); 1026 err_put: 1027 if (err == -EDEADLK) { 1028 err = i915_gem_ww_ctx_backoff(&ww); 1029 if (!err) 1030 goto retry; 1031 } 1032 i915_gem_ww_ctx_fini(&ww); 1033 i915_gem_object_put(rpcs); 1034 return err; 1035 } 1036 1037 #define TEST_IDLE BIT(0) 1038 #define TEST_BUSY BIT(1) 1039 #define TEST_RESET BIT(2) 1040 1041 static int 1042 __sseu_prepare(const char *name, 1043 unsigned int flags, 1044 struct intel_context *ce, 1045 struct igt_spinner **spin) 1046 { 1047 struct i915_request *rq; 1048 int ret; 1049 1050 *spin = NULL; 1051 if (!(flags & (TEST_BUSY | TEST_RESET))) 1052 return 0; 1053 1054 *spin = kzalloc(sizeof(**spin), GFP_KERNEL); 1055 if (!*spin) 1056 return -ENOMEM; 1057 1058 ret = igt_spinner_init(*spin, ce->engine->gt); 1059 if (ret) 1060 goto err_free; 1061 1062 rq = igt_spinner_create_request(*spin, ce, MI_NOOP); 1063 if (IS_ERR(rq)) { 1064 ret = PTR_ERR(rq); 1065 goto err_fini; 1066 } 1067 1068 i915_request_add(rq); 1069 1070 if (!igt_wait_for_spinner(*spin, rq)) { 1071 pr_err("%s: Spinner failed to start!\n", name); 1072 ret = -ETIMEDOUT; 1073 goto err_end; 1074 } 1075 1076 return 0; 1077 1078 err_end: 1079 igt_spinner_end(*spin); 1080 err_fini: 1081 igt_spinner_fini(*spin); 1082 err_free: 1083 kfree(fetch_and_zero(spin)); 1084 return ret; 1085 } 1086 1087 static int 1088 __read_slice_count(struct intel_context *ce, 1089 struct drm_i915_gem_object *obj, 1090 struct igt_spinner *spin, 1091 u32 *rpcs) 1092 { 1093 struct i915_request *rq = NULL; 1094 u32 s_mask, s_shift; 1095 unsigned int cnt; 1096 u32 *buf, val; 1097 long ret; 1098 1099 ret = emit_rpcs_query(obj, ce, &rq); 1100 if (ret) 1101 return ret; 1102 1103 if (spin) 1104 igt_spinner_end(spin); 1105 1106 ret = i915_request_wait(rq, 0, MAX_SCHEDULE_TIMEOUT); 1107 i915_request_put(rq); 1108 if (ret < 0) 1109 return ret; 1110 1111 buf = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WB); 1112 if (IS_ERR(buf)) { 1113 ret = PTR_ERR(buf); 1114 return ret; 1115 } 1116 1117 if (GRAPHICS_VER(ce->engine->i915) >= 11) { 1118 s_mask = GEN11_RPCS_S_CNT_MASK; 1119 s_shift = GEN11_RPCS_S_CNT_SHIFT; 1120 } else { 1121 s_mask = GEN8_RPCS_S_CNT_MASK; 1122 s_shift = GEN8_RPCS_S_CNT_SHIFT; 1123 } 1124 1125 val = *buf; 1126 cnt = (val & s_mask) >> s_shift; 1127 *rpcs = val; 1128 1129 i915_gem_object_unpin_map(obj); 1130 1131 return cnt; 1132 } 1133 1134 static int 1135 __check_rpcs(const char *name, u32 rpcs, int slices, unsigned int expected, 1136 const char *prefix, const char *suffix) 1137 { 1138 if (slices == expected) 1139 return 0; 1140 1141 if (slices < 0) { 1142 pr_err("%s: %s read slice count failed with %d%s\n", 1143 name, prefix, slices, suffix); 1144 return slices; 1145 } 1146 1147 pr_err("%s: %s slice count %d is not %u%s\n", 1148 name, prefix, slices, expected, suffix); 1149 1150 pr_info("RPCS=0x%x; %u%sx%u%s\n", 1151 rpcs, slices, 1152 (rpcs & GEN8_RPCS_S_CNT_ENABLE) ? "*" : "", 1153 (rpcs & GEN8_RPCS_SS_CNT_MASK) >> GEN8_RPCS_SS_CNT_SHIFT, 1154 (rpcs & GEN8_RPCS_SS_CNT_ENABLE) ? "*" : ""); 1155 1156 return -EINVAL; 1157 } 1158 1159 static int 1160 __sseu_finish(const char *name, 1161 unsigned int flags, 1162 struct intel_context *ce, 1163 struct drm_i915_gem_object *obj, 1164 unsigned int expected, 1165 struct igt_spinner *spin) 1166 { 1167 unsigned int slices = hweight32(ce->engine->sseu.slice_mask); 1168 u32 rpcs = 0; 1169 int ret = 0; 1170 1171 if (flags & TEST_RESET) { 1172 ret = intel_engine_reset(ce->engine, "sseu"); 1173 if (ret) 1174 goto out; 1175 } 1176 1177 ret = __read_slice_count(ce, obj, 1178 flags & TEST_RESET ? NULL : spin, &rpcs); 1179 ret = __check_rpcs(name, rpcs, ret, expected, "Context", "!"); 1180 if (ret) 1181 goto out; 1182 1183 ret = __read_slice_count(ce->engine->kernel_context, obj, NULL, &rpcs); 1184 ret = __check_rpcs(name, rpcs, ret, slices, "Kernel context", "!"); 1185 1186 out: 1187 if (spin) 1188 igt_spinner_end(spin); 1189 1190 if ((flags & TEST_IDLE) && ret == 0) { 1191 ret = igt_flush_test(ce->engine->i915); 1192 if (ret) 1193 return ret; 1194 1195 ret = __read_slice_count(ce, obj, NULL, &rpcs); 1196 ret = __check_rpcs(name, rpcs, ret, expected, 1197 "Context", " after idle!"); 1198 } 1199 1200 return ret; 1201 } 1202 1203 static int 1204 __sseu_test(const char *name, 1205 unsigned int flags, 1206 struct intel_context *ce, 1207 struct drm_i915_gem_object *obj, 1208 struct intel_sseu sseu) 1209 { 1210 struct igt_spinner *spin = NULL; 1211 int ret; 1212 1213 intel_engine_pm_get(ce->engine); 1214 1215 ret = __sseu_prepare(name, flags, ce, &spin); 1216 if (ret) 1217 goto out_pm; 1218 1219 ret = intel_context_reconfigure_sseu(ce, sseu); 1220 if (ret) 1221 goto out_spin; 1222 1223 ret = __sseu_finish(name, flags, ce, obj, 1224 hweight32(sseu.slice_mask), spin); 1225 1226 out_spin: 1227 if (spin) { 1228 igt_spinner_end(spin); 1229 igt_spinner_fini(spin); 1230 kfree(spin); 1231 } 1232 out_pm: 1233 intel_engine_pm_put(ce->engine); 1234 return ret; 1235 } 1236 1237 static int 1238 __igt_ctx_sseu(struct drm_i915_private *i915, 1239 const char *name, 1240 unsigned int flags) 1241 { 1242 struct drm_i915_gem_object *obj; 1243 int inst = 0; 1244 int ret = 0; 1245 1246 if (GRAPHICS_VER(i915) < 9) 1247 return 0; 1248 1249 if (flags & TEST_RESET) 1250 igt_global_reset_lock(to_gt(i915)); 1251 1252 obj = i915_gem_object_create_internal(i915, PAGE_SIZE); 1253 if (IS_ERR(obj)) { 1254 ret = PTR_ERR(obj); 1255 goto out_unlock; 1256 } 1257 1258 do { 1259 struct intel_engine_cs *engine; 1260 struct intel_context *ce; 1261 struct intel_sseu pg_sseu; 1262 1263 engine = intel_engine_lookup_user(i915, 1264 I915_ENGINE_CLASS_RENDER, 1265 inst++); 1266 if (!engine) 1267 break; 1268 1269 if (hweight32(engine->sseu.slice_mask) < 2) 1270 continue; 1271 1272 if (!engine->gt->info.sseu.has_slice_pg) 1273 continue; 1274 1275 /* 1276 * Gen11 VME friendly power-gated configuration with 1277 * half enabled sub-slices. 1278 */ 1279 pg_sseu = engine->sseu; 1280 pg_sseu.slice_mask = 1; 1281 pg_sseu.subslice_mask = 1282 ~(~0 << (hweight32(engine->sseu.subslice_mask) / 2)); 1283 1284 pr_info("%s: SSEU subtest '%s', flags=%x, def_slices=%u, pg_slices=%u\n", 1285 engine->name, name, flags, 1286 hweight32(engine->sseu.slice_mask), 1287 hweight32(pg_sseu.slice_mask)); 1288 1289 ce = intel_context_create(engine); 1290 if (IS_ERR(ce)) { 1291 ret = PTR_ERR(ce); 1292 goto out_put; 1293 } 1294 1295 ret = intel_context_pin(ce); 1296 if (ret) 1297 goto out_ce; 1298 1299 /* First set the default mask. */ 1300 ret = __sseu_test(name, flags, ce, obj, engine->sseu); 1301 if (ret) 1302 goto out_unpin; 1303 1304 /* Then set a power-gated configuration. */ 1305 ret = __sseu_test(name, flags, ce, obj, pg_sseu); 1306 if (ret) 1307 goto out_unpin; 1308 1309 /* Back to defaults. */ 1310 ret = __sseu_test(name, flags, ce, obj, engine->sseu); 1311 if (ret) 1312 goto out_unpin; 1313 1314 /* One last power-gated configuration for the road. */ 1315 ret = __sseu_test(name, flags, ce, obj, pg_sseu); 1316 if (ret) 1317 goto out_unpin; 1318 1319 out_unpin: 1320 intel_context_unpin(ce); 1321 out_ce: 1322 intel_context_put(ce); 1323 } while (!ret); 1324 1325 if (igt_flush_test(i915)) 1326 ret = -EIO; 1327 1328 out_put: 1329 i915_gem_object_put(obj); 1330 1331 out_unlock: 1332 if (flags & TEST_RESET) 1333 igt_global_reset_unlock(to_gt(i915)); 1334 1335 if (ret) 1336 pr_err("%s: Failed with %d!\n", name, ret); 1337 1338 return ret; 1339 } 1340 1341 static int igt_ctx_sseu(void *arg) 1342 { 1343 struct { 1344 const char *name; 1345 unsigned int flags; 1346 } *phase, phases[] = { 1347 { .name = "basic", .flags = 0 }, 1348 { .name = "idle", .flags = TEST_IDLE }, 1349 { .name = "busy", .flags = TEST_BUSY }, 1350 { .name = "busy-reset", .flags = TEST_BUSY | TEST_RESET }, 1351 { .name = "busy-idle", .flags = TEST_BUSY | TEST_IDLE }, 1352 { .name = "reset-idle", .flags = TEST_RESET | TEST_IDLE }, 1353 }; 1354 unsigned int i; 1355 int ret = 0; 1356 1357 for (i = 0, phase = phases; ret == 0 && i < ARRAY_SIZE(phases); 1358 i++, phase++) 1359 ret = __igt_ctx_sseu(arg, phase->name, phase->flags); 1360 1361 return ret; 1362 } 1363 1364 static int igt_ctx_readonly(void *arg) 1365 { 1366 struct drm_i915_private *i915 = arg; 1367 unsigned long idx, ndwords, dw, num_engines; 1368 struct drm_i915_gem_object *obj = NULL; 1369 struct i915_request *tq[5] = {}; 1370 struct i915_gem_engines_iter it; 1371 struct i915_address_space *vm; 1372 struct i915_gem_context *ctx; 1373 struct intel_context *ce; 1374 struct igt_live_test t; 1375 I915_RND_STATE(prng); 1376 IGT_TIMEOUT(end_time); 1377 LIST_HEAD(objects); 1378 struct file *file; 1379 int err = -ENODEV; 1380 1381 /* 1382 * Create a few read-only objects (with the occasional writable object) 1383 * and try to write into these object checking that the GPU discards 1384 * any write to a read-only object. 1385 */ 1386 1387 file = mock_file(i915); 1388 if (IS_ERR(file)) 1389 return PTR_ERR(file); 1390 1391 err = igt_live_test_begin(&t, i915, __func__, ""); 1392 if (err) 1393 goto out_file; 1394 1395 ctx = live_context(i915, file); 1396 if (IS_ERR(ctx)) { 1397 err = PTR_ERR(ctx); 1398 goto out_file; 1399 } 1400 1401 vm = ctx->vm ?: &to_gt(i915)->ggtt->alias->vm; 1402 if (!vm || !vm->has_read_only) { 1403 err = 0; 1404 goto out_file; 1405 } 1406 1407 num_engines = 0; 1408 for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) 1409 if (intel_engine_can_store_dword(ce->engine)) 1410 num_engines++; 1411 i915_gem_context_unlock_engines(ctx); 1412 1413 ndwords = 0; 1414 dw = 0; 1415 while (!time_after(jiffies, end_time)) { 1416 for_each_gem_engine(ce, 1417 i915_gem_context_lock_engines(ctx), it) { 1418 if (!intel_engine_can_store_dword(ce->engine)) 1419 continue; 1420 1421 if (!obj) { 1422 obj = create_test_object(ce->vm, file, &objects); 1423 if (IS_ERR(obj)) { 1424 err = PTR_ERR(obj); 1425 i915_gem_context_unlock_engines(ctx); 1426 goto out_file; 1427 } 1428 1429 if (prandom_u32_state(&prng) & 1) 1430 i915_gem_object_set_readonly(obj); 1431 } 1432 1433 err = gpu_fill(ce, obj, dw); 1434 if (err) { 1435 pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) [full-ppgtt? %s], err=%d\n", 1436 ndwords, dw, max_dwords(obj), 1437 ce->engine->name, 1438 str_yes_no(i915_gem_context_has_full_ppgtt(ctx)), 1439 err); 1440 i915_gem_context_unlock_engines(ctx); 1441 goto out_file; 1442 } 1443 1444 err = throttle(ce, tq, ARRAY_SIZE(tq)); 1445 if (err) { 1446 i915_gem_context_unlock_engines(ctx); 1447 goto out_file; 1448 } 1449 1450 if (++dw == max_dwords(obj)) { 1451 obj = NULL; 1452 dw = 0; 1453 } 1454 ndwords++; 1455 } 1456 i915_gem_context_unlock_engines(ctx); 1457 } 1458 pr_info("Submitted %lu dwords (across %lu engines)\n", 1459 ndwords, num_engines); 1460 1461 dw = 0; 1462 idx = 0; 1463 list_for_each_entry(obj, &objects, st_link) { 1464 unsigned int rem = 1465 min_t(unsigned int, ndwords - dw, max_dwords(obj)); 1466 unsigned int num_writes; 1467 1468 num_writes = rem; 1469 if (i915_gem_object_is_readonly(obj)) 1470 num_writes = 0; 1471 1472 err = cpu_check(obj, idx++, num_writes); 1473 if (err) 1474 break; 1475 1476 dw += rem; 1477 } 1478 1479 out_file: 1480 throttle_release(tq, ARRAY_SIZE(tq)); 1481 if (igt_live_test_end(&t)) 1482 err = -EIO; 1483 1484 fput(file); 1485 return err; 1486 } 1487 1488 static int check_scratch(struct i915_address_space *vm, u64 offset) 1489 { 1490 struct drm_mm_node *node; 1491 1492 mutex_lock(&vm->mutex); 1493 node = __drm_mm_interval_first(&vm->mm, 1494 offset, offset + sizeof(u32) - 1); 1495 mutex_unlock(&vm->mutex); 1496 if (!node || node->start > offset) 1497 return 0; 1498 1499 GEM_BUG_ON(offset >= node->start + node->size); 1500 1501 pr_err("Target offset 0x%08x_%08x overlaps with a node in the mm!\n", 1502 upper_32_bits(offset), lower_32_bits(offset)); 1503 return -EINVAL; 1504 } 1505 1506 static int write_to_scratch(struct i915_gem_context *ctx, 1507 struct intel_engine_cs *engine, 1508 struct drm_i915_gem_object *obj, 1509 u64 offset, u32 value) 1510 { 1511 struct drm_i915_private *i915 = ctx->i915; 1512 struct i915_address_space *vm; 1513 struct i915_request *rq; 1514 struct i915_vma *vma; 1515 u32 *cmd; 1516 int err; 1517 1518 GEM_BUG_ON(offset < I915_GTT_PAGE_SIZE); 1519 1520 err = check_scratch(ctx->vm, offset); 1521 if (err) 1522 return err; 1523 1524 cmd = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WB); 1525 if (IS_ERR(cmd)) 1526 return PTR_ERR(cmd); 1527 1528 *cmd++ = MI_STORE_DWORD_IMM_GEN4; 1529 if (GRAPHICS_VER(i915) >= 8) { 1530 *cmd++ = lower_32_bits(offset); 1531 *cmd++ = upper_32_bits(offset); 1532 } else { 1533 *cmd++ = 0; 1534 *cmd++ = offset; 1535 } 1536 *cmd++ = value; 1537 *cmd = MI_BATCH_BUFFER_END; 1538 __i915_gem_object_flush_map(obj, 0, 64); 1539 i915_gem_object_unpin_map(obj); 1540 1541 intel_gt_chipset_flush(engine->gt); 1542 1543 vm = i915_gem_context_get_eb_vm(ctx); 1544 vma = i915_vma_instance(obj, vm, NULL); 1545 if (IS_ERR(vma)) { 1546 err = PTR_ERR(vma); 1547 goto out_vm; 1548 } 1549 1550 err = i915_vma_pin(vma, 0, 0, PIN_USER | PIN_OFFSET_FIXED); 1551 if (err) 1552 goto out_vm; 1553 1554 rq = igt_request_alloc(ctx, engine); 1555 if (IS_ERR(rq)) { 1556 err = PTR_ERR(rq); 1557 goto err_unpin; 1558 } 1559 1560 err = igt_vma_move_to_active_unlocked(vma, rq, 0); 1561 if (err) 1562 goto skip_request; 1563 1564 if (rq->engine->emit_init_breadcrumb) { 1565 err = rq->engine->emit_init_breadcrumb(rq); 1566 if (err) 1567 goto skip_request; 1568 } 1569 1570 err = engine->emit_bb_start(rq, i915_vma_offset(vma), 1571 i915_vma_size(vma), 0); 1572 if (err) 1573 goto skip_request; 1574 1575 i915_vma_unpin(vma); 1576 1577 i915_request_add(rq); 1578 1579 goto out_vm; 1580 skip_request: 1581 i915_request_set_error_once(rq, err); 1582 i915_request_add(rq); 1583 err_unpin: 1584 i915_vma_unpin(vma); 1585 out_vm: 1586 i915_vm_put(vm); 1587 1588 if (!err) 1589 err = i915_gem_object_wait(obj, 0, MAX_SCHEDULE_TIMEOUT); 1590 1591 return err; 1592 } 1593 1594 static int read_from_scratch(struct i915_gem_context *ctx, 1595 struct intel_engine_cs *engine, 1596 struct drm_i915_gem_object *obj, 1597 u64 offset, u32 *value) 1598 { 1599 struct drm_i915_private *i915 = ctx->i915; 1600 struct i915_address_space *vm; 1601 const u32 result = 0x100; 1602 struct i915_request *rq; 1603 struct i915_vma *vma; 1604 unsigned int flags; 1605 u32 *cmd; 1606 int err; 1607 1608 GEM_BUG_ON(offset < I915_GTT_PAGE_SIZE); 1609 1610 err = check_scratch(ctx->vm, offset); 1611 if (err) 1612 return err; 1613 1614 if (GRAPHICS_VER(i915) >= 8) { 1615 const u32 GPR0 = engine->mmio_base + 0x600; 1616 1617 vm = i915_gem_context_get_eb_vm(ctx); 1618 vma = i915_vma_instance(obj, vm, NULL); 1619 if (IS_ERR(vma)) { 1620 err = PTR_ERR(vma); 1621 goto out_vm; 1622 } 1623 1624 err = i915_vma_pin(vma, 0, 0, PIN_USER | PIN_OFFSET_FIXED); 1625 if (err) 1626 goto out_vm; 1627 1628 cmd = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WB); 1629 if (IS_ERR(cmd)) { 1630 err = PTR_ERR(cmd); 1631 goto err_unpin; 1632 } 1633 1634 memset(cmd, POISON_INUSE, PAGE_SIZE); 1635 *cmd++ = MI_LOAD_REGISTER_MEM_GEN8; 1636 *cmd++ = GPR0; 1637 *cmd++ = lower_32_bits(offset); 1638 *cmd++ = upper_32_bits(offset); 1639 *cmd++ = MI_STORE_REGISTER_MEM_GEN8; 1640 *cmd++ = GPR0; 1641 *cmd++ = result; 1642 *cmd++ = 0; 1643 *cmd = MI_BATCH_BUFFER_END; 1644 1645 i915_gem_object_flush_map(obj); 1646 i915_gem_object_unpin_map(obj); 1647 1648 flags = 0; 1649 } else { 1650 const u32 reg = engine->mmio_base + 0x420; 1651 1652 /* hsw: register access even to 3DPRIM! is protected */ 1653 vm = i915_vm_get(&engine->gt->ggtt->vm); 1654 vma = i915_vma_instance(obj, vm, NULL); 1655 if (IS_ERR(vma)) { 1656 err = PTR_ERR(vma); 1657 goto out_vm; 1658 } 1659 1660 err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL); 1661 if (err) 1662 goto out_vm; 1663 1664 cmd = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WB); 1665 if (IS_ERR(cmd)) { 1666 err = PTR_ERR(cmd); 1667 goto err_unpin; 1668 } 1669 1670 memset(cmd, POISON_INUSE, PAGE_SIZE); 1671 *cmd++ = MI_LOAD_REGISTER_MEM; 1672 *cmd++ = reg; 1673 *cmd++ = offset; 1674 *cmd++ = MI_STORE_REGISTER_MEM | MI_USE_GGTT; 1675 *cmd++ = reg; 1676 *cmd++ = i915_vma_offset(vma) + result; 1677 *cmd = MI_BATCH_BUFFER_END; 1678 1679 i915_gem_object_flush_map(obj); 1680 i915_gem_object_unpin_map(obj); 1681 1682 flags = I915_DISPATCH_SECURE; 1683 } 1684 1685 intel_gt_chipset_flush(engine->gt); 1686 1687 rq = igt_request_alloc(ctx, engine); 1688 if (IS_ERR(rq)) { 1689 err = PTR_ERR(rq); 1690 goto err_unpin; 1691 } 1692 1693 err = igt_vma_move_to_active_unlocked(vma, rq, EXEC_OBJECT_WRITE); 1694 if (err) 1695 goto skip_request; 1696 1697 if (rq->engine->emit_init_breadcrumb) { 1698 err = rq->engine->emit_init_breadcrumb(rq); 1699 if (err) 1700 goto skip_request; 1701 } 1702 1703 err = engine->emit_bb_start(rq, i915_vma_offset(vma), 1704 i915_vma_size(vma), flags); 1705 if (err) 1706 goto skip_request; 1707 1708 i915_vma_unpin(vma); 1709 1710 i915_request_add(rq); 1711 1712 i915_gem_object_lock(obj, NULL); 1713 err = i915_gem_object_set_to_cpu_domain(obj, false); 1714 i915_gem_object_unlock(obj); 1715 if (err) 1716 goto out_vm; 1717 1718 cmd = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WB); 1719 if (IS_ERR(cmd)) { 1720 err = PTR_ERR(cmd); 1721 goto out_vm; 1722 } 1723 1724 *value = cmd[result / sizeof(*cmd)]; 1725 i915_gem_object_unpin_map(obj); 1726 1727 goto out_vm; 1728 skip_request: 1729 i915_request_set_error_once(rq, err); 1730 i915_request_add(rq); 1731 err_unpin: 1732 i915_vma_unpin(vma); 1733 out_vm: 1734 i915_vm_put(vm); 1735 1736 if (!err) 1737 err = i915_gem_object_wait(obj, 0, MAX_SCHEDULE_TIMEOUT); 1738 1739 return err; 1740 } 1741 1742 static int check_scratch_page(struct i915_gem_context *ctx, u32 *out) 1743 { 1744 struct i915_address_space *vm; 1745 u32 *vaddr; 1746 int err = 0; 1747 1748 vm = ctx->vm; 1749 if (!vm) 1750 return -ENODEV; 1751 1752 if (!vm->scratch[0]) { 1753 pr_err("No scratch page!\n"); 1754 return -EINVAL; 1755 } 1756 1757 vaddr = __px_vaddr(vm->scratch[0]); 1758 1759 memcpy(out, vaddr, sizeof(*out)); 1760 if (memchr_inv(vaddr, *out, PAGE_SIZE)) { 1761 pr_err("Inconsistent initial state of scratch page!\n"); 1762 err = -EINVAL; 1763 } 1764 1765 return err; 1766 } 1767 1768 static int igt_vm_isolation(void *arg) 1769 { 1770 struct drm_i915_private *i915 = arg; 1771 struct i915_gem_context *ctx_a, *ctx_b; 1772 struct drm_i915_gem_object *obj_a, *obj_b; 1773 unsigned long num_engines, count; 1774 struct intel_engine_cs *engine; 1775 struct igt_live_test t; 1776 I915_RND_STATE(prng); 1777 struct file *file; 1778 u64 vm_total; 1779 u32 expected; 1780 int err; 1781 1782 if (GRAPHICS_VER(i915) < 7) 1783 return 0; 1784 1785 /* 1786 * The simple goal here is that a write into one context is not 1787 * observed in a second (separate page tables and scratch). 1788 */ 1789 1790 file = mock_file(i915); 1791 if (IS_ERR(file)) 1792 return PTR_ERR(file); 1793 1794 err = igt_live_test_begin(&t, i915, __func__, ""); 1795 if (err) 1796 goto out_file; 1797 1798 ctx_a = live_context(i915, file); 1799 if (IS_ERR(ctx_a)) { 1800 err = PTR_ERR(ctx_a); 1801 goto out_file; 1802 } 1803 1804 ctx_b = live_context(i915, file); 1805 if (IS_ERR(ctx_b)) { 1806 err = PTR_ERR(ctx_b); 1807 goto out_file; 1808 } 1809 1810 /* We can only test vm isolation, if the vm are distinct */ 1811 if (ctx_a->vm == ctx_b->vm) 1812 goto out_file; 1813 1814 /* Read the initial state of the scratch page */ 1815 err = check_scratch_page(ctx_a, &expected); 1816 if (err) 1817 goto out_file; 1818 1819 err = check_scratch_page(ctx_b, &expected); 1820 if (err) 1821 goto out_file; 1822 1823 vm_total = ctx_a->vm->total; 1824 GEM_BUG_ON(ctx_b->vm->total != vm_total); 1825 1826 obj_a = i915_gem_object_create_internal(i915, PAGE_SIZE); 1827 if (IS_ERR(obj_a)) { 1828 err = PTR_ERR(obj_a); 1829 goto out_file; 1830 } 1831 1832 obj_b = i915_gem_object_create_internal(i915, PAGE_SIZE); 1833 if (IS_ERR(obj_b)) { 1834 err = PTR_ERR(obj_b); 1835 goto put_a; 1836 } 1837 1838 count = 0; 1839 num_engines = 0; 1840 for_each_uabi_engine(engine, i915) { 1841 IGT_TIMEOUT(end_time); 1842 unsigned long this = 0; 1843 1844 if (!intel_engine_can_store_dword(engine)) 1845 continue; 1846 1847 /* Not all engines have their own GPR! */ 1848 if (GRAPHICS_VER(i915) < 8 && engine->class != RENDER_CLASS) 1849 continue; 1850 1851 while (!__igt_timeout(end_time, NULL)) { 1852 u32 value = 0xc5c5c5c5; 1853 u64 offset; 1854 1855 /* Leave enough space at offset 0 for the batch */ 1856 offset = igt_random_offset(&prng, 1857 I915_GTT_PAGE_SIZE, vm_total, 1858 sizeof(u32), alignof_dword); 1859 1860 err = write_to_scratch(ctx_a, engine, obj_a, 1861 offset, 0xdeadbeef); 1862 if (err == 0) 1863 err = read_from_scratch(ctx_b, engine, obj_b, 1864 offset, &value); 1865 if (err) 1866 goto put_b; 1867 1868 if (value != expected) { 1869 pr_err("%s: Read %08x from scratch (offset 0x%08x_%08x), after %lu reads!\n", 1870 engine->name, value, 1871 upper_32_bits(offset), 1872 lower_32_bits(offset), 1873 this); 1874 err = -EINVAL; 1875 goto put_b; 1876 } 1877 1878 this++; 1879 } 1880 count += this; 1881 num_engines++; 1882 } 1883 pr_info("Checked %lu scratch offsets across %lu engines\n", 1884 count, num_engines); 1885 1886 put_b: 1887 i915_gem_object_put(obj_b); 1888 put_a: 1889 i915_gem_object_put(obj_a); 1890 out_file: 1891 if (igt_live_test_end(&t)) 1892 err = -EIO; 1893 fput(file); 1894 return err; 1895 } 1896 1897 int i915_gem_context_live_selftests(struct drm_i915_private *i915) 1898 { 1899 static const struct i915_subtest tests[] = { 1900 SUBTEST(live_nop_switch), 1901 SUBTEST(live_parallel_switch), 1902 SUBTEST(igt_ctx_exec), 1903 SUBTEST(igt_ctx_readonly), 1904 SUBTEST(igt_ctx_sseu), 1905 SUBTEST(igt_shared_ctx_exec), 1906 SUBTEST(igt_vm_isolation), 1907 }; 1908 1909 if (intel_gt_is_wedged(to_gt(i915))) 1910 return 0; 1911 1912 return i915_live_subtests(tests, i915); 1913 } 1914