1 /* 2 * SPDX-License-Identifier: MIT 3 * 4 * Copyright © 2017 Intel Corporation 5 */ 6 7 #include <linux/prime_numbers.h> 8 #include <linux/string_helpers.h> 9 10 #include "gem/i915_gem_internal.h" 11 #include "gem/i915_gem_pm.h" 12 #include "gt/intel_engine_pm.h" 13 #include "gt/intel_engine_regs.h" 14 #include "gt/intel_gt.h" 15 #include "gt/intel_gt_requests.h" 16 #include "gt/intel_reset.h" 17 #include "i915_selftest.h" 18 19 #include "gem/selftests/igt_gem_utils.h" 20 #include "selftests/i915_random.h" 21 #include "selftests/igt_flush_test.h" 22 #include "selftests/igt_live_test.h" 23 #include "selftests/igt_reset.h" 24 #include "selftests/igt_spinner.h" 25 #include "selftests/mock_drm.h" 26 #include "selftests/mock_gem_device.h" 27 28 #include "huge_gem_object.h" 29 #include "igt_gem_utils.h" 30 31 #define DW_PER_PAGE (PAGE_SIZE / sizeof(u32)) 32 33 static int live_nop_switch(void *arg) 34 { 35 const unsigned int nctx = 1024; 36 struct drm_i915_private *i915 = arg; 37 struct intel_engine_cs *engine; 38 struct i915_gem_context **ctx; 39 struct igt_live_test t; 40 struct file *file; 41 unsigned long n; 42 int err = -ENODEV; 43 44 /* 45 * Create as many contexts as we can feasibly get away with 46 * and check we can switch between them rapidly. 47 * 48 * Serves as very simple stress test for submission and HW switching 49 * between contexts. 50 */ 51 52 if (!DRIVER_CAPS(i915)->has_logical_contexts) 53 return 0; 54 55 file = mock_file(i915); 56 if (IS_ERR(file)) 57 return PTR_ERR(file); 58 59 ctx = kcalloc(nctx, sizeof(*ctx), GFP_KERNEL); 60 if (!ctx) { 61 err = -ENOMEM; 62 goto out_file; 63 } 64 65 for (n = 0; n < nctx; n++) { 66 ctx[n] = live_context(i915, file); 67 if (IS_ERR(ctx[n])) { 68 err = PTR_ERR(ctx[n]); 69 goto out_file; 70 } 71 } 72 73 for_each_uabi_engine(engine, i915) { 74 struct i915_request *rq = NULL; 75 unsigned long end_time, prime; 76 ktime_t times[2] = {}; 77 78 times[0] = ktime_get_raw(); 79 for (n = 0; n < nctx; n++) { 80 struct i915_request *this; 81 82 this = igt_request_alloc(ctx[n], engine); 83 if (IS_ERR(this)) { 84 err = PTR_ERR(this); 85 goto out_file; 86 } 87 if (rq) { 88 i915_request_await_dma_fence(this, &rq->fence); 89 i915_request_put(rq); 90 } 91 rq = i915_request_get(this); 92 i915_request_add(this); 93 } 94 if (i915_request_wait(rq, 0, 10 * HZ) < 0) { 95 pr_err("Failed to populated %d contexts\n", nctx); 96 intel_gt_set_wedged(to_gt(i915)); 97 i915_request_put(rq); 98 err = -EIO; 99 goto out_file; 100 } 101 i915_request_put(rq); 102 103 times[1] = ktime_get_raw(); 104 105 pr_info("Populated %d contexts on %s in %lluns\n", 106 nctx, engine->name, ktime_to_ns(times[1] - times[0])); 107 108 err = igt_live_test_begin(&t, i915, __func__, engine->name); 109 if (err) 110 goto out_file; 111 112 end_time = jiffies + i915_selftest.timeout_jiffies; 113 for_each_prime_number_from(prime, 2, 8192) { 114 times[1] = ktime_get_raw(); 115 116 rq = NULL; 117 for (n = 0; n < prime; n++) { 118 struct i915_request *this; 119 120 this = igt_request_alloc(ctx[n % nctx], engine); 121 if (IS_ERR(this)) { 122 err = PTR_ERR(this); 123 goto out_file; 124 } 125 126 if (rq) { /* Force submission order */ 127 i915_request_await_dma_fence(this, &rq->fence); 128 i915_request_put(rq); 129 } 130 131 /* 132 * This space is left intentionally blank. 133 * 134 * We do not actually want to perform any 135 * action with this request, we just want 136 * to measure the latency in allocation 137 * and submission of our breadcrumbs - 138 * ensuring that the bare request is sufficient 139 * for the system to work (i.e. proper HEAD 140 * tracking of the rings, interrupt handling, 141 * etc). It also gives us the lowest bounds 142 * for latency. 143 */ 144 145 rq = i915_request_get(this); 146 i915_request_add(this); 147 } 148 GEM_BUG_ON(!rq); 149 if (i915_request_wait(rq, 0, HZ / 5) < 0) { 150 pr_err("Switching between %ld contexts timed out\n", 151 prime); 152 intel_gt_set_wedged(to_gt(i915)); 153 i915_request_put(rq); 154 break; 155 } 156 i915_request_put(rq); 157 158 times[1] = ktime_sub(ktime_get_raw(), times[1]); 159 if (prime == 2) 160 times[0] = times[1]; 161 162 if (__igt_timeout(end_time, NULL)) 163 break; 164 } 165 166 err = igt_live_test_end(&t); 167 if (err) 168 goto out_file; 169 170 pr_info("Switch latencies on %s: 1 = %lluns, %lu = %lluns\n", 171 engine->name, 172 ktime_to_ns(times[0]), 173 prime - 1, div64_u64(ktime_to_ns(times[1]), prime - 1)); 174 } 175 176 out_file: 177 fput(file); 178 return err; 179 } 180 181 struct parallel_switch { 182 struct kthread_worker *worker; 183 struct kthread_work work; 184 struct intel_context *ce[2]; 185 int result; 186 }; 187 188 static void __live_parallel_switch1(struct kthread_work *work) 189 { 190 struct parallel_switch *arg = 191 container_of(work, typeof(*arg), work); 192 IGT_TIMEOUT(end_time); 193 unsigned long count; 194 195 count = 0; 196 arg->result = 0; 197 do { 198 struct i915_request *rq = NULL; 199 int n; 200 201 for (n = 0; !arg->result && n < ARRAY_SIZE(arg->ce); n++) { 202 struct i915_request *prev = rq; 203 204 rq = i915_request_create(arg->ce[n]); 205 if (IS_ERR(rq)) { 206 i915_request_put(prev); 207 arg->result = PTR_ERR(rq); 208 break; 209 } 210 211 i915_request_get(rq); 212 if (prev) { 213 arg->result = 214 i915_request_await_dma_fence(rq, 215 &prev->fence); 216 i915_request_put(prev); 217 } 218 219 i915_request_add(rq); 220 } 221 222 if (IS_ERR_OR_NULL(rq)) 223 break; 224 225 if (i915_request_wait(rq, 0, HZ) < 0) 226 arg->result = -ETIME; 227 228 i915_request_put(rq); 229 230 count++; 231 } while (!arg->result && !__igt_timeout(end_time, NULL)); 232 233 pr_info("%s: %lu switches (sync) <%d>\n", 234 arg->ce[0]->engine->name, count, arg->result); 235 } 236 237 static void __live_parallel_switchN(struct kthread_work *work) 238 { 239 struct parallel_switch *arg = 240 container_of(work, typeof(*arg), work); 241 struct i915_request *rq = NULL; 242 IGT_TIMEOUT(end_time); 243 unsigned long count; 244 int n; 245 246 count = 0; 247 arg->result = 0; 248 do { 249 for (n = 0; !arg->result && n < ARRAY_SIZE(arg->ce); n++) { 250 struct i915_request *prev = rq; 251 252 rq = i915_request_create(arg->ce[n]); 253 if (IS_ERR(rq)) { 254 i915_request_put(prev); 255 arg->result = PTR_ERR(rq); 256 break; 257 } 258 259 i915_request_get(rq); 260 if (prev) { 261 arg->result = 262 i915_request_await_dma_fence(rq, 263 &prev->fence); 264 i915_request_put(prev); 265 } 266 267 i915_request_add(rq); 268 } 269 270 count++; 271 } while (!arg->result && !__igt_timeout(end_time, NULL)); 272 273 if (!IS_ERR_OR_NULL(rq)) 274 i915_request_put(rq); 275 276 pr_info("%s: %lu switches (many) <%d>\n", 277 arg->ce[0]->engine->name, count, arg->result); 278 } 279 280 static int live_parallel_switch(void *arg) 281 { 282 struct drm_i915_private *i915 = arg; 283 static void (* const func[])(struct kthread_work *) = { 284 __live_parallel_switch1, 285 __live_parallel_switchN, 286 NULL, 287 }; 288 struct parallel_switch *data = NULL; 289 struct i915_gem_engines *engines; 290 struct i915_gem_engines_iter it; 291 void (* const *fn)(struct kthread_work *); 292 struct i915_gem_context *ctx; 293 struct intel_context *ce; 294 struct file *file; 295 int n, m, count; 296 int err = 0; 297 298 /* 299 * Check we can process switches on all engines simultaneously. 300 */ 301 302 if (!DRIVER_CAPS(i915)->has_logical_contexts) 303 return 0; 304 305 file = mock_file(i915); 306 if (IS_ERR(file)) 307 return PTR_ERR(file); 308 309 ctx = live_context(i915, file); 310 if (IS_ERR(ctx)) { 311 err = PTR_ERR(ctx); 312 goto out_file; 313 } 314 315 engines = i915_gem_context_lock_engines(ctx); 316 count = engines->num_engines; 317 318 data = kcalloc(count, sizeof(*data), GFP_KERNEL); 319 if (!data) { 320 i915_gem_context_unlock_engines(ctx); 321 err = -ENOMEM; 322 goto out_file; 323 } 324 325 m = 0; /* Use the first context as our template for the engines */ 326 for_each_gem_engine(ce, engines, it) { 327 err = intel_context_pin(ce); 328 if (err) { 329 i915_gem_context_unlock_engines(ctx); 330 goto out; 331 } 332 data[m++].ce[0] = intel_context_get(ce); 333 } 334 i915_gem_context_unlock_engines(ctx); 335 336 /* Clone the same set of engines into the other contexts */ 337 for (n = 1; n < ARRAY_SIZE(data->ce); n++) { 338 ctx = live_context(i915, file); 339 if (IS_ERR(ctx)) { 340 err = PTR_ERR(ctx); 341 goto out; 342 } 343 344 for (m = 0; m < count; m++) { 345 if (!data[m].ce[0]) 346 continue; 347 348 ce = intel_context_create(data[m].ce[0]->engine); 349 if (IS_ERR(ce)) 350 goto out; 351 352 err = intel_context_pin(ce); 353 if (err) { 354 intel_context_put(ce); 355 goto out; 356 } 357 358 data[m].ce[n] = ce; 359 } 360 } 361 362 for (n = 0; n < count; n++) { 363 struct kthread_worker *worker; 364 365 if (!data[n].ce[0]) 366 continue; 367 368 worker = kthread_create_worker(0, "igt/parallel:%s", 369 data[n].ce[0]->engine->name); 370 if (IS_ERR(worker)) 371 goto out; 372 373 data[n].worker = worker; 374 } 375 376 for (fn = func; !err && *fn; fn++) { 377 struct igt_live_test t; 378 379 err = igt_live_test_begin(&t, i915, __func__, ""); 380 if (err) 381 break; 382 383 for (n = 0; n < count; n++) { 384 if (!data[n].ce[0]) 385 continue; 386 387 data[n].result = 0; 388 kthread_init_work(&data[n].work, *fn); 389 kthread_queue_work(data[n].worker, &data[n].work); 390 } 391 392 for (n = 0; n < count; n++) { 393 if (data[n].ce[0]) { 394 kthread_flush_work(&data[n].work); 395 if (data[n].result && !err) 396 err = data[n].result; 397 } 398 } 399 400 if (igt_live_test_end(&t)) 401 err = -EIO; 402 } 403 404 out: 405 for (n = 0; n < count; n++) { 406 for (m = 0; m < ARRAY_SIZE(data->ce); m++) { 407 if (!data[n].ce[m]) 408 continue; 409 410 intel_context_unpin(data[n].ce[m]); 411 intel_context_put(data[n].ce[m]); 412 } 413 414 if (data[n].worker) 415 kthread_destroy_worker(data[n].worker); 416 } 417 kfree(data); 418 out_file: 419 fput(file); 420 return err; 421 } 422 423 static unsigned long real_page_count(struct drm_i915_gem_object *obj) 424 { 425 return huge_gem_object_phys_size(obj) >> PAGE_SHIFT; 426 } 427 428 static unsigned long fake_page_count(struct drm_i915_gem_object *obj) 429 { 430 return huge_gem_object_dma_size(obj) >> PAGE_SHIFT; 431 } 432 433 static int gpu_fill(struct intel_context *ce, 434 struct drm_i915_gem_object *obj, 435 unsigned int dw) 436 { 437 struct i915_vma *vma; 438 int err; 439 440 GEM_BUG_ON(obj->base.size > ce->vm->total); 441 GEM_BUG_ON(!intel_engine_can_store_dword(ce->engine)); 442 443 vma = i915_vma_instance(obj, ce->vm, NULL); 444 if (IS_ERR(vma)) 445 return PTR_ERR(vma); 446 447 err = i915_vma_pin(vma, 0, 0, PIN_HIGH | PIN_USER); 448 if (err) 449 return err; 450 451 /* 452 * Within the GTT the huge objects maps every page onto 453 * its 1024 real pages (using phys_pfn = dma_pfn % 1024). 454 * We set the nth dword within the page using the nth 455 * mapping via the GTT - this should exercise the GTT mapping 456 * whilst checking that each context provides a unique view 457 * into the object. 458 */ 459 err = igt_gpu_fill_dw(ce, vma, 460 (dw * real_page_count(obj)) << PAGE_SHIFT | 461 (dw * sizeof(u32)), 462 real_page_count(obj), 463 dw); 464 i915_vma_unpin(vma); 465 466 return err; 467 } 468 469 static int cpu_fill(struct drm_i915_gem_object *obj, u32 value) 470 { 471 const bool has_llc = HAS_LLC(to_i915(obj->base.dev)); 472 unsigned int n, m, need_flush; 473 int err; 474 475 i915_gem_object_lock(obj, NULL); 476 err = i915_gem_object_prepare_write(obj, &need_flush); 477 if (err) 478 goto out; 479 480 for (n = 0; n < real_page_count(obj); n++) { 481 u32 *map; 482 483 map = kmap_atomic(i915_gem_object_get_page(obj, n)); 484 for (m = 0; m < DW_PER_PAGE; m++) 485 map[m] = value; 486 if (!has_llc) 487 drm_clflush_virt_range(map, PAGE_SIZE); 488 kunmap_atomic(map); 489 } 490 491 i915_gem_object_finish_access(obj); 492 obj->read_domains = I915_GEM_DOMAIN_GTT | I915_GEM_DOMAIN_CPU; 493 obj->write_domain = 0; 494 out: 495 i915_gem_object_unlock(obj); 496 return err; 497 } 498 499 static noinline int cpu_check(struct drm_i915_gem_object *obj, 500 unsigned int idx, unsigned int max) 501 { 502 unsigned int n, m, needs_flush; 503 int err; 504 505 i915_gem_object_lock(obj, NULL); 506 err = i915_gem_object_prepare_read(obj, &needs_flush); 507 if (err) 508 goto out_unlock; 509 510 for (n = 0; n < real_page_count(obj); n++) { 511 u32 *map; 512 513 map = kmap_atomic(i915_gem_object_get_page(obj, n)); 514 if (needs_flush & CLFLUSH_BEFORE) 515 drm_clflush_virt_range(map, PAGE_SIZE); 516 517 for (m = 0; m < max; m++) { 518 if (map[m] != m) { 519 pr_err("%pS: Invalid value at object %d page %d/%ld, offset %d/%d: found %x expected %x\n", 520 __builtin_return_address(0), idx, 521 n, real_page_count(obj), m, max, 522 map[m], m); 523 err = -EINVAL; 524 goto out_unmap; 525 } 526 } 527 528 for (; m < DW_PER_PAGE; m++) { 529 if (map[m] != STACK_MAGIC) { 530 pr_err("%pS: Invalid value at object %d page %d, offset %d: found %x expected %x (uninitialised)\n", 531 __builtin_return_address(0), idx, n, m, 532 map[m], STACK_MAGIC); 533 err = -EINVAL; 534 goto out_unmap; 535 } 536 } 537 538 out_unmap: 539 kunmap_atomic(map); 540 if (err) 541 break; 542 } 543 544 i915_gem_object_finish_access(obj); 545 out_unlock: 546 i915_gem_object_unlock(obj); 547 return err; 548 } 549 550 static int file_add_object(struct file *file, struct drm_i915_gem_object *obj) 551 { 552 int err; 553 554 GEM_BUG_ON(obj->base.handle_count); 555 556 /* tie the object to the drm_file for easy reaping */ 557 err = idr_alloc(&to_drm_file(file)->object_idr, 558 &obj->base, 1, 0, GFP_KERNEL); 559 if (err < 0) 560 return err; 561 562 i915_gem_object_get(obj); 563 obj->base.handle_count++; 564 return 0; 565 } 566 567 static struct drm_i915_gem_object * 568 create_test_object(struct i915_address_space *vm, 569 struct file *file, 570 struct list_head *objects) 571 { 572 struct drm_i915_gem_object *obj; 573 u64 size; 574 int err; 575 576 /* Keep in GEM's good graces */ 577 intel_gt_retire_requests(vm->gt); 578 579 size = min(vm->total / 2, 1024ull * DW_PER_PAGE * PAGE_SIZE); 580 size = round_down(size, DW_PER_PAGE * PAGE_SIZE); 581 582 obj = huge_gem_object(vm->i915, DW_PER_PAGE * PAGE_SIZE, size); 583 if (IS_ERR(obj)) 584 return obj; 585 586 err = file_add_object(file, obj); 587 i915_gem_object_put(obj); 588 if (err) 589 return ERR_PTR(err); 590 591 err = cpu_fill(obj, STACK_MAGIC); 592 if (err) { 593 pr_err("Failed to fill object with cpu, err=%d\n", 594 err); 595 return ERR_PTR(err); 596 } 597 598 list_add_tail(&obj->st_link, objects); 599 return obj; 600 } 601 602 static unsigned long max_dwords(struct drm_i915_gem_object *obj) 603 { 604 unsigned long npages = fake_page_count(obj); 605 606 GEM_BUG_ON(!IS_ALIGNED(npages, DW_PER_PAGE)); 607 return npages / DW_PER_PAGE; 608 } 609 610 static void throttle_release(struct i915_request **q, int count) 611 { 612 int i; 613 614 for (i = 0; i < count; i++) { 615 if (IS_ERR_OR_NULL(q[i])) 616 continue; 617 618 i915_request_put(fetch_and_zero(&q[i])); 619 } 620 } 621 622 static int throttle(struct intel_context *ce, 623 struct i915_request **q, int count) 624 { 625 int i; 626 627 if (!IS_ERR_OR_NULL(q[0])) { 628 if (i915_request_wait(q[0], 629 I915_WAIT_INTERRUPTIBLE, 630 MAX_SCHEDULE_TIMEOUT) < 0) 631 return -EINTR; 632 633 i915_request_put(q[0]); 634 } 635 636 for (i = 0; i < count - 1; i++) 637 q[i] = q[i + 1]; 638 639 q[i] = intel_context_create_request(ce); 640 if (IS_ERR(q[i])) 641 return PTR_ERR(q[i]); 642 643 i915_request_get(q[i]); 644 i915_request_add(q[i]); 645 646 return 0; 647 } 648 649 static int igt_ctx_exec(void *arg) 650 { 651 struct drm_i915_private *i915 = arg; 652 struct intel_engine_cs *engine; 653 int err = -ENODEV; 654 655 /* 656 * Create a few different contexts (with different mm) and write 657 * through each ctx/mm using the GPU making sure those writes end 658 * up in the expected pages of our obj. 659 */ 660 661 if (!DRIVER_CAPS(i915)->has_logical_contexts) 662 return 0; 663 664 for_each_uabi_engine(engine, i915) { 665 struct drm_i915_gem_object *obj = NULL; 666 unsigned long ncontexts, ndwords, dw; 667 struct i915_request *tq[5] = {}; 668 struct igt_live_test t; 669 IGT_TIMEOUT(end_time); 670 LIST_HEAD(objects); 671 struct file *file; 672 673 if (!intel_engine_can_store_dword(engine)) 674 continue; 675 676 if (!engine->context_size) 677 continue; /* No logical context support in HW */ 678 679 file = mock_file(i915); 680 if (IS_ERR(file)) 681 return PTR_ERR(file); 682 683 err = igt_live_test_begin(&t, i915, __func__, engine->name); 684 if (err) 685 goto out_file; 686 687 ncontexts = 0; 688 ndwords = 0; 689 dw = 0; 690 while (!time_after(jiffies, end_time)) { 691 struct i915_gem_context *ctx; 692 struct intel_context *ce; 693 694 ctx = kernel_context(i915, NULL); 695 if (IS_ERR(ctx)) { 696 err = PTR_ERR(ctx); 697 goto out_file; 698 } 699 700 ce = i915_gem_context_get_engine(ctx, engine->legacy_idx); 701 GEM_BUG_ON(IS_ERR(ce)); 702 703 if (!obj) { 704 obj = create_test_object(ce->vm, file, &objects); 705 if (IS_ERR(obj)) { 706 err = PTR_ERR(obj); 707 intel_context_put(ce); 708 kernel_context_close(ctx); 709 goto out_file; 710 } 711 } 712 713 err = gpu_fill(ce, obj, dw); 714 if (err) { 715 pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) [full-ppgtt? %s], err=%d\n", 716 ndwords, dw, max_dwords(obj), 717 engine->name, 718 str_yes_no(i915_gem_context_has_full_ppgtt(ctx)), 719 err); 720 intel_context_put(ce); 721 kernel_context_close(ctx); 722 goto out_file; 723 } 724 725 err = throttle(ce, tq, ARRAY_SIZE(tq)); 726 if (err) { 727 intel_context_put(ce); 728 kernel_context_close(ctx); 729 goto out_file; 730 } 731 732 if (++dw == max_dwords(obj)) { 733 obj = NULL; 734 dw = 0; 735 } 736 737 ndwords++; 738 ncontexts++; 739 740 intel_context_put(ce); 741 kernel_context_close(ctx); 742 } 743 744 pr_info("Submitted %lu contexts to %s, filling %lu dwords\n", 745 ncontexts, engine->name, ndwords); 746 747 ncontexts = dw = 0; 748 list_for_each_entry(obj, &objects, st_link) { 749 unsigned int rem = 750 min_t(unsigned int, ndwords - dw, max_dwords(obj)); 751 752 err = cpu_check(obj, ncontexts++, rem); 753 if (err) 754 break; 755 756 dw += rem; 757 } 758 759 out_file: 760 throttle_release(tq, ARRAY_SIZE(tq)); 761 if (igt_live_test_end(&t)) 762 err = -EIO; 763 764 fput(file); 765 if (err) 766 return err; 767 768 i915_gem_drain_freed_objects(i915); 769 } 770 771 return 0; 772 } 773 774 static int igt_shared_ctx_exec(void *arg) 775 { 776 struct drm_i915_private *i915 = arg; 777 struct i915_request *tq[5] = {}; 778 struct i915_gem_context *parent; 779 struct intel_engine_cs *engine; 780 struct igt_live_test t; 781 struct file *file; 782 int err = 0; 783 784 /* 785 * Create a few different contexts with the same mm and write 786 * through each ctx using the GPU making sure those writes end 787 * up in the expected pages of our obj. 788 */ 789 if (!DRIVER_CAPS(i915)->has_logical_contexts) 790 return 0; 791 792 file = mock_file(i915); 793 if (IS_ERR(file)) 794 return PTR_ERR(file); 795 796 parent = live_context(i915, file); 797 if (IS_ERR(parent)) { 798 err = PTR_ERR(parent); 799 goto out_file; 800 } 801 802 if (!parent->vm) { /* not full-ppgtt; nothing to share */ 803 err = 0; 804 goto out_file; 805 } 806 807 err = igt_live_test_begin(&t, i915, __func__, ""); 808 if (err) 809 goto out_file; 810 811 for_each_uabi_engine(engine, i915) { 812 unsigned long ncontexts, ndwords, dw; 813 struct drm_i915_gem_object *obj = NULL; 814 IGT_TIMEOUT(end_time); 815 LIST_HEAD(objects); 816 817 if (!intel_engine_can_store_dword(engine)) 818 continue; 819 820 dw = 0; 821 ndwords = 0; 822 ncontexts = 0; 823 while (!time_after(jiffies, end_time)) { 824 struct i915_gem_context *ctx; 825 struct intel_context *ce; 826 827 ctx = kernel_context(i915, parent->vm); 828 if (IS_ERR(ctx)) { 829 err = PTR_ERR(ctx); 830 goto out_test; 831 } 832 833 ce = i915_gem_context_get_engine(ctx, engine->legacy_idx); 834 GEM_BUG_ON(IS_ERR(ce)); 835 836 if (!obj) { 837 obj = create_test_object(parent->vm, 838 file, &objects); 839 if (IS_ERR(obj)) { 840 err = PTR_ERR(obj); 841 intel_context_put(ce); 842 kernel_context_close(ctx); 843 goto out_test; 844 } 845 } 846 847 err = gpu_fill(ce, obj, dw); 848 if (err) { 849 pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) [full-ppgtt? %s], err=%d\n", 850 ndwords, dw, max_dwords(obj), 851 engine->name, 852 str_yes_no(i915_gem_context_has_full_ppgtt(ctx)), 853 err); 854 intel_context_put(ce); 855 kernel_context_close(ctx); 856 goto out_test; 857 } 858 859 err = throttle(ce, tq, ARRAY_SIZE(tq)); 860 if (err) { 861 intel_context_put(ce); 862 kernel_context_close(ctx); 863 goto out_test; 864 } 865 866 if (++dw == max_dwords(obj)) { 867 obj = NULL; 868 dw = 0; 869 } 870 871 ndwords++; 872 ncontexts++; 873 874 intel_context_put(ce); 875 kernel_context_close(ctx); 876 } 877 pr_info("Submitted %lu contexts to %s, filling %lu dwords\n", 878 ncontexts, engine->name, ndwords); 879 880 ncontexts = dw = 0; 881 list_for_each_entry(obj, &objects, st_link) { 882 unsigned int rem = 883 min_t(unsigned int, ndwords - dw, max_dwords(obj)); 884 885 err = cpu_check(obj, ncontexts++, rem); 886 if (err) 887 goto out_test; 888 889 dw += rem; 890 } 891 892 i915_gem_drain_freed_objects(i915); 893 } 894 out_test: 895 throttle_release(tq, ARRAY_SIZE(tq)); 896 if (igt_live_test_end(&t)) 897 err = -EIO; 898 out_file: 899 fput(file); 900 return err; 901 } 902 903 static int rpcs_query_batch(struct drm_i915_gem_object *rpcs, 904 struct i915_vma *vma, 905 struct intel_engine_cs *engine) 906 { 907 u32 *cmd; 908 909 GEM_BUG_ON(GRAPHICS_VER(vma->vm->i915) < 8); 910 911 cmd = i915_gem_object_pin_map(rpcs, I915_MAP_WB); 912 if (IS_ERR(cmd)) 913 return PTR_ERR(cmd); 914 915 *cmd++ = MI_STORE_REGISTER_MEM_GEN8; 916 *cmd++ = i915_mmio_reg_offset(GEN8_R_PWR_CLK_STATE(engine->mmio_base)); 917 *cmd++ = lower_32_bits(i915_vma_offset(vma)); 918 *cmd++ = upper_32_bits(i915_vma_offset(vma)); 919 *cmd = MI_BATCH_BUFFER_END; 920 921 __i915_gem_object_flush_map(rpcs, 0, 64); 922 i915_gem_object_unpin_map(rpcs); 923 924 intel_gt_chipset_flush(vma->vm->gt); 925 926 return 0; 927 } 928 929 static int 930 emit_rpcs_query(struct drm_i915_gem_object *obj, 931 struct intel_context *ce, 932 struct i915_request **rq_out) 933 { 934 struct drm_i915_private *i915 = to_i915(obj->base.dev); 935 struct i915_request *rq; 936 struct i915_gem_ww_ctx ww; 937 struct i915_vma *batch; 938 struct i915_vma *vma; 939 struct drm_i915_gem_object *rpcs; 940 int err; 941 942 GEM_BUG_ON(!intel_engine_can_store_dword(ce->engine)); 943 944 if (GRAPHICS_VER(i915) < 8) 945 return -EINVAL; 946 947 vma = i915_vma_instance(obj, ce->vm, NULL); 948 if (IS_ERR(vma)) 949 return PTR_ERR(vma); 950 951 rpcs = i915_gem_object_create_internal(i915, PAGE_SIZE); 952 if (IS_ERR(rpcs)) 953 return PTR_ERR(rpcs); 954 955 batch = i915_vma_instance(rpcs, ce->vm, NULL); 956 if (IS_ERR(batch)) { 957 err = PTR_ERR(batch); 958 goto err_put; 959 } 960 961 i915_gem_ww_ctx_init(&ww, false); 962 retry: 963 err = i915_gem_object_lock(obj, &ww); 964 if (!err) 965 err = i915_gem_object_lock(rpcs, &ww); 966 if (!err) 967 err = i915_gem_object_set_to_gtt_domain(obj, false); 968 if (!err) 969 err = i915_vma_pin_ww(vma, &ww, 0, 0, PIN_USER); 970 if (err) 971 goto err_put; 972 973 err = i915_vma_pin_ww(batch, &ww, 0, 0, PIN_USER); 974 if (err) 975 goto err_vma; 976 977 err = rpcs_query_batch(rpcs, vma, ce->engine); 978 if (err) 979 goto err_batch; 980 981 rq = i915_request_create(ce); 982 if (IS_ERR(rq)) { 983 err = PTR_ERR(rq); 984 goto err_batch; 985 } 986 987 err = i915_vma_move_to_active(batch, rq, 0); 988 if (err) 989 goto skip_request; 990 991 err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE); 992 if (err) 993 goto skip_request; 994 995 if (rq->engine->emit_init_breadcrumb) { 996 err = rq->engine->emit_init_breadcrumb(rq); 997 if (err) 998 goto skip_request; 999 } 1000 1001 err = rq->engine->emit_bb_start(rq, 1002 i915_vma_offset(batch), 1003 i915_vma_size(batch), 1004 0); 1005 if (err) 1006 goto skip_request; 1007 1008 *rq_out = i915_request_get(rq); 1009 1010 skip_request: 1011 if (err) 1012 i915_request_set_error_once(rq, err); 1013 i915_request_add(rq); 1014 err_batch: 1015 i915_vma_unpin(batch); 1016 err_vma: 1017 i915_vma_unpin(vma); 1018 err_put: 1019 if (err == -EDEADLK) { 1020 err = i915_gem_ww_ctx_backoff(&ww); 1021 if (!err) 1022 goto retry; 1023 } 1024 i915_gem_ww_ctx_fini(&ww); 1025 i915_gem_object_put(rpcs); 1026 return err; 1027 } 1028 1029 #define TEST_IDLE BIT(0) 1030 #define TEST_BUSY BIT(1) 1031 #define TEST_RESET BIT(2) 1032 1033 static int 1034 __sseu_prepare(const char *name, 1035 unsigned int flags, 1036 struct intel_context *ce, 1037 struct igt_spinner **spin) 1038 { 1039 struct i915_request *rq; 1040 int ret; 1041 1042 *spin = NULL; 1043 if (!(flags & (TEST_BUSY | TEST_RESET))) 1044 return 0; 1045 1046 *spin = kzalloc(sizeof(**spin), GFP_KERNEL); 1047 if (!*spin) 1048 return -ENOMEM; 1049 1050 ret = igt_spinner_init(*spin, ce->engine->gt); 1051 if (ret) 1052 goto err_free; 1053 1054 rq = igt_spinner_create_request(*spin, ce, MI_NOOP); 1055 if (IS_ERR(rq)) { 1056 ret = PTR_ERR(rq); 1057 goto err_fini; 1058 } 1059 1060 i915_request_add(rq); 1061 1062 if (!igt_wait_for_spinner(*spin, rq)) { 1063 pr_err("%s: Spinner failed to start!\n", name); 1064 ret = -ETIMEDOUT; 1065 goto err_end; 1066 } 1067 1068 return 0; 1069 1070 err_end: 1071 igt_spinner_end(*spin); 1072 err_fini: 1073 igt_spinner_fini(*spin); 1074 err_free: 1075 kfree(fetch_and_zero(spin)); 1076 return ret; 1077 } 1078 1079 static int 1080 __read_slice_count(struct intel_context *ce, 1081 struct drm_i915_gem_object *obj, 1082 struct igt_spinner *spin, 1083 u32 *rpcs) 1084 { 1085 struct i915_request *rq = NULL; 1086 u32 s_mask, s_shift; 1087 unsigned int cnt; 1088 u32 *buf, val; 1089 long ret; 1090 1091 ret = emit_rpcs_query(obj, ce, &rq); 1092 if (ret) 1093 return ret; 1094 1095 if (spin) 1096 igt_spinner_end(spin); 1097 1098 ret = i915_request_wait(rq, 0, MAX_SCHEDULE_TIMEOUT); 1099 i915_request_put(rq); 1100 if (ret < 0) 1101 return ret; 1102 1103 buf = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WB); 1104 if (IS_ERR(buf)) { 1105 ret = PTR_ERR(buf); 1106 return ret; 1107 } 1108 1109 if (GRAPHICS_VER(ce->engine->i915) >= 11) { 1110 s_mask = GEN11_RPCS_S_CNT_MASK; 1111 s_shift = GEN11_RPCS_S_CNT_SHIFT; 1112 } else { 1113 s_mask = GEN8_RPCS_S_CNT_MASK; 1114 s_shift = GEN8_RPCS_S_CNT_SHIFT; 1115 } 1116 1117 val = *buf; 1118 cnt = (val & s_mask) >> s_shift; 1119 *rpcs = val; 1120 1121 i915_gem_object_unpin_map(obj); 1122 1123 return cnt; 1124 } 1125 1126 static int 1127 __check_rpcs(const char *name, u32 rpcs, int slices, unsigned int expected, 1128 const char *prefix, const char *suffix) 1129 { 1130 if (slices == expected) 1131 return 0; 1132 1133 if (slices < 0) { 1134 pr_err("%s: %s read slice count failed with %d%s\n", 1135 name, prefix, slices, suffix); 1136 return slices; 1137 } 1138 1139 pr_err("%s: %s slice count %d is not %u%s\n", 1140 name, prefix, slices, expected, suffix); 1141 1142 pr_info("RPCS=0x%x; %u%sx%u%s\n", 1143 rpcs, slices, 1144 (rpcs & GEN8_RPCS_S_CNT_ENABLE) ? "*" : "", 1145 (rpcs & GEN8_RPCS_SS_CNT_MASK) >> GEN8_RPCS_SS_CNT_SHIFT, 1146 (rpcs & GEN8_RPCS_SS_CNT_ENABLE) ? "*" : ""); 1147 1148 return -EINVAL; 1149 } 1150 1151 static int 1152 __sseu_finish(const char *name, 1153 unsigned int flags, 1154 struct intel_context *ce, 1155 struct drm_i915_gem_object *obj, 1156 unsigned int expected, 1157 struct igt_spinner *spin) 1158 { 1159 unsigned int slices = hweight32(ce->engine->sseu.slice_mask); 1160 u32 rpcs = 0; 1161 int ret = 0; 1162 1163 if (flags & TEST_RESET) { 1164 ret = intel_engine_reset(ce->engine, "sseu"); 1165 if (ret) 1166 goto out; 1167 } 1168 1169 ret = __read_slice_count(ce, obj, 1170 flags & TEST_RESET ? NULL : spin, &rpcs); 1171 ret = __check_rpcs(name, rpcs, ret, expected, "Context", "!"); 1172 if (ret) 1173 goto out; 1174 1175 ret = __read_slice_count(ce->engine->kernel_context, obj, NULL, &rpcs); 1176 ret = __check_rpcs(name, rpcs, ret, slices, "Kernel context", "!"); 1177 1178 out: 1179 if (spin) 1180 igt_spinner_end(spin); 1181 1182 if ((flags & TEST_IDLE) && ret == 0) { 1183 ret = igt_flush_test(ce->engine->i915); 1184 if (ret) 1185 return ret; 1186 1187 ret = __read_slice_count(ce, obj, NULL, &rpcs); 1188 ret = __check_rpcs(name, rpcs, ret, expected, 1189 "Context", " after idle!"); 1190 } 1191 1192 return ret; 1193 } 1194 1195 static int 1196 __sseu_test(const char *name, 1197 unsigned int flags, 1198 struct intel_context *ce, 1199 struct drm_i915_gem_object *obj, 1200 struct intel_sseu sseu) 1201 { 1202 struct igt_spinner *spin = NULL; 1203 int ret; 1204 1205 intel_engine_pm_get(ce->engine); 1206 1207 ret = __sseu_prepare(name, flags, ce, &spin); 1208 if (ret) 1209 goto out_pm; 1210 1211 ret = intel_context_reconfigure_sseu(ce, sseu); 1212 if (ret) 1213 goto out_spin; 1214 1215 ret = __sseu_finish(name, flags, ce, obj, 1216 hweight32(sseu.slice_mask), spin); 1217 1218 out_spin: 1219 if (spin) { 1220 igt_spinner_end(spin); 1221 igt_spinner_fini(spin); 1222 kfree(spin); 1223 } 1224 out_pm: 1225 intel_engine_pm_put(ce->engine); 1226 return ret; 1227 } 1228 1229 static int 1230 __igt_ctx_sseu(struct drm_i915_private *i915, 1231 const char *name, 1232 unsigned int flags) 1233 { 1234 struct drm_i915_gem_object *obj; 1235 int inst = 0; 1236 int ret = 0; 1237 1238 if (GRAPHICS_VER(i915) < 9) 1239 return 0; 1240 1241 if (flags & TEST_RESET) 1242 igt_global_reset_lock(to_gt(i915)); 1243 1244 obj = i915_gem_object_create_internal(i915, PAGE_SIZE); 1245 if (IS_ERR(obj)) { 1246 ret = PTR_ERR(obj); 1247 goto out_unlock; 1248 } 1249 1250 do { 1251 struct intel_engine_cs *engine; 1252 struct intel_context *ce; 1253 struct intel_sseu pg_sseu; 1254 1255 engine = intel_engine_lookup_user(i915, 1256 I915_ENGINE_CLASS_RENDER, 1257 inst++); 1258 if (!engine) 1259 break; 1260 1261 if (hweight32(engine->sseu.slice_mask) < 2) 1262 continue; 1263 1264 if (!engine->gt->info.sseu.has_slice_pg) 1265 continue; 1266 1267 /* 1268 * Gen11 VME friendly power-gated configuration with 1269 * half enabled sub-slices. 1270 */ 1271 pg_sseu = engine->sseu; 1272 pg_sseu.slice_mask = 1; 1273 pg_sseu.subslice_mask = 1274 ~(~0 << (hweight32(engine->sseu.subslice_mask) / 2)); 1275 1276 pr_info("%s: SSEU subtest '%s', flags=%x, def_slices=%u, pg_slices=%u\n", 1277 engine->name, name, flags, 1278 hweight32(engine->sseu.slice_mask), 1279 hweight32(pg_sseu.slice_mask)); 1280 1281 ce = intel_context_create(engine); 1282 if (IS_ERR(ce)) { 1283 ret = PTR_ERR(ce); 1284 goto out_put; 1285 } 1286 1287 ret = intel_context_pin(ce); 1288 if (ret) 1289 goto out_ce; 1290 1291 /* First set the default mask. */ 1292 ret = __sseu_test(name, flags, ce, obj, engine->sseu); 1293 if (ret) 1294 goto out_unpin; 1295 1296 /* Then set a power-gated configuration. */ 1297 ret = __sseu_test(name, flags, ce, obj, pg_sseu); 1298 if (ret) 1299 goto out_unpin; 1300 1301 /* Back to defaults. */ 1302 ret = __sseu_test(name, flags, ce, obj, engine->sseu); 1303 if (ret) 1304 goto out_unpin; 1305 1306 /* One last power-gated configuration for the road. */ 1307 ret = __sseu_test(name, flags, ce, obj, pg_sseu); 1308 if (ret) 1309 goto out_unpin; 1310 1311 out_unpin: 1312 intel_context_unpin(ce); 1313 out_ce: 1314 intel_context_put(ce); 1315 } while (!ret); 1316 1317 if (igt_flush_test(i915)) 1318 ret = -EIO; 1319 1320 out_put: 1321 i915_gem_object_put(obj); 1322 1323 out_unlock: 1324 if (flags & TEST_RESET) 1325 igt_global_reset_unlock(to_gt(i915)); 1326 1327 if (ret) 1328 pr_err("%s: Failed with %d!\n", name, ret); 1329 1330 return ret; 1331 } 1332 1333 static int igt_ctx_sseu(void *arg) 1334 { 1335 struct { 1336 const char *name; 1337 unsigned int flags; 1338 } *phase, phases[] = { 1339 { .name = "basic", .flags = 0 }, 1340 { .name = "idle", .flags = TEST_IDLE }, 1341 { .name = "busy", .flags = TEST_BUSY }, 1342 { .name = "busy-reset", .flags = TEST_BUSY | TEST_RESET }, 1343 { .name = "busy-idle", .flags = TEST_BUSY | TEST_IDLE }, 1344 { .name = "reset-idle", .flags = TEST_RESET | TEST_IDLE }, 1345 }; 1346 unsigned int i; 1347 int ret = 0; 1348 1349 for (i = 0, phase = phases; ret == 0 && i < ARRAY_SIZE(phases); 1350 i++, phase++) 1351 ret = __igt_ctx_sseu(arg, phase->name, phase->flags); 1352 1353 return ret; 1354 } 1355 1356 static int igt_ctx_readonly(void *arg) 1357 { 1358 struct drm_i915_private *i915 = arg; 1359 unsigned long idx, ndwords, dw, num_engines; 1360 struct drm_i915_gem_object *obj = NULL; 1361 struct i915_request *tq[5] = {}; 1362 struct i915_gem_engines_iter it; 1363 struct i915_address_space *vm; 1364 struct i915_gem_context *ctx; 1365 struct intel_context *ce; 1366 struct igt_live_test t; 1367 I915_RND_STATE(prng); 1368 IGT_TIMEOUT(end_time); 1369 LIST_HEAD(objects); 1370 struct file *file; 1371 int err = -ENODEV; 1372 1373 /* 1374 * Create a few read-only objects (with the occasional writable object) 1375 * and try to write into these object checking that the GPU discards 1376 * any write to a read-only object. 1377 */ 1378 1379 file = mock_file(i915); 1380 if (IS_ERR(file)) 1381 return PTR_ERR(file); 1382 1383 err = igt_live_test_begin(&t, i915, __func__, ""); 1384 if (err) 1385 goto out_file; 1386 1387 ctx = live_context(i915, file); 1388 if (IS_ERR(ctx)) { 1389 err = PTR_ERR(ctx); 1390 goto out_file; 1391 } 1392 1393 vm = ctx->vm ?: &to_gt(i915)->ggtt->alias->vm; 1394 if (!vm || !vm->has_read_only) { 1395 err = 0; 1396 goto out_file; 1397 } 1398 1399 num_engines = 0; 1400 for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) 1401 if (intel_engine_can_store_dword(ce->engine)) 1402 num_engines++; 1403 i915_gem_context_unlock_engines(ctx); 1404 1405 ndwords = 0; 1406 dw = 0; 1407 while (!time_after(jiffies, end_time)) { 1408 for_each_gem_engine(ce, 1409 i915_gem_context_lock_engines(ctx), it) { 1410 if (!intel_engine_can_store_dword(ce->engine)) 1411 continue; 1412 1413 if (!obj) { 1414 obj = create_test_object(ce->vm, file, &objects); 1415 if (IS_ERR(obj)) { 1416 err = PTR_ERR(obj); 1417 i915_gem_context_unlock_engines(ctx); 1418 goto out_file; 1419 } 1420 1421 if (prandom_u32_state(&prng) & 1) 1422 i915_gem_object_set_readonly(obj); 1423 } 1424 1425 err = gpu_fill(ce, obj, dw); 1426 if (err) { 1427 pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) [full-ppgtt? %s], err=%d\n", 1428 ndwords, dw, max_dwords(obj), 1429 ce->engine->name, 1430 str_yes_no(i915_gem_context_has_full_ppgtt(ctx)), 1431 err); 1432 i915_gem_context_unlock_engines(ctx); 1433 goto out_file; 1434 } 1435 1436 err = throttle(ce, tq, ARRAY_SIZE(tq)); 1437 if (err) { 1438 i915_gem_context_unlock_engines(ctx); 1439 goto out_file; 1440 } 1441 1442 if (++dw == max_dwords(obj)) { 1443 obj = NULL; 1444 dw = 0; 1445 } 1446 ndwords++; 1447 } 1448 i915_gem_context_unlock_engines(ctx); 1449 } 1450 pr_info("Submitted %lu dwords (across %lu engines)\n", 1451 ndwords, num_engines); 1452 1453 dw = 0; 1454 idx = 0; 1455 list_for_each_entry(obj, &objects, st_link) { 1456 unsigned int rem = 1457 min_t(unsigned int, ndwords - dw, max_dwords(obj)); 1458 unsigned int num_writes; 1459 1460 num_writes = rem; 1461 if (i915_gem_object_is_readonly(obj)) 1462 num_writes = 0; 1463 1464 err = cpu_check(obj, idx++, num_writes); 1465 if (err) 1466 break; 1467 1468 dw += rem; 1469 } 1470 1471 out_file: 1472 throttle_release(tq, ARRAY_SIZE(tq)); 1473 if (igt_live_test_end(&t)) 1474 err = -EIO; 1475 1476 fput(file); 1477 return err; 1478 } 1479 1480 static int check_scratch(struct i915_address_space *vm, u64 offset) 1481 { 1482 struct drm_mm_node *node; 1483 1484 mutex_lock(&vm->mutex); 1485 node = __drm_mm_interval_first(&vm->mm, 1486 offset, offset + sizeof(u32) - 1); 1487 mutex_unlock(&vm->mutex); 1488 if (!node || node->start > offset) 1489 return 0; 1490 1491 GEM_BUG_ON(offset >= node->start + node->size); 1492 1493 pr_err("Target offset 0x%08x_%08x overlaps with a node in the mm!\n", 1494 upper_32_bits(offset), lower_32_bits(offset)); 1495 return -EINVAL; 1496 } 1497 1498 static int write_to_scratch(struct i915_gem_context *ctx, 1499 struct intel_engine_cs *engine, 1500 struct drm_i915_gem_object *obj, 1501 u64 offset, u32 value) 1502 { 1503 struct drm_i915_private *i915 = ctx->i915; 1504 struct i915_address_space *vm; 1505 struct i915_request *rq; 1506 struct i915_vma *vma; 1507 u32 *cmd; 1508 int err; 1509 1510 GEM_BUG_ON(offset < I915_GTT_PAGE_SIZE); 1511 1512 err = check_scratch(ctx->vm, offset); 1513 if (err) 1514 return err; 1515 1516 cmd = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WB); 1517 if (IS_ERR(cmd)) 1518 return PTR_ERR(cmd); 1519 1520 *cmd++ = MI_STORE_DWORD_IMM_GEN4; 1521 if (GRAPHICS_VER(i915) >= 8) { 1522 *cmd++ = lower_32_bits(offset); 1523 *cmd++ = upper_32_bits(offset); 1524 } else { 1525 *cmd++ = 0; 1526 *cmd++ = offset; 1527 } 1528 *cmd++ = value; 1529 *cmd = MI_BATCH_BUFFER_END; 1530 __i915_gem_object_flush_map(obj, 0, 64); 1531 i915_gem_object_unpin_map(obj); 1532 1533 intel_gt_chipset_flush(engine->gt); 1534 1535 vm = i915_gem_context_get_eb_vm(ctx); 1536 vma = i915_vma_instance(obj, vm, NULL); 1537 if (IS_ERR(vma)) { 1538 err = PTR_ERR(vma); 1539 goto out_vm; 1540 } 1541 1542 err = i915_vma_pin(vma, 0, 0, PIN_USER | PIN_OFFSET_FIXED); 1543 if (err) 1544 goto out_vm; 1545 1546 rq = igt_request_alloc(ctx, engine); 1547 if (IS_ERR(rq)) { 1548 err = PTR_ERR(rq); 1549 goto err_unpin; 1550 } 1551 1552 i915_vma_lock(vma); 1553 err = i915_vma_move_to_active(vma, rq, 0); 1554 i915_vma_unlock(vma); 1555 if (err) 1556 goto skip_request; 1557 1558 if (rq->engine->emit_init_breadcrumb) { 1559 err = rq->engine->emit_init_breadcrumb(rq); 1560 if (err) 1561 goto skip_request; 1562 } 1563 1564 err = engine->emit_bb_start(rq, i915_vma_offset(vma), 1565 i915_vma_size(vma), 0); 1566 if (err) 1567 goto skip_request; 1568 1569 i915_vma_unpin(vma); 1570 1571 i915_request_add(rq); 1572 1573 goto out_vm; 1574 skip_request: 1575 i915_request_set_error_once(rq, err); 1576 i915_request_add(rq); 1577 err_unpin: 1578 i915_vma_unpin(vma); 1579 out_vm: 1580 i915_vm_put(vm); 1581 1582 if (!err) 1583 err = i915_gem_object_wait(obj, 0, MAX_SCHEDULE_TIMEOUT); 1584 1585 return err; 1586 } 1587 1588 static int read_from_scratch(struct i915_gem_context *ctx, 1589 struct intel_engine_cs *engine, 1590 struct drm_i915_gem_object *obj, 1591 u64 offset, u32 *value) 1592 { 1593 struct drm_i915_private *i915 = ctx->i915; 1594 struct i915_address_space *vm; 1595 const u32 result = 0x100; 1596 struct i915_request *rq; 1597 struct i915_vma *vma; 1598 unsigned int flags; 1599 u32 *cmd; 1600 int err; 1601 1602 GEM_BUG_ON(offset < I915_GTT_PAGE_SIZE); 1603 1604 err = check_scratch(ctx->vm, offset); 1605 if (err) 1606 return err; 1607 1608 if (GRAPHICS_VER(i915) >= 8) { 1609 const u32 GPR0 = engine->mmio_base + 0x600; 1610 1611 vm = i915_gem_context_get_eb_vm(ctx); 1612 vma = i915_vma_instance(obj, vm, NULL); 1613 if (IS_ERR(vma)) { 1614 err = PTR_ERR(vma); 1615 goto out_vm; 1616 } 1617 1618 err = i915_vma_pin(vma, 0, 0, PIN_USER | PIN_OFFSET_FIXED); 1619 if (err) 1620 goto out_vm; 1621 1622 cmd = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WB); 1623 if (IS_ERR(cmd)) { 1624 err = PTR_ERR(cmd); 1625 goto err_unpin; 1626 } 1627 1628 memset(cmd, POISON_INUSE, PAGE_SIZE); 1629 *cmd++ = MI_LOAD_REGISTER_MEM_GEN8; 1630 *cmd++ = GPR0; 1631 *cmd++ = lower_32_bits(offset); 1632 *cmd++ = upper_32_bits(offset); 1633 *cmd++ = MI_STORE_REGISTER_MEM_GEN8; 1634 *cmd++ = GPR0; 1635 *cmd++ = result; 1636 *cmd++ = 0; 1637 *cmd = MI_BATCH_BUFFER_END; 1638 1639 i915_gem_object_flush_map(obj); 1640 i915_gem_object_unpin_map(obj); 1641 1642 flags = 0; 1643 } else { 1644 const u32 reg = engine->mmio_base + 0x420; 1645 1646 /* hsw: register access even to 3DPRIM! is protected */ 1647 vm = i915_vm_get(&engine->gt->ggtt->vm); 1648 vma = i915_vma_instance(obj, vm, NULL); 1649 if (IS_ERR(vma)) { 1650 err = PTR_ERR(vma); 1651 goto out_vm; 1652 } 1653 1654 err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL); 1655 if (err) 1656 goto out_vm; 1657 1658 cmd = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WB); 1659 if (IS_ERR(cmd)) { 1660 err = PTR_ERR(cmd); 1661 goto err_unpin; 1662 } 1663 1664 memset(cmd, POISON_INUSE, PAGE_SIZE); 1665 *cmd++ = MI_LOAD_REGISTER_MEM; 1666 *cmd++ = reg; 1667 *cmd++ = offset; 1668 *cmd++ = MI_STORE_REGISTER_MEM | MI_USE_GGTT; 1669 *cmd++ = reg; 1670 *cmd++ = i915_vma_offset(vma) + result; 1671 *cmd = MI_BATCH_BUFFER_END; 1672 1673 i915_gem_object_flush_map(obj); 1674 i915_gem_object_unpin_map(obj); 1675 1676 flags = I915_DISPATCH_SECURE; 1677 } 1678 1679 intel_gt_chipset_flush(engine->gt); 1680 1681 rq = igt_request_alloc(ctx, engine); 1682 if (IS_ERR(rq)) { 1683 err = PTR_ERR(rq); 1684 goto err_unpin; 1685 } 1686 1687 i915_vma_lock(vma); 1688 err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE); 1689 i915_vma_unlock(vma); 1690 if (err) 1691 goto skip_request; 1692 1693 if (rq->engine->emit_init_breadcrumb) { 1694 err = rq->engine->emit_init_breadcrumb(rq); 1695 if (err) 1696 goto skip_request; 1697 } 1698 1699 err = engine->emit_bb_start(rq, i915_vma_offset(vma), 1700 i915_vma_size(vma), flags); 1701 if (err) 1702 goto skip_request; 1703 1704 i915_vma_unpin(vma); 1705 1706 i915_request_add(rq); 1707 1708 i915_gem_object_lock(obj, NULL); 1709 err = i915_gem_object_set_to_cpu_domain(obj, false); 1710 i915_gem_object_unlock(obj); 1711 if (err) 1712 goto out_vm; 1713 1714 cmd = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WB); 1715 if (IS_ERR(cmd)) { 1716 err = PTR_ERR(cmd); 1717 goto out_vm; 1718 } 1719 1720 *value = cmd[result / sizeof(*cmd)]; 1721 i915_gem_object_unpin_map(obj); 1722 1723 goto out_vm; 1724 skip_request: 1725 i915_request_set_error_once(rq, err); 1726 i915_request_add(rq); 1727 err_unpin: 1728 i915_vma_unpin(vma); 1729 out_vm: 1730 i915_vm_put(vm); 1731 1732 if (!err) 1733 err = i915_gem_object_wait(obj, 0, MAX_SCHEDULE_TIMEOUT); 1734 1735 return err; 1736 } 1737 1738 static int check_scratch_page(struct i915_gem_context *ctx, u32 *out) 1739 { 1740 struct i915_address_space *vm; 1741 u32 *vaddr; 1742 int err = 0; 1743 1744 vm = ctx->vm; 1745 if (!vm) 1746 return -ENODEV; 1747 1748 if (!vm->scratch[0]) { 1749 pr_err("No scratch page!\n"); 1750 return -EINVAL; 1751 } 1752 1753 vaddr = __px_vaddr(vm->scratch[0]); 1754 1755 memcpy(out, vaddr, sizeof(*out)); 1756 if (memchr_inv(vaddr, *out, PAGE_SIZE)) { 1757 pr_err("Inconsistent initial state of scratch page!\n"); 1758 err = -EINVAL; 1759 } 1760 1761 return err; 1762 } 1763 1764 static int igt_vm_isolation(void *arg) 1765 { 1766 struct drm_i915_private *i915 = arg; 1767 struct i915_gem_context *ctx_a, *ctx_b; 1768 struct drm_i915_gem_object *obj_a, *obj_b; 1769 unsigned long num_engines, count; 1770 struct intel_engine_cs *engine; 1771 struct igt_live_test t; 1772 I915_RND_STATE(prng); 1773 struct file *file; 1774 u64 vm_total; 1775 u32 expected; 1776 int err; 1777 1778 if (GRAPHICS_VER(i915) < 7) 1779 return 0; 1780 1781 /* 1782 * The simple goal here is that a write into one context is not 1783 * observed in a second (separate page tables and scratch). 1784 */ 1785 1786 file = mock_file(i915); 1787 if (IS_ERR(file)) 1788 return PTR_ERR(file); 1789 1790 err = igt_live_test_begin(&t, i915, __func__, ""); 1791 if (err) 1792 goto out_file; 1793 1794 ctx_a = live_context(i915, file); 1795 if (IS_ERR(ctx_a)) { 1796 err = PTR_ERR(ctx_a); 1797 goto out_file; 1798 } 1799 1800 ctx_b = live_context(i915, file); 1801 if (IS_ERR(ctx_b)) { 1802 err = PTR_ERR(ctx_b); 1803 goto out_file; 1804 } 1805 1806 /* We can only test vm isolation, if the vm are distinct */ 1807 if (ctx_a->vm == ctx_b->vm) 1808 goto out_file; 1809 1810 /* Read the initial state of the scratch page */ 1811 err = check_scratch_page(ctx_a, &expected); 1812 if (err) 1813 goto out_file; 1814 1815 err = check_scratch_page(ctx_b, &expected); 1816 if (err) 1817 goto out_file; 1818 1819 vm_total = ctx_a->vm->total; 1820 GEM_BUG_ON(ctx_b->vm->total != vm_total); 1821 1822 obj_a = i915_gem_object_create_internal(i915, PAGE_SIZE); 1823 if (IS_ERR(obj_a)) { 1824 err = PTR_ERR(obj_a); 1825 goto out_file; 1826 } 1827 1828 obj_b = i915_gem_object_create_internal(i915, PAGE_SIZE); 1829 if (IS_ERR(obj_b)) { 1830 err = PTR_ERR(obj_b); 1831 goto put_a; 1832 } 1833 1834 count = 0; 1835 num_engines = 0; 1836 for_each_uabi_engine(engine, i915) { 1837 IGT_TIMEOUT(end_time); 1838 unsigned long this = 0; 1839 1840 if (!intel_engine_can_store_dword(engine)) 1841 continue; 1842 1843 /* Not all engines have their own GPR! */ 1844 if (GRAPHICS_VER(i915) < 8 && engine->class != RENDER_CLASS) 1845 continue; 1846 1847 while (!__igt_timeout(end_time, NULL)) { 1848 u32 value = 0xc5c5c5c5; 1849 u64 offset; 1850 1851 /* Leave enough space at offset 0 for the batch */ 1852 offset = igt_random_offset(&prng, 1853 I915_GTT_PAGE_SIZE, vm_total, 1854 sizeof(u32), alignof_dword); 1855 1856 err = write_to_scratch(ctx_a, engine, obj_a, 1857 offset, 0xdeadbeef); 1858 if (err == 0) 1859 err = read_from_scratch(ctx_b, engine, obj_b, 1860 offset, &value); 1861 if (err) 1862 goto put_b; 1863 1864 if (value != expected) { 1865 pr_err("%s: Read %08x from scratch (offset 0x%08x_%08x), after %lu reads!\n", 1866 engine->name, value, 1867 upper_32_bits(offset), 1868 lower_32_bits(offset), 1869 this); 1870 err = -EINVAL; 1871 goto put_b; 1872 } 1873 1874 this++; 1875 } 1876 count += this; 1877 num_engines++; 1878 } 1879 pr_info("Checked %lu scratch offsets across %lu engines\n", 1880 count, num_engines); 1881 1882 put_b: 1883 i915_gem_object_put(obj_b); 1884 put_a: 1885 i915_gem_object_put(obj_a); 1886 out_file: 1887 if (igt_live_test_end(&t)) 1888 err = -EIO; 1889 fput(file); 1890 return err; 1891 } 1892 1893 int i915_gem_context_live_selftests(struct drm_i915_private *i915) 1894 { 1895 static const struct i915_subtest tests[] = { 1896 SUBTEST(live_nop_switch), 1897 SUBTEST(live_parallel_switch), 1898 SUBTEST(igt_ctx_exec), 1899 SUBTEST(igt_ctx_readonly), 1900 SUBTEST(igt_ctx_sseu), 1901 SUBTEST(igt_shared_ctx_exec), 1902 SUBTEST(igt_vm_isolation), 1903 }; 1904 1905 if (intel_gt_is_wedged(to_gt(i915))) 1906 return 0; 1907 1908 return i915_live_subtests(tests, i915); 1909 } 1910