/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2017 Intel Corporation
 */

#include <linux/prime_numbers.h>
#include <linux/string_helpers.h>

#include "gem/i915_gem_internal.h"
#include "gem/i915_gem_pm.h"
#include "gt/intel_engine_pm.h"
#include "gt/intel_engine_regs.h"
#include "gt/intel_gt.h"
#include "gt/intel_gt_requests.h"
#include "gt/intel_reset.h"
#include "i915_selftest.h"

#include "gem/selftests/igt_gem_utils.h"
#include "selftests/i915_random.h"
#include "selftests/igt_flush_test.h"
#include "selftests/igt_live_test.h"
#include "selftests/igt_reset.h"
#include "selftests/igt_spinner.h"
#include "selftests/mock_drm.h"
#include "selftests/mock_gem_device.h"

#include "huge_gem_object.h"
#include "igt_gem_utils.h"

#define DW_PER_PAGE (PAGE_SIZE / sizeof(u32))

static int live_nop_switch(void *arg)
{
	const unsigned int nctx = 1024;
	struct drm_i915_private *i915 = arg;
	struct intel_engine_cs *engine;
	struct i915_gem_context **ctx;
	struct igt_live_test t;
	struct file *file;
	unsigned long n;
	int err = -ENODEV;

	/*
	 * Create as many contexts as we can feasibly get away with
	 * and check we can switch between them rapidly.
	 *
	 * Serves as a very simple stress test for submission and HW switching
	 * between contexts.
	 */

	if (!DRIVER_CAPS(i915)->has_logical_contexts)
		return 0;

	file = mock_file(i915);
	if (IS_ERR(file))
		return PTR_ERR(file);

	ctx = kcalloc(nctx, sizeof(*ctx), GFP_KERNEL);
	if (!ctx) {
		err = -ENOMEM;
		goto out_file;
	}

	for (n = 0; n < nctx; n++) {
		ctx[n] = live_context(i915, file);
		if (IS_ERR(ctx[n])) {
			err = PTR_ERR(ctx[n]);
			goto out_file;
		}
	}

	for_each_uabi_engine(engine, i915) {
		struct i915_request *rq = NULL;
		unsigned long end_time, prime;
		ktime_t times[2] = {};

		times[0] = ktime_get_raw();
		for (n = 0; n < nctx; n++) {
			struct i915_request *this;

			this = igt_request_alloc(ctx[n], engine);
			if (IS_ERR(this)) {
				err = PTR_ERR(this);
				goto out_file;
			}
			if (rq) {
				i915_request_await_dma_fence(this, &rq->fence);
				i915_request_put(rq);
			}
			rq = i915_request_get(this);
			i915_request_add(this);
		}
		if (i915_request_wait(rq, 0, 10 * HZ) < 0) {
			pr_err("Failed to populate %d contexts\n", nctx);
			intel_gt_set_wedged(to_gt(i915));
			i915_request_put(rq);
			err = -EIO;
			goto out_file;
		}
		i915_request_put(rq);

		times[1] = ktime_get_raw();

		pr_info("Populated %d contexts on %s in %lluns\n",
			nctx, engine->name, ktime_to_ns(times[1] - times[0]));

		err = igt_live_test_begin(&t, i915, __func__, engine->name);
		if (err)
			goto out_file;

		end_time = jiffies + i915_selftest.timeout_jiffies;
		for_each_prime_number_from(prime, 2, 8192) {
			times[1] = ktime_get_raw();

			rq = NULL;
			for (n = 0; n < prime; n++) {
				struct i915_request *this;

				this = igt_request_alloc(ctx[n % nctx], engine);
				if (IS_ERR(this)) {
					err = PTR_ERR(this);
					goto out_file;
				}

				if (rq) { /* Force submission order */
					i915_request_await_dma_fence(this, &rq->fence);
					i915_request_put(rq);
				}

				/*
				 * This space is left intentionally blank.
				 *
				 * We do not actually want to perform any
				 * action with this request, we just want
				 * to measure the latency in allocation
				 * and submission of our breadcrumbs -
				 * ensuring that the bare request is sufficient
				 * for the system to work (i.e. proper HEAD
				 * tracking of the rings, interrupt handling,
				 * etc). It also gives us the lowest bounds
				 * for latency.
				 */

				rq = i915_request_get(this);
				i915_request_add(this);
			}
			GEM_BUG_ON(!rq);
			if (i915_request_wait(rq, 0, HZ / 5) < 0) {
				pr_err("Switching between %ld contexts timed out\n",
				       prime);
				intel_gt_set_wedged(to_gt(i915));
				i915_request_put(rq);
				break;
			}
			i915_request_put(rq);

			times[1] = ktime_sub(ktime_get_raw(), times[1]);
			if (prime == 2)
				times[0] = times[1];

			if (__igt_timeout(end_time, NULL))
				break;
		}

		err = igt_live_test_end(&t);
		if (err)
			goto out_file;

		pr_info("Switch latencies on %s: 1 = %lluns, %lu = %lluns\n",
			engine->name,
			ktime_to_ns(times[0]),
			prime - 1, div64_u64(ktime_to_ns(times[1]), prime - 1));
	}

out_file:
	fput(file);
	return err;
}

struct parallel_switch {
	struct kthread_worker *worker;
	struct kthread_work work;
	struct intel_context *ce[2];
	int result;
};

/*
 * Submit a fence-chained request on each context in turn and wait for
 * the chain to retire before repeating: synchronous context switching.
 */
static void __live_parallel_switch1(struct kthread_work *work)
{
	struct parallel_switch *arg =
		container_of(work, typeof(*arg), work);
	IGT_TIMEOUT(end_time);
	unsigned long count;

	count = 0;
	arg->result = 0;
	do {
		struct i915_request *rq = NULL;
		int n;

		for (n = 0; !arg->result && n < ARRAY_SIZE(arg->ce); n++) {
			struct i915_request *prev = rq;

			rq = i915_request_create(arg->ce[n]);
			if (IS_ERR(rq)) {
				i915_request_put(prev);
				arg->result = PTR_ERR(rq);
				break;
			}

			i915_request_get(rq);
			if (prev) {
				arg->result =
					i915_request_await_dma_fence(rq,
								     &prev->fence);
				i915_request_put(prev);
			}

			i915_request_add(rq);
		}

		if (IS_ERR_OR_NULL(rq))
			break;

		if (i915_request_wait(rq, 0, HZ) < 0)
			arg->result = -ETIME;

		i915_request_put(rq);

		count++;
	} while (!arg->result && !__igt_timeout(end_time, NULL));

	pr_info("%s: %lu switches (sync) <%d>\n",
		arg->ce[0]->engine->name, count, arg->result);
}

/* As above, but keep submitting switches without waiting for each chain. */
static void __live_parallel_switchN(struct kthread_work *work)
{
	struct parallel_switch *arg =
		container_of(work, typeof(*arg), work);
	struct i915_request *rq = NULL;
	IGT_TIMEOUT(end_time);
	unsigned long count;
	int n;

	count = 0;
	arg->result = 0;
	do {
		for (n = 0; !arg->result && n < ARRAY_SIZE(arg->ce); n++) {
			struct i915_request *prev = rq;

			rq = i915_request_create(arg->ce[n]);
			if (IS_ERR(rq)) {
				i915_request_put(prev);
				arg->result = PTR_ERR(rq);
				break;
			}

			i915_request_get(rq);
			if (prev) {
				arg->result =
					i915_request_await_dma_fence(rq,
								     &prev->fence);
				i915_request_put(prev);
			}

			i915_request_add(rq);
		}

		count++;
	} while (!arg->result && !__igt_timeout(end_time, NULL));

	if (!IS_ERR_OR_NULL(rq))
		i915_request_put(rq);

	pr_info("%s: %lu switches (many) <%d>\n",
		arg->ce[0]->engine->name, count, arg->result);
}

static int live_parallel_switch(void *arg)
{
	struct drm_i915_private *i915 = arg;
	static void (* const func[])(struct kthread_work *) = {
		__live_parallel_switch1,
		__live_parallel_switchN,
		NULL,
	};
	struct parallel_switch *data = NULL;
	struct i915_gem_engines *engines;
	struct i915_gem_engines_iter it;
	void (* const *fn)(struct kthread_work *);
	struct i915_gem_context *ctx;
	struct intel_context *ce;
	struct file *file;
	int n, m, count;
	int err = 0;

	/*
	 * Check we can process switches on all engines simultaneously.
	 */

	if (!DRIVER_CAPS(i915)->has_logical_contexts)
		return 0;

	file = mock_file(i915);
	if (IS_ERR(file))
		return PTR_ERR(file);

	ctx = live_context(i915, file);
	if (IS_ERR(ctx)) {
		err = PTR_ERR(ctx);
		goto out_file;
	}

	engines = i915_gem_context_lock_engines(ctx);
	count = engines->num_engines;

	data = kcalloc(count, sizeof(*data), GFP_KERNEL);
	if (!data) {
		i915_gem_context_unlock_engines(ctx);
		err = -ENOMEM;
		goto out_file;
	}

	m = 0; /* Use the first context as our template for the engines */
	for_each_gem_engine(ce, engines, it) {
		err = intel_context_pin(ce);
		if (err) {
			i915_gem_context_unlock_engines(ctx);
			goto out;
		}
		data[m++].ce[0] = intel_context_get(ce);
	}
	i915_gem_context_unlock_engines(ctx);

	/* Clone the same set of engines into the other contexts */
	for (n = 1; n < ARRAY_SIZE(data->ce); n++) {
		ctx = live_context(i915, file);
		if (IS_ERR(ctx)) {
			err = PTR_ERR(ctx);
			goto out;
		}

		for (m = 0; m < count; m++) {
			if (!data[m].ce[0])
				continue;

			ce = intel_context_create(data[m].ce[0]->engine);
			if (IS_ERR(ce)) {
				err = PTR_ERR(ce);
				goto out;
			}

			err = intel_context_pin(ce);
			if (err) {
				intel_context_put(ce);
				goto out;
			}

			data[m].ce[n] = ce;
		}
	}

	for (n = 0; n < count; n++) {
		struct kthread_worker *worker;

		if (!data[n].ce[0])
			continue;

		worker = kthread_create_worker(0, "igt/parallel:%s",
					       data[n].ce[0]->engine->name);
		if (IS_ERR(worker)) {
			err = PTR_ERR(worker);
			goto out;
		}

		data[n].worker = worker;
	}

	for (fn = func; !err && *fn; fn++) {
		struct igt_live_test t;

		err = igt_live_test_begin(&t, i915, __func__, "");
		if (err)
			break;

		for (n = 0; n < count; n++) {
			if (!data[n].ce[0])
				continue;

			data[n].result = 0;
			kthread_init_work(&data[n].work, *fn);
			kthread_queue_work(data[n].worker, &data[n].work);
		}

		for (n = 0; n < count; n++) {
			if (data[n].ce[0]) {
				kthread_flush_work(&data[n].work);
				if (data[n].result && !err)
					err = data[n].result;
			}
		}

		if (igt_live_test_end(&t))
			err = -EIO;
	}

out:
	for (n = 0; n < count; n++) {
		for (m = 0; m < ARRAY_SIZE(data->ce); m++) {
			if (!data[n].ce[m])
				continue;

			intel_context_unpin(data[n].ce[m]);
			intel_context_put(data[n].ce[m]);
		}

		if (data[n].worker)
			kthread_destroy_worker(data[n].worker);
	}
	kfree(data);
out_file:
	fput(file);
	return err;
}
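
/*
 * A huge_gem_object is backed by only a few physical pages which are
 * mapped over and over to fake out a much larger object:
 * real_page_count() counts the distinct physical pages,
 * fake_page_count() the GTT-visible size. See also the comment in
 * gpu_fill() below.
 */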
static unsigned long real_page_count(struct drm_i915_gem_object *obj)
{
	return huge_gem_object_phys_size(obj) >> PAGE_SHIFT;
}

static unsigned long fake_page_count(struct drm_i915_gem_object *obj)
{
	return huge_gem_object_dma_size(obj) >> PAGE_SHIFT;
}

static int gpu_fill(struct intel_context *ce,
		    struct drm_i915_gem_object *obj,
		    unsigned int dw)
{
	struct i915_vma *vma;
	int err;

	GEM_BUG_ON(obj->base.size > ce->vm->total);
	GEM_BUG_ON(!intel_engine_can_store_dword(ce->engine));

	vma = i915_vma_instance(obj, ce->vm, NULL);
	if (IS_ERR(vma))
		return PTR_ERR(vma);

	err = i915_vma_pin(vma, 0, 0, PIN_HIGH | PIN_USER);
	if (err)
		return err;

	/*
	 * Within the GTT the huge object maps every page onto
	 * its 1024 real pages (using phys_pfn = dma_pfn % 1024).
	 * We set the nth dword within the page using the nth
	 * mapping via the GTT - this should exercise the GTT mapping
	 * whilst checking that each context provides a unique view
	 * into the object.
	 */
	err = igt_gpu_fill_dw(ce, vma,
			      (dw * real_page_count(obj)) << PAGE_SHIFT |
			      (dw * sizeof(u32)),
			      real_page_count(obj),
			      dw);
	i915_vma_unpin(vma);

	return err;
}

static int cpu_fill(struct drm_i915_gem_object *obj, u32 value)
{
	const bool has_llc = HAS_LLC(to_i915(obj->base.dev));
	unsigned int n, m, need_flush;
	int err;

	i915_gem_object_lock(obj, NULL);
	err = i915_gem_object_prepare_write(obj, &need_flush);
	if (err)
		goto out;

	for (n = 0; n < real_page_count(obj); n++) {
		u32 *map;

		map = kmap_atomic(i915_gem_object_get_page(obj, n));
		for (m = 0; m < DW_PER_PAGE; m++)
			map[m] = value;
		if (!has_llc)
			drm_clflush_virt_range(map, PAGE_SIZE);
		kunmap_atomic(map);
	}

	i915_gem_object_finish_access(obj);
	obj->read_domains = I915_GEM_DOMAIN_GTT | I915_GEM_DOMAIN_CPU;
	obj->write_domain = 0;
out:
	i915_gem_object_unlock(obj);
	return err;
}

static noinline int cpu_check(struct drm_i915_gem_object *obj,
			      unsigned int idx, unsigned int max)
{
	unsigned int n, m, needs_flush;
	int err;

	i915_gem_object_lock(obj, NULL);
	err = i915_gem_object_prepare_read(obj, &needs_flush);
	if (err)
		goto out_unlock;

	for (n = 0; n < real_page_count(obj); n++) {
		u32 *map;

		map = kmap_atomic(i915_gem_object_get_page(obj, n));
		if (needs_flush & CLFLUSH_BEFORE)
			drm_clflush_virt_range(map, PAGE_SIZE);

		for (m = 0; m < max; m++) {
			if (map[m] != m) {
				pr_err("%pS: Invalid value at object %d page %d/%ld, offset %d/%d: found %x expected %x\n",
				       __builtin_return_address(0), idx,
				       n, real_page_count(obj), m, max,
				       map[m], m);
				err = -EINVAL;
				goto out_unmap;
			}
		}

		for (; m < DW_PER_PAGE; m++) {
			if (map[m] != STACK_MAGIC) {
				pr_err("%pS: Invalid value at object %d page %d, offset %d: found %x expected %x (uninitialised)\n",
				       __builtin_return_address(0), idx, n, m,
				       map[m], STACK_MAGIC);
				err = -EINVAL;
				goto out_unmap;
			}
		}

out_unmap:
		kunmap_atomic(map);
		if (err)
			break;
	}

	i915_gem_object_finish_access(obj);
out_unlock:
	i915_gem_object_unlock(obj);
	return err;
}

static int file_add_object(struct file *file, struct drm_i915_gem_object *obj)
{
	int err;

	GEM_BUG_ON(obj->base.handle_count);

	/* tie the object to the drm_file for easy reaping */
	err = idr_alloc(&to_drm_file(file)->object_idr,
			&obj->base, 1, 0, GFP_KERNEL);
	if (err < 0)
		return err;

	i915_gem_object_get(obj);
	obj->base.handle_count++;
	return 0;
}

static struct drm_i915_gem_object *
create_test_object(struct i915_address_space *vm,
		   struct file *file,
		   struct list_head *objects)
{
	struct drm_i915_gem_object *obj;
	u64 size;
	int err;

	/* Keep in GEM's good graces */
	intel_gt_retire_requests(vm->gt);

	size = min(vm->total / 2, 1024ull * DW_PER_PAGE * PAGE_SIZE);
	size = round_down(size, DW_PER_PAGE * PAGE_SIZE);

	obj = huge_gem_object(vm->i915, DW_PER_PAGE * PAGE_SIZE, size);
	if (IS_ERR(obj))
		return obj;

	err = file_add_object(file, obj);
	i915_gem_object_put(obj);
	if (err)
		return ERR_PTR(err);

	err = cpu_fill(obj, STACK_MAGIC);
	if (err) {
		pr_err("Failed to fill object with cpu, err=%d\n",
		       err);
		return ERR_PTR(err);
	}

	list_add_tail(&obj->st_link, objects);
	return obj;
}

static unsigned long max_dwords(struct drm_i915_gem_object *obj)
{
	unsigned long npages = fake_page_count(obj);

	GEM_BUG_ON(!IS_ALIGNED(npages, DW_PER_PAGE));
	return npages / DW_PER_PAGE;
}
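
/*
 * throttle() keeps a small sliding window of in-flight requests: wait
 * for the oldest entry in @q to complete, then push a fresh request on
 * @ce onto the tail, so the tests never run more than ARRAY_SIZE(q)
 * requests ahead of the GPU. throttle_release() drops whatever remains
 * queued.
 */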
static void throttle_release(struct i915_request **q, int count)
{
	int i;

	for (i = 0; i < count; i++) {
		if (IS_ERR_OR_NULL(q[i]))
			continue;

		i915_request_put(fetch_and_zero(&q[i]));
	}
}

static int throttle(struct intel_context *ce,
		    struct i915_request **q, int count)
{
	int i;

	if (!IS_ERR_OR_NULL(q[0])) {
		if (i915_request_wait(q[0],
				      I915_WAIT_INTERRUPTIBLE,
				      MAX_SCHEDULE_TIMEOUT) < 0)
			return -EINTR;

		i915_request_put(q[0]);
	}

	for (i = 0; i < count - 1; i++)
		q[i] = q[i + 1];

	q[i] = intel_context_create_request(ce);
	if (IS_ERR(q[i]))
		return PTR_ERR(q[i]);

	i915_request_get(q[i]);
	i915_request_add(q[i]);

	return 0;
}

static int igt_ctx_exec(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct intel_engine_cs *engine;
	int err = -ENODEV;

	/*
	 * Create a few different contexts (with different mm) and write
	 * through each ctx/mm using the GPU making sure those writes end
	 * up in the expected pages of our obj.
	 */

	if (!DRIVER_CAPS(i915)->has_logical_contexts)
		return 0;

	for_each_uabi_engine(engine, i915) {
		struct drm_i915_gem_object *obj = NULL;
		unsigned long ncontexts, ndwords, dw;
		struct i915_request *tq[5] = {};
		struct igt_live_test t;
		IGT_TIMEOUT(end_time);
		LIST_HEAD(objects);
		struct file *file;

		if (!intel_engine_can_store_dword(engine))
			continue;

		if (!engine->context_size)
			continue; /* No logical context support in HW */

		file = mock_file(i915);
		if (IS_ERR(file))
			return PTR_ERR(file);

		err = igt_live_test_begin(&t, i915, __func__, engine->name);
		if (err)
			goto out_file;

		ncontexts = 0;
		ndwords = 0;
		dw = 0;
		while (!time_after(jiffies, end_time)) {
			struct i915_gem_context *ctx;
			struct intel_context *ce;

			ctx = kernel_context(i915, NULL);
			if (IS_ERR(ctx)) {
				err = PTR_ERR(ctx);
				goto out_file;
			}

			ce = i915_gem_context_get_engine(ctx, engine->legacy_idx);
			GEM_BUG_ON(IS_ERR(ce));

			if (!obj) {
				obj = create_test_object(ce->vm, file, &objects);
				if (IS_ERR(obj)) {
					err = PTR_ERR(obj);
					intel_context_put(ce);
					kernel_context_close(ctx);
					goto out_file;
				}
			}

			err = gpu_fill(ce, obj, dw);
			if (err) {
				pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) [full-ppgtt? %s], err=%d\n",
				       ndwords, dw, max_dwords(obj),
				       engine->name,
				       str_yes_no(i915_gem_context_has_full_ppgtt(ctx)),
				       err);
				intel_context_put(ce);
				kernel_context_close(ctx);
				goto out_file;
			}

			err = throttle(ce, tq, ARRAY_SIZE(tq));
			if (err) {
				intel_context_put(ce);
				kernel_context_close(ctx);
				goto out_file;
			}

			if (++dw == max_dwords(obj)) {
				obj = NULL;
				dw = 0;
			}

			ndwords++;
			ncontexts++;

			intel_context_put(ce);
			kernel_context_close(ctx);
		}

		pr_info("Submitted %lu contexts to %s, filling %lu dwords\n",
			ncontexts, engine->name, ndwords);

		ncontexts = dw = 0;
		list_for_each_entry(obj, &objects, st_link) {
			unsigned int rem =
				min_t(unsigned int, ndwords - dw, max_dwords(obj));

			err = cpu_check(obj, ncontexts++, rem);
			if (err)
				break;

			dw += rem;
		}

out_file:
		throttle_release(tq, ARRAY_SIZE(tq));
		if (igt_live_test_end(&t))
			err = -EIO;

		fput(file);
		if (err)
			return err;

		i915_gem_drain_freed_objects(i915);
	}

	return 0;
}

static int igt_shared_ctx_exec(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct i915_request *tq[5] = {};
	struct i915_gem_context *parent;
	struct intel_engine_cs *engine;
	struct igt_live_test t;
	struct file *file;
	int err = 0;

	/*
	 * Create a few different contexts with the same mm and write
	 * through each ctx using the GPU making sure those writes end
	 * up in the expected pages of our obj.
	 */
	if (!DRIVER_CAPS(i915)->has_logical_contexts)
		return 0;

	file = mock_file(i915);
	if (IS_ERR(file))
		return PTR_ERR(file);

	parent = live_context(i915, file);
	if (IS_ERR(parent)) {
		err = PTR_ERR(parent);
		goto out_file;
	}

	if (!parent->vm) { /* not full-ppgtt; nothing to share */
		err = 0;
		goto out_file;
	}

	err = igt_live_test_begin(&t, i915, __func__, "");
	if (err)
		goto out_file;

	for_each_uabi_engine(engine, i915) {
		unsigned long ncontexts, ndwords, dw;
		struct drm_i915_gem_object *obj = NULL;
		IGT_TIMEOUT(end_time);
		LIST_HEAD(objects);

		if (!intel_engine_can_store_dword(engine))
			continue;

		dw = 0;
		ndwords = 0;
		ncontexts = 0;
		while (!time_after(jiffies, end_time)) {
			struct i915_gem_context *ctx;
			struct intel_context *ce;

			ctx = kernel_context(i915, parent->vm);
			if (IS_ERR(ctx)) {
				err = PTR_ERR(ctx);
				goto out_test;
			}

			ce = i915_gem_context_get_engine(ctx, engine->legacy_idx);
			GEM_BUG_ON(IS_ERR(ce));

			if (!obj) {
				obj = create_test_object(parent->vm,
							 file, &objects);
				if (IS_ERR(obj)) {
					err = PTR_ERR(obj);
					intel_context_put(ce);
					kernel_context_close(ctx);
					goto out_test;
				}
			}

			err = gpu_fill(ce, obj, dw);
			if (err) {
				pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) [full-ppgtt? %s], err=%d\n",
				       ndwords, dw, max_dwords(obj),
				       engine->name,
				       str_yes_no(i915_gem_context_has_full_ppgtt(ctx)),
				       err);
				intel_context_put(ce);
				kernel_context_close(ctx);
				goto out_test;
			}

			err = throttle(ce, tq, ARRAY_SIZE(tq));
			if (err) {
				intel_context_put(ce);
				kernel_context_close(ctx);
				goto out_test;
			}

			if (++dw == max_dwords(obj)) {
				obj = NULL;
				dw = 0;
			}

			ndwords++;
			ncontexts++;

			intel_context_put(ce);
			kernel_context_close(ctx);
		}
		pr_info("Submitted %lu contexts to %s, filling %lu dwords\n",
			ncontexts, engine->name, ndwords);

		ncontexts = dw = 0;
		list_for_each_entry(obj, &objects, st_link) {
			unsigned int rem =
				min_t(unsigned int, ndwords - dw, max_dwords(obj));

			err = cpu_check(obj, ncontexts++, rem);
			if (err)
				goto out_test;

			dw += rem;
		}

		i915_gem_drain_freed_objects(i915);
	}
out_test:
	throttle_release(tq, ARRAY_SIZE(tq));
	if (igt_live_test_end(&t))
		err = -EIO;
out_file:
	fput(file);
	return err;
}
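
/*
 * Build a batch that reads back R_PWR_CLK_STATE (the RPCS register,
 * which encodes the slice/subslice configuration the context is
 * running with) into the first dword of @vma via MI_STORE_REGISTER_MEM.
 */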
static int rpcs_query_batch(struct drm_i915_gem_object *rpcs,
			    struct i915_vma *vma,
			    struct intel_engine_cs *engine)
{
	u32 *cmd;

	GEM_BUG_ON(GRAPHICS_VER(vma->vm->i915) < 8);

	cmd = i915_gem_object_pin_map(rpcs, I915_MAP_WB);
	if (IS_ERR(cmd))
		return PTR_ERR(cmd);

	*cmd++ = MI_STORE_REGISTER_MEM_GEN8;
	*cmd++ = i915_mmio_reg_offset(GEN8_R_PWR_CLK_STATE(engine->mmio_base));
	*cmd++ = lower_32_bits(vma->node.start);
	*cmd++ = upper_32_bits(vma->node.start);
	*cmd = MI_BATCH_BUFFER_END;

	__i915_gem_object_flush_map(rpcs, 0, 64);
	i915_gem_object_unpin_map(rpcs);

	intel_gt_chipset_flush(vma->vm->gt);

	return 0;
}

static int
emit_rpcs_query(struct drm_i915_gem_object *obj,
		struct intel_context *ce,
		struct i915_request **rq_out)
{
	struct drm_i915_private *i915 = to_i915(obj->base.dev);
	struct i915_request *rq;
	struct i915_gem_ww_ctx ww;
	struct i915_vma *batch;
	struct i915_vma *vma;
	struct drm_i915_gem_object *rpcs;
	int err;

	GEM_BUG_ON(!intel_engine_can_store_dword(ce->engine));

	if (GRAPHICS_VER(i915) < 8)
		return -EINVAL;

	vma = i915_vma_instance(obj, ce->vm, NULL);
	if (IS_ERR(vma))
		return PTR_ERR(vma);

	rpcs = i915_gem_object_create_internal(i915, PAGE_SIZE);
	if (IS_ERR(rpcs))
		return PTR_ERR(rpcs);

	batch = i915_vma_instance(rpcs, ce->vm, NULL);
	if (IS_ERR(batch)) {
		err = PTR_ERR(batch);
		goto err_put;
	}

	i915_gem_ww_ctx_init(&ww, false);
retry:
	err = i915_gem_object_lock(obj, &ww);
	if (!err)
		err = i915_gem_object_lock(rpcs, &ww);
	if (!err)
		err = i915_gem_object_set_to_gtt_domain(obj, false);
	if (!err)
		err = i915_vma_pin_ww(vma, &ww, 0, 0, PIN_USER);
	if (err)
		goto err_put;

	err = i915_vma_pin_ww(batch, &ww, 0, 0, PIN_USER);
	if (err)
		goto err_vma;

	err = rpcs_query_batch(rpcs, vma, ce->engine);
	if (err)
		goto err_batch;

	rq = i915_request_create(ce);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto err_batch;
	}

	err = i915_request_await_object(rq, batch->obj, false);
	if (err == 0)
		err = i915_vma_move_to_active(batch, rq, 0);
	if (err)
		goto skip_request;

	err = i915_request_await_object(rq, vma->obj, true);
	if (err == 0)
		err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
	if (err)
		goto skip_request;

	if (rq->engine->emit_init_breadcrumb) {
		err = rq->engine->emit_init_breadcrumb(rq);
		if (err)
			goto skip_request;
	}

	err = rq->engine->emit_bb_start(rq,
					batch->node.start, batch->node.size,
					0);
	if (err)
		goto skip_request;

	*rq_out = i915_request_get(rq);

skip_request:
	if (err)
		i915_request_set_error_once(rq, err);
	i915_request_add(rq);
err_batch:
	i915_vma_unpin(batch);
err_vma:
	i915_vma_unpin(vma);
err_put:
	if (err == -EDEADLK) {
		err = i915_gem_ww_ctx_backoff(&ww);
		if (!err)
			goto retry;
	}
	i915_gem_ww_ctx_fini(&ww);
	i915_gem_object_put(rpcs);
	return err;
}

#define TEST_IDLE	BIT(0)
#define TEST_BUSY	BIT(1)
#define TEST_RESET	BIT(2)

static int
__sseu_prepare(const char *name,
	       unsigned int flags,
	       struct intel_context *ce,
	       struct igt_spinner **spin)
{
	struct i915_request *rq;
	int ret;

	*spin = NULL;
	if (!(flags & (TEST_BUSY | TEST_RESET)))
		return 0;

	*spin = kzalloc(sizeof(**spin), GFP_KERNEL);
	if (!*spin)
		return -ENOMEM;

	ret = igt_spinner_init(*spin, ce->engine->gt);
	if (ret)
		goto err_free;

	rq = igt_spinner_create_request(*spin, ce, MI_NOOP);
	if (IS_ERR(rq)) {
		ret = PTR_ERR(rq);
		goto err_fini;
	}

	i915_request_add(rq);

	if (!igt_wait_for_spinner(*spin, rq)) {
		pr_err("%s: Spinner failed to start!\n", name);
		ret = -ETIMEDOUT;
		goto err_end;
	}

	return 0;

err_end:
	igt_spinner_end(*spin);
err_fini:
	igt_spinner_fini(*spin);
err_free:
	kfree(fetch_and_zero(spin));
	return ret;
}
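
/*
 * Run an RPCS query on @ce and decode the enabled slice count from the
 * value read back. Returns the slice count on success, negative errno
 * on failure; the raw register value is also returned via @rpcs.
 */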
static int
__read_slice_count(struct intel_context *ce,
		   struct drm_i915_gem_object *obj,
		   struct igt_spinner *spin,
		   u32 *rpcs)
{
	struct i915_request *rq = NULL;
	u32 s_mask, s_shift;
	unsigned int cnt;
	u32 *buf, val;
	long ret;

	ret = emit_rpcs_query(obj, ce, &rq);
	if (ret)
		return ret;

	if (spin)
		igt_spinner_end(spin);

	ret = i915_request_wait(rq, 0, MAX_SCHEDULE_TIMEOUT);
	i915_request_put(rq);
	if (ret < 0)
		return ret;

	buf = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WB);
	if (IS_ERR(buf)) {
		ret = PTR_ERR(buf);
		return ret;
	}

	if (GRAPHICS_VER(ce->engine->i915) >= 11) {
		s_mask = GEN11_RPCS_S_CNT_MASK;
		s_shift = GEN11_RPCS_S_CNT_SHIFT;
	} else {
		s_mask = GEN8_RPCS_S_CNT_MASK;
		s_shift = GEN8_RPCS_S_CNT_SHIFT;
	}

	val = *buf;
	cnt = (val & s_mask) >> s_shift;
	*rpcs = val;

	i915_gem_object_unpin_map(obj);

	return cnt;
}

static int
__check_rpcs(const char *name, u32 rpcs, int slices, unsigned int expected,
	     const char *prefix, const char *suffix)
{
	if (slices == expected)
		return 0;

	if (slices < 0) {
		pr_err("%s: %s read slice count failed with %d%s\n",
		       name, prefix, slices, suffix);
		return slices;
	}

	pr_err("%s: %s slice count %d is not %u%s\n",
	       name, prefix, slices, expected, suffix);

	pr_info("RPCS=0x%x; %u%sx%u%s\n",
		rpcs, slices,
		(rpcs & GEN8_RPCS_S_CNT_ENABLE) ? "*" : "",
		(rpcs & GEN8_RPCS_SS_CNT_MASK) >> GEN8_RPCS_SS_CNT_SHIFT,
		(rpcs & GEN8_RPCS_SS_CNT_ENABLE) ? "*" : "");

	return -EINVAL;
}

static int
__sseu_finish(const char *name,
	      unsigned int flags,
	      struct intel_context *ce,
	      struct drm_i915_gem_object *obj,
	      unsigned int expected,
	      struct igt_spinner *spin)
{
	unsigned int slices = hweight32(ce->engine->sseu.slice_mask);
	u32 rpcs = 0;
	int ret = 0;

	if (flags & TEST_RESET) {
		ret = intel_engine_reset(ce->engine, "sseu");
		if (ret)
			goto out;
	}

	ret = __read_slice_count(ce, obj,
				 flags & TEST_RESET ? NULL : spin, &rpcs);
	ret = __check_rpcs(name, rpcs, ret, expected, "Context", "!");
	if (ret)
		goto out;

	ret = __read_slice_count(ce->engine->kernel_context, obj, NULL, &rpcs);
	ret = __check_rpcs(name, rpcs, ret, slices, "Kernel context", "!");

out:
	if (spin)
		igt_spinner_end(spin);

	if ((flags & TEST_IDLE) && ret == 0) {
		ret = igt_flush_test(ce->engine->i915);
		if (ret)
			return ret;

		ret = __read_slice_count(ce, obj, NULL, &rpcs);
		ret = __check_rpcs(name, rpcs, ret, expected,
				   "Context", " after idle!");
	}

	return ret;
}

static int
__sseu_test(const char *name,
	    unsigned int flags,
	    struct intel_context *ce,
	    struct drm_i915_gem_object *obj,
	    struct intel_sseu sseu)
{
	struct igt_spinner *spin = NULL;
	int ret;

	intel_engine_pm_get(ce->engine);

	ret = __sseu_prepare(name, flags, ce, &spin);
	if (ret)
		goto out_pm;

	ret = intel_context_reconfigure_sseu(ce, sseu);
	if (ret)
		goto out_spin;

	ret = __sseu_finish(name, flags, ce, obj,
			    hweight32(sseu.slice_mask), spin);

out_spin:
	if (spin) {
		igt_spinner_end(spin);
		igt_spinner_fini(spin);
		kfree(spin);
	}
out_pm:
	intel_engine_pm_put(ce->engine);
	return ret;
}

static int
__igt_ctx_sseu(struct drm_i915_private *i915,
	       const char *name,
	       unsigned int flags)
{
	struct drm_i915_gem_object *obj;
	int inst = 0;
	int ret = 0;

	if (GRAPHICS_VER(i915) < 9)
		return 0;

	if (flags & TEST_RESET)
		igt_global_reset_lock(to_gt(i915));

	obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
	if (IS_ERR(obj)) {
		ret = PTR_ERR(obj);
		goto out_unlock;
	}

	do {
		struct intel_engine_cs *engine;
		struct intel_context *ce;
		struct intel_sseu pg_sseu;

		engine = intel_engine_lookup_user(i915,
						  I915_ENGINE_CLASS_RENDER,
						  inst++);
		if (!engine)
			break;

		if (hweight32(engine->sseu.slice_mask) < 2)
			continue;

		if (!engine->gt->info.sseu.has_slice_pg)
			continue;

		/*
		 * Gen11 VME friendly power-gated configuration with
		 * half enabled sub-slices.
		 */
		pg_sseu = engine->sseu;
		pg_sseu.slice_mask = 1;
		pg_sseu.subslice_mask =
			~(~0 << (hweight32(engine->sseu.subslice_mask) / 2));

		pr_info("%s: SSEU subtest '%s', flags=%x, def_slices=%u, pg_slices=%u\n",
			engine->name, name, flags,
			hweight32(engine->sseu.slice_mask),
			hweight32(pg_sseu.slice_mask));

		ce = intel_context_create(engine);
		if (IS_ERR(ce)) {
			ret = PTR_ERR(ce);
			goto out_put;
		}

		ret = intel_context_pin(ce);
		if (ret)
			goto out_ce;

		/* First set the default mask. */
		ret = __sseu_test(name, flags, ce, obj, engine->sseu);
		if (ret)
			goto out_unpin;

		/* Then set a power-gated configuration. */
		ret = __sseu_test(name, flags, ce, obj, pg_sseu);
		if (ret)
			goto out_unpin;

		/* Back to defaults. */
		ret = __sseu_test(name, flags, ce, obj, engine->sseu);
		if (ret)
			goto out_unpin;

		/* One last power-gated configuration for the road. */
		ret = __sseu_test(name, flags, ce, obj, pg_sseu);
		if (ret)
			goto out_unpin;

out_unpin:
		intel_context_unpin(ce);
out_ce:
		intel_context_put(ce);
	} while (!ret);

	if (igt_flush_test(i915))
		ret = -EIO;

out_put:
	i915_gem_object_put(obj);

out_unlock:
	if (flags & TEST_RESET)
		igt_global_reset_unlock(to_gt(i915));

	if (ret)
		pr_err("%s: Failed with %d!\n", name, ret);

	return ret;
}

static int igt_ctx_sseu(void *arg)
{
	struct {
		const char *name;
		unsigned int flags;
	} *phase, phases[] = {
		{ .name = "basic", .flags = 0 },
		{ .name = "idle", .flags = TEST_IDLE },
		{ .name = "busy", .flags = TEST_BUSY },
		{ .name = "busy-reset", .flags = TEST_BUSY | TEST_RESET },
		{ .name = "busy-idle", .flags = TEST_BUSY | TEST_IDLE },
		{ .name = "reset-idle", .flags = TEST_RESET | TEST_IDLE },
	};
	unsigned int i;
	int ret = 0;

	for (i = 0, phase = phases; ret == 0 && i < ARRAY_SIZE(phases);
	     i++, phase++)
		ret = __igt_ctx_sseu(arg, phase->name, phase->flags);

	return ret;
}

static int igt_ctx_readonly(void *arg)
{
	struct drm_i915_private *i915 = arg;
	unsigned long idx, ndwords, dw, num_engines;
	struct drm_i915_gem_object *obj = NULL;
	struct i915_request *tq[5] = {};
	struct i915_gem_engines_iter it;
	struct i915_address_space *vm;
	struct i915_gem_context *ctx;
	struct intel_context *ce;
	struct igt_live_test t;
	I915_RND_STATE(prng);
	IGT_TIMEOUT(end_time);
	LIST_HEAD(objects);
	struct file *file;
	int err = -ENODEV;

	/*
	 * Create a few read-only objects (with the occasional writable object)
	 * and try to write into these objects, checking that the GPU discards
	 * any write to a read-only object.
	 */

	file = mock_file(i915);
	if (IS_ERR(file))
		return PTR_ERR(file);

	err = igt_live_test_begin(&t, i915, __func__, "");
	if (err)
		goto out_file;

	ctx = live_context(i915, file);
	if (IS_ERR(ctx)) {
		err = PTR_ERR(ctx);
		goto out_file;
	}

	vm = ctx->vm ?: &to_gt(i915)->ggtt->alias->vm;
	if (!vm || !vm->has_read_only) {
		err = 0;
		goto out_file;
	}

	num_engines = 0;
	for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it)
		if (intel_engine_can_store_dword(ce->engine))
			num_engines++;
	i915_gem_context_unlock_engines(ctx);

	ndwords = 0;
	dw = 0;
	while (!time_after(jiffies, end_time)) {
		for_each_gem_engine(ce,
				    i915_gem_context_lock_engines(ctx), it) {
			if (!intel_engine_can_store_dword(ce->engine))
				continue;

			if (!obj) {
				obj = create_test_object(ce->vm, file, &objects);
				if (IS_ERR(obj)) {
					err = PTR_ERR(obj);
					i915_gem_context_unlock_engines(ctx);
					goto out_file;
				}

				if (prandom_u32_state(&prng) & 1)
					i915_gem_object_set_readonly(obj);
			}

			err = gpu_fill(ce, obj, dw);
			if (err) {
				pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) [full-ppgtt? %s], err=%d\n",
				       ndwords, dw, max_dwords(obj),
				       ce->engine->name,
				       str_yes_no(i915_gem_context_has_full_ppgtt(ctx)),
				       err);
				i915_gem_context_unlock_engines(ctx);
				goto out_file;
			}

			err = throttle(ce, tq, ARRAY_SIZE(tq));
			if (err) {
				i915_gem_context_unlock_engines(ctx);
				goto out_file;
			}

			if (++dw == max_dwords(obj)) {
				obj = NULL;
				dw = 0;
			}
			ndwords++;
		}
		i915_gem_context_unlock_engines(ctx);
	}
	pr_info("Submitted %lu dwords (across %lu engines)\n",
		ndwords, num_engines);

	dw = 0;
	idx = 0;
	list_for_each_entry(obj, &objects, st_link) {
		unsigned int rem =
			min_t(unsigned int, ndwords - dw, max_dwords(obj));
		unsigned int num_writes;

		num_writes = rem;
		if (i915_gem_object_is_readonly(obj))
			num_writes = 0;

		err = cpu_check(obj, idx++, num_writes);
		if (err)
			break;

		dw += rem;
	}

out_file:
	throttle_release(tq, ARRAY_SIZE(tq));
	if (igt_live_test_end(&t))
		err = -EIO;

	fput(file);
	return err;
}
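
/*
 * The vm isolation test below pokes values into randomly chosen
 * addresses; make sure the offset does not land inside an existing
 * allocation, where a write really would be visible to another read.
 */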
static int check_scratch(struct i915_address_space *vm, u64 offset)
{
	struct drm_mm_node *node;

	mutex_lock(&vm->mutex);
	node = __drm_mm_interval_first(&vm->mm,
				       offset, offset + sizeof(u32) - 1);
	mutex_unlock(&vm->mutex);
	if (!node || node->start > offset)
		return 0;

	GEM_BUG_ON(offset >= node->start + node->size);

	pr_err("Target offset 0x%08x_%08x overlaps with a node in the mm!\n",
	       upper_32_bits(offset), lower_32_bits(offset));
	return -EINVAL;
}

static int write_to_scratch(struct i915_gem_context *ctx,
			    struct intel_engine_cs *engine,
			    struct drm_i915_gem_object *obj,
			    u64 offset, u32 value)
{
	struct drm_i915_private *i915 = ctx->i915;
	struct i915_address_space *vm;
	struct i915_request *rq;
	struct i915_vma *vma;
	u32 *cmd;
	int err;

	GEM_BUG_ON(offset < I915_GTT_PAGE_SIZE);

	err = check_scratch(ctx->vm, offset);
	if (err)
		return err;

	cmd = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WB);
	if (IS_ERR(cmd))
		return PTR_ERR(cmd);

	*cmd++ = MI_STORE_DWORD_IMM_GEN4;
	if (GRAPHICS_VER(i915) >= 8) {
		*cmd++ = lower_32_bits(offset);
		*cmd++ = upper_32_bits(offset);
	} else {
		*cmd++ = 0;
		*cmd++ = offset;
	}
	*cmd++ = value;
	*cmd = MI_BATCH_BUFFER_END;
	__i915_gem_object_flush_map(obj, 0, 64);
	i915_gem_object_unpin_map(obj);

	intel_gt_chipset_flush(engine->gt);

	vm = i915_gem_context_get_eb_vm(ctx);
	vma = i915_vma_instance(obj, vm, NULL);
	if (IS_ERR(vma)) {
		err = PTR_ERR(vma);
		goto out_vm;
	}

	err = i915_vma_pin(vma, 0, 0, PIN_USER | PIN_OFFSET_FIXED);
	if (err)
		goto out_vm;

	rq = igt_request_alloc(ctx, engine);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto err_unpin;
	}

	i915_vma_lock(vma);
	err = i915_request_await_object(rq, vma->obj, false);
	if (err == 0)
		err = i915_vma_move_to_active(vma, rq, 0);
	i915_vma_unlock(vma);
	if (err)
		goto skip_request;

	if (rq->engine->emit_init_breadcrumb) {
		err = rq->engine->emit_init_breadcrumb(rq);
		if (err)
			goto skip_request;
	}

	err = engine->emit_bb_start(rq, vma->node.start, vma->node.size, 0);
	if (err)
		goto skip_request;

	i915_vma_unpin(vma);

	i915_request_add(rq);

	goto out_vm;
skip_request:
	i915_request_set_error_once(rq, err);
	i915_request_add(rq);
err_unpin:
	i915_vma_unpin(vma);
out_vm:
	i915_vm_put(vm);

	if (!err)
		err = i915_gem_object_wait(obj, 0, MAX_SCHEDULE_TIMEOUT);

	return err;
}
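
/*
 * Read a single dword back from @offset in the context's vm by loading
 * it into a GPR with MI_LOAD_REGISTER_MEM and then storing that
 * register into our result object with MI_STORE_REGISTER_MEM. Before
 * gen8 such register access is privileged, so the batch must be run
 * as a secure dispatch from the global GTT.
 */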
static int read_from_scratch(struct i915_gem_context *ctx,
			     struct intel_engine_cs *engine,
			     struct drm_i915_gem_object *obj,
			     u64 offset, u32 *value)
{
	struct drm_i915_private *i915 = ctx->i915;
	struct i915_address_space *vm;
	const u32 result = 0x100;
	struct i915_request *rq;
	struct i915_vma *vma;
	unsigned int flags;
	u32 *cmd;
	int err;

	GEM_BUG_ON(offset < I915_GTT_PAGE_SIZE);

	err = check_scratch(ctx->vm, offset);
	if (err)
		return err;

	if (GRAPHICS_VER(i915) >= 8) {
		const u32 GPR0 = engine->mmio_base + 0x600;

		vm = i915_gem_context_get_eb_vm(ctx);
		vma = i915_vma_instance(obj, vm, NULL);
		if (IS_ERR(vma)) {
			err = PTR_ERR(vma);
			goto out_vm;
		}

		err = i915_vma_pin(vma, 0, 0, PIN_USER | PIN_OFFSET_FIXED);
		if (err)
			goto out_vm;

		cmd = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WB);
		if (IS_ERR(cmd)) {
			err = PTR_ERR(cmd);
			goto err_unpin;
		}

		memset(cmd, POISON_INUSE, PAGE_SIZE);
		*cmd++ = MI_LOAD_REGISTER_MEM_GEN8;
		*cmd++ = GPR0;
		*cmd++ = lower_32_bits(offset);
		*cmd++ = upper_32_bits(offset);
		*cmd++ = MI_STORE_REGISTER_MEM_GEN8;
		*cmd++ = GPR0;
		*cmd++ = result;
		*cmd++ = 0;
		*cmd = MI_BATCH_BUFFER_END;

		i915_gem_object_flush_map(obj);
		i915_gem_object_unpin_map(obj);

		flags = 0;
	} else {
		const u32 reg = engine->mmio_base + 0x420;

		/* hsw: register access even to 3DPRIM! is protected */
		vm = i915_vm_get(&engine->gt->ggtt->vm);
		vma = i915_vma_instance(obj, vm, NULL);
		if (IS_ERR(vma)) {
			err = PTR_ERR(vma);
			goto out_vm;
		}

		err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL);
		if (err)
			goto out_vm;

		cmd = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WB);
		if (IS_ERR(cmd)) {
			err = PTR_ERR(cmd);
			goto err_unpin;
		}

		memset(cmd, POISON_INUSE, PAGE_SIZE);
		*cmd++ = MI_LOAD_REGISTER_MEM;
		*cmd++ = reg;
		*cmd++ = offset;
		*cmd++ = MI_STORE_REGISTER_MEM | MI_USE_GGTT;
		*cmd++ = reg;
		*cmd++ = vma->node.start + result;
		*cmd = MI_BATCH_BUFFER_END;

		i915_gem_object_flush_map(obj);
		i915_gem_object_unpin_map(obj);

		flags = I915_DISPATCH_SECURE;
	}

	intel_gt_chipset_flush(engine->gt);

	rq = igt_request_alloc(ctx, engine);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto err_unpin;
	}

	i915_vma_lock(vma);
	err = i915_request_await_object(rq, vma->obj, true);
	if (err == 0)
		err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
	i915_vma_unlock(vma);
	if (err)
		goto skip_request;

	if (rq->engine->emit_init_breadcrumb) {
		err = rq->engine->emit_init_breadcrumb(rq);
		if (err)
			goto skip_request;
	}

	err = engine->emit_bb_start(rq, vma->node.start, vma->node.size, flags);
	if (err)
		goto skip_request;

	i915_vma_unpin(vma);

	i915_request_add(rq);

	i915_gem_object_lock(obj, NULL);
	err = i915_gem_object_set_to_cpu_domain(obj, false);
	i915_gem_object_unlock(obj);
	if (err)
		goto out_vm;

	cmd = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WB);
	if (IS_ERR(cmd)) {
		err = PTR_ERR(cmd);
		goto out_vm;
	}

	*value = cmd[result / sizeof(*cmd)];
	i915_gem_object_unpin_map(obj);

	goto out_vm;
skip_request:
	i915_request_set_error_once(rq, err);
	i915_request_add(rq);
err_unpin:
	i915_vma_unpin(vma);
out_vm:
	i915_vm_put(vm);

	if (!err)
		err = i915_gem_object_wait(obj, 0, MAX_SCHEDULE_TIMEOUT);

	return err;
}
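
/*
 * Sample the scratch page backing this context's vm so that we know the
 * background value every unwritten address is expected to return, and
 * verify the scratch page is uniformly filled with that value.
 */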
static int check_scratch_page(struct i915_gem_context *ctx, u32 *out)
{
	struct i915_address_space *vm;
	u32 *vaddr;
	int err = 0;

	vm = ctx->vm;
	if (!vm)
		return -ENODEV;

	if (!vm->scratch[0]) {
		pr_err("No scratch page!\n");
		return -EINVAL;
	}

	vaddr = __px_vaddr(vm->scratch[0]);

	memcpy(out, vaddr, sizeof(*out));
	if (memchr_inv(vaddr, *out, PAGE_SIZE)) {
		pr_err("Inconsistent initial state of scratch page!\n");
		err = -EINVAL;
	}

	return err;
}

static int igt_vm_isolation(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct i915_gem_context *ctx_a, *ctx_b;
	struct drm_i915_gem_object *obj_a, *obj_b;
	unsigned long num_engines, count;
	struct intel_engine_cs *engine;
	struct igt_live_test t;
	I915_RND_STATE(prng);
	struct file *file;
	u64 vm_total;
	u32 expected;
	int err;

	if (GRAPHICS_VER(i915) < 7)
		return 0;

	/*
	 * The simple goal here is that a write into one context is not
	 * observed in a second (separate page tables and scratch).
	 */

	file = mock_file(i915);
	if (IS_ERR(file))
		return PTR_ERR(file);

	err = igt_live_test_begin(&t, i915, __func__, "");
	if (err)
		goto out_file;

	ctx_a = live_context(i915, file);
	if (IS_ERR(ctx_a)) {
		err = PTR_ERR(ctx_a);
		goto out_file;
	}

	ctx_b = live_context(i915, file);
	if (IS_ERR(ctx_b)) {
		err = PTR_ERR(ctx_b);
		goto out_file;
	}

	/* We can only test vm isolation if the vms are distinct */
	if (ctx_a->vm == ctx_b->vm)
		goto out_file;

	/* Read the initial state of the scratch page */
	err = check_scratch_page(ctx_a, &expected);
	if (err)
		goto out_file;

	err = check_scratch_page(ctx_b, &expected);
	if (err)
		goto out_file;

	vm_total = ctx_a->vm->total;
	GEM_BUG_ON(ctx_b->vm->total != vm_total);

	obj_a = i915_gem_object_create_internal(i915, PAGE_SIZE);
	if (IS_ERR(obj_a)) {
		err = PTR_ERR(obj_a);
		goto out_file;
	}

	obj_b = i915_gem_object_create_internal(i915, PAGE_SIZE);
	if (IS_ERR(obj_b)) {
		err = PTR_ERR(obj_b);
		goto put_a;
	}

	count = 0;
	num_engines = 0;
	for_each_uabi_engine(engine, i915) {
		IGT_TIMEOUT(end_time);
		unsigned long this = 0;

		if (!intel_engine_can_store_dword(engine))
			continue;

		/* Not all engines have their own GPR! */
		if (GRAPHICS_VER(i915) < 8 && engine->class != RENDER_CLASS)
			continue;

		while (!__igt_timeout(end_time, NULL)) {
			u32 value = 0xc5c5c5c5;
			u64 offset;

			/* Leave enough space at offset 0 for the batch */
			offset = igt_random_offset(&prng,
						   I915_GTT_PAGE_SIZE, vm_total,
						   sizeof(u32), alignof_dword);

			err = write_to_scratch(ctx_a, engine, obj_a,
					       offset, 0xdeadbeef);
			if (err == 0)
				err = read_from_scratch(ctx_b, engine, obj_b,
							offset, &value);
			if (err)
				goto put_b;

			if (value != expected) {
				pr_err("%s: Read %08x from scratch (offset 0x%08x_%08x), after %lu reads!\n",
				       engine->name, value,
				       upper_32_bits(offset),
				       lower_32_bits(offset),
				       this);
				err = -EINVAL;
				goto put_b;
			}

			this++;
		}
		count += this;
		num_engines++;
	}
	pr_info("Checked %lu scratch offsets across %lu engines\n",
		count, num_engines);

put_b:
	i915_gem_object_put(obj_b);
put_a:
	i915_gem_object_put(obj_a);
out_file:
	if (igt_live_test_end(&t))
		err = -EIO;
	fput(file);
	return err;
}

int i915_gem_context_live_selftests(struct drm_i915_private *i915)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(live_nop_switch),
		SUBTEST(live_parallel_switch),
		SUBTEST(igt_ctx_exec),
		SUBTEST(igt_ctx_readonly),
		SUBTEST(igt_ctx_sseu),
		SUBTEST(igt_shared_ctx_exec),
		SUBTEST(igt_vm_isolation),
	};

	if (intel_gt_is_wedged(to_gt(i915)))
		return 0;

	return i915_live_subtests(tests, i915);
}