1 /* 2 * SPDX-License-Identifier: MIT 3 * 4 * Copyright © 2017 Intel Corporation 5 */ 6 7 #include <linux/prime_numbers.h> 8 #include <linux/string_helpers.h> 9 10 #include "gem/i915_gem_internal.h" 11 #include "gem/i915_gem_pm.h" 12 #include "gt/intel_engine_pm.h" 13 #include "gt/intel_engine_regs.h" 14 #include "gt/intel_gt.h" 15 #include "gt/intel_gt_requests.h" 16 #include "gt/intel_reset.h" 17 #include "i915_selftest.h" 18 19 #include "gem/selftests/igt_gem_utils.h" 20 #include "selftests/i915_random.h" 21 #include "selftests/igt_flush_test.h" 22 #include "selftests/igt_live_test.h" 23 #include "selftests/igt_reset.h" 24 #include "selftests/igt_spinner.h" 25 #include "selftests/mock_drm.h" 26 #include "selftests/mock_gem_device.h" 27 28 #include "huge_gem_object.h" 29 #include "igt_gem_utils.h" 30 31 #define DW_PER_PAGE (PAGE_SIZE / sizeof(u32)) 32 33 static int live_nop_switch(void *arg) 34 { 35 const unsigned int nctx = 1024; 36 struct drm_i915_private *i915 = arg; 37 struct intel_engine_cs *engine; 38 struct i915_gem_context **ctx; 39 struct igt_live_test t; 40 struct file *file; 41 unsigned long n; 42 int err = -ENODEV; 43 44 /* 45 * Create as many contexts as we can feasibly get away with 46 * and check we can switch between them rapidly. 47 * 48 * Serves as very simple stress test for submission and HW switching 49 * between contexts. 50 */ 51 52 if (!DRIVER_CAPS(i915)->has_logical_contexts) 53 return 0; 54 55 file = mock_file(i915); 56 if (IS_ERR(file)) 57 return PTR_ERR(file); 58 59 ctx = kcalloc(nctx, sizeof(*ctx), GFP_KERNEL); 60 if (!ctx) { 61 err = -ENOMEM; 62 goto out_file; 63 } 64 65 for (n = 0; n < nctx; n++) { 66 ctx[n] = live_context(i915, file); 67 if (IS_ERR(ctx[n])) { 68 err = PTR_ERR(ctx[n]); 69 goto out_ctx; 70 } 71 } 72 73 for_each_uabi_engine(engine, i915) { 74 struct i915_request *rq = NULL; 75 unsigned long end_time, prime; 76 ktime_t times[2] = {}; 77 78 times[0] = ktime_get_raw(); 79 for (n = 0; n < nctx; n++) { 80 struct i915_request *this; 81 82 this = igt_request_alloc(ctx[n], engine); 83 if (IS_ERR(this)) { 84 err = PTR_ERR(this); 85 goto out_ctx; 86 } 87 if (rq) { 88 i915_request_await_dma_fence(this, &rq->fence); 89 i915_request_put(rq); 90 } 91 rq = i915_request_get(this); 92 i915_request_add(this); 93 } 94 if (i915_request_wait(rq, 0, 10 * HZ) < 0) { 95 pr_err("Failed to populated %d contexts\n", nctx); 96 intel_gt_set_wedged(engine->gt); 97 i915_request_put(rq); 98 err = -EIO; 99 goto out_ctx; 100 } 101 i915_request_put(rq); 102 103 times[1] = ktime_get_raw(); 104 105 pr_info("Populated %d contexts on %s in %lluns\n", 106 nctx, engine->name, ktime_to_ns(times[1] - times[0])); 107 108 err = igt_live_test_begin(&t, i915, __func__, engine->name); 109 if (err) 110 goto out_ctx; 111 112 end_time = jiffies + i915_selftest.timeout_jiffies; 113 for_each_prime_number_from(prime, 2, 8192) { 114 times[1] = ktime_get_raw(); 115 116 rq = NULL; 117 for (n = 0; n < prime; n++) { 118 struct i915_request *this; 119 120 this = igt_request_alloc(ctx[n % nctx], engine); 121 if (IS_ERR(this)) { 122 err = PTR_ERR(this); 123 goto out_ctx; 124 } 125 126 if (rq) { /* Force submission order */ 127 i915_request_await_dma_fence(this, &rq->fence); 128 i915_request_put(rq); 129 } 130 131 /* 132 * This space is left intentionally blank. 133 * 134 * We do not actually want to perform any 135 * action with this request, we just want 136 * to measure the latency in allocation 137 * and submission of our breadcrumbs - 138 * ensuring that the bare request is sufficient 139 * for the system to work (i.e. proper HEAD 140 * tracking of the rings, interrupt handling, 141 * etc). It also gives us the lowest bounds 142 * for latency. 143 */ 144 145 rq = i915_request_get(this); 146 i915_request_add(this); 147 } 148 GEM_BUG_ON(!rq); 149 if (i915_request_wait(rq, 0, HZ / 5) < 0) { 150 pr_err("Switching between %ld contexts timed out\n", 151 prime); 152 intel_gt_set_wedged(engine->gt); 153 i915_request_put(rq); 154 break; 155 } 156 i915_request_put(rq); 157 158 times[1] = ktime_sub(ktime_get_raw(), times[1]); 159 if (prime == 2) 160 times[0] = times[1]; 161 162 if (__igt_timeout(end_time, NULL)) 163 break; 164 } 165 166 err = igt_live_test_end(&t); 167 if (err) 168 goto out_ctx; 169 170 pr_info("Switch latencies on %s: 1 = %lluns, %lu = %lluns\n", 171 engine->name, 172 ktime_to_ns(times[0]), 173 prime - 1, div64_u64(ktime_to_ns(times[1]), prime - 1)); 174 } 175 176 out_ctx: 177 kfree(ctx); 178 out_file: 179 fput(file); 180 return err; 181 } 182 183 struct parallel_switch { 184 struct kthread_worker *worker; 185 struct kthread_work work; 186 struct intel_context *ce[2]; 187 int result; 188 }; 189 190 static void __live_parallel_switch1(struct kthread_work *work) 191 { 192 struct parallel_switch *arg = 193 container_of(work, typeof(*arg), work); 194 IGT_TIMEOUT(end_time); 195 unsigned long count; 196 197 count = 0; 198 arg->result = 0; 199 do { 200 struct i915_request *rq = NULL; 201 int n; 202 203 for (n = 0; !arg->result && n < ARRAY_SIZE(arg->ce); n++) { 204 struct i915_request *prev = rq; 205 206 rq = i915_request_create(arg->ce[n]); 207 if (IS_ERR(rq)) { 208 i915_request_put(prev); 209 arg->result = PTR_ERR(rq); 210 break; 211 } 212 213 i915_request_get(rq); 214 if (prev) { 215 arg->result = 216 i915_request_await_dma_fence(rq, 217 &prev->fence); 218 i915_request_put(prev); 219 } 220 221 i915_request_add(rq); 222 } 223 224 if (IS_ERR_OR_NULL(rq)) 225 break; 226 227 if (i915_request_wait(rq, 0, HZ) < 0) 228 arg->result = -ETIME; 229 230 i915_request_put(rq); 231 232 count++; 233 } while (!arg->result && !__igt_timeout(end_time, NULL)); 234 235 pr_info("%s: %lu switches (sync) <%d>\n", 236 arg->ce[0]->engine->name, count, arg->result); 237 } 238 239 static void __live_parallel_switchN(struct kthread_work *work) 240 { 241 struct parallel_switch *arg = 242 container_of(work, typeof(*arg), work); 243 struct i915_request *rq = NULL; 244 IGT_TIMEOUT(end_time); 245 unsigned long count; 246 int n; 247 248 count = 0; 249 arg->result = 0; 250 do { 251 for (n = 0; !arg->result && n < ARRAY_SIZE(arg->ce); n++) { 252 struct i915_request *prev = rq; 253 254 rq = i915_request_create(arg->ce[n]); 255 if (IS_ERR(rq)) { 256 i915_request_put(prev); 257 arg->result = PTR_ERR(rq); 258 break; 259 } 260 261 i915_request_get(rq); 262 if (prev) { 263 arg->result = 264 i915_request_await_dma_fence(rq, 265 &prev->fence); 266 i915_request_put(prev); 267 } 268 269 i915_request_add(rq); 270 } 271 272 count++; 273 } while (!arg->result && !__igt_timeout(end_time, NULL)); 274 275 if (!IS_ERR_OR_NULL(rq)) 276 i915_request_put(rq); 277 278 pr_info("%s: %lu switches (many) <%d>\n", 279 arg->ce[0]->engine->name, count, arg->result); 280 } 281 282 static int live_parallel_switch(void *arg) 283 { 284 struct drm_i915_private *i915 = arg; 285 static void (* const func[])(struct kthread_work *) = { 286 __live_parallel_switch1, 287 __live_parallel_switchN, 288 NULL, 289 }; 290 struct parallel_switch *data = NULL; 291 struct i915_gem_engines *engines; 292 struct i915_gem_engines_iter it; 293 void (* const *fn)(struct kthread_work *); 294 struct i915_gem_context *ctx; 295 struct intel_context *ce; 296 struct file *file; 297 int n, m, count; 298 int err = 0; 299 300 /* 301 * Check we can process switches on all engines simultaneously. 302 */ 303 304 if (!DRIVER_CAPS(i915)->has_logical_contexts) 305 return 0; 306 307 file = mock_file(i915); 308 if (IS_ERR(file)) 309 return PTR_ERR(file); 310 311 ctx = live_context(i915, file); 312 if (IS_ERR(ctx)) { 313 err = PTR_ERR(ctx); 314 goto out_file; 315 } 316 317 engines = i915_gem_context_lock_engines(ctx); 318 count = engines->num_engines; 319 320 data = kcalloc(count, sizeof(*data), GFP_KERNEL); 321 if (!data) { 322 i915_gem_context_unlock_engines(ctx); 323 err = -ENOMEM; 324 goto out_file; 325 } 326 327 m = 0; /* Use the first context as our template for the engines */ 328 for_each_gem_engine(ce, engines, it) { 329 err = intel_context_pin(ce); 330 if (err) { 331 i915_gem_context_unlock_engines(ctx); 332 goto out; 333 } 334 data[m++].ce[0] = intel_context_get(ce); 335 } 336 i915_gem_context_unlock_engines(ctx); 337 338 /* Clone the same set of engines into the other contexts */ 339 for (n = 1; n < ARRAY_SIZE(data->ce); n++) { 340 ctx = live_context(i915, file); 341 if (IS_ERR(ctx)) { 342 err = PTR_ERR(ctx); 343 goto out; 344 } 345 346 for (m = 0; m < count; m++) { 347 if (!data[m].ce[0]) 348 continue; 349 350 ce = intel_context_create(data[m].ce[0]->engine); 351 if (IS_ERR(ce)) 352 goto out; 353 354 err = intel_context_pin(ce); 355 if (err) { 356 intel_context_put(ce); 357 goto out; 358 } 359 360 data[m].ce[n] = ce; 361 } 362 } 363 364 for (n = 0; n < count; n++) { 365 struct kthread_worker *worker; 366 367 if (!data[n].ce[0]) 368 continue; 369 370 worker = kthread_create_worker(0, "igt/parallel:%s", 371 data[n].ce[0]->engine->name); 372 if (IS_ERR(worker)) 373 goto out; 374 375 data[n].worker = worker; 376 } 377 378 for (fn = func; !err && *fn; fn++) { 379 struct igt_live_test t; 380 381 err = igt_live_test_begin(&t, i915, __func__, ""); 382 if (err) 383 break; 384 385 for (n = 0; n < count; n++) { 386 if (!data[n].ce[0]) 387 continue; 388 389 data[n].result = 0; 390 kthread_init_work(&data[n].work, *fn); 391 kthread_queue_work(data[n].worker, &data[n].work); 392 } 393 394 for (n = 0; n < count; n++) { 395 if (data[n].ce[0]) { 396 kthread_flush_work(&data[n].work); 397 if (data[n].result && !err) 398 err = data[n].result; 399 } 400 } 401 402 if (igt_live_test_end(&t)) 403 err = -EIO; 404 } 405 406 out: 407 for (n = 0; n < count; n++) { 408 for (m = 0; m < ARRAY_SIZE(data->ce); m++) { 409 if (!data[n].ce[m]) 410 continue; 411 412 intel_context_unpin(data[n].ce[m]); 413 intel_context_put(data[n].ce[m]); 414 } 415 416 if (data[n].worker) 417 kthread_destroy_worker(data[n].worker); 418 } 419 kfree(data); 420 out_file: 421 fput(file); 422 return err; 423 } 424 425 static unsigned long real_page_count(struct drm_i915_gem_object *obj) 426 { 427 return huge_gem_object_phys_size(obj) >> PAGE_SHIFT; 428 } 429 430 static unsigned long fake_page_count(struct drm_i915_gem_object *obj) 431 { 432 return huge_gem_object_dma_size(obj) >> PAGE_SHIFT; 433 } 434 435 static int gpu_fill(struct intel_context *ce, 436 struct drm_i915_gem_object *obj, 437 unsigned int dw) 438 { 439 struct i915_vma *vma; 440 int err; 441 442 GEM_BUG_ON(obj->base.size > ce->vm->total); 443 GEM_BUG_ON(!intel_engine_can_store_dword(ce->engine)); 444 445 vma = i915_vma_instance(obj, ce->vm, NULL); 446 if (IS_ERR(vma)) 447 return PTR_ERR(vma); 448 449 err = i915_vma_pin(vma, 0, 0, PIN_HIGH | PIN_USER); 450 if (err) 451 return err; 452 453 /* 454 * Within the GTT the huge objects maps every page onto 455 * its 1024 real pages (using phys_pfn = dma_pfn % 1024). 456 * We set the nth dword within the page using the nth 457 * mapping via the GTT - this should exercise the GTT mapping 458 * whilst checking that each context provides a unique view 459 * into the object. 460 */ 461 err = igt_gpu_fill_dw(ce, vma, 462 (dw * real_page_count(obj)) << PAGE_SHIFT | 463 (dw * sizeof(u32)), 464 real_page_count(obj), 465 dw); 466 i915_vma_unpin(vma); 467 468 return err; 469 } 470 471 static int cpu_fill(struct drm_i915_gem_object *obj, u32 value) 472 { 473 const bool has_llc = HAS_LLC(to_i915(obj->base.dev)); 474 unsigned int need_flush; 475 unsigned long n, m; 476 int err; 477 478 i915_gem_object_lock(obj, NULL); 479 err = i915_gem_object_prepare_write(obj, &need_flush); 480 if (err) 481 goto out; 482 483 for (n = 0; n < real_page_count(obj); n++) { 484 u32 *map; 485 486 map = kmap_atomic(i915_gem_object_get_page(obj, n)); 487 for (m = 0; m < DW_PER_PAGE; m++) 488 map[m] = value; 489 if (!has_llc) 490 drm_clflush_virt_range(map, PAGE_SIZE); 491 kunmap_atomic(map); 492 } 493 494 i915_gem_object_finish_access(obj); 495 obj->read_domains = I915_GEM_DOMAIN_GTT | I915_GEM_DOMAIN_CPU; 496 obj->write_domain = 0; 497 out: 498 i915_gem_object_unlock(obj); 499 return err; 500 } 501 502 static noinline int cpu_check(struct drm_i915_gem_object *obj, 503 unsigned int idx, unsigned int max) 504 { 505 unsigned int needs_flush; 506 unsigned long n; 507 int err; 508 509 i915_gem_object_lock(obj, NULL); 510 err = i915_gem_object_prepare_read(obj, &needs_flush); 511 if (err) 512 goto out_unlock; 513 514 for (n = 0; n < real_page_count(obj); n++) { 515 u32 *map, m; 516 517 map = kmap_atomic(i915_gem_object_get_page(obj, n)); 518 if (needs_flush & CLFLUSH_BEFORE) 519 drm_clflush_virt_range(map, PAGE_SIZE); 520 521 for (m = 0; m < max; m++) { 522 if (map[m] != m) { 523 pr_err("%pS: Invalid value at object %d page %ld/%ld, offset %d/%d: found %x expected %x\n", 524 __builtin_return_address(0), idx, 525 n, real_page_count(obj), m, max, 526 map[m], m); 527 err = -EINVAL; 528 goto out_unmap; 529 } 530 } 531 532 for (; m < DW_PER_PAGE; m++) { 533 if (map[m] != STACK_MAGIC) { 534 pr_err("%pS: Invalid value at object %d page %ld, offset %d: found %x expected %x (uninitialised)\n", 535 __builtin_return_address(0), idx, n, m, 536 map[m], STACK_MAGIC); 537 err = -EINVAL; 538 goto out_unmap; 539 } 540 } 541 542 out_unmap: 543 kunmap_atomic(map); 544 if (err) 545 break; 546 } 547 548 i915_gem_object_finish_access(obj); 549 out_unlock: 550 i915_gem_object_unlock(obj); 551 return err; 552 } 553 554 static int file_add_object(struct file *file, struct drm_i915_gem_object *obj) 555 { 556 int err; 557 558 GEM_BUG_ON(obj->base.handle_count); 559 560 /* tie the object to the drm_file for easy reaping */ 561 err = idr_alloc(&to_drm_file(file)->object_idr, 562 &obj->base, 1, 0, GFP_KERNEL); 563 if (err < 0) 564 return err; 565 566 i915_gem_object_get(obj); 567 obj->base.handle_count++; 568 return 0; 569 } 570 571 static struct drm_i915_gem_object * 572 create_test_object(struct i915_address_space *vm, 573 struct file *file, 574 struct list_head *objects) 575 { 576 struct drm_i915_gem_object *obj; 577 u64 size; 578 int err; 579 580 /* Keep in GEM's good graces */ 581 intel_gt_retire_requests(vm->gt); 582 583 size = min(vm->total / 2, 1024ull * DW_PER_PAGE * PAGE_SIZE); 584 size = round_down(size, DW_PER_PAGE * PAGE_SIZE); 585 586 obj = huge_gem_object(vm->i915, DW_PER_PAGE * PAGE_SIZE, size); 587 if (IS_ERR(obj)) 588 return obj; 589 590 err = file_add_object(file, obj); 591 i915_gem_object_put(obj); 592 if (err) 593 return ERR_PTR(err); 594 595 err = cpu_fill(obj, STACK_MAGIC); 596 if (err) { 597 pr_err("Failed to fill object with cpu, err=%d\n", 598 err); 599 return ERR_PTR(err); 600 } 601 602 list_add_tail(&obj->st_link, objects); 603 return obj; 604 } 605 606 static unsigned long max_dwords(struct drm_i915_gem_object *obj) 607 { 608 unsigned long npages = fake_page_count(obj); 609 610 GEM_BUG_ON(!IS_ALIGNED(npages, DW_PER_PAGE)); 611 return npages / DW_PER_PAGE; 612 } 613 614 static void throttle_release(struct i915_request **q, int count) 615 { 616 int i; 617 618 for (i = 0; i < count; i++) { 619 if (IS_ERR_OR_NULL(q[i])) 620 continue; 621 622 i915_request_put(fetch_and_zero(&q[i])); 623 } 624 } 625 626 static int throttle(struct intel_context *ce, 627 struct i915_request **q, int count) 628 { 629 int i; 630 631 if (!IS_ERR_OR_NULL(q[0])) { 632 if (i915_request_wait(q[0], 633 I915_WAIT_INTERRUPTIBLE, 634 MAX_SCHEDULE_TIMEOUT) < 0) 635 return -EINTR; 636 637 i915_request_put(q[0]); 638 } 639 640 for (i = 0; i < count - 1; i++) 641 q[i] = q[i + 1]; 642 643 q[i] = intel_context_create_request(ce); 644 if (IS_ERR(q[i])) 645 return PTR_ERR(q[i]); 646 647 i915_request_get(q[i]); 648 i915_request_add(q[i]); 649 650 return 0; 651 } 652 653 static int igt_ctx_exec(void *arg) 654 { 655 struct drm_i915_private *i915 = arg; 656 struct intel_engine_cs *engine; 657 int err = -ENODEV; 658 659 /* 660 * Create a few different contexts (with different mm) and write 661 * through each ctx/mm using the GPU making sure those writes end 662 * up in the expected pages of our obj. 663 */ 664 665 if (!DRIVER_CAPS(i915)->has_logical_contexts) 666 return 0; 667 668 for_each_uabi_engine(engine, i915) { 669 struct drm_i915_gem_object *obj = NULL; 670 unsigned long ncontexts, ndwords, dw; 671 struct i915_request *tq[5] = {}; 672 struct igt_live_test t; 673 IGT_TIMEOUT(end_time); 674 LIST_HEAD(objects); 675 struct file *file; 676 677 if (!intel_engine_can_store_dword(engine)) 678 continue; 679 680 if (!engine->context_size) 681 continue; /* No logical context support in HW */ 682 683 file = mock_file(i915); 684 if (IS_ERR(file)) 685 return PTR_ERR(file); 686 687 err = igt_live_test_begin(&t, i915, __func__, engine->name); 688 if (err) 689 goto out_file; 690 691 ncontexts = 0; 692 ndwords = 0; 693 dw = 0; 694 while (!time_after(jiffies, end_time)) { 695 struct i915_gem_context *ctx; 696 struct intel_context *ce; 697 698 ctx = kernel_context(i915, NULL); 699 if (IS_ERR(ctx)) { 700 err = PTR_ERR(ctx); 701 goto out_file; 702 } 703 704 ce = i915_gem_context_get_engine(ctx, engine->legacy_idx); 705 GEM_BUG_ON(IS_ERR(ce)); 706 707 if (!obj) { 708 obj = create_test_object(ce->vm, file, &objects); 709 if (IS_ERR(obj)) { 710 err = PTR_ERR(obj); 711 intel_context_put(ce); 712 kernel_context_close(ctx); 713 goto out_file; 714 } 715 } 716 717 err = gpu_fill(ce, obj, dw); 718 if (err) { 719 pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) [full-ppgtt? %s], err=%d\n", 720 ndwords, dw, max_dwords(obj), 721 engine->name, 722 str_yes_no(i915_gem_context_has_full_ppgtt(ctx)), 723 err); 724 intel_context_put(ce); 725 kernel_context_close(ctx); 726 goto out_file; 727 } 728 729 err = throttle(ce, tq, ARRAY_SIZE(tq)); 730 if (err) { 731 intel_context_put(ce); 732 kernel_context_close(ctx); 733 goto out_file; 734 } 735 736 if (++dw == max_dwords(obj)) { 737 obj = NULL; 738 dw = 0; 739 } 740 741 ndwords++; 742 ncontexts++; 743 744 intel_context_put(ce); 745 kernel_context_close(ctx); 746 } 747 748 pr_info("Submitted %lu contexts to %s, filling %lu dwords\n", 749 ncontexts, engine->name, ndwords); 750 751 ncontexts = dw = 0; 752 list_for_each_entry(obj, &objects, st_link) { 753 unsigned int rem = 754 min_t(unsigned int, ndwords - dw, max_dwords(obj)); 755 756 err = cpu_check(obj, ncontexts++, rem); 757 if (err) 758 break; 759 760 dw += rem; 761 } 762 763 out_file: 764 throttle_release(tq, ARRAY_SIZE(tq)); 765 if (igt_live_test_end(&t)) 766 err = -EIO; 767 768 fput(file); 769 if (err) 770 return err; 771 772 i915_gem_drain_freed_objects(i915); 773 } 774 775 return 0; 776 } 777 778 static int igt_shared_ctx_exec(void *arg) 779 { 780 struct drm_i915_private *i915 = arg; 781 struct i915_request *tq[5] = {}; 782 struct i915_gem_context *parent; 783 struct intel_engine_cs *engine; 784 struct igt_live_test t; 785 struct file *file; 786 int err = 0; 787 788 /* 789 * Create a few different contexts with the same mm and write 790 * through each ctx using the GPU making sure those writes end 791 * up in the expected pages of our obj. 792 */ 793 if (!DRIVER_CAPS(i915)->has_logical_contexts) 794 return 0; 795 796 file = mock_file(i915); 797 if (IS_ERR(file)) 798 return PTR_ERR(file); 799 800 parent = live_context(i915, file); 801 if (IS_ERR(parent)) { 802 err = PTR_ERR(parent); 803 goto out_file; 804 } 805 806 if (!parent->vm) { /* not full-ppgtt; nothing to share */ 807 err = 0; 808 goto out_file; 809 } 810 811 err = igt_live_test_begin(&t, i915, __func__, ""); 812 if (err) 813 goto out_file; 814 815 for_each_uabi_engine(engine, i915) { 816 unsigned long ncontexts, ndwords, dw; 817 struct drm_i915_gem_object *obj = NULL; 818 IGT_TIMEOUT(end_time); 819 LIST_HEAD(objects); 820 821 if (!intel_engine_can_store_dword(engine)) 822 continue; 823 824 dw = 0; 825 ndwords = 0; 826 ncontexts = 0; 827 while (!time_after(jiffies, end_time)) { 828 struct i915_gem_context *ctx; 829 struct intel_context *ce; 830 831 ctx = kernel_context(i915, parent->vm); 832 if (IS_ERR(ctx)) { 833 err = PTR_ERR(ctx); 834 goto out_test; 835 } 836 837 ce = i915_gem_context_get_engine(ctx, engine->legacy_idx); 838 GEM_BUG_ON(IS_ERR(ce)); 839 840 if (!obj) { 841 obj = create_test_object(parent->vm, 842 file, &objects); 843 if (IS_ERR(obj)) { 844 err = PTR_ERR(obj); 845 intel_context_put(ce); 846 kernel_context_close(ctx); 847 goto out_test; 848 } 849 } 850 851 err = gpu_fill(ce, obj, dw); 852 if (err) { 853 pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) [full-ppgtt? %s], err=%d\n", 854 ndwords, dw, max_dwords(obj), 855 engine->name, 856 str_yes_no(i915_gem_context_has_full_ppgtt(ctx)), 857 err); 858 intel_context_put(ce); 859 kernel_context_close(ctx); 860 goto out_test; 861 } 862 863 err = throttle(ce, tq, ARRAY_SIZE(tq)); 864 if (err) { 865 intel_context_put(ce); 866 kernel_context_close(ctx); 867 goto out_test; 868 } 869 870 if (++dw == max_dwords(obj)) { 871 obj = NULL; 872 dw = 0; 873 } 874 875 ndwords++; 876 ncontexts++; 877 878 intel_context_put(ce); 879 kernel_context_close(ctx); 880 } 881 pr_info("Submitted %lu contexts to %s, filling %lu dwords\n", 882 ncontexts, engine->name, ndwords); 883 884 ncontexts = dw = 0; 885 list_for_each_entry(obj, &objects, st_link) { 886 unsigned int rem = 887 min_t(unsigned int, ndwords - dw, max_dwords(obj)); 888 889 err = cpu_check(obj, ncontexts++, rem); 890 if (err) 891 goto out_test; 892 893 dw += rem; 894 } 895 896 i915_gem_drain_freed_objects(i915); 897 } 898 out_test: 899 throttle_release(tq, ARRAY_SIZE(tq)); 900 if (igt_live_test_end(&t)) 901 err = -EIO; 902 out_file: 903 fput(file); 904 return err; 905 } 906 907 static int rpcs_query_batch(struct drm_i915_gem_object *rpcs, 908 struct i915_vma *vma, 909 struct intel_engine_cs *engine) 910 { 911 u32 *cmd; 912 913 GEM_BUG_ON(GRAPHICS_VER(vma->vm->i915) < 8); 914 915 cmd = i915_gem_object_pin_map(rpcs, I915_MAP_WB); 916 if (IS_ERR(cmd)) 917 return PTR_ERR(cmd); 918 919 *cmd++ = MI_STORE_REGISTER_MEM_GEN8; 920 *cmd++ = i915_mmio_reg_offset(GEN8_R_PWR_CLK_STATE(engine->mmio_base)); 921 *cmd++ = lower_32_bits(i915_vma_offset(vma)); 922 *cmd++ = upper_32_bits(i915_vma_offset(vma)); 923 *cmd = MI_BATCH_BUFFER_END; 924 925 __i915_gem_object_flush_map(rpcs, 0, 64); 926 i915_gem_object_unpin_map(rpcs); 927 928 intel_gt_chipset_flush(vma->vm->gt); 929 930 return 0; 931 } 932 933 static int 934 emit_rpcs_query(struct drm_i915_gem_object *obj, 935 struct intel_context *ce, 936 struct i915_request **rq_out) 937 { 938 struct drm_i915_private *i915 = to_i915(obj->base.dev); 939 struct i915_request *rq; 940 struct i915_gem_ww_ctx ww; 941 struct i915_vma *batch; 942 struct i915_vma *vma; 943 struct drm_i915_gem_object *rpcs; 944 int err; 945 946 GEM_BUG_ON(!intel_engine_can_store_dword(ce->engine)); 947 948 if (GRAPHICS_VER(i915) < 8) 949 return -EINVAL; 950 951 vma = i915_vma_instance(obj, ce->vm, NULL); 952 if (IS_ERR(vma)) 953 return PTR_ERR(vma); 954 955 rpcs = i915_gem_object_create_internal(i915, PAGE_SIZE); 956 if (IS_ERR(rpcs)) 957 return PTR_ERR(rpcs); 958 959 batch = i915_vma_instance(rpcs, ce->vm, NULL); 960 if (IS_ERR(batch)) { 961 err = PTR_ERR(batch); 962 goto err_put; 963 } 964 965 i915_gem_ww_ctx_init(&ww, false); 966 retry: 967 err = i915_gem_object_lock(obj, &ww); 968 if (!err) 969 err = i915_gem_object_lock(rpcs, &ww); 970 if (!err) 971 err = i915_gem_object_set_to_gtt_domain(obj, false); 972 if (!err) 973 err = i915_vma_pin_ww(vma, &ww, 0, 0, PIN_USER); 974 if (err) 975 goto err_put; 976 977 err = i915_vma_pin_ww(batch, &ww, 0, 0, PIN_USER); 978 if (err) 979 goto err_vma; 980 981 err = rpcs_query_batch(rpcs, vma, ce->engine); 982 if (err) 983 goto err_batch; 984 985 rq = i915_request_create(ce); 986 if (IS_ERR(rq)) { 987 err = PTR_ERR(rq); 988 goto err_batch; 989 } 990 991 err = i915_vma_move_to_active(batch, rq, 0); 992 if (err) 993 goto skip_request; 994 995 err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE); 996 if (err) 997 goto skip_request; 998 999 if (rq->engine->emit_init_breadcrumb) { 1000 err = rq->engine->emit_init_breadcrumb(rq); 1001 if (err) 1002 goto skip_request; 1003 } 1004 1005 err = rq->engine->emit_bb_start(rq, 1006 i915_vma_offset(batch), 1007 i915_vma_size(batch), 1008 0); 1009 if (err) 1010 goto skip_request; 1011 1012 *rq_out = i915_request_get(rq); 1013 1014 skip_request: 1015 if (err) 1016 i915_request_set_error_once(rq, err); 1017 i915_request_add(rq); 1018 err_batch: 1019 i915_vma_unpin(batch); 1020 err_vma: 1021 i915_vma_unpin(vma); 1022 err_put: 1023 if (err == -EDEADLK) { 1024 err = i915_gem_ww_ctx_backoff(&ww); 1025 if (!err) 1026 goto retry; 1027 } 1028 i915_gem_ww_ctx_fini(&ww); 1029 i915_gem_object_put(rpcs); 1030 return err; 1031 } 1032 1033 #define TEST_IDLE BIT(0) 1034 #define TEST_BUSY BIT(1) 1035 #define TEST_RESET BIT(2) 1036 1037 static int 1038 __sseu_prepare(const char *name, 1039 unsigned int flags, 1040 struct intel_context *ce, 1041 struct igt_spinner **spin) 1042 { 1043 struct i915_request *rq; 1044 int ret; 1045 1046 *spin = NULL; 1047 if (!(flags & (TEST_BUSY | TEST_RESET))) 1048 return 0; 1049 1050 *spin = kzalloc(sizeof(**spin), GFP_KERNEL); 1051 if (!*spin) 1052 return -ENOMEM; 1053 1054 ret = igt_spinner_init(*spin, ce->engine->gt); 1055 if (ret) 1056 goto err_free; 1057 1058 rq = igt_spinner_create_request(*spin, ce, MI_NOOP); 1059 if (IS_ERR(rq)) { 1060 ret = PTR_ERR(rq); 1061 goto err_fini; 1062 } 1063 1064 i915_request_add(rq); 1065 1066 if (!igt_wait_for_spinner(*spin, rq)) { 1067 pr_err("%s: Spinner failed to start!\n", name); 1068 ret = -ETIMEDOUT; 1069 goto err_end; 1070 } 1071 1072 return 0; 1073 1074 err_end: 1075 igt_spinner_end(*spin); 1076 err_fini: 1077 igt_spinner_fini(*spin); 1078 err_free: 1079 kfree(fetch_and_zero(spin)); 1080 return ret; 1081 } 1082 1083 static int 1084 __read_slice_count(struct intel_context *ce, 1085 struct drm_i915_gem_object *obj, 1086 struct igt_spinner *spin, 1087 u32 *rpcs) 1088 { 1089 struct i915_request *rq = NULL; 1090 u32 s_mask, s_shift; 1091 unsigned int cnt; 1092 u32 *buf, val; 1093 long ret; 1094 1095 ret = emit_rpcs_query(obj, ce, &rq); 1096 if (ret) 1097 return ret; 1098 1099 if (spin) 1100 igt_spinner_end(spin); 1101 1102 ret = i915_request_wait(rq, 0, MAX_SCHEDULE_TIMEOUT); 1103 i915_request_put(rq); 1104 if (ret < 0) 1105 return ret; 1106 1107 buf = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WB); 1108 if (IS_ERR(buf)) { 1109 ret = PTR_ERR(buf); 1110 return ret; 1111 } 1112 1113 if (GRAPHICS_VER(ce->engine->i915) >= 11) { 1114 s_mask = GEN11_RPCS_S_CNT_MASK; 1115 s_shift = GEN11_RPCS_S_CNT_SHIFT; 1116 } else { 1117 s_mask = GEN8_RPCS_S_CNT_MASK; 1118 s_shift = GEN8_RPCS_S_CNT_SHIFT; 1119 } 1120 1121 val = *buf; 1122 cnt = (val & s_mask) >> s_shift; 1123 *rpcs = val; 1124 1125 i915_gem_object_unpin_map(obj); 1126 1127 return cnt; 1128 } 1129 1130 static int 1131 __check_rpcs(const char *name, u32 rpcs, int slices, unsigned int expected, 1132 const char *prefix, const char *suffix) 1133 { 1134 if (slices == expected) 1135 return 0; 1136 1137 if (slices < 0) { 1138 pr_err("%s: %s read slice count failed with %d%s\n", 1139 name, prefix, slices, suffix); 1140 return slices; 1141 } 1142 1143 pr_err("%s: %s slice count %d is not %u%s\n", 1144 name, prefix, slices, expected, suffix); 1145 1146 pr_info("RPCS=0x%x; %u%sx%u%s\n", 1147 rpcs, slices, 1148 (rpcs & GEN8_RPCS_S_CNT_ENABLE) ? "*" : "", 1149 (rpcs & GEN8_RPCS_SS_CNT_MASK) >> GEN8_RPCS_SS_CNT_SHIFT, 1150 (rpcs & GEN8_RPCS_SS_CNT_ENABLE) ? "*" : ""); 1151 1152 return -EINVAL; 1153 } 1154 1155 static int 1156 __sseu_finish(const char *name, 1157 unsigned int flags, 1158 struct intel_context *ce, 1159 struct drm_i915_gem_object *obj, 1160 unsigned int expected, 1161 struct igt_spinner *spin) 1162 { 1163 unsigned int slices = hweight32(ce->engine->sseu.slice_mask); 1164 u32 rpcs = 0; 1165 int ret = 0; 1166 1167 if (flags & TEST_RESET) { 1168 ret = intel_engine_reset(ce->engine, "sseu"); 1169 if (ret) 1170 goto out; 1171 } 1172 1173 ret = __read_slice_count(ce, obj, 1174 flags & TEST_RESET ? NULL : spin, &rpcs); 1175 ret = __check_rpcs(name, rpcs, ret, expected, "Context", "!"); 1176 if (ret) 1177 goto out; 1178 1179 ret = __read_slice_count(ce->engine->kernel_context, obj, NULL, &rpcs); 1180 ret = __check_rpcs(name, rpcs, ret, slices, "Kernel context", "!"); 1181 1182 out: 1183 if (spin) 1184 igt_spinner_end(spin); 1185 1186 if ((flags & TEST_IDLE) && ret == 0) { 1187 ret = igt_flush_test(ce->engine->i915); 1188 if (ret) 1189 return ret; 1190 1191 ret = __read_slice_count(ce, obj, NULL, &rpcs); 1192 ret = __check_rpcs(name, rpcs, ret, expected, 1193 "Context", " after idle!"); 1194 } 1195 1196 return ret; 1197 } 1198 1199 static int 1200 __sseu_test(const char *name, 1201 unsigned int flags, 1202 struct intel_context *ce, 1203 struct drm_i915_gem_object *obj, 1204 struct intel_sseu sseu) 1205 { 1206 struct igt_spinner *spin = NULL; 1207 int ret; 1208 1209 intel_engine_pm_get(ce->engine); 1210 1211 ret = __sseu_prepare(name, flags, ce, &spin); 1212 if (ret) 1213 goto out_pm; 1214 1215 ret = intel_context_reconfigure_sseu(ce, sseu); 1216 if (ret) 1217 goto out_spin; 1218 1219 ret = __sseu_finish(name, flags, ce, obj, 1220 hweight32(sseu.slice_mask), spin); 1221 1222 out_spin: 1223 if (spin) { 1224 igt_spinner_end(spin); 1225 igt_spinner_fini(spin); 1226 kfree(spin); 1227 } 1228 out_pm: 1229 intel_engine_pm_put(ce->engine); 1230 return ret; 1231 } 1232 1233 static int 1234 __igt_ctx_sseu(struct drm_i915_private *i915, 1235 const char *name, 1236 unsigned int flags) 1237 { 1238 struct drm_i915_gem_object *obj; 1239 int inst = 0; 1240 int ret = 0; 1241 1242 if (GRAPHICS_VER(i915) < 9) 1243 return 0; 1244 1245 if (flags & TEST_RESET) 1246 igt_global_reset_lock(to_gt(i915)); 1247 1248 obj = i915_gem_object_create_internal(i915, PAGE_SIZE); 1249 if (IS_ERR(obj)) { 1250 ret = PTR_ERR(obj); 1251 goto out_unlock; 1252 } 1253 1254 do { 1255 struct intel_engine_cs *engine; 1256 struct intel_context *ce; 1257 struct intel_sseu pg_sseu; 1258 1259 engine = intel_engine_lookup_user(i915, 1260 I915_ENGINE_CLASS_RENDER, 1261 inst++); 1262 if (!engine) 1263 break; 1264 1265 if (hweight32(engine->sseu.slice_mask) < 2) 1266 continue; 1267 1268 if (!engine->gt->info.sseu.has_slice_pg) 1269 continue; 1270 1271 /* 1272 * Gen11 VME friendly power-gated configuration with 1273 * half enabled sub-slices. 1274 */ 1275 pg_sseu = engine->sseu; 1276 pg_sseu.slice_mask = 1; 1277 pg_sseu.subslice_mask = 1278 ~(~0 << (hweight32(engine->sseu.subslice_mask) / 2)); 1279 1280 pr_info("%s: SSEU subtest '%s', flags=%x, def_slices=%u, pg_slices=%u\n", 1281 engine->name, name, flags, 1282 hweight32(engine->sseu.slice_mask), 1283 hweight32(pg_sseu.slice_mask)); 1284 1285 ce = intel_context_create(engine); 1286 if (IS_ERR(ce)) { 1287 ret = PTR_ERR(ce); 1288 goto out_put; 1289 } 1290 1291 ret = intel_context_pin(ce); 1292 if (ret) 1293 goto out_ce; 1294 1295 /* First set the default mask. */ 1296 ret = __sseu_test(name, flags, ce, obj, engine->sseu); 1297 if (ret) 1298 goto out_unpin; 1299 1300 /* Then set a power-gated configuration. */ 1301 ret = __sseu_test(name, flags, ce, obj, pg_sseu); 1302 if (ret) 1303 goto out_unpin; 1304 1305 /* Back to defaults. */ 1306 ret = __sseu_test(name, flags, ce, obj, engine->sseu); 1307 if (ret) 1308 goto out_unpin; 1309 1310 /* One last power-gated configuration for the road. */ 1311 ret = __sseu_test(name, flags, ce, obj, pg_sseu); 1312 if (ret) 1313 goto out_unpin; 1314 1315 out_unpin: 1316 intel_context_unpin(ce); 1317 out_ce: 1318 intel_context_put(ce); 1319 } while (!ret); 1320 1321 if (igt_flush_test(i915)) 1322 ret = -EIO; 1323 1324 out_put: 1325 i915_gem_object_put(obj); 1326 1327 out_unlock: 1328 if (flags & TEST_RESET) 1329 igt_global_reset_unlock(to_gt(i915)); 1330 1331 if (ret) 1332 pr_err("%s: Failed with %d!\n", name, ret); 1333 1334 return ret; 1335 } 1336 1337 static int igt_ctx_sseu(void *arg) 1338 { 1339 struct { 1340 const char *name; 1341 unsigned int flags; 1342 } *phase, phases[] = { 1343 { .name = "basic", .flags = 0 }, 1344 { .name = "idle", .flags = TEST_IDLE }, 1345 { .name = "busy", .flags = TEST_BUSY }, 1346 { .name = "busy-reset", .flags = TEST_BUSY | TEST_RESET }, 1347 { .name = "busy-idle", .flags = TEST_BUSY | TEST_IDLE }, 1348 { .name = "reset-idle", .flags = TEST_RESET | TEST_IDLE }, 1349 }; 1350 unsigned int i; 1351 int ret = 0; 1352 1353 for (i = 0, phase = phases; ret == 0 && i < ARRAY_SIZE(phases); 1354 i++, phase++) 1355 ret = __igt_ctx_sseu(arg, phase->name, phase->flags); 1356 1357 return ret; 1358 } 1359 1360 static int igt_ctx_readonly(void *arg) 1361 { 1362 struct drm_i915_private *i915 = arg; 1363 unsigned long idx, ndwords, dw, num_engines; 1364 struct drm_i915_gem_object *obj = NULL; 1365 struct i915_request *tq[5] = {}; 1366 struct i915_gem_engines_iter it; 1367 struct i915_address_space *vm; 1368 struct i915_gem_context *ctx; 1369 struct intel_context *ce; 1370 struct igt_live_test t; 1371 I915_RND_STATE(prng); 1372 IGT_TIMEOUT(end_time); 1373 LIST_HEAD(objects); 1374 struct file *file; 1375 int err = -ENODEV; 1376 1377 /* 1378 * Create a few read-only objects (with the occasional writable object) 1379 * and try to write into these object checking that the GPU discards 1380 * any write to a read-only object. 1381 */ 1382 1383 file = mock_file(i915); 1384 if (IS_ERR(file)) 1385 return PTR_ERR(file); 1386 1387 err = igt_live_test_begin(&t, i915, __func__, ""); 1388 if (err) 1389 goto out_file; 1390 1391 ctx = live_context(i915, file); 1392 if (IS_ERR(ctx)) { 1393 err = PTR_ERR(ctx); 1394 goto out_file; 1395 } 1396 1397 vm = ctx->vm ?: &to_gt(i915)->ggtt->alias->vm; 1398 if (!vm || !vm->has_read_only) { 1399 err = 0; 1400 goto out_file; 1401 } 1402 1403 num_engines = 0; 1404 for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) 1405 if (intel_engine_can_store_dword(ce->engine)) 1406 num_engines++; 1407 i915_gem_context_unlock_engines(ctx); 1408 1409 ndwords = 0; 1410 dw = 0; 1411 while (!time_after(jiffies, end_time)) { 1412 for_each_gem_engine(ce, 1413 i915_gem_context_lock_engines(ctx), it) { 1414 if (!intel_engine_can_store_dword(ce->engine)) 1415 continue; 1416 1417 if (!obj) { 1418 obj = create_test_object(ce->vm, file, &objects); 1419 if (IS_ERR(obj)) { 1420 err = PTR_ERR(obj); 1421 i915_gem_context_unlock_engines(ctx); 1422 goto out_file; 1423 } 1424 1425 if (prandom_u32_state(&prng) & 1) 1426 i915_gem_object_set_readonly(obj); 1427 } 1428 1429 err = gpu_fill(ce, obj, dw); 1430 if (err) { 1431 pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) [full-ppgtt? %s], err=%d\n", 1432 ndwords, dw, max_dwords(obj), 1433 ce->engine->name, 1434 str_yes_no(i915_gem_context_has_full_ppgtt(ctx)), 1435 err); 1436 i915_gem_context_unlock_engines(ctx); 1437 goto out_file; 1438 } 1439 1440 err = throttle(ce, tq, ARRAY_SIZE(tq)); 1441 if (err) { 1442 i915_gem_context_unlock_engines(ctx); 1443 goto out_file; 1444 } 1445 1446 if (++dw == max_dwords(obj)) { 1447 obj = NULL; 1448 dw = 0; 1449 } 1450 ndwords++; 1451 } 1452 i915_gem_context_unlock_engines(ctx); 1453 } 1454 pr_info("Submitted %lu dwords (across %lu engines)\n", 1455 ndwords, num_engines); 1456 1457 dw = 0; 1458 idx = 0; 1459 list_for_each_entry(obj, &objects, st_link) { 1460 unsigned int rem = 1461 min_t(unsigned int, ndwords - dw, max_dwords(obj)); 1462 unsigned int num_writes; 1463 1464 num_writes = rem; 1465 if (i915_gem_object_is_readonly(obj)) 1466 num_writes = 0; 1467 1468 err = cpu_check(obj, idx++, num_writes); 1469 if (err) 1470 break; 1471 1472 dw += rem; 1473 } 1474 1475 out_file: 1476 throttle_release(tq, ARRAY_SIZE(tq)); 1477 if (igt_live_test_end(&t)) 1478 err = -EIO; 1479 1480 fput(file); 1481 return err; 1482 } 1483 1484 static int check_scratch(struct i915_address_space *vm, u64 offset) 1485 { 1486 struct drm_mm_node *node; 1487 1488 mutex_lock(&vm->mutex); 1489 node = __drm_mm_interval_first(&vm->mm, 1490 offset, offset + sizeof(u32) - 1); 1491 mutex_unlock(&vm->mutex); 1492 if (!node || node->start > offset) 1493 return 0; 1494 1495 GEM_BUG_ON(offset >= node->start + node->size); 1496 1497 pr_err("Target offset 0x%08x_%08x overlaps with a node in the mm!\n", 1498 upper_32_bits(offset), lower_32_bits(offset)); 1499 return -EINVAL; 1500 } 1501 1502 static int write_to_scratch(struct i915_gem_context *ctx, 1503 struct intel_engine_cs *engine, 1504 struct drm_i915_gem_object *obj, 1505 u64 offset, u32 value) 1506 { 1507 struct drm_i915_private *i915 = ctx->i915; 1508 struct i915_address_space *vm; 1509 struct i915_request *rq; 1510 struct i915_vma *vma; 1511 u32 *cmd; 1512 int err; 1513 1514 GEM_BUG_ON(offset < I915_GTT_PAGE_SIZE); 1515 1516 err = check_scratch(ctx->vm, offset); 1517 if (err) 1518 return err; 1519 1520 cmd = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WB); 1521 if (IS_ERR(cmd)) 1522 return PTR_ERR(cmd); 1523 1524 *cmd++ = MI_STORE_DWORD_IMM_GEN4; 1525 if (GRAPHICS_VER(i915) >= 8) { 1526 *cmd++ = lower_32_bits(offset); 1527 *cmd++ = upper_32_bits(offset); 1528 } else { 1529 *cmd++ = 0; 1530 *cmd++ = offset; 1531 } 1532 *cmd++ = value; 1533 *cmd = MI_BATCH_BUFFER_END; 1534 __i915_gem_object_flush_map(obj, 0, 64); 1535 i915_gem_object_unpin_map(obj); 1536 1537 intel_gt_chipset_flush(engine->gt); 1538 1539 vm = i915_gem_context_get_eb_vm(ctx); 1540 vma = i915_vma_instance(obj, vm, NULL); 1541 if (IS_ERR(vma)) { 1542 err = PTR_ERR(vma); 1543 goto out_vm; 1544 } 1545 1546 err = i915_vma_pin(vma, 0, 0, PIN_USER | PIN_OFFSET_FIXED); 1547 if (err) 1548 goto out_vm; 1549 1550 rq = igt_request_alloc(ctx, engine); 1551 if (IS_ERR(rq)) { 1552 err = PTR_ERR(rq); 1553 goto err_unpin; 1554 } 1555 1556 err = igt_vma_move_to_active_unlocked(vma, rq, 0); 1557 if (err) 1558 goto skip_request; 1559 1560 if (rq->engine->emit_init_breadcrumb) { 1561 err = rq->engine->emit_init_breadcrumb(rq); 1562 if (err) 1563 goto skip_request; 1564 } 1565 1566 err = engine->emit_bb_start(rq, i915_vma_offset(vma), 1567 i915_vma_size(vma), 0); 1568 if (err) 1569 goto skip_request; 1570 1571 i915_vma_unpin(vma); 1572 1573 i915_request_add(rq); 1574 1575 goto out_vm; 1576 skip_request: 1577 i915_request_set_error_once(rq, err); 1578 i915_request_add(rq); 1579 err_unpin: 1580 i915_vma_unpin(vma); 1581 out_vm: 1582 i915_vm_put(vm); 1583 1584 if (!err) 1585 err = i915_gem_object_wait(obj, 0, MAX_SCHEDULE_TIMEOUT); 1586 1587 return err; 1588 } 1589 1590 static int read_from_scratch(struct i915_gem_context *ctx, 1591 struct intel_engine_cs *engine, 1592 struct drm_i915_gem_object *obj, 1593 u64 offset, u32 *value) 1594 { 1595 struct drm_i915_private *i915 = ctx->i915; 1596 struct i915_address_space *vm; 1597 const u32 result = 0x100; 1598 struct i915_request *rq; 1599 struct i915_vma *vma; 1600 unsigned int flags; 1601 u32 *cmd; 1602 int err; 1603 1604 GEM_BUG_ON(offset < I915_GTT_PAGE_SIZE); 1605 1606 err = check_scratch(ctx->vm, offset); 1607 if (err) 1608 return err; 1609 1610 if (GRAPHICS_VER(i915) >= 8) { 1611 const u32 GPR0 = engine->mmio_base + 0x600; 1612 1613 vm = i915_gem_context_get_eb_vm(ctx); 1614 vma = i915_vma_instance(obj, vm, NULL); 1615 if (IS_ERR(vma)) { 1616 err = PTR_ERR(vma); 1617 goto out_vm; 1618 } 1619 1620 err = i915_vma_pin(vma, 0, 0, PIN_USER | PIN_OFFSET_FIXED); 1621 if (err) 1622 goto out_vm; 1623 1624 cmd = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WB); 1625 if (IS_ERR(cmd)) { 1626 err = PTR_ERR(cmd); 1627 goto err_unpin; 1628 } 1629 1630 memset(cmd, POISON_INUSE, PAGE_SIZE); 1631 *cmd++ = MI_LOAD_REGISTER_MEM_GEN8; 1632 *cmd++ = GPR0; 1633 *cmd++ = lower_32_bits(offset); 1634 *cmd++ = upper_32_bits(offset); 1635 *cmd++ = MI_STORE_REGISTER_MEM_GEN8; 1636 *cmd++ = GPR0; 1637 *cmd++ = result; 1638 *cmd++ = 0; 1639 *cmd = MI_BATCH_BUFFER_END; 1640 1641 i915_gem_object_flush_map(obj); 1642 i915_gem_object_unpin_map(obj); 1643 1644 flags = 0; 1645 } else { 1646 const u32 reg = engine->mmio_base + 0x420; 1647 1648 /* hsw: register access even to 3DPRIM! is protected */ 1649 vm = i915_vm_get(&engine->gt->ggtt->vm); 1650 vma = i915_vma_instance(obj, vm, NULL); 1651 if (IS_ERR(vma)) { 1652 err = PTR_ERR(vma); 1653 goto out_vm; 1654 } 1655 1656 err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL); 1657 if (err) 1658 goto out_vm; 1659 1660 cmd = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WB); 1661 if (IS_ERR(cmd)) { 1662 err = PTR_ERR(cmd); 1663 goto err_unpin; 1664 } 1665 1666 memset(cmd, POISON_INUSE, PAGE_SIZE); 1667 *cmd++ = MI_LOAD_REGISTER_MEM; 1668 *cmd++ = reg; 1669 *cmd++ = offset; 1670 *cmd++ = MI_STORE_REGISTER_MEM | MI_USE_GGTT; 1671 *cmd++ = reg; 1672 *cmd++ = i915_vma_offset(vma) + result; 1673 *cmd = MI_BATCH_BUFFER_END; 1674 1675 i915_gem_object_flush_map(obj); 1676 i915_gem_object_unpin_map(obj); 1677 1678 flags = I915_DISPATCH_SECURE; 1679 } 1680 1681 intel_gt_chipset_flush(engine->gt); 1682 1683 rq = igt_request_alloc(ctx, engine); 1684 if (IS_ERR(rq)) { 1685 err = PTR_ERR(rq); 1686 goto err_unpin; 1687 } 1688 1689 err = igt_vma_move_to_active_unlocked(vma, rq, EXEC_OBJECT_WRITE); 1690 if (err) 1691 goto skip_request; 1692 1693 if (rq->engine->emit_init_breadcrumb) { 1694 err = rq->engine->emit_init_breadcrumb(rq); 1695 if (err) 1696 goto skip_request; 1697 } 1698 1699 err = engine->emit_bb_start(rq, i915_vma_offset(vma), 1700 i915_vma_size(vma), flags); 1701 if (err) 1702 goto skip_request; 1703 1704 i915_vma_unpin(vma); 1705 1706 i915_request_add(rq); 1707 1708 i915_gem_object_lock(obj, NULL); 1709 err = i915_gem_object_set_to_cpu_domain(obj, false); 1710 i915_gem_object_unlock(obj); 1711 if (err) 1712 goto out_vm; 1713 1714 cmd = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WB); 1715 if (IS_ERR(cmd)) { 1716 err = PTR_ERR(cmd); 1717 goto out_vm; 1718 } 1719 1720 *value = cmd[result / sizeof(*cmd)]; 1721 i915_gem_object_unpin_map(obj); 1722 1723 goto out_vm; 1724 skip_request: 1725 i915_request_set_error_once(rq, err); 1726 i915_request_add(rq); 1727 err_unpin: 1728 i915_vma_unpin(vma); 1729 out_vm: 1730 i915_vm_put(vm); 1731 1732 if (!err) 1733 err = i915_gem_object_wait(obj, 0, MAX_SCHEDULE_TIMEOUT); 1734 1735 return err; 1736 } 1737 1738 static int check_scratch_page(struct i915_gem_context *ctx, u32 *out) 1739 { 1740 struct i915_address_space *vm; 1741 u32 *vaddr; 1742 int err = 0; 1743 1744 vm = ctx->vm; 1745 if (!vm) 1746 return -ENODEV; 1747 1748 if (!vm->scratch[0]) { 1749 pr_err("No scratch page!\n"); 1750 return -EINVAL; 1751 } 1752 1753 vaddr = __px_vaddr(vm->scratch[0]); 1754 1755 memcpy(out, vaddr, sizeof(*out)); 1756 if (memchr_inv(vaddr, *out, PAGE_SIZE)) { 1757 pr_err("Inconsistent initial state of scratch page!\n"); 1758 err = -EINVAL; 1759 } 1760 1761 return err; 1762 } 1763 1764 static int igt_vm_isolation(void *arg) 1765 { 1766 struct drm_i915_private *i915 = arg; 1767 struct i915_gem_context *ctx_a, *ctx_b; 1768 struct drm_i915_gem_object *obj_a, *obj_b; 1769 unsigned long num_engines, count; 1770 struct intel_engine_cs *engine; 1771 struct igt_live_test t; 1772 I915_RND_STATE(prng); 1773 struct file *file; 1774 u64 vm_total; 1775 u32 expected; 1776 int err; 1777 1778 if (GRAPHICS_VER(i915) < 7) 1779 return 0; 1780 1781 /* 1782 * The simple goal here is that a write into one context is not 1783 * observed in a second (separate page tables and scratch). 1784 */ 1785 1786 file = mock_file(i915); 1787 if (IS_ERR(file)) 1788 return PTR_ERR(file); 1789 1790 err = igt_live_test_begin(&t, i915, __func__, ""); 1791 if (err) 1792 goto out_file; 1793 1794 ctx_a = live_context(i915, file); 1795 if (IS_ERR(ctx_a)) { 1796 err = PTR_ERR(ctx_a); 1797 goto out_file; 1798 } 1799 1800 ctx_b = live_context(i915, file); 1801 if (IS_ERR(ctx_b)) { 1802 err = PTR_ERR(ctx_b); 1803 goto out_file; 1804 } 1805 1806 /* We can only test vm isolation, if the vm are distinct */ 1807 if (ctx_a->vm == ctx_b->vm) 1808 goto out_file; 1809 1810 /* Read the initial state of the scratch page */ 1811 err = check_scratch_page(ctx_a, &expected); 1812 if (err) 1813 goto out_file; 1814 1815 err = check_scratch_page(ctx_b, &expected); 1816 if (err) 1817 goto out_file; 1818 1819 vm_total = ctx_a->vm->total; 1820 GEM_BUG_ON(ctx_b->vm->total != vm_total); 1821 1822 obj_a = i915_gem_object_create_internal(i915, PAGE_SIZE); 1823 if (IS_ERR(obj_a)) { 1824 err = PTR_ERR(obj_a); 1825 goto out_file; 1826 } 1827 1828 obj_b = i915_gem_object_create_internal(i915, PAGE_SIZE); 1829 if (IS_ERR(obj_b)) { 1830 err = PTR_ERR(obj_b); 1831 goto put_a; 1832 } 1833 1834 count = 0; 1835 num_engines = 0; 1836 for_each_uabi_engine(engine, i915) { 1837 IGT_TIMEOUT(end_time); 1838 unsigned long this = 0; 1839 1840 if (!intel_engine_can_store_dword(engine)) 1841 continue; 1842 1843 /* Not all engines have their own GPR! */ 1844 if (GRAPHICS_VER(i915) < 8 && engine->class != RENDER_CLASS) 1845 continue; 1846 1847 while (!__igt_timeout(end_time, NULL)) { 1848 u32 value = 0xc5c5c5c5; 1849 u64 offset; 1850 1851 /* Leave enough space at offset 0 for the batch */ 1852 offset = igt_random_offset(&prng, 1853 I915_GTT_PAGE_SIZE, vm_total, 1854 sizeof(u32), alignof_dword); 1855 1856 err = write_to_scratch(ctx_a, engine, obj_a, 1857 offset, 0xdeadbeef); 1858 if (err == 0) 1859 err = read_from_scratch(ctx_b, engine, obj_b, 1860 offset, &value); 1861 if (err) 1862 goto put_b; 1863 1864 if (value != expected) { 1865 pr_err("%s: Read %08x from scratch (offset 0x%08x_%08x), after %lu reads!\n", 1866 engine->name, value, 1867 upper_32_bits(offset), 1868 lower_32_bits(offset), 1869 this); 1870 err = -EINVAL; 1871 goto put_b; 1872 } 1873 1874 this++; 1875 } 1876 count += this; 1877 num_engines++; 1878 } 1879 pr_info("Checked %lu scratch offsets across %lu engines\n", 1880 count, num_engines); 1881 1882 put_b: 1883 i915_gem_object_put(obj_b); 1884 put_a: 1885 i915_gem_object_put(obj_a); 1886 out_file: 1887 if (igt_live_test_end(&t)) 1888 err = -EIO; 1889 fput(file); 1890 return err; 1891 } 1892 1893 int i915_gem_context_live_selftests(struct drm_i915_private *i915) 1894 { 1895 static const struct i915_subtest tests[] = { 1896 SUBTEST(live_nop_switch), 1897 SUBTEST(live_parallel_switch), 1898 SUBTEST(igt_ctx_exec), 1899 SUBTEST(igt_ctx_readonly), 1900 SUBTEST(igt_ctx_sseu), 1901 SUBTEST(igt_shared_ctx_exec), 1902 SUBTEST(igt_vm_isolation), 1903 }; 1904 1905 if (intel_gt_is_wedged(to_gt(i915))) 1906 return 0; 1907 1908 return i915_live_subtests(tests, i915); 1909 } 1910