1 /* 2 * SPDX-License-Identifier: MIT 3 * 4 * Copyright © 2017 Intel Corporation 5 */ 6 7 #include <linux/prime_numbers.h> 8 #include <linux/string_helpers.h> 9 10 #include "gem/i915_gem_internal.h" 11 #include "gem/i915_gem_pm.h" 12 #include "gt/intel_engine_pm.h" 13 #include "gt/intel_engine_regs.h" 14 #include "gt/intel_gt.h" 15 #include "gt/intel_gt_requests.h" 16 #include "gt/intel_reset.h" 17 #include "i915_selftest.h" 18 19 #include "gem/selftests/igt_gem_utils.h" 20 #include "selftests/i915_random.h" 21 #include "selftests/igt_flush_test.h" 22 #include "selftests/igt_live_test.h" 23 #include "selftests/igt_reset.h" 24 #include "selftests/igt_spinner.h" 25 #include "selftests/mock_drm.h" 26 #include "selftests/mock_gem_device.h" 27 28 #include "huge_gem_object.h" 29 #include "igt_gem_utils.h" 30 31 #define DW_PER_PAGE (PAGE_SIZE / sizeof(u32)) 32 33 static int live_nop_switch(void *arg) 34 { 35 const unsigned int nctx = 1024; 36 struct drm_i915_private *i915 = arg; 37 struct intel_engine_cs *engine; 38 struct i915_gem_context **ctx; 39 struct igt_live_test t; 40 struct file *file; 41 unsigned long n; 42 int err = -ENODEV; 43 44 /* 45 * Create as many contexts as we can feasibly get away with 46 * and check we can switch between them rapidly. 47 * 48 * Serves as very simple stress test for submission and HW switching 49 * between contexts. 50 */ 51 52 if (!DRIVER_CAPS(i915)->has_logical_contexts) 53 return 0; 54 55 file = mock_file(i915); 56 if (IS_ERR(file)) 57 return PTR_ERR(file); 58 59 ctx = kcalloc(nctx, sizeof(*ctx), GFP_KERNEL); 60 if (!ctx) { 61 err = -ENOMEM; 62 goto out_file; 63 } 64 65 for (n = 0; n < nctx; n++) { 66 ctx[n] = live_context(i915, file); 67 if (IS_ERR(ctx[n])) { 68 err = PTR_ERR(ctx[n]); 69 goto out_file; 70 } 71 } 72 73 for_each_uabi_engine(engine, i915) { 74 struct i915_request *rq = NULL; 75 unsigned long end_time, prime; 76 ktime_t times[2] = {}; 77 78 times[0] = ktime_get_raw(); 79 for (n = 0; n < nctx; n++) { 80 struct i915_request *this; 81 82 this = igt_request_alloc(ctx[n], engine); 83 if (IS_ERR(this)) { 84 err = PTR_ERR(this); 85 goto out_file; 86 } 87 if (rq) { 88 i915_request_await_dma_fence(this, &rq->fence); 89 i915_request_put(rq); 90 } 91 rq = i915_request_get(this); 92 i915_request_add(this); 93 } 94 if (i915_request_wait(rq, 0, 10 * HZ) < 0) { 95 pr_err("Failed to populated %d contexts\n", nctx); 96 intel_gt_set_wedged(to_gt(i915)); 97 i915_request_put(rq); 98 err = -EIO; 99 goto out_file; 100 } 101 i915_request_put(rq); 102 103 times[1] = ktime_get_raw(); 104 105 pr_info("Populated %d contexts on %s in %lluns\n", 106 nctx, engine->name, ktime_to_ns(times[1] - times[0])); 107 108 err = igt_live_test_begin(&t, i915, __func__, engine->name); 109 if (err) 110 goto out_file; 111 112 end_time = jiffies + i915_selftest.timeout_jiffies; 113 for_each_prime_number_from(prime, 2, 8192) { 114 times[1] = ktime_get_raw(); 115 116 rq = NULL; 117 for (n = 0; n < prime; n++) { 118 struct i915_request *this; 119 120 this = igt_request_alloc(ctx[n % nctx], engine); 121 if (IS_ERR(this)) { 122 err = PTR_ERR(this); 123 goto out_file; 124 } 125 126 if (rq) { /* Force submission order */ 127 i915_request_await_dma_fence(this, &rq->fence); 128 i915_request_put(rq); 129 } 130 131 /* 132 * This space is left intentionally blank. 133 * 134 * We do not actually want to perform any 135 * action with this request, we just want 136 * to measure the latency in allocation 137 * and submission of our breadcrumbs - 138 * ensuring that the bare request is sufficient 139 * for the system to work (i.e. proper HEAD 140 * tracking of the rings, interrupt handling, 141 * etc). It also gives us the lowest bounds 142 * for latency. 143 */ 144 145 rq = i915_request_get(this); 146 i915_request_add(this); 147 } 148 GEM_BUG_ON(!rq); 149 if (i915_request_wait(rq, 0, HZ / 5) < 0) { 150 pr_err("Switching between %ld contexts timed out\n", 151 prime); 152 intel_gt_set_wedged(to_gt(i915)); 153 i915_request_put(rq); 154 break; 155 } 156 i915_request_put(rq); 157 158 times[1] = ktime_sub(ktime_get_raw(), times[1]); 159 if (prime == 2) 160 times[0] = times[1]; 161 162 if (__igt_timeout(end_time, NULL)) 163 break; 164 } 165 166 err = igt_live_test_end(&t); 167 if (err) 168 goto out_file; 169 170 pr_info("Switch latencies on %s: 1 = %lluns, %lu = %lluns\n", 171 engine->name, 172 ktime_to_ns(times[0]), 173 prime - 1, div64_u64(ktime_to_ns(times[1]), prime - 1)); 174 } 175 176 out_file: 177 fput(file); 178 return err; 179 } 180 181 struct parallel_switch { 182 struct kthread_worker *worker; 183 struct kthread_work work; 184 struct intel_context *ce[2]; 185 int result; 186 }; 187 188 static void __live_parallel_switch1(struct kthread_work *work) 189 { 190 struct parallel_switch *arg = 191 container_of(work, typeof(*arg), work); 192 IGT_TIMEOUT(end_time); 193 unsigned long count; 194 195 count = 0; 196 arg->result = 0; 197 do { 198 struct i915_request *rq = NULL; 199 int n; 200 201 for (n = 0; !arg->result && n < ARRAY_SIZE(arg->ce); n++) { 202 struct i915_request *prev = rq; 203 204 rq = i915_request_create(arg->ce[n]); 205 if (IS_ERR(rq)) { 206 i915_request_put(prev); 207 arg->result = PTR_ERR(rq); 208 break; 209 } 210 211 i915_request_get(rq); 212 if (prev) { 213 arg->result = 214 i915_request_await_dma_fence(rq, 215 &prev->fence); 216 i915_request_put(prev); 217 } 218 219 i915_request_add(rq); 220 } 221 222 if (IS_ERR_OR_NULL(rq)) 223 break; 224 225 if (i915_request_wait(rq, 0, HZ) < 0) 226 arg->result = -ETIME; 227 228 i915_request_put(rq); 229 230 count++; 231 } while (!arg->result && !__igt_timeout(end_time, NULL)); 232 233 pr_info("%s: %lu switches (sync) <%d>\n", 234 arg->ce[0]->engine->name, count, arg->result); 235 } 236 237 static void __live_parallel_switchN(struct kthread_work *work) 238 { 239 struct parallel_switch *arg = 240 container_of(work, typeof(*arg), work); 241 struct i915_request *rq = NULL; 242 IGT_TIMEOUT(end_time); 243 unsigned long count; 244 int n; 245 246 count = 0; 247 arg->result = 0; 248 do { 249 for (n = 0; !arg->result && n < ARRAY_SIZE(arg->ce); n++) { 250 struct i915_request *prev = rq; 251 252 rq = i915_request_create(arg->ce[n]); 253 if (IS_ERR(rq)) { 254 i915_request_put(prev); 255 arg->result = PTR_ERR(rq); 256 break; 257 } 258 259 i915_request_get(rq); 260 if (prev) { 261 arg->result = 262 i915_request_await_dma_fence(rq, 263 &prev->fence); 264 i915_request_put(prev); 265 } 266 267 i915_request_add(rq); 268 } 269 270 count++; 271 } while (!arg->result && !__igt_timeout(end_time, NULL)); 272 273 if (!IS_ERR_OR_NULL(rq)) 274 i915_request_put(rq); 275 276 pr_info("%s: %lu switches (many) <%d>\n", 277 arg->ce[0]->engine->name, count, arg->result); 278 } 279 280 static int live_parallel_switch(void *arg) 281 { 282 struct drm_i915_private *i915 = arg; 283 static void (* const func[])(struct kthread_work *) = { 284 __live_parallel_switch1, 285 __live_parallel_switchN, 286 NULL, 287 }; 288 struct parallel_switch *data = NULL; 289 struct i915_gem_engines *engines; 290 struct i915_gem_engines_iter it; 291 void (* const *fn)(struct kthread_work *); 292 struct i915_gem_context *ctx; 293 struct intel_context *ce; 294 struct file *file; 295 int n, m, count; 296 int err = 0; 297 298 /* 299 * Check we can process switches on all engines simultaneously. 300 */ 301 302 if (!DRIVER_CAPS(i915)->has_logical_contexts) 303 return 0; 304 305 file = mock_file(i915); 306 if (IS_ERR(file)) 307 return PTR_ERR(file); 308 309 ctx = live_context(i915, file); 310 if (IS_ERR(ctx)) { 311 err = PTR_ERR(ctx); 312 goto out_file; 313 } 314 315 engines = i915_gem_context_lock_engines(ctx); 316 count = engines->num_engines; 317 318 data = kcalloc(count, sizeof(*data), GFP_KERNEL); 319 if (!data) { 320 i915_gem_context_unlock_engines(ctx); 321 err = -ENOMEM; 322 goto out_file; 323 } 324 325 m = 0; /* Use the first context as our template for the engines */ 326 for_each_gem_engine(ce, engines, it) { 327 err = intel_context_pin(ce); 328 if (err) { 329 i915_gem_context_unlock_engines(ctx); 330 goto out; 331 } 332 data[m++].ce[0] = intel_context_get(ce); 333 } 334 i915_gem_context_unlock_engines(ctx); 335 336 /* Clone the same set of engines into the other contexts */ 337 for (n = 1; n < ARRAY_SIZE(data->ce); n++) { 338 ctx = live_context(i915, file); 339 if (IS_ERR(ctx)) { 340 err = PTR_ERR(ctx); 341 goto out; 342 } 343 344 for (m = 0; m < count; m++) { 345 if (!data[m].ce[0]) 346 continue; 347 348 ce = intel_context_create(data[m].ce[0]->engine); 349 if (IS_ERR(ce)) 350 goto out; 351 352 err = intel_context_pin(ce); 353 if (err) { 354 intel_context_put(ce); 355 goto out; 356 } 357 358 data[m].ce[n] = ce; 359 } 360 } 361 362 for (n = 0; n < count; n++) { 363 struct kthread_worker *worker; 364 365 if (!data[n].ce[0]) 366 continue; 367 368 worker = kthread_create_worker(0, "igt/parallel:%s", 369 data[n].ce[0]->engine->name); 370 if (IS_ERR(worker)) 371 goto out; 372 373 data[n].worker = worker; 374 } 375 376 for (fn = func; !err && *fn; fn++) { 377 struct igt_live_test t; 378 379 err = igt_live_test_begin(&t, i915, __func__, ""); 380 if (err) 381 break; 382 383 for (n = 0; n < count; n++) { 384 if (!data[n].ce[0]) 385 continue; 386 387 data[n].result = 0; 388 kthread_init_work(&data[n].work, *fn); 389 kthread_queue_work(data[n].worker, &data[n].work); 390 } 391 392 for (n = 0; n < count; n++) { 393 if (data[n].ce[0]) { 394 kthread_flush_work(&data[n].work); 395 if (data[n].result && !err) 396 err = data[n].result; 397 } 398 } 399 400 if (igt_live_test_end(&t)) 401 err = -EIO; 402 } 403 404 out: 405 for (n = 0; n < count; n++) { 406 for (m = 0; m < ARRAY_SIZE(data->ce); m++) { 407 if (!data[n].ce[m]) 408 continue; 409 410 intel_context_unpin(data[n].ce[m]); 411 intel_context_put(data[n].ce[m]); 412 } 413 414 if (data[n].worker) 415 kthread_destroy_worker(data[n].worker); 416 } 417 kfree(data); 418 out_file: 419 fput(file); 420 return err; 421 } 422 423 static unsigned long real_page_count(struct drm_i915_gem_object *obj) 424 { 425 return huge_gem_object_phys_size(obj) >> PAGE_SHIFT; 426 } 427 428 static unsigned long fake_page_count(struct drm_i915_gem_object *obj) 429 { 430 return huge_gem_object_dma_size(obj) >> PAGE_SHIFT; 431 } 432 433 static int gpu_fill(struct intel_context *ce, 434 struct drm_i915_gem_object *obj, 435 unsigned int dw) 436 { 437 struct i915_vma *vma; 438 int err; 439 440 GEM_BUG_ON(obj->base.size > ce->vm->total); 441 GEM_BUG_ON(!intel_engine_can_store_dword(ce->engine)); 442 443 vma = i915_vma_instance(obj, ce->vm, NULL); 444 if (IS_ERR(vma)) 445 return PTR_ERR(vma); 446 447 err = i915_vma_pin(vma, 0, 0, PIN_HIGH | PIN_USER); 448 if (err) 449 return err; 450 451 /* 452 * Within the GTT the huge objects maps every page onto 453 * its 1024 real pages (using phys_pfn = dma_pfn % 1024). 454 * We set the nth dword within the page using the nth 455 * mapping via the GTT - this should exercise the GTT mapping 456 * whilst checking that each context provides a unique view 457 * into the object. 458 */ 459 err = igt_gpu_fill_dw(ce, vma, 460 (dw * real_page_count(obj)) << PAGE_SHIFT | 461 (dw * sizeof(u32)), 462 real_page_count(obj), 463 dw); 464 i915_vma_unpin(vma); 465 466 return err; 467 } 468 469 static int cpu_fill(struct drm_i915_gem_object *obj, u32 value) 470 { 471 const bool has_llc = HAS_LLC(to_i915(obj->base.dev)); 472 unsigned int n, m, need_flush; 473 int err; 474 475 i915_gem_object_lock(obj, NULL); 476 err = i915_gem_object_prepare_write(obj, &need_flush); 477 if (err) 478 goto out; 479 480 for (n = 0; n < real_page_count(obj); n++) { 481 u32 *map; 482 483 map = kmap_atomic(i915_gem_object_get_page(obj, n)); 484 for (m = 0; m < DW_PER_PAGE; m++) 485 map[m] = value; 486 if (!has_llc) 487 drm_clflush_virt_range(map, PAGE_SIZE); 488 kunmap_atomic(map); 489 } 490 491 i915_gem_object_finish_access(obj); 492 obj->read_domains = I915_GEM_DOMAIN_GTT | I915_GEM_DOMAIN_CPU; 493 obj->write_domain = 0; 494 out: 495 i915_gem_object_unlock(obj); 496 return err; 497 } 498 499 static noinline int cpu_check(struct drm_i915_gem_object *obj, 500 unsigned int idx, unsigned int max) 501 { 502 unsigned int n, m, needs_flush; 503 int err; 504 505 i915_gem_object_lock(obj, NULL); 506 err = i915_gem_object_prepare_read(obj, &needs_flush); 507 if (err) 508 goto out_unlock; 509 510 for (n = 0; n < real_page_count(obj); n++) { 511 u32 *map; 512 513 map = kmap_atomic(i915_gem_object_get_page(obj, n)); 514 if (needs_flush & CLFLUSH_BEFORE) 515 drm_clflush_virt_range(map, PAGE_SIZE); 516 517 for (m = 0; m < max; m++) { 518 if (map[m] != m) { 519 pr_err("%pS: Invalid value at object %d page %d/%ld, offset %d/%d: found %x expected %x\n", 520 __builtin_return_address(0), idx, 521 n, real_page_count(obj), m, max, 522 map[m], m); 523 err = -EINVAL; 524 goto out_unmap; 525 } 526 } 527 528 for (; m < DW_PER_PAGE; m++) { 529 if (map[m] != STACK_MAGIC) { 530 pr_err("%pS: Invalid value at object %d page %d, offset %d: found %x expected %x (uninitialised)\n", 531 __builtin_return_address(0), idx, n, m, 532 map[m], STACK_MAGIC); 533 err = -EINVAL; 534 goto out_unmap; 535 } 536 } 537 538 out_unmap: 539 kunmap_atomic(map); 540 if (err) 541 break; 542 } 543 544 i915_gem_object_finish_access(obj); 545 out_unlock: 546 i915_gem_object_unlock(obj); 547 return err; 548 } 549 550 static int file_add_object(struct file *file, struct drm_i915_gem_object *obj) 551 { 552 int err; 553 554 GEM_BUG_ON(obj->base.handle_count); 555 556 /* tie the object to the drm_file for easy reaping */ 557 err = idr_alloc(&to_drm_file(file)->object_idr, 558 &obj->base, 1, 0, GFP_KERNEL); 559 if (err < 0) 560 return err; 561 562 i915_gem_object_get(obj); 563 obj->base.handle_count++; 564 return 0; 565 } 566 567 static struct drm_i915_gem_object * 568 create_test_object(struct i915_address_space *vm, 569 struct file *file, 570 struct list_head *objects) 571 { 572 struct drm_i915_gem_object *obj; 573 u64 size; 574 int err; 575 576 /* Keep in GEM's good graces */ 577 intel_gt_retire_requests(vm->gt); 578 579 size = min(vm->total / 2, 1024ull * DW_PER_PAGE * PAGE_SIZE); 580 size = round_down(size, DW_PER_PAGE * PAGE_SIZE); 581 582 obj = huge_gem_object(vm->i915, DW_PER_PAGE * PAGE_SIZE, size); 583 if (IS_ERR(obj)) 584 return obj; 585 586 err = file_add_object(file, obj); 587 i915_gem_object_put(obj); 588 if (err) 589 return ERR_PTR(err); 590 591 err = cpu_fill(obj, STACK_MAGIC); 592 if (err) { 593 pr_err("Failed to fill object with cpu, err=%d\n", 594 err); 595 return ERR_PTR(err); 596 } 597 598 list_add_tail(&obj->st_link, objects); 599 return obj; 600 } 601 602 static unsigned long max_dwords(struct drm_i915_gem_object *obj) 603 { 604 unsigned long npages = fake_page_count(obj); 605 606 GEM_BUG_ON(!IS_ALIGNED(npages, DW_PER_PAGE)); 607 return npages / DW_PER_PAGE; 608 } 609 610 static void throttle_release(struct i915_request **q, int count) 611 { 612 int i; 613 614 for (i = 0; i < count; i++) { 615 if (IS_ERR_OR_NULL(q[i])) 616 continue; 617 618 i915_request_put(fetch_and_zero(&q[i])); 619 } 620 } 621 622 static int throttle(struct intel_context *ce, 623 struct i915_request **q, int count) 624 { 625 int i; 626 627 if (!IS_ERR_OR_NULL(q[0])) { 628 if (i915_request_wait(q[0], 629 I915_WAIT_INTERRUPTIBLE, 630 MAX_SCHEDULE_TIMEOUT) < 0) 631 return -EINTR; 632 633 i915_request_put(q[0]); 634 } 635 636 for (i = 0; i < count - 1; i++) 637 q[i] = q[i + 1]; 638 639 q[i] = intel_context_create_request(ce); 640 if (IS_ERR(q[i])) 641 return PTR_ERR(q[i]); 642 643 i915_request_get(q[i]); 644 i915_request_add(q[i]); 645 646 return 0; 647 } 648 649 static int igt_ctx_exec(void *arg) 650 { 651 struct drm_i915_private *i915 = arg; 652 struct intel_engine_cs *engine; 653 int err = -ENODEV; 654 655 /* 656 * Create a few different contexts (with different mm) and write 657 * through each ctx/mm using the GPU making sure those writes end 658 * up in the expected pages of our obj. 659 */ 660 661 if (!DRIVER_CAPS(i915)->has_logical_contexts) 662 return 0; 663 664 for_each_uabi_engine(engine, i915) { 665 struct drm_i915_gem_object *obj = NULL; 666 unsigned long ncontexts, ndwords, dw; 667 struct i915_request *tq[5] = {}; 668 struct igt_live_test t; 669 IGT_TIMEOUT(end_time); 670 LIST_HEAD(objects); 671 struct file *file; 672 673 if (!intel_engine_can_store_dword(engine)) 674 continue; 675 676 if (!engine->context_size) 677 continue; /* No logical context support in HW */ 678 679 file = mock_file(i915); 680 if (IS_ERR(file)) 681 return PTR_ERR(file); 682 683 err = igt_live_test_begin(&t, i915, __func__, engine->name); 684 if (err) 685 goto out_file; 686 687 ncontexts = 0; 688 ndwords = 0; 689 dw = 0; 690 while (!time_after(jiffies, end_time)) { 691 struct i915_gem_context *ctx; 692 struct intel_context *ce; 693 694 ctx = kernel_context(i915, NULL); 695 if (IS_ERR(ctx)) { 696 err = PTR_ERR(ctx); 697 goto out_file; 698 } 699 700 ce = i915_gem_context_get_engine(ctx, engine->legacy_idx); 701 GEM_BUG_ON(IS_ERR(ce)); 702 703 if (!obj) { 704 obj = create_test_object(ce->vm, file, &objects); 705 if (IS_ERR(obj)) { 706 err = PTR_ERR(obj); 707 intel_context_put(ce); 708 kernel_context_close(ctx); 709 goto out_file; 710 } 711 } 712 713 err = gpu_fill(ce, obj, dw); 714 if (err) { 715 pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) [full-ppgtt? %s], err=%d\n", 716 ndwords, dw, max_dwords(obj), 717 engine->name, 718 str_yes_no(i915_gem_context_has_full_ppgtt(ctx)), 719 err); 720 intel_context_put(ce); 721 kernel_context_close(ctx); 722 goto out_file; 723 } 724 725 err = throttle(ce, tq, ARRAY_SIZE(tq)); 726 if (err) { 727 intel_context_put(ce); 728 kernel_context_close(ctx); 729 goto out_file; 730 } 731 732 if (++dw == max_dwords(obj)) { 733 obj = NULL; 734 dw = 0; 735 } 736 737 ndwords++; 738 ncontexts++; 739 740 intel_context_put(ce); 741 kernel_context_close(ctx); 742 } 743 744 pr_info("Submitted %lu contexts to %s, filling %lu dwords\n", 745 ncontexts, engine->name, ndwords); 746 747 ncontexts = dw = 0; 748 list_for_each_entry(obj, &objects, st_link) { 749 unsigned int rem = 750 min_t(unsigned int, ndwords - dw, max_dwords(obj)); 751 752 err = cpu_check(obj, ncontexts++, rem); 753 if (err) 754 break; 755 756 dw += rem; 757 } 758 759 out_file: 760 throttle_release(tq, ARRAY_SIZE(tq)); 761 if (igt_live_test_end(&t)) 762 err = -EIO; 763 764 fput(file); 765 if (err) 766 return err; 767 768 i915_gem_drain_freed_objects(i915); 769 } 770 771 return 0; 772 } 773 774 static int igt_shared_ctx_exec(void *arg) 775 { 776 struct drm_i915_private *i915 = arg; 777 struct i915_request *tq[5] = {}; 778 struct i915_gem_context *parent; 779 struct intel_engine_cs *engine; 780 struct igt_live_test t; 781 struct file *file; 782 int err = 0; 783 784 /* 785 * Create a few different contexts with the same mm and write 786 * through each ctx using the GPU making sure those writes end 787 * up in the expected pages of our obj. 788 */ 789 if (!DRIVER_CAPS(i915)->has_logical_contexts) 790 return 0; 791 792 file = mock_file(i915); 793 if (IS_ERR(file)) 794 return PTR_ERR(file); 795 796 parent = live_context(i915, file); 797 if (IS_ERR(parent)) { 798 err = PTR_ERR(parent); 799 goto out_file; 800 } 801 802 if (!parent->vm) { /* not full-ppgtt; nothing to share */ 803 err = 0; 804 goto out_file; 805 } 806 807 err = igt_live_test_begin(&t, i915, __func__, ""); 808 if (err) 809 goto out_file; 810 811 for_each_uabi_engine(engine, i915) { 812 unsigned long ncontexts, ndwords, dw; 813 struct drm_i915_gem_object *obj = NULL; 814 IGT_TIMEOUT(end_time); 815 LIST_HEAD(objects); 816 817 if (!intel_engine_can_store_dword(engine)) 818 continue; 819 820 dw = 0; 821 ndwords = 0; 822 ncontexts = 0; 823 while (!time_after(jiffies, end_time)) { 824 struct i915_gem_context *ctx; 825 struct intel_context *ce; 826 827 ctx = kernel_context(i915, parent->vm); 828 if (IS_ERR(ctx)) { 829 err = PTR_ERR(ctx); 830 goto out_test; 831 } 832 833 ce = i915_gem_context_get_engine(ctx, engine->legacy_idx); 834 GEM_BUG_ON(IS_ERR(ce)); 835 836 if (!obj) { 837 obj = create_test_object(parent->vm, 838 file, &objects); 839 if (IS_ERR(obj)) { 840 err = PTR_ERR(obj); 841 intel_context_put(ce); 842 kernel_context_close(ctx); 843 goto out_test; 844 } 845 } 846 847 err = gpu_fill(ce, obj, dw); 848 if (err) { 849 pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) [full-ppgtt? %s], err=%d\n", 850 ndwords, dw, max_dwords(obj), 851 engine->name, 852 str_yes_no(i915_gem_context_has_full_ppgtt(ctx)), 853 err); 854 intel_context_put(ce); 855 kernel_context_close(ctx); 856 goto out_test; 857 } 858 859 err = throttle(ce, tq, ARRAY_SIZE(tq)); 860 if (err) { 861 intel_context_put(ce); 862 kernel_context_close(ctx); 863 goto out_test; 864 } 865 866 if (++dw == max_dwords(obj)) { 867 obj = NULL; 868 dw = 0; 869 } 870 871 ndwords++; 872 ncontexts++; 873 874 intel_context_put(ce); 875 kernel_context_close(ctx); 876 } 877 pr_info("Submitted %lu contexts to %s, filling %lu dwords\n", 878 ncontexts, engine->name, ndwords); 879 880 ncontexts = dw = 0; 881 list_for_each_entry(obj, &objects, st_link) { 882 unsigned int rem = 883 min_t(unsigned int, ndwords - dw, max_dwords(obj)); 884 885 err = cpu_check(obj, ncontexts++, rem); 886 if (err) 887 goto out_test; 888 889 dw += rem; 890 } 891 892 i915_gem_drain_freed_objects(i915); 893 } 894 out_test: 895 throttle_release(tq, ARRAY_SIZE(tq)); 896 if (igt_live_test_end(&t)) 897 err = -EIO; 898 out_file: 899 fput(file); 900 return err; 901 } 902 903 static int rpcs_query_batch(struct drm_i915_gem_object *rpcs, 904 struct i915_vma *vma, 905 struct intel_engine_cs *engine) 906 { 907 u32 *cmd; 908 909 GEM_BUG_ON(GRAPHICS_VER(vma->vm->i915) < 8); 910 911 cmd = i915_gem_object_pin_map(rpcs, I915_MAP_WB); 912 if (IS_ERR(cmd)) 913 return PTR_ERR(cmd); 914 915 *cmd++ = MI_STORE_REGISTER_MEM_GEN8; 916 *cmd++ = i915_mmio_reg_offset(GEN8_R_PWR_CLK_STATE(engine->mmio_base)); 917 *cmd++ = lower_32_bits(vma->node.start); 918 *cmd++ = upper_32_bits(vma->node.start); 919 *cmd = MI_BATCH_BUFFER_END; 920 921 __i915_gem_object_flush_map(rpcs, 0, 64); 922 i915_gem_object_unpin_map(rpcs); 923 924 intel_gt_chipset_flush(vma->vm->gt); 925 926 return 0; 927 } 928 929 static int 930 emit_rpcs_query(struct drm_i915_gem_object *obj, 931 struct intel_context *ce, 932 struct i915_request **rq_out) 933 { 934 struct drm_i915_private *i915 = to_i915(obj->base.dev); 935 struct i915_request *rq; 936 struct i915_gem_ww_ctx ww; 937 struct i915_vma *batch; 938 struct i915_vma *vma; 939 struct drm_i915_gem_object *rpcs; 940 int err; 941 942 GEM_BUG_ON(!intel_engine_can_store_dword(ce->engine)); 943 944 if (GRAPHICS_VER(i915) < 8) 945 return -EINVAL; 946 947 vma = i915_vma_instance(obj, ce->vm, NULL); 948 if (IS_ERR(vma)) 949 return PTR_ERR(vma); 950 951 rpcs = i915_gem_object_create_internal(i915, PAGE_SIZE); 952 if (IS_ERR(rpcs)) 953 return PTR_ERR(rpcs); 954 955 batch = i915_vma_instance(rpcs, ce->vm, NULL); 956 if (IS_ERR(batch)) { 957 err = PTR_ERR(batch); 958 goto err_put; 959 } 960 961 i915_gem_ww_ctx_init(&ww, false); 962 retry: 963 err = i915_gem_object_lock(obj, &ww); 964 if (!err) 965 err = i915_gem_object_lock(rpcs, &ww); 966 if (!err) 967 err = i915_gem_object_set_to_gtt_domain(obj, false); 968 if (!err) 969 err = i915_vma_pin_ww(vma, &ww, 0, 0, PIN_USER); 970 if (err) 971 goto err_put; 972 973 err = i915_vma_pin_ww(batch, &ww, 0, 0, PIN_USER); 974 if (err) 975 goto err_vma; 976 977 err = rpcs_query_batch(rpcs, vma, ce->engine); 978 if (err) 979 goto err_batch; 980 981 rq = i915_request_create(ce); 982 if (IS_ERR(rq)) { 983 err = PTR_ERR(rq); 984 goto err_batch; 985 } 986 987 err = i915_vma_move_to_active(batch, rq, 0); 988 if (err) 989 goto skip_request; 990 991 err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE); 992 if (err) 993 goto skip_request; 994 995 if (rq->engine->emit_init_breadcrumb) { 996 err = rq->engine->emit_init_breadcrumb(rq); 997 if (err) 998 goto skip_request; 999 } 1000 1001 err = rq->engine->emit_bb_start(rq, 1002 batch->node.start, batch->node.size, 1003 0); 1004 if (err) 1005 goto skip_request; 1006 1007 *rq_out = i915_request_get(rq); 1008 1009 skip_request: 1010 if (err) 1011 i915_request_set_error_once(rq, err); 1012 i915_request_add(rq); 1013 err_batch: 1014 i915_vma_unpin(batch); 1015 err_vma: 1016 i915_vma_unpin(vma); 1017 err_put: 1018 if (err == -EDEADLK) { 1019 err = i915_gem_ww_ctx_backoff(&ww); 1020 if (!err) 1021 goto retry; 1022 } 1023 i915_gem_ww_ctx_fini(&ww); 1024 i915_gem_object_put(rpcs); 1025 return err; 1026 } 1027 1028 #define TEST_IDLE BIT(0) 1029 #define TEST_BUSY BIT(1) 1030 #define TEST_RESET BIT(2) 1031 1032 static int 1033 __sseu_prepare(const char *name, 1034 unsigned int flags, 1035 struct intel_context *ce, 1036 struct igt_spinner **spin) 1037 { 1038 struct i915_request *rq; 1039 int ret; 1040 1041 *spin = NULL; 1042 if (!(flags & (TEST_BUSY | TEST_RESET))) 1043 return 0; 1044 1045 *spin = kzalloc(sizeof(**spin), GFP_KERNEL); 1046 if (!*spin) 1047 return -ENOMEM; 1048 1049 ret = igt_spinner_init(*spin, ce->engine->gt); 1050 if (ret) 1051 goto err_free; 1052 1053 rq = igt_spinner_create_request(*spin, ce, MI_NOOP); 1054 if (IS_ERR(rq)) { 1055 ret = PTR_ERR(rq); 1056 goto err_fini; 1057 } 1058 1059 i915_request_add(rq); 1060 1061 if (!igt_wait_for_spinner(*spin, rq)) { 1062 pr_err("%s: Spinner failed to start!\n", name); 1063 ret = -ETIMEDOUT; 1064 goto err_end; 1065 } 1066 1067 return 0; 1068 1069 err_end: 1070 igt_spinner_end(*spin); 1071 err_fini: 1072 igt_spinner_fini(*spin); 1073 err_free: 1074 kfree(fetch_and_zero(spin)); 1075 return ret; 1076 } 1077 1078 static int 1079 __read_slice_count(struct intel_context *ce, 1080 struct drm_i915_gem_object *obj, 1081 struct igt_spinner *spin, 1082 u32 *rpcs) 1083 { 1084 struct i915_request *rq = NULL; 1085 u32 s_mask, s_shift; 1086 unsigned int cnt; 1087 u32 *buf, val; 1088 long ret; 1089 1090 ret = emit_rpcs_query(obj, ce, &rq); 1091 if (ret) 1092 return ret; 1093 1094 if (spin) 1095 igt_spinner_end(spin); 1096 1097 ret = i915_request_wait(rq, 0, MAX_SCHEDULE_TIMEOUT); 1098 i915_request_put(rq); 1099 if (ret < 0) 1100 return ret; 1101 1102 buf = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WB); 1103 if (IS_ERR(buf)) { 1104 ret = PTR_ERR(buf); 1105 return ret; 1106 } 1107 1108 if (GRAPHICS_VER(ce->engine->i915) >= 11) { 1109 s_mask = GEN11_RPCS_S_CNT_MASK; 1110 s_shift = GEN11_RPCS_S_CNT_SHIFT; 1111 } else { 1112 s_mask = GEN8_RPCS_S_CNT_MASK; 1113 s_shift = GEN8_RPCS_S_CNT_SHIFT; 1114 } 1115 1116 val = *buf; 1117 cnt = (val & s_mask) >> s_shift; 1118 *rpcs = val; 1119 1120 i915_gem_object_unpin_map(obj); 1121 1122 return cnt; 1123 } 1124 1125 static int 1126 __check_rpcs(const char *name, u32 rpcs, int slices, unsigned int expected, 1127 const char *prefix, const char *suffix) 1128 { 1129 if (slices == expected) 1130 return 0; 1131 1132 if (slices < 0) { 1133 pr_err("%s: %s read slice count failed with %d%s\n", 1134 name, prefix, slices, suffix); 1135 return slices; 1136 } 1137 1138 pr_err("%s: %s slice count %d is not %u%s\n", 1139 name, prefix, slices, expected, suffix); 1140 1141 pr_info("RPCS=0x%x; %u%sx%u%s\n", 1142 rpcs, slices, 1143 (rpcs & GEN8_RPCS_S_CNT_ENABLE) ? "*" : "", 1144 (rpcs & GEN8_RPCS_SS_CNT_MASK) >> GEN8_RPCS_SS_CNT_SHIFT, 1145 (rpcs & GEN8_RPCS_SS_CNT_ENABLE) ? "*" : ""); 1146 1147 return -EINVAL; 1148 } 1149 1150 static int 1151 __sseu_finish(const char *name, 1152 unsigned int flags, 1153 struct intel_context *ce, 1154 struct drm_i915_gem_object *obj, 1155 unsigned int expected, 1156 struct igt_spinner *spin) 1157 { 1158 unsigned int slices = hweight32(ce->engine->sseu.slice_mask); 1159 u32 rpcs = 0; 1160 int ret = 0; 1161 1162 if (flags & TEST_RESET) { 1163 ret = intel_engine_reset(ce->engine, "sseu"); 1164 if (ret) 1165 goto out; 1166 } 1167 1168 ret = __read_slice_count(ce, obj, 1169 flags & TEST_RESET ? NULL : spin, &rpcs); 1170 ret = __check_rpcs(name, rpcs, ret, expected, "Context", "!"); 1171 if (ret) 1172 goto out; 1173 1174 ret = __read_slice_count(ce->engine->kernel_context, obj, NULL, &rpcs); 1175 ret = __check_rpcs(name, rpcs, ret, slices, "Kernel context", "!"); 1176 1177 out: 1178 if (spin) 1179 igt_spinner_end(spin); 1180 1181 if ((flags & TEST_IDLE) && ret == 0) { 1182 ret = igt_flush_test(ce->engine->i915); 1183 if (ret) 1184 return ret; 1185 1186 ret = __read_slice_count(ce, obj, NULL, &rpcs); 1187 ret = __check_rpcs(name, rpcs, ret, expected, 1188 "Context", " after idle!"); 1189 } 1190 1191 return ret; 1192 } 1193 1194 static int 1195 __sseu_test(const char *name, 1196 unsigned int flags, 1197 struct intel_context *ce, 1198 struct drm_i915_gem_object *obj, 1199 struct intel_sseu sseu) 1200 { 1201 struct igt_spinner *spin = NULL; 1202 int ret; 1203 1204 intel_engine_pm_get(ce->engine); 1205 1206 ret = __sseu_prepare(name, flags, ce, &spin); 1207 if (ret) 1208 goto out_pm; 1209 1210 ret = intel_context_reconfigure_sseu(ce, sseu); 1211 if (ret) 1212 goto out_spin; 1213 1214 ret = __sseu_finish(name, flags, ce, obj, 1215 hweight32(sseu.slice_mask), spin); 1216 1217 out_spin: 1218 if (spin) { 1219 igt_spinner_end(spin); 1220 igt_spinner_fini(spin); 1221 kfree(spin); 1222 } 1223 out_pm: 1224 intel_engine_pm_put(ce->engine); 1225 return ret; 1226 } 1227 1228 static int 1229 __igt_ctx_sseu(struct drm_i915_private *i915, 1230 const char *name, 1231 unsigned int flags) 1232 { 1233 struct drm_i915_gem_object *obj; 1234 int inst = 0; 1235 int ret = 0; 1236 1237 if (GRAPHICS_VER(i915) < 9) 1238 return 0; 1239 1240 if (flags & TEST_RESET) 1241 igt_global_reset_lock(to_gt(i915)); 1242 1243 obj = i915_gem_object_create_internal(i915, PAGE_SIZE); 1244 if (IS_ERR(obj)) { 1245 ret = PTR_ERR(obj); 1246 goto out_unlock; 1247 } 1248 1249 do { 1250 struct intel_engine_cs *engine; 1251 struct intel_context *ce; 1252 struct intel_sseu pg_sseu; 1253 1254 engine = intel_engine_lookup_user(i915, 1255 I915_ENGINE_CLASS_RENDER, 1256 inst++); 1257 if (!engine) 1258 break; 1259 1260 if (hweight32(engine->sseu.slice_mask) < 2) 1261 continue; 1262 1263 if (!engine->gt->info.sseu.has_slice_pg) 1264 continue; 1265 1266 /* 1267 * Gen11 VME friendly power-gated configuration with 1268 * half enabled sub-slices. 1269 */ 1270 pg_sseu = engine->sseu; 1271 pg_sseu.slice_mask = 1; 1272 pg_sseu.subslice_mask = 1273 ~(~0 << (hweight32(engine->sseu.subslice_mask) / 2)); 1274 1275 pr_info("%s: SSEU subtest '%s', flags=%x, def_slices=%u, pg_slices=%u\n", 1276 engine->name, name, flags, 1277 hweight32(engine->sseu.slice_mask), 1278 hweight32(pg_sseu.slice_mask)); 1279 1280 ce = intel_context_create(engine); 1281 if (IS_ERR(ce)) { 1282 ret = PTR_ERR(ce); 1283 goto out_put; 1284 } 1285 1286 ret = intel_context_pin(ce); 1287 if (ret) 1288 goto out_ce; 1289 1290 /* First set the default mask. */ 1291 ret = __sseu_test(name, flags, ce, obj, engine->sseu); 1292 if (ret) 1293 goto out_unpin; 1294 1295 /* Then set a power-gated configuration. */ 1296 ret = __sseu_test(name, flags, ce, obj, pg_sseu); 1297 if (ret) 1298 goto out_unpin; 1299 1300 /* Back to defaults. */ 1301 ret = __sseu_test(name, flags, ce, obj, engine->sseu); 1302 if (ret) 1303 goto out_unpin; 1304 1305 /* One last power-gated configuration for the road. */ 1306 ret = __sseu_test(name, flags, ce, obj, pg_sseu); 1307 if (ret) 1308 goto out_unpin; 1309 1310 out_unpin: 1311 intel_context_unpin(ce); 1312 out_ce: 1313 intel_context_put(ce); 1314 } while (!ret); 1315 1316 if (igt_flush_test(i915)) 1317 ret = -EIO; 1318 1319 out_put: 1320 i915_gem_object_put(obj); 1321 1322 out_unlock: 1323 if (flags & TEST_RESET) 1324 igt_global_reset_unlock(to_gt(i915)); 1325 1326 if (ret) 1327 pr_err("%s: Failed with %d!\n", name, ret); 1328 1329 return ret; 1330 } 1331 1332 static int igt_ctx_sseu(void *arg) 1333 { 1334 struct { 1335 const char *name; 1336 unsigned int flags; 1337 } *phase, phases[] = { 1338 { .name = "basic", .flags = 0 }, 1339 { .name = "idle", .flags = TEST_IDLE }, 1340 { .name = "busy", .flags = TEST_BUSY }, 1341 { .name = "busy-reset", .flags = TEST_BUSY | TEST_RESET }, 1342 { .name = "busy-idle", .flags = TEST_BUSY | TEST_IDLE }, 1343 { .name = "reset-idle", .flags = TEST_RESET | TEST_IDLE }, 1344 }; 1345 unsigned int i; 1346 int ret = 0; 1347 1348 for (i = 0, phase = phases; ret == 0 && i < ARRAY_SIZE(phases); 1349 i++, phase++) 1350 ret = __igt_ctx_sseu(arg, phase->name, phase->flags); 1351 1352 return ret; 1353 } 1354 1355 static int igt_ctx_readonly(void *arg) 1356 { 1357 struct drm_i915_private *i915 = arg; 1358 unsigned long idx, ndwords, dw, num_engines; 1359 struct drm_i915_gem_object *obj = NULL; 1360 struct i915_request *tq[5] = {}; 1361 struct i915_gem_engines_iter it; 1362 struct i915_address_space *vm; 1363 struct i915_gem_context *ctx; 1364 struct intel_context *ce; 1365 struct igt_live_test t; 1366 I915_RND_STATE(prng); 1367 IGT_TIMEOUT(end_time); 1368 LIST_HEAD(objects); 1369 struct file *file; 1370 int err = -ENODEV; 1371 1372 /* 1373 * Create a few read-only objects (with the occasional writable object) 1374 * and try to write into these object checking that the GPU discards 1375 * any write to a read-only object. 1376 */ 1377 1378 file = mock_file(i915); 1379 if (IS_ERR(file)) 1380 return PTR_ERR(file); 1381 1382 err = igt_live_test_begin(&t, i915, __func__, ""); 1383 if (err) 1384 goto out_file; 1385 1386 ctx = live_context(i915, file); 1387 if (IS_ERR(ctx)) { 1388 err = PTR_ERR(ctx); 1389 goto out_file; 1390 } 1391 1392 vm = ctx->vm ?: &to_gt(i915)->ggtt->alias->vm; 1393 if (!vm || !vm->has_read_only) { 1394 err = 0; 1395 goto out_file; 1396 } 1397 1398 num_engines = 0; 1399 for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) 1400 if (intel_engine_can_store_dword(ce->engine)) 1401 num_engines++; 1402 i915_gem_context_unlock_engines(ctx); 1403 1404 ndwords = 0; 1405 dw = 0; 1406 while (!time_after(jiffies, end_time)) { 1407 for_each_gem_engine(ce, 1408 i915_gem_context_lock_engines(ctx), it) { 1409 if (!intel_engine_can_store_dword(ce->engine)) 1410 continue; 1411 1412 if (!obj) { 1413 obj = create_test_object(ce->vm, file, &objects); 1414 if (IS_ERR(obj)) { 1415 err = PTR_ERR(obj); 1416 i915_gem_context_unlock_engines(ctx); 1417 goto out_file; 1418 } 1419 1420 if (prandom_u32_state(&prng) & 1) 1421 i915_gem_object_set_readonly(obj); 1422 } 1423 1424 err = gpu_fill(ce, obj, dw); 1425 if (err) { 1426 pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) [full-ppgtt? %s], err=%d\n", 1427 ndwords, dw, max_dwords(obj), 1428 ce->engine->name, 1429 str_yes_no(i915_gem_context_has_full_ppgtt(ctx)), 1430 err); 1431 i915_gem_context_unlock_engines(ctx); 1432 goto out_file; 1433 } 1434 1435 err = throttle(ce, tq, ARRAY_SIZE(tq)); 1436 if (err) { 1437 i915_gem_context_unlock_engines(ctx); 1438 goto out_file; 1439 } 1440 1441 if (++dw == max_dwords(obj)) { 1442 obj = NULL; 1443 dw = 0; 1444 } 1445 ndwords++; 1446 } 1447 i915_gem_context_unlock_engines(ctx); 1448 } 1449 pr_info("Submitted %lu dwords (across %lu engines)\n", 1450 ndwords, num_engines); 1451 1452 dw = 0; 1453 idx = 0; 1454 list_for_each_entry(obj, &objects, st_link) { 1455 unsigned int rem = 1456 min_t(unsigned int, ndwords - dw, max_dwords(obj)); 1457 unsigned int num_writes; 1458 1459 num_writes = rem; 1460 if (i915_gem_object_is_readonly(obj)) 1461 num_writes = 0; 1462 1463 err = cpu_check(obj, idx++, num_writes); 1464 if (err) 1465 break; 1466 1467 dw += rem; 1468 } 1469 1470 out_file: 1471 throttle_release(tq, ARRAY_SIZE(tq)); 1472 if (igt_live_test_end(&t)) 1473 err = -EIO; 1474 1475 fput(file); 1476 return err; 1477 } 1478 1479 static int check_scratch(struct i915_address_space *vm, u64 offset) 1480 { 1481 struct drm_mm_node *node; 1482 1483 mutex_lock(&vm->mutex); 1484 node = __drm_mm_interval_first(&vm->mm, 1485 offset, offset + sizeof(u32) - 1); 1486 mutex_unlock(&vm->mutex); 1487 if (!node || node->start > offset) 1488 return 0; 1489 1490 GEM_BUG_ON(offset >= node->start + node->size); 1491 1492 pr_err("Target offset 0x%08x_%08x overlaps with a node in the mm!\n", 1493 upper_32_bits(offset), lower_32_bits(offset)); 1494 return -EINVAL; 1495 } 1496 1497 static int write_to_scratch(struct i915_gem_context *ctx, 1498 struct intel_engine_cs *engine, 1499 struct drm_i915_gem_object *obj, 1500 u64 offset, u32 value) 1501 { 1502 struct drm_i915_private *i915 = ctx->i915; 1503 struct i915_address_space *vm; 1504 struct i915_request *rq; 1505 struct i915_vma *vma; 1506 u32 *cmd; 1507 int err; 1508 1509 GEM_BUG_ON(offset < I915_GTT_PAGE_SIZE); 1510 1511 err = check_scratch(ctx->vm, offset); 1512 if (err) 1513 return err; 1514 1515 cmd = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WB); 1516 if (IS_ERR(cmd)) 1517 return PTR_ERR(cmd); 1518 1519 *cmd++ = MI_STORE_DWORD_IMM_GEN4; 1520 if (GRAPHICS_VER(i915) >= 8) { 1521 *cmd++ = lower_32_bits(offset); 1522 *cmd++ = upper_32_bits(offset); 1523 } else { 1524 *cmd++ = 0; 1525 *cmd++ = offset; 1526 } 1527 *cmd++ = value; 1528 *cmd = MI_BATCH_BUFFER_END; 1529 __i915_gem_object_flush_map(obj, 0, 64); 1530 i915_gem_object_unpin_map(obj); 1531 1532 intel_gt_chipset_flush(engine->gt); 1533 1534 vm = i915_gem_context_get_eb_vm(ctx); 1535 vma = i915_vma_instance(obj, vm, NULL); 1536 if (IS_ERR(vma)) { 1537 err = PTR_ERR(vma); 1538 goto out_vm; 1539 } 1540 1541 err = i915_vma_pin(vma, 0, 0, PIN_USER | PIN_OFFSET_FIXED); 1542 if (err) 1543 goto out_vm; 1544 1545 rq = igt_request_alloc(ctx, engine); 1546 if (IS_ERR(rq)) { 1547 err = PTR_ERR(rq); 1548 goto err_unpin; 1549 } 1550 1551 i915_vma_lock(vma); 1552 err = i915_vma_move_to_active(vma, rq, 0); 1553 i915_vma_unlock(vma); 1554 if (err) 1555 goto skip_request; 1556 1557 if (rq->engine->emit_init_breadcrumb) { 1558 err = rq->engine->emit_init_breadcrumb(rq); 1559 if (err) 1560 goto skip_request; 1561 } 1562 1563 err = engine->emit_bb_start(rq, vma->node.start, vma->node.size, 0); 1564 if (err) 1565 goto skip_request; 1566 1567 i915_vma_unpin(vma); 1568 1569 i915_request_add(rq); 1570 1571 goto out_vm; 1572 skip_request: 1573 i915_request_set_error_once(rq, err); 1574 i915_request_add(rq); 1575 err_unpin: 1576 i915_vma_unpin(vma); 1577 out_vm: 1578 i915_vm_put(vm); 1579 1580 if (!err) 1581 err = i915_gem_object_wait(obj, 0, MAX_SCHEDULE_TIMEOUT); 1582 1583 return err; 1584 } 1585 1586 static int read_from_scratch(struct i915_gem_context *ctx, 1587 struct intel_engine_cs *engine, 1588 struct drm_i915_gem_object *obj, 1589 u64 offset, u32 *value) 1590 { 1591 struct drm_i915_private *i915 = ctx->i915; 1592 struct i915_address_space *vm; 1593 const u32 result = 0x100; 1594 struct i915_request *rq; 1595 struct i915_vma *vma; 1596 unsigned int flags; 1597 u32 *cmd; 1598 int err; 1599 1600 GEM_BUG_ON(offset < I915_GTT_PAGE_SIZE); 1601 1602 err = check_scratch(ctx->vm, offset); 1603 if (err) 1604 return err; 1605 1606 if (GRAPHICS_VER(i915) >= 8) { 1607 const u32 GPR0 = engine->mmio_base + 0x600; 1608 1609 vm = i915_gem_context_get_eb_vm(ctx); 1610 vma = i915_vma_instance(obj, vm, NULL); 1611 if (IS_ERR(vma)) { 1612 err = PTR_ERR(vma); 1613 goto out_vm; 1614 } 1615 1616 err = i915_vma_pin(vma, 0, 0, PIN_USER | PIN_OFFSET_FIXED); 1617 if (err) 1618 goto out_vm; 1619 1620 cmd = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WB); 1621 if (IS_ERR(cmd)) { 1622 err = PTR_ERR(cmd); 1623 goto err_unpin; 1624 } 1625 1626 memset(cmd, POISON_INUSE, PAGE_SIZE); 1627 *cmd++ = MI_LOAD_REGISTER_MEM_GEN8; 1628 *cmd++ = GPR0; 1629 *cmd++ = lower_32_bits(offset); 1630 *cmd++ = upper_32_bits(offset); 1631 *cmd++ = MI_STORE_REGISTER_MEM_GEN8; 1632 *cmd++ = GPR0; 1633 *cmd++ = result; 1634 *cmd++ = 0; 1635 *cmd = MI_BATCH_BUFFER_END; 1636 1637 i915_gem_object_flush_map(obj); 1638 i915_gem_object_unpin_map(obj); 1639 1640 flags = 0; 1641 } else { 1642 const u32 reg = engine->mmio_base + 0x420; 1643 1644 /* hsw: register access even to 3DPRIM! is protected */ 1645 vm = i915_vm_get(&engine->gt->ggtt->vm); 1646 vma = i915_vma_instance(obj, vm, NULL); 1647 if (IS_ERR(vma)) { 1648 err = PTR_ERR(vma); 1649 goto out_vm; 1650 } 1651 1652 err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL); 1653 if (err) 1654 goto out_vm; 1655 1656 cmd = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WB); 1657 if (IS_ERR(cmd)) { 1658 err = PTR_ERR(cmd); 1659 goto err_unpin; 1660 } 1661 1662 memset(cmd, POISON_INUSE, PAGE_SIZE); 1663 *cmd++ = MI_LOAD_REGISTER_MEM; 1664 *cmd++ = reg; 1665 *cmd++ = offset; 1666 *cmd++ = MI_STORE_REGISTER_MEM | MI_USE_GGTT; 1667 *cmd++ = reg; 1668 *cmd++ = vma->node.start + result; 1669 *cmd = MI_BATCH_BUFFER_END; 1670 1671 i915_gem_object_flush_map(obj); 1672 i915_gem_object_unpin_map(obj); 1673 1674 flags = I915_DISPATCH_SECURE; 1675 } 1676 1677 intel_gt_chipset_flush(engine->gt); 1678 1679 rq = igt_request_alloc(ctx, engine); 1680 if (IS_ERR(rq)) { 1681 err = PTR_ERR(rq); 1682 goto err_unpin; 1683 } 1684 1685 i915_vma_lock(vma); 1686 err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE); 1687 i915_vma_unlock(vma); 1688 if (err) 1689 goto skip_request; 1690 1691 if (rq->engine->emit_init_breadcrumb) { 1692 err = rq->engine->emit_init_breadcrumb(rq); 1693 if (err) 1694 goto skip_request; 1695 } 1696 1697 err = engine->emit_bb_start(rq, vma->node.start, vma->node.size, flags); 1698 if (err) 1699 goto skip_request; 1700 1701 i915_vma_unpin(vma); 1702 1703 i915_request_add(rq); 1704 1705 i915_gem_object_lock(obj, NULL); 1706 err = i915_gem_object_set_to_cpu_domain(obj, false); 1707 i915_gem_object_unlock(obj); 1708 if (err) 1709 goto out_vm; 1710 1711 cmd = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WB); 1712 if (IS_ERR(cmd)) { 1713 err = PTR_ERR(cmd); 1714 goto out_vm; 1715 } 1716 1717 *value = cmd[result / sizeof(*cmd)]; 1718 i915_gem_object_unpin_map(obj); 1719 1720 goto out_vm; 1721 skip_request: 1722 i915_request_set_error_once(rq, err); 1723 i915_request_add(rq); 1724 err_unpin: 1725 i915_vma_unpin(vma); 1726 out_vm: 1727 i915_vm_put(vm); 1728 1729 if (!err) 1730 err = i915_gem_object_wait(obj, 0, MAX_SCHEDULE_TIMEOUT); 1731 1732 return err; 1733 } 1734 1735 static int check_scratch_page(struct i915_gem_context *ctx, u32 *out) 1736 { 1737 struct i915_address_space *vm; 1738 u32 *vaddr; 1739 int err = 0; 1740 1741 vm = ctx->vm; 1742 if (!vm) 1743 return -ENODEV; 1744 1745 if (!vm->scratch[0]) { 1746 pr_err("No scratch page!\n"); 1747 return -EINVAL; 1748 } 1749 1750 vaddr = __px_vaddr(vm->scratch[0]); 1751 1752 memcpy(out, vaddr, sizeof(*out)); 1753 if (memchr_inv(vaddr, *out, PAGE_SIZE)) { 1754 pr_err("Inconsistent initial state of scratch page!\n"); 1755 err = -EINVAL; 1756 } 1757 1758 return err; 1759 } 1760 1761 static int igt_vm_isolation(void *arg) 1762 { 1763 struct drm_i915_private *i915 = arg; 1764 struct i915_gem_context *ctx_a, *ctx_b; 1765 struct drm_i915_gem_object *obj_a, *obj_b; 1766 unsigned long num_engines, count; 1767 struct intel_engine_cs *engine; 1768 struct igt_live_test t; 1769 I915_RND_STATE(prng); 1770 struct file *file; 1771 u64 vm_total; 1772 u32 expected; 1773 int err; 1774 1775 if (GRAPHICS_VER(i915) < 7) 1776 return 0; 1777 1778 /* 1779 * The simple goal here is that a write into one context is not 1780 * observed in a second (separate page tables and scratch). 1781 */ 1782 1783 file = mock_file(i915); 1784 if (IS_ERR(file)) 1785 return PTR_ERR(file); 1786 1787 err = igt_live_test_begin(&t, i915, __func__, ""); 1788 if (err) 1789 goto out_file; 1790 1791 ctx_a = live_context(i915, file); 1792 if (IS_ERR(ctx_a)) { 1793 err = PTR_ERR(ctx_a); 1794 goto out_file; 1795 } 1796 1797 ctx_b = live_context(i915, file); 1798 if (IS_ERR(ctx_b)) { 1799 err = PTR_ERR(ctx_b); 1800 goto out_file; 1801 } 1802 1803 /* We can only test vm isolation, if the vm are distinct */ 1804 if (ctx_a->vm == ctx_b->vm) 1805 goto out_file; 1806 1807 /* Read the initial state of the scratch page */ 1808 err = check_scratch_page(ctx_a, &expected); 1809 if (err) 1810 goto out_file; 1811 1812 err = check_scratch_page(ctx_b, &expected); 1813 if (err) 1814 goto out_file; 1815 1816 vm_total = ctx_a->vm->total; 1817 GEM_BUG_ON(ctx_b->vm->total != vm_total); 1818 1819 obj_a = i915_gem_object_create_internal(i915, PAGE_SIZE); 1820 if (IS_ERR(obj_a)) { 1821 err = PTR_ERR(obj_a); 1822 goto out_file; 1823 } 1824 1825 obj_b = i915_gem_object_create_internal(i915, PAGE_SIZE); 1826 if (IS_ERR(obj_b)) { 1827 err = PTR_ERR(obj_b); 1828 goto put_a; 1829 } 1830 1831 count = 0; 1832 num_engines = 0; 1833 for_each_uabi_engine(engine, i915) { 1834 IGT_TIMEOUT(end_time); 1835 unsigned long this = 0; 1836 1837 if (!intel_engine_can_store_dword(engine)) 1838 continue; 1839 1840 /* Not all engines have their own GPR! */ 1841 if (GRAPHICS_VER(i915) < 8 && engine->class != RENDER_CLASS) 1842 continue; 1843 1844 while (!__igt_timeout(end_time, NULL)) { 1845 u32 value = 0xc5c5c5c5; 1846 u64 offset; 1847 1848 /* Leave enough space at offset 0 for the batch */ 1849 offset = igt_random_offset(&prng, 1850 I915_GTT_PAGE_SIZE, vm_total, 1851 sizeof(u32), alignof_dword); 1852 1853 err = write_to_scratch(ctx_a, engine, obj_a, 1854 offset, 0xdeadbeef); 1855 if (err == 0) 1856 err = read_from_scratch(ctx_b, engine, obj_b, 1857 offset, &value); 1858 if (err) 1859 goto put_b; 1860 1861 if (value != expected) { 1862 pr_err("%s: Read %08x from scratch (offset 0x%08x_%08x), after %lu reads!\n", 1863 engine->name, value, 1864 upper_32_bits(offset), 1865 lower_32_bits(offset), 1866 this); 1867 err = -EINVAL; 1868 goto put_b; 1869 } 1870 1871 this++; 1872 } 1873 count += this; 1874 num_engines++; 1875 } 1876 pr_info("Checked %lu scratch offsets across %lu engines\n", 1877 count, num_engines); 1878 1879 put_b: 1880 i915_gem_object_put(obj_b); 1881 put_a: 1882 i915_gem_object_put(obj_a); 1883 out_file: 1884 if (igt_live_test_end(&t)) 1885 err = -EIO; 1886 fput(file); 1887 return err; 1888 } 1889 1890 int i915_gem_context_live_selftests(struct drm_i915_private *i915) 1891 { 1892 static const struct i915_subtest tests[] = { 1893 SUBTEST(live_nop_switch), 1894 SUBTEST(live_parallel_switch), 1895 SUBTEST(igt_ctx_exec), 1896 SUBTEST(igt_ctx_readonly), 1897 SUBTEST(igt_ctx_sseu), 1898 SUBTEST(igt_shared_ctx_exec), 1899 SUBTEST(igt_vm_isolation), 1900 }; 1901 1902 if (intel_gt_is_wedged(to_gt(i915))) 1903 return 0; 1904 1905 return i915_live_subtests(tests, i915); 1906 } 1907