1 /* 2 * SPDX-License-Identifier: MIT 3 * 4 * Copyright © 2017 Intel Corporation 5 */ 6 7 #include <linux/prime_numbers.h> 8 #include <linux/string_helpers.h> 9 10 #include "gem/i915_gem_internal.h" 11 #include "gem/i915_gem_pm.h" 12 #include "gt/intel_engine_pm.h" 13 #include "gt/intel_engine_regs.h" 14 #include "gt/intel_gt.h" 15 #include "gt/intel_gt_requests.h" 16 #include "gt/intel_reset.h" 17 #include "i915_selftest.h" 18 19 #include "gem/selftests/igt_gem_utils.h" 20 #include "selftests/i915_random.h" 21 #include "selftests/igt_flush_test.h" 22 #include "selftests/igt_live_test.h" 23 #include "selftests/igt_reset.h" 24 #include "selftests/igt_spinner.h" 25 #include "selftests/mock_drm.h" 26 #include "selftests/mock_gem_device.h" 27 28 #include "huge_gem_object.h" 29 #include "igt_gem_utils.h" 30 31 #define DW_PER_PAGE (PAGE_SIZE / sizeof(u32)) 32 33 static int live_nop_switch(void *arg) 34 { 35 const unsigned int nctx = 1024; 36 struct drm_i915_private *i915 = arg; 37 struct intel_engine_cs *engine; 38 struct i915_gem_context **ctx; 39 struct igt_live_test t; 40 struct file *file; 41 unsigned long n; 42 int err = -ENODEV; 43 44 /* 45 * Create as many contexts as we can feasibly get away with 46 * and check we can switch between them rapidly. 47 * 48 * Serves as very simple stress test for submission and HW switching 49 * between contexts. 50 */ 51 52 if (!DRIVER_CAPS(i915)->has_logical_contexts) 53 return 0; 54 55 file = mock_file(i915); 56 if (IS_ERR(file)) 57 return PTR_ERR(file); 58 59 ctx = kcalloc(nctx, sizeof(*ctx), GFP_KERNEL); 60 if (!ctx) { 61 err = -ENOMEM; 62 goto out_file; 63 } 64 65 for (n = 0; n < nctx; n++) { 66 ctx[n] = live_context(i915, file); 67 if (IS_ERR(ctx[n])) { 68 err = PTR_ERR(ctx[n]); 69 goto out_file; 70 } 71 } 72 73 for_each_uabi_engine(engine, i915) { 74 struct i915_request *rq = NULL; 75 unsigned long end_time, prime; 76 ktime_t times[2] = {}; 77 78 times[0] = ktime_get_raw(); 79 for (n = 0; n < nctx; n++) { 80 struct i915_request *this; 81 82 this = igt_request_alloc(ctx[n], engine); 83 if (IS_ERR(this)) { 84 err = PTR_ERR(this); 85 goto out_file; 86 } 87 if (rq) { 88 i915_request_await_dma_fence(this, &rq->fence); 89 i915_request_put(rq); 90 } 91 rq = i915_request_get(this); 92 i915_request_add(this); 93 } 94 if (i915_request_wait(rq, 0, 10 * HZ) < 0) { 95 pr_err("Failed to populated %d contexts\n", nctx); 96 intel_gt_set_wedged(to_gt(i915)); 97 i915_request_put(rq); 98 err = -EIO; 99 goto out_file; 100 } 101 i915_request_put(rq); 102 103 times[1] = ktime_get_raw(); 104 105 pr_info("Populated %d contexts on %s in %lluns\n", 106 nctx, engine->name, ktime_to_ns(times[1] - times[0])); 107 108 err = igt_live_test_begin(&t, i915, __func__, engine->name); 109 if (err) 110 goto out_file; 111 112 end_time = jiffies + i915_selftest.timeout_jiffies; 113 for_each_prime_number_from(prime, 2, 8192) { 114 times[1] = ktime_get_raw(); 115 116 rq = NULL; 117 for (n = 0; n < prime; n++) { 118 struct i915_request *this; 119 120 this = igt_request_alloc(ctx[n % nctx], engine); 121 if (IS_ERR(this)) { 122 err = PTR_ERR(this); 123 goto out_file; 124 } 125 126 if (rq) { /* Force submission order */ 127 i915_request_await_dma_fence(this, &rq->fence); 128 i915_request_put(rq); 129 } 130 131 /* 132 * This space is left intentionally blank. 133 * 134 * We do not actually want to perform any 135 * action with this request, we just want 136 * to measure the latency in allocation 137 * and submission of our breadcrumbs - 138 * ensuring that the bare request is sufficient 139 * for the system to work (i.e. proper HEAD 140 * tracking of the rings, interrupt handling, 141 * etc). It also gives us the lowest bounds 142 * for latency. 143 */ 144 145 rq = i915_request_get(this); 146 i915_request_add(this); 147 } 148 GEM_BUG_ON(!rq); 149 if (i915_request_wait(rq, 0, HZ / 5) < 0) { 150 pr_err("Switching between %ld contexts timed out\n", 151 prime); 152 intel_gt_set_wedged(to_gt(i915)); 153 i915_request_put(rq); 154 break; 155 } 156 i915_request_put(rq); 157 158 times[1] = ktime_sub(ktime_get_raw(), times[1]); 159 if (prime == 2) 160 times[0] = times[1]; 161 162 if (__igt_timeout(end_time, NULL)) 163 break; 164 } 165 166 err = igt_live_test_end(&t); 167 if (err) 168 goto out_file; 169 170 pr_info("Switch latencies on %s: 1 = %lluns, %lu = %lluns\n", 171 engine->name, 172 ktime_to_ns(times[0]), 173 prime - 1, div64_u64(ktime_to_ns(times[1]), prime - 1)); 174 } 175 176 out_file: 177 fput(file); 178 return err; 179 } 180 181 struct parallel_switch { 182 struct kthread_worker *worker; 183 struct kthread_work work; 184 struct intel_context *ce[2]; 185 int result; 186 }; 187 188 static void __live_parallel_switch1(struct kthread_work *work) 189 { 190 struct parallel_switch *arg = 191 container_of(work, typeof(*arg), work); 192 IGT_TIMEOUT(end_time); 193 unsigned long count; 194 195 count = 0; 196 arg->result = 0; 197 do { 198 struct i915_request *rq = NULL; 199 int n; 200 201 for (n = 0; !arg->result && n < ARRAY_SIZE(arg->ce); n++) { 202 struct i915_request *prev = rq; 203 204 rq = i915_request_create(arg->ce[n]); 205 if (IS_ERR(rq)) { 206 i915_request_put(prev); 207 arg->result = PTR_ERR(rq); 208 break; 209 } 210 211 i915_request_get(rq); 212 if (prev) { 213 arg->result = 214 i915_request_await_dma_fence(rq, 215 &prev->fence); 216 i915_request_put(prev); 217 } 218 219 i915_request_add(rq); 220 } 221 222 if (IS_ERR_OR_NULL(rq)) 223 break; 224 225 if (i915_request_wait(rq, 0, HZ) < 0) 226 arg->result = -ETIME; 227 228 i915_request_put(rq); 229 230 count++; 231 } while (!arg->result && !__igt_timeout(end_time, NULL)); 232 233 pr_info("%s: %lu switches (sync) <%d>\n", 234 arg->ce[0]->engine->name, count, arg->result); 235 } 236 237 static void __live_parallel_switchN(struct kthread_work *work) 238 { 239 struct parallel_switch *arg = 240 container_of(work, typeof(*arg), work); 241 struct i915_request *rq = NULL; 242 IGT_TIMEOUT(end_time); 243 unsigned long count; 244 int n; 245 246 count = 0; 247 arg->result = 0; 248 do { 249 for (n = 0; !arg->result && n < ARRAY_SIZE(arg->ce); n++) { 250 struct i915_request *prev = rq; 251 252 rq = i915_request_create(arg->ce[n]); 253 if (IS_ERR(rq)) { 254 i915_request_put(prev); 255 arg->result = PTR_ERR(rq); 256 break; 257 } 258 259 i915_request_get(rq); 260 if (prev) { 261 arg->result = 262 i915_request_await_dma_fence(rq, 263 &prev->fence); 264 i915_request_put(prev); 265 } 266 267 i915_request_add(rq); 268 } 269 270 count++; 271 } while (!arg->result && !__igt_timeout(end_time, NULL)); 272 273 if (!IS_ERR_OR_NULL(rq)) 274 i915_request_put(rq); 275 276 pr_info("%s: %lu switches (many) <%d>\n", 277 arg->ce[0]->engine->name, count, arg->result); 278 } 279 280 static int live_parallel_switch(void *arg) 281 { 282 struct drm_i915_private *i915 = arg; 283 static void (* const func[])(struct kthread_work *) = { 284 __live_parallel_switch1, 285 __live_parallel_switchN, 286 NULL, 287 }; 288 struct parallel_switch *data = NULL; 289 struct i915_gem_engines *engines; 290 struct i915_gem_engines_iter it; 291 void (* const *fn)(struct kthread_work *); 292 struct i915_gem_context *ctx; 293 struct intel_context *ce; 294 struct file *file; 295 int n, m, count; 296 int err = 0; 297 298 /* 299 * Check we can process switches on all engines simultaneously. 300 */ 301 302 if (!DRIVER_CAPS(i915)->has_logical_contexts) 303 return 0; 304 305 file = mock_file(i915); 306 if (IS_ERR(file)) 307 return PTR_ERR(file); 308 309 ctx = live_context(i915, file); 310 if (IS_ERR(ctx)) { 311 err = PTR_ERR(ctx); 312 goto out_file; 313 } 314 315 engines = i915_gem_context_lock_engines(ctx); 316 count = engines->num_engines; 317 318 data = kcalloc(count, sizeof(*data), GFP_KERNEL); 319 if (!data) { 320 i915_gem_context_unlock_engines(ctx); 321 err = -ENOMEM; 322 goto out_file; 323 } 324 325 m = 0; /* Use the first context as our template for the engines */ 326 for_each_gem_engine(ce, engines, it) { 327 err = intel_context_pin(ce); 328 if (err) { 329 i915_gem_context_unlock_engines(ctx); 330 goto out; 331 } 332 data[m++].ce[0] = intel_context_get(ce); 333 } 334 i915_gem_context_unlock_engines(ctx); 335 336 /* Clone the same set of engines into the other contexts */ 337 for (n = 1; n < ARRAY_SIZE(data->ce); n++) { 338 ctx = live_context(i915, file); 339 if (IS_ERR(ctx)) { 340 err = PTR_ERR(ctx); 341 goto out; 342 } 343 344 for (m = 0; m < count; m++) { 345 if (!data[m].ce[0]) 346 continue; 347 348 ce = intel_context_create(data[m].ce[0]->engine); 349 if (IS_ERR(ce)) 350 goto out; 351 352 err = intel_context_pin(ce); 353 if (err) { 354 intel_context_put(ce); 355 goto out; 356 } 357 358 data[m].ce[n] = ce; 359 } 360 } 361 362 for (n = 0; n < count; n++) { 363 struct kthread_worker *worker; 364 365 if (!data[n].ce[0]) 366 continue; 367 368 worker = kthread_create_worker(0, "igt/parallel:%s", 369 data[n].ce[0]->engine->name); 370 if (IS_ERR(worker)) 371 goto out; 372 373 data[n].worker = worker; 374 } 375 376 for (fn = func; !err && *fn; fn++) { 377 struct igt_live_test t; 378 379 err = igt_live_test_begin(&t, i915, __func__, ""); 380 if (err) 381 break; 382 383 for (n = 0; n < count; n++) { 384 if (!data[n].ce[0]) 385 continue; 386 387 data[n].result = 0; 388 kthread_init_work(&data[n].work, *fn); 389 kthread_queue_work(data[n].worker, &data[n].work); 390 } 391 392 for (n = 0; n < count; n++) { 393 if (data[n].ce[0]) { 394 kthread_flush_work(&data[n].work); 395 if (data[n].result && !err) 396 err = data[n].result; 397 } 398 } 399 400 if (igt_live_test_end(&t)) 401 err = -EIO; 402 } 403 404 out: 405 for (n = 0; n < count; n++) { 406 for (m = 0; m < ARRAY_SIZE(data->ce); m++) { 407 if (!data[n].ce[m]) 408 continue; 409 410 intel_context_unpin(data[n].ce[m]); 411 intel_context_put(data[n].ce[m]); 412 } 413 414 if (data[n].worker) 415 kthread_destroy_worker(data[n].worker); 416 } 417 kfree(data); 418 out_file: 419 fput(file); 420 return err; 421 } 422 423 static unsigned long real_page_count(struct drm_i915_gem_object *obj) 424 { 425 return huge_gem_object_phys_size(obj) >> PAGE_SHIFT; 426 } 427 428 static unsigned long fake_page_count(struct drm_i915_gem_object *obj) 429 { 430 return huge_gem_object_dma_size(obj) >> PAGE_SHIFT; 431 } 432 433 static int gpu_fill(struct intel_context *ce, 434 struct drm_i915_gem_object *obj, 435 unsigned int dw) 436 { 437 struct i915_vma *vma; 438 int err; 439 440 GEM_BUG_ON(obj->base.size > ce->vm->total); 441 GEM_BUG_ON(!intel_engine_can_store_dword(ce->engine)); 442 443 vma = i915_vma_instance(obj, ce->vm, NULL); 444 if (IS_ERR(vma)) 445 return PTR_ERR(vma); 446 447 err = i915_vma_pin(vma, 0, 0, PIN_HIGH | PIN_USER); 448 if (err) 449 return err; 450 451 /* 452 * Within the GTT the huge objects maps every page onto 453 * its 1024 real pages (using phys_pfn = dma_pfn % 1024). 454 * We set the nth dword within the page using the nth 455 * mapping via the GTT - this should exercise the GTT mapping 456 * whilst checking that each context provides a unique view 457 * into the object. 458 */ 459 err = igt_gpu_fill_dw(ce, vma, 460 (dw * real_page_count(obj)) << PAGE_SHIFT | 461 (dw * sizeof(u32)), 462 real_page_count(obj), 463 dw); 464 i915_vma_unpin(vma); 465 466 return err; 467 } 468 469 static int cpu_fill(struct drm_i915_gem_object *obj, u32 value) 470 { 471 const bool has_llc = HAS_LLC(to_i915(obj->base.dev)); 472 unsigned int need_flush; 473 unsigned long n, m; 474 int err; 475 476 i915_gem_object_lock(obj, NULL); 477 err = i915_gem_object_prepare_write(obj, &need_flush); 478 if (err) 479 goto out; 480 481 for (n = 0; n < real_page_count(obj); n++) { 482 u32 *map; 483 484 map = kmap_atomic(i915_gem_object_get_page(obj, n)); 485 for (m = 0; m < DW_PER_PAGE; m++) 486 map[m] = value; 487 if (!has_llc) 488 drm_clflush_virt_range(map, PAGE_SIZE); 489 kunmap_atomic(map); 490 } 491 492 i915_gem_object_finish_access(obj); 493 obj->read_domains = I915_GEM_DOMAIN_GTT | I915_GEM_DOMAIN_CPU; 494 obj->write_domain = 0; 495 out: 496 i915_gem_object_unlock(obj); 497 return err; 498 } 499 500 static noinline int cpu_check(struct drm_i915_gem_object *obj, 501 unsigned int idx, unsigned int max) 502 { 503 unsigned int needs_flush; 504 unsigned long n; 505 int err; 506 507 i915_gem_object_lock(obj, NULL); 508 err = i915_gem_object_prepare_read(obj, &needs_flush); 509 if (err) 510 goto out_unlock; 511 512 for (n = 0; n < real_page_count(obj); n++) { 513 u32 *map, m; 514 515 map = kmap_atomic(i915_gem_object_get_page(obj, n)); 516 if (needs_flush & CLFLUSH_BEFORE) 517 drm_clflush_virt_range(map, PAGE_SIZE); 518 519 for (m = 0; m < max; m++) { 520 if (map[m] != m) { 521 pr_err("%pS: Invalid value at object %d page %ld/%ld, offset %d/%d: found %x expected %x\n", 522 __builtin_return_address(0), idx, 523 n, real_page_count(obj), m, max, 524 map[m], m); 525 err = -EINVAL; 526 goto out_unmap; 527 } 528 } 529 530 for (; m < DW_PER_PAGE; m++) { 531 if (map[m] != STACK_MAGIC) { 532 pr_err("%pS: Invalid value at object %d page %ld, offset %d: found %x expected %x (uninitialised)\n", 533 __builtin_return_address(0), idx, n, m, 534 map[m], STACK_MAGIC); 535 err = -EINVAL; 536 goto out_unmap; 537 } 538 } 539 540 out_unmap: 541 kunmap_atomic(map); 542 if (err) 543 break; 544 } 545 546 i915_gem_object_finish_access(obj); 547 out_unlock: 548 i915_gem_object_unlock(obj); 549 return err; 550 } 551 552 static int file_add_object(struct file *file, struct drm_i915_gem_object *obj) 553 { 554 int err; 555 556 GEM_BUG_ON(obj->base.handle_count); 557 558 /* tie the object to the drm_file for easy reaping */ 559 err = idr_alloc(&to_drm_file(file)->object_idr, 560 &obj->base, 1, 0, GFP_KERNEL); 561 if (err < 0) 562 return err; 563 564 i915_gem_object_get(obj); 565 obj->base.handle_count++; 566 return 0; 567 } 568 569 static struct drm_i915_gem_object * 570 create_test_object(struct i915_address_space *vm, 571 struct file *file, 572 struct list_head *objects) 573 { 574 struct drm_i915_gem_object *obj; 575 u64 size; 576 int err; 577 578 /* Keep in GEM's good graces */ 579 intel_gt_retire_requests(vm->gt); 580 581 size = min(vm->total / 2, 1024ull * DW_PER_PAGE * PAGE_SIZE); 582 size = round_down(size, DW_PER_PAGE * PAGE_SIZE); 583 584 obj = huge_gem_object(vm->i915, DW_PER_PAGE * PAGE_SIZE, size); 585 if (IS_ERR(obj)) 586 return obj; 587 588 err = file_add_object(file, obj); 589 i915_gem_object_put(obj); 590 if (err) 591 return ERR_PTR(err); 592 593 err = cpu_fill(obj, STACK_MAGIC); 594 if (err) { 595 pr_err("Failed to fill object with cpu, err=%d\n", 596 err); 597 return ERR_PTR(err); 598 } 599 600 list_add_tail(&obj->st_link, objects); 601 return obj; 602 } 603 604 static unsigned long max_dwords(struct drm_i915_gem_object *obj) 605 { 606 unsigned long npages = fake_page_count(obj); 607 608 GEM_BUG_ON(!IS_ALIGNED(npages, DW_PER_PAGE)); 609 return npages / DW_PER_PAGE; 610 } 611 612 static void throttle_release(struct i915_request **q, int count) 613 { 614 int i; 615 616 for (i = 0; i < count; i++) { 617 if (IS_ERR_OR_NULL(q[i])) 618 continue; 619 620 i915_request_put(fetch_and_zero(&q[i])); 621 } 622 } 623 624 static int throttle(struct intel_context *ce, 625 struct i915_request **q, int count) 626 { 627 int i; 628 629 if (!IS_ERR_OR_NULL(q[0])) { 630 if (i915_request_wait(q[0], 631 I915_WAIT_INTERRUPTIBLE, 632 MAX_SCHEDULE_TIMEOUT) < 0) 633 return -EINTR; 634 635 i915_request_put(q[0]); 636 } 637 638 for (i = 0; i < count - 1; i++) 639 q[i] = q[i + 1]; 640 641 q[i] = intel_context_create_request(ce); 642 if (IS_ERR(q[i])) 643 return PTR_ERR(q[i]); 644 645 i915_request_get(q[i]); 646 i915_request_add(q[i]); 647 648 return 0; 649 } 650 651 static int igt_ctx_exec(void *arg) 652 { 653 struct drm_i915_private *i915 = arg; 654 struct intel_engine_cs *engine; 655 int err = -ENODEV; 656 657 /* 658 * Create a few different contexts (with different mm) and write 659 * through each ctx/mm using the GPU making sure those writes end 660 * up in the expected pages of our obj. 661 */ 662 663 if (!DRIVER_CAPS(i915)->has_logical_contexts) 664 return 0; 665 666 for_each_uabi_engine(engine, i915) { 667 struct drm_i915_gem_object *obj = NULL; 668 unsigned long ncontexts, ndwords, dw; 669 struct i915_request *tq[5] = {}; 670 struct igt_live_test t; 671 IGT_TIMEOUT(end_time); 672 LIST_HEAD(objects); 673 struct file *file; 674 675 if (!intel_engine_can_store_dword(engine)) 676 continue; 677 678 if (!engine->context_size) 679 continue; /* No logical context support in HW */ 680 681 file = mock_file(i915); 682 if (IS_ERR(file)) 683 return PTR_ERR(file); 684 685 err = igt_live_test_begin(&t, i915, __func__, engine->name); 686 if (err) 687 goto out_file; 688 689 ncontexts = 0; 690 ndwords = 0; 691 dw = 0; 692 while (!time_after(jiffies, end_time)) { 693 struct i915_gem_context *ctx; 694 struct intel_context *ce; 695 696 ctx = kernel_context(i915, NULL); 697 if (IS_ERR(ctx)) { 698 err = PTR_ERR(ctx); 699 goto out_file; 700 } 701 702 ce = i915_gem_context_get_engine(ctx, engine->legacy_idx); 703 GEM_BUG_ON(IS_ERR(ce)); 704 705 if (!obj) { 706 obj = create_test_object(ce->vm, file, &objects); 707 if (IS_ERR(obj)) { 708 err = PTR_ERR(obj); 709 intel_context_put(ce); 710 kernel_context_close(ctx); 711 goto out_file; 712 } 713 } 714 715 err = gpu_fill(ce, obj, dw); 716 if (err) { 717 pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) [full-ppgtt? %s], err=%d\n", 718 ndwords, dw, max_dwords(obj), 719 engine->name, 720 str_yes_no(i915_gem_context_has_full_ppgtt(ctx)), 721 err); 722 intel_context_put(ce); 723 kernel_context_close(ctx); 724 goto out_file; 725 } 726 727 err = throttle(ce, tq, ARRAY_SIZE(tq)); 728 if (err) { 729 intel_context_put(ce); 730 kernel_context_close(ctx); 731 goto out_file; 732 } 733 734 if (++dw == max_dwords(obj)) { 735 obj = NULL; 736 dw = 0; 737 } 738 739 ndwords++; 740 ncontexts++; 741 742 intel_context_put(ce); 743 kernel_context_close(ctx); 744 } 745 746 pr_info("Submitted %lu contexts to %s, filling %lu dwords\n", 747 ncontexts, engine->name, ndwords); 748 749 ncontexts = dw = 0; 750 list_for_each_entry(obj, &objects, st_link) { 751 unsigned int rem = 752 min_t(unsigned int, ndwords - dw, max_dwords(obj)); 753 754 err = cpu_check(obj, ncontexts++, rem); 755 if (err) 756 break; 757 758 dw += rem; 759 } 760 761 out_file: 762 throttle_release(tq, ARRAY_SIZE(tq)); 763 if (igt_live_test_end(&t)) 764 err = -EIO; 765 766 fput(file); 767 if (err) 768 return err; 769 770 i915_gem_drain_freed_objects(i915); 771 } 772 773 return 0; 774 } 775 776 static int igt_shared_ctx_exec(void *arg) 777 { 778 struct drm_i915_private *i915 = arg; 779 struct i915_request *tq[5] = {}; 780 struct i915_gem_context *parent; 781 struct intel_engine_cs *engine; 782 struct igt_live_test t; 783 struct file *file; 784 int err = 0; 785 786 /* 787 * Create a few different contexts with the same mm and write 788 * through each ctx using the GPU making sure those writes end 789 * up in the expected pages of our obj. 790 */ 791 if (!DRIVER_CAPS(i915)->has_logical_contexts) 792 return 0; 793 794 file = mock_file(i915); 795 if (IS_ERR(file)) 796 return PTR_ERR(file); 797 798 parent = live_context(i915, file); 799 if (IS_ERR(parent)) { 800 err = PTR_ERR(parent); 801 goto out_file; 802 } 803 804 if (!parent->vm) { /* not full-ppgtt; nothing to share */ 805 err = 0; 806 goto out_file; 807 } 808 809 err = igt_live_test_begin(&t, i915, __func__, ""); 810 if (err) 811 goto out_file; 812 813 for_each_uabi_engine(engine, i915) { 814 unsigned long ncontexts, ndwords, dw; 815 struct drm_i915_gem_object *obj = NULL; 816 IGT_TIMEOUT(end_time); 817 LIST_HEAD(objects); 818 819 if (!intel_engine_can_store_dword(engine)) 820 continue; 821 822 dw = 0; 823 ndwords = 0; 824 ncontexts = 0; 825 while (!time_after(jiffies, end_time)) { 826 struct i915_gem_context *ctx; 827 struct intel_context *ce; 828 829 ctx = kernel_context(i915, parent->vm); 830 if (IS_ERR(ctx)) { 831 err = PTR_ERR(ctx); 832 goto out_test; 833 } 834 835 ce = i915_gem_context_get_engine(ctx, engine->legacy_idx); 836 GEM_BUG_ON(IS_ERR(ce)); 837 838 if (!obj) { 839 obj = create_test_object(parent->vm, 840 file, &objects); 841 if (IS_ERR(obj)) { 842 err = PTR_ERR(obj); 843 intel_context_put(ce); 844 kernel_context_close(ctx); 845 goto out_test; 846 } 847 } 848 849 err = gpu_fill(ce, obj, dw); 850 if (err) { 851 pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) [full-ppgtt? %s], err=%d\n", 852 ndwords, dw, max_dwords(obj), 853 engine->name, 854 str_yes_no(i915_gem_context_has_full_ppgtt(ctx)), 855 err); 856 intel_context_put(ce); 857 kernel_context_close(ctx); 858 goto out_test; 859 } 860 861 err = throttle(ce, tq, ARRAY_SIZE(tq)); 862 if (err) { 863 intel_context_put(ce); 864 kernel_context_close(ctx); 865 goto out_test; 866 } 867 868 if (++dw == max_dwords(obj)) { 869 obj = NULL; 870 dw = 0; 871 } 872 873 ndwords++; 874 ncontexts++; 875 876 intel_context_put(ce); 877 kernel_context_close(ctx); 878 } 879 pr_info("Submitted %lu contexts to %s, filling %lu dwords\n", 880 ncontexts, engine->name, ndwords); 881 882 ncontexts = dw = 0; 883 list_for_each_entry(obj, &objects, st_link) { 884 unsigned int rem = 885 min_t(unsigned int, ndwords - dw, max_dwords(obj)); 886 887 err = cpu_check(obj, ncontexts++, rem); 888 if (err) 889 goto out_test; 890 891 dw += rem; 892 } 893 894 i915_gem_drain_freed_objects(i915); 895 } 896 out_test: 897 throttle_release(tq, ARRAY_SIZE(tq)); 898 if (igt_live_test_end(&t)) 899 err = -EIO; 900 out_file: 901 fput(file); 902 return err; 903 } 904 905 static int rpcs_query_batch(struct drm_i915_gem_object *rpcs, 906 struct i915_vma *vma, 907 struct intel_engine_cs *engine) 908 { 909 u32 *cmd; 910 911 GEM_BUG_ON(GRAPHICS_VER(vma->vm->i915) < 8); 912 913 cmd = i915_gem_object_pin_map(rpcs, I915_MAP_WB); 914 if (IS_ERR(cmd)) 915 return PTR_ERR(cmd); 916 917 *cmd++ = MI_STORE_REGISTER_MEM_GEN8; 918 *cmd++ = i915_mmio_reg_offset(GEN8_R_PWR_CLK_STATE(engine->mmio_base)); 919 *cmd++ = lower_32_bits(i915_vma_offset(vma)); 920 *cmd++ = upper_32_bits(i915_vma_offset(vma)); 921 *cmd = MI_BATCH_BUFFER_END; 922 923 __i915_gem_object_flush_map(rpcs, 0, 64); 924 i915_gem_object_unpin_map(rpcs); 925 926 intel_gt_chipset_flush(vma->vm->gt); 927 928 return 0; 929 } 930 931 static int 932 emit_rpcs_query(struct drm_i915_gem_object *obj, 933 struct intel_context *ce, 934 struct i915_request **rq_out) 935 { 936 struct drm_i915_private *i915 = to_i915(obj->base.dev); 937 struct i915_request *rq; 938 struct i915_gem_ww_ctx ww; 939 struct i915_vma *batch; 940 struct i915_vma *vma; 941 struct drm_i915_gem_object *rpcs; 942 int err; 943 944 GEM_BUG_ON(!intel_engine_can_store_dword(ce->engine)); 945 946 if (GRAPHICS_VER(i915) < 8) 947 return -EINVAL; 948 949 vma = i915_vma_instance(obj, ce->vm, NULL); 950 if (IS_ERR(vma)) 951 return PTR_ERR(vma); 952 953 rpcs = i915_gem_object_create_internal(i915, PAGE_SIZE); 954 if (IS_ERR(rpcs)) 955 return PTR_ERR(rpcs); 956 957 batch = i915_vma_instance(rpcs, ce->vm, NULL); 958 if (IS_ERR(batch)) { 959 err = PTR_ERR(batch); 960 goto err_put; 961 } 962 963 i915_gem_ww_ctx_init(&ww, false); 964 retry: 965 err = i915_gem_object_lock(obj, &ww); 966 if (!err) 967 err = i915_gem_object_lock(rpcs, &ww); 968 if (!err) 969 err = i915_gem_object_set_to_gtt_domain(obj, false); 970 if (!err) 971 err = i915_vma_pin_ww(vma, &ww, 0, 0, PIN_USER); 972 if (err) 973 goto err_put; 974 975 err = i915_vma_pin_ww(batch, &ww, 0, 0, PIN_USER); 976 if (err) 977 goto err_vma; 978 979 err = rpcs_query_batch(rpcs, vma, ce->engine); 980 if (err) 981 goto err_batch; 982 983 rq = i915_request_create(ce); 984 if (IS_ERR(rq)) { 985 err = PTR_ERR(rq); 986 goto err_batch; 987 } 988 989 err = i915_vma_move_to_active(batch, rq, 0); 990 if (err) 991 goto skip_request; 992 993 err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE); 994 if (err) 995 goto skip_request; 996 997 if (rq->engine->emit_init_breadcrumb) { 998 err = rq->engine->emit_init_breadcrumb(rq); 999 if (err) 1000 goto skip_request; 1001 } 1002 1003 err = rq->engine->emit_bb_start(rq, 1004 i915_vma_offset(batch), 1005 i915_vma_size(batch), 1006 0); 1007 if (err) 1008 goto skip_request; 1009 1010 *rq_out = i915_request_get(rq); 1011 1012 skip_request: 1013 if (err) 1014 i915_request_set_error_once(rq, err); 1015 i915_request_add(rq); 1016 err_batch: 1017 i915_vma_unpin(batch); 1018 err_vma: 1019 i915_vma_unpin(vma); 1020 err_put: 1021 if (err == -EDEADLK) { 1022 err = i915_gem_ww_ctx_backoff(&ww); 1023 if (!err) 1024 goto retry; 1025 } 1026 i915_gem_ww_ctx_fini(&ww); 1027 i915_gem_object_put(rpcs); 1028 return err; 1029 } 1030 1031 #define TEST_IDLE BIT(0) 1032 #define TEST_BUSY BIT(1) 1033 #define TEST_RESET BIT(2) 1034 1035 static int 1036 __sseu_prepare(const char *name, 1037 unsigned int flags, 1038 struct intel_context *ce, 1039 struct igt_spinner **spin) 1040 { 1041 struct i915_request *rq; 1042 int ret; 1043 1044 *spin = NULL; 1045 if (!(flags & (TEST_BUSY | TEST_RESET))) 1046 return 0; 1047 1048 *spin = kzalloc(sizeof(**spin), GFP_KERNEL); 1049 if (!*spin) 1050 return -ENOMEM; 1051 1052 ret = igt_spinner_init(*spin, ce->engine->gt); 1053 if (ret) 1054 goto err_free; 1055 1056 rq = igt_spinner_create_request(*spin, ce, MI_NOOP); 1057 if (IS_ERR(rq)) { 1058 ret = PTR_ERR(rq); 1059 goto err_fini; 1060 } 1061 1062 i915_request_add(rq); 1063 1064 if (!igt_wait_for_spinner(*spin, rq)) { 1065 pr_err("%s: Spinner failed to start!\n", name); 1066 ret = -ETIMEDOUT; 1067 goto err_end; 1068 } 1069 1070 return 0; 1071 1072 err_end: 1073 igt_spinner_end(*spin); 1074 err_fini: 1075 igt_spinner_fini(*spin); 1076 err_free: 1077 kfree(fetch_and_zero(spin)); 1078 return ret; 1079 } 1080 1081 static int 1082 __read_slice_count(struct intel_context *ce, 1083 struct drm_i915_gem_object *obj, 1084 struct igt_spinner *spin, 1085 u32 *rpcs) 1086 { 1087 struct i915_request *rq = NULL; 1088 u32 s_mask, s_shift; 1089 unsigned int cnt; 1090 u32 *buf, val; 1091 long ret; 1092 1093 ret = emit_rpcs_query(obj, ce, &rq); 1094 if (ret) 1095 return ret; 1096 1097 if (spin) 1098 igt_spinner_end(spin); 1099 1100 ret = i915_request_wait(rq, 0, MAX_SCHEDULE_TIMEOUT); 1101 i915_request_put(rq); 1102 if (ret < 0) 1103 return ret; 1104 1105 buf = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WB); 1106 if (IS_ERR(buf)) { 1107 ret = PTR_ERR(buf); 1108 return ret; 1109 } 1110 1111 if (GRAPHICS_VER(ce->engine->i915) >= 11) { 1112 s_mask = GEN11_RPCS_S_CNT_MASK; 1113 s_shift = GEN11_RPCS_S_CNT_SHIFT; 1114 } else { 1115 s_mask = GEN8_RPCS_S_CNT_MASK; 1116 s_shift = GEN8_RPCS_S_CNT_SHIFT; 1117 } 1118 1119 val = *buf; 1120 cnt = (val & s_mask) >> s_shift; 1121 *rpcs = val; 1122 1123 i915_gem_object_unpin_map(obj); 1124 1125 return cnt; 1126 } 1127 1128 static int 1129 __check_rpcs(const char *name, u32 rpcs, int slices, unsigned int expected, 1130 const char *prefix, const char *suffix) 1131 { 1132 if (slices == expected) 1133 return 0; 1134 1135 if (slices < 0) { 1136 pr_err("%s: %s read slice count failed with %d%s\n", 1137 name, prefix, slices, suffix); 1138 return slices; 1139 } 1140 1141 pr_err("%s: %s slice count %d is not %u%s\n", 1142 name, prefix, slices, expected, suffix); 1143 1144 pr_info("RPCS=0x%x; %u%sx%u%s\n", 1145 rpcs, slices, 1146 (rpcs & GEN8_RPCS_S_CNT_ENABLE) ? "*" : "", 1147 (rpcs & GEN8_RPCS_SS_CNT_MASK) >> GEN8_RPCS_SS_CNT_SHIFT, 1148 (rpcs & GEN8_RPCS_SS_CNT_ENABLE) ? "*" : ""); 1149 1150 return -EINVAL; 1151 } 1152 1153 static int 1154 __sseu_finish(const char *name, 1155 unsigned int flags, 1156 struct intel_context *ce, 1157 struct drm_i915_gem_object *obj, 1158 unsigned int expected, 1159 struct igt_spinner *spin) 1160 { 1161 unsigned int slices = hweight32(ce->engine->sseu.slice_mask); 1162 u32 rpcs = 0; 1163 int ret = 0; 1164 1165 if (flags & TEST_RESET) { 1166 ret = intel_engine_reset(ce->engine, "sseu"); 1167 if (ret) 1168 goto out; 1169 } 1170 1171 ret = __read_slice_count(ce, obj, 1172 flags & TEST_RESET ? NULL : spin, &rpcs); 1173 ret = __check_rpcs(name, rpcs, ret, expected, "Context", "!"); 1174 if (ret) 1175 goto out; 1176 1177 ret = __read_slice_count(ce->engine->kernel_context, obj, NULL, &rpcs); 1178 ret = __check_rpcs(name, rpcs, ret, slices, "Kernel context", "!"); 1179 1180 out: 1181 if (spin) 1182 igt_spinner_end(spin); 1183 1184 if ((flags & TEST_IDLE) && ret == 0) { 1185 ret = igt_flush_test(ce->engine->i915); 1186 if (ret) 1187 return ret; 1188 1189 ret = __read_slice_count(ce, obj, NULL, &rpcs); 1190 ret = __check_rpcs(name, rpcs, ret, expected, 1191 "Context", " after idle!"); 1192 } 1193 1194 return ret; 1195 } 1196 1197 static int 1198 __sseu_test(const char *name, 1199 unsigned int flags, 1200 struct intel_context *ce, 1201 struct drm_i915_gem_object *obj, 1202 struct intel_sseu sseu) 1203 { 1204 struct igt_spinner *spin = NULL; 1205 int ret; 1206 1207 intel_engine_pm_get(ce->engine); 1208 1209 ret = __sseu_prepare(name, flags, ce, &spin); 1210 if (ret) 1211 goto out_pm; 1212 1213 ret = intel_context_reconfigure_sseu(ce, sseu); 1214 if (ret) 1215 goto out_spin; 1216 1217 ret = __sseu_finish(name, flags, ce, obj, 1218 hweight32(sseu.slice_mask), spin); 1219 1220 out_spin: 1221 if (spin) { 1222 igt_spinner_end(spin); 1223 igt_spinner_fini(spin); 1224 kfree(spin); 1225 } 1226 out_pm: 1227 intel_engine_pm_put(ce->engine); 1228 return ret; 1229 } 1230 1231 static int 1232 __igt_ctx_sseu(struct drm_i915_private *i915, 1233 const char *name, 1234 unsigned int flags) 1235 { 1236 struct drm_i915_gem_object *obj; 1237 int inst = 0; 1238 int ret = 0; 1239 1240 if (GRAPHICS_VER(i915) < 9) 1241 return 0; 1242 1243 if (flags & TEST_RESET) 1244 igt_global_reset_lock(to_gt(i915)); 1245 1246 obj = i915_gem_object_create_internal(i915, PAGE_SIZE); 1247 if (IS_ERR(obj)) { 1248 ret = PTR_ERR(obj); 1249 goto out_unlock; 1250 } 1251 1252 do { 1253 struct intel_engine_cs *engine; 1254 struct intel_context *ce; 1255 struct intel_sseu pg_sseu; 1256 1257 engine = intel_engine_lookup_user(i915, 1258 I915_ENGINE_CLASS_RENDER, 1259 inst++); 1260 if (!engine) 1261 break; 1262 1263 if (hweight32(engine->sseu.slice_mask) < 2) 1264 continue; 1265 1266 if (!engine->gt->info.sseu.has_slice_pg) 1267 continue; 1268 1269 /* 1270 * Gen11 VME friendly power-gated configuration with 1271 * half enabled sub-slices. 1272 */ 1273 pg_sseu = engine->sseu; 1274 pg_sseu.slice_mask = 1; 1275 pg_sseu.subslice_mask = 1276 ~(~0 << (hweight32(engine->sseu.subslice_mask) / 2)); 1277 1278 pr_info("%s: SSEU subtest '%s', flags=%x, def_slices=%u, pg_slices=%u\n", 1279 engine->name, name, flags, 1280 hweight32(engine->sseu.slice_mask), 1281 hweight32(pg_sseu.slice_mask)); 1282 1283 ce = intel_context_create(engine); 1284 if (IS_ERR(ce)) { 1285 ret = PTR_ERR(ce); 1286 goto out_put; 1287 } 1288 1289 ret = intel_context_pin(ce); 1290 if (ret) 1291 goto out_ce; 1292 1293 /* First set the default mask. */ 1294 ret = __sseu_test(name, flags, ce, obj, engine->sseu); 1295 if (ret) 1296 goto out_unpin; 1297 1298 /* Then set a power-gated configuration. */ 1299 ret = __sseu_test(name, flags, ce, obj, pg_sseu); 1300 if (ret) 1301 goto out_unpin; 1302 1303 /* Back to defaults. */ 1304 ret = __sseu_test(name, flags, ce, obj, engine->sseu); 1305 if (ret) 1306 goto out_unpin; 1307 1308 /* One last power-gated configuration for the road. */ 1309 ret = __sseu_test(name, flags, ce, obj, pg_sseu); 1310 if (ret) 1311 goto out_unpin; 1312 1313 out_unpin: 1314 intel_context_unpin(ce); 1315 out_ce: 1316 intel_context_put(ce); 1317 } while (!ret); 1318 1319 if (igt_flush_test(i915)) 1320 ret = -EIO; 1321 1322 out_put: 1323 i915_gem_object_put(obj); 1324 1325 out_unlock: 1326 if (flags & TEST_RESET) 1327 igt_global_reset_unlock(to_gt(i915)); 1328 1329 if (ret) 1330 pr_err("%s: Failed with %d!\n", name, ret); 1331 1332 return ret; 1333 } 1334 1335 static int igt_ctx_sseu(void *arg) 1336 { 1337 struct { 1338 const char *name; 1339 unsigned int flags; 1340 } *phase, phases[] = { 1341 { .name = "basic", .flags = 0 }, 1342 { .name = "idle", .flags = TEST_IDLE }, 1343 { .name = "busy", .flags = TEST_BUSY }, 1344 { .name = "busy-reset", .flags = TEST_BUSY | TEST_RESET }, 1345 { .name = "busy-idle", .flags = TEST_BUSY | TEST_IDLE }, 1346 { .name = "reset-idle", .flags = TEST_RESET | TEST_IDLE }, 1347 }; 1348 unsigned int i; 1349 int ret = 0; 1350 1351 for (i = 0, phase = phases; ret == 0 && i < ARRAY_SIZE(phases); 1352 i++, phase++) 1353 ret = __igt_ctx_sseu(arg, phase->name, phase->flags); 1354 1355 return ret; 1356 } 1357 1358 static int igt_ctx_readonly(void *arg) 1359 { 1360 struct drm_i915_private *i915 = arg; 1361 unsigned long idx, ndwords, dw, num_engines; 1362 struct drm_i915_gem_object *obj = NULL; 1363 struct i915_request *tq[5] = {}; 1364 struct i915_gem_engines_iter it; 1365 struct i915_address_space *vm; 1366 struct i915_gem_context *ctx; 1367 struct intel_context *ce; 1368 struct igt_live_test t; 1369 I915_RND_STATE(prng); 1370 IGT_TIMEOUT(end_time); 1371 LIST_HEAD(objects); 1372 struct file *file; 1373 int err = -ENODEV; 1374 1375 /* 1376 * Create a few read-only objects (with the occasional writable object) 1377 * and try to write into these object checking that the GPU discards 1378 * any write to a read-only object. 1379 */ 1380 1381 file = mock_file(i915); 1382 if (IS_ERR(file)) 1383 return PTR_ERR(file); 1384 1385 err = igt_live_test_begin(&t, i915, __func__, ""); 1386 if (err) 1387 goto out_file; 1388 1389 ctx = live_context(i915, file); 1390 if (IS_ERR(ctx)) { 1391 err = PTR_ERR(ctx); 1392 goto out_file; 1393 } 1394 1395 vm = ctx->vm ?: &to_gt(i915)->ggtt->alias->vm; 1396 if (!vm || !vm->has_read_only) { 1397 err = 0; 1398 goto out_file; 1399 } 1400 1401 num_engines = 0; 1402 for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) 1403 if (intel_engine_can_store_dword(ce->engine)) 1404 num_engines++; 1405 i915_gem_context_unlock_engines(ctx); 1406 1407 ndwords = 0; 1408 dw = 0; 1409 while (!time_after(jiffies, end_time)) { 1410 for_each_gem_engine(ce, 1411 i915_gem_context_lock_engines(ctx), it) { 1412 if (!intel_engine_can_store_dword(ce->engine)) 1413 continue; 1414 1415 if (!obj) { 1416 obj = create_test_object(ce->vm, file, &objects); 1417 if (IS_ERR(obj)) { 1418 err = PTR_ERR(obj); 1419 i915_gem_context_unlock_engines(ctx); 1420 goto out_file; 1421 } 1422 1423 if (prandom_u32_state(&prng) & 1) 1424 i915_gem_object_set_readonly(obj); 1425 } 1426 1427 err = gpu_fill(ce, obj, dw); 1428 if (err) { 1429 pr_err("Failed to fill dword %lu [%lu/%lu] with gpu (%s) [full-ppgtt? %s], err=%d\n", 1430 ndwords, dw, max_dwords(obj), 1431 ce->engine->name, 1432 str_yes_no(i915_gem_context_has_full_ppgtt(ctx)), 1433 err); 1434 i915_gem_context_unlock_engines(ctx); 1435 goto out_file; 1436 } 1437 1438 err = throttle(ce, tq, ARRAY_SIZE(tq)); 1439 if (err) { 1440 i915_gem_context_unlock_engines(ctx); 1441 goto out_file; 1442 } 1443 1444 if (++dw == max_dwords(obj)) { 1445 obj = NULL; 1446 dw = 0; 1447 } 1448 ndwords++; 1449 } 1450 i915_gem_context_unlock_engines(ctx); 1451 } 1452 pr_info("Submitted %lu dwords (across %lu engines)\n", 1453 ndwords, num_engines); 1454 1455 dw = 0; 1456 idx = 0; 1457 list_for_each_entry(obj, &objects, st_link) { 1458 unsigned int rem = 1459 min_t(unsigned int, ndwords - dw, max_dwords(obj)); 1460 unsigned int num_writes; 1461 1462 num_writes = rem; 1463 if (i915_gem_object_is_readonly(obj)) 1464 num_writes = 0; 1465 1466 err = cpu_check(obj, idx++, num_writes); 1467 if (err) 1468 break; 1469 1470 dw += rem; 1471 } 1472 1473 out_file: 1474 throttle_release(tq, ARRAY_SIZE(tq)); 1475 if (igt_live_test_end(&t)) 1476 err = -EIO; 1477 1478 fput(file); 1479 return err; 1480 } 1481 1482 static int check_scratch(struct i915_address_space *vm, u64 offset) 1483 { 1484 struct drm_mm_node *node; 1485 1486 mutex_lock(&vm->mutex); 1487 node = __drm_mm_interval_first(&vm->mm, 1488 offset, offset + sizeof(u32) - 1); 1489 mutex_unlock(&vm->mutex); 1490 if (!node || node->start > offset) 1491 return 0; 1492 1493 GEM_BUG_ON(offset >= node->start + node->size); 1494 1495 pr_err("Target offset 0x%08x_%08x overlaps with a node in the mm!\n", 1496 upper_32_bits(offset), lower_32_bits(offset)); 1497 return -EINVAL; 1498 } 1499 1500 static int write_to_scratch(struct i915_gem_context *ctx, 1501 struct intel_engine_cs *engine, 1502 struct drm_i915_gem_object *obj, 1503 u64 offset, u32 value) 1504 { 1505 struct drm_i915_private *i915 = ctx->i915; 1506 struct i915_address_space *vm; 1507 struct i915_request *rq; 1508 struct i915_vma *vma; 1509 u32 *cmd; 1510 int err; 1511 1512 GEM_BUG_ON(offset < I915_GTT_PAGE_SIZE); 1513 1514 err = check_scratch(ctx->vm, offset); 1515 if (err) 1516 return err; 1517 1518 cmd = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WB); 1519 if (IS_ERR(cmd)) 1520 return PTR_ERR(cmd); 1521 1522 *cmd++ = MI_STORE_DWORD_IMM_GEN4; 1523 if (GRAPHICS_VER(i915) >= 8) { 1524 *cmd++ = lower_32_bits(offset); 1525 *cmd++ = upper_32_bits(offset); 1526 } else { 1527 *cmd++ = 0; 1528 *cmd++ = offset; 1529 } 1530 *cmd++ = value; 1531 *cmd = MI_BATCH_BUFFER_END; 1532 __i915_gem_object_flush_map(obj, 0, 64); 1533 i915_gem_object_unpin_map(obj); 1534 1535 intel_gt_chipset_flush(engine->gt); 1536 1537 vm = i915_gem_context_get_eb_vm(ctx); 1538 vma = i915_vma_instance(obj, vm, NULL); 1539 if (IS_ERR(vma)) { 1540 err = PTR_ERR(vma); 1541 goto out_vm; 1542 } 1543 1544 err = i915_vma_pin(vma, 0, 0, PIN_USER | PIN_OFFSET_FIXED); 1545 if (err) 1546 goto out_vm; 1547 1548 rq = igt_request_alloc(ctx, engine); 1549 if (IS_ERR(rq)) { 1550 err = PTR_ERR(rq); 1551 goto err_unpin; 1552 } 1553 1554 err = igt_vma_move_to_active_unlocked(vma, rq, 0); 1555 if (err) 1556 goto skip_request; 1557 1558 if (rq->engine->emit_init_breadcrumb) { 1559 err = rq->engine->emit_init_breadcrumb(rq); 1560 if (err) 1561 goto skip_request; 1562 } 1563 1564 err = engine->emit_bb_start(rq, i915_vma_offset(vma), 1565 i915_vma_size(vma), 0); 1566 if (err) 1567 goto skip_request; 1568 1569 i915_vma_unpin(vma); 1570 1571 i915_request_add(rq); 1572 1573 goto out_vm; 1574 skip_request: 1575 i915_request_set_error_once(rq, err); 1576 i915_request_add(rq); 1577 err_unpin: 1578 i915_vma_unpin(vma); 1579 out_vm: 1580 i915_vm_put(vm); 1581 1582 if (!err) 1583 err = i915_gem_object_wait(obj, 0, MAX_SCHEDULE_TIMEOUT); 1584 1585 return err; 1586 } 1587 1588 static int read_from_scratch(struct i915_gem_context *ctx, 1589 struct intel_engine_cs *engine, 1590 struct drm_i915_gem_object *obj, 1591 u64 offset, u32 *value) 1592 { 1593 struct drm_i915_private *i915 = ctx->i915; 1594 struct i915_address_space *vm; 1595 const u32 result = 0x100; 1596 struct i915_request *rq; 1597 struct i915_vma *vma; 1598 unsigned int flags; 1599 u32 *cmd; 1600 int err; 1601 1602 GEM_BUG_ON(offset < I915_GTT_PAGE_SIZE); 1603 1604 err = check_scratch(ctx->vm, offset); 1605 if (err) 1606 return err; 1607 1608 if (GRAPHICS_VER(i915) >= 8) { 1609 const u32 GPR0 = engine->mmio_base + 0x600; 1610 1611 vm = i915_gem_context_get_eb_vm(ctx); 1612 vma = i915_vma_instance(obj, vm, NULL); 1613 if (IS_ERR(vma)) { 1614 err = PTR_ERR(vma); 1615 goto out_vm; 1616 } 1617 1618 err = i915_vma_pin(vma, 0, 0, PIN_USER | PIN_OFFSET_FIXED); 1619 if (err) 1620 goto out_vm; 1621 1622 cmd = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WB); 1623 if (IS_ERR(cmd)) { 1624 err = PTR_ERR(cmd); 1625 goto err_unpin; 1626 } 1627 1628 memset(cmd, POISON_INUSE, PAGE_SIZE); 1629 *cmd++ = MI_LOAD_REGISTER_MEM_GEN8; 1630 *cmd++ = GPR0; 1631 *cmd++ = lower_32_bits(offset); 1632 *cmd++ = upper_32_bits(offset); 1633 *cmd++ = MI_STORE_REGISTER_MEM_GEN8; 1634 *cmd++ = GPR0; 1635 *cmd++ = result; 1636 *cmd++ = 0; 1637 *cmd = MI_BATCH_BUFFER_END; 1638 1639 i915_gem_object_flush_map(obj); 1640 i915_gem_object_unpin_map(obj); 1641 1642 flags = 0; 1643 } else { 1644 const u32 reg = engine->mmio_base + 0x420; 1645 1646 /* hsw: register access even to 3DPRIM! is protected */ 1647 vm = i915_vm_get(&engine->gt->ggtt->vm); 1648 vma = i915_vma_instance(obj, vm, NULL); 1649 if (IS_ERR(vma)) { 1650 err = PTR_ERR(vma); 1651 goto out_vm; 1652 } 1653 1654 err = i915_vma_pin(vma, 0, 0, PIN_GLOBAL); 1655 if (err) 1656 goto out_vm; 1657 1658 cmd = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WB); 1659 if (IS_ERR(cmd)) { 1660 err = PTR_ERR(cmd); 1661 goto err_unpin; 1662 } 1663 1664 memset(cmd, POISON_INUSE, PAGE_SIZE); 1665 *cmd++ = MI_LOAD_REGISTER_MEM; 1666 *cmd++ = reg; 1667 *cmd++ = offset; 1668 *cmd++ = MI_STORE_REGISTER_MEM | MI_USE_GGTT; 1669 *cmd++ = reg; 1670 *cmd++ = i915_vma_offset(vma) + result; 1671 *cmd = MI_BATCH_BUFFER_END; 1672 1673 i915_gem_object_flush_map(obj); 1674 i915_gem_object_unpin_map(obj); 1675 1676 flags = I915_DISPATCH_SECURE; 1677 } 1678 1679 intel_gt_chipset_flush(engine->gt); 1680 1681 rq = igt_request_alloc(ctx, engine); 1682 if (IS_ERR(rq)) { 1683 err = PTR_ERR(rq); 1684 goto err_unpin; 1685 } 1686 1687 err = igt_vma_move_to_active_unlocked(vma, rq, EXEC_OBJECT_WRITE); 1688 if (err) 1689 goto skip_request; 1690 1691 if (rq->engine->emit_init_breadcrumb) { 1692 err = rq->engine->emit_init_breadcrumb(rq); 1693 if (err) 1694 goto skip_request; 1695 } 1696 1697 err = engine->emit_bb_start(rq, i915_vma_offset(vma), 1698 i915_vma_size(vma), flags); 1699 if (err) 1700 goto skip_request; 1701 1702 i915_vma_unpin(vma); 1703 1704 i915_request_add(rq); 1705 1706 i915_gem_object_lock(obj, NULL); 1707 err = i915_gem_object_set_to_cpu_domain(obj, false); 1708 i915_gem_object_unlock(obj); 1709 if (err) 1710 goto out_vm; 1711 1712 cmd = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WB); 1713 if (IS_ERR(cmd)) { 1714 err = PTR_ERR(cmd); 1715 goto out_vm; 1716 } 1717 1718 *value = cmd[result / sizeof(*cmd)]; 1719 i915_gem_object_unpin_map(obj); 1720 1721 goto out_vm; 1722 skip_request: 1723 i915_request_set_error_once(rq, err); 1724 i915_request_add(rq); 1725 err_unpin: 1726 i915_vma_unpin(vma); 1727 out_vm: 1728 i915_vm_put(vm); 1729 1730 if (!err) 1731 err = i915_gem_object_wait(obj, 0, MAX_SCHEDULE_TIMEOUT); 1732 1733 return err; 1734 } 1735 1736 static int check_scratch_page(struct i915_gem_context *ctx, u32 *out) 1737 { 1738 struct i915_address_space *vm; 1739 u32 *vaddr; 1740 int err = 0; 1741 1742 vm = ctx->vm; 1743 if (!vm) 1744 return -ENODEV; 1745 1746 if (!vm->scratch[0]) { 1747 pr_err("No scratch page!\n"); 1748 return -EINVAL; 1749 } 1750 1751 vaddr = __px_vaddr(vm->scratch[0]); 1752 1753 memcpy(out, vaddr, sizeof(*out)); 1754 if (memchr_inv(vaddr, *out, PAGE_SIZE)) { 1755 pr_err("Inconsistent initial state of scratch page!\n"); 1756 err = -EINVAL; 1757 } 1758 1759 return err; 1760 } 1761 1762 static int igt_vm_isolation(void *arg) 1763 { 1764 struct drm_i915_private *i915 = arg; 1765 struct i915_gem_context *ctx_a, *ctx_b; 1766 struct drm_i915_gem_object *obj_a, *obj_b; 1767 unsigned long num_engines, count; 1768 struct intel_engine_cs *engine; 1769 struct igt_live_test t; 1770 I915_RND_STATE(prng); 1771 struct file *file; 1772 u64 vm_total; 1773 u32 expected; 1774 int err; 1775 1776 if (GRAPHICS_VER(i915) < 7) 1777 return 0; 1778 1779 /* 1780 * The simple goal here is that a write into one context is not 1781 * observed in a second (separate page tables and scratch). 1782 */ 1783 1784 file = mock_file(i915); 1785 if (IS_ERR(file)) 1786 return PTR_ERR(file); 1787 1788 err = igt_live_test_begin(&t, i915, __func__, ""); 1789 if (err) 1790 goto out_file; 1791 1792 ctx_a = live_context(i915, file); 1793 if (IS_ERR(ctx_a)) { 1794 err = PTR_ERR(ctx_a); 1795 goto out_file; 1796 } 1797 1798 ctx_b = live_context(i915, file); 1799 if (IS_ERR(ctx_b)) { 1800 err = PTR_ERR(ctx_b); 1801 goto out_file; 1802 } 1803 1804 /* We can only test vm isolation, if the vm are distinct */ 1805 if (ctx_a->vm == ctx_b->vm) 1806 goto out_file; 1807 1808 /* Read the initial state of the scratch page */ 1809 err = check_scratch_page(ctx_a, &expected); 1810 if (err) 1811 goto out_file; 1812 1813 err = check_scratch_page(ctx_b, &expected); 1814 if (err) 1815 goto out_file; 1816 1817 vm_total = ctx_a->vm->total; 1818 GEM_BUG_ON(ctx_b->vm->total != vm_total); 1819 1820 obj_a = i915_gem_object_create_internal(i915, PAGE_SIZE); 1821 if (IS_ERR(obj_a)) { 1822 err = PTR_ERR(obj_a); 1823 goto out_file; 1824 } 1825 1826 obj_b = i915_gem_object_create_internal(i915, PAGE_SIZE); 1827 if (IS_ERR(obj_b)) { 1828 err = PTR_ERR(obj_b); 1829 goto put_a; 1830 } 1831 1832 count = 0; 1833 num_engines = 0; 1834 for_each_uabi_engine(engine, i915) { 1835 IGT_TIMEOUT(end_time); 1836 unsigned long this = 0; 1837 1838 if (!intel_engine_can_store_dword(engine)) 1839 continue; 1840 1841 /* Not all engines have their own GPR! */ 1842 if (GRAPHICS_VER(i915) < 8 && engine->class != RENDER_CLASS) 1843 continue; 1844 1845 while (!__igt_timeout(end_time, NULL)) { 1846 u32 value = 0xc5c5c5c5; 1847 u64 offset; 1848 1849 /* Leave enough space at offset 0 for the batch */ 1850 offset = igt_random_offset(&prng, 1851 I915_GTT_PAGE_SIZE, vm_total, 1852 sizeof(u32), alignof_dword); 1853 1854 err = write_to_scratch(ctx_a, engine, obj_a, 1855 offset, 0xdeadbeef); 1856 if (err == 0) 1857 err = read_from_scratch(ctx_b, engine, obj_b, 1858 offset, &value); 1859 if (err) 1860 goto put_b; 1861 1862 if (value != expected) { 1863 pr_err("%s: Read %08x from scratch (offset 0x%08x_%08x), after %lu reads!\n", 1864 engine->name, value, 1865 upper_32_bits(offset), 1866 lower_32_bits(offset), 1867 this); 1868 err = -EINVAL; 1869 goto put_b; 1870 } 1871 1872 this++; 1873 } 1874 count += this; 1875 num_engines++; 1876 } 1877 pr_info("Checked %lu scratch offsets across %lu engines\n", 1878 count, num_engines); 1879 1880 put_b: 1881 i915_gem_object_put(obj_b); 1882 put_a: 1883 i915_gem_object_put(obj_a); 1884 out_file: 1885 if (igt_live_test_end(&t)) 1886 err = -EIO; 1887 fput(file); 1888 return err; 1889 } 1890 1891 int i915_gem_context_live_selftests(struct drm_i915_private *i915) 1892 { 1893 static const struct i915_subtest tests[] = { 1894 SUBTEST(live_nop_switch), 1895 SUBTEST(live_parallel_switch), 1896 SUBTEST(igt_ctx_exec), 1897 SUBTEST(igt_ctx_readonly), 1898 SUBTEST(igt_ctx_sseu), 1899 SUBTEST(igt_shared_ctx_exec), 1900 SUBTEST(igt_vm_isolation), 1901 }; 1902 1903 if (intel_gt_is_wedged(to_gt(i915))) 1904 return 0; 1905 1906 return i915_live_subtests(tests, i915); 1907 } 1908