1 // SPDX-License-Identifier: MIT 2 /* 3 * Copyright © 2019 Intel Corporation 4 */ 5 6 #include <linux/prime_numbers.h> 7 #include <linux/sort.h> 8 9 #include "../i915_selftest.h" 10 11 #include "mock_drm.h" 12 #include "mock_gem_device.h" 13 #include "mock_region.h" 14 15 #include "gem/i915_gem_context.h" 16 #include "gem/i915_gem_lmem.h" 17 #include "gem/i915_gem_region.h" 18 #include "gem/i915_gem_object_blt.h" 19 #include "gem/selftests/igt_gem_utils.h" 20 #include "gem/selftests/mock_context.h" 21 #include "gt/intel_engine_user.h" 22 #include "gt/intel_gt.h" 23 #include "i915_memcpy.h" 24 #include "selftests/igt_flush_test.h" 25 #include "selftests/i915_random.h" 26 27 static void close_objects(struct intel_memory_region *mem, 28 struct list_head *objects) 29 { 30 struct drm_i915_private *i915 = mem->i915; 31 struct drm_i915_gem_object *obj, *on; 32 33 list_for_each_entry_safe(obj, on, objects, st_link) { 34 if (i915_gem_object_has_pinned_pages(obj)) 35 i915_gem_object_unpin_pages(obj); 36 /* No polluting the memory region between tests */ 37 __i915_gem_object_put_pages(obj); 38 list_del(&obj->st_link); 39 i915_gem_object_put(obj); 40 } 41 42 cond_resched(); 43 44 i915_gem_drain_freed_objects(i915); 45 } 46 47 static int igt_mock_fill(void *arg) 48 { 49 struct intel_memory_region *mem = arg; 50 resource_size_t total = resource_size(&mem->region); 51 resource_size_t page_size; 52 resource_size_t rem; 53 unsigned long max_pages; 54 unsigned long page_num; 55 LIST_HEAD(objects); 56 int err = 0; 57 58 page_size = mem->mm.chunk_size; 59 max_pages = div64_u64(total, page_size); 60 rem = total; 61 62 for_each_prime_number_from(page_num, 1, max_pages) { 63 resource_size_t size = page_num * page_size; 64 struct drm_i915_gem_object *obj; 65 66 obj = i915_gem_object_create_region(mem, size, 0); 67 if (IS_ERR(obj)) { 68 err = PTR_ERR(obj); 69 break; 70 } 71 72 err = i915_gem_object_pin_pages(obj); 73 if (err) { 74 i915_gem_object_put(obj); 75 break; 76 } 77 78 list_add(&obj->st_link, &objects); 79 rem -= size; 80 } 81 82 if (err == -ENOMEM) 83 err = 0; 84 if (err == -ENXIO) { 85 if (page_num * page_size <= rem) { 86 pr_err("%s failed, space still left in region\n", 87 __func__); 88 err = -EINVAL; 89 } else { 90 err = 0; 91 } 92 } 93 94 close_objects(mem, &objects); 95 96 return err; 97 } 98 99 static struct drm_i915_gem_object * 100 igt_object_create(struct intel_memory_region *mem, 101 struct list_head *objects, 102 u64 size, 103 unsigned int flags) 104 { 105 struct drm_i915_gem_object *obj; 106 int err; 107 108 obj = i915_gem_object_create_region(mem, size, flags); 109 if (IS_ERR(obj)) 110 return obj; 111 112 err = i915_gem_object_pin_pages(obj); 113 if (err) 114 goto put; 115 116 list_add(&obj->st_link, objects); 117 return obj; 118 119 put: 120 i915_gem_object_put(obj); 121 return ERR_PTR(err); 122 } 123 124 static void igt_object_release(struct drm_i915_gem_object *obj) 125 { 126 i915_gem_object_unpin_pages(obj); 127 __i915_gem_object_put_pages(obj); 128 list_del(&obj->st_link); 129 i915_gem_object_put(obj); 130 } 131 132 static int igt_mock_contiguous(void *arg) 133 { 134 struct intel_memory_region *mem = arg; 135 struct drm_i915_gem_object *obj; 136 unsigned long n_objects; 137 LIST_HEAD(objects); 138 LIST_HEAD(holes); 139 I915_RND_STATE(prng); 140 resource_size_t total; 141 resource_size_t min; 142 u64 target; 143 int err = 0; 144 145 total = resource_size(&mem->region); 146 147 /* Min size */ 148 obj = igt_object_create(mem, &objects, mem->mm.chunk_size, 149 I915_BO_ALLOC_CONTIGUOUS); 150 if (IS_ERR(obj)) 151 return PTR_ERR(obj); 152 153 if (obj->mm.pages->nents != 1) { 154 pr_err("%s min object spans multiple sg entries\n", __func__); 155 err = -EINVAL; 156 goto err_close_objects; 157 } 158 159 igt_object_release(obj); 160 161 /* Max size */ 162 obj = igt_object_create(mem, &objects, total, I915_BO_ALLOC_CONTIGUOUS); 163 if (IS_ERR(obj)) 164 return PTR_ERR(obj); 165 166 if (obj->mm.pages->nents != 1) { 167 pr_err("%s max object spans multiple sg entries\n", __func__); 168 err = -EINVAL; 169 goto err_close_objects; 170 } 171 172 igt_object_release(obj); 173 174 /* Internal fragmentation should not bleed into the object size */ 175 target = i915_prandom_u64_state(&prng); 176 div64_u64_rem(target, total, &target); 177 target = round_up(target, PAGE_SIZE); 178 target = max_t(u64, PAGE_SIZE, target); 179 180 obj = igt_object_create(mem, &objects, target, 181 I915_BO_ALLOC_CONTIGUOUS); 182 if (IS_ERR(obj)) 183 return PTR_ERR(obj); 184 185 if (obj->base.size != target) { 186 pr_err("%s obj->base.size(%zx) != target(%llx)\n", __func__, 187 obj->base.size, target); 188 err = -EINVAL; 189 goto err_close_objects; 190 } 191 192 if (obj->mm.pages->nents != 1) { 193 pr_err("%s object spans multiple sg entries\n", __func__); 194 err = -EINVAL; 195 goto err_close_objects; 196 } 197 198 igt_object_release(obj); 199 200 /* 201 * Try to fragment the address space, such that half of it is free, but 202 * the max contiguous block size is SZ_64K. 203 */ 204 205 target = SZ_64K; 206 n_objects = div64_u64(total, target); 207 208 while (n_objects--) { 209 struct list_head *list; 210 211 if (n_objects % 2) 212 list = &holes; 213 else 214 list = &objects; 215 216 obj = igt_object_create(mem, list, target, 217 I915_BO_ALLOC_CONTIGUOUS); 218 if (IS_ERR(obj)) { 219 err = PTR_ERR(obj); 220 goto err_close_objects; 221 } 222 } 223 224 close_objects(mem, &holes); 225 226 min = target; 227 target = total >> 1; 228 229 /* Make sure we can still allocate all the fragmented space */ 230 obj = igt_object_create(mem, &objects, target, 0); 231 if (IS_ERR(obj)) { 232 err = PTR_ERR(obj); 233 goto err_close_objects; 234 } 235 236 igt_object_release(obj); 237 238 /* 239 * Even though we have enough free space, we don't have a big enough 240 * contiguous block. Make sure that holds true. 241 */ 242 243 do { 244 bool should_fail = target > min; 245 246 obj = igt_object_create(mem, &objects, target, 247 I915_BO_ALLOC_CONTIGUOUS); 248 if (should_fail != IS_ERR(obj)) { 249 pr_err("%s target allocation(%llx) mismatch\n", 250 __func__, target); 251 err = -EINVAL; 252 goto err_close_objects; 253 } 254 255 target >>= 1; 256 } while (target >= mem->mm.chunk_size); 257 258 err_close_objects: 259 list_splice_tail(&holes, &objects); 260 close_objects(mem, &objects); 261 return err; 262 } 263 264 static int igt_gpu_write_dw(struct intel_context *ce, 265 struct i915_vma *vma, 266 u32 dword, 267 u32 value) 268 { 269 return igt_gpu_fill_dw(ce, vma, dword * sizeof(u32), 270 vma->size >> PAGE_SHIFT, value); 271 } 272 273 static int igt_cpu_check(struct drm_i915_gem_object *obj, u32 dword, u32 val) 274 { 275 unsigned long n = obj->base.size >> PAGE_SHIFT; 276 u32 *ptr; 277 int err; 278 279 err = i915_gem_object_wait(obj, 0, MAX_SCHEDULE_TIMEOUT); 280 if (err) 281 return err; 282 283 ptr = i915_gem_object_pin_map(obj, I915_MAP_WC); 284 if (IS_ERR(ptr)) 285 return PTR_ERR(ptr); 286 287 ptr += dword; 288 while (n--) { 289 if (*ptr != val) { 290 pr_err("base[%u]=%08x, val=%08x\n", 291 dword, *ptr, val); 292 err = -EINVAL; 293 break; 294 } 295 296 ptr += PAGE_SIZE / sizeof(*ptr); 297 } 298 299 i915_gem_object_unpin_map(obj); 300 return err; 301 } 302 303 static int igt_gpu_write(struct i915_gem_context *ctx, 304 struct drm_i915_gem_object *obj) 305 { 306 struct i915_gem_engines *engines; 307 struct i915_gem_engines_iter it; 308 struct i915_address_space *vm; 309 struct intel_context *ce; 310 I915_RND_STATE(prng); 311 IGT_TIMEOUT(end_time); 312 unsigned int count; 313 struct i915_vma *vma; 314 int *order; 315 int i, n; 316 int err = 0; 317 318 GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj)); 319 320 n = 0; 321 count = 0; 322 for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) { 323 count++; 324 if (!intel_engine_can_store_dword(ce->engine)) 325 continue; 326 327 vm = ce->vm; 328 n++; 329 } 330 i915_gem_context_unlock_engines(ctx); 331 if (!n) 332 return 0; 333 334 order = i915_random_order(count * count, &prng); 335 if (!order) 336 return -ENOMEM; 337 338 vma = i915_vma_instance(obj, vm, NULL); 339 if (IS_ERR(vma)) { 340 err = PTR_ERR(vma); 341 goto out_free; 342 } 343 344 err = i915_vma_pin(vma, 0, 0, PIN_USER); 345 if (err) 346 goto out_free; 347 348 i = 0; 349 engines = i915_gem_context_lock_engines(ctx); 350 do { 351 u32 rng = prandom_u32_state(&prng); 352 u32 dword = offset_in_page(rng) / 4; 353 354 ce = engines->engines[order[i] % engines->num_engines]; 355 i = (i + 1) % (count * count); 356 if (!ce || !intel_engine_can_store_dword(ce->engine)) 357 continue; 358 359 err = igt_gpu_write_dw(ce, vma, dword, rng); 360 if (err) 361 break; 362 363 err = igt_cpu_check(obj, dword, rng); 364 if (err) 365 break; 366 } while (!__igt_timeout(end_time, NULL)); 367 i915_gem_context_unlock_engines(ctx); 368 369 out_free: 370 kfree(order); 371 372 if (err == -ENOMEM) 373 err = 0; 374 375 return err; 376 } 377 378 static int igt_lmem_create(void *arg) 379 { 380 struct drm_i915_private *i915 = arg; 381 struct drm_i915_gem_object *obj; 382 int err = 0; 383 384 obj = i915_gem_object_create_lmem(i915, PAGE_SIZE, 0); 385 if (IS_ERR(obj)) 386 return PTR_ERR(obj); 387 388 err = i915_gem_object_pin_pages(obj); 389 if (err) 390 goto out_put; 391 392 i915_gem_object_unpin_pages(obj); 393 out_put: 394 i915_gem_object_put(obj); 395 396 return err; 397 } 398 399 static int igt_lmem_write_gpu(void *arg) 400 { 401 struct drm_i915_private *i915 = arg; 402 struct drm_i915_gem_object *obj; 403 struct i915_gem_context *ctx; 404 struct file *file; 405 I915_RND_STATE(prng); 406 u32 sz; 407 int err; 408 409 file = mock_file(i915); 410 if (IS_ERR(file)) 411 return PTR_ERR(file); 412 413 ctx = live_context(i915, file); 414 if (IS_ERR(ctx)) { 415 err = PTR_ERR(ctx); 416 goto out_file; 417 } 418 419 sz = round_up(prandom_u32_state(&prng) % SZ_32M, PAGE_SIZE); 420 421 obj = i915_gem_object_create_lmem(i915, sz, 0); 422 if (IS_ERR(obj)) { 423 err = PTR_ERR(obj); 424 goto out_file; 425 } 426 427 err = i915_gem_object_pin_pages(obj); 428 if (err) 429 goto out_put; 430 431 err = igt_gpu_write(ctx, obj); 432 if (err) 433 pr_err("igt_gpu_write failed(%d)\n", err); 434 435 i915_gem_object_unpin_pages(obj); 436 out_put: 437 i915_gem_object_put(obj); 438 out_file: 439 fput(file); 440 return err; 441 } 442 443 static struct intel_engine_cs * 444 random_engine_class(struct drm_i915_private *i915, 445 unsigned int class, 446 struct rnd_state *prng) 447 { 448 struct intel_engine_cs *engine; 449 unsigned int count; 450 451 count = 0; 452 for (engine = intel_engine_lookup_user(i915, class, 0); 453 engine && engine->uabi_class == class; 454 engine = rb_entry_safe(rb_next(&engine->uabi_node), 455 typeof(*engine), uabi_node)) 456 count++; 457 458 count = i915_prandom_u32_max_state(count, prng); 459 return intel_engine_lookup_user(i915, class, count); 460 } 461 462 static int igt_lmem_write_cpu(void *arg) 463 { 464 struct drm_i915_private *i915 = arg; 465 struct drm_i915_gem_object *obj; 466 I915_RND_STATE(prng); 467 IGT_TIMEOUT(end_time); 468 u32 bytes[] = { 469 0, /* rng placeholder */ 470 sizeof(u32), 471 sizeof(u64), 472 64, /* cl */ 473 PAGE_SIZE, 474 PAGE_SIZE - sizeof(u32), 475 PAGE_SIZE - sizeof(u64), 476 PAGE_SIZE - 64, 477 }; 478 struct intel_engine_cs *engine; 479 u32 *vaddr; 480 u32 sz; 481 u32 i; 482 int *order; 483 int count; 484 int err; 485 486 engine = random_engine_class(i915, I915_ENGINE_CLASS_COPY, &prng); 487 if (!engine) 488 return 0; 489 490 pr_info("%s: using %s\n", __func__, engine->name); 491 492 sz = round_up(prandom_u32_state(&prng) % SZ_32M, PAGE_SIZE); 493 sz = max_t(u32, 2 * PAGE_SIZE, sz); 494 495 obj = i915_gem_object_create_lmem(i915, sz, I915_BO_ALLOC_CONTIGUOUS); 496 if (IS_ERR(obj)) 497 return PTR_ERR(obj); 498 499 vaddr = i915_gem_object_pin_map(obj, I915_MAP_WC); 500 if (IS_ERR(vaddr)) { 501 err = PTR_ERR(vaddr); 502 goto out_put; 503 } 504 505 /* Put the pages into a known state -- from the gpu for added fun */ 506 intel_engine_pm_get(engine); 507 err = i915_gem_object_fill_blt(obj, engine->kernel_context, 0xdeadbeaf); 508 intel_engine_pm_put(engine); 509 if (err) 510 goto out_unpin; 511 512 i915_gem_object_lock(obj); 513 err = i915_gem_object_set_to_wc_domain(obj, true); 514 i915_gem_object_unlock(obj); 515 if (err) 516 goto out_unpin; 517 518 count = ARRAY_SIZE(bytes); 519 order = i915_random_order(count * count, &prng); 520 if (!order) { 521 err = -ENOMEM; 522 goto out_unpin; 523 } 524 525 /* We want to throw in a random width/align */ 526 bytes[0] = igt_random_offset(&prng, 0, PAGE_SIZE, sizeof(u32), 527 sizeof(u32)); 528 529 i = 0; 530 do { 531 u32 offset; 532 u32 align; 533 u32 dword; 534 u32 size; 535 u32 val; 536 537 size = bytes[order[i] % count]; 538 i = (i + 1) % (count * count); 539 540 align = bytes[order[i] % count]; 541 i = (i + 1) % (count * count); 542 543 align = max_t(u32, sizeof(u32), rounddown_pow_of_two(align)); 544 545 offset = igt_random_offset(&prng, 0, obj->base.size, 546 size, align); 547 548 val = prandom_u32_state(&prng); 549 memset32(vaddr + offset / sizeof(u32), val ^ 0xdeadbeaf, 550 size / sizeof(u32)); 551 552 /* 553 * Sample random dw -- don't waste precious time reading every 554 * single dw. 555 */ 556 dword = igt_random_offset(&prng, offset, 557 offset + size, 558 sizeof(u32), sizeof(u32)); 559 dword /= sizeof(u32); 560 if (vaddr[dword] != (val ^ 0xdeadbeaf)) { 561 pr_err("%s vaddr[%u]=%u, val=%u, size=%u, align=%u, offset=%u\n", 562 __func__, dword, vaddr[dword], val ^ 0xdeadbeaf, 563 size, align, offset); 564 err = -EINVAL; 565 break; 566 } 567 } while (!__igt_timeout(end_time, NULL)); 568 569 out_unpin: 570 i915_gem_object_unpin_map(obj); 571 out_put: 572 i915_gem_object_put(obj); 573 574 return err; 575 } 576 577 static const char *repr_type(u32 type) 578 { 579 switch (type) { 580 case I915_MAP_WB: 581 return "WB"; 582 case I915_MAP_WC: 583 return "WC"; 584 } 585 586 return ""; 587 } 588 589 static struct drm_i915_gem_object * 590 create_region_for_mapping(struct intel_memory_region *mr, u64 size, u32 type, 591 void **out_addr) 592 { 593 struct drm_i915_gem_object *obj; 594 void *addr; 595 596 obj = i915_gem_object_create_region(mr, size, 0); 597 if (IS_ERR(obj)) 598 return obj; 599 600 addr = i915_gem_object_pin_map(obj, type); 601 if (IS_ERR(addr)) { 602 i915_gem_object_put(obj); 603 if (PTR_ERR(addr) == -ENXIO) 604 return ERR_PTR(-ENODEV); 605 return addr; 606 } 607 608 *out_addr = addr; 609 return obj; 610 } 611 612 static int wrap_ktime_compare(const void *A, const void *B) 613 { 614 const ktime_t *a = A, *b = B; 615 616 return ktime_compare(*a, *b); 617 } 618 619 static void igt_memcpy_long(void *dst, const void *src, size_t size) 620 { 621 unsigned long *tmp = dst; 622 const unsigned long *s = src; 623 624 size = size / sizeof(unsigned long); 625 while (size--) 626 *tmp++ = *s++; 627 } 628 629 static inline void igt_memcpy(void *dst, const void *src, size_t size) 630 { 631 memcpy(dst, src, size); 632 } 633 634 static inline void igt_memcpy_from_wc(void *dst, const void *src, size_t size) 635 { 636 i915_memcpy_from_wc(dst, src, size); 637 } 638 639 static int _perf_memcpy(struct intel_memory_region *src_mr, 640 struct intel_memory_region *dst_mr, 641 u64 size, u32 src_type, u32 dst_type) 642 { 643 struct drm_i915_private *i915 = src_mr->i915; 644 const struct { 645 const char *name; 646 void (*copy)(void *dst, const void *src, size_t size); 647 bool skip; 648 } tests[] = { 649 { 650 "memcpy", 651 igt_memcpy, 652 }, 653 { 654 "memcpy_long", 655 igt_memcpy_long, 656 }, 657 { 658 "memcpy_from_wc", 659 igt_memcpy_from_wc, 660 !i915_has_memcpy_from_wc(), 661 }, 662 }; 663 struct drm_i915_gem_object *src, *dst; 664 void *src_addr, *dst_addr; 665 int ret = 0; 666 int i; 667 668 src = create_region_for_mapping(src_mr, size, src_type, &src_addr); 669 if (IS_ERR(src)) { 670 ret = PTR_ERR(src); 671 goto out; 672 } 673 674 dst = create_region_for_mapping(dst_mr, size, dst_type, &dst_addr); 675 if (IS_ERR(dst)) { 676 ret = PTR_ERR(dst); 677 goto out_unpin_src; 678 } 679 680 for (i = 0; i < ARRAY_SIZE(tests); ++i) { 681 ktime_t t[5]; 682 int pass; 683 684 if (tests[i].skip) 685 continue; 686 687 for (pass = 0; pass < ARRAY_SIZE(t); pass++) { 688 ktime_t t0, t1; 689 690 t0 = ktime_get(); 691 692 tests[i].copy(dst_addr, src_addr, size); 693 694 t1 = ktime_get(); 695 t[pass] = ktime_sub(t1, t0); 696 } 697 698 sort(t, ARRAY_SIZE(t), sizeof(*t), wrap_ktime_compare, NULL); 699 pr_info("%s src(%s, %s) -> dst(%s, %s) %14s %4llu KiB copy: %5lld MiB/s\n", 700 __func__, 701 src_mr->name, 702 repr_type(src_type), 703 dst_mr->name, 704 repr_type(dst_type), 705 tests[i].name, 706 size >> 10, 707 div64_u64(mul_u32_u32(4 * size, 708 1000 * 1000 * 1000), 709 t[1] + 2 * t[2] + t[3]) >> 20); 710 711 cond_resched(); 712 } 713 714 i915_gem_object_unpin_map(dst); 715 i915_gem_object_put(dst); 716 out_unpin_src: 717 i915_gem_object_unpin_map(src); 718 i915_gem_object_put(src); 719 720 i915_gem_drain_freed_objects(i915); 721 out: 722 if (ret == -ENODEV) 723 ret = 0; 724 725 return ret; 726 } 727 728 static int perf_memcpy(void *arg) 729 { 730 struct drm_i915_private *i915 = arg; 731 static const u32 types[] = { 732 I915_MAP_WB, 733 I915_MAP_WC, 734 }; 735 static const u32 sizes[] = { 736 SZ_4K, 737 SZ_64K, 738 SZ_4M, 739 }; 740 struct intel_memory_region *src_mr, *dst_mr; 741 int src_id, dst_id; 742 int i, j, k; 743 int ret; 744 745 for_each_memory_region(src_mr, i915, src_id) { 746 for_each_memory_region(dst_mr, i915, dst_id) { 747 for (i = 0; i < ARRAY_SIZE(sizes); ++i) { 748 for (j = 0; j < ARRAY_SIZE(types); ++j) { 749 for (k = 0; k < ARRAY_SIZE(types); ++k) { 750 ret = _perf_memcpy(src_mr, 751 dst_mr, 752 sizes[i], 753 types[j], 754 types[k]); 755 if (ret) 756 return ret; 757 } 758 } 759 } 760 } 761 } 762 763 return 0; 764 } 765 766 int intel_memory_region_mock_selftests(void) 767 { 768 static const struct i915_subtest tests[] = { 769 SUBTEST(igt_mock_fill), 770 SUBTEST(igt_mock_contiguous), 771 }; 772 struct intel_memory_region *mem; 773 struct drm_i915_private *i915; 774 int err; 775 776 i915 = mock_gem_device(); 777 if (!i915) 778 return -ENOMEM; 779 780 mem = mock_region_create(i915, 0, SZ_2G, I915_GTT_PAGE_SIZE_4K, 0); 781 if (IS_ERR(mem)) { 782 pr_err("failed to create memory region\n"); 783 err = PTR_ERR(mem); 784 goto out_unref; 785 } 786 787 err = i915_subtests(tests, mem); 788 789 intel_memory_region_put(mem); 790 out_unref: 791 drm_dev_put(&i915->drm); 792 return err; 793 } 794 795 int intel_memory_region_live_selftests(struct drm_i915_private *i915) 796 { 797 static const struct i915_subtest tests[] = { 798 SUBTEST(igt_lmem_create), 799 SUBTEST(igt_lmem_write_cpu), 800 SUBTEST(igt_lmem_write_gpu), 801 }; 802 803 if (!HAS_LMEM(i915)) { 804 pr_info("device lacks LMEM support, skipping\n"); 805 return 0; 806 } 807 808 if (intel_gt_is_wedged(&i915->gt)) 809 return 0; 810 811 return i915_live_subtests(tests, i915); 812 } 813 814 int intel_memory_region_perf_selftests(struct drm_i915_private *i915) 815 { 816 static const struct i915_subtest tests[] = { 817 SUBTEST(perf_memcpy), 818 }; 819 820 if (intel_gt_is_wedged(&i915->gt)) 821 return 0; 822 823 return i915_live_subtests(tests, i915); 824 } 825