// SPDX-License-Identifier: MIT
/*
 * Copyright © 2019 Intel Corporation
 */

#include <linux/prime_numbers.h>
#include <linux/sort.h>

#include "../i915_selftest.h"

#include "mock_drm.h"
#include "mock_gem_device.h"
#include "mock_region.h"

#include "gem/i915_gem_context.h"
#include "gem/i915_gem_lmem.h"
#include "gem/i915_gem_region.h"
#include "gem/i915_gem_object_blt.h"
#include "gem/selftests/igt_gem_utils.h"
#include "gem/selftests/mock_context.h"
#include "gt/intel_engine_user.h"
#include "gt/intel_gt.h"
#include "i915_memcpy.h"
#include "selftests/igt_flush_test.h"
#include "selftests/i915_random.h"

static void close_objects(struct intel_memory_region *mem,
			  struct list_head *objects)
{
	struct drm_i915_private *i915 = mem->i915;
	struct drm_i915_gem_object *obj, *on;

	list_for_each_entry_safe(obj, on, objects, st_link) {
		i915_gem_object_lock(obj, NULL);
		if (i915_gem_object_has_pinned_pages(obj))
			i915_gem_object_unpin_pages(obj);
		/* No polluting the memory region between tests */
		__i915_gem_object_put_pages(obj);
		i915_gem_object_unlock(obj);
		list_del(&obj->st_link);
		i915_gem_object_put(obj);
	}

	cond_resched();

	i915_gem_drain_freed_objects(i915);
}

static int igt_mock_fill(void *arg)
{
	struct intel_memory_region *mem = arg;
	resource_size_t total = resource_size(&mem->region);
	resource_size_t page_size;
	resource_size_t rem;
	unsigned long max_pages;
	unsigned long page_num;
	LIST_HEAD(objects);
	int err = 0;

	page_size = mem->mm.chunk_size;
	max_pages = div64_u64(total, page_size);
	rem = total;

	for_each_prime_number_from(page_num, 1, max_pages) {
		resource_size_t size = page_num * page_size;
		struct drm_i915_gem_object *obj;

		obj = i915_gem_object_create_region(mem, size, 0);
		if (IS_ERR(obj)) {
			err = PTR_ERR(obj);
			break;
		}

		err = i915_gem_object_pin_pages_unlocked(obj);
		if (err) {
			i915_gem_object_put(obj);
			break;
		}

		list_add(&obj->st_link, &objects);
		rem -= size;
	}

	if (err == -ENOMEM)
		err = 0;
	if (err == -ENXIO) {
		if (page_num * page_size <= rem) {
			pr_err("%s failed, space still left in region\n",
			       __func__);
			err = -EINVAL;
		} else {
			err = 0;
		}
	}

	close_objects(mem, &objects);

	return err;
}

static struct drm_i915_gem_object *
igt_object_create(struct intel_memory_region *mem,
		  struct list_head *objects,
		  u64 size,
		  unsigned int flags)
{
	struct drm_i915_gem_object *obj;
	int err;

	obj = i915_gem_object_create_region(mem, size, flags);
	if (IS_ERR(obj))
		return obj;

	err = i915_gem_object_pin_pages_unlocked(obj);
	if (err)
		goto put;

	list_add(&obj->st_link, objects);
	return obj;

put:
	i915_gem_object_put(obj);
	return ERR_PTR(err);
}

static void igt_object_release(struct drm_i915_gem_object *obj)
{
	i915_gem_object_lock(obj, NULL);
	i915_gem_object_unpin_pages(obj);
	__i915_gem_object_put_pages(obj);
	i915_gem_object_unlock(obj);
	list_del(&obj->st_link);
	i915_gem_object_put(obj);
}

static bool is_contiguous(struct drm_i915_gem_object *obj)
{
	struct scatterlist *sg;
	dma_addr_t addr = -1;

	for (sg = obj->mm.pages->sgl; sg; sg = sg_next(sg)) {
		if (addr != -1 && sg_dma_address(sg) != addr)
			return false;

		addr = sg_dma_address(sg) + sg_dma_len(sg);
	}

	return true;
}

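/*
 * igt_mock_reserve() - reserve randomly sized, randomly placed chunks of a
 * mock region, then check that exactly the space left over can still be
 * handed out by the allocator before it reports -ENXIO.
 */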
static int igt_mock_reserve(void *arg)
{
	struct intel_memory_region *mem = arg;
	resource_size_t avail = resource_size(&mem->region);
	struct drm_i915_gem_object *obj;
	const u32 chunk_size = SZ_32M;
	u32 i, offset, count, *order;
	u64 allocated, cur_avail;
	I915_RND_STATE(prng);
	LIST_HEAD(objects);
	int err = 0;

	if (!list_empty(&mem->reserved)) {
		pr_err("%s region reserved list is not empty\n", __func__);
		return -EINVAL;
	}

	count = avail / chunk_size;
	order = i915_random_order(count, &prng);
	if (!order)
		return 0;

	/* Reserve a bunch of ranges within the region */
	for (i = 0; i < count; ++i) {
		u64 start = order[i] * chunk_size;
		u64 size = i915_prandom_u32_max_state(chunk_size, &prng);

		/* Allow for some really big holes */
		if (!size)
			continue;

		size = round_up(size, PAGE_SIZE);
		offset = igt_random_offset(&prng, 0, chunk_size, size,
					   PAGE_SIZE);

		err = intel_memory_region_reserve(mem, start + offset, size);
		if (err) {
			pr_err("%s failed to reserve range\n", __func__);
			goto out_close;
		}

		/* XXX: maybe sanity check the block range here? */
		avail -= size;
	}

	/* Try to see if we can allocate from the remaining space */
	allocated = 0;
	cur_avail = avail;
	do {
		u32 size = i915_prandom_u32_max_state(cur_avail, &prng);

		size = max_t(u32, round_up(size, PAGE_SIZE), PAGE_SIZE);
		obj = igt_object_create(mem, &objects, size, 0);
		if (IS_ERR(obj)) {
			if (PTR_ERR(obj) == -ENXIO)
				break;

			err = PTR_ERR(obj);
			goto out_close;
		}
		cur_avail -= size;
		allocated += size;
	} while (1);

	if (allocated != avail) {
		pr_err("%s mismatch between allocation and free space\n",
		       __func__);
		err = -EINVAL;
	}

out_close:
	kfree(order);
	close_objects(mem, &objects);
	i915_buddy_free_list(&mem->mm, &mem->reserved);
	return err;
}

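/*
 * igt_mock_contiguous() - exercise I915_BO_ALLOC_CONTIGUOUS: minimum, maximum
 * and randomly sized objects must come back as a single contiguous range, and
 * once the region is deliberately fragmented, contiguous allocations larger
 * than the biggest remaining block must fail while smaller ones still succeed.
 */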
static int igt_mock_contiguous(void *arg)
{
	struct intel_memory_region *mem = arg;
	struct drm_i915_gem_object *obj;
	unsigned long n_objects;
	LIST_HEAD(objects);
	LIST_HEAD(holes);
	I915_RND_STATE(prng);
	resource_size_t total;
	resource_size_t min;
	u64 target;
	int err = 0;

	total = resource_size(&mem->region);

	/* Min size */
	obj = igt_object_create(mem, &objects, mem->mm.chunk_size,
				I915_BO_ALLOC_CONTIGUOUS);
	if (IS_ERR(obj))
		return PTR_ERR(obj);

	if (!is_contiguous(obj)) {
		pr_err("%s min object spans disjoint sg entries\n", __func__);
		err = -EINVAL;
		goto err_close_objects;
	}

	igt_object_release(obj);

	/* Max size */
	obj = igt_object_create(mem, &objects, total, I915_BO_ALLOC_CONTIGUOUS);
	if (IS_ERR(obj))
		return PTR_ERR(obj);

	if (!is_contiguous(obj)) {
		pr_err("%s max object spans disjoint sg entries\n", __func__);
		err = -EINVAL;
		goto err_close_objects;
	}

	igt_object_release(obj);

	/* Internal fragmentation should not bleed into the object size */
	target = i915_prandom_u64_state(&prng);
	div64_u64_rem(target, total, &target);
	target = round_up(target, PAGE_SIZE);
	target = max_t(u64, PAGE_SIZE, target);

	obj = igt_object_create(mem, &objects, target,
				I915_BO_ALLOC_CONTIGUOUS);
	if (IS_ERR(obj))
		return PTR_ERR(obj);

	if (obj->base.size != target) {
		pr_err("%s obj->base.size(%zx) != target(%llx)\n", __func__,
		       obj->base.size, target);
		err = -EINVAL;
		goto err_close_objects;
	}

	if (!is_contiguous(obj)) {
		pr_err("%s object spans disjoint sg entries\n", __func__);
		err = -EINVAL;
		goto err_close_objects;
	}

	igt_object_release(obj);

	/*
	 * Try to fragment the address space, such that half of it is free, but
	 * the max contiguous block size is SZ_64K.
	 */

	target = SZ_64K;
	n_objects = div64_u64(total, target);

	while (n_objects--) {
		struct list_head *list;

		if (n_objects % 2)
			list = &holes;
		else
			list = &objects;

		obj = igt_object_create(mem, list, target,
					I915_BO_ALLOC_CONTIGUOUS);
		if (IS_ERR(obj)) {
			err = PTR_ERR(obj);
			goto err_close_objects;
		}
	}

	close_objects(mem, &holes);

	min = target;
	target = total >> 1;

	/* Make sure we can still allocate all the fragmented space */
	obj = igt_object_create(mem, &objects, target, 0);
	if (IS_ERR(obj)) {
		err = PTR_ERR(obj);
		goto err_close_objects;
	}

	igt_object_release(obj);

	/*
	 * Even though we have enough free space, we don't have a big enough
	 * contiguous block. Make sure that holds true.
	 */

	do {
		bool should_fail = target > min;

		obj = igt_object_create(mem, &objects, target,
					I915_BO_ALLOC_CONTIGUOUS);
		if (should_fail != IS_ERR(obj)) {
			pr_err("%s target allocation(%llx) mismatch\n",
			       __func__, target);
			err = -EINVAL;
			goto err_close_objects;
		}

		target >>= 1;
	} while (target >= mem->mm.chunk_size);

err_close_objects:
	list_splice_tail(&holes, &objects);
	close_objects(mem, &objects);
	return err;
}

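/*
 * igt_mock_splintered_region() - create a region whose size is not a
 * power-of-two and check the buddy state (mm.size, mm.max_order), that the
 * whole region can still be allocated, and that contiguous allocations are
 * capped at the largest power-of-two block.
 */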
static int igt_mock_splintered_region(void *arg)
{
	struct intel_memory_region *mem = arg;
	struct drm_i915_private *i915 = mem->i915;
	struct drm_i915_gem_object *obj;
	unsigned int expected_order;
	LIST_HEAD(objects);
	u64 size;
	int err = 0;

	/*
	 * Sanity check we can still allocate everything even if
	 * mm.max_order != mm.size, i.e. our starting address space size is
	 * not a power-of-two.
	 */

	size = (SZ_4G - 1) & PAGE_MASK;
	mem = mock_region_create(i915, 0, size, PAGE_SIZE, 0);
	if (IS_ERR(mem))
		return PTR_ERR(mem);

	if (mem->mm.size != size) {
		pr_err("%s size mismatch(%llu != %llu)\n",
		       __func__, mem->mm.size, size);
		err = -EINVAL;
		goto out_put;
	}

	expected_order = get_order(rounddown_pow_of_two(size));
	if (mem->mm.max_order != expected_order) {
		pr_err("%s order mismatch(%u != %u)\n",
		       __func__, mem->mm.max_order, expected_order);
		err = -EINVAL;
		goto out_put;
	}

	obj = igt_object_create(mem, &objects, size, 0);
	if (IS_ERR(obj)) {
		err = PTR_ERR(obj);
		goto out_close;
	}

	close_objects(mem, &objects);

	/*
	 * While we should be able to allocate everything without any flag
	 * restrictions, if we consider I915_BO_ALLOC_CONTIGUOUS then we are
	 * actually limited to the largest power-of-two for the region size,
	 * i.e. max_order, due to the inner workings of the buddy allocator.
	 * So make sure that does indeed hold true.
	 */

	obj = igt_object_create(mem, &objects, size, I915_BO_ALLOC_CONTIGUOUS);
	if (!IS_ERR(obj)) {
		pr_err("%s too large contiguous allocation was not rejected\n",
		       __func__);
		err = -EINVAL;
		goto out_close;
	}

	obj = igt_object_create(mem, &objects, rounddown_pow_of_two(size),
				I915_BO_ALLOC_CONTIGUOUS);
	if (IS_ERR(obj)) {
		pr_err("%s largest possible contiguous allocation failed\n",
		       __func__);
		err = PTR_ERR(obj);
		goto out_close;
	}

out_close:
	close_objects(mem, &objects);
out_put:
	intel_memory_region_put(mem);
	return err;
}

#ifndef SZ_8G
#define SZ_8G BIT_ULL(33)
#endif

static int igt_mock_max_segment(void *arg)
{
	const unsigned int max_segment = i915_sg_segment_size();
	struct intel_memory_region *mem = arg;
	struct drm_i915_private *i915 = mem->i915;
	struct drm_i915_gem_object *obj;
	struct i915_buddy_block *block;
	struct scatterlist *sg;
	LIST_HEAD(objects);
	u64 size;
	int err = 0;

	/*
	 * While we may create very large contiguous blocks, we may need
	 * to break those down for consumption elsewhere. In particular,
	 * dma-mapping with scatterlist elements has an implicit limit of
	 * UINT_MAX on each element.
	 */

	size = SZ_8G;
	mem = mock_region_create(i915, 0, size, PAGE_SIZE, 0);
	if (IS_ERR(mem))
		return PTR_ERR(mem);

	obj = igt_object_create(mem, &objects, size, 0);
	if (IS_ERR(obj)) {
		err = PTR_ERR(obj);
		goto out_put;
	}

	size = 0;
	list_for_each_entry(block, &obj->mm.blocks, link) {
		if (i915_buddy_block_size(&mem->mm, block) > size)
			size = i915_buddy_block_size(&mem->mm, block);
	}
	if (size < max_segment) {
		pr_err("%s: Failed to create a huge contiguous block [> %u], largest block %lld\n",
		       __func__, max_segment, size);
		err = -EINVAL;
		goto out_close;
	}

	for (sg = obj->mm.pages->sgl; sg; sg = sg_next(sg)) {
		if (sg->length > max_segment) {
			pr_err("%s: Created an oversized scatterlist entry, %u > %u\n",
			       __func__, sg->length, max_segment);
			err = -EINVAL;
			goto out_close;
		}
	}

out_close:
	close_objects(mem, &objects);
out_put:
	intel_memory_region_put(mem);
	return err;
}

static int igt_gpu_write_dw(struct intel_context *ce,
			    struct i915_vma *vma,
			    u32 dword,
			    u32 value)
{
	return igt_gpu_fill_dw(ce, vma, dword * sizeof(u32),
			       vma->size >> PAGE_SHIFT, value);
}

static int igt_cpu_check(struct drm_i915_gem_object *obj, u32 dword, u32 val)
{
	unsigned long n = obj->base.size >> PAGE_SHIFT;
	u32 *ptr;
	int err;

	err = i915_gem_object_wait(obj, 0, MAX_SCHEDULE_TIMEOUT);
	if (err)
		return err;

	ptr = i915_gem_object_pin_map(obj, I915_MAP_WC);
	if (IS_ERR(ptr))
		return PTR_ERR(ptr);

	ptr += dword;
	while (n--) {
		if (*ptr != val) {
			pr_err("base[%u]=%08x, val=%08x\n",
			       dword, *ptr, val);
			err = -EINVAL;
			break;
		}

		ptr += PAGE_SIZE / sizeof(*ptr);
	}

	i915_gem_object_unpin_map(obj);
	return err;
}

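/*
 * igt_gpu_write() - write a random dword into every page of the object from
 * randomly chosen engines that can store a dword, then read it back with the
 * CPU via igt_cpu_check() to confirm the writes landed.
 */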
static int igt_gpu_write(struct i915_gem_context *ctx,
			 struct drm_i915_gem_object *obj)
{
	struct i915_gem_engines *engines;
	struct i915_gem_engines_iter it;
	struct i915_address_space *vm;
	struct intel_context *ce;
	I915_RND_STATE(prng);
	IGT_TIMEOUT(end_time);
	unsigned int count;
	struct i915_vma *vma;
	int *order;
	int i, n;
	int err = 0;

	GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));

	n = 0;
	count = 0;
	for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) {
		count++;
		if (!intel_engine_can_store_dword(ce->engine))
			continue;

		vm = ce->vm;
		n++;
	}
	i915_gem_context_unlock_engines(ctx);
	if (!n)
		return 0;

	order = i915_random_order(count * count, &prng);
	if (!order)
		return -ENOMEM;

	vma = i915_vma_instance(obj, vm, NULL);
	if (IS_ERR(vma)) {
		err = PTR_ERR(vma);
		goto out_free;
	}

	err = i915_vma_pin(vma, 0, 0, PIN_USER);
	if (err)
		goto out_free;

	i = 0;
	engines = i915_gem_context_lock_engines(ctx);
	do {
		u32 rng = prandom_u32_state(&prng);
		u32 dword = offset_in_page(rng) / 4;

		ce = engines->engines[order[i] % engines->num_engines];
		i = (i + 1) % (count * count);
		if (!ce || !intel_engine_can_store_dword(ce->engine))
			continue;

		err = igt_gpu_write_dw(ce, vma, dword, rng);
		if (err)
			break;

		i915_gem_object_lock(obj, NULL);
		err = igt_cpu_check(obj, dword, rng);
		i915_gem_object_unlock(obj);
		if (err)
			break;
	} while (!__igt_timeout(end_time, NULL));
	i915_gem_context_unlock_engines(ctx);

out_free:
	kfree(order);

	if (err == -ENOMEM)
		err = 0;

	return err;
}

static int igt_lmem_create(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct drm_i915_gem_object *obj;
	int err = 0;

	obj = i915_gem_object_create_lmem(i915, PAGE_SIZE, 0);
	if (IS_ERR(obj))
		return PTR_ERR(obj);

	err = i915_gem_object_pin_pages_unlocked(obj);
	if (err)
		goto out_put;

	i915_gem_object_unpin_pages(obj);
out_put:
	i915_gem_object_put(obj);

	return err;
}

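/*
 * igt_lmem_create_cleared_cpu() - alternate between allocations with and
 * without I915_BO_ALLOC_CPU_CLEAR, dirtying the backing pages each time, and
 * verify that the cleared objects really do read back as zeroes.
 */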
static int igt_lmem_create_cleared_cpu(void *arg)
{
	struct drm_i915_private *i915 = arg;
	I915_RND_STATE(prng);
	IGT_TIMEOUT(end_time);
	u32 size, i;
	int err;

	i915_gem_drain_freed_objects(i915);

	size = max_t(u32, PAGE_SIZE, i915_prandom_u32_max_state(SZ_32M, &prng));
	size = round_up(size, PAGE_SIZE);
	i = 0;

	do {
		struct drm_i915_gem_object *obj;
		unsigned int flags;
		u32 dword, val;
		void *vaddr;

		/*
		 * Alternate between cleared and uncleared allocations, while
		 * also dirtying the pages each time, to check that the pages
		 * are always cleared when requested. Since we are the only
		 * user, we should see some overlap of the underlying pages,
		 * if not all of them.
		 */

		flags = I915_BO_ALLOC_CPU_CLEAR;
		if (i & 1)
			flags = 0;

		obj = i915_gem_object_create_lmem(i915, size, flags);
		if (IS_ERR(obj))
			return PTR_ERR(obj);

		i915_gem_object_lock(obj, NULL);
		err = i915_gem_object_pin_pages(obj);
		if (err)
			goto out_put;

		dword = i915_prandom_u32_max_state(PAGE_SIZE / sizeof(u32),
						   &prng);

		if (flags & I915_BO_ALLOC_CPU_CLEAR) {
			err = igt_cpu_check(obj, dword, 0);
			if (err) {
				pr_err("%s failed with size=%u, flags=%u\n",
				       __func__, size, flags);
				goto out_unpin;
			}
		}

		vaddr = i915_gem_object_pin_map(obj, I915_MAP_WC);
		if (IS_ERR(vaddr)) {
			err = PTR_ERR(vaddr);
			goto out_unpin;
		}

		val = prandom_u32_state(&prng);

		memset32(vaddr, val, obj->base.size / sizeof(u32));

		i915_gem_object_flush_map(obj);
		i915_gem_object_unpin_map(obj);
out_unpin:
		i915_gem_object_unpin_pages(obj);
		__i915_gem_object_put_pages(obj);
out_put:
		i915_gem_object_unlock(obj);
		i915_gem_object_put(obj);

		if (err)
			break;
		++i;
	} while (!__igt_timeout(end_time, NULL));

	pr_info("%s completed (%u) iterations\n", __func__, i);

	return err;
}

static int igt_lmem_write_gpu(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct drm_i915_gem_object *obj;
	struct i915_gem_context *ctx;
	struct file *file;
	I915_RND_STATE(prng);
	u32 sz;
	int err;

	file = mock_file(i915);
	if (IS_ERR(file))
		return PTR_ERR(file);

	ctx = live_context(i915, file);
	if (IS_ERR(ctx)) {
		err = PTR_ERR(ctx);
		goto out_file;
	}

	sz = round_up(prandom_u32_state(&prng) % SZ_32M, PAGE_SIZE);

	obj = i915_gem_object_create_lmem(i915, sz, 0);
	if (IS_ERR(obj)) {
		err = PTR_ERR(obj);
		goto out_file;
	}

	err = i915_gem_object_pin_pages_unlocked(obj);
	if (err)
		goto out_put;

	err = igt_gpu_write(ctx, obj);
	if (err)
		pr_err("igt_gpu_write failed(%d)\n", err);

	i915_gem_object_unpin_pages(obj);
out_put:
	i915_gem_object_put(obj);
out_file:
	fput(file);
	return err;
}

static struct intel_engine_cs *
random_engine_class(struct drm_i915_private *i915,
		    unsigned int class,
		    struct rnd_state *prng)
{
	struct intel_engine_cs *engine;
	unsigned int count;

	count = 0;
	for (engine = intel_engine_lookup_user(i915, class, 0);
	     engine && engine->uabi_class == class;
	     engine = rb_entry_safe(rb_next(&engine->uabi_node),
				    typeof(*engine), uabi_node))
		count++;

	count = i915_prandom_u32_max_state(count, prng);
	return intel_engine_lookup_user(i915, class, count);
}

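/*
 * igt_lmem_write_cpu() - fill an LMEM object from the GPU (blitter), then
 * hammer it through a CPU WC mapping with writes of various sizes and
 * alignments, sampling a random dword after each write to check it stuck.
 */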
static int igt_lmem_write_cpu(void *arg)
{
	struct drm_i915_private *i915 = arg;
	struct drm_i915_gem_object *obj;
	I915_RND_STATE(prng);
	IGT_TIMEOUT(end_time);
	u32 bytes[] = {
		0, /* rng placeholder */
		sizeof(u32),
		sizeof(u64),
		64, /* cl */
		PAGE_SIZE,
		PAGE_SIZE - sizeof(u32),
		PAGE_SIZE - sizeof(u64),
		PAGE_SIZE - 64,
	};
	struct intel_engine_cs *engine;
	u32 *vaddr;
	u32 sz;
	u32 i;
	int *order;
	int count;
	int err;

	engine = random_engine_class(i915, I915_ENGINE_CLASS_COPY, &prng);
	if (!engine)
		return 0;

	pr_info("%s: using %s\n", __func__, engine->name);

	sz = round_up(prandom_u32_state(&prng) % SZ_32M, PAGE_SIZE);
	sz = max_t(u32, 2 * PAGE_SIZE, sz);

	obj = i915_gem_object_create_lmem(i915, sz, I915_BO_ALLOC_CONTIGUOUS);
	if (IS_ERR(obj))
		return PTR_ERR(obj);

	vaddr = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WC);
	if (IS_ERR(vaddr)) {
		err = PTR_ERR(vaddr);
		goto out_put;
	}

	/* Put the pages into a known state -- from the gpu for added fun */
	intel_engine_pm_get(engine);
	err = i915_gem_object_fill_blt(obj, engine->kernel_context, 0xdeadbeaf);
	intel_engine_pm_put(engine);
	if (err)
		goto out_unpin;

	i915_gem_object_lock(obj, NULL);
	err = i915_gem_object_set_to_wc_domain(obj, true);
	i915_gem_object_unlock(obj);
	if (err)
		goto out_unpin;

	count = ARRAY_SIZE(bytes);
	order = i915_random_order(count * count, &prng);
	if (!order) {
		err = -ENOMEM;
		goto out_unpin;
	}

	/* A random multiple of u32, picked between [64, PAGE_SIZE - 64] */
	bytes[0] = igt_random_offset(&prng, 64, PAGE_SIZE - 64, 0, sizeof(u32));
	GEM_BUG_ON(!IS_ALIGNED(bytes[0], sizeof(u32)));

	i = 0;
	do {
		u32 offset;
		u32 align;
		u32 dword;
		u32 size;
		u32 val;

		size = bytes[order[i] % count];
		i = (i + 1) % (count * count);

		align = bytes[order[i] % count];
		i = (i + 1) % (count * count);

		align = max_t(u32, sizeof(u32), rounddown_pow_of_two(align));

		offset = igt_random_offset(&prng, 0, obj->base.size,
					   size, align);

		val = prandom_u32_state(&prng);
		memset32(vaddr + offset / sizeof(u32), val ^ 0xdeadbeaf,
			 size / sizeof(u32));

		/*
		 * Sample random dw -- don't waste precious time reading every
		 * single dw.
		 */
		dword = igt_random_offset(&prng, offset,
					  offset + size,
					  sizeof(u32), sizeof(u32));
		dword /= sizeof(u32);
		if (vaddr[dword] != (val ^ 0xdeadbeaf)) {
			pr_err("%s vaddr[%u]=%u, val=%u, size=%u, align=%u, offset=%u\n",
			       __func__, dword, vaddr[dword], val ^ 0xdeadbeaf,
			       size, align, offset);
			err = -EINVAL;
			break;
		}
	} while (!__igt_timeout(end_time, NULL));

	kfree(order);

out_unpin:
	i915_gem_object_unpin_map(obj);
out_put:
	i915_gem_object_put(obj);

	return err;
}

static const char *repr_type(u32 type)
{
	switch (type) {
	case I915_MAP_WB:
		return "WB";
	case I915_MAP_WC:
		return "WC";
	}

	return "";
}

static struct drm_i915_gem_object *
create_region_for_mapping(struct intel_memory_region *mr, u64 size, u32 type,
			  void **out_addr)
{
	struct drm_i915_gem_object *obj;
	void *addr;

	obj = i915_gem_object_create_region(mr, size, 0);
	if (IS_ERR(obj)) {
		if (PTR_ERR(obj) == -ENOSPC) /* Stolen memory */
			return ERR_PTR(-ENODEV);
		return obj;
	}

	addr = i915_gem_object_pin_map_unlocked(obj, type);
	if (IS_ERR(addr)) {
		i915_gem_object_put(obj);
		if (PTR_ERR(addr) == -ENXIO)
			return ERR_PTR(-ENODEV);
		return addr;
	}

	*out_addr = addr;
	return obj;
}

static int wrap_ktime_compare(const void *A, const void *B)
{
	const ktime_t *a = A, *b = B;

	return ktime_compare(*a, *b);
}

static void igt_memcpy_long(void *dst, const void *src, size_t size)
{
	unsigned long *tmp = dst;
	const unsigned long *s = src;

	size = size / sizeof(unsigned long);
	while (size--)
		*tmp++ = *s++;
}

static inline void igt_memcpy(void *dst, const void *src, size_t size)
{
	memcpy(dst, src, size);
}

static inline void igt_memcpy_from_wc(void *dst, const void *src, size_t size)
{
	i915_memcpy_from_wc(dst, src, size);
}

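/*
 * _perf_memcpy() - time several memcpy variants between a source and
 * destination object, each mapped WB or WC, and report the throughput in
 * MiB/s computed over the middle three of five sorted passes (with the
 * median counted twice).
 */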
static int _perf_memcpy(struct intel_memory_region *src_mr,
			struct intel_memory_region *dst_mr,
			u64 size, u32 src_type, u32 dst_type)
{
	struct drm_i915_private *i915 = src_mr->i915;
	const struct {
		const char *name;
		void (*copy)(void *dst, const void *src, size_t size);
		bool skip;
	} tests[] = {
		{
			"memcpy",
			igt_memcpy,
		},
		{
			"memcpy_long",
			igt_memcpy_long,
		},
		{
			"memcpy_from_wc",
			igt_memcpy_from_wc,
			!i915_has_memcpy_from_wc(),
		},
	};
	struct drm_i915_gem_object *src, *dst;
	void *src_addr, *dst_addr;
	int ret = 0;
	int i;

	src = create_region_for_mapping(src_mr, size, src_type, &src_addr);
	if (IS_ERR(src)) {
		ret = PTR_ERR(src);
		goto out;
	}

	dst = create_region_for_mapping(dst_mr, size, dst_type, &dst_addr);
	if (IS_ERR(dst)) {
		ret = PTR_ERR(dst);
		goto out_unpin_src;
	}

	for (i = 0; i < ARRAY_SIZE(tests); ++i) {
		ktime_t t[5];
		int pass;

		if (tests[i].skip)
			continue;

		for (pass = 0; pass < ARRAY_SIZE(t); pass++) {
			ktime_t t0, t1;

			t0 = ktime_get();

			tests[i].copy(dst_addr, src_addr, size);

			t1 = ktime_get();
			t[pass] = ktime_sub(t1, t0);
		}

		sort(t, ARRAY_SIZE(t), sizeof(*t), wrap_ktime_compare, NULL);
		if (t[0] <= 0) {
			/* ignore the impossible to protect our sanity */
			pr_debug("Skipping %s src(%s, %s) -> dst(%s, %s) %14s %4lluKiB copy, unstable measurement [%lld, %lld]\n",
				 __func__,
				 src_mr->name, repr_type(src_type),
				 dst_mr->name, repr_type(dst_type),
				 tests[i].name, size >> 10,
				 t[0], t[4]);
			continue;
		}

		pr_info("%s src(%s, %s) -> dst(%s, %s) %14s %4llu KiB copy: %5lld MiB/s\n",
			__func__,
			src_mr->name, repr_type(src_type),
			dst_mr->name, repr_type(dst_type),
			tests[i].name, size >> 10,
			div64_u64(mul_u32_u32(4 * size,
					      1000 * 1000 * 1000),
				  t[1] + 2 * t[2] + t[3]) >> 20);

		cond_resched();
	}

	i915_gem_object_unpin_map(dst);
	i915_gem_object_put(dst);
out_unpin_src:
	i915_gem_object_unpin_map(src);
	i915_gem_object_put(src);

	i915_gem_drain_freed_objects(i915);
out:
	if (ret == -ENODEV)
		ret = 0;

	return ret;
}

static int perf_memcpy(void *arg)
{
	struct drm_i915_private *i915 = arg;
	static const u32 types[] = {
		I915_MAP_WB,
		I915_MAP_WC,
	};
	static const u32 sizes[] = {
		SZ_4K,
		SZ_64K,
		SZ_4M,
	};
	struct intel_memory_region *src_mr, *dst_mr;
	int src_id, dst_id;
	int i, j, k;
	int ret;

	for_each_memory_region(src_mr, i915, src_id) {
		for_each_memory_region(dst_mr, i915, dst_id) {
			for (i = 0; i < ARRAY_SIZE(sizes); ++i) {
				for (j = 0; j < ARRAY_SIZE(types); ++j) {
					for (k = 0; k < ARRAY_SIZE(types); ++k) {
						ret = _perf_memcpy(src_mr,
								   dst_mr,
								   sizes[i],
								   types[j],
								   types[k]);
						if (ret)
							return ret;
					}
				}
			}
		}
	}

	return 0;
}

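/*
 * Selftest entry points: the igt_mock_* tests run against a mock device and
 * mock region, the igt_lmem_* tests require real LMEM hardware, and
 * perf_memcpy times copies between every memory region pair on the device.
 */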
int intel_memory_region_mock_selftests(void)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(igt_mock_reserve),
		SUBTEST(igt_mock_fill),
		SUBTEST(igt_mock_contiguous),
		SUBTEST(igt_mock_splintered_region),
		SUBTEST(igt_mock_max_segment),
	};
	struct intel_memory_region *mem;
	struct drm_i915_private *i915;
	int err;

	i915 = mock_gem_device();
	if (!i915)
		return -ENOMEM;

	mem = mock_region_create(i915, 0, SZ_2G, I915_GTT_PAGE_SIZE_4K, 0);
	if (IS_ERR(mem)) {
		pr_err("failed to create memory region\n");
		err = PTR_ERR(mem);
		goto out_unref;
	}

	err = i915_subtests(tests, mem);

	intel_memory_region_put(mem);
out_unref:
	mock_destroy_device(i915);
	return err;
}

int intel_memory_region_live_selftests(struct drm_i915_private *i915)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(igt_lmem_create),
		SUBTEST(igt_lmem_create_cleared_cpu),
		SUBTEST(igt_lmem_write_cpu),
		SUBTEST(igt_lmem_write_gpu),
	};

	if (!HAS_LMEM(i915)) {
		pr_info("device lacks LMEM support, skipping\n");
		return 0;
	}

	if (intel_gt_is_wedged(&i915->gt))
		return 0;

	return i915_live_subtests(tests, i915);
}

int intel_memory_region_perf_selftests(struct drm_i915_private *i915)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(perf_memcpy),
	};

	if (intel_gt_is_wedged(&i915->gt))
		return 0;

	return i915_live_subtests(tests, i915);
}