1 /* 2 * SPDX-License-Identifier: MIT 3 * 4 * Copyright © 2016 Intel Corporation 5 */ 6 7 #include <linux/prime_numbers.h> 8 9 #include "gt/intel_gt.h" 10 #include "gt/intel_gt_pm.h" 11 #include "huge_gem_object.h" 12 #include "i915_selftest.h" 13 #include "selftests/igt_flush_test.h" 14 15 struct tile { 16 unsigned int width; 17 unsigned int height; 18 unsigned int stride; 19 unsigned int size; 20 unsigned int tiling; 21 unsigned int swizzle; 22 }; 23 24 static u64 swizzle_bit(unsigned int bit, u64 offset) 25 { 26 return (offset & BIT_ULL(bit)) >> (bit - 6); 27 } 28 29 static u64 tiled_offset(const struct tile *tile, u64 v) 30 { 31 u64 x, y; 32 33 if (tile->tiling == I915_TILING_NONE) 34 return v; 35 36 y = div64_u64_rem(v, tile->stride, &x); 37 v = div64_u64_rem(y, tile->height, &y) * tile->stride * tile->height; 38 39 if (tile->tiling == I915_TILING_X) { 40 v += y * tile->width; 41 v += div64_u64_rem(x, tile->width, &x) << tile->size; 42 v += x; 43 } else if (tile->width == 128) { 44 const unsigned int ytile_span = 16; 45 const unsigned int ytile_height = 512; 46 47 v += y * ytile_span; 48 v += div64_u64_rem(x, ytile_span, &x) * ytile_height; 49 v += x; 50 } else { 51 const unsigned int ytile_span = 32; 52 const unsigned int ytile_height = 256; 53 54 v += y * ytile_span; 55 v += div64_u64_rem(x, ytile_span, &x) * ytile_height; 56 v += x; 57 } 58 59 switch (tile->swizzle) { 60 case I915_BIT_6_SWIZZLE_9: 61 v ^= swizzle_bit(9, v); 62 break; 63 case I915_BIT_6_SWIZZLE_9_10: 64 v ^= swizzle_bit(9, v) ^ swizzle_bit(10, v); 65 break; 66 case I915_BIT_6_SWIZZLE_9_11: 67 v ^= swizzle_bit(9, v) ^ swizzle_bit(11, v); 68 break; 69 case I915_BIT_6_SWIZZLE_9_10_11: 70 v ^= swizzle_bit(9, v) ^ swizzle_bit(10, v) ^ swizzle_bit(11, v); 71 break; 72 } 73 74 return v; 75 } 76 77 static int check_partial_mapping(struct drm_i915_gem_object *obj, 78 const struct tile *tile, 79 unsigned long end_time) 80 { 81 const unsigned int nreal = obj->scratch / PAGE_SIZE; 82 const unsigned long npages = obj->base.size / PAGE_SIZE; 83 struct i915_vma *vma; 84 unsigned long page; 85 int err; 86 87 if (igt_timeout(end_time, 88 "%s: timed out before tiling=%d stride=%d\n", 89 __func__, tile->tiling, tile->stride)) 90 return -EINTR; 91 92 err = i915_gem_object_set_tiling(obj, tile->tiling, tile->stride); 93 if (err) { 94 pr_err("Failed to set tiling mode=%u, stride=%u, err=%d\n", 95 tile->tiling, tile->stride, err); 96 return err; 97 } 98 99 GEM_BUG_ON(i915_gem_object_get_tiling(obj) != tile->tiling); 100 GEM_BUG_ON(i915_gem_object_get_stride(obj) != tile->stride); 101 102 i915_gem_object_lock(obj); 103 err = i915_gem_object_set_to_gtt_domain(obj, true); 104 i915_gem_object_unlock(obj); 105 if (err) { 106 pr_err("Failed to flush to GTT write domain; err=%d\n", err); 107 return err; 108 } 109 110 for_each_prime_number_from(page, 1, npages) { 111 struct i915_ggtt_view view = 112 compute_partial_view(obj, page, MIN_CHUNK_PAGES); 113 u32 __iomem *io; 114 struct page *p; 115 unsigned int n; 116 u64 offset; 117 u32 *cpu; 118 119 GEM_BUG_ON(view.partial.size > nreal); 120 cond_resched(); 121 122 vma = i915_gem_object_ggtt_pin(obj, &view, 0, 0, PIN_MAPPABLE); 123 if (IS_ERR(vma)) { 124 pr_err("Failed to pin partial view: offset=%lu; err=%d\n", 125 page, (int)PTR_ERR(vma)); 126 return PTR_ERR(vma); 127 } 128 129 n = page - view.partial.offset; 130 GEM_BUG_ON(n >= view.partial.size); 131 132 io = i915_vma_pin_iomap(vma); 133 i915_vma_unpin(vma); 134 if (IS_ERR(io)) { 135 pr_err("Failed to iomap partial view: offset=%lu; err=%d\n", 136 page, (int)PTR_ERR(io)); 137 return PTR_ERR(io); 138 } 139 140 iowrite32(page, io + n * PAGE_SIZE / sizeof(*io)); 141 i915_vma_unpin_iomap(vma); 142 143 offset = tiled_offset(tile, page << PAGE_SHIFT); 144 if (offset >= obj->base.size) 145 continue; 146 147 intel_gt_flush_ggtt_writes(&to_i915(obj->base.dev)->gt); 148 149 p = i915_gem_object_get_page(obj, offset >> PAGE_SHIFT); 150 cpu = kmap(p) + offset_in_page(offset); 151 drm_clflush_virt_range(cpu, sizeof(*cpu)); 152 if (*cpu != (u32)page) { 153 pr_err("Partial view for %lu [%u] (offset=%llu, size=%u [%llu, row size %u], fence=%d, tiling=%d, stride=%d) misalignment, expected write to page (%llu + %u [0x%llx]) of 0x%x, found 0x%x\n", 154 page, n, 155 view.partial.offset, 156 view.partial.size, 157 vma->size >> PAGE_SHIFT, 158 tile->tiling ? tile_row_pages(obj) : 0, 159 vma->fence ? vma->fence->id : -1, tile->tiling, tile->stride, 160 offset >> PAGE_SHIFT, 161 (unsigned int)offset_in_page(offset), 162 offset, 163 (u32)page, *cpu); 164 err = -EINVAL; 165 } 166 *cpu = 0; 167 drm_clflush_virt_range(cpu, sizeof(*cpu)); 168 kunmap(p); 169 if (err) 170 return err; 171 172 i915_vma_destroy(vma); 173 } 174 175 return 0; 176 } 177 178 static int igt_partial_tiling(void *arg) 179 { 180 const unsigned int nreal = 1 << 12; /* largest tile row x2 */ 181 struct drm_i915_private *i915 = arg; 182 struct drm_i915_gem_object *obj; 183 intel_wakeref_t wakeref; 184 int tiling; 185 int err; 186 187 /* We want to check the page mapping and fencing of a large object 188 * mmapped through the GTT. The object we create is larger than can 189 * possibly be mmaped as a whole, and so we must use partial GGTT vma. 190 * We then check that a write through each partial GGTT vma ends up 191 * in the right set of pages within the object, and with the expected 192 * tiling, which we verify by manual swizzling. 193 */ 194 195 obj = huge_gem_object(i915, 196 nreal << PAGE_SHIFT, 197 (1 + next_prime_number(i915->ggtt.vm.total >> PAGE_SHIFT)) << PAGE_SHIFT); 198 if (IS_ERR(obj)) 199 return PTR_ERR(obj); 200 201 err = i915_gem_object_pin_pages(obj); 202 if (err) { 203 pr_err("Failed to allocate %u pages (%lu total), err=%d\n", 204 nreal, obj->base.size / PAGE_SIZE, err); 205 goto out; 206 } 207 208 mutex_lock(&i915->drm.struct_mutex); 209 wakeref = intel_runtime_pm_get(&i915->runtime_pm); 210 211 if (1) { 212 IGT_TIMEOUT(end); 213 struct tile tile; 214 215 tile.height = 1; 216 tile.width = 1; 217 tile.size = 0; 218 tile.stride = 0; 219 tile.swizzle = I915_BIT_6_SWIZZLE_NONE; 220 tile.tiling = I915_TILING_NONE; 221 222 err = check_partial_mapping(obj, &tile, end); 223 if (err && err != -EINTR) 224 goto out_unlock; 225 } 226 227 for (tiling = I915_TILING_X; tiling <= I915_TILING_Y; tiling++) { 228 IGT_TIMEOUT(end); 229 unsigned int max_pitch; 230 unsigned int pitch; 231 struct tile tile; 232 233 if (i915->quirks & QUIRK_PIN_SWIZZLED_PAGES) 234 /* 235 * The swizzling pattern is actually unknown as it 236 * varies based on physical address of each page. 237 * See i915_gem_detect_bit_6_swizzle(). 238 */ 239 break; 240 241 tile.tiling = tiling; 242 switch (tiling) { 243 case I915_TILING_X: 244 tile.swizzle = i915->mm.bit_6_swizzle_x; 245 break; 246 case I915_TILING_Y: 247 tile.swizzle = i915->mm.bit_6_swizzle_y; 248 break; 249 } 250 251 GEM_BUG_ON(tile.swizzle == I915_BIT_6_SWIZZLE_UNKNOWN); 252 if (tile.swizzle == I915_BIT_6_SWIZZLE_9_17 || 253 tile.swizzle == I915_BIT_6_SWIZZLE_9_10_17) 254 continue; 255 256 if (INTEL_GEN(i915) <= 2) { 257 tile.height = 16; 258 tile.width = 128; 259 tile.size = 11; 260 } else if (tile.tiling == I915_TILING_Y && 261 HAS_128_BYTE_Y_TILING(i915)) { 262 tile.height = 32; 263 tile.width = 128; 264 tile.size = 12; 265 } else { 266 tile.height = 8; 267 tile.width = 512; 268 tile.size = 12; 269 } 270 271 if (INTEL_GEN(i915) < 4) 272 max_pitch = 8192 / tile.width; 273 else if (INTEL_GEN(i915) < 7) 274 max_pitch = 128 * I965_FENCE_MAX_PITCH_VAL / tile.width; 275 else 276 max_pitch = 128 * GEN7_FENCE_MAX_PITCH_VAL / tile.width; 277 278 for (pitch = max_pitch; pitch; pitch >>= 1) { 279 tile.stride = tile.width * pitch; 280 err = check_partial_mapping(obj, &tile, end); 281 if (err == -EINTR) 282 goto next_tiling; 283 if (err) 284 goto out_unlock; 285 286 if (pitch > 2 && INTEL_GEN(i915) >= 4) { 287 tile.stride = tile.width * (pitch - 1); 288 err = check_partial_mapping(obj, &tile, end); 289 if (err == -EINTR) 290 goto next_tiling; 291 if (err) 292 goto out_unlock; 293 } 294 295 if (pitch < max_pitch && INTEL_GEN(i915) >= 4) { 296 tile.stride = tile.width * (pitch + 1); 297 err = check_partial_mapping(obj, &tile, end); 298 if (err == -EINTR) 299 goto next_tiling; 300 if (err) 301 goto out_unlock; 302 } 303 } 304 305 if (INTEL_GEN(i915) >= 4) { 306 for_each_prime_number(pitch, max_pitch) { 307 tile.stride = tile.width * pitch; 308 err = check_partial_mapping(obj, &tile, end); 309 if (err == -EINTR) 310 goto next_tiling; 311 if (err) 312 goto out_unlock; 313 } 314 } 315 316 next_tiling: ; 317 } 318 319 out_unlock: 320 intel_runtime_pm_put(&i915->runtime_pm, wakeref); 321 mutex_unlock(&i915->drm.struct_mutex); 322 i915_gem_object_unpin_pages(obj); 323 out: 324 i915_gem_object_put(obj); 325 return err; 326 } 327 328 static int make_obj_busy(struct drm_i915_gem_object *obj) 329 { 330 struct drm_i915_private *i915 = to_i915(obj->base.dev); 331 struct intel_engine_cs *engine; 332 enum intel_engine_id id; 333 struct i915_vma *vma; 334 int err; 335 336 vma = i915_vma_instance(obj, &i915->ggtt.vm, NULL); 337 if (IS_ERR(vma)) 338 return PTR_ERR(vma); 339 340 err = i915_vma_pin(vma, 0, 0, PIN_USER); 341 if (err) 342 return err; 343 344 for_each_engine(engine, i915, id) { 345 struct i915_request *rq; 346 347 rq = i915_request_create(engine->kernel_context); 348 if (IS_ERR(rq)) { 349 i915_vma_unpin(vma); 350 return PTR_ERR(rq); 351 } 352 353 i915_vma_lock(vma); 354 err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE); 355 i915_vma_unlock(vma); 356 357 i915_request_add(rq); 358 } 359 360 i915_vma_unpin(vma); 361 i915_gem_object_put(obj); /* leave it only alive via its active ref */ 362 363 return err; 364 } 365 366 static bool assert_mmap_offset(struct drm_i915_private *i915, 367 unsigned long size, 368 int expected) 369 { 370 struct drm_i915_gem_object *obj; 371 int err; 372 373 obj = i915_gem_object_create_internal(i915, size); 374 if (IS_ERR(obj)) 375 return PTR_ERR(obj); 376 377 err = create_mmap_offset(obj); 378 i915_gem_object_put(obj); 379 380 return err == expected; 381 } 382 383 static void disable_retire_worker(struct drm_i915_private *i915) 384 { 385 i915_gem_shrinker_unregister(i915); 386 387 intel_gt_pm_get(&i915->gt); 388 389 cancel_delayed_work_sync(&i915->gem.retire_work); 390 flush_work(&i915->gem.idle_work); 391 } 392 393 static void restore_retire_worker(struct drm_i915_private *i915) 394 { 395 intel_gt_pm_put(&i915->gt); 396 397 mutex_lock(&i915->drm.struct_mutex); 398 igt_flush_test(i915, I915_WAIT_LOCKED); 399 mutex_unlock(&i915->drm.struct_mutex); 400 401 i915_gem_shrinker_register(i915); 402 } 403 404 static void mmap_offset_lock(struct drm_i915_private *i915) 405 __acquires(&i915->drm.vma_offset_manager->vm_lock) 406 { 407 write_lock(&i915->drm.vma_offset_manager->vm_lock); 408 } 409 410 static void mmap_offset_unlock(struct drm_i915_private *i915) 411 __releases(&i915->drm.vma_offset_manager->vm_lock) 412 { 413 write_unlock(&i915->drm.vma_offset_manager->vm_lock); 414 } 415 416 static int igt_mmap_offset_exhaustion(void *arg) 417 { 418 struct drm_i915_private *i915 = arg; 419 struct drm_mm *mm = &i915->drm.vma_offset_manager->vm_addr_space_mm; 420 struct drm_i915_gem_object *obj; 421 struct drm_mm_node resv, *hole; 422 u64 hole_start, hole_end; 423 int loop, err; 424 425 /* Disable background reaper */ 426 disable_retire_worker(i915); 427 GEM_BUG_ON(!i915->gt.awake); 428 429 /* Trim the device mmap space to only a page */ 430 memset(&resv, 0, sizeof(resv)); 431 drm_mm_for_each_hole(hole, mm, hole_start, hole_end) { 432 resv.start = hole_start; 433 resv.size = hole_end - hole_start - 1; /* PAGE_SIZE units */ 434 mmap_offset_lock(i915); 435 err = drm_mm_reserve_node(mm, &resv); 436 mmap_offset_unlock(i915); 437 if (err) { 438 pr_err("Failed to trim VMA manager, err=%d\n", err); 439 goto out_park; 440 } 441 break; 442 } 443 444 /* Just fits! */ 445 if (!assert_mmap_offset(i915, PAGE_SIZE, 0)) { 446 pr_err("Unable to insert object into single page hole\n"); 447 err = -EINVAL; 448 goto out; 449 } 450 451 /* Too large */ 452 if (!assert_mmap_offset(i915, 2 * PAGE_SIZE, -ENOSPC)) { 453 pr_err("Unexpectedly succeeded in inserting too large object into single page hole\n"); 454 err = -EINVAL; 455 goto out; 456 } 457 458 /* Fill the hole, further allocation attempts should then fail */ 459 obj = i915_gem_object_create_internal(i915, PAGE_SIZE); 460 if (IS_ERR(obj)) { 461 err = PTR_ERR(obj); 462 goto out; 463 } 464 465 err = create_mmap_offset(obj); 466 if (err) { 467 pr_err("Unable to insert object into reclaimed hole\n"); 468 goto err_obj; 469 } 470 471 if (!assert_mmap_offset(i915, PAGE_SIZE, -ENOSPC)) { 472 pr_err("Unexpectedly succeeded in inserting object into no holes!\n"); 473 err = -EINVAL; 474 goto err_obj; 475 } 476 477 i915_gem_object_put(obj); 478 479 /* Now fill with busy dead objects that we expect to reap */ 480 for (loop = 0; loop < 3; loop++) { 481 if (intel_gt_is_wedged(&i915->gt)) 482 break; 483 484 obj = i915_gem_object_create_internal(i915, PAGE_SIZE); 485 if (IS_ERR(obj)) { 486 err = PTR_ERR(obj); 487 goto out; 488 } 489 490 mutex_lock(&i915->drm.struct_mutex); 491 err = make_obj_busy(obj); 492 mutex_unlock(&i915->drm.struct_mutex); 493 if (err) { 494 pr_err("[loop %d] Failed to busy the object\n", loop); 495 goto err_obj; 496 } 497 } 498 499 out: 500 mmap_offset_lock(i915); 501 drm_mm_remove_node(&resv); 502 mmap_offset_unlock(i915); 503 out_park: 504 restore_retire_worker(i915); 505 return err; 506 err_obj: 507 i915_gem_object_put(obj); 508 goto out; 509 } 510 511 int i915_gem_mman_live_selftests(struct drm_i915_private *i915) 512 { 513 static const struct i915_subtest tests[] = { 514 SUBTEST(igt_partial_tiling), 515 SUBTEST(igt_mmap_offset_exhaustion), 516 }; 517 518 return i915_subtests(tests, i915); 519 } 520