// SPDX-License-Identifier: MIT
/*
 * Copyright © 2022 Intel Corporation
 */

#include "i915_selftest.h"

#include "gem/i915_gem_internal.h"
#include "gem/i915_gem_region.h"

#include "gen8_engine_cs.h"
#include "i915_gem_ww.h"
#include "intel_engine_regs.h"
#include "intel_gpu_commands.h"
#include "intel_context.h"
#include "intel_gt.h"
#include "intel_ring.h"

#include "selftests/igt_flush_test.h"
#include "selftests/i915_random.h"

/* Write a single qword, addressed by its GPU address, through the vma's CPU mapping */
static void vma_set_qw(struct i915_vma *vma, u64 addr, u64 val)
{
	GEM_BUG_ON(addr < i915_vma_offset(vma));
	GEM_BUG_ON(addr >= i915_vma_offset(vma) + i915_vma_size(vma) + sizeof(val));
	memset64(page_mask_bits(vma->obj->mm.mapping) +
		 (addr - i915_vma_offset(vma)), val, 1);
}

static int
pte_tlbinv(struct intel_context *ce,
	   struct i915_vma *va,
	   struct i915_vma *vb,
	   u64 align,
	   void (*tlbinv)(struct i915_address_space *vm, u64 addr, u64 length),
	   u64 length,
	   struct rnd_state *prng)
{
	const unsigned int pat_index =
		i915_gem_get_pat_index(ce->vm->i915, I915_CACHE_NONE);
	struct drm_i915_gem_object *batch;
	struct drm_mm_node vb_node;
	struct i915_request *rq;
	struct i915_vma *vma;
	u64 addr;
	int err;
	u32 *cs;

	batch = i915_gem_object_create_internal(ce->vm->i915, 4096);
	if (IS_ERR(batch))
		return PTR_ERR(batch);

	vma = i915_vma_instance(batch, ce->vm, NULL);
	if (IS_ERR(vma)) {
		err = PTR_ERR(vma);
		goto out;
	}

	err = i915_vma_pin(vma, 0, 0, PIN_USER);
	if (err)
		goto out;

	/* Pin va at a random but aligned offset after vma */
	addr = round_up(vma->node.start + vma->node.size, align);
	/* MI_CONDITIONAL_BATCH_BUFFER_END limits the address to 48b */
	addr = igt_random_offset(prng, addr, min(ce->vm->total, BIT_ULL(48)),
				 va->size, align);
	err = i915_vma_pin(va, 0, 0, addr | PIN_OFFSET_FIXED | PIN_USER);
	if (err) {
		pr_err("Cannot pin at %llx+%llx\n", addr, va->size);
		goto out;
	}
	GEM_BUG_ON(i915_vma_offset(va) != addr);
	if (vb != va) {
		vb_node = vb->node;
		vb->node = va->node; /* overwrites the _same_ PTE */
	}

	/*
	 * Now choose a random dword within the first pinned page.
	 *
	 * SZ_64K pages on dg1 require that the whole PT be marked as
	 * containing 64KiB entries. So we make sure that the vma covers the
	 * whole PT, despite being randomly aligned to 64KiB, and restrict
	 * our sampling to the 2MiB PT within which we know we will be using
	 * 64KiB pages.
	 */
	if (align == SZ_64K)
		addr = round_up(addr, SZ_2M);
	addr = igt_random_offset(prng, addr, addr + align, 8, 8);

	if (va != vb)
		pr_info("%s(%s): Sampling %llx, with alignment %llx, using PTE size %x (phys %x, sg %x), invalidate:%llx+%llx\n",
			ce->engine->name, va->obj->mm.region->name ?: "smem",
			addr, align, va->resource->page_sizes_gtt,
			va->page_sizes.phys, va->page_sizes.sg,
			addr & -length, length);

	cs = i915_gem_object_pin_map_unlocked(batch, I915_MAP_WC);
	*cs++ = MI_NOOP; /* for later termination */
	/*
	 * Sample the target to see if we spot the updated backing store.
	 * Gen8 VCS compares the immediate value with the bitwise-and of two
	 * consecutive DWORDs pointed to by addr; other gens/engines compare
	 * the value with the single DWORD pointed to by addr. Moreover we
	 * want to exercise DWORD-sized invalidations. The values below have
	 * been chosen to fulfill all these requirements.
	 */
	*cs++ = MI_CONDITIONAL_BATCH_BUFFER_END | MI_DO_COMPARE | 2;
	*cs++ = 0; /* break if *addr == 0 */
	*cs++ = lower_32_bits(addr);
	*cs++ = upper_32_bits(addr);
	vma_set_qw(va, addr, -1);
	vma_set_qw(vb, addr, 0);
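
	/*
	 * va's qword is preset to -1 (non-zero) and vb's to 0: the COND_BBE
	 * above keeps looping while the stale TLB entry still resolves addr
	 * to va's backing store, and terminates once the rewritten PTE
	 * pointing at vb becomes visible.
	 */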

	/* Keep sampling until we get bored */
	*cs++ = MI_BATCH_BUFFER_START | BIT(8) | 1;
	*cs++ = lower_32_bits(i915_vma_offset(vma));
	*cs++ = upper_32_bits(i915_vma_offset(vma));

	i915_gem_object_flush_map(batch);

	rq = i915_request_create(ce);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto out_va;
	}

	err = rq->engine->emit_bb_start(rq, i915_vma_offset(vma), 0, 0);
	if (err) {
		i915_request_add(rq);
		goto out_va;
	}

	i915_request_get(rq);
	i915_request_add(rq);

	/* Short sleep to sanitycheck the batch is spinning before we begin */
	msleep(10);
	if (va == vb) {
		if (!i915_request_completed(rq)) {
			pr_err("%s(%s): Semaphore sanitycheck failed %llx, with alignment %llx, using PTE size %x (phys %x, sg %x)\n",
			       ce->engine->name, va->obj->mm.region->name ?: "smem",
			       addr, align, va->resource->page_sizes_gtt,
			       va->page_sizes.phys, va->page_sizes.sg);
			err = -EIO;
		}
	} else if (!i915_request_completed(rq)) {
		struct i915_vma_resource vb_res = {
			.bi.pages = vb->obj->mm.pages,
			.bi.page_sizes = vb->obj->mm.page_sizes,
			.start = i915_vma_offset(vb),
			.vma_size = i915_vma_size(vb)
		};
		unsigned int pte_flags = 0;

		/* Flip the PTE between A and B */
		if (i915_gem_object_is_lmem(vb->obj))
			pte_flags |= PTE_LM;
		ce->vm->insert_entries(ce->vm, &vb_res, pat_index, pte_flags);

		/* Flush the PTE update to concurrent HW */
		tlbinv(ce->vm, addr & -length, length);

		if (wait_for(i915_request_completed(rq), HZ / 2)) {
			pr_err("%s: Request did not complete; the COND_BBE did not read the updated PTE\n",
			       ce->engine->name);
			err = -EINVAL;
		}
	} else {
		pr_err("Spinner ended unexpectedly\n");
		err = -EIO;
	}
	i915_request_put(rq);
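
	/*
	 * Terminate the batch in case it is still spinning: the final
	 * MI_BATCH_BUFFER_START jumps back to the start of the batch, so
	 * overwriting the leading MI_NOOP with MI_BATCH_BUFFER_END ends the
	 * loop on its next pass.
	 */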
	cs = page_mask_bits(batch->mm.mapping);
	*cs = MI_BATCH_BUFFER_END;
	wmb();

out_va:
	if (vb != va)
		vb->node = vb_node;
	i915_vma_unpin(va);
	if (i915_vma_unbind_unlocked(va))
		err = -EIO;
out:
	i915_gem_object_put(batch);
	return err;
}

static struct drm_i915_gem_object *create_lmem(struct intel_gt *gt)
{
	struct intel_memory_region *mr = gt->i915->mm.regions[INTEL_REGION_LMEM_0];
	resource_size_t size = SZ_1G;

	/*
	 * Allocating the largest possible page size allows us to test all
	 * page types. To succeed with both allocations, especially in the
	 * case of Small BAR, try to allocate no more than a quarter of the
	 * mappable memory.
	 */
	if (mr && size > mr->io_size / 4)
		size = mr->io_size / 4;

	return i915_gem_object_create_lmem(gt->i915, size, I915_BO_ALLOC_CONTIGUOUS);
}

static struct drm_i915_gem_object *create_smem(struct intel_gt *gt)
{
	/*
	 * SZ_64K pages require covering the whole 2M PT (gen8 to tgl/dg1).
	 * While that does not require the whole 2M block to be contiguous,
	 * it is easier to make it so, since we need that for SZ_2M pages.
	 * Since we randomly offset the start of the vma, we need a 4M object
	 * so that there is a 2M range within it that is suitable for SZ_64K
	 * PTEs.
	 */
	return i915_gem_object_create_internal(gt->i915, SZ_4M);
}

static int
mem_tlbinv(struct intel_gt *gt,
	   struct drm_i915_gem_object *(*create_fn)(struct intel_gt *),
	   void (*tlbinv)(struct i915_address_space *vm, u64 addr, u64 length))
{
	unsigned int ppgtt_size = RUNTIME_INFO(gt->i915)->ppgtt_size;
	struct intel_engine_cs *engine;
	struct drm_i915_gem_object *A, *B;
	struct i915_ppgtt *ppgtt;
	struct i915_vma *va, *vb;
	enum intel_engine_id id;
	I915_RND_STATE(prng);
	void *vaddr;
	int err;

	/*
	 * Check that the TLB invalidate is able to revoke an active
	 * page. We load a page into a spinning COND_BBE loop and then
	 * remap that page to a new physical address. The old address is
	 * retained in the TLB cache (and so the loop keeps spinning) until
	 * we issue an invalidate.
	 */

	A = create_fn(gt);
	if (IS_ERR(A))
		return PTR_ERR(A);

	vaddr = i915_gem_object_pin_map_unlocked(A, I915_MAP_WC);
	if (IS_ERR(vaddr)) {
		err = PTR_ERR(vaddr);
		goto out_a;
	}

	B = create_fn(gt);
	if (IS_ERR(B)) {
		err = PTR_ERR(B);
		goto out_a;
	}

	vaddr = i915_gem_object_pin_map_unlocked(B, I915_MAP_WC);
	if (IS_ERR(vaddr)) {
		err = PTR_ERR(vaddr);
		goto out_b;
	}

	GEM_BUG_ON(A->base.size != B->base.size);
	if ((A->mm.page_sizes.phys | B->mm.page_sizes.phys) & (A->base.size - 1))
		pr_warn("Failed to allocate contiguous pages for size %zx\n",
			A->base.size);

	ppgtt = i915_ppgtt_create(gt, 0);
	if (IS_ERR(ppgtt)) {
		err = PTR_ERR(ppgtt);
		goto out_b;
	}

	va = i915_vma_instance(A, &ppgtt->vm, NULL);
	if (IS_ERR(va)) {
		err = PTR_ERR(va);
		goto out_vm;
	}

	vb = i915_vma_instance(B, &ppgtt->vm, NULL);
	if (IS_ERR(vb)) {
		err = PTR_ERR(vb);
		goto out_vm;
	}

	err = 0;
	for_each_engine(engine, gt, id) {
		struct i915_gem_ww_ctx ww;
		struct intel_context *ce;
		int bit;

		ce = intel_context_create(engine);
		if (IS_ERR(ce)) {
			err = PTR_ERR(ce);
			break;
		}

		i915_vm_put(ce->vm);
		ce->vm = i915_vm_get(&ppgtt->vm);

		for_i915_gem_ww(&ww, err, true)
			err = intel_context_pin_ww(ce, &ww);
		if (err)
			goto err_put;

		for_each_set_bit(bit,
				 (unsigned long *)&RUNTIME_INFO(gt->i915)->page_sizes,
				 BITS_PER_TYPE(RUNTIME_INFO(gt->i915)->page_sizes)) {
			unsigned int len;

			if (BIT_ULL(bit) < i915_vm_obj_min_alignment(va->vm, va->obj))
				continue;

			/* sanitycheck the semaphore wake up */
			err = pte_tlbinv(ce, va, va,
					 BIT_ULL(bit),
					 NULL, SZ_4K,
					 &prng);
			if (err)
				goto err_unpin;

			for (len = 2; len <= ppgtt_size; len = min(2 * len, ppgtt_size)) {
				err = pte_tlbinv(ce, va, vb,
						 BIT_ULL(bit),
						 tlbinv,
						 BIT_ULL(len),
						 &prng);
				if (err)
					goto err_unpin;
				if (len == ppgtt_size)
					break;
			}
		}
err_unpin:
		intel_context_unpin(ce);
err_put:
		intel_context_put(ce);
		if (err)
			break;
	}

	if (igt_flush_test(gt->i915))
		err = -EIO;

out_vm:
	i915_vm_put(&ppgtt->vm);
out_b:
	i915_gem_object_put(B);
out_a:
	i915_gem_object_put(A);
	return err;
}
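
/*
 * tlbinv_full ignores the addr/length hints and flushes the TLBs for the
 * whole GT; the current invalidation seqno is passed with its low bit set
 * so that the invalidation is not skipped as already complete.
 */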
static void tlbinv_full(struct i915_address_space *vm, u64 addr, u64 length)
{
	intel_gt_invalidate_tlb(vm->gt, intel_gt_tlb_seqno(vm->gt) | 1);
}

static int invalidate_full(void *arg)
{
	struct intel_gt *gt = arg;
	int err;

	if (GRAPHICS_VER(gt->i915) < 8)
		return 0; /* TLB invalidate not implemented */

	err = mem_tlbinv(gt, create_smem, tlbinv_full);
	if (err == 0)
		err = mem_tlbinv(gt, create_lmem, tlbinv_full);
	if (err == -ENODEV || err == -ENXIO)
		err = 0;

	return err;
}

int intel_tlb_live_selftests(struct drm_i915_private *i915)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(invalidate_full),
	};
	struct intel_gt *gt;
	unsigned int i;

	for_each_gt(gt, i915, i) {
		int err;

		if (intel_gt_is_wedged(gt))
			continue;

		err = intel_gt_live_subtests(tests, gt);
		if (err)
			return err;
	}

	return 0;
}