// SPDX-License-Identifier: MIT
/*
 * Copyright © 2022 Intel Corporation
 */

#include "i915_selftest.h"

#include "gem/i915_gem_internal.h"
#include "gem/i915_gem_region.h"

#include "gen8_engine_cs.h"
#include "i915_gem_ww.h"
#include "intel_engine_regs.h"
#include "intel_gpu_commands.h"
#include "intel_context.h"
#include "intel_gt.h"
#include "intel_ring.h"

#include "selftests/igt_flush_test.h"
#include "selftests/i915_random.h"

/* Write a single qword at GPU address @addr through the CPU mapping of @vma's object */
static void vma_set_qw(struct i915_vma *vma, u64 addr, u64 val)
{
	GEM_BUG_ON(addr < i915_vma_offset(vma));
	GEM_BUG_ON(addr >= i915_vma_offset(vma) + i915_vma_size(vma) + sizeof(val));
	memset64(page_mask_bits(vma->obj->mm.mapping) +
		 (addr - i915_vma_offset(vma)), val, 1);
}

static int
pte_tlbinv(struct intel_context *ce,
	   struct i915_vma *va,
	   struct i915_vma *vb,
	   u64 align,
	   void (*tlbinv)(struct i915_address_space *vm, u64 addr, u64 length),
	   u64 length,
	   struct rnd_state *prng)
{
	struct drm_i915_gem_object *batch;
	struct drm_mm_node vb_node;
	struct i915_request *rq;
	struct i915_vma *vma;
	u64 addr;
	int err;
	u32 *cs;

	batch = i915_gem_object_create_internal(ce->vm->i915, 4096);
	if (IS_ERR(batch))
		return PTR_ERR(batch);

	vma = i915_vma_instance(batch, ce->vm, NULL);
	if (IS_ERR(vma)) {
		err = PTR_ERR(vma);
		goto out;
	}

	err = i915_vma_pin(vma, 0, 0, PIN_USER);
	if (err)
		goto out;

	/* Pin va at a random but aligned offset after vma */
	addr = round_up(vma->node.start + vma->node.size, align);
	/* MI_CONDITIONAL_BATCH_BUFFER_END limits the address to 48b */
	addr = igt_random_offset(prng, addr, min(ce->vm->total, BIT_ULL(48)),
				 va->size, align);
	err = i915_vma_pin(va, 0, 0, addr | PIN_OFFSET_FIXED | PIN_USER);
	if (err) {
		pr_err("Cannot pin at %llx+%llx\n", addr, va->size);
		goto out;
	}
	GEM_BUG_ON(i915_vma_offset(va) != addr);
	if (vb != va) {
		vb_node = vb->node;
		vb->node = va->node; /* overwrites the _same_ PTE */
	}

	/*
	 * Now choose a random dword within the 1st pinned page.
	 *
	 * SZ_64K pages on dg1 require that the whole PT be marked as
	 * containing 64KiB entries. So we make sure that the vma covers
	 * the whole PT, despite being randomly aligned to 64KiB, and
	 * restrict our sampling to the 2MiB PT within which we know
	 * that we will be using 64KiB pages.
	 */
	if (align == SZ_64K)
		addr = round_up(addr, SZ_2M);
	addr = igt_random_offset(prng, addr, addr + align, 8, 8);

	if (va != vb)
		pr_info("%s(%s): Sampling %llx, with alignment %llx, using PTE size %x (phys %x, sg %x), invalidate:%llx+%llx\n",
			ce->engine->name, va->obj->mm.region->name ?: "smem",
			addr, align, va->resource->page_sizes_gtt,
			va->page_sizes.phys, va->page_sizes.sg,
			addr & -length, length);

	cs = i915_gem_object_pin_map_unlocked(batch, I915_MAP_WC);
	if (IS_ERR(cs)) {
		err = PTR_ERR(cs);
		goto out_va;
	}

	*cs++ = MI_NOOP; /* for later termination */
	/*
	 * Sample the target to see if we spot the updated backing store.
	 * The gen8 VCS compares the immediate value with the bitwise AND of
	 * two consecutive DWORDs pointed to by addr; other gens/engines
	 * compare the value with the single DWORD pointed to by addr.
	 * Moreover, we want to exercise DWORD-sized invalidations. The
	 * values below have been chosen to fulfill all these requirements.
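	 * The page backing va is set to ~0, so the termination condition
	 * (*addr == 0) is never met and the batch keeps looping; the page
	 * backing vb is zeroed, so as soon as the PTE points at vb (and the
	 * stale TLB entry has been invalidated) the COND_BBE reads 0 and
	 * ends the batch.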
	 */
	*cs++ = MI_CONDITIONAL_BATCH_BUFFER_END | MI_DO_COMPARE | 2;
	*cs++ = 0; /* break if *addr == 0 */
	*cs++ = lower_32_bits(addr);
	*cs++ = upper_32_bits(addr);
	vma_set_qw(va, addr, -1);
	vma_set_qw(vb, addr, 0);

	/* Keep sampling until we get bored */
	*cs++ = MI_BATCH_BUFFER_START | BIT(8) | 1;
	*cs++ = lower_32_bits(i915_vma_offset(vma));
	*cs++ = upper_32_bits(i915_vma_offset(vma));

	i915_gem_object_flush_map(batch);

	rq = i915_request_create(ce);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto out_va;
	}

	err = rq->engine->emit_bb_start(rq, i915_vma_offset(vma), 0, 0);
	if (err) {
		i915_request_add(rq);
		goto out_va;
	}

	i915_request_get(rq);
	i915_request_add(rq);

	/* Short sleep to sanitycheck the batch is spinning before we begin */
	msleep(10);
	if (va == vb) {
		if (!i915_request_completed(rq)) {
			pr_err("%s(%s): Semaphore sanitycheck failed %llx, with alignment %llx, using PTE size %x (phys %x, sg %x)\n",
			       ce->engine->name, va->obj->mm.region->name ?: "smem",
			       addr, align, va->resource->page_sizes_gtt,
			       va->page_sizes.phys, va->page_sizes.sg);
			err = -EIO;
		}
	} else if (!i915_request_completed(rq)) {
		struct i915_vma_resource vb_res = {
			.bi.pages = vb->obj->mm.pages,
			.bi.page_sizes = vb->obj->mm.page_sizes,
			.start = i915_vma_offset(vb),
			.vma_size = i915_vma_size(vb)
		};
		unsigned int pte_flags = 0;

		/* Flip the PTE between A and B */
		if (i915_gem_object_is_lmem(vb->obj))
			pte_flags |= PTE_LM;
		ce->vm->insert_entries(ce->vm, &vb_res, 0, pte_flags);

		/* Flush the PTE update to concurrent HW */
		tlbinv(ce->vm, addr & -length, length);

		if (wait_for(i915_request_completed(rq), HZ / 2)) {
			pr_err("%s: Request did not complete; the COND_BBE did not read the updated PTE\n",
			       ce->engine->name);
			err = -EINVAL;
		}
	} else {
		pr_err("Spinner ended unexpectedly\n");
		err = -EIO;
	}
	i915_request_put(rq);

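	/*
	 * Terminate the spinner: overwrite the leading MI_NOOP with
	 * MI_BATCH_BUFFER_END so that a still-looping batch exits before
	 * its buffers are unpinned and unbound.
	 */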
	cs = page_mask_bits(batch->mm.mapping);
	*cs = MI_BATCH_BUFFER_END;
	wmb();

out_va:
	if (vb != va)
		vb->node = vb_node;
	i915_vma_unpin(va);
	if (i915_vma_unbind_unlocked(va))
		err = -EIO;
out:
	i915_gem_object_put(batch);
	return err;
}

static struct drm_i915_gem_object *create_lmem(struct intel_gt *gt)
{
	/*
	 * Allocating with the largest possible page size allows us to test
	 * all types of pages.
	 */
	return i915_gem_object_create_lmem(gt->i915, SZ_1G, I915_BO_ALLOC_CONTIGUOUS);
}

static struct drm_i915_gem_object *create_smem(struct intel_gt *gt)
{
	/*
	 * SZ_64K pages require covering the whole 2M PT (gen8 to tgl/dg1).
	 * While that does not require the whole 2M block to be contiguous,
	 * it is easier to make it so, since we need that for SZ_2M pages.
	 * Since we randomly offset the start of the vma, we need a 4M object
	 * so that there is a 2M range within it that is suitable for SZ_64K
	 * PTEs.
	 */
	return i915_gem_object_create_internal(gt->i915, SZ_4M);
}

static int
mem_tlbinv(struct intel_gt *gt,
	   struct drm_i915_gem_object *(*create_fn)(struct intel_gt *),
	   void (*tlbinv)(struct i915_address_space *vm, u64 addr, u64 length))
{
	unsigned int ppgtt_size = RUNTIME_INFO(gt->i915)->ppgtt_size;
	struct intel_engine_cs *engine;
	struct drm_i915_gem_object *A, *B;
	struct i915_ppgtt *ppgtt;
	struct i915_vma *va, *vb;
	enum intel_engine_id id;
	I915_RND_STATE(prng);
	void *vaddr;
	int err;

	/*
	 * Check that the TLB invalidate is able to revoke an active
	 * page. We load a page into a spinning COND_BBE loop and then
	 * remap that page to a new physical address. The old address is
	 * retained in the TLB cache (and so the loop keeps spinning) until
	 * we issue an invalidate.
	 */

	A = create_fn(gt);
	if (IS_ERR(A))
		return PTR_ERR(A);

	vaddr = i915_gem_object_pin_map_unlocked(A, I915_MAP_WC);
	if (IS_ERR(vaddr)) {
		err = PTR_ERR(vaddr);
		goto out_a;
	}

	B = create_fn(gt);
	if (IS_ERR(B)) {
		err = PTR_ERR(B);
		goto out_a;
	}

	vaddr = i915_gem_object_pin_map_unlocked(B, I915_MAP_WC);
	if (IS_ERR(vaddr)) {
		err = PTR_ERR(vaddr);
		goto out_b;
	}

	GEM_BUG_ON(A->base.size != B->base.size);
	if ((A->mm.page_sizes.phys | B->mm.page_sizes.phys) & (A->base.size - 1))
		pr_warn("Failed to allocate contiguous pages for size %zx\n",
			A->base.size);

	ppgtt = i915_ppgtt_create(gt, 0);
	if (IS_ERR(ppgtt)) {
		err = PTR_ERR(ppgtt);
		goto out_b;
	}

	va = i915_vma_instance(A, &ppgtt->vm, NULL);
	if (IS_ERR(va)) {
		err = PTR_ERR(va);
		goto out_vm;
	}

	vb = i915_vma_instance(B, &ppgtt->vm, NULL);
	if (IS_ERR(vb)) {
		err = PTR_ERR(vb);
		goto out_vm;
	}

	err = 0;
	for_each_engine(engine, gt, id) {
		struct i915_gem_ww_ctx ww;
		struct intel_context *ce;
		int bit;

		ce = intel_context_create(engine);
		if (IS_ERR(ce)) {
			err = PTR_ERR(ce);
			break;
		}

		i915_vm_put(ce->vm);
		ce->vm = i915_vm_get(&ppgtt->vm);

		for_i915_gem_ww(&ww, err, true)
			err = intel_context_pin_ww(ce, &ww);
		if (err)
			goto err_put;

		for_each_set_bit(bit,
				 (unsigned long *)&RUNTIME_INFO(gt->i915)->page_sizes,
				 BITS_PER_TYPE(RUNTIME_INFO(gt->i915)->page_sizes)) {
			unsigned int len;

			if (BIT_ULL(bit) < i915_vm_obj_min_alignment(va->vm, va->obj))
				continue;

			/* sanitycheck the semaphore wake up */
			err = pte_tlbinv(ce, va, va,
					 BIT_ULL(bit),
					 NULL, SZ_4K,
					 &prng);
			if (err)
				goto err_unpin;

			for (len = 2; len <= ppgtt_size; len = min(2 * len, ppgtt_size)) {
				err = pte_tlbinv(ce, va, vb,
						 BIT_ULL(bit),
						 tlbinv,
						 BIT_ULL(len),
						 &prng);
				if (err)
					goto err_unpin;
				if (len == ppgtt_size)
					break;
			}
		}
err_unpin:
		intel_context_unpin(ce);
err_put:
		intel_context_put(ce);
		if (err)
			break;
	}

	if (igt_flush_test(gt->i915))
		err = -EIO;

out_vm:
	i915_vm_put(&ppgtt->vm);
out_b:
	i915_gem_object_put(B);
out_a:
	i915_gem_object_put(A);
	return err;
}

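/*
 * Invalidate the GT's TLBs in full; the addr/length hint supplied by
 * pte_tlbinv() is ignored since the flush is not range-based.
 */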
static void tlbinv_full(struct i915_address_space *vm, u64 addr, u64 length)
{
	intel_gt_invalidate_tlb(vm->gt, intel_gt_tlb_seqno(vm->gt) | 1);
}

static int invalidate_full(void *arg)
{
	struct intel_gt *gt = arg;
	int err;

	if (GRAPHICS_VER(gt->i915) < 8)
		return 0; /* TLB invalidate not implemented */

	err = mem_tlbinv(gt, create_smem, tlbinv_full);
	if (err == 0)
		err = mem_tlbinv(gt, create_lmem, tlbinv_full);
	if (err == -ENODEV || err == -ENXIO)
		err = 0;

	return err;
}

int intel_tlb_live_selftests(struct drm_i915_private *i915)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(invalidate_full),
	};
	struct intel_gt *gt;
	unsigned int i;

	for_each_gt(gt, i915, i) {
		int err;

		if (intel_gt_is_wedged(gt))
			continue;

		err = intel_gt_live_subtests(tests, gt);
		if (err)
			return err;
	}

	return 0;
}