/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2017 Intel Corporation
 */

#include <linux/prime_numbers.h>

#include "gt/intel_engine_pm.h"
#include "gt/intel_gt.h"
#include "gt/intel_gt_pm.h"
#include "gt/intel_ring.h"

#include "i915_selftest.h"
#include "selftests/i915_random.h"

struct context {
	struct drm_i915_gem_object *obj;
	struct intel_engine_cs *engine;
};

static int cpu_set(struct context *ctx, unsigned long offset, u32 v)
{
	unsigned int needs_clflush;
	struct page *page;
	void *map;
	u32 *cpu;
	int err;

	err = i915_gem_object_prepare_write(ctx->obj, &needs_clflush);
	if (err)
		return err;

	page = i915_gem_object_get_page(ctx->obj, offset >> PAGE_SHIFT);
	map = kmap_atomic(page);
	cpu = map + offset_in_page(offset);

	if (needs_clflush & CLFLUSH_BEFORE)
		drm_clflush_virt_range(cpu, sizeof(*cpu));

	*cpu = v;

	if (needs_clflush & CLFLUSH_AFTER)
		drm_clflush_virt_range(cpu, sizeof(*cpu));

	kunmap_atomic(map);
	i915_gem_object_finish_access(ctx->obj);

	return 0;
}

static int cpu_get(struct context *ctx, unsigned long offset, u32 *v)
{
	unsigned int needs_clflush;
	struct page *page;
	void *map;
	u32 *cpu;
	int err;

	err = i915_gem_object_prepare_read(ctx->obj, &needs_clflush);
	if (err)
		return err;

	page = i915_gem_object_get_page(ctx->obj, offset >> PAGE_SHIFT);
	map = kmap_atomic(page);
	cpu = map + offset_in_page(offset);

	if (needs_clflush & CLFLUSH_BEFORE)
		drm_clflush_virt_range(cpu, sizeof(*cpu));

	*v = *cpu;

	kunmap_atomic(map);
	i915_gem_object_finish_access(ctx->obj);

	return 0;
}

static int gtt_set(struct context *ctx, unsigned long offset, u32 v)
{
	struct i915_vma *vma;
	u32 __iomem *map;
	int err = 0;

	i915_gem_object_lock(ctx->obj);
	err = i915_gem_object_set_to_gtt_domain(ctx->obj, true);
	i915_gem_object_unlock(ctx->obj);
	if (err)
		return err;

	vma = i915_gem_object_ggtt_pin(ctx->obj, NULL, 0, 0, PIN_MAPPABLE);
	if (IS_ERR(vma))
		return PTR_ERR(vma);

	intel_gt_pm_get(vma->vm->gt);

	map = i915_vma_pin_iomap(vma);
	i915_vma_unpin(vma);
	if (IS_ERR(map)) {
		err = PTR_ERR(map);
		goto out_rpm;
	}

	iowrite32(v, &map[offset / sizeof(*map)]);
	i915_vma_unpin_iomap(vma);

out_rpm:
	intel_gt_pm_put(vma->vm->gt);
	return err;
}

static int gtt_get(struct context *ctx, unsigned long offset, u32 *v)
{
	struct i915_vma *vma;
	u32 __iomem *map;
	int err = 0;

	i915_gem_object_lock(ctx->obj);
	err = i915_gem_object_set_to_gtt_domain(ctx->obj, false);
	i915_gem_object_unlock(ctx->obj);
	if (err)
		return err;

	vma = i915_gem_object_ggtt_pin(ctx->obj, NULL, 0, 0, PIN_MAPPABLE);
	if (IS_ERR(vma))
		return PTR_ERR(vma);

	intel_gt_pm_get(vma->vm->gt);

	map = i915_vma_pin_iomap(vma);
	i915_vma_unpin(vma);
	if (IS_ERR(map)) {
		err = PTR_ERR(map);
		goto out_rpm;
	}

	*v = ioread32(&map[offset / sizeof(*map)]);
	i915_vma_unpin_iomap(vma);

out_rpm:
	intel_gt_pm_put(vma->vm->gt);
	return err;
}

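/*
 * wc_set()/wc_get() access the object through a write-combining (WC) CPU
 * mapping. Writes bypass the CPU cache but may linger in the WC buffers,
 * so the write path flushes the mapping before unpinning it.
 */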
static int wc_set(struct context *ctx, unsigned long offset, u32 v)
{
	u32 *map;
	int err;

	i915_gem_object_lock(ctx->obj);
	err = i915_gem_object_set_to_wc_domain(ctx->obj, true);
	i915_gem_object_unlock(ctx->obj);
	if (err)
		return err;

	map = i915_gem_object_pin_map(ctx->obj, I915_MAP_WC);
	if (IS_ERR(map))
		return PTR_ERR(map);

	map[offset / sizeof(*map)] = v;

	__i915_gem_object_flush_map(ctx->obj, offset, sizeof(*map));
	i915_gem_object_unpin_map(ctx->obj);

	return 0;
}

static int wc_get(struct context *ctx, unsigned long offset, u32 *v)
{
	u32 *map;
	int err;

	i915_gem_object_lock(ctx->obj);
	err = i915_gem_object_set_to_wc_domain(ctx->obj, false);
	i915_gem_object_unlock(ctx->obj);
	if (err)
		return err;

	map = i915_gem_object_pin_map(ctx->obj, I915_MAP_WC);
	if (IS_ERR(map))
		return PTR_ERR(map);

	*v = map[offset / sizeof(*map)];
	i915_gem_object_unpin_map(ctx->obj);

	return 0;
}

static int gpu_set(struct context *ctx, unsigned long offset, u32 v)
{
	struct i915_request *rq;
	struct i915_vma *vma;
	u32 *cs;
	int err;

	i915_gem_object_lock(ctx->obj);
	err = i915_gem_object_set_to_gtt_domain(ctx->obj, true);
	i915_gem_object_unlock(ctx->obj);
	if (err)
		return err;

	vma = i915_gem_object_ggtt_pin(ctx->obj, NULL, 0, 0, 0);
	if (IS_ERR(vma))
		return PTR_ERR(vma);

	rq = intel_engine_create_kernel_request(ctx->engine);
	if (IS_ERR(rq)) {
		i915_vma_unpin(vma);
		return PTR_ERR(rq);
	}

	cs = intel_ring_begin(rq, 4);
	if (IS_ERR(cs)) {
		i915_request_add(rq);
		i915_vma_unpin(vma);
		return PTR_ERR(cs);
	}

	if (INTEL_GEN(ctx->engine->i915) >= 8) {
		*cs++ = MI_STORE_DWORD_IMM_GEN4 | 1 << 22;
		*cs++ = lower_32_bits(i915_ggtt_offset(vma) + offset);
		*cs++ = upper_32_bits(i915_ggtt_offset(vma) + offset);
		*cs++ = v;
	} else if (INTEL_GEN(ctx->engine->i915) >= 4) {
		*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
		*cs++ = 0;
		*cs++ = i915_ggtt_offset(vma) + offset;
		*cs++ = v;
	} else {
		*cs++ = MI_STORE_DWORD_IMM | MI_MEM_VIRTUAL;
		*cs++ = i915_ggtt_offset(vma) + offset;
		*cs++ = v;
		*cs++ = MI_NOOP;
	}
	intel_ring_advance(rq, cs);

	i915_vma_lock(vma);
	err = i915_request_await_object(rq, vma->obj, true);
	if (err == 0)
		err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
	i915_vma_unlock(vma);
	i915_vma_unpin(vma);

	i915_request_add(rq);

	return err;
}

static bool always_valid(struct context *ctx)
{
	return true;
}

static bool needs_fence_registers(struct context *ctx)
{
	struct intel_gt *gt = ctx->engine->gt;

	if (intel_gt_is_wedged(gt))
		return false;

	return gt->ggtt->num_fences;
}

static bool needs_mi_store_dword(struct context *ctx)
{
	if (intel_gt_is_wedged(ctx->engine->gt))
		return false;

	return intel_engine_can_store_dword(ctx->engine);
}

static const struct igt_coherency_mode {
	const char *name;
	int (*set)(struct context *ctx, unsigned long offset, u32 v);
	int (*get)(struct context *ctx, unsigned long offset, u32 *v);
	bool (*valid)(struct context *ctx);
} igt_coherency_mode[] = {
	{ "cpu", cpu_set, cpu_get, always_valid },
	{ "gtt", gtt_set, gtt_get, needs_fence_registers },
	{ "wc", wc_set, wc_get, always_valid },
	{ "gpu", gpu_set, NULL, needs_mi_store_dword },
	{ },
};

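/* Pick one uabi engine at random to submit the GPU writes from. */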
static struct intel_engine_cs *
random_engine(struct drm_i915_private *i915, struct rnd_state *prng)
{
	struct intel_engine_cs *engine;
	unsigned int count;

	count = 0;
	for_each_uabi_engine(engine, i915)
		count++;

	count = i915_prandom_u32_max_state(count, prng);
	for_each_uabi_engine(engine, i915)
		if (count-- == 0)
			return engine;

	return NULL;
}

static int igt_gem_coherency(void *arg)
{
	const unsigned int ncachelines = PAGE_SIZE/64;
	struct drm_i915_private *i915 = arg;
	const struct igt_coherency_mode *read, *write, *over;
	unsigned long count, n;
	u32 *offsets, *values;
	I915_RND_STATE(prng);
	struct context ctx;
	int err = 0;

	/*
	 * We repeatedly write, overwrite and read from a sequence of
	 * cachelines in order to try and detect incoherency (unflushed writes
	 * from either the CPU or GPU). Each setter/getter uses our cache
	 * domain API which should prevent incoherency.
	 */

	offsets = kmalloc_array(ncachelines, 2*sizeof(u32), GFP_KERNEL);
	if (!offsets)
		return -ENOMEM;
	for (count = 0; count < ncachelines; count++)
		offsets[count] = count * 64 + 4 * (count % 16);

	values = offsets + ncachelines;

	ctx.engine = random_engine(i915, &prng);
	if (!ctx.engine) {
		err = -ENODEV;
		goto out_free;
	}
	pr_info("%s: using %s\n", __func__, ctx.engine->name);
	intel_engine_pm_get(ctx.engine);

	for (over = igt_coherency_mode; over->name; over++) {
		if (!over->set)
			continue;

		if (!over->valid(&ctx))
			continue;

		for (write = igt_coherency_mode; write->name; write++) {
			if (!write->set)
				continue;

			if (!write->valid(&ctx))
				continue;

			for (read = igt_coherency_mode; read->name; read++) {
				if (!read->get)
					continue;

				if (!read->valid(&ctx))
					continue;

				for_each_prime_number_from(count, 1, ncachelines) {
					ctx.obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
					if (IS_ERR(ctx.obj)) {
						err = PTR_ERR(ctx.obj);
						goto out_pm;
					}

					i915_random_reorder(offsets, ncachelines, &prng);
					for (n = 0; n < count; n++)
						values[n] = prandom_u32_state(&prng);

					for (n = 0; n < count; n++) {
						err = over->set(&ctx, offsets[n], ~values[n]);
						if (err) {
							pr_err("Failed to set stale value[%ld/%ld] in object using %s, err=%d\n",
							       n, count, over->name, err);
							goto put_object;
						}
					}

					for (n = 0; n < count; n++) {
						err = write->set(&ctx, offsets[n], values[n]);
						if (err) {
							pr_err("Failed to set value[%ld/%ld] in object using %s, err=%d\n",
							       n, count, write->name, err);
							goto put_object;
						}
					}

					for (n = 0; n < count; n++) {
						u32 found;

						err = read->get(&ctx, offsets[n], &found);
						if (err) {
							pr_err("Failed to get value[%ld/%ld] in object using %s, err=%d\n",
							       n, count, read->name, err);
							goto put_object;
						}

						if (found != values[n]) {
							pr_err("Value[%ld/%ld] mismatch, (overwrite with %s) wrote [%s] %x read [%s] %x (inverse %x), at offset %x\n",
							       n, count, over->name,
							       write->name, values[n],
							       read->name, found,
							       ~values[n], offsets[n]);
							err = -EINVAL;
							goto put_object;
						}
					}

					i915_gem_object_put(ctx.obj);
				}
			}
		}
	}
out_pm:
	intel_engine_pm_put(ctx.engine);
out_free:
	kfree(offsets);
	return err;

put_object:
	i915_gem_object_put(ctx.obj);
	goto out_pm;
}

int i915_gem_coherency_live_selftests(struct drm_i915_private *i915)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(igt_gem_coherency),
	};

	return i915_subtests(tests, i915);
}