1 /* 2 * SPDX-License-Identifier: MIT 3 * 4 * Copyright © 2017 Intel Corporation 5 */ 6 7 #include <linux/prime_numbers.h> 8 9 #include "gt/intel_gt.h" 10 #include "gt/intel_gt_pm.h" 11 #include "gt/intel_ring.h" 12 13 #include "i915_selftest.h" 14 #include "selftests/i915_random.h" 15 16 struct context { 17 struct drm_i915_gem_object *obj; 18 struct intel_engine_cs *engine; 19 }; 20 21 static int cpu_set(struct context *ctx, unsigned long offset, u32 v) 22 { 23 unsigned int needs_clflush; 24 struct page *page; 25 void *map; 26 u32 *cpu; 27 int err; 28 29 err = i915_gem_object_prepare_write(ctx->obj, &needs_clflush); 30 if (err) 31 return err; 32 33 page = i915_gem_object_get_page(ctx->obj, offset >> PAGE_SHIFT); 34 map = kmap_atomic(page); 35 cpu = map + offset_in_page(offset); 36 37 if (needs_clflush & CLFLUSH_BEFORE) 38 drm_clflush_virt_range(cpu, sizeof(*cpu)); 39 40 *cpu = v; 41 42 if (needs_clflush & CLFLUSH_AFTER) 43 drm_clflush_virt_range(cpu, sizeof(*cpu)); 44 45 kunmap_atomic(map); 46 i915_gem_object_finish_access(ctx->obj); 47 48 return 0; 49 } 50 51 static int cpu_get(struct context *ctx, unsigned long offset, u32 *v) 52 { 53 unsigned int needs_clflush; 54 struct page *page; 55 void *map; 56 u32 *cpu; 57 int err; 58 59 err = i915_gem_object_prepare_read(ctx->obj, &needs_clflush); 60 if (err) 61 return err; 62 63 page = i915_gem_object_get_page(ctx->obj, offset >> PAGE_SHIFT); 64 map = kmap_atomic(page); 65 cpu = map + offset_in_page(offset); 66 67 if (needs_clflush & CLFLUSH_BEFORE) 68 drm_clflush_virt_range(cpu, sizeof(*cpu)); 69 70 *v = *cpu; 71 72 kunmap_atomic(map); 73 i915_gem_object_finish_access(ctx->obj); 74 75 return 0; 76 } 77 78 static int gtt_set(struct context *ctx, unsigned long offset, u32 v) 79 { 80 struct i915_vma *vma; 81 u32 __iomem *map; 82 int err = 0; 83 84 i915_gem_object_lock(ctx->obj); 85 err = i915_gem_object_set_to_gtt_domain(ctx->obj, true); 86 i915_gem_object_unlock(ctx->obj); 87 if (err) 88 return err; 89 90 vma = i915_gem_object_ggtt_pin(ctx->obj, NULL, 0, 0, PIN_MAPPABLE); 91 if (IS_ERR(vma)) 92 return PTR_ERR(vma); 93 94 intel_gt_pm_get(vma->vm->gt); 95 96 map = i915_vma_pin_iomap(vma); 97 i915_vma_unpin(vma); 98 if (IS_ERR(map)) { 99 err = PTR_ERR(map); 100 goto out_rpm; 101 } 102 103 iowrite32(v, &map[offset / sizeof(*map)]); 104 i915_vma_unpin_iomap(vma); 105 106 out_rpm: 107 intel_gt_pm_put(vma->vm->gt); 108 return err; 109 } 110 111 static int gtt_get(struct context *ctx, unsigned long offset, u32 *v) 112 { 113 struct i915_vma *vma; 114 u32 __iomem *map; 115 int err = 0; 116 117 i915_gem_object_lock(ctx->obj); 118 err = i915_gem_object_set_to_gtt_domain(ctx->obj, false); 119 i915_gem_object_unlock(ctx->obj); 120 if (err) 121 return err; 122 123 vma = i915_gem_object_ggtt_pin(ctx->obj, NULL, 0, 0, PIN_MAPPABLE); 124 if (IS_ERR(vma)) 125 return PTR_ERR(vma); 126 127 intel_gt_pm_get(vma->vm->gt); 128 129 map = i915_vma_pin_iomap(vma); 130 i915_vma_unpin(vma); 131 if (IS_ERR(map)) { 132 err = PTR_ERR(map); 133 goto out_rpm; 134 } 135 136 *v = ioread32(&map[offset / sizeof(*map)]); 137 i915_vma_unpin_iomap(vma); 138 139 out_rpm: 140 intel_gt_pm_put(vma->vm->gt); 141 return err; 142 } 143 144 static int wc_set(struct context *ctx, unsigned long offset, u32 v) 145 { 146 u32 *map; 147 int err; 148 149 i915_gem_object_lock(ctx->obj); 150 err = i915_gem_object_set_to_wc_domain(ctx->obj, true); 151 i915_gem_object_unlock(ctx->obj); 152 if (err) 153 return err; 154 155 map = i915_gem_object_pin_map(ctx->obj, I915_MAP_WC); 156 if (IS_ERR(map)) 157 return PTR_ERR(map); 158 159 map[offset / sizeof(*map)] = v; 160 i915_gem_object_unpin_map(ctx->obj); 161 162 return 0; 163 } 164 165 static int wc_get(struct context *ctx, unsigned long offset, u32 *v) 166 { 167 u32 *map; 168 int err; 169 170 i915_gem_object_lock(ctx->obj); 171 err = i915_gem_object_set_to_wc_domain(ctx->obj, false); 172 i915_gem_object_unlock(ctx->obj); 173 if (err) 174 return err; 175 176 map = i915_gem_object_pin_map(ctx->obj, I915_MAP_WC); 177 if (IS_ERR(map)) 178 return PTR_ERR(map); 179 180 *v = map[offset / sizeof(*map)]; 181 i915_gem_object_unpin_map(ctx->obj); 182 183 return 0; 184 } 185 186 static int gpu_set(struct context *ctx, unsigned long offset, u32 v) 187 { 188 struct i915_request *rq; 189 struct i915_vma *vma; 190 u32 *cs; 191 int err; 192 193 i915_gem_object_lock(ctx->obj); 194 err = i915_gem_object_set_to_gtt_domain(ctx->obj, true); 195 i915_gem_object_unlock(ctx->obj); 196 if (err) 197 return err; 198 199 vma = i915_gem_object_ggtt_pin(ctx->obj, NULL, 0, 0, 0); 200 if (IS_ERR(vma)) 201 return PTR_ERR(vma); 202 203 rq = i915_request_create(ctx->engine->kernel_context); 204 if (IS_ERR(rq)) { 205 i915_vma_unpin(vma); 206 return PTR_ERR(rq); 207 } 208 209 cs = intel_ring_begin(rq, 4); 210 if (IS_ERR(cs)) { 211 i915_request_add(rq); 212 i915_vma_unpin(vma); 213 return PTR_ERR(cs); 214 } 215 216 if (INTEL_GEN(ctx->engine->i915) >= 8) { 217 *cs++ = MI_STORE_DWORD_IMM_GEN4 | 1 << 22; 218 *cs++ = lower_32_bits(i915_ggtt_offset(vma) + offset); 219 *cs++ = upper_32_bits(i915_ggtt_offset(vma) + offset); 220 *cs++ = v; 221 } else if (INTEL_GEN(ctx->engine->i915) >= 4) { 222 *cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; 223 *cs++ = 0; 224 *cs++ = i915_ggtt_offset(vma) + offset; 225 *cs++ = v; 226 } else { 227 *cs++ = MI_STORE_DWORD_IMM | MI_MEM_VIRTUAL; 228 *cs++ = i915_ggtt_offset(vma) + offset; 229 *cs++ = v; 230 *cs++ = MI_NOOP; 231 } 232 intel_ring_advance(rq, cs); 233 234 i915_vma_lock(vma); 235 err = i915_request_await_object(rq, vma->obj, true); 236 if (err == 0) 237 err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE); 238 i915_vma_unlock(vma); 239 i915_vma_unpin(vma); 240 241 i915_request_add(rq); 242 243 return err; 244 } 245 246 static bool always_valid(struct context *ctx) 247 { 248 return true; 249 } 250 251 static bool needs_fence_registers(struct context *ctx) 252 { 253 struct intel_gt *gt = ctx->engine->gt; 254 255 if (intel_gt_is_wedged(gt)) 256 return false; 257 258 return gt->ggtt->num_fences; 259 } 260 261 static bool needs_mi_store_dword(struct context *ctx) 262 { 263 if (intel_gt_is_wedged(ctx->engine->gt)) 264 return false; 265 266 return intel_engine_can_store_dword(ctx->engine); 267 } 268 269 static const struct igt_coherency_mode { 270 const char *name; 271 int (*set)(struct context *ctx, unsigned long offset, u32 v); 272 int (*get)(struct context *ctx, unsigned long offset, u32 *v); 273 bool (*valid)(struct context *ctx); 274 } igt_coherency_mode[] = { 275 { "cpu", cpu_set, cpu_get, always_valid }, 276 { "gtt", gtt_set, gtt_get, needs_fence_registers }, 277 { "wc", wc_set, wc_get, always_valid }, 278 { "gpu", gpu_set, NULL, needs_mi_store_dword }, 279 { }, 280 }; 281 282 static struct intel_engine_cs * 283 random_engine(struct drm_i915_private *i915, struct rnd_state *prng) 284 { 285 struct intel_engine_cs *engine; 286 unsigned int count; 287 288 count = 0; 289 for_each_uabi_engine(engine, i915) 290 count++; 291 292 count = i915_prandom_u32_max_state(count, prng); 293 for_each_uabi_engine(engine, i915) 294 if (count-- == 0) 295 return engine; 296 297 return NULL; 298 } 299 300 static int igt_gem_coherency(void *arg) 301 { 302 const unsigned int ncachelines = PAGE_SIZE/64; 303 struct drm_i915_private *i915 = arg; 304 const struct igt_coherency_mode *read, *write, *over; 305 unsigned long count, n; 306 u32 *offsets, *values; 307 I915_RND_STATE(prng); 308 struct context ctx; 309 int err = 0; 310 311 /* 312 * We repeatedly write, overwrite and read from a sequence of 313 * cachelines in order to try and detect incoherency (unflushed writes 314 * from either the CPU or GPU). Each setter/getter uses our cache 315 * domain API which should prevent incoherency. 316 */ 317 318 offsets = kmalloc_array(ncachelines, 2*sizeof(u32), GFP_KERNEL); 319 if (!offsets) 320 return -ENOMEM; 321 for (count = 0; count < ncachelines; count++) 322 offsets[count] = count * 64 + 4 * (count % 16); 323 324 values = offsets + ncachelines; 325 326 ctx.engine = random_engine(i915, &prng); 327 GEM_BUG_ON(!ctx.engine); 328 pr_info("%s: using %s\n", __func__, ctx.engine->name); 329 330 for (over = igt_coherency_mode; over->name; over++) { 331 if (!over->set) 332 continue; 333 334 if (!over->valid(&ctx)) 335 continue; 336 337 for (write = igt_coherency_mode; write->name; write++) { 338 if (!write->set) 339 continue; 340 341 if (!write->valid(&ctx)) 342 continue; 343 344 for (read = igt_coherency_mode; read->name; read++) { 345 if (!read->get) 346 continue; 347 348 if (!read->valid(&ctx)) 349 continue; 350 351 for_each_prime_number_from(count, 1, ncachelines) { 352 ctx.obj = i915_gem_object_create_internal(i915, PAGE_SIZE); 353 if (IS_ERR(ctx.obj)) { 354 err = PTR_ERR(ctx.obj); 355 goto free; 356 } 357 358 i915_random_reorder(offsets, ncachelines, &prng); 359 for (n = 0; n < count; n++) 360 values[n] = prandom_u32_state(&prng); 361 362 for (n = 0; n < count; n++) { 363 err = over->set(&ctx, offsets[n], ~values[n]); 364 if (err) { 365 pr_err("Failed to set stale value[%ld/%ld] in object using %s, err=%d\n", 366 n, count, over->name, err); 367 goto put_object; 368 } 369 } 370 371 for (n = 0; n < count; n++) { 372 err = write->set(&ctx, offsets[n], values[n]); 373 if (err) { 374 pr_err("Failed to set value[%ld/%ld] in object using %s, err=%d\n", 375 n, count, write->name, err); 376 goto put_object; 377 } 378 } 379 380 for (n = 0; n < count; n++) { 381 u32 found; 382 383 err = read->get(&ctx, offsets[n], &found); 384 if (err) { 385 pr_err("Failed to get value[%ld/%ld] in object using %s, err=%d\n", 386 n, count, read->name, err); 387 goto put_object; 388 } 389 390 if (found != values[n]) { 391 pr_err("Value[%ld/%ld] mismatch, (overwrite with %s) wrote [%s] %x read [%s] %x (inverse %x), at offset %x\n", 392 n, count, over->name, 393 write->name, values[n], 394 read->name, found, 395 ~values[n], offsets[n]); 396 err = -EINVAL; 397 goto put_object; 398 } 399 } 400 401 i915_gem_object_put(ctx.obj); 402 } 403 } 404 } 405 } 406 free: 407 kfree(offsets); 408 return err; 409 410 put_object: 411 i915_gem_object_put(ctx.obj); 412 goto free; 413 } 414 415 int i915_gem_coherency_live_selftests(struct drm_i915_private *i915) 416 { 417 static const struct i915_subtest tests[] = { 418 SUBTEST(igt_gem_coherency), 419 }; 420 421 return i915_subtests(tests, i915); 422 } 423