/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2017 Intel Corporation
 */

#include <linux/prime_numbers.h>

#include "gt/intel_engine_pm.h"
#include "gt/intel_gt.h"
#include "gt/intel_gt_pm.h"
#include "gt/intel_ring.h"

#include "i915_selftest.h"
#include "selftests/i915_random.h"

struct context {
	struct drm_i915_gem_object *obj;
	struct intel_engine_cs *engine;
};

static int cpu_set(struct context *ctx, unsigned long offset, u32 v)
{
	unsigned int needs_clflush;
	struct page *page;
	void *map;
	u32 *cpu;
	int err;

	err = i915_gem_object_prepare_write(ctx->obj, &needs_clflush);
	if (err)
		return err;

	page = i915_gem_object_get_page(ctx->obj, offset >> PAGE_SHIFT);
	map = kmap_atomic(page);
	cpu = map + offset_in_page(offset);

	if (needs_clflush & CLFLUSH_BEFORE)
		drm_clflush_virt_range(cpu, sizeof(*cpu));

	*cpu = v;

	if (needs_clflush & CLFLUSH_AFTER)
		drm_clflush_virt_range(cpu, sizeof(*cpu));

	kunmap_atomic(map);
	i915_gem_object_finish_access(ctx->obj);

	return 0;
}

static int cpu_get(struct context *ctx, unsigned long offset, u32 *v)
{
	unsigned int needs_clflush;
	struct page *page;
	void *map;
	u32 *cpu;
	int err;

	err = i915_gem_object_prepare_read(ctx->obj, &needs_clflush);
	if (err)
		return err;

	page = i915_gem_object_get_page(ctx->obj, offset >> PAGE_SHIFT);
	map = kmap_atomic(page);
	cpu = map + offset_in_page(offset);

	if (needs_clflush & CLFLUSH_BEFORE)
		drm_clflush_virt_range(cpu, sizeof(*cpu));

	*v = *cpu;

	kunmap_atomic(map);
	i915_gem_object_finish_access(ctx->obj);

	return 0;
}

static int gtt_set(struct context *ctx, unsigned long offset, u32 v)
{
	struct i915_vma *vma;
	u32 __iomem *map;
	int err = 0;

	i915_gem_object_lock(ctx->obj);
	err = i915_gem_object_set_to_gtt_domain(ctx->obj, true);
	i915_gem_object_unlock(ctx->obj);
	if (err)
		return err;

	vma = i915_gem_object_ggtt_pin(ctx->obj, NULL, 0, 0, PIN_MAPPABLE);
	if (IS_ERR(vma))
		return PTR_ERR(vma);

	intel_gt_pm_get(vma->vm->gt);

	map = i915_vma_pin_iomap(vma);
	i915_vma_unpin(vma);
	if (IS_ERR(map)) {
		err = PTR_ERR(map);
		goto out_rpm;
	}

	iowrite32(v, &map[offset / sizeof(*map)]);
	i915_vma_unpin_iomap(vma);

out_rpm:
	intel_gt_pm_put(vma->vm->gt);
	return err;
}

static int gtt_get(struct context *ctx, unsigned long offset, u32 *v)
{
	struct i915_vma *vma;
	u32 __iomem *map;
	int err = 0;

	i915_gem_object_lock(ctx->obj);
	err = i915_gem_object_set_to_gtt_domain(ctx->obj, false);
	i915_gem_object_unlock(ctx->obj);
	if (err)
		return err;

	vma = i915_gem_object_ggtt_pin(ctx->obj, NULL, 0, 0, PIN_MAPPABLE);
	if (IS_ERR(vma))
		return PTR_ERR(vma);

	intel_gt_pm_get(vma->vm->gt);

	map = i915_vma_pin_iomap(vma);
	i915_vma_unpin(vma);
	if (IS_ERR(map)) {
		err = PTR_ERR(map);
		goto out_rpm;
	}

	*v = ioread32(&map[offset / sizeof(*map)]);
	i915_vma_unpin_iomap(vma);

out_rpm:
	intel_gt_pm_put(vma->vm->gt);
	return err;
}

static int wc_set(struct context *ctx, unsigned long offset, u32 v)
{
	u32 *map;
	int err;

	i915_gem_object_lock(ctx->obj);
	err = i915_gem_object_set_to_wc_domain(ctx->obj, true);
	i915_gem_object_unlock(ctx->obj);
	if (err)
		return err;

	map = i915_gem_object_pin_map(ctx->obj, I915_MAP_WC);
	if (IS_ERR(map))
		return PTR_ERR(map);

	map[offset / sizeof(*map)] = v;
	i915_gem_object_unpin_map(ctx->obj);

	return 0;
}

static int wc_get(struct context *ctx, unsigned long offset, u32 *v)
{
	u32 *map;
	int err;

	i915_gem_object_lock(ctx->obj);
	err = i915_gem_object_set_to_wc_domain(ctx->obj, false);
	i915_gem_object_unlock(ctx->obj);
	if (err)
		return err;

	map = i915_gem_object_pin_map(ctx->obj, I915_MAP_WC);
	if (IS_ERR(map))
		return PTR_ERR(map);

	*v = map[offset / sizeof(*map)];
	i915_gem_object_unpin_map(ctx->obj);

	return 0;
}

static int gpu_set(struct context *ctx, unsigned long offset, u32 v)
{
	struct i915_request *rq;
	struct i915_vma *vma;
	u32 *cs;
	int err;

	i915_gem_object_lock(ctx->obj);
	err = i915_gem_object_set_to_gtt_domain(ctx->obj, true);
	i915_gem_object_unlock(ctx->obj);
	if (err)
		return err;

	vma = i915_gem_object_ggtt_pin(ctx->obj, NULL, 0, 0, 0);
	if (IS_ERR(vma))
		return PTR_ERR(vma);

	rq = intel_engine_create_kernel_request(ctx->engine);
	if (IS_ERR(rq)) {
		i915_vma_unpin(vma);
		return PTR_ERR(rq);
	}

	cs = intel_ring_begin(rq, 4);
	if (IS_ERR(cs)) {
		i915_request_add(rq);
		i915_vma_unpin(vma);
		return PTR_ERR(cs);
	}

	if (INTEL_GEN(ctx->engine->i915) >= 8) {
		*cs++ = MI_STORE_DWORD_IMM_GEN4 | 1 << 22;
		*cs++ = lower_32_bits(i915_ggtt_offset(vma) + offset);
		*cs++ = upper_32_bits(i915_ggtt_offset(vma) + offset);
		*cs++ = v;
	} else if (INTEL_GEN(ctx->engine->i915) >= 4) {
		*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
		*cs++ = 0;
		*cs++ = i915_ggtt_offset(vma) + offset;
		*cs++ = v;
	} else {
		*cs++ = MI_STORE_DWORD_IMM | MI_MEM_VIRTUAL;
		*cs++ = i915_ggtt_offset(vma) + offset;
		*cs++ = v;
		*cs++ = MI_NOOP;
	}
	intel_ring_advance(rq, cs);

	i915_vma_lock(vma);
	err = i915_request_await_object(rq, vma->obj, true);
	if (err == 0)
		err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
	i915_vma_unlock(vma);
	i915_vma_unpin(vma);

	i915_request_add(rq);

	return err;
}

static bool always_valid(struct context *ctx)
{
	return true;
}

static bool needs_fence_registers(struct context *ctx)
{
	struct intel_gt *gt = ctx->engine->gt;

	if (intel_gt_is_wedged(gt))
		return false;

	return gt->ggtt->num_fences;
}

static bool needs_mi_store_dword(struct context *ctx)
{
	if (intel_gt_is_wedged(ctx->engine->gt))
		return false;

	return intel_engine_can_store_dword(ctx->engine);
}

static const struct igt_coherency_mode {
	const char *name;
	int (*set)(struct context *ctx, unsigned long offset, u32 v);
	int (*get)(struct context *ctx, unsigned long offset, u32 *v);
	bool (*valid)(struct context *ctx);
} igt_coherency_mode[] = {
	{ "cpu", cpu_set, cpu_get, always_valid },
	{ "gtt", gtt_set, gtt_get, needs_fence_registers },
	{ "wc", wc_set, wc_get, always_valid },
	{ "gpu", gpu_set, NULL, needs_mi_store_dword },
	{ },
};

static struct intel_engine_cs *
random_engine(struct drm_i915_private *i915, struct rnd_state *prng)
{
	struct intel_engine_cs *engine;
	unsigned int count;

	count = 0;
	for_each_uabi_engine(engine, i915)
		count++;

	count = i915_prandom_u32_max_state(count, prng);
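	/* Walk the engine list a second time, stopping at the chosen index. */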
	for_each_uabi_engine(engine, i915)
		if (count-- == 0)
			return engine;

	return NULL;
}

static int igt_gem_coherency(void *arg)
{
	const unsigned int ncachelines = PAGE_SIZE/64;
	struct drm_i915_private *i915 = arg;
	const struct igt_coherency_mode *read, *write, *over;
	unsigned long count, n;
	u32 *offsets, *values;
	I915_RND_STATE(prng);
	struct context ctx;
	int err = 0;

	/*
	 * We repeatedly write, overwrite and read from a sequence of
	 * cachelines in order to try and detect incoherency (unflushed writes
	 * from either the CPU or GPU). Each setter/getter uses our cache
	 * domain API which should prevent incoherency.
	 */

	offsets = kmalloc_array(ncachelines, 2*sizeof(u32), GFP_KERNEL);
	if (!offsets)
		return -ENOMEM;
	for (count = 0; count < ncachelines; count++)
		offsets[count] = count * 64 + 4 * (count % 16);

	values = offsets + ncachelines;

	ctx.engine = random_engine(i915, &prng);
	if (!ctx.engine) {
		err = -ENODEV;
		goto out_free;
	}
	pr_info("%s: using %s\n", __func__, ctx.engine->name);
	intel_engine_pm_get(ctx.engine);

	for (over = igt_coherency_mode; over->name; over++) {
		if (!over->set)
			continue;

		if (!over->valid(&ctx))
			continue;

		for (write = igt_coherency_mode; write->name; write++) {
			if (!write->set)
				continue;

			if (!write->valid(&ctx))
				continue;

			for (read = igt_coherency_mode; read->name; read++) {
				if (!read->get)
					continue;

				if (!read->valid(&ctx))
					continue;

				for_each_prime_number_from(count, 1, ncachelines) {
					ctx.obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
					if (IS_ERR(ctx.obj)) {
						err = PTR_ERR(ctx.obj);
						goto out_pm;
					}

					i915_random_reorder(offsets, ncachelines, &prng);
					for (n = 0; n < count; n++)
						values[n] = prandom_u32_state(&prng);

					for (n = 0; n < count; n++) {
						err = over->set(&ctx, offsets[n], ~values[n]);
						if (err) {
							pr_err("Failed to set stale value[%ld/%ld] in object using %s, err=%d\n",
							       n, count, over->name, err);
							goto put_object;
						}
					}

					for (n = 0; n < count; n++) {
						err = write->set(&ctx, offsets[n], values[n]);
						if (err) {
							pr_err("Failed to set value[%ld/%ld] in object using %s, err=%d\n",
							       n, count, write->name, err);
							goto put_object;
						}
					}

					for (n = 0; n < count; n++) {
						u32 found;

						err = read->get(&ctx, offsets[n], &found);
						if (err) {
							pr_err("Failed to get value[%ld/%ld] in object using %s, err=%d\n",
							       n, count, read->name, err);
							goto put_object;
						}

						if (found != values[n]) {
							pr_err("Value[%ld/%ld] mismatch, (overwrite with %s) wrote [%s] %x read [%s] %x (inverse %x), at offset %x\n",
							       n, count, over->name,
							       write->name, values[n],
							       read->name, found,
							       ~values[n], offsets[n]);
							err = -EINVAL;
							goto put_object;
						}
					}

					i915_gem_object_put(ctx.obj);
				}
			}
		}
	}
out_pm:
	intel_engine_pm_put(ctx.engine);
out_free:
	kfree(offsets);
	return err;

put_object:
	i915_gem_object_put(ctx.obj);
	goto out_pm;
}

int i915_gem_coherency_live_selftests(struct drm_i915_private *i915)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(igt_gem_coherency),
	};

	return i915_subtests(tests, i915);
}
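/*
 * Note: this entry point is wired into the i915 live selftest list; a sketch
 * of the registration, assuming the usual selftests/i915_live_selftests.h
 * convention (the exact filter name is an assumption):
 *
 *	selftest(gem_coherency, i915_gem_coherency_live_selftests)
 *
 * With CONFIG_DRM_I915_SELFTEST enabled, the test can then be selected at
 * module load time via the i915.selftests parameter.
 */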