/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2017 Intel Corporation
 */

#include <linux/prime_numbers.h>

#include "i915_selftest.h"
#include "selftests/i915_random.h"

static int cpu_set(struct drm_i915_gem_object *obj,
		   unsigned long offset,
		   u32 v)
{
	unsigned int needs_clflush;
	struct page *page;
	void *map;
	u32 *cpu;
	int err;

	err = i915_gem_object_prepare_write(obj, &needs_clflush);
	if (err)
		return err;

	page = i915_gem_object_get_page(obj, offset >> PAGE_SHIFT);
	map = kmap_atomic(page);
	cpu = map + offset_in_page(offset);

	if (needs_clflush & CLFLUSH_BEFORE)
		drm_clflush_virt_range(cpu, sizeof(*cpu));

	*cpu = v;

	if (needs_clflush & CLFLUSH_AFTER)
		drm_clflush_virt_range(cpu, sizeof(*cpu));

	kunmap_atomic(map);
	i915_gem_object_finish_access(obj);

	return 0;
}

static int cpu_get(struct drm_i915_gem_object *obj,
		   unsigned long offset,
		   u32 *v)
{
	unsigned int needs_clflush;
	struct page *page;
	void *map;
	u32 *cpu;
	int err;

	err = i915_gem_object_prepare_read(obj, &needs_clflush);
	if (err)
		return err;

	page = i915_gem_object_get_page(obj, offset >> PAGE_SHIFT);
	map = kmap_atomic(page);
	cpu = map + offset_in_page(offset);

	if (needs_clflush & CLFLUSH_BEFORE)
		drm_clflush_virt_range(cpu, sizeof(*cpu));

	*v = *cpu;

	kunmap_atomic(map);
	i915_gem_object_finish_access(obj);

	return 0;
}

static int gtt_set(struct drm_i915_gem_object *obj,
		   unsigned long offset,
		   u32 v)
{
	struct i915_vma *vma;
	u32 __iomem *map;
	int err;

	i915_gem_object_lock(obj);
	err = i915_gem_object_set_to_gtt_domain(obj, true);
	i915_gem_object_unlock(obj);
	if (err)
		return err;

	vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, PIN_MAPPABLE);
	if (IS_ERR(vma))
		return PTR_ERR(vma);

	map = i915_vma_pin_iomap(vma);
	i915_vma_unpin(vma);
	if (IS_ERR(map))
		return PTR_ERR(map);

	iowrite32(v, &map[offset / sizeof(*map)]);
	i915_vma_unpin_iomap(vma);

	return 0;
}

static int gtt_get(struct drm_i915_gem_object *obj,
		   unsigned long offset,
		   u32 *v)
{
	struct i915_vma *vma;
	u32 __iomem *map;
	int err;

	i915_gem_object_lock(obj);
	err = i915_gem_object_set_to_gtt_domain(obj, false);
	i915_gem_object_unlock(obj);
	if (err)
		return err;

	vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, PIN_MAPPABLE);
	if (IS_ERR(vma))
		return PTR_ERR(vma);

	map = i915_vma_pin_iomap(vma);
	i915_vma_unpin(vma);
	if (IS_ERR(map))
		return PTR_ERR(map);

	*v = ioread32(&map[offset / sizeof(*map)]);
	i915_vma_unpin_iomap(vma);

	return 0;
}

static int wc_set(struct drm_i915_gem_object *obj,
		  unsigned long offset,
		  u32 v)
{
	u32 *map;
	int err;

	i915_gem_object_lock(obj);
	err = i915_gem_object_set_to_wc_domain(obj, true);
	i915_gem_object_unlock(obj);
	if (err)
		return err;

	map = i915_gem_object_pin_map(obj, I915_MAP_WC);
	if (IS_ERR(map))
		return PTR_ERR(map);

	map[offset / sizeof(*map)] = v;
	i915_gem_object_unpin_map(obj);

	return 0;
}

static int wc_get(struct drm_i915_gem_object *obj,
		  unsigned long offset,
		  u32 *v)
{
	u32 *map;
	int err;

	i915_gem_object_lock(obj);
	err = i915_gem_object_set_to_wc_domain(obj, false);
	i915_gem_object_unlock(obj);
	if (err)
		return err;

	map = i915_gem_object_pin_map(obj, I915_MAP_WC);
	if (IS_ERR(map))
		return PTR_ERR(map);

	*v = map[offset / sizeof(*map)];
	i915_gem_object_unpin_map(obj);

	return 0;
}

static int gpu_set(struct drm_i915_gem_object *obj,
		   unsigned long offset,
		   u32 v)
{
	struct drm_i915_private *i915 = to_i915(obj->base.dev);
	struct i915_request *rq;
	struct i915_vma *vma;
	u32 *cs;
	int err;

	i915_gem_object_lock(obj);
	err = i915_gem_object_set_to_gtt_domain(obj, true);
	i915_gem_object_unlock(obj);
	if (err)
		return err;

	vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, 0);
	if (IS_ERR(vma))
		return PTR_ERR(vma);

	rq = i915_request_create(i915->engine[RCS0]->kernel_context);
	if (IS_ERR(rq)) {
		i915_vma_unpin(vma);
		return PTR_ERR(rq);
	}

	cs = intel_ring_begin(rq, 4);
	if (IS_ERR(cs)) {
		i915_request_add(rq);
		i915_vma_unpin(vma);
		return PTR_ERR(cs);
	}

	if (INTEL_GEN(i915) >= 8) {
		*cs++ = MI_STORE_DWORD_IMM_GEN4 | 1 << 22;
		*cs++ = lower_32_bits(i915_ggtt_offset(vma) + offset);
		*cs++ = upper_32_bits(i915_ggtt_offset(vma) + offset);
		*cs++ = v;
	} else if (INTEL_GEN(i915) >= 4) {
		*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
		*cs++ = 0;
		*cs++ = i915_ggtt_offset(vma) + offset;
		*cs++ = v;
	} else {
		*cs++ = MI_STORE_DWORD_IMM | MI_MEM_VIRTUAL;
		*cs++ = i915_ggtt_offset(vma) + offset;
		*cs++ = v;
		*cs++ = MI_NOOP;
	}
	intel_ring_advance(rq, cs);

	i915_vma_lock(vma);
	err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
	i915_vma_unlock(vma);
	i915_vma_unpin(vma);

	i915_request_add(rq);

	return err;
}

static bool always_valid(struct drm_i915_private *i915)
{
	return true;
}

static bool needs_fence_registers(struct drm_i915_private *i915)
{
	return !i915_terminally_wedged(i915);
}

static bool needs_mi_store_dword(struct drm_i915_private *i915)
{
	if (i915_terminally_wedged(i915))
		return false;

	return intel_engine_can_store_dword(i915->engine[RCS0]);
}

static const struct igt_coherency_mode {
	const char *name;
	int (*set)(struct drm_i915_gem_object *, unsigned long offset, u32 v);
	int (*get)(struct drm_i915_gem_object *, unsigned long offset, u32 *v);
	bool (*valid)(struct drm_i915_private *i915);
} igt_coherency_mode[] = {
	{ "cpu", cpu_set, cpu_get, always_valid },
	{ "gtt", gtt_set, gtt_get, needs_fence_registers },
	{ "wc", wc_set, wc_get, always_valid },
	{ "gpu", gpu_set, NULL, needs_mi_store_dword },
	{ },
};

static int igt_gem_coherency(void *arg)
{
	const unsigned int ncachelines = PAGE_SIZE/64;
	I915_RND_STATE(prng);
	struct drm_i915_private *i915 = arg;
	const struct igt_coherency_mode *read, *write, *over;
	struct drm_i915_gem_object *obj;
	intel_wakeref_t wakeref;
	unsigned long count, n;
	u32 *offsets, *values;
	int err = 0;

	/* We repeatedly write, overwrite and read from a sequence of
	 * cachelines in order to try and detect incoherency (unflushed writes
	 * from either the CPU or GPU). Each setter/getter uses our cache
	 * domain API which should prevent incoherency.
	 */

	offsets = kmalloc_array(ncachelines, 2*sizeof(u32), GFP_KERNEL);
	if (!offsets)
		return -ENOMEM;
	for (count = 0; count < ncachelines; count++)
		offsets[count] = count * 64 + 4 * (count % 16);

	values = offsets + ncachelines;

	mutex_lock(&i915->drm.struct_mutex);
	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
	for (over = igt_coherency_mode; over->name; over++) {
		if (!over->set)
			continue;

		if (!over->valid(i915))
			continue;

		for (write = igt_coherency_mode; write->name; write++) {
			if (!write->set)
				continue;

			if (!write->valid(i915))
				continue;

			for (read = igt_coherency_mode; read->name; read++) {
				if (!read->get)
					continue;

				if (!read->valid(i915))
					continue;

				for_each_prime_number_from(count, 1, ncachelines) {
					obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
					if (IS_ERR(obj)) {
						err = PTR_ERR(obj);
						goto unlock;
					}

					i915_random_reorder(offsets, ncachelines, &prng);
					for (n = 0; n < count; n++)
						values[n] = prandom_u32_state(&prng);

					for (n = 0; n < count; n++) {
						err = over->set(obj, offsets[n], ~values[n]);
						if (err) {
							pr_err("Failed to set stale value[%ld/%ld] in object using %s, err=%d\n",
							       n, count, over->name, err);
							goto put_object;
						}
					}

					for (n = 0; n < count; n++) {
						err = write->set(obj, offsets[n], values[n]);
						if (err) {
							pr_err("Failed to set value[%ld/%ld] in object using %s, err=%d\n",
							       n, count, write->name, err);
							goto put_object;
						}
					}

					for (n = 0; n < count; n++) {
						u32 found;

						err = read->get(obj, offsets[n], &found);
						if (err) {
							pr_err("Failed to get value[%ld/%ld] in object using %s, err=%d\n",
							       n, count, read->name, err);
							goto put_object;
						}

						if (found != values[n]) {
							pr_err("Value[%ld/%ld] mismatch, (overwrite with %s) wrote [%s] %x read [%s] %x (inverse %x), at offset %x\n",
							       n, count, over->name,
							       write->name, values[n],
							       read->name, found,
							       ~values[n], offsets[n]);
							err = -EINVAL;
							goto put_object;
						}
					}

					i915_gem_object_put(obj);
				}
			}
		}
	}
unlock:
	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
	mutex_unlock(&i915->drm.struct_mutex);
	kfree(offsets);
	return err;

put_object:
	i915_gem_object_put(obj);
	goto unlock;
}

int i915_gem_coherency_live_selftests(struct drm_i915_private *i915)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(igt_gem_coherency),
	};

	return i915_subtests(tests, i915);
}