/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2017 Intel Corporation
 */

#include <linux/prime_numbers.h>

#include "gt/intel_gt.h"

#include "i915_selftest.h"
#include "selftests/i915_random.h"

static int cpu_set(struct drm_i915_gem_object *obj,
		   unsigned long offset,
		   u32 v)
{
	unsigned int needs_clflush;
	struct page *page;
	void *map;
	u32 *cpu;
	int err;

	err = i915_gem_object_prepare_write(obj, &needs_clflush);
	if (err)
		return err;

	page = i915_gem_object_get_page(obj, offset >> PAGE_SHIFT);
	map = kmap_atomic(page);
	cpu = map + offset_in_page(offset);

	if (needs_clflush & CLFLUSH_BEFORE)
		drm_clflush_virt_range(cpu, sizeof(*cpu));

	*cpu = v;

	if (needs_clflush & CLFLUSH_AFTER)
		drm_clflush_virt_range(cpu, sizeof(*cpu));

	kunmap_atomic(map);
	i915_gem_object_finish_access(obj);

	return 0;
}

static int cpu_get(struct drm_i915_gem_object *obj,
		   unsigned long offset,
		   u32 *v)
{
	unsigned int needs_clflush;
	struct page *page;
	void *map;
	u32 *cpu;
	int err;

	err = i915_gem_object_prepare_read(obj, &needs_clflush);
	if (err)
		return err;

	page = i915_gem_object_get_page(obj, offset >> PAGE_SHIFT);
	map = kmap_atomic(page);
	cpu = map + offset_in_page(offset);

	if (needs_clflush & CLFLUSH_BEFORE)
		drm_clflush_virt_range(cpu, sizeof(*cpu));

	*v = *cpu;

	kunmap_atomic(map);
	i915_gem_object_finish_access(obj);

	return 0;
}

static int gtt_set(struct drm_i915_gem_object *obj,
		   unsigned long offset,
		   u32 v)
{
	struct i915_vma *vma;
	u32 __iomem *map;
	int err;

	i915_gem_object_lock(obj);
	err = i915_gem_object_set_to_gtt_domain(obj, true);
	i915_gem_object_unlock(obj);
	if (err)
		return err;

	vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, PIN_MAPPABLE);
	if (IS_ERR(vma))
		return PTR_ERR(vma);

	map = i915_vma_pin_iomap(vma);
	i915_vma_unpin(vma);
	if (IS_ERR(map))
		return PTR_ERR(map);

	iowrite32(v, &map[offset / sizeof(*map)]);
	i915_vma_unpin_iomap(vma);

	return 0;
}

static int gtt_get(struct drm_i915_gem_object *obj,
		   unsigned long offset,
		   u32 *v)
{
	struct i915_vma *vma;
	u32 __iomem *map;
	int err;

	i915_gem_object_lock(obj);
	err = i915_gem_object_set_to_gtt_domain(obj, false);
	i915_gem_object_unlock(obj);
	if (err)
		return err;

	vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, PIN_MAPPABLE);
	if (IS_ERR(vma))
		return PTR_ERR(vma);

	map = i915_vma_pin_iomap(vma);
	i915_vma_unpin(vma);
	if (IS_ERR(map))
		return PTR_ERR(map);

	*v = ioread32(&map[offset / sizeof(*map)]);
	i915_vma_unpin_iomap(vma);

	return 0;
}

static int wc_set(struct drm_i915_gem_object *obj,
		  unsigned long offset,
		  u32 v)
{
	u32 *map;
	int err;

	i915_gem_object_lock(obj);
	err = i915_gem_object_set_to_wc_domain(obj, true);
	i915_gem_object_unlock(obj);
	if (err)
		return err;

	map = i915_gem_object_pin_map(obj, I915_MAP_WC);
	if (IS_ERR(map))
		return PTR_ERR(map);

	map[offset / sizeof(*map)] = v;
	i915_gem_object_unpin_map(obj);

	return 0;
}

static int wc_get(struct drm_i915_gem_object *obj,
		  unsigned long offset,
		  u32 *v)
{
	u32 *map;
	int err;

	i915_gem_object_lock(obj);
	err = i915_gem_object_set_to_wc_domain(obj, false);
	i915_gem_object_unlock(obj);
	if (err)
		return err;

	map = i915_gem_object_pin_map(obj, I915_MAP_WC);
	if (IS_ERR(map))
		return PTR_ERR(map);

	*v = map[offset / sizeof(*map)];
	i915_gem_object_unpin_map(obj);

	return 0;
}
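
/*
 * Write a single dword into the object from the GPU: a request is created
 * on the render engine's kernel context and a MI_STORE_DWORD_IMM (or the
 * pre-gen4 equivalent) is emitted, targeting the object's GGTT address
 * plus the requested offset.
 */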
static int gpu_set(struct drm_i915_gem_object *obj,
		   unsigned long offset,
		   u32 v)
{
	struct drm_i915_private *i915 = to_i915(obj->base.dev);
	struct i915_request *rq;
	struct i915_vma *vma;
	u32 *cs;
	int err;

	i915_gem_object_lock(obj);
	err = i915_gem_object_set_to_gtt_domain(obj, true);
	i915_gem_object_unlock(obj);
	if (err)
		return err;

	vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, 0);
	if (IS_ERR(vma))
		return PTR_ERR(vma);

	rq = i915_request_create(i915->engine[RCS0]->kernel_context);
	if (IS_ERR(rq)) {
		i915_vma_unpin(vma);
		return PTR_ERR(rq);
	}

	cs = intel_ring_begin(rq, 4);
	if (IS_ERR(cs)) {
		i915_request_add(rq);
		i915_vma_unpin(vma);
		return PTR_ERR(cs);
	}

	if (INTEL_GEN(i915) >= 8) {
		*cs++ = MI_STORE_DWORD_IMM_GEN4 | 1 << 22;
		*cs++ = lower_32_bits(i915_ggtt_offset(vma) + offset);
		*cs++ = upper_32_bits(i915_ggtt_offset(vma) + offset);
		*cs++ = v;
	} else if (INTEL_GEN(i915) >= 4) {
		*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
		*cs++ = 0;
		*cs++ = i915_ggtt_offset(vma) + offset;
		*cs++ = v;
	} else {
		*cs++ = MI_STORE_DWORD_IMM | MI_MEM_VIRTUAL;
		*cs++ = i915_ggtt_offset(vma) + offset;
		*cs++ = v;
		*cs++ = MI_NOOP;
	}
	intel_ring_advance(rq, cs);

	i915_vma_lock(vma);
	err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
	i915_vma_unlock(vma);
	i915_vma_unpin(vma);

	i915_request_add(rq);

	return err;
}

static bool always_valid(struct drm_i915_private *i915)
{
	return true;
}

static bool needs_fence_registers(struct drm_i915_private *i915)
{
	return !intel_gt_is_wedged(&i915->gt);
}

static bool needs_mi_store_dword(struct drm_i915_private *i915)
{
	if (intel_gt_is_wedged(&i915->gt))
		return false;

	if (!HAS_ENGINE(i915, RCS0))
		return false;

	return intel_engine_can_store_dword(i915->engine[RCS0]);
}

static const struct igt_coherency_mode {
	const char *name;
	int (*set)(struct drm_i915_gem_object *, unsigned long offset, u32 v);
	int (*get)(struct drm_i915_gem_object *, unsigned long offset, u32 *v);
	bool (*valid)(struct drm_i915_private *i915);
} igt_coherency_mode[] = {
	{ "cpu", cpu_set, cpu_get, always_valid },
	{ "gtt", gtt_set, gtt_get, needs_fence_registers },
	{ "wc", wc_set, wc_get, always_valid },
	{ "gpu", gpu_set, NULL, needs_mi_store_dword },
	{ },
};
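
/*
 * Exercise every valid combination of (stale overwrite, write, read) access
 * modes from the table above: stale values are written first, overwritten
 * with the expected values, then read back and compared, over a
 * pseudo-randomly ordered set of cacheline offsets within a single page.
 */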
static int igt_gem_coherency(void *arg)
{
	const unsigned int ncachelines = PAGE_SIZE/64;
	I915_RND_STATE(prng);
	struct drm_i915_private *i915 = arg;
	const struct igt_coherency_mode *read, *write, *over;
	struct drm_i915_gem_object *obj;
	intel_wakeref_t wakeref;
	unsigned long count, n;
	u32 *offsets, *values;
	int err = 0;

	/* We repeatedly write, overwrite and read from a sequence of
	 * cachelines in order to try and detect incoherency (unflushed writes
	 * from either the CPU or GPU). Each setter/getter uses our cache
	 * domain API which should prevent incoherency.
	 */

	offsets = kmalloc_array(ncachelines, 2*sizeof(u32), GFP_KERNEL);
	if (!offsets)
		return -ENOMEM;
	for (count = 0; count < ncachelines; count++)
		offsets[count] = count * 64 + 4 * (count % 16);

	values = offsets + ncachelines;

	mutex_lock(&i915->drm.struct_mutex);
	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
	for (over = igt_coherency_mode; over->name; over++) {
		if (!over->set)
			continue;

		if (!over->valid(i915))
			continue;

		for (write = igt_coherency_mode; write->name; write++) {
			if (!write->set)
				continue;

			if (!write->valid(i915))
				continue;

			for (read = igt_coherency_mode; read->name; read++) {
				if (!read->get)
					continue;

				if (!read->valid(i915))
					continue;

				for_each_prime_number_from(count, 1, ncachelines) {
					obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
					if (IS_ERR(obj)) {
						err = PTR_ERR(obj);
						goto unlock;
					}

					i915_random_reorder(offsets, ncachelines, &prng);
					for (n = 0; n < count; n++)
						values[n] = prandom_u32_state(&prng);

					for (n = 0; n < count; n++) {
						err = over->set(obj, offsets[n], ~values[n]);
						if (err) {
							pr_err("Failed to set stale value[%ld/%ld] in object using %s, err=%d\n",
							       n, count, over->name, err);
							goto put_object;
						}
					}

					for (n = 0; n < count; n++) {
						err = write->set(obj, offsets[n], values[n]);
						if (err) {
							pr_err("Failed to set value[%ld/%ld] in object using %s, err=%d\n",
							       n, count, write->name, err);
							goto put_object;
						}
					}

					for (n = 0; n < count; n++) {
						u32 found;

						err = read->get(obj, offsets[n], &found);
						if (err) {
							pr_err("Failed to get value[%ld/%ld] in object using %s, err=%d\n",
							       n, count, read->name, err);
							goto put_object;
						}

						if (found != values[n]) {
							pr_err("Value[%ld/%ld] mismatch, (overwrite with %s) wrote [%s] %x read [%s] %x (inverse %x), at offset %x\n",
							       n, count, over->name,
							       write->name, values[n],
							       read->name, found,
							       ~values[n], offsets[n]);
							err = -EINVAL;
							goto put_object;
						}
					}

					i915_gem_object_put(obj);
				}
			}
		}
	}
unlock:
	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
	mutex_unlock(&i915->drm.struct_mutex);
	kfree(offsets);
	return err;

put_object:
	i915_gem_object_put(obj);
	goto unlock;
}

int i915_gem_coherency_live_selftests(struct drm_i915_private *i915)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(igt_gem_coherency),
	};

	return i915_subtests(tests, i915);
}
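
/*
 * NOTE (assumption, not part of the original file): like the other i915 live
 * selftests, this test is only built when CONFIG_DRM_I915_SELFTEST is enabled
 * and is expected to be invoked through the live selftest harness (for
 * example via the i915 live_selftests module parameter).
 */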