/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2017 Intel Corporation
 */

#include <linux/prime_numbers.h>

#include "gt/intel_gt.h"

#include "i915_selftest.h"
#include "selftests/i915_random.h"

/* CPU access: write/read one dword via kmap, flushing cachelines as needed. */
static int cpu_set(struct drm_i915_gem_object *obj,
		   unsigned long offset,
		   u32 v)
{
	unsigned int needs_clflush;
	struct page *page;
	void *map;
	u32 *cpu;
	int err;

	err = i915_gem_object_prepare_write(obj, &needs_clflush);
	if (err)
		return err;

	page = i915_gem_object_get_page(obj, offset >> PAGE_SHIFT);
	map = kmap_atomic(page);
	cpu = map + offset_in_page(offset);

	if (needs_clflush & CLFLUSH_BEFORE)
		drm_clflush_virt_range(cpu, sizeof(*cpu));

	*cpu = v;

	if (needs_clflush & CLFLUSH_AFTER)
		drm_clflush_virt_range(cpu, sizeof(*cpu));

	kunmap_atomic(map);
	i915_gem_object_finish_access(obj);

	return 0;
}

static int cpu_get(struct drm_i915_gem_object *obj,
		   unsigned long offset,
		   u32 *v)
{
	unsigned int needs_clflush;
	struct page *page;
	void *map;
	u32 *cpu;
	int err;

	err = i915_gem_object_prepare_read(obj, &needs_clflush);
	if (err)
		return err;

	page = i915_gem_object_get_page(obj, offset >> PAGE_SHIFT);
	map = kmap_atomic(page);
	cpu = map + offset_in_page(offset);

	if (needs_clflush & CLFLUSH_BEFORE)
		drm_clflush_virt_range(cpu, sizeof(*cpu));

	*v = *cpu;

	kunmap_atomic(map);
	i915_gem_object_finish_access(obj);

	return 0;
}

/* GTT access: write/read one dword through an iomap of the mappable aperture. */
static int gtt_set(struct drm_i915_gem_object *obj,
		   unsigned long offset,
		   u32 v)
{
	struct i915_vma *vma;
	u32 __iomem *map;
	int err;

	i915_gem_object_lock(obj);
	err = i915_gem_object_set_to_gtt_domain(obj, true);
	i915_gem_object_unlock(obj);
	if (err)
		return err;

	vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, PIN_MAPPABLE);
	if (IS_ERR(vma))
		return PTR_ERR(vma);

	map = i915_vma_pin_iomap(vma);
	i915_vma_unpin(vma);
	if (IS_ERR(map))
		return PTR_ERR(map);

	iowrite32(v, &map[offset / sizeof(*map)]);
	i915_vma_unpin_iomap(vma);

	return 0;
}

static int gtt_get(struct drm_i915_gem_object *obj,
		   unsigned long offset,
		   u32 *v)
{
	struct i915_vma *vma;
	u32 __iomem *map;
	int err;

	i915_gem_object_lock(obj);
	err = i915_gem_object_set_to_gtt_domain(obj, false);
	i915_gem_object_unlock(obj);
	if (err)
		return err;

	vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, PIN_MAPPABLE);
	if (IS_ERR(vma))
		return PTR_ERR(vma);

	map = i915_vma_pin_iomap(vma);
	i915_vma_unpin(vma);
	if (IS_ERR(map))
		return PTR_ERR(map);

	*v = ioread32(&map[offset / sizeof(*map)]);
	i915_vma_unpin_iomap(vma);

	return 0;
}

/* WC access: write/read one dword through a write-combined mapping. */
static int wc_set(struct drm_i915_gem_object *obj,
		  unsigned long offset,
		  u32 v)
{
	u32 *map;
	int err;

	i915_gem_object_lock(obj);
	err = i915_gem_object_set_to_wc_domain(obj, true);
	i915_gem_object_unlock(obj);
	if (err)
		return err;

	map = i915_gem_object_pin_map(obj, I915_MAP_WC);
	if (IS_ERR(map))
		return PTR_ERR(map);

	map[offset / sizeof(*map)] = v;
	i915_gem_object_unpin_map(obj);

	return 0;
}

static int wc_get(struct drm_i915_gem_object *obj,
		  unsigned long offset,
		  u32 *v)
{
	u32 *map;
	int err;

	i915_gem_object_lock(obj);
	err = i915_gem_object_set_to_wc_domain(obj, false);
	i915_gem_object_unlock(obj);
	if (err)
		return err;

	map = i915_gem_object_pin_map(obj, I915_MAP_WC);
	if (IS_ERR(map))
		return PTR_ERR(map);

	*v = map[offset / sizeof(*map)];
	i915_gem_object_unpin_map(obj);

	return 0;
}

/* GPU access: write one dword from the GPU using MI_STORE_DWORD_IMM on RCS0. */
static int gpu_set(struct drm_i915_gem_object *obj,
		   unsigned long offset,
		   u32 v)
{
	struct drm_i915_private *i915 = to_i915(obj->base.dev);
	struct i915_request *rq;
	struct i915_vma *vma;
	u32 *cs;
	int err;

	i915_gem_object_lock(obj);
	err = i915_gem_object_set_to_gtt_domain(obj, true);
	i915_gem_object_unlock(obj);
	if (err)
		return err;

	vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, 0);
	if (IS_ERR(vma))
		return PTR_ERR(vma);

	rq = i915_request_create(i915->engine[RCS0]->kernel_context);
	if (IS_ERR(rq)) {
		i915_vma_unpin(vma);
		return PTR_ERR(rq);
	}

	cs = intel_ring_begin(rq, 4);
	if (IS_ERR(cs)) {
		i915_request_add(rq);
		i915_vma_unpin(vma);
		return PTR_ERR(cs);
	}

	if (INTEL_GEN(i915) >= 8) {
		*cs++ = MI_STORE_DWORD_IMM_GEN4 | 1 << 22;
		*cs++ = lower_32_bits(i915_ggtt_offset(vma) + offset);
		*cs++ = upper_32_bits(i915_ggtt_offset(vma) + offset);
		*cs++ = v;
	} else if (INTEL_GEN(i915) >= 4) {
		*cs++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT;
		*cs++ = 0;
		*cs++ = i915_ggtt_offset(vma) + offset;
		*cs++ = v;
	} else {
		*cs++ = MI_STORE_DWORD_IMM | MI_MEM_VIRTUAL;
		*cs++ = i915_ggtt_offset(vma) + offset;
		*cs++ = v;
		*cs++ = MI_NOOP;
	}
	intel_ring_advance(rq, cs);

	i915_vma_lock(vma);
	err = i915_request_await_object(rq, vma->obj, true);
	if (err == 0)
		err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
	i915_vma_unlock(vma);
	i915_vma_unpin(vma);

	i915_request_add(rq);

	return err;
}

/* Validity checks: skip access modes the device cannot currently exercise. */
static bool always_valid(struct drm_i915_private *i915)
{
	return true;
}

static bool needs_fence_registers(struct drm_i915_private *i915)
{
	return !intel_gt_is_wedged(&i915->gt);
}

static bool needs_mi_store_dword(struct drm_i915_private *i915)
{
	if (intel_gt_is_wedged(&i915->gt))
		return false;

	if (!HAS_ENGINE(i915, RCS0))
		return false;

	return intel_engine_can_store_dword(i915->engine[RCS0]);
}

static const struct igt_coherency_mode {
	const char *name;
	int (*set)(struct drm_i915_gem_object *, unsigned long offset, u32 v);
	int (*get)(struct drm_i915_gem_object *, unsigned long offset, u32 *v);
	bool (*valid)(struct drm_i915_private *i915);
} igt_coherency_mode[] = {
	{ "cpu", cpu_set, cpu_get, always_valid },
	{ "gtt", gtt_set, gtt_get, needs_fence_registers },
	{ "wc", wc_set, wc_get, always_valid },
	{ "gpu", gpu_set, NULL, needs_mi_store_dword },
	{ },
};

static int igt_gem_coherency(void *arg)
{
	const unsigned int ncachelines = PAGE_SIZE / 64;
	I915_RND_STATE(prng);
	struct drm_i915_private *i915 = arg;
	const struct igt_coherency_mode *read, *write, *over;
	struct drm_i915_gem_object *obj;
	intel_wakeref_t wakeref;
	unsigned long count, n;
	u32 *offsets, *values;
	int err = 0;

	/*
	 * We repeatedly write, overwrite and read from a sequence of
	 * cachelines in order to try and detect incoherency (unflushed writes
	 * from either the CPU or GPU). Each setter/getter uses our cache
	 * domain API which should prevent incoherency.
	 */

	offsets = kmalloc_array(ncachelines, 2 * sizeof(u32), GFP_KERNEL);
	if (!offsets)
		return -ENOMEM;
	for (count = 0; count < ncachelines; count++)
		offsets[count] = count * 64 + 4 * (count % 16);

	values = offsets + ncachelines;

	mutex_lock(&i915->drm.struct_mutex);
	wakeref = intel_runtime_pm_get(&i915->runtime_pm);
	for (over = igt_coherency_mode; over->name; over++) {
		if (!over->set)
			continue;

		if (!over->valid(i915))
			continue;

		for (write = igt_coherency_mode; write->name; write++) {
			if (!write->set)
				continue;

			if (!write->valid(i915))
				continue;

			for (read = igt_coherency_mode; read->name; read++) {
				if (!read->get)
					continue;

				if (!read->valid(i915))
					continue;

				for_each_prime_number_from(count, 1, ncachelines) {
					obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
					if (IS_ERR(obj)) {
						err = PTR_ERR(obj);
						goto unlock;
					}

					i915_random_reorder(offsets, ncachelines, &prng);
					for (n = 0; n < count; n++)
						values[n] = prandom_u32_state(&prng);

					for (n = 0; n < count; n++) {
						err = over->set(obj, offsets[n], ~values[n]);
						if (err) {
							pr_err("Failed to set stale value[%ld/%ld] in object using %s, err=%d\n",
							       n, count, over->name, err);
							goto put_object;
						}
					}

					for (n = 0; n < count; n++) {
						err = write->set(obj, offsets[n], values[n]);
						if (err) {
							pr_err("Failed to set value[%ld/%ld] in object using %s, err=%d\n",
							       n, count, write->name, err);
							goto put_object;
						}
					}

					for (n = 0; n < count; n++) {
						u32 found;

						err = read->get(obj, offsets[n], &found);
						if (err) {
							pr_err("Failed to get value[%ld/%ld] in object using %s, err=%d\n",
							       n, count, read->name, err);
							goto put_object;
						}

						if (found != values[n]) {
							pr_err("Value[%ld/%ld] mismatch, (overwrite with %s) wrote [%s] %x read [%s] %x (inverse %x), at offset %x\n",
							       n, count, over->name,
							       write->name, values[n],
							       read->name, found,
							       ~values[n], offsets[n]);
							err = -EINVAL;
							goto put_object;
						}
					}

					i915_gem_object_put(obj);
				}
			}
		}
	}
unlock:
	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
	mutex_unlock(&i915->drm.struct_mutex);
	kfree(offsets);
	return err;

put_object:
	i915_gem_object_put(obj);
	goto unlock;
}

int i915_gem_coherency_live_selftests(struct drm_i915_private *i915)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(igt_gem_coherency),
	};

	return i915_subtests(tests, i915);
}