// SPDX-License-Identifier: MIT
/*
 * Copyright © 2018 Intel Corporation
 */

#include <linux/crc32.h>

#include "gem/i915_gem_stolen.h"

#include "i915_memcpy.h"
#include "i915_selftest.h"
#include "selftests/igt_reset.h"
#include "selftests/igt_atomic.h"
#include "selftests/igt_spinner.h"

static int
__igt_reset_stolen(struct intel_gt *gt,
		   intel_engine_mask_t mask,
		   const char *msg)
{
	struct i915_ggtt *ggtt = &gt->i915->ggtt;
	const struct resource *dsm = &gt->i915->dsm;
	resource_size_t num_pages, page;
	struct intel_engine_cs *engine;
	intel_wakeref_t wakeref;
	enum intel_engine_id id;
	struct igt_spinner spin;
	long max, count;
	void *tmp;
	u32 *crc;
	int err;

	/*
	 * Check that a GPU reset does not scribble over stolen memory that
	 * may be in use: keep the selected engines busy with spinners, CRC
	 * every stolen page, reset, then compare. Clobbers below
	 * I915_GEM_STOLEN_BIAS are only reported; a clobbered page above the
	 * bias that is not covered by a stolen allocation is a failure.
	 */

	if (!drm_mm_node_allocated(&ggtt->error_capture))
		return 0;

	num_pages = resource_size(dsm) >> PAGE_SHIFT;
	if (!num_pages)
		return 0;

	crc = kmalloc_array(num_pages, sizeof(u32), GFP_KERNEL);
	if (!crc)
		return -ENOMEM;

	tmp = kmalloc(PAGE_SIZE, GFP_KERNEL);
	if (!tmp) {
		err = -ENOMEM;
		goto err_crc;
	}

	igt_global_reset_lock(gt);
	wakeref = intel_runtime_pm_get(gt->uncore->rpm);

	err = igt_spinner_init(&spin, gt);
	if (err)
		goto err_lock;

	for_each_engine(engine, gt, id) {
		struct intel_context *ce;
		struct i915_request *rq;

		if (!(mask & engine->mask))
			continue;

		if (!intel_engine_can_store_dword(engine))
			continue;

		ce = intel_context_create(engine);
		if (IS_ERR(ce)) {
			err = PTR_ERR(ce);
			goto err_spin;
		}
		rq = igt_spinner_create_request(&spin, ce, MI_ARB_CHECK);
		intel_context_put(ce);
		if (IS_ERR(rq)) {
			err = PTR_ERR(rq);
			goto err_spin;
		}
		i915_request_add(rq);
	}

	for (page = 0; page < num_pages; page++) {
		dma_addr_t dma = (dma_addr_t)dsm->start + (page << PAGE_SHIFT);
		void __iomem *s;
		void *in;

		ggtt->vm.insert_page(&ggtt->vm, dma,
				     ggtt->error_capture.start,
				     I915_CACHE_NONE, 0);
		mb();

		s = io_mapping_map_wc(&ggtt->iomap,
				      ggtt->error_capture.start,
				      PAGE_SIZE);

		/* Fill unused pages with a known pattern to expose clobbers */
		if (!__drm_mm_interval_first(&gt->i915->mm.stolen,
					     page << PAGE_SHIFT,
					     ((page + 1) << PAGE_SHIFT) - 1))
			memset32(s, STACK_MAGIC, PAGE_SIZE / sizeof(u32));

		in = s;
		if (i915_memcpy_from_wc(tmp, s, PAGE_SIZE))
			in = tmp;
		crc[page] = crc32_le(0, in, PAGE_SIZE);

		io_mapping_unmap(s);
	}
	mb();
	ggtt->vm.clear_range(&ggtt->vm, ggtt->error_capture.start, PAGE_SIZE);

	if (mask == ALL_ENGINES) {
		intel_gt_reset(gt, mask, NULL);
	} else {
		for_each_engine(engine, gt, id) {
			if (mask & engine->mask)
				intel_engine_reset(engine, NULL);
		}
	}

	max = -1;
	count = 0;
	for (page = 0; page < num_pages; page++) {
		dma_addr_t dma = (dma_addr_t)dsm->start + (page << PAGE_SHIFT);
		void __iomem *s;
		void *in;
		u32 x;

		ggtt->vm.insert_page(&ggtt->vm, dma,
				     ggtt->error_capture.start,
				     I915_CACHE_NONE, 0);
		mb();

		s = io_mapping_map_wc(&ggtt->iomap,
				      ggtt->error_capture.start,
				      PAGE_SIZE);

		in = s;
		if (i915_memcpy_from_wc(tmp, s, PAGE_SIZE))
			in = tmp;
		x = crc32_le(0, in, PAGE_SIZE);

		if (x != crc[page] &&
		    !__drm_mm_interval_first(&gt->i915->mm.stolen,
					     page << PAGE_SHIFT,
					     ((page + 1) << PAGE_SHIFT) - 1)) {
			pr_debug("unused stolen page %pa modified by GPU reset\n",
				 &page);
			if (count++ == 0)
				igt_hexdump(in, PAGE_SIZE);
			max = page;
		}

		io_mapping_unmap(s);
	}
	mb();
	ggtt->vm.clear_range(&ggtt->vm, ggtt->error_capture.start, PAGE_SIZE);

	if (count > 0) {
		pr_info("%s reset clobbered %ld pages of stolen, last clobber at page %ld\n",
			msg, count, max);
	}
	if (max >= I915_GEM_STOLEN_BIAS >> PAGE_SHIFT) {
		pr_err("%s reset clobbered unreserved area [above %x] of stolen; may cause severe faults\n",
		       msg, I915_GEM_STOLEN_BIAS);
		err = -EINVAL;
	}

err_spin:
	igt_spinner_fini(&spin);

err_lock:
	intel_runtime_pm_put(gt->uncore->rpm, wakeref);
	igt_global_reset_unlock(gt);

	kfree(tmp);
err_crc:
	kfree(crc);
	return err;
}

static int igt_reset_device_stolen(void *arg)
{
	return __igt_reset_stolen(arg, ALL_ENGINES, "device");
}

static int igt_reset_engines_stolen(void *arg)
{
	struct intel_gt *gt = arg;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	int err;

	if (!intel_has_reset_engine(gt))
		return 0;

	for_each_engine(engine, gt, id) {
		err = __igt_reset_stolen(gt, engine->mask, engine->name);
		if (err)
			return err;
	}

	return 0;
}

static int igt_global_reset(void *arg)
{
	struct intel_gt *gt = arg;
	unsigned int reset_count;
	intel_wakeref_t wakeref;
	int err = 0;

	/* Check that we can issue a global GPU reset */

	igt_global_reset_lock(gt);
	wakeref = intel_runtime_pm_get(gt->uncore->rpm);

	reset_count = i915_reset_count(&gt->i915->gpu_error);

	intel_gt_reset(gt, ALL_ENGINES, NULL);

	if (i915_reset_count(&gt->i915->gpu_error) == reset_count) {
		pr_err("No GPU reset recorded!\n");
		err = -EINVAL;
	}

	intel_runtime_pm_put(gt->uncore->rpm, wakeref);
	igt_global_reset_unlock(gt);

	if (intel_gt_is_wedged(gt))
		err = -EIO;

	return err;
}

static int igt_wedged_reset(void *arg)
{
	struct intel_gt *gt = arg;
	intel_wakeref_t wakeref;

	/* Check that we can recover a wedged device with a GPU reset */

	igt_global_reset_lock(gt);
	wakeref = intel_runtime_pm_get(gt->uncore->rpm);

	intel_gt_set_wedged(gt);

	GEM_BUG_ON(!intel_gt_is_wedged(gt));
	intel_gt_reset(gt, ALL_ENGINES, NULL);

	intel_runtime_pm_put(gt->uncore->rpm, wakeref);
	igt_global_reset_unlock(gt);

	return intel_gt_is_wedged(gt) ? -EIO : 0;
}

static int igt_atomic_reset(void *arg)
{
	struct intel_gt *gt = arg;
	const typeof(*igt_atomic_phases) *p;
	int err = 0;

	/* Check that the resets are usable from atomic context */

	intel_gt_pm_get(gt);
	igt_global_reset_lock(gt);

	/* Flush any requests before we get started and check basics */
	if (!igt_force_reset(gt))
		goto unlock;

	for (p = igt_atomic_phases; p->name; p++) {
		intel_engine_mask_t awake;

		GEM_TRACE("__intel_gt_reset under %s\n", p->name);

		awake = reset_prepare(gt);
		p->critical_section_begin();

		err = __intel_gt_reset(gt, ALL_ENGINES);

		p->critical_section_end();
		reset_finish(gt, awake);

		if (err) {
			pr_err("__intel_gt_reset failed under %s\n", p->name);
			break;
		}
	}

	/* As we poke around the guts, do a full reset before continuing. */
	igt_force_reset(gt);

unlock:
	igt_global_reset_unlock(gt);
	intel_gt_pm_put(gt);

	return err;
}

static int igt_atomic_engine_reset(void *arg)
{
	struct intel_gt *gt = arg;
	const typeof(*igt_atomic_phases) *p;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	int err = 0;

	/* Check that the resets are usable from atomic context */

	if (!intel_has_reset_engine(gt))
		return 0;

	/* Under GuC submission, engine resets are owned by the GuC firmware */
	if (intel_uc_uses_guc_submission(&gt->uc))
		return 0;

	intel_gt_pm_get(gt);
	igt_global_reset_lock(gt);

	/* Flush any requests before we get started and check basics */
	if (!igt_force_reset(gt))
		goto out_unlock;

	for_each_engine(engine, gt, id) {
		tasklet_disable(&engine->execlists.tasklet);
		intel_engine_pm_get(engine);

		for (p = igt_atomic_phases; p->name; p++) {
			GEM_TRACE("intel_engine_reset(%s) under %s\n",
				  engine->name, p->name);

			p->critical_section_begin();
			err = intel_engine_reset(engine, NULL);
			p->critical_section_end();

			if (err) {
				pr_err("intel_engine_reset(%s) failed under %s\n",
				       engine->name, p->name);
				break;
			}
		}

		intel_engine_pm_put(engine);
		tasklet_enable(&engine->execlists.tasklet);
		if (err)
			break;
	}

	/* As we poke around the guts, do a full reset before continuing. */
	igt_force_reset(gt);

out_unlock:
	igt_global_reset_unlock(gt);
	intel_gt_pm_put(gt);

	return err;
}

int intel_reset_live_selftests(struct drm_i915_private *i915)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(igt_global_reset), /* attempt to recover GPU first */
		SUBTEST(igt_reset_device_stolen),
		SUBTEST(igt_reset_engines_stolen),
		SUBTEST(igt_wedged_reset),
		SUBTEST(igt_atomic_reset),
		SUBTEST(igt_atomic_engine_reset),
	};
	struct intel_gt *gt = &i915->gt;

	if (!intel_has_gpu_reset(gt))
		return 0;

	if (intel_gt_is_wedged(gt))
		return -EIO; /* we're long past hope of a successful reset */

	return intel_gt_live_subtests(tests, gt);
}