124f90d66SChris Wilson // SPDX-License-Identifier: MIT 2112ed2d3SChris Wilson /* 3112ed2d3SChris Wilson * Copyright © 2016 Intel Corporation 4112ed2d3SChris Wilson */ 5112ed2d3SChris Wilson 6112ed2d3SChris Wilson #include <linux/kthread.h> 7112ed2d3SChris Wilson 810be98a7SChris Wilson #include "gem/i915_gem_context.h" 9e6ba7648SChris Wilson 10e6ba7648SChris Wilson #include "intel_gt.h" 11e6ba7648SChris Wilson #include "intel_engine_heartbeat.h" 1279ffac85SChris Wilson #include "intel_engine_pm.h" 131b90e4a4SChris Wilson #include "selftest_engine_heartbeat.h" 1479ffac85SChris Wilson 15112ed2d3SChris Wilson #include "i915_selftest.h" 16112ed2d3SChris Wilson #include "selftests/i915_random.h" 17112ed2d3SChris Wilson #include "selftests/igt_flush_test.h" 18112ed2d3SChris Wilson #include "selftests/igt_reset.h" 19f6470c9bSMichal Wajdeczko #include "selftests/igt_atomic.h" 20617e87c0SJohn Harrison #include "selftests/igt_spinner.h" 21617e87c0SJohn Harrison #include "selftests/intel_scheduler_helpers.h" 22112ed2d3SChris Wilson 23112ed2d3SChris Wilson #include "selftests/mock_drm.h" 24112ed2d3SChris Wilson 2510be98a7SChris Wilson #include "gem/selftests/mock_context.h" 2610be98a7SChris Wilson #include "gem/selftests/igt_gem_utils.h" 2710be98a7SChris Wilson 28112ed2d3SChris Wilson #define IGT_IDLE_TIMEOUT 50 /* ms; time to wait after flushing between tests */ 29112ed2d3SChris Wilson 30112ed2d3SChris Wilson struct hang { 31baea429dSTvrtko Ursulin struct intel_gt *gt; 32112ed2d3SChris Wilson struct drm_i915_gem_object *hws; 33112ed2d3SChris Wilson struct drm_i915_gem_object *obj; 34112ed2d3SChris Wilson struct i915_gem_context *ctx; 35112ed2d3SChris Wilson u32 *seqno; 36112ed2d3SChris Wilson u32 *batch; 37112ed2d3SChris Wilson }; 38112ed2d3SChris Wilson 39cb823ed9SChris Wilson static int hang_init(struct hang *h, struct intel_gt *gt) 40112ed2d3SChris Wilson { 41112ed2d3SChris Wilson void *vaddr; 42112ed2d3SChris Wilson int err; 43112ed2d3SChris Wilson 44112ed2d3SChris Wilson memset(h, 0, sizeof(*h)); 45cb823ed9SChris Wilson h->gt = gt; 46112ed2d3SChris Wilson 475888d588SJason Ekstrand h->ctx = kernel_context(gt->i915, NULL); 48112ed2d3SChris Wilson if (IS_ERR(h->ctx)) 49112ed2d3SChris Wilson return PTR_ERR(h->ctx); 50112ed2d3SChris Wilson 51112ed2d3SChris Wilson GEM_BUG_ON(i915_gem_context_is_bannable(h->ctx)); 52112ed2d3SChris Wilson 53cb823ed9SChris Wilson h->hws = i915_gem_object_create_internal(gt->i915, PAGE_SIZE); 54112ed2d3SChris Wilson if (IS_ERR(h->hws)) { 55112ed2d3SChris Wilson err = PTR_ERR(h->hws); 56112ed2d3SChris Wilson goto err_ctx; 57112ed2d3SChris Wilson } 58112ed2d3SChris Wilson 59cb823ed9SChris Wilson h->obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE); 60112ed2d3SChris Wilson if (IS_ERR(h->obj)) { 61112ed2d3SChris Wilson err = PTR_ERR(h->obj); 62112ed2d3SChris Wilson goto err_hws; 63112ed2d3SChris Wilson } 64112ed2d3SChris Wilson 65112ed2d3SChris Wilson i915_gem_object_set_cache_coherency(h->hws, I915_CACHE_LLC); 6617b7ab92SMaarten Lankhorst vaddr = i915_gem_object_pin_map_unlocked(h->hws, I915_MAP_WB); 67112ed2d3SChris Wilson if (IS_ERR(vaddr)) { 68112ed2d3SChris Wilson err = PTR_ERR(vaddr); 69112ed2d3SChris Wilson goto err_obj; 70112ed2d3SChris Wilson } 71112ed2d3SChris Wilson h->seqno = memset(vaddr, 0xff, PAGE_SIZE); 72112ed2d3SChris Wilson 7317b7ab92SMaarten Lankhorst vaddr = i915_gem_object_pin_map_unlocked(h->obj, 74fa85bfd1SVenkata Sandeep Dhanalakota i915_coherent_map_type(gt->i915, h->obj, false)); 75112ed2d3SChris Wilson if (IS_ERR(vaddr)) { 76112ed2d3SChris Wilson err = PTR_ERR(vaddr); 77112ed2d3SChris Wilson goto err_unpin_hws; 78112ed2d3SChris Wilson } 79112ed2d3SChris Wilson h->batch = vaddr; 80112ed2d3SChris Wilson 81112ed2d3SChris Wilson return 0; 82112ed2d3SChris Wilson 83112ed2d3SChris Wilson err_unpin_hws: 84112ed2d3SChris Wilson i915_gem_object_unpin_map(h->hws); 85112ed2d3SChris Wilson err_obj: 86112ed2d3SChris Wilson i915_gem_object_put(h->obj); 87112ed2d3SChris Wilson err_hws: 88112ed2d3SChris Wilson i915_gem_object_put(h->hws); 89112ed2d3SChris Wilson err_ctx: 90112ed2d3SChris Wilson kernel_context_close(h->ctx); 91112ed2d3SChris Wilson return err; 92112ed2d3SChris Wilson } 93112ed2d3SChris Wilson 94112ed2d3SChris Wilson static u64 hws_address(const struct i915_vma *hws, 95112ed2d3SChris Wilson const struct i915_request *rq) 96112ed2d3SChris Wilson { 97112ed2d3SChris Wilson return hws->node.start + offset_in_page(sizeof(u32)*rq->fence.context); 98112ed2d3SChris Wilson } 99112ed2d3SChris Wilson 100112ed2d3SChris Wilson static int move_to_active(struct i915_vma *vma, 101112ed2d3SChris Wilson struct i915_request *rq, 102112ed2d3SChris Wilson unsigned int flags) 103112ed2d3SChris Wilson { 104112ed2d3SChris Wilson int err; 105112ed2d3SChris Wilson 1066951e589SChris Wilson i915_vma_lock(vma); 10770d6894dSChris Wilson err = i915_request_await_object(rq, vma->obj, 10870d6894dSChris Wilson flags & EXEC_OBJECT_WRITE); 10970d6894dSChris Wilson if (err == 0) 110112ed2d3SChris Wilson err = i915_vma_move_to_active(vma, rq, flags); 1116951e589SChris Wilson i915_vma_unlock(vma); 112c017cf6bSChris Wilson 113112ed2d3SChris Wilson return err; 114112ed2d3SChris Wilson } 115112ed2d3SChris Wilson 116112ed2d3SChris Wilson static struct i915_request * 117112ed2d3SChris Wilson hang_create_request(struct hang *h, struct intel_engine_cs *engine) 118112ed2d3SChris Wilson { 119cb823ed9SChris Wilson struct intel_gt *gt = h->gt; 120c6d04e48SDaniel Vetter struct i915_address_space *vm = i915_gem_context_get_eb_vm(h->ctx); 121a93615f9SChris Wilson struct drm_i915_gem_object *obj; 122112ed2d3SChris Wilson struct i915_request *rq = NULL; 123112ed2d3SChris Wilson struct i915_vma *hws, *vma; 124112ed2d3SChris Wilson unsigned int flags; 125a93615f9SChris Wilson void *vaddr; 126112ed2d3SChris Wilson u32 *batch; 127112ed2d3SChris Wilson int err; 128112ed2d3SChris Wilson 129cb823ed9SChris Wilson obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE); 130a4e7ccdaSChris Wilson if (IS_ERR(obj)) { 131a4e7ccdaSChris Wilson i915_vm_put(vm); 132112ed2d3SChris Wilson return ERR_CAST(obj); 133a4e7ccdaSChris Wilson } 134112ed2d3SChris Wilson 135fa85bfd1SVenkata Sandeep Dhanalakota vaddr = i915_gem_object_pin_map_unlocked(obj, i915_coherent_map_type(gt->i915, obj, false)); 136112ed2d3SChris Wilson if (IS_ERR(vaddr)) { 137112ed2d3SChris Wilson i915_gem_object_put(obj); 138a4e7ccdaSChris Wilson i915_vm_put(vm); 139112ed2d3SChris Wilson return ERR_CAST(vaddr); 140112ed2d3SChris Wilson } 141112ed2d3SChris Wilson 142112ed2d3SChris Wilson i915_gem_object_unpin_map(h->obj); 143112ed2d3SChris Wilson i915_gem_object_put(h->obj); 144112ed2d3SChris Wilson 145112ed2d3SChris Wilson h->obj = obj; 146112ed2d3SChris Wilson h->batch = vaddr; 147112ed2d3SChris Wilson 148112ed2d3SChris Wilson vma = i915_vma_instance(h->obj, vm, NULL); 149a4e7ccdaSChris Wilson if (IS_ERR(vma)) { 150a4e7ccdaSChris Wilson i915_vm_put(vm); 151112ed2d3SChris Wilson return ERR_CAST(vma); 152a4e7ccdaSChris Wilson } 153112ed2d3SChris Wilson 154112ed2d3SChris Wilson hws = i915_vma_instance(h->hws, vm, NULL); 155a4e7ccdaSChris Wilson if (IS_ERR(hws)) { 156a4e7ccdaSChris Wilson i915_vm_put(vm); 157112ed2d3SChris Wilson return ERR_CAST(hws); 158a4e7ccdaSChris Wilson } 159112ed2d3SChris Wilson 160112ed2d3SChris Wilson err = i915_vma_pin(vma, 0, 0, PIN_USER); 161a4e7ccdaSChris Wilson if (err) { 162a4e7ccdaSChris Wilson i915_vm_put(vm); 163112ed2d3SChris Wilson return ERR_PTR(err); 164a4e7ccdaSChris Wilson } 165112ed2d3SChris Wilson 166112ed2d3SChris Wilson err = i915_vma_pin(hws, 0, 0, PIN_USER); 167112ed2d3SChris Wilson if (err) 168112ed2d3SChris Wilson goto unpin_vma; 169112ed2d3SChris Wilson 17046472b3eSChris Wilson rq = igt_request_alloc(h->ctx, engine); 171112ed2d3SChris Wilson if (IS_ERR(rq)) { 172112ed2d3SChris Wilson err = PTR_ERR(rq); 173112ed2d3SChris Wilson goto unpin_hws; 174112ed2d3SChris Wilson } 175112ed2d3SChris Wilson 176112ed2d3SChris Wilson err = move_to_active(vma, rq, 0); 177112ed2d3SChris Wilson if (err) 178112ed2d3SChris Wilson goto cancel_rq; 179112ed2d3SChris Wilson 180112ed2d3SChris Wilson err = move_to_active(hws, rq, 0); 181112ed2d3SChris Wilson if (err) 182112ed2d3SChris Wilson goto cancel_rq; 183112ed2d3SChris Wilson 184112ed2d3SChris Wilson batch = h->batch; 185c816723bSLucas De Marchi if (GRAPHICS_VER(gt->i915) >= 8) { 186112ed2d3SChris Wilson *batch++ = MI_STORE_DWORD_IMM_GEN4; 187112ed2d3SChris Wilson *batch++ = lower_32_bits(hws_address(hws, rq)); 188112ed2d3SChris Wilson *batch++ = upper_32_bits(hws_address(hws, rq)); 189112ed2d3SChris Wilson *batch++ = rq->fence.seqno; 19094ed4753SChris Wilson *batch++ = MI_NOOP; 191112ed2d3SChris Wilson 192112ed2d3SChris Wilson memset(batch, 0, 1024); 193112ed2d3SChris Wilson batch += 1024 / sizeof(*batch); 194112ed2d3SChris Wilson 19594ed4753SChris Wilson *batch++ = MI_NOOP; 196112ed2d3SChris Wilson *batch++ = MI_BATCH_BUFFER_START | 1 << 8 | 1; 197112ed2d3SChris Wilson *batch++ = lower_32_bits(vma->node.start); 198112ed2d3SChris Wilson *batch++ = upper_32_bits(vma->node.start); 199c816723bSLucas De Marchi } else if (GRAPHICS_VER(gt->i915) >= 6) { 200112ed2d3SChris Wilson *batch++ = MI_STORE_DWORD_IMM_GEN4; 201112ed2d3SChris Wilson *batch++ = 0; 202112ed2d3SChris Wilson *batch++ = lower_32_bits(hws_address(hws, rq)); 203112ed2d3SChris Wilson *batch++ = rq->fence.seqno; 20494ed4753SChris Wilson *batch++ = MI_NOOP; 205112ed2d3SChris Wilson 206112ed2d3SChris Wilson memset(batch, 0, 1024); 207112ed2d3SChris Wilson batch += 1024 / sizeof(*batch); 208112ed2d3SChris Wilson 20994ed4753SChris Wilson *batch++ = MI_NOOP; 210112ed2d3SChris Wilson *batch++ = MI_BATCH_BUFFER_START | 1 << 8; 211112ed2d3SChris Wilson *batch++ = lower_32_bits(vma->node.start); 212c816723bSLucas De Marchi } else if (GRAPHICS_VER(gt->i915) >= 4) { 213112ed2d3SChris Wilson *batch++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; 214112ed2d3SChris Wilson *batch++ = 0; 215112ed2d3SChris Wilson *batch++ = lower_32_bits(hws_address(hws, rq)); 216112ed2d3SChris Wilson *batch++ = rq->fence.seqno; 21794ed4753SChris Wilson *batch++ = MI_NOOP; 218112ed2d3SChris Wilson 219112ed2d3SChris Wilson memset(batch, 0, 1024); 220112ed2d3SChris Wilson batch += 1024 / sizeof(*batch); 221112ed2d3SChris Wilson 22294ed4753SChris Wilson *batch++ = MI_NOOP; 223112ed2d3SChris Wilson *batch++ = MI_BATCH_BUFFER_START | 2 << 6; 224112ed2d3SChris Wilson *batch++ = lower_32_bits(vma->node.start); 225112ed2d3SChris Wilson } else { 226112ed2d3SChris Wilson *batch++ = MI_STORE_DWORD_IMM | MI_MEM_VIRTUAL; 227112ed2d3SChris Wilson *batch++ = lower_32_bits(hws_address(hws, rq)); 228112ed2d3SChris Wilson *batch++ = rq->fence.seqno; 22994ed4753SChris Wilson *batch++ = MI_NOOP; 230112ed2d3SChris Wilson 231112ed2d3SChris Wilson memset(batch, 0, 1024); 232112ed2d3SChris Wilson batch += 1024 / sizeof(*batch); 233112ed2d3SChris Wilson 23494ed4753SChris Wilson *batch++ = MI_NOOP; 235112ed2d3SChris Wilson *batch++ = MI_BATCH_BUFFER_START | 2 << 6; 236112ed2d3SChris Wilson *batch++ = lower_32_bits(vma->node.start); 237112ed2d3SChris Wilson } 238112ed2d3SChris Wilson *batch++ = MI_BATCH_BUFFER_END; /* not reached */ 239baea429dSTvrtko Ursulin intel_gt_chipset_flush(engine->gt); 240112ed2d3SChris Wilson 241112ed2d3SChris Wilson if (rq->engine->emit_init_breadcrumb) { 242112ed2d3SChris Wilson err = rq->engine->emit_init_breadcrumb(rq); 243112ed2d3SChris Wilson if (err) 244112ed2d3SChris Wilson goto cancel_rq; 245112ed2d3SChris Wilson } 246112ed2d3SChris Wilson 247112ed2d3SChris Wilson flags = 0; 248c816723bSLucas De Marchi if (GRAPHICS_VER(gt->i915) <= 5) 249112ed2d3SChris Wilson flags |= I915_DISPATCH_SECURE; 250112ed2d3SChris Wilson 251112ed2d3SChris Wilson err = rq->engine->emit_bb_start(rq, vma->node.start, PAGE_SIZE, flags); 252112ed2d3SChris Wilson 253112ed2d3SChris Wilson cancel_rq: 254112ed2d3SChris Wilson if (err) { 25536e191f0SChris Wilson i915_request_set_error_once(rq, err); 256112ed2d3SChris Wilson i915_request_add(rq); 257112ed2d3SChris Wilson } 258112ed2d3SChris Wilson unpin_hws: 259112ed2d3SChris Wilson i915_vma_unpin(hws); 260112ed2d3SChris Wilson unpin_vma: 261112ed2d3SChris Wilson i915_vma_unpin(vma); 262a4e7ccdaSChris Wilson i915_vm_put(vm); 263112ed2d3SChris Wilson return err ? ERR_PTR(err) : rq; 264112ed2d3SChris Wilson } 265112ed2d3SChris Wilson 266112ed2d3SChris Wilson static u32 hws_seqno(const struct hang *h, const struct i915_request *rq) 267112ed2d3SChris Wilson { 268112ed2d3SChris Wilson return READ_ONCE(h->seqno[rq->fence.context % (PAGE_SIZE/sizeof(u32))]); 269112ed2d3SChris Wilson } 270112ed2d3SChris Wilson 271112ed2d3SChris Wilson static void hang_fini(struct hang *h) 272112ed2d3SChris Wilson { 273112ed2d3SChris Wilson *h->batch = MI_BATCH_BUFFER_END; 274baea429dSTvrtko Ursulin intel_gt_chipset_flush(h->gt); 275112ed2d3SChris Wilson 276112ed2d3SChris Wilson i915_gem_object_unpin_map(h->obj); 277112ed2d3SChris Wilson i915_gem_object_put(h->obj); 278112ed2d3SChris Wilson 279112ed2d3SChris Wilson i915_gem_object_unpin_map(h->hws); 280112ed2d3SChris Wilson i915_gem_object_put(h->hws); 281112ed2d3SChris Wilson 282112ed2d3SChris Wilson kernel_context_close(h->ctx); 283112ed2d3SChris Wilson 2847e805762SChris Wilson igt_flush_test(h->gt->i915); 285112ed2d3SChris Wilson } 286112ed2d3SChris Wilson 287112ed2d3SChris Wilson static bool wait_until_running(struct hang *h, struct i915_request *rq) 288112ed2d3SChris Wilson { 289112ed2d3SChris Wilson return !(wait_for_us(i915_seqno_passed(hws_seqno(h, rq), 290112ed2d3SChris Wilson rq->fence.seqno), 291112ed2d3SChris Wilson 10) && 292112ed2d3SChris Wilson wait_for(i915_seqno_passed(hws_seqno(h, rq), 293112ed2d3SChris Wilson rq->fence.seqno), 294112ed2d3SChris Wilson 1000)); 295112ed2d3SChris Wilson } 296112ed2d3SChris Wilson 297112ed2d3SChris Wilson static int igt_hang_sanitycheck(void *arg) 298112ed2d3SChris Wilson { 299cb823ed9SChris Wilson struct intel_gt *gt = arg; 300112ed2d3SChris Wilson struct i915_request *rq; 301112ed2d3SChris Wilson struct intel_engine_cs *engine; 302112ed2d3SChris Wilson enum intel_engine_id id; 303112ed2d3SChris Wilson struct hang h; 304112ed2d3SChris Wilson int err; 305112ed2d3SChris Wilson 306112ed2d3SChris Wilson /* Basic check that we can execute our hanging batch */ 307112ed2d3SChris Wilson 308cb823ed9SChris Wilson err = hang_init(&h, gt); 309112ed2d3SChris Wilson if (err) 3107e805762SChris Wilson return err; 311112ed2d3SChris Wilson 3125d904e3cSTvrtko Ursulin for_each_engine(engine, gt, id) { 313cb823ed9SChris Wilson struct intel_wedge_me w; 314112ed2d3SChris Wilson long timeout; 315112ed2d3SChris Wilson 316112ed2d3SChris Wilson if (!intel_engine_can_store_dword(engine)) 317112ed2d3SChris Wilson continue; 318112ed2d3SChris Wilson 319112ed2d3SChris Wilson rq = hang_create_request(&h, engine); 320112ed2d3SChris Wilson if (IS_ERR(rq)) { 321112ed2d3SChris Wilson err = PTR_ERR(rq); 322112ed2d3SChris Wilson pr_err("Failed to create request for %s, err=%d\n", 323112ed2d3SChris Wilson engine->name, err); 324112ed2d3SChris Wilson goto fini; 325112ed2d3SChris Wilson } 326112ed2d3SChris Wilson 327112ed2d3SChris Wilson i915_request_get(rq); 328112ed2d3SChris Wilson 329112ed2d3SChris Wilson *h.batch = MI_BATCH_BUFFER_END; 330baea429dSTvrtko Ursulin intel_gt_chipset_flush(engine->gt); 331112ed2d3SChris Wilson 332112ed2d3SChris Wilson i915_request_add(rq); 333112ed2d3SChris Wilson 334112ed2d3SChris Wilson timeout = 0; 335cb823ed9SChris Wilson intel_wedge_on_timeout(&w, gt, HZ / 10 /* 100ms */) 3362f530945SChris Wilson timeout = i915_request_wait(rq, 0, 337112ed2d3SChris Wilson MAX_SCHEDULE_TIMEOUT); 338cb823ed9SChris Wilson if (intel_gt_is_wedged(gt)) 339112ed2d3SChris Wilson timeout = -EIO; 340112ed2d3SChris Wilson 341112ed2d3SChris Wilson i915_request_put(rq); 342112ed2d3SChris Wilson 343112ed2d3SChris Wilson if (timeout < 0) { 344112ed2d3SChris Wilson err = timeout; 345112ed2d3SChris Wilson pr_err("Wait for request failed on %s, err=%d\n", 346112ed2d3SChris Wilson engine->name, err); 347112ed2d3SChris Wilson goto fini; 348112ed2d3SChris Wilson } 349112ed2d3SChris Wilson } 350112ed2d3SChris Wilson 351112ed2d3SChris Wilson fini: 352112ed2d3SChris Wilson hang_fini(&h); 353112ed2d3SChris Wilson return err; 354112ed2d3SChris Wilson } 355112ed2d3SChris Wilson 356112ed2d3SChris Wilson static bool wait_for_idle(struct intel_engine_cs *engine) 357112ed2d3SChris Wilson { 358112ed2d3SChris Wilson return wait_for(intel_engine_is_idle(engine), IGT_IDLE_TIMEOUT) == 0; 359112ed2d3SChris Wilson } 360112ed2d3SChris Wilson 361112ed2d3SChris Wilson static int igt_reset_nop(void *arg) 362112ed2d3SChris Wilson { 363cb823ed9SChris Wilson struct intel_gt *gt = arg; 364cb823ed9SChris Wilson struct i915_gpu_error *global = >->i915->gpu_error; 365112ed2d3SChris Wilson struct intel_engine_cs *engine; 366112ed2d3SChris Wilson unsigned int reset_count, count; 367112ed2d3SChris Wilson enum intel_engine_id id; 368112ed2d3SChris Wilson IGT_TIMEOUT(end_time); 369112ed2d3SChris Wilson int err = 0; 370112ed2d3SChris Wilson 371112ed2d3SChris Wilson /* Check that we can reset during non-user portions of requests */ 372112ed2d3SChris Wilson 373cb823ed9SChris Wilson reset_count = i915_reset_count(global); 374112ed2d3SChris Wilson count = 0; 375112ed2d3SChris Wilson do { 3765d904e3cSTvrtko Ursulin for_each_engine(engine, gt, id) { 377e6ba7648SChris Wilson struct intel_context *ce; 378112ed2d3SChris Wilson int i; 379112ed2d3SChris Wilson 380e6ba7648SChris Wilson ce = intel_context_create(engine); 381e6ba7648SChris Wilson if (IS_ERR(ce)) { 382e6ba7648SChris Wilson err = PTR_ERR(ce); 3833f5dff6cSJohn Harrison pr_err("[%s] Create context failed: %d!\n", engine->name, err); 384e6ba7648SChris Wilson break; 385e6ba7648SChris Wilson } 386e6ba7648SChris Wilson 387112ed2d3SChris Wilson for (i = 0; i < 16; i++) { 388112ed2d3SChris Wilson struct i915_request *rq; 389112ed2d3SChris Wilson 390e6ba7648SChris Wilson rq = intel_context_create_request(ce); 391112ed2d3SChris Wilson if (IS_ERR(rq)) { 392112ed2d3SChris Wilson err = PTR_ERR(rq); 3933f5dff6cSJohn Harrison pr_err("[%s] Create request failed: %d!\n", 3943f5dff6cSJohn Harrison engine->name, err); 395112ed2d3SChris Wilson break; 396112ed2d3SChris Wilson } 397112ed2d3SChris Wilson 398112ed2d3SChris Wilson i915_request_add(rq); 399112ed2d3SChris Wilson } 400e6ba7648SChris Wilson 401e6ba7648SChris Wilson intel_context_put(ce); 402112ed2d3SChris Wilson } 403112ed2d3SChris Wilson 404cb823ed9SChris Wilson igt_global_reset_lock(gt); 405cb823ed9SChris Wilson intel_gt_reset(gt, ALL_ENGINES, NULL); 406cb823ed9SChris Wilson igt_global_reset_unlock(gt); 407d8474795SChris Wilson 408cb823ed9SChris Wilson if (intel_gt_is_wedged(gt)) { 4093f5dff6cSJohn Harrison pr_err("[%s] GT is wedged!\n", engine->name); 410112ed2d3SChris Wilson err = -EIO; 411112ed2d3SChris Wilson break; 412112ed2d3SChris Wilson } 413112ed2d3SChris Wilson 414cb823ed9SChris Wilson if (i915_reset_count(global) != reset_count + ++count) { 4153f5dff6cSJohn Harrison pr_err("[%s] Reset not recorded: %d vs %d + %d!\n", 4163f5dff6cSJohn Harrison engine->name, i915_reset_count(global), reset_count, count); 417112ed2d3SChris Wilson err = -EINVAL; 418112ed2d3SChris Wilson break; 419112ed2d3SChris Wilson } 420112ed2d3SChris Wilson 4217e805762SChris Wilson err = igt_flush_test(gt->i915); 4223f5dff6cSJohn Harrison if (err) { 4233f5dff6cSJohn Harrison pr_err("[%s] Flush failed: %d!\n", engine->name, err); 424112ed2d3SChris Wilson break; 4253f5dff6cSJohn Harrison } 426112ed2d3SChris Wilson } while (time_before(jiffies, end_time)); 427112ed2d3SChris Wilson pr_info("%s: %d resets\n", __func__, count); 428112ed2d3SChris Wilson 4293f5dff6cSJohn Harrison if (igt_flush_test(gt->i915)) { 4303f5dff6cSJohn Harrison pr_err("Post flush failed: %d!\n", err); 431112ed2d3SChris Wilson err = -EIO; 4323f5dff6cSJohn Harrison } 4333f5dff6cSJohn Harrison 434112ed2d3SChris Wilson return err; 435112ed2d3SChris Wilson } 436112ed2d3SChris Wilson 437112ed2d3SChris Wilson static int igt_reset_nop_engine(void *arg) 438112ed2d3SChris Wilson { 439cb823ed9SChris Wilson struct intel_gt *gt = arg; 440cb823ed9SChris Wilson struct i915_gpu_error *global = >->i915->gpu_error; 441112ed2d3SChris Wilson struct intel_engine_cs *engine; 442112ed2d3SChris Wilson enum intel_engine_id id; 443112ed2d3SChris Wilson 444112ed2d3SChris Wilson /* Check that we can engine-reset during non-user portions */ 445112ed2d3SChris Wilson 446260e6b71SChris Wilson if (!intel_has_reset_engine(gt)) 447112ed2d3SChris Wilson return 0; 448112ed2d3SChris Wilson 4495d904e3cSTvrtko Ursulin for_each_engine(engine, gt, id) { 450e6ba7648SChris Wilson unsigned int reset_count, reset_engine_count, count; 451e6ba7648SChris Wilson struct intel_context *ce; 452112ed2d3SChris Wilson IGT_TIMEOUT(end_time); 453e6ba7648SChris Wilson int err; 454e6ba7648SChris Wilson 455617e87c0SJohn Harrison if (intel_engine_uses_guc(engine)) { 456617e87c0SJohn Harrison /* Engine level resets are triggered by GuC when a hang 457617e87c0SJohn Harrison * is detected. They can't be triggered by the KMD any 458617e87c0SJohn Harrison * more. Thus a nop batch cannot be used as a reset test 459617e87c0SJohn Harrison */ 460617e87c0SJohn Harrison continue; 461617e87c0SJohn Harrison } 462617e87c0SJohn Harrison 463e6ba7648SChris Wilson ce = intel_context_create(engine); 4643f5dff6cSJohn Harrison if (IS_ERR(ce)) { 465ac5a2dffSNathan Chancellor pr_err("[%s] Create context failed: %pe!\n", engine->name, ce); 466e6ba7648SChris Wilson return PTR_ERR(ce); 4673f5dff6cSJohn Harrison } 468112ed2d3SChris Wilson 469cb823ed9SChris Wilson reset_count = i915_reset_count(global); 470cb823ed9SChris Wilson reset_engine_count = i915_reset_engine_count(global, engine); 471112ed2d3SChris Wilson count = 0; 472112ed2d3SChris Wilson 4731b90e4a4SChris Wilson st_engine_heartbeat_disable(engine); 4749030e39cSThomas Hellström GEM_BUG_ON(test_and_set_bit(I915_RESET_ENGINE + id, 4759030e39cSThomas Hellström >->reset.flags)); 476112ed2d3SChris Wilson do { 477112ed2d3SChris Wilson int i; 478112ed2d3SChris Wilson 479112ed2d3SChris Wilson if (!wait_for_idle(engine)) { 480112ed2d3SChris Wilson pr_err("%s failed to idle before reset\n", 481112ed2d3SChris Wilson engine->name); 482112ed2d3SChris Wilson err = -EIO; 483112ed2d3SChris Wilson break; 484112ed2d3SChris Wilson } 485112ed2d3SChris Wilson 486112ed2d3SChris Wilson for (i = 0; i < 16; i++) { 487112ed2d3SChris Wilson struct i915_request *rq; 488112ed2d3SChris Wilson 489e6ba7648SChris Wilson rq = intel_context_create_request(ce); 490112ed2d3SChris Wilson if (IS_ERR(rq)) { 491f2e85e57SChris Wilson struct drm_printer p = 492f2e85e57SChris Wilson drm_info_printer(gt->i915->drm.dev); 493f2e85e57SChris Wilson intel_engine_dump(engine, &p, 494f2e85e57SChris Wilson "%s(%s): failed to submit request\n", 495f2e85e57SChris Wilson __func__, 496f2e85e57SChris Wilson engine->name); 497f2e85e57SChris Wilson 498f2e85e57SChris Wilson GEM_TRACE("%s(%s): failed to submit request\n", 499f2e85e57SChris Wilson __func__, 500f2e85e57SChris Wilson engine->name); 501f2e85e57SChris Wilson GEM_TRACE_DUMP(); 502f2e85e57SChris Wilson 503f2e85e57SChris Wilson intel_gt_set_wedged(gt); 504f2e85e57SChris Wilson 505112ed2d3SChris Wilson err = PTR_ERR(rq); 506112ed2d3SChris Wilson break; 507112ed2d3SChris Wilson } 508112ed2d3SChris Wilson 509112ed2d3SChris Wilson i915_request_add(rq); 510112ed2d3SChris Wilson } 511cb823ed9SChris Wilson err = intel_engine_reset(engine, NULL); 512112ed2d3SChris Wilson if (err) { 513cd7a214fSChris Wilson pr_err("intel_engine_reset(%s) failed, err:%d\n", 514cd7a214fSChris Wilson engine->name, err); 515112ed2d3SChris Wilson break; 516112ed2d3SChris Wilson } 517112ed2d3SChris Wilson 518cb823ed9SChris Wilson if (i915_reset_count(global) != reset_count) { 519112ed2d3SChris Wilson pr_err("Full GPU reset recorded! (engine reset expected)\n"); 520112ed2d3SChris Wilson err = -EINVAL; 521112ed2d3SChris Wilson break; 522112ed2d3SChris Wilson } 523112ed2d3SChris Wilson 524cb823ed9SChris Wilson if (i915_reset_engine_count(global, engine) != 525112ed2d3SChris Wilson reset_engine_count + ++count) { 526112ed2d3SChris Wilson pr_err("%s engine reset not recorded!\n", 527112ed2d3SChris Wilson engine->name); 528112ed2d3SChris Wilson err = -EINVAL; 529112ed2d3SChris Wilson break; 530112ed2d3SChris Wilson } 531112ed2d3SChris Wilson } while (time_before(jiffies, end_time)); 5329030e39cSThomas Hellström clear_and_wake_up_bit(I915_RESET_ENGINE + id, >->reset.flags); 5331b90e4a4SChris Wilson st_engine_heartbeat_enable(engine); 534e6ba7648SChris Wilson 535112ed2d3SChris Wilson pr_info("%s(%s): %d resets\n", __func__, engine->name, count); 536112ed2d3SChris Wilson 537e6ba7648SChris Wilson intel_context_put(ce); 538e6ba7648SChris Wilson if (igt_flush_test(gt->i915)) 539e6ba7648SChris Wilson err = -EIO; 540112ed2d3SChris Wilson if (err) 541e6ba7648SChris Wilson return err; 542112ed2d3SChris Wilson } 543112ed2d3SChris Wilson 544e6ba7648SChris Wilson return 0; 545112ed2d3SChris Wilson } 546112ed2d3SChris Wilson 547106a9368SChris Wilson static void force_reset_timeout(struct intel_engine_cs *engine) 548106a9368SChris Wilson { 549106a9368SChris Wilson engine->reset_timeout.probability = 999; 550106a9368SChris Wilson atomic_set(&engine->reset_timeout.times, -1); 551106a9368SChris Wilson } 552106a9368SChris Wilson 553106a9368SChris Wilson static void cancel_reset_timeout(struct intel_engine_cs *engine) 554106a9368SChris Wilson { 555106a9368SChris Wilson memset(&engine->reset_timeout, 0, sizeof(engine->reset_timeout)); 556106a9368SChris Wilson } 557106a9368SChris Wilson 558106a9368SChris Wilson static int igt_reset_fail_engine(void *arg) 559106a9368SChris Wilson { 560106a9368SChris Wilson struct intel_gt *gt = arg; 561106a9368SChris Wilson struct intel_engine_cs *engine; 562106a9368SChris Wilson enum intel_engine_id id; 563106a9368SChris Wilson 564106a9368SChris Wilson /* Check that we can recover from engine-reset failues */ 565106a9368SChris Wilson 566106a9368SChris Wilson if (!intel_has_reset_engine(gt)) 567106a9368SChris Wilson return 0; 568106a9368SChris Wilson 569106a9368SChris Wilson for_each_engine(engine, gt, id) { 570106a9368SChris Wilson unsigned int count; 571106a9368SChris Wilson struct intel_context *ce; 572106a9368SChris Wilson IGT_TIMEOUT(end_time); 573106a9368SChris Wilson int err; 574106a9368SChris Wilson 575617e87c0SJohn Harrison /* Can't manually break the reset if i915 doesn't perform it */ 576617e87c0SJohn Harrison if (intel_engine_uses_guc(engine)) 577617e87c0SJohn Harrison continue; 578617e87c0SJohn Harrison 579106a9368SChris Wilson ce = intel_context_create(engine); 5803f5dff6cSJohn Harrison if (IS_ERR(ce)) { 581ac5a2dffSNathan Chancellor pr_err("[%s] Create context failed: %pe!\n", engine->name, ce); 582106a9368SChris Wilson return PTR_ERR(ce); 5833f5dff6cSJohn Harrison } 584106a9368SChris Wilson 585106a9368SChris Wilson st_engine_heartbeat_disable(engine); 5869030e39cSThomas Hellström GEM_BUG_ON(test_and_set_bit(I915_RESET_ENGINE + id, 5879030e39cSThomas Hellström >->reset.flags)); 588106a9368SChris Wilson 589106a9368SChris Wilson force_reset_timeout(engine); 590106a9368SChris Wilson err = intel_engine_reset(engine, NULL); 591106a9368SChris Wilson cancel_reset_timeout(engine); 592106a9368SChris Wilson if (err == 0) /* timeouts only generated on gen8+ */ 593106a9368SChris Wilson goto skip; 594106a9368SChris Wilson 595106a9368SChris Wilson count = 0; 596106a9368SChris Wilson do { 597106a9368SChris Wilson struct i915_request *last = NULL; 598106a9368SChris Wilson int i; 599106a9368SChris Wilson 600106a9368SChris Wilson if (!wait_for_idle(engine)) { 601106a9368SChris Wilson pr_err("%s failed to idle before reset\n", 602106a9368SChris Wilson engine->name); 603106a9368SChris Wilson err = -EIO; 604106a9368SChris Wilson break; 605106a9368SChris Wilson } 606106a9368SChris Wilson 607106a9368SChris Wilson for (i = 0; i < count % 15; i++) { 608106a9368SChris Wilson struct i915_request *rq; 609106a9368SChris Wilson 610106a9368SChris Wilson rq = intel_context_create_request(ce); 611106a9368SChris Wilson if (IS_ERR(rq)) { 612106a9368SChris Wilson struct drm_printer p = 613106a9368SChris Wilson drm_info_printer(gt->i915->drm.dev); 614106a9368SChris Wilson intel_engine_dump(engine, &p, 615106a9368SChris Wilson "%s(%s): failed to submit request\n", 616106a9368SChris Wilson __func__, 617106a9368SChris Wilson engine->name); 618106a9368SChris Wilson 619106a9368SChris Wilson GEM_TRACE("%s(%s): failed to submit request\n", 620106a9368SChris Wilson __func__, 621106a9368SChris Wilson engine->name); 622106a9368SChris Wilson GEM_TRACE_DUMP(); 623106a9368SChris Wilson 624106a9368SChris Wilson intel_gt_set_wedged(gt); 625106a9368SChris Wilson if (last) 626106a9368SChris Wilson i915_request_put(last); 627106a9368SChris Wilson 628106a9368SChris Wilson err = PTR_ERR(rq); 629106a9368SChris Wilson goto out; 630106a9368SChris Wilson } 631106a9368SChris Wilson 632106a9368SChris Wilson if (last) 633106a9368SChris Wilson i915_request_put(last); 634106a9368SChris Wilson last = i915_request_get(rq); 635106a9368SChris Wilson i915_request_add(rq); 636106a9368SChris Wilson } 637106a9368SChris Wilson 638106a9368SChris Wilson if (count & 1) { 639106a9368SChris Wilson err = intel_engine_reset(engine, NULL); 640106a9368SChris Wilson if (err) { 641106a9368SChris Wilson GEM_TRACE_ERR("intel_engine_reset(%s) failed, err:%d\n", 642106a9368SChris Wilson engine->name, err); 643106a9368SChris Wilson GEM_TRACE_DUMP(); 644106a9368SChris Wilson i915_request_put(last); 645106a9368SChris Wilson break; 646106a9368SChris Wilson } 647106a9368SChris Wilson } else { 648106a9368SChris Wilson force_reset_timeout(engine); 649106a9368SChris Wilson err = intel_engine_reset(engine, NULL); 650106a9368SChris Wilson cancel_reset_timeout(engine); 651106a9368SChris Wilson if (err != -ETIMEDOUT) { 652106a9368SChris Wilson pr_err("intel_engine_reset(%s) did not fail, err:%d\n", 653106a9368SChris Wilson engine->name, err); 654106a9368SChris Wilson i915_request_put(last); 655106a9368SChris Wilson break; 656106a9368SChris Wilson } 657106a9368SChris Wilson } 658106a9368SChris Wilson 659106a9368SChris Wilson err = 0; 660106a9368SChris Wilson if (last) { 661106a9368SChris Wilson if (i915_request_wait(last, 0, HZ / 2) < 0) { 662106a9368SChris Wilson struct drm_printer p = 663106a9368SChris Wilson drm_info_printer(gt->i915->drm.dev); 664106a9368SChris Wilson 665106a9368SChris Wilson intel_engine_dump(engine, &p, 666106a9368SChris Wilson "%s(%s): failed to complete request\n", 667106a9368SChris Wilson __func__, 668106a9368SChris Wilson engine->name); 669106a9368SChris Wilson 670106a9368SChris Wilson GEM_TRACE("%s(%s): failed to complete request\n", 671106a9368SChris Wilson __func__, 672106a9368SChris Wilson engine->name); 673106a9368SChris Wilson GEM_TRACE_DUMP(); 674106a9368SChris Wilson 675106a9368SChris Wilson err = -EIO; 676106a9368SChris Wilson } 677106a9368SChris Wilson i915_request_put(last); 678106a9368SChris Wilson } 679106a9368SChris Wilson count++; 680106a9368SChris Wilson } while (err == 0 && time_before(jiffies, end_time)); 681106a9368SChris Wilson out: 682106a9368SChris Wilson pr_info("%s(%s): %d resets\n", __func__, engine->name, count); 683106a9368SChris Wilson skip: 6849030e39cSThomas Hellström clear_and_wake_up_bit(I915_RESET_ENGINE + id, >->reset.flags); 685106a9368SChris Wilson st_engine_heartbeat_enable(engine); 686106a9368SChris Wilson intel_context_put(ce); 687106a9368SChris Wilson 688106a9368SChris Wilson if (igt_flush_test(gt->i915)) 689106a9368SChris Wilson err = -EIO; 690106a9368SChris Wilson if (err) 691106a9368SChris Wilson return err; 692106a9368SChris Wilson } 693106a9368SChris Wilson 694106a9368SChris Wilson return 0; 695106a9368SChris Wilson } 696106a9368SChris Wilson 697cb823ed9SChris Wilson static int __igt_reset_engine(struct intel_gt *gt, bool active) 698112ed2d3SChris Wilson { 699cb823ed9SChris Wilson struct i915_gpu_error *global = >->i915->gpu_error; 700112ed2d3SChris Wilson struct intel_engine_cs *engine; 701112ed2d3SChris Wilson enum intel_engine_id id; 702112ed2d3SChris Wilson struct hang h; 703112ed2d3SChris Wilson int err = 0; 704112ed2d3SChris Wilson 705112ed2d3SChris Wilson /* Check that we can issue an engine reset on an idle engine (no-op) */ 706112ed2d3SChris Wilson 707260e6b71SChris Wilson if (!intel_has_reset_engine(gt)) 708112ed2d3SChris Wilson return 0; 709112ed2d3SChris Wilson 710112ed2d3SChris Wilson if (active) { 711cb823ed9SChris Wilson err = hang_init(&h, gt); 712112ed2d3SChris Wilson if (err) 713112ed2d3SChris Wilson return err; 714112ed2d3SChris Wilson } 715112ed2d3SChris Wilson 7165d904e3cSTvrtko Ursulin for_each_engine(engine, gt, id) { 717112ed2d3SChris Wilson unsigned int reset_count, reset_engine_count; 71880655d2aSChris Wilson unsigned long count; 719617e87c0SJohn Harrison bool using_guc = intel_engine_uses_guc(engine); 720112ed2d3SChris Wilson IGT_TIMEOUT(end_time); 721112ed2d3SChris Wilson 722617e87c0SJohn Harrison if (using_guc && !active) 723617e87c0SJohn Harrison continue; 724617e87c0SJohn Harrison 725112ed2d3SChris Wilson if (active && !intel_engine_can_store_dword(engine)) 726112ed2d3SChris Wilson continue; 727112ed2d3SChris Wilson 728112ed2d3SChris Wilson if (!wait_for_idle(engine)) { 729112ed2d3SChris Wilson pr_err("%s failed to idle before reset\n", 730112ed2d3SChris Wilson engine->name); 731112ed2d3SChris Wilson err = -EIO; 732112ed2d3SChris Wilson break; 733112ed2d3SChris Wilson } 734112ed2d3SChris Wilson 735cb823ed9SChris Wilson reset_count = i915_reset_count(global); 736cb823ed9SChris Wilson reset_engine_count = i915_reset_engine_count(global, engine); 737112ed2d3SChris Wilson 7381b90e4a4SChris Wilson st_engine_heartbeat_disable(engine); 7399030e39cSThomas Hellström GEM_BUG_ON(test_and_set_bit(I915_RESET_ENGINE + id, 7409030e39cSThomas Hellström >->reset.flags)); 74180655d2aSChris Wilson count = 0; 742112ed2d3SChris Wilson do { 743617e87c0SJohn Harrison struct i915_request *rq = NULL; 744617e87c0SJohn Harrison struct intel_selftest_saved_policy saved; 745617e87c0SJohn Harrison int err2; 746112ed2d3SChris Wilson 747617e87c0SJohn Harrison err = intel_selftest_modify_policy(engine, &saved, 748617e87c0SJohn Harrison SELFTEST_SCHEDULER_MODIFY_FAST_RESET); 749617e87c0SJohn Harrison if (err) { 750617e87c0SJohn Harrison pr_err("[%s] Modify policy failed: %d!\n", engine->name, err); 751617e87c0SJohn Harrison break; 752617e87c0SJohn Harrison } 753617e87c0SJohn Harrison 754617e87c0SJohn Harrison if (active) { 755112ed2d3SChris Wilson rq = hang_create_request(&h, engine); 756112ed2d3SChris Wilson if (IS_ERR(rq)) { 757112ed2d3SChris Wilson err = PTR_ERR(rq); 7583f5dff6cSJohn Harrison pr_err("[%s] Create hang request failed: %d!\n", 7593f5dff6cSJohn Harrison engine->name, err); 760617e87c0SJohn Harrison goto restore; 761112ed2d3SChris Wilson } 762112ed2d3SChris Wilson 763112ed2d3SChris Wilson i915_request_get(rq); 764112ed2d3SChris Wilson i915_request_add(rq); 765112ed2d3SChris Wilson 766112ed2d3SChris Wilson if (!wait_until_running(&h, rq)) { 767cb823ed9SChris Wilson struct drm_printer p = drm_info_printer(gt->i915->drm.dev); 768112ed2d3SChris Wilson 769112ed2d3SChris Wilson pr_err("%s: Failed to start request %llx, at %x\n", 770112ed2d3SChris Wilson __func__, rq->fence.seqno, hws_seqno(&h, rq)); 771112ed2d3SChris Wilson intel_engine_dump(engine, &p, 772112ed2d3SChris Wilson "%s\n", engine->name); 773112ed2d3SChris Wilson 774112ed2d3SChris Wilson i915_request_put(rq); 775112ed2d3SChris Wilson err = -EIO; 776617e87c0SJohn Harrison goto restore; 777617e87c0SJohn Harrison } 778112ed2d3SChris Wilson } 779112ed2d3SChris Wilson 780617e87c0SJohn Harrison if (!using_guc) { 781cb823ed9SChris Wilson err = intel_engine_reset(engine, NULL); 782112ed2d3SChris Wilson if (err) { 783cd7a214fSChris Wilson pr_err("intel_engine_reset(%s) failed, err:%d\n", 784cd7a214fSChris Wilson engine->name, err); 785617e87c0SJohn Harrison goto skip; 786112ed2d3SChris Wilson } 787617e87c0SJohn Harrison } 788617e87c0SJohn Harrison 789617e87c0SJohn Harrison if (rq) { 790617e87c0SJohn Harrison /* Ensure the reset happens and kills the engine */ 791617e87c0SJohn Harrison err = intel_selftest_wait_for_rq(rq); 792617e87c0SJohn Harrison if (err) 793617e87c0SJohn Harrison pr_err("[%s] Wait for request %lld:%lld [0x%04X] failed: %d!\n", 794617e87c0SJohn Harrison engine->name, rq->fence.context, 7953cb3e343SMatthew Brost rq->fence.seqno, rq->context->guc_id.id, err); 796617e87c0SJohn Harrison } 797617e87c0SJohn Harrison 798617e87c0SJohn Harrison skip: 799617e87c0SJohn Harrison if (rq) 800617e87c0SJohn Harrison i915_request_put(rq); 801112ed2d3SChris Wilson 802cb823ed9SChris Wilson if (i915_reset_count(global) != reset_count) { 803112ed2d3SChris Wilson pr_err("Full GPU reset recorded! (engine reset expected)\n"); 804112ed2d3SChris Wilson err = -EINVAL; 805617e87c0SJohn Harrison goto restore; 806112ed2d3SChris Wilson } 807112ed2d3SChris Wilson 808617e87c0SJohn Harrison /* GuC based resets are not logged per engine */ 809617e87c0SJohn Harrison if (!using_guc) { 810cb823ed9SChris Wilson if (i915_reset_engine_count(global, engine) != 811112ed2d3SChris Wilson ++reset_engine_count) { 812112ed2d3SChris Wilson pr_err("%s engine reset not recorded!\n", 813112ed2d3SChris Wilson engine->name); 814112ed2d3SChris Wilson err = -EINVAL; 815617e87c0SJohn Harrison goto restore; 816617e87c0SJohn Harrison } 817112ed2d3SChris Wilson } 81880655d2aSChris Wilson 81980655d2aSChris Wilson count++; 820617e87c0SJohn Harrison 821617e87c0SJohn Harrison restore: 822617e87c0SJohn Harrison err2 = intel_selftest_restore_policy(engine, &saved); 823617e87c0SJohn Harrison if (err2) 824617e87c0SJohn Harrison pr_err("[%s] Restore policy failed: %d!\n", engine->name, err); 825617e87c0SJohn Harrison if (err == 0) 826617e87c0SJohn Harrison err = err2; 827617e87c0SJohn Harrison if (err) 828617e87c0SJohn Harrison break; 829112ed2d3SChris Wilson } while (time_before(jiffies, end_time)); 8309030e39cSThomas Hellström clear_and_wake_up_bit(I915_RESET_ENGINE + id, >->reset.flags); 8311b90e4a4SChris Wilson st_engine_heartbeat_enable(engine); 83280655d2aSChris Wilson pr_info("%s: Completed %lu %s resets\n", 83380655d2aSChris Wilson engine->name, count, active ? "active" : "idle"); 834112ed2d3SChris Wilson 835112ed2d3SChris Wilson if (err) 836112ed2d3SChris Wilson break; 837112ed2d3SChris Wilson 8387e805762SChris Wilson err = igt_flush_test(gt->i915); 8393f5dff6cSJohn Harrison if (err) { 8403f5dff6cSJohn Harrison pr_err("[%s] Flush failed: %d!\n", engine->name, err); 841112ed2d3SChris Wilson break; 842112ed2d3SChris Wilson } 8433f5dff6cSJohn Harrison } 844112ed2d3SChris Wilson 8453f5dff6cSJohn Harrison if (intel_gt_is_wedged(gt)) { 8463f5dff6cSJohn Harrison pr_err("GT is wedged!\n"); 847112ed2d3SChris Wilson err = -EIO; 8483f5dff6cSJohn Harrison } 849112ed2d3SChris Wilson 8507e805762SChris Wilson if (active) 851112ed2d3SChris Wilson hang_fini(&h); 852112ed2d3SChris Wilson 853112ed2d3SChris Wilson return err; 854112ed2d3SChris Wilson } 855112ed2d3SChris Wilson 856112ed2d3SChris Wilson static int igt_reset_idle_engine(void *arg) 857112ed2d3SChris Wilson { 858112ed2d3SChris Wilson return __igt_reset_engine(arg, false); 859112ed2d3SChris Wilson } 860112ed2d3SChris Wilson 861112ed2d3SChris Wilson static int igt_reset_active_engine(void *arg) 862112ed2d3SChris Wilson { 863112ed2d3SChris Wilson return __igt_reset_engine(arg, true); 864112ed2d3SChris Wilson } 865112ed2d3SChris Wilson 866112ed2d3SChris Wilson struct active_engine { 867112ed2d3SChris Wilson struct task_struct *task; 868112ed2d3SChris Wilson struct intel_engine_cs *engine; 869112ed2d3SChris Wilson unsigned long resets; 870112ed2d3SChris Wilson unsigned int flags; 871112ed2d3SChris Wilson }; 872112ed2d3SChris Wilson 873112ed2d3SChris Wilson #define TEST_ACTIVE BIT(0) 874112ed2d3SChris Wilson #define TEST_OTHERS BIT(1) 875112ed2d3SChris Wilson #define TEST_SELF BIT(2) 876112ed2d3SChris Wilson #define TEST_PRIORITY BIT(3) 877112ed2d3SChris Wilson 878112ed2d3SChris Wilson static int active_request_put(struct i915_request *rq) 879112ed2d3SChris Wilson { 880112ed2d3SChris Wilson int err = 0; 881112ed2d3SChris Wilson 882112ed2d3SChris Wilson if (!rq) 883112ed2d3SChris Wilson return 0; 884112ed2d3SChris Wilson 8853a7b7266SJohn Harrison if (i915_request_wait(rq, 0, 10 * HZ) < 0) { 886112ed2d3SChris Wilson GEM_TRACE("%s timed out waiting for completion of fence %llx:%lld\n", 887112ed2d3SChris Wilson rq->engine->name, 888112ed2d3SChris Wilson rq->fence.context, 889112ed2d3SChris Wilson rq->fence.seqno); 890112ed2d3SChris Wilson GEM_TRACE_DUMP(); 891112ed2d3SChris Wilson 892cb823ed9SChris Wilson intel_gt_set_wedged(rq->engine->gt); 893112ed2d3SChris Wilson err = -EIO; 894112ed2d3SChris Wilson } 895112ed2d3SChris Wilson 896112ed2d3SChris Wilson i915_request_put(rq); 897112ed2d3SChris Wilson 898112ed2d3SChris Wilson return err; 899112ed2d3SChris Wilson } 900112ed2d3SChris Wilson 901112ed2d3SChris Wilson static int active_engine(void *data) 902112ed2d3SChris Wilson { 903112ed2d3SChris Wilson I915_RND_STATE(prng); 904112ed2d3SChris Wilson struct active_engine *arg = data; 905112ed2d3SChris Wilson struct intel_engine_cs *engine = arg->engine; 906112ed2d3SChris Wilson struct i915_request *rq[8] = {}; 907e6ba7648SChris Wilson struct intel_context *ce[ARRAY_SIZE(rq)]; 908e6ba7648SChris Wilson unsigned long count; 909112ed2d3SChris Wilson int err = 0; 910112ed2d3SChris Wilson 911e6ba7648SChris Wilson for (count = 0; count < ARRAY_SIZE(ce); count++) { 912e6ba7648SChris Wilson ce[count] = intel_context_create(engine); 913e6ba7648SChris Wilson if (IS_ERR(ce[count])) { 914e6ba7648SChris Wilson err = PTR_ERR(ce[count]); 9153f5dff6cSJohn Harrison pr_err("[%s] Create context #%ld failed: %d!\n", engine->name, count, err); 916112ed2d3SChris Wilson while (--count) 917e6ba7648SChris Wilson intel_context_put(ce[count]); 918e6ba7648SChris Wilson return err; 919112ed2d3SChris Wilson } 920112ed2d3SChris Wilson } 921112ed2d3SChris Wilson 922e6ba7648SChris Wilson count = 0; 923112ed2d3SChris Wilson while (!kthread_should_stop()) { 924112ed2d3SChris Wilson unsigned int idx = count++ & (ARRAY_SIZE(rq) - 1); 925112ed2d3SChris Wilson struct i915_request *old = rq[idx]; 926112ed2d3SChris Wilson struct i915_request *new; 927112ed2d3SChris Wilson 928e6ba7648SChris Wilson new = intel_context_create_request(ce[idx]); 929112ed2d3SChris Wilson if (IS_ERR(new)) { 930112ed2d3SChris Wilson err = PTR_ERR(new); 9313f5dff6cSJohn Harrison pr_err("[%s] Create request #%d failed: %d!\n", engine->name, idx, err); 932112ed2d3SChris Wilson break; 933112ed2d3SChris Wilson } 934112ed2d3SChris Wilson 935112ed2d3SChris Wilson rq[idx] = i915_request_get(new); 936112ed2d3SChris Wilson i915_request_add(new); 937112ed2d3SChris Wilson 9383f623e06SMatthew Brost if (engine->sched_engine->schedule && arg->flags & TEST_PRIORITY) { 939e6ba7648SChris Wilson struct i915_sched_attr attr = { 940e6ba7648SChris Wilson .priority = 941e6ba7648SChris Wilson i915_prandom_u32_max_state(512, &prng), 942e6ba7648SChris Wilson }; 9433f623e06SMatthew Brost engine->sched_engine->schedule(rq[idx], &attr); 944e6ba7648SChris Wilson } 945e6ba7648SChris Wilson 946112ed2d3SChris Wilson err = active_request_put(old); 9473f5dff6cSJohn Harrison if (err) { 9483f5dff6cSJohn Harrison pr_err("[%s] Request put failed: %d!\n", engine->name, err); 949112ed2d3SChris Wilson break; 9503f5dff6cSJohn Harrison } 951112ed2d3SChris Wilson 952112ed2d3SChris Wilson cond_resched(); 953112ed2d3SChris Wilson } 954112ed2d3SChris Wilson 955112ed2d3SChris Wilson for (count = 0; count < ARRAY_SIZE(rq); count++) { 956112ed2d3SChris Wilson int err__ = active_request_put(rq[count]); 957112ed2d3SChris Wilson 9583f5dff6cSJohn Harrison if (err) 9593f5dff6cSJohn Harrison pr_err("[%s] Request put #%ld failed: %d!\n", engine->name, count, err); 9603f5dff6cSJohn Harrison 961112ed2d3SChris Wilson /* Keep the first error */ 962112ed2d3SChris Wilson if (!err) 963112ed2d3SChris Wilson err = err__; 964e6ba7648SChris Wilson 965e6ba7648SChris Wilson intel_context_put(ce[count]); 966112ed2d3SChris Wilson } 967112ed2d3SChris Wilson 968112ed2d3SChris Wilson return err; 969112ed2d3SChris Wilson } 970112ed2d3SChris Wilson 971cb823ed9SChris Wilson static int __igt_reset_engines(struct intel_gt *gt, 972112ed2d3SChris Wilson const char *test_name, 973112ed2d3SChris Wilson unsigned int flags) 974112ed2d3SChris Wilson { 975cb823ed9SChris Wilson struct i915_gpu_error *global = >->i915->gpu_error; 976112ed2d3SChris Wilson struct intel_engine_cs *engine, *other; 977112ed2d3SChris Wilson enum intel_engine_id id, tmp; 978112ed2d3SChris Wilson struct hang h; 979112ed2d3SChris Wilson int err = 0; 980112ed2d3SChris Wilson 981112ed2d3SChris Wilson /* Check that issuing a reset on one engine does not interfere 982112ed2d3SChris Wilson * with any other engine. 983112ed2d3SChris Wilson */ 984112ed2d3SChris Wilson 985260e6b71SChris Wilson if (!intel_has_reset_engine(gt)) 986112ed2d3SChris Wilson return 0; 987112ed2d3SChris Wilson 988112ed2d3SChris Wilson if (flags & TEST_ACTIVE) { 989cb823ed9SChris Wilson err = hang_init(&h, gt); 990112ed2d3SChris Wilson if (err) 991112ed2d3SChris Wilson return err; 992112ed2d3SChris Wilson 993112ed2d3SChris Wilson if (flags & TEST_PRIORITY) 994112ed2d3SChris Wilson h.ctx->sched.priority = 1024; 995112ed2d3SChris Wilson } 996112ed2d3SChris Wilson 9975d904e3cSTvrtko Ursulin for_each_engine(engine, gt, id) { 998112ed2d3SChris Wilson struct active_engine threads[I915_NUM_ENGINES] = {}; 999cb823ed9SChris Wilson unsigned long device = i915_reset_count(global); 1000112ed2d3SChris Wilson unsigned long count = 0, reported; 1001617e87c0SJohn Harrison bool using_guc = intel_engine_uses_guc(engine); 1002112ed2d3SChris Wilson IGT_TIMEOUT(end_time); 1003112ed2d3SChris Wilson 1004617e87c0SJohn Harrison if (flags & TEST_ACTIVE) { 1005617e87c0SJohn Harrison if (!intel_engine_can_store_dword(engine)) 1006617e87c0SJohn Harrison continue; 1007617e87c0SJohn Harrison } else if (using_guc) 1008112ed2d3SChris Wilson continue; 1009112ed2d3SChris Wilson 1010112ed2d3SChris Wilson if (!wait_for_idle(engine)) { 1011112ed2d3SChris Wilson pr_err("i915_reset_engine(%s:%s): failed to idle before reset\n", 1012112ed2d3SChris Wilson engine->name, test_name); 1013112ed2d3SChris Wilson err = -EIO; 1014112ed2d3SChris Wilson break; 1015112ed2d3SChris Wilson } 1016112ed2d3SChris Wilson 1017112ed2d3SChris Wilson memset(threads, 0, sizeof(threads)); 10185d904e3cSTvrtko Ursulin for_each_engine(other, gt, tmp) { 1019112ed2d3SChris Wilson struct task_struct *tsk; 1020112ed2d3SChris Wilson 1021112ed2d3SChris Wilson threads[tmp].resets = 1022cb823ed9SChris Wilson i915_reset_engine_count(global, other); 1023112ed2d3SChris Wilson 1024174b976dSChris Wilson if (other == engine && !(flags & TEST_SELF)) 1025112ed2d3SChris Wilson continue; 1026112ed2d3SChris Wilson 1027174b976dSChris Wilson if (other != engine && !(flags & TEST_OTHERS)) 1028112ed2d3SChris Wilson continue; 1029112ed2d3SChris Wilson 1030112ed2d3SChris Wilson threads[tmp].engine = other; 1031112ed2d3SChris Wilson threads[tmp].flags = flags; 1032112ed2d3SChris Wilson 1033112ed2d3SChris Wilson tsk = kthread_run(active_engine, &threads[tmp], 1034112ed2d3SChris Wilson "igt/%s", other->name); 1035112ed2d3SChris Wilson if (IS_ERR(tsk)) { 1036112ed2d3SChris Wilson err = PTR_ERR(tsk); 10373f5dff6cSJohn Harrison pr_err("[%s] Thread spawn failed: %d!\n", engine->name, err); 1038112ed2d3SChris Wilson goto unwind; 1039112ed2d3SChris Wilson } 1040112ed2d3SChris Wilson 1041112ed2d3SChris Wilson threads[tmp].task = tsk; 1042112ed2d3SChris Wilson get_task_struct(tsk); 1043112ed2d3SChris Wilson } 1044112ed2d3SChris Wilson 1045e5661c6aSChris Wilson yield(); /* start all threads before we begin */ 1046e5661c6aSChris Wilson 1047617e87c0SJohn Harrison st_engine_heartbeat_disable_no_pm(engine); 10489030e39cSThomas Hellström GEM_BUG_ON(test_and_set_bit(I915_RESET_ENGINE + id, 10499030e39cSThomas Hellström >->reset.flags)); 1050112ed2d3SChris Wilson do { 1051112ed2d3SChris Wilson struct i915_request *rq = NULL; 1052617e87c0SJohn Harrison struct intel_selftest_saved_policy saved; 1053617e87c0SJohn Harrison int err2; 1054617e87c0SJohn Harrison 1055617e87c0SJohn Harrison err = intel_selftest_modify_policy(engine, &saved, 1056617e87c0SJohn Harrison SELFTEST_SCHEDULER_MODIFY_FAST_RESET); 1057617e87c0SJohn Harrison if (err) { 1058617e87c0SJohn Harrison pr_err("[%s] Modify policy failed: %d!\n", engine->name, err); 1059617e87c0SJohn Harrison break; 1060617e87c0SJohn Harrison } 1061112ed2d3SChris Wilson 1062112ed2d3SChris Wilson if (flags & TEST_ACTIVE) { 1063112ed2d3SChris Wilson rq = hang_create_request(&h, engine); 1064112ed2d3SChris Wilson if (IS_ERR(rq)) { 1065112ed2d3SChris Wilson err = PTR_ERR(rq); 10663f5dff6cSJohn Harrison pr_err("[%s] Create hang request failed: %d!\n", 10673f5dff6cSJohn Harrison engine->name, err); 1068617e87c0SJohn Harrison goto restore; 1069112ed2d3SChris Wilson } 1070112ed2d3SChris Wilson 1071112ed2d3SChris Wilson i915_request_get(rq); 1072112ed2d3SChris Wilson i915_request_add(rq); 1073112ed2d3SChris Wilson 1074112ed2d3SChris Wilson if (!wait_until_running(&h, rq)) { 1075cb823ed9SChris Wilson struct drm_printer p = drm_info_printer(gt->i915->drm.dev); 1076112ed2d3SChris Wilson 1077112ed2d3SChris Wilson pr_err("%s: Failed to start request %llx, at %x\n", 1078112ed2d3SChris Wilson __func__, rq->fence.seqno, hws_seqno(&h, rq)); 1079112ed2d3SChris Wilson intel_engine_dump(engine, &p, 1080112ed2d3SChris Wilson "%s\n", engine->name); 1081112ed2d3SChris Wilson 1082112ed2d3SChris Wilson i915_request_put(rq); 1083112ed2d3SChris Wilson err = -EIO; 1084617e87c0SJohn Harrison goto restore; 1085112ed2d3SChris Wilson } 1086617e87c0SJohn Harrison } else { 1087617e87c0SJohn Harrison intel_engine_pm_get(engine); 1088112ed2d3SChris Wilson } 1089112ed2d3SChris Wilson 1090617e87c0SJohn Harrison if (!using_guc) { 1091cb823ed9SChris Wilson err = intel_engine_reset(engine, NULL); 1092112ed2d3SChris Wilson if (err) { 1093112ed2d3SChris Wilson pr_err("i915_reset_engine(%s:%s): failed, err=%d\n", 1094112ed2d3SChris Wilson engine->name, test_name, err); 1095617e87c0SJohn Harrison goto restore; 1096617e87c0SJohn Harrison } 1097617e87c0SJohn Harrison } 1098617e87c0SJohn Harrison 1099617e87c0SJohn Harrison if (rq) { 1100617e87c0SJohn Harrison /* Ensure the reset happens and kills the engine */ 1101617e87c0SJohn Harrison err = intel_selftest_wait_for_rq(rq); 1102617e87c0SJohn Harrison if (err) 1103617e87c0SJohn Harrison pr_err("[%s] Wait for request %lld:%lld [0x%04X] failed: %d!\n", 1104617e87c0SJohn Harrison engine->name, rq->fence.context, 11053cb3e343SMatthew Brost rq->fence.seqno, rq->context->guc_id.id, err); 1106112ed2d3SChris Wilson } 1107112ed2d3SChris Wilson 1108112ed2d3SChris Wilson count++; 1109112ed2d3SChris Wilson 1110112ed2d3SChris Wilson if (rq) { 111194ed4753SChris Wilson if (rq->fence.error != -EIO) { 11123f5dff6cSJohn Harrison pr_err("i915_reset_engine(%s:%s): failed to reset request %lld:%lld [0x%04X]\n", 111394ed4753SChris Wilson engine->name, test_name, 111494ed4753SChris Wilson rq->fence.context, 11153cb3e343SMatthew Brost rq->fence.seqno, rq->context->guc_id.id); 111694ed4753SChris Wilson i915_request_put(rq); 111794ed4753SChris Wilson 111894ed4753SChris Wilson GEM_TRACE_DUMP(); 111994ed4753SChris Wilson intel_gt_set_wedged(gt); 112094ed4753SChris Wilson err = -EIO; 1121617e87c0SJohn Harrison goto restore; 112294ed4753SChris Wilson } 112394ed4753SChris Wilson 1124112ed2d3SChris Wilson if (i915_request_wait(rq, 0, HZ / 5) < 0) { 1125112ed2d3SChris Wilson struct drm_printer p = 1126cb823ed9SChris Wilson drm_info_printer(gt->i915->drm.dev); 1127112ed2d3SChris Wilson 1128112ed2d3SChris Wilson pr_err("i915_reset_engine(%s:%s):" 112994ed4753SChris Wilson " failed to complete request %llx:%lld after reset\n", 113094ed4753SChris Wilson engine->name, test_name, 113194ed4753SChris Wilson rq->fence.context, 113294ed4753SChris Wilson rq->fence.seqno); 1133112ed2d3SChris Wilson intel_engine_dump(engine, &p, 1134112ed2d3SChris Wilson "%s\n", engine->name); 1135112ed2d3SChris Wilson i915_request_put(rq); 1136112ed2d3SChris Wilson 1137112ed2d3SChris Wilson GEM_TRACE_DUMP(); 1138cb823ed9SChris Wilson intel_gt_set_wedged(gt); 1139112ed2d3SChris Wilson err = -EIO; 1140617e87c0SJohn Harrison goto restore; 1141112ed2d3SChris Wilson } 1142112ed2d3SChris Wilson 1143112ed2d3SChris Wilson i915_request_put(rq); 1144112ed2d3SChris Wilson } 1145112ed2d3SChris Wilson 1146617e87c0SJohn Harrison if (!(flags & TEST_ACTIVE)) 1147617e87c0SJohn Harrison intel_engine_pm_put(engine); 1148617e87c0SJohn Harrison 1149112ed2d3SChris Wilson if (!(flags & TEST_SELF) && !wait_for_idle(engine)) { 1150112ed2d3SChris Wilson struct drm_printer p = 1151cb823ed9SChris Wilson drm_info_printer(gt->i915->drm.dev); 1152112ed2d3SChris Wilson 1153112ed2d3SChris Wilson pr_err("i915_reset_engine(%s:%s):" 1154112ed2d3SChris Wilson " failed to idle after reset\n", 1155112ed2d3SChris Wilson engine->name, test_name); 1156112ed2d3SChris Wilson intel_engine_dump(engine, &p, 1157112ed2d3SChris Wilson "%s\n", engine->name); 1158112ed2d3SChris Wilson 1159112ed2d3SChris Wilson err = -EIO; 1160617e87c0SJohn Harrison goto restore; 1161112ed2d3SChris Wilson } 1162617e87c0SJohn Harrison 1163617e87c0SJohn Harrison restore: 1164617e87c0SJohn Harrison err2 = intel_selftest_restore_policy(engine, &saved); 1165617e87c0SJohn Harrison if (err2) 1166617e87c0SJohn Harrison pr_err("[%s] Restore policy failed: %d!\n", engine->name, err2); 1167617e87c0SJohn Harrison if (err == 0) 1168617e87c0SJohn Harrison err = err2; 1169617e87c0SJohn Harrison if (err) 1170617e87c0SJohn Harrison break; 1171112ed2d3SChris Wilson } while (time_before(jiffies, end_time)); 11729030e39cSThomas Hellström clear_and_wake_up_bit(I915_RESET_ENGINE + id, >->reset.flags); 1173617e87c0SJohn Harrison st_engine_heartbeat_enable_no_pm(engine); 1174e6ba7648SChris Wilson 1175112ed2d3SChris Wilson pr_info("i915_reset_engine(%s:%s): %lu resets\n", 1176112ed2d3SChris Wilson engine->name, test_name, count); 1177112ed2d3SChris Wilson 1178617e87c0SJohn Harrison /* GuC based resets are not logged per engine */ 1179617e87c0SJohn Harrison if (!using_guc) { 1180cb823ed9SChris Wilson reported = i915_reset_engine_count(global, engine); 1181112ed2d3SChris Wilson reported -= threads[engine->id].resets; 1182112ed2d3SChris Wilson if (reported != count) { 1183112ed2d3SChris Wilson pr_err("i915_reset_engine(%s:%s): reset %lu times, but reported %lu\n", 1184112ed2d3SChris Wilson engine->name, test_name, count, reported); 1185112ed2d3SChris Wilson if (!err) 1186112ed2d3SChris Wilson err = -EINVAL; 1187112ed2d3SChris Wilson } 1188617e87c0SJohn Harrison } 1189112ed2d3SChris Wilson 1190112ed2d3SChris Wilson unwind: 11915d904e3cSTvrtko Ursulin for_each_engine(other, gt, tmp) { 1192112ed2d3SChris Wilson int ret; 1193112ed2d3SChris Wilson 1194112ed2d3SChris Wilson if (!threads[tmp].task) 1195112ed2d3SChris Wilson continue; 1196112ed2d3SChris Wilson 1197112ed2d3SChris Wilson ret = kthread_stop(threads[tmp].task); 1198112ed2d3SChris Wilson if (ret) { 1199112ed2d3SChris Wilson pr_err("kthread for other engine %s failed, err=%d\n", 1200112ed2d3SChris Wilson other->name, ret); 1201112ed2d3SChris Wilson if (!err) 1202112ed2d3SChris Wilson err = ret; 1203112ed2d3SChris Wilson } 1204112ed2d3SChris Wilson put_task_struct(threads[tmp].task); 1205112ed2d3SChris Wilson 1206617e87c0SJohn Harrison /* GuC based resets are not logged per engine */ 1207617e87c0SJohn Harrison if (!using_guc) { 1208cb823ed9SChris Wilson if (other->uabi_class != engine->uabi_class && 1209112ed2d3SChris Wilson threads[tmp].resets != 1210cb823ed9SChris Wilson i915_reset_engine_count(global, other)) { 1211112ed2d3SChris Wilson pr_err("Innocent engine %s was reset (count=%ld)\n", 1212112ed2d3SChris Wilson other->name, 1213cb823ed9SChris Wilson i915_reset_engine_count(global, other) - 1214112ed2d3SChris Wilson threads[tmp].resets); 1215112ed2d3SChris Wilson if (!err) 1216112ed2d3SChris Wilson err = -EINVAL; 1217112ed2d3SChris Wilson } 1218112ed2d3SChris Wilson } 1219617e87c0SJohn Harrison } 1220112ed2d3SChris Wilson 1221cb823ed9SChris Wilson if (device != i915_reset_count(global)) { 1222112ed2d3SChris Wilson pr_err("Global reset (count=%ld)!\n", 1223cb823ed9SChris Wilson i915_reset_count(global) - device); 1224112ed2d3SChris Wilson if (!err) 1225112ed2d3SChris Wilson err = -EINVAL; 1226112ed2d3SChris Wilson } 1227112ed2d3SChris Wilson 1228112ed2d3SChris Wilson if (err) 1229112ed2d3SChris Wilson break; 1230112ed2d3SChris Wilson 12317e805762SChris Wilson err = igt_flush_test(gt->i915); 12323f5dff6cSJohn Harrison if (err) { 12333f5dff6cSJohn Harrison pr_err("[%s] Flush failed: %d!\n", engine->name, err); 1234112ed2d3SChris Wilson break; 1235112ed2d3SChris Wilson } 12363f5dff6cSJohn Harrison } 1237112ed2d3SChris Wilson 1238cb823ed9SChris Wilson if (intel_gt_is_wedged(gt)) 1239112ed2d3SChris Wilson err = -EIO; 1240112ed2d3SChris Wilson 12417e805762SChris Wilson if (flags & TEST_ACTIVE) 1242112ed2d3SChris Wilson hang_fini(&h); 1243112ed2d3SChris Wilson 1244112ed2d3SChris Wilson return err; 1245112ed2d3SChris Wilson } 1246112ed2d3SChris Wilson 1247112ed2d3SChris Wilson static int igt_reset_engines(void *arg) 1248112ed2d3SChris Wilson { 1249112ed2d3SChris Wilson static const struct { 1250112ed2d3SChris Wilson const char *name; 1251112ed2d3SChris Wilson unsigned int flags; 1252112ed2d3SChris Wilson } phases[] = { 1253112ed2d3SChris Wilson { "idle", 0 }, 1254112ed2d3SChris Wilson { "active", TEST_ACTIVE }, 1255112ed2d3SChris Wilson { "others-idle", TEST_OTHERS }, 1256112ed2d3SChris Wilson { "others-active", TEST_OTHERS | TEST_ACTIVE }, 1257112ed2d3SChris Wilson { 1258112ed2d3SChris Wilson "others-priority", 1259112ed2d3SChris Wilson TEST_OTHERS | TEST_ACTIVE | TEST_PRIORITY 1260112ed2d3SChris Wilson }, 1261112ed2d3SChris Wilson { 1262112ed2d3SChris Wilson "self-priority", 1263174b976dSChris Wilson TEST_ACTIVE | TEST_PRIORITY | TEST_SELF, 1264112ed2d3SChris Wilson }, 1265112ed2d3SChris Wilson { } 1266112ed2d3SChris Wilson }; 1267cb823ed9SChris Wilson struct intel_gt *gt = arg; 1268112ed2d3SChris Wilson typeof(*phases) *p; 1269112ed2d3SChris Wilson int err; 1270112ed2d3SChris Wilson 1271112ed2d3SChris Wilson for (p = phases; p->name; p++) { 1272112ed2d3SChris Wilson if (p->flags & TEST_PRIORITY) { 1273cb823ed9SChris Wilson if (!(gt->i915->caps.scheduler & I915_SCHEDULER_CAP_PRIORITY)) 1274112ed2d3SChris Wilson continue; 1275112ed2d3SChris Wilson } 1276112ed2d3SChris Wilson 1277112ed2d3SChris Wilson err = __igt_reset_engines(arg, p->name, p->flags); 1278112ed2d3SChris Wilson if (err) 1279112ed2d3SChris Wilson return err; 1280112ed2d3SChris Wilson } 1281112ed2d3SChris Wilson 1282112ed2d3SChris Wilson return 0; 1283112ed2d3SChris Wilson } 1284112ed2d3SChris Wilson 1285cb823ed9SChris Wilson static u32 fake_hangcheck(struct intel_gt *gt, intel_engine_mask_t mask) 1286112ed2d3SChris Wilson { 1287cb823ed9SChris Wilson u32 count = i915_reset_count(>->i915->gpu_error); 1288112ed2d3SChris Wilson 1289cb823ed9SChris Wilson intel_gt_reset(gt, mask, NULL); 1290112ed2d3SChris Wilson 1291112ed2d3SChris Wilson return count; 1292112ed2d3SChris Wilson } 1293112ed2d3SChris Wilson 1294112ed2d3SChris Wilson static int igt_reset_wait(void *arg) 1295112ed2d3SChris Wilson { 1296cb823ed9SChris Wilson struct intel_gt *gt = arg; 1297cb823ed9SChris Wilson struct i915_gpu_error *global = >->i915->gpu_error; 12981f9f6353SChris Wilson struct intel_engine_cs *engine = gt->engine[RCS0]; 1299112ed2d3SChris Wilson struct i915_request *rq; 1300112ed2d3SChris Wilson unsigned int reset_count; 1301112ed2d3SChris Wilson struct hang h; 1302112ed2d3SChris Wilson long timeout; 1303112ed2d3SChris Wilson int err; 1304112ed2d3SChris Wilson 1305cb823ed9SChris Wilson if (!engine || !intel_engine_can_store_dword(engine)) 1306112ed2d3SChris Wilson return 0; 1307112ed2d3SChris Wilson 1308112ed2d3SChris Wilson /* Check that we detect a stuck waiter and issue a reset */ 1309112ed2d3SChris Wilson 1310cb823ed9SChris Wilson igt_global_reset_lock(gt); 1311112ed2d3SChris Wilson 1312cb823ed9SChris Wilson err = hang_init(&h, gt); 13133f5dff6cSJohn Harrison if (err) { 13143f5dff6cSJohn Harrison pr_err("[%s] Hang init failed: %d!\n", engine->name, err); 1315112ed2d3SChris Wilson goto unlock; 13163f5dff6cSJohn Harrison } 1317112ed2d3SChris Wilson 1318cb823ed9SChris Wilson rq = hang_create_request(&h, engine); 1319112ed2d3SChris Wilson if (IS_ERR(rq)) { 1320112ed2d3SChris Wilson err = PTR_ERR(rq); 13213f5dff6cSJohn Harrison pr_err("[%s] Create hang request failed: %d!\n", engine->name, err); 1322112ed2d3SChris Wilson goto fini; 1323112ed2d3SChris Wilson } 1324112ed2d3SChris Wilson 1325112ed2d3SChris Wilson i915_request_get(rq); 1326112ed2d3SChris Wilson i915_request_add(rq); 1327112ed2d3SChris Wilson 1328112ed2d3SChris Wilson if (!wait_until_running(&h, rq)) { 1329cb823ed9SChris Wilson struct drm_printer p = drm_info_printer(gt->i915->drm.dev); 1330112ed2d3SChris Wilson 1331112ed2d3SChris Wilson pr_err("%s: Failed to start request %llx, at %x\n", 1332112ed2d3SChris Wilson __func__, rq->fence.seqno, hws_seqno(&h, rq)); 1333112ed2d3SChris Wilson intel_engine_dump(rq->engine, &p, "%s\n", rq->engine->name); 1334112ed2d3SChris Wilson 1335cb823ed9SChris Wilson intel_gt_set_wedged(gt); 1336112ed2d3SChris Wilson 1337112ed2d3SChris Wilson err = -EIO; 1338112ed2d3SChris Wilson goto out_rq; 1339112ed2d3SChris Wilson } 1340112ed2d3SChris Wilson 1341cb823ed9SChris Wilson reset_count = fake_hangcheck(gt, ALL_ENGINES); 1342112ed2d3SChris Wilson 13432f530945SChris Wilson timeout = i915_request_wait(rq, 0, 10); 1344112ed2d3SChris Wilson if (timeout < 0) { 1345112ed2d3SChris Wilson pr_err("i915_request_wait failed on a stuck request: err=%ld\n", 1346112ed2d3SChris Wilson timeout); 1347112ed2d3SChris Wilson err = timeout; 1348112ed2d3SChris Wilson goto out_rq; 1349112ed2d3SChris Wilson } 1350112ed2d3SChris Wilson 1351cb823ed9SChris Wilson if (i915_reset_count(global) == reset_count) { 1352112ed2d3SChris Wilson pr_err("No GPU reset recorded!\n"); 1353112ed2d3SChris Wilson err = -EINVAL; 1354112ed2d3SChris Wilson goto out_rq; 1355112ed2d3SChris Wilson } 1356112ed2d3SChris Wilson 1357112ed2d3SChris Wilson out_rq: 1358112ed2d3SChris Wilson i915_request_put(rq); 1359112ed2d3SChris Wilson fini: 1360112ed2d3SChris Wilson hang_fini(&h); 1361112ed2d3SChris Wilson unlock: 1362cb823ed9SChris Wilson igt_global_reset_unlock(gt); 1363112ed2d3SChris Wilson 1364cb823ed9SChris Wilson if (intel_gt_is_wedged(gt)) 1365112ed2d3SChris Wilson return -EIO; 1366112ed2d3SChris Wilson 1367112ed2d3SChris Wilson return err; 1368112ed2d3SChris Wilson } 1369112ed2d3SChris Wilson 1370112ed2d3SChris Wilson struct evict_vma { 1371112ed2d3SChris Wilson struct completion completion; 1372112ed2d3SChris Wilson struct i915_vma *vma; 1373112ed2d3SChris Wilson }; 1374112ed2d3SChris Wilson 1375112ed2d3SChris Wilson static int evict_vma(void *data) 1376112ed2d3SChris Wilson { 1377112ed2d3SChris Wilson struct evict_vma *arg = data; 1378112ed2d3SChris Wilson struct i915_address_space *vm = arg->vma->vm; 1379112ed2d3SChris Wilson struct drm_mm_node evict = arg->vma->node; 1380112ed2d3SChris Wilson int err; 1381112ed2d3SChris Wilson 1382112ed2d3SChris Wilson complete(&arg->completion); 1383112ed2d3SChris Wilson 13842850748eSChris Wilson mutex_lock(&vm->mutex); 1385*7e00897bSMaarten Lankhorst err = i915_gem_evict_for_node(vm, NULL, &evict, 0); 13862850748eSChris Wilson mutex_unlock(&vm->mutex); 1387112ed2d3SChris Wilson 1388112ed2d3SChris Wilson return err; 1389112ed2d3SChris Wilson } 1390112ed2d3SChris Wilson 1391112ed2d3SChris Wilson static int evict_fence(void *data) 1392112ed2d3SChris Wilson { 1393112ed2d3SChris Wilson struct evict_vma *arg = data; 1394112ed2d3SChris Wilson int err; 1395112ed2d3SChris Wilson 1396112ed2d3SChris Wilson complete(&arg->completion); 1397112ed2d3SChris Wilson 1398112ed2d3SChris Wilson /* Mark the fence register as dirty to force the mmio update. */ 1399112ed2d3SChris Wilson err = i915_gem_object_set_tiling(arg->vma->obj, I915_TILING_Y, 512); 1400112ed2d3SChris Wilson if (err) { 1401112ed2d3SChris Wilson pr_err("Invalid Y-tiling settings; err:%d\n", err); 14022850748eSChris Wilson return err; 1403112ed2d3SChris Wilson } 1404112ed2d3SChris Wilson 1405e2ccc50aSChris Wilson err = i915_vma_pin(arg->vma, 0, 0, PIN_GLOBAL | PIN_MAPPABLE); 1406e2ccc50aSChris Wilson if (err) { 1407e2ccc50aSChris Wilson pr_err("Unable to pin vma for Y-tiled fence; err:%d\n", err); 14082850748eSChris Wilson return err; 1409e2ccc50aSChris Wilson } 1410e2ccc50aSChris Wilson 1411112ed2d3SChris Wilson err = i915_vma_pin_fence(arg->vma); 1412e2ccc50aSChris Wilson i915_vma_unpin(arg->vma); 1413112ed2d3SChris Wilson if (err) { 1414112ed2d3SChris Wilson pr_err("Unable to pin Y-tiled fence; err:%d\n", err); 14152850748eSChris Wilson return err; 1416112ed2d3SChris Wilson } 1417112ed2d3SChris Wilson 1418112ed2d3SChris Wilson i915_vma_unpin_fence(arg->vma); 1419112ed2d3SChris Wilson 14202850748eSChris Wilson return 0; 1421112ed2d3SChris Wilson } 1422112ed2d3SChris Wilson 1423cb823ed9SChris Wilson static int __igt_reset_evict_vma(struct intel_gt *gt, 1424112ed2d3SChris Wilson struct i915_address_space *vm, 1425112ed2d3SChris Wilson int (*fn)(void *), 1426112ed2d3SChris Wilson unsigned int flags) 1427112ed2d3SChris Wilson { 14281f9f6353SChris Wilson struct intel_engine_cs *engine = gt->engine[RCS0]; 1429112ed2d3SChris Wilson struct drm_i915_gem_object *obj; 1430112ed2d3SChris Wilson struct task_struct *tsk = NULL; 1431112ed2d3SChris Wilson struct i915_request *rq; 1432112ed2d3SChris Wilson struct evict_vma arg; 1433112ed2d3SChris Wilson struct hang h; 1434e60f7bb7SMatthew Auld unsigned int pin_flags; 1435112ed2d3SChris Wilson int err; 1436112ed2d3SChris Wilson 1437e60f7bb7SMatthew Auld if (!gt->ggtt->num_fences && flags & EXEC_OBJECT_NEEDS_FENCE) 1438e60f7bb7SMatthew Auld return 0; 1439e60f7bb7SMatthew Auld 1440cb823ed9SChris Wilson if (!engine || !intel_engine_can_store_dword(engine)) 1441112ed2d3SChris Wilson return 0; 1442112ed2d3SChris Wilson 1443112ed2d3SChris Wilson /* Check that we can recover an unbind stuck on a hanging request */ 1444112ed2d3SChris Wilson 1445cb823ed9SChris Wilson err = hang_init(&h, gt); 14463f5dff6cSJohn Harrison if (err) { 14473f5dff6cSJohn Harrison pr_err("[%s] Hang init failed: %d!\n", engine->name, err); 14487e805762SChris Wilson return err; 14493f5dff6cSJohn Harrison } 1450112ed2d3SChris Wilson 1451cb823ed9SChris Wilson obj = i915_gem_object_create_internal(gt->i915, SZ_1M); 1452112ed2d3SChris Wilson if (IS_ERR(obj)) { 1453112ed2d3SChris Wilson err = PTR_ERR(obj); 14543f5dff6cSJohn Harrison pr_err("[%s] Create object failed: %d!\n", engine->name, err); 1455112ed2d3SChris Wilson goto fini; 1456112ed2d3SChris Wilson } 1457112ed2d3SChris Wilson 1458112ed2d3SChris Wilson if (flags & EXEC_OBJECT_NEEDS_FENCE) { 1459112ed2d3SChris Wilson err = i915_gem_object_set_tiling(obj, I915_TILING_X, 512); 1460112ed2d3SChris Wilson if (err) { 1461112ed2d3SChris Wilson pr_err("Invalid X-tiling settings; err:%d\n", err); 1462112ed2d3SChris Wilson goto out_obj; 1463112ed2d3SChris Wilson } 1464112ed2d3SChris Wilson } 1465112ed2d3SChris Wilson 1466112ed2d3SChris Wilson arg.vma = i915_vma_instance(obj, vm, NULL); 1467112ed2d3SChris Wilson if (IS_ERR(arg.vma)) { 1468112ed2d3SChris Wilson err = PTR_ERR(arg.vma); 14693f5dff6cSJohn Harrison pr_err("[%s] VMA instance failed: %d!\n", engine->name, err); 1470112ed2d3SChris Wilson goto out_obj; 1471112ed2d3SChris Wilson } 1472112ed2d3SChris Wilson 1473cb823ed9SChris Wilson rq = hang_create_request(&h, engine); 1474112ed2d3SChris Wilson if (IS_ERR(rq)) { 1475112ed2d3SChris Wilson err = PTR_ERR(rq); 14763f5dff6cSJohn Harrison pr_err("[%s] Create hang request failed: %d!\n", engine->name, err); 1477112ed2d3SChris Wilson goto out_obj; 1478112ed2d3SChris Wilson } 1479112ed2d3SChris Wilson 1480e60f7bb7SMatthew Auld pin_flags = i915_vma_is_ggtt(arg.vma) ? PIN_GLOBAL : PIN_USER; 1481e60f7bb7SMatthew Auld 1482e60f7bb7SMatthew Auld if (flags & EXEC_OBJECT_NEEDS_FENCE) 1483e60f7bb7SMatthew Auld pin_flags |= PIN_MAPPABLE; 1484e60f7bb7SMatthew Auld 1485e60f7bb7SMatthew Auld err = i915_vma_pin(arg.vma, 0, 0, pin_flags); 1486112ed2d3SChris Wilson if (err) { 1487112ed2d3SChris Wilson i915_request_add(rq); 14883f5dff6cSJohn Harrison pr_err("[%s] VMA pin failed: %d!\n", engine->name, err); 1489112ed2d3SChris Wilson goto out_obj; 1490112ed2d3SChris Wilson } 1491112ed2d3SChris Wilson 1492112ed2d3SChris Wilson if (flags & EXEC_OBJECT_NEEDS_FENCE) { 1493112ed2d3SChris Wilson err = i915_vma_pin_fence(arg.vma); 1494112ed2d3SChris Wilson if (err) { 1495112ed2d3SChris Wilson pr_err("Unable to pin X-tiled fence; err:%d\n", err); 1496112ed2d3SChris Wilson i915_vma_unpin(arg.vma); 1497112ed2d3SChris Wilson i915_request_add(rq); 1498112ed2d3SChris Wilson goto out_obj; 1499112ed2d3SChris Wilson } 1500112ed2d3SChris Wilson } 1501112ed2d3SChris Wilson 15026951e589SChris Wilson i915_vma_lock(arg.vma); 150370d6894dSChris Wilson err = i915_request_await_object(rq, arg.vma->obj, 150470d6894dSChris Wilson flags & EXEC_OBJECT_WRITE); 15053f5dff6cSJohn Harrison if (err == 0) { 1506112ed2d3SChris Wilson err = i915_vma_move_to_active(arg.vma, rq, flags); 15073f5dff6cSJohn Harrison if (err) 15083f5dff6cSJohn Harrison pr_err("[%s] Move to active failed: %d!\n", engine->name, err); 15093f5dff6cSJohn Harrison } else { 15103f5dff6cSJohn Harrison pr_err("[%s] Request await failed: %d!\n", engine->name, err); 15113f5dff6cSJohn Harrison } 15123f5dff6cSJohn Harrison 15136951e589SChris Wilson i915_vma_unlock(arg.vma); 1514112ed2d3SChris Wilson 1515112ed2d3SChris Wilson if (flags & EXEC_OBJECT_NEEDS_FENCE) 1516112ed2d3SChris Wilson i915_vma_unpin_fence(arg.vma); 1517112ed2d3SChris Wilson i915_vma_unpin(arg.vma); 1518112ed2d3SChris Wilson 1519112ed2d3SChris Wilson i915_request_get(rq); 1520112ed2d3SChris Wilson i915_request_add(rq); 1521112ed2d3SChris Wilson if (err) 1522112ed2d3SChris Wilson goto out_rq; 1523112ed2d3SChris Wilson 1524112ed2d3SChris Wilson if (!wait_until_running(&h, rq)) { 1525cb823ed9SChris Wilson struct drm_printer p = drm_info_printer(gt->i915->drm.dev); 1526112ed2d3SChris Wilson 1527112ed2d3SChris Wilson pr_err("%s: Failed to start request %llx, at %x\n", 1528112ed2d3SChris Wilson __func__, rq->fence.seqno, hws_seqno(&h, rq)); 1529112ed2d3SChris Wilson intel_engine_dump(rq->engine, &p, "%s\n", rq->engine->name); 1530112ed2d3SChris Wilson 1531cb823ed9SChris Wilson intel_gt_set_wedged(gt); 1532112ed2d3SChris Wilson goto out_reset; 1533112ed2d3SChris Wilson } 1534112ed2d3SChris Wilson 1535112ed2d3SChris Wilson init_completion(&arg.completion); 1536112ed2d3SChris Wilson 1537112ed2d3SChris Wilson tsk = kthread_run(fn, &arg, "igt/evict_vma"); 1538112ed2d3SChris Wilson if (IS_ERR(tsk)) { 1539112ed2d3SChris Wilson err = PTR_ERR(tsk); 15403f5dff6cSJohn Harrison pr_err("[%s] Thread spawn failed: %d!\n", engine->name, err); 1541112ed2d3SChris Wilson tsk = NULL; 1542112ed2d3SChris Wilson goto out_reset; 1543112ed2d3SChris Wilson } 1544112ed2d3SChris Wilson get_task_struct(tsk); 1545112ed2d3SChris Wilson 1546112ed2d3SChris Wilson wait_for_completion(&arg.completion); 1547112ed2d3SChris Wilson 1548112ed2d3SChris Wilson if (wait_for(!list_empty(&rq->fence.cb_list), 10)) { 1549cb823ed9SChris Wilson struct drm_printer p = drm_info_printer(gt->i915->drm.dev); 1550112ed2d3SChris Wilson 1551112ed2d3SChris Wilson pr_err("igt/evict_vma kthread did not wait\n"); 1552112ed2d3SChris Wilson intel_engine_dump(rq->engine, &p, "%s\n", rq->engine->name); 1553112ed2d3SChris Wilson 1554cb823ed9SChris Wilson intel_gt_set_wedged(gt); 1555112ed2d3SChris Wilson goto out_reset; 1556112ed2d3SChris Wilson } 1557112ed2d3SChris Wilson 1558112ed2d3SChris Wilson out_reset: 1559cb823ed9SChris Wilson igt_global_reset_lock(gt); 1560cb823ed9SChris Wilson fake_hangcheck(gt, rq->engine->mask); 1561cb823ed9SChris Wilson igt_global_reset_unlock(gt); 1562112ed2d3SChris Wilson 1563112ed2d3SChris Wilson if (tsk) { 1564cb823ed9SChris Wilson struct intel_wedge_me w; 1565112ed2d3SChris Wilson 1566112ed2d3SChris Wilson /* The reset, even indirectly, should take less than 10ms. */ 1567cb823ed9SChris Wilson intel_wedge_on_timeout(&w, gt, HZ / 10 /* 100ms */) 1568112ed2d3SChris Wilson err = kthread_stop(tsk); 1569112ed2d3SChris Wilson 1570112ed2d3SChris Wilson put_task_struct(tsk); 1571112ed2d3SChris Wilson } 1572112ed2d3SChris Wilson 1573112ed2d3SChris Wilson out_rq: 1574112ed2d3SChris Wilson i915_request_put(rq); 1575112ed2d3SChris Wilson out_obj: 1576112ed2d3SChris Wilson i915_gem_object_put(obj); 1577112ed2d3SChris Wilson fini: 1578112ed2d3SChris Wilson hang_fini(&h); 1579cb823ed9SChris Wilson if (intel_gt_is_wedged(gt)) 1580112ed2d3SChris Wilson return -EIO; 1581112ed2d3SChris Wilson 1582112ed2d3SChris Wilson return err; 1583112ed2d3SChris Wilson } 1584112ed2d3SChris Wilson 1585112ed2d3SChris Wilson static int igt_reset_evict_ggtt(void *arg) 1586112ed2d3SChris Wilson { 1587cb823ed9SChris Wilson struct intel_gt *gt = arg; 1588112ed2d3SChris Wilson 1589cb823ed9SChris Wilson return __igt_reset_evict_vma(gt, >->ggtt->vm, 1590112ed2d3SChris Wilson evict_vma, EXEC_OBJECT_WRITE); 1591112ed2d3SChris Wilson } 1592112ed2d3SChris Wilson 1593112ed2d3SChris Wilson static int igt_reset_evict_ppgtt(void *arg) 1594112ed2d3SChris Wilson { 1595cb823ed9SChris Wilson struct intel_gt *gt = arg; 1596e6ba7648SChris Wilson struct i915_ppgtt *ppgtt; 1597112ed2d3SChris Wilson int err; 1598112ed2d3SChris Wilson 1599a4e7ccdaSChris Wilson /* aliasing == global gtt locking, covered above */ 1600e6ba7648SChris Wilson if (INTEL_PPGTT(gt->i915) < INTEL_PPGTT_FULL) 1601e6ba7648SChris Wilson return 0; 1602112ed2d3SChris Wilson 1603a259cc14SThomas Hellström ppgtt = i915_ppgtt_create(gt, 0); 1604e6ba7648SChris Wilson if (IS_ERR(ppgtt)) 1605e6ba7648SChris Wilson return PTR_ERR(ppgtt); 1606e6ba7648SChris Wilson 1607e6ba7648SChris Wilson err = __igt_reset_evict_vma(gt, &ppgtt->vm, 1608e6ba7648SChris Wilson evict_vma, EXEC_OBJECT_WRITE); 1609e6ba7648SChris Wilson i915_vm_put(&ppgtt->vm); 1610e6ba7648SChris Wilson 1611112ed2d3SChris Wilson return err; 1612112ed2d3SChris Wilson } 1613112ed2d3SChris Wilson 1614112ed2d3SChris Wilson static int igt_reset_evict_fence(void *arg) 1615112ed2d3SChris Wilson { 1616cb823ed9SChris Wilson struct intel_gt *gt = arg; 1617112ed2d3SChris Wilson 1618cb823ed9SChris Wilson return __igt_reset_evict_vma(gt, >->ggtt->vm, 1619112ed2d3SChris Wilson evict_fence, EXEC_OBJECT_NEEDS_FENCE); 1620112ed2d3SChris Wilson } 1621112ed2d3SChris Wilson 1622cb823ed9SChris Wilson static int wait_for_others(struct intel_gt *gt, 1623112ed2d3SChris Wilson struct intel_engine_cs *exclude) 1624112ed2d3SChris Wilson { 1625112ed2d3SChris Wilson struct intel_engine_cs *engine; 1626112ed2d3SChris Wilson enum intel_engine_id id; 1627112ed2d3SChris Wilson 16285d904e3cSTvrtko Ursulin for_each_engine(engine, gt, id) { 1629112ed2d3SChris Wilson if (engine == exclude) 1630112ed2d3SChris Wilson continue; 1631112ed2d3SChris Wilson 1632112ed2d3SChris Wilson if (!wait_for_idle(engine)) 1633112ed2d3SChris Wilson return -EIO; 1634112ed2d3SChris Wilson } 1635112ed2d3SChris Wilson 1636112ed2d3SChris Wilson return 0; 1637112ed2d3SChris Wilson } 1638112ed2d3SChris Wilson 1639112ed2d3SChris Wilson static int igt_reset_queue(void *arg) 1640112ed2d3SChris Wilson { 1641cb823ed9SChris Wilson struct intel_gt *gt = arg; 1642cb823ed9SChris Wilson struct i915_gpu_error *global = >->i915->gpu_error; 1643112ed2d3SChris Wilson struct intel_engine_cs *engine; 1644112ed2d3SChris Wilson enum intel_engine_id id; 1645112ed2d3SChris Wilson struct hang h; 1646112ed2d3SChris Wilson int err; 1647112ed2d3SChris Wilson 1648112ed2d3SChris Wilson /* Check that we replay pending requests following a hang */ 1649112ed2d3SChris Wilson 1650cb823ed9SChris Wilson igt_global_reset_lock(gt); 1651112ed2d3SChris Wilson 1652cb823ed9SChris Wilson err = hang_init(&h, gt); 1653112ed2d3SChris Wilson if (err) 1654112ed2d3SChris Wilson goto unlock; 1655112ed2d3SChris Wilson 16565d904e3cSTvrtko Ursulin for_each_engine(engine, gt, id) { 1657617e87c0SJohn Harrison struct intel_selftest_saved_policy saved; 1658112ed2d3SChris Wilson struct i915_request *prev; 1659112ed2d3SChris Wilson IGT_TIMEOUT(end_time); 1660112ed2d3SChris Wilson unsigned int count; 1661617e87c0SJohn Harrison bool using_guc = intel_engine_uses_guc(engine); 1662112ed2d3SChris Wilson 1663112ed2d3SChris Wilson if (!intel_engine_can_store_dword(engine)) 1664112ed2d3SChris Wilson continue; 1665112ed2d3SChris Wilson 1666617e87c0SJohn Harrison if (using_guc) { 1667617e87c0SJohn Harrison err = intel_selftest_modify_policy(engine, &saved, 1668617e87c0SJohn Harrison SELFTEST_SCHEDULER_MODIFY_NO_HANGCHECK); 1669617e87c0SJohn Harrison if (err) { 1670617e87c0SJohn Harrison pr_err("[%s] Modify policy failed: %d!\n", engine->name, err); 1671617e87c0SJohn Harrison goto fini; 1672617e87c0SJohn Harrison } 1673617e87c0SJohn Harrison } 1674617e87c0SJohn Harrison 1675112ed2d3SChris Wilson prev = hang_create_request(&h, engine); 1676112ed2d3SChris Wilson if (IS_ERR(prev)) { 1677112ed2d3SChris Wilson err = PTR_ERR(prev); 16783f5dff6cSJohn Harrison pr_err("[%s] Create 'prev' hang request failed: %d!\n", engine->name, err); 1679617e87c0SJohn Harrison goto restore; 1680112ed2d3SChris Wilson } 1681112ed2d3SChris Wilson 1682112ed2d3SChris Wilson i915_request_get(prev); 1683112ed2d3SChris Wilson i915_request_add(prev); 1684112ed2d3SChris Wilson 1685112ed2d3SChris Wilson count = 0; 1686112ed2d3SChris Wilson do { 1687112ed2d3SChris Wilson struct i915_request *rq; 1688112ed2d3SChris Wilson unsigned int reset_count; 1689112ed2d3SChris Wilson 1690112ed2d3SChris Wilson rq = hang_create_request(&h, engine); 1691112ed2d3SChris Wilson if (IS_ERR(rq)) { 1692112ed2d3SChris Wilson err = PTR_ERR(rq); 16933f5dff6cSJohn Harrison pr_err("[%s] Create hang request failed: %d!\n", engine->name, err); 1694617e87c0SJohn Harrison goto restore; 1695112ed2d3SChris Wilson } 1696112ed2d3SChris Wilson 1697112ed2d3SChris Wilson i915_request_get(rq); 1698112ed2d3SChris Wilson i915_request_add(rq); 1699112ed2d3SChris Wilson 1700112ed2d3SChris Wilson /* 1701112ed2d3SChris Wilson * XXX We don't handle resetting the kernel context 1702112ed2d3SChris Wilson * very well. If we trigger a device reset twice in 1703112ed2d3SChris Wilson * quick succession while the kernel context is 1704112ed2d3SChris Wilson * executing, we may end up skipping the breadcrumb. 1705112ed2d3SChris Wilson * This is really only a problem for the selftest as 1706112ed2d3SChris Wilson * normally there is a large interlude between resets 1707112ed2d3SChris Wilson * (hangcheck), or we focus on resetting just one 1708112ed2d3SChris Wilson * engine and so avoid repeatedly resetting innocents. 1709112ed2d3SChris Wilson */ 1710cb823ed9SChris Wilson err = wait_for_others(gt, engine); 1711112ed2d3SChris Wilson if (err) { 1712112ed2d3SChris Wilson pr_err("%s(%s): Failed to idle other inactive engines after device reset\n", 1713112ed2d3SChris Wilson __func__, engine->name); 1714112ed2d3SChris Wilson i915_request_put(rq); 1715112ed2d3SChris Wilson i915_request_put(prev); 1716112ed2d3SChris Wilson 1717112ed2d3SChris Wilson GEM_TRACE_DUMP(); 1718cb823ed9SChris Wilson intel_gt_set_wedged(gt); 1719617e87c0SJohn Harrison goto restore; 1720112ed2d3SChris Wilson } 1721112ed2d3SChris Wilson 1722112ed2d3SChris Wilson if (!wait_until_running(&h, prev)) { 1723cb823ed9SChris Wilson struct drm_printer p = drm_info_printer(gt->i915->drm.dev); 1724112ed2d3SChris Wilson 1725112ed2d3SChris Wilson pr_err("%s(%s): Failed to start request %llx, at %x\n", 1726112ed2d3SChris Wilson __func__, engine->name, 1727112ed2d3SChris Wilson prev->fence.seqno, hws_seqno(&h, prev)); 1728112ed2d3SChris Wilson intel_engine_dump(engine, &p, 1729112ed2d3SChris Wilson "%s\n", engine->name); 1730112ed2d3SChris Wilson 1731112ed2d3SChris Wilson i915_request_put(rq); 1732112ed2d3SChris Wilson i915_request_put(prev); 1733112ed2d3SChris Wilson 1734cb823ed9SChris Wilson intel_gt_set_wedged(gt); 1735112ed2d3SChris Wilson 1736112ed2d3SChris Wilson err = -EIO; 1737617e87c0SJohn Harrison goto restore; 1738112ed2d3SChris Wilson } 1739112ed2d3SChris Wilson 1740cb823ed9SChris Wilson reset_count = fake_hangcheck(gt, BIT(id)); 1741112ed2d3SChris Wilson 1742112ed2d3SChris Wilson if (prev->fence.error != -EIO) { 1743112ed2d3SChris Wilson pr_err("GPU reset not recorded on hanging request [fence.error=%d]!\n", 1744112ed2d3SChris Wilson prev->fence.error); 1745112ed2d3SChris Wilson i915_request_put(rq); 1746112ed2d3SChris Wilson i915_request_put(prev); 1747112ed2d3SChris Wilson err = -EINVAL; 1748617e87c0SJohn Harrison goto restore; 1749112ed2d3SChris Wilson } 1750112ed2d3SChris Wilson 1751112ed2d3SChris Wilson if (rq->fence.error) { 1752112ed2d3SChris Wilson pr_err("Fence error status not zero [%d] after unrelated reset\n", 1753112ed2d3SChris Wilson rq->fence.error); 1754112ed2d3SChris Wilson i915_request_put(rq); 1755112ed2d3SChris Wilson i915_request_put(prev); 1756112ed2d3SChris Wilson err = -EINVAL; 1757617e87c0SJohn Harrison goto restore; 1758112ed2d3SChris Wilson } 1759112ed2d3SChris Wilson 1760cb823ed9SChris Wilson if (i915_reset_count(global) == reset_count) { 1761112ed2d3SChris Wilson pr_err("No GPU reset recorded!\n"); 1762112ed2d3SChris Wilson i915_request_put(rq); 1763112ed2d3SChris Wilson i915_request_put(prev); 1764112ed2d3SChris Wilson err = -EINVAL; 1765617e87c0SJohn Harrison goto restore; 1766112ed2d3SChris Wilson } 1767112ed2d3SChris Wilson 1768112ed2d3SChris Wilson i915_request_put(prev); 1769112ed2d3SChris Wilson prev = rq; 1770112ed2d3SChris Wilson count++; 1771112ed2d3SChris Wilson } while (time_before(jiffies, end_time)); 177280655d2aSChris Wilson pr_info("%s: Completed %d queued resets\n", 177380655d2aSChris Wilson engine->name, count); 1774112ed2d3SChris Wilson 1775112ed2d3SChris Wilson *h.batch = MI_BATCH_BUFFER_END; 1776baea429dSTvrtko Ursulin intel_gt_chipset_flush(engine->gt); 1777112ed2d3SChris Wilson 1778112ed2d3SChris Wilson i915_request_put(prev); 1779112ed2d3SChris Wilson 1780617e87c0SJohn Harrison restore: 1781617e87c0SJohn Harrison if (using_guc) { 1782617e87c0SJohn Harrison int err2 = intel_selftest_restore_policy(engine, &saved); 1783617e87c0SJohn Harrison 1784617e87c0SJohn Harrison if (err2) 1785617e87c0SJohn Harrison pr_err("%s:%d> [%s] Restore policy failed: %d!\n", 1786617e87c0SJohn Harrison __func__, __LINE__, engine->name, err2); 1787617e87c0SJohn Harrison if (err == 0) 1788617e87c0SJohn Harrison err = err2; 1789617e87c0SJohn Harrison } 1790617e87c0SJohn Harrison if (err) 1791617e87c0SJohn Harrison goto fini; 1792617e87c0SJohn Harrison 17937e805762SChris Wilson err = igt_flush_test(gt->i915); 17943f5dff6cSJohn Harrison if (err) { 17953f5dff6cSJohn Harrison pr_err("[%s] Flush failed: %d!\n", engine->name, err); 1796112ed2d3SChris Wilson break; 1797112ed2d3SChris Wilson } 17983f5dff6cSJohn Harrison } 1799112ed2d3SChris Wilson 1800112ed2d3SChris Wilson fini: 1801112ed2d3SChris Wilson hang_fini(&h); 1802112ed2d3SChris Wilson unlock: 1803cb823ed9SChris Wilson igt_global_reset_unlock(gt); 1804112ed2d3SChris Wilson 1805cb823ed9SChris Wilson if (intel_gt_is_wedged(gt)) 1806112ed2d3SChris Wilson return -EIO; 1807112ed2d3SChris Wilson 1808112ed2d3SChris Wilson return err; 1809112ed2d3SChris Wilson } 1810112ed2d3SChris Wilson 1811112ed2d3SChris Wilson static int igt_handle_error(void *arg) 1812112ed2d3SChris Wilson { 1813cb823ed9SChris Wilson struct intel_gt *gt = arg; 1814cb823ed9SChris Wilson struct i915_gpu_error *global = >->i915->gpu_error; 18151f9f6353SChris Wilson struct intel_engine_cs *engine = gt->engine[RCS0]; 1816112ed2d3SChris Wilson struct hang h; 1817112ed2d3SChris Wilson struct i915_request *rq; 1818742379c0SChris Wilson struct i915_gpu_coredump *error; 1819112ed2d3SChris Wilson int err; 1820112ed2d3SChris Wilson 1821112ed2d3SChris Wilson /* Check that we can issue a global GPU and engine reset */ 1822112ed2d3SChris Wilson 1823260e6b71SChris Wilson if (!intel_has_reset_engine(gt)) 1824112ed2d3SChris Wilson return 0; 1825112ed2d3SChris Wilson 1826112ed2d3SChris Wilson if (!engine || !intel_engine_can_store_dword(engine)) 1827112ed2d3SChris Wilson return 0; 1828112ed2d3SChris Wilson 1829cb823ed9SChris Wilson err = hang_init(&h, gt); 18303f5dff6cSJohn Harrison if (err) { 18313f5dff6cSJohn Harrison pr_err("[%s] Hang init failed: %d!\n", engine->name, err); 18327e805762SChris Wilson return err; 18333f5dff6cSJohn Harrison } 1834112ed2d3SChris Wilson 1835112ed2d3SChris Wilson rq = hang_create_request(&h, engine); 1836112ed2d3SChris Wilson if (IS_ERR(rq)) { 1837112ed2d3SChris Wilson err = PTR_ERR(rq); 18383f5dff6cSJohn Harrison pr_err("[%s] Create hang request failed: %d!\n", engine->name, err); 1839112ed2d3SChris Wilson goto err_fini; 1840112ed2d3SChris Wilson } 1841112ed2d3SChris Wilson 1842112ed2d3SChris Wilson i915_request_get(rq); 1843112ed2d3SChris Wilson i915_request_add(rq); 1844112ed2d3SChris Wilson 1845112ed2d3SChris Wilson if (!wait_until_running(&h, rq)) { 1846cb823ed9SChris Wilson struct drm_printer p = drm_info_printer(gt->i915->drm.dev); 1847112ed2d3SChris Wilson 1848112ed2d3SChris Wilson pr_err("%s: Failed to start request %llx, at %x\n", 1849112ed2d3SChris Wilson __func__, rq->fence.seqno, hws_seqno(&h, rq)); 1850112ed2d3SChris Wilson intel_engine_dump(rq->engine, &p, "%s\n", rq->engine->name); 1851112ed2d3SChris Wilson 1852cb823ed9SChris Wilson intel_gt_set_wedged(gt); 1853112ed2d3SChris Wilson 1854112ed2d3SChris Wilson err = -EIO; 1855112ed2d3SChris Wilson goto err_request; 1856112ed2d3SChris Wilson } 1857112ed2d3SChris Wilson 1858112ed2d3SChris Wilson /* Temporarily disable error capture */ 1859cb823ed9SChris Wilson error = xchg(&global->first_error, (void *)-1); 1860112ed2d3SChris Wilson 1861cb823ed9SChris Wilson intel_gt_handle_error(gt, engine->mask, 0, NULL); 1862112ed2d3SChris Wilson 1863cb823ed9SChris Wilson xchg(&global->first_error, error); 1864112ed2d3SChris Wilson 1865112ed2d3SChris Wilson if (rq->fence.error != -EIO) { 1866112ed2d3SChris Wilson pr_err("Guilty request not identified!\n"); 1867112ed2d3SChris Wilson err = -EINVAL; 1868112ed2d3SChris Wilson goto err_request; 1869112ed2d3SChris Wilson } 1870112ed2d3SChris Wilson 1871112ed2d3SChris Wilson err_request: 1872112ed2d3SChris Wilson i915_request_put(rq); 1873112ed2d3SChris Wilson err_fini: 1874112ed2d3SChris Wilson hang_fini(&h); 1875112ed2d3SChris Wilson return err; 1876112ed2d3SChris Wilson } 1877112ed2d3SChris Wilson 1878112ed2d3SChris Wilson static int __igt_atomic_reset_engine(struct intel_engine_cs *engine, 1879f6470c9bSMichal Wajdeczko const struct igt_atomic_section *p, 1880112ed2d3SChris Wilson const char *mode) 1881112ed2d3SChris Wilson { 188222916badSMatthew Brost struct tasklet_struct * const t = &engine->sched_engine->tasklet; 1883112ed2d3SChris Wilson int err; 1884112ed2d3SChris Wilson 1885112ed2d3SChris Wilson GEM_TRACE("i915_reset_engine(%s:%s) under %s\n", 1886112ed2d3SChris Wilson engine->name, mode, p->name); 1887112ed2d3SChris Wilson 188880655d2aSChris Wilson if (t->func) 188993100fdeSChris Wilson tasklet_disable(t); 189016f2941aSChris Wilson if (strcmp(p->name, "softirq")) 189116f2941aSChris Wilson local_bh_disable(); 1892112ed2d3SChris Wilson p->critical_section_begin(); 1893112ed2d3SChris Wilson 189416f2941aSChris Wilson err = __intel_engine_reset_bh(engine, NULL); 1895112ed2d3SChris Wilson 1896112ed2d3SChris Wilson p->critical_section_end(); 189716f2941aSChris Wilson if (strcmp(p->name, "softirq")) 189816f2941aSChris Wilson local_bh_enable(); 189980655d2aSChris Wilson if (t->func) { 1900112ed2d3SChris Wilson tasklet_enable(t); 190116f2941aSChris Wilson tasklet_hi_schedule(t); 190280655d2aSChris Wilson } 1903112ed2d3SChris Wilson 1904112ed2d3SChris Wilson if (err) 1905112ed2d3SChris Wilson pr_err("i915_reset_engine(%s:%s) failed under %s\n", 1906112ed2d3SChris Wilson engine->name, mode, p->name); 1907112ed2d3SChris Wilson 1908112ed2d3SChris Wilson return err; 1909112ed2d3SChris Wilson } 1910112ed2d3SChris Wilson 1911112ed2d3SChris Wilson static int igt_atomic_reset_engine(struct intel_engine_cs *engine, 1912f6470c9bSMichal Wajdeczko const struct igt_atomic_section *p) 1913112ed2d3SChris Wilson { 1914112ed2d3SChris Wilson struct i915_request *rq; 1915112ed2d3SChris Wilson struct hang h; 1916112ed2d3SChris Wilson int err; 1917112ed2d3SChris Wilson 1918112ed2d3SChris Wilson err = __igt_atomic_reset_engine(engine, p, "idle"); 1919112ed2d3SChris Wilson if (err) 1920112ed2d3SChris Wilson return err; 1921112ed2d3SChris Wilson 1922cb823ed9SChris Wilson err = hang_init(&h, engine->gt); 19233f5dff6cSJohn Harrison if (err) { 19243f5dff6cSJohn Harrison pr_err("[%s] Hang init failed: %d!\n", engine->name, err); 1925112ed2d3SChris Wilson return err; 19263f5dff6cSJohn Harrison } 1927112ed2d3SChris Wilson 1928112ed2d3SChris Wilson rq = hang_create_request(&h, engine); 1929112ed2d3SChris Wilson if (IS_ERR(rq)) { 1930112ed2d3SChris Wilson err = PTR_ERR(rq); 19313f5dff6cSJohn Harrison pr_err("[%s] Create hang request failed: %d!\n", engine->name, err); 1932112ed2d3SChris Wilson goto out; 1933112ed2d3SChris Wilson } 1934112ed2d3SChris Wilson 1935112ed2d3SChris Wilson i915_request_get(rq); 1936112ed2d3SChris Wilson i915_request_add(rq); 1937112ed2d3SChris Wilson 1938112ed2d3SChris Wilson if (wait_until_running(&h, rq)) { 1939112ed2d3SChris Wilson err = __igt_atomic_reset_engine(engine, p, "active"); 1940112ed2d3SChris Wilson } else { 1941112ed2d3SChris Wilson pr_err("%s(%s): Failed to start request %llx, at %x\n", 1942112ed2d3SChris Wilson __func__, engine->name, 1943112ed2d3SChris Wilson rq->fence.seqno, hws_seqno(&h, rq)); 1944cb823ed9SChris Wilson intel_gt_set_wedged(engine->gt); 1945112ed2d3SChris Wilson err = -EIO; 1946112ed2d3SChris Wilson } 1947112ed2d3SChris Wilson 1948112ed2d3SChris Wilson if (err == 0) { 1949cb823ed9SChris Wilson struct intel_wedge_me w; 1950112ed2d3SChris Wilson 1951cb823ed9SChris Wilson intel_wedge_on_timeout(&w, engine->gt, HZ / 20 /* 50ms */) 19522f530945SChris Wilson i915_request_wait(rq, 0, MAX_SCHEDULE_TIMEOUT); 1953cb823ed9SChris Wilson if (intel_gt_is_wedged(engine->gt)) 1954112ed2d3SChris Wilson err = -EIO; 1955112ed2d3SChris Wilson } 1956112ed2d3SChris Wilson 1957112ed2d3SChris Wilson i915_request_put(rq); 1958112ed2d3SChris Wilson out: 1959112ed2d3SChris Wilson hang_fini(&h); 1960112ed2d3SChris Wilson return err; 1961112ed2d3SChris Wilson } 1962112ed2d3SChris Wilson 1963f6470c9bSMichal Wajdeczko static int igt_reset_engines_atomic(void *arg) 1964112ed2d3SChris Wilson { 1965cb823ed9SChris Wilson struct intel_gt *gt = arg; 1966f6470c9bSMichal Wajdeczko const typeof(*igt_atomic_phases) *p; 1967112ed2d3SChris Wilson int err = 0; 1968112ed2d3SChris Wilson 1969f6470c9bSMichal Wajdeczko /* Check that the engines resets are usable from atomic context */ 1970f6470c9bSMichal Wajdeczko 1971260e6b71SChris Wilson if (!intel_has_reset_engine(gt)) 1972f6470c9bSMichal Wajdeczko return 0; 1973f6470c9bSMichal Wajdeczko 1974065273f7SDaniele Ceraolo Spurio if (intel_uc_uses_guc_submission(>->uc)) 1975f6470c9bSMichal Wajdeczko return 0; 1976112ed2d3SChris Wilson 1977cb823ed9SChris Wilson igt_global_reset_lock(gt); 1978112ed2d3SChris Wilson 1979112ed2d3SChris Wilson /* Flush any requests before we get started and check basics */ 1980cb823ed9SChris Wilson if (!igt_force_reset(gt)) 1981112ed2d3SChris Wilson goto unlock; 1982112ed2d3SChris Wilson 1983f6470c9bSMichal Wajdeczko for (p = igt_atomic_phases; p->name; p++) { 1984112ed2d3SChris Wilson struct intel_engine_cs *engine; 1985112ed2d3SChris Wilson enum intel_engine_id id; 1986112ed2d3SChris Wilson 19875d904e3cSTvrtko Ursulin for_each_engine(engine, gt, id) { 1988112ed2d3SChris Wilson err = igt_atomic_reset_engine(engine, p); 1989112ed2d3SChris Wilson if (err) 1990112ed2d3SChris Wilson goto out; 1991112ed2d3SChris Wilson } 1992112ed2d3SChris Wilson } 1993112ed2d3SChris Wilson 1994112ed2d3SChris Wilson out: 1995112ed2d3SChris Wilson /* As we poke around the guts, do a full reset before continuing. */ 1996cb823ed9SChris Wilson igt_force_reset(gt); 1997112ed2d3SChris Wilson unlock: 1998cb823ed9SChris Wilson igt_global_reset_unlock(gt); 1999112ed2d3SChris Wilson 2000112ed2d3SChris Wilson return err; 2001112ed2d3SChris Wilson } 2002112ed2d3SChris Wilson 2003112ed2d3SChris Wilson int intel_hangcheck_live_selftests(struct drm_i915_private *i915) 2004112ed2d3SChris Wilson { 2005112ed2d3SChris Wilson static const struct i915_subtest tests[] = { 2006112ed2d3SChris Wilson SUBTEST(igt_hang_sanitycheck), 2007112ed2d3SChris Wilson SUBTEST(igt_reset_nop), 2008112ed2d3SChris Wilson SUBTEST(igt_reset_nop_engine), 2009112ed2d3SChris Wilson SUBTEST(igt_reset_idle_engine), 2010112ed2d3SChris Wilson SUBTEST(igt_reset_active_engine), 2011106a9368SChris Wilson SUBTEST(igt_reset_fail_engine), 2012112ed2d3SChris Wilson SUBTEST(igt_reset_engines), 2013f6470c9bSMichal Wajdeczko SUBTEST(igt_reset_engines_atomic), 2014112ed2d3SChris Wilson SUBTEST(igt_reset_queue), 2015112ed2d3SChris Wilson SUBTEST(igt_reset_wait), 2016112ed2d3SChris Wilson SUBTEST(igt_reset_evict_ggtt), 2017112ed2d3SChris Wilson SUBTEST(igt_reset_evict_ppgtt), 2018112ed2d3SChris Wilson SUBTEST(igt_reset_evict_fence), 2019112ed2d3SChris Wilson SUBTEST(igt_handle_error), 2020112ed2d3SChris Wilson }; 2021c14adcbdSMichał Winiarski struct intel_gt *gt = to_gt(i915); 2022112ed2d3SChris Wilson intel_wakeref_t wakeref; 2023112ed2d3SChris Wilson int err; 2024112ed2d3SChris Wilson 2025260e6b71SChris Wilson if (!intel_has_gpu_reset(gt)) 2026112ed2d3SChris Wilson return 0; 2027112ed2d3SChris Wilson 2028cb823ed9SChris Wilson if (intel_gt_is_wedged(gt)) 2029112ed2d3SChris Wilson return -EIO; /* we're long past hope of a successful reset */ 2030112ed2d3SChris Wilson 2031cd6a8513SChris Wilson wakeref = intel_runtime_pm_get(gt->uncore->rpm); 2032112ed2d3SChris Wilson 2033cb823ed9SChris Wilson err = intel_gt_live_subtests(tests, gt); 2034112ed2d3SChris Wilson 2035cd6a8513SChris Wilson intel_runtime_pm_put(gt->uncore->rpm, wakeref); 2036112ed2d3SChris Wilson 2037112ed2d3SChris Wilson return err; 2038112ed2d3SChris Wilson } 2039