124f90d66SChris Wilson // SPDX-License-Identifier: MIT 2112ed2d3SChris Wilson /* 3112ed2d3SChris Wilson * Copyright © 2016 Intel Corporation 4112ed2d3SChris Wilson */ 5112ed2d3SChris Wilson 6112ed2d3SChris Wilson #include <linux/kthread.h> 7112ed2d3SChris Wilson 810be98a7SChris Wilson #include "gem/i915_gem_context.h" 9b508d01fSJani Nikula #include "gem/i915_gem_internal.h" 10e6ba7648SChris Wilson 112ef97818SJani Nikula #include "i915_gem_evict.h" 12e6ba7648SChris Wilson #include "intel_gt.h" 13e6ba7648SChris Wilson #include "intel_engine_heartbeat.h" 1479ffac85SChris Wilson #include "intel_engine_pm.h" 151b90e4a4SChris Wilson #include "selftest_engine_heartbeat.h" 1679ffac85SChris Wilson 17112ed2d3SChris Wilson #include "i915_selftest.h" 18112ed2d3SChris Wilson #include "selftests/i915_random.h" 19112ed2d3SChris Wilson #include "selftests/igt_flush_test.h" 20112ed2d3SChris Wilson #include "selftests/igt_reset.h" 21f6470c9bSMichal Wajdeczko #include "selftests/igt_atomic.h" 22617e87c0SJohn Harrison #include "selftests/igt_spinner.h" 23617e87c0SJohn Harrison #include "selftests/intel_scheduler_helpers.h" 24112ed2d3SChris Wilson 25112ed2d3SChris Wilson #include "selftests/mock_drm.h" 26112ed2d3SChris Wilson 2710be98a7SChris Wilson #include "gem/selftests/mock_context.h" 2810be98a7SChris Wilson #include "gem/selftests/igt_gem_utils.h" 2910be98a7SChris Wilson 30112ed2d3SChris Wilson #define IGT_IDLE_TIMEOUT 50 /* ms; time to wait after flushing between tests */ 31112ed2d3SChris Wilson 32112ed2d3SChris Wilson struct hang { 33baea429dSTvrtko Ursulin struct intel_gt *gt; 34112ed2d3SChris Wilson struct drm_i915_gem_object *hws; 35112ed2d3SChris Wilson struct drm_i915_gem_object *obj; 36112ed2d3SChris Wilson struct i915_gem_context *ctx; 37112ed2d3SChris Wilson u32 *seqno; 38112ed2d3SChris Wilson u32 *batch; 39112ed2d3SChris Wilson }; 40112ed2d3SChris Wilson 41cb823ed9SChris Wilson static int hang_init(struct hang *h, struct intel_gt *gt) 42112ed2d3SChris Wilson { 43112ed2d3SChris Wilson void *vaddr; 44112ed2d3SChris Wilson int err; 45112ed2d3SChris Wilson 46112ed2d3SChris Wilson memset(h, 0, sizeof(*h)); 47cb823ed9SChris Wilson h->gt = gt; 48112ed2d3SChris Wilson 495888d588SJason Ekstrand h->ctx = kernel_context(gt->i915, NULL); 50112ed2d3SChris Wilson if (IS_ERR(h->ctx)) 51112ed2d3SChris Wilson return PTR_ERR(h->ctx); 52112ed2d3SChris Wilson 53112ed2d3SChris Wilson GEM_BUG_ON(i915_gem_context_is_bannable(h->ctx)); 54112ed2d3SChris Wilson 55cb823ed9SChris Wilson h->hws = i915_gem_object_create_internal(gt->i915, PAGE_SIZE); 56112ed2d3SChris Wilson if (IS_ERR(h->hws)) { 57112ed2d3SChris Wilson err = PTR_ERR(h->hws); 58112ed2d3SChris Wilson goto err_ctx; 59112ed2d3SChris Wilson } 60112ed2d3SChris Wilson 61cb823ed9SChris Wilson h->obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE); 62112ed2d3SChris Wilson if (IS_ERR(h->obj)) { 63112ed2d3SChris Wilson err = PTR_ERR(h->obj); 64112ed2d3SChris Wilson goto err_hws; 65112ed2d3SChris Wilson } 66112ed2d3SChris Wilson 67112ed2d3SChris Wilson i915_gem_object_set_cache_coherency(h->hws, I915_CACHE_LLC); 6817b7ab92SMaarten Lankhorst vaddr = i915_gem_object_pin_map_unlocked(h->hws, I915_MAP_WB); 69112ed2d3SChris Wilson if (IS_ERR(vaddr)) { 70112ed2d3SChris Wilson err = PTR_ERR(vaddr); 71112ed2d3SChris Wilson goto err_obj; 72112ed2d3SChris Wilson } 73112ed2d3SChris Wilson h->seqno = memset(vaddr, 0xff, PAGE_SIZE); 74112ed2d3SChris Wilson 7517b7ab92SMaarten Lankhorst vaddr = i915_gem_object_pin_map_unlocked(h->obj, 76fa85bfd1SVenkata Sandeep Dhanalakota i915_coherent_map_type(gt->i915, h->obj, false)); 77112ed2d3SChris Wilson if (IS_ERR(vaddr)) { 78112ed2d3SChris Wilson err = PTR_ERR(vaddr); 79112ed2d3SChris Wilson goto err_unpin_hws; 80112ed2d3SChris Wilson } 81112ed2d3SChris Wilson h->batch = vaddr; 82112ed2d3SChris Wilson 83112ed2d3SChris Wilson return 0; 84112ed2d3SChris Wilson 85112ed2d3SChris Wilson err_unpin_hws: 86112ed2d3SChris Wilson i915_gem_object_unpin_map(h->hws); 87112ed2d3SChris Wilson err_obj: 88112ed2d3SChris Wilson i915_gem_object_put(h->obj); 89112ed2d3SChris Wilson err_hws: 90112ed2d3SChris Wilson i915_gem_object_put(h->hws); 91112ed2d3SChris Wilson err_ctx: 92112ed2d3SChris Wilson kernel_context_close(h->ctx); 93112ed2d3SChris Wilson return err; 94112ed2d3SChris Wilson } 95112ed2d3SChris Wilson 96112ed2d3SChris Wilson static u64 hws_address(const struct i915_vma *hws, 97112ed2d3SChris Wilson const struct i915_request *rq) 98112ed2d3SChris Wilson { 99*8e4ee5e8SChris Wilson return i915_vma_offset(hws) + 100*8e4ee5e8SChris Wilson offset_in_page(sizeof(u32) * rq->fence.context); 101112ed2d3SChris Wilson } 102112ed2d3SChris Wilson 103112ed2d3SChris Wilson static struct i915_request * 104112ed2d3SChris Wilson hang_create_request(struct hang *h, struct intel_engine_cs *engine) 105112ed2d3SChris Wilson { 106cb823ed9SChris Wilson struct intel_gt *gt = h->gt; 107c6d04e48SDaniel Vetter struct i915_address_space *vm = i915_gem_context_get_eb_vm(h->ctx); 108a93615f9SChris Wilson struct drm_i915_gem_object *obj; 109112ed2d3SChris Wilson struct i915_request *rq = NULL; 110112ed2d3SChris Wilson struct i915_vma *hws, *vma; 111112ed2d3SChris Wilson unsigned int flags; 112a93615f9SChris Wilson void *vaddr; 113112ed2d3SChris Wilson u32 *batch; 114112ed2d3SChris Wilson int err; 115112ed2d3SChris Wilson 116cb823ed9SChris Wilson obj = i915_gem_object_create_internal(gt->i915, PAGE_SIZE); 117a4e7ccdaSChris Wilson if (IS_ERR(obj)) { 118a4e7ccdaSChris Wilson i915_vm_put(vm); 119112ed2d3SChris Wilson return ERR_CAST(obj); 120a4e7ccdaSChris Wilson } 121112ed2d3SChris Wilson 122fa85bfd1SVenkata Sandeep Dhanalakota vaddr = i915_gem_object_pin_map_unlocked(obj, i915_coherent_map_type(gt->i915, obj, false)); 123112ed2d3SChris Wilson if (IS_ERR(vaddr)) { 124112ed2d3SChris Wilson i915_gem_object_put(obj); 125a4e7ccdaSChris Wilson i915_vm_put(vm); 126112ed2d3SChris Wilson return ERR_CAST(vaddr); 127112ed2d3SChris Wilson } 128112ed2d3SChris Wilson 129112ed2d3SChris Wilson i915_gem_object_unpin_map(h->obj); 130112ed2d3SChris Wilson i915_gem_object_put(h->obj); 131112ed2d3SChris Wilson 132112ed2d3SChris Wilson h->obj = obj; 133112ed2d3SChris Wilson h->batch = vaddr; 134112ed2d3SChris Wilson 135112ed2d3SChris Wilson vma = i915_vma_instance(h->obj, vm, NULL); 136a4e7ccdaSChris Wilson if (IS_ERR(vma)) { 137a4e7ccdaSChris Wilson i915_vm_put(vm); 138112ed2d3SChris Wilson return ERR_CAST(vma); 139a4e7ccdaSChris Wilson } 140112ed2d3SChris Wilson 141112ed2d3SChris Wilson hws = i915_vma_instance(h->hws, vm, NULL); 142a4e7ccdaSChris Wilson if (IS_ERR(hws)) { 143a4e7ccdaSChris Wilson i915_vm_put(vm); 144112ed2d3SChris Wilson return ERR_CAST(hws); 145a4e7ccdaSChris Wilson } 146112ed2d3SChris Wilson 147112ed2d3SChris Wilson err = i915_vma_pin(vma, 0, 0, PIN_USER); 148a4e7ccdaSChris Wilson if (err) { 149a4e7ccdaSChris Wilson i915_vm_put(vm); 150112ed2d3SChris Wilson return ERR_PTR(err); 151a4e7ccdaSChris Wilson } 152112ed2d3SChris Wilson 153112ed2d3SChris Wilson err = i915_vma_pin(hws, 0, 0, PIN_USER); 154112ed2d3SChris Wilson if (err) 155112ed2d3SChris Wilson goto unpin_vma; 156112ed2d3SChris Wilson 15746472b3eSChris Wilson rq = igt_request_alloc(h->ctx, engine); 158112ed2d3SChris Wilson if (IS_ERR(rq)) { 159112ed2d3SChris Wilson err = PTR_ERR(rq); 160112ed2d3SChris Wilson goto unpin_hws; 161112ed2d3SChris Wilson } 162112ed2d3SChris Wilson 16356d7bd74SAndrzej Hajda err = igt_vma_move_to_active_unlocked(vma, rq, 0); 164112ed2d3SChris Wilson if (err) 165112ed2d3SChris Wilson goto cancel_rq; 166112ed2d3SChris Wilson 16756d7bd74SAndrzej Hajda err = igt_vma_move_to_active_unlocked(hws, rq, 0); 168112ed2d3SChris Wilson if (err) 169112ed2d3SChris Wilson goto cancel_rq; 170112ed2d3SChris Wilson 171112ed2d3SChris Wilson batch = h->batch; 172c816723bSLucas De Marchi if (GRAPHICS_VER(gt->i915) >= 8) { 173112ed2d3SChris Wilson *batch++ = MI_STORE_DWORD_IMM_GEN4; 174112ed2d3SChris Wilson *batch++ = lower_32_bits(hws_address(hws, rq)); 175112ed2d3SChris Wilson *batch++ = upper_32_bits(hws_address(hws, rq)); 176112ed2d3SChris Wilson *batch++ = rq->fence.seqno; 17794ed4753SChris Wilson *batch++ = MI_NOOP; 178112ed2d3SChris Wilson 179112ed2d3SChris Wilson memset(batch, 0, 1024); 180112ed2d3SChris Wilson batch += 1024 / sizeof(*batch); 181112ed2d3SChris Wilson 18294ed4753SChris Wilson *batch++ = MI_NOOP; 183112ed2d3SChris Wilson *batch++ = MI_BATCH_BUFFER_START | 1 << 8 | 1; 184*8e4ee5e8SChris Wilson *batch++ = lower_32_bits(i915_vma_offset(vma)); 185*8e4ee5e8SChris Wilson *batch++ = upper_32_bits(i915_vma_offset(vma)); 186c816723bSLucas De Marchi } else if (GRAPHICS_VER(gt->i915) >= 6) { 187112ed2d3SChris Wilson *batch++ = MI_STORE_DWORD_IMM_GEN4; 188112ed2d3SChris Wilson *batch++ = 0; 189112ed2d3SChris Wilson *batch++ = lower_32_bits(hws_address(hws, rq)); 190112ed2d3SChris Wilson *batch++ = rq->fence.seqno; 19194ed4753SChris Wilson *batch++ = MI_NOOP; 192112ed2d3SChris Wilson 193112ed2d3SChris Wilson memset(batch, 0, 1024); 194112ed2d3SChris Wilson batch += 1024 / sizeof(*batch); 195112ed2d3SChris Wilson 19694ed4753SChris Wilson *batch++ = MI_NOOP; 197112ed2d3SChris Wilson *batch++ = MI_BATCH_BUFFER_START | 1 << 8; 198*8e4ee5e8SChris Wilson *batch++ = lower_32_bits(i915_vma_offset(vma)); 199c816723bSLucas De Marchi } else if (GRAPHICS_VER(gt->i915) >= 4) { 200112ed2d3SChris Wilson *batch++ = MI_STORE_DWORD_IMM_GEN4 | MI_USE_GGTT; 201112ed2d3SChris Wilson *batch++ = 0; 202112ed2d3SChris Wilson *batch++ = lower_32_bits(hws_address(hws, rq)); 203112ed2d3SChris Wilson *batch++ = rq->fence.seqno; 20494ed4753SChris Wilson *batch++ = MI_NOOP; 205112ed2d3SChris Wilson 206112ed2d3SChris Wilson memset(batch, 0, 1024); 207112ed2d3SChris Wilson batch += 1024 / sizeof(*batch); 208112ed2d3SChris Wilson 20994ed4753SChris Wilson *batch++ = MI_NOOP; 210112ed2d3SChris Wilson *batch++ = MI_BATCH_BUFFER_START | 2 << 6; 211*8e4ee5e8SChris Wilson *batch++ = lower_32_bits(i915_vma_offset(vma)); 212112ed2d3SChris Wilson } else { 213112ed2d3SChris Wilson *batch++ = MI_STORE_DWORD_IMM | MI_MEM_VIRTUAL; 214112ed2d3SChris Wilson *batch++ = lower_32_bits(hws_address(hws, rq)); 215112ed2d3SChris Wilson *batch++ = rq->fence.seqno; 21694ed4753SChris Wilson *batch++ = MI_NOOP; 217112ed2d3SChris Wilson 218112ed2d3SChris Wilson memset(batch, 0, 1024); 219112ed2d3SChris Wilson batch += 1024 / sizeof(*batch); 220112ed2d3SChris Wilson 22194ed4753SChris Wilson *batch++ = MI_NOOP; 222112ed2d3SChris Wilson *batch++ = MI_BATCH_BUFFER_START | 2 << 6; 223*8e4ee5e8SChris Wilson *batch++ = lower_32_bits(i915_vma_offset(vma)); 224112ed2d3SChris Wilson } 225112ed2d3SChris Wilson *batch++ = MI_BATCH_BUFFER_END; /* not reached */ 226baea429dSTvrtko Ursulin intel_gt_chipset_flush(engine->gt); 227112ed2d3SChris Wilson 228112ed2d3SChris Wilson if (rq->engine->emit_init_breadcrumb) { 229112ed2d3SChris Wilson err = rq->engine->emit_init_breadcrumb(rq); 230112ed2d3SChris Wilson if (err) 231112ed2d3SChris Wilson goto cancel_rq; 232112ed2d3SChris Wilson } 233112ed2d3SChris Wilson 234112ed2d3SChris Wilson flags = 0; 235c816723bSLucas De Marchi if (GRAPHICS_VER(gt->i915) <= 5) 236112ed2d3SChris Wilson flags |= I915_DISPATCH_SECURE; 237112ed2d3SChris Wilson 238*8e4ee5e8SChris Wilson err = rq->engine->emit_bb_start(rq, i915_vma_offset(vma), PAGE_SIZE, flags); 239112ed2d3SChris Wilson 240112ed2d3SChris Wilson cancel_rq: 241112ed2d3SChris Wilson if (err) { 24236e191f0SChris Wilson i915_request_set_error_once(rq, err); 243112ed2d3SChris Wilson i915_request_add(rq); 244112ed2d3SChris Wilson } 245112ed2d3SChris Wilson unpin_hws: 246112ed2d3SChris Wilson i915_vma_unpin(hws); 247112ed2d3SChris Wilson unpin_vma: 248112ed2d3SChris Wilson i915_vma_unpin(vma); 249a4e7ccdaSChris Wilson i915_vm_put(vm); 250112ed2d3SChris Wilson return err ? ERR_PTR(err) : rq; 251112ed2d3SChris Wilson } 252112ed2d3SChris Wilson 253112ed2d3SChris Wilson static u32 hws_seqno(const struct hang *h, const struct i915_request *rq) 254112ed2d3SChris Wilson { 255112ed2d3SChris Wilson return READ_ONCE(h->seqno[rq->fence.context % (PAGE_SIZE/sizeof(u32))]); 256112ed2d3SChris Wilson } 257112ed2d3SChris Wilson 258112ed2d3SChris Wilson static void hang_fini(struct hang *h) 259112ed2d3SChris Wilson { 260112ed2d3SChris Wilson *h->batch = MI_BATCH_BUFFER_END; 261baea429dSTvrtko Ursulin intel_gt_chipset_flush(h->gt); 262112ed2d3SChris Wilson 263112ed2d3SChris Wilson i915_gem_object_unpin_map(h->obj); 264112ed2d3SChris Wilson i915_gem_object_put(h->obj); 265112ed2d3SChris Wilson 266112ed2d3SChris Wilson i915_gem_object_unpin_map(h->hws); 267112ed2d3SChris Wilson i915_gem_object_put(h->hws); 268112ed2d3SChris Wilson 269112ed2d3SChris Wilson kernel_context_close(h->ctx); 270112ed2d3SChris Wilson 2717e805762SChris Wilson igt_flush_test(h->gt->i915); 272112ed2d3SChris Wilson } 273112ed2d3SChris Wilson 274112ed2d3SChris Wilson static bool wait_until_running(struct hang *h, struct i915_request *rq) 275112ed2d3SChris Wilson { 276112ed2d3SChris Wilson return !(wait_for_us(i915_seqno_passed(hws_seqno(h, rq), 277112ed2d3SChris Wilson rq->fence.seqno), 278112ed2d3SChris Wilson 10) && 279112ed2d3SChris Wilson wait_for(i915_seqno_passed(hws_seqno(h, rq), 280112ed2d3SChris Wilson rq->fence.seqno), 281112ed2d3SChris Wilson 1000)); 282112ed2d3SChris Wilson } 283112ed2d3SChris Wilson 284112ed2d3SChris Wilson static int igt_hang_sanitycheck(void *arg) 285112ed2d3SChris Wilson { 286cb823ed9SChris Wilson struct intel_gt *gt = arg; 287112ed2d3SChris Wilson struct i915_request *rq; 288112ed2d3SChris Wilson struct intel_engine_cs *engine; 289112ed2d3SChris Wilson enum intel_engine_id id; 290112ed2d3SChris Wilson struct hang h; 291112ed2d3SChris Wilson int err; 292112ed2d3SChris Wilson 293112ed2d3SChris Wilson /* Basic check that we can execute our hanging batch */ 294112ed2d3SChris Wilson 295cb823ed9SChris Wilson err = hang_init(&h, gt); 296112ed2d3SChris Wilson if (err) 2977e805762SChris Wilson return err; 298112ed2d3SChris Wilson 2995d904e3cSTvrtko Ursulin for_each_engine(engine, gt, id) { 300cb823ed9SChris Wilson struct intel_wedge_me w; 301112ed2d3SChris Wilson long timeout; 302112ed2d3SChris Wilson 303112ed2d3SChris Wilson if (!intel_engine_can_store_dword(engine)) 304112ed2d3SChris Wilson continue; 305112ed2d3SChris Wilson 306112ed2d3SChris Wilson rq = hang_create_request(&h, engine); 307112ed2d3SChris Wilson if (IS_ERR(rq)) { 308112ed2d3SChris Wilson err = PTR_ERR(rq); 309112ed2d3SChris Wilson pr_err("Failed to create request for %s, err=%d\n", 310112ed2d3SChris Wilson engine->name, err); 311112ed2d3SChris Wilson goto fini; 312112ed2d3SChris Wilson } 313112ed2d3SChris Wilson 314112ed2d3SChris Wilson i915_request_get(rq); 315112ed2d3SChris Wilson 316112ed2d3SChris Wilson *h.batch = MI_BATCH_BUFFER_END; 317baea429dSTvrtko Ursulin intel_gt_chipset_flush(engine->gt); 318112ed2d3SChris Wilson 319112ed2d3SChris Wilson i915_request_add(rq); 320112ed2d3SChris Wilson 321112ed2d3SChris Wilson timeout = 0; 322cb823ed9SChris Wilson intel_wedge_on_timeout(&w, gt, HZ / 10 /* 100ms */) 3232f530945SChris Wilson timeout = i915_request_wait(rq, 0, 324112ed2d3SChris Wilson MAX_SCHEDULE_TIMEOUT); 325cb823ed9SChris Wilson if (intel_gt_is_wedged(gt)) 326112ed2d3SChris Wilson timeout = -EIO; 327112ed2d3SChris Wilson 328112ed2d3SChris Wilson i915_request_put(rq); 329112ed2d3SChris Wilson 330112ed2d3SChris Wilson if (timeout < 0) { 331112ed2d3SChris Wilson err = timeout; 332112ed2d3SChris Wilson pr_err("Wait for request failed on %s, err=%d\n", 333112ed2d3SChris Wilson engine->name, err); 334112ed2d3SChris Wilson goto fini; 335112ed2d3SChris Wilson } 336112ed2d3SChris Wilson } 337112ed2d3SChris Wilson 338112ed2d3SChris Wilson fini: 339112ed2d3SChris Wilson hang_fini(&h); 340112ed2d3SChris Wilson return err; 341112ed2d3SChris Wilson } 342112ed2d3SChris Wilson 343112ed2d3SChris Wilson static bool wait_for_idle(struct intel_engine_cs *engine) 344112ed2d3SChris Wilson { 345112ed2d3SChris Wilson return wait_for(intel_engine_is_idle(engine), IGT_IDLE_TIMEOUT) == 0; 346112ed2d3SChris Wilson } 347112ed2d3SChris Wilson 348112ed2d3SChris Wilson static int igt_reset_nop(void *arg) 349112ed2d3SChris Wilson { 350cb823ed9SChris Wilson struct intel_gt *gt = arg; 351cb823ed9SChris Wilson struct i915_gpu_error *global = >->i915->gpu_error; 352112ed2d3SChris Wilson struct intel_engine_cs *engine; 353112ed2d3SChris Wilson unsigned int reset_count, count; 354112ed2d3SChris Wilson enum intel_engine_id id; 355112ed2d3SChris Wilson IGT_TIMEOUT(end_time); 356112ed2d3SChris Wilson int err = 0; 357112ed2d3SChris Wilson 358112ed2d3SChris Wilson /* Check that we can reset during non-user portions of requests */ 359112ed2d3SChris Wilson 360cb823ed9SChris Wilson reset_count = i915_reset_count(global); 361112ed2d3SChris Wilson count = 0; 362112ed2d3SChris Wilson do { 3635d904e3cSTvrtko Ursulin for_each_engine(engine, gt, id) { 364e6ba7648SChris Wilson struct intel_context *ce; 365112ed2d3SChris Wilson int i; 366112ed2d3SChris Wilson 367e6ba7648SChris Wilson ce = intel_context_create(engine); 368e6ba7648SChris Wilson if (IS_ERR(ce)) { 369e6ba7648SChris Wilson err = PTR_ERR(ce); 3703f5dff6cSJohn Harrison pr_err("[%s] Create context failed: %d!\n", engine->name, err); 371e6ba7648SChris Wilson break; 372e6ba7648SChris Wilson } 373e6ba7648SChris Wilson 374112ed2d3SChris Wilson for (i = 0; i < 16; i++) { 375112ed2d3SChris Wilson struct i915_request *rq; 376112ed2d3SChris Wilson 377e6ba7648SChris Wilson rq = intel_context_create_request(ce); 378112ed2d3SChris Wilson if (IS_ERR(rq)) { 379112ed2d3SChris Wilson err = PTR_ERR(rq); 3803f5dff6cSJohn Harrison pr_err("[%s] Create request failed: %d!\n", 3813f5dff6cSJohn Harrison engine->name, err); 382112ed2d3SChris Wilson break; 383112ed2d3SChris Wilson } 384112ed2d3SChris Wilson 385112ed2d3SChris Wilson i915_request_add(rq); 386112ed2d3SChris Wilson } 387e6ba7648SChris Wilson 388e6ba7648SChris Wilson intel_context_put(ce); 389112ed2d3SChris Wilson } 390112ed2d3SChris Wilson 391cb823ed9SChris Wilson igt_global_reset_lock(gt); 392cb823ed9SChris Wilson intel_gt_reset(gt, ALL_ENGINES, NULL); 393cb823ed9SChris Wilson igt_global_reset_unlock(gt); 394d8474795SChris Wilson 395cb823ed9SChris Wilson if (intel_gt_is_wedged(gt)) { 3963f5dff6cSJohn Harrison pr_err("[%s] GT is wedged!\n", engine->name); 397112ed2d3SChris Wilson err = -EIO; 398112ed2d3SChris Wilson break; 399112ed2d3SChris Wilson } 400112ed2d3SChris Wilson 401cb823ed9SChris Wilson if (i915_reset_count(global) != reset_count + ++count) { 4023f5dff6cSJohn Harrison pr_err("[%s] Reset not recorded: %d vs %d + %d!\n", 4033f5dff6cSJohn Harrison engine->name, i915_reset_count(global), reset_count, count); 404112ed2d3SChris Wilson err = -EINVAL; 405112ed2d3SChris Wilson break; 406112ed2d3SChris Wilson } 407112ed2d3SChris Wilson 4087e805762SChris Wilson err = igt_flush_test(gt->i915); 4093f5dff6cSJohn Harrison if (err) { 4103f5dff6cSJohn Harrison pr_err("[%s] Flush failed: %d!\n", engine->name, err); 411112ed2d3SChris Wilson break; 4123f5dff6cSJohn Harrison } 413112ed2d3SChris Wilson } while (time_before(jiffies, end_time)); 414112ed2d3SChris Wilson pr_info("%s: %d resets\n", __func__, count); 415112ed2d3SChris Wilson 4163f5dff6cSJohn Harrison if (igt_flush_test(gt->i915)) { 4173f5dff6cSJohn Harrison pr_err("Post flush failed: %d!\n", err); 418112ed2d3SChris Wilson err = -EIO; 4193f5dff6cSJohn Harrison } 4203f5dff6cSJohn Harrison 421112ed2d3SChris Wilson return err; 422112ed2d3SChris Wilson } 423112ed2d3SChris Wilson 424112ed2d3SChris Wilson static int igt_reset_nop_engine(void *arg) 425112ed2d3SChris Wilson { 426cb823ed9SChris Wilson struct intel_gt *gt = arg; 427cb823ed9SChris Wilson struct i915_gpu_error *global = >->i915->gpu_error; 428112ed2d3SChris Wilson struct intel_engine_cs *engine; 429112ed2d3SChris Wilson enum intel_engine_id id; 430112ed2d3SChris Wilson 431112ed2d3SChris Wilson /* Check that we can engine-reset during non-user portions */ 432112ed2d3SChris Wilson 433260e6b71SChris Wilson if (!intel_has_reset_engine(gt)) 434112ed2d3SChris Wilson return 0; 435112ed2d3SChris Wilson 4365d904e3cSTvrtko Ursulin for_each_engine(engine, gt, id) { 437e6ba7648SChris Wilson unsigned int reset_count, reset_engine_count, count; 438e6ba7648SChris Wilson struct intel_context *ce; 439112ed2d3SChris Wilson IGT_TIMEOUT(end_time); 440e6ba7648SChris Wilson int err; 441e6ba7648SChris Wilson 442617e87c0SJohn Harrison if (intel_engine_uses_guc(engine)) { 443617e87c0SJohn Harrison /* Engine level resets are triggered by GuC when a hang 444617e87c0SJohn Harrison * is detected. They can't be triggered by the KMD any 445617e87c0SJohn Harrison * more. Thus a nop batch cannot be used as a reset test 446617e87c0SJohn Harrison */ 447617e87c0SJohn Harrison continue; 448617e87c0SJohn Harrison } 449617e87c0SJohn Harrison 450e6ba7648SChris Wilson ce = intel_context_create(engine); 4513f5dff6cSJohn Harrison if (IS_ERR(ce)) { 452ac5a2dffSNathan Chancellor pr_err("[%s] Create context failed: %pe!\n", engine->name, ce); 453e6ba7648SChris Wilson return PTR_ERR(ce); 4543f5dff6cSJohn Harrison } 455112ed2d3SChris Wilson 456cb823ed9SChris Wilson reset_count = i915_reset_count(global); 457cb823ed9SChris Wilson reset_engine_count = i915_reset_engine_count(global, engine); 458112ed2d3SChris Wilson count = 0; 459112ed2d3SChris Wilson 4601b90e4a4SChris Wilson st_engine_heartbeat_disable(engine); 4619030e39cSThomas Hellström GEM_BUG_ON(test_and_set_bit(I915_RESET_ENGINE + id, 4629030e39cSThomas Hellström >->reset.flags)); 463112ed2d3SChris Wilson do { 464112ed2d3SChris Wilson int i; 465112ed2d3SChris Wilson 466112ed2d3SChris Wilson if (!wait_for_idle(engine)) { 467112ed2d3SChris Wilson pr_err("%s failed to idle before reset\n", 468112ed2d3SChris Wilson engine->name); 469112ed2d3SChris Wilson err = -EIO; 470112ed2d3SChris Wilson break; 471112ed2d3SChris Wilson } 472112ed2d3SChris Wilson 473112ed2d3SChris Wilson for (i = 0; i < 16; i++) { 474112ed2d3SChris Wilson struct i915_request *rq; 475112ed2d3SChris Wilson 476e6ba7648SChris Wilson rq = intel_context_create_request(ce); 477112ed2d3SChris Wilson if (IS_ERR(rq)) { 478f2e85e57SChris Wilson struct drm_printer p = 479f2e85e57SChris Wilson drm_info_printer(gt->i915->drm.dev); 480f2e85e57SChris Wilson intel_engine_dump(engine, &p, 481f2e85e57SChris Wilson "%s(%s): failed to submit request\n", 482f2e85e57SChris Wilson __func__, 483f2e85e57SChris Wilson engine->name); 484f2e85e57SChris Wilson 485f2e85e57SChris Wilson GEM_TRACE("%s(%s): failed to submit request\n", 486f2e85e57SChris Wilson __func__, 487f2e85e57SChris Wilson engine->name); 488f2e85e57SChris Wilson GEM_TRACE_DUMP(); 489f2e85e57SChris Wilson 490f2e85e57SChris Wilson intel_gt_set_wedged(gt); 491f2e85e57SChris Wilson 492112ed2d3SChris Wilson err = PTR_ERR(rq); 493112ed2d3SChris Wilson break; 494112ed2d3SChris Wilson } 495112ed2d3SChris Wilson 496112ed2d3SChris Wilson i915_request_add(rq); 497112ed2d3SChris Wilson } 498cb823ed9SChris Wilson err = intel_engine_reset(engine, NULL); 499112ed2d3SChris Wilson if (err) { 500cd7a214fSChris Wilson pr_err("intel_engine_reset(%s) failed, err:%d\n", 501cd7a214fSChris Wilson engine->name, err); 502112ed2d3SChris Wilson break; 503112ed2d3SChris Wilson } 504112ed2d3SChris Wilson 505cb823ed9SChris Wilson if (i915_reset_count(global) != reset_count) { 506112ed2d3SChris Wilson pr_err("Full GPU reset recorded! (engine reset expected)\n"); 507112ed2d3SChris Wilson err = -EINVAL; 508112ed2d3SChris Wilson break; 509112ed2d3SChris Wilson } 510112ed2d3SChris Wilson 511cb823ed9SChris Wilson if (i915_reset_engine_count(global, engine) != 512112ed2d3SChris Wilson reset_engine_count + ++count) { 513112ed2d3SChris Wilson pr_err("%s engine reset not recorded!\n", 514112ed2d3SChris Wilson engine->name); 515112ed2d3SChris Wilson err = -EINVAL; 516112ed2d3SChris Wilson break; 517112ed2d3SChris Wilson } 518112ed2d3SChris Wilson } while (time_before(jiffies, end_time)); 5199030e39cSThomas Hellström clear_and_wake_up_bit(I915_RESET_ENGINE + id, >->reset.flags); 5201b90e4a4SChris Wilson st_engine_heartbeat_enable(engine); 521e6ba7648SChris Wilson 522112ed2d3SChris Wilson pr_info("%s(%s): %d resets\n", __func__, engine->name, count); 523112ed2d3SChris Wilson 524e6ba7648SChris Wilson intel_context_put(ce); 525e6ba7648SChris Wilson if (igt_flush_test(gt->i915)) 526e6ba7648SChris Wilson err = -EIO; 527112ed2d3SChris Wilson if (err) 528e6ba7648SChris Wilson return err; 529112ed2d3SChris Wilson } 530112ed2d3SChris Wilson 531e6ba7648SChris Wilson return 0; 532112ed2d3SChris Wilson } 533112ed2d3SChris Wilson 534106a9368SChris Wilson static void force_reset_timeout(struct intel_engine_cs *engine) 535106a9368SChris Wilson { 536106a9368SChris Wilson engine->reset_timeout.probability = 999; 537106a9368SChris Wilson atomic_set(&engine->reset_timeout.times, -1); 538106a9368SChris Wilson } 539106a9368SChris Wilson 540106a9368SChris Wilson static void cancel_reset_timeout(struct intel_engine_cs *engine) 541106a9368SChris Wilson { 542106a9368SChris Wilson memset(&engine->reset_timeout, 0, sizeof(engine->reset_timeout)); 543106a9368SChris Wilson } 544106a9368SChris Wilson 545106a9368SChris Wilson static int igt_reset_fail_engine(void *arg) 546106a9368SChris Wilson { 547106a9368SChris Wilson struct intel_gt *gt = arg; 548106a9368SChris Wilson struct intel_engine_cs *engine; 549106a9368SChris Wilson enum intel_engine_id id; 550106a9368SChris Wilson 551106a9368SChris Wilson /* Check that we can recover from engine-reset failues */ 552106a9368SChris Wilson 553106a9368SChris Wilson if (!intel_has_reset_engine(gt)) 554106a9368SChris Wilson return 0; 555106a9368SChris Wilson 556106a9368SChris Wilson for_each_engine(engine, gt, id) { 557106a9368SChris Wilson unsigned int count; 558106a9368SChris Wilson struct intel_context *ce; 559106a9368SChris Wilson IGT_TIMEOUT(end_time); 560106a9368SChris Wilson int err; 561106a9368SChris Wilson 562617e87c0SJohn Harrison /* Can't manually break the reset if i915 doesn't perform it */ 563617e87c0SJohn Harrison if (intel_engine_uses_guc(engine)) 564617e87c0SJohn Harrison continue; 565617e87c0SJohn Harrison 566106a9368SChris Wilson ce = intel_context_create(engine); 5673f5dff6cSJohn Harrison if (IS_ERR(ce)) { 568ac5a2dffSNathan Chancellor pr_err("[%s] Create context failed: %pe!\n", engine->name, ce); 569106a9368SChris Wilson return PTR_ERR(ce); 5703f5dff6cSJohn Harrison } 571106a9368SChris Wilson 572106a9368SChris Wilson st_engine_heartbeat_disable(engine); 5739030e39cSThomas Hellström GEM_BUG_ON(test_and_set_bit(I915_RESET_ENGINE + id, 5749030e39cSThomas Hellström >->reset.flags)); 575106a9368SChris Wilson 576106a9368SChris Wilson force_reset_timeout(engine); 577106a9368SChris Wilson err = intel_engine_reset(engine, NULL); 578106a9368SChris Wilson cancel_reset_timeout(engine); 579106a9368SChris Wilson if (err == 0) /* timeouts only generated on gen8+ */ 580106a9368SChris Wilson goto skip; 581106a9368SChris Wilson 582106a9368SChris Wilson count = 0; 583106a9368SChris Wilson do { 584106a9368SChris Wilson struct i915_request *last = NULL; 585106a9368SChris Wilson int i; 586106a9368SChris Wilson 587106a9368SChris Wilson if (!wait_for_idle(engine)) { 588106a9368SChris Wilson pr_err("%s failed to idle before reset\n", 589106a9368SChris Wilson engine->name); 590106a9368SChris Wilson err = -EIO; 591106a9368SChris Wilson break; 592106a9368SChris Wilson } 593106a9368SChris Wilson 594106a9368SChris Wilson for (i = 0; i < count % 15; i++) { 595106a9368SChris Wilson struct i915_request *rq; 596106a9368SChris Wilson 597106a9368SChris Wilson rq = intel_context_create_request(ce); 598106a9368SChris Wilson if (IS_ERR(rq)) { 599106a9368SChris Wilson struct drm_printer p = 600106a9368SChris Wilson drm_info_printer(gt->i915->drm.dev); 601106a9368SChris Wilson intel_engine_dump(engine, &p, 602106a9368SChris Wilson "%s(%s): failed to submit request\n", 603106a9368SChris Wilson __func__, 604106a9368SChris Wilson engine->name); 605106a9368SChris Wilson 606106a9368SChris Wilson GEM_TRACE("%s(%s): failed to submit request\n", 607106a9368SChris Wilson __func__, 608106a9368SChris Wilson engine->name); 609106a9368SChris Wilson GEM_TRACE_DUMP(); 610106a9368SChris Wilson 611106a9368SChris Wilson intel_gt_set_wedged(gt); 612106a9368SChris Wilson if (last) 613106a9368SChris Wilson i915_request_put(last); 614106a9368SChris Wilson 615106a9368SChris Wilson err = PTR_ERR(rq); 616106a9368SChris Wilson goto out; 617106a9368SChris Wilson } 618106a9368SChris Wilson 619106a9368SChris Wilson if (last) 620106a9368SChris Wilson i915_request_put(last); 621106a9368SChris Wilson last = i915_request_get(rq); 622106a9368SChris Wilson i915_request_add(rq); 623106a9368SChris Wilson } 624106a9368SChris Wilson 625106a9368SChris Wilson if (count & 1) { 626106a9368SChris Wilson err = intel_engine_reset(engine, NULL); 627106a9368SChris Wilson if (err) { 628106a9368SChris Wilson GEM_TRACE_ERR("intel_engine_reset(%s) failed, err:%d\n", 629106a9368SChris Wilson engine->name, err); 630106a9368SChris Wilson GEM_TRACE_DUMP(); 631106a9368SChris Wilson i915_request_put(last); 632106a9368SChris Wilson break; 633106a9368SChris Wilson } 634106a9368SChris Wilson } else { 635106a9368SChris Wilson force_reset_timeout(engine); 636106a9368SChris Wilson err = intel_engine_reset(engine, NULL); 637106a9368SChris Wilson cancel_reset_timeout(engine); 638106a9368SChris Wilson if (err != -ETIMEDOUT) { 639106a9368SChris Wilson pr_err("intel_engine_reset(%s) did not fail, err:%d\n", 640106a9368SChris Wilson engine->name, err); 641106a9368SChris Wilson i915_request_put(last); 642106a9368SChris Wilson break; 643106a9368SChris Wilson } 644106a9368SChris Wilson } 645106a9368SChris Wilson 646106a9368SChris Wilson err = 0; 647106a9368SChris Wilson if (last) { 648106a9368SChris Wilson if (i915_request_wait(last, 0, HZ / 2) < 0) { 649106a9368SChris Wilson struct drm_printer p = 650106a9368SChris Wilson drm_info_printer(gt->i915->drm.dev); 651106a9368SChris Wilson 652106a9368SChris Wilson intel_engine_dump(engine, &p, 653106a9368SChris Wilson "%s(%s): failed to complete request\n", 654106a9368SChris Wilson __func__, 655106a9368SChris Wilson engine->name); 656106a9368SChris Wilson 657106a9368SChris Wilson GEM_TRACE("%s(%s): failed to complete request\n", 658106a9368SChris Wilson __func__, 659106a9368SChris Wilson engine->name); 660106a9368SChris Wilson GEM_TRACE_DUMP(); 661106a9368SChris Wilson 662106a9368SChris Wilson err = -EIO; 663106a9368SChris Wilson } 664106a9368SChris Wilson i915_request_put(last); 665106a9368SChris Wilson } 666106a9368SChris Wilson count++; 667106a9368SChris Wilson } while (err == 0 && time_before(jiffies, end_time)); 668106a9368SChris Wilson out: 669106a9368SChris Wilson pr_info("%s(%s): %d resets\n", __func__, engine->name, count); 670106a9368SChris Wilson skip: 6719030e39cSThomas Hellström clear_and_wake_up_bit(I915_RESET_ENGINE + id, >->reset.flags); 672106a9368SChris Wilson st_engine_heartbeat_enable(engine); 673106a9368SChris Wilson intel_context_put(ce); 674106a9368SChris Wilson 675106a9368SChris Wilson if (igt_flush_test(gt->i915)) 676106a9368SChris Wilson err = -EIO; 677106a9368SChris Wilson if (err) 678106a9368SChris Wilson return err; 679106a9368SChris Wilson } 680106a9368SChris Wilson 681106a9368SChris Wilson return 0; 682106a9368SChris Wilson } 683106a9368SChris Wilson 684cb823ed9SChris Wilson static int __igt_reset_engine(struct intel_gt *gt, bool active) 685112ed2d3SChris Wilson { 686cb823ed9SChris Wilson struct i915_gpu_error *global = >->i915->gpu_error; 687112ed2d3SChris Wilson struct intel_engine_cs *engine; 688112ed2d3SChris Wilson enum intel_engine_id id; 689112ed2d3SChris Wilson struct hang h; 690112ed2d3SChris Wilson int err = 0; 691112ed2d3SChris Wilson 692112ed2d3SChris Wilson /* Check that we can issue an engine reset on an idle engine (no-op) */ 693112ed2d3SChris Wilson 694260e6b71SChris Wilson if (!intel_has_reset_engine(gt)) 695112ed2d3SChris Wilson return 0; 696112ed2d3SChris Wilson 697112ed2d3SChris Wilson if (active) { 698cb823ed9SChris Wilson err = hang_init(&h, gt); 699112ed2d3SChris Wilson if (err) 700112ed2d3SChris Wilson return err; 701112ed2d3SChris Wilson } 702112ed2d3SChris Wilson 7035d904e3cSTvrtko Ursulin for_each_engine(engine, gt, id) { 704112ed2d3SChris Wilson unsigned int reset_count, reset_engine_count; 70580655d2aSChris Wilson unsigned long count; 706617e87c0SJohn Harrison bool using_guc = intel_engine_uses_guc(engine); 707112ed2d3SChris Wilson IGT_TIMEOUT(end_time); 708112ed2d3SChris Wilson 709617e87c0SJohn Harrison if (using_guc && !active) 710617e87c0SJohn Harrison continue; 711617e87c0SJohn Harrison 712112ed2d3SChris Wilson if (active && !intel_engine_can_store_dword(engine)) 713112ed2d3SChris Wilson continue; 714112ed2d3SChris Wilson 715112ed2d3SChris Wilson if (!wait_for_idle(engine)) { 716112ed2d3SChris Wilson pr_err("%s failed to idle before reset\n", 717112ed2d3SChris Wilson engine->name); 718112ed2d3SChris Wilson err = -EIO; 719112ed2d3SChris Wilson break; 720112ed2d3SChris Wilson } 721112ed2d3SChris Wilson 722cb823ed9SChris Wilson reset_count = i915_reset_count(global); 723cb823ed9SChris Wilson reset_engine_count = i915_reset_engine_count(global, engine); 724112ed2d3SChris Wilson 7251b90e4a4SChris Wilson st_engine_heartbeat_disable(engine); 7269030e39cSThomas Hellström GEM_BUG_ON(test_and_set_bit(I915_RESET_ENGINE + id, 7279030e39cSThomas Hellström >->reset.flags)); 72880655d2aSChris Wilson count = 0; 729112ed2d3SChris Wilson do { 730617e87c0SJohn Harrison struct i915_request *rq = NULL; 731617e87c0SJohn Harrison struct intel_selftest_saved_policy saved; 732617e87c0SJohn Harrison int err2; 733112ed2d3SChris Wilson 734617e87c0SJohn Harrison err = intel_selftest_modify_policy(engine, &saved, 735617e87c0SJohn Harrison SELFTEST_SCHEDULER_MODIFY_FAST_RESET); 736617e87c0SJohn Harrison if (err) { 737617e87c0SJohn Harrison pr_err("[%s] Modify policy failed: %d!\n", engine->name, err); 738617e87c0SJohn Harrison break; 739617e87c0SJohn Harrison } 740617e87c0SJohn Harrison 741617e87c0SJohn Harrison if (active) { 742112ed2d3SChris Wilson rq = hang_create_request(&h, engine); 743112ed2d3SChris Wilson if (IS_ERR(rq)) { 744112ed2d3SChris Wilson err = PTR_ERR(rq); 7453f5dff6cSJohn Harrison pr_err("[%s] Create hang request failed: %d!\n", 7463f5dff6cSJohn Harrison engine->name, err); 747617e87c0SJohn Harrison goto restore; 748112ed2d3SChris Wilson } 749112ed2d3SChris Wilson 750112ed2d3SChris Wilson i915_request_get(rq); 751112ed2d3SChris Wilson i915_request_add(rq); 752112ed2d3SChris Wilson 753112ed2d3SChris Wilson if (!wait_until_running(&h, rq)) { 754cb823ed9SChris Wilson struct drm_printer p = drm_info_printer(gt->i915->drm.dev); 755112ed2d3SChris Wilson 756112ed2d3SChris Wilson pr_err("%s: Failed to start request %llx, at %x\n", 757112ed2d3SChris Wilson __func__, rq->fence.seqno, hws_seqno(&h, rq)); 758112ed2d3SChris Wilson intel_engine_dump(engine, &p, 759112ed2d3SChris Wilson "%s\n", engine->name); 760112ed2d3SChris Wilson 761112ed2d3SChris Wilson i915_request_put(rq); 762112ed2d3SChris Wilson err = -EIO; 763617e87c0SJohn Harrison goto restore; 764617e87c0SJohn Harrison } 765112ed2d3SChris Wilson } 766112ed2d3SChris Wilson 767617e87c0SJohn Harrison if (!using_guc) { 768cb823ed9SChris Wilson err = intel_engine_reset(engine, NULL); 769112ed2d3SChris Wilson if (err) { 770cd7a214fSChris Wilson pr_err("intel_engine_reset(%s) failed, err:%d\n", 771cd7a214fSChris Wilson engine->name, err); 772617e87c0SJohn Harrison goto skip; 773112ed2d3SChris Wilson } 774617e87c0SJohn Harrison } 775617e87c0SJohn Harrison 776617e87c0SJohn Harrison if (rq) { 777617e87c0SJohn Harrison /* Ensure the reset happens and kills the engine */ 778617e87c0SJohn Harrison err = intel_selftest_wait_for_rq(rq); 779617e87c0SJohn Harrison if (err) 780617e87c0SJohn Harrison pr_err("[%s] Wait for request %lld:%lld [0x%04X] failed: %d!\n", 781617e87c0SJohn Harrison engine->name, rq->fence.context, 7823cb3e343SMatthew Brost rq->fence.seqno, rq->context->guc_id.id, err); 783617e87c0SJohn Harrison } 784617e87c0SJohn Harrison 785617e87c0SJohn Harrison skip: 786617e87c0SJohn Harrison if (rq) 787617e87c0SJohn Harrison i915_request_put(rq); 788112ed2d3SChris Wilson 789cb823ed9SChris Wilson if (i915_reset_count(global) != reset_count) { 790112ed2d3SChris Wilson pr_err("Full GPU reset recorded! (engine reset expected)\n"); 791112ed2d3SChris Wilson err = -EINVAL; 792617e87c0SJohn Harrison goto restore; 793112ed2d3SChris Wilson } 794112ed2d3SChris Wilson 795617e87c0SJohn Harrison /* GuC based resets are not logged per engine */ 796617e87c0SJohn Harrison if (!using_guc) { 797cb823ed9SChris Wilson if (i915_reset_engine_count(global, engine) != 798112ed2d3SChris Wilson ++reset_engine_count) { 799112ed2d3SChris Wilson pr_err("%s engine reset not recorded!\n", 800112ed2d3SChris Wilson engine->name); 801112ed2d3SChris Wilson err = -EINVAL; 802617e87c0SJohn Harrison goto restore; 803617e87c0SJohn Harrison } 804112ed2d3SChris Wilson } 80580655d2aSChris Wilson 80680655d2aSChris Wilson count++; 807617e87c0SJohn Harrison 808617e87c0SJohn Harrison restore: 809617e87c0SJohn Harrison err2 = intel_selftest_restore_policy(engine, &saved); 810617e87c0SJohn Harrison if (err2) 811617e87c0SJohn Harrison pr_err("[%s] Restore policy failed: %d!\n", engine->name, err); 812617e87c0SJohn Harrison if (err == 0) 813617e87c0SJohn Harrison err = err2; 814617e87c0SJohn Harrison if (err) 815617e87c0SJohn Harrison break; 816112ed2d3SChris Wilson } while (time_before(jiffies, end_time)); 8179030e39cSThomas Hellström clear_and_wake_up_bit(I915_RESET_ENGINE + id, >->reset.flags); 8181b90e4a4SChris Wilson st_engine_heartbeat_enable(engine); 81980655d2aSChris Wilson pr_info("%s: Completed %lu %s resets\n", 82080655d2aSChris Wilson engine->name, count, active ? "active" : "idle"); 821112ed2d3SChris Wilson 822112ed2d3SChris Wilson if (err) 823112ed2d3SChris Wilson break; 824112ed2d3SChris Wilson 8257e805762SChris Wilson err = igt_flush_test(gt->i915); 8263f5dff6cSJohn Harrison if (err) { 8273f5dff6cSJohn Harrison pr_err("[%s] Flush failed: %d!\n", engine->name, err); 828112ed2d3SChris Wilson break; 829112ed2d3SChris Wilson } 8303f5dff6cSJohn Harrison } 831112ed2d3SChris Wilson 8323f5dff6cSJohn Harrison if (intel_gt_is_wedged(gt)) { 8333f5dff6cSJohn Harrison pr_err("GT is wedged!\n"); 834112ed2d3SChris Wilson err = -EIO; 8353f5dff6cSJohn Harrison } 836112ed2d3SChris Wilson 8377e805762SChris Wilson if (active) 838112ed2d3SChris Wilson hang_fini(&h); 839112ed2d3SChris Wilson 840112ed2d3SChris Wilson return err; 841112ed2d3SChris Wilson } 842112ed2d3SChris Wilson 843112ed2d3SChris Wilson static int igt_reset_idle_engine(void *arg) 844112ed2d3SChris Wilson { 845112ed2d3SChris Wilson return __igt_reset_engine(arg, false); 846112ed2d3SChris Wilson } 847112ed2d3SChris Wilson 848112ed2d3SChris Wilson static int igt_reset_active_engine(void *arg) 849112ed2d3SChris Wilson { 850112ed2d3SChris Wilson return __igt_reset_engine(arg, true); 851112ed2d3SChris Wilson } 852112ed2d3SChris Wilson 853112ed2d3SChris Wilson struct active_engine { 8546407cf53STvrtko Ursulin struct kthread_worker *worker; 8556407cf53STvrtko Ursulin struct kthread_work work; 856112ed2d3SChris Wilson struct intel_engine_cs *engine; 857112ed2d3SChris Wilson unsigned long resets; 858112ed2d3SChris Wilson unsigned int flags; 8596407cf53STvrtko Ursulin bool stop; 8606407cf53STvrtko Ursulin int result; 861112ed2d3SChris Wilson }; 862112ed2d3SChris Wilson 863112ed2d3SChris Wilson #define TEST_ACTIVE BIT(0) 864112ed2d3SChris Wilson #define TEST_OTHERS BIT(1) 865112ed2d3SChris Wilson #define TEST_SELF BIT(2) 866112ed2d3SChris Wilson #define TEST_PRIORITY BIT(3) 867112ed2d3SChris Wilson 868112ed2d3SChris Wilson static int active_request_put(struct i915_request *rq) 869112ed2d3SChris Wilson { 870112ed2d3SChris Wilson int err = 0; 871112ed2d3SChris Wilson 872112ed2d3SChris Wilson if (!rq) 873112ed2d3SChris Wilson return 0; 874112ed2d3SChris Wilson 8753a7b7266SJohn Harrison if (i915_request_wait(rq, 0, 10 * HZ) < 0) { 876112ed2d3SChris Wilson GEM_TRACE("%s timed out waiting for completion of fence %llx:%lld\n", 877112ed2d3SChris Wilson rq->engine->name, 878112ed2d3SChris Wilson rq->fence.context, 879112ed2d3SChris Wilson rq->fence.seqno); 880112ed2d3SChris Wilson GEM_TRACE_DUMP(); 881112ed2d3SChris Wilson 882cb823ed9SChris Wilson intel_gt_set_wedged(rq->engine->gt); 883112ed2d3SChris Wilson err = -EIO; 884112ed2d3SChris Wilson } 885112ed2d3SChris Wilson 886112ed2d3SChris Wilson i915_request_put(rq); 887112ed2d3SChris Wilson 888112ed2d3SChris Wilson return err; 889112ed2d3SChris Wilson } 890112ed2d3SChris Wilson 8916407cf53STvrtko Ursulin static void active_engine(struct kthread_work *work) 892112ed2d3SChris Wilson { 893112ed2d3SChris Wilson I915_RND_STATE(prng); 8946407cf53STvrtko Ursulin struct active_engine *arg = container_of(work, typeof(*arg), work); 895112ed2d3SChris Wilson struct intel_engine_cs *engine = arg->engine; 896112ed2d3SChris Wilson struct i915_request *rq[8] = {}; 897e6ba7648SChris Wilson struct intel_context *ce[ARRAY_SIZE(rq)]; 898e6ba7648SChris Wilson unsigned long count; 899112ed2d3SChris Wilson int err = 0; 900112ed2d3SChris Wilson 901e6ba7648SChris Wilson for (count = 0; count < ARRAY_SIZE(ce); count++) { 902e6ba7648SChris Wilson ce[count] = intel_context_create(engine); 903e6ba7648SChris Wilson if (IS_ERR(ce[count])) { 9046407cf53STvrtko Ursulin arg->result = PTR_ERR(ce[count]); 9056407cf53STvrtko Ursulin pr_err("[%s] Create context #%ld failed: %d!\n", 9066407cf53STvrtko Ursulin engine->name, count, arg->result); 907112ed2d3SChris Wilson while (--count) 908e6ba7648SChris Wilson intel_context_put(ce[count]); 9096407cf53STvrtko Ursulin return; 910112ed2d3SChris Wilson } 911112ed2d3SChris Wilson } 912112ed2d3SChris Wilson 913e6ba7648SChris Wilson count = 0; 9146407cf53STvrtko Ursulin while (!READ_ONCE(arg->stop)) { 915112ed2d3SChris Wilson unsigned int idx = count++ & (ARRAY_SIZE(rq) - 1); 916112ed2d3SChris Wilson struct i915_request *old = rq[idx]; 917112ed2d3SChris Wilson struct i915_request *new; 918112ed2d3SChris Wilson 919e6ba7648SChris Wilson new = intel_context_create_request(ce[idx]); 920112ed2d3SChris Wilson if (IS_ERR(new)) { 921112ed2d3SChris Wilson err = PTR_ERR(new); 9223f5dff6cSJohn Harrison pr_err("[%s] Create request #%d failed: %d!\n", engine->name, idx, err); 923112ed2d3SChris Wilson break; 924112ed2d3SChris Wilson } 925112ed2d3SChris Wilson 926112ed2d3SChris Wilson rq[idx] = i915_request_get(new); 927112ed2d3SChris Wilson i915_request_add(new); 928112ed2d3SChris Wilson 9293f623e06SMatthew Brost if (engine->sched_engine->schedule && arg->flags & TEST_PRIORITY) { 930e6ba7648SChris Wilson struct i915_sched_attr attr = { 931e6ba7648SChris Wilson .priority = 932e6ba7648SChris Wilson i915_prandom_u32_max_state(512, &prng), 933e6ba7648SChris Wilson }; 9343f623e06SMatthew Brost engine->sched_engine->schedule(rq[idx], &attr); 935e6ba7648SChris Wilson } 936e6ba7648SChris Wilson 937112ed2d3SChris Wilson err = active_request_put(old); 9383f5dff6cSJohn Harrison if (err) { 9393f5dff6cSJohn Harrison pr_err("[%s] Request put failed: %d!\n", engine->name, err); 940112ed2d3SChris Wilson break; 9413f5dff6cSJohn Harrison } 942112ed2d3SChris Wilson 943112ed2d3SChris Wilson cond_resched(); 944112ed2d3SChris Wilson } 945112ed2d3SChris Wilson 946112ed2d3SChris Wilson for (count = 0; count < ARRAY_SIZE(rq); count++) { 947112ed2d3SChris Wilson int err__ = active_request_put(rq[count]); 948112ed2d3SChris Wilson 9493f5dff6cSJohn Harrison if (err) 9503f5dff6cSJohn Harrison pr_err("[%s] Request put #%ld failed: %d!\n", engine->name, count, err); 9513f5dff6cSJohn Harrison 952112ed2d3SChris Wilson /* Keep the first error */ 953112ed2d3SChris Wilson if (!err) 954112ed2d3SChris Wilson err = err__; 955e6ba7648SChris Wilson 956e6ba7648SChris Wilson intel_context_put(ce[count]); 957112ed2d3SChris Wilson } 958112ed2d3SChris Wilson 9596407cf53STvrtko Ursulin arg->result = err; 960112ed2d3SChris Wilson } 961112ed2d3SChris Wilson 962cb823ed9SChris Wilson static int __igt_reset_engines(struct intel_gt *gt, 963112ed2d3SChris Wilson const char *test_name, 964112ed2d3SChris Wilson unsigned int flags) 965112ed2d3SChris Wilson { 966cb823ed9SChris Wilson struct i915_gpu_error *global = >->i915->gpu_error; 967112ed2d3SChris Wilson struct intel_engine_cs *engine, *other; 9686cd96877SJohn Harrison struct active_engine *threads; 969112ed2d3SChris Wilson enum intel_engine_id id, tmp; 970112ed2d3SChris Wilson struct hang h; 971112ed2d3SChris Wilson int err = 0; 972112ed2d3SChris Wilson 973112ed2d3SChris Wilson /* Check that issuing a reset on one engine does not interfere 974112ed2d3SChris Wilson * with any other engine. 975112ed2d3SChris Wilson */ 976112ed2d3SChris Wilson 977260e6b71SChris Wilson if (!intel_has_reset_engine(gt)) 978112ed2d3SChris Wilson return 0; 979112ed2d3SChris Wilson 980112ed2d3SChris Wilson if (flags & TEST_ACTIVE) { 981cb823ed9SChris Wilson err = hang_init(&h, gt); 982112ed2d3SChris Wilson if (err) 983112ed2d3SChris Wilson return err; 984112ed2d3SChris Wilson 985112ed2d3SChris Wilson if (flags & TEST_PRIORITY) 986112ed2d3SChris Wilson h.ctx->sched.priority = 1024; 987112ed2d3SChris Wilson } 988112ed2d3SChris Wilson 9896cd96877SJohn Harrison threads = kmalloc_array(I915_NUM_ENGINES, sizeof(*threads), GFP_KERNEL); 9906cd96877SJohn Harrison if (!threads) 9916cd96877SJohn Harrison return -ENOMEM; 9926cd96877SJohn Harrison 9935d904e3cSTvrtko Ursulin for_each_engine(engine, gt, id) { 994cb823ed9SChris Wilson unsigned long device = i915_reset_count(global); 995112ed2d3SChris Wilson unsigned long count = 0, reported; 996617e87c0SJohn Harrison bool using_guc = intel_engine_uses_guc(engine); 997112ed2d3SChris Wilson IGT_TIMEOUT(end_time); 998112ed2d3SChris Wilson 999617e87c0SJohn Harrison if (flags & TEST_ACTIVE) { 1000617e87c0SJohn Harrison if (!intel_engine_can_store_dword(engine)) 1001617e87c0SJohn Harrison continue; 1002617e87c0SJohn Harrison } else if (using_guc) 1003112ed2d3SChris Wilson continue; 1004112ed2d3SChris Wilson 1005112ed2d3SChris Wilson if (!wait_for_idle(engine)) { 1006112ed2d3SChris Wilson pr_err("i915_reset_engine(%s:%s): failed to idle before reset\n", 1007112ed2d3SChris Wilson engine->name, test_name); 1008112ed2d3SChris Wilson err = -EIO; 1009112ed2d3SChris Wilson break; 1010112ed2d3SChris Wilson } 1011112ed2d3SChris Wilson 10126cd96877SJohn Harrison memset(threads, 0, sizeof(*threads) * I915_NUM_ENGINES); 10135d904e3cSTvrtko Ursulin for_each_engine(other, gt, tmp) { 10146407cf53STvrtko Ursulin struct kthread_worker *worker; 1015112ed2d3SChris Wilson 1016112ed2d3SChris Wilson threads[tmp].resets = 1017cb823ed9SChris Wilson i915_reset_engine_count(global, other); 1018112ed2d3SChris Wilson 1019174b976dSChris Wilson if (other == engine && !(flags & TEST_SELF)) 1020112ed2d3SChris Wilson continue; 1021112ed2d3SChris Wilson 1022174b976dSChris Wilson if (other != engine && !(flags & TEST_OTHERS)) 1023112ed2d3SChris Wilson continue; 1024112ed2d3SChris Wilson 1025112ed2d3SChris Wilson threads[tmp].engine = other; 1026112ed2d3SChris Wilson threads[tmp].flags = flags; 1027112ed2d3SChris Wilson 10286407cf53STvrtko Ursulin worker = kthread_create_worker(0, "igt/%s", 10296407cf53STvrtko Ursulin other->name); 10306407cf53STvrtko Ursulin if (IS_ERR(worker)) { 10316407cf53STvrtko Ursulin err = PTR_ERR(worker); 10326407cf53STvrtko Ursulin pr_err("[%s] Worker create failed: %d!\n", 10336407cf53STvrtko Ursulin engine->name, err); 1034112ed2d3SChris Wilson goto unwind; 1035112ed2d3SChris Wilson } 1036112ed2d3SChris Wilson 10376407cf53STvrtko Ursulin threads[tmp].worker = worker; 1038112ed2d3SChris Wilson 10396407cf53STvrtko Ursulin kthread_init_work(&threads[tmp].work, active_engine); 10406407cf53STvrtko Ursulin kthread_queue_work(threads[tmp].worker, 10416407cf53STvrtko Ursulin &threads[tmp].work); 10426407cf53STvrtko Ursulin } 1043e5661c6aSChris Wilson 1044617e87c0SJohn Harrison st_engine_heartbeat_disable_no_pm(engine); 10459030e39cSThomas Hellström GEM_BUG_ON(test_and_set_bit(I915_RESET_ENGINE + id, 10469030e39cSThomas Hellström >->reset.flags)); 1047112ed2d3SChris Wilson do { 1048112ed2d3SChris Wilson struct i915_request *rq = NULL; 1049617e87c0SJohn Harrison struct intel_selftest_saved_policy saved; 1050617e87c0SJohn Harrison int err2; 1051617e87c0SJohn Harrison 1052617e87c0SJohn Harrison err = intel_selftest_modify_policy(engine, &saved, 1053617e87c0SJohn Harrison SELFTEST_SCHEDULER_MODIFY_FAST_RESET); 1054617e87c0SJohn Harrison if (err) { 1055617e87c0SJohn Harrison pr_err("[%s] Modify policy failed: %d!\n", engine->name, err); 1056617e87c0SJohn Harrison break; 1057617e87c0SJohn Harrison } 1058112ed2d3SChris Wilson 1059112ed2d3SChris Wilson if (flags & TEST_ACTIVE) { 1060112ed2d3SChris Wilson rq = hang_create_request(&h, engine); 1061112ed2d3SChris Wilson if (IS_ERR(rq)) { 1062112ed2d3SChris Wilson err = PTR_ERR(rq); 10633f5dff6cSJohn Harrison pr_err("[%s] Create hang request failed: %d!\n", 10643f5dff6cSJohn Harrison engine->name, err); 1065617e87c0SJohn Harrison goto restore; 1066112ed2d3SChris Wilson } 1067112ed2d3SChris Wilson 1068112ed2d3SChris Wilson i915_request_get(rq); 1069112ed2d3SChris Wilson i915_request_add(rq); 1070112ed2d3SChris Wilson 1071112ed2d3SChris Wilson if (!wait_until_running(&h, rq)) { 1072cb823ed9SChris Wilson struct drm_printer p = drm_info_printer(gt->i915->drm.dev); 1073112ed2d3SChris Wilson 1074112ed2d3SChris Wilson pr_err("%s: Failed to start request %llx, at %x\n", 1075112ed2d3SChris Wilson __func__, rq->fence.seqno, hws_seqno(&h, rq)); 1076112ed2d3SChris Wilson intel_engine_dump(engine, &p, 1077112ed2d3SChris Wilson "%s\n", engine->name); 1078112ed2d3SChris Wilson 1079112ed2d3SChris Wilson i915_request_put(rq); 1080112ed2d3SChris Wilson err = -EIO; 1081617e87c0SJohn Harrison goto restore; 1082112ed2d3SChris Wilson } 1083617e87c0SJohn Harrison } else { 1084617e87c0SJohn Harrison intel_engine_pm_get(engine); 1085112ed2d3SChris Wilson } 1086112ed2d3SChris Wilson 1087617e87c0SJohn Harrison if (!using_guc) { 1088cb823ed9SChris Wilson err = intel_engine_reset(engine, NULL); 1089112ed2d3SChris Wilson if (err) { 1090112ed2d3SChris Wilson pr_err("i915_reset_engine(%s:%s): failed, err=%d\n", 1091112ed2d3SChris Wilson engine->name, test_name, err); 1092617e87c0SJohn Harrison goto restore; 1093617e87c0SJohn Harrison } 1094617e87c0SJohn Harrison } 1095617e87c0SJohn Harrison 1096617e87c0SJohn Harrison if (rq) { 1097617e87c0SJohn Harrison /* Ensure the reset happens and kills the engine */ 1098617e87c0SJohn Harrison err = intel_selftest_wait_for_rq(rq); 1099617e87c0SJohn Harrison if (err) 1100617e87c0SJohn Harrison pr_err("[%s] Wait for request %lld:%lld [0x%04X] failed: %d!\n", 1101617e87c0SJohn Harrison engine->name, rq->fence.context, 11023cb3e343SMatthew Brost rq->fence.seqno, rq->context->guc_id.id, err); 1103112ed2d3SChris Wilson } 1104112ed2d3SChris Wilson 1105112ed2d3SChris Wilson count++; 1106112ed2d3SChris Wilson 1107112ed2d3SChris Wilson if (rq) { 110894ed4753SChris Wilson if (rq->fence.error != -EIO) { 11093f5dff6cSJohn Harrison pr_err("i915_reset_engine(%s:%s): failed to reset request %lld:%lld [0x%04X]\n", 111094ed4753SChris Wilson engine->name, test_name, 111194ed4753SChris Wilson rq->fence.context, 11123cb3e343SMatthew Brost rq->fence.seqno, rq->context->guc_id.id); 111394ed4753SChris Wilson i915_request_put(rq); 111494ed4753SChris Wilson 111594ed4753SChris Wilson GEM_TRACE_DUMP(); 111694ed4753SChris Wilson intel_gt_set_wedged(gt); 111794ed4753SChris Wilson err = -EIO; 1118617e87c0SJohn Harrison goto restore; 111994ed4753SChris Wilson } 112094ed4753SChris Wilson 1121112ed2d3SChris Wilson if (i915_request_wait(rq, 0, HZ / 5) < 0) { 1122112ed2d3SChris Wilson struct drm_printer p = 1123cb823ed9SChris Wilson drm_info_printer(gt->i915->drm.dev); 1124112ed2d3SChris Wilson 1125112ed2d3SChris Wilson pr_err("i915_reset_engine(%s:%s):" 112694ed4753SChris Wilson " failed to complete request %llx:%lld after reset\n", 112794ed4753SChris Wilson engine->name, test_name, 112894ed4753SChris Wilson rq->fence.context, 112994ed4753SChris Wilson rq->fence.seqno); 1130112ed2d3SChris Wilson intel_engine_dump(engine, &p, 1131112ed2d3SChris Wilson "%s\n", engine->name); 1132112ed2d3SChris Wilson i915_request_put(rq); 1133112ed2d3SChris Wilson 1134112ed2d3SChris Wilson GEM_TRACE_DUMP(); 1135cb823ed9SChris Wilson intel_gt_set_wedged(gt); 1136112ed2d3SChris Wilson err = -EIO; 1137617e87c0SJohn Harrison goto restore; 1138112ed2d3SChris Wilson } 1139112ed2d3SChris Wilson 1140112ed2d3SChris Wilson i915_request_put(rq); 1141112ed2d3SChris Wilson } 1142112ed2d3SChris Wilson 1143617e87c0SJohn Harrison if (!(flags & TEST_ACTIVE)) 1144617e87c0SJohn Harrison intel_engine_pm_put(engine); 1145617e87c0SJohn Harrison 1146112ed2d3SChris Wilson if (!(flags & TEST_SELF) && !wait_for_idle(engine)) { 1147112ed2d3SChris Wilson struct drm_printer p = 1148cb823ed9SChris Wilson drm_info_printer(gt->i915->drm.dev); 1149112ed2d3SChris Wilson 1150112ed2d3SChris Wilson pr_err("i915_reset_engine(%s:%s):" 1151112ed2d3SChris Wilson " failed to idle after reset\n", 1152112ed2d3SChris Wilson engine->name, test_name); 1153112ed2d3SChris Wilson intel_engine_dump(engine, &p, 1154112ed2d3SChris Wilson "%s\n", engine->name); 1155112ed2d3SChris Wilson 1156112ed2d3SChris Wilson err = -EIO; 1157617e87c0SJohn Harrison goto restore; 1158112ed2d3SChris Wilson } 1159617e87c0SJohn Harrison 1160617e87c0SJohn Harrison restore: 1161617e87c0SJohn Harrison err2 = intel_selftest_restore_policy(engine, &saved); 1162617e87c0SJohn Harrison if (err2) 1163617e87c0SJohn Harrison pr_err("[%s] Restore policy failed: %d!\n", engine->name, err2); 1164617e87c0SJohn Harrison if (err == 0) 1165617e87c0SJohn Harrison err = err2; 1166617e87c0SJohn Harrison if (err) 1167617e87c0SJohn Harrison break; 1168112ed2d3SChris Wilson } while (time_before(jiffies, end_time)); 11699030e39cSThomas Hellström clear_and_wake_up_bit(I915_RESET_ENGINE + id, >->reset.flags); 1170617e87c0SJohn Harrison st_engine_heartbeat_enable_no_pm(engine); 1171e6ba7648SChris Wilson 1172112ed2d3SChris Wilson pr_info("i915_reset_engine(%s:%s): %lu resets\n", 1173112ed2d3SChris Wilson engine->name, test_name, count); 1174112ed2d3SChris Wilson 1175617e87c0SJohn Harrison /* GuC based resets are not logged per engine */ 1176617e87c0SJohn Harrison if (!using_guc) { 1177cb823ed9SChris Wilson reported = i915_reset_engine_count(global, engine); 1178112ed2d3SChris Wilson reported -= threads[engine->id].resets; 1179112ed2d3SChris Wilson if (reported != count) { 1180112ed2d3SChris Wilson pr_err("i915_reset_engine(%s:%s): reset %lu times, but reported %lu\n", 1181112ed2d3SChris Wilson engine->name, test_name, count, reported); 1182112ed2d3SChris Wilson if (!err) 1183112ed2d3SChris Wilson err = -EINVAL; 1184112ed2d3SChris Wilson } 1185617e87c0SJohn Harrison } 1186112ed2d3SChris Wilson 1187112ed2d3SChris Wilson unwind: 11885d904e3cSTvrtko Ursulin for_each_engine(other, gt, tmp) { 1189112ed2d3SChris Wilson int ret; 1190112ed2d3SChris Wilson 11916407cf53STvrtko Ursulin if (!threads[tmp].worker) 1192112ed2d3SChris Wilson continue; 1193112ed2d3SChris Wilson 11946407cf53STvrtko Ursulin WRITE_ONCE(threads[tmp].stop, true); 11956407cf53STvrtko Ursulin kthread_flush_work(&threads[tmp].work); 11966407cf53STvrtko Ursulin ret = READ_ONCE(threads[tmp].result); 1197112ed2d3SChris Wilson if (ret) { 1198112ed2d3SChris Wilson pr_err("kthread for other engine %s failed, err=%d\n", 1199112ed2d3SChris Wilson other->name, ret); 1200112ed2d3SChris Wilson if (!err) 1201112ed2d3SChris Wilson err = ret; 1202112ed2d3SChris Wilson } 12036407cf53STvrtko Ursulin 12046407cf53STvrtko Ursulin kthread_destroy_worker(threads[tmp].worker); 1205112ed2d3SChris Wilson 1206617e87c0SJohn Harrison /* GuC based resets are not logged per engine */ 1207617e87c0SJohn Harrison if (!using_guc) { 1208cb823ed9SChris Wilson if (other->uabi_class != engine->uabi_class && 1209112ed2d3SChris Wilson threads[tmp].resets != 1210cb823ed9SChris Wilson i915_reset_engine_count(global, other)) { 1211112ed2d3SChris Wilson pr_err("Innocent engine %s was reset (count=%ld)\n", 1212112ed2d3SChris Wilson other->name, 1213cb823ed9SChris Wilson i915_reset_engine_count(global, other) - 1214112ed2d3SChris Wilson threads[tmp].resets); 1215112ed2d3SChris Wilson if (!err) 1216112ed2d3SChris Wilson err = -EINVAL; 1217112ed2d3SChris Wilson } 1218112ed2d3SChris Wilson } 1219617e87c0SJohn Harrison } 1220112ed2d3SChris Wilson 1221cb823ed9SChris Wilson if (device != i915_reset_count(global)) { 1222112ed2d3SChris Wilson pr_err("Global reset (count=%ld)!\n", 1223cb823ed9SChris Wilson i915_reset_count(global) - device); 1224112ed2d3SChris Wilson if (!err) 1225112ed2d3SChris Wilson err = -EINVAL; 1226112ed2d3SChris Wilson } 1227112ed2d3SChris Wilson 1228112ed2d3SChris Wilson if (err) 1229112ed2d3SChris Wilson break; 1230112ed2d3SChris Wilson 12317e805762SChris Wilson err = igt_flush_test(gt->i915); 12323f5dff6cSJohn Harrison if (err) { 12333f5dff6cSJohn Harrison pr_err("[%s] Flush failed: %d!\n", engine->name, err); 1234112ed2d3SChris Wilson break; 1235112ed2d3SChris Wilson } 12363f5dff6cSJohn Harrison } 12376cd96877SJohn Harrison kfree(threads); 1238112ed2d3SChris Wilson 1239cb823ed9SChris Wilson if (intel_gt_is_wedged(gt)) 1240112ed2d3SChris Wilson err = -EIO; 1241112ed2d3SChris Wilson 12427e805762SChris Wilson if (flags & TEST_ACTIVE) 1243112ed2d3SChris Wilson hang_fini(&h); 1244112ed2d3SChris Wilson 1245112ed2d3SChris Wilson return err; 1246112ed2d3SChris Wilson } 1247112ed2d3SChris Wilson 1248112ed2d3SChris Wilson static int igt_reset_engines(void *arg) 1249112ed2d3SChris Wilson { 1250112ed2d3SChris Wilson static const struct { 1251112ed2d3SChris Wilson const char *name; 1252112ed2d3SChris Wilson unsigned int flags; 1253112ed2d3SChris Wilson } phases[] = { 1254112ed2d3SChris Wilson { "idle", 0 }, 1255112ed2d3SChris Wilson { "active", TEST_ACTIVE }, 1256112ed2d3SChris Wilson { "others-idle", TEST_OTHERS }, 1257112ed2d3SChris Wilson { "others-active", TEST_OTHERS | TEST_ACTIVE }, 1258112ed2d3SChris Wilson { 1259112ed2d3SChris Wilson "others-priority", 1260112ed2d3SChris Wilson TEST_OTHERS | TEST_ACTIVE | TEST_PRIORITY 1261112ed2d3SChris Wilson }, 1262112ed2d3SChris Wilson { 1263112ed2d3SChris Wilson "self-priority", 1264174b976dSChris Wilson TEST_ACTIVE | TEST_PRIORITY | TEST_SELF, 1265112ed2d3SChris Wilson }, 1266112ed2d3SChris Wilson { } 1267112ed2d3SChris Wilson }; 1268cb823ed9SChris Wilson struct intel_gt *gt = arg; 1269112ed2d3SChris Wilson typeof(*phases) *p; 1270112ed2d3SChris Wilson int err; 1271112ed2d3SChris Wilson 1272112ed2d3SChris Wilson for (p = phases; p->name; p++) { 1273112ed2d3SChris Wilson if (p->flags & TEST_PRIORITY) { 1274cb823ed9SChris Wilson if (!(gt->i915->caps.scheduler & I915_SCHEDULER_CAP_PRIORITY)) 1275112ed2d3SChris Wilson continue; 1276112ed2d3SChris Wilson } 1277112ed2d3SChris Wilson 1278112ed2d3SChris Wilson err = __igt_reset_engines(arg, p->name, p->flags); 1279112ed2d3SChris Wilson if (err) 1280112ed2d3SChris Wilson return err; 1281112ed2d3SChris Wilson } 1282112ed2d3SChris Wilson 1283112ed2d3SChris Wilson return 0; 1284112ed2d3SChris Wilson } 1285112ed2d3SChris Wilson 1286cb823ed9SChris Wilson static u32 fake_hangcheck(struct intel_gt *gt, intel_engine_mask_t mask) 1287112ed2d3SChris Wilson { 1288cb823ed9SChris Wilson u32 count = i915_reset_count(>->i915->gpu_error); 1289112ed2d3SChris Wilson 1290cb823ed9SChris Wilson intel_gt_reset(gt, mask, NULL); 1291112ed2d3SChris Wilson 1292112ed2d3SChris Wilson return count; 1293112ed2d3SChris Wilson } 1294112ed2d3SChris Wilson 1295112ed2d3SChris Wilson static int igt_reset_wait(void *arg) 1296112ed2d3SChris Wilson { 1297cb823ed9SChris Wilson struct intel_gt *gt = arg; 1298cb823ed9SChris Wilson struct i915_gpu_error *global = >->i915->gpu_error; 1299a96d8f05SJohn Harrison struct intel_engine_cs *engine; 1300112ed2d3SChris Wilson struct i915_request *rq; 1301112ed2d3SChris Wilson unsigned int reset_count; 1302112ed2d3SChris Wilson struct hang h; 1303112ed2d3SChris Wilson long timeout; 1304112ed2d3SChris Wilson int err; 1305112ed2d3SChris Wilson 1306a96d8f05SJohn Harrison engine = intel_selftest_find_any_engine(gt); 1307a96d8f05SJohn Harrison 1308cb823ed9SChris Wilson if (!engine || !intel_engine_can_store_dword(engine)) 1309112ed2d3SChris Wilson return 0; 1310112ed2d3SChris Wilson 1311112ed2d3SChris Wilson /* Check that we detect a stuck waiter and issue a reset */ 1312112ed2d3SChris Wilson 1313cb823ed9SChris Wilson igt_global_reset_lock(gt); 1314112ed2d3SChris Wilson 1315cb823ed9SChris Wilson err = hang_init(&h, gt); 13163f5dff6cSJohn Harrison if (err) { 13173f5dff6cSJohn Harrison pr_err("[%s] Hang init failed: %d!\n", engine->name, err); 1318112ed2d3SChris Wilson goto unlock; 13193f5dff6cSJohn Harrison } 1320112ed2d3SChris Wilson 1321cb823ed9SChris Wilson rq = hang_create_request(&h, engine); 1322112ed2d3SChris Wilson if (IS_ERR(rq)) { 1323112ed2d3SChris Wilson err = PTR_ERR(rq); 13243f5dff6cSJohn Harrison pr_err("[%s] Create hang request failed: %d!\n", engine->name, err); 1325112ed2d3SChris Wilson goto fini; 1326112ed2d3SChris Wilson } 1327112ed2d3SChris Wilson 1328112ed2d3SChris Wilson i915_request_get(rq); 1329112ed2d3SChris Wilson i915_request_add(rq); 1330112ed2d3SChris Wilson 1331112ed2d3SChris Wilson if (!wait_until_running(&h, rq)) { 1332cb823ed9SChris Wilson struct drm_printer p = drm_info_printer(gt->i915->drm.dev); 1333112ed2d3SChris Wilson 1334112ed2d3SChris Wilson pr_err("%s: Failed to start request %llx, at %x\n", 1335112ed2d3SChris Wilson __func__, rq->fence.seqno, hws_seqno(&h, rq)); 1336112ed2d3SChris Wilson intel_engine_dump(rq->engine, &p, "%s\n", rq->engine->name); 1337112ed2d3SChris Wilson 1338cb823ed9SChris Wilson intel_gt_set_wedged(gt); 1339112ed2d3SChris Wilson 1340112ed2d3SChris Wilson err = -EIO; 1341112ed2d3SChris Wilson goto out_rq; 1342112ed2d3SChris Wilson } 1343112ed2d3SChris Wilson 1344cb823ed9SChris Wilson reset_count = fake_hangcheck(gt, ALL_ENGINES); 1345112ed2d3SChris Wilson 13462f530945SChris Wilson timeout = i915_request_wait(rq, 0, 10); 1347112ed2d3SChris Wilson if (timeout < 0) { 1348112ed2d3SChris Wilson pr_err("i915_request_wait failed on a stuck request: err=%ld\n", 1349112ed2d3SChris Wilson timeout); 1350112ed2d3SChris Wilson err = timeout; 1351112ed2d3SChris Wilson goto out_rq; 1352112ed2d3SChris Wilson } 1353112ed2d3SChris Wilson 1354cb823ed9SChris Wilson if (i915_reset_count(global) == reset_count) { 1355112ed2d3SChris Wilson pr_err("No GPU reset recorded!\n"); 1356112ed2d3SChris Wilson err = -EINVAL; 1357112ed2d3SChris Wilson goto out_rq; 1358112ed2d3SChris Wilson } 1359112ed2d3SChris Wilson 1360112ed2d3SChris Wilson out_rq: 1361112ed2d3SChris Wilson i915_request_put(rq); 1362112ed2d3SChris Wilson fini: 1363112ed2d3SChris Wilson hang_fini(&h); 1364112ed2d3SChris Wilson unlock: 1365cb823ed9SChris Wilson igt_global_reset_unlock(gt); 1366112ed2d3SChris Wilson 1367cb823ed9SChris Wilson if (intel_gt_is_wedged(gt)) 1368112ed2d3SChris Wilson return -EIO; 1369112ed2d3SChris Wilson 1370112ed2d3SChris Wilson return err; 1371112ed2d3SChris Wilson } 1372112ed2d3SChris Wilson 1373112ed2d3SChris Wilson struct evict_vma { 1374112ed2d3SChris Wilson struct completion completion; 1375112ed2d3SChris Wilson struct i915_vma *vma; 1376112ed2d3SChris Wilson }; 1377112ed2d3SChris Wilson 1378112ed2d3SChris Wilson static int evict_vma(void *data) 1379112ed2d3SChris Wilson { 1380112ed2d3SChris Wilson struct evict_vma *arg = data; 1381112ed2d3SChris Wilson struct i915_address_space *vm = arg->vma->vm; 1382112ed2d3SChris Wilson struct drm_mm_node evict = arg->vma->node; 1383112ed2d3SChris Wilson int err; 1384112ed2d3SChris Wilson 1385112ed2d3SChris Wilson complete(&arg->completion); 1386112ed2d3SChris Wilson 13872850748eSChris Wilson mutex_lock(&vm->mutex); 13887e00897bSMaarten Lankhorst err = i915_gem_evict_for_node(vm, NULL, &evict, 0); 13892850748eSChris Wilson mutex_unlock(&vm->mutex); 1390112ed2d3SChris Wilson 1391112ed2d3SChris Wilson return err; 1392112ed2d3SChris Wilson } 1393112ed2d3SChris Wilson 1394112ed2d3SChris Wilson static int evict_fence(void *data) 1395112ed2d3SChris Wilson { 1396112ed2d3SChris Wilson struct evict_vma *arg = data; 1397112ed2d3SChris Wilson int err; 1398112ed2d3SChris Wilson 1399112ed2d3SChris Wilson complete(&arg->completion); 1400112ed2d3SChris Wilson 1401112ed2d3SChris Wilson /* Mark the fence register as dirty to force the mmio update. */ 1402112ed2d3SChris Wilson err = i915_gem_object_set_tiling(arg->vma->obj, I915_TILING_Y, 512); 1403112ed2d3SChris Wilson if (err) { 1404112ed2d3SChris Wilson pr_err("Invalid Y-tiling settings; err:%d\n", err); 14052850748eSChris Wilson return err; 1406112ed2d3SChris Wilson } 1407112ed2d3SChris Wilson 1408e2ccc50aSChris Wilson err = i915_vma_pin(arg->vma, 0, 0, PIN_GLOBAL | PIN_MAPPABLE); 1409e2ccc50aSChris Wilson if (err) { 1410e2ccc50aSChris Wilson pr_err("Unable to pin vma for Y-tiled fence; err:%d\n", err); 14112850748eSChris Wilson return err; 1412e2ccc50aSChris Wilson } 1413e2ccc50aSChris Wilson 1414112ed2d3SChris Wilson err = i915_vma_pin_fence(arg->vma); 1415e2ccc50aSChris Wilson i915_vma_unpin(arg->vma); 1416112ed2d3SChris Wilson if (err) { 1417112ed2d3SChris Wilson pr_err("Unable to pin Y-tiled fence; err:%d\n", err); 14182850748eSChris Wilson return err; 1419112ed2d3SChris Wilson } 1420112ed2d3SChris Wilson 1421112ed2d3SChris Wilson i915_vma_unpin_fence(arg->vma); 1422112ed2d3SChris Wilson 14232850748eSChris Wilson return 0; 1424112ed2d3SChris Wilson } 1425112ed2d3SChris Wilson 1426cb823ed9SChris Wilson static int __igt_reset_evict_vma(struct intel_gt *gt, 1427112ed2d3SChris Wilson struct i915_address_space *vm, 1428112ed2d3SChris Wilson int (*fn)(void *), 1429112ed2d3SChris Wilson unsigned int flags) 1430112ed2d3SChris Wilson { 1431a96d8f05SJohn Harrison struct intel_engine_cs *engine; 1432112ed2d3SChris Wilson struct drm_i915_gem_object *obj; 1433112ed2d3SChris Wilson struct task_struct *tsk = NULL; 1434112ed2d3SChris Wilson struct i915_request *rq; 1435112ed2d3SChris Wilson struct evict_vma arg; 1436112ed2d3SChris Wilson struct hang h; 1437e60f7bb7SMatthew Auld unsigned int pin_flags; 1438112ed2d3SChris Wilson int err; 1439112ed2d3SChris Wilson 1440e60f7bb7SMatthew Auld if (!gt->ggtt->num_fences && flags & EXEC_OBJECT_NEEDS_FENCE) 1441e60f7bb7SMatthew Auld return 0; 1442e60f7bb7SMatthew Auld 1443a96d8f05SJohn Harrison engine = intel_selftest_find_any_engine(gt); 1444a96d8f05SJohn Harrison 1445cb823ed9SChris Wilson if (!engine || !intel_engine_can_store_dword(engine)) 1446112ed2d3SChris Wilson return 0; 1447112ed2d3SChris Wilson 1448112ed2d3SChris Wilson /* Check that we can recover an unbind stuck on a hanging request */ 1449112ed2d3SChris Wilson 1450cb823ed9SChris Wilson err = hang_init(&h, gt); 14513f5dff6cSJohn Harrison if (err) { 14523f5dff6cSJohn Harrison pr_err("[%s] Hang init failed: %d!\n", engine->name, err); 14537e805762SChris Wilson return err; 14543f5dff6cSJohn Harrison } 1455112ed2d3SChris Wilson 1456cb823ed9SChris Wilson obj = i915_gem_object_create_internal(gt->i915, SZ_1M); 1457112ed2d3SChris Wilson if (IS_ERR(obj)) { 1458112ed2d3SChris Wilson err = PTR_ERR(obj); 14593f5dff6cSJohn Harrison pr_err("[%s] Create object failed: %d!\n", engine->name, err); 1460112ed2d3SChris Wilson goto fini; 1461112ed2d3SChris Wilson } 1462112ed2d3SChris Wilson 1463112ed2d3SChris Wilson if (flags & EXEC_OBJECT_NEEDS_FENCE) { 1464112ed2d3SChris Wilson err = i915_gem_object_set_tiling(obj, I915_TILING_X, 512); 1465112ed2d3SChris Wilson if (err) { 1466112ed2d3SChris Wilson pr_err("Invalid X-tiling settings; err:%d\n", err); 1467112ed2d3SChris Wilson goto out_obj; 1468112ed2d3SChris Wilson } 1469112ed2d3SChris Wilson } 1470112ed2d3SChris Wilson 1471112ed2d3SChris Wilson arg.vma = i915_vma_instance(obj, vm, NULL); 1472112ed2d3SChris Wilson if (IS_ERR(arg.vma)) { 1473112ed2d3SChris Wilson err = PTR_ERR(arg.vma); 14743f5dff6cSJohn Harrison pr_err("[%s] VMA instance failed: %d!\n", engine->name, err); 1475112ed2d3SChris Wilson goto out_obj; 1476112ed2d3SChris Wilson } 1477112ed2d3SChris Wilson 1478cb823ed9SChris Wilson rq = hang_create_request(&h, engine); 1479112ed2d3SChris Wilson if (IS_ERR(rq)) { 1480112ed2d3SChris Wilson err = PTR_ERR(rq); 14813f5dff6cSJohn Harrison pr_err("[%s] Create hang request failed: %d!\n", engine->name, err); 1482112ed2d3SChris Wilson goto out_obj; 1483112ed2d3SChris Wilson } 1484112ed2d3SChris Wilson 1485e60f7bb7SMatthew Auld pin_flags = i915_vma_is_ggtt(arg.vma) ? PIN_GLOBAL : PIN_USER; 1486e60f7bb7SMatthew Auld 1487e60f7bb7SMatthew Auld if (flags & EXEC_OBJECT_NEEDS_FENCE) 1488e60f7bb7SMatthew Auld pin_flags |= PIN_MAPPABLE; 1489e60f7bb7SMatthew Auld 1490e60f7bb7SMatthew Auld err = i915_vma_pin(arg.vma, 0, 0, pin_flags); 1491112ed2d3SChris Wilson if (err) { 1492112ed2d3SChris Wilson i915_request_add(rq); 14933f5dff6cSJohn Harrison pr_err("[%s] VMA pin failed: %d!\n", engine->name, err); 1494112ed2d3SChris Wilson goto out_obj; 1495112ed2d3SChris Wilson } 1496112ed2d3SChris Wilson 1497112ed2d3SChris Wilson if (flags & EXEC_OBJECT_NEEDS_FENCE) { 1498112ed2d3SChris Wilson err = i915_vma_pin_fence(arg.vma); 1499112ed2d3SChris Wilson if (err) { 1500112ed2d3SChris Wilson pr_err("Unable to pin X-tiled fence; err:%d\n", err); 1501112ed2d3SChris Wilson i915_vma_unpin(arg.vma); 1502112ed2d3SChris Wilson i915_request_add(rq); 1503112ed2d3SChris Wilson goto out_obj; 1504112ed2d3SChris Wilson } 1505112ed2d3SChris Wilson } 1506112ed2d3SChris Wilson 150756d7bd74SAndrzej Hajda err = igt_vma_move_to_active_unlocked(arg.vma, rq, flags); 15083f5dff6cSJohn Harrison if (err) 15093f5dff6cSJohn Harrison pr_err("[%s] Move to active failed: %d!\n", engine->name, err); 15103f5dff6cSJohn Harrison 1511112ed2d3SChris Wilson if (flags & EXEC_OBJECT_NEEDS_FENCE) 1512112ed2d3SChris Wilson i915_vma_unpin_fence(arg.vma); 1513112ed2d3SChris Wilson i915_vma_unpin(arg.vma); 1514112ed2d3SChris Wilson 1515112ed2d3SChris Wilson i915_request_get(rq); 1516112ed2d3SChris Wilson i915_request_add(rq); 1517112ed2d3SChris Wilson if (err) 1518112ed2d3SChris Wilson goto out_rq; 1519112ed2d3SChris Wilson 1520112ed2d3SChris Wilson if (!wait_until_running(&h, rq)) { 1521cb823ed9SChris Wilson struct drm_printer p = drm_info_printer(gt->i915->drm.dev); 1522112ed2d3SChris Wilson 1523112ed2d3SChris Wilson pr_err("%s: Failed to start request %llx, at %x\n", 1524112ed2d3SChris Wilson __func__, rq->fence.seqno, hws_seqno(&h, rq)); 1525112ed2d3SChris Wilson intel_engine_dump(rq->engine, &p, "%s\n", rq->engine->name); 1526112ed2d3SChris Wilson 1527cb823ed9SChris Wilson intel_gt_set_wedged(gt); 1528112ed2d3SChris Wilson goto out_reset; 1529112ed2d3SChris Wilson } 1530112ed2d3SChris Wilson 1531112ed2d3SChris Wilson init_completion(&arg.completion); 1532112ed2d3SChris Wilson 1533112ed2d3SChris Wilson tsk = kthread_run(fn, &arg, "igt/evict_vma"); 1534112ed2d3SChris Wilson if (IS_ERR(tsk)) { 1535112ed2d3SChris Wilson err = PTR_ERR(tsk); 15363f5dff6cSJohn Harrison pr_err("[%s] Thread spawn failed: %d!\n", engine->name, err); 1537112ed2d3SChris Wilson tsk = NULL; 1538112ed2d3SChris Wilson goto out_reset; 1539112ed2d3SChris Wilson } 1540112ed2d3SChris Wilson get_task_struct(tsk); 1541112ed2d3SChris Wilson 1542112ed2d3SChris Wilson wait_for_completion(&arg.completion); 1543112ed2d3SChris Wilson 1544112ed2d3SChris Wilson if (wait_for(!list_empty(&rq->fence.cb_list), 10)) { 1545cb823ed9SChris Wilson struct drm_printer p = drm_info_printer(gt->i915->drm.dev); 1546112ed2d3SChris Wilson 1547112ed2d3SChris Wilson pr_err("igt/evict_vma kthread did not wait\n"); 1548112ed2d3SChris Wilson intel_engine_dump(rq->engine, &p, "%s\n", rq->engine->name); 1549112ed2d3SChris Wilson 1550cb823ed9SChris Wilson intel_gt_set_wedged(gt); 1551112ed2d3SChris Wilson goto out_reset; 1552112ed2d3SChris Wilson } 1553112ed2d3SChris Wilson 1554112ed2d3SChris Wilson out_reset: 1555cb823ed9SChris Wilson igt_global_reset_lock(gt); 1556cb823ed9SChris Wilson fake_hangcheck(gt, rq->engine->mask); 1557cb823ed9SChris Wilson igt_global_reset_unlock(gt); 1558112ed2d3SChris Wilson 1559112ed2d3SChris Wilson if (tsk) { 1560cb823ed9SChris Wilson struct intel_wedge_me w; 1561112ed2d3SChris Wilson 1562112ed2d3SChris Wilson /* The reset, even indirectly, should take less than 10ms. */ 1563cb823ed9SChris Wilson intel_wedge_on_timeout(&w, gt, HZ / 10 /* 100ms */) 1564112ed2d3SChris Wilson err = kthread_stop(tsk); 1565112ed2d3SChris Wilson 1566112ed2d3SChris Wilson put_task_struct(tsk); 1567112ed2d3SChris Wilson } 1568112ed2d3SChris Wilson 1569112ed2d3SChris Wilson out_rq: 1570112ed2d3SChris Wilson i915_request_put(rq); 1571112ed2d3SChris Wilson out_obj: 1572112ed2d3SChris Wilson i915_gem_object_put(obj); 1573112ed2d3SChris Wilson fini: 1574112ed2d3SChris Wilson hang_fini(&h); 1575cb823ed9SChris Wilson if (intel_gt_is_wedged(gt)) 1576112ed2d3SChris Wilson return -EIO; 1577112ed2d3SChris Wilson 1578112ed2d3SChris Wilson return err; 1579112ed2d3SChris Wilson } 1580112ed2d3SChris Wilson 1581112ed2d3SChris Wilson static int igt_reset_evict_ggtt(void *arg) 1582112ed2d3SChris Wilson { 1583cb823ed9SChris Wilson struct intel_gt *gt = arg; 1584112ed2d3SChris Wilson 1585cb823ed9SChris Wilson return __igt_reset_evict_vma(gt, >->ggtt->vm, 1586112ed2d3SChris Wilson evict_vma, EXEC_OBJECT_WRITE); 1587112ed2d3SChris Wilson } 1588112ed2d3SChris Wilson 1589112ed2d3SChris Wilson static int igt_reset_evict_ppgtt(void *arg) 1590112ed2d3SChris Wilson { 1591cb823ed9SChris Wilson struct intel_gt *gt = arg; 1592e6ba7648SChris Wilson struct i915_ppgtt *ppgtt; 1593112ed2d3SChris Wilson int err; 1594112ed2d3SChris Wilson 1595a4e7ccdaSChris Wilson /* aliasing == global gtt locking, covered above */ 1596e6ba7648SChris Wilson if (INTEL_PPGTT(gt->i915) < INTEL_PPGTT_FULL) 1597e6ba7648SChris Wilson return 0; 1598112ed2d3SChris Wilson 1599a259cc14SThomas Hellström ppgtt = i915_ppgtt_create(gt, 0); 1600e6ba7648SChris Wilson if (IS_ERR(ppgtt)) 1601e6ba7648SChris Wilson return PTR_ERR(ppgtt); 1602e6ba7648SChris Wilson 1603e6ba7648SChris Wilson err = __igt_reset_evict_vma(gt, &ppgtt->vm, 1604e6ba7648SChris Wilson evict_vma, EXEC_OBJECT_WRITE); 1605e6ba7648SChris Wilson i915_vm_put(&ppgtt->vm); 1606e6ba7648SChris Wilson 1607112ed2d3SChris Wilson return err; 1608112ed2d3SChris Wilson } 1609112ed2d3SChris Wilson 1610112ed2d3SChris Wilson static int igt_reset_evict_fence(void *arg) 1611112ed2d3SChris Wilson { 1612cb823ed9SChris Wilson struct intel_gt *gt = arg; 1613112ed2d3SChris Wilson 1614cb823ed9SChris Wilson return __igt_reset_evict_vma(gt, >->ggtt->vm, 1615112ed2d3SChris Wilson evict_fence, EXEC_OBJECT_NEEDS_FENCE); 1616112ed2d3SChris Wilson } 1617112ed2d3SChris Wilson 1618cb823ed9SChris Wilson static int wait_for_others(struct intel_gt *gt, 1619112ed2d3SChris Wilson struct intel_engine_cs *exclude) 1620112ed2d3SChris Wilson { 1621112ed2d3SChris Wilson struct intel_engine_cs *engine; 1622112ed2d3SChris Wilson enum intel_engine_id id; 1623112ed2d3SChris Wilson 16245d904e3cSTvrtko Ursulin for_each_engine(engine, gt, id) { 1625112ed2d3SChris Wilson if (engine == exclude) 1626112ed2d3SChris Wilson continue; 1627112ed2d3SChris Wilson 1628112ed2d3SChris Wilson if (!wait_for_idle(engine)) 1629112ed2d3SChris Wilson return -EIO; 1630112ed2d3SChris Wilson } 1631112ed2d3SChris Wilson 1632112ed2d3SChris Wilson return 0; 1633112ed2d3SChris Wilson } 1634112ed2d3SChris Wilson 1635112ed2d3SChris Wilson static int igt_reset_queue(void *arg) 1636112ed2d3SChris Wilson { 1637cb823ed9SChris Wilson struct intel_gt *gt = arg; 1638cb823ed9SChris Wilson struct i915_gpu_error *global = >->i915->gpu_error; 1639112ed2d3SChris Wilson struct intel_engine_cs *engine; 1640112ed2d3SChris Wilson enum intel_engine_id id; 1641112ed2d3SChris Wilson struct hang h; 1642112ed2d3SChris Wilson int err; 1643112ed2d3SChris Wilson 1644112ed2d3SChris Wilson /* Check that we replay pending requests following a hang */ 1645112ed2d3SChris Wilson 1646cb823ed9SChris Wilson igt_global_reset_lock(gt); 1647112ed2d3SChris Wilson 1648cb823ed9SChris Wilson err = hang_init(&h, gt); 1649112ed2d3SChris Wilson if (err) 1650112ed2d3SChris Wilson goto unlock; 1651112ed2d3SChris Wilson 16525d904e3cSTvrtko Ursulin for_each_engine(engine, gt, id) { 1653617e87c0SJohn Harrison struct intel_selftest_saved_policy saved; 1654112ed2d3SChris Wilson struct i915_request *prev; 1655112ed2d3SChris Wilson IGT_TIMEOUT(end_time); 1656112ed2d3SChris Wilson unsigned int count; 1657617e87c0SJohn Harrison bool using_guc = intel_engine_uses_guc(engine); 1658112ed2d3SChris Wilson 1659112ed2d3SChris Wilson if (!intel_engine_can_store_dword(engine)) 1660112ed2d3SChris Wilson continue; 1661112ed2d3SChris Wilson 1662617e87c0SJohn Harrison if (using_guc) { 1663617e87c0SJohn Harrison err = intel_selftest_modify_policy(engine, &saved, 1664617e87c0SJohn Harrison SELFTEST_SCHEDULER_MODIFY_NO_HANGCHECK); 1665617e87c0SJohn Harrison if (err) { 1666617e87c0SJohn Harrison pr_err("[%s] Modify policy failed: %d!\n", engine->name, err); 1667617e87c0SJohn Harrison goto fini; 1668617e87c0SJohn Harrison } 1669617e87c0SJohn Harrison } 1670617e87c0SJohn Harrison 1671112ed2d3SChris Wilson prev = hang_create_request(&h, engine); 1672112ed2d3SChris Wilson if (IS_ERR(prev)) { 1673112ed2d3SChris Wilson err = PTR_ERR(prev); 16743f5dff6cSJohn Harrison pr_err("[%s] Create 'prev' hang request failed: %d!\n", engine->name, err); 1675617e87c0SJohn Harrison goto restore; 1676112ed2d3SChris Wilson } 1677112ed2d3SChris Wilson 1678112ed2d3SChris Wilson i915_request_get(prev); 1679112ed2d3SChris Wilson i915_request_add(prev); 1680112ed2d3SChris Wilson 1681112ed2d3SChris Wilson count = 0; 1682112ed2d3SChris Wilson do { 1683112ed2d3SChris Wilson struct i915_request *rq; 1684112ed2d3SChris Wilson unsigned int reset_count; 1685112ed2d3SChris Wilson 1686112ed2d3SChris Wilson rq = hang_create_request(&h, engine); 1687112ed2d3SChris Wilson if (IS_ERR(rq)) { 1688112ed2d3SChris Wilson err = PTR_ERR(rq); 16893f5dff6cSJohn Harrison pr_err("[%s] Create hang request failed: %d!\n", engine->name, err); 1690617e87c0SJohn Harrison goto restore; 1691112ed2d3SChris Wilson } 1692112ed2d3SChris Wilson 1693112ed2d3SChris Wilson i915_request_get(rq); 1694112ed2d3SChris Wilson i915_request_add(rq); 1695112ed2d3SChris Wilson 1696112ed2d3SChris Wilson /* 1697112ed2d3SChris Wilson * XXX We don't handle resetting the kernel context 1698112ed2d3SChris Wilson * very well. If we trigger a device reset twice in 1699112ed2d3SChris Wilson * quick succession while the kernel context is 1700112ed2d3SChris Wilson * executing, we may end up skipping the breadcrumb. 1701112ed2d3SChris Wilson * This is really only a problem for the selftest as 1702112ed2d3SChris Wilson * normally there is a large interlude between resets 1703112ed2d3SChris Wilson * (hangcheck), or we focus on resetting just one 1704112ed2d3SChris Wilson * engine and so avoid repeatedly resetting innocents. 1705112ed2d3SChris Wilson */ 1706cb823ed9SChris Wilson err = wait_for_others(gt, engine); 1707112ed2d3SChris Wilson if (err) { 1708112ed2d3SChris Wilson pr_err("%s(%s): Failed to idle other inactive engines after device reset\n", 1709112ed2d3SChris Wilson __func__, engine->name); 1710112ed2d3SChris Wilson i915_request_put(rq); 1711112ed2d3SChris Wilson i915_request_put(prev); 1712112ed2d3SChris Wilson 1713112ed2d3SChris Wilson GEM_TRACE_DUMP(); 1714cb823ed9SChris Wilson intel_gt_set_wedged(gt); 1715617e87c0SJohn Harrison goto restore; 1716112ed2d3SChris Wilson } 1717112ed2d3SChris Wilson 1718112ed2d3SChris Wilson if (!wait_until_running(&h, prev)) { 1719cb823ed9SChris Wilson struct drm_printer p = drm_info_printer(gt->i915->drm.dev); 1720112ed2d3SChris Wilson 1721112ed2d3SChris Wilson pr_err("%s(%s): Failed to start request %llx, at %x\n", 1722112ed2d3SChris Wilson __func__, engine->name, 1723112ed2d3SChris Wilson prev->fence.seqno, hws_seqno(&h, prev)); 1724112ed2d3SChris Wilson intel_engine_dump(engine, &p, 1725112ed2d3SChris Wilson "%s\n", engine->name); 1726112ed2d3SChris Wilson 1727112ed2d3SChris Wilson i915_request_put(rq); 1728112ed2d3SChris Wilson i915_request_put(prev); 1729112ed2d3SChris Wilson 1730cb823ed9SChris Wilson intel_gt_set_wedged(gt); 1731112ed2d3SChris Wilson 1732112ed2d3SChris Wilson err = -EIO; 1733617e87c0SJohn Harrison goto restore; 1734112ed2d3SChris Wilson } 1735112ed2d3SChris Wilson 1736cb823ed9SChris Wilson reset_count = fake_hangcheck(gt, BIT(id)); 1737112ed2d3SChris Wilson 1738112ed2d3SChris Wilson if (prev->fence.error != -EIO) { 1739112ed2d3SChris Wilson pr_err("GPU reset not recorded on hanging request [fence.error=%d]!\n", 1740112ed2d3SChris Wilson prev->fence.error); 1741112ed2d3SChris Wilson i915_request_put(rq); 1742112ed2d3SChris Wilson i915_request_put(prev); 1743112ed2d3SChris Wilson err = -EINVAL; 1744617e87c0SJohn Harrison goto restore; 1745112ed2d3SChris Wilson } 1746112ed2d3SChris Wilson 1747112ed2d3SChris Wilson if (rq->fence.error) { 1748112ed2d3SChris Wilson pr_err("Fence error status not zero [%d] after unrelated reset\n", 1749112ed2d3SChris Wilson rq->fence.error); 1750112ed2d3SChris Wilson i915_request_put(rq); 1751112ed2d3SChris Wilson i915_request_put(prev); 1752112ed2d3SChris Wilson err = -EINVAL; 1753617e87c0SJohn Harrison goto restore; 1754112ed2d3SChris Wilson } 1755112ed2d3SChris Wilson 1756cb823ed9SChris Wilson if (i915_reset_count(global) == reset_count) { 1757112ed2d3SChris Wilson pr_err("No GPU reset recorded!\n"); 1758112ed2d3SChris Wilson i915_request_put(rq); 1759112ed2d3SChris Wilson i915_request_put(prev); 1760112ed2d3SChris Wilson err = -EINVAL; 1761617e87c0SJohn Harrison goto restore; 1762112ed2d3SChris Wilson } 1763112ed2d3SChris Wilson 1764112ed2d3SChris Wilson i915_request_put(prev); 1765112ed2d3SChris Wilson prev = rq; 1766112ed2d3SChris Wilson count++; 1767112ed2d3SChris Wilson } while (time_before(jiffies, end_time)); 176880655d2aSChris Wilson pr_info("%s: Completed %d queued resets\n", 176980655d2aSChris Wilson engine->name, count); 1770112ed2d3SChris Wilson 1771112ed2d3SChris Wilson *h.batch = MI_BATCH_BUFFER_END; 1772baea429dSTvrtko Ursulin intel_gt_chipset_flush(engine->gt); 1773112ed2d3SChris Wilson 1774112ed2d3SChris Wilson i915_request_put(prev); 1775112ed2d3SChris Wilson 1776617e87c0SJohn Harrison restore: 1777617e87c0SJohn Harrison if (using_guc) { 1778617e87c0SJohn Harrison int err2 = intel_selftest_restore_policy(engine, &saved); 1779617e87c0SJohn Harrison 1780617e87c0SJohn Harrison if (err2) 1781617e87c0SJohn Harrison pr_err("%s:%d> [%s] Restore policy failed: %d!\n", 1782617e87c0SJohn Harrison __func__, __LINE__, engine->name, err2); 1783617e87c0SJohn Harrison if (err == 0) 1784617e87c0SJohn Harrison err = err2; 1785617e87c0SJohn Harrison } 1786617e87c0SJohn Harrison if (err) 1787617e87c0SJohn Harrison goto fini; 1788617e87c0SJohn Harrison 17897e805762SChris Wilson err = igt_flush_test(gt->i915); 17903f5dff6cSJohn Harrison if (err) { 17913f5dff6cSJohn Harrison pr_err("[%s] Flush failed: %d!\n", engine->name, err); 1792112ed2d3SChris Wilson break; 1793112ed2d3SChris Wilson } 17943f5dff6cSJohn Harrison } 1795112ed2d3SChris Wilson 1796112ed2d3SChris Wilson fini: 1797112ed2d3SChris Wilson hang_fini(&h); 1798112ed2d3SChris Wilson unlock: 1799cb823ed9SChris Wilson igt_global_reset_unlock(gt); 1800112ed2d3SChris Wilson 1801cb823ed9SChris Wilson if (intel_gt_is_wedged(gt)) 1802112ed2d3SChris Wilson return -EIO; 1803112ed2d3SChris Wilson 1804112ed2d3SChris Wilson return err; 1805112ed2d3SChris Wilson } 1806112ed2d3SChris Wilson 1807112ed2d3SChris Wilson static int igt_handle_error(void *arg) 1808112ed2d3SChris Wilson { 1809cb823ed9SChris Wilson struct intel_gt *gt = arg; 1810cb823ed9SChris Wilson struct i915_gpu_error *global = >->i915->gpu_error; 1811a96d8f05SJohn Harrison struct intel_engine_cs *engine; 1812112ed2d3SChris Wilson struct hang h; 1813112ed2d3SChris Wilson struct i915_request *rq; 1814742379c0SChris Wilson struct i915_gpu_coredump *error; 1815112ed2d3SChris Wilson int err; 1816112ed2d3SChris Wilson 1817a96d8f05SJohn Harrison engine = intel_selftest_find_any_engine(gt); 1818a96d8f05SJohn Harrison 1819112ed2d3SChris Wilson /* Check that we can issue a global GPU and engine reset */ 1820112ed2d3SChris Wilson 1821260e6b71SChris Wilson if (!intel_has_reset_engine(gt)) 1822112ed2d3SChris Wilson return 0; 1823112ed2d3SChris Wilson 1824112ed2d3SChris Wilson if (!engine || !intel_engine_can_store_dword(engine)) 1825112ed2d3SChris Wilson return 0; 1826112ed2d3SChris Wilson 1827cb823ed9SChris Wilson err = hang_init(&h, gt); 18283f5dff6cSJohn Harrison if (err) { 18293f5dff6cSJohn Harrison pr_err("[%s] Hang init failed: %d!\n", engine->name, err); 18307e805762SChris Wilson return err; 18313f5dff6cSJohn Harrison } 1832112ed2d3SChris Wilson 1833112ed2d3SChris Wilson rq = hang_create_request(&h, engine); 1834112ed2d3SChris Wilson if (IS_ERR(rq)) { 1835112ed2d3SChris Wilson err = PTR_ERR(rq); 18363f5dff6cSJohn Harrison pr_err("[%s] Create hang request failed: %d!\n", engine->name, err); 1837112ed2d3SChris Wilson goto err_fini; 1838112ed2d3SChris Wilson } 1839112ed2d3SChris Wilson 1840112ed2d3SChris Wilson i915_request_get(rq); 1841112ed2d3SChris Wilson i915_request_add(rq); 1842112ed2d3SChris Wilson 1843112ed2d3SChris Wilson if (!wait_until_running(&h, rq)) { 1844cb823ed9SChris Wilson struct drm_printer p = drm_info_printer(gt->i915->drm.dev); 1845112ed2d3SChris Wilson 1846112ed2d3SChris Wilson pr_err("%s: Failed to start request %llx, at %x\n", 1847112ed2d3SChris Wilson __func__, rq->fence.seqno, hws_seqno(&h, rq)); 1848112ed2d3SChris Wilson intel_engine_dump(rq->engine, &p, "%s\n", rq->engine->name); 1849112ed2d3SChris Wilson 1850cb823ed9SChris Wilson intel_gt_set_wedged(gt); 1851112ed2d3SChris Wilson 1852112ed2d3SChris Wilson err = -EIO; 1853112ed2d3SChris Wilson goto err_request; 1854112ed2d3SChris Wilson } 1855112ed2d3SChris Wilson 1856112ed2d3SChris Wilson /* Temporarily disable error capture */ 1857cb823ed9SChris Wilson error = xchg(&global->first_error, (void *)-1); 1858112ed2d3SChris Wilson 1859cb823ed9SChris Wilson intel_gt_handle_error(gt, engine->mask, 0, NULL); 1860112ed2d3SChris Wilson 1861cb823ed9SChris Wilson xchg(&global->first_error, error); 1862112ed2d3SChris Wilson 1863112ed2d3SChris Wilson if (rq->fence.error != -EIO) { 1864112ed2d3SChris Wilson pr_err("Guilty request not identified!\n"); 1865112ed2d3SChris Wilson err = -EINVAL; 1866112ed2d3SChris Wilson goto err_request; 1867112ed2d3SChris Wilson } 1868112ed2d3SChris Wilson 1869112ed2d3SChris Wilson err_request: 1870112ed2d3SChris Wilson i915_request_put(rq); 1871112ed2d3SChris Wilson err_fini: 1872112ed2d3SChris Wilson hang_fini(&h); 1873112ed2d3SChris Wilson return err; 1874112ed2d3SChris Wilson } 1875112ed2d3SChris Wilson 1876112ed2d3SChris Wilson static int __igt_atomic_reset_engine(struct intel_engine_cs *engine, 1877f6470c9bSMichal Wajdeczko const struct igt_atomic_section *p, 1878112ed2d3SChris Wilson const char *mode) 1879112ed2d3SChris Wilson { 188022916badSMatthew Brost struct tasklet_struct * const t = &engine->sched_engine->tasklet; 1881112ed2d3SChris Wilson int err; 1882112ed2d3SChris Wilson 1883112ed2d3SChris Wilson GEM_TRACE("i915_reset_engine(%s:%s) under %s\n", 1884112ed2d3SChris Wilson engine->name, mode, p->name); 1885112ed2d3SChris Wilson 188680655d2aSChris Wilson if (t->func) 188793100fdeSChris Wilson tasklet_disable(t); 188816f2941aSChris Wilson if (strcmp(p->name, "softirq")) 188916f2941aSChris Wilson local_bh_disable(); 1890112ed2d3SChris Wilson p->critical_section_begin(); 1891112ed2d3SChris Wilson 189216f2941aSChris Wilson err = __intel_engine_reset_bh(engine, NULL); 1893112ed2d3SChris Wilson 1894112ed2d3SChris Wilson p->critical_section_end(); 189516f2941aSChris Wilson if (strcmp(p->name, "softirq")) 189616f2941aSChris Wilson local_bh_enable(); 189780655d2aSChris Wilson if (t->func) { 1898112ed2d3SChris Wilson tasklet_enable(t); 189916f2941aSChris Wilson tasklet_hi_schedule(t); 190080655d2aSChris Wilson } 1901112ed2d3SChris Wilson 1902112ed2d3SChris Wilson if (err) 1903112ed2d3SChris Wilson pr_err("i915_reset_engine(%s:%s) failed under %s\n", 1904112ed2d3SChris Wilson engine->name, mode, p->name); 1905112ed2d3SChris Wilson 1906112ed2d3SChris Wilson return err; 1907112ed2d3SChris Wilson } 1908112ed2d3SChris Wilson 1909112ed2d3SChris Wilson static int igt_atomic_reset_engine(struct intel_engine_cs *engine, 1910f6470c9bSMichal Wajdeczko const struct igt_atomic_section *p) 1911112ed2d3SChris Wilson { 1912112ed2d3SChris Wilson struct i915_request *rq; 1913112ed2d3SChris Wilson struct hang h; 1914112ed2d3SChris Wilson int err; 1915112ed2d3SChris Wilson 1916112ed2d3SChris Wilson err = __igt_atomic_reset_engine(engine, p, "idle"); 1917112ed2d3SChris Wilson if (err) 1918112ed2d3SChris Wilson return err; 1919112ed2d3SChris Wilson 1920cb823ed9SChris Wilson err = hang_init(&h, engine->gt); 19213f5dff6cSJohn Harrison if (err) { 19223f5dff6cSJohn Harrison pr_err("[%s] Hang init failed: %d!\n", engine->name, err); 1923112ed2d3SChris Wilson return err; 19243f5dff6cSJohn Harrison } 1925112ed2d3SChris Wilson 1926112ed2d3SChris Wilson rq = hang_create_request(&h, engine); 1927112ed2d3SChris Wilson if (IS_ERR(rq)) { 1928112ed2d3SChris Wilson err = PTR_ERR(rq); 19293f5dff6cSJohn Harrison pr_err("[%s] Create hang request failed: %d!\n", engine->name, err); 1930112ed2d3SChris Wilson goto out; 1931112ed2d3SChris Wilson } 1932112ed2d3SChris Wilson 1933112ed2d3SChris Wilson i915_request_get(rq); 1934112ed2d3SChris Wilson i915_request_add(rq); 1935112ed2d3SChris Wilson 1936112ed2d3SChris Wilson if (wait_until_running(&h, rq)) { 1937112ed2d3SChris Wilson err = __igt_atomic_reset_engine(engine, p, "active"); 1938112ed2d3SChris Wilson } else { 1939112ed2d3SChris Wilson pr_err("%s(%s): Failed to start request %llx, at %x\n", 1940112ed2d3SChris Wilson __func__, engine->name, 1941112ed2d3SChris Wilson rq->fence.seqno, hws_seqno(&h, rq)); 1942cb823ed9SChris Wilson intel_gt_set_wedged(engine->gt); 1943112ed2d3SChris Wilson err = -EIO; 1944112ed2d3SChris Wilson } 1945112ed2d3SChris Wilson 1946112ed2d3SChris Wilson if (err == 0) { 1947cb823ed9SChris Wilson struct intel_wedge_me w; 1948112ed2d3SChris Wilson 1949cb823ed9SChris Wilson intel_wedge_on_timeout(&w, engine->gt, HZ / 20 /* 50ms */) 19502f530945SChris Wilson i915_request_wait(rq, 0, MAX_SCHEDULE_TIMEOUT); 1951cb823ed9SChris Wilson if (intel_gt_is_wedged(engine->gt)) 1952112ed2d3SChris Wilson err = -EIO; 1953112ed2d3SChris Wilson } 1954112ed2d3SChris Wilson 1955112ed2d3SChris Wilson i915_request_put(rq); 1956112ed2d3SChris Wilson out: 1957112ed2d3SChris Wilson hang_fini(&h); 1958112ed2d3SChris Wilson return err; 1959112ed2d3SChris Wilson } 1960112ed2d3SChris Wilson 1961f6470c9bSMichal Wajdeczko static int igt_reset_engines_atomic(void *arg) 1962112ed2d3SChris Wilson { 1963cb823ed9SChris Wilson struct intel_gt *gt = arg; 1964f6470c9bSMichal Wajdeczko const typeof(*igt_atomic_phases) *p; 1965112ed2d3SChris Wilson int err = 0; 1966112ed2d3SChris Wilson 1967f6470c9bSMichal Wajdeczko /* Check that the engines resets are usable from atomic context */ 1968f6470c9bSMichal Wajdeczko 1969260e6b71SChris Wilson if (!intel_has_reset_engine(gt)) 1970f6470c9bSMichal Wajdeczko return 0; 1971f6470c9bSMichal Wajdeczko 1972065273f7SDaniele Ceraolo Spurio if (intel_uc_uses_guc_submission(>->uc)) 1973f6470c9bSMichal Wajdeczko return 0; 1974112ed2d3SChris Wilson 1975cb823ed9SChris Wilson igt_global_reset_lock(gt); 1976112ed2d3SChris Wilson 1977112ed2d3SChris Wilson /* Flush any requests before we get started and check basics */ 1978cb823ed9SChris Wilson if (!igt_force_reset(gt)) 1979112ed2d3SChris Wilson goto unlock; 1980112ed2d3SChris Wilson 1981f6470c9bSMichal Wajdeczko for (p = igt_atomic_phases; p->name; p++) { 1982112ed2d3SChris Wilson struct intel_engine_cs *engine; 1983112ed2d3SChris Wilson enum intel_engine_id id; 1984112ed2d3SChris Wilson 19855d904e3cSTvrtko Ursulin for_each_engine(engine, gt, id) { 1986112ed2d3SChris Wilson err = igt_atomic_reset_engine(engine, p); 1987112ed2d3SChris Wilson if (err) 1988112ed2d3SChris Wilson goto out; 1989112ed2d3SChris Wilson } 1990112ed2d3SChris Wilson } 1991112ed2d3SChris Wilson 1992112ed2d3SChris Wilson out: 1993112ed2d3SChris Wilson /* As we poke around the guts, do a full reset before continuing. */ 1994cb823ed9SChris Wilson igt_force_reset(gt); 1995112ed2d3SChris Wilson unlock: 1996cb823ed9SChris Wilson igt_global_reset_unlock(gt); 1997112ed2d3SChris Wilson 1998112ed2d3SChris Wilson return err; 1999112ed2d3SChris Wilson } 2000112ed2d3SChris Wilson 2001112ed2d3SChris Wilson int intel_hangcheck_live_selftests(struct drm_i915_private *i915) 2002112ed2d3SChris Wilson { 2003112ed2d3SChris Wilson static const struct i915_subtest tests[] = { 2004112ed2d3SChris Wilson SUBTEST(igt_hang_sanitycheck), 2005112ed2d3SChris Wilson SUBTEST(igt_reset_nop), 2006112ed2d3SChris Wilson SUBTEST(igt_reset_nop_engine), 2007112ed2d3SChris Wilson SUBTEST(igt_reset_idle_engine), 2008112ed2d3SChris Wilson SUBTEST(igt_reset_active_engine), 2009106a9368SChris Wilson SUBTEST(igt_reset_fail_engine), 2010112ed2d3SChris Wilson SUBTEST(igt_reset_engines), 2011f6470c9bSMichal Wajdeczko SUBTEST(igt_reset_engines_atomic), 2012112ed2d3SChris Wilson SUBTEST(igt_reset_queue), 2013112ed2d3SChris Wilson SUBTEST(igt_reset_wait), 2014112ed2d3SChris Wilson SUBTEST(igt_reset_evict_ggtt), 2015112ed2d3SChris Wilson SUBTEST(igt_reset_evict_ppgtt), 2016112ed2d3SChris Wilson SUBTEST(igt_reset_evict_fence), 2017112ed2d3SChris Wilson SUBTEST(igt_handle_error), 2018112ed2d3SChris Wilson }; 2019c14adcbdSMichał Winiarski struct intel_gt *gt = to_gt(i915); 2020112ed2d3SChris Wilson intel_wakeref_t wakeref; 2021112ed2d3SChris Wilson int err; 2022112ed2d3SChris Wilson 2023260e6b71SChris Wilson if (!intel_has_gpu_reset(gt)) 2024112ed2d3SChris Wilson return 0; 2025112ed2d3SChris Wilson 2026cb823ed9SChris Wilson if (intel_gt_is_wedged(gt)) 2027112ed2d3SChris Wilson return -EIO; /* we're long past hope of a successful reset */ 2028112ed2d3SChris Wilson 2029cd6a8513SChris Wilson wakeref = intel_runtime_pm_get(gt->uncore->rpm); 2030112ed2d3SChris Wilson 2031cb823ed9SChris Wilson err = intel_gt_live_subtests(tests, gt); 2032112ed2d3SChris Wilson 2033cd6a8513SChris Wilson intel_runtime_pm_put(gt->uncore->rpm, wakeref); 2034112ed2d3SChris Wilson 2035112ed2d3SChris Wilson return err; 2036112ed2d3SChris Wilson } 2037