/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2008-2018 Intel Corporation
 */

#include <linux/sched/mm.h>
#include <linux/stop_machine.h>

#include "display/intel_display_types.h"
#include "display/intel_overlay.h"

#include "gem/i915_gem_context.h"

#include "i915_drv.h"
#include "i915_gpu_error.h"
#include "i915_irq.h"
#include "intel_engine_pm.h"
#include "intel_gt.h"
#include "intel_gt_pm.h"
#include "intel_reset.h"

#include "uc/intel_guc.h"
#include "uc/intel_guc_submission.h"

#define RESET_MAX_RETRIES 3

/* XXX How to handle concurrent GGTT updates using tiling registers? */
#define RESET_UNDER_STOP_MACHINE 0

static void rmw_set_fw(struct intel_uncore *uncore, i915_reg_t reg, u32 set)
{
	intel_uncore_rmw_fw(uncore, reg, 0, set);
}

static void rmw_clear_fw(struct intel_uncore *uncore, i915_reg_t reg, u32 clr)
{
	intel_uncore_rmw_fw(uncore, reg, clr, 0);
}

static void engine_skip_context(struct i915_request *rq)
{
	struct intel_engine_cs *engine = rq->engine;
	struct intel_context *hung_ctx = rq->context;

	if (!i915_request_is_active(rq))
		return;

	lockdep_assert_held(&engine->active.lock);
	list_for_each_entry_continue(rq, &engine->active.requests, sched.link)
		if (rq->context == hung_ctx)
			i915_request_skip(rq, -EIO);
}

static void client_mark_guilty(struct i915_gem_context *ctx, bool banned)
{
	struct drm_i915_file_private *file_priv = ctx->file_priv;
	unsigned long prev_hang;
	unsigned int score;

	if (IS_ERR_OR_NULL(file_priv))
		return;

	score = 0;
	if (banned)
		score = I915_CLIENT_SCORE_CONTEXT_BAN;

	prev_hang = xchg(&file_priv->hang_timestamp, jiffies);
	if (time_before(jiffies, prev_hang + I915_CLIENT_FAST_HANG_JIFFIES))
		score += I915_CLIENT_SCORE_HANG_FAST;

	if (score) {
		atomic_add(score, &file_priv->ban_score);

		DRM_DEBUG_DRIVER("client %s: gained %u ban score, now %u\n",
				 ctx->name, score,
				 atomic_read(&file_priv->ban_score));
	}
}

static bool mark_guilty(struct i915_request *rq)
{
	struct i915_gem_context *ctx;
	unsigned long prev_hang;
	bool banned;
	int i;

	rcu_read_lock();
	ctx = rcu_dereference(rq->context->gem_context);
	if (ctx && !kref_get_unless_zero(&ctx->ref))
		ctx = NULL;
	rcu_read_unlock();
	if (!ctx)
		return false;

	if (i915_gem_context_is_closed(ctx)) {
		intel_context_set_banned(rq->context);
		banned = true;
		goto out;
	}

	atomic_inc(&ctx->guilty_count);

	/* Cool contexts are too cool to be banned! (Used for reset testing.) */
	if (!i915_gem_context_is_bannable(ctx)) {
		banned = false;
		goto out;
	}

	dev_notice(ctx->i915->drm.dev,
		   "%s context reset due to GPU hang\n",
		   ctx->name);

	/* Record the timestamp for the last N hangs */
	prev_hang = ctx->hang_timestamp[0];
	for (i = 0; i < ARRAY_SIZE(ctx->hang_timestamp) - 1; i++)
		ctx->hang_timestamp[i] = ctx->hang_timestamp[i + 1];
	ctx->hang_timestamp[i] = jiffies;

	/* If we have hung N+1 times in rapid succession, we ban the context! */
	banned = !i915_gem_context_is_recoverable(ctx);
	if (time_before(jiffies, prev_hang + CONTEXT_FAST_HANG_JIFFIES))
		banned = true;
	if (banned) {
		DRM_DEBUG_DRIVER("context %s: guilty %d, banned\n",
				 ctx->name, atomic_read(&ctx->guilty_count));
		intel_context_set_banned(rq->context);
	}

	client_mark_guilty(ctx, banned);

out:
	i915_gem_context_put(ctx);
	return banned;
}

static void mark_innocent(struct i915_request *rq)
{
	struct i915_gem_context *ctx;

	rcu_read_lock();
	ctx = rcu_dereference(rq->context->gem_context);
	if (ctx)
		atomic_inc(&ctx->active_count);
	rcu_read_unlock();
}

void __i915_request_reset(struct i915_request *rq, bool guilty)
{
	RQ_TRACE(rq, "guilty? %s\n", yesno(guilty));

	GEM_BUG_ON(i915_request_completed(rq));

	rcu_read_lock(); /* protect the GEM context */
	if (guilty) {
		i915_request_skip(rq, -EIO);
		if (mark_guilty(rq))
			engine_skip_context(rq);
	} else {
		dma_fence_set_error(&rq->fence, -EAGAIN);
		mark_innocent(rq);
	}
	rcu_read_unlock();
}

static bool i915_in_reset(struct pci_dev *pdev)
{
	u8 gdrst;

	pci_read_config_byte(pdev, I915_GDRST, &gdrst);
	return gdrst & GRDOM_RESET_STATUS;
}

static int i915_do_reset(struct intel_gt *gt,
			 intel_engine_mask_t engine_mask,
			 unsigned int retry)
{
	struct pci_dev *pdev = gt->i915->drm.pdev;
	int err;

	/* Assert reset for at least 20 usec, and wait for acknowledgement. */
	pci_write_config_byte(pdev, I915_GDRST, GRDOM_RESET_ENABLE);
	udelay(50);
	err = wait_for_atomic(i915_in_reset(pdev), 50);

	/* Clear the reset request. */
	pci_write_config_byte(pdev, I915_GDRST, 0);
	udelay(50);
	if (!err)
		err = wait_for_atomic(!i915_in_reset(pdev), 50);

	return err;
}

static bool g4x_reset_complete(struct pci_dev *pdev)
{
	u8 gdrst;

	pci_read_config_byte(pdev, I915_GDRST, &gdrst);
	return (gdrst & GRDOM_RESET_ENABLE) == 0;
}

static int g33_do_reset(struct intel_gt *gt,
			intel_engine_mask_t engine_mask,
			unsigned int retry)
{
	struct pci_dev *pdev = gt->i915->drm.pdev;

	pci_write_config_byte(pdev, I915_GDRST, GRDOM_RESET_ENABLE);
	return wait_for_atomic(g4x_reset_complete(pdev), 50);
}

static int g4x_do_reset(struct intel_gt *gt,
			intel_engine_mask_t engine_mask,
			unsigned int retry)
{
	struct pci_dev *pdev = gt->i915->drm.pdev;
	struct intel_uncore *uncore = gt->uncore;
	int ret;

	/* WaVcpClkGateDisableForMediaReset:ctg,elk */
	rmw_set_fw(uncore, VDECCLK_GATE_D, VCP_UNIT_CLOCK_GATE_DISABLE);
	intel_uncore_posting_read_fw(uncore, VDECCLK_GATE_D);

	pci_write_config_byte(pdev, I915_GDRST,
			      GRDOM_MEDIA | GRDOM_RESET_ENABLE);
	ret = wait_for_atomic(g4x_reset_complete(pdev), 50);
	if (ret) {
		DRM_DEBUG_DRIVER("Wait for media reset failed\n");
		goto out;
	}

	pci_write_config_byte(pdev, I915_GDRST,
			      GRDOM_RENDER | GRDOM_RESET_ENABLE);
	ret = wait_for_atomic(g4x_reset_complete(pdev), 50);
	if (ret) {
		DRM_DEBUG_DRIVER("Wait for render reset failed\n");
		goto out;
	}

out:
	pci_write_config_byte(pdev, I915_GDRST, 0);

	rmw_clear_fw(uncore, VDECCLK_GATE_D, VCP_UNIT_CLOCK_GATE_DISABLE);
	intel_uncore_posting_read_fw(uncore, VDECCLK_GATE_D);

	return ret;
}

static int ilk_do_reset(struct intel_gt *gt,
			intel_engine_mask_t engine_mask,
			unsigned int retry)
{
	struct intel_uncore *uncore = gt->uncore;
	int ret;

	intel_uncore_write_fw(uncore, ILK_GDSR,
			      ILK_GRDOM_RENDER | ILK_GRDOM_RESET_ENABLE);
	ret = __intel_wait_for_register_fw(uncore, ILK_GDSR,
					   ILK_GRDOM_RESET_ENABLE, 0,
					   5000, 0,
					   NULL);
	if (ret) {
		DRM_DEBUG_DRIVER("Wait for render reset failed\n");
		goto out;
	}

	intel_uncore_write_fw(uncore, ILK_GDSR,
			      ILK_GRDOM_MEDIA | ILK_GRDOM_RESET_ENABLE);
	ret = __intel_wait_for_register_fw(uncore, ILK_GDSR,
					   ILK_GRDOM_RESET_ENABLE, 0,
					   5000, 0,
					   NULL);
	if (ret) {
		DRM_DEBUG_DRIVER("Wait for media reset failed\n");
		goto out;
	}

out:
	intel_uncore_write_fw(uncore, ILK_GDSR, 0);
	intel_uncore_posting_read_fw(uncore, ILK_GDSR);
	return ret;
}

/* Reset the hardware domains (GENX_GRDOM_*) specified by mask */
static int gen6_hw_domain_reset(struct intel_gt *gt, u32 hw_domain_mask)
{
	struct intel_uncore *uncore = gt->uncore;
	int err;

	/*
	 * GEN6_GDRST is not in the gt power well, no need to check
	 * for fifo space for the write or forcewake the chip for
	 * the read
	 */
	intel_uncore_write_fw(uncore, GEN6_GDRST, hw_domain_mask);

	/* Wait for the device to ack the reset requests */
	err = __intel_wait_for_register_fw(uncore,
					   GEN6_GDRST, hw_domain_mask, 0,
					   500, 0,
					   NULL);
	if (err)
		DRM_DEBUG_DRIVER("Wait for 0x%08x engines reset failed\n",
				 hw_domain_mask);

	return err;
}

static int gen6_reset_engines(struct intel_gt *gt,
			      intel_engine_mask_t engine_mask,
			      unsigned int retry)
{
	static const u32 hw_engine_mask[] = {
		[RCS0]  = GEN6_GRDOM_RENDER,
		[BCS0]  = GEN6_GRDOM_BLT,
		[VCS0]  = GEN6_GRDOM_MEDIA,
		[VCS1]  = GEN8_GRDOM_MEDIA2,
		[VECS0] = GEN6_GRDOM_VECS,
	};
	struct intel_engine_cs *engine;
	u32 hw_mask;

	if (engine_mask == ALL_ENGINES) {
		hw_mask = GEN6_GRDOM_FULL;
	} else {
		intel_engine_mask_t tmp;

		hw_mask = 0;
		for_each_engine_masked(engine, gt, engine_mask, tmp) {
			GEM_BUG_ON(engine->id >= ARRAY_SIZE(hw_engine_mask));
			hw_mask |= hw_engine_mask[engine->id];
		}
	}

	return gen6_hw_domain_reset(gt, hw_mask);
}

static int gen11_lock_sfc(struct intel_engine_cs *engine, u32 *hw_mask)
{
	struct intel_uncore *uncore = engine->uncore;
	u8 vdbox_sfc_access = RUNTIME_INFO(engine->i915)->vdbox_sfc_access;
	i915_reg_t sfc_forced_lock, sfc_forced_lock_ack;
	u32 sfc_forced_lock_bit, sfc_forced_lock_ack_bit;
	i915_reg_t sfc_usage;
	u32 sfc_usage_bit;
	u32 sfc_reset_bit;
	int ret;

	switch (engine->class) {
	case VIDEO_DECODE_CLASS:
		if ((BIT(engine->instance) & vdbox_sfc_access) == 0)
			return 0;

		sfc_forced_lock = GEN11_VCS_SFC_FORCED_LOCK(engine);
		sfc_forced_lock_bit = GEN11_VCS_SFC_FORCED_LOCK_BIT;

		sfc_forced_lock_ack = GEN11_VCS_SFC_LOCK_STATUS(engine);
		sfc_forced_lock_ack_bit = GEN11_VCS_SFC_LOCK_ACK_BIT;

		sfc_usage = GEN11_VCS_SFC_LOCK_STATUS(engine);
		sfc_usage_bit = GEN11_VCS_SFC_USAGE_BIT;
		sfc_reset_bit = GEN11_VCS_SFC_RESET_BIT(engine->instance);
		break;

	case VIDEO_ENHANCEMENT_CLASS:
		sfc_forced_lock = GEN11_VECS_SFC_FORCED_LOCK(engine);
		sfc_forced_lock_bit = GEN11_VECS_SFC_FORCED_LOCK_BIT;

		sfc_forced_lock_ack = GEN11_VECS_SFC_LOCK_ACK(engine);
		sfc_forced_lock_ack_bit = GEN11_VECS_SFC_LOCK_ACK_BIT;

		sfc_usage = GEN11_VECS_SFC_USAGE(engine);
		sfc_usage_bit = GEN11_VECS_SFC_USAGE_BIT;
		sfc_reset_bit = GEN11_VECS_SFC_RESET_BIT(engine->instance);
		break;

	default:
		return 0;
	}

	/*
	 * If the engine is using a SFC, tell the engine that a software reset
	 * is going to happen. The engine will then try to force lock the SFC.
	 * If SFC ends up being locked to the engine we want to reset, we have
	 * to reset it as well (we will unlock it once the reset sequence is
	 * completed).
	 */
	if (!(intel_uncore_read_fw(uncore, sfc_usage) & sfc_usage_bit))
		return 0;

	rmw_set_fw(uncore, sfc_forced_lock, sfc_forced_lock_bit);

	ret = __intel_wait_for_register_fw(uncore,
					   sfc_forced_lock_ack,
					   sfc_forced_lock_ack_bit,
					   sfc_forced_lock_ack_bit,
					   1000, 0, NULL);

	/* Was the SFC released while we were trying to lock it? */
	if (!(intel_uncore_read_fw(uncore, sfc_usage) & sfc_usage_bit))
		return 0;

	if (ret) {
		DRM_DEBUG_DRIVER("Wait for SFC forced lock ack failed\n");
		return ret;
	}

	*hw_mask |= sfc_reset_bit;
	return 0;
}

static void gen11_unlock_sfc(struct intel_engine_cs *engine)
{
	struct intel_uncore *uncore = engine->uncore;
	u8 vdbox_sfc_access = RUNTIME_INFO(engine->i915)->vdbox_sfc_access;
	i915_reg_t sfc_forced_lock;
	u32 sfc_forced_lock_bit;

	switch (engine->class) {
	case VIDEO_DECODE_CLASS:
		if ((BIT(engine->instance) & vdbox_sfc_access) == 0)
			return;

		sfc_forced_lock = GEN11_VCS_SFC_FORCED_LOCK(engine);
		sfc_forced_lock_bit = GEN11_VCS_SFC_FORCED_LOCK_BIT;
		break;

	case VIDEO_ENHANCEMENT_CLASS:
		sfc_forced_lock = GEN11_VECS_SFC_FORCED_LOCK(engine);
		sfc_forced_lock_bit = GEN11_VECS_SFC_FORCED_LOCK_BIT;
		break;

	default:
		return;
	}

	rmw_clear_fw(uncore, sfc_forced_lock, sfc_forced_lock_bit);
}

static int gen11_reset_engines(struct intel_gt *gt,
			       intel_engine_mask_t engine_mask,
			       unsigned int retry)
{
	static const u32 hw_engine_mask[] = {
		[RCS0]  = GEN11_GRDOM_RENDER,
		[BCS0]  = GEN11_GRDOM_BLT,
		[VCS0]  = GEN11_GRDOM_MEDIA,
		[VCS1]  = GEN11_GRDOM_MEDIA2,
		[VCS2]  = GEN11_GRDOM_MEDIA3,
		[VCS3]  = GEN11_GRDOM_MEDIA4,
		[VECS0] = GEN11_GRDOM_VECS,
		[VECS1] = GEN11_GRDOM_VECS2,
	};
	struct intel_engine_cs *engine;
	intel_engine_mask_t tmp;
	u32 hw_mask;
	int ret;

	if (engine_mask == ALL_ENGINES) {
		hw_mask = GEN11_GRDOM_FULL;
	} else {
		hw_mask = 0;
		for_each_engine_masked(engine, gt, engine_mask, tmp) {
			GEM_BUG_ON(engine->id >= ARRAY_SIZE(hw_engine_mask));
			hw_mask |= hw_engine_mask[engine->id];
			ret = gen11_lock_sfc(engine, &hw_mask);
			if (ret)
				goto sfc_unlock;
		}
	}

	ret = gen6_hw_domain_reset(gt, hw_mask);

sfc_unlock:
	/*
	 * We unlock the SFC based on the lock status and not the result of
	 * gen11_lock_sfc to make sure that we clean properly if something
	 * wrong happened during the lock (e.g. lock acquired after timeout
	 * expiration).
480 */ 481 if (engine_mask != ALL_ENGINES) 482 for_each_engine_masked(engine, gt, engine_mask, tmp) 483 gen11_unlock_sfc(engine); 484 485 return ret; 486 } 487 488 static int gen8_engine_reset_prepare(struct intel_engine_cs *engine) 489 { 490 struct intel_uncore *uncore = engine->uncore; 491 const i915_reg_t reg = RING_RESET_CTL(engine->mmio_base); 492 u32 request, mask, ack; 493 int ret; 494 495 ack = intel_uncore_read_fw(uncore, reg); 496 if (ack & RESET_CTL_CAT_ERROR) { 497 /* 498 * For catastrophic errors, ready-for-reset sequence 499 * needs to be bypassed: HAS#396813 500 */ 501 request = RESET_CTL_CAT_ERROR; 502 mask = RESET_CTL_CAT_ERROR; 503 504 /* Catastrophic errors need to be cleared by HW */ 505 ack = 0; 506 } else if (!(ack & RESET_CTL_READY_TO_RESET)) { 507 request = RESET_CTL_REQUEST_RESET; 508 mask = RESET_CTL_READY_TO_RESET; 509 ack = RESET_CTL_READY_TO_RESET; 510 } else { 511 return 0; 512 } 513 514 intel_uncore_write_fw(uncore, reg, _MASKED_BIT_ENABLE(request)); 515 ret = __intel_wait_for_register_fw(uncore, reg, mask, ack, 516 700, 0, NULL); 517 if (ret) 518 DRM_ERROR("%s reset request timed out: {request: %08x, RESET_CTL: %08x}\n", 519 engine->name, request, 520 intel_uncore_read_fw(uncore, reg)); 521 522 return ret; 523 } 524 525 static void gen8_engine_reset_cancel(struct intel_engine_cs *engine) 526 { 527 intel_uncore_write_fw(engine->uncore, 528 RING_RESET_CTL(engine->mmio_base), 529 _MASKED_BIT_DISABLE(RESET_CTL_REQUEST_RESET)); 530 } 531 532 static int gen8_reset_engines(struct intel_gt *gt, 533 intel_engine_mask_t engine_mask, 534 unsigned int retry) 535 { 536 struct intel_engine_cs *engine; 537 const bool reset_non_ready = retry >= 1; 538 intel_engine_mask_t tmp; 539 int ret; 540 541 for_each_engine_masked(engine, gt, engine_mask, tmp) { 542 ret = gen8_engine_reset_prepare(engine); 543 if (ret && !reset_non_ready) 544 goto skip_reset; 545 546 /* 547 * If this is not the first failed attempt to prepare, 548 * we decide to proceed anyway. 549 * 550 * By doing so we risk context corruption and with 551 * some gens (kbl), possible system hang if reset 552 * happens during active bb execution. 553 * 554 * We rather take context corruption instead of 555 * failed reset with a wedged driver/gpu. And 556 * active bb execution case should be covered by 557 * stop_engines() we have before the reset. 
558 */ 559 } 560 561 if (INTEL_GEN(gt->i915) >= 11) 562 ret = gen11_reset_engines(gt, engine_mask, retry); 563 else 564 ret = gen6_reset_engines(gt, engine_mask, retry); 565 566 skip_reset: 567 for_each_engine_masked(engine, gt, engine_mask, tmp) 568 gen8_engine_reset_cancel(engine); 569 570 return ret; 571 } 572 573 static int mock_reset(struct intel_gt *gt, 574 intel_engine_mask_t mask, 575 unsigned int retry) 576 { 577 return 0; 578 } 579 580 typedef int (*reset_func)(struct intel_gt *, 581 intel_engine_mask_t engine_mask, 582 unsigned int retry); 583 584 static reset_func intel_get_gpu_reset(const struct intel_gt *gt) 585 { 586 struct drm_i915_private *i915 = gt->i915; 587 588 if (is_mock_gt(gt)) 589 return mock_reset; 590 else if (INTEL_GEN(i915) >= 8) 591 return gen8_reset_engines; 592 else if (INTEL_GEN(i915) >= 6) 593 return gen6_reset_engines; 594 else if (INTEL_GEN(i915) >= 5) 595 return ilk_do_reset; 596 else if (IS_G4X(i915)) 597 return g4x_do_reset; 598 else if (IS_G33(i915) || IS_PINEVIEW(i915)) 599 return g33_do_reset; 600 else if (INTEL_GEN(i915) >= 3) 601 return i915_do_reset; 602 else 603 return NULL; 604 } 605 606 int __intel_gt_reset(struct intel_gt *gt, intel_engine_mask_t engine_mask) 607 { 608 const int retries = engine_mask == ALL_ENGINES ? RESET_MAX_RETRIES : 1; 609 reset_func reset; 610 int ret = -ETIMEDOUT; 611 int retry; 612 613 reset = intel_get_gpu_reset(gt); 614 if (!reset) 615 return -ENODEV; 616 617 /* 618 * If the power well sleeps during the reset, the reset 619 * request may be dropped and never completes (causing -EIO). 620 */ 621 intel_uncore_forcewake_get(gt->uncore, FORCEWAKE_ALL); 622 for (retry = 0; ret == -ETIMEDOUT && retry < retries; retry++) { 623 GT_TRACE(gt, "engine_mask=%x\n", engine_mask); 624 preempt_disable(); 625 ret = reset(gt, engine_mask, retry); 626 preempt_enable(); 627 } 628 intel_uncore_forcewake_put(gt->uncore, FORCEWAKE_ALL); 629 630 return ret; 631 } 632 633 bool intel_has_gpu_reset(const struct intel_gt *gt) 634 { 635 if (!i915_modparams.reset) 636 return NULL; 637 638 return intel_get_gpu_reset(gt); 639 } 640 641 bool intel_has_reset_engine(const struct intel_gt *gt) 642 { 643 if (i915_modparams.reset < 2) 644 return false; 645 646 return INTEL_INFO(gt->i915)->has_reset_engine; 647 } 648 649 int intel_reset_guc(struct intel_gt *gt) 650 { 651 u32 guc_domain = 652 INTEL_GEN(gt->i915) >= 11 ? GEN11_GRDOM_GUC : GEN9_GRDOM_GUC; 653 int ret; 654 655 GEM_BUG_ON(!HAS_GT_UC(gt->i915)); 656 657 intel_uncore_forcewake_get(gt->uncore, FORCEWAKE_ALL); 658 ret = gen6_hw_domain_reset(gt, guc_domain); 659 intel_uncore_forcewake_put(gt->uncore, FORCEWAKE_ALL); 660 661 return ret; 662 } 663 664 /* 665 * Ensure irq handler finishes, and not run again. 666 * Also return the active request so that we only search for it once. 667 */ 668 static void reset_prepare_engine(struct intel_engine_cs *engine) 669 { 670 /* 671 * During the reset sequence, we must prevent the engine from 672 * entering RC6. As the context state is undefined until we restart 673 * the engine, if it does enter RC6 during the reset, the state 674 * written to the powercontext is undefined and so we may lose 675 * GPU state upon resume, i.e. fail to restart after a reset. 
676 */ 677 intel_uncore_forcewake_get(engine->uncore, FORCEWAKE_ALL); 678 if (engine->reset.prepare) 679 engine->reset.prepare(engine); 680 } 681 682 static void revoke_mmaps(struct intel_gt *gt) 683 { 684 int i; 685 686 for (i = 0; i < gt->ggtt->num_fences; i++) { 687 struct drm_vma_offset_node *node; 688 struct i915_vma *vma; 689 u64 vma_offset; 690 691 vma = READ_ONCE(gt->ggtt->fence_regs[i].vma); 692 if (!vma) 693 continue; 694 695 if (!i915_vma_has_userfault(vma)) 696 continue; 697 698 GEM_BUG_ON(vma->fence != >->ggtt->fence_regs[i]); 699 700 if (!vma->mmo) 701 continue; 702 703 node = &vma->mmo->vma_node; 704 vma_offset = vma->ggtt_view.partial.offset << PAGE_SHIFT; 705 706 unmap_mapping_range(gt->i915->drm.anon_inode->i_mapping, 707 drm_vma_node_offset_addr(node) + vma_offset, 708 vma->size, 709 1); 710 } 711 } 712 713 static intel_engine_mask_t reset_prepare(struct intel_gt *gt) 714 { 715 struct intel_engine_cs *engine; 716 intel_engine_mask_t awake = 0; 717 enum intel_engine_id id; 718 719 for_each_engine(engine, gt, id) { 720 if (intel_engine_pm_get_if_awake(engine)) 721 awake |= engine->mask; 722 reset_prepare_engine(engine); 723 } 724 725 intel_uc_reset_prepare(>->uc); 726 727 return awake; 728 } 729 730 static void gt_revoke(struct intel_gt *gt) 731 { 732 revoke_mmaps(gt); 733 } 734 735 static int gt_reset(struct intel_gt *gt, intel_engine_mask_t stalled_mask) 736 { 737 struct intel_engine_cs *engine; 738 enum intel_engine_id id; 739 int err; 740 741 /* 742 * Everything depends on having the GTT running, so we need to start 743 * there. 744 */ 745 err = i915_ggtt_enable_hw(gt->i915); 746 if (err) 747 return err; 748 749 for_each_engine(engine, gt, id) 750 __intel_engine_reset(engine, stalled_mask & engine->mask); 751 752 i915_gem_restore_fences(gt->ggtt); 753 754 return err; 755 } 756 757 static void reset_finish_engine(struct intel_engine_cs *engine) 758 { 759 if (engine->reset.finish) 760 engine->reset.finish(engine); 761 intel_uncore_forcewake_put(engine->uncore, FORCEWAKE_ALL); 762 763 intel_engine_signal_breadcrumbs(engine); 764 } 765 766 static void reset_finish(struct intel_gt *gt, intel_engine_mask_t awake) 767 { 768 struct intel_engine_cs *engine; 769 enum intel_engine_id id; 770 771 for_each_engine(engine, gt, id) { 772 reset_finish_engine(engine); 773 if (awake & engine->mask) 774 intel_engine_pm_put(engine); 775 } 776 } 777 778 static void nop_submit_request(struct i915_request *request) 779 { 780 struct intel_engine_cs *engine = request->engine; 781 unsigned long flags; 782 783 RQ_TRACE(request, "-EIO\n"); 784 dma_fence_set_error(&request->fence, -EIO); 785 786 spin_lock_irqsave(&engine->active.lock, flags); 787 __i915_request_submit(request); 788 i915_request_mark_complete(request); 789 spin_unlock_irqrestore(&engine->active.lock, flags); 790 791 intel_engine_signal_breadcrumbs(engine); 792 } 793 794 static void __intel_gt_set_wedged(struct intel_gt *gt) 795 { 796 struct intel_engine_cs *engine; 797 intel_engine_mask_t awake; 798 enum intel_engine_id id; 799 800 if (test_bit(I915_WEDGED, >->reset.flags)) 801 return; 802 803 if (GEM_SHOW_DEBUG() && !intel_engines_are_idle(gt)) { 804 struct drm_printer p = drm_debug_printer(__func__); 805 806 for_each_engine(engine, gt, id) 807 intel_engine_dump(engine, &p, "%s\n", engine->name); 808 } 809 810 GT_TRACE(gt, "start\n"); 811 812 /* 813 * First, stop submission to hw, but do not yet complete requests by 814 * rolling the global seqno forward (since this would complete requests 815 * for which we haven't set the fence 
	 */
	awake = reset_prepare(gt);

	/* Even if the GPU reset fails, it should still stop the engines */
	if (!INTEL_INFO(gt->i915)->gpu_reset_clobbers_display)
		__intel_gt_reset(gt, ALL_ENGINES);

	for_each_engine(engine, gt, id)
		engine->submit_request = nop_submit_request;

	/*
	 * Make sure no request can slip through without getting completed by
	 * either this call here to intel_engine_write_global_seqno, or the one
	 * in nop_submit_request.
	 */
	synchronize_rcu_expedited();
	set_bit(I915_WEDGED, &gt->reset.flags);

	/* Mark all executing requests as skipped */
	for_each_engine(engine, gt, id)
		if (engine->reset.cancel)
			engine->reset.cancel(engine);

	reset_finish(gt, awake);

	GT_TRACE(gt, "end\n");
}

void intel_gt_set_wedged(struct intel_gt *gt)
{
	intel_wakeref_t wakeref;

	mutex_lock(&gt->reset.mutex);
	with_intel_runtime_pm(gt->uncore->rpm, wakeref)
		__intel_gt_set_wedged(gt);
	mutex_unlock(&gt->reset.mutex);
}

static bool __intel_gt_unset_wedged(struct intel_gt *gt)
{
	struct intel_gt_timelines *timelines = &gt->timelines;
	struct intel_timeline *tl;
	bool ok;

	if (!test_bit(I915_WEDGED, &gt->reset.flags))
		return true;

	/* Never fully initialised, recovery impossible */
	if (test_bit(I915_WEDGED_ON_INIT, &gt->reset.flags))
		return false;

	GT_TRACE(gt, "start\n");

	/*
	 * Before unwedging, make sure that all pending operations
	 * are flushed and errored out - we may have requests waiting upon
	 * third party fences. We marked all inflight requests as EIO, and
	 * every execbuf since returned EIO, for consistency we want all
	 * the currently pending requests to also be marked as EIO, which
	 * is done inside our nop_submit_request - and so we must wait.
	 *
	 * No more can be submitted until we reset the wedged bit.
	 */
	spin_lock(&timelines->lock);
	list_for_each_entry(tl, &timelines->active_list, link) {
		struct dma_fence *fence;

		fence = i915_active_fence_get(&tl->last_request);
		if (!fence)
			continue;

		spin_unlock(&timelines->lock);

		/*
		 * All internal dependencies (i915_requests) will have
		 * been flushed by the set-wedge, but we may be stuck waiting
		 * for external fences. These should all be capped to 10s
		 * (I915_FENCE_TIMEOUT) so this wait should not be unbounded
		 * in the worst case.
		 */
		dma_fence_default_wait(fence, false, MAX_SCHEDULE_TIMEOUT);
		dma_fence_put(fence);

		/* Restart iteration after dropping lock */
		spin_lock(&timelines->lock);
		tl = list_entry(&timelines->active_list, typeof(*tl), link);
	}
	spin_unlock(&timelines->lock);

	/* We must reset pending GPU events before restoring our submission */
	ok = !HAS_EXECLISTS(gt->i915); /* XXX better agnosticism desired */
	if (!INTEL_INFO(gt->i915)->gpu_reset_clobbers_display)
		ok = __intel_gt_reset(gt, ALL_ENGINES) == 0;
	if (!ok) {
		/*
		 * Warn CI about the unrecoverable wedged condition.
		 * Time for a reboot.
		 */
		add_taint_for_CI(TAINT_WARN);
		return false;
	}

	/*
	 * Undo nop_submit_request. We prevent all new i915 requests from
	 * being queued (by disallowing execbuf whilst wedged) so having
	 * waited for all active requests above, we know the system is idle
	 * and do not have to worry about a thread being inside
	 * engine->submit_request() as we swap over. So unlike installing
	 * the nop_submit_request on reset, we can do this from normal
	 * context and do not require stop_machine().
	 */
	intel_engines_reset_default_submission(gt);

	GT_TRACE(gt, "end\n");

	smp_mb__before_atomic(); /* complete takeover before enabling execbuf */
	clear_bit(I915_WEDGED, &gt->reset.flags);

	return true;
}

bool intel_gt_unset_wedged(struct intel_gt *gt)
{
	bool result;

	mutex_lock(&gt->reset.mutex);
	result = __intel_gt_unset_wedged(gt);
	mutex_unlock(&gt->reset.mutex);

	return result;
}

static int do_reset(struct intel_gt *gt, intel_engine_mask_t stalled_mask)
{
	int err, i;

	gt_revoke(gt);

	err = __intel_gt_reset(gt, ALL_ENGINES);
	for (i = 0; err && i < RESET_MAX_RETRIES; i++) {
		msleep(10 * (i + 1));
		err = __intel_gt_reset(gt, ALL_ENGINES);
	}
	if (err)
		return err;

	return gt_reset(gt, stalled_mask);
}

static int resume(struct intel_gt *gt)
{
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	int ret;

	for_each_engine(engine, gt, id) {
		ret = engine->resume(engine);
		if (ret)
			return ret;
	}

	return 0;
}

/**
 * intel_gt_reset - reset chip after a hang
 * @gt: #intel_gt to reset
 * @stalled_mask: mask of the stalled engines with the guilty requests
 * @reason: user error message for why we are resetting
 *
 * Reset the chip. Useful if a hang is detected. Marks the device as wedged
 * on failure.
 *
 * Procedure is fairly simple:
 *  - reset the chip using the reset reg
 *  - re-init context state
 *  - re-init hardware status page
 *  - re-init ring buffer
 *  - re-init interrupt state
 *  - re-init display
 */
void intel_gt_reset(struct intel_gt *gt,
		    intel_engine_mask_t stalled_mask,
		    const char *reason)
{
	intel_engine_mask_t awake;
	int ret;

	GT_TRACE(gt, "flags=%lx\n", gt->reset.flags);

	might_sleep();
	GEM_BUG_ON(!test_bit(I915_RESET_BACKOFF, &gt->reset.flags));
	mutex_lock(&gt->reset.mutex);

	/* Clear any previous failed attempts at recovery. Time to try again. */
	if (!__intel_gt_unset_wedged(gt))
		goto unlock;

	if (reason)
		dev_notice(gt->i915->drm.dev,
			   "Resetting chip for %s\n", reason);
	atomic_inc(&gt->i915->gpu_error.reset_count);

	awake = reset_prepare(gt);

	if (!intel_has_gpu_reset(gt)) {
		if (i915_modparams.reset)
			dev_err(gt->i915->drm.dev, "GPU reset not supported\n");
		else
			DRM_DEBUG_DRIVER("GPU reset disabled\n");
		goto error;
	}

	if (INTEL_INFO(gt->i915)->gpu_reset_clobbers_display)
		intel_runtime_pm_disable_interrupts(gt->i915);

	if (do_reset(gt, stalled_mask)) {
		dev_err(gt->i915->drm.dev, "Failed to reset chip\n");
		goto taint;
	}

	if (INTEL_INFO(gt->i915)->gpu_reset_clobbers_display)
		intel_runtime_pm_enable_interrupts(gt->i915);

	intel_overlay_reset(gt->i915);

	/*
	 * Next we need to restore the context, but we don't use those
	 * yet either...
	 *
	 * Ring buffer needs to be re-initialized in the KMS case, or if X
	 * was running at the time of the reset (i.e. we weren't VT
	 * switched away).
	 */
	ret = intel_gt_init_hw(gt);
	if (ret) {
		DRM_ERROR("Failed to initialise HW following reset (%d)\n",
			  ret);
		goto taint;
	}

	ret = resume(gt);
	if (ret)
		goto taint;

finish:
	reset_finish(gt, awake);
unlock:
	mutex_unlock(&gt->reset.mutex);
	return;

taint:
	/*
	 * History tells us that if we cannot reset the GPU now, we
	 * never will. This then impacts everything that is run
	 * subsequently. On failing the reset, we mark the driver
	 * as wedged, preventing further execution on the GPU.
	 * We also want to go one step further and add a taint to the
	 * kernel so that any subsequent faults can be traced back to
	 * this failure. This is important for CI, where if the
	 * GPU/driver fails we would like to reboot and restart testing
	 * rather than continue on into oblivion. For everyone else,
	 * the system should still plod along, but they have been warned!
	 */
	add_taint_for_CI(TAINT_WARN);
error:
	__intel_gt_set_wedged(gt);
	goto finish;
}

static inline int intel_gt_reset_engine(struct intel_engine_cs *engine)
{
	return __intel_gt_reset(engine->gt, engine->mask);
}

/**
 * intel_engine_reset - reset GPU engine to recover from a hang
 * @engine: engine to reset
 * @msg: reason for GPU reset; or NULL for no dev_notice()
 *
 * Reset a specific GPU engine. Useful if a hang is detected.
 * Returns zero on successful reset or otherwise an error code.
 *
 * Procedure is:
 *  - identifies the request that caused the hang and it is dropped
 *  - reset engine (which will force the engine to idle)
 *  - re-init/configure engine
 */
int intel_engine_reset(struct intel_engine_cs *engine, const char *msg)
{
	struct intel_gt *gt = engine->gt;
	bool uses_guc = intel_engine_in_guc_submission_mode(engine);
	int ret;

	ENGINE_TRACE(engine, "flags=%lx\n", gt->reset.flags);
	GEM_BUG_ON(!test_bit(I915_RESET_ENGINE + engine->id, &gt->reset.flags));

	if (!intel_engine_pm_get_if_awake(engine))
		return 0;

	reset_prepare_engine(engine);

	if (msg)
		dev_notice(engine->i915->drm.dev,
			   "Resetting %s for %s\n", engine->name, msg);
	atomic_inc(&engine->i915->gpu_error.reset_engine_count[engine->uabi_class]);

	if (!uses_guc)
		ret = intel_gt_reset_engine(engine);
	else
		ret = intel_guc_reset_engine(&engine->gt->uc.guc, engine);
	if (ret) {
		/* If we fail here, we expect to fallback to a global reset */
		DRM_DEBUG_DRIVER("%sFailed to reset %s, ret=%d\n",
				 uses_guc ? "GuC " : "",
				 engine->name, ret);
		goto out;
	}

	/*
	 * The request that caused the hang is stuck on elsp, we know the
	 * active request and can drop it, adjust head to skip the offending
	 * request to resume executing remaining requests in the queue.
	 */
	__intel_engine_reset(engine, true);

	/*
	 * The engine and its registers (and workarounds in case of render)
	 * have been reset to their default values. Follow the init_ring
	 * process to program RING_MODE, HWSP and re-enable submission.
	 */
	ret = engine->resume(engine);

out:
	intel_engine_cancel_stop_cs(engine);
	reset_finish_engine(engine);
	intel_engine_pm_put_async(engine);
	return ret;
}

static void intel_gt_reset_global(struct intel_gt *gt,
				  u32 engine_mask,
				  const char *reason)
{
	struct kobject *kobj = &gt->i915->drm.primary->kdev->kobj;
	char *error_event[] = { I915_ERROR_UEVENT "=1", NULL };
	char *reset_event[] = { I915_RESET_UEVENT "=1", NULL };
	char *reset_done_event[] = { I915_ERROR_UEVENT "=0", NULL };
	struct intel_wedge_me w;

	kobject_uevent_env(kobj, KOBJ_CHANGE, error_event);

	DRM_DEBUG_DRIVER("resetting chip\n");
	kobject_uevent_env(kobj, KOBJ_CHANGE, reset_event);

	/* Use a watchdog to ensure that our reset completes */
	intel_wedge_on_timeout(&w, gt, 5 * HZ) {
		intel_prepare_reset(gt->i915);

		/* Flush everyone using a resource about to be clobbered */
		synchronize_srcu_expedited(&gt->reset.backoff_srcu);

		intel_gt_reset(gt, engine_mask, reason);

		intel_finish_reset(gt->i915);
	}

	if (!test_bit(I915_WEDGED, &gt->reset.flags))
		kobject_uevent_env(kobj, KOBJ_CHANGE, reset_done_event);
}

/**
 * intel_gt_handle_error - handle a gpu error
 * @gt: the intel_gt
 * @engine_mask: mask representing engines that are hung
 * @flags: control flags
 * @fmt: Error message format string
 *
 * Do some basic checking of register state at error time and
 * dump it to the syslog. Also call i915_capture_error_state() to make
 * sure we get a record and make it available in debugfs. Fire a uevent
 * so userspace knows something bad happened (should trigger collection
 * of a ring dump etc.).
 */
void intel_gt_handle_error(struct intel_gt *gt,
			   intel_engine_mask_t engine_mask,
			   unsigned long flags,
			   const char *fmt, ...)
{
	struct intel_engine_cs *engine;
	intel_wakeref_t wakeref;
	intel_engine_mask_t tmp;
	char error_msg[80];
	char *msg = NULL;

	if (fmt) {
		va_list args;

		va_start(args, fmt);
		vscnprintf(error_msg, sizeof(error_msg), fmt, args);
		va_end(args);

		msg = error_msg;
	}

	/*
	 * In most cases it's guaranteed that we get here with an RPM
	 * reference held, for example because there is a pending GPU
	 * request that won't finish until the reset is done. This
	 * isn't the case at least when we get here by doing a
	 * simulated reset via debugfs, so get an RPM reference.
	 */
	wakeref = intel_runtime_pm_get(gt->uncore->rpm);

	engine_mask &= INTEL_INFO(gt->i915)->engine_mask;

	if (flags & I915_ERROR_CAPTURE) {
		i915_capture_error_state(gt->i915);
		intel_gt_clear_error_registers(gt, engine_mask);
	}

	/*
	 * Try engine reset when available. We fall back to full reset if
	 * single reset fails.
	 */
	if (intel_has_reset_engine(gt) && !intel_gt_is_wedged(gt)) {
		for_each_engine_masked(engine, gt, engine_mask, tmp) {
			BUILD_BUG_ON(I915_RESET_MODESET >= I915_RESET_ENGINE);
			if (test_and_set_bit(I915_RESET_ENGINE + engine->id,
					     &gt->reset.flags))
				continue;

			if (intel_engine_reset(engine, msg) == 0)
				engine_mask &= ~engine->mask;

			clear_and_wake_up_bit(I915_RESET_ENGINE + engine->id,
					      &gt->reset.flags);
		}
	}

	if (!engine_mask)
		goto out;

	/* Full reset needs the mutex, stop any other user trying to do so. */
	if (test_and_set_bit(I915_RESET_BACKOFF, &gt->reset.flags)) {
		wait_event(gt->reset.queue,
			   !test_bit(I915_RESET_BACKOFF, &gt->reset.flags));
		goto out; /* piggy-back on the other reset */
	}

	/* Make sure i915_reset_trylock() sees the I915_RESET_BACKOFF */
	synchronize_rcu_expedited();

	/* Prevent any other reset-engine attempt. */
	for_each_engine(engine, gt, tmp) {
		while (test_and_set_bit(I915_RESET_ENGINE + engine->id,
					&gt->reset.flags))
			wait_on_bit(&gt->reset.flags,
				    I915_RESET_ENGINE + engine->id,
				    TASK_UNINTERRUPTIBLE);
	}

	intel_gt_reset_global(gt, engine_mask, msg);

	for_each_engine(engine, gt, tmp)
		clear_bit_unlock(I915_RESET_ENGINE + engine->id,
				 &gt->reset.flags);
	clear_bit_unlock(I915_RESET_BACKOFF, &gt->reset.flags);
	smp_mb__after_atomic();
	wake_up_all(&gt->reset.queue);

out:
	intel_runtime_pm_put(gt->uncore->rpm, wakeref);
}
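
/*
 * A hang report is typically raised with a call along these lines; the
 * message and caller are illustrative only, the flag and mask handling
 * follow the definitions above:
 *
 *	intel_gt_handle_error(engine->gt, engine->mask, I915_ERROR_CAPTURE,
 *			      "%s hung", engine->name);
 *
 * With I915_ERROR_CAPTURE set, an error state is captured first; the engines
 * in the mask are then reset individually where supported, and only those
 * that could not be recovered escalate to the global reset path.
 */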

int intel_gt_reset_trylock(struct intel_gt *gt, int *srcu)
{
	might_lock(&gt->reset.backoff_srcu);
	might_sleep();

	rcu_read_lock();
	while (test_bit(I915_RESET_BACKOFF, &gt->reset.flags)) {
		rcu_read_unlock();

		if (wait_event_interruptible(gt->reset.queue,
					     !test_bit(I915_RESET_BACKOFF,
						       &gt->reset.flags)))
			return -EINTR;

		rcu_read_lock();
	}
	*srcu = srcu_read_lock(&gt->reset.backoff_srcu);
	rcu_read_unlock();

	return 0;
}

void intel_gt_reset_unlock(struct intel_gt *gt, int tag)
__releases(&gt->reset.backoff_srcu)
{
	srcu_read_unlock(&gt->reset.backoff_srcu, tag);
}
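
/*
 * Usage sketch for the reset backoff SRCU (not taken from a specific
 * caller): code that must not race a full GPU reset brackets its critical
 * section with the read lock and hands the tag back on unlock:
 *
 *	int tag, err;
 *
 *	err = intel_gt_reset_trylock(gt, &tag);
 *	if (err)
 *		return err;
 *	... touch state that a reset would clobber ...
 *	intel_gt_reset_unlock(gt, tag);
 *
 * intel_gt_reset_global() flushes such readers with
 * synchronize_srcu_expedited() before the reset proceeds.
 */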

int intel_gt_terminally_wedged(struct intel_gt *gt)
{
	might_sleep();

	if (!intel_gt_is_wedged(gt))
		return 0;

	if (intel_gt_has_init_error(gt))
		return -EIO;

	/* Reset still in progress? Maybe we will recover? */
	if (wait_event_interruptible(gt->reset.queue,
				     !test_bit(I915_RESET_BACKOFF,
					       &gt->reset.flags)))
		return -EINTR;

	return intel_gt_is_wedged(gt) ? -EIO : 0;
}

void intel_gt_set_wedged_on_init(struct intel_gt *gt)
{
	BUILD_BUG_ON(I915_RESET_ENGINE + I915_NUM_ENGINES >
		     I915_WEDGED_ON_INIT);
	intel_gt_set_wedged(gt);
	set_bit(I915_WEDGED_ON_INIT, &gt->reset.flags);
}

void intel_gt_init_reset(struct intel_gt *gt)
{
	init_waitqueue_head(&gt->reset.queue);
	mutex_init(&gt->reset.mutex);
	init_srcu_struct(&gt->reset.backoff_srcu);

	/* no GPU until we are ready! */
	__set_bit(I915_WEDGED, &gt->reset.flags);
}

void intel_gt_fini_reset(struct intel_gt *gt)
{
	cleanup_srcu_struct(&gt->reset.backoff_srcu);
}

static void intel_wedge_me(struct work_struct *work)
{
	struct intel_wedge_me *w = container_of(work, typeof(*w), work.work);

	dev_err(w->gt->i915->drm.dev,
		"%s timed out, cancelling all in-flight rendering.\n",
		w->name);
	intel_gt_set_wedged(w->gt);
}

void __intel_init_wedge(struct intel_wedge_me *w,
			struct intel_gt *gt,
			long timeout,
			const char *name)
{
	w->gt = gt;
	w->name = name;

	INIT_DELAYED_WORK_ONSTACK(&w->work, intel_wedge_me);
	schedule_delayed_work(&w->work, timeout);
}

void __intel_fini_wedge(struct intel_wedge_me *w)
{
	cancel_delayed_work_sync(&w->work);
	destroy_delayed_work_on_stack(&w->work);
	w->gt = NULL;
}

#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftest_reset.c"
#include "selftest_hangcheck.c"
#endif