1 // SPDX-License-Identifier: MIT 2 /* 3 * Copyright © 2016 Intel Corporation 4 */ 5 6 #include <linux/string_helpers.h> 7 8 #include <drm/drm_print.h> 9 10 #include "gem/i915_gem_context.h" 11 #include "gem/i915_gem_internal.h" 12 #include "gt/intel_gt_regs.h" 13 14 #include "i915_cmd_parser.h" 15 #include "i915_drv.h" 16 #include "intel_breadcrumbs.h" 17 #include "intel_context.h" 18 #include "intel_engine.h" 19 #include "intel_engine_pm.h" 20 #include "intel_engine_regs.h" 21 #include "intel_engine_user.h" 22 #include "intel_execlists_submission.h" 23 #include "intel_gt.h" 24 #include "intel_gt_requests.h" 25 #include "intel_gt_pm.h" 26 #include "intel_lrc.h" 27 #include "intel_lrc_reg.h" 28 #include "intel_reset.h" 29 #include "intel_ring.h" 30 #include "uc/intel_guc_submission.h" 31 32 /* Haswell does have the CXT_SIZE register however it does not appear to be 33 * valid. Now, docs explain in dwords what is in the context object. The full 34 * size is 70720 bytes, however, the power context and execlist context will 35 * never be saved (power context is stored elsewhere, and execlists don't work 36 * on HSW) - so the final size, including the extra state required for the 37 * Resource Streamer, is 66944 bytes, which rounds to 17 pages. 38 */ 39 #define HSW_CXT_TOTAL_SIZE (17 * PAGE_SIZE) 40 41 #define DEFAULT_LR_CONTEXT_RENDER_SIZE (22 * PAGE_SIZE) 42 #define GEN8_LR_CONTEXT_RENDER_SIZE (20 * PAGE_SIZE) 43 #define GEN9_LR_CONTEXT_RENDER_SIZE (22 * PAGE_SIZE) 44 #define GEN11_LR_CONTEXT_RENDER_SIZE (14 * PAGE_SIZE) 45 46 #define GEN8_LR_CONTEXT_OTHER_SIZE ( 2 * PAGE_SIZE) 47 48 #define MAX_MMIO_BASES 3 49 struct engine_info { 50 u8 class; 51 u8 instance; 52 /* mmio bases table *must* be sorted in reverse graphics_ver order */ 53 struct engine_mmio_base { 54 u32 graphics_ver : 8; 55 u32 base : 24; 56 } mmio_bases[MAX_MMIO_BASES]; 57 }; 58 59 static const struct engine_info intel_engines[] = { 60 [RCS0] = { 61 .class = RENDER_CLASS, 62 .instance = 0, 63 .mmio_bases = { 64 { .graphics_ver = 1, .base = RENDER_RING_BASE } 65 }, 66 }, 67 [BCS0] = { 68 .class = COPY_ENGINE_CLASS, 69 .instance = 0, 70 .mmio_bases = { 71 { .graphics_ver = 6, .base = BLT_RING_BASE } 72 }, 73 }, 74 [VCS0] = { 75 .class = VIDEO_DECODE_CLASS, 76 .instance = 0, 77 .mmio_bases = { 78 { .graphics_ver = 11, .base = GEN11_BSD_RING_BASE }, 79 { .graphics_ver = 6, .base = GEN6_BSD_RING_BASE }, 80 { .graphics_ver = 4, .base = BSD_RING_BASE } 81 }, 82 }, 83 [VCS1] = { 84 .class = VIDEO_DECODE_CLASS, 85 .instance = 1, 86 .mmio_bases = { 87 { .graphics_ver = 11, .base = GEN11_BSD2_RING_BASE }, 88 { .graphics_ver = 8, .base = GEN8_BSD2_RING_BASE } 89 }, 90 }, 91 [VCS2] = { 92 .class = VIDEO_DECODE_CLASS, 93 .instance = 2, 94 .mmio_bases = { 95 { .graphics_ver = 11, .base = GEN11_BSD3_RING_BASE } 96 }, 97 }, 98 [VCS3] = { 99 .class = VIDEO_DECODE_CLASS, 100 .instance = 3, 101 .mmio_bases = { 102 { .graphics_ver = 11, .base = GEN11_BSD4_RING_BASE } 103 }, 104 }, 105 [VCS4] = { 106 .class = VIDEO_DECODE_CLASS, 107 .instance = 4, 108 .mmio_bases = { 109 { .graphics_ver = 12, .base = XEHP_BSD5_RING_BASE } 110 }, 111 }, 112 [VCS5] = { 113 .class = VIDEO_DECODE_CLASS, 114 .instance = 5, 115 .mmio_bases = { 116 { .graphics_ver = 12, .base = XEHP_BSD6_RING_BASE } 117 }, 118 }, 119 [VCS6] = { 120 .class = VIDEO_DECODE_CLASS, 121 .instance = 6, 122 .mmio_bases = { 123 { .graphics_ver = 12, .base = XEHP_BSD7_RING_BASE } 124 }, 125 }, 126 [VCS7] = { 127 .class = VIDEO_DECODE_CLASS, 128 .instance = 7, 129 .mmio_bases = { 130 { .graphics_ver 
 = 12, .base = XEHP_BSD8_RING_BASE }
		},
	},
	[VECS0] = {
		.class = VIDEO_ENHANCEMENT_CLASS,
		.instance = 0,
		.mmio_bases = {
			{ .graphics_ver = 11, .base = GEN11_VEBOX_RING_BASE },
			{ .graphics_ver = 7, .base = VEBOX_RING_BASE }
		},
	},
	[VECS1] = {
		.class = VIDEO_ENHANCEMENT_CLASS,
		.instance = 1,
		.mmio_bases = {
			{ .graphics_ver = 11, .base = GEN11_VEBOX2_RING_BASE }
		},
	},
	[VECS2] = {
		.class = VIDEO_ENHANCEMENT_CLASS,
		.instance = 2,
		.mmio_bases = {
			{ .graphics_ver = 12, .base = XEHP_VEBOX3_RING_BASE }
		},
	},
	[VECS3] = {
		.class = VIDEO_ENHANCEMENT_CLASS,
		.instance = 3,
		.mmio_bases = {
			{ .graphics_ver = 12, .base = XEHP_VEBOX4_RING_BASE }
		},
	},
	[CCS0] = {
		.class = COMPUTE_CLASS,
		.instance = 0,
		.mmio_bases = {
			{ .graphics_ver = 12, .base = GEN12_COMPUTE0_RING_BASE }
		}
	},
	[CCS1] = {
		.class = COMPUTE_CLASS,
		.instance = 1,
		.mmio_bases = {
			{ .graphics_ver = 12, .base = GEN12_COMPUTE1_RING_BASE }
		}
	},
	[CCS2] = {
		.class = COMPUTE_CLASS,
		.instance = 2,
		.mmio_bases = {
			{ .graphics_ver = 12, .base = GEN12_COMPUTE2_RING_BASE }
		}
	},
	[CCS3] = {
		.class = COMPUTE_CLASS,
		.instance = 3,
		.mmio_bases = {
			{ .graphics_ver = 12, .base = GEN12_COMPUTE3_RING_BASE }
		}
	},
};

/**
 * intel_engine_context_size() - return the size of the context for an engine
 * @gt: the gt
 * @class: engine class
 *
 * Each engine class may require a different amount of space for a context
 * image.
 *
 * Return: size (in bytes) of an engine class specific context image
 *
 * Note: this size includes the HWSP, which is part of the context image
 * in LRC mode, but does not include the "shared data page" used with
 * GuC submission. The caller should account for this if using the GuC.
 */
u32 intel_engine_context_size(struct intel_gt *gt, u8 class)
{
	struct intel_uncore *uncore = gt->uncore;
	u32 cxt_size;

	BUILD_BUG_ON(I915_GTT_PAGE_SIZE != PAGE_SIZE);

	switch (class) {
	case COMPUTE_CLASS:
		fallthrough;
	case RENDER_CLASS:
		switch (GRAPHICS_VER(gt->i915)) {
		default:
			MISSING_CASE(GRAPHICS_VER(gt->i915));
			return DEFAULT_LR_CONTEXT_RENDER_SIZE;
		case 12:
		case 11:
			return GEN11_LR_CONTEXT_RENDER_SIZE;
		case 9:
			return GEN9_LR_CONTEXT_RENDER_SIZE;
		case 8:
			return GEN8_LR_CONTEXT_RENDER_SIZE;
		case 7:
			if (IS_HASWELL(gt->i915))
				return HSW_CXT_TOTAL_SIZE;

			cxt_size = intel_uncore_read(uncore, GEN7_CXT_SIZE);
			return round_up(GEN7_CXT_TOTAL_SIZE(cxt_size) * 64,
					PAGE_SIZE);
		case 6:
			cxt_size = intel_uncore_read(uncore, CXT_SIZE);
			return round_up(GEN6_CXT_TOTAL_SIZE(cxt_size) * 64,
					PAGE_SIZE);
		case 5:
		case 4:
			/*
			 * There is a discrepancy here between the size reported
			 * by the register and the size of the context layout
			 * in the docs. Both are described as authoritative!
			 *
			 * The discrepancy is on the order of a few cachelines,
			 * but the total is under one page (4k), which is our
			 * minimum allocation anyway so it should all come
			 * out in the wash.
			 */
			cxt_size = intel_uncore_read(uncore, CXT_SIZE) + 1;
			drm_dbg(&gt->i915->drm,
				"graphics_ver = %d CXT_SIZE = %d bytes [0x%08x]\n",
				GRAPHICS_VER(gt->i915), cxt_size * 64,
				cxt_size - 1);
			return round_up(cxt_size * 64, PAGE_SIZE);
		case 3:
		case 2:
		/* For the special day when i810 gets merged. */
		case 1:
			return 0;
		}
		break;
	default:
		MISSING_CASE(class);
		fallthrough;
	case VIDEO_DECODE_CLASS:
	case VIDEO_ENHANCEMENT_CLASS:
	case COPY_ENGINE_CLASS:
		if (GRAPHICS_VER(gt->i915) < 8)
			return 0;
		return GEN8_LR_CONTEXT_OTHER_SIZE;
	}
}

static u32 __engine_mmio_base(struct drm_i915_private *i915,
			      const struct engine_mmio_base *bases)
{
	int i;

	for (i = 0; i < MAX_MMIO_BASES; i++)
		if (GRAPHICS_VER(i915) >= bases[i].graphics_ver)
			break;

	GEM_BUG_ON(i == MAX_MMIO_BASES);
	GEM_BUG_ON(!bases[i].base);

	return bases[i].base;
}

static void __sprint_engine_name(struct intel_engine_cs *engine)
{
	/*
	 * Before we know what the uABI name for this engine will be,
	 * we still would like to keep track of this engine in the debug logs.
	 * We throw in a ' here as a reminder that this isn't its final name.
	 */
	GEM_WARN_ON(snprintf(engine->name, sizeof(engine->name), "%s'%u",
			     intel_engine_class_repr(engine->class),
			     engine->instance) >= sizeof(engine->name));
}

void intel_engine_set_hwsp_writemask(struct intel_engine_cs *engine, u32 mask)
{
	/*
	 * Though they added more rings on g4x/ilk, they did not add
	 * per-engine HWSTAM until gen6.
	 */
	if (GRAPHICS_VER(engine->i915) < 6 && engine->class != RENDER_CLASS)
		return;

	if (GRAPHICS_VER(engine->i915) >= 3)
		ENGINE_WRITE(engine, RING_HWSTAM, mask);
	else
		ENGINE_WRITE16(engine, RING_HWSTAM, mask);
}

static void intel_engine_sanitize_mmio(struct intel_engine_cs *engine)
{
	/* Mask off all writes into the unknown HWSP */
	intel_engine_set_hwsp_writemask(engine, ~0u);
}

static void nop_irq_handler(struct intel_engine_cs *engine, u16 iir)
{
	GEM_DEBUG_WARN_ON(iir);
}

static u32 get_reset_domain(u8 ver, enum intel_engine_id id)
{
	u32 reset_domain;

	if (ver >= 11) {
		static const u32 engine_reset_domains[] = {
			[RCS0]  = GEN11_GRDOM_RENDER,
			[BCS0]  = GEN11_GRDOM_BLT,
			[VCS0]  = GEN11_GRDOM_MEDIA,
			[VCS1]  = GEN11_GRDOM_MEDIA2,
			[VCS2]  = GEN11_GRDOM_MEDIA3,
			[VCS3]  = GEN11_GRDOM_MEDIA4,
			[VCS4]  = GEN11_GRDOM_MEDIA5,
			[VCS5]  = GEN11_GRDOM_MEDIA6,
			[VCS6]  = GEN11_GRDOM_MEDIA7,
			[VCS7]  = GEN11_GRDOM_MEDIA8,
			[VECS0] = GEN11_GRDOM_VECS,
			[VECS1] = GEN11_GRDOM_VECS2,
			[VECS2] = GEN11_GRDOM_VECS3,
			[VECS3] = GEN11_GRDOM_VECS4,
			[CCS0]  = GEN11_GRDOM_RENDER,
			[CCS1]  = GEN11_GRDOM_RENDER,
			[CCS2]  = GEN11_GRDOM_RENDER,
			[CCS3]  = GEN11_GRDOM_RENDER,
		};
		GEM_BUG_ON(id >= ARRAY_SIZE(engine_reset_domains) ||
			   !engine_reset_domains[id]);
		reset_domain = engine_reset_domains[id];
	} else {
		static const u32 engine_reset_domains[] = {
			[RCS0]  = GEN6_GRDOM_RENDER,
			[BCS0]  = GEN6_GRDOM_BLT,
			[VCS0]  = GEN6_GRDOM_MEDIA,
			[VCS1]  = GEN8_GRDOM_MEDIA2,
			[VECS0] = GEN6_GRDOM_VECS,
		};
		GEM_BUG_ON(id >= ARRAY_SIZE(engine_reset_domains) ||
			   !engine_reset_domains[id]);
		reset_domain = engine_reset_domains[id];
	}

	return reset_domain;
}

static int intel_engine_setup(struct intel_gt *gt,
enum intel_engine_id id, 374 u8 logical_instance) 375 { 376 const struct engine_info *info = &intel_engines[id]; 377 struct drm_i915_private *i915 = gt->i915; 378 struct intel_engine_cs *engine; 379 u8 guc_class; 380 381 BUILD_BUG_ON(MAX_ENGINE_CLASS >= BIT(GEN11_ENGINE_CLASS_WIDTH)); 382 BUILD_BUG_ON(MAX_ENGINE_INSTANCE >= BIT(GEN11_ENGINE_INSTANCE_WIDTH)); 383 BUILD_BUG_ON(I915_MAX_VCS > (MAX_ENGINE_INSTANCE + 1)); 384 BUILD_BUG_ON(I915_MAX_VECS > (MAX_ENGINE_INSTANCE + 1)); 385 386 if (GEM_DEBUG_WARN_ON(id >= ARRAY_SIZE(gt->engine))) 387 return -EINVAL; 388 389 if (GEM_DEBUG_WARN_ON(info->class > MAX_ENGINE_CLASS)) 390 return -EINVAL; 391 392 if (GEM_DEBUG_WARN_ON(info->instance > MAX_ENGINE_INSTANCE)) 393 return -EINVAL; 394 395 if (GEM_DEBUG_WARN_ON(gt->engine_class[info->class][info->instance])) 396 return -EINVAL; 397 398 engine = kzalloc(sizeof(*engine), GFP_KERNEL); 399 if (!engine) 400 return -ENOMEM; 401 402 BUILD_BUG_ON(BITS_PER_TYPE(engine->mask) < I915_NUM_ENGINES); 403 404 INIT_LIST_HEAD(&engine->pinned_contexts_list); 405 engine->id = id; 406 engine->legacy_idx = INVALID_ENGINE; 407 engine->mask = BIT(id); 408 engine->reset_domain = get_reset_domain(GRAPHICS_VER(gt->i915), 409 id); 410 engine->i915 = i915; 411 engine->gt = gt; 412 engine->uncore = gt->uncore; 413 guc_class = engine_class_to_guc_class(info->class); 414 engine->guc_id = MAKE_GUC_ID(guc_class, info->instance); 415 engine->mmio_base = __engine_mmio_base(i915, info->mmio_bases); 416 417 engine->irq_handler = nop_irq_handler; 418 419 engine->class = info->class; 420 engine->instance = info->instance; 421 engine->logical_mask = BIT(logical_instance); 422 __sprint_engine_name(engine); 423 424 engine->props.heartbeat_interval_ms = 425 CONFIG_DRM_I915_HEARTBEAT_INTERVAL; 426 engine->props.max_busywait_duration_ns = 427 CONFIG_DRM_I915_MAX_REQUEST_BUSYWAIT; 428 engine->props.preempt_timeout_ms = 429 CONFIG_DRM_I915_PREEMPT_TIMEOUT; 430 engine->props.stop_timeout_ms = 431 CONFIG_DRM_I915_STOP_TIMEOUT; 432 engine->props.timeslice_duration_ms = 433 CONFIG_DRM_I915_TIMESLICE_DURATION; 434 435 /* Override to uninterruptible for OpenCL workloads. */ 436 if (GRAPHICS_VER(i915) == 12 && engine->class == RENDER_CLASS) 437 engine->props.preempt_timeout_ms = 0; 438 439 /* features common between engines sharing EUs */ 440 if (engine->class == RENDER_CLASS || engine->class == COMPUTE_CLASS) { 441 engine->flags |= I915_ENGINE_HAS_RCS_REG_STATE; 442 engine->flags |= I915_ENGINE_HAS_EU_PRIORITY; 443 } 444 445 engine->defaults = engine->props; /* never to change again */ 446 447 engine->context_size = intel_engine_context_size(gt, engine->class); 448 if (WARN_ON(engine->context_size > BIT(20))) 449 engine->context_size = 0; 450 if (engine->context_size) 451 DRIVER_CAPS(i915)->has_logical_contexts = true; 452 453 ewma__engine_latency_init(&engine->latency); 454 seqcount_init(&engine->stats.execlists.lock); 455 456 ATOMIC_INIT_NOTIFIER_HEAD(&engine->context_status_notifier); 457 458 /* Scrub mmio state on takeover */ 459 intel_engine_sanitize_mmio(engine); 460 461 gt->engine_class[info->class][info->instance] = engine; 462 gt->engine[id] = engine; 463 464 return 0; 465 } 466 467 static void __setup_engine_capabilities(struct intel_engine_cs *engine) 468 { 469 struct drm_i915_private *i915 = engine->i915; 470 471 if (engine->class == VIDEO_DECODE_CLASS) { 472 /* 473 * HEVC support is present on first engine instance 474 * before Gen11 and on all instances afterwards. 
		 */
		if (GRAPHICS_VER(i915) >= 11 ||
		    (GRAPHICS_VER(i915) >= 9 && engine->instance == 0))
			engine->uabi_capabilities |=
				I915_VIDEO_CLASS_CAPABILITY_HEVC;

		/*
		 * SFC block is present only on even logical engine
		 * instances.
		 */
		if ((GRAPHICS_VER(i915) >= 11 &&
		     (engine->gt->info.vdbox_sfc_access &
		      BIT(engine->instance))) ||
		    (GRAPHICS_VER(i915) >= 9 && engine->instance == 0))
			engine->uabi_capabilities |=
				I915_VIDEO_AND_ENHANCE_CLASS_CAPABILITY_SFC;
	} else if (engine->class == VIDEO_ENHANCEMENT_CLASS) {
		if (GRAPHICS_VER(i915) >= 9 &&
		    engine->gt->info.sfc_mask & BIT(engine->instance))
			engine->uabi_capabilities |=
				I915_VIDEO_AND_ENHANCE_CLASS_CAPABILITY_SFC;
	}
}

static void intel_setup_engine_capabilities(struct intel_gt *gt)
{
	struct intel_engine_cs *engine;
	enum intel_engine_id id;

	for_each_engine(engine, gt, id)
		__setup_engine_capabilities(engine);
}

/**
 * intel_engines_release() - free the resources allocated for Command Streamers
 * @gt: pointer to struct intel_gt
 */
void intel_engines_release(struct intel_gt *gt)
{
	struct intel_engine_cs *engine;
	enum intel_engine_id id;

	/*
	 * Before we release the resources held by engine, we must be certain
	 * that the HW is no longer accessing them -- having the GPU scribble
	 * to or read from a page being used for something else causes no end
	 * of fun.
	 *
	 * The GPU should be reset by this point, but assume the worst just
	 * in case we aborted before completely initialising the engines.
	 */
	GEM_BUG_ON(intel_gt_pm_is_awake(gt));
	if (!INTEL_INFO(gt->i915)->gpu_reset_clobbers_display)
		__intel_gt_reset(gt, ALL_ENGINES);

	/* Decouple the backend; but keep the layout for late GPU resets */
	for_each_engine(engine, gt, id) {
		if (!engine->release)
			continue;

		intel_wakeref_wait_for_idle(&engine->wakeref);
		GEM_BUG_ON(intel_engine_pm_is_awake(engine));

		engine->release(engine);
		engine->release = NULL;

		memset(&engine->reset, 0, sizeof(engine->reset));
	}
}

void intel_engine_free_request_pool(struct intel_engine_cs *engine)
{
	if (!engine->request_pool)
		return;

	kmem_cache_free(i915_request_slab_cache(), engine->request_pool);
}

void intel_engines_free(struct intel_gt *gt)
{
	struct intel_engine_cs *engine;
	enum intel_engine_id id;

	/* Free the requests! dma-resv keeps fences around for an eternity */
	rcu_barrier();

	for_each_engine(engine, gt, id) {
		intel_engine_free_request_pool(engine);
		kfree(engine);
		gt->engine[id] = NULL;
	}
}

static
bool gen11_vdbox_has_sfc(struct intel_gt *gt,
			 unsigned int physical_vdbox,
			 unsigned int logical_vdbox, u16 vdbox_mask)
{
	struct drm_i915_private *i915 = gt->i915;

	/*
	 * In Gen11, only even numbered logical VDBOXes are hooked
	 * up to an SFC (Scaler & Format Converter) unit.
	 * In Gen12, even numbered physical instances are always connected
	 * to an SFC. Odd numbered physical instances have SFC only if
	 * previous even instance is fused off.
	 *
	 * Starting with Xe_HP, there's also a dedicated SFC_ENABLE field
	 * in the fuse register that tells us whether a specific SFC is present.
	 */
	if ((gt->info.sfc_mask & BIT(physical_vdbox / 2)) == 0)
		return false;
	else if (GRAPHICS_VER(i915) == 12)
		return (physical_vdbox % 2 == 0) ||
			!(BIT(physical_vdbox - 1) & vdbox_mask);
	else if (GRAPHICS_VER(i915) == 11)
		return logical_vdbox % 2 == 0;

	MISSING_CASE(GRAPHICS_VER(i915));
	return false;
}

static void engine_mask_apply_compute_fuses(struct intel_gt *gt)
{
	struct drm_i915_private *i915 = gt->i915;
	struct intel_gt_info *info = &gt->info;
	int ss_per_ccs = info->sseu.max_subslices / I915_MAX_CCS;
	unsigned long ccs_mask;
	unsigned int i;

	if (GRAPHICS_VER_FULL(i915) < IP_VER(12, 50))
		return;

	ccs_mask = intel_slicemask_from_dssmask(intel_sseu_get_compute_subslices(&info->sseu),
						ss_per_ccs);
	/*
	 * If all DSS in a quadrant are fused off, the corresponding CCS
	 * engine is not available for use.
	 */
	for_each_clear_bit(i, &ccs_mask, I915_MAX_CCS) {
		info->engine_mask &= ~BIT(_CCS(i));
		drm_dbg(&i915->drm, "ccs%u fused off\n", i);
	}
}

/*
 * Determine which engines are fused off in our particular hardware.
 * Note that we have a catch-22 situation where we need to be able to access
 * the blitter forcewake domain to read the engine fuses, but at the same time
 * we need to know which engines are available on the system to know which
 * forcewake domains are present. We solve this by initializing the forcewake
 * domains based on the full engine mask in the platform capabilities before
 * calling this function and pruning the domains for fused-off engines
 * afterwards.
 */
static intel_engine_mask_t init_engine_mask(struct intel_gt *gt)
{
	struct drm_i915_private *i915 = gt->i915;
	struct intel_gt_info *info = &gt->info;
	struct intel_uncore *uncore = gt->uncore;
	unsigned int logical_vdbox = 0;
	unsigned int i;
	u32 media_fuse, fuse1;
	u16 vdbox_mask;
	u16 vebox_mask;

	info->engine_mask = INTEL_INFO(i915)->platform_engine_mask;

	if (GRAPHICS_VER(i915) < 11)
		return info->engine_mask;

	/*
	 * On newer platforms the fusing register is called 'enable' and has
	 * enable semantics, while on older platforms it is called 'disable'
	 * and bits have disable semantics.
650 */ 651 media_fuse = intel_uncore_read(uncore, GEN11_GT_VEBOX_VDBOX_DISABLE); 652 if (GRAPHICS_VER_FULL(i915) < IP_VER(12, 50)) 653 media_fuse = ~media_fuse; 654 655 vdbox_mask = media_fuse & GEN11_GT_VDBOX_DISABLE_MASK; 656 vebox_mask = (media_fuse & GEN11_GT_VEBOX_DISABLE_MASK) >> 657 GEN11_GT_VEBOX_DISABLE_SHIFT; 658 659 if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50)) { 660 fuse1 = intel_uncore_read(uncore, HSW_PAVP_FUSE1); 661 gt->info.sfc_mask = REG_FIELD_GET(XEHP_SFC_ENABLE_MASK, fuse1); 662 } else { 663 gt->info.sfc_mask = ~0; 664 } 665 666 for (i = 0; i < I915_MAX_VCS; i++) { 667 if (!HAS_ENGINE(gt, _VCS(i))) { 668 vdbox_mask &= ~BIT(i); 669 continue; 670 } 671 672 if (!(BIT(i) & vdbox_mask)) { 673 info->engine_mask &= ~BIT(_VCS(i)); 674 drm_dbg(&i915->drm, "vcs%u fused off\n", i); 675 continue; 676 } 677 678 if (gen11_vdbox_has_sfc(gt, i, logical_vdbox, vdbox_mask)) 679 gt->info.vdbox_sfc_access |= BIT(i); 680 logical_vdbox++; 681 } 682 drm_dbg(&i915->drm, "vdbox enable: %04x, instances: %04lx\n", 683 vdbox_mask, VDBOX_MASK(gt)); 684 GEM_BUG_ON(vdbox_mask != VDBOX_MASK(gt)); 685 686 for (i = 0; i < I915_MAX_VECS; i++) { 687 if (!HAS_ENGINE(gt, _VECS(i))) { 688 vebox_mask &= ~BIT(i); 689 continue; 690 } 691 692 if (!(BIT(i) & vebox_mask)) { 693 info->engine_mask &= ~BIT(_VECS(i)); 694 drm_dbg(&i915->drm, "vecs%u fused off\n", i); 695 } 696 } 697 drm_dbg(&i915->drm, "vebox enable: %04x, instances: %04lx\n", 698 vebox_mask, VEBOX_MASK(gt)); 699 GEM_BUG_ON(vebox_mask != VEBOX_MASK(gt)); 700 701 engine_mask_apply_compute_fuses(gt); 702 703 return info->engine_mask; 704 } 705 706 static void populate_logical_ids(struct intel_gt *gt, u8 *logical_ids, 707 u8 class, const u8 *map, u8 num_instances) 708 { 709 int i, j; 710 u8 current_logical_id = 0; 711 712 for (j = 0; j < num_instances; ++j) { 713 for (i = 0; i < ARRAY_SIZE(intel_engines); ++i) { 714 if (!HAS_ENGINE(gt, i) || 715 intel_engines[i].class != class) 716 continue; 717 718 if (intel_engines[i].instance == map[j]) { 719 logical_ids[intel_engines[i].instance] = 720 current_logical_id++; 721 break; 722 } 723 } 724 } 725 } 726 727 static void setup_logical_ids(struct intel_gt *gt, u8 *logical_ids, u8 class) 728 { 729 int i; 730 u8 map[MAX_ENGINE_INSTANCE + 1]; 731 732 for (i = 0; i < MAX_ENGINE_INSTANCE + 1; ++i) 733 map[i] = i; 734 populate_logical_ids(gt, logical_ids, class, map, ARRAY_SIZE(map)); 735 } 736 737 /** 738 * intel_engines_init_mmio() - allocate and prepare the Engine Command Streamers 739 * @gt: pointer to struct intel_gt 740 * 741 * Return: non-zero if the initialization failed. 
742 */ 743 int intel_engines_init_mmio(struct intel_gt *gt) 744 { 745 struct drm_i915_private *i915 = gt->i915; 746 const unsigned int engine_mask = init_engine_mask(gt); 747 unsigned int mask = 0; 748 unsigned int i, class; 749 u8 logical_ids[MAX_ENGINE_INSTANCE + 1]; 750 int err; 751 752 drm_WARN_ON(&i915->drm, engine_mask == 0); 753 drm_WARN_ON(&i915->drm, engine_mask & 754 GENMASK(BITS_PER_TYPE(mask) - 1, I915_NUM_ENGINES)); 755 756 if (i915_inject_probe_failure(i915)) 757 return -ENODEV; 758 759 for (class = 0; class < MAX_ENGINE_CLASS + 1; ++class) { 760 setup_logical_ids(gt, logical_ids, class); 761 762 for (i = 0; i < ARRAY_SIZE(intel_engines); ++i) { 763 u8 instance = intel_engines[i].instance; 764 765 if (intel_engines[i].class != class || 766 !HAS_ENGINE(gt, i)) 767 continue; 768 769 err = intel_engine_setup(gt, i, 770 logical_ids[instance]); 771 if (err) 772 goto cleanup; 773 774 mask |= BIT(i); 775 } 776 } 777 778 /* 779 * Catch failures to update intel_engines table when the new engines 780 * are added to the driver by a warning and disabling the forgotten 781 * engines. 782 */ 783 if (drm_WARN_ON(&i915->drm, mask != engine_mask)) 784 gt->info.engine_mask = mask; 785 786 gt->info.num_engines = hweight32(mask); 787 788 intel_gt_check_and_clear_faults(gt); 789 790 intel_setup_engine_capabilities(gt); 791 792 intel_uncore_prune_engine_fw_domains(gt->uncore, gt); 793 794 return 0; 795 796 cleanup: 797 intel_engines_free(gt); 798 return err; 799 } 800 801 void intel_engine_init_execlists(struct intel_engine_cs *engine) 802 { 803 struct intel_engine_execlists * const execlists = &engine->execlists; 804 805 execlists->port_mask = 1; 806 GEM_BUG_ON(!is_power_of_2(execlists_num_ports(execlists))); 807 GEM_BUG_ON(execlists_num_ports(execlists) > EXECLIST_MAX_PORTS); 808 809 memset(execlists->pending, 0, sizeof(execlists->pending)); 810 execlists->active = 811 memset(execlists->inflight, 0, sizeof(execlists->inflight)); 812 } 813 814 static void cleanup_status_page(struct intel_engine_cs *engine) 815 { 816 struct i915_vma *vma; 817 818 /* Prevent writes into HWSP after returning the page to the system */ 819 intel_engine_set_hwsp_writemask(engine, ~0u); 820 821 vma = fetch_and_zero(&engine->status_page.vma); 822 if (!vma) 823 return; 824 825 if (!HWS_NEEDS_PHYSICAL(engine->i915)) 826 i915_vma_unpin(vma); 827 828 i915_gem_object_unpin_map(vma->obj); 829 i915_gem_object_put(vma->obj); 830 } 831 832 static int pin_ggtt_status_page(struct intel_engine_cs *engine, 833 struct i915_gem_ww_ctx *ww, 834 struct i915_vma *vma) 835 { 836 unsigned int flags; 837 838 if (!HAS_LLC(engine->i915) && i915_ggtt_has_aperture(engine->gt->ggtt)) 839 /* 840 * On g33, we cannot place HWS above 256MiB, so 841 * restrict its pinning to the low mappable arena. 842 * Though this restriction is not documented for 843 * gen4, gen5, or byt, they also behave similarly 844 * and hang if the HWS is placed at the top of the 845 * GTT. To generalise, it appears that all !llc 846 * platforms have issues with us placing the HWS 847 * above the mappable region (even though we never 848 * actually map it). 
849 */ 850 flags = PIN_MAPPABLE; 851 else 852 flags = PIN_HIGH; 853 854 return i915_ggtt_pin(vma, ww, 0, flags); 855 } 856 857 static int init_status_page(struct intel_engine_cs *engine) 858 { 859 struct drm_i915_gem_object *obj; 860 struct i915_gem_ww_ctx ww; 861 struct i915_vma *vma; 862 void *vaddr; 863 int ret; 864 865 INIT_LIST_HEAD(&engine->status_page.timelines); 866 867 /* 868 * Though the HWS register does support 36bit addresses, historically 869 * we have had hangs and corruption reported due to wild writes if 870 * the HWS is placed above 4G. We only allow objects to be allocated 871 * in GFP_DMA32 for i965, and no earlier physical address users had 872 * access to more than 4G. 873 */ 874 obj = i915_gem_object_create_internal(engine->i915, PAGE_SIZE); 875 if (IS_ERR(obj)) { 876 drm_err(&engine->i915->drm, 877 "Failed to allocate status page\n"); 878 return PTR_ERR(obj); 879 } 880 881 i915_gem_object_set_cache_coherency(obj, I915_CACHE_LLC); 882 883 vma = i915_vma_instance(obj, &engine->gt->ggtt->vm, NULL); 884 if (IS_ERR(vma)) { 885 ret = PTR_ERR(vma); 886 goto err_put; 887 } 888 889 i915_gem_ww_ctx_init(&ww, true); 890 retry: 891 ret = i915_gem_object_lock(obj, &ww); 892 if (!ret && !HWS_NEEDS_PHYSICAL(engine->i915)) 893 ret = pin_ggtt_status_page(engine, &ww, vma); 894 if (ret) 895 goto err; 896 897 vaddr = i915_gem_object_pin_map(obj, I915_MAP_WB); 898 if (IS_ERR(vaddr)) { 899 ret = PTR_ERR(vaddr); 900 goto err_unpin; 901 } 902 903 engine->status_page.addr = memset(vaddr, 0, PAGE_SIZE); 904 engine->status_page.vma = vma; 905 906 err_unpin: 907 if (ret) 908 i915_vma_unpin(vma); 909 err: 910 if (ret == -EDEADLK) { 911 ret = i915_gem_ww_ctx_backoff(&ww); 912 if (!ret) 913 goto retry; 914 } 915 i915_gem_ww_ctx_fini(&ww); 916 err_put: 917 if (ret) 918 i915_gem_object_put(obj); 919 return ret; 920 } 921 922 static int engine_setup_common(struct intel_engine_cs *engine) 923 { 924 int err; 925 926 init_llist_head(&engine->barrier_tasks); 927 928 err = init_status_page(engine); 929 if (err) 930 return err; 931 932 engine->breadcrumbs = intel_breadcrumbs_create(engine); 933 if (!engine->breadcrumbs) { 934 err = -ENOMEM; 935 goto err_status; 936 } 937 938 engine->sched_engine = i915_sched_engine_create(ENGINE_PHYSICAL); 939 if (!engine->sched_engine) { 940 err = -ENOMEM; 941 goto err_sched_engine; 942 } 943 engine->sched_engine->private_data = engine; 944 945 err = intel_engine_init_cmd_parser(engine); 946 if (err) 947 goto err_cmd_parser; 948 949 intel_engine_init_execlists(engine); 950 intel_engine_init__pm(engine); 951 intel_engine_init_retire(engine); 952 953 /* Use the whole device by default */ 954 engine->sseu = 955 intel_sseu_from_device_info(&engine->gt->info.sseu); 956 957 intel_engine_init_workarounds(engine); 958 intel_engine_init_whitelist(engine); 959 intel_engine_init_ctx_wa(engine); 960 961 if (GRAPHICS_VER(engine->i915) >= 12) 962 engine->flags |= I915_ENGINE_HAS_RELATIVE_MMIO; 963 964 return 0; 965 966 err_cmd_parser: 967 i915_sched_engine_put(engine->sched_engine); 968 err_sched_engine: 969 intel_breadcrumbs_put(engine->breadcrumbs); 970 err_status: 971 cleanup_status_page(engine); 972 return err; 973 } 974 975 struct measure_breadcrumb { 976 struct i915_request rq; 977 struct intel_ring ring; 978 u32 cs[2048]; 979 }; 980 981 static int measure_breadcrumb_dw(struct intel_context *ce) 982 { 983 struct intel_engine_cs *engine = ce->engine; 984 struct measure_breadcrumb *frame; 985 int dw; 986 987 GEM_BUG_ON(!engine->gt->scratch); 988 989 frame = 
kzalloc(sizeof(*frame), GFP_KERNEL);
	if (!frame)
		return -ENOMEM;

	frame->rq.engine = engine;
	frame->rq.context = ce;
	rcu_assign_pointer(frame->rq.timeline, ce->timeline);
	frame->rq.hwsp_seqno = ce->timeline->hwsp_seqno;

	frame->ring.vaddr = frame->cs;
	frame->ring.size = sizeof(frame->cs);
	frame->ring.wrap =
		BITS_PER_TYPE(frame->ring.size) - ilog2(frame->ring.size);
	frame->ring.effective_size = frame->ring.size;
	intel_ring_update_space(&frame->ring);
	frame->rq.ring = &frame->ring;

	mutex_lock(&ce->timeline->mutex);
	spin_lock_irq(&engine->sched_engine->lock);

	dw = engine->emit_fini_breadcrumb(&frame->rq, frame->cs) - frame->cs;

	spin_unlock_irq(&engine->sched_engine->lock);
	mutex_unlock(&ce->timeline->mutex);

	GEM_BUG_ON(dw & 1); /* RING_TAIL must be qword aligned */

	kfree(frame);
	return dw;
}

struct intel_context *
intel_engine_create_pinned_context(struct intel_engine_cs *engine,
				   struct i915_address_space *vm,
				   unsigned int ring_size,
				   unsigned int hwsp,
				   struct lock_class_key *key,
				   const char *name)
{
	struct intel_context *ce;
	int err;

	ce = intel_context_create(engine);
	if (IS_ERR(ce))
		return ce;

	__set_bit(CONTEXT_BARRIER_BIT, &ce->flags);
	ce->timeline = page_pack_bits(NULL, hwsp);
	ce->ring = NULL;
	ce->ring_size = ring_size;

	i915_vm_put(ce->vm);
	ce->vm = i915_vm_get(vm);

	err = intel_context_pin(ce); /* perma-pin so it is always available */
	if (err) {
		intel_context_put(ce);
		return ERR_PTR(err);
	}

	list_add_tail(&ce->pinned_contexts_link, &engine->pinned_contexts_list);

	/*
	 * Give our perma-pinned kernel timelines a separate lockdep class,
	 * so that we can use them from within the normal user timelines
	 * should we need to inject GPU operations during their request
	 * construction.
	 */
	lockdep_set_class_and_name(&ce->timeline->mutex, key, name);

	return ce;
}

void intel_engine_destroy_pinned_context(struct intel_context *ce)
{
	struct intel_engine_cs *engine = ce->engine;
	struct i915_vma *hwsp = engine->status_page.vma;

	GEM_BUG_ON(ce->timeline->hwsp_ggtt != hwsp);

	mutex_lock(&hwsp->vm->mutex);
	list_del(&ce->timeline->engine_link);
	mutex_unlock(&hwsp->vm->mutex);

	list_del(&ce->pinned_contexts_link);
	intel_context_unpin(ce);
	intel_context_put(ce);
}

static struct intel_context *
create_kernel_context(struct intel_engine_cs *engine)
{
	static struct lock_class_key kernel;

	return intel_engine_create_pinned_context(engine, engine->gt->vm, SZ_4K,
						  I915_GEM_HWS_SEQNO_ADDR,
						  &kernel, "kernel_context");
}

/**
 * engine_init_common - initialize engine state which might require hw access
 * @engine: Engine to initialize.
 *
 * Initializes @engine structure members shared between legacy and execlists
 * submission modes which do require hardware access.
 *
 * Typically done at later stages of submission mode specific engine setup.
 *
 * Returns zero on success or an error code on failure.
 */
static int engine_init_common(struct intel_engine_cs *engine)
{
	struct intel_context *ce;
	int ret;

	engine->set_default_submission(engine);

	/*
	 * We may need to do things with the shrinker which
	 * require us to immediately switch back to the default
	 * context. This can cause a problem as pinning the
	 * default context also requires GTT space which may not
	 * be available. To avoid this we always pin the default
	 * context.
	 */
	ce = create_kernel_context(engine);
	if (IS_ERR(ce))
		return PTR_ERR(ce);

	ret = measure_breadcrumb_dw(ce);
	if (ret < 0)
		goto err_context;

	engine->emit_fini_breadcrumb_dw = ret;
	engine->kernel_context = ce;

	return 0;

err_context:
	intel_engine_destroy_pinned_context(ce);
	return ret;
}

int intel_engines_init(struct intel_gt *gt)
{
	int (*setup)(struct intel_engine_cs *engine);
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	int err;

	if (intel_uc_uses_guc_submission(&gt->uc)) {
		gt->submission_method = INTEL_SUBMISSION_GUC;
		setup = intel_guc_submission_setup;
	} else if (HAS_EXECLISTS(gt->i915)) {
		gt->submission_method = INTEL_SUBMISSION_ELSP;
		setup = intel_execlists_submission_setup;
	} else {
		gt->submission_method = INTEL_SUBMISSION_RING;
		setup = intel_ring_submission_setup;
	}

	for_each_engine(engine, gt, id) {
		err = engine_setup_common(engine);
		if (err)
			return err;

		err = setup(engine);
		if (err)
			return err;

		err = engine_init_common(engine);
		if (err)
			return err;

		intel_engine_add_user(engine);
	}

	return 0;
}

/**
 * intel_engine_cleanup_common - cleans up the engine state created by
 * the common initializers.
 * @engine: Engine to cleanup.
 *
 * This cleans up everything created by the common helpers.
 */
void intel_engine_cleanup_common(struct intel_engine_cs *engine)
{
	GEM_BUG_ON(!list_empty(&engine->sched_engine->requests));

	i915_sched_engine_put(engine->sched_engine);
	intel_breadcrumbs_put(engine->breadcrumbs);

	intel_engine_fini_retire(engine);
	intel_engine_cleanup_cmd_parser(engine);

	if (engine->default_state)
		fput(engine->default_state);

	if (engine->kernel_context)
		intel_engine_destroy_pinned_context(engine->kernel_context);

	GEM_BUG_ON(!llist_empty(&engine->barrier_tasks));
	cleanup_status_page(engine);

	intel_wa_list_free(&engine->ctx_wa_list);
	intel_wa_list_free(&engine->wa_list);
	intel_wa_list_free(&engine->whitelist);
}

/**
 * intel_engine_resume - re-initializes the HW state of the engine
 * @engine: Engine to resume.
 *
 * Returns zero on success or an error code on failure.
1205 */ 1206 int intel_engine_resume(struct intel_engine_cs *engine) 1207 { 1208 intel_engine_apply_workarounds(engine); 1209 intel_engine_apply_whitelist(engine); 1210 1211 return engine->resume(engine); 1212 } 1213 1214 u64 intel_engine_get_active_head(const struct intel_engine_cs *engine) 1215 { 1216 struct drm_i915_private *i915 = engine->i915; 1217 1218 u64 acthd; 1219 1220 if (GRAPHICS_VER(i915) >= 8) 1221 acthd = ENGINE_READ64(engine, RING_ACTHD, RING_ACTHD_UDW); 1222 else if (GRAPHICS_VER(i915) >= 4) 1223 acthd = ENGINE_READ(engine, RING_ACTHD); 1224 else 1225 acthd = ENGINE_READ(engine, ACTHD); 1226 1227 return acthd; 1228 } 1229 1230 u64 intel_engine_get_last_batch_head(const struct intel_engine_cs *engine) 1231 { 1232 u64 bbaddr; 1233 1234 if (GRAPHICS_VER(engine->i915) >= 8) 1235 bbaddr = ENGINE_READ64(engine, RING_BBADDR, RING_BBADDR_UDW); 1236 else 1237 bbaddr = ENGINE_READ(engine, RING_BBADDR); 1238 1239 return bbaddr; 1240 } 1241 1242 static unsigned long stop_timeout(const struct intel_engine_cs *engine) 1243 { 1244 if (in_atomic() || irqs_disabled()) /* inside atomic preempt-reset? */ 1245 return 0; 1246 1247 /* 1248 * If we are doing a normal GPU reset, we can take our time and allow 1249 * the engine to quiesce. We've stopped submission to the engine, and 1250 * if we wait long enough an innocent context should complete and 1251 * leave the engine idle. So they should not be caught unaware by 1252 * the forthcoming GPU reset (which usually follows the stop_cs)! 1253 */ 1254 return READ_ONCE(engine->props.stop_timeout_ms); 1255 } 1256 1257 static int __intel_engine_stop_cs(struct intel_engine_cs *engine, 1258 int fast_timeout_us, 1259 int slow_timeout_ms) 1260 { 1261 struct intel_uncore *uncore = engine->uncore; 1262 const i915_reg_t mode = RING_MI_MODE(engine->mmio_base); 1263 int err; 1264 1265 intel_uncore_write_fw(uncore, mode, _MASKED_BIT_ENABLE(STOP_RING)); 1266 err = __intel_wait_for_register_fw(engine->uncore, mode, 1267 MODE_IDLE, MODE_IDLE, 1268 fast_timeout_us, 1269 slow_timeout_ms, 1270 NULL); 1271 1272 /* A final mmio read to let GPU writes be hopefully flushed to memory */ 1273 intel_uncore_posting_read_fw(uncore, mode); 1274 return err; 1275 } 1276 1277 int intel_engine_stop_cs(struct intel_engine_cs *engine) 1278 { 1279 int err = 0; 1280 1281 if (GRAPHICS_VER(engine->i915) < 3) 1282 return -ENODEV; 1283 1284 ENGINE_TRACE(engine, "\n"); 1285 if (__intel_engine_stop_cs(engine, 1000, stop_timeout(engine))) { 1286 ENGINE_TRACE(engine, 1287 "timed out on STOP_RING -> IDLE; HEAD:%04x, TAIL:%04x\n", 1288 ENGINE_READ_FW(engine, RING_HEAD) & HEAD_ADDR, 1289 ENGINE_READ_FW(engine, RING_TAIL) & TAIL_ADDR); 1290 1291 /* 1292 * Sometimes we observe that the idle flag is not 1293 * set even though the ring is empty. So double 1294 * check before giving up. 
1295 */ 1296 if ((ENGINE_READ_FW(engine, RING_HEAD) & HEAD_ADDR) != 1297 (ENGINE_READ_FW(engine, RING_TAIL) & TAIL_ADDR)) 1298 err = -ETIMEDOUT; 1299 } 1300 1301 return err; 1302 } 1303 1304 void intel_engine_cancel_stop_cs(struct intel_engine_cs *engine) 1305 { 1306 ENGINE_TRACE(engine, "\n"); 1307 1308 ENGINE_WRITE_FW(engine, RING_MI_MODE, _MASKED_BIT_DISABLE(STOP_RING)); 1309 } 1310 1311 static u32 1312 read_subslice_reg(const struct intel_engine_cs *engine, 1313 int slice, int subslice, i915_reg_t reg) 1314 { 1315 return intel_uncore_read_with_mcr_steering(engine->uncore, reg, 1316 slice, subslice); 1317 } 1318 1319 /* NB: please notice the memset */ 1320 void intel_engine_get_instdone(const struct intel_engine_cs *engine, 1321 struct intel_instdone *instdone) 1322 { 1323 struct drm_i915_private *i915 = engine->i915; 1324 const struct sseu_dev_info *sseu = &engine->gt->info.sseu; 1325 struct intel_uncore *uncore = engine->uncore; 1326 u32 mmio_base = engine->mmio_base; 1327 int slice; 1328 int subslice; 1329 int iter; 1330 1331 memset(instdone, 0, sizeof(*instdone)); 1332 1333 if (GRAPHICS_VER(i915) >= 8) { 1334 instdone->instdone = 1335 intel_uncore_read(uncore, RING_INSTDONE(mmio_base)); 1336 1337 if (engine->id != RCS0) 1338 return; 1339 1340 instdone->slice_common = 1341 intel_uncore_read(uncore, GEN7_SC_INSTDONE); 1342 if (GRAPHICS_VER(i915) >= 12) { 1343 instdone->slice_common_extra[0] = 1344 intel_uncore_read(uncore, GEN12_SC_INSTDONE_EXTRA); 1345 instdone->slice_common_extra[1] = 1346 intel_uncore_read(uncore, GEN12_SC_INSTDONE_EXTRA2); 1347 } 1348 1349 if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50)) { 1350 for_each_instdone_gslice_dss_xehp(i915, sseu, iter, slice, subslice) { 1351 instdone->sampler[slice][subslice] = 1352 read_subslice_reg(engine, slice, subslice, 1353 GEN7_SAMPLER_INSTDONE); 1354 instdone->row[slice][subslice] = 1355 read_subslice_reg(engine, slice, subslice, 1356 GEN7_ROW_INSTDONE); 1357 } 1358 } else { 1359 for_each_instdone_slice_subslice(i915, sseu, slice, subslice) { 1360 instdone->sampler[slice][subslice] = 1361 read_subslice_reg(engine, slice, subslice, 1362 GEN7_SAMPLER_INSTDONE); 1363 instdone->row[slice][subslice] = 1364 read_subslice_reg(engine, slice, subslice, 1365 GEN7_ROW_INSTDONE); 1366 } 1367 } 1368 1369 if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 55)) { 1370 for_each_instdone_gslice_dss_xehp(i915, sseu, iter, slice, subslice) 1371 instdone->geom_svg[slice][subslice] = 1372 read_subslice_reg(engine, slice, subslice, 1373 XEHPG_INSTDONE_GEOM_SVG); 1374 } 1375 } else if (GRAPHICS_VER(i915) >= 7) { 1376 instdone->instdone = 1377 intel_uncore_read(uncore, RING_INSTDONE(mmio_base)); 1378 1379 if (engine->id != RCS0) 1380 return; 1381 1382 instdone->slice_common = 1383 intel_uncore_read(uncore, GEN7_SC_INSTDONE); 1384 instdone->sampler[0][0] = 1385 intel_uncore_read(uncore, GEN7_SAMPLER_INSTDONE); 1386 instdone->row[0][0] = 1387 intel_uncore_read(uncore, GEN7_ROW_INSTDONE); 1388 } else if (GRAPHICS_VER(i915) >= 4) { 1389 instdone->instdone = 1390 intel_uncore_read(uncore, RING_INSTDONE(mmio_base)); 1391 if (engine->id == RCS0) 1392 /* HACK: Using the wrong struct member */ 1393 instdone->slice_common = 1394 intel_uncore_read(uncore, GEN4_INSTDONE1); 1395 } else { 1396 instdone->instdone = intel_uncore_read(uncore, GEN2_INSTDONE); 1397 } 1398 } 1399 1400 static bool ring_is_idle(struct intel_engine_cs *engine) 1401 { 1402 bool idle = true; 1403 1404 if (I915_SELFTEST_ONLY(!engine->mmio_base)) 1405 return true; 1406 1407 if 
(!intel_engine_pm_get_if_awake(engine))
		return true;

	/* First check that no commands are left in the ring */
	if ((ENGINE_READ(engine, RING_HEAD) & HEAD_ADDR) !=
	    (ENGINE_READ(engine, RING_TAIL) & TAIL_ADDR))
		idle = false;

	/* No bit for gen2, so assume the CS parser is idle */
	if (GRAPHICS_VER(engine->i915) > 2 &&
	    !(ENGINE_READ(engine, RING_MI_MODE) & MODE_IDLE))
		idle = false;

	intel_engine_pm_put(engine);

	return idle;
}

void __intel_engine_flush_submission(struct intel_engine_cs *engine, bool sync)
{
	struct tasklet_struct *t = &engine->sched_engine->tasklet;

	if (!t->callback)
		return;

	local_bh_disable();
	if (tasklet_trylock(t)) {
		/* Must wait for any GPU reset in progress. */
		if (__tasklet_is_enabled(t))
			t->callback(t);
		tasklet_unlock(t);
	}
	local_bh_enable();

	/* Synchronise and wait for the tasklet on another CPU */
	if (sync)
		tasklet_unlock_wait(t);
}

/**
 * intel_engine_is_idle() - Report if the engine has finished processing all work
 * @engine: the intel_engine_cs
 *
 * Return true if there are no requests pending, nothing left to be submitted
 * to hardware, and the engine is idle.
 */
bool intel_engine_is_idle(struct intel_engine_cs *engine)
{
	/* More white lies, if wedged, hw state is inconsistent */
	if (intel_gt_is_wedged(engine->gt))
		return true;

	if (!intel_engine_pm_is_awake(engine))
		return true;

	/* Waiting to drain ELSP? */
	intel_synchronize_hardirq(engine->i915);
	intel_engine_flush_submission(engine);

	/* ELSP is empty, but there are ready requests? E.g. after reset */
	if (!i915_sched_engine_is_empty(engine->sched_engine))
		return false;

	/* Ring stopped? */
	return ring_is_idle(engine);
}

bool intel_engines_are_idle(struct intel_gt *gt)
{
	struct intel_engine_cs *engine;
	enum intel_engine_id id;

	/*
	 * If the driver is wedged, HW state may be very inconsistent and
	 * report that it is still busy, even though we have stopped using it.
1482 */ 1483 if (intel_gt_is_wedged(gt)) 1484 return true; 1485 1486 /* Already parked (and passed an idleness test); must still be idle */ 1487 if (!READ_ONCE(gt->awake)) 1488 return true; 1489 1490 for_each_engine(engine, gt, id) { 1491 if (!intel_engine_is_idle(engine)) 1492 return false; 1493 } 1494 1495 return true; 1496 } 1497 1498 bool intel_engine_irq_enable(struct intel_engine_cs *engine) 1499 { 1500 if (!engine->irq_enable) 1501 return false; 1502 1503 /* Caller disables interrupts */ 1504 spin_lock(&engine->gt->irq_lock); 1505 engine->irq_enable(engine); 1506 spin_unlock(&engine->gt->irq_lock); 1507 1508 return true; 1509 } 1510 1511 void intel_engine_irq_disable(struct intel_engine_cs *engine) 1512 { 1513 if (!engine->irq_disable) 1514 return; 1515 1516 /* Caller disables interrupts */ 1517 spin_lock(&engine->gt->irq_lock); 1518 engine->irq_disable(engine); 1519 spin_unlock(&engine->gt->irq_lock); 1520 } 1521 1522 void intel_engines_reset_default_submission(struct intel_gt *gt) 1523 { 1524 struct intel_engine_cs *engine; 1525 enum intel_engine_id id; 1526 1527 for_each_engine(engine, gt, id) { 1528 if (engine->sanitize) 1529 engine->sanitize(engine); 1530 1531 engine->set_default_submission(engine); 1532 } 1533 } 1534 1535 bool intel_engine_can_store_dword(struct intel_engine_cs *engine) 1536 { 1537 switch (GRAPHICS_VER(engine->i915)) { 1538 case 2: 1539 return false; /* uses physical not virtual addresses */ 1540 case 3: 1541 /* maybe only uses physical not virtual addresses */ 1542 return !(IS_I915G(engine->i915) || IS_I915GM(engine->i915)); 1543 case 4: 1544 return !IS_I965G(engine->i915); /* who knows! */ 1545 case 6: 1546 return engine->class != VIDEO_DECODE_CLASS; /* b0rked */ 1547 default: 1548 return true; 1549 } 1550 } 1551 1552 static struct intel_timeline *get_timeline(struct i915_request *rq) 1553 { 1554 struct intel_timeline *tl; 1555 1556 /* 1557 * Even though we are holding the engine->sched_engine->lock here, there 1558 * is no control over the submission queue per-se and we are 1559 * inspecting the active state at a random point in time, with an 1560 * unknown queue. Play safe and make sure the timeline remains valid. 1561 * (Only being used for pretty printing, one extra kref shouldn't 1562 * cause a camel stampede!) 1563 */ 1564 rcu_read_lock(); 1565 tl = rcu_dereference(rq->timeline); 1566 if (!kref_get_unless_zero(&tl->kref)) 1567 tl = NULL; 1568 rcu_read_unlock(); 1569 1570 return tl; 1571 } 1572 1573 static int print_ring(char *buf, int sz, struct i915_request *rq) 1574 { 1575 int len = 0; 1576 1577 if (!i915_request_signaled(rq)) { 1578 struct intel_timeline *tl = get_timeline(rq); 1579 1580 len = scnprintf(buf, sz, 1581 "ring:{start:%08x, hwsp:%08x, seqno:%08x, runtime:%llums}, ", 1582 i915_ggtt_offset(rq->ring->vma), 1583 tl ? 
tl->hwsp_offset : 0, 1584 hwsp_seqno(rq), 1585 DIV_ROUND_CLOSEST_ULL(intel_context_get_total_runtime_ns(rq->context), 1586 1000 * 1000)); 1587 1588 if (tl) 1589 intel_timeline_put(tl); 1590 } 1591 1592 return len; 1593 } 1594 1595 static void hexdump(struct drm_printer *m, const void *buf, size_t len) 1596 { 1597 const size_t rowsize = 8 * sizeof(u32); 1598 const void *prev = NULL; 1599 bool skip = false; 1600 size_t pos; 1601 1602 for (pos = 0; pos < len; pos += rowsize) { 1603 char line[128]; 1604 1605 if (prev && !memcmp(prev, buf + pos, rowsize)) { 1606 if (!skip) { 1607 drm_printf(m, "*\n"); 1608 skip = true; 1609 } 1610 continue; 1611 } 1612 1613 WARN_ON_ONCE(hex_dump_to_buffer(buf + pos, len - pos, 1614 rowsize, sizeof(u32), 1615 line, sizeof(line), 1616 false) >= sizeof(line)); 1617 drm_printf(m, "[%04zx] %s\n", pos, line); 1618 1619 prev = buf + pos; 1620 skip = false; 1621 } 1622 } 1623 1624 static const char *repr_timer(const struct timer_list *t) 1625 { 1626 if (!READ_ONCE(t->expires)) 1627 return "inactive"; 1628 1629 if (timer_pending(t)) 1630 return "active"; 1631 1632 return "expired"; 1633 } 1634 1635 static void intel_engine_print_registers(struct intel_engine_cs *engine, 1636 struct drm_printer *m) 1637 { 1638 struct drm_i915_private *dev_priv = engine->i915; 1639 struct intel_engine_execlists * const execlists = &engine->execlists; 1640 u64 addr; 1641 1642 if (engine->id == RENDER_CLASS && IS_GRAPHICS_VER(dev_priv, 4, 7)) 1643 drm_printf(m, "\tCCID: 0x%08x\n", ENGINE_READ(engine, CCID)); 1644 if (HAS_EXECLISTS(dev_priv)) { 1645 drm_printf(m, "\tEL_STAT_HI: 0x%08x\n", 1646 ENGINE_READ(engine, RING_EXECLIST_STATUS_HI)); 1647 drm_printf(m, "\tEL_STAT_LO: 0x%08x\n", 1648 ENGINE_READ(engine, RING_EXECLIST_STATUS_LO)); 1649 } 1650 drm_printf(m, "\tRING_START: 0x%08x\n", 1651 ENGINE_READ(engine, RING_START)); 1652 drm_printf(m, "\tRING_HEAD: 0x%08x\n", 1653 ENGINE_READ(engine, RING_HEAD) & HEAD_ADDR); 1654 drm_printf(m, "\tRING_TAIL: 0x%08x\n", 1655 ENGINE_READ(engine, RING_TAIL) & TAIL_ADDR); 1656 drm_printf(m, "\tRING_CTL: 0x%08x%s\n", 1657 ENGINE_READ(engine, RING_CTL), 1658 ENGINE_READ(engine, RING_CTL) & (RING_WAIT | RING_WAIT_SEMAPHORE) ? " [waiting]" : ""); 1659 if (GRAPHICS_VER(engine->i915) > 2) { 1660 drm_printf(m, "\tRING_MODE: 0x%08x%s\n", 1661 ENGINE_READ(engine, RING_MI_MODE), 1662 ENGINE_READ(engine, RING_MI_MODE) & (MODE_IDLE) ? 
" [idle]" : ""); 1663 } 1664 1665 if (GRAPHICS_VER(dev_priv) >= 6) { 1666 drm_printf(m, "\tRING_IMR: 0x%08x\n", 1667 ENGINE_READ(engine, RING_IMR)); 1668 drm_printf(m, "\tRING_ESR: 0x%08x\n", 1669 ENGINE_READ(engine, RING_ESR)); 1670 drm_printf(m, "\tRING_EMR: 0x%08x\n", 1671 ENGINE_READ(engine, RING_EMR)); 1672 drm_printf(m, "\tRING_EIR: 0x%08x\n", 1673 ENGINE_READ(engine, RING_EIR)); 1674 } 1675 1676 addr = intel_engine_get_active_head(engine); 1677 drm_printf(m, "\tACTHD: 0x%08x_%08x\n", 1678 upper_32_bits(addr), lower_32_bits(addr)); 1679 addr = intel_engine_get_last_batch_head(engine); 1680 drm_printf(m, "\tBBADDR: 0x%08x_%08x\n", 1681 upper_32_bits(addr), lower_32_bits(addr)); 1682 if (GRAPHICS_VER(dev_priv) >= 8) 1683 addr = ENGINE_READ64(engine, RING_DMA_FADD, RING_DMA_FADD_UDW); 1684 else if (GRAPHICS_VER(dev_priv) >= 4) 1685 addr = ENGINE_READ(engine, RING_DMA_FADD); 1686 else 1687 addr = ENGINE_READ(engine, DMA_FADD_I8XX); 1688 drm_printf(m, "\tDMA_FADDR: 0x%08x_%08x\n", 1689 upper_32_bits(addr), lower_32_bits(addr)); 1690 if (GRAPHICS_VER(dev_priv) >= 4) { 1691 drm_printf(m, "\tIPEIR: 0x%08x\n", 1692 ENGINE_READ(engine, RING_IPEIR)); 1693 drm_printf(m, "\tIPEHR: 0x%08x\n", 1694 ENGINE_READ(engine, RING_IPEHR)); 1695 } else { 1696 drm_printf(m, "\tIPEIR: 0x%08x\n", ENGINE_READ(engine, IPEIR)); 1697 drm_printf(m, "\tIPEHR: 0x%08x\n", ENGINE_READ(engine, IPEHR)); 1698 } 1699 1700 if (intel_engine_uses_guc(engine)) { 1701 /* nothing to print yet */ 1702 } else if (HAS_EXECLISTS(dev_priv)) { 1703 struct i915_request * const *port, *rq; 1704 const u32 *hws = 1705 &engine->status_page.addr[I915_HWS_CSB_BUF0_INDEX]; 1706 const u8 num_entries = execlists->csb_size; 1707 unsigned int idx; 1708 u8 read, write; 1709 1710 drm_printf(m, "\tExeclist tasklet queued? %s (%s), preempt? %s, timeslice? %s\n", 1711 str_yes_no(test_bit(TASKLET_STATE_SCHED, &engine->sched_engine->tasklet.state)), 1712 str_enabled_disabled(!atomic_read(&engine->sched_engine->tasklet.count)), 1713 repr_timer(&engine->execlists.preempt), 1714 repr_timer(&engine->execlists.timer)); 1715 1716 read = execlists->csb_head; 1717 write = READ_ONCE(*execlists->csb_write); 1718 1719 drm_printf(m, "\tExeclist status: 0x%08x %08x; CSB read:%d, write:%d, entries:%d\n", 1720 ENGINE_READ(engine, RING_EXECLIST_STATUS_LO), 1721 ENGINE_READ(engine, RING_EXECLIST_STATUS_HI), 1722 read, write, num_entries); 1723 1724 if (read >= num_entries) 1725 read = 0; 1726 if (write >= num_entries) 1727 write = 0; 1728 if (read > write) 1729 write += num_entries; 1730 while (read < write) { 1731 idx = ++read % num_entries; 1732 drm_printf(m, "\tExeclist CSB[%d]: 0x%08x, context: %d\n", 1733 idx, hws[idx * 2], hws[idx * 2 + 1]); 1734 } 1735 1736 i915_sched_engine_active_lock_bh(engine->sched_engine); 1737 rcu_read_lock(); 1738 for (port = execlists->active; (rq = *port); port++) { 1739 char hdr[160]; 1740 int len; 1741 1742 len = scnprintf(hdr, sizeof(hdr), 1743 "\t\tActive[%d]: ccid:%08x%s%s, ", 1744 (int)(port - execlists->active), 1745 rq->context->lrc.ccid, 1746 intel_context_is_closed(rq->context) ? "!" : "", 1747 intel_context_is_banned(rq->context) ? 
"*" : ""); 1748 len += print_ring(hdr + len, sizeof(hdr) - len, rq); 1749 scnprintf(hdr + len, sizeof(hdr) - len, "rq: "); 1750 i915_request_show(m, rq, hdr, 0); 1751 } 1752 for (port = execlists->pending; (rq = *port); port++) { 1753 char hdr[160]; 1754 int len; 1755 1756 len = scnprintf(hdr, sizeof(hdr), 1757 "\t\tPending[%d]: ccid:%08x%s%s, ", 1758 (int)(port - execlists->pending), 1759 rq->context->lrc.ccid, 1760 intel_context_is_closed(rq->context) ? "!" : "", 1761 intel_context_is_banned(rq->context) ? "*" : ""); 1762 len += print_ring(hdr + len, sizeof(hdr) - len, rq); 1763 scnprintf(hdr + len, sizeof(hdr) - len, "rq: "); 1764 i915_request_show(m, rq, hdr, 0); 1765 } 1766 rcu_read_unlock(); 1767 i915_sched_engine_active_unlock_bh(engine->sched_engine); 1768 } else if (GRAPHICS_VER(dev_priv) > 6) { 1769 drm_printf(m, "\tPP_DIR_BASE: 0x%08x\n", 1770 ENGINE_READ(engine, RING_PP_DIR_BASE)); 1771 drm_printf(m, "\tPP_DIR_BASE_READ: 0x%08x\n", 1772 ENGINE_READ(engine, RING_PP_DIR_BASE_READ)); 1773 drm_printf(m, "\tPP_DIR_DCLV: 0x%08x\n", 1774 ENGINE_READ(engine, RING_PP_DIR_DCLV)); 1775 } 1776 } 1777 1778 static void print_request_ring(struct drm_printer *m, struct i915_request *rq) 1779 { 1780 struct i915_vma_resource *vma_res = rq->batch_res; 1781 void *ring; 1782 int size; 1783 1784 drm_printf(m, 1785 "[head %04x, postfix %04x, tail %04x, batch 0x%08x_%08x]:\n", 1786 rq->head, rq->postfix, rq->tail, 1787 vma_res ? upper_32_bits(vma_res->start) : ~0u, 1788 vma_res ? lower_32_bits(vma_res->start) : ~0u); 1789 1790 size = rq->tail - rq->head; 1791 if (rq->tail < rq->head) 1792 size += rq->ring->size; 1793 1794 ring = kmalloc(size, GFP_ATOMIC); 1795 if (ring) { 1796 const void *vaddr = rq->ring->vaddr; 1797 unsigned int head = rq->head; 1798 unsigned int len = 0; 1799 1800 if (rq->tail < head) { 1801 len = rq->ring->size - head; 1802 memcpy(ring, vaddr + head, len); 1803 head = 0; 1804 } 1805 memcpy(ring + len, vaddr + head, size - len); 1806 1807 hexdump(m, ring, size); 1808 kfree(ring); 1809 } 1810 } 1811 1812 static unsigned long list_count(struct list_head *list) 1813 { 1814 struct list_head *pos; 1815 unsigned long count = 0; 1816 1817 list_for_each(pos, list) 1818 count++; 1819 1820 return count; 1821 } 1822 1823 static unsigned long read_ul(void *p, size_t x) 1824 { 1825 return *(unsigned long *)(p + x); 1826 } 1827 1828 static void print_properties(struct intel_engine_cs *engine, 1829 struct drm_printer *m) 1830 { 1831 static const struct pmap { 1832 size_t offset; 1833 const char *name; 1834 } props[] = { 1835 #define P(x) { \ 1836 .offset = offsetof(typeof(engine->props), x), \ 1837 .name = #x \ 1838 } 1839 P(heartbeat_interval_ms), 1840 P(max_busywait_duration_ns), 1841 P(preempt_timeout_ms), 1842 P(stop_timeout_ms), 1843 P(timeslice_duration_ms), 1844 1845 {}, 1846 #undef P 1847 }; 1848 const struct pmap *p; 1849 1850 drm_printf(m, "\tProperties:\n"); 1851 for (p = props; p->name; p++) 1852 drm_printf(m, "\t\t%s: %lu [default %lu]\n", 1853 p->name, 1854 read_ul(&engine->props, p->offset), 1855 read_ul(&engine->defaults, p->offset)); 1856 } 1857 1858 static void engine_dump_request(struct i915_request *rq, struct drm_printer *m, const char *msg) 1859 { 1860 struct intel_timeline *tl = get_timeline(rq); 1861 1862 i915_request_show(m, rq, msg, 0); 1863 1864 drm_printf(m, "\t\tring->start: 0x%08x\n", 1865 i915_ggtt_offset(rq->ring->vma)); 1866 drm_printf(m, "\t\tring->head: 0x%08x\n", 1867 rq->ring->head); 1868 drm_printf(m, "\t\tring->tail: 0x%08x\n", 1869 rq->ring->tail); 1870 
drm_printf(m, "\t\tring->emit: 0x%08x\n", 1871 rq->ring->emit); 1872 drm_printf(m, "\t\tring->space: 0x%08x\n", 1873 rq->ring->space); 1874 1875 if (tl) { 1876 drm_printf(m, "\t\tring->hwsp: 0x%08x\n", 1877 tl->hwsp_offset); 1878 intel_timeline_put(tl); 1879 } 1880 1881 print_request_ring(m, rq); 1882 1883 if (rq->context->lrc_reg_state) { 1884 drm_printf(m, "Logical Ring Context:\n"); 1885 hexdump(m, rq->context->lrc_reg_state, PAGE_SIZE); 1886 } 1887 } 1888 1889 void intel_engine_dump_active_requests(struct list_head *requests, 1890 struct i915_request *hung_rq, 1891 struct drm_printer *m) 1892 { 1893 struct i915_request *rq; 1894 const char *msg; 1895 enum i915_request_state state; 1896 1897 list_for_each_entry(rq, requests, sched.link) { 1898 if (rq == hung_rq) 1899 continue; 1900 1901 state = i915_test_request_state(rq); 1902 if (state < I915_REQUEST_QUEUED) 1903 continue; 1904 1905 if (state == I915_REQUEST_ACTIVE) 1906 msg = "\t\tactive on engine"; 1907 else 1908 msg = "\t\tactive in queue"; 1909 1910 engine_dump_request(rq, m, msg); 1911 } 1912 } 1913 1914 static void engine_dump_active_requests(struct intel_engine_cs *engine, struct drm_printer *m) 1915 { 1916 struct i915_request *hung_rq = NULL; 1917 struct intel_context *ce; 1918 bool guc; 1919 1920 /* 1921 * No need for an engine->irq_seqno_barrier() before the seqno reads. 1922 * The GPU is still running so requests are still executing and any 1923 * hardware reads will be out of date by the time they are reported. 1924 * But the intention here is just to report an instantaneous snapshot 1925 * so that's fine. 1926 */ 1927 lockdep_assert_held(&engine->sched_engine->lock); 1928 1929 drm_printf(m, "\tRequests:\n"); 1930 1931 guc = intel_uc_uses_guc_submission(&engine->gt->uc); 1932 if (guc) { 1933 ce = intel_engine_get_hung_context(engine); 1934 if (ce) 1935 hung_rq = intel_context_find_active_request(ce); 1936 } else { 1937 hung_rq = intel_engine_execlist_find_hung_request(engine); 1938 } 1939 1940 if (hung_rq) 1941 engine_dump_request(hung_rq, m, "\t\thung"); 1942 1943 if (guc) 1944 intel_guc_dump_active_requests(engine, hung_rq, m); 1945 else 1946 intel_engine_dump_active_requests(&engine->sched_engine->requests, 1947 hung_rq, m); 1948 } 1949 1950 void intel_engine_dump(struct intel_engine_cs *engine, 1951 struct drm_printer *m, 1952 const char *header, ...) 1953 { 1954 struct i915_gpu_error * const error = &engine->i915->gpu_error; 1955 struct i915_request *rq; 1956 intel_wakeref_t wakeref; 1957 unsigned long flags; 1958 ktime_t dummy; 1959 1960 if (header) { 1961 va_list ap; 1962 1963 va_start(ap, header); 1964 drm_vprintf(m, header, &ap); 1965 va_end(ap); 1966 } 1967 1968 if (intel_gt_is_wedged(engine->gt)) 1969 drm_printf(m, "*** WEDGED ***\n"); 1970 1971 drm_printf(m, "\tAwake? 
%d\n", atomic_read(&engine->wakeref.count)); 1972 drm_printf(m, "\tBarriers?: %s\n", 1973 str_yes_no(!llist_empty(&engine->barrier_tasks))); 1974 drm_printf(m, "\tLatency: %luus\n", 1975 ewma__engine_latency_read(&engine->latency)); 1976 if (intel_engine_supports_stats(engine)) 1977 drm_printf(m, "\tRuntime: %llums\n", 1978 ktime_to_ms(intel_engine_get_busy_time(engine, 1979 &dummy))); 1980 drm_printf(m, "\tForcewake: %x domains, %d active\n", 1981 engine->fw_domain, READ_ONCE(engine->fw_active)); 1982 1983 rcu_read_lock(); 1984 rq = READ_ONCE(engine->heartbeat.systole); 1985 if (rq) 1986 drm_printf(m, "\tHeartbeat: %d ms ago\n", 1987 jiffies_to_msecs(jiffies - rq->emitted_jiffies)); 1988 rcu_read_unlock(); 1989 drm_printf(m, "\tReset count: %d (global %d)\n", 1990 i915_reset_engine_count(error, engine), 1991 i915_reset_count(error)); 1992 print_properties(engine, m); 1993 1994 spin_lock_irqsave(&engine->sched_engine->lock, flags); 1995 engine_dump_active_requests(engine, m); 1996 1997 drm_printf(m, "\tOn hold?: %lu\n", 1998 list_count(&engine->sched_engine->hold)); 1999 spin_unlock_irqrestore(&engine->sched_engine->lock, flags); 2000 2001 drm_printf(m, "\tMMIO base: 0x%08x\n", engine->mmio_base); 2002 wakeref = intel_runtime_pm_get_if_in_use(engine->uncore->rpm); 2003 if (wakeref) { 2004 intel_engine_print_registers(engine, m); 2005 intel_runtime_pm_put(engine->uncore->rpm, wakeref); 2006 } else { 2007 drm_printf(m, "\tDevice is asleep; skipping register dump\n"); 2008 } 2009 2010 intel_execlists_show_requests(engine, m, i915_request_show, 8); 2011 2012 drm_printf(m, "HWSP:\n"); 2013 hexdump(m, engine->status_page.addr, PAGE_SIZE); 2014 2015 drm_printf(m, "Idle? %s\n", str_yes_no(intel_engine_is_idle(engine))); 2016 2017 intel_engine_print_breadcrumbs(engine, m); 2018 } 2019 2020 /** 2021 * intel_engine_get_busy_time() - Return current accumulated engine busyness 2022 * @engine: engine to report on 2023 * @now: monotonic timestamp of sampling 2024 * 2025 * Returns accumulated time @engine was busy since engine stats were enabled. 2026 */ 2027 ktime_t intel_engine_get_busy_time(struct intel_engine_cs *engine, ktime_t *now) 2028 { 2029 return engine->busyness(engine, now); 2030 } 2031 2032 struct intel_context * 2033 intel_engine_create_virtual(struct intel_engine_cs **siblings, 2034 unsigned int count, unsigned long flags) 2035 { 2036 if (count == 0) 2037 return ERR_PTR(-EINVAL); 2038 2039 if (count == 1 && !(flags & FORCE_VIRTUAL)) 2040 return intel_context_create(siblings[0]); 2041 2042 GEM_BUG_ON(!siblings[0]->cops->create_virtual); 2043 return siblings[0]->cops->create_virtual(siblings, count, flags); 2044 } 2045 2046 struct i915_request * 2047 intel_engine_execlist_find_hung_request(struct intel_engine_cs *engine) 2048 { 2049 struct i915_request *request, *active = NULL; 2050 2051 /* 2052 * This search does not work in GuC submission mode. However, the GuC 2053 * will report the hanging context directly to the driver itself. So 2054 * the driver should never get here when in GuC mode. 2055 */ 2056 GEM_BUG_ON(intel_uc_uses_guc_submission(&engine->gt->uc)); 2057 2058 /* 2059 * We are called by the error capture, reset and to dump engine 2060 * state at random points in time. In particular, note that neither is 2061 * crucially ordered with an interrupt. After a hang, the GPU is dead 2062 * and we assume that no more writes can happen (we waited long enough 2063 * for all writes that were in transaction to be flushed) - adding an 2064 * extra delay for a recent interrupt is pointless. 
Hence, we do 2065 * not need an engine->irq_seqno_barrier() before the seqno reads. 2066 * At all other times, we must assume the GPU is still running, but 2067 * we only care about the snapshot of this moment. 2068 */ 2069 lockdep_assert_held(&engine->sched_engine->lock); 2070 2071 rcu_read_lock(); 2072 request = execlists_active(&engine->execlists); 2073 if (request) { 2074 struct intel_timeline *tl = request->context->timeline; 2075 2076 list_for_each_entry_from_reverse(request, &tl->requests, link) { 2077 if (__i915_request_is_complete(request)) 2078 break; 2079 2080 active = request; 2081 } 2082 } 2083 rcu_read_unlock(); 2084 if (active) 2085 return active; 2086 2087 list_for_each_entry(request, &engine->sched_engine->requests, 2088 sched.link) { 2089 if (i915_test_request_state(request) != I915_REQUEST_ACTIVE) 2090 continue; 2091 2092 active = request; 2093 break; 2094 } 2095 2096 return active; 2097 } 2098 2099 void xehp_enable_ccs_engines(struct intel_engine_cs *engine) 2100 { 2101 /* 2102 * If there are any non-fused-off CCS engines, we need to enable CCS 2103 * support in the RCU_MODE register. This only needs to be done once, 2104 * so for simplicity we'll take care of this in the RCS engine's 2105 * resume handler; since the RCS and all CCS engines belong to the 2106 * same reset domain and are reset together, this will also take care 2107 * of re-applying the setting after i915-triggered resets. 2108 */ 2109 if (!CCS_MASK(engine->gt)) 2110 return; 2111 2112 intel_uncore_write(engine->uncore, GEN12_RCU_MODE, 2113 _MASKED_BIT_ENABLE(GEN12_RCU_MODE_CCS_ENABLE)); 2114 } 2115 2116 #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) 2117 #include "mock_engine.c" 2118 #include "selftest_engine.c" 2119 #include "selftest_engine_cs.c" 2120 #endif 2121
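/*
 * Illustrative sketch (not part of the driver): how a debug-style caller
 * might dump per-engine state using the helpers in this file. The
 * drm_info_printer() helper from <drm/drm_print.h> and the for_each_engine()
 * iterator are assumed to be available to the caller; locking and error
 * handling are elided, and the function name is hypothetical.
 *
 *	static void dump_all_engines(struct intel_gt *gt)
 *	{
 *		struct drm_printer p = drm_info_printer(gt->i915->drm.dev);
 *		struct intel_engine_cs *engine;
 *		enum intel_engine_id id;
 *
 *		for_each_engine(engine, gt, id)
 *			intel_engine_dump(engine, &p, "%s\n", engine->name);
 *	}
 */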