/*
 * Copyright © 2016 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 */

#include <drm/drm_print.h>

#include "gem/i915_gem_context.h"

#include "i915_drv.h"

#include "gt/intel_gt.h"

#include "intel_engine.h"
#include "intel_engine_pm.h"
#include "intel_context.h"
#include "intel_lrc.h"
#include "intel_reset.h"

/* Haswell does have the CXT_SIZE register, however it does not appear to be
 * valid. Now, docs explain in dwords what is in the context object. The full
 * size is 70720 bytes, however, the power context and execlist context will
 * never be saved (power context is stored elsewhere, and execlists don't work
 * on HSW) - so the final size, including the extra state required for the
 * Resource Streamer, is 66944 bytes, which rounds to 17 pages.
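 *
 * A quick arithmetic check of that figure (assuming 4 KiB pages):
 * 66944 / 4096 = 16.34..., which rounds up to the 17 pages reserved below.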
 */
#define HSW_CXT_TOTAL_SIZE		(17 * PAGE_SIZE)

#define DEFAULT_LR_CONTEXT_RENDER_SIZE	(22 * PAGE_SIZE)
#define GEN8_LR_CONTEXT_RENDER_SIZE	(20 * PAGE_SIZE)
#define GEN9_LR_CONTEXT_RENDER_SIZE	(22 * PAGE_SIZE)
#define GEN10_LR_CONTEXT_RENDER_SIZE	(18 * PAGE_SIZE)
#define GEN11_LR_CONTEXT_RENDER_SIZE	(14 * PAGE_SIZE)

#define GEN8_LR_CONTEXT_OTHER_SIZE	( 2 * PAGE_SIZE)

struct engine_class_info {
	const char *name;
	u8 uabi_class;
};

static const struct engine_class_info intel_engine_classes[] = {
	[RENDER_CLASS] = {
		.name = "rcs",
		.uabi_class = I915_ENGINE_CLASS_RENDER,
	},
	[COPY_ENGINE_CLASS] = {
		.name = "bcs",
		.uabi_class = I915_ENGINE_CLASS_COPY,
	},
	[VIDEO_DECODE_CLASS] = {
		.name = "vcs",
		.uabi_class = I915_ENGINE_CLASS_VIDEO,
	},
	[VIDEO_ENHANCEMENT_CLASS] = {
		.name = "vecs",
		.uabi_class = I915_ENGINE_CLASS_VIDEO_ENHANCE,
	},
};

#define MAX_MMIO_BASES 3
struct engine_info {
	unsigned int hw_id;
	u8 class;
	u8 instance;
	/* mmio bases table *must* be sorted in reverse gen order */
	struct engine_mmio_base {
		u32 gen : 8;
		u32 base : 24;
	} mmio_bases[MAX_MMIO_BASES];
};

static const struct engine_info intel_engines[] = {
	[RCS0] = {
		.hw_id = RCS0_HW,
		.class = RENDER_CLASS,
		.instance = 0,
		.mmio_bases = {
			{ .gen = 1, .base = RENDER_RING_BASE }
		},
	},
	[BCS0] = {
		.hw_id = BCS0_HW,
		.class = COPY_ENGINE_CLASS,
		.instance = 0,
		.mmio_bases = {
			{ .gen = 6, .base = BLT_RING_BASE }
		},
	},
	[VCS0] = {
		.hw_id = VCS0_HW,
		.class = VIDEO_DECODE_CLASS,
		.instance = 0,
		.mmio_bases = {
			{ .gen = 11, .base = GEN11_BSD_RING_BASE },
			{ .gen = 6, .base = GEN6_BSD_RING_BASE },
			{ .gen = 4, .base = BSD_RING_BASE }
		},
	},
	[VCS1] = {
		.hw_id = VCS1_HW,
		.class = VIDEO_DECODE_CLASS,
		.instance = 1,
		.mmio_bases = {
			{ .gen = 11, .base = GEN11_BSD2_RING_BASE },
			{ .gen = 8, .base = GEN8_BSD2_RING_BASE }
		},
	},
	[VCS2] = {
		.hw_id = VCS2_HW,
		.class = VIDEO_DECODE_CLASS,
		.instance = 2,
		.mmio_bases = {
			{ .gen = 11, .base = GEN11_BSD3_RING_BASE }
		},
	},
	[VCS3] = {
		.hw_id = VCS3_HW,
		.class = VIDEO_DECODE_CLASS,
		.instance = 3,
		.mmio_bases = {
			{ .gen = 11, .base = GEN11_BSD4_RING_BASE }
		},
	},
	[VECS0] = {
		.hw_id = VECS0_HW,
		.class = VIDEO_ENHANCEMENT_CLASS,
		.instance = 0,
		.mmio_bases = {
			{ .gen = 11, .base = GEN11_VEBOX_RING_BASE },
			{ .gen = 7, .base = VEBOX_RING_BASE }
		},
	},
	[VECS1] = {
		.hw_id = VECS1_HW,
		.class = VIDEO_ENHANCEMENT_CLASS,
		.instance = 1,
		.mmio_bases = {
			{ .gen = 11, .base = GEN11_VEBOX2_RING_BASE }
		},
	},
};

/**
 * intel_engine_context_size() - return the size of the context for an engine
 * @dev_priv: i915 device private
 * @class: engine class
 *
 * Each engine class may require a different amount of space for a context
 * image.
 *
 * Return: size (in bytes) of an engine class specific context image
 *
 * Note: this size includes the HWSP, which is part of the context image
 * in LRC mode, but does not include the "shared data page" used with
 * GuC submission. The caller should account for this if using the GuC.
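 *
 * A typical caller just records the result for a later allocation; e.g.
 * intel_engine_setup() below does:
 *
 *	engine->context_size = intel_engine_context_size(dev_priv, engine->class);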
 */
u32 intel_engine_context_size(struct drm_i915_private *dev_priv, u8 class)
{
	u32 cxt_size;

	BUILD_BUG_ON(I915_GTT_PAGE_SIZE != PAGE_SIZE);

	switch (class) {
	case RENDER_CLASS:
		switch (INTEL_GEN(dev_priv)) {
		default:
			MISSING_CASE(INTEL_GEN(dev_priv));
			return DEFAULT_LR_CONTEXT_RENDER_SIZE;
		case 11:
			return GEN11_LR_CONTEXT_RENDER_SIZE;
		case 10:
			return GEN10_LR_CONTEXT_RENDER_SIZE;
		case 9:
			return GEN9_LR_CONTEXT_RENDER_SIZE;
		case 8:
			return GEN8_LR_CONTEXT_RENDER_SIZE;
		case 7:
			if (IS_HASWELL(dev_priv))
				return HSW_CXT_TOTAL_SIZE;

			cxt_size = I915_READ(GEN7_CXT_SIZE);
			return round_up(GEN7_CXT_TOTAL_SIZE(cxt_size) * 64,
					PAGE_SIZE);
		case 6:
			cxt_size = I915_READ(CXT_SIZE);
			return round_up(GEN6_CXT_TOTAL_SIZE(cxt_size) * 64,
					PAGE_SIZE);
		case 5:
		case 4:
			/*
			 * There is a discrepancy here between the size reported
			 * by the register and the size of the context layout
			 * in the docs. Both are described as authoritative!
			 *
			 * The discrepancy is on the order of a few cachelines,
			 * but the total is under one page (4k), which is our
			 * minimum allocation anyway so it should all come
			 * out in the wash.
			 */
			cxt_size = I915_READ(CXT_SIZE) + 1;
			DRM_DEBUG_DRIVER("gen%d CXT_SIZE = %d bytes [0x%08x]\n",
					 INTEL_GEN(dev_priv),
					 cxt_size * 64,
					 cxt_size - 1);
			return round_up(cxt_size * 64, PAGE_SIZE);
		case 3:
		case 2:
		/* For the special day when i810 gets merged. */
		case 1:
			return 0;
		}
		break;
	default:
		MISSING_CASE(class);
		/* fall through */
	case VIDEO_DECODE_CLASS:
	case VIDEO_ENHANCEMENT_CLASS:
	case COPY_ENGINE_CLASS:
		if (INTEL_GEN(dev_priv) < 8)
			return 0;
		return GEN8_LR_CONTEXT_OTHER_SIZE;
	}
}

static u32 __engine_mmio_base(struct drm_i915_private *i915,
			      const struct engine_mmio_base *bases)
{
	int i;

	for (i = 0; i < MAX_MMIO_BASES; i++)
		if (INTEL_GEN(i915) >= bases[i].gen)
			break;

	GEM_BUG_ON(i == MAX_MMIO_BASES);
	GEM_BUG_ON(!bases[i].base);

	return bases[i].base;
}

static void __sprint_engine_name(char *name, const struct engine_info *info)
{
	WARN_ON(snprintf(name, INTEL_ENGINE_CS_MAX_NAME, "%s%u",
			 intel_engine_classes[info->class].name,
			 info->instance) >= INTEL_ENGINE_CS_MAX_NAME);
}

void intel_engine_set_hwsp_writemask(struct intel_engine_cs *engine, u32 mask)
{
	/*
	 * Though they added more rings on g4x/ilk, they did not add
	 * per-engine HWSTAM until gen6.
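	 *
	 * A mask of ~0u blocks every status write into the HWSP, which is
	 * how intel_engine_sanitize_mmio() below scrubs an unknown HWSP.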
	 */
	if (INTEL_GEN(engine->i915) < 6 && engine->class != RENDER_CLASS)
		return;

	if (INTEL_GEN(engine->i915) >= 3)
		ENGINE_WRITE(engine, RING_HWSTAM, mask);
	else
		ENGINE_WRITE16(engine, RING_HWSTAM, mask);
}

static void intel_engine_sanitize_mmio(struct intel_engine_cs *engine)
{
	/* Mask off all writes into the unknown HWSP */
	intel_engine_set_hwsp_writemask(engine, ~0u);
}

static int
intel_engine_setup(struct drm_i915_private *dev_priv,
		   enum intel_engine_id id)
{
	const struct engine_info *info = &intel_engines[id];
	struct intel_engine_cs *engine;

	GEM_BUG_ON(info->class >= ARRAY_SIZE(intel_engine_classes));

	BUILD_BUG_ON(MAX_ENGINE_CLASS >= BIT(GEN11_ENGINE_CLASS_WIDTH));
	BUILD_BUG_ON(MAX_ENGINE_INSTANCE >= BIT(GEN11_ENGINE_INSTANCE_WIDTH));

	if (GEM_DEBUG_WARN_ON(info->class > MAX_ENGINE_CLASS))
		return -EINVAL;

	if (GEM_DEBUG_WARN_ON(info->instance > MAX_ENGINE_INSTANCE))
		return -EINVAL;

	if (GEM_DEBUG_WARN_ON(dev_priv->engine_class[info->class][info->instance]))
		return -EINVAL;

	GEM_BUG_ON(dev_priv->engine[id]);
	engine = kzalloc(sizeof(*engine), GFP_KERNEL);
	if (!engine)
		return -ENOMEM;

	BUILD_BUG_ON(BITS_PER_TYPE(engine->mask) < I915_NUM_ENGINES);

	engine->id = id;
	engine->mask = BIT(id);
	engine->i915 = dev_priv;
	engine->gt = &dev_priv->gt;
	engine->uncore = &dev_priv->uncore;
	__sprint_engine_name(engine->name, info);
	engine->hw_id = engine->guc_id = info->hw_id;
	engine->mmio_base = __engine_mmio_base(dev_priv, info->mmio_bases);
	engine->class = info->class;
	engine->instance = info->instance;

	/*
	 * To be overridden by the backend on setup. However, to facilitate
	 * cleanup on error during setup, we always provide the destroy vfunc.
	 */
	engine->destroy = (typeof(engine->destroy))kfree;

	engine->uabi_class = intel_engine_classes[info->class].uabi_class;

	engine->context_size = intel_engine_context_size(dev_priv,
							 engine->class);
	if (WARN_ON(engine->context_size > BIT(20)))
		engine->context_size = 0;
	if (engine->context_size)
		DRIVER_CAPS(dev_priv)->has_logical_contexts = true;

	/* Nothing to do here, execute in order of dependencies */
	engine->schedule = NULL;

	seqlock_init(&engine->stats.lock);

	ATOMIC_INIT_NOTIFIER_HEAD(&engine->context_status_notifier);

	/* Scrub mmio state on takeover */
	intel_engine_sanitize_mmio(engine);

	dev_priv->engine_class[info->class][info->instance] = engine;
	dev_priv->engine[id] = engine;
	return 0;
}

static void __setup_engine_capabilities(struct intel_engine_cs *engine)
{
	struct drm_i915_private *i915 = engine->i915;

	if (engine->class == VIDEO_DECODE_CLASS) {
		/*
		 * HEVC support is present on the first engine instance
		 * before Gen11 and on all instances afterwards.
		 */
		if (INTEL_GEN(i915) >= 11 ||
		    (INTEL_GEN(i915) >= 9 && engine->instance == 0))
			engine->uabi_capabilities |=
				I915_VIDEO_CLASS_CAPABILITY_HEVC;

		/*
		 * SFC block is present only on even logical engine
		 * instances.
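		 *
		 * On gen11+ the available instances are described by the
		 * vdbox_sfc_access mask checked below; on gen9/10 only the
		 * first video engine has SFC access.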
		 */
		if ((INTEL_GEN(i915) >= 11 &&
		     RUNTIME_INFO(i915)->vdbox_sfc_access & engine->mask) ||
		    (INTEL_GEN(i915) >= 9 && engine->instance == 0))
			engine->uabi_capabilities |=
				I915_VIDEO_AND_ENHANCE_CLASS_CAPABILITY_SFC;
	} else if (engine->class == VIDEO_ENHANCEMENT_CLASS) {
		if (INTEL_GEN(i915) >= 9)
			engine->uabi_capabilities |=
				I915_VIDEO_AND_ENHANCE_CLASS_CAPABILITY_SFC;
	}
}

static void intel_setup_engine_capabilities(struct drm_i915_private *i915)
{
	struct intel_engine_cs *engine;
	enum intel_engine_id id;

	for_each_engine(engine, i915, id)
		__setup_engine_capabilities(engine);
}

/**
 * intel_engines_cleanup() - free the resources allocated for Command Streamers
 * @i915: the i915 device
 */
void intel_engines_cleanup(struct drm_i915_private *i915)
{
	struct intel_engine_cs *engine;
	enum intel_engine_id id;

	for_each_engine(engine, i915, id) {
		engine->destroy(engine);
		i915->engine[id] = NULL;
	}
}

/**
 * intel_engines_init_mmio() - allocate and prepare the Engine Command Streamers
 * @i915: the i915 device
 *
 * Return: non-zero if the initialization failed.
 */
int intel_engines_init_mmio(struct drm_i915_private *i915)
{
	struct intel_device_info *device_info = mkwrite_device_info(i915);
	const unsigned int engine_mask = INTEL_INFO(i915)->engine_mask;
	unsigned int mask = 0;
	unsigned int i;
	int err;

	WARN_ON(engine_mask == 0);
	WARN_ON(engine_mask &
		GENMASK(BITS_PER_TYPE(mask) - 1, I915_NUM_ENGINES));

	if (i915_inject_probe_failure())
		return -ENODEV;

	for (i = 0; i < ARRAY_SIZE(intel_engines); i++) {
		if (!HAS_ENGINE(i915, i))
			continue;

		err = intel_engine_setup(i915, i);
		if (err)
			goto cleanup;

		mask |= BIT(i);
	}

	/*
	 * Catch failures to update the intel_engines table when new engines
	 * are added to the driver, by warning about and disabling the
	 * forgotten engines.
	 */
	if (WARN_ON(mask != engine_mask))
		device_info->engine_mask = mask;

	RUNTIME_INFO(i915)->num_engines = hweight32(mask);

	intel_gt_check_and_clear_faults(&i915->gt);

	intel_setup_engine_capabilities(i915);

	return 0;

cleanup:
	intel_engines_cleanup(i915);
	return err;
}

/**
 * intel_engines_init() - init the Engine Command Streamers
 * @i915: i915 device private
 *
 * Return: non-zero if the initialization failed.
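 *
 * This picks between execlists and legacy ringbuffer submission based on
 * HAS_EXECLISTS() and runs the chosen backend init on every engine.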
 */
int intel_engines_init(struct drm_i915_private *i915)
{
	int (*init)(struct intel_engine_cs *engine);
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	int err;

	if (HAS_EXECLISTS(i915))
		init = intel_execlists_submission_init;
	else
		init = intel_ring_submission_init;

	for_each_engine(engine, i915, id) {
		err = init(engine);
		if (err)
			goto cleanup;
	}

	return 0;

cleanup:
	intel_engines_cleanup(i915);
	return err;
}

static void intel_engine_init_batch_pool(struct intel_engine_cs *engine)
{
	i915_gem_batch_pool_init(&engine->batch_pool, engine);
}

void intel_engine_init_execlists(struct intel_engine_cs *engine)
{
	struct intel_engine_execlists * const execlists = &engine->execlists;

	execlists->port_mask = 1;
	GEM_BUG_ON(!is_power_of_2(execlists_num_ports(execlists)));
	GEM_BUG_ON(execlists_num_ports(execlists) > EXECLIST_MAX_PORTS);

	memset(execlists->pending, 0, sizeof(execlists->pending));
	execlists->active =
		memset(execlists->inflight, 0, sizeof(execlists->inflight));

	execlists->queue_priority_hint = INT_MIN;
	execlists->queue = RB_ROOT_CACHED;
}

static void cleanup_status_page(struct intel_engine_cs *engine)
{
	struct i915_vma *vma;

	/* Prevent writes into HWSP after returning the page to the system */
	intel_engine_set_hwsp_writemask(engine, ~0u);

	vma = fetch_and_zero(&engine->status_page.vma);
	if (!vma)
		return;

	if (!HWS_NEEDS_PHYSICAL(engine->i915))
		i915_vma_unpin(vma);

	i915_gem_object_unpin_map(vma->obj);
	i915_gem_object_put(vma->obj);
}

static int pin_ggtt_status_page(struct intel_engine_cs *engine,
				struct i915_vma *vma)
{
	unsigned int flags;

	flags = PIN_GLOBAL;
	if (!HAS_LLC(engine->i915))
		/*
		 * On g33, we cannot place HWS above 256MiB, so
		 * restrict its pinning to the low mappable arena.
		 * Though this restriction is not documented for
		 * gen4, gen5, or byt, they also behave similarly
		 * and hang if the HWS is placed at the top of the
		 * GTT. To generalise, it appears that all !llc
		 * platforms have issues with us placing the HWS
		 * above the mappable region (even though we never
		 * actually map it).
		 */
		flags |= PIN_MAPPABLE;
	else
		flags |= PIN_HIGH;

	return i915_vma_pin(vma, 0, 0, flags);
}

static int init_status_page(struct intel_engine_cs *engine)
{
	struct drm_i915_gem_object *obj;
	struct i915_vma *vma;
	void *vaddr;
	int ret;

	/*
	 * Though the HWS register does support 36bit addresses, historically
	 * we have had hangs and corruption reported due to wild writes if
	 * the HWS is placed above 4G. We only allow objects to be allocated
	 * in GFP_DMA32 for i965, and no earlier physical address users had
	 * access to more than 4G.
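	 *
	 * The status page is therefore kept to a single internal page and,
	 * unless the platform needs a physical HWS, pinned into the GGTT
	 * below.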
	 */
	obj = i915_gem_object_create_internal(engine->i915, PAGE_SIZE);
	if (IS_ERR(obj)) {
		DRM_ERROR("Failed to allocate status page\n");
		return PTR_ERR(obj);
	}

	i915_gem_object_set_cache_coherency(obj, I915_CACHE_LLC);

	vma = i915_vma_instance(obj, &engine->gt->ggtt->vm, NULL);
	if (IS_ERR(vma)) {
		ret = PTR_ERR(vma);
		goto err;
	}

	vaddr = i915_gem_object_pin_map(obj, I915_MAP_WB);
	if (IS_ERR(vaddr)) {
		ret = PTR_ERR(vaddr);
		goto err;
	}

	engine->status_page.addr = memset(vaddr, 0, PAGE_SIZE);
	engine->status_page.vma = vma;

	if (!HWS_NEEDS_PHYSICAL(engine->i915)) {
		ret = pin_ggtt_status_page(engine, vma);
		if (ret)
			goto err_unpin;
	}

	return 0;

err_unpin:
	i915_gem_object_unpin_map(obj);
err:
	i915_gem_object_put(obj);
	return ret;
}

static int intel_engine_setup_common(struct intel_engine_cs *engine)
{
	int err;

	init_llist_head(&engine->barrier_tasks);

	err = init_status_page(engine);
	if (err)
		return err;

	intel_engine_init_active(engine, ENGINE_PHYSICAL);
	intel_engine_init_breadcrumbs(engine);
	intel_engine_init_execlists(engine);
	intel_engine_init_hangcheck(engine);
	intel_engine_init_batch_pool(engine);
	intel_engine_init_cmd_parser(engine);
	intel_engine_init__pm(engine);

	/* Use the whole device by default */
	engine->sseu =
		intel_sseu_from_device_info(&RUNTIME_INFO(engine->i915)->sseu);

	intel_engine_init_workarounds(engine);
	intel_engine_init_whitelist(engine);
	intel_engine_init_ctx_wa(engine);

	return 0;
}

/**
 * intel_engines_setup() - setup engine state not requiring hw access
 * @i915: Device to setup.
 *
 * Initializes engine structure members shared between legacy and execlists
 * submission modes which do not require hardware access.
 *
 * Typically done early in the submission mode specific engine setup stage.
 */
int intel_engines_setup(struct drm_i915_private *i915)
{
	int (*setup)(struct intel_engine_cs *engine);
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	int err;

	if (HAS_EXECLISTS(i915))
		setup = intel_execlists_submission_setup;
	else
		setup = intel_ring_submission_setup;

	for_each_engine(engine, i915, id) {
		err = intel_engine_setup_common(engine);
		if (err)
			goto cleanup;

		err = setup(engine);
		if (err)
			goto cleanup;

		/* We expect the backend to take control over its state */
		GEM_BUG_ON(engine->destroy == (typeof(engine->destroy))kfree);

		GEM_BUG_ON(!engine->cops);
	}

	return 0;

cleanup:
	intel_engines_cleanup(i915);
	return err;
}

void intel_engines_set_scheduler_caps(struct drm_i915_private *i915)
{
	static const struct {
		u8 engine;
		u8 sched;
	} map[] = {
#define MAP(x, y) { ilog2(I915_ENGINE_##x), ilog2(I915_SCHEDULER_CAP_##y) }
		MAP(HAS_PREEMPTION, PREEMPTION),
		MAP(HAS_SEMAPHORES, SEMAPHORES),
		MAP(SUPPORTS_STATS, ENGINE_BUSY_STATS),
#undef MAP
	};
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	u32 enabled, disabled;

	enabled = 0;
	disabled = 0;
	for_each_engine(engine, i915, id) { /* all engines must agree! */
		int i;

		if (engine->schedule)
			enabled |= (I915_SCHEDULER_CAP_ENABLED |
				    I915_SCHEDULER_CAP_PRIORITY);
		else
			disabled |= (I915_SCHEDULER_CAP_ENABLED |
				     I915_SCHEDULER_CAP_PRIORITY);

		for (i = 0; i < ARRAY_SIZE(map); i++) {
			if (engine->flags & BIT(map[i].engine))
				enabled |= BIT(map[i].sched);
			else
				disabled |= BIT(map[i].sched);
		}
	}

	i915->caps.scheduler = enabled & ~disabled;
	if (!(i915->caps.scheduler & I915_SCHEDULER_CAP_ENABLED))
		i915->caps.scheduler = 0;
}

struct measure_breadcrumb {
	struct i915_request rq;
	struct intel_timeline timeline;
	struct intel_ring ring;
	u32 cs[1024];
};

static int measure_breadcrumb_dw(struct intel_engine_cs *engine)
{
	struct measure_breadcrumb *frame;
	int dw = -ENOMEM;

	GEM_BUG_ON(!engine->gt->scratch);

	frame = kzalloc(sizeof(*frame), GFP_KERNEL);
	if (!frame)
		return -ENOMEM;

	if (intel_timeline_init(&frame->timeline,
				engine->gt,
				engine->status_page.vma))
		goto out_frame;

	INIT_LIST_HEAD(&frame->ring.request_list);
	frame->ring.timeline = &frame->timeline;
	frame->ring.vaddr = frame->cs;
	frame->ring.size = sizeof(frame->cs);
	frame->ring.effective_size = frame->ring.size;
	intel_ring_update_space(&frame->ring);

	frame->rq.i915 = engine->i915;
	frame->rq.engine = engine;
	frame->rq.ring = &frame->ring;
	frame->rq.timeline = &frame->timeline;

	dw = intel_timeline_pin(&frame->timeline);
	if (dw < 0)
		goto out_timeline;

	dw = engine->emit_fini_breadcrumb(&frame->rq, frame->cs) - frame->cs;
	GEM_BUG_ON(dw & 1); /* RING_TAIL must be qword aligned */

	intel_timeline_unpin(&frame->timeline);

out_timeline:
	intel_timeline_fini(&frame->timeline);
out_frame:
	kfree(frame);
	return dw;
}

static int pin_context(struct i915_gem_context *ctx,
		       struct intel_engine_cs *engine,
		       struct intel_context **out)
{
	struct intel_context *ce;
	int err;

	ce = i915_gem_context_get_engine(ctx, engine->id);
	if (IS_ERR(ce))
		return PTR_ERR(ce);

	err = intel_context_pin(ce);
	intel_context_put(ce);
	if (err)
		return err;

	*out = ce;
	return 0;
}

void
intel_engine_init_active(struct intel_engine_cs *engine, unsigned int subclass)
{
	INIT_LIST_HEAD(&engine->active.requests);

	spin_lock_init(&engine->active.lock);
	lockdep_set_subclass(&engine->active.lock, subclass);

	/*
	 * Due to an interesting quirk in lockdep's internal debug tracking,
	 * after setting a subclass we must ensure the lock is used. Otherwise,
	 * nr_unused_locks is incremented once too often.
	 */
#ifdef CONFIG_DEBUG_LOCK_ALLOC
	local_irq_disable();
	lock_map_acquire(&engine->active.lock.dep_map);
	lock_map_release(&engine->active.lock.dep_map);
	local_irq_enable();
#endif
}

/**
 * intel_engine_init_common() - initialize engine state which might require hw access
 * @engine: Engine to initialize.
 *
 * Initializes @engine structure members shared between legacy and execlists
 * submission modes which do require hardware access.
 *
 * Typically done at later stages of submission mode specific engine setup.
 *
 * Returns zero on success or an error code on failure.
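 *
 * In particular this pins the kernel context for the engine and measures
 * the number of dwords emitted by emit_fini_breadcrumb(), recording the
 * result in engine->emit_fini_breadcrumb_dw.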
 */
int intel_engine_init_common(struct intel_engine_cs *engine)
{
	struct drm_i915_private *i915 = engine->i915;
	int ret;

	engine->set_default_submission(engine);

	/*
	 * We may need to do things with the shrinker which
	 * require us to immediately switch back to the default
	 * context. This can cause a problem as pinning the
	 * default context also requires GTT space which may not
	 * be available. To avoid this we always pin the default
	 * context.
	 */
	ret = pin_context(i915->kernel_context, engine,
			  &engine->kernel_context);
	if (ret)
		return ret;

	ret = measure_breadcrumb_dw(engine);
	if (ret < 0)
		goto err_unpin;

	engine->emit_fini_breadcrumb_dw = ret;

	return 0;

err_unpin:
	intel_context_unpin(engine->kernel_context);
	return ret;
}

/**
 * intel_engine_cleanup_common() - cleans up the engine state created by
 * the common initializers.
 * @engine: Engine to cleanup.
 *
 * This cleans up everything created by the common helpers.
 */
void intel_engine_cleanup_common(struct intel_engine_cs *engine)
{
	GEM_BUG_ON(!list_empty(&engine->active.requests));

	cleanup_status_page(engine);

	intel_engine_fini_breadcrumbs(engine);
	intel_engine_cleanup_cmd_parser(engine);
	i915_gem_batch_pool_fini(&engine->batch_pool);

	if (engine->default_state)
		i915_gem_object_put(engine->default_state);

	intel_context_unpin(engine->kernel_context);
	GEM_BUG_ON(!llist_empty(&engine->barrier_tasks));

	intel_wa_list_free(&engine->ctx_wa_list);
	intel_wa_list_free(&engine->wa_list);
	intel_wa_list_free(&engine->whitelist);
}

u64 intel_engine_get_active_head(const struct intel_engine_cs *engine)
{
	struct drm_i915_private *i915 = engine->i915;

	u64 acthd;

	if (INTEL_GEN(i915) >= 8)
		acthd = ENGINE_READ64(engine, RING_ACTHD, RING_ACTHD_UDW);
	else if (INTEL_GEN(i915) >= 4)
		acthd = ENGINE_READ(engine, RING_ACTHD);
	else
		acthd = ENGINE_READ(engine, ACTHD);

	return acthd;
}

u64 intel_engine_get_last_batch_head(const struct intel_engine_cs *engine)
{
	u64 bbaddr;

	if (INTEL_GEN(engine->i915) >= 8)
		bbaddr = ENGINE_READ64(engine, RING_BBADDR, RING_BBADDR_UDW);
	else
		bbaddr = ENGINE_READ(engine, RING_BBADDR);

	return bbaddr;
}

int intel_engine_stop_cs(struct intel_engine_cs *engine)
{
	struct intel_uncore *uncore = engine->uncore;
	const u32 base = engine->mmio_base;
	const i915_reg_t mode = RING_MI_MODE(base);
	int err;

	if (INTEL_GEN(engine->i915) < 3)
		return -ENODEV;

	GEM_TRACE("%s\n", engine->name);

	intel_uncore_write_fw(uncore, mode, _MASKED_BIT_ENABLE(STOP_RING));

	err = 0;
	if (__intel_wait_for_register_fw(uncore,
					 mode, MODE_IDLE, MODE_IDLE,
					 1000, 0,
					 NULL)) {
		GEM_TRACE("%s: timed out on STOP_RING -> IDLE\n", engine->name);
		err = -ETIMEDOUT;
	}

	/* A final mmio read to let GPU writes be hopefully flushed to memory */
	intel_uncore_posting_read_fw(uncore, mode);

	return err;
}

void intel_engine_cancel_stop_cs(struct intel_engine_cs *engine)
{
	GEM_TRACE("%s\n", engine->name);

	ENGINE_WRITE_FW(engine, RING_MI_MODE, _MASKED_BIT_DISABLE(STOP_RING));
}

const char *i915_cache_level_str(struct drm_i915_private *i915, int type)
{
	switch (type) {
	case I915_CACHE_NONE: return " uncached";
	case I915_CACHE_LLC: return HAS_LLC(i915) ? " LLC" : " snooped";
	case I915_CACHE_L3_LLC: return " L3+LLC";
	case I915_CACHE_WT: return " WT";
	default: return "";
	}
}

static u32
read_subslice_reg(struct intel_engine_cs *engine, int slice, int subslice,
		  i915_reg_t reg)
{
	struct drm_i915_private *i915 = engine->i915;
	struct intel_uncore *uncore = engine->uncore;
	u32 mcr_mask, mcr_ss, mcr, old_mcr, val;
	enum forcewake_domains fw_domains;

	if (INTEL_GEN(i915) >= 11) {
		mcr_mask = GEN11_MCR_SLICE_MASK | GEN11_MCR_SUBSLICE_MASK;
		mcr_ss = GEN11_MCR_SLICE(slice) | GEN11_MCR_SUBSLICE(subslice);
	} else {
		mcr_mask = GEN8_MCR_SLICE_MASK | GEN8_MCR_SUBSLICE_MASK;
		mcr_ss = GEN8_MCR_SLICE(slice) | GEN8_MCR_SUBSLICE(subslice);
	}

	fw_domains = intel_uncore_forcewake_for_reg(uncore, reg,
						    FW_REG_READ);
	fw_domains |= intel_uncore_forcewake_for_reg(uncore,
						     GEN8_MCR_SELECTOR,
						     FW_REG_READ | FW_REG_WRITE);

	spin_lock_irq(&uncore->lock);
	intel_uncore_forcewake_get__locked(uncore, fw_domains);

	old_mcr = mcr = intel_uncore_read_fw(uncore, GEN8_MCR_SELECTOR);

	mcr &= ~mcr_mask;
	mcr |= mcr_ss;
	intel_uncore_write_fw(uncore, GEN8_MCR_SELECTOR, mcr);

	val = intel_uncore_read_fw(uncore, reg);

	mcr &= ~mcr_mask;
	mcr |= old_mcr & mcr_mask;

	intel_uncore_write_fw(uncore, GEN8_MCR_SELECTOR, mcr);

	intel_uncore_forcewake_put__locked(uncore, fw_domains);
	spin_unlock_irq(&uncore->lock);

	return val;
}

/* NB: please notice the memset */
void intel_engine_get_instdone(struct intel_engine_cs *engine,
			       struct intel_instdone *instdone)
{
	struct drm_i915_private *i915 = engine->i915;
	struct intel_uncore *uncore = engine->uncore;
	u32 mmio_base = engine->mmio_base;
	int slice;
	int subslice;

	memset(instdone, 0, sizeof(*instdone));

	switch (INTEL_GEN(i915)) {
	default:
		instdone->instdone =
			intel_uncore_read(uncore, RING_INSTDONE(mmio_base));

		if (engine->id != RCS0)
			break;

		instdone->slice_common =
			intel_uncore_read(uncore, GEN7_SC_INSTDONE);
		for_each_instdone_slice_subslice(i915, slice, subslice) {
			instdone->sampler[slice][subslice] =
				read_subslice_reg(engine, slice, subslice,
						  GEN7_SAMPLER_INSTDONE);
			instdone->row[slice][subslice] =
				read_subslice_reg(engine, slice, subslice,
						  GEN7_ROW_INSTDONE);
		}
		break;
	case 7:
		instdone->instdone =
			intel_uncore_read(uncore, RING_INSTDONE(mmio_base));

		if (engine->id != RCS0)
			break;

		instdone->slice_common =
			intel_uncore_read(uncore, GEN7_SC_INSTDONE);
		instdone->sampler[0][0] =
			intel_uncore_read(uncore, GEN7_SAMPLER_INSTDONE);
		instdone->row[0][0] =
			intel_uncore_read(uncore, GEN7_ROW_INSTDONE);

		break;
	case 6:
	case 5:
	case 4:
		instdone->instdone =
			intel_uncore_read(uncore, RING_INSTDONE(mmio_base));
		if (engine->id == RCS0)
			/* HACK: Using the wrong struct member */
			instdone->slice_common =
				intel_uncore_read(uncore, GEN4_INSTDONE1);
		break;
	case 3:
	case 2:
		instdone->instdone = intel_uncore_read(uncore, GEN2_INSTDONE);
		break;
	}
}

static bool ring_is_idle(struct intel_engine_cs *engine)
{
	struct drm_i915_private *dev_priv = engine->i915;
	intel_wakeref_t wakeref;
	bool idle = true;

	if (I915_SELFTEST_ONLY(!engine->mmio_base))
		return true;

	/* If the whole device is asleep, the engine must be idle */
	wakeref = intel_runtime_pm_get_if_in_use(&dev_priv->runtime_pm);
	if (!wakeref)
		return true;

	/* First check that no commands are left in the ring */
	if ((ENGINE_READ(engine, RING_HEAD) & HEAD_ADDR) !=
	    (ENGINE_READ(engine, RING_TAIL) & TAIL_ADDR))
		idle = false;

	/* No bit for gen2, so assume the CS parser is idle */
	if (INTEL_GEN(dev_priv) > 2 &&
	    !(ENGINE_READ(engine, RING_MI_MODE) & MODE_IDLE))
		idle = false;

	intel_runtime_pm_put(&dev_priv->runtime_pm, wakeref);

	return idle;
}

/**
 * intel_engine_is_idle() - Report if the engine has finished processing all work
 * @engine: the intel_engine_cs
 *
 * Return true if there are no requests pending, nothing left to be submitted
 * to hardware, and the engine is idle.
 */
bool intel_engine_is_idle(struct intel_engine_cs *engine)
{
	/* More white lies, if wedged, hw state is inconsistent */
	if (intel_gt_is_wedged(engine->gt))
		return true;

	if (!intel_engine_pm_is_awake(engine))
		return true;

	/* Waiting to drain ELSP? */
	if (execlists_active(&engine->execlists)) {
		struct tasklet_struct *t = &engine->execlists.tasklet;

		synchronize_hardirq(engine->i915->drm.pdev->irq);

		local_bh_disable();
		if (tasklet_trylock(t)) {
			/* Must wait for any GPU reset in progress. */
			if (__tasklet_is_enabled(t))
				t->func(t->data);
			tasklet_unlock(t);
		}
		local_bh_enable();

		/* Otherwise flush the tasklet if it was on another cpu */
		tasklet_unlock_wait(t);

		if (execlists_active(&engine->execlists))
			return false;
	}

	/* ELSP is empty, but there are ready requests? E.g. after reset */
	if (!RB_EMPTY_ROOT(&engine->execlists.queue.rb_root))
		return false;

	/* Ring stopped? */
	return ring_is_idle(engine);
}

bool intel_engines_are_idle(struct intel_gt *gt)
{
	struct intel_engine_cs *engine;
	enum intel_engine_id id;

	/*
	 * If the driver is wedged, HW state may be very inconsistent and
	 * report that it is still busy, even though we have stopped using it.
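	 * Treat a wedged GT as idle rather than waiting on state we no
	 * longer trust.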
	 */
	if (intel_gt_is_wedged(gt))
		return true;

	/* Already parked (and passed an idleness test); must still be idle */
	if (!READ_ONCE(gt->awake))
		return true;

	for_each_engine(engine, gt->i915, id) {
		if (!intel_engine_is_idle(engine))
			return false;
	}

	return true;
}

void intel_engines_reset_default_submission(struct intel_gt *gt)
{
	struct intel_engine_cs *engine;
	enum intel_engine_id id;

	for_each_engine(engine, gt->i915, id)
		engine->set_default_submission(engine);
}

bool intel_engine_can_store_dword(struct intel_engine_cs *engine)
{
	switch (INTEL_GEN(engine->i915)) {
	case 2:
		return false; /* uses physical not virtual addresses */
	case 3:
		/* maybe only uses physical not virtual addresses */
		return !(IS_I915G(engine->i915) || IS_I915GM(engine->i915));
	case 6:
		return engine->class != VIDEO_DECODE_CLASS; /* b0rked */
	default:
		return true;
	}
}

unsigned int intel_engines_has_context_isolation(struct drm_i915_private *i915)
{
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	unsigned int which;

	which = 0;
	for_each_engine(engine, i915, id)
		if (engine->default_state)
			which |= BIT(engine->uabi_class);

	return which;
}

static int print_sched_attr(struct drm_i915_private *i915,
			    const struct i915_sched_attr *attr,
			    char *buf, int x, int len)
{
	if (attr->priority == I915_PRIORITY_INVALID)
		return x;

	x += snprintf(buf + x, len - x,
		      " prio=%d", attr->priority);

	return x;
}

static void print_request(struct drm_printer *m,
			  struct i915_request *rq,
			  const char *prefix)
{
	const char *name = rq->fence.ops->get_timeline_name(&rq->fence);
	char buf[80] = "";
	int x = 0;

	x = print_sched_attr(rq->i915, &rq->sched.attr, buf, x, sizeof(buf));

	drm_printf(m, "%s %llx:%llx%s%s %s @ %dms: %s\n",
		   prefix,
		   rq->fence.context, rq->fence.seqno,
		   i915_request_completed(rq) ? "!" :
		   i915_request_started(rq) ? "*" :
		   "",
		   test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
			    &rq->fence.flags) ? "+" :
		   test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT,
			    &rq->fence.flags) ? "-" :
		   "",
		   buf,
		   jiffies_to_msecs(jiffies - rq->emitted_jiffies),
		   name);
}

static void hexdump(struct drm_printer *m, const void *buf, size_t len)
{
	const size_t rowsize = 8 * sizeof(u32);
	const void *prev = NULL;
	bool skip = false;
	size_t pos;

	for (pos = 0; pos < len; pos += rowsize) {
		char line[128];

		if (prev && !memcmp(prev, buf + pos, rowsize)) {
			if (!skip) {
				drm_printf(m, "*\n");
				skip = true;
			}
			continue;
		}

		WARN_ON_ONCE(hex_dump_to_buffer(buf + pos, len - pos,
						rowsize, sizeof(u32),
						line, sizeof(line),
						false) >= sizeof(line));
		drm_printf(m, "[%04zx] %s\n", pos, line);

		prev = buf + pos;
		skip = false;
	}
}

static void intel_engine_print_registers(struct intel_engine_cs *engine,
					 struct drm_printer *m)
{
	struct drm_i915_private *dev_priv = engine->i915;
	const struct intel_engine_execlists * const execlists =
		&engine->execlists;
	unsigned long flags;
	u64 addr;

	if (engine->id == RCS0 && IS_GEN_RANGE(dev_priv, 4, 7))
		drm_printf(m, "\tCCID: 0x%08x\n", ENGINE_READ(engine, CCID));
	drm_printf(m, "\tRING_START: 0x%08x\n",
		   ENGINE_READ(engine, RING_START));
	drm_printf(m, "\tRING_HEAD: 0x%08x\n",
		   ENGINE_READ(engine, RING_HEAD) & HEAD_ADDR);
	drm_printf(m, "\tRING_TAIL: 0x%08x\n",
		   ENGINE_READ(engine, RING_TAIL) & TAIL_ADDR);
	drm_printf(m, "\tRING_CTL: 0x%08x%s\n",
		   ENGINE_READ(engine, RING_CTL),
		   ENGINE_READ(engine, RING_CTL) & (RING_WAIT | RING_WAIT_SEMAPHORE) ? " [waiting]" : "");
	if (INTEL_GEN(engine->i915) > 2) {
		drm_printf(m, "\tRING_MODE: 0x%08x%s\n",
			   ENGINE_READ(engine, RING_MI_MODE),
			   ENGINE_READ(engine, RING_MI_MODE) & (MODE_IDLE) ?
			   " [idle]" : "");
	}

	if (INTEL_GEN(dev_priv) >= 6) {
		drm_printf(m, "\tRING_IMR: %08x\n",
			   ENGINE_READ(engine, RING_IMR));
	}

	addr = intel_engine_get_active_head(engine);
	drm_printf(m, "\tACTHD: 0x%08x_%08x\n",
		   upper_32_bits(addr), lower_32_bits(addr));
	addr = intel_engine_get_last_batch_head(engine);
	drm_printf(m, "\tBBADDR: 0x%08x_%08x\n",
		   upper_32_bits(addr), lower_32_bits(addr));
	if (INTEL_GEN(dev_priv) >= 8)
		addr = ENGINE_READ64(engine, RING_DMA_FADD, RING_DMA_FADD_UDW);
	else if (INTEL_GEN(dev_priv) >= 4)
		addr = ENGINE_READ(engine, RING_DMA_FADD);
	else
		addr = ENGINE_READ(engine, DMA_FADD_I8XX);
	drm_printf(m, "\tDMA_FADDR: 0x%08x_%08x\n",
		   upper_32_bits(addr), lower_32_bits(addr));
	if (INTEL_GEN(dev_priv) >= 4) {
		drm_printf(m, "\tIPEIR: 0x%08x\n",
			   ENGINE_READ(engine, RING_IPEIR));
		drm_printf(m, "\tIPEHR: 0x%08x\n",
			   ENGINE_READ(engine, RING_IPEHR));
	} else {
		drm_printf(m, "\tIPEIR: 0x%08x\n", ENGINE_READ(engine, IPEIR));
		drm_printf(m, "\tIPEHR: 0x%08x\n", ENGINE_READ(engine, IPEHR));
	}

	if (HAS_EXECLISTS(dev_priv)) {
		struct i915_request * const *port, *rq;
		const u32 *hws =
			&engine->status_page.addr[I915_HWS_CSB_BUF0_INDEX];
		const u8 num_entries = execlists->csb_size;
		unsigned int idx;
		u8 read, write;

		drm_printf(m, "\tExeclist status: 0x%08x %08x, entries %u\n",
			   ENGINE_READ(engine, RING_EXECLIST_STATUS_LO),
			   ENGINE_READ(engine, RING_EXECLIST_STATUS_HI),
			   num_entries);

		read = execlists->csb_head;
		write = READ_ONCE(*execlists->csb_write);

		drm_printf(m, "\tExeclist CSB read %d, write %d, tasklet queued? %s (%s)\n",
			   read, write,
			   yesno(test_bit(TASKLET_STATE_SCHED,
					  &engine->execlists.tasklet.state)),
			   enableddisabled(!atomic_read(&engine->execlists.tasklet.count)));
		if (read >= num_entries)
			read = 0;
		if (write >= num_entries)
			write = 0;
		if (read > write)
			write += num_entries;
		while (read < write) {
			idx = ++read % num_entries;
			drm_printf(m, "\tExeclist CSB[%d]: 0x%08x, context: %d\n",
				   idx, hws[idx * 2], hws[idx * 2 + 1]);
		}

		spin_lock_irqsave(&engine->active.lock, flags);
		for (port = execlists->active; (rq = *port); port++) {
			char hdr[80];
			int len;

			len = snprintf(hdr, sizeof(hdr),
				       "\t\tActive[%d: ",
				       (int)(port - execlists->active));
			if (!i915_request_signaled(rq))
				len += snprintf(hdr + len, sizeof(hdr) - len,
						"ring:{start:%08x, hwsp:%08x, seqno:%08x}, ",
						i915_ggtt_offset(rq->ring->vma),
						rq->timeline->hwsp_offset,
						hwsp_seqno(rq));
			snprintf(hdr + len, sizeof(hdr) - len, "rq: ");
			print_request(m, rq, hdr);
		}
		for (port = execlists->pending; (rq = *port); port++) {
			char hdr[80];

			snprintf(hdr, sizeof(hdr),
				 "\t\tPending[%d] ring:{start:%08x, hwsp:%08x, seqno:%08x}, rq: ",
				 (int)(port - execlists->pending),
				 i915_ggtt_offset(rq->ring->vma),
				 rq->timeline->hwsp_offset,
				 hwsp_seqno(rq));
			print_request(m, rq, hdr);
		}
		spin_unlock_irqrestore(&engine->active.lock, flags);
	} else if (INTEL_GEN(dev_priv) > 6) {
		drm_printf(m, "\tPP_DIR_BASE: 0x%08x\n",
			   ENGINE_READ(engine, RING_PP_DIR_BASE));
		drm_printf(m, "\tPP_DIR_BASE_READ: 0x%08x\n",
			   ENGINE_READ(engine, RING_PP_DIR_BASE_READ));
		drm_printf(m, "\tPP_DIR_DCLV: 0x%08x\n",
			   ENGINE_READ(engine, RING_PP_DIR_DCLV));
	}
}

static void print_request_ring(struct drm_printer *m, struct i915_request *rq)
{
	void *ring;
	int size;

	drm_printf(m,
		   "[head %04x, postfix %04x, tail %04x, batch 0x%08x_%08x]:\n",
		   rq->head, rq->postfix, rq->tail,
		   rq->batch ? upper_32_bits(rq->batch->node.start) : ~0u,
		   rq->batch ? lower_32_bits(rq->batch->node.start) : ~0u);

	size = rq->tail - rq->head;
	if (rq->tail < rq->head)
		size += rq->ring->size;

	ring = kmalloc(size, GFP_ATOMIC);
	if (ring) {
		const void *vaddr = rq->ring->vaddr;
		unsigned int head = rq->head;
		unsigned int len = 0;

		if (rq->tail < head) {
			len = rq->ring->size - head;
			memcpy(ring, vaddr + head, len);
			head = 0;
		}
		memcpy(ring + len, vaddr + head, size - len);

		hexdump(m, ring, size);
		kfree(ring);
	}
}

void intel_engine_dump(struct intel_engine_cs *engine,
		       struct drm_printer *m,
		       const char *header, ...)
{
	struct i915_gpu_error * const error = &engine->i915->gpu_error;
	struct i915_request *rq;
	intel_wakeref_t wakeref;
	unsigned long flags;

	if (header) {
		va_list ap;

		va_start(ap, header);
		drm_vprintf(m, header, &ap);
		va_end(ap);
	}

	if (intel_gt_is_wedged(engine->gt))
		drm_printf(m, "*** WEDGED ***\n");

	drm_printf(m, "\tAwake? %d\n", atomic_read(&engine->wakeref.count));
	drm_printf(m, "\tHangcheck: %d ms ago\n",
		   jiffies_to_msecs(jiffies - engine->hangcheck.action_timestamp));
	drm_printf(m, "\tReset count: %d (global %d)\n",
		   i915_reset_engine_count(error, engine),
		   i915_reset_count(error));

	drm_printf(m, "\tRequests:\n");

	spin_lock_irqsave(&engine->active.lock, flags);
	rq = intel_engine_find_active_request(engine);
	if (rq) {
		print_request(m, rq, "\t\tactive ");

		drm_printf(m, "\t\tring->start: 0x%08x\n",
			   i915_ggtt_offset(rq->ring->vma));
		drm_printf(m, "\t\tring->head: 0x%08x\n",
			   rq->ring->head);
		drm_printf(m, "\t\tring->tail: 0x%08x\n",
			   rq->ring->tail);
		drm_printf(m, "\t\tring->emit: 0x%08x\n",
			   rq->ring->emit);
		drm_printf(m, "\t\tring->space: 0x%08x\n",
			   rq->ring->space);
		drm_printf(m, "\t\tring->hwsp: 0x%08x\n",
			   rq->timeline->hwsp_offset);

		print_request_ring(m, rq);
	}
	spin_unlock_irqrestore(&engine->active.lock, flags);

	wakeref = intel_runtime_pm_get_if_in_use(&engine->i915->runtime_pm);
	if (wakeref) {
		intel_engine_print_registers(engine, m);
		intel_runtime_pm_put(&engine->i915->runtime_pm, wakeref);
	} else {
		drm_printf(m, "\tDevice is asleep; skipping register dump\n");
	}

	intel_execlists_show_requests(engine, m, print_request, 8);

	drm_printf(m, "HWSP:\n");
	hexdump(m, engine->status_page.addr, PAGE_SIZE);

	drm_printf(m, "Idle? %s\n", yesno(intel_engine_is_idle(engine)));

	intel_engine_print_breadcrumbs(engine, m);
}

static u8 user_class_map[] = {
	[I915_ENGINE_CLASS_RENDER] = RENDER_CLASS,
	[I915_ENGINE_CLASS_COPY] = COPY_ENGINE_CLASS,
	[I915_ENGINE_CLASS_VIDEO] = VIDEO_DECODE_CLASS,
	[I915_ENGINE_CLASS_VIDEO_ENHANCE] = VIDEO_ENHANCEMENT_CLASS,
};

struct intel_engine_cs *
intel_engine_lookup_user(struct drm_i915_private *i915, u8 class, u8 instance)
{
	if (class >= ARRAY_SIZE(user_class_map))
		return NULL;

	class = user_class_map[class];

	GEM_BUG_ON(class > MAX_ENGINE_CLASS);

	if (instance > MAX_ENGINE_INSTANCE)
		return NULL;

	return i915->engine_class[class][instance];
}

/**
 * intel_enable_engine_stats() - Enable engine busy tracking on engine
 * @engine: engine to enable stats collection
 *
 * Start collecting the engine busyness data for @engine.
 *
 * Returns 0 on success or a negative error code.
 */
int intel_enable_engine_stats(struct intel_engine_cs *engine)
{
	struct intel_engine_execlists *execlists = &engine->execlists;
	unsigned long flags;
	int err = 0;

	if (!intel_engine_supports_stats(engine))
		return -ENODEV;

	spin_lock_irqsave(&engine->active.lock, flags);
	write_seqlock(&engine->stats.lock);

	if (unlikely(engine->stats.enabled == ~0)) {
		err = -EBUSY;
		goto unlock;
	}

	if (engine->stats.enabled++ == 0) {
		struct i915_request * const *port;
		struct i915_request *rq;

		engine->stats.enabled_at = ktime_get();

		/* XXX submission method oblivious? */
		for (port = execlists->active; (rq = *port); port++)
			engine->stats.active++;

		for (port = execlists->pending; (rq = *port); port++) {
			/* Exclude any contexts already counted in active */
			if (intel_context_inflight_count(rq->hw_context) == 1)
				engine->stats.active++;
		}

		if (engine->stats.active)
			engine->stats.start = engine->stats.enabled_at;
	}

unlock:
	write_sequnlock(&engine->stats.lock);
	spin_unlock_irqrestore(&engine->active.lock, flags);

	return err;
}

static ktime_t __intel_engine_get_busy_time(struct intel_engine_cs *engine)
{
	ktime_t total = engine->stats.total;

	/*
	 * If the engine is executing something at the moment
	 * add it to the total.
	 */
	if (engine->stats.active)
		total = ktime_add(total,
				  ktime_sub(ktime_get(), engine->stats.start));

	return total;
}

/**
 * intel_engine_get_busy_time() - Return current accumulated engine busyness
 * @engine: engine to report on
 *
 * Returns accumulated time @engine was busy since engine stats were enabled.
 */
ktime_t intel_engine_get_busy_time(struct intel_engine_cs *engine)
{
	unsigned int seq;
	ktime_t total;

	do {
		seq = read_seqbegin(&engine->stats.lock);
		total = __intel_engine_get_busy_time(engine);
	} while (read_seqretry(&engine->stats.lock, seq));

	return total;
}

/**
 * intel_disable_engine_stats() - Disable engine busy tracking on engine
 * @engine: engine to disable stats collection
 *
 * Stops collecting the engine busyness data for @engine.
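 *
 * Enable/disable calls are reference counted, so the accounting only
 * stops once the last intel_enable_engine_stats() user has disabled it.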
 */
void intel_disable_engine_stats(struct intel_engine_cs *engine)
{
	unsigned long flags;

	if (!intel_engine_supports_stats(engine))
		return;

	write_seqlock_irqsave(&engine->stats.lock, flags);
	WARN_ON_ONCE(engine->stats.enabled == 0);
	if (--engine->stats.enabled == 0) {
		engine->stats.total = __intel_engine_get_busy_time(engine);
		engine->stats.active = 0;
	}
	write_sequnlock_irqrestore(&engine->stats.lock, flags);
}

static bool match_ring(struct i915_request *rq)
{
	u32 ring = ENGINE_READ(rq->engine, RING_START);

	return ring == i915_ggtt_offset(rq->ring->vma);
}

struct i915_request *
intel_engine_find_active_request(struct intel_engine_cs *engine)
{
	struct i915_request *request, *active = NULL;

	/*
	 * We are called by the error capture, reset and to dump engine
	 * state at random points in time. In particular, note that neither is
	 * crucially ordered with an interrupt. After a hang, the GPU is dead
	 * and we assume that no more writes can happen (we waited long enough
	 * for all writes that were in transaction to be flushed) - adding an
	 * extra delay for a recent interrupt is pointless. Hence, we do
	 * not need an engine->irq_seqno_barrier() before the seqno reads.
	 * At all other times, we must assume the GPU is still running, but
	 * we only care about the snapshot of this moment.
	 */
	lockdep_assert_held(&engine->active.lock);
	list_for_each_entry(request, &engine->active.requests, sched.link) {
		if (i915_request_completed(request))
			continue;

		if (!i915_request_started(request))
			continue;

		/* More than one preemptible request may match! */
		if (!match_ring(request))
			continue;

		active = request;
		break;
	}

	return active;
}

#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftest_engine_cs.c"
#endif