// SPDX-License-Identifier: MIT
/*
 * Copyright © 2014-2019 Intel Corporation
 */

#include <linux/bsearch.h>

#include "gt/intel_engine_regs.h"
#include "gt/intel_gt.h"
#include "gt/intel_gt_regs.h"
#include "gt/intel_lrc.h"
#include "gt/shmem_utils.h"
#include "intel_guc_ads.h"
#include "intel_guc_fwif.h"
#include "intel_uc.h"
#include "i915_drv.h"

/*
 * The Additional Data Struct (ADS) has pointers for different buffers used by
 * the GuC. One single gem object contains the ADS struct itself (guc_ads) and
 * all the extra buffers indirectly linked via the ADS struct's entries.
 *
 * Layout of the ADS blob allocated for the GuC:
 *
 *      +---------------------------------------+ <== base
 *      | guc_ads                               |
 *      +---------------------------------------+
 *      | guc_policies                          |
 *      +---------------------------------------+
 *      | guc_gt_system_info                    |
 *      +---------------------------------------+
 *      | guc_engine_usage                      |
 *      +---------------------------------------+ <== static
 *      | guc_mmio_reg[countA] (engine 0.0)     |
 *      | guc_mmio_reg[countB] (engine 0.1)     |
 *      | guc_mmio_reg[countC] (engine 1.0)     |
 *      |   ...                                 |
 *      +---------------------------------------+ <== dynamic
 *      | padding                               |
 *      +---------------------------------------+ <== 4K aligned
 *      | golden contexts                       |
 *      +---------------------------------------+
 *      | padding                               |
 *      +---------------------------------------+ <== 4K aligned
 *      | private data                          |
 *      +---------------------------------------+
 *      | padding                               |
 *      +---------------------------------------+ <== 4K aligned
 */
struct __guc_ads_blob {
	struct guc_ads ads;
	struct guc_policies policies;
	struct guc_gt_system_info system_info;
	struct guc_engine_usage engine_usage;
	/* From here on, location is dynamic! Refer to above diagram. */
	struct guc_mmio_reg regset[0];
} __packed;
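/*
 * Illustrative layout arithmetic for the dynamic part above (hypothetical
 * sizes, not taken from any real platform): the regset starts at
 * offsetof(struct __guc_ads_blob, regset); the golden contexts start at
 * PAGE_ALIGN(regset offset + ads_regset_size); the private data starts at
 * PAGE_ALIGN(golden context offset + golden context size); and the total blob
 * size is the private data offset plus PAGE_ALIGN(fw.private_data_size).
 *
 * E.g. with the regset at offset 0x5A0 holding 0x200 bytes of registers, the
 * golden contexts would start at PAGE_ALIGN(0x7A0) = 0x1000, and with an
 * 80K (0x14000) golden context area the private data would start at 0x15000.
 * The guc_ads_*_offset() helpers below implement exactly this arithmetic.
 */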
static u32 guc_ads_regset_size(struct intel_guc *guc)
{
	GEM_BUG_ON(!guc->ads_regset_size);
	return guc->ads_regset_size;
}

static u32 guc_ads_golden_ctxt_size(struct intel_guc *guc)
{
	return PAGE_ALIGN(guc->ads_golden_ctxt_size);
}

static u32 guc_ads_private_data_size(struct intel_guc *guc)
{
	return PAGE_ALIGN(guc->fw.private_data_size);
}

static u32 guc_ads_regset_offset(struct intel_guc *guc)
{
	return offsetof(struct __guc_ads_blob, regset);
}

static u32 guc_ads_golden_ctxt_offset(struct intel_guc *guc)
{
	u32 offset;

	offset = guc_ads_regset_offset(guc) +
		 guc_ads_regset_size(guc);

	return PAGE_ALIGN(offset);
}

static u32 guc_ads_private_data_offset(struct intel_guc *guc)
{
	u32 offset;

	offset = guc_ads_golden_ctxt_offset(guc) +
		 guc_ads_golden_ctxt_size(guc);

	return PAGE_ALIGN(offset);
}

static u32 guc_ads_blob_size(struct intel_guc *guc)
{
	return guc_ads_private_data_offset(guc) +
	       guc_ads_private_data_size(guc);
}

static void guc_policies_init(struct intel_guc *guc, struct guc_policies *policies)
{
	struct intel_gt *gt = guc_to_gt(guc);
	struct drm_i915_private *i915 = gt->i915;

	policies->dpc_promote_time = GLOBAL_POLICY_DEFAULT_DPC_PROMOTE_TIME_US;
	policies->max_num_work_items = GLOBAL_POLICY_MAX_NUM_WI;

	policies->global_flags = 0;
	if (i915->params.reset < 2)
		policies->global_flags |= GLOBAL_POLICY_DISABLE_ENGINE_RESET;

	policies->is_valid = 1;
}

void intel_guc_ads_print_policy_info(struct intel_guc *guc,
				     struct drm_printer *dp)
{
	struct __guc_ads_blob *blob = guc->ads_blob;

	if (unlikely(!blob))
		return;

	drm_printf(dp, "Global scheduling policies:\n");
	drm_printf(dp, " DPC promote time = %u\n", blob->policies.dpc_promote_time);
	drm_printf(dp, " Max num work items = %u\n", blob->policies.max_num_work_items);
	drm_printf(dp, " Flags = %u\n", blob->policies.global_flags);
}

static int guc_action_policies_update(struct intel_guc *guc, u32 policy_offset)
{
	u32 action[] = {
		INTEL_GUC_ACTION_GLOBAL_SCHED_POLICY_CHANGE,
		policy_offset
	};

	return intel_guc_send_busy_loop(guc, action, ARRAY_SIZE(action), 0, true);
}

int intel_guc_global_policies_update(struct intel_guc *guc)
{
	struct __guc_ads_blob *blob = guc->ads_blob;
	struct intel_gt *gt = guc_to_gt(guc);
	intel_wakeref_t wakeref;
	int ret;

	if (!blob)
		return -EOPNOTSUPP;

	GEM_BUG_ON(!blob->ads.scheduler_policies);

	guc_policies_init(guc, &blob->policies);

	if (!intel_guc_is_ready(guc))
		return 0;

	with_intel_runtime_pm(&gt->i915->runtime_pm, wakeref)
		ret = guc_action_policies_update(guc, blob->ads.scheduler_policies);

	return ret;
}

static void guc_mapping_table_init(struct intel_gt *gt,
				   struct guc_gt_system_info *system_info)
{
	unsigned int i, j;
	struct intel_engine_cs *engine;
	enum intel_engine_id id;

	/* Table must be set to invalid values for entries not used */
	for (i = 0; i < GUC_MAX_ENGINE_CLASSES; ++i)
		for (j = 0; j < GUC_MAX_INSTANCES_PER_CLASS; ++j)
			system_info->mapping_table[i][j] =
				GUC_MAX_INSTANCES_PER_CLASS;

	for_each_engine(engine, gt, id) {
		u8 guc_class = engine_class_to_guc_class(engine->class);

		system_info->mapping_table[guc_class][ilog2(engine->logical_mask)] =
			engine->instance;
	}
}
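/*
 * Illustrative mapping table contents (hypothetical part where VCS0 and VCS2
 * are the only video engines present, so their logical instances are 0 and 1):
 *
 *   mapping_table[GUC_VIDEO_CLASS][0] = 0;   <- VCS0: logical 0 -> HW instance 0
 *   mapping_table[GUC_VIDEO_CLASS][1] = 2;   <- VCS2: logical 1 -> HW instance 2
 *
 * Every slot not written by guc_mapping_table_init() above stays at
 * GUC_MAX_INSTANCES_PER_CLASS, i.e. "no such engine".
 */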
/*
 * The save/restore register list must be pre-calculated to a temporary
 * buffer of driver defined size before it can be generated in place
 * inside the ADS.
 */
#define MAX_MMIO_REGS	128	/* Arbitrary size, increase as needed */
struct temp_regset {
	struct guc_mmio_reg *registers;
	u32 used;
	u32 size;
};

static int guc_mmio_reg_cmp(const void *a, const void *b)
{
	const struct guc_mmio_reg *ra = a;
	const struct guc_mmio_reg *rb = b;

	return (int)ra->offset - (int)rb->offset;
}

static void guc_mmio_reg_add(struct temp_regset *regset,
			     u32 offset, u32 flags)
{
	u32 count = regset->used;
	struct guc_mmio_reg reg = {
		.offset = offset,
		.flags = flags,
	};
	struct guc_mmio_reg *slot;

	GEM_BUG_ON(count >= regset->size);

	/*
	 * The mmio list is built using separate lists within the driver.
	 * It's possible that at some point we may attempt to add the same
	 * register more than once. Do not consider this an error; silently
	 * move on if the register is already in the list.
	 */
	if (bsearch(&reg, regset->registers, count,
		    sizeof(reg), guc_mmio_reg_cmp))
		return;

	slot = &regset->registers[count];
	regset->used++;
	*slot = reg;

	while (slot-- > regset->registers) {
		GEM_BUG_ON(slot[0].offset == slot[1].offset);
		if (slot[1].offset > slot[0].offset)
			break;

		swap(slot[1], slot[0]);
	}
}

#define GUC_MMIO_REG_ADD(regset, reg, masked) \
	guc_mmio_reg_add(regset, \
			 i915_mmio_reg_offset((reg)), \
			 (masked) ? GUC_REGSET_MASKED : 0)

static void guc_mmio_regset_init(struct temp_regset *regset,
				 struct intel_engine_cs *engine)
{
	const u32 base = engine->mmio_base;
	struct i915_wa_list *wal = &engine->wa_list;
	struct i915_wa *wa;
	unsigned int i;

	regset->used = 0;

	GUC_MMIO_REG_ADD(regset, RING_MODE_GEN7(base), true);
	GUC_MMIO_REG_ADD(regset, RING_HWS_PGA(base), false);
	GUC_MMIO_REG_ADD(regset, RING_IMR(base), false);

	for (i = 0, wa = wal->list; i < wal->count; i++, wa++)
		GUC_MMIO_REG_ADD(regset, wa->reg, wa->masked_reg);

	/* Be extra paranoid and include all whitelist registers. */
	for (i = 0; i < RING_MAX_NONPRIV_SLOTS; i++)
		GUC_MMIO_REG_ADD(regset,
				 RING_FORCE_TO_NONPRIV(base, i),
				 false);

	/* add in local MOCS registers */
	for (i = 0; i < GEN9_LNCFCMOCS_REG_COUNT; i++)
		GUC_MMIO_REG_ADD(regset, GEN9_LNCFCMOCS(i), false);
}
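/*
 * Illustrative behaviour of guc_mmio_reg_add() above (hypothetical offsets):
 *
 *   GUC_MMIO_REG_ADD 0x20C0 -> list: { 0x20C0 }
 *   GUC_MMIO_REG_ADD 0x2000 -> list: { 0x2000, 0x20C0 }  (swapped into place)
 *   GUC_MMIO_REG_ADD 0x20C0 -> list unchanged, duplicate caught by bsearch()
 *
 * Keeping the array sorted by offset is what makes the bsearch() based
 * duplicate check valid.
 */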
static int guc_mmio_reg_state_query(struct intel_guc *guc)
{
	struct intel_gt *gt = guc_to_gt(guc);
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	struct temp_regset temp_set;
	u32 total;

	/*
	 * Need to actually build the list in order to filter out
	 * duplicates and other such data dependent constructions.
	 */
	temp_set.size = MAX_MMIO_REGS;
	temp_set.registers = kmalloc_array(temp_set.size,
					   sizeof(*temp_set.registers),
					   GFP_KERNEL);
	if (!temp_set.registers)
		return -ENOMEM;

	total = 0;
	for_each_engine(engine, gt, id) {
		guc_mmio_regset_init(&temp_set, engine);
		total += temp_set.used;
	}

	kfree(temp_set.registers);

	return total * sizeof(struct guc_mmio_reg);
}

static void guc_mmio_reg_state_init(struct intel_guc *guc,
				    struct __guc_ads_blob *blob)
{
	struct intel_gt *gt = guc_to_gt(guc);
	struct intel_engine_cs *engine;
	enum intel_engine_id id;
	struct temp_regset temp_set;
	struct guc_mmio_reg_set *ads_reg_set;
	u32 addr_ggtt, offset;
	u8 guc_class;

	offset = guc_ads_regset_offset(guc);
	addr_ggtt = intel_guc_ggtt_offset(guc, guc->ads_vma) + offset;
	temp_set.registers = (struct guc_mmio_reg *)(((u8 *)blob) + offset);
	temp_set.size = guc->ads_regset_size / sizeof(temp_set.registers[0]);

	for_each_engine(engine, gt, id) {
		/* Class index is checked in class converter */
		GEM_BUG_ON(engine->instance >= GUC_MAX_INSTANCES_PER_CLASS);

		guc_class = engine_class_to_guc_class(engine->class);
		ads_reg_set = &blob->ads.reg_state_list[guc_class][engine->instance];

		guc_mmio_regset_init(&temp_set, engine);
		if (!temp_set.used) {
			ads_reg_set->address = 0;
			ads_reg_set->count = 0;
			continue;
		}

		ads_reg_set->address = addr_ggtt;
		ads_reg_set->count = temp_set.used;

		temp_set.size -= temp_set.used;
		temp_set.registers += temp_set.used;
		addr_ggtt += temp_set.used * sizeof(struct guc_mmio_reg);
	}

	GEM_BUG_ON(temp_set.size);
}

static void fill_engine_enable_masks(struct intel_gt *gt,
				     struct guc_gt_system_info *info)
{
	info->engine_enabled_masks[GUC_RENDER_CLASS] = 1;
	info->engine_enabled_masks[GUC_BLITTER_CLASS] = 1;
	info->engine_enabled_masks[GUC_VIDEO_CLASS] = VDBOX_MASK(gt);
	info->engine_enabled_masks[GUC_VIDEOENHANCE_CLASS] = VEBOX_MASK(gt);
}

#define LR_HW_CONTEXT_SIZE	(80 * sizeof(u32))
#define LRC_SKIP_SIZE		(LRC_PPHWSP_SZ * PAGE_SIZE + LR_HW_CONTEXT_SIZE)
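/*
 * LRC_SKIP_SIZE is the part of the context image that precedes the engine
 * state proper: the per-process HWSP page(s) plus the 80 dword execlists
 * context header, which are identical on all engines. Illustrative
 * arithmetic (hypothetical context size, assuming LRC_PPHWSP_SZ == 1):
 *
 *   real_size      = intel_engine_context_size() = 81920 bytes (20 pages)
 *   LRC_SKIP_SIZE  = 1 * 4096 + 80 * 4           = 4416 bytes
 *   eng_state_size = real_size - LRC_SKIP_SIZE   = 77504 bytes
 *
 * which is the value programmed into ads.eng_state_size[] below.
 */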
379 */ 380 if (blob) { 381 offset = guc_ads_golden_ctxt_offset(guc); 382 addr_ggtt = intel_guc_ggtt_offset(guc, guc->ads_vma) + offset; 383 info = &blob->system_info; 384 } else { 385 memset(&local_info, 0, sizeof(local_info)); 386 info = &local_info; 387 fill_engine_enable_masks(gt, info); 388 } 389 390 for (engine_class = 0; engine_class <= MAX_ENGINE_CLASS; ++engine_class) { 391 if (engine_class == OTHER_CLASS) 392 continue; 393 394 guc_class = engine_class_to_guc_class(engine_class); 395 396 if (!info->engine_enabled_masks[guc_class]) 397 continue; 398 399 real_size = intel_engine_context_size(gt, engine_class); 400 alloc_size = PAGE_ALIGN(real_size); 401 total_size += alloc_size; 402 403 if (!blob) 404 continue; 405 406 /* 407 * This interface is slightly confusing. We need to pass the 408 * base address of the full golden context and the size of just 409 * the engine state, which is the section of the context image 410 * that starts after the execlists context. This is required to 411 * allow the GuC to restore just the engine state when a 412 * watchdog reset occurs. 413 * We calculate the engine state size by removing the size of 414 * what comes before it in the context image (which is identical 415 * on all engines). 416 */ 417 blob->ads.eng_state_size[guc_class] = real_size - LRC_SKIP_SIZE; 418 blob->ads.golden_context_lrca[guc_class] = addr_ggtt; 419 addr_ggtt += alloc_size; 420 } 421 422 if (!blob) 423 return total_size; 424 425 GEM_BUG_ON(guc->ads_golden_ctxt_size != total_size); 426 return total_size; 427 } 428 429 static struct intel_engine_cs *find_engine_state(struct intel_gt *gt, u8 engine_class) 430 { 431 struct intel_engine_cs *engine; 432 enum intel_engine_id id; 433 434 for_each_engine(engine, gt, id) { 435 if (engine->class != engine_class) 436 continue; 437 438 if (!engine->default_state) 439 continue; 440 441 return engine; 442 } 443 444 return NULL; 445 } 446 447 static void guc_init_golden_context(struct intel_guc *guc) 448 { 449 struct __guc_ads_blob *blob = guc->ads_blob; 450 struct intel_engine_cs *engine; 451 struct intel_gt *gt = guc_to_gt(guc); 452 u32 addr_ggtt, offset; 453 u32 total_size = 0, alloc_size, real_size; 454 u8 engine_class, guc_class; 455 u8 *ptr; 456 457 if (!intel_uc_uses_guc_submission(>->uc)) 458 return; 459 460 GEM_BUG_ON(!blob); 461 462 /* 463 * Go back and fill in the golden context data now that it is 464 * available. 
465 */ 466 offset = guc_ads_golden_ctxt_offset(guc); 467 addr_ggtt = intel_guc_ggtt_offset(guc, guc->ads_vma) + offset; 468 ptr = ((u8 *)blob) + offset; 469 470 for (engine_class = 0; engine_class <= MAX_ENGINE_CLASS; ++engine_class) { 471 if (engine_class == OTHER_CLASS) 472 continue; 473 474 guc_class = engine_class_to_guc_class(engine_class); 475 476 if (!blob->system_info.engine_enabled_masks[guc_class]) 477 continue; 478 479 real_size = intel_engine_context_size(gt, engine_class); 480 alloc_size = PAGE_ALIGN(real_size); 481 total_size += alloc_size; 482 483 engine = find_engine_state(gt, engine_class); 484 if (!engine) { 485 drm_err(>->i915->drm, "No engine state recorded for class %d!\n", 486 engine_class); 487 blob->ads.eng_state_size[guc_class] = 0; 488 blob->ads.golden_context_lrca[guc_class] = 0; 489 continue; 490 } 491 492 GEM_BUG_ON(blob->ads.eng_state_size[guc_class] != 493 real_size - LRC_SKIP_SIZE); 494 GEM_BUG_ON(blob->ads.golden_context_lrca[guc_class] != addr_ggtt); 495 addr_ggtt += alloc_size; 496 497 shmem_read(engine->default_state, 0, ptr, real_size); 498 ptr += alloc_size; 499 } 500 501 GEM_BUG_ON(guc->ads_golden_ctxt_size != total_size); 502 } 503 504 static void __guc_ads_init(struct intel_guc *guc) 505 { 506 struct intel_gt *gt = guc_to_gt(guc); 507 struct drm_i915_private *i915 = gt->i915; 508 struct __guc_ads_blob *blob = guc->ads_blob; 509 u32 base; 510 511 /* GuC scheduling policies */ 512 guc_policies_init(guc, &blob->policies); 513 514 /* System info */ 515 fill_engine_enable_masks(gt, &blob->system_info); 516 517 blob->system_info.generic_gt_sysinfo[GUC_GENERIC_GT_SYSINFO_SLICE_ENABLED] = 518 hweight8(gt->info.sseu.slice_mask); 519 blob->system_info.generic_gt_sysinfo[GUC_GENERIC_GT_SYSINFO_VDBOX_SFC_SUPPORT_MASK] = 520 gt->info.vdbox_sfc_access; 521 522 if (GRAPHICS_VER(i915) >= 12 && !IS_DGFX(i915)) { 523 u32 distdbreg = intel_uncore_read(gt->uncore, 524 GEN12_DIST_DBS_POPULATED); 525 blob->system_info.generic_gt_sysinfo[GUC_GENERIC_GT_SYSINFO_DOORBELL_COUNT_PER_SQIDI] = 526 ((distdbreg >> GEN12_DOORBELLS_PER_SQIDI_SHIFT) & 527 GEN12_DOORBELLS_PER_SQIDI) + 1; 528 } 529 530 /* Golden contexts for re-initialising after a watchdog reset */ 531 guc_prep_golden_context(guc, blob); 532 533 guc_mapping_table_init(guc_to_gt(guc), &blob->system_info); 534 535 base = intel_guc_ggtt_offset(guc, guc->ads_vma); 536 537 /* ADS */ 538 blob->ads.scheduler_policies = base + ptr_offset(blob, policies); 539 blob->ads.gt_system_info = base + ptr_offset(blob, system_info); 540 541 /* MMIO save/restore list */ 542 guc_mmio_reg_state_init(guc, blob); 543 544 /* Private Data */ 545 blob->ads.private_data = base + guc_ads_private_data_offset(guc); 546 547 i915_gem_object_flush_map(guc->ads_vma->obj); 548 } 549 550 /** 551 * intel_guc_ads_create() - allocates and initializes GuC ADS. 552 * @guc: intel_guc struct 553 * 554 * GuC needs memory block (Additional Data Struct), where it will store 555 * some data. Allocate and initialize such memory block for GuC use. 
556 */ 557 int intel_guc_ads_create(struct intel_guc *guc) 558 { 559 u32 size; 560 int ret; 561 562 GEM_BUG_ON(guc->ads_vma); 563 564 /* Need to calculate the reg state size dynamically: */ 565 ret = guc_mmio_reg_state_query(guc); 566 if (ret < 0) 567 return ret; 568 guc->ads_regset_size = ret; 569 570 /* Likewise the golden contexts: */ 571 ret = guc_prep_golden_context(guc, NULL); 572 if (ret < 0) 573 return ret; 574 guc->ads_golden_ctxt_size = ret; 575 576 /* Now the total size can be determined: */ 577 size = guc_ads_blob_size(guc); 578 579 ret = intel_guc_allocate_and_map_vma(guc, size, &guc->ads_vma, 580 (void **)&guc->ads_blob); 581 if (ret) 582 return ret; 583 584 __guc_ads_init(guc); 585 586 return 0; 587 } 588 589 void intel_guc_ads_init_late(struct intel_guc *guc) 590 { 591 /* 592 * The golden context setup requires the saved engine state from 593 * __engines_record_defaults(). However, that requires engines to be 594 * operational which means the ADS must already have been configured. 595 * Fortunately, the golden context state is not needed until a hang 596 * occurs, so it can be filled in during this late init phase. 597 */ 598 guc_init_golden_context(guc); 599 } 600 601 void intel_guc_ads_destroy(struct intel_guc *guc) 602 { 603 i915_vma_unpin_and_release(&guc->ads_vma, I915_VMA_RELEASE_MAP); 604 guc->ads_blob = NULL; 605 } 606 607 static void guc_ads_private_data_reset(struct intel_guc *guc) 608 { 609 u32 size; 610 611 size = guc_ads_private_data_size(guc); 612 if (!size) 613 return; 614 615 memset((void *)guc->ads_blob + guc_ads_private_data_offset(guc), 0, 616 size); 617 } 618 619 /** 620 * intel_guc_ads_reset() - prepares GuC Additional Data Struct for reuse 621 * @guc: intel_guc struct 622 * 623 * GuC stores some data in ADS, which might be stale after a reset. 624 * Reinitialize whole ADS in case any part of it was corrupted during 625 * previous GuC run. 626 */ 627 void intel_guc_ads_reset(struct intel_guc *guc) 628 { 629 if (!guc->ads_vma) 630 return; 631 632 __guc_ads_init(guc); 633 634 guc_ads_private_data_reset(guc); 635 } 636 637 u32 intel_guc_engine_usage_offset(struct intel_guc *guc) 638 { 639 struct __guc_ads_blob *blob = guc->ads_blob; 640 u32 base = intel_guc_ggtt_offset(guc, guc->ads_vma); 641 u32 offset = base + ptr_offset(blob, engine_usage); 642 643 return offset; 644 } 645 646 struct guc_engine_usage_record *intel_guc_engine_usage(struct intel_engine_cs *engine) 647 { 648 struct intel_guc *guc = &engine->gt->uc.guc; 649 struct __guc_ads_blob *blob = guc->ads_blob; 650 u8 guc_class = engine_class_to_guc_class(engine->class); 651 652 return &blob->engine_usage.engines[guc_class][ilog2(engine->logical_mask)]; 653 } 654