// SPDX-License-Identifier: MIT
/*
 * Copyright © 2019 Intel Corporation
 */

#include "i915_drv.h"
#include "intel_lrc_reg.h"
#include "intel_sseu.h"

void intel_sseu_set_info(struct sseu_dev_info *sseu, u8 max_slices,
			 u8 max_subslices, u8 max_eus_per_subslice)
{
	sseu->max_slices = max_slices;
	sseu->max_subslices = max_subslices;
	sseu->max_eus_per_subslice = max_eus_per_subslice;

	sseu->ss_stride = GEN_SSEU_STRIDE(sseu->max_subslices);
	GEM_BUG_ON(sseu->ss_stride > GEN_MAX_SUBSLICE_STRIDE);
	sseu->eu_stride = GEN_SSEU_STRIDE(sseu->max_eus_per_subslice);
	GEM_BUG_ON(sseu->eu_stride > GEN_MAX_EU_STRIDE);
}

unsigned int
intel_sseu_subslice_total(const struct sseu_dev_info *sseu)
{
	unsigned int i, total = 0;

	for (i = 0; i < ARRAY_SIZE(sseu->subslice_mask); i++)
		total += hweight8(sseu->subslice_mask[i]);

	return total;
}

u32 intel_sseu_get_subslices(const struct sseu_dev_info *sseu, u8 slice)
{
	int i, offset = slice * sseu->ss_stride;
	u32 mask = 0;

	GEM_BUG_ON(slice >= sseu->max_slices);

	for (i = 0; i < sseu->ss_stride; i++)
		mask |= (u32)sseu->subslice_mask[offset + i] <<
			i * BITS_PER_BYTE;

	return mask;
}

void intel_sseu_set_subslices(struct sseu_dev_info *sseu, int slice,
			      u32 ss_mask)
{
	int offset = slice * sseu->ss_stride;

	memcpy(&sseu->subslice_mask[offset], &ss_mask, sseu->ss_stride);
}

unsigned int
intel_sseu_subslices_per_slice(const struct sseu_dev_info *sseu, u8 slice)
{
	return hweight32(intel_sseu_get_subslices(sseu, slice));
}

static int sseu_eu_idx(const struct sseu_dev_info *sseu, int slice,
		       int subslice)
{
	int slice_stride = sseu->max_subslices * sseu->eu_stride;

	return slice * slice_stride + subslice * sseu->eu_stride;
}

static u16 sseu_get_eus(const struct sseu_dev_info *sseu, int slice,
			int subslice)
{
	int i, offset = sseu_eu_idx(sseu, slice, subslice);
	u16 eu_mask = 0;

	for (i = 0; i < sseu->eu_stride; i++)
		eu_mask |=
			((u16)sseu->eu_mask[offset + i]) << (i * BITS_PER_BYTE);

	return eu_mask;
}

static void sseu_set_eus(struct sseu_dev_info *sseu, int slice, int subslice,
			 u16 eu_mask)
{
	int i, offset = sseu_eu_idx(sseu, slice, subslice);

	for (i = 0; i < sseu->eu_stride; i++)
		sseu->eu_mask[offset + i] =
			(eu_mask >> (BITS_PER_BYTE * i)) & 0xff;
}

static u16 compute_eu_total(const struct sseu_dev_info *sseu)
{
	u16 i, total = 0;

	for (i = 0; i < ARRAY_SIZE(sseu->eu_mask); i++)
		total += hweight8(sseu->eu_mask[i]);

	return total;
}

static void gen11_compute_sseu_info(struct sseu_dev_info *sseu,
				    u8 s_en, u32 ss_en, u16 eu_en)
{
	int s, ss;

	/* ss_en represents entire subslice mask across all slices */
	GEM_BUG_ON(sseu->max_slices * sseu->max_subslices >
		   sizeof(ss_en) * BITS_PER_BYTE);

	for (s = 0; s < sseu->max_slices; s++) {
		if ((s_en & BIT(s)) == 0)
			continue;

		sseu->slice_mask |= BIT(s);

		intel_sseu_set_subslices(sseu, s, ss_en);

		for (ss = 0; ss < sseu->max_subslices; ss++)
			if (intel_sseu_has_subslice(sseu, s, ss))
				sseu_set_eus(sseu, s, ss, eu_en);
	}
	sseu->eu_per_subslice = hweight16(eu_en);
	sseu->eu_total = compute_eu_total(sseu);
}
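
/*
 * Worked example (fuse values hypothetical): on a 1x8x8 part with
 * s_en = 0x1, ss_en = 0xff and eu_en = 0xff, the helper above sets
 * slice_mask = 0x1, records eight subslices for slice 0, assigns each
 * of them the full 8-EU mask, and ends up with eu_per_subslice = 8
 * and eu_total = 64.
 */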
static void gen12_sseu_info_init(struct intel_gt *gt)
{
	struct sseu_dev_info *sseu = &gt->info.sseu;
	struct intel_uncore *uncore = gt->uncore;
	u32 dss_en;
	u16 eu_en = 0;
	u8 eu_en_fuse;
	u8 s_en;
	int eu;

	/*
	 * Gen12 has Dual-Subslices, which behave similarly to 2 gen11 SS.
	 * Instead of splitting these, provide userspace with an array
	 * of DSS to more closely represent the hardware resource.
	 *
	 * In addition, the concept of slice has been removed in Xe_HP.
	 * To be compatible with prior generations, assume a single slice
	 * across the entire device. Then calculate out the DSS for each
	 * workload type within that software slice.
	 */
	if (IS_DG2(gt->i915) || IS_XEHPSDV(gt->i915))
		intel_sseu_set_info(sseu, 1, 32, 16);
	else
		intel_sseu_set_info(sseu, 1, 6, 16);

	/*
	 * As mentioned above, Xe_HP does not have the concept of a slice.
	 * Enable one for software backwards compatibility.
	 */
	if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 50))
		s_en = 0x1;
	else
		s_en = intel_uncore_read(uncore, GEN11_GT_SLICE_ENABLE) &
		       GEN11_GT_S_ENA_MASK;

	dss_en = intel_uncore_read(uncore, GEN12_GT_DSS_ENABLE);

	/* one bit per pair of EUs */
	if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 50))
		eu_en_fuse = intel_uncore_read(uncore, XEHP_EU_ENABLE) & XEHP_EU_ENA_MASK;
	else
		eu_en_fuse = ~(intel_uncore_read(uncore, GEN11_EU_DISABLE) &
			       GEN11_EU_DIS_MASK);

	for (eu = 0; eu < sseu->max_eus_per_subslice / 2; eu++)
		if (eu_en_fuse & BIT(eu))
			eu_en |= BIT(eu * 2) | BIT(eu * 2 + 1);

	gen11_compute_sseu_info(sseu, s_en, dss_en, eu_en);

	/* TGL only supports slice-level power gating */
	sseu->has_slice_pg = 1;
}

static void gen11_sseu_info_init(struct intel_gt *gt)
{
	struct sseu_dev_info *sseu = &gt->info.sseu;
	struct intel_uncore *uncore = gt->uncore;
	u32 ss_en;
	u8 eu_en;
	u8 s_en;

	if (IS_JSL_EHL(gt->i915))
		intel_sseu_set_info(sseu, 1, 4, 8);
	else
		intel_sseu_set_info(sseu, 1, 8, 8);

	s_en = intel_uncore_read(uncore, GEN11_GT_SLICE_ENABLE) &
	       GEN11_GT_S_ENA_MASK;
	ss_en = ~intel_uncore_read(uncore, GEN11_GT_SUBSLICE_DISABLE);

	eu_en = ~(intel_uncore_read(uncore, GEN11_EU_DISABLE) &
		  GEN11_EU_DIS_MASK);

	gen11_compute_sseu_info(sseu, s_en, ss_en, eu_en);

	/* ICL has no power gating restrictions. */
	sseu->has_slice_pg = 1;
	sseu->has_subslice_pg = 1;
	sseu->has_eu_pg = 1;
}
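
/*
 * Worked example for the gen12 EU fuse expansion above (fuse value
 * hypothetical): each fuse bit represents a pair of EUs, so
 * eu_en_fuse = 0b00001111 expands to eu_en = 0b0000000011111111;
 * four enabled pairs become eight enabled EUs.
 */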
static void cherryview_sseu_info_init(struct intel_gt *gt)
{
	struct sseu_dev_info *sseu = &gt->info.sseu;
	u32 fuse;
	u8 subslice_mask = 0;

	fuse = intel_uncore_read(gt->uncore, CHV_FUSE_GT);

	sseu->slice_mask = BIT(0);
	intel_sseu_set_info(sseu, 1, 2, 8);

	if (!(fuse & CHV_FGT_DISABLE_SS0)) {
		u8 disabled_mask =
			((fuse & CHV_FGT_EU_DIS_SS0_R0_MASK) >>
			 CHV_FGT_EU_DIS_SS0_R0_SHIFT) |
			(((fuse & CHV_FGT_EU_DIS_SS0_R1_MASK) >>
			  CHV_FGT_EU_DIS_SS0_R1_SHIFT) << 4);

		subslice_mask |= BIT(0);
		sseu_set_eus(sseu, 0, 0, ~disabled_mask);
	}

	if (!(fuse & CHV_FGT_DISABLE_SS1)) {
		u8 disabled_mask =
			((fuse & CHV_FGT_EU_DIS_SS1_R0_MASK) >>
			 CHV_FGT_EU_DIS_SS1_R0_SHIFT) |
			(((fuse & CHV_FGT_EU_DIS_SS1_R1_MASK) >>
			  CHV_FGT_EU_DIS_SS1_R1_SHIFT) << 4);

		subslice_mask |= BIT(1);
		sseu_set_eus(sseu, 0, 1, ~disabled_mask);
	}

	intel_sseu_set_subslices(sseu, 0, subslice_mask);

	sseu->eu_total = compute_eu_total(sseu);

	/*
	 * CHV is expected to always have a uniform distribution of EU
	 * across subslices.
	 */
	sseu->eu_per_subslice = intel_sseu_subslice_total(sseu) ?
		sseu->eu_total /
			intel_sseu_subslice_total(sseu) :
		0;

	/*
	 * CHV supports subslice power gating on devices with more than
	 * one subslice, and supports EU power gating on devices with
	 * more than one EU pair per subslice.
	 */
	sseu->has_slice_pg = 0;
	sseu->has_subslice_pg = intel_sseu_subslice_total(sseu) > 1;
	sseu->has_eu_pg = (sseu->eu_per_subslice > 2);
}

static void gen9_sseu_info_init(struct intel_gt *gt)
{
	struct drm_i915_private *i915 = gt->i915;
	struct intel_device_info *info = mkwrite_device_info(i915);
	struct sseu_dev_info *sseu = &gt->info.sseu;
	struct intel_uncore *uncore = gt->uncore;
	u32 fuse2, eu_disable, subslice_mask;
	const u8 eu_mask = 0xff;
	int s, ss;

	fuse2 = intel_uncore_read(uncore, GEN8_FUSE2);
	sseu->slice_mask = (fuse2 & GEN8_F2_S_ENA_MASK) >> GEN8_F2_S_ENA_SHIFT;

	/* BXT has a single slice and at most 3 subslices. */
	intel_sseu_set_info(sseu, IS_GEN9_LP(i915) ? 1 : 3,
			    IS_GEN9_LP(i915) ? 3 : 4, 8);

	/*
	 * The subslice disable field is global, i.e. it applies
	 * to each of the enabled slices.
	 */
	subslice_mask = (1 << sseu->max_subslices) - 1;
	subslice_mask &= ~((fuse2 & GEN9_F2_SS_DIS_MASK) >>
			   GEN9_F2_SS_DIS_SHIFT);

	/*
	 * Iterate through enabled slices and subslices to
	 * count the total enabled EU.
	 */
	for (s = 0; s < sseu->max_slices; s++) {
		if (!(sseu->slice_mask & BIT(s)))
			/* skip disabled slice */
			continue;

		intel_sseu_set_subslices(sseu, s, subslice_mask);

		eu_disable = intel_uncore_read(uncore, GEN9_EU_DISABLE(s));
		for (ss = 0; ss < sseu->max_subslices; ss++) {
			int eu_per_ss;
			u8 eu_disabled_mask;

			if (!intel_sseu_has_subslice(sseu, s, ss))
				/* skip disabled subslice */
				continue;

			eu_disabled_mask = (eu_disable >> (ss * 8)) & eu_mask;

			sseu_set_eus(sseu, s, ss, ~eu_disabled_mask);

			eu_per_ss = sseu->max_eus_per_subslice -
				hweight8(eu_disabled_mask);

			/*
			 * Record which subslice(s) has(have) 7 EUs. We
			 * can tune the hash used to spread work among
			 * subslices if they are unbalanced.
			 */
			if (eu_per_ss == 7)
				sseu->subslice_7eu[s] |= BIT(ss);
		}
	}

	sseu->eu_total = compute_eu_total(sseu);

	/*
	 * SKL is expected to always have a uniform distribution
	 * of EU across subslices with the exception that any one
	 * EU in any one subslice may be fused off for die
	 * recovery. BXT is expected to be perfectly uniform in EU
	 * distribution.
	 */
	sseu->eu_per_subslice =
		intel_sseu_subslice_total(sseu) ?
		DIV_ROUND_UP(sseu->eu_total, intel_sseu_subslice_total(sseu)) :
		0;

	/*
	 * SKL+ supports slice power gating on devices with more than
	 * one slice, and supports EU power gating on devices with
	 * more than one EU pair per subslice. BXT+ supports subslice
	 * power gating on devices with more than one subslice, and
	 * supports EU power gating on devices with more than one EU
	 * pair per subslice.
	 */
	sseu->has_slice_pg =
		!IS_GEN9_LP(i915) && hweight8(sseu->slice_mask) > 1;
	sseu->has_subslice_pg =
		IS_GEN9_LP(i915) && intel_sseu_subslice_total(sseu) > 1;
	sseu->has_eu_pg = sseu->eu_per_subslice > 2;

	if (IS_GEN9_LP(i915)) {
#define IS_SS_DISABLED(ss)	(!(sseu->subslice_mask[0] & BIT(ss)))
		info->has_pooled_eu = hweight8(sseu->subslice_mask[0]) == 3;

		sseu->min_eu_in_pool = 0;
		if (info->has_pooled_eu) {
			if (IS_SS_DISABLED(2) || IS_SS_DISABLED(0))
				sseu->min_eu_in_pool = 3;
			else if (IS_SS_DISABLED(1))
				sseu->min_eu_in_pool = 6;
			else
				sseu->min_eu_in_pool = 9;
		}
#undef IS_SS_DISABLED
	}
}
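
/*
 * Worked example for the eu_per_subslice rounding above (values
 * hypothetical): a SKL part with one EU fused off for die recovery,
 * e.g. eu_total = 23 across three enabled subslices, reports
 * eu_per_subslice = DIV_ROUND_UP(23, 3) = 8 rather than the
 * truncated 7.
 */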
static void bdw_sseu_info_init(struct intel_gt *gt)
{
	struct sseu_dev_info *sseu = &gt->info.sseu;
	struct intel_uncore *uncore = gt->uncore;
	int s, ss;
	u32 fuse2, subslice_mask, eu_disable[3]; /* s_max */
	u32 eu_disable0, eu_disable1, eu_disable2;

	fuse2 = intel_uncore_read(uncore, GEN8_FUSE2);
	sseu->slice_mask = (fuse2 & GEN8_F2_S_ENA_MASK) >> GEN8_F2_S_ENA_SHIFT;
	intel_sseu_set_info(sseu, 3, 3, 8);

	/*
	 * The subslice disable field is global, i.e. it applies
	 * to each of the enabled slices.
	 */
	subslice_mask = GENMASK(sseu->max_subslices - 1, 0);
	subslice_mask &= ~((fuse2 & GEN8_F2_SS_DIS_MASK) >>
			   GEN8_F2_SS_DIS_SHIFT);
	eu_disable0 = intel_uncore_read(uncore, GEN8_EU_DISABLE0);
	eu_disable1 = intel_uncore_read(uncore, GEN8_EU_DISABLE1);
	eu_disable2 = intel_uncore_read(uncore, GEN8_EU_DISABLE2);
	eu_disable[0] = eu_disable0 & GEN8_EU_DIS0_S0_MASK;
	eu_disable[1] = (eu_disable0 >> GEN8_EU_DIS0_S1_SHIFT) |
		((eu_disable1 & GEN8_EU_DIS1_S1_MASK) <<
		 (32 - GEN8_EU_DIS0_S1_SHIFT));
	eu_disable[2] = (eu_disable1 >> GEN8_EU_DIS1_S2_SHIFT) |
		((eu_disable2 & GEN8_EU_DIS2_S2_MASK) <<
		 (32 - GEN8_EU_DIS1_S2_SHIFT));

	/*
	 * Iterate through enabled slices and subslices to
	 * count the total enabled EU.
	 */
	for (s = 0; s < sseu->max_slices; s++) {
		if (!(sseu->slice_mask & BIT(s)))
			/* skip disabled slice */
			continue;

		intel_sseu_set_subslices(sseu, s, subslice_mask);

		for (ss = 0; ss < sseu->max_subslices; ss++) {
			u8 eu_disabled_mask;
			u32 n_disabled;

			if (!intel_sseu_has_subslice(sseu, s, ss))
				/* skip disabled subslice */
				continue;

			eu_disabled_mask =
				eu_disable[s] >> (ss * sseu->max_eus_per_subslice);

			sseu_set_eus(sseu, s, ss, ~eu_disabled_mask);

			n_disabled = hweight8(eu_disabled_mask);

			/*
			 * Record which subslices have 7 EUs.
			 */
			if (sseu->max_eus_per_subslice - n_disabled == 7)
				sseu->subslice_7eu[s] |= 1 << ss;
		}
	}

	sseu->eu_total = compute_eu_total(sseu);

	/*
	 * BDW is expected to always have a uniform distribution of EU across
	 * subslices with the exception that any one EU in any one subslice may
	 * be fused off for die recovery.
	 */
	sseu->eu_per_subslice =
		intel_sseu_subslice_total(sseu) ?
		DIV_ROUND_UP(sseu->eu_total, intel_sseu_subslice_total(sseu)) :
		0;

	/*
	 * BDW supports slice power gating on devices with more than
	 * one slice.
	 */
	sseu->has_slice_pg = hweight8(sseu->slice_mask) > 1;
	sseu->has_subslice_pg = 0;
	sseu->has_eu_pg = 0;
}
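
/*
 * Note on the fuse stitching above: each BDW slice needs 24 EU-disable
 * bits (3 subslices x 8 EUs), so slice 1's bits straddle the boundary
 * between GEN8_EU_DISABLE0 and GEN8_EU_DISABLE1; the low bits come
 * from the top of eu_disable0 and the remainder from the bottom of
 * eu_disable1, shifted back into place. Slice 2 straddles
 * GEN8_EU_DISABLE1 and GEN8_EU_DISABLE2 the same way.
 */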
static void hsw_sseu_info_init(struct intel_gt *gt)
{
	struct drm_i915_private *i915 = gt->i915;
	struct sseu_dev_info *sseu = &gt->info.sseu;
	u32 fuse1;
	u8 subslice_mask = 0;
	int s, ss;

	/*
	 * There isn't a register to tell us how many slices/subslices. We
	 * work off the PCI-ids here.
	 */
	switch (INTEL_INFO(i915)->gt) {
	default:
		MISSING_CASE(INTEL_INFO(i915)->gt);
		fallthrough;
	case 1:
		sseu->slice_mask = BIT(0);
		subslice_mask = BIT(0);
		break;
	case 2:
		sseu->slice_mask = BIT(0);
		subslice_mask = BIT(0) | BIT(1);
		break;
	case 3:
		sseu->slice_mask = BIT(0) | BIT(1);
		subslice_mask = BIT(0) | BIT(1);
		break;
	}

	fuse1 = intel_uncore_read(gt->uncore, HSW_PAVP_FUSE1);
	switch ((fuse1 & HSW_F1_EU_DIS_MASK) >> HSW_F1_EU_DIS_SHIFT) {
	default:
		MISSING_CASE((fuse1 & HSW_F1_EU_DIS_MASK) >>
			     HSW_F1_EU_DIS_SHIFT);
		fallthrough;
	case HSW_F1_EU_DIS_10EUS:
		sseu->eu_per_subslice = 10;
		break;
	case HSW_F1_EU_DIS_8EUS:
		sseu->eu_per_subslice = 8;
		break;
	case HSW_F1_EU_DIS_6EUS:
		sseu->eu_per_subslice = 6;
		break;
	}

	intel_sseu_set_info(sseu, hweight8(sseu->slice_mask),
			    hweight8(subslice_mask),
			    sseu->eu_per_subslice);

	for (s = 0; s < sseu->max_slices; s++) {
		intel_sseu_set_subslices(sseu, s, subslice_mask);

		for (ss = 0; ss < sseu->max_subslices; ss++) {
			sseu_set_eus(sseu, s, ss,
				     (1UL << sseu->eu_per_subslice) - 1);
		}
	}

	sseu->eu_total = compute_eu_total(sseu);

	/* No powergating for you. */
	sseu->has_slice_pg = 0;
	sseu->has_subslice_pg = 0;
	sseu->has_eu_pg = 0;
}

void intel_sseu_info_init(struct intel_gt *gt)
{
	struct drm_i915_private *i915 = gt->i915;

	if (IS_HASWELL(i915))
		hsw_sseu_info_init(gt);
	else if (IS_CHERRYVIEW(i915))
		cherryview_sseu_info_init(gt);
	else if (IS_BROADWELL(i915))
		bdw_sseu_info_init(gt);
	else if (GRAPHICS_VER(i915) == 9)
		gen9_sseu_info_init(gt);
	else if (GRAPHICS_VER(i915) == 11)
		gen11_sseu_info_init(gt);
	else if (GRAPHICS_VER(i915) >= 12)
		gen12_sseu_info_init(gt);
}
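
/*
 * Worked example for the ICL SScount fixup in intel_sseu_make_rpcs()
 * below (request hypothetical): on an 8-subslice ICL part, a request
 * for 1 slice x 8 subslices exceeds the documented SScount maximum of
 * four, so it is reprogrammed as 2 slices with subslice power gating
 * disabled; hardware then translates the 2x4x8 configuration back to
 * 1x8x8.
 */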
u32 intel_sseu_make_rpcs(struct intel_gt *gt,
			 const struct intel_sseu *req_sseu)
{
	struct drm_i915_private *i915 = gt->i915;
	const struct sseu_dev_info *sseu = &gt->info.sseu;
	bool subslice_pg = sseu->has_subslice_pg;
	u8 slices, subslices;
	u32 rpcs = 0;

	/*
	 * No explicit RPCS request is needed to ensure full
	 * slice/subslice/EU enablement prior to Gen9.
	 */
	if (GRAPHICS_VER(i915) < 9)
		return 0;

	/*
	 * If i915/perf is active, we want a stable powergating configuration
	 * on the system. Use the configuration pinned by i915/perf.
	 */
	if (i915->perf.exclusive_stream)
		req_sseu = &i915->perf.sseu;

	slices = hweight8(req_sseu->slice_mask);
	subslices = hweight8(req_sseu->subslice_mask);

	/*
	 * Since the SScount bitfield in GEN8_R_PWR_CLK_STATE is only three bits
	 * wide and Icelake has up to eight subslices, special programming is
	 * needed in order to correctly enable all subslices.
	 *
	 * According to documentation software must consider the configuration
	 * as 2x4x8 and hardware will translate this to 1x8x8.
	 *
	 * Furthermore, even though SScount is three bits, the maximum
	 * documented value for it is four. From this some rules/restrictions
	 * follow:
	 *
	 * 1.
	 * If enabled subslice count is greater than four, two whole slices must
	 * be enabled instead.
	 *
	 * 2.
	 * When more than one slice is enabled, hardware ignores the subslice
	 * count altogether.
	 *
	 * From these restrictions it follows that it is not possible to enable
	 * a count of subslices between the SScount maximum of four restriction,
	 * and the maximum available number on a particular SKU. Either all
	 * subslices are enabled, or a count between one and four on the first
	 * slice.
	 */
	if (GRAPHICS_VER(i915) == 11 &&
	    slices == 1 &&
	    subslices > min_t(u8, 4, hweight8(sseu->subslice_mask[0]) / 2)) {
		GEM_BUG_ON(subslices & 1);

		subslice_pg = false;
		slices *= 2;
	}

	/*
	 * Starting in Gen9, render power gating can leave
	 * slice/subslice/EU in a partially enabled state. We
	 * must make an explicit request through RPCS for full
	 * enablement.
	 */
	if (sseu->has_slice_pg) {
		u32 mask, val = slices;

		if (GRAPHICS_VER(i915) >= 11) {
			mask = GEN11_RPCS_S_CNT_MASK;
			val <<= GEN11_RPCS_S_CNT_SHIFT;
		} else {
			mask = GEN8_RPCS_S_CNT_MASK;
			val <<= GEN8_RPCS_S_CNT_SHIFT;
		}

		GEM_BUG_ON(val & ~mask);
		val &= mask;

		rpcs |= GEN8_RPCS_ENABLE | GEN8_RPCS_S_CNT_ENABLE | val;
	}

	if (subslice_pg) {
		u32 val = subslices;

		val <<= GEN8_RPCS_SS_CNT_SHIFT;

		GEM_BUG_ON(val & ~GEN8_RPCS_SS_CNT_MASK);
		val &= GEN8_RPCS_SS_CNT_MASK;

		rpcs |= GEN8_RPCS_ENABLE | GEN8_RPCS_SS_CNT_ENABLE | val;
	}

	if (sseu->has_eu_pg) {
		u32 val;

		val = req_sseu->min_eus_per_subslice << GEN8_RPCS_EU_MIN_SHIFT;
		GEM_BUG_ON(val & ~GEN8_RPCS_EU_MIN_MASK);
		val &= GEN8_RPCS_EU_MIN_MASK;

		rpcs |= val;

		val = req_sseu->max_eus_per_subslice << GEN8_RPCS_EU_MAX_SHIFT;
		GEM_BUG_ON(val & ~GEN8_RPCS_EU_MAX_MASK);
		val &= GEN8_RPCS_EU_MAX_MASK;

		rpcs |= val;

		rpcs |= GEN8_RPCS_ENABLE;
	}

	return rpcs;
}

void intel_sseu_dump(const struct sseu_dev_info *sseu, struct drm_printer *p)
{
	int s;

	drm_printf(p, "slice total: %u, mask=%04x\n",
		   hweight8(sseu->slice_mask), sseu->slice_mask);
	drm_printf(p, "subslice total: %u\n", intel_sseu_subslice_total(sseu));
	for (s = 0; s < sseu->max_slices; s++) {
		drm_printf(p, "slice%d: %u subslices, mask=%08x\n",
			   s, intel_sseu_subslices_per_slice(sseu, s),
			   intel_sseu_get_subslices(sseu, s));
	}
	drm_printf(p, "EU total: %u\n", sseu->eu_total);
	drm_printf(p, "EU per subslice: %u\n", sseu->eu_per_subslice);
	drm_printf(p, "has slice power gating: %s\n",
		   yesno(sseu->has_slice_pg));
	drm_printf(p, "has subslice power gating: %s\n",
		   yesno(sseu->has_subslice_pg));
	drm_printf(p, "has EU power gating: %s\n", yesno(sseu->has_eu_pg));
}

void intel_sseu_print_topology(const struct sseu_dev_info *sseu,
			       struct drm_printer *p)
{
	int s, ss;

	if (sseu->max_slices == 0) {
		drm_printf(p, "Unavailable\n");
		return;
	}

	for (s = 0; s < sseu->max_slices; s++) {
		drm_printf(p, "slice%d: %u subslice(s) (0x%08x):\n",
			   s, intel_sseu_subslices_per_slice(sseu, s),
			   intel_sseu_get_subslices(sseu, s));

		for (ss = 0; ss < sseu->max_subslices; ss++) {
			u16 enabled_eus = sseu_get_eus(sseu, s, ss);

			drm_printf(p, "\tsubslice%d: %u EUs (0x%hx)\n",
				   ss, hweight16(enabled_eus), enabled_eus);
		}
	}
}
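
/**
 * intel_slicemask_from_dssmask - convert a DSS mask to a slice mask
 * @dss_mask: mask of enabled dual-subslices across the device
 * @dss_per_slice: number of DSS grouped into one logical slice
 *
 * Returns a mask with one bit set for each slice-sized group of
 * @dss_mask that contains at least one enabled DSS. For example
 * (values hypothetical), dss_mask = 0x0f0f with dss_per_slice = 8
 * yields slice_mask = 0x3.
 */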
u16 intel_slicemask_from_dssmask(u64 dss_mask, int dss_per_slice)
{
	u16 slice_mask = 0;
	int i;

	WARN_ON(sizeof(dss_mask) * 8 / dss_per_slice > 8 * sizeof(slice_mask));

	for (i = 0; dss_mask; i++) {
		if (dss_mask & GENMASK(dss_per_slice - 1, 0))
			slice_mask |= BIT(i);

		dss_mask >>= dss_per_slice;
	}

	return slice_mask;
}