// SPDX-License-Identifier: MIT
/*
 * Copyright © 2019 Intel Corporation
 */

#include "i915_drv.h"
#include "intel_lrc_reg.h"
#include "intel_sseu.h"

void intel_sseu_set_info(struct sseu_dev_info *sseu, u8 max_slices,
                         u8 max_subslices, u8 max_eus_per_subslice)
{
        sseu->max_slices = max_slices;
        sseu->max_subslices = max_subslices;
        sseu->max_eus_per_subslice = max_eus_per_subslice;

        sseu->ss_stride = GEN_SSEU_STRIDE(sseu->max_subslices);
        GEM_BUG_ON(sseu->ss_stride > GEN_MAX_SUBSLICE_STRIDE);
        sseu->eu_stride = GEN_SSEU_STRIDE(sseu->max_eus_per_subslice);
        GEM_BUG_ON(sseu->eu_stride > GEN_MAX_EU_STRIDE);
}

unsigned int
intel_sseu_subslice_total(const struct sseu_dev_info *sseu)
{
        unsigned int i, total = 0;

        for (i = 0; i < ARRAY_SIZE(sseu->subslice_mask); i++)
                total += hweight8(sseu->subslice_mask[i]);

        return total;
}

u32 intel_sseu_get_subslices(const struct sseu_dev_info *sseu, u8 slice)
{
        int i, offset = slice * sseu->ss_stride;
        u32 mask = 0;

        GEM_BUG_ON(slice >= sseu->max_slices);

        for (i = 0; i < sseu->ss_stride; i++)
                mask |= (u32)sseu->subslice_mask[offset + i] <<
                        i * BITS_PER_BYTE;

        return mask;
}

void intel_sseu_set_subslices(struct sseu_dev_info *sseu, int slice,
                              u8 *subslice_mask, u32 ss_mask)
{
        int offset = slice * sseu->ss_stride;

        memcpy(&subslice_mask[offset], &ss_mask, sseu->ss_stride);
}

unsigned int
intel_sseu_subslices_per_slice(const struct sseu_dev_info *sseu, u8 slice)
{
        return hweight32(intel_sseu_get_subslices(sseu, slice));
}

static int sseu_eu_idx(const struct sseu_dev_info *sseu, int slice,
                       int subslice)
{
        int slice_stride = sseu->max_subslices * sseu->eu_stride;

        return slice * slice_stride + subslice * sseu->eu_stride;
}

static u16 sseu_get_eus(const struct sseu_dev_info *sseu, int slice,
                        int subslice)
{
        int i, offset = sseu_eu_idx(sseu, slice, subslice);
        u16 eu_mask = 0;

        for (i = 0; i < sseu->eu_stride; i++)
                eu_mask |=
                        ((u16)sseu->eu_mask[offset + i]) << (i * BITS_PER_BYTE);

        return eu_mask;
}

static void sseu_set_eus(struct sseu_dev_info *sseu, int slice, int subslice,
                         u16 eu_mask)
{
        int i, offset = sseu_eu_idx(sseu, slice, subslice);

        for (i = 0; i < sseu->eu_stride; i++)
                sseu->eu_mask[offset + i] =
                        (eu_mask >> (BITS_PER_BYTE * i)) & 0xff;
}

static u16 compute_eu_total(const struct sseu_dev_info *sseu)
{
        u16 i, total = 0;

        for (i = 0; i < ARRAY_SIZE(sseu->eu_mask); i++)
                total += hweight8(sseu->eu_mask[i]);

        return total;
}

static u32 get_ss_stride_mask(struct sseu_dev_info *sseu, u8 s, u32 ss_en)
{
        u32 ss_mask;

        ss_mask = ss_en >> (s * sseu->max_subslices);
        ss_mask &= GENMASK(sseu->max_subslices - 1, 0);

        return ss_mask;
}

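/*
 * Derive the full SSEU topology from the raw fuse values: the enabled
 * slice mask, the geometry/compute (and combined) subslice masks, the
 * per-subslice EU masks and the resulting EU totals.  Shared by the
 * gen11 and gen12 init paths below.
 */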
static void gen11_compute_sseu_info(struct sseu_dev_info *sseu, u8 s_en,
                                    u32 g_ss_en, u32 c_ss_en, u16 eu_en)
{
        int s, ss;

        /* g_ss_en/c_ss_en represent entire subslice mask across all slices */
        GEM_BUG_ON(sseu->max_slices * sseu->max_subslices >
                   sizeof(g_ss_en) * BITS_PER_BYTE);

        for (s = 0; s < sseu->max_slices; s++) {
                if ((s_en & BIT(s)) == 0)
                        continue;

                sseu->slice_mask |= BIT(s);

                /*
                 * XeHP introduces the concept of compute vs geometry DSS. To
                 * reduce variation between GENs around subslice usage, store a
                 * mask for both the geometry and compute enabled masks since
                 * userspace will need to be able to query these masks
                 * independently. Also compute a total enabled subslice count
                 * for the purposes of selecting subslices to use in a
                 * particular GEM context.
                 */
                intel_sseu_set_subslices(sseu, s, sseu->compute_subslice_mask,
                                         get_ss_stride_mask(sseu, s, c_ss_en));
                intel_sseu_set_subslices(sseu, s, sseu->geometry_subslice_mask,
                                         get_ss_stride_mask(sseu, s, g_ss_en));
                intel_sseu_set_subslices(sseu, s, sseu->subslice_mask,
                                         get_ss_stride_mask(sseu, s,
                                                            g_ss_en | c_ss_en));

                for (ss = 0; ss < sseu->max_subslices; ss++)
                        if (intel_sseu_has_subslice(sseu, s, ss))
                                sseu_set_eus(sseu, s, ss, eu_en);
        }
        sseu->eu_per_subslice = hweight16(eu_en);
        sseu->eu_total = compute_eu_total(sseu);
}

static void gen12_sseu_info_init(struct intel_gt *gt)
{
        struct sseu_dev_info *sseu = &gt->info.sseu;
        struct intel_uncore *uncore = gt->uncore;
        u32 g_dss_en, c_dss_en = 0;
        u16 eu_en = 0;
        u8 eu_en_fuse;
        u8 s_en;
        int eu;

        /*
         * Gen12 has Dual-Subslices, which behave similarly to 2 gen11 SS.
         * Instead of splitting these, provide userspace with an array
         * of DSS to more closely represent the hardware resource.
         *
         * In addition, the concept of slice has been removed in Xe_HP.
         * To be compatible with prior generations, assume a single slice
         * across the entire device. Then calculate out the DSS for each
         * workload type within that software slice.
         */
        if (IS_DG2(gt->i915) || IS_XEHPSDV(gt->i915))
                intel_sseu_set_info(sseu, 1, 32, 16);
        else
                intel_sseu_set_info(sseu, 1, 6, 16);

        /*
         * As mentioned above, Xe_HP does not have the concept of a slice.
         * Enable one for software backwards compatibility.
         */
        if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 50))
                s_en = 0x1;
        else
                s_en = intel_uncore_read(uncore, GEN11_GT_SLICE_ENABLE) &
                       GEN11_GT_S_ENA_MASK;

        g_dss_en = intel_uncore_read(uncore, GEN12_GT_GEOMETRY_DSS_ENABLE);
        if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 50))
                c_dss_en = intel_uncore_read(uncore, GEN12_GT_COMPUTE_DSS_ENABLE);

        /* one bit per pair of EUs */
        if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 50))
                eu_en_fuse = intel_uncore_read(uncore, XEHP_EU_ENABLE) & XEHP_EU_ENA_MASK;
        else
                eu_en_fuse = ~(intel_uncore_read(uncore, GEN11_EU_DISABLE) &
                               GEN11_EU_DIS_MASK);

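        /*
         * Expand each fuse bit into two adjacent EU bits; e.g. a fuse
         * value of 0b0101 yields an EU mask of 0b00110011.
         */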
        for (eu = 0; eu < sseu->max_eus_per_subslice / 2; eu++)
                if (eu_en_fuse & BIT(eu))
                        eu_en |= BIT(eu * 2) | BIT(eu * 2 + 1);

        gen11_compute_sseu_info(sseu, s_en, g_dss_en, c_dss_en, eu_en);

        /* TGL only supports slice-level power gating */
        sseu->has_slice_pg = 1;
}

static void gen11_sseu_info_init(struct intel_gt *gt)
{
        struct sseu_dev_info *sseu = &gt->info.sseu;
        struct intel_uncore *uncore = gt->uncore;
        u32 ss_en;
        u8 eu_en;
        u8 s_en;

        if (IS_JSL_EHL(gt->i915))
                intel_sseu_set_info(sseu, 1, 4, 8);
        else
                intel_sseu_set_info(sseu, 1, 8, 8);

        s_en = intel_uncore_read(uncore, GEN11_GT_SLICE_ENABLE) &
               GEN11_GT_S_ENA_MASK;
        ss_en = ~intel_uncore_read(uncore, GEN11_GT_SUBSLICE_DISABLE);

        eu_en = ~(intel_uncore_read(uncore, GEN11_EU_DISABLE) &
                  GEN11_EU_DIS_MASK);

        gen11_compute_sseu_info(sseu, s_en, ss_en, 0, eu_en);

        /* ICL has no power gating restrictions. */
        sseu->has_slice_pg = 1;
        sseu->has_subslice_pg = 1;
        sseu->has_eu_pg = 1;
}

static void cherryview_sseu_info_init(struct intel_gt *gt)
{
        struct sseu_dev_info *sseu = &gt->info.sseu;
        u32 fuse;
        u8 subslice_mask = 0;

        fuse = intel_uncore_read(gt->uncore, CHV_FUSE_GT);

        sseu->slice_mask = BIT(0);
        intel_sseu_set_info(sseu, 1, 2, 8);

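        /*
         * The EU disable bits for each subslice are split across two 4-bit
         * fuse fields (R0/R1); stitch them back into a single 8-bit disable
         * mask before inverting it into the enabled-EU mask.
         */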
        if (!(fuse & CHV_FGT_DISABLE_SS0)) {
                u8 disabled_mask =
                        ((fuse & CHV_FGT_EU_DIS_SS0_R0_MASK) >>
                         CHV_FGT_EU_DIS_SS0_R0_SHIFT) |
                        (((fuse & CHV_FGT_EU_DIS_SS0_R1_MASK) >>
                          CHV_FGT_EU_DIS_SS0_R1_SHIFT) << 4);

                subslice_mask |= BIT(0);
                sseu_set_eus(sseu, 0, 0, ~disabled_mask);
        }

        if (!(fuse & CHV_FGT_DISABLE_SS1)) {
                u8 disabled_mask =
                        ((fuse & CHV_FGT_EU_DIS_SS1_R0_MASK) >>
                         CHV_FGT_EU_DIS_SS1_R0_SHIFT) |
                        (((fuse & CHV_FGT_EU_DIS_SS1_R1_MASK) >>
                          CHV_FGT_EU_DIS_SS1_R1_SHIFT) << 4);

                subslice_mask |= BIT(1);
                sseu_set_eus(sseu, 0, 1, ~disabled_mask);
        }

        intel_sseu_set_subslices(sseu, 0, sseu->subslice_mask, subslice_mask);

        sseu->eu_total = compute_eu_total(sseu);

        /*
         * CHV is expected to always have a uniform distribution of EU
         * across subslices.
         */
        sseu->eu_per_subslice = intel_sseu_subslice_total(sseu) ?
                sseu->eu_total / intel_sseu_subslice_total(sseu) :
                0;
        /*
         * CHV supports subslice power gating on devices with more than
         * one subslice, and supports EU power gating on devices with
         * more than one EU pair per subslice.
         */
        sseu->has_slice_pg = 0;
        sseu->has_subslice_pg = intel_sseu_subslice_total(sseu) > 1;
        sseu->has_eu_pg = (sseu->eu_per_subslice > 2);
}

static void gen9_sseu_info_init(struct intel_gt *gt)
{
        struct drm_i915_private *i915 = gt->i915;
        struct intel_device_info *info = mkwrite_device_info(i915);
        struct sseu_dev_info *sseu = &gt->info.sseu;
        struct intel_uncore *uncore = gt->uncore;
        u32 fuse2, eu_disable, subslice_mask;
        const u8 eu_mask = 0xff;
        int s, ss;

        fuse2 = intel_uncore_read(uncore, GEN8_FUSE2);
        sseu->slice_mask = (fuse2 & GEN8_F2_S_ENA_MASK) >> GEN8_F2_S_ENA_SHIFT;

        /* BXT has a single slice and at most 3 subslices. */
        intel_sseu_set_info(sseu, IS_GEN9_LP(i915) ? 1 : 3,
                            IS_GEN9_LP(i915) ? 3 : 4, 8);

        /*
         * The subslice disable field is global, i.e. it applies
         * to each of the enabled slices.
         */
        subslice_mask = (1 << sseu->max_subslices) - 1;
        subslice_mask &= ~((fuse2 & GEN9_F2_SS_DIS_MASK) >>
                           GEN9_F2_SS_DIS_SHIFT);

        /*
         * Iterate through enabled slices and subslices to
         * count the total enabled EU.
         */
        for (s = 0; s < sseu->max_slices; s++) {
                if (!(sseu->slice_mask & BIT(s)))
                        /* skip disabled slice */
                        continue;

                intel_sseu_set_subslices(sseu, s, sseu->subslice_mask,
                                         subslice_mask);

                eu_disable = intel_uncore_read(uncore, GEN9_EU_DISABLE(s));
                for (ss = 0; ss < sseu->max_subslices; ss++) {
                        int eu_per_ss;
                        u8 eu_disabled_mask;

                        if (!intel_sseu_has_subslice(sseu, s, ss))
                                /* skip disabled subslice */
                                continue;

                        eu_disabled_mask = (eu_disable >> (ss * 8)) & eu_mask;

                        sseu_set_eus(sseu, s, ss, ~eu_disabled_mask);

                        eu_per_ss = sseu->max_eus_per_subslice -
                                    hweight8(eu_disabled_mask);

                        /*
                         * Record which subslices have 7 EUs. We can tune
                         * the hash used to spread work among subslices if
                         * they are unbalanced.
                         */
                        if (eu_per_ss == 7)
                                sseu->subslice_7eu[s] |= BIT(ss);
                }
        }

        sseu->eu_total = compute_eu_total(sseu);

        /*
         * SKL is expected to always have a uniform distribution
         * of EU across subslices with the exception that any one
         * EU in any one subslice may be fused off for die
         * recovery. BXT is expected to be perfectly uniform in EU
         * distribution.
         */
        sseu->eu_per_subslice =
                intel_sseu_subslice_total(sseu) ?
                DIV_ROUND_UP(sseu->eu_total, intel_sseu_subslice_total(sseu)) :
                0;

        /*
         * SKL+ supports slice power gating on devices with more than
         * one slice, and supports EU power gating on devices with
         * more than one EU pair per subslice. BXT+ supports subslice
         * power gating on devices with more than one subslice, and
         * supports EU power gating on devices with more than one EU
         * pair per subslice.
         */
        sseu->has_slice_pg =
                !IS_GEN9_LP(i915) && hweight8(sseu->slice_mask) > 1;
        sseu->has_subslice_pg =
                IS_GEN9_LP(i915) && intel_sseu_subslice_total(sseu) > 1;
        sseu->has_eu_pg = sseu->eu_per_subslice > 2;

        if (IS_GEN9_LP(i915)) {
#define IS_SS_DISABLED(ss)      (!(sseu->subslice_mask[0] & BIT(ss)))
                info->has_pooled_eu = hweight8(sseu->subslice_mask[0]) == 3;

                sseu->min_eu_in_pool = 0;
                if (info->has_pooled_eu) {
                        if (IS_SS_DISABLED(2) || IS_SS_DISABLED(0))
                                sseu->min_eu_in_pool = 3;
                        else if (IS_SS_DISABLED(1))
                                sseu->min_eu_in_pool = 6;
                        else
                                sseu->min_eu_in_pool = 9;
                }
#undef IS_SS_DISABLED
        }
}

static void bdw_sseu_info_init(struct intel_gt *gt)
{
        struct sseu_dev_info *sseu = &gt->info.sseu;
        struct intel_uncore *uncore = gt->uncore;
        int s, ss;
        u32 fuse2, subslice_mask, eu_disable[3]; /* s_max */
        u32 eu_disable0, eu_disable1, eu_disable2;

        fuse2 = intel_uncore_read(uncore, GEN8_FUSE2);
        sseu->slice_mask = (fuse2 & GEN8_F2_S_ENA_MASK) >> GEN8_F2_S_ENA_SHIFT;
        intel_sseu_set_info(sseu, 3, 3, 8);

        /*
         * The subslice disable field is global, i.e. it applies
         * to each of the enabled slices.
         */
        subslice_mask = GENMASK(sseu->max_subslices - 1, 0);
        subslice_mask &= ~((fuse2 & GEN8_F2_SS_DIS_MASK) >>
                           GEN8_F2_SS_DIS_SHIFT);
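
        /*
         * The per-slice EU disable bits are packed across three 32-bit
         * registers; reassemble a contiguous disable mask for each slice
         * before decoding it below.
         */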
        eu_disable0 = intel_uncore_read(uncore, GEN8_EU_DISABLE0);
        eu_disable1 = intel_uncore_read(uncore, GEN8_EU_DISABLE1);
        eu_disable2 = intel_uncore_read(uncore, GEN8_EU_DISABLE2);
        eu_disable[0] = eu_disable0 & GEN8_EU_DIS0_S0_MASK;
        eu_disable[1] = (eu_disable0 >> GEN8_EU_DIS0_S1_SHIFT) |
                        ((eu_disable1 & GEN8_EU_DIS1_S1_MASK) <<
                         (32 - GEN8_EU_DIS0_S1_SHIFT));
        eu_disable[2] = (eu_disable1 >> GEN8_EU_DIS1_S2_SHIFT) |
                        ((eu_disable2 & GEN8_EU_DIS2_S2_MASK) <<
                         (32 - GEN8_EU_DIS1_S2_SHIFT));

        /*
         * Iterate through enabled slices and subslices to
         * count the total enabled EU.
         */
        for (s = 0; s < sseu->max_slices; s++) {
                if (!(sseu->slice_mask & BIT(s)))
                        /* skip disabled slice */
                        continue;

                intel_sseu_set_subslices(sseu, s, sseu->subslice_mask,
                                         subslice_mask);

                for (ss = 0; ss < sseu->max_subslices; ss++) {
                        u8 eu_disabled_mask;
                        u32 n_disabled;

                        if (!intel_sseu_has_subslice(sseu, s, ss))
                                /* skip disabled subslice */
                                continue;

                        eu_disabled_mask =
                                eu_disable[s] >> (ss * sseu->max_eus_per_subslice);

                        sseu_set_eus(sseu, s, ss, ~eu_disabled_mask);

                        n_disabled = hweight8(eu_disabled_mask);

                        /*
                         * Record which subslices have 7 EUs.
                         */
                        if (sseu->max_eus_per_subslice - n_disabled == 7)
                                sseu->subslice_7eu[s] |= 1 << ss;
                }
        }

        sseu->eu_total = compute_eu_total(sseu);

        /*
         * BDW is expected to always have a uniform distribution of EU across
         * subslices with the exception that any one EU in any one subslice may
         * be fused off for die recovery.
         */
        sseu->eu_per_subslice =
                intel_sseu_subslice_total(sseu) ?
                DIV_ROUND_UP(sseu->eu_total, intel_sseu_subslice_total(sseu)) :
                0;

        /*
         * BDW supports slice power gating on devices with more than
         * one slice.
         */
        sseu->has_slice_pg = hweight8(sseu->slice_mask) > 1;
        sseu->has_subslice_pg = 0;
        sseu->has_eu_pg = 0;
}

static void hsw_sseu_info_init(struct intel_gt *gt)
{
        struct drm_i915_private *i915 = gt->i915;
        struct sseu_dev_info *sseu = &gt->info.sseu;
        u32 fuse1;
        u8 subslice_mask = 0;
        int s, ss;

        /*
         * There isn't a register to tell us how many slices/subslices. We
         * work off the PCI-ids here.
         */
        switch (INTEL_INFO(i915)->gt) {
        default:
                MISSING_CASE(INTEL_INFO(i915)->gt);
                fallthrough;
        case 1:
                sseu->slice_mask = BIT(0);
                subslice_mask = BIT(0);
                break;
        case 2:
                sseu->slice_mask = BIT(0);
                subslice_mask = BIT(0) | BIT(1);
                break;
        case 3:
                sseu->slice_mask = BIT(0) | BIT(1);
                subslice_mask = BIT(0) | BIT(1);
                break;
        }

        fuse1 = intel_uncore_read(gt->uncore, HSW_PAVP_FUSE1);
        switch (REG_FIELD_GET(HSW_F1_EU_DIS_MASK, fuse1)) {
        default:
                MISSING_CASE(REG_FIELD_GET(HSW_F1_EU_DIS_MASK, fuse1));
                fallthrough;
        case HSW_F1_EU_DIS_10EUS:
                sseu->eu_per_subslice = 10;
                break;
        case HSW_F1_EU_DIS_8EUS:
                sseu->eu_per_subslice = 8;
                break;
        case HSW_F1_EU_DIS_6EUS:
                sseu->eu_per_subslice = 6;
                break;
        }

        intel_sseu_set_info(sseu, hweight8(sseu->slice_mask),
                            hweight8(subslice_mask),
                            sseu->eu_per_subslice);

        for (s = 0; s < sseu->max_slices; s++) {
                intel_sseu_set_subslices(sseu, s, sseu->subslice_mask,
                                         subslice_mask);

                for (ss = 0; ss < sseu->max_subslices; ss++) {
                        sseu_set_eus(sseu, s, ss,
                                     (1UL << sseu->eu_per_subslice) - 1);
                }
        }

        sseu->eu_total = compute_eu_total(sseu);

        /* No powergating for you. */
        sseu->has_slice_pg = 0;
        sseu->has_subslice_pg = 0;
        sseu->has_eu_pg = 0;
}

void intel_sseu_info_init(struct intel_gt *gt)
{
        struct drm_i915_private *i915 = gt->i915;

        if (IS_HASWELL(i915))
                hsw_sseu_info_init(gt);
        else if (IS_CHERRYVIEW(i915))
                cherryview_sseu_info_init(gt);
        else if (IS_BROADWELL(i915))
                bdw_sseu_info_init(gt);
        else if (GRAPHICS_VER(i915) == 9)
                gen9_sseu_info_init(gt);
        else if (GRAPHICS_VER(i915) == 11)
                gen11_sseu_info_init(gt);
        else if (GRAPHICS_VER(i915) >= 12)
                gen12_sseu_info_init(gt);
}

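/*
 * Build the GEN8_R_PWR_CLK_STATE (RPCS) value for a requested SSEU
 * configuration, encoding the slice/subslice counts and min/max EUs per
 * subslice, subject to the power-gating capabilities of the device.
 */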
u32 intel_sseu_make_rpcs(struct intel_gt *gt,
                         const struct intel_sseu *req_sseu)
{
        struct drm_i915_private *i915 = gt->i915;
        const struct sseu_dev_info *sseu = &gt->info.sseu;
        bool subslice_pg = sseu->has_subslice_pg;
        u8 slices, subslices;
        u32 rpcs = 0;

        /*
         * No explicit RPCS request is needed to ensure full
         * slice/subslice/EU enablement prior to Gen9.
         */
        if (GRAPHICS_VER(i915) < 9)
                return 0;

        /*
         * If i915/perf is active, we want a stable powergating configuration
         * on the system. Use the configuration pinned by i915/perf.
         */
        if (i915->perf.exclusive_stream)
                req_sseu = &i915->perf.sseu;

        slices = hweight8(req_sseu->slice_mask);
        subslices = hweight8(req_sseu->subslice_mask);

        /*
         * Since the SScount bitfield in GEN8_R_PWR_CLK_STATE is only three bits
         * wide and Icelake has up to eight subslices, special programming is
         * needed in order to correctly enable all subslices.
         *
         * According to documentation software must consider the configuration
         * as 2x4x8 and hardware will translate this to 1x8x8.
         *
         * Furthermore, even though SScount is three bits, the maximum
         * documented value for it is four. From this some rules/restrictions
         * follow:
         *
         * 1.
         * If enabled subslice count is greater than four, two whole slices must
         * be enabled instead.
         *
         * 2.
         * When more than one slice is enabled, hardware ignores the subslice
         * count altogether.
         *
         * From these restrictions it follows that it is not possible to enable
         * a count of subslices between four (the SScount maximum) and the
         * maximum available number on a particular SKU. Either all subslices
         * are enabled, or a count between one and four on the first slice.
         */
        if (GRAPHICS_VER(i915) == 11 &&
            slices == 1 &&
            subslices > min_t(u8, 4, hweight8(sseu->subslice_mask[0]) / 2)) {
                GEM_BUG_ON(subslices & 1);

                subslice_pg = false;
                slices *= 2;
        }

        /*
         * Starting in Gen9, render power gating can leave
         * slice/subslice/EU in a partially enabled state. We
         * must make an explicit request through RPCS for full
         * enablement.
         */
        if (sseu->has_slice_pg) {
                u32 mask, val = slices;

                if (GRAPHICS_VER(i915) >= 11) {
                        mask = GEN11_RPCS_S_CNT_MASK;
                        val <<= GEN11_RPCS_S_CNT_SHIFT;
                } else {
                        mask = GEN8_RPCS_S_CNT_MASK;
                        val <<= GEN8_RPCS_S_CNT_SHIFT;
                }

                GEM_BUG_ON(val & ~mask);
                val &= mask;

                rpcs |= GEN8_RPCS_ENABLE | GEN8_RPCS_S_CNT_ENABLE | val;
        }

        if (subslice_pg) {
                u32 val = subslices;

                val <<= GEN8_RPCS_SS_CNT_SHIFT;

                GEM_BUG_ON(val & ~GEN8_RPCS_SS_CNT_MASK);
                val &= GEN8_RPCS_SS_CNT_MASK;

                rpcs |= GEN8_RPCS_ENABLE | GEN8_RPCS_SS_CNT_ENABLE | val;
        }

        if (sseu->has_eu_pg) {
                u32 val;

                val = req_sseu->min_eus_per_subslice << GEN8_RPCS_EU_MIN_SHIFT;
                GEM_BUG_ON(val & ~GEN8_RPCS_EU_MIN_MASK);
                val &= GEN8_RPCS_EU_MIN_MASK;

                rpcs |= val;

                val = req_sseu->max_eus_per_subslice << GEN8_RPCS_EU_MAX_SHIFT;
                GEM_BUG_ON(val & ~GEN8_RPCS_EU_MAX_MASK);
                val &= GEN8_RPCS_EU_MAX_MASK;

                rpcs |= val;

                rpcs |= GEN8_RPCS_ENABLE;
        }

        return rpcs;
}

void intel_sseu_dump(const struct sseu_dev_info *sseu, struct drm_printer *p)
{
        int s;

        drm_printf(p, "slice total: %u, mask=%04x\n",
                   hweight8(sseu->slice_mask), sseu->slice_mask);
        drm_printf(p, "subslice total: %u\n", intel_sseu_subslice_total(sseu));
        for (s = 0; s < sseu->max_slices; s++) {
                drm_printf(p, "slice%d: %u subslices, mask=%08x\n",
                           s, intel_sseu_subslices_per_slice(sseu, s),
                           intel_sseu_get_subslices(sseu, s));
        }
        drm_printf(p, "EU total: %u\n", sseu->eu_total);
        drm_printf(p, "EU per subslice: %u\n", sseu->eu_per_subslice);
        drm_printf(p, "has slice power gating: %s\n",
                   yesno(sseu->has_slice_pg));
        drm_printf(p, "has subslice power gating: %s\n",
                   yesno(sseu->has_subslice_pg));
        drm_printf(p, "has EU power gating: %s\n", yesno(sseu->has_eu_pg));
}

void intel_sseu_print_topology(const struct sseu_dev_info *sseu,
                               struct drm_printer *p)
{
        int s, ss;

        if (sseu->max_slices == 0) {
                drm_printf(p, "Unavailable\n");
                return;
        }

        for (s = 0; s < sseu->max_slices; s++) {
                drm_printf(p, "slice%d: %u subslice(s) (0x%08x):\n",
                           s, intel_sseu_subslices_per_slice(sseu, s),
                           intel_sseu_get_subslices(sseu, s));

                for (ss = 0; ss < sseu->max_subslices; ss++) {
                        u16 enabled_eus = sseu_get_eus(sseu, s, ss);

                        drm_printf(p, "\tsubslice%d: %u EUs (0x%hx)\n",
                                   ss, hweight16(enabled_eus), enabled_eus);
                }
        }
}

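/*
 * Convert a DSS mask into a slice mask: a slice is reported as present
 * if any DSS within its group of dss_per_slice is enabled. For example,
 * dss_mask 0x0f00 with dss_per_slice 4 yields slice_mask 0x4.
 */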
u16 intel_slicemask_from_dssmask(u64 dss_mask, int dss_per_slice)
{
        u16 slice_mask = 0;
        int i;

        WARN_ON(sizeof(dss_mask) * 8 / dss_per_slice > 8 * sizeof(slice_mask));

        for (i = 0; dss_mask; i++) {
                if (dss_mask & GENMASK(dss_per_slice - 1, 0))
                        slice_mask |= BIT(i);

                dss_mask >>= dss_per_slice;
        }

        return slice_mask;
}