// SPDX-License-Identifier: MIT
/*
 * Copyright © 2019 Intel Corporation
 */

#include <linux/string_helpers.h>

#include "i915_drv.h"
#include "intel_engine_regs.h"
#include "intel_gt_regs.h"
#include "intel_sseu.h"

void intel_sseu_set_info(struct sseu_dev_info *sseu, u8 max_slices,
			 u8 max_subslices, u8 max_eus_per_subslice)
{
	sseu->max_slices = max_slices;
	sseu->max_subslices = max_subslices;
	sseu->max_eus_per_subslice = max_eus_per_subslice;

	sseu->ss_stride = GEN_SSEU_STRIDE(sseu->max_subslices);
	GEM_BUG_ON(sseu->ss_stride > GEN_MAX_SUBSLICE_STRIDE);
	sseu->eu_stride = GEN_SSEU_STRIDE(sseu->max_eus_per_subslice);
	GEM_BUG_ON(sseu->eu_stride > GEN_MAX_EU_STRIDE);
}

unsigned int
intel_sseu_subslice_total(const struct sseu_dev_info *sseu)
{
	unsigned int i, total = 0;

	for (i = 0; i < ARRAY_SIZE(sseu->subslice_mask); i++)
		total += hweight8(sseu->subslice_mask[i]);

	return total;
}

static u32
sseu_get_subslices(const struct sseu_dev_info *sseu,
		   const u8 *subslice_mask, u8 slice)
{
	int i, offset = slice * sseu->ss_stride;
	u32 mask = 0;

	GEM_BUG_ON(slice >= sseu->max_slices);

	for (i = 0; i < sseu->ss_stride; i++)
		mask |= (u32)subslice_mask[offset + i] << i * BITS_PER_BYTE;

	return mask;
}

u32 intel_sseu_get_subslices(const struct sseu_dev_info *sseu, u8 slice)
{
	return sseu_get_subslices(sseu, sseu->subslice_mask, slice);
}

static u32 sseu_get_geometry_subslices(const struct sseu_dev_info *sseu)
{
	return sseu_get_subslices(sseu, sseu->geometry_subslice_mask, 0);
}

u32 intel_sseu_get_compute_subslices(const struct sseu_dev_info *sseu)
{
	return sseu_get_subslices(sseu, sseu->compute_subslice_mask, 0);
}

void intel_sseu_set_subslices(struct sseu_dev_info *sseu, int slice,
			      u8 *subslice_mask, u32 ss_mask)
{
	int offset = slice * sseu->ss_stride;

	memcpy(&subslice_mask[offset], &ss_mask, sseu->ss_stride);
}

unsigned int
intel_sseu_subslices_per_slice(const struct sseu_dev_info *sseu, u8 slice)
{
	return hweight32(intel_sseu_get_subslices(sseu, slice));
}

static int sseu_eu_idx(const struct sseu_dev_info *sseu, int slice,
		       int subslice)
{
	int slice_stride = sseu->max_subslices * sseu->eu_stride;

	return slice * slice_stride + subslice * sseu->eu_stride;
}

static u16 sseu_get_eus(const struct sseu_dev_info *sseu, int slice,
			int subslice)
{
	int i, offset = sseu_eu_idx(sseu, slice, subslice);
	u16 eu_mask = 0;

	for (i = 0; i < sseu->eu_stride; i++)
		eu_mask |=
			((u16)sseu->eu_mask[offset + i]) << (i * BITS_PER_BYTE);

	return eu_mask;
}

static void sseu_set_eus(struct sseu_dev_info *sseu, int slice, int subslice,
			 u16 eu_mask)
{
	int i, offset = sseu_eu_idx(sseu, slice, subslice);

	for (i = 0; i < sseu->eu_stride; i++)
		sseu->eu_mask[offset + i] =
			(eu_mask >> (BITS_PER_BYTE * i)) & 0xff;
}

static u16 compute_eu_total(const struct sseu_dev_info *sseu)
{
	u16 i, total = 0;

	for (i = 0; i < ARRAY_SIZE(sseu->eu_mask); i++)
		total += hweight8(sseu->eu_mask[i]);

	return total;
}

static u32 get_ss_stride_mask(struct sseu_dev_info *sseu, u8 s, u32 ss_en)
{
	u32 ss_mask;

	ss_mask = ss_en >> (s * sseu->max_subslices);
	ss_mask &= GENMASK(sseu->max_subslices - 1, 0);

	return ss_mask;
}

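/*
 * Illustrative note (hypothetical values, not from bspec):
 * gen11_compute_sseu_info() takes the already-decoded fuse values and fans
 * them out into the sseu masks. For example, s_en = 0x1, g_ss_en = 0b0011,
 * c_ss_en = 0b0100 and eu_en = 0xff would yield slice_mask = 0x1, geometry
 * DSS {0,1}, compute DSS {2}, a combined subslice_mask of 0b0111, and 8 EUs
 * recorded for each of the three enabled subslices (eu_total = 24,
 * eu_per_subslice = 8).
 */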
static void gen11_compute_sseu_info(struct sseu_dev_info *sseu, u8 s_en,
				    u32 g_ss_en, u32 c_ss_en, u16 eu_en)
{
	int s, ss;

	/* g_ss_en/c_ss_en represent entire subslice mask across all slices */
	GEM_BUG_ON(sseu->max_slices * sseu->max_subslices >
		   sizeof(g_ss_en) * BITS_PER_BYTE);

	for (s = 0; s < sseu->max_slices; s++) {
		if ((s_en & BIT(s)) == 0)
			continue;

		sseu->slice_mask |= BIT(s);

		/*
		 * XeHP introduces the concept of compute vs geometry DSS. To
		 * reduce variation between GENs around subslice usage, store a
		 * mask for both the geometry and compute enabled masks since
		 * userspace will need to be able to query these masks
		 * independently. Also compute a total enabled subslice count
		 * for the purposes of selecting subslices to use in a
		 * particular GEM context.
		 */
		intel_sseu_set_subslices(sseu, s, sseu->compute_subslice_mask,
					 get_ss_stride_mask(sseu, s, c_ss_en));
		intel_sseu_set_subslices(sseu, s, sseu->geometry_subslice_mask,
					 get_ss_stride_mask(sseu, s, g_ss_en));
		intel_sseu_set_subslices(sseu, s, sseu->subslice_mask,
					 get_ss_stride_mask(sseu, s,
							    g_ss_en | c_ss_en));

		for (ss = 0; ss < sseu->max_subslices; ss++)
			if (intel_sseu_has_subslice(sseu, s, ss))
				sseu_set_eus(sseu, s, ss, eu_en);
	}
	sseu->eu_per_subslice = hweight16(eu_en);
	sseu->eu_total = compute_eu_total(sseu);
}

static void gen12_sseu_info_init(struct intel_gt *gt)
{
	struct sseu_dev_info *sseu = &gt->info.sseu;
	struct intel_uncore *uncore = gt->uncore;
	u32 g_dss_en, c_dss_en = 0;
	u16 eu_en = 0;
	u8 eu_en_fuse;
	u8 s_en;
	int eu;

	/*
	 * Gen12 has Dual-Subslices, which behave similarly to 2 gen11 SS.
	 * Instead of splitting these, provide userspace with an array
	 * of DSS to more closely represent the hardware resource.
	 *
	 * In addition, the concept of slice has been removed in Xe_HP.
	 * To be compatible with prior generations, assume a single slice
	 * across the entire device. Then calculate out the DSS for each
	 * workload type within that software slice.
	 */
	if (IS_DG2(gt->i915) || IS_XEHPSDV(gt->i915))
		intel_sseu_set_info(sseu, 1, 32, 16);
	else
		intel_sseu_set_info(sseu, 1, 6, 16);

	/*
	 * As mentioned above, Xe_HP does not have the concept of a slice.
	 * Enable one for software backwards compatibility.
	 */
	if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 50))
		s_en = 0x1;
	else
		s_en = intel_uncore_read(uncore, GEN11_GT_SLICE_ENABLE) &
		       GEN11_GT_S_ENA_MASK;

	g_dss_en = intel_uncore_read(uncore, GEN12_GT_GEOMETRY_DSS_ENABLE);
	if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 50))
		c_dss_en = intel_uncore_read(uncore, GEN12_GT_COMPUTE_DSS_ENABLE);

	/* one bit per pair of EUs */
	if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 50))
		eu_en_fuse = intel_uncore_read(uncore, XEHP_EU_ENABLE) & XEHP_EU_ENA_MASK;
	else
		eu_en_fuse = ~(intel_uncore_read(uncore, GEN11_EU_DISABLE) &
			       GEN11_EU_DIS_MASK);

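	/*
	 * Expand the pair-granular fuse into a per-EU mask: each set fuse
	 * bit enables two adjacent EUs. Hypothetical example: eu_en_fuse =
	 * 0b0101 expands to eu_en = 0b00110011, i.e. EUs {0,1,4,5}.
	 */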
	for (eu = 0; eu < sseu->max_eus_per_subslice / 2; eu++)
		if (eu_en_fuse & BIT(eu))
			eu_en |= BIT(eu * 2) | BIT(eu * 2 + 1);

	gen11_compute_sseu_info(sseu, s_en, g_dss_en, c_dss_en, eu_en);

	/* TGL only supports slice-level power gating */
	sseu->has_slice_pg = 1;
}

static void gen11_sseu_info_init(struct intel_gt *gt)
{
	struct sseu_dev_info *sseu = &gt->info.sseu;
	struct intel_uncore *uncore = gt->uncore;
	u32 ss_en;
	u8 eu_en;
	u8 s_en;

	if (IS_JSL_EHL(gt->i915))
		intel_sseu_set_info(sseu, 1, 4, 8);
	else
		intel_sseu_set_info(sseu, 1, 8, 8);

	s_en = intel_uncore_read(uncore, GEN11_GT_SLICE_ENABLE) &
	       GEN11_GT_S_ENA_MASK;
	ss_en = ~intel_uncore_read(uncore, GEN11_GT_SUBSLICE_DISABLE);

	eu_en = ~(intel_uncore_read(uncore, GEN11_EU_DISABLE) &
		  GEN11_EU_DIS_MASK);

	gen11_compute_sseu_info(sseu, s_en, ss_en, 0, eu_en);

	/* ICL has no power gating restrictions. */
	sseu->has_slice_pg = 1;
	sseu->has_subslice_pg = 1;
	sseu->has_eu_pg = 1;
}

static void cherryview_sseu_info_init(struct intel_gt *gt)
{
	struct sseu_dev_info *sseu = &gt->info.sseu;
	u32 fuse;
	u8 subslice_mask = 0;

	fuse = intel_uncore_read(gt->uncore, CHV_FUSE_GT);

	sseu->slice_mask = BIT(0);
	intel_sseu_set_info(sseu, 1, 2, 8);

	if (!(fuse & CHV_FGT_DISABLE_SS0)) {
		u8 disabled_mask =
			((fuse & CHV_FGT_EU_DIS_SS0_R0_MASK) >>
			 CHV_FGT_EU_DIS_SS0_R0_SHIFT) |
			(((fuse & CHV_FGT_EU_DIS_SS0_R1_MASK) >>
			  CHV_FGT_EU_DIS_SS0_R1_SHIFT) << 4);

		subslice_mask |= BIT(0);
		sseu_set_eus(sseu, 0, 0, ~disabled_mask);
	}

	if (!(fuse & CHV_FGT_DISABLE_SS1)) {
		u8 disabled_mask =
			((fuse & CHV_FGT_EU_DIS_SS1_R0_MASK) >>
			 CHV_FGT_EU_DIS_SS1_R0_SHIFT) |
			(((fuse & CHV_FGT_EU_DIS_SS1_R1_MASK) >>
			  CHV_FGT_EU_DIS_SS1_R1_SHIFT) << 4);

		subslice_mask |= BIT(1);
		sseu_set_eus(sseu, 0, 1, ~disabled_mask);
	}

	intel_sseu_set_subslices(sseu, 0, sseu->subslice_mask, subslice_mask);

	sseu->eu_total = compute_eu_total(sseu);

	/*
	 * CHV is expected to always have a uniform distribution of EU
	 * across subslices.
	 */
	sseu->eu_per_subslice = intel_sseu_subslice_total(sseu) ?
		sseu->eu_total /
			intel_sseu_subslice_total(sseu) :
		0;
	/*
	 * CHV supports subslice power gating on devices with more than
	 * one subslice, and supports EU power gating on devices with
	 * more than one EU pair per subslice.
	 */
	sseu->has_slice_pg = 0;
	sseu->has_subslice_pg = intel_sseu_subslice_total(sseu) > 1;
	sseu->has_eu_pg = (sseu->eu_per_subslice > 2);
}

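/*
 * Illustrative note: on gen9 the subslice-disable fuse field is shared by
 * every slice. For a hypothetical 3-slice part with 4 subslices per slice
 * and a fuse disable field of 0b0010, subslice 1 would be dropped from each
 * enabled slice, leaving subslice_mask = 0b1101 per slice.
 */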
static void gen9_sseu_info_init(struct intel_gt *gt)
{
	struct drm_i915_private *i915 = gt->i915;
	struct intel_device_info *info = mkwrite_device_info(i915);
	struct sseu_dev_info *sseu = &gt->info.sseu;
	struct intel_uncore *uncore = gt->uncore;
	u32 fuse2, eu_disable, subslice_mask;
	const u8 eu_mask = 0xff;
	int s, ss;

	fuse2 = intel_uncore_read(uncore, GEN8_FUSE2);
	sseu->slice_mask = (fuse2 & GEN8_F2_S_ENA_MASK) >> GEN8_F2_S_ENA_SHIFT;

	/* BXT has a single slice and at most 3 subslices. */
	intel_sseu_set_info(sseu, IS_GEN9_LP(i915) ? 1 : 3,
			    IS_GEN9_LP(i915) ? 3 : 4, 8);

	/*
	 * The subslice disable field is global, i.e. it applies
	 * to each of the enabled slices.
	 */
	subslice_mask = (1 << sseu->max_subslices) - 1;
	subslice_mask &= ~((fuse2 & GEN9_F2_SS_DIS_MASK) >>
			   GEN9_F2_SS_DIS_SHIFT);

	/*
	 * Iterate through enabled slices and subslices to
	 * count the total enabled EU.
	 */
	for (s = 0; s < sseu->max_slices; s++) {
		if (!(sseu->slice_mask & BIT(s)))
			/* skip disabled slice */
			continue;

		intel_sseu_set_subslices(sseu, s, sseu->subslice_mask,
					 subslice_mask);

		eu_disable = intel_uncore_read(uncore, GEN9_EU_DISABLE(s));
		for (ss = 0; ss < sseu->max_subslices; ss++) {
			int eu_per_ss;
			u8 eu_disabled_mask;

			if (!intel_sseu_has_subslice(sseu, s, ss))
				/* skip disabled subslice */
				continue;

			eu_disabled_mask = (eu_disable >> (ss * 8)) & eu_mask;

			sseu_set_eus(sseu, s, ss, ~eu_disabled_mask);

			eu_per_ss = sseu->max_eus_per_subslice -
				hweight8(eu_disabled_mask);

			/*
			 * Record which subslice(s) has(have) 7 EUs. We
			 * can tune the hash used to spread work among
			 * subslices if they are unbalanced.
			 */
			if (eu_per_ss == 7)
				sseu->subslice_7eu[s] |= BIT(ss);
		}
	}

	sseu->eu_total = compute_eu_total(sseu);

	/*
	 * SKL is expected to always have a uniform distribution
	 * of EU across subslices with the exception that any one
	 * EU in any one subslice may be fused off for die
	 * recovery. BXT is expected to be perfectly uniform in EU
	 * distribution.
	 */
	sseu->eu_per_subslice =
		intel_sseu_subslice_total(sseu) ?
		DIV_ROUND_UP(sseu->eu_total, intel_sseu_subslice_total(sseu)) :
		0;

	/*
	 * SKL+ supports slice power gating on devices with more than
	 * one slice, and supports EU power gating on devices with
	 * more than one EU pair per subslice. BXT+ supports subslice
	 * power gating on devices with more than one subslice, and
	 * supports EU power gating on devices with more than one EU
	 * pair per subslice.
	 */
	sseu->has_slice_pg =
		!IS_GEN9_LP(i915) && hweight8(sseu->slice_mask) > 1;
	sseu->has_subslice_pg =
		IS_GEN9_LP(i915) && intel_sseu_subslice_total(sseu) > 1;
	sseu->has_eu_pg = sseu->eu_per_subslice > 2;

	if (IS_GEN9_LP(i915)) {
#define IS_SS_DISABLED(ss)	(!(sseu->subslice_mask[0] & BIT(ss)))
		info->has_pooled_eu = hweight8(sseu->subslice_mask[0]) == 3;

		sseu->min_eu_in_pool = 0;
		if (info->has_pooled_eu) {
			if (IS_SS_DISABLED(2) || IS_SS_DISABLED(0))
				sseu->min_eu_in_pool = 3;
			else if (IS_SS_DISABLED(1))
				sseu->min_eu_in_pool = 6;
			else
				sseu->min_eu_in_pool = 9;
		}
#undef IS_SS_DISABLED
	}
}

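/*
 * Illustrative note: BDW packs the per-slice EU-disable fuse bits across
 * three 32-bit registers, so the fields for slices 1 and 2 straddle a
 * register boundary. The stitching below rebuilds one contiguous disable
 * mask per slice from the high bits of one register and the low bits of
 * the next.
 */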
static void bdw_sseu_info_init(struct intel_gt *gt)
{
	struct sseu_dev_info *sseu = &gt->info.sseu;
	struct intel_uncore *uncore = gt->uncore;
	int s, ss;
	u32 fuse2, subslice_mask, eu_disable[3]; /* s_max */
	u32 eu_disable0, eu_disable1, eu_disable2;

	fuse2 = intel_uncore_read(uncore, GEN8_FUSE2);
	sseu->slice_mask = (fuse2 & GEN8_F2_S_ENA_MASK) >> GEN8_F2_S_ENA_SHIFT;
	intel_sseu_set_info(sseu, 3, 3, 8);

	/*
	 * The subslice disable field is global, i.e. it applies
	 * to each of the enabled slices.
	 */
	subslice_mask = GENMASK(sseu->max_subslices - 1, 0);
	subslice_mask &= ~((fuse2 & GEN8_F2_SS_DIS_MASK) >>
			   GEN8_F2_SS_DIS_SHIFT);
	eu_disable0 = intel_uncore_read(uncore, GEN8_EU_DISABLE0);
	eu_disable1 = intel_uncore_read(uncore, GEN8_EU_DISABLE1);
	eu_disable2 = intel_uncore_read(uncore, GEN8_EU_DISABLE2);
	eu_disable[0] = eu_disable0 & GEN8_EU_DIS0_S0_MASK;
	eu_disable[1] = (eu_disable0 >> GEN8_EU_DIS0_S1_SHIFT) |
		((eu_disable1 & GEN8_EU_DIS1_S1_MASK) <<
		 (32 - GEN8_EU_DIS0_S1_SHIFT));
	eu_disable[2] = (eu_disable1 >> GEN8_EU_DIS1_S2_SHIFT) |
		((eu_disable2 & GEN8_EU_DIS2_S2_MASK) <<
		 (32 - GEN8_EU_DIS1_S2_SHIFT));

	/*
	 * Iterate through enabled slices and subslices to
	 * count the total enabled EU.
	 */
	for (s = 0; s < sseu->max_slices; s++) {
		if (!(sseu->slice_mask & BIT(s)))
			/* skip disabled slice */
			continue;

		intel_sseu_set_subslices(sseu, s, sseu->subslice_mask,
					 subslice_mask);

		for (ss = 0; ss < sseu->max_subslices; ss++) {
			u8 eu_disabled_mask;
			u32 n_disabled;

			if (!intel_sseu_has_subslice(sseu, s, ss))
				/* skip disabled subslice */
				continue;

			eu_disabled_mask =
				eu_disable[s] >> (ss * sseu->max_eus_per_subslice);

			sseu_set_eus(sseu, s, ss, ~eu_disabled_mask);

			n_disabled = hweight8(eu_disabled_mask);

			/*
			 * Record which subslices have 7 EUs.
			 */
			if (sseu->max_eus_per_subslice - n_disabled == 7)
				sseu->subslice_7eu[s] |= 1 << ss;
		}
	}

	sseu->eu_total = compute_eu_total(sseu);

	/*
	 * BDW is expected to always have a uniform distribution of EU across
	 * subslices with the exception that any one EU in any one subslice may
	 * be fused off for die recovery.
	 */
	sseu->eu_per_subslice =
		intel_sseu_subslice_total(sseu) ?
		DIV_ROUND_UP(sseu->eu_total, intel_sseu_subslice_total(sseu)) :
		0;

	/*
	 * BDW supports slice power gating on devices with more than
	 * one slice.
	 */
	sseu->has_slice_pg = hweight8(sseu->slice_mask) > 1;
	sseu->has_subslice_pg = 0;
	sseu->has_eu_pg = 0;
}

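/*
 * Illustrative note: HSW exposes no slice/subslice fuse registers, so the
 * topology is inferred from the GT level. For example, a GT2 part is treated
 * as 1 slice with 2 subslices and a GT3 part as 2 slices with 2 subslices
 * each, with the per-subslice EU count taken from the PAVP fuse below.
 */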
static void hsw_sseu_info_init(struct intel_gt *gt)
{
	struct drm_i915_private *i915 = gt->i915;
	struct sseu_dev_info *sseu = &gt->info.sseu;
	u32 fuse1;
	u8 subslice_mask = 0;
	int s, ss;

	/*
	 * There isn't a register to tell us how many slices/subslices. We
	 * work off the PCI-ids here.
	 */
	switch (INTEL_INFO(i915)->gt) {
	default:
		MISSING_CASE(INTEL_INFO(i915)->gt);
		fallthrough;
	case 1:
		sseu->slice_mask = BIT(0);
		subslice_mask = BIT(0);
		break;
	case 2:
		sseu->slice_mask = BIT(0);
		subslice_mask = BIT(0) | BIT(1);
		break;
	case 3:
		sseu->slice_mask = BIT(0) | BIT(1);
		subslice_mask = BIT(0) | BIT(1);
		break;
	}

	fuse1 = intel_uncore_read(gt->uncore, HSW_PAVP_FUSE1);
	switch (REG_FIELD_GET(HSW_F1_EU_DIS_MASK, fuse1)) {
	default:
		MISSING_CASE(REG_FIELD_GET(HSW_F1_EU_DIS_MASK, fuse1));
		fallthrough;
	case HSW_F1_EU_DIS_10EUS:
		sseu->eu_per_subslice = 10;
		break;
	case HSW_F1_EU_DIS_8EUS:
		sseu->eu_per_subslice = 8;
		break;
	case HSW_F1_EU_DIS_6EUS:
		sseu->eu_per_subslice = 6;
		break;
	}

	intel_sseu_set_info(sseu, hweight8(sseu->slice_mask),
			    hweight8(subslice_mask),
			    sseu->eu_per_subslice);

	for (s = 0; s < sseu->max_slices; s++) {
		intel_sseu_set_subslices(sseu, s, sseu->subslice_mask,
					 subslice_mask);

		for (ss = 0; ss < sseu->max_subslices; ss++) {
			sseu_set_eus(sseu, s, ss,
				     (1UL << sseu->eu_per_subslice) - 1);
		}
	}

	sseu->eu_total = compute_eu_total(sseu);

	/* No powergating for you. */
	sseu->has_slice_pg = 0;
	sseu->has_subslice_pg = 0;
	sseu->has_eu_pg = 0;
}

void intel_sseu_info_init(struct intel_gt *gt)
{
	struct drm_i915_private *i915 = gt->i915;

	if (IS_HASWELL(i915))
		hsw_sseu_info_init(gt);
	else if (IS_CHERRYVIEW(i915))
		cherryview_sseu_info_init(gt);
	else if (IS_BROADWELL(i915))
		bdw_sseu_info_init(gt);
	else if (GRAPHICS_VER(i915) == 9)
		gen9_sseu_info_init(gt);
	else if (GRAPHICS_VER(i915) == 11)
		gen11_sseu_info_init(gt);
	else if (GRAPHICS_VER(i915) >= 12)
		gen12_sseu_info_init(gt);
}

u32 intel_sseu_make_rpcs(struct intel_gt *gt,
			 const struct intel_sseu *req_sseu)
{
	struct drm_i915_private *i915 = gt->i915;
	const struct sseu_dev_info *sseu = &gt->info.sseu;
	bool subslice_pg = sseu->has_subslice_pg;
	u8 slices, subslices;
	u32 rpcs = 0;

	/*
	 * No explicit RPCS request is needed to ensure full
	 * slice/subslice/EU enablement prior to Gen9.
	 */
	if (GRAPHICS_VER(i915) < 9)
		return 0;

	/*
	 * If i915/perf is active, we want a stable powergating configuration
	 * on the system. Use the configuration pinned by i915/perf.
	 */
	if (i915->perf.exclusive_stream)
		req_sseu = &i915->perf.sseu;

	slices = hweight8(req_sseu->slice_mask);
	subslices = hweight8(req_sseu->subslice_mask);

	/*
	 * Since the SScount bitfield in GEN8_R_PWR_CLK_STATE is only three bits
	 * wide and Icelake has up to eight subslices, special programming is
	 * needed in order to correctly enable all subslices.
	 *
	 * According to documentation, software must consider the configuration
	 * as 2x4x8 and hardware will translate this to 1x8x8.
	 *
	 * Furthermore, even though SScount is three bits, the maximum
	 * documented value for it is four. From this some rules/restrictions
	 * follow:
	 *
	 * 1.
	 * If enabled subslice count is greater than four, two whole slices must
	 * be enabled instead.
	 *
	 * 2.
	 * When more than one slice is enabled, hardware ignores the subslice
	 * count altogether.
	 *
	 * From these restrictions it follows that it is not possible to enable
	 * a subslice count between the SScount maximum of four and the maximum
	 * number available on a particular SKU. Either all subslices are
	 * enabled, or a count between one and four on the first slice.
	 */
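	/*
	 * Worked example (hypothetical ICL SKU with 8 subslices in slice 0):
	 * a request for 1x8 trips the check below (8 > min(4, 8 / 2)), so it
	 * is programmed as slices = 2 with subslice power gating disabled,
	 * while a request for 1x4 is programmed directly as SScount = 4.
	 */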
	if (GRAPHICS_VER(i915) == 11 &&
	    slices == 1 &&
	    subslices > min_t(u8, 4, hweight8(sseu->subslice_mask[0]) / 2)) {
		GEM_BUG_ON(subslices & 1);

		subslice_pg = false;
		slices *= 2;
	}

	/*
	 * Starting in Gen9, render power gating can leave
	 * slice/subslice/EU in a partially enabled state. We
	 * must make an explicit request through RPCS for full
	 * enablement.
	 */
	if (sseu->has_slice_pg) {
		u32 mask, val = slices;

		if (GRAPHICS_VER(i915) >= 11) {
			mask = GEN11_RPCS_S_CNT_MASK;
			val <<= GEN11_RPCS_S_CNT_SHIFT;
		} else {
			mask = GEN8_RPCS_S_CNT_MASK;
			val <<= GEN8_RPCS_S_CNT_SHIFT;
		}

		GEM_BUG_ON(val & ~mask);
		val &= mask;

		rpcs |= GEN8_RPCS_ENABLE | GEN8_RPCS_S_CNT_ENABLE | val;
	}

	if (subslice_pg) {
		u32 val = subslices;

		val <<= GEN8_RPCS_SS_CNT_SHIFT;

		GEM_BUG_ON(val & ~GEN8_RPCS_SS_CNT_MASK);
		val &= GEN8_RPCS_SS_CNT_MASK;

		rpcs |= GEN8_RPCS_ENABLE | GEN8_RPCS_SS_CNT_ENABLE | val;
	}

	if (sseu->has_eu_pg) {
		u32 val;

		val = req_sseu->min_eus_per_subslice << GEN8_RPCS_EU_MIN_SHIFT;
		GEM_BUG_ON(val & ~GEN8_RPCS_EU_MIN_MASK);
		val &= GEN8_RPCS_EU_MIN_MASK;

		rpcs |= val;

		val = req_sseu->max_eus_per_subslice << GEN8_RPCS_EU_MAX_SHIFT;
		GEM_BUG_ON(val & ~GEN8_RPCS_EU_MAX_MASK);
		val &= GEN8_RPCS_EU_MAX_MASK;

		rpcs |= val;

		rpcs |= GEN8_RPCS_ENABLE;
	}

	return rpcs;
}

void intel_sseu_dump(const struct sseu_dev_info *sseu, struct drm_printer *p)
{
	int s;

	drm_printf(p, "slice total: %u, mask=%04x\n",
		   hweight8(sseu->slice_mask), sseu->slice_mask);
	drm_printf(p, "subslice total: %u\n", intel_sseu_subslice_total(sseu));
	for (s = 0; s < sseu->max_slices; s++) {
		drm_printf(p, "slice%d: %u subslices, mask=%08x\n",
			   s, intel_sseu_subslices_per_slice(sseu, s),
			   intel_sseu_get_subslices(sseu, s));
	}
	drm_printf(p, "EU total: %u\n", sseu->eu_total);
	drm_printf(p, "EU per subslice: %u\n", sseu->eu_per_subslice);
	drm_printf(p, "has slice power gating: %s\n",
		   str_yes_no(sseu->has_slice_pg));
	drm_printf(p, "has subslice power gating: %s\n",
		   str_yes_no(sseu->has_subslice_pg));
	drm_printf(p, "has EU power gating: %s\n",
		   str_yes_no(sseu->has_eu_pg));
}

static void sseu_print_hsw_topology(const struct sseu_dev_info *sseu,
				    struct drm_printer *p)
{
	int s, ss;

	for (s = 0; s < sseu->max_slices; s++) {
		drm_printf(p, "slice%d: %u subslice(s) (0x%08x):\n",
			   s, intel_sseu_subslices_per_slice(sseu, s),
			   intel_sseu_get_subslices(sseu, s));

		for (ss = 0; ss < sseu->max_subslices; ss++) {
			u16 enabled_eus = sseu_get_eus(sseu, s, ss);

			drm_printf(p, "\tsubslice%d: %u EUs (0x%hx)\n",
				   ss, hweight16(enabled_eus), enabled_eus);
		}
	}
}

static void sseu_print_xehp_topology(const struct sseu_dev_info *sseu,
				     struct drm_printer *p)
{
	u32 g_dss_mask = sseu_get_geometry_subslices(sseu);
	u32 c_dss_mask = intel_sseu_get_compute_subslices(sseu);
	int dss;

	for (dss = 0; dss < sseu->max_subslices; dss++) {
		u16 enabled_eus = sseu_get_eus(sseu, 0, dss);

		drm_printf(p, "DSS_%02d: G:%3s C:%3s, %2u EUs (0x%04hx)\n", dss,
			   str_yes_no(g_dss_mask & BIT(dss)),
			   str_yes_no(c_dss_mask & BIT(dss)),
			   hweight16(enabled_eus), enabled_eus);
	}
}

void intel_sseu_print_topology(struct drm_i915_private *i915,
			       const struct sseu_dev_info *sseu,
			       struct drm_printer *p)
{
	if (sseu->max_slices == 0) {
		drm_printf(p, "Unavailable\n");
	} else if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50)) {
		sseu_print_xehp_topology(sseu, p);
	} else {
		sseu_print_hsw_topology(sseu, p);
	}
}

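/*
 * Illustrative note: intel_slicemask_from_dssmask() marks a slice as present
 * whenever any DSS in its dss_per_slice-wide window is enabled. Hypothetical
 * example: dss_mask = 0x00f1 with dss_per_slice = 4 yields slice_mask = 0x3,
 * since window 0 contains DSS 0 and window 1 contains DSS 4-7.
 */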
u16 intel_slicemask_from_dssmask(u64 dss_mask, int dss_per_slice)
{
	u16 slice_mask = 0;
	int i;

	WARN_ON(sizeof(dss_mask) * 8 / dss_per_slice > 8 * sizeof(slice_mask));

	for (i = 0; dss_mask; i++) {
		if (dss_mask & GENMASK(dss_per_slice - 1, 0))
			slice_mask |= BIT(i);

		dss_mask >>= dss_per_slice;
	}

	return slice_mask;
}