// SPDX-License-Identifier: MIT
/*
 * Copyright © 2019 Intel Corporation
 */

#include <linux/string_helpers.h>

#include "i915_drv.h"
#include "intel_engine_regs.h"
#include "intel_gt_regs.h"
#include "intel_sseu.h"

void intel_sseu_set_info(struct sseu_dev_info *sseu, u8 max_slices,
			 u8 max_subslices, u8 max_eus_per_subslice)
{
	sseu->max_slices = max_slices;
	sseu->max_subslices = max_subslices;
	sseu->max_eus_per_subslice = max_eus_per_subslice;

	sseu->ss_stride = GEN_SSEU_STRIDE(sseu->max_subslices);
	GEM_BUG_ON(sseu->ss_stride > GEN_MAX_SUBSLICE_STRIDE);
	sseu->eu_stride = GEN_SSEU_STRIDE(sseu->max_eus_per_subslice);
	GEM_BUG_ON(sseu->eu_stride > GEN_MAX_EU_STRIDE);
}

unsigned int
intel_sseu_subslice_total(const struct sseu_dev_info *sseu)
{
	unsigned int i, total = 0;

	for (i = 0; i < ARRAY_SIZE(sseu->subslice_mask); i++)
		total += hweight8(sseu->subslice_mask[i]);

	return total;
}

static u32
sseu_get_subslices(const struct sseu_dev_info *sseu,
		   const u8 *subslice_mask, u8 slice)
{
	int i, offset = slice * sseu->ss_stride;
	u32 mask = 0;

	GEM_BUG_ON(slice >= sseu->max_slices);

	for (i = 0; i < sseu->ss_stride; i++)
		mask |= (u32)subslice_mask[offset + i] << i * BITS_PER_BYTE;

	return mask;
}

u32 intel_sseu_get_subslices(const struct sseu_dev_info *sseu, u8 slice)
{
	return sseu_get_subslices(sseu, sseu->subslice_mask, slice);
}

static u32 sseu_get_geometry_subslices(const struct sseu_dev_info *sseu)
{
	return sseu_get_subslices(sseu, sseu->geometry_subslice_mask, 0);
}

u32 intel_sseu_get_compute_subslices(const struct sseu_dev_info *sseu)
{
	return sseu_get_subslices(sseu, sseu->compute_subslice_mask, 0);
}

void intel_sseu_set_subslices(struct sseu_dev_info *sseu, int slice,
			      u8 *subslice_mask, u32 ss_mask)
{
	int offset = slice * sseu->ss_stride;

	memcpy(&subslice_mask[offset], &ss_mask, sseu->ss_stride);
}

unsigned int
intel_sseu_subslices_per_slice(const struct sseu_dev_info *sseu, u8 slice)
{
	return hweight32(intel_sseu_get_subslices(sseu, slice));
}

static int sseu_eu_idx(const struct sseu_dev_info *sseu, int slice,
		       int subslice)
{
	int slice_stride = sseu->max_subslices * sseu->eu_stride;

	return slice * slice_stride + subslice * sseu->eu_stride;
}

static u16 sseu_get_eus(const struct sseu_dev_info *sseu, int slice,
			int subslice)
{
	int i, offset = sseu_eu_idx(sseu, slice, subslice);
	u16 eu_mask = 0;

	for (i = 0; i < sseu->eu_stride; i++)
		eu_mask |=
			((u16)sseu->eu_mask[offset + i]) << (i * BITS_PER_BYTE);

	return eu_mask;
}

static void sseu_set_eus(struct sseu_dev_info *sseu, int slice, int subslice,
			 u16 eu_mask)
{
	int i, offset = sseu_eu_idx(sseu, slice, subslice);

	for (i = 0; i < sseu->eu_stride; i++)
		sseu->eu_mask[offset + i] =
			(eu_mask >> (BITS_PER_BYTE * i)) & 0xff;
}

static u16 compute_eu_total(const struct sseu_dev_info *sseu)
{
	u16 i, total = 0;

	for (i = 0; i < ARRAY_SIZE(sseu->eu_mask); i++)
		total += hweight8(sseu->eu_mask[i]);

	return total;
}

static u32 get_ss_stride_mask(struct sseu_dev_info *sseu, u8 s, u32 ss_en)
{
	u32 ss_mask;

	ss_mask = ss_en >> (s * sseu->max_subslices);
	ss_mask &= GENMASK(sseu->max_subslices - 1, 0);

	return ss_mask;
}

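/*
 * Illustration of the byte-packed mask layout used by the helpers above
 * (values are hypothetical and assume GEN_SSEU_STRIDE() rounds a bit count
 * up to whole bytes):
 *
 *	intel_sseu_set_info(sseu, 1, 32, 16);
 *		ss_stride == 4, eu_stride == 2
 *
 * Slice s then owns bytes [s * 4, s * 4 + 3] of subslice_mask[], stored
 * little-endian, and sseu_eu_idx() locates each subslice's 16-bit EU mask
 * as two consecutive bytes of eu_mask[].
 */
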
static void gen11_compute_sseu_info(struct sseu_dev_info *sseu, u8 s_en,
				    u32 g_ss_en, u32 c_ss_en, u16 eu_en)
{
	int s, ss;

	/* g_ss_en/c_ss_en represent entire subslice mask across all slices */
	GEM_BUG_ON(sseu->max_slices * sseu->max_subslices >
		   sizeof(g_ss_en) * BITS_PER_BYTE);

	for (s = 0; s < sseu->max_slices; s++) {
		if ((s_en & BIT(s)) == 0)
			continue;

		sseu->slice_mask |= BIT(s);

		/*
		 * XeHP introduces the concept of compute vs geometry DSS. To
		 * reduce variation between GENs around subslice usage, store a
		 * mask for both the geometry and compute enabled masks since
		 * userspace will need to be able to query these masks
		 * independently. Also compute a total enabled subslice count
		 * for the purposes of selecting subslices to use in a
		 * particular GEM context.
		 */
		intel_sseu_set_subslices(sseu, s, sseu->compute_subslice_mask,
					 get_ss_stride_mask(sseu, s, c_ss_en));
		intel_sseu_set_subslices(sseu, s, sseu->geometry_subslice_mask,
					 get_ss_stride_mask(sseu, s, g_ss_en));
		intel_sseu_set_subslices(sseu, s, sseu->subslice_mask,
					 get_ss_stride_mask(sseu, s,
							    g_ss_en | c_ss_en));

		for (ss = 0; ss < sseu->max_subslices; ss++)
			if (intel_sseu_has_subslice(sseu, s, ss))
				sseu_set_eus(sseu, s, ss, eu_en);
	}
	sseu->eu_per_subslice = hweight16(eu_en);
	sseu->eu_total = compute_eu_total(sseu);
}

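/*
 * Illustration of how gen11_compute_sseu_info() above combines the fuse
 * masks (values are hypothetical): with s_en == 0x1, g_ss_en == 0x000f and
 * c_ss_en == 0x00f0, slice 0 ends up with geometry_subslice_mask 0x0f,
 * compute_subslice_mask 0xf0 and a combined subslice_mask of 0xff, and
 * eu_en is copied into each of those eight enabled subslices.
 */
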
static void gen12_sseu_info_init(struct intel_gt *gt)
{
	struct sseu_dev_info *sseu = &gt->info.sseu;
	struct intel_uncore *uncore = gt->uncore;
	u32 g_dss_en, c_dss_en = 0;
	u16 eu_en = 0;
	u8 eu_en_fuse;
	u8 s_en;
	int eu;

	/*
	 * Gen12 has Dual-Subslices, which behave similarly to 2 gen11 SS.
	 * Instead of splitting these, provide userspace with an array
	 * of DSS to more closely represent the hardware resource.
	 *
	 * In addition, the concept of slice has been removed in Xe_HP.
	 * To be compatible with prior generations, assume a single slice
	 * across the entire device. Then calculate out the DSS for each
	 * workload type within that software slice.
	 */
	if (IS_DG2(gt->i915) || IS_XEHPSDV(gt->i915))
		intel_sseu_set_info(sseu, 1, 32, 16);
	else
		intel_sseu_set_info(sseu, 1, 6, 16);

	/*
	 * As mentioned above, Xe_HP does not have the concept of a slice.
	 * Enable one for software backwards compatibility.
	 */
	if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 50))
		s_en = 0x1;
	else
		s_en = intel_uncore_read(uncore, GEN11_GT_SLICE_ENABLE) &
		       GEN11_GT_S_ENA_MASK;

	g_dss_en = intel_uncore_read(uncore, GEN12_GT_GEOMETRY_DSS_ENABLE);
	if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 50))
		c_dss_en = intel_uncore_read(uncore, GEN12_GT_COMPUTE_DSS_ENABLE);

	/* one bit per pair of EUs */
	if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 50))
		eu_en_fuse = intel_uncore_read(uncore, XEHP_EU_ENABLE) & XEHP_EU_ENA_MASK;
	else
		eu_en_fuse = ~(intel_uncore_read(uncore, GEN11_EU_DISABLE) &
			       GEN11_EU_DIS_MASK);

	for (eu = 0; eu < sseu->max_eus_per_subslice / 2; eu++)
		if (eu_en_fuse & BIT(eu))
			eu_en |= BIT(eu * 2) | BIT(eu * 2 + 1);

	gen11_compute_sseu_info(sseu, s_en, g_dss_en, c_dss_en, eu_en);

	/* TGL only supports slice-level power gating */
	sseu->has_slice_pg = 1;
}

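/*
 * Worked example of the EU-pair expansion in gen12_sseu_info_init() above
 * (fuse value is hypothetical): with max_eus_per_subslice == 16, the fuse
 * carries 8 meaningful bits, one per EU pair, so eu_en_fuse == 0b00000101
 * expands to eu_en == 0b00110011, i.e. EUs 0, 1, 4 and 5 enabled.
 */
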
static void gen11_sseu_info_init(struct intel_gt *gt)
{
	struct sseu_dev_info *sseu = &gt->info.sseu;
	struct intel_uncore *uncore = gt->uncore;
	u32 ss_en;
	u8 eu_en;
	u8 s_en;

	if (IS_JSL_EHL(gt->i915))
		intel_sseu_set_info(sseu, 1, 4, 8);
	else
		intel_sseu_set_info(sseu, 1, 8, 8);

	s_en = intel_uncore_read(uncore, GEN11_GT_SLICE_ENABLE) &
	       GEN11_GT_S_ENA_MASK;
	ss_en = ~intel_uncore_read(uncore, GEN11_GT_SUBSLICE_DISABLE);

	eu_en = ~(intel_uncore_read(uncore, GEN11_EU_DISABLE) &
		  GEN11_EU_DIS_MASK);

	gen11_compute_sseu_info(sseu, s_en, ss_en, 0, eu_en);

	/* ICL has no power gating restrictions. */
	sseu->has_slice_pg = 1;
	sseu->has_subslice_pg = 1;
	sseu->has_eu_pg = 1;
}

static void cherryview_sseu_info_init(struct intel_gt *gt)
{
	struct sseu_dev_info *sseu = &gt->info.sseu;
	u32 fuse;
	u8 subslice_mask = 0;

	fuse = intel_uncore_read(gt->uncore, CHV_FUSE_GT);

	sseu->slice_mask = BIT(0);
	intel_sseu_set_info(sseu, 1, 2, 8);

	if (!(fuse & CHV_FGT_DISABLE_SS0)) {
		u8 disabled_mask =
			((fuse & CHV_FGT_EU_DIS_SS0_R0_MASK) >>
			 CHV_FGT_EU_DIS_SS0_R0_SHIFT) |
			(((fuse & CHV_FGT_EU_DIS_SS0_R1_MASK) >>
			  CHV_FGT_EU_DIS_SS0_R1_SHIFT) << 4);

		subslice_mask |= BIT(0);
		sseu_set_eus(sseu, 0, 0, ~disabled_mask);
	}

	if (!(fuse & CHV_FGT_DISABLE_SS1)) {
		u8 disabled_mask =
			((fuse & CHV_FGT_EU_DIS_SS1_R0_MASK) >>
			 CHV_FGT_EU_DIS_SS1_R0_SHIFT) |
			(((fuse & CHV_FGT_EU_DIS_SS1_R1_MASK) >>
			  CHV_FGT_EU_DIS_SS1_R1_SHIFT) << 4);

		subslice_mask |= BIT(1);
		sseu_set_eus(sseu, 0, 1, ~disabled_mask);
	}

	intel_sseu_set_subslices(sseu, 0, sseu->subslice_mask, subslice_mask);

	sseu->eu_total = compute_eu_total(sseu);

	/*
	 * CHV is expected to always have a uniform distribution of EU
	 * across subslices.
	 */
	sseu->eu_per_subslice = intel_sseu_subslice_total(sseu) ?
		sseu->eu_total / intel_sseu_subslice_total(sseu) :
		0;

	/*
	 * CHV supports subslice power gating on devices with more than
	 * one subslice, and supports EU power gating on devices with
	 * more than one EU pair per subslice.
	 */
	sseu->has_slice_pg = 0;
	sseu->has_subslice_pg = intel_sseu_subslice_total(sseu) > 1;
	sseu->has_eu_pg = (sseu->eu_per_subslice > 2);
}

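/*
 * Example of the CHV fuse decode above (register values are hypothetical):
 * if the SS0 R0 disable field reads 0x3 and the SS0 R1 disable field reads
 * 0x1, disabled_mask == 0x13, so EUs 0, 1 and 4 of subslice 0 are fused off
 * and sseu_set_eus() records an EU mask of 0xec (five EUs) for it.
 */
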
static void gen9_sseu_info_init(struct intel_gt *gt)
{
	struct drm_i915_private *i915 = gt->i915;
	struct intel_device_info *info = mkwrite_device_info(i915);
	struct sseu_dev_info *sseu = &gt->info.sseu;
	struct intel_uncore *uncore = gt->uncore;
	u32 fuse2, eu_disable, subslice_mask;
	const u8 eu_mask = 0xff;
	int s, ss;

	fuse2 = intel_uncore_read(uncore, GEN8_FUSE2);
	sseu->slice_mask = (fuse2 & GEN8_F2_S_ENA_MASK) >> GEN8_F2_S_ENA_SHIFT;

	/* BXT has a single slice and at most 3 subslices. */
	intel_sseu_set_info(sseu, IS_GEN9_LP(i915) ? 1 : 3,
			    IS_GEN9_LP(i915) ? 3 : 4, 8);

	/*
	 * The subslice disable field is global, i.e. it applies
	 * to each of the enabled slices.
	 */
	subslice_mask = (1 << sseu->max_subslices) - 1;
	subslice_mask &= ~((fuse2 & GEN9_F2_SS_DIS_MASK) >>
			   GEN9_F2_SS_DIS_SHIFT);

	/*
	 * Iterate through enabled slices and subslices to
	 * count the total enabled EU.
	 */
	for (s = 0; s < sseu->max_slices; s++) {
		if (!(sseu->slice_mask & BIT(s)))
			/* skip disabled slice */
			continue;

		intel_sseu_set_subslices(sseu, s, sseu->subslice_mask,
					 subslice_mask);

		eu_disable = intel_uncore_read(uncore, GEN9_EU_DISABLE(s));
		for (ss = 0; ss < sseu->max_subslices; ss++) {
			int eu_per_ss;
			u8 eu_disabled_mask;

			if (!intel_sseu_has_subslice(sseu, s, ss))
				/* skip disabled subslice */
				continue;

			eu_disabled_mask = (eu_disable >> (ss * 8)) & eu_mask;

			sseu_set_eus(sseu, s, ss, ~eu_disabled_mask);

			eu_per_ss = sseu->max_eus_per_subslice -
				hweight8(eu_disabled_mask);

			/*
			 * Record which subslice(s) has(have) 7 EUs. We
			 * can tune the hash used to spread work among
			 * subslices if they are unbalanced.
			 */
			if (eu_per_ss == 7)
				sseu->subslice_7eu[s] |= BIT(ss);
		}
	}

	sseu->eu_total = compute_eu_total(sseu);

	/*
	 * SKL is expected to always have a uniform distribution
	 * of EU across subslices with the exception that any one
	 * EU in any one subslice may be fused off for die
	 * recovery. BXT is expected to be perfectly uniform in EU
	 * distribution.
	 */
	sseu->eu_per_subslice =
		intel_sseu_subslice_total(sseu) ?
		DIV_ROUND_UP(sseu->eu_total, intel_sseu_subslice_total(sseu)) :
		0;

	/*
	 * SKL+ supports slice power gating on devices with more than
	 * one slice, and supports EU power gating on devices with
	 * more than one EU pair per subslice. BXT+ supports subslice
	 * power gating on devices with more than one subslice, and
	 * supports EU power gating on devices with more than one EU
	 * pair per subslice.
	 */
	sseu->has_slice_pg =
		!IS_GEN9_LP(i915) && hweight8(sseu->slice_mask) > 1;
	sseu->has_subslice_pg =
		IS_GEN9_LP(i915) && intel_sseu_subslice_total(sseu) > 1;
	sseu->has_eu_pg = sseu->eu_per_subslice > 2;

	if (IS_GEN9_LP(i915)) {
#define IS_SS_DISABLED(ss)	(!(sseu->subslice_mask[0] & BIT(ss)))
		info->has_pooled_eu = hweight8(sseu->subslice_mask[0]) == 3;

		sseu->min_eu_in_pool = 0;
		if (info->has_pooled_eu) {
			if (IS_SS_DISABLED(2) || IS_SS_DISABLED(0))
				sseu->min_eu_in_pool = 3;
			else if (IS_SS_DISABLED(1))
				sseu->min_eu_in_pool = 6;
			else
				sseu->min_eu_in_pool = 9;
		}
#undef IS_SS_DISABLED
	}
}

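/*
 * Illustration of the GEN9 EU fuse decode above (register value is
 * hypothetical): each subslice owns one byte of GEN9_EU_DISABLE(s), so
 * eu_disable == 0x00010000 marks only EU 0 of subslice 2 as fused off.
 * That subslice is then left with 7 EUs and gets flagged in
 * subslice_7eu[s] so the work-spreading hash can account for the
 * imbalance.
 */
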
static void bdw_sseu_info_init(struct intel_gt *gt)
{
	struct sseu_dev_info *sseu = &gt->info.sseu;
	struct intel_uncore *uncore = gt->uncore;
	int s, ss;
	u32 fuse2, subslice_mask, eu_disable[3]; /* s_max */
	u32 eu_disable0, eu_disable1, eu_disable2;

	fuse2 = intel_uncore_read(uncore, GEN8_FUSE2);
	sseu->slice_mask = (fuse2 & GEN8_F2_S_ENA_MASK) >> GEN8_F2_S_ENA_SHIFT;
	intel_sseu_set_info(sseu, 3, 3, 8);

	/*
	 * The subslice disable field is global, i.e. it applies
	 * to each of the enabled slices.
	 */
	subslice_mask = GENMASK(sseu->max_subslices - 1, 0);
	subslice_mask &= ~((fuse2 & GEN8_F2_SS_DIS_MASK) >>
			   GEN8_F2_SS_DIS_SHIFT);
	eu_disable0 = intel_uncore_read(uncore, GEN8_EU_DISABLE0);
	eu_disable1 = intel_uncore_read(uncore, GEN8_EU_DISABLE1);
	eu_disable2 = intel_uncore_read(uncore, GEN8_EU_DISABLE2);
	eu_disable[0] = eu_disable0 & GEN8_EU_DIS0_S0_MASK;
	eu_disable[1] = (eu_disable0 >> GEN8_EU_DIS0_S1_SHIFT) |
		((eu_disable1 & GEN8_EU_DIS1_S1_MASK) <<
		 (32 - GEN8_EU_DIS0_S1_SHIFT));
	eu_disable[2] = (eu_disable1 >> GEN8_EU_DIS1_S2_SHIFT) |
		((eu_disable2 & GEN8_EU_DIS2_S2_MASK) <<
		 (32 - GEN8_EU_DIS1_S2_SHIFT));

	/*
	 * Iterate through enabled slices and subslices to
	 * count the total enabled EU.
	 */
	for (s = 0; s < sseu->max_slices; s++) {
		if (!(sseu->slice_mask & BIT(s)))
			/* skip disabled slice */
			continue;

		intel_sseu_set_subslices(sseu, s, sseu->subslice_mask,
					 subslice_mask);

		for (ss = 0; ss < sseu->max_subslices; ss++) {
			u8 eu_disabled_mask;
			u32 n_disabled;

			if (!intel_sseu_has_subslice(sseu, s, ss))
				/* skip disabled subslice */
				continue;

			eu_disabled_mask =
				eu_disable[s] >> (ss * sseu->max_eus_per_subslice);

			sseu_set_eus(sseu, s, ss, ~eu_disabled_mask);

			n_disabled = hweight8(eu_disabled_mask);

			/*
			 * Record which subslices have 7 EUs.
			 */
			if (sseu->max_eus_per_subslice - n_disabled == 7)
				sseu->subslice_7eu[s] |= 1 << ss;
		}
	}

	sseu->eu_total = compute_eu_total(sseu);

	/*
	 * BDW is expected to always have a uniform distribution of EU across
	 * subslices with the exception that any one EU in any one subslice may
	 * be fused off for die recovery.
	 */
	sseu->eu_per_subslice =
		intel_sseu_subslice_total(sseu) ?
		DIV_ROUND_UP(sseu->eu_total, intel_sseu_subslice_total(sseu)) :
		0;

	/*
	 * BDW supports slice power gating on devices with more than
	 * one slice.
	 */
	sseu->has_slice_pg = hweight8(sseu->slice_mask) > 1;
	sseu->has_subslice_pg = 0;
	sseu->has_eu_pg = 0;
}

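/*
 * Note on the BDW fuse stitching above: each slice has 3 subslices * 8 EUs,
 * i.e. 24 disable bits, so the bits for slices 1 and 2 straddle register
 * boundaries. The low part of eu_disable[1] and eu_disable[2] is taken from
 * the top of one GEN8_EU_DISABLE register and the high part from the bottom
 * of the next, shifted back into a contiguous per-slice mask.
 */
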
static void hsw_sseu_info_init(struct intel_gt *gt)
{
	struct drm_i915_private *i915 = gt->i915;
	struct sseu_dev_info *sseu = &gt->info.sseu;
	u32 fuse1;
	u8 subslice_mask = 0;
	int s, ss;

	/*
	 * There isn't a register to tell us how many slices/subslices. We
	 * work off the PCI-ids here.
	 */
	switch (INTEL_INFO(i915)->gt) {
	default:
		MISSING_CASE(INTEL_INFO(i915)->gt);
		fallthrough;
	case 1:
		sseu->slice_mask = BIT(0);
		subslice_mask = BIT(0);
		break;
	case 2:
		sseu->slice_mask = BIT(0);
		subslice_mask = BIT(0) | BIT(1);
		break;
	case 3:
		sseu->slice_mask = BIT(0) | BIT(1);
		subslice_mask = BIT(0) | BIT(1);
		break;
	}

	fuse1 = intel_uncore_read(gt->uncore, HSW_PAVP_FUSE1);
	switch (REG_FIELD_GET(HSW_F1_EU_DIS_MASK, fuse1)) {
	default:
		MISSING_CASE(REG_FIELD_GET(HSW_F1_EU_DIS_MASK, fuse1));
		fallthrough;
	case HSW_F1_EU_DIS_10EUS:
		sseu->eu_per_subslice = 10;
		break;
	case HSW_F1_EU_DIS_8EUS:
		sseu->eu_per_subslice = 8;
		break;
	case HSW_F1_EU_DIS_6EUS:
		sseu->eu_per_subslice = 6;
		break;
	}

	intel_sseu_set_info(sseu, hweight8(sseu->slice_mask),
			    hweight8(subslice_mask),
			    sseu->eu_per_subslice);

	for (s = 0; s < sseu->max_slices; s++) {
		intel_sseu_set_subslices(sseu, s, sseu->subslice_mask,
					 subslice_mask);

		for (ss = 0; ss < sseu->max_subslices; ss++) {
			sseu_set_eus(sseu, s, ss,
				     (1UL << sseu->eu_per_subslice) - 1);
		}
	}

	sseu->eu_total = compute_eu_total(sseu);

	/* No powergating for you. */
	sseu->has_slice_pg = 0;
	sseu->has_subslice_pg = 0;
	sseu->has_eu_pg = 0;
}

void intel_sseu_info_init(struct intel_gt *gt)
{
	struct drm_i915_private *i915 = gt->i915;

	if (IS_HASWELL(i915))
		hsw_sseu_info_init(gt);
	else if (IS_CHERRYVIEW(i915))
		cherryview_sseu_info_init(gt);
	else if (IS_BROADWELL(i915))
		bdw_sseu_info_init(gt);
	else if (GRAPHICS_VER(i915) == 9)
		gen9_sseu_info_init(gt);
	else if (GRAPHICS_VER(i915) == 11)
		gen11_sseu_info_init(gt);
	else if (GRAPHICS_VER(i915) >= 12)
		gen12_sseu_info_init(gt);
}

u32 intel_sseu_make_rpcs(struct intel_gt *gt,
			 const struct intel_sseu *req_sseu)
{
	struct drm_i915_private *i915 = gt->i915;
	const struct sseu_dev_info *sseu = &gt->info.sseu;
	bool subslice_pg = sseu->has_subslice_pg;
	u8 slices, subslices;
	u32 rpcs = 0;

	/*
	 * No explicit RPCS request is needed to ensure full
	 * slice/subslice/EU enablement prior to Gen9.
	 */
	if (GRAPHICS_VER(i915) < 9)
		return 0;

	/*
	 * If i915/perf is active, we want a stable powergating configuration
	 * on the system. Use the configuration pinned by i915/perf.
	 */
	if (i915->perf.exclusive_stream)
		req_sseu = &i915->perf.sseu;

	slices = hweight8(req_sseu->slice_mask);
	subslices = hweight8(req_sseu->subslice_mask);

	/*
	 * Since the SScount bitfield in GEN8_R_PWR_CLK_STATE is only three bits
	 * wide and Icelake has up to eight subslices, special programming is
	 * needed in order to correctly enable all subslices.
	 *
	 * According to documentation software must consider the configuration
	 * as 2x4x8 and hardware will translate this to 1x8x8.
	 *
	 * Furthermore, even though SScount is three bits, the maximum
	 * documented value for it is four. From this some rules/restrictions
	 * follow:
	 *
	 * 1.
	 * If enabled subslice count is greater than four, two whole slices must
	 * be enabled instead.
	 *
	 * 2.
	 * When more than one slice is enabled, hardware ignores the subslice
	 * count altogether.
	 *
	 * From these restrictions it follows that it is not possible to enable
	 * a subslice count between the SScount maximum of four and the maximum
	 * number available on a particular SKU. Either all subslices are
	 * enabled, or a count between one and four on the first slice.
	 */
	if (GRAPHICS_VER(i915) == 11 &&
	    slices == 1 &&
	    subslices > min_t(u8, 4, hweight8(sseu->subslice_mask[0]) / 2)) {
		GEM_BUG_ON(subslices & 1);

		subslice_pg = false;
		slices *= 2;
	}

	/*
	 * Starting in Gen9, render power gating can leave
	 * slice/subslice/EU in a partially enabled state. We
	 * must make an explicit request through RPCS for full
	 * enablement.
	 */
	if (sseu->has_slice_pg) {
		u32 mask, val = slices;

		if (GRAPHICS_VER(i915) >= 11) {
			mask = GEN11_RPCS_S_CNT_MASK;
			val <<= GEN11_RPCS_S_CNT_SHIFT;
		} else {
			mask = GEN8_RPCS_S_CNT_MASK;
			val <<= GEN8_RPCS_S_CNT_SHIFT;
		}

		GEM_BUG_ON(val & ~mask);
		val &= mask;

		rpcs |= GEN8_RPCS_ENABLE | GEN8_RPCS_S_CNT_ENABLE | val;
	}

	if (subslice_pg) {
		u32 val = subslices;

		val <<= GEN8_RPCS_SS_CNT_SHIFT;

		GEM_BUG_ON(val & ~GEN8_RPCS_SS_CNT_MASK);
		val &= GEN8_RPCS_SS_CNT_MASK;

		rpcs |= GEN8_RPCS_ENABLE | GEN8_RPCS_SS_CNT_ENABLE | val;
	}

	if (sseu->has_eu_pg) {
		u32 val;

		val = req_sseu->min_eus_per_subslice << GEN8_RPCS_EU_MIN_SHIFT;
		GEM_BUG_ON(val & ~GEN8_RPCS_EU_MIN_MASK);
		val &= GEN8_RPCS_EU_MIN_MASK;

		rpcs |= val;

		val = req_sseu->max_eus_per_subslice << GEN8_RPCS_EU_MAX_SHIFT;
		GEM_BUG_ON(val & ~GEN8_RPCS_EU_MAX_MASK);
		val &= GEN8_RPCS_EU_MAX_MASK;

		rpcs |= val;

		rpcs |= GEN8_RPCS_ENABLE;
	}

	return rpcs;
}

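/*
 * Worked example of the Icelake special case in intel_sseu_make_rpcs()
 * above (requested configuration is hypothetical): on a part with 8
 * subslices in slice 0, a request for 1 slice and 8 subslices exceeds the
 * 4-subslice SScount limit, so it is programmed as 2 slices with subslice
 * power gating disabled, which the hardware translates back to 1x8x8.
 */
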
void intel_sseu_dump(const struct sseu_dev_info *sseu, struct drm_printer *p)
{
	int s;

	drm_printf(p, "slice total: %u, mask=%04x\n",
		   hweight8(sseu->slice_mask), sseu->slice_mask);
	drm_printf(p, "subslice total: %u\n", intel_sseu_subslice_total(sseu));
	for (s = 0; s < sseu->max_slices; s++) {
		drm_printf(p, "slice%d: %u subslices, mask=%08x\n",
			   s, intel_sseu_subslices_per_slice(sseu, s),
			   intel_sseu_get_subslices(sseu, s));
	}
	drm_printf(p, "EU total: %u\n", sseu->eu_total);
	drm_printf(p, "EU per subslice: %u\n", sseu->eu_per_subslice);
	drm_printf(p, "has slice power gating: %s\n",
		   str_yes_no(sseu->has_slice_pg));
	drm_printf(p, "has subslice power gating: %s\n",
		   str_yes_no(sseu->has_subslice_pg));
	drm_printf(p, "has EU power gating: %s\n",
		   str_yes_no(sseu->has_eu_pg));
}

static void sseu_print_hsw_topology(const struct sseu_dev_info *sseu,
				    struct drm_printer *p)
{
	int s, ss;

	for (s = 0; s < sseu->max_slices; s++) {
		drm_printf(p, "slice%d: %u subslice(s) (0x%08x):\n",
			   s, intel_sseu_subslices_per_slice(sseu, s),
			   intel_sseu_get_subslices(sseu, s));

		for (ss = 0; ss < sseu->max_subslices; ss++) {
			u16 enabled_eus = sseu_get_eus(sseu, s, ss);

			drm_printf(p, "\tsubslice%d: %u EUs (0x%hx)\n",
				   ss, hweight16(enabled_eus), enabled_eus);
		}
	}
}

static void sseu_print_xehp_topology(const struct sseu_dev_info *sseu,
				     struct drm_printer *p)
{
	u32 g_dss_mask = sseu_get_geometry_subslices(sseu);
	u32 c_dss_mask = intel_sseu_get_compute_subslices(sseu);
	int dss;

	for (dss = 0; dss < sseu->max_subslices; dss++) {
		u16 enabled_eus = sseu_get_eus(sseu, 0, dss);

		drm_printf(p, "DSS_%02d: G:%3s C:%3s, %2u EUs (0x%04hx)\n", dss,
			   str_yes_no(g_dss_mask & BIT(dss)),
			   str_yes_no(c_dss_mask & BIT(dss)),
			   hweight16(enabled_eus), enabled_eus);
	}
}

void intel_sseu_print_topology(struct drm_i915_private *i915,
			       const struct sseu_dev_info *sseu,
			       struct drm_printer *p)
{
	if (sseu->max_slices == 0) {
		drm_printf(p, "Unavailable\n");
	} else if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50)) {
		sseu_print_xehp_topology(sseu, p);
	} else {
		sseu_print_hsw_topology(sseu, p);
	}
}

u16 intel_slicemask_from_dssmask(u64 dss_mask, int dss_per_slice)
{
	u16 slice_mask = 0;
	int i;

	WARN_ON(sizeof(dss_mask) * 8 / dss_per_slice > 8 * sizeof(slice_mask));

	for (i = 0; dss_mask; i++) {
		if (dss_mask & GENMASK(dss_per_slice - 1, 0))
			slice_mask |= BIT(i);

		dss_mask >>= dss_per_slice;
	}

	return slice_mask;
}