// SPDX-License-Identifier: MIT
/*
 * Copyright © 2019 Intel Corporation
 */

#include "i915_drv.h"
#include "intel_engine_regs.h"
#include "intel_gt_regs.h"
#include "intel_sseu.h"

void intel_sseu_set_info(struct sseu_dev_info *sseu, u8 max_slices,
			 u8 max_subslices, u8 max_eus_per_subslice)
{
	sseu->max_slices = max_slices;
	sseu->max_subslices = max_subslices;
	sseu->max_eus_per_subslice = max_eus_per_subslice;

	sseu->ss_stride = GEN_SSEU_STRIDE(sseu->max_subslices);
	GEM_BUG_ON(sseu->ss_stride > GEN_MAX_SUBSLICE_STRIDE);
	sseu->eu_stride = GEN_SSEU_STRIDE(sseu->max_eus_per_subslice);
	GEM_BUG_ON(sseu->eu_stride > GEN_MAX_EU_STRIDE);
}

unsigned int
intel_sseu_subslice_total(const struct sseu_dev_info *sseu)
{
	unsigned int i, total = 0;

	for (i = 0; i < ARRAY_SIZE(sseu->subslice_mask); i++)
		total += hweight8(sseu->subslice_mask[i]);

	return total;
}

static u32
_intel_sseu_get_subslices(const struct sseu_dev_info *sseu,
			  const u8 *subslice_mask, u8 slice)
{
	int i, offset = slice * sseu->ss_stride;
	u32 mask = 0;

	GEM_BUG_ON(slice >= sseu->max_slices);

	for (i = 0; i < sseu->ss_stride; i++)
		mask |= (u32)subslice_mask[offset + i] << i * BITS_PER_BYTE;

	return mask;
}

u32 intel_sseu_get_subslices(const struct sseu_dev_info *sseu, u8 slice)
{
	return _intel_sseu_get_subslices(sseu, sseu->subslice_mask, slice);
}

u32 intel_sseu_get_compute_subslices(const struct sseu_dev_info *sseu)
{
	return _intel_sseu_get_subslices(sseu, sseu->compute_subslice_mask, 0);
}

void intel_sseu_set_subslices(struct sseu_dev_info *sseu, int slice,
			      u8 *subslice_mask, u32 ss_mask)
{
	int offset = slice * sseu->ss_stride;

	memcpy(&subslice_mask[offset], &ss_mask, sseu->ss_stride);
}

unsigned int
intel_sseu_subslices_per_slice(const struct sseu_dev_info *sseu, u8 slice)
{
	return hweight32(intel_sseu_get_subslices(sseu, slice));
}

static int sseu_eu_idx(const struct sseu_dev_info *sseu, int slice,
		       int subslice)
{
	int slice_stride = sseu->max_subslices * sseu->eu_stride;

	return slice * slice_stride + subslice * sseu->eu_stride;
}

static u16 sseu_get_eus(const struct sseu_dev_info *sseu, int slice,
			int subslice)
{
	int i, offset = sseu_eu_idx(sseu, slice, subslice);
	u16 eu_mask = 0;

	for (i = 0; i < sseu->eu_stride; i++)
		eu_mask |=
			((u16)sseu->eu_mask[offset + i]) << (i * BITS_PER_BYTE);

	return eu_mask;
}

static void sseu_set_eus(struct sseu_dev_info *sseu, int slice, int subslice,
			 u16 eu_mask)
{
	int i, offset = sseu_eu_idx(sseu, slice, subslice);

	for (i = 0; i < sseu->eu_stride; i++)
		sseu->eu_mask[offset + i] =
			(eu_mask >> (BITS_PER_BYTE * i)) & 0xff;
}

static u16 compute_eu_total(const struct sseu_dev_info *sseu)
{
	u16 i, total = 0;

	for (i = 0; i < ARRAY_SIZE(sseu->eu_mask); i++)
		total += hweight8(sseu->eu_mask[i]);

	return total;
}

static u32 get_ss_stride_mask(struct sseu_dev_info *sseu, u8 s, u32 ss_en)
{
	u32 ss_mask;

	ss_mask = ss_en >> (s * sseu->max_subslices);
	ss_mask &= GENMASK(sseu->max_subslices - 1, 0);

	return ss_mask;
}

static void gen11_compute_sseu_info(struct sseu_dev_info *sseu, u8 s_en,
				    u32 g_ss_en, u32 c_ss_en, u16 eu_en)
{
	int s, ss;

	/* g_ss_en/c_ss_en represent entire subslice mask across all slices */
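	/*
	 * Each of those 32-bit parameters must therefore be wide enough to
	 * hold the whole device topology, which is what the check below
	 * enforces before get_ss_stride_mask() carves out per-slice windows.
	 */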
	GEM_BUG_ON(sseu->max_slices * sseu->max_subslices >
		   sizeof(g_ss_en) * BITS_PER_BYTE);

	for (s = 0; s < sseu->max_slices; s++) {
		if ((s_en & BIT(s)) == 0)
			continue;

		sseu->slice_mask |= BIT(s);

		/*
		 * XeHP introduces the concept of compute vs geometry DSS. To
		 * reduce variation between GENs around subslice usage, store a
		 * mask for both the geometry and compute enabled masks since
		 * userspace will need to be able to query these masks
		 * independently. Also compute a total enabled subslice count
		 * for the purposes of selecting subslices to use in a
		 * particular GEM context.
		 */
		intel_sseu_set_subslices(sseu, s, sseu->compute_subslice_mask,
					 get_ss_stride_mask(sseu, s, c_ss_en));
		intel_sseu_set_subslices(sseu, s, sseu->geometry_subslice_mask,
					 get_ss_stride_mask(sseu, s, g_ss_en));
		intel_sseu_set_subslices(sseu, s, sseu->subslice_mask,
					 get_ss_stride_mask(sseu, s,
							    g_ss_en | c_ss_en));

		for (ss = 0; ss < sseu->max_subslices; ss++)
			if (intel_sseu_has_subslice(sseu, s, ss))
				sseu_set_eus(sseu, s, ss, eu_en);
	}
	sseu->eu_per_subslice = hweight16(eu_en);
	sseu->eu_total = compute_eu_total(sseu);
}

static void gen12_sseu_info_init(struct intel_gt *gt)
{
	struct sseu_dev_info *sseu = &gt->info.sseu;
	struct intel_uncore *uncore = gt->uncore;
	u32 g_dss_en, c_dss_en = 0;
	u16 eu_en = 0;
	u8 eu_en_fuse;
	u8 s_en;
	int eu;

	/*
	 * Gen12 has Dual-Subslices, which behave similarly to 2 gen11 SS.
	 * Instead of splitting these, provide userspace with an array of
	 * DSS to more closely represent the hardware resource.
	 *
	 * In addition, the concept of slice has been removed in Xe_HP.
	 * To be compatible with prior generations, assume a single slice
	 * across the entire device. Then calculate out the DSS for each
	 * workload type within that software slice.
	 */
	if (IS_DG2(gt->i915) || IS_XEHPSDV(gt->i915))
		intel_sseu_set_info(sseu, 1, 32, 16);
	else
		intel_sseu_set_info(sseu, 1, 6, 16);

	/*
	 * As mentioned above, Xe_HP does not have the concept of a slice.
	 * Enable one for software backwards compatibility.
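	 * On Xe_HP (graphics version 12.50 and later) there is no slice fuse
	 * to read, so the single software slice is simply forced on below.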
	 */
	if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 50))
		s_en = 0x1;
	else
		s_en = intel_uncore_read(uncore, GEN11_GT_SLICE_ENABLE) &
		       GEN11_GT_S_ENA_MASK;

	g_dss_en = intel_uncore_read(uncore, GEN12_GT_GEOMETRY_DSS_ENABLE);
	if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 50))
		c_dss_en = intel_uncore_read(uncore, GEN12_GT_COMPUTE_DSS_ENABLE);

	/* one bit per pair of EUs */
	if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 50))
		eu_en_fuse = intel_uncore_read(uncore, XEHP_EU_ENABLE) & XEHP_EU_ENA_MASK;
	else
		eu_en_fuse = ~(intel_uncore_read(uncore, GEN11_EU_DISABLE) &
			       GEN11_EU_DIS_MASK);

	for (eu = 0; eu < sseu->max_eus_per_subslice / 2; eu++)
		if (eu_en_fuse & BIT(eu))
			eu_en |= BIT(eu * 2) | BIT(eu * 2 + 1);

	gen11_compute_sseu_info(sseu, s_en, g_dss_en, c_dss_en, eu_en);

	/* TGL only supports slice-level power gating */
	sseu->has_slice_pg = 1;
}

static void gen11_sseu_info_init(struct intel_gt *gt)
{
	struct sseu_dev_info *sseu = &gt->info.sseu;
	struct intel_uncore *uncore = gt->uncore;
	u32 ss_en;
	u8 eu_en;
	u8 s_en;

	if (IS_JSL_EHL(gt->i915))
		intel_sseu_set_info(sseu, 1, 4, 8);
	else
		intel_sseu_set_info(sseu, 1, 8, 8);

	s_en = intel_uncore_read(uncore, GEN11_GT_SLICE_ENABLE) &
	       GEN11_GT_S_ENA_MASK;
	ss_en = ~intel_uncore_read(uncore, GEN11_GT_SUBSLICE_DISABLE);

	eu_en = ~(intel_uncore_read(uncore, GEN11_EU_DISABLE) &
		  GEN11_EU_DIS_MASK);

	gen11_compute_sseu_info(sseu, s_en, ss_en, 0, eu_en);

	/* ICL has no power gating restrictions. */
	sseu->has_slice_pg = 1;
	sseu->has_subslice_pg = 1;
	sseu->has_eu_pg = 1;
}

static void cherryview_sseu_info_init(struct intel_gt *gt)
{
	struct sseu_dev_info *sseu = &gt->info.sseu;
	u32 fuse;
	u8 subslice_mask = 0;

	fuse = intel_uncore_read(gt->uncore, CHV_FUSE_GT);

	sseu->slice_mask = BIT(0);
	intel_sseu_set_info(sseu, 1, 2, 8);

	if (!(fuse & CHV_FGT_DISABLE_SS0)) {
		u8 disabled_mask =
			((fuse & CHV_FGT_EU_DIS_SS0_R0_MASK) >>
			 CHV_FGT_EU_DIS_SS0_R0_SHIFT) |
			(((fuse & CHV_FGT_EU_DIS_SS0_R1_MASK) >>
			  CHV_FGT_EU_DIS_SS0_R1_SHIFT) << 4);

		subslice_mask |= BIT(0);
		sseu_set_eus(sseu, 0, 0, ~disabled_mask);
	}

	if (!(fuse & CHV_FGT_DISABLE_SS1)) {
		u8 disabled_mask =
			((fuse & CHV_FGT_EU_DIS_SS1_R0_MASK) >>
			 CHV_FGT_EU_DIS_SS1_R0_SHIFT) |
			(((fuse & CHV_FGT_EU_DIS_SS1_R1_MASK) >>
			  CHV_FGT_EU_DIS_SS1_R1_SHIFT) << 4);

		subslice_mask |= BIT(1);
		sseu_set_eus(sseu, 0, 1, ~disabled_mask);
	}

	intel_sseu_set_subslices(sseu, 0, sseu->subslice_mask, subslice_mask);

	sseu->eu_total = compute_eu_total(sseu);

	/*
	 * CHV is expected to always have a uniform distribution of EUs
	 * across subslices.
	 */
	sseu->eu_per_subslice = intel_sseu_subslice_total(sseu) ?
				sseu->eu_total /
					intel_sseu_subslice_total(sseu) :
				0;
	/*
	 * CHV supports subslice power gating on devices with more than
	 * one subslice, and supports EU power gating on devices with
	 * more than one EU pair per subslice.
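	 * A part fused down to a single subslice therefore loses subslice
	 * power gating, but keeps EU power gating as long as more than one
	 * EU pair survives in that subslice.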
	 */
	sseu->has_slice_pg = 0;
	sseu->has_subslice_pg = intel_sseu_subslice_total(sseu) > 1;
	sseu->has_eu_pg = (sseu->eu_per_subslice > 2);
}

static void gen9_sseu_info_init(struct intel_gt *gt)
{
	struct drm_i915_private *i915 = gt->i915;
	struct intel_device_info *info = mkwrite_device_info(i915);
	struct sseu_dev_info *sseu = &gt->info.sseu;
	struct intel_uncore *uncore = gt->uncore;
	u32 fuse2, eu_disable, subslice_mask;
	const u8 eu_mask = 0xff;
	int s, ss;

	fuse2 = intel_uncore_read(uncore, GEN8_FUSE2);
	sseu->slice_mask = (fuse2 & GEN8_F2_S_ENA_MASK) >> GEN8_F2_S_ENA_SHIFT;

	/* BXT has a single slice and at most 3 subslices. */
	intel_sseu_set_info(sseu, IS_GEN9_LP(i915) ? 1 : 3,
			    IS_GEN9_LP(i915) ? 3 : 4, 8);

	/*
	 * The subslice disable field is global, i.e. it applies
	 * to each of the enabled slices.
	 */
	subslice_mask = (1 << sseu->max_subslices) - 1;
	subslice_mask &= ~((fuse2 & GEN9_F2_SS_DIS_MASK) >>
			   GEN9_F2_SS_DIS_SHIFT);

	/*
	 * Iterate through enabled slices and subslices to
	 * count the total enabled EU.
	 */
	for (s = 0; s < sseu->max_slices; s++) {
		if (!(sseu->slice_mask & BIT(s)))
			/* skip disabled slice */
			continue;

		intel_sseu_set_subslices(sseu, s, sseu->subslice_mask,
					 subslice_mask);

		eu_disable = intel_uncore_read(uncore, GEN9_EU_DISABLE(s));
		for (ss = 0; ss < sseu->max_subslices; ss++) {
			int eu_per_ss;
			u8 eu_disabled_mask;

			if (!intel_sseu_has_subslice(sseu, s, ss))
				/* skip disabled subslice */
				continue;

			eu_disabled_mask = (eu_disable >> (ss * 8)) & eu_mask;

			sseu_set_eus(sseu, s, ss, ~eu_disabled_mask);

			eu_per_ss = sseu->max_eus_per_subslice -
				    hweight8(eu_disabled_mask);

			/*
			 * Record which subslice(s) have 7 EUs. We
			 * can tune the hash used to spread work among
			 * subslices if they are unbalanced.
			 */
			if (eu_per_ss == 7)
				sseu->subslice_7eu[s] |= BIT(ss);
		}
	}

	sseu->eu_total = compute_eu_total(sseu);

	/*
	 * SKL is expected to always have a uniform distribution
	 * of EU across subslices with the exception that any one
	 * EU in any one subslice may be fused off for die
	 * recovery. BXT is expected to be perfectly uniform in EU
	 * distribution.
	 */
	sseu->eu_per_subslice =
		intel_sseu_subslice_total(sseu) ?
		DIV_ROUND_UP(sseu->eu_total, intel_sseu_subslice_total(sseu)) :
		0;

	/*
	 * SKL+ supports slice power gating on devices with more than
	 * one slice, and supports EU power gating on devices with
	 * more than one EU pair per subslice. BXT+ supports subslice
	 * power gating on devices with more than one subslice, and
	 * supports EU power gating on devices with more than one EU
	 * pair per subslice.
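	 * For example, a multi-slice SKL GT3 part qualifies for slice power
	 * gating, while a single-slice GT2 part does not.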
	 */
	sseu->has_slice_pg =
		!IS_GEN9_LP(i915) && hweight8(sseu->slice_mask) > 1;
	sseu->has_subslice_pg =
		IS_GEN9_LP(i915) && intel_sseu_subslice_total(sseu) > 1;
	sseu->has_eu_pg = sseu->eu_per_subslice > 2;

	if (IS_GEN9_LP(i915)) {
#define IS_SS_DISABLED(ss)	(!(sseu->subslice_mask[0] & BIT(ss)))
		info->has_pooled_eu = hweight8(sseu->subslice_mask[0]) == 3;

		sseu->min_eu_in_pool = 0;
		if (info->has_pooled_eu) {
			if (IS_SS_DISABLED(2) || IS_SS_DISABLED(0))
				sseu->min_eu_in_pool = 3;
			else if (IS_SS_DISABLED(1))
				sseu->min_eu_in_pool = 6;
			else
				sseu->min_eu_in_pool = 9;
		}
#undef IS_SS_DISABLED
	}
}

static void bdw_sseu_info_init(struct intel_gt *gt)
{
	struct sseu_dev_info *sseu = &gt->info.sseu;
	struct intel_uncore *uncore = gt->uncore;
	int s, ss;
	u32 fuse2, subslice_mask, eu_disable[3]; /* s_max */
	u32 eu_disable0, eu_disable1, eu_disable2;

	fuse2 = intel_uncore_read(uncore, GEN8_FUSE2);
	sseu->slice_mask = (fuse2 & GEN8_F2_S_ENA_MASK) >> GEN8_F2_S_ENA_SHIFT;
	intel_sseu_set_info(sseu, 3, 3, 8);

	/*
	 * The subslice disable field is global, i.e. it applies
	 * to each of the enabled slices.
	 */
	subslice_mask = GENMASK(sseu->max_subslices - 1, 0);
	subslice_mask &= ~((fuse2 & GEN8_F2_SS_DIS_MASK) >>
			   GEN8_F2_SS_DIS_SHIFT);
	eu_disable0 = intel_uncore_read(uncore, GEN8_EU_DISABLE0);
	eu_disable1 = intel_uncore_read(uncore, GEN8_EU_DISABLE1);
	eu_disable2 = intel_uncore_read(uncore, GEN8_EU_DISABLE2);
	eu_disable[0] = eu_disable0 & GEN8_EU_DIS0_S0_MASK;
	eu_disable[1] = (eu_disable0 >> GEN8_EU_DIS0_S1_SHIFT) |
			((eu_disable1 & GEN8_EU_DIS1_S1_MASK) <<
			 (32 - GEN8_EU_DIS0_S1_SHIFT));
	eu_disable[2] = (eu_disable1 >> GEN8_EU_DIS1_S2_SHIFT) |
			((eu_disable2 & GEN8_EU_DIS2_S2_MASK) <<
			 (32 - GEN8_EU_DIS1_S2_SHIFT));

	/*
	 * Iterate through enabled slices and subslices to
	 * count the total enabled EU.
	 */
	for (s = 0; s < sseu->max_slices; s++) {
		if (!(sseu->slice_mask & BIT(s)))
			/* skip disabled slice */
			continue;

		intel_sseu_set_subslices(sseu, s, sseu->subslice_mask,
					 subslice_mask);

		for (ss = 0; ss < sseu->max_subslices; ss++) {
			u8 eu_disabled_mask;
			u32 n_disabled;

			if (!intel_sseu_has_subslice(sseu, s, ss))
				/* skip disabled subslice */
				continue;

			eu_disabled_mask =
				eu_disable[s] >> (ss * sseu->max_eus_per_subslice);

			sseu_set_eus(sseu, s, ss, ~eu_disabled_mask);

			n_disabled = hweight8(eu_disabled_mask);

			/*
			 * Record which subslices have 7 EUs.
			 */
			if (sseu->max_eus_per_subslice - n_disabled == 7)
				sseu->subslice_7eu[s] |= 1 << ss;
		}
	}

	sseu->eu_total = compute_eu_total(sseu);

	/*
	 * BDW is expected to always have a uniform distribution of EU across
	 * subslices with the exception that any one EU in any one subslice may
	 * be fused off for die recovery.
	 */
	sseu->eu_per_subslice =
		intel_sseu_subslice_total(sseu) ?
		DIV_ROUND_UP(sseu->eu_total, intel_sseu_subslice_total(sseu)) :
		0;

	/*
	 * BDW supports slice power gating on devices with more than
	 * one slice.
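	 * In practice that means only the two-slice GT3 parts; subslice and
	 * EU power gating are not available on BDW at all.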
	 */
	sseu->has_slice_pg = hweight8(sseu->slice_mask) > 1;
	sseu->has_subslice_pg = 0;
	sseu->has_eu_pg = 0;
}

static void hsw_sseu_info_init(struct intel_gt *gt)
{
	struct drm_i915_private *i915 = gt->i915;
	struct sseu_dev_info *sseu = &gt->info.sseu;
	u32 fuse1;
	u8 subslice_mask = 0;
	int s, ss;

	/*
	 * There isn't a register to tell us how many slices/subslices. We
	 * work off the PCI-ids here.
	 */
	switch (INTEL_INFO(i915)->gt) {
	default:
		MISSING_CASE(INTEL_INFO(i915)->gt);
		fallthrough;
	case 1:
		sseu->slice_mask = BIT(0);
		subslice_mask = BIT(0);
		break;
	case 2:
		sseu->slice_mask = BIT(0);
		subslice_mask = BIT(0) | BIT(1);
		break;
	case 3:
		sseu->slice_mask = BIT(0) | BIT(1);
		subslice_mask = BIT(0) | BIT(1);
		break;
	}

	fuse1 = intel_uncore_read(gt->uncore, HSW_PAVP_FUSE1);
	switch (REG_FIELD_GET(HSW_F1_EU_DIS_MASK, fuse1)) {
	default:
		MISSING_CASE(REG_FIELD_GET(HSW_F1_EU_DIS_MASK, fuse1));
		fallthrough;
	case HSW_F1_EU_DIS_10EUS:
		sseu->eu_per_subslice = 10;
		break;
	case HSW_F1_EU_DIS_8EUS:
		sseu->eu_per_subslice = 8;
		break;
	case HSW_F1_EU_DIS_6EUS:
		sseu->eu_per_subslice = 6;
		break;
	}

	intel_sseu_set_info(sseu, hweight8(sseu->slice_mask),
			    hweight8(subslice_mask),
			    sseu->eu_per_subslice);

	for (s = 0; s < sseu->max_slices; s++) {
		intel_sseu_set_subslices(sseu, s, sseu->subslice_mask,
					 subslice_mask);

		for (ss = 0; ss < sseu->max_subslices; ss++) {
			sseu_set_eus(sseu, s, ss,
				     (1UL << sseu->eu_per_subslice) - 1);
		}
	}

	sseu->eu_total = compute_eu_total(sseu);

	/* No powergating for you. */
	sseu->has_slice_pg = 0;
	sseu->has_subslice_pg = 0;
	sseu->has_eu_pg = 0;
}

void intel_sseu_info_init(struct intel_gt *gt)
{
	struct drm_i915_private *i915 = gt->i915;

	if (IS_HASWELL(i915))
		hsw_sseu_info_init(gt);
	else if (IS_CHERRYVIEW(i915))
		cherryview_sseu_info_init(gt);
	else if (IS_BROADWELL(i915))
		bdw_sseu_info_init(gt);
	else if (GRAPHICS_VER(i915) == 9)
		gen9_sseu_info_init(gt);
	else if (GRAPHICS_VER(i915) == 11)
		gen11_sseu_info_init(gt);
	else if (GRAPHICS_VER(i915) >= 12)
		gen12_sseu_info_init(gt);
}

u32 intel_sseu_make_rpcs(struct intel_gt *gt,
			 const struct intel_sseu *req_sseu)
{
	struct drm_i915_private *i915 = gt->i915;
	const struct sseu_dev_info *sseu = &gt->info.sseu;
	bool subslice_pg = sseu->has_subslice_pg;
	u8 slices, subslices;
	u32 rpcs = 0;

	/*
	 * No explicit RPCS request is needed to ensure full
	 * slice/subslice/EU enablement prior to Gen9.
	 */
	if (GRAPHICS_VER(i915) < 9)
		return 0;

	/*
	 * If i915/perf is active, we want a stable powergating configuration
	 * on the system. Use the configuration pinned by i915/perf.
	 */
	if (i915->perf.exclusive_stream)
		req_sseu = &i915->perf.sseu;

	slices = hweight8(req_sseu->slice_mask);
	subslices = hweight8(req_sseu->subslice_mask);

	/*
	 * Since the SScount bitfield in GEN8_R_PWR_CLK_STATE is only three bits
	 * wide and Icelake has up to eight subslices, special programming is
	 * needed in order to correctly enable all subslices.
	 *
	 * According to documentation software must consider the configuration
	 * as 2x4x8 and hardware will translate this to 1x8x8.
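	 * For example, a request for all eight subslices of a fully enabled
	 * ICL part is programmed below as two slices with subslice power
	 * gating disabled, which the hardware then interprets as 1x8x8.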
	 *
	 * Furthermore, even though SScount is three bits, the maximum
	 * documented value for it is four. From this, some rules/restrictions
	 * follow:
	 *
	 * 1.
	 * If enabled subslice count is greater than four, two whole slices must
	 * be enabled instead.
	 *
	 * 2.
	 * When more than one slice is enabled, hardware ignores the subslice
	 * count altogether.
	 *
	 * From these restrictions it follows that it is not possible to enable
	 * a count of subslices between the SScount maximum of four and the
	 * maximum available number on a particular SKU. Either all subslices
	 * are enabled, or a count between one and four on the first slice.
	 */
	if (GRAPHICS_VER(i915) == 11 &&
	    slices == 1 &&
	    subslices > min_t(u8, 4, hweight8(sseu->subslice_mask[0]) / 2)) {
		GEM_BUG_ON(subslices & 1);

		subslice_pg = false;
		slices *= 2;
	}

	/*
	 * Starting in Gen9, render power gating can leave
	 * slice/subslice/EU in a partially enabled state. We
	 * must make an explicit request through RPCS for full
	 * enablement.
	 */
	if (sseu->has_slice_pg) {
		u32 mask, val = slices;

		if (GRAPHICS_VER(i915) >= 11) {
			mask = GEN11_RPCS_S_CNT_MASK;
			val <<= GEN11_RPCS_S_CNT_SHIFT;
		} else {
			mask = GEN8_RPCS_S_CNT_MASK;
			val <<= GEN8_RPCS_S_CNT_SHIFT;
		}

		GEM_BUG_ON(val & ~mask);
		val &= mask;

		rpcs |= GEN8_RPCS_ENABLE | GEN8_RPCS_S_CNT_ENABLE | val;
	}

	if (subslice_pg) {
		u32 val = subslices;

		val <<= GEN8_RPCS_SS_CNT_SHIFT;

		GEM_BUG_ON(val & ~GEN8_RPCS_SS_CNT_MASK);
		val &= GEN8_RPCS_SS_CNT_MASK;

		rpcs |= GEN8_RPCS_ENABLE | GEN8_RPCS_SS_CNT_ENABLE | val;
	}

	if (sseu->has_eu_pg) {
		u32 val;

		val = req_sseu->min_eus_per_subslice << GEN8_RPCS_EU_MIN_SHIFT;
		GEM_BUG_ON(val & ~GEN8_RPCS_EU_MIN_MASK);
		val &= GEN8_RPCS_EU_MIN_MASK;

		rpcs |= val;

		val = req_sseu->max_eus_per_subslice << GEN8_RPCS_EU_MAX_SHIFT;
		GEM_BUG_ON(val & ~GEN8_RPCS_EU_MAX_MASK);
		val &= GEN8_RPCS_EU_MAX_MASK;

		rpcs |= val;

		rpcs |= GEN8_RPCS_ENABLE;
	}

	return rpcs;
}

void intel_sseu_dump(const struct sseu_dev_info *sseu, struct drm_printer *p)
{
	int s;

	drm_printf(p, "slice total: %u, mask=%04x\n",
		   hweight8(sseu->slice_mask), sseu->slice_mask);
	drm_printf(p, "subslice total: %u\n", intel_sseu_subslice_total(sseu));
	for (s = 0; s < sseu->max_slices; s++) {
		drm_printf(p, "slice%d: %u subslices, mask=%08x\n",
			   s, intel_sseu_subslices_per_slice(sseu, s),
			   intel_sseu_get_subslices(sseu, s));
	}
	drm_printf(p, "EU total: %u\n", sseu->eu_total);
	drm_printf(p, "EU per subslice: %u\n", sseu->eu_per_subslice);
	drm_printf(p, "has slice power gating: %s\n",
		   yesno(sseu->has_slice_pg));
	drm_printf(p, "has subslice power gating: %s\n",
		   yesno(sseu->has_subslice_pg));
	drm_printf(p, "has EU power gating: %s\n", yesno(sseu->has_eu_pg));
}

void intel_sseu_print_topology(const struct sseu_dev_info *sseu,
			       struct drm_printer *p)
{
	int s, ss;

	if (sseu->max_slices == 0) {
		drm_printf(p, "Unavailable\n");
		return;
	}

	for (s = 0; s < sseu->max_slices; s++) {
		drm_printf(p, "slice%d: %u subslice(s) (0x%08x):\n",
			   s, intel_sseu_subslices_per_slice(sseu, s),
			   intel_sseu_get_subslices(sseu, s));

		for (ss = 0; ss < sseu->max_subslices; ss++) {
			u16 enabled_eus = sseu_get_eus(sseu, s, ss);

			drm_printf(p, "\tsubslice%d: %u EUs (0x%hx)\n",
				   ss, hweight16(enabled_eus), enabled_eus);
		}
	}
}

/*
 * Convert a whole-device DSS mask into a slice mask: slice 'i' is reported
 * as present if any of its dss_per_slice DSS bits are enabled.
 */
u16 intel_slicemask_from_dssmask(u64 dss_mask, int dss_per_slice)
{
	u16 slice_mask = 0;
	int i;

	WARN_ON(sizeof(dss_mask) * 8 / dss_per_slice > 8 * sizeof(slice_mask));

	for (i = 0; dss_mask; i++) {
		if (dss_mask & GENMASK(dss_per_slice - 1, 0))
			slice_mask |= BIT(i);

		dss_mask >>= dss_per_slice;
	}

	return slice_mask;
}