/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2019 Intel Corporation
 */

#include "i915_drv.h"
#include "intel_lrc_reg.h"
#include "intel_sseu.h"

void intel_sseu_set_info(struct sseu_dev_info *sseu, u8 max_slices,
			 u8 max_subslices, u8 max_eus_per_subslice)
{
	sseu->max_slices = max_slices;
	sseu->max_subslices = max_subslices;
	sseu->max_eus_per_subslice = max_eus_per_subslice;

	sseu->ss_stride = GEN_SSEU_STRIDE(sseu->max_subslices);
	GEM_BUG_ON(sseu->ss_stride > GEN_MAX_SUBSLICE_STRIDE);
	sseu->eu_stride = GEN_SSEU_STRIDE(sseu->max_eus_per_subslice);
	GEM_BUG_ON(sseu->eu_stride > GEN_MAX_EU_STRIDE);
}

unsigned int
intel_sseu_subslice_total(const struct sseu_dev_info *sseu)
{
	unsigned int i, total = 0;

	for (i = 0; i < ARRAY_SIZE(sseu->subslice_mask); i++)
		total += hweight8(sseu->subslice_mask[i]);

	return total;
}

u32 intel_sseu_get_subslices(const struct sseu_dev_info *sseu, u8 slice)
{
	int i, offset = slice * sseu->ss_stride;
	u32 mask = 0;

	GEM_BUG_ON(slice >= sseu->max_slices);

	for (i = 0; i < sseu->ss_stride; i++)
		mask |= (u32)sseu->subslice_mask[offset + i] <<
			i * BITS_PER_BYTE;

	return mask;
}

void intel_sseu_set_subslices(struct sseu_dev_info *sseu, int slice,
			      u32 ss_mask)
{
	int offset = slice * sseu->ss_stride;

	memcpy(&sseu->subslice_mask[offset], &ss_mask, sseu->ss_stride);
}

unsigned int
intel_sseu_subslices_per_slice(const struct sseu_dev_info *sseu, u8 slice)
{
	return hweight32(intel_sseu_get_subslices(sseu, slice));
}

static int sseu_eu_idx(const struct sseu_dev_info *sseu, int slice,
		       int subslice)
{
	int slice_stride = sseu->max_subslices * sseu->eu_stride;

	return slice * slice_stride + subslice * sseu->eu_stride;
}

static u16 sseu_get_eus(const struct sseu_dev_info *sseu, int slice,
			int subslice)
{
	int i, offset = sseu_eu_idx(sseu, slice, subslice);
	u16 eu_mask = 0;

	for (i = 0; i < sseu->eu_stride; i++)
		eu_mask |=
			((u16)sseu->eu_mask[offset + i]) << (i * BITS_PER_BYTE);

	return eu_mask;
}

static void sseu_set_eus(struct sseu_dev_info *sseu, int slice, int subslice,
			 u16 eu_mask)
{
	int i, offset = sseu_eu_idx(sseu, slice, subslice);

	for (i = 0; i < sseu->eu_stride; i++)
		sseu->eu_mask[offset + i] =
			(eu_mask >> (BITS_PER_BYTE * i)) & 0xff;
}

static u16 compute_eu_total(const struct sseu_dev_info *sseu)
{
	u16 i, total = 0;

	for (i = 0; i < ARRAY_SIZE(sseu->eu_mask); i++)
		total += hweight8(sseu->eu_mask[i]);

	return total;
}

static void gen11_compute_sseu_info(struct sseu_dev_info *sseu,
				    u8 s_en, u32 ss_en, u16 eu_en)
{
	int s, ss;

	/* ss_en represents entire subslice mask across all slices */
	GEM_BUG_ON(sseu->max_slices * sseu->max_subslices >
		   sizeof(ss_en) * BITS_PER_BYTE);

	for (s = 0; s < sseu->max_slices; s++) {
		if ((s_en & BIT(s)) == 0)
			continue;

		sseu->slice_mask |= BIT(s);

		intel_sseu_set_subslices(sseu, s, ss_en);

		for (ss = 0; ss < sseu->max_subslices; ss++)
			if (intel_sseu_has_subslice(sseu, s, ss))
				sseu_set_eus(sseu, s, ss, eu_en);
	}
	sseu->eu_per_subslice = hweight16(eu_en);
	sseu->eu_total = compute_eu_total(sseu);
}

static void gen12_sseu_info_init(struct intel_gt *gt)
{
	struct sseu_dev_info *sseu = &gt->info.sseu;
	struct intel_uncore *uncore = gt->uncore;
	u32 dss_en;
	u16 eu_en = 0;
	u8 eu_en_fuse;
	u8 s_en;
	int eu;

	/*
	 * Gen12 has Dual-Subslices, which behave similarly to 2 gen11 SS.
	 * Instead of splitting these, provide userspace with an array
	 * of DSS to more closely represent the hardware resource.
	 */
	intel_sseu_set_info(sseu, 1, 6, 16);

	s_en = intel_uncore_read(uncore, GEN11_GT_SLICE_ENABLE) &
		GEN11_GT_S_ENA_MASK;

	dss_en = intel_uncore_read(uncore, GEN12_GT_DSS_ENABLE);

	/* one bit per pair of EUs */
	eu_en_fuse = ~(intel_uncore_read(uncore, GEN11_EU_DISABLE) &
		       GEN11_EU_DIS_MASK);
	for (eu = 0; eu < sseu->max_eus_per_subslice / 2; eu++)
		if (eu_en_fuse & BIT(eu))
			eu_en |= BIT(eu * 2) | BIT(eu * 2 + 1);

	gen11_compute_sseu_info(sseu, s_en, dss_en, eu_en);

	/* TGL only supports slice-level power gating */
	sseu->has_slice_pg = 1;
}

static void gen11_sseu_info_init(struct intel_gt *gt)
{
	struct sseu_dev_info *sseu = &gt->info.sseu;
	struct intel_uncore *uncore = gt->uncore;
	u32 ss_en;
	u8 eu_en;
	u8 s_en;

	if (IS_ELKHARTLAKE(gt->i915))
		intel_sseu_set_info(sseu, 1, 4, 8);
	else
		intel_sseu_set_info(sseu, 1, 8, 8);

	s_en = intel_uncore_read(uncore, GEN11_GT_SLICE_ENABLE) &
		GEN11_GT_S_ENA_MASK;
	ss_en = ~intel_uncore_read(uncore, GEN11_GT_SUBSLICE_DISABLE);

	eu_en = ~(intel_uncore_read(uncore, GEN11_EU_DISABLE) &
		  GEN11_EU_DIS_MASK);

	gen11_compute_sseu_info(sseu, s_en, ss_en, eu_en);

	/* ICL has no power gating restrictions. */
	sseu->has_slice_pg = 1;
	sseu->has_subslice_pg = 1;
	sseu->has_eu_pg = 1;
}

static void gen10_sseu_info_init(struct intel_gt *gt)
{
	struct intel_uncore *uncore = gt->uncore;
	struct sseu_dev_info *sseu = &gt->info.sseu;
	const u32 fuse2 = intel_uncore_read(uncore, GEN8_FUSE2);
	const int eu_mask = 0xff;
	u32 subslice_mask, eu_en;
	int s, ss;

	intel_sseu_set_info(sseu, 6, 4, 8);

	sseu->slice_mask = (fuse2 & GEN10_F2_S_ENA_MASK) >>
		GEN10_F2_S_ENA_SHIFT;

	/* Slice0 */
	eu_en = ~intel_uncore_read(uncore, GEN8_EU_DISABLE0);
	for (ss = 0; ss < sseu->max_subslices; ss++)
		sseu_set_eus(sseu, 0, ss, (eu_en >> (8 * ss)) & eu_mask);
	/* Slice1 */
	sseu_set_eus(sseu, 1, 0, (eu_en >> 24) & eu_mask);
	eu_en = ~intel_uncore_read(uncore, GEN8_EU_DISABLE1);
	sseu_set_eus(sseu, 1, 1, eu_en & eu_mask);
	/* Slice2 */
	sseu_set_eus(sseu, 2, 0, (eu_en >> 8) & eu_mask);
	sseu_set_eus(sseu, 2, 1, (eu_en >> 16) & eu_mask);
	/* Slice3 */
	sseu_set_eus(sseu, 3, 0, (eu_en >> 24) & eu_mask);
	eu_en = ~intel_uncore_read(uncore, GEN8_EU_DISABLE2);
	sseu_set_eus(sseu, 3, 1, eu_en & eu_mask);
	/* Slice4 */
	sseu_set_eus(sseu, 4, 0, (eu_en >> 8) & eu_mask);
	sseu_set_eus(sseu, 4, 1, (eu_en >> 16) & eu_mask);
	/* Slice5 */
	sseu_set_eus(sseu, 5, 0, (eu_en >> 24) & eu_mask);
	eu_en = ~intel_uncore_read(uncore, GEN10_EU_DISABLE3);
	sseu_set_eus(sseu, 5, 1, eu_en & eu_mask);

	subslice_mask = (1 << 4) - 1;
	subslice_mask &= ~((fuse2 & GEN10_F2_SS_DIS_MASK) >>
			   GEN10_F2_SS_DIS_SHIFT);

	for (s = 0; s < sseu->max_slices; s++) {
		u32 subslice_mask_with_eus = subslice_mask;

		for (ss = 0; ss < sseu->max_subslices; ss++) {
			if (sseu_get_eus(sseu, s, ss) == 0)
				subslice_mask_with_eus &= ~BIT(ss);
		}

		/*
		 * Slice0 can have up to 3 subslices, but there are only 2 in
		 * slice1/2.
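		 * The fuse-derived mask is therefore used as-is for slice0,
		 * while the other slices are clamped to their two subslices
		 * (mask & 0x3) below.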
		 */
		intel_sseu_set_subslices(sseu, s, s == 0 ?
					 subslice_mask_with_eus :
					 subslice_mask_with_eus & 0x3);
	}

	sseu->eu_total = compute_eu_total(sseu);

	/*
	 * CNL is expected to always have a uniform distribution
	 * of EU across subslices with the exception that any one
	 * EU in any one subslice may be fused off for die
	 * recovery.
	 */
	sseu->eu_per_subslice =
		intel_sseu_subslice_total(sseu) ?
		DIV_ROUND_UP(sseu->eu_total, intel_sseu_subslice_total(sseu)) :
		0;

	/* No restrictions on Power Gating */
	sseu->has_slice_pg = 1;
	sseu->has_subslice_pg = 1;
	sseu->has_eu_pg = 1;
}

static void cherryview_sseu_info_init(struct intel_gt *gt)
{
	struct sseu_dev_info *sseu = &gt->info.sseu;
	u32 fuse;
	u8 subslice_mask = 0;

	fuse = intel_uncore_read(gt->uncore, CHV_FUSE_GT);

	sseu->slice_mask = BIT(0);
	intel_sseu_set_info(sseu, 1, 2, 8);

	if (!(fuse & CHV_FGT_DISABLE_SS0)) {
		u8 disabled_mask =
			((fuse & CHV_FGT_EU_DIS_SS0_R0_MASK) >>
			 CHV_FGT_EU_DIS_SS0_R0_SHIFT) |
			(((fuse & CHV_FGT_EU_DIS_SS0_R1_MASK) >>
			  CHV_FGT_EU_DIS_SS0_R1_SHIFT) << 4);

		subslice_mask |= BIT(0);
		sseu_set_eus(sseu, 0, 0, ~disabled_mask);
	}

	if (!(fuse & CHV_FGT_DISABLE_SS1)) {
		u8 disabled_mask =
			((fuse & CHV_FGT_EU_DIS_SS1_R0_MASK) >>
			 CHV_FGT_EU_DIS_SS1_R0_SHIFT) |
			(((fuse & CHV_FGT_EU_DIS_SS1_R1_MASK) >>
			  CHV_FGT_EU_DIS_SS1_R1_SHIFT) << 4);

		subslice_mask |= BIT(1);
		sseu_set_eus(sseu, 0, 1, ~disabled_mask);
	}

	intel_sseu_set_subslices(sseu, 0, subslice_mask);

	sseu->eu_total = compute_eu_total(sseu);

	/*
	 * CHV is expected to always have a uniform distribution of EU
	 * across subslices.
	 */
	sseu->eu_per_subslice = intel_sseu_subslice_total(sseu) ?
		sseu->eu_total /
			intel_sseu_subslice_total(sseu) :
		0;
	/*
	 * CHV supports subslice power gating on devices with more than
	 * one subslice, and supports EU power gating on devices with
	 * more than one EU pair per subslice.
	 */
	sseu->has_slice_pg = 0;
	sseu->has_subslice_pg = intel_sseu_subslice_total(sseu) > 1;
	sseu->has_eu_pg = (sseu->eu_per_subslice > 2);
}

static void gen9_sseu_info_init(struct intel_gt *gt)
{
	struct drm_i915_private *i915 = gt->i915;
	struct intel_device_info *info = mkwrite_device_info(i915);
	struct sseu_dev_info *sseu = &gt->info.sseu;
	struct intel_uncore *uncore = gt->uncore;
	u32 fuse2, eu_disable, subslice_mask;
	const u8 eu_mask = 0xff;
	int s, ss;

	fuse2 = intel_uncore_read(uncore, GEN8_FUSE2);
	sseu->slice_mask = (fuse2 & GEN8_F2_S_ENA_MASK) >> GEN8_F2_S_ENA_SHIFT;

	/* BXT has a single slice and at most 3 subslices. */
	intel_sseu_set_info(sseu, IS_GEN9_LP(i915) ? 1 : 3,
			    IS_GEN9_LP(i915) ? 3 : 4, 8);

	/*
	 * The subslice disable field is global, i.e. it applies
	 * to each of the enabled slices.
	 */
	subslice_mask = (1 << sseu->max_subslices) - 1;
	subslice_mask &= ~((fuse2 & GEN9_F2_SS_DIS_MASK) >>
			   GEN9_F2_SS_DIS_SHIFT);

	/*
	 * Iterate through enabled slices and subslices to
	 * count the total enabled EU.
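	 * Each slice has its own GEN9_EU_DISABLE register, carrying one
	 * byte of EU-disable fuse bits per subslice.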
	 */
	for (s = 0; s < sseu->max_slices; s++) {
		if (!(sseu->slice_mask & BIT(s)))
			/* skip disabled slice */
			continue;

		intel_sseu_set_subslices(sseu, s, subslice_mask);

		eu_disable = intel_uncore_read(uncore, GEN9_EU_DISABLE(s));
		for (ss = 0; ss < sseu->max_subslices; ss++) {
			int eu_per_ss;
			u8 eu_disabled_mask;

			if (!intel_sseu_has_subslice(sseu, s, ss))
				/* skip disabled subslice */
				continue;

			eu_disabled_mask = (eu_disable >> (ss * 8)) & eu_mask;

			sseu_set_eus(sseu, s, ss, ~eu_disabled_mask);

			eu_per_ss = sseu->max_eus_per_subslice -
				hweight8(eu_disabled_mask);

			/*
			 * Record which subslice(s) has(have) 7 EUs. We
			 * can tune the hash used to spread work among
			 * subslices if they are unbalanced.
			 */
			if (eu_per_ss == 7)
				sseu->subslice_7eu[s] |= BIT(ss);
		}
	}

	sseu->eu_total = compute_eu_total(sseu);

	/*
	 * SKL is expected to always have a uniform distribution
	 * of EU across subslices with the exception that any one
	 * EU in any one subslice may be fused off for die
	 * recovery. BXT is expected to be perfectly uniform in EU
	 * distribution.
	 */
	sseu->eu_per_subslice =
		intel_sseu_subslice_total(sseu) ?
		DIV_ROUND_UP(sseu->eu_total, intel_sseu_subslice_total(sseu)) :
		0;

	/*
	 * SKL+ supports slice power gating on devices with more than
	 * one slice, and supports EU power gating on devices with
	 * more than one EU pair per subslice. BXT+ supports subslice
	 * power gating on devices with more than one subslice, and
	 * supports EU power gating on devices with more than one EU
	 * pair per subslice.
	 */
	sseu->has_slice_pg =
		!IS_GEN9_LP(i915) && hweight8(sseu->slice_mask) > 1;
	sseu->has_subslice_pg =
		IS_GEN9_LP(i915) && intel_sseu_subslice_total(sseu) > 1;
	sseu->has_eu_pg = sseu->eu_per_subslice > 2;

	if (IS_GEN9_LP(i915)) {
#define IS_SS_DISABLED(ss)	(!(sseu->subslice_mask[0] & BIT(ss)))
		info->has_pooled_eu = hweight8(sseu->subslice_mask[0]) == 3;

		sseu->min_eu_in_pool = 0;
		if (info->has_pooled_eu) {
			if (IS_SS_DISABLED(2) || IS_SS_DISABLED(0))
				sseu->min_eu_in_pool = 3;
			else if (IS_SS_DISABLED(1))
				sseu->min_eu_in_pool = 6;
			else
				sseu->min_eu_in_pool = 9;
		}
#undef IS_SS_DISABLED
	}
}

static void bdw_sseu_info_init(struct intel_gt *gt)
{
	struct sseu_dev_info *sseu = &gt->info.sseu;
	struct intel_uncore *uncore = gt->uncore;
	int s, ss;
	u32 fuse2, subslice_mask, eu_disable[3]; /* s_max */
	u32 eu_disable0, eu_disable1, eu_disable2;

	fuse2 = intel_uncore_read(uncore, GEN8_FUSE2);
	sseu->slice_mask = (fuse2 & GEN8_F2_S_ENA_MASK) >> GEN8_F2_S_ENA_SHIFT;
	intel_sseu_set_info(sseu, 3, 3, 8);

	/*
	 * The subslice disable field is global, i.e. it applies
	 * to each of the enabled slices.
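	 * The per-slice EU disable fuses, in contrast, are packed
	 * back-to-back across the three GEN8_EU_DISABLE registers (3
	 * subslices x 8 EUs per slice), so they are spliced back together
	 * into eu_disable[] below.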
	 */
	subslice_mask = GENMASK(sseu->max_subslices - 1, 0);
	subslice_mask &= ~((fuse2 & GEN8_F2_SS_DIS_MASK) >>
			   GEN8_F2_SS_DIS_SHIFT);
	eu_disable0 = intel_uncore_read(uncore, GEN8_EU_DISABLE0);
	eu_disable1 = intel_uncore_read(uncore, GEN8_EU_DISABLE1);
	eu_disable2 = intel_uncore_read(uncore, GEN8_EU_DISABLE2);
	eu_disable[0] = eu_disable0 & GEN8_EU_DIS0_S0_MASK;
	eu_disable[1] = (eu_disable0 >> GEN8_EU_DIS0_S1_SHIFT) |
		((eu_disable1 & GEN8_EU_DIS1_S1_MASK) <<
		 (32 - GEN8_EU_DIS0_S1_SHIFT));
	eu_disable[2] = (eu_disable1 >> GEN8_EU_DIS1_S2_SHIFT) |
		((eu_disable2 & GEN8_EU_DIS2_S2_MASK) <<
		 (32 - GEN8_EU_DIS1_S2_SHIFT));

	/*
	 * Iterate through enabled slices and subslices to
	 * count the total enabled EU.
	 */
	for (s = 0; s < sseu->max_slices; s++) {
		if (!(sseu->slice_mask & BIT(s)))
			/* skip disabled slice */
			continue;

		intel_sseu_set_subslices(sseu, s, subslice_mask);

		for (ss = 0; ss < sseu->max_subslices; ss++) {
			u8 eu_disabled_mask;
			u32 n_disabled;

			if (!intel_sseu_has_subslice(sseu, s, ss))
				/* skip disabled subslice */
				continue;

			eu_disabled_mask =
				eu_disable[s] >> (ss * sseu->max_eus_per_subslice);

			sseu_set_eus(sseu, s, ss, ~eu_disabled_mask);

			n_disabled = hweight8(eu_disabled_mask);

			/*
			 * Record which subslices have 7 EUs.
			 */
			if (sseu->max_eus_per_subslice - n_disabled == 7)
				sseu->subslice_7eu[s] |= 1 << ss;
		}
	}

	sseu->eu_total = compute_eu_total(sseu);

	/*
	 * BDW is expected to always have a uniform distribution of EU across
	 * subslices with the exception that any one EU in any one subslice may
	 * be fused off for die recovery.
	 */
	sseu->eu_per_subslice =
		intel_sseu_subslice_total(sseu) ?
		DIV_ROUND_UP(sseu->eu_total, intel_sseu_subslice_total(sseu)) :
		0;

	/*
	 * BDW supports slice power gating on devices with more than
	 * one slice.
	 */
	sseu->has_slice_pg = hweight8(sseu->slice_mask) > 1;
	sseu->has_subslice_pg = 0;
	sseu->has_eu_pg = 0;
}

static void hsw_sseu_info_init(struct intel_gt *gt)
{
	struct drm_i915_private *i915 = gt->i915;
	struct sseu_dev_info *sseu = &gt->info.sseu;
	u32 fuse1;
	u8 subslice_mask = 0;
	int s, ss;

	/*
	 * There isn't a register to tell us how many slices/subslices. We
	 * work off the PCI-ids here.
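	 * (GT1: one slice with one subslice, GT2: one slice with two
	 * subslices, GT3: two slices with two subslices each, per the
	 * switch below.)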
	 */
	switch (INTEL_INFO(i915)->gt) {
	default:
		MISSING_CASE(INTEL_INFO(i915)->gt);
		fallthrough;
	case 1:
		sseu->slice_mask = BIT(0);
		subslice_mask = BIT(0);
		break;
	case 2:
		sseu->slice_mask = BIT(0);
		subslice_mask = BIT(0) | BIT(1);
		break;
	case 3:
		sseu->slice_mask = BIT(0) | BIT(1);
		subslice_mask = BIT(0) | BIT(1);
		break;
	}

	fuse1 = intel_uncore_read(gt->uncore, HSW_PAVP_FUSE1);
	switch ((fuse1 & HSW_F1_EU_DIS_MASK) >> HSW_F1_EU_DIS_SHIFT) {
	default:
		MISSING_CASE((fuse1 & HSW_F1_EU_DIS_MASK) >>
			     HSW_F1_EU_DIS_SHIFT);
		fallthrough;
	case HSW_F1_EU_DIS_10EUS:
		sseu->eu_per_subslice = 10;
		break;
	case HSW_F1_EU_DIS_8EUS:
		sseu->eu_per_subslice = 8;
		break;
	case HSW_F1_EU_DIS_6EUS:
		sseu->eu_per_subslice = 6;
		break;
	}

	intel_sseu_set_info(sseu, hweight8(sseu->slice_mask),
			    hweight8(subslice_mask),
			    sseu->eu_per_subslice);

	for (s = 0; s < sseu->max_slices; s++) {
		intel_sseu_set_subslices(sseu, s, subslice_mask);

		for (ss = 0; ss < sseu->max_subslices; ss++) {
			sseu_set_eus(sseu, s, ss,
				     (1UL << sseu->eu_per_subslice) - 1);
		}
	}

	sseu->eu_total = compute_eu_total(sseu);

	/* No powergating for you. */
	sseu->has_slice_pg = 0;
	sseu->has_subslice_pg = 0;
	sseu->has_eu_pg = 0;
}

void intel_sseu_info_init(struct intel_gt *gt)
{
	struct drm_i915_private *i915 = gt->i915;

	if (IS_HASWELL(i915))
		hsw_sseu_info_init(gt);
	else if (IS_CHERRYVIEW(i915))
		cherryview_sseu_info_init(gt);
	else if (IS_BROADWELL(i915))
		bdw_sseu_info_init(gt);
	else if (IS_GEN(i915, 9))
		gen9_sseu_info_init(gt);
	else if (IS_GEN(i915, 10))
		gen10_sseu_info_init(gt);
	else if (IS_GEN(i915, 11))
		gen11_sseu_info_init(gt);
	else if (INTEL_GEN(i915) >= 12)
		gen12_sseu_info_init(gt);
}

u32 intel_sseu_make_rpcs(struct intel_gt *gt,
			 const struct intel_sseu *req_sseu)
{
	struct drm_i915_private *i915 = gt->i915;
	const struct sseu_dev_info *sseu = &gt->info.sseu;
	bool subslice_pg = sseu->has_subslice_pg;
	u8 slices, subslices;
	u32 rpcs = 0;

	/*
	 * No explicit RPCS request is needed to ensure full
	 * slice/subslice/EU enablement prior to Gen9.
	 */
	if (INTEL_GEN(i915) < 9)
		return 0;

	/*
	 * If i915/perf is active, we want a stable powergating configuration
	 * on the system. Use the configuration pinned by i915/perf.
	 */
	if (i915->perf.exclusive_stream)
		req_sseu = &i915->perf.sseu;

	slices = hweight8(req_sseu->slice_mask);
	subslices = hweight8(req_sseu->subslice_mask);

	/*
	 * Since the SScount bitfield in GEN8_R_PWR_CLK_STATE is only three bits
	 * wide and Icelake has up to eight subslices, special programming is
	 * needed in order to correctly enable all subslices.
	 *
	 * According to documentation software must consider the configuration
	 * as 2x4x8 and hardware will translate this to 1x8x8.
	 *
	 * Furthermore, even though SScount is three bits, the maximum
	 * documented value for it is four. From this some rules/restrictions
	 * follow:
	 *
	 * 1.
	 * If enabled subslice count is greater than four, two whole slices must
	 * be enabled instead.
	 *
	 * 2.
	 * When more than one slice is enabled, hardware ignores the subslice
	 * count altogether.
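	 * For example, a request for all eight ICL subslices (slices == 1,
	 * subslices == 8) is programmed below as two slices with the
	 * subslice count left unprogrammed.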
	 *
	 * From these restrictions it follows that it is not possible to enable
	 * a subslice count between the SScount maximum of four and the maximum
	 * number of subslices available on a particular SKU. Either all
	 * subslices are enabled, or a count between one and four on the first
	 * slice.
	 */
	if (IS_GEN(i915, 11) &&
	    slices == 1 &&
	    subslices > min_t(u8, 4, hweight8(sseu->subslice_mask[0]) / 2)) {
		GEM_BUG_ON(subslices & 1);

		subslice_pg = false;
		slices *= 2;
	}

	/*
	 * Starting in Gen9, render power gating can leave
	 * slice/subslice/EU in a partially enabled state. We
	 * must make an explicit request through RPCS for full
	 * enablement.
	 */
	if (sseu->has_slice_pg) {
		u32 mask, val = slices;

		if (INTEL_GEN(i915) >= 11) {
			mask = GEN11_RPCS_S_CNT_MASK;
			val <<= GEN11_RPCS_S_CNT_SHIFT;
		} else {
			mask = GEN8_RPCS_S_CNT_MASK;
			val <<= GEN8_RPCS_S_CNT_SHIFT;
		}

		GEM_BUG_ON(val & ~mask);
		val &= mask;

		rpcs |= GEN8_RPCS_ENABLE | GEN8_RPCS_S_CNT_ENABLE | val;
	}

	if (subslice_pg) {
		u32 val = subslices;

		val <<= GEN8_RPCS_SS_CNT_SHIFT;

		GEM_BUG_ON(val & ~GEN8_RPCS_SS_CNT_MASK);
		val &= GEN8_RPCS_SS_CNT_MASK;

		rpcs |= GEN8_RPCS_ENABLE | GEN8_RPCS_SS_CNT_ENABLE | val;
	}

	if (sseu->has_eu_pg) {
		u32 val;

		val = req_sseu->min_eus_per_subslice << GEN8_RPCS_EU_MIN_SHIFT;
		GEM_BUG_ON(val & ~GEN8_RPCS_EU_MIN_MASK);
		val &= GEN8_RPCS_EU_MIN_MASK;

		rpcs |= val;

		val = req_sseu->max_eus_per_subslice << GEN8_RPCS_EU_MAX_SHIFT;
		GEM_BUG_ON(val & ~GEN8_RPCS_EU_MAX_MASK);
		val &= GEN8_RPCS_EU_MAX_MASK;

		rpcs |= val;

		rpcs |= GEN8_RPCS_ENABLE;
	}

	return rpcs;
}

void intel_sseu_dump(const struct sseu_dev_info *sseu, struct drm_printer *p)
{
	int s;

	drm_printf(p, "slice total: %u, mask=%04x\n",
		   hweight8(sseu->slice_mask), sseu->slice_mask);
	drm_printf(p, "subslice total: %u\n", intel_sseu_subslice_total(sseu));
	for (s = 0; s < sseu->max_slices; s++) {
		drm_printf(p, "slice%d: %u subslices, mask=%08x\n",
			   s, intel_sseu_subslices_per_slice(sseu, s),
			   intel_sseu_get_subslices(sseu, s));
	}
	drm_printf(p, "EU total: %u\n", sseu->eu_total);
	drm_printf(p, "EU per subslice: %u\n", sseu->eu_per_subslice);
	drm_printf(p, "has slice power gating: %s\n",
		   yesno(sseu->has_slice_pg));
	drm_printf(p, "has subslice power gating: %s\n",
		   yesno(sseu->has_subslice_pg));
	drm_printf(p, "has EU power gating: %s\n", yesno(sseu->has_eu_pg));
}

void intel_sseu_print_topology(const struct sseu_dev_info *sseu,
			       struct drm_printer *p)
{
	int s, ss;

	if (sseu->max_slices == 0) {
		drm_printf(p, "Unavailable\n");
		return;
	}

	for (s = 0; s < sseu->max_slices; s++) {
		drm_printf(p, "slice%d: %u subslice(s) (0x%08x):\n",
			   s, intel_sseu_subslices_per_slice(sseu, s),
			   intel_sseu_get_subslices(sseu, s));

		for (ss = 0; ss < sseu->max_subslices; ss++) {
			u16 enabled_eus = sseu_get_eus(sseu, s, ss);

			drm_printf(p, "\tsubslice%d: %u EUs (0x%hx)\n",
				   ss, hweight16(enabled_eus), enabled_eus);
		}
	}
}