// SPDX-License-Identifier: MIT
/*
 * Copyright © 2019 Intel Corporation
 */

#include <linux/string_helpers.h>

#include "i915_drv.h"
#include "intel_engine_regs.h"
#include "intel_gt_regs.h"
#include "intel_sseu.h"

void intel_sseu_set_info(struct sseu_dev_info *sseu, u8 max_slices,
			 u8 max_subslices, u8 max_eus_per_subslice)
{
	sseu->max_slices = max_slices;
	sseu->max_subslices = max_subslices;
	sseu->max_eus_per_subslice = max_eus_per_subslice;

	sseu->ss_stride = GEN_SSEU_STRIDE(sseu->max_subslices);
	GEM_BUG_ON(sseu->ss_stride > GEN_MAX_SUBSLICE_STRIDE);
	sseu->eu_stride = GEN_SSEU_STRIDE(sseu->max_eus_per_subslice);
	GEM_BUG_ON(sseu->eu_stride > GEN_MAX_EU_STRIDE);
}
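/*
 * Worked example (hypothetical values), assuming GEN_SSEU_STRIDE()
 * rounds a bit count up to whole mask bytes: with max_subslices = 32,
 * ss_stride = 32 / BITS_PER_BYTE = 4, so slice s's subslice bits start
 * at byte offset s * 4 in the flat subslice_mask[] array.
 */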

unsigned int
intel_sseu_subslice_total(const struct sseu_dev_info *sseu)
{
	unsigned int i, total = 0;

	for (i = 0; i < ARRAY_SIZE(sseu->subslice_mask); i++)
		total += hweight8(sseu->subslice_mask[i]);

	return total;
}

static u32
sseu_get_subslices(const struct sseu_dev_info *sseu,
		   const u8 *subslice_mask, u8 slice)
{
	int i, offset = slice * sseu->ss_stride;
	u32 mask = 0;

	GEM_BUG_ON(slice >= sseu->max_slices);

	for (i = 0; i < sseu->ss_stride; i++)
		mask |= (u32)subslice_mask[offset + i] << (i * BITS_PER_BYTE);

	return mask;
}
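/*
 * Worked example (hypothetical values): the loop above reassembles the
 * per-slice mask bytes into a u32, least significant byte first. With
 * ss_stride = 2 and bytes { 0xff, 0x03 } for this slice, the result is
 * 0xff | (0x03 << 8) = 0x3ff, i.e. ten enabled subslices.
 */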

u32 intel_sseu_get_subslices(const struct sseu_dev_info *sseu, u8 slice)
{
	return sseu_get_subslices(sseu, sseu->subslice_mask, slice);
}

static u32 sseu_get_geometry_subslices(const struct sseu_dev_info *sseu)
{
	return sseu_get_subslices(sseu, sseu->geometry_subslice_mask, 0);
}

u32 intel_sseu_get_compute_subslices(const struct sseu_dev_info *sseu)
{
	return sseu_get_subslices(sseu, sseu->compute_subslice_mask, 0);
}

void intel_sseu_set_subslices(struct sseu_dev_info *sseu, int slice,
			      u8 *subslice_mask, u32 ss_mask)
{
	int offset = slice * sseu->ss_stride;

	memcpy(&subslice_mask[offset], &ss_mask, sseu->ss_stride);
}
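/*
 * N.B. the memcpy() above stores the low ss_stride bytes of ss_mask in
 * native byte order, while sseu_get_subslices() reassembles bytes
 * least-significant first, so the round trip assumes a little-endian
 * host.
 */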

unsigned int
intel_sseu_subslices_per_slice(const struct sseu_dev_info *sseu, u8 slice)
{
	return hweight32(intel_sseu_get_subslices(sseu, slice));
}

static int sseu_eu_idx(const struct sseu_dev_info *sseu, int slice,
		       int subslice)
{
	int slice_stride = sseu->max_subslices * sseu->eu_stride;

	return slice * slice_stride + subslice * sseu->eu_stride;
}

static u16 sseu_get_eus(const struct sseu_dev_info *sseu, int slice,
			int subslice)
{
	int i, offset = sseu_eu_idx(sseu, slice, subslice);
	u16 eu_mask = 0;

	for (i = 0; i < sseu->eu_stride; i++)
		eu_mask |=
			((u16)sseu->eu_mask[offset + i]) << (i * BITS_PER_BYTE);

	return eu_mask;
}

static void sseu_set_eus(struct sseu_dev_info *sseu, int slice, int subslice,
			 u16 eu_mask)
{
	int i, offset = sseu_eu_idx(sseu, slice, subslice);

	for (i = 0; i < sseu->eu_stride; i++)
		sseu->eu_mask[offset + i] =
			(eu_mask >> (BITS_PER_BYTE * i)) & 0xff;
}
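/*
 * Worked example (hypothetical values): with eu_stride = 1,
 * sseu_set_eus(sseu, 0, 2, 0x7f) writes 0x7f at offset
 * sseu_eu_idx(sseu, 0, 2), and sseu_get_eus(sseu, 0, 2) then returns
 * 0x7f, i.e. seven enabled EUs; the two helpers are byte-wise inverses.
 */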

static u16 compute_eu_total(const struct sseu_dev_info *sseu)
{
	u16 i, total = 0;

	for (i = 0; i < ARRAY_SIZE(sseu->eu_mask); i++)
		total += hweight8(sseu->eu_mask[i]);

	return total;
}

static u32 get_ss_stride_mask(struct sseu_dev_info *sseu, u8 s, u32 ss_en)
{
	u32 ss_mask;

	ss_mask = ss_en >> (s * sseu->max_subslices);
	ss_mask &= GENMASK(sseu->max_subslices - 1, 0);

	return ss_mask;
}
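/*
 * Worked example (hypothetical values): with max_subslices = 4 and
 * ss_en = 0xf3, slice 0 sees 0xf3 & GENMASK(3, 0) = 0x3 and slice 1
 * sees (0xf3 >> 4) & GENMASK(3, 0) = 0xf.
 */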

static void gen11_compute_sseu_info(struct sseu_dev_info *sseu, u8 s_en,
				    u32 g_ss_en, u32 c_ss_en, u16 eu_en)
{
	int s, ss;

	/* g_ss_en/c_ss_en represent the entire subslice mask across all slices */
	GEM_BUG_ON(sseu->max_slices * sseu->max_subslices >
		   sizeof(g_ss_en) * BITS_PER_BYTE);

	for (s = 0; s < sseu->max_slices; s++) {
		if ((s_en & BIT(s)) == 0)
			continue;

		sseu->slice_mask |= BIT(s);

		/*
		 * XeHP introduces the concept of compute vs geometry DSS. To
		 * reduce variation between GENs around subslice usage, store
		 * masks for both the geometry and compute enabled subslices,
		 * since userspace will need to be able to query these masks
		 * independently.  Also compute a total enabled subslice count
		 * for the purposes of selecting subslices to use in a
		 * particular GEM context.
		 */
		intel_sseu_set_subslices(sseu, s, sseu->compute_subslice_mask,
					 get_ss_stride_mask(sseu, s, c_ss_en));
		intel_sseu_set_subslices(sseu, s, sseu->geometry_subslice_mask,
					 get_ss_stride_mask(sseu, s, g_ss_en));
		intel_sseu_set_subslices(sseu, s, sseu->subslice_mask,
					 get_ss_stride_mask(sseu, s,
							    g_ss_en | c_ss_en));

		for (ss = 0; ss < sseu->max_subslices; ss++)
			if (intel_sseu_has_subslice(sseu, s, ss))
				sseu_set_eus(sseu, s, ss, eu_en);
	}
	sseu->eu_per_subslice = hweight16(eu_en);
	sseu->eu_total = compute_eu_total(sseu);
}

static void gen12_sseu_info_init(struct intel_gt *gt)
{
	struct sseu_dev_info *sseu = &gt->info.sseu;
	struct intel_uncore *uncore = gt->uncore;
	u32 g_dss_en, c_dss_en = 0;
	u16 eu_en = 0;
	u8 eu_en_fuse;
	u8 s_en;
	int eu;

	/*
	 * Gen12 has Dual-Subslices (DSS), which behave similarly to two
	 * gen11 subslices. Instead of splitting these, provide userspace
	 * with an array of DSS to more closely represent the hardware
	 * resource.
	 *
	 * In addition, the concept of slice has been removed in Xe_HP.
	 * To be compatible with prior generations, assume a single slice
	 * across the entire device. Then derive the DSS for each
	 * workload type within that software slice.
	 */
	if (IS_DG2(gt->i915) || IS_XEHPSDV(gt->i915))
		intel_sseu_set_info(sseu, 1, 32, 16);
	else
		intel_sseu_set_info(sseu, 1, 6, 16);

	/*
	 * As mentioned above, Xe_HP does not have the concept of a slice.
	 * Enable one for software backwards compatibility.
	 */
	if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 50))
		s_en = 0x1;
	else
		s_en = intel_uncore_read(uncore, GEN11_GT_SLICE_ENABLE) &
		       GEN11_GT_S_ENA_MASK;

	g_dss_en = intel_uncore_read(uncore, GEN12_GT_GEOMETRY_DSS_ENABLE);
	if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 50))
		c_dss_en = intel_uncore_read(uncore, GEN12_GT_COMPUTE_DSS_ENABLE);

	/* one bit per pair of EUs */
	if (GRAPHICS_VER_FULL(gt->i915) >= IP_VER(12, 50))
		eu_en_fuse = intel_uncore_read(uncore, XEHP_EU_ENABLE) & XEHP_EU_ENA_MASK;
	else
		eu_en_fuse = ~(intel_uncore_read(uncore, GEN11_EU_DISABLE) &
			       GEN11_EU_DIS_MASK);

	for (eu = 0; eu < sseu->max_eus_per_subslice / 2; eu++)
		if (eu_en_fuse & BIT(eu))
			eu_en |= BIT(eu * 2) | BIT(eu * 2 + 1);
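	/*
	 * Worked example (hypothetical values): each fuse bit enables a
	 * pair of adjacent EUs, so eu_en_fuse = 0b0101 expands to
	 * eu_en = 0b00110011, i.e. EUs 0-1 and 4-5 enabled.
	 */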

	gen11_compute_sseu_info(sseu, s_en, g_dss_en, c_dss_en, eu_en);

	/* TGL only supports slice-level power gating */
	sseu->has_slice_pg = 1;
}

static void gen11_sseu_info_init(struct intel_gt *gt)
{
	struct sseu_dev_info *sseu = &gt->info.sseu;
	struct intel_uncore *uncore = gt->uncore;
	u32 ss_en;
	u8 eu_en;
	u8 s_en;

	if (IS_JSL_EHL(gt->i915))
		intel_sseu_set_info(sseu, 1, 4, 8);
	else
		intel_sseu_set_info(sseu, 1, 8, 8);

	s_en = intel_uncore_read(uncore, GEN11_GT_SLICE_ENABLE) &
		GEN11_GT_S_ENA_MASK;
	ss_en = ~intel_uncore_read(uncore, GEN11_GT_SUBSLICE_DISABLE);

	eu_en = ~(intel_uncore_read(uncore, GEN11_EU_DISABLE) &
		  GEN11_EU_DIS_MASK);

	gen11_compute_sseu_info(sseu, s_en, ss_en, 0, eu_en);

	/* ICL has no power gating restrictions. */
	sseu->has_slice_pg = 1;
	sseu->has_subslice_pg = 1;
	sseu->has_eu_pg = 1;
}

static void cherryview_sseu_info_init(struct intel_gt *gt)
{
	struct sseu_dev_info *sseu = &gt->info.sseu;
	u32 fuse;
	u8 subslice_mask = 0;

	fuse = intel_uncore_read(gt->uncore, CHV_FUSE_GT);

	sseu->slice_mask = BIT(0);
	intel_sseu_set_info(sseu, 1, 2, 8);

	if (!(fuse & CHV_FGT_DISABLE_SS0)) {
		u8 disabled_mask =
			((fuse & CHV_FGT_EU_DIS_SS0_R0_MASK) >>
			 CHV_FGT_EU_DIS_SS0_R0_SHIFT) |
			(((fuse & CHV_FGT_EU_DIS_SS0_R1_MASK) >>
			  CHV_FGT_EU_DIS_SS0_R1_SHIFT) << 4);

		subslice_mask |= BIT(0);
		sseu_set_eus(sseu, 0, 0, ~disabled_mask);
	}
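	/*
	 * N.B. each CHV subslice has 8 EUs whose disable fuse is split
	 * across two 4-bit row fields (R0 and R1); the code above
	 * reassembles the full 8-bit mask as R0 | (R1 << 4) before
	 * inverting it into the enabled-EU mask.
	 */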

	if (!(fuse & CHV_FGT_DISABLE_SS1)) {
		u8 disabled_mask =
			((fuse & CHV_FGT_EU_DIS_SS1_R0_MASK) >>
			 CHV_FGT_EU_DIS_SS1_R0_SHIFT) |
			(((fuse & CHV_FGT_EU_DIS_SS1_R1_MASK) >>
			  CHV_FGT_EU_DIS_SS1_R1_SHIFT) << 4);

		subslice_mask |= BIT(1);
		sseu_set_eus(sseu, 0, 1, ~disabled_mask);
	}

	intel_sseu_set_subslices(sseu, 0, sseu->subslice_mask, subslice_mask);

	sseu->eu_total = compute_eu_total(sseu);

	/*
	 * CHV is expected to always have a uniform distribution of EUs
	 * across subslices.
	 */
	sseu->eu_per_subslice = intel_sseu_subslice_total(sseu) ?
		sseu->eu_total /
		intel_sseu_subslice_total(sseu) :
		0;
	/*
	 * CHV supports subslice power gating on devices with more than
	 * one subslice, and supports EU power gating on devices with
	 * more than one EU pair per subslice.
	 */
	sseu->has_slice_pg = 0;
	sseu->has_subslice_pg = intel_sseu_subslice_total(sseu) > 1;
	sseu->has_eu_pg = (sseu->eu_per_subslice > 2);
}

static void gen9_sseu_info_init(struct intel_gt *gt)
{
	struct drm_i915_private *i915 = gt->i915;
	struct intel_device_info *info = mkwrite_device_info(i915);
	struct sseu_dev_info *sseu = &gt->info.sseu;
	struct intel_uncore *uncore = gt->uncore;
	u32 fuse2, eu_disable, subslice_mask;
	const u8 eu_mask = 0xff;
	int s, ss;

	fuse2 = intel_uncore_read(uncore, GEN8_FUSE2);
	sseu->slice_mask = (fuse2 & GEN8_F2_S_ENA_MASK) >> GEN8_F2_S_ENA_SHIFT;

	/* BXT has a single slice and at most 3 subslices. */
	intel_sseu_set_info(sseu, IS_GEN9_LP(i915) ? 1 : 3,
			    IS_GEN9_LP(i915) ? 3 : 4, 8);

	/*
	 * The subslice disable field is global, i.e. it applies
	 * to each of the enabled slices.
	 */
	subslice_mask = (1 << sseu->max_subslices) - 1;
	subslice_mask &= ~((fuse2 & GEN9_F2_SS_DIS_MASK) >>
			   GEN9_F2_SS_DIS_SHIFT);

	/*
	 * Iterate through enabled slices and subslices to
	 * count the total enabled EU.
	 */
	for (s = 0; s < sseu->max_slices; s++) {
		if (!(sseu->slice_mask & BIT(s)))
			/* skip disabled slice */
			continue;

		intel_sseu_set_subslices(sseu, s, sseu->subslice_mask,
					 subslice_mask);

		eu_disable = intel_uncore_read(uncore, GEN9_EU_DISABLE(s));
		for (ss = 0; ss < sseu->max_subslices; ss++) {
			int eu_per_ss;
			u8 eu_disabled_mask;

			if (!intel_sseu_has_subslice(sseu, s, ss))
				/* skip disabled subslice */
				continue;

			eu_disabled_mask = (eu_disable >> (ss * 8)) & eu_mask;

			sseu_set_eus(sseu, s, ss, ~eu_disabled_mask);

			eu_per_ss = sseu->max_eus_per_subslice -
				hweight8(eu_disabled_mask);

			/*
			 * Record which subslice(s) have 7 EUs. We can tune
			 * the hash used to spread work among subslices if
			 * they are unbalanced.
			 */
			if (eu_per_ss == 7)
				sseu->subslice_7eu[s] |= BIT(ss);
		}
	}

	sseu->eu_total = compute_eu_total(sseu);

	/*
	 * SKL is expected to always have a uniform distribution
	 * of EU across subslices with the exception that any one
	 * EU in any one subslice may be fused off for die
	 * recovery. BXT is expected to be perfectly uniform in EU
	 * distribution.
	 */
	sseu->eu_per_subslice =
		intel_sseu_subslice_total(sseu) ?
		DIV_ROUND_UP(sseu->eu_total, intel_sseu_subslice_total(sseu)) :
		0;

	/*
	 * SKL+ supports slice power gating on devices with more than
	 * one slice, and supports EU power gating on devices with
	 * more than one EU pair per subslice. BXT+ supports subslice
	 * power gating on devices with more than one subslice, and
	 * supports EU power gating on devices with more than one EU
	 * pair per subslice.
	 */
	sseu->has_slice_pg =
		!IS_GEN9_LP(i915) && hweight8(sseu->slice_mask) > 1;
	sseu->has_subslice_pg =
		IS_GEN9_LP(i915) && intel_sseu_subslice_total(sseu) > 1;
	sseu->has_eu_pg = sseu->eu_per_subslice > 2;

	if (IS_GEN9_LP(i915)) {
#define IS_SS_DISABLED(ss)	(!(sseu->subslice_mask[0] & BIT(ss)))
		info->has_pooled_eu = hweight8(sseu->subslice_mask[0]) == 3;

		sseu->min_eu_in_pool = 0;
		if (info->has_pooled_eu) {
			if (IS_SS_DISABLED(2) || IS_SS_DISABLED(0))
				sseu->min_eu_in_pool = 3;
			else if (IS_SS_DISABLED(1))
				sseu->min_eu_in_pool = 6;
			else
				sseu->min_eu_in_pool = 9;
		}
#undef IS_SS_DISABLED
	}
}

static void bdw_sseu_info_init(struct intel_gt *gt)
{
	struct sseu_dev_info *sseu = &gt->info.sseu;
	struct intel_uncore *uncore = gt->uncore;
	int s, ss;
	u32 fuse2, subslice_mask, eu_disable[3]; /* s_max */
	u32 eu_disable0, eu_disable1, eu_disable2;

	fuse2 = intel_uncore_read(uncore, GEN8_FUSE2);
	sseu->slice_mask = (fuse2 & GEN8_F2_S_ENA_MASK) >> GEN8_F2_S_ENA_SHIFT;
	intel_sseu_set_info(sseu, 3, 3, 8);

	/*
	 * The subslice disable field is global, i.e. it applies
	 * to each of the enabled slices.
	 */
	subslice_mask = GENMASK(sseu->max_subslices - 1, 0);
	subslice_mask &= ~((fuse2 & GEN8_F2_SS_DIS_MASK) >>
			   GEN8_F2_SS_DIS_SHIFT);
	eu_disable0 = intel_uncore_read(uncore, GEN8_EU_DISABLE0);
	eu_disable1 = intel_uncore_read(uncore, GEN8_EU_DISABLE1);
	eu_disable2 = intel_uncore_read(uncore, GEN8_EU_DISABLE2);
	eu_disable[0] = eu_disable0 & GEN8_EU_DIS0_S0_MASK;
	eu_disable[1] = (eu_disable0 >> GEN8_EU_DIS0_S1_SHIFT) |
		((eu_disable1 & GEN8_EU_DIS1_S1_MASK) <<
		 (32 - GEN8_EU_DIS0_S1_SHIFT));
	eu_disable[2] = (eu_disable1 >> GEN8_EU_DIS1_S2_SHIFT) |
		((eu_disable2 & GEN8_EU_DIS2_S2_MASK) <<
		 (32 - GEN8_EU_DIS1_S2_SHIFT));
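	/*
	 * N.B. with 3 slices, 3 subslices and 8 EUs per subslice, each
	 * slice needs 24 disable bits, so the 72 bits straddle three
	 * 32-bit fuse registers; the shifts above splice slice 1's and
	 * slice 2's bits back together across the register boundaries.
	 */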

	/*
	 * Iterate through enabled slices and subslices to
	 * count the total enabled EU.
	 */
	for (s = 0; s < sseu->max_slices; s++) {
		if (!(sseu->slice_mask & BIT(s)))
			/* skip disabled slice */
			continue;

		intel_sseu_set_subslices(sseu, s, sseu->subslice_mask,
					 subslice_mask);

		for (ss = 0; ss < sseu->max_subslices; ss++) {
			u8 eu_disabled_mask;
			u32 n_disabled;

			if (!intel_sseu_has_subslice(sseu, s, ss))
				/* skip disabled subslice */
				continue;

			eu_disabled_mask =
				eu_disable[s] >> (ss * sseu->max_eus_per_subslice);

			sseu_set_eus(sseu, s, ss, ~eu_disabled_mask);

			n_disabled = hweight8(eu_disabled_mask);

			/* Record which subslices have 7 EUs. */
			if (sseu->max_eus_per_subslice - n_disabled == 7)
				sseu->subslice_7eu[s] |= 1 << ss;
		}
	}

	sseu->eu_total = compute_eu_total(sseu);

	/*
	 * BDW is expected to always have a uniform distribution of EU across
	 * subslices with the exception that any one EU in any one subslice may
	 * be fused off for die recovery.
	 */
	sseu->eu_per_subslice =
		intel_sseu_subslice_total(sseu) ?
		DIV_ROUND_UP(sseu->eu_total, intel_sseu_subslice_total(sseu)) :
		0;

	/*
	 * BDW supports slice power gating on devices with more than
	 * one slice.
	 */
	sseu->has_slice_pg = hweight8(sseu->slice_mask) > 1;
	sseu->has_subslice_pg = 0;
	sseu->has_eu_pg = 0;
}

static void hsw_sseu_info_init(struct intel_gt *gt)
{
	struct drm_i915_private *i915 = gt->i915;
	struct sseu_dev_info *sseu = &gt->info.sseu;
	u32 fuse1;
	u8 subslice_mask = 0;
	int s, ss;

	/*
	 * There isn't a register to tell us how many slices/subslices
	 * there are; we work it out from the PCI IDs here.
	 */
	switch (INTEL_INFO(i915)->gt) {
	default:
		MISSING_CASE(INTEL_INFO(i915)->gt);
		fallthrough;
	case 1:
		sseu->slice_mask = BIT(0);
		subslice_mask = BIT(0);
		break;
	case 2:
		sseu->slice_mask = BIT(0);
		subslice_mask = BIT(0) | BIT(1);
		break;
	case 3:
		sseu->slice_mask = BIT(0) | BIT(1);
		subslice_mask = BIT(0) | BIT(1);
		break;
	}

	fuse1 = intel_uncore_read(gt->uncore, HSW_PAVP_FUSE1);
	switch (REG_FIELD_GET(HSW_F1_EU_DIS_MASK, fuse1)) {
	default:
		MISSING_CASE(REG_FIELD_GET(HSW_F1_EU_DIS_MASK, fuse1));
		fallthrough;
	case HSW_F1_EU_DIS_10EUS:
		sseu->eu_per_subslice = 10;
		break;
	case HSW_F1_EU_DIS_8EUS:
		sseu->eu_per_subslice = 8;
		break;
	case HSW_F1_EU_DIS_6EUS:
		sseu->eu_per_subslice = 6;
		break;
	}

	intel_sseu_set_info(sseu, hweight8(sseu->slice_mask),
			    hweight8(subslice_mask),
			    sseu->eu_per_subslice);

	for (s = 0; s < sseu->max_slices; s++) {
		intel_sseu_set_subslices(sseu, s, sseu->subslice_mask,
					 subslice_mask);

		for (ss = 0; ss < sseu->max_subslices; ss++) {
			sseu_set_eus(sseu, s, ss,
				     (1UL << sseu->eu_per_subslice) - 1);
		}
	}

	sseu->eu_total = compute_eu_total(sseu);

	/* No powergating for you. */
	sseu->has_slice_pg = 0;
	sseu->has_subslice_pg = 0;
	sseu->has_eu_pg = 0;
}

void intel_sseu_info_init(struct intel_gt *gt)
{
	struct drm_i915_private *i915 = gt->i915;

	if (IS_HASWELL(i915))
		hsw_sseu_info_init(gt);
	else if (IS_CHERRYVIEW(i915))
		cherryview_sseu_info_init(gt);
	else if (IS_BROADWELL(i915))
		bdw_sseu_info_init(gt);
	else if (GRAPHICS_VER(i915) == 9)
		gen9_sseu_info_init(gt);
	else if (GRAPHICS_VER(i915) == 11)
		gen11_sseu_info_init(gt);
	else if (GRAPHICS_VER(i915) >= 12)
		gen12_sseu_info_init(gt);
}

u32 intel_sseu_make_rpcs(struct intel_gt *gt,
			 const struct intel_sseu *req_sseu)
{
	struct drm_i915_private *i915 = gt->i915;
	const struct sseu_dev_info *sseu = &gt->info.sseu;
	bool subslice_pg = sseu->has_subslice_pg;
	u8 slices, subslices;
	u32 rpcs = 0;

	/*
	 * No explicit RPCS request is needed to ensure full
	 * slice/subslice/EU enablement prior to Gen9.
	 */
	if (GRAPHICS_VER(i915) < 9)
		return 0;

	/*
	 * If i915/perf is active, we want a stable powergating configuration
	 * on the system. Use the configuration pinned by i915/perf.
	 */
	if (i915->perf.exclusive_stream)
		req_sseu = &i915->perf.sseu;

	slices = hweight8(req_sseu->slice_mask);
	subslices = hweight8(req_sseu->subslice_mask);

	/*
	 * Since the SScount bitfield in GEN8_R_PWR_CLK_STATE is only three bits
	 * wide and Icelake has up to eight subslices, special programming is
	 * needed in order to correctly enable all subslices.
	 *
	 * According to documentation software must consider the configuration
	 * as 2x4x8 and hardware will translate this to 1x8x8.
	 *
	 * Furthermore, even though SScount is three bits, the maximum
	 * documented value for it is four. From this some rules/restrictions
	 * follow:
	 *
	 * 1. If the enabled subslice count is greater than four, two whole
	 *    slices must be enabled instead.
	 *
	 * 2. When more than one slice is enabled, hardware ignores the
	 *    subslice count altogether.
	 *
	 * From these restrictions it follows that it is not possible to enable
	 * a subslice count between the SScount maximum of four and the maximum
	 * number available on a particular SKU. Either all subslices are
	 * enabled, or a count between one and four on the first slice.
	 */
	if (GRAPHICS_VER(i915) == 11 &&
	    slices == 1 &&
	    subslices > min_t(u8, 4, hweight8(sseu->subslice_mask[0]) / 2)) {
		GEM_BUG_ON(subslices & 1);

		subslice_pg = false;
		slices *= 2;
	}
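	/*
	 * Worked example (hypothetical SKU): on an ICL part with 8
	 * subslices in slice 0, a request for 1 slice x 8 subslices
	 * exceeds min(4, 8 / 2) = 4, so it is reprogrammed above as 2
	 * slices with subslice power gating disabled, matching the
	 * 2x4x8 convention described in the comment.
	 */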

	/*
	 * Starting in Gen9, render power gating can leave
	 * slice/subslice/EU in a partially enabled state. We
	 * must make an explicit request through RPCS for full
	 * enablement.
	 */
	if (sseu->has_slice_pg) {
		u32 mask, val = slices;

		if (GRAPHICS_VER(i915) >= 11) {
			mask = GEN11_RPCS_S_CNT_MASK;
			val <<= GEN11_RPCS_S_CNT_SHIFT;
		} else {
			mask = GEN8_RPCS_S_CNT_MASK;
			val <<= GEN8_RPCS_S_CNT_SHIFT;
		}

		GEM_BUG_ON(val & ~mask);
		val &= mask;

		rpcs |= GEN8_RPCS_ENABLE | GEN8_RPCS_S_CNT_ENABLE | val;
	}

	if (subslice_pg) {
		u32 val = subslices;

		val <<= GEN8_RPCS_SS_CNT_SHIFT;

		GEM_BUG_ON(val & ~GEN8_RPCS_SS_CNT_MASK);
		val &= GEN8_RPCS_SS_CNT_MASK;

		rpcs |= GEN8_RPCS_ENABLE | GEN8_RPCS_SS_CNT_ENABLE | val;
	}

	if (sseu->has_eu_pg) {
		u32 val;

		val = req_sseu->min_eus_per_subslice << GEN8_RPCS_EU_MIN_SHIFT;
		GEM_BUG_ON(val & ~GEN8_RPCS_EU_MIN_MASK);
		val &= GEN8_RPCS_EU_MIN_MASK;

		rpcs |= val;

		val = req_sseu->max_eus_per_subslice << GEN8_RPCS_EU_MAX_SHIFT;
		GEM_BUG_ON(val & ~GEN8_RPCS_EU_MAX_MASK);
		val &= GEN8_RPCS_EU_MAX_MASK;

		rpcs |= val;

		rpcs |= GEN8_RPCS_ENABLE;
	}

	return rpcs;
}
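/*
 * Usage sketch (illustrative, not exhaustive): a request for 1 slice
 * and 2 subslices on a platform with slice and subslice power gating
 * yields GEN8_RPCS_ENABLE | GEN8_RPCS_S_CNT_ENABLE |
 * GEN8_RPCS_SS_CNT_ENABLE with the counts packed into the S_CNT and
 * SS_CNT fields; the result is programmed into GEN8_R_PWR_CLK_STATE
 * in the context image.
 */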

void intel_sseu_dump(const struct sseu_dev_info *sseu, struct drm_printer *p)
{
	int s;

	drm_printf(p, "slice total: %u, mask=%04x\n",
		   hweight8(sseu->slice_mask), sseu->slice_mask);
	drm_printf(p, "subslice total: %u\n", intel_sseu_subslice_total(sseu));
	for (s = 0; s < sseu->max_slices; s++) {
		drm_printf(p, "slice%d: %u subslices, mask=%08x\n",
			   s, intel_sseu_subslices_per_slice(sseu, s),
			   intel_sseu_get_subslices(sseu, s));
	}
	drm_printf(p, "EU total: %u\n", sseu->eu_total);
	drm_printf(p, "EU per subslice: %u\n", sseu->eu_per_subslice);
	drm_printf(p, "has slice power gating: %s\n",
		   str_yes_no(sseu->has_slice_pg));
	drm_printf(p, "has subslice power gating: %s\n",
		   str_yes_no(sseu->has_subslice_pg));
	drm_printf(p, "has EU power gating: %s\n",
		   str_yes_no(sseu->has_eu_pg));
}

static void sseu_print_hsw_topology(const struct sseu_dev_info *sseu,
				    struct drm_printer *p)
{
	int s, ss;

	for (s = 0; s < sseu->max_slices; s++) {
		drm_printf(p, "slice%d: %u subslice(s) (0x%08x):\n",
			   s, intel_sseu_subslices_per_slice(sseu, s),
			   intel_sseu_get_subslices(sseu, s));

		for (ss = 0; ss < sseu->max_subslices; ss++) {
			u16 enabled_eus = sseu_get_eus(sseu, s, ss);

			drm_printf(p, "\tsubslice%d: %u EUs (0x%hx)\n",
				   ss, hweight16(enabled_eus), enabled_eus);
		}
	}
}

static void sseu_print_xehp_topology(const struct sseu_dev_info *sseu,
				     struct drm_printer *p)
{
	u32 g_dss_mask = sseu_get_geometry_subslices(sseu);
	u32 c_dss_mask = intel_sseu_get_compute_subslices(sseu);
	int dss;

	for (dss = 0; dss < sseu->max_subslices; dss++) {
		u16 enabled_eus = sseu_get_eus(sseu, 0, dss);

		drm_printf(p, "DSS_%02d: G:%3s C:%3s, %2u EUs (0x%04hx)\n", dss,
			   str_yes_no(g_dss_mask & BIT(dss)),
			   str_yes_no(c_dss_mask & BIT(dss)),
			   hweight16(enabled_eus), enabled_eus);
	}
}

void intel_sseu_print_topology(struct drm_i915_private *i915,
			       const struct sseu_dev_info *sseu,
			       struct drm_printer *p)
{
	if (sseu->max_slices == 0) {
		drm_printf(p, "Unavailable\n");
	} else if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 50)) {
		sseu_print_xehp_topology(sseu, p);
	} else {
		sseu_print_hsw_topology(sseu, p);
	}
}

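/*
 * Worked example (hypothetical values): with dss_per_slice = 16 and
 * dss_mask = 0x0030000f, iteration 0 finds 0x000f within
 * GENMASK(15, 0) and sets slice 0; after the shift, iteration 1 finds
 * 0x0030 and sets slice 1, giving slice_mask = 0x3.
 */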
u16 intel_slicemask_from_dssmask(u64 dss_mask, int dss_per_slice)
{
	u16 slice_mask = 0;
	int i;

	WARN_ON(sizeof(dss_mask) * 8 / dss_per_slice > 8 * sizeof(slice_mask));

	for (i = 0; dss_mask; i++) {
		if (dss_mask & GENMASK(dss_per_slice - 1, 0))
			slice_mask |= BIT(i);

		dss_mask >>= dss_per_slice;
	}

	return slice_mask;
}
796