// SPDX-License-Identifier: MIT
/*
 * Copyright © 2019 Intel Corporation
 */

#include <drm/drm_atomic_state_helper.h>

#include "i915_reg.h"
#include "intel_atomic.h"
#include "intel_bw.h"
#include "intel_cdclk.h"
#include "intel_display_types.h"
#include "intel_pcode.h"
#include "intel_pm.h"

/* Parameters for Qclk Geyserville (QGV) */
struct intel_qgv_point {
	u16 dclk, t_rp, t_rdpre, t_rc, t_ras, t_rcd;
};

struct intel_psf_gv_point {
	u8 clk; /* clock in multiples of 16.6666 MHz */
};

struct intel_qgv_info {
	struct intel_qgv_point points[I915_NUM_QGV_POINTS];
	struct intel_psf_gv_point psf_points[I915_NUM_PSF_GV_POINTS];
	u8 num_points;
	u8 num_psf_points;
	u8 t_bl;
	u8 max_numchannels;
	u8 channel_width;
	u8 deinterleave;
};

static int dg1_mchbar_read_qgv_point_info(struct drm_i915_private *dev_priv,
					  struct intel_qgv_point *sp,
					  int point)
{
	u32 dclk_ratio, dclk_reference;
	u32 val;

	val = intel_uncore_read(&dev_priv->uncore, SA_PERF_STATUS_0_0_0_MCHBAR_PC);
	dclk_ratio = REG_FIELD_GET(DG1_QCLK_RATIO_MASK, val);
	if (val & DG1_QCLK_REFERENCE)
		dclk_reference = 6; /* 6 * 16.666 MHz = 100 MHz */
	else
		dclk_reference = 8; /* 8 * 16.666 MHz = 133 MHz */
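	/*
	 * dclk_ratio * dclk_reference is in units of 16.666 MHz (16667 kHz);
	 * convert it to MHz here.
	 */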
	sp->dclk = DIV_ROUND_UP((16667 * dclk_ratio * dclk_reference) + 500, 1000);

	val = intel_uncore_read(&dev_priv->uncore, SKL_MC_BIOS_DATA_0_0_0_MCHBAR_PCU);
	if (val & DG1_GEAR_TYPE)
		sp->dclk *= 2;

	if (sp->dclk == 0)
		return -EINVAL;

	val = intel_uncore_read(&dev_priv->uncore, MCHBAR_CH0_CR_TC_PRE_0_0_0_MCHBAR);
	sp->t_rp = REG_FIELD_GET(DG1_DRAM_T_RP_MASK, val);
	sp->t_rdpre = REG_FIELD_GET(DG1_DRAM_T_RDPRE_MASK, val);

	val = intel_uncore_read(&dev_priv->uncore, MCHBAR_CH0_CR_TC_PRE_0_0_0_MCHBAR_HIGH);
	sp->t_rcd = REG_FIELD_GET(DG1_DRAM_T_RCD_MASK, val);
	sp->t_ras = REG_FIELD_GET(DG1_DRAM_T_RAS_MASK, val);

	sp->t_rc = sp->t_rp + sp->t_ras;

	return 0;
}

static int icl_pcode_read_qgv_point_info(struct drm_i915_private *dev_priv,
					 struct intel_qgv_point *sp,
					 int point)
{
	u32 val = 0, val2 = 0;
	u16 dclk;
	int ret;

	ret = snb_pcode_read(dev_priv, ICL_PCODE_MEM_SUBSYSYSTEM_INFO |
			     ICL_PCODE_MEM_SS_READ_QGV_POINT_INFO(point),
			     &val, &val2);
	if (ret)
		return ret;

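	/*
	 * Pcode mailbox layout: val carries dclk in bits 15:0, tRP in 23:16
	 * and tRCD in 31:24; val2 carries tRdPre in bits 7:0 and tRAS in 15:8.
	 */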
	dclk = val & 0xffff;
	sp->dclk = DIV_ROUND_UP((16667 * dclk) + (DISPLAY_VER(dev_priv) > 11 ? 500 : 0), 1000);
	sp->t_rp = (val & 0xff0000) >> 16;
	sp->t_rcd = (val & 0xff000000) >> 24;

	sp->t_rdpre = val2 & 0xff;
	sp->t_ras = (val2 & 0xff00) >> 8;

	sp->t_rc = sp->t_rp + sp->t_ras;

	return 0;
}

static int adls_pcode_read_psf_gv_point_info(struct drm_i915_private *dev_priv,
					    struct intel_psf_gv_point *points)
{
	u32 val = 0;
	int ret;
	int i;

	ret = snb_pcode_read(dev_priv, ICL_PCODE_MEM_SUBSYSYSTEM_INFO |
			     ADL_PCODE_MEM_SS_READ_PSF_GV_INFO, &val, NULL);
	if (ret)
		return ret;

	for (i = 0; i < I915_NUM_PSF_GV_POINTS; i++) {
		points[i].clk = val & 0xff;
		val >>= 8;
	}

	return 0;
}

int icl_pcode_restrict_qgv_points(struct drm_i915_private *dev_priv,
				  u32 points_mask)
{
	int ret;

	/* bspec says to keep retrying for at least 1 ms */
	ret = skl_pcode_request(dev_priv, ICL_PCODE_SAGV_DE_MEM_SS_CONFIG,
				points_mask,
				ICL_PCODE_POINTS_RESTRICTED_MASK,
				ICL_PCODE_POINTS_RESTRICTED,
				1);

	if (ret < 0) {
		drm_err(&dev_priv->drm, "Failed to disable qgv points (%d) points: 0x%x\n", ret, points_mask);
		return ret;
	}

	return 0;
}

static int icl_get_qgv_points(struct drm_i915_private *dev_priv,
			      struct intel_qgv_info *qi,
			      bool is_y_tile)
{
	const struct dram_info *dram_info = &dev_priv->dram_info;
	int i, ret;

	qi->num_points = dram_info->num_qgv_points;
	qi->num_psf_points = dram_info->num_psf_gv_points;

	if (DISPLAY_VER(dev_priv) >= 12)
		switch (dram_info->type) {
		case INTEL_DRAM_DDR4:
			qi->t_bl = is_y_tile ? 8 : 4;
			qi->max_numchannels = 2;
			qi->channel_width = 64;
			qi->deinterleave = is_y_tile ? 1 : 2;
			break;
		case INTEL_DRAM_DDR5:
			qi->t_bl = is_y_tile ? 16 : 8;
			qi->max_numchannels = 4;
			qi->channel_width = 32;
			qi->deinterleave = is_y_tile ? 1 : 2;
			break;
		case INTEL_DRAM_LPDDR4:
			if (IS_ROCKETLAKE(dev_priv)) {
				qi->t_bl = 8;
				qi->max_numchannels = 4;
				qi->channel_width = 32;
				qi->deinterleave = 2;
				break;
			}
			fallthrough;
		case INTEL_DRAM_LPDDR5:
			qi->t_bl = 16;
			qi->max_numchannels = 8;
			qi->channel_width = 16;
			qi->deinterleave = is_y_tile ? 2 : 4;
			break;
		default:
			qi->t_bl = 16;
			qi->max_numchannels = 1;
			break;
		}
	else if (DISPLAY_VER(dev_priv) == 11) {
		qi->t_bl = dev_priv->dram_info.type == INTEL_DRAM_DDR4 ? 4 : 8;
		qi->max_numchannels = 1;
	}

	if (drm_WARN_ON(&dev_priv->drm,
			qi->num_points > ARRAY_SIZE(qi->points)))
		qi->num_points = ARRAY_SIZE(qi->points);

	for (i = 0; i < qi->num_points; i++) {
		struct intel_qgv_point *sp = &qi->points[i];

		if (IS_DG1(dev_priv))
			ret = dg1_mchbar_read_qgv_point_info(dev_priv, sp, i);
		else
			ret = icl_pcode_read_qgv_point_info(dev_priv, sp, i);

		if (ret)
			return ret;

		drm_dbg_kms(&dev_priv->drm,
			    "QGV %d: DCLK=%d tRP=%d tRDPRE=%d tRAS=%d tRCD=%d tRC=%d\n",
			    i, sp->dclk, sp->t_rp, sp->t_rdpre, sp->t_ras,
			    sp->t_rcd, sp->t_rc);
	}

	if (qi->num_psf_points > 0) {
		ret = adls_pcode_read_psf_gv_point_info(dev_priv, qi->psf_points);
		if (ret) {
			drm_err(&dev_priv->drm, "Failed to read PSF point data; PSF points will not be considered in bandwidth calculations.\n");
			qi->num_psf_points = 0;
		}

		for (i = 0; i < qi->num_psf_points; i++)
			drm_dbg_kms(&dev_priv->drm,
				    "PSF GV %d: CLK=%d\n",
				    i, qi->psf_points[i].clk);
	}

	return 0;
}

static int adl_calc_psf_bw(int clk)
{
	/*
	 * clk is given in multiples of 16.666 MHz (100/6 MHz).
	 * According to BSpec, the PSF GV bandwidth is
	 * calculated as BW = 64 * clk * 16.666 MHz.
	 */
	return DIV_ROUND_CLOSEST(64 * clk * 100, 6);
}

static int icl_sagv_max_dclk(const struct intel_qgv_info *qi)
{
	u16 dclk = 0;
	int i;

	for (i = 0; i < qi->num_points; i++)
		dclk = max(dclk, qi->points[i].dclk);

	return dclk;
}

struct intel_sa_info {
	u16 displayrtids;
	u8 deburst, deprogbwlimit, derating;
};

static const struct intel_sa_info icl_sa_info = {
	.deburst = 8,
	.deprogbwlimit = 25, /* GB/s */
	.displayrtids = 128,
	.derating = 10,
};

static const struct intel_sa_info tgl_sa_info = {
	.deburst = 16,
	.deprogbwlimit = 34, /* GB/s */
	.displayrtids = 256,
	.derating = 10,
};

static const struct intel_sa_info rkl_sa_info = {
	.deburst = 8,
	.deprogbwlimit = 20, /* GB/s */
	.displayrtids = 128,
	.derating = 10,
};

static const struct intel_sa_info adls_sa_info = {
	.deburst = 16,
	.deprogbwlimit = 38, /* GB/s */
	.displayrtids = 256,
	.derating = 10,
};

static const struct intel_sa_info adlp_sa_info = {
	.deburst = 16,
	.deprogbwlimit = 38, /* GB/s */
	.displayrtids = 256,
	.derating = 20,
};

static int icl_get_bw_info(struct drm_i915_private *dev_priv, const struct intel_sa_info *sa)
{
	struct intel_qgv_info qi = {};
	bool is_y_tile = true; /* assume y tile may be used */
	int num_channels = max_t(u8, 1, dev_priv->dram_info.num_channels);
	int ipqdepth, ipqdepthpch = 16;
	int dclk_max;
	int maxdebw;
	int num_groups = ARRAY_SIZE(dev_priv->max_bw);
	int i, ret;

	ret = icl_get_qgv_points(dev_priv, &qi, is_y_tile);
	if (ret) {
		drm_dbg_kms(&dev_priv->drm,
			    "Failed to get memory subsystem information, ignoring bandwidth limits");
		return ret;
	}

	dclk_max = icl_sagv_max_dclk(&qi);
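	/*
	 * Derated BW cap: the lower of the SA programmed limit (GB/s scaled
	 * to MB/s) and 60% of the peak dclk bandwidth, assuming 16 bytes
	 * transferred per dclk.
	 */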
	maxdebw = min(sa->deprogbwlimit * 1000, dclk_max * 16 * 6 / 10);
	ipqdepth = min(ipqdepthpch, sa->displayrtids / num_channels);
	qi.deinterleave = DIV_ROUND_UP(num_channels, is_y_tile ? 4 : 2);

	for (i = 0; i < num_groups; i++) {
		struct intel_bw_info *bi = &dev_priv->max_bw[i];
		int clpchgroup;
		int j;

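		/*
		 * Cachelines per channel group double with each successive
		 * BW group, so each group supports fewer simultaneous planes.
		 */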
		clpchgroup = (sa->deburst * qi.deinterleave / num_channels) << i;
		bi->num_planes = (ipqdepth - clpchgroup) / clpchgroup + 1;

		bi->num_qgv_points = qi.num_points;
		bi->num_psf_gv_points = qi.num_psf_points;

		for (j = 0; j < qi.num_points; j++) {
			const struct intel_qgv_point *sp = &qi.points[j];
			int ct, bw;

			/*
			 * Max row cycle time
			 *
			 * FIXME what is the logic behind the
			 * assumed burst length?
			 */
			ct = max_t(int, sp->t_rc, sp->t_rp + sp->t_rcd +
				   (clpchgroup - 1) * qi.t_bl + sp->t_rdpre);
			bw = DIV_ROUND_UP(sp->dclk * clpchgroup * 32 * num_channels, ct);

			bi->deratedbw[j] = min(maxdebw,
					       bw * (100 - sa->derating) / 100);

			drm_dbg_kms(&dev_priv->drm,
				    "BW%d / QGV %d: num_planes=%d deratedbw=%u\n",
				    i, j, bi->num_planes, bi->deratedbw[j]);
		}
	}
	/*
	 * If SAGV is disabled in the BIOS, we always get just one QGV point,
	 * but we can't send PCode commands to restrict it as that will fail
	 * and is pointless anyway.
	 */
	if (qi.num_points == 1)
		dev_priv->sagv_status = I915_SAGV_NOT_CONTROLLED;
	else
		dev_priv->sagv_status = I915_SAGV_ENABLED;

	return 0;
}

static int tgl_get_bw_info(struct drm_i915_private *dev_priv, const struct intel_sa_info *sa)
{
	struct intel_qgv_info qi = {};
	const struct dram_info *dram_info = &dev_priv->dram_info;
	bool is_y_tile = true; /* assume y tile may be used */
	int num_channels = max_t(u8, 1, dev_priv->dram_info.num_channels);
	int ipqdepth, ipqdepthpch = 16;
	int dclk_max;
	int maxdebw, peakbw;
	int clperchgroup;
	int num_groups = ARRAY_SIZE(dev_priv->max_bw);
	int i, ret;

	ret = icl_get_qgv_points(dev_priv, &qi, is_y_tile);
	if (ret) {
		drm_dbg_kms(&dev_priv->drm,
			    "Failed to get memory subsystem information, ignoring bandwidth limits");
		return ret;
	}

	if (dram_info->type == INTEL_DRAM_LPDDR4 || dram_info->type == INTEL_DRAM_LPDDR5)
		num_channels *= 2;

	qi.deinterleave = qi.deinterleave ? : DIV_ROUND_UP(num_channels, is_y_tile ? 4 : 2);

	if (num_channels < qi.max_numchannels && DISPLAY_VER(dev_priv) >= 12)
		qi.deinterleave = max(DIV_ROUND_UP(qi.deinterleave, 2), 1);

	if (DISPLAY_VER(dev_priv) > 11 && num_channels > qi.max_numchannels)
		drm_warn(&dev_priv->drm, "Number of channels exceeds max number of channels.");
	if (qi.max_numchannels != 0)
		num_channels = min_t(u8, num_channels, qi.max_numchannels);

	dclk_max = icl_sagv_max_dclk(&qi);

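	/*
	 * Peak bandwidth in MB/s: channels * bytes per clock per channel *
	 * dclk (MHz).
	 */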
	peakbw = num_channels * DIV_ROUND_UP(qi.channel_width, 8) * dclk_max;
	maxdebw = min(sa->deprogbwlimit * 1000, peakbw * 6 / 10); /* 60% */

	ipqdepth = min(ipqdepthpch, sa->displayrtids / num_channels);
	/*
	 * clperchgroup = 4kpagespermempage * clperchperblock,
	 * clperchperblock = 8 / num_channels * interleave
	 */
	clperchgroup = 4 * DIV_ROUND_UP(8, num_channels) * qi.deinterleave;

	for (i = 0; i < num_groups; i++) {
		struct intel_bw_info *bi = &dev_priv->max_bw[i];
		struct intel_bw_info *bi_next;
		int clpchgroup;
		int j;

		clpchgroup = (sa->deburst * qi.deinterleave / num_channels) << i;

		if (i < num_groups - 1) {
			bi_next = &dev_priv->max_bw[i + 1];

			if (clpchgroup < clperchgroup)
				bi_next->num_planes = (ipqdepth - clpchgroup) /
						      clpchgroup + 1;
			else
				bi_next->num_planes = 0;
		}

		bi->num_qgv_points = qi.num_points;
		bi->num_psf_gv_points = qi.num_psf_points;

		for (j = 0; j < qi.num_points; j++) {
			const struct intel_qgv_point *sp = &qi.points[j];
			int ct, bw;

			/*
			 * Max row cycle time
			 *
			 * FIXME what is the logic behind the
			 * assumed burst length?
			 */
			ct = max_t(int, sp->t_rc, sp->t_rp + sp->t_rcd +
				   (clpchgroup - 1) * qi.t_bl + sp->t_rdpre);
			bw = DIV_ROUND_UP(sp->dclk * clpchgroup * 32 * num_channels, ct);

			bi->deratedbw[j] = min(maxdebw,
					       bw * (100 - sa->derating) / 100);

			drm_dbg_kms(&dev_priv->drm,
				    "BW%d / QGV %d: num_planes=%d deratedbw=%u\n",
				    i, j, bi->num_planes, bi->deratedbw[j]);
		}

		for (j = 0; j < qi.num_psf_points; j++) {
			const struct intel_psf_gv_point *sp = &qi.psf_points[j];

			bi->psf_bw[j] = adl_calc_psf_bw(sp->clk);

			drm_dbg_kms(&dev_priv->drm,
				    "BW%d / PSF GV %d: num_planes=%d bw=%u\n",
				    i, j, bi->num_planes, bi->psf_bw[j]);
		}
	}

	/*
	 * If SAGV is disabled in the BIOS, we always get just one QGV point,
	 * but we can't send PCode commands to restrict it as that will fail
	 * and is pointless anyway.
	 */
	if (qi.num_points == 1)
		dev_priv->sagv_status = I915_SAGV_NOT_CONTROLLED;
	else
		dev_priv->sagv_status = I915_SAGV_ENABLED;

	return 0;
}

static void dg2_get_bw_info(struct drm_i915_private *i915)
{
	struct intel_bw_info *bi = &i915->max_bw[0];

	/*
	 * DG2 doesn't have SAGV or QGV points, just a constant max bandwidth
	 * that doesn't depend on the number of planes enabled.  Create a
	 * single dummy QGV point to reflect that.  DG2-G10 platforms have a
	 * constant 50 GB/s bandwidth, whereas DG2-G11 platforms have 38 GB/s.
	 */
	bi->num_planes = 1;
	bi->num_qgv_points = 1;
	if (IS_DG2_G11(i915))
		bi->deratedbw[0] = 38000;
	else
		bi->deratedbw[0] = 50000;

	i915->sagv_status = I915_SAGV_NOT_CONTROLLED;
}

static unsigned int icl_max_bw(struct drm_i915_private *dev_priv,
			       int num_planes, int qgv_point)
{
	int i;

	/*
	 * Let's return max bw for 0 planes
	 */
	num_planes = max(1, num_planes);

	for (i = 0; i < ARRAY_SIZE(dev_priv->max_bw); i++) {
		const struct intel_bw_info *bi =
			&dev_priv->max_bw[i];

		/*
		 * Pcode will not expose all QGV points when
		 * SAGV is forced to off/min/med/max.
		 */
		if (qgv_point >= bi->num_qgv_points)
			return UINT_MAX;

		if (num_planes >= bi->num_planes)
			return bi->deratedbw[qgv_point];
	}

	return 0;
}

static unsigned int tgl_max_bw(struct drm_i915_private *dev_priv,
			       int num_planes, int qgv_point)
{
	int i;

	/*
	 * Let's return max bw for 0 planes
	 */
	num_planes = max(1, num_planes);

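	/*
	 * Search from the last BW group towards the first and return the
	 * first group that supports at least the given number of planes.
	 */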
	for (i = ARRAY_SIZE(dev_priv->max_bw) - 1; i >= 0; i--) {
		const struct intel_bw_info *bi =
			&dev_priv->max_bw[i];

		/*
		 * Pcode will not expose all QGV points when
		 * SAGV is forced to off/min/med/max.
		 */
		if (qgv_point >= bi->num_qgv_points)
			return UINT_MAX;

		if (num_planes <= bi->num_planes)
			return bi->deratedbw[qgv_point];
	}

	return dev_priv->max_bw[0].deratedbw[qgv_point];
}

static unsigned int adl_psf_bw(struct drm_i915_private *dev_priv,
			       int psf_gv_point)
{
	const struct intel_bw_info *bi =
			&dev_priv->max_bw[0];

	return bi->psf_bw[psf_gv_point];
}

void intel_bw_init_hw(struct drm_i915_private *dev_priv)
{
	if (!HAS_DISPLAY(dev_priv))
		return;

	if (IS_DG2(dev_priv))
		dg2_get_bw_info(dev_priv);
	else if (IS_ALDERLAKE_P(dev_priv))
		tgl_get_bw_info(dev_priv, &adlp_sa_info);
	else if (IS_ALDERLAKE_S(dev_priv))
		tgl_get_bw_info(dev_priv, &adls_sa_info);
	else if (IS_ROCKETLAKE(dev_priv))
		tgl_get_bw_info(dev_priv, &rkl_sa_info);
	else if (DISPLAY_VER(dev_priv) == 12)
		tgl_get_bw_info(dev_priv, &tgl_sa_info);
	else if (DISPLAY_VER(dev_priv) == 11)
		icl_get_bw_info(dev_priv, &icl_sa_info);
}

static unsigned int intel_bw_crtc_num_active_planes(const struct intel_crtc_state *crtc_state)
{
	/*
	 * We assume cursors are small enough
	 * to not cause bandwidth problems.
	 */
	return hweight8(crtc_state->active_planes & ~BIT(PLANE_CURSOR));
}

static unsigned int intel_bw_crtc_data_rate(const struct intel_crtc_state *crtc_state)
{
	struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
	unsigned int data_rate = 0;
	enum plane_id plane_id;

	for_each_plane_id_on_crtc(crtc, plane_id) {
		/*
		 * We assume cursors are small enough
		 * to not cause bandwidth problems.
		 */
		if (plane_id == PLANE_CURSOR)
			continue;

		data_rate += crtc_state->data_rate[plane_id];
	}

	return data_rate;
}

void intel_bw_crtc_update(struct intel_bw_state *bw_state,
			  const struct intel_crtc_state *crtc_state)
{
	struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
	struct drm_i915_private *i915 = to_i915(crtc->base.dev);

	bw_state->data_rate[crtc->pipe] =
		intel_bw_crtc_data_rate(crtc_state);
	bw_state->num_active_planes[crtc->pipe] =
		intel_bw_crtc_num_active_planes(crtc_state);

	drm_dbg_kms(&i915->drm, "pipe %c data rate %u num active planes %u\n",
		    pipe_name(crtc->pipe),
		    bw_state->data_rate[crtc->pipe],
		    bw_state->num_active_planes[crtc->pipe]);
}

static unsigned int intel_bw_num_active_planes(struct drm_i915_private *dev_priv,
					       const struct intel_bw_state *bw_state)
{
	unsigned int num_active_planes = 0;
	enum pipe pipe;

	for_each_pipe(dev_priv, pipe)
		num_active_planes += bw_state->num_active_planes[pipe];

	return num_active_planes;
}

static unsigned int intel_bw_data_rate(struct drm_i915_private *dev_priv,
				       const struct intel_bw_state *bw_state)
{
	unsigned int data_rate = 0;
	enum pipe pipe;

	for_each_pipe(dev_priv, pipe)
		data_rate += bw_state->data_rate[pipe];

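	/* Add a 5% margin to the data rate when VT-d is active (display ver 13+). */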
	if (DISPLAY_VER(dev_priv) >= 13 && intel_vtd_active(dev_priv))
		data_rate = data_rate * 105 / 100;

	return data_rate;
}

struct intel_bw_state *
intel_atomic_get_old_bw_state(struct intel_atomic_state *state)
{
	struct drm_i915_private *dev_priv = to_i915(state->base.dev);
	struct intel_global_state *bw_state;

	bw_state = intel_atomic_get_old_global_obj_state(state, &dev_priv->bw_obj);

	return to_intel_bw_state(bw_state);
}

struct intel_bw_state *
intel_atomic_get_new_bw_state(struct intel_atomic_state *state)
{
	struct drm_i915_private *dev_priv = to_i915(state->base.dev);
	struct intel_global_state *bw_state;

	bw_state = intel_atomic_get_new_global_obj_state(state, &dev_priv->bw_obj);

	return to_intel_bw_state(bw_state);
}

struct intel_bw_state *
intel_atomic_get_bw_state(struct intel_atomic_state *state)
{
	struct drm_i915_private *dev_priv = to_i915(state->base.dev);
	struct intel_global_state *bw_state;

	bw_state = intel_atomic_get_global_obj_state(state, &dev_priv->bw_obj);
	if (IS_ERR(bw_state))
		return ERR_CAST(bw_state);

	return to_intel_bw_state(bw_state);
}

int skl_bw_calc_min_cdclk(struct intel_atomic_state *state)
{
	struct drm_i915_private *dev_priv = to_i915(state->base.dev);
	struct intel_bw_state *new_bw_state = NULL;
	struct intel_bw_state *old_bw_state = NULL;
	const struct intel_crtc_state *crtc_state;
	struct intel_crtc *crtc;
	int max_bw = 0;
	enum pipe pipe;
	int i;

	for_each_new_intel_crtc_in_state(state, crtc, crtc_state, i) {
		enum plane_id plane_id;
		struct intel_dbuf_bw *crtc_bw;

		new_bw_state = intel_atomic_get_bw_state(state);
		if (IS_ERR(new_bw_state))
			return PTR_ERR(new_bw_state);

		old_bw_state = intel_atomic_get_old_bw_state(state);

		crtc_bw = &new_bw_state->dbuf_bw[crtc->pipe];

		memset(&crtc_bw->used_bw, 0, sizeof(crtc_bw->used_bw));

		if (!crtc_state->hw.active)
			continue;

		for_each_plane_id_on_crtc(crtc, plane_id) {
			const struct skl_ddb_entry *plane_alloc =
				&crtc_state->wm.skl.plane_ddb_y[plane_id];
			const struct skl_ddb_entry *uv_plane_alloc =
				&crtc_state->wm.skl.plane_ddb_uv[plane_id];
			unsigned int data_rate = crtc_state->data_rate[plane_id];
			unsigned int dbuf_mask = 0;
			enum dbuf_slice slice;

			dbuf_mask |= skl_ddb_dbuf_slice_mask(dev_priv, plane_alloc);
			dbuf_mask |= skl_ddb_dbuf_slice_mask(dev_priv, uv_plane_alloc);

			/*
			 * FIXME: To calculate this more properly we probably
			 * need to split the per-plane data_rate into data_rate_y
			 * and data_rate_uv for multiplanar formats, so that it
			 * doesn't get accounted twice if the two planes happen
			 * to reside on different slices.
			 * However, for pre-icl this works anyway because we
			 * have only a single slice, and for icl+ the uv plane
			 * has a non-zero data rate. So in the worst case these
			 * calculations are a bit pessimistic, which shouldn't
			 * pose any significant problem anyway.
			 */
			for_each_dbuf_slice_in_mask(dev_priv, slice, dbuf_mask)
				crtc_bw->used_bw[slice] += data_rate;
		}
	}

	if (!old_bw_state)
		return 0;

	for_each_pipe(dev_priv, pipe) {
		struct intel_dbuf_bw *crtc_bw;
		enum dbuf_slice slice;

		crtc_bw = &new_bw_state->dbuf_bw[pipe];

		for_each_dbuf_slice(dev_priv, slice) {
			/*
			 * Current experimental observations show that contrary
			 * to BSpec we get underruns once we exceed 64 * CDCLK
			 * for slices in total.
			 * As a temporary measure, in order not to keep CDCLK
			 * bumped up all the time, we calculate CDCLK according
			 * to this formula for the overall bw consumed by slices.
			 */
			max_bw += crtc_bw->used_bw[slice];
		}
	}

	new_bw_state->min_cdclk = max_bw / 64;

	if (new_bw_state->min_cdclk != old_bw_state->min_cdclk) {
		int ret = intel_atomic_lock_global_state(&new_bw_state->base);

		if (ret)
			return ret;
	}

	return 0;
}

int intel_bw_calc_min_cdclk(struct intel_atomic_state *state)
{
	struct drm_i915_private *dev_priv = to_i915(state->base.dev);
	struct intel_bw_state *new_bw_state = NULL;
	struct intel_bw_state *old_bw_state = NULL;
	const struct intel_crtc_state *crtc_state;
	struct intel_crtc *crtc;
	int min_cdclk = 0;
	enum pipe pipe;
	int i;

	for_each_new_intel_crtc_in_state(state, crtc, crtc_state, i) {
		new_bw_state = intel_atomic_get_bw_state(state);
		if (IS_ERR(new_bw_state))
			return PTR_ERR(new_bw_state);

		old_bw_state = intel_atomic_get_old_bw_state(state);
	}

	if (!old_bw_state)
		return 0;

	for_each_pipe(dev_priv, pipe) {
		struct intel_cdclk_state *cdclk_state;

		cdclk_state = intel_atomic_get_new_cdclk_state(state);
		if (!cdclk_state)
			return 0;

		min_cdclk = max(cdclk_state->min_cdclk[pipe], min_cdclk);
	}

	new_bw_state->min_cdclk = min_cdclk;

	if (new_bw_state->min_cdclk != old_bw_state->min_cdclk) {
		int ret = intel_atomic_lock_global_state(&new_bw_state->base);

		if (ret)
			return ret;
	}

	return 0;
}

int intel_bw_atomic_check(struct intel_atomic_state *state)
{
	struct drm_i915_private *dev_priv = to_i915(state->base.dev);
	struct intel_crtc_state *new_crtc_state, *old_crtc_state;
	struct intel_bw_state *new_bw_state = NULL;
	const struct intel_bw_state *old_bw_state = NULL;
	unsigned int data_rate;
	unsigned int num_active_planes;
	struct intel_crtc *crtc;
	int i, ret;
	u32 allowed_points = 0;
	unsigned int max_bw_point = 0, max_bw = 0;
	unsigned int num_qgv_points = dev_priv->max_bw[0].num_qgv_points;
	unsigned int num_psf_gv_points = dev_priv->max_bw[0].num_psf_gv_points;
	u32 mask = 0;

	/* FIXME earlier gens need some checks too */
	if (DISPLAY_VER(dev_priv) < 11)
		return 0;

	/*
	 * We can _not_ use the whole ADLS_QGV_PT_MASK here, as PCode rejects
	 * the request if we try to mask any points it hasn't advertised.
	 * So we must operate only on the points returned by PCode.
	 */
	if (num_qgv_points > 0)
		mask |= REG_GENMASK(num_qgv_points - 1, 0);

	if (num_psf_gv_points > 0)
		mask |= REG_GENMASK(num_psf_gv_points - 1, 0) << ADLS_PSF_PT_SHIFT;

	for_each_oldnew_intel_crtc_in_state(state, crtc, old_crtc_state,
					    new_crtc_state, i) {
		unsigned int old_data_rate =
			intel_bw_crtc_data_rate(old_crtc_state);
		unsigned int new_data_rate =
			intel_bw_crtc_data_rate(new_crtc_state);
		unsigned int old_active_planes =
			intel_bw_crtc_num_active_planes(old_crtc_state);
		unsigned int new_active_planes =
			intel_bw_crtc_num_active_planes(new_crtc_state);

		/*
		 * Avoid locking the bw state when
		 * nothing significant has changed.
		 */
		if (old_data_rate == new_data_rate &&
		    old_active_planes == new_active_planes)
			continue;

		new_bw_state = intel_atomic_get_bw_state(state);
		if (IS_ERR(new_bw_state))
			return PTR_ERR(new_bw_state);

		new_bw_state->data_rate[crtc->pipe] = new_data_rate;
		new_bw_state->num_active_planes[crtc->pipe] = new_active_planes;

		drm_dbg_kms(&dev_priv->drm,
			    "pipe %c data rate %u num active planes %u\n",
			    pipe_name(crtc->pipe),
			    new_bw_state->data_rate[crtc->pipe],
			    new_bw_state->num_active_planes[crtc->pipe]);
	}

	if (!new_bw_state)
		return 0;

	ret = intel_atomic_lock_global_state(&new_bw_state->base);
	if (ret)
		return ret;

	data_rate = intel_bw_data_rate(dev_priv, new_bw_state);
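	/*
	 * Convert the aggregate data rate into the same units as the QGV
	 * point bandwidth values it is compared against below.
	 */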
	data_rate = DIV_ROUND_UP(data_rate, 1000);

	num_active_planes = intel_bw_num_active_planes(dev_priv, new_bw_state);

	for (i = 0; i < num_qgv_points; i++) {
		unsigned int max_data_rate;

		if (DISPLAY_VER(dev_priv) > 11)
			max_data_rate = tgl_max_bw(dev_priv, num_active_planes, i);
		else
			max_data_rate = icl_max_bw(dev_priv, num_active_planes, i);
		/*
		 * We need to know which qgv point gives us
		 * maximum bandwidth in order to disable SAGV
		 * if we find that we exceed SAGV block time
		 * with watermarks. By that moment we already
		 * have those, as they are calculated earlier
		 * in intel_atomic_check().
		 */
		if (max_data_rate > max_bw) {
			max_bw_point = i;
			max_bw = max_data_rate;
		}
		if (max_data_rate >= data_rate)
			allowed_points |= REG_FIELD_PREP(ADLS_QGV_PT_MASK, BIT(i));

		drm_dbg_kms(&dev_priv->drm, "QGV point %d: max bw %d required %d\n",
			    i, max_data_rate, data_rate);
	}

	for (i = 0; i < num_psf_gv_points; i++) {
		unsigned int max_data_rate = adl_psf_bw(dev_priv, i);

		if (max_data_rate >= data_rate)
			allowed_points |= REG_FIELD_PREP(ADLS_PSF_PT_MASK, BIT(i));

		drm_dbg_kms(&dev_priv->drm, "PSF GV point %d: max bw %d"
			    " required %d\n",
			    i, max_data_rate, data_rate);
	}

	/*
	 * BSpec states that we should always have at least one allowed point
	 * left, so if we don't, simply reject the configuration for obvious
	 * reasons.
	 */
	if ((allowed_points & ADLS_QGV_PT_MASK) == 0) {
		drm_dbg_kms(&dev_priv->drm, "No QGV points provide sufficient memory"
			    " bandwidth %d for display configuration (%d active planes).\n",
			    data_rate, num_active_planes);
		return -EINVAL;
	}

	if (num_psf_gv_points > 0) {
		if ((allowed_points & ADLS_PSF_PT_MASK) == 0) {
			drm_dbg_kms(&dev_priv->drm, "No PSF GV points provide sufficient memory"
				    " bandwidth %d for display configuration (%d active planes).\n",
				    data_rate, num_active_planes);
			return -EINVAL;
		}
	}

	/*
	 * Leave only a single point with the highest bandwidth if we
	 * can't enable SAGV due to the increased memory latency it may
	 * cause.
	 */
	if (!intel_can_enable_sagv(dev_priv, new_bw_state)) {
		allowed_points = BIT(max_bw_point);
		drm_dbg_kms(&dev_priv->drm, "No SAGV, using single QGV point %d\n",
			    max_bw_point);
	}
	/*
	 * We store the ones which need to be masked as that is what PCode
	 * actually accepts as a parameter.
	 */
	new_bw_state->qgv_points_mask = ~allowed_points & mask;

	old_bw_state = intel_atomic_get_old_bw_state(state);
	/*
	 * If the actual mask has changed we need to make sure that
	 * the commits are serialized (in case this is a nomodeset,
	 * nonblocking commit).
	 */
	if (new_bw_state->qgv_points_mask != old_bw_state->qgv_points_mask) {
		ret = intel_atomic_serialize_global_state(&new_bw_state->base);
		if (ret)
			return ret;
	}

	return 0;
}

static struct intel_global_state *
intel_bw_duplicate_state(struct intel_global_obj *obj)
{
	struct intel_bw_state *state;

	state = kmemdup(obj->state, sizeof(*state), GFP_KERNEL);
	if (!state)
		return NULL;

	return &state->base;
}

static void intel_bw_destroy_state(struct intel_global_obj *obj,
				   struct intel_global_state *state)
{
	kfree(state);
}

static const struct intel_global_state_funcs intel_bw_funcs = {
	.atomic_duplicate_state = intel_bw_duplicate_state,
	.atomic_destroy_state = intel_bw_destroy_state,
};

int intel_bw_init(struct drm_i915_private *dev_priv)
{
	struct intel_bw_state *state;

	state = kzalloc(sizeof(*state), GFP_KERNEL);
	if (!state)
		return -ENOMEM;

	intel_atomic_global_obj_init(dev_priv, &dev_priv->bw_obj,
				     &state->base, &intel_bw_funcs);

	return 0;
}