// SPDX-License-Identifier: MIT
/*
 * Copyright © 2019 Intel Corporation
 */

#include <drm/drm_atomic_state_helper.h>

#include "i915_reg.h"
#include "i915_utils.h"
#include "intel_atomic.h"
#include "intel_bw.h"
#include "intel_cdclk.h"
#include "intel_display_types.h"
#include "intel_mchbar_regs.h"
#include "intel_pcode.h"
#include "intel_pm.h"

/* Parameters for Qclk Geyserville (QGV) */
struct intel_qgv_point {
	u16 dclk, t_rp, t_rdpre, t_rc, t_ras, t_rcd;
};

struct intel_psf_gv_point {
	u8 clk; /* clock in multiples of 16.6666 MHz */
};

struct intel_qgv_info {
	struct intel_qgv_point points[I915_NUM_QGV_POINTS];
	struct intel_psf_gv_point psf_points[I915_NUM_PSF_GV_POINTS];
	u8 num_points;
	u8 num_psf_points;
	u8 t_bl;
	u8 max_numchannels;
	u8 channel_width;
	u8 deinterleave;
};

static int dg1_mchbar_read_qgv_point_info(struct drm_i915_private *dev_priv,
					  struct intel_qgv_point *sp,
					  int point)
{
	u32 dclk_ratio, dclk_reference;
	u32 val;

	val = intel_uncore_read(&dev_priv->uncore, SA_PERF_STATUS_0_0_0_MCHBAR_PC);
	dclk_ratio = REG_FIELD_GET(DG1_QCLK_RATIO_MASK, val);
	if (val & DG1_QCLK_REFERENCE)
		dclk_reference = 6; /* 6 * 16.666 MHz = 100 MHz */
	else
		dclk_reference = 8; /* 8 * 16.666 MHz = 133 MHz */
	sp->dclk = DIV_ROUND_UP((16667 * dclk_ratio * dclk_reference) + 500, 1000);

	val = intel_uncore_read(&dev_priv->uncore, SKL_MC_BIOS_DATA_0_0_0_MCHBAR_PCU);
	if (val & DG1_GEAR_TYPE)
		sp->dclk *= 2;

	if (sp->dclk == 0)
		return -EINVAL;

	val = intel_uncore_read(&dev_priv->uncore, MCHBAR_CH0_CR_TC_PRE_0_0_0_MCHBAR);
	sp->t_rp = REG_FIELD_GET(DG1_DRAM_T_RP_MASK, val);
	sp->t_rdpre = REG_FIELD_GET(DG1_DRAM_T_RDPRE_MASK, val);

	val = intel_uncore_read(&dev_priv->uncore, MCHBAR_CH0_CR_TC_PRE_0_0_0_MCHBAR_HIGH);
	sp->t_rcd = REG_FIELD_GET(DG1_DRAM_T_RCD_MASK, val);
	sp->t_ras = REG_FIELD_GET(DG1_DRAM_T_RAS_MASK, val);

	sp->t_rc = sp->t_rp + sp->t_ras;

	return 0;
}

static int icl_pcode_read_qgv_point_info(struct drm_i915_private *dev_priv,
					 struct intel_qgv_point *sp,
					 int point)
{
	u32 val = 0, val2 = 0;
	u16 dclk;
	int ret;

	ret = snb_pcode_read(dev_priv, ICL_PCODE_MEM_SUBSYSYSTEM_INFO |
			     ICL_PCODE_MEM_SS_READ_QGV_POINT_INFO(point),
			     &val, &val2);
	if (ret)
		return ret;
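
	/*
	 * The dclk value from pcode is in multiples of 16.666 MHz; convert it
	 * to MHz, e.g. a raw value of 16 gives
	 * DIV_ROUND_UP(16667 * 16 + 500, 1000) = 268 MHz (the extra +500
	 * rounding is only applied on display 12+).
	 */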
	dclk = val & 0xffff;
	sp->dclk = DIV_ROUND_UP((16667 * dclk) + (DISPLAY_VER(dev_priv) > 11 ? 500 : 0),
				1000);
	sp->t_rp = (val & 0xff0000) >> 16;
	sp->t_rcd = (val & 0xff000000) >> 24;

	sp->t_rdpre = val2 & 0xff;
	sp->t_ras = (val2 & 0xff00) >> 8;

	sp->t_rc = sp->t_rp + sp->t_ras;

	return 0;
}

static int adls_pcode_read_psf_gv_point_info(struct drm_i915_private *dev_priv,
					     struct intel_psf_gv_point *points)
{
	u32 val = 0;
	int ret;
	int i;

	ret = snb_pcode_read(dev_priv, ICL_PCODE_MEM_SUBSYSYSTEM_INFO |
			     ADL_PCODE_MEM_SS_READ_PSF_GV_INFO, &val, NULL);
	if (ret)
		return ret;

	for (i = 0; i < I915_NUM_PSF_GV_POINTS; i++) {
		points[i].clk = val & 0xff;
		val >>= 8;
	}

	return 0;
}
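
/*
 * Tell pcode to stop using the QGV/PSF points set in @points_mask for SAGV;
 * pcode acks with the "safe" status bits once the restriction is in effect.
 */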
int icl_pcode_restrict_qgv_points(struct drm_i915_private *dev_priv,
				  u32 points_mask)
{
	int ret;

	/* bspec says to keep retrying for at least 1 ms */
	ret = skl_pcode_request(dev_priv, ICL_PCODE_SAGV_DE_MEM_SS_CONFIG,
				points_mask,
				ICL_PCODE_REP_QGV_MASK | ADLS_PCODE_REP_PSF_MASK,
				ICL_PCODE_REP_QGV_SAFE | ADLS_PCODE_REP_PSF_SAFE,
				1);

	if (ret < 0) {
		drm_err(&dev_priv->drm, "Failed to disable qgv points (%d) points: 0x%x\n", ret, points_mask);
		return ret;
	}

	return 0;
}

static int icl_get_qgv_points(struct drm_i915_private *dev_priv,
			      struct intel_qgv_info *qi,
			      bool is_y_tile)
{
	const struct dram_info *dram_info = &dev_priv->dram_info;
	int i, ret;

	qi->num_points = dram_info->num_qgv_points;
	qi->num_psf_points = dram_info->num_psf_gv_points;

	if (DISPLAY_VER(dev_priv) >= 12)
		switch (dram_info->type) {
		case INTEL_DRAM_DDR4:
			qi->t_bl = is_y_tile ? 8 : 4;
			qi->max_numchannels = 2;
			qi->channel_width = 64;
			qi->deinterleave = is_y_tile ? 1 : 2;
			break;
		case INTEL_DRAM_DDR5:
			qi->t_bl = is_y_tile ? 16 : 8;
			qi->max_numchannels = 4;
			qi->channel_width = 32;
			qi->deinterleave = is_y_tile ? 1 : 2;
			break;
		case INTEL_DRAM_LPDDR4:
			if (IS_ROCKETLAKE(dev_priv)) {
				qi->t_bl = 8;
				qi->max_numchannels = 4;
				qi->channel_width = 32;
				qi->deinterleave = 2;
				break;
			}
			fallthrough;
		case INTEL_DRAM_LPDDR5:
			qi->t_bl = 16;
			qi->max_numchannels = 8;
			qi->channel_width = 16;
			qi->deinterleave = is_y_tile ? 2 : 4;
			break;
		default:
			qi->t_bl = 16;
			qi->max_numchannels = 1;
			break;
		}
	else if (DISPLAY_VER(dev_priv) == 11) {
		qi->t_bl = dev_priv->dram_info.type == INTEL_DRAM_DDR4 ? 4 : 8;
		qi->max_numchannels = 1;
	}

	if (drm_WARN_ON(&dev_priv->drm,
			qi->num_points > ARRAY_SIZE(qi->points)))
		qi->num_points = ARRAY_SIZE(qi->points);

	for (i = 0; i < qi->num_points; i++) {
		struct intel_qgv_point *sp = &qi->points[i];

		if (IS_DG1(dev_priv))
			ret = dg1_mchbar_read_qgv_point_info(dev_priv, sp, i);
		else
			ret = icl_pcode_read_qgv_point_info(dev_priv, sp, i);

		if (ret)
			return ret;

		drm_dbg_kms(&dev_priv->drm,
			    "QGV %d: DCLK=%d tRP=%d tRDPRE=%d tRAS=%d tRCD=%d tRC=%d\n",
			    i, sp->dclk, sp->t_rp, sp->t_rdpre, sp->t_ras,
			    sp->t_rcd, sp->t_rc);
	}

	if (qi->num_psf_points > 0) {
		ret = adls_pcode_read_psf_gv_point_info(dev_priv, qi->psf_points);
		if (ret) {
			drm_err(&dev_priv->drm, "Failed to read PSF point data; PSF points will not be considered in bandwidth calculations.\n");
			qi->num_psf_points = 0;
		}

		for (i = 0; i < qi->num_psf_points; i++)
			drm_dbg_kms(&dev_priv->drm,
				    "PSF GV %d: CLK=%d\n",
				    i, qi->psf_points[i].clk);
	}

	return 0;
}

static int adl_calc_psf_bw(int clk)
{
	/*
	 * clk is multiples of 16.666MHz (100/6)
	 * According to BSpec PSF GV bandwidth is
	 * calculated as BW = 64 * clk * 16.666Mhz
	 */
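	/* e.g. clk == 37 (~616 MHz) -> DIV_ROUND_CLOSEST(64 * 37 * 100, 6) = 39467 */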
	return DIV_ROUND_CLOSEST(64 * clk * 100, 6);
}

static int icl_sagv_max_dclk(const struct intel_qgv_info *qi)
{
	u16 dclk = 0;
	int i;

	for (i = 0; i < qi->num_points; i++)
		dclk = max(dclk, qi->points[i].dclk);

	return dclk;
}
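
/*
 * Per-platform system agent tuning parameters: deburst (presumably the
 * display engine burst length), deprogbwlimit (programmed DE bandwidth
 * limit in GB/s), displayrtids (request tracker IDs; displayrtids /
 * num_channels bounds the ipqdepth used below) and derating (percentage
 * knocked off the raw memory bandwidth).
 */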
struct intel_sa_info {
	u16 displayrtids;
	u8 deburst, deprogbwlimit, derating;
};

static const struct intel_sa_info icl_sa_info = {
	.deburst = 8,
	.deprogbwlimit = 25, /* GB/s */
	.displayrtids = 128,
	.derating = 10,
};

static const struct intel_sa_info tgl_sa_info = {
	.deburst = 16,
	.deprogbwlimit = 34, /* GB/s */
	.displayrtids = 256,
	.derating = 10,
};

static const struct intel_sa_info rkl_sa_info = {
	.deburst = 8,
	.deprogbwlimit = 20, /* GB/s */
	.displayrtids = 128,
	.derating = 10,
};

static const struct intel_sa_info adls_sa_info = {
	.deburst = 16,
	.deprogbwlimit = 38, /* GB/s */
	.displayrtids = 256,
	.derating = 10,
};

static const struct intel_sa_info adlp_sa_info = {
	.deburst = 16,
	.deprogbwlimit = 38, /* GB/s */
	.displayrtids = 256,
	.derating = 20,
};

static int icl_get_bw_info(struct drm_i915_private *dev_priv, const struct intel_sa_info *sa)
{
	struct intel_qgv_info qi = {};
	bool is_y_tile = true; /* assume y tile may be used */
	int num_channels = max_t(u8, 1, dev_priv->dram_info.num_channels);
	int ipqdepth, ipqdepthpch = 16;
	int dclk_max;
	int maxdebw;
	int num_groups = ARRAY_SIZE(dev_priv->max_bw);
	int i, ret;

	ret = icl_get_qgv_points(dev_priv, &qi, is_y_tile);
	if (ret) {
		drm_dbg_kms(&dev_priv->drm,
			    "Failed to get memory subsystem information, ignoring bandwidth limits\n");
		return ret;
	}

	dclk_max = icl_sagv_max_dclk(&qi);
	maxdebw = min(sa->deprogbwlimit * 1000, dclk_max * 16 * 6 / 10);
	ipqdepth = min(ipqdepthpch, sa->displayrtids / num_channels);
	qi.deinterleave = DIV_ROUND_UP(num_channels, is_y_tile ? 4 : 2);

	for (i = 0; i < num_groups; i++) {
		struct intel_bw_info *bi = &dev_priv->max_bw[i];
		int clpchgroup;
		int j;

		clpchgroup = (sa->deburst * qi.deinterleave / num_channels) << i;
		bi->num_planes = (ipqdepth - clpchgroup) / clpchgroup + 1;

		bi->num_qgv_points = qi.num_points;
		bi->num_psf_gv_points = qi.num_psf_points;

		for (j = 0; j < qi.num_points; j++) {
			const struct intel_qgv_point *sp = &qi.points[j];
			int ct, bw;

			/*
			 * Max row cycle time
			 *
			 * FIXME what is the logic behind the
			 * assumed burst length?
			 */
			ct = max_t(int, sp->t_rc, sp->t_rp + sp->t_rcd +
				   (clpchgroup - 1) * qi.t_bl + sp->t_rdpre);
			bw = DIV_ROUND_UP(sp->dclk * clpchgroup * 32 * num_channels, ct);

			bi->deratedbw[j] = min(maxdebw,
					       bw * (100 - sa->derating) / 100);

			drm_dbg_kms(&dev_priv->drm,
				    "BW%d / QGV %d: num_planes=%d deratedbw=%u\n",
				    i, j, bi->num_planes, bi->deratedbw[j]);
		}
	}

	/*
	 * If SAGV is disabled in BIOS, we always get one QGV point, and we
	 * can't send PCode commands to restrict it as that would fail and
	 * be pointless anyway.
	 */
	if (qi.num_points == 1)
		dev_priv->sagv_status = I915_SAGV_NOT_CONTROLLED;
	else
		dev_priv->sagv_status = I915_SAGV_ENABLED;

	return 0;
}

static int tgl_get_bw_info(struct drm_i915_private *dev_priv, const struct intel_sa_info *sa)
{
	struct intel_qgv_info qi = {};
	const struct dram_info *dram_info = &dev_priv->dram_info;
	bool is_y_tile = true; /* assume y tile may be used */
	int num_channels = max_t(u8, 1, dev_priv->dram_info.num_channels);
	int ipqdepth, ipqdepthpch = 16;
	int dclk_max;
	int maxdebw, peakbw;
	int clperchgroup;
	int num_groups = ARRAY_SIZE(dev_priv->max_bw);
	int i, ret;

	ret = icl_get_qgv_points(dev_priv, &qi, is_y_tile);
	if (ret) {
		drm_dbg_kms(&dev_priv->drm,
			    "Failed to get memory subsystem information, ignoring bandwidth limits\n");
		return ret;
	}

	if (dram_info->type == INTEL_DRAM_LPDDR4 || dram_info->type == INTEL_DRAM_LPDDR5)
		num_channels *= 2;

	qi.deinterleave = qi.deinterleave ? : DIV_ROUND_UP(num_channels, is_y_tile ? 4 : 2);

	if (num_channels < qi.max_numchannels && DISPLAY_VER(dev_priv) >= 12)
		qi.deinterleave = max(DIV_ROUND_UP(qi.deinterleave, 2), 1);

	if (DISPLAY_VER(dev_priv) > 11 && num_channels > qi.max_numchannels)
		drm_warn(&dev_priv->drm, "Number of channels exceeds max number of channels.\n");
	if (qi.max_numchannels != 0)
		num_channels = min_t(u8, num_channels, qi.max_numchannels);

	dclk_max = icl_sagv_max_dclk(&qi);

	peakbw = num_channels * DIV_ROUND_UP(qi.channel_width, 8) * dclk_max;
	maxdebw = min(sa->deprogbwlimit * 1000, peakbw * 6 / 10); /* 60% */

	ipqdepth = min(ipqdepthpch, sa->displayrtids / num_channels);
	/*
	 * clperchgroup = 4kpagespermempage * clperchperblock,
	 * clperchperblock = 8 / num_channels * interleave
	 */
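	/* e.g. 4 channels with deinterleave 2: 4 * DIV_ROUND_UP(8, 4) * 2 = 16 */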
	clperchgroup = 4 * DIV_ROUND_UP(8, num_channels) * qi.deinterleave;

	for (i = 0; i < num_groups; i++) {
		struct intel_bw_info *bi = &dev_priv->max_bw[i];
		struct intel_bw_info *bi_next;
		int clpchgroup;
		int j;

		clpchgroup = (sa->deburst * qi.deinterleave / num_channels) << i;

		if (i < num_groups - 1) {
			bi_next = &dev_priv->max_bw[i + 1];

			if (clpchgroup < clperchgroup)
				bi_next->num_planes = (ipqdepth - clpchgroup) /
						      clpchgroup + 1;
			else
				bi_next->num_planes = 0;
		}

		bi->num_qgv_points = qi.num_points;
		bi->num_psf_gv_points = qi.num_psf_points;

		for (j = 0; j < qi.num_points; j++) {
			const struct intel_qgv_point *sp = &qi.points[j];
			int ct, bw;

			/*
			 * Max row cycle time
			 *
			 * FIXME what is the logic behind the
			 * assumed burst length?
			 */
			ct = max_t(int, sp->t_rc, sp->t_rp + sp->t_rcd +
				   (clpchgroup - 1) * qi.t_bl + sp->t_rdpre);
			bw = DIV_ROUND_UP(sp->dclk * clpchgroup * 32 * num_channels, ct);

			bi->deratedbw[j] = min(maxdebw,
					       bw * (100 - sa->derating) / 100);

			drm_dbg_kms(&dev_priv->drm,
				    "BW%d / QGV %d: num_planes=%d deratedbw=%u\n",
				    i, j, bi->num_planes, bi->deratedbw[j]);
		}

		for (j = 0; j < qi.num_psf_points; j++) {
			const struct intel_psf_gv_point *sp = &qi.psf_points[j];

			bi->psf_bw[j] = adl_calc_psf_bw(sp->clk);

			drm_dbg_kms(&dev_priv->drm,
				    "BW%d / PSF GV %d: num_planes=%d bw=%u\n",
				    i, j, bi->num_planes, bi->psf_bw[j]);
		}
	}

	/*
	 * If SAGV is disabled in BIOS, we always get one QGV point, and we
	 * can't send PCode commands to restrict it as that would fail and
	 * be pointless anyway.
	 */
	if (qi.num_points == 1)
		dev_priv->sagv_status = I915_SAGV_NOT_CONTROLLED;
	else
		dev_priv->sagv_status = I915_SAGV_ENABLED;

	return 0;
}

static void dg2_get_bw_info(struct drm_i915_private *i915)
{
	unsigned int deratedbw = IS_DG2_G11(i915) ? 38000 : 50000;
	int num_groups = ARRAY_SIZE(i915->max_bw);
	int i;

	/*
	 * DG2 doesn't have SAGV or QGV points, just a constant max bandwidth
	 * that doesn't depend on the number of planes enabled. So fill all the
	 * plane groups with constant bw information for uniformity with other
	 * platforms. DG2-G10 platforms have a constant 50 GB/s bandwidth,
	 * whereas DG2-G11 platforms have 38 GB/s.
	 */
	for (i = 0; i < num_groups; i++) {
		struct intel_bw_info *bi = &i915->max_bw[i];

		bi->num_planes = 1;
		/* Need only one dummy QGV point per group */
		bi->num_qgv_points = 1;
		bi->deratedbw[0] = deratedbw;
	}

	i915->sagv_status = I915_SAGV_NOT_CONTROLLED;
}

static unsigned int icl_max_bw(struct drm_i915_private *dev_priv,
			       int num_planes, int qgv_point)
{
	int i;

	/*
	 * Let's return max bw for 0 planes
	 */
	num_planes = max(1, num_planes);

	for (i = 0; i < ARRAY_SIZE(dev_priv->max_bw); i++) {
		const struct intel_bw_info *bi =
			&dev_priv->max_bw[i];

		/*
		 * Pcode will not expose all QGV points when
		 * SAGV is forced to off/min/med/max.
		 */
		if (qgv_point >= bi->num_qgv_points)
			return UINT_MAX;

		if (num_planes >= bi->num_planes)
			return bi->deratedbw[qgv_point];
	}

	return 0;
}

static unsigned int tgl_max_bw(struct drm_i915_private *dev_priv,
			       int num_planes, int qgv_point)
{
	int i;

	/*
	 * Let's return max bw for 0 planes
	 */
	num_planes = max(1, num_planes);

	for (i = ARRAY_SIZE(dev_priv->max_bw) - 1; i >= 0; i--) {
		const struct intel_bw_info *bi =
			&dev_priv->max_bw[i];

		/*
		 * Pcode will not expose all QGV points when
		 * SAGV is forced to off/min/med/max.
		 */
		if (qgv_point >= bi->num_qgv_points)
			return UINT_MAX;

		if (num_planes <= bi->num_planes)
			return bi->deratedbw[qgv_point];
	}

	return dev_priv->max_bw[0].deratedbw[qgv_point];
}

static unsigned int adl_psf_bw(struct drm_i915_private *dev_priv,
			       int psf_gv_point)
{
	const struct intel_bw_info *bi =
		&dev_priv->max_bw[0];

	return bi->psf_bw[psf_gv_point];
}

void intel_bw_init_hw(struct drm_i915_private *dev_priv)
{
	if (!HAS_DISPLAY(dev_priv))
		return;

	if (IS_DG2(dev_priv))
		dg2_get_bw_info(dev_priv);
	else if (IS_ALDERLAKE_P(dev_priv))
		tgl_get_bw_info(dev_priv, &adlp_sa_info);
	else if (IS_ALDERLAKE_S(dev_priv))
		tgl_get_bw_info(dev_priv, &adls_sa_info);
	else if (IS_ROCKETLAKE(dev_priv))
		tgl_get_bw_info(dev_priv, &rkl_sa_info);
	else if (DISPLAY_VER(dev_priv) == 12)
		tgl_get_bw_info(dev_priv, &tgl_sa_info);
	else if (DISPLAY_VER(dev_priv) == 11)
		icl_get_bw_info(dev_priv, &icl_sa_info);
}

static unsigned int intel_bw_crtc_num_active_planes(const struct intel_crtc_state *crtc_state)
{
	/*
	 * We assume cursors are small enough
	 * to not cause bandwidth problems.
	 */
	return hweight8(crtc_state->active_planes & ~BIT(PLANE_CURSOR));
}

static unsigned int intel_bw_crtc_data_rate(const struct intel_crtc_state *crtc_state)
{
	struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
	struct drm_i915_private *i915 = to_i915(crtc->base.dev);
	unsigned int data_rate = 0;
	enum plane_id plane_id;

	for_each_plane_id_on_crtc(crtc, plane_id) {
		/*
		 * We assume cursors are small enough
		 * to not cause bandwidth problems.
		 */
		if (plane_id == PLANE_CURSOR)
			continue;

		data_rate += crtc_state->data_rate[plane_id];

		if (DISPLAY_VER(i915) < 11)
			data_rate += crtc_state->data_rate_y[plane_id];
	}

	return data_rate;
}

/* "Maximum Pipe Read Bandwidth" */
static int intel_bw_crtc_min_cdclk(const struct intel_crtc_state *crtc_state)
{
	struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
	struct drm_i915_private *i915 = to_i915(crtc->base.dev);

	if (DISPLAY_VER(i915) < 12)
		return 0;
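
	/*
	 * Assume the pipe can read at most 512/10 = 51.2 bytes of data per
	 * cdclk cycle, hence the * 10 / 512 below.
	 */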
	return DIV_ROUND_UP_ULL(mul_u32_u32(intel_bw_crtc_data_rate(crtc_state), 10), 512);
}

void intel_bw_crtc_update(struct intel_bw_state *bw_state,
			  const struct intel_crtc_state *crtc_state)
{
	struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
	struct drm_i915_private *i915 = to_i915(crtc->base.dev);

	bw_state->data_rate[crtc->pipe] =
		intel_bw_crtc_data_rate(crtc_state);
	bw_state->num_active_planes[crtc->pipe] =
		intel_bw_crtc_num_active_planes(crtc_state);

	drm_dbg_kms(&i915->drm, "pipe %c data rate %u num active planes %u\n",
		    pipe_name(crtc->pipe),
		    bw_state->data_rate[crtc->pipe],
		    bw_state->num_active_planes[crtc->pipe]);
}

static unsigned int intel_bw_num_active_planes(struct drm_i915_private *dev_priv,
					       const struct intel_bw_state *bw_state)
{
	unsigned int num_active_planes = 0;
	enum pipe pipe;

	for_each_pipe(dev_priv, pipe)
		num_active_planes += bw_state->num_active_planes[pipe];

	return num_active_planes;
}

static unsigned int intel_bw_data_rate(struct drm_i915_private *dev_priv,
				       const struct intel_bw_state *bw_state)
{
	unsigned int data_rate = 0;
	enum pipe pipe;

	for_each_pipe(dev_priv, pipe)
		data_rate += bw_state->data_rate[pipe];
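
	/* Budget roughly 5% extra bandwidth when VT-d is active (display 13+). */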
	if (DISPLAY_VER(dev_priv) >= 13 && i915_vtd_active(dev_priv))
		data_rate = DIV_ROUND_UP(data_rate * 105, 100);

	return data_rate;
}

struct intel_bw_state *
intel_atomic_get_old_bw_state(struct intel_atomic_state *state)
{
	struct drm_i915_private *dev_priv = to_i915(state->base.dev);
	struct intel_global_state *bw_state;

	bw_state = intel_atomic_get_old_global_obj_state(state, &dev_priv->bw_obj);

	return to_intel_bw_state(bw_state);
}

struct intel_bw_state *
intel_atomic_get_new_bw_state(struct intel_atomic_state *state)
{
	struct drm_i915_private *dev_priv = to_i915(state->base.dev);
	struct intel_global_state *bw_state;

	bw_state = intel_atomic_get_new_global_obj_state(state, &dev_priv->bw_obj);

	return to_intel_bw_state(bw_state);
}

struct intel_bw_state *
intel_atomic_get_bw_state(struct intel_atomic_state *state)
{
	struct drm_i915_private *dev_priv = to_i915(state->base.dev);
	struct intel_global_state *bw_state;

	bw_state = intel_atomic_get_global_obj_state(state, &dev_priv->bw_obj);
	if (IS_ERR(bw_state))
		return ERR_CAST(bw_state);

	return to_intel_bw_state(bw_state);
}

static bool intel_bw_state_changed(struct drm_i915_private *i915,
				   const struct intel_bw_state *old_bw_state,
				   const struct intel_bw_state *new_bw_state)
{
	enum pipe pipe;

	for_each_pipe(i915, pipe) {
		const struct intel_dbuf_bw *old_crtc_bw =
			&old_bw_state->dbuf_bw[pipe];
		const struct intel_dbuf_bw *new_crtc_bw =
			&new_bw_state->dbuf_bw[pipe];
		enum dbuf_slice slice;

		for_each_dbuf_slice(i915, slice) {
			if (old_crtc_bw->max_bw[slice] != new_crtc_bw->max_bw[slice] ||
			    old_crtc_bw->active_planes[slice] != new_crtc_bw->active_planes[slice])
				return true;
		}

		if (old_bw_state->min_cdclk[pipe] != new_bw_state->min_cdclk[pipe])
			return true;
	}

	return false;
}

static void skl_plane_calc_dbuf_bw(struct intel_bw_state *bw_state,
				   struct intel_crtc *crtc,
				   enum plane_id plane_id,
				   const struct skl_ddb_entry *ddb,
				   unsigned int data_rate)
{
	struct drm_i915_private *i915 = to_i915(crtc->base.dev);
	struct intel_dbuf_bw *crtc_bw = &bw_state->dbuf_bw[crtc->pipe];
	unsigned int dbuf_mask = skl_ddb_dbuf_slice_mask(i915, ddb);
	enum dbuf_slice slice;

	/*
	 * The arbiter can only really guarantee an
	 * equal share of the total bw to each plane.
	 */
	for_each_dbuf_slice_in_mask(i915, slice, dbuf_mask) {
		crtc_bw->max_bw[slice] = max(crtc_bw->max_bw[slice], data_rate);
		crtc_bw->active_planes[slice] |= BIT(plane_id);
	}
}

static void skl_crtc_calc_dbuf_bw(struct intel_bw_state *bw_state,
				  const struct intel_crtc_state *crtc_state)
{
	struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
	struct drm_i915_private *i915 = to_i915(crtc->base.dev);
	struct intel_dbuf_bw *crtc_bw = &bw_state->dbuf_bw[crtc->pipe];
	enum plane_id plane_id;

	memset(crtc_bw, 0, sizeof(*crtc_bw));

	if (!crtc_state->hw.active)
		return;

	for_each_plane_id_on_crtc(crtc, plane_id) {
		/*
		 * We assume cursors are small enough
		 * to not cause bandwidth problems.
		 */
		if (plane_id == PLANE_CURSOR)
			continue;

		skl_plane_calc_dbuf_bw(bw_state, crtc, plane_id,
				       &crtc_state->wm.skl.plane_ddb[plane_id],
				       crtc_state->data_rate[plane_id]);

		if (DISPLAY_VER(i915) < 11)
			skl_plane_calc_dbuf_bw(bw_state, crtc, plane_id,
					       &crtc_state->wm.skl.plane_ddb_y[plane_id],
					       crtc_state->data_rate[plane_id]);
	}
}

/* "Maximum Data Buffer Bandwidth" */
static int
intel_bw_dbuf_min_cdclk(struct drm_i915_private *i915,
			const struct intel_bw_state *bw_state)
{
	unsigned int total_max_bw = 0;
	enum dbuf_slice slice;

	for_each_dbuf_slice(i915, slice) {
		int num_active_planes = 0;
		unsigned int max_bw = 0;
		enum pipe pipe;

		/*
		 * The arbiter can only really guarantee an
		 * equal share of the total bw to each plane.
		 */
		for_each_pipe(i915, pipe) {
			const struct intel_dbuf_bw *crtc_bw = &bw_state->dbuf_bw[pipe];

			max_bw = max(crtc_bw->max_bw[slice], max_bw);
			num_active_planes += hweight8(crtc_bw->active_planes[slice]);
		}
		max_bw *= num_active_planes;

		total_max_bw = max(total_max_bw, max_bw);
	}
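
	/* Assume the data buffer can move at most 64 bytes per cdclk cycle. */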
	return DIV_ROUND_UP(total_max_bw, 64);
}

int intel_bw_min_cdclk(struct drm_i915_private *i915,
		       const struct intel_bw_state *bw_state)
{
	enum pipe pipe;
	int min_cdclk;

	min_cdclk = intel_bw_dbuf_min_cdclk(i915, bw_state);

	for_each_pipe(i915, pipe)
		min_cdclk = max(bw_state->min_cdclk[pipe], min_cdclk);

	return min_cdclk;
}

int intel_bw_calc_min_cdclk(struct intel_atomic_state *state,
			    bool *need_cdclk_calc)
{
	struct drm_i915_private *dev_priv = to_i915(state->base.dev);
	struct intel_bw_state *new_bw_state = NULL;
	const struct intel_bw_state *old_bw_state = NULL;
	const struct intel_cdclk_state *cdclk_state;
	const struct intel_crtc_state *crtc_state;
	int old_min_cdclk, new_min_cdclk;
	struct intel_crtc *crtc;
	int i;

	if (DISPLAY_VER(dev_priv) < 9)
		return 0;

	for_each_new_intel_crtc_in_state(state, crtc, crtc_state, i) {
		new_bw_state = intel_atomic_get_bw_state(state);
		if (IS_ERR(new_bw_state))
			return PTR_ERR(new_bw_state);

		old_bw_state = intel_atomic_get_old_bw_state(state);

		skl_crtc_calc_dbuf_bw(new_bw_state, crtc_state);

		new_bw_state->min_cdclk[crtc->pipe] =
			intel_bw_crtc_min_cdclk(crtc_state);
	}

	if (!old_bw_state)
		return 0;

	if (intel_bw_state_changed(dev_priv, old_bw_state, new_bw_state)) {
		int ret = intel_atomic_lock_global_state(&new_bw_state->base);

		if (ret)
			return ret;
	}

	old_min_cdclk = intel_bw_min_cdclk(dev_priv, old_bw_state);
	new_min_cdclk = intel_bw_min_cdclk(dev_priv, new_bw_state);

	/*
	 * No need to check against the cdclk state if
	 * the min cdclk doesn't increase.
	 *
	 * Ie. we only ever increase the cdclk due to bandwidth
	 * requirements. This can reduce back and forth
	 * display blinking due to constant cdclk changes.
	 */
	if (new_min_cdclk <= old_min_cdclk)
		return 0;

	cdclk_state = intel_atomic_get_cdclk_state(state);
	if (IS_ERR(cdclk_state))
		return PTR_ERR(cdclk_state);

	/*
	 * No need to recalculate the cdclk state if
	 * the min cdclk doesn't increase.
	 *
	 * Ie. we only ever increase the cdclk due to bandwidth
	 * requirements. This can reduce back and forth
	 * display blinking due to constant cdclk changes.
	 */
	if (new_min_cdclk <= cdclk_state->bw_min_cdclk)
		return 0;

	drm_dbg_kms(&dev_priv->drm,
		    "new bandwidth min cdclk (%d kHz) > old min cdclk (%d kHz)\n",
		    new_min_cdclk, cdclk_state->bw_min_cdclk);
	*need_cdclk_calc = true;

	return 0;
}

static u16 icl_qgv_points_mask(struct drm_i915_private *i915)
{
	unsigned int num_psf_gv_points = i915->max_bw[0].num_psf_gv_points;
	unsigned int num_qgv_points = i915->max_bw[0].num_qgv_points;
	u16 qgv_points = 0, psf_points = 0;

	/*
	 * We can _not_ use the whole ADLS_QGV_PT_MASK here, as PCode rejects
	 * the request if we try to mask any unadvertised points, so we can
	 * only operate on the points PCode actually returned to us.
	 */
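	/* e.g. 3 QGV + 2 PSF points advertised -> qgv_points = 0b111, psf_points = 0b11 */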
	if (num_qgv_points > 0)
		qgv_points = GENMASK(num_qgv_points - 1, 0);

	if (num_psf_gv_points > 0)
		psf_points = GENMASK(num_psf_gv_points - 1, 0);

	return ICL_PCODE_REQ_QGV_PT(qgv_points) | ADLS_PCODE_REQ_PSF_PT(psf_points);
}

static int intel_bw_check_data_rate(struct intel_atomic_state *state, bool *changed)
{
	struct drm_i915_private *i915 = to_i915(state->base.dev);
	const struct intel_crtc_state *new_crtc_state, *old_crtc_state;
	struct intel_crtc *crtc;
	int i;

	for_each_oldnew_intel_crtc_in_state(state, crtc, old_crtc_state,
					    new_crtc_state, i) {
		unsigned int old_data_rate =
			intel_bw_crtc_data_rate(old_crtc_state);
		unsigned int new_data_rate =
			intel_bw_crtc_data_rate(new_crtc_state);
		unsigned int old_active_planes =
			intel_bw_crtc_num_active_planes(old_crtc_state);
		unsigned int new_active_planes =
			intel_bw_crtc_num_active_planes(new_crtc_state);
		struct intel_bw_state *new_bw_state;

		/*
		 * Avoid locking the bw state when
		 * nothing significant has changed.
		 */
		if (old_data_rate == new_data_rate &&
		    old_active_planes == new_active_planes)
			continue;

		new_bw_state = intel_atomic_get_bw_state(state);
		if (IS_ERR(new_bw_state))
			return PTR_ERR(new_bw_state);

		new_bw_state->data_rate[crtc->pipe] = new_data_rate;
		new_bw_state->num_active_planes[crtc->pipe] = new_active_planes;

		*changed = true;

		drm_dbg_kms(&i915->drm,
			    "[CRTC:%d:%s] data rate %u num active planes %u\n",
			    crtc->base.base.id, crtc->base.name,
			    new_bw_state->data_rate[crtc->pipe],
			    new_bw_state->num_active_planes[crtc->pipe]);
	}

	return 0;
}

int intel_bw_atomic_check(struct intel_atomic_state *state)
{
	struct drm_i915_private *dev_priv = to_i915(state->base.dev);
	const struct intel_bw_state *old_bw_state;
	struct intel_bw_state *new_bw_state;
	unsigned int data_rate;
	unsigned int num_active_planes;
	int i, ret;
	u16 qgv_points = 0, psf_points = 0;
	unsigned int max_bw_point = 0, max_bw = 0;
	unsigned int num_qgv_points = dev_priv->max_bw[0].num_qgv_points;
	unsigned int num_psf_gv_points = dev_priv->max_bw[0].num_psf_gv_points;
	bool changed = false;

	/* FIXME earlier gens need some checks too */
	if (DISPLAY_VER(dev_priv) < 11)
		return 0;

	ret = intel_bw_check_data_rate(state, &changed);
	if (ret)
		return ret;

	old_bw_state = intel_atomic_get_old_bw_state(state);
	new_bw_state = intel_atomic_get_new_bw_state(state);

	if (new_bw_state &&
	    intel_can_enable_sagv(dev_priv, old_bw_state) !=
	    intel_can_enable_sagv(dev_priv, new_bw_state))
		changed = true;

	/*
	 * If none of our inputs (data rates, number of active
	 * planes, SAGV yes/no) changed then nothing to do here.
	 */
	if (!changed)
		return 0;

	ret = intel_atomic_lock_global_state(&new_bw_state->base);
	if (ret)
		return ret;
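
	/* Plane data rates are in kB/s; convert to MB/s to match the derated bw values. */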
	data_rate = intel_bw_data_rate(dev_priv, new_bw_state);
	data_rate = DIV_ROUND_UP(data_rate, 1000);

	num_active_planes = intel_bw_num_active_planes(dev_priv, new_bw_state);

	for (i = 0; i < num_qgv_points; i++) {
		unsigned int max_data_rate;

		if (DISPLAY_VER(dev_priv) > 11)
			max_data_rate = tgl_max_bw(dev_priv, num_active_planes, i);
		else
			max_data_rate = icl_max_bw(dev_priv, num_active_planes, i);
		/*
		 * We need to know which qgv point gives us
		 * maximum bandwidth in order to disable SAGV
		 * if we find that we exceed SAGV block time
		 * with watermarks. By that moment we already
		 * have those, as they are calculated earlier in
		 * intel_atomic_check.
		 */
		if (max_data_rate > max_bw) {
			max_bw_point = i;
			max_bw = max_data_rate;
		}
		if (max_data_rate >= data_rate)
			qgv_points |= BIT(i);

		drm_dbg_kms(&dev_priv->drm, "QGV point %d: max bw %d required %d\n",
			    i, max_data_rate, data_rate);
	}

	for (i = 0; i < num_psf_gv_points; i++) {
		unsigned int max_data_rate = adl_psf_bw(dev_priv, i);

		if (max_data_rate >= data_rate)
			psf_points |= BIT(i);

		drm_dbg_kms(&dev_priv->drm, "PSF GV point %d: max bw %d required %d\n",
			    i, max_data_rate, data_rate);
	}

	/*
	 * BSpec states that we should always have at least one allowed point
	 * left, so if we don't, simply reject the configuration.
	 */
	if (qgv_points == 0) {
		drm_dbg_kms(&dev_priv->drm, "No QGV points provide sufficient memory"
			    " bandwidth %d for display configuration (%d active planes).\n",
			    data_rate, num_active_planes);
		return -EINVAL;
	}

	if (num_psf_gv_points > 0 && psf_points == 0) {
		drm_dbg_kms(&dev_priv->drm, "No PSF GV points provide sufficient memory"
			    " bandwidth %d for display configuration (%d active planes).\n",
			    data_rate, num_active_planes);
		return -EINVAL;
	}

	/*
	 * If we can't enable SAGV due to the increased memory latency it may
	 * cause, keep only the single QGV point with the highest bandwidth.
	 */
	if (!intel_can_enable_sagv(dev_priv, new_bw_state)) {
		qgv_points = BIT(max_bw_point);
		drm_dbg_kms(&dev_priv->drm, "No SAGV, using single QGV point %d\n",
			    max_bw_point);
	}

	/*
	 * We store the ones which need to be masked as that is what PCode
	 * actually accepts as a parameter.
	 */
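	/*
	 * E.g. with 3 QGV points advertised and only points 0-1 usable, the
	 * inversion leaves just point 2 set, which is what gets masked off.
	 */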
	new_bw_state->qgv_points_mask =
		~(ICL_PCODE_REQ_QGV_PT(qgv_points) |
		  ADLS_PCODE_REQ_PSF_PT(psf_points)) &
		icl_qgv_points_mask(dev_priv);

	/*
	 * If the actual mask has changed we need to make sure that
	 * the commits are serialized (in case this is a nomodeset, nonblocking).
	 */
	if (new_bw_state->qgv_points_mask != old_bw_state->qgv_points_mask) {
		ret = intel_atomic_serialize_global_state(&new_bw_state->base);
		if (ret)
			return ret;
	}

	return 0;
}

static struct intel_global_state *
intel_bw_duplicate_state(struct intel_global_obj *obj)
{
	struct intel_bw_state *state;

	state = kmemdup(obj->state, sizeof(*state), GFP_KERNEL);
	if (!state)
		return NULL;

	return &state->base;
}

static void intel_bw_destroy_state(struct intel_global_obj *obj,
				   struct intel_global_state *state)
{
	kfree(state);
}

static const struct intel_global_state_funcs intel_bw_funcs = {
	.atomic_duplicate_state = intel_bw_duplicate_state,
	.atomic_destroy_state = intel_bw_destroy_state,
};

int intel_bw_init(struct drm_i915_private *dev_priv)
{
	struct intel_bw_state *state;

	state = kzalloc(sizeof(*state), GFP_KERNEL);
	if (!state)
		return -ENOMEM;

	intel_atomic_global_obj_init(dev_priv, &dev_priv->bw_obj,
				     &state->base, &intel_bw_funcs);

	return 0;
}