// SPDX-License-Identifier: MIT
/*
 * Copyright © 2019 Intel Corporation
 */

#include <drm/drm_atomic_state_helper.h>

#include "i915_reg.h"
#include "intel_atomic.h"
#include "intel_bw.h"
#include "intel_cdclk.h"
#include "intel_display_types.h"
#include "intel_mchbar_regs.h"
#include "intel_pcode.h"
#include "intel_pm.h"

/* Parameters for Qclk Geyserville (QGV) */
struct intel_qgv_point {
	u16 dclk, t_rp, t_rdpre, t_rc, t_ras, t_rcd;
};

struct intel_psf_gv_point {
	u8 clk; /* clock in multiples of 16.6666 MHz */
};

struct intel_qgv_info {
	struct intel_qgv_point points[I915_NUM_QGV_POINTS];
	struct intel_psf_gv_point psf_points[I915_NUM_PSF_GV_POINTS];
	u8 num_points;
	u8 num_psf_points;
	u8 t_bl;
	u8 max_numchannels;
	u8 channel_width;
	u8 deinterleave;
};

static int dg1_mchbar_read_qgv_point_info(struct drm_i915_private *dev_priv,
					  struct intel_qgv_point *sp,
					  int point)
{
	u32 dclk_ratio, dclk_reference;
	u32 val;

	val = intel_uncore_read(&dev_priv->uncore, SA_PERF_STATUS_0_0_0_MCHBAR_PC);
	dclk_ratio = REG_FIELD_GET(DG1_QCLK_RATIO_MASK, val);
	if (val & DG1_QCLK_REFERENCE)
		dclk_reference = 6; /* 6 * 16.666 MHz = 100 MHz */
	else
		dclk_reference = 8; /* 8 * 16.666 MHz = 133 MHz */
	sp->dclk = DIV_ROUND_UP((16667 * dclk_ratio * dclk_reference) + 500, 1000);

	val = intel_uncore_read(&dev_priv->uncore, SKL_MC_BIOS_DATA_0_0_0_MCHBAR_PCU);
	if (val & DG1_GEAR_TYPE)
		sp->dclk *= 2;

	if (sp->dclk == 0)
		return -EINVAL;

	val = intel_uncore_read(&dev_priv->uncore, MCHBAR_CH0_CR_TC_PRE_0_0_0_MCHBAR);
	sp->t_rp = REG_FIELD_GET(DG1_DRAM_T_RP_MASK, val);
	sp->t_rdpre = REG_FIELD_GET(DG1_DRAM_T_RDPRE_MASK, val);

	val = intel_uncore_read(&dev_priv->uncore, MCHBAR_CH0_CR_TC_PRE_0_0_0_MCHBAR_HIGH);
	sp->t_rcd = REG_FIELD_GET(DG1_DRAM_T_RCD_MASK, val);
	sp->t_ras = REG_FIELD_GET(DG1_DRAM_T_RAS_MASK, val);

	sp->t_rc = sp->t_rp + sp->t_ras;

	return 0;
}

static int icl_pcode_read_qgv_point_info(struct drm_i915_private *dev_priv,
					 struct intel_qgv_point *sp,
					 int point)
{
	u32 val = 0, val2 = 0;
	u16 dclk;
	int ret;

	ret = snb_pcode_read(dev_priv, ICL_PCODE_MEM_SUBSYSYSTEM_INFO |
			     ICL_PCODE_MEM_SS_READ_QGV_POINT_INFO(point),
			     &val, &val2);
	if (ret)
		return ret;

	dclk = val & 0xffff;
	sp->dclk = DIV_ROUND_UP((16667 * dclk) + (DISPLAY_VER(dev_priv) > 11 ? 500 : 0),
				1000);
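	/*
	 * Pcode response layout:
	 * val:  dclk in bits 0-15, tRP in bits 16-23, tRCD in bits 24-31
	 * val2: tRDPRE in bits 0-7, tRAS in bits 8-15
	 */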
	sp->t_rp = (val & 0xff0000) >> 16;
	sp->t_rcd = (val & 0xff000000) >> 24;

	sp->t_rdpre = val2 & 0xff;
	sp->t_ras = (val2 & 0xff00) >> 8;

	sp->t_rc = sp->t_rp + sp->t_ras;

	return 0;
}

static int adls_pcode_read_psf_gv_point_info(struct drm_i915_private *dev_priv,
					     struct intel_psf_gv_point *points)
{
	u32 val = 0;
	int ret;
	int i;

	ret = snb_pcode_read(dev_priv, ICL_PCODE_MEM_SUBSYSYSTEM_INFO |
			     ADL_PCODE_MEM_SS_READ_PSF_GV_INFO, &val, NULL);
	if (ret)
		return ret;

	for (i = 0; i < I915_NUM_PSF_GV_POINTS; i++) {
		points[i].clk = val & 0xff;
		val >>= 8;
	}

	return 0;
}

int icl_pcode_restrict_qgv_points(struct drm_i915_private *dev_priv,
				  u32 points_mask)
{
	int ret;

	/* bspec says to keep retrying for at least 1 ms */
	ret = skl_pcode_request(dev_priv, ICL_PCODE_SAGV_DE_MEM_SS_CONFIG,
				points_mask,
				ICL_PCODE_POINTS_RESTRICTED_MASK,
				ICL_PCODE_POINTS_RESTRICTED,
				1);

	if (ret < 0) {
		drm_err(&dev_priv->drm, "Failed to disable qgv points (%d) points: 0x%x\n", ret, points_mask);
		return ret;
	}

	return 0;
}

static int icl_get_qgv_points(struct drm_i915_private *dev_priv,
			      struct intel_qgv_info *qi,
			      bool is_y_tile)
{
	const struct dram_info *dram_info = &dev_priv->dram_info;
	int i, ret;

	qi->num_points = dram_info->num_qgv_points;
	qi->num_psf_points = dram_info->num_psf_gv_points;

	if (DISPLAY_VER(dev_priv) >= 12)
		switch (dram_info->type) {
		case INTEL_DRAM_DDR4:
			qi->t_bl = is_y_tile ? 8 : 4;
			qi->max_numchannels = 2;
			qi->channel_width = 64;
			qi->deinterleave = is_y_tile ? 1 : 2;
			break;
		case INTEL_DRAM_DDR5:
			qi->t_bl = is_y_tile ? 16 : 8;
			qi->max_numchannels = 4;
			qi->channel_width = 32;
			qi->deinterleave = is_y_tile ? 1 : 2;
			break;
		case INTEL_DRAM_LPDDR4:
			if (IS_ROCKETLAKE(dev_priv)) {
				qi->t_bl = 8;
				qi->max_numchannels = 4;
				qi->channel_width = 32;
				qi->deinterleave = 2;
				break;
			}
			fallthrough;
		case INTEL_DRAM_LPDDR5:
			qi->t_bl = 16;
			qi->max_numchannels = 8;
			qi->channel_width = 16;
			qi->deinterleave = is_y_tile ? 2 : 4;
			break;
		default:
			qi->t_bl = 16;
			qi->max_numchannels = 1;
			break;
		}
	else if (DISPLAY_VER(dev_priv) == 11) {
		qi->t_bl = dev_priv->dram_info.type == INTEL_DRAM_DDR4 ? 4 : 8;
		qi->max_numchannels = 1;
	}

	if (drm_WARN_ON(&dev_priv->drm,
			qi->num_points > ARRAY_SIZE(qi->points)))
		qi->num_points = ARRAY_SIZE(qi->points);

	for (i = 0; i < qi->num_points; i++) {
		struct intel_qgv_point *sp = &qi->points[i];

		if (IS_DG1(dev_priv))
			ret = dg1_mchbar_read_qgv_point_info(dev_priv, sp, i);
		else
			ret = icl_pcode_read_qgv_point_info(dev_priv, sp, i);

		if (ret)
			return ret;

		drm_dbg_kms(&dev_priv->drm,
			    "QGV %d: DCLK=%d tRP=%d tRDPRE=%d tRAS=%d tRCD=%d tRC=%d\n",
			    i, sp->dclk, sp->t_rp, sp->t_rdpre, sp->t_ras,
			    sp->t_rcd, sp->t_rc);
	}

	if (qi->num_psf_points > 0) {
		ret = adls_pcode_read_psf_gv_point_info(dev_priv, qi->psf_points);
		if (ret) {
			drm_err(&dev_priv->drm, "Failed to read PSF point data; PSF points will not be considered in bandwidth calculations.\n");
			qi->num_psf_points = 0;
		}

		for (i = 0; i < qi->num_psf_points; i++)
			drm_dbg_kms(&dev_priv->drm,
				    "PSF GV %d: CLK=%d\n",
				    i, qi->psf_points[i].clk);
	}

	return 0;
}

static int adl_calc_psf_bw(int clk)
{
	/*
	 * clk is in multiples of 16.666 MHz (100/6)
	 * According to BSpec PSF GV bandwidth is
	 * calculated as BW = 64 * clk * 16.666 MHz
	 */
	return DIV_ROUND_CLOSEST(64 * clk * 100, 6);
}

static int icl_sagv_max_dclk(const struct intel_qgv_info *qi)
{
	u16 dclk = 0;
	int i;

	for (i = 0; i < qi->num_points; i++)
		dclk = max(dclk, qi->points[i].dclk);

	return dclk;
}

struct intel_sa_info {
	u16 displayrtids;
	u8 deburst, deprogbwlimit, derating;
};

static const struct intel_sa_info icl_sa_info = {
	.deburst = 8,
	.deprogbwlimit = 25, /* GB/s */
	.displayrtids = 128,
	.derating = 10,
};

static const struct intel_sa_info tgl_sa_info = {
	.deburst = 16,
	.deprogbwlimit = 34, /* GB/s */
	.displayrtids = 256,
	.derating = 10,
};

static const struct intel_sa_info rkl_sa_info = {
	.deburst = 8,
	.deprogbwlimit = 20, /* GB/s */
	.displayrtids = 128,
	.derating = 10,
};

static const struct intel_sa_info adls_sa_info = {
	.deburst = 16,
	.deprogbwlimit = 38, /* GB/s */
	.displayrtids = 256,
	.derating = 10,
};

static const struct intel_sa_info adlp_sa_info = {
	.deburst = 16,
	.deprogbwlimit = 38, /* GB/s */
	.displayrtids = 256,
	.derating = 20,
};

static int icl_get_bw_info(struct drm_i915_private *dev_priv, const struct intel_sa_info *sa)
{
	struct intel_qgv_info qi = {};
	bool is_y_tile = true; /* assume y tile may be used */
	int num_channels = max_t(u8, 1, dev_priv->dram_info.num_channels);
	int ipqdepth, ipqdepthpch = 16;
	int dclk_max;
	int maxdebw;
	int num_groups = ARRAY_SIZE(dev_priv->max_bw);
	int i, ret;

	ret = icl_get_qgv_points(dev_priv, &qi, is_y_tile);
	if (ret) {
		drm_dbg_kms(&dev_priv->drm,
			    "Failed to get memory subsystem information, ignoring bandwidth limits");
		return ret;
	}

	dclk_max = icl_sagv_max_dclk(&qi);
	maxdebw = min(sa->deprogbwlimit * 1000, dclk_max * 16 * 6 / 10);
	ipqdepth = min(ipqdepthpch, sa->displayrtids / num_channels);
	qi.deinterleave = DIV_ROUND_UP(num_channels, is_y_tile ? 4 : 2);
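
	/*
	 * For each plane-count group, derive the derated bandwidth
	 * available at every QGV point, capped by the DE bandwidth limit.
	 */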
	for (i = 0; i < num_groups; i++) {
		struct intel_bw_info *bi = &dev_priv->max_bw[i];
		int clpchgroup;
		int j;

		clpchgroup = (sa->deburst * qi.deinterleave / num_channels) << i;
		bi->num_planes = (ipqdepth - clpchgroup) / clpchgroup + 1;

		bi->num_qgv_points = qi.num_points;
		bi->num_psf_gv_points = qi.num_psf_points;

		for (j = 0; j < qi.num_points; j++) {
			const struct intel_qgv_point *sp = &qi.points[j];
			int ct, bw;

			/*
			 * Max row cycle time
			 *
			 * FIXME what is the logic behind the
			 * assumed burst length?
			 */
			ct = max_t(int, sp->t_rc, sp->t_rp + sp->t_rcd +
				   (clpchgroup - 1) * qi.t_bl + sp->t_rdpre);
			bw = DIV_ROUND_UP(sp->dclk * clpchgroup * 32 * num_channels, ct);

			bi->deratedbw[j] = min(maxdebw,
					       bw * (100 - sa->derating) / 100);

			drm_dbg_kms(&dev_priv->drm,
				    "BW%d / QGV %d: num_planes=%d deratedbw=%u\n",
				    i, j, bi->num_planes, bi->deratedbw[j]);
		}
	}
	/*
	 * If SAGV is disabled in BIOS, we always get just one SAGV
	 * point, and we can't send PCode commands to restrict it as
	 * they would fail and be pointless anyway.
	 */
	if (qi.num_points == 1)
		dev_priv->sagv_status = I915_SAGV_NOT_CONTROLLED;
	else
		dev_priv->sagv_status = I915_SAGV_ENABLED;

	return 0;
}

static int tgl_get_bw_info(struct drm_i915_private *dev_priv, const struct intel_sa_info *sa)
{
	struct intel_qgv_info qi = {};
	const struct dram_info *dram_info = &dev_priv->dram_info;
	bool is_y_tile = true; /* assume y tile may be used */
	int num_channels = max_t(u8, 1, dev_priv->dram_info.num_channels);
	int ipqdepth, ipqdepthpch = 16;
	int dclk_max;
	int maxdebw, peakbw;
	int clperchgroup;
	int num_groups = ARRAY_SIZE(dev_priv->max_bw);
	int i, ret;

	ret = icl_get_qgv_points(dev_priv, &qi, is_y_tile);
	if (ret) {
		drm_dbg_kms(&dev_priv->drm,
			    "Failed to get memory subsystem information, ignoring bandwidth limits");
		return ret;
	}

	if (dram_info->type == INTEL_DRAM_LPDDR4 || dram_info->type == INTEL_DRAM_LPDDR5)
		num_channels *= 2;

	qi.deinterleave = qi.deinterleave ? : DIV_ROUND_UP(num_channels, is_y_tile ? 4 : 2);
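
	/*
	 * Halve the deinterleave factor when fewer channels than the
	 * platform maximum are populated, and clamp num_channels to
	 * the maximum supported by the memory type.
	 */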
	if (num_channels < qi.max_numchannels && DISPLAY_VER(dev_priv) >= 12)
		qi.deinterleave = max(DIV_ROUND_UP(qi.deinterleave, 2), 1);

	if (DISPLAY_VER(dev_priv) > 11 && num_channels > qi.max_numchannels)
		drm_warn(&dev_priv->drm, "Number of channels exceeds max number of channels.");
	if (qi.max_numchannels != 0)
		num_channels = min_t(u8, num_channels, qi.max_numchannels);

	dclk_max = icl_sagv_max_dclk(&qi);

	peakbw = num_channels * DIV_ROUND_UP(qi.channel_width, 8) * dclk_max;
	maxdebw = min(sa->deprogbwlimit * 1000, peakbw * 6 / 10); /* 60% */

	ipqdepth = min(ipqdepthpch, sa->displayrtids / num_channels);
	/*
	 * clperchgroup = 4kpagespermempage * clperchperblock,
	 * clperchperblock = 8 / num_channels * interleave
	 */
	clperchgroup = 4 * DIV_ROUND_UP(8, num_channels) * qi.deinterleave;

	for (i = 0; i < num_groups; i++) {
		struct intel_bw_info *bi = &dev_priv->max_bw[i];
		int clpchgroup;
		int j;

		clpchgroup = (sa->deburst * qi.deinterleave / num_channels) << i;

		if (i < num_groups - 1) {
			struct intel_bw_info *bi_next = &dev_priv->max_bw[i + 1];

			if (clpchgroup < clperchgroup)
				bi_next->num_planes = (ipqdepth - clpchgroup) /
						      clpchgroup + 1;
			else
				bi_next->num_planes = 0;
		}

		bi->num_qgv_points = qi.num_points;
		bi->num_psf_gv_points = qi.num_psf_points;

		for (j = 0; j < qi.num_points; j++) {
			const struct intel_qgv_point *sp = &qi.points[j];
			int ct, bw;

			/*
			 * Max row cycle time
			 *
			 * FIXME what is the logic behind the
			 * assumed burst length?
			 */
			ct = max_t(int, sp->t_rc, sp->t_rp + sp->t_rcd +
				   (clpchgroup - 1) * qi.t_bl + sp->t_rdpre);
			bw = DIV_ROUND_UP(sp->dclk * clpchgroup * 32 * num_channels, ct);

			bi->deratedbw[j] = min(maxdebw,
					       bw * (100 - sa->derating) / 100);

			drm_dbg_kms(&dev_priv->drm,
				    "BW%d / QGV %d: num_planes=%d deratedbw=%u\n",
				    i, j, bi->num_planes, bi->deratedbw[j]);
		}

		for (j = 0; j < qi.num_psf_points; j++) {
			const struct intel_psf_gv_point *sp = &qi.psf_points[j];

			bi->psf_bw[j] = adl_calc_psf_bw(sp->clk);

			drm_dbg_kms(&dev_priv->drm,
				    "BW%d / PSF GV %d: num_planes=%d bw=%u\n",
				    i, j, bi->num_planes, bi->psf_bw[j]);
		}
	}

	/*
	 * If SAGV is disabled in BIOS, we always get just one SAGV
	 * point, and we can't send PCode commands to restrict it as
	 * they would fail and be pointless anyway.
	 */
	if (qi.num_points == 1)
		dev_priv->sagv_status = I915_SAGV_NOT_CONTROLLED;
	else
		dev_priv->sagv_status = I915_SAGV_ENABLED;

	return 0;
}

static void dg2_get_bw_info(struct drm_i915_private *i915)
{
	struct intel_bw_info *bi = &i915->max_bw[0];

	/*
	 * DG2 doesn't have SAGV or QGV points, just a constant max bandwidth
	 * that doesn't depend on the number of planes enabled. Create a
	 * single dummy QGV point to reflect that. DG2-G10 platforms have a
	 * constant 50 GB/s bandwidth, whereas DG2-G11 platforms have 38 GB/s.
	 */
	bi->num_planes = 1;
	bi->num_qgv_points = 1;
	if (IS_DG2_G11(i915))
		bi->deratedbw[0] = 38000;
	else
		bi->deratedbw[0] = 50000;

	i915->sagv_status = I915_SAGV_NOT_CONTROLLED;
}

static unsigned int icl_max_bw(struct drm_i915_private *dev_priv,
			       int num_planes, int qgv_point)
{
	int i;

	/*
	 * Let's return max bw for 0 planes
	 */
	num_planes = max(1, num_planes);

	for (i = 0; i < ARRAY_SIZE(dev_priv->max_bw); i++) {
		const struct intel_bw_info *bi =
			&dev_priv->max_bw[i];

		/*
		 * Pcode will not expose all QGV points when
		 * SAGV is forced to off/min/med/max.
		 */
		if (qgv_point >= bi->num_qgv_points)
			return UINT_MAX;

		if (num_planes >= bi->num_planes)
			return bi->deratedbw[qgv_point];
	}

	return 0;
}

static unsigned int tgl_max_bw(struct drm_i915_private *dev_priv,
			       int num_planes, int qgv_point)
{
	int i;

	/*
	 * Let's return max bw for 0 planes
	 */
	num_planes = max(1, num_planes);

	for (i = ARRAY_SIZE(dev_priv->max_bw) - 1; i >= 0; i--) {
		const struct intel_bw_info *bi =
			&dev_priv->max_bw[i];

		/*
		 * Pcode will not expose all QGV points when
		 * SAGV is forced to off/min/med/max.
		 */
		if (qgv_point >= bi->num_qgv_points)
			return UINT_MAX;

		if (num_planes <= bi->num_planes)
			return bi->deratedbw[qgv_point];
	}

	return dev_priv->max_bw[0].deratedbw[qgv_point];
}

static unsigned int adl_psf_bw(struct drm_i915_private *dev_priv,
			       int psf_gv_point)
{
	const struct intel_bw_info *bi =
		&dev_priv->max_bw[0];

	return bi->psf_bw[psf_gv_point];
}

void intel_bw_init_hw(struct drm_i915_private *dev_priv)
{
	if (!HAS_DISPLAY(dev_priv))
		return;

	if (IS_DG2(dev_priv))
		dg2_get_bw_info(dev_priv);
	else if (IS_ALDERLAKE_P(dev_priv))
		tgl_get_bw_info(dev_priv, &adlp_sa_info);
	else if (IS_ALDERLAKE_S(dev_priv))
		tgl_get_bw_info(dev_priv, &adls_sa_info);
	else if (IS_ROCKETLAKE(dev_priv))
		tgl_get_bw_info(dev_priv, &rkl_sa_info);
	else if (DISPLAY_VER(dev_priv) == 12)
		tgl_get_bw_info(dev_priv, &tgl_sa_info);
	else if (DISPLAY_VER(dev_priv) == 11)
		icl_get_bw_info(dev_priv, &icl_sa_info);
}

static unsigned int intel_bw_crtc_num_active_planes(const struct intel_crtc_state *crtc_state)
{
	/*
	 * We assume cursors are small enough
	 * to not cause bandwidth problems.
	 */
	return hweight8(crtc_state->active_planes & ~BIT(PLANE_CURSOR));
}

static unsigned int intel_bw_crtc_data_rate(const struct intel_crtc_state *crtc_state)
{
	struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
	unsigned int data_rate = 0;
	enum plane_id plane_id;

	for_each_plane_id_on_crtc(crtc, plane_id) {
		/*
		 * We assume cursors are small enough
		 * to not cause bandwidth problems.
		 */
		if (plane_id == PLANE_CURSOR)
			continue;

		data_rate += crtc_state->data_rate[plane_id];
	}

	return data_rate;
}

void intel_bw_crtc_update(struct intel_bw_state *bw_state,
			  const struct intel_crtc_state *crtc_state)
{
	struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
	struct drm_i915_private *i915 = to_i915(crtc->base.dev);

	bw_state->data_rate[crtc->pipe] =
		intel_bw_crtc_data_rate(crtc_state);
	bw_state->num_active_planes[crtc->pipe] =
		intel_bw_crtc_num_active_planes(crtc_state);

	drm_dbg_kms(&i915->drm, "pipe %c data rate %u num active planes %u\n",
		    pipe_name(crtc->pipe),
		    bw_state->data_rate[crtc->pipe],
		    bw_state->num_active_planes[crtc->pipe]);
}

static unsigned int intel_bw_num_active_planes(struct drm_i915_private *dev_priv,
					       const struct intel_bw_state *bw_state)
{
	unsigned int num_active_planes = 0;
	enum pipe pipe;

	for_each_pipe(dev_priv, pipe)
		num_active_planes += bw_state->num_active_planes[pipe];

	return num_active_planes;
}

static unsigned int intel_bw_data_rate(struct drm_i915_private *dev_priv,
				       const struct intel_bw_state *bw_state)
{
	unsigned int data_rate = 0;
	enum pipe pipe;

	for_each_pipe(dev_priv, pipe)
		data_rate += bw_state->data_rate[pipe];

	if (DISPLAY_VER(dev_priv) >= 13 && intel_vtd_active(dev_priv))
		data_rate = data_rate * 105 / 100;

	return data_rate;
}

struct intel_bw_state *
intel_atomic_get_old_bw_state(struct intel_atomic_state *state)
{
	struct drm_i915_private *dev_priv = to_i915(state->base.dev);
	struct intel_global_state *bw_state;

	bw_state = intel_atomic_get_old_global_obj_state(state, &dev_priv->bw_obj);

	return to_intel_bw_state(bw_state);
}

struct intel_bw_state *
intel_atomic_get_new_bw_state(struct intel_atomic_state *state)
{
	struct drm_i915_private *dev_priv = to_i915(state->base.dev);
	struct intel_global_state *bw_state;

	bw_state = intel_atomic_get_new_global_obj_state(state, &dev_priv->bw_obj);

	return to_intel_bw_state(bw_state);
}

struct intel_bw_state *
intel_atomic_get_bw_state(struct intel_atomic_state *state)
{
	struct drm_i915_private *dev_priv = to_i915(state->base.dev);
	struct intel_global_state *bw_state;

	bw_state = intel_atomic_get_global_obj_state(state, &dev_priv->bw_obj);
	if (IS_ERR(bw_state))
		return ERR_CAST(bw_state);

	return to_intel_bw_state(bw_state);
}

static void skl_crtc_calc_dbuf_bw(struct intel_bw_state *bw_state,
				  const struct intel_crtc_state *crtc_state)
{
	struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
	struct drm_i915_private *i915 = to_i915(crtc->base.dev);
	struct intel_dbuf_bw *crtc_bw = &bw_state->dbuf_bw[crtc->pipe];
	enum plane_id plane_id;

	memset(&crtc_bw->used_bw, 0, sizeof(crtc_bw->used_bw));

	if (!crtc_state->hw.active)
		return;

	for_each_plane_id_on_crtc(crtc, plane_id) {
		const struct skl_ddb_entry *ddb_y =
			&crtc_state->wm.skl.plane_ddb_y[plane_id];
		const struct skl_ddb_entry *ddb_uv =
			&crtc_state->wm.skl.plane_ddb_uv[plane_id];
		unsigned int data_rate = crtc_state->data_rate[plane_id];
		unsigned int dbuf_mask = 0;
		enum dbuf_slice slice;

		dbuf_mask |= skl_ddb_dbuf_slice_mask(i915, ddb_y);
		dbuf_mask |= skl_ddb_dbuf_slice_mask(i915, ddb_uv);

		/*
		 * FIXME: To calculate this more properly we probably
		 * need to split the per-plane data_rate into data_rate_y
		 * and data_rate_uv for multiplanar formats, so that the
		 * rate is not accounted twice when the two color planes
		 * happen to reside on different slices.
		 * However, for pre-icl this works anyway because we have
		 * only a single slice, and for icl+ the uv plane has a
		 * non-zero data rate.
		 * So in the worst case these calculations are a bit
		 * pessimistic, which shouldn't pose any significant
		 * problem anyway.
		 */
		for_each_dbuf_slice_in_mask(i915, slice, dbuf_mask)
			crtc_bw->used_bw[slice] += data_rate;
	}
}

int skl_bw_calc_min_cdclk(struct intel_atomic_state *state)
{
	struct drm_i915_private *dev_priv = to_i915(state->base.dev);
	struct intel_bw_state *new_bw_state = NULL;
	struct intel_bw_state *old_bw_state = NULL;
	const struct intel_crtc_state *crtc_state;
	struct intel_crtc *crtc;
	int max_bw = 0;
	enum pipe pipe;
	int i;

	for_each_new_intel_crtc_in_state(state, crtc, crtc_state, i) {
		new_bw_state = intel_atomic_get_bw_state(state);
		if (IS_ERR(new_bw_state))
			return PTR_ERR(new_bw_state);

		old_bw_state = intel_atomic_get_old_bw_state(state);

		skl_crtc_calc_dbuf_bw(new_bw_state, crtc_state);
	}

	if (!old_bw_state)
		return 0;

	for_each_pipe(dev_priv, pipe) {
		struct intel_dbuf_bw *crtc_bw;
		enum dbuf_slice slice;

		crtc_bw = &new_bw_state->dbuf_bw[pipe];

		for_each_dbuf_slice(dev_priv, slice) {
			/*
			 * Current experimental observations show that,
			 * contrary to BSpec, we get underruns once we
			 * exceed 64 * CDCLK for slices in total.
			 * As a temporary measure, in order not to keep
			 * CDCLK bumped up all the time, we calculate
			 * CDCLK according to this formula for the
			 * overall bw consumed by slices.
			 */
			max_bw += crtc_bw->used_bw[slice];
		}
	}

	new_bw_state->min_cdclk = max_bw / 64;

	if (new_bw_state->min_cdclk != old_bw_state->min_cdclk) {
		int ret = intel_atomic_lock_global_state(&new_bw_state->base);

		if (ret)
			return ret;
	}

	return 0;
}

int intel_bw_calc_min_cdclk(struct intel_atomic_state *state)
{
	struct drm_i915_private *dev_priv = to_i915(state->base.dev);
	struct intel_bw_state *new_bw_state = NULL;
	struct intel_bw_state *old_bw_state = NULL;
	const struct intel_crtc_state *crtc_state;
	struct intel_crtc *crtc;
	int min_cdclk = 0;
	enum pipe pipe;
	int i;

	for_each_new_intel_crtc_in_state(state, crtc, crtc_state, i) {
		new_bw_state = intel_atomic_get_bw_state(state);
		if (IS_ERR(new_bw_state))
			return PTR_ERR(new_bw_state);

		old_bw_state = intel_atomic_get_old_bw_state(state);
	}

	if (!old_bw_state)
		return 0;

	for_each_pipe(dev_priv, pipe) {
		struct intel_cdclk_state *cdclk_state;

		cdclk_state = intel_atomic_get_new_cdclk_state(state);
		if (!cdclk_state)
			return 0;

		min_cdclk = max(cdclk_state->min_cdclk[pipe], min_cdclk);
	}

	new_bw_state->min_cdclk = min_cdclk;

	if (new_bw_state->min_cdclk != old_bw_state->min_cdclk) {
		int ret = intel_atomic_lock_global_state(&new_bw_state->base);

		if (ret)
			return ret;
	}

	return 0;
}

static u16 icl_qgv_points_mask(struct drm_i915_private *i915)
{
	unsigned int num_psf_gv_points = i915->max_bw[0].num_psf_gv_points;
	unsigned int num_qgv_points = i915->max_bw[0].num_qgv_points;
	u16 mask = 0;

	/*
	 * We can _not_ use the whole ADLS_QGV_PT_MASK here, as PCode rejects
	 * it with failure if we try masking any unadvertised points.
	 * So we need to operate only on the points returned by PCode.
	 */
	if (num_qgv_points > 0)
		mask |= REG_GENMASK(num_qgv_points - 1, 0);

	if (num_psf_gv_points > 0)
		mask |= REG_GENMASK(num_psf_gv_points - 1, 0) << ADLS_PSF_PT_SHIFT;

	return mask;
}

static int intel_bw_check_data_rate(struct intel_atomic_state *state, bool *changed)
{
	struct drm_i915_private *i915 = to_i915(state->base.dev);
	const struct intel_crtc_state *new_crtc_state, *old_crtc_state;
	struct intel_crtc *crtc;
	int i;

	for_each_oldnew_intel_crtc_in_state(state, crtc, old_crtc_state,
					    new_crtc_state, i) {
		unsigned int old_data_rate =
			intel_bw_crtc_data_rate(old_crtc_state);
		unsigned int new_data_rate =
			intel_bw_crtc_data_rate(new_crtc_state);
		unsigned int old_active_planes =
			intel_bw_crtc_num_active_planes(old_crtc_state);
		unsigned int new_active_planes =
			intel_bw_crtc_num_active_planes(new_crtc_state);
		struct intel_bw_state *new_bw_state;

		/*
		 * Avoid locking the bw state when
		 * nothing significant has changed.
		 */
		if (old_data_rate == new_data_rate &&
		    old_active_planes == new_active_planes)
			continue;

		new_bw_state = intel_atomic_get_bw_state(state);
		if (IS_ERR(new_bw_state))
			return PTR_ERR(new_bw_state);

		new_bw_state->data_rate[crtc->pipe] = new_data_rate;
		new_bw_state->num_active_planes[crtc->pipe] = new_active_planes;

		*changed = true;

		drm_dbg_kms(&i915->drm,
			    "[CRTC:%d:%s] data rate %u num active planes %u\n",
			    crtc->base.base.id, crtc->base.name,
			    new_bw_state->data_rate[crtc->pipe],
			    new_bw_state->num_active_planes[crtc->pipe]);
	}

	return 0;
}

int intel_bw_atomic_check(struct intel_atomic_state *state)
{
	struct drm_i915_private *dev_priv = to_i915(state->base.dev);
	const struct intel_bw_state *old_bw_state;
	struct intel_bw_state *new_bw_state;
	unsigned int data_rate;
	unsigned int num_active_planes;
	int i, ret;
	u32 allowed_points = 0;
	unsigned int max_bw_point = 0, max_bw = 0;
	unsigned int num_qgv_points = dev_priv->max_bw[0].num_qgv_points;
	unsigned int num_psf_gv_points = dev_priv->max_bw[0].num_psf_gv_points;
	bool changed = false;

	/* FIXME earlier gens need some checks too */
	if (DISPLAY_VER(dev_priv) < 11)
		return 0;

	ret = intel_bw_check_data_rate(state, &changed);
	if (ret)
		return ret;

	old_bw_state = intel_atomic_get_old_bw_state(state);
	new_bw_state = intel_atomic_get_new_bw_state(state);

	if (new_bw_state &&
	    intel_can_enable_sagv(dev_priv, old_bw_state) !=
	    intel_can_enable_sagv(dev_priv, new_bw_state))
		changed = true;

	/*
	 * If none of our inputs (data rates, number of active
	 * planes, SAGV yes/no) changed, there is nothing to do here.
	 */
	if (!changed)
		return 0;

	ret = intel_atomic_lock_global_state(&new_bw_state->base);
	if (ret)
		return ret;

	data_rate = intel_bw_data_rate(dev_priv, new_bw_state);
	data_rate = DIV_ROUND_UP(data_rate, 1000);

	num_active_planes = intel_bw_num_active_planes(dev_priv, new_bw_state);

	for (i = 0; i < num_qgv_points; i++) {
		unsigned int max_data_rate;

		if (DISPLAY_VER(dev_priv) > 11)
			max_data_rate = tgl_max_bw(dev_priv, num_active_planes, i);
		else
			max_data_rate = icl_max_bw(dev_priv, num_active_planes, i);
		/*
		 * We need to know which qgv point gives us
		 * maximum bandwidth in order to disable SAGV
		 * if we find that we exceed SAGV block time
		 * with watermarks. By that moment we already
		 * have those, as they are calculated earlier
		 * in intel_atomic_check.
		 */
		if (max_data_rate > max_bw) {
			max_bw_point = i;
			max_bw = max_data_rate;
		}
		if (max_data_rate >= data_rate)
			allowed_points |= REG_FIELD_PREP(ADLS_QGV_PT_MASK, BIT(i));

		drm_dbg_kms(&dev_priv->drm, "QGV point %d: max bw %d required %d\n",
			    i, max_data_rate, data_rate);
	}

	for (i = 0; i < num_psf_gv_points; i++) {
		unsigned int max_data_rate = adl_psf_bw(dev_priv, i);

		if (max_data_rate >= data_rate)
			allowed_points |= REG_FIELD_PREP(ADLS_PSF_PT_MASK, BIT(i));

		drm_dbg_kms(&dev_priv->drm, "PSF GV point %d: max bw %d"
			    " required %d\n",
			    i, max_data_rate, data_rate);
	}

	/*
	 * BSpec states that we should always have at least one allowed
	 * point left, so if none remains, simply reject the configuration.
	 */
	if ((allowed_points & ADLS_QGV_PT_MASK) == 0) {
		drm_dbg_kms(&dev_priv->drm, "No QGV points provide sufficient memory"
			    " bandwidth %d for display configuration (%d active planes).\n",
			    data_rate, num_active_planes);
		return -EINVAL;
	}

	if (num_psf_gv_points > 0) {
		if ((allowed_points & ADLS_PSF_PT_MASK) == 0) {
			drm_dbg_kms(&dev_priv->drm, "No PSF GV points provide sufficient memory"
				    " bandwidth %d for display configuration (%d active planes).\n",
				    data_rate, num_active_planes);
			return -EINVAL;
		}
	}

	/*
	 * Leave only the single point with the highest bandwidth if
	 * we can't enable SAGV due to the increased memory latency it
	 * may cause.
	 */
	if (!intel_can_enable_sagv(dev_priv, new_bw_state)) {
		allowed_points &= ADLS_PSF_PT_MASK;
		allowed_points |= BIT(max_bw_point);
		drm_dbg_kms(&dev_priv->drm, "No SAGV, using single QGV point %d\n",
			    max_bw_point);
	}
	/*
	 * We store the ones which need to be masked as that is what PCode
	 * actually accepts as a parameter.
	 */
	new_bw_state->qgv_points_mask = ~allowed_points &
					icl_qgv_points_mask(dev_priv);

	/*
	 * If the actual mask has changed we need to make sure that
	 * the commits are serialized (in case this is a nomodeset,
	 * nonblocking commit).
	 */
	if (new_bw_state->qgv_points_mask != old_bw_state->qgv_points_mask) {
		ret = intel_atomic_serialize_global_state(&new_bw_state->base);
		if (ret)
			return ret;
	}

	return 0;
}

static struct intel_global_state *
intel_bw_duplicate_state(struct intel_global_obj *obj)
{
	struct intel_bw_state *state;

	state = kmemdup(obj->state, sizeof(*state), GFP_KERNEL);
	if (!state)
		return NULL;

	return &state->base;
}

static void intel_bw_destroy_state(struct intel_global_obj *obj,
				   struct intel_global_state *state)
{
	kfree(state);
}

static const struct intel_global_state_funcs intel_bw_funcs = {
	.atomic_duplicate_state = intel_bw_duplicate_state,
	.atomic_destroy_state = intel_bw_destroy_state,
};

int intel_bw_init(struct drm_i915_private *dev_priv)
{
	struct intel_bw_state *state;

	state = kzalloc(sizeof(*state), GFP_KERNEL);
	if (!state)
		return -ENOMEM;

	intel_atomic_global_obj_init(dev_priv, &dev_priv->bw_obj,
				     &state->base, &intel_bw_funcs);

	return 0;
}