1 // SPDX-License-Identifier: MIT 2 /* 3 * Copyright © 2019 Intel Corporation 4 */ 5 6 #include <drm/drm_atomic_state_helper.h> 7 8 #include "intel_atomic.h" 9 #include "intel_bw.h" 10 #include "intel_cdclk.h" 11 #include "intel_display_types.h" 12 #include "intel_pcode.h" 13 #include "intel_pm.h" 14 15 /* Parameters for Qclk Geyserville (QGV) */ 16 struct intel_qgv_point { 17 u16 dclk, t_rp, t_rdpre, t_rc, t_ras, t_rcd; 18 }; 19 20 struct intel_psf_gv_point { 21 u8 clk; /* clock in multiples of 16.6666 MHz */ 22 }; 23 24 struct intel_qgv_info { 25 struct intel_qgv_point points[I915_NUM_QGV_POINTS]; 26 struct intel_psf_gv_point psf_points[I915_NUM_PSF_GV_POINTS]; 27 u8 num_points; 28 u8 num_psf_points; 29 u8 t_bl; 30 u8 max_numchannels; 31 u8 channel_width; 32 u8 deinterleave; 33 }; 34 35 static int dg1_mchbar_read_qgv_point_info(struct drm_i915_private *dev_priv, 36 struct intel_qgv_point *sp, 37 int point) 38 { 39 u32 dclk_ratio, dclk_reference; 40 u32 val; 41 42 val = intel_uncore_read(&dev_priv->uncore, SA_PERF_STATUS_0_0_0_MCHBAR_PC); 43 dclk_ratio = REG_FIELD_GET(DG1_QCLK_RATIO_MASK, val); 44 if (val & DG1_QCLK_REFERENCE) 45 dclk_reference = 6; /* 6 * 16.666 MHz = 100 MHz */ 46 else 47 dclk_reference = 8; /* 8 * 16.666 MHz = 133 MHz */ 48 sp->dclk = DIV_ROUND_UP((16667 * dclk_ratio * dclk_reference) + 500, 1000); 49 50 val = intel_uncore_read(&dev_priv->uncore, SKL_MC_BIOS_DATA_0_0_0_MCHBAR_PCU); 51 if (val & DG1_GEAR_TYPE) 52 sp->dclk *= 2; 53 54 if (sp->dclk == 0) 55 return -EINVAL; 56 57 val = intel_uncore_read(&dev_priv->uncore, MCHBAR_CH0_CR_TC_PRE_0_0_0_MCHBAR); 58 sp->t_rp = REG_FIELD_GET(DG1_DRAM_T_RP_MASK, val); 59 sp->t_rdpre = REG_FIELD_GET(DG1_DRAM_T_RDPRE_MASK, val); 60 61 val = intel_uncore_read(&dev_priv->uncore, MCHBAR_CH0_CR_TC_PRE_0_0_0_MCHBAR_HIGH); 62 sp->t_rcd = REG_FIELD_GET(DG1_DRAM_T_RCD_MASK, val); 63 sp->t_ras = REG_FIELD_GET(DG1_DRAM_T_RAS_MASK, val); 64 65 sp->t_rc = sp->t_rp + sp->t_ras; 66 67 return 0; 68 } 69 70 static int icl_pcode_read_qgv_point_info(struct drm_i915_private *dev_priv, 71 struct intel_qgv_point *sp, 72 int point) 73 { 74 u32 val = 0, val2 = 0; 75 u16 dclk; 76 int ret; 77 78 ret = sandybridge_pcode_read(dev_priv, 79 ICL_PCODE_MEM_SUBSYSYSTEM_INFO | 80 ICL_PCODE_MEM_SS_READ_QGV_POINT_INFO(point), 81 &val, &val2); 82 if (ret) 83 return ret; 84 85 dclk = val & 0xffff; 86 sp->dclk = DIV_ROUND_UP((16667 * dclk) + (DISPLAY_VER(dev_priv) > 11 ? 500 : 0), 1000); 87 sp->t_rp = (val & 0xff0000) >> 16; 88 sp->t_rcd = (val & 0xff000000) >> 24; 89 90 sp->t_rdpre = val2 & 0xff; 91 sp->t_ras = (val2 & 0xff00) >> 8; 92 93 sp->t_rc = sp->t_rp + sp->t_ras; 94 95 return 0; 96 } 97 98 static int adls_pcode_read_psf_gv_point_info(struct drm_i915_private *dev_priv, 99 struct intel_psf_gv_point *points) 100 { 101 u32 val = 0; 102 int ret; 103 int i; 104 105 ret = sandybridge_pcode_read(dev_priv, 106 ICL_PCODE_MEM_SUBSYSYSTEM_INFO | 107 ADL_PCODE_MEM_SS_READ_PSF_GV_INFO, 108 &val, NULL); 109 if (ret) 110 return ret; 111 112 for (i = 0; i < I915_NUM_PSF_GV_POINTS; i++) { 113 points[i].clk = val & 0xff; 114 val >>= 8; 115 } 116 117 return 0; 118 } 119 120 int icl_pcode_restrict_qgv_points(struct drm_i915_private *dev_priv, 121 u32 points_mask) 122 { 123 int ret; 124 125 /* bspec says to keep retrying for at least 1 ms */ 126 ret = skl_pcode_request(dev_priv, ICL_PCODE_SAGV_DE_MEM_SS_CONFIG, 127 points_mask, 128 ICL_PCODE_POINTS_RESTRICTED_MASK, 129 ICL_PCODE_POINTS_RESTRICTED, 130 1); 131 132 if (ret < 0) { 133 drm_err(&dev_priv->drm, "Failed to disable qgv points (%d) points: 0x%x\n", ret, points_mask); 134 return ret; 135 } 136 137 return 0; 138 } 139 140 static int icl_get_qgv_points(struct drm_i915_private *dev_priv, 141 struct intel_qgv_info *qi, 142 bool is_y_tile) 143 { 144 const struct dram_info *dram_info = &dev_priv->dram_info; 145 int i, ret; 146 147 qi->num_points = dram_info->num_qgv_points; 148 qi->num_psf_points = dram_info->num_psf_gv_points; 149 150 if (DISPLAY_VER(dev_priv) >= 12) 151 switch (dram_info->type) { 152 case INTEL_DRAM_DDR4: 153 qi->t_bl = is_y_tile ? 8 : 4; 154 qi->max_numchannels = 2; 155 qi->channel_width = 64; 156 qi->deinterleave = is_y_tile ? 1 : 2; 157 break; 158 case INTEL_DRAM_DDR5: 159 qi->t_bl = is_y_tile ? 16 : 8; 160 qi->max_numchannels = 4; 161 qi->channel_width = 32; 162 qi->deinterleave = is_y_tile ? 1 : 2; 163 break; 164 case INTEL_DRAM_LPDDR4: 165 if (IS_ROCKETLAKE(dev_priv)) { 166 qi->t_bl = 8; 167 qi->max_numchannels = 4; 168 qi->channel_width = 32; 169 qi->deinterleave = 2; 170 break; 171 } 172 fallthrough; 173 case INTEL_DRAM_LPDDR5: 174 qi->t_bl = 16; 175 qi->max_numchannels = 8; 176 qi->channel_width = 16; 177 qi->deinterleave = is_y_tile ? 2 : 4; 178 break; 179 default: 180 qi->t_bl = 16; 181 qi->max_numchannels = 1; 182 break; 183 } 184 else if (DISPLAY_VER(dev_priv) == 11) { 185 qi->t_bl = dev_priv->dram_info.type == INTEL_DRAM_DDR4 ? 4 : 8; 186 qi->max_numchannels = 1; 187 } 188 189 if (drm_WARN_ON(&dev_priv->drm, 190 qi->num_points > ARRAY_SIZE(qi->points))) 191 qi->num_points = ARRAY_SIZE(qi->points); 192 193 for (i = 0; i < qi->num_points; i++) { 194 struct intel_qgv_point *sp = &qi->points[i]; 195 196 if (IS_DG1(dev_priv)) 197 ret = dg1_mchbar_read_qgv_point_info(dev_priv, sp, i); 198 else 199 ret = icl_pcode_read_qgv_point_info(dev_priv, sp, i); 200 201 if (ret) 202 return ret; 203 204 drm_dbg_kms(&dev_priv->drm, 205 "QGV %d: DCLK=%d tRP=%d tRDPRE=%d tRAS=%d tRCD=%d tRC=%d\n", 206 i, sp->dclk, sp->t_rp, sp->t_rdpre, sp->t_ras, 207 sp->t_rcd, sp->t_rc); 208 } 209 210 if (qi->num_psf_points > 0) { 211 ret = adls_pcode_read_psf_gv_point_info(dev_priv, qi->psf_points); 212 if (ret) { 213 drm_err(&dev_priv->drm, "Failed to read PSF point data; PSF points will not be considered in bandwidth calculations.\n"); 214 qi->num_psf_points = 0; 215 } 216 217 for (i = 0; i < qi->num_psf_points; i++) 218 drm_dbg_kms(&dev_priv->drm, 219 "PSF GV %d: CLK=%d \n", 220 i, qi->psf_points[i].clk); 221 } 222 223 return 0; 224 } 225 226 static int adl_calc_psf_bw(int clk) 227 { 228 /* 229 * clk is multiples of 16.666MHz (100/6) 230 * According to BSpec PSF GV bandwidth is 231 * calculated as BW = 64 * clk * 16.666Mhz 232 */ 233 return DIV_ROUND_CLOSEST(64 * clk * 100, 6); 234 } 235 236 static int icl_sagv_max_dclk(const struct intel_qgv_info *qi) 237 { 238 u16 dclk = 0; 239 int i; 240 241 for (i = 0; i < qi->num_points; i++) 242 dclk = max(dclk, qi->points[i].dclk); 243 244 return dclk; 245 } 246 247 struct intel_sa_info { 248 u16 displayrtids; 249 u8 deburst, deprogbwlimit, derating; 250 }; 251 252 static const struct intel_sa_info icl_sa_info = { 253 .deburst = 8, 254 .deprogbwlimit = 25, /* GB/s */ 255 .displayrtids = 128, 256 .derating = 10, 257 }; 258 259 static const struct intel_sa_info tgl_sa_info = { 260 .deburst = 16, 261 .deprogbwlimit = 34, /* GB/s */ 262 .displayrtids = 256, 263 .derating = 10, 264 }; 265 266 static const struct intel_sa_info rkl_sa_info = { 267 .deburst = 8, 268 .deprogbwlimit = 20, /* GB/s */ 269 .displayrtids = 128, 270 .derating = 10, 271 }; 272 273 static const struct intel_sa_info adls_sa_info = { 274 .deburst = 16, 275 .deprogbwlimit = 38, /* GB/s */ 276 .displayrtids = 256, 277 .derating = 10, 278 }; 279 280 static const struct intel_sa_info adlp_sa_info = { 281 .deburst = 16, 282 .deprogbwlimit = 38, /* GB/s */ 283 .displayrtids = 256, 284 .derating = 20, 285 }; 286 287 static int icl_get_bw_info(struct drm_i915_private *dev_priv, const struct intel_sa_info *sa) 288 { 289 struct intel_qgv_info qi = {}; 290 bool is_y_tile = true; /* assume y tile may be used */ 291 int num_channels = max_t(u8, 1, dev_priv->dram_info.num_channels); 292 int ipqdepth, ipqdepthpch = 16; 293 int dclk_max; 294 int maxdebw; 295 int num_groups = ARRAY_SIZE(dev_priv->max_bw); 296 int i, ret; 297 298 ret = icl_get_qgv_points(dev_priv, &qi, is_y_tile); 299 if (ret) { 300 drm_dbg_kms(&dev_priv->drm, 301 "Failed to get memory subsystem information, ignoring bandwidth limits"); 302 return ret; 303 } 304 305 dclk_max = icl_sagv_max_dclk(&qi); 306 maxdebw = min(sa->deprogbwlimit * 1000, dclk_max * 16 * 6 / 10); 307 ipqdepth = min(ipqdepthpch, sa->displayrtids / num_channels); 308 qi.deinterleave = DIV_ROUND_UP(num_channels, is_y_tile ? 4 : 2); 309 310 for (i = 0; i < num_groups; i++) { 311 struct intel_bw_info *bi = &dev_priv->max_bw[i]; 312 int clpchgroup; 313 int j; 314 315 clpchgroup = (sa->deburst * qi.deinterleave / num_channels) << i; 316 bi->num_planes = (ipqdepth - clpchgroup) / clpchgroup + 1; 317 318 bi->num_qgv_points = qi.num_points; 319 bi->num_psf_gv_points = qi.num_psf_points; 320 321 for (j = 0; j < qi.num_points; j++) { 322 const struct intel_qgv_point *sp = &qi.points[j]; 323 int ct, bw; 324 325 /* 326 * Max row cycle time 327 * 328 * FIXME what is the logic behind the 329 * assumed burst length? 330 */ 331 ct = max_t(int, sp->t_rc, sp->t_rp + sp->t_rcd + 332 (clpchgroup - 1) * qi.t_bl + sp->t_rdpre); 333 bw = DIV_ROUND_UP(sp->dclk * clpchgroup * 32 * num_channels, ct); 334 335 bi->deratedbw[j] = min(maxdebw, 336 bw * (100 - sa->derating) / 100); 337 338 drm_dbg_kms(&dev_priv->drm, 339 "BW%d / QGV %d: num_planes=%d deratedbw=%u\n", 340 i, j, bi->num_planes, bi->deratedbw[j]); 341 } 342 } 343 /* 344 * In case if SAGV is disabled in BIOS, we always get 1 345 * SAGV point, but we can't send PCode commands to restrict it 346 * as it will fail and pointless anyway. 347 */ 348 if (qi.num_points == 1) 349 dev_priv->sagv_status = I915_SAGV_NOT_CONTROLLED; 350 else 351 dev_priv->sagv_status = I915_SAGV_ENABLED; 352 353 return 0; 354 } 355 356 static int tgl_get_bw_info(struct drm_i915_private *dev_priv, const struct intel_sa_info *sa) 357 { 358 struct intel_qgv_info qi = {}; 359 const struct dram_info *dram_info = &dev_priv->dram_info; 360 bool is_y_tile = true; /* assume y tile may be used */ 361 int num_channels = max_t(u8, 1, dev_priv->dram_info.num_channels); 362 int ipqdepth, ipqdepthpch = 16; 363 int dclk_max; 364 int maxdebw, peakbw; 365 int clperchgroup; 366 int num_groups = ARRAY_SIZE(dev_priv->max_bw); 367 int i, ret; 368 369 ret = icl_get_qgv_points(dev_priv, &qi, is_y_tile); 370 if (ret) { 371 drm_dbg_kms(&dev_priv->drm, 372 "Failed to get memory subsystem information, ignoring bandwidth limits"); 373 return ret; 374 } 375 376 if (dram_info->type == INTEL_DRAM_LPDDR4 || dram_info->type == INTEL_DRAM_LPDDR5) 377 num_channels *= 2; 378 379 qi.deinterleave = qi.deinterleave ? : DIV_ROUND_UP(num_channels, is_y_tile ? 4 : 2); 380 381 if (num_channels < qi.max_numchannels && DISPLAY_VER(dev_priv) >= 12) 382 qi.deinterleave = max(DIV_ROUND_UP(qi.deinterleave, 2), 1); 383 384 if (DISPLAY_VER(dev_priv) > 11 && num_channels > qi.max_numchannels) 385 drm_warn(&dev_priv->drm, "Number of channels exceeds max number of channels."); 386 if (qi.max_numchannels != 0) 387 num_channels = min_t(u8, num_channels, qi.max_numchannels); 388 389 dclk_max = icl_sagv_max_dclk(&qi); 390 391 peakbw = num_channels * DIV_ROUND_UP(qi.channel_width, 8) * dclk_max; 392 maxdebw = min(sa->deprogbwlimit * 1000, peakbw * 6 / 10); /* 60% */ 393 394 ipqdepth = min(ipqdepthpch, sa->displayrtids / num_channels); 395 /* 396 * clperchgroup = 4kpagespermempage * clperchperblock, 397 * clperchperblock = 8 / num_channels * interleave 398 */ 399 clperchgroup = 4 * DIV_ROUND_UP(8, num_channels) * qi.deinterleave; 400 401 for (i = 0; i < num_groups; i++) { 402 struct intel_bw_info *bi = &dev_priv->max_bw[i]; 403 struct intel_bw_info *bi_next; 404 int clpchgroup; 405 int j; 406 407 if (i < num_groups - 1) 408 bi_next = &dev_priv->max_bw[i + 1]; 409 410 clpchgroup = (sa->deburst * qi.deinterleave / num_channels) << i; 411 412 if (i < num_groups - 1 && clpchgroup < clperchgroup) 413 bi_next->num_planes = (ipqdepth - clpchgroup) / clpchgroup + 1; 414 else 415 bi_next->num_planes = 0; 416 417 bi->num_qgv_points = qi.num_points; 418 bi->num_psf_gv_points = qi.num_psf_points; 419 420 for (j = 0; j < qi.num_points; j++) { 421 const struct intel_qgv_point *sp = &qi.points[j]; 422 int ct, bw; 423 424 /* 425 * Max row cycle time 426 * 427 * FIXME what is the logic behind the 428 * assumed burst length? 429 */ 430 ct = max_t(int, sp->t_rc, sp->t_rp + sp->t_rcd + 431 (clpchgroup - 1) * qi.t_bl + sp->t_rdpre); 432 bw = DIV_ROUND_UP(sp->dclk * clpchgroup * 32 * num_channels, ct); 433 434 bi->deratedbw[j] = min(maxdebw, 435 bw * (100 - sa->derating) / 100); 436 437 drm_dbg_kms(&dev_priv->drm, 438 "BW%d / QGV %d: num_planes=%d deratedbw=%u\n", 439 i, j, bi->num_planes, bi->deratedbw[j]); 440 } 441 442 for (j = 0; j < qi.num_psf_points; j++) { 443 const struct intel_psf_gv_point *sp = &qi.psf_points[j]; 444 445 bi->psf_bw[j] = adl_calc_psf_bw(sp->clk); 446 447 drm_dbg_kms(&dev_priv->drm, 448 "BW%d / PSF GV %d: num_planes=%d bw=%u\n", 449 i, j, bi->num_planes, bi->psf_bw[j]); 450 } 451 } 452 453 /* 454 * In case if SAGV is disabled in BIOS, we always get 1 455 * SAGV point, but we can't send PCode commands to restrict it 456 * as it will fail and pointless anyway. 457 */ 458 if (qi.num_points == 1) 459 dev_priv->sagv_status = I915_SAGV_NOT_CONTROLLED; 460 else 461 dev_priv->sagv_status = I915_SAGV_ENABLED; 462 463 return 0; 464 } 465 466 static void dg2_get_bw_info(struct drm_i915_private *i915) 467 { 468 struct intel_bw_info *bi = &i915->max_bw[0]; 469 470 /* 471 * DG2 doesn't have SAGV or QGV points, just a constant max bandwidth 472 * that doesn't depend on the number of planes enabled. Create a 473 * single dummy QGV point to reflect that. DG2-G10 platforms have a 474 * constant 50 GB/s bandwidth, whereas DG2-G11 platforms have 38 GB/s. 475 */ 476 bi->num_planes = 1; 477 bi->num_qgv_points = 1; 478 if (IS_DG2_G11(i915)) 479 bi->deratedbw[0] = 38000; 480 else 481 bi->deratedbw[0] = 50000; 482 483 i915->sagv_status = I915_SAGV_NOT_CONTROLLED; 484 } 485 486 static unsigned int icl_max_bw(struct drm_i915_private *dev_priv, 487 int num_planes, int qgv_point) 488 { 489 int i; 490 491 /* 492 * Let's return max bw for 0 planes 493 */ 494 num_planes = max(1, num_planes); 495 496 for (i = 0; i < ARRAY_SIZE(dev_priv->max_bw); i++) { 497 const struct intel_bw_info *bi = 498 &dev_priv->max_bw[i]; 499 500 /* 501 * Pcode will not expose all QGV points when 502 * SAGV is forced to off/min/med/max. 503 */ 504 if (qgv_point >= bi->num_qgv_points) 505 return UINT_MAX; 506 507 if (num_planes >= bi->num_planes) 508 return bi->deratedbw[qgv_point]; 509 } 510 511 return 0; 512 } 513 514 static unsigned int tgl_max_bw(struct drm_i915_private *dev_priv, 515 int num_planes, int qgv_point) 516 { 517 int i; 518 519 /* 520 * Let's return max bw for 0 planes 521 */ 522 num_planes = max(1, num_planes); 523 524 for (i = ARRAY_SIZE(dev_priv->max_bw) - 1; i >= 0; i--) { 525 const struct intel_bw_info *bi = 526 &dev_priv->max_bw[i]; 527 528 /* 529 * Pcode will not expose all QGV points when 530 * SAGV is forced to off/min/med/max. 531 */ 532 if (qgv_point >= bi->num_qgv_points) 533 return UINT_MAX; 534 535 if (num_planes <= bi->num_planes) 536 return bi->deratedbw[qgv_point]; 537 } 538 539 return dev_priv->max_bw[0].deratedbw[qgv_point]; 540 } 541 542 static unsigned int adl_psf_bw(struct drm_i915_private *dev_priv, 543 int psf_gv_point) 544 { 545 const struct intel_bw_info *bi = 546 &dev_priv->max_bw[0]; 547 548 return bi->psf_bw[psf_gv_point]; 549 } 550 551 void intel_bw_init_hw(struct drm_i915_private *dev_priv) 552 { 553 if (!HAS_DISPLAY(dev_priv)) 554 return; 555 556 if (IS_DG2(dev_priv)) 557 dg2_get_bw_info(dev_priv); 558 else if (IS_ALDERLAKE_P(dev_priv)) 559 tgl_get_bw_info(dev_priv, &adlp_sa_info); 560 else if (IS_ALDERLAKE_S(dev_priv)) 561 tgl_get_bw_info(dev_priv, &adls_sa_info); 562 else if (IS_ROCKETLAKE(dev_priv)) 563 tgl_get_bw_info(dev_priv, &rkl_sa_info); 564 else if (DISPLAY_VER(dev_priv) == 12) 565 tgl_get_bw_info(dev_priv, &tgl_sa_info); 566 else if (DISPLAY_VER(dev_priv) == 11) 567 icl_get_bw_info(dev_priv, &icl_sa_info); 568 } 569 570 static unsigned int intel_bw_crtc_num_active_planes(const struct intel_crtc_state *crtc_state) 571 { 572 /* 573 * We assume cursors are small enough 574 * to not not cause bandwidth problems. 575 */ 576 return hweight8(crtc_state->active_planes & ~BIT(PLANE_CURSOR)); 577 } 578 579 static unsigned int intel_bw_crtc_data_rate(const struct intel_crtc_state *crtc_state) 580 { 581 struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); 582 unsigned int data_rate = 0; 583 enum plane_id plane_id; 584 585 for_each_plane_id_on_crtc(crtc, plane_id) { 586 /* 587 * We assume cursors are small enough 588 * to not not cause bandwidth problems. 589 */ 590 if (plane_id == PLANE_CURSOR) 591 continue; 592 593 data_rate += crtc_state->data_rate[plane_id]; 594 } 595 596 return data_rate; 597 } 598 599 void intel_bw_crtc_update(struct intel_bw_state *bw_state, 600 const struct intel_crtc_state *crtc_state) 601 { 602 struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc); 603 struct drm_i915_private *i915 = to_i915(crtc->base.dev); 604 605 bw_state->data_rate[crtc->pipe] = 606 intel_bw_crtc_data_rate(crtc_state); 607 bw_state->num_active_planes[crtc->pipe] = 608 intel_bw_crtc_num_active_planes(crtc_state); 609 610 drm_dbg_kms(&i915->drm, "pipe %c data rate %u num active planes %u\n", 611 pipe_name(crtc->pipe), 612 bw_state->data_rate[crtc->pipe], 613 bw_state->num_active_planes[crtc->pipe]); 614 } 615 616 static unsigned int intel_bw_num_active_planes(struct drm_i915_private *dev_priv, 617 const struct intel_bw_state *bw_state) 618 { 619 unsigned int num_active_planes = 0; 620 enum pipe pipe; 621 622 for_each_pipe(dev_priv, pipe) 623 num_active_planes += bw_state->num_active_planes[pipe]; 624 625 return num_active_planes; 626 } 627 628 static unsigned int intel_bw_data_rate(struct drm_i915_private *dev_priv, 629 const struct intel_bw_state *bw_state) 630 { 631 unsigned int data_rate = 0; 632 enum pipe pipe; 633 634 for_each_pipe(dev_priv, pipe) 635 data_rate += bw_state->data_rate[pipe]; 636 637 if (DISPLAY_VER(dev_priv) >= 13 && intel_vtd_active(dev_priv)) 638 data_rate = data_rate * 105 / 100; 639 640 return data_rate; 641 } 642 643 struct intel_bw_state * 644 intel_atomic_get_old_bw_state(struct intel_atomic_state *state) 645 { 646 struct drm_i915_private *dev_priv = to_i915(state->base.dev); 647 struct intel_global_state *bw_state; 648 649 bw_state = intel_atomic_get_old_global_obj_state(state, &dev_priv->bw_obj); 650 651 return to_intel_bw_state(bw_state); 652 } 653 654 struct intel_bw_state * 655 intel_atomic_get_new_bw_state(struct intel_atomic_state *state) 656 { 657 struct drm_i915_private *dev_priv = to_i915(state->base.dev); 658 struct intel_global_state *bw_state; 659 660 bw_state = intel_atomic_get_new_global_obj_state(state, &dev_priv->bw_obj); 661 662 return to_intel_bw_state(bw_state); 663 } 664 665 struct intel_bw_state * 666 intel_atomic_get_bw_state(struct intel_atomic_state *state) 667 { 668 struct drm_i915_private *dev_priv = to_i915(state->base.dev); 669 struct intel_global_state *bw_state; 670 671 bw_state = intel_atomic_get_global_obj_state(state, &dev_priv->bw_obj); 672 if (IS_ERR(bw_state)) 673 return ERR_CAST(bw_state); 674 675 return to_intel_bw_state(bw_state); 676 } 677 678 int skl_bw_calc_min_cdclk(struct intel_atomic_state *state) 679 { 680 struct drm_i915_private *dev_priv = to_i915(state->base.dev); 681 struct intel_bw_state *new_bw_state = NULL; 682 struct intel_bw_state *old_bw_state = NULL; 683 const struct intel_crtc_state *crtc_state; 684 struct intel_crtc *crtc; 685 int max_bw = 0; 686 enum pipe pipe; 687 int i; 688 689 for_each_new_intel_crtc_in_state(state, crtc, crtc_state, i) { 690 enum plane_id plane_id; 691 struct intel_dbuf_bw *crtc_bw; 692 693 new_bw_state = intel_atomic_get_bw_state(state); 694 if (IS_ERR(new_bw_state)) 695 return PTR_ERR(new_bw_state); 696 697 old_bw_state = intel_atomic_get_old_bw_state(state); 698 699 crtc_bw = &new_bw_state->dbuf_bw[crtc->pipe]; 700 701 memset(&crtc_bw->used_bw, 0, sizeof(crtc_bw->used_bw)); 702 703 if (!crtc_state->hw.active) 704 continue; 705 706 for_each_plane_id_on_crtc(crtc, plane_id) { 707 const struct skl_ddb_entry *plane_alloc = 708 &crtc_state->wm.skl.plane_ddb_y[plane_id]; 709 const struct skl_ddb_entry *uv_plane_alloc = 710 &crtc_state->wm.skl.plane_ddb_uv[plane_id]; 711 unsigned int data_rate = crtc_state->data_rate[plane_id]; 712 unsigned int dbuf_mask = 0; 713 enum dbuf_slice slice; 714 715 dbuf_mask |= skl_ddb_dbuf_slice_mask(dev_priv, plane_alloc); 716 dbuf_mask |= skl_ddb_dbuf_slice_mask(dev_priv, uv_plane_alloc); 717 718 /* 719 * FIXME: To calculate that more properly we probably 720 * need to to split per plane data_rate into data_rate_y 721 * and data_rate_uv for multiplanar formats in order not 722 * to get accounted those twice if they happen to reside 723 * on different slices. 724 * However for pre-icl this would work anyway because 725 * we have only single slice and for icl+ uv plane has 726 * non-zero data rate. 727 * So in worst case those calculation are a bit 728 * pessimistic, which shouldn't pose any significant 729 * problem anyway. 730 */ 731 for_each_dbuf_slice_in_mask(dev_priv, slice, dbuf_mask) 732 crtc_bw->used_bw[slice] += data_rate; 733 } 734 } 735 736 if (!old_bw_state) 737 return 0; 738 739 for_each_pipe(dev_priv, pipe) { 740 struct intel_dbuf_bw *crtc_bw; 741 enum dbuf_slice slice; 742 743 crtc_bw = &new_bw_state->dbuf_bw[pipe]; 744 745 for_each_dbuf_slice(dev_priv, slice) { 746 /* 747 * Current experimental observations show that contrary 748 * to BSpec we get underruns once we exceed 64 * CDCLK 749 * for slices in total. 750 * As a temporary measure in order not to keep CDCLK 751 * bumped up all the time we calculate CDCLK according 752 * to this formula for overall bw consumed by slices. 753 */ 754 max_bw += crtc_bw->used_bw[slice]; 755 } 756 } 757 758 new_bw_state->min_cdclk = max_bw / 64; 759 760 if (new_bw_state->min_cdclk != old_bw_state->min_cdclk) { 761 int ret = intel_atomic_lock_global_state(&new_bw_state->base); 762 763 if (ret) 764 return ret; 765 } 766 767 return 0; 768 } 769 770 int intel_bw_calc_min_cdclk(struct intel_atomic_state *state) 771 { 772 struct drm_i915_private *dev_priv = to_i915(state->base.dev); 773 struct intel_bw_state *new_bw_state = NULL; 774 struct intel_bw_state *old_bw_state = NULL; 775 const struct intel_crtc_state *crtc_state; 776 struct intel_crtc *crtc; 777 int min_cdclk = 0; 778 enum pipe pipe; 779 int i; 780 781 for_each_new_intel_crtc_in_state(state, crtc, crtc_state, i) { 782 new_bw_state = intel_atomic_get_bw_state(state); 783 if (IS_ERR(new_bw_state)) 784 return PTR_ERR(new_bw_state); 785 786 old_bw_state = intel_atomic_get_old_bw_state(state); 787 } 788 789 if (!old_bw_state) 790 return 0; 791 792 for_each_pipe(dev_priv, pipe) { 793 struct intel_cdclk_state *cdclk_state; 794 795 cdclk_state = intel_atomic_get_new_cdclk_state(state); 796 if (!cdclk_state) 797 return 0; 798 799 min_cdclk = max(cdclk_state->min_cdclk[pipe], min_cdclk); 800 } 801 802 new_bw_state->min_cdclk = min_cdclk; 803 804 if (new_bw_state->min_cdclk != old_bw_state->min_cdclk) { 805 int ret = intel_atomic_lock_global_state(&new_bw_state->base); 806 807 if (ret) 808 return ret; 809 } 810 811 return 0; 812 } 813 814 int intel_bw_atomic_check(struct intel_atomic_state *state) 815 { 816 struct drm_i915_private *dev_priv = to_i915(state->base.dev); 817 struct intel_crtc_state *new_crtc_state, *old_crtc_state; 818 struct intel_bw_state *new_bw_state = NULL; 819 const struct intel_bw_state *old_bw_state = NULL; 820 unsigned int data_rate; 821 unsigned int num_active_planes; 822 struct intel_crtc *crtc; 823 int i, ret; 824 u32 allowed_points = 0; 825 unsigned int max_bw_point = 0, max_bw = 0; 826 unsigned int num_qgv_points = dev_priv->max_bw[0].num_qgv_points; 827 unsigned int num_psf_gv_points = dev_priv->max_bw[0].num_psf_gv_points; 828 u32 mask = 0; 829 830 /* FIXME earlier gens need some checks too */ 831 if (DISPLAY_VER(dev_priv) < 11) 832 return 0; 833 834 /* 835 * We can _not_ use the whole ADLS_QGV_PT_MASK here, as PCode rejects 836 * it with failure if we try masking any unadvertised points. 837 * So need to operate only with those returned from PCode. 838 */ 839 if (num_qgv_points > 0) 840 mask |= REG_GENMASK(num_qgv_points - 1, 0); 841 842 if (num_psf_gv_points > 0) 843 mask |= REG_GENMASK(num_psf_gv_points - 1, 0) << ADLS_PSF_PT_SHIFT; 844 845 for_each_oldnew_intel_crtc_in_state(state, crtc, old_crtc_state, 846 new_crtc_state, i) { 847 unsigned int old_data_rate = 848 intel_bw_crtc_data_rate(old_crtc_state); 849 unsigned int new_data_rate = 850 intel_bw_crtc_data_rate(new_crtc_state); 851 unsigned int old_active_planes = 852 intel_bw_crtc_num_active_planes(old_crtc_state); 853 unsigned int new_active_planes = 854 intel_bw_crtc_num_active_planes(new_crtc_state); 855 856 /* 857 * Avoid locking the bw state when 858 * nothing significant has changed. 859 */ 860 if (old_data_rate == new_data_rate && 861 old_active_planes == new_active_planes) 862 continue; 863 864 new_bw_state = intel_atomic_get_bw_state(state); 865 if (IS_ERR(new_bw_state)) 866 return PTR_ERR(new_bw_state); 867 868 new_bw_state->data_rate[crtc->pipe] = new_data_rate; 869 new_bw_state->num_active_planes[crtc->pipe] = new_active_planes; 870 871 drm_dbg_kms(&dev_priv->drm, 872 "pipe %c data rate %u num active planes %u\n", 873 pipe_name(crtc->pipe), 874 new_bw_state->data_rate[crtc->pipe], 875 new_bw_state->num_active_planes[crtc->pipe]); 876 } 877 878 if (!new_bw_state) 879 return 0; 880 881 ret = intel_atomic_lock_global_state(&new_bw_state->base); 882 if (ret) 883 return ret; 884 885 data_rate = intel_bw_data_rate(dev_priv, new_bw_state); 886 data_rate = DIV_ROUND_UP(data_rate, 1000); 887 888 num_active_planes = intel_bw_num_active_planes(dev_priv, new_bw_state); 889 890 for (i = 0; i < num_qgv_points; i++) { 891 unsigned int max_data_rate; 892 893 if (DISPLAY_VER(dev_priv) > 11) 894 max_data_rate = tgl_max_bw(dev_priv, num_active_planes, i); 895 else 896 max_data_rate = icl_max_bw(dev_priv, num_active_planes, i); 897 /* 898 * We need to know which qgv point gives us 899 * maximum bandwidth in order to disable SAGV 900 * if we find that we exceed SAGV block time 901 * with watermarks. By that moment we already 902 * have those, as it is calculated earlier in 903 * intel_atomic_check, 904 */ 905 if (max_data_rate > max_bw) { 906 max_bw_point = i; 907 max_bw = max_data_rate; 908 } 909 if (max_data_rate >= data_rate) 910 allowed_points |= REG_FIELD_PREP(ADLS_QGV_PT_MASK, BIT(i)); 911 912 drm_dbg_kms(&dev_priv->drm, "QGV point %d: max bw %d required %d\n", 913 i, max_data_rate, data_rate); 914 } 915 916 for (i = 0; i < num_psf_gv_points; i++) { 917 unsigned int max_data_rate = adl_psf_bw(dev_priv, i); 918 919 if (max_data_rate >= data_rate) 920 allowed_points |= REG_FIELD_PREP(ADLS_PSF_PT_MASK, BIT(i)); 921 922 drm_dbg_kms(&dev_priv->drm, "PSF GV point %d: max bw %d" 923 " required %d\n", 924 i, max_data_rate, data_rate); 925 } 926 927 /* 928 * BSpec states that we always should have at least one allowed point 929 * left, so if we couldn't - simply reject the configuration for obvious 930 * reasons. 931 */ 932 if ((allowed_points & ADLS_QGV_PT_MASK) == 0) { 933 drm_dbg_kms(&dev_priv->drm, "No QGV points provide sufficient memory" 934 " bandwidth %d for display configuration(%d active planes).\n", 935 data_rate, num_active_planes); 936 return -EINVAL; 937 } 938 939 if (num_psf_gv_points > 0) { 940 if ((allowed_points & ADLS_PSF_PT_MASK) == 0) { 941 drm_dbg_kms(&dev_priv->drm, "No PSF GV points provide sufficient memory" 942 " bandwidth %d for display configuration(%d active planes).\n", 943 data_rate, num_active_planes); 944 return -EINVAL; 945 } 946 } 947 948 /* 949 * Leave only single point with highest bandwidth, if 950 * we can't enable SAGV due to the increased memory latency it may 951 * cause. 952 */ 953 if (!intel_can_enable_sagv(dev_priv, new_bw_state)) { 954 allowed_points = BIT(max_bw_point); 955 drm_dbg_kms(&dev_priv->drm, "No SAGV, using single QGV point %d\n", 956 max_bw_point); 957 } 958 /* 959 * We store the ones which need to be masked as that is what PCode 960 * actually accepts as a parameter. 961 */ 962 new_bw_state->qgv_points_mask = ~allowed_points & mask; 963 964 old_bw_state = intel_atomic_get_old_bw_state(state); 965 /* 966 * If the actual mask had changed we need to make sure that 967 * the commits are serialized(in case this is a nomodeset, nonblocking) 968 */ 969 if (new_bw_state->qgv_points_mask != old_bw_state->qgv_points_mask) { 970 ret = intel_atomic_serialize_global_state(&new_bw_state->base); 971 if (ret) 972 return ret; 973 } 974 975 return 0; 976 } 977 978 static struct intel_global_state * 979 intel_bw_duplicate_state(struct intel_global_obj *obj) 980 { 981 struct intel_bw_state *state; 982 983 state = kmemdup(obj->state, sizeof(*state), GFP_KERNEL); 984 if (!state) 985 return NULL; 986 987 return &state->base; 988 } 989 990 static void intel_bw_destroy_state(struct intel_global_obj *obj, 991 struct intel_global_state *state) 992 { 993 kfree(state); 994 } 995 996 static const struct intel_global_state_funcs intel_bw_funcs = { 997 .atomic_duplicate_state = intel_bw_duplicate_state, 998 .atomic_destroy_state = intel_bw_destroy_state, 999 }; 1000 1001 int intel_bw_init(struct drm_i915_private *dev_priv) 1002 { 1003 struct intel_bw_state *state; 1004 1005 state = kzalloc(sizeof(*state), GFP_KERNEL); 1006 if (!state) 1007 return -ENOMEM; 1008 1009 intel_atomic_global_obj_init(dev_priv, &dev_priv->bw_obj, 1010 &state->base, &intel_bw_funcs); 1011 1012 return 0; 1013 } 1014