// SPDX-License-Identifier: MIT
/*
 * Copyright 2022 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: AMD
 *
 */

#define UNIT_TEST 0
#if !UNIT_TEST
#include "dc.h"
#include "dc_link.h"
#endif
#include "../display_mode_lib.h"
#include "display_mode_vba_314.h"
#include "../dml_inline_defs.h"

/*
 * NOTE:
 *   This file is gcc-parsable HW gospel, coming straight from HW engineers.
 *
 * It doesn't adhere to Linux kernel style and sometimes will do things in odd
 * ways. Unless there is something clearly wrong with it the code should
 * remain as-is as it provides us with a guarantee from HW that it is correct.
 */

#define BPP_INVALID 0
#define BPP_BLENDED_PIPE 0xffffffff
#define DCN314_MAX_DSC_IMAGE_WIDTH 5184
#define DCN314_MAX_FMT_420_BUFFER_WIDTH 4096

// For DML-C changes that haven't been propagated to VBA yet
//#define __DML_VBA_ALLOW_DELTA__

// Move these to ip parameters/constant

// VStartup value at which DML starts checking whether the mode can be supported
#define __DML_VBA_MIN_VSTARTUP__ 9

// Delay in DCFCLK from ARB to DET (1st num is ARB to SDPIF, 2nd number is SDPIF to DET)
#define __DML_ARB_TO_RET_DELAY__ (7 + 95)

// fudge factor for min dcfclk calculation
#define __DML_MIN_DCFCLK_FACTOR__ 1.15

/*
 * Per-pipe parameter bundle; CalculatePrefetchSchedule() receives one through
 * its myPipe argument. The field notes below describe only the uses visible in
 * that function.
 */
typedef struct {
	/* CalculatePrefetchSchedule() returns true early, before dividing by them, when DPPCLK or DISPCLK is 0.0 */
	double DPPCLK;
	double DISPCLK;
	/* LineTime is computed as HTotal / PixelClock; units presumably MHz (debug prints label derived times "us") - TODO confirm */
	double PixelClock;
	double DCFCLKDeepSleep;
	/* (DPPPerPlane - 1) * DPP_RECOUT_WIDTH is added to DSTXAfterScaler; also divides prefetch_bw_pr */
	unsigned int DPPPerPlane;
	/* selects DPPCLKDelaySCL (true) or DPPCLKDelaySCLLBOnly (false) when computing DPPCycles */
	bool ScalerEnabled;
	double VRatio;
	double VRatioChroma;
	enum scan_direction_class SourceScan;
	unsigned int BlockWidth256BytesY;
	unsigned int BlockHeight256BytesY;
	unsigned int BlockWidth256BytesC;
	unsigned int BlockHeight256BytesC;
	/* together with ProgressiveToInterlaceUnitInOPP, makes DSTYAfterScaler start at 1 */
	unsigned int InterlaceEnable;
	/* multiplied by DPPCLKDelayCNVCCursor and added to DPPCycles */
	unsigned int NumberOfCursors;
	unsigned int VBlank;
	unsigned int HTotal;
	/* declared unsigned int but only tested as a boolean (!myPipe->DCCEnable) */
	unsigned int DCCEnable;
	/* when set, 18 is added to DSTXAfterScaler */
	bool ODMCombineIsEnabled;
	enum source_format_class SourcePixelFormat;
	/*
	 * NOTE(review): both byte-per-pixel fields are int, so the
	 * BytePerPixelC / 4 used for dm_420_* formats in
	 * CalculatePrefetchSchedule() is an integer division - confirm intended.
	 */
	int BytePerPixelY;
	int BytePerPixelC;
	bool ProgressiveToInterlaceUnitInOPP;
} Pipe;

/* NOTE(review): the two macros below repeat, token for token, the definitions near the top of the file. */
#define BPP_INVALID 0
#define BPP_BLENDED_PIPE 0xffffffff

static bool CalculateBytePerPixelAnd256BBlockSizes(
		enum source_format_class SourcePixelFormat,
		enum dm_swizzle_mode SurfaceTiling,
		unsigned int *BytePerPixelY,
		unsigned int *BytePerPixelC,
		double *BytePerPixelDETY,
		double *BytePerPixelDETC,
		unsigned int *BlockHeight256BytesY,
		unsigned int *BlockHeight256BytesC,
		unsigned int *BlockWidth256BytesY,
		unsigned int *BlockWidth256BytesC);
static void DisplayPipeConfiguration(struct display_mode_lib *mode_lib);
static void
DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation(struct display_mode_lib *mode_lib); 106 static unsigned int dscceComputeDelay( 107 unsigned int bpc, 108 double BPP, 109 unsigned int sliceWidth, 110 unsigned int numSlices, 111 enum output_format_class pixelFormat, 112 enum output_encoder_class Output); 113 static unsigned int dscComputeDelay(enum output_format_class pixelFormat, enum output_encoder_class Output); 114 static bool CalculatePrefetchSchedule( 115 struct display_mode_lib *mode_lib, 116 double HostVMInefficiencyFactor, 117 Pipe *myPipe, 118 unsigned int DSCDelay, 119 double DPPCLKDelaySubtotalPlusCNVCFormater, 120 double DPPCLKDelaySCL, 121 double DPPCLKDelaySCLLBOnly, 122 double DPPCLKDelayCNVCCursor, 123 double DISPCLKDelaySubtotal, 124 unsigned int DPP_RECOUT_WIDTH, 125 enum output_format_class OutputFormat, 126 unsigned int MaxInterDCNTileRepeaters, 127 unsigned int VStartup, 128 unsigned int MaxVStartup, 129 unsigned int GPUVMPageTableLevels, 130 bool GPUVMEnable, 131 bool HostVMEnable, 132 unsigned int HostVMMaxNonCachedPageTableLevels, 133 double HostVMMinPageSize, 134 bool DynamicMetadataEnable, 135 bool DynamicMetadataVMEnabled, 136 int DynamicMetadataLinesBeforeActiveRequired, 137 unsigned int DynamicMetadataTransmittedBytes, 138 double UrgentLatency, 139 double UrgentExtraLatency, 140 double TCalc, 141 unsigned int PDEAndMetaPTEBytesFrame, 142 unsigned int MetaRowByte, 143 unsigned int PixelPTEBytesPerRow, 144 double PrefetchSourceLinesY, 145 unsigned int SwathWidthY, 146 double VInitPreFillY, 147 unsigned int MaxNumSwathY, 148 double PrefetchSourceLinesC, 149 unsigned int SwathWidthC, 150 double VInitPreFillC, 151 unsigned int MaxNumSwathC, 152 int swath_width_luma_ub, 153 int swath_width_chroma_ub, 154 unsigned int SwathHeightY, 155 unsigned int SwathHeightC, 156 double TWait, 157 double *DSTXAfterScaler, 158 double *DSTYAfterScaler, 159 double *DestinationLinesForPrefetch, 160 double *PrefetchBandwidth, 161 
double *DestinationLinesToRequestVMInVBlank, 162 double *DestinationLinesToRequestRowInVBlank, 163 double *VRatioPrefetchY, 164 double *VRatioPrefetchC, 165 double *RequiredPrefetchPixDataBWLuma, 166 double *RequiredPrefetchPixDataBWChroma, 167 bool *NotEnoughTimeForDynamicMetadata, 168 double *Tno_bw, 169 double *prefetch_vmrow_bw, 170 double *Tdmdl_vm, 171 double *Tdmdl, 172 double *TSetup, 173 int *VUpdateOffsetPix, 174 double *VUpdateWidthPix, 175 double *VReadyOffsetPix); 176 static double RoundToDFSGranularityUp(double Clock, double VCOSpeed); 177 static double RoundToDFSGranularityDown(double Clock, double VCOSpeed); 178 static void CalculateDCCConfiguration( 179 bool DCCEnabled, 180 bool DCCProgrammingAssumesScanDirectionUnknown, 181 enum source_format_class SourcePixelFormat, 182 unsigned int SurfaceWidthLuma, 183 unsigned int SurfaceWidthChroma, 184 unsigned int SurfaceHeightLuma, 185 unsigned int SurfaceHeightChroma, 186 double DETBufferSize, 187 unsigned int RequestHeight256ByteLuma, 188 unsigned int RequestHeight256ByteChroma, 189 enum dm_swizzle_mode TilingFormat, 190 unsigned int BytePerPixelY, 191 unsigned int BytePerPixelC, 192 double BytePerPixelDETY, 193 double BytePerPixelDETC, 194 enum scan_direction_class ScanOrientation, 195 unsigned int *MaxUncompressedBlockLuma, 196 unsigned int *MaxUncompressedBlockChroma, 197 unsigned int *MaxCompressedBlockLuma, 198 unsigned int *MaxCompressedBlockChroma, 199 unsigned int *IndependentBlockLuma, 200 unsigned int *IndependentBlockChroma); 201 static double CalculatePrefetchSourceLines( 202 struct display_mode_lib *mode_lib, 203 double VRatio, 204 double vtaps, 205 bool Interlace, 206 bool ProgressiveToInterlaceUnitInOPP, 207 unsigned int SwathHeight, 208 unsigned int ViewportYStart, 209 double *VInitPreFill, 210 unsigned int *MaxNumSwath); 211 static unsigned int CalculateVMAndRowBytes( 212 struct display_mode_lib *mode_lib, 213 bool DCCEnable, 214 unsigned int BlockHeight256Bytes, 215 unsigned int 
BlockWidth256Bytes, 216 enum source_format_class SourcePixelFormat, 217 unsigned int SurfaceTiling, 218 unsigned int BytePerPixel, 219 enum scan_direction_class ScanDirection, 220 unsigned int SwathWidth, 221 unsigned int ViewportHeight, 222 bool GPUVMEnable, 223 bool HostVMEnable, 224 unsigned int HostVMMaxNonCachedPageTableLevels, 225 unsigned int GPUVMMinPageSize, 226 unsigned int HostVMMinPageSize, 227 unsigned int PTEBufferSizeInRequests, 228 unsigned int Pitch, 229 unsigned int DCCMetaPitch, 230 unsigned int *MacroTileWidth, 231 unsigned int *MetaRowByte, 232 unsigned int *PixelPTEBytesPerRow, 233 bool *PTEBufferSizeNotExceeded, 234 int *dpte_row_width_ub, 235 unsigned int *dpte_row_height, 236 unsigned int *MetaRequestWidth, 237 unsigned int *MetaRequestHeight, 238 unsigned int *meta_row_width, 239 unsigned int *meta_row_height, 240 int *vm_group_bytes, 241 unsigned int *dpte_group_bytes, 242 unsigned int *PixelPTEReqWidth, 243 unsigned int *PixelPTEReqHeight, 244 unsigned int *PTERequestSize, 245 int *DPDE0BytesFrame, 246 int *MetaPTEBytesFrame); 247 static double CalculateTWait(unsigned int PrefetchMode, double DRAMClockChangeLatency, double UrgentLatency, double SREnterPlusExitTime); 248 static void CalculateRowBandwidth( 249 bool GPUVMEnable, 250 enum source_format_class SourcePixelFormat, 251 double VRatio, 252 double VRatioChroma, 253 bool DCCEnable, 254 double LineTime, 255 unsigned int MetaRowByteLuma, 256 unsigned int MetaRowByteChroma, 257 unsigned int meta_row_height_luma, 258 unsigned int meta_row_height_chroma, 259 unsigned int PixelPTEBytesPerRowLuma, 260 unsigned int PixelPTEBytesPerRowChroma, 261 unsigned int dpte_row_height_luma, 262 unsigned int dpte_row_height_chroma, 263 double *meta_row_bw, 264 double *dpte_row_bw); 265 266 static void CalculateFlipSchedule( 267 struct display_mode_lib *mode_lib, 268 unsigned int k, 269 double HostVMInefficiencyFactor, 270 double UrgentExtraLatency, 271 double UrgentLatency, 272 double 
PDEAndMetaPTEBytesPerFrame, 273 double MetaRowBytes, 274 double DPTEBytesPerRow); 275 static double CalculateWriteBackDelay( 276 enum source_format_class WritebackPixelFormat, 277 double WritebackHRatio, 278 double WritebackVRatio, 279 unsigned int WritebackVTaps, 280 int WritebackDestinationWidth, 281 int WritebackDestinationHeight, 282 int WritebackSourceHeight, 283 unsigned int HTotal); 284 285 static void CalculateVupdateAndDynamicMetadataParameters( 286 int MaxInterDCNTileRepeaters, 287 double DPPCLK, 288 double DISPCLK, 289 double DCFClkDeepSleep, 290 double PixelClock, 291 int HTotal, 292 int VBlank, 293 int DynamicMetadataTransmittedBytes, 294 int DynamicMetadataLinesBeforeActiveRequired, 295 int InterlaceEnable, 296 bool ProgressiveToInterlaceUnitInOPP, 297 double *TSetup, 298 double *Tdmbf, 299 double *Tdmec, 300 double *Tdmsks, 301 int *VUpdateOffsetPix, 302 double *VUpdateWidthPix, 303 double *VReadyOffsetPix); 304 305 static void CalculateWatermarksAndDRAMSpeedChangeSupport( 306 struct display_mode_lib *mode_lib, 307 unsigned int PrefetchMode, 308 double DCFCLK, 309 double ReturnBW, 310 double UrgentLatency, 311 double ExtraLatency, 312 double SOCCLK, 313 double DCFCLKDeepSleep, 314 unsigned int DETBufferSizeY[], 315 unsigned int DETBufferSizeC[], 316 unsigned int SwathHeightY[], 317 unsigned int SwathHeightC[], 318 double SwathWidthY[], 319 double SwathWidthC[], 320 unsigned int DPPPerPlane[], 321 double BytePerPixelDETY[], 322 double BytePerPixelDETC[], 323 bool UnboundedRequestEnabled, 324 unsigned int CompressedBufferSizeInkByte, 325 enum clock_change_support *DRAMClockChangeSupport, 326 double *StutterExitWatermark, 327 double *StutterEnterPlusExitWatermark, 328 double *Z8StutterExitWatermark, 329 double *Z8StutterEnterPlusExitWatermark); 330 331 static void CalculateDCFCLKDeepSleep( 332 struct display_mode_lib *mode_lib, 333 unsigned int NumberOfActivePlanes, 334 int BytePerPixelY[], 335 int BytePerPixelC[], 336 double VRatio[], 337 double 
VRatioChroma[], 338 double SwathWidthY[], 339 double SwathWidthC[], 340 unsigned int DPPPerPlane[], 341 double HRatio[], 342 double HRatioChroma[], 343 double PixelClock[], 344 double PSCL_THROUGHPUT[], 345 double PSCL_THROUGHPUT_CHROMA[], 346 double DPPCLK[], 347 double ReadBandwidthLuma[], 348 double ReadBandwidthChroma[], 349 int ReturnBusWidth, 350 double *DCFCLKDeepSleep); 351 352 static void CalculateUrgentBurstFactor( 353 int swath_width_luma_ub, 354 int swath_width_chroma_ub, 355 unsigned int SwathHeightY, 356 unsigned int SwathHeightC, 357 double LineTime, 358 double UrgentLatency, 359 double CursorBufferSize, 360 unsigned int CursorWidth, 361 unsigned int CursorBPP, 362 double VRatio, 363 double VRatioC, 364 double BytePerPixelInDETY, 365 double BytePerPixelInDETC, 366 double DETBufferSizeY, 367 double DETBufferSizeC, 368 double *UrgentBurstFactorCursor, 369 double *UrgentBurstFactorLuma, 370 double *UrgentBurstFactorChroma, 371 bool *NotEnoughUrgentLatencyHiding); 372 373 static void UseMinimumDCFCLK( 374 struct display_mode_lib *mode_lib, 375 int MaxPrefetchMode, 376 int ReorderingBytes); 377 378 static void CalculatePixelDeliveryTimes( 379 unsigned int NumberOfActivePlanes, 380 double VRatio[], 381 double VRatioChroma[], 382 double VRatioPrefetchY[], 383 double VRatioPrefetchC[], 384 unsigned int swath_width_luma_ub[], 385 unsigned int swath_width_chroma_ub[], 386 unsigned int DPPPerPlane[], 387 double HRatio[], 388 double HRatioChroma[], 389 double PixelClock[], 390 double PSCL_THROUGHPUT[], 391 double PSCL_THROUGHPUT_CHROMA[], 392 double DPPCLK[], 393 int BytePerPixelC[], 394 enum scan_direction_class SourceScan[], 395 unsigned int NumberOfCursors[], 396 unsigned int CursorWidth[][DC__NUM_CURSOR__MAX], 397 unsigned int CursorBPP[][DC__NUM_CURSOR__MAX], 398 unsigned int BlockWidth256BytesY[], 399 unsigned int BlockHeight256BytesY[], 400 unsigned int BlockWidth256BytesC[], 401 unsigned int BlockHeight256BytesC[], 402 double 
DisplayPipeLineDeliveryTimeLuma[], 403 double DisplayPipeLineDeliveryTimeChroma[], 404 double DisplayPipeLineDeliveryTimeLumaPrefetch[], 405 double DisplayPipeLineDeliveryTimeChromaPrefetch[], 406 double DisplayPipeRequestDeliveryTimeLuma[], 407 double DisplayPipeRequestDeliveryTimeChroma[], 408 double DisplayPipeRequestDeliveryTimeLumaPrefetch[], 409 double DisplayPipeRequestDeliveryTimeChromaPrefetch[], 410 double CursorRequestDeliveryTime[], 411 double CursorRequestDeliveryTimePrefetch[]); 412 413 static void CalculateMetaAndPTETimes( 414 int NumberOfActivePlanes, 415 bool GPUVMEnable, 416 int MetaChunkSize, 417 int MinMetaChunkSizeBytes, 418 int HTotal[], 419 double VRatio[], 420 double VRatioChroma[], 421 double DestinationLinesToRequestRowInVBlank[], 422 double DestinationLinesToRequestRowInImmediateFlip[], 423 bool DCCEnable[], 424 double PixelClock[], 425 int BytePerPixelY[], 426 int BytePerPixelC[], 427 enum scan_direction_class SourceScan[], 428 int dpte_row_height[], 429 int dpte_row_height_chroma[], 430 int meta_row_width[], 431 int meta_row_width_chroma[], 432 int meta_row_height[], 433 int meta_row_height_chroma[], 434 int meta_req_width[], 435 int meta_req_width_chroma[], 436 int meta_req_height[], 437 int meta_req_height_chroma[], 438 int dpte_group_bytes[], 439 int PTERequestSizeY[], 440 int PTERequestSizeC[], 441 int PixelPTEReqWidthY[], 442 int PixelPTEReqHeightY[], 443 int PixelPTEReqWidthC[], 444 int PixelPTEReqHeightC[], 445 int dpte_row_width_luma_ub[], 446 int dpte_row_width_chroma_ub[], 447 double DST_Y_PER_PTE_ROW_NOM_L[], 448 double DST_Y_PER_PTE_ROW_NOM_C[], 449 double DST_Y_PER_META_ROW_NOM_L[], 450 double DST_Y_PER_META_ROW_NOM_C[], 451 double TimePerMetaChunkNominal[], 452 double TimePerChromaMetaChunkNominal[], 453 double TimePerMetaChunkVBlank[], 454 double TimePerChromaMetaChunkVBlank[], 455 double TimePerMetaChunkFlip[], 456 double TimePerChromaMetaChunkFlip[], 457 double time_per_pte_group_nom_luma[], 458 double 
time_per_pte_group_vblank_luma[], 459 double time_per_pte_group_flip_luma[], 460 double time_per_pte_group_nom_chroma[], 461 double time_per_pte_group_vblank_chroma[], 462 double time_per_pte_group_flip_chroma[]); 463 464 static void CalculateVMGroupAndRequestTimes( 465 unsigned int NumberOfActivePlanes, 466 bool GPUVMEnable, 467 unsigned int GPUVMMaxPageTableLevels, 468 unsigned int HTotal[], 469 int BytePerPixelC[], 470 double DestinationLinesToRequestVMInVBlank[], 471 double DestinationLinesToRequestVMInImmediateFlip[], 472 bool DCCEnable[], 473 double PixelClock[], 474 int dpte_row_width_luma_ub[], 475 int dpte_row_width_chroma_ub[], 476 int vm_group_bytes[], 477 unsigned int dpde0_bytes_per_frame_ub_l[], 478 unsigned int dpde0_bytes_per_frame_ub_c[], 479 int meta_pte_bytes_per_frame_ub_l[], 480 int meta_pte_bytes_per_frame_ub_c[], 481 double TimePerVMGroupVBlank[], 482 double TimePerVMGroupFlip[], 483 double TimePerVMRequestVBlank[], 484 double TimePerVMRequestFlip[]); 485 486 static void CalculateStutterEfficiency( 487 struct display_mode_lib *mode_lib, 488 int CompressedBufferSizeInkByte, 489 bool UnboundedRequestEnabled, 490 int ConfigReturnBufferSizeInKByte, 491 int MetaFIFOSizeInKEntries, 492 int ZeroSizeBufferEntries, 493 int NumberOfActivePlanes, 494 int ROBBufferSizeInKByte, 495 double TotalDataReadBandwidth, 496 double DCFCLK, 497 double ReturnBW, 498 double COMPBUF_RESERVED_SPACE_64B, 499 double COMPBUF_RESERVED_SPACE_ZS, 500 double SRExitTime, 501 double SRExitZ8Time, 502 bool SynchronizedVBlank, 503 double Z8StutterEnterPlusExitWatermark, 504 double StutterEnterPlusExitWatermark, 505 bool ProgressiveToInterlaceUnitInOPP, 506 bool Interlace[], 507 double MinTTUVBlank[], 508 int DPPPerPlane[], 509 unsigned int DETBufferSizeY[], 510 int BytePerPixelY[], 511 double BytePerPixelDETY[], 512 double SwathWidthY[], 513 int SwathHeightY[], 514 int SwathHeightC[], 515 double NetDCCRateLuma[], 516 double NetDCCRateChroma[], 517 double 
DCCFractionOfZeroSizeRequestsLuma[], 518 double DCCFractionOfZeroSizeRequestsChroma[], 519 int HTotal[], 520 int VTotal[], 521 double PixelClock[], 522 double VRatio[], 523 enum scan_direction_class SourceScan[], 524 int BlockHeight256BytesY[], 525 int BlockWidth256BytesY[], 526 int BlockHeight256BytesC[], 527 int BlockWidth256BytesC[], 528 int DCCYMaxUncompressedBlock[], 529 int DCCCMaxUncompressedBlock[], 530 int VActive[], 531 bool DCCEnable[], 532 bool WritebackEnable[], 533 double ReadBandwidthPlaneLuma[], 534 double ReadBandwidthPlaneChroma[], 535 double meta_row_bw[], 536 double dpte_row_bw[], 537 double *StutterEfficiencyNotIncludingVBlank, 538 double *StutterEfficiency, 539 int *NumberOfStutterBurstsPerFrame, 540 double *Z8StutterEfficiencyNotIncludingVBlank, 541 double *Z8StutterEfficiency, 542 int *Z8NumberOfStutterBurstsPerFrame, 543 double *StutterPeriod); 544 545 static void CalculateSwathAndDETConfiguration( 546 bool ForceSingleDPP, 547 int NumberOfActivePlanes, 548 unsigned int DETBufferSizeInKByte, 549 double MaximumSwathWidthLuma[], 550 double MaximumSwathWidthChroma[], 551 enum scan_direction_class SourceScan[], 552 enum source_format_class SourcePixelFormat[], 553 enum dm_swizzle_mode SurfaceTiling[], 554 int ViewportWidth[], 555 int ViewportHeight[], 556 int SurfaceWidthY[], 557 int SurfaceWidthC[], 558 int SurfaceHeightY[], 559 int SurfaceHeightC[], 560 int Read256BytesBlockHeightY[], 561 int Read256BytesBlockHeightC[], 562 int Read256BytesBlockWidthY[], 563 int Read256BytesBlockWidthC[], 564 enum odm_combine_mode ODMCombineEnabled[], 565 int BlendingAndTiming[], 566 int BytePerPixY[], 567 int BytePerPixC[], 568 double BytePerPixDETY[], 569 double BytePerPixDETC[], 570 int HActive[], 571 double HRatio[], 572 double HRatioChroma[], 573 int DPPPerPlane[], 574 int swath_width_luma_ub[], 575 int swath_width_chroma_ub[], 576 double SwathWidth[], 577 double SwathWidthChroma[], 578 int SwathHeightY[], 579 int SwathHeightC[], 580 unsigned int 
DETBufferSizeY[], 581 unsigned int DETBufferSizeC[], 582 bool ViewportSizeSupportPerPlane[], 583 bool *ViewportSizeSupport); 584 static void CalculateSwathWidth( 585 bool ForceSingleDPP, 586 int NumberOfActivePlanes, 587 enum source_format_class SourcePixelFormat[], 588 enum scan_direction_class SourceScan[], 589 int ViewportWidth[], 590 int ViewportHeight[], 591 int SurfaceWidthY[], 592 int SurfaceWidthC[], 593 int SurfaceHeightY[], 594 int SurfaceHeightC[], 595 enum odm_combine_mode ODMCombineEnabled[], 596 int BytePerPixY[], 597 int BytePerPixC[], 598 int Read256BytesBlockHeightY[], 599 int Read256BytesBlockHeightC[], 600 int Read256BytesBlockWidthY[], 601 int Read256BytesBlockWidthC[], 602 int BlendingAndTiming[], 603 int HActive[], 604 double HRatio[], 605 int DPPPerPlane[], 606 double SwathWidthSingleDPPY[], 607 double SwathWidthSingleDPPC[], 608 double SwathWidthY[], 609 double SwathWidthC[], 610 int MaximumSwathHeightY[], 611 int MaximumSwathHeightC[], 612 int swath_width_luma_ub[], 613 int swath_width_chroma_ub[]); 614 615 static double CalculateExtraLatency( 616 int RoundTripPingLatencyCycles, 617 int ReorderingBytes, 618 double DCFCLK, 619 int TotalNumberOfActiveDPP, 620 int PixelChunkSizeInKByte, 621 int TotalNumberOfDCCActiveDPP, 622 int MetaChunkSize, 623 double ReturnBW, 624 bool GPUVMEnable, 625 bool HostVMEnable, 626 int NumberOfActivePlanes, 627 int NumberOfDPP[], 628 int dpte_group_bytes[], 629 double HostVMInefficiencyFactor, 630 double HostVMMinPageSize, 631 int HostVMMaxNonCachedPageTableLevels); 632 633 static double CalculateExtraLatencyBytes( 634 int ReorderingBytes, 635 int TotalNumberOfActiveDPP, 636 int PixelChunkSizeInKByte, 637 int TotalNumberOfDCCActiveDPP, 638 int MetaChunkSize, 639 bool GPUVMEnable, 640 bool HostVMEnable, 641 int NumberOfActivePlanes, 642 int NumberOfDPP[], 643 int dpte_group_bytes[], 644 double HostVMInefficiencyFactor, 645 double HostVMMinPageSize, 646 int HostVMMaxNonCachedPageTableLevels); 647 648 static double 
CalculateUrgentLatency( 649 double UrgentLatencyPixelDataOnly, 650 double UrgentLatencyPixelMixedWithVMData, 651 double UrgentLatencyVMDataOnly, 652 bool DoUrgentLatencyAdjustment, 653 double UrgentLatencyAdjustmentFabricClockComponent, 654 double UrgentLatencyAdjustmentFabricClockReference, 655 double FabricClockSingle); 656 657 static void CalculateUnboundedRequestAndCompressedBufferSize( 658 unsigned int DETBufferSizeInKByte, 659 int ConfigReturnBufferSizeInKByte, 660 enum unbounded_requesting_policy UseUnboundedRequestingFinal, 661 int TotalActiveDPP, 662 bool NoChromaPlanes, 663 int MaxNumDPP, 664 int CompressedBufferSegmentSizeInkByteFinal, 665 enum output_encoder_class *Output, 666 bool *UnboundedRequestEnabled, 667 int *CompressedBufferSizeInkByte); 668 669 static bool UnboundedRequest(enum unbounded_requesting_policy UseUnboundedRequestingFinal, int TotalNumberOfActiveDPP, bool NoChroma, enum output_encoder_class Output); 670 static unsigned int CalculateMaxVStartup( 671 unsigned int VTotal, 672 unsigned int VActive, 673 unsigned int VBlankNom, 674 unsigned int HTotal, 675 double PixelClock, 676 bool ProgressiveTointerlaceUnitinOPP, 677 bool Interlace, 678 unsigned int VBlankNomDefaultUS, 679 double WritebackDelayTime); 680 681 void dml314_recalculate(struct display_mode_lib *mode_lib) 682 { 683 ModeSupportAndSystemConfiguration(mode_lib); 684 PixelClockAdjustmentForProgressiveToInterlaceUnit(mode_lib); 685 DisplayPipeConfiguration(mode_lib); 686 #ifdef __DML_VBA_DEBUG__ 687 dml_print("DML::%s: Calling DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation\n", __func__); 688 #endif 689 DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation(mode_lib); 690 } 691 692 static unsigned int dscceComputeDelay( 693 unsigned int bpc, 694 double BPP, 695 unsigned int sliceWidth, 696 unsigned int numSlices, 697 enum output_format_class pixelFormat, 698 enum output_encoder_class Output) 699 { 700 // valid bpc = 
source bits per component in the set of {8, 10, 12} 701 // valid bpp = increments of 1/16 of a bit 702 // min = 6/7/8 in N420/N422/444, respectively 703 // max = such that compression is 1:1 704 //valid sliceWidth = number of pixels per slice line, must be less than or equal to 5184/numSlices (or 4096/numSlices in 420 mode) 705 //valid numSlices = number of slices in the horiziontal direction per DSC engine in the set of {1, 2, 3, 4} 706 //valid pixelFormat = pixel/color format in the set of {:N444_RGB, :S422, :N422, :N420} 707 708 // fixed value 709 unsigned int rcModelSize = 8192; 710 711 // N422/N420 operate at 2 pixels per clock 712 unsigned int pixelsPerClock = 0, lstall, D, initalXmitDelay, w, s, ix, wx, P, l0, a, ax, L, Delay, pixels; 713 714 if (pixelFormat == dm_420) 715 pixelsPerClock = 2; 716 else if (pixelFormat == dm_444) 717 pixelsPerClock = 1; 718 else if (pixelFormat == dm_n422) 719 pixelsPerClock = 2; 720 // #all other modes operate at 1 pixel per clock 721 else 722 pixelsPerClock = 1; 723 724 //initial transmit delay as per PPS 725 initalXmitDelay = dml_round(rcModelSize / 2.0 / BPP / pixelsPerClock); 726 727 //compute ssm delay 728 if (bpc == 8) 729 D = 81; 730 else if (bpc == 10) 731 D = 89; 732 else 733 D = 113; 734 735 //divide by pixel per cycle to compute slice width as seen by DSC 736 w = sliceWidth / pixelsPerClock; 737 738 //422 mode has an additional cycle of delay 739 if (pixelFormat == dm_420 || pixelFormat == dm_444 || pixelFormat == dm_n422) 740 s = 0; 741 else 742 s = 1; 743 744 //main calculation for the dscce 745 ix = initalXmitDelay + 45; 746 wx = (w + 2) / 3; 747 P = 3 * wx - w; 748 l0 = ix / w; 749 a = ix + P * l0; 750 ax = (a + 2) / 3 + D + 6 + 1; 751 L = (ax + wx - 1) / wx; 752 if ((ix % w) == 0 && P != 0) 753 lstall = 1; 754 else 755 lstall = 0; 756 Delay = L * wx * (numSlices - 1) + ax + s + lstall + 22; 757 758 //dsc processes 3 pixel containers per cycle and a container can contain 1 or 2 pixels 759 pixels = Delay * 3 * 
pixelsPerClock; 760 return pixels; 761 } 762 763 static unsigned int dscComputeDelay(enum output_format_class pixelFormat, enum output_encoder_class Output) 764 { 765 unsigned int Delay = 0; 766 767 if (pixelFormat == dm_420) { 768 // sfr 769 Delay = Delay + 2; 770 // dsccif 771 Delay = Delay + 0; 772 // dscc - input deserializer 773 Delay = Delay + 3; 774 // dscc gets pixels every other cycle 775 Delay = Delay + 2; 776 // dscc - input cdc fifo 777 Delay = Delay + 12; 778 // dscc gets pixels every other cycle 779 Delay = Delay + 13; 780 // dscc - cdc uncertainty 781 Delay = Delay + 2; 782 // dscc - output cdc fifo 783 Delay = Delay + 7; 784 // dscc gets pixels every other cycle 785 Delay = Delay + 3; 786 // dscc - cdc uncertainty 787 Delay = Delay + 2; 788 // dscc - output serializer 789 Delay = Delay + 1; 790 // sft 791 Delay = Delay + 1; 792 } else if (pixelFormat == dm_n422) { 793 // sfr 794 Delay = Delay + 2; 795 // dsccif 796 Delay = Delay + 1; 797 // dscc - input deserializer 798 Delay = Delay + 5; 799 // dscc - input cdc fifo 800 Delay = Delay + 25; 801 // dscc - cdc uncertainty 802 Delay = Delay + 2; 803 // dscc - output cdc fifo 804 Delay = Delay + 10; 805 // dscc - cdc uncertainty 806 Delay = Delay + 2; 807 // dscc - output serializer 808 Delay = Delay + 1; 809 // sft 810 Delay = Delay + 1; 811 } else { 812 // sfr 813 Delay = Delay + 2; 814 // dsccif 815 Delay = Delay + 0; 816 // dscc - input deserializer 817 Delay = Delay + 3; 818 // dscc - input cdc fifo 819 Delay = Delay + 12; 820 // dscc - cdc uncertainty 821 Delay = Delay + 2; 822 // dscc - output cdc fifo 823 Delay = Delay + 7; 824 // dscc - output serializer 825 Delay = Delay + 1; 826 // dscc - cdc uncertainty 827 Delay = Delay + 2; 828 // sft 829 Delay = Delay + 1; 830 } 831 832 return Delay; 833 } 834 835 static bool CalculatePrefetchSchedule( 836 struct display_mode_lib *mode_lib, 837 double HostVMInefficiencyFactor, 838 Pipe *myPipe, 839 unsigned int DSCDelay, 840 double 
DPPCLKDelaySubtotalPlusCNVCFormater, 841 double DPPCLKDelaySCL, 842 double DPPCLKDelaySCLLBOnly, 843 double DPPCLKDelayCNVCCursor, 844 double DISPCLKDelaySubtotal, 845 unsigned int DPP_RECOUT_WIDTH, 846 enum output_format_class OutputFormat, 847 unsigned int MaxInterDCNTileRepeaters, 848 unsigned int VStartup, 849 unsigned int MaxVStartup, 850 unsigned int GPUVMPageTableLevels, 851 bool GPUVMEnable, 852 bool HostVMEnable, 853 unsigned int HostVMMaxNonCachedPageTableLevels, 854 double HostVMMinPageSize, 855 bool DynamicMetadataEnable, 856 bool DynamicMetadataVMEnabled, 857 int DynamicMetadataLinesBeforeActiveRequired, 858 unsigned int DynamicMetadataTransmittedBytes, 859 double UrgentLatency, 860 double UrgentExtraLatency, 861 double TCalc, 862 unsigned int PDEAndMetaPTEBytesFrame, 863 unsigned int MetaRowByte, 864 unsigned int PixelPTEBytesPerRow, 865 double PrefetchSourceLinesY, 866 unsigned int SwathWidthY, 867 double VInitPreFillY, 868 unsigned int MaxNumSwathY, 869 double PrefetchSourceLinesC, 870 unsigned int SwathWidthC, 871 double VInitPreFillC, 872 unsigned int MaxNumSwathC, 873 int swath_width_luma_ub, 874 int swath_width_chroma_ub, 875 unsigned int SwathHeightY, 876 unsigned int SwathHeightC, 877 double TWait, 878 double *DSTXAfterScaler, 879 double *DSTYAfterScaler, 880 double *DestinationLinesForPrefetch, 881 double *PrefetchBandwidth, 882 double *DestinationLinesToRequestVMInVBlank, 883 double *DestinationLinesToRequestRowInVBlank, 884 double *VRatioPrefetchY, 885 double *VRatioPrefetchC, 886 double *RequiredPrefetchPixDataBWLuma, 887 double *RequiredPrefetchPixDataBWChroma, 888 bool *NotEnoughTimeForDynamicMetadata, 889 double *Tno_bw, 890 double *prefetch_vmrow_bw, 891 double *Tdmdl_vm, 892 double *Tdmdl, 893 double *TSetup, 894 int *VUpdateOffsetPix, 895 double *VUpdateWidthPix, 896 double *VReadyOffsetPix) 897 { 898 bool MyError = false; 899 unsigned int DPPCycles, DISPCLKCycles; 900 double DSTTotalPixelsAfterScaler; 901 double LineTime; 902 double 
dst_y_prefetch_equ; 903 double Tsw_oto; 904 double prefetch_bw_oto; 905 double prefetch_bw_pr; 906 double Tvm_oto; 907 double Tr0_oto; 908 double Tvm_oto_lines; 909 double Tr0_oto_lines; 910 double dst_y_prefetch_oto; 911 double TimeForFetchingMetaPTE = 0; 912 double TimeForFetchingRowInVBlank = 0; 913 double LinesToRequestPrefetchPixelData = 0; 914 unsigned int HostVMDynamicLevelsTrips; 915 double trip_to_mem; 916 double Tvm_trips; 917 double Tr0_trips; 918 double Tvm_trips_rounded; 919 double Tr0_trips_rounded; 920 double Lsw_oto; 921 double Tpre_rounded; 922 double prefetch_bw_equ; 923 double Tvm_equ; 924 double Tr0_equ; 925 double Tdmbf; 926 double Tdmec; 927 double Tdmsks; 928 double prefetch_sw_bytes; 929 double bytes_pp; 930 double dep_bytes; 931 int max_vratio_pre = 4; 932 double min_Lsw; 933 double Tsw_est1 = 0; 934 double Tsw_est3 = 0; 935 double max_Tsw = 0; 936 937 if (GPUVMEnable == true && HostVMEnable == true) { 938 HostVMDynamicLevelsTrips = HostVMMaxNonCachedPageTableLevels; 939 } else { 940 HostVMDynamicLevelsTrips = 0; 941 } 942 #ifdef __DML_VBA_DEBUG__ 943 dml_print("DML::%s: GPUVMEnable=%d HostVMEnable=%d HostVMInefficiencyFactor=%f\n", __func__, GPUVMEnable, HostVMEnable, HostVMInefficiencyFactor); 944 #endif 945 CalculateVupdateAndDynamicMetadataParameters( 946 MaxInterDCNTileRepeaters, 947 myPipe->DPPCLK, 948 myPipe->DISPCLK, 949 myPipe->DCFCLKDeepSleep, 950 myPipe->PixelClock, 951 myPipe->HTotal, 952 myPipe->VBlank, 953 DynamicMetadataTransmittedBytes, 954 DynamicMetadataLinesBeforeActiveRequired, 955 myPipe->InterlaceEnable, 956 myPipe->ProgressiveToInterlaceUnitInOPP, 957 TSetup, 958 &Tdmbf, 959 &Tdmec, 960 &Tdmsks, 961 VUpdateOffsetPix, 962 VUpdateWidthPix, 963 VReadyOffsetPix); 964 965 LineTime = myPipe->HTotal / myPipe->PixelClock; 966 trip_to_mem = UrgentLatency; 967 Tvm_trips = UrgentExtraLatency + trip_to_mem * (GPUVMPageTableLevels * (HostVMDynamicLevelsTrips + 1) - 1); 968 969 #ifdef __DML_VBA_ALLOW_DELTA__ 970 if 
(DynamicMetadataVMEnabled == true && GPUVMEnable == true) { 971 #else 972 if (DynamicMetadataVMEnabled == true) { 973 #endif 974 *Tdmdl = TWait + Tvm_trips + trip_to_mem; 975 } else { 976 *Tdmdl = TWait + UrgentExtraLatency; 977 } 978 979 #ifdef __DML_VBA_ALLOW_DELTA__ 980 if (DynamicMetadataEnable == false) { 981 *Tdmdl = 0.0; 982 } 983 #endif 984 985 if (DynamicMetadataEnable == true) { 986 if (VStartup * LineTime < *TSetup + *Tdmdl + Tdmbf + Tdmec + Tdmsks) { 987 *NotEnoughTimeForDynamicMetadata = true; 988 dml_print("DML::%s: Not Enough Time for Dynamic Meta!\n", __func__); 989 dml_print("DML::%s: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", __func__, Tdmbf); 990 dml_print("DML::%s: Tdmec: %fus - time dio takes to transfer dmd\n", __func__, Tdmec); 991 dml_print("DML::%s: Tdmsks: %fus - time before active dmd must complete transmission at dio\n", __func__, Tdmsks); 992 dml_print("DML::%s: Tdmdl: %fus - time for fabric to become ready and fetch dmd\n", __func__, *Tdmdl); 993 } else { 994 *NotEnoughTimeForDynamicMetadata = false; 995 } 996 } else { 997 *NotEnoughTimeForDynamicMetadata = false; 998 } 999 1000 *Tdmdl_vm = (DynamicMetadataEnable == true && DynamicMetadataVMEnabled == true && GPUVMEnable == true ? 
TWait + Tvm_trips : 0); 1001 1002 if (myPipe->ScalerEnabled) 1003 DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + DPPCLKDelaySCL; 1004 else 1005 DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + DPPCLKDelaySCLLBOnly; 1006 1007 DPPCycles = DPPCycles + myPipe->NumberOfCursors * DPPCLKDelayCNVCCursor; 1008 1009 DISPCLKCycles = DISPCLKDelaySubtotal; 1010 1011 if (myPipe->DPPCLK == 0.0 || myPipe->DISPCLK == 0.0) 1012 return true; 1013 1014 *DSTXAfterScaler = DPPCycles * myPipe->PixelClock / myPipe->DPPCLK + DISPCLKCycles * myPipe->PixelClock / myPipe->DISPCLK + DSCDelay; 1015 1016 #ifdef __DML_VBA_DEBUG__ 1017 dml_print("DML::%s: DPPCycles: %d\n", __func__, DPPCycles); 1018 dml_print("DML::%s: PixelClock: %f\n", __func__, myPipe->PixelClock); 1019 dml_print("DML::%s: DPPCLK: %f\n", __func__, myPipe->DPPCLK); 1020 dml_print("DML::%s: DISPCLKCycles: %d\n", __func__, DISPCLKCycles); 1021 dml_print("DML::%s: DISPCLK: %f\n", __func__, myPipe->DISPCLK); 1022 dml_print("DML::%s: DSCDelay: %d\n", __func__, DSCDelay); 1023 dml_print("DML::%s: DSTXAfterScaler: %d\n", __func__, *DSTXAfterScaler); 1024 dml_print("DML::%s: ODMCombineIsEnabled: %d\n", __func__, myPipe->ODMCombineIsEnabled); 1025 #endif 1026 1027 *DSTXAfterScaler = *DSTXAfterScaler + ((myPipe->ODMCombineIsEnabled) ? 
18 : 0) + (myPipe->DPPPerPlane - 1) * DPP_RECOUT_WIDTH; 1028 1029 if (OutputFormat == dm_420 || (myPipe->InterlaceEnable && myPipe->ProgressiveToInterlaceUnitInOPP)) 1030 *DSTYAfterScaler = 1; 1031 else 1032 *DSTYAfterScaler = 0; 1033 1034 DSTTotalPixelsAfterScaler = *DSTYAfterScaler * myPipe->HTotal + *DSTXAfterScaler; 1035 *DSTYAfterScaler = dml_floor(DSTTotalPixelsAfterScaler / myPipe->HTotal, 1); 1036 *DSTXAfterScaler = DSTTotalPixelsAfterScaler - ((double) (*DSTYAfterScaler * myPipe->HTotal)); 1037 1038 #ifdef __DML_VBA_DEBUG__ 1039 dml_print("DML::%s: DSTXAfterScaler: %d (final)\n", __func__, *DSTXAfterScaler); 1040 #endif 1041 1042 MyError = false; 1043 1044 Tr0_trips = trip_to_mem * (HostVMDynamicLevelsTrips + 1); 1045 Tvm_trips_rounded = dml_ceil(4.0 * Tvm_trips / LineTime, 1) / 4 * LineTime; 1046 Tr0_trips_rounded = dml_ceil(4.0 * Tr0_trips / LineTime, 1) / 4 * LineTime; 1047 1048 #ifdef __DML_VBA_ALLOW_DELTA__ 1049 if (!myPipe->DCCEnable) { 1050 Tr0_trips = 0.0; 1051 Tr0_trips_rounded = 0.0; 1052 } 1053 #endif 1054 1055 if (!GPUVMEnable) { 1056 Tvm_trips = 0.0; 1057 Tvm_trips_rounded = 0.0; 1058 } 1059 1060 if (GPUVMEnable) { 1061 if (GPUVMPageTableLevels >= 3) { 1062 *Tno_bw = UrgentExtraLatency + trip_to_mem * ((GPUVMPageTableLevels - 2) - 1); 1063 } else { 1064 *Tno_bw = 0; 1065 } 1066 } else if (!myPipe->DCCEnable) { 1067 *Tno_bw = LineTime; 1068 } else { 1069 *Tno_bw = LineTime / 4; 1070 } 1071 1072 if (myPipe->SourcePixelFormat == dm_420_8 || myPipe->SourcePixelFormat == dm_420_10 || myPipe->SourcePixelFormat == dm_420_12) 1073 bytes_pp = myPipe->BytePerPixelY + myPipe->BytePerPixelC / 4; 1074 else 1075 bytes_pp = myPipe->BytePerPixelY + myPipe->BytePerPixelC; 1076 /*rev 99*/ 1077 prefetch_bw_pr = bytes_pp * myPipe->PixelClock / (double) myPipe->DPPPerPlane; 1078 prefetch_bw_pr = dml_min(1, myPipe->VRatio) * prefetch_bw_pr; 1079 max_Tsw = dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) * LineTime; 1080 prefetch_sw_bytes = PrefetchSourceLinesY * 
swath_width_luma_ub * myPipe->BytePerPixelY + PrefetchSourceLinesC * swath_width_chroma_ub * myPipe->BytePerPixelC; 1081 prefetch_bw_oto = dml_max(prefetch_bw_pr, prefetch_sw_bytes / max_Tsw); 1082 1083 min_Lsw = dml_max(1, dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) / max_vratio_pre); 1084 Lsw_oto = dml_ceil(4 * dml_max(prefetch_sw_bytes / prefetch_bw_oto / LineTime, min_Lsw), 1) / 4; 1085 Tsw_oto = Lsw_oto * LineTime; 1086 1087 1088 #ifdef __DML_VBA_DEBUG__ 1089 dml_print("DML: HTotal: %d\n", myPipe->HTotal); 1090 dml_print("DML: prefetch_bw_oto: %f\n", prefetch_bw_oto); 1091 dml_print("DML: PrefetchSourceLinesY: %f\n", PrefetchSourceLinesY); 1092 dml_print("DML: swath_width_luma_ub: %d\n", swath_width_luma_ub); 1093 dml_print("DML: BytePerPixelY: %d\n", myPipe->BytePerPixelY); 1094 dml_print("DML: Tsw_oto: %f\n", Tsw_oto); 1095 #endif 1096 1097 if (GPUVMEnable == true) 1098 Tvm_oto = dml_max3(*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / prefetch_bw_oto, Tvm_trips, LineTime / 4.0); 1099 else 1100 Tvm_oto = LineTime / 4.0; 1101 1102 if ((GPUVMEnable == true || myPipe->DCCEnable == true)) { 1103 Tr0_oto = dml_max4((MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / prefetch_bw_oto, Tr0_trips, // PREVIOUS_ERROR (missing this term) 1104 LineTime - Tvm_oto, 1105 LineTime / 4); 1106 } else { 1107 Tr0_oto = (LineTime - Tvm_oto) / 2.0; 1108 } 1109 1110 #ifdef __DML_VBA_DEBUG__ 1111 dml_print("DML::%s: Tvm_trips = %f\n", __func__, Tvm_trips); 1112 dml_print("DML::%s: Tr0_trips = %f\n", __func__, Tr0_trips); 1113 dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, MetaRowByte); 1114 dml_print("DML::%s: MetaRowByte = %d\n", __func__, MetaRowByte); 1115 dml_print("DML::%s: PixelPTEBytesPerRow = %d\n", __func__, PixelPTEBytesPerRow); 1116 dml_print("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, HostVMInefficiencyFactor); 1117 dml_print("DML::%s: prefetch_bw_oto = %f\n", __func__, prefetch_bw_oto); 1118 
dml_print("DML::%s: Tr0_oto = %f\n", __func__, Tr0_oto); 1119 dml_print("DML::%s: Tvm_oto = %f\n", __func__, Tvm_oto); 1120 #endif 1121 1122 Tvm_oto_lines = dml_ceil(4.0 * Tvm_oto / LineTime, 1) / 4.0; 1123 Tr0_oto_lines = dml_ceil(4.0 * Tr0_oto / LineTime, 1) / 4.0; 1124 dst_y_prefetch_oto = Tvm_oto_lines + 2 * Tr0_oto_lines + Lsw_oto; 1125 dst_y_prefetch_equ = VStartup - (*TSetup + dml_max(TWait + TCalc, *Tdmdl)) / LineTime - (*DSTYAfterScaler + *DSTXAfterScaler / myPipe->HTotal); 1126 dst_y_prefetch_equ = dml_floor(4.0 * (dst_y_prefetch_equ + 0.125), 1) / 4.0; 1127 Tpre_rounded = dst_y_prefetch_equ * LineTime; 1128 1129 dep_bytes = dml_max(PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor, MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor); 1130 1131 if (prefetch_sw_bytes < dep_bytes) 1132 prefetch_sw_bytes = 2 * dep_bytes; 1133 1134 dml_print("DML: dst_y_prefetch_oto: %f\n", dst_y_prefetch_oto); 1135 dml_print("DML: Tvm_oto_lines: %f\n", Tvm_oto_lines); 1136 dml_print("DML: Tr0_oto_lines: %f\n", Tr0_oto_lines); 1137 dml_print("DML: Lsw_oto: %f\n", Lsw_oto); 1138 dml_print("DML: LineTime: %f\n", LineTime); 1139 dml_print("DML: dst_y_prefetch_equ: %f (after round)\n", dst_y_prefetch_equ); 1140 1141 dml_print("DML: LineTime: %f\n", LineTime); 1142 dml_print("DML: VStartup: %d\n", VStartup); 1143 dml_print("DML: Tvstartup: %fus - time between vstartup and first pixel of active\n", VStartup * LineTime); 1144 dml_print("DML: TSetup: %fus - time from vstartup to vready\n", *TSetup); 1145 dml_print("DML: TCalc: %fus - time for calculations in dchub starting at vready\n", TCalc); 1146 dml_print("DML: TWait: %fus - time for fabric to become ready max(pstate exit,cstate enter/exit, urgent latency) after TCalc\n", TWait); 1147 dml_print("DML: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", Tdmbf); 1148 dml_print("DML: Tdmec: %fus - time dio takes to transfer dmd\n", Tdmec); 1149 dml_print("DML: Tdmsks: %fus - time before active dmd must 
complete transmission at dio\n", Tdmsks); 1150 dml_print("DML: Tdmdl_vm: %fus - time for vm stages of dmd\n", *Tdmdl_vm); 1151 dml_print("DML: Tdmdl: %fus - time for fabric to become ready and fetch dmd\n", *Tdmdl); 1152 dml_print("DML: DSTXAfterScaler: %f pixels - number of pixel clocks pipeline and buffer delay after scaler\n", *DSTXAfterScaler); 1153 dml_print("DML: DSTYAfterScaler: %f lines - number of lines of pipeline and buffer delay after scaler\n", *DSTYAfterScaler); 1154 1155 *PrefetchBandwidth = 0; 1156 *DestinationLinesToRequestVMInVBlank = 0; 1157 *DestinationLinesToRequestRowInVBlank = 0; 1158 *VRatioPrefetchY = 0; 1159 *VRatioPrefetchC = 0; 1160 *RequiredPrefetchPixDataBWLuma = 0; 1161 if (dst_y_prefetch_equ > 1) { 1162 double PrefetchBandwidth1; 1163 double PrefetchBandwidth2; 1164 double PrefetchBandwidth3; 1165 double PrefetchBandwidth4; 1166 1167 if (Tpre_rounded - *Tno_bw > 0) { 1168 PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor 1169 + prefetch_sw_bytes) / (Tpre_rounded - *Tno_bw); 1170 Tsw_est1 = prefetch_sw_bytes / PrefetchBandwidth1; 1171 } else { 1172 PrefetchBandwidth1 = 0; 1173 } 1174 1175 if (VStartup == MaxVStartup && Tsw_est1 / LineTime < min_Lsw && Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - *Tno_bw > 0) { 1176 PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor) 1177 / (Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - *Tno_bw); 1178 } 1179 1180 if (Tpre_rounded - *Tno_bw - 2 * Tr0_trips_rounded > 0) 1181 PrefetchBandwidth2 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + prefetch_sw_bytes) / (Tpre_rounded - *Tno_bw - 2 * Tr0_trips_rounded); 1182 else 1183 PrefetchBandwidth2 = 0; 1184 1185 if (Tpre_rounded - Tvm_trips_rounded > 0) { 1186 PrefetchBandwidth3 = (2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor 
1187 + prefetch_sw_bytes) / (Tpre_rounded - Tvm_trips_rounded); 1188 Tsw_est3 = prefetch_sw_bytes / PrefetchBandwidth3; 1189 } else { 1190 PrefetchBandwidth3 = 0; 1191 } 1192 1193 #ifdef __DML_VBA_DEBUG__ 1194 dml_print("DML::%s: Tpre_rounded: %f\n", __func__, Tpre_rounded); 1195 dml_print("DML::%s: Tvm_trips_rounded: %f\n", __func__, Tvm_trips_rounded); 1196 dml_print("DML::%s: PrefetchBandwidth3: %f\n", __func__, PrefetchBandwidth3); 1197 #endif 1198 if (VStartup == MaxVStartup && Tsw_est3 / LineTime < min_Lsw && Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - Tvm_trips_rounded > 0) { 1199 PrefetchBandwidth3 = (2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor) 1200 / (Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - Tvm_trips_rounded); 1201 } 1202 1203 if (Tpre_rounded - Tvm_trips_rounded - 2 * Tr0_trips_rounded > 0) 1204 PrefetchBandwidth4 = prefetch_sw_bytes / (Tpre_rounded - Tvm_trips_rounded - 2 * Tr0_trips_rounded); 1205 else 1206 PrefetchBandwidth4 = 0; 1207 1208 { 1209 bool Case1OK; 1210 bool Case2OK; 1211 bool Case3OK; 1212 1213 if (PrefetchBandwidth1 > 0) { 1214 if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth1 >= Tvm_trips_rounded 1215 && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / PrefetchBandwidth1 >= Tr0_trips_rounded) { 1216 Case1OK = true; 1217 } else { 1218 Case1OK = false; 1219 } 1220 } else { 1221 Case1OK = false; 1222 } 1223 1224 if (PrefetchBandwidth2 > 0) { 1225 if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth2 >= Tvm_trips_rounded 1226 && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / PrefetchBandwidth2 < Tr0_trips_rounded) { 1227 Case2OK = true; 1228 } else { 1229 Case2OK = false; 1230 } 1231 } else { 1232 Case2OK = false; 1233 } 1234 1235 if (PrefetchBandwidth3 > 0) { 1236 if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth3 < Tvm_trips_rounded 1237 && (MetaRowByte + 
PixelPTEBytesPerRow * HostVMInefficiencyFactor) / PrefetchBandwidth3 >= Tr0_trips_rounded) { 1238 Case3OK = true; 1239 } else { 1240 Case3OK = false; 1241 } 1242 } else { 1243 Case3OK = false; 1244 } 1245 1246 if (Case1OK) { 1247 prefetch_bw_equ = PrefetchBandwidth1; 1248 } else if (Case2OK) { 1249 prefetch_bw_equ = PrefetchBandwidth2; 1250 } else if (Case3OK) { 1251 prefetch_bw_equ = PrefetchBandwidth3; 1252 } else { 1253 prefetch_bw_equ = PrefetchBandwidth4; 1254 } 1255 1256 #ifdef __DML_VBA_DEBUG__ 1257 dml_print("DML::%s: Case1OK: %d\n", __func__, Case1OK); 1258 dml_print("DML::%s: Case2OK: %d\n", __func__, Case2OK); 1259 dml_print("DML::%s: Case3OK: %d\n", __func__, Case3OK); 1260 dml_print("DML::%s: prefetch_bw_equ: %f\n", __func__, prefetch_bw_equ); 1261 #endif 1262 1263 if (prefetch_bw_equ > 0) { 1264 if (GPUVMEnable == true) { 1265 Tvm_equ = dml_max3(*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / prefetch_bw_equ, Tvm_trips, LineTime / 4); 1266 } else { 1267 Tvm_equ = LineTime / 4; 1268 } 1269 1270 if ((GPUVMEnable == true || myPipe->DCCEnable == true)) { 1271 Tr0_equ = dml_max4( 1272 (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / prefetch_bw_equ, 1273 Tr0_trips, 1274 (LineTime - Tvm_equ) / 2, 1275 LineTime / 4); 1276 } else { 1277 Tr0_equ = (LineTime - Tvm_equ) / 2; 1278 } 1279 } else { 1280 Tvm_equ = 0; 1281 Tr0_equ = 0; 1282 dml_print("DML: prefetch_bw_equ equals 0! 
%s:%d\n", __FILE__, __LINE__); 1283 } 1284 } 1285 1286 if (dst_y_prefetch_oto < dst_y_prefetch_equ) { 1287 *DestinationLinesForPrefetch = dst_y_prefetch_oto; 1288 TimeForFetchingMetaPTE = Tvm_oto; 1289 TimeForFetchingRowInVBlank = Tr0_oto; 1290 *PrefetchBandwidth = prefetch_bw_oto; 1291 } else { 1292 *DestinationLinesForPrefetch = dst_y_prefetch_equ; 1293 TimeForFetchingMetaPTE = Tvm_equ; 1294 TimeForFetchingRowInVBlank = Tr0_equ; 1295 *PrefetchBandwidth = prefetch_bw_equ; 1296 } 1297 1298 *DestinationLinesToRequestVMInVBlank = dml_ceil(4.0 * TimeForFetchingMetaPTE / LineTime, 1.0) / 4.0; 1299 1300 *DestinationLinesToRequestRowInVBlank = dml_ceil(4.0 * TimeForFetchingRowInVBlank / LineTime, 1.0) / 4.0; 1301 1302 #ifdef __DML_VBA_ALLOW_DELTA__ 1303 LinesToRequestPrefetchPixelData = *DestinationLinesForPrefetch 1304 // See note above dated 5/30/2018 1305 // - ((NumberOfCursors > 0 || GPUVMEnable || DCCEnable) ? 1306 - ((GPUVMEnable || myPipe->DCCEnable) ? (*DestinationLinesToRequestVMInVBlank + 2 * *DestinationLinesToRequestRowInVBlank) : 0.0); // TODO: Did someone else add this?? 
1307 #else 1308 LinesToRequestPrefetchPixelData = *DestinationLinesForPrefetch - *DestinationLinesToRequestVMInVBlank - 2 * *DestinationLinesToRequestRowInVBlank; 1309 #endif 1310 1311 #ifdef __DML_VBA_DEBUG__ 1312 dml_print("DML::%s: DestinationLinesForPrefetch = %f\n", __func__, *DestinationLinesForPrefetch); 1313 dml_print("DML::%s: DestinationLinesToRequestVMInVBlank = %f\n", __func__, *DestinationLinesToRequestVMInVBlank); 1314 dml_print("DML::%s: TimeForFetchingRowInVBlank = %f\n", __func__, TimeForFetchingRowInVBlank); 1315 dml_print("DML::%s: LineTime = %f\n", __func__, LineTime); 1316 dml_print("DML::%s: DestinationLinesToRequestRowInVBlank = %f\n", __func__, *DestinationLinesToRequestRowInVBlank); 1317 dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, PrefetchSourceLinesY); 1318 dml_print("DML::%s: LinesToRequestPrefetchPixelData = %f\n", __func__, LinesToRequestPrefetchPixelData); 1319 #endif 1320 1321 if (LinesToRequestPrefetchPixelData > 0 && prefetch_bw_equ > 0) { 1322 1323 *VRatioPrefetchY = (double) PrefetchSourceLinesY / LinesToRequestPrefetchPixelData; 1324 *VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0); 1325 #ifdef __DML_VBA_DEBUG__ 1326 dml_print("DML::%s: VRatioPrefetchY = %f\n", __func__, *VRatioPrefetchY); 1327 dml_print("DML::%s: SwathHeightY = %d\n", __func__, SwathHeightY); 1328 dml_print("DML::%s: VInitPreFillY = %f\n", __func__, VInitPreFillY); 1329 #endif 1330 if ((SwathHeightY > 4) && (VInitPreFillY > 3)) { 1331 if (LinesToRequestPrefetchPixelData > (VInitPreFillY - 3.0) / 2.0) { 1332 *VRatioPrefetchY = dml_max( 1333 (double) PrefetchSourceLinesY / LinesToRequestPrefetchPixelData, 1334 (double) MaxNumSwathY * SwathHeightY / (LinesToRequestPrefetchPixelData - (VInitPreFillY - 3.0) / 2.0)); 1335 *VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0); 1336 } else { 1337 MyError = true; 1338 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__); 1339 *VRatioPrefetchY = 0; 1340 } 1341 #ifdef __DML_VBA_DEBUG__ 1342 
dml_print("DML::%s: VRatioPrefetchY = %f\n", __func__, *VRatioPrefetchY); 1343 dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, PrefetchSourceLinesY); 1344 dml_print("DML::%s: MaxNumSwathY = %d\n", __func__, MaxNumSwathY); 1345 #endif 1346 } 1347 1348 *VRatioPrefetchC = (double) PrefetchSourceLinesC / LinesToRequestPrefetchPixelData; 1349 *VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0); 1350 1351 #ifdef __DML_VBA_DEBUG__ 1352 dml_print("DML::%s: VRatioPrefetchC = %f\n", __func__, *VRatioPrefetchC); 1353 dml_print("DML::%s: SwathHeightC = %d\n", __func__, SwathHeightC); 1354 dml_print("DML::%s: VInitPreFillC = %f\n", __func__, VInitPreFillC); 1355 #endif 1356 if ((SwathHeightC > 4) || VInitPreFillC > 3) { 1357 if (LinesToRequestPrefetchPixelData > (VInitPreFillC - 3.0) / 2.0) { 1358 *VRatioPrefetchC = dml_max( 1359 *VRatioPrefetchC, 1360 (double) MaxNumSwathC * SwathHeightC / (LinesToRequestPrefetchPixelData - (VInitPreFillC - 3.0) / 2.0)); 1361 *VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0); 1362 } else { 1363 MyError = true; 1364 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__); 1365 *VRatioPrefetchC = 0; 1366 } 1367 #ifdef __DML_VBA_DEBUG__ 1368 dml_print("DML::%s: VRatioPrefetchC = %f\n", __func__, *VRatioPrefetchC); 1369 dml_print("DML::%s: PrefetchSourceLinesC = %f\n", __func__, PrefetchSourceLinesC); 1370 dml_print("DML::%s: MaxNumSwathC = %d\n", __func__, MaxNumSwathC); 1371 #endif 1372 } 1373 1374 #ifdef __DML_VBA_DEBUG__ 1375 dml_print("DML::%s: BytePerPixelY = %d\n", __func__, myPipe->BytePerPixelY); 1376 dml_print("DML::%s: swath_width_luma_ub = %d\n", __func__, swath_width_luma_ub); 1377 dml_print("DML::%s: LineTime = %f\n", __func__, LineTime); 1378 #endif 1379 1380 *RequiredPrefetchPixDataBWLuma = (double) PrefetchSourceLinesY / LinesToRequestPrefetchPixelData * myPipe->BytePerPixelY * swath_width_luma_ub / LineTime; 1381 1382 #ifdef __DML_VBA_DEBUG__ 1383 dml_print("DML::%s: RequiredPrefetchPixDataBWLuma = %f\n", __func__, 
*RequiredPrefetchPixDataBWLuma); 1384 #endif 1385 1386 *RequiredPrefetchPixDataBWChroma = (double) PrefetchSourceLinesC / LinesToRequestPrefetchPixelData * myPipe->BytePerPixelC * swath_width_chroma_ub 1387 / LineTime; 1388 } else { 1389 MyError = true; 1390 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__); 1391 dml_print("DML: LinesToRequestPrefetchPixelData: %f, should be > 0\n", LinesToRequestPrefetchPixelData); 1392 *VRatioPrefetchY = 0; 1393 *VRatioPrefetchC = 0; 1394 *RequiredPrefetchPixDataBWLuma = 0; 1395 *RequiredPrefetchPixDataBWChroma = 0; 1396 } 1397 1398 dml_print( 1399 "DML: Tpre: %fus - sum of time to request meta pte, 2 x data pte + meta data, swaths\n", 1400 (double) LinesToRequestPrefetchPixelData * LineTime + 2.0 * TimeForFetchingRowInVBlank + TimeForFetchingMetaPTE); 1401 dml_print("DML: Tvm: %fus - time to fetch page tables for meta surface\n", TimeForFetchingMetaPTE); 1402 dml_print("DML: Tr0: %fus - time to fetch first row of data pagetables and first row of meta data (done in parallel)\n", TimeForFetchingRowInVBlank); 1403 dml_print( 1404 "DML: Tsw: %fus = time to fetch enough pixel data and cursor data to feed the scalers init position and detile\n", 1405 (double) LinesToRequestPrefetchPixelData * LineTime); 1406 dml_print("DML: To: %fus - time for propagation from scaler to optc\n", (*DSTYAfterScaler + ((double) (*DSTXAfterScaler) / (double) myPipe->HTotal)) * LineTime); 1407 dml_print("DML: Tvstartup - TSetup - Tcalc - Twait - Tpre - To > 0\n"); 1408 dml_print( 1409 "DML: Tslack(pre): %fus - time left over in schedule\n", 1410 VStartup * LineTime - TimeForFetchingMetaPTE - 2 * TimeForFetchingRowInVBlank 1411 - (*DSTYAfterScaler + ((double) (*DSTXAfterScaler) / (double) myPipe->HTotal)) * LineTime - TWait - TCalc - *TSetup); 1412 dml_print("DML: row_bytes = dpte_row_bytes (per_pipe) = PixelPTEBytesPerRow = : %d\n", PixelPTEBytesPerRow); 1413 1414 } else { 1415 MyError = true; 1416 dml_print("DML: MyErr set %s:%d\n", __FILE__, 
__LINE__); 1417 } 1418 1419 { 1420 double prefetch_vm_bw; 1421 double prefetch_row_bw; 1422 1423 if (PDEAndMetaPTEBytesFrame == 0) { 1424 prefetch_vm_bw = 0; 1425 } else if (*DestinationLinesToRequestVMInVBlank > 0) { 1426 #ifdef __DML_VBA_DEBUG__ 1427 dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame); 1428 dml_print("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, HostVMInefficiencyFactor); 1429 dml_print("DML::%s: DestinationLinesToRequestVMInVBlank = %f\n", __func__, *DestinationLinesToRequestVMInVBlank); 1430 dml_print("DML::%s: LineTime = %f\n", __func__, LineTime); 1431 #endif 1432 prefetch_vm_bw = PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / (*DestinationLinesToRequestVMInVBlank * LineTime); 1433 #ifdef __DML_VBA_DEBUG__ 1434 dml_print("DML::%s: prefetch_vm_bw = %f\n", __func__, prefetch_vm_bw); 1435 #endif 1436 } else { 1437 prefetch_vm_bw = 0; 1438 MyError = true; 1439 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__); 1440 } 1441 1442 if (MetaRowByte + PixelPTEBytesPerRow == 0) { 1443 prefetch_row_bw = 0; 1444 } else if (*DestinationLinesToRequestRowInVBlank > 0) { 1445 prefetch_row_bw = (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / (*DestinationLinesToRequestRowInVBlank * LineTime); 1446 1447 #ifdef __DML_VBA_DEBUG__ 1448 dml_print("DML::%s: MetaRowByte = %d\n", __func__, MetaRowByte); 1449 dml_print("DML::%s: PixelPTEBytesPerRow = %d\n", __func__, PixelPTEBytesPerRow); 1450 dml_print("DML::%s: DestinationLinesToRequestRowInVBlank = %f\n", __func__, *DestinationLinesToRequestRowInVBlank); 1451 dml_print("DML::%s: prefetch_row_bw = %f\n", __func__, prefetch_row_bw); 1452 #endif 1453 } else { 1454 prefetch_row_bw = 0; 1455 MyError = true; 1456 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__); 1457 } 1458 1459 *prefetch_vmrow_bw = dml_max(prefetch_vm_bw, prefetch_row_bw); 1460 } 1461 1462 if (MyError) { 1463 *PrefetchBandwidth = 0; 1464 TimeForFetchingMetaPTE = 0; 1465 
TimeForFetchingRowInVBlank = 0; 1466 *DestinationLinesToRequestVMInVBlank = 0; 1467 *DestinationLinesToRequestRowInVBlank = 0; 1468 *DestinationLinesForPrefetch = 0; 1469 LinesToRequestPrefetchPixelData = 0; 1470 *VRatioPrefetchY = 0; 1471 *VRatioPrefetchC = 0; 1472 *RequiredPrefetchPixDataBWLuma = 0; 1473 *RequiredPrefetchPixDataBWChroma = 0; 1474 } 1475 1476 return MyError; 1477 } 1478 1479 static double RoundToDFSGranularityUp(double Clock, double VCOSpeed) 1480 { 1481 return VCOSpeed * 4 / dml_floor(VCOSpeed * 4 / Clock, 1); 1482 } 1483 1484 static double RoundToDFSGranularityDown(double Clock, double VCOSpeed) 1485 { 1486 return VCOSpeed * 4 / dml_ceil(VCOSpeed * 4.0 / Clock, 1); 1487 } 1488 1489 static void CalculateDCCConfiguration( 1490 bool DCCEnabled, 1491 bool DCCProgrammingAssumesScanDirectionUnknown, 1492 enum source_format_class SourcePixelFormat, 1493 unsigned int SurfaceWidthLuma, 1494 unsigned int SurfaceWidthChroma, 1495 unsigned int SurfaceHeightLuma, 1496 unsigned int SurfaceHeightChroma, 1497 double DETBufferSize, 1498 unsigned int RequestHeight256ByteLuma, 1499 unsigned int RequestHeight256ByteChroma, 1500 enum dm_swizzle_mode TilingFormat, 1501 unsigned int BytePerPixelY, 1502 unsigned int BytePerPixelC, 1503 double BytePerPixelDETY, 1504 double BytePerPixelDETC, 1505 enum scan_direction_class ScanOrientation, 1506 unsigned int *MaxUncompressedBlockLuma, 1507 unsigned int *MaxUncompressedBlockChroma, 1508 unsigned int *MaxCompressedBlockLuma, 1509 unsigned int *MaxCompressedBlockChroma, 1510 unsigned int *IndependentBlockLuma, 1511 unsigned int *IndependentBlockChroma) 1512 { 1513 int yuv420; 1514 int horz_div_l; 1515 int horz_div_c; 1516 int vert_div_l; 1517 int vert_div_c; 1518 1519 int swath_buf_size; 1520 double detile_buf_vp_horz_limit; 1521 double detile_buf_vp_vert_limit; 1522 1523 int MAS_vp_horz_limit; 1524 int MAS_vp_vert_limit; 1525 int max_vp_horz_width; 1526 int max_vp_vert_height; 1527 int eff_surf_width_l; 1528 int 
eff_surf_width_c; 1529 int eff_surf_height_l; 1530 int eff_surf_height_c; 1531 1532 int full_swath_bytes_horz_wc_l; 1533 int full_swath_bytes_horz_wc_c; 1534 int full_swath_bytes_vert_wc_l; 1535 int full_swath_bytes_vert_wc_c; 1536 int req128_horz_wc_l; 1537 int req128_horz_wc_c; 1538 int req128_vert_wc_l; 1539 int req128_vert_wc_c; 1540 int segment_order_horz_contiguous_luma; 1541 int segment_order_horz_contiguous_chroma; 1542 int segment_order_vert_contiguous_luma; 1543 int segment_order_vert_contiguous_chroma; 1544 1545 typedef enum { 1546 REQ_256Bytes, REQ_128BytesNonContiguous, REQ_128BytesContiguous, REQ_NA 1547 } RequestType; 1548 RequestType RequestLuma; 1549 RequestType RequestChroma; 1550 1551 yuv420 = ((SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12) ? 1 : 0); 1552 horz_div_l = 1; 1553 horz_div_c = 1; 1554 vert_div_l = 1; 1555 vert_div_c = 1; 1556 1557 if (BytePerPixelY == 1) 1558 vert_div_l = 0; 1559 if (BytePerPixelC == 1) 1560 vert_div_c = 0; 1561 if (BytePerPixelY == 8 && (TilingFormat == dm_sw_64kb_s || TilingFormat == dm_sw_64kb_s_t || TilingFormat == dm_sw_64kb_s_x)) 1562 horz_div_l = 0; 1563 if (BytePerPixelC == 8 && (TilingFormat == dm_sw_64kb_s || TilingFormat == dm_sw_64kb_s_t || TilingFormat == dm_sw_64kb_s_x)) 1564 horz_div_c = 0; 1565 1566 if (BytePerPixelC == 0) { 1567 swath_buf_size = DETBufferSize / 2 - 2 * 256; 1568 detile_buf_vp_horz_limit = (double) swath_buf_size / ((double) RequestHeight256ByteLuma * BytePerPixelY / (1 + horz_div_l)); 1569 detile_buf_vp_vert_limit = (double) swath_buf_size / (256.0 / RequestHeight256ByteLuma / (1 + vert_div_l)); 1570 } else { 1571 swath_buf_size = DETBufferSize / 2 - 2 * 2 * 256; 1572 detile_buf_vp_horz_limit = (double) swath_buf_size 1573 / ((double) RequestHeight256ByteLuma * BytePerPixelY / (1 + horz_div_l) 1574 + (double) RequestHeight256ByteChroma * BytePerPixelC / (1 + horz_div_c) / (1 + yuv420)); 1575 detile_buf_vp_vert_limit = (double) 
swath_buf_size 1576 / (256.0 / RequestHeight256ByteLuma / (1 + vert_div_l) + 256.0 / RequestHeight256ByteChroma / (1 + vert_div_c) / (1 + yuv420)); 1577 } 1578 1579 if (SourcePixelFormat == dm_420_10) { 1580 detile_buf_vp_horz_limit = 1.5 * detile_buf_vp_horz_limit; 1581 detile_buf_vp_vert_limit = 1.5 * detile_buf_vp_vert_limit; 1582 } 1583 1584 detile_buf_vp_horz_limit = dml_floor(detile_buf_vp_horz_limit - 1, 16); 1585 detile_buf_vp_vert_limit = dml_floor(detile_buf_vp_vert_limit - 1, 16); 1586 1587 MAS_vp_horz_limit = SourcePixelFormat == dm_rgbe_alpha ? 3840 : 5760; 1588 MAS_vp_vert_limit = (BytePerPixelC > 0 ? 2880 : 5760); 1589 max_vp_horz_width = dml_min((double) MAS_vp_horz_limit, detile_buf_vp_horz_limit); 1590 max_vp_vert_height = dml_min((double) MAS_vp_vert_limit, detile_buf_vp_vert_limit); 1591 eff_surf_width_l = (SurfaceWidthLuma > max_vp_horz_width ? max_vp_horz_width : SurfaceWidthLuma); 1592 eff_surf_width_c = eff_surf_width_l / (1 + yuv420); 1593 eff_surf_height_l = (SurfaceHeightLuma > max_vp_vert_height ? 
max_vp_vert_height : SurfaceHeightLuma); 1594 eff_surf_height_c = eff_surf_height_l / (1 + yuv420); 1595 1596 full_swath_bytes_horz_wc_l = eff_surf_width_l * RequestHeight256ByteLuma * BytePerPixelY; 1597 full_swath_bytes_vert_wc_l = eff_surf_height_l * 256 / RequestHeight256ByteLuma; 1598 if (BytePerPixelC > 0) { 1599 full_swath_bytes_horz_wc_c = eff_surf_width_c * RequestHeight256ByteChroma * BytePerPixelC; 1600 full_swath_bytes_vert_wc_c = eff_surf_height_c * 256 / RequestHeight256ByteChroma; 1601 } else { 1602 full_swath_bytes_horz_wc_c = 0; 1603 full_swath_bytes_vert_wc_c = 0; 1604 } 1605 1606 if (SourcePixelFormat == dm_420_10) { 1607 full_swath_bytes_horz_wc_l = dml_ceil(full_swath_bytes_horz_wc_l * 2 / 3, 256); 1608 full_swath_bytes_horz_wc_c = dml_ceil(full_swath_bytes_horz_wc_c * 2 / 3, 256); 1609 full_swath_bytes_vert_wc_l = dml_ceil(full_swath_bytes_vert_wc_l * 2 / 3, 256); 1610 full_swath_bytes_vert_wc_c = dml_ceil(full_swath_bytes_vert_wc_c * 2 / 3, 256); 1611 } 1612 1613 if (2 * full_swath_bytes_horz_wc_l + 2 * full_swath_bytes_horz_wc_c <= DETBufferSize) { 1614 req128_horz_wc_l = 0; 1615 req128_horz_wc_c = 0; 1616 } else if (full_swath_bytes_horz_wc_l < 1.5 * full_swath_bytes_horz_wc_c && 2 * full_swath_bytes_horz_wc_l + full_swath_bytes_horz_wc_c <= DETBufferSize) { 1617 req128_horz_wc_l = 0; 1618 req128_horz_wc_c = 1; 1619 } else if (full_swath_bytes_horz_wc_l >= 1.5 * full_swath_bytes_horz_wc_c && full_swath_bytes_horz_wc_l + 2 * full_swath_bytes_horz_wc_c <= DETBufferSize) { 1620 req128_horz_wc_l = 1; 1621 req128_horz_wc_c = 0; 1622 } else { 1623 req128_horz_wc_l = 1; 1624 req128_horz_wc_c = 1; 1625 } 1626 1627 if (2 * full_swath_bytes_vert_wc_l + 2 * full_swath_bytes_vert_wc_c <= DETBufferSize) { 1628 req128_vert_wc_l = 0; 1629 req128_vert_wc_c = 0; 1630 } else if (full_swath_bytes_vert_wc_l < 1.5 * full_swath_bytes_vert_wc_c && 2 * full_swath_bytes_vert_wc_l + full_swath_bytes_vert_wc_c <= DETBufferSize) { 1631 req128_vert_wc_l = 0; 1632 
req128_vert_wc_c = 1; 1633 } else if (full_swath_bytes_vert_wc_l >= 1.5 * full_swath_bytes_vert_wc_c && full_swath_bytes_vert_wc_l + 2 * full_swath_bytes_vert_wc_c <= DETBufferSize) { 1634 req128_vert_wc_l = 1; 1635 req128_vert_wc_c = 0; 1636 } else { 1637 req128_vert_wc_l = 1; 1638 req128_vert_wc_c = 1; 1639 } 1640 1641 if (BytePerPixelY == 2 || (BytePerPixelY == 4 && TilingFormat != dm_sw_64kb_r_x)) { 1642 segment_order_horz_contiguous_luma = 0; 1643 } else { 1644 segment_order_horz_contiguous_luma = 1; 1645 } 1646 if ((BytePerPixelY == 8 && (TilingFormat == dm_sw_64kb_d || TilingFormat == dm_sw_64kb_d_x || TilingFormat == dm_sw_64kb_d_t || TilingFormat == dm_sw_64kb_r_x)) 1647 || (BytePerPixelY == 4 && TilingFormat == dm_sw_64kb_r_x)) { 1648 segment_order_vert_contiguous_luma = 0; 1649 } else { 1650 segment_order_vert_contiguous_luma = 1; 1651 } 1652 if (BytePerPixelC == 2 || (BytePerPixelC == 4 && TilingFormat != dm_sw_64kb_r_x)) { 1653 segment_order_horz_contiguous_chroma = 0; 1654 } else { 1655 segment_order_horz_contiguous_chroma = 1; 1656 } 1657 if ((BytePerPixelC == 8 && (TilingFormat == dm_sw_64kb_d || TilingFormat == dm_sw_64kb_d_x || TilingFormat == dm_sw_64kb_d_t || TilingFormat == dm_sw_64kb_r_x)) 1658 || (BytePerPixelC == 4 && TilingFormat == dm_sw_64kb_r_x)) { 1659 segment_order_vert_contiguous_chroma = 0; 1660 } else { 1661 segment_order_vert_contiguous_chroma = 1; 1662 } 1663 1664 if (DCCProgrammingAssumesScanDirectionUnknown == true) { 1665 if (req128_horz_wc_l == 0 && req128_vert_wc_l == 0) { 1666 RequestLuma = REQ_256Bytes; 1667 } else if ((req128_horz_wc_l == 1 && segment_order_horz_contiguous_luma == 0) || (req128_vert_wc_l == 1 && segment_order_vert_contiguous_luma == 0)) { 1668 RequestLuma = REQ_128BytesNonContiguous; 1669 } else { 1670 RequestLuma = REQ_128BytesContiguous; 1671 } 1672 if (req128_horz_wc_c == 0 && req128_vert_wc_c == 0) { 1673 RequestChroma = REQ_256Bytes; 1674 } else if ((req128_horz_wc_c == 1 && 
segment_order_horz_contiguous_chroma == 0) || (req128_vert_wc_c == 1 && segment_order_vert_contiguous_chroma == 0)) { 1675 RequestChroma = REQ_128BytesNonContiguous; 1676 } else { 1677 RequestChroma = REQ_128BytesContiguous; 1678 } 1679 } else if (ScanOrientation != dm_vert) { 1680 if (req128_horz_wc_l == 0) { 1681 RequestLuma = REQ_256Bytes; 1682 } else if (segment_order_horz_contiguous_luma == 0) { 1683 RequestLuma = REQ_128BytesNonContiguous; 1684 } else { 1685 RequestLuma = REQ_128BytesContiguous; 1686 } 1687 if (req128_horz_wc_c == 0) { 1688 RequestChroma = REQ_256Bytes; 1689 } else if (segment_order_horz_contiguous_chroma == 0) { 1690 RequestChroma = REQ_128BytesNonContiguous; 1691 } else { 1692 RequestChroma = REQ_128BytesContiguous; 1693 } 1694 } else { 1695 if (req128_vert_wc_l == 0) { 1696 RequestLuma = REQ_256Bytes; 1697 } else if (segment_order_vert_contiguous_luma == 0) { 1698 RequestLuma = REQ_128BytesNonContiguous; 1699 } else { 1700 RequestLuma = REQ_128BytesContiguous; 1701 } 1702 if (req128_vert_wc_c == 0) { 1703 RequestChroma = REQ_256Bytes; 1704 } else if (segment_order_vert_contiguous_chroma == 0) { 1705 RequestChroma = REQ_128BytesNonContiguous; 1706 } else { 1707 RequestChroma = REQ_128BytesContiguous; 1708 } 1709 } 1710 1711 if (RequestLuma == REQ_256Bytes) { 1712 *MaxUncompressedBlockLuma = 256; 1713 *MaxCompressedBlockLuma = 256; 1714 *IndependentBlockLuma = 0; 1715 } else if (RequestLuma == REQ_128BytesContiguous) { 1716 *MaxUncompressedBlockLuma = 256; 1717 *MaxCompressedBlockLuma = 128; 1718 *IndependentBlockLuma = 128; 1719 } else { 1720 *MaxUncompressedBlockLuma = 256; 1721 *MaxCompressedBlockLuma = 64; 1722 *IndependentBlockLuma = 64; 1723 } 1724 1725 if (RequestChroma == REQ_256Bytes) { 1726 *MaxUncompressedBlockChroma = 256; 1727 *MaxCompressedBlockChroma = 256; 1728 *IndependentBlockChroma = 0; 1729 } else if (RequestChroma == REQ_128BytesContiguous) { 1730 *MaxUncompressedBlockChroma = 256; 1731 *MaxCompressedBlockChroma = 128; 
1732 *IndependentBlockChroma = 128; 1733 } else { 1734 *MaxUncompressedBlockChroma = 256; 1735 *MaxCompressedBlockChroma = 64; 1736 *IndependentBlockChroma = 64; 1737 } 1738 1739 if (DCCEnabled != true || BytePerPixelC == 0) { 1740 *MaxUncompressedBlockChroma = 0; 1741 *MaxCompressedBlockChroma = 0; 1742 *IndependentBlockChroma = 0; 1743 } 1744 1745 if (DCCEnabled != true) { 1746 *MaxUncompressedBlockLuma = 0; 1747 *MaxCompressedBlockLuma = 0; 1748 *IndependentBlockLuma = 0; 1749 } 1750 } 1751 1752 static double CalculatePrefetchSourceLines( 1753 struct display_mode_lib *mode_lib, 1754 double VRatio, 1755 double vtaps, 1756 bool Interlace, 1757 bool ProgressiveToInterlaceUnitInOPP, 1758 unsigned int SwathHeight, 1759 unsigned int ViewportYStart, 1760 double *VInitPreFill, 1761 unsigned int *MaxNumSwath) 1762 { 1763 struct vba_vars_st *v = &mode_lib->vba; 1764 unsigned int MaxPartialSwath; 1765 1766 if (ProgressiveToInterlaceUnitInOPP) 1767 *VInitPreFill = dml_floor((VRatio + vtaps + 1) / 2.0, 1); 1768 else 1769 *VInitPreFill = dml_floor((VRatio + vtaps + 1 + Interlace * 0.5 * VRatio) / 2.0, 1); 1770 1771 if (!v->IgnoreViewportPositioning) { 1772 1773 *MaxNumSwath = dml_ceil((*VInitPreFill - 1.0) / SwathHeight, 1) + 1.0; 1774 1775 if (*VInitPreFill > 1.0) 1776 MaxPartialSwath = (unsigned int) (*VInitPreFill - 2) % SwathHeight; 1777 else 1778 MaxPartialSwath = (unsigned int) (*VInitPreFill + SwathHeight - 2) % SwathHeight; 1779 MaxPartialSwath = dml_max(1U, MaxPartialSwath); 1780 1781 } else { 1782 1783 if (ViewportYStart != 0) 1784 dml_print("WARNING DML: using viewport y position of 0 even though actual viewport y position is non-zero in prefetch source lines calculation\n"); 1785 1786 *MaxNumSwath = dml_ceil(*VInitPreFill / SwathHeight, 1); 1787 1788 if (*VInitPreFill > 1.0) 1789 MaxPartialSwath = (unsigned int) (*VInitPreFill - 1) % SwathHeight; 1790 else 1791 MaxPartialSwath = (unsigned int) (*VInitPreFill + SwathHeight - 1) % SwathHeight; 1792 } 1793 1794 
#ifdef __DML_VBA_DEBUG__ 1795 dml_print("DML::%s: VRatio = %f\n", __func__, VRatio); 1796 dml_print("DML::%s: vtaps = %f\n", __func__, vtaps); 1797 dml_print("DML::%s: VInitPreFill = %f\n", __func__, *VInitPreFill); 1798 dml_print("DML::%s: ProgressiveToInterlaceUnitInOPP = %d\n", __func__, ProgressiveToInterlaceUnitInOPP); 1799 dml_print("DML::%s: IgnoreViewportPositioning = %d\n", __func__, v->IgnoreViewportPositioning); 1800 dml_print("DML::%s: SwathHeight = %d\n", __func__, SwathHeight); 1801 dml_print("DML::%s: MaxPartialSwath = %d\n", __func__, MaxPartialSwath); 1802 dml_print("DML::%s: MaxNumSwath = %d\n", __func__, *MaxNumSwath); 1803 dml_print("DML::%s: Prefetch source lines = %d\n", __func__, *MaxNumSwath * SwathHeight + MaxPartialSwath); 1804 #endif 1805 return *MaxNumSwath * SwathHeight + MaxPartialSwath; 1806 } 1807 1808 static unsigned int CalculateVMAndRowBytes( 1809 struct display_mode_lib *mode_lib, 1810 bool DCCEnable, 1811 unsigned int BlockHeight256Bytes, 1812 unsigned int BlockWidth256Bytes, 1813 enum source_format_class SourcePixelFormat, 1814 unsigned int SurfaceTiling, 1815 unsigned int BytePerPixel, 1816 enum scan_direction_class ScanDirection, 1817 unsigned int SwathWidth, 1818 unsigned int ViewportHeight, 1819 bool GPUVMEnable, 1820 bool HostVMEnable, 1821 unsigned int HostVMMaxNonCachedPageTableLevels, 1822 unsigned int GPUVMMinPageSize, 1823 unsigned int HostVMMinPageSize, 1824 unsigned int PTEBufferSizeInRequests, 1825 unsigned int Pitch, 1826 unsigned int DCCMetaPitch, 1827 unsigned int *MacroTileWidth, 1828 unsigned int *MetaRowByte, 1829 unsigned int *PixelPTEBytesPerRow, 1830 bool *PTEBufferSizeNotExceeded, 1831 int *dpte_row_width_ub, 1832 unsigned int *dpte_row_height, 1833 unsigned int *MetaRequestWidth, 1834 unsigned int *MetaRequestHeight, 1835 unsigned int *meta_row_width, 1836 unsigned int *meta_row_height, 1837 int *vm_group_bytes, 1838 unsigned int *dpte_group_bytes, 1839 unsigned int *PixelPTEReqWidth, 1840 unsigned int 
*PixelPTEReqHeight, 1841 unsigned int *PTERequestSize, 1842 int *DPDE0BytesFrame, 1843 int *MetaPTEBytesFrame) 1844 { 1845 struct vba_vars_st *v = &mode_lib->vba; 1846 unsigned int MPDEBytesFrame; 1847 unsigned int DCCMetaSurfaceBytes; 1848 unsigned int MacroTileSizeBytes; 1849 unsigned int MacroTileHeight; 1850 unsigned int ExtraDPDEBytesFrame; 1851 unsigned int PDEAndMetaPTEBytesFrame; 1852 unsigned int PixelPTEReqHeightPTEs = 0; 1853 unsigned int HostVMDynamicLevels = 0; 1854 double FractionOfPTEReturnDrop; 1855 1856 if (GPUVMEnable == true && HostVMEnable == true) { 1857 if (HostVMMinPageSize < 2048) { 1858 HostVMDynamicLevels = HostVMMaxNonCachedPageTableLevels; 1859 } else if (HostVMMinPageSize >= 2048 && HostVMMinPageSize < 1048576) { 1860 HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 1); 1861 } else { 1862 HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 2); 1863 } 1864 } 1865 1866 *MetaRequestHeight = 8 * BlockHeight256Bytes; 1867 *MetaRequestWidth = 8 * BlockWidth256Bytes; 1868 if (ScanDirection != dm_vert) { 1869 *meta_row_height = *MetaRequestHeight; 1870 *meta_row_width = dml_ceil((double) SwathWidth - 1, *MetaRequestWidth) + *MetaRequestWidth; 1871 *MetaRowByte = *meta_row_width * *MetaRequestHeight * BytePerPixel / 256.0; 1872 } else { 1873 *meta_row_height = *MetaRequestWidth; 1874 *meta_row_width = dml_ceil((double) SwathWidth - 1, *MetaRequestHeight) + *MetaRequestHeight; 1875 *MetaRowByte = *meta_row_width * *MetaRequestWidth * BytePerPixel / 256.0; 1876 } 1877 DCCMetaSurfaceBytes = DCCMetaPitch * (dml_ceil(ViewportHeight - 1, 64 * BlockHeight256Bytes) + 64 * BlockHeight256Bytes) * BytePerPixel / 256; 1878 if (GPUVMEnable == true) { 1879 *MetaPTEBytesFrame = (dml_ceil((double) (DCCMetaSurfaceBytes - 4.0 * 1024.0) / (8 * 4.0 * 1024), 1) + 1) * 64; 1880 MPDEBytesFrame = 128 * (v->GPUVMMaxPageTableLevels - 1); 1881 } else { 1882 *MetaPTEBytesFrame = 0; 1883 MPDEBytesFrame = 0; 1884 } 1885 1886 
if (DCCEnable != true) { 1887 *MetaPTEBytesFrame = 0; 1888 MPDEBytesFrame = 0; 1889 *MetaRowByte = 0; 1890 } 1891 1892 if (SurfaceTiling == dm_sw_linear) { 1893 MacroTileSizeBytes = 256; 1894 MacroTileHeight = BlockHeight256Bytes; 1895 } else { 1896 MacroTileSizeBytes = 65536; 1897 MacroTileHeight = 16 * BlockHeight256Bytes; 1898 } 1899 *MacroTileWidth = MacroTileSizeBytes / BytePerPixel / MacroTileHeight; 1900 1901 if (GPUVMEnable == true && v->GPUVMMaxPageTableLevels > 1) { 1902 if (ScanDirection != dm_vert) { 1903 *DPDE0BytesFrame = 64 1904 * (dml_ceil( 1905 ((Pitch * (dml_ceil(ViewportHeight - 1, MacroTileHeight) + MacroTileHeight) * BytePerPixel) - MacroTileSizeBytes) 1906 / (8 * 2097152), 1907 1) + 1); 1908 } else { 1909 *DPDE0BytesFrame = 64 1910 * (dml_ceil( 1911 ((Pitch * (dml_ceil((double) SwathWidth - 1, MacroTileHeight) + MacroTileHeight) * BytePerPixel) - MacroTileSizeBytes) 1912 / (8 * 2097152), 1913 1) + 1); 1914 } 1915 ExtraDPDEBytesFrame = 128 * (v->GPUVMMaxPageTableLevels - 2); 1916 } else { 1917 *DPDE0BytesFrame = 0; 1918 ExtraDPDEBytesFrame = 0; 1919 } 1920 1921 PDEAndMetaPTEBytesFrame = *MetaPTEBytesFrame + MPDEBytesFrame + *DPDE0BytesFrame + ExtraDPDEBytesFrame; 1922 1923 #ifdef __DML_VBA_DEBUG__ 1924 dml_print("DML::%s: MetaPTEBytesFrame = %d\n", __func__, *MetaPTEBytesFrame); 1925 dml_print("DML::%s: MPDEBytesFrame = %d\n", __func__, MPDEBytesFrame); 1926 dml_print("DML::%s: DPDE0BytesFrame = %d\n", __func__, *DPDE0BytesFrame); 1927 dml_print("DML::%s: ExtraDPDEBytesFrame= %d\n", __func__, ExtraDPDEBytesFrame); 1928 dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame); 1929 #endif 1930 1931 if (HostVMEnable == true) { 1932 PDEAndMetaPTEBytesFrame = PDEAndMetaPTEBytesFrame * (1 + 8 * HostVMDynamicLevels); 1933 } 1934 #ifdef __DML_VBA_DEBUG__ 1935 dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame); 1936 #endif 1937 1938 if (SurfaceTiling == dm_sw_linear) { 1939 
PixelPTEReqHeightPTEs = 1; 1940 *PixelPTEReqHeight = 1; 1941 *PixelPTEReqWidth = 32768.0 / BytePerPixel; 1942 *PTERequestSize = 64; 1943 FractionOfPTEReturnDrop = 0; 1944 } else if (MacroTileSizeBytes == 4096) { 1945 PixelPTEReqHeightPTEs = 1; 1946 *PixelPTEReqHeight = MacroTileHeight; 1947 *PixelPTEReqWidth = 8 * *MacroTileWidth; 1948 *PTERequestSize = 64; 1949 if (ScanDirection != dm_vert) 1950 FractionOfPTEReturnDrop = 0; 1951 else 1952 FractionOfPTEReturnDrop = 7 / 8; 1953 } else if (GPUVMMinPageSize == 4 && MacroTileSizeBytes > 4096) { 1954 PixelPTEReqHeightPTEs = 16; 1955 *PixelPTEReqHeight = 16 * BlockHeight256Bytes; 1956 *PixelPTEReqWidth = 16 * BlockWidth256Bytes; 1957 *PTERequestSize = 128; 1958 FractionOfPTEReturnDrop = 0; 1959 } else { 1960 PixelPTEReqHeightPTEs = 1; 1961 *PixelPTEReqHeight = MacroTileHeight; 1962 *PixelPTEReqWidth = 8 * *MacroTileWidth; 1963 *PTERequestSize = 64; 1964 FractionOfPTEReturnDrop = 0; 1965 } 1966 1967 if (SurfaceTiling == dm_sw_linear) { 1968 *dpte_row_height = dml_min(128, 1 << (unsigned int) dml_floor(dml_log2(PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch), 1)); 1969 *dpte_row_width_ub = (dml_ceil((double)(Pitch * *dpte_row_height - 1) / *PixelPTEReqWidth, 1) + 1) * *PixelPTEReqWidth; 1970 *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqWidth * *PTERequestSize; 1971 } else if (ScanDirection != dm_vert) { 1972 *dpte_row_height = *PixelPTEReqHeight; 1973 *dpte_row_width_ub = (dml_ceil((double) (SwathWidth - 1) / *PixelPTEReqWidth, 1) + 1) * *PixelPTEReqWidth; 1974 *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqWidth * *PTERequestSize; 1975 } else { 1976 *dpte_row_height = dml_min(*PixelPTEReqWidth, *MacroTileWidth); 1977 *dpte_row_width_ub = (dml_ceil((double) (SwathWidth - 1) / *PixelPTEReqHeight, 1) + 1) * *PixelPTEReqHeight; 1978 *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqHeight * *PTERequestSize; 1979 } 1980 1981 if (*PixelPTEBytesPerRow * (1 - FractionOfPTEReturnDrop) <= 64 * 
PTEBufferSizeInRequests) { 1982 *PTEBufferSizeNotExceeded = true; 1983 } else { 1984 *PTEBufferSizeNotExceeded = false; 1985 } 1986 1987 if (GPUVMEnable != true) { 1988 *PixelPTEBytesPerRow = 0; 1989 *PTEBufferSizeNotExceeded = true; 1990 } 1991 1992 dml_print("DML: vm_bytes = meta_pte_bytes_per_frame (per_pipe) = MetaPTEBytesFrame = : %i\n", *MetaPTEBytesFrame); 1993 1994 if (HostVMEnable == true) { 1995 *PixelPTEBytesPerRow = *PixelPTEBytesPerRow * (1 + 8 * HostVMDynamicLevels); 1996 } 1997 1998 if (HostVMEnable == true) { 1999 *vm_group_bytes = 512; 2000 *dpte_group_bytes = 512; 2001 } else if (GPUVMEnable == true) { 2002 *vm_group_bytes = 2048; 2003 if (SurfaceTiling != dm_sw_linear && PixelPTEReqHeightPTEs == 1 && ScanDirection == dm_vert) { 2004 *dpte_group_bytes = 512; 2005 } else { 2006 *dpte_group_bytes = 2048; 2007 } 2008 } else { 2009 *vm_group_bytes = 0; 2010 *dpte_group_bytes = 0; 2011 } 2012 return PDEAndMetaPTEBytesFrame; 2013 } 2014 2015 static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation(struct display_mode_lib *mode_lib) 2016 { 2017 struct vba_vars_st *v = &mode_lib->vba; 2018 unsigned int j, k; 2019 double HostVMInefficiencyFactor = 1.0; 2020 bool NoChromaPlanes = true; 2021 int ReorderBytes; 2022 double VMDataOnlyReturnBW; 2023 double MaxTotalRDBandwidth = 0; 2024 int PrefetchMode = v->PrefetchModePerState[v->VoltageLevel][v->maxMpcComb]; 2025 2026 v->WritebackDISPCLK = 0.0; 2027 v->DISPCLKWithRamping = 0; 2028 v->DISPCLKWithoutRamping = 0; 2029 v->GlobalDPPCLK = 0.0; 2030 /* DAL custom code: need to update ReturnBW in case min dcfclk is overridden */ 2031 { 2032 double IdealFabricAndSDPPortBandwidthPerState = dml_min( 2033 v->ReturnBusWidth * v->DCFCLKState[v->VoltageLevel][v->maxMpcComb], 2034 v->FabricClockPerState[v->VoltageLevel] * v->FabricDatapathToDCNDataReturn); 2035 double IdealDRAMBandwidthPerState = v->DRAMSpeedPerState[v->VoltageLevel] * v->NumberOfChannels * v->DRAMChannelWidth; 2036 2037 
if (v->HostVMEnable != true) { 2038 v->ReturnBW = dml_min( 2039 IdealFabricAndSDPPortBandwidthPerState * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0, 2040 IdealDRAMBandwidthPerState * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyPixelDataOnly / 100.0); 2041 } else { 2042 v->ReturnBW = dml_min( 2043 IdealFabricAndSDPPortBandwidthPerState * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0, 2044 IdealDRAMBandwidthPerState * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyPixelMixedWithVMData / 100.0); 2045 } 2046 } 2047 /* End DAL custom code */ 2048 2049 // DISPCLK and DPPCLK Calculation 2050 // 2051 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 2052 if (v->WritebackEnable[k]) { 2053 v->WritebackDISPCLK = dml_max( 2054 v->WritebackDISPCLK, 2055 dml314_CalculateWriteBackDISPCLK( 2056 v->WritebackPixelFormat[k], 2057 v->PixelClock[k], 2058 v->WritebackHRatio[k], 2059 v->WritebackVRatio[k], 2060 v->WritebackHTaps[k], 2061 v->WritebackVTaps[k], 2062 v->WritebackSourceWidth[k], 2063 v->WritebackDestinationWidth[k], 2064 v->HTotal[k], 2065 v->WritebackLineBufferSize)); 2066 } 2067 } 2068 2069 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 2070 if (v->HRatio[k] > 1) { 2071 v->PSCL_THROUGHPUT_LUMA[k] = dml_min( 2072 v->MaxDCHUBToPSCLThroughput, 2073 v->MaxPSCLToLBThroughput * v->HRatio[k] / dml_ceil(v->htaps[k] / 6.0, 1)); 2074 } else { 2075 v->PSCL_THROUGHPUT_LUMA[k] = dml_min(v->MaxDCHUBToPSCLThroughput, v->MaxPSCLToLBThroughput); 2076 } 2077 2078 v->DPPCLKUsingSingleDPPLuma = v->PixelClock[k] 2079 * dml_max( 2080 v->vtaps[k] / 6.0 * dml_min(1.0, v->HRatio[k]), 2081 dml_max(v->HRatio[k] * v->VRatio[k] / v->PSCL_THROUGHPUT_LUMA[k], 1.0)); 2082 2083 if ((v->htaps[k] > 6 || v->vtaps[k] > 6) && v->DPPCLKUsingSingleDPPLuma < 2 * v->PixelClock[k]) { 2084 v->DPPCLKUsingSingleDPPLuma = 2 * v->PixelClock[k]; 2085 } 2086 2087 if ((v->SourcePixelFormat[k] != dm_420_8 && v->SourcePixelFormat[k] != dm_420_10 && v->SourcePixelFormat[k] != 
dm_420_12 2088 && v->SourcePixelFormat[k] != dm_rgbe_alpha)) { 2089 v->PSCL_THROUGHPUT_CHROMA[k] = 0.0; 2090 v->DPPCLKUsingSingleDPP[k] = v->DPPCLKUsingSingleDPPLuma; 2091 } else { 2092 if (v->HRatioChroma[k] > 1) { 2093 v->PSCL_THROUGHPUT_CHROMA[k] = dml_min( 2094 v->MaxDCHUBToPSCLThroughput, 2095 v->MaxPSCLToLBThroughput * v->HRatioChroma[k] / dml_ceil(v->HTAPsChroma[k] / 6.0, 1.0)); 2096 } else { 2097 v->PSCL_THROUGHPUT_CHROMA[k] = dml_min(v->MaxDCHUBToPSCLThroughput, v->MaxPSCLToLBThroughput); 2098 } 2099 v->DPPCLKUsingSingleDPPChroma = v->PixelClock[k] 2100 * dml_max3( 2101 v->VTAPsChroma[k] / 6.0 * dml_min(1.0, v->HRatioChroma[k]), 2102 v->HRatioChroma[k] * v->VRatioChroma[k] / v->PSCL_THROUGHPUT_CHROMA[k], 2103 1.0); 2104 2105 if ((v->HTAPsChroma[k] > 6 || v->VTAPsChroma[k] > 6) && v->DPPCLKUsingSingleDPPChroma < 2 * v->PixelClock[k]) { 2106 v->DPPCLKUsingSingleDPPChroma = 2 * v->PixelClock[k]; 2107 } 2108 2109 v->DPPCLKUsingSingleDPP[k] = dml_max(v->DPPCLKUsingSingleDPPLuma, v->DPPCLKUsingSingleDPPChroma); 2110 } 2111 } 2112 2113 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 2114 if (v->BlendingAndTiming[k] != k) 2115 continue; 2116 if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_4to1) { 2117 v->DISPCLKWithRamping = dml_max( 2118 v->DISPCLKWithRamping, 2119 v->PixelClock[k] / 4 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100) 2120 * (1 + v->DISPCLKRampingMargin / 100)); 2121 v->DISPCLKWithoutRamping = dml_max( 2122 v->DISPCLKWithoutRamping, 2123 v->PixelClock[k] / 4 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100)); 2124 } else if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_2to1) { 2125 v->DISPCLKWithRamping = dml_max( 2126 v->DISPCLKWithRamping, 2127 v->PixelClock[k] / 2 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100) 2128 * (1 + v->DISPCLKRampingMargin / 100)); 2129 v->DISPCLKWithoutRamping = dml_max( 2130 v->DISPCLKWithoutRamping, 2131 v->PixelClock[k] / 2 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100)); 2132 } else { 2133 
v->DISPCLKWithRamping = dml_max( 2134 v->DISPCLKWithRamping, 2135 v->PixelClock[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100) * (1 + v->DISPCLKRampingMargin / 100)); 2136 v->DISPCLKWithoutRamping = dml_max( 2137 v->DISPCLKWithoutRamping, 2138 v->PixelClock[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100)); 2139 } 2140 } 2141 2142 v->DISPCLKWithRamping = dml_max(v->DISPCLKWithRamping, v->WritebackDISPCLK); 2143 v->DISPCLKWithoutRamping = dml_max(v->DISPCLKWithoutRamping, v->WritebackDISPCLK); 2144 2145 ASSERT(v->DISPCLKDPPCLKVCOSpeed != 0); 2146 v->DISPCLKWithRampingRoundedToDFSGranularity = RoundToDFSGranularityUp(v->DISPCLKWithRamping, v->DISPCLKDPPCLKVCOSpeed); 2147 v->DISPCLKWithoutRampingRoundedToDFSGranularity = RoundToDFSGranularityUp(v->DISPCLKWithoutRamping, v->DISPCLKDPPCLKVCOSpeed); 2148 v->MaxDispclkRoundedToDFSGranularity = RoundToDFSGranularityDown( 2149 v->soc.clock_limits[v->soc.num_states - 1].dispclk_mhz, 2150 v->DISPCLKDPPCLKVCOSpeed); 2151 if (v->DISPCLKWithoutRampingRoundedToDFSGranularity > v->MaxDispclkRoundedToDFSGranularity) { 2152 v->DISPCLK_calculated = v->DISPCLKWithoutRampingRoundedToDFSGranularity; 2153 } else if (v->DISPCLKWithRampingRoundedToDFSGranularity > v->MaxDispclkRoundedToDFSGranularity) { 2154 v->DISPCLK_calculated = v->MaxDispclkRoundedToDFSGranularity; 2155 } else { 2156 v->DISPCLK_calculated = v->DISPCLKWithRampingRoundedToDFSGranularity; 2157 } 2158 v->DISPCLK = v->DISPCLK_calculated; 2159 DTRACE(" dispclk_mhz (calculated) = %f", v->DISPCLK_calculated); 2160 2161 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 2162 v->DPPCLK_calculated[k] = v->DPPCLKUsingSingleDPP[k] / v->DPPPerPlane[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100); 2163 v->GlobalDPPCLK = dml_max(v->GlobalDPPCLK, v->DPPCLK_calculated[k]); 2164 } 2165 v->GlobalDPPCLK = RoundToDFSGranularityUp(v->GlobalDPPCLK, v->DISPCLKDPPCLKVCOSpeed); 2166 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 2167 v->DPPCLK_calculated[k] = v->GlobalDPPCLK / 255 * 
dml_ceil(v->DPPCLK_calculated[k] * 255.0 / v->GlobalDPPCLK, 1); 2168 DTRACE(" dppclk_mhz[%i] (calculated) = %f", k, v->DPPCLK_calculated[k]); 2169 } 2170 2171 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 2172 v->DPPCLK[k] = v->DPPCLK_calculated[k]; 2173 } 2174 2175 // Urgent and B P-State/DRAM Clock Change Watermark 2176 DTRACE(" dcfclk_mhz = %f", v->DCFCLK); 2177 DTRACE(" return_bus_bw = %f", v->ReturnBW); 2178 2179 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 2180 CalculateBytePerPixelAnd256BBlockSizes( 2181 v->SourcePixelFormat[k], 2182 v->SurfaceTiling[k], 2183 &v->BytePerPixelY[k], 2184 &v->BytePerPixelC[k], 2185 &v->BytePerPixelDETY[k], 2186 &v->BytePerPixelDETC[k], 2187 &v->BlockHeight256BytesY[k], 2188 &v->BlockHeight256BytesC[k], 2189 &v->BlockWidth256BytesY[k], 2190 &v->BlockWidth256BytesC[k]); 2191 } 2192 2193 CalculateSwathWidth( 2194 false, 2195 v->NumberOfActivePlanes, 2196 v->SourcePixelFormat, 2197 v->SourceScan, 2198 v->ViewportWidth, 2199 v->ViewportHeight, 2200 v->SurfaceWidthY, 2201 v->SurfaceWidthC, 2202 v->SurfaceHeightY, 2203 v->SurfaceHeightC, 2204 v->ODMCombineEnabled, 2205 v->BytePerPixelY, 2206 v->BytePerPixelC, 2207 v->BlockHeight256BytesY, 2208 v->BlockHeight256BytesC, 2209 v->BlockWidth256BytesY, 2210 v->BlockWidth256BytesC, 2211 v->BlendingAndTiming, 2212 v->HActive, 2213 v->HRatio, 2214 v->DPPPerPlane, 2215 v->SwathWidthSingleDPPY, 2216 v->SwathWidthSingleDPPC, 2217 v->SwathWidthY, 2218 v->SwathWidthC, 2219 v->dummyinteger3, 2220 v->dummyinteger4, 2221 v->swath_width_luma_ub, 2222 v->swath_width_chroma_ub); 2223 2224 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 2225 v->ReadBandwidthPlaneLuma[k] = v->SwathWidthSingleDPPY[k] * v->BytePerPixelY[k] / (v->HTotal[k] / v->PixelClock[k]) 2226 * v->VRatio[k]; 2227 v->ReadBandwidthPlaneChroma[k] = v->SwathWidthSingleDPPC[k] * v->BytePerPixelC[k] / (v->HTotal[k] / v->PixelClock[k]) 2228 * v->VRatioChroma[k]; 2229 DTRACE(" read_bw[%i] = %fBps", k, v->ReadBandwidthPlaneLuma[k] + 
v->ReadBandwidthPlaneChroma[k]); 2230 } 2231 2232 // DCFCLK Deep Sleep 2233 CalculateDCFCLKDeepSleep( 2234 mode_lib, 2235 v->NumberOfActivePlanes, 2236 v->BytePerPixelY, 2237 v->BytePerPixelC, 2238 v->VRatio, 2239 v->VRatioChroma, 2240 v->SwathWidthY, 2241 v->SwathWidthC, 2242 v->DPPPerPlane, 2243 v->HRatio, 2244 v->HRatioChroma, 2245 v->PixelClock, 2246 v->PSCL_THROUGHPUT_LUMA, 2247 v->PSCL_THROUGHPUT_CHROMA, 2248 v->DPPCLK, 2249 v->ReadBandwidthPlaneLuma, 2250 v->ReadBandwidthPlaneChroma, 2251 v->ReturnBusWidth, 2252 &v->DCFCLKDeepSleep); 2253 2254 // DSCCLK 2255 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 2256 if ((v->BlendingAndTiming[k] != k) || !v->DSCEnabled[k]) { 2257 v->DSCCLK_calculated[k] = 0.0; 2258 } else { 2259 if (v->OutputFormat[k] == dm_420) 2260 v->DSCFormatFactor = 2; 2261 else if (v->OutputFormat[k] == dm_444) 2262 v->DSCFormatFactor = 1; 2263 else if (v->OutputFormat[k] == dm_n422) 2264 v->DSCFormatFactor = 2; 2265 else 2266 v->DSCFormatFactor = 1; 2267 if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_4to1) 2268 v->DSCCLK_calculated[k] = v->PixelClockBackEnd[k] / 12 / v->DSCFormatFactor 2269 / (1 - v->DISPCLKDPPCLKDSCCLKDownSpreading / 100); 2270 else if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_2to1) 2271 v->DSCCLK_calculated[k] = v->PixelClockBackEnd[k] / 6 / v->DSCFormatFactor 2272 / (1 - v->DISPCLKDPPCLKDSCCLKDownSpreading / 100); 2273 else 2274 v->DSCCLK_calculated[k] = v->PixelClockBackEnd[k] / 3 / v->DSCFormatFactor 2275 / (1 - v->DISPCLKDPPCLKDSCCLKDownSpreading / 100); 2276 } 2277 } 2278 2279 // DSC Delay 2280 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 2281 double BPP = v->OutputBpp[k]; 2282 2283 if (v->DSCEnabled[k] && BPP != 0) { 2284 if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_disabled) { 2285 v->DSCDelay[k] = dscceComputeDelay( 2286 v->DSCInputBitPerComponent[k], 2287 BPP, 2288 dml_ceil((double) v->HActive[k] / v->NumberOfDSCSlices[k], 1), 2289 v->NumberOfDSCSlices[k], 2290 v->OutputFormat[k], 2291 
v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k]); 2292 } else if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_2to1) { 2293 v->DSCDelay[k] = 2 2294 * (dscceComputeDelay( 2295 v->DSCInputBitPerComponent[k], 2296 BPP, 2297 dml_ceil((double) v->HActive[k] / v->NumberOfDSCSlices[k], 1), 2298 v->NumberOfDSCSlices[k] / 2.0, 2299 v->OutputFormat[k], 2300 v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k])); 2301 } else { 2302 v->DSCDelay[k] = 4 2303 * (dscceComputeDelay( 2304 v->DSCInputBitPerComponent[k], 2305 BPP, 2306 dml_ceil((double) v->HActive[k] / v->NumberOfDSCSlices[k], 1), 2307 v->NumberOfDSCSlices[k] / 4.0, 2308 v->OutputFormat[k], 2309 v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k])); 2310 } 2311 v->DSCDelay[k] = v->DSCDelay[k] * v->PixelClock[k] / v->PixelClockBackEnd[k]; 2312 } else { 2313 v->DSCDelay[k] = 0; 2314 } 2315 } 2316 2317 for (k = 0; k < v->NumberOfActivePlanes; ++k) 2318 for (j = 0; j < v->NumberOfActivePlanes; ++j) // NumberOfPlanes 2319 if (j != k && v->BlendingAndTiming[k] == j && v->DSCEnabled[j]) 2320 v->DSCDelay[k] = v->DSCDelay[j]; 2321 2322 // Prefetch 2323 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 2324 unsigned int PDEAndMetaPTEBytesFrameY; 2325 unsigned int PixelPTEBytesPerRowY; 2326 unsigned int MetaRowByteY; 2327 unsigned int MetaRowByteC; 2328 unsigned int PDEAndMetaPTEBytesFrameC; 2329 unsigned int PixelPTEBytesPerRowC; 2330 bool PTEBufferSizeNotExceededY; 2331 bool PTEBufferSizeNotExceededC; 2332 2333 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12 2334 || v->SourcePixelFormat[k] == dm_rgbe_alpha) { 2335 if ((v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12) && v->SourceScan[k] != dm_vert) { 2336 v->PTEBufferSizeInRequestsForLuma = (v->PTEBufferSizeInRequestsLuma + v->PTEBufferSizeInRequestsChroma) / 2; 2337 v->PTEBufferSizeInRequestsForChroma = 
v->PTEBufferSizeInRequestsForLuma; 2338 } else { 2339 v->PTEBufferSizeInRequestsForLuma = v->PTEBufferSizeInRequestsLuma; 2340 v->PTEBufferSizeInRequestsForChroma = v->PTEBufferSizeInRequestsChroma; 2341 } 2342 2343 PDEAndMetaPTEBytesFrameC = CalculateVMAndRowBytes( 2344 mode_lib, 2345 v->DCCEnable[k], 2346 v->BlockHeight256BytesC[k], 2347 v->BlockWidth256BytesC[k], 2348 v->SourcePixelFormat[k], 2349 v->SurfaceTiling[k], 2350 v->BytePerPixelC[k], 2351 v->SourceScan[k], 2352 v->SwathWidthC[k], 2353 v->ViewportHeightChroma[k], 2354 v->GPUVMEnable, 2355 v->HostVMEnable, 2356 v->HostVMMaxNonCachedPageTableLevels, 2357 v->GPUVMMinPageSize, 2358 v->HostVMMinPageSize, 2359 v->PTEBufferSizeInRequestsForChroma, 2360 v->PitchC[k], 2361 v->DCCMetaPitchC[k], 2362 &v->MacroTileWidthC[k], 2363 &MetaRowByteC, 2364 &PixelPTEBytesPerRowC, 2365 &PTEBufferSizeNotExceededC, 2366 &v->dpte_row_width_chroma_ub[k], 2367 &v->dpte_row_height_chroma[k], 2368 &v->meta_req_width_chroma[k], 2369 &v->meta_req_height_chroma[k], 2370 &v->meta_row_width_chroma[k], 2371 &v->meta_row_height_chroma[k], 2372 &v->dummyinteger1, 2373 &v->dummyinteger2, 2374 &v->PixelPTEReqWidthC[k], 2375 &v->PixelPTEReqHeightC[k], 2376 &v->PTERequestSizeC[k], 2377 &v->dpde0_bytes_per_frame_ub_c[k], 2378 &v->meta_pte_bytes_per_frame_ub_c[k]); 2379 2380 v->PrefetchSourceLinesC[k] = CalculatePrefetchSourceLines( 2381 mode_lib, 2382 v->VRatioChroma[k], 2383 v->VTAPsChroma[k], 2384 v->Interlace[k], 2385 v->ProgressiveToInterlaceUnitInOPP, 2386 v->SwathHeightC[k], 2387 v->ViewportYStartC[k], 2388 &v->VInitPreFillC[k], 2389 &v->MaxNumSwathC[k]); 2390 } else { 2391 v->PTEBufferSizeInRequestsForLuma = v->PTEBufferSizeInRequestsLuma + v->PTEBufferSizeInRequestsChroma; 2392 v->PTEBufferSizeInRequestsForChroma = 0; 2393 PixelPTEBytesPerRowC = 0; 2394 PDEAndMetaPTEBytesFrameC = 0; 2395 MetaRowByteC = 0; 2396 v->MaxNumSwathC[k] = 0; 2397 v->PrefetchSourceLinesC[k] = 0; 2398 } 2399 2400 PDEAndMetaPTEBytesFrameY = 
CalculateVMAndRowBytes( 2401 mode_lib, 2402 v->DCCEnable[k], 2403 v->BlockHeight256BytesY[k], 2404 v->BlockWidth256BytesY[k], 2405 v->SourcePixelFormat[k], 2406 v->SurfaceTiling[k], 2407 v->BytePerPixelY[k], 2408 v->SourceScan[k], 2409 v->SwathWidthY[k], 2410 v->ViewportHeight[k], 2411 v->GPUVMEnable, 2412 v->HostVMEnable, 2413 v->HostVMMaxNonCachedPageTableLevels, 2414 v->GPUVMMinPageSize, 2415 v->HostVMMinPageSize, 2416 v->PTEBufferSizeInRequestsForLuma, 2417 v->PitchY[k], 2418 v->DCCMetaPitchY[k], 2419 &v->MacroTileWidthY[k], 2420 &MetaRowByteY, 2421 &PixelPTEBytesPerRowY, 2422 &PTEBufferSizeNotExceededY, 2423 &v->dpte_row_width_luma_ub[k], 2424 &v->dpte_row_height[k], 2425 &v->meta_req_width[k], 2426 &v->meta_req_height[k], 2427 &v->meta_row_width[k], 2428 &v->meta_row_height[k], 2429 &v->vm_group_bytes[k], 2430 &v->dpte_group_bytes[k], 2431 &v->PixelPTEReqWidthY[k], 2432 &v->PixelPTEReqHeightY[k], 2433 &v->PTERequestSizeY[k], 2434 &v->dpde0_bytes_per_frame_ub_l[k], 2435 &v->meta_pte_bytes_per_frame_ub_l[k]); 2436 2437 v->PrefetchSourceLinesY[k] = CalculatePrefetchSourceLines( 2438 mode_lib, 2439 v->VRatio[k], 2440 v->vtaps[k], 2441 v->Interlace[k], 2442 v->ProgressiveToInterlaceUnitInOPP, 2443 v->SwathHeightY[k], 2444 v->ViewportYStartY[k], 2445 &v->VInitPreFillY[k], 2446 &v->MaxNumSwathY[k]); 2447 v->PixelPTEBytesPerRow[k] = PixelPTEBytesPerRowY + PixelPTEBytesPerRowC; 2448 v->PDEAndMetaPTEBytesFrame[k] = PDEAndMetaPTEBytesFrameY + PDEAndMetaPTEBytesFrameC; 2449 v->MetaRowByte[k] = MetaRowByteY + MetaRowByteC; 2450 2451 CalculateRowBandwidth( 2452 v->GPUVMEnable, 2453 v->SourcePixelFormat[k], 2454 v->VRatio[k], 2455 v->VRatioChroma[k], 2456 v->DCCEnable[k], 2457 v->HTotal[k] / v->PixelClock[k], 2458 MetaRowByteY, 2459 MetaRowByteC, 2460 v->meta_row_height[k], 2461 v->meta_row_height_chroma[k], 2462 PixelPTEBytesPerRowY, 2463 PixelPTEBytesPerRowC, 2464 v->dpte_row_height[k], 2465 v->dpte_row_height_chroma[k], 2466 &v->meta_row_bw[k], 2467 &v->dpte_row_bw[k]); 
2468 } 2469 2470 v->TotalDCCActiveDPP = 0; 2471 v->TotalActiveDPP = 0; 2472 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 2473 v->TotalActiveDPP = v->TotalActiveDPP + v->DPPPerPlane[k]; 2474 if (v->DCCEnable[k]) 2475 v->TotalDCCActiveDPP = v->TotalDCCActiveDPP + v->DPPPerPlane[k]; 2476 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12 2477 || v->SourcePixelFormat[k] == dm_rgbe_alpha) 2478 NoChromaPlanes = false; 2479 } 2480 2481 ReorderBytes = v->NumberOfChannels 2482 * dml_max3( 2483 v->UrgentOutOfOrderReturnPerChannelPixelDataOnly, 2484 v->UrgentOutOfOrderReturnPerChannelPixelMixedWithVMData, 2485 v->UrgentOutOfOrderReturnPerChannelVMDataOnly); 2486 2487 VMDataOnlyReturnBW = dml_min( 2488 dml_min(v->ReturnBusWidth * v->DCFCLK, v->FabricClock * v->FabricDatapathToDCNDataReturn) 2489 * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0, 2490 v->DRAMSpeed * v->NumberOfChannels * v->DRAMChannelWidth 2491 * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyVMDataOnly / 100.0); 2492 2493 #ifdef __DML_VBA_DEBUG__ 2494 dml_print("DML::%s: v->ReturnBusWidth = %f\n", __func__, v->ReturnBusWidth); 2495 dml_print("DML::%s: v->DCFCLK = %f\n", __func__, v->DCFCLK); 2496 dml_print("DML::%s: v->FabricClock = %f\n", __func__, v->FabricClock); 2497 dml_print("DML::%s: v->FabricDatapathToDCNDataReturn = %f\n", __func__, v->FabricDatapathToDCNDataReturn); 2498 dml_print("DML::%s: v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency = %f\n", __func__, v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency); 2499 dml_print("DML::%s: v->DRAMSpeed = %f\n", __func__, v->DRAMSpeed); 2500 dml_print("DML::%s: v->NumberOfChannels = %f\n", __func__, v->NumberOfChannels); 2501 dml_print("DML::%s: v->DRAMChannelWidth = %f\n", __func__, v->DRAMChannelWidth); 2502 dml_print("DML::%s: v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyVMDataOnly = %f\n", __func__, 
v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyVMDataOnly); 2503 dml_print("DML::%s: VMDataOnlyReturnBW = %f\n", __func__, VMDataOnlyReturnBW); 2504 dml_print("DML::%s: ReturnBW = %f\n", __func__, v->ReturnBW); 2505 #endif 2506 2507 if (v->GPUVMEnable && v->HostVMEnable) 2508 HostVMInefficiencyFactor = v->ReturnBW / VMDataOnlyReturnBW; 2509 2510 v->UrgentExtraLatency = CalculateExtraLatency( 2511 v->RoundTripPingLatencyCycles, 2512 ReorderBytes, 2513 v->DCFCLK, 2514 v->TotalActiveDPP, 2515 v->PixelChunkSizeInKByte, 2516 v->TotalDCCActiveDPP, 2517 v->MetaChunkSize, 2518 v->ReturnBW, 2519 v->GPUVMEnable, 2520 v->HostVMEnable, 2521 v->NumberOfActivePlanes, 2522 v->DPPPerPlane, 2523 v->dpte_group_bytes, 2524 HostVMInefficiencyFactor, 2525 v->HostVMMinPageSize, 2526 v->HostVMMaxNonCachedPageTableLevels); 2527 2528 v->TCalc = 24.0 / v->DCFCLKDeepSleep; 2529 2530 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 2531 if (v->BlendingAndTiming[k] == k) { 2532 if (v->WritebackEnable[k] == true) { 2533 v->WritebackDelay[v->VoltageLevel][k] = v->WritebackLatency 2534 + CalculateWriteBackDelay( 2535 v->WritebackPixelFormat[k], 2536 v->WritebackHRatio[k], 2537 v->WritebackVRatio[k], 2538 v->WritebackVTaps[k], 2539 v->WritebackDestinationWidth[k], 2540 v->WritebackDestinationHeight[k], 2541 v->WritebackSourceHeight[k], 2542 v->HTotal[k]) / v->DISPCLK; 2543 } else 2544 v->WritebackDelay[v->VoltageLevel][k] = 0; 2545 for (j = 0; j < v->NumberOfActivePlanes; ++j) { 2546 if (v->BlendingAndTiming[j] == k && v->WritebackEnable[j] == true) { 2547 v->WritebackDelay[v->VoltageLevel][k] = dml_max( 2548 v->WritebackDelay[v->VoltageLevel][k], 2549 v->WritebackLatency 2550 + CalculateWriteBackDelay( 2551 v->WritebackPixelFormat[j], 2552 v->WritebackHRatio[j], 2553 v->WritebackVRatio[j], 2554 v->WritebackVTaps[j], 2555 v->WritebackDestinationWidth[j], 2556 v->WritebackDestinationHeight[j], 2557 v->WritebackSourceHeight[j], 2558 v->HTotal[k]) / v->DISPCLK); 2559 } 2560 } 2561 } 2562 } 2563 2564 
for (k = 0; k < v->NumberOfActivePlanes; ++k) 2565 for (j = 0; j < v->NumberOfActivePlanes; ++j) 2566 if (v->BlendingAndTiming[k] == j) 2567 v->WritebackDelay[v->VoltageLevel][k] = v->WritebackDelay[v->VoltageLevel][j]; 2568 2569 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 2570 v->MaxVStartupLines[k] = 2571 CalculateMaxVStartup( 2572 v->VTotal[k], 2573 v->VActive[k], 2574 v->VBlankNom[k], 2575 v->HTotal[k], 2576 v->PixelClock[k], 2577 v->ProgressiveToInterlaceUnitInOPP, 2578 v->Interlace[k], 2579 v->ip.VBlankNomDefaultUS, 2580 v->WritebackDelay[v->VoltageLevel][k]); 2581 2582 #ifdef __DML_VBA_DEBUG__ 2583 dml_print("DML::%s: k=%d MaxVStartupLines = %d\n", __func__, k, v->MaxVStartupLines[k]); 2584 dml_print("DML::%s: k=%d VoltageLevel = %d\n", __func__, k, v->VoltageLevel); 2585 dml_print("DML::%s: k=%d WritebackDelay = %f\n", __func__, k, v->WritebackDelay[v->VoltageLevel][k]); 2586 #endif 2587 } 2588 2589 v->MaximumMaxVStartupLines = 0; 2590 for (k = 0; k < v->NumberOfActivePlanes; ++k) 2591 v->MaximumMaxVStartupLines = dml_max(v->MaximumMaxVStartupLines, v->MaxVStartupLines[k]); 2592 2593 // VBA_DELTA 2594 // We don't really care to iterate between the various prefetch modes 2595 //v->PrefetchERROR = CalculateMinAndMaxPrefetchMode(v->AllowDRAMSelfRefreshOrDRAMClockChangeInVblank, &v->MinPrefetchMode, &v->MaxPrefetchMode); 2596 2597 v->UrgentLatency = CalculateUrgentLatency( 2598 v->UrgentLatencyPixelDataOnly, 2599 v->UrgentLatencyPixelMixedWithVMData, 2600 v->UrgentLatencyVMDataOnly, 2601 v->DoUrgentLatencyAdjustment, 2602 v->UrgentLatencyAdjustmentFabricClockComponent, 2603 v->UrgentLatencyAdjustmentFabricClockReference, 2604 v->FabricClock); 2605 2606 v->FractionOfUrgentBandwidth = 0.0; 2607 v->FractionOfUrgentBandwidthImmediateFlip = 0.0; 2608 2609 v->VStartupLines = __DML_VBA_MIN_VSTARTUP__; 2610 2611 do { 2612 double MaxTotalRDBandwidthNoUrgentBurst = 0.0; 2613 bool DestinationLineTimesForPrefetchLessThan2 = false; 2614 bool VRatioPrefetchMoreThan4 = 
false; 2615 double TWait = CalculateTWait(PrefetchMode, v->DRAMClockChangeLatency, v->UrgentLatency, v->SREnterPlusExitTime); 2616 2617 MaxTotalRDBandwidth = 0; 2618 2619 dml_print("DML::%s: Start loop: VStartup = %d\n", __func__, v->VStartupLines); 2620 2621 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 2622 Pipe myPipe; 2623 2624 myPipe.DPPCLK = v->DPPCLK[k]; 2625 myPipe.DISPCLK = v->DISPCLK; 2626 myPipe.PixelClock = v->PixelClock[k]; 2627 myPipe.DCFCLKDeepSleep = v->DCFCLKDeepSleep; 2628 myPipe.DPPPerPlane = v->DPPPerPlane[k]; 2629 myPipe.ScalerEnabled = v->ScalerEnabled[k]; 2630 myPipe.VRatio = v->VRatio[k]; 2631 myPipe.VRatioChroma = v->VRatioChroma[k]; 2632 myPipe.SourceScan = v->SourceScan[k]; 2633 myPipe.BlockWidth256BytesY = v->BlockWidth256BytesY[k]; 2634 myPipe.BlockHeight256BytesY = v->BlockHeight256BytesY[k]; 2635 myPipe.BlockWidth256BytesC = v->BlockWidth256BytesC[k]; 2636 myPipe.BlockHeight256BytesC = v->BlockHeight256BytesC[k]; 2637 myPipe.InterlaceEnable = v->Interlace[k]; 2638 myPipe.NumberOfCursors = v->NumberOfCursors[k]; 2639 myPipe.VBlank = v->VTotal[k] - v->VActive[k]; 2640 myPipe.HTotal = v->HTotal[k]; 2641 myPipe.DCCEnable = v->DCCEnable[k]; 2642 myPipe.ODMCombineIsEnabled = v->ODMCombineEnabled[k] == dm_odm_combine_mode_4to1 2643 || v->ODMCombineEnabled[k] == dm_odm_combine_mode_2to1; 2644 myPipe.SourcePixelFormat = v->SourcePixelFormat[k]; 2645 myPipe.BytePerPixelY = v->BytePerPixelY[k]; 2646 myPipe.BytePerPixelC = v->BytePerPixelC[k]; 2647 myPipe.ProgressiveToInterlaceUnitInOPP = v->ProgressiveToInterlaceUnitInOPP; 2648 v->ErrorResult[k] = CalculatePrefetchSchedule( 2649 mode_lib, 2650 HostVMInefficiencyFactor, 2651 &myPipe, 2652 v->DSCDelay[k], 2653 v->DPPCLKDelaySubtotal + v->DPPCLKDelayCNVCFormater, 2654 v->DPPCLKDelaySCL, 2655 v->DPPCLKDelaySCLLBOnly, 2656 v->DPPCLKDelayCNVCCursor, 2657 v->DISPCLKDelaySubtotal, 2658 (unsigned int) (v->SwathWidthY[k] / v->HRatio[k]), 2659 v->OutputFormat[k], 2660 v->MaxInterDCNTileRepeaters, 2661 
dml_min(v->VStartupLines, v->MaxVStartupLines[k]), 2662 v->MaxVStartupLines[k], 2663 v->GPUVMMaxPageTableLevels, 2664 v->GPUVMEnable, 2665 v->HostVMEnable, 2666 v->HostVMMaxNonCachedPageTableLevels, 2667 v->HostVMMinPageSize, 2668 v->DynamicMetadataEnable[k], 2669 v->DynamicMetadataVMEnabled, 2670 v->DynamicMetadataLinesBeforeActiveRequired[k], 2671 v->DynamicMetadataTransmittedBytes[k], 2672 v->UrgentLatency, 2673 v->UrgentExtraLatency, 2674 v->TCalc, 2675 v->PDEAndMetaPTEBytesFrame[k], 2676 v->MetaRowByte[k], 2677 v->PixelPTEBytesPerRow[k], 2678 v->PrefetchSourceLinesY[k], 2679 v->SwathWidthY[k], 2680 v->VInitPreFillY[k], 2681 v->MaxNumSwathY[k], 2682 v->PrefetchSourceLinesC[k], 2683 v->SwathWidthC[k], 2684 v->VInitPreFillC[k], 2685 v->MaxNumSwathC[k], 2686 v->swath_width_luma_ub[k], 2687 v->swath_width_chroma_ub[k], 2688 v->SwathHeightY[k], 2689 v->SwathHeightC[k], 2690 TWait, 2691 &v->DSTXAfterScaler[k], 2692 &v->DSTYAfterScaler[k], 2693 &v->DestinationLinesForPrefetch[k], 2694 &v->PrefetchBandwidth[k], 2695 &v->DestinationLinesToRequestVMInVBlank[k], 2696 &v->DestinationLinesToRequestRowInVBlank[k], 2697 &v->VRatioPrefetchY[k], 2698 &v->VRatioPrefetchC[k], 2699 &v->RequiredPrefetchPixDataBWLuma[k], 2700 &v->RequiredPrefetchPixDataBWChroma[k], 2701 &v->NotEnoughTimeForDynamicMetadata[k], 2702 &v->Tno_bw[k], 2703 &v->prefetch_vmrow_bw[k], 2704 &v->Tdmdl_vm[k], 2705 &v->Tdmdl[k], 2706 &v->TSetup[k], 2707 &v->VUpdateOffsetPix[k], 2708 &v->VUpdateWidthPix[k], 2709 &v->VReadyOffsetPix[k]); 2710 2711 #ifdef __DML_VBA_DEBUG__ 2712 dml_print("DML::%s: k=%0d Prefetch cal result=%0d\n", __func__, k, v->ErrorResult[k]); 2713 #endif 2714 v->VStartup[k] = dml_min(v->VStartupLines, v->MaxVStartupLines[k]); 2715 } 2716 2717 v->NoEnoughUrgentLatencyHiding = false; 2718 v->NoEnoughUrgentLatencyHidingPre = false; 2719 2720 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 2721 v->cursor_bw[k] = v->NumberOfCursors[k] * v->CursorWidth[k][0] * v->CursorBPP[k][0] / 8.0 2722 / 
(v->HTotal[k] / v->PixelClock[k]) * v->VRatio[k]; 2723 v->cursor_bw_pre[k] = v->NumberOfCursors[k] * v->CursorWidth[k][0] * v->CursorBPP[k][0] / 8.0 2724 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatioPrefetchY[k]; 2725 2726 CalculateUrgentBurstFactor( 2727 v->swath_width_luma_ub[k], 2728 v->swath_width_chroma_ub[k], 2729 v->SwathHeightY[k], 2730 v->SwathHeightC[k], 2731 v->HTotal[k] / v->PixelClock[k], 2732 v->UrgentLatency, 2733 v->CursorBufferSize, 2734 v->CursorWidth[k][0], 2735 v->CursorBPP[k][0], 2736 v->VRatio[k], 2737 v->VRatioChroma[k], 2738 v->BytePerPixelDETY[k], 2739 v->BytePerPixelDETC[k], 2740 v->DETBufferSizeY[k], 2741 v->DETBufferSizeC[k], 2742 &v->UrgBurstFactorCursor[k], 2743 &v->UrgBurstFactorLuma[k], 2744 &v->UrgBurstFactorChroma[k], 2745 &v->NoUrgentLatencyHiding[k]); 2746 2747 CalculateUrgentBurstFactor( 2748 v->swath_width_luma_ub[k], 2749 v->swath_width_chroma_ub[k], 2750 v->SwathHeightY[k], 2751 v->SwathHeightC[k], 2752 v->HTotal[k] / v->PixelClock[k], 2753 v->UrgentLatency, 2754 v->CursorBufferSize, 2755 v->CursorWidth[k][0], 2756 v->CursorBPP[k][0], 2757 v->VRatioPrefetchY[k], 2758 v->VRatioPrefetchC[k], 2759 v->BytePerPixelDETY[k], 2760 v->BytePerPixelDETC[k], 2761 v->DETBufferSizeY[k], 2762 v->DETBufferSizeC[k], 2763 &v->UrgBurstFactorCursorPre[k], 2764 &v->UrgBurstFactorLumaPre[k], 2765 &v->UrgBurstFactorChromaPre[k], 2766 &v->NoUrgentLatencyHidingPre[k]); 2767 2768 MaxTotalRDBandwidth = MaxTotalRDBandwidth 2769 + dml_max3( 2770 v->DPPPerPlane[k] * v->prefetch_vmrow_bw[k], 2771 v->ReadBandwidthPlaneLuma[k] * v->UrgBurstFactorLuma[k] 2772 + v->ReadBandwidthPlaneChroma[k] * v->UrgBurstFactorChroma[k] 2773 + v->cursor_bw[k] * v->UrgBurstFactorCursor[k] 2774 + v->DPPPerPlane[k] * (v->meta_row_bw[k] + v->dpte_row_bw[k]), 2775 v->DPPPerPlane[k] 2776 * (v->RequiredPrefetchPixDataBWLuma[k] * v->UrgBurstFactorLumaPre[k] 2777 + v->RequiredPrefetchPixDataBWChroma[k] * v->UrgBurstFactorChromaPre[k]) 2778 + v->cursor_bw_pre[k] * 
v->UrgBurstFactorCursorPre[k]); 2779 2780 MaxTotalRDBandwidthNoUrgentBurst = MaxTotalRDBandwidthNoUrgentBurst 2781 + dml_max3( 2782 v->DPPPerPlane[k] * v->prefetch_vmrow_bw[k], 2783 v->ReadBandwidthPlaneLuma[k] + v->ReadBandwidthPlaneChroma[k] + v->cursor_bw[k] 2784 + v->DPPPerPlane[k] * (v->meta_row_bw[k] + v->dpte_row_bw[k]), 2785 v->DPPPerPlane[k] * (v->RequiredPrefetchPixDataBWLuma[k] + v->RequiredPrefetchPixDataBWChroma[k]) 2786 + v->cursor_bw_pre[k]); 2787 2788 #ifdef __DML_VBA_DEBUG__ 2789 dml_print("DML::%s: k=%0d DPPPerPlane=%d\n", __func__, k, v->DPPPerPlane[k]); 2790 dml_print("DML::%s: k=%0d UrgBurstFactorLuma=%f\n", __func__, k, v->UrgBurstFactorLuma[k]); 2791 dml_print("DML::%s: k=%0d UrgBurstFactorChroma=%f\n", __func__, k, v->UrgBurstFactorChroma[k]); 2792 dml_print("DML::%s: k=%0d UrgBurstFactorLumaPre=%f\n", __func__, k, v->UrgBurstFactorLumaPre[k]); 2793 dml_print("DML::%s: k=%0d UrgBurstFactorChromaPre=%f\n", __func__, k, v->UrgBurstFactorChromaPre[k]); 2794 2795 dml_print("DML::%s: k=%0d VRatioPrefetchY=%f\n", __func__, k, v->VRatioPrefetchY[k]); 2796 dml_print("DML::%s: k=%0d VRatioY=%f\n", __func__, k, v->VRatio[k]); 2797 2798 dml_print("DML::%s: k=%0d prefetch_vmrow_bw=%f\n", __func__, k, v->prefetch_vmrow_bw[k]); 2799 dml_print("DML::%s: k=%0d ReadBandwidthPlaneLuma=%f\n", __func__, k, v->ReadBandwidthPlaneLuma[k]); 2800 dml_print("DML::%s: k=%0d ReadBandwidthPlaneChroma=%f\n", __func__, k, v->ReadBandwidthPlaneChroma[k]); 2801 dml_print("DML::%s: k=%0d cursor_bw=%f\n", __func__, k, v->cursor_bw[k]); 2802 dml_print("DML::%s: k=%0d meta_row_bw=%f\n", __func__, k, v->meta_row_bw[k]); 2803 dml_print("DML::%s: k=%0d dpte_row_bw=%f\n", __func__, k, v->dpte_row_bw[k]); 2804 dml_print("DML::%s: k=%0d RequiredPrefetchPixDataBWLuma=%f\n", __func__, k, v->RequiredPrefetchPixDataBWLuma[k]); 2805 dml_print("DML::%s: k=%0d RequiredPrefetchPixDataBWChroma=%f\n", __func__, k, v->RequiredPrefetchPixDataBWChroma[k]); 2806 dml_print("DML::%s: k=%0d 
cursor_bw_pre=%f\n", __func__, k, v->cursor_bw_pre[k]); 2807 dml_print("DML::%s: k=%0d MaxTotalRDBandwidthNoUrgentBurst=%f\n", __func__, k, MaxTotalRDBandwidthNoUrgentBurst); 2808 #endif 2809 2810 if (v->DestinationLinesForPrefetch[k] < 2) 2811 DestinationLineTimesForPrefetchLessThan2 = true; 2812 2813 if (v->VRatioPrefetchY[k] > 4 || v->VRatioPrefetchC[k] > 4) 2814 VRatioPrefetchMoreThan4 = true; 2815 2816 if (v->NoUrgentLatencyHiding[k] == true) 2817 v->NoEnoughUrgentLatencyHiding = true; 2818 2819 if (v->NoUrgentLatencyHidingPre[k] == true) 2820 v->NoEnoughUrgentLatencyHidingPre = true; 2821 } 2822 2823 v->FractionOfUrgentBandwidth = MaxTotalRDBandwidthNoUrgentBurst / v->ReturnBW; 2824 2825 #ifdef __DML_VBA_DEBUG__ 2826 dml_print("DML::%s: MaxTotalRDBandwidthNoUrgentBurst=%f\n", __func__, MaxTotalRDBandwidthNoUrgentBurst); 2827 dml_print("DML::%s: ReturnBW=%f\n", __func__, v->ReturnBW); 2828 dml_print("DML::%s: FractionOfUrgentBandwidth=%f\n", __func__, v->FractionOfUrgentBandwidth); 2829 #endif 2830 2831 if (MaxTotalRDBandwidth <= v->ReturnBW && v->NoEnoughUrgentLatencyHiding == 0 && v->NoEnoughUrgentLatencyHidingPre == 0 2832 && !VRatioPrefetchMoreThan4 && !DestinationLineTimesForPrefetchLessThan2) 2833 v->PrefetchModeSupported = true; 2834 else { 2835 v->PrefetchModeSupported = false; 2836 dml_print("DML::%s: ***failed***. Bandwidth violation. Results are NOT valid\n", __func__); 2837 dml_print("DML::%s: MaxTotalRDBandwidth:%f AvailReturnBandwidth:%f\n", __func__, MaxTotalRDBandwidth, v->ReturnBW); 2838 dml_print("DML::%s: VRatioPrefetch %s more than 4\n", __func__, (VRatioPrefetchMoreThan4) ? "is" : "is not"); 2839 dml_print("DML::%s: DestinationLines for Prefetch %s less than 2\n", __func__, (DestinationLineTimesForPrefetchLessThan2) ? "is" : "is not"); 2840 } 2841 2842 // PREVIOUS_ERROR 2843 // This error result check was done after the PrefetchModeSupported. 
So we will 2844 // still try to calculate flip schedule even prefetch mode not supported 2845 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 2846 if (v->ErrorResult[k] == true || v->NotEnoughTimeForDynamicMetadata[k] == true) { 2847 v->PrefetchModeSupported = false; 2848 dml_print("DML::%s: ***failed***. Prefetch schedule violation. Results are NOT valid\n", __func__); 2849 } 2850 } 2851 2852 if (v->PrefetchModeSupported == true && v->ImmediateFlipSupport == true) { 2853 v->BandwidthAvailableForImmediateFlip = v->ReturnBW; 2854 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 2855 v->BandwidthAvailableForImmediateFlip = v->BandwidthAvailableForImmediateFlip 2856 - dml_max( 2857 v->ReadBandwidthPlaneLuma[k] * v->UrgBurstFactorLuma[k] 2858 + v->ReadBandwidthPlaneChroma[k] * v->UrgBurstFactorChroma[k] 2859 + v->cursor_bw[k] * v->UrgBurstFactorCursor[k], 2860 v->DPPPerPlane[k] 2861 * (v->RequiredPrefetchPixDataBWLuma[k] * v->UrgBurstFactorLumaPre[k] 2862 + v->RequiredPrefetchPixDataBWChroma[k] * v->UrgBurstFactorChromaPre[k]) 2863 + v->cursor_bw_pre[k] * v->UrgBurstFactorCursorPre[k]); 2864 } 2865 2866 v->TotImmediateFlipBytes = 0; 2867 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 2868 v->TotImmediateFlipBytes = v->TotImmediateFlipBytes 2869 + v->DPPPerPlane[k] * (v->PDEAndMetaPTEBytesFrame[k] + v->MetaRowByte[k] + v->PixelPTEBytesPerRow[k]); 2870 } 2871 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 2872 CalculateFlipSchedule( 2873 mode_lib, 2874 k, 2875 HostVMInefficiencyFactor, 2876 v->UrgentExtraLatency, 2877 v->UrgentLatency, 2878 v->PDEAndMetaPTEBytesFrame[k], 2879 v->MetaRowByte[k], 2880 v->PixelPTEBytesPerRow[k]); 2881 } 2882 2883 v->total_dcn_read_bw_with_flip = 0.0; 2884 v->total_dcn_read_bw_with_flip_no_urgent_burst = 0.0; 2885 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 2886 v->total_dcn_read_bw_with_flip = v->total_dcn_read_bw_with_flip 2887 + dml_max3( 2888 v->DPPPerPlane[k] * v->prefetch_vmrow_bw[k], 2889 v->DPPPerPlane[k] * v->final_flip_bw[k] 
2890 + v->ReadBandwidthLuma[k] * v->UrgBurstFactorLuma[k] 2891 + v->ReadBandwidthChroma[k] * v->UrgBurstFactorChroma[k] 2892 + v->cursor_bw[k] * v->UrgBurstFactorCursor[k], 2893 v->DPPPerPlane[k] 2894 * (v->final_flip_bw[k] 2895 + v->RequiredPrefetchPixDataBWLuma[k] * v->UrgBurstFactorLumaPre[k] 2896 + v->RequiredPrefetchPixDataBWChroma[k] * v->UrgBurstFactorChromaPre[k]) 2897 + v->cursor_bw_pre[k] * v->UrgBurstFactorCursorPre[k]); 2898 v->total_dcn_read_bw_with_flip_no_urgent_burst = v->total_dcn_read_bw_with_flip_no_urgent_burst 2899 + dml_max3( 2900 v->DPPPerPlane[k] * v->prefetch_vmrow_bw[k], 2901 v->DPPPerPlane[k] * v->final_flip_bw[k] + v->ReadBandwidthPlaneLuma[k] 2902 + v->ReadBandwidthPlaneChroma[k] + v->cursor_bw[k], 2903 v->DPPPerPlane[k] 2904 * (v->final_flip_bw[k] + v->RequiredPrefetchPixDataBWLuma[k] 2905 + v->RequiredPrefetchPixDataBWChroma[k]) + v->cursor_bw_pre[k]); 2906 } 2907 v->FractionOfUrgentBandwidthImmediateFlip = v->total_dcn_read_bw_with_flip_no_urgent_burst / v->ReturnBW; 2908 2909 v->ImmediateFlipSupported = true; 2910 if (v->total_dcn_read_bw_with_flip > v->ReturnBW) { 2911 #ifdef __DML_VBA_DEBUG__ 2912 dml_print("DML::%s: total_dcn_read_bw_with_flip %f (bw w/ flip too high!)\n", __func__, v->total_dcn_read_bw_with_flip); 2913 #endif 2914 v->ImmediateFlipSupported = false; 2915 v->total_dcn_read_bw_with_flip = MaxTotalRDBandwidth; 2916 } 2917 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 2918 if (v->ImmediateFlipSupportedForPipe[k] == false) { 2919 #ifdef __DML_VBA_DEBUG__ 2920 dml_print("DML::%s: Pipe %0d not supporting iflip\n", __func__, k); 2921 #endif 2922 v->ImmediateFlipSupported = false; 2923 } 2924 } 2925 } else { 2926 v->ImmediateFlipSupported = false; 2927 } 2928 2929 v->PrefetchAndImmediateFlipSupported = 2930 (v->PrefetchModeSupported == true && ((!v->ImmediateFlipSupport && !v->HostVMEnable 2931 && v->ImmediateFlipRequirement[0] != dm_immediate_flip_required) || 2932 v->ImmediateFlipSupported)) ? 
true : false; 2933 #ifdef __DML_VBA_DEBUG__ 2934 dml_print("DML::%s: PrefetchModeSupported %d\n", __func__, v->PrefetchModeSupported); 2935 dml_print("DML::%s: ImmediateFlipRequirement %d\n", __func__, v->ImmediateFlipRequirement == dm_immediate_flip_required); 2936 dml_print("DML::%s: ImmediateFlipSupported %d\n", __func__, v->ImmediateFlipSupported); 2937 dml_print("DML::%s: ImmediateFlipSupport %d\n", __func__, v->ImmediateFlipSupport); 2938 dml_print("DML::%s: HostVMEnable %d\n", __func__, v->HostVMEnable); 2939 dml_print("DML::%s: PrefetchAndImmediateFlipSupported %d\n", __func__, v->PrefetchAndImmediateFlipSupported); 2940 #endif 2941 dml_print("DML::%s: Done loop: Vstartup=%d, Max Vstartup is %d\n", __func__, v->VStartupLines, v->MaximumMaxVStartupLines); 2942 2943 v->VStartupLines = v->VStartupLines + 1; 2944 } while (!v->PrefetchAndImmediateFlipSupported && v->VStartupLines <= v->MaximumMaxVStartupLines); 2945 ASSERT(v->PrefetchAndImmediateFlipSupported); 2946 2947 // Unbounded Request Enabled 2948 CalculateUnboundedRequestAndCompressedBufferSize( 2949 v->DETBufferSizeInKByte[0], 2950 v->ConfigReturnBufferSizeInKByte, 2951 v->UseUnboundedRequesting, 2952 v->TotalActiveDPP, 2953 NoChromaPlanes, 2954 v->MaxNumDPP, 2955 v->CompressedBufferSegmentSizeInkByte, 2956 v->Output, 2957 &v->UnboundedRequestEnabled, 2958 &v->CompressedBufferSizeInkByte); 2959 2960 //Watermarks and NB P-State/DRAM Clock Change Support 2961 { 2962 enum clock_change_support DRAMClockChangeSupport; // dummy 2963 2964 CalculateWatermarksAndDRAMSpeedChangeSupport( 2965 mode_lib, 2966 PrefetchMode, 2967 v->DCFCLK, 2968 v->ReturnBW, 2969 v->UrgentLatency, 2970 v->UrgentExtraLatency, 2971 v->SOCCLK, 2972 v->DCFCLKDeepSleep, 2973 v->DETBufferSizeY, 2974 v->DETBufferSizeC, 2975 v->SwathHeightY, 2976 v->SwathHeightC, 2977 v->SwathWidthY, 2978 v->SwathWidthC, 2979 v->DPPPerPlane, 2980 v->BytePerPixelDETY, 2981 v->BytePerPixelDETC, 2982 v->UnboundedRequestEnabled, 2983 
v->CompressedBufferSizeInkByte, 2984 &DRAMClockChangeSupport, 2985 &v->StutterExitWatermark, 2986 &v->StutterEnterPlusExitWatermark, 2987 &v->Z8StutterExitWatermark, 2988 &v->Z8StutterEnterPlusExitWatermark); 2989 2990 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 2991 if (v->WritebackEnable[k] == true) { 2992 v->WritebackAllowDRAMClockChangeEndPosition[k] = dml_max( 2993 0, 2994 v->VStartup[k] * v->HTotal[k] / v->PixelClock[k] - v->WritebackDRAMClockChangeWatermark); 2995 } else { 2996 v->WritebackAllowDRAMClockChangeEndPosition[k] = 0; 2997 } 2998 } 2999 } 3000 3001 //Display Pipeline Delivery Time in Prefetch, Groups 3002 CalculatePixelDeliveryTimes( 3003 v->NumberOfActivePlanes, 3004 v->VRatio, 3005 v->VRatioChroma, 3006 v->VRatioPrefetchY, 3007 v->VRatioPrefetchC, 3008 v->swath_width_luma_ub, 3009 v->swath_width_chroma_ub, 3010 v->DPPPerPlane, 3011 v->HRatio, 3012 v->HRatioChroma, 3013 v->PixelClock, 3014 v->PSCL_THROUGHPUT_LUMA, 3015 v->PSCL_THROUGHPUT_CHROMA, 3016 v->DPPCLK, 3017 v->BytePerPixelC, 3018 v->SourceScan, 3019 v->NumberOfCursors, 3020 v->CursorWidth, 3021 v->CursorBPP, 3022 v->BlockWidth256BytesY, 3023 v->BlockHeight256BytesY, 3024 v->BlockWidth256BytesC, 3025 v->BlockHeight256BytesC, 3026 v->DisplayPipeLineDeliveryTimeLuma, 3027 v->DisplayPipeLineDeliveryTimeChroma, 3028 v->DisplayPipeLineDeliveryTimeLumaPrefetch, 3029 v->DisplayPipeLineDeliveryTimeChromaPrefetch, 3030 v->DisplayPipeRequestDeliveryTimeLuma, 3031 v->DisplayPipeRequestDeliveryTimeChroma, 3032 v->DisplayPipeRequestDeliveryTimeLumaPrefetch, 3033 v->DisplayPipeRequestDeliveryTimeChromaPrefetch, 3034 v->CursorRequestDeliveryTime, 3035 v->CursorRequestDeliveryTimePrefetch); 3036 3037 CalculateMetaAndPTETimes( 3038 v->NumberOfActivePlanes, 3039 v->GPUVMEnable, 3040 v->MetaChunkSize, 3041 v->MinMetaChunkSizeBytes, 3042 v->HTotal, 3043 v->VRatio, 3044 v->VRatioChroma, 3045 v->DestinationLinesToRequestRowInVBlank, 3046 v->DestinationLinesToRequestRowInImmediateFlip, 3047 v->DCCEnable, 
3048 v->PixelClock, 3049 v->BytePerPixelY, 3050 v->BytePerPixelC, 3051 v->SourceScan, 3052 v->dpte_row_height, 3053 v->dpte_row_height_chroma, 3054 v->meta_row_width, 3055 v->meta_row_width_chroma, 3056 v->meta_row_height, 3057 v->meta_row_height_chroma, 3058 v->meta_req_width, 3059 v->meta_req_width_chroma, 3060 v->meta_req_height, 3061 v->meta_req_height_chroma, 3062 v->dpte_group_bytes, 3063 v->PTERequestSizeY, 3064 v->PTERequestSizeC, 3065 v->PixelPTEReqWidthY, 3066 v->PixelPTEReqHeightY, 3067 v->PixelPTEReqWidthC, 3068 v->PixelPTEReqHeightC, 3069 v->dpte_row_width_luma_ub, 3070 v->dpte_row_width_chroma_ub, 3071 v->DST_Y_PER_PTE_ROW_NOM_L, 3072 v->DST_Y_PER_PTE_ROW_NOM_C, 3073 v->DST_Y_PER_META_ROW_NOM_L, 3074 v->DST_Y_PER_META_ROW_NOM_C, 3075 v->TimePerMetaChunkNominal, 3076 v->TimePerChromaMetaChunkNominal, 3077 v->TimePerMetaChunkVBlank, 3078 v->TimePerChromaMetaChunkVBlank, 3079 v->TimePerMetaChunkFlip, 3080 v->TimePerChromaMetaChunkFlip, 3081 v->time_per_pte_group_nom_luma, 3082 v->time_per_pte_group_vblank_luma, 3083 v->time_per_pte_group_flip_luma, 3084 v->time_per_pte_group_nom_chroma, 3085 v->time_per_pte_group_vblank_chroma, 3086 v->time_per_pte_group_flip_chroma); 3087 3088 CalculateVMGroupAndRequestTimes( 3089 v->NumberOfActivePlanes, 3090 v->GPUVMEnable, 3091 v->GPUVMMaxPageTableLevels, 3092 v->HTotal, 3093 v->BytePerPixelC, 3094 v->DestinationLinesToRequestVMInVBlank, 3095 v->DestinationLinesToRequestVMInImmediateFlip, 3096 v->DCCEnable, 3097 v->PixelClock, 3098 v->dpte_row_width_luma_ub, 3099 v->dpte_row_width_chroma_ub, 3100 v->vm_group_bytes, 3101 v->dpde0_bytes_per_frame_ub_l, 3102 v->dpde0_bytes_per_frame_ub_c, 3103 v->meta_pte_bytes_per_frame_ub_l, 3104 v->meta_pte_bytes_per_frame_ub_c, 3105 v->TimePerVMGroupVBlank, 3106 v->TimePerVMGroupFlip, 3107 v->TimePerVMRequestVBlank, 3108 v->TimePerVMRequestFlip); 3109 3110 // Min TTUVBlank 3111 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 3112 if (PrefetchMode == 0) { 3113 
v->AllowDRAMClockChangeDuringVBlank[k] = true; 3114 v->AllowDRAMSelfRefreshDuringVBlank[k] = true; 3115 v->MinTTUVBlank[k] = dml_max( 3116 v->DRAMClockChangeWatermark, 3117 dml_max(v->StutterEnterPlusExitWatermark, v->UrgentWatermark)); 3118 } else if (PrefetchMode == 1) { 3119 v->AllowDRAMClockChangeDuringVBlank[k] = false; 3120 v->AllowDRAMSelfRefreshDuringVBlank[k] = true; 3121 v->MinTTUVBlank[k] = dml_max(v->StutterEnterPlusExitWatermark, v->UrgentWatermark); 3122 } else { 3123 v->AllowDRAMClockChangeDuringVBlank[k] = false; 3124 v->AllowDRAMSelfRefreshDuringVBlank[k] = false; 3125 v->MinTTUVBlank[k] = v->UrgentWatermark; 3126 } 3127 if (!v->DynamicMetadataEnable[k]) 3128 v->MinTTUVBlank[k] = v->TCalc + v->MinTTUVBlank[k]; 3129 } 3130 3131 // DCC Configuration 3132 v->ActiveDPPs = 0; 3133 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 3134 CalculateDCCConfiguration(v->DCCEnable[k], false, // We should always know the direction DCCProgrammingAssumesScanDirectionUnknown, 3135 v->SourcePixelFormat[k], 3136 v->SurfaceWidthY[k], 3137 v->SurfaceWidthC[k], 3138 v->SurfaceHeightY[k], 3139 v->SurfaceHeightC[k], 3140 v->DETBufferSizeInKByte[0] * 1024, 3141 v->BlockHeight256BytesY[k], 3142 v->BlockHeight256BytesC[k], 3143 v->SurfaceTiling[k], 3144 v->BytePerPixelY[k], 3145 v->BytePerPixelC[k], 3146 v->BytePerPixelDETY[k], 3147 v->BytePerPixelDETC[k], 3148 v->SourceScan[k], 3149 &v->DCCYMaxUncompressedBlock[k], 3150 &v->DCCCMaxUncompressedBlock[k], 3151 &v->DCCYMaxCompressedBlock[k], 3152 &v->DCCCMaxCompressedBlock[k], 3153 &v->DCCYIndependentBlock[k], 3154 &v->DCCCIndependentBlock[k]); 3155 } 3156 3157 // VStartup Adjustment 3158 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 3159 bool isInterlaceTiming; 3160 double Tvstartup_margin = (v->MaxVStartupLines[k] - v->VStartup[k]) * v->HTotal[k] / v->PixelClock[k]; 3161 #ifdef __DML_VBA_DEBUG__ 3162 dml_print("DML::%s: k=%d, MinTTUVBlank = %f (before margin)\n", __func__, k, v->MinTTUVBlank[k]); 3163 #endif 3164 3165 
v->MinTTUVBlank[k] = v->MinTTUVBlank[k] + Tvstartup_margin; 3166 3167 #ifdef __DML_VBA_DEBUG__ 3168 dml_print("DML::%s: k=%d, Tvstartup_margin = %f\n", __func__, k, Tvstartup_margin); 3169 dml_print("DML::%s: k=%d, MaxVStartupLines = %d\n", __func__, k, v->MaxVStartupLines[k]); 3170 dml_print("DML::%s: k=%d, VStartup = %d\n", __func__, k, v->VStartup[k]); 3171 dml_print("DML::%s: k=%d, MinTTUVBlank = %f\n", __func__, k, v->MinTTUVBlank[k]); 3172 #endif 3173 3174 v->Tdmdl[k] = v->Tdmdl[k] + Tvstartup_margin; 3175 if (v->DynamicMetadataEnable[k] && v->DynamicMetadataVMEnabled) { 3176 v->Tdmdl_vm[k] = v->Tdmdl_vm[k] + Tvstartup_margin; 3177 } 3178 3179 isInterlaceTiming = (v->Interlace[k] && !v->ProgressiveToInterlaceUnitInOPP); 3180 v->VStartup[k] = (isInterlaceTiming ? (2 * v->MaxVStartupLines[k]) : v->MaxVStartupLines[k]); 3181 if (v->Interlace[k] && !v->ProgressiveToInterlaceUnitInOPP) { 3182 v->MIN_DST_Y_NEXT_START[k] = dml_floor((v->VTotal[k] - v->VFrontPorch[k] + v->VTotal[k] - v->VActive[k] - v->VStartup[k]) / 2.0, 1.0); 3183 } else { 3184 v->MIN_DST_Y_NEXT_START[k] = v->VTotal[k] - v->VFrontPorch[k] + v->VTotal[k] - v->VActive[k] - v->VStartup[k]; 3185 } 3186 v->MIN_DST_Y_NEXT_START[k] += dml_floor(4.0 * v->TSetup[k] / (double)v->HTotal[k] / v->PixelClock[k], 1.0) / 4.0; 3187 if (((v->VUpdateOffsetPix[k] + v->VUpdateWidthPix[k] + v->VReadyOffsetPix[k]) / v->HTotal[k]) 3188 <= (isInterlaceTiming ? 
3189 dml_floor((v->VTotal[k] - v->VActive[k] - v->VFrontPorch[k] - v->VStartup[k]) / 2.0, 1.0) : 3190 (int) (v->VTotal[k] - v->VActive[k] - v->VFrontPorch[k] - v->VStartup[k]))) { 3191 v->VREADY_AT_OR_AFTER_VSYNC[k] = true; 3192 } else { 3193 v->VREADY_AT_OR_AFTER_VSYNC[k] = false; 3194 } 3195 #ifdef __DML_VBA_DEBUG__ 3196 dml_print("DML::%s: k=%d, VStartup = %d (max)\n", __func__, k, v->VStartup[k]); 3197 dml_print("DML::%s: k=%d, VUpdateOffsetPix = %d\n", __func__, k, v->VUpdateOffsetPix[k]); 3198 dml_print("DML::%s: k=%d, VUpdateWidthPix = %d\n", __func__, k, v->VUpdateWidthPix[k]); 3199 dml_print("DML::%s: k=%d, VReadyOffsetPix = %d\n", __func__, k, v->VReadyOffsetPix[k]); 3200 dml_print("DML::%s: k=%d, HTotal = %d\n", __func__, k, v->HTotal[k]); 3201 dml_print("DML::%s: k=%d, VTotal = %d\n", __func__, k, v->VTotal[k]); 3202 dml_print("DML::%s: k=%d, VActive = %d\n", __func__, k, v->VActive[k]); 3203 dml_print("DML::%s: k=%d, VFrontPorch = %d\n", __func__, k, v->VFrontPorch[k]); 3204 dml_print("DML::%s: k=%d, VStartup = %d\n", __func__, k, v->VStartup[k]); 3205 dml_print("DML::%s: k=%d, MIN_DST_Y_NEXT_START = %f\n", __func__, k, v->MIN_DST_Y_NEXT_START[k]); 3206 dml_print("DML::%s: k=%d, VREADY_AT_OR_AFTER_VSYNC = %d\n", __func__, k, v->VREADY_AT_OR_AFTER_VSYNC[k]); 3207 #endif 3208 } 3209 3210 { 3211 //Maximum Bandwidth Used 3212 double TotalWRBandwidth = 0; 3213 double MaxPerPlaneVActiveWRBandwidth = 0; 3214 double WRBandwidth = 0; 3215 3216 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 3217 if (v->WritebackEnable[k] == true && v->WritebackPixelFormat[k] == dm_444_32) { 3218 WRBandwidth = v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k] 3219 / (v->HTotal[k] * v->WritebackSourceHeight[k] / v->PixelClock[k]) * 4; 3220 } else if (v->WritebackEnable[k] == true) { 3221 WRBandwidth = v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k] 3222 / (v->HTotal[k] * v->WritebackSourceHeight[k] / v->PixelClock[k]) * 8; 3223 } 3224 
TotalWRBandwidth = TotalWRBandwidth + WRBandwidth; 3225 MaxPerPlaneVActiveWRBandwidth = dml_max(MaxPerPlaneVActiveWRBandwidth, WRBandwidth); 3226 } 3227 3228 v->TotalDataReadBandwidth = 0; 3229 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 3230 v->TotalDataReadBandwidth = v->TotalDataReadBandwidth + v->ReadBandwidthPlaneLuma[k] + v->ReadBandwidthPlaneChroma[k]; 3231 } 3232 } 3233 // Stutter Efficiency 3234 CalculateStutterEfficiency( 3235 mode_lib, 3236 v->CompressedBufferSizeInkByte, 3237 v->UnboundedRequestEnabled, 3238 v->ConfigReturnBufferSizeInKByte, 3239 v->MetaFIFOSizeInKEntries, 3240 v->ZeroSizeBufferEntries, 3241 v->NumberOfActivePlanes, 3242 v->ROBBufferSizeInKByte, 3243 v->TotalDataReadBandwidth, 3244 v->DCFCLK, 3245 v->ReturnBW, 3246 v->COMPBUF_RESERVED_SPACE_64B, 3247 v->COMPBUF_RESERVED_SPACE_ZS, 3248 v->SRExitTime, 3249 v->SRExitZ8Time, 3250 v->SynchronizedVBlank, 3251 v->StutterEnterPlusExitWatermark, 3252 v->Z8StutterEnterPlusExitWatermark, 3253 v->ProgressiveToInterlaceUnitInOPP, 3254 v->Interlace, 3255 v->MinTTUVBlank, 3256 v->DPPPerPlane, 3257 v->DETBufferSizeY, 3258 v->BytePerPixelY, 3259 v->BytePerPixelDETY, 3260 v->SwathWidthY, 3261 v->SwathHeightY, 3262 v->SwathHeightC, 3263 v->DCCRateLuma, 3264 v->DCCRateChroma, 3265 v->DCCFractionOfZeroSizeRequestsLuma, 3266 v->DCCFractionOfZeroSizeRequestsChroma, 3267 v->HTotal, 3268 v->VTotal, 3269 v->PixelClock, 3270 v->VRatio, 3271 v->SourceScan, 3272 v->BlockHeight256BytesY, 3273 v->BlockWidth256BytesY, 3274 v->BlockHeight256BytesC, 3275 v->BlockWidth256BytesC, 3276 v->DCCYMaxUncompressedBlock, 3277 v->DCCCMaxUncompressedBlock, 3278 v->VActive, 3279 v->DCCEnable, 3280 v->WritebackEnable, 3281 v->ReadBandwidthPlaneLuma, 3282 v->ReadBandwidthPlaneChroma, 3283 v->meta_row_bw, 3284 v->dpte_row_bw, 3285 &v->StutterEfficiencyNotIncludingVBlank, 3286 &v->StutterEfficiency, 3287 &v->NumberOfStutterBurstsPerFrame, 3288 &v->Z8StutterEfficiencyNotIncludingVBlank, 3289 &v->Z8StutterEfficiency, 3290 
&v->Z8NumberOfStutterBurstsPerFrame, 3291 &v->StutterPeriod); 3292 } 3293 3294 static void DisplayPipeConfiguration(struct display_mode_lib *mode_lib) 3295 { 3296 struct vba_vars_st *v = &mode_lib->vba; 3297 // Display Pipe Configuration 3298 double BytePerPixDETY[DC__NUM_DPP__MAX]; 3299 double BytePerPixDETC[DC__NUM_DPP__MAX]; 3300 int BytePerPixY[DC__NUM_DPP__MAX]; 3301 int BytePerPixC[DC__NUM_DPP__MAX]; 3302 int Read256BytesBlockHeightY[DC__NUM_DPP__MAX]; 3303 int Read256BytesBlockHeightC[DC__NUM_DPP__MAX]; 3304 int Read256BytesBlockWidthY[DC__NUM_DPP__MAX]; 3305 int Read256BytesBlockWidthC[DC__NUM_DPP__MAX]; 3306 double dummy1[DC__NUM_DPP__MAX]; 3307 double dummy2[DC__NUM_DPP__MAX]; 3308 double dummy3[DC__NUM_DPP__MAX]; 3309 double dummy4[DC__NUM_DPP__MAX]; 3310 int dummy5[DC__NUM_DPP__MAX]; 3311 int dummy6[DC__NUM_DPP__MAX]; 3312 bool dummy7[DC__NUM_DPP__MAX]; 3313 bool dummysinglestring; 3314 3315 unsigned int k; 3316 3317 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 3318 3319 CalculateBytePerPixelAnd256BBlockSizes( 3320 v->SourcePixelFormat[k], 3321 v->SurfaceTiling[k], 3322 &BytePerPixY[k], 3323 &BytePerPixC[k], 3324 &BytePerPixDETY[k], 3325 &BytePerPixDETC[k], 3326 &Read256BytesBlockHeightY[k], 3327 &Read256BytesBlockHeightC[k], 3328 &Read256BytesBlockWidthY[k], 3329 &Read256BytesBlockWidthC[k]); 3330 } 3331 3332 CalculateSwathAndDETConfiguration( 3333 false, 3334 v->NumberOfActivePlanes, 3335 v->DETBufferSizeInKByte[0], 3336 dummy1, 3337 dummy2, 3338 v->SourceScan, 3339 v->SourcePixelFormat, 3340 v->SurfaceTiling, 3341 v->ViewportWidth, 3342 v->ViewportHeight, 3343 v->SurfaceWidthY, 3344 v->SurfaceWidthC, 3345 v->SurfaceHeightY, 3346 v->SurfaceHeightC, 3347 Read256BytesBlockHeightY, 3348 Read256BytesBlockHeightC, 3349 Read256BytesBlockWidthY, 3350 Read256BytesBlockWidthC, 3351 v->ODMCombineEnabled, 3352 v->BlendingAndTiming, 3353 BytePerPixY, 3354 BytePerPixC, 3355 BytePerPixDETY, 3356 BytePerPixDETC, 3357 v->HActive, 3358 v->HRatio, 3359 
v->HRatioChroma, 3360 v->DPPPerPlane, 3361 dummy5, 3362 dummy6, 3363 dummy3, 3364 dummy4, 3365 v->SwathHeightY, 3366 v->SwathHeightC, 3367 v->DETBufferSizeY, 3368 v->DETBufferSizeC, 3369 dummy7, 3370 &dummysinglestring); 3371 } 3372 3373 static bool CalculateBytePerPixelAnd256BBlockSizes( 3374 enum source_format_class SourcePixelFormat, 3375 enum dm_swizzle_mode SurfaceTiling, 3376 unsigned int *BytePerPixelY, 3377 unsigned int *BytePerPixelC, 3378 double *BytePerPixelDETY, 3379 double *BytePerPixelDETC, 3380 unsigned int *BlockHeight256BytesY, 3381 unsigned int *BlockHeight256BytesC, 3382 unsigned int *BlockWidth256BytesY, 3383 unsigned int *BlockWidth256BytesC) 3384 { 3385 if (SourcePixelFormat == dm_444_64) { 3386 *BytePerPixelDETY = 8; 3387 *BytePerPixelDETC = 0; 3388 *BytePerPixelY = 8; 3389 *BytePerPixelC = 0; 3390 } else if (SourcePixelFormat == dm_444_32 || SourcePixelFormat == dm_rgbe) { 3391 *BytePerPixelDETY = 4; 3392 *BytePerPixelDETC = 0; 3393 *BytePerPixelY = 4; 3394 *BytePerPixelC = 0; 3395 } else if (SourcePixelFormat == dm_444_16) { 3396 *BytePerPixelDETY = 2; 3397 *BytePerPixelDETC = 0; 3398 *BytePerPixelY = 2; 3399 *BytePerPixelC = 0; 3400 } else if (SourcePixelFormat == dm_444_8) { 3401 *BytePerPixelDETY = 1; 3402 *BytePerPixelDETC = 0; 3403 *BytePerPixelY = 1; 3404 *BytePerPixelC = 0; 3405 } else if (SourcePixelFormat == dm_rgbe_alpha) { 3406 *BytePerPixelDETY = 4; 3407 *BytePerPixelDETC = 1; 3408 *BytePerPixelY = 4; 3409 *BytePerPixelC = 1; 3410 } else if (SourcePixelFormat == dm_420_8) { 3411 *BytePerPixelDETY = 1; 3412 *BytePerPixelDETC = 2; 3413 *BytePerPixelY = 1; 3414 *BytePerPixelC = 2; 3415 } else if (SourcePixelFormat == dm_420_12) { 3416 *BytePerPixelDETY = 2; 3417 *BytePerPixelDETC = 4; 3418 *BytePerPixelY = 2; 3419 *BytePerPixelC = 4; 3420 } else { 3421 *BytePerPixelDETY = 4.0 / 3; 3422 *BytePerPixelDETC = 8.0 / 3; 3423 *BytePerPixelY = 2; 3424 *BytePerPixelC = 4; 3425 } 3426 3427 if ((SourcePixelFormat == dm_444_64 || 
SourcePixelFormat == dm_444_32 || SourcePixelFormat == dm_444_16 || SourcePixelFormat == dm_444_8 || SourcePixelFormat == dm_mono_16 3428 || SourcePixelFormat == dm_mono_8 || SourcePixelFormat == dm_rgbe)) { 3429 if (SurfaceTiling == dm_sw_linear) { 3430 *BlockHeight256BytesY = 1; 3431 } else if (SourcePixelFormat == dm_444_64) { 3432 *BlockHeight256BytesY = 4; 3433 } else if (SourcePixelFormat == dm_444_8) { 3434 *BlockHeight256BytesY = 16; 3435 } else { 3436 *BlockHeight256BytesY = 8; 3437 } 3438 *BlockWidth256BytesY = 256U / *BytePerPixelY / *BlockHeight256BytesY; 3439 *BlockHeight256BytesC = 0; 3440 *BlockWidth256BytesC = 0; 3441 } else { 3442 if (SurfaceTiling == dm_sw_linear) { 3443 *BlockHeight256BytesY = 1; 3444 *BlockHeight256BytesC = 1; 3445 } else if (SourcePixelFormat == dm_rgbe_alpha) { 3446 *BlockHeight256BytesY = 8; 3447 *BlockHeight256BytesC = 16; 3448 } else if (SourcePixelFormat == dm_420_8) { 3449 *BlockHeight256BytesY = 16; 3450 *BlockHeight256BytesC = 8; 3451 } else { 3452 *BlockHeight256BytesY = 8; 3453 *BlockHeight256BytesC = 8; 3454 } 3455 *BlockWidth256BytesY = 256U / *BytePerPixelY / *BlockHeight256BytesY; 3456 *BlockWidth256BytesC = 256U / *BytePerPixelC / *BlockHeight256BytesC; 3457 } 3458 return true; 3459 } 3460 3461 static double CalculateTWait(unsigned int PrefetchMode, double DRAMClockChangeLatency, double UrgentLatency, double SREnterPlusExitTime) 3462 { 3463 if (PrefetchMode == 0) { 3464 return dml_max(DRAMClockChangeLatency + UrgentLatency, dml_max(SREnterPlusExitTime, UrgentLatency)); 3465 } else if (PrefetchMode == 1) { 3466 return dml_max(SREnterPlusExitTime, UrgentLatency); 3467 } else { 3468 return UrgentLatency; 3469 } 3470 } 3471 3472 double dml314_CalculateWriteBackDISPCLK( 3473 enum source_format_class WritebackPixelFormat, 3474 double PixelClock, 3475 double WritebackHRatio, 3476 double WritebackVRatio, 3477 unsigned int WritebackHTaps, 3478 unsigned int WritebackVTaps, 3479 long WritebackSourceWidth, 3480 long 
WritebackDestinationWidth, 3481 unsigned int HTotal, 3482 unsigned int WritebackLineBufferSize) 3483 { 3484 double DISPCLK_H, DISPCLK_V, DISPCLK_HB; 3485 3486 DISPCLK_H = PixelClock * dml_ceil(WritebackHTaps / 8.0, 1) / WritebackHRatio; 3487 DISPCLK_V = PixelClock * (WritebackVTaps * dml_ceil(WritebackDestinationWidth / 6.0, 1) + 8.0) / HTotal; 3488 DISPCLK_HB = PixelClock * WritebackVTaps * (WritebackDestinationWidth * WritebackVTaps - WritebackLineBufferSize / 57.0) / 6.0 / WritebackSourceWidth; 3489 return dml_max3(DISPCLK_H, DISPCLK_V, DISPCLK_HB); 3490 } 3491 3492 static double CalculateWriteBackDelay( 3493 enum source_format_class WritebackPixelFormat, 3494 double WritebackHRatio, 3495 double WritebackVRatio, 3496 unsigned int WritebackVTaps, 3497 int WritebackDestinationWidth, 3498 int WritebackDestinationHeight, 3499 int WritebackSourceHeight, 3500 unsigned int HTotal) 3501 { 3502 double CalculateWriteBackDelay; 3503 double Line_length; 3504 double Output_lines_last_notclamped; 3505 double WritebackVInit; 3506 3507 WritebackVInit = (WritebackVRatio + WritebackVTaps + 1) / 2; 3508 Line_length = dml_max((double) WritebackDestinationWidth, dml_ceil(WritebackDestinationWidth / 6.0, 1) * WritebackVTaps); 3509 Output_lines_last_notclamped = WritebackDestinationHeight - 1 - dml_ceil((WritebackSourceHeight - WritebackVInit) / WritebackVRatio, 1); 3510 if (Output_lines_last_notclamped < 0) { 3511 CalculateWriteBackDelay = 0; 3512 } else { 3513 CalculateWriteBackDelay = Output_lines_last_notclamped * Line_length + (HTotal - WritebackDestinationWidth) + 80; 3514 } 3515 return CalculateWriteBackDelay; 3516 } 3517 3518 static void CalculateVupdateAndDynamicMetadataParameters( 3519 int MaxInterDCNTileRepeaters, 3520 double DPPCLK, 3521 double DISPCLK, 3522 double DCFClkDeepSleep, 3523 double PixelClock, 3524 int HTotal, 3525 int VBlank, 3526 int DynamicMetadataTransmittedBytes, 3527 int DynamicMetadataLinesBeforeActiveRequired, 3528 int InterlaceEnable, 3529 bool 
ProgressiveToInterlaceUnitInOPP, 3530 double *TSetup, 3531 double *Tdmbf, 3532 double *Tdmec, 3533 double *Tdmsks, 3534 int *VUpdateOffsetPix, 3535 double *VUpdateWidthPix, 3536 double *VReadyOffsetPix) 3537 { 3538 double TotalRepeaterDelayTime; 3539 3540 TotalRepeaterDelayTime = MaxInterDCNTileRepeaters * (2 / DPPCLK + 3 / DISPCLK); 3541 *VUpdateWidthPix = dml_ceil((14.0 / DCFClkDeepSleep + 12.0 / DPPCLK + TotalRepeaterDelayTime) * PixelClock, 1.0); 3542 *VReadyOffsetPix = dml_ceil(dml_max(150.0 / DPPCLK, TotalRepeaterDelayTime + 20.0 / DCFClkDeepSleep + 10.0 / DPPCLK) * PixelClock, 1.0); 3543 *VUpdateOffsetPix = dml_ceil(HTotal / 4.0, 1); 3544 *TSetup = (*VUpdateOffsetPix + *VUpdateWidthPix + *VReadyOffsetPix) / PixelClock; 3545 *Tdmbf = DynamicMetadataTransmittedBytes / 4.0 / DISPCLK; 3546 *Tdmec = HTotal / PixelClock; 3547 if (DynamicMetadataLinesBeforeActiveRequired == 0) { 3548 *Tdmsks = VBlank * HTotal / PixelClock / 2.0; 3549 } else { 3550 *Tdmsks = DynamicMetadataLinesBeforeActiveRequired * HTotal / PixelClock; 3551 } 3552 if (InterlaceEnable == 1 && ProgressiveToInterlaceUnitInOPP == false) { 3553 *Tdmsks = *Tdmsks / 2; 3554 } 3555 #ifdef __DML_VBA_DEBUG__ 3556 dml_print("DML::%s: VUpdateWidthPix = %d\n", __func__, *VUpdateWidthPix); 3557 dml_print("DML::%s: VReadyOffsetPix = %d\n", __func__, *VReadyOffsetPix); 3558 dml_print("DML::%s: VUpdateOffsetPix = %d\n", __func__, *VUpdateOffsetPix); 3559 #endif 3560 } 3561 3562 static void CalculateRowBandwidth( 3563 bool GPUVMEnable, 3564 enum source_format_class SourcePixelFormat, 3565 double VRatio, 3566 double VRatioChroma, 3567 bool DCCEnable, 3568 double LineTime, 3569 unsigned int MetaRowByteLuma, 3570 unsigned int MetaRowByteChroma, 3571 unsigned int meta_row_height_luma, 3572 unsigned int meta_row_height_chroma, 3573 unsigned int PixelPTEBytesPerRowLuma, 3574 unsigned int PixelPTEBytesPerRowChroma, 3575 unsigned int dpte_row_height_luma, 3576 unsigned int dpte_row_height_chroma, 3577 double *meta_row_bw, 
3578 double *dpte_row_bw) 3579 { 3580 if (DCCEnable != true) { 3581 *meta_row_bw = 0; 3582 } else if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12 || SourcePixelFormat == dm_rgbe_alpha) { 3583 *meta_row_bw = VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime) + VRatioChroma * MetaRowByteChroma / (meta_row_height_chroma * LineTime); 3584 } else { 3585 *meta_row_bw = VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime); 3586 } 3587 3588 if (GPUVMEnable != true) { 3589 *dpte_row_bw = 0; 3590 } else if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12 || SourcePixelFormat == dm_rgbe_alpha) { 3591 *dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime) 3592 + VRatioChroma * PixelPTEBytesPerRowChroma / (dpte_row_height_chroma * LineTime); 3593 } else { 3594 *dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime); 3595 } 3596 } 3597 3598 static void CalculateFlipSchedule( 3599 struct display_mode_lib *mode_lib, 3600 unsigned int k, 3601 double HostVMInefficiencyFactor, 3602 double UrgentExtraLatency, 3603 double UrgentLatency, 3604 double PDEAndMetaPTEBytesPerFrame, 3605 double MetaRowBytes, 3606 double DPTEBytesPerRow) 3607 { 3608 struct vba_vars_st *v = &mode_lib->vba; 3609 double min_row_time = 0.0; 3610 unsigned int HostVMDynamicLevelsTrips; 3611 double TimeForFetchingMetaPTEImmediateFlip; 3612 double TimeForFetchingRowInVBlankImmediateFlip; 3613 double ImmediateFlipBW; 3614 double LineTime = v->HTotal[k] / v->PixelClock[k]; 3615 3616 if (v->GPUVMEnable == true && v->HostVMEnable == true) { 3617 HostVMDynamicLevelsTrips = v->HostVMMaxNonCachedPageTableLevels; 3618 } else { 3619 HostVMDynamicLevelsTrips = 0; 3620 } 3621 3622 if (v->GPUVMEnable == true || v->DCCEnable[k] == true) { 3623 ImmediateFlipBW = (PDEAndMetaPTEBytesPerFrame + MetaRowBytes + DPTEBytesPerRow) * 
v->BandwidthAvailableForImmediateFlip / v->TotImmediateFlipBytes; 3624 } 3625 3626 if (v->GPUVMEnable == true) { 3627 TimeForFetchingMetaPTEImmediateFlip = dml_max3( 3628 v->Tno_bw[k] + PDEAndMetaPTEBytesPerFrame * HostVMInefficiencyFactor / ImmediateFlipBW, 3629 UrgentExtraLatency + UrgentLatency * (v->GPUVMMaxPageTableLevels * (HostVMDynamicLevelsTrips + 1) - 1), 3630 LineTime / 4.0); 3631 } else { 3632 TimeForFetchingMetaPTEImmediateFlip = 0; 3633 } 3634 3635 v->DestinationLinesToRequestVMInImmediateFlip[k] = dml_ceil(4.0 * (TimeForFetchingMetaPTEImmediateFlip / LineTime), 1) / 4.0; 3636 if ((v->GPUVMEnable == true || v->DCCEnable[k] == true)) { 3637 TimeForFetchingRowInVBlankImmediateFlip = dml_max3( 3638 (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / ImmediateFlipBW, 3639 UrgentLatency * (HostVMDynamicLevelsTrips + 1), 3640 LineTime / 4); 3641 } else { 3642 TimeForFetchingRowInVBlankImmediateFlip = 0; 3643 } 3644 3645 v->DestinationLinesToRequestRowInImmediateFlip[k] = dml_ceil(4.0 * (TimeForFetchingRowInVBlankImmediateFlip / LineTime), 1) / 4.0; 3646 3647 if (v->GPUVMEnable == true) { 3648 v->final_flip_bw[k] = dml_max( 3649 PDEAndMetaPTEBytesPerFrame * HostVMInefficiencyFactor / (v->DestinationLinesToRequestVMInImmediateFlip[k] * LineTime), 3650 (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / (v->DestinationLinesToRequestRowInImmediateFlip[k] * LineTime)); 3651 } else if ((v->GPUVMEnable == true || v->DCCEnable[k] == true)) { 3652 v->final_flip_bw[k] = (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / (v->DestinationLinesToRequestRowInImmediateFlip[k] * LineTime); 3653 } else { 3654 v->final_flip_bw[k] = 0; 3655 } 3656 3657 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_rgbe_alpha) { 3658 if (v->GPUVMEnable == true && v->DCCEnable[k] != true) { 3659 min_row_time = dml_min(v->dpte_row_height[k] * LineTime / v->VRatio[k], v->dpte_row_height_chroma[k] * 
LineTime / v->VRatioChroma[k]); 3660 } else if (v->GPUVMEnable != true && v->DCCEnable[k] == true) { 3661 min_row_time = dml_min(v->meta_row_height[k] * LineTime / v->VRatio[k], v->meta_row_height_chroma[k] * LineTime / v->VRatioChroma[k]); 3662 } else { 3663 min_row_time = dml_min4( 3664 v->dpte_row_height[k] * LineTime / v->VRatio[k], 3665 v->meta_row_height[k] * LineTime / v->VRatio[k], 3666 v->dpte_row_height_chroma[k] * LineTime / v->VRatioChroma[k], 3667 v->meta_row_height_chroma[k] * LineTime / v->VRatioChroma[k]); 3668 } 3669 } else { 3670 if (v->GPUVMEnable == true && v->DCCEnable[k] != true) { 3671 min_row_time = v->dpte_row_height[k] * LineTime / v->VRatio[k]; 3672 } else if (v->GPUVMEnable != true && v->DCCEnable[k] == true) { 3673 min_row_time = v->meta_row_height[k] * LineTime / v->VRatio[k]; 3674 } else { 3675 min_row_time = dml_min(v->dpte_row_height[k] * LineTime / v->VRatio[k], v->meta_row_height[k] * LineTime / v->VRatio[k]); 3676 } 3677 } 3678 3679 if (v->DestinationLinesToRequestVMInImmediateFlip[k] >= 32 || v->DestinationLinesToRequestRowInImmediateFlip[k] >= 16 3680 || TimeForFetchingMetaPTEImmediateFlip + 2 * TimeForFetchingRowInVBlankImmediateFlip > min_row_time) { 3681 v->ImmediateFlipSupportedForPipe[k] = false; 3682 } else { 3683 v->ImmediateFlipSupportedForPipe[k] = true; 3684 } 3685 3686 #ifdef __DML_VBA_DEBUG__ 3687 dml_print("DML::%s: DestinationLinesToRequestVMInImmediateFlip = %f\n", __func__, v->DestinationLinesToRequestVMInImmediateFlip[k]); 3688 dml_print("DML::%s: DestinationLinesToRequestRowInImmediateFlip = %f\n", __func__, v->DestinationLinesToRequestRowInImmediateFlip[k]); 3689 dml_print("DML::%s: TimeForFetchingMetaPTEImmediateFlip = %f\n", __func__, TimeForFetchingMetaPTEImmediateFlip); 3690 dml_print("DML::%s: TimeForFetchingRowInVBlankImmediateFlip = %f\n", __func__, TimeForFetchingRowInVBlankImmediateFlip); 3691 dml_print("DML::%s: min_row_time = %f\n", __func__, min_row_time); 3692 dml_print("DML::%s: 
ImmediateFlipSupportedForPipe = %d\n", __func__, v->ImmediateFlipSupportedForPipe[k]); 3693 #endif 3694 3695 } 3696 3697 static double TruncToValidBPP( 3698 double LinkBitRate, 3699 int Lanes, 3700 int HTotal, 3701 int HActive, 3702 double PixelClock, 3703 double DesiredBPP, 3704 bool DSCEnable, 3705 enum output_encoder_class Output, 3706 enum output_format_class Format, 3707 unsigned int DSCInputBitPerComponent, 3708 int DSCSlices, 3709 int AudioRate, 3710 int AudioLayout, 3711 enum odm_combine_mode ODMCombine) 3712 { 3713 double MaxLinkBPP; 3714 int MinDSCBPP; 3715 double MaxDSCBPP; 3716 int NonDSCBPP0; 3717 int NonDSCBPP1; 3718 int NonDSCBPP2; 3719 3720 if (Format == dm_420) { 3721 NonDSCBPP0 = 12; 3722 NonDSCBPP1 = 15; 3723 NonDSCBPP2 = 18; 3724 MinDSCBPP = 6; 3725 MaxDSCBPP = 1.5 * DSCInputBitPerComponent - 1 / 16; 3726 } else if (Format == dm_444) { 3727 NonDSCBPP0 = 24; 3728 NonDSCBPP1 = 30; 3729 NonDSCBPP2 = 36; 3730 MinDSCBPP = 8; 3731 MaxDSCBPP = 3 * DSCInputBitPerComponent - 1.0 / 16; 3732 } else { 3733 3734 NonDSCBPP0 = 16; 3735 NonDSCBPP1 = 20; 3736 NonDSCBPP2 = 24; 3737 3738 if (Format == dm_n422) { 3739 MinDSCBPP = 7; 3740 MaxDSCBPP = 2 * DSCInputBitPerComponent - 1.0 / 16.0; 3741 } else { 3742 MinDSCBPP = 8; 3743 MaxDSCBPP = 3 * DSCInputBitPerComponent - 1.0 / 16.0; 3744 } 3745 } 3746 3747 if (DSCEnable && Output == dm_dp) { 3748 MaxLinkBPP = LinkBitRate / 10 * 8 * Lanes / PixelClock * (1 - 2.4 / 100); 3749 } else { 3750 MaxLinkBPP = LinkBitRate / 10 * 8 * Lanes / PixelClock; 3751 } 3752 3753 if (ODMCombine == dm_odm_combine_mode_4to1 && MaxLinkBPP > 16) { 3754 MaxLinkBPP = 16; 3755 } else if (ODMCombine == dm_odm_combine_mode_2to1 && MaxLinkBPP > 32) { 3756 MaxLinkBPP = 32; 3757 } 3758 3759 if (DesiredBPP == 0) { 3760 if (DSCEnable) { 3761 if (MaxLinkBPP < MinDSCBPP) { 3762 return BPP_INVALID; 3763 } else if (MaxLinkBPP >= MaxDSCBPP) { 3764 return MaxDSCBPP; 3765 } else { 3766 return dml_floor(16.0 * MaxLinkBPP, 1.0) / 16.0; 3767 } 3768 } else { 
3769 if (MaxLinkBPP >= NonDSCBPP2) { 3770 return NonDSCBPP2; 3771 } else if (MaxLinkBPP >= NonDSCBPP1) { 3772 return NonDSCBPP1; 3773 } else if (MaxLinkBPP >= NonDSCBPP0) { 3774 return 16.0; 3775 } else { 3776 return BPP_INVALID; 3777 } 3778 } 3779 } else { 3780 if (!((DSCEnable == false && (DesiredBPP == NonDSCBPP2 || DesiredBPP == NonDSCBPP1 || DesiredBPP <= NonDSCBPP0)) 3781 || (DSCEnable && DesiredBPP >= MinDSCBPP && DesiredBPP <= MaxDSCBPP))) { 3782 return BPP_INVALID; 3783 } else { 3784 return DesiredBPP; 3785 } 3786 } 3787 return BPP_INVALID; 3788 } 3789 3790 static noinline void CalculatePrefetchSchedulePerPlane( 3791 struct display_mode_lib *mode_lib, 3792 double HostVMInefficiencyFactor, 3793 int i, 3794 unsigned int j, 3795 unsigned int k) 3796 { 3797 struct vba_vars_st *v = &mode_lib->vba; 3798 Pipe myPipe; 3799 3800 myPipe.DPPCLK = v->RequiredDPPCLK[i][j][k]; 3801 myPipe.DISPCLK = v->RequiredDISPCLK[i][j]; 3802 myPipe.PixelClock = v->PixelClock[k]; 3803 myPipe.DCFCLKDeepSleep = v->ProjectedDCFCLKDeepSleep[i][j]; 3804 myPipe.DPPPerPlane = v->NoOfDPP[i][j][k]; 3805 myPipe.ScalerEnabled = v->ScalerEnabled[k]; 3806 myPipe.VRatio = mode_lib->vba.VRatio[k]; 3807 myPipe.VRatioChroma = mode_lib->vba.VRatioChroma[k]; 3808 3809 myPipe.SourceScan = v->SourceScan[k]; 3810 myPipe.BlockWidth256BytesY = v->Read256BlockWidthY[k]; 3811 myPipe.BlockHeight256BytesY = v->Read256BlockHeightY[k]; 3812 myPipe.BlockWidth256BytesC = v->Read256BlockWidthC[k]; 3813 myPipe.BlockHeight256BytesC = v->Read256BlockHeightC[k]; 3814 myPipe.InterlaceEnable = v->Interlace[k]; 3815 myPipe.NumberOfCursors = v->NumberOfCursors[k]; 3816 myPipe.VBlank = v->VTotal[k] - v->VActive[k]; 3817 myPipe.HTotal = v->HTotal[k]; 3818 myPipe.DCCEnable = v->DCCEnable[k]; 3819 myPipe.ODMCombineIsEnabled = v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1 3820 || v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1; 3821 myPipe.SourcePixelFormat = v->SourcePixelFormat[k]; 3822 
myPipe.BytePerPixelY = v->BytePerPixelY[k]; 3823 myPipe.BytePerPixelC = v->BytePerPixelC[k]; 3824 myPipe.ProgressiveToInterlaceUnitInOPP = v->ProgressiveToInterlaceUnitInOPP; 3825 v->NoTimeForPrefetch[i][j][k] = CalculatePrefetchSchedule( 3826 mode_lib, 3827 HostVMInefficiencyFactor, 3828 &myPipe, 3829 v->DSCDelayPerState[i][k], 3830 v->DPPCLKDelaySubtotal + v->DPPCLKDelayCNVCFormater, 3831 v->DPPCLKDelaySCL, 3832 v->DPPCLKDelaySCLLBOnly, 3833 v->DPPCLKDelayCNVCCursor, 3834 v->DISPCLKDelaySubtotal, 3835 v->SwathWidthYThisState[k] / v->HRatio[k], 3836 v->OutputFormat[k], 3837 v->MaxInterDCNTileRepeaters, 3838 dml_min(v->MaxVStartup, v->MaximumVStartup[i][j][k]), 3839 v->MaximumVStartup[i][j][k], 3840 v->GPUVMMaxPageTableLevels, 3841 v->GPUVMEnable, 3842 v->HostVMEnable, 3843 v->HostVMMaxNonCachedPageTableLevels, 3844 v->HostVMMinPageSize, 3845 v->DynamicMetadataEnable[k], 3846 v->DynamicMetadataVMEnabled, 3847 v->DynamicMetadataLinesBeforeActiveRequired[k], 3848 v->DynamicMetadataTransmittedBytes[k], 3849 v->UrgLatency[i], 3850 v->ExtraLatency, 3851 v->TimeCalc, 3852 v->PDEAndMetaPTEBytesPerFrame[i][j][k], 3853 v->MetaRowBytes[i][j][k], 3854 v->DPTEBytesPerRow[i][j][k], 3855 v->PrefetchLinesY[i][j][k], 3856 v->SwathWidthYThisState[k], 3857 v->PrefillY[k], 3858 v->MaxNumSwY[k], 3859 v->PrefetchLinesC[i][j][k], 3860 v->SwathWidthCThisState[k], 3861 v->PrefillC[k], 3862 v->MaxNumSwC[k], 3863 v->swath_width_luma_ub_this_state[k], 3864 v->swath_width_chroma_ub_this_state[k], 3865 v->SwathHeightYThisState[k], 3866 v->SwathHeightCThisState[k], 3867 v->TWait, 3868 &v->DSTXAfterScaler[k], 3869 &v->DSTYAfterScaler[k], 3870 &v->LineTimesForPrefetch[k], 3871 &v->PrefetchBW[k], 3872 &v->LinesForMetaPTE[k], 3873 &v->LinesForMetaAndDPTERow[k], 3874 &v->VRatioPreY[i][j][k], 3875 &v->VRatioPreC[i][j][k], 3876 &v->RequiredPrefetchPixelDataBWLuma[i][j][k], 3877 &v->RequiredPrefetchPixelDataBWChroma[i][j][k], 3878 &v->NoTimeForDynamicMetadata[i][j][k], 3879 &v->Tno_bw[k], 3880 
&v->prefetch_vmrow_bw[k], 3881 &v->dummy7[k], 3882 &v->dummy8[k], 3883 &v->dummy13[k], 3884 &v->VUpdateOffsetPix[k], 3885 &v->VUpdateWidthPix[k], 3886 &v->VReadyOffsetPix[k]); 3887 } 3888 3889 void dml314_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_lib) 3890 { 3891 struct vba_vars_st *v = &mode_lib->vba; 3892 3893 int i, j; 3894 unsigned int k, m; 3895 int ReorderingBytes; 3896 int MinPrefetchMode = 0, MaxPrefetchMode = 2; 3897 bool NoChroma = true; 3898 bool EnoughWritebackUnits = true; 3899 bool P2IWith420 = false; 3900 bool DSCOnlyIfNecessaryWithBPP = false; 3901 bool DSC422NativeNotSupported = false; 3902 double MaxTotalVActiveRDBandwidth; 3903 bool ViewportExceedsSurface = false; 3904 bool FMTBufferExceeded = false; 3905 3906 /*MODE SUPPORT, VOLTAGE STATE AND SOC CONFIGURATION*/ 3907 3908 CalculateMinAndMaxPrefetchMode( 3909 mode_lib->vba.AllowDRAMSelfRefreshOrDRAMClockChangeInVblank, 3910 &MinPrefetchMode, &MaxPrefetchMode); 3911 3912 /*Scale Ratio, taps Support Check*/ 3913 3914 v->ScaleRatioAndTapsSupport = true; 3915 for (k = 0; k < v->NumberOfActivePlanes; k++) { 3916 if (v->ScalerEnabled[k] == false 3917 && ((v->SourcePixelFormat[k] != dm_444_64 && v->SourcePixelFormat[k] != dm_444_32 3918 && v->SourcePixelFormat[k] != dm_444_16 && v->SourcePixelFormat[k] != dm_mono_16 3919 && v->SourcePixelFormat[k] != dm_mono_8 && v->SourcePixelFormat[k] != dm_rgbe 3920 && v->SourcePixelFormat[k] != dm_rgbe_alpha) || v->HRatio[k] != 1.0 || v->htaps[k] != 1.0 3921 || v->VRatio[k] != 1.0 || v->vtaps[k] != 1.0)) { 3922 v->ScaleRatioAndTapsSupport = false; 3923 } else if (v->vtaps[k] < 1.0 || v->vtaps[k] > 8.0 || v->htaps[k] < 1.0 || v->htaps[k] > 8.0 3924 || (v->htaps[k] > 1.0 && (v->htaps[k] % 2) == 1) || v->HRatio[k] > v->MaxHSCLRatio 3925 || v->VRatio[k] > v->MaxVSCLRatio || v->HRatio[k] > v->htaps[k] 3926 || v->VRatio[k] > v->vtaps[k] 3927 || (v->SourcePixelFormat[k] != dm_444_64 && v->SourcePixelFormat[k] != dm_444_32 3928 && 
v->SourcePixelFormat[k] != dm_444_16 && v->SourcePixelFormat[k] != dm_mono_16 3929 && v->SourcePixelFormat[k] != dm_mono_8 && v->SourcePixelFormat[k] != dm_rgbe 3930 && (v->VTAPsChroma[k] < 1 || v->VTAPsChroma[k] > 8 || v->HTAPsChroma[k] < 1 3931 || v->HTAPsChroma[k] > 8 || (v->HTAPsChroma[k] > 1 && v->HTAPsChroma[k] % 2 == 1) 3932 || v->HRatioChroma[k] > v->MaxHSCLRatio 3933 || v->VRatioChroma[k] > v->MaxVSCLRatio 3934 || v->HRatioChroma[k] > v->HTAPsChroma[k] 3935 || v->VRatioChroma[k] > v->VTAPsChroma[k]))) { 3936 v->ScaleRatioAndTapsSupport = false; 3937 } 3938 } 3939 /*Source Format, Pixel Format and Scan Support Check*/ 3940 3941 v->SourceFormatPixelAndScanSupport = true; 3942 for (k = 0; k < v->NumberOfActivePlanes; k++) { 3943 if (v->SurfaceTiling[k] == dm_sw_linear && (!(v->SourceScan[k] != dm_vert) || v->DCCEnable[k] == true)) { 3944 v->SourceFormatPixelAndScanSupport = false; 3945 } 3946 } 3947 /*Bandwidth Support Check*/ 3948 3949 for (k = 0; k < v->NumberOfActivePlanes; k++) { 3950 CalculateBytePerPixelAnd256BBlockSizes( 3951 v->SourcePixelFormat[k], 3952 v->SurfaceTiling[k], 3953 &v->BytePerPixelY[k], 3954 &v->BytePerPixelC[k], 3955 &v->BytePerPixelInDETY[k], 3956 &v->BytePerPixelInDETC[k], 3957 &v->Read256BlockHeightY[k], 3958 &v->Read256BlockHeightC[k], 3959 &v->Read256BlockWidthY[k], 3960 &v->Read256BlockWidthC[k]); 3961 } 3962 for (k = 0; k < v->NumberOfActivePlanes; k++) { 3963 if (v->SourceScan[k] != dm_vert) { 3964 v->SwathWidthYSingleDPP[k] = v->ViewportWidth[k]; 3965 v->SwathWidthCSingleDPP[k] = v->ViewportWidthChroma[k]; 3966 } else { 3967 v->SwathWidthYSingleDPP[k] = v->ViewportHeight[k]; 3968 v->SwathWidthCSingleDPP[k] = v->ViewportHeightChroma[k]; 3969 } 3970 } 3971 for (k = 0; k < v->NumberOfActivePlanes; k++) { 3972 v->ReadBandwidthLuma[k] = v->SwathWidthYSingleDPP[k] * dml_ceil(v->BytePerPixelInDETY[k], 1.0) 3973 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatio[k]; 3974 v->ReadBandwidthChroma[k] = v->SwathWidthYSingleDPP[k] / 2 * 
dml_ceil(v->BytePerPixelInDETC[k], 2.0) 3975 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatio[k] / 2.0; 3976 } 3977 for (k = 0; k < v->NumberOfActivePlanes; k++) { 3978 if (v->WritebackEnable[k] == true && v->WritebackPixelFormat[k] == dm_444_64) { 3979 v->WriteBandwidth[k] = v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k] 3980 / (v->WritebackSourceHeight[k] * v->HTotal[k] / v->PixelClock[k]) * 8.0; 3981 } else if (v->WritebackEnable[k] == true) { 3982 v->WriteBandwidth[k] = v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k] 3983 / (v->WritebackSourceHeight[k] * v->HTotal[k] / v->PixelClock[k]) * 4.0; 3984 } else { 3985 v->WriteBandwidth[k] = 0.0; 3986 } 3987 } 3988 3989 /*Writeback Latency support check*/ 3990 3991 v->WritebackLatencySupport = true; 3992 for (k = 0; k < v->NumberOfActivePlanes; k++) { 3993 if (v->WritebackEnable[k] == true && (v->WriteBandwidth[k] > v->WritebackInterfaceBufferSize * 1024 / v->WritebackLatency)) { 3994 v->WritebackLatencySupport = false; 3995 } 3996 } 3997 3998 /*Writeback Mode Support Check*/ 3999 4000 v->TotalNumberOfActiveWriteback = 0; 4001 for (k = 0; k < v->NumberOfActivePlanes; k++) { 4002 if (v->WritebackEnable[k] == true) { 4003 v->TotalNumberOfActiveWriteback = v->TotalNumberOfActiveWriteback + 1; 4004 } 4005 } 4006 4007 if (v->TotalNumberOfActiveWriteback > v->MaxNumWriteback) { 4008 EnoughWritebackUnits = false; 4009 } 4010 4011 /*Writeback Scale Ratio and Taps Support Check*/ 4012 4013 v->WritebackScaleRatioAndTapsSupport = true; 4014 for (k = 0; k < v->NumberOfActivePlanes; k++) { 4015 if (v->WritebackEnable[k] == true) { 4016 if (v->WritebackHRatio[k] > v->WritebackMaxHSCLRatio || v->WritebackVRatio[k] > v->WritebackMaxVSCLRatio 4017 || v->WritebackHRatio[k] < v->WritebackMinHSCLRatio 4018 || v->WritebackVRatio[k] < v->WritebackMinVSCLRatio 4019 || v->WritebackHTaps[k] > v->WritebackMaxHSCLTaps 4020 || v->WritebackVTaps[k] > v->WritebackMaxVSCLTaps 4021 || v->WritebackHRatio[k] > 
v->WritebackHTaps[k] || v->WritebackVRatio[k] > v->WritebackVTaps[k] 4022 || (v->WritebackHTaps[k] > 2.0 && ((v->WritebackHTaps[k] % 2) == 1))) { 4023 v->WritebackScaleRatioAndTapsSupport = false; 4024 } 4025 if (2.0 * v->WritebackDestinationWidth[k] * (v->WritebackVTaps[k] - 1) * 57 > v->WritebackLineBufferSize) { 4026 v->WritebackScaleRatioAndTapsSupport = false; 4027 } 4028 } 4029 } 4030 /*Maximum DISPCLK/DPPCLK Support check*/ 4031 4032 v->WritebackRequiredDISPCLK = 0.0; 4033 for (k = 0; k < v->NumberOfActivePlanes; k++) { 4034 if (v->WritebackEnable[k] == true) { 4035 v->WritebackRequiredDISPCLK = dml_max( 4036 v->WritebackRequiredDISPCLK, 4037 dml314_CalculateWriteBackDISPCLK( 4038 v->WritebackPixelFormat[k], 4039 v->PixelClock[k], 4040 v->WritebackHRatio[k], 4041 v->WritebackVRatio[k], 4042 v->WritebackHTaps[k], 4043 v->WritebackVTaps[k], 4044 v->WritebackSourceWidth[k], 4045 v->WritebackDestinationWidth[k], 4046 v->HTotal[k], 4047 v->WritebackLineBufferSize)); 4048 } 4049 } 4050 for (k = 0; k < v->NumberOfActivePlanes; k++) { 4051 if (v->HRatio[k] > 1.0) { 4052 v->PSCL_FACTOR[k] = dml_min( 4053 v->MaxDCHUBToPSCLThroughput, 4054 v->MaxPSCLToLBThroughput * v->HRatio[k] / dml_ceil(v->htaps[k] / 6.0, 1.0)); 4055 } else { 4056 v->PSCL_FACTOR[k] = dml_min(v->MaxDCHUBToPSCLThroughput, v->MaxPSCLToLBThroughput); 4057 } 4058 if (v->BytePerPixelC[k] == 0.0) { 4059 v->PSCL_FACTOR_CHROMA[k] = 0.0; 4060 v->MinDPPCLKUsingSingleDPP[k] = v->PixelClock[k] 4061 * dml_max3( 4062 v->vtaps[k] / 6.0 * dml_min(1.0, v->HRatio[k]), 4063 v->HRatio[k] * v->VRatio[k] / v->PSCL_FACTOR[k], 4064 1.0); 4065 if ((v->htaps[k] > 6.0 || v->vtaps[k] > 6.0) && v->MinDPPCLKUsingSingleDPP[k] < 2.0 * v->PixelClock[k]) { 4066 v->MinDPPCLKUsingSingleDPP[k] = 2.0 * v->PixelClock[k]; 4067 } 4068 } else { 4069 if (v->HRatioChroma[k] > 1.0) { 4070 v->PSCL_FACTOR_CHROMA[k] = dml_min( 4071 v->MaxDCHUBToPSCLThroughput, 4072 v->MaxPSCLToLBThroughput * v->HRatioChroma[k] / dml_ceil(v->HTAPsChroma[k] / 6.0, 
1.0)); 4073 } else { 4074 v->PSCL_FACTOR_CHROMA[k] = dml_min(v->MaxDCHUBToPSCLThroughput, v->MaxPSCLToLBThroughput); 4075 } 4076 v->MinDPPCLKUsingSingleDPP[k] = v->PixelClock[k] 4077 * dml_max5( 4078 v->vtaps[k] / 6.0 * dml_min(1.0, v->HRatio[k]), 4079 v->HRatio[k] * v->VRatio[k] / v->PSCL_FACTOR[k], 4080 v->VTAPsChroma[k] / 6.0 * dml_min(1.0, v->HRatioChroma[k]), 4081 v->HRatioChroma[k] * v->VRatioChroma[k] / v->PSCL_FACTOR_CHROMA[k], 4082 1.0); 4083 if ((v->htaps[k] > 6.0 || v->vtaps[k] > 6.0 || v->HTAPsChroma[k] > 6.0 || v->VTAPsChroma[k] > 6.0) 4084 && v->MinDPPCLKUsingSingleDPP[k] < 2.0 * v->PixelClock[k]) { 4085 v->MinDPPCLKUsingSingleDPP[k] = 2.0 * v->PixelClock[k]; 4086 } 4087 } 4088 } 4089 for (k = 0; k < v->NumberOfActivePlanes; k++) { 4090 int MaximumSwathWidthSupportLuma; 4091 int MaximumSwathWidthSupportChroma; 4092 4093 if (v->SurfaceTiling[k] == dm_sw_linear) { 4094 MaximumSwathWidthSupportLuma = 8192.0; 4095 } else if (v->SourceScan[k] == dm_vert && v->BytePerPixelC[k] > 0) { 4096 MaximumSwathWidthSupportLuma = 2880.0; 4097 } else if (v->SourcePixelFormat[k] == dm_rgbe_alpha) { 4098 MaximumSwathWidthSupportLuma = 3840.0; 4099 } else { 4100 MaximumSwathWidthSupportLuma = 5760.0; 4101 } 4102 4103 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12) { 4104 MaximumSwathWidthSupportChroma = MaximumSwathWidthSupportLuma / 2.0; 4105 } else { 4106 MaximumSwathWidthSupportChroma = MaximumSwathWidthSupportLuma; 4107 } 4108 v->MaximumSwathWidthInLineBufferLuma = v->LineBufferSize * dml_max(v->HRatio[k], 1.0) / v->LBBitPerPixel[k] 4109 / (v->vtaps[k] + dml_max(dml_ceil(v->VRatio[k], 1.0) - 2, 0.0)); 4110 if (v->BytePerPixelC[k] == 0.0) { 4111 v->MaximumSwathWidthInLineBufferChroma = 0; 4112 } else { 4113 v->MaximumSwathWidthInLineBufferChroma = v->LineBufferSize * dml_max(v->HRatioChroma[k], 1.0) / v->LBBitPerPixel[k] 4114 / (v->VTAPsChroma[k] + dml_max(dml_ceil(v->VRatioChroma[k], 1.0) - 2, 
0.0)); 4115 } 4116 v->MaximumSwathWidthLuma[k] = dml_min(MaximumSwathWidthSupportLuma, v->MaximumSwathWidthInLineBufferLuma); 4117 v->MaximumSwathWidthChroma[k] = dml_min(MaximumSwathWidthSupportChroma, v->MaximumSwathWidthInLineBufferChroma); 4118 } 4119 4120 CalculateSwathAndDETConfiguration( 4121 true, 4122 v->NumberOfActivePlanes, 4123 v->DETBufferSizeInKByte[0], 4124 v->MaximumSwathWidthLuma, 4125 v->MaximumSwathWidthChroma, 4126 v->SourceScan, 4127 v->SourcePixelFormat, 4128 v->SurfaceTiling, 4129 v->ViewportWidth, 4130 v->ViewportHeight, 4131 v->SurfaceWidthY, 4132 v->SurfaceWidthC, 4133 v->SurfaceHeightY, 4134 v->SurfaceHeightC, 4135 v->Read256BlockHeightY, 4136 v->Read256BlockHeightC, 4137 v->Read256BlockWidthY, 4138 v->Read256BlockWidthC, 4139 v->odm_combine_dummy, 4140 v->BlendingAndTiming, 4141 v->BytePerPixelY, 4142 v->BytePerPixelC, 4143 v->BytePerPixelInDETY, 4144 v->BytePerPixelInDETC, 4145 v->HActive, 4146 v->HRatio, 4147 v->HRatioChroma, 4148 v->NoOfDPPThisState, 4149 v->swath_width_luma_ub_this_state, 4150 v->swath_width_chroma_ub_this_state, 4151 v->SwathWidthYThisState, 4152 v->SwathWidthCThisState, 4153 v->SwathHeightYThisState, 4154 v->SwathHeightCThisState, 4155 v->DETBufferSizeYThisState, 4156 v->DETBufferSizeCThisState, 4157 v->SingleDPPViewportSizeSupportPerPlane, 4158 &v->ViewportSizeSupport[0][0]); 4159 4160 for (i = 0; i < v->soc.num_states; i++) { 4161 for (j = 0; j < 2; j++) { 4162 v->MaxDispclkRoundedDownToDFSGranularity = RoundToDFSGranularityDown(v->MaxDispclk[i], v->DISPCLKDPPCLKVCOSpeed); 4163 v->MaxDppclkRoundedDownToDFSGranularity = RoundToDFSGranularityDown(v->MaxDppclk[i], v->DISPCLKDPPCLKVCOSpeed); 4164 v->RequiredDISPCLK[i][j] = 0.0; 4165 v->DISPCLK_DPPCLK_Support[i][j] = true; 4166 for (k = 0; k < v->NumberOfActivePlanes; k++) { 4167 v->PlaneRequiredDISPCLKWithoutODMCombine = v->PixelClock[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) 4168 * (1.0 + v->DISPCLKRampingMargin / 100.0); 4169 if 
((v->PlaneRequiredDISPCLKWithoutODMCombine >= v->MaxDispclk[i] 4170 && v->MaxDispclk[i] == v->MaxDispclk[v->soc.num_states - 1] 4171 && v->MaxDppclk[i] == v->MaxDppclk[v->soc.num_states - 1])) { 4172 v->PlaneRequiredDISPCLKWithoutODMCombine = v->PixelClock[k] 4173 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0); 4174 } 4175 v->PlaneRequiredDISPCLKWithODMCombine2To1 = v->PixelClock[k] / 2 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) 4176 * (1 + v->DISPCLKRampingMargin / 100.0); 4177 if ((v->PlaneRequiredDISPCLKWithODMCombine2To1 >= v->MaxDispclk[i] 4178 && v->MaxDispclk[i] == v->MaxDispclk[v->soc.num_states - 1] 4179 && v->MaxDppclk[i] == v->MaxDppclk[v->soc.num_states - 1])) { 4180 v->PlaneRequiredDISPCLKWithODMCombine2To1 = v->PixelClock[k] / 2 4181 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0); 4182 } 4183 v->PlaneRequiredDISPCLKWithODMCombine4To1 = v->PixelClock[k] / 4 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) 4184 * (1 + v->DISPCLKRampingMargin / 100.0); 4185 if ((v->PlaneRequiredDISPCLKWithODMCombine4To1 >= v->MaxDispclk[i] 4186 && v->MaxDispclk[i] == v->MaxDispclk[v->soc.num_states - 1] 4187 && v->MaxDppclk[i] == v->MaxDppclk[v->soc.num_states - 1])) { 4188 v->PlaneRequiredDISPCLKWithODMCombine4To1 = v->PixelClock[k] / 4 4189 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0); 4190 } 4191 4192 if (v->ODMCombinePolicy == dm_odm_combine_policy_none 4193 || !(v->Output[k] == dm_dp || 4194 v->Output[k] == dm_dp2p0 || 4195 v->Output[k] == dm_edp)) { 4196 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_disabled; 4197 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithoutODMCombine; 4198 4199 if (v->HActive[k] / 2 > DCN314_MAX_FMT_420_BUFFER_WIDTH) 4200 FMTBufferExceeded = true; 4201 } else if (v->ODMCombinePolicy == dm_odm_combine_policy_2to1) { 4202 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1; 4203 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine2To1; 4204 } else if (v->ODMCombinePolicy == 
dm_odm_combine_policy_4to1 4205 || v->PlaneRequiredDISPCLKWithODMCombine2To1 > v->MaxDispclkRoundedDownToDFSGranularity) { 4206 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_4to1; 4207 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine4To1; 4208 } else if (v->PlaneRequiredDISPCLKWithoutODMCombine > v->MaxDispclkRoundedDownToDFSGranularity) { 4209 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1; 4210 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine2To1; 4211 } else { 4212 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_disabled; 4213 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithoutODMCombine; 4214 } 4215 if (v->DSCEnabled[k] && v->HActive[k] > DCN314_MAX_DSC_IMAGE_WIDTH 4216 && v->ODMCombineEnablePerState[i][k] != dm_odm_combine_mode_4to1) { 4217 if (v->HActive[k] / 2 > DCN314_MAX_DSC_IMAGE_WIDTH) { 4218 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_4to1; 4219 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine4To1; 4220 } else { 4221 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1; 4222 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine2To1; 4223 } 4224 } 4225 if (v->OutputFormat[k] == dm_420 && v->HActive[k] > DCN314_MAX_FMT_420_BUFFER_WIDTH 4226 && v->ODMCombineEnablePerState[i][k] != dm_odm_combine_mode_4to1) { 4227 if (v->HActive[k] / 2 > DCN314_MAX_FMT_420_BUFFER_WIDTH) { 4228 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_4to1; 4229 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine4To1; 4230 4231 if (v->HActive[k] / 4 > DCN314_MAX_FMT_420_BUFFER_WIDTH) 4232 FMTBufferExceeded = true; 4233 } else { 4234 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1; 4235 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine2To1; 4236 } 4237 } 4238 if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1) { 4239 v->MPCCombine[i][j][k] = false; 4240 v->NoOfDPP[i][j][k] = 4; 4241 v->RequiredDPPCLK[i][j][k] = 
v->MinDPPCLKUsingSingleDPP[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 4; 4242 } else if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) { 4243 v->MPCCombine[i][j][k] = false; 4244 v->NoOfDPP[i][j][k] = 2; 4245 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 2; 4246 } else if ((v->WhenToDoMPCCombine == dm_mpc_never 4247 || (v->MinDPPCLKUsingSingleDPP[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) 4248 <= v->MaxDppclkRoundedDownToDFSGranularity && v->SingleDPPViewportSizeSupportPerPlane[k] == true))) { 4249 v->MPCCombine[i][j][k] = false; 4250 v->NoOfDPP[i][j][k] = 1; 4251 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0); 4252 } else { 4253 v->MPCCombine[i][j][k] = true; 4254 v->NoOfDPP[i][j][k] = 2; 4255 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 2.0; 4256 } 4257 v->RequiredDISPCLK[i][j] = dml_max(v->RequiredDISPCLK[i][j], v->PlaneRequiredDISPCLK); 4258 if ((v->MinDPPCLKUsingSingleDPP[k] / v->NoOfDPP[i][j][k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) 4259 > v->MaxDppclkRoundedDownToDFSGranularity) 4260 || (v->PlaneRequiredDISPCLK > v->MaxDispclkRoundedDownToDFSGranularity)) { 4261 v->DISPCLK_DPPCLK_Support[i][j] = false; 4262 } 4263 } 4264 v->TotalNumberOfActiveDPP[i][j] = 0; 4265 v->TotalNumberOfSingleDPPPlanes[i][j] = 0; 4266 for (k = 0; k < v->NumberOfActivePlanes; k++) { 4267 v->TotalNumberOfActiveDPP[i][j] = v->TotalNumberOfActiveDPP[i][j] + v->NoOfDPP[i][j][k]; 4268 if (v->NoOfDPP[i][j][k] == 1) 4269 v->TotalNumberOfSingleDPPPlanes[i][j] = v->TotalNumberOfSingleDPPPlanes[i][j] + 1; 4270 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10 4271 || v->SourcePixelFormat[k] == dm_420_12 || v->SourcePixelFormat[k] == dm_rgbe_alpha) 4272 NoChroma = false; 4273 } 4274 4275 // UPTO 4276 if (j == 
1 && v->WhenToDoMPCCombine != dm_mpc_never 4277 && !UnboundedRequest(v->UseUnboundedRequesting, v->TotalNumberOfActiveDPP[i][j], NoChroma, v->Output[0])) { 4278 while (!(v->TotalNumberOfActiveDPP[i][j] >= v->MaxNumDPP || v->TotalNumberOfSingleDPPPlanes[i][j] == 0)) { 4279 double BWOfNonSplitPlaneOfMaximumBandwidth; 4280 unsigned int NumberOfNonSplitPlaneOfMaximumBandwidth; 4281 4282 BWOfNonSplitPlaneOfMaximumBandwidth = 0; 4283 NumberOfNonSplitPlaneOfMaximumBandwidth = 0; 4284 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 4285 if (v->ReadBandwidthLuma[k] + v->ReadBandwidthChroma[k] > BWOfNonSplitPlaneOfMaximumBandwidth 4286 && v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_disabled && v->MPCCombine[i][j][k] == false) { 4287 BWOfNonSplitPlaneOfMaximumBandwidth = v->ReadBandwidthLuma[k] + v->ReadBandwidthChroma[k]; 4288 NumberOfNonSplitPlaneOfMaximumBandwidth = k; 4289 } 4290 } 4291 v->MPCCombine[i][j][NumberOfNonSplitPlaneOfMaximumBandwidth] = true; 4292 v->NoOfDPP[i][j][NumberOfNonSplitPlaneOfMaximumBandwidth] = 2; 4293 v->RequiredDPPCLK[i][j][NumberOfNonSplitPlaneOfMaximumBandwidth] = 4294 v->MinDPPCLKUsingSingleDPP[NumberOfNonSplitPlaneOfMaximumBandwidth] 4295 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100) / 2; 4296 v->TotalNumberOfActiveDPP[i][j] = v->TotalNumberOfActiveDPP[i][j] + 1; 4297 v->TotalNumberOfSingleDPPPlanes[i][j] = v->TotalNumberOfSingleDPPPlanes[i][j] - 1; 4298 } 4299 } 4300 if (v->TotalNumberOfActiveDPP[i][j] > v->MaxNumDPP) { 4301 v->RequiredDISPCLK[i][j] = 0.0; 4302 v->DISPCLK_DPPCLK_Support[i][j] = true; 4303 for (k = 0; k < v->NumberOfActivePlanes; k++) { 4304 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_disabled; 4305 if (v->SingleDPPViewportSizeSupportPerPlane[k] == false && v->WhenToDoMPCCombine != dm_mpc_never) { 4306 v->MPCCombine[i][j][k] = true; 4307 v->NoOfDPP[i][j][k] = 2; 4308 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] 4309 * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 2.0; 4310 } 
else { 4311 v->MPCCombine[i][j][k] = false; 4312 v->NoOfDPP[i][j][k] = 1; 4313 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] 4314 * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0); 4315 } 4316 if (!(v->MaxDispclk[i] == v->MaxDispclk[v->soc.num_states - 1] 4317 && v->MaxDppclk[i] == v->MaxDppclk[v->soc.num_states - 1])) { 4318 v->PlaneRequiredDISPCLK = v->PixelClock[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) 4319 * (1.0 + v->DISPCLKRampingMargin / 100.0); 4320 } else { 4321 v->PlaneRequiredDISPCLK = v->PixelClock[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0); 4322 } 4323 v->RequiredDISPCLK[i][j] = dml_max(v->RequiredDISPCLK[i][j], v->PlaneRequiredDISPCLK); 4324 if ((v->MinDPPCLKUsingSingleDPP[k] / v->NoOfDPP[i][j][k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) 4325 > v->MaxDppclkRoundedDownToDFSGranularity) 4326 || (v->PlaneRequiredDISPCLK > v->MaxDispclkRoundedDownToDFSGranularity)) { 4327 v->DISPCLK_DPPCLK_Support[i][j] = false; 4328 } 4329 } 4330 v->TotalNumberOfActiveDPP[i][j] = 0.0; 4331 for (k = 0; k < v->NumberOfActivePlanes; k++) { 4332 v->TotalNumberOfActiveDPP[i][j] = v->TotalNumberOfActiveDPP[i][j] + v->NoOfDPP[i][j][k]; 4333 } 4334 } 4335 v->RequiredDISPCLK[i][j] = dml_max(v->RequiredDISPCLK[i][j], v->WritebackRequiredDISPCLK); 4336 if (v->MaxDispclkRoundedDownToDFSGranularity < v->WritebackRequiredDISPCLK) { 4337 v->DISPCLK_DPPCLK_Support[i][j] = false; 4338 } 4339 } 4340 } 4341 4342 /*Total Available Pipes Support Check*/ 4343 4344 for (i = 0; i < v->soc.num_states; i++) { 4345 for (j = 0; j < 2; j++) { 4346 if (v->TotalNumberOfActiveDPP[i][j] <= v->MaxNumDPP) { 4347 v->TotalAvailablePipesSupport[i][j] = true; 4348 } else { 4349 v->TotalAvailablePipesSupport[i][j] = false; 4350 } 4351 } 4352 } 4353 /*Display IO and DSC Support Check*/ 4354 4355 v->NonsupportedDSCInputBPC = false; 4356 for (k = 0; k < v->NumberOfActivePlanes; k++) { 4357 if (!(v->DSCInputBitPerComponent[k] == 12.0 || 
v->DSCInputBitPerComponent[k] == 10.0 || v->DSCInputBitPerComponent[k] == 8.0) 4358 || v->DSCInputBitPerComponent[k] > v->MaximumDSCBitsPerComponent) { 4359 v->NonsupportedDSCInputBPC = true; 4360 } 4361 } 4362 4363 /*Number Of DSC Slices*/ 4364 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 4365 if (v->BlendingAndTiming[k] == k) { 4366 if (v->PixelClockBackEnd[k] > 3200) { 4367 v->NumberOfDSCSlices[k] = dml_ceil(v->PixelClockBackEnd[k] / 400.0, 4.0); 4368 } else if (v->PixelClockBackEnd[k] > 1360) { 4369 v->NumberOfDSCSlices[k] = 8; 4370 } else if (v->PixelClockBackEnd[k] > 680) { 4371 v->NumberOfDSCSlices[k] = 4; 4372 } else if (v->PixelClockBackEnd[k] > 340) { 4373 v->NumberOfDSCSlices[k] = 2; 4374 } else { 4375 v->NumberOfDSCSlices[k] = 1; 4376 } 4377 } else { 4378 v->NumberOfDSCSlices[k] = 0; 4379 } 4380 } 4381 4382 for (i = 0; i < v->soc.num_states; i++) { 4383 for (k = 0; k < v->NumberOfActivePlanes; k++) { 4384 v->RequiresDSC[i][k] = false; 4385 v->RequiresFEC[i][k] = false; 4386 if (v->BlendingAndTiming[k] == k) { 4387 if (v->Output[k] == dm_hdmi) { 4388 v->RequiresDSC[i][k] = false; 4389 v->RequiresFEC[i][k] = false; 4390 v->OutputBppPerState[i][k] = TruncToValidBPP( 4391 dml_min(600.0, v->PHYCLKPerState[i]) * 10, 4392 3, 4393 v->HTotal[k], 4394 v->HActive[k], 4395 v->PixelClockBackEnd[k], 4396 v->ForcedOutputLinkBPP[k], 4397 false, 4398 v->Output[k], 4399 v->OutputFormat[k], 4400 v->DSCInputBitPerComponent[k], 4401 v->NumberOfDSCSlices[k], 4402 v->AudioSampleRate[k], 4403 v->AudioSampleLayout[k], 4404 v->ODMCombineEnablePerState[i][k]); 4405 } else if (v->Output[k] == dm_dp || v->Output[k] == dm_edp) { 4406 if (v->DSCEnable[k] == true) { 4407 v->RequiresDSC[i][k] = true; 4408 v->LinkDSCEnable = true; 4409 if (v->Output[k] == dm_dp) { 4410 v->RequiresFEC[i][k] = true; 4411 } else { 4412 v->RequiresFEC[i][k] = false; 4413 } 4414 } else { 4415 v->RequiresDSC[i][k] = false; 4416 v->LinkDSCEnable = false; 4417 v->RequiresFEC[i][k] = false; 4418 } 4419 4420 
v->Outbpp = BPP_INVALID; 4421 if (v->PHYCLKPerState[i] >= 270.0) { 4422 v->Outbpp = TruncToValidBPP( 4423 (1.0 - v->Downspreading / 100.0) * 2700, 4424 v->OutputLinkDPLanes[k], 4425 v->HTotal[k], 4426 v->HActive[k], 4427 v->PixelClockBackEnd[k], 4428 v->ForcedOutputLinkBPP[k], 4429 v->LinkDSCEnable, 4430 v->Output[k], 4431 v->OutputFormat[k], 4432 v->DSCInputBitPerComponent[k], 4433 v->NumberOfDSCSlices[k], 4434 v->AudioSampleRate[k], 4435 v->AudioSampleLayout[k], 4436 v->ODMCombineEnablePerState[i][k]); 4437 v->OutputBppPerState[i][k] = v->Outbpp; 4438 // TODO: Need some other way to handle this nonsense 4439 // v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " HBR" 4440 } 4441 if (v->Outbpp == BPP_INVALID && v->PHYCLKPerState[i] >= 540.0) { 4442 v->Outbpp = TruncToValidBPP( 4443 (1.0 - v->Downspreading / 100.0) * 5400, 4444 v->OutputLinkDPLanes[k], 4445 v->HTotal[k], 4446 v->HActive[k], 4447 v->PixelClockBackEnd[k], 4448 v->ForcedOutputLinkBPP[k], 4449 v->LinkDSCEnable, 4450 v->Output[k], 4451 v->OutputFormat[k], 4452 v->DSCInputBitPerComponent[k], 4453 v->NumberOfDSCSlices[k], 4454 v->AudioSampleRate[k], 4455 v->AudioSampleLayout[k], 4456 v->ODMCombineEnablePerState[i][k]); 4457 v->OutputBppPerState[i][k] = v->Outbpp; 4458 // TODO: Need some other way to handle this nonsense 4459 // v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " HBR2" 4460 } 4461 if (v->Outbpp == BPP_INVALID && v->PHYCLKPerState[i] >= 810.0) { 4462 v->Outbpp = TruncToValidBPP( 4463 (1.0 - v->Downspreading / 100.0) * 8100, 4464 v->OutputLinkDPLanes[k], 4465 v->HTotal[k], 4466 v->HActive[k], 4467 v->PixelClockBackEnd[k], 4468 v->ForcedOutputLinkBPP[k], 4469 v->LinkDSCEnable, 4470 v->Output[k], 4471 v->OutputFormat[k], 4472 v->DSCInputBitPerComponent[k], 4473 v->NumberOfDSCSlices[k], 4474 v->AudioSampleRate[k], 4475 v->AudioSampleLayout[k], 4476 v->ODMCombineEnablePerState[i][k]); 4477 v->OutputBppPerState[i][k] = v->Outbpp; 4478 // TODO: Need some other way to handle this nonsense 4479 
// v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " HBR3" 4480 } 4481 if (v->Outbpp == BPP_INVALID && v->PHYCLKD18PerState[i] >= 10000.0 / 18) { 4482 v->Outbpp = TruncToValidBPP( 4483 (1.0 - v->Downspreading / 100.0) * 10000, 4484 4, 4485 v->HTotal[k], 4486 v->HActive[k], 4487 v->PixelClockBackEnd[k], 4488 v->ForcedOutputLinkBPP[k], 4489 v->LinkDSCEnable, 4490 v->Output[k], 4491 v->OutputFormat[k], 4492 v->DSCInputBitPerComponent[k], 4493 v->NumberOfDSCSlices[k], 4494 v->AudioSampleRate[k], 4495 v->AudioSampleLayout[k], 4496 v->ODMCombineEnablePerState[i][k]); 4497 v->OutputBppPerState[i][k] = v->Outbpp; 4498 //v->OutputTypeAndRatePerState[i][k] = v->Output[k] & "10x4"; 4499 } 4500 if (v->Outbpp == BPP_INVALID && v->PHYCLKD18PerState[i] >= 12000.0 / 18) { 4501 v->Outbpp = TruncToValidBPP( 4502 12000, 4503 4, 4504 v->HTotal[k], 4505 v->HActive[k], 4506 v->PixelClockBackEnd[k], 4507 v->ForcedOutputLinkBPP[k], 4508 v->LinkDSCEnable, 4509 v->Output[k], 4510 v->OutputFormat[k], 4511 v->DSCInputBitPerComponent[k], 4512 v->NumberOfDSCSlices[k], 4513 v->AudioSampleRate[k], 4514 v->AudioSampleLayout[k], 4515 v->ODMCombineEnablePerState[i][k]); 4516 v->OutputBppPerState[i][k] = v->Outbpp; 4517 //v->OutputTypeAndRatePerState[i][k] = v->Output[k] & "12x4"; 4518 } 4519 } 4520 } else { 4521 v->OutputBppPerState[i][k] = 0; 4522 } 4523 } 4524 } 4525 4526 for (i = 0; i < v->soc.num_states; i++) { 4527 v->LinkCapacitySupport[i] = true; 4528 for (k = 0; k < v->NumberOfActivePlanes; k++) { 4529 if (v->BlendingAndTiming[k] == k 4530 && (v->Output[k] == dm_dp || 4531 v->Output[k] == dm_edp || 4532 v->Output[k] == dm_hdmi) && v->OutputBppPerState[i][k] == 0) { 4533 v->LinkCapacitySupport[i] = false; 4534 } 4535 } 4536 } 4537 4538 // UPTO 2172 4539 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 4540 if (v->BlendingAndTiming[k] == k 4541 && (v->Output[k] == dm_dp || 4542 v->Output[k] == dm_edp || 4543 v->Output[k] == dm_hdmi)) { 4544 if (v->OutputFormat[k] == dm_420 && 
v->Interlace[k] == 1 && v->ProgressiveToInterlaceUnitInOPP == true) { 4545 P2IWith420 = true; 4546 } 4547 if (v->DSCEnable[k] == true && v->OutputFormat[k] == dm_n422 4548 && !v->DSC422NativeSupport) { 4549 DSC422NativeNotSupported = true; 4550 } 4551 } 4552 } 4553 4554 4555 for (i = 0; i < v->soc.num_states; ++i) { 4556 v->ODMCombine4To1SupportCheckOK[i] = true; 4557 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 4558 if (v->BlendingAndTiming[k] == k && v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1 4559 && (v->ODMCombine4To1Supported == false || v->Output[k] == dm_dp || v->Output[k] == dm_edp 4560 || v->Output[k] == dm_hdmi)) { 4561 v->ODMCombine4To1SupportCheckOK[i] = false; 4562 } 4563 } 4564 } 4565 4566 /* Skip dscclk validation: as long as dispclk is supported, dscclk is also implicitly supported */ 4567 4568 for (i = 0; i < v->soc.num_states; i++) { 4569 v->NotEnoughDSCUnits[i] = false; 4570 v->TotalDSCUnitsRequired = 0.0; 4571 for (k = 0; k < v->NumberOfActivePlanes; k++) { 4572 if (v->RequiresDSC[i][k] == true) { 4573 if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1) { 4574 v->TotalDSCUnitsRequired = v->TotalDSCUnitsRequired + 4.0; 4575 } else if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) { 4576 v->TotalDSCUnitsRequired = v->TotalDSCUnitsRequired + 2.0; 4577 } else { 4578 v->TotalDSCUnitsRequired = v->TotalDSCUnitsRequired + 1.0; 4579 } 4580 } 4581 } 4582 if (v->TotalDSCUnitsRequired > v->NumberOfDSC) { 4583 v->NotEnoughDSCUnits[i] = true; 4584 } 4585 } 4586 /*DSC Delay per state*/ 4587 4588 for (i = 0; i < v->soc.num_states; i++) { 4589 for (k = 0; k < v->NumberOfActivePlanes; k++) { 4590 if (v->OutputBppPerState[i][k] == BPP_INVALID) { 4591 v->BPP = 0.0; 4592 } else { 4593 v->BPP = v->OutputBppPerState[i][k]; 4594 } 4595 if (v->RequiresDSC[i][k] == true && v->BPP != 0.0) { 4596 if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_disabled) { 4597 v->DSCDelayPerState[i][k] = dscceComputeDelay( 
4598 v->DSCInputBitPerComponent[k], 4599 v->BPP, 4600 dml_ceil(1.0 * v->HActive[k] / v->NumberOfDSCSlices[k], 1.0), 4601 v->NumberOfDSCSlices[k], 4602 v->OutputFormat[k], 4603 v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k]); 4604 } else if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) { 4605 v->DSCDelayPerState[i][k] = 2.0 4606 * (dscceComputeDelay( 4607 v->DSCInputBitPerComponent[k], 4608 v->BPP, 4609 dml_ceil(1.0 * v->HActive[k] / v->NumberOfDSCSlices[k], 1.0), 4610 v->NumberOfDSCSlices[k] / 2, 4611 v->OutputFormat[k], 4612 v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k])); 4613 } else { 4614 v->DSCDelayPerState[i][k] = 4.0 4615 * (dscceComputeDelay( 4616 v->DSCInputBitPerComponent[k], 4617 v->BPP, 4618 dml_ceil(1.0 * v->HActive[k] / v->NumberOfDSCSlices[k], 1.0), 4619 v->NumberOfDSCSlices[k] / 4, 4620 v->OutputFormat[k], 4621 v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k])); 4622 } 4623 v->DSCDelayPerState[i][k] = v->DSCDelayPerState[i][k] * v->PixelClock[k] / v->PixelClockBackEnd[k]; 4624 } else { 4625 v->DSCDelayPerState[i][k] = 0.0; 4626 } 4627 } 4628 for (k = 0; k < v->NumberOfActivePlanes; k++) { 4629 for (m = 0; m < v->NumberOfActivePlanes; m++) { 4630 if (v->BlendingAndTiming[k] == m && v->RequiresDSC[i][m] == true) { 4631 v->DSCDelayPerState[i][k] = v->DSCDelayPerState[i][m]; 4632 } 4633 } 4634 } 4635 } 4636 4637 //Calculate Swath, DET Configuration, DCFCLKDeepSleep 4638 // 4639 for (i = 0; i < v->soc.num_states; ++i) { 4640 for (j = 0; j <= 1; ++j) { 4641 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 4642 v->RequiredDPPCLKThisState[k] = v->RequiredDPPCLK[i][j][k]; 4643 v->NoOfDPPThisState[k] = v->NoOfDPP[i][j][k]; 4644 v->ODMCombineEnableThisState[k] = v->ODMCombineEnablePerState[i][k]; 4645 } 4646 4647 CalculateSwathAndDETConfiguration( 4648 false, 4649 v->NumberOfActivePlanes, 4650 v->DETBufferSizeInKByte[0], 4651 v->MaximumSwathWidthLuma, 4652 v->MaximumSwathWidthChroma, 4653 
v->SourceScan, 4654 v->SourcePixelFormat, 4655 v->SurfaceTiling, 4656 v->ViewportWidth, 4657 v->ViewportHeight, 4658 v->SurfaceWidthY, 4659 v->SurfaceWidthC, 4660 v->SurfaceHeightY, 4661 v->SurfaceHeightC, 4662 v->Read256BlockHeightY, 4663 v->Read256BlockHeightC, 4664 v->Read256BlockWidthY, 4665 v->Read256BlockWidthC, 4666 v->ODMCombineEnableThisState, 4667 v->BlendingAndTiming, 4668 v->BytePerPixelY, 4669 v->BytePerPixelC, 4670 v->BytePerPixelInDETY, 4671 v->BytePerPixelInDETC, 4672 v->HActive, 4673 v->HRatio, 4674 v->HRatioChroma, 4675 v->NoOfDPPThisState, 4676 v->swath_width_luma_ub_this_state, 4677 v->swath_width_chroma_ub_this_state, 4678 v->SwathWidthYThisState, 4679 v->SwathWidthCThisState, 4680 v->SwathHeightYThisState, 4681 v->SwathHeightCThisState, 4682 v->DETBufferSizeYThisState, 4683 v->DETBufferSizeCThisState, 4684 v->dummystring, 4685 &v->ViewportSizeSupport[i][j]); 4686 4687 CalculateDCFCLKDeepSleep( 4688 mode_lib, 4689 v->NumberOfActivePlanes, 4690 v->BytePerPixelY, 4691 v->BytePerPixelC, 4692 v->VRatio, 4693 v->VRatioChroma, 4694 v->SwathWidthYThisState, 4695 v->SwathWidthCThisState, 4696 v->NoOfDPPThisState, 4697 v->HRatio, 4698 v->HRatioChroma, 4699 v->PixelClock, 4700 v->PSCL_FACTOR, 4701 v->PSCL_FACTOR_CHROMA, 4702 v->RequiredDPPCLKThisState, 4703 v->ReadBandwidthLuma, 4704 v->ReadBandwidthChroma, 4705 v->ReturnBusWidth, 4706 &v->ProjectedDCFCLKDeepSleep[i][j]); 4707 4708 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 4709 v->swath_width_luma_ub_all_states[i][j][k] = v->swath_width_luma_ub_this_state[k]; 4710 v->swath_width_chroma_ub_all_states[i][j][k] = v->swath_width_chroma_ub_this_state[k]; 4711 v->SwathWidthYAllStates[i][j][k] = v->SwathWidthYThisState[k]; 4712 v->SwathWidthCAllStates[i][j][k] = v->SwathWidthCThisState[k]; 4713 v->SwathHeightYAllStates[i][j][k] = v->SwathHeightYThisState[k]; 4714 v->SwathHeightCAllStates[i][j][k] = v->SwathHeightCThisState[k]; 4715 v->DETBufferSizeYAllStates[i][j][k] = v->DETBufferSizeYThisState[k]; 4716 
v->DETBufferSizeCAllStates[i][j][k] = v->DETBufferSizeCThisState[k]; 4717 } 4718 } 4719 } 4720 4721 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 4722 v->cursor_bw[k] = v->NumberOfCursors[k] * v->CursorWidth[k][0] * v->CursorBPP[k][0] / 8.0 4723 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatio[k]; 4724 } 4725 4726 for (i = 0; i < v->soc.num_states; i++) { 4727 for (j = 0; j < 2; j++) { 4728 bool NotUrgentLatencyHiding[DC__NUM_DPP__MAX]; 4729 4730 for (k = 0; k < v->NumberOfActivePlanes; k++) { 4731 v->swath_width_luma_ub_this_state[k] = v->swath_width_luma_ub_all_states[i][j][k]; 4732 v->swath_width_chroma_ub_this_state[k] = v->swath_width_chroma_ub_all_states[i][j][k]; 4733 v->SwathWidthYThisState[k] = v->SwathWidthYAllStates[i][j][k]; 4734 v->SwathWidthCThisState[k] = v->SwathWidthCAllStates[i][j][k]; 4735 v->SwathHeightYThisState[k] = v->SwathHeightYAllStates[i][j][k]; 4736 v->SwathHeightCThisState[k] = v->SwathHeightCAllStates[i][j][k]; 4737 v->DETBufferSizeYThisState[k] = v->DETBufferSizeYAllStates[i][j][k]; 4738 v->DETBufferSizeCThisState[k] = v->DETBufferSizeCAllStates[i][j][k]; 4739 } 4740 4741 v->TotalNumberOfDCCActiveDPP[i][j] = 0; 4742 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 4743 if (v->DCCEnable[k] == true) { 4744 v->TotalNumberOfDCCActiveDPP[i][j] = v->TotalNumberOfDCCActiveDPP[i][j] + v->NoOfDPP[i][j][k]; 4745 } 4746 } 4747 4748 for (k = 0; k < v->NumberOfActivePlanes; k++) { 4749 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10 4750 || v->SourcePixelFormat[k] == dm_420_12 || v->SourcePixelFormat[k] == dm_rgbe_alpha) { 4751 4752 if ((v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12) 4753 && v->SourceScan[k] != dm_vert) { 4754 v->PTEBufferSizeInRequestsForLuma = (v->PTEBufferSizeInRequestsLuma + v->PTEBufferSizeInRequestsChroma) 4755 / 2; 4756 v->PTEBufferSizeInRequestsForChroma = v->PTEBufferSizeInRequestsForLuma; 4757 } else { 4758 v->PTEBufferSizeInRequestsForLuma = 
v->PTEBufferSizeInRequestsLuma; 4759 v->PTEBufferSizeInRequestsForChroma = v->PTEBufferSizeInRequestsChroma; 4760 } 4761 4762 v->PDEAndMetaPTEBytesPerFrameC = CalculateVMAndRowBytes( 4763 mode_lib, 4764 v->DCCEnable[k], 4765 v->Read256BlockHeightC[k], 4766 v->Read256BlockWidthC[k], 4767 v->SourcePixelFormat[k], 4768 v->SurfaceTiling[k], 4769 v->BytePerPixelC[k], 4770 v->SourceScan[k], 4771 v->SwathWidthCThisState[k], 4772 v->ViewportHeightChroma[k], 4773 v->GPUVMEnable, 4774 v->HostVMEnable, 4775 v->HostVMMaxNonCachedPageTableLevels, 4776 v->GPUVMMinPageSize, 4777 v->HostVMMinPageSize, 4778 v->PTEBufferSizeInRequestsForChroma, 4779 v->PitchC[k], 4780 0.0, 4781 &v->MacroTileWidthC[k], 4782 &v->MetaRowBytesC, 4783 &v->DPTEBytesPerRowC, 4784 &v->PTEBufferSizeNotExceededC[i][j][k], 4785 &v->dummyinteger7, 4786 &v->dpte_row_height_chroma[k], 4787 &v->dummyinteger28, 4788 &v->dummyinteger26, 4789 &v->dummyinteger23, 4790 &v->meta_row_height_chroma[k], 4791 &v->dummyinteger8, 4792 &v->dummyinteger9, 4793 &v->dummyinteger19, 4794 &v->dummyinteger20, 4795 &v->dummyinteger17, 4796 &v->dummyinteger10, 4797 &v->dummyinteger11); 4798 4799 v->PrefetchLinesC[i][j][k] = CalculatePrefetchSourceLines( 4800 mode_lib, 4801 v->VRatioChroma[k], 4802 v->VTAPsChroma[k], 4803 v->Interlace[k], 4804 v->ProgressiveToInterlaceUnitInOPP, 4805 v->SwathHeightCThisState[k], 4806 v->ViewportYStartC[k], 4807 &v->PrefillC[k], 4808 &v->MaxNumSwC[k]); 4809 } else { 4810 v->PTEBufferSizeInRequestsForLuma = v->PTEBufferSizeInRequestsLuma + v->PTEBufferSizeInRequestsChroma; 4811 v->PTEBufferSizeInRequestsForChroma = 0; 4812 v->PDEAndMetaPTEBytesPerFrameC = 0.0; 4813 v->MetaRowBytesC = 0.0; 4814 v->DPTEBytesPerRowC = 0.0; 4815 v->PrefetchLinesC[i][j][k] = 0.0; 4816 v->PTEBufferSizeNotExceededC[i][j][k] = true; 4817 } 4818 v->PDEAndMetaPTEBytesPerFrameY = CalculateVMAndRowBytes( 4819 mode_lib, 4820 v->DCCEnable[k], 4821 v->Read256BlockHeightY[k], 4822 v->Read256BlockWidthY[k], 4823 v->SourcePixelFormat[k], 
4824 v->SurfaceTiling[k], 4825 v->BytePerPixelY[k], 4826 v->SourceScan[k], 4827 v->SwathWidthYThisState[k], 4828 v->ViewportHeight[k], 4829 v->GPUVMEnable, 4830 v->HostVMEnable, 4831 v->HostVMMaxNonCachedPageTableLevels, 4832 v->GPUVMMinPageSize, 4833 v->HostVMMinPageSize, 4834 v->PTEBufferSizeInRequestsForLuma, 4835 v->PitchY[k], 4836 v->DCCMetaPitchY[k], 4837 &v->MacroTileWidthY[k], 4838 &v->MetaRowBytesY, 4839 &v->DPTEBytesPerRowY, 4840 &v->PTEBufferSizeNotExceededY[i][j][k], 4841 &v->dummyinteger7, 4842 &v->dpte_row_height[k], 4843 &v->dummyinteger29, 4844 &v->dummyinteger27, 4845 &v->dummyinteger24, 4846 &v->meta_row_height[k], 4847 &v->dummyinteger25, 4848 &v->dpte_group_bytes[k], 4849 &v->dummyinteger21, 4850 &v->dummyinteger22, 4851 &v->dummyinteger18, 4852 &v->dummyinteger5, 4853 &v->dummyinteger6); 4854 v->PrefetchLinesY[i][j][k] = CalculatePrefetchSourceLines( 4855 mode_lib, 4856 v->VRatio[k], 4857 v->vtaps[k], 4858 v->Interlace[k], 4859 v->ProgressiveToInterlaceUnitInOPP, 4860 v->SwathHeightYThisState[k], 4861 v->ViewportYStartY[k], 4862 &v->PrefillY[k], 4863 &v->MaxNumSwY[k]); 4864 v->PDEAndMetaPTEBytesPerFrame[i][j][k] = v->PDEAndMetaPTEBytesPerFrameY + v->PDEAndMetaPTEBytesPerFrameC; 4865 v->MetaRowBytes[i][j][k] = v->MetaRowBytesY + v->MetaRowBytesC; 4866 v->DPTEBytesPerRow[i][j][k] = v->DPTEBytesPerRowY + v->DPTEBytesPerRowC; 4867 4868 CalculateRowBandwidth( 4869 v->GPUVMEnable, 4870 v->SourcePixelFormat[k], 4871 v->VRatio[k], 4872 v->VRatioChroma[k], 4873 v->DCCEnable[k], 4874 v->HTotal[k] / v->PixelClock[k], 4875 v->MetaRowBytesY, 4876 v->MetaRowBytesC, 4877 v->meta_row_height[k], 4878 v->meta_row_height_chroma[k], 4879 v->DPTEBytesPerRowY, 4880 v->DPTEBytesPerRowC, 4881 v->dpte_row_height[k], 4882 v->dpte_row_height_chroma[k], 4883 &v->meta_row_bandwidth[i][j][k], 4884 &v->dpte_row_bandwidth[i][j][k]); 4885 } 4886 /* 4887 * DCCMetaBufferSizeSupport(i, j) = True 4888 * For k = 0 To NumberOfActivePlanes - 1 4889 * If MetaRowBytes(i, j, k) > 24064 
Then 4890 * DCCMetaBufferSizeSupport(i, j) = False 4891 * End If 4892 * Next k 4893 */ 4894 v->DCCMetaBufferSizeSupport[i][j] = true; 4895 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 4896 if (v->MetaRowBytes[i][j][k] > 24064) 4897 v->DCCMetaBufferSizeSupport[i][j] = false; 4898 } 4899 v->UrgLatency[i] = CalculateUrgentLatency( 4900 v->UrgentLatencyPixelDataOnly, 4901 v->UrgentLatencyPixelMixedWithVMData, 4902 v->UrgentLatencyVMDataOnly, 4903 v->DoUrgentLatencyAdjustment, 4904 v->UrgentLatencyAdjustmentFabricClockComponent, 4905 v->UrgentLatencyAdjustmentFabricClockReference, 4906 v->FabricClockPerState[i]); 4907 4908 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 4909 CalculateUrgentBurstFactor( 4910 v->swath_width_luma_ub_this_state[k], 4911 v->swath_width_chroma_ub_this_state[k], 4912 v->SwathHeightYThisState[k], 4913 v->SwathHeightCThisState[k], 4914 v->HTotal[k] / v->PixelClock[k], 4915 v->UrgLatency[i], 4916 v->CursorBufferSize, 4917 v->CursorWidth[k][0], 4918 v->CursorBPP[k][0], 4919 v->VRatio[k], 4920 v->VRatioChroma[k], 4921 v->BytePerPixelInDETY[k], 4922 v->BytePerPixelInDETC[k], 4923 v->DETBufferSizeYThisState[k], 4924 v->DETBufferSizeCThisState[k], 4925 &v->UrgentBurstFactorCursor[k], 4926 &v->UrgentBurstFactorLuma[k], 4927 &v->UrgentBurstFactorChroma[k], 4928 &NotUrgentLatencyHiding[k]); 4929 } 4930 4931 v->NotEnoughUrgentLatencyHidingA[i][j] = false; 4932 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 4933 if (NotUrgentLatencyHiding[k]) { 4934 v->NotEnoughUrgentLatencyHidingA[i][j] = true; 4935 } 4936 } 4937 4938 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 4939 v->VActivePixelBandwidth[i][j][k] = v->ReadBandwidthLuma[k] * v->UrgentBurstFactorLuma[k] 4940 + v->ReadBandwidthChroma[k] * v->UrgentBurstFactorChroma[k]; 4941 v->VActiveCursorBandwidth[i][j][k] = v->cursor_bw[k] * v->UrgentBurstFactorCursor[k]; 4942 } 4943 4944 v->TotalVActivePixelBandwidth[i][j] = 0; 4945 v->TotalVActiveCursorBandwidth[i][j] = 0; 4946 v->TotalMetaRowBandwidth[i][j] 
= 0; 4947 v->TotalDPTERowBandwidth[i][j] = 0; 4948 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 4949 v->TotalVActivePixelBandwidth[i][j] = v->TotalVActivePixelBandwidth[i][j] + v->VActivePixelBandwidth[i][j][k]; 4950 v->TotalVActiveCursorBandwidth[i][j] = v->TotalVActiveCursorBandwidth[i][j] + v->VActiveCursorBandwidth[i][j][k]; 4951 v->TotalMetaRowBandwidth[i][j] = v->TotalMetaRowBandwidth[i][j] + v->NoOfDPP[i][j][k] * v->meta_row_bandwidth[i][j][k]; 4952 v->TotalDPTERowBandwidth[i][j] = v->TotalDPTERowBandwidth[i][j] + v->NoOfDPP[i][j][k] * v->dpte_row_bandwidth[i][j][k]; 4953 } 4954 } 4955 } 4956 4957 //Calculate Return BW 4958 for (i = 0; i < v->soc.num_states; ++i) { 4959 for (j = 0; j <= 1; ++j) { 4960 for (k = 0; k < v->NumberOfActivePlanes; k++) { 4961 if (v->BlendingAndTiming[k] == k) { 4962 if (v->WritebackEnable[k] == true) { 4963 v->WritebackDelayTime[k] = v->WritebackLatency 4964 + CalculateWriteBackDelay( 4965 v->WritebackPixelFormat[k], 4966 v->WritebackHRatio[k], 4967 v->WritebackVRatio[k], 4968 v->WritebackVTaps[k], 4969 v->WritebackDestinationWidth[k], 4970 v->WritebackDestinationHeight[k], 4971 v->WritebackSourceHeight[k], 4972 v->HTotal[k]) / v->RequiredDISPCLK[i][j]; 4973 } else { 4974 v->WritebackDelayTime[k] = 0.0; 4975 } 4976 for (m = 0; m < v->NumberOfActivePlanes; m++) { 4977 if (v->BlendingAndTiming[m] == k && v->WritebackEnable[m] == true) { 4978 v->WritebackDelayTime[k] = dml_max( 4979 v->WritebackDelayTime[k], 4980 v->WritebackLatency 4981 + CalculateWriteBackDelay( 4982 v->WritebackPixelFormat[m], 4983 v->WritebackHRatio[m], 4984 v->WritebackVRatio[m], 4985 v->WritebackVTaps[m], 4986 v->WritebackDestinationWidth[m], 4987 v->WritebackDestinationHeight[m], 4988 v->WritebackSourceHeight[m], 4989 v->HTotal[m]) / v->RequiredDISPCLK[i][j]); 4990 } 4991 } 4992 } 4993 } 4994 for (k = 0; k < v->NumberOfActivePlanes; k++) { 4995 for (m = 0; m < v->NumberOfActivePlanes; m++) { 4996 if (v->BlendingAndTiming[k] == m) { 4997 
v->WritebackDelayTime[k] = v->WritebackDelayTime[m]; 4998 } 4999 } 5000 } 5001 v->MaxMaxVStartup[i][j] = 0; 5002 for (k = 0; k < v->NumberOfActivePlanes; k++) { 5003 v->MaximumVStartup[i][j][k] = 5004 CalculateMaxVStartup( 5005 v->VTotal[k], 5006 v->VActive[k], 5007 v->VBlankNom[k], 5008 v->HTotal[k], 5009 v->PixelClock[k], 5010 v->ProgressiveToInterlaceUnitInOPP, 5011 v->Interlace[k], 5012 v->ip.VBlankNomDefaultUS, 5013 v->WritebackDelayTime[k]); 5014 v->MaxMaxVStartup[i][j] = dml_max(v->MaxMaxVStartup[i][j], v->MaximumVStartup[i][j][k]); 5015 } 5016 } 5017 } 5018 5019 ReorderingBytes = v->NumberOfChannels 5020 * dml_max3( 5021 v->UrgentOutOfOrderReturnPerChannelPixelDataOnly, 5022 v->UrgentOutOfOrderReturnPerChannelPixelMixedWithVMData, 5023 v->UrgentOutOfOrderReturnPerChannelVMDataOnly); 5024 5025 for (i = 0; i < v->soc.num_states; ++i) { 5026 for (j = 0; j <= 1; ++j) { 5027 v->DCFCLKState[i][j] = v->DCFCLKPerState[i]; 5028 } 5029 } 5030 5031 if (v->UseMinimumRequiredDCFCLK == true) 5032 UseMinimumDCFCLK(mode_lib, MaxPrefetchMode, ReorderingBytes); 5033 5034 for (i = 0; i < v->soc.num_states; ++i) { 5035 for (j = 0; j <= 1; ++j) { 5036 double IdealFabricAndSDPPortBandwidthPerState = dml_min( 5037 v->ReturnBusWidth * v->DCFCLKState[i][j], 5038 v->FabricClockPerState[i] * v->FabricDatapathToDCNDataReturn); 5039 double IdealDRAMBandwidthPerState = v->DRAMSpeedPerState[i] * v->NumberOfChannels * v->DRAMChannelWidth; 5040 double PixelDataOnlyReturnBWPerState = dml_min( 5041 IdealFabricAndSDPPortBandwidthPerState * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0, 5042 IdealDRAMBandwidthPerState * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyPixelDataOnly / 100.0); 5043 double PixelMixedWithVMDataReturnBWPerState = dml_min( 5044 IdealFabricAndSDPPortBandwidthPerState * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0, 5045 IdealDRAMBandwidthPerState * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyPixelMixedWithVMData / 100.0); 5046 
5047 if (v->HostVMEnable != true) { 5048 v->ReturnBWPerState[i][j] = PixelDataOnlyReturnBWPerState; 5049 } else { 5050 v->ReturnBWPerState[i][j] = PixelMixedWithVMDataReturnBWPerState; 5051 } 5052 } 5053 } 5054 5055 //Re-ordering Buffer Support Check 5056 for (i = 0; i < v->soc.num_states; ++i) { 5057 for (j = 0; j <= 1; ++j) { 5058 if ((v->ROBBufferSizeInKByte - v->PixelChunkSizeInKByte) * 1024 / v->ReturnBWPerState[i][j] 5059 > (v->RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__) / v->DCFCLKState[i][j] + ReorderingBytes / v->ReturnBWPerState[i][j]) { 5060 v->ROBSupport[i][j] = true; 5061 } else { 5062 v->ROBSupport[i][j] = false; 5063 } 5064 } 5065 } 5066 5067 //Vertical Active BW support check 5068 5069 MaxTotalVActiveRDBandwidth = 0; 5070 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 5071 MaxTotalVActiveRDBandwidth = MaxTotalVActiveRDBandwidth + v->ReadBandwidthLuma[k] + v->ReadBandwidthChroma[k]; 5072 } 5073 5074 for (i = 0; i < v->soc.num_states; ++i) { 5075 for (j = 0; j <= 1; ++j) { 5076 v->MaxTotalVerticalActiveAvailableBandwidth[i][j] = dml_min( 5077 dml_min( 5078 v->ReturnBusWidth * v->DCFCLKState[i][j], 5079 v->FabricClockPerState[i] * v->FabricDatapathToDCNDataReturn) 5080 * v->MaxAveragePercentOfIdealFabricAndSDPPortBWDisplayCanUseInNormalSystemOperation / 100, 5081 v->DRAMSpeedPerState[i] * v->NumberOfChannels * v->DRAMChannelWidth 5082 * v->MaxAveragePercentOfIdealDRAMBWDisplayCanUseInNormalSystemOperation / 100); 5083 5084 if (MaxTotalVActiveRDBandwidth <= v->MaxTotalVerticalActiveAvailableBandwidth[i][j]) { 5085 v->TotalVerticalActiveBandwidthSupport[i][j] = true; 5086 } else { 5087 v->TotalVerticalActiveBandwidthSupport[i][j] = false; 5088 } 5089 } 5090 } 5091 5092 v->UrgentLatency = CalculateUrgentLatency( 5093 v->UrgentLatencyPixelDataOnly, 5094 v->UrgentLatencyPixelMixedWithVMData, 5095 v->UrgentLatencyVMDataOnly, 5096 v->DoUrgentLatencyAdjustment, 5097 v->UrgentLatencyAdjustmentFabricClockComponent, 5098 
v->UrgentLatencyAdjustmentFabricClockReference, 5099 v->FabricClock); 5100 //Prefetch Check 5101 for (i = 0; i < v->soc.num_states; ++i) { 5102 for (j = 0; j <= 1; ++j) { 5103 double VMDataOnlyReturnBWPerState; 5104 double HostVMInefficiencyFactor = 1; 5105 int NextPrefetchModeState = MinPrefetchMode; 5106 bool UnboundedRequestEnabledThisState = false; 5107 int CompressedBufferSizeInkByteThisState = 0; 5108 double dummy; 5109 5110 v->TimeCalc = 24 / v->ProjectedDCFCLKDeepSleep[i][j]; 5111 5112 v->BandwidthWithoutPrefetchSupported[i][j] = true; 5113 if (v->TotalVActivePixelBandwidth[i][j] + v->TotalVActiveCursorBandwidth[i][j] + v->TotalMetaRowBandwidth[i][j] 5114 + v->TotalDPTERowBandwidth[i][j] > v->ReturnBWPerState[i][j] || v->NotEnoughUrgentLatencyHidingA[i][j]) { 5115 v->BandwidthWithoutPrefetchSupported[i][j] = false; 5116 } 5117 5118 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 5119 v->NoOfDPPThisState[k] = v->NoOfDPP[i][j][k]; 5120 v->swath_width_luma_ub_this_state[k] = v->swath_width_luma_ub_all_states[i][j][k]; 5121 v->swath_width_chroma_ub_this_state[k] = v->swath_width_chroma_ub_all_states[i][j][k]; 5122 v->SwathWidthYThisState[k] = v->SwathWidthYAllStates[i][j][k]; 5123 v->SwathWidthCThisState[k] = v->SwathWidthCAllStates[i][j][k]; 5124 v->SwathHeightYThisState[k] = v->SwathHeightYAllStates[i][j][k]; 5125 v->SwathHeightCThisState[k] = v->SwathHeightCAllStates[i][j][k]; 5126 v->DETBufferSizeYThisState[k] = v->DETBufferSizeYAllStates[i][j][k]; 5127 v->DETBufferSizeCThisState[k] = v->DETBufferSizeCAllStates[i][j][k]; 5128 } 5129 5130 VMDataOnlyReturnBWPerState = dml_min( 5131 dml_min( 5132 v->ReturnBusWidth * v->DCFCLKState[i][j], 5133 v->FabricClockPerState[i] * v->FabricDatapathToDCNDataReturn) 5134 * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0, 5135 v->DRAMSpeedPerState[i] * v->NumberOfChannels * v->DRAMChannelWidth 5136 * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyVMDataOnly / 100.0); 5137 if (v->GPUVMEnable && 
v->HostVMEnable) 5138 HostVMInefficiencyFactor = v->ReturnBWPerState[i][j] / VMDataOnlyReturnBWPerState; 5139 5140 v->ExtraLatency = CalculateExtraLatency( 5141 v->RoundTripPingLatencyCycles, 5142 ReorderingBytes, 5143 v->DCFCLKState[i][j], 5144 v->TotalNumberOfActiveDPP[i][j], 5145 v->PixelChunkSizeInKByte, 5146 v->TotalNumberOfDCCActiveDPP[i][j], 5147 v->MetaChunkSize, 5148 v->ReturnBWPerState[i][j], 5149 v->GPUVMEnable, 5150 v->HostVMEnable, 5151 v->NumberOfActivePlanes, 5152 v->NoOfDPPThisState, 5153 v->dpte_group_bytes, 5154 HostVMInefficiencyFactor, 5155 v->HostVMMinPageSize, 5156 v->HostVMMaxNonCachedPageTableLevels); 5157 5158 v->NextMaxVStartup = v->MaxMaxVStartup[i][j]; 5159 do { 5160 v->PrefetchModePerState[i][j] = NextPrefetchModeState; 5161 v->MaxVStartup = v->NextMaxVStartup; 5162 5163 v->TWait = CalculateTWait( 5164 v->PrefetchModePerState[i][j], 5165 v->DRAMClockChangeLatency, 5166 v->UrgLatency[i], 5167 v->SREnterPlusExitTime); 5168 5169 for (k = 0; k < v->NumberOfActivePlanes; k++) { 5170 CalculatePrefetchSchedulePerPlane(mode_lib, 5171 HostVMInefficiencyFactor, 5172 i, j, k); 5173 } 5174 5175 for (k = 0; k < v->NumberOfActivePlanes; k++) { 5176 CalculateUrgentBurstFactor( 5177 v->swath_width_luma_ub_this_state[k], 5178 v->swath_width_chroma_ub_this_state[k], 5179 v->SwathHeightYThisState[k], 5180 v->SwathHeightCThisState[k], 5181 v->HTotal[k] / v->PixelClock[k], 5182 v->UrgLatency[i], 5183 v->CursorBufferSize, 5184 v->CursorWidth[k][0], 5185 v->CursorBPP[k][0], 5186 v->VRatioPreY[i][j][k], 5187 v->VRatioPreC[i][j][k], 5188 v->BytePerPixelInDETY[k], 5189 v->BytePerPixelInDETC[k], 5190 v->DETBufferSizeYThisState[k], 5191 v->DETBufferSizeCThisState[k], 5192 &v->UrgentBurstFactorCursorPre[k], 5193 &v->UrgentBurstFactorLumaPre[k], 5194 &v->UrgentBurstFactorChroma[k], 5195 &v->NotUrgentLatencyHidingPre[k]); 5196 } 5197 5198 v->MaximumReadBandwidthWithPrefetch = 0.0; 5199 for (k = 0; k < v->NumberOfActivePlanes; k++) { 5200 v->cursor_bw_pre[k] = 
v->NumberOfCursors[k] * v->CursorWidth[k][0] * v->CursorBPP[k][0] / 8.0 5201 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatioPreY[i][j][k]; 5202 5203 v->MaximumReadBandwidthWithPrefetch = 5204 v->MaximumReadBandwidthWithPrefetch 5205 + dml_max3( 5206 v->VActivePixelBandwidth[i][j][k] 5207 + v->VActiveCursorBandwidth[i][j][k] 5208 + v->NoOfDPP[i][j][k] 5209 * (v->meta_row_bandwidth[i][j][k] 5210 + v->dpte_row_bandwidth[i][j][k]), 5211 v->NoOfDPP[i][j][k] * v->prefetch_vmrow_bw[k], 5212 v->NoOfDPP[i][j][k] 5213 * (v->RequiredPrefetchPixelDataBWLuma[i][j][k] 5214 * v->UrgentBurstFactorLumaPre[k] 5215 + v->RequiredPrefetchPixelDataBWChroma[i][j][k] 5216 * v->UrgentBurstFactorChromaPre[k]) 5217 + v->cursor_bw_pre[k] * v->UrgentBurstFactorCursorPre[k]); 5218 } 5219 5220 v->NotEnoughUrgentLatencyHidingPre = false; 5221 for (k = 0; k < v->NumberOfActivePlanes; k++) { 5222 if (v->NotUrgentLatencyHidingPre[k] == true) { 5223 v->NotEnoughUrgentLatencyHidingPre = true; 5224 } 5225 } 5226 5227 v->PrefetchSupported[i][j] = true; 5228 if (v->BandwidthWithoutPrefetchSupported[i][j] == false || v->MaximumReadBandwidthWithPrefetch > v->ReturnBWPerState[i][j] 5229 || v->NotEnoughUrgentLatencyHidingPre == 1) { 5230 v->PrefetchSupported[i][j] = false; 5231 } 5232 for (k = 0; k < v->NumberOfActivePlanes; k++) { 5233 if (v->LineTimesForPrefetch[k] < 2.0 || v->LinesForMetaPTE[k] >= 32.0 || v->LinesForMetaAndDPTERow[k] >= 16.0 5234 || v->NoTimeForPrefetch[i][j][k] == true) { 5235 v->PrefetchSupported[i][j] = false; 5236 } 5237 } 5238 5239 v->DynamicMetadataSupported[i][j] = true; 5240 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 5241 if (v->NoTimeForDynamicMetadata[i][j][k] == true) { 5242 v->DynamicMetadataSupported[i][j] = false; 5243 } 5244 } 5245 5246 v->VRatioInPrefetchSupported[i][j] = true; 5247 for (k = 0; k < v->NumberOfActivePlanes; k++) { 5248 if (v->VRatioPreY[i][j][k] > 4.0 || v->VRatioPreC[i][j][k] > 4.0 || v->NoTimeForPrefetch[i][j][k] == true) { 5249 
v->VRatioInPrefetchSupported[i][j] = false; 5250 } 5251 } 5252 v->AnyLinesForVMOrRowTooLarge = false; 5253 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 5254 if (v->LinesForMetaAndDPTERow[k] >= 16 || v->LinesForMetaPTE[k] >= 32) { 5255 v->AnyLinesForVMOrRowTooLarge = true; 5256 } 5257 } 5258 5259 v->NextPrefetchMode = v->NextPrefetchMode + 1; 5260 5261 if (v->PrefetchSupported[i][j] == true && v->VRatioInPrefetchSupported[i][j] == true) { 5262 v->BandwidthAvailableForImmediateFlip = v->ReturnBWPerState[i][j]; 5263 for (k = 0; k < v->NumberOfActivePlanes; k++) { 5264 v->BandwidthAvailableForImmediateFlip = v->BandwidthAvailableForImmediateFlip 5265 - dml_max( 5266 v->VActivePixelBandwidth[i][j][k] + v->VActiveCursorBandwidth[i][j][k], 5267 v->NoOfDPP[i][j][k] 5268 * (v->RequiredPrefetchPixelDataBWLuma[i][j][k] 5269 * v->UrgentBurstFactorLumaPre[k] 5270 + v->RequiredPrefetchPixelDataBWChroma[i][j][k] 5271 * v->UrgentBurstFactorChromaPre[k]) 5272 + v->cursor_bw_pre[k] * v->UrgentBurstFactorCursorPre[k]); 5273 } 5274 v->TotImmediateFlipBytes = 0.0; 5275 for (k = 0; k < v->NumberOfActivePlanes; k++) { 5276 v->TotImmediateFlipBytes = v->TotImmediateFlipBytes 5277 + v->NoOfDPP[i][j][k] * v->PDEAndMetaPTEBytesPerFrame[i][j][k] + v->MetaRowBytes[i][j][k] 5278 + v->DPTEBytesPerRow[i][j][k]; 5279 } 5280 5281 for (k = 0; k < v->NumberOfActivePlanes; k++) { 5282 CalculateFlipSchedule( 5283 mode_lib, 5284 k, 5285 HostVMInefficiencyFactor, 5286 v->ExtraLatency, 5287 v->UrgLatency[i], 5288 v->PDEAndMetaPTEBytesPerFrame[i][j][k], 5289 v->MetaRowBytes[i][j][k], 5290 v->DPTEBytesPerRow[i][j][k]); 5291 } 5292 v->total_dcn_read_bw_with_flip = 0.0; 5293 for (k = 0; k < v->NumberOfActivePlanes; k++) { 5294 v->total_dcn_read_bw_with_flip = v->total_dcn_read_bw_with_flip 5295 + dml_max3( 5296 v->NoOfDPP[i][j][k] * v->prefetch_vmrow_bw[k], 5297 v->NoOfDPP[i][j][k] * v->final_flip_bw[k] + v->VActivePixelBandwidth[i][j][k] 5298 + v->VActiveCursorBandwidth[i][j][k], 5299 v->NoOfDPP[i][j][k] 
5300 * (v->final_flip_bw[k] 5301 + v->RequiredPrefetchPixelDataBWLuma[i][j][k] 5302 * v->UrgentBurstFactorLumaPre[k] 5303 + v->RequiredPrefetchPixelDataBWChroma[i][j][k] 5304 * v->UrgentBurstFactorChromaPre[k]) 5305 + v->cursor_bw_pre[k] * v->UrgentBurstFactorCursorPre[k]); 5306 } 5307 v->ImmediateFlipSupportedForState[i][j] = true; 5308 if (v->total_dcn_read_bw_with_flip > v->ReturnBWPerState[i][j]) { 5309 v->ImmediateFlipSupportedForState[i][j] = false; 5310 } 5311 for (k = 0; k < v->NumberOfActivePlanes; k++) { 5312 if (v->ImmediateFlipSupportedForPipe[k] == false) { 5313 v->ImmediateFlipSupportedForState[i][j] = false; 5314 } 5315 } 5316 } else { 5317 v->ImmediateFlipSupportedForState[i][j] = false; 5318 } 5319 5320 if (v->MaxVStartup <= __DML_VBA_MIN_VSTARTUP__ || v->AnyLinesForVMOrRowTooLarge == false) { 5321 v->NextMaxVStartup = v->MaxMaxVStartup[i][j]; 5322 NextPrefetchModeState = NextPrefetchModeState + 1; 5323 } else { 5324 v->NextMaxVStartup = v->NextMaxVStartup - 1; 5325 } 5326 v->NextPrefetchMode = v->NextPrefetchMode + 1; 5327 } while (!((v->PrefetchSupported[i][j] == true && v->DynamicMetadataSupported[i][j] == true && v->VRatioInPrefetchSupported[i][j] == true 5328 && ((v->HostVMEnable == false && 5329 v->ImmediateFlipRequirement[0] != dm_immediate_flip_required) 5330 || v->ImmediateFlipSupportedForState[i][j] == true)) 5331 || (v->NextMaxVStartup == v->MaxMaxVStartup[i][j] && NextPrefetchModeState > MaxPrefetchMode))); 5332 5333 CalculateUnboundedRequestAndCompressedBufferSize( 5334 v->DETBufferSizeInKByte[0], 5335 v->ConfigReturnBufferSizeInKByte, 5336 v->UseUnboundedRequesting, 5337 v->TotalNumberOfActiveDPP[i][j], 5338 NoChroma, 5339 v->MaxNumDPP, 5340 v->CompressedBufferSegmentSizeInkByte, 5341 v->Output, 5342 &UnboundedRequestEnabledThisState, 5343 &CompressedBufferSizeInkByteThisState); 5344 5345 CalculateWatermarksAndDRAMSpeedChangeSupport( 5346 mode_lib, 5347 v->PrefetchModePerState[i][j], 5348 v->DCFCLKState[i][j], 5349 
v->ReturnBWPerState[i][j], 5350 v->UrgLatency[i], 5351 v->ExtraLatency, 5352 v->SOCCLKPerState[i], 5353 v->ProjectedDCFCLKDeepSleep[i][j], 5354 v->DETBufferSizeYThisState, 5355 v->DETBufferSizeCThisState, 5356 v->SwathHeightYThisState, 5357 v->SwathHeightCThisState, 5358 v->SwathWidthYThisState, 5359 v->SwathWidthCThisState, 5360 v->NoOfDPPThisState, 5361 v->BytePerPixelInDETY, 5362 v->BytePerPixelInDETC, 5363 UnboundedRequestEnabledThisState, 5364 CompressedBufferSizeInkByteThisState, 5365 &v->DRAMClockChangeSupport[i][j], 5366 &dummy, 5367 &dummy, 5368 &dummy, 5369 &dummy); 5370 } 5371 } 5372 5373 /*PTE Buffer Size Check*/ 5374 for (i = 0; i < v->soc.num_states; i++) { 5375 for (j = 0; j < 2; j++) { 5376 v->PTEBufferSizeNotExceeded[i][j] = true; 5377 for (k = 0; k < v->NumberOfActivePlanes; k++) { 5378 if (v->PTEBufferSizeNotExceededY[i][j][k] == false || v->PTEBufferSizeNotExceededC[i][j][k] == false) { 5379 v->PTEBufferSizeNotExceeded[i][j] = false; 5380 } 5381 } 5382 } 5383 } 5384 5385 /*Cursor Support Check*/ 5386 v->CursorSupport = true; 5387 for (k = 0; k < v->NumberOfActivePlanes; k++) { 5388 if (v->CursorWidth[k][0] > 0.0) { 5389 if (v->CursorBPP[k][0] == 64 && v->Cursor64BppSupport == false) { 5390 v->CursorSupport = false; 5391 } 5392 } 5393 } 5394 5395 /*Valid Pitch Check*/ 5396 v->PitchSupport = true; 5397 for (k = 0; k < v->NumberOfActivePlanes; k++) { 5398 v->AlignedYPitch[k] = dml_ceil(dml_max(v->PitchY[k], v->SurfaceWidthY[k]), v->MacroTileWidthY[k]); 5399 if (v->DCCEnable[k] == true) { 5400 v->AlignedDCCMetaPitchY[k] = dml_ceil(dml_max(v->DCCMetaPitchY[k], v->SurfaceWidthY[k]), 64.0 * v->Read256BlockWidthY[k]); 5401 } else { 5402 v->AlignedDCCMetaPitchY[k] = v->DCCMetaPitchY[k]; 5403 } 5404 if (v->SourcePixelFormat[k] != dm_444_64 && v->SourcePixelFormat[k] != dm_444_32 && v->SourcePixelFormat[k] != dm_444_16 5405 && v->SourcePixelFormat[k] != dm_mono_16 && v->SourcePixelFormat[k] != dm_rgbe 5406 && v->SourcePixelFormat[k] != dm_mono_8) { 5407 
v->AlignedCPitch[k] = dml_ceil(dml_max(v->PitchC[k], v->SurfaceWidthC[k]), v->MacroTileWidthC[k]); 5408 if (v->DCCEnable[k] == true) { 5409 v->AlignedDCCMetaPitchC[k] = dml_ceil( 5410 dml_max(v->DCCMetaPitchC[k], v->SurfaceWidthC[k]), 5411 64.0 * v->Read256BlockWidthC[k]); 5412 } else { 5413 v->AlignedDCCMetaPitchC[k] = v->DCCMetaPitchC[k]; 5414 } 5415 } else { 5416 v->AlignedCPitch[k] = v->PitchC[k]; 5417 v->AlignedDCCMetaPitchC[k] = v->DCCMetaPitchC[k]; 5418 } 5419 if (v->AlignedYPitch[k] > v->PitchY[k] || v->AlignedCPitch[k] > v->PitchC[k] 5420 || v->AlignedDCCMetaPitchY[k] > v->DCCMetaPitchY[k] || v->AlignedDCCMetaPitchC[k] > v->DCCMetaPitchC[k]) { 5421 v->PitchSupport = false; 5422 } 5423 } 5424 5425 for (k = 0; k < v->NumberOfActivePlanes; k++) { 5426 if (v->ViewportWidth[k] > v->SurfaceWidthY[k] || v->ViewportHeight[k] > v->SurfaceHeightY[k]) { 5427 ViewportExceedsSurface = true; 5428 if (v->SourcePixelFormat[k] != dm_444_64 && v->SourcePixelFormat[k] != dm_444_32 5429 && v->SourcePixelFormat[k] != dm_444_16 && v->SourcePixelFormat[k] != dm_444_8 5430 && v->SourcePixelFormat[k] != dm_rgbe) { 5431 if (v->ViewportWidthChroma[k] > v->SurfaceWidthC[k] 5432 || v->ViewportHeightChroma[k] > v->SurfaceHeightC[k]) { 5433 ViewportExceedsSurface = true; 5434 } 5435 } 5436 } 5437 } 5438 5439 /*Mode Support, Voltage State and SOC Configuration*/ 5440 for (i = v->soc.num_states - 1; i >= 0; i--) { 5441 for (j = 0; j < 2; j++) { 5442 if (v->ScaleRatioAndTapsSupport == true && v->SourceFormatPixelAndScanSupport == true && v->ViewportSizeSupport[i][j] == true 5443 && v->LinkCapacitySupport[i] == true && !P2IWith420 && !DSCOnlyIfNecessaryWithBPP 5444 && !DSC422NativeNotSupported && v->ODMCombine4To1SupportCheckOK[i] == true && v->NotEnoughDSCUnits[i] == false 5445 && v->DTBCLKRequiredMoreThanSupported[i] == false 5446 && v->ROBSupport[i][j] == true && v->DISPCLK_DPPCLK_Support[i][j] == true 5447 && v->TotalAvailablePipesSupport[i][j] == true && EnoughWritebackUnits == true 
5448 && v->WritebackLatencySupport == true && v->WritebackScaleRatioAndTapsSupport == true 5449 && v->CursorSupport == true && v->PitchSupport == true && ViewportExceedsSurface == false 5450 && v->PrefetchSupported[i][j] == true && v->DynamicMetadataSupported[i][j] == true 5451 && v->TotalVerticalActiveBandwidthSupport[i][j] == true && v->VRatioInPrefetchSupported[i][j] == true 5452 && v->PTEBufferSizeNotExceeded[i][j] == true && v->NonsupportedDSCInputBPC == false 5453 && ((v->HostVMEnable == false 5454 && v->ImmediateFlipRequirement[0] != dm_immediate_flip_required) 5455 || v->ImmediateFlipSupportedForState[i][j] == true) 5456 && FMTBufferExceeded == false) { 5457 v->ModeSupport[i][j] = true; 5458 } else { 5459 v->ModeSupport[i][j] = false; 5460 } 5461 } 5462 } 5463 5464 { 5465 unsigned int MaximumMPCCombine = 0; 5466 5467 for (i = v->soc.num_states; i >= 0; i--) { 5468 if (i == v->soc.num_states || v->ModeSupport[i][0] == true || v->ModeSupport[i][1] == true) { 5469 v->VoltageLevel = i; 5470 v->ModeIsSupported = v->ModeSupport[i][0] == true || v->ModeSupport[i][1] == true; 5471 if (v->ModeSupport[i][0] == true) { 5472 MaximumMPCCombine = 0; 5473 } else { 5474 MaximumMPCCombine = 1; 5475 } 5476 } 5477 } 5478 v->ImmediateFlipSupport = v->ImmediateFlipSupportedForState[v->VoltageLevel][MaximumMPCCombine]; 5479 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) { 5480 v->MPCCombineEnable[k] = v->MPCCombine[v->VoltageLevel][MaximumMPCCombine][k]; 5481 v->DPPPerPlane[k] = v->NoOfDPP[v->VoltageLevel][MaximumMPCCombine][k]; 5482 } 5483 v->DCFCLK = v->DCFCLKState[v->VoltageLevel][MaximumMPCCombine]; 5484 v->DRAMSpeed = v->DRAMSpeedPerState[v->VoltageLevel]; 5485 v->FabricClock = v->FabricClockPerState[v->VoltageLevel]; 5486 v->SOCCLK = v->SOCCLKPerState[v->VoltageLevel]; 5487 v->ReturnBW = v->ReturnBWPerState[v->VoltageLevel][MaximumMPCCombine]; 5488 v->maxMpcComb = MaximumMPCCombine; 5489 } 5490 } 5491 5492 static void CalculateWatermarksAndDRAMSpeedChangeSupport( 5493 
struct display_mode_lib *mode_lib, 5494 unsigned int PrefetchMode, 5495 double DCFCLK, 5496 double ReturnBW, 5497 double UrgentLatency, 5498 double ExtraLatency, 5499 double SOCCLK, 5500 double DCFCLKDeepSleep, 5501 unsigned int DETBufferSizeY[], 5502 unsigned int DETBufferSizeC[], 5503 unsigned int SwathHeightY[], 5504 unsigned int SwathHeightC[], 5505 double SwathWidthY[], 5506 double SwathWidthC[], 5507 unsigned int DPPPerPlane[], 5508 double BytePerPixelDETY[], 5509 double BytePerPixelDETC[], 5510 bool UnboundedRequestEnabled, 5511 unsigned int CompressedBufferSizeInkByte, 5512 enum clock_change_support *DRAMClockChangeSupport, 5513 double *StutterExitWatermark, 5514 double *StutterEnterPlusExitWatermark, 5515 double *Z8StutterExitWatermark, 5516 double *Z8StutterEnterPlusExitWatermark) 5517 { 5518 struct vba_vars_st *v = &mode_lib->vba; 5519 double EffectiveLBLatencyHidingY; 5520 double EffectiveLBLatencyHidingC; 5521 double LinesInDETY[DC__NUM_DPP__MAX]; 5522 double LinesInDETC; 5523 unsigned int LinesInDETYRoundedDownToSwath[DC__NUM_DPP__MAX]; 5524 unsigned int LinesInDETCRoundedDownToSwath; 5525 double FullDETBufferingTimeY; 5526 double FullDETBufferingTimeC; 5527 double ActiveDRAMClockChangeLatencyMarginY; 5528 double ActiveDRAMClockChangeLatencyMarginC; 5529 double WritebackDRAMClockChangeLatencyMargin; 5530 double PlaneWithMinActiveDRAMClockChangeMargin; 5531 double SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank; 5532 double WritebackDRAMClockChangeLatencyHiding; 5533 double TotalPixelBW = 0.0; 5534 int k, j; 5535 5536 v->UrgentWatermark = UrgentLatency + ExtraLatency; 5537 5538 #ifdef __DML_VBA_DEBUG__ 5539 dml_print("DML::%s: UrgentLatency = %f\n", __func__, UrgentLatency); 5540 dml_print("DML::%s: ExtraLatency = %f\n", __func__, ExtraLatency); 5541 dml_print("DML::%s: UrgentWatermark = %f\n", __func__, v->UrgentWatermark); 5542 #endif 5543 5544 v->DRAMClockChangeWatermark = v->DRAMClockChangeLatency + v->UrgentWatermark; 5545 5546 #ifdef 
__DML_VBA_DEBUG__ 5547 dml_print("DML::%s: v->DRAMClockChangeLatency = %f\n", __func__, v->DRAMClockChangeLatency); 5548 dml_print("DML::%s: DRAMClockChangeWatermark = %f\n", __func__, v->DRAMClockChangeWatermark); 5549 #endif 5550 5551 v->TotalActiveWriteback = 0; 5552 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 5553 if (v->WritebackEnable[k] == true) { 5554 v->TotalActiveWriteback = v->TotalActiveWriteback + 1; 5555 } 5556 } 5557 5558 if (v->TotalActiveWriteback <= 1) { 5559 v->WritebackUrgentWatermark = v->WritebackLatency; 5560 } else { 5561 v->WritebackUrgentWatermark = v->WritebackLatency + v->WritebackChunkSize * 1024.0 / 32.0 / SOCCLK; 5562 } 5563 5564 if (v->TotalActiveWriteback <= 1) { 5565 v->WritebackDRAMClockChangeWatermark = v->DRAMClockChangeLatency + v->WritebackLatency; 5566 } else { 5567 v->WritebackDRAMClockChangeWatermark = v->DRAMClockChangeLatency + v->WritebackLatency + v->WritebackChunkSize * 1024.0 / 32.0 / SOCCLK; 5568 } 5569 5570 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 5571 TotalPixelBW = TotalPixelBW 5572 + DPPPerPlane[k] * (SwathWidthY[k] * BytePerPixelDETY[k] * v->VRatio[k] + SwathWidthC[k] * BytePerPixelDETC[k] * v->VRatioChroma[k]) 5573 / (v->HTotal[k] / v->PixelClock[k]); 5574 } 5575 5576 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 5577 double EffectiveDETBufferSizeY = DETBufferSizeY[k]; 5578 5579 v->LBLatencyHidingSourceLinesY = dml_min( 5580 (double) v->MaxLineBufferLines, 5581 dml_floor(v->LineBufferSize / v->LBBitPerPixel[k] / (SwathWidthY[k] / dml_max(v->HRatio[k], 1.0)), 1)) - (v->vtaps[k] - 1); 5582 5583 v->LBLatencyHidingSourceLinesC = dml_min( 5584 (double) v->MaxLineBufferLines, 5585 dml_floor(v->LineBufferSize / v->LBBitPerPixel[k] / (SwathWidthC[k] / dml_max(v->HRatioChroma[k], 1.0)), 1)) - (v->VTAPsChroma[k] - 1); 5586 5587 EffectiveLBLatencyHidingY = v->LBLatencyHidingSourceLinesY / v->VRatio[k] * (v->HTotal[k] / v->PixelClock[k]); 5588 5589 EffectiveLBLatencyHidingC = v->LBLatencyHidingSourceLinesC / 
v->VRatioChroma[k] * (v->HTotal[k] / v->PixelClock[k]); 5590 5591 if (UnboundedRequestEnabled) { 5592 EffectiveDETBufferSizeY = EffectiveDETBufferSizeY 5593 + CompressedBufferSizeInkByte * 1024 * SwathWidthY[k] * BytePerPixelDETY[k] * v->VRatio[k] / (v->HTotal[k] / v->PixelClock[k]) / TotalPixelBW; 5594 } 5595 5596 LinesInDETY[k] = (double) EffectiveDETBufferSizeY / BytePerPixelDETY[k] / SwathWidthY[k]; 5597 LinesInDETYRoundedDownToSwath[k] = dml_floor(LinesInDETY[k], SwathHeightY[k]); 5598 FullDETBufferingTimeY = LinesInDETYRoundedDownToSwath[k] * (v->HTotal[k] / v->PixelClock[k]) / v->VRatio[k]; 5599 if (BytePerPixelDETC[k] > 0) { 5600 LinesInDETC = v->DETBufferSizeC[k] / BytePerPixelDETC[k] / SwathWidthC[k]; 5601 LinesInDETCRoundedDownToSwath = dml_floor(LinesInDETC, SwathHeightC[k]); 5602 FullDETBufferingTimeC = LinesInDETCRoundedDownToSwath * (v->HTotal[k] / v->PixelClock[k]) / v->VRatioChroma[k]; 5603 } else { 5604 LinesInDETC = 0; 5605 FullDETBufferingTimeC = 999999; 5606 } 5607 5608 ActiveDRAMClockChangeLatencyMarginY = EffectiveLBLatencyHidingY + FullDETBufferingTimeY 5609 - ((double) v->DSTXAfterScaler[k] / v->HTotal[k] + v->DSTYAfterScaler[k]) * v->HTotal[k] / v->PixelClock[k] - v->UrgentWatermark - v->DRAMClockChangeWatermark; 5610 5611 if (v->NumberOfActivePlanes > 1) { 5612 ActiveDRAMClockChangeLatencyMarginY = ActiveDRAMClockChangeLatencyMarginY 5613 - (1 - 1.0 / v->NumberOfActivePlanes) * SwathHeightY[k] * v->HTotal[k] / v->PixelClock[k] / v->VRatio[k]; 5614 } 5615 5616 if (BytePerPixelDETC[k] > 0) { 5617 ActiveDRAMClockChangeLatencyMarginC = EffectiveLBLatencyHidingC + FullDETBufferingTimeC 5618 - ((double) v->DSTXAfterScaler[k] / v->HTotal[k] + v->DSTYAfterScaler[k]) * v->HTotal[k] / v->PixelClock[k] - v->UrgentWatermark - v->DRAMClockChangeWatermark; 5619 5620 if (v->NumberOfActivePlanes > 1) { 5621 ActiveDRAMClockChangeLatencyMarginC = ActiveDRAMClockChangeLatencyMarginC 5622 - (1 - 1.0 / v->NumberOfActivePlanes) * SwathHeightC[k] * v->HTotal[k] 
/ v->PixelClock[k] / v->VRatioChroma[k]; 5623 } 5624 v->ActiveDRAMClockChangeLatencyMargin[k] = dml_min(ActiveDRAMClockChangeLatencyMarginY, ActiveDRAMClockChangeLatencyMarginC); 5625 } else { 5626 v->ActiveDRAMClockChangeLatencyMargin[k] = ActiveDRAMClockChangeLatencyMarginY; 5627 } 5628 5629 if (v->WritebackEnable[k] == true) { 5630 WritebackDRAMClockChangeLatencyHiding = v->WritebackInterfaceBufferSize * 1024 5631 / (v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k] / (v->WritebackSourceHeight[k] * v->HTotal[k] / v->PixelClock[k]) * 4); 5632 if (v->WritebackPixelFormat[k] == dm_444_64) { 5633 WritebackDRAMClockChangeLatencyHiding = WritebackDRAMClockChangeLatencyHiding / 2; 5634 } 5635 WritebackDRAMClockChangeLatencyMargin = WritebackDRAMClockChangeLatencyHiding - v->WritebackDRAMClockChangeWatermark; 5636 v->ActiveDRAMClockChangeLatencyMargin[k] = dml_min(v->ActiveDRAMClockChangeLatencyMargin[k], WritebackDRAMClockChangeLatencyMargin); 5637 } 5638 } 5639 5640 v->MinActiveDRAMClockChangeMargin = 999999; 5641 PlaneWithMinActiveDRAMClockChangeMargin = 0; 5642 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 5643 if (v->ActiveDRAMClockChangeLatencyMargin[k] < v->MinActiveDRAMClockChangeMargin) { 5644 v->MinActiveDRAMClockChangeMargin = v->ActiveDRAMClockChangeLatencyMargin[k]; 5645 if (v->BlendingAndTiming[k] == k) { 5646 PlaneWithMinActiveDRAMClockChangeMargin = k; 5647 } else { 5648 for (j = 0; j < v->NumberOfActivePlanes; ++j) { 5649 if (v->BlendingAndTiming[k] == j) { 5650 PlaneWithMinActiveDRAMClockChangeMargin = j; 5651 } 5652 } 5653 } 5654 } 5655 } 5656 5657 v->MinActiveDRAMClockChangeLatencySupported = v->MinActiveDRAMClockChangeMargin + v->DRAMClockChangeLatency ; 5658 5659 SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank = 999999; 5660 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 5661 if (!((k == PlaneWithMinActiveDRAMClockChangeMargin) && (v->BlendingAndTiming[k] == k)) && !(v->BlendingAndTiming[k] == 
PlaneWithMinActiveDRAMClockChangeMargin) 5662 && v->ActiveDRAMClockChangeLatencyMargin[k] < SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank) { 5663 SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank = v->ActiveDRAMClockChangeLatencyMargin[k]; 5664 } 5665 } 5666 5667 v->TotalNumberOfActiveOTG = 0; 5668 5669 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 5670 if (v->BlendingAndTiming[k] == k) { 5671 v->TotalNumberOfActiveOTG = v->TotalNumberOfActiveOTG + 1; 5672 } 5673 } 5674 5675 if (v->MinActiveDRAMClockChangeMargin > 0 && PrefetchMode == 0) { 5676 *DRAMClockChangeSupport = dm_dram_clock_change_vactive; 5677 } else if ((v->SynchronizedVBlank == true || v->TotalNumberOfActiveOTG == 1 5678 || SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank > 0) && PrefetchMode == 0) { 5679 *DRAMClockChangeSupport = dm_dram_clock_change_vblank; 5680 } else { 5681 *DRAMClockChangeSupport = dm_dram_clock_change_unsupported; 5682 } 5683 5684 *StutterExitWatermark = v->SRExitTime + ExtraLatency + 10 / DCFCLKDeepSleep; 5685 *StutterEnterPlusExitWatermark = (v->SREnterPlusExitTime + ExtraLatency + 10 / DCFCLKDeepSleep); 5686 *Z8StutterExitWatermark = v->SRExitZ8Time + ExtraLatency + 10 / DCFCLKDeepSleep; 5687 *Z8StutterEnterPlusExitWatermark = v->SREnterPlusExitZ8Time + ExtraLatency + 10 / DCFCLKDeepSleep; 5688 5689 #ifdef __DML_VBA_DEBUG__ 5690 dml_print("DML::%s: StutterExitWatermark = %f\n", __func__, *StutterExitWatermark); 5691 dml_print("DML::%s: StutterEnterPlusExitWatermark = %f\n", __func__, *StutterEnterPlusExitWatermark); 5692 dml_print("DML::%s: Z8StutterExitWatermark = %f\n", __func__, *Z8StutterExitWatermark); 5693 dml_print("DML::%s: Z8StutterEnterPlusExitWatermark = %f\n", __func__, *Z8StutterEnterPlusExitWatermark); 5694 #endif 5695 } 5696 5697 static void CalculateDCFCLKDeepSleep( 5698 struct display_mode_lib *mode_lib, 5699 unsigned int NumberOfActivePlanes, 5700 int BytePerPixelY[], 5701 int BytePerPixelC[], 5702 double VRatio[], 5703 double 
VRatioChroma[], 5704 double SwathWidthY[], 5705 double SwathWidthC[], 5706 unsigned int DPPPerPlane[], 5707 double HRatio[], 5708 double HRatioChroma[], 5709 double PixelClock[], 5710 double PSCL_THROUGHPUT[], 5711 double PSCL_THROUGHPUT_CHROMA[], 5712 double DPPCLK[], 5713 double ReadBandwidthLuma[], 5714 double ReadBandwidthChroma[], 5715 int ReturnBusWidth, 5716 double *DCFCLKDeepSleep) 5717 { 5718 struct vba_vars_st *v = &mode_lib->vba; 5719 double DisplayPipeLineDeliveryTimeLuma; 5720 double DisplayPipeLineDeliveryTimeChroma; 5721 double ReadBandwidth = 0.0; 5722 int k; 5723 5724 for (k = 0; k < NumberOfActivePlanes; ++k) { 5725 5726 if (VRatio[k] <= 1) { 5727 DisplayPipeLineDeliveryTimeLuma = SwathWidthY[k] * DPPPerPlane[k] / HRatio[k] / PixelClock[k]; 5728 } else { 5729 DisplayPipeLineDeliveryTimeLuma = SwathWidthY[k] / PSCL_THROUGHPUT[k] / DPPCLK[k]; 5730 } 5731 if (BytePerPixelC[k] == 0) { 5732 DisplayPipeLineDeliveryTimeChroma = 0; 5733 } else { 5734 if (VRatioChroma[k] <= 1) { 5735 DisplayPipeLineDeliveryTimeChroma = SwathWidthC[k] * DPPPerPlane[k] / HRatioChroma[k] / PixelClock[k]; 5736 } else { 5737 DisplayPipeLineDeliveryTimeChroma = SwathWidthC[k] / PSCL_THROUGHPUT_CHROMA[k] / DPPCLK[k]; 5738 } 5739 } 5740 5741 if (BytePerPixelC[k] > 0) { 5742 v->DCFCLKDeepSleepPerPlane[k] = dml_max(__DML_MIN_DCFCLK_FACTOR__ * SwathWidthY[k] * BytePerPixelY[k] / 32.0 / DisplayPipeLineDeliveryTimeLuma, 5743 __DML_MIN_DCFCLK_FACTOR__ * SwathWidthC[k] * BytePerPixelC[k] / 32.0 / DisplayPipeLineDeliveryTimeChroma); 5744 } else { 5745 v->DCFCLKDeepSleepPerPlane[k] = __DML_MIN_DCFCLK_FACTOR__ * SwathWidthY[k] * BytePerPixelY[k] / 64.0 / DisplayPipeLineDeliveryTimeLuma; 5746 } 5747 v->DCFCLKDeepSleepPerPlane[k] = dml_max(v->DCFCLKDeepSleepPerPlane[k], PixelClock[k] / 16); 5748 5749 } 5750 5751 for (k = 0; k < NumberOfActivePlanes; ++k) { 5752 ReadBandwidth = ReadBandwidth + ReadBandwidthLuma[k] + ReadBandwidthChroma[k]; 5753 } 5754 5755 *DCFCLKDeepSleep = dml_max(8.0, 
__DML_MIN_DCFCLK_FACTOR__ * ReadBandwidth / ReturnBusWidth); 5756 5757 for (k = 0; k < NumberOfActivePlanes; ++k) { 5758 *DCFCLKDeepSleep = dml_max(*DCFCLKDeepSleep, v->DCFCLKDeepSleepPerPlane[k]); 5759 } 5760 } 5761 5762 static void CalculateUrgentBurstFactor( 5763 int swath_width_luma_ub, 5764 int swath_width_chroma_ub, 5765 unsigned int SwathHeightY, 5766 unsigned int SwathHeightC, 5767 double LineTime, 5768 double UrgentLatency, 5769 double CursorBufferSize, 5770 unsigned int CursorWidth, 5771 unsigned int CursorBPP, 5772 double VRatio, 5773 double VRatioC, 5774 double BytePerPixelInDETY, 5775 double BytePerPixelInDETC, 5776 double DETBufferSizeY, 5777 double DETBufferSizeC, 5778 double *UrgentBurstFactorCursor, 5779 double *UrgentBurstFactorLuma, 5780 double *UrgentBurstFactorChroma, 5781 bool *NotEnoughUrgentLatencyHiding) 5782 { 5783 double LinesInDETLuma; 5784 double LinesInDETChroma; 5785 unsigned int LinesInCursorBuffer; 5786 double CursorBufferSizeInTime; 5787 double DETBufferSizeInTimeLuma; 5788 double DETBufferSizeInTimeChroma; 5789 5790 *NotEnoughUrgentLatencyHiding = 0; 5791 5792 if (CursorWidth > 0) { 5793 LinesInCursorBuffer = 1 << (unsigned int) dml_floor(dml_log2(CursorBufferSize * 1024.0 / (CursorWidth * CursorBPP / 8.0)), 1.0); 5794 if (VRatio > 0) { 5795 CursorBufferSizeInTime = LinesInCursorBuffer * LineTime / VRatio; 5796 if (CursorBufferSizeInTime - UrgentLatency <= 0) { 5797 *NotEnoughUrgentLatencyHiding = 1; 5798 *UrgentBurstFactorCursor = 0; 5799 } else { 5800 *UrgentBurstFactorCursor = CursorBufferSizeInTime / (CursorBufferSizeInTime - UrgentLatency); 5801 } 5802 } else { 5803 *UrgentBurstFactorCursor = 1; 5804 } 5805 } 5806 5807 LinesInDETLuma = DETBufferSizeY / BytePerPixelInDETY / swath_width_luma_ub; 5808 if (VRatio > 0) { 5809 DETBufferSizeInTimeLuma = dml_floor(LinesInDETLuma, SwathHeightY) * LineTime / VRatio; 5810 if (DETBufferSizeInTimeLuma - UrgentLatency <= 0) { 5811 *NotEnoughUrgentLatencyHiding = 1; 5812 
*UrgentBurstFactorLuma = 0; 5813 } else { 5814 *UrgentBurstFactorLuma = DETBufferSizeInTimeLuma / (DETBufferSizeInTimeLuma - UrgentLatency); 5815 } 5816 } else { 5817 *UrgentBurstFactorLuma = 1; 5818 } 5819 5820 if (BytePerPixelInDETC > 0) { 5821 LinesInDETChroma = DETBufferSizeC / BytePerPixelInDETC / swath_width_chroma_ub; 5822 if (VRatio > 0) { 5823 DETBufferSizeInTimeChroma = dml_floor(LinesInDETChroma, SwathHeightC) * LineTime / VRatio; 5824 if (DETBufferSizeInTimeChroma - UrgentLatency <= 0) { 5825 *NotEnoughUrgentLatencyHiding = 1; 5826 *UrgentBurstFactorChroma = 0; 5827 } else { 5828 *UrgentBurstFactorChroma = DETBufferSizeInTimeChroma / (DETBufferSizeInTimeChroma - UrgentLatency); 5829 } 5830 } else { 5831 *UrgentBurstFactorChroma = 1; 5832 } 5833 } 5834 } 5835 5836 static void CalculatePixelDeliveryTimes( 5837 unsigned int NumberOfActivePlanes, 5838 double VRatio[], 5839 double VRatioChroma[], 5840 double VRatioPrefetchY[], 5841 double VRatioPrefetchC[], 5842 unsigned int swath_width_luma_ub[], 5843 unsigned int swath_width_chroma_ub[], 5844 unsigned int DPPPerPlane[], 5845 double HRatio[], 5846 double HRatioChroma[], 5847 double PixelClock[], 5848 double PSCL_THROUGHPUT[], 5849 double PSCL_THROUGHPUT_CHROMA[], 5850 double DPPCLK[], 5851 int BytePerPixelC[], 5852 enum scan_direction_class SourceScan[], 5853 unsigned int NumberOfCursors[], 5854 unsigned int CursorWidth[][DC__NUM_CURSOR__MAX], 5855 unsigned int CursorBPP[][DC__NUM_CURSOR__MAX], 5856 unsigned int BlockWidth256BytesY[], 5857 unsigned int BlockHeight256BytesY[], 5858 unsigned int BlockWidth256BytesC[], 5859 unsigned int BlockHeight256BytesC[], 5860 double DisplayPipeLineDeliveryTimeLuma[], 5861 double DisplayPipeLineDeliveryTimeChroma[], 5862 double DisplayPipeLineDeliveryTimeLumaPrefetch[], 5863 double DisplayPipeLineDeliveryTimeChromaPrefetch[], 5864 double DisplayPipeRequestDeliveryTimeLuma[], 5865 double DisplayPipeRequestDeliveryTimeChroma[], 5866 double 
DisplayPipeRequestDeliveryTimeLumaPrefetch[], 5867 double DisplayPipeRequestDeliveryTimeChromaPrefetch[], 5868 double CursorRequestDeliveryTime[], 5869 double CursorRequestDeliveryTimePrefetch[]) 5870 { 5871 double req_per_swath_ub; 5872 int k; 5873 5874 for (k = 0; k < NumberOfActivePlanes; ++k) { 5875 if (VRatio[k] <= 1) { 5876 DisplayPipeLineDeliveryTimeLuma[k] = swath_width_luma_ub[k] * DPPPerPlane[k] / HRatio[k] / PixelClock[k]; 5877 } else { 5878 DisplayPipeLineDeliveryTimeLuma[k] = swath_width_luma_ub[k] / PSCL_THROUGHPUT[k] / DPPCLK[k]; 5879 } 5880 5881 if (BytePerPixelC[k] == 0) { 5882 DisplayPipeLineDeliveryTimeChroma[k] = 0; 5883 } else { 5884 if (VRatioChroma[k] <= 1) { 5885 DisplayPipeLineDeliveryTimeChroma[k] = swath_width_chroma_ub[k] * DPPPerPlane[k] / HRatioChroma[k] / PixelClock[k]; 5886 } else { 5887 DisplayPipeLineDeliveryTimeChroma[k] = swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / DPPCLK[k]; 5888 } 5889 } 5890 5891 if (VRatioPrefetchY[k] <= 1) { 5892 DisplayPipeLineDeliveryTimeLumaPrefetch[k] = swath_width_luma_ub[k] * DPPPerPlane[k] / HRatio[k] / PixelClock[k]; 5893 } else { 5894 DisplayPipeLineDeliveryTimeLumaPrefetch[k] = swath_width_luma_ub[k] / PSCL_THROUGHPUT[k] / DPPCLK[k]; 5895 } 5896 5897 if (BytePerPixelC[k] == 0) { 5898 DisplayPipeLineDeliveryTimeChromaPrefetch[k] = 0; 5899 } else { 5900 if (VRatioPrefetchC[k] <= 1) { 5901 DisplayPipeLineDeliveryTimeChromaPrefetch[k] = swath_width_chroma_ub[k] * DPPPerPlane[k] / HRatioChroma[k] / PixelClock[k]; 5902 } else { 5903 DisplayPipeLineDeliveryTimeChromaPrefetch[k] = swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / DPPCLK[k]; 5904 } 5905 } 5906 } 5907 5908 for (k = 0; k < NumberOfActivePlanes; ++k) { 5909 if (SourceScan[k] != dm_vert) { 5910 req_per_swath_ub = swath_width_luma_ub[k] / BlockWidth256BytesY[k]; 5911 } else { 5912 req_per_swath_ub = swath_width_luma_ub[k] / BlockHeight256BytesY[k]; 5913 } 5914 DisplayPipeRequestDeliveryTimeLuma[k] = 
DisplayPipeLineDeliveryTimeLuma[k] / req_per_swath_ub; 5915 DisplayPipeRequestDeliveryTimeLumaPrefetch[k] = DisplayPipeLineDeliveryTimeLumaPrefetch[k] / req_per_swath_ub; 5916 if (BytePerPixelC[k] == 0) { 5917 DisplayPipeRequestDeliveryTimeChroma[k] = 0; 5918 DisplayPipeRequestDeliveryTimeChromaPrefetch[k] = 0; 5919 } else { 5920 if (SourceScan[k] != dm_vert) { 5921 req_per_swath_ub = swath_width_chroma_ub[k] / BlockWidth256BytesC[k]; 5922 } else { 5923 req_per_swath_ub = swath_width_chroma_ub[k] / BlockHeight256BytesC[k]; 5924 } 5925 DisplayPipeRequestDeliveryTimeChroma[k] = DisplayPipeLineDeliveryTimeChroma[k] / req_per_swath_ub; 5926 DisplayPipeRequestDeliveryTimeChromaPrefetch[k] = DisplayPipeLineDeliveryTimeChromaPrefetch[k] / req_per_swath_ub; 5927 } 5928 #ifdef __DML_VBA_DEBUG__ 5929 dml_print("DML::%s: k=%d : HRatio = %f\n", __func__, k, HRatio[k]); 5930 dml_print("DML::%s: k=%d : VRatio = %f\n", __func__, k, VRatio[k]); 5931 dml_print("DML::%s: k=%d : HRatioChroma = %f\n", __func__, k, HRatioChroma[k]); 5932 dml_print("DML::%s: k=%d : VRatioChroma = %f\n", __func__, k, VRatioChroma[k]); 5933 dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeLuma = %f\n", __func__, k, DisplayPipeLineDeliveryTimeLuma[k]); 5934 dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeLumaPrefetch = %f\n", __func__, k, DisplayPipeLineDeliveryTimeLumaPrefetch[k]); 5935 dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeChroma = %f\n", __func__, k, DisplayPipeLineDeliveryTimeChroma[k]); 5936 dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeChromaPrefetch = %f\n", __func__, k, DisplayPipeLineDeliveryTimeChromaPrefetch[k]); 5937 dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeLuma = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeLuma[k]); 5938 dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeLumaPrefetch = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeLumaPrefetch[k]); 5939 dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeChroma = %f\n", 
__func__, k, DisplayPipeRequestDeliveryTimeChroma[k]); 5940 dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeChromaPrefetch = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeChromaPrefetch[k]); 5941 #endif 5942 } 5943 5944 for (k = 0; k < NumberOfActivePlanes; ++k) { 5945 int cursor_req_per_width; 5946 5947 cursor_req_per_width = dml_ceil(CursorWidth[k][0] * CursorBPP[k][0] / 256 / 8, 1); 5948 if (NumberOfCursors[k] > 0) { 5949 if (VRatio[k] <= 1) { 5950 CursorRequestDeliveryTime[k] = CursorWidth[k][0] / HRatio[k] / PixelClock[k] / cursor_req_per_width; 5951 } else { 5952 CursorRequestDeliveryTime[k] = CursorWidth[k][0] / PSCL_THROUGHPUT[k] / DPPCLK[k] / cursor_req_per_width; 5953 } 5954 if (VRatioPrefetchY[k] <= 1) { 5955 CursorRequestDeliveryTimePrefetch[k] = CursorWidth[k][0] / HRatio[k] / PixelClock[k] / cursor_req_per_width; 5956 } else { 5957 CursorRequestDeliveryTimePrefetch[k] = CursorWidth[k][0] / PSCL_THROUGHPUT[k] / DPPCLK[k] / cursor_req_per_width; 5958 } 5959 } else { 5960 CursorRequestDeliveryTime[k] = 0; 5961 CursorRequestDeliveryTimePrefetch[k] = 0; 5962 } 5963 #ifdef __DML_VBA_DEBUG__ 5964 dml_print("DML::%s: k=%d : NumberOfCursors = %d\n", __func__, k, NumberOfCursors[k]); 5965 dml_print("DML::%s: k=%d : CursorRequestDeliveryTime = %f\n", __func__, k, CursorRequestDeliveryTime[k]); 5966 dml_print("DML::%s: k=%d : CursorRequestDeliveryTimePrefetch = %f\n", __func__, k, CursorRequestDeliveryTimePrefetch[k]); 5967 #endif 5968 } 5969 } 5970 5971 static void CalculateMetaAndPTETimes( 5972 int NumberOfActivePlanes, 5973 bool GPUVMEnable, 5974 int MetaChunkSize, 5975 int MinMetaChunkSizeBytes, 5976 int HTotal[], 5977 double VRatio[], 5978 double VRatioChroma[], 5979 double DestinationLinesToRequestRowInVBlank[], 5980 double DestinationLinesToRequestRowInImmediateFlip[], 5981 bool DCCEnable[], 5982 double PixelClock[], 5983 int BytePerPixelY[], 5984 int BytePerPixelC[], 5985 enum scan_direction_class SourceScan[], 5986 int dpte_row_height[], 
5987 int dpte_row_height_chroma[], 5988 int meta_row_width[], 5989 int meta_row_width_chroma[], 5990 int meta_row_height[], 5991 int meta_row_height_chroma[], 5992 int meta_req_width[], 5993 int meta_req_width_chroma[], 5994 int meta_req_height[], 5995 int meta_req_height_chroma[], 5996 int dpte_group_bytes[], 5997 int PTERequestSizeY[], 5998 int PTERequestSizeC[], 5999 int PixelPTEReqWidthY[], 6000 int PixelPTEReqHeightY[], 6001 int PixelPTEReqWidthC[], 6002 int PixelPTEReqHeightC[], 6003 int dpte_row_width_luma_ub[], 6004 int dpte_row_width_chroma_ub[], 6005 double DST_Y_PER_PTE_ROW_NOM_L[], 6006 double DST_Y_PER_PTE_ROW_NOM_C[], 6007 double DST_Y_PER_META_ROW_NOM_L[], 6008 double DST_Y_PER_META_ROW_NOM_C[], 6009 double TimePerMetaChunkNominal[], 6010 double TimePerChromaMetaChunkNominal[], 6011 double TimePerMetaChunkVBlank[], 6012 double TimePerChromaMetaChunkVBlank[], 6013 double TimePerMetaChunkFlip[], 6014 double TimePerChromaMetaChunkFlip[], 6015 double time_per_pte_group_nom_luma[], 6016 double time_per_pte_group_vblank_luma[], 6017 double time_per_pte_group_flip_luma[], 6018 double time_per_pte_group_nom_chroma[], 6019 double time_per_pte_group_vblank_chroma[], 6020 double time_per_pte_group_flip_chroma[]) 6021 { 6022 unsigned int meta_chunk_width; 6023 unsigned int min_meta_chunk_width; 6024 unsigned int meta_chunk_per_row_int; 6025 unsigned int meta_row_remainder; 6026 unsigned int meta_chunk_threshold; 6027 unsigned int meta_chunks_per_row_ub; 6028 unsigned int meta_chunk_width_chroma; 6029 unsigned int min_meta_chunk_width_chroma; 6030 unsigned int meta_chunk_per_row_int_chroma; 6031 unsigned int meta_row_remainder_chroma; 6032 unsigned int meta_chunk_threshold_chroma; 6033 unsigned int meta_chunks_per_row_ub_chroma; 6034 unsigned int dpte_group_width_luma; 6035 unsigned int dpte_groups_per_row_luma_ub; 6036 unsigned int dpte_group_width_chroma; 6037 unsigned int dpte_groups_per_row_chroma_ub; 6038 int k; 6039 6040 for (k = 0; k < 
NumberOfActivePlanes; ++k) { 6041 DST_Y_PER_PTE_ROW_NOM_L[k] = dpte_row_height[k] / VRatio[k]; 6042 if (BytePerPixelC[k] == 0) { 6043 DST_Y_PER_PTE_ROW_NOM_C[k] = 0; 6044 } else { 6045 DST_Y_PER_PTE_ROW_NOM_C[k] = dpte_row_height_chroma[k] / VRatioChroma[k]; 6046 } 6047 DST_Y_PER_META_ROW_NOM_L[k] = meta_row_height[k] / VRatio[k]; 6048 if (BytePerPixelC[k] == 0) { 6049 DST_Y_PER_META_ROW_NOM_C[k] = 0; 6050 } else { 6051 DST_Y_PER_META_ROW_NOM_C[k] = meta_row_height_chroma[k] / VRatioChroma[k]; 6052 } 6053 } 6054 6055 for (k = 0; k < NumberOfActivePlanes; ++k) { 6056 if (DCCEnable[k] == true) { 6057 meta_chunk_width = MetaChunkSize * 1024 * 256 / BytePerPixelY[k] / meta_row_height[k]; 6058 min_meta_chunk_width = MinMetaChunkSizeBytes * 256 / BytePerPixelY[k] / meta_row_height[k]; 6059 meta_chunk_per_row_int = meta_row_width[k] / meta_chunk_width; 6060 meta_row_remainder = meta_row_width[k] % meta_chunk_width; 6061 if (SourceScan[k] != dm_vert) { 6062 meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_width[k]; 6063 } else { 6064 meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_height[k]; 6065 } 6066 if (meta_row_remainder <= meta_chunk_threshold) { 6067 meta_chunks_per_row_ub = meta_chunk_per_row_int + 1; 6068 } else { 6069 meta_chunks_per_row_ub = meta_chunk_per_row_int + 2; 6070 } 6071 TimePerMetaChunkNominal[k] = meta_row_height[k] / VRatio[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub; 6072 TimePerMetaChunkVBlank[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub; 6073 TimePerMetaChunkFlip[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub; 6074 if (BytePerPixelC[k] == 0) { 6075 TimePerChromaMetaChunkNominal[k] = 0; 6076 TimePerChromaMetaChunkVBlank[k] = 0; 6077 TimePerChromaMetaChunkFlip[k] = 0; 6078 } else { 6079 meta_chunk_width_chroma = MetaChunkSize * 1024 * 256 / BytePerPixelC[k] / meta_row_height_chroma[k]; 6080 
min_meta_chunk_width_chroma = MinMetaChunkSizeBytes * 256 / BytePerPixelC[k] / meta_row_height_chroma[k]; 6081 meta_chunk_per_row_int_chroma = (double) meta_row_width_chroma[k] / meta_chunk_width_chroma; 6082 meta_row_remainder_chroma = meta_row_width_chroma[k] % meta_chunk_width_chroma; 6083 if (SourceScan[k] != dm_vert) { 6084 meta_chunk_threshold_chroma = 2 * min_meta_chunk_width_chroma - meta_req_width_chroma[k]; 6085 } else { 6086 meta_chunk_threshold_chroma = 2 * min_meta_chunk_width_chroma - meta_req_height_chroma[k]; 6087 } 6088 if (meta_row_remainder_chroma <= meta_chunk_threshold_chroma) { 6089 meta_chunks_per_row_ub_chroma = meta_chunk_per_row_int_chroma + 1; 6090 } else { 6091 meta_chunks_per_row_ub_chroma = meta_chunk_per_row_int_chroma + 2; 6092 } 6093 TimePerChromaMetaChunkNominal[k] = meta_row_height_chroma[k] / VRatioChroma[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma; 6094 TimePerChromaMetaChunkVBlank[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma; 6095 TimePerChromaMetaChunkFlip[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma; 6096 } 6097 } else { 6098 TimePerMetaChunkNominal[k] = 0; 6099 TimePerMetaChunkVBlank[k] = 0; 6100 TimePerMetaChunkFlip[k] = 0; 6101 TimePerChromaMetaChunkNominal[k] = 0; 6102 TimePerChromaMetaChunkVBlank[k] = 0; 6103 TimePerChromaMetaChunkFlip[k] = 0; 6104 } 6105 } 6106 6107 for (k = 0; k < NumberOfActivePlanes; ++k) { 6108 if (GPUVMEnable == true) { 6109 if (SourceScan[k] != dm_vert) { 6110 dpte_group_width_luma = dpte_group_bytes[k] / PTERequestSizeY[k] * PixelPTEReqWidthY[k]; 6111 } else { 6112 dpte_group_width_luma = dpte_group_bytes[k] / PTERequestSizeY[k] * PixelPTEReqHeightY[k]; 6113 } 6114 dpte_groups_per_row_luma_ub = dml_ceil(1.0 * dpte_row_width_luma_ub[k] / dpte_group_width_luma, 1); 6115 time_per_pte_group_nom_luma[k] = DST_Y_PER_PTE_ROW_NOM_L[k] * HTotal[k] / 
PixelClock[k] / dpte_groups_per_row_luma_ub; 6116 time_per_pte_group_vblank_luma[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub; 6117 time_per_pte_group_flip_luma[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub; 6118 if (BytePerPixelC[k] == 0) { 6119 time_per_pte_group_nom_chroma[k] = 0; 6120 time_per_pte_group_vblank_chroma[k] = 0; 6121 time_per_pte_group_flip_chroma[k] = 0; 6122 } else { 6123 if (SourceScan[k] != dm_vert) { 6124 dpte_group_width_chroma = dpte_group_bytes[k] / PTERequestSizeC[k] * PixelPTEReqWidthC[k]; 6125 } else { 6126 dpte_group_width_chroma = dpte_group_bytes[k] / PTERequestSizeC[k] * PixelPTEReqHeightC[k]; 6127 } 6128 dpte_groups_per_row_chroma_ub = dml_ceil(1.0 * dpte_row_width_chroma_ub[k] / dpte_group_width_chroma, 1); 6129 time_per_pte_group_nom_chroma[k] = DST_Y_PER_PTE_ROW_NOM_C[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub; 6130 time_per_pte_group_vblank_chroma[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub; 6131 time_per_pte_group_flip_chroma[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub; 6132 } 6133 } else { 6134 time_per_pte_group_nom_luma[k] = 0; 6135 time_per_pte_group_vblank_luma[k] = 0; 6136 time_per_pte_group_flip_luma[k] = 0; 6137 time_per_pte_group_nom_chroma[k] = 0; 6138 time_per_pte_group_vblank_chroma[k] = 0; 6139 time_per_pte_group_flip_chroma[k] = 0; 6140 } 6141 } 6142 } 6143 6144 static void CalculateVMGroupAndRequestTimes( 6145 unsigned int NumberOfActivePlanes, 6146 bool GPUVMEnable, 6147 unsigned int GPUVMMaxPageTableLevels, 6148 unsigned int HTotal[], 6149 int BytePerPixelC[], 6150 double DestinationLinesToRequestVMInVBlank[], 6151 double DestinationLinesToRequestVMInImmediateFlip[], 6152 bool DCCEnable[], 6153 double PixelClock[], 6154 int 
dpte_row_width_luma_ub[], 6155 int dpte_row_width_chroma_ub[], 6156 int vm_group_bytes[], 6157 unsigned int dpde0_bytes_per_frame_ub_l[], 6158 unsigned int dpde0_bytes_per_frame_ub_c[], 6159 int meta_pte_bytes_per_frame_ub_l[], 6160 int meta_pte_bytes_per_frame_ub_c[], 6161 double TimePerVMGroupVBlank[], 6162 double TimePerVMGroupFlip[], 6163 double TimePerVMRequestVBlank[], 6164 double TimePerVMRequestFlip[]) 6165 { 6166 int num_group_per_lower_vm_stage; 6167 int num_req_per_lower_vm_stage; 6168 int k; 6169 6170 for (k = 0; k < NumberOfActivePlanes; ++k) { 6171 if (GPUVMEnable == true && (DCCEnable[k] == true || GPUVMMaxPageTableLevels > 1)) { 6172 if (DCCEnable[k] == false) { 6173 if (BytePerPixelC[k] > 0) { 6174 num_group_per_lower_vm_stage = dml_ceil((double) (dpde0_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1) 6175 + dml_ceil((double) (dpde0_bytes_per_frame_ub_c[k]) / (double) (vm_group_bytes[k]), 1); 6176 } else { 6177 num_group_per_lower_vm_stage = dml_ceil((double) (dpde0_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1); 6178 } 6179 } else { 6180 if (GPUVMMaxPageTableLevels == 1) { 6181 if (BytePerPixelC[k] > 0) { 6182 num_group_per_lower_vm_stage = dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1) 6183 + dml_ceil((double) (meta_pte_bytes_per_frame_ub_c[k]) / (double) (vm_group_bytes[k]), 1); 6184 } else { 6185 num_group_per_lower_vm_stage = dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1); 6186 } 6187 } else { 6188 if (BytePerPixelC[k] > 0) { 6189 num_group_per_lower_vm_stage = 2 + dml_ceil((double) (dpde0_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1) 6190 + dml_ceil((double) (dpde0_bytes_per_frame_ub_c[k]) / (double) (vm_group_bytes[k]), 1) 6191 + dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1) 6192 + dml_ceil((double) (meta_pte_bytes_per_frame_ub_c[k]) / (double) (vm_group_bytes[k]), 1); 6193 } else { 
6194 num_group_per_lower_vm_stage = 1 + dml_ceil((double) (dpde0_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1) 6195 + dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1); 6196 } 6197 } 6198 } 6199 6200 if (DCCEnable[k] == false) { 6201 if (BytePerPixelC[k] > 0) { 6202 num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k] / 64 + dpde0_bytes_per_frame_ub_c[k] / 64; 6203 } else { 6204 num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k] / 64; 6205 } 6206 } else { 6207 if (GPUVMMaxPageTableLevels == 1) { 6208 if (BytePerPixelC[k] > 0) { 6209 num_req_per_lower_vm_stage = meta_pte_bytes_per_frame_ub_l[k] / 64 + meta_pte_bytes_per_frame_ub_c[k] / 64; 6210 } else { 6211 num_req_per_lower_vm_stage = meta_pte_bytes_per_frame_ub_l[k] / 64; 6212 } 6213 } else { 6214 if (BytePerPixelC[k] > 0) { 6215 num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k] / 64 + dpde0_bytes_per_frame_ub_c[k] / 64 6216 + meta_pte_bytes_per_frame_ub_l[k] / 64 + meta_pte_bytes_per_frame_ub_c[k] / 64; 6217 } else { 6218 num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k] / 64 + meta_pte_bytes_per_frame_ub_l[k] / 64; 6219 } 6220 } 6221 } 6222 6223 TimePerVMGroupVBlank[k] = DestinationLinesToRequestVMInVBlank[k] * HTotal[k] / PixelClock[k] / num_group_per_lower_vm_stage; 6224 TimePerVMGroupFlip[k] = DestinationLinesToRequestVMInImmediateFlip[k] * HTotal[k] / PixelClock[k] / num_group_per_lower_vm_stage; 6225 TimePerVMRequestVBlank[k] = DestinationLinesToRequestVMInVBlank[k] * HTotal[k] / PixelClock[k] / num_req_per_lower_vm_stage; 6226 TimePerVMRequestFlip[k] = DestinationLinesToRequestVMInImmediateFlip[k] * HTotal[k] / PixelClock[k] / num_req_per_lower_vm_stage; 6227 6228 if (GPUVMMaxPageTableLevels > 2) { 6229 TimePerVMGroupVBlank[k] = TimePerVMGroupVBlank[k] / 2; 6230 TimePerVMGroupFlip[k] = TimePerVMGroupFlip[k] / 2; 6231 TimePerVMRequestVBlank[k] = TimePerVMRequestVBlank[k] / 2; 6232 TimePerVMRequestFlip[k] = 
TimePerVMRequestFlip[k] / 2; 6233 } 6234 6235 } else { 6236 TimePerVMGroupVBlank[k] = 0; 6237 TimePerVMGroupFlip[k] = 0; 6238 TimePerVMRequestVBlank[k] = 0; 6239 TimePerVMRequestFlip[k] = 0; 6240 } 6241 } 6242 } 6243 6244 static void CalculateStutterEfficiency( 6245 struct display_mode_lib *mode_lib, 6246 int CompressedBufferSizeInkByte, 6247 bool UnboundedRequestEnabled, 6248 int ConfigReturnBufferSizeInKByte, 6249 int MetaFIFOSizeInKEntries, 6250 int ZeroSizeBufferEntries, 6251 int NumberOfActivePlanes, 6252 int ROBBufferSizeInKByte, 6253 double TotalDataReadBandwidth, 6254 double DCFCLK, 6255 double ReturnBW, 6256 double COMPBUF_RESERVED_SPACE_64B, 6257 double COMPBUF_RESERVED_SPACE_ZS, 6258 double SRExitTime, 6259 double SRExitZ8Time, 6260 bool SynchronizedVBlank, 6261 double Z8StutterEnterPlusExitWatermark, 6262 double StutterEnterPlusExitWatermark, 6263 bool ProgressiveToInterlaceUnitInOPP, 6264 bool Interlace[], 6265 double MinTTUVBlank[], 6266 int DPPPerPlane[], 6267 unsigned int DETBufferSizeY[], 6268 int BytePerPixelY[], 6269 double BytePerPixelDETY[], 6270 double SwathWidthY[], 6271 int SwathHeightY[], 6272 int SwathHeightC[], 6273 double NetDCCRateLuma[], 6274 double NetDCCRateChroma[], 6275 double DCCFractionOfZeroSizeRequestsLuma[], 6276 double DCCFractionOfZeroSizeRequestsChroma[], 6277 int HTotal[], 6278 int VTotal[], 6279 double PixelClock[], 6280 double VRatio[], 6281 enum scan_direction_class SourceScan[], 6282 int BlockHeight256BytesY[], 6283 int BlockWidth256BytesY[], 6284 int BlockHeight256BytesC[], 6285 int BlockWidth256BytesC[], 6286 int DCCYMaxUncompressedBlock[], 6287 int DCCCMaxUncompressedBlock[], 6288 int VActive[], 6289 bool DCCEnable[], 6290 bool WritebackEnable[], 6291 double ReadBandwidthPlaneLuma[], 6292 double ReadBandwidthPlaneChroma[], 6293 double meta_row_bw[], 6294 double dpte_row_bw[], 6295 double *StutterEfficiencyNotIncludingVBlank, 6296 double *StutterEfficiency, 6297 int *NumberOfStutterBurstsPerFrame, 6298 double 
*Z8StutterEfficiencyNotIncludingVBlank, 6299 double *Z8StutterEfficiency, 6300 int *Z8NumberOfStutterBurstsPerFrame, 6301 double *StutterPeriod) 6302 { 6303 struct vba_vars_st *v = &mode_lib->vba; 6304 6305 double DETBufferingTimeY; 6306 double SwathWidthYCriticalPlane = 0; 6307 double VActiveTimeCriticalPlane = 0; 6308 double FrameTimeCriticalPlane = 0; 6309 int BytePerPixelYCriticalPlane = 0; 6310 double LinesToFinishSwathTransferStutterCriticalPlane = 0; 6311 double MinTTUVBlankCriticalPlane = 0; 6312 double TotalCompressedReadBandwidth; 6313 double TotalRowReadBandwidth; 6314 double AverageDCCCompressionRate; 6315 double EffectiveCompressedBufferSize; 6316 double PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer; 6317 double StutterBurstTime; 6318 int TotalActiveWriteback; 6319 double LinesInDETY; 6320 double LinesInDETYRoundedDownToSwath; 6321 double MaximumEffectiveCompressionLuma; 6322 double MaximumEffectiveCompressionChroma; 6323 double TotalZeroSizeRequestReadBandwidth; 6324 double TotalZeroSizeCompressedReadBandwidth; 6325 double AverageDCCZeroSizeFraction; 6326 double AverageZeroSizeCompressionRate; 6327 int TotalNumberOfActiveOTG = 0; 6328 double LastStutterPeriod = 0.0; 6329 double LastZ8StutterPeriod = 0.0; 6330 int k; 6331 6332 TotalZeroSizeRequestReadBandwidth = 0; 6333 TotalZeroSizeCompressedReadBandwidth = 0; 6334 TotalRowReadBandwidth = 0; 6335 TotalCompressedReadBandwidth = 0; 6336 6337 for (k = 0; k < NumberOfActivePlanes; ++k) { 6338 if (DCCEnable[k] == true) { 6339 if ((SourceScan[k] == dm_vert && BlockWidth256BytesY[k] > SwathHeightY[k]) || (SourceScan[k] != dm_vert && BlockHeight256BytesY[k] > SwathHeightY[k]) 6340 || DCCYMaxUncompressedBlock[k] < 256) { 6341 MaximumEffectiveCompressionLuma = 2; 6342 } else { 6343 MaximumEffectiveCompressionLuma = 4; 6344 } 6345 TotalCompressedReadBandwidth = TotalCompressedReadBandwidth + ReadBandwidthPlaneLuma[k] / dml_min(NetDCCRateLuma[k], MaximumEffectiveCompressionLuma); 6346 
TotalZeroSizeRequestReadBandwidth = TotalZeroSizeRequestReadBandwidth + ReadBandwidthPlaneLuma[k] * DCCFractionOfZeroSizeRequestsLuma[k]; 6347 TotalZeroSizeCompressedReadBandwidth = TotalZeroSizeCompressedReadBandwidth 6348 + ReadBandwidthPlaneLuma[k] * DCCFractionOfZeroSizeRequestsLuma[k] / MaximumEffectiveCompressionLuma; 6349 if (ReadBandwidthPlaneChroma[k] > 0) { 6350 if ((SourceScan[k] == dm_vert && BlockWidth256BytesC[k] > SwathHeightC[k]) 6351 || (SourceScan[k] != dm_vert && BlockHeight256BytesC[k] > SwathHeightC[k]) || DCCCMaxUncompressedBlock[k] < 256) { 6352 MaximumEffectiveCompressionChroma = 2; 6353 } else { 6354 MaximumEffectiveCompressionChroma = 4; 6355 } 6356 TotalCompressedReadBandwidth = TotalCompressedReadBandwidth 6357 + ReadBandwidthPlaneChroma[k] / dml_min(NetDCCRateChroma[k], MaximumEffectiveCompressionChroma); 6358 TotalZeroSizeRequestReadBandwidth = TotalZeroSizeRequestReadBandwidth + ReadBandwidthPlaneChroma[k] * DCCFractionOfZeroSizeRequestsChroma[k]; 6359 TotalZeroSizeCompressedReadBandwidth = TotalZeroSizeCompressedReadBandwidth 6360 + ReadBandwidthPlaneChroma[k] * DCCFractionOfZeroSizeRequestsChroma[k] / MaximumEffectiveCompressionChroma; 6361 } 6362 } else { 6363 TotalCompressedReadBandwidth = TotalCompressedReadBandwidth + ReadBandwidthPlaneLuma[k] + ReadBandwidthPlaneChroma[k]; 6364 } 6365 TotalRowReadBandwidth = TotalRowReadBandwidth + DPPPerPlane[k] * (meta_row_bw[k] + dpte_row_bw[k]); 6366 } 6367 6368 AverageDCCCompressionRate = TotalDataReadBandwidth / TotalCompressedReadBandwidth; 6369 AverageDCCZeroSizeFraction = TotalZeroSizeRequestReadBandwidth / TotalDataReadBandwidth; 6370 6371 #ifdef __DML_VBA_DEBUG__ 6372 dml_print("DML::%s: TotalCompressedReadBandwidth = %f\n", __func__, TotalCompressedReadBandwidth); 6373 dml_print("DML::%s: TotalZeroSizeRequestReadBandwidth = %f\n", __func__, TotalZeroSizeRequestReadBandwidth); 6374 dml_print("DML::%s: TotalZeroSizeCompressedReadBandwidth = %f\n", __func__, 
TotalZeroSizeCompressedReadBandwidth); 6375 dml_print("DML::%s: MaximumEffectiveCompressionLuma = %f\n", __func__, MaximumEffectiveCompressionLuma); 6376 dml_print("DML::%s: MaximumEffectiveCompressionChroma = %f\n", __func__, MaximumEffectiveCompressionChroma); 6377 dml_print("DML::%s: AverageDCCCompressionRate = %f\n", __func__, AverageDCCCompressionRate); 6378 dml_print("DML::%s: AverageDCCZeroSizeFraction = %f\n", __func__, AverageDCCZeroSizeFraction); 6379 dml_print("DML::%s: CompressedBufferSizeInkByte = %d\n", __func__, CompressedBufferSizeInkByte); 6380 #endif 6381 6382 if (AverageDCCZeroSizeFraction == 1) { 6383 AverageZeroSizeCompressionRate = TotalZeroSizeRequestReadBandwidth / TotalZeroSizeCompressedReadBandwidth; 6384 EffectiveCompressedBufferSize = MetaFIFOSizeInKEntries * 1024 * 64 * AverageZeroSizeCompressionRate + (ZeroSizeBufferEntries - COMPBUF_RESERVED_SPACE_ZS) * 64 * AverageZeroSizeCompressionRate; 6385 } else if (AverageDCCZeroSizeFraction > 0) { 6386 AverageZeroSizeCompressionRate = TotalZeroSizeRequestReadBandwidth / TotalZeroSizeCompressedReadBandwidth; 6387 EffectiveCompressedBufferSize = dml_min( 6388 CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate, 6389 MetaFIFOSizeInKEntries * 1024 * 64 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate + 1 / AverageDCCCompressionRate)) 6390 + dml_min((ROBBufferSizeInKByte * 1024 - COMPBUF_RESERVED_SPACE_64B * 64) * AverageDCCCompressionRate, 6391 (ZeroSizeBufferEntries - COMPBUF_RESERVED_SPACE_ZS) * 64 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate)); 6392 dml_print("DML::%s: min 1 = %f\n", __func__, CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate); 6393 dml_print( 6394 "DML::%s: min 2 = %f\n", 6395 __func__, 6396 MetaFIFOSizeInKEntries * 1024 * 64 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate + 1 / AverageDCCCompressionRate)); 6397 dml_print("DML::%s: min 3 = %f\n", __func__, ROBBufferSizeInKByte * 1024 * 
AverageDCCCompressionRate); 6398 dml_print("DML::%s: min 4 = %f\n", __func__, ZeroSizeBufferEntries * 64 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate)); 6399 } else { 6400 EffectiveCompressedBufferSize = dml_min( 6401 CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate, 6402 MetaFIFOSizeInKEntries * 1024 * 64 * AverageDCCCompressionRate) + (ROBBufferSizeInKByte * 1024 - COMPBUF_RESERVED_SPACE_64B * 64) * AverageDCCCompressionRate; 6403 dml_print("DML::%s: min 1 = %f\n", __func__, CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate); 6404 dml_print("DML::%s: min 2 = %f\n", __func__, MetaFIFOSizeInKEntries * 1024 * 64 * AverageDCCCompressionRate); 6405 } 6406 6407 #ifdef __DML_VBA_DEBUG__ 6408 dml_print("DML::%s: MetaFIFOSizeInKEntries = %d\n", __func__, MetaFIFOSizeInKEntries); 6409 dml_print("DML::%s: AverageZeroSizeCompressionRate = %f\n", __func__, AverageZeroSizeCompressionRate); 6410 dml_print("DML::%s: EffectiveCompressedBufferSize = %f\n", __func__, EffectiveCompressedBufferSize); 6411 #endif 6412 6413 *StutterPeriod = 0; 6414 for (k = 0; k < NumberOfActivePlanes; ++k) { 6415 LinesInDETY = (DETBufferSizeY[k] + (UnboundedRequestEnabled == true ? 
EffectiveCompressedBufferSize : 0) * ReadBandwidthPlaneLuma[k] / TotalDataReadBandwidth) 6416 / BytePerPixelDETY[k] / SwathWidthY[k]; 6417 LinesInDETYRoundedDownToSwath = dml_floor(LinesInDETY, SwathHeightY[k]); 6418 DETBufferingTimeY = LinesInDETYRoundedDownToSwath * (HTotal[k] / PixelClock[k]) / VRatio[k]; 6419 #ifdef __DML_VBA_DEBUG__ 6420 dml_print("DML::%s: k=%0d DETBufferSizeY = %f\n", __func__, k, DETBufferSizeY[k]); 6421 dml_print("DML::%s: k=%0d BytePerPixelDETY = %f\n", __func__, k, BytePerPixelDETY[k]); 6422 dml_print("DML::%s: k=%0d SwathWidthY = %f\n", __func__, k, SwathWidthY[k]); 6423 dml_print("DML::%s: k=%0d ReadBandwidthPlaneLuma = %f\n", __func__, k, ReadBandwidthPlaneLuma[k]); 6424 dml_print("DML::%s: k=%0d TotalDataReadBandwidth = %f\n", __func__, k, TotalDataReadBandwidth); 6425 dml_print("DML::%s: k=%0d LinesInDETY = %f\n", __func__, k, LinesInDETY); 6426 dml_print("DML::%s: k=%0d LinesInDETYRoundedDownToSwath = %f\n", __func__, k, LinesInDETYRoundedDownToSwath); 6427 dml_print("DML::%s: k=%0d HTotal = %d\n", __func__, k, HTotal[k]); 6428 dml_print("DML::%s: k=%0d PixelClock = %f\n", __func__, k, PixelClock[k]); 6429 dml_print("DML::%s: k=%0d VRatio = %f\n", __func__, k, VRatio[k]); 6430 dml_print("DML::%s: k=%0d DETBufferingTimeY = %f\n", __func__, k, DETBufferingTimeY); 6431 dml_print("DML::%s: k=%0d PixelClock = %f\n", __func__, k, PixelClock[k]); 6432 #endif 6433 6434 if (k == 0 || DETBufferingTimeY < *StutterPeriod) { 6435 bool isInterlaceTiming = Interlace[k] && !ProgressiveToInterlaceUnitInOPP; 6436 6437 *StutterPeriod = DETBufferingTimeY; 6438 FrameTimeCriticalPlane = (isInterlaceTiming ? dml_floor(VTotal[k] / 2.0, 1.0) : VTotal[k]) * HTotal[k] / PixelClock[k]; 6439 VActiveTimeCriticalPlane = (isInterlaceTiming ? 
dml_floor(VActive[k] / 2.0, 1.0) : VActive[k]) * HTotal[k] / PixelClock[k]; 6440 BytePerPixelYCriticalPlane = BytePerPixelY[k]; 6441 SwathWidthYCriticalPlane = SwathWidthY[k]; 6442 LinesToFinishSwathTransferStutterCriticalPlane = SwathHeightY[k] - (LinesInDETY - LinesInDETYRoundedDownToSwath); 6443 MinTTUVBlankCriticalPlane = MinTTUVBlank[k]; 6444 6445 #ifdef __DML_VBA_DEBUG__ 6446 dml_print("DML::%s: StutterPeriod = %f\n", __func__, *StutterPeriod); 6447 dml_print("DML::%s: MinTTUVBlankCriticalPlane = %f\n", __func__, MinTTUVBlankCriticalPlane); 6448 dml_print("DML::%s: FrameTimeCriticalPlane = %f\n", __func__, FrameTimeCriticalPlane); 6449 dml_print("DML::%s: VActiveTimeCriticalPlane = %f\n", __func__, VActiveTimeCriticalPlane); 6450 dml_print("DML::%s: BytePerPixelYCriticalPlane = %d\n", __func__, BytePerPixelYCriticalPlane); 6451 dml_print("DML::%s: SwathWidthYCriticalPlane = %f\n", __func__, SwathWidthYCriticalPlane); 6452 dml_print("DML::%s: LinesToFinishSwathTransferStutterCriticalPlane = %f\n", __func__, LinesToFinishSwathTransferStutterCriticalPlane); 6453 #endif 6454 } 6455 } 6456 6457 PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer = dml_min(*StutterPeriod * TotalDataReadBandwidth, EffectiveCompressedBufferSize); 6458 #ifdef __DML_VBA_DEBUG__ 6459 dml_print("DML::%s: ROBBufferSizeInKByte = %d\n", __func__, ROBBufferSizeInKByte); 6460 dml_print("DML::%s: AverageDCCCompressionRate = %f\n", __func__, AverageDCCCompressionRate); 6461 dml_print("DML::%s: StutterPeriod * TotalDataReadBandwidth = %f\n", __func__, *StutterPeriod * TotalDataReadBandwidth); 6462 dml_print("DML::%s: ROBBufferSizeInKByte * 1024 * AverageDCCCompressionRate + EffectiveCompressedBufferSize = %f\n", __func__, ROBBufferSizeInKByte * 1024 * AverageDCCCompressionRate + EffectiveCompressedBufferSize); 6463 dml_print("DML::%s: EffectiveCompressedBufferSize = %f\n", __func__, EffectiveCompressedBufferSize); 6464 dml_print("DML::%s: 
PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer = %f\n", __func__, PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer); 6465 dml_print("DML::%s: ReturnBW = %f\n", __func__, ReturnBW); 6466 dml_print("DML::%s: TotalDataReadBandwidth = %f\n", __func__, TotalDataReadBandwidth); 6467 dml_print("DML::%s: TotalRowReadBandwidth = %f\n", __func__, TotalRowReadBandwidth); 6468 dml_print("DML::%s: DCFCLK = %f\n", __func__, DCFCLK); 6469 #endif 6470 6471 StutterBurstTime = PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer / AverageDCCCompressionRate / ReturnBW 6472 + (*StutterPeriod * TotalDataReadBandwidth - PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer) / (DCFCLK * 64) 6473 + *StutterPeriod * TotalRowReadBandwidth / ReturnBW; 6474 #ifdef __DML_VBA_DEBUG__ 6475 dml_print("DML::%s: Part 1 = %f\n", __func__, PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer / AverageDCCCompressionRate / ReturnBW); 6476 dml_print("DML::%s: StutterPeriod * TotalDataReadBandwidth = %f\n", __func__, (*StutterPeriod * TotalDataReadBandwidth)); 6477 dml_print("DML::%s: Part 2 = %f\n", __func__, (*StutterPeriod * TotalDataReadBandwidth - PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer) / (DCFCLK * 64)); 6478 dml_print("DML::%s: Part 3 = %f\n", __func__, *StutterPeriod * TotalRowReadBandwidth / ReturnBW); 6479 dml_print("DML::%s: StutterBurstTime = %f\n", __func__, StutterBurstTime); 6480 #endif 6481 StutterBurstTime = dml_max(StutterBurstTime, LinesToFinishSwathTransferStutterCriticalPlane * BytePerPixelYCriticalPlane * SwathWidthYCriticalPlane / ReturnBW); 6482 6483 dml_print( 6484 "DML::%s: Time to finish residue swath=%f\n", 6485 __func__, 6486 LinesToFinishSwathTransferStutterCriticalPlane * BytePerPixelYCriticalPlane * SwathWidthYCriticalPlane / ReturnBW); 6487 6488 TotalActiveWriteback = 0; 6489 for (k = 0; k < NumberOfActivePlanes; ++k) { 6490 if (WritebackEnable[k]) { 6491 TotalActiveWriteback = TotalActiveWriteback + 1; 
6492 } 6493 } 6494 6495 if (TotalActiveWriteback == 0) { 6496 #ifdef __DML_VBA_DEBUG__ 6497 dml_print("DML::%s: SRExitTime = %f\n", __func__, SRExitTime); 6498 dml_print("DML::%s: SRExitZ8Time = %f\n", __func__, SRExitZ8Time); 6499 dml_print("DML::%s: StutterBurstTime = %f (final)\n", __func__, StutterBurstTime); 6500 dml_print("DML::%s: StutterPeriod = %f\n", __func__, *StutterPeriod); 6501 #endif 6502 *StutterEfficiencyNotIncludingVBlank = dml_max(0., 1 - (SRExitTime + StutterBurstTime) / *StutterPeriod) * 100; 6503 *Z8StutterEfficiencyNotIncludingVBlank = dml_max(0., 1 - (SRExitZ8Time + StutterBurstTime) / *StutterPeriod) * 100; 6504 *NumberOfStutterBurstsPerFrame = (*StutterEfficiencyNotIncludingVBlank > 0 ? dml_ceil(VActiveTimeCriticalPlane / *StutterPeriod, 1) : 0); 6505 *Z8NumberOfStutterBurstsPerFrame = (*Z8StutterEfficiencyNotIncludingVBlank > 0 ? dml_ceil(VActiveTimeCriticalPlane / *StutterPeriod, 1) : 0); 6506 } else { 6507 *StutterEfficiencyNotIncludingVBlank = 0.; 6508 *Z8StutterEfficiencyNotIncludingVBlank = 0.; 6509 *NumberOfStutterBurstsPerFrame = 0; 6510 *Z8NumberOfStutterBurstsPerFrame = 0; 6511 } 6512 #ifdef __DML_VBA_DEBUG__ 6513 dml_print("DML::%s: VActiveTimeCriticalPlane = %f\n", __func__, VActiveTimeCriticalPlane); 6514 dml_print("DML::%s: StutterEfficiencyNotIncludingVBlank = %f\n", __func__, *StutterEfficiencyNotIncludingVBlank); 6515 dml_print("DML::%s: Z8StutterEfficiencyNotIncludingVBlank = %f\n", __func__, *Z8StutterEfficiencyNotIncludingVBlank); 6516 dml_print("DML::%s: NumberOfStutterBurstsPerFrame = %d\n", __func__, *NumberOfStutterBurstsPerFrame); 6517 dml_print("DML::%s: Z8NumberOfStutterBurstsPerFrame = %d\n", __func__, *Z8NumberOfStutterBurstsPerFrame); 6518 #endif 6519 6520 for (k = 0; k < NumberOfActivePlanes; ++k) { 6521 if (v->BlendingAndTiming[k] == k) { 6522 TotalNumberOfActiveOTG = TotalNumberOfActiveOTG + 1; 6523 } 6524 } 6525 6526 if (*StutterEfficiencyNotIncludingVBlank > 0) { 6527 LastStutterPeriod = 
VActiveTimeCriticalPlane - (*NumberOfStutterBurstsPerFrame - 1) * *StutterPeriod; 6528 6529 if ((SynchronizedVBlank || TotalNumberOfActiveOTG == 1) && LastStutterPeriod + MinTTUVBlankCriticalPlane > StutterEnterPlusExitWatermark) { 6530 *StutterEfficiency = (1 - (*NumberOfStutterBurstsPerFrame * SRExitTime + StutterBurstTime * VActiveTimeCriticalPlane 6531 / *StutterPeriod) / FrameTimeCriticalPlane) * 100; 6532 } else { 6533 *StutterEfficiency = *StutterEfficiencyNotIncludingVBlank; 6534 } 6535 } else { 6536 *StutterEfficiency = 0; 6537 } 6538 6539 if (*Z8StutterEfficiencyNotIncludingVBlank > 0) { 6540 LastZ8StutterPeriod = VActiveTimeCriticalPlane - (*NumberOfStutterBurstsPerFrame - 1) * *StutterPeriod; 6541 if ((SynchronizedVBlank || TotalNumberOfActiveOTG == 1) && LastZ8StutterPeriod + MinTTUVBlankCriticalPlane > Z8StutterEnterPlusExitWatermark) { 6542 *Z8StutterEfficiency = (1 - (*NumberOfStutterBurstsPerFrame * SRExitZ8Time + StutterBurstTime * VActiveTimeCriticalPlane 6543 / *StutterPeriod) / FrameTimeCriticalPlane) * 100; 6544 } else { 6545 *Z8StutterEfficiency = *Z8StutterEfficiencyNotIncludingVBlank; 6546 } 6547 } else { 6548 *Z8StutterEfficiency = 0.; 6549 } 6550 6551 dml_print("DML::%s: LastZ8StutterPeriod = %f\n", __func__, LastZ8StutterPeriod); 6552 dml_print("DML::%s: Z8StutterEnterPlusExitWatermark = %f\n", __func__, Z8StutterEnterPlusExitWatermark); 6553 dml_print("DML::%s: StutterBurstTime = %f\n", __func__, StutterBurstTime); 6554 dml_print("DML::%s: StutterPeriod = %f\n", __func__, *StutterPeriod); 6555 dml_print("DML::%s: StutterEfficiency = %f\n", __func__, *StutterEfficiency); 6556 dml_print("DML::%s: Z8StutterEfficiency = %f\n", __func__, *Z8StutterEfficiency); 6557 dml_print("DML::%s: StutterEfficiencyNotIncludingVBlank = %f\n", __func__, *StutterEfficiencyNotIncludingVBlank); 6558 dml_print("DML::%s: Z8NumberOfStutterBurstsPerFrame = %d\n", __func__, *Z8NumberOfStutterBurstsPerFrame); 6559 } 6560 6561 static void 
CalculateSwathAndDETConfiguration( 6562 bool ForceSingleDPP, 6563 int NumberOfActivePlanes, 6564 unsigned int DETBufferSizeInKByte, 6565 double MaximumSwathWidthLuma[], 6566 double MaximumSwathWidthChroma[], 6567 enum scan_direction_class SourceScan[], 6568 enum source_format_class SourcePixelFormat[], 6569 enum dm_swizzle_mode SurfaceTiling[], 6570 int ViewportWidth[], 6571 int ViewportHeight[], 6572 int SurfaceWidthY[], 6573 int SurfaceWidthC[], 6574 int SurfaceHeightY[], 6575 int SurfaceHeightC[], 6576 int Read256BytesBlockHeightY[], 6577 int Read256BytesBlockHeightC[], 6578 int Read256BytesBlockWidthY[], 6579 int Read256BytesBlockWidthC[], 6580 enum odm_combine_mode ODMCombineEnabled[], 6581 int BlendingAndTiming[], 6582 int BytePerPixY[], 6583 int BytePerPixC[], 6584 double BytePerPixDETY[], 6585 double BytePerPixDETC[], 6586 int HActive[], 6587 double HRatio[], 6588 double HRatioChroma[], 6589 int DPPPerPlane[], 6590 int swath_width_luma_ub[], 6591 int swath_width_chroma_ub[], 6592 double SwathWidth[], 6593 double SwathWidthChroma[], 6594 int SwathHeightY[], 6595 int SwathHeightC[], 6596 unsigned int DETBufferSizeY[], 6597 unsigned int DETBufferSizeC[], 6598 bool ViewportSizeSupportPerPlane[], 6599 bool *ViewportSizeSupport) 6600 { 6601 int MaximumSwathHeightY[DC__NUM_DPP__MAX]; 6602 int MaximumSwathHeightC[DC__NUM_DPP__MAX]; 6603 int MinimumSwathHeightY; 6604 int MinimumSwathHeightC; 6605 int RoundedUpMaxSwathSizeBytesY; 6606 int RoundedUpMaxSwathSizeBytesC; 6607 int RoundedUpMinSwathSizeBytesY; 6608 int RoundedUpMinSwathSizeBytesC; 6609 int RoundedUpSwathSizeBytesY; 6610 int RoundedUpSwathSizeBytesC; 6611 double SwathWidthSingleDPP[DC__NUM_DPP__MAX]; 6612 double SwathWidthSingleDPPChroma[DC__NUM_DPP__MAX]; 6613 int k; 6614 6615 CalculateSwathWidth( 6616 ForceSingleDPP, 6617 NumberOfActivePlanes, 6618 SourcePixelFormat, 6619 SourceScan, 6620 ViewportWidth, 6621 ViewportHeight, 6622 SurfaceWidthY, 6623 SurfaceWidthC, 6624 SurfaceHeightY, 6625 SurfaceHeightC, 
6626 ODMCombineEnabled, 6627 BytePerPixY, 6628 BytePerPixC, 6629 Read256BytesBlockHeightY, 6630 Read256BytesBlockHeightC, 6631 Read256BytesBlockWidthY, 6632 Read256BytesBlockWidthC, 6633 BlendingAndTiming, 6634 HActive, 6635 HRatio, 6636 DPPPerPlane, 6637 SwathWidthSingleDPP, 6638 SwathWidthSingleDPPChroma, 6639 SwathWidth, 6640 SwathWidthChroma, 6641 MaximumSwathHeightY, 6642 MaximumSwathHeightC, 6643 swath_width_luma_ub, 6644 swath_width_chroma_ub); 6645 6646 *ViewportSizeSupport = true; 6647 for (k = 0; k < NumberOfActivePlanes; ++k) { 6648 if ((SourcePixelFormat[k] == dm_444_64 || SourcePixelFormat[k] == dm_444_32 || SourcePixelFormat[k] == dm_444_16 || SourcePixelFormat[k] == dm_mono_16 6649 || SourcePixelFormat[k] == dm_mono_8 || SourcePixelFormat[k] == dm_rgbe)) { 6650 if (SurfaceTiling[k] == dm_sw_linear 6651 || (SourcePixelFormat[k] == dm_444_64 6652 && (SurfaceTiling[k] == dm_sw_64kb_s || SurfaceTiling[k] == dm_sw_64kb_s_t || SurfaceTiling[k] == dm_sw_64kb_s_x) 6653 && SourceScan[k] != dm_vert)) { 6654 MinimumSwathHeightY = MaximumSwathHeightY[k]; 6655 } else if (SourcePixelFormat[k] == dm_444_8 && SourceScan[k] == dm_vert) { 6656 MinimumSwathHeightY = MaximumSwathHeightY[k]; 6657 } else { 6658 MinimumSwathHeightY = MaximumSwathHeightY[k] / 2; 6659 } 6660 MinimumSwathHeightC = MaximumSwathHeightC[k]; 6661 } else { 6662 if (SurfaceTiling[k] == dm_sw_linear) { 6663 MinimumSwathHeightY = MaximumSwathHeightY[k]; 6664 MinimumSwathHeightC = MaximumSwathHeightC[k]; 6665 } else if (SourcePixelFormat[k] == dm_rgbe_alpha && SourceScan[k] == dm_vert) { 6666 MinimumSwathHeightY = MaximumSwathHeightY[k] / 2; 6667 MinimumSwathHeightC = MaximumSwathHeightC[k]; 6668 } else if (SourcePixelFormat[k] == dm_rgbe_alpha) { 6669 MinimumSwathHeightY = MaximumSwathHeightY[k] / 2; 6670 MinimumSwathHeightC = MaximumSwathHeightC[k] / 2; 6671 } else if (SourcePixelFormat[k] == dm_420_8 && SourceScan[k] == dm_vert) { 6672 MinimumSwathHeightY = MaximumSwathHeightY[k]; 6673 
MinimumSwathHeightC = MaximumSwathHeightC[k] / 2; 6674 } else { 6675 MinimumSwathHeightC = MaximumSwathHeightC[k] / 2; 6676 MinimumSwathHeightY = MaximumSwathHeightY[k] / 2; 6677 } 6678 } 6679 6680 RoundedUpMaxSwathSizeBytesY = swath_width_luma_ub[k] * BytePerPixDETY[k] * MaximumSwathHeightY[k]; 6681 RoundedUpMinSwathSizeBytesY = swath_width_luma_ub[k] * BytePerPixDETY[k] * MinimumSwathHeightY; 6682 if (SourcePixelFormat[k] == dm_420_10) { 6683 RoundedUpMaxSwathSizeBytesY = dml_ceil((double) RoundedUpMaxSwathSizeBytesY, 256); 6684 RoundedUpMinSwathSizeBytesY = dml_ceil((double) RoundedUpMinSwathSizeBytesY, 256); 6685 } 6686 RoundedUpMaxSwathSizeBytesC = swath_width_chroma_ub[k] * BytePerPixDETC[k] * MaximumSwathHeightC[k]; 6687 RoundedUpMinSwathSizeBytesC = swath_width_chroma_ub[k] * BytePerPixDETC[k] * MinimumSwathHeightC; 6688 if (SourcePixelFormat[k] == dm_420_10) { 6689 RoundedUpMaxSwathSizeBytesC = dml_ceil(RoundedUpMaxSwathSizeBytesC, 256); 6690 RoundedUpMinSwathSizeBytesC = dml_ceil(RoundedUpMinSwathSizeBytesC, 256); 6691 } 6692 6693 if (RoundedUpMaxSwathSizeBytesY + RoundedUpMaxSwathSizeBytesC <= DETBufferSizeInKByte * 1024 / 2) { 6694 SwathHeightY[k] = MaximumSwathHeightY[k]; 6695 SwathHeightC[k] = MaximumSwathHeightC[k]; 6696 RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY; 6697 RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC; 6698 } else if (RoundedUpMaxSwathSizeBytesY >= 1.5 * RoundedUpMaxSwathSizeBytesC 6699 && RoundedUpMinSwathSizeBytesY + RoundedUpMaxSwathSizeBytesC <= DETBufferSizeInKByte * 1024 / 2) { 6700 SwathHeightY[k] = MinimumSwathHeightY; 6701 SwathHeightC[k] = MaximumSwathHeightC[k]; 6702 RoundedUpSwathSizeBytesY = RoundedUpMinSwathSizeBytesY; 6703 RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC; 6704 } else if (RoundedUpMaxSwathSizeBytesY < 1.5 * RoundedUpMaxSwathSizeBytesC 6705 && RoundedUpMaxSwathSizeBytesY + RoundedUpMinSwathSizeBytesC <= DETBufferSizeInKByte * 1024 / 2) { 6706 SwathHeightY[k] = 
MaximumSwathHeightY[k]; 6707 SwathHeightC[k] = MinimumSwathHeightC; 6708 RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY; 6709 RoundedUpSwathSizeBytesC = RoundedUpMinSwathSizeBytesC; 6710 } else { 6711 SwathHeightY[k] = MinimumSwathHeightY; 6712 SwathHeightC[k] = MinimumSwathHeightC; 6713 RoundedUpSwathSizeBytesY = RoundedUpMinSwathSizeBytesY; 6714 RoundedUpSwathSizeBytesC = RoundedUpMinSwathSizeBytesC; 6715 } 6716 { 6717 double actDETBufferSizeInKByte = dml_ceil(DETBufferSizeInKByte, 64); 6718 6719 if (SwathHeightC[k] == 0) { 6720 DETBufferSizeY[k] = actDETBufferSizeInKByte * 1024; 6721 DETBufferSizeC[k] = 0; 6722 } else if (RoundedUpSwathSizeBytesY <= 1.5 * RoundedUpSwathSizeBytesC) { 6723 DETBufferSizeY[k] = actDETBufferSizeInKByte * 1024 / 2; 6724 DETBufferSizeC[k] = actDETBufferSizeInKByte * 1024 / 2; 6725 } else { 6726 DETBufferSizeY[k] = dml_floor(actDETBufferSizeInKByte * 1024 * 2 / 3, 1024); 6727 DETBufferSizeC[k] = actDETBufferSizeInKByte * 1024 / 3; 6728 } 6729 6730 if (RoundedUpMinSwathSizeBytesY + RoundedUpMinSwathSizeBytesC > actDETBufferSizeInKByte * 1024 / 2 || SwathWidth[k] > MaximumSwathWidthLuma[k] 6731 || (SwathHeightC[k] > 0 && SwathWidthChroma[k] > MaximumSwathWidthChroma[k])) { 6732 *ViewportSizeSupport = false; 6733 ViewportSizeSupportPerPlane[k] = false; 6734 } else { 6735 ViewportSizeSupportPerPlane[k] = true; 6736 } 6737 } 6738 } 6739 }

/*
 * CalculateSwathWidth - per-plane swath widths and maximum swath heights.
 *
 * For every plane k in [0, NumberOfActivePlanes) this fills in:
 *   SwathWidthSingleDPPY/C[k]  - swath width taken from the viewport (the
 *                                viewport height is used when SourceScan[k]
 *                                is dm_vert, the viewport width otherwise);
 *                                the chroma value is half the luma value for
 *                                the dm_420_8/10/12 formats.
 *   SwathWidthY/C[k]           - width actually used: limited by
 *                                HActive * HRatio / 4 (or / 2) for 4:1 (2:1)
 *                                ODM combine, halved when DPPPerPlane[k] == 2,
 *                                and forced back to the single-DPP value when
 *                                ForceSingleDPP is set.
 *   MaximumSwathHeightY/C[k]   - 256-byte block height, or block width when
 *                                the scan direction is dm_vert.
 *   swath_width_luma_ub[k],
 *   swath_width_chroma_ub[k]   - swath width rounded up to the 256-byte block
 *                                granularity along the swath, capped by the
 *                                rounded-up surface size; the chroma value is
 *                                0 when BytePerPixC[k] is not positive.
 *
 * The ODM combine mode is taken from the plane indexed by
 * BlendingAndTiming[k] when that index names an active plane, otherwise from
 * plane k itself.
 *
 * BytePerPixY is accepted but not read by this function.
 */
static void CalculateSwathWidth(
		bool ForceSingleDPP,
		int NumberOfActivePlanes,
		enum source_format_class SourcePixelFormat[],
		enum scan_direction_class SourceScan[],
		int ViewportWidth[],
		int ViewportHeight[],
		int SurfaceWidthY[],
		int SurfaceWidthC[],
		int SurfaceHeightY[],
		int SurfaceHeightC[],
		enum odm_combine_mode ODMCombineEnabled[],
		int BytePerPixY[],
		int BytePerPixC[],
		int Read256BytesBlockHeightY[],
		int Read256BytesBlockHeightC[],
		int Read256BytesBlockWidthY[],
		int Read256BytesBlockWidthC[],
		int BlendingAndTiming[],
		int HActive[],
		double HRatio[],
		int DPPPerPlane[],
		double SwathWidthSingleDPPY[],
		double SwathWidthSingleDPPC[],
		double SwathWidthY[],
		double SwathWidthC[],
		int MaximumSwathHeightY[],
		int MaximumSwathHeightC[],
		int swath_width_luma_ub[],
		int swath_width_chroma_ub[])
{
	enum odm_combine_mode MainPlaneODMCombine;
	int k;

#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: NumberOfActivePlanes = %d\n", __func__, NumberOfActivePlanes);
#endif

	for (k = 0; k < NumberOfActivePlanes; ++k) {
		const bool rotated = (SourceScan[k] == dm_vert);
		const bool chroma_halved = (SourcePixelFormat[k] == dm_420_8
				|| SourcePixelFormat[k] == dm_420_10
				|| SourcePixelFormat[k] == dm_420_12);
		/* Per-plane tables that apply along the swath for this scan direction. */
		const int *luma_granularity = rotated ? Read256BytesBlockHeightY : Read256BytesBlockWidthY;
		const int *chroma_granularity = rotated ? Read256BytesBlockHeightC : Read256BytesBlockWidthC;
		const int *chroma_extent = rotated ? SurfaceHeightC : SurfaceWidthC;
		const int timing_plane = BlendingAndTiming[k];
		int surface_width_ub_l;
		int surface_height_ub_l;
		int luma_limit;

		SwathWidthSingleDPPY[k] = rotated ? ViewportHeight[k] : ViewportWidth[k];

#ifdef __DML_VBA_DEBUG__
		dml_print("DML::%s: k=%d ViewportWidth=%d\n", __func__, k, ViewportWidth[k]);
		dml_print("DML::%s: k=%d ViewportHeight=%d\n", __func__, k, ViewportHeight[k]);
#endif

		/* Use the ODM mode of the plane that owns the timing, if it is an active plane. */
		if (timing_plane >= 0 && timing_plane < NumberOfActivePlanes)
			MainPlaneODMCombine = ODMCombineEnabled[timing_plane];
		else
			MainPlaneODMCombine = ODMCombineEnabled[k];

		switch (MainPlaneODMCombine) {
		case dm_odm_combine_mode_4to1:
			SwathWidthY[k] = dml_min(SwathWidthSingleDPPY[k], dml_round(HActive[k] / 4.0 * HRatio[k]));
			break;
		case dm_odm_combine_mode_2to1:
			SwathWidthY[k] = dml_min(SwathWidthSingleDPPY[k], dml_round(HActive[k] / 2.0 * HRatio[k]));
			break;
		default:
			if (DPPPerPlane[k] == 2)
				SwathWidthY[k] = SwathWidthSingleDPPY[k] / 2;
			else
				SwathWidthY[k] = SwathWidthSingleDPPY[k];
			break;
		}

#ifdef __DML_VBA_DEBUG__
		dml_print("DML::%s: k=%d SwathWidthSingleDPPY=%f\n", __func__, k, SwathWidthSingleDPPY[k]);
		dml_print("DML::%s: k=%d SwathWidthY=%f\n", __func__, k, SwathWidthY[k]);
#endif

		if (chroma_halved) {
			SwathWidthC[k] = SwathWidthY[k] / 2;
			SwathWidthSingleDPPC[k] = SwathWidthSingleDPPY[k] / 2;
		} else {
			SwathWidthC[k] = SwathWidthY[k];
			SwathWidthSingleDPPC[k] = SwathWidthSingleDPPY[k];
		}

		if (ForceSingleDPP == true) {
			SwathWidthY[k] = SwathWidthSingleDPPY[k];
			SwathWidthC[k] = SwathWidthSingleDPPC[k];
		}

		/* Both luma bounds are always evaluated, exactly as before. */
		surface_width_ub_l = dml_ceil(SurfaceWidthY[k], Read256BytesBlockWidthY[k]);
		surface_height_ub_l = dml_ceil(SurfaceHeightY[k], Read256BytesBlockHeightY[k]);

#ifdef __DML_VBA_DEBUG__
		dml_print("DML::%s: k=%d surface_width_ub_l=%0d\n", __func__, k, surface_width_ub_l);
#endif

		if (!rotated) {
			MaximumSwathHeightY[k] = Read256BytesBlockHeightY[k];
			MaximumSwathHeightC[k] = Read256BytesBlockHeightC[k];
			luma_limit = surface_width_ub_l;
		} else {
			MaximumSwathHeightY[k] = Read256BytesBlockWidthY[k];
			MaximumSwathHeightC[k] = Read256BytesBlockWidthC[k];
			luma_limit = surface_height_ub_l;
		}

		swath_width_luma_ub[k] = dml_min(
				luma_limit,
				(int) dml_ceil(SwathWidthY[k] - 1, luma_granularity[k]) + luma_granularity[k]);

		if (BytePerPixC[k] > 0) {
			int chroma_limit = dml_ceil(chroma_extent[k], chroma_granularity[k]);

			swath_width_chroma_ub[k] = dml_min(
					chroma_limit,
					(int) dml_ceil(SwathWidthC[k] - 1, chroma_granularity[k]) + chroma_granularity[k]);
		} else {
			swath_width_chroma_ub[k] = 0;
		}
	}
}
static double CalculateExtraLatency( 6864 int RoundTripPingLatencyCycles, 6865 int ReorderingBytes, 6866 double DCFCLK, 6867 int TotalNumberOfActiveDPP, 6868 int PixelChunkSizeInKByte, 6869 int TotalNumberOfDCCActiveDPP, 6870 int MetaChunkSize, 6871 double ReturnBW, 6872 bool GPUVMEnable, 6873 bool HostVMEnable, 6874 int NumberOfActivePlanes, 6875 int NumberOfDPP[], 6876 int dpte_group_bytes[], 6877 double HostVMInefficiencyFactor, 6878 double HostVMMinPageSize, 6879 int HostVMMaxNonCachedPageTableLevels) 6880 { 6881 double ExtraLatencyBytes; 6882 double ExtraLatency; 6883 6884 ExtraLatencyBytes = CalculateExtraLatencyBytes( 6885 ReorderingBytes, 6886 TotalNumberOfActiveDPP, 6887 PixelChunkSizeInKByte, 6888 TotalNumberOfDCCActiveDPP, 6889 MetaChunkSize, 6890 GPUVMEnable, 6891 HostVMEnable, 6892 NumberOfActivePlanes, 6893 NumberOfDPP, 6894 dpte_group_bytes, 6895 HostVMInefficiencyFactor, 6896 HostVMMinPageSize, 6897 HostVMMaxNonCachedPageTableLevels); 6898 6899 ExtraLatency = (RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__) / DCFCLK + ExtraLatencyBytes / ReturnBW; 6900 6901 #ifdef __DML_VBA_DEBUG__ 6902 dml_print("DML::%s: RoundTripPingLatencyCycles=%d\n", __func__, RoundTripPingLatencyCycles); 6903 dml_print("DML::%s: DCFCLK=%f\n", __func__, DCFCLK); 6904 dml_print("DML::%s: ExtraLatencyBytes=%f\n", __func__, ExtraLatencyBytes); 6905 dml_print("DML::%s: ReturnBW=%f\n", __func__, ReturnBW); 6906 dml_print("DML::%s: ExtraLatency=%f\n", __func__, ExtraLatency); 6907 #endif 6908 6909 return ExtraLatency; 6910 } 6911 6912 static double CalculateExtraLatencyBytes( 6913 int ReorderingBytes, 6914 int TotalNumberOfActiveDPP, 6915 int PixelChunkSizeInKByte, 6916 int TotalNumberOfDCCActiveDPP, 6917 int MetaChunkSize, 6918 bool GPUVMEnable, 6919 bool HostVMEnable, 6920 int NumberOfActivePlanes, 6921 int NumberOfDPP[], 6922 int dpte_group_bytes[], 6923 double HostVMInefficiencyFactor, 6924 double HostVMMinPageSize, 6925 int HostVMMaxNonCachedPageTableLevels) 6926 { 6927 
double ret; 6928 int HostVMDynamicLevels = 0, k; 6929 6930 if (GPUVMEnable == true && HostVMEnable == true) { 6931 if (HostVMMinPageSize < 2048) 6932 HostVMDynamicLevels = HostVMMaxNonCachedPageTableLevels; 6933 else if (HostVMMinPageSize >= 2048 && HostVMMinPageSize < 1048576) 6934 HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 1); 6935 else 6936 HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 2); 6937 } else { 6938 HostVMDynamicLevels = 0; 6939 } 6940 6941 ret = ReorderingBytes + (TotalNumberOfActiveDPP * PixelChunkSizeInKByte + TotalNumberOfDCCActiveDPP * MetaChunkSize) * 1024.0; 6942 6943 if (GPUVMEnable == true) { 6944 for (k = 0; k < NumberOfActivePlanes; ++k) 6945 ret = ret + NumberOfDPP[k] * dpte_group_bytes[k] * (1 + 8 * HostVMDynamicLevels) * HostVMInefficiencyFactor; 6946 } 6947 return ret; 6948 } 6949 6950 static double CalculateUrgentLatency( 6951 double UrgentLatencyPixelDataOnly, 6952 double UrgentLatencyPixelMixedWithVMData, 6953 double UrgentLatencyVMDataOnly, 6954 bool DoUrgentLatencyAdjustment, 6955 double UrgentLatencyAdjustmentFabricClockComponent, 6956 double UrgentLatencyAdjustmentFabricClockReference, 6957 double FabricClock) 6958 { 6959 double ret; 6960 6961 ret = dml_max3(UrgentLatencyPixelDataOnly, UrgentLatencyPixelMixedWithVMData, UrgentLatencyVMDataOnly); 6962 if (DoUrgentLatencyAdjustment == true) 6963 ret = ret + UrgentLatencyAdjustmentFabricClockComponent * (UrgentLatencyAdjustmentFabricClockReference / FabricClock - 1); 6964 return ret; 6965 } 6966 6967 static void UseMinimumDCFCLK( 6968 struct display_mode_lib *mode_lib, 6969 int MaxPrefetchMode, 6970 int ReorderingBytes) 6971 { 6972 struct vba_vars_st *v = &mode_lib->vba; 6973 int dummy1, i, j, k; 6974 double NormalEfficiency, dummy2, dummy3; 6975 double TotalMaxPrefetchFlipDPTERowBandwidth[DC__VOLTAGE_STATES][2]; 6976 6977 NormalEfficiency = v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0; 6978 for (i = 0; 
i < v->soc.num_states; ++i) { 6979 for (j = 0; j <= 1; ++j) { 6980 double PixelDCFCLKCyclesRequiredInPrefetch[DC__NUM_DPP__MAX]; 6981 double PrefetchPixelLinesTime[DC__NUM_DPP__MAX]; 6982 double DCFCLKRequiredForPeakBandwidthPerPlane[DC__NUM_DPP__MAX]; 6983 double DynamicMetadataVMExtraLatency[DC__NUM_DPP__MAX]; 6984 double MinimumTWait; 6985 double NonDPTEBandwidth; 6986 double DPTEBandwidth; 6987 double DCFCLKRequiredForAverageBandwidth; 6988 double ExtraLatencyBytes; 6989 double ExtraLatencyCycles; 6990 double DCFCLKRequiredForPeakBandwidth; 6991 int NoOfDPPState[DC__NUM_DPP__MAX]; 6992 double MinimumTvmPlus2Tr0; 6993 6994 TotalMaxPrefetchFlipDPTERowBandwidth[i][j] = 0; 6995 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 6996 TotalMaxPrefetchFlipDPTERowBandwidth[i][j] = TotalMaxPrefetchFlipDPTERowBandwidth[i][j] 6997 + v->NoOfDPP[i][j][k] * v->DPTEBytesPerRow[i][j][k] / (15.75 * v->HTotal[k] / v->PixelClock[k]); 6998 } 6999 7000 for (k = 0; k <= v->NumberOfActivePlanes - 1; ++k) 7001 NoOfDPPState[k] = v->NoOfDPP[i][j][k]; 7002 7003 MinimumTWait = CalculateTWait(MaxPrefetchMode, v->FinalDRAMClockChangeLatency, v->UrgLatency[i], v->SREnterPlusExitTime); 7004 NonDPTEBandwidth = v->TotalVActivePixelBandwidth[i][j] + v->TotalVActiveCursorBandwidth[i][j] + v->TotalMetaRowBandwidth[i][j]; 7005 DPTEBandwidth = (v->HostVMEnable == true || v->ImmediateFlipRequirement[0] == dm_immediate_flip_required) ? 
7006 TotalMaxPrefetchFlipDPTERowBandwidth[i][j] : v->TotalDPTERowBandwidth[i][j]; 7007 DCFCLKRequiredForAverageBandwidth = dml_max3( 7008 v->ProjectedDCFCLKDeepSleep[i][j], 7009 (NonDPTEBandwidth + v->TotalDPTERowBandwidth[i][j]) / v->ReturnBusWidth 7010 / (v->MaxAveragePercentOfIdealFabricAndSDPPortBWDisplayCanUseInNormalSystemOperation / 100), 7011 (NonDPTEBandwidth + DPTEBandwidth / NormalEfficiency) / NormalEfficiency / v->ReturnBusWidth); 7012 7013 ExtraLatencyBytes = CalculateExtraLatencyBytes( 7014 ReorderingBytes, 7015 v->TotalNumberOfActiveDPP[i][j], 7016 v->PixelChunkSizeInKByte, 7017 v->TotalNumberOfDCCActiveDPP[i][j], 7018 v->MetaChunkSize, 7019 v->GPUVMEnable, 7020 v->HostVMEnable, 7021 v->NumberOfActivePlanes, 7022 NoOfDPPState, 7023 v->dpte_group_bytes, 7024 1, 7025 v->HostVMMinPageSize, 7026 v->HostVMMaxNonCachedPageTableLevels); 7027 ExtraLatencyCycles = v->RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__ + ExtraLatencyBytes / NormalEfficiency / v->ReturnBusWidth; 7028 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 7029 double DCFCLKCyclesRequiredInPrefetch; 7030 double ExpectedPrefetchBWAcceleration; 7031 double PrefetchTime; 7032 7033 PixelDCFCLKCyclesRequiredInPrefetch[k] = (v->PrefetchLinesY[i][j][k] * v->swath_width_luma_ub_all_states[i][j][k] * v->BytePerPixelY[k] 7034 + v->PrefetchLinesC[i][j][k] * v->swath_width_chroma_ub_all_states[i][j][k] * v->BytePerPixelC[k]) / NormalEfficiency / v->ReturnBusWidth; 7035 DCFCLKCyclesRequiredInPrefetch = 2 * ExtraLatencyCycles / NoOfDPPState[k] 7036 + v->PDEAndMetaPTEBytesPerFrame[i][j][k] / NormalEfficiency / NormalEfficiency / v->ReturnBusWidth * (v->GPUVMMaxPageTableLevels > 2 ? 
1 : 0) 7037 + 2 * v->DPTEBytesPerRow[i][j][k] / NormalEfficiency / NormalEfficiency / v->ReturnBusWidth 7038 + 2 * v->MetaRowBytes[i][j][k] / NormalEfficiency / v->ReturnBusWidth + PixelDCFCLKCyclesRequiredInPrefetch[k]; 7039 PrefetchPixelLinesTime[k] = dml_max(v->PrefetchLinesY[i][j][k], v->PrefetchLinesC[i][j][k]) * v->HTotal[k] / v->PixelClock[k]; 7040 ExpectedPrefetchBWAcceleration = (v->VActivePixelBandwidth[i][j][k] + v->VActiveCursorBandwidth[i][j][k]) 7041 / (v->ReadBandwidthLuma[k] + v->ReadBandwidthChroma[k]); 7042 DynamicMetadataVMExtraLatency[k] = 7043 (v->GPUVMEnable == true && v->DynamicMetadataEnable[k] == true && v->DynamicMetadataVMEnabled == true) ? 7044 v->UrgLatency[i] * v->GPUVMMaxPageTableLevels * (v->HostVMEnable == true ? v->HostVMMaxNonCachedPageTableLevels + 1 : 1) : 0; 7045 PrefetchTime = (v->MaximumVStartup[i][j][k] - 1) * v->HTotal[k] / v->PixelClock[k] - MinimumTWait 7046 - v->UrgLatency[i] 7047 * ((v->GPUVMMaxPageTableLevels <= 2 ? v->GPUVMMaxPageTableLevels : v->GPUVMMaxPageTableLevels - 2) 7048 * (v->HostVMEnable == true ? 
v->HostVMMaxNonCachedPageTableLevels + 1 : 1) - 1) 7049 - DynamicMetadataVMExtraLatency[k]; 7050 7051 if (PrefetchTime > 0) { 7052 double ExpectedVRatioPrefetch; 7053 7054 ExpectedVRatioPrefetch = PrefetchPixelLinesTime[k] 7055 / (PrefetchTime * PixelDCFCLKCyclesRequiredInPrefetch[k] / DCFCLKCyclesRequiredInPrefetch); 7056 DCFCLKRequiredForPeakBandwidthPerPlane[k] = NoOfDPPState[k] * PixelDCFCLKCyclesRequiredInPrefetch[k] / PrefetchPixelLinesTime[k] 7057 * dml_max(1.0, ExpectedVRatioPrefetch) * dml_max(1.0, ExpectedVRatioPrefetch / 4) * ExpectedPrefetchBWAcceleration; 7058 if (v->HostVMEnable == true || v->ImmediateFlipRequirement[0] == dm_immediate_flip_required) { 7059 DCFCLKRequiredForPeakBandwidthPerPlane[k] = DCFCLKRequiredForPeakBandwidthPerPlane[k] 7060 + NoOfDPPState[k] * DPTEBandwidth / NormalEfficiency / NormalEfficiency / v->ReturnBusWidth; 7061 } 7062 } else { 7063 DCFCLKRequiredForPeakBandwidthPerPlane[k] = v->DCFCLKPerState[i]; 7064 } 7065 if (v->DynamicMetadataEnable[k] == true) { 7066 double TSetupPipe; 7067 double TdmbfPipe; 7068 double TdmsksPipe; 7069 double TdmecPipe; 7070 double AllowedTimeForUrgentExtraLatency; 7071 7072 CalculateVupdateAndDynamicMetadataParameters( 7073 v->MaxInterDCNTileRepeaters, 7074 v->RequiredDPPCLK[i][j][k], 7075 v->RequiredDISPCLK[i][j], 7076 v->ProjectedDCFCLKDeepSleep[i][j], 7077 v->PixelClock[k], 7078 v->HTotal[k], 7079 v->VTotal[k] - v->VActive[k], 7080 v->DynamicMetadataTransmittedBytes[k], 7081 v->DynamicMetadataLinesBeforeActiveRequired[k], 7082 v->Interlace[k], 7083 v->ProgressiveToInterlaceUnitInOPP, 7084 &TSetupPipe, 7085 &TdmbfPipe, 7086 &TdmecPipe, 7087 &TdmsksPipe, 7088 &dummy1, 7089 &dummy2, 7090 &dummy3); 7091 AllowedTimeForUrgentExtraLatency = v->MaximumVStartup[i][j][k] * v->HTotal[k] / v->PixelClock[k] - MinimumTWait - TSetupPipe - TdmbfPipe - TdmecPipe 7092 - TdmsksPipe - DynamicMetadataVMExtraLatency[k]; 7093 if (AllowedTimeForUrgentExtraLatency > 0) { 7094 DCFCLKRequiredForPeakBandwidthPerPlane[k] 
= dml_max( 7095 DCFCLKRequiredForPeakBandwidthPerPlane[k], 7096 ExtraLatencyCycles / AllowedTimeForUrgentExtraLatency); 7097 } else { 7098 DCFCLKRequiredForPeakBandwidthPerPlane[k] = v->DCFCLKPerState[i]; 7099 } 7100 } 7101 } 7102 DCFCLKRequiredForPeakBandwidth = 0; 7103 for (k = 0; k <= v->NumberOfActivePlanes - 1; ++k) 7104 DCFCLKRequiredForPeakBandwidth = DCFCLKRequiredForPeakBandwidth + DCFCLKRequiredForPeakBandwidthPerPlane[k]; 7105 7106 MinimumTvmPlus2Tr0 = v->UrgLatency[i] 7107 * (v->GPUVMEnable == true ? 7108 (v->HostVMEnable == true ? 7109 (v->GPUVMMaxPageTableLevels + 2) * (v->HostVMMaxNonCachedPageTableLevels + 1) - 1 : v->GPUVMMaxPageTableLevels + 1) : 7110 0); 7111 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 7112 double MaximumTvmPlus2Tr0PlusTsw; 7113 7114 MaximumTvmPlus2Tr0PlusTsw = (v->MaximumVStartup[i][j][k] - 2) * v->HTotal[k] / v->PixelClock[k] - MinimumTWait - DynamicMetadataVMExtraLatency[k]; 7115 if (MaximumTvmPlus2Tr0PlusTsw <= MinimumTvmPlus2Tr0 + PrefetchPixelLinesTime[k] / 4) { 7116 DCFCLKRequiredForPeakBandwidth = v->DCFCLKPerState[i]; 7117 } else { 7118 DCFCLKRequiredForPeakBandwidth = dml_max3( 7119 DCFCLKRequiredForPeakBandwidth, 7120 2 * ExtraLatencyCycles / (MaximumTvmPlus2Tr0PlusTsw - MinimumTvmPlus2Tr0 - PrefetchPixelLinesTime[k] / 4), 7121 (2 * ExtraLatencyCycles + PixelDCFCLKCyclesRequiredInPrefetch[k]) / (MaximumTvmPlus2Tr0PlusTsw - MinimumTvmPlus2Tr0)); 7122 } 7123 } 7124 v->DCFCLKState[i][j] = dml_min(v->DCFCLKPerState[i], 1.05 * dml_max(DCFCLKRequiredForAverageBandwidth, DCFCLKRequiredForPeakBandwidth)); 7125 } 7126 } 7127 } 7128 7129 static void CalculateUnboundedRequestAndCompressedBufferSize( 7130 unsigned int DETBufferSizeInKByte, 7131 int ConfigReturnBufferSizeInKByte, 7132 enum unbounded_requesting_policy UseUnboundedRequestingFinal, 7133 int TotalActiveDPP, 7134 bool NoChromaPlanes, 7135 int MaxNumDPP, 7136 int CompressedBufferSegmentSizeInkByteFinal, 7137 enum output_encoder_class *Output, 7138 bool 
*UnboundedRequestEnabled, 7139 int *CompressedBufferSizeInkByte) 7140 { 7141 double actDETBufferSizeInKByte = dml_ceil(DETBufferSizeInKByte, 64); 7142 7143 *UnboundedRequestEnabled = UnboundedRequest(UseUnboundedRequestingFinal, TotalActiveDPP, NoChromaPlanes, Output[0]); 7144 *CompressedBufferSizeInkByte = ( 7145 *UnboundedRequestEnabled == true ? 7146 ConfigReturnBufferSizeInKByte - TotalActiveDPP * actDETBufferSizeInKByte : 7147 ConfigReturnBufferSizeInKByte - MaxNumDPP * actDETBufferSizeInKByte); 7148 *CompressedBufferSizeInkByte = *CompressedBufferSizeInkByte * CompressedBufferSegmentSizeInkByteFinal / 64; 7149 7150 #ifdef __DML_VBA_DEBUG__ 7151 dml_print("DML::%s: TotalActiveDPP = %d\n", __func__, TotalActiveDPP); 7152 dml_print("DML::%s: DETBufferSizeInKByte = %d\n", __func__, DETBufferSizeInKByte); 7153 dml_print("DML::%s: ConfigReturnBufferSizeInKByte = %d\n", __func__, ConfigReturnBufferSizeInKByte); 7154 dml_print("DML::%s: UseUnboundedRequestingFinal = %d\n", __func__, UseUnboundedRequestingFinal); 7155 dml_print("DML::%s: actDETBufferSizeInKByte = %f\n", __func__, actDETBufferSizeInKByte); 7156 dml_print("DML::%s: UnboundedRequestEnabled = %d\n", __func__, *UnboundedRequestEnabled); 7157 dml_print("DML::%s: CompressedBufferSizeInkByte = %d\n", __func__, *CompressedBufferSizeInkByte); 7158 #endif 7159 } 7160 7161 static bool UnboundedRequest(enum unbounded_requesting_policy UseUnboundedRequestingFinal, int TotalNumberOfActiveDPP, bool NoChroma, enum output_encoder_class Output) 7162 { 7163 bool ret_val = false; 7164 7165 ret_val = (UseUnboundedRequestingFinal != dm_unbounded_requesting_disable && TotalNumberOfActiveDPP == 1 && NoChroma); 7166 if (UseUnboundedRequestingFinal == dm_unbounded_requesting_edp_only && Output != dm_edp) 7167 ret_val = false; 7168 return ret_val; 7169 } 7170 7171 static unsigned int CalculateMaxVStartup( 7172 unsigned int VTotal, 7173 unsigned int VActive, 7174 unsigned int VBlankNom, 7175 unsigned int HTotal, 7176 double 
PixelClock, 7177 bool ProgressiveTointerlaceUnitinOPP, 7178 bool Interlace, 7179 unsigned int VBlankNomDefaultUS, 7180 double WritebackDelayTime) 7181 { 7182 unsigned int MaxVStartup = 0; 7183 unsigned int vblank_size = 0; 7184 double line_time_us = HTotal / PixelClock; 7185 unsigned int vblank_actual = VTotal - VActive; 7186 unsigned int vblank_nom_default_in_line = dml_floor(VBlankNomDefaultUS / line_time_us, 1.0); 7187 unsigned int vblank_nom_input = VBlankNom; //dml_min(VBlankNom, vblank_nom_default_in_line); 7188 unsigned int vblank_avail = vblank_nom_input == 0 ? vblank_nom_default_in_line : vblank_nom_input; 7189 7190 vblank_size = (unsigned int) dml_min(vblank_actual, vblank_avail); 7191 if (Interlace && !ProgressiveTointerlaceUnitinOPP) 7192 MaxVStartup = dml_floor(vblank_size / 2.0, 1.0); 7193 else 7194 MaxVStartup = vblank_size - dml_max(1.0, dml_ceil(WritebackDelayTime / line_time_us, 1.0)); 7195 if (MaxVStartup > 1023) 7196 MaxVStartup = 1023; 7197 return MaxVStartup; 7198 } 7199