1 /* 2 * Copyright 2017 Advanced Micro Devices, Inc. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice shall be included in 12 * all copies or substantial portions of the Software. 13 * 14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 20 * OTHER DEALINGS IN THE SOFTWARE. 21 * 22 * Authors: AMD 23 * 24 */ 25 26 #include "dc.h" 27 #include "dc_link.h" 28 #include "../display_mode_lib.h" 29 #include "display_mode_vba_31.h" 30 #include "../dml_inline_defs.h" 31 32 /* 33 * NOTE: 34 * This file is gcc-parsable HW gospel, coming straight from HW engineers. 35 * 36 * It doesn't adhere to Linux kernel style and sometimes will do things in odd 37 * ways. Unless there is something clearly wrong with it the code should 38 * remain as-is as it provides us with a guarantee from HW that it is correct. 39 */ 40 41 #define BPP_INVALID 0 42 #define BPP_BLENDED_PIPE 0xffffffff 43 #define DCN31_MAX_DSC_IMAGE_WIDTH 5184 44 #define DCN31_MAX_FMT_420_BUFFER_WIDTH 4096 45 46 // For DML-C changes that hasn't been propagated to VBA yet 47 //#define __DML_VBA_ALLOW_DELTA__ 48 49 // Move these to ip paramaters/constant 50 51 // At which vstartup the DML start to try if the mode can be supported 52 #define __DML_VBA_MIN_VSTARTUP__ 9 53 54 // Delay in DCFCLK from ARB to DET (1st num is ARB to SDPIF, 2nd number is SDPIF to DET) 55 #define __DML_ARB_TO_RET_DELAY__ (7 + 95) 56 57 // fudge factor for min dcfclk calclation 58 #define __DML_MIN_DCFCLK_FACTOR__ 1.15 59 60 typedef struct { 61 double DPPCLK; 62 double DISPCLK; 63 double PixelClock; 64 double DCFCLKDeepSleep; 65 unsigned int DPPPerPlane; 66 bool ScalerEnabled; 67 enum scan_direction_class SourceScan; 68 unsigned int BlockWidth256BytesY; 69 unsigned int BlockHeight256BytesY; 70 unsigned int BlockWidth256BytesC; 71 unsigned int BlockHeight256BytesC; 72 unsigned int InterlaceEnable; 73 unsigned int NumberOfCursors; 74 unsigned int VBlank; 75 unsigned int HTotal; 76 unsigned int DCCEnable; 77 bool ODMCombineIsEnabled; 78 enum source_format_class SourcePixelFormat; 79 int BytePerPixelY; 80 int BytePerPixelC; 81 bool ProgressiveToInterlaceUnitInOPP; 82 } Pipe; 83 84 #define BPP_INVALID 0 85 #define BPP_BLENDED_PIPE 0xffffffff 86 87 static bool CalculateBytePerPixelAnd256BBlockSizes( 88 enum source_format_class SourcePixelFormat, 89 enum dm_swizzle_mode SurfaceTiling, 90 unsigned int *BytePerPixelY, 91 unsigned int *BytePerPixelC, 92 double *BytePerPixelDETY, 93 double *BytePerPixelDETC, 94 unsigned int *BlockHeight256BytesY, 95 unsigned int *BlockHeight256BytesC, 96 unsigned int *BlockWidth256BytesY, 97 unsigned int *BlockWidth256BytesC); 98 static void DisplayPipeConfiguration(struct display_mode_lib *mode_lib); 99 static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation(struct display_mode_lib *mode_lib); 100 static unsigned int dscceComputeDelay( 101 unsigned int bpc, 102 double BPP, 103 unsigned int sliceWidth, 104 unsigned int numSlices, 105 enum output_format_class pixelFormat, 106 enum output_encoder_class Output); 107 static unsigned int dscComputeDelay(enum output_format_class pixelFormat, enum output_encoder_class Output); 108 static bool CalculatePrefetchSchedule( 109 struct display_mode_lib *mode_lib, 110 double HostVMInefficiencyFactor, 111 Pipe *myPipe, 112 unsigned int DSCDelay, 113 double DPPCLKDelaySubtotalPlusCNVCFormater, 114 double DPPCLKDelaySCL, 115 double DPPCLKDelaySCLLBOnly, 116 double DPPCLKDelayCNVCCursor, 117 double DISPCLKDelaySubtotal, 118 unsigned int DPP_RECOUT_WIDTH, 119 enum output_format_class OutputFormat, 120 unsigned int MaxInterDCNTileRepeaters, 121 unsigned int VStartup, 122 unsigned int MaxVStartup, 123 unsigned int GPUVMPageTableLevels, 124 bool GPUVMEnable, 125 bool HostVMEnable, 126 unsigned int HostVMMaxNonCachedPageTableLevels, 127 double HostVMMinPageSize, 128 bool DynamicMetadataEnable, 129 bool DynamicMetadataVMEnabled, 130 int DynamicMetadataLinesBeforeActiveRequired, 131 unsigned int DynamicMetadataTransmittedBytes, 132 double UrgentLatency, 133 double UrgentExtraLatency, 134 double TCalc, 135 unsigned int PDEAndMetaPTEBytesFrame, 136 unsigned int MetaRowByte, 137 unsigned int PixelPTEBytesPerRow, 138 double PrefetchSourceLinesY, 139 unsigned int SwathWidthY, 140 double VInitPreFillY, 141 unsigned int MaxNumSwathY, 142 double PrefetchSourceLinesC, 143 unsigned int SwathWidthC, 144 double VInitPreFillC, 145 unsigned int MaxNumSwathC, 146 int swath_width_luma_ub, 147 int swath_width_chroma_ub, 148 unsigned int SwathHeightY, 149 unsigned int SwathHeightC, 150 double TWait, 151 double *DSTXAfterScaler, 152 double *DSTYAfterScaler, 153 double *DestinationLinesForPrefetch, 154 double *PrefetchBandwidth, 155 double *DestinationLinesToRequestVMInVBlank, 156 double *DestinationLinesToRequestRowInVBlank, 157 double *VRatioPrefetchY, 158 double *VRatioPrefetchC, 159 double *RequiredPrefetchPixDataBWLuma, 160 double *RequiredPrefetchPixDataBWChroma, 161 bool *NotEnoughTimeForDynamicMetadata, 162 double *Tno_bw, 163 double *prefetch_vmrow_bw, 164 double *Tdmdl_vm, 165 double *Tdmdl, 166 double *TSetup, 167 int *VUpdateOffsetPix, 168 double *VUpdateWidthPix, 169 double *VReadyOffsetPix); 170 static double RoundToDFSGranularityUp(double Clock, double VCOSpeed); 171 static double RoundToDFSGranularityDown(double Clock, double VCOSpeed); 172 static void CalculateDCCConfiguration( 173 bool DCCEnabled, 174 bool DCCProgrammingAssumesScanDirectionUnknown, 175 enum source_format_class SourcePixelFormat, 176 unsigned int SurfaceWidthLuma, 177 unsigned int SurfaceWidthChroma, 178 unsigned int SurfaceHeightLuma, 179 unsigned int SurfaceHeightChroma, 180 double DETBufferSize, 181 unsigned int RequestHeight256ByteLuma, 182 unsigned int RequestHeight256ByteChroma, 183 enum dm_swizzle_mode TilingFormat, 184 unsigned int BytePerPixelY, 185 unsigned int BytePerPixelC, 186 double BytePerPixelDETY, 187 double BytePerPixelDETC, 188 enum scan_direction_class ScanOrientation, 189 unsigned int *MaxUncompressedBlockLuma, 190 unsigned int *MaxUncompressedBlockChroma, 191 unsigned int *MaxCompressedBlockLuma, 192 unsigned int *MaxCompressedBlockChroma, 193 unsigned int *IndependentBlockLuma, 194 unsigned int *IndependentBlockChroma); 195 static double CalculatePrefetchSourceLines( 196 struct display_mode_lib *mode_lib, 197 double VRatio, 198 double vtaps, 199 bool Interlace, 200 bool ProgressiveToInterlaceUnitInOPP, 201 unsigned int SwathHeight, 202 unsigned int ViewportYStart, 203 double *VInitPreFill, 204 unsigned int *MaxNumSwath); 205 static unsigned int CalculateVMAndRowBytes( 206 struct display_mode_lib *mode_lib, 207 bool DCCEnable, 208 unsigned int BlockHeight256Bytes, 209 unsigned int BlockWidth256Bytes, 210 enum source_format_class SourcePixelFormat, 211 unsigned int SurfaceTiling, 212 unsigned int BytePerPixel, 213 enum scan_direction_class ScanDirection, 214 unsigned int SwathWidth, 215 unsigned int ViewportHeight, 216 bool GPUVMEnable, 217 bool HostVMEnable, 218 unsigned int HostVMMaxNonCachedPageTableLevels, 219 unsigned int GPUVMMinPageSize, 220 unsigned int HostVMMinPageSize, 221 unsigned int PTEBufferSizeInRequests, 222 unsigned int Pitch, 223 unsigned int DCCMetaPitch, 224 unsigned int *MacroTileWidth, 225 unsigned int *MetaRowByte, 226 unsigned int *PixelPTEBytesPerRow, 227 bool *PTEBufferSizeNotExceeded, 228 int *dpte_row_width_ub, 229 unsigned int *dpte_row_height, 230 unsigned int *MetaRequestWidth, 231 unsigned int *MetaRequestHeight, 232 unsigned int *meta_row_width, 233 unsigned int *meta_row_height, 234 int *vm_group_bytes, 235 unsigned int *dpte_group_bytes, 236 unsigned int *PixelPTEReqWidth, 237 unsigned int *PixelPTEReqHeight, 238 unsigned int *PTERequestSize, 239 int *DPDE0BytesFrame, 240 int *MetaPTEBytesFrame); 241 static double CalculateTWait(unsigned int PrefetchMode, double DRAMClockChangeLatency, double UrgentLatency, double SREnterPlusExitTime); 242 static void CalculateRowBandwidth( 243 bool GPUVMEnable, 244 enum source_format_class SourcePixelFormat, 245 double VRatio, 246 double VRatioChroma, 247 bool DCCEnable, 248 double LineTime, 249 unsigned int MetaRowByteLuma, 250 unsigned int MetaRowByteChroma, 251 unsigned int meta_row_height_luma, 252 unsigned int meta_row_height_chroma, 253 unsigned int PixelPTEBytesPerRowLuma, 254 unsigned int PixelPTEBytesPerRowChroma, 255 unsigned int dpte_row_height_luma, 256 unsigned int dpte_row_height_chroma, 257 double *meta_row_bw, 258 double *dpte_row_bw); 259 260 static void CalculateFlipSchedule( 261 struct display_mode_lib *mode_lib, 262 double HostVMInefficiencyFactor, 263 double UrgentExtraLatency, 264 double UrgentLatency, 265 unsigned int GPUVMMaxPageTableLevels, 266 bool HostVMEnable, 267 unsigned int HostVMMaxNonCachedPageTableLevels, 268 bool GPUVMEnable, 269 double HostVMMinPageSize, 270 double PDEAndMetaPTEBytesPerFrame, 271 double MetaRowBytes, 272 double DPTEBytesPerRow, 273 double BandwidthAvailableForImmediateFlip, 274 unsigned int TotImmediateFlipBytes, 275 enum source_format_class SourcePixelFormat, 276 double LineTime, 277 double VRatio, 278 double VRatioChroma, 279 double Tno_bw, 280 bool DCCEnable, 281 unsigned int dpte_row_height, 282 unsigned int meta_row_height, 283 unsigned int dpte_row_height_chroma, 284 unsigned int meta_row_height_chroma, 285 double *DestinationLinesToRequestVMInImmediateFlip, 286 double *DestinationLinesToRequestRowInImmediateFlip, 287 double *final_flip_bw, 288 bool *ImmediateFlipSupportedForPipe); 289 static double CalculateWriteBackDelay( 290 enum source_format_class WritebackPixelFormat, 291 double WritebackHRatio, 292 double WritebackVRatio, 293 unsigned int WritebackVTaps, 294 int WritebackDestinationWidth, 295 int WritebackDestinationHeight, 296 int WritebackSourceHeight, 297 unsigned int HTotal); 298 299 static void CalculateVupdateAndDynamicMetadataParameters( 300 int MaxInterDCNTileRepeaters, 301 double DPPCLK, 302 double DISPCLK, 303 double DCFClkDeepSleep, 304 double PixelClock, 305 int HTotal, 306 int VBlank, 307 int DynamicMetadataTransmittedBytes, 308 int DynamicMetadataLinesBeforeActiveRequired, 309 int InterlaceEnable, 310 bool ProgressiveToInterlaceUnitInOPP, 311 double *TSetup, 312 double *Tdmbf, 313 double *Tdmec, 314 double *Tdmsks, 315 int *VUpdateOffsetPix, 316 double *VUpdateWidthPix, 317 double *VReadyOffsetPix); 318 319 static void CalculateWatermarksAndDRAMSpeedChangeSupport( 320 struct display_mode_lib *mode_lib, 321 unsigned int PrefetchMode, 322 unsigned int NumberOfActivePlanes, 323 unsigned int MaxLineBufferLines, 324 unsigned int LineBufferSize, 325 unsigned int WritebackInterfaceBufferSize, 326 double DCFCLK, 327 double ReturnBW, 328 bool SynchronizedVBlank, 329 unsigned int dpte_group_bytes[], 330 unsigned int MetaChunkSize, 331 double UrgentLatency, 332 double ExtraLatency, 333 double WritebackLatency, 334 double WritebackChunkSize, 335 double SOCCLK, 336 double DRAMClockChangeLatency, 337 double SRExitTime, 338 double SREnterPlusExitTime, 339 double SRExitZ8Time, 340 double SREnterPlusExitZ8Time, 341 double DCFCLKDeepSleep, 342 unsigned int DETBufferSizeY[], 343 unsigned int DETBufferSizeC[], 344 unsigned int SwathHeightY[], 345 unsigned int SwathHeightC[], 346 unsigned int LBBitPerPixel[], 347 double SwathWidthY[], 348 double SwathWidthC[], 349 double HRatio[], 350 double HRatioChroma[], 351 unsigned int vtaps[], 352 unsigned int VTAPsChroma[], 353 double VRatio[], 354 double VRatioChroma[], 355 unsigned int HTotal[], 356 double PixelClock[], 357 unsigned int BlendingAndTiming[], 358 unsigned int DPPPerPlane[], 359 double BytePerPixelDETY[], 360 double BytePerPixelDETC[], 361 double DSTXAfterScaler[], 362 double DSTYAfterScaler[], 363 bool WritebackEnable[], 364 enum source_format_class WritebackPixelFormat[], 365 double WritebackDestinationWidth[], 366 double WritebackDestinationHeight[], 367 double WritebackSourceHeight[], 368 bool UnboundedRequestEnabled, 369 int unsigned CompressedBufferSizeInkByte, 370 enum clock_change_support *DRAMClockChangeSupport, 371 double *UrgentWatermark, 372 double *WritebackUrgentWatermark, 373 double *DRAMClockChangeWatermark, 374 double *WritebackDRAMClockChangeWatermark, 375 double *StutterExitWatermark, 376 double *StutterEnterPlusExitWatermark, 377 double *Z8StutterExitWatermark, 378 double *Z8StutterEnterPlusExitWatermark, 379 double *MinActiveDRAMClockChangeLatencySupported); 380 381 static void CalculateDCFCLKDeepSleep( 382 struct display_mode_lib *mode_lib, 383 unsigned int NumberOfActivePlanes, 384 int BytePerPixelY[], 385 int BytePerPixelC[], 386 double VRatio[], 387 double VRatioChroma[], 388 double SwathWidthY[], 389 double SwathWidthC[], 390 unsigned int DPPPerPlane[], 391 double HRatio[], 392 double HRatioChroma[], 393 double PixelClock[], 394 double PSCL_THROUGHPUT[], 395 double PSCL_THROUGHPUT_CHROMA[], 396 double DPPCLK[], 397 double ReadBandwidthLuma[], 398 double ReadBandwidthChroma[], 399 int ReturnBusWidth, 400 double *DCFCLKDeepSleep); 401 402 static void CalculateUrgentBurstFactor( 403 int swath_width_luma_ub, 404 int swath_width_chroma_ub, 405 unsigned int SwathHeightY, 406 unsigned int SwathHeightC, 407 double LineTime, 408 double UrgentLatency, 409 double CursorBufferSize, 410 unsigned int CursorWidth, 411 unsigned int CursorBPP, 412 double VRatio, 413 double VRatioC, 414 double BytePerPixelInDETY, 415 double BytePerPixelInDETC, 416 double DETBufferSizeY, 417 double DETBufferSizeC, 418 double *UrgentBurstFactorCursor, 419 double *UrgentBurstFactorLuma, 420 double *UrgentBurstFactorChroma, 421 bool *NotEnoughUrgentLatencyHiding); 422 423 static void UseMinimumDCFCLK( 424 struct display_mode_lib *mode_lib, 425 int MaxInterDCNTileRepeaters, 426 int MaxPrefetchMode, 427 double FinalDRAMClockChangeLatency, 428 double SREnterPlusExitTime, 429 int ReturnBusWidth, 430 int RoundTripPingLatencyCycles, 431 int ReorderingBytes, 432 int PixelChunkSizeInKByte, 433 int MetaChunkSize, 434 bool GPUVMEnable, 435 int GPUVMMaxPageTableLevels, 436 bool HostVMEnable, 437 int NumberOfActivePlanes, 438 double HostVMMinPageSize, 439 int HostVMMaxNonCachedPageTableLevels, 440 bool DynamicMetadataVMEnabled, 441 enum immediate_flip_requirement ImmediateFlipRequirement, 442 bool ProgressiveToInterlaceUnitInOPP, 443 double MaxAveragePercentOfIdealFabricAndSDPPortBWDisplayCanUseInNormalSystemOperation, 444 double PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency, 445 int VTotal[], 446 int VActive[], 447 int DynamicMetadataTransmittedBytes[], 448 int DynamicMetadataLinesBeforeActiveRequired[], 449 bool Interlace[], 450 double RequiredDPPCLK[][2][DC__NUM_DPP__MAX], 451 double RequiredDISPCLK[][2], 452 double UrgLatency[], 453 unsigned int NoOfDPP[][2][DC__NUM_DPP__MAX], 454 double ProjectedDCFCLKDeepSleep[][2], 455 double MaximumVStartup[][2][DC__NUM_DPP__MAX], 456 double TotalVActivePixelBandwidth[][2], 457 double TotalVActiveCursorBandwidth[][2], 458 double TotalMetaRowBandwidth[][2], 459 double TotalDPTERowBandwidth[][2], 460 unsigned int TotalNumberOfActiveDPP[][2], 461 unsigned int TotalNumberOfDCCActiveDPP[][2], 462 int dpte_group_bytes[], 463 double PrefetchLinesY[][2][DC__NUM_DPP__MAX], 464 double PrefetchLinesC[][2][DC__NUM_DPP__MAX], 465 int swath_width_luma_ub_all_states[][2][DC__NUM_DPP__MAX], 466 int swath_width_chroma_ub_all_states[][2][DC__NUM_DPP__MAX], 467 int BytePerPixelY[], 468 int BytePerPixelC[], 469 int HTotal[], 470 double PixelClock[], 471 double PDEAndMetaPTEBytesPerFrame[][2][DC__NUM_DPP__MAX], 472 double DPTEBytesPerRow[][2][DC__NUM_DPP__MAX], 473 double MetaRowBytes[][2][DC__NUM_DPP__MAX], 474 bool DynamicMetadataEnable[], 475 double VActivePixelBandwidth[][2][DC__NUM_DPP__MAX], 476 double VActiveCursorBandwidth[][2][DC__NUM_DPP__MAX], 477 double ReadBandwidthLuma[], 478 double ReadBandwidthChroma[], 479 double DCFCLKPerState[], 480 double DCFCLKState[][2]); 481 482 static void CalculatePixelDeliveryTimes( 483 unsigned int NumberOfActivePlanes, 484 double VRatio[], 485 double VRatioChroma[], 486 double VRatioPrefetchY[], 487 double VRatioPrefetchC[], 488 unsigned int swath_width_luma_ub[], 489 unsigned int swath_width_chroma_ub[], 490 unsigned int DPPPerPlane[], 491 double HRatio[], 492 double HRatioChroma[], 493 double PixelClock[], 494 double PSCL_THROUGHPUT[], 495 double PSCL_THROUGHPUT_CHROMA[], 496 double DPPCLK[], 497 int BytePerPixelC[], 498 enum scan_direction_class SourceScan[], 499 unsigned int NumberOfCursors[], 500 unsigned int CursorWidth[][DC__NUM_CURSOR__MAX], 501 unsigned int CursorBPP[][DC__NUM_CURSOR__MAX], 502 unsigned int BlockWidth256BytesY[], 503 unsigned int BlockHeight256BytesY[], 504 unsigned int BlockWidth256BytesC[], 505 unsigned int BlockHeight256BytesC[], 506 double DisplayPipeLineDeliveryTimeLuma[], 507 double DisplayPipeLineDeliveryTimeChroma[], 508 double DisplayPipeLineDeliveryTimeLumaPrefetch[], 509 double DisplayPipeLineDeliveryTimeChromaPrefetch[], 510 double DisplayPipeRequestDeliveryTimeLuma[], 511 double DisplayPipeRequestDeliveryTimeChroma[], 512 double DisplayPipeRequestDeliveryTimeLumaPrefetch[], 513 double DisplayPipeRequestDeliveryTimeChromaPrefetch[], 514 double CursorRequestDeliveryTime[], 515 double CursorRequestDeliveryTimePrefetch[]); 516 517 static void CalculateMetaAndPTETimes( 518 int NumberOfActivePlanes, 519 bool GPUVMEnable, 520 int MetaChunkSize, 521 int MinMetaChunkSizeBytes, 522 int HTotal[], 523 double VRatio[], 524 double VRatioChroma[], 525 double DestinationLinesToRequestRowInVBlank[], 526 double DestinationLinesToRequestRowInImmediateFlip[], 527 bool DCCEnable[], 528 double PixelClock[], 529 int BytePerPixelY[], 530 int BytePerPixelC[], 531 enum scan_direction_class SourceScan[], 532 int dpte_row_height[], 533 int dpte_row_height_chroma[], 534 int meta_row_width[], 535 int meta_row_width_chroma[], 536 int meta_row_height[], 537 int meta_row_height_chroma[], 538 int meta_req_width[], 539 int meta_req_width_chroma[], 540 int meta_req_height[], 541 int meta_req_height_chroma[], 542 int dpte_group_bytes[], 543 int PTERequestSizeY[], 544 int PTERequestSizeC[], 545 int PixelPTEReqWidthY[], 546 int PixelPTEReqHeightY[], 547 int PixelPTEReqWidthC[], 548 int PixelPTEReqHeightC[], 549 int dpte_row_width_luma_ub[], 550 int dpte_row_width_chroma_ub[], 551 double DST_Y_PER_PTE_ROW_NOM_L[], 552 double DST_Y_PER_PTE_ROW_NOM_C[], 553 double DST_Y_PER_META_ROW_NOM_L[], 554 double DST_Y_PER_META_ROW_NOM_C[], 555 double TimePerMetaChunkNominal[], 556 double TimePerChromaMetaChunkNominal[], 557 double TimePerMetaChunkVBlank[], 558 double TimePerChromaMetaChunkVBlank[], 559 double TimePerMetaChunkFlip[], 560 double TimePerChromaMetaChunkFlip[], 561 double time_per_pte_group_nom_luma[], 562 double time_per_pte_group_vblank_luma[], 563 double time_per_pte_group_flip_luma[], 564 double time_per_pte_group_nom_chroma[], 565 double time_per_pte_group_vblank_chroma[], 566 double time_per_pte_group_flip_chroma[]); 567 568 static void CalculateVMGroupAndRequestTimes( 569 unsigned int NumberOfActivePlanes, 570 bool GPUVMEnable, 571 unsigned int GPUVMMaxPageTableLevels, 572 unsigned int HTotal[], 573 int BytePerPixelC[], 574 double DestinationLinesToRequestVMInVBlank[], 575 double DestinationLinesToRequestVMInImmediateFlip[], 576 bool DCCEnable[], 577 double PixelClock[], 578 int dpte_row_width_luma_ub[], 579 int dpte_row_width_chroma_ub[], 580 int vm_group_bytes[], 581 unsigned int dpde0_bytes_per_frame_ub_l[], 582 unsigned int dpde0_bytes_per_frame_ub_c[], 583 int meta_pte_bytes_per_frame_ub_l[], 584 int meta_pte_bytes_per_frame_ub_c[], 585 double TimePerVMGroupVBlank[], 586 double TimePerVMGroupFlip[], 587 double TimePerVMRequestVBlank[], 588 double TimePerVMRequestFlip[]); 589 590 static void CalculateStutterEfficiency( 591 struct display_mode_lib *mode_lib, 592 int CompressedBufferSizeInkByte, 593 bool UnboundedRequestEnabled, 594 int ConfigReturnBufferSizeInKByte, 595 int MetaFIFOSizeInKEntries, 596 int ZeroSizeBufferEntries, 597 int NumberOfActivePlanes, 598 int ROBBufferSizeInKByte, 599 double TotalDataReadBandwidth, 600 double DCFCLK, 601 double ReturnBW, 602 double COMPBUF_RESERVED_SPACE_64B, 603 double COMPBUF_RESERVED_SPACE_ZS, 604 double SRExitTime, 605 double SRExitZ8Time, 606 bool SynchronizedVBlank, 607 double Z8StutterEnterPlusExitWatermark, 608 double StutterEnterPlusExitWatermark, 609 bool ProgressiveToInterlaceUnitInOPP, 610 bool Interlace[], 611 double MinTTUVBlank[], 612 int DPPPerPlane[], 613 unsigned int DETBufferSizeY[], 614 int BytePerPixelY[], 615 double BytePerPixelDETY[], 616 double SwathWidthY[], 617 int SwathHeightY[], 618 int SwathHeightC[], 619 double NetDCCRateLuma[], 620 double NetDCCRateChroma[], 621 double DCCFractionOfZeroSizeRequestsLuma[], 622 double DCCFractionOfZeroSizeRequestsChroma[], 623 int HTotal[], 624 int VTotal[], 625 double PixelClock[], 626 double VRatio[], 627 enum scan_direction_class SourceScan[], 628 int BlockHeight256BytesY[], 629 int BlockWidth256BytesY[], 630 int BlockHeight256BytesC[], 631 int BlockWidth256BytesC[], 632 int DCCYMaxUncompressedBlock[], 633 int DCCCMaxUncompressedBlock[], 634 int VActive[], 635 bool DCCEnable[], 636 bool WritebackEnable[], 637 double ReadBandwidthPlaneLuma[], 638 double ReadBandwidthPlaneChroma[], 639 double meta_row_bw[], 640 double dpte_row_bw[], 641 double *StutterEfficiencyNotIncludingVBlank, 642 double *StutterEfficiency, 643 int *NumberOfStutterBurstsPerFrame, 644 double *Z8StutterEfficiencyNotIncludingVBlank, 645 double *Z8StutterEfficiency, 646 int *Z8NumberOfStutterBurstsPerFrame, 647 double *StutterPeriod); 648 649 static void CalculateSwathAndDETConfiguration( 650 bool ForceSingleDPP, 651 int NumberOfActivePlanes, 652 unsigned int DETBufferSizeInKByte, 653 double MaximumSwathWidthLuma[], 654 double MaximumSwathWidthChroma[], 655 enum scan_direction_class SourceScan[], 656 enum source_format_class SourcePixelFormat[], 657 enum dm_swizzle_mode SurfaceTiling[], 658 int ViewportWidth[], 659 int ViewportHeight[], 660 int SurfaceWidthY[], 661 int SurfaceWidthC[], 662 int SurfaceHeightY[], 663 int SurfaceHeightC[], 664 int Read256BytesBlockHeightY[], 665 int Read256BytesBlockHeightC[], 666 int Read256BytesBlockWidthY[], 667 int Read256BytesBlockWidthC[], 668 enum odm_combine_mode ODMCombineEnabled[], 669 int BlendingAndTiming[], 670 int BytePerPixY[], 671 int BytePerPixC[], 672 double BytePerPixDETY[], 673 double BytePerPixDETC[], 674 int HActive[], 675 double HRatio[], 676 double HRatioChroma[], 677 int DPPPerPlane[], 678 int swath_width_luma_ub[], 679 int swath_width_chroma_ub[], 680 double SwathWidth[], 681 double SwathWidthChroma[], 682 int SwathHeightY[], 683 int SwathHeightC[], 684 unsigned int DETBufferSizeY[], 685 unsigned int DETBufferSizeC[], 686 bool ViewportSizeSupportPerPlane[], 687 bool *ViewportSizeSupport); 688 static void CalculateSwathWidth( 689 bool ForceSingleDPP, 690 int NumberOfActivePlanes, 691 enum source_format_class SourcePixelFormat[], 692 enum scan_direction_class SourceScan[], 693 int ViewportWidth[], 694 int ViewportHeight[], 695 int SurfaceWidthY[], 696 int SurfaceWidthC[], 697 int SurfaceHeightY[], 698 int SurfaceHeightC[], 699 enum odm_combine_mode ODMCombineEnabled[], 700 int BytePerPixY[], 701 int BytePerPixC[], 702 int Read256BytesBlockHeightY[], 703 int Read256BytesBlockHeightC[], 704 int Read256BytesBlockWidthY[], 705 int Read256BytesBlockWidthC[], 706 int BlendingAndTiming[], 707 int HActive[], 708 double HRatio[], 709 int DPPPerPlane[], 710 double SwathWidthSingleDPPY[], 711 double SwathWidthSingleDPPC[], 712 double SwathWidthY[], 713 double SwathWidthC[], 714 int MaximumSwathHeightY[], 715 int MaximumSwathHeightC[], 716 int swath_width_luma_ub[], 717 int swath_width_chroma_ub[]); 718 719 static double CalculateExtraLatency( 720 int RoundTripPingLatencyCycles, 721 int ReorderingBytes, 722 double DCFCLK, 723 int TotalNumberOfActiveDPP, 724 int PixelChunkSizeInKByte, 725 int TotalNumberOfDCCActiveDPP, 726 int MetaChunkSize, 727 double ReturnBW, 728 bool GPUVMEnable, 729 bool HostVMEnable, 730 int NumberOfActivePlanes, 731 int NumberOfDPP[], 732 int dpte_group_bytes[], 733 double HostVMInefficiencyFactor, 734 double HostVMMinPageSize, 735 int HostVMMaxNonCachedPageTableLevels); 736 737 static double CalculateExtraLatencyBytes( 738 int ReorderingBytes, 739 int TotalNumberOfActiveDPP, 740 int PixelChunkSizeInKByte, 741 int TotalNumberOfDCCActiveDPP, 742 int MetaChunkSize, 743 bool GPUVMEnable, 744 bool HostVMEnable, 745 int NumberOfActivePlanes, 746 int NumberOfDPP[], 747 int dpte_group_bytes[], 748 double HostVMInefficiencyFactor, 749 double HostVMMinPageSize, 750 int HostVMMaxNonCachedPageTableLevels); 751 752 static double CalculateUrgentLatency( 753 double UrgentLatencyPixelDataOnly, 754 double UrgentLatencyPixelMixedWithVMData, 755 double UrgentLatencyVMDataOnly, 756 bool DoUrgentLatencyAdjustment, 757 double UrgentLatencyAdjustmentFabricClockComponent, 758 double UrgentLatencyAdjustmentFabricClockReference, 759 double FabricClockSingle); 760 761 static void CalculateUnboundedRequestAndCompressedBufferSize( 762 unsigned int DETBufferSizeInKByte, 763 int ConfigReturnBufferSizeInKByte, 764 enum unbounded_requesting_policy UseUnboundedRequestingFinal, 765 int TotalActiveDPP, 766 bool NoChromaPlanes, 767 int MaxNumDPP, 768 int CompressedBufferSegmentSizeInkByteFinal, 769 enum output_encoder_class *Output, 770 bool *UnboundedRequestEnabled, 771 int *CompressedBufferSizeInkByte); 772 773 static bool UnboundedRequest(enum unbounded_requesting_policy UseUnboundedRequestingFinal, int TotalNumberOfActiveDPP, bool NoChroma, enum output_encoder_class Output); 774 775 void dml31_recalculate(struct display_mode_lib *mode_lib) 776 { 777 ModeSupportAndSystemConfiguration(mode_lib); 778 PixelClockAdjustmentForProgressiveToInterlaceUnit(mode_lib); 779 DisplayPipeConfiguration(mode_lib); 780 #ifdef __DML_VBA_DEBUG__ 781 dml_print("DML::%s: Calling DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation\n", __func__); 782 #endif 783 DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation(mode_lib); 784 } 785 786 static unsigned int dscceComputeDelay( 787 unsigned int bpc, 788 double BPP, 789 unsigned int sliceWidth, 790 unsigned int numSlices, 791 enum output_format_class pixelFormat, 792 enum output_encoder_class Output) 793 { 794 // valid bpc = source bits per component in the set of {8, 10, 12} 795 // valid bpp = increments of 1/16 of a bit 796 // min = 6/7/8 in N420/N422/444, respectively 797 // max = such that compression is 1:1 798 //valid sliceWidth = number of pixels per slice line, must be less than or equal to 5184/numSlices (or 4096/numSlices in 420 mode) 799 //valid numSlices = number of slices in the horiziontal direction per DSC engine in the set of {1, 2, 3, 4} 800 //valid pixelFormat = pixel/color format in the set of {:N444_RGB, :S422, :N422, :N420} 801 802 // fixed value 803 unsigned int rcModelSize = 8192; 804 805 // N422/N420 operate at 2 pixels per clock 806 unsigned int pixelsPerClock = 0, lstall, D, initalXmitDelay, w, s, ix, wx, P, l0, a, ax, L, Delay, pixels; 807 808 if (pixelFormat == dm_420) 809 pixelsPerClock = 2; 810 else if (pixelFormat == dm_444) 811 pixelsPerClock = 1; 812 else if (pixelFormat == dm_n422) 813 pixelsPerClock = 2; 814 // #all other modes operate at 1 pixel per clock 815 else 816 pixelsPerClock = 1; 817 818 //initial transmit delay as per PPS 819 initalXmitDelay = dml_round(rcModelSize / 2.0 / BPP / pixelsPerClock); 820 821 //compute ssm delay 822 if (bpc == 8) 823 D = 81; 824 else if (bpc == 10) 825 D = 89; 826 else 827 D = 113; 828 829 //divide by pixel per cycle to compute slice width as seen by DSC 830 w = sliceWidth / pixelsPerClock; 831 832 //422 mode has an additional cycle of delay 833 if (pixelFormat == dm_420 || pixelFormat == dm_444 || pixelFormat == dm_n422) 834 s = 0; 835 else 836 s = 1; 837 838 //main calculation for the dscce 839 ix = initalXmitDelay + 45; 840 wx = (w + 2) / 3; 841 P = 3 * wx - w; 842 l0 = ix / w; 843 a = ix + P * l0; 844 ax = (a + 2) / 3 + D + 6 + 1; 845 L = (ax + wx - 1) / wx; 846 if ((ix % w) == 0 && P != 0) 847 lstall = 1; 848 else 849 lstall = 0; 850 Delay = L * wx * (numSlices - 1) + ax + s + lstall + 22; 851 852 //dsc processes 3 pixel containers per cycle and a container can contain 1 or 2 pixels 853 pixels = Delay * 3 * pixelsPerClock; 854 return pixels; 855 } 856 857 static unsigned int dscComputeDelay(enum output_format_class pixelFormat, enum output_encoder_class Output) 858 { 859 unsigned int Delay = 0; 860 861 if (pixelFormat == dm_420) { 862 // sfr 863 Delay = Delay + 2; 864 // dsccif 865 Delay = Delay + 0; 866 // dscc - input deserializer 867 Delay = Delay + 3; 868 // dscc gets pixels every other cycle 869 Delay = Delay + 2; 870 // dscc - input cdc fifo 871 Delay = Delay + 12; 872 // dscc gets pixels every other cycle 873 Delay = Delay + 13; 874 // dscc - cdc uncertainty 875 Delay = Delay + 2; 876 // dscc - output cdc fifo 877 Delay = Delay + 7; 878 // dscc gets pixels every other cycle 879 Delay = Delay + 3; 880 // dscc - cdc uncertainty 881 Delay = Delay + 2; 882 // dscc - output serializer 883 Delay = Delay + 1; 884 // sft 885 Delay = Delay + 1; 886 } else if (pixelFormat == dm_n422) { 887 // sfr 888 Delay = Delay + 2; 889 // dsccif 890 Delay = Delay + 1; 891 // dscc - input deserializer 892 Delay = Delay + 5; 893 // dscc - input cdc fifo 894 Delay = Delay + 25; 895 // dscc - cdc uncertainty 896 Delay = Delay + 2; 897 // dscc - output cdc fifo 898 Delay = Delay + 10; 899 // dscc - cdc uncertainty 900 Delay = Delay + 2; 901 // dscc - output serializer 902 Delay = Delay + 1; 903 // sft 904 Delay = Delay + 1; 905 } else { 906 // sfr 907 Delay = Delay + 2; 908 // dsccif 909 Delay = Delay + 0; 910 // dscc - input deserializer 911 Delay = Delay + 3; 912 // dscc - input cdc fifo 913 Delay = Delay + 12; 914 // dscc - cdc uncertainty 915 Delay = Delay + 2; 916 // dscc - output cdc fifo 917 Delay = Delay + 7; 918 // dscc - output serializer 919 Delay = Delay + 1; 920 // dscc - cdc uncertainty 921 Delay = Delay + 2; 922 // sft 923 Delay = Delay + 1; 924 } 925 926 return Delay; 927 } 928 929 static bool CalculatePrefetchSchedule( 930 struct display_mode_lib *mode_lib, 931 double HostVMInefficiencyFactor, 932 Pipe *myPipe, 933 unsigned int DSCDelay, 934 double DPPCLKDelaySubtotalPlusCNVCFormater, 935 double DPPCLKDelaySCL, 936 double DPPCLKDelaySCLLBOnly, 937 double DPPCLKDelayCNVCCursor, 938 double DISPCLKDelaySubtotal, 939 unsigned int DPP_RECOUT_WIDTH, 940 enum output_format_class OutputFormat, 941 unsigned int MaxInterDCNTileRepeaters, 942 unsigned int VStartup, 943 unsigned int MaxVStartup, 944 unsigned int GPUVMPageTableLevels, 945 bool GPUVMEnable, 946 bool HostVMEnable, 947 unsigned int HostVMMaxNonCachedPageTableLevels, 948 double HostVMMinPageSize, 949 bool DynamicMetadataEnable, 950 bool DynamicMetadataVMEnabled, 951 int DynamicMetadataLinesBeforeActiveRequired, 952 unsigned int DynamicMetadataTransmittedBytes, 953 double UrgentLatency, 954 double UrgentExtraLatency, 955 double TCalc, 956 unsigned int PDEAndMetaPTEBytesFrame, 957 unsigned int MetaRowByte, 958 unsigned int PixelPTEBytesPerRow, 959 double PrefetchSourceLinesY, 960 unsigned int SwathWidthY, 961 double VInitPreFillY, 962 unsigned int MaxNumSwathY, 963 double PrefetchSourceLinesC, 964 unsigned int SwathWidthC, 965 double VInitPreFillC, 966 unsigned int MaxNumSwathC, 967 int swath_width_luma_ub, 968 int swath_width_chroma_ub, 969 unsigned int SwathHeightY, 970 unsigned int SwathHeightC, 971 double TWait, 972 double *DSTXAfterScaler, 973 double *DSTYAfterScaler, 974 double *DestinationLinesForPrefetch, 975 double *PrefetchBandwidth, 976 double *DestinationLinesToRequestVMInVBlank, 977 double *DestinationLinesToRequestRowInVBlank, 978 double *VRatioPrefetchY, 979 double *VRatioPrefetchC, 980 double *RequiredPrefetchPixDataBWLuma, 981 double *RequiredPrefetchPixDataBWChroma, 982 bool *NotEnoughTimeForDynamicMetadata, 983 double *Tno_bw, 984 double *prefetch_vmrow_bw, 985 double *Tdmdl_vm, 986 double *Tdmdl, 987 double *TSetup, 988 int *VUpdateOffsetPix, 989 double *VUpdateWidthPix, 990 double *VReadyOffsetPix) 991 { 992 bool MyError = false; 993 unsigned int DPPCycles, DISPCLKCycles; 994 double DSTTotalPixelsAfterScaler; 995 double LineTime; 996 double dst_y_prefetch_equ; 997 double Tsw_oto; 998 double prefetch_bw_oto; 999 double Tvm_oto; 1000 double Tr0_oto; 1001 double Tvm_oto_lines; 1002 double Tr0_oto_lines; 1003 double dst_y_prefetch_oto; 1004 double TimeForFetchingMetaPTE = 0; 1005 double TimeForFetchingRowInVBlank = 0; 1006 double LinesToRequestPrefetchPixelData = 0; 1007 unsigned int HostVMDynamicLevelsTrips; 1008 double trip_to_mem; 1009 double Tvm_trips; 1010 double Tr0_trips; 1011 double Tvm_trips_rounded; 1012 double Tr0_trips_rounded; 1013 double Lsw_oto; 1014 double Tpre_rounded; 1015 double prefetch_bw_equ; 1016 double Tvm_equ; 1017 double Tr0_equ; 1018 double Tdmbf; 1019 double Tdmec; 1020 double Tdmsks; 1021 double prefetch_sw_bytes; 1022 double bytes_pp; 1023 double dep_bytes; 1024 int max_vratio_pre = 4; 1025 double min_Lsw; 1026 double Tsw_est1 = 0; 1027 double Tsw_est3 = 0; 1028 1029 if (GPUVMEnable == true && HostVMEnable == true) { 1030 HostVMDynamicLevelsTrips = HostVMMaxNonCachedPageTableLevels; 1031 } else { 1032 HostVMDynamicLevelsTrips = 0; 1033 } 1034 #ifdef __DML_VBA_DEBUG__ 1035 dml_print("DML::%s: GPUVMEnable=%d HostVMEnable=%d HostVMInefficiencyFactor=%f\n", __func__, GPUVMEnable, HostVMEnable, HostVMInefficiencyFactor); 1036 #endif 1037 CalculateVupdateAndDynamicMetadataParameters( 1038 MaxInterDCNTileRepeaters, 1039 myPipe->DPPCLK, 1040 myPipe->DISPCLK, 1041 myPipe->DCFCLKDeepSleep, 1042 myPipe->PixelClock, 1043 myPipe->HTotal, 1044 myPipe->VBlank, 1045 DynamicMetadataTransmittedBytes, 1046 DynamicMetadataLinesBeforeActiveRequired, 1047 myPipe->InterlaceEnable, 1048 myPipe->ProgressiveToInterlaceUnitInOPP, 1049 TSetup, 1050 &Tdmbf, 1051 &Tdmec, 1052 &Tdmsks, 1053 VUpdateOffsetPix, 1054 VUpdateWidthPix, 1055 VReadyOffsetPix); 1056 1057 LineTime = myPipe->HTotal / myPipe->PixelClock; 1058 trip_to_mem = UrgentLatency; 1059 Tvm_trips = UrgentExtraLatency + trip_to_mem * (GPUVMPageTableLevels * (HostVMDynamicLevelsTrips + 1) - 1); 1060 1061 #ifdef __DML_VBA_ALLOW_DELTA__ 1062 if (DynamicMetadataVMEnabled == true && GPUVMEnable == true) { 1063 #else 1064 if (DynamicMetadataVMEnabled == true) { 1065 #endif 1066 *Tdmdl = TWait + Tvm_trips + trip_to_mem; 1067 } else { 1068 *Tdmdl = TWait + UrgentExtraLatency; 1069 } 1070 1071 #ifdef __DML_VBA_ALLOW_DELTA__ 1072 if (DynamicMetadataEnable == false) { 1073 *Tdmdl = 0.0; 1074 } 1075 #endif 1076 1077 if (DynamicMetadataEnable == true) { 1078 if (VStartup * LineTime < *TSetup + *Tdmdl + Tdmbf + Tdmec + Tdmsks) { 1079 *NotEnoughTimeForDynamicMetadata = true; 1080 dml_print("DML::%s: Not Enough Time for Dynamic Meta!\n", __func__); 1081 dml_print("DML::%s: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", __func__, Tdmbf); 1082 dml_print("DML::%s: Tdmec: %fus - time dio takes to transfer dmd\n", __func__, Tdmec); 1083 dml_print("DML::%s: Tdmsks: %fus - time before active dmd must complete transmission at dio\n", __func__, Tdmsks); 1084 dml_print("DML::%s: Tdmdl: %fus - time for fabric to become ready and fetch dmd \n", __func__, *Tdmdl); 1085 } else { 1086 *NotEnoughTimeForDynamicMetadata = false; 1087 } 1088 } else { 1089 *NotEnoughTimeForDynamicMetadata = false; 1090 } 1091 1092 *Tdmdl_vm = (DynamicMetadataEnable == true && DynamicMetadataVMEnabled == true && GPUVMEnable == true ? TWait + Tvm_trips : 0); 1093 1094 if (myPipe->ScalerEnabled) 1095 DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + DPPCLKDelaySCL; 1096 else 1097 DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + DPPCLKDelaySCLLBOnly; 1098 1099 DPPCycles = DPPCycles + myPipe->NumberOfCursors * DPPCLKDelayCNVCCursor; 1100 1101 DISPCLKCycles = DISPCLKDelaySubtotal; 1102 1103 if (myPipe->DPPCLK == 0.0 || myPipe->DISPCLK == 0.0) 1104 return true; 1105 1106 *DSTXAfterScaler = DPPCycles * myPipe->PixelClock / myPipe->DPPCLK + DISPCLKCycles * myPipe->PixelClock / myPipe->DISPCLK + DSCDelay; 1107 1108 #ifdef __DML_VBA_DEBUG__ 1109 dml_print("DML::%s: DPPCycles: %d\n", __func__, DPPCycles); 1110 dml_print("DML::%s: PixelClock: %f\n", __func__, myPipe->PixelClock); 1111 dml_print("DML::%s: DPPCLK: %f\n", __func__, myPipe->DPPCLK); 1112 dml_print("DML::%s: DISPCLKCycles: %d\n", __func__, DISPCLKCycles); 1113 dml_print("DML::%s: DISPCLK: %f\n", __func__, myPipe->DISPCLK); 1114 dml_print("DML::%s: DSCDelay: %d\n", __func__, DSCDelay); 1115 dml_print("DML::%s: DSTXAfterScaler: %d\n", __func__, *DSTXAfterScaler); 1116 dml_print("DML::%s: ODMCombineIsEnabled: %d\n", __func__, myPipe->ODMCombineIsEnabled); 1117 #endif 1118 1119 *DSTXAfterScaler = *DSTXAfterScaler + ((myPipe->ODMCombineIsEnabled) ? 18 : 0) + (myPipe->DPPPerPlane - 1) * DPP_RECOUT_WIDTH; 1120 1121 if (OutputFormat == dm_420 || (myPipe->InterlaceEnable && myPipe->ProgressiveToInterlaceUnitInOPP)) 1122 *DSTYAfterScaler = 1; 1123 else 1124 *DSTYAfterScaler = 0; 1125 1126 DSTTotalPixelsAfterScaler = *DSTYAfterScaler * myPipe->HTotal + *DSTXAfterScaler; 1127 *DSTYAfterScaler = dml_floor(DSTTotalPixelsAfterScaler / myPipe->HTotal, 1); 1128 *DSTXAfterScaler = DSTTotalPixelsAfterScaler - ((double) (*DSTYAfterScaler * myPipe->HTotal)); 1129 1130 #ifdef __DML_VBA_DEBUG__ 1131 dml_print("DML::%s: DSTXAfterScaler: %d (final)\n", __func__, *DSTXAfterScaler); 1132 #endif 1133 1134 MyError = false; 1135 1136 Tr0_trips = trip_to_mem * (HostVMDynamicLevelsTrips + 1); 1137 Tvm_trips_rounded = dml_ceil(4.0 * Tvm_trips / LineTime, 1) / 4 * LineTime; 1138 Tr0_trips_rounded = dml_ceil(4.0 * Tr0_trips / LineTime, 1) / 4 * LineTime; 1139 1140 #ifdef __DML_VBA_ALLOW_DELTA__ 1141 if (!myPipe->DCCEnable) { 1142 Tr0_trips = 0.0; 1143 Tr0_trips_rounded = 0.0; 1144 } 1145 #endif 1146 1147 if (!GPUVMEnable) { 1148 Tvm_trips = 0.0; 1149 Tvm_trips_rounded = 0.0; 1150 } 1151 1152 if (GPUVMEnable) { 1153 if (GPUVMPageTableLevels >= 3) { 1154 *Tno_bw = UrgentExtraLatency + trip_to_mem * ((GPUVMPageTableLevels - 2) - 1); 1155 } else { 1156 *Tno_bw = 0; 1157 } 1158 } else if (!myPipe->DCCEnable) { 1159 *Tno_bw = LineTime; 1160 } else { 1161 *Tno_bw = LineTime / 4; 1162 } 1163 1164 if (myPipe->SourcePixelFormat == dm_420_8 || myPipe->SourcePixelFormat == dm_420_10 || myPipe->SourcePixelFormat == dm_420_12) 1165 bytes_pp = myPipe->BytePerPixelY + myPipe->BytePerPixelC / 4; 1166 else 1167 bytes_pp = myPipe->BytePerPixelY + myPipe->BytePerPixelC; 1168 1169 prefetch_sw_bytes = PrefetchSourceLinesY * swath_width_luma_ub * myPipe->BytePerPixelY + PrefetchSourceLinesC * swath_width_chroma_ub * myPipe->BytePerPixelC; 1170 prefetch_bw_oto = dml_max(bytes_pp * myPipe->PixelClock / myPipe->DPPPerPlane, prefetch_sw_bytes / (dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) * LineTime)); 1171 1172 min_Lsw = dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) / max_vratio_pre; 1173 Lsw_oto = dml_ceil(4 * dml_max(prefetch_sw_bytes / prefetch_bw_oto / LineTime, min_Lsw), 1) / 4; 1174 Tsw_oto = Lsw_oto * LineTime; 1175 1176 prefetch_bw_oto = (PrefetchSourceLinesY * swath_width_luma_ub * myPipe->BytePerPixelY + PrefetchSourceLinesC * swath_width_chroma_ub * myPipe->BytePerPixelC) / Tsw_oto; 1177 1178 #ifdef __DML_VBA_DEBUG__ 1179 dml_print("DML: HTotal: %d\n", myPipe->HTotal); 1180 dml_print("DML: prefetch_bw_oto: %f\n", prefetch_bw_oto); 1181 dml_print("DML: PrefetchSourceLinesY: %f\n", PrefetchSourceLinesY); 1182 dml_print("DML: swath_width_luma_ub: %d\n", swath_width_luma_ub); 1183 dml_print("DML: BytePerPixelY: %d\n", myPipe->BytePerPixelY); 1184 dml_print("DML: Tsw_oto: %f\n", Tsw_oto); 1185 #endif 1186 1187 if (GPUVMEnable == true) 1188 Tvm_oto = dml_max3(*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / prefetch_bw_oto, Tvm_trips, LineTime / 4.0); 1189 else 1190 Tvm_oto = LineTime / 4.0; 1191 1192 if ((GPUVMEnable == true || myPipe->DCCEnable == true)) { 1193 Tr0_oto = dml_max4((MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / prefetch_bw_oto, Tr0_trips, // PREVIOUS_ERROR (missing this term) 1194 LineTime - Tvm_oto, 1195 LineTime / 4); 1196 } else { 1197 Tr0_oto = (LineTime - Tvm_oto) / 2.0; 1198 } 1199 1200 #ifdef __DML_VBA_DEBUG__ 1201 dml_print("DML::%s: Tvm_trips = %f\n", __func__, Tvm_trips); 1202 dml_print("DML::%s: Tr0_trips = %f\n", __func__, Tr0_trips); 1203 dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, MetaRowByte); 1204 dml_print("DML::%s: MetaRowByte = %d\n", __func__, MetaRowByte); 1205 dml_print("DML::%s: PixelPTEBytesPerRow = %d\n", __func__, PixelPTEBytesPerRow); 1206 dml_print("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, HostVMInefficiencyFactor); 1207 dml_print("DML::%s: prefetch_bw_oto = %f\n", __func__, prefetch_bw_oto); 1208 dml_print("DML::%s: Tr0_oto = %f\n", __func__, Tr0_oto); 1209 dml_print("DML::%s: Tvm_oto = %f\n", __func__, Tvm_oto); 1210 #endif 1211 1212 Tvm_oto_lines = dml_ceil(4.0 * Tvm_oto / LineTime, 1) / 4.0; 1213 Tr0_oto_lines = dml_ceil(4.0 * Tr0_oto / LineTime, 1) / 4.0; 1214 dst_y_prefetch_oto = Tvm_oto_lines + 2 * Tr0_oto_lines + Lsw_oto; 1215 dst_y_prefetch_equ = VStartup - (*TSetup + dml_max(TWait + TCalc, *Tdmdl)) / LineTime - (*DSTYAfterScaler + *DSTXAfterScaler / myPipe->HTotal); 1216 dst_y_prefetch_equ = dml_floor(4.0 * (dst_y_prefetch_equ + 0.125), 1) / 4.0; 1217 Tpre_rounded = dst_y_prefetch_equ * LineTime; 1218 1219 dep_bytes = dml_max(PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor, MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor); 1220 1221 if (prefetch_sw_bytes < dep_bytes) 1222 prefetch_sw_bytes = 2 * dep_bytes; 1223 1224 dml_print("DML: dst_y_prefetch_oto: %f\n", dst_y_prefetch_oto); 1225 dml_print("DML: Tvm_oto_lines: %f\n", Tvm_oto_lines); 1226 dml_print("DML: Tr0_oto_lines: %f\n", Tr0_oto_lines); 1227 dml_print("DML: Lsw_oto: %f\n", Lsw_oto); 1228 dml_print("DML: LineTime: %f\n", LineTime); 1229 dml_print("DML: dst_y_prefetch_equ: %f (after round)\n", dst_y_prefetch_equ); 1230 1231 dml_print("DML: LineTime: %f\n", LineTime); 1232 dml_print("DML: VStartup: %d\n", VStartup); 1233 dml_print("DML: Tvstartup: %fus - time between vstartup and first pixel of active\n", VStartup * LineTime); 1234 dml_print("DML: TSetup: %fus - time from vstartup to vready\n", *TSetup); 1235 dml_print("DML: TCalc: %fus - time for calculations in dchub starting at vready\n", TCalc); 1236 dml_print("DML: TWait: %fus - time for fabric to become ready max(pstate exit,cstate enter/exit, urgent latency) after TCalc\n", TWait); 1237 dml_print("DML: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", Tdmbf); 1238 dml_print("DML: Tdmec: %fus - time dio takes to transfer dmd\n", Tdmec); 1239 dml_print("DML: Tdmsks: %fus - time before active dmd must complete transmission at dio\n", Tdmsks); 1240 dml_print("DML: Tdmdl_vm: %fus - time for vm stages of dmd \n", *Tdmdl_vm); 1241 dml_print("DML: Tdmdl: %fus - time for fabric to become ready and fetch dmd \n", *Tdmdl); 1242 dml_print("DML: DSTXAfterScaler: %f pixels - number of pixel clocks pipeline and buffer delay after scaler \n", *DSTXAfterScaler); 1243 dml_print("DML: DSTYAfterScaler: %f lines - number of lines of pipeline and buffer delay after scaler \n", *DSTYAfterScaler); 1244 1245 *PrefetchBandwidth = 0; 1246 *DestinationLinesToRequestVMInVBlank = 0; 1247 *DestinationLinesToRequestRowInVBlank = 0; 1248 *VRatioPrefetchY = 0; 1249 *VRatioPrefetchC = 0; 1250 *RequiredPrefetchPixDataBWLuma = 0; 1251 if (dst_y_prefetch_equ > 1) { 1252 double PrefetchBandwidth1; 1253 double PrefetchBandwidth2; 1254 double PrefetchBandwidth3; 1255 double PrefetchBandwidth4; 1256 1257 if (Tpre_rounded - *Tno_bw > 0) { 1258 PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor 1259 + prefetch_sw_bytes) / (Tpre_rounded - *Tno_bw); 1260 Tsw_est1 = prefetch_sw_bytes / PrefetchBandwidth1; 1261 } else { 1262 PrefetchBandwidth1 = 0; 1263 } 1264 1265 if (VStartup == MaxVStartup && Tsw_est1 / LineTime < min_Lsw && Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - *Tno_bw > 0) { 1266 PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor) 1267 / (Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - *Tno_bw); 1268 } 1269 1270 if (Tpre_rounded - *Tno_bw - 2 * Tr0_trips_rounded > 0) 1271 PrefetchBandwidth2 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + prefetch_sw_bytes) / (Tpre_rounded - *Tno_bw - 2 * Tr0_trips_rounded); 1272 else 1273 PrefetchBandwidth2 = 0; 1274 1275 if (Tpre_rounded - Tvm_trips_rounded > 0) { 1276 PrefetchBandwidth3 = (2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor 1277 + prefetch_sw_bytes) / (Tpre_rounded - Tvm_trips_rounded); 1278 Tsw_est3 = prefetch_sw_bytes / PrefetchBandwidth3; 1279 } else { 1280 PrefetchBandwidth3 = 0; 1281 } 1282 1283 #ifdef __DML_VBA_DEBUG__ 1284 dml_print("DML::%s: Tpre_rounded: %f\n", __func__, Tpre_rounded); 1285 dml_print("DML::%s: Tvm_trips_rounded: %f\n", __func__, Tvm_trips_rounded); 1286 dml_print("DML::%s: PrefetchBandwidth3: %f\n", __func__, PrefetchBandwidth3); 1287 #endif 1288 if (VStartup == MaxVStartup && Tsw_est3 / LineTime < min_Lsw && Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - Tvm_trips_rounded > 0) { 1289 PrefetchBandwidth3 = (2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor) 1290 / (Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - Tvm_trips_rounded); 1291 } 1292 1293 if (Tpre_rounded - Tvm_trips_rounded - 2 * Tr0_trips_rounded > 0) 1294 PrefetchBandwidth4 = prefetch_sw_bytes / (Tpre_rounded - Tvm_trips_rounded - 2 * Tr0_trips_rounded); 1295 else 1296 PrefetchBandwidth4 = 0; 1297 1298 { 1299 bool Case1OK; 1300 bool Case2OK; 1301 bool Case3OK; 1302 1303 if (PrefetchBandwidth1 > 0) { 1304 if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth1 >= Tvm_trips_rounded 1305 && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / PrefetchBandwidth1 >= Tr0_trips_rounded) { 1306 Case1OK = true; 1307 } else { 1308 Case1OK = false; 1309 } 1310 } else { 1311 Case1OK = false; 1312 } 1313 1314 if (PrefetchBandwidth2 > 0) { 1315 if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth2 >= Tvm_trips_rounded 1316 && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / PrefetchBandwidth2 < Tr0_trips_rounded) { 1317 Case2OK = true; 1318 } else { 1319 Case2OK = false; 1320 } 1321 } else { 1322 Case2OK = false; 1323 } 1324 1325 if (PrefetchBandwidth3 > 0) { 1326 if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth3 < Tvm_trips_rounded 1327 && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / PrefetchBandwidth3 >= Tr0_trips_rounded) { 1328 Case3OK = true; 1329 } else { 1330 Case3OK = false; 1331 } 1332 } else { 1333 Case3OK = false; 1334 } 1335 1336 if (Case1OK) { 1337 prefetch_bw_equ = PrefetchBandwidth1; 1338 } else if (Case2OK) { 1339 prefetch_bw_equ = PrefetchBandwidth2; 1340 } else if (Case3OK) { 1341 prefetch_bw_equ = PrefetchBandwidth3; 1342 } else { 1343 prefetch_bw_equ = PrefetchBandwidth4; 1344 } 1345 1346 #ifdef __DML_VBA_DEBUG__ 1347 dml_print("DML::%s: Case1OK: %d\n", __func__, Case1OK); 1348 dml_print("DML::%s: Case2OK: %d\n", __func__, Case2OK); 1349 dml_print("DML::%s: Case3OK: %d\n", __func__, Case3OK); 1350 dml_print("DML::%s: prefetch_bw_equ: %f\n", __func__, prefetch_bw_equ); 1351 #endif 1352 1353 if (prefetch_bw_equ > 0) { 1354 if (GPUVMEnable == true) { 1355 Tvm_equ = dml_max3(*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / prefetch_bw_equ, Tvm_trips, LineTime / 4); 1356 } else { 1357 Tvm_equ = LineTime / 4; 1358 } 1359 1360 if ((GPUVMEnable == true || myPipe->DCCEnable == true)) { 1361 Tr0_equ = dml_max4( 1362 (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / prefetch_bw_equ, 1363 Tr0_trips, 1364 (LineTime - Tvm_equ) / 2, 1365 LineTime / 4); 1366 } else { 1367 Tr0_equ = (LineTime - Tvm_equ) / 2; 1368 } 1369 } else { 1370 Tvm_equ = 0; 1371 Tr0_equ = 0; 1372 dml_print("DML: prefetch_bw_equ equals 0! %s:%d\n", __FILE__, __LINE__); 1373 } 1374 } 1375 1376 if (dst_y_prefetch_oto < dst_y_prefetch_equ) { 1377 *DestinationLinesForPrefetch = dst_y_prefetch_oto; 1378 TimeForFetchingMetaPTE = Tvm_oto; 1379 TimeForFetchingRowInVBlank = Tr0_oto; 1380 *PrefetchBandwidth = prefetch_bw_oto; 1381 } else { 1382 *DestinationLinesForPrefetch = dst_y_prefetch_equ; 1383 TimeForFetchingMetaPTE = Tvm_equ; 1384 TimeForFetchingRowInVBlank = Tr0_equ; 1385 *PrefetchBandwidth = prefetch_bw_equ; 1386 } 1387 1388 *DestinationLinesToRequestVMInVBlank = dml_ceil(4.0 * TimeForFetchingMetaPTE / LineTime, 1.0) / 4.0; 1389 1390 *DestinationLinesToRequestRowInVBlank = dml_ceil(4.0 * TimeForFetchingRowInVBlank / LineTime, 1.0) / 4.0; 1391 1392 #ifdef __DML_VBA_ALLOW_DELTA__ 1393 LinesToRequestPrefetchPixelData = *DestinationLinesForPrefetch 1394 // See note above dated 5/30/2018 1395 // - ((NumberOfCursors > 0 || GPUVMEnable || DCCEnable) ? 1396 - ((GPUVMEnable || myPipe->DCCEnable) ? (*DestinationLinesToRequestVMInVBlank + 2 * *DestinationLinesToRequestRowInVBlank) : 0.0); // TODO: Did someone else add this?? 1397 #else 1398 LinesToRequestPrefetchPixelData = *DestinationLinesForPrefetch - *DestinationLinesToRequestVMInVBlank - 2 * *DestinationLinesToRequestRowInVBlank; 1399 #endif 1400 1401 #ifdef __DML_VBA_DEBUG__ 1402 dml_print("DML::%s: DestinationLinesForPrefetch = %f\n", __func__, *DestinationLinesForPrefetch); 1403 dml_print("DML::%s: DestinationLinesToRequestVMInVBlank = %f\n", __func__, *DestinationLinesToRequestVMInVBlank); 1404 dml_print("DML::%s: TimeForFetchingRowInVBlank = %f\n", __func__, TimeForFetchingRowInVBlank); 1405 dml_print("DML::%s: LineTime = %f\n", __func__, LineTime); 1406 dml_print("DML::%s: DestinationLinesToRequestRowInVBlank = %f\n", __func__, *DestinationLinesToRequestRowInVBlank); 1407 dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, PrefetchSourceLinesY); 1408 dml_print("DML::%s: LinesToRequestPrefetchPixelData = %f\n", __func__, LinesToRequestPrefetchPixelData); 1409 #endif 1410 1411 if (LinesToRequestPrefetchPixelData > 0 && prefetch_bw_equ > 0) { 1412 1413 *VRatioPrefetchY = (double) PrefetchSourceLinesY / LinesToRequestPrefetchPixelData; 1414 *VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0); 1415 #ifdef __DML_VBA_DEBUG__ 1416 dml_print("DML::%s: VRatioPrefetchY = %f\n", __func__, *VRatioPrefetchY); 1417 dml_print("DML::%s: SwathHeightY = %d\n", __func__, SwathHeightY); 1418 dml_print("DML::%s: VInitPreFillY = %f\n", __func__, VInitPreFillY); 1419 #endif 1420 if ((SwathHeightY > 4) && (VInitPreFillY > 3)) { 1421 if (LinesToRequestPrefetchPixelData > (VInitPreFillY - 3.0) / 2.0) { 1422 *VRatioPrefetchY = dml_max( 1423 (double) PrefetchSourceLinesY / LinesToRequestPrefetchPixelData, 1424 (double) MaxNumSwathY * SwathHeightY / (LinesToRequestPrefetchPixelData - (VInitPreFillY - 3.0) / 2.0)); 1425 *VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0); 1426 } else { 1427 MyError = true; 1428 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__); 1429 *VRatioPrefetchY = 0; 1430 } 1431 #ifdef __DML_VBA_DEBUG__ 1432 dml_print("DML::%s: VRatioPrefetchY = %f\n", __func__, *VRatioPrefetchY); 1433 dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, PrefetchSourceLinesY); 1434 dml_print("DML::%s: MaxNumSwathY = %d\n", __func__, MaxNumSwathY); 1435 #endif 1436 } 1437 1438 *VRatioPrefetchC = (double) PrefetchSourceLinesC / LinesToRequestPrefetchPixelData; 1439 *VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0); 1440 1441 #ifdef __DML_VBA_DEBUG__ 1442 dml_print("DML::%s: VRatioPrefetchC = %f\n", __func__, *VRatioPrefetchC); 1443 dml_print("DML::%s: SwathHeightC = %d\n", __func__, SwathHeightC); 1444 dml_print("DML::%s: VInitPreFillC = %f\n", __func__, VInitPreFillC); 1445 #endif 1446 if ((SwathHeightC > 4)) { 1447 if (LinesToRequestPrefetchPixelData > (VInitPreFillC - 3.0) / 2.0) { 1448 *VRatioPrefetchC = dml_max( 1449 *VRatioPrefetchC, 1450 (double) MaxNumSwathC * SwathHeightC / (LinesToRequestPrefetchPixelData - (VInitPreFillC - 3.0) / 2.0)); 1451 *VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0); 1452 } else { 1453 MyError = true; 1454 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__); 1455 *VRatioPrefetchC = 0; 1456 } 1457 #ifdef __DML_VBA_DEBUG__ 1458 dml_print("DML::%s: VRatioPrefetchC = %f\n", __func__, *VRatioPrefetchC); 1459 dml_print("DML::%s: PrefetchSourceLinesC = %f\n", __func__, PrefetchSourceLinesC); 1460 dml_print("DML::%s: MaxNumSwathC = %d\n", __func__, MaxNumSwathC); 1461 #endif 1462 } 1463 1464 #ifdef __DML_VBA_DEBUG__ 1465 dml_print("DML::%s: BytePerPixelY = %d\n", __func__, myPipe->BytePerPixelY); 1466 dml_print("DML::%s: swath_width_luma_ub = %d\n", __func__, swath_width_luma_ub); 1467 dml_print("DML::%s: LineTime = %f\n", __func__, LineTime); 1468 #endif 1469 1470 *RequiredPrefetchPixDataBWLuma = (double) PrefetchSourceLinesY / LinesToRequestPrefetchPixelData * myPipe->BytePerPixelY * swath_width_luma_ub / LineTime; 1471 1472 #ifdef __DML_VBA_DEBUG__ 1473 dml_print("DML::%s: RequiredPrefetchPixDataBWLuma = %f\n", __func__, *RequiredPrefetchPixDataBWLuma); 1474 #endif 1475 1476 *RequiredPrefetchPixDataBWChroma = (double) PrefetchSourceLinesC / LinesToRequestPrefetchPixelData * myPipe->BytePerPixelC * swath_width_chroma_ub 1477 / LineTime; 1478 } else { 1479 MyError = true; 1480 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__); 1481 dml_print("DML: LinesToRequestPrefetchPixelData: %f, should be > 0\n", LinesToRequestPrefetchPixelData); 1482 *VRatioPrefetchY = 0; 1483 *VRatioPrefetchC = 0; 1484 *RequiredPrefetchPixDataBWLuma = 0; 1485 *RequiredPrefetchPixDataBWChroma = 0; 1486 } 1487 1488 dml_print( 1489 "DML: Tpre: %fus - sum of time to request meta pte, 2 x data pte + meta data, swaths\n", 1490 (double) LinesToRequestPrefetchPixelData * LineTime + 2.0 * TimeForFetchingRowInVBlank + TimeForFetchingMetaPTE); 1491 dml_print("DML: Tvm: %fus - time to fetch page tables for meta surface\n", TimeForFetchingMetaPTE); 1492 dml_print("DML: Tr0: %fus - time to fetch first row of data pagetables and first row of meta data (done in parallel)\n", TimeForFetchingRowInVBlank); 1493 dml_print( 1494 "DML: Tsw: %fus = time to fetch enough pixel data and cursor data to feed the scalers init position and detile\n", 1495 (double) LinesToRequestPrefetchPixelData * LineTime); 1496 dml_print("DML: To: %fus - time for propagation from scaler to optc\n", 1497 (*DSTYAfterScaler + ((double) (*DSTXAfterScaler) / 1498 (double) myPipe->HTotal)) * LineTime); 1499 dml_print("DML: Tvstartup - TSetup - Tcalc - Twait - Tpre - To > 0\n"); 1500 dml_print("DML: Tslack(pre): %fus - time left over in schedule\n", 1501 VStartup * LineTime - TimeForFetchingMetaPTE - 2 * TimeForFetchingRowInVBlank 1502 - (*DSTYAfterScaler + ((double) (*DSTXAfterScaler) / (double) myPipe->HTotal)) * LineTime - TWait - TCalc - *TSetup); 1503 dml_print("DML: row_bytes = dpte_row_bytes (per_pipe) = PixelPTEBytesPerRow = : %d\n", PixelPTEBytesPerRow); 1504 1505 } else { 1506 MyError = true; 1507 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__); 1508 } 1509 1510 { 1511 double prefetch_vm_bw; 1512 double prefetch_row_bw; 1513 1514 if (PDEAndMetaPTEBytesFrame == 0) { 1515 prefetch_vm_bw = 0; 1516 } else if (*DestinationLinesToRequestVMInVBlank > 0) { 1517 #ifdef __DML_VBA_DEBUG__ 1518 dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame); 1519 dml_print("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, HostVMInefficiencyFactor); 1520 dml_print("DML::%s: DestinationLinesToRequestVMInVBlank = %f\n", __func__, *DestinationLinesToRequestVMInVBlank); 1521 dml_print("DML::%s: LineTime = %f\n", __func__, LineTime); 1522 #endif 1523 prefetch_vm_bw = PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / (*DestinationLinesToRequestVMInVBlank * LineTime); 1524 #ifdef __DML_VBA_DEBUG__ 1525 dml_print("DML::%s: prefetch_vm_bw = %f\n", __func__, prefetch_vm_bw); 1526 #endif 1527 } else { 1528 prefetch_vm_bw = 0; 1529 MyError = true; 1530 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__); 1531 } 1532 1533 if (MetaRowByte + PixelPTEBytesPerRow == 0) { 1534 prefetch_row_bw = 0; 1535 } else if (*DestinationLinesToRequestRowInVBlank > 0) { 1536 prefetch_row_bw = (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / (*DestinationLinesToRequestRowInVBlank * LineTime); 1537 1538 #ifdef __DML_VBA_DEBUG__ 1539 dml_print("DML::%s: MetaRowByte = %d\n", __func__, MetaRowByte); 1540 dml_print("DML::%s: PixelPTEBytesPerRow = %d\n", __func__, PixelPTEBytesPerRow); 1541 dml_print("DML::%s: DestinationLinesToRequestRowInVBlank = %f\n", __func__, *DestinationLinesToRequestRowInVBlank); 1542 dml_print("DML::%s: prefetch_row_bw = %f\n", __func__, prefetch_row_bw); 1543 #endif 1544 } else { 1545 prefetch_row_bw = 0; 1546 MyError = true; 1547 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__); 1548 } 1549 1550 *prefetch_vmrow_bw = dml_max(prefetch_vm_bw, prefetch_row_bw); 1551 } 1552 1553 if (MyError) { 1554 *PrefetchBandwidth = 0; 1555 TimeForFetchingMetaPTE = 0; 1556 TimeForFetchingRowInVBlank = 0; 1557 *DestinationLinesToRequestVMInVBlank = 0; 1558 *DestinationLinesToRequestRowInVBlank = 0; 1559 *DestinationLinesForPrefetch = 0; 1560 LinesToRequestPrefetchPixelData = 0; 1561 *VRatioPrefetchY = 0; 1562 *VRatioPrefetchC = 0; 1563 *RequiredPrefetchPixDataBWLuma = 0; 1564 *RequiredPrefetchPixDataBWChroma = 0; 1565 } 1566 1567 return MyError; 1568 } 1569 1570 static double RoundToDFSGranularityUp(double Clock, double VCOSpeed) 1571 { 1572 return VCOSpeed * 4 / dml_floor(VCOSpeed * 4 / Clock, 1); 1573 } 1574 1575 static double RoundToDFSGranularityDown(double Clock, double VCOSpeed) 1576 { 1577 return VCOSpeed * 4 / dml_ceil(VCOSpeed * 4.0 / Clock, 1); 1578 } 1579 1580 static void CalculateDCCConfiguration( 1581 bool DCCEnabled, 1582 bool DCCProgrammingAssumesScanDirectionUnknown, 1583 enum source_format_class SourcePixelFormat, 1584 unsigned int SurfaceWidthLuma, 1585 unsigned int SurfaceWidthChroma, 1586 unsigned int SurfaceHeightLuma, 1587 unsigned int SurfaceHeightChroma, 1588 double DETBufferSize, 1589 unsigned int RequestHeight256ByteLuma, 1590 unsigned int RequestHeight256ByteChroma, 1591 enum dm_swizzle_mode TilingFormat, 1592 unsigned int BytePerPixelY, 1593 unsigned int BytePerPixelC, 1594 double BytePerPixelDETY, 1595 double BytePerPixelDETC, 1596 enum scan_direction_class ScanOrientation, 1597 unsigned int *MaxUncompressedBlockLuma, 1598 unsigned int *MaxUncompressedBlockChroma, 1599 unsigned int *MaxCompressedBlockLuma, 1600 unsigned int *MaxCompressedBlockChroma, 1601 unsigned int *IndependentBlockLuma, 1602 unsigned int *IndependentBlockChroma) 1603 { 1604 int yuv420; 1605 int horz_div_l; 1606 int horz_div_c; 1607 int vert_div_l; 1608 int vert_div_c; 1609 1610 int swath_buf_size; 1611 double detile_buf_vp_horz_limit; 1612 double detile_buf_vp_vert_limit; 1613 1614 int MAS_vp_horz_limit; 1615 int MAS_vp_vert_limit; 1616 int max_vp_horz_width; 1617 int max_vp_vert_height; 1618 int eff_surf_width_l; 1619 int eff_surf_width_c; 1620 int eff_surf_height_l; 1621 int eff_surf_height_c; 1622 1623 int full_swath_bytes_horz_wc_l; 1624 int full_swath_bytes_horz_wc_c; 1625 int full_swath_bytes_vert_wc_l; 1626 int full_swath_bytes_vert_wc_c; 1627 int req128_horz_wc_l; 1628 int req128_horz_wc_c; 1629 int req128_vert_wc_l; 1630 int req128_vert_wc_c; 1631 int segment_order_horz_contiguous_luma; 1632 int segment_order_horz_contiguous_chroma; 1633 int segment_order_vert_contiguous_luma; 1634 int segment_order_vert_contiguous_chroma; 1635 1636 typedef enum { 1637 REQ_256Bytes, REQ_128BytesNonContiguous, REQ_128BytesContiguous, REQ_NA 1638 } RequestType; 1639 RequestType RequestLuma; 1640 RequestType RequestChroma; 1641 1642 yuv420 = ((SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12) ? 1 : 0); 1643 horz_div_l = 1; 1644 horz_div_c = 1; 1645 vert_div_l = 1; 1646 vert_div_c = 1; 1647 1648 if (BytePerPixelY == 1) 1649 vert_div_l = 0; 1650 if (BytePerPixelC == 1) 1651 vert_div_c = 0; 1652 if (BytePerPixelY == 8 && (TilingFormat == dm_sw_64kb_s || TilingFormat == dm_sw_64kb_s_t || TilingFormat == dm_sw_64kb_s_x)) 1653 horz_div_l = 0; 1654 if (BytePerPixelC == 8 && (TilingFormat == dm_sw_64kb_s || TilingFormat == dm_sw_64kb_s_t || TilingFormat == dm_sw_64kb_s_x)) 1655 horz_div_c = 0; 1656 1657 if (BytePerPixelC == 0) { 1658 swath_buf_size = DETBufferSize / 2 - 2 * 256; 1659 detile_buf_vp_horz_limit = (double) swath_buf_size / ((double) RequestHeight256ByteLuma * BytePerPixelY / (1 + horz_div_l)); 1660 detile_buf_vp_vert_limit = (double) swath_buf_size / (256.0 / RequestHeight256ByteLuma / (1 + vert_div_l)); 1661 } else { 1662 swath_buf_size = DETBufferSize / 2 - 2 * 2 * 256; 1663 detile_buf_vp_horz_limit = (double) swath_buf_size 1664 / ((double) RequestHeight256ByteLuma * BytePerPixelY / (1 + horz_div_l) 1665 + (double) RequestHeight256ByteChroma * BytePerPixelC / (1 + horz_div_c) / (1 + yuv420)); 1666 detile_buf_vp_vert_limit = (double) swath_buf_size 1667 / (256.0 / RequestHeight256ByteLuma / (1 + vert_div_l) + 256.0 / RequestHeight256ByteChroma / (1 + vert_div_c) / (1 + yuv420)); 1668 } 1669 1670 if (SourcePixelFormat == dm_420_10) { 1671 detile_buf_vp_horz_limit = 1.5 * detile_buf_vp_horz_limit; 1672 detile_buf_vp_vert_limit = 1.5 * detile_buf_vp_vert_limit; 1673 } 1674 1675 detile_buf_vp_horz_limit = dml_floor(detile_buf_vp_horz_limit - 1, 16); 1676 detile_buf_vp_vert_limit = dml_floor(detile_buf_vp_vert_limit - 1, 16); 1677 1678 MAS_vp_horz_limit = SourcePixelFormat == dm_rgbe_alpha ? 3840 : 5760; 1679 MAS_vp_vert_limit = (BytePerPixelC > 0 ? 2880 : 5760); 1680 max_vp_horz_width = dml_min((double) MAS_vp_horz_limit, detile_buf_vp_horz_limit); 1681 max_vp_vert_height = dml_min((double) MAS_vp_vert_limit, detile_buf_vp_vert_limit); 1682 eff_surf_width_l = (SurfaceWidthLuma > max_vp_horz_width ? max_vp_horz_width : SurfaceWidthLuma); 1683 eff_surf_width_c = eff_surf_width_l / (1 + yuv420); 1684 eff_surf_height_l = (SurfaceHeightLuma > max_vp_vert_height ? max_vp_vert_height : SurfaceHeightLuma); 1685 eff_surf_height_c = eff_surf_height_l / (1 + yuv420); 1686 1687 full_swath_bytes_horz_wc_l = eff_surf_width_l * RequestHeight256ByteLuma * BytePerPixelY; 1688 full_swath_bytes_vert_wc_l = eff_surf_height_l * 256 / RequestHeight256ByteLuma; 1689 if (BytePerPixelC > 0) { 1690 full_swath_bytes_horz_wc_c = eff_surf_width_c * RequestHeight256ByteChroma * BytePerPixelC; 1691 full_swath_bytes_vert_wc_c = eff_surf_height_c * 256 / RequestHeight256ByteChroma; 1692 } else { 1693 full_swath_bytes_horz_wc_c = 0; 1694 full_swath_bytes_vert_wc_c = 0; 1695 } 1696 1697 if (SourcePixelFormat == dm_420_10) { 1698 full_swath_bytes_horz_wc_l = dml_ceil(full_swath_bytes_horz_wc_l * 2 / 3, 256); 1699 full_swath_bytes_horz_wc_c = dml_ceil(full_swath_bytes_horz_wc_c * 2 / 3, 256); 1700 full_swath_bytes_vert_wc_l = dml_ceil(full_swath_bytes_vert_wc_l * 2 / 3, 256); 1701 full_swath_bytes_vert_wc_c = dml_ceil(full_swath_bytes_vert_wc_c * 2 / 3, 256); 1702 } 1703 1704 if (2 * full_swath_bytes_horz_wc_l + 2 * full_swath_bytes_horz_wc_c <= DETBufferSize) { 1705 req128_horz_wc_l = 0; 1706 req128_horz_wc_c = 0; 1707 } else if (full_swath_bytes_horz_wc_l < 1.5 * full_swath_bytes_horz_wc_c && 2 * full_swath_bytes_horz_wc_l + full_swath_bytes_horz_wc_c <= DETBufferSize) { 1708 req128_horz_wc_l = 0; 1709 req128_horz_wc_c = 1; 1710 } else if (full_swath_bytes_horz_wc_l >= 1.5 * full_swath_bytes_horz_wc_c && full_swath_bytes_horz_wc_l + 2 * full_swath_bytes_horz_wc_c <= DETBufferSize) { 1711 req128_horz_wc_l = 1; 1712 req128_horz_wc_c = 0; 1713 } else { 1714 req128_horz_wc_l = 1; 1715 req128_horz_wc_c = 1; 1716 } 1717 1718 if (2 * full_swath_bytes_vert_wc_l + 2 * full_swath_bytes_vert_wc_c <= DETBufferSize) { 1719 req128_vert_wc_l = 0; 1720 req128_vert_wc_c = 0; 1721 } else if (full_swath_bytes_vert_wc_l < 1.5 * full_swath_bytes_vert_wc_c && 2 * full_swath_bytes_vert_wc_l + full_swath_bytes_vert_wc_c <= DETBufferSize) { 1722 req128_vert_wc_l = 0; 1723 req128_vert_wc_c = 1; 1724 } else if (full_swath_bytes_vert_wc_l >= 1.5 * full_swath_bytes_vert_wc_c && full_swath_bytes_vert_wc_l + 2 * full_swath_bytes_vert_wc_c <= DETBufferSize) { 1725 req128_vert_wc_l = 1; 1726 req128_vert_wc_c = 0; 1727 } else { 1728 req128_vert_wc_l = 1; 1729 req128_vert_wc_c = 1; 1730 } 1731 1732 if (BytePerPixelY == 2 || (BytePerPixelY == 4 && TilingFormat != dm_sw_64kb_r_x)) { 1733 segment_order_horz_contiguous_luma = 0; 1734 } else { 1735 segment_order_horz_contiguous_luma = 1; 1736 } 1737 if ((BytePerPixelY == 8 && (TilingFormat == dm_sw_64kb_d || TilingFormat == dm_sw_64kb_d_x || TilingFormat == dm_sw_64kb_d_t || TilingFormat == dm_sw_64kb_r_x)) 1738 || (BytePerPixelY == 4 && TilingFormat == dm_sw_64kb_r_x)) { 1739 segment_order_vert_contiguous_luma = 0; 1740 } else { 1741 segment_order_vert_contiguous_luma = 1; 1742 } 1743 if (BytePerPixelC == 2 || (BytePerPixelC == 4 && TilingFormat != dm_sw_64kb_r_x)) { 1744 segment_order_horz_contiguous_chroma = 0; 1745 } else { 1746 segment_order_horz_contiguous_chroma = 1; 1747 } 1748 if ((BytePerPixelC == 8 && (TilingFormat == dm_sw_64kb_d || TilingFormat == dm_sw_64kb_d_x || TilingFormat == dm_sw_64kb_d_t || TilingFormat == dm_sw_64kb_r_x)) 1749 || (BytePerPixelC == 4 && TilingFormat == dm_sw_64kb_r_x)) { 1750 segment_order_vert_contiguous_chroma = 0; 1751 } else { 1752 segment_order_vert_contiguous_chroma = 1; 1753 } 1754 1755 if (DCCProgrammingAssumesScanDirectionUnknown == true) { 1756 if (req128_horz_wc_l == 0 && req128_vert_wc_l == 0) { 1757 RequestLuma = REQ_256Bytes; 1758 } else if ((req128_horz_wc_l == 1 && segment_order_horz_contiguous_luma == 0) || (req128_vert_wc_l == 1 && segment_order_vert_contiguous_luma == 0)) { 1759 RequestLuma = REQ_128BytesNonContiguous; 1760 } else { 1761 RequestLuma = REQ_128BytesContiguous; 1762 } 1763 if (req128_horz_wc_c == 0 && req128_vert_wc_c == 0) { 1764 RequestChroma = REQ_256Bytes; 1765 } else if ((req128_horz_wc_c == 1 && segment_order_horz_contiguous_chroma == 0) || (req128_vert_wc_c == 1 && segment_order_vert_contiguous_chroma == 0)) { 1766 RequestChroma = REQ_128BytesNonContiguous; 1767 } else { 1768 RequestChroma = REQ_128BytesContiguous; 1769 } 1770 } else if (ScanOrientation != dm_vert) { 1771 if (req128_horz_wc_l == 0) { 1772 RequestLuma = REQ_256Bytes; 1773 } else if (segment_order_horz_contiguous_luma == 0) { 1774 RequestLuma = REQ_128BytesNonContiguous; 1775 } else { 1776 RequestLuma = REQ_128BytesContiguous; 1777 } 1778 if (req128_horz_wc_c == 0) { 1779 RequestChroma = REQ_256Bytes; 1780 } else if (segment_order_horz_contiguous_chroma == 0) { 1781 RequestChroma = REQ_128BytesNonContiguous; 1782 } else { 1783 RequestChroma = REQ_128BytesContiguous; 1784 } 1785 } else { 1786 if (req128_vert_wc_l == 0) { 1787 RequestLuma = REQ_256Bytes; 1788 } else if (segment_order_vert_contiguous_luma == 0) { 1789 RequestLuma = REQ_128BytesNonContiguous; 1790 } else { 1791 RequestLuma = REQ_128BytesContiguous; 1792 } 1793 if (req128_vert_wc_c == 0) { 1794 RequestChroma = REQ_256Bytes; 1795 } else if (segment_order_vert_contiguous_chroma == 0) { 1796 RequestChroma = REQ_128BytesNonContiguous; 1797 } else { 1798 RequestChroma = REQ_128BytesContiguous; 1799 } 1800 } 1801 1802 if (RequestLuma == REQ_256Bytes) { 1803 *MaxUncompressedBlockLuma = 256; 1804 *MaxCompressedBlockLuma = 256; 1805 *IndependentBlockLuma = 0; 1806 } else if (RequestLuma == REQ_128BytesContiguous) { 1807 *MaxUncompressedBlockLuma = 256; 1808 *MaxCompressedBlockLuma = 128; 1809 *IndependentBlockLuma = 128; 1810 } else { 1811 *MaxUncompressedBlockLuma = 256; 1812 *MaxCompressedBlockLuma = 64; 1813 *IndependentBlockLuma = 64; 1814 } 1815 1816 if (RequestChroma == REQ_256Bytes) { 1817 *MaxUncompressedBlockChroma = 256; 1818 *MaxCompressedBlockChroma = 256; 1819 *IndependentBlockChroma = 0; 1820 } else if (RequestChroma == REQ_128BytesContiguous) { 1821 *MaxUncompressedBlockChroma = 256; 1822 *MaxCompressedBlockChroma = 128; 1823 *IndependentBlockChroma = 128; 1824 } else { 1825 *MaxUncompressedBlockChroma = 256; 1826 *MaxCompressedBlockChroma = 64; 1827 *IndependentBlockChroma = 64; 1828 } 1829 1830 if (DCCEnabled != true || BytePerPixelC == 0) { 1831 *MaxUncompressedBlockChroma = 0; 1832 *MaxCompressedBlockChroma = 0; 1833 *IndependentBlockChroma = 0; 1834 } 1835 1836 if (DCCEnabled != true) { 1837 *MaxUncompressedBlockLuma = 0; 1838 *MaxCompressedBlockLuma = 0; 1839 *IndependentBlockLuma = 0; 1840 } 1841 } 1842 1843 static double CalculatePrefetchSourceLines( 1844 struct display_mode_lib *mode_lib, 1845 double VRatio, 1846 double vtaps, 1847 bool Interlace, 1848 bool ProgressiveToInterlaceUnitInOPP, 1849 unsigned int SwathHeight, 1850 unsigned int ViewportYStart, 1851 double *VInitPreFill, 1852 unsigned int *MaxNumSwath) 1853 { 1854 struct vba_vars_st *v = &mode_lib->vba; 1855 unsigned int MaxPartialSwath; 1856 1857 if (ProgressiveToInterlaceUnitInOPP) 1858 *VInitPreFill = dml_floor((VRatio + vtaps + 1) / 2.0, 1); 1859 else 1860 *VInitPreFill = dml_floor((VRatio + vtaps + 1 + Interlace * 0.5 * VRatio) / 2.0, 1); 1861 1862 if (!v->IgnoreViewportPositioning) { 1863 1864 *MaxNumSwath = dml_ceil((*VInitPreFill - 1.0) / SwathHeight, 1) + 1.0; 1865 1866 if (*VInitPreFill > 1.0) 1867 MaxPartialSwath = (unsigned int) (*VInitPreFill - 2) % SwathHeight; 1868 else 1869 MaxPartialSwath = (unsigned int) (*VInitPreFill + SwathHeight - 2) % SwathHeight; 1870 MaxPartialSwath = dml_max(1U, MaxPartialSwath); 1871 1872 } else { 1873 1874 if (ViewportYStart != 0) 1875 dml_print("WARNING DML: using viewport y position of 0 even though actual viewport y position is non-zero in prefetch source lines calculation\n"); 1876 1877 *MaxNumSwath = dml_ceil(*VInitPreFill / SwathHeight, 1); 1878 1879 if (*VInitPreFill > 1.0) 1880 MaxPartialSwath = (unsigned int) (*VInitPreFill - 1) % SwathHeight; 1881 else 1882 MaxPartialSwath = (unsigned int) (*VInitPreFill + SwathHeight - 1) % SwathHeight; 1883 } 1884 1885 #ifdef __DML_VBA_DEBUG__ 1886 dml_print("DML::%s: VRatio = %f\n", __func__, VRatio); 1887 dml_print("DML::%s: vtaps = %f\n", __func__, vtaps); 1888 dml_print("DML::%s: VInitPreFill = %f\n", __func__, *VInitPreFill); 1889 dml_print("DML::%s: ProgressiveToInterlaceUnitInOPP = %d\n", __func__, ProgressiveToInterlaceUnitInOPP); 1890 dml_print("DML::%s: IgnoreViewportPositioning = %d\n", __func__, v->IgnoreViewportPositioning); 1891 dml_print("DML::%s: SwathHeight = %d\n", __func__, SwathHeight); 1892 dml_print("DML::%s: MaxPartialSwath = %d\n", __func__, MaxPartialSwath); 1893 dml_print("DML::%s: MaxNumSwath = %d\n", __func__, *MaxNumSwath); 1894 dml_print("DML::%s: Prefetch source lines = %d\n", __func__, *MaxNumSwath * SwathHeight + MaxPartialSwath); 1895 #endif 1896 return *MaxNumSwath * SwathHeight + MaxPartialSwath; 1897 } 1898 1899 static unsigned int CalculateVMAndRowBytes( 1900 struct display_mode_lib *mode_lib, 1901 bool DCCEnable, 1902 unsigned int BlockHeight256Bytes, 1903 unsigned int BlockWidth256Bytes, 1904 enum source_format_class SourcePixelFormat, 1905 unsigned int SurfaceTiling, 1906 unsigned int BytePerPixel, 1907 enum scan_direction_class ScanDirection, 1908 unsigned int SwathWidth, 1909 unsigned int ViewportHeight, 1910 bool GPUVMEnable, 1911 bool HostVMEnable, 1912 unsigned int HostVMMaxNonCachedPageTableLevels, 1913 unsigned int GPUVMMinPageSize, 1914 unsigned int HostVMMinPageSize, 1915 unsigned int PTEBufferSizeInRequests, 1916 unsigned int Pitch, 1917 unsigned int DCCMetaPitch, 1918 unsigned int *MacroTileWidth, 1919 unsigned int *MetaRowByte, 1920 unsigned int *PixelPTEBytesPerRow, 1921 bool *PTEBufferSizeNotExceeded, 1922 int *dpte_row_width_ub, 1923 unsigned int *dpte_row_height, 1924 unsigned int *MetaRequestWidth, 1925 unsigned int *MetaRequestHeight, 1926 unsigned int *meta_row_width, 1927 unsigned int *meta_row_height, 1928 int *vm_group_bytes, 1929 unsigned int *dpte_group_bytes, 1930 unsigned int *PixelPTEReqWidth, 1931 unsigned int *PixelPTEReqHeight, 1932 unsigned int *PTERequestSize, 1933 int *DPDE0BytesFrame, 1934 int *MetaPTEBytesFrame) 1935 { 1936 struct vba_vars_st *v = &mode_lib->vba; 1937 unsigned int MPDEBytesFrame; 1938 unsigned int DCCMetaSurfaceBytes; 1939 unsigned int MacroTileSizeBytes; 1940 unsigned int MacroTileHeight; 1941 unsigned int ExtraDPDEBytesFrame; 1942 unsigned int PDEAndMetaPTEBytesFrame; 1943 unsigned int PixelPTEReqHeightPTEs = 0; 1944 unsigned int HostVMDynamicLevels = 0; 1945 double FractionOfPTEReturnDrop; 1946 1947 if (GPUVMEnable == true && HostVMEnable == true) { 1948 if (HostVMMinPageSize < 2048) { 1949 HostVMDynamicLevels = HostVMMaxNonCachedPageTableLevels; 1950 } else if (HostVMMinPageSize >= 2048 && HostVMMinPageSize < 1048576) { 1951 HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 1); 1952 } else { 1953 HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 2); 1954 } 1955 } 1956 1957 *MetaRequestHeight = 8 * BlockHeight256Bytes; 1958 *MetaRequestWidth = 8 * BlockWidth256Bytes; 1959 if (ScanDirection != dm_vert) { 1960 *meta_row_height = *MetaRequestHeight; 1961 *meta_row_width = dml_ceil((double) SwathWidth - 1, *MetaRequestWidth) + *MetaRequestWidth; 1962 *MetaRowByte = *meta_row_width * *MetaRequestHeight * BytePerPixel / 256.0; 1963 } else { 1964 *meta_row_height = *MetaRequestWidth; 1965 *meta_row_width = dml_ceil((double) SwathWidth - 1, *MetaRequestHeight) + *MetaRequestHeight; 1966 *MetaRowByte = *meta_row_width * *MetaRequestWidth * BytePerPixel / 256.0; 1967 } 1968 DCCMetaSurfaceBytes = DCCMetaPitch * (dml_ceil(ViewportHeight - 1, 64 * BlockHeight256Bytes) + 64 * BlockHeight256Bytes) * BytePerPixel / 256; 1969 if (GPUVMEnable == true) { 1970 *MetaPTEBytesFrame = (dml_ceil((double) (DCCMetaSurfaceBytes - 4.0 * 1024.0) / (8 * 4.0 * 1024), 1) + 1) * 64; 1971 MPDEBytesFrame = 128 * (v->GPUVMMaxPageTableLevels - 1); 1972 } else { 1973 *MetaPTEBytesFrame = 0; 1974 MPDEBytesFrame = 0; 1975 } 1976 1977 if (DCCEnable != true) { 1978 *MetaPTEBytesFrame = 0; 1979 MPDEBytesFrame = 0; 1980 *MetaRowByte = 0; 1981 } 1982 1983 if (SurfaceTiling == dm_sw_linear) { 1984 MacroTileSizeBytes = 256; 1985 MacroTileHeight = BlockHeight256Bytes; 1986 } else { 1987 MacroTileSizeBytes = 65536; 1988 MacroTileHeight = 16 * BlockHeight256Bytes; 1989 } 1990 *MacroTileWidth = MacroTileSizeBytes / BytePerPixel / MacroTileHeight; 1991 1992 if (GPUVMEnable == true && v->GPUVMMaxPageTableLevels > 1) { 1993 if (ScanDirection != dm_vert) { 1994 *DPDE0BytesFrame = 64 1995 * (dml_ceil( 1996 ((Pitch * (dml_ceil(ViewportHeight - 1, MacroTileHeight) + MacroTileHeight) * BytePerPixel) - MacroTileSizeBytes) 1997 / (8 * 2097152), 1998 1) + 1); 1999 } else { 2000 *DPDE0BytesFrame = 64 2001 * (dml_ceil( 2002 ((Pitch * (dml_ceil((double) SwathWidth - 1, MacroTileHeight) + MacroTileHeight) * BytePerPixel) - MacroTileSizeBytes) 2003 / (8 * 2097152), 2004 1) + 1); 2005 } 2006 ExtraDPDEBytesFrame = 128 * (v->GPUVMMaxPageTableLevels - 2); 2007 } else { 2008 *DPDE0BytesFrame = 0; 2009 ExtraDPDEBytesFrame = 0; 2010 } 2011 2012 PDEAndMetaPTEBytesFrame = *MetaPTEBytesFrame + MPDEBytesFrame + *DPDE0BytesFrame + ExtraDPDEBytesFrame; 2013 2014 #ifdef __DML_VBA_DEBUG__ 2015 dml_print("DML::%s: MetaPTEBytesFrame = %d\n", __func__, *MetaPTEBytesFrame); 2016 dml_print("DML::%s: MPDEBytesFrame = %d\n", __func__, MPDEBytesFrame); 2017 dml_print("DML::%s: DPDE0BytesFrame = %d\n", __func__, *DPDE0BytesFrame); 2018 dml_print("DML::%s: ExtraDPDEBytesFrame= %d\n", __func__, ExtraDPDEBytesFrame); 2019 dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame); 2020 #endif 2021 2022 if (HostVMEnable == true) { 2023 PDEAndMetaPTEBytesFrame = PDEAndMetaPTEBytesFrame * (1 + 8 * HostVMDynamicLevels); 2024 } 2025 #ifdef __DML_VBA_DEBUG__ 2026 dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame); 2027 #endif 2028 2029 if (SurfaceTiling == dm_sw_linear) { 2030 PixelPTEReqHeightPTEs = 1; 2031 *PixelPTEReqHeight = 1; 2032 *PixelPTEReqWidth = 32768.0 / BytePerPixel; 2033 *PTERequestSize = 64; 2034 FractionOfPTEReturnDrop = 0; 2035 } else if (MacroTileSizeBytes == 4096) { 2036 PixelPTEReqHeightPTEs = 1; 2037 *PixelPTEReqHeight = MacroTileHeight; 2038 *PixelPTEReqWidth = 8 * *MacroTileWidth; 2039 *PTERequestSize = 64; 2040 if (ScanDirection != dm_vert) 2041 FractionOfPTEReturnDrop = 0; 2042 else 2043 FractionOfPTEReturnDrop = 7 / 8; 2044 } else if (GPUVMMinPageSize == 4 && MacroTileSizeBytes > 4096) { 2045 PixelPTEReqHeightPTEs = 16; 2046 *PixelPTEReqHeight = 16 * BlockHeight256Bytes; 2047 *PixelPTEReqWidth = 16 * BlockWidth256Bytes; 2048 *PTERequestSize = 128; 2049 FractionOfPTEReturnDrop = 0; 2050 } else { 2051 PixelPTEReqHeightPTEs = 1; 2052 *PixelPTEReqHeight = MacroTileHeight; 2053 *PixelPTEReqWidth = 8 * *MacroTileWidth; 2054 *PTERequestSize = 64; 2055 FractionOfPTEReturnDrop = 0; 2056 } 2057 2058 if (SurfaceTiling == dm_sw_linear) { 2059 *dpte_row_height = dml_min(128, 1 << (unsigned int) dml_floor(dml_log2(PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch), 1)); 2060 *dpte_row_width_ub = (dml_ceil((double)(Pitch * *dpte_row_height - 1) / *PixelPTEReqWidth, 1) + 1) * *PixelPTEReqWidth; 2061 *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqWidth * *PTERequestSize; 2062 } else if (ScanDirection != dm_vert) { 2063 *dpte_row_height = *PixelPTEReqHeight; 2064 *dpte_row_width_ub = (dml_ceil((double) (SwathWidth - 1) / *PixelPTEReqWidth, 1) + 1) * *PixelPTEReqWidth; 2065 *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqWidth * *PTERequestSize; 2066 } else { 2067 *dpte_row_height = dml_min(*PixelPTEReqWidth, *MacroTileWidth); 2068 *dpte_row_width_ub = (dml_ceil((double) (SwathWidth - 1) / *PixelPTEReqHeight, 1) + 1) * *PixelPTEReqHeight; 2069 *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqHeight * *PTERequestSize; 2070 } 2071 2072 if (*PixelPTEBytesPerRow * (1 - FractionOfPTEReturnDrop) <= 64 * PTEBufferSizeInRequests) { 2073 *PTEBufferSizeNotExceeded = true; 2074 } else { 2075 *PTEBufferSizeNotExceeded = false; 2076 } 2077 2078 if (GPUVMEnable != true) { 2079 *PixelPTEBytesPerRow = 0; 2080 *PTEBufferSizeNotExceeded = true; 2081 } 2082 2083 dml_print("DML: vm_bytes = meta_pte_bytes_per_frame (per_pipe) = MetaPTEBytesFrame = : %i\n", *MetaPTEBytesFrame); 2084 2085 if (HostVMEnable == true) { 2086 *PixelPTEBytesPerRow = *PixelPTEBytesPerRow * (1 + 8 * HostVMDynamicLevels); 2087 } 2088 2089 if (HostVMEnable == true) { 2090 *vm_group_bytes = 512; 2091 *dpte_group_bytes = 512; 2092 } else if (GPUVMEnable == true) { 2093 *vm_group_bytes = 2048; 2094 if (SurfaceTiling != dm_sw_linear && PixelPTEReqHeightPTEs == 1 && ScanDirection == dm_vert) { 2095 *dpte_group_bytes = 512; 2096 } else { 2097 *dpte_group_bytes = 2048; 2098 } 2099 } else { 2100 *vm_group_bytes = 0; 2101 *dpte_group_bytes = 0; 2102 } 2103 return PDEAndMetaPTEBytesFrame; 2104 } 2105 2106 static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation(struct display_mode_lib *mode_lib) 2107 { 2108 struct vba_vars_st *v = &mode_lib->vba; 2109 unsigned int j, k; 2110 double HostVMInefficiencyFactor = 1.0; 2111 bool NoChromaPlanes = true; 2112 int ReorderBytes; 2113 double VMDataOnlyReturnBW; 2114 double MaxTotalRDBandwidth = 0; 2115 int PrefetchMode = v->PrefetchModePerState[v->VoltageLevel][v->maxMpcComb]; 2116 2117 v->WritebackDISPCLK = 0.0; 2118 v->DISPCLKWithRamping = 0; 2119 v->DISPCLKWithoutRamping = 0; 2120 v->GlobalDPPCLK = 0.0; 2121 /* DAL custom code: need to update ReturnBW in case min dcfclk is overriden */ 2122 { 2123 double IdealFabricAndSDPPortBandwidthPerState = dml_min( 2124 v->ReturnBusWidth * v->DCFCLKState[v->VoltageLevel][v->maxMpcComb], 2125 v->FabricClockPerState[v->VoltageLevel] * v->FabricDatapathToDCNDataReturn); 2126 double IdealDRAMBandwidthPerState = v->DRAMSpeedPerState[v->VoltageLevel] * v->NumberOfChannels * v->DRAMChannelWidth; 2127 if (v->HostVMEnable != true) { 2128 v->ReturnBW = dml_min( 2129 IdealFabricAndSDPPortBandwidthPerState * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0, 2130 IdealDRAMBandwidthPerState * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyPixelDataOnly / 100.0); 2131 } else { 2132 v->ReturnBW = dml_min( 2133 IdealFabricAndSDPPortBandwidthPerState * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0, 2134 IdealDRAMBandwidthPerState * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyPixelMixedWithVMData / 100.0); 2135 } 2136 } 2137 /* End DAL custom code */ 2138 2139 // DISPCLK and DPPCLK Calculation 2140 // 2141 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 2142 if (v->WritebackEnable[k]) { 2143 v->WritebackDISPCLK = dml_max( 2144 v->WritebackDISPCLK, 2145 dml31_CalculateWriteBackDISPCLK( 2146 v->WritebackPixelFormat[k], 2147 v->PixelClock[k], 2148 v->WritebackHRatio[k], 2149 v->WritebackVRatio[k], 2150 v->WritebackHTaps[k], 2151 v->WritebackVTaps[k], 2152 v->WritebackSourceWidth[k], 2153 v->WritebackDestinationWidth[k], 2154 v->HTotal[k], 2155 v->WritebackLineBufferSize)); 2156 } 2157 } 2158 2159 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 2160 if (v->HRatio[k] > 1) { 2161 v->PSCL_THROUGHPUT_LUMA[k] = dml_min( 2162 v->MaxDCHUBToPSCLThroughput, 2163 v->MaxPSCLToLBThroughput * v->HRatio[k] / dml_ceil(v->htaps[k] / 6.0, 1)); 2164 } else { 2165 v->PSCL_THROUGHPUT_LUMA[k] = dml_min(v->MaxDCHUBToPSCLThroughput, v->MaxPSCLToLBThroughput); 2166 } 2167 2168 v->DPPCLKUsingSingleDPPLuma = v->PixelClock[k] 2169 * dml_max( 2170 v->vtaps[k] / 6.0 * dml_min(1.0, v->HRatio[k]), 2171 dml_max(v->HRatio[k] * v->VRatio[k] / v->PSCL_THROUGHPUT_LUMA[k], 1.0)); 2172 2173 if ((v->htaps[k] > 6 || v->vtaps[k] > 6) && v->DPPCLKUsingSingleDPPLuma < 2 * v->PixelClock[k]) { 2174 v->DPPCLKUsingSingleDPPLuma = 2 * v->PixelClock[k]; 2175 } 2176 2177 if ((v->SourcePixelFormat[k] != dm_420_8 && v->SourcePixelFormat[k] != dm_420_10 && v->SourcePixelFormat[k] != dm_420_12 2178 && v->SourcePixelFormat[k] != dm_rgbe_alpha)) { 2179 v->PSCL_THROUGHPUT_CHROMA[k] = 0.0; 2180 v->DPPCLKUsingSingleDPP[k] = v->DPPCLKUsingSingleDPPLuma; 2181 } else { 2182 if (v->HRatioChroma[k] > 1) { 2183 v->PSCL_THROUGHPUT_CHROMA[k] = dml_min( 2184 v->MaxDCHUBToPSCLThroughput, 2185 v->MaxPSCLToLBThroughput * v->HRatioChroma[k] / dml_ceil(v->HTAPsChroma[k] / 6.0, 1.0)); 2186 } else { 2187 v->PSCL_THROUGHPUT_CHROMA[k] = dml_min(v->MaxDCHUBToPSCLThroughput, v->MaxPSCLToLBThroughput); 2188 } 2189 v->DPPCLKUsingSingleDPPChroma = v->PixelClock[k] 2190 * dml_max3( 2191 v->VTAPsChroma[k] / 6.0 * dml_min(1.0, v->HRatioChroma[k]), 2192 v->HRatioChroma[k] * v->VRatioChroma[k] / v->PSCL_THROUGHPUT_CHROMA[k], 2193 1.0); 2194 2195 if ((v->HTAPsChroma[k] > 6 || v->VTAPsChroma[k] > 6) && v->DPPCLKUsingSingleDPPChroma < 2 * v->PixelClock[k]) { 2196 v->DPPCLKUsingSingleDPPChroma = 2 * v->PixelClock[k]; 2197 } 2198 2199 v->DPPCLKUsingSingleDPP[k] = dml_max(v->DPPCLKUsingSingleDPPLuma, v->DPPCLKUsingSingleDPPChroma); 2200 } 2201 } 2202 2203 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 2204 if (v->BlendingAndTiming[k] != k) 2205 continue; 2206 if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_4to1) { 2207 v->DISPCLKWithRamping = dml_max( 2208 v->DISPCLKWithRamping, 2209 v->PixelClock[k] / 4 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100) 2210 * (1 + v->DISPCLKRampingMargin / 100)); 2211 v->DISPCLKWithoutRamping = dml_max( 2212 v->DISPCLKWithoutRamping, 2213 v->PixelClock[k] / 4 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100)); 2214 } else if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_2to1) { 2215 v->DISPCLKWithRamping = dml_max( 2216 v->DISPCLKWithRamping, 2217 v->PixelClock[k] / 2 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100) 2218 * (1 + v->DISPCLKRampingMargin / 100)); 2219 v->DISPCLKWithoutRamping = dml_max( 2220 v->DISPCLKWithoutRamping, 2221 v->PixelClock[k] / 2 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100)); 2222 } else { 2223 v->DISPCLKWithRamping = dml_max( 2224 v->DISPCLKWithRamping, 2225 v->PixelClock[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100) * (1 + v->DISPCLKRampingMargin / 100)); 2226 v->DISPCLKWithoutRamping = dml_max( 2227 v->DISPCLKWithoutRamping, 2228 v->PixelClock[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100)); 2229 } 2230 } 2231 2232 v->DISPCLKWithRamping = dml_max(v->DISPCLKWithRamping, v->WritebackDISPCLK); 2233 v->DISPCLKWithoutRamping = dml_max(v->DISPCLKWithoutRamping, v->WritebackDISPCLK); 2234 2235 ASSERT(v->DISPCLKDPPCLKVCOSpeed != 0); 2236 v->DISPCLKWithRampingRoundedToDFSGranularity = RoundToDFSGranularityUp(v->DISPCLKWithRamping, v->DISPCLKDPPCLKVCOSpeed); 2237 v->DISPCLKWithoutRampingRoundedToDFSGranularity = RoundToDFSGranularityUp(v->DISPCLKWithoutRamping, v->DISPCLKDPPCLKVCOSpeed); 2238 v->MaxDispclkRoundedToDFSGranularity = RoundToDFSGranularityDown( 2239 v->soc.clock_limits[v->soc.num_states - 1].dispclk_mhz, 2240 v->DISPCLKDPPCLKVCOSpeed); 2241 if (v->DISPCLKWithoutRampingRoundedToDFSGranularity > v->MaxDispclkRoundedToDFSGranularity) { 2242 v->DISPCLK_calculated = v->DISPCLKWithoutRampingRoundedToDFSGranularity; 2243 } else if (v->DISPCLKWithRampingRoundedToDFSGranularity > v->MaxDispclkRoundedToDFSGranularity) { 2244 v->DISPCLK_calculated = v->MaxDispclkRoundedToDFSGranularity; 2245 } else { 2246 v->DISPCLK_calculated = v->DISPCLKWithRampingRoundedToDFSGranularity; 2247 } 2248 v->DISPCLK = v->DISPCLK_calculated; 2249 DTRACE(" dispclk_mhz (calculated) = %f", v->DISPCLK_calculated); 2250 2251 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 2252 v->DPPCLK_calculated[k] = v->DPPCLKUsingSingleDPP[k] / v->DPPPerPlane[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100); 2253 v->GlobalDPPCLK = dml_max(v->GlobalDPPCLK, v->DPPCLK_calculated[k]); 2254 } 2255 v->GlobalDPPCLK = RoundToDFSGranularityUp(v->GlobalDPPCLK, v->DISPCLKDPPCLKVCOSpeed); 2256 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 2257 v->DPPCLK_calculated[k] = v->GlobalDPPCLK / 255 * dml_ceil(v->DPPCLK_calculated[k] * 255.0 / v->GlobalDPPCLK, 1); 2258 DTRACE(" dppclk_mhz[%i] (calculated) = %f", k, v->DPPCLK_calculated[k]); 2259 } 2260 2261 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 2262 v->DPPCLK[k] = v->DPPCLK_calculated[k]; 2263 } 2264 2265 // Urgent and B P-State/DRAM Clock Change Watermark 2266 DTRACE(" dcfclk_mhz = %f", v->DCFCLK); 2267 DTRACE(" return_bus_bw = %f", v->ReturnBW); 2268 2269 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 2270 CalculateBytePerPixelAnd256BBlockSizes( 2271 v->SourcePixelFormat[k], 2272 v->SurfaceTiling[k], 2273 &v->BytePerPixelY[k], 2274 &v->BytePerPixelC[k], 2275 &v->BytePerPixelDETY[k], 2276 &v->BytePerPixelDETC[k], 2277 &v->BlockHeight256BytesY[k], 2278 &v->BlockHeight256BytesC[k], 2279 &v->BlockWidth256BytesY[k], 2280 &v->BlockWidth256BytesC[k]); 2281 } 2282 2283 CalculateSwathWidth( 2284 false, 2285 v->NumberOfActivePlanes, 2286 v->SourcePixelFormat, 2287 v->SourceScan, 2288 v->ViewportWidth, 2289 v->ViewportHeight, 2290 v->SurfaceWidthY, 2291 v->SurfaceWidthC, 2292 v->SurfaceHeightY, 2293 v->SurfaceHeightC, 2294 v->ODMCombineEnabled, 2295 v->BytePerPixelY, 2296 v->BytePerPixelC, 2297 v->BlockHeight256BytesY, 2298 v->BlockHeight256BytesC, 2299 v->BlockWidth256BytesY, 2300 v->BlockWidth256BytesC, 2301 v->BlendingAndTiming, 2302 v->HActive, 2303 v->HRatio, 2304 v->DPPPerPlane, 2305 v->SwathWidthSingleDPPY, 2306 v->SwathWidthSingleDPPC, 2307 v->SwathWidthY, 2308 v->SwathWidthC, 2309 v->dummyinteger3, 2310 v->dummyinteger4, 2311 v->swath_width_luma_ub, 2312 v->swath_width_chroma_ub); 2313 2314 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 2315 v->ReadBandwidthPlaneLuma[k] = v->SwathWidthSingleDPPY[k] * v->BytePerPixelY[k] / (v->HTotal[k] / v->PixelClock[k]) 2316 * v->VRatio[k]; 2317 v->ReadBandwidthPlaneChroma[k] = v->SwathWidthSingleDPPC[k] * v->BytePerPixelC[k] / (v->HTotal[k] / v->PixelClock[k]) 2318 * v->VRatioChroma[k]; 2319 DTRACE(" read_bw[%i] = %fBps", k, v->ReadBandwidthPlaneLuma[k] + v->ReadBandwidthPlaneChroma[k]); 2320 } 2321 2322 // DCFCLK Deep Sleep 2323 CalculateDCFCLKDeepSleep( 2324 mode_lib, 2325 v->NumberOfActivePlanes, 2326 v->BytePerPixelY, 2327 v->BytePerPixelC, 2328 v->VRatio, 2329 v->VRatioChroma, 2330 v->SwathWidthY, 2331 v->SwathWidthC, 2332 v->DPPPerPlane, 2333 v->HRatio, 2334 v->HRatioChroma, 2335 v->PixelClock, 2336 v->PSCL_THROUGHPUT_LUMA, 2337 v->PSCL_THROUGHPUT_CHROMA, 2338 v->DPPCLK, 2339 v->ReadBandwidthPlaneLuma, 2340 v->ReadBandwidthPlaneChroma, 2341 v->ReturnBusWidth, 2342 &v->DCFCLKDeepSleep); 2343 2344 // DSCCLK 2345 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 2346 if ((v->BlendingAndTiming[k] != k) || !v->DSCEnabled[k]) { 2347 v->DSCCLK_calculated[k] = 0.0; 2348 } else { 2349 if (v->OutputFormat[k] == dm_420) 2350 v->DSCFormatFactor = 2; 2351 else if (v->OutputFormat[k] == dm_444) 2352 v->DSCFormatFactor = 1; 2353 else if (v->OutputFormat[k] == dm_n422) 2354 v->DSCFormatFactor = 2; 2355 else 2356 v->DSCFormatFactor = 1; 2357 if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_4to1) 2358 v->DSCCLK_calculated[k] = v->PixelClockBackEnd[k] / 12 / v->DSCFormatFactor 2359 / (1 - v->DISPCLKDPPCLKDSCCLKDownSpreading / 100); 2360 else if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_2to1) 2361 v->DSCCLK_calculated[k] = v->PixelClockBackEnd[k] / 6 / v->DSCFormatFactor 2362 / (1 - v->DISPCLKDPPCLKDSCCLKDownSpreading / 100); 2363 else 2364 v->DSCCLK_calculated[k] = v->PixelClockBackEnd[k] / 3 / v->DSCFormatFactor 2365 / (1 - v->DISPCLKDPPCLKDSCCLKDownSpreading / 100); 2366 } 2367 } 2368 2369 // DSC Delay 2370 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 2371 double BPP = v->OutputBpp[k]; 2372 2373 if (v->DSCEnabled[k] && BPP != 0) { 2374 if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_disabled) { 2375 v->DSCDelay[k] = dscceComputeDelay( 2376 v->DSCInputBitPerComponent[k], 2377 BPP, 2378 dml_ceil((double) v->HActive[k] / v->NumberOfDSCSlices[k], 1), 2379 v->NumberOfDSCSlices[k], 2380 v->OutputFormat[k], 2381 v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k]); 2382 } else if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_2to1) { 2383 v->DSCDelay[k] = 2 2384 * (dscceComputeDelay( 2385 v->DSCInputBitPerComponent[k], 2386 BPP, 2387 dml_ceil((double) v->HActive[k] / v->NumberOfDSCSlices[k], 1), 2388 v->NumberOfDSCSlices[k] / 2.0, 2389 v->OutputFormat[k], 2390 v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k])); 2391 } else { 2392 v->DSCDelay[k] = 4 2393 * (dscceComputeDelay( 2394 v->DSCInputBitPerComponent[k], 2395 BPP, 2396 dml_ceil((double) v->HActive[k] / v->NumberOfDSCSlices[k], 1), 2397 v->NumberOfDSCSlices[k] / 4.0, 2398 v->OutputFormat[k], 2399 v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k])); 2400 } 2401 v->DSCDelay[k] = v->DSCDelay[k] * v->PixelClock[k] / v->PixelClockBackEnd[k]; 2402 } else { 2403 v->DSCDelay[k] = 0; 2404 } 2405 } 2406 2407 for (k = 0; k < v->NumberOfActivePlanes; ++k) 2408 for (j = 0; j < v->NumberOfActivePlanes; ++j) // NumberOfPlanes 2409 if (j != k && v->BlendingAndTiming[k] == j && v->DSCEnabled[j]) 2410 v->DSCDelay[k] = v->DSCDelay[j]; 2411 2412 // Prefetch 2413 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 2414 unsigned int PDEAndMetaPTEBytesFrameY; 2415 unsigned int PixelPTEBytesPerRowY; 2416 unsigned int MetaRowByteY; 2417 unsigned int MetaRowByteC; 2418 unsigned int PDEAndMetaPTEBytesFrameC; 2419 unsigned int PixelPTEBytesPerRowC; 2420 bool PTEBufferSizeNotExceededY; 2421 bool PTEBufferSizeNotExceededC; 2422 2423 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12 2424 || v->SourcePixelFormat[k] == dm_rgbe_alpha) { 2425 if ((v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12) && v->SourceScan[k] != dm_vert) { 2426 v->PTEBufferSizeInRequestsForLuma = (v->PTEBufferSizeInRequestsLuma + v->PTEBufferSizeInRequestsChroma) / 2; 2427 v->PTEBufferSizeInRequestsForChroma = v->PTEBufferSizeInRequestsForLuma; 2428 } else { 2429 v->PTEBufferSizeInRequestsForLuma = v->PTEBufferSizeInRequestsLuma; 2430 v->PTEBufferSizeInRequestsForChroma = v->PTEBufferSizeInRequestsChroma; 2431 } 2432 2433 PDEAndMetaPTEBytesFrameC = CalculateVMAndRowBytes( 2434 mode_lib, 2435 v->DCCEnable[k], 2436 v->BlockHeight256BytesC[k], 2437 v->BlockWidth256BytesC[k], 2438 v->SourcePixelFormat[k], 2439 v->SurfaceTiling[k], 2440 v->BytePerPixelC[k], 2441 v->SourceScan[k], 2442 v->SwathWidthC[k], 2443 v->ViewportHeightChroma[k], 2444 v->GPUVMEnable, 2445 v->HostVMEnable, 2446 v->HostVMMaxNonCachedPageTableLevels, 2447 v->GPUVMMinPageSize, 2448 v->HostVMMinPageSize, 2449 v->PTEBufferSizeInRequestsForChroma, 2450 v->PitchC[k], 2451 v->DCCMetaPitchC[k], 2452 &v->MacroTileWidthC[k], 2453 &MetaRowByteC, 2454 &PixelPTEBytesPerRowC, 2455 &PTEBufferSizeNotExceededC, 2456 &v->dpte_row_width_chroma_ub[k], 2457 &v->dpte_row_height_chroma[k], 2458 &v->meta_req_width_chroma[k], 2459 &v->meta_req_height_chroma[k], 2460 &v->meta_row_width_chroma[k], 2461 &v->meta_row_height_chroma[k], 2462 &v->dummyinteger1, 2463 &v->dummyinteger2, 2464 &v->PixelPTEReqWidthC[k], 2465 &v->PixelPTEReqHeightC[k], 2466 &v->PTERequestSizeC[k], 2467 &v->dpde0_bytes_per_frame_ub_c[k], 2468 &v->meta_pte_bytes_per_frame_ub_c[k]); 2469 2470 v->PrefetchSourceLinesC[k] = CalculatePrefetchSourceLines( 2471 mode_lib, 2472 v->VRatioChroma[k], 2473 v->VTAPsChroma[k], 2474 v->Interlace[k], 2475 v->ProgressiveToInterlaceUnitInOPP, 2476 v->SwathHeightC[k], 2477 v->ViewportYStartC[k], 2478 &v->VInitPreFillC[k], 2479 &v->MaxNumSwathC[k]); 2480 } else { 2481 v->PTEBufferSizeInRequestsForLuma = v->PTEBufferSizeInRequestsLuma + v->PTEBufferSizeInRequestsChroma; 2482 v->PTEBufferSizeInRequestsForChroma = 0; 2483 PixelPTEBytesPerRowC = 0; 2484 PDEAndMetaPTEBytesFrameC = 0; 2485 MetaRowByteC = 0; 2486 v->MaxNumSwathC[k] = 0; 2487 v->PrefetchSourceLinesC[k] = 0; 2488 } 2489 2490 PDEAndMetaPTEBytesFrameY = CalculateVMAndRowBytes( 2491 mode_lib, 2492 v->DCCEnable[k], 2493 v->BlockHeight256BytesY[k], 2494 v->BlockWidth256BytesY[k], 2495 v->SourcePixelFormat[k], 2496 v->SurfaceTiling[k], 2497 v->BytePerPixelY[k], 2498 v->SourceScan[k], 2499 v->SwathWidthY[k], 2500 v->ViewportHeight[k], 2501 v->GPUVMEnable, 2502 v->HostVMEnable, 2503 v->HostVMMaxNonCachedPageTableLevels, 2504 v->GPUVMMinPageSize, 2505 v->HostVMMinPageSize, 2506 v->PTEBufferSizeInRequestsForLuma, 2507 v->PitchY[k], 2508 v->DCCMetaPitchY[k], 2509 &v->MacroTileWidthY[k], 2510 &MetaRowByteY, 2511 &PixelPTEBytesPerRowY, 2512 &PTEBufferSizeNotExceededY, 2513 &v->dpte_row_width_luma_ub[k], 2514 &v->dpte_row_height[k], 2515 &v->meta_req_width[k], 2516 &v->meta_req_height[k], 2517 &v->meta_row_width[k], 2518 &v->meta_row_height[k], 2519 &v->vm_group_bytes[k], 2520 &v->dpte_group_bytes[k], 2521 &v->PixelPTEReqWidthY[k], 2522 &v->PixelPTEReqHeightY[k], 2523 &v->PTERequestSizeY[k], 2524 &v->dpde0_bytes_per_frame_ub_l[k], 2525 &v->meta_pte_bytes_per_frame_ub_l[k]); 2526 2527 v->PrefetchSourceLinesY[k] = CalculatePrefetchSourceLines( 2528 mode_lib, 2529 v->VRatio[k], 2530 v->vtaps[k], 2531 v->Interlace[k], 2532 v->ProgressiveToInterlaceUnitInOPP, 2533 v->SwathHeightY[k], 2534 v->ViewportYStartY[k], 2535 &v->VInitPreFillY[k], 2536 &v->MaxNumSwathY[k]); 2537 v->PixelPTEBytesPerRow[k] = PixelPTEBytesPerRowY + PixelPTEBytesPerRowC; 2538 v->PDEAndMetaPTEBytesFrame[k] = PDEAndMetaPTEBytesFrameY + PDEAndMetaPTEBytesFrameC; 2539 v->MetaRowByte[k] = MetaRowByteY + MetaRowByteC; 2540 2541 CalculateRowBandwidth( 2542 v->GPUVMEnable, 2543 v->SourcePixelFormat[k], 2544 v->VRatio[k], 2545 v->VRatioChroma[k], 2546 v->DCCEnable[k], 2547 v->HTotal[k] / v->PixelClock[k], 2548 MetaRowByteY, 2549 MetaRowByteC, 2550 v->meta_row_height[k], 2551 v->meta_row_height_chroma[k], 2552 PixelPTEBytesPerRowY, 2553 PixelPTEBytesPerRowC, 2554 v->dpte_row_height[k], 2555 v->dpte_row_height_chroma[k], 2556 &v->meta_row_bw[k], 2557 &v->dpte_row_bw[k]); 2558 } 2559 2560 v->TotalDCCActiveDPP = 0; 2561 v->TotalActiveDPP = 0; 2562 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 2563 v->TotalActiveDPP = v->TotalActiveDPP + v->DPPPerPlane[k]; 2564 if (v->DCCEnable[k]) 2565 v->TotalDCCActiveDPP = v->TotalDCCActiveDPP + v->DPPPerPlane[k]; 2566 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12 2567 || v->SourcePixelFormat[k] == dm_rgbe_alpha) 2568 NoChromaPlanes = false; 2569 } 2570 2571 ReorderBytes = v->NumberOfChannels 2572 * dml_max3( 2573 v->UrgentOutOfOrderReturnPerChannelPixelDataOnly, 2574 v->UrgentOutOfOrderReturnPerChannelPixelMixedWithVMData, 2575 v->UrgentOutOfOrderReturnPerChannelVMDataOnly); 2576 2577 VMDataOnlyReturnBW = dml_min( 2578 dml_min(v->ReturnBusWidth * v->DCFCLK, v->FabricClock * v->FabricDatapathToDCNDataReturn) 2579 * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0, 2580 v->DRAMSpeed * v->NumberOfChannels * v->DRAMChannelWidth 2581 * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyVMDataOnly / 100.0); 2582 2583 #ifdef __DML_VBA_DEBUG__ 2584 dml_print("DML::%s: v->ReturnBusWidth = %f\n", __func__, v->ReturnBusWidth); 2585 dml_print("DML::%s: v->DCFCLK = %f\n", __func__, v->DCFCLK); 2586 dml_print("DML::%s: v->FabricClock = %f\n", __func__, v->FabricClock); 2587 dml_print("DML::%s: v->FabricDatapathToDCNDataReturn = %f\n", __func__, v->FabricDatapathToDCNDataReturn); 2588 dml_print("DML::%s: v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency = %f\n", __func__, v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency); 2589 dml_print("DML::%s: v->DRAMSpeed = %f\n", __func__, v->DRAMSpeed); 2590 dml_print("DML::%s: v->NumberOfChannels = %f\n", __func__, v->NumberOfChannels); 2591 dml_print("DML::%s: v->DRAMChannelWidth = %f\n", __func__, v->DRAMChannelWidth); 2592 dml_print("DML::%s: v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyVMDataOnly = %f\n", __func__, v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyVMDataOnly); 2593 dml_print("DML::%s: VMDataOnlyReturnBW = %f\n", __func__, VMDataOnlyReturnBW); 2594 dml_print("DML::%s: ReturnBW = %f\n", __func__, v->ReturnBW); 2595 #endif 2596 2597 if (v->GPUVMEnable && v->HostVMEnable) 2598 HostVMInefficiencyFactor = v->ReturnBW / VMDataOnlyReturnBW; 2599 2600 v->UrgentExtraLatency = CalculateExtraLatency( 2601 v->RoundTripPingLatencyCycles, 2602 ReorderBytes, 2603 v->DCFCLK, 2604 v->TotalActiveDPP, 2605 v->PixelChunkSizeInKByte, 2606 v->TotalDCCActiveDPP, 2607 v->MetaChunkSize, 2608 v->ReturnBW, 2609 v->GPUVMEnable, 2610 v->HostVMEnable, 2611 v->NumberOfActivePlanes, 2612 v->DPPPerPlane, 2613 v->dpte_group_bytes, 2614 HostVMInefficiencyFactor, 2615 v->HostVMMinPageSize, 2616 v->HostVMMaxNonCachedPageTableLevels); 2617 2618 v->TCalc = 24.0 / v->DCFCLKDeepSleep; 2619 2620 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 2621 if (v->BlendingAndTiming[k] == k) { 2622 if (v->WritebackEnable[k] == true) { 2623 v->WritebackDelay[v->VoltageLevel][k] = v->WritebackLatency 2624 + CalculateWriteBackDelay( 2625 v->WritebackPixelFormat[k], 2626 v->WritebackHRatio[k], 2627 v->WritebackVRatio[k], 2628 v->WritebackVTaps[k], 2629 v->WritebackDestinationWidth[k], 2630 v->WritebackDestinationHeight[k], 2631 v->WritebackSourceHeight[k], 2632 v->HTotal[k]) / v->DISPCLK; 2633 } else 2634 v->WritebackDelay[v->VoltageLevel][k] = 0; 2635 for (j = 0; j < v->NumberOfActivePlanes; ++j) { 2636 if (v->BlendingAndTiming[j] == k && v->WritebackEnable[j] == true) { 2637 v->WritebackDelay[v->VoltageLevel][k] = dml_max( 2638 v->WritebackDelay[v->VoltageLevel][k], 2639 v->WritebackLatency 2640 + CalculateWriteBackDelay( 2641 v->WritebackPixelFormat[j], 2642 v->WritebackHRatio[j], 2643 v->WritebackVRatio[j], 2644 v->WritebackVTaps[j], 2645 v->WritebackDestinationWidth[j], 2646 v->WritebackDestinationHeight[j], 2647 v->WritebackSourceHeight[j], 2648 v->HTotal[k]) / v->DISPCLK); 2649 } 2650 } 2651 } 2652 } 2653 2654 for (k = 0; k < v->NumberOfActivePlanes; ++k) 2655 for (j = 0; j < v->NumberOfActivePlanes; ++j) 2656 if (v->BlendingAndTiming[k] == j) 2657 v->WritebackDelay[v->VoltageLevel][k] = v->WritebackDelay[v->VoltageLevel][j]; 2658 2659 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 2660 v->MaxVStartupLines[k] = 2661 (v->Interlace[k] && !v->ProgressiveToInterlaceUnitInOPP) ? 2662 dml_floor((v->VTotal[k] - v->VActive[k]) / 2.0, 1.0) : 2663 v->VTotal[k] - v->VActive[k] 2664 - dml_max( 2665 1.0, 2666 dml_ceil( 2667 (double) v->WritebackDelay[v->VoltageLevel][k] 2668 / (v->HTotal[k] / v->PixelClock[k]), 2669 1)); 2670 if (v->MaxVStartupLines[k] > 1023) 2671 v->MaxVStartupLines[k] = 1023; 2672 2673 #ifdef __DML_VBA_DEBUG__ 2674 dml_print("DML::%s: k=%d MaxVStartupLines = %d\n", __func__, k, v->MaxVStartupLines[k]); 2675 dml_print("DML::%s: k=%d VoltageLevel = %d\n", __func__, k, v->VoltageLevel); 2676 dml_print("DML::%s: k=%d WritebackDelay = %f\n", __func__, k, v->WritebackDelay[v->VoltageLevel][k]); 2677 #endif 2678 } 2679 2680 v->MaximumMaxVStartupLines = 0; 2681 for (k = 0; k < v->NumberOfActivePlanes; ++k) 2682 v->MaximumMaxVStartupLines = dml_max(v->MaximumMaxVStartupLines, v->MaxVStartupLines[k]); 2683 2684 // VBA_DELTA 2685 // We don't really care to iterate between the various prefetch modes 2686 //v->PrefetchERROR = CalculateMinAndMaxPrefetchMode(v->AllowDRAMSelfRefreshOrDRAMClockChangeInVblank, &v->MinPrefetchMode, &v->MaxPrefetchMode); 2687 2688 v->UrgentLatency = CalculateUrgentLatency( 2689 v->UrgentLatencyPixelDataOnly, 2690 v->UrgentLatencyPixelMixedWithVMData, 2691 v->UrgentLatencyVMDataOnly, 2692 v->DoUrgentLatencyAdjustment, 2693 v->UrgentLatencyAdjustmentFabricClockComponent, 2694 v->UrgentLatencyAdjustmentFabricClockReference, 2695 v->FabricClock); 2696 2697 v->FractionOfUrgentBandwidth = 0.0; 2698 v->FractionOfUrgentBandwidthImmediateFlip = 0.0; 2699 2700 v->VStartupLines = __DML_VBA_MIN_VSTARTUP__; 2701 2702 do { 2703 double MaxTotalRDBandwidthNoUrgentBurst = 0.0; 2704 bool DestinationLineTimesForPrefetchLessThan2 = false; 2705 bool VRatioPrefetchMoreThan4 = false; 2706 double TWait = CalculateTWait(PrefetchMode, v->DRAMClockChangeLatency, v->UrgentLatency, v->SREnterPlusExitTime); 2707 MaxTotalRDBandwidth = 0; 2708 2709 dml_print("DML::%s: Start loop: VStartup = %d\n", __func__, v->VStartupLines); 2710 2711 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 2712 Pipe myPipe; 2713 2714 myPipe.DPPCLK = v->DPPCLK[k]; 2715 myPipe.DISPCLK = v->DISPCLK; 2716 myPipe.PixelClock = v->PixelClock[k]; 2717 myPipe.DCFCLKDeepSleep = v->DCFCLKDeepSleep; 2718 myPipe.DPPPerPlane = v->DPPPerPlane[k]; 2719 myPipe.ScalerEnabled = v->ScalerEnabled[k]; 2720 myPipe.SourceScan = v->SourceScan[k]; 2721 myPipe.BlockWidth256BytesY = v->BlockWidth256BytesY[k]; 2722 myPipe.BlockHeight256BytesY = v->BlockHeight256BytesY[k]; 2723 myPipe.BlockWidth256BytesC = v->BlockWidth256BytesC[k]; 2724 myPipe.BlockHeight256BytesC = v->BlockHeight256BytesC[k]; 2725 myPipe.InterlaceEnable = v->Interlace[k]; 2726 myPipe.NumberOfCursors = v->NumberOfCursors[k]; 2727 myPipe.VBlank = v->VTotal[k] - v->VActive[k]; 2728 myPipe.HTotal = v->HTotal[k]; 2729 myPipe.DCCEnable = v->DCCEnable[k]; 2730 myPipe.ODMCombineIsEnabled = v->ODMCombineEnabled[k] == dm_odm_combine_mode_4to1 2731 || v->ODMCombineEnabled[k] == dm_odm_combine_mode_2to1; 2732 myPipe.SourcePixelFormat = v->SourcePixelFormat[k]; 2733 myPipe.BytePerPixelY = v->BytePerPixelY[k]; 2734 myPipe.BytePerPixelC = v->BytePerPixelC[k]; 2735 myPipe.ProgressiveToInterlaceUnitInOPP = v->ProgressiveToInterlaceUnitInOPP; 2736 v->ErrorResult[k] = CalculatePrefetchSchedule( 2737 mode_lib, 2738 HostVMInefficiencyFactor, 2739 &myPipe, 2740 v->DSCDelay[k], 2741 v->DPPCLKDelaySubtotal + v->DPPCLKDelayCNVCFormater, 2742 v->DPPCLKDelaySCL, 2743 v->DPPCLKDelaySCLLBOnly, 2744 v->DPPCLKDelayCNVCCursor, 2745 v->DISPCLKDelaySubtotal, 2746 (unsigned int) (v->SwathWidthY[k] / v->HRatio[k]), 2747 v->OutputFormat[k], 2748 v->MaxInterDCNTileRepeaters, 2749 dml_min(v->VStartupLines, v->MaxVStartupLines[k]), 2750 v->MaxVStartupLines[k], 2751 v->GPUVMMaxPageTableLevels, 2752 v->GPUVMEnable, 2753 v->HostVMEnable, 2754 v->HostVMMaxNonCachedPageTableLevels, 2755 v->HostVMMinPageSize, 2756 v->DynamicMetadataEnable[k], 2757 v->DynamicMetadataVMEnabled, 2758 v->DynamicMetadataLinesBeforeActiveRequired[k], 2759 v->DynamicMetadataTransmittedBytes[k], 2760 v->UrgentLatency, 2761 v->UrgentExtraLatency, 2762 v->TCalc, 2763 v->PDEAndMetaPTEBytesFrame[k], 2764 v->MetaRowByte[k], 2765 v->PixelPTEBytesPerRow[k], 2766 v->PrefetchSourceLinesY[k], 2767 v->SwathWidthY[k], 2768 v->VInitPreFillY[k], 2769 v->MaxNumSwathY[k], 2770 v->PrefetchSourceLinesC[k], 2771 v->SwathWidthC[k], 2772 v->VInitPreFillC[k], 2773 v->MaxNumSwathC[k], 2774 v->swath_width_luma_ub[k], 2775 v->swath_width_chroma_ub[k], 2776 v->SwathHeightY[k], 2777 v->SwathHeightC[k], 2778 TWait, 2779 &v->DSTXAfterScaler[k], 2780 &v->DSTYAfterScaler[k], 2781 &v->DestinationLinesForPrefetch[k], 2782 &v->PrefetchBandwidth[k], 2783 &v->DestinationLinesToRequestVMInVBlank[k], 2784 &v->DestinationLinesToRequestRowInVBlank[k], 2785 &v->VRatioPrefetchY[k], 2786 &v->VRatioPrefetchC[k], 2787 &v->RequiredPrefetchPixDataBWLuma[k], 2788 &v->RequiredPrefetchPixDataBWChroma[k], 2789 &v->NotEnoughTimeForDynamicMetadata[k], 2790 &v->Tno_bw[k], 2791 &v->prefetch_vmrow_bw[k], 2792 &v->Tdmdl_vm[k], 2793 &v->Tdmdl[k], 2794 &v->TSetup[k], 2795 &v->VUpdateOffsetPix[k], 2796 &v->VUpdateWidthPix[k], 2797 &v->VReadyOffsetPix[k]); 2798 2799 #ifdef __DML_VBA_DEBUG__ 2800 dml_print("DML::%s: k=%0d Prefetch cal result=%0d\n", __func__, k, v->ErrorResult[k]); 2801 #endif 2802 v->VStartup[k] = dml_min(v->VStartupLines, v->MaxVStartupLines[k]); 2803 } 2804 2805 v->NoEnoughUrgentLatencyHiding = false; 2806 v->NoEnoughUrgentLatencyHidingPre = false; 2807 2808 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 2809 v->cursor_bw[k] = v->NumberOfCursors[k] * v->CursorWidth[k][0] * v->CursorBPP[k][0] / 8.0 2810 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatio[k]; 2811 v->cursor_bw_pre[k] = v->NumberOfCursors[k] * v->CursorWidth[k][0] * v->CursorBPP[k][0] / 8.0 2812 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatioPrefetchY[k]; 2813 2814 CalculateUrgentBurstFactor( 2815 v->swath_width_luma_ub[k], 2816 v->swath_width_chroma_ub[k], 2817 v->SwathHeightY[k], 2818 v->SwathHeightC[k], 2819 v->HTotal[k] / v->PixelClock[k], 2820 v->UrgentLatency, 2821 v->CursorBufferSize, 2822 v->CursorWidth[k][0], 2823 v->CursorBPP[k][0], 2824 v->VRatio[k], 2825 v->VRatioChroma[k], 2826 v->BytePerPixelDETY[k], 2827 v->BytePerPixelDETC[k], 2828 v->DETBufferSizeY[k], 2829 v->DETBufferSizeC[k], 2830 &v->UrgBurstFactorCursor[k], 2831 &v->UrgBurstFactorLuma[k], 2832 &v->UrgBurstFactorChroma[k], 2833 &v->NoUrgentLatencyHiding[k]); 2834 2835 CalculateUrgentBurstFactor( 2836 v->swath_width_luma_ub[k], 2837 v->swath_width_chroma_ub[k], 2838 v->SwathHeightY[k], 2839 v->SwathHeightC[k], 2840 v->HTotal[k] / v->PixelClock[k], 2841 v->UrgentLatency, 2842 v->CursorBufferSize, 2843 v->CursorWidth[k][0], 2844 v->CursorBPP[k][0], 2845 v->VRatioPrefetchY[k], 2846 v->VRatioPrefetchC[k], 2847 v->BytePerPixelDETY[k], 2848 v->BytePerPixelDETC[k], 2849 v->DETBufferSizeY[k], 2850 v->DETBufferSizeC[k], 2851 &v->UrgBurstFactorCursorPre[k], 2852 &v->UrgBurstFactorLumaPre[k], 2853 &v->UrgBurstFactorChromaPre[k], 2854 &v->NoUrgentLatencyHidingPre[k]); 2855 2856 MaxTotalRDBandwidth = MaxTotalRDBandwidth 2857 + dml_max3( 2858 v->DPPPerPlane[k] * v->prefetch_vmrow_bw[k], 2859 v->ReadBandwidthPlaneLuma[k] * v->UrgBurstFactorLuma[k] 2860 + v->ReadBandwidthPlaneChroma[k] * v->UrgBurstFactorChroma[k] 2861 + v->cursor_bw[k] * v->UrgBurstFactorCursor[k] 2862 + v->DPPPerPlane[k] * (v->meta_row_bw[k] + v->dpte_row_bw[k]), 2863 v->DPPPerPlane[k] 2864 * (v->RequiredPrefetchPixDataBWLuma[k] * v->UrgBurstFactorLumaPre[k] 2865 + v->RequiredPrefetchPixDataBWChroma[k] * v->UrgBurstFactorChromaPre[k]) 2866 + v->cursor_bw_pre[k] * v->UrgBurstFactorCursorPre[k]); 2867 2868 MaxTotalRDBandwidthNoUrgentBurst = MaxTotalRDBandwidthNoUrgentBurst 2869 + dml_max3( 2870 v->DPPPerPlane[k] * v->prefetch_vmrow_bw[k], 2871 v->ReadBandwidthPlaneLuma[k] + v->ReadBandwidthPlaneChroma[k] + v->cursor_bw[k] 2872 + v->DPPPerPlane[k] * (v->meta_row_bw[k] + v->dpte_row_bw[k]), 2873 v->DPPPerPlane[k] * (v->RequiredPrefetchPixDataBWLuma[k] + v->RequiredPrefetchPixDataBWChroma[k]) 2874 + v->cursor_bw_pre[k]); 2875 2876 #ifdef __DML_VBA_DEBUG__ 2877 dml_print("DML::%s: k=%0d DPPPerPlane=%d\n", __func__, k, v->DPPPerPlane[k]); 2878 dml_print("DML::%s: k=%0d UrgBurstFactorLuma=%f\n", __func__, k, v->UrgBurstFactorLuma[k]); 2879 dml_print("DML::%s: k=%0d UrgBurstFactorChroma=%f\n", __func__, k, v->UrgBurstFactorChroma[k]); 2880 dml_print("DML::%s: k=%0d UrgBurstFactorLumaPre=%f\n", __func__, k, v->UrgBurstFactorLumaPre[k]); 2881 dml_print("DML::%s: k=%0d UrgBurstFactorChromaPre=%f\n", __func__, k, v->UrgBurstFactorChromaPre[k]); 2882 2883 dml_print("DML::%s: k=%0d VRatioPrefetchY=%f\n", __func__, k, v->VRatioPrefetchY[k]); 2884 dml_print("DML::%s: k=%0d VRatioY=%f\n", __func__, k, v->VRatio[k]); 2885 2886 dml_print("DML::%s: k=%0d prefetch_vmrow_bw=%f\n", __func__, k, v->prefetch_vmrow_bw[k]); 2887 dml_print("DML::%s: k=%0d ReadBandwidthPlaneLuma=%f\n", __func__, k, v->ReadBandwidthPlaneLuma[k]); 2888 dml_print("DML::%s: k=%0d ReadBandwidthPlaneChroma=%f\n", __func__, k, v->ReadBandwidthPlaneChroma[k]); 2889 dml_print("DML::%s: k=%0d cursor_bw=%f\n", __func__, k, v->cursor_bw[k]); 2890 dml_print("DML::%s: k=%0d meta_row_bw=%f\n", __func__, k, v->meta_row_bw[k]); 2891 dml_print("DML::%s: k=%0d dpte_row_bw=%f\n", __func__, k, v->dpte_row_bw[k]); 2892 dml_print("DML::%s: k=%0d RequiredPrefetchPixDataBWLuma=%f\n", __func__, k, v->RequiredPrefetchPixDataBWLuma[k]); 2893 dml_print("DML::%s: k=%0d RequiredPrefetchPixDataBWChroma=%f\n", __func__, k, v->RequiredPrefetchPixDataBWChroma[k]); 2894 dml_print("DML::%s: k=%0d cursor_bw_pre=%f\n", __func__, k, v->cursor_bw_pre[k]); 2895 dml_print("DML::%s: k=%0d MaxTotalRDBandwidthNoUrgentBurst=%f\n", __func__, k, MaxTotalRDBandwidthNoUrgentBurst); 2896 #endif 2897 2898 if (v->DestinationLinesForPrefetch[k] < 2) 2899 DestinationLineTimesForPrefetchLessThan2 = true; 2900 2901 if (v->VRatioPrefetchY[k] > 4 || v->VRatioPrefetchC[k] > 4) 2902 VRatioPrefetchMoreThan4 = true; 2903 2904 if (v->NoUrgentLatencyHiding[k] == true) 2905 v->NoEnoughUrgentLatencyHiding = true; 2906 2907 if (v->NoUrgentLatencyHidingPre[k] == true) 2908 v->NoEnoughUrgentLatencyHidingPre = true; 2909 } 2910 2911 v->FractionOfUrgentBandwidth = MaxTotalRDBandwidthNoUrgentBurst / v->ReturnBW; 2912 2913 #ifdef __DML_VBA_DEBUG__ 2914 dml_print("DML::%s: MaxTotalRDBandwidthNoUrgentBurst=%f \n", __func__, MaxTotalRDBandwidthNoUrgentBurst); 2915 dml_print("DML::%s: ReturnBW=%f \n", __func__, v->ReturnBW); 2916 dml_print("DML::%s: FractionOfUrgentBandwidth=%f \n", __func__, v->FractionOfUrgentBandwidth); 2917 #endif 2918 2919 if (MaxTotalRDBandwidth <= v->ReturnBW && v->NoEnoughUrgentLatencyHiding == 0 && v->NoEnoughUrgentLatencyHidingPre == 0 2920 && !VRatioPrefetchMoreThan4 && !DestinationLineTimesForPrefetchLessThan2) 2921 v->PrefetchModeSupported = true; 2922 else { 2923 v->PrefetchModeSupported = false; 2924 dml_print("DML::%s: ***failed***. Bandwidth violation. Results are NOT valid\n", __func__); 2925 dml_print("DML::%s: MaxTotalRDBandwidth:%f AvailReturnBandwidth:%f\n", __func__, MaxTotalRDBandwidth, v->ReturnBW); 2926 dml_print("DML::%s: VRatioPrefetch %s more than 4\n", __func__, (VRatioPrefetchMoreThan4) ? "is" : "is not"); 2927 dml_print("DML::%s: DestinationLines for Prefetch %s less than 2\n", __func__, (DestinationLineTimesForPrefetchLessThan2) ? "is" : "is not"); 2928 } 2929 2930 // PREVIOUS_ERROR 2931 // This error result check was done after the PrefetchModeSupported. So we will 2932 // still try to calculate flip schedule even prefetch mode not supported 2933 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 2934 if (v->ErrorResult[k] == true || v->NotEnoughTimeForDynamicMetadata[k] == true) { 2935 v->PrefetchModeSupported = false; 2936 dml_print("DML::%s: ***failed***. Prefetch schedule violation. Results are NOT valid\n", __func__); 2937 } 2938 } 2939 2940 if (v->PrefetchModeSupported == true && v->ImmediateFlipSupport == true) { 2941 v->BandwidthAvailableForImmediateFlip = v->ReturnBW; 2942 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 2943 v->BandwidthAvailableForImmediateFlip = v->BandwidthAvailableForImmediateFlip 2944 - dml_max( 2945 v->ReadBandwidthPlaneLuma[k] * v->UrgBurstFactorLuma[k] 2946 + v->ReadBandwidthPlaneChroma[k] * v->UrgBurstFactorChroma[k] 2947 + v->cursor_bw[k] * v->UrgBurstFactorCursor[k], 2948 v->DPPPerPlane[k] 2949 * (v->RequiredPrefetchPixDataBWLuma[k] * v->UrgBurstFactorLumaPre[k] 2950 + v->RequiredPrefetchPixDataBWChroma[k] * v->UrgBurstFactorChromaPre[k]) 2951 + v->cursor_bw_pre[k] * v->UrgBurstFactorCursorPre[k]); 2952 } 2953 2954 v->TotImmediateFlipBytes = 0; 2955 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 2956 v->TotImmediateFlipBytes = v->TotImmediateFlipBytes 2957 + v->DPPPerPlane[k] * (v->PDEAndMetaPTEBytesFrame[k] + v->MetaRowByte[k] + v->PixelPTEBytesPerRow[k]); 2958 } 2959 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 2960 CalculateFlipSchedule( 2961 mode_lib, 2962 HostVMInefficiencyFactor, 2963 v->UrgentExtraLatency, 2964 v->UrgentLatency, 2965 v->GPUVMMaxPageTableLevels, 2966 v->HostVMEnable, 2967 v->HostVMMaxNonCachedPageTableLevels, 2968 v->GPUVMEnable, 2969 v->HostVMMinPageSize, 2970 v->PDEAndMetaPTEBytesFrame[k], 2971 v->MetaRowByte[k], 2972 v->PixelPTEBytesPerRow[k], 2973 v->BandwidthAvailableForImmediateFlip, 2974 v->TotImmediateFlipBytes, 2975 v->SourcePixelFormat[k], 2976 v->HTotal[k] / v->PixelClock[k], 2977 v->VRatio[k], 2978 v->VRatioChroma[k], 2979 v->Tno_bw[k], 2980 v->DCCEnable[k], 2981 v->dpte_row_height[k], 2982 v->meta_row_height[k], 2983 v->dpte_row_height_chroma[k], 2984 v->meta_row_height_chroma[k], 2985 &v->DestinationLinesToRequestVMInImmediateFlip[k], 2986 &v->DestinationLinesToRequestRowInImmediateFlip[k], 2987 &v->final_flip_bw[k], 2988 &v->ImmediateFlipSupportedForPipe[k]); 2989 } 2990 2991 v->total_dcn_read_bw_with_flip = 0.0; 2992 v->total_dcn_read_bw_with_flip_no_urgent_burst = 0.0; 2993 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 2994 v->total_dcn_read_bw_with_flip = v->total_dcn_read_bw_with_flip 2995 + dml_max3( 2996 v->DPPPerPlane[k] * v->prefetch_vmrow_bw[k], 2997 v->DPPPerPlane[k] * v->final_flip_bw[k] 2998 + v->ReadBandwidthLuma[k] * v->UrgBurstFactorLuma[k] 2999 + v->ReadBandwidthChroma[k] * v->UrgBurstFactorChroma[k] 3000 + v->cursor_bw[k] * v->UrgBurstFactorCursor[k], 3001 v->DPPPerPlane[k] 3002 * (v->final_flip_bw[k] 3003 + v->RequiredPrefetchPixDataBWLuma[k] * v->UrgBurstFactorLumaPre[k] 3004 + v->RequiredPrefetchPixDataBWChroma[k] * v->UrgBurstFactorChromaPre[k]) 3005 + v->cursor_bw_pre[k] * v->UrgBurstFactorCursorPre[k]); 3006 v->total_dcn_read_bw_with_flip_no_urgent_burst = v->total_dcn_read_bw_with_flip_no_urgent_burst 3007 + dml_max3( 3008 v->DPPPerPlane[k] * v->prefetch_vmrow_bw[k], 3009 v->DPPPerPlane[k] * v->final_flip_bw[k] + v->ReadBandwidthPlaneLuma[k] 3010 + v->ReadBandwidthPlaneChroma[k] + v->cursor_bw[k], 3011 v->DPPPerPlane[k] 3012 * (v->final_flip_bw[k] + v->RequiredPrefetchPixDataBWLuma[k] 3013 + v->RequiredPrefetchPixDataBWChroma[k]) + v->cursor_bw_pre[k]); 3014 } 3015 v->FractionOfUrgentBandwidthImmediateFlip = v->total_dcn_read_bw_with_flip_no_urgent_burst / v->ReturnBW; 3016 3017 v->ImmediateFlipSupported = true; 3018 if (v->total_dcn_read_bw_with_flip > v->ReturnBW) { 3019 #ifdef __DML_VBA_DEBUG__ 3020 dml_print("DML::%s: total_dcn_read_bw_with_flip %f (bw w/ flip too high!)\n", __func__, v->total_dcn_read_bw_with_flip); 3021 #endif 3022 v->ImmediateFlipSupported = false; 3023 v->total_dcn_read_bw_with_flip = MaxTotalRDBandwidth; 3024 } 3025 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 3026 if (v->ImmediateFlipSupportedForPipe[k] == false) { 3027 #ifdef __DML_VBA_DEBUG__ 3028 dml_print("DML::%s: Pipe %0d not supporting iflip\n", 3029 __func__, k); 3030 #endif 3031 v->ImmediateFlipSupported = false; 3032 } 3033 } 3034 } else { 3035 v->ImmediateFlipSupported = false; 3036 } 3037 3038 v->PrefetchAndImmediateFlipSupported = 3039 (v->PrefetchModeSupported == true && ((!v->ImmediateFlipSupport && !v->HostVMEnable 3040 && v->ImmediateFlipRequirement[0] != dm_immediate_flip_required) || 3041 v->ImmediateFlipSupported)) ? true : false; 3042 #ifdef __DML_VBA_DEBUG__ 3043 dml_print("DML::%s: PrefetchModeSupported %d\n", __func__, v->PrefetchModeSupported); 3044 dml_print("DML::%s: ImmediateFlipRequirement %d\n", __func__, v->ImmediateFlipRequirement == dm_immediate_flip_required); 3045 dml_print("DML::%s: ImmediateFlipSupported %d\n", __func__, v->ImmediateFlipSupported); 3046 dml_print("DML::%s: ImmediateFlipSupport %d\n", __func__, v->ImmediateFlipSupport); 3047 dml_print("DML::%s: HostVMEnable %d\n", __func__, v->HostVMEnable); 3048 dml_print("DML::%s: PrefetchAndImmediateFlipSupported %d\n", __func__, v->PrefetchAndImmediateFlipSupported); 3049 #endif 3050 dml_print("DML::%s: Done loop: Vstartup=%d, Max Vstartup is %d\n", __func__, v->VStartupLines, v->MaximumMaxVStartupLines); 3051 3052 v->VStartupLines = v->VStartupLines + 1; 3053 } while (!v->PrefetchAndImmediateFlipSupported && v->VStartupLines <= v->MaximumMaxVStartupLines); 3054 ASSERT(v->PrefetchAndImmediateFlipSupported); 3055 3056 // Unbounded Request Enabled 3057 CalculateUnboundedRequestAndCompressedBufferSize( 3058 v->DETBufferSizeInKByte[0], 3059 v->ConfigReturnBufferSizeInKByte, 3060 v->UseUnboundedRequesting, 3061 v->TotalActiveDPP, 3062 NoChromaPlanes, 3063 v->MaxNumDPP, 3064 v->CompressedBufferSegmentSizeInkByte, 3065 v->Output, 3066 &v->UnboundedRequestEnabled, 3067 &v->CompressedBufferSizeInkByte); 3068 3069 //Watermarks and NB P-State/DRAM Clock Change Support 3070 { 3071 enum clock_change_support DRAMClockChangeSupport; // dummy 3072 CalculateWatermarksAndDRAMSpeedChangeSupport( 3073 mode_lib, 3074 PrefetchMode, 3075 v->NumberOfActivePlanes, 3076 v->MaxLineBufferLines, 3077 v->LineBufferSize, 3078 v->WritebackInterfaceBufferSize, 3079 v->DCFCLK, 3080 v->ReturnBW, 3081 v->SynchronizedVBlank, 3082 v->dpte_group_bytes, 3083 v->MetaChunkSize, 3084 v->UrgentLatency, 3085 v->UrgentExtraLatency, 3086 v->WritebackLatency, 3087 v->WritebackChunkSize, 3088 v->SOCCLK, 3089 v->DRAMClockChangeLatency, 3090 v->SRExitTime, 3091 v->SREnterPlusExitTime, 3092 v->SRExitZ8Time, 3093 v->SREnterPlusExitZ8Time, 3094 v->DCFCLKDeepSleep, 3095 v->DETBufferSizeY, 3096 v->DETBufferSizeC, 3097 v->SwathHeightY, 3098 v->SwathHeightC, 3099 v->LBBitPerPixel, 3100 v->SwathWidthY, 3101 v->SwathWidthC, 3102 v->HRatio, 3103 v->HRatioChroma, 3104 v->vtaps, 3105 v->VTAPsChroma, 3106 v->VRatio, 3107 v->VRatioChroma, 3108 v->HTotal, 3109 v->PixelClock, 3110 v->BlendingAndTiming, 3111 v->DPPPerPlane, 3112 v->BytePerPixelDETY, 3113 v->BytePerPixelDETC, 3114 v->DSTXAfterScaler, 3115 v->DSTYAfterScaler, 3116 v->WritebackEnable, 3117 v->WritebackPixelFormat, 3118 v->WritebackDestinationWidth, 3119 v->WritebackDestinationHeight, 3120 v->WritebackSourceHeight, 3121 v->UnboundedRequestEnabled, 3122 v->CompressedBufferSizeInkByte, 3123 &DRAMClockChangeSupport, 3124 &v->UrgentWatermark, 3125 &v->WritebackUrgentWatermark, 3126 &v->DRAMClockChangeWatermark, 3127 &v->WritebackDRAMClockChangeWatermark, 3128 &v->StutterExitWatermark, 3129 &v->StutterEnterPlusExitWatermark, 3130 &v->Z8StutterExitWatermark, 3131 &v->Z8StutterEnterPlusExitWatermark, 3132 &v->MinActiveDRAMClockChangeLatencySupported); 3133 3134 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 3135 if (v->WritebackEnable[k] == true) { 3136 v->WritebackAllowDRAMClockChangeEndPosition[k] = dml_max( 3137 0, 3138 v->VStartup[k] * v->HTotal[k] / v->PixelClock[k] - v->WritebackDRAMClockChangeWatermark); 3139 } else { 3140 v->WritebackAllowDRAMClockChangeEndPosition[k] = 0; 3141 } 3142 } 3143 } 3144 3145 //Display Pipeline Delivery Time in Prefetch, Groups 3146 CalculatePixelDeliveryTimes( 3147 v->NumberOfActivePlanes, 3148 v->VRatio, 3149 v->VRatioChroma, 3150 v->VRatioPrefetchY, 3151 v->VRatioPrefetchC, 3152 v->swath_width_luma_ub, 3153 v->swath_width_chroma_ub, 3154 v->DPPPerPlane, 3155 v->HRatio, 3156 v->HRatioChroma, 3157 v->PixelClock, 3158 v->PSCL_THROUGHPUT_LUMA, 3159 v->PSCL_THROUGHPUT_CHROMA, 3160 v->DPPCLK, 3161 v->BytePerPixelC, 3162 v->SourceScan, 3163 v->NumberOfCursors, 3164 v->CursorWidth, 3165 v->CursorBPP, 3166 v->BlockWidth256BytesY, 3167 v->BlockHeight256BytesY, 3168 v->BlockWidth256BytesC, 3169 v->BlockHeight256BytesC, 3170 v->DisplayPipeLineDeliveryTimeLuma, 3171 v->DisplayPipeLineDeliveryTimeChroma, 3172 v->DisplayPipeLineDeliveryTimeLumaPrefetch, 3173 v->DisplayPipeLineDeliveryTimeChromaPrefetch, 3174 v->DisplayPipeRequestDeliveryTimeLuma, 3175 v->DisplayPipeRequestDeliveryTimeChroma, 3176 v->DisplayPipeRequestDeliveryTimeLumaPrefetch, 3177 v->DisplayPipeRequestDeliveryTimeChromaPrefetch, 3178 v->CursorRequestDeliveryTime, 3179 v->CursorRequestDeliveryTimePrefetch); 3180 3181 CalculateMetaAndPTETimes( 3182 v->NumberOfActivePlanes, 3183 v->GPUVMEnable, 3184 v->MetaChunkSize, 3185 v->MinMetaChunkSizeBytes, 3186 v->HTotal, 3187 v->VRatio, 3188 v->VRatioChroma, 3189 v->DestinationLinesToRequestRowInVBlank, 3190 v->DestinationLinesToRequestRowInImmediateFlip, 3191 v->DCCEnable, 3192 v->PixelClock, 3193 v->BytePerPixelY, 3194 v->BytePerPixelC, 3195 v->SourceScan, 3196 v->dpte_row_height, 3197 v->dpte_row_height_chroma, 3198 v->meta_row_width, 3199 v->meta_row_width_chroma, 3200 v->meta_row_height, 3201 v->meta_row_height_chroma, 3202 v->meta_req_width, 3203 v->meta_req_width_chroma, 3204 v->meta_req_height, 3205 v->meta_req_height_chroma, 3206 v->dpte_group_bytes, 3207 v->PTERequestSizeY, 3208 v->PTERequestSizeC, 3209 v->PixelPTEReqWidthY, 3210 v->PixelPTEReqHeightY, 3211 v->PixelPTEReqWidthC, 3212 v->PixelPTEReqHeightC, 3213 v->dpte_row_width_luma_ub, 3214 v->dpte_row_width_chroma_ub, 3215 v->DST_Y_PER_PTE_ROW_NOM_L, 3216 v->DST_Y_PER_PTE_ROW_NOM_C, 3217 v->DST_Y_PER_META_ROW_NOM_L, 3218 v->DST_Y_PER_META_ROW_NOM_C, 3219 v->TimePerMetaChunkNominal, 3220 v->TimePerChromaMetaChunkNominal, 3221 v->TimePerMetaChunkVBlank, 3222 v->TimePerChromaMetaChunkVBlank, 3223 v->TimePerMetaChunkFlip, 3224 v->TimePerChromaMetaChunkFlip, 3225 v->time_per_pte_group_nom_luma, 3226 v->time_per_pte_group_vblank_luma, 3227 v->time_per_pte_group_flip_luma, 3228 v->time_per_pte_group_nom_chroma, 3229 v->time_per_pte_group_vblank_chroma, 3230 v->time_per_pte_group_flip_chroma); 3231 3232 CalculateVMGroupAndRequestTimes( 3233 v->NumberOfActivePlanes, 3234 v->GPUVMEnable, 3235 v->GPUVMMaxPageTableLevels, 3236 v->HTotal, 3237 v->BytePerPixelC, 3238 v->DestinationLinesToRequestVMInVBlank, 3239 v->DestinationLinesToRequestVMInImmediateFlip, 3240 v->DCCEnable, 3241 v->PixelClock, 3242 v->dpte_row_width_luma_ub, 3243 v->dpte_row_width_chroma_ub, 3244 v->vm_group_bytes, 3245 v->dpde0_bytes_per_frame_ub_l, 3246 v->dpde0_bytes_per_frame_ub_c, 3247 v->meta_pte_bytes_per_frame_ub_l, 3248 v->meta_pte_bytes_per_frame_ub_c, 3249 v->TimePerVMGroupVBlank, 3250 v->TimePerVMGroupFlip, 3251 v->TimePerVMRequestVBlank, 3252 v->TimePerVMRequestFlip); 3253 3254 // Min TTUVBlank 3255 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 3256 if (PrefetchMode == 0) { 3257 v->AllowDRAMClockChangeDuringVBlank[k] = true; 3258 v->AllowDRAMSelfRefreshDuringVBlank[k] = true; 3259 v->MinTTUVBlank[k] = dml_max( 3260 v->DRAMClockChangeWatermark, 3261 dml_max(v->StutterEnterPlusExitWatermark, v->UrgentWatermark)); 3262 } else if (PrefetchMode == 1) { 3263 v->AllowDRAMClockChangeDuringVBlank[k] = false; 3264 v->AllowDRAMSelfRefreshDuringVBlank[k] = true; 3265 v->MinTTUVBlank[k] = dml_max(v->StutterEnterPlusExitWatermark, v->UrgentWatermark); 3266 } else { 3267 v->AllowDRAMClockChangeDuringVBlank[k] = false; 3268 v->AllowDRAMSelfRefreshDuringVBlank[k] = false; 3269 v->MinTTUVBlank[k] = v->UrgentWatermark; 3270 } 3271 if (!v->DynamicMetadataEnable[k]) 3272 v->MinTTUVBlank[k] = v->TCalc + v->MinTTUVBlank[k]; 3273 } 3274 3275 // DCC Configuration 3276 v->ActiveDPPs = 0; 3277 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 3278 CalculateDCCConfiguration(v->DCCEnable[k], false, // We should always know the direction DCCProgrammingAssumesScanDirectionUnknown, 3279 v->SourcePixelFormat[k], 3280 v->SurfaceWidthY[k], 3281 v->SurfaceWidthC[k], 3282 v->SurfaceHeightY[k], 3283 v->SurfaceHeightC[k], 3284 v->DETBufferSizeInKByte[0] * 1024, 3285 v->BlockHeight256BytesY[k], 3286 v->BlockHeight256BytesC[k], 3287 v->SurfaceTiling[k], 3288 v->BytePerPixelY[k], 3289 v->BytePerPixelC[k], 3290 v->BytePerPixelDETY[k], 3291 v->BytePerPixelDETC[k], 3292 v->SourceScan[k], 3293 &v->DCCYMaxUncompressedBlock[k], 3294 &v->DCCCMaxUncompressedBlock[k], 3295 &v->DCCYMaxCompressedBlock[k], 3296 &v->DCCCMaxCompressedBlock[k], 3297 &v->DCCYIndependentBlock[k], 3298 &v->DCCCIndependentBlock[k]); 3299 } 3300 3301 // VStartup Adjustment 3302 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 3303 bool isInterlaceTiming; 3304 double Tvstartup_margin = (v->MaxVStartupLines[k] - v->VStartup[k]) * v->HTotal[k] / v->PixelClock[k]; 3305 #ifdef __DML_VBA_DEBUG__ 3306 dml_print("DML::%s: k=%d, MinTTUVBlank = %f (before margin)\n", __func__, k, v->MinTTUVBlank[k]); 3307 #endif 3308 3309 v->MinTTUVBlank[k] = v->MinTTUVBlank[k] + Tvstartup_margin; 3310 3311 #ifdef __DML_VBA_DEBUG__ 3312 dml_print("DML::%s: k=%d, Tvstartup_margin = %f\n", __func__, k, Tvstartup_margin); 3313 dml_print("DML::%s: k=%d, MaxVStartupLines = %d\n", __func__, k, v->MaxVStartupLines[k]); 3314 dml_print("DML::%s: k=%d, VStartup = %d\n", __func__, k, v->VStartup[k]); 3315 dml_print("DML::%s: k=%d, MinTTUVBlank = %f\n", __func__, k, v->MinTTUVBlank[k]); 3316 #endif 3317 3318 v->Tdmdl[k] = v->Tdmdl[k] + Tvstartup_margin; 3319 if (v->DynamicMetadataEnable[k] && v->DynamicMetadataVMEnabled) { 3320 v->Tdmdl_vm[k] = v->Tdmdl_vm[k] + Tvstartup_margin; 3321 } 3322 3323 isInterlaceTiming = (v->Interlace[k] && !v->ProgressiveToInterlaceUnitInOPP); 3324 3325 v->MIN_DST_Y_NEXT_START[k] = ((isInterlaceTiming ? dml_floor((v->VTotal[k] - v->VFrontPorch[k]) / 2.0, 1.0) : v->VTotal[k]) 3326 - v->VFrontPorch[k]) 3327 + dml_max(1.0, dml_ceil(v->WritebackDelay[v->VoltageLevel][k] / (v->HTotal[k] / v->PixelClock[k]), 1.0)) 3328 + dml_floor(4.0 * v->TSetup[k] / (v->HTotal[k] / v->PixelClock[k]), 1.0) / 4.0; 3329 3330 v->VStartup[k] = (isInterlaceTiming ? (2 * v->MaxVStartupLines[k]) : v->MaxVStartupLines[k]); 3331 3332 if (((v->VUpdateOffsetPix[k] + v->VUpdateWidthPix[k] + v->VReadyOffsetPix[k]) / v->HTotal[k]) 3333 <= (isInterlaceTiming ? 3334 dml_floor((v->VTotal[k] - v->VActive[k] - v->VFrontPorch[k] - v->VStartup[k]) / 2.0, 1.0) : 3335 (int) (v->VTotal[k] - v->VActive[k] - v->VFrontPorch[k] - v->VStartup[k]))) { 3336 v->VREADY_AT_OR_AFTER_VSYNC[k] = true; 3337 } else { 3338 v->VREADY_AT_OR_AFTER_VSYNC[k] = false; 3339 } 3340 #ifdef __DML_VBA_DEBUG__ 3341 dml_print("DML::%s: k=%d, VStartup = %d (max)\n", __func__, k, v->VStartup[k]); 3342 dml_print("DML::%s: k=%d, VUpdateOffsetPix = %d\n", __func__, k, v->VUpdateOffsetPix[k]); 3343 dml_print("DML::%s: k=%d, VUpdateWidthPix = %d\n", __func__, k, v->VUpdateWidthPix[k]); 3344 dml_print("DML::%s: k=%d, VReadyOffsetPix = %d\n", __func__, k, v->VReadyOffsetPix[k]); 3345 dml_print("DML::%s: k=%d, HTotal = %d\n", __func__, k, v->HTotal[k]); 3346 dml_print("DML::%s: k=%d, VTotal = %d\n", __func__, k, v->VTotal[k]); 3347 dml_print("DML::%s: k=%d, VActive = %d\n", __func__, k, v->VActive[k]); 3348 dml_print("DML::%s: k=%d, VFrontPorch = %d\n", __func__, k, v->VFrontPorch[k]); 3349 dml_print("DML::%s: k=%d, VStartup = %d\n", __func__, k, v->VStartup[k]); 3350 dml_print("DML::%s: k=%d, MIN_DST_Y_NEXT_START = %f\n", __func__, k, v->MIN_DST_Y_NEXT_START[k]); 3351 dml_print("DML::%s: k=%d, VREADY_AT_OR_AFTER_VSYNC = %d\n", __func__, k, v->VREADY_AT_OR_AFTER_VSYNC[k]); 3352 #endif 3353 } 3354 3355 { 3356 //Maximum Bandwidth Used 3357 double TotalWRBandwidth = 0; 3358 double MaxPerPlaneVActiveWRBandwidth = 0; 3359 double WRBandwidth = 0; 3360 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 3361 if (v->WritebackEnable[k] == true && v->WritebackPixelFormat[k] == dm_444_32) { 3362 WRBandwidth = v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k] 3363 / (v->HTotal[k] * v->WritebackSourceHeight[k] / v->PixelClock[k]) * 4; 3364 } else if (v->WritebackEnable[k] == true) { 3365 WRBandwidth = v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k] 3366 / (v->HTotal[k] * v->WritebackSourceHeight[k] / v->PixelClock[k]) * 8; 3367 } 3368 TotalWRBandwidth = TotalWRBandwidth + WRBandwidth; 3369 MaxPerPlaneVActiveWRBandwidth = dml_max(MaxPerPlaneVActiveWRBandwidth, WRBandwidth); 3370 } 3371 3372 v->TotalDataReadBandwidth = 0; 3373 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 3374 v->TotalDataReadBandwidth = v->TotalDataReadBandwidth + v->ReadBandwidthPlaneLuma[k] + v->ReadBandwidthPlaneChroma[k]; 3375 } 3376 } 3377 // Stutter Efficiency 3378 CalculateStutterEfficiency( 3379 mode_lib, 3380 v->CompressedBufferSizeInkByte, 3381 v->UnboundedRequestEnabled, 3382 v->ConfigReturnBufferSizeInKByte, 3383 v->MetaFIFOSizeInKEntries, 3384 v->ZeroSizeBufferEntries, 3385 v->NumberOfActivePlanes, 3386 v->ROBBufferSizeInKByte, 3387 v->TotalDataReadBandwidth, 3388 v->DCFCLK, 3389 v->ReturnBW, 3390 v->COMPBUF_RESERVED_SPACE_64B, 3391 v->COMPBUF_RESERVED_SPACE_ZS, 3392 v->SRExitTime, 3393 v->SRExitZ8Time, 3394 v->SynchronizedVBlank, 3395 v->StutterEnterPlusExitWatermark, 3396 v->Z8StutterEnterPlusExitWatermark, 3397 v->ProgressiveToInterlaceUnitInOPP, 3398 v->Interlace, 3399 v->MinTTUVBlank, 3400 v->DPPPerPlane, 3401 v->DETBufferSizeY, 3402 v->BytePerPixelY, 3403 v->BytePerPixelDETY, 3404 v->SwathWidthY, 3405 v->SwathHeightY, 3406 v->SwathHeightC, 3407 v->DCCRateLuma, 3408 v->DCCRateChroma, 3409 v->DCCFractionOfZeroSizeRequestsLuma, 3410 v->DCCFractionOfZeroSizeRequestsChroma, 3411 v->HTotal, 3412 v->VTotal, 3413 v->PixelClock, 3414 v->VRatio, 3415 v->SourceScan, 3416 v->BlockHeight256BytesY, 3417 v->BlockWidth256BytesY, 3418 v->BlockHeight256BytesC, 3419 v->BlockWidth256BytesC, 3420 v->DCCYMaxUncompressedBlock, 3421 v->DCCCMaxUncompressedBlock, 3422 v->VActive, 3423 v->DCCEnable, 3424 v->WritebackEnable, 3425 v->ReadBandwidthPlaneLuma, 3426 v->ReadBandwidthPlaneChroma, 3427 v->meta_row_bw, 3428 v->dpte_row_bw, 3429 &v->StutterEfficiencyNotIncludingVBlank, 3430 &v->StutterEfficiency, 3431 &v->NumberOfStutterBurstsPerFrame, 3432 &v->Z8StutterEfficiencyNotIncludingVBlank, 3433 &v->Z8StutterEfficiency, 3434 &v->Z8NumberOfStutterBurstsPerFrame, 3435 &v->StutterPeriod); 3436 } 3437 3438 static void DisplayPipeConfiguration(struct display_mode_lib *mode_lib) 3439 { 3440 struct vba_vars_st *v = &mode_lib->vba; 3441 // Display Pipe Configuration 3442 double BytePerPixDETY[DC__NUM_DPP__MAX]; 3443 double BytePerPixDETC[DC__NUM_DPP__MAX]; 3444 int BytePerPixY[DC__NUM_DPP__MAX]; 3445 int BytePerPixC[DC__NUM_DPP__MAX]; 3446 int Read256BytesBlockHeightY[DC__NUM_DPP__MAX]; 3447 int Read256BytesBlockHeightC[DC__NUM_DPP__MAX]; 3448 int Read256BytesBlockWidthY[DC__NUM_DPP__MAX]; 3449 int Read256BytesBlockWidthC[DC__NUM_DPP__MAX]; 3450 double dummy1[DC__NUM_DPP__MAX]; 3451 double dummy2[DC__NUM_DPP__MAX]; 3452 double dummy3[DC__NUM_DPP__MAX]; 3453 double dummy4[DC__NUM_DPP__MAX]; 3454 int dummy5[DC__NUM_DPP__MAX]; 3455 int dummy6[DC__NUM_DPP__MAX]; 3456 bool dummy7[DC__NUM_DPP__MAX]; 3457 bool dummysinglestring; 3458 3459 unsigned int k; 3460 3461 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 3462 3463 CalculateBytePerPixelAnd256BBlockSizes( 3464 v->SourcePixelFormat[k], 3465 v->SurfaceTiling[k], 3466 &BytePerPixY[k], 3467 &BytePerPixC[k], 3468 &BytePerPixDETY[k], 3469 &BytePerPixDETC[k], 3470 &Read256BytesBlockHeightY[k], 3471 &Read256BytesBlockHeightC[k], 3472 &Read256BytesBlockWidthY[k], 3473 &Read256BytesBlockWidthC[k]); 3474 } 3475 3476 CalculateSwathAndDETConfiguration( 3477 false, 3478 v->NumberOfActivePlanes, 3479 v->DETBufferSizeInKByte[0], 3480 dummy1, 3481 dummy2, 3482 v->SourceScan, 3483 v->SourcePixelFormat, 3484 v->SurfaceTiling, 3485 v->ViewportWidth, 3486 v->ViewportHeight, 3487 v->SurfaceWidthY, 3488 v->SurfaceWidthC, 3489 v->SurfaceHeightY, 3490 v->SurfaceHeightC, 3491 Read256BytesBlockHeightY, 3492 Read256BytesBlockHeightC, 3493 Read256BytesBlockWidthY, 3494 Read256BytesBlockWidthC, 3495 v->ODMCombineEnabled, 3496 v->BlendingAndTiming, 3497 BytePerPixY, 3498 BytePerPixC, 3499 BytePerPixDETY, 3500 BytePerPixDETC, 3501 v->HActive, 3502 v->HRatio, 3503 v->HRatioChroma, 3504 v->DPPPerPlane, 3505 dummy5, 3506 dummy6, 3507 dummy3, 3508 dummy4, 3509 v->SwathHeightY, 3510 v->SwathHeightC, 3511 v->DETBufferSizeY, 3512 v->DETBufferSizeC, 3513 dummy7, 3514 &dummysinglestring); 3515 } 3516 3517 static bool CalculateBytePerPixelAnd256BBlockSizes( 3518 enum source_format_class SourcePixelFormat, 3519 enum dm_swizzle_mode SurfaceTiling, 3520 unsigned int *BytePerPixelY, 3521 unsigned int *BytePerPixelC, 3522 double *BytePerPixelDETY, 3523 double *BytePerPixelDETC, 3524 unsigned int *BlockHeight256BytesY, 3525 unsigned int *BlockHeight256BytesC, 3526 unsigned int *BlockWidth256BytesY, 3527 unsigned int *BlockWidth256BytesC) 3528 { 3529 if (SourcePixelFormat == dm_444_64) { 3530 *BytePerPixelDETY = 8; 3531 *BytePerPixelDETC = 0; 3532 *BytePerPixelY = 8; 3533 *BytePerPixelC = 0; 3534 } else if (SourcePixelFormat == dm_444_32 || SourcePixelFormat == dm_rgbe) { 3535 *BytePerPixelDETY = 4; 3536 *BytePerPixelDETC = 0; 3537 *BytePerPixelY = 4; 3538 *BytePerPixelC = 0; 3539 } else if (SourcePixelFormat == dm_444_16) { 3540 *BytePerPixelDETY = 2; 3541 *BytePerPixelDETC = 0; 3542 *BytePerPixelY = 2; 3543 *BytePerPixelC = 0; 3544 } else if (SourcePixelFormat == dm_444_8) { 3545 *BytePerPixelDETY = 1; 3546 *BytePerPixelDETC = 0; 3547 *BytePerPixelY = 1; 3548 *BytePerPixelC = 0; 3549 } else if (SourcePixelFormat == dm_rgbe_alpha) { 3550 *BytePerPixelDETY = 4; 3551 *BytePerPixelDETC = 1; 3552 *BytePerPixelY = 4; 3553 *BytePerPixelC = 1; 3554 } else if (SourcePixelFormat == dm_420_8) { 3555 *BytePerPixelDETY = 1; 3556 *BytePerPixelDETC = 2; 3557 *BytePerPixelY = 1; 3558 *BytePerPixelC = 2; 3559 } else if (SourcePixelFormat == dm_420_12) { 3560 *BytePerPixelDETY = 2; 3561 *BytePerPixelDETC = 4; 3562 *BytePerPixelY = 2; 3563 *BytePerPixelC = 4; 3564 } else { 3565 *BytePerPixelDETY = 4.0 / 3; 3566 *BytePerPixelDETC = 8.0 / 3; 3567 *BytePerPixelY = 2; 3568 *BytePerPixelC = 4; 3569 } 3570 3571 if ((SourcePixelFormat == dm_444_64 || SourcePixelFormat == dm_444_32 || SourcePixelFormat == dm_444_16 || SourcePixelFormat == dm_444_8 || SourcePixelFormat == dm_mono_16 3572 || SourcePixelFormat == dm_mono_8 || SourcePixelFormat == dm_rgbe)) { 3573 if (SurfaceTiling == dm_sw_linear) { 3574 *BlockHeight256BytesY = 1; 3575 } else if (SourcePixelFormat == dm_444_64) { 3576 *BlockHeight256BytesY = 4; 3577 } else if (SourcePixelFormat == dm_444_8) { 3578 *BlockHeight256BytesY = 16; 3579 } else { 3580 *BlockHeight256BytesY = 8; 3581 } 3582 *BlockWidth256BytesY = 256U / *BytePerPixelY / *BlockHeight256BytesY; 3583 *BlockHeight256BytesC = 0; 3584 *BlockWidth256BytesC = 0; 3585 } else { 3586 if (SurfaceTiling == dm_sw_linear) { 3587 *BlockHeight256BytesY = 1; 3588 *BlockHeight256BytesC = 1; 3589 } else if (SourcePixelFormat == dm_rgbe_alpha) { 3590 *BlockHeight256BytesY = 8; 3591 *BlockHeight256BytesC = 16; 3592 } else if (SourcePixelFormat == dm_420_8) { 3593 *BlockHeight256BytesY = 16; 3594 *BlockHeight256BytesC = 8; 3595 } else { 3596 *BlockHeight256BytesY = 8; 3597 *BlockHeight256BytesC = 8; 3598 } 3599 *BlockWidth256BytesY = 256U / *BytePerPixelY / *BlockHeight256BytesY; 3600 *BlockWidth256BytesC = 256U / *BytePerPixelC / *BlockHeight256BytesC; 3601 } 3602 return true; 3603 } 3604 3605 static double CalculateTWait(unsigned int PrefetchMode, double DRAMClockChangeLatency, double UrgentLatency, double SREnterPlusExitTime) 3606 { 3607 if (PrefetchMode == 0) { 3608 return dml_max(DRAMClockChangeLatency + UrgentLatency, dml_max(SREnterPlusExitTime, UrgentLatency)); 3609 } else if (PrefetchMode == 1) { 3610 return dml_max(SREnterPlusExitTime, UrgentLatency); 3611 } else { 3612 return UrgentLatency; 3613 } 3614 } 3615 3616 double dml31_CalculateWriteBackDISPCLK( 3617 enum source_format_class WritebackPixelFormat, 3618 double PixelClock, 3619 double WritebackHRatio, 3620 double WritebackVRatio, 3621 unsigned int WritebackHTaps, 3622 unsigned int WritebackVTaps, 3623 long WritebackSourceWidth, 3624 long WritebackDestinationWidth, 3625 unsigned int HTotal, 3626 unsigned int WritebackLineBufferSize) 3627 { 3628 double DISPCLK_H, DISPCLK_V, DISPCLK_HB; 3629 3630 DISPCLK_H = PixelClock * dml_ceil(WritebackHTaps / 8.0, 1) / WritebackHRatio; 3631 DISPCLK_V = PixelClock * (WritebackVTaps * dml_ceil(WritebackDestinationWidth / 6.0, 1) + 8.0) / HTotal; 3632 DISPCLK_HB = PixelClock * WritebackVTaps * (WritebackDestinationWidth * WritebackVTaps - WritebackLineBufferSize / 57.0) / 6.0 / WritebackSourceWidth; 3633 return dml_max3(DISPCLK_H, DISPCLK_V, DISPCLK_HB); 3634 } 3635 3636 static double CalculateWriteBackDelay( 3637 enum source_format_class WritebackPixelFormat, 3638 double WritebackHRatio, 3639 double WritebackVRatio, 3640 unsigned int WritebackVTaps, 3641 int WritebackDestinationWidth, 3642 int WritebackDestinationHeight, 3643 int WritebackSourceHeight, 3644 unsigned int HTotal) 3645 { 3646 double CalculateWriteBackDelay; 3647 double Line_length; 3648 double Output_lines_last_notclamped; 3649 double WritebackVInit; 3650 3651 WritebackVInit = (WritebackVRatio + WritebackVTaps + 1) / 2; 3652 Line_length = dml_max((double) WritebackDestinationWidth, dml_ceil(WritebackDestinationWidth / 6.0, 1) * WritebackVTaps); 3653 Output_lines_last_notclamped = WritebackDestinationHeight - 1 - dml_ceil((WritebackSourceHeight - WritebackVInit) / WritebackVRatio, 1); 3654 if (Output_lines_last_notclamped < 0) { 3655 CalculateWriteBackDelay = 0; 3656 } else { 3657 CalculateWriteBackDelay = Output_lines_last_notclamped * Line_length + (HTotal - WritebackDestinationWidth) + 80; 3658 } 3659 return CalculateWriteBackDelay; 3660 } 3661 3662 static void CalculateVupdateAndDynamicMetadataParameters( 3663 int MaxInterDCNTileRepeaters, 3664 double DPPCLK, 3665 double DISPCLK, 3666 double DCFClkDeepSleep, 3667 double PixelClock, 3668 int HTotal, 3669 int VBlank, 3670 int DynamicMetadataTransmittedBytes, 3671 int DynamicMetadataLinesBeforeActiveRequired, 3672 int InterlaceEnable, 3673 bool ProgressiveToInterlaceUnitInOPP, 3674 double *TSetup, 3675 double *Tdmbf, 3676 double *Tdmec, 3677 double *Tdmsks, 3678 int *VUpdateOffsetPix, 3679 double *VUpdateWidthPix, 3680 double *VReadyOffsetPix) 3681 { 3682 double TotalRepeaterDelayTime; 3683 3684 TotalRepeaterDelayTime = MaxInterDCNTileRepeaters * (2 / DPPCLK + 3 / DISPCLK); 3685 *VUpdateWidthPix = dml_ceil((14.0 / DCFClkDeepSleep + 12.0 / DPPCLK + TotalRepeaterDelayTime) * PixelClock, 1.0); 3686 *VReadyOffsetPix = dml_ceil(dml_max(150.0 / DPPCLK, TotalRepeaterDelayTime + 20.0 / DCFClkDeepSleep + 10.0 / DPPCLK) * PixelClock, 1.0); 3687 *VUpdateOffsetPix = dml_ceil(HTotal / 4.0, 1); 3688 *TSetup = (*VUpdateOffsetPix + *VUpdateWidthPix + *VReadyOffsetPix) / PixelClock; 3689 *Tdmbf = DynamicMetadataTransmittedBytes / 4.0 / DISPCLK; 3690 *Tdmec = HTotal / PixelClock; 3691 if (DynamicMetadataLinesBeforeActiveRequired == 0) { 3692 *Tdmsks = VBlank * HTotal / PixelClock / 2.0; 3693 } else { 3694 *Tdmsks = DynamicMetadataLinesBeforeActiveRequired * HTotal / PixelClock; 3695 } 3696 if (InterlaceEnable == 1 && ProgressiveToInterlaceUnitInOPP == false) { 3697 *Tdmsks = *Tdmsks / 2; 3698 } 3699 #ifdef __DML_VBA_DEBUG__ 3700 dml_print("DML::%s: VUpdateWidthPix = %d\n", __func__, *VUpdateWidthPix); 3701 dml_print("DML::%s: VReadyOffsetPix = %d\n", __func__, *VReadyOffsetPix); 3702 dml_print("DML::%s: VUpdateOffsetPix = %d\n", __func__, *VUpdateOffsetPix); 3703 #endif 3704 } 3705 3706 static void CalculateRowBandwidth( 3707 bool GPUVMEnable, 3708 enum source_format_class SourcePixelFormat, 3709 double VRatio, 3710 double VRatioChroma, 3711 bool DCCEnable, 3712 double LineTime, 3713 unsigned int MetaRowByteLuma, 3714 unsigned int MetaRowByteChroma, 3715 unsigned int meta_row_height_luma, 3716 unsigned int meta_row_height_chroma, 3717 unsigned int PixelPTEBytesPerRowLuma, 3718 unsigned int PixelPTEBytesPerRowChroma, 3719 unsigned int dpte_row_height_luma, 3720 unsigned int dpte_row_height_chroma, 3721 double *meta_row_bw, 3722 double *dpte_row_bw) 3723 { 3724 if (DCCEnable != true) { 3725 *meta_row_bw = 0; 3726 } else if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12 || SourcePixelFormat == dm_rgbe_alpha) { 3727 *meta_row_bw = VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime) + VRatioChroma * MetaRowByteChroma / (meta_row_height_chroma * LineTime); 3728 } else { 3729 *meta_row_bw = VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime); 3730 } 3731 3732 if (GPUVMEnable != true) { 3733 *dpte_row_bw = 0; 3734 } else if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12 || SourcePixelFormat == dm_rgbe_alpha) { 3735 *dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime) 3736 + VRatioChroma * PixelPTEBytesPerRowChroma / (dpte_row_height_chroma * LineTime); 3737 } else { 3738 *dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime); 3739 } 3740 } 3741 3742 static void CalculateFlipSchedule( 3743 struct display_mode_lib *mode_lib, 3744 double HostVMInefficiencyFactor, 3745 double UrgentExtraLatency, 3746 double UrgentLatency, 3747 unsigned int GPUVMMaxPageTableLevels, 3748 bool HostVMEnable, 3749 unsigned int HostVMMaxNonCachedPageTableLevels, 3750 bool GPUVMEnable, 3751 double HostVMMinPageSize, 3752 double PDEAndMetaPTEBytesPerFrame, 3753 double MetaRowBytes, 3754 double DPTEBytesPerRow, 3755 double BandwidthAvailableForImmediateFlip, 3756 unsigned int TotImmediateFlipBytes, 3757 enum source_format_class SourcePixelFormat, 3758 double LineTime, 3759 double VRatio, 3760 double VRatioChroma, 3761 double Tno_bw, 3762 bool DCCEnable, 3763 unsigned int dpte_row_height, 3764 unsigned int meta_row_height, 3765 unsigned int dpte_row_height_chroma, 3766 unsigned int meta_row_height_chroma, 3767 double *DestinationLinesToRequestVMInImmediateFlip, 3768 double *DestinationLinesToRequestRowInImmediateFlip, 3769 double *final_flip_bw, 3770 bool *ImmediateFlipSupportedForPipe) 3771 { 3772 double min_row_time = 0.0; 3773 unsigned int HostVMDynamicLevelsTrips; 3774 double TimeForFetchingMetaPTEImmediateFlip; 3775 double TimeForFetchingRowInVBlankImmediateFlip; 3776 double ImmediateFlipBW; 3777 3778 if (GPUVMEnable == true && HostVMEnable == true) { 3779 HostVMDynamicLevelsTrips = HostVMMaxNonCachedPageTableLevels; 3780 } else { 3781 HostVMDynamicLevelsTrips = 0; 3782 } 3783 3784 if (GPUVMEnable == true || DCCEnable == true) { 3785 ImmediateFlipBW = (PDEAndMetaPTEBytesPerFrame + MetaRowBytes + DPTEBytesPerRow) * BandwidthAvailableForImmediateFlip / TotImmediateFlipBytes; 3786 } 3787 3788 if (GPUVMEnable == true) { 3789 TimeForFetchingMetaPTEImmediateFlip = dml_max3( 3790 Tno_bw + PDEAndMetaPTEBytesPerFrame * HostVMInefficiencyFactor / ImmediateFlipBW, 3791 UrgentExtraLatency + UrgentLatency * (GPUVMMaxPageTableLevels * (HostVMDynamicLevelsTrips + 1) - 1), 3792 LineTime / 4.0); 3793 } else { 3794 TimeForFetchingMetaPTEImmediateFlip = 0; 3795 } 3796 3797 *DestinationLinesToRequestVMInImmediateFlip = dml_ceil(4.0 * (TimeForFetchingMetaPTEImmediateFlip / LineTime), 1) / 4.0; 3798 if ((GPUVMEnable == true || DCCEnable == true)) { 3799 TimeForFetchingRowInVBlankImmediateFlip = dml_max3( 3800 (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / ImmediateFlipBW, 3801 UrgentLatency * (HostVMDynamicLevelsTrips + 1), 3802 LineTime / 4); 3803 } else { 3804 TimeForFetchingRowInVBlankImmediateFlip = 0; 3805 } 3806 3807 *DestinationLinesToRequestRowInImmediateFlip = dml_ceil(4.0 * (TimeForFetchingRowInVBlankImmediateFlip / LineTime), 1) / 4.0; 3808 3809 if (GPUVMEnable == true) { 3810 *final_flip_bw = dml_max( 3811 PDEAndMetaPTEBytesPerFrame * HostVMInefficiencyFactor / (*DestinationLinesToRequestVMInImmediateFlip * LineTime), 3812 (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / (*DestinationLinesToRequestRowInImmediateFlip * LineTime)); 3813 } else if ((GPUVMEnable == true || DCCEnable == true)) { 3814 *final_flip_bw = (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / (*DestinationLinesToRequestRowInImmediateFlip * LineTime); 3815 } else { 3816 *final_flip_bw = 0; 3817 } 3818 3819 if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_rgbe_alpha) { 3820 if (GPUVMEnable == true && DCCEnable != true) { 3821 min_row_time = dml_min(dpte_row_height * LineTime / VRatio, dpte_row_height_chroma * LineTime / VRatioChroma); 3822 } else if (GPUVMEnable != true && DCCEnable == true) { 3823 min_row_time = dml_min(meta_row_height * LineTime / VRatio, meta_row_height_chroma * LineTime / VRatioChroma); 3824 } else { 3825 min_row_time = dml_min4( 3826 dpte_row_height * LineTime / VRatio, 3827 meta_row_height * LineTime / VRatio, 3828 dpte_row_height_chroma * LineTime / VRatioChroma, 3829 meta_row_height_chroma * LineTime / VRatioChroma); 3830 } 3831 } else { 3832 if (GPUVMEnable == true && DCCEnable != true) { 3833 min_row_time = dpte_row_height * LineTime / VRatio; 3834 } else if (GPUVMEnable != true && DCCEnable == true) { 3835 min_row_time = meta_row_height * LineTime / VRatio; 3836 } else { 3837 min_row_time = dml_min(dpte_row_height * LineTime / VRatio, meta_row_height * LineTime / VRatio); 3838 } 3839 } 3840 3841 if (*DestinationLinesToRequestVMInImmediateFlip >= 32 || *DestinationLinesToRequestRowInImmediateFlip >= 16 3842 || TimeForFetchingMetaPTEImmediateFlip + 2 * TimeForFetchingRowInVBlankImmediateFlip > min_row_time) { 3843 *ImmediateFlipSupportedForPipe = false; 3844 } else { 3845 *ImmediateFlipSupportedForPipe = true; 3846 } 3847 3848 #ifdef __DML_VBA_DEBUG__ 3849 dml_print("DML::%s: DestinationLinesToRequestVMInImmediateFlip = %f\n", __func__, *DestinationLinesToRequestVMInImmediateFlip); 3850 dml_print("DML::%s: DestinationLinesToRequestRowInImmediateFlip = %f\n", __func__, *DestinationLinesToRequestRowInImmediateFlip); 3851 dml_print("DML::%s: TimeForFetchingMetaPTEImmediateFlip = %f\n", __func__, TimeForFetchingMetaPTEImmediateFlip); 3852 dml_print("DML::%s: TimeForFetchingRowInVBlankImmediateFlip = %f\n", __func__, TimeForFetchingRowInVBlankImmediateFlip); 3853 dml_print("DML::%s: min_row_time = %f\n", __func__, min_row_time); 3854 dml_print("DML::%s: ImmediateFlipSupportedForPipe = %d\n", __func__, *ImmediateFlipSupportedForPipe); 3855 #endif 3856 3857 } 3858 3859 static double TruncToValidBPP( 3860 double LinkBitRate, 3861 int Lanes, 3862 int HTotal, 3863 int HActive, 3864 double PixelClock, 3865 double DesiredBPP, 3866 bool DSCEnable, 3867 enum output_encoder_class Output, 3868 enum output_format_class Format, 3869 unsigned int DSCInputBitPerComponent, 3870 int DSCSlices, 3871 int AudioRate, 3872 int AudioLayout, 3873 enum odm_combine_mode ODMCombine) 3874 { 3875 double MaxLinkBPP; 3876 int MinDSCBPP; 3877 double MaxDSCBPP; 3878 int NonDSCBPP0; 3879 int NonDSCBPP1; 3880 int NonDSCBPP2; 3881 3882 if (Format == dm_420) { 3883 NonDSCBPP0 = 12; 3884 NonDSCBPP1 = 15; 3885 NonDSCBPP2 = 18; 3886 MinDSCBPP = 6; 3887 MaxDSCBPP = 1.5 * DSCInputBitPerComponent - 1 / 16; 3888 } else if (Format == dm_444) { 3889 NonDSCBPP0 = 24; 3890 NonDSCBPP1 = 30; 3891 NonDSCBPP2 = 36; 3892 MinDSCBPP = 8; 3893 MaxDSCBPP = 3 * DSCInputBitPerComponent - 1.0 / 16; 3894 } else { 3895 3896 NonDSCBPP0 = 16; 3897 NonDSCBPP1 = 20; 3898 NonDSCBPP2 = 24; 3899 3900 if (Format == dm_n422) { 3901 MinDSCBPP = 7; 3902 MaxDSCBPP = 2 * DSCInputBitPerComponent - 1.0 / 16.0; 3903 } else { 3904 MinDSCBPP = 8; 3905 MaxDSCBPP = 3 * DSCInputBitPerComponent - 1.0 / 16.0; 3906 } 3907 } 3908 3909 if (DSCEnable && Output == dm_dp) { 3910 MaxLinkBPP = LinkBitRate / 10 * 8 * Lanes / PixelClock * (1 - 2.4 / 100); 3911 } else { 3912 MaxLinkBPP = LinkBitRate / 10 * 8 * Lanes / PixelClock; 3913 } 3914 3915 if (ODMCombine == dm_odm_combine_mode_4to1 && MaxLinkBPP > 16) { 3916 MaxLinkBPP = 16; 3917 } else if (ODMCombine == dm_odm_combine_mode_2to1 && MaxLinkBPP > 32) { 3918 MaxLinkBPP = 32; 3919 } 3920 3921 if (DesiredBPP == 0) { 3922 if (DSCEnable) { 3923 if (MaxLinkBPP < MinDSCBPP) { 3924 return BPP_INVALID; 3925 } else if (MaxLinkBPP >= MaxDSCBPP) { 3926 return MaxDSCBPP; 3927 } else { 3928 return dml_floor(16.0 * MaxLinkBPP, 1.0) / 16.0; 3929 } 3930 } else { 3931 if (MaxLinkBPP >= NonDSCBPP2) { 3932 return NonDSCBPP2; 3933 } else if (MaxLinkBPP >= NonDSCBPP1) { 3934 return NonDSCBPP1; 3935 } else if (MaxLinkBPP >= NonDSCBPP0) { 3936 return 16.0; 3937 } else { 3938 return BPP_INVALID; 3939 } 3940 } 3941 } else { 3942 if (!((DSCEnable == false && (DesiredBPP == NonDSCBPP2 || DesiredBPP == NonDSCBPP1 || DesiredBPP <= NonDSCBPP0)) 3943 || (DSCEnable && DesiredBPP >= MinDSCBPP && DesiredBPP <= MaxDSCBPP))) { 3944 return BPP_INVALID; 3945 } else { 3946 return DesiredBPP; 3947 } 3948 } 3949 return BPP_INVALID; 3950 } 3951 3952 void dml31_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_lib) 3953 { 3954 struct vba_vars_st *v = &mode_lib->vba; 3955 3956 int i, j; 3957 unsigned int k, m; 3958 int ReorderingBytes; 3959 int MinPrefetchMode = 0, MaxPrefetchMode = 2; 3960 bool NoChroma = true; 3961 bool EnoughWritebackUnits = true; 3962 bool P2IWith420 = false; 3963 bool DSCOnlyIfNecessaryWithBPP = false; 3964 bool DSC422NativeNotSupported = false; 3965 double MaxTotalVActiveRDBandwidth; 3966 bool ViewportExceedsSurface = false; 3967 bool FMTBufferExceeded = false; 3968 3969 /*MODE SUPPORT, VOLTAGE STATE AND SOC CONFIGURATION*/ 3970 3971 CalculateMinAndMaxPrefetchMode( 3972 mode_lib->vba.AllowDRAMSelfRefreshOrDRAMClockChangeInVblank, 3973 &MinPrefetchMode, &MaxPrefetchMode); 3974 3975 /*Scale Ratio, taps Support Check*/ 3976 3977 v->ScaleRatioAndTapsSupport = true; 3978 for (k = 0; k < v->NumberOfActivePlanes; k++) { 3979 if (v->ScalerEnabled[k] == false 3980 && ((v->SourcePixelFormat[k] != dm_444_64 && v->SourcePixelFormat[k] != dm_444_32 3981 && v->SourcePixelFormat[k] != dm_444_16 && v->SourcePixelFormat[k] != dm_mono_16 3982 && v->SourcePixelFormat[k] != dm_mono_8 && v->SourcePixelFormat[k] != dm_rgbe 3983 && v->SourcePixelFormat[k] != dm_rgbe_alpha) || v->HRatio[k] != 1.0 || v->htaps[k] != 1.0 3984 || v->VRatio[k] != 1.0 || v->vtaps[k] != 1.0)) { 3985 v->ScaleRatioAndTapsSupport = false; 3986 } else if (v->vtaps[k] < 1.0 || v->vtaps[k] > 8.0 || v->htaps[k] < 1.0 || v->htaps[k] > 8.0 3987 || (v->htaps[k] > 1.0 && (v->htaps[k] % 2) == 1) || v->HRatio[k] > v->MaxHSCLRatio 3988 || v->VRatio[k] > v->MaxVSCLRatio || v->HRatio[k] > v->htaps[k] 3989 || v->VRatio[k] > v->vtaps[k] 3990 || (v->SourcePixelFormat[k] != dm_444_64 && v->SourcePixelFormat[k] != dm_444_32 3991 && v->SourcePixelFormat[k] != dm_444_16 && v->SourcePixelFormat[k] != dm_mono_16 3992 && v->SourcePixelFormat[k] != dm_mono_8 && v->SourcePixelFormat[k] != dm_rgbe 3993 && (v->VTAPsChroma[k] < 1 || v->VTAPsChroma[k] > 8 || v->HTAPsChroma[k] < 1 3994 || v->HTAPsChroma[k] > 8 || (v->HTAPsChroma[k] > 1 && v->HTAPsChroma[k] % 2 == 1) 3995 || v->HRatioChroma[k] > v->MaxHSCLRatio 3996 || v->VRatioChroma[k] > v->MaxVSCLRatio 3997 || v->HRatioChroma[k] > v->HTAPsChroma[k] 3998 || v->VRatioChroma[k] > v->VTAPsChroma[k]))) { 3999 v->ScaleRatioAndTapsSupport = false; 4000 } 4001 } 4002 /*Source Format, Pixel Format and Scan Support Check*/ 4003 4004 v->SourceFormatPixelAndScanSupport = true; 4005 for (k = 0; k < v->NumberOfActivePlanes; k++) { 4006 if ((v->SurfaceTiling[k] == dm_sw_linear && (!(v->SourceScan[k] != dm_vert) || v->DCCEnable[k] == true)) 4007 || ((v->SurfaceTiling[k] == dm_sw_64kb_d || v->SurfaceTiling[k] == dm_sw_64kb_d_t 4008 || v->SurfaceTiling[k] == dm_sw_64kb_d_x) && !(v->SourcePixelFormat[k] == dm_444_64))) { 4009 v->SourceFormatPixelAndScanSupport = false; 4010 } 4011 } 4012 /*Bandwidth Support Check*/ 4013 4014 for (k = 0; k < v->NumberOfActivePlanes; k++) { 4015 CalculateBytePerPixelAnd256BBlockSizes( 4016 v->SourcePixelFormat[k], 4017 v->SurfaceTiling[k], 4018 &v->BytePerPixelY[k], 4019 &v->BytePerPixelC[k], 4020 &v->BytePerPixelInDETY[k], 4021 &v->BytePerPixelInDETC[k], 4022 &v->Read256BlockHeightY[k], 4023 &v->Read256BlockHeightC[k], 4024 &v->Read256BlockWidthY[k], 4025 &v->Read256BlockWidthC[k]); 4026 } 4027 for (k = 0; k < v->NumberOfActivePlanes; k++) { 4028 if (v->SourceScan[k] != dm_vert) { 4029 v->SwathWidthYSingleDPP[k] = v->ViewportWidth[k]; 4030 v->SwathWidthCSingleDPP[k] = v->ViewportWidthChroma[k]; 4031 } else { 4032 v->SwathWidthYSingleDPP[k] = v->ViewportHeight[k]; 4033 v->SwathWidthCSingleDPP[k] = v->ViewportHeightChroma[k]; 4034 } 4035 } 4036 for (k = 0; k < v->NumberOfActivePlanes; k++) { 4037 v->ReadBandwidthLuma[k] = v->SwathWidthYSingleDPP[k] * dml_ceil(v->BytePerPixelInDETY[k], 1.0) 4038 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatio[k]; 4039 v->ReadBandwidthChroma[k] = v->SwathWidthYSingleDPP[k] / 2 * dml_ceil(v->BytePerPixelInDETC[k], 2.0) 4040 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatio[k] / 2.0; 4041 } 4042 for (k = 0; k < v->NumberOfActivePlanes; k++) { 4043 if (v->WritebackEnable[k] == true && v->WritebackPixelFormat[k] == dm_444_64) { 4044 v->WriteBandwidth[k] = v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k] 4045 / (v->WritebackSourceHeight[k] * v->HTotal[k] / v->PixelClock[k]) * 8.0; 4046 } else if (v->WritebackEnable[k] == true) { 4047 v->WriteBandwidth[k] = v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k] 4048 / (v->WritebackSourceHeight[k] * v->HTotal[k] / v->PixelClock[k]) * 4.0; 4049 } else { 4050 v->WriteBandwidth[k] = 0.0; 4051 } 4052 } 4053 4054 /*Writeback Latency support check*/ 4055 4056 v->WritebackLatencySupport = true; 4057 for (k = 0; k < v->NumberOfActivePlanes; k++) { 4058 if (v->WritebackEnable[k] == true && (v->WriteBandwidth[k] > v->WritebackInterfaceBufferSize * 1024 / v->WritebackLatency)) { 4059 v->WritebackLatencySupport = false; 4060 } 4061 } 4062 4063 /*Writeback Mode Support Check*/ 4064 4065 v->TotalNumberOfActiveWriteback = 0; 4066 for (k = 0; k < v->NumberOfActivePlanes; k++) { 4067 if (v->WritebackEnable[k] == true) { 4068 v->TotalNumberOfActiveWriteback = v->TotalNumberOfActiveWriteback + 1; 4069 } 4070 } 4071 4072 if (v->TotalNumberOfActiveWriteback > v->MaxNumWriteback) { 4073 EnoughWritebackUnits = false; 4074 } 4075 4076 /*Writeback Scale Ratio and Taps Support Check*/ 4077 4078 v->WritebackScaleRatioAndTapsSupport = true; 4079 for (k = 0; k < v->NumberOfActivePlanes; k++) { 4080 if (v->WritebackEnable[k] == true) { 4081 if (v->WritebackHRatio[k] > v->WritebackMaxHSCLRatio || v->WritebackVRatio[k] > v->WritebackMaxVSCLRatio 4082 || v->WritebackHRatio[k] < v->WritebackMinHSCLRatio 4083 || v->WritebackVRatio[k] < v->WritebackMinVSCLRatio 4084 || v->WritebackHTaps[k] > v->WritebackMaxHSCLTaps 4085 || v->WritebackVTaps[k] > v->WritebackMaxVSCLTaps 4086 || v->WritebackHRatio[k] > v->WritebackHTaps[k] || v->WritebackVRatio[k] > v->WritebackVTaps[k] 4087 || (v->WritebackHTaps[k] > 2.0 && ((v->WritebackHTaps[k] % 2) == 1))) { 4088 v->WritebackScaleRatioAndTapsSupport = false; 4089 } 4090 if (2.0 * v->WritebackDestinationWidth[k] * (v->WritebackVTaps[k] - 1) * 57 > v->WritebackLineBufferSize) { 4091 v->WritebackScaleRatioAndTapsSupport = false; 4092 } 4093 } 4094 } 4095 /*Maximum DISPCLK/DPPCLK Support check*/ 4096 4097 v->WritebackRequiredDISPCLK = 0.0; 4098 for (k = 0; k < v->NumberOfActivePlanes; k++) { 4099 if (v->WritebackEnable[k] == true) { 4100 v->WritebackRequiredDISPCLK = dml_max( 4101 v->WritebackRequiredDISPCLK, 4102 dml31_CalculateWriteBackDISPCLK( 4103 v->WritebackPixelFormat[k], 4104 v->PixelClock[k], 4105 v->WritebackHRatio[k], 4106 v->WritebackVRatio[k], 4107 v->WritebackHTaps[k], 4108 v->WritebackVTaps[k], 4109 v->WritebackSourceWidth[k], 4110 v->WritebackDestinationWidth[k], 4111 v->HTotal[k], 4112 v->WritebackLineBufferSize)); 4113 } 4114 } 4115 for (k = 0; k < v->NumberOfActivePlanes; k++) { 4116 if (v->HRatio[k] > 1.0) { 4117 v->PSCL_FACTOR[k] = dml_min( 4118 v->MaxDCHUBToPSCLThroughput, 4119 v->MaxPSCLToLBThroughput * v->HRatio[k] / dml_ceil(v->htaps[k] / 6.0, 1.0)); 4120 } else { 4121 v->PSCL_FACTOR[k] = dml_min(v->MaxDCHUBToPSCLThroughput, v->MaxPSCLToLBThroughput); 4122 } 4123 if (v->BytePerPixelC[k] == 0.0) { 4124 v->PSCL_FACTOR_CHROMA[k] = 0.0; 4125 v->MinDPPCLKUsingSingleDPP[k] = v->PixelClock[k] 4126 * dml_max3( 4127 v->vtaps[k] / 6.0 * dml_min(1.0, v->HRatio[k]), 4128 v->HRatio[k] * v->VRatio[k] / v->PSCL_FACTOR[k], 4129 1.0); 4130 if ((v->htaps[k] > 6.0 || v->vtaps[k] > 6.0) && v->MinDPPCLKUsingSingleDPP[k] < 2.0 * v->PixelClock[k]) { 4131 v->MinDPPCLKUsingSingleDPP[k] = 2.0 * v->PixelClock[k]; 4132 } 4133 } else { 4134 if (v->HRatioChroma[k] > 1.0) { 4135 v->PSCL_FACTOR_CHROMA[k] = dml_min( 4136 v->MaxDCHUBToPSCLThroughput, 4137 v->MaxPSCLToLBThroughput * v->HRatioChroma[k] / dml_ceil(v->HTAPsChroma[k] / 6.0, 1.0)); 4138 } else { 4139 v->PSCL_FACTOR_CHROMA[k] = dml_min(v->MaxDCHUBToPSCLThroughput, v->MaxPSCLToLBThroughput); 4140 } 4141 v->MinDPPCLKUsingSingleDPP[k] = v->PixelClock[k] 4142 * dml_max5( 4143 v->vtaps[k] / 6.0 * dml_min(1.0, v->HRatio[k]), 4144 v->HRatio[k] * v->VRatio[k] / v->PSCL_FACTOR[k], 4145 v->VTAPsChroma[k] / 6.0 * dml_min(1.0, v->HRatioChroma[k]), 4146 v->HRatioChroma[k] * v->VRatioChroma[k] / v->PSCL_FACTOR_CHROMA[k], 4147 1.0); 4148 if ((v->htaps[k] > 6.0 || v->vtaps[k] > 6.0 || v->HTAPsChroma[k] > 6.0 || v->VTAPsChroma[k] > 6.0) 4149 && v->MinDPPCLKUsingSingleDPP[k] < 2.0 * v->PixelClock[k]) { 4150 v->MinDPPCLKUsingSingleDPP[k] = 2.0 * v->PixelClock[k]; 4151 } 4152 } 4153 } 4154 for (k = 0; k < v->NumberOfActivePlanes; k++) { 4155 int MaximumSwathWidthSupportLuma; 4156 int MaximumSwathWidthSupportChroma; 4157 4158 if (v->SurfaceTiling[k] == dm_sw_linear) { 4159 MaximumSwathWidthSupportLuma = 8192.0; 4160 } else if (v->SourceScan[k] == dm_vert && v->BytePerPixelC[k] > 0) { 4161 MaximumSwathWidthSupportLuma = 2880.0; 4162 } else if (v->SourcePixelFormat[k] == dm_rgbe_alpha) { 4163 MaximumSwathWidthSupportLuma = 3840.0; 4164 } else { 4165 MaximumSwathWidthSupportLuma = 5760.0; 4166 } 4167 4168 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12) { 4169 MaximumSwathWidthSupportChroma = MaximumSwathWidthSupportLuma / 2.0; 4170 } else { 4171 MaximumSwathWidthSupportChroma = MaximumSwathWidthSupportLuma; 4172 } 4173 v->MaximumSwathWidthInLineBufferLuma = v->LineBufferSize * dml_max(v->HRatio[k], 1.0) / v->LBBitPerPixel[k] 4174 / (v->vtaps[k] + dml_max(dml_ceil(v->VRatio[k], 1.0) - 2, 0.0)); 4175 if (v->BytePerPixelC[k] == 0.0) { 4176 v->MaximumSwathWidthInLineBufferChroma = 0; 4177 } else { 4178 v->MaximumSwathWidthInLineBufferChroma = v->LineBufferSize * dml_max(v->HRatioChroma[k], 1.0) / v->LBBitPerPixel[k] 4179 / (v->VTAPsChroma[k] + dml_max(dml_ceil(v->VRatioChroma[k], 1.0) - 2, 0.0)); 4180 } 4181 v->MaximumSwathWidthLuma[k] = dml_min(MaximumSwathWidthSupportLuma, v->MaximumSwathWidthInLineBufferLuma); 4182 v->MaximumSwathWidthChroma[k] = dml_min(MaximumSwathWidthSupportChroma, v->MaximumSwathWidthInLineBufferChroma); 4183 } 4184 4185 CalculateSwathAndDETConfiguration( 4186 true, 4187 v->NumberOfActivePlanes, 4188 v->DETBufferSizeInKByte[0], 4189 v->MaximumSwathWidthLuma, 4190 v->MaximumSwathWidthChroma, 4191 v->SourceScan, 4192 v->SourcePixelFormat, 4193 v->SurfaceTiling, 4194 v->ViewportWidth, 4195 v->ViewportHeight, 4196 v->SurfaceWidthY, 4197 v->SurfaceWidthC, 4198 v->SurfaceHeightY, 4199 v->SurfaceHeightC, 4200 v->Read256BlockHeightY, 4201 v->Read256BlockHeightC, 4202 v->Read256BlockWidthY, 4203 v->Read256BlockWidthC, 4204 v->odm_combine_dummy, 4205 v->BlendingAndTiming, 4206 v->BytePerPixelY, 4207 v->BytePerPixelC, 4208 v->BytePerPixelInDETY, 4209 v->BytePerPixelInDETC, 4210 v->HActive, 4211 v->HRatio, 4212 v->HRatioChroma, 4213 v->NoOfDPPThisState, 4214 v->swath_width_luma_ub_this_state, 4215 v->swath_width_chroma_ub_this_state, 4216 v->SwathWidthYThisState, 4217 v->SwathWidthCThisState, 4218 v->SwathHeightYThisState, 4219 v->SwathHeightCThisState, 4220 v->DETBufferSizeYThisState, 4221 v->DETBufferSizeCThisState, 4222 v->SingleDPPViewportSizeSupportPerPlane, 4223 &v->ViewportSizeSupport[0][0]); 4224 4225 for (i = 0; i < v->soc.num_states; i++) { 4226 for (j = 0; j < 2; j++) { 4227 v->MaxDispclkRoundedDownToDFSGranularity = RoundToDFSGranularityDown(v->MaxDispclk[i], v->DISPCLKDPPCLKVCOSpeed); 4228 v->MaxDppclkRoundedDownToDFSGranularity = RoundToDFSGranularityDown(v->MaxDppclk[i], v->DISPCLKDPPCLKVCOSpeed); 4229 v->RequiredDISPCLK[i][j] = 0.0; 4230 v->DISPCLK_DPPCLK_Support[i][j] = true; 4231 for (k = 0; k < v->NumberOfActivePlanes; k++) { 4232 v->PlaneRequiredDISPCLKWithoutODMCombine = v->PixelClock[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) 4233 * (1.0 + v->DISPCLKRampingMargin / 100.0); 4234 if ((v->PlaneRequiredDISPCLKWithoutODMCombine >= v->MaxDispclk[i] 4235 && v->MaxDispclk[i] == v->MaxDispclk[v->soc.num_states - 1] 4236 && v->MaxDppclk[i] == v->MaxDppclk[v->soc.num_states - 1])) { 4237 v->PlaneRequiredDISPCLKWithoutODMCombine = v->PixelClock[k] 4238 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0); 4239 } 4240 v->PlaneRequiredDISPCLKWithODMCombine2To1 = v->PixelClock[k] / 2 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) 4241 * (1 + v->DISPCLKRampingMargin / 100.0); 4242 if ((v->PlaneRequiredDISPCLKWithODMCombine2To1 >= v->MaxDispclk[i] 4243 && v->MaxDispclk[i] == v->MaxDispclk[v->soc.num_states - 1] 4244 && v->MaxDppclk[i] == v->MaxDppclk[v->soc.num_states - 1])) { 4245 v->PlaneRequiredDISPCLKWithODMCombine2To1 = v->PixelClock[k] / 2 4246 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0); 4247 } 4248 v->PlaneRequiredDISPCLKWithODMCombine4To1 = v->PixelClock[k] / 4 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) 4249 * (1 + v->DISPCLKRampingMargin / 100.0); 4250 if ((v->PlaneRequiredDISPCLKWithODMCombine4To1 >= v->MaxDispclk[i] 4251 && v->MaxDispclk[i] == v->MaxDispclk[v->soc.num_states - 1] 4252 && v->MaxDppclk[i] == v->MaxDppclk[v->soc.num_states - 1])) { 4253 v->PlaneRequiredDISPCLKWithODMCombine4To1 = v->PixelClock[k] / 4 4254 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0); 4255 } 4256 4257 if (v->ODMCombinePolicy == dm_odm_combine_policy_none 4258 || !(v->Output[k] == dm_dp || 4259 v->Output[k] == dm_edp)) { 4260 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_disabled; 4261 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithoutODMCombine; 4262 4263 if (v->HActive[k] / 2 > DCN31_MAX_FMT_420_BUFFER_WIDTH) 4264 FMTBufferExceeded = true; 4265 } else if (v->ODMCombinePolicy == dm_odm_combine_policy_2to1) { 4266 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1; 4267 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine2To1; 4268 } else if (v->ODMCombinePolicy == dm_odm_combine_policy_4to1 4269 || v->PlaneRequiredDISPCLKWithODMCombine2To1 > v->MaxDispclkRoundedDownToDFSGranularity) { 4270 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_4to1; 4271 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine4To1; 4272 } else if (v->PlaneRequiredDISPCLKWithoutODMCombine > v->MaxDispclkRoundedDownToDFSGranularity) { 4273 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1; 4274 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine2To1; 4275 } else { 4276 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_disabled; 4277 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithoutODMCombine; 4278 } 4279 if (v->DSCEnabled[k] && v->HActive[k] > DCN31_MAX_DSC_IMAGE_WIDTH 4280 && v->ODMCombineEnablePerState[i][k] != dm_odm_combine_mode_4to1) { 4281 if (v->HActive[k] / 2 > DCN31_MAX_DSC_IMAGE_WIDTH) { 4282 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_4to1; 4283 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine4To1; 4284 } else { 4285 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1; 4286 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine2To1; 4287 } 4288 } 4289 if (v->OutputFormat[k] == dm_420 && v->HActive[k] > DCN31_MAX_FMT_420_BUFFER_WIDTH 4290 && v->ODMCombineEnablePerState[i][k] != dm_odm_combine_mode_4to1) { 4291 if (v->HActive[k] / 2 > DCN31_MAX_FMT_420_BUFFER_WIDTH) { 4292 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_4to1; 4293 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine4To1; 4294 4295 if (v->HActive[k] / 4 > DCN31_MAX_FMT_420_BUFFER_WIDTH) 4296 FMTBufferExceeded = true; 4297 } else { 4298 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1; 4299 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine2To1; 4300 } 4301 } 4302 if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1) { 4303 v->MPCCombine[i][j][k] = false; 4304 v->NoOfDPP[i][j][k] = 4; 4305 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 4; 4306 } else if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) { 4307 v->MPCCombine[i][j][k] = false; 4308 v->NoOfDPP[i][j][k] = 2; 4309 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 2; 4310 } else if ((v->WhenToDoMPCCombine == dm_mpc_never 4311 || (v->MinDPPCLKUsingSingleDPP[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) 4312 <= v->MaxDppclkRoundedDownToDFSGranularity && v->SingleDPPViewportSizeSupportPerPlane[k] == true))) { 4313 v->MPCCombine[i][j][k] = false; 4314 v->NoOfDPP[i][j][k] = 1; 4315 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0); 4316 } else { 4317 v->MPCCombine[i][j][k] = true; 4318 v->NoOfDPP[i][j][k] = 2; 4319 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 2.0; 4320 } 4321 v->RequiredDISPCLK[i][j] = dml_max(v->RequiredDISPCLK[i][j], v->PlaneRequiredDISPCLK); 4322 if ((v->MinDPPCLKUsingSingleDPP[k] / v->NoOfDPP[i][j][k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) 4323 > v->MaxDppclkRoundedDownToDFSGranularity) 4324 || (v->PlaneRequiredDISPCLK > v->MaxDispclkRoundedDownToDFSGranularity)) { 4325 v->DISPCLK_DPPCLK_Support[i][j] = false; 4326 } 4327 } 4328 v->TotalNumberOfActiveDPP[i][j] = 0; 4329 v->TotalNumberOfSingleDPPPlanes[i][j] = 0; 4330 for (k = 0; k < v->NumberOfActivePlanes; k++) { 4331 v->TotalNumberOfActiveDPP[i][j] = v->TotalNumberOfActiveDPP[i][j] + v->NoOfDPP[i][j][k]; 4332 if (v->NoOfDPP[i][j][k] == 1) 4333 v->TotalNumberOfSingleDPPPlanes[i][j] = v->TotalNumberOfSingleDPPPlanes[i][j] + 1; 4334 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10 4335 || v->SourcePixelFormat[k] == dm_420_12 || v->SourcePixelFormat[k] == dm_rgbe_alpha) 4336 NoChroma = false; 4337 } 4338 4339 // UPTO 4340 if (j == 1 && v->WhenToDoMPCCombine != dm_mpc_never 4341 && !UnboundedRequest(v->UseUnboundedRequesting, v->TotalNumberOfActiveDPP[i][j], NoChroma, v->Output[0])) { 4342 while (!(v->TotalNumberOfActiveDPP[i][j] >= v->MaxNumDPP || v->TotalNumberOfSingleDPPPlanes[i][j] == 0)) { 4343 double BWOfNonSplitPlaneOfMaximumBandwidth; 4344 unsigned int NumberOfNonSplitPlaneOfMaximumBandwidth; 4345 BWOfNonSplitPlaneOfMaximumBandwidth = 0; 4346 NumberOfNonSplitPlaneOfMaximumBandwidth = 0; 4347 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 4348 if (v->ReadBandwidthLuma[k] + v->ReadBandwidthChroma[k] > BWOfNonSplitPlaneOfMaximumBandwidth 4349 && v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_disabled && v->MPCCombine[i][j][k] == false) { 4350 BWOfNonSplitPlaneOfMaximumBandwidth = v->ReadBandwidthLuma[k] + v->ReadBandwidthChroma[k]; 4351 NumberOfNonSplitPlaneOfMaximumBandwidth = k; 4352 } 4353 } 4354 v->MPCCombine[i][j][NumberOfNonSplitPlaneOfMaximumBandwidth] = true; 4355 v->NoOfDPP[i][j][NumberOfNonSplitPlaneOfMaximumBandwidth] = 2; 4356 v->RequiredDPPCLK[i][j][NumberOfNonSplitPlaneOfMaximumBandwidth] = 4357 v->MinDPPCLKUsingSingleDPP[NumberOfNonSplitPlaneOfMaximumBandwidth] 4358 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100) / 2; 4359 v->TotalNumberOfActiveDPP[i][j] = v->TotalNumberOfActiveDPP[i][j] + 1; 4360 v->TotalNumberOfSingleDPPPlanes[i][j] = v->TotalNumberOfSingleDPPPlanes[i][j] - 1; 4361 } 4362 } 4363 if (v->TotalNumberOfActiveDPP[i][j] > v->MaxNumDPP) { 4364 v->RequiredDISPCLK[i][j] = 0.0; 4365 v->DISPCLK_DPPCLK_Support[i][j] = true; 4366 for (k = 0; k < v->NumberOfActivePlanes; k++) { 4367 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_disabled; 4368 if (v->SingleDPPViewportSizeSupportPerPlane[k] == false && v->WhenToDoMPCCombine != dm_mpc_never) { 4369 v->MPCCombine[i][j][k] = true; 4370 v->NoOfDPP[i][j][k] = 2; 4371 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] 4372 * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 2.0; 4373 } else { 4374 v->MPCCombine[i][j][k] = false; 4375 v->NoOfDPP[i][j][k] = 1; 4376 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] 4377 * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0); 4378 } 4379 if (!(v->MaxDispclk[i] == v->MaxDispclk[v->soc.num_states - 1] 4380 && v->MaxDppclk[i] == v->MaxDppclk[v->soc.num_states - 1])) { 4381 v->PlaneRequiredDISPCLK = v->PixelClock[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) 4382 * (1.0 + v->DISPCLKRampingMargin / 100.0); 4383 } else { 4384 v->PlaneRequiredDISPCLK = v->PixelClock[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0); 4385 } 4386 v->RequiredDISPCLK[i][j] = dml_max(v->RequiredDISPCLK[i][j], v->PlaneRequiredDISPCLK); 4387 if ((v->MinDPPCLKUsingSingleDPP[k] / v->NoOfDPP[i][j][k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) 4388 > v->MaxDppclkRoundedDownToDFSGranularity) 4389 || (v->PlaneRequiredDISPCLK > v->MaxDispclkRoundedDownToDFSGranularity)) { 4390 v->DISPCLK_DPPCLK_Support[i][j] = false; 4391 } 4392 } 4393 v->TotalNumberOfActiveDPP[i][j] = 0.0; 4394 for (k = 0; k < v->NumberOfActivePlanes; k++) { 4395 v->TotalNumberOfActiveDPP[i][j] = v->TotalNumberOfActiveDPP[i][j] + v->NoOfDPP[i][j][k]; 4396 } 4397 } 4398 v->RequiredDISPCLK[i][j] = dml_max(v->RequiredDISPCLK[i][j], v->WritebackRequiredDISPCLK); 4399 if (v->MaxDispclkRoundedDownToDFSGranularity < v->WritebackRequiredDISPCLK) { 4400 v->DISPCLK_DPPCLK_Support[i][j] = false; 4401 } 4402 } 4403 } 4404 4405 /*Total Available Pipes Support Check*/ 4406 4407 for (i = 0; i < v->soc.num_states; i++) { 4408 for (j = 0; j < 2; j++) { 4409 if (v->TotalNumberOfActiveDPP[i][j] <= v->MaxNumDPP) { 4410 v->TotalAvailablePipesSupport[i][j] = true; 4411 } else { 4412 v->TotalAvailablePipesSupport[i][j] = false; 4413 } 4414 } 4415 } 4416 /*Display IO and DSC Support Check*/ 4417 4418 v->NonsupportedDSCInputBPC = false; 4419 for (k = 0; k < v->NumberOfActivePlanes; k++) { 4420 if (!(v->DSCInputBitPerComponent[k] == 12.0 || v->DSCInputBitPerComponent[k] == 10.0 || v->DSCInputBitPerComponent[k] == 8.0) 4421 || v->DSCInputBitPerComponent[k] > v->MaximumDSCBitsPerComponent) { 4422 v->NonsupportedDSCInputBPC = true; 4423 } 4424 } 4425 4426 /*Number Of DSC Slices*/ 4427 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 4428 if (v->BlendingAndTiming[k] == k) { 4429 if (v->PixelClockBackEnd[k] > 3200) { 4430 v->NumberOfDSCSlices[k] = dml_ceil(v->PixelClockBackEnd[k] / 400.0, 4.0); 4431 } else if (v->PixelClockBackEnd[k] > 1360) { 4432 v->NumberOfDSCSlices[k] = 8; 4433 } else if (v->PixelClockBackEnd[k] > 680) { 4434 v->NumberOfDSCSlices[k] = 4; 4435 } else if (v->PixelClockBackEnd[k] > 340) { 4436 v->NumberOfDSCSlices[k] = 2; 4437 } else { 4438 v->NumberOfDSCSlices[k] = 1; 4439 } 4440 } else { 4441 v->NumberOfDSCSlices[k] = 0; 4442 } 4443 } 4444 4445 for (i = 0; i < v->soc.num_states; i++) { 4446 for (k = 0; k < v->NumberOfActivePlanes; k++) { 4447 v->RequiresDSC[i][k] = false; 4448 v->RequiresFEC[i][k] = false; 4449 if (v->BlendingAndTiming[k] == k) { 4450 if (v->Output[k] == dm_hdmi) { 4451 v->RequiresDSC[i][k] = false; 4452 v->RequiresFEC[i][k] = false; 4453 v->OutputBppPerState[i][k] = TruncToValidBPP( 4454 dml_min(600.0, v->PHYCLKPerState[i]) * 10, 4455 3, 4456 v->HTotal[k], 4457 v->HActive[k], 4458 v->PixelClockBackEnd[k], 4459 v->ForcedOutputLinkBPP[k], 4460 false, 4461 v->Output[k], 4462 v->OutputFormat[k], 4463 v->DSCInputBitPerComponent[k], 4464 v->NumberOfDSCSlices[k], 4465 v->AudioSampleRate[k], 4466 v->AudioSampleLayout[k], 4467 v->ODMCombineEnablePerState[i][k]); 4468 } else if (v->Output[k] == dm_dp || v->Output[k] == dm_edp) { 4469 if (v->DSCEnable[k] == true) { 4470 v->RequiresDSC[i][k] = true; 4471 v->LinkDSCEnable = true; 4472 if (v->Output[k] == dm_dp) { 4473 v->RequiresFEC[i][k] = true; 4474 } else { 4475 v->RequiresFEC[i][k] = false; 4476 } 4477 } else { 4478 v->RequiresDSC[i][k] = false; 4479 v->LinkDSCEnable = false; 4480 v->RequiresFEC[i][k] = false; 4481 } 4482 4483 v->Outbpp = BPP_INVALID; 4484 if (v->PHYCLKPerState[i] >= 270.0) { 4485 v->Outbpp = TruncToValidBPP( 4486 (1.0 - v->Downspreading / 100.0) * 2700, 4487 v->OutputLinkDPLanes[k], 4488 v->HTotal[k], 4489 v->HActive[k], 4490 v->PixelClockBackEnd[k], 4491 v->ForcedOutputLinkBPP[k], 4492 v->LinkDSCEnable, 4493 v->Output[k], 4494 v->OutputFormat[k], 4495 v->DSCInputBitPerComponent[k], 4496 v->NumberOfDSCSlices[k], 4497 v->AudioSampleRate[k], 4498 v->AudioSampleLayout[k], 4499 v->ODMCombineEnablePerState[i][k]); 4500 v->OutputBppPerState[i][k] = v->Outbpp; 4501 // TODO: Need some other way to handle this nonsense 4502 // v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " HBR" 4503 } 4504 if (v->Outbpp == BPP_INVALID && v->PHYCLKPerState[i] >= 540.0) { 4505 v->Outbpp = TruncToValidBPP( 4506 (1.0 - v->Downspreading / 100.0) * 5400, 4507 v->OutputLinkDPLanes[k], 4508 v->HTotal[k], 4509 v->HActive[k], 4510 v->PixelClockBackEnd[k], 4511 v->ForcedOutputLinkBPP[k], 4512 v->LinkDSCEnable, 4513 v->Output[k], 4514 v->OutputFormat[k], 4515 v->DSCInputBitPerComponent[k], 4516 v->NumberOfDSCSlices[k], 4517 v->AudioSampleRate[k], 4518 v->AudioSampleLayout[k], 4519 v->ODMCombineEnablePerState[i][k]); 4520 v->OutputBppPerState[i][k] = v->Outbpp; 4521 // TODO: Need some other way to handle this nonsense 4522 // v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " HBR2" 4523 } 4524 if (v->Outbpp == BPP_INVALID && v->PHYCLKPerState[i] >= 810.0) { 4525 v->Outbpp = TruncToValidBPP( 4526 (1.0 - v->Downspreading / 100.0) * 8100, 4527 v->OutputLinkDPLanes[k], 4528 v->HTotal[k], 4529 v->HActive[k], 4530 v->PixelClockBackEnd[k], 4531 v->ForcedOutputLinkBPP[k], 4532 v->LinkDSCEnable, 4533 v->Output[k], 4534 v->OutputFormat[k], 4535 v->DSCInputBitPerComponent[k], 4536 v->NumberOfDSCSlices[k], 4537 v->AudioSampleRate[k], 4538 v->AudioSampleLayout[k], 4539 v->ODMCombineEnablePerState[i][k]); 4540 v->OutputBppPerState[i][k] = v->Outbpp; 4541 // TODO: Need some other way to handle this nonsense 4542 // v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " HBR3" 4543 } 4544 if (v->Outbpp == BPP_INVALID && v->PHYCLKD18PerState[i] >= 10000.0 / 18) { 4545 v->Outbpp = TruncToValidBPP( 4546 (1.0 - v->Downspreading / 100.0) * 10000, 4547 4, 4548 v->HTotal[k], 4549 v->HActive[k], 4550 v->PixelClockBackEnd[k], 4551 v->ForcedOutputLinkBPP[k], 4552 v->LinkDSCEnable, 4553 v->Output[k], 4554 v->OutputFormat[k], 4555 v->DSCInputBitPerComponent[k], 4556 v->NumberOfDSCSlices[k], 4557 v->AudioSampleRate[k], 4558 v->AudioSampleLayout[k], 4559 v->ODMCombineEnablePerState[i][k]); 4560 v->OutputBppPerState[i][k] = v->Outbpp; 4561 //v->OutputTypeAndRatePerState[i][k] = v->Output[k] & "10x4"; 4562 } 4563 if (v->Outbpp == BPP_INVALID && v->PHYCLKD18PerState[i] >= 12000.0 / 18) { 4564 v->Outbpp = TruncToValidBPP( 4565 12000, 4566 4, 4567 v->HTotal[k], 4568 v->HActive[k], 4569 v->PixelClockBackEnd[k], 4570 v->ForcedOutputLinkBPP[k], 4571 v->LinkDSCEnable, 4572 v->Output[k], 4573 v->OutputFormat[k], 4574 v->DSCInputBitPerComponent[k], 4575 v->NumberOfDSCSlices[k], 4576 v->AudioSampleRate[k], 4577 v->AudioSampleLayout[k], 4578 v->ODMCombineEnablePerState[i][k]); 4579 v->OutputBppPerState[i][k] = v->Outbpp; 4580 //v->OutputTypeAndRatePerState[i][k] = v->Output[k] & "12x4"; 4581 } 4582 } 4583 } else { 4584 v->OutputBppPerState[i][k] = 0; 4585 } 4586 } 4587 } 4588 4589 for (i = 0; i < v->soc.num_states; i++) { 4590 v->LinkCapacitySupport[i] = true; 4591 for (k = 0; k < v->NumberOfActivePlanes; k++) { 4592 if (v->BlendingAndTiming[k] == k 4593 && (v->Output[k] == dm_dp || 4594 v->Output[k] == dm_edp || 4595 v->Output[k] == dm_hdmi) && v->OutputBppPerState[i][k] == 0) { 4596 v->LinkCapacitySupport[i] = false; 4597 } 4598 } 4599 } 4600 4601 // UPTO 2172 4602 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 4603 if (v->BlendingAndTiming[k] == k 4604 && (v->Output[k] == dm_dp || 4605 v->Output[k] == dm_edp || 4606 v->Output[k] == dm_hdmi)) { 4607 if (v->OutputFormat[k] == dm_420 && v->Interlace[k] == 1 && v->ProgressiveToInterlaceUnitInOPP == true) { 4608 P2IWith420 = true; 4609 } 4610 if (v->DSCEnable[k] == true && v->OutputFormat[k] == dm_n422 4611 && !v->DSC422NativeSupport) { 4612 DSC422NativeNotSupported = true; 4613 } 4614 } 4615 } 4616 4617 for (i = 0; i < v->soc.num_states; ++i) { 4618 v->ODMCombine4To1SupportCheckOK[i] = true; 4619 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 4620 if (v->BlendingAndTiming[k] == k && v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1 4621 && (v->ODMCombine4To1Supported == false || v->Output[k] == dm_dp || v->Output[k] == dm_edp 4622 || v->Output[k] == dm_hdmi)) { 4623 v->ODMCombine4To1SupportCheckOK[i] = false; 4624 } 4625 } 4626 } 4627 4628 /* Skip dscclk validation: as long as dispclk is supported, dscclk is also implicitly supported */ 4629 4630 for (i = 0; i < v->soc.num_states; i++) { 4631 v->NotEnoughDSCUnits[i] = false; 4632 v->TotalDSCUnitsRequired = 0.0; 4633 for (k = 0; k < v->NumberOfActivePlanes; k++) { 4634 if (v->RequiresDSC[i][k] == true) { 4635 if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1) { 4636 v->TotalDSCUnitsRequired = v->TotalDSCUnitsRequired + 4.0; 4637 } else if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) { 4638 v->TotalDSCUnitsRequired = v->TotalDSCUnitsRequired + 2.0; 4639 } else { 4640 v->TotalDSCUnitsRequired = v->TotalDSCUnitsRequired + 1.0; 4641 } 4642 } 4643 } 4644 if (v->TotalDSCUnitsRequired > v->NumberOfDSC) { 4645 v->NotEnoughDSCUnits[i] = true; 4646 } 4647 } 4648 /*DSC Delay per state*/ 4649 4650 for (i = 0; i < v->soc.num_states; i++) { 4651 for (k = 0; k < v->NumberOfActivePlanes; k++) { 4652 if (v->OutputBppPerState[i][k] == BPP_INVALID) { 4653 v->BPP = 0.0; 4654 } else { 4655 v->BPP = v->OutputBppPerState[i][k]; 4656 } 4657 if (v->RequiresDSC[i][k] == true && v->BPP != 0.0) { 4658 if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_disabled) { 4659 v->DSCDelayPerState[i][k] = dscceComputeDelay( 4660 v->DSCInputBitPerComponent[k], 4661 v->BPP, 4662 dml_ceil(1.0 * v->HActive[k] / v->NumberOfDSCSlices[k], 1.0), 4663 v->NumberOfDSCSlices[k], 4664 v->OutputFormat[k], 4665 v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k]); 4666 } else if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) { 4667 v->DSCDelayPerState[i][k] = 2.0 4668 * (dscceComputeDelay( 4669 v->DSCInputBitPerComponent[k], 4670 v->BPP, 4671 dml_ceil(1.0 * v->HActive[k] / v->NumberOfDSCSlices[k], 1.0), 4672 v->NumberOfDSCSlices[k] / 2, 4673 v->OutputFormat[k], 4674 v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k])); 4675 } else { 4676 v->DSCDelayPerState[i][k] = 4.0 4677 * (dscceComputeDelay( 4678 v->DSCInputBitPerComponent[k], 4679 v->BPP, 4680 dml_ceil(1.0 * v->HActive[k] / v->NumberOfDSCSlices[k], 1.0), 4681 v->NumberOfDSCSlices[k] / 4, 4682 v->OutputFormat[k], 4683 v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k])); 4684 } 4685 v->DSCDelayPerState[i][k] = v->DSCDelayPerState[i][k] * v->PixelClock[k] / v->PixelClockBackEnd[k]; 4686 } else { 4687 v->DSCDelayPerState[i][k] = 0.0; 4688 } 4689 } 4690 for (k = 0; k < v->NumberOfActivePlanes; k++) { 4691 for (m = 0; m < v->NumberOfActivePlanes; m++) { 4692 if (v->BlendingAndTiming[k] == m && v->RequiresDSC[i][m] == true) { 4693 v->DSCDelayPerState[i][k] = v->DSCDelayPerState[i][m]; 4694 } 4695 } 4696 } 4697 } 4698 4699 //Calculate Swath, DET Configuration, DCFCLKDeepSleep 4700 // 4701 for (i = 0; i < v->soc.num_states; ++i) { 4702 for (j = 0; j <= 1; ++j) { 4703 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 4704 v->RequiredDPPCLKThisState[k] = v->RequiredDPPCLK[i][j][k]; 4705 v->NoOfDPPThisState[k] = v->NoOfDPP[i][j][k]; 4706 v->ODMCombineEnableThisState[k] = v->ODMCombineEnablePerState[i][k]; 4707 } 4708 4709 CalculateSwathAndDETConfiguration( 4710 false, 4711 v->NumberOfActivePlanes, 4712 v->DETBufferSizeInKByte[0], 4713 v->MaximumSwathWidthLuma, 4714 v->MaximumSwathWidthChroma, 4715 v->SourceScan, 4716 v->SourcePixelFormat, 4717 v->SurfaceTiling, 4718 v->ViewportWidth, 4719 v->ViewportHeight, 4720 v->SurfaceWidthY, 4721 v->SurfaceWidthC, 4722 v->SurfaceHeightY, 4723 v->SurfaceHeightC, 4724 v->Read256BlockHeightY, 4725 v->Read256BlockHeightC, 4726 v->Read256BlockWidthY, 4727 v->Read256BlockWidthC, 4728 v->ODMCombineEnableThisState, 4729 v->BlendingAndTiming, 4730 v->BytePerPixelY, 4731 v->BytePerPixelC, 4732 v->BytePerPixelInDETY, 4733 v->BytePerPixelInDETC, 4734 v->HActive, 4735 v->HRatio, 4736 v->HRatioChroma, 4737 v->NoOfDPPThisState, 4738 v->swath_width_luma_ub_this_state, 4739 v->swath_width_chroma_ub_this_state, 4740 v->SwathWidthYThisState, 4741 v->SwathWidthCThisState, 4742 v->SwathHeightYThisState, 4743 v->SwathHeightCThisState, 4744 v->DETBufferSizeYThisState, 4745 v->DETBufferSizeCThisState, 4746 v->dummystring, 4747 &v->ViewportSizeSupport[i][j]); 4748 4749 CalculateDCFCLKDeepSleep( 4750 mode_lib, 4751 v->NumberOfActivePlanes, 4752 v->BytePerPixelY, 4753 v->BytePerPixelC, 4754 v->VRatio, 4755 v->VRatioChroma, 4756 v->SwathWidthYThisState, 4757 v->SwathWidthCThisState, 4758 v->NoOfDPPThisState, 4759 v->HRatio, 4760 v->HRatioChroma, 4761 v->PixelClock, 4762 v->PSCL_FACTOR, 4763 v->PSCL_FACTOR_CHROMA, 4764 v->RequiredDPPCLKThisState, 4765 v->ReadBandwidthLuma, 4766 v->ReadBandwidthChroma, 4767 v->ReturnBusWidth, 4768 &v->ProjectedDCFCLKDeepSleep[i][j]); 4769 4770 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 4771 v->swath_width_luma_ub_all_states[i][j][k] = v->swath_width_luma_ub_this_state[k]; 4772 v->swath_width_chroma_ub_all_states[i][j][k] = v->swath_width_chroma_ub_this_state[k]; 4773 v->SwathWidthYAllStates[i][j][k] = v->SwathWidthYThisState[k]; 4774 v->SwathWidthCAllStates[i][j][k] = v->SwathWidthCThisState[k]; 4775 v->SwathHeightYAllStates[i][j][k] = v->SwathHeightYThisState[k]; 4776 v->SwathHeightCAllStates[i][j][k] = v->SwathHeightCThisState[k]; 4777 v->DETBufferSizeYAllStates[i][j][k] = v->DETBufferSizeYThisState[k]; 4778 v->DETBufferSizeCAllStates[i][j][k] = v->DETBufferSizeCThisState[k]; 4779 } 4780 } 4781 } 4782 4783 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 4784 v->cursor_bw[k] = v->NumberOfCursors[k] * v->CursorWidth[k][0] * v->CursorBPP[k][0] / 8.0 4785 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatio[k]; 4786 } 4787 4788 for (i = 0; i < v->soc.num_states; i++) { 4789 for (j = 0; j < 2; j++) { 4790 bool NotUrgentLatencyHiding[DC__NUM_DPP__MAX]; 4791 4792 for (k = 0; k < v->NumberOfActivePlanes; k++) { 4793 v->swath_width_luma_ub_this_state[k] = v->swath_width_luma_ub_all_states[i][j][k]; 4794 v->swath_width_chroma_ub_this_state[k] = v->swath_width_chroma_ub_all_states[i][j][k]; 4795 v->SwathWidthYThisState[k] = v->SwathWidthYAllStates[i][j][k]; 4796 v->SwathWidthCThisState[k] = v->SwathWidthCAllStates[i][j][k]; 4797 v->SwathHeightYThisState[k] = v->SwathHeightYAllStates[i][j][k]; 4798 v->SwathHeightCThisState[k] = v->SwathHeightCAllStates[i][j][k]; 4799 v->DETBufferSizeYThisState[k] = v->DETBufferSizeYAllStates[i][j][k]; 4800 v->DETBufferSizeCThisState[k] = v->DETBufferSizeCAllStates[i][j][k]; 4801 } 4802 4803 v->TotalNumberOfDCCActiveDPP[i][j] = 0; 4804 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 4805 if (v->DCCEnable[k] == true) { 4806 v->TotalNumberOfDCCActiveDPP[i][j] = v->TotalNumberOfDCCActiveDPP[i][j] + v->NoOfDPP[i][j][k]; 4807 } 4808 } 4809 4810 for (k = 0; k < v->NumberOfActivePlanes; k++) { 4811 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10 4812 || v->SourcePixelFormat[k] == dm_420_12 || v->SourcePixelFormat[k] == dm_rgbe_alpha) { 4813 4814 if ((v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12) 4815 && v->SourceScan[k] != dm_vert) { 4816 v->PTEBufferSizeInRequestsForLuma = (v->PTEBufferSizeInRequestsLuma + v->PTEBufferSizeInRequestsChroma) 4817 / 2; 4818 v->PTEBufferSizeInRequestsForChroma = v->PTEBufferSizeInRequestsForLuma; 4819 } else { 4820 v->PTEBufferSizeInRequestsForLuma = v->PTEBufferSizeInRequestsLuma; 4821 v->PTEBufferSizeInRequestsForChroma = v->PTEBufferSizeInRequestsChroma; 4822 } 4823 4824 v->PDEAndMetaPTEBytesPerFrameC = CalculateVMAndRowBytes( 4825 mode_lib, 4826 v->DCCEnable[k], 4827 v->Read256BlockHeightC[k], 4828 v->Read256BlockWidthC[k], 4829 v->SourcePixelFormat[k], 4830 v->SurfaceTiling[k], 4831 v->BytePerPixelC[k], 4832 v->SourceScan[k], 4833 v->SwathWidthCThisState[k], 4834 v->ViewportHeightChroma[k], 4835 v->GPUVMEnable, 4836 v->HostVMEnable, 4837 v->HostVMMaxNonCachedPageTableLevels, 4838 v->GPUVMMinPageSize, 4839 v->HostVMMinPageSize, 4840 v->PTEBufferSizeInRequestsForChroma, 4841 v->PitchC[k], 4842 0.0, 4843 &v->MacroTileWidthC[k], 4844 &v->MetaRowBytesC, 4845 &v->DPTEBytesPerRowC, 4846 &v->PTEBufferSizeNotExceededC[i][j][k], 4847 &v->dummyinteger7, 4848 &v->dpte_row_height_chroma[k], 4849 &v->dummyinteger28, 4850 &v->dummyinteger26, 4851 &v->dummyinteger23, 4852 &v->meta_row_height_chroma[k], 4853 &v->dummyinteger8, 4854 &v->dummyinteger9, 4855 &v->dummyinteger19, 4856 &v->dummyinteger20, 4857 &v->dummyinteger17, 4858 &v->dummyinteger10, 4859 &v->dummyinteger11); 4860 4861 v->PrefetchLinesC[i][j][k] = CalculatePrefetchSourceLines( 4862 mode_lib, 4863 v->VRatioChroma[k], 4864 v->VTAPsChroma[k], 4865 v->Interlace[k], 4866 v->ProgressiveToInterlaceUnitInOPP, 4867 v->SwathHeightCThisState[k], 4868 v->ViewportYStartC[k], 4869 &v->PrefillC[k], 4870 &v->MaxNumSwC[k]); 4871 } else { 4872 v->PTEBufferSizeInRequestsForLuma = v->PTEBufferSizeInRequestsLuma + v->PTEBufferSizeInRequestsChroma; 4873 v->PTEBufferSizeInRequestsForChroma = 0; 4874 v->PDEAndMetaPTEBytesPerFrameC = 0.0; 4875 v->MetaRowBytesC = 0.0; 4876 v->DPTEBytesPerRowC = 0.0; 4877 v->PrefetchLinesC[i][j][k] = 0.0; 4878 v->PTEBufferSizeNotExceededC[i][j][k] = true; 4879 } 4880 v->PDEAndMetaPTEBytesPerFrameY = CalculateVMAndRowBytes( 4881 mode_lib, 4882 v->DCCEnable[k], 4883 v->Read256BlockHeightY[k], 4884 v->Read256BlockWidthY[k], 4885 v->SourcePixelFormat[k], 4886 v->SurfaceTiling[k], 4887 v->BytePerPixelY[k], 4888 v->SourceScan[k], 4889 v->SwathWidthYThisState[k], 4890 v->ViewportHeight[k], 4891 v->GPUVMEnable, 4892 v->HostVMEnable, 4893 v->HostVMMaxNonCachedPageTableLevels, 4894 v->GPUVMMinPageSize, 4895 v->HostVMMinPageSize, 4896 v->PTEBufferSizeInRequestsForLuma, 4897 v->PitchY[k], 4898 v->DCCMetaPitchY[k], 4899 &v->MacroTileWidthY[k], 4900 &v->MetaRowBytesY, 4901 &v->DPTEBytesPerRowY, 4902 &v->PTEBufferSizeNotExceededY[i][j][k], 4903 &v->dummyinteger7, 4904 &v->dpte_row_height[k], 4905 &v->dummyinteger29, 4906 &v->dummyinteger27, 4907 &v->dummyinteger24, 4908 &v->meta_row_height[k], 4909 &v->dummyinteger25, 4910 &v->dpte_group_bytes[k], 4911 &v->dummyinteger21, 4912 &v->dummyinteger22, 4913 &v->dummyinteger18, 4914 &v->dummyinteger5, 4915 &v->dummyinteger6); 4916 v->PrefetchLinesY[i][j][k] = CalculatePrefetchSourceLines( 4917 mode_lib, 4918 v->VRatio[k], 4919 v->vtaps[k], 4920 v->Interlace[k], 4921 v->ProgressiveToInterlaceUnitInOPP, 4922 v->SwathHeightYThisState[k], 4923 v->ViewportYStartY[k], 4924 &v->PrefillY[k], 4925 &v->MaxNumSwY[k]); 4926 v->PDEAndMetaPTEBytesPerFrame[i][j][k] = v->PDEAndMetaPTEBytesPerFrameY + v->PDEAndMetaPTEBytesPerFrameC; 4927 v->MetaRowBytes[i][j][k] = v->MetaRowBytesY + v->MetaRowBytesC; 4928 v->DPTEBytesPerRow[i][j][k] = v->DPTEBytesPerRowY + v->DPTEBytesPerRowC; 4929 4930 CalculateRowBandwidth( 4931 v->GPUVMEnable, 4932 v->SourcePixelFormat[k], 4933 v->VRatio[k], 4934 v->VRatioChroma[k], 4935 v->DCCEnable[k], 4936 v->HTotal[k] / v->PixelClock[k], 4937 v->MetaRowBytesY, 4938 v->MetaRowBytesC, 4939 v->meta_row_height[k], 4940 v->meta_row_height_chroma[k], 4941 v->DPTEBytesPerRowY, 4942 v->DPTEBytesPerRowC, 4943 v->dpte_row_height[k], 4944 v->dpte_row_height_chroma[k], 4945 &v->meta_row_bandwidth[i][j][k], 4946 &v->dpte_row_bandwidth[i][j][k]); 4947 } 4948 v->UrgLatency[i] = CalculateUrgentLatency( 4949 v->UrgentLatencyPixelDataOnly, 4950 v->UrgentLatencyPixelMixedWithVMData, 4951 v->UrgentLatencyVMDataOnly, 4952 v->DoUrgentLatencyAdjustment, 4953 v->UrgentLatencyAdjustmentFabricClockComponent, 4954 v->UrgentLatencyAdjustmentFabricClockReference, 4955 v->FabricClockPerState[i]); 4956 4957 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 4958 CalculateUrgentBurstFactor( 4959 v->swath_width_luma_ub_this_state[k], 4960 v->swath_width_chroma_ub_this_state[k], 4961 v->SwathHeightYThisState[k], 4962 v->SwathHeightCThisState[k], 4963 v->HTotal[k] / v->PixelClock[k], 4964 v->UrgLatency[i], 4965 v->CursorBufferSize, 4966 v->CursorWidth[k][0], 4967 v->CursorBPP[k][0], 4968 v->VRatio[k], 4969 v->VRatioChroma[k], 4970 v->BytePerPixelInDETY[k], 4971 v->BytePerPixelInDETC[k], 4972 v->DETBufferSizeYThisState[k], 4973 v->DETBufferSizeCThisState[k], 4974 &v->UrgentBurstFactorCursor[k], 4975 &v->UrgentBurstFactorLuma[k], 4976 &v->UrgentBurstFactorChroma[k], 4977 &NotUrgentLatencyHiding[k]); 4978 } 4979 4980 v->NotEnoughUrgentLatencyHidingA[i][j] = false; 4981 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 4982 if (NotUrgentLatencyHiding[k]) { 4983 v->NotEnoughUrgentLatencyHidingA[i][j] = true; 4984 } 4985 } 4986 4987 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 4988 v->VActivePixelBandwidth[i][j][k] = v->ReadBandwidthLuma[k] * v->UrgentBurstFactorLuma[k] 4989 + v->ReadBandwidthChroma[k] * v->UrgentBurstFactorChroma[k]; 4990 v->VActiveCursorBandwidth[i][j][k] = v->cursor_bw[k] * v->UrgentBurstFactorCursor[k]; 4991 } 4992 4993 v->TotalVActivePixelBandwidth[i][j] = 0; 4994 v->TotalVActiveCursorBandwidth[i][j] = 0; 4995 v->TotalMetaRowBandwidth[i][j] = 0; 4996 v->TotalDPTERowBandwidth[i][j] = 0; 4997 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 4998 v->TotalVActivePixelBandwidth[i][j] = v->TotalVActivePixelBandwidth[i][j] + v->VActivePixelBandwidth[i][j][k]; 4999 v->TotalVActiveCursorBandwidth[i][j] = v->TotalVActiveCursorBandwidth[i][j] + v->VActiveCursorBandwidth[i][j][k]; 5000 v->TotalMetaRowBandwidth[i][j] = v->TotalMetaRowBandwidth[i][j] + v->NoOfDPP[i][j][k] * v->meta_row_bandwidth[i][j][k]; 5001 v->TotalDPTERowBandwidth[i][j] = v->TotalDPTERowBandwidth[i][j] + v->NoOfDPP[i][j][k] * v->dpte_row_bandwidth[i][j][k]; 5002 } 5003 } 5004 } 5005 5006 //Calculate Return BW 5007 for (i = 0; i < v->soc.num_states; ++i) { 5008 for (j = 0; j <= 1; ++j) { 5009 for (k = 0; k < v->NumberOfActivePlanes; k++) { 5010 if (v->BlendingAndTiming[k] == k) { 5011 if (v->WritebackEnable[k] == true) { 5012 v->WritebackDelayTime[k] = v->WritebackLatency 5013 + CalculateWriteBackDelay( 5014 v->WritebackPixelFormat[k], 5015 v->WritebackHRatio[k], 5016 v->WritebackVRatio[k], 5017 v->WritebackVTaps[k], 5018 v->WritebackDestinationWidth[k], 5019 v->WritebackDestinationHeight[k], 5020 v->WritebackSourceHeight[k], 5021 v->HTotal[k]) / v->RequiredDISPCLK[i][j]; 5022 } else { 5023 v->WritebackDelayTime[k] = 0.0; 5024 } 5025 for (m = 0; m < v->NumberOfActivePlanes; m++) { 5026 if (v->BlendingAndTiming[m] == k && v->WritebackEnable[m] == true) { 5027 v->WritebackDelayTime[k] = dml_max( 5028 v->WritebackDelayTime[k], 5029 v->WritebackLatency 5030 + CalculateWriteBackDelay( 5031 v->WritebackPixelFormat[m], 5032 v->WritebackHRatio[m], 5033 v->WritebackVRatio[m], 5034 v->WritebackVTaps[m], 5035 v->WritebackDestinationWidth[m], 5036 v->WritebackDestinationHeight[m], 5037 v->WritebackSourceHeight[m], 5038 v->HTotal[m]) / v->RequiredDISPCLK[i][j]); 5039 } 5040 } 5041 } 5042 } 5043 for (k = 0; k < v->NumberOfActivePlanes; k++) { 5044 for (m = 0; m < v->NumberOfActivePlanes; m++) { 5045 if (v->BlendingAndTiming[k] == m) { 5046 v->WritebackDelayTime[k] = v->WritebackDelayTime[m]; 5047 } 5048 } 5049 } 5050 v->MaxMaxVStartup[i][j] = 0; 5051 for (k = 0; k < v->NumberOfActivePlanes; k++) { 5052 v->MaximumVStartup[i][j][k] = 5053 (v->Interlace[k] && !v->ProgressiveToInterlaceUnitInOPP) ? 5054 dml_floor((v->VTotal[k] - v->VActive[k]) / 2.0, 1.0) : 5055 v->VTotal[k] - v->VActive[k] 5056 - dml_max( 5057 1.0, 5058 dml_ceil( 5059 1.0 * v->WritebackDelayTime[k] 5060 / (v->HTotal[k] 5061 / v->PixelClock[k]), 5062 1.0)); 5063 if (v->MaximumVStartup[i][j][k] > 1023) 5064 v->MaximumVStartup[i][j][k] = 1023; 5065 v->MaxMaxVStartup[i][j] = dml_max(v->MaxMaxVStartup[i][j], v->MaximumVStartup[i][j][k]); 5066 } 5067 } 5068 } 5069 5070 ReorderingBytes = v->NumberOfChannels 5071 * dml_max3( 5072 v->UrgentOutOfOrderReturnPerChannelPixelDataOnly, 5073 v->UrgentOutOfOrderReturnPerChannelPixelMixedWithVMData, 5074 v->UrgentOutOfOrderReturnPerChannelVMDataOnly); 5075 5076 for (i = 0; i < v->soc.num_states; ++i) { 5077 for (j = 0; j <= 1; ++j) { 5078 v->DCFCLKState[i][j] = v->DCFCLKPerState[i]; 5079 } 5080 } 5081 5082 if (v->UseMinimumRequiredDCFCLK == true) { 5083 UseMinimumDCFCLK( 5084 mode_lib, 5085 v->MaxInterDCNTileRepeaters, 5086 MaxPrefetchMode, 5087 v->DRAMClockChangeLatency, 5088 v->SREnterPlusExitTime, 5089 v->ReturnBusWidth, 5090 v->RoundTripPingLatencyCycles, 5091 ReorderingBytes, 5092 v->PixelChunkSizeInKByte, 5093 v->MetaChunkSize, 5094 v->GPUVMEnable, 5095 v->GPUVMMaxPageTableLevels, 5096 v->HostVMEnable, 5097 v->NumberOfActivePlanes, 5098 v->HostVMMinPageSize, 5099 v->HostVMMaxNonCachedPageTableLevels, 5100 v->DynamicMetadataVMEnabled, 5101 v->ImmediateFlipRequirement[0], 5102 v->ProgressiveToInterlaceUnitInOPP, 5103 v->MaxAveragePercentOfIdealFabricAndSDPPortBWDisplayCanUseInNormalSystemOperation, 5104 v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency, 5105 v->VTotal, 5106 v->VActive, 5107 v->DynamicMetadataTransmittedBytes, 5108 v->DynamicMetadataLinesBeforeActiveRequired, 5109 v->Interlace, 5110 v->RequiredDPPCLK, 5111 v->RequiredDISPCLK, 5112 v->UrgLatency, 5113 v->NoOfDPP, 5114 v->ProjectedDCFCLKDeepSleep, 5115 v->MaximumVStartup, 5116 v->TotalVActivePixelBandwidth, 5117 v->TotalVActiveCursorBandwidth, 5118 v->TotalMetaRowBandwidth, 5119 v->TotalDPTERowBandwidth, 5120 v->TotalNumberOfActiveDPP, 5121 v->TotalNumberOfDCCActiveDPP, 5122 v->dpte_group_bytes, 5123 v->PrefetchLinesY, 5124 v->PrefetchLinesC, 5125 v->swath_width_luma_ub_all_states, 5126 v->swath_width_chroma_ub_all_states, 5127 v->BytePerPixelY, 5128 v->BytePerPixelC, 5129 v->HTotal, 5130 v->PixelClock, 5131 v->PDEAndMetaPTEBytesPerFrame, 5132 v->DPTEBytesPerRow, 5133 v->MetaRowBytes, 5134 v->DynamicMetadataEnable, 5135 v->VActivePixelBandwidth, 5136 v->VActiveCursorBandwidth, 5137 v->ReadBandwidthLuma, 5138 v->ReadBandwidthChroma, 5139 v->DCFCLKPerState, 5140 v->DCFCLKState); 5141 } 5142 5143 for (i = 0; i < v->soc.num_states; ++i) { 5144 for (j = 0; j <= 1; ++j) { 5145 double IdealFabricAndSDPPortBandwidthPerState = dml_min( 5146 v->ReturnBusWidth * v->DCFCLKState[i][j], 5147 v->FabricClockPerState[i] * v->FabricDatapathToDCNDataReturn); 5148 double IdealDRAMBandwidthPerState = v->DRAMSpeedPerState[i] * v->NumberOfChannels * v->DRAMChannelWidth; 5149 double PixelDataOnlyReturnBWPerState = dml_min( 5150 IdealFabricAndSDPPortBandwidthPerState * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0, 5151 IdealDRAMBandwidthPerState * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyPixelDataOnly / 100.0); 5152 double PixelMixedWithVMDataReturnBWPerState = dml_min( 5153 IdealFabricAndSDPPortBandwidthPerState * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0, 5154 IdealDRAMBandwidthPerState * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyPixelMixedWithVMData / 100.0); 5155 5156 if (v->HostVMEnable != true) { 5157 v->ReturnBWPerState[i][j] = PixelDataOnlyReturnBWPerState; 5158 } else { 5159 v->ReturnBWPerState[i][j] = PixelMixedWithVMDataReturnBWPerState; 5160 } 5161 } 5162 } 5163 5164 //Re-ordering Buffer Support Check 5165 for (i = 0; i < v->soc.num_states; ++i) { 5166 for (j = 0; j <= 1; ++j) { 5167 if ((v->ROBBufferSizeInKByte - v->PixelChunkSizeInKByte) * 1024 / v->ReturnBWPerState[i][j] 5168 > (v->RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__) / v->DCFCLKState[i][j] + ReorderingBytes / v->ReturnBWPerState[i][j]) { 5169 v->ROBSupport[i][j] = true; 5170 } else { 5171 v->ROBSupport[i][j] = false; 5172 } 5173 } 5174 } 5175 5176 //Vertical Active BW support check 5177 5178 MaxTotalVActiveRDBandwidth = 0; 5179 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 5180 MaxTotalVActiveRDBandwidth = MaxTotalVActiveRDBandwidth + v->ReadBandwidthLuma[k] + v->ReadBandwidthChroma[k]; 5181 } 5182 5183 for (i = 0; i < v->soc.num_states; ++i) { 5184 for (j = 0; j <= 1; ++j) { 5185 v->MaxTotalVerticalActiveAvailableBandwidth[i][j] = dml_min( 5186 dml_min( 5187 v->ReturnBusWidth * v->DCFCLKState[i][j], 5188 v->FabricClockPerState[i] * v->FabricDatapathToDCNDataReturn) 5189 * v->MaxAveragePercentOfIdealFabricAndSDPPortBWDisplayCanUseInNormalSystemOperation / 100, 5190 v->DRAMSpeedPerState[i] * v->NumberOfChannels * v->DRAMChannelWidth 5191 * v->MaxAveragePercentOfIdealDRAMBWDisplayCanUseInNormalSystemOperation / 100); 5192 5193 if (MaxTotalVActiveRDBandwidth <= v->MaxTotalVerticalActiveAvailableBandwidth[i][j]) { 5194 v->TotalVerticalActiveBandwidthSupport[i][j] = true; 5195 } else { 5196 v->TotalVerticalActiveBandwidthSupport[i][j] = false; 5197 } 5198 } 5199 } 5200 5201 v->UrgentLatency = CalculateUrgentLatency( 5202 v->UrgentLatencyPixelDataOnly, 5203 v->UrgentLatencyPixelMixedWithVMData, 5204 v->UrgentLatencyVMDataOnly, 5205 v->DoUrgentLatencyAdjustment, 5206 v->UrgentLatencyAdjustmentFabricClockComponent, 5207 v->UrgentLatencyAdjustmentFabricClockReference, 5208 v->FabricClock); 5209 //Prefetch Check 5210 for (i = 0; i < v->soc.num_states; ++i) { 5211 for (j = 0; j <= 1; ++j) { 5212 double VMDataOnlyReturnBWPerState; 5213 double HostVMInefficiencyFactor = 1; 5214 int NextPrefetchModeState = MinPrefetchMode; 5215 bool UnboundedRequestEnabledThisState = false; 5216 int CompressedBufferSizeInkByteThisState = 0; 5217 double dummy; 5218 5219 v->TimeCalc = 24 / v->ProjectedDCFCLKDeepSleep[i][j]; 5220 5221 v->BandwidthWithoutPrefetchSupported[i][j] = true; 5222 if (v->TotalVActivePixelBandwidth[i][j] + v->TotalVActiveCursorBandwidth[i][j] + v->TotalMetaRowBandwidth[i][j] 5223 + v->TotalDPTERowBandwidth[i][j] > v->ReturnBWPerState[i][j] || v->NotEnoughUrgentLatencyHidingA[i][j]) { 5224 v->BandwidthWithoutPrefetchSupported[i][j] = false; 5225 } 5226 5227 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 5228 v->NoOfDPPThisState[k] = v->NoOfDPP[i][j][k]; 5229 v->swath_width_luma_ub_this_state[k] = v->swath_width_luma_ub_all_states[i][j][k]; 5230 v->swath_width_chroma_ub_this_state[k] = v->swath_width_chroma_ub_all_states[i][j][k]; 5231 v->SwathWidthYThisState[k] = v->SwathWidthYAllStates[i][j][k]; 5232 v->SwathWidthCThisState[k] = v->SwathWidthCAllStates[i][j][k]; 5233 v->SwathHeightYThisState[k] = v->SwathHeightYAllStates[i][j][k]; 5234 v->SwathHeightCThisState[k] = v->SwathHeightCAllStates[i][j][k]; 5235 v->DETBufferSizeYThisState[k] = v->DETBufferSizeYAllStates[i][j][k]; 5236 v->DETBufferSizeCThisState[k] = v->DETBufferSizeCAllStates[i][j][k]; 5237 } 5238 5239 VMDataOnlyReturnBWPerState = dml_min( 5240 dml_min( 5241 v->ReturnBusWidth * v->DCFCLKState[i][j], 5242 v->FabricClockPerState[i] * v->FabricDatapathToDCNDataReturn) 5243 * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0, 5244 v->DRAMSpeedPerState[i] * v->NumberOfChannels * v->DRAMChannelWidth 5245 * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyVMDataOnly / 100.0); 5246 if (v->GPUVMEnable && v->HostVMEnable) 5247 HostVMInefficiencyFactor = v->ReturnBWPerState[i][j] / VMDataOnlyReturnBWPerState; 5248 5249 v->ExtraLatency = CalculateExtraLatency( 5250 v->RoundTripPingLatencyCycles, 5251 ReorderingBytes, 5252 v->DCFCLKState[i][j], 5253 v->TotalNumberOfActiveDPP[i][j], 5254 v->PixelChunkSizeInKByte, 5255 v->TotalNumberOfDCCActiveDPP[i][j], 5256 v->MetaChunkSize, 5257 v->ReturnBWPerState[i][j], 5258 v->GPUVMEnable, 5259 v->HostVMEnable, 5260 v->NumberOfActivePlanes, 5261 v->NoOfDPPThisState, 5262 v->dpte_group_bytes, 5263 HostVMInefficiencyFactor, 5264 v->HostVMMinPageSize, 5265 v->HostVMMaxNonCachedPageTableLevels); 5266 5267 v->NextMaxVStartup = v->MaxMaxVStartup[i][j]; 5268 do { 5269 v->PrefetchModePerState[i][j] = NextPrefetchModeState; 5270 v->MaxVStartup = v->NextMaxVStartup; 5271 5272 v->TWait = CalculateTWait( 5273 v->PrefetchModePerState[i][j], 5274 v->DRAMClockChangeLatency, 5275 v->UrgLatency[i], 5276 v->SREnterPlusExitTime); 5277 5278 for (k = 0; k < v->NumberOfActivePlanes; k++) { 5279 Pipe myPipe; 5280 5281 myPipe.DPPCLK = v->RequiredDPPCLK[i][j][k]; 5282 myPipe.DISPCLK = v->RequiredDISPCLK[i][j]; 5283 myPipe.PixelClock = v->PixelClock[k]; 5284 myPipe.DCFCLKDeepSleep = v->ProjectedDCFCLKDeepSleep[i][j]; 5285 myPipe.DPPPerPlane = v->NoOfDPP[i][j][k]; 5286 myPipe.ScalerEnabled = v->ScalerEnabled[k]; 5287 myPipe.SourceScan = v->SourceScan[k]; 5288 myPipe.BlockWidth256BytesY = v->Read256BlockWidthY[k]; 5289 myPipe.BlockHeight256BytesY = v->Read256BlockHeightY[k]; 5290 myPipe.BlockWidth256BytesC = v->Read256BlockWidthC[k]; 5291 myPipe.BlockHeight256BytesC = v->Read256BlockHeightC[k]; 5292 myPipe.InterlaceEnable = v->Interlace[k]; 5293 myPipe.NumberOfCursors = v->NumberOfCursors[k]; 5294 myPipe.VBlank = v->VTotal[k] - v->VActive[k]; 5295 myPipe.HTotal = v->HTotal[k]; 5296 myPipe.DCCEnable = v->DCCEnable[k]; 5297 myPipe.ODMCombineIsEnabled = v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1 5298 || v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1; 5299 myPipe.SourcePixelFormat = v->SourcePixelFormat[k]; 5300 myPipe.BytePerPixelY = v->BytePerPixelY[k]; 5301 myPipe.BytePerPixelC = v->BytePerPixelC[k]; 5302 myPipe.ProgressiveToInterlaceUnitInOPP = v->ProgressiveToInterlaceUnitInOPP; 5303 v->NoTimeForPrefetch[i][j][k] = CalculatePrefetchSchedule( 5304 mode_lib, 5305 HostVMInefficiencyFactor, 5306 &myPipe, 5307 v->DSCDelayPerState[i][k], 5308 v->DPPCLKDelaySubtotal + v->DPPCLKDelayCNVCFormater, 5309 v->DPPCLKDelaySCL, 5310 v->DPPCLKDelaySCLLBOnly, 5311 v->DPPCLKDelayCNVCCursor, 5312 v->DISPCLKDelaySubtotal, 5313 v->SwathWidthYThisState[k] / v->HRatio[k], 5314 v->OutputFormat[k], 5315 v->MaxInterDCNTileRepeaters, 5316 dml_min(v->MaxVStartup, v->MaximumVStartup[i][j][k]), 5317 v->MaximumVStartup[i][j][k], 5318 v->GPUVMMaxPageTableLevels, 5319 v->GPUVMEnable, 5320 v->HostVMEnable, 5321 v->HostVMMaxNonCachedPageTableLevels, 5322 v->HostVMMinPageSize, 5323 v->DynamicMetadataEnable[k], 5324 v->DynamicMetadataVMEnabled, 5325 v->DynamicMetadataLinesBeforeActiveRequired[k], 5326 v->DynamicMetadataTransmittedBytes[k], 5327 v->UrgLatency[i], 5328 v->ExtraLatency, 5329 v->TimeCalc, 5330 v->PDEAndMetaPTEBytesPerFrame[i][j][k], 5331 v->MetaRowBytes[i][j][k], 5332 v->DPTEBytesPerRow[i][j][k], 5333 v->PrefetchLinesY[i][j][k], 5334 v->SwathWidthYThisState[k], 5335 v->PrefillY[k], 5336 v->MaxNumSwY[k], 5337 v->PrefetchLinesC[i][j][k], 5338 v->SwathWidthCThisState[k], 5339 v->PrefillC[k], 5340 v->MaxNumSwC[k], 5341 v->swath_width_luma_ub_this_state[k], 5342 v->swath_width_chroma_ub_this_state[k], 5343 v->SwathHeightYThisState[k], 5344 v->SwathHeightCThisState[k], 5345 v->TWait, 5346 &v->DSTXAfterScaler[k], 5347 &v->DSTYAfterScaler[k], 5348 &v->LineTimesForPrefetch[k], 5349 &v->PrefetchBW[k], 5350 &v->LinesForMetaPTE[k], 5351 &v->LinesForMetaAndDPTERow[k], 5352 &v->VRatioPreY[i][j][k], 5353 &v->VRatioPreC[i][j][k], 5354 &v->RequiredPrefetchPixelDataBWLuma[i][j][k], 5355 &v->RequiredPrefetchPixelDataBWChroma[i][j][k], 5356 &v->NoTimeForDynamicMetadata[i][j][k], 5357 &v->Tno_bw[k], 5358 &v->prefetch_vmrow_bw[k], 5359 &v->dummy7[k], 5360 &v->dummy8[k], 5361 &v->dummy13[k], 5362 &v->VUpdateOffsetPix[k], 5363 &v->VUpdateWidthPix[k], 5364 &v->VReadyOffsetPix[k]); 5365 } 5366 5367 for (k = 0; k < v->NumberOfActivePlanes; k++) { 5368 CalculateUrgentBurstFactor( 5369 v->swath_width_luma_ub_this_state[k], 5370 v->swath_width_chroma_ub_this_state[k], 5371 v->SwathHeightYThisState[k], 5372 v->SwathHeightCThisState[k], 5373 v->HTotal[k] / v->PixelClock[k], 5374 v->UrgentLatency, 5375 v->CursorBufferSize, 5376 v->CursorWidth[k][0], 5377 v->CursorBPP[k][0], 5378 v->VRatioPreY[i][j][k], 5379 v->VRatioPreC[i][j][k], 5380 v->BytePerPixelInDETY[k], 5381 v->BytePerPixelInDETC[k], 5382 v->DETBufferSizeYThisState[k], 5383 v->DETBufferSizeCThisState[k], 5384 &v->UrgentBurstFactorCursorPre[k], 5385 &v->UrgentBurstFactorLumaPre[k], 5386 &v->UrgentBurstFactorChroma[k], 5387 &v->NotUrgentLatencyHidingPre[k]); 5388 } 5389 5390 v->MaximumReadBandwidthWithPrefetch = 0.0; 5391 for (k = 0; k < v->NumberOfActivePlanes; k++) { 5392 v->cursor_bw_pre[k] = v->NumberOfCursors[k] * v->CursorWidth[k][0] * v->CursorBPP[k][0] / 8.0 5393 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatioPreY[i][j][k]; 5394 5395 v->MaximumReadBandwidthWithPrefetch = 5396 v->MaximumReadBandwidthWithPrefetch 5397 + dml_max3( 5398 v->VActivePixelBandwidth[i][j][k] 5399 + v->VActiveCursorBandwidth[i][j][k] 5400 + v->NoOfDPP[i][j][k] 5401 * (v->meta_row_bandwidth[i][j][k] 5402 + v->dpte_row_bandwidth[i][j][k]), 5403 v->NoOfDPP[i][j][k] * v->prefetch_vmrow_bw[k], 5404 v->NoOfDPP[i][j][k] 5405 * (v->RequiredPrefetchPixelDataBWLuma[i][j][k] 5406 * v->UrgentBurstFactorLumaPre[k] 5407 + v->RequiredPrefetchPixelDataBWChroma[i][j][k] 5408 * v->UrgentBurstFactorChromaPre[k]) 5409 + v->cursor_bw_pre[k] * v->UrgentBurstFactorCursorPre[k]); 5410 } 5411 5412 v->NotEnoughUrgentLatencyHidingPre = false; 5413 for (k = 0; k < v->NumberOfActivePlanes; k++) { 5414 if (v->NotUrgentLatencyHidingPre[k] == true) { 5415 v->NotEnoughUrgentLatencyHidingPre = true; 5416 } 5417 } 5418 5419 v->PrefetchSupported[i][j] = true; 5420 if (v->BandwidthWithoutPrefetchSupported[i][j] == false || v->MaximumReadBandwidthWithPrefetch > v->ReturnBWPerState[i][j] 5421 || v->NotEnoughUrgentLatencyHidingPre == 1) { 5422 v->PrefetchSupported[i][j] = false; 5423 } 5424 for (k = 0; k < v->NumberOfActivePlanes; k++) { 5425 if (v->LineTimesForPrefetch[k] < 2.0 || v->LinesForMetaPTE[k] >= 32.0 || v->LinesForMetaAndDPTERow[k] >= 16.0 5426 || v->NoTimeForPrefetch[i][j][k] == true) { 5427 v->PrefetchSupported[i][j] = false; 5428 } 5429 } 5430 5431 v->DynamicMetadataSupported[i][j] = true; 5432 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 5433 if (v->NoTimeForDynamicMetadata[i][j][k] == true) { 5434 v->DynamicMetadataSupported[i][j] = false; 5435 } 5436 } 5437 5438 v->VRatioInPrefetchSupported[i][j] = true; 5439 for (k = 0; k < v->NumberOfActivePlanes; k++) { 5440 if (v->VRatioPreY[i][j][k] > 4.0 || v->VRatioPreC[i][j][k] > 4.0 || v->NoTimeForPrefetch[i][j][k] == true) { 5441 v->VRatioInPrefetchSupported[i][j] = false; 5442 } 5443 } 5444 v->AnyLinesForVMOrRowTooLarge = false; 5445 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 5446 if (v->LinesForMetaAndDPTERow[k] >= 16 || v->LinesForMetaPTE[k] >= 32) { 5447 v->AnyLinesForVMOrRowTooLarge = true; 5448 } 5449 } 5450 5451 v->NextPrefetchMode = v->NextPrefetchMode + 1; 5452 5453 if (v->PrefetchSupported[i][j] == true && v->VRatioInPrefetchSupported[i][j] == true) { 5454 v->BandwidthAvailableForImmediateFlip = v->ReturnBWPerState[i][j]; 5455 for (k = 0; k < v->NumberOfActivePlanes; k++) { 5456 v->BandwidthAvailableForImmediateFlip = v->BandwidthAvailableForImmediateFlip 5457 - dml_max( 5458 v->VActivePixelBandwidth[i][j][k] + v->VActiveCursorBandwidth[i][j][k], 5459 v->NoOfDPP[i][j][k] 5460 * (v->RequiredPrefetchPixelDataBWLuma[i][j][k] 5461 * v->UrgentBurstFactorLumaPre[k] 5462 + v->RequiredPrefetchPixelDataBWChroma[i][j][k] 5463 * v->UrgentBurstFactorChromaPre[k]) 5464 + v->cursor_bw_pre[k] * v->UrgentBurstFactorCursorPre[k]); 5465 } 5466 v->TotImmediateFlipBytes = 0.0; 5467 for (k = 0; k < v->NumberOfActivePlanes; k++) { 5468 v->TotImmediateFlipBytes = v->TotImmediateFlipBytes 5469 + v->NoOfDPP[i][j][k] * v->PDEAndMetaPTEBytesPerFrame[i][j][k] + v->MetaRowBytes[i][j][k] 5470 + v->DPTEBytesPerRow[i][j][k]; 5471 } 5472 5473 for (k = 0; k < v->NumberOfActivePlanes; k++) { 5474 CalculateFlipSchedule( 5475 mode_lib, 5476 HostVMInefficiencyFactor, 5477 v->ExtraLatency, 5478 v->UrgLatency[i], 5479 v->GPUVMMaxPageTableLevels, 5480 v->HostVMEnable, 5481 v->HostVMMaxNonCachedPageTableLevels, 5482 v->GPUVMEnable, 5483 v->HostVMMinPageSize, 5484 v->PDEAndMetaPTEBytesPerFrame[i][j][k], 5485 v->MetaRowBytes[i][j][k], 5486 v->DPTEBytesPerRow[i][j][k], 5487 v->BandwidthAvailableForImmediateFlip, 5488 v->TotImmediateFlipBytes, 5489 v->SourcePixelFormat[k], 5490 v->HTotal[k] / v->PixelClock[k], 5491 v->VRatio[k], 5492 v->VRatioChroma[k], 5493 v->Tno_bw[k], 5494 v->DCCEnable[k], 5495 v->dpte_row_height[k], 5496 v->meta_row_height[k], 5497 v->dpte_row_height_chroma[k], 5498 v->meta_row_height_chroma[k], 5499 &v->DestinationLinesToRequestVMInImmediateFlip[k], 5500 &v->DestinationLinesToRequestRowInImmediateFlip[k], 5501 &v->final_flip_bw[k], 5502 &v->ImmediateFlipSupportedForPipe[k]); 5503 } 5504 v->total_dcn_read_bw_with_flip = 0.0; 5505 for (k = 0; k < v->NumberOfActivePlanes; k++) { 5506 v->total_dcn_read_bw_with_flip = v->total_dcn_read_bw_with_flip 5507 + dml_max3( 5508 v->NoOfDPP[i][j][k] * v->prefetch_vmrow_bw[k], 5509 v->NoOfDPP[i][j][k] * v->final_flip_bw[k] + v->VActivePixelBandwidth[i][j][k] 5510 + v->VActiveCursorBandwidth[i][j][k], 5511 v->NoOfDPP[i][j][k] 5512 * (v->final_flip_bw[k] 5513 + v->RequiredPrefetchPixelDataBWLuma[i][j][k] 5514 * v->UrgentBurstFactorLumaPre[k] 5515 + v->RequiredPrefetchPixelDataBWChroma[i][j][k] 5516 * v->UrgentBurstFactorChromaPre[k]) 5517 + v->cursor_bw_pre[k] * v->UrgentBurstFactorCursorPre[k]); 5518 } 5519 v->ImmediateFlipSupportedForState[i][j] = true; 5520 if (v->total_dcn_read_bw_with_flip > v->ReturnBWPerState[i][j]) { 5521 v->ImmediateFlipSupportedForState[i][j] = false; 5522 } 5523 for (k = 0; k < v->NumberOfActivePlanes; k++) { 5524 if (v->ImmediateFlipSupportedForPipe[k] == false) { 5525 v->ImmediateFlipSupportedForState[i][j] = false; 5526 } 5527 } 5528 } else { 5529 v->ImmediateFlipSupportedForState[i][j] = false; 5530 } 5531 5532 if (v->MaxVStartup <= __DML_VBA_MIN_VSTARTUP__ || v->AnyLinesForVMOrRowTooLarge == false) { 5533 v->NextMaxVStartup = v->MaxMaxVStartup[i][j]; 5534 NextPrefetchModeState = NextPrefetchModeState + 1; 5535 } else { 5536 v->NextMaxVStartup = v->NextMaxVStartup - 1; 5537 } 5538 v->NextPrefetchMode = v->NextPrefetchMode + 1; 5539 } while (!((v->PrefetchSupported[i][j] == true && v->DynamicMetadataSupported[i][j] == true && v->VRatioInPrefetchSupported[i][j] == true 5540 && ((v->HostVMEnable == false && 5541 v->ImmediateFlipRequirement[0] != dm_immediate_flip_required) 5542 || v->ImmediateFlipSupportedForState[i][j] == true)) 5543 || (v->NextMaxVStartup == v->MaxMaxVStartup[i][j] && NextPrefetchModeState > MaxPrefetchMode))); 5544 5545 CalculateUnboundedRequestAndCompressedBufferSize( 5546 v->DETBufferSizeInKByte[0], 5547 v->ConfigReturnBufferSizeInKByte, 5548 v->UseUnboundedRequesting, 5549 v->TotalNumberOfActiveDPP[i][j], 5550 NoChroma, 5551 v->MaxNumDPP, 5552 v->CompressedBufferSegmentSizeInkByte, 5553 v->Output, 5554 &UnboundedRequestEnabledThisState, 5555 &CompressedBufferSizeInkByteThisState); 5556 5557 CalculateWatermarksAndDRAMSpeedChangeSupport( 5558 mode_lib, 5559 v->PrefetchModePerState[i][j], 5560 v->NumberOfActivePlanes, 5561 v->MaxLineBufferLines, 5562 v->LineBufferSize, 5563 v->WritebackInterfaceBufferSize, 5564 v->DCFCLKState[i][j], 5565 v->ReturnBWPerState[i][j], 5566 v->SynchronizedVBlank, 5567 v->dpte_group_bytes, 5568 v->MetaChunkSize, 5569 v->UrgLatency[i], 5570 v->ExtraLatency, 5571 v->WritebackLatency, 5572 v->WritebackChunkSize, 5573 v->SOCCLKPerState[i], 5574 v->DRAMClockChangeLatency, 5575 v->SRExitTime, 5576 v->SREnterPlusExitTime, 5577 v->SRExitZ8Time, 5578 v->SREnterPlusExitZ8Time, 5579 v->ProjectedDCFCLKDeepSleep[i][j], 5580 v->DETBufferSizeYThisState, 5581 v->DETBufferSizeCThisState, 5582 v->SwathHeightYThisState, 5583 v->SwathHeightCThisState, 5584 v->LBBitPerPixel, 5585 v->SwathWidthYThisState, 5586 v->SwathWidthCThisState, 5587 v->HRatio, 5588 v->HRatioChroma, 5589 v->vtaps, 5590 v->VTAPsChroma, 5591 v->VRatio, 5592 v->VRatioChroma, 5593 v->HTotal, 5594 v->PixelClock, 5595 v->BlendingAndTiming, 5596 v->NoOfDPPThisState, 5597 v->BytePerPixelInDETY, 5598 v->BytePerPixelInDETC, 5599 v->DSTXAfterScaler, 5600 v->DSTYAfterScaler, 5601 v->WritebackEnable, 5602 v->WritebackPixelFormat, 5603 v->WritebackDestinationWidth, 5604 v->WritebackDestinationHeight, 5605 v->WritebackSourceHeight, 5606 UnboundedRequestEnabledThisState, 5607 CompressedBufferSizeInkByteThisState, 5608 &v->DRAMClockChangeSupport[i][j], 5609 &v->UrgentWatermark, 5610 &v->WritebackUrgentWatermark, 5611 &v->DRAMClockChangeWatermark, 5612 &v->WritebackDRAMClockChangeWatermark, 5613 &dummy, 5614 &dummy, 5615 &dummy, 5616 &dummy, 5617 &v->MinActiveDRAMClockChangeLatencySupported); 5618 } 5619 } 5620 5621 /*PTE Buffer Size Check*/ 5622 for (i = 0; i < v->soc.num_states; i++) { 5623 for (j = 0; j < 2; j++) { 5624 v->PTEBufferSizeNotExceeded[i][j] = true; 5625 for (k = 0; k < v->NumberOfActivePlanes; k++) { 5626 if (v->PTEBufferSizeNotExceededY[i][j][k] == false || v->PTEBufferSizeNotExceededC[i][j][k] == false) { 5627 v->PTEBufferSizeNotExceeded[i][j] = false; 5628 } 5629 } 5630 } 5631 } 5632 5633 /*Cursor Support Check*/ 5634 v->CursorSupport = true; 5635 for (k = 0; k < v->NumberOfActivePlanes; k++) { 5636 if (v->CursorWidth[k][0] > 0.0) { 5637 if (v->CursorBPP[k][0] == 64 && v->Cursor64BppSupport == false) { 5638 v->CursorSupport = false; 5639 } 5640 } 5641 } 5642 5643 /*Valid Pitch Check*/ 5644 v->PitchSupport = true; 5645 for (k = 0; k < v->NumberOfActivePlanes; k++) { 5646 v->AlignedYPitch[k] = dml_ceil(dml_max(v->PitchY[k], v->SurfaceWidthY[k]), v->MacroTileWidthY[k]); 5647 if (v->DCCEnable[k] == true) { 5648 v->AlignedDCCMetaPitchY[k] = dml_ceil(dml_max(v->DCCMetaPitchY[k], v->SurfaceWidthY[k]), 64.0 * v->Read256BlockWidthY[k]); 5649 } else { 5650 v->AlignedDCCMetaPitchY[k] = v->DCCMetaPitchY[k]; 5651 } 5652 if (v->SourcePixelFormat[k] != dm_444_64 && v->SourcePixelFormat[k] != dm_444_32 && v->SourcePixelFormat[k] != dm_444_16 5653 && v->SourcePixelFormat[k] != dm_mono_16 && v->SourcePixelFormat[k] != dm_rgbe 5654 && v->SourcePixelFormat[k] != dm_mono_8) { 5655 v->AlignedCPitch[k] = dml_ceil(dml_max(v->PitchC[k], v->SurfaceWidthC[k]), v->MacroTileWidthC[k]); 5656 if (v->DCCEnable[k] == true) { 5657 v->AlignedDCCMetaPitchC[k] = dml_ceil( 5658 dml_max(v->DCCMetaPitchC[k], v->SurfaceWidthC[k]), 5659 64.0 * v->Read256BlockWidthC[k]); 5660 } else { 5661 v->AlignedDCCMetaPitchC[k] = v->DCCMetaPitchC[k]; 5662 } 5663 } else { 5664 v->AlignedCPitch[k] = v->PitchC[k]; 5665 v->AlignedDCCMetaPitchC[k] = v->DCCMetaPitchC[k]; 5666 } 5667 if (v->AlignedYPitch[k] > v->PitchY[k] || v->AlignedCPitch[k] > v->PitchC[k] 5668 || v->AlignedDCCMetaPitchY[k] > v->DCCMetaPitchY[k] || v->AlignedDCCMetaPitchC[k] > v->DCCMetaPitchC[k]) { 5669 v->PitchSupport = false; 5670 } 5671 } 5672 5673 for (k = 0; k < v->NumberOfActivePlanes; k++) { 5674 if (v->ViewportWidth[k] > v->SurfaceWidthY[k] || v->ViewportHeight[k] > v->SurfaceHeightY[k]) { 5675 ViewportExceedsSurface = true; 5676 if (v->SourcePixelFormat[k] != dm_444_64 && v->SourcePixelFormat[k] != dm_444_32 5677 && v->SourcePixelFormat[k] != dm_444_16 && v->SourcePixelFormat[k] != dm_444_8 5678 && v->SourcePixelFormat[k] != dm_rgbe) { 5679 if (v->ViewportWidthChroma[k] > v->SurfaceWidthC[k] 5680 || v->ViewportHeightChroma[k] > v->SurfaceHeightC[k]) { 5681 ViewportExceedsSurface = true; 5682 } 5683 } 5684 } 5685 } 5686 5687 /*Mode Support, Voltage State and SOC Configuration*/ 5688 for (i = v->soc.num_states - 1; i >= 0; i--) { 5689 for (j = 0; j < 2; j++) { 5690 if (v->ScaleRatioAndTapsSupport == true && v->SourceFormatPixelAndScanSupport == true && v->ViewportSizeSupport[i][j] == true 5691 && v->LinkCapacitySupport[i] == true && !P2IWith420 && !DSCOnlyIfNecessaryWithBPP 5692 && !DSC422NativeNotSupported && v->ODMCombine4To1SupportCheckOK[i] == true && v->NotEnoughDSCUnits[i] == false 5693 && v->DTBCLKRequiredMoreThanSupported[i] == false 5694 && v->ROBSupport[i][j] == true && v->DISPCLK_DPPCLK_Support[i][j] == true 5695 && v->TotalAvailablePipesSupport[i][j] == true && EnoughWritebackUnits == true 5696 && v->WritebackLatencySupport == true && v->WritebackScaleRatioAndTapsSupport == true 5697 && v->CursorSupport == true && v->PitchSupport == true && ViewportExceedsSurface == false 5698 && v->PrefetchSupported[i][j] == true && v->DynamicMetadataSupported[i][j] == true 5699 && v->TotalVerticalActiveBandwidthSupport[i][j] == true && v->VRatioInPrefetchSupported[i][j] == true 5700 && v->PTEBufferSizeNotExceeded[i][j] == true && v->NonsupportedDSCInputBPC == false 5701 && ((v->HostVMEnable == false 5702 && v->ImmediateFlipRequirement[0] != dm_immediate_flip_required) 5703 || v->ImmediateFlipSupportedForState[i][j] == true) 5704 && FMTBufferExceeded == false) { 5705 v->ModeSupport[i][j] = true; 5706 } else { 5707 v->ModeSupport[i][j] = false; 5708 } 5709 } 5710 } 5711 5712 { 5713 unsigned int MaximumMPCCombine = 0; 5714 for (i = v->soc.num_states; i >= 0; i--) { 5715 if (i == v->soc.num_states || v->ModeSupport[i][0] == true || v->ModeSupport[i][1] == true) { 5716 v->VoltageLevel = i; 5717 v->ModeIsSupported = v->ModeSupport[i][0] == true || v->ModeSupport[i][1] == true; 5718 if (v->ModeSupport[i][0] == true) { 5719 MaximumMPCCombine = 0; 5720 } else { 5721 MaximumMPCCombine = 1; 5722 } 5723 } 5724 } 5725 v->ImmediateFlipSupport = v->ImmediateFlipSupportedForState[v->VoltageLevel][MaximumMPCCombine]; 5726 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) { 5727 v->MPCCombineEnable[k] = v->MPCCombine[v->VoltageLevel][MaximumMPCCombine][k]; 5728 v->DPPPerPlane[k] = v->NoOfDPP[v->VoltageLevel][MaximumMPCCombine][k]; 5729 } 5730 v->DCFCLK = v->DCFCLKState[v->VoltageLevel][MaximumMPCCombine]; 5731 v->DRAMSpeed = v->DRAMSpeedPerState[v->VoltageLevel]; 5732 v->FabricClock = v->FabricClockPerState[v->VoltageLevel]; 5733 v->SOCCLK = v->SOCCLKPerState[v->VoltageLevel]; 5734 v->ReturnBW = v->ReturnBWPerState[v->VoltageLevel][MaximumMPCCombine]; 5735 v->maxMpcComb = MaximumMPCCombine; 5736 } 5737 } 5738 5739 static void CalculateWatermarksAndDRAMSpeedChangeSupport( 5740 struct display_mode_lib *mode_lib, 5741 unsigned int PrefetchMode, 5742 unsigned int NumberOfActivePlanes, 5743 unsigned int MaxLineBufferLines, 5744 unsigned int LineBufferSize, 5745 unsigned int WritebackInterfaceBufferSize, 5746 double DCFCLK, 5747 double ReturnBW, 5748 bool SynchronizedVBlank, 5749 unsigned int dpte_group_bytes[], 5750 unsigned int MetaChunkSize, 5751 double UrgentLatency, 5752 double ExtraLatency, 5753 double WritebackLatency, 5754 double WritebackChunkSize, 5755 double SOCCLK, 5756 double DRAMClockChangeLatency, 5757 double SRExitTime, 5758 double SREnterPlusExitTime, 5759 double SRExitZ8Time, 5760 double SREnterPlusExitZ8Time, 5761 double DCFCLKDeepSleep, 5762 unsigned int DETBufferSizeY[], 5763 unsigned int DETBufferSizeC[], 5764 unsigned int SwathHeightY[], 5765 unsigned int SwathHeightC[], 5766 unsigned int LBBitPerPixel[], 5767 double SwathWidthY[], 5768 double SwathWidthC[], 5769 double HRatio[], 5770 double HRatioChroma[], 5771 unsigned int vtaps[], 5772 unsigned int VTAPsChroma[], 5773 double VRatio[], 5774 double VRatioChroma[], 5775 unsigned int HTotal[], 5776 double PixelClock[], 5777 unsigned int BlendingAndTiming[], 5778 unsigned int DPPPerPlane[], 5779 double BytePerPixelDETY[], 5780 double BytePerPixelDETC[], 5781 double DSTXAfterScaler[], 5782 double DSTYAfterScaler[], 5783 bool WritebackEnable[], 5784 enum source_format_class WritebackPixelFormat[], 5785 double WritebackDestinationWidth[], 5786 double WritebackDestinationHeight[], 5787 double WritebackSourceHeight[], 5788 bool UnboundedRequestEnabled, 5789 int unsigned CompressedBufferSizeInkByte, 5790 enum clock_change_support *DRAMClockChangeSupport, 5791 double *UrgentWatermark, 5792 double *WritebackUrgentWatermark, 5793 double *DRAMClockChangeWatermark, 5794 double *WritebackDRAMClockChangeWatermark, 5795 double *StutterExitWatermark, 5796 double *StutterEnterPlusExitWatermark, 5797 double *Z8StutterExitWatermark, 5798 double *Z8StutterEnterPlusExitWatermark, 5799 double *MinActiveDRAMClockChangeLatencySupported) 5800 { 5801 struct vba_vars_st *v = &mode_lib->vba; 5802 double EffectiveLBLatencyHidingY; 5803 double EffectiveLBLatencyHidingC; 5804 double LinesInDETY[DC__NUM_DPP__MAX]; 5805 double LinesInDETC; 5806 unsigned int LinesInDETYRoundedDownToSwath[DC__NUM_DPP__MAX]; 5807 unsigned int LinesInDETCRoundedDownToSwath; 5808 double FullDETBufferingTimeY; 5809 double FullDETBufferingTimeC; 5810 double ActiveDRAMClockChangeLatencyMarginY; 5811 double ActiveDRAMClockChangeLatencyMarginC; 5812 double WritebackDRAMClockChangeLatencyMargin; 5813 double PlaneWithMinActiveDRAMClockChangeMargin; 5814 double SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank; 5815 double WritebackDRAMClockChangeLatencyHiding; 5816 double TotalPixelBW = 0.0; 5817 int k, j; 5818 5819 *UrgentWatermark = UrgentLatency + ExtraLatency; 5820 5821 #ifdef __DML_VBA_DEBUG__ 5822 dml_print("DML::%s: UrgentLatency = %f\n", __func__, UrgentLatency); 5823 dml_print("DML::%s: ExtraLatency = %f\n", __func__, ExtraLatency); 5824 dml_print("DML::%s: UrgentWatermark = %f\n", __func__, *UrgentWatermark); 5825 #endif 5826 5827 *DRAMClockChangeWatermark = DRAMClockChangeLatency + *UrgentWatermark; 5828 5829 #ifdef __DML_VBA_DEBUG__ 5830 dml_print("DML::%s: DRAMClockChangeLatency = %f\n", __func__, DRAMClockChangeLatency); 5831 dml_print("DML::%s: DRAMClockChangeWatermark = %f\n", __func__, *DRAMClockChangeWatermark); 5832 #endif 5833 5834 v->TotalActiveWriteback = 0; 5835 for (k = 0; k < NumberOfActivePlanes; ++k) { 5836 if (WritebackEnable[k] == true) { 5837 v->TotalActiveWriteback = v->TotalActiveWriteback + 1; 5838 } 5839 } 5840 5841 if (v->TotalActiveWriteback <= 1) { 5842 *WritebackUrgentWatermark = WritebackLatency; 5843 } else { 5844 *WritebackUrgentWatermark = WritebackLatency + WritebackChunkSize * 1024.0 / 32.0 / SOCCLK; 5845 } 5846 5847 if (v->TotalActiveWriteback <= 1) { 5848 *WritebackDRAMClockChangeWatermark = DRAMClockChangeLatency + WritebackLatency; 5849 } else { 5850 *WritebackDRAMClockChangeWatermark = DRAMClockChangeLatency + WritebackLatency + WritebackChunkSize * 1024.0 / 32.0 / SOCCLK; 5851 } 5852 5853 for (k = 0; k < NumberOfActivePlanes; ++k) { 5854 TotalPixelBW = TotalPixelBW 5855 + DPPPerPlane[k] * (SwathWidthY[k] * BytePerPixelDETY[k] * VRatio[k] + SwathWidthC[k] * BytePerPixelDETC[k] * VRatioChroma[k]) 5856 / (HTotal[k] / PixelClock[k]); 5857 } 5858 5859 for (k = 0; k < NumberOfActivePlanes; ++k) { 5860 double EffectiveDETBufferSizeY = DETBufferSizeY[k]; 5861 5862 v->LBLatencyHidingSourceLinesY = dml_min( 5863 (double) MaxLineBufferLines, 5864 dml_floor(LineBufferSize / LBBitPerPixel[k] / (SwathWidthY[k] / dml_max(HRatio[k], 1.0)), 1)) - (vtaps[k] - 1); 5865 5866 v->LBLatencyHidingSourceLinesC = dml_min( 5867 (double) MaxLineBufferLines, 5868 dml_floor(LineBufferSize / LBBitPerPixel[k] / (SwathWidthC[k] / dml_max(HRatioChroma[k], 1.0)), 1)) - (VTAPsChroma[k] - 1); 5869 5870 EffectiveLBLatencyHidingY = v->LBLatencyHidingSourceLinesY / VRatio[k] * (HTotal[k] / PixelClock[k]); 5871 5872 EffectiveLBLatencyHidingC = v->LBLatencyHidingSourceLinesC / VRatioChroma[k] * (HTotal[k] / PixelClock[k]); 5873 5874 if (UnboundedRequestEnabled) { 5875 EffectiveDETBufferSizeY = EffectiveDETBufferSizeY 5876 + CompressedBufferSizeInkByte * 1024 * SwathWidthY[k] * BytePerPixelDETY[k] * VRatio[k] / (HTotal[k] / PixelClock[k]) / TotalPixelBW; 5877 } 5878 5879 LinesInDETY[k] = (double) EffectiveDETBufferSizeY / BytePerPixelDETY[k] / SwathWidthY[k]; 5880 LinesInDETYRoundedDownToSwath[k] = dml_floor(LinesInDETY[k], SwathHeightY[k]); 5881 FullDETBufferingTimeY = LinesInDETYRoundedDownToSwath[k] * (HTotal[k] / PixelClock[k]) / VRatio[k]; 5882 if (BytePerPixelDETC[k] > 0) { 5883 LinesInDETC = v->DETBufferSizeC[k] / BytePerPixelDETC[k] / SwathWidthC[k]; 5884 LinesInDETCRoundedDownToSwath = dml_floor(LinesInDETC, SwathHeightC[k]); 5885 FullDETBufferingTimeC = LinesInDETCRoundedDownToSwath * (HTotal[k] / PixelClock[k]) / VRatioChroma[k]; 5886 } else { 5887 LinesInDETC = 0; 5888 FullDETBufferingTimeC = 999999; 5889 } 5890 5891 ActiveDRAMClockChangeLatencyMarginY = EffectiveLBLatencyHidingY + FullDETBufferingTimeY 5892 - ((double) DSTXAfterScaler[k] / HTotal[k] + DSTYAfterScaler[k]) * HTotal[k] / PixelClock[k] - *UrgentWatermark - *DRAMClockChangeWatermark; 5893 5894 if (NumberOfActivePlanes > 1) { 5895 ActiveDRAMClockChangeLatencyMarginY = ActiveDRAMClockChangeLatencyMarginY 5896 - (1 - 1.0 / NumberOfActivePlanes) * SwathHeightY[k] * HTotal[k] / PixelClock[k] / VRatio[k]; 5897 } 5898 5899 if (BytePerPixelDETC[k] > 0) { 5900 ActiveDRAMClockChangeLatencyMarginC = EffectiveLBLatencyHidingC + FullDETBufferingTimeC 5901 - ((double) DSTXAfterScaler[k] / HTotal[k] + DSTYAfterScaler[k]) * HTotal[k] / PixelClock[k] - *UrgentWatermark - *DRAMClockChangeWatermark; 5902 5903 if (NumberOfActivePlanes > 1) { 5904 ActiveDRAMClockChangeLatencyMarginC = ActiveDRAMClockChangeLatencyMarginC 5905 - (1 - 1.0 / NumberOfActivePlanes) * SwathHeightC[k] * HTotal[k] / PixelClock[k] / VRatioChroma[k]; 5906 } 5907 v->ActiveDRAMClockChangeLatencyMargin[k] = dml_min(ActiveDRAMClockChangeLatencyMarginY, ActiveDRAMClockChangeLatencyMarginC); 5908 } else { 5909 v->ActiveDRAMClockChangeLatencyMargin[k] = ActiveDRAMClockChangeLatencyMarginY; 5910 } 5911 5912 if (WritebackEnable[k] == true) { 5913 WritebackDRAMClockChangeLatencyHiding = WritebackInterfaceBufferSize * 1024 5914 / (WritebackDestinationWidth[k] * WritebackDestinationHeight[k] / (WritebackSourceHeight[k] * HTotal[k] / PixelClock[k]) * 4); 5915 if (WritebackPixelFormat[k] == dm_444_64) { 5916 WritebackDRAMClockChangeLatencyHiding = WritebackDRAMClockChangeLatencyHiding / 2; 5917 } 5918 WritebackDRAMClockChangeLatencyMargin = WritebackDRAMClockChangeLatencyHiding - v->WritebackDRAMClockChangeWatermark; 5919 v->ActiveDRAMClockChangeLatencyMargin[k] = dml_min(v->ActiveDRAMClockChangeLatencyMargin[k], WritebackDRAMClockChangeLatencyMargin); 5920 } 5921 } 5922 5923 v->MinActiveDRAMClockChangeMargin = 999999; 5924 PlaneWithMinActiveDRAMClockChangeMargin = 0; 5925 for (k = 0; k < NumberOfActivePlanes; ++k) { 5926 if (v->ActiveDRAMClockChangeLatencyMargin[k] < v->MinActiveDRAMClockChangeMargin) { 5927 v->MinActiveDRAMClockChangeMargin = v->ActiveDRAMClockChangeLatencyMargin[k]; 5928 if (BlendingAndTiming[k] == k) { 5929 PlaneWithMinActiveDRAMClockChangeMargin = k; 5930 } else { 5931 for (j = 0; j < NumberOfActivePlanes; ++j) { 5932 if (BlendingAndTiming[k] == j) { 5933 PlaneWithMinActiveDRAMClockChangeMargin = j; 5934 } 5935 } 5936 } 5937 } 5938 } 5939 5940 *MinActiveDRAMClockChangeLatencySupported = v->MinActiveDRAMClockChangeMargin + DRAMClockChangeLatency; 5941 5942 SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank = 999999; 5943 for (k = 0; k < NumberOfActivePlanes; ++k) { 5944 if (!((k == PlaneWithMinActiveDRAMClockChangeMargin) && (BlendingAndTiming[k] == k)) && !(BlendingAndTiming[k] == PlaneWithMinActiveDRAMClockChangeMargin) 5945 && v->ActiveDRAMClockChangeLatencyMargin[k] < SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank) { 5946 SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank = v->ActiveDRAMClockChangeLatencyMargin[k]; 5947 } 5948 } 5949 5950 v->TotalNumberOfActiveOTG = 0; 5951 5952 for (k = 0; k < NumberOfActivePlanes; ++k) { 5953 if (BlendingAndTiming[k] == k) { 5954 v->TotalNumberOfActiveOTG = v->TotalNumberOfActiveOTG + 1; 5955 } 5956 } 5957 5958 if (v->MinActiveDRAMClockChangeMargin > 0 && PrefetchMode == 0) { 5959 *DRAMClockChangeSupport = dm_dram_clock_change_vactive; 5960 } else if ((SynchronizedVBlank == true || v->TotalNumberOfActiveOTG == 1 5961 || SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank > 0) && PrefetchMode == 0) { 5962 *DRAMClockChangeSupport = dm_dram_clock_change_vblank; 5963 } else { 5964 *DRAMClockChangeSupport = dm_dram_clock_change_unsupported; 5965 } 5966 5967 *StutterExitWatermark = SRExitTime + ExtraLatency + 10 / DCFCLKDeepSleep; 5968 *StutterEnterPlusExitWatermark = (SREnterPlusExitTime + ExtraLatency + 10 / DCFCLKDeepSleep); 5969 *Z8StutterExitWatermark = SRExitZ8Time + ExtraLatency + 10 / DCFCLKDeepSleep; 5970 *Z8StutterEnterPlusExitWatermark = SREnterPlusExitZ8Time + ExtraLatency + 10 / DCFCLKDeepSleep; 5971 5972 #ifdef __DML_VBA_DEBUG__ 5973 dml_print("DML::%s: StutterExitWatermark = %f\n", __func__, *StutterExitWatermark); 5974 dml_print("DML::%s: StutterEnterPlusExitWatermark = %f\n", __func__, *StutterEnterPlusExitWatermark); 5975 dml_print("DML::%s: Z8StutterExitWatermark = %f\n", __func__, *Z8StutterExitWatermark); 5976 dml_print("DML::%s: Z8StutterEnterPlusExitWatermark = %f\n", __func__, *Z8StutterEnterPlusExitWatermark); 5977 #endif 5978 } 5979 5980 static void CalculateDCFCLKDeepSleep( 5981 struct display_mode_lib *mode_lib, 5982 unsigned int NumberOfActivePlanes, 5983 int BytePerPixelY[], 5984 int BytePerPixelC[], 5985 double VRatio[], 5986 double VRatioChroma[], 5987 double SwathWidthY[], 5988 double SwathWidthC[], 5989 unsigned int DPPPerPlane[], 5990 double HRatio[], 5991 double HRatioChroma[], 5992 double PixelClock[], 5993 double PSCL_THROUGHPUT[], 5994 double PSCL_THROUGHPUT_CHROMA[], 5995 double DPPCLK[], 5996 double ReadBandwidthLuma[], 5997 double ReadBandwidthChroma[], 5998 int ReturnBusWidth, 5999 double *DCFCLKDeepSleep) 6000 { 6001 struct vba_vars_st *v = &mode_lib->vba; 6002 double DisplayPipeLineDeliveryTimeLuma; 6003 double DisplayPipeLineDeliveryTimeChroma; 6004 double ReadBandwidth = 0.0; 6005 int k; 6006 6007 for (k = 0; k < NumberOfActivePlanes; ++k) { 6008 6009 if (VRatio[k] <= 1) { 6010 DisplayPipeLineDeliveryTimeLuma = SwathWidthY[k] * DPPPerPlane[k] / HRatio[k] / PixelClock[k]; 6011 } else { 6012 DisplayPipeLineDeliveryTimeLuma = SwathWidthY[k] / PSCL_THROUGHPUT[k] / DPPCLK[k]; 6013 } 6014 if (BytePerPixelC[k] == 0) { 6015 DisplayPipeLineDeliveryTimeChroma = 0; 6016 } else { 6017 if (VRatioChroma[k] <= 1) { 6018 DisplayPipeLineDeliveryTimeChroma = SwathWidthC[k] * DPPPerPlane[k] / HRatioChroma[k] / PixelClock[k]; 6019 } else { 6020 DisplayPipeLineDeliveryTimeChroma = SwathWidthC[k] / PSCL_THROUGHPUT_CHROMA[k] / DPPCLK[k]; 6021 } 6022 } 6023 6024 if (BytePerPixelC[k] > 0) { 6025 v->DCFCLKDeepSleepPerPlane[k] = dml_max(__DML_MIN_DCFCLK_FACTOR__ * SwathWidthY[k] * BytePerPixelY[k] / 32.0 / DisplayPipeLineDeliveryTimeLuma, 6026 __DML_MIN_DCFCLK_FACTOR__ * SwathWidthC[k] * BytePerPixelC[k] / 32.0 / DisplayPipeLineDeliveryTimeChroma); 6027 } else { 6028 v->DCFCLKDeepSleepPerPlane[k] = __DML_MIN_DCFCLK_FACTOR__ * SwathWidthY[k] * BytePerPixelY[k] / 64.0 / DisplayPipeLineDeliveryTimeLuma; 6029 } 6030 v->DCFCLKDeepSleepPerPlane[k] = dml_max(v->DCFCLKDeepSleepPerPlane[k], PixelClock[k] / 16); 6031 6032 } 6033 6034 for (k = 0; k < NumberOfActivePlanes; ++k) { 6035 ReadBandwidth = ReadBandwidth + ReadBandwidthLuma[k] + ReadBandwidthChroma[k]; 6036 } 6037 6038 *DCFCLKDeepSleep = dml_max(8.0, __DML_MIN_DCFCLK_FACTOR__ * ReadBandwidth / ReturnBusWidth); 6039 6040 for (k = 0; k < NumberOfActivePlanes; ++k) { 6041 *DCFCLKDeepSleep = dml_max(*DCFCLKDeepSleep, v->DCFCLKDeepSleepPerPlane[k]); 6042 } 6043 } 6044 6045 static void CalculateUrgentBurstFactor( 6046 int swath_width_luma_ub, 6047 int swath_width_chroma_ub, 6048 unsigned int SwathHeightY, 6049 unsigned int SwathHeightC, 6050 double LineTime, 6051 double UrgentLatency, 6052 double CursorBufferSize, 6053 unsigned int CursorWidth, 6054 unsigned int CursorBPP, 6055 double VRatio, 6056 double VRatioC, 6057 double BytePerPixelInDETY, 6058 double BytePerPixelInDETC, 6059 double DETBufferSizeY, 6060 double DETBufferSizeC, 6061 double *UrgentBurstFactorCursor, 6062 double *UrgentBurstFactorLuma, 6063 double *UrgentBurstFactorChroma, 6064 bool *NotEnoughUrgentLatencyHiding) 6065 { 6066 double LinesInDETLuma; 6067 double LinesInDETChroma; 6068 unsigned int LinesInCursorBuffer; 6069 double CursorBufferSizeInTime; 6070 double DETBufferSizeInTimeLuma; 6071 double DETBufferSizeInTimeChroma; 6072 6073 *NotEnoughUrgentLatencyHiding = 0; 6074 6075 if (CursorWidth > 0) { 6076 LinesInCursorBuffer = 1 << (unsigned int) dml_floor(dml_log2(CursorBufferSize * 1024.0 / (CursorWidth * CursorBPP / 8.0)), 1.0); 6077 if (VRatio > 0) { 6078 CursorBufferSizeInTime = LinesInCursorBuffer * LineTime / VRatio; 6079 if (CursorBufferSizeInTime - UrgentLatency <= 0) { 6080 *NotEnoughUrgentLatencyHiding = 1; 6081 *UrgentBurstFactorCursor = 0; 6082 } else { 6083 *UrgentBurstFactorCursor = CursorBufferSizeInTime / (CursorBufferSizeInTime - UrgentLatency); 6084 } 6085 } else { 6086 *UrgentBurstFactorCursor = 1; 6087 } 6088 } 6089 6090 LinesInDETLuma = DETBufferSizeY / BytePerPixelInDETY / swath_width_luma_ub; 6091 if (VRatio > 0) { 6092 DETBufferSizeInTimeLuma = dml_floor(LinesInDETLuma, SwathHeightY) * LineTime / VRatio; 6093 if (DETBufferSizeInTimeLuma - UrgentLatency <= 0) { 6094 *NotEnoughUrgentLatencyHiding = 1; 6095 *UrgentBurstFactorLuma = 0; 6096 } else { 6097 *UrgentBurstFactorLuma = DETBufferSizeInTimeLuma / (DETBufferSizeInTimeLuma - UrgentLatency); 6098 } 6099 } else { 6100 *UrgentBurstFactorLuma = 1; 6101 } 6102 6103 if (BytePerPixelInDETC > 0) { 6104 LinesInDETChroma = DETBufferSizeC / BytePerPixelInDETC / swath_width_chroma_ub; 6105 if (VRatio > 0) { 6106 DETBufferSizeInTimeChroma = dml_floor(LinesInDETChroma, SwathHeightC) * LineTime / VRatio; 6107 if (DETBufferSizeInTimeChroma - UrgentLatency <= 0) { 6108 *NotEnoughUrgentLatencyHiding = 1; 6109 *UrgentBurstFactorChroma = 0; 6110 } else { 6111 *UrgentBurstFactorChroma = DETBufferSizeInTimeChroma / (DETBufferSizeInTimeChroma - UrgentLatency); 6112 } 6113 } else { 6114 *UrgentBurstFactorChroma = 1; 6115 } 6116 } 6117 } 6118 6119 static void CalculatePixelDeliveryTimes( 6120 unsigned int NumberOfActivePlanes, 6121 double VRatio[], 6122 double VRatioChroma[], 6123 double VRatioPrefetchY[], 6124 double VRatioPrefetchC[], 6125 unsigned int swath_width_luma_ub[], 6126 unsigned int swath_width_chroma_ub[], 6127 unsigned int DPPPerPlane[], 6128 double HRatio[], 6129 double HRatioChroma[], 6130 double PixelClock[], 6131 double PSCL_THROUGHPUT[], 6132 double PSCL_THROUGHPUT_CHROMA[], 6133 double DPPCLK[], 6134 int BytePerPixelC[], 6135 enum scan_direction_class SourceScan[], 6136 unsigned int NumberOfCursors[], 6137 unsigned int CursorWidth[][DC__NUM_CURSOR__MAX], 6138 unsigned int CursorBPP[][DC__NUM_CURSOR__MAX], 6139 unsigned int BlockWidth256BytesY[], 6140 unsigned int BlockHeight256BytesY[], 6141 unsigned int BlockWidth256BytesC[], 6142 unsigned int BlockHeight256BytesC[], 6143 double DisplayPipeLineDeliveryTimeLuma[], 6144 double DisplayPipeLineDeliveryTimeChroma[], 6145 double DisplayPipeLineDeliveryTimeLumaPrefetch[], 6146 double DisplayPipeLineDeliveryTimeChromaPrefetch[], 6147 double DisplayPipeRequestDeliveryTimeLuma[], 6148 double DisplayPipeRequestDeliveryTimeChroma[], 6149 double DisplayPipeRequestDeliveryTimeLumaPrefetch[], 6150 double DisplayPipeRequestDeliveryTimeChromaPrefetch[], 6151 double CursorRequestDeliveryTime[], 6152 double CursorRequestDeliveryTimePrefetch[]) 6153 { 6154 double req_per_swath_ub; 6155 int k; 6156 6157 for (k = 0; k < NumberOfActivePlanes; ++k) { 6158 if (VRatio[k] <= 1) { 6159 DisplayPipeLineDeliveryTimeLuma[k] = swath_width_luma_ub[k] * DPPPerPlane[k] / HRatio[k] / PixelClock[k]; 6160 } else { 6161 DisplayPipeLineDeliveryTimeLuma[k] = swath_width_luma_ub[k] / PSCL_THROUGHPUT[k] / DPPCLK[k]; 6162 } 6163 6164 if (BytePerPixelC[k] == 0) { 6165 DisplayPipeLineDeliveryTimeChroma[k] = 0; 6166 } else { 6167 if (VRatioChroma[k] <= 1) { 6168 DisplayPipeLineDeliveryTimeChroma[k] = swath_width_chroma_ub[k] * DPPPerPlane[k] / HRatioChroma[k] / PixelClock[k]; 6169 } else { 6170 DisplayPipeLineDeliveryTimeChroma[k] = swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / DPPCLK[k]; 6171 } 6172 } 6173 6174 if (VRatioPrefetchY[k] <= 1) { 6175 DisplayPipeLineDeliveryTimeLumaPrefetch[k] = swath_width_luma_ub[k] * DPPPerPlane[k] / HRatio[k] / PixelClock[k]; 6176 } else { 6177 DisplayPipeLineDeliveryTimeLumaPrefetch[k] = swath_width_luma_ub[k] / PSCL_THROUGHPUT[k] / DPPCLK[k]; 6178 } 6179 6180 if (BytePerPixelC[k] == 0) { 6181 DisplayPipeLineDeliveryTimeChromaPrefetch[k] = 0; 6182 } else { 6183 if (VRatioPrefetchC[k] <= 1) { 6184 DisplayPipeLineDeliveryTimeChromaPrefetch[k] = swath_width_chroma_ub[k] * DPPPerPlane[k] / HRatioChroma[k] / PixelClock[k]; 6185 } else { 6186 DisplayPipeLineDeliveryTimeChromaPrefetch[k] = swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / DPPCLK[k]; 6187 } 6188 } 6189 } 6190 6191 for (k = 0; k < NumberOfActivePlanes; ++k) { 6192 if (SourceScan[k] != dm_vert) { 6193 req_per_swath_ub = swath_width_luma_ub[k] / BlockWidth256BytesY[k]; 6194 } else { 6195 req_per_swath_ub = swath_width_luma_ub[k] / BlockHeight256BytesY[k]; 6196 } 6197 DisplayPipeRequestDeliveryTimeLuma[k] = DisplayPipeLineDeliveryTimeLuma[k] / req_per_swath_ub; 6198 DisplayPipeRequestDeliveryTimeLumaPrefetch[k] = DisplayPipeLineDeliveryTimeLumaPrefetch[k] / req_per_swath_ub; 6199 if (BytePerPixelC[k] == 0) { 6200 DisplayPipeRequestDeliveryTimeChroma[k] = 0; 6201 DisplayPipeRequestDeliveryTimeChromaPrefetch[k] = 0; 6202 } else { 6203 if (SourceScan[k] != dm_vert) { 6204 req_per_swath_ub = swath_width_chroma_ub[k] / BlockWidth256BytesC[k]; 6205 } else { 6206 req_per_swath_ub = swath_width_chroma_ub[k] / BlockHeight256BytesC[k]; 6207 } 6208 DisplayPipeRequestDeliveryTimeChroma[k] = DisplayPipeLineDeliveryTimeChroma[k] / req_per_swath_ub; 6209 DisplayPipeRequestDeliveryTimeChromaPrefetch[k] = DisplayPipeLineDeliveryTimeChromaPrefetch[k] / req_per_swath_ub; 6210 } 6211 #ifdef __DML_VBA_DEBUG__ 6212 dml_print("DML::%s: k=%d : HRatio = %f\n", __func__, k, HRatio[k]); 6213 dml_print("DML::%s: k=%d : VRatio = %f\n", __func__, k, VRatio[k]); 6214 dml_print("DML::%s: k=%d : HRatioChroma = %f\n", __func__, k, HRatioChroma[k]); 6215 dml_print("DML::%s: k=%d : VRatioChroma = %f\n", __func__, k, VRatioChroma[k]); 6216 dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeLuma = %f\n", __func__, k, DisplayPipeLineDeliveryTimeLuma[k]); 6217 dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeLumaPrefetch = %f\n", __func__, k, DisplayPipeLineDeliveryTimeLumaPrefetch[k]); 6218 dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeChroma = %f\n", __func__, k, DisplayPipeLineDeliveryTimeChroma[k]); 6219 dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeChromaPrefetch = %f\n", __func__, k, DisplayPipeLineDeliveryTimeChromaPrefetch[k]); 6220 dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeLuma = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeLuma[k]); 6221 dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeLumaPrefetch = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeLumaPrefetch[k]); 6222 dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeChroma = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeChroma[k]); 6223 dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeChromaPrefetch = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeChromaPrefetch[k]); 6224 #endif 6225 } 6226 6227 for (k = 0; k < NumberOfActivePlanes; ++k) { 6228 int cursor_req_per_width; 6229 cursor_req_per_width = dml_ceil(CursorWidth[k][0] * CursorBPP[k][0] / 256 / 8, 1); 6230 if (NumberOfCursors[k] > 0) { 6231 if (VRatio[k] <= 1) { 6232 CursorRequestDeliveryTime[k] = CursorWidth[k][0] / HRatio[k] / PixelClock[k] / cursor_req_per_width; 6233 } else { 6234 CursorRequestDeliveryTime[k] = CursorWidth[k][0] / PSCL_THROUGHPUT[k] / DPPCLK[k] / cursor_req_per_width; 6235 } 6236 if (VRatioPrefetchY[k] <= 1) { 6237 CursorRequestDeliveryTimePrefetch[k] = CursorWidth[k][0] / HRatio[k] / PixelClock[k] / cursor_req_per_width; 6238 } else { 6239 CursorRequestDeliveryTimePrefetch[k] = CursorWidth[k][0] / PSCL_THROUGHPUT[k] / DPPCLK[k] / cursor_req_per_width; 6240 } 6241 } else { 6242 CursorRequestDeliveryTime[k] = 0; 6243 CursorRequestDeliveryTimePrefetch[k] = 0; 6244 } 6245 #ifdef __DML_VBA_DEBUG__ 6246 dml_print("DML::%s: k=%d : NumberOfCursors = %d\n", __func__, k, NumberOfCursors[k]); 6247 dml_print("DML::%s: k=%d : CursorRequestDeliveryTime = %f\n", __func__, k, CursorRequestDeliveryTime[k]); 6248 dml_print("DML::%s: k=%d : CursorRequestDeliveryTimePrefetch = %f\n", __func__, k, CursorRequestDeliveryTimePrefetch[k]); 6249 #endif 6250 } 6251 } 6252 6253 static void CalculateMetaAndPTETimes( 6254 int NumberOfActivePlanes, 6255 bool GPUVMEnable, 6256 int MetaChunkSize, 6257 int MinMetaChunkSizeBytes, 6258 int HTotal[], 6259 double VRatio[], 6260 double VRatioChroma[], 6261 double DestinationLinesToRequestRowInVBlank[], 6262 double DestinationLinesToRequestRowInImmediateFlip[], 6263 bool DCCEnable[], 6264 double PixelClock[], 6265 int BytePerPixelY[], 6266 int BytePerPixelC[], 6267 enum scan_direction_class SourceScan[], 6268 int dpte_row_height[], 6269 int dpte_row_height_chroma[], 6270 int meta_row_width[], 6271 int meta_row_width_chroma[], 6272 int meta_row_height[], 6273 int meta_row_height_chroma[], 6274 int meta_req_width[], 6275 int meta_req_width_chroma[], 6276 int meta_req_height[], 6277 int meta_req_height_chroma[], 6278 int dpte_group_bytes[], 6279 int PTERequestSizeY[], 6280 int PTERequestSizeC[], 6281 int PixelPTEReqWidthY[], 6282 int PixelPTEReqHeightY[], 6283 int PixelPTEReqWidthC[], 6284 int PixelPTEReqHeightC[], 6285 int dpte_row_width_luma_ub[], 6286 int dpte_row_width_chroma_ub[], 6287 double DST_Y_PER_PTE_ROW_NOM_L[], 6288 double DST_Y_PER_PTE_ROW_NOM_C[], 6289 double DST_Y_PER_META_ROW_NOM_L[], 6290 double DST_Y_PER_META_ROW_NOM_C[], 6291 double TimePerMetaChunkNominal[], 6292 double TimePerChromaMetaChunkNominal[], 6293 double TimePerMetaChunkVBlank[], 6294 double TimePerChromaMetaChunkVBlank[], 6295 double TimePerMetaChunkFlip[], 6296 double TimePerChromaMetaChunkFlip[], 6297 double time_per_pte_group_nom_luma[], 6298 double time_per_pte_group_vblank_luma[], 6299 double time_per_pte_group_flip_luma[], 6300 double time_per_pte_group_nom_chroma[], 6301 double time_per_pte_group_vblank_chroma[], 6302 double time_per_pte_group_flip_chroma[]) 6303 { 6304 unsigned int meta_chunk_width; 6305 unsigned int min_meta_chunk_width; 6306 unsigned int meta_chunk_per_row_int; 6307 unsigned int meta_row_remainder; 6308 unsigned int meta_chunk_threshold; 6309 unsigned int meta_chunks_per_row_ub; 6310 unsigned int meta_chunk_width_chroma; 6311 unsigned int min_meta_chunk_width_chroma; 6312 unsigned int meta_chunk_per_row_int_chroma; 6313 unsigned int meta_row_remainder_chroma; 6314 unsigned int meta_chunk_threshold_chroma; 6315 unsigned int meta_chunks_per_row_ub_chroma; 6316 unsigned int dpte_group_width_luma; 6317 unsigned int dpte_groups_per_row_luma_ub; 6318 unsigned int dpte_group_width_chroma; 6319 unsigned int dpte_groups_per_row_chroma_ub; 6320 int k; 6321 6322 for (k = 0; k < NumberOfActivePlanes; ++k) { 6323 DST_Y_PER_PTE_ROW_NOM_L[k] = dpte_row_height[k] / VRatio[k]; 6324 if (BytePerPixelC[k] == 0) { 6325 DST_Y_PER_PTE_ROW_NOM_C[k] = 0; 6326 } else { 6327 DST_Y_PER_PTE_ROW_NOM_C[k] = dpte_row_height_chroma[k] / VRatioChroma[k]; 6328 } 6329 DST_Y_PER_META_ROW_NOM_L[k] = meta_row_height[k] / VRatio[k]; 6330 if (BytePerPixelC[k] == 0) { 6331 DST_Y_PER_META_ROW_NOM_C[k] = 0; 6332 } else { 6333 DST_Y_PER_META_ROW_NOM_C[k] = meta_row_height_chroma[k] / VRatioChroma[k]; 6334 } 6335 } 6336 6337 for (k = 0; k < NumberOfActivePlanes; ++k) { 6338 if (DCCEnable[k] == true) { 6339 meta_chunk_width = MetaChunkSize * 1024 * 256 / BytePerPixelY[k] / meta_row_height[k]; 6340 min_meta_chunk_width = MinMetaChunkSizeBytes * 256 / BytePerPixelY[k] / meta_row_height[k]; 6341 meta_chunk_per_row_int = meta_row_width[k] / meta_chunk_width; 6342 meta_row_remainder = meta_row_width[k] % meta_chunk_width; 6343 if (SourceScan[k] != dm_vert) { 6344 meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_width[k]; 6345 } else { 6346 meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_height[k]; 6347 } 6348 if (meta_row_remainder <= meta_chunk_threshold) { 6349 meta_chunks_per_row_ub = meta_chunk_per_row_int + 1; 6350 } else { 6351 meta_chunks_per_row_ub = meta_chunk_per_row_int + 2; 6352 } 6353 TimePerMetaChunkNominal[k] = meta_row_height[k] / VRatio[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub; 6354 TimePerMetaChunkVBlank[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub; 6355 TimePerMetaChunkFlip[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub; 6356 if (BytePerPixelC[k] == 0) { 6357 TimePerChromaMetaChunkNominal[k] = 0; 6358 TimePerChromaMetaChunkVBlank[k] = 0; 6359 TimePerChromaMetaChunkFlip[k] = 0; 6360 } else { 6361 meta_chunk_width_chroma = MetaChunkSize * 1024 * 256 / BytePerPixelC[k] / meta_row_height_chroma[k]; 6362 min_meta_chunk_width_chroma = MinMetaChunkSizeBytes * 256 / BytePerPixelC[k] / meta_row_height_chroma[k]; 6363 meta_chunk_per_row_int_chroma = (double) meta_row_width_chroma[k] / meta_chunk_width_chroma; 6364 meta_row_remainder_chroma = meta_row_width_chroma[k] % meta_chunk_width_chroma; 6365 if (SourceScan[k] != dm_vert) { 6366 meta_chunk_threshold_chroma = 2 * min_meta_chunk_width_chroma - meta_req_width_chroma[k]; 6367 } else { 6368 meta_chunk_threshold_chroma = 2 * min_meta_chunk_width_chroma - meta_req_height_chroma[k]; 6369 } 6370 if (meta_row_remainder_chroma <= meta_chunk_threshold_chroma) { 6371 meta_chunks_per_row_ub_chroma = meta_chunk_per_row_int_chroma + 1; 6372 } else { 6373 meta_chunks_per_row_ub_chroma = meta_chunk_per_row_int_chroma + 2; 6374 } 6375 TimePerChromaMetaChunkNominal[k] = meta_row_height_chroma[k] / VRatioChroma[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma; 6376 TimePerChromaMetaChunkVBlank[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma; 6377 TimePerChromaMetaChunkFlip[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma; 6378 } 6379 } else { 6380 TimePerMetaChunkNominal[k] = 0; 6381 TimePerMetaChunkVBlank[k] = 0; 6382 TimePerMetaChunkFlip[k] = 0; 6383 TimePerChromaMetaChunkNominal[k] = 0; 6384 TimePerChromaMetaChunkVBlank[k] = 0; 6385 TimePerChromaMetaChunkFlip[k] = 0; 6386 } 6387 } 6388 6389 for (k = 0; k < NumberOfActivePlanes; ++k) { 6390 if (GPUVMEnable == true) { 6391 if (SourceScan[k] != dm_vert) { 6392 dpte_group_width_luma = dpte_group_bytes[k] / PTERequestSizeY[k] * PixelPTEReqWidthY[k]; 6393 } else { 6394 dpte_group_width_luma = dpte_group_bytes[k] / PTERequestSizeY[k] * PixelPTEReqHeightY[k]; 6395 } 6396 dpte_groups_per_row_luma_ub = dml_ceil(1.0 * dpte_row_width_luma_ub[k] / dpte_group_width_luma, 1); 6397 time_per_pte_group_nom_luma[k] = DST_Y_PER_PTE_ROW_NOM_L[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub; 6398 time_per_pte_group_vblank_luma[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub; 6399 time_per_pte_group_flip_luma[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub; 6400 if (BytePerPixelC[k] == 0) { 6401 time_per_pte_group_nom_chroma[k] = 0; 6402 time_per_pte_group_vblank_chroma[k] = 0; 6403 time_per_pte_group_flip_chroma[k] = 0; 6404 } else { 6405 if (SourceScan[k] != dm_vert) { 6406 dpte_group_width_chroma = dpte_group_bytes[k] / PTERequestSizeC[k] * PixelPTEReqWidthC[k]; 6407 } else { 6408 dpte_group_width_chroma = dpte_group_bytes[k] / PTERequestSizeC[k] * PixelPTEReqHeightC[k]; 6409 } 6410 dpte_groups_per_row_chroma_ub = dml_ceil(1.0 * dpte_row_width_chroma_ub[k] / dpte_group_width_chroma, 1); 6411 time_per_pte_group_nom_chroma[k] = DST_Y_PER_PTE_ROW_NOM_C[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub; 6412 time_per_pte_group_vblank_chroma[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub; 6413 time_per_pte_group_flip_chroma[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub; 6414 } 6415 } else { 6416 time_per_pte_group_nom_luma[k] = 0; 6417 time_per_pte_group_vblank_luma[k] = 0; 6418 time_per_pte_group_flip_luma[k] = 0; 6419 time_per_pte_group_nom_chroma[k] = 0; 6420 time_per_pte_group_vblank_chroma[k] = 0; 6421 time_per_pte_group_flip_chroma[k] = 0; 6422 } 6423 } 6424 } 6425 6426 static void CalculateVMGroupAndRequestTimes( 6427 unsigned int NumberOfActivePlanes, 6428 bool GPUVMEnable, 6429 unsigned int GPUVMMaxPageTableLevels, 6430 unsigned int HTotal[], 6431 int BytePerPixelC[], 6432 double DestinationLinesToRequestVMInVBlank[], 6433 double DestinationLinesToRequestVMInImmediateFlip[], 6434 bool DCCEnable[], 6435 double PixelClock[], 6436 int dpte_row_width_luma_ub[], 6437 int dpte_row_width_chroma_ub[], 6438 int vm_group_bytes[], 6439 unsigned int dpde0_bytes_per_frame_ub_l[], 6440 unsigned int dpde0_bytes_per_frame_ub_c[], 6441 int meta_pte_bytes_per_frame_ub_l[], 6442 int meta_pte_bytes_per_frame_ub_c[], 6443 double TimePerVMGroupVBlank[], 6444 double TimePerVMGroupFlip[], 6445 double TimePerVMRequestVBlank[], 6446 double TimePerVMRequestFlip[]) 6447 { 6448 int num_group_per_lower_vm_stage; 6449 int num_req_per_lower_vm_stage; 6450 int k; 6451 6452 for (k = 0; k < NumberOfActivePlanes; ++k) { 6453 if (GPUVMEnable == true && (DCCEnable[k] == true || GPUVMMaxPageTableLevels > 1)) { 6454 if (DCCEnable[k] == false) { 6455 if (BytePerPixelC[k] > 0) { 6456 num_group_per_lower_vm_stage = dml_ceil((double) (dpde0_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1) 6457 + dml_ceil((double) (dpde0_bytes_per_frame_ub_c[k]) / (double) (vm_group_bytes[k]), 1); 6458 } else { 6459 num_group_per_lower_vm_stage = dml_ceil((double) (dpde0_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1); 6460 } 6461 } else { 6462 if (GPUVMMaxPageTableLevels == 1) { 6463 if (BytePerPixelC[k] > 0) { 6464 num_group_per_lower_vm_stage = dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1) 6465 + dml_ceil((double) (meta_pte_bytes_per_frame_ub_c[k]) / (double) (vm_group_bytes[k]), 1); 6466 } else { 6467 num_group_per_lower_vm_stage = dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1); 6468 } 6469 } else { 6470 if (BytePerPixelC[k] > 0) { 6471 num_group_per_lower_vm_stage = 2 + dml_ceil((double) (dpde0_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1) 6472 + dml_ceil((double) (dpde0_bytes_per_frame_ub_c[k]) / (double) (vm_group_bytes[k]), 1) 6473 + dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1) 6474 + dml_ceil((double) (meta_pte_bytes_per_frame_ub_c[k]) / (double) (vm_group_bytes[k]), 1); 6475 } else { 6476 num_group_per_lower_vm_stage = 1 + dml_ceil((double) (dpde0_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1) 6477 + dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1); 6478 } 6479 } 6480 } 6481 6482 if (DCCEnable[k] == false) { 6483 if (BytePerPixelC[k] > 0) { 6484 num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k] / 64 + dpde0_bytes_per_frame_ub_c[k] / 64; 6485 } else { 6486 num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k] / 64; 6487 } 6488 } else { 6489 if (GPUVMMaxPageTableLevels == 1) { 6490 if (BytePerPixelC[k] > 0) { 6491 num_req_per_lower_vm_stage = meta_pte_bytes_per_frame_ub_l[k] / 64 + meta_pte_bytes_per_frame_ub_c[k] / 64; 6492 } else { 6493 num_req_per_lower_vm_stage = meta_pte_bytes_per_frame_ub_l[k] / 64; 6494 } 6495 } else { 6496 if (BytePerPixelC[k] > 0) { 6497 num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k] / 64 + dpde0_bytes_per_frame_ub_c[k] / 64 6498 + meta_pte_bytes_per_frame_ub_l[k] / 64 + meta_pte_bytes_per_frame_ub_c[k] / 64; 6499 } else { 6500 num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k] / 64 + meta_pte_bytes_per_frame_ub_l[k] / 64; 6501 } 6502 } 6503 } 6504 6505 TimePerVMGroupVBlank[k] = DestinationLinesToRequestVMInVBlank[k] * HTotal[k] / PixelClock[k] / num_group_per_lower_vm_stage; 6506 TimePerVMGroupFlip[k] = DestinationLinesToRequestVMInImmediateFlip[k] * HTotal[k] / PixelClock[k] / num_group_per_lower_vm_stage; 6507 TimePerVMRequestVBlank[k] = DestinationLinesToRequestVMInVBlank[k] * HTotal[k] / PixelClock[k] / num_req_per_lower_vm_stage; 6508 TimePerVMRequestFlip[k] = DestinationLinesToRequestVMInImmediateFlip[k] * HTotal[k] / PixelClock[k] / num_req_per_lower_vm_stage; 6509 6510 if (GPUVMMaxPageTableLevels > 2) { 6511 TimePerVMGroupVBlank[k] = TimePerVMGroupVBlank[k] / 2; 6512 TimePerVMGroupFlip[k] = TimePerVMGroupFlip[k] / 2; 6513 TimePerVMRequestVBlank[k] = TimePerVMRequestVBlank[k] / 2; 6514 TimePerVMRequestFlip[k] = TimePerVMRequestFlip[k] / 2; 6515 } 6516 6517 } else { 6518 TimePerVMGroupVBlank[k] = 0; 6519 TimePerVMGroupFlip[k] = 0; 6520 TimePerVMRequestVBlank[k] = 0; 6521 TimePerVMRequestFlip[k] = 0; 6522 } 6523 } 6524 } 6525 6526 static void CalculateStutterEfficiency( 6527 struct display_mode_lib *mode_lib, 6528 int CompressedBufferSizeInkByte, 6529 bool UnboundedRequestEnabled, 6530 int ConfigReturnBufferSizeInKByte, 6531 int MetaFIFOSizeInKEntries, 6532 int ZeroSizeBufferEntries, 6533 int NumberOfActivePlanes, 6534 int ROBBufferSizeInKByte, 6535 double TotalDataReadBandwidth, 6536 double DCFCLK, 6537 double ReturnBW, 6538 double COMPBUF_RESERVED_SPACE_64B, 6539 double COMPBUF_RESERVED_SPACE_ZS, 6540 double SRExitTime, 6541 double SRExitZ8Time, 6542 bool SynchronizedVBlank, 6543 double Z8StutterEnterPlusExitWatermark, 6544 double StutterEnterPlusExitWatermark, 6545 bool ProgressiveToInterlaceUnitInOPP, 6546 bool Interlace[], 6547 double MinTTUVBlank[], 6548 int DPPPerPlane[], 6549 unsigned int DETBufferSizeY[], 6550 int BytePerPixelY[], 6551 double BytePerPixelDETY[], 6552 double SwathWidthY[], 6553 int SwathHeightY[], 6554 int SwathHeightC[], 6555 double NetDCCRateLuma[], 6556 double NetDCCRateChroma[], 6557 double DCCFractionOfZeroSizeRequestsLuma[], 6558 double DCCFractionOfZeroSizeRequestsChroma[], 6559 int HTotal[], 6560 int VTotal[], 6561 double PixelClock[], 6562 double VRatio[], 6563 enum scan_direction_class SourceScan[], 6564 int BlockHeight256BytesY[], 6565 int BlockWidth256BytesY[], 6566 int BlockHeight256BytesC[], 6567 int BlockWidth256BytesC[], 6568 int DCCYMaxUncompressedBlock[], 6569 int DCCCMaxUncompressedBlock[], 6570 int VActive[], 6571 bool DCCEnable[], 6572 bool WritebackEnable[], 6573 double ReadBandwidthPlaneLuma[], 6574 double ReadBandwidthPlaneChroma[], 6575 double meta_row_bw[], 6576 double dpte_row_bw[], 6577 double *StutterEfficiencyNotIncludingVBlank, 6578 double *StutterEfficiency, 6579 int *NumberOfStutterBurstsPerFrame, 6580 double *Z8StutterEfficiencyNotIncludingVBlank, 6581 double *Z8StutterEfficiency, 6582 int *Z8NumberOfStutterBurstsPerFrame, 6583 double *StutterPeriod) 6584 { 6585 struct vba_vars_st *v = &mode_lib->vba; 6586 6587 double DETBufferingTimeY; 6588 double SwathWidthYCriticalPlane = 0; 6589 double VActiveTimeCriticalPlane = 0; 6590 double FrameTimeCriticalPlane = 0; 6591 int BytePerPixelYCriticalPlane = 0; 6592 double LinesToFinishSwathTransferStutterCriticalPlane = 0; 6593 double MinTTUVBlankCriticalPlane = 0; 6594 double TotalCompressedReadBandwidth; 6595 double TotalRowReadBandwidth; 6596 double AverageDCCCompressionRate; 6597 double EffectiveCompressedBufferSize; 6598 double PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer; 6599 double StutterBurstTime; 6600 int TotalActiveWriteback; 6601 double LinesInDETY; 6602 double LinesInDETYRoundedDownToSwath; 6603 double MaximumEffectiveCompressionLuma; 6604 double MaximumEffectiveCompressionChroma; 6605 double TotalZeroSizeRequestReadBandwidth; 6606 double TotalZeroSizeCompressedReadBandwidth; 6607 double AverageDCCZeroSizeFraction; 6608 double AverageZeroSizeCompressionRate; 6609 int TotalNumberOfActiveOTG = 0; 6610 double LastStutterPeriod = 0.0; 6611 double LastZ8StutterPeriod = 0.0; 6612 int k; 6613 6614 TotalZeroSizeRequestReadBandwidth = 0; 6615 TotalZeroSizeCompressedReadBandwidth = 0; 6616 TotalRowReadBandwidth = 0; 6617 TotalCompressedReadBandwidth = 0; 6618 6619 for (k = 0; k < NumberOfActivePlanes; ++k) { 6620 if (DCCEnable[k] == true) { 6621 if ((SourceScan[k] == dm_vert && BlockWidth256BytesY[k] > SwathHeightY[k]) || (SourceScan[k] != dm_vert && BlockHeight256BytesY[k] > SwathHeightY[k]) 6622 || DCCYMaxUncompressedBlock[k] < 256) { 6623 MaximumEffectiveCompressionLuma = 2; 6624 } else { 6625 MaximumEffectiveCompressionLuma = 4; 6626 } 6627 TotalCompressedReadBandwidth = TotalCompressedReadBandwidth + ReadBandwidthPlaneLuma[k] / dml_min(NetDCCRateLuma[k], MaximumEffectiveCompressionLuma); 6628 TotalZeroSizeRequestReadBandwidth = TotalZeroSizeRequestReadBandwidth + ReadBandwidthPlaneLuma[k] * DCCFractionOfZeroSizeRequestsLuma[k]; 6629 TotalZeroSizeCompressedReadBandwidth = TotalZeroSizeCompressedReadBandwidth 6630 + ReadBandwidthPlaneLuma[k] * DCCFractionOfZeroSizeRequestsLuma[k] / MaximumEffectiveCompressionLuma; 6631 if (ReadBandwidthPlaneChroma[k] > 0) { 6632 if ((SourceScan[k] == dm_vert && BlockWidth256BytesC[k] > SwathHeightC[k]) 6633 || (SourceScan[k] != dm_vert && BlockHeight256BytesC[k] > SwathHeightC[k]) || DCCCMaxUncompressedBlock[k] < 256) { 6634 MaximumEffectiveCompressionChroma = 2; 6635 } else { 6636 MaximumEffectiveCompressionChroma = 4; 6637 } 6638 TotalCompressedReadBandwidth = TotalCompressedReadBandwidth 6639 + ReadBandwidthPlaneChroma[k] / dml_min(NetDCCRateChroma[k], MaximumEffectiveCompressionChroma); 6640 TotalZeroSizeRequestReadBandwidth = TotalZeroSizeRequestReadBandwidth + ReadBandwidthPlaneChroma[k] * DCCFractionOfZeroSizeRequestsChroma[k]; 6641 TotalZeroSizeCompressedReadBandwidth = TotalZeroSizeCompressedReadBandwidth 6642 + ReadBandwidthPlaneChroma[k] * DCCFractionOfZeroSizeRequestsChroma[k] / MaximumEffectiveCompressionChroma; 6643 } 6644 } else { 6645 TotalCompressedReadBandwidth = TotalCompressedReadBandwidth + ReadBandwidthPlaneLuma[k] + ReadBandwidthPlaneChroma[k]; 6646 } 6647 TotalRowReadBandwidth = TotalRowReadBandwidth + DPPPerPlane[k] * (meta_row_bw[k] + dpte_row_bw[k]); 6648 } 6649 6650 AverageDCCCompressionRate = TotalDataReadBandwidth / TotalCompressedReadBandwidth; 6651 AverageDCCZeroSizeFraction = TotalZeroSizeRequestReadBandwidth / TotalDataReadBandwidth; 6652 6653 #ifdef __DML_VBA_DEBUG__ 6654 dml_print("DML::%s: TotalCompressedReadBandwidth = %f\n", __func__, TotalCompressedReadBandwidth); 6655 dml_print("DML::%s: TotalZeroSizeRequestReadBandwidth = %f\n", __func__, TotalZeroSizeRequestReadBandwidth); 6656 dml_print("DML::%s: TotalZeroSizeCompressedReadBandwidth = %f\n", __func__, TotalZeroSizeCompressedReadBandwidth); 6657 dml_print("DML::%s: MaximumEffectiveCompressionLuma = %f\n", __func__, MaximumEffectiveCompressionLuma); 6658 dml_print("DML::%s: MaximumEffectiveCompressionChroma = %f\n", __func__, MaximumEffectiveCompressionChroma); 6659 dml_print("DML::%s: AverageDCCCompressionRate = %f\n", __func__, AverageDCCCompressionRate); 6660 dml_print("DML::%s: AverageDCCZeroSizeFraction = %f\n", __func__, AverageDCCZeroSizeFraction); 6661 dml_print("DML::%s: CompressedBufferSizeInkByte = %d\n", __func__, CompressedBufferSizeInkByte); 6662 #endif 6663 6664 if (AverageDCCZeroSizeFraction == 1) { 6665 AverageZeroSizeCompressionRate = TotalZeroSizeRequestReadBandwidth / TotalZeroSizeCompressedReadBandwidth; 6666 EffectiveCompressedBufferSize = MetaFIFOSizeInKEntries * 1024 * 64 * AverageZeroSizeCompressionRate + (ZeroSizeBufferEntries - COMPBUF_RESERVED_SPACE_ZS) * 64 * AverageZeroSizeCompressionRate; 6667 } else if (AverageDCCZeroSizeFraction > 0) { 6668 AverageZeroSizeCompressionRate = TotalZeroSizeRequestReadBandwidth / TotalZeroSizeCompressedReadBandwidth; 6669 EffectiveCompressedBufferSize = dml_min( 6670 CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate, 6671 MetaFIFOSizeInKEntries * 1024 * 64 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate + 1 / AverageDCCCompressionRate)) 6672 + dml_min((ROBBufferSizeInKByte * 1024 - COMPBUF_RESERVED_SPACE_64B * 64) * AverageDCCCompressionRate, 6673 (ZeroSizeBufferEntries - COMPBUF_RESERVED_SPACE_ZS) * 64 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate)); 6674 dml_print("DML::%s: min 1 = %f\n", __func__, CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate); 6675 dml_print( 6676 "DML::%s: min 2 = %f\n", 6677 __func__, 6678 MetaFIFOSizeInKEntries * 1024 * 64 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate + 1 / AverageDCCCompressionRate)); 6679 dml_print("DML::%s: min 3 = %f\n", __func__, ROBBufferSizeInKByte * 1024 * AverageDCCCompressionRate); 6680 dml_print("DML::%s: min 4 = %f\n", __func__, ZeroSizeBufferEntries * 64 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate)); 6681 } else { 6682 EffectiveCompressedBufferSize = dml_min( 6683 CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate, 6684 MetaFIFOSizeInKEntries * 1024 * 64 * AverageDCCCompressionRate) + (ROBBufferSizeInKByte * 1024 - COMPBUF_RESERVED_SPACE_64B * 64) * AverageDCCCompressionRate; 6685 dml_print("DML::%s: min 1 = %f\n", __func__, CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate); 6686 dml_print("DML::%s: min 2 = %f\n", __func__, MetaFIFOSizeInKEntries * 1024 * 64 * AverageDCCCompressionRate); 6687 } 6688 6689 #ifdef __DML_VBA_DEBUG__ 6690 dml_print("DML::%s: MetaFIFOSizeInKEntries = %d\n", __func__, MetaFIFOSizeInKEntries); 6691 dml_print("DML::%s: AverageZeroSizeCompressionRate = %f\n", __func__, AverageZeroSizeCompressionRate); 6692 dml_print("DML::%s: EffectiveCompressedBufferSize = %f\n", __func__, EffectiveCompressedBufferSize); 6693 #endif 6694 6695 *StutterPeriod = 0; 6696 for (k = 0; k < NumberOfActivePlanes; ++k) { 6697 LinesInDETY = (DETBufferSizeY[k] + (UnboundedRequestEnabled == true ? EffectiveCompressedBufferSize : 0) * ReadBandwidthPlaneLuma[k] / TotalDataReadBandwidth) 6698 / BytePerPixelDETY[k] / SwathWidthY[k]; 6699 LinesInDETYRoundedDownToSwath = dml_floor(LinesInDETY, SwathHeightY[k]); 6700 DETBufferingTimeY = LinesInDETYRoundedDownToSwath * (HTotal[k] / PixelClock[k]) / VRatio[k]; 6701 #ifdef __DML_VBA_DEBUG__ 6702 dml_print("DML::%s: k=%0d DETBufferSizeY = %f\n", __func__, k, DETBufferSizeY[k]); 6703 dml_print("DML::%s: k=%0d BytePerPixelDETY = %f\n", __func__, k, BytePerPixelDETY[k]); 6704 dml_print("DML::%s: k=%0d SwathWidthY = %f\n", __func__, k, SwathWidthY[k]); 6705 dml_print("DML::%s: k=%0d ReadBandwidthPlaneLuma = %f\n", __func__, k, ReadBandwidthPlaneLuma[k]); 6706 dml_print("DML::%s: k=%0d TotalDataReadBandwidth = %f\n", __func__, k, TotalDataReadBandwidth); 6707 dml_print("DML::%s: k=%0d LinesInDETY = %f\n", __func__, k, LinesInDETY); 6708 dml_print("DML::%s: k=%0d LinesInDETYRoundedDownToSwath = %f\n", __func__, k, LinesInDETYRoundedDownToSwath); 6709 dml_print("DML::%s: k=%0d HTotal = %d\n", __func__, k, HTotal[k]); 6710 dml_print("DML::%s: k=%0d PixelClock = %f\n", __func__, k, PixelClock[k]); 6711 dml_print("DML::%s: k=%0d VRatio = %f\n", __func__, k, VRatio[k]); 6712 dml_print("DML::%s: k=%0d DETBufferingTimeY = %f\n", __func__, k, DETBufferingTimeY); 6713 dml_print("DML::%s: k=%0d PixelClock = %f\n", __func__, k, PixelClock[k]); 6714 #endif 6715 6716 if (k == 0 || DETBufferingTimeY < *StutterPeriod) { 6717 bool isInterlaceTiming = Interlace[k] && !ProgressiveToInterlaceUnitInOPP; 6718 6719 *StutterPeriod = DETBufferingTimeY; 6720 FrameTimeCriticalPlane = (isInterlaceTiming ? dml_floor(VTotal[k] / 2.0, 1.0) : VTotal[k]) * HTotal[k] / PixelClock[k]; 6721 VActiveTimeCriticalPlane = (isInterlaceTiming ? dml_floor(VActive[k] / 2.0, 1.0) : VActive[k]) * HTotal[k] / PixelClock[k]; 6722 BytePerPixelYCriticalPlane = BytePerPixelY[k]; 6723 SwathWidthYCriticalPlane = SwathWidthY[k]; 6724 LinesToFinishSwathTransferStutterCriticalPlane = SwathHeightY[k] - (LinesInDETY - LinesInDETYRoundedDownToSwath); 6725 MinTTUVBlankCriticalPlane = MinTTUVBlank[k]; 6726 6727 #ifdef __DML_VBA_DEBUG__ 6728 dml_print("DML::%s: StutterPeriod = %f\n", __func__, *StutterPeriod); 6729 dml_print("DML::%s: MinTTUVBlankCriticalPlane = %f\n", __func__, MinTTUVBlankCriticalPlane); 6730 dml_print("DML::%s: FrameTimeCriticalPlane = %f\n", __func__, FrameTimeCriticalPlane); 6731 dml_print("DML::%s: VActiveTimeCriticalPlane = %f\n", __func__, VActiveTimeCriticalPlane); 6732 dml_print("DML::%s: BytePerPixelYCriticalPlane = %d\n", __func__, BytePerPixelYCriticalPlane); 6733 dml_print("DML::%s: SwathWidthYCriticalPlane = %f\n", __func__, SwathWidthYCriticalPlane); 6734 dml_print("DML::%s: LinesToFinishSwathTransferStutterCriticalPlane = %f\n", __func__, LinesToFinishSwathTransferStutterCriticalPlane); 6735 #endif 6736 } 6737 } 6738 6739 PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer = dml_min(*StutterPeriod * TotalDataReadBandwidth, EffectiveCompressedBufferSize); 6740 #ifdef __DML_VBA_DEBUG__ 6741 dml_print("DML::%s: ROBBufferSizeInKByte = %d\n", __func__, ROBBufferSizeInKByte); 6742 dml_print("DML::%s: AverageDCCCompressionRate = %f\n", __func__, AverageDCCCompressionRate); 6743 dml_print("DML::%s: StutterPeriod * TotalDataReadBandwidth = %f\n", __func__, *StutterPeriod * TotalDataReadBandwidth); 6744 dml_print("DML::%s: ROBBufferSizeInKByte * 1024 * AverageDCCCompressionRate + EffectiveCompressedBufferSize = %f\n", __func__, ROBBufferSizeInKByte * 1024 * AverageDCCCompressionRate + EffectiveCompressedBufferSize); 6745 dml_print("DML::%s: EffectiveCompressedBufferSize = %f\n", __func__, EffectiveCompressedBufferSize); 6746 dml_print("DML::%s: PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer = %f\n", __func__, PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer); 6747 dml_print("DML::%s: ReturnBW = %f\n", __func__, ReturnBW); 6748 dml_print("DML::%s: TotalDataReadBandwidth = %f\n", __func__, TotalDataReadBandwidth); 6749 dml_print("DML::%s: TotalRowReadBandwidth = %f\n", __func__, TotalRowReadBandwidth); 6750 dml_print("DML::%s: DCFCLK = %f\n", __func__, DCFCLK); 6751 #endif 6752 6753 StutterBurstTime = PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer / AverageDCCCompressionRate / ReturnBW 6754 + (*StutterPeriod * TotalDataReadBandwidth - PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer) / (DCFCLK * 64) 6755 + *StutterPeriod * TotalRowReadBandwidth / ReturnBW; 6756 #ifdef __DML_VBA_DEBUG__ 6757 dml_print("DML::%s: Part 1 = %f\n", __func__, PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer / AverageDCCCompressionRate / ReturnBW); 6758 dml_print("DML::%s: StutterPeriod * TotalDataReadBandwidth = %f\n", __func__, (*StutterPeriod * TotalDataReadBandwidth)); 6759 dml_print("DML::%s: Part 2 = %f\n", __func__, (*StutterPeriod * TotalDataReadBandwidth - PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer) / (DCFCLK * 64)); 6760 dml_print("DML::%s: Part 3 = %f\n", __func__, *StutterPeriod * TotalRowReadBandwidth / ReturnBW); 6761 dml_print("DML::%s: StutterBurstTime = %f\n", __func__, StutterBurstTime); 6762 #endif 6763 StutterBurstTime = dml_max(StutterBurstTime, LinesToFinishSwathTransferStutterCriticalPlane * BytePerPixelYCriticalPlane * SwathWidthYCriticalPlane / ReturnBW); 6764 6765 dml_print( 6766 "DML::%s: Time to finish residue swath=%f\n", 6767 __func__, 6768 LinesToFinishSwathTransferStutterCriticalPlane * BytePerPixelYCriticalPlane * SwathWidthYCriticalPlane / ReturnBW); 6769 6770 TotalActiveWriteback = 0; 6771 for (k = 0; k < NumberOfActivePlanes; ++k) { 6772 if (WritebackEnable[k]) { 6773 TotalActiveWriteback = TotalActiveWriteback + 1; 6774 } 6775 } 6776 6777 if (TotalActiveWriteback == 0) { 6778 #ifdef __DML_VBA_DEBUG__ 6779 dml_print("DML::%s: SRExitTime = %f\n", __func__, SRExitTime); 6780 dml_print("DML::%s: SRExitZ8Time = %f\n", __func__, SRExitZ8Time); 6781 dml_print("DML::%s: StutterBurstTime = %f (final)\n", __func__, StutterBurstTime); 6782 dml_print("DML::%s: StutterPeriod = %f\n", __func__, *StutterPeriod); 6783 #endif 6784 *StutterEfficiencyNotIncludingVBlank = dml_max(0., 1 - (SRExitTime + StutterBurstTime) / *StutterPeriod) * 100; 6785 *Z8StutterEfficiencyNotIncludingVBlank = dml_max(0., 1 - (SRExitZ8Time + StutterBurstTime) / *StutterPeriod) * 100; 6786 *NumberOfStutterBurstsPerFrame = (*StutterEfficiencyNotIncludingVBlank > 0 ? dml_ceil(VActiveTimeCriticalPlane / *StutterPeriod, 1) : 0); 6787 *Z8NumberOfStutterBurstsPerFrame = (*Z8StutterEfficiencyNotIncludingVBlank > 0 ? dml_ceil(VActiveTimeCriticalPlane / *StutterPeriod, 1) : 0); 6788 } else { 6789 *StutterEfficiencyNotIncludingVBlank = 0.; 6790 *Z8StutterEfficiencyNotIncludingVBlank = 0.; 6791 *NumberOfStutterBurstsPerFrame = 0; 6792 *Z8NumberOfStutterBurstsPerFrame = 0; 6793 } 6794 #ifdef __DML_VBA_DEBUG__ 6795 dml_print("DML::%s: VActiveTimeCriticalPlane = %f\n", __func__, VActiveTimeCriticalPlane); 6796 dml_print("DML::%s: StutterEfficiencyNotIncludingVBlank = %f\n", __func__, *StutterEfficiencyNotIncludingVBlank); 6797 dml_print("DML::%s: Z8StutterEfficiencyNotIncludingVBlank = %f\n", __func__, *Z8StutterEfficiencyNotIncludingVBlank); 6798 dml_print("DML::%s: NumberOfStutterBurstsPerFrame = %d\n", __func__, *NumberOfStutterBurstsPerFrame); 6799 dml_print("DML::%s: Z8NumberOfStutterBurstsPerFrame = %d\n", __func__, *Z8NumberOfStutterBurstsPerFrame); 6800 #endif 6801 6802 for (k = 0; k < NumberOfActivePlanes; ++k) { 6803 if (v->BlendingAndTiming[k] == k) { 6804 TotalNumberOfActiveOTG = TotalNumberOfActiveOTG + 1; 6805 } 6806 } 6807 6808 if (*StutterEfficiencyNotIncludingVBlank > 0) { 6809 LastStutterPeriod = VActiveTimeCriticalPlane - (*NumberOfStutterBurstsPerFrame - 1) * *StutterPeriod; 6810 6811 if ((SynchronizedVBlank || TotalNumberOfActiveOTG == 1) && LastStutterPeriod + MinTTUVBlankCriticalPlane > StutterEnterPlusExitWatermark) { 6812 *StutterEfficiency = (1 - (*NumberOfStutterBurstsPerFrame * SRExitTime + StutterBurstTime * VActiveTimeCriticalPlane 6813 / *StutterPeriod) / FrameTimeCriticalPlane) * 100; 6814 } else { 6815 *StutterEfficiency = *StutterEfficiencyNotIncludingVBlank; 6816 } 6817 } else { 6818 *StutterEfficiency = 0; 6819 } 6820 6821 if (*Z8StutterEfficiencyNotIncludingVBlank > 0) { 6822 LastZ8StutterPeriod = VActiveTimeCriticalPlane - (*NumberOfStutterBurstsPerFrame - 1) * *StutterPeriod; 6823 if ((SynchronizedVBlank || TotalNumberOfActiveOTG == 1) && LastZ8StutterPeriod + MinTTUVBlankCriticalPlane > Z8StutterEnterPlusExitWatermark) { 6824 *Z8StutterEfficiency = (1 - (*NumberOfStutterBurstsPerFrame * SRExitZ8Time + StutterBurstTime * VActiveTimeCriticalPlane 6825 / *StutterPeriod) / FrameTimeCriticalPlane) * 100; 6826 } else { 6827 *Z8StutterEfficiency = *Z8StutterEfficiencyNotIncludingVBlank; 6828 } 6829 } else { 6830 *Z8StutterEfficiency = 0.; 6831 } 6832 6833 dml_print("DML::%s: LastZ8StutterPeriod = %f\n", __func__, LastZ8StutterPeriod); 6834 dml_print("DML::%s: Z8StutterEnterPlusExitWatermark = %f\n", __func__, Z8StutterEnterPlusExitWatermark); 6835 dml_print("DML::%s: StutterBurstTime = %f\n", __func__, StutterBurstTime); 6836 dml_print("DML::%s: StutterPeriod = %f\n", __func__, *StutterPeriod); 6837 dml_print("DML::%s: StutterEfficiency = %f\n", __func__, *StutterEfficiency); 6838 dml_print("DML::%s: Z8StutterEfficiency = %f\n", __func__, *Z8StutterEfficiency); 6839 dml_print("DML::%s: StutterEfficiencyNotIncludingVBlank = %f\n", __func__, *StutterEfficiencyNotIncludingVBlank); 6840 dml_print("DML::%s: Z8NumberOfStutterBurstsPerFrame = %d\n", __func__, *Z8NumberOfStutterBurstsPerFrame); 6841 } 6842 6843 static void CalculateSwathAndDETConfiguration( 6844 bool ForceSingleDPP, 6845 int NumberOfActivePlanes, 6846 unsigned int DETBufferSizeInKByte, 6847 double MaximumSwathWidthLuma[], 6848 double MaximumSwathWidthChroma[], 6849 enum scan_direction_class SourceScan[], 6850 enum source_format_class SourcePixelFormat[], 6851 enum dm_swizzle_mode SurfaceTiling[], 6852 int ViewportWidth[], 6853 int ViewportHeight[], 6854 int SurfaceWidthY[], 6855 int SurfaceWidthC[], 6856 int SurfaceHeightY[], 6857 int SurfaceHeightC[], 6858 int Read256BytesBlockHeightY[], 6859 int Read256BytesBlockHeightC[], 6860 int Read256BytesBlockWidthY[], 6861 int Read256BytesBlockWidthC[], 6862 enum odm_combine_mode ODMCombineEnabled[], 6863 int BlendingAndTiming[], 6864 int BytePerPixY[], 6865 int BytePerPixC[], 6866 double BytePerPixDETY[], 6867 double BytePerPixDETC[], 6868 int HActive[], 6869 double HRatio[], 6870 double HRatioChroma[], 6871 int DPPPerPlane[], 6872 int swath_width_luma_ub[], 6873 int swath_width_chroma_ub[], 6874 double SwathWidth[], 6875 double SwathWidthChroma[], 6876 int SwathHeightY[], 6877 int SwathHeightC[], 6878 unsigned int DETBufferSizeY[], 6879 unsigned int DETBufferSizeC[], 6880 bool ViewportSizeSupportPerPlane[], 6881 bool *ViewportSizeSupport) 6882 { 6883 int MaximumSwathHeightY[DC__NUM_DPP__MAX]; 6884 int MaximumSwathHeightC[DC__NUM_DPP__MAX]; 6885 int MinimumSwathHeightY; 6886 int MinimumSwathHeightC; 6887 int RoundedUpMaxSwathSizeBytesY; 6888 int RoundedUpMaxSwathSizeBytesC; 6889 int RoundedUpMinSwathSizeBytesY; 6890 int RoundedUpMinSwathSizeBytesC; 6891 int RoundedUpSwathSizeBytesY; 6892 int RoundedUpSwathSizeBytesC; 6893 double SwathWidthSingleDPP[DC__NUM_DPP__MAX]; 6894 double SwathWidthSingleDPPChroma[DC__NUM_DPP__MAX]; 6895 int k; 6896 6897 CalculateSwathWidth( 6898 ForceSingleDPP, 6899 NumberOfActivePlanes, 6900 SourcePixelFormat, 6901 SourceScan, 6902 ViewportWidth, 6903 ViewportHeight, 6904 SurfaceWidthY, 6905 SurfaceWidthC, 6906 SurfaceHeightY, 6907 SurfaceHeightC, 6908 ODMCombineEnabled, 6909 BytePerPixY, 6910 BytePerPixC, 6911 Read256BytesBlockHeightY, 6912 Read256BytesBlockHeightC, 6913 Read256BytesBlockWidthY, 6914 Read256BytesBlockWidthC, 6915 BlendingAndTiming, 6916 HActive, 6917 HRatio, 6918 DPPPerPlane, 6919 SwathWidthSingleDPP, 6920 SwathWidthSingleDPPChroma, 6921 SwathWidth, 6922 SwathWidthChroma, 6923 MaximumSwathHeightY, 6924 MaximumSwathHeightC, 6925 swath_width_luma_ub, 6926 swath_width_chroma_ub); 6927 6928 *ViewportSizeSupport = true; 6929 for (k = 0; k < NumberOfActivePlanes; ++k) { 6930 if ((SourcePixelFormat[k] == dm_444_64 || SourcePixelFormat[k] == dm_444_32 || SourcePixelFormat[k] == dm_444_16 || SourcePixelFormat[k] == dm_mono_16 6931 || SourcePixelFormat[k] == dm_mono_8 || SourcePixelFormat[k] == dm_rgbe)) { 6932 if (SurfaceTiling[k] == dm_sw_linear 6933 || (SourcePixelFormat[k] == dm_444_64 6934 && (SurfaceTiling[k] == dm_sw_64kb_s || SurfaceTiling[k] == dm_sw_64kb_s_t || SurfaceTiling[k] == dm_sw_64kb_s_x) 6935 && SourceScan[k] != dm_vert)) { 6936 MinimumSwathHeightY = MaximumSwathHeightY[k]; 6937 } else if (SourcePixelFormat[k] == dm_444_8 && SourceScan[k] == dm_vert) { 6938 MinimumSwathHeightY = MaximumSwathHeightY[k]; 6939 } else { 6940 MinimumSwathHeightY = MaximumSwathHeightY[k] / 2; 6941 } 6942 MinimumSwathHeightC = MaximumSwathHeightC[k]; 6943 } else { 6944 if (SurfaceTiling[k] == dm_sw_linear) { 6945 MinimumSwathHeightY = MaximumSwathHeightY[k]; 6946 MinimumSwathHeightC = MaximumSwathHeightC[k]; 6947 } else if (SourcePixelFormat[k] == dm_rgbe_alpha && SourceScan[k] == dm_vert) { 6948 MinimumSwathHeightY = MaximumSwathHeightY[k] / 2; 6949 MinimumSwathHeightC = MaximumSwathHeightC[k]; 6950 } else if (SourcePixelFormat[k] == dm_rgbe_alpha) { 6951 MinimumSwathHeightY = MaximumSwathHeightY[k] / 2; 6952 MinimumSwathHeightC = MaximumSwathHeightC[k] / 2; 6953 } else if (SourcePixelFormat[k] == dm_420_8 && SourceScan[k] == dm_vert) { 6954 MinimumSwathHeightY = MaximumSwathHeightY[k]; 6955 MinimumSwathHeightC = MaximumSwathHeightC[k] / 2; 6956 } else { 6957 MinimumSwathHeightC = MaximumSwathHeightC[k] / 2; 6958 MinimumSwathHeightY = MaximumSwathHeightY[k] / 2; 6959 } 6960 } 6961 6962 RoundedUpMaxSwathSizeBytesY = swath_width_luma_ub[k] * BytePerPixDETY[k] * MaximumSwathHeightY[k]; 6963 RoundedUpMinSwathSizeBytesY = swath_width_luma_ub[k] * BytePerPixDETY[k] * MinimumSwathHeightY; 6964 if (SourcePixelFormat[k] == dm_420_10) { 6965 RoundedUpMaxSwathSizeBytesY = dml_ceil((double) RoundedUpMaxSwathSizeBytesY, 256); 6966 RoundedUpMinSwathSizeBytesY = dml_ceil((double) RoundedUpMinSwathSizeBytesY, 256); 6967 } 6968 RoundedUpMaxSwathSizeBytesC = swath_width_chroma_ub[k] * BytePerPixDETC[k] * MaximumSwathHeightC[k]; 6969 RoundedUpMinSwathSizeBytesC = swath_width_chroma_ub[k] * BytePerPixDETC[k] * MinimumSwathHeightC; 6970 if (SourcePixelFormat[k] == dm_420_10) { 6971 RoundedUpMaxSwathSizeBytesC = dml_ceil(RoundedUpMaxSwathSizeBytesC, 256); 6972 RoundedUpMinSwathSizeBytesC = dml_ceil(RoundedUpMinSwathSizeBytesC, 256); 6973 } 6974 6975 if (RoundedUpMaxSwathSizeBytesY + RoundedUpMaxSwathSizeBytesC <= DETBufferSizeInKByte * 1024 / 2) { 6976 SwathHeightY[k] = MaximumSwathHeightY[k]; 6977 SwathHeightC[k] = MaximumSwathHeightC[k]; 6978 RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY; 6979 RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC; 6980 } else if (RoundedUpMaxSwathSizeBytesY >= 1.5 * RoundedUpMaxSwathSizeBytesC 6981 && RoundedUpMinSwathSizeBytesY + RoundedUpMaxSwathSizeBytesC <= DETBufferSizeInKByte * 1024 / 2) { 6982 SwathHeightY[k] = MinimumSwathHeightY; 6983 SwathHeightC[k] = MaximumSwathHeightC[k]; 6984 RoundedUpSwathSizeBytesY = RoundedUpMinSwathSizeBytesY; 6985 RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC; 6986 } else if (RoundedUpMaxSwathSizeBytesY < 1.5 * RoundedUpMaxSwathSizeBytesC 6987 && RoundedUpMaxSwathSizeBytesY + RoundedUpMinSwathSizeBytesC <= DETBufferSizeInKByte * 1024 / 2) { 6988 SwathHeightY[k] = MaximumSwathHeightY[k]; 6989 SwathHeightC[k] = MinimumSwathHeightC; 6990 RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY; 6991 RoundedUpSwathSizeBytesC = RoundedUpMinSwathSizeBytesC; 6992 } else { 6993 SwathHeightY[k] = MinimumSwathHeightY; 6994 SwathHeightC[k] = MinimumSwathHeightC; 6995 RoundedUpSwathSizeBytesY = RoundedUpMinSwathSizeBytesY; 6996 RoundedUpSwathSizeBytesC = RoundedUpMinSwathSizeBytesC; 6997 } 6998 { 6999 double actDETBufferSizeInKByte = dml_ceil(DETBufferSizeInKByte, 64); 7000 if (SwathHeightC[k] == 0) { 7001 DETBufferSizeY[k] = actDETBufferSizeInKByte * 1024; 7002 DETBufferSizeC[k] = 0; 7003 } else if (RoundedUpSwathSizeBytesY <= 1.5 * RoundedUpSwathSizeBytesC) { 7004 DETBufferSizeY[k] = actDETBufferSizeInKByte * 1024 / 2; 7005 DETBufferSizeC[k] = actDETBufferSizeInKByte * 1024 / 2; 7006 } else { 7007 DETBufferSizeY[k] = dml_floor(actDETBufferSizeInKByte * 1024 * 2 / 3, 1024); 7008 DETBufferSizeC[k] = actDETBufferSizeInKByte * 1024 / 3; 7009 } 7010 7011 if (RoundedUpMinSwathSizeBytesY + RoundedUpMinSwathSizeBytesC > actDETBufferSizeInKByte * 1024 / 2 || SwathWidth[k] > MaximumSwathWidthLuma[k] 7012 || (SwathHeightC[k] > 0 && SwathWidthChroma[k] > MaximumSwathWidthChroma[k])) { 7013 *ViewportSizeSupport = false; 7014 ViewportSizeSupportPerPlane[k] = false; 7015 } else { 7016 ViewportSizeSupportPerPlane[k] = true; 7017 } 7018 } 7019 } 7020 } 7021 7022 static void CalculateSwathWidth( 7023 bool ForceSingleDPP, 7024 int NumberOfActivePlanes, 7025 enum source_format_class SourcePixelFormat[], 7026 enum scan_direction_class SourceScan[], 7027 int ViewportWidth[], 7028 int ViewportHeight[], 7029 int SurfaceWidthY[], 7030 int SurfaceWidthC[], 7031 int SurfaceHeightY[], 7032 int SurfaceHeightC[], 7033 enum odm_combine_mode ODMCombineEnabled[], 7034 int BytePerPixY[], 7035 int BytePerPixC[], 7036 int Read256BytesBlockHeightY[], 7037 int Read256BytesBlockHeightC[], 7038 int Read256BytesBlockWidthY[], 7039 int Read256BytesBlockWidthC[], 7040 int BlendingAndTiming[], 7041 int HActive[], 7042 double HRatio[], 7043 int DPPPerPlane[], 7044 double SwathWidthSingleDPPY[], 7045 double SwathWidthSingleDPPC[], 7046 double SwathWidthY[], 7047 double SwathWidthC[], 7048 int MaximumSwathHeightY[], 7049 int MaximumSwathHeightC[], 7050 int swath_width_luma_ub[], 7051 int swath_width_chroma_ub[]) 7052 { 7053 enum odm_combine_mode MainPlaneODMCombine; 7054 int j, k; 7055 7056 #ifdef __DML_VBA_DEBUG__ 7057 dml_print("DML::%s: NumberOfActivePlanes = %d\n", __func__, NumberOfActivePlanes); 7058 #endif 7059 7060 for (k = 0; k < NumberOfActivePlanes; ++k) { 7061 if (SourceScan[k] != dm_vert) { 7062 SwathWidthSingleDPPY[k] = ViewportWidth[k]; 7063 } else { 7064 SwathWidthSingleDPPY[k] = ViewportHeight[k]; 7065 } 7066 7067 #ifdef __DML_VBA_DEBUG__ 7068 dml_print("DML::%s: k=%d ViewportWidth=%d\n", __func__, k, ViewportWidth[k]); 7069 dml_print("DML::%s: k=%d ViewportHeight=%d\n", __func__, k, ViewportHeight[k]); 7070 #endif 7071 7072 MainPlaneODMCombine = ODMCombineEnabled[k]; 7073 for (j = 0; j < NumberOfActivePlanes; ++j) { 7074 if (BlendingAndTiming[k] == j) { 7075 MainPlaneODMCombine = ODMCombineEnabled[j]; 7076 } 7077 } 7078 7079 if (MainPlaneODMCombine == dm_odm_combine_mode_4to1) { 7080 SwathWidthY[k] = dml_min(SwathWidthSingleDPPY[k], dml_round(HActive[k] / 4.0 * HRatio[k])); 7081 } else if (MainPlaneODMCombine == dm_odm_combine_mode_2to1) { 7082 SwathWidthY[k] = dml_min(SwathWidthSingleDPPY[k], dml_round(HActive[k] / 2.0 * HRatio[k])); 7083 } else if (DPPPerPlane[k] == 2) { 7084 SwathWidthY[k] = SwathWidthSingleDPPY[k] / 2; 7085 } else { 7086 SwathWidthY[k] = SwathWidthSingleDPPY[k]; 7087 } 7088 7089 #ifdef __DML_VBA_DEBUG__ 7090 dml_print("DML::%s: k=%d SwathWidthSingleDPPY=%f\n", __func__, k, SwathWidthSingleDPPY[k]); 7091 dml_print("DML::%s: k=%d SwathWidthY=%f\n", __func__, k, SwathWidthY[k]); 7092 #endif 7093 7094 if (SourcePixelFormat[k] == dm_420_8 || SourcePixelFormat[k] == dm_420_10 || SourcePixelFormat[k] == dm_420_12) { 7095 SwathWidthC[k] = SwathWidthY[k] / 2; 7096 SwathWidthSingleDPPC[k] = SwathWidthSingleDPPY[k] / 2; 7097 } else { 7098 SwathWidthC[k] = SwathWidthY[k]; 7099 SwathWidthSingleDPPC[k] = SwathWidthSingleDPPY[k]; 7100 } 7101 7102 if (ForceSingleDPP == true) { 7103 SwathWidthY[k] = SwathWidthSingleDPPY[k]; 7104 SwathWidthC[k] = SwathWidthSingleDPPC[k]; 7105 } 7106 { 7107 int surface_width_ub_l = dml_ceil(SurfaceWidthY[k], Read256BytesBlockWidthY[k]); 7108 int surface_height_ub_l = dml_ceil(SurfaceHeightY[k], Read256BytesBlockHeightY[k]); 7109 int surface_width_ub_c = dml_ceil(SurfaceWidthC[k], Read256BytesBlockWidthC[k]); 7110 int surface_height_ub_c = dml_ceil(SurfaceHeightC[k], Read256BytesBlockHeightC[k]); 7111 7112 #ifdef __DML_VBA_DEBUG__ 7113 dml_print("DML::%s: k=%d surface_width_ub_l=%0d\n", __func__, k, surface_width_ub_l); 7114 #endif 7115 7116 if (SourceScan[k] != dm_vert) { 7117 MaximumSwathHeightY[k] = Read256BytesBlockHeightY[k]; 7118 MaximumSwathHeightC[k] = Read256BytesBlockHeightC[k]; 7119 swath_width_luma_ub[k] = dml_min(surface_width_ub_l, (int) dml_ceil(SwathWidthY[k] - 1, Read256BytesBlockWidthY[k]) + Read256BytesBlockWidthY[k]); 7120 if (BytePerPixC[k] > 0) { 7121 swath_width_chroma_ub[k] = dml_min( 7122 surface_width_ub_c, 7123 (int) dml_ceil(SwathWidthC[k] - 1, Read256BytesBlockWidthC[k]) + Read256BytesBlockWidthC[k]); 7124 } else { 7125 swath_width_chroma_ub[k] = 0; 7126 } 7127 } else { 7128 MaximumSwathHeightY[k] = Read256BytesBlockWidthY[k]; 7129 MaximumSwathHeightC[k] = Read256BytesBlockWidthC[k]; 7130 swath_width_luma_ub[k] = dml_min(surface_height_ub_l, (int) dml_ceil(SwathWidthY[k] - 1, Read256BytesBlockHeightY[k]) + Read256BytesBlockHeightY[k]); 7131 if (BytePerPixC[k] > 0) { 7132 swath_width_chroma_ub[k] = dml_min( 7133 surface_height_ub_c, 7134 (int) dml_ceil(SwathWidthC[k] - 1, Read256BytesBlockHeightC[k]) + Read256BytesBlockHeightC[k]); 7135 } else { 7136 swath_width_chroma_ub[k] = 0; 7137 } 7138 } 7139 } 7140 } 7141 } 7142 7143 static double CalculateExtraLatency( 7144 int RoundTripPingLatencyCycles, 7145 int ReorderingBytes, 7146 double DCFCLK, 7147 int TotalNumberOfActiveDPP, 7148 int PixelChunkSizeInKByte, 7149 int TotalNumberOfDCCActiveDPP, 7150 int MetaChunkSize, 7151 double ReturnBW, 7152 bool GPUVMEnable, 7153 bool HostVMEnable, 7154 int NumberOfActivePlanes, 7155 int NumberOfDPP[], 7156 int dpte_group_bytes[], 7157 double HostVMInefficiencyFactor, 7158 double HostVMMinPageSize, 7159 int HostVMMaxNonCachedPageTableLevels) 7160 { 7161 double ExtraLatencyBytes; 7162 double ExtraLatency; 7163 7164 ExtraLatencyBytes = CalculateExtraLatencyBytes( 7165 ReorderingBytes, 7166 TotalNumberOfActiveDPP, 7167 PixelChunkSizeInKByte, 7168 TotalNumberOfDCCActiveDPP, 7169 MetaChunkSize, 7170 GPUVMEnable, 7171 HostVMEnable, 7172 NumberOfActivePlanes, 7173 NumberOfDPP, 7174 dpte_group_bytes, 7175 HostVMInefficiencyFactor, 7176 HostVMMinPageSize, 7177 HostVMMaxNonCachedPageTableLevels); 7178 7179 ExtraLatency = (RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__) / DCFCLK + ExtraLatencyBytes / ReturnBW; 7180 7181 #ifdef __DML_VBA_DEBUG__ 7182 dml_print("DML::%s: RoundTripPingLatencyCycles=%d\n", __func__, RoundTripPingLatencyCycles); 7183 dml_print("DML::%s: DCFCLK=%f\n", __func__, DCFCLK); 7184 dml_print("DML::%s: ExtraLatencyBytes=%f\n", __func__, ExtraLatencyBytes); 7185 dml_print("DML::%s: ReturnBW=%f\n", __func__, ReturnBW); 7186 dml_print("DML::%s: ExtraLatency=%f\n", __func__, ExtraLatency); 7187 #endif 7188 7189 return ExtraLatency; 7190 } 7191 7192 static double CalculateExtraLatencyBytes( 7193 int ReorderingBytes, 7194 int TotalNumberOfActiveDPP, 7195 int PixelChunkSizeInKByte, 7196 int TotalNumberOfDCCActiveDPP, 7197 int MetaChunkSize, 7198 bool GPUVMEnable, 7199 bool HostVMEnable, 7200 int NumberOfActivePlanes, 7201 int NumberOfDPP[], 7202 int dpte_group_bytes[], 7203 double HostVMInefficiencyFactor, 7204 double HostVMMinPageSize, 7205 int HostVMMaxNonCachedPageTableLevels) 7206 { 7207 double ret; 7208 int HostVMDynamicLevels = 0, k; 7209 7210 if (GPUVMEnable == true && HostVMEnable == true) { 7211 if (HostVMMinPageSize < 2048) { 7212 HostVMDynamicLevels = HostVMMaxNonCachedPageTableLevels; 7213 } else if (HostVMMinPageSize >= 2048 && HostVMMinPageSize < 1048576) { 7214 HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 1); 7215 } else { 7216 HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 2); 7217 } 7218 } else { 7219 HostVMDynamicLevels = 0; 7220 } 7221 7222 ret = ReorderingBytes + (TotalNumberOfActiveDPP * PixelChunkSizeInKByte + TotalNumberOfDCCActiveDPP * MetaChunkSize) * 1024.0; 7223 7224 if (GPUVMEnable == true) { 7225 for (k = 0; k < NumberOfActivePlanes; ++k) { 7226 ret = ret + NumberOfDPP[k] * dpte_group_bytes[k] * (1 + 8 * HostVMDynamicLevels) * HostVMInefficiencyFactor; 7227 } 7228 } 7229 return ret; 7230 } 7231 7232 static double CalculateUrgentLatency( 7233 double UrgentLatencyPixelDataOnly, 7234 double UrgentLatencyPixelMixedWithVMData, 7235 double UrgentLatencyVMDataOnly, 7236 bool DoUrgentLatencyAdjustment, 7237 double UrgentLatencyAdjustmentFabricClockComponent, 7238 double UrgentLatencyAdjustmentFabricClockReference, 7239 double FabricClock) 7240 { 7241 double ret; 7242 7243 ret = dml_max3(UrgentLatencyPixelDataOnly, UrgentLatencyPixelMixedWithVMData, UrgentLatencyVMDataOnly); 7244 if (DoUrgentLatencyAdjustment == true) { 7245 ret = ret + UrgentLatencyAdjustmentFabricClockComponent * (UrgentLatencyAdjustmentFabricClockReference / FabricClock - 1); 7246 } 7247 return ret; 7248 } 7249 7250 static void UseMinimumDCFCLK( 7251 struct display_mode_lib *mode_lib, 7252 int MaxInterDCNTileRepeaters, 7253 int MaxPrefetchMode, 7254 double FinalDRAMClockChangeLatency, 7255 double SREnterPlusExitTime, 7256 int ReturnBusWidth, 7257 int RoundTripPingLatencyCycles, 7258 int ReorderingBytes, 7259 int PixelChunkSizeInKByte, 7260 int MetaChunkSize, 7261 bool GPUVMEnable, 7262 int GPUVMMaxPageTableLevels, 7263 bool HostVMEnable, 7264 int NumberOfActivePlanes, 7265 double HostVMMinPageSize, 7266 int HostVMMaxNonCachedPageTableLevels, 7267 bool DynamicMetadataVMEnabled, 7268 enum immediate_flip_requirement ImmediateFlipRequirement, 7269 bool ProgressiveToInterlaceUnitInOPP, 7270 double MaxAveragePercentOfIdealFabricAndSDPPortBWDisplayCanUseInNormalSystemOperation, 7271 double PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency, 7272 int VTotal[], 7273 int VActive[], 7274 int DynamicMetadataTransmittedBytes[], 7275 int DynamicMetadataLinesBeforeActiveRequired[], 7276 bool Interlace[], 7277 double RequiredDPPCLK[][2][DC__NUM_DPP__MAX], 7278 double RequiredDISPCLK[][2], 7279 double UrgLatency[], 7280 unsigned int NoOfDPP[][2][DC__NUM_DPP__MAX], 7281 double ProjectedDCFCLKDeepSleep[][2], 7282 double MaximumVStartup[][2][DC__NUM_DPP__MAX], 7283 double TotalVActivePixelBandwidth[][2], 7284 double TotalVActiveCursorBandwidth[][2], 7285 double TotalMetaRowBandwidth[][2], 7286 double TotalDPTERowBandwidth[][2], 7287 unsigned int TotalNumberOfActiveDPP[][2], 7288 unsigned int TotalNumberOfDCCActiveDPP[][2], 7289 int dpte_group_bytes[], 7290 double PrefetchLinesY[][2][DC__NUM_DPP__MAX], 7291 double PrefetchLinesC[][2][DC__NUM_DPP__MAX], 7292 int swath_width_luma_ub_all_states[][2][DC__NUM_DPP__MAX], 7293 int swath_width_chroma_ub_all_states[][2][DC__NUM_DPP__MAX], 7294 int BytePerPixelY[], 7295 int BytePerPixelC[], 7296 int HTotal[], 7297 double PixelClock[], 7298 double PDEAndMetaPTEBytesPerFrame[][2][DC__NUM_DPP__MAX], 7299 double DPTEBytesPerRow[][2][DC__NUM_DPP__MAX], 7300 double MetaRowBytes[][2][DC__NUM_DPP__MAX], 7301 bool DynamicMetadataEnable[], 7302 double VActivePixelBandwidth[][2][DC__NUM_DPP__MAX], 7303 double VActiveCursorBandwidth[][2][DC__NUM_DPP__MAX], 7304 double ReadBandwidthLuma[], 7305 double ReadBandwidthChroma[], 7306 double DCFCLKPerState[], 7307 double DCFCLKState[][2]) 7308 { 7309 struct vba_vars_st *v = &mode_lib->vba; 7310 int dummy1, i, j, k; 7311 double NormalEfficiency, dummy2, dummy3; 7312 double TotalMaxPrefetchFlipDPTERowBandwidth[DC__VOLTAGE_STATES][2]; 7313 7314 NormalEfficiency = PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0; 7315 for (i = 0; i < v->soc.num_states; ++i) { 7316 for (j = 0; j <= 1; ++j) { 7317 double PixelDCFCLKCyclesRequiredInPrefetch[DC__NUM_DPP__MAX]; 7318 double PrefetchPixelLinesTime[DC__NUM_DPP__MAX]; 7319 double DCFCLKRequiredForPeakBandwidthPerPlane[DC__NUM_DPP__MAX]; 7320 double DynamicMetadataVMExtraLatency[DC__NUM_DPP__MAX]; 7321 double MinimumTWait; 7322 double NonDPTEBandwidth; 7323 double DPTEBandwidth; 7324 double DCFCLKRequiredForAverageBandwidth; 7325 double ExtraLatencyBytes; 7326 double ExtraLatencyCycles; 7327 double DCFCLKRequiredForPeakBandwidth; 7328 int NoOfDPPState[DC__NUM_DPP__MAX]; 7329 double MinimumTvmPlus2Tr0; 7330 7331 TotalMaxPrefetchFlipDPTERowBandwidth[i][j] = 0; 7332 for (k = 0; k < NumberOfActivePlanes; ++k) { 7333 TotalMaxPrefetchFlipDPTERowBandwidth[i][j] = TotalMaxPrefetchFlipDPTERowBandwidth[i][j] 7334 + NoOfDPP[i][j][k] * DPTEBytesPerRow[i][j][k] / (15.75 * HTotal[k] / PixelClock[k]); 7335 } 7336 7337 for (k = 0; k <= NumberOfActivePlanes - 1; ++k) { 7338 NoOfDPPState[k] = NoOfDPP[i][j][k]; 7339 } 7340 7341 MinimumTWait = CalculateTWait(MaxPrefetchMode, FinalDRAMClockChangeLatency, UrgLatency[i], SREnterPlusExitTime); 7342 NonDPTEBandwidth = TotalVActivePixelBandwidth[i][j] + TotalVActiveCursorBandwidth[i][j] + TotalMetaRowBandwidth[i][j]; 7343 DPTEBandwidth = (HostVMEnable == true || ImmediateFlipRequirement == dm_immediate_flip_required) ? 7344 TotalMaxPrefetchFlipDPTERowBandwidth[i][j] : TotalDPTERowBandwidth[i][j]; 7345 DCFCLKRequiredForAverageBandwidth = dml_max3( 7346 ProjectedDCFCLKDeepSleep[i][j], 7347 (NonDPTEBandwidth + TotalDPTERowBandwidth[i][j]) / ReturnBusWidth 7348 / (MaxAveragePercentOfIdealFabricAndSDPPortBWDisplayCanUseInNormalSystemOperation / 100), 7349 (NonDPTEBandwidth + DPTEBandwidth / NormalEfficiency) / NormalEfficiency / ReturnBusWidth); 7350 7351 ExtraLatencyBytes = CalculateExtraLatencyBytes( 7352 ReorderingBytes, 7353 TotalNumberOfActiveDPP[i][j], 7354 PixelChunkSizeInKByte, 7355 TotalNumberOfDCCActiveDPP[i][j], 7356 MetaChunkSize, 7357 GPUVMEnable, 7358 HostVMEnable, 7359 NumberOfActivePlanes, 7360 NoOfDPPState, 7361 dpte_group_bytes, 7362 1, 7363 HostVMMinPageSize, 7364 HostVMMaxNonCachedPageTableLevels); 7365 ExtraLatencyCycles = RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__ + ExtraLatencyBytes / NormalEfficiency / ReturnBusWidth; 7366 for (k = 0; k < NumberOfActivePlanes; ++k) { 7367 double DCFCLKCyclesRequiredInPrefetch; 7368 double ExpectedPrefetchBWAcceleration; 7369 double PrefetchTime; 7370 7371 PixelDCFCLKCyclesRequiredInPrefetch[k] = (PrefetchLinesY[i][j][k] * swath_width_luma_ub_all_states[i][j][k] * BytePerPixelY[k] 7372 + PrefetchLinesC[i][j][k] * swath_width_chroma_ub_all_states[i][j][k] * BytePerPixelC[k]) / NormalEfficiency / ReturnBusWidth; 7373 DCFCLKCyclesRequiredInPrefetch = 2 * ExtraLatencyCycles / NoOfDPPState[k] 7374 + PDEAndMetaPTEBytesPerFrame[i][j][k] / NormalEfficiency / NormalEfficiency / ReturnBusWidth * (GPUVMMaxPageTableLevels > 2 ? 1 : 0) 7375 + 2 * DPTEBytesPerRow[i][j][k] / NormalEfficiency / NormalEfficiency / ReturnBusWidth 7376 + 2 * MetaRowBytes[i][j][k] / NormalEfficiency / ReturnBusWidth + PixelDCFCLKCyclesRequiredInPrefetch[k]; 7377 PrefetchPixelLinesTime[k] = dml_max(PrefetchLinesY[i][j][k], PrefetchLinesC[i][j][k]) * HTotal[k] / PixelClock[k]; 7378 ExpectedPrefetchBWAcceleration = (VActivePixelBandwidth[i][j][k] + VActiveCursorBandwidth[i][j][k]) 7379 / (ReadBandwidthLuma[k] + ReadBandwidthChroma[k]); 7380 DynamicMetadataVMExtraLatency[k] = 7381 (GPUVMEnable == true && DynamicMetadataEnable[k] == true && DynamicMetadataVMEnabled == true) ? 7382 UrgLatency[i] * GPUVMMaxPageTableLevels * (HostVMEnable == true ? HostVMMaxNonCachedPageTableLevels + 1 : 1) : 0; 7383 PrefetchTime = (MaximumVStartup[i][j][k] - 1) * HTotal[k] / PixelClock[k] - MinimumTWait 7384 - UrgLatency[i] 7385 * ((GPUVMMaxPageTableLevels <= 2 ? GPUVMMaxPageTableLevels : GPUVMMaxPageTableLevels - 2) 7386 * (HostVMEnable == true ? HostVMMaxNonCachedPageTableLevels + 1 : 1) - 1) 7387 - DynamicMetadataVMExtraLatency[k]; 7388 7389 if (PrefetchTime > 0) { 7390 double ExpectedVRatioPrefetch; 7391 ExpectedVRatioPrefetch = PrefetchPixelLinesTime[k] 7392 / (PrefetchTime * PixelDCFCLKCyclesRequiredInPrefetch[k] / DCFCLKCyclesRequiredInPrefetch); 7393 DCFCLKRequiredForPeakBandwidthPerPlane[k] = NoOfDPPState[k] * PixelDCFCLKCyclesRequiredInPrefetch[k] / PrefetchPixelLinesTime[k] 7394 * dml_max(1.0, ExpectedVRatioPrefetch) * dml_max(1.0, ExpectedVRatioPrefetch / 4) * ExpectedPrefetchBWAcceleration; 7395 if (HostVMEnable == true || ImmediateFlipRequirement == dm_immediate_flip_required) { 7396 DCFCLKRequiredForPeakBandwidthPerPlane[k] = DCFCLKRequiredForPeakBandwidthPerPlane[k] 7397 + NoOfDPPState[k] * DPTEBandwidth / NormalEfficiency / NormalEfficiency / ReturnBusWidth; 7398 } 7399 } else { 7400 DCFCLKRequiredForPeakBandwidthPerPlane[k] = DCFCLKPerState[i]; 7401 } 7402 if (DynamicMetadataEnable[k] == true) { 7403 double TSetupPipe; 7404 double TdmbfPipe; 7405 double TdmsksPipe; 7406 double TdmecPipe; 7407 double AllowedTimeForUrgentExtraLatency; 7408 7409 CalculateVupdateAndDynamicMetadataParameters( 7410 MaxInterDCNTileRepeaters, 7411 RequiredDPPCLK[i][j][k], 7412 RequiredDISPCLK[i][j], 7413 ProjectedDCFCLKDeepSleep[i][j], 7414 PixelClock[k], 7415 HTotal[k], 7416 VTotal[k] - VActive[k], 7417 DynamicMetadataTransmittedBytes[k], 7418 DynamicMetadataLinesBeforeActiveRequired[k], 7419 Interlace[k], 7420 ProgressiveToInterlaceUnitInOPP, 7421 &TSetupPipe, 7422 &TdmbfPipe, 7423 &TdmecPipe, 7424 &TdmsksPipe, 7425 &dummy1, 7426 &dummy2, 7427 &dummy3); 7428 AllowedTimeForUrgentExtraLatency = MaximumVStartup[i][j][k] * HTotal[k] / PixelClock[k] - MinimumTWait - TSetupPipe - TdmbfPipe - TdmecPipe 7429 - TdmsksPipe - DynamicMetadataVMExtraLatency[k]; 7430 if (AllowedTimeForUrgentExtraLatency > 0) { 7431 DCFCLKRequiredForPeakBandwidthPerPlane[k] = dml_max( 7432 DCFCLKRequiredForPeakBandwidthPerPlane[k], 7433 ExtraLatencyCycles / AllowedTimeForUrgentExtraLatency); 7434 } else { 7435 DCFCLKRequiredForPeakBandwidthPerPlane[k] = DCFCLKPerState[i]; 7436 } 7437 } 7438 } 7439 DCFCLKRequiredForPeakBandwidth = 0; 7440 for (k = 0; k <= NumberOfActivePlanes - 1; ++k) { 7441 DCFCLKRequiredForPeakBandwidth = DCFCLKRequiredForPeakBandwidth + DCFCLKRequiredForPeakBandwidthPerPlane[k]; 7442 } 7443 MinimumTvmPlus2Tr0 = UrgLatency[i] 7444 * (GPUVMEnable == true ? 7445 (HostVMEnable == true ? 7446 (GPUVMMaxPageTableLevels + 2) * (HostVMMaxNonCachedPageTableLevels + 1) - 1 : GPUVMMaxPageTableLevels + 1) : 7447 0); 7448 for (k = 0; k < NumberOfActivePlanes; ++k) { 7449 double MaximumTvmPlus2Tr0PlusTsw; 7450 MaximumTvmPlus2Tr0PlusTsw = (MaximumVStartup[i][j][k] - 2) * HTotal[k] / PixelClock[k] - MinimumTWait - DynamicMetadataVMExtraLatency[k]; 7451 if (MaximumTvmPlus2Tr0PlusTsw <= MinimumTvmPlus2Tr0 + PrefetchPixelLinesTime[k] / 4) { 7452 DCFCLKRequiredForPeakBandwidth = DCFCLKPerState[i]; 7453 } else { 7454 DCFCLKRequiredForPeakBandwidth = dml_max3( 7455 DCFCLKRequiredForPeakBandwidth, 7456 2 * ExtraLatencyCycles / (MaximumTvmPlus2Tr0PlusTsw - MinimumTvmPlus2Tr0 - PrefetchPixelLinesTime[k] / 4), 7457 (2 * ExtraLatencyCycles + PixelDCFCLKCyclesRequiredInPrefetch[k]) / (MaximumTvmPlus2Tr0PlusTsw - MinimumTvmPlus2Tr0)); 7458 } 7459 } 7460 DCFCLKState[i][j] = dml_min(DCFCLKPerState[i], 1.05 * dml_max(DCFCLKRequiredForAverageBandwidth, DCFCLKRequiredForPeakBandwidth)); 7461 } 7462 } 7463 } 7464 7465 static void CalculateUnboundedRequestAndCompressedBufferSize( 7466 unsigned int DETBufferSizeInKByte, 7467 int ConfigReturnBufferSizeInKByte, 7468 enum unbounded_requesting_policy UseUnboundedRequestingFinal, 7469 int TotalActiveDPP, 7470 bool NoChromaPlanes, 7471 int MaxNumDPP, 7472 int CompressedBufferSegmentSizeInkByteFinal, 7473 enum output_encoder_class *Output, 7474 bool *UnboundedRequestEnabled, 7475 int *CompressedBufferSizeInkByte) 7476 { 7477 double actDETBufferSizeInKByte = dml_ceil(DETBufferSizeInKByte, 64); 7478 7479 *UnboundedRequestEnabled = UnboundedRequest(UseUnboundedRequestingFinal, TotalActiveDPP, NoChromaPlanes, Output[0]); 7480 *CompressedBufferSizeInkByte = ( 7481 *UnboundedRequestEnabled == true ? 7482 ConfigReturnBufferSizeInKByte - TotalActiveDPP * actDETBufferSizeInKByte : 7483 ConfigReturnBufferSizeInKByte - MaxNumDPP * actDETBufferSizeInKByte); 7484 *CompressedBufferSizeInkByte = *CompressedBufferSizeInkByte * CompressedBufferSegmentSizeInkByteFinal / 64; 7485 7486 #ifdef __DML_VBA_DEBUG__ 7487 dml_print("DML::%s: TotalActiveDPP = %d\n", __func__, TotalActiveDPP); 7488 dml_print("DML::%s: DETBufferSizeInKByte = %d\n", __func__, DETBufferSizeInKByte); 7489 dml_print("DML::%s: ConfigReturnBufferSizeInKByte = %d\n", __func__, ConfigReturnBufferSizeInKByte); 7490 dml_print("DML::%s: UseUnboundedRequestingFinal = %d\n", __func__, UseUnboundedRequestingFinal); 7491 dml_print("DML::%s: actDETBufferSizeInKByte = %f\n", __func__, actDETBufferSizeInKByte); 7492 dml_print("DML::%s: UnboundedRequestEnabled = %d\n", __func__, *UnboundedRequestEnabled); 7493 dml_print("DML::%s: CompressedBufferSizeInkByte = %d\n", __func__, *CompressedBufferSizeInkByte); 7494 #endif 7495 } 7496 7497 static bool UnboundedRequest(enum unbounded_requesting_policy UseUnboundedRequestingFinal, int TotalNumberOfActiveDPP, bool NoChroma, enum output_encoder_class Output) 7498 { 7499 bool ret_val = false; 7500 7501 ret_val = (UseUnboundedRequestingFinal != dm_unbounded_requesting_disable && TotalNumberOfActiveDPP == 1 && NoChroma); 7502 if (UseUnboundedRequestingFinal == dm_unbounded_requesting_edp_only && Output != dm_edp) { 7503 ret_val = false; 7504 } 7505 return (ret_val); 7506 } 7507 7508