1 /* 2 * Copyright 2017 Advanced Micro Devices, Inc. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice shall be included in 12 * all copies or substantial portions of the Software. 13 * 14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 20 * OTHER DEALINGS IN THE SOFTWARE. 21 * 22 * Authors: AMD 23 * 24 */ 25 26 #include "dc.h" 27 #include "dc_link.h" 28 #include "../display_mode_lib.h" 29 #include "display_mode_vba_31.h" 30 #include "../dml_inline_defs.h" 31 32 /* 33 * NOTE: 34 * This file is gcc-parsable HW gospel, coming straight from HW engineers. 35 * 36 * It doesn't adhere to Linux kernel style and sometimes will do things in odd 37 * ways. Unless there is something clearly wrong with it the code should 38 * remain as-is as it provides us with a guarantee from HW that it is correct. 
39 */ 40 41 #define BPP_INVALID 0 42 #define BPP_BLENDED_PIPE 0xffffffff 43 #define DCN31_MAX_DSC_IMAGE_WIDTH 5184 44 #define DCN31_MAX_FMT_420_BUFFER_WIDTH 4096 45 46 // For DML-C changes that hasn't been propagated to VBA yet 47 //#define __DML_VBA_ALLOW_DELTA__ 48 49 // Move these to ip paramaters/constant 50 51 // At which vstartup the DML start to try if the mode can be supported 52 #define __DML_VBA_MIN_VSTARTUP__ 9 53 54 // Delay in DCFCLK from ARB to DET (1st num is ARB to SDPIF, 2nd number is SDPIF to DET) 55 #define __DML_ARB_TO_RET_DELAY__ (7 + 95) 56 57 // fudge factor for min dcfclk calclation 58 #define __DML_MIN_DCFCLK_FACTOR__ 1.15 59 60 typedef struct { 61 double DPPCLK; 62 double DISPCLK; 63 double PixelClock; 64 double DCFCLKDeepSleep; 65 unsigned int DPPPerPlane; 66 bool ScalerEnabled; 67 enum scan_direction_class SourceScan; 68 unsigned int BlockWidth256BytesY; 69 unsigned int BlockHeight256BytesY; 70 unsigned int BlockWidth256BytesC; 71 unsigned int BlockHeight256BytesC; 72 unsigned int InterlaceEnable; 73 unsigned int NumberOfCursors; 74 unsigned int VBlank; 75 unsigned int HTotal; 76 unsigned int DCCEnable; 77 bool ODMCombineIsEnabled; 78 enum source_format_class SourcePixelFormat; 79 int BytePerPixelY; 80 int BytePerPixelC; 81 bool ProgressiveToInterlaceUnitInOPP; 82 } Pipe; 83 84 #define BPP_INVALID 0 85 #define BPP_BLENDED_PIPE 0xffffffff 86 87 static bool CalculateBytePerPixelAnd256BBlockSizes( 88 enum source_format_class SourcePixelFormat, 89 enum dm_swizzle_mode SurfaceTiling, 90 unsigned int *BytePerPixelY, 91 unsigned int *BytePerPixelC, 92 double *BytePerPixelDETY, 93 double *BytePerPixelDETC, 94 unsigned int *BlockHeight256BytesY, 95 unsigned int *BlockHeight256BytesC, 96 unsigned int *BlockWidth256BytesY, 97 unsigned int *BlockWidth256BytesC); 98 static void DisplayPipeConfiguration(struct display_mode_lib *mode_lib); 99 static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation(struct 
display_mode_lib *mode_lib); 100 static unsigned int dscceComputeDelay( 101 unsigned int bpc, 102 double BPP, 103 unsigned int sliceWidth, 104 unsigned int numSlices, 105 enum output_format_class pixelFormat, 106 enum output_encoder_class Output); 107 static unsigned int dscComputeDelay(enum output_format_class pixelFormat, enum output_encoder_class Output); 108 static bool CalculatePrefetchSchedule( 109 struct display_mode_lib *mode_lib, 110 double HostVMInefficiencyFactor, 111 Pipe *myPipe, 112 unsigned int DSCDelay, 113 double DPPCLKDelaySubtotalPlusCNVCFormater, 114 double DPPCLKDelaySCL, 115 double DPPCLKDelaySCLLBOnly, 116 double DPPCLKDelayCNVCCursor, 117 double DISPCLKDelaySubtotal, 118 unsigned int DPP_RECOUT_WIDTH, 119 enum output_format_class OutputFormat, 120 unsigned int MaxInterDCNTileRepeaters, 121 unsigned int VStartup, 122 unsigned int MaxVStartup, 123 unsigned int GPUVMPageTableLevels, 124 bool GPUVMEnable, 125 bool HostVMEnable, 126 unsigned int HostVMMaxNonCachedPageTableLevels, 127 double HostVMMinPageSize, 128 bool DynamicMetadataEnable, 129 bool DynamicMetadataVMEnabled, 130 int DynamicMetadataLinesBeforeActiveRequired, 131 unsigned int DynamicMetadataTransmittedBytes, 132 double UrgentLatency, 133 double UrgentExtraLatency, 134 double TCalc, 135 unsigned int PDEAndMetaPTEBytesFrame, 136 unsigned int MetaRowByte, 137 unsigned int PixelPTEBytesPerRow, 138 double PrefetchSourceLinesY, 139 unsigned int SwathWidthY, 140 double VInitPreFillY, 141 unsigned int MaxNumSwathY, 142 double PrefetchSourceLinesC, 143 unsigned int SwathWidthC, 144 double VInitPreFillC, 145 unsigned int MaxNumSwathC, 146 int swath_width_luma_ub, 147 int swath_width_chroma_ub, 148 unsigned int SwathHeightY, 149 unsigned int SwathHeightC, 150 double TWait, 151 double *DSTXAfterScaler, 152 double *DSTYAfterScaler, 153 double *DestinationLinesForPrefetch, 154 double *PrefetchBandwidth, 155 double *DestinationLinesToRequestVMInVBlank, 156 double 
*DestinationLinesToRequestRowInVBlank, 157 double *VRatioPrefetchY, 158 double *VRatioPrefetchC, 159 double *RequiredPrefetchPixDataBWLuma, 160 double *RequiredPrefetchPixDataBWChroma, 161 bool *NotEnoughTimeForDynamicMetadata, 162 double *Tno_bw, 163 double *prefetch_vmrow_bw, 164 double *Tdmdl_vm, 165 double *Tdmdl, 166 double *TSetup, 167 int *VUpdateOffsetPix, 168 double *VUpdateWidthPix, 169 double *VReadyOffsetPix); 170 static double RoundToDFSGranularityUp(double Clock, double VCOSpeed); 171 static double RoundToDFSGranularityDown(double Clock, double VCOSpeed); 172 static void CalculateDCCConfiguration( 173 bool DCCEnabled, 174 bool DCCProgrammingAssumesScanDirectionUnknown, 175 enum source_format_class SourcePixelFormat, 176 unsigned int SurfaceWidthLuma, 177 unsigned int SurfaceWidthChroma, 178 unsigned int SurfaceHeightLuma, 179 unsigned int SurfaceHeightChroma, 180 double DETBufferSize, 181 unsigned int RequestHeight256ByteLuma, 182 unsigned int RequestHeight256ByteChroma, 183 enum dm_swizzle_mode TilingFormat, 184 unsigned int BytePerPixelY, 185 unsigned int BytePerPixelC, 186 double BytePerPixelDETY, 187 double BytePerPixelDETC, 188 enum scan_direction_class ScanOrientation, 189 unsigned int *MaxUncompressedBlockLuma, 190 unsigned int *MaxUncompressedBlockChroma, 191 unsigned int *MaxCompressedBlockLuma, 192 unsigned int *MaxCompressedBlockChroma, 193 unsigned int *IndependentBlockLuma, 194 unsigned int *IndependentBlockChroma); 195 static double CalculatePrefetchSourceLines( 196 struct display_mode_lib *mode_lib, 197 double VRatio, 198 double vtaps, 199 bool Interlace, 200 bool ProgressiveToInterlaceUnitInOPP, 201 unsigned int SwathHeight, 202 unsigned int ViewportYStart, 203 double *VInitPreFill, 204 unsigned int *MaxNumSwath); 205 static unsigned int CalculateVMAndRowBytes( 206 struct display_mode_lib *mode_lib, 207 bool DCCEnable, 208 unsigned int BlockHeight256Bytes, 209 unsigned int BlockWidth256Bytes, 210 enum source_format_class 
SourcePixelFormat, 211 unsigned int SurfaceTiling, 212 unsigned int BytePerPixel, 213 enum scan_direction_class ScanDirection, 214 unsigned int SwathWidth, 215 unsigned int ViewportHeight, 216 bool GPUVMEnable, 217 bool HostVMEnable, 218 unsigned int HostVMMaxNonCachedPageTableLevels, 219 unsigned int GPUVMMinPageSize, 220 unsigned int HostVMMinPageSize, 221 unsigned int PTEBufferSizeInRequests, 222 unsigned int Pitch, 223 unsigned int DCCMetaPitch, 224 unsigned int *MacroTileWidth, 225 unsigned int *MetaRowByte, 226 unsigned int *PixelPTEBytesPerRow, 227 bool *PTEBufferSizeNotExceeded, 228 int *dpte_row_width_ub, 229 unsigned int *dpte_row_height, 230 unsigned int *MetaRequestWidth, 231 unsigned int *MetaRequestHeight, 232 unsigned int *meta_row_width, 233 unsigned int *meta_row_height, 234 int *vm_group_bytes, 235 unsigned int *dpte_group_bytes, 236 unsigned int *PixelPTEReqWidth, 237 unsigned int *PixelPTEReqHeight, 238 unsigned int *PTERequestSize, 239 int *DPDE0BytesFrame, 240 int *MetaPTEBytesFrame); 241 static double CalculateTWait(unsigned int PrefetchMode, double DRAMClockChangeLatency, double UrgentLatency, double SREnterPlusExitTime); 242 static void CalculateRowBandwidth( 243 bool GPUVMEnable, 244 enum source_format_class SourcePixelFormat, 245 double VRatio, 246 double VRatioChroma, 247 bool DCCEnable, 248 double LineTime, 249 unsigned int MetaRowByteLuma, 250 unsigned int MetaRowByteChroma, 251 unsigned int meta_row_height_luma, 252 unsigned int meta_row_height_chroma, 253 unsigned int PixelPTEBytesPerRowLuma, 254 unsigned int PixelPTEBytesPerRowChroma, 255 unsigned int dpte_row_height_luma, 256 unsigned int dpte_row_height_chroma, 257 double *meta_row_bw, 258 double *dpte_row_bw); 259 260 static void CalculateFlipSchedule( 261 struct display_mode_lib *mode_lib, 262 double HostVMInefficiencyFactor, 263 double UrgentExtraLatency, 264 double UrgentLatency, 265 unsigned int GPUVMMaxPageTableLevels, 266 bool HostVMEnable, 267 unsigned int 
HostVMMaxNonCachedPageTableLevels, 268 bool GPUVMEnable, 269 double HostVMMinPageSize, 270 double PDEAndMetaPTEBytesPerFrame, 271 double MetaRowBytes, 272 double DPTEBytesPerRow, 273 double BandwidthAvailableForImmediateFlip, 274 unsigned int TotImmediateFlipBytes, 275 enum source_format_class SourcePixelFormat, 276 double LineTime, 277 double VRatio, 278 double VRatioChroma, 279 double Tno_bw, 280 bool DCCEnable, 281 unsigned int dpte_row_height, 282 unsigned int meta_row_height, 283 unsigned int dpte_row_height_chroma, 284 unsigned int meta_row_height_chroma, 285 double *DestinationLinesToRequestVMInImmediateFlip, 286 double *DestinationLinesToRequestRowInImmediateFlip, 287 double *final_flip_bw, 288 bool *ImmediateFlipSupportedForPipe); 289 static double CalculateWriteBackDelay( 290 enum source_format_class WritebackPixelFormat, 291 double WritebackHRatio, 292 double WritebackVRatio, 293 unsigned int WritebackVTaps, 294 int WritebackDestinationWidth, 295 int WritebackDestinationHeight, 296 int WritebackSourceHeight, 297 unsigned int HTotal); 298 299 static void CalculateVupdateAndDynamicMetadataParameters( 300 int MaxInterDCNTileRepeaters, 301 double DPPCLK, 302 double DISPCLK, 303 double DCFClkDeepSleep, 304 double PixelClock, 305 int HTotal, 306 int VBlank, 307 int DynamicMetadataTransmittedBytes, 308 int DynamicMetadataLinesBeforeActiveRequired, 309 int InterlaceEnable, 310 bool ProgressiveToInterlaceUnitInOPP, 311 double *TSetup, 312 double *Tdmbf, 313 double *Tdmec, 314 double *Tdmsks, 315 int *VUpdateOffsetPix, 316 double *VUpdateWidthPix, 317 double *VReadyOffsetPix); 318 319 static void CalculateWatermarksAndDRAMSpeedChangeSupport( 320 struct display_mode_lib *mode_lib, 321 unsigned int PrefetchMode, 322 unsigned int NumberOfActivePlanes, 323 unsigned int MaxLineBufferLines, 324 unsigned int LineBufferSize, 325 unsigned int WritebackInterfaceBufferSize, 326 double DCFCLK, 327 double ReturnBW, 328 bool SynchronizedVBlank, 329 unsigned int 
dpte_group_bytes[], 330 unsigned int MetaChunkSize, 331 double UrgentLatency, 332 double ExtraLatency, 333 double WritebackLatency, 334 double WritebackChunkSize, 335 double SOCCLK, 336 double DRAMClockChangeLatency, 337 double SRExitTime, 338 double SREnterPlusExitTime, 339 double SRExitZ8Time, 340 double SREnterPlusExitZ8Time, 341 double DCFCLKDeepSleep, 342 unsigned int DETBufferSizeY[], 343 unsigned int DETBufferSizeC[], 344 unsigned int SwathHeightY[], 345 unsigned int SwathHeightC[], 346 unsigned int LBBitPerPixel[], 347 double SwathWidthY[], 348 double SwathWidthC[], 349 double HRatio[], 350 double HRatioChroma[], 351 unsigned int vtaps[], 352 unsigned int VTAPsChroma[], 353 double VRatio[], 354 double VRatioChroma[], 355 unsigned int HTotal[], 356 double PixelClock[], 357 unsigned int BlendingAndTiming[], 358 unsigned int DPPPerPlane[], 359 double BytePerPixelDETY[], 360 double BytePerPixelDETC[], 361 double DSTXAfterScaler[], 362 double DSTYAfterScaler[], 363 bool WritebackEnable[], 364 enum source_format_class WritebackPixelFormat[], 365 double WritebackDestinationWidth[], 366 double WritebackDestinationHeight[], 367 double WritebackSourceHeight[], 368 bool UnboundedRequestEnabled, 369 int unsigned CompressedBufferSizeInkByte, 370 enum clock_change_support *DRAMClockChangeSupport, 371 double *UrgentWatermark, 372 double *WritebackUrgentWatermark, 373 double *DRAMClockChangeWatermark, 374 double *WritebackDRAMClockChangeWatermark, 375 double *StutterExitWatermark, 376 double *StutterEnterPlusExitWatermark, 377 double *Z8StutterExitWatermark, 378 double *Z8StutterEnterPlusExitWatermark, 379 double *MinActiveDRAMClockChangeLatencySupported); 380 381 static void CalculateDCFCLKDeepSleep( 382 struct display_mode_lib *mode_lib, 383 unsigned int NumberOfActivePlanes, 384 int BytePerPixelY[], 385 int BytePerPixelC[], 386 double VRatio[], 387 double VRatioChroma[], 388 double SwathWidthY[], 389 double SwathWidthC[], 390 unsigned int DPPPerPlane[], 391 double 
HRatio[], 392 double HRatioChroma[], 393 double PixelClock[], 394 double PSCL_THROUGHPUT[], 395 double PSCL_THROUGHPUT_CHROMA[], 396 double DPPCLK[], 397 double ReadBandwidthLuma[], 398 double ReadBandwidthChroma[], 399 int ReturnBusWidth, 400 double *DCFCLKDeepSleep); 401 402 static void CalculateUrgentBurstFactor( 403 int swath_width_luma_ub, 404 int swath_width_chroma_ub, 405 unsigned int SwathHeightY, 406 unsigned int SwathHeightC, 407 double LineTime, 408 double UrgentLatency, 409 double CursorBufferSize, 410 unsigned int CursorWidth, 411 unsigned int CursorBPP, 412 double VRatio, 413 double VRatioC, 414 double BytePerPixelInDETY, 415 double BytePerPixelInDETC, 416 double DETBufferSizeY, 417 double DETBufferSizeC, 418 double *UrgentBurstFactorCursor, 419 double *UrgentBurstFactorLuma, 420 double *UrgentBurstFactorChroma, 421 bool *NotEnoughUrgentLatencyHiding); 422 423 static void UseMinimumDCFCLK( 424 struct display_mode_lib *mode_lib, 425 int MaxInterDCNTileRepeaters, 426 int MaxPrefetchMode, 427 double FinalDRAMClockChangeLatency, 428 double SREnterPlusExitTime, 429 int ReturnBusWidth, 430 int RoundTripPingLatencyCycles, 431 int ReorderingBytes, 432 int PixelChunkSizeInKByte, 433 int MetaChunkSize, 434 bool GPUVMEnable, 435 int GPUVMMaxPageTableLevels, 436 bool HostVMEnable, 437 int NumberOfActivePlanes, 438 double HostVMMinPageSize, 439 int HostVMMaxNonCachedPageTableLevels, 440 bool DynamicMetadataVMEnabled, 441 enum immediate_flip_requirement ImmediateFlipRequirement, 442 bool ProgressiveToInterlaceUnitInOPP, 443 double MaxAveragePercentOfIdealFabricAndSDPPortBWDisplayCanUseInNormalSystemOperation, 444 double PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency, 445 int VTotal[], 446 int VActive[], 447 int DynamicMetadataTransmittedBytes[], 448 int DynamicMetadataLinesBeforeActiveRequired[], 449 bool Interlace[], 450 double RequiredDPPCLK[][2][DC__NUM_DPP__MAX], 451 double RequiredDISPCLK[][2], 452 double UrgLatency[], 453 unsigned int 
NoOfDPP[][2][DC__NUM_DPP__MAX], 454 double ProjectedDCFCLKDeepSleep[][2], 455 double MaximumVStartup[][2][DC__NUM_DPP__MAX], 456 double TotalVActivePixelBandwidth[][2], 457 double TotalVActiveCursorBandwidth[][2], 458 double TotalMetaRowBandwidth[][2], 459 double TotalDPTERowBandwidth[][2], 460 unsigned int TotalNumberOfActiveDPP[][2], 461 unsigned int TotalNumberOfDCCActiveDPP[][2], 462 int dpte_group_bytes[], 463 double PrefetchLinesY[][2][DC__NUM_DPP__MAX], 464 double PrefetchLinesC[][2][DC__NUM_DPP__MAX], 465 int swath_width_luma_ub_all_states[][2][DC__NUM_DPP__MAX], 466 int swath_width_chroma_ub_all_states[][2][DC__NUM_DPP__MAX], 467 int BytePerPixelY[], 468 int BytePerPixelC[], 469 int HTotal[], 470 double PixelClock[], 471 double PDEAndMetaPTEBytesPerFrame[][2][DC__NUM_DPP__MAX], 472 double DPTEBytesPerRow[][2][DC__NUM_DPP__MAX], 473 double MetaRowBytes[][2][DC__NUM_DPP__MAX], 474 bool DynamicMetadataEnable[], 475 double VActivePixelBandwidth[][2][DC__NUM_DPP__MAX], 476 double VActiveCursorBandwidth[][2][DC__NUM_DPP__MAX], 477 double ReadBandwidthLuma[], 478 double ReadBandwidthChroma[], 479 double DCFCLKPerState[], 480 double DCFCLKState[][2]); 481 482 static void CalculatePixelDeliveryTimes( 483 unsigned int NumberOfActivePlanes, 484 double VRatio[], 485 double VRatioChroma[], 486 double VRatioPrefetchY[], 487 double VRatioPrefetchC[], 488 unsigned int swath_width_luma_ub[], 489 unsigned int swath_width_chroma_ub[], 490 unsigned int DPPPerPlane[], 491 double HRatio[], 492 double HRatioChroma[], 493 double PixelClock[], 494 double PSCL_THROUGHPUT[], 495 double PSCL_THROUGHPUT_CHROMA[], 496 double DPPCLK[], 497 int BytePerPixelC[], 498 enum scan_direction_class SourceScan[], 499 unsigned int NumberOfCursors[], 500 unsigned int CursorWidth[][DC__NUM_CURSOR__MAX], 501 unsigned int CursorBPP[][DC__NUM_CURSOR__MAX], 502 unsigned int BlockWidth256BytesY[], 503 unsigned int BlockHeight256BytesY[], 504 unsigned int BlockWidth256BytesC[], 505 unsigned int 
BlockHeight256BytesC[], 506 double DisplayPipeLineDeliveryTimeLuma[], 507 double DisplayPipeLineDeliveryTimeChroma[], 508 double DisplayPipeLineDeliveryTimeLumaPrefetch[], 509 double DisplayPipeLineDeliveryTimeChromaPrefetch[], 510 double DisplayPipeRequestDeliveryTimeLuma[], 511 double DisplayPipeRequestDeliveryTimeChroma[], 512 double DisplayPipeRequestDeliveryTimeLumaPrefetch[], 513 double DisplayPipeRequestDeliveryTimeChromaPrefetch[], 514 double CursorRequestDeliveryTime[], 515 double CursorRequestDeliveryTimePrefetch[]); 516 517 static void CalculateMetaAndPTETimes( 518 int NumberOfActivePlanes, 519 bool GPUVMEnable, 520 int MetaChunkSize, 521 int MinMetaChunkSizeBytes, 522 int HTotal[], 523 double VRatio[], 524 double VRatioChroma[], 525 double DestinationLinesToRequestRowInVBlank[], 526 double DestinationLinesToRequestRowInImmediateFlip[], 527 bool DCCEnable[], 528 double PixelClock[], 529 int BytePerPixelY[], 530 int BytePerPixelC[], 531 enum scan_direction_class SourceScan[], 532 int dpte_row_height[], 533 int dpte_row_height_chroma[], 534 int meta_row_width[], 535 int meta_row_width_chroma[], 536 int meta_row_height[], 537 int meta_row_height_chroma[], 538 int meta_req_width[], 539 int meta_req_width_chroma[], 540 int meta_req_height[], 541 int meta_req_height_chroma[], 542 int dpte_group_bytes[], 543 int PTERequestSizeY[], 544 int PTERequestSizeC[], 545 int PixelPTEReqWidthY[], 546 int PixelPTEReqHeightY[], 547 int PixelPTEReqWidthC[], 548 int PixelPTEReqHeightC[], 549 int dpte_row_width_luma_ub[], 550 int dpte_row_width_chroma_ub[], 551 double DST_Y_PER_PTE_ROW_NOM_L[], 552 double DST_Y_PER_PTE_ROW_NOM_C[], 553 double DST_Y_PER_META_ROW_NOM_L[], 554 double DST_Y_PER_META_ROW_NOM_C[], 555 double TimePerMetaChunkNominal[], 556 double TimePerChromaMetaChunkNominal[], 557 double TimePerMetaChunkVBlank[], 558 double TimePerChromaMetaChunkVBlank[], 559 double TimePerMetaChunkFlip[], 560 double TimePerChromaMetaChunkFlip[], 561 double 
time_per_pte_group_nom_luma[], 562 double time_per_pte_group_vblank_luma[], 563 double time_per_pte_group_flip_luma[], 564 double time_per_pte_group_nom_chroma[], 565 double time_per_pte_group_vblank_chroma[], 566 double time_per_pte_group_flip_chroma[]); 567 568 static void CalculateVMGroupAndRequestTimes( 569 unsigned int NumberOfActivePlanes, 570 bool GPUVMEnable, 571 unsigned int GPUVMMaxPageTableLevels, 572 unsigned int HTotal[], 573 int BytePerPixelC[], 574 double DestinationLinesToRequestVMInVBlank[], 575 double DestinationLinesToRequestVMInImmediateFlip[], 576 bool DCCEnable[], 577 double PixelClock[], 578 int dpte_row_width_luma_ub[], 579 int dpte_row_width_chroma_ub[], 580 int vm_group_bytes[], 581 unsigned int dpde0_bytes_per_frame_ub_l[], 582 unsigned int dpde0_bytes_per_frame_ub_c[], 583 int meta_pte_bytes_per_frame_ub_l[], 584 int meta_pte_bytes_per_frame_ub_c[], 585 double TimePerVMGroupVBlank[], 586 double TimePerVMGroupFlip[], 587 double TimePerVMRequestVBlank[], 588 double TimePerVMRequestFlip[]); 589 590 static void CalculateStutterEfficiency( 591 struct display_mode_lib *mode_lib, 592 int CompressedBufferSizeInkByte, 593 bool UnboundedRequestEnabled, 594 int ConfigReturnBufferSizeInKByte, 595 int MetaFIFOSizeInKEntries, 596 int ZeroSizeBufferEntries, 597 int NumberOfActivePlanes, 598 int ROBBufferSizeInKByte, 599 double TotalDataReadBandwidth, 600 double DCFCLK, 601 double ReturnBW, 602 double COMPBUF_RESERVED_SPACE_64B, 603 double COMPBUF_RESERVED_SPACE_ZS, 604 double SRExitTime, 605 double SRExitZ8Time, 606 bool SynchronizedVBlank, 607 double Z8StutterEnterPlusExitWatermark, 608 double StutterEnterPlusExitWatermark, 609 bool ProgressiveToInterlaceUnitInOPP, 610 bool Interlace[], 611 double MinTTUVBlank[], 612 int DPPPerPlane[], 613 unsigned int DETBufferSizeY[], 614 int BytePerPixelY[], 615 double BytePerPixelDETY[], 616 double SwathWidthY[], 617 int SwathHeightY[], 618 int SwathHeightC[], 619 double NetDCCRateLuma[], 620 double 
NetDCCRateChroma[], 621 double DCCFractionOfZeroSizeRequestsLuma[], 622 double DCCFractionOfZeroSizeRequestsChroma[], 623 int HTotal[], 624 int VTotal[], 625 double PixelClock[], 626 double VRatio[], 627 enum scan_direction_class SourceScan[], 628 int BlockHeight256BytesY[], 629 int BlockWidth256BytesY[], 630 int BlockHeight256BytesC[], 631 int BlockWidth256BytesC[], 632 int DCCYMaxUncompressedBlock[], 633 int DCCCMaxUncompressedBlock[], 634 int VActive[], 635 bool DCCEnable[], 636 bool WritebackEnable[], 637 double ReadBandwidthPlaneLuma[], 638 double ReadBandwidthPlaneChroma[], 639 double meta_row_bw[], 640 double dpte_row_bw[], 641 double *StutterEfficiencyNotIncludingVBlank, 642 double *StutterEfficiency, 643 int *NumberOfStutterBurstsPerFrame, 644 double *Z8StutterEfficiencyNotIncludingVBlank, 645 double *Z8StutterEfficiency, 646 int *Z8NumberOfStutterBurstsPerFrame, 647 double *StutterPeriod); 648 649 static void CalculateSwathAndDETConfiguration( 650 bool ForceSingleDPP, 651 int NumberOfActivePlanes, 652 unsigned int DETBufferSizeInKByte, 653 double MaximumSwathWidthLuma[], 654 double MaximumSwathWidthChroma[], 655 enum scan_direction_class SourceScan[], 656 enum source_format_class SourcePixelFormat[], 657 enum dm_swizzle_mode SurfaceTiling[], 658 int ViewportWidth[], 659 int ViewportHeight[], 660 int SurfaceWidthY[], 661 int SurfaceWidthC[], 662 int SurfaceHeightY[], 663 int SurfaceHeightC[], 664 int Read256BytesBlockHeightY[], 665 int Read256BytesBlockHeightC[], 666 int Read256BytesBlockWidthY[], 667 int Read256BytesBlockWidthC[], 668 enum odm_combine_mode ODMCombineEnabled[], 669 int BlendingAndTiming[], 670 int BytePerPixY[], 671 int BytePerPixC[], 672 double BytePerPixDETY[], 673 double BytePerPixDETC[], 674 int HActive[], 675 double HRatio[], 676 double HRatioChroma[], 677 int DPPPerPlane[], 678 int swath_width_luma_ub[], 679 int swath_width_chroma_ub[], 680 double SwathWidth[], 681 double SwathWidthChroma[], 682 int SwathHeightY[], 683 int 
SwathHeightC[], 684 unsigned int DETBufferSizeY[], 685 unsigned int DETBufferSizeC[], 686 bool ViewportSizeSupportPerPlane[], 687 bool *ViewportSizeSupport); 688 static void CalculateSwathWidth( 689 bool ForceSingleDPP, 690 int NumberOfActivePlanes, 691 enum source_format_class SourcePixelFormat[], 692 enum scan_direction_class SourceScan[], 693 int ViewportWidth[], 694 int ViewportHeight[], 695 int SurfaceWidthY[], 696 int SurfaceWidthC[], 697 int SurfaceHeightY[], 698 int SurfaceHeightC[], 699 enum odm_combine_mode ODMCombineEnabled[], 700 int BytePerPixY[], 701 int BytePerPixC[], 702 int Read256BytesBlockHeightY[], 703 int Read256BytesBlockHeightC[], 704 int Read256BytesBlockWidthY[], 705 int Read256BytesBlockWidthC[], 706 int BlendingAndTiming[], 707 int HActive[], 708 double HRatio[], 709 int DPPPerPlane[], 710 double SwathWidthSingleDPPY[], 711 double SwathWidthSingleDPPC[], 712 double SwathWidthY[], 713 double SwathWidthC[], 714 int MaximumSwathHeightY[], 715 int MaximumSwathHeightC[], 716 int swath_width_luma_ub[], 717 int swath_width_chroma_ub[]); 718 719 static double CalculateExtraLatency( 720 int RoundTripPingLatencyCycles, 721 int ReorderingBytes, 722 double DCFCLK, 723 int TotalNumberOfActiveDPP, 724 int PixelChunkSizeInKByte, 725 int TotalNumberOfDCCActiveDPP, 726 int MetaChunkSize, 727 double ReturnBW, 728 bool GPUVMEnable, 729 bool HostVMEnable, 730 int NumberOfActivePlanes, 731 int NumberOfDPP[], 732 int dpte_group_bytes[], 733 double HostVMInefficiencyFactor, 734 double HostVMMinPageSize, 735 int HostVMMaxNonCachedPageTableLevels); 736 737 static double CalculateExtraLatencyBytes( 738 int ReorderingBytes, 739 int TotalNumberOfActiveDPP, 740 int PixelChunkSizeInKByte, 741 int TotalNumberOfDCCActiveDPP, 742 int MetaChunkSize, 743 bool GPUVMEnable, 744 bool HostVMEnable, 745 int NumberOfActivePlanes, 746 int NumberOfDPP[], 747 int dpte_group_bytes[], 748 double HostVMInefficiencyFactor, 749 double HostVMMinPageSize, 750 int 
HostVMMaxNonCachedPageTableLevels); 751 752 static double CalculateUrgentLatency( 753 double UrgentLatencyPixelDataOnly, 754 double UrgentLatencyPixelMixedWithVMData, 755 double UrgentLatencyVMDataOnly, 756 bool DoUrgentLatencyAdjustment, 757 double UrgentLatencyAdjustmentFabricClockComponent, 758 double UrgentLatencyAdjustmentFabricClockReference, 759 double FabricClockSingle); 760 761 static void CalculateUnboundedRequestAndCompressedBufferSize( 762 unsigned int DETBufferSizeInKByte, 763 int ConfigReturnBufferSizeInKByte, 764 enum unbounded_requesting_policy UseUnboundedRequestingFinal, 765 int TotalActiveDPP, 766 bool NoChromaPlanes, 767 int MaxNumDPP, 768 int CompressedBufferSegmentSizeInkByteFinal, 769 enum output_encoder_class *Output, 770 bool *UnboundedRequestEnabled, 771 int *CompressedBufferSizeInkByte); 772 773 static bool UnboundedRequest(enum unbounded_requesting_policy UseUnboundedRequestingFinal, int TotalNumberOfActiveDPP, bool NoChroma, enum output_encoder_class Output); 774 775 void dml31_recalculate(struct display_mode_lib *mode_lib) 776 { 777 ModeSupportAndSystemConfiguration(mode_lib); 778 PixelClockAdjustmentForProgressiveToInterlaceUnit(mode_lib); 779 DisplayPipeConfiguration(mode_lib); 780 #ifdef __DML_VBA_DEBUG__ 781 dml_print("DML::%s: Calling DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation\n", __func__); 782 #endif 783 DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation(mode_lib); 784 } 785 786 static unsigned int dscceComputeDelay( 787 unsigned int bpc, 788 double BPP, 789 unsigned int sliceWidth, 790 unsigned int numSlices, 791 enum output_format_class pixelFormat, 792 enum output_encoder_class Output) 793 { 794 // valid bpc = source bits per component in the set of {8, 10, 12} 795 // valid bpp = increments of 1/16 of a bit 796 // min = 6/7/8 in N420/N422/444, respectively 797 // max = such that compression is 1:1 798 //valid sliceWidth = number of pixels per slice line, 
must be less than or equal to 5184/numSlices (or 4096/numSlices in 420 mode) 799 //valid numSlices = number of slices in the horiziontal direction per DSC engine in the set of {1, 2, 3, 4} 800 //valid pixelFormat = pixel/color format in the set of {:N444_RGB, :S422, :N422, :N420} 801 802 // fixed value 803 unsigned int rcModelSize = 8192; 804 805 // N422/N420 operate at 2 pixels per clock 806 unsigned int pixelsPerClock = 0, lstall, D, initalXmitDelay, w, s, ix, wx, P, l0, a, ax, L, Delay, pixels; 807 808 if (pixelFormat == dm_420) 809 pixelsPerClock = 2; 810 else if (pixelFormat == dm_444) 811 pixelsPerClock = 1; 812 else if (pixelFormat == dm_n422) 813 pixelsPerClock = 2; 814 // #all other modes operate at 1 pixel per clock 815 else 816 pixelsPerClock = 1; 817 818 //initial transmit delay as per PPS 819 initalXmitDelay = dml_round(rcModelSize / 2.0 / BPP / pixelsPerClock); 820 821 //compute ssm delay 822 if (bpc == 8) 823 D = 81; 824 else if (bpc == 10) 825 D = 89; 826 else 827 D = 113; 828 829 //divide by pixel per cycle to compute slice width as seen by DSC 830 w = sliceWidth / pixelsPerClock; 831 832 //422 mode has an additional cycle of delay 833 if (pixelFormat == dm_420 || pixelFormat == dm_444 || pixelFormat == dm_n422) 834 s = 0; 835 else 836 s = 1; 837 838 //main calculation for the dscce 839 ix = initalXmitDelay + 45; 840 wx = (w + 2) / 3; 841 P = 3 * wx - w; 842 l0 = ix / w; 843 a = ix + P * l0; 844 ax = (a + 2) / 3 + D + 6 + 1; 845 L = (ax + wx - 1) / wx; 846 if ((ix % w) == 0 && P != 0) 847 lstall = 1; 848 else 849 lstall = 0; 850 Delay = L * wx * (numSlices - 1) + ax + s + lstall + 22; 851 852 //dsc processes 3 pixel containers per cycle and a container can contain 1 or 2 pixels 853 pixels = Delay * 3 * pixelsPerClock; 854 return pixels; 855 } 856 857 static unsigned int dscComputeDelay(enum output_format_class pixelFormat, enum output_encoder_class Output) 858 { 859 unsigned int Delay = 0; 860 861 if (pixelFormat == dm_420) { 862 // sfr 863 Delay = 
Delay + 2; 864 // dsccif 865 Delay = Delay + 0; 866 // dscc - input deserializer 867 Delay = Delay + 3; 868 // dscc gets pixels every other cycle 869 Delay = Delay + 2; 870 // dscc - input cdc fifo 871 Delay = Delay + 12; 872 // dscc gets pixels every other cycle 873 Delay = Delay + 13; 874 // dscc - cdc uncertainty 875 Delay = Delay + 2; 876 // dscc - output cdc fifo 877 Delay = Delay + 7; 878 // dscc gets pixels every other cycle 879 Delay = Delay + 3; 880 // dscc - cdc uncertainty 881 Delay = Delay + 2; 882 // dscc - output serializer 883 Delay = Delay + 1; 884 // sft 885 Delay = Delay + 1; 886 } else if (pixelFormat == dm_n422) { 887 // sfr 888 Delay = Delay + 2; 889 // dsccif 890 Delay = Delay + 1; 891 // dscc - input deserializer 892 Delay = Delay + 5; 893 // dscc - input cdc fifo 894 Delay = Delay + 25; 895 // dscc - cdc uncertainty 896 Delay = Delay + 2; 897 // dscc - output cdc fifo 898 Delay = Delay + 10; 899 // dscc - cdc uncertainty 900 Delay = Delay + 2; 901 // dscc - output serializer 902 Delay = Delay + 1; 903 // sft 904 Delay = Delay + 1; 905 } else { 906 // sfr 907 Delay = Delay + 2; 908 // dsccif 909 Delay = Delay + 0; 910 // dscc - input deserializer 911 Delay = Delay + 3; 912 // dscc - input cdc fifo 913 Delay = Delay + 12; 914 // dscc - cdc uncertainty 915 Delay = Delay + 2; 916 // dscc - output cdc fifo 917 Delay = Delay + 7; 918 // dscc - output serializer 919 Delay = Delay + 1; 920 // dscc - cdc uncertainty 921 Delay = Delay + 2; 922 // sft 923 Delay = Delay + 1; 924 } 925 926 return Delay; 927 } 928 929 static bool CalculatePrefetchSchedule( 930 struct display_mode_lib *mode_lib, 931 double HostVMInefficiencyFactor, 932 Pipe *myPipe, 933 unsigned int DSCDelay, 934 double DPPCLKDelaySubtotalPlusCNVCFormater, 935 double DPPCLKDelaySCL, 936 double DPPCLKDelaySCLLBOnly, 937 double DPPCLKDelayCNVCCursor, 938 double DISPCLKDelaySubtotal, 939 unsigned int DPP_RECOUT_WIDTH, 940 enum output_format_class OutputFormat, 941 unsigned int 
MaxInterDCNTileRepeaters, 942 unsigned int VStartup, 943 unsigned int MaxVStartup, 944 unsigned int GPUVMPageTableLevels, 945 bool GPUVMEnable, 946 bool HostVMEnable, 947 unsigned int HostVMMaxNonCachedPageTableLevels, 948 double HostVMMinPageSize, 949 bool DynamicMetadataEnable, 950 bool DynamicMetadataVMEnabled, 951 int DynamicMetadataLinesBeforeActiveRequired, 952 unsigned int DynamicMetadataTransmittedBytes, 953 double UrgentLatency, 954 double UrgentExtraLatency, 955 double TCalc, 956 unsigned int PDEAndMetaPTEBytesFrame, 957 unsigned int MetaRowByte, 958 unsigned int PixelPTEBytesPerRow, 959 double PrefetchSourceLinesY, 960 unsigned int SwathWidthY, 961 double VInitPreFillY, 962 unsigned int MaxNumSwathY, 963 double PrefetchSourceLinesC, 964 unsigned int SwathWidthC, 965 double VInitPreFillC, 966 unsigned int MaxNumSwathC, 967 int swath_width_luma_ub, 968 int swath_width_chroma_ub, 969 unsigned int SwathHeightY, 970 unsigned int SwathHeightC, 971 double TWait, 972 double *DSTXAfterScaler, 973 double *DSTYAfterScaler, 974 double *DestinationLinesForPrefetch, 975 double *PrefetchBandwidth, 976 double *DestinationLinesToRequestVMInVBlank, 977 double *DestinationLinesToRequestRowInVBlank, 978 double *VRatioPrefetchY, 979 double *VRatioPrefetchC, 980 double *RequiredPrefetchPixDataBWLuma, 981 double *RequiredPrefetchPixDataBWChroma, 982 bool *NotEnoughTimeForDynamicMetadata, 983 double *Tno_bw, 984 double *prefetch_vmrow_bw, 985 double *Tdmdl_vm, 986 double *Tdmdl, 987 double *TSetup, 988 int *VUpdateOffsetPix, 989 double *VUpdateWidthPix, 990 double *VReadyOffsetPix) 991 { 992 bool MyError = false; 993 unsigned int DPPCycles, DISPCLKCycles; 994 double DSTTotalPixelsAfterScaler; 995 double LineTime; 996 double dst_y_prefetch_equ; 997 double Tsw_oto; 998 double prefetch_bw_oto; 999 double Tvm_oto; 1000 double Tr0_oto; 1001 double Tvm_oto_lines; 1002 double Tr0_oto_lines; 1003 double dst_y_prefetch_oto; 1004 double TimeForFetchingMetaPTE = 0; 1005 double 
TimeForFetchingRowInVBlank = 0; 1006 double LinesToRequestPrefetchPixelData = 0; 1007 unsigned int HostVMDynamicLevelsTrips; 1008 double trip_to_mem; 1009 double Tvm_trips; 1010 double Tr0_trips; 1011 double Tvm_trips_rounded; 1012 double Tr0_trips_rounded; 1013 double Lsw_oto; 1014 double Tpre_rounded; 1015 double prefetch_bw_equ; 1016 double Tvm_equ; 1017 double Tr0_equ; 1018 double Tdmbf; 1019 double Tdmec; 1020 double Tdmsks; 1021 double prefetch_sw_bytes; 1022 double bytes_pp; 1023 double dep_bytes; 1024 int max_vratio_pre = 4; 1025 double min_Lsw; 1026 double Tsw_est1 = 0; 1027 double Tsw_est3 = 0; 1028 1029 if (GPUVMEnable == true && HostVMEnable == true) { 1030 HostVMDynamicLevelsTrips = HostVMMaxNonCachedPageTableLevels; 1031 } else { 1032 HostVMDynamicLevelsTrips = 0; 1033 } 1034 #ifdef __DML_VBA_DEBUG__ 1035 dml_print("DML::%s: GPUVMEnable=%d HostVMEnable=%d HostVMInefficiencyFactor=%f\n", __func__, GPUVMEnable, HostVMEnable, HostVMInefficiencyFactor); 1036 #endif 1037 CalculateVupdateAndDynamicMetadataParameters( 1038 MaxInterDCNTileRepeaters, 1039 myPipe->DPPCLK, 1040 myPipe->DISPCLK, 1041 myPipe->DCFCLKDeepSleep, 1042 myPipe->PixelClock, 1043 myPipe->HTotal, 1044 myPipe->VBlank, 1045 DynamicMetadataTransmittedBytes, 1046 DynamicMetadataLinesBeforeActiveRequired, 1047 myPipe->InterlaceEnable, 1048 myPipe->ProgressiveToInterlaceUnitInOPP, 1049 TSetup, 1050 &Tdmbf, 1051 &Tdmec, 1052 &Tdmsks, 1053 VUpdateOffsetPix, 1054 VUpdateWidthPix, 1055 VReadyOffsetPix); 1056 1057 LineTime = myPipe->HTotal / myPipe->PixelClock; 1058 trip_to_mem = UrgentLatency; 1059 Tvm_trips = UrgentExtraLatency + trip_to_mem * (GPUVMPageTableLevels * (HostVMDynamicLevelsTrips + 1) - 1); 1060 1061 #ifdef __DML_VBA_ALLOW_DELTA__ 1062 if (DynamicMetadataVMEnabled == true && GPUVMEnable == true) { 1063 #else 1064 if (DynamicMetadataVMEnabled == true) { 1065 #endif 1066 *Tdmdl = TWait + Tvm_trips + trip_to_mem; 1067 } else { 1068 *Tdmdl = TWait + UrgentExtraLatency; 1069 } 1070 1071 
#ifdef __DML_VBA_ALLOW_DELTA__ 1072 if (DynamicMetadataEnable == false) { 1073 *Tdmdl = 0.0; 1074 } 1075 #endif 1076 1077 if (DynamicMetadataEnable == true) { 1078 if (VStartup * LineTime < *TSetup + *Tdmdl + Tdmbf + Tdmec + Tdmsks) { 1079 *NotEnoughTimeForDynamicMetadata = true; 1080 dml_print("DML::%s: Not Enough Time for Dynamic Meta!\n", __func__); 1081 dml_print("DML::%s: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", __func__, Tdmbf); 1082 dml_print("DML::%s: Tdmec: %fus - time dio takes to transfer dmd\n", __func__, Tdmec); 1083 dml_print("DML::%s: Tdmsks: %fus - time before active dmd must complete transmission at dio\n", __func__, Tdmsks); 1084 dml_print("DML::%s: Tdmdl: %fus - time for fabric to become ready and fetch dmd \n", __func__, *Tdmdl); 1085 } else { 1086 *NotEnoughTimeForDynamicMetadata = false; 1087 } 1088 } else { 1089 *NotEnoughTimeForDynamicMetadata = false; 1090 } 1091 1092 *Tdmdl_vm = (DynamicMetadataEnable == true && DynamicMetadataVMEnabled == true && GPUVMEnable == true ? 
TWait + Tvm_trips : 0); 1093 1094 if (myPipe->ScalerEnabled) 1095 DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + DPPCLKDelaySCL; 1096 else 1097 DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + DPPCLKDelaySCLLBOnly; 1098 1099 DPPCycles = DPPCycles + myPipe->NumberOfCursors * DPPCLKDelayCNVCCursor; 1100 1101 DISPCLKCycles = DISPCLKDelaySubtotal; 1102 1103 if (myPipe->DPPCLK == 0.0 || myPipe->DISPCLK == 0.0) 1104 return true; 1105 1106 *DSTXAfterScaler = DPPCycles * myPipe->PixelClock / myPipe->DPPCLK + DISPCLKCycles * myPipe->PixelClock / myPipe->DISPCLK + DSCDelay; 1107 1108 #ifdef __DML_VBA_DEBUG__ 1109 dml_print("DML::%s: DPPCycles: %d\n", __func__, DPPCycles); 1110 dml_print("DML::%s: PixelClock: %f\n", __func__, myPipe->PixelClock); 1111 dml_print("DML::%s: DPPCLK: %f\n", __func__, myPipe->DPPCLK); 1112 dml_print("DML::%s: DISPCLKCycles: %d\n", __func__, DISPCLKCycles); 1113 dml_print("DML::%s: DISPCLK: %f\n", __func__, myPipe->DISPCLK); 1114 dml_print("DML::%s: DSCDelay: %d\n", __func__, DSCDelay); 1115 dml_print("DML::%s: DSTXAfterScaler: %d\n", __func__, *DSTXAfterScaler); 1116 dml_print("DML::%s: ODMCombineIsEnabled: %d\n", __func__, myPipe->ODMCombineIsEnabled); 1117 #endif 1118 1119 *DSTXAfterScaler = *DSTXAfterScaler + ((myPipe->ODMCombineIsEnabled) ? 
18 : 0) + (myPipe->DPPPerPlane - 1) * DPP_RECOUT_WIDTH; 1120 1121 if (OutputFormat == dm_420 || (myPipe->InterlaceEnable && myPipe->ProgressiveToInterlaceUnitInOPP)) 1122 *DSTYAfterScaler = 1; 1123 else 1124 *DSTYAfterScaler = 0; 1125 1126 DSTTotalPixelsAfterScaler = *DSTYAfterScaler * myPipe->HTotal + *DSTXAfterScaler; 1127 *DSTYAfterScaler = dml_floor(DSTTotalPixelsAfterScaler / myPipe->HTotal, 1); 1128 *DSTXAfterScaler = DSTTotalPixelsAfterScaler - ((double) (*DSTYAfterScaler * myPipe->HTotal)); 1129 1130 #ifdef __DML_VBA_DEBUG__ 1131 dml_print("DML::%s: DSTXAfterScaler: %d (final)\n", __func__, *DSTXAfterScaler); 1132 #endif 1133 1134 MyError = false; 1135 1136 Tr0_trips = trip_to_mem * (HostVMDynamicLevelsTrips + 1); 1137 Tvm_trips_rounded = dml_ceil(4.0 * Tvm_trips / LineTime, 1) / 4 * LineTime; 1138 Tr0_trips_rounded = dml_ceil(4.0 * Tr0_trips / LineTime, 1) / 4 * LineTime; 1139 1140 #ifdef __DML_VBA_ALLOW_DELTA__ 1141 if (!myPipe->DCCEnable) { 1142 Tr0_trips = 0.0; 1143 Tr0_trips_rounded = 0.0; 1144 } 1145 #endif 1146 1147 if (!GPUVMEnable) { 1148 Tvm_trips = 0.0; 1149 Tvm_trips_rounded = 0.0; 1150 } 1151 1152 if (GPUVMEnable) { 1153 if (GPUVMPageTableLevels >= 3) { 1154 *Tno_bw = UrgentExtraLatency + trip_to_mem * ((GPUVMPageTableLevels - 2) - 1); 1155 } else { 1156 *Tno_bw = 0; 1157 } 1158 } else if (!myPipe->DCCEnable) { 1159 *Tno_bw = LineTime; 1160 } else { 1161 *Tno_bw = LineTime / 4; 1162 } 1163 1164 if (myPipe->SourcePixelFormat == dm_420_8 || myPipe->SourcePixelFormat == dm_420_10 || myPipe->SourcePixelFormat == dm_420_12) 1165 bytes_pp = myPipe->BytePerPixelY + myPipe->BytePerPixelC / 4; 1166 else 1167 bytes_pp = myPipe->BytePerPixelY + myPipe->BytePerPixelC; 1168 1169 prefetch_sw_bytes = PrefetchSourceLinesY * swath_width_luma_ub * myPipe->BytePerPixelY + PrefetchSourceLinesC * swath_width_chroma_ub * myPipe->BytePerPixelC; 1170 prefetch_bw_oto = dml_max(bytes_pp * myPipe->PixelClock / myPipe->DPPPerPlane, prefetch_sw_bytes / 
(dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) * LineTime)); 1171 1172 min_Lsw = dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) / max_vratio_pre; 1173 Lsw_oto = dml_ceil(4 * dml_max(prefetch_sw_bytes / prefetch_bw_oto / LineTime, min_Lsw), 1) / 4; 1174 Tsw_oto = Lsw_oto * LineTime; 1175 1176 prefetch_bw_oto = (PrefetchSourceLinesY * swath_width_luma_ub * myPipe->BytePerPixelY + PrefetchSourceLinesC * swath_width_chroma_ub * myPipe->BytePerPixelC) / Tsw_oto; 1177 1178 #ifdef __DML_VBA_DEBUG__ 1179 dml_print("DML: HTotal: %d\n", myPipe->HTotal); 1180 dml_print("DML: prefetch_bw_oto: %f\n", prefetch_bw_oto); 1181 dml_print("DML: PrefetchSourceLinesY: %f\n", PrefetchSourceLinesY); 1182 dml_print("DML: swath_width_luma_ub: %d\n", swath_width_luma_ub); 1183 dml_print("DML: BytePerPixelY: %d\n", myPipe->BytePerPixelY); 1184 dml_print("DML: Tsw_oto: %f\n", Tsw_oto); 1185 #endif 1186 1187 if (GPUVMEnable == true) 1188 Tvm_oto = dml_max3(*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / prefetch_bw_oto, Tvm_trips, LineTime / 4.0); 1189 else 1190 Tvm_oto = LineTime / 4.0; 1191 1192 if ((GPUVMEnable == true || myPipe->DCCEnable == true)) { 1193 Tr0_oto = dml_max4((MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / prefetch_bw_oto, Tr0_trips, // PREVIOUS_ERROR (missing this term) 1194 LineTime - Tvm_oto, 1195 LineTime / 4); 1196 } else { 1197 Tr0_oto = (LineTime - Tvm_oto) / 2.0; 1198 } 1199 1200 #ifdef __DML_VBA_DEBUG__ 1201 dml_print("DML::%s: Tvm_trips = %f\n", __func__, Tvm_trips); 1202 dml_print("DML::%s: Tr0_trips = %f\n", __func__, Tr0_trips); 1203 dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, MetaRowByte); 1204 dml_print("DML::%s: MetaRowByte = %d\n", __func__, MetaRowByte); 1205 dml_print("DML::%s: PixelPTEBytesPerRow = %d\n", __func__, PixelPTEBytesPerRow); 1206 dml_print("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, HostVMInefficiencyFactor); 1207 dml_print("DML::%s: prefetch_bw_oto = %f\n", __func__, 
prefetch_bw_oto); 1208 dml_print("DML::%s: Tr0_oto = %f\n", __func__, Tr0_oto); 1209 dml_print("DML::%s: Tvm_oto = %f\n", __func__, Tvm_oto); 1210 #endif 1211 1212 Tvm_oto_lines = dml_ceil(4.0 * Tvm_oto / LineTime, 1) / 4.0; 1213 Tr0_oto_lines = dml_ceil(4.0 * Tr0_oto / LineTime, 1) / 4.0; 1214 dst_y_prefetch_oto = Tvm_oto_lines + 2 * Tr0_oto_lines + Lsw_oto; 1215 dst_y_prefetch_equ = VStartup - (*TSetup + dml_max(TWait + TCalc, *Tdmdl)) / LineTime - (*DSTYAfterScaler + *DSTXAfterScaler / myPipe->HTotal); 1216 dst_y_prefetch_equ = dml_floor(4.0 * (dst_y_prefetch_equ + 0.125), 1) / 4.0; 1217 Tpre_rounded = dst_y_prefetch_equ * LineTime; 1218 1219 dep_bytes = dml_max(PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor, MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor); 1220 1221 if (prefetch_sw_bytes < dep_bytes) 1222 prefetch_sw_bytes = 2 * dep_bytes; 1223 1224 dml_print("DML: dst_y_prefetch_oto: %f\n", dst_y_prefetch_oto); 1225 dml_print("DML: Tvm_oto_lines: %f\n", Tvm_oto_lines); 1226 dml_print("DML: Tr0_oto_lines: %f\n", Tr0_oto_lines); 1227 dml_print("DML: Lsw_oto: %f\n", Lsw_oto); 1228 dml_print("DML: LineTime: %f\n", LineTime); 1229 dml_print("DML: dst_y_prefetch_equ: %f (after round)\n", dst_y_prefetch_equ); 1230 1231 dml_print("DML: LineTime: %f\n", LineTime); 1232 dml_print("DML: VStartup: %d\n", VStartup); 1233 dml_print("DML: Tvstartup: %fus - time between vstartup and first pixel of active\n", VStartup * LineTime); 1234 dml_print("DML: TSetup: %fus - time from vstartup to vready\n", *TSetup); 1235 dml_print("DML: TCalc: %fus - time for calculations in dchub starting at vready\n", TCalc); 1236 dml_print("DML: TWait: %fus - time for fabric to become ready max(pstate exit,cstate enter/exit, urgent latency) after TCalc\n", TWait); 1237 dml_print("DML: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", Tdmbf); 1238 dml_print("DML: Tdmec: %fus - time dio takes to transfer dmd\n", Tdmec); 1239 dml_print("DML: Tdmsks: %fus - time 
before active dmd must complete transmission at dio\n", Tdmsks); 1240 dml_print("DML: Tdmdl_vm: %fus - time for vm stages of dmd \n", *Tdmdl_vm); 1241 dml_print("DML: Tdmdl: %fus - time for fabric to become ready and fetch dmd \n", *Tdmdl); 1242 dml_print("DML: DSTXAfterScaler: %f pixels - number of pixel clocks pipeline and buffer delay after scaler \n", *DSTXAfterScaler); 1243 dml_print("DML: DSTYAfterScaler: %f lines - number of lines of pipeline and buffer delay after scaler \n", *DSTYAfterScaler); 1244 1245 *PrefetchBandwidth = 0; 1246 *DestinationLinesToRequestVMInVBlank = 0; 1247 *DestinationLinesToRequestRowInVBlank = 0; 1248 *VRatioPrefetchY = 0; 1249 *VRatioPrefetchC = 0; 1250 *RequiredPrefetchPixDataBWLuma = 0; 1251 if (dst_y_prefetch_equ > 1) { 1252 double PrefetchBandwidth1; 1253 double PrefetchBandwidth2; 1254 double PrefetchBandwidth3; 1255 double PrefetchBandwidth4; 1256 1257 if (Tpre_rounded - *Tno_bw > 0) { 1258 PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor 1259 + prefetch_sw_bytes) / (Tpre_rounded - *Tno_bw); 1260 Tsw_est1 = prefetch_sw_bytes / PrefetchBandwidth1; 1261 } else { 1262 PrefetchBandwidth1 = 0; 1263 } 1264 1265 if (VStartup == MaxVStartup && Tsw_est1 / LineTime < min_Lsw && Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - *Tno_bw > 0) { 1266 PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor) 1267 / (Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - *Tno_bw); 1268 } 1269 1270 if (Tpre_rounded - *Tno_bw - 2 * Tr0_trips_rounded > 0) 1271 PrefetchBandwidth2 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + prefetch_sw_bytes) / (Tpre_rounded - *Tno_bw - 2 * Tr0_trips_rounded); 1272 else 1273 PrefetchBandwidth2 = 0; 1274 1275 if (Tpre_rounded - Tvm_trips_rounded > 0) { 1276 PrefetchBandwidth3 = (2 * MetaRowByte + 2 * PixelPTEBytesPerRow 
* HostVMInefficiencyFactor 1277 + prefetch_sw_bytes) / (Tpre_rounded - Tvm_trips_rounded); 1278 Tsw_est3 = prefetch_sw_bytes / PrefetchBandwidth3; 1279 } else { 1280 PrefetchBandwidth3 = 0; 1281 } 1282 1283 #ifdef __DML_VBA_DEBUG__ 1284 dml_print("DML::%s: Tpre_rounded: %f\n", __func__, Tpre_rounded); 1285 dml_print("DML::%s: Tvm_trips_rounded: %f\n", __func__, Tvm_trips_rounded); 1286 dml_print("DML::%s: PrefetchBandwidth3: %f\n", __func__, PrefetchBandwidth3); 1287 #endif 1288 if (VStartup == MaxVStartup && Tsw_est3 / LineTime < min_Lsw && Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - Tvm_trips_rounded > 0) { 1289 PrefetchBandwidth3 = (2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor) 1290 / (Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - Tvm_trips_rounded); 1291 } 1292 1293 if (Tpre_rounded - Tvm_trips_rounded - 2 * Tr0_trips_rounded > 0) 1294 PrefetchBandwidth4 = prefetch_sw_bytes / (Tpre_rounded - Tvm_trips_rounded - 2 * Tr0_trips_rounded); 1295 else 1296 PrefetchBandwidth4 = 0; 1297 1298 { 1299 bool Case1OK; 1300 bool Case2OK; 1301 bool Case3OK; 1302 1303 if (PrefetchBandwidth1 > 0) { 1304 if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth1 >= Tvm_trips_rounded 1305 && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / PrefetchBandwidth1 >= Tr0_trips_rounded) { 1306 Case1OK = true; 1307 } else { 1308 Case1OK = false; 1309 } 1310 } else { 1311 Case1OK = false; 1312 } 1313 1314 if (PrefetchBandwidth2 > 0) { 1315 if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth2 >= Tvm_trips_rounded 1316 && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / PrefetchBandwidth2 < Tr0_trips_rounded) { 1317 Case2OK = true; 1318 } else { 1319 Case2OK = false; 1320 } 1321 } else { 1322 Case2OK = false; 1323 } 1324 1325 if (PrefetchBandwidth3 > 0) { 1326 if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth3 < 
Tvm_trips_rounded 1327 && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / PrefetchBandwidth3 >= Tr0_trips_rounded) { 1328 Case3OK = true; 1329 } else { 1330 Case3OK = false; 1331 } 1332 } else { 1333 Case3OK = false; 1334 } 1335 1336 if (Case1OK) { 1337 prefetch_bw_equ = PrefetchBandwidth1; 1338 } else if (Case2OK) { 1339 prefetch_bw_equ = PrefetchBandwidth2; 1340 } else if (Case3OK) { 1341 prefetch_bw_equ = PrefetchBandwidth3; 1342 } else { 1343 prefetch_bw_equ = PrefetchBandwidth4; 1344 } 1345 1346 #ifdef __DML_VBA_DEBUG__ 1347 dml_print("DML::%s: Case1OK: %d\n", __func__, Case1OK); 1348 dml_print("DML::%s: Case2OK: %d\n", __func__, Case2OK); 1349 dml_print("DML::%s: Case3OK: %d\n", __func__, Case3OK); 1350 dml_print("DML::%s: prefetch_bw_equ: %f\n", __func__, prefetch_bw_equ); 1351 #endif 1352 1353 if (prefetch_bw_equ > 0) { 1354 if (GPUVMEnable == true) { 1355 Tvm_equ = dml_max3(*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / prefetch_bw_equ, Tvm_trips, LineTime / 4); 1356 } else { 1357 Tvm_equ = LineTime / 4; 1358 } 1359 1360 if ((GPUVMEnable == true || myPipe->DCCEnable == true)) { 1361 Tr0_equ = dml_max4( 1362 (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / prefetch_bw_equ, 1363 Tr0_trips, 1364 (LineTime - Tvm_equ) / 2, 1365 LineTime / 4); 1366 } else { 1367 Tr0_equ = (LineTime - Tvm_equ) / 2; 1368 } 1369 } else { 1370 Tvm_equ = 0; 1371 Tr0_equ = 0; 1372 dml_print("DML: prefetch_bw_equ equals 0! 
%s:%d\n", __FILE__, __LINE__); 1373 } 1374 } 1375 1376 if (dst_y_prefetch_oto < dst_y_prefetch_equ) { 1377 *DestinationLinesForPrefetch = dst_y_prefetch_oto; 1378 TimeForFetchingMetaPTE = Tvm_oto; 1379 TimeForFetchingRowInVBlank = Tr0_oto; 1380 *PrefetchBandwidth = prefetch_bw_oto; 1381 } else { 1382 *DestinationLinesForPrefetch = dst_y_prefetch_equ; 1383 TimeForFetchingMetaPTE = Tvm_equ; 1384 TimeForFetchingRowInVBlank = Tr0_equ; 1385 *PrefetchBandwidth = prefetch_bw_equ; 1386 } 1387 1388 *DestinationLinesToRequestVMInVBlank = dml_ceil(4.0 * TimeForFetchingMetaPTE / LineTime, 1.0) / 4.0; 1389 1390 *DestinationLinesToRequestRowInVBlank = dml_ceil(4.0 * TimeForFetchingRowInVBlank / LineTime, 1.0) / 4.0; 1391 1392 #ifdef __DML_VBA_ALLOW_DELTA__ 1393 LinesToRequestPrefetchPixelData = *DestinationLinesForPrefetch 1394 // See note above dated 5/30/2018 1395 // - ((NumberOfCursors > 0 || GPUVMEnable || DCCEnable) ? 1396 - ((GPUVMEnable || myPipe->DCCEnable) ? (*DestinationLinesToRequestVMInVBlank + 2 * *DestinationLinesToRequestRowInVBlank) : 0.0); // TODO: Did someone else add this?? 
1397 #else 1398 LinesToRequestPrefetchPixelData = *DestinationLinesForPrefetch - *DestinationLinesToRequestVMInVBlank - 2 * *DestinationLinesToRequestRowInVBlank; 1399 #endif 1400 1401 #ifdef __DML_VBA_DEBUG__ 1402 dml_print("DML::%s: DestinationLinesForPrefetch = %f\n", __func__, *DestinationLinesForPrefetch); 1403 dml_print("DML::%s: DestinationLinesToRequestVMInVBlank = %f\n", __func__, *DestinationLinesToRequestVMInVBlank); 1404 dml_print("DML::%s: TimeForFetchingRowInVBlank = %f\n", __func__, TimeForFetchingRowInVBlank); 1405 dml_print("DML::%s: LineTime = %f\n", __func__, LineTime); 1406 dml_print("DML::%s: DestinationLinesToRequestRowInVBlank = %f\n", __func__, *DestinationLinesToRequestRowInVBlank); 1407 dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, PrefetchSourceLinesY); 1408 dml_print("DML::%s: LinesToRequestPrefetchPixelData = %f\n", __func__, LinesToRequestPrefetchPixelData); 1409 #endif 1410 1411 if (LinesToRequestPrefetchPixelData > 0 && prefetch_bw_equ > 0) { 1412 1413 *VRatioPrefetchY = (double) PrefetchSourceLinesY / LinesToRequestPrefetchPixelData; 1414 *VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0); 1415 #ifdef __DML_VBA_DEBUG__ 1416 dml_print("DML::%s: VRatioPrefetchY = %f\n", __func__, *VRatioPrefetchY); 1417 dml_print("DML::%s: SwathHeightY = %d\n", __func__, SwathHeightY); 1418 dml_print("DML::%s: VInitPreFillY = %f\n", __func__, VInitPreFillY); 1419 #endif 1420 if ((SwathHeightY > 4) && (VInitPreFillY > 3)) { 1421 if (LinesToRequestPrefetchPixelData > (VInitPreFillY - 3.0) / 2.0) { 1422 *VRatioPrefetchY = dml_max( 1423 (double) PrefetchSourceLinesY / LinesToRequestPrefetchPixelData, 1424 (double) MaxNumSwathY * SwathHeightY / (LinesToRequestPrefetchPixelData - (VInitPreFillY - 3.0) / 2.0)); 1425 *VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0); 1426 } else { 1427 MyError = true; 1428 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__); 1429 *VRatioPrefetchY = 0; 1430 } 1431 #ifdef __DML_VBA_DEBUG__ 1432 
dml_print("DML::%s: VRatioPrefetchY = %f\n", __func__, *VRatioPrefetchY); 1433 dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, PrefetchSourceLinesY); 1434 dml_print("DML::%s: MaxNumSwathY = %d\n", __func__, MaxNumSwathY); 1435 #endif 1436 } 1437 1438 *VRatioPrefetchC = (double) PrefetchSourceLinesC / LinesToRequestPrefetchPixelData; 1439 *VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0); 1440 1441 #ifdef __DML_VBA_DEBUG__ 1442 dml_print("DML::%s: VRatioPrefetchC = %f\n", __func__, *VRatioPrefetchC); 1443 dml_print("DML::%s: SwathHeightC = %d\n", __func__, SwathHeightC); 1444 dml_print("DML::%s: VInitPreFillC = %f\n", __func__, VInitPreFillC); 1445 #endif 1446 if ((SwathHeightC > 4)) { 1447 if (LinesToRequestPrefetchPixelData > (VInitPreFillC - 3.0) / 2.0) { 1448 *VRatioPrefetchC = dml_max( 1449 *VRatioPrefetchC, 1450 (double) MaxNumSwathC * SwathHeightC / (LinesToRequestPrefetchPixelData - (VInitPreFillC - 3.0) / 2.0)); 1451 *VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0); 1452 } else { 1453 MyError = true; 1454 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__); 1455 *VRatioPrefetchC = 0; 1456 } 1457 #ifdef __DML_VBA_DEBUG__ 1458 dml_print("DML::%s: VRatioPrefetchC = %f\n", __func__, *VRatioPrefetchC); 1459 dml_print("DML::%s: PrefetchSourceLinesC = %f\n", __func__, PrefetchSourceLinesC); 1460 dml_print("DML::%s: MaxNumSwathC = %d\n", __func__, MaxNumSwathC); 1461 #endif 1462 } 1463 1464 #ifdef __DML_VBA_DEBUG__ 1465 dml_print("DML::%s: BytePerPixelY = %d\n", __func__, myPipe->BytePerPixelY); 1466 dml_print("DML::%s: swath_width_luma_ub = %d\n", __func__, swath_width_luma_ub); 1467 dml_print("DML::%s: LineTime = %f\n", __func__, LineTime); 1468 #endif 1469 1470 *RequiredPrefetchPixDataBWLuma = (double) PrefetchSourceLinesY / LinesToRequestPrefetchPixelData * myPipe->BytePerPixelY * swath_width_luma_ub / LineTime; 1471 1472 #ifdef __DML_VBA_DEBUG__ 1473 dml_print("DML::%s: RequiredPrefetchPixDataBWLuma = %f\n", __func__, 
*RequiredPrefetchPixDataBWLuma); 1474 #endif 1475 1476 *RequiredPrefetchPixDataBWChroma = (double) PrefetchSourceLinesC / LinesToRequestPrefetchPixelData * myPipe->BytePerPixelC * swath_width_chroma_ub 1477 / LineTime; 1478 } else { 1479 MyError = true; 1480 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__); 1481 dml_print("DML: LinesToRequestPrefetchPixelData: %f, should be > 0\n", LinesToRequestPrefetchPixelData); 1482 *VRatioPrefetchY = 0; 1483 *VRatioPrefetchC = 0; 1484 *RequiredPrefetchPixDataBWLuma = 0; 1485 *RequiredPrefetchPixDataBWChroma = 0; 1486 } 1487 1488 dml_print( 1489 "DML: Tpre: %fus - sum of time to request meta pte, 2 x data pte + meta data, swaths\n", 1490 (double) LinesToRequestPrefetchPixelData * LineTime + 2.0 * TimeForFetchingRowInVBlank + TimeForFetchingMetaPTE); 1491 dml_print("DML: Tvm: %fus - time to fetch page tables for meta surface\n", TimeForFetchingMetaPTE); 1492 dml_print("DML: Tr0: %fus - time to fetch first row of data pagetables and first row of meta data (done in parallel)\n", TimeForFetchingRowInVBlank); 1493 dml_print( 1494 "DML: Tsw: %fus = time to fetch enough pixel data and cursor data to feed the scalers init position and detile\n", 1495 (double) LinesToRequestPrefetchPixelData * LineTime); 1496 dml_print("DML: To: %fus - time for propagation from scaler to optc\n", 1497 (*DSTYAfterScaler + ((double) (*DSTXAfterScaler) / 1498 (double) myPipe->HTotal)) * LineTime); 1499 dml_print("DML: Tvstartup - TSetup - Tcalc - Twait - Tpre - To > 0\n"); 1500 dml_print("DML: Tslack(pre): %fus - time left over in schedule\n", 1501 VStartup * LineTime - TimeForFetchingMetaPTE - 2 * TimeForFetchingRowInVBlank 1502 - (*DSTYAfterScaler + ((double) (*DSTXAfterScaler) / (double) myPipe->HTotal)) * LineTime - TWait - TCalc - *TSetup); 1503 dml_print("DML: row_bytes = dpte_row_bytes (per_pipe) = PixelPTEBytesPerRow = : %d\n", PixelPTEBytesPerRow); 1504 1505 } else { 1506 MyError = true; 1507 dml_print("DML: MyErr set %s:%d\n", __FILE__, 
__LINE__); 1508 } 1509 1510 { 1511 double prefetch_vm_bw; 1512 double prefetch_row_bw; 1513 1514 if (PDEAndMetaPTEBytesFrame == 0) { 1515 prefetch_vm_bw = 0; 1516 } else if (*DestinationLinesToRequestVMInVBlank > 0) { 1517 #ifdef __DML_VBA_DEBUG__ 1518 dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame); 1519 dml_print("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, HostVMInefficiencyFactor); 1520 dml_print("DML::%s: DestinationLinesToRequestVMInVBlank = %f\n", __func__, *DestinationLinesToRequestVMInVBlank); 1521 dml_print("DML::%s: LineTime = %f\n", __func__, LineTime); 1522 #endif 1523 prefetch_vm_bw = PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / (*DestinationLinesToRequestVMInVBlank * LineTime); 1524 #ifdef __DML_VBA_DEBUG__ 1525 dml_print("DML::%s: prefetch_vm_bw = %f\n", __func__, prefetch_vm_bw); 1526 #endif 1527 } else { 1528 prefetch_vm_bw = 0; 1529 MyError = true; 1530 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__); 1531 } 1532 1533 if (MetaRowByte + PixelPTEBytesPerRow == 0) { 1534 prefetch_row_bw = 0; 1535 } else if (*DestinationLinesToRequestRowInVBlank > 0) { 1536 prefetch_row_bw = (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / (*DestinationLinesToRequestRowInVBlank * LineTime); 1537 1538 #ifdef __DML_VBA_DEBUG__ 1539 dml_print("DML::%s: MetaRowByte = %d\n", __func__, MetaRowByte); 1540 dml_print("DML::%s: PixelPTEBytesPerRow = %d\n", __func__, PixelPTEBytesPerRow); 1541 dml_print("DML::%s: DestinationLinesToRequestRowInVBlank = %f\n", __func__, *DestinationLinesToRequestRowInVBlank); 1542 dml_print("DML::%s: prefetch_row_bw = %f\n", __func__, prefetch_row_bw); 1543 #endif 1544 } else { 1545 prefetch_row_bw = 0; 1546 MyError = true; 1547 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__); 1548 } 1549 1550 *prefetch_vmrow_bw = dml_max(prefetch_vm_bw, prefetch_row_bw); 1551 } 1552 1553 if (MyError) { 1554 *PrefetchBandwidth = 0; 1555 TimeForFetchingMetaPTE = 0; 1556 
TimeForFetchingRowInVBlank = 0; 1557 *DestinationLinesToRequestVMInVBlank = 0; 1558 *DestinationLinesToRequestRowInVBlank = 0; 1559 *DestinationLinesForPrefetch = 0; 1560 LinesToRequestPrefetchPixelData = 0; 1561 *VRatioPrefetchY = 0; 1562 *VRatioPrefetchC = 0; 1563 *RequiredPrefetchPixDataBWLuma = 0; 1564 *RequiredPrefetchPixDataBWChroma = 0; 1565 } 1566 1567 return MyError; 1568 } 1569
/*
 * RoundToDFSGranularityUp - snap Clock to a value of the form
 * (VCOSpeed * 4) / N, where N = dml_floor(VCOSpeed * 4 / Clock, 1).
 *
 * dml_floor is defined outside this chunk; assuming it rounds down to a
 * multiple of its second argument (TODO confirm), N never exceeds the exact
 * ratio, so for positive inputs the result is never below Clock.
 *
 * NOTE(review): under that assumption N is 0 whenever Clock > VCOSpeed * 4,
 * and the final division is then by zero - confirm callers cannot pass this.
 */
1570 static double RoundToDFSGranularityUp(double Clock, double VCOSpeed) 1571 { 1572 return VCOSpeed * 4 / dml_floor(VCOSpeed * 4 / Clock, 1); 1573 } 1574
/*
 * RoundToDFSGranularityDown - same form as RoundToDFSGranularityUp, but the
 * divider is dml_ceil(VCOSpeed * 4.0 / Clock, 1).
 *
 * Assuming dml_ceil rounds up to a multiple of its second argument
 * (TODO confirm), the divider is never below the exact ratio, so for
 * positive inputs the result is never above Clock.
 */
1575 static double RoundToDFSGranularityDown(double Clock, double VCOSpeed) 1576 { 1577 return VCOSpeed * 4 / dml_ceil(VCOSpeed * 4.0 / Clock, 1); 1578 } 1579
/*
 * CalculateDCCConfiguration - choose the DCC block-size outputs for one
 * surface.
 *
 * The inputs give the pixel format, luma surface dimensions, bytes per pixel
 * per plane, tiling format, the 256-byte request heights, DETBufferSize and
 * the scan orientation. The function returns void; its results are written
 * through the six trailing pointer parameters (max uncompressed block, max
 * compressed block and independent block, each for luma and for chroma).
 *
 * SurfaceWidthChroma, SurfaceHeightChroma, BytePerPixelDETY and
 * BytePerPixelDETC are accepted but not referenced in the body.
 */
1580 static void CalculateDCCConfiguration( 1581 bool DCCEnabled, 1582 bool DCCProgrammingAssumesScanDirectionUnknown, 1583 enum source_format_class SourcePixelFormat, 1584 unsigned int SurfaceWidthLuma, 1585 unsigned int SurfaceWidthChroma, 1586 unsigned int SurfaceHeightLuma, 1587 unsigned int SurfaceHeightChroma, 1588 double DETBufferSize, 1589 unsigned int RequestHeight256ByteLuma, 1590 unsigned int RequestHeight256ByteChroma, 1591 enum dm_swizzle_mode TilingFormat, 1592 unsigned int BytePerPixelY, 1593 unsigned int BytePerPixelC, 1594 double BytePerPixelDETY, 1595 double BytePerPixelDETC, 1596 enum scan_direction_class ScanOrientation, 1597 unsigned int *MaxUncompressedBlockLuma, 1598 unsigned int *MaxUncompressedBlockChroma, 1599 unsigned int *MaxCompressedBlockLuma, 1600 unsigned int *MaxCompressedBlockChroma, 1601 unsigned int *IndependentBlockLuma, 1602 unsigned int *IndependentBlockChroma) 1603 { 1604 int yuv420; 1605 int horz_div_l; 1606 int horz_div_c; 1607 int vert_div_l; 1608 int vert_div_c; 1609 1610 int swath_buf_size; 1611 double detile_buf_vp_horz_limit; 1612 double detile_buf_vp_vert_limit; 1613 1614 int MAS_vp_horz_limit; 1615 int MAS_vp_vert_limit; 1616 int max_vp_horz_width; 1617 int max_vp_vert_height; 1618 int eff_surf_width_l; 1619 int
eff_surf_width_c; 1620 int eff_surf_height_l; 1621 int eff_surf_height_c; 1622 1623 int full_swath_bytes_horz_wc_l; 1624 int full_swath_bytes_horz_wc_c; 1625 int full_swath_bytes_vert_wc_l; 1626 int full_swath_bytes_vert_wc_c; 1627 int req128_horz_wc_l; 1628 int req128_horz_wc_c; 1629 int req128_vert_wc_l; 1630 int req128_vert_wc_c; 1631 int segment_order_horz_contiguous_luma; 1632 int segment_order_horz_contiguous_chroma; 1633 int segment_order_vert_contiguous_luma; 1634 int segment_order_vert_contiguous_chroma; 1635
/*
 * Remainder of CalculateDCCConfiguration. In order, the code below:
 *  1. derives viewport limits from DETBufferSize and the request heights,
 *  2. clamps the luma surface size to those limits,
 *  3. computes full-swath byte counts for horizontal and vertical scan,
 *  4. sets the req128_* and segment_order_* flags,
 *  5. picks a RequestType per plane and maps it to the output block sizes.
 *
 * RequestType is local to this function; REQ_NA is declared but never
 * assigned in it.
 */
1636 typedef enum { 1637 REQ_256Bytes, REQ_128BytesNonContiguous, REQ_128BytesContiguous, REQ_NA 1638 } RequestType; 1639 RequestType RequestLuma; 1640 RequestType RequestChroma; 1641
/*
 * yuv420 is 1 only for the three dm_420_* formats. The four *_div_* values
 * start at 1 and are cleared to 0 for specific bytes-per-pixel / tiling
 * combinations. All of them are used below only as (1 + x) divisors.
 */
1642 yuv420 = ((SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12) ? 1 : 0); 1643 horz_div_l = 1; 1644 horz_div_c = 1; 1645 vert_div_l = 1; 1646 vert_div_c = 1; 1647 1648 if (BytePerPixelY == 1) 1649 vert_div_l = 0; 1650 if (BytePerPixelC == 1) 1651 vert_div_c = 0; 1652 if (BytePerPixelY == 8 && (TilingFormat == dm_sw_64kb_s || TilingFormat == dm_sw_64kb_s_t || TilingFormat == dm_sw_64kb_s_x)) 1653 horz_div_l = 0; 1654 if (BytePerPixelC == 8 && (TilingFormat == dm_sw_64kb_s || TilingFormat == dm_sw_64kb_s_t || TilingFormat == dm_sw_64kb_s_x)) 1655 horz_div_c = 0; 1656
/*
 * swath_buf_size is half of DETBufferSize minus 2 * 256 when BytePerPixelC
 * is 0, or minus 2 * 2 * 256 otherwise (the double result is truncated into
 * an int). The horizontal and vertical viewport limits are swath_buf_size
 * divided by the luma term alone, or by the sum of the luma and chroma
 * terms when chroma is present.
 */
1657 if (BytePerPixelC == 0) { 1658 swath_buf_size = DETBufferSize / 2 - 2 * 256; 1659 detile_buf_vp_horz_limit = (double) swath_buf_size / ((double) RequestHeight256ByteLuma * BytePerPixelY / (1 + horz_div_l)); 1660 detile_buf_vp_vert_limit = (double) swath_buf_size / (256.0 / RequestHeight256ByteLuma / (1 + vert_div_l)); 1661 } else { 1662 swath_buf_size = DETBufferSize / 2 - 2 * 2 * 256; 1663 detile_buf_vp_horz_limit = (double) swath_buf_size 1664 / ((double) RequestHeight256ByteLuma * BytePerPixelY / (1 + horz_div_l) 1665 + (double) RequestHeight256ByteChroma * BytePerPixelC / (1 + horz_div_c) / (1 + yuv420)); 1666 detile_buf_vp_vert_limit = (double)
swath_buf_size 1667 / (256.0 / RequestHeight256ByteLuma / (1 + vert_div_l) + 256.0 / RequestHeight256ByteChroma / (1 + vert_div_c) / (1 + yuv420)); 1668 } 1669
/*
 * dm_420_10 gets both limits multiplied by 1.5 (and, further down, its
 * swath byte counts multiplied by 2 / 3). Both limits are then reduced by 1
 * and passed through dml_floor(..., 16) - presumably rounding down to a
 * multiple of 16; dml_floor is defined outside this chunk (TODO confirm).
 */
1670 if (SourcePixelFormat == dm_420_10) { 1671 detile_buf_vp_horz_limit = 1.5 * detile_buf_vp_horz_limit; 1672 detile_buf_vp_vert_limit = 1.5 * detile_buf_vp_vert_limit; 1673 } 1674 1675 detile_buf_vp_horz_limit = dml_floor(detile_buf_vp_horz_limit - 1, 16); 1676 detile_buf_vp_vert_limit = dml_floor(detile_buf_vp_vert_limit - 1, 16); 1677
/*
 * Cap the limits with fixed maxima (3840 or 5760 horizontally, 2880 or 5760
 * vertically), then cap the luma surface size with the result. The chroma
 * effective sizes are the luma ones divided by (1 + yuv420).
 *
 * NOTE(review): the comparisons mix unsigned SurfaceWidthLuma /
 * SurfaceHeightLuma with int max_vp_* values, so a negative limit would be
 * compared as a large unsigned number - confirm the limits cannot go
 * negative.
 */
1678 MAS_vp_horz_limit = SourcePixelFormat == dm_rgbe_alpha ? 3840 : 5760; 1679 MAS_vp_vert_limit = (BytePerPixelC > 0 ? 2880 : 5760); 1680 max_vp_horz_width = dml_min((double) MAS_vp_horz_limit, detile_buf_vp_horz_limit); 1681 max_vp_vert_height = dml_min((double) MAS_vp_vert_limit, detile_buf_vp_vert_limit); 1682 eff_surf_width_l = (SurfaceWidthLuma > max_vp_horz_width ? max_vp_horz_width : SurfaceWidthLuma); 1683 eff_surf_width_c = eff_surf_width_l / (1 + yuv420); 1684 eff_surf_height_l = (SurfaceHeightLuma > max_vp_vert_height ?
max_vp_vert_height : SurfaceHeightLuma); 1685 eff_surf_height_c = eff_surf_height_l / (1 + yuv420); 1686
/*
 * Byte counts of one full swath over the effective surface, per plane, for
 * the horizontal and the vertical case. The chroma counts are 0 when
 * BytePerPixelC is 0.
 */
1687 full_swath_bytes_horz_wc_l = eff_surf_width_l * RequestHeight256ByteLuma * BytePerPixelY; 1688 full_swath_bytes_vert_wc_l = eff_surf_height_l * 256 / RequestHeight256ByteLuma; 1689 if (BytePerPixelC > 0) { 1690 full_swath_bytes_horz_wc_c = eff_surf_width_c * RequestHeight256ByteChroma * BytePerPixelC; 1691 full_swath_bytes_vert_wc_c = eff_surf_height_c * 256 / RequestHeight256ByteChroma; 1692 } else { 1693 full_swath_bytes_horz_wc_c = 0; 1694 full_swath_bytes_vert_wc_c = 0; 1695 } 1696
/*
 * For dm_420_10 the counts are scaled by 2 / 3. The multiplication and
 * division are done on int operands before dml_ceil(..., 256) is applied.
 */
1697 if (SourcePixelFormat == dm_420_10) { 1698 full_swath_bytes_horz_wc_l = dml_ceil(full_swath_bytes_horz_wc_l * 2 / 3, 256); 1699 full_swath_bytes_horz_wc_c = dml_ceil(full_swath_bytes_horz_wc_c * 2 / 3, 256); 1700 full_swath_bytes_vert_wc_l = dml_ceil(full_swath_bytes_vert_wc_l * 2 / 3, 256); 1701 full_swath_bytes_vert_wc_c = dml_ceil(full_swath_bytes_vert_wc_c * 2 / 3, 256); 1702 } 1703
/*
 * Horizontal case. If two full swaths of both planes fit in DETBufferSize,
 * neither plane is flagged (req128 = 0). Otherwise one plane is flagged -
 * chroma when luma < 1.5 x chroma, luma otherwise - provided two swaths of
 * the unflagged plane plus one of the flagged plane fit. Failing that, both
 * planes are flagged.
 */
1704 if (2 * full_swath_bytes_horz_wc_l + 2 * full_swath_bytes_horz_wc_c <= DETBufferSize) { 1705 req128_horz_wc_l = 0; 1706 req128_horz_wc_c = 0; 1707 } else if (full_swath_bytes_horz_wc_l < 1.5 * full_swath_bytes_horz_wc_c && 2 * full_swath_bytes_horz_wc_l + full_swath_bytes_horz_wc_c <= DETBufferSize) { 1708 req128_horz_wc_l = 0; 1709 req128_horz_wc_c = 1; 1710 } else if (full_swath_bytes_horz_wc_l >= 1.5 * full_swath_bytes_horz_wc_c && full_swath_bytes_horz_wc_l + 2 * full_swath_bytes_horz_wc_c <= DETBufferSize) { 1711 req128_horz_wc_l = 1; 1712 req128_horz_wc_c = 0; 1713 } else { 1714 req128_horz_wc_l = 1; 1715 req128_horz_wc_c = 1; 1716 } 1717
/* Vertical case: the same decision, using the vertical swath byte counts. */
1718 if (2 * full_swath_bytes_vert_wc_l + 2 * full_swath_bytes_vert_wc_c <= DETBufferSize) { 1719 req128_vert_wc_l = 0; 1720 req128_vert_wc_c = 0; 1721 } else if (full_swath_bytes_vert_wc_l < 1.5 * full_swath_bytes_vert_wc_c && 2 * full_swath_bytes_vert_wc_l + full_swath_bytes_vert_wc_c <= DETBufferSize) { 1722 req128_vert_wc_l = 0; 1723
req128_vert_wc_c = 1; 1724 } else if (full_swath_bytes_vert_wc_l >= 1.5 * full_swath_bytes_vert_wc_c && full_swath_bytes_vert_wc_l + 2 * full_swath_bytes_vert_wc_c <= DETBufferSize) { 1725 req128_vert_wc_l = 1; 1726 req128_vert_wc_c = 0; 1727 } else { 1728 req128_vert_wc_l = 1; 1729 req128_vert_wc_c = 1; 1730 } 1731
/*
 * segment_order_*_contiguous_* flags, set purely from bytes per pixel and
 * tiling format. A value of 0 selects REQ_128BytesNonContiguous below when
 * the matching req128 flag is set.
 */
1732 if (BytePerPixelY == 2 || (BytePerPixelY == 4 && TilingFormat != dm_sw_64kb_r_x)) { 1733 segment_order_horz_contiguous_luma = 0; 1734 } else { 1735 segment_order_horz_contiguous_luma = 1; 1736 } 1737 if ((BytePerPixelY == 8 && (TilingFormat == dm_sw_64kb_d || TilingFormat == dm_sw_64kb_d_x || TilingFormat == dm_sw_64kb_d_t || TilingFormat == dm_sw_64kb_r_x)) 1738 || (BytePerPixelY == 4 && TilingFormat == dm_sw_64kb_r_x)) { 1739 segment_order_vert_contiguous_luma = 0; 1740 } else { 1741 segment_order_vert_contiguous_luma = 1; 1742 } 1743 if (BytePerPixelC == 2 || (BytePerPixelC == 4 && TilingFormat != dm_sw_64kb_r_x)) { 1744 segment_order_horz_contiguous_chroma = 0; 1745 } else { 1746 segment_order_horz_contiguous_chroma = 1; 1747 } 1748 if ((BytePerPixelC == 8 && (TilingFormat == dm_sw_64kb_d || TilingFormat == dm_sw_64kb_d_x || TilingFormat == dm_sw_64kb_d_t || TilingFormat == dm_sw_64kb_r_x)) 1749 || (BytePerPixelC == 4 && TilingFormat == dm_sw_64kb_r_x)) { 1750 segment_order_vert_contiguous_chroma = 0; 1751 } else { 1752 segment_order_vert_contiguous_chroma = 1; 1753 } 1754
/*
 * Pick the request type per plane. When the scan direction is treated as
 * unknown, both the horizontal and the vertical flags are consulted.
 * Otherwise only the horizontal flags are used (ScanOrientation != dm_vert)
 * or only the vertical ones (dm_vert).
 */
1755 if (DCCProgrammingAssumesScanDirectionUnknown == true) { 1756 if (req128_horz_wc_l == 0 && req128_vert_wc_l == 0) { 1757 RequestLuma = REQ_256Bytes; 1758 } else if ((req128_horz_wc_l == 1 && segment_order_horz_contiguous_luma == 0) || (req128_vert_wc_l == 1 && segment_order_vert_contiguous_luma == 0)) { 1759 RequestLuma = REQ_128BytesNonContiguous; 1760 } else { 1761 RequestLuma = REQ_128BytesContiguous; 1762 } 1763 if (req128_horz_wc_c == 0 && req128_vert_wc_c == 0) { 1764 RequestChroma = REQ_256Bytes; 1765 } else if ((req128_horz_wc_c == 1 &&
segment_order_horz_contiguous_chroma == 0) || (req128_vert_wc_c == 1 && segment_order_vert_contiguous_chroma == 0)) { 1766 RequestChroma = REQ_128BytesNonContiguous; 1767 } else { 1768 RequestChroma = REQ_128BytesContiguous; 1769 } 1770 } else if (ScanOrientation != dm_vert) { 1771 if (req128_horz_wc_l == 0) { 1772 RequestLuma = REQ_256Bytes; 1773 } else if (segment_order_horz_contiguous_luma == 0) { 1774 RequestLuma = REQ_128BytesNonContiguous; 1775 } else { 1776 RequestLuma = REQ_128BytesContiguous; 1777 } 1778 if (req128_horz_wc_c == 0) { 1779 RequestChroma = REQ_256Bytes; 1780 } else if (segment_order_horz_contiguous_chroma == 0) { 1781 RequestChroma = REQ_128BytesNonContiguous; 1782 } else { 1783 RequestChroma = REQ_128BytesContiguous; 1784 } 1785 } else { 1786 if (req128_vert_wc_l == 0) { 1787 RequestLuma = REQ_256Bytes; 1788 } else if (segment_order_vert_contiguous_luma == 0) { 1789 RequestLuma = REQ_128BytesNonContiguous; 1790 } else { 1791 RequestLuma = REQ_128BytesContiguous; 1792 } 1793 if (req128_vert_wc_c == 0) { 1794 RequestChroma = REQ_256Bytes; 1795 } else if (segment_order_vert_contiguous_chroma == 0) { 1796 RequestChroma = REQ_128BytesNonContiguous; 1797 } else { 1798 RequestChroma = REQ_128BytesContiguous; 1799 } 1800 } 1801
/*
 * Map the request type to the outputs (same table for luma and chroma):
 *   REQ_256Bytes           -> max compressed 256, independent block 0
 *   REQ_128BytesContiguous -> max compressed 128, independent block 128
 *   anything else          -> max compressed 64,  independent block 64
 * The max uncompressed block is 256 in every case.
 */
1802 if (RequestLuma == REQ_256Bytes) { 1803 *MaxUncompressedBlockLuma = 256; 1804 *MaxCompressedBlockLuma = 256; 1805 *IndependentBlockLuma = 0; 1806 } else if (RequestLuma == REQ_128BytesContiguous) { 1807 *MaxUncompressedBlockLuma = 256; 1808 *MaxCompressedBlockLuma = 128; 1809 *IndependentBlockLuma = 128; 1810 } else { 1811 *MaxUncompressedBlockLuma = 256; 1812 *MaxCompressedBlockLuma = 64; 1813 *IndependentBlockLuma = 64; 1814 } 1815 1816 if (RequestChroma == REQ_256Bytes) { 1817 *MaxUncompressedBlockChroma = 256; 1818 *MaxCompressedBlockChroma = 256; 1819 *IndependentBlockChroma = 0; 1820 } else if (RequestChroma == REQ_128BytesContiguous) { 1821 *MaxUncompressedBlockChroma = 256; 1822 *MaxCompressedBlockChroma = 128;
1823 *IndependentBlockChroma = 128; 1824 } else { 1825 *MaxUncompressedBlockChroma = 256; 1826 *MaxCompressedBlockChroma = 64; 1827 *IndependentBlockChroma = 64; 1828 } 1829
/*
 * Final overrides: the chroma outputs are zeroed when DCC is off or
 * BytePerPixelC is 0; the luma outputs are zeroed when DCC is off.
 */
1830 if (DCCEnabled != true || BytePerPixelC == 0) { 1831 *MaxUncompressedBlockChroma = 0; 1832 *MaxCompressedBlockChroma = 0; 1833 *IndependentBlockChroma = 0; 1834 } 1835 1836 if (DCCEnabled != true) { 1837 *MaxUncompressedBlockLuma = 0; 1838 *MaxCompressedBlockLuma = 0; 1839 *IndependentBlockLuma = 0; 1840 } 1841 } 1842
/*
 * CalculatePrefetchSourceLines - compute the value the debug print below
 * labels "Prefetch source lines".
 *
 * Writes *VInitPreFill and *MaxNumSwath, and returns
 * *MaxNumSwath * SwathHeight + MaxPartialSwath. mode_lib is read only for
 * vba.IgnoreViewportPositioning. ViewportYStart does not enter the
 * arithmetic; it only triggers a warning print when it is non-zero and
 * viewport positioning is ignored.
 *
 * NOTE(review): SwathHeight is used as a divisor and as a modulo operand
 * without a zero check - confirm callers always pass a non-zero value.
 */
1843 static double CalculatePrefetchSourceLines( 1844 struct display_mode_lib *mode_lib, 1845 double VRatio, 1846 double vtaps, 1847 bool Interlace, 1848 bool ProgressiveToInterlaceUnitInOPP, 1849 unsigned int SwathHeight, 1850 unsigned int ViewportYStart, 1851 double *VInitPreFill, 1852 unsigned int *MaxNumSwath) 1853 { 1854 struct vba_vars_st *v = &mode_lib->vba; 1855 unsigned int MaxPartialSwath; 1856
/*
 * *VInitPreFill is dml_floor(x / 2.0, 1) with x = VRatio + vtaps + 1. The
 * extra Interlace * 0.5 * VRatio term is added to x only when
 * ProgressiveToInterlaceUnitInOPP is false.
 */
1857 if (ProgressiveToInterlaceUnitInOPP) 1858 *VInitPreFill = dml_floor((VRatio + vtaps + 1) / 2.0, 1); 1859 else 1860 *VInitPreFill = dml_floor((VRatio + vtaps + 1 + Interlace * 0.5 * VRatio) / 2.0, 1); 1861
/*
 * Two variants of the swath count.
 * Viewport positioning honoured: *MaxNumSwath is
 * dml_ceil((*VInitPreFill - 1) / SwathHeight, 1) + 1; the partial swath
 * uses an offset of 2 and is raised to at least 1.
 * Viewport positioning ignored: *MaxNumSwath is
 * dml_ceil(*VInitPreFill / SwathHeight, 1); the partial swath uses an
 * offset of 1 and has no lower bound applied.
 */
1862 if (!v->IgnoreViewportPositioning) { 1863 1864 *MaxNumSwath = dml_ceil((*VInitPreFill - 1.0) / SwathHeight, 1) + 1.0; 1865 1866 if (*VInitPreFill > 1.0) 1867 MaxPartialSwath = (unsigned int) (*VInitPreFill - 2) % SwathHeight; 1868 else 1869 MaxPartialSwath = (unsigned int) (*VInitPreFill + SwathHeight - 2) % SwathHeight; 1870 MaxPartialSwath = dml_max(1U, MaxPartialSwath); 1871 1872 } else { 1873 1874 if (ViewportYStart != 0) 1875 dml_print("WARNING DML: using viewport y position of 0 even though actual viewport y position is non-zero in prefetch source lines calculation\n"); 1876 1877 *MaxNumSwath = dml_ceil(*VInitPreFill / SwathHeight, 1); 1878 1879 if (*VInitPreFill > 1.0) 1880 MaxPartialSwath = (unsigned int) (*VInitPreFill - 1) % SwathHeight; 1881 else 1882 MaxPartialSwath = (unsigned int) (*VInitPreFill + SwathHeight - 1) % SwathHeight; 1883 } 1884 1885
#ifdef __DML_VBA_DEBUG__ 1886 dml_print("DML::%s: VRatio = %f\n", __func__, VRatio); 1887 dml_print("DML::%s: vtaps = %f\n", __func__, vtaps); 1888 dml_print("DML::%s: VInitPreFill = %f\n", __func__, *VInitPreFill); 1889 dml_print("DML::%s: ProgressiveToInterlaceUnitInOPP = %d\n", __func__, ProgressiveToInterlaceUnitInOPP); 1890 dml_print("DML::%s: IgnoreViewportPositioning = %d\n", __func__, v->IgnoreViewportPositioning); 1891 dml_print("DML::%s: SwathHeight = %d\n", __func__, SwathHeight); 1892 dml_print("DML::%s: MaxPartialSwath = %d\n", __func__, MaxPartialSwath); 1893 dml_print("DML::%s: MaxNumSwath = %d\n", __func__, *MaxNumSwath); 1894 dml_print("DML::%s: Prefetch source lines = %d\n", __func__, *MaxNumSwath * SwathHeight + MaxPartialSwath); 1895 #endif
/* The sum is formed in unsigned int arithmetic and converted to double on return. */
1896 return *MaxNumSwath * SwathHeight + MaxPartialSwath; 1897 } 1898 1899 static unsigned int CalculateVMAndRowBytes( 1900 struct display_mode_lib *mode_lib, 1901 bool DCCEnable, 1902 unsigned int BlockHeight256Bytes, 1903 unsigned int BlockWidth256Bytes, 1904 enum source_format_class SourcePixelFormat, 1905 unsigned int SurfaceTiling, 1906 unsigned int BytePerPixel, 1907 enum scan_direction_class ScanDirection, 1908 unsigned int SwathWidth, 1909 unsigned int ViewportHeight, 1910 bool GPUVMEnable, 1911 bool HostVMEnable, 1912 unsigned int HostVMMaxNonCachedPageTableLevels, 1913 unsigned int GPUVMMinPageSize, 1914 unsigned int HostVMMinPageSize, 1915 unsigned int PTEBufferSizeInRequests, 1916 unsigned int Pitch, 1917 unsigned int DCCMetaPitch, 1918 unsigned int *MacroTileWidth, 1919 unsigned int *MetaRowByte, 1920 unsigned int *PixelPTEBytesPerRow, 1921 bool *PTEBufferSizeNotExceeded, 1922 int *dpte_row_width_ub, 1923 unsigned int *dpte_row_height, 1924 unsigned int *MetaRequestWidth, 1925 unsigned int *MetaRequestHeight, 1926 unsigned int *meta_row_width, 1927 unsigned int *meta_row_height, 1928 int *vm_group_bytes, 1929 unsigned int *dpte_group_bytes, 1930 unsigned int *PixelPTEReqWidth, 1931 unsigned int
*PixelPTEReqHeight, 1932 unsigned int *PTERequestSize, 1933 int *DPDE0BytesFrame, 1934 int *MetaPTEBytesFrame) 1935 { 1936 struct vba_vars_st *v = &mode_lib->vba; 1937 unsigned int MPDEBytesFrame; 1938 unsigned int DCCMetaSurfaceBytes; 1939 unsigned int MacroTileSizeBytes; 1940 unsigned int MacroTileHeight; 1941 unsigned int ExtraDPDEBytesFrame; 1942 unsigned int PDEAndMetaPTEBytesFrame; 1943 unsigned int PixelPTEReqHeightPTEs = 0; 1944 unsigned int HostVMDynamicLevels = 0; 1945 double FractionOfPTEReturnDrop; 1946 1947 if (GPUVMEnable == true && HostVMEnable == true) { 1948 if (HostVMMinPageSize < 2048) { 1949 HostVMDynamicLevels = HostVMMaxNonCachedPageTableLevels; 1950 } else if (HostVMMinPageSize >= 2048 && HostVMMinPageSize < 1048576) { 1951 HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 1); 1952 } else { 1953 HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 2); 1954 } 1955 } 1956 1957 *MetaRequestHeight = 8 * BlockHeight256Bytes; 1958 *MetaRequestWidth = 8 * BlockWidth256Bytes; 1959 if (ScanDirection != dm_vert) { 1960 *meta_row_height = *MetaRequestHeight; 1961 *meta_row_width = dml_ceil((double) SwathWidth - 1, *MetaRequestWidth) + *MetaRequestWidth; 1962 *MetaRowByte = *meta_row_width * *MetaRequestHeight * BytePerPixel / 256.0; 1963 } else { 1964 *meta_row_height = *MetaRequestWidth; 1965 *meta_row_width = dml_ceil((double) SwathWidth - 1, *MetaRequestHeight) + *MetaRequestHeight; 1966 *MetaRowByte = *meta_row_width * *MetaRequestWidth * BytePerPixel / 256.0; 1967 } 1968 DCCMetaSurfaceBytes = DCCMetaPitch * (dml_ceil(ViewportHeight - 1, 64 * BlockHeight256Bytes) + 64 * BlockHeight256Bytes) * BytePerPixel / 256; 1969 if (GPUVMEnable == true) { 1970 *MetaPTEBytesFrame = (dml_ceil((double) (DCCMetaSurfaceBytes - 4.0 * 1024.0) / (8 * 4.0 * 1024), 1) + 1) * 64; 1971 MPDEBytesFrame = 128 * (v->GPUVMMaxPageTableLevels - 1); 1972 } else { 1973 *MetaPTEBytesFrame = 0; 1974 MPDEBytesFrame = 0; 1975 } 1976 1977 
if (DCCEnable != true) { 1978 *MetaPTEBytesFrame = 0; 1979 MPDEBytesFrame = 0; 1980 *MetaRowByte = 0; 1981 } 1982 1983 if (SurfaceTiling == dm_sw_linear) { 1984 MacroTileSizeBytes = 256; 1985 MacroTileHeight = BlockHeight256Bytes; 1986 } else { 1987 MacroTileSizeBytes = 65536; 1988 MacroTileHeight = 16 * BlockHeight256Bytes; 1989 } 1990 *MacroTileWidth = MacroTileSizeBytes / BytePerPixel / MacroTileHeight; 1991 1992 if (GPUVMEnable == true && v->GPUVMMaxPageTableLevels > 1) { 1993 if (ScanDirection != dm_vert) { 1994 *DPDE0BytesFrame = 64 1995 * (dml_ceil( 1996 ((Pitch * (dml_ceil(ViewportHeight - 1, MacroTileHeight) + MacroTileHeight) * BytePerPixel) - MacroTileSizeBytes) 1997 / (8 * 2097152), 1998 1) + 1); 1999 } else { 2000 *DPDE0BytesFrame = 64 2001 * (dml_ceil( 2002 ((Pitch * (dml_ceil((double) SwathWidth - 1, MacroTileHeight) + MacroTileHeight) * BytePerPixel) - MacroTileSizeBytes) 2003 / (8 * 2097152), 2004 1) + 1); 2005 } 2006 ExtraDPDEBytesFrame = 128 * (v->GPUVMMaxPageTableLevels - 2); 2007 } else { 2008 *DPDE0BytesFrame = 0; 2009 ExtraDPDEBytesFrame = 0; 2010 } 2011 2012 PDEAndMetaPTEBytesFrame = *MetaPTEBytesFrame + MPDEBytesFrame + *DPDE0BytesFrame + ExtraDPDEBytesFrame; 2013 2014 #ifdef __DML_VBA_DEBUG__ 2015 dml_print("DML::%s: MetaPTEBytesFrame = %d\n", __func__, *MetaPTEBytesFrame); 2016 dml_print("DML::%s: MPDEBytesFrame = %d\n", __func__, MPDEBytesFrame); 2017 dml_print("DML::%s: DPDE0BytesFrame = %d\n", __func__, *DPDE0BytesFrame); 2018 dml_print("DML::%s: ExtraDPDEBytesFrame= %d\n", __func__, ExtraDPDEBytesFrame); 2019 dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame); 2020 #endif 2021 2022 if (HostVMEnable == true) { 2023 PDEAndMetaPTEBytesFrame = PDEAndMetaPTEBytesFrame * (1 + 8 * HostVMDynamicLevels); 2024 } 2025 #ifdef __DML_VBA_DEBUG__ 2026 dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame); 2027 #endif 2028 2029 if (SurfaceTiling == dm_sw_linear) { 2030 
PixelPTEReqHeightPTEs = 1; 2031 *PixelPTEReqHeight = 1; 2032 *PixelPTEReqWidth = 32768.0 / BytePerPixel; 2033 *PTERequestSize = 64; 2034 FractionOfPTEReturnDrop = 0; 2035 } else if (MacroTileSizeBytes == 4096) { 2036 PixelPTEReqHeightPTEs = 1; 2037 *PixelPTEReqHeight = MacroTileHeight; 2038 *PixelPTEReqWidth = 8 * *MacroTileWidth; 2039 *PTERequestSize = 64; 2040 if (ScanDirection != dm_vert) 2041 FractionOfPTEReturnDrop = 0; 2042 else 2043 FractionOfPTEReturnDrop = 7 / 8; 2044 } else if (GPUVMMinPageSize == 4 && MacroTileSizeBytes > 4096) { 2045 PixelPTEReqHeightPTEs = 16; 2046 *PixelPTEReqHeight = 16 * BlockHeight256Bytes; 2047 *PixelPTEReqWidth = 16 * BlockWidth256Bytes; 2048 *PTERequestSize = 128; 2049 FractionOfPTEReturnDrop = 0; 2050 } else { 2051 PixelPTEReqHeightPTEs = 1; 2052 *PixelPTEReqHeight = MacroTileHeight; 2053 *PixelPTEReqWidth = 8 * *MacroTileWidth; 2054 *PTERequestSize = 64; 2055 FractionOfPTEReturnDrop = 0; 2056 } 2057 2058 if (SurfaceTiling == dm_sw_linear) { 2059 *dpte_row_height = dml_min(128, 1 << (unsigned int) dml_floor(dml_log2(PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch), 1)); 2060 *dpte_row_width_ub = (dml_ceil((double)(Pitch * *dpte_row_height - 1) / *PixelPTEReqWidth, 1) + 1) * *PixelPTEReqWidth; 2061 *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqWidth * *PTERequestSize; 2062 } else if (ScanDirection != dm_vert) { 2063 *dpte_row_height = *PixelPTEReqHeight; 2064 *dpte_row_width_ub = (dml_ceil((double) (SwathWidth - 1) / *PixelPTEReqWidth, 1) + 1) * *PixelPTEReqWidth; 2065 *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqWidth * *PTERequestSize; 2066 } else { 2067 *dpte_row_height = dml_min(*PixelPTEReqWidth, *MacroTileWidth); 2068 *dpte_row_width_ub = (dml_ceil((double) (SwathWidth - 1) / *PixelPTEReqHeight, 1) + 1) * *PixelPTEReqHeight; 2069 *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqHeight * *PTERequestSize; 2070 } 2071 2072 if (*PixelPTEBytesPerRow * (1 - FractionOfPTEReturnDrop) <= 64 * 
PTEBufferSizeInRequests) { 2073 *PTEBufferSizeNotExceeded = true; 2074 } else { 2075 *PTEBufferSizeNotExceeded = false; 2076 } 2077 2078 if (GPUVMEnable != true) { 2079 *PixelPTEBytesPerRow = 0; 2080 *PTEBufferSizeNotExceeded = true; 2081 } 2082 2083 dml_print("DML: vm_bytes = meta_pte_bytes_per_frame (per_pipe) = MetaPTEBytesFrame = : %i\n", *MetaPTEBytesFrame); 2084 2085 if (HostVMEnable == true) { 2086 *PixelPTEBytesPerRow = *PixelPTEBytesPerRow * (1 + 8 * HostVMDynamicLevels); 2087 } 2088 2089 if (HostVMEnable == true) { 2090 *vm_group_bytes = 512; 2091 *dpte_group_bytes = 512; 2092 } else if (GPUVMEnable == true) { 2093 *vm_group_bytes = 2048; 2094 if (SurfaceTiling != dm_sw_linear && PixelPTEReqHeightPTEs == 1 && ScanDirection == dm_vert) { 2095 *dpte_group_bytes = 512; 2096 } else { 2097 *dpte_group_bytes = 2048; 2098 } 2099 } else { 2100 *vm_group_bytes = 0; 2101 *dpte_group_bytes = 0; 2102 } 2103 return PDEAndMetaPTEBytesFrame; 2104 } 2105 2106 static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation(struct display_mode_lib *mode_lib) 2107 { 2108 struct vba_vars_st *v = &mode_lib->vba; 2109 unsigned int j, k; 2110 double HostVMInefficiencyFactor = 1.0; 2111 bool NoChromaPlanes = true; 2112 int ReorderBytes; 2113 double VMDataOnlyReturnBW; 2114 double MaxTotalRDBandwidth = 0; 2115 int PrefetchMode = v->PrefetchModePerState[v->VoltageLevel][v->maxMpcComb]; 2116 2117 v->WritebackDISPCLK = 0.0; 2118 v->DISPCLKWithRamping = 0; 2119 v->DISPCLKWithoutRamping = 0; 2120 v->GlobalDPPCLK = 0.0; 2121 /* DAL custom code: need to update ReturnBW in case min dcfclk is overriden */ 2122 { 2123 double IdealFabricAndSDPPortBandwidthPerState = dml_min( 2124 v->ReturnBusWidth * v->DCFCLKState[v->VoltageLevel][v->maxMpcComb], 2125 v->FabricClockPerState[v->VoltageLevel] * v->FabricDatapathToDCNDataReturn); 2126 double IdealDRAMBandwidthPerState = v->DRAMSpeedPerState[v->VoltageLevel] * v->NumberOfChannels * v->DRAMChannelWidth; 2127 if 
(v->HostVMEnable != true) { 2128 v->ReturnBW = dml_min( 2129 IdealFabricAndSDPPortBandwidthPerState * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0, 2130 IdealDRAMBandwidthPerState * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyPixelDataOnly / 100.0); 2131 } else { 2132 v->ReturnBW = dml_min( 2133 IdealFabricAndSDPPortBandwidthPerState * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0, 2134 IdealDRAMBandwidthPerState * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyPixelMixedWithVMData / 100.0); 2135 } 2136 } 2137 /* End DAL custom code */ 2138 2139 // DISPCLK and DPPCLK Calculation 2140 // 2141 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 2142 if (v->WritebackEnable[k]) { 2143 v->WritebackDISPCLK = dml_max( 2144 v->WritebackDISPCLK, 2145 dml31_CalculateWriteBackDISPCLK( 2146 v->WritebackPixelFormat[k], 2147 v->PixelClock[k], 2148 v->WritebackHRatio[k], 2149 v->WritebackVRatio[k], 2150 v->WritebackHTaps[k], 2151 v->WritebackVTaps[k], 2152 v->WritebackSourceWidth[k], 2153 v->WritebackDestinationWidth[k], 2154 v->HTotal[k], 2155 v->WritebackLineBufferSize)); 2156 } 2157 } 2158 2159 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 2160 if (v->HRatio[k] > 1) { 2161 v->PSCL_THROUGHPUT_LUMA[k] = dml_min( 2162 v->MaxDCHUBToPSCLThroughput, 2163 v->MaxPSCLToLBThroughput * v->HRatio[k] / dml_ceil(v->htaps[k] / 6.0, 1)); 2164 } else { 2165 v->PSCL_THROUGHPUT_LUMA[k] = dml_min(v->MaxDCHUBToPSCLThroughput, v->MaxPSCLToLBThroughput); 2166 } 2167 2168 v->DPPCLKUsingSingleDPPLuma = v->PixelClock[k] 2169 * dml_max( 2170 v->vtaps[k] / 6.0 * dml_min(1.0, v->HRatio[k]), 2171 dml_max(v->HRatio[k] * v->VRatio[k] / v->PSCL_THROUGHPUT_LUMA[k], 1.0)); 2172 2173 if ((v->htaps[k] > 6 || v->vtaps[k] > 6) && v->DPPCLKUsingSingleDPPLuma < 2 * v->PixelClock[k]) { 2174 v->DPPCLKUsingSingleDPPLuma = 2 * v->PixelClock[k]; 2175 } 2176 2177 if ((v->SourcePixelFormat[k] != dm_420_8 && v->SourcePixelFormat[k] != dm_420_10 && v->SourcePixelFormat[k] != dm_420_12 
2178 && v->SourcePixelFormat[k] != dm_rgbe_alpha)) { 2179 v->PSCL_THROUGHPUT_CHROMA[k] = 0.0; 2180 v->DPPCLKUsingSingleDPP[k] = v->DPPCLKUsingSingleDPPLuma; 2181 } else { 2182 if (v->HRatioChroma[k] > 1) { 2183 v->PSCL_THROUGHPUT_CHROMA[k] = dml_min( 2184 v->MaxDCHUBToPSCLThroughput, 2185 v->MaxPSCLToLBThroughput * v->HRatioChroma[k] / dml_ceil(v->HTAPsChroma[k] / 6.0, 1.0)); 2186 } else { 2187 v->PSCL_THROUGHPUT_CHROMA[k] = dml_min(v->MaxDCHUBToPSCLThroughput, v->MaxPSCLToLBThroughput); 2188 } 2189 v->DPPCLKUsingSingleDPPChroma = v->PixelClock[k] 2190 * dml_max3( 2191 v->VTAPsChroma[k] / 6.0 * dml_min(1.0, v->HRatioChroma[k]), 2192 v->HRatioChroma[k] * v->VRatioChroma[k] / v->PSCL_THROUGHPUT_CHROMA[k], 2193 1.0); 2194 2195 if ((v->HTAPsChroma[k] > 6 || v->VTAPsChroma[k] > 6) && v->DPPCLKUsingSingleDPPChroma < 2 * v->PixelClock[k]) { 2196 v->DPPCLKUsingSingleDPPChroma = 2 * v->PixelClock[k]; 2197 } 2198 2199 v->DPPCLKUsingSingleDPP[k] = dml_max(v->DPPCLKUsingSingleDPPLuma, v->DPPCLKUsingSingleDPPChroma); 2200 } 2201 } 2202 2203 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 2204 if (v->BlendingAndTiming[k] != k) 2205 continue; 2206 if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_4to1) { 2207 v->DISPCLKWithRamping = dml_max( 2208 v->DISPCLKWithRamping, 2209 v->PixelClock[k] / 4 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100) 2210 * (1 + v->DISPCLKRampingMargin / 100)); 2211 v->DISPCLKWithoutRamping = dml_max( 2212 v->DISPCLKWithoutRamping, 2213 v->PixelClock[k] / 4 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100)); 2214 } else if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_2to1) { 2215 v->DISPCLKWithRamping = dml_max( 2216 v->DISPCLKWithRamping, 2217 v->PixelClock[k] / 2 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100) 2218 * (1 + v->DISPCLKRampingMargin / 100)); 2219 v->DISPCLKWithoutRamping = dml_max( 2220 v->DISPCLKWithoutRamping, 2221 v->PixelClock[k] / 2 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100)); 2222 } else { 2223 v->DISPCLKWithRamping = 
dml_max( 2224 v->DISPCLKWithRamping, 2225 v->PixelClock[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100) * (1 + v->DISPCLKRampingMargin / 100)); 2226 v->DISPCLKWithoutRamping = dml_max( 2227 v->DISPCLKWithoutRamping, 2228 v->PixelClock[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100)); 2229 } 2230 } 2231 2232 v->DISPCLKWithRamping = dml_max(v->DISPCLKWithRamping, v->WritebackDISPCLK); 2233 v->DISPCLKWithoutRamping = dml_max(v->DISPCLKWithoutRamping, v->WritebackDISPCLK); 2234 2235 ASSERT(v->DISPCLKDPPCLKVCOSpeed != 0); 2236 v->DISPCLKWithRampingRoundedToDFSGranularity = RoundToDFSGranularityUp(v->DISPCLKWithRamping, v->DISPCLKDPPCLKVCOSpeed); 2237 v->DISPCLKWithoutRampingRoundedToDFSGranularity = RoundToDFSGranularityUp(v->DISPCLKWithoutRamping, v->DISPCLKDPPCLKVCOSpeed); 2238 v->MaxDispclkRoundedToDFSGranularity = RoundToDFSGranularityDown( 2239 v->soc.clock_limits[v->soc.num_states - 1].dispclk_mhz, 2240 v->DISPCLKDPPCLKVCOSpeed); 2241 if (v->DISPCLKWithoutRampingRoundedToDFSGranularity > v->MaxDispclkRoundedToDFSGranularity) { 2242 v->DISPCLK_calculated = v->DISPCLKWithoutRampingRoundedToDFSGranularity; 2243 } else if (v->DISPCLKWithRampingRoundedToDFSGranularity > v->MaxDispclkRoundedToDFSGranularity) { 2244 v->DISPCLK_calculated = v->MaxDispclkRoundedToDFSGranularity; 2245 } else { 2246 v->DISPCLK_calculated = v->DISPCLKWithRampingRoundedToDFSGranularity; 2247 } 2248 v->DISPCLK = v->DISPCLK_calculated; 2249 DTRACE(" dispclk_mhz (calculated) = %f", v->DISPCLK_calculated); 2250 2251 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 2252 v->DPPCLK_calculated[k] = v->DPPCLKUsingSingleDPP[k] / v->DPPPerPlane[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100); 2253 v->GlobalDPPCLK = dml_max(v->GlobalDPPCLK, v->DPPCLK_calculated[k]); 2254 } 2255 v->GlobalDPPCLK = RoundToDFSGranularityUp(v->GlobalDPPCLK, v->DISPCLKDPPCLKVCOSpeed); 2256 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 2257 v->DPPCLK_calculated[k] = v->GlobalDPPCLK / 255 * 
dml_ceil(v->DPPCLK_calculated[k] * 255.0 / v->GlobalDPPCLK, 1); 2258 DTRACE(" dppclk_mhz[%i] (calculated) = %f", k, v->DPPCLK_calculated[k]); 2259 } 2260 2261 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 2262 v->DPPCLK[k] = v->DPPCLK_calculated[k]; 2263 } 2264 2265 // Urgent and B P-State/DRAM Clock Change Watermark 2266 DTRACE(" dcfclk_mhz = %f", v->DCFCLK); 2267 DTRACE(" return_bus_bw = %f", v->ReturnBW); 2268 2269 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 2270 CalculateBytePerPixelAnd256BBlockSizes( 2271 v->SourcePixelFormat[k], 2272 v->SurfaceTiling[k], 2273 &v->BytePerPixelY[k], 2274 &v->BytePerPixelC[k], 2275 &v->BytePerPixelDETY[k], 2276 &v->BytePerPixelDETC[k], 2277 &v->BlockHeight256BytesY[k], 2278 &v->BlockHeight256BytesC[k], 2279 &v->BlockWidth256BytesY[k], 2280 &v->BlockWidth256BytesC[k]); 2281 } 2282 2283 CalculateSwathWidth( 2284 false, 2285 v->NumberOfActivePlanes, 2286 v->SourcePixelFormat, 2287 v->SourceScan, 2288 v->ViewportWidth, 2289 v->ViewportHeight, 2290 v->SurfaceWidthY, 2291 v->SurfaceWidthC, 2292 v->SurfaceHeightY, 2293 v->SurfaceHeightC, 2294 v->ODMCombineEnabled, 2295 v->BytePerPixelY, 2296 v->BytePerPixelC, 2297 v->BlockHeight256BytesY, 2298 v->BlockHeight256BytesC, 2299 v->BlockWidth256BytesY, 2300 v->BlockWidth256BytesC, 2301 v->BlendingAndTiming, 2302 v->HActive, 2303 v->HRatio, 2304 v->DPPPerPlane, 2305 v->SwathWidthSingleDPPY, 2306 v->SwathWidthSingleDPPC, 2307 v->SwathWidthY, 2308 v->SwathWidthC, 2309 v->dummyinteger3, 2310 v->dummyinteger4, 2311 v->swath_width_luma_ub, 2312 v->swath_width_chroma_ub); 2313 2314 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 2315 v->ReadBandwidthPlaneLuma[k] = v->SwathWidthSingleDPPY[k] * v->BytePerPixelY[k] / (v->HTotal[k] / v->PixelClock[k]) 2316 * v->VRatio[k]; 2317 v->ReadBandwidthPlaneChroma[k] = v->SwathWidthSingleDPPC[k] * v->BytePerPixelC[k] / (v->HTotal[k] / v->PixelClock[k]) 2318 * v->VRatioChroma[k]; 2319 DTRACE(" read_bw[%i] = %fBps", k, v->ReadBandwidthPlaneLuma[k] + 
v->ReadBandwidthPlaneChroma[k]); 2320 } 2321 2322 // DCFCLK Deep Sleep 2323 CalculateDCFCLKDeepSleep( 2324 mode_lib, 2325 v->NumberOfActivePlanes, 2326 v->BytePerPixelY, 2327 v->BytePerPixelC, 2328 v->VRatio, 2329 v->VRatioChroma, 2330 v->SwathWidthY, 2331 v->SwathWidthC, 2332 v->DPPPerPlane, 2333 v->HRatio, 2334 v->HRatioChroma, 2335 v->PixelClock, 2336 v->PSCL_THROUGHPUT_LUMA, 2337 v->PSCL_THROUGHPUT_CHROMA, 2338 v->DPPCLK, 2339 v->ReadBandwidthPlaneLuma, 2340 v->ReadBandwidthPlaneChroma, 2341 v->ReturnBusWidth, 2342 &v->DCFCLKDeepSleep); 2343 2344 // DSCCLK 2345 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 2346 if ((v->BlendingAndTiming[k] != k) || !v->DSCEnabled[k]) { 2347 v->DSCCLK_calculated[k] = 0.0; 2348 } else { 2349 if (v->OutputFormat[k] == dm_420) 2350 v->DSCFormatFactor = 2; 2351 else if (v->OutputFormat[k] == dm_444) 2352 v->DSCFormatFactor = 1; 2353 else if (v->OutputFormat[k] == dm_n422) 2354 v->DSCFormatFactor = 2; 2355 else 2356 v->DSCFormatFactor = 1; 2357 if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_4to1) 2358 v->DSCCLK_calculated[k] = v->PixelClockBackEnd[k] / 12 / v->DSCFormatFactor 2359 / (1 - v->DISPCLKDPPCLKDSCCLKDownSpreading / 100); 2360 else if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_2to1) 2361 v->DSCCLK_calculated[k] = v->PixelClockBackEnd[k] / 6 / v->DSCFormatFactor 2362 / (1 - v->DISPCLKDPPCLKDSCCLKDownSpreading / 100); 2363 else 2364 v->DSCCLK_calculated[k] = v->PixelClockBackEnd[k] / 3 / v->DSCFormatFactor 2365 / (1 - v->DISPCLKDPPCLKDSCCLKDownSpreading / 100); 2366 } 2367 } 2368 2369 // DSC Delay 2370 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 2371 double BPP = v->OutputBpp[k]; 2372 2373 if (v->DSCEnabled[k] && BPP != 0) { 2374 if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_disabled) { 2375 v->DSCDelay[k] = dscceComputeDelay( 2376 v->DSCInputBitPerComponent[k], 2377 BPP, 2378 dml_ceil((double) v->HActive[k] / v->NumberOfDSCSlices[k], 1), 2379 v->NumberOfDSCSlices[k], 2380 v->OutputFormat[k], 2381 
v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k]); 2382 } else if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_2to1) { 2383 v->DSCDelay[k] = 2 2384 * (dscceComputeDelay( 2385 v->DSCInputBitPerComponent[k], 2386 BPP, 2387 dml_ceil((double) v->HActive[k] / v->NumberOfDSCSlices[k], 1), 2388 v->NumberOfDSCSlices[k] / 2.0, 2389 v->OutputFormat[k], 2390 v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k])); 2391 } else { 2392 v->DSCDelay[k] = 4 2393 * (dscceComputeDelay( 2394 v->DSCInputBitPerComponent[k], 2395 BPP, 2396 dml_ceil((double) v->HActive[k] / v->NumberOfDSCSlices[k], 1), 2397 v->NumberOfDSCSlices[k] / 4.0, 2398 v->OutputFormat[k], 2399 v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k])); 2400 } 2401 v->DSCDelay[k] = v->DSCDelay[k] * v->PixelClock[k] / v->PixelClockBackEnd[k]; 2402 } else { 2403 v->DSCDelay[k] = 0; 2404 } 2405 } 2406 2407 for (k = 0; k < v->NumberOfActivePlanes; ++k) 2408 for (j = 0; j < v->NumberOfActivePlanes; ++j) // NumberOfPlanes 2409 if (j != k && v->BlendingAndTiming[k] == j && v->DSCEnabled[j]) 2410 v->DSCDelay[k] = v->DSCDelay[j]; 2411 2412 // Prefetch 2413 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 2414 unsigned int PDEAndMetaPTEBytesFrameY; 2415 unsigned int PixelPTEBytesPerRowY; 2416 unsigned int MetaRowByteY; 2417 unsigned int MetaRowByteC; 2418 unsigned int PDEAndMetaPTEBytesFrameC; 2419 unsigned int PixelPTEBytesPerRowC; 2420 bool PTEBufferSizeNotExceededY; 2421 bool PTEBufferSizeNotExceededC; 2422 2423 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12 2424 || v->SourcePixelFormat[k] == dm_rgbe_alpha) { 2425 if ((v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12) && v->SourceScan[k] != dm_vert) { 2426 v->PTEBufferSizeInRequestsForLuma = (v->PTEBufferSizeInRequestsLuma + v->PTEBufferSizeInRequestsChroma) / 2; 2427 v->PTEBufferSizeInRequestsForChroma = 
v->PTEBufferSizeInRequestsForLuma; 2428 } else { 2429 v->PTEBufferSizeInRequestsForLuma = v->PTEBufferSizeInRequestsLuma; 2430 v->PTEBufferSizeInRequestsForChroma = v->PTEBufferSizeInRequestsChroma; 2431 } 2432 2433 PDEAndMetaPTEBytesFrameC = CalculateVMAndRowBytes( 2434 mode_lib, 2435 v->DCCEnable[k], 2436 v->BlockHeight256BytesC[k], 2437 v->BlockWidth256BytesC[k], 2438 v->SourcePixelFormat[k], 2439 v->SurfaceTiling[k], 2440 v->BytePerPixelC[k], 2441 v->SourceScan[k], 2442 v->SwathWidthC[k], 2443 v->ViewportHeightChroma[k], 2444 v->GPUVMEnable, 2445 v->HostVMEnable, 2446 v->HostVMMaxNonCachedPageTableLevels, 2447 v->GPUVMMinPageSize, 2448 v->HostVMMinPageSize, 2449 v->PTEBufferSizeInRequestsForChroma, 2450 v->PitchC[k], 2451 v->DCCMetaPitchC[k], 2452 &v->MacroTileWidthC[k], 2453 &MetaRowByteC, 2454 &PixelPTEBytesPerRowC, 2455 &PTEBufferSizeNotExceededC, 2456 &v->dpte_row_width_chroma_ub[k], 2457 &v->dpte_row_height_chroma[k], 2458 &v->meta_req_width_chroma[k], 2459 &v->meta_req_height_chroma[k], 2460 &v->meta_row_width_chroma[k], 2461 &v->meta_row_height_chroma[k], 2462 &v->dummyinteger1, 2463 &v->dummyinteger2, 2464 &v->PixelPTEReqWidthC[k], 2465 &v->PixelPTEReqHeightC[k], 2466 &v->PTERequestSizeC[k], 2467 &v->dpde0_bytes_per_frame_ub_c[k], 2468 &v->meta_pte_bytes_per_frame_ub_c[k]); 2469 2470 v->PrefetchSourceLinesC[k] = CalculatePrefetchSourceLines( 2471 mode_lib, 2472 v->VRatioChroma[k], 2473 v->VTAPsChroma[k], 2474 v->Interlace[k], 2475 v->ProgressiveToInterlaceUnitInOPP, 2476 v->SwathHeightC[k], 2477 v->ViewportYStartC[k], 2478 &v->VInitPreFillC[k], 2479 &v->MaxNumSwathC[k]); 2480 } else { 2481 v->PTEBufferSizeInRequestsForLuma = v->PTEBufferSizeInRequestsLuma + v->PTEBufferSizeInRequestsChroma; 2482 v->PTEBufferSizeInRequestsForChroma = 0; 2483 PixelPTEBytesPerRowC = 0; 2484 PDEAndMetaPTEBytesFrameC = 0; 2485 MetaRowByteC = 0; 2486 v->MaxNumSwathC[k] = 0; 2487 v->PrefetchSourceLinesC[k] = 0; 2488 } 2489 2490 PDEAndMetaPTEBytesFrameY = 
CalculateVMAndRowBytes( 2491 mode_lib, 2492 v->DCCEnable[k], 2493 v->BlockHeight256BytesY[k], 2494 v->BlockWidth256BytesY[k], 2495 v->SourcePixelFormat[k], 2496 v->SurfaceTiling[k], 2497 v->BytePerPixelY[k], 2498 v->SourceScan[k], 2499 v->SwathWidthY[k], 2500 v->ViewportHeight[k], 2501 v->GPUVMEnable, 2502 v->HostVMEnable, 2503 v->HostVMMaxNonCachedPageTableLevels, 2504 v->GPUVMMinPageSize, 2505 v->HostVMMinPageSize, 2506 v->PTEBufferSizeInRequestsForLuma, 2507 v->PitchY[k], 2508 v->DCCMetaPitchY[k], 2509 &v->MacroTileWidthY[k], 2510 &MetaRowByteY, 2511 &PixelPTEBytesPerRowY, 2512 &PTEBufferSizeNotExceededY, 2513 &v->dpte_row_width_luma_ub[k], 2514 &v->dpte_row_height[k], 2515 &v->meta_req_width[k], 2516 &v->meta_req_height[k], 2517 &v->meta_row_width[k], 2518 &v->meta_row_height[k], 2519 &v->vm_group_bytes[k], 2520 &v->dpte_group_bytes[k], 2521 &v->PixelPTEReqWidthY[k], 2522 &v->PixelPTEReqHeightY[k], 2523 &v->PTERequestSizeY[k], 2524 &v->dpde0_bytes_per_frame_ub_l[k], 2525 &v->meta_pte_bytes_per_frame_ub_l[k]); 2526 2527 v->PrefetchSourceLinesY[k] = CalculatePrefetchSourceLines( 2528 mode_lib, 2529 v->VRatio[k], 2530 v->vtaps[k], 2531 v->Interlace[k], 2532 v->ProgressiveToInterlaceUnitInOPP, 2533 v->SwathHeightY[k], 2534 v->ViewportYStartY[k], 2535 &v->VInitPreFillY[k], 2536 &v->MaxNumSwathY[k]); 2537 v->PixelPTEBytesPerRow[k] = PixelPTEBytesPerRowY + PixelPTEBytesPerRowC; 2538 v->PDEAndMetaPTEBytesFrame[k] = PDEAndMetaPTEBytesFrameY + PDEAndMetaPTEBytesFrameC; 2539 v->MetaRowByte[k] = MetaRowByteY + MetaRowByteC; 2540 2541 CalculateRowBandwidth( 2542 v->GPUVMEnable, 2543 v->SourcePixelFormat[k], 2544 v->VRatio[k], 2545 v->VRatioChroma[k], 2546 v->DCCEnable[k], 2547 v->HTotal[k] / v->PixelClock[k], 2548 MetaRowByteY, 2549 MetaRowByteC, 2550 v->meta_row_height[k], 2551 v->meta_row_height_chroma[k], 2552 PixelPTEBytesPerRowY, 2553 PixelPTEBytesPerRowC, 2554 v->dpte_row_height[k], 2555 v->dpte_row_height_chroma[k], 2556 &v->meta_row_bw[k], 2557 &v->dpte_row_bw[k]); 
2558 } 2559 2560 v->TotalDCCActiveDPP = 0; 2561 v->TotalActiveDPP = 0; 2562 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 2563 v->TotalActiveDPP = v->TotalActiveDPP + v->DPPPerPlane[k]; 2564 if (v->DCCEnable[k]) 2565 v->TotalDCCActiveDPP = v->TotalDCCActiveDPP + v->DPPPerPlane[k]; 2566 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12 2567 || v->SourcePixelFormat[k] == dm_rgbe_alpha) 2568 NoChromaPlanes = false; 2569 } 2570 2571 ReorderBytes = v->NumberOfChannels 2572 * dml_max3( 2573 v->UrgentOutOfOrderReturnPerChannelPixelDataOnly, 2574 v->UrgentOutOfOrderReturnPerChannelPixelMixedWithVMData, 2575 v->UrgentOutOfOrderReturnPerChannelVMDataOnly); 2576 2577 VMDataOnlyReturnBW = dml_min( 2578 dml_min(v->ReturnBusWidth * v->DCFCLK, v->FabricClock * v->FabricDatapathToDCNDataReturn) 2579 * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0, 2580 v->DRAMSpeed * v->NumberOfChannels * v->DRAMChannelWidth 2581 * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyVMDataOnly / 100.0); 2582 2583 #ifdef __DML_VBA_DEBUG__ 2584 dml_print("DML::%s: v->ReturnBusWidth = %f\n", __func__, v->ReturnBusWidth); 2585 dml_print("DML::%s: v->DCFCLK = %f\n", __func__, v->DCFCLK); 2586 dml_print("DML::%s: v->FabricClock = %f\n", __func__, v->FabricClock); 2587 dml_print("DML::%s: v->FabricDatapathToDCNDataReturn = %f\n", __func__, v->FabricDatapathToDCNDataReturn); 2588 dml_print("DML::%s: v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency = %f\n", __func__, v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency); 2589 dml_print("DML::%s: v->DRAMSpeed = %f\n", __func__, v->DRAMSpeed); 2590 dml_print("DML::%s: v->NumberOfChannels = %f\n", __func__, v->NumberOfChannels); 2591 dml_print("DML::%s: v->DRAMChannelWidth = %f\n", __func__, v->DRAMChannelWidth); 2592 dml_print("DML::%s: v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyVMDataOnly = %f\n", __func__, 
v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyVMDataOnly); 2593 dml_print("DML::%s: VMDataOnlyReturnBW = %f\n", __func__, VMDataOnlyReturnBW); 2594 dml_print("DML::%s: ReturnBW = %f\n", __func__, v->ReturnBW); 2595 #endif 2596 2597 if (v->GPUVMEnable && v->HostVMEnable) 2598 HostVMInefficiencyFactor = v->ReturnBW / VMDataOnlyReturnBW; 2599 2600 v->UrgentExtraLatency = CalculateExtraLatency( 2601 v->RoundTripPingLatencyCycles, 2602 ReorderBytes, 2603 v->DCFCLK, 2604 v->TotalActiveDPP, 2605 v->PixelChunkSizeInKByte, 2606 v->TotalDCCActiveDPP, 2607 v->MetaChunkSize, 2608 v->ReturnBW, 2609 v->GPUVMEnable, 2610 v->HostVMEnable, 2611 v->NumberOfActivePlanes, 2612 v->DPPPerPlane, 2613 v->dpte_group_bytes, 2614 HostVMInefficiencyFactor, 2615 v->HostVMMinPageSize, 2616 v->HostVMMaxNonCachedPageTableLevels); 2617 2618 v->TCalc = 24.0 / v->DCFCLKDeepSleep; 2619 2620 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 2621 if (v->BlendingAndTiming[k] == k) { 2622 if (v->WritebackEnable[k] == true) { 2623 v->WritebackDelay[v->VoltageLevel][k] = v->WritebackLatency 2624 + CalculateWriteBackDelay( 2625 v->WritebackPixelFormat[k], 2626 v->WritebackHRatio[k], 2627 v->WritebackVRatio[k], 2628 v->WritebackVTaps[k], 2629 v->WritebackDestinationWidth[k], 2630 v->WritebackDestinationHeight[k], 2631 v->WritebackSourceHeight[k], 2632 v->HTotal[k]) / v->DISPCLK; 2633 } else 2634 v->WritebackDelay[v->VoltageLevel][k] = 0; 2635 for (j = 0; j < v->NumberOfActivePlanes; ++j) { 2636 if (v->BlendingAndTiming[j] == k && v->WritebackEnable[j] == true) { 2637 v->WritebackDelay[v->VoltageLevel][k] = dml_max( 2638 v->WritebackDelay[v->VoltageLevel][k], 2639 v->WritebackLatency 2640 + CalculateWriteBackDelay( 2641 v->WritebackPixelFormat[j], 2642 v->WritebackHRatio[j], 2643 v->WritebackVRatio[j], 2644 v->WritebackVTaps[j], 2645 v->WritebackDestinationWidth[j], 2646 v->WritebackDestinationHeight[j], 2647 v->WritebackSourceHeight[j], 2648 v->HTotal[k]) / v->DISPCLK); 2649 } 2650 } 2651 } 2652 } 2653 2654 
for (k = 0; k < v->NumberOfActivePlanes; ++k) 2655 for (j = 0; j < v->NumberOfActivePlanes; ++j) 2656 if (v->BlendingAndTiming[k] == j) 2657 v->WritebackDelay[v->VoltageLevel][k] = v->WritebackDelay[v->VoltageLevel][j]; 2658 2659 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 2660 v->MaxVStartupLines[k] = 2661 (v->Interlace[k] && !v->ProgressiveToInterlaceUnitInOPP) ? 2662 dml_floor((v->VTotal[k] - v->VActive[k]) / 2.0, 1.0) : 2663 v->VTotal[k] - v->VActive[k] 2664 - dml_max( 2665 1.0, 2666 dml_ceil( 2667 (double) v->WritebackDelay[v->VoltageLevel][k] 2668 / (v->HTotal[k] / v->PixelClock[k]), 2669 1)); 2670 if (v->MaxVStartupLines[k] > 1023) 2671 v->MaxVStartupLines[k] = 1023; 2672 2673 #ifdef __DML_VBA_DEBUG__ 2674 dml_print("DML::%s: k=%d MaxVStartupLines = %d\n", __func__, k, v->MaxVStartupLines[k]); 2675 dml_print("DML::%s: k=%d VoltageLevel = %d\n", __func__, k, v->VoltageLevel); 2676 dml_print("DML::%s: k=%d WritebackDelay = %f\n", __func__, k, v->WritebackDelay[v->VoltageLevel][k]); 2677 #endif 2678 } 2679 2680 v->MaximumMaxVStartupLines = 0; 2681 for (k = 0; k < v->NumberOfActivePlanes; ++k) 2682 v->MaximumMaxVStartupLines = dml_max(v->MaximumMaxVStartupLines, v->MaxVStartupLines[k]); 2683 2684 // VBA_DELTA 2685 // We don't really care to iterate between the various prefetch modes 2686 //v->PrefetchERROR = CalculateMinAndMaxPrefetchMode(v->AllowDRAMSelfRefreshOrDRAMClockChangeInVblank, &v->MinPrefetchMode, &v->MaxPrefetchMode); 2687 2688 v->UrgentLatency = CalculateUrgentLatency( 2689 v->UrgentLatencyPixelDataOnly, 2690 v->UrgentLatencyPixelMixedWithVMData, 2691 v->UrgentLatencyVMDataOnly, 2692 v->DoUrgentLatencyAdjustment, 2693 v->UrgentLatencyAdjustmentFabricClockComponent, 2694 v->UrgentLatencyAdjustmentFabricClockReference, 2695 v->FabricClock); 2696 2697 v->FractionOfUrgentBandwidth = 0.0; 2698 v->FractionOfUrgentBandwidthImmediateFlip = 0.0; 2699 2700 v->VStartupLines = __DML_VBA_MIN_VSTARTUP__; 2701 2702 do { 2703 double 
MaxTotalRDBandwidthNoUrgentBurst = 0.0; 2704 bool DestinationLineTimesForPrefetchLessThan2 = false; 2705 bool VRatioPrefetchMoreThan4 = false; 2706 double TWait = CalculateTWait(PrefetchMode, v->DRAMClockChangeLatency, v->UrgentLatency, v->SREnterPlusExitTime); 2707 MaxTotalRDBandwidth = 0; 2708 2709 dml_print("DML::%s: Start loop: VStartup = %d\n", __func__, v->VStartupLines); 2710 2711 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 2712 Pipe myPipe; 2713 2714 myPipe.DPPCLK = v->DPPCLK[k]; 2715 myPipe.DISPCLK = v->DISPCLK; 2716 myPipe.PixelClock = v->PixelClock[k]; 2717 myPipe.DCFCLKDeepSleep = v->DCFCLKDeepSleep; 2718 myPipe.DPPPerPlane = v->DPPPerPlane[k]; 2719 myPipe.ScalerEnabled = v->ScalerEnabled[k]; 2720 myPipe.SourceScan = v->SourceScan[k]; 2721 myPipe.BlockWidth256BytesY = v->BlockWidth256BytesY[k]; 2722 myPipe.BlockHeight256BytesY = v->BlockHeight256BytesY[k]; 2723 myPipe.BlockWidth256BytesC = v->BlockWidth256BytesC[k]; 2724 myPipe.BlockHeight256BytesC = v->BlockHeight256BytesC[k]; 2725 myPipe.InterlaceEnable = v->Interlace[k]; 2726 myPipe.NumberOfCursors = v->NumberOfCursors[k]; 2727 myPipe.VBlank = v->VTotal[k] - v->VActive[k]; 2728 myPipe.HTotal = v->HTotal[k]; 2729 myPipe.DCCEnable = v->DCCEnable[k]; 2730 myPipe.ODMCombineIsEnabled = v->ODMCombineEnabled[k] == dm_odm_combine_mode_4to1 2731 || v->ODMCombineEnabled[k] == dm_odm_combine_mode_2to1; 2732 myPipe.SourcePixelFormat = v->SourcePixelFormat[k]; 2733 myPipe.BytePerPixelY = v->BytePerPixelY[k]; 2734 myPipe.BytePerPixelC = v->BytePerPixelC[k]; 2735 myPipe.ProgressiveToInterlaceUnitInOPP = v->ProgressiveToInterlaceUnitInOPP; 2736 v->ErrorResult[k] = CalculatePrefetchSchedule( 2737 mode_lib, 2738 HostVMInefficiencyFactor, 2739 &myPipe, 2740 v->DSCDelay[k], 2741 v->DPPCLKDelaySubtotal + v->DPPCLKDelayCNVCFormater, 2742 v->DPPCLKDelaySCL, 2743 v->DPPCLKDelaySCLLBOnly, 2744 v->DPPCLKDelayCNVCCursor, 2745 v->DISPCLKDelaySubtotal, 2746 (unsigned int) (v->SwathWidthY[k] / v->HRatio[k]), 2747 
v->OutputFormat[k], 2748 v->MaxInterDCNTileRepeaters, 2749 dml_min(v->VStartupLines, v->MaxVStartupLines[k]), 2750 v->MaxVStartupLines[k], 2751 v->GPUVMMaxPageTableLevels, 2752 v->GPUVMEnable, 2753 v->HostVMEnable, 2754 v->HostVMMaxNonCachedPageTableLevels, 2755 v->HostVMMinPageSize, 2756 v->DynamicMetadataEnable[k], 2757 v->DynamicMetadataVMEnabled, 2758 v->DynamicMetadataLinesBeforeActiveRequired[k], 2759 v->DynamicMetadataTransmittedBytes[k], 2760 v->UrgentLatency, 2761 v->UrgentExtraLatency, 2762 v->TCalc, 2763 v->PDEAndMetaPTEBytesFrame[k], 2764 v->MetaRowByte[k], 2765 v->PixelPTEBytesPerRow[k], 2766 v->PrefetchSourceLinesY[k], 2767 v->SwathWidthY[k], 2768 v->VInitPreFillY[k], 2769 v->MaxNumSwathY[k], 2770 v->PrefetchSourceLinesC[k], 2771 v->SwathWidthC[k], 2772 v->VInitPreFillC[k], 2773 v->MaxNumSwathC[k], 2774 v->swath_width_luma_ub[k], 2775 v->swath_width_chroma_ub[k], 2776 v->SwathHeightY[k], 2777 v->SwathHeightC[k], 2778 TWait, 2779 &v->DSTXAfterScaler[k], 2780 &v->DSTYAfterScaler[k], 2781 &v->DestinationLinesForPrefetch[k], 2782 &v->PrefetchBandwidth[k], 2783 &v->DestinationLinesToRequestVMInVBlank[k], 2784 &v->DestinationLinesToRequestRowInVBlank[k], 2785 &v->VRatioPrefetchY[k], 2786 &v->VRatioPrefetchC[k], 2787 &v->RequiredPrefetchPixDataBWLuma[k], 2788 &v->RequiredPrefetchPixDataBWChroma[k], 2789 &v->NotEnoughTimeForDynamicMetadata[k], 2790 &v->Tno_bw[k], 2791 &v->prefetch_vmrow_bw[k], 2792 &v->Tdmdl_vm[k], 2793 &v->Tdmdl[k], 2794 &v->TSetup[k], 2795 &v->VUpdateOffsetPix[k], 2796 &v->VUpdateWidthPix[k], 2797 &v->VReadyOffsetPix[k]); 2798 2799 #ifdef __DML_VBA_DEBUG__ 2800 dml_print("DML::%s: k=%0d Prefetch cal result=%0d\n", __func__, k, v->ErrorResult[k]); 2801 #endif 2802 v->VStartup[k] = dml_min(v->VStartupLines, v->MaxVStartupLines[k]); 2803 } 2804 2805 v->NoEnoughUrgentLatencyHiding = false; 2806 v->NoEnoughUrgentLatencyHidingPre = false; 2807 2808 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 2809 v->cursor_bw[k] = v->NumberOfCursors[k] * 
v->CursorWidth[k][0] * v->CursorBPP[k][0] / 8.0 2810 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatio[k]; 2811 v->cursor_bw_pre[k] = v->NumberOfCursors[k] * v->CursorWidth[k][0] * v->CursorBPP[k][0] / 8.0 2812 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatioPrefetchY[k]; 2813 2814 CalculateUrgentBurstFactor( 2815 v->swath_width_luma_ub[k], 2816 v->swath_width_chroma_ub[k], 2817 v->SwathHeightY[k], 2818 v->SwathHeightC[k], 2819 v->HTotal[k] / v->PixelClock[k], 2820 v->UrgentLatency, 2821 v->CursorBufferSize, 2822 v->CursorWidth[k][0], 2823 v->CursorBPP[k][0], 2824 v->VRatio[k], 2825 v->VRatioChroma[k], 2826 v->BytePerPixelDETY[k], 2827 v->BytePerPixelDETC[k], 2828 v->DETBufferSizeY[k], 2829 v->DETBufferSizeC[k], 2830 &v->UrgBurstFactorCursor[k], 2831 &v->UrgBurstFactorLuma[k], 2832 &v->UrgBurstFactorChroma[k], 2833 &v->NoUrgentLatencyHiding[k]); 2834 2835 CalculateUrgentBurstFactor( 2836 v->swath_width_luma_ub[k], 2837 v->swath_width_chroma_ub[k], 2838 v->SwathHeightY[k], 2839 v->SwathHeightC[k], 2840 v->HTotal[k] / v->PixelClock[k], 2841 v->UrgentLatency, 2842 v->CursorBufferSize, 2843 v->CursorWidth[k][0], 2844 v->CursorBPP[k][0], 2845 v->VRatioPrefetchY[k], 2846 v->VRatioPrefetchC[k], 2847 v->BytePerPixelDETY[k], 2848 v->BytePerPixelDETC[k], 2849 v->DETBufferSizeY[k], 2850 v->DETBufferSizeC[k], 2851 &v->UrgBurstFactorCursorPre[k], 2852 &v->UrgBurstFactorLumaPre[k], 2853 &v->UrgBurstFactorChromaPre[k], 2854 &v->NoUrgentLatencyHidingPre[k]); 2855 2856 MaxTotalRDBandwidth = MaxTotalRDBandwidth 2857 + dml_max3( 2858 v->DPPPerPlane[k] * v->prefetch_vmrow_bw[k], 2859 v->ReadBandwidthPlaneLuma[k] * v->UrgBurstFactorLuma[k] 2860 + v->ReadBandwidthPlaneChroma[k] * v->UrgBurstFactorChroma[k] 2861 + v->cursor_bw[k] * v->UrgBurstFactorCursor[k] 2862 + v->DPPPerPlane[k] * (v->meta_row_bw[k] + v->dpte_row_bw[k]), 2863 v->DPPPerPlane[k] 2864 * (v->RequiredPrefetchPixDataBWLuma[k] * v->UrgBurstFactorLumaPre[k] 2865 + v->RequiredPrefetchPixDataBWChroma[k] * 
v->UrgBurstFactorChromaPre[k]) 2866 + v->cursor_bw_pre[k] * v->UrgBurstFactorCursorPre[k]); 2867 2868 MaxTotalRDBandwidthNoUrgentBurst = MaxTotalRDBandwidthNoUrgentBurst 2869 + dml_max3( 2870 v->DPPPerPlane[k] * v->prefetch_vmrow_bw[k], 2871 v->ReadBandwidthPlaneLuma[k] + v->ReadBandwidthPlaneChroma[k] + v->cursor_bw[k] 2872 + v->DPPPerPlane[k] * (v->meta_row_bw[k] + v->dpte_row_bw[k]), 2873 v->DPPPerPlane[k] * (v->RequiredPrefetchPixDataBWLuma[k] + v->RequiredPrefetchPixDataBWChroma[k]) 2874 + v->cursor_bw_pre[k]); 2875 2876 #ifdef __DML_VBA_DEBUG__ 2877 dml_print("DML::%s: k=%0d DPPPerPlane=%d\n", __func__, k, v->DPPPerPlane[k]); 2878 dml_print("DML::%s: k=%0d UrgBurstFactorLuma=%f\n", __func__, k, v->UrgBurstFactorLuma[k]); 2879 dml_print("DML::%s: k=%0d UrgBurstFactorChroma=%f\n", __func__, k, v->UrgBurstFactorChroma[k]); 2880 dml_print("DML::%s: k=%0d UrgBurstFactorLumaPre=%f\n", __func__, k, v->UrgBurstFactorLumaPre[k]); 2881 dml_print("DML::%s: k=%0d UrgBurstFactorChromaPre=%f\n", __func__, k, v->UrgBurstFactorChromaPre[k]); 2882 2883 dml_print("DML::%s: k=%0d VRatioPrefetchY=%f\n", __func__, k, v->VRatioPrefetchY[k]); 2884 dml_print("DML::%s: k=%0d VRatioY=%f\n", __func__, k, v->VRatio[k]); 2885 2886 dml_print("DML::%s: k=%0d prefetch_vmrow_bw=%f\n", __func__, k, v->prefetch_vmrow_bw[k]); 2887 dml_print("DML::%s: k=%0d ReadBandwidthPlaneLuma=%f\n", __func__, k, v->ReadBandwidthPlaneLuma[k]); 2888 dml_print("DML::%s: k=%0d ReadBandwidthPlaneChroma=%f\n", __func__, k, v->ReadBandwidthPlaneChroma[k]); 2889 dml_print("DML::%s: k=%0d cursor_bw=%f\n", __func__, k, v->cursor_bw[k]); 2890 dml_print("DML::%s: k=%0d meta_row_bw=%f\n", __func__, k, v->meta_row_bw[k]); 2891 dml_print("DML::%s: k=%0d dpte_row_bw=%f\n", __func__, k, v->dpte_row_bw[k]); 2892 dml_print("DML::%s: k=%0d RequiredPrefetchPixDataBWLuma=%f\n", __func__, k, v->RequiredPrefetchPixDataBWLuma[k]); 2893 dml_print("DML::%s: k=%0d RequiredPrefetchPixDataBWChroma=%f\n", __func__, k, 
v->RequiredPrefetchPixDataBWChroma[k]); 2894 dml_print("DML::%s: k=%0d cursor_bw_pre=%f\n", __func__, k, v->cursor_bw_pre[k]); 2895 dml_print("DML::%s: k=%0d MaxTotalRDBandwidthNoUrgentBurst=%f\n", __func__, k, MaxTotalRDBandwidthNoUrgentBurst); 2896 #endif 2897 2898 if (v->DestinationLinesForPrefetch[k] < 2) 2899 DestinationLineTimesForPrefetchLessThan2 = true; 2900 2901 if (v->VRatioPrefetchY[k] > 4 || v->VRatioPrefetchC[k] > 4) 2902 VRatioPrefetchMoreThan4 = true; 2903 2904 if (v->NoUrgentLatencyHiding[k] == true) 2905 v->NoEnoughUrgentLatencyHiding = true; 2906 2907 if (v->NoUrgentLatencyHidingPre[k] == true) 2908 v->NoEnoughUrgentLatencyHidingPre = true; 2909 } 2910 2911 v->FractionOfUrgentBandwidth = MaxTotalRDBandwidthNoUrgentBurst / v->ReturnBW; 2912 2913 #ifdef __DML_VBA_DEBUG__ 2914 dml_print("DML::%s: MaxTotalRDBandwidthNoUrgentBurst=%f \n", __func__, MaxTotalRDBandwidthNoUrgentBurst); 2915 dml_print("DML::%s: ReturnBW=%f \n", __func__, v->ReturnBW); 2916 dml_print("DML::%s: FractionOfUrgentBandwidth=%f \n", __func__, v->FractionOfUrgentBandwidth); 2917 #endif 2918 2919 if (MaxTotalRDBandwidth <= v->ReturnBW && v->NoEnoughUrgentLatencyHiding == 0 && v->NoEnoughUrgentLatencyHidingPre == 0 2920 && !VRatioPrefetchMoreThan4 && !DestinationLineTimesForPrefetchLessThan2) 2921 v->PrefetchModeSupported = true; 2922 else { 2923 v->PrefetchModeSupported = false; 2924 dml_print("DML::%s: ***failed***. Bandwidth violation. Results are NOT valid\n", __func__); 2925 dml_print("DML::%s: MaxTotalRDBandwidth:%f AvailReturnBandwidth:%f\n", __func__, MaxTotalRDBandwidth, v->ReturnBW); 2926 dml_print("DML::%s: VRatioPrefetch %s more than 4\n", __func__, (VRatioPrefetchMoreThan4) ? "is" : "is not"); 2927 dml_print("DML::%s: DestinationLines for Prefetch %s less than 2\n", __func__, (DestinationLineTimesForPrefetchLessThan2) ? "is" : "is not"); 2928 } 2929 2930 // PREVIOUS_ERROR 2931 // This error result check was done after the PrefetchModeSupported. 
So we will 2932 // still try to calculate flip schedule even prefetch mode not supported 2933 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 2934 if (v->ErrorResult[k] == true || v->NotEnoughTimeForDynamicMetadata[k] == true) { 2935 v->PrefetchModeSupported = false; 2936 dml_print("DML::%s: ***failed***. Prefetch schedule violation. Results are NOT valid\n", __func__); 2937 } 2938 } 2939 2940 if (v->PrefetchModeSupported == true && v->ImmediateFlipSupport == true) { 2941 v->BandwidthAvailableForImmediateFlip = v->ReturnBW; 2942 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 2943 v->BandwidthAvailableForImmediateFlip = v->BandwidthAvailableForImmediateFlip 2944 - dml_max( 2945 v->ReadBandwidthPlaneLuma[k] * v->UrgBurstFactorLuma[k] 2946 + v->ReadBandwidthPlaneChroma[k] * v->UrgBurstFactorChroma[k] 2947 + v->cursor_bw[k] * v->UrgBurstFactorCursor[k], 2948 v->DPPPerPlane[k] 2949 * (v->RequiredPrefetchPixDataBWLuma[k] * v->UrgBurstFactorLumaPre[k] 2950 + v->RequiredPrefetchPixDataBWChroma[k] * v->UrgBurstFactorChromaPre[k]) 2951 + v->cursor_bw_pre[k] * v->UrgBurstFactorCursorPre[k]); 2952 } 2953 2954 v->TotImmediateFlipBytes = 0; 2955 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 2956 v->TotImmediateFlipBytes = v->TotImmediateFlipBytes 2957 + v->DPPPerPlane[k] * (v->PDEAndMetaPTEBytesFrame[k] + v->MetaRowByte[k] + v->PixelPTEBytesPerRow[k]); 2958 } 2959 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 2960 CalculateFlipSchedule( 2961 mode_lib, 2962 HostVMInefficiencyFactor, 2963 v->UrgentExtraLatency, 2964 v->UrgentLatency, 2965 v->GPUVMMaxPageTableLevels, 2966 v->HostVMEnable, 2967 v->HostVMMaxNonCachedPageTableLevels, 2968 v->GPUVMEnable, 2969 v->HostVMMinPageSize, 2970 v->PDEAndMetaPTEBytesFrame[k], 2971 v->MetaRowByte[k], 2972 v->PixelPTEBytesPerRow[k], 2973 v->BandwidthAvailableForImmediateFlip, 2974 v->TotImmediateFlipBytes, 2975 v->SourcePixelFormat[k], 2976 v->HTotal[k] / v->PixelClock[k], 2977 v->VRatio[k], 2978 v->VRatioChroma[k], 2979 v->Tno_bw[k], 2980 
v->DCCEnable[k], 2981 v->dpte_row_height[k], 2982 v->meta_row_height[k], 2983 v->dpte_row_height_chroma[k], 2984 v->meta_row_height_chroma[k], 2985 &v->DestinationLinesToRequestVMInImmediateFlip[k], 2986 &v->DestinationLinesToRequestRowInImmediateFlip[k], 2987 &v->final_flip_bw[k], 2988 &v->ImmediateFlipSupportedForPipe[k]); 2989 } 2990 2991 v->total_dcn_read_bw_with_flip = 0.0; 2992 v->total_dcn_read_bw_with_flip_no_urgent_burst = 0.0; 2993 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 2994 v->total_dcn_read_bw_with_flip = v->total_dcn_read_bw_with_flip 2995 + dml_max3( 2996 v->DPPPerPlane[k] * v->prefetch_vmrow_bw[k], 2997 v->DPPPerPlane[k] * v->final_flip_bw[k] 2998 + v->ReadBandwidthLuma[k] * v->UrgBurstFactorLuma[k] 2999 + v->ReadBandwidthChroma[k] * v->UrgBurstFactorChroma[k] 3000 + v->cursor_bw[k] * v->UrgBurstFactorCursor[k], 3001 v->DPPPerPlane[k] 3002 * (v->final_flip_bw[k] 3003 + v->RequiredPrefetchPixDataBWLuma[k] * v->UrgBurstFactorLumaPre[k] 3004 + v->RequiredPrefetchPixDataBWChroma[k] * v->UrgBurstFactorChromaPre[k]) 3005 + v->cursor_bw_pre[k] * v->UrgBurstFactorCursorPre[k]); 3006 v->total_dcn_read_bw_with_flip_no_urgent_burst = v->total_dcn_read_bw_with_flip_no_urgent_burst 3007 + dml_max3( 3008 v->DPPPerPlane[k] * v->prefetch_vmrow_bw[k], 3009 v->DPPPerPlane[k] * v->final_flip_bw[k] + v->ReadBandwidthPlaneLuma[k] 3010 + v->ReadBandwidthPlaneChroma[k] + v->cursor_bw[k], 3011 v->DPPPerPlane[k] 3012 * (v->final_flip_bw[k] + v->RequiredPrefetchPixDataBWLuma[k] 3013 + v->RequiredPrefetchPixDataBWChroma[k]) + v->cursor_bw_pre[k]); 3014 } 3015 v->FractionOfUrgentBandwidthImmediateFlip = v->total_dcn_read_bw_with_flip_no_urgent_burst / v->ReturnBW; 3016 3017 v->ImmediateFlipSupported = true; 3018 if (v->total_dcn_read_bw_with_flip > v->ReturnBW) { 3019 #ifdef __DML_VBA_DEBUG__ 3020 dml_print("DML::%s: total_dcn_read_bw_with_flip %f (bw w/ flip too high!)\n", __func__, v->total_dcn_read_bw_with_flip); 3021 #endif 3022 v->ImmediateFlipSupported = false; 
3023 v->total_dcn_read_bw_with_flip = MaxTotalRDBandwidth; 3024 } 3025 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 3026 if (v->ImmediateFlipSupportedForPipe[k] == false) { 3027 #ifdef __DML_VBA_DEBUG__ 3028 dml_print("DML::%s: Pipe %0d not supporting iflip\n", 3029 __func__, k); 3030 #endif 3031 v->ImmediateFlipSupported = false; 3032 } 3033 } 3034 } else { 3035 v->ImmediateFlipSupported = false; 3036 } 3037 3038 v->PrefetchAndImmediateFlipSupported = 3039 (v->PrefetchModeSupported == true 3040 && ((!v->ImmediateFlipSupport && !v->HostVMEnable 3041 && v->ImmediateFlipRequirement != dm_immediate_flip_required) || v->ImmediateFlipSupported)) ? 3042 true : false; 3043 #ifdef __DML_VBA_DEBUG__ 3044 dml_print("DML::%s: PrefetchModeSupported %d\n", __func__, v->PrefetchModeSupported); 3045 dml_print("DML::%s: ImmediateFlipRequirement %d\n", __func__, v->ImmediateFlipRequirement == dm_immediate_flip_required); 3046 dml_print("DML::%s: ImmediateFlipSupported %d\n", __func__, v->ImmediateFlipSupported); 3047 dml_print("DML::%s: ImmediateFlipSupport %d\n", __func__, v->ImmediateFlipSupport); 3048 dml_print("DML::%s: HostVMEnable %d\n", __func__, v->HostVMEnable); 3049 dml_print("DML::%s: PrefetchAndImmediateFlipSupported %d\n", __func__, v->PrefetchAndImmediateFlipSupported); 3050 #endif 3051 dml_print("DML::%s: Done loop: Vstartup=%d, Max Vstartup is %d\n", __func__, v->VStartupLines, v->MaximumMaxVStartupLines); 3052 3053 v->VStartupLines = v->VStartupLines + 1; 3054 } while (!v->PrefetchAndImmediateFlipSupported && v->VStartupLines <= v->MaximumMaxVStartupLines); 3055 ASSERT(v->PrefetchAndImmediateFlipSupported); 3056 3057 // Unbounded Request Enabled 3058 CalculateUnboundedRequestAndCompressedBufferSize( 3059 v->DETBufferSizeInKByte[0], 3060 v->ConfigReturnBufferSizeInKByte, 3061 v->UseUnboundedRequesting, 3062 v->TotalActiveDPP, 3063 NoChromaPlanes, 3064 v->MaxNumDPP, 3065 v->CompressedBufferSegmentSizeInkByte, 3066 v->Output, 3067 &v->UnboundedRequestEnabled, 3068 
&v->CompressedBufferSizeInkByte); 3069 3070 //Watermarks and NB P-State/DRAM Clock Change Support 3071 { 3072 enum clock_change_support DRAMClockChangeSupport; // dummy 3073 CalculateWatermarksAndDRAMSpeedChangeSupport( 3074 mode_lib, 3075 PrefetchMode, 3076 v->NumberOfActivePlanes, 3077 v->MaxLineBufferLines, 3078 v->LineBufferSize, 3079 v->WritebackInterfaceBufferSize, 3080 v->DCFCLK, 3081 v->ReturnBW, 3082 v->SynchronizedVBlank, 3083 v->dpte_group_bytes, 3084 v->MetaChunkSize, 3085 v->UrgentLatency, 3086 v->UrgentExtraLatency, 3087 v->WritebackLatency, 3088 v->WritebackChunkSize, 3089 v->SOCCLK, 3090 v->DRAMClockChangeLatency, 3091 v->SRExitTime, 3092 v->SREnterPlusExitTime, 3093 v->SRExitZ8Time, 3094 v->SREnterPlusExitZ8Time, 3095 v->DCFCLKDeepSleep, 3096 v->DETBufferSizeY, 3097 v->DETBufferSizeC, 3098 v->SwathHeightY, 3099 v->SwathHeightC, 3100 v->LBBitPerPixel, 3101 v->SwathWidthY, 3102 v->SwathWidthC, 3103 v->HRatio, 3104 v->HRatioChroma, 3105 v->vtaps, 3106 v->VTAPsChroma, 3107 v->VRatio, 3108 v->VRatioChroma, 3109 v->HTotal, 3110 v->PixelClock, 3111 v->BlendingAndTiming, 3112 v->DPPPerPlane, 3113 v->BytePerPixelDETY, 3114 v->BytePerPixelDETC, 3115 v->DSTXAfterScaler, 3116 v->DSTYAfterScaler, 3117 v->WritebackEnable, 3118 v->WritebackPixelFormat, 3119 v->WritebackDestinationWidth, 3120 v->WritebackDestinationHeight, 3121 v->WritebackSourceHeight, 3122 v->UnboundedRequestEnabled, 3123 v->CompressedBufferSizeInkByte, 3124 &DRAMClockChangeSupport, 3125 &v->UrgentWatermark, 3126 &v->WritebackUrgentWatermark, 3127 &v->DRAMClockChangeWatermark, 3128 &v->WritebackDRAMClockChangeWatermark, 3129 &v->StutterExitWatermark, 3130 &v->StutterEnterPlusExitWatermark, 3131 &v->Z8StutterExitWatermark, 3132 &v->Z8StutterEnterPlusExitWatermark, 3133 &v->MinActiveDRAMClockChangeLatencySupported); 3134 3135 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 3136 if (v->WritebackEnable[k] == true) { 3137 v->WritebackAllowDRAMClockChangeEndPosition[k] = dml_max( 3138 0, 3139 
v->VStartup[k] * v->HTotal[k] / v->PixelClock[k] - v->WritebackDRAMClockChangeWatermark); 3140 } else { 3141 v->WritebackAllowDRAMClockChangeEndPosition[k] = 0; 3142 } 3143 } 3144 } 3145 3146 //Display Pipeline Delivery Time in Prefetch, Groups 3147 CalculatePixelDeliveryTimes( 3148 v->NumberOfActivePlanes, 3149 v->VRatio, 3150 v->VRatioChroma, 3151 v->VRatioPrefetchY, 3152 v->VRatioPrefetchC, 3153 v->swath_width_luma_ub, 3154 v->swath_width_chroma_ub, 3155 v->DPPPerPlane, 3156 v->HRatio, 3157 v->HRatioChroma, 3158 v->PixelClock, 3159 v->PSCL_THROUGHPUT_LUMA, 3160 v->PSCL_THROUGHPUT_CHROMA, 3161 v->DPPCLK, 3162 v->BytePerPixelC, 3163 v->SourceScan, 3164 v->NumberOfCursors, 3165 v->CursorWidth, 3166 v->CursorBPP, 3167 v->BlockWidth256BytesY, 3168 v->BlockHeight256BytesY, 3169 v->BlockWidth256BytesC, 3170 v->BlockHeight256BytesC, 3171 v->DisplayPipeLineDeliveryTimeLuma, 3172 v->DisplayPipeLineDeliveryTimeChroma, 3173 v->DisplayPipeLineDeliveryTimeLumaPrefetch, 3174 v->DisplayPipeLineDeliveryTimeChromaPrefetch, 3175 v->DisplayPipeRequestDeliveryTimeLuma, 3176 v->DisplayPipeRequestDeliveryTimeChroma, 3177 v->DisplayPipeRequestDeliveryTimeLumaPrefetch, 3178 v->DisplayPipeRequestDeliveryTimeChromaPrefetch, 3179 v->CursorRequestDeliveryTime, 3180 v->CursorRequestDeliveryTimePrefetch); 3181 3182 CalculateMetaAndPTETimes( 3183 v->NumberOfActivePlanes, 3184 v->GPUVMEnable, 3185 v->MetaChunkSize, 3186 v->MinMetaChunkSizeBytes, 3187 v->HTotal, 3188 v->VRatio, 3189 v->VRatioChroma, 3190 v->DestinationLinesToRequestRowInVBlank, 3191 v->DestinationLinesToRequestRowInImmediateFlip, 3192 v->DCCEnable, 3193 v->PixelClock, 3194 v->BytePerPixelY, 3195 v->BytePerPixelC, 3196 v->SourceScan, 3197 v->dpte_row_height, 3198 v->dpte_row_height_chroma, 3199 v->meta_row_width, 3200 v->meta_row_width_chroma, 3201 v->meta_row_height, 3202 v->meta_row_height_chroma, 3203 v->meta_req_width, 3204 v->meta_req_width_chroma, 3205 v->meta_req_height, 3206 v->meta_req_height_chroma, 3207 
v->dpte_group_bytes, 3208 v->PTERequestSizeY, 3209 v->PTERequestSizeC, 3210 v->PixelPTEReqWidthY, 3211 v->PixelPTEReqHeightY, 3212 v->PixelPTEReqWidthC, 3213 v->PixelPTEReqHeightC, 3214 v->dpte_row_width_luma_ub, 3215 v->dpte_row_width_chroma_ub, 3216 v->DST_Y_PER_PTE_ROW_NOM_L, 3217 v->DST_Y_PER_PTE_ROW_NOM_C, 3218 v->DST_Y_PER_META_ROW_NOM_L, 3219 v->DST_Y_PER_META_ROW_NOM_C, 3220 v->TimePerMetaChunkNominal, 3221 v->TimePerChromaMetaChunkNominal, 3222 v->TimePerMetaChunkVBlank, 3223 v->TimePerChromaMetaChunkVBlank, 3224 v->TimePerMetaChunkFlip, 3225 v->TimePerChromaMetaChunkFlip, 3226 v->time_per_pte_group_nom_luma, 3227 v->time_per_pte_group_vblank_luma, 3228 v->time_per_pte_group_flip_luma, 3229 v->time_per_pte_group_nom_chroma, 3230 v->time_per_pte_group_vblank_chroma, 3231 v->time_per_pte_group_flip_chroma); 3232 3233 CalculateVMGroupAndRequestTimes( 3234 v->NumberOfActivePlanes, 3235 v->GPUVMEnable, 3236 v->GPUVMMaxPageTableLevels, 3237 v->HTotal, 3238 v->BytePerPixelC, 3239 v->DestinationLinesToRequestVMInVBlank, 3240 v->DestinationLinesToRequestVMInImmediateFlip, 3241 v->DCCEnable, 3242 v->PixelClock, 3243 v->dpte_row_width_luma_ub, 3244 v->dpte_row_width_chroma_ub, 3245 v->vm_group_bytes, 3246 v->dpde0_bytes_per_frame_ub_l, 3247 v->dpde0_bytes_per_frame_ub_c, 3248 v->meta_pte_bytes_per_frame_ub_l, 3249 v->meta_pte_bytes_per_frame_ub_c, 3250 v->TimePerVMGroupVBlank, 3251 v->TimePerVMGroupFlip, 3252 v->TimePerVMRequestVBlank, 3253 v->TimePerVMRequestFlip); 3254 3255 // Min TTUVBlank 3256 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 3257 if (PrefetchMode == 0) { 3258 v->AllowDRAMClockChangeDuringVBlank[k] = true; 3259 v->AllowDRAMSelfRefreshDuringVBlank[k] = true; 3260 v->MinTTUVBlank[k] = dml_max( 3261 v->DRAMClockChangeWatermark, 3262 dml_max(v->StutterEnterPlusExitWatermark, v->UrgentWatermark)); 3263 } else if (PrefetchMode == 1) { 3264 v->AllowDRAMClockChangeDuringVBlank[k] = false; 3265 v->AllowDRAMSelfRefreshDuringVBlank[k] = true; 3266 
v->MinTTUVBlank[k] = dml_max(v->StutterEnterPlusExitWatermark, v->UrgentWatermark); 3267 } else { 3268 v->AllowDRAMClockChangeDuringVBlank[k] = false; 3269 v->AllowDRAMSelfRefreshDuringVBlank[k] = false; 3270 v->MinTTUVBlank[k] = v->UrgentWatermark; 3271 } 3272 if (!v->DynamicMetadataEnable[k]) 3273 v->MinTTUVBlank[k] = v->TCalc + v->MinTTUVBlank[k]; 3274 } 3275 3276 // DCC Configuration 3277 v->ActiveDPPs = 0; 3278 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 3279 CalculateDCCConfiguration(v->DCCEnable[k], false, // We should always know the direction DCCProgrammingAssumesScanDirectionUnknown, 3280 v->SourcePixelFormat[k], 3281 v->SurfaceWidthY[k], 3282 v->SurfaceWidthC[k], 3283 v->SurfaceHeightY[k], 3284 v->SurfaceHeightC[k], 3285 v->DETBufferSizeInKByte[0] * 1024, 3286 v->BlockHeight256BytesY[k], 3287 v->BlockHeight256BytesC[k], 3288 v->SurfaceTiling[k], 3289 v->BytePerPixelY[k], 3290 v->BytePerPixelC[k], 3291 v->BytePerPixelDETY[k], 3292 v->BytePerPixelDETC[k], 3293 v->SourceScan[k], 3294 &v->DCCYMaxUncompressedBlock[k], 3295 &v->DCCCMaxUncompressedBlock[k], 3296 &v->DCCYMaxCompressedBlock[k], 3297 &v->DCCCMaxCompressedBlock[k], 3298 &v->DCCYIndependentBlock[k], 3299 &v->DCCCIndependentBlock[k]); 3300 } 3301 3302 // VStartup Adjustment 3303 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 3304 bool isInterlaceTiming; 3305 double Tvstartup_margin = (v->MaxVStartupLines[k] - v->VStartup[k]) * v->HTotal[k] / v->PixelClock[k]; 3306 #ifdef __DML_VBA_DEBUG__ 3307 dml_print("DML::%s: k=%d, MinTTUVBlank = %f (before margin)\n", __func__, k, v->MinTTUVBlank[k]); 3308 #endif 3309 3310 v->MinTTUVBlank[k] = v->MinTTUVBlank[k] + Tvstartup_margin; 3311 3312 #ifdef __DML_VBA_DEBUG__ 3313 dml_print("DML::%s: k=%d, Tvstartup_margin = %f\n", __func__, k, Tvstartup_margin); 3314 dml_print("DML::%s: k=%d, MaxVStartupLines = %d\n", __func__, k, v->MaxVStartupLines[k]); 3315 dml_print("DML::%s: k=%d, VStartup = %d\n", __func__, k, v->VStartup[k]); 3316 dml_print("DML::%s: k=%d, 
MinTTUVBlank = %f\n", __func__, k, v->MinTTUVBlank[k]); 3317 #endif 3318 3319 v->Tdmdl[k] = v->Tdmdl[k] + Tvstartup_margin; 3320 if (v->DynamicMetadataEnable[k] && v->DynamicMetadataVMEnabled) { 3321 v->Tdmdl_vm[k] = v->Tdmdl_vm[k] + Tvstartup_margin; 3322 } 3323 3324 isInterlaceTiming = (v->Interlace[k] && !v->ProgressiveToInterlaceUnitInOPP); 3325 3326 v->MIN_DST_Y_NEXT_START[k] = ((isInterlaceTiming ? dml_floor((v->VTotal[k] - v->VFrontPorch[k]) / 2.0, 1.0) : v->VTotal[k]) 3327 - v->VFrontPorch[k]) 3328 + dml_max(1.0, dml_ceil(v->WritebackDelay[v->VoltageLevel][k] / (v->HTotal[k] / v->PixelClock[k]), 1.0)) 3329 + dml_floor(4.0 * v->TSetup[k] / (v->HTotal[k] / v->PixelClock[k]), 1.0) / 4.0; 3330 3331 v->VStartup[k] = (isInterlaceTiming ? (2 * v->MaxVStartupLines[k]) : v->MaxVStartupLines[k]); 3332 3333 if (((v->VUpdateOffsetPix[k] + v->VUpdateWidthPix[k] + v->VReadyOffsetPix[k]) / v->HTotal[k]) 3334 <= (isInterlaceTiming ? 3335 dml_floor((v->VTotal[k] - v->VActive[k] - v->VFrontPorch[k] - v->VStartup[k]) / 2.0, 1.0) : 3336 (int) (v->VTotal[k] - v->VActive[k] - v->VFrontPorch[k] - v->VStartup[k]))) { 3337 v->VREADY_AT_OR_AFTER_VSYNC[k] = true; 3338 } else { 3339 v->VREADY_AT_OR_AFTER_VSYNC[k] = false; 3340 } 3341 #ifdef __DML_VBA_DEBUG__ 3342 dml_print("DML::%s: k=%d, VStartup = %d (max)\n", __func__, k, v->VStartup[k]); 3343 dml_print("DML::%s: k=%d, VUpdateOffsetPix = %d\n", __func__, k, v->VUpdateOffsetPix[k]); 3344 dml_print("DML::%s: k=%d, VUpdateWidthPix = %d\n", __func__, k, v->VUpdateWidthPix[k]); 3345 dml_print("DML::%s: k=%d, VReadyOffsetPix = %d\n", __func__, k, v->VReadyOffsetPix[k]); 3346 dml_print("DML::%s: k=%d, HTotal = %d\n", __func__, k, v->HTotal[k]); 3347 dml_print("DML::%s: k=%d, VTotal = %d\n", __func__, k, v->VTotal[k]); 3348 dml_print("DML::%s: k=%d, VActive = %d\n", __func__, k, v->VActive[k]); 3349 dml_print("DML::%s: k=%d, VFrontPorch = %d\n", __func__, k, v->VFrontPorch[k]); 3350 dml_print("DML::%s: k=%d, VStartup = %d\n", __func__, k, 
v->VStartup[k]); 3351 dml_print("DML::%s: k=%d, MIN_DST_Y_NEXT_START = %f\n", __func__, k, v->MIN_DST_Y_NEXT_START[k]); 3352 dml_print("DML::%s: k=%d, VREADY_AT_OR_AFTER_VSYNC = %d\n", __func__, k, v->VREADY_AT_OR_AFTER_VSYNC[k]); 3353 #endif 3354 } 3355 3356 { 3357 //Maximum Bandwidth Used 3358 double TotalWRBandwidth = 0; 3359 double MaxPerPlaneVActiveWRBandwidth = 0; 3360 double WRBandwidth = 0; 3361 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 3362 if (v->WritebackEnable[k] == true && v->WritebackPixelFormat[k] == dm_444_32) { 3363 WRBandwidth = v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k] 3364 / (v->HTotal[k] * v->WritebackSourceHeight[k] / v->PixelClock[k]) * 4; 3365 } else if (v->WritebackEnable[k] == true) { 3366 WRBandwidth = v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k] 3367 / (v->HTotal[k] * v->WritebackSourceHeight[k] / v->PixelClock[k]) * 8; 3368 } 3369 TotalWRBandwidth = TotalWRBandwidth + WRBandwidth; 3370 MaxPerPlaneVActiveWRBandwidth = dml_max(MaxPerPlaneVActiveWRBandwidth, WRBandwidth); 3371 } 3372 3373 v->TotalDataReadBandwidth = 0; 3374 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 3375 v->TotalDataReadBandwidth = v->TotalDataReadBandwidth + v->ReadBandwidthPlaneLuma[k] + v->ReadBandwidthPlaneChroma[k]; 3376 } 3377 } 3378 // Stutter Efficiency 3379 CalculateStutterEfficiency( 3380 mode_lib, 3381 v->CompressedBufferSizeInkByte, 3382 v->UnboundedRequestEnabled, 3383 v->ConfigReturnBufferSizeInKByte, 3384 v->MetaFIFOSizeInKEntries, 3385 v->ZeroSizeBufferEntries, 3386 v->NumberOfActivePlanes, 3387 v->ROBBufferSizeInKByte, 3388 v->TotalDataReadBandwidth, 3389 v->DCFCLK, 3390 v->ReturnBW, 3391 v->COMPBUF_RESERVED_SPACE_64B, 3392 v->COMPBUF_RESERVED_SPACE_ZS, 3393 v->SRExitTime, 3394 v->SRExitZ8Time, 3395 v->SynchronizedVBlank, 3396 v->StutterEnterPlusExitWatermark, 3397 v->Z8StutterEnterPlusExitWatermark, 3398 v->ProgressiveToInterlaceUnitInOPP, 3399 v->Interlace, 3400 v->MinTTUVBlank, 3401 
v->DPPPerPlane, 3402 v->DETBufferSizeY, 3403 v->BytePerPixelY, 3404 v->BytePerPixelDETY, 3405 v->SwathWidthY, 3406 v->SwathHeightY, 3407 v->SwathHeightC, 3408 v->DCCRateLuma, 3409 v->DCCRateChroma, 3410 v->DCCFractionOfZeroSizeRequestsLuma, 3411 v->DCCFractionOfZeroSizeRequestsChroma, 3412 v->HTotal, 3413 v->VTotal, 3414 v->PixelClock, 3415 v->VRatio, 3416 v->SourceScan, 3417 v->BlockHeight256BytesY, 3418 v->BlockWidth256BytesY, 3419 v->BlockHeight256BytesC, 3420 v->BlockWidth256BytesC, 3421 v->DCCYMaxUncompressedBlock, 3422 v->DCCCMaxUncompressedBlock, 3423 v->VActive, 3424 v->DCCEnable, 3425 v->WritebackEnable, 3426 v->ReadBandwidthPlaneLuma, 3427 v->ReadBandwidthPlaneChroma, 3428 v->meta_row_bw, 3429 v->dpte_row_bw, 3430 &v->StutterEfficiencyNotIncludingVBlank, 3431 &v->StutterEfficiency, 3432 &v->NumberOfStutterBurstsPerFrame, 3433 &v->Z8StutterEfficiencyNotIncludingVBlank, 3434 &v->Z8StutterEfficiency, 3435 &v->Z8NumberOfStutterBurstsPerFrame, 3436 &v->StutterPeriod); 3437 } 3438 3439 static void DisplayPipeConfiguration(struct display_mode_lib *mode_lib) 3440 { 3441 struct vba_vars_st *v = &mode_lib->vba; 3442 // Display Pipe Configuration 3443 double BytePerPixDETY[DC__NUM_DPP__MAX]; 3444 double BytePerPixDETC[DC__NUM_DPP__MAX]; 3445 int BytePerPixY[DC__NUM_DPP__MAX]; 3446 int BytePerPixC[DC__NUM_DPP__MAX]; 3447 int Read256BytesBlockHeightY[DC__NUM_DPP__MAX]; 3448 int Read256BytesBlockHeightC[DC__NUM_DPP__MAX]; 3449 int Read256BytesBlockWidthY[DC__NUM_DPP__MAX]; 3450 int Read256BytesBlockWidthC[DC__NUM_DPP__MAX]; 3451 double dummy1[DC__NUM_DPP__MAX]; 3452 double dummy2[DC__NUM_DPP__MAX]; 3453 double dummy3[DC__NUM_DPP__MAX]; 3454 double dummy4[DC__NUM_DPP__MAX]; 3455 int dummy5[DC__NUM_DPP__MAX]; 3456 int dummy6[DC__NUM_DPP__MAX]; 3457 bool dummy7[DC__NUM_DPP__MAX]; 3458 bool dummysinglestring; 3459 3460 unsigned int k; 3461 3462 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 3463 3464 CalculateBytePerPixelAnd256BBlockSizes( 3465 v->SourcePixelFormat[k], 
3466 v->SurfaceTiling[k], 3467 &BytePerPixY[k], 3468 &BytePerPixC[k], 3469 &BytePerPixDETY[k], 3470 &BytePerPixDETC[k], 3471 &Read256BytesBlockHeightY[k], 3472 &Read256BytesBlockHeightC[k], 3473 &Read256BytesBlockWidthY[k], 3474 &Read256BytesBlockWidthC[k]); 3475 } 3476 3477 CalculateSwathAndDETConfiguration( 3478 false, 3479 v->NumberOfActivePlanes, 3480 v->DETBufferSizeInKByte[0], 3481 dummy1, 3482 dummy2, 3483 v->SourceScan, 3484 v->SourcePixelFormat, 3485 v->SurfaceTiling, 3486 v->ViewportWidth, 3487 v->ViewportHeight, 3488 v->SurfaceWidthY, 3489 v->SurfaceWidthC, 3490 v->SurfaceHeightY, 3491 v->SurfaceHeightC, 3492 Read256BytesBlockHeightY, 3493 Read256BytesBlockHeightC, 3494 Read256BytesBlockWidthY, 3495 Read256BytesBlockWidthC, 3496 v->ODMCombineEnabled, 3497 v->BlendingAndTiming, 3498 BytePerPixY, 3499 BytePerPixC, 3500 BytePerPixDETY, 3501 BytePerPixDETC, 3502 v->HActive, 3503 v->HRatio, 3504 v->HRatioChroma, 3505 v->DPPPerPlane, 3506 dummy5, 3507 dummy6, 3508 dummy3, 3509 dummy4, 3510 v->SwathHeightY, 3511 v->SwathHeightC, 3512 v->DETBufferSizeY, 3513 v->DETBufferSizeC, 3514 dummy7, 3515 &dummysinglestring); 3516 } 3517 3518 static bool CalculateBytePerPixelAnd256BBlockSizes( 3519 enum source_format_class SourcePixelFormat, 3520 enum dm_swizzle_mode SurfaceTiling, 3521 unsigned int *BytePerPixelY, 3522 unsigned int *BytePerPixelC, 3523 double *BytePerPixelDETY, 3524 double *BytePerPixelDETC, 3525 unsigned int *BlockHeight256BytesY, 3526 unsigned int *BlockHeight256BytesC, 3527 unsigned int *BlockWidth256BytesY, 3528 unsigned int *BlockWidth256BytesC) 3529 { 3530 if (SourcePixelFormat == dm_444_64) { 3531 *BytePerPixelDETY = 8; 3532 *BytePerPixelDETC = 0; 3533 *BytePerPixelY = 8; 3534 *BytePerPixelC = 0; 3535 } else if (SourcePixelFormat == dm_444_32 || SourcePixelFormat == dm_rgbe) { 3536 *BytePerPixelDETY = 4; 3537 *BytePerPixelDETC = 0; 3538 *BytePerPixelY = 4; 3539 *BytePerPixelC = 0; 3540 } else if (SourcePixelFormat == dm_444_16) { 3541 
*BytePerPixelDETY = 2; 3542 *BytePerPixelDETC = 0; 3543 *BytePerPixelY = 2; 3544 *BytePerPixelC = 0; 3545 } else if (SourcePixelFormat == dm_444_8) { 3546 *BytePerPixelDETY = 1; 3547 *BytePerPixelDETC = 0; 3548 *BytePerPixelY = 1; 3549 *BytePerPixelC = 0; 3550 } else if (SourcePixelFormat == dm_rgbe_alpha) { 3551 *BytePerPixelDETY = 4; 3552 *BytePerPixelDETC = 1; 3553 *BytePerPixelY = 4; 3554 *BytePerPixelC = 1; 3555 } else if (SourcePixelFormat == dm_420_8) { 3556 *BytePerPixelDETY = 1; 3557 *BytePerPixelDETC = 2; 3558 *BytePerPixelY = 1; 3559 *BytePerPixelC = 2; 3560 } else if (SourcePixelFormat == dm_420_12) { 3561 *BytePerPixelDETY = 2; 3562 *BytePerPixelDETC = 4; 3563 *BytePerPixelY = 2; 3564 *BytePerPixelC = 4; 3565 } else { 3566 *BytePerPixelDETY = 4.0 / 3; 3567 *BytePerPixelDETC = 8.0 / 3; 3568 *BytePerPixelY = 2; 3569 *BytePerPixelC = 4; 3570 } 3571 3572 if ((SourcePixelFormat == dm_444_64 || SourcePixelFormat == dm_444_32 || SourcePixelFormat == dm_444_16 || SourcePixelFormat == dm_444_8 || SourcePixelFormat == dm_mono_16 3573 || SourcePixelFormat == dm_mono_8 || SourcePixelFormat == dm_rgbe)) { 3574 if (SurfaceTiling == dm_sw_linear) { 3575 *BlockHeight256BytesY = 1; 3576 } else if (SourcePixelFormat == dm_444_64) { 3577 *BlockHeight256BytesY = 4; 3578 } else if (SourcePixelFormat == dm_444_8) { 3579 *BlockHeight256BytesY = 16; 3580 } else { 3581 *BlockHeight256BytesY = 8; 3582 } 3583 *BlockWidth256BytesY = 256U / *BytePerPixelY / *BlockHeight256BytesY; 3584 *BlockHeight256BytesC = 0; 3585 *BlockWidth256BytesC = 0; 3586 } else { 3587 if (SurfaceTiling == dm_sw_linear) { 3588 *BlockHeight256BytesY = 1; 3589 *BlockHeight256BytesC = 1; 3590 } else if (SourcePixelFormat == dm_rgbe_alpha) { 3591 *BlockHeight256BytesY = 8; 3592 *BlockHeight256BytesC = 16; 3593 } else if (SourcePixelFormat == dm_420_8) { 3594 *BlockHeight256BytesY = 16; 3595 *BlockHeight256BytesC = 8; 3596 } else { 3597 *BlockHeight256BytesY = 8; 3598 *BlockHeight256BytesC = 8; 3599 } 3600 
*BlockWidth256BytesY = 256U / *BytePerPixelY / *BlockHeight256BytesY; 3601 *BlockWidth256BytesC = 256U / *BytePerPixelC / *BlockHeight256BytesC; 3602 } 3603 return true; 3604 } 3605 3606 static double CalculateTWait(unsigned int PrefetchMode, double DRAMClockChangeLatency, double UrgentLatency, double SREnterPlusExitTime) 3607 { 3608 if (PrefetchMode == 0) { 3609 return dml_max(DRAMClockChangeLatency + UrgentLatency, dml_max(SREnterPlusExitTime, UrgentLatency)); 3610 } else if (PrefetchMode == 1) { 3611 return dml_max(SREnterPlusExitTime, UrgentLatency); 3612 } else { 3613 return UrgentLatency; 3614 } 3615 } 3616 3617 double dml31_CalculateWriteBackDISPCLK( 3618 enum source_format_class WritebackPixelFormat, 3619 double PixelClock, 3620 double WritebackHRatio, 3621 double WritebackVRatio, 3622 unsigned int WritebackHTaps, 3623 unsigned int WritebackVTaps, 3624 long WritebackSourceWidth, 3625 long WritebackDestinationWidth, 3626 unsigned int HTotal, 3627 unsigned int WritebackLineBufferSize) 3628 { 3629 double DISPCLK_H, DISPCLK_V, DISPCLK_HB; 3630 3631 DISPCLK_H = PixelClock * dml_ceil(WritebackHTaps / 8.0, 1) / WritebackHRatio; 3632 DISPCLK_V = PixelClock * (WritebackVTaps * dml_ceil(WritebackDestinationWidth / 6.0, 1) + 8.0) / HTotal; 3633 DISPCLK_HB = PixelClock * WritebackVTaps * (WritebackDestinationWidth * WritebackVTaps - WritebackLineBufferSize / 57.0) / 6.0 / WritebackSourceWidth; 3634 return dml_max3(DISPCLK_H, DISPCLK_V, DISPCLK_HB); 3635 } 3636 3637 static double CalculateWriteBackDelay( 3638 enum source_format_class WritebackPixelFormat, 3639 double WritebackHRatio, 3640 double WritebackVRatio, 3641 unsigned int WritebackVTaps, 3642 int WritebackDestinationWidth, 3643 int WritebackDestinationHeight, 3644 int WritebackSourceHeight, 3645 unsigned int HTotal) 3646 { 3647 double CalculateWriteBackDelay; 3648 double Line_length; 3649 double Output_lines_last_notclamped; 3650 double WritebackVInit; 3651 3652 WritebackVInit = (WritebackVRatio + WritebackVTaps 
+ 1) / 2; 3653 Line_length = dml_max((double) WritebackDestinationWidth, dml_ceil(WritebackDestinationWidth / 6.0, 1) * WritebackVTaps); 3654 Output_lines_last_notclamped = WritebackDestinationHeight - 1 - dml_ceil((WritebackSourceHeight - WritebackVInit) / WritebackVRatio, 1); 3655 if (Output_lines_last_notclamped < 0) { 3656 CalculateWriteBackDelay = 0; 3657 } else { 3658 CalculateWriteBackDelay = Output_lines_last_notclamped * Line_length + (HTotal - WritebackDestinationWidth) + 80; 3659 } 3660 return CalculateWriteBackDelay; 3661 } 3662 3663 static void CalculateVupdateAndDynamicMetadataParameters( 3664 int MaxInterDCNTileRepeaters, 3665 double DPPCLK, 3666 double DISPCLK, 3667 double DCFClkDeepSleep, 3668 double PixelClock, 3669 int HTotal, 3670 int VBlank, 3671 int DynamicMetadataTransmittedBytes, 3672 int DynamicMetadataLinesBeforeActiveRequired, 3673 int InterlaceEnable, 3674 bool ProgressiveToInterlaceUnitInOPP, 3675 double *TSetup, 3676 double *Tdmbf, 3677 double *Tdmec, 3678 double *Tdmsks, 3679 int *VUpdateOffsetPix, 3680 double *VUpdateWidthPix, 3681 double *VReadyOffsetPix) 3682 { 3683 double TotalRepeaterDelayTime; 3684 3685 TotalRepeaterDelayTime = MaxInterDCNTileRepeaters * (2 / DPPCLK + 3 / DISPCLK); 3686 *VUpdateWidthPix = dml_ceil((14.0 / DCFClkDeepSleep + 12.0 / DPPCLK + TotalRepeaterDelayTime) * PixelClock, 1.0); 3687 *VReadyOffsetPix = dml_ceil(dml_max(150.0 / DPPCLK, TotalRepeaterDelayTime + 20.0 / DCFClkDeepSleep + 10.0 / DPPCLK) * PixelClock, 1.0); 3688 *VUpdateOffsetPix = dml_ceil(HTotal / 4.0, 1); 3689 *TSetup = (*VUpdateOffsetPix + *VUpdateWidthPix + *VReadyOffsetPix) / PixelClock; 3690 *Tdmbf = DynamicMetadataTransmittedBytes / 4.0 / DISPCLK; 3691 *Tdmec = HTotal / PixelClock; 3692 if (DynamicMetadataLinesBeforeActiveRequired == 0) { 3693 *Tdmsks = VBlank * HTotal / PixelClock / 2.0; 3694 } else { 3695 *Tdmsks = DynamicMetadataLinesBeforeActiveRequired * HTotal / PixelClock; 3696 } 3697 if (InterlaceEnable == 1 && 
ProgressiveToInterlaceUnitInOPP == false) { 3698 *Tdmsks = *Tdmsks / 2; 3699 } 3700 #ifdef __DML_VBA_DEBUG__ 3701 dml_print("DML::%s: VUpdateWidthPix = %d\n", __func__, *VUpdateWidthPix); 3702 dml_print("DML::%s: VReadyOffsetPix = %d\n", __func__, *VReadyOffsetPix); 3703 dml_print("DML::%s: VUpdateOffsetPix = %d\n", __func__, *VUpdateOffsetPix); 3704 #endif 3705 } 3706 3707 static void CalculateRowBandwidth( 3708 bool GPUVMEnable, 3709 enum source_format_class SourcePixelFormat, 3710 double VRatio, 3711 double VRatioChroma, 3712 bool DCCEnable, 3713 double LineTime, 3714 unsigned int MetaRowByteLuma, 3715 unsigned int MetaRowByteChroma, 3716 unsigned int meta_row_height_luma, 3717 unsigned int meta_row_height_chroma, 3718 unsigned int PixelPTEBytesPerRowLuma, 3719 unsigned int PixelPTEBytesPerRowChroma, 3720 unsigned int dpte_row_height_luma, 3721 unsigned int dpte_row_height_chroma, 3722 double *meta_row_bw, 3723 double *dpte_row_bw) 3724 { 3725 if (DCCEnable != true) { 3726 *meta_row_bw = 0; 3727 } else if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12 || SourcePixelFormat == dm_rgbe_alpha) { 3728 *meta_row_bw = VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime) + VRatioChroma * MetaRowByteChroma / (meta_row_height_chroma * LineTime); 3729 } else { 3730 *meta_row_bw = VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime); 3731 } 3732 3733 if (GPUVMEnable != true) { 3734 *dpte_row_bw = 0; 3735 } else if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12 || SourcePixelFormat == dm_rgbe_alpha) { 3736 *dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime) 3737 + VRatioChroma * PixelPTEBytesPerRowChroma / (dpte_row_height_chroma * LineTime); 3738 } else { 3739 *dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime); 3740 } 3741 } 3742 3743 static void CalculateFlipSchedule( 3744 struct 
display_mode_lib *mode_lib, 3745 double HostVMInefficiencyFactor, 3746 double UrgentExtraLatency, 3747 double UrgentLatency, 3748 unsigned int GPUVMMaxPageTableLevels, 3749 bool HostVMEnable, 3750 unsigned int HostVMMaxNonCachedPageTableLevels, 3751 bool GPUVMEnable, 3752 double HostVMMinPageSize, 3753 double PDEAndMetaPTEBytesPerFrame, 3754 double MetaRowBytes, 3755 double DPTEBytesPerRow, 3756 double BandwidthAvailableForImmediateFlip, 3757 unsigned int TotImmediateFlipBytes, 3758 enum source_format_class SourcePixelFormat, 3759 double LineTime, 3760 double VRatio, 3761 double VRatioChroma, 3762 double Tno_bw, 3763 bool DCCEnable, 3764 unsigned int dpte_row_height, 3765 unsigned int meta_row_height, 3766 unsigned int dpte_row_height_chroma, 3767 unsigned int meta_row_height_chroma, 3768 double *DestinationLinesToRequestVMInImmediateFlip, 3769 double *DestinationLinesToRequestRowInImmediateFlip, 3770 double *final_flip_bw, 3771 bool *ImmediateFlipSupportedForPipe) 3772 { 3773 double min_row_time = 0.0; 3774 unsigned int HostVMDynamicLevelsTrips; 3775 double TimeForFetchingMetaPTEImmediateFlip; 3776 double TimeForFetchingRowInVBlankImmediateFlip; 3777 double ImmediateFlipBW; 3778 3779 if (GPUVMEnable == true && HostVMEnable == true) { 3780 HostVMDynamicLevelsTrips = HostVMMaxNonCachedPageTableLevels; 3781 } else { 3782 HostVMDynamicLevelsTrips = 0; 3783 } 3784 3785 if (GPUVMEnable == true || DCCEnable == true) { 3786 ImmediateFlipBW = (PDEAndMetaPTEBytesPerFrame + MetaRowBytes + DPTEBytesPerRow) * BandwidthAvailableForImmediateFlip / TotImmediateFlipBytes; 3787 } 3788 3789 if (GPUVMEnable == true) { 3790 TimeForFetchingMetaPTEImmediateFlip = dml_max3( 3791 Tno_bw + PDEAndMetaPTEBytesPerFrame * HostVMInefficiencyFactor / ImmediateFlipBW, 3792 UrgentExtraLatency + UrgentLatency * (GPUVMMaxPageTableLevels * (HostVMDynamicLevelsTrips + 1) - 1), 3793 LineTime / 4.0); 3794 } else { 3795 TimeForFetchingMetaPTEImmediateFlip = 0; 3796 } 3797 3798 
*DestinationLinesToRequestVMInImmediateFlip = dml_ceil(4.0 * (TimeForFetchingMetaPTEImmediateFlip / LineTime), 1) / 4.0; 3799 if ((GPUVMEnable == true || DCCEnable == true)) { 3800 TimeForFetchingRowInVBlankImmediateFlip = dml_max3( 3801 (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / ImmediateFlipBW, 3802 UrgentLatency * (HostVMDynamicLevelsTrips + 1), 3803 LineTime / 4); 3804 } else { 3805 TimeForFetchingRowInVBlankImmediateFlip = 0; 3806 } 3807 3808 *DestinationLinesToRequestRowInImmediateFlip = dml_ceil(4.0 * (TimeForFetchingRowInVBlankImmediateFlip / LineTime), 1) / 4.0; 3809 3810 if (GPUVMEnable == true) { 3811 *final_flip_bw = dml_max( 3812 PDEAndMetaPTEBytesPerFrame * HostVMInefficiencyFactor / (*DestinationLinesToRequestVMInImmediateFlip * LineTime), 3813 (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / (*DestinationLinesToRequestRowInImmediateFlip * LineTime)); 3814 } else if ((GPUVMEnable == true || DCCEnable == true)) { 3815 *final_flip_bw = (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / (*DestinationLinesToRequestRowInImmediateFlip * LineTime); 3816 } else { 3817 *final_flip_bw = 0; 3818 } 3819 3820 if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_rgbe_alpha) { 3821 if (GPUVMEnable == true && DCCEnable != true) { 3822 min_row_time = dml_min(dpte_row_height * LineTime / VRatio, dpte_row_height_chroma * LineTime / VRatioChroma); 3823 } else if (GPUVMEnable != true && DCCEnable == true) { 3824 min_row_time = dml_min(meta_row_height * LineTime / VRatio, meta_row_height_chroma * LineTime / VRatioChroma); 3825 } else { 3826 min_row_time = dml_min4( 3827 dpte_row_height * LineTime / VRatio, 3828 meta_row_height * LineTime / VRatio, 3829 dpte_row_height_chroma * LineTime / VRatioChroma, 3830 meta_row_height_chroma * LineTime / VRatioChroma); 3831 } 3832 } else { 3833 if (GPUVMEnable == true && DCCEnable != true) { 3834 min_row_time = dpte_row_height * LineTime / VRatio; 
3835 } else if (GPUVMEnable != true && DCCEnable == true) { 3836 min_row_time = meta_row_height * LineTime / VRatio; 3837 } else { 3838 min_row_time = dml_min(dpte_row_height * LineTime / VRatio, meta_row_height * LineTime / VRatio); 3839 } 3840 } 3841 3842 if (*DestinationLinesToRequestVMInImmediateFlip >= 32 || *DestinationLinesToRequestRowInImmediateFlip >= 16 3843 || TimeForFetchingMetaPTEImmediateFlip + 2 * TimeForFetchingRowInVBlankImmediateFlip > min_row_time) { 3844 *ImmediateFlipSupportedForPipe = false; 3845 } else { 3846 *ImmediateFlipSupportedForPipe = true; 3847 } 3848 3849 #ifdef __DML_VBA_DEBUG__ 3850 dml_print("DML::%s: DestinationLinesToRequestVMInImmediateFlip = %f\n", __func__, *DestinationLinesToRequestVMInImmediateFlip); 3851 dml_print("DML::%s: DestinationLinesToRequestRowInImmediateFlip = %f\n", __func__, *DestinationLinesToRequestRowInImmediateFlip); 3852 dml_print("DML::%s: TimeForFetchingMetaPTEImmediateFlip = %f\n", __func__, TimeForFetchingMetaPTEImmediateFlip); 3853 dml_print("DML::%s: TimeForFetchingRowInVBlankImmediateFlip = %f\n", __func__, TimeForFetchingRowInVBlankImmediateFlip); 3854 dml_print("DML::%s: min_row_time = %f\n", __func__, min_row_time); 3855 dml_print("DML::%s: ImmediateFlipSupportedForPipe = %d\n", __func__, *ImmediateFlipSupportedForPipe); 3856 #endif 3857 3858 } 3859 3860 static double TruncToValidBPP( 3861 double LinkBitRate, 3862 int Lanes, 3863 int HTotal, 3864 int HActive, 3865 double PixelClock, 3866 double DesiredBPP, 3867 bool DSCEnable, 3868 enum output_encoder_class Output, 3869 enum output_format_class Format, 3870 unsigned int DSCInputBitPerComponent, 3871 int DSCSlices, 3872 int AudioRate, 3873 int AudioLayout, 3874 enum odm_combine_mode ODMCombine) 3875 { 3876 double MaxLinkBPP; 3877 int MinDSCBPP; 3878 double MaxDSCBPP; 3879 int NonDSCBPP0; 3880 int NonDSCBPP1; 3881 int NonDSCBPP2; 3882 3883 if (Format == dm_420) { 3884 NonDSCBPP0 = 12; 3885 NonDSCBPP1 = 15; 3886 NonDSCBPP2 = 18; 3887 MinDSCBPP = 6; 
3888 MaxDSCBPP = 1.5 * DSCInputBitPerComponent - 1 / 16; 3889 } else if (Format == dm_444) { 3890 NonDSCBPP0 = 24; 3891 NonDSCBPP1 = 30; 3892 NonDSCBPP2 = 36; 3893 MinDSCBPP = 8; 3894 MaxDSCBPP = 3 * DSCInputBitPerComponent - 1.0 / 16; 3895 } else { 3896 if (Output == dm_hdmi) { 3897 NonDSCBPP0 = 24; 3898 NonDSCBPP1 = 24; 3899 NonDSCBPP2 = 24; 3900 } else { 3901 NonDSCBPP0 = 16; 3902 NonDSCBPP1 = 20; 3903 NonDSCBPP2 = 24; 3904 } 3905 if (Format == dm_n422) { 3906 MinDSCBPP = 7; 3907 MaxDSCBPP = 2 * DSCInputBitPerComponent - 1.0 / 16.0; 3908 } else { 3909 MinDSCBPP = 8; 3910 MaxDSCBPP = 3 * DSCInputBitPerComponent - 1.0 / 16.0; 3911 } 3912 } 3913 3914 if (DSCEnable && Output == dm_dp) { 3915 MaxLinkBPP = LinkBitRate / 10 * 8 * Lanes / PixelClock * (1 - 2.4 / 100); 3916 } else { 3917 MaxLinkBPP = LinkBitRate / 10 * 8 * Lanes / PixelClock; 3918 } 3919 3920 if (ODMCombine == dm_odm_combine_mode_4to1 && MaxLinkBPP > 16) { 3921 MaxLinkBPP = 16; 3922 } else if (ODMCombine == dm_odm_combine_mode_2to1 && MaxLinkBPP > 32) { 3923 MaxLinkBPP = 32; 3924 } 3925 3926 if (DesiredBPP == 0) { 3927 if (DSCEnable) { 3928 if (MaxLinkBPP < MinDSCBPP) { 3929 return BPP_INVALID; 3930 } else if (MaxLinkBPP >= MaxDSCBPP) { 3931 return MaxDSCBPP; 3932 } else { 3933 return dml_floor(16.0 * MaxLinkBPP, 1.0) / 16.0; 3934 } 3935 } else { 3936 if (MaxLinkBPP >= NonDSCBPP2) { 3937 return NonDSCBPP2; 3938 } else if (MaxLinkBPP >= NonDSCBPP1) { 3939 return NonDSCBPP1; 3940 } else if (MaxLinkBPP >= NonDSCBPP0) { 3941 return 16.0; 3942 } else { 3943 return BPP_INVALID; 3944 } 3945 } 3946 } else { 3947 if (!((DSCEnable == false && (DesiredBPP == NonDSCBPP2 || DesiredBPP == NonDSCBPP1 || DesiredBPP <= NonDSCBPP0)) 3948 || (DSCEnable && DesiredBPP >= MinDSCBPP && DesiredBPP <= MaxDSCBPP))) { 3949 return BPP_INVALID; 3950 } else { 3951 return DesiredBPP; 3952 } 3953 } 3954 return BPP_INVALID; 3955 } 3956 3957 void dml31_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_lib) 3958 { 3959 
struct vba_vars_st *v = &mode_lib->vba; 3960 3961 int i, j; 3962 unsigned int k, m; 3963 int ReorderingBytes; 3964 int MinPrefetchMode = 0, MaxPrefetchMode = 2; 3965 bool NoChroma = true; 3966 bool EnoughWritebackUnits = true; 3967 bool P2IWith420 = false; 3968 bool DSCOnlyIfNecessaryWithBPP = false; 3969 bool DSC422NativeNotSupported = false; 3970 double MaxTotalVActiveRDBandwidth; 3971 bool ViewportExceedsSurface = false; 3972 bool FMTBufferExceeded = false; 3973 3974 /*MODE SUPPORT, VOLTAGE STATE AND SOC CONFIGURATION*/ 3975 3976 CalculateMinAndMaxPrefetchMode( 3977 mode_lib->vba.AllowDRAMSelfRefreshOrDRAMClockChangeInVblank, 3978 &MinPrefetchMode, &MaxPrefetchMode); 3979 3980 /*Scale Ratio, taps Support Check*/ 3981 3982 v->ScaleRatioAndTapsSupport = true; 3983 for (k = 0; k < v->NumberOfActivePlanes; k++) { 3984 if (v->ScalerEnabled[k] == false 3985 && ((v->SourcePixelFormat[k] != dm_444_64 && v->SourcePixelFormat[k] != dm_444_32 3986 && v->SourcePixelFormat[k] != dm_444_16 && v->SourcePixelFormat[k] != dm_mono_16 3987 && v->SourcePixelFormat[k] != dm_mono_8 && v->SourcePixelFormat[k] != dm_rgbe 3988 && v->SourcePixelFormat[k] != dm_rgbe_alpha) || v->HRatio[k] != 1.0 || v->htaps[k] != 1.0 3989 || v->VRatio[k] != 1.0 || v->vtaps[k] != 1.0)) { 3990 v->ScaleRatioAndTapsSupport = false; 3991 } else if (v->vtaps[k] < 1.0 || v->vtaps[k] > 8.0 || v->htaps[k] < 1.0 || v->htaps[k] > 8.0 3992 || (v->htaps[k] > 1.0 && (v->htaps[k] % 2) == 1) || v->HRatio[k] > v->MaxHSCLRatio 3993 || v->VRatio[k] > v->MaxVSCLRatio || v->HRatio[k] > v->htaps[k] 3994 || v->VRatio[k] > v->vtaps[k] 3995 || (v->SourcePixelFormat[k] != dm_444_64 && v->SourcePixelFormat[k] != dm_444_32 3996 && v->SourcePixelFormat[k] != dm_444_16 && v->SourcePixelFormat[k] != dm_mono_16 3997 && v->SourcePixelFormat[k] != dm_mono_8 && v->SourcePixelFormat[k] != dm_rgbe 3998 && (v->VTAPsChroma[k] < 1 || v->VTAPsChroma[k] > 8 || v->HTAPsChroma[k] < 1 3999 || v->HTAPsChroma[k] > 8 || (v->HTAPsChroma[k] > 1 && 
v->HTAPsChroma[k] % 2 == 1) 4000 || v->HRatioChroma[k] > v->MaxHSCLRatio 4001 || v->VRatioChroma[k] > v->MaxVSCLRatio 4002 || v->HRatioChroma[k] > v->HTAPsChroma[k] 4003 || v->VRatioChroma[k] > v->VTAPsChroma[k]))) { 4004 v->ScaleRatioAndTapsSupport = false; 4005 } 4006 } 4007 /*Source Format, Pixel Format and Scan Support Check*/ 4008 4009 v->SourceFormatPixelAndScanSupport = true; 4010 for (k = 0; k < v->NumberOfActivePlanes; k++) { 4011 if ((v->SurfaceTiling[k] == dm_sw_linear && (!(v->SourceScan[k] != dm_vert) || v->DCCEnable[k] == true)) 4012 || ((v->SurfaceTiling[k] == dm_sw_64kb_d || v->SurfaceTiling[k] == dm_sw_64kb_d_t 4013 || v->SurfaceTiling[k] == dm_sw_64kb_d_x) && !(v->SourcePixelFormat[k] == dm_444_64))) { 4014 v->SourceFormatPixelAndScanSupport = false; 4015 } 4016 } 4017 /*Bandwidth Support Check*/ 4018 4019 for (k = 0; k < v->NumberOfActivePlanes; k++) { 4020 CalculateBytePerPixelAnd256BBlockSizes( 4021 v->SourcePixelFormat[k], 4022 v->SurfaceTiling[k], 4023 &v->BytePerPixelY[k], 4024 &v->BytePerPixelC[k], 4025 &v->BytePerPixelInDETY[k], 4026 &v->BytePerPixelInDETC[k], 4027 &v->Read256BlockHeightY[k], 4028 &v->Read256BlockHeightC[k], 4029 &v->Read256BlockWidthY[k], 4030 &v->Read256BlockWidthC[k]); 4031 } 4032 for (k = 0; k < v->NumberOfActivePlanes; k++) { 4033 if (v->SourceScan[k] != dm_vert) { 4034 v->SwathWidthYSingleDPP[k] = v->ViewportWidth[k]; 4035 v->SwathWidthCSingleDPP[k] = v->ViewportWidthChroma[k]; 4036 } else { 4037 v->SwathWidthYSingleDPP[k] = v->ViewportHeight[k]; 4038 v->SwathWidthCSingleDPP[k] = v->ViewportHeightChroma[k]; 4039 } 4040 } 4041 for (k = 0; k < v->NumberOfActivePlanes; k++) { 4042 v->ReadBandwidthLuma[k] = v->SwathWidthYSingleDPP[k] * dml_ceil(v->BytePerPixelInDETY[k], 1.0) 4043 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatio[k]; 4044 v->ReadBandwidthChroma[k] = v->SwathWidthYSingleDPP[k] / 2 * dml_ceil(v->BytePerPixelInDETC[k], 2.0) 4045 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatio[k] / 2.0; 4046 } 4047 for (k = 0; 
k < v->NumberOfActivePlanes; k++) { 4048 if (v->WritebackEnable[k] == true && v->WritebackPixelFormat[k] == dm_444_64) { 4049 v->WriteBandwidth[k] = v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k] 4050 / (v->WritebackSourceHeight[k] * v->HTotal[k] / v->PixelClock[k]) * 8.0; 4051 } else if (v->WritebackEnable[k] == true) { 4052 v->WriteBandwidth[k] = v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k] 4053 / (v->WritebackSourceHeight[k] * v->HTotal[k] / v->PixelClock[k]) * 4.0; 4054 } else { 4055 v->WriteBandwidth[k] = 0.0; 4056 } 4057 } 4058 4059 /*Writeback Latency support check*/ 4060 4061 v->WritebackLatencySupport = true; 4062 for (k = 0; k < v->NumberOfActivePlanes; k++) { 4063 if (v->WritebackEnable[k] == true && (v->WriteBandwidth[k] > v->WritebackInterfaceBufferSize * 1024 / v->WritebackLatency)) { 4064 v->WritebackLatencySupport = false; 4065 } 4066 } 4067 4068 /*Writeback Mode Support Check*/ 4069 4070 v->TotalNumberOfActiveWriteback = 0; 4071 for (k = 0; k < v->NumberOfActivePlanes; k++) { 4072 if (v->WritebackEnable[k] == true) { 4073 v->TotalNumberOfActiveWriteback = v->TotalNumberOfActiveWriteback + 1; 4074 } 4075 } 4076 4077 if (v->TotalNumberOfActiveWriteback > v->MaxNumWriteback) { 4078 EnoughWritebackUnits = false; 4079 } 4080 4081 /*Writeback Scale Ratio and Taps Support Check*/ 4082 4083 v->WritebackScaleRatioAndTapsSupport = true; 4084 for (k = 0; k < v->NumberOfActivePlanes; k++) { 4085 if (v->WritebackEnable[k] == true) { 4086 if (v->WritebackHRatio[k] > v->WritebackMaxHSCLRatio || v->WritebackVRatio[k] > v->WritebackMaxVSCLRatio 4087 || v->WritebackHRatio[k] < v->WritebackMinHSCLRatio 4088 || v->WritebackVRatio[k] < v->WritebackMinVSCLRatio 4089 || v->WritebackHTaps[k] > v->WritebackMaxHSCLTaps 4090 || v->WritebackVTaps[k] > v->WritebackMaxVSCLTaps 4091 || v->WritebackHRatio[k] > v->WritebackHTaps[k] || v->WritebackVRatio[k] > v->WritebackVTaps[k] 4092 || (v->WritebackHTaps[k] > 2.0 && ((v->WritebackHTaps[k] 
% 2) == 1))) { 4093 v->WritebackScaleRatioAndTapsSupport = false; 4094 } 4095 if (2.0 * v->WritebackDestinationWidth[k] * (v->WritebackVTaps[k] - 1) * 57 > v->WritebackLineBufferSize) { 4096 v->WritebackScaleRatioAndTapsSupport = false; 4097 } 4098 } 4099 } 4100 /*Maximum DISPCLK/DPPCLK Support check*/ 4101 4102 v->WritebackRequiredDISPCLK = 0.0; 4103 for (k = 0; k < v->NumberOfActivePlanes; k++) { 4104 if (v->WritebackEnable[k] == true) { 4105 v->WritebackRequiredDISPCLK = dml_max( 4106 v->WritebackRequiredDISPCLK, 4107 dml31_CalculateWriteBackDISPCLK( 4108 v->WritebackPixelFormat[k], 4109 v->PixelClock[k], 4110 v->WritebackHRatio[k], 4111 v->WritebackVRatio[k], 4112 v->WritebackHTaps[k], 4113 v->WritebackVTaps[k], 4114 v->WritebackSourceWidth[k], 4115 v->WritebackDestinationWidth[k], 4116 v->HTotal[k], 4117 v->WritebackLineBufferSize)); 4118 } 4119 } 4120 for (k = 0; k < v->NumberOfActivePlanes; k++) { 4121 if (v->HRatio[k] > 1.0) { 4122 v->PSCL_FACTOR[k] = dml_min( 4123 v->MaxDCHUBToPSCLThroughput, 4124 v->MaxPSCLToLBThroughput * v->HRatio[k] / dml_ceil(v->htaps[k] / 6.0, 1.0)); 4125 } else { 4126 v->PSCL_FACTOR[k] = dml_min(v->MaxDCHUBToPSCLThroughput, v->MaxPSCLToLBThroughput); 4127 } 4128 if (v->BytePerPixelC[k] == 0.0) { 4129 v->PSCL_FACTOR_CHROMA[k] = 0.0; 4130 v->MinDPPCLKUsingSingleDPP[k] = v->PixelClock[k] 4131 * dml_max3( 4132 v->vtaps[k] / 6.0 * dml_min(1.0, v->HRatio[k]), 4133 v->HRatio[k] * v->VRatio[k] / v->PSCL_FACTOR[k], 4134 1.0); 4135 if ((v->htaps[k] > 6.0 || v->vtaps[k] > 6.0) && v->MinDPPCLKUsingSingleDPP[k] < 2.0 * v->PixelClock[k]) { 4136 v->MinDPPCLKUsingSingleDPP[k] = 2.0 * v->PixelClock[k]; 4137 } 4138 } else { 4139 if (v->HRatioChroma[k] > 1.0) { 4140 v->PSCL_FACTOR_CHROMA[k] = dml_min( 4141 v->MaxDCHUBToPSCLThroughput, 4142 v->MaxPSCLToLBThroughput * v->HRatioChroma[k] / dml_ceil(v->HTAPsChroma[k] / 6.0, 1.0)); 4143 } else { 4144 v->PSCL_FACTOR_CHROMA[k] = dml_min(v->MaxDCHUBToPSCLThroughput, v->MaxPSCLToLBThroughput); 4145 } 4146 
v->MinDPPCLKUsingSingleDPP[k] = v->PixelClock[k] 4147 * dml_max5( 4148 v->vtaps[k] / 6.0 * dml_min(1.0, v->HRatio[k]), 4149 v->HRatio[k] * v->VRatio[k] / v->PSCL_FACTOR[k], 4150 v->VTAPsChroma[k] / 6.0 * dml_min(1.0, v->HRatioChroma[k]), 4151 v->HRatioChroma[k] * v->VRatioChroma[k] / v->PSCL_FACTOR_CHROMA[k], 4152 1.0); 4153 if ((v->htaps[k] > 6.0 || v->vtaps[k] > 6.0 || v->HTAPsChroma[k] > 6.0 || v->VTAPsChroma[k] > 6.0) 4154 && v->MinDPPCLKUsingSingleDPP[k] < 2.0 * v->PixelClock[k]) { 4155 v->MinDPPCLKUsingSingleDPP[k] = 2.0 * v->PixelClock[k]; 4156 } 4157 } 4158 } 4159 for (k = 0; k < v->NumberOfActivePlanes; k++) { 4160 int MaximumSwathWidthSupportLuma; 4161 int MaximumSwathWidthSupportChroma; 4162 4163 if (v->SurfaceTiling[k] == dm_sw_linear) { 4164 MaximumSwathWidthSupportLuma = 8192.0; 4165 } else if (v->SourceScan[k] == dm_vert && v->BytePerPixelC[k] > 0) { 4166 MaximumSwathWidthSupportLuma = 2880.0; 4167 } else if (v->SourcePixelFormat[k] == dm_rgbe_alpha) { 4168 MaximumSwathWidthSupportLuma = 3840.0; 4169 } else { 4170 MaximumSwathWidthSupportLuma = 5760.0; 4171 } 4172 4173 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12) { 4174 MaximumSwathWidthSupportChroma = MaximumSwathWidthSupportLuma / 2.0; 4175 } else { 4176 MaximumSwathWidthSupportChroma = MaximumSwathWidthSupportLuma; 4177 } 4178 v->MaximumSwathWidthInLineBufferLuma = v->LineBufferSize * dml_max(v->HRatio[k], 1.0) / v->LBBitPerPixel[k] 4179 / (v->vtaps[k] + dml_max(dml_ceil(v->VRatio[k], 1.0) - 2, 0.0)); 4180 if (v->BytePerPixelC[k] == 0.0) { 4181 v->MaximumSwathWidthInLineBufferChroma = 0; 4182 } else { 4183 v->MaximumSwathWidthInLineBufferChroma = v->LineBufferSize * dml_max(v->HRatioChroma[k], 1.0) / v->LBBitPerPixel[k] 4184 / (v->VTAPsChroma[k] + dml_max(dml_ceil(v->VRatioChroma[k], 1.0) - 2, 0.0)); 4185 } 4186 v->MaximumSwathWidthLuma[k] = dml_min(MaximumSwathWidthSupportLuma, v->MaximumSwathWidthInLineBufferLuma); 4187 
v->MaximumSwathWidthChroma[k] = dml_min(MaximumSwathWidthSupportChroma, v->MaximumSwathWidthInLineBufferChroma); 4188 } 4189 4190 CalculateSwathAndDETConfiguration( 4191 true, 4192 v->NumberOfActivePlanes, 4193 v->DETBufferSizeInKByte[0], 4194 v->MaximumSwathWidthLuma, 4195 v->MaximumSwathWidthChroma, 4196 v->SourceScan, 4197 v->SourcePixelFormat, 4198 v->SurfaceTiling, 4199 v->ViewportWidth, 4200 v->ViewportHeight, 4201 v->SurfaceWidthY, 4202 v->SurfaceWidthC, 4203 v->SurfaceHeightY, 4204 v->SurfaceHeightC, 4205 v->Read256BlockHeightY, 4206 v->Read256BlockHeightC, 4207 v->Read256BlockWidthY, 4208 v->Read256BlockWidthC, 4209 v->odm_combine_dummy, 4210 v->BlendingAndTiming, 4211 v->BytePerPixelY, 4212 v->BytePerPixelC, 4213 v->BytePerPixelInDETY, 4214 v->BytePerPixelInDETC, 4215 v->HActive, 4216 v->HRatio, 4217 v->HRatioChroma, 4218 v->NoOfDPPThisState, 4219 v->swath_width_luma_ub_this_state, 4220 v->swath_width_chroma_ub_this_state, 4221 v->SwathWidthYThisState, 4222 v->SwathWidthCThisState, 4223 v->SwathHeightYThisState, 4224 v->SwathHeightCThisState, 4225 v->DETBufferSizeYThisState, 4226 v->DETBufferSizeCThisState, 4227 v->SingleDPPViewportSizeSupportPerPlane, 4228 &v->ViewportSizeSupport[0][0]); 4229 4230 for (i = 0; i < v->soc.num_states; i++) { 4231 for (j = 0; j < 2; j++) { 4232 v->MaxDispclkRoundedDownToDFSGranularity = RoundToDFSGranularityDown(v->MaxDispclk[i], v->DISPCLKDPPCLKVCOSpeed); 4233 v->MaxDppclkRoundedDownToDFSGranularity = RoundToDFSGranularityDown(v->MaxDppclk[i], v->DISPCLKDPPCLKVCOSpeed); 4234 v->RequiredDISPCLK[i][j] = 0.0; 4235 v->DISPCLK_DPPCLK_Support[i][j] = true; 4236 for (k = 0; k < v->NumberOfActivePlanes; k++) { 4237 v->PlaneRequiredDISPCLKWithoutODMCombine = v->PixelClock[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) 4238 * (1.0 + v->DISPCLKRampingMargin / 100.0); 4239 if ((v->PlaneRequiredDISPCLKWithoutODMCombine >= v->MaxDispclk[i] 4240 && v->MaxDispclk[i] == v->MaxDispclk[v->soc.num_states - 1] 4241 && v->MaxDppclk[i] 
== v->MaxDppclk[v->soc.num_states - 1])) { 4242 v->PlaneRequiredDISPCLKWithoutODMCombine = v->PixelClock[k] 4243 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0); 4244 } 4245 v->PlaneRequiredDISPCLKWithODMCombine2To1 = v->PixelClock[k] / 2 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) 4246 * (1 + v->DISPCLKRampingMargin / 100.0); 4247 if ((v->PlaneRequiredDISPCLKWithODMCombine2To1 >= v->MaxDispclk[i] 4248 && v->MaxDispclk[i] == v->MaxDispclk[v->soc.num_states - 1] 4249 && v->MaxDppclk[i] == v->MaxDppclk[v->soc.num_states - 1])) { 4250 v->PlaneRequiredDISPCLKWithODMCombine2To1 = v->PixelClock[k] / 2 4251 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0); 4252 } 4253 v->PlaneRequiredDISPCLKWithODMCombine4To1 = v->PixelClock[k] / 4 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) 4254 * (1 + v->DISPCLKRampingMargin / 100.0); 4255 if ((v->PlaneRequiredDISPCLKWithODMCombine4To1 >= v->MaxDispclk[i] 4256 && v->MaxDispclk[i] == v->MaxDispclk[v->soc.num_states - 1] 4257 && v->MaxDppclk[i] == v->MaxDppclk[v->soc.num_states - 1])) { 4258 v->PlaneRequiredDISPCLKWithODMCombine4To1 = v->PixelClock[k] / 4 4259 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0); 4260 } 4261 4262 if (v->ODMCombinePolicy == dm_odm_combine_policy_none 4263 || !(v->Output[k] == dm_dp || 4264 v->Output[k] == dm_edp)) { 4265 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_disabled; 4266 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithoutODMCombine; 4267 4268 if (v->HActive[k] / 2 > DCN31_MAX_FMT_420_BUFFER_WIDTH) 4269 FMTBufferExceeded = true; 4270 } else if (v->ODMCombinePolicy == dm_odm_combine_policy_2to1) { 4271 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1; 4272 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine2To1; 4273 } else if (v->ODMCombinePolicy == dm_odm_combine_policy_4to1 4274 || v->PlaneRequiredDISPCLKWithODMCombine2To1 > v->MaxDispclkRoundedDownToDFSGranularity) { 4275 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_4to1; 
4276 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine4To1; 4277 } else if (v->PlaneRequiredDISPCLKWithoutODMCombine > v->MaxDispclkRoundedDownToDFSGranularity) { 4278 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1; 4279 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine2To1; 4280 } else { 4281 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_disabled; 4282 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithoutODMCombine; 4283 } 4284 if (v->DSCEnabled[k] && v->HActive[k] > DCN31_MAX_DSC_IMAGE_WIDTH 4285 && v->ODMCombineEnablePerState[i][k] != dm_odm_combine_mode_4to1) { 4286 if (v->HActive[k] / 2 > DCN31_MAX_DSC_IMAGE_WIDTH) { 4287 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_4to1; 4288 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine4To1; 4289 } else { 4290 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1; 4291 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine2To1; 4292 } 4293 } 4294 if (v->OutputFormat[k] == dm_420 && v->HActive[k] > DCN31_MAX_FMT_420_BUFFER_WIDTH 4295 && v->ODMCombineEnablePerState[i][k] != dm_odm_combine_mode_4to1) { 4296 if (v->HActive[k] / 2 > DCN31_MAX_FMT_420_BUFFER_WIDTH) { 4297 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_4to1; 4298 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine4To1; 4299 4300 if (v->HActive[k] / 4 > DCN31_MAX_FMT_420_BUFFER_WIDTH) 4301 FMTBufferExceeded = true; 4302 } else { 4303 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1; 4304 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine2To1; 4305 } 4306 } 4307 if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1) { 4308 v->MPCCombine[i][j][k] = false; 4309 v->NoOfDPP[i][j][k] = 4; 4310 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 4; 4311 } else if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) { 4312 v->MPCCombine[i][j][k] = 
false; 4313 v->NoOfDPP[i][j][k] = 2; 4314 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 2; 4315 } else if ((v->WhenToDoMPCCombine == dm_mpc_never 4316 || (v->MinDPPCLKUsingSingleDPP[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) 4317 <= v->MaxDppclkRoundedDownToDFSGranularity && v->SingleDPPViewportSizeSupportPerPlane[k] == true))) { 4318 v->MPCCombine[i][j][k] = false; 4319 v->NoOfDPP[i][j][k] = 1; 4320 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0); 4321 } else { 4322 v->MPCCombine[i][j][k] = true; 4323 v->NoOfDPP[i][j][k] = 2; 4324 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 2.0; 4325 } 4326 v->RequiredDISPCLK[i][j] = dml_max(v->RequiredDISPCLK[i][j], v->PlaneRequiredDISPCLK); 4327 if ((v->MinDPPCLKUsingSingleDPP[k] / v->NoOfDPP[i][j][k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) 4328 > v->MaxDppclkRoundedDownToDFSGranularity) 4329 || (v->PlaneRequiredDISPCLK > v->MaxDispclkRoundedDownToDFSGranularity)) { 4330 v->DISPCLK_DPPCLK_Support[i][j] = false; 4331 } 4332 } 4333 v->TotalNumberOfActiveDPP[i][j] = 0; 4334 v->TotalNumberOfSingleDPPPlanes[i][j] = 0; 4335 for (k = 0; k < v->NumberOfActivePlanes; k++) { 4336 v->TotalNumberOfActiveDPP[i][j] = v->TotalNumberOfActiveDPP[i][j] + v->NoOfDPP[i][j][k]; 4337 if (v->NoOfDPP[i][j][k] == 1) 4338 v->TotalNumberOfSingleDPPPlanes[i][j] = v->TotalNumberOfSingleDPPPlanes[i][j] + 1; 4339 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10 4340 || v->SourcePixelFormat[k] == dm_420_12 || v->SourcePixelFormat[k] == dm_rgbe_alpha) 4341 NoChroma = false; 4342 } 4343 4344 // UPTO 4345 if (j == 1 && v->WhenToDoMPCCombine != dm_mpc_never 4346 && !UnboundedRequest(v->UseUnboundedRequesting, v->TotalNumberOfActiveDPP[i][j], NoChroma, v->Output[0])) { 4347 while 
(!(v->TotalNumberOfActiveDPP[i][j] >= v->MaxNumDPP || v->TotalNumberOfSingleDPPPlanes[i][j] == 0)) { 4348 double BWOfNonSplitPlaneOfMaximumBandwidth; 4349 unsigned int NumberOfNonSplitPlaneOfMaximumBandwidth; 4350 BWOfNonSplitPlaneOfMaximumBandwidth = 0; 4351 NumberOfNonSplitPlaneOfMaximumBandwidth = 0; 4352 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 4353 if (v->ReadBandwidthLuma[k] + v->ReadBandwidthChroma[k] > BWOfNonSplitPlaneOfMaximumBandwidth 4354 && v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_disabled && v->MPCCombine[i][j][k] == false) { 4355 BWOfNonSplitPlaneOfMaximumBandwidth = v->ReadBandwidthLuma[k] + v->ReadBandwidthChroma[k]; 4356 NumberOfNonSplitPlaneOfMaximumBandwidth = k; 4357 } 4358 } 4359 v->MPCCombine[i][j][NumberOfNonSplitPlaneOfMaximumBandwidth] = true; 4360 v->NoOfDPP[i][j][NumberOfNonSplitPlaneOfMaximumBandwidth] = 2; 4361 v->RequiredDPPCLK[i][j][NumberOfNonSplitPlaneOfMaximumBandwidth] = 4362 v->MinDPPCLKUsingSingleDPP[NumberOfNonSplitPlaneOfMaximumBandwidth] 4363 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100) / 2; 4364 v->TotalNumberOfActiveDPP[i][j] = v->TotalNumberOfActiveDPP[i][j] + 1; 4365 v->TotalNumberOfSingleDPPPlanes[i][j] = v->TotalNumberOfSingleDPPPlanes[i][j] - 1; 4366 } 4367 } 4368 if (v->TotalNumberOfActiveDPP[i][j] > v->MaxNumDPP) { 4369 v->RequiredDISPCLK[i][j] = 0.0; 4370 v->DISPCLK_DPPCLK_Support[i][j] = true; 4371 for (k = 0; k < v->NumberOfActivePlanes; k++) { 4372 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_disabled; 4373 if (v->SingleDPPViewportSizeSupportPerPlane[k] == false && v->WhenToDoMPCCombine != dm_mpc_never) { 4374 v->MPCCombine[i][j][k] = true; 4375 v->NoOfDPP[i][j][k] = 2; 4376 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] 4377 * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 2.0; 4378 } else { 4379 v->MPCCombine[i][j][k] = false; 4380 v->NoOfDPP[i][j][k] = 1; 4381 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] 4382 * (1.0 + 
v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0); 4383 } 4384 if (!(v->MaxDispclk[i] == v->MaxDispclk[v->soc.num_states - 1] 4385 && v->MaxDppclk[i] == v->MaxDppclk[v->soc.num_states - 1])) { 4386 v->PlaneRequiredDISPCLK = v->PixelClock[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) 4387 * (1.0 + v->DISPCLKRampingMargin / 100.0); 4388 } else { 4389 v->PlaneRequiredDISPCLK = v->PixelClock[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0); 4390 } 4391 v->RequiredDISPCLK[i][j] = dml_max(v->RequiredDISPCLK[i][j], v->PlaneRequiredDISPCLK); 4392 if ((v->MinDPPCLKUsingSingleDPP[k] / v->NoOfDPP[i][j][k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) 4393 > v->MaxDppclkRoundedDownToDFSGranularity) 4394 || (v->PlaneRequiredDISPCLK > v->MaxDispclkRoundedDownToDFSGranularity)) { 4395 v->DISPCLK_DPPCLK_Support[i][j] = false; 4396 } 4397 } 4398 v->TotalNumberOfActiveDPP[i][j] = 0.0; 4399 for (k = 0; k < v->NumberOfActivePlanes; k++) { 4400 v->TotalNumberOfActiveDPP[i][j] = v->TotalNumberOfActiveDPP[i][j] + v->NoOfDPP[i][j][k]; 4401 } 4402 } 4403 v->RequiredDISPCLK[i][j] = dml_max(v->RequiredDISPCLK[i][j], v->WritebackRequiredDISPCLK); 4404 if (v->MaxDispclkRoundedDownToDFSGranularity < v->WritebackRequiredDISPCLK) { 4405 v->DISPCLK_DPPCLK_Support[i][j] = false; 4406 } 4407 } 4408 } 4409 4410 /*Total Available Pipes Support Check*/ 4411 4412 for (i = 0; i < v->soc.num_states; i++) { 4413 for (j = 0; j < 2; j++) { 4414 if (v->TotalNumberOfActiveDPP[i][j] <= v->MaxNumDPP) { 4415 v->TotalAvailablePipesSupport[i][j] = true; 4416 } else { 4417 v->TotalAvailablePipesSupport[i][j] = false; 4418 } 4419 } 4420 } 4421 /*Display IO and DSC Support Check*/ 4422 4423 v->NonsupportedDSCInputBPC = false; 4424 for (k = 0; k < v->NumberOfActivePlanes; k++) { 4425 if (!(v->DSCInputBitPerComponent[k] == 12.0 || v->DSCInputBitPerComponent[k] == 10.0 || v->DSCInputBitPerComponent[k] == 8.0) 4426 || v->DSCInputBitPerComponent[k] > v->MaximumDSCBitsPerComponent) { 4427 
v->NonsupportedDSCInputBPC = true; 4428 } 4429 } 4430 4431 /*Number Of DSC Slices*/ 4432 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 4433 if (v->BlendingAndTiming[k] == k) { 4434 if (v->PixelClockBackEnd[k] > 3200) { 4435 v->NumberOfDSCSlices[k] = dml_ceil(v->PixelClockBackEnd[k] / 400.0, 4.0); 4436 } else if (v->PixelClockBackEnd[k] > 1360) { 4437 v->NumberOfDSCSlices[k] = 8; 4438 } else if (v->PixelClockBackEnd[k] > 680) { 4439 v->NumberOfDSCSlices[k] = 4; 4440 } else if (v->PixelClockBackEnd[k] > 340) { 4441 v->NumberOfDSCSlices[k] = 2; 4442 } else { 4443 v->NumberOfDSCSlices[k] = 1; 4444 } 4445 } else { 4446 v->NumberOfDSCSlices[k] = 0; 4447 } 4448 } 4449 4450 for (i = 0; i < v->soc.num_states; i++) { 4451 for (k = 0; k < v->NumberOfActivePlanes; k++) { 4452 v->RequiresDSC[i][k] = false; 4453 v->RequiresFEC[i][k] = false; 4454 if (v->BlendingAndTiming[k] == k) { 4455 if (v->Output[k] == dm_hdmi) { 4456 v->RequiresDSC[i][k] = false; 4457 v->RequiresFEC[i][k] = false; 4458 v->OutputBppPerState[i][k] = TruncToValidBPP( 4459 dml_min(600.0, v->PHYCLKPerState[i]) * 10, 4460 3, 4461 v->HTotal[k], 4462 v->HActive[k], 4463 v->PixelClockBackEnd[k], 4464 v->ForcedOutputLinkBPP[k], 4465 false, 4466 v->Output[k], 4467 v->OutputFormat[k], 4468 v->DSCInputBitPerComponent[k], 4469 v->NumberOfDSCSlices[k], 4470 v->AudioSampleRate[k], 4471 v->AudioSampleLayout[k], 4472 v->ODMCombineEnablePerState[i][k]); 4473 } else if (v->Output[k] == dm_dp || v->Output[k] == dm_edp) { 4474 if (v->DSCEnable[k] == true) { 4475 v->RequiresDSC[i][k] = true; 4476 v->LinkDSCEnable = true; 4477 if (v->Output[k] == dm_dp) { 4478 v->RequiresFEC[i][k] = true; 4479 } else { 4480 v->RequiresFEC[i][k] = false; 4481 } 4482 } else { 4483 v->RequiresDSC[i][k] = false; 4484 v->LinkDSCEnable = false; 4485 v->RequiresFEC[i][k] = false; 4486 } 4487 4488 v->Outbpp = BPP_INVALID; 4489 if (v->PHYCLKPerState[i] >= 270.0) { 4490 v->Outbpp = TruncToValidBPP( 4491 (1.0 - v->Downspreading / 100.0) * 2700, 4492 
v->OutputLinkDPLanes[k], 4493 v->HTotal[k], 4494 v->HActive[k], 4495 v->PixelClockBackEnd[k], 4496 v->ForcedOutputLinkBPP[k], 4497 v->LinkDSCEnable, 4498 v->Output[k], 4499 v->OutputFormat[k], 4500 v->DSCInputBitPerComponent[k], 4501 v->NumberOfDSCSlices[k], 4502 v->AudioSampleRate[k], 4503 v->AudioSampleLayout[k], 4504 v->ODMCombineEnablePerState[i][k]); 4505 v->OutputBppPerState[i][k] = v->Outbpp; 4506 // TODO: Need some other way to handle this nonsense 4507 // v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " HBR" 4508 } 4509 if (v->Outbpp == BPP_INVALID && v->PHYCLKPerState[i] >= 540.0) { 4510 v->Outbpp = TruncToValidBPP( 4511 (1.0 - v->Downspreading / 100.0) * 5400, 4512 v->OutputLinkDPLanes[k], 4513 v->HTotal[k], 4514 v->HActive[k], 4515 v->PixelClockBackEnd[k], 4516 v->ForcedOutputLinkBPP[k], 4517 v->LinkDSCEnable, 4518 v->Output[k], 4519 v->OutputFormat[k], 4520 v->DSCInputBitPerComponent[k], 4521 v->NumberOfDSCSlices[k], 4522 v->AudioSampleRate[k], 4523 v->AudioSampleLayout[k], 4524 v->ODMCombineEnablePerState[i][k]); 4525 v->OutputBppPerState[i][k] = v->Outbpp; 4526 // TODO: Need some other way to handle this nonsense 4527 // v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " HBR2" 4528 } 4529 if (v->Outbpp == BPP_INVALID && v->PHYCLKPerState[i] >= 810.0) { 4530 v->Outbpp = TruncToValidBPP( 4531 (1.0 - v->Downspreading / 100.0) * 8100, 4532 v->OutputLinkDPLanes[k], 4533 v->HTotal[k], 4534 v->HActive[k], 4535 v->PixelClockBackEnd[k], 4536 v->ForcedOutputLinkBPP[k], 4537 v->LinkDSCEnable, 4538 v->Output[k], 4539 v->OutputFormat[k], 4540 v->DSCInputBitPerComponent[k], 4541 v->NumberOfDSCSlices[k], 4542 v->AudioSampleRate[k], 4543 v->AudioSampleLayout[k], 4544 v->ODMCombineEnablePerState[i][k]); 4545 v->OutputBppPerState[i][k] = v->Outbpp; 4546 // TODO: Need some other way to handle this nonsense 4547 // v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " HBR3" 4548 } 4549 if (v->Outbpp == BPP_INVALID && v->PHYCLKD18PerState[i] >= 10000.0 / 18) { 
4550 v->Outbpp = TruncToValidBPP( 4551 (1.0 - v->Downspreading / 100.0) * 10000, 4552 4, 4553 v->HTotal[k], 4554 v->HActive[k], 4555 v->PixelClockBackEnd[k], 4556 v->ForcedOutputLinkBPP[k], 4557 v->LinkDSCEnable, 4558 v->Output[k], 4559 v->OutputFormat[k], 4560 v->DSCInputBitPerComponent[k], 4561 v->NumberOfDSCSlices[k], 4562 v->AudioSampleRate[k], 4563 v->AudioSampleLayout[k], 4564 v->ODMCombineEnablePerState[i][k]); 4565 v->OutputBppPerState[i][k] = v->Outbpp; 4566 //v->OutputTypeAndRatePerState[i][k] = v->Output[k] & "10x4"; 4567 } 4568 if (v->Outbpp == BPP_INVALID && v->PHYCLKD18PerState[i] >= 12000.0 / 18) { 4569 v->Outbpp = TruncToValidBPP( 4570 12000, 4571 4, 4572 v->HTotal[k], 4573 v->HActive[k], 4574 v->PixelClockBackEnd[k], 4575 v->ForcedOutputLinkBPP[k], 4576 v->LinkDSCEnable, 4577 v->Output[k], 4578 v->OutputFormat[k], 4579 v->DSCInputBitPerComponent[k], 4580 v->NumberOfDSCSlices[k], 4581 v->AudioSampleRate[k], 4582 v->AudioSampleLayout[k], 4583 v->ODMCombineEnablePerState[i][k]); 4584 v->OutputBppPerState[i][k] = v->Outbpp; 4585 //v->OutputTypeAndRatePerState[i][k] = v->Output[k] & "12x4"; 4586 } 4587 } 4588 } else { 4589 v->OutputBppPerState[i][k] = 0; 4590 } 4591 } 4592 } 4593 4594 for (i = 0; i < v->soc.num_states; i++) { 4595 v->LinkCapacitySupport[i] = true; 4596 for (k = 0; k < v->NumberOfActivePlanes; k++) { 4597 if (v->BlendingAndTiming[k] == k 4598 && (v->Output[k] == dm_dp || 4599 v->Output[k] == dm_edp || 4600 v->Output[k] == dm_hdmi) && v->OutputBppPerState[i][k] == 0) { 4601 v->LinkCapacitySupport[i] = false; 4602 } 4603 } 4604 } 4605 4606 // UPTO 2172 4607 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 4608 if (v->BlendingAndTiming[k] == k 4609 && (v->Output[k] == dm_dp || 4610 v->Output[k] == dm_edp || 4611 v->Output[k] == dm_hdmi)) { 4612 if (v->OutputFormat[k] == dm_420 && v->Interlace[k] == 1 && v->ProgressiveToInterlaceUnitInOPP == true) { 4613 P2IWith420 = true; 4614 } 4615 if (v->DSCEnable[k] == true && v->OutputFormat[k] == 
dm_n422 4616 && !v->DSC422NativeSupport) { 4617 DSC422NativeNotSupported = true; 4618 } 4619 } 4620 } 4621 4622 for (i = 0; i < v->soc.num_states; ++i) { 4623 v->ODMCombine4To1SupportCheckOK[i] = true; 4624 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 4625 if (v->BlendingAndTiming[k] == k && v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1 4626 && (v->ODMCombine4To1Supported == false || v->Output[k] == dm_dp || v->Output[k] == dm_edp 4627 || v->Output[k] == dm_hdmi)) { 4628 v->ODMCombine4To1SupportCheckOK[i] = false; 4629 } 4630 } 4631 } 4632 4633 /* Skip dscclk validation: as long as dispclk is supported, dscclk is also implicitly supported */ 4634 4635 for (i = 0; i < v->soc.num_states; i++) { 4636 v->NotEnoughDSCUnits[i] = false; 4637 v->TotalDSCUnitsRequired = 0.0; 4638 for (k = 0; k < v->NumberOfActivePlanes; k++) { 4639 if (v->RequiresDSC[i][k] == true) { 4640 if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1) { 4641 v->TotalDSCUnitsRequired = v->TotalDSCUnitsRequired + 4.0; 4642 } else if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) { 4643 v->TotalDSCUnitsRequired = v->TotalDSCUnitsRequired + 2.0; 4644 } else { 4645 v->TotalDSCUnitsRequired = v->TotalDSCUnitsRequired + 1.0; 4646 } 4647 } 4648 } 4649 if (v->TotalDSCUnitsRequired > v->NumberOfDSC) { 4650 v->NotEnoughDSCUnits[i] = true; 4651 } 4652 } 4653 /*DSC Delay per state*/ 4654 4655 for (i = 0; i < v->soc.num_states; i++) { 4656 for (k = 0; k < v->NumberOfActivePlanes; k++) { 4657 if (v->OutputBppPerState[i][k] == BPP_INVALID) { 4658 v->BPP = 0.0; 4659 } else { 4660 v->BPP = v->OutputBppPerState[i][k]; 4661 } 4662 if (v->RequiresDSC[i][k] == true && v->BPP != 0.0) { 4663 if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_disabled) { 4664 v->DSCDelayPerState[i][k] = dscceComputeDelay( 4665 v->DSCInputBitPerComponent[k], 4666 v->BPP, 4667 dml_ceil(1.0 * v->HActive[k] / v->NumberOfDSCSlices[k], 1.0), 4668 v->NumberOfDSCSlices[k], 4669 
v->OutputFormat[k], 4670 v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k]); 4671 } else if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) { 4672 v->DSCDelayPerState[i][k] = 2.0 4673 * (dscceComputeDelay( 4674 v->DSCInputBitPerComponent[k], 4675 v->BPP, 4676 dml_ceil(1.0 * v->HActive[k] / v->NumberOfDSCSlices[k], 1.0), 4677 v->NumberOfDSCSlices[k] / 2, 4678 v->OutputFormat[k], 4679 v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k])); 4680 } else { 4681 v->DSCDelayPerState[i][k] = 4.0 4682 * (dscceComputeDelay( 4683 v->DSCInputBitPerComponent[k], 4684 v->BPP, 4685 dml_ceil(1.0 * v->HActive[k] / v->NumberOfDSCSlices[k], 1.0), 4686 v->NumberOfDSCSlices[k] / 4, 4687 v->OutputFormat[k], 4688 v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k])); 4689 } 4690 v->DSCDelayPerState[i][k] = v->DSCDelayPerState[i][k] * v->PixelClock[k] / v->PixelClockBackEnd[k]; 4691 } else { 4692 v->DSCDelayPerState[i][k] = 0.0; 4693 } 4694 } 4695 for (k = 0; k < v->NumberOfActivePlanes; k++) { 4696 for (m = 0; m < v->NumberOfActivePlanes; m++) { 4697 if (v->BlendingAndTiming[k] == m && v->RequiresDSC[i][m] == true) { 4698 v->DSCDelayPerState[i][k] = v->DSCDelayPerState[i][m]; 4699 } 4700 } 4701 } 4702 } 4703 4704 //Calculate Swath, DET Configuration, DCFCLKDeepSleep 4705 // 4706 for (i = 0; i < v->soc.num_states; ++i) { 4707 for (j = 0; j <= 1; ++j) { 4708 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 4709 v->RequiredDPPCLKThisState[k] = v->RequiredDPPCLK[i][j][k]; 4710 v->NoOfDPPThisState[k] = v->NoOfDPP[i][j][k]; 4711 v->ODMCombineEnableThisState[k] = v->ODMCombineEnablePerState[i][k]; 4712 } 4713 4714 CalculateSwathAndDETConfiguration( 4715 false, 4716 v->NumberOfActivePlanes, 4717 v->DETBufferSizeInKByte[0], 4718 v->MaximumSwathWidthLuma, 4719 v->MaximumSwathWidthChroma, 4720 v->SourceScan, 4721 v->SourcePixelFormat, 4722 v->SurfaceTiling, 4723 v->ViewportWidth, 4724 v->ViewportHeight, 4725 v->SurfaceWidthY, 4726 
v->SurfaceWidthC, 4727 v->SurfaceHeightY, 4728 v->SurfaceHeightC, 4729 v->Read256BlockHeightY, 4730 v->Read256BlockHeightC, 4731 v->Read256BlockWidthY, 4732 v->Read256BlockWidthC, 4733 v->ODMCombineEnableThisState, 4734 v->BlendingAndTiming, 4735 v->BytePerPixelY, 4736 v->BytePerPixelC, 4737 v->BytePerPixelInDETY, 4738 v->BytePerPixelInDETC, 4739 v->HActive, 4740 v->HRatio, 4741 v->HRatioChroma, 4742 v->NoOfDPPThisState, 4743 v->swath_width_luma_ub_this_state, 4744 v->swath_width_chroma_ub_this_state, 4745 v->SwathWidthYThisState, 4746 v->SwathWidthCThisState, 4747 v->SwathHeightYThisState, 4748 v->SwathHeightCThisState, 4749 v->DETBufferSizeYThisState, 4750 v->DETBufferSizeCThisState, 4751 v->dummystring, 4752 &v->ViewportSizeSupport[i][j]); 4753 4754 CalculateDCFCLKDeepSleep( 4755 mode_lib, 4756 v->NumberOfActivePlanes, 4757 v->BytePerPixelY, 4758 v->BytePerPixelC, 4759 v->VRatio, 4760 v->VRatioChroma, 4761 v->SwathWidthYThisState, 4762 v->SwathWidthCThisState, 4763 v->NoOfDPPThisState, 4764 v->HRatio, 4765 v->HRatioChroma, 4766 v->PixelClock, 4767 v->PSCL_FACTOR, 4768 v->PSCL_FACTOR_CHROMA, 4769 v->RequiredDPPCLKThisState, 4770 v->ReadBandwidthLuma, 4771 v->ReadBandwidthChroma, 4772 v->ReturnBusWidth, 4773 &v->ProjectedDCFCLKDeepSleep[i][j]); 4774 4775 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 4776 v->swath_width_luma_ub_all_states[i][j][k] = v->swath_width_luma_ub_this_state[k]; 4777 v->swath_width_chroma_ub_all_states[i][j][k] = v->swath_width_chroma_ub_this_state[k]; 4778 v->SwathWidthYAllStates[i][j][k] = v->SwathWidthYThisState[k]; 4779 v->SwathWidthCAllStates[i][j][k] = v->SwathWidthCThisState[k]; 4780 v->SwathHeightYAllStates[i][j][k] = v->SwathHeightYThisState[k]; 4781 v->SwathHeightCAllStates[i][j][k] = v->SwathHeightCThisState[k]; 4782 v->DETBufferSizeYAllStates[i][j][k] = v->DETBufferSizeYThisState[k]; 4783 v->DETBufferSizeCAllStates[i][j][k] = v->DETBufferSizeCThisState[k]; 4784 } 4785 } 4786 } 4787 4788 for (k = 0; k < v->NumberOfActivePlanes; 
++k) { 4789 v->cursor_bw[k] = v->NumberOfCursors[k] * v->CursorWidth[k][0] * v->CursorBPP[k][0] / 8.0 4790 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatio[k]; 4791 } 4792 4793 for (i = 0; i < v->soc.num_states; i++) { 4794 for (j = 0; j < 2; j++) { 4795 bool NotUrgentLatencyHiding[DC__NUM_DPP__MAX]; 4796 4797 for (k = 0; k < v->NumberOfActivePlanes; k++) { 4798 v->swath_width_luma_ub_this_state[k] = v->swath_width_luma_ub_all_states[i][j][k]; 4799 v->swath_width_chroma_ub_this_state[k] = v->swath_width_chroma_ub_all_states[i][j][k]; 4800 v->SwathWidthYThisState[k] = v->SwathWidthYAllStates[i][j][k]; 4801 v->SwathWidthCThisState[k] = v->SwathWidthCAllStates[i][j][k]; 4802 v->SwathHeightYThisState[k] = v->SwathHeightYAllStates[i][j][k]; 4803 v->SwathHeightCThisState[k] = v->SwathHeightCAllStates[i][j][k]; 4804 v->DETBufferSizeYThisState[k] = v->DETBufferSizeYAllStates[i][j][k]; 4805 v->DETBufferSizeCThisState[k] = v->DETBufferSizeCAllStates[i][j][k]; 4806 } 4807 4808 v->TotalNumberOfDCCActiveDPP[i][j] = 0; 4809 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 4810 if (v->DCCEnable[k] == true) { 4811 v->TotalNumberOfDCCActiveDPP[i][j] = v->TotalNumberOfDCCActiveDPP[i][j] + v->NoOfDPP[i][j][k]; 4812 } 4813 } 4814 4815 for (k = 0; k < v->NumberOfActivePlanes; k++) { 4816 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10 4817 || v->SourcePixelFormat[k] == dm_420_12 || v->SourcePixelFormat[k] == dm_rgbe_alpha) { 4818 4819 if ((v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12) 4820 && v->SourceScan[k] != dm_vert) { 4821 v->PTEBufferSizeInRequestsForLuma = (v->PTEBufferSizeInRequestsLuma + v->PTEBufferSizeInRequestsChroma) 4822 / 2; 4823 v->PTEBufferSizeInRequestsForChroma = v->PTEBufferSizeInRequestsForLuma; 4824 } else { 4825 v->PTEBufferSizeInRequestsForLuma = v->PTEBufferSizeInRequestsLuma; 4826 v->PTEBufferSizeInRequestsForChroma = v->PTEBufferSizeInRequestsChroma; 4827 } 4828 4829 v->PDEAndMetaPTEBytesPerFrameC 
= CalculateVMAndRowBytes( 4830 mode_lib, 4831 v->DCCEnable[k], 4832 v->Read256BlockHeightC[k], 4833 v->Read256BlockWidthC[k], 4834 v->SourcePixelFormat[k], 4835 v->SurfaceTiling[k], 4836 v->BytePerPixelC[k], 4837 v->SourceScan[k], 4838 v->SwathWidthCThisState[k], 4839 v->ViewportHeightChroma[k], 4840 v->GPUVMEnable, 4841 v->HostVMEnable, 4842 v->HostVMMaxNonCachedPageTableLevels, 4843 v->GPUVMMinPageSize, 4844 v->HostVMMinPageSize, 4845 v->PTEBufferSizeInRequestsForChroma, 4846 v->PitchC[k], 4847 0.0, 4848 &v->MacroTileWidthC[k], 4849 &v->MetaRowBytesC, 4850 &v->DPTEBytesPerRowC, 4851 &v->PTEBufferSizeNotExceededC[i][j][k], 4852 &v->dummyinteger7, 4853 &v->dpte_row_height_chroma[k], 4854 &v->dummyinteger28, 4855 &v->dummyinteger26, 4856 &v->dummyinteger23, 4857 &v->meta_row_height_chroma[k], 4858 &v->dummyinteger8, 4859 &v->dummyinteger9, 4860 &v->dummyinteger19, 4861 &v->dummyinteger20, 4862 &v->dummyinteger17, 4863 &v->dummyinteger10, 4864 &v->dummyinteger11); 4865 4866 v->PrefetchLinesC[i][j][k] = CalculatePrefetchSourceLines( 4867 mode_lib, 4868 v->VRatioChroma[k], 4869 v->VTAPsChroma[k], 4870 v->Interlace[k], 4871 v->ProgressiveToInterlaceUnitInOPP, 4872 v->SwathHeightCThisState[k], 4873 v->ViewportYStartC[k], 4874 &v->PrefillC[k], 4875 &v->MaxNumSwC[k]); 4876 } else { 4877 v->PTEBufferSizeInRequestsForLuma = v->PTEBufferSizeInRequestsLuma + v->PTEBufferSizeInRequestsChroma; 4878 v->PTEBufferSizeInRequestsForChroma = 0; 4879 v->PDEAndMetaPTEBytesPerFrameC = 0.0; 4880 v->MetaRowBytesC = 0.0; 4881 v->DPTEBytesPerRowC = 0.0; 4882 v->PrefetchLinesC[i][j][k] = 0.0; 4883 v->PTEBufferSizeNotExceededC[i][j][k] = true; 4884 } 4885 v->PDEAndMetaPTEBytesPerFrameY = CalculateVMAndRowBytes( 4886 mode_lib, 4887 v->DCCEnable[k], 4888 v->Read256BlockHeightY[k], 4889 v->Read256BlockWidthY[k], 4890 v->SourcePixelFormat[k], 4891 v->SurfaceTiling[k], 4892 v->BytePerPixelY[k], 4893 v->SourceScan[k], 4894 v->SwathWidthYThisState[k], 4895 v->ViewportHeight[k], 4896 v->GPUVMEnable, 
4897 v->HostVMEnable, 4898 v->HostVMMaxNonCachedPageTableLevels, 4899 v->GPUVMMinPageSize, 4900 v->HostVMMinPageSize, 4901 v->PTEBufferSizeInRequestsForLuma, 4902 v->PitchY[k], 4903 v->DCCMetaPitchY[k], 4904 &v->MacroTileWidthY[k], 4905 &v->MetaRowBytesY, 4906 &v->DPTEBytesPerRowY, 4907 &v->PTEBufferSizeNotExceededY[i][j][k], 4908 &v->dummyinteger7, 4909 &v->dpte_row_height[k], 4910 &v->dummyinteger29, 4911 &v->dummyinteger27, 4912 &v->dummyinteger24, 4913 &v->meta_row_height[k], 4914 &v->dummyinteger25, 4915 &v->dpte_group_bytes[k], 4916 &v->dummyinteger21, 4917 &v->dummyinteger22, 4918 &v->dummyinteger18, 4919 &v->dummyinteger5, 4920 &v->dummyinteger6); 4921 v->PrefetchLinesY[i][j][k] = CalculatePrefetchSourceLines( 4922 mode_lib, 4923 v->VRatio[k], 4924 v->vtaps[k], 4925 v->Interlace[k], 4926 v->ProgressiveToInterlaceUnitInOPP, 4927 v->SwathHeightYThisState[k], 4928 v->ViewportYStartY[k], 4929 &v->PrefillY[k], 4930 &v->MaxNumSwY[k]); 4931 v->PDEAndMetaPTEBytesPerFrame[i][j][k] = v->PDEAndMetaPTEBytesPerFrameY + v->PDEAndMetaPTEBytesPerFrameC; 4932 v->MetaRowBytes[i][j][k] = v->MetaRowBytesY + v->MetaRowBytesC; 4933 v->DPTEBytesPerRow[i][j][k] = v->DPTEBytesPerRowY + v->DPTEBytesPerRowC; 4934 4935 CalculateRowBandwidth( 4936 v->GPUVMEnable, 4937 v->SourcePixelFormat[k], 4938 v->VRatio[k], 4939 v->VRatioChroma[k], 4940 v->DCCEnable[k], 4941 v->HTotal[k] / v->PixelClock[k], 4942 v->MetaRowBytesY, 4943 v->MetaRowBytesC, 4944 v->meta_row_height[k], 4945 v->meta_row_height_chroma[k], 4946 v->DPTEBytesPerRowY, 4947 v->DPTEBytesPerRowC, 4948 v->dpte_row_height[k], 4949 v->dpte_row_height_chroma[k], 4950 &v->meta_row_bandwidth[i][j][k], 4951 &v->dpte_row_bandwidth[i][j][k]); 4952 } 4953 v->UrgLatency[i] = CalculateUrgentLatency( 4954 v->UrgentLatencyPixelDataOnly, 4955 v->UrgentLatencyPixelMixedWithVMData, 4956 v->UrgentLatencyVMDataOnly, 4957 v->DoUrgentLatencyAdjustment, 4958 v->UrgentLatencyAdjustmentFabricClockComponent, 4959 
v->UrgentLatencyAdjustmentFabricClockReference, 4960 v->FabricClockPerState[i]); 4961 4962 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 4963 CalculateUrgentBurstFactor( 4964 v->swath_width_luma_ub_this_state[k], 4965 v->swath_width_chroma_ub_this_state[k], 4966 v->SwathHeightYThisState[k], 4967 v->SwathHeightCThisState[k], 4968 v->HTotal[k] / v->PixelClock[k], 4969 v->UrgLatency[i], 4970 v->CursorBufferSize, 4971 v->CursorWidth[k][0], 4972 v->CursorBPP[k][0], 4973 v->VRatio[k], 4974 v->VRatioChroma[k], 4975 v->BytePerPixelInDETY[k], 4976 v->BytePerPixelInDETC[k], 4977 v->DETBufferSizeYThisState[k], 4978 v->DETBufferSizeCThisState[k], 4979 &v->UrgentBurstFactorCursor[k], 4980 &v->UrgentBurstFactorLuma[k], 4981 &v->UrgentBurstFactorChroma[k], 4982 &NotUrgentLatencyHiding[k]); 4983 } 4984 4985 v->NotEnoughUrgentLatencyHidingA[i][j] = false; 4986 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 4987 if (NotUrgentLatencyHiding[k]) { 4988 v->NotEnoughUrgentLatencyHidingA[i][j] = true; 4989 } 4990 } 4991 4992 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 4993 v->VActivePixelBandwidth[i][j][k] = v->ReadBandwidthLuma[k] * v->UrgentBurstFactorLuma[k] 4994 + v->ReadBandwidthChroma[k] * v->UrgentBurstFactorChroma[k]; 4995 v->VActiveCursorBandwidth[i][j][k] = v->cursor_bw[k] * v->UrgentBurstFactorCursor[k]; 4996 } 4997 4998 v->TotalVActivePixelBandwidth[i][j] = 0; 4999 v->TotalVActiveCursorBandwidth[i][j] = 0; 5000 v->TotalMetaRowBandwidth[i][j] = 0; 5001 v->TotalDPTERowBandwidth[i][j] = 0; 5002 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 5003 v->TotalVActivePixelBandwidth[i][j] = v->TotalVActivePixelBandwidth[i][j] + v->VActivePixelBandwidth[i][j][k]; 5004 v->TotalVActiveCursorBandwidth[i][j] = v->TotalVActiveCursorBandwidth[i][j] + v->VActiveCursorBandwidth[i][j][k]; 5005 v->TotalMetaRowBandwidth[i][j] = v->TotalMetaRowBandwidth[i][j] + v->NoOfDPP[i][j][k] * v->meta_row_bandwidth[i][j][k]; 5006 v->TotalDPTERowBandwidth[i][j] = v->TotalDPTERowBandwidth[i][j] + 
v->NoOfDPP[i][j][k] * v->dpte_row_bandwidth[i][j][k]; 5007 } 5008 } 5009 } 5010 5011 //Calculate Return BW 5012 for (i = 0; i < v->soc.num_states; ++i) { 5013 for (j = 0; j <= 1; ++j) { 5014 for (k = 0; k < v->NumberOfActivePlanes; k++) { 5015 if (v->BlendingAndTiming[k] == k) { 5016 if (v->WritebackEnable[k] == true) { 5017 v->WritebackDelayTime[k] = v->WritebackLatency 5018 + CalculateWriteBackDelay( 5019 v->WritebackPixelFormat[k], 5020 v->WritebackHRatio[k], 5021 v->WritebackVRatio[k], 5022 v->WritebackVTaps[k], 5023 v->WritebackDestinationWidth[k], 5024 v->WritebackDestinationHeight[k], 5025 v->WritebackSourceHeight[k], 5026 v->HTotal[k]) / v->RequiredDISPCLK[i][j]; 5027 } else { 5028 v->WritebackDelayTime[k] = 0.0; 5029 } 5030 for (m = 0; m < v->NumberOfActivePlanes; m++) { 5031 if (v->BlendingAndTiming[m] == k && v->WritebackEnable[m] == true) { 5032 v->WritebackDelayTime[k] = dml_max( 5033 v->WritebackDelayTime[k], 5034 v->WritebackLatency 5035 + CalculateWriteBackDelay( 5036 v->WritebackPixelFormat[m], 5037 v->WritebackHRatio[m], 5038 v->WritebackVRatio[m], 5039 v->WritebackVTaps[m], 5040 v->WritebackDestinationWidth[m], 5041 v->WritebackDestinationHeight[m], 5042 v->WritebackSourceHeight[m], 5043 v->HTotal[m]) / v->RequiredDISPCLK[i][j]); 5044 } 5045 } 5046 } 5047 } 5048 for (k = 0; k < v->NumberOfActivePlanes; k++) { 5049 for (m = 0; m < v->NumberOfActivePlanes; m++) { 5050 if (v->BlendingAndTiming[k] == m) { 5051 v->WritebackDelayTime[k] = v->WritebackDelayTime[m]; 5052 } 5053 } 5054 } 5055 v->MaxMaxVStartup[i][j] = 0; 5056 for (k = 0; k < v->NumberOfActivePlanes; k++) { 5057 v->MaximumVStartup[i][j][k] = 5058 (v->Interlace[k] && !v->ProgressiveToInterlaceUnitInOPP) ? 
5059 dml_floor((v->VTotal[k] - v->VActive[k]) / 2.0, 1.0) : 5060 v->VTotal[k] - v->VActive[k] 5061 - dml_max( 5062 1.0, 5063 dml_ceil( 5064 1.0 * v->WritebackDelayTime[k] 5065 / (v->HTotal[k] 5066 / v->PixelClock[k]), 5067 1.0)); 5068 if (v->MaximumVStartup[i][j][k] > 1023) 5069 v->MaximumVStartup[i][j][k] = 1023; 5070 v->MaxMaxVStartup[i][j] = dml_max(v->MaxMaxVStartup[i][j], v->MaximumVStartup[i][j][k]); 5071 } 5072 } 5073 } 5074 5075 ReorderingBytes = v->NumberOfChannels 5076 * dml_max3( 5077 v->UrgentOutOfOrderReturnPerChannelPixelDataOnly, 5078 v->UrgentOutOfOrderReturnPerChannelPixelMixedWithVMData, 5079 v->UrgentOutOfOrderReturnPerChannelVMDataOnly); 5080 5081 for (i = 0; i < v->soc.num_states; ++i) { 5082 for (j = 0; j <= 1; ++j) { 5083 v->DCFCLKState[i][j] = v->DCFCLKPerState[i]; 5084 } 5085 } 5086 5087 if (v->UseMinimumRequiredDCFCLK == true) { 5088 UseMinimumDCFCLK( 5089 mode_lib, 5090 v->MaxInterDCNTileRepeaters, 5091 MaxPrefetchMode, 5092 v->DRAMClockChangeLatency, 5093 v->SREnterPlusExitTime, 5094 v->ReturnBusWidth, 5095 v->RoundTripPingLatencyCycles, 5096 ReorderingBytes, 5097 v->PixelChunkSizeInKByte, 5098 v->MetaChunkSize, 5099 v->GPUVMEnable, 5100 v->GPUVMMaxPageTableLevels, 5101 v->HostVMEnable, 5102 v->NumberOfActivePlanes, 5103 v->HostVMMinPageSize, 5104 v->HostVMMaxNonCachedPageTableLevels, 5105 v->DynamicMetadataVMEnabled, 5106 v->ImmediateFlipRequirement, 5107 v->ProgressiveToInterlaceUnitInOPP, 5108 v->MaxAveragePercentOfIdealFabricAndSDPPortBWDisplayCanUseInNormalSystemOperation, 5109 v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency, 5110 v->VTotal, 5111 v->VActive, 5112 v->DynamicMetadataTransmittedBytes, 5113 v->DynamicMetadataLinesBeforeActiveRequired, 5114 v->Interlace, 5115 v->RequiredDPPCLK, 5116 v->RequiredDISPCLK, 5117 v->UrgLatency, 5118 v->NoOfDPP, 5119 v->ProjectedDCFCLKDeepSleep, 5120 v->MaximumVStartup, 5121 v->TotalVActivePixelBandwidth, 5122 v->TotalVActiveCursorBandwidth, 5123 v->TotalMetaRowBandwidth, 5124 
v->TotalDPTERowBandwidth, 5125 v->TotalNumberOfActiveDPP, 5126 v->TotalNumberOfDCCActiveDPP, 5127 v->dpte_group_bytes, 5128 v->PrefetchLinesY, 5129 v->PrefetchLinesC, 5130 v->swath_width_luma_ub_all_states, 5131 v->swath_width_chroma_ub_all_states, 5132 v->BytePerPixelY, 5133 v->BytePerPixelC, 5134 v->HTotal, 5135 v->PixelClock, 5136 v->PDEAndMetaPTEBytesPerFrame, 5137 v->DPTEBytesPerRow, 5138 v->MetaRowBytes, 5139 v->DynamicMetadataEnable, 5140 v->VActivePixelBandwidth, 5141 v->VActiveCursorBandwidth, 5142 v->ReadBandwidthLuma, 5143 v->ReadBandwidthChroma, 5144 v->DCFCLKPerState, 5145 v->DCFCLKState); 5146 } 5147 5148 for (i = 0; i < v->soc.num_states; ++i) { 5149 for (j = 0; j <= 1; ++j) { 5150 double IdealFabricAndSDPPortBandwidthPerState = dml_min( 5151 v->ReturnBusWidth * v->DCFCLKState[i][j], 5152 v->FabricClockPerState[i] * v->FabricDatapathToDCNDataReturn); 5153 double IdealDRAMBandwidthPerState = v->DRAMSpeedPerState[i] * v->NumberOfChannels * v->DRAMChannelWidth; 5154 double PixelDataOnlyReturnBWPerState = dml_min( 5155 IdealFabricAndSDPPortBandwidthPerState * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0, 5156 IdealDRAMBandwidthPerState * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyPixelDataOnly / 100.0); 5157 double PixelMixedWithVMDataReturnBWPerState = dml_min( 5158 IdealFabricAndSDPPortBandwidthPerState * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0, 5159 IdealDRAMBandwidthPerState * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyPixelMixedWithVMData / 100.0); 5160 5161 if (v->HostVMEnable != true) { 5162 v->ReturnBWPerState[i][j] = PixelDataOnlyReturnBWPerState; 5163 } else { 5164 v->ReturnBWPerState[i][j] = PixelMixedWithVMDataReturnBWPerState; 5165 } 5166 } 5167 } 5168 5169 //Re-ordering Buffer Support Check 5170 for (i = 0; i < v->soc.num_states; ++i) { 5171 for (j = 0; j <= 1; ++j) { 5172 if ((v->ROBBufferSizeInKByte - v->PixelChunkSizeInKByte) * 1024 / v->ReturnBWPerState[i][j] 5173 > 
(v->RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__) / v->DCFCLKState[i][j] + ReorderingBytes / v->ReturnBWPerState[i][j]) { 5174 v->ROBSupport[i][j] = true; 5175 } else { 5176 v->ROBSupport[i][j] = false; 5177 } 5178 } 5179 } 5180 5181 //Vertical Active BW support check 5182 5183 MaxTotalVActiveRDBandwidth = 0; 5184 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 5185 MaxTotalVActiveRDBandwidth = MaxTotalVActiveRDBandwidth + v->ReadBandwidthLuma[k] + v->ReadBandwidthChroma[k]; 5186 } 5187 5188 for (i = 0; i < v->soc.num_states; ++i) { 5189 for (j = 0; j <= 1; ++j) { 5190 v->MaxTotalVerticalActiveAvailableBandwidth[i][j] = dml_min( 5191 dml_min( 5192 v->ReturnBusWidth * v->DCFCLKState[i][j], 5193 v->FabricClockPerState[i] * v->FabricDatapathToDCNDataReturn) 5194 * v->MaxAveragePercentOfIdealFabricAndSDPPortBWDisplayCanUseInNormalSystemOperation / 100, 5195 v->DRAMSpeedPerState[i] * v->NumberOfChannels * v->DRAMChannelWidth 5196 * v->MaxAveragePercentOfIdealDRAMBWDisplayCanUseInNormalSystemOperation / 100); 5197 5198 if (MaxTotalVActiveRDBandwidth <= v->MaxTotalVerticalActiveAvailableBandwidth[i][j]) { 5199 v->TotalVerticalActiveBandwidthSupport[i][j] = true; 5200 } else { 5201 v->TotalVerticalActiveBandwidthSupport[i][j] = false; 5202 } 5203 } 5204 } 5205 5206 v->UrgentLatency = CalculateUrgentLatency( 5207 v->UrgentLatencyPixelDataOnly, 5208 v->UrgentLatencyPixelMixedWithVMData, 5209 v->UrgentLatencyVMDataOnly, 5210 v->DoUrgentLatencyAdjustment, 5211 v->UrgentLatencyAdjustmentFabricClockComponent, 5212 v->UrgentLatencyAdjustmentFabricClockReference, 5213 v->FabricClock); 5214 //Prefetch Check 5215 for (i = 0; i < v->soc.num_states; ++i) { 5216 for (j = 0; j <= 1; ++j) { 5217 double VMDataOnlyReturnBWPerState; 5218 double HostVMInefficiencyFactor = 1; 5219 int NextPrefetchModeState = MinPrefetchMode; 5220 bool UnboundedRequestEnabledThisState = false; 5221 int CompressedBufferSizeInkByteThisState = 0; 5222 double dummy; 5223 5224 v->TimeCalc = 24 / 
v->ProjectedDCFCLKDeepSleep[i][j]; 5225 5226 v->BandwidthWithoutPrefetchSupported[i][j] = true; 5227 if (v->TotalVActivePixelBandwidth[i][j] + v->TotalVActiveCursorBandwidth[i][j] + v->TotalMetaRowBandwidth[i][j] 5228 + v->TotalDPTERowBandwidth[i][j] > v->ReturnBWPerState[i][j] || v->NotEnoughUrgentLatencyHidingA[i][j]) { 5229 v->BandwidthWithoutPrefetchSupported[i][j] = false; 5230 } 5231 5232 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 5233 v->NoOfDPPThisState[k] = v->NoOfDPP[i][j][k]; 5234 v->swath_width_luma_ub_this_state[k] = v->swath_width_luma_ub_all_states[i][j][k]; 5235 v->swath_width_chroma_ub_this_state[k] = v->swath_width_chroma_ub_all_states[i][j][k]; 5236 v->SwathWidthYThisState[k] = v->SwathWidthYAllStates[i][j][k]; 5237 v->SwathWidthCThisState[k] = v->SwathWidthCAllStates[i][j][k]; 5238 v->SwathHeightYThisState[k] = v->SwathHeightYAllStates[i][j][k]; 5239 v->SwathHeightCThisState[k] = v->SwathHeightCAllStates[i][j][k]; 5240 v->DETBufferSizeYThisState[k] = v->DETBufferSizeYAllStates[i][j][k]; 5241 v->DETBufferSizeCThisState[k] = v->DETBufferSizeCAllStates[i][j][k]; 5242 } 5243 5244 VMDataOnlyReturnBWPerState = dml_min( 5245 dml_min( 5246 v->ReturnBusWidth * v->DCFCLKState[i][j], 5247 v->FabricClockPerState[i] * v->FabricDatapathToDCNDataReturn) 5248 * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0, 5249 v->DRAMSpeedPerState[i] * v->NumberOfChannels * v->DRAMChannelWidth 5250 * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyVMDataOnly / 100.0); 5251 if (v->GPUVMEnable && v->HostVMEnable) 5252 HostVMInefficiencyFactor = v->ReturnBWPerState[i][j] / VMDataOnlyReturnBWPerState; 5253 5254 v->ExtraLatency = CalculateExtraLatency( 5255 v->RoundTripPingLatencyCycles, 5256 ReorderingBytes, 5257 v->DCFCLKState[i][j], 5258 v->TotalNumberOfActiveDPP[i][j], 5259 v->PixelChunkSizeInKByte, 5260 v->TotalNumberOfDCCActiveDPP[i][j], 5261 v->MetaChunkSize, 5262 v->ReturnBWPerState[i][j], 5263 v->GPUVMEnable, 5264 v->HostVMEnable, 5265 
v->NumberOfActivePlanes, 5266 v->NoOfDPPThisState, 5267 v->dpte_group_bytes, 5268 HostVMInefficiencyFactor, 5269 v->HostVMMinPageSize, 5270 v->HostVMMaxNonCachedPageTableLevels); 5271 5272 v->NextMaxVStartup = v->MaxMaxVStartup[i][j]; 5273 do { 5274 v->PrefetchModePerState[i][j] = NextPrefetchModeState; 5275 v->MaxVStartup = v->NextMaxVStartup; 5276 5277 v->TWait = CalculateTWait( 5278 v->PrefetchModePerState[i][j], 5279 v->DRAMClockChangeLatency, 5280 v->UrgLatency[i], 5281 v->SREnterPlusExitTime); 5282 5283 for (k = 0; k < v->NumberOfActivePlanes; k++) { 5284 Pipe myPipe; 5285 5286 myPipe.DPPCLK = v->RequiredDPPCLK[i][j][k]; 5287 myPipe.DISPCLK = v->RequiredDISPCLK[i][j]; 5288 myPipe.PixelClock = v->PixelClock[k]; 5289 myPipe.DCFCLKDeepSleep = v->ProjectedDCFCLKDeepSleep[i][j]; 5290 myPipe.DPPPerPlane = v->NoOfDPP[i][j][k]; 5291 myPipe.ScalerEnabled = v->ScalerEnabled[k]; 5292 myPipe.SourceScan = v->SourceScan[k]; 5293 myPipe.BlockWidth256BytesY = v->Read256BlockWidthY[k]; 5294 myPipe.BlockHeight256BytesY = v->Read256BlockHeightY[k]; 5295 myPipe.BlockWidth256BytesC = v->Read256BlockWidthC[k]; 5296 myPipe.BlockHeight256BytesC = v->Read256BlockHeightC[k]; 5297 myPipe.InterlaceEnable = v->Interlace[k]; 5298 myPipe.NumberOfCursors = v->NumberOfCursors[k]; 5299 myPipe.VBlank = v->VTotal[k] - v->VActive[k]; 5300 myPipe.HTotal = v->HTotal[k]; 5301 myPipe.DCCEnable = v->DCCEnable[k]; 5302 myPipe.ODMCombineIsEnabled = v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1 5303 || v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1; 5304 myPipe.SourcePixelFormat = v->SourcePixelFormat[k]; 5305 myPipe.BytePerPixelY = v->BytePerPixelY[k]; 5306 myPipe.BytePerPixelC = v->BytePerPixelC[k]; 5307 myPipe.ProgressiveToInterlaceUnitInOPP = v->ProgressiveToInterlaceUnitInOPP; 5308 v->NoTimeForPrefetch[i][j][k] = CalculatePrefetchSchedule( 5309 mode_lib, 5310 HostVMInefficiencyFactor, 5311 &myPipe, 5312 v->DSCDelayPerState[i][k], 5313 v->DPPCLKDelaySubtotal + 
v->DPPCLKDelayCNVCFormater, 5314 v->DPPCLKDelaySCL, 5315 v->DPPCLKDelaySCLLBOnly, 5316 v->DPPCLKDelayCNVCCursor, 5317 v->DISPCLKDelaySubtotal, 5318 v->SwathWidthYThisState[k] / v->HRatio[k], 5319 v->OutputFormat[k], 5320 v->MaxInterDCNTileRepeaters, 5321 dml_min(v->MaxVStartup, v->MaximumVStartup[i][j][k]), 5322 v->MaximumVStartup[i][j][k], 5323 v->GPUVMMaxPageTableLevels, 5324 v->GPUVMEnable, 5325 v->HostVMEnable, 5326 v->HostVMMaxNonCachedPageTableLevels, 5327 v->HostVMMinPageSize, 5328 v->DynamicMetadataEnable[k], 5329 v->DynamicMetadataVMEnabled, 5330 v->DynamicMetadataLinesBeforeActiveRequired[k], 5331 v->DynamicMetadataTransmittedBytes[k], 5332 v->UrgLatency[i], 5333 v->ExtraLatency, 5334 v->TimeCalc, 5335 v->PDEAndMetaPTEBytesPerFrame[i][j][k], 5336 v->MetaRowBytes[i][j][k], 5337 v->DPTEBytesPerRow[i][j][k], 5338 v->PrefetchLinesY[i][j][k], 5339 v->SwathWidthYThisState[k], 5340 v->PrefillY[k], 5341 v->MaxNumSwY[k], 5342 v->PrefetchLinesC[i][j][k], 5343 v->SwathWidthCThisState[k], 5344 v->PrefillC[k], 5345 v->MaxNumSwC[k], 5346 v->swath_width_luma_ub_this_state[k], 5347 v->swath_width_chroma_ub_this_state[k], 5348 v->SwathHeightYThisState[k], 5349 v->SwathHeightCThisState[k], 5350 v->TWait, 5351 &v->DSTXAfterScaler[k], 5352 &v->DSTYAfterScaler[k], 5353 &v->LineTimesForPrefetch[k], 5354 &v->PrefetchBW[k], 5355 &v->LinesForMetaPTE[k], 5356 &v->LinesForMetaAndDPTERow[k], 5357 &v->VRatioPreY[i][j][k], 5358 &v->VRatioPreC[i][j][k], 5359 &v->RequiredPrefetchPixelDataBWLuma[i][j][k], 5360 &v->RequiredPrefetchPixelDataBWChroma[i][j][k], 5361 &v->NoTimeForDynamicMetadata[i][j][k], 5362 &v->Tno_bw[k], 5363 &v->prefetch_vmrow_bw[k], 5364 &v->dummy7[k], 5365 &v->dummy8[k], 5366 &v->dummy13[k], 5367 &v->VUpdateOffsetPix[k], 5368 &v->VUpdateWidthPix[k], 5369 &v->VReadyOffsetPix[k]); 5370 } 5371 5372 for (k = 0; k < v->NumberOfActivePlanes; k++) { 5373 CalculateUrgentBurstFactor( 5374 v->swath_width_luma_ub_this_state[k], 5375 v->swath_width_chroma_ub_this_state[k], 5376 
v->SwathHeightYThisState[k], 5377 v->SwathHeightCThisState[k], 5378 v->HTotal[k] / v->PixelClock[k], 5379 v->UrgentLatency, 5380 v->CursorBufferSize, 5381 v->CursorWidth[k][0], 5382 v->CursorBPP[k][0], 5383 v->VRatioPreY[i][j][k], 5384 v->VRatioPreC[i][j][k], 5385 v->BytePerPixelInDETY[k], 5386 v->BytePerPixelInDETC[k], 5387 v->DETBufferSizeYThisState[k], 5388 v->DETBufferSizeCThisState[k], 5389 &v->UrgentBurstFactorCursorPre[k], 5390 &v->UrgentBurstFactorLumaPre[k], 5391 &v->UrgentBurstFactorChroma[k], 5392 &v->NotUrgentLatencyHidingPre[k]); 5393 } 5394 5395 v->MaximumReadBandwidthWithPrefetch = 0.0; 5396 for (k = 0; k < v->NumberOfActivePlanes; k++) { 5397 v->cursor_bw_pre[k] = v->NumberOfCursors[k] * v->CursorWidth[k][0] * v->CursorBPP[k][0] / 8.0 5398 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatioPreY[i][j][k]; 5399 5400 v->MaximumReadBandwidthWithPrefetch = 5401 v->MaximumReadBandwidthWithPrefetch 5402 + dml_max4( 5403 v->VActivePixelBandwidth[i][j][k], 5404 v->VActiveCursorBandwidth[i][j][k] 5405 + v->NoOfDPP[i][j][k] 5406 * (v->meta_row_bandwidth[i][j][k] 5407 + v->dpte_row_bandwidth[i][j][k]), 5408 v->NoOfDPP[i][j][k] * v->prefetch_vmrow_bw[k], 5409 v->NoOfDPP[i][j][k] 5410 * (v->RequiredPrefetchPixelDataBWLuma[i][j][k] 5411 * v->UrgentBurstFactorLumaPre[k] 5412 + v->RequiredPrefetchPixelDataBWChroma[i][j][k] 5413 * v->UrgentBurstFactorChromaPre[k]) 5414 + v->cursor_bw_pre[k] * v->UrgentBurstFactorCursorPre[k]); 5415 } 5416 5417 v->NotEnoughUrgentLatencyHidingPre = false; 5418 for (k = 0; k < v->NumberOfActivePlanes; k++) { 5419 if (v->NotUrgentLatencyHidingPre[k] == true) { 5420 v->NotEnoughUrgentLatencyHidingPre = true; 5421 } 5422 } 5423 5424 v->PrefetchSupported[i][j] = true; 5425 if (v->BandwidthWithoutPrefetchSupported[i][j] == false || v->MaximumReadBandwidthWithPrefetch > v->ReturnBWPerState[i][j] 5426 || v->NotEnoughUrgentLatencyHidingPre == 1) { 5427 v->PrefetchSupported[i][j] = false; 5428 } 5429 for (k = 0; k < v->NumberOfActivePlanes; k++) { 
5430 if (v->LineTimesForPrefetch[k] < 2.0 || v->LinesForMetaPTE[k] >= 32.0 || v->LinesForMetaAndDPTERow[k] >= 16.0 5431 || v->NoTimeForPrefetch[i][j][k] == true) { 5432 v->PrefetchSupported[i][j] = false; 5433 } 5434 } 5435 5436 v->DynamicMetadataSupported[i][j] = true; 5437 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 5438 if (v->NoTimeForDynamicMetadata[i][j][k] == true) { 5439 v->DynamicMetadataSupported[i][j] = false; 5440 } 5441 } 5442 5443 v->VRatioInPrefetchSupported[i][j] = true; 5444 for (k = 0; k < v->NumberOfActivePlanes; k++) { 5445 if (v->VRatioPreY[i][j][k] > 4.0 || v->VRatioPreC[i][j][k] > 4.0 || v->NoTimeForPrefetch[i][j][k] == true) { 5446 v->VRatioInPrefetchSupported[i][j] = false; 5447 } 5448 } 5449 v->AnyLinesForVMOrRowTooLarge = false; 5450 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 5451 if (v->LinesForMetaAndDPTERow[k] >= 16 || v->LinesForMetaPTE[k] >= 32) { 5452 v->AnyLinesForVMOrRowTooLarge = true; 5453 } 5454 } 5455 5456 v->NextPrefetchMode = v->NextPrefetchMode + 1; 5457 5458 if (v->PrefetchSupported[i][j] == true && v->VRatioInPrefetchSupported[i][j] == true) { 5459 v->BandwidthAvailableForImmediateFlip = v->ReturnBWPerState[i][j]; 5460 for (k = 0; k < v->NumberOfActivePlanes; k++) { 5461 v->BandwidthAvailableForImmediateFlip = v->BandwidthAvailableForImmediateFlip 5462 - dml_max( 5463 v->VActivePixelBandwidth[i][j][k] + v->VActiveCursorBandwidth[i][j][k], 5464 v->NoOfDPP[i][j][k] 5465 * (v->RequiredPrefetchPixelDataBWLuma[i][j][k] 5466 * v->UrgentBurstFactorLumaPre[k] 5467 + v->RequiredPrefetchPixelDataBWChroma[i][j][k] 5468 * v->UrgentBurstFactorChromaPre[k]) 5469 + v->cursor_bw_pre[k] * v->UrgentBurstFactorCursorPre[k]); 5470 } 5471 v->TotImmediateFlipBytes = 0.0; 5472 for (k = 0; k < v->NumberOfActivePlanes; k++) { 5473 v->TotImmediateFlipBytes = v->TotImmediateFlipBytes 5474 + v->NoOfDPP[i][j][k] * v->PDEAndMetaPTEBytesPerFrame[i][j][k] + v->MetaRowBytes[i][j][k] 5475 + v->DPTEBytesPerRow[i][j][k]; 5476 } 5477 5478 for (k = 
0; k < v->NumberOfActivePlanes; k++) { 5479 CalculateFlipSchedule( 5480 mode_lib, 5481 HostVMInefficiencyFactor, 5482 v->ExtraLatency, 5483 v->UrgLatency[i], 5484 v->GPUVMMaxPageTableLevels, 5485 v->HostVMEnable, 5486 v->HostVMMaxNonCachedPageTableLevels, 5487 v->GPUVMEnable, 5488 v->HostVMMinPageSize, 5489 v->PDEAndMetaPTEBytesPerFrame[i][j][k], 5490 v->MetaRowBytes[i][j][k], 5491 v->DPTEBytesPerRow[i][j][k], 5492 v->BandwidthAvailableForImmediateFlip, 5493 v->TotImmediateFlipBytes, 5494 v->SourcePixelFormat[k], 5495 v->HTotal[k] / v->PixelClock[k], 5496 v->VRatio[k], 5497 v->VRatioChroma[k], 5498 v->Tno_bw[k], 5499 v->DCCEnable[k], 5500 v->dpte_row_height[k], 5501 v->meta_row_height[k], 5502 v->dpte_row_height_chroma[k], 5503 v->meta_row_height_chroma[k], 5504 &v->DestinationLinesToRequestVMInImmediateFlip[k], 5505 &v->DestinationLinesToRequestRowInImmediateFlip[k], 5506 &v->final_flip_bw[k], 5507 &v->ImmediateFlipSupportedForPipe[k]); 5508 } 5509 v->total_dcn_read_bw_with_flip = 0.0; 5510 for (k = 0; k < v->NumberOfActivePlanes; k++) { 5511 v->total_dcn_read_bw_with_flip = v->total_dcn_read_bw_with_flip 5512 + dml_max3( 5513 v->NoOfDPP[i][j][k] * v->prefetch_vmrow_bw[k], 5514 v->NoOfDPP[i][j][k] * v->final_flip_bw[k] + v->VActivePixelBandwidth[i][j][k] 5515 + v->VActiveCursorBandwidth[i][j][k], 5516 v->NoOfDPP[i][j][k] 5517 * (v->final_flip_bw[k] 5518 + v->RequiredPrefetchPixelDataBWLuma[i][j][k] 5519 * v->UrgentBurstFactorLumaPre[k] 5520 + v->RequiredPrefetchPixelDataBWChroma[i][j][k] 5521 * v->UrgentBurstFactorChromaPre[k]) 5522 + v->cursor_bw_pre[k] * v->UrgentBurstFactorCursorPre[k]); 5523 } 5524 v->ImmediateFlipSupportedForState[i][j] = true; 5525 if (v->total_dcn_read_bw_with_flip > v->ReturnBWPerState[i][j]) { 5526 v->ImmediateFlipSupportedForState[i][j] = false; 5527 } 5528 for (k = 0; k < v->NumberOfActivePlanes; k++) { 5529 if (v->ImmediateFlipSupportedForPipe[k] == false) { 5530 v->ImmediateFlipSupportedForState[i][j] = false; 5531 } 5532 } 5533 } 
else { 5534 v->ImmediateFlipSupportedForState[i][j] = false; 5535 } 5536 5537 if (v->MaxVStartup <= __DML_VBA_MIN_VSTARTUP__ || v->AnyLinesForVMOrRowTooLarge == false) { 5538 v->NextMaxVStartup = v->MaxMaxVStartup[i][j]; 5539 NextPrefetchModeState = NextPrefetchModeState + 1; 5540 } else { 5541 v->NextMaxVStartup = v->NextMaxVStartup - 1; 5542 } 5543 v->NextPrefetchMode = v->NextPrefetchMode + 1; 5544 } while (!((v->PrefetchSupported[i][j] == true && v->DynamicMetadataSupported[i][j] == true && v->VRatioInPrefetchSupported[i][j] == true 5545 && ((v->HostVMEnable == false && v->ImmediateFlipRequirement != dm_immediate_flip_required) 5546 || v->ImmediateFlipSupportedForState[i][j] == true)) 5547 || (v->NextMaxVStartup == v->MaxMaxVStartup[i][j] && NextPrefetchModeState > MaxPrefetchMode))); 5548 5549 CalculateUnboundedRequestAndCompressedBufferSize( 5550 v->DETBufferSizeInKByte[0], 5551 v->ConfigReturnBufferSizeInKByte, 5552 v->UseUnboundedRequesting, 5553 v->TotalNumberOfActiveDPP[i][j], 5554 NoChroma, 5555 v->MaxNumDPP, 5556 v->CompressedBufferSegmentSizeInkByte, 5557 v->Output, 5558 &UnboundedRequestEnabledThisState, 5559 &CompressedBufferSizeInkByteThisState); 5560 5561 CalculateWatermarksAndDRAMSpeedChangeSupport( 5562 mode_lib, 5563 v->PrefetchModePerState[i][j], 5564 v->NumberOfActivePlanes, 5565 v->MaxLineBufferLines, 5566 v->LineBufferSize, 5567 v->WritebackInterfaceBufferSize, 5568 v->DCFCLKState[i][j], 5569 v->ReturnBWPerState[i][j], 5570 v->SynchronizedVBlank, 5571 v->dpte_group_bytes, 5572 v->MetaChunkSize, 5573 v->UrgLatency[i], 5574 v->ExtraLatency, 5575 v->WritebackLatency, 5576 v->WritebackChunkSize, 5577 v->SOCCLKPerState[i], 5578 v->DRAMClockChangeLatency, 5579 v->SRExitTime, 5580 v->SREnterPlusExitTime, 5581 v->SRExitZ8Time, 5582 v->SREnterPlusExitZ8Time, 5583 v->ProjectedDCFCLKDeepSleep[i][j], 5584 v->DETBufferSizeYThisState, 5585 v->DETBufferSizeCThisState, 5586 v->SwathHeightYThisState, 5587 v->SwathHeightCThisState, 5588 v->LBBitPerPixel, 5589 
v->SwathWidthYThisState, 5590 v->SwathWidthCThisState, 5591 v->HRatio, 5592 v->HRatioChroma, 5593 v->vtaps, 5594 v->VTAPsChroma, 5595 v->VRatio, 5596 v->VRatioChroma, 5597 v->HTotal, 5598 v->PixelClock, 5599 v->BlendingAndTiming, 5600 v->NoOfDPPThisState, 5601 v->BytePerPixelInDETY, 5602 v->BytePerPixelInDETC, 5603 v->DSTXAfterScaler, 5604 v->DSTYAfterScaler, 5605 v->WritebackEnable, 5606 v->WritebackPixelFormat, 5607 v->WritebackDestinationWidth, 5608 v->WritebackDestinationHeight, 5609 v->WritebackSourceHeight, 5610 UnboundedRequestEnabledThisState, 5611 CompressedBufferSizeInkByteThisState, 5612 &v->DRAMClockChangeSupport[i][j], 5613 &v->UrgentWatermark, 5614 &v->WritebackUrgentWatermark, 5615 &v->DRAMClockChangeWatermark, 5616 &v->WritebackDRAMClockChangeWatermark, 5617 &dummy, 5618 &dummy, 5619 &dummy, 5620 &dummy, 5621 &v->MinActiveDRAMClockChangeLatencySupported); 5622 } 5623 } 5624 5625 /*PTE Buffer Size Check*/ 5626 for (i = 0; i < v->soc.num_states; i++) { 5627 for (j = 0; j < 2; j++) { 5628 v->PTEBufferSizeNotExceeded[i][j] = true; 5629 for (k = 0; k < v->NumberOfActivePlanes; k++) { 5630 if (v->PTEBufferSizeNotExceededY[i][j][k] == false || v->PTEBufferSizeNotExceededC[i][j][k] == false) { 5631 v->PTEBufferSizeNotExceeded[i][j] = false; 5632 } 5633 } 5634 } 5635 } 5636 5637 /*Cursor Support Check*/ 5638 v->CursorSupport = true; 5639 for (k = 0; k < v->NumberOfActivePlanes; k++) { 5640 if (v->CursorWidth[k][0] > 0.0) { 5641 if (v->CursorBPP[k][0] == 64 && v->Cursor64BppSupport == false) { 5642 v->CursorSupport = false; 5643 } 5644 } 5645 } 5646 5647 /*Valid Pitch Check*/ 5648 v->PitchSupport = true; 5649 for (k = 0; k < v->NumberOfActivePlanes; k++) { 5650 v->AlignedYPitch[k] = dml_ceil(dml_max(v->PitchY[k], v->SurfaceWidthY[k]), v->MacroTileWidthY[k]); 5651 if (v->DCCEnable[k] == true) { 5652 v->AlignedDCCMetaPitchY[k] = dml_ceil(dml_max(v->DCCMetaPitchY[k], v->SurfaceWidthY[k]), 64.0 * v->Read256BlockWidthY[k]); 5653 } else { 5654 
v->AlignedDCCMetaPitchY[k] = v->DCCMetaPitchY[k]; 5655 } 5656 if (v->SourcePixelFormat[k] != dm_444_64 && v->SourcePixelFormat[k] != dm_444_32 && v->SourcePixelFormat[k] != dm_444_16 5657 && v->SourcePixelFormat[k] != dm_mono_16 && v->SourcePixelFormat[k] != dm_rgbe 5658 && v->SourcePixelFormat[k] != dm_mono_8) { 5659 v->AlignedCPitch[k] = dml_ceil(dml_max(v->PitchC[k], v->SurfaceWidthC[k]), v->MacroTileWidthC[k]); 5660 if (v->DCCEnable[k] == true) { 5661 v->AlignedDCCMetaPitchC[k] = dml_ceil( 5662 dml_max(v->DCCMetaPitchC[k], v->SurfaceWidthC[k]), 5663 64.0 * v->Read256BlockWidthC[k]); 5664 } else { 5665 v->AlignedDCCMetaPitchC[k] = v->DCCMetaPitchC[k]; 5666 } 5667 } else { 5668 v->AlignedCPitch[k] = v->PitchC[k]; 5669 v->AlignedDCCMetaPitchC[k] = v->DCCMetaPitchC[k]; 5670 } 5671 if (v->AlignedYPitch[k] > v->PitchY[k] || v->AlignedCPitch[k] > v->PitchC[k] 5672 || v->AlignedDCCMetaPitchY[k] > v->DCCMetaPitchY[k] || v->AlignedDCCMetaPitchC[k] > v->DCCMetaPitchC[k]) { 5673 v->PitchSupport = false; 5674 } 5675 } 5676 5677 for (k = 0; k < v->NumberOfActivePlanes; k++) { 5678 if (v->ViewportWidth[k] > v->SurfaceWidthY[k] || v->ViewportHeight[k] > v->SurfaceHeightY[k]) { 5679 ViewportExceedsSurface = true; 5680 if (v->SourcePixelFormat[k] != dm_444_64 && v->SourcePixelFormat[k] != dm_444_32 5681 && v->SourcePixelFormat[k] != dm_444_16 && v->SourcePixelFormat[k] != dm_444_8 5682 && v->SourcePixelFormat[k] != dm_rgbe) { 5683 if (v->ViewportWidthChroma[k] > v->SurfaceWidthC[k] 5684 || v->ViewportHeightChroma[k] > v->SurfaceHeightC[k]) { 5685 ViewportExceedsSurface = true; 5686 } 5687 } 5688 } 5689 } 5690 5691 /*Mode Support, Voltage State and SOC Configuration*/ 5692 for (i = v->soc.num_states - 1; i >= 0; i--) { 5693 for (j = 0; j < 2; j++) { 5694 if (v->ScaleRatioAndTapsSupport == true && v->SourceFormatPixelAndScanSupport == true && v->ViewportSizeSupport[i][j] == true 5695 && v->LinkCapacitySupport[i] == true && !P2IWith420 && !DSCOnlyIfNecessaryWithBPP 5696 && 
!DSC422NativeNotSupported && v->ODMCombine4To1SupportCheckOK[i] == true && v->NotEnoughDSCUnits[i] == false 5697 && v->DTBCLKRequiredMoreThanSupported[i] == false 5698 && v->ROBSupport[i][j] == true && v->DISPCLK_DPPCLK_Support[i][j] == true 5699 && v->TotalAvailablePipesSupport[i][j] == true && EnoughWritebackUnits == true 5700 && v->WritebackLatencySupport == true && v->WritebackScaleRatioAndTapsSupport == true 5701 && v->CursorSupport == true && v->PitchSupport == true && ViewportExceedsSurface == false 5702 && v->PrefetchSupported[i][j] == true && v->DynamicMetadataSupported[i][j] == true 5703 && v->TotalVerticalActiveBandwidthSupport[i][j] == true && v->VRatioInPrefetchSupported[i][j] == true 5704 && v->PTEBufferSizeNotExceeded[i][j] == true && v->NonsupportedDSCInputBPC == false 5705 && ((v->HostVMEnable == false && v->ImmediateFlipRequirement != dm_immediate_flip_required) 5706 || v->ImmediateFlipSupportedForState[i][j] == true) 5707 && FMTBufferExceeded == false) { 5708 v->ModeSupport[i][j] = true; 5709 } else { 5710 v->ModeSupport[i][j] = false; 5711 } 5712 } 5713 } 5714 5715 { 5716 unsigned int MaximumMPCCombine = 0; 5717 for (i = v->soc.num_states; i >= 0; i--) { 5718 if (i == v->soc.num_states || v->ModeSupport[i][0] == true || v->ModeSupport[i][1] == true) { 5719 v->VoltageLevel = i; 5720 v->ModeIsSupported = v->ModeSupport[i][0] == true || v->ModeSupport[i][1] == true; 5721 if (v->ModeSupport[i][0] == true) { 5722 MaximumMPCCombine = 0; 5723 } else { 5724 MaximumMPCCombine = 1; 5725 } 5726 } 5727 } 5728 v->ImmediateFlipSupport = v->ImmediateFlipSupportedForState[v->VoltageLevel][MaximumMPCCombine]; 5729 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) { 5730 v->MPCCombineEnable[k] = v->MPCCombine[v->VoltageLevel][MaximumMPCCombine][k]; 5731 v->DPPPerPlane[k] = v->NoOfDPP[v->VoltageLevel][MaximumMPCCombine][k]; 5732 } 5733 v->DCFCLK = v->DCFCLKState[v->VoltageLevel][MaximumMPCCombine]; 5734 v->DRAMSpeed = v->DRAMSpeedPerState[v->VoltageLevel]; 5735 
v->FabricClock = v->FabricClockPerState[v->VoltageLevel]; 5736 v->SOCCLK = v->SOCCLKPerState[v->VoltageLevel]; 5737 v->ReturnBW = v->ReturnBWPerState[v->VoltageLevel][MaximumMPCCombine]; 5738 v->maxMpcComb = MaximumMPCCombine; 5739 } 5740 } 5741 5742 static void CalculateWatermarksAndDRAMSpeedChangeSupport( 5743 struct display_mode_lib *mode_lib, 5744 unsigned int PrefetchMode, 5745 unsigned int NumberOfActivePlanes, 5746 unsigned int MaxLineBufferLines, 5747 unsigned int LineBufferSize, 5748 unsigned int WritebackInterfaceBufferSize, 5749 double DCFCLK, 5750 double ReturnBW, 5751 bool SynchronizedVBlank, 5752 unsigned int dpte_group_bytes[], 5753 unsigned int MetaChunkSize, 5754 double UrgentLatency, 5755 double ExtraLatency, 5756 double WritebackLatency, 5757 double WritebackChunkSize, 5758 double SOCCLK, 5759 double DRAMClockChangeLatency, 5760 double SRExitTime, 5761 double SREnterPlusExitTime, 5762 double SRExitZ8Time, 5763 double SREnterPlusExitZ8Time, 5764 double DCFCLKDeepSleep, 5765 unsigned int DETBufferSizeY[], 5766 unsigned int DETBufferSizeC[], 5767 unsigned int SwathHeightY[], 5768 unsigned int SwathHeightC[], 5769 unsigned int LBBitPerPixel[], 5770 double SwathWidthY[], 5771 double SwathWidthC[], 5772 double HRatio[], 5773 double HRatioChroma[], 5774 unsigned int vtaps[], 5775 unsigned int VTAPsChroma[], 5776 double VRatio[], 5777 double VRatioChroma[], 5778 unsigned int HTotal[], 5779 double PixelClock[], 5780 unsigned int BlendingAndTiming[], 5781 unsigned int DPPPerPlane[], 5782 double BytePerPixelDETY[], 5783 double BytePerPixelDETC[], 5784 double DSTXAfterScaler[], 5785 double DSTYAfterScaler[], 5786 bool WritebackEnable[], 5787 enum source_format_class WritebackPixelFormat[], 5788 double WritebackDestinationWidth[], 5789 double WritebackDestinationHeight[], 5790 double WritebackSourceHeight[], 5791 bool UnboundedRequestEnabled, 5792 int unsigned CompressedBufferSizeInkByte, 5793 enum clock_change_support *DRAMClockChangeSupport, 5794 double 
*UrgentWatermark, 5795 double *WritebackUrgentWatermark, 5796 double *DRAMClockChangeWatermark, 5797 double *WritebackDRAMClockChangeWatermark, 5798 double *StutterExitWatermark, 5799 double *StutterEnterPlusExitWatermark, 5800 double *Z8StutterExitWatermark, 5801 double *Z8StutterEnterPlusExitWatermark, 5802 double *MinActiveDRAMClockChangeLatencySupported) 5803 { 5804 struct vba_vars_st *v = &mode_lib->vba; 5805 double EffectiveLBLatencyHidingY; 5806 double EffectiveLBLatencyHidingC; 5807 double LinesInDETY[DC__NUM_DPP__MAX]; 5808 double LinesInDETC; 5809 unsigned int LinesInDETYRoundedDownToSwath[DC__NUM_DPP__MAX]; 5810 unsigned int LinesInDETCRoundedDownToSwath; 5811 double FullDETBufferingTimeY; 5812 double FullDETBufferingTimeC; 5813 double ActiveDRAMClockChangeLatencyMarginY; 5814 double ActiveDRAMClockChangeLatencyMarginC; 5815 double WritebackDRAMClockChangeLatencyMargin; 5816 double PlaneWithMinActiveDRAMClockChangeMargin; 5817 double SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank; 5818 double WritebackDRAMClockChangeLatencyHiding; 5819 double TotalPixelBW = 0.0; 5820 int k, j; 5821 5822 *UrgentWatermark = UrgentLatency + ExtraLatency; 5823 5824 #ifdef __DML_VBA_DEBUG__ 5825 dml_print("DML::%s: UrgentLatency = %f\n", __func__, UrgentLatency); 5826 dml_print("DML::%s: ExtraLatency = %f\n", __func__, ExtraLatency); 5827 dml_print("DML::%s: UrgentWatermark = %f\n", __func__, *UrgentWatermark); 5828 #endif 5829 5830 *DRAMClockChangeWatermark = DRAMClockChangeLatency + *UrgentWatermark; 5831 5832 #ifdef __DML_VBA_DEBUG__ 5833 dml_print("DML::%s: DRAMClockChangeLatency = %f\n", __func__, DRAMClockChangeLatency); 5834 dml_print("DML::%s: DRAMClockChangeWatermark = %f\n", __func__, *DRAMClockChangeWatermark); 5835 #endif 5836 5837 v->TotalActiveWriteback = 0; 5838 for (k = 0; k < NumberOfActivePlanes; ++k) { 5839 if (WritebackEnable[k] == true) { 5840 v->TotalActiveWriteback = v->TotalActiveWriteback + 1; 5841 } 5842 } 5843 5844 if 
(v->TotalActiveWriteback <= 1) { 5845 *WritebackUrgentWatermark = WritebackLatency; 5846 } else { 5847 *WritebackUrgentWatermark = WritebackLatency + WritebackChunkSize * 1024.0 / 32.0 / SOCCLK; 5848 } 5849 5850 if (v->TotalActiveWriteback <= 1) { 5851 *WritebackDRAMClockChangeWatermark = DRAMClockChangeLatency + WritebackLatency; 5852 } else { 5853 *WritebackDRAMClockChangeWatermark = DRAMClockChangeLatency + WritebackLatency + WritebackChunkSize * 1024.0 / 32.0 / SOCCLK; 5854 } 5855 5856 for (k = 0; k < NumberOfActivePlanes; ++k) { 5857 TotalPixelBW = TotalPixelBW 5858 + DPPPerPlane[k] * (SwathWidthY[k] * BytePerPixelDETY[k] * VRatio[k] + SwathWidthC[k] * BytePerPixelDETC[k] * VRatioChroma[k]) 5859 / (HTotal[k] / PixelClock[k]); 5860 } 5861 5862 for (k = 0; k < NumberOfActivePlanes; ++k) { 5863 double EffectiveDETBufferSizeY = DETBufferSizeY[k]; 5864 5865 v->LBLatencyHidingSourceLinesY = dml_min( 5866 (double) MaxLineBufferLines, 5867 dml_floor(LineBufferSize / LBBitPerPixel[k] / (SwathWidthY[k] / dml_max(HRatio[k], 1.0)), 1)) - (vtaps[k] - 1); 5868 5869 v->LBLatencyHidingSourceLinesC = dml_min( 5870 (double) MaxLineBufferLines, 5871 dml_floor(LineBufferSize / LBBitPerPixel[k] / (SwathWidthC[k] / dml_max(HRatioChroma[k], 1.0)), 1)) - (VTAPsChroma[k] - 1); 5872 5873 EffectiveLBLatencyHidingY = v->LBLatencyHidingSourceLinesY / VRatio[k] * (HTotal[k] / PixelClock[k]); 5874 5875 EffectiveLBLatencyHidingC = v->LBLatencyHidingSourceLinesC / VRatioChroma[k] * (HTotal[k] / PixelClock[k]); 5876 5877 if (UnboundedRequestEnabled) { 5878 EffectiveDETBufferSizeY = EffectiveDETBufferSizeY 5879 + CompressedBufferSizeInkByte * 1024 * SwathWidthY[k] * BytePerPixelDETY[k] * VRatio[k] / (HTotal[k] / PixelClock[k]) / TotalPixelBW; 5880 } 5881 5882 LinesInDETY[k] = (double) EffectiveDETBufferSizeY / BytePerPixelDETY[k] / SwathWidthY[k]; 5883 LinesInDETYRoundedDownToSwath[k] = dml_floor(LinesInDETY[k], SwathHeightY[k]); 5884 FullDETBufferingTimeY = LinesInDETYRoundedDownToSwath[k] * 
(HTotal[k] / PixelClock[k]) / VRatio[k]; 5885 if (BytePerPixelDETC[k] > 0) { 5886 LinesInDETC = v->DETBufferSizeC[k] / BytePerPixelDETC[k] / SwathWidthC[k]; 5887 LinesInDETCRoundedDownToSwath = dml_floor(LinesInDETC, SwathHeightC[k]); 5888 FullDETBufferingTimeC = LinesInDETCRoundedDownToSwath * (HTotal[k] / PixelClock[k]) / VRatioChroma[k]; 5889 } else { 5890 LinesInDETC = 0; 5891 FullDETBufferingTimeC = 999999; 5892 } 5893 5894 ActiveDRAMClockChangeLatencyMarginY = EffectiveLBLatencyHidingY + FullDETBufferingTimeY 5895 - ((double) DSTXAfterScaler[k] / HTotal[k] + DSTYAfterScaler[k]) * HTotal[k] / PixelClock[k] - *UrgentWatermark - *DRAMClockChangeWatermark; 5896 5897 if (NumberOfActivePlanes > 1) { 5898 ActiveDRAMClockChangeLatencyMarginY = ActiveDRAMClockChangeLatencyMarginY 5899 - (1 - 1.0 / NumberOfActivePlanes) * SwathHeightY[k] * HTotal[k] / PixelClock[k] / VRatio[k]; 5900 } 5901 5902 if (BytePerPixelDETC[k] > 0) { 5903 ActiveDRAMClockChangeLatencyMarginC = EffectiveLBLatencyHidingC + FullDETBufferingTimeC 5904 - ((double) DSTXAfterScaler[k] / HTotal[k] + DSTYAfterScaler[k]) * HTotal[k] / PixelClock[k] - *UrgentWatermark - *DRAMClockChangeWatermark; 5905 5906 if (NumberOfActivePlanes > 1) { 5907 ActiveDRAMClockChangeLatencyMarginC = ActiveDRAMClockChangeLatencyMarginC 5908 - (1 - 1.0 / NumberOfActivePlanes) * SwathHeightC[k] * HTotal[k] / PixelClock[k] / VRatioChroma[k]; 5909 } 5910 v->ActiveDRAMClockChangeLatencyMargin[k] = dml_min(ActiveDRAMClockChangeLatencyMarginY, ActiveDRAMClockChangeLatencyMarginC); 5911 } else { 5912 v->ActiveDRAMClockChangeLatencyMargin[k] = ActiveDRAMClockChangeLatencyMarginY; 5913 } 5914 5915 if (WritebackEnable[k] == true) { 5916 WritebackDRAMClockChangeLatencyHiding = WritebackInterfaceBufferSize * 1024 5917 / (WritebackDestinationWidth[k] * WritebackDestinationHeight[k] / (WritebackSourceHeight[k] * HTotal[k] / PixelClock[k]) * 4); 5918 if (WritebackPixelFormat[k] == dm_444_64) { 5919 WritebackDRAMClockChangeLatencyHiding = 
WritebackDRAMClockChangeLatencyHiding / 2; 5920 } 5921 WritebackDRAMClockChangeLatencyMargin = WritebackDRAMClockChangeLatencyHiding - v->WritebackDRAMClockChangeWatermark; 5922 v->ActiveDRAMClockChangeLatencyMargin[k] = dml_min(v->ActiveDRAMClockChangeLatencyMargin[k], WritebackDRAMClockChangeLatencyMargin); 5923 } 5924 } 5925 5926 v->MinActiveDRAMClockChangeMargin = 999999; 5927 PlaneWithMinActiveDRAMClockChangeMargin = 0; 5928 for (k = 0; k < NumberOfActivePlanes; ++k) { 5929 if (v->ActiveDRAMClockChangeLatencyMargin[k] < v->MinActiveDRAMClockChangeMargin) { 5930 v->MinActiveDRAMClockChangeMargin = v->ActiveDRAMClockChangeLatencyMargin[k]; 5931 if (BlendingAndTiming[k] == k) { 5932 PlaneWithMinActiveDRAMClockChangeMargin = k; 5933 } else { 5934 for (j = 0; j < NumberOfActivePlanes; ++j) { 5935 if (BlendingAndTiming[k] == j) { 5936 PlaneWithMinActiveDRAMClockChangeMargin = j; 5937 } 5938 } 5939 } 5940 } 5941 } 5942 5943 *MinActiveDRAMClockChangeLatencySupported = v->MinActiveDRAMClockChangeMargin + DRAMClockChangeLatency; 5944 5945 SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank = 999999; 5946 for (k = 0; k < NumberOfActivePlanes; ++k) { 5947 if (!((k == PlaneWithMinActiveDRAMClockChangeMargin) && (BlendingAndTiming[k] == k)) && !(BlendingAndTiming[k] == PlaneWithMinActiveDRAMClockChangeMargin) 5948 && v->ActiveDRAMClockChangeLatencyMargin[k] < SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank) { 5949 SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank = v->ActiveDRAMClockChangeLatencyMargin[k]; 5950 } 5951 } 5952 5953 v->TotalNumberOfActiveOTG = 0; 5954 5955 for (k = 0; k < NumberOfActivePlanes; ++k) { 5956 if (BlendingAndTiming[k] == k) { 5957 v->TotalNumberOfActiveOTG = v->TotalNumberOfActiveOTG + 1; 5958 } 5959 } 5960 5961 if (v->MinActiveDRAMClockChangeMargin > 0 && PrefetchMode == 0) { 5962 *DRAMClockChangeSupport = dm_dram_clock_change_vactive; 5963 } else if ((SynchronizedVBlank == true || v->TotalNumberOfActiveOTG == 1 5964 || 
SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank > 0) && PrefetchMode == 0) { 5965 *DRAMClockChangeSupport = dm_dram_clock_change_vblank; 5966 } else { 5967 *DRAMClockChangeSupport = dm_dram_clock_change_unsupported; 5968 } 5969 5970 *StutterExitWatermark = SRExitTime + ExtraLatency + 10 / DCFCLKDeepSleep; 5971 *StutterEnterPlusExitWatermark = (SREnterPlusExitTime + ExtraLatency + 10 / DCFCLKDeepSleep); 5972 *Z8StutterExitWatermark = SRExitZ8Time + ExtraLatency + 10 / DCFCLKDeepSleep; 5973 *Z8StutterEnterPlusExitWatermark = SREnterPlusExitZ8Time + ExtraLatency + 10 / DCFCLKDeepSleep; 5974 5975 #ifdef __DML_VBA_DEBUG__ 5976 dml_print("DML::%s: StutterExitWatermark = %f\n", __func__, *StutterExitWatermark); 5977 dml_print("DML::%s: StutterEnterPlusExitWatermark = %f\n", __func__, *StutterEnterPlusExitWatermark); 5978 dml_print("DML::%s: Z8StutterExitWatermark = %f\n", __func__, *Z8StutterExitWatermark); 5979 dml_print("DML::%s: Z8StutterEnterPlusExitWatermark = %f\n", __func__, *Z8StutterEnterPlusExitWatermark); 5980 #endif 5981 } 5982 5983 static void CalculateDCFCLKDeepSleep( 5984 struct display_mode_lib *mode_lib, 5985 unsigned int NumberOfActivePlanes, 5986 int BytePerPixelY[], 5987 int BytePerPixelC[], 5988 double VRatio[], 5989 double VRatioChroma[], 5990 double SwathWidthY[], 5991 double SwathWidthC[], 5992 unsigned int DPPPerPlane[], 5993 double HRatio[], 5994 double HRatioChroma[], 5995 double PixelClock[], 5996 double PSCL_THROUGHPUT[], 5997 double PSCL_THROUGHPUT_CHROMA[], 5998 double DPPCLK[], 5999 double ReadBandwidthLuma[], 6000 double ReadBandwidthChroma[], 6001 int ReturnBusWidth, 6002 double *DCFCLKDeepSleep) 6003 { 6004 struct vba_vars_st *v = &mode_lib->vba; 6005 double DisplayPipeLineDeliveryTimeLuma; 6006 double DisplayPipeLineDeliveryTimeChroma; 6007 double ReadBandwidth = 0.0; 6008 int k; 6009 6010 for (k = 0; k < NumberOfActivePlanes; ++k) { 6011 6012 if (VRatio[k] <= 1) { 6013 DisplayPipeLineDeliveryTimeLuma = SwathWidthY[k] * 
DPPPerPlane[k] / HRatio[k] / PixelClock[k]; 6014 } else { 6015 DisplayPipeLineDeliveryTimeLuma = SwathWidthY[k] / PSCL_THROUGHPUT[k] / DPPCLK[k]; 6016 } 6017 if (BytePerPixelC[k] == 0) { 6018 DisplayPipeLineDeliveryTimeChroma = 0; 6019 } else { 6020 if (VRatioChroma[k] <= 1) { 6021 DisplayPipeLineDeliveryTimeChroma = SwathWidthC[k] * DPPPerPlane[k] / HRatioChroma[k] / PixelClock[k]; 6022 } else { 6023 DisplayPipeLineDeliveryTimeChroma = SwathWidthC[k] / PSCL_THROUGHPUT_CHROMA[k] / DPPCLK[k]; 6024 } 6025 } 6026 6027 if (BytePerPixelC[k] > 0) { 6028 v->DCFCLKDeepSleepPerPlane[k] = dml_max(__DML_MIN_DCFCLK_FACTOR__ * SwathWidthY[k] * BytePerPixelY[k] / 32.0 / DisplayPipeLineDeliveryTimeLuma, 6029 __DML_MIN_DCFCLK_FACTOR__ * SwathWidthC[k] * BytePerPixelC[k] / 32.0 / DisplayPipeLineDeliveryTimeChroma); 6030 } else { 6031 v->DCFCLKDeepSleepPerPlane[k] = __DML_MIN_DCFCLK_FACTOR__ * SwathWidthY[k] * BytePerPixelY[k] / 64.0 / DisplayPipeLineDeliveryTimeLuma; 6032 } 6033 v->DCFCLKDeepSleepPerPlane[k] = dml_max(v->DCFCLKDeepSleepPerPlane[k], PixelClock[k] / 16); 6034 6035 } 6036 6037 for (k = 0; k < NumberOfActivePlanes; ++k) { 6038 ReadBandwidth = ReadBandwidth + ReadBandwidthLuma[k] + ReadBandwidthChroma[k]; 6039 } 6040 6041 *DCFCLKDeepSleep = dml_max(8.0, __DML_MIN_DCFCLK_FACTOR__ * ReadBandwidth / ReturnBusWidth); 6042 6043 for (k = 0; k < NumberOfActivePlanes; ++k) { 6044 *DCFCLKDeepSleep = dml_max(*DCFCLKDeepSleep, v->DCFCLKDeepSleepPerPlane[k]); 6045 } 6046 } 6047

/*
 * CalculateUrgentBurstFactor - urgent burst factors for the cursor, luma and
 * chroma buffers of a single plane.
 *
 * For each buffer the time it can cover is computed as
 * (lines held in the buffer) * LineTime / VRatio and compared with
 * UrgentLatency:
 *   - covered time <= UrgentLatency: the factor is set to 0 and
 *     *NotEnoughUrgentLatencyHiding is set;
 *   - otherwise: factor = covered time / (covered time - UrgentLatency).
 * When VRatio is not greater than 0 the factor is set to 1 instead.
 *
 * Outputs:
 *   *NotEnoughUrgentLatencyHiding  always written (cleared first).
 *   *UrgentBurstFactorLuma         always written.
 *   *UrgentBurstFactorCursor       written only when CursorWidth > 0.
 *   *UrgentBurstFactorChroma       written only when BytePerPixelInDETC > 0.
 * Outputs that are not written keep whatever value the caller left there.
 *
 * NOTE(review): VRatioC is accepted but never read; the chroma path is scaled
 * by VRatio. Confirm against the HW formula before touching it.
 */
static void CalculateUrgentBurstFactor(
		int swath_width_luma_ub,
		int swath_width_chroma_ub,
		unsigned int SwathHeightY,
		unsigned int SwathHeightC,
		double LineTime,
		double UrgentLatency,
		double CursorBufferSize,
		unsigned int CursorWidth,
		unsigned int CursorBPP,
		double VRatio,
		double VRatioC,
		double BytePerPixelInDETY,
		double BytePerPixelInDETC,
		double DETBufferSizeY,
		double DETBufferSizeC,
		double *UrgentBurstFactorCursor,
		double *UrgentBurstFactorLuma,
		double *UrgentBurstFactorChroma,
		bool *NotEnoughUrgentLatencyHiding)
{
	const bool vratio_positive = VRatio > 0;
	double lines_in_det;
	double det_time;
	double det_margin;

	*NotEnoughUrgentLatencyHiding = 0;

	if (CursorWidth > 0) {
		/*
		 * Number of cursor lines the buffer holds, rounded down to a
		 * power of two. Presumably CursorBufferSize is in KBytes and
		 * CursorBPP in bits (hence * 1024.0 and / 8.0) - TODO confirm.
		 */
		const unsigned int cursor_lines = 1 << (unsigned int) dml_floor(dml_log2(CursorBufferSize * 1024.0 / (CursorWidth * CursorBPP / 8.0)), 1.0);

		if (!vratio_positive) {
			*UrgentBurstFactorCursor = 1;
		} else {
			const double cursor_time = cursor_lines * LineTime / VRatio;
			const double cursor_margin = cursor_time - UrgentLatency;

			if (cursor_margin <= 0) {
				*NotEnoughUrgentLatencyHiding = 1;
				*UrgentBurstFactorCursor = 0;
			} else {
				*UrgentBurstFactorCursor = cursor_time / cursor_margin;
			}
		}
	}

	/* Luma: lines in the DET are rounded down to whole swaths. */
	lines_in_det = DETBufferSizeY / BytePerPixelInDETY / swath_width_luma_ub;
	if (!vratio_positive) {
		*UrgentBurstFactorLuma = 1;
	} else {
		det_time = dml_floor(lines_in_det, SwathHeightY) * LineTime / VRatio;
		det_margin = det_time - UrgentLatency;
		if (det_margin <= 0) {
			*NotEnoughUrgentLatencyHiding = 1;
			*UrgentBurstFactorLuma = 0;
		} else {
			*UrgentBurstFactorLuma = det_time / det_margin;
		}
	}

	/* No chroma plane: leave *UrgentBurstFactorChroma untouched. */
	if (!(BytePerPixelInDETC > 0))
		return;

	lines_in_det = DETBufferSizeC / BytePerPixelInDETC / swath_width_chroma_ub;
	if (!vratio_positive) {
		*UrgentBurstFactorChroma = 1;
	} else {
		det_time = dml_floor(lines_in_det, SwathHeightC) * LineTime / VRatio;
		det_margin = det_time - UrgentLatency;
		if (det_margin <= 0) {
			*NotEnoughUrgentLatencyHiding = 1;
			*UrgentBurstFactorChroma = 0;
		} else {
			*UrgentBurstFactorChroma = det_time / det_margin;
		}
	}
}

/*
 * CalculatePixelDeliveryTimes - per-plane line, request and cursor-request
 * delivery times, for both the active (VRatio/VRatioChroma) and the prefetch
 * (VRatioPrefetchY/VRatioPrefetchC) vertical ratios.
 *
 * For every plane k < NumberOfActivePlanes:
 *   - Line delivery time: when the relevant vertical ratio is <= 1 it is
 *     swath width * DPPPerPlane / HRatio / PixelClock, otherwise
 *     swath width / PSCL throughput / DPPCLK. Chroma values are 0 when
 *     BytePerPixelC[k] == 0.
 *   - Request delivery time: line delivery time divided by the number of
 *     requests per swath (swath width / 256-byte block width, or block height
 *     when SourceScan[k] == dm_vert). Chroma values are 0 when
 *     BytePerPixelC[k] == 0.
 *   - Cursor request delivery time: computed from cursor 0 only
 *     (CursorWidth[k][0], CursorBPP[k][0]); 0 when NumberOfCursors[k] == 0.
 *
 * All results are written to the trailing output arrays; nothing is returned.
 */
static void CalculatePixelDeliveryTimes(
		unsigned int NumberOfActivePlanes,
		double VRatio[],
		double VRatioChroma[],
		double VRatioPrefetchY[],
		double VRatioPrefetchC[],
		unsigned int swath_width_luma_ub[],
		unsigned int swath_width_chroma_ub[],
		unsigned int DPPPerPlane[],
		double HRatio[],
		double HRatioChroma[],
		double PixelClock[],
		double PSCL_THROUGHPUT[],
		double PSCL_THROUGHPUT_CHROMA[],
		double DPPCLK[],
		int BytePerPixelC[],
		enum scan_direction_class SourceScan[],
		unsigned int NumberOfCursors[],
		unsigned int CursorWidth[][DC__NUM_CURSOR__MAX],
		unsigned int CursorBPP[][DC__NUM_CURSOR__MAX],
		unsigned int BlockWidth256BytesY[],
		unsigned int BlockHeight256BytesY[],
		unsigned int BlockWidth256BytesC[],
		unsigned int BlockHeight256BytesC[],
		double DisplayPipeLineDeliveryTimeLuma[],
		double DisplayPipeLineDeliveryTimeChroma[],
		double DisplayPipeLineDeliveryTimeLumaPrefetch[],
		double DisplayPipeLineDeliveryTimeChromaPrefetch[],
		double DisplayPipeRequestDeliveryTimeLuma[],
		double DisplayPipeRequestDeliveryTimeChroma[],
		double DisplayPipeRequestDeliveryTimeLumaPrefetch[],
		double DisplayPipeRequestDeliveryTimeChromaPrefetch[],
		double CursorRequestDeliveryTime[],
		double CursorRequestDeliveryTimePrefetch[])
{
	double reqs_per_swath;
	int k;

	/* Pass 1: time to deliver one line, active and prefetch. */
	for (k = 0; k < NumberOfActivePlanes; ++k) {
		DisplayPipeLineDeliveryTimeLuma[k] = (VRatio[k] <= 1) ?
				swath_width_luma_ub[k] * DPPPerPlane[k] / HRatio[k] / PixelClock[k] :
				swath_width_luma_ub[k] / PSCL_THROUGHPUT[k] / DPPCLK[k];

		DisplayPipeLineDeliveryTimeLumaPrefetch[k] = (VRatioPrefetchY[k] <= 1) ?
				swath_width_luma_ub[k] * DPPPerPlane[k] / HRatio[k] / PixelClock[k] :
				swath_width_luma_ub[k] / PSCL_THROUGHPUT[k] / DPPCLK[k];

		if (BytePerPixelC[k] == 0) {
			DisplayPipeLineDeliveryTimeChroma[k] = 0;
			DisplayPipeLineDeliveryTimeChromaPrefetch[k] = 0;
		} else {
			DisplayPipeLineDeliveryTimeChroma[k] = (VRatioChroma[k] <= 1) ?
					swath_width_chroma_ub[k] * DPPPerPlane[k] / HRatioChroma[k] / PixelClock[k] :
					swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / DPPCLK[k];

			DisplayPipeLineDeliveryTimeChromaPrefetch[k] = (VRatioPrefetchC[k] <= 1) ?
					swath_width_chroma_ub[k] * DPPPerPlane[k] / HRatioChroma[k] / PixelClock[k] :
					swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / DPPCLK[k];
		}
	}

	/* Pass 2: time per request = line time / requests per swath. */
	for (k = 0; k < NumberOfActivePlanes; ++k) {
		const bool is_vertical = (SourceScan[k] == dm_vert);

		/* Integer (unsigned) division, as in the HW-provided formula. */
		reqs_per_swath = is_vertical ?
				swath_width_luma_ub[k] / BlockHeight256BytesY[k] :
				swath_width_luma_ub[k] / BlockWidth256BytesY[k];
		DisplayPipeRequestDeliveryTimeLuma[k] = DisplayPipeLineDeliveryTimeLuma[k] / reqs_per_swath;
		DisplayPipeRequestDeliveryTimeLumaPrefetch[k] = DisplayPipeLineDeliveryTimeLumaPrefetch[k] / reqs_per_swath;

		if (BytePerPixelC[k] != 0) {
			reqs_per_swath = is_vertical ?
					swath_width_chroma_ub[k] / BlockHeight256BytesC[k] :
					swath_width_chroma_ub[k] / BlockWidth256BytesC[k];
			DisplayPipeRequestDeliveryTimeChroma[k] = DisplayPipeLineDeliveryTimeChroma[k] / reqs_per_swath;
			DisplayPipeRequestDeliveryTimeChromaPrefetch[k] = DisplayPipeLineDeliveryTimeChromaPrefetch[k] / reqs_per_swath;
		} else {
			DisplayPipeRequestDeliveryTimeChroma[k] = 0;
			DisplayPipeRequestDeliveryTimeChromaPrefetch[k] = 0;
		}
#ifdef __DML_VBA_DEBUG__
		dml_print("DML::%s: k=%d : HRatio = %f\n", __func__, k, HRatio[k]);
		dml_print("DML::%s: k=%d : VRatio = %f\n", __func__, k, VRatio[k]);
		dml_print("DML::%s: k=%d : HRatioChroma = %f\n", __func__, k, HRatioChroma[k]);
		dml_print("DML::%s: k=%d : VRatioChroma = %f\n", __func__, k, VRatioChroma[k]);
		dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeLuma = %f\n", __func__, k, DisplayPipeLineDeliveryTimeLuma[k]);
		dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeLumaPrefetch = %f\n", __func__, k, DisplayPipeLineDeliveryTimeLumaPrefetch[k]);
		dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeChroma = %f\n", __func__, k, DisplayPipeLineDeliveryTimeChroma[k]);
		dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeChromaPrefetch = %f\n", __func__, k, DisplayPipeLineDeliveryTimeChromaPrefetch[k]);
		dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeLuma = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeLuma[k]);
		dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeLumaPrefetch = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeLumaPrefetch[k]);
		dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeChroma = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeChroma[k]);
		dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeChromaPrefetch = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeChromaPrefetch[k]);
#endif
	}

	/* Pass 3: cursor request delivery time (cursor 0 only). */
	for (k = 0; k < NumberOfActivePlanes; ++k) {
		/*
		 * NOTE(review): the argument is evaluated with integer division,
		 * so dml_ceil() has nothing left to round and the result is 0
		 * whenever CursorWidth * CursorBPP < 2048, which makes the
		 * divisions below divide by zero. Left unchanged pending
		 * confirmation of the intended formula and of CursorBPP's unit.
		 */
		const int cursor_reqs = dml_ceil(CursorWidth[k][0] * CursorBPP[k][0] / 256 / 8, 1);

		if (NumberOfCursors[k] > 0) {
			CursorRequestDeliveryTime[k] = (VRatio[k] <= 1) ?
					CursorWidth[k][0] / HRatio[k] / PixelClock[k] / cursor_reqs :
					CursorWidth[k][0] / PSCL_THROUGHPUT[k] / DPPCLK[k] / cursor_reqs;
			CursorRequestDeliveryTimePrefetch[k] = (VRatioPrefetchY[k] <= 1) ?
					CursorWidth[k][0] / HRatio[k] / PixelClock[k] / cursor_reqs :
					CursorWidth[k][0] / PSCL_THROUGHPUT[k] / DPPCLK[k] / cursor_reqs;
		} else {
			CursorRequestDeliveryTime[k] = 0;
			CursorRequestDeliveryTimePrefetch[k] = 0;
		}
#ifdef __DML_VBA_DEBUG__
		dml_print("DML::%s: k=%d : NumberOfCursors = %d\n", __func__, k, NumberOfCursors[k]);
		dml_print("DML::%s: k=%d : CursorRequestDeliveryTime = %f\n", __func__, k, CursorRequestDeliveryTime[k]);
		dml_print("DML::%s: k=%d : CursorRequestDeliveryTimePrefetch = %f\n", __func__, k, CursorRequestDeliveryTimePrefetch[k]);
#endif
	}
}

/*
 * CalculateMetaAndPTETimes - per-plane meta-chunk and PTE-group timing.
 *
 * For every plane k < NumberOfActivePlanes this fills, in three passes:
 *   1. DST_Y_PER_PTE_ROW_NOM_{L,C} and DST_Y_PER_META_ROW_NOM_{L,C}:
 *      row height divided by the vertical ratio (chroma entries are 0 when
 *      BytePerPixelC[k] == 0).
 *   2. TimePer[Chroma]MetaChunk{Nominal,VBlank,Flip}: the row time
 *      (lines * HTotal / PixelClock) divided by an upper bound on the number
 *      of meta chunks per row. All six are 0 when DCCEnable[k] is false; the
 *      chroma three are 0 when BytePerPixelC[k] == 0.
 *   3. time_per_pte_group_{nom,vblank,flip}_{luma,chroma}: the row time
 *      divided by the number of dpte groups per row (rounded up). All six are
 *      0 when GPUVMEnable is false; the chroma three are 0 when
 *      BytePerPixelC[k] == 0.
 *
 * Pass 3 reads DST_Y_PER_PTE_ROW_NOM_{L,C} written in pass 1.
 */
static void CalculateMetaAndPTETimes(
		int NumberOfActivePlanes,
		bool GPUVMEnable,
		int MetaChunkSize,
		int MinMetaChunkSizeBytes,
		int HTotal[],
		double VRatio[],
		double VRatioChroma[],
		double DestinationLinesToRequestRowInVBlank[],
		double DestinationLinesToRequestRowInImmediateFlip[],
		bool DCCEnable[],
		double PixelClock[],
		int BytePerPixelY[],
		int BytePerPixelC[],
		enum scan_direction_class SourceScan[],
		int dpte_row_height[],
		int dpte_row_height_chroma[],
		int meta_row_width[],
		int meta_row_width_chroma[],
		int meta_row_height[],
		int meta_row_height_chroma[],
		int meta_req_width[],
		int meta_req_width_chroma[],
		int meta_req_height[],
		int meta_req_height_chroma[],
		int dpte_group_bytes[],
		int PTERequestSizeY[],
		int PTERequestSizeC[],
		int PixelPTEReqWidthY[],
		int PixelPTEReqHeightY[],
		int PixelPTEReqWidthC[],
		int PixelPTEReqHeightC[],
		int dpte_row_width_luma_ub[],
		int dpte_row_width_chroma_ub[],
		double DST_Y_PER_PTE_ROW_NOM_L[],
		double DST_Y_PER_PTE_ROW_NOM_C[],
		double DST_Y_PER_META_ROW_NOM_L[],
		double DST_Y_PER_META_ROW_NOM_C[],
		double TimePerMetaChunkNominal[],
		double TimePerChromaMetaChunkNominal[],
		double TimePerMetaChunkVBlank[],
		double TimePerChromaMetaChunkVBlank[],
		double TimePerMetaChunkFlip[],
		double TimePerChromaMetaChunkFlip[],
		double time_per_pte_group_nom_luma[],
		double time_per_pte_group_vblank_luma[],
		double time_per_pte_group_flip_luma[],
		double time_per_pte_group_nom_chroma[],
		double time_per_pte_group_vblank_chroma[],
		double time_per_pte_group_flip_chroma[])
{
	int k;

	/* Pass 1: destination lines per PTE row / meta row. */
	for (k = 0; k < NumberOfActivePlanes; ++k) {
		const bool has_chroma = (BytePerPixelC[k] != 0);

		DST_Y_PER_PTE_ROW_NOM_L[k] = dpte_row_height[k] / VRatio[k];
		DST_Y_PER_PTE_ROW_NOM_C[k] = has_chroma ? dpte_row_height_chroma[k] / VRatioChroma[k] : 0;
		DST_Y_PER_META_ROW_NOM_L[k] = meta_row_height[k] / VRatio[k];
		DST_Y_PER_META_ROW_NOM_C[k] = has_chroma ? meta_row_height_chroma[k] / VRatioChroma[k] : 0;
	}

	/* Pass 2: time per meta chunk (only meaningful with DCC enabled). */
	for (k = 0; k < NumberOfActivePlanes; ++k) {
		unsigned int chunk_width;
		unsigned int min_chunk_width;
		unsigned int whole_chunks;
		unsigned int row_rest;
		unsigned int rest_limit;
		unsigned int chunks_ub;
		double vblank_row_time;
		double flip_row_time;

		if (!DCCEnable[k]) {
			TimePerMetaChunkNominal[k] = 0;
			TimePerMetaChunkVBlank[k] = 0;
			TimePerMetaChunkFlip[k] = 0;
			TimePerChromaMetaChunkNominal[k] = 0;
			TimePerChromaMetaChunkVBlank[k] = 0;
			TimePerChromaMetaChunkFlip[k] = 0;
			continue;
		}

		vblank_row_time = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k];
		flip_row_time = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k];

		/* Luma: all chunk arithmetic is integer, as in the original. */
		chunk_width = MetaChunkSize * 1024 * 256 / BytePerPixelY[k] / meta_row_height[k];
		min_chunk_width = MinMetaChunkSizeBytes * 256 / BytePerPixelY[k] / meta_row_height[k];
		whole_chunks = meta_row_width[k] / chunk_width;
		row_rest = meta_row_width[k] % chunk_width;
		rest_limit = 2 * min_chunk_width - (SourceScan[k] != dm_vert ? meta_req_width[k] : meta_req_height[k]);
		/* One extra chunk for the remainder, two if it exceeds the limit. */
		chunks_ub = whole_chunks + (row_rest <= rest_limit ? 1 : 2);

		TimePerMetaChunkNominal[k] = meta_row_height[k] / VRatio[k] * HTotal[k] / PixelClock[k] / chunks_ub;
		TimePerMetaChunkVBlank[k] = vblank_row_time / chunks_ub;
		TimePerMetaChunkFlip[k] = flip_row_time / chunks_ub;

		if (BytePerPixelC[k] == 0) {
			TimePerChromaMetaChunkNominal[k] = 0;
			TimePerChromaMetaChunkVBlank[k] = 0;
			TimePerChromaMetaChunkFlip[k] = 0;
			continue;
		}

		/* Chroma: same scheme on the chroma surface parameters. */
		chunk_width = MetaChunkSize * 1024 * 256 / BytePerPixelC[k] / meta_row_height_chroma[k];
		min_chunk_width = MinMetaChunkSizeBytes * 256 / BytePerPixelC[k] / meta_row_height_chroma[k];
		/*
		 * Unlike luma, the whole-chunk count goes through a floating
		 * point division that is truncated on assignment; kept as-is.
		 */
		whole_chunks = (double) meta_row_width_chroma[k] / chunk_width;
		row_rest = meta_row_width_chroma[k] % chunk_width;
		rest_limit = 2 * min_chunk_width - (SourceScan[k] != dm_vert ? meta_req_width_chroma[k] : meta_req_height_chroma[k]);
		chunks_ub = whole_chunks + (row_rest <= rest_limit ? 1 : 2);

		TimePerChromaMetaChunkNominal[k] = meta_row_height_chroma[k] / VRatioChroma[k] * HTotal[k] / PixelClock[k] / chunks_ub;
		TimePerChromaMetaChunkVBlank[k] = vblank_row_time / chunks_ub;
		TimePerChromaMetaChunkFlip[k] = flip_row_time / chunks_ub;
	}

	/* Pass 3: time per dpte group (only meaningful with GPUVM enabled). */
	for (k = 0; k < NumberOfActivePlanes; ++k) {
		unsigned int group_width;
		unsigned int groups_per_row_ub;
		double vblank_row_time;
		double flip_row_time;

		if (!GPUVMEnable) {
			time_per_pte_group_nom_luma[k] = 0;
			time_per_pte_group_vblank_luma[k] = 0;
			time_per_pte_group_flip_luma[k] = 0;
			time_per_pte_group_nom_chroma[k] = 0;
			time_per_pte_group_vblank_chroma[k] = 0;
			time_per_pte_group_flip_chroma[k] = 0;
			continue;
		}

		vblank_row_time = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k];
		flip_row_time = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k];

		/* Requests per group (integer division) times the request extent. */
		group_width = dpte_group_bytes[k] / PTERequestSizeY[k]
				* (SourceScan[k] != dm_vert ? PixelPTEReqWidthY[k] : PixelPTEReqHeightY[k]);
		groups_per_row_ub = dml_ceil(1.0 * dpte_row_width_luma_ub[k] / group_width, 1);

		time_per_pte_group_nom_luma[k] = DST_Y_PER_PTE_ROW_NOM_L[k] * HTotal[k] / PixelClock[k] / groups_per_row_ub;
		time_per_pte_group_vblank_luma[k] = vblank_row_time / groups_per_row_ub;
		time_per_pte_group_flip_luma[k] = flip_row_time / groups_per_row_ub;

		if (BytePerPixelC[k] == 0) {
			time_per_pte_group_nom_chroma[k] = 0;
			time_per_pte_group_vblank_chroma[k] = 0;
			time_per_pte_group_flip_chroma[k] = 0;
			continue;
		}

		group_width = dpte_group_bytes[k] / PTERequestSizeC[k]
				* (SourceScan[k] != dm_vert ? PixelPTEReqWidthC[k] : PixelPTEReqHeightC[k]);
		groups_per_row_ub = dml_ceil(1.0 * dpte_row_width_chroma_ub[k] / group_width, 1);

		time_per_pte_group_nom_chroma[k] = DST_Y_PER_PTE_ROW_NOM_C[k] * HTotal[k] / PixelClock[k] / groups_per_row_ub;
		time_per_pte_group_vblank_chroma[k] = vblank_row_time / groups_per_row_ub;
		time_per_pte_group_flip_chroma[k] = flip_row_time / groups_per_row_ub;
	}
}

/*
 * CalculateVMGroupAndRequestTimes - per-plane time budget for each VM group
 * and each VM request, during vblank and during immediate flip.
 *
 * A plane gets non-zero results only when GPUVMEnable is set and either
 * DCCEnable[k] is set or GPUVMMaxPageTableLevels > 1; otherwise all four
 * outputs for that plane are 0.
 *
 * The number of groups is the sum of ceil(bytes / vm_group_bytes[k]) over the
 * contributing byte counts, and the number of requests is the sum of
 * bytes / 64 (integer division) over the same byte counts:
 *   - DCC off:                    dpde0 bytes (luma, plus chroma if present);
 *   - DCC on, 1 page table level: meta PTE bytes (luma, plus chroma);
 *   - DCC on, >1 levels:          dpde0 and meta PTE bytes, with 1 (luma only)
 *                                 or 2 (with chroma) added to the group count.
 * Each output is (destination lines * HTotal / PixelClock) divided by that
 * count, and is halved when GPUVMMaxPageTableLevels > 2.
 *
 * dpte_row_width_luma_ub and dpte_row_width_chroma_ub are not read here.
 */
static void CalculateVMGroupAndRequestTimes(
		unsigned int NumberOfActivePlanes,
		bool GPUVMEnable,
		unsigned int GPUVMMaxPageTableLevels,
		unsigned int HTotal[],
		int BytePerPixelC[],
		double DestinationLinesToRequestVMInVBlank[],
		double DestinationLinesToRequestVMInImmediateFlip[],
		bool DCCEnable[],
		double PixelClock[],
		int dpte_row_width_luma_ub[],
		int dpte_row_width_chroma_ub[],
		int vm_group_bytes[],
		unsigned int dpde0_bytes_per_frame_ub_l[],
		unsigned int dpde0_bytes_per_frame_ub_c[],
		int meta_pte_bytes_per_frame_ub_l[],
		int meta_pte_bytes_per_frame_ub_c[],
		double TimePerVMGroupVBlank[],
		double TimePerVMGroupFlip[],
		double TimePerVMRequestVBlank[],
		double TimePerVMRequestFlip[])
{
	int k;

	for (k = 0; k < NumberOfActivePlanes; ++k) {
		const bool has_chroma = (BytePerPixelC[k] > 0);
		int groups;
		int requests;
		double vblank_time;
		double flip_time;

		if (!GPUVMEnable || (!DCCEnable[k] && GPUVMMaxPageTableLevels <= 1)) {
			TimePerVMGroupVBlank[k] = 0;
			TimePerVMGroupFlip[k] = 0;
			TimePerVMRequestVBlank[k] = 0;
			TimePerVMRequestFlip[k] = 0;
			continue;
		}

		if (!DCCEnable[k]) {
			/* Page directory entries only. */
			groups = has_chroma ?
					dml_ceil((double) (dpde0_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1)
					+ dml_ceil((double) (dpde0_bytes_per_frame_ub_c[k]) / (double) (vm_group_bytes[k]), 1) :
					dml_ceil((double) (dpde0_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1);
			requests = has_chroma ?
					dpde0_bytes_per_frame_ub_l[k] / 64 + dpde0_bytes_per_frame_ub_c[k] / 64 :
					dpde0_bytes_per_frame_ub_l[k] / 64;
		} else if (GPUVMMaxPageTableLevels == 1) {
			/* Meta PTEs only. */
			groups = has_chroma ?
					dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1)
					+ dml_ceil((double) (meta_pte_bytes_per_frame_ub_c[k]) / (double) (vm_group_bytes[k]), 1) :
					dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1);
			requests = has_chroma ?
					meta_pte_bytes_per_frame_ub_l[k] / 64 + meta_pte_bytes_per_frame_ub_c[k] / 64 :
					meta_pte_bytes_per_frame_ub_l[k] / 64;
		} else {
			/* Both page directory entries and meta PTEs. */
			groups = has_chroma ?
					2 + dml_ceil((double) (dpde0_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1)
					+ dml_ceil((double) (dpde0_bytes_per_frame_ub_c[k]) / (double) (vm_group_bytes[k]), 1)
					+ dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1)
					+ dml_ceil((double) (meta_pte_bytes_per_frame_ub_c[k]) / (double) (vm_group_bytes[k]), 1) :
					1 + dml_ceil((double) (dpde0_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1)
					+ dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1);
			requests = has_chroma ?
					dpde0_bytes_per_frame_ub_l[k] / 64 + dpde0_bytes_per_frame_ub_c[k] / 64
					+ meta_pte_bytes_per_frame_ub_l[k] / 64 + meta_pte_bytes_per_frame_ub_c[k] / 64 :
					dpde0_bytes_per_frame_ub_l[k] / 64 + meta_pte_bytes_per_frame_ub_l[k] / 64;
		}

		vblank_time = DestinationLinesToRequestVMInVBlank[k] * HTotal[k] / PixelClock[k];
		flip_time = DestinationLinesToRequestVMInImmediateFlip[k] * HTotal[k] / PixelClock[k];

		TimePerVMGroupVBlank[k] = vblank_time / groups;
		TimePerVMGroupFlip[k] = flip_time / groups;
		TimePerVMRequestVBlank[k] = vblank_time / requests;
		TimePerVMRequestFlip[k] = flip_time / requests;

		if (GPUVMMaxPageTableLevels > 2) {
			TimePerVMGroupVBlank[k] /= 2;
			TimePerVMGroupFlip[k] /= 2;
			TimePerVMRequestVBlank[k] /= 2;
			TimePerVMRequestFlip[k] /= 2;
		}
	}
}

6528 6529 static void CalculateStutterEfficiency( 6530 struct display_mode_lib *mode_lib, 6531 int CompressedBufferSizeInkByte, 6532 bool UnboundedRequestEnabled, 6533 int ConfigReturnBufferSizeInKByte, 6534 int MetaFIFOSizeInKEntries, 6535 int ZeroSizeBufferEntries, 6536 int NumberOfActivePlanes, 6537 int ROBBufferSizeInKByte, 6538 double TotalDataReadBandwidth, 6539 double DCFCLK, 6540 double ReturnBW, 6541 double COMPBUF_RESERVED_SPACE_64B, 6542 double COMPBUF_RESERVED_SPACE_ZS, 6543 double SRExitTime, 6544 double SRExitZ8Time, 6545 bool SynchronizedVBlank, 6546 double Z8StutterEnterPlusExitWatermark, 6547 
double StutterEnterPlusExitWatermark, 6548 bool ProgressiveToInterlaceUnitInOPP, 6549 bool Interlace[], 6550 double MinTTUVBlank[], 6551 int DPPPerPlane[], 6552 unsigned int DETBufferSizeY[], 6553 int BytePerPixelY[], 6554 double BytePerPixelDETY[], 6555 double SwathWidthY[], 6556 int SwathHeightY[], 6557 int SwathHeightC[], 6558 double NetDCCRateLuma[], 6559 double NetDCCRateChroma[], 6560 double DCCFractionOfZeroSizeRequestsLuma[], 6561 double DCCFractionOfZeroSizeRequestsChroma[], 6562 int HTotal[], 6563 int VTotal[], 6564 double PixelClock[], 6565 double VRatio[], 6566 enum scan_direction_class SourceScan[], 6567 int BlockHeight256BytesY[], 6568 int BlockWidth256BytesY[], 6569 int BlockHeight256BytesC[], 6570 int BlockWidth256BytesC[], 6571 int DCCYMaxUncompressedBlock[], 6572 int DCCCMaxUncompressedBlock[], 6573 int VActive[], 6574 bool DCCEnable[], 6575 bool WritebackEnable[], 6576 double ReadBandwidthPlaneLuma[], 6577 double ReadBandwidthPlaneChroma[], 6578 double meta_row_bw[], 6579 double dpte_row_bw[], 6580 double *StutterEfficiencyNotIncludingVBlank, 6581 double *StutterEfficiency, 6582 int *NumberOfStutterBurstsPerFrame, 6583 double *Z8StutterEfficiencyNotIncludingVBlank, 6584 double *Z8StutterEfficiency, 6585 int *Z8NumberOfStutterBurstsPerFrame, 6586 double *StutterPeriod) 6587 { 6588 struct vba_vars_st *v = &mode_lib->vba; 6589 6590 double DETBufferingTimeY; 6591 double SwathWidthYCriticalPlane = 0; 6592 double VActiveTimeCriticalPlane = 0; 6593 double FrameTimeCriticalPlane = 0; 6594 int BytePerPixelYCriticalPlane = 0; 6595 double LinesToFinishSwathTransferStutterCriticalPlane = 0; 6596 double MinTTUVBlankCriticalPlane = 0; 6597 double TotalCompressedReadBandwidth; 6598 double TotalRowReadBandwidth; 6599 double AverageDCCCompressionRate; 6600 double EffectiveCompressedBufferSize; 6601 double PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer; 6602 double StutterBurstTime; 6603 int TotalActiveWriteback; 6604 double LinesInDETY; 6605 double 
LinesInDETYRoundedDownToSwath; 6606 double MaximumEffectiveCompressionLuma; 6607 double MaximumEffectiveCompressionChroma; 6608 double TotalZeroSizeRequestReadBandwidth; 6609 double TotalZeroSizeCompressedReadBandwidth; 6610 double AverageDCCZeroSizeFraction; 6611 double AverageZeroSizeCompressionRate; 6612 int TotalNumberOfActiveOTG = 0; 6613 double LastStutterPeriod = 0.0; 6614 double LastZ8StutterPeriod = 0.0; 6615 int k; 6616 6617 TotalZeroSizeRequestReadBandwidth = 0; 6618 TotalZeroSizeCompressedReadBandwidth = 0; 6619 TotalRowReadBandwidth = 0; 6620 TotalCompressedReadBandwidth = 0; 6621 6622 for (k = 0; k < NumberOfActivePlanes; ++k) { 6623 if (DCCEnable[k] == true) { 6624 if ((SourceScan[k] == dm_vert && BlockWidth256BytesY[k] > SwathHeightY[k]) || (SourceScan[k] != dm_vert && BlockHeight256BytesY[k] > SwathHeightY[k]) 6625 || DCCYMaxUncompressedBlock[k] < 256) { 6626 MaximumEffectiveCompressionLuma = 2; 6627 } else { 6628 MaximumEffectiveCompressionLuma = 4; 6629 } 6630 TotalCompressedReadBandwidth = TotalCompressedReadBandwidth + ReadBandwidthPlaneLuma[k] / dml_min(NetDCCRateLuma[k], MaximumEffectiveCompressionLuma); 6631 TotalZeroSizeRequestReadBandwidth = TotalZeroSizeRequestReadBandwidth + ReadBandwidthPlaneLuma[k] * DCCFractionOfZeroSizeRequestsLuma[k]; 6632 TotalZeroSizeCompressedReadBandwidth = TotalZeroSizeCompressedReadBandwidth 6633 + ReadBandwidthPlaneLuma[k] * DCCFractionOfZeroSizeRequestsLuma[k] / MaximumEffectiveCompressionLuma; 6634 if (ReadBandwidthPlaneChroma[k] > 0) { 6635 if ((SourceScan[k] == dm_vert && BlockWidth256BytesC[k] > SwathHeightC[k]) 6636 || (SourceScan[k] != dm_vert && BlockHeight256BytesC[k] > SwathHeightC[k]) || DCCCMaxUncompressedBlock[k] < 256) { 6637 MaximumEffectiveCompressionChroma = 2; 6638 } else { 6639 MaximumEffectiveCompressionChroma = 4; 6640 } 6641 TotalCompressedReadBandwidth = TotalCompressedReadBandwidth 6642 + ReadBandwidthPlaneChroma[k] / dml_min(NetDCCRateChroma[k], MaximumEffectiveCompressionChroma); 
6643 TotalZeroSizeRequestReadBandwidth = TotalZeroSizeRequestReadBandwidth + ReadBandwidthPlaneChroma[k] * DCCFractionOfZeroSizeRequestsChroma[k]; 6644 TotalZeroSizeCompressedReadBandwidth = TotalZeroSizeCompressedReadBandwidth 6645 + ReadBandwidthPlaneChroma[k] * DCCFractionOfZeroSizeRequestsChroma[k] / MaximumEffectiveCompressionChroma; 6646 } 6647 } else { 6648 TotalCompressedReadBandwidth = TotalCompressedReadBandwidth + ReadBandwidthPlaneLuma[k] + ReadBandwidthPlaneChroma[k]; 6649 } 6650 TotalRowReadBandwidth = TotalRowReadBandwidth + DPPPerPlane[k] * (meta_row_bw[k] + dpte_row_bw[k]); 6651 } 6652 6653 AverageDCCCompressionRate = TotalDataReadBandwidth / TotalCompressedReadBandwidth; 6654 AverageDCCZeroSizeFraction = TotalZeroSizeRequestReadBandwidth / TotalDataReadBandwidth; 6655 6656 #ifdef __DML_VBA_DEBUG__ 6657 dml_print("DML::%s: TotalCompressedReadBandwidth = %f\n", __func__, TotalCompressedReadBandwidth); 6658 dml_print("DML::%s: TotalZeroSizeRequestReadBandwidth = %f\n", __func__, TotalZeroSizeRequestReadBandwidth); 6659 dml_print("DML::%s: TotalZeroSizeCompressedReadBandwidth = %f\n", __func__, TotalZeroSizeCompressedReadBandwidth); 6660 dml_print("DML::%s: MaximumEffectiveCompressionLuma = %f\n", __func__, MaximumEffectiveCompressionLuma); 6661 dml_print("DML::%s: MaximumEffectiveCompressionChroma = %f\n", __func__, MaximumEffectiveCompressionChroma); 6662 dml_print("DML::%s: AverageDCCCompressionRate = %f\n", __func__, AverageDCCCompressionRate); 6663 dml_print("DML::%s: AverageDCCZeroSizeFraction = %f\n", __func__, AverageDCCZeroSizeFraction); 6664 dml_print("DML::%s: CompressedBufferSizeInkByte = %d\n", __func__, CompressedBufferSizeInkByte); 6665 #endif 6666 6667 if (AverageDCCZeroSizeFraction == 1) { 6668 AverageZeroSizeCompressionRate = TotalZeroSizeRequestReadBandwidth / TotalZeroSizeCompressedReadBandwidth; 6669 EffectiveCompressedBufferSize = MetaFIFOSizeInKEntries * 1024 * 64 * AverageZeroSizeCompressionRate + (ZeroSizeBufferEntries - 
COMPBUF_RESERVED_SPACE_ZS) * 64 * AverageZeroSizeCompressionRate; 6670 } else if (AverageDCCZeroSizeFraction > 0) { 6671 AverageZeroSizeCompressionRate = TotalZeroSizeRequestReadBandwidth / TotalZeroSizeCompressedReadBandwidth; 6672 EffectiveCompressedBufferSize = dml_min( 6673 CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate, 6674 MetaFIFOSizeInKEntries * 1024 * 64 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate + 1 / AverageDCCCompressionRate)) 6675 + dml_min((ROBBufferSizeInKByte * 1024 - COMPBUF_RESERVED_SPACE_64B * 64) * AverageDCCCompressionRate, 6676 (ZeroSizeBufferEntries - COMPBUF_RESERVED_SPACE_ZS) * 64 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate)); 6677 dml_print("DML::%s: min 1 = %f\n", __func__, CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate); 6678 dml_print( 6679 "DML::%s: min 2 = %f\n", 6680 __func__, 6681 MetaFIFOSizeInKEntries * 1024 * 64 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate + 1 / AverageDCCCompressionRate)); 6682 dml_print("DML::%s: min 3 = %f\n", __func__, ROBBufferSizeInKByte * 1024 * AverageDCCCompressionRate); 6683 dml_print("DML::%s: min 4 = %f\n", __func__, ZeroSizeBufferEntries * 64 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate)); 6684 } else { 6685 EffectiveCompressedBufferSize = dml_min( 6686 CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate, 6687 MetaFIFOSizeInKEntries * 1024 * 64 * AverageDCCCompressionRate) + (ROBBufferSizeInKByte * 1024 - COMPBUF_RESERVED_SPACE_64B * 64) * AverageDCCCompressionRate; 6688 dml_print("DML::%s: min 1 = %f\n", __func__, CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate); 6689 dml_print("DML::%s: min 2 = %f\n", __func__, MetaFIFOSizeInKEntries * 1024 * 64 * AverageDCCCompressionRate); 6690 } 6691 6692 #ifdef __DML_VBA_DEBUG__ 6693 dml_print("DML::%s: MetaFIFOSizeInKEntries = %d\n", __func__, MetaFIFOSizeInKEntries); 6694 dml_print("DML::%s: AverageZeroSizeCompressionRate = 
%f\n", __func__, AverageZeroSizeCompressionRate); 6695 dml_print("DML::%s: EffectiveCompressedBufferSize = %f\n", __func__, EffectiveCompressedBufferSize); 6696 #endif 6697 6698 *StutterPeriod = 0; 6699 for (k = 0; k < NumberOfActivePlanes; ++k) { 6700 LinesInDETY = (DETBufferSizeY[k] + (UnboundedRequestEnabled == true ? EffectiveCompressedBufferSize : 0) * ReadBandwidthPlaneLuma[k] / TotalDataReadBandwidth) 6701 / BytePerPixelDETY[k] / SwathWidthY[k]; 6702 LinesInDETYRoundedDownToSwath = dml_floor(LinesInDETY, SwathHeightY[k]); 6703 DETBufferingTimeY = LinesInDETYRoundedDownToSwath * (HTotal[k] / PixelClock[k]) / VRatio[k]; 6704 #ifdef __DML_VBA_DEBUG__ 6705 dml_print("DML::%s: k=%0d DETBufferSizeY = %f\n", __func__, k, DETBufferSizeY[k]); 6706 dml_print("DML::%s: k=%0d BytePerPixelDETY = %f\n", __func__, k, BytePerPixelDETY[k]); 6707 dml_print("DML::%s: k=%0d SwathWidthY = %f\n", __func__, k, SwathWidthY[k]); 6708 dml_print("DML::%s: k=%0d ReadBandwidthPlaneLuma = %f\n", __func__, k, ReadBandwidthPlaneLuma[k]); 6709 dml_print("DML::%s: k=%0d TotalDataReadBandwidth = %f\n", __func__, k, TotalDataReadBandwidth); 6710 dml_print("DML::%s: k=%0d LinesInDETY = %f\n", __func__, k, LinesInDETY); 6711 dml_print("DML::%s: k=%0d LinesInDETYRoundedDownToSwath = %f\n", __func__, k, LinesInDETYRoundedDownToSwath); 6712 dml_print("DML::%s: k=%0d HTotal = %d\n", __func__, k, HTotal[k]); 6713 dml_print("DML::%s: k=%0d PixelClock = %f\n", __func__, k, PixelClock[k]); 6714 dml_print("DML::%s: k=%0d VRatio = %f\n", __func__, k, VRatio[k]); 6715 dml_print("DML::%s: k=%0d DETBufferingTimeY = %f\n", __func__, k, DETBufferingTimeY); 6716 dml_print("DML::%s: k=%0d PixelClock = %f\n", __func__, k, PixelClock[k]); 6717 #endif 6718 6719 if (k == 0 || DETBufferingTimeY < *StutterPeriod) { 6720 bool isInterlaceTiming = Interlace[k] && !ProgressiveToInterlaceUnitInOPP; 6721 6722 *StutterPeriod = DETBufferingTimeY; 6723 FrameTimeCriticalPlane = (isInterlaceTiming ? 
dml_floor(VTotal[k] / 2.0, 1.0) : VTotal[k]) * HTotal[k] / PixelClock[k]; 6724 VActiveTimeCriticalPlane = (isInterlaceTiming ? dml_floor(VActive[k] / 2.0, 1.0) : VActive[k]) * HTotal[k] / PixelClock[k]; 6725 BytePerPixelYCriticalPlane = BytePerPixelY[k]; 6726 SwathWidthYCriticalPlane = SwathWidthY[k]; 6727 LinesToFinishSwathTransferStutterCriticalPlane = SwathHeightY[k] - (LinesInDETY - LinesInDETYRoundedDownToSwath); 6728 MinTTUVBlankCriticalPlane = MinTTUVBlank[k]; 6729 6730 #ifdef __DML_VBA_DEBUG__ 6731 dml_print("DML::%s: StutterPeriod = %f\n", __func__, *StutterPeriod); 6732 dml_print("DML::%s: MinTTUVBlankCriticalPlane = %f\n", __func__, MinTTUVBlankCriticalPlane); 6733 dml_print("DML::%s: FrameTimeCriticalPlane = %f\n", __func__, FrameTimeCriticalPlane); 6734 dml_print("DML::%s: VActiveTimeCriticalPlane = %f\n", __func__, VActiveTimeCriticalPlane); 6735 dml_print("DML::%s: BytePerPixelYCriticalPlane = %d\n", __func__, BytePerPixelYCriticalPlane); 6736 dml_print("DML::%s: SwathWidthYCriticalPlane = %f\n", __func__, SwathWidthYCriticalPlane); 6737 dml_print("DML::%s: LinesToFinishSwathTransferStutterCriticalPlane = %f\n", __func__, LinesToFinishSwathTransferStutterCriticalPlane); 6738 #endif 6739 } 6740 } 6741 6742 PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer = dml_min(*StutterPeriod * TotalDataReadBandwidth, EffectiveCompressedBufferSize); 6743 #ifdef __DML_VBA_DEBUG__ 6744 dml_print("DML::%s: ROBBufferSizeInKByte = %d\n", __func__, ROBBufferSizeInKByte); 6745 dml_print("DML::%s: AverageDCCCompressionRate = %f\n", __func__, AverageDCCCompressionRate); 6746 dml_print("DML::%s: StutterPeriod * TotalDataReadBandwidth = %f\n", __func__, *StutterPeriod * TotalDataReadBandwidth); 6747 dml_print("DML::%s: ROBBufferSizeInKByte * 1024 * AverageDCCCompressionRate + EffectiveCompressedBufferSize = %f\n", __func__, ROBBufferSizeInKByte * 1024 * AverageDCCCompressionRate + EffectiveCompressedBufferSize); 6748 dml_print("DML::%s: 
EffectiveCompressedBufferSize = %f\n", __func__, EffectiveCompressedBufferSize); 6749 dml_print("DML::%s: PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer = %f\n", __func__, PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer); 6750 dml_print("DML::%s: ReturnBW = %f\n", __func__, ReturnBW); 6751 dml_print("DML::%s: TotalDataReadBandwidth = %f\n", __func__, TotalDataReadBandwidth); 6752 dml_print("DML::%s: TotalRowReadBandwidth = %f\n", __func__, TotalRowReadBandwidth); 6753 dml_print("DML::%s: DCFCLK = %f\n", __func__, DCFCLK); 6754 #endif 6755 6756 StutterBurstTime = PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer / AverageDCCCompressionRate / ReturnBW 6757 + (*StutterPeriod * TotalDataReadBandwidth - PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer) / (DCFCLK * 64) 6758 + *StutterPeriod * TotalRowReadBandwidth / ReturnBW; 6759 #ifdef __DML_VBA_DEBUG__ 6760 dml_print("DML::%s: Part 1 = %f\n", __func__, PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer / AverageDCCCompressionRate / ReturnBW); 6761 dml_print("DML::%s: StutterPeriod * TotalDataReadBandwidth = %f\n", __func__, (*StutterPeriod * TotalDataReadBandwidth)); 6762 dml_print("DML::%s: Part 2 = %f\n", __func__, (*StutterPeriod * TotalDataReadBandwidth - PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer) / (DCFCLK * 64)); 6763 dml_print("DML::%s: Part 3 = %f\n", __func__, *StutterPeriod * TotalRowReadBandwidth / ReturnBW); 6764 dml_print("DML::%s: StutterBurstTime = %f\n", __func__, StutterBurstTime); 6765 #endif 6766 StutterBurstTime = dml_max(StutterBurstTime, LinesToFinishSwathTransferStutterCriticalPlane * BytePerPixelYCriticalPlane * SwathWidthYCriticalPlane / ReturnBW); 6767 6768 dml_print( 6769 "DML::%s: Time to finish residue swath=%f\n", 6770 __func__, 6771 LinesToFinishSwathTransferStutterCriticalPlane * BytePerPixelYCriticalPlane * SwathWidthYCriticalPlane / ReturnBW); 6772 6773 TotalActiveWriteback = 0; 6774 for (k = 0; k < 
NumberOfActivePlanes; ++k) { 6775 if (WritebackEnable[k]) { 6776 TotalActiveWriteback = TotalActiveWriteback + 1; 6777 } 6778 } 6779 6780 if (TotalActiveWriteback == 0) { 6781 #ifdef __DML_VBA_DEBUG__ 6782 dml_print("DML::%s: SRExitTime = %f\n", __func__, SRExitTime); 6783 dml_print("DML::%s: SRExitZ8Time = %f\n", __func__, SRExitZ8Time); 6784 dml_print("DML::%s: StutterBurstTime = %f (final)\n", __func__, StutterBurstTime); 6785 dml_print("DML::%s: StutterPeriod = %f\n", __func__, *StutterPeriod); 6786 #endif 6787 *StutterEfficiencyNotIncludingVBlank = dml_max(0., 1 - (SRExitTime + StutterBurstTime) / *StutterPeriod) * 100; 6788 *Z8StutterEfficiencyNotIncludingVBlank = dml_max(0., 1 - (SRExitZ8Time + StutterBurstTime) / *StutterPeriod) * 100; 6789 *NumberOfStutterBurstsPerFrame = (*StutterEfficiencyNotIncludingVBlank > 0 ? dml_ceil(VActiveTimeCriticalPlane / *StutterPeriod, 1) : 0); 6790 *Z8NumberOfStutterBurstsPerFrame = (*Z8StutterEfficiencyNotIncludingVBlank > 0 ? dml_ceil(VActiveTimeCriticalPlane / *StutterPeriod, 1) : 0); 6791 } else { 6792 *StutterEfficiencyNotIncludingVBlank = 0.; 6793 *Z8StutterEfficiencyNotIncludingVBlank = 0.; 6794 *NumberOfStutterBurstsPerFrame = 0; 6795 *Z8NumberOfStutterBurstsPerFrame = 0; 6796 } 6797 #ifdef __DML_VBA_DEBUG__ 6798 dml_print("DML::%s: VActiveTimeCriticalPlane = %f\n", __func__, VActiveTimeCriticalPlane); 6799 dml_print("DML::%s: StutterEfficiencyNotIncludingVBlank = %f\n", __func__, *StutterEfficiencyNotIncludingVBlank); 6800 dml_print("DML::%s: Z8StutterEfficiencyNotIncludingVBlank = %f\n", __func__, *Z8StutterEfficiencyNotIncludingVBlank); 6801 dml_print("DML::%s: NumberOfStutterBurstsPerFrame = %d\n", __func__, *NumberOfStutterBurstsPerFrame); 6802 dml_print("DML::%s: Z8NumberOfStutterBurstsPerFrame = %d\n", __func__, *Z8NumberOfStutterBurstsPerFrame); 6803 #endif 6804 6805 for (k = 0; k < NumberOfActivePlanes; ++k) { 6806 if (v->BlendingAndTiming[k] == k) { 6807 TotalNumberOfActiveOTG = TotalNumberOfActiveOTG + 1; 
6808 } 6809 } 6810 6811 if (*StutterEfficiencyNotIncludingVBlank > 0) { 6812 LastStutterPeriod = VActiveTimeCriticalPlane - (*NumberOfStutterBurstsPerFrame - 1) * *StutterPeriod; 6813 6814 if ((SynchronizedVBlank || TotalNumberOfActiveOTG == 1) && LastStutterPeriod + MinTTUVBlankCriticalPlane > StutterEnterPlusExitWatermark) { 6815 *StutterEfficiency = (1 - (*NumberOfStutterBurstsPerFrame * SRExitTime + StutterBurstTime * VActiveTimeCriticalPlane 6816 / *StutterPeriod) / FrameTimeCriticalPlane) * 100; 6817 } else { 6818 *StutterEfficiency = *StutterEfficiencyNotIncludingVBlank; 6819 } 6820 } else { 6821 *StutterEfficiency = 0; 6822 } 6823 6824 if (*Z8StutterEfficiencyNotIncludingVBlank > 0) { 6825 LastZ8StutterPeriod = VActiveTimeCriticalPlane - (*NumberOfStutterBurstsPerFrame - 1) * *StutterPeriod; 6826 if ((SynchronizedVBlank || TotalNumberOfActiveOTG == 1) && LastZ8StutterPeriod + MinTTUVBlankCriticalPlane > Z8StutterEnterPlusExitWatermark) { 6827 *Z8StutterEfficiency = (1 - (*NumberOfStutterBurstsPerFrame * SRExitZ8Time + StutterBurstTime * VActiveTimeCriticalPlane 6828 / *StutterPeriod) / FrameTimeCriticalPlane) * 100; 6829 } else { 6830 *Z8StutterEfficiency = *Z8StutterEfficiencyNotIncludingVBlank; 6831 } 6832 } else { 6833 *Z8StutterEfficiency = 0.; 6834 } 6835 6836 dml_print("DML::%s: LastZ8StutterPeriod = %f\n", __func__, LastZ8StutterPeriod); 6837 dml_print("DML::%s: Z8StutterEnterPlusExitWatermark = %f\n", __func__, Z8StutterEnterPlusExitWatermark); 6838 dml_print("DML::%s: StutterBurstTime = %f\n", __func__, StutterBurstTime); 6839 dml_print("DML::%s: StutterPeriod = %f\n", __func__, *StutterPeriod); 6840 dml_print("DML::%s: StutterEfficiency = %f\n", __func__, *StutterEfficiency); 6841 dml_print("DML::%s: Z8StutterEfficiency = %f\n", __func__, *Z8StutterEfficiency); 6842 dml_print("DML::%s: StutterEfficiencyNotIncludingVBlank = %f\n", __func__, *StutterEfficiencyNotIncludingVBlank); 6843 dml_print("DML::%s: Z8NumberOfStutterBurstsPerFrame = %d\n", 
__func__, *Z8NumberOfStutterBurstsPerFrame);
}

/*
 * Select the swath height of every active plane, split the DET buffer between
 * luma and chroma, and report whether each viewport is supported.
 *
 * Inputs are per-plane arrays indexed 0..NumberOfActivePlanes-1.
 *
 * Outputs written here:
 *   SwathHeightY/C[k]               - chosen swath heights
 *   DETBufferSizeY/C[k]             - DET split between luma and chroma
 *   ViewportSizeSupportPerPlane[k]  - per-plane support flag
 *   *ViewportSizeSupport            - false if any plane is unsupported
 * SwathWidth[], SwathWidthChroma[], swath_width_luma_ub[] and
 * swath_width_chroma_ub[] are filled by the call to CalculateSwathWidth().
 *
 * NOTE(review): HRatioChroma is accepted but never referenced in this body.
 */
static void CalculateSwathAndDETConfiguration(
		bool ForceSingleDPP,
		int NumberOfActivePlanes,
		unsigned int DETBufferSizeInKByte,
		double MaximumSwathWidthLuma[],
		double MaximumSwathWidthChroma[],
		enum scan_direction_class SourceScan[],
		enum source_format_class SourcePixelFormat[],
		enum dm_swizzle_mode SurfaceTiling[],
		int ViewportWidth[],
		int ViewportHeight[],
		int SurfaceWidthY[],
		int SurfaceWidthC[],
		int SurfaceHeightY[],
		int SurfaceHeightC[],
		int Read256BytesBlockHeightY[],
		int Read256BytesBlockHeightC[],
		int Read256BytesBlockWidthY[],
		int Read256BytesBlockWidthC[],
		enum odm_combine_mode ODMCombineEnabled[],
		int BlendingAndTiming[],
		int BytePerPixY[],
		int BytePerPixC[],
		double BytePerPixDETY[],
		double BytePerPixDETC[],
		int HActive[],
		double HRatio[],
		double HRatioChroma[],
		int DPPPerPlane[],
		int swath_width_luma_ub[],
		int swath_width_chroma_ub[],
		double SwathWidth[],
		double SwathWidthChroma[],
		int SwathHeightY[],
		int SwathHeightC[],
		unsigned int DETBufferSizeY[],
		unsigned int DETBufferSizeC[],
		bool ViewportSizeSupportPerPlane[],
		bool *ViewportSizeSupport)
{
	int MaximumSwathHeightY[DC__NUM_DPP__MAX];
	int MaximumSwathHeightC[DC__NUM_DPP__MAX];
	int MinimumSwathHeightY;
	int MinimumSwathHeightC;
	int RoundedUpMaxSwathSizeBytesY;
	int RoundedUpMaxSwathSizeBytesC;
	int RoundedUpMinSwathSizeBytesY;
	int RoundedUpMinSwathSizeBytesC;
	int RoundedUpSwathSizeBytesY;
	int RoundedUpSwathSizeBytesC;
	double SwathWidthSingleDPP[DC__NUM_DPP__MAX];
	double SwathWidthSingleDPPChroma[DC__NUM_DPP__MAX];
	int k;

	/* Fills the swath widths, the *_ub widths and the maximum swath heights used below. */
	CalculateSwathWidth(
			ForceSingleDPP,
			NumberOfActivePlanes,
			SourcePixelFormat,
			SourceScan,
			ViewportWidth,
			ViewportHeight,
			SurfaceWidthY,
			SurfaceWidthC,
			SurfaceHeightY,
			SurfaceHeightC,
			ODMCombineEnabled,
			BytePerPixY,
			BytePerPixC,
			Read256BytesBlockHeightY,
			Read256BytesBlockHeightC,
			Read256BytesBlockWidthY,
			Read256BytesBlockWidthC,
			BlendingAndTiming,
			HActive,
			HRatio,
			DPPPerPlane,
			SwathWidthSingleDPP,
			SwathWidthSingleDPPChroma,
			SwathWidth,
			SwathWidthChroma,
			MaximumSwathHeightY,
			MaximumSwathHeightC,
			swath_width_luma_ub,
			swath_width_chroma_ub);

	*ViewportSizeSupport = true;
	for (k = 0; k < NumberOfActivePlanes; ++k) {
		/*
		 * Minimum swath height is either the maximum or half of it,
		 * depending on pixel format, surface tiling and scan direction.
		 */
		if ((SourcePixelFormat[k] == dm_444_64 || SourcePixelFormat[k] == dm_444_32 || SourcePixelFormat[k] == dm_444_16 || SourcePixelFormat[k] == dm_mono_16
				|| SourcePixelFormat[k] == dm_mono_8 || SourcePixelFormat[k] == dm_rgbe)) {
			if (SurfaceTiling[k] == dm_sw_linear
					|| (SourcePixelFormat[k] == dm_444_64
						&& (SurfaceTiling[k] == dm_sw_64kb_s || SurfaceTiling[k] == dm_sw_64kb_s_t || SurfaceTiling[k] == dm_sw_64kb_s_x)
						&& SourceScan[k] != dm_vert)) {
				MinimumSwathHeightY = MaximumSwathHeightY[k];
			/* NOTE(review): dm_444_8 is not in the enclosing format list, so this arm looks unreachable -- confirm with HW before touching. */
			} else if (SourcePixelFormat[k] == dm_444_8 && SourceScan[k] == dm_vert) {
				MinimumSwathHeightY = MaximumSwathHeightY[k];
			} else {
				MinimumSwathHeightY = MaximumSwathHeightY[k] / 2;
			}
			MinimumSwathHeightC = MaximumSwathHeightC[k];
		} else {
			if (SurfaceTiling[k] == dm_sw_linear) {
				MinimumSwathHeightY = MaximumSwathHeightY[k];
				MinimumSwathHeightC = MaximumSwathHeightC[k];
			} else if (SourcePixelFormat[k] == dm_rgbe_alpha && SourceScan[k] == dm_vert) {
				MinimumSwathHeightY = MaximumSwathHeightY[k] / 2;
				MinimumSwathHeightC = MaximumSwathHeightC[k];
			} else if (SourcePixelFormat[k] == dm_rgbe_alpha) {
				MinimumSwathHeightY = MaximumSwathHeightY[k] / 2;
				MinimumSwathHeightC = MaximumSwathHeightC[k] / 2;
			} else if (SourcePixelFormat[k] == dm_420_8 && SourceScan[k] == dm_vert) {
				MinimumSwathHeightY = MaximumSwathHeightY[k];
				MinimumSwathHeightC = MaximumSwathHeightC[k] / 2;
			} else {
				MinimumSwathHeightC = MaximumSwathHeightC[k] / 2;
				MinimumSwathHeightY = MaximumSwathHeightY[k] / 2;
			}
		}

		/*
		 * Swath sizes in bytes for the maximum and minimum heights. The
		 * double products are truncated on assignment to int. Only
		 * dm_420_10 goes through dml_ceil(..., 256) (presumably rounding
		 * up to a multiple of 256 -- helper not visible here).
		 */
		RoundedUpMaxSwathSizeBytesY = swath_width_luma_ub[k] * BytePerPixDETY[k] * MaximumSwathHeightY[k];
		RoundedUpMinSwathSizeBytesY = swath_width_luma_ub[k] * BytePerPixDETY[k] * MinimumSwathHeightY;
		if (SourcePixelFormat[k] == dm_420_10) {
			RoundedUpMaxSwathSizeBytesY = dml_ceil((double) RoundedUpMaxSwathSizeBytesY, 256);
			RoundedUpMinSwathSizeBytesY = dml_ceil((double) RoundedUpMinSwathSizeBytesY, 256);
		}
		RoundedUpMaxSwathSizeBytesC = swath_width_chroma_ub[k] * BytePerPixDETC[k] * MaximumSwathHeightC[k];
		RoundedUpMinSwathSizeBytesC = swath_width_chroma_ub[k] * BytePerPixDETC[k] * MinimumSwathHeightC;
		if (SourcePixelFormat[k] == dm_420_10) {
			RoundedUpMaxSwathSizeBytesC = dml_ceil(RoundedUpMaxSwathSizeBytesC, 256);
			RoundedUpMinSwathSizeBytesC = dml_ceil(RoundedUpMinSwathSizeBytesC, 256);
		}

		/*
		 * Take the tallest luma/chroma combination whose summed swath
		 * size fits in DETBufferSizeInKByte * 1024 / 2: max/max first,
		 * then shrink the side selected by the 1.5x ratio test, and
		 * fall back to min/min (without a fit check) otherwise.
		 */
		if (RoundedUpMaxSwathSizeBytesY + RoundedUpMaxSwathSizeBytesC <= DETBufferSizeInKByte * 1024 / 2) {
			SwathHeightY[k] = MaximumSwathHeightY[k];
			SwathHeightC[k] = MaximumSwathHeightC[k];
			RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY;
			RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC;
		} else if (RoundedUpMaxSwathSizeBytesY >= 1.5 * RoundedUpMaxSwathSizeBytesC
				&& RoundedUpMinSwathSizeBytesY + RoundedUpMaxSwathSizeBytesC <= DETBufferSizeInKByte * 1024 / 2) {
			SwathHeightY[k] = MinimumSwathHeightY;
			SwathHeightC[k] = MaximumSwathHeightC[k];
			RoundedUpSwathSizeBytesY = RoundedUpMinSwathSizeBytesY;
			RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC;
		} else if (RoundedUpMaxSwathSizeBytesY < 1.5 * RoundedUpMaxSwathSizeBytesC
				&& RoundedUpMaxSwathSizeBytesY + RoundedUpMinSwathSizeBytesC <= DETBufferSizeInKByte * 1024 / 2) {
			SwathHeightY[k] = MaximumSwathHeightY[k];
			SwathHeightC[k] = MinimumSwathHeightC;
			RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY;
			RoundedUpSwathSizeBytesC = RoundedUpMinSwathSizeBytesC;
		} else {
			SwathHeightY[k] = MinimumSwathHeightY;
			SwathHeightC[k] = MinimumSwathHeightC;
			RoundedUpSwathSizeBytesY = RoundedUpMinSwathSizeBytesY;
			RoundedUpSwathSizeBytesC = RoundedUpMinSwathSizeBytesC;
		}
		{
		/*
		 * The DET split and the support check below use this
		 * dml_ceil(..., 64) value, whereas the fit tests above use the
		 * raw DETBufferSizeInKByte.
		 */
		double actDETBufferSizeInKByte = dml_ceil(DETBufferSizeInKByte, 64);
		/* No chroma swath: luma gets everything; otherwise 1/2:1/2 or 2/3:1/3 by the 1.5x ratio. */
		if (SwathHeightC[k] == 0) {
			DETBufferSizeY[k] = actDETBufferSizeInKByte * 1024;
			DETBufferSizeC[k] = 0;
		} else if (RoundedUpSwathSizeBytesY <= 1.5 * RoundedUpSwathSizeBytesC) {
			DETBufferSizeY[k] = actDETBufferSizeInKByte * 1024 / 2;
			DETBufferSizeC[k] = actDETBufferSizeInKByte * 1024 / 2;
		} else {
			DETBufferSizeY[k] = dml_floor(actDETBufferSizeInKByte * 1024 * 2 / 3, 1024);
			DETBufferSizeC[k] = actDETBufferSizeInKByte * 1024 / 3;
		}

		/*
		 * Unsupported when even the minimum swaths exceed half of the
		 * buffer, or when a swath width exceeds its maximum (chroma is
		 * only checked when SwathHeightC[k] > 0).
		 */
		if (RoundedUpMinSwathSizeBytesY + RoundedUpMinSwathSizeBytesC > actDETBufferSizeInKByte * 1024 / 2 || SwathWidth[k] > MaximumSwathWidthLuma[k]
				|| (SwathHeightC[k] > 0 && SwathWidthChroma[k] > MaximumSwathWidthChroma[k])) {
			*ViewportSizeSupport = false;
			ViewportSizeSupportPerPlane[k] = false;
		} else {
			ViewportSizeSupportPerPlane[k] = true;
		}
		}
	}
}

/*
 * Compute per-plane swath widths, maximum swath heights and the
 * block-aligned "_ub" swath widths.
 *
 * Outputs written for every plane k:
 *   SwathWidthSingleDPPY/C[k], SwathWidthY/C[k],
 *   MaximumSwathHeightY/C[k], swath_width_luma_ub[k], swath_width_chroma_ub[k]
 *
 * NOTE(review): BytePerPixY and the surface_*_ub_* values for the unused scan
 * direction are computed/accepted but not all are read in every path.
 */
static void CalculateSwathWidth(
		bool ForceSingleDPP,
		int NumberOfActivePlanes,
		enum source_format_class SourcePixelFormat[],
		enum scan_direction_class SourceScan[],
		int ViewportWidth[],
		int ViewportHeight[],
		int SurfaceWidthY[],
		int SurfaceWidthC[],
		int SurfaceHeightY[],
		int SurfaceHeightC[],
		enum odm_combine_mode ODMCombineEnabled[],
		int BytePerPixY[],
		int BytePerPixC[],
		int Read256BytesBlockHeightY[],
		int Read256BytesBlockHeightC[],
		int Read256BytesBlockWidthY[],
		int Read256BytesBlockWidthC[],
		int BlendingAndTiming[],
		int HActive[],
		double HRatio[],
		int DPPPerPlane[],
		double SwathWidthSingleDPPY[],
		double SwathWidthSingleDPPC[],
		double SwathWidthY[],
		double SwathWidthC[],
		int MaximumSwathHeightY[],
		int MaximumSwathHeightC[],
		int swath_width_luma_ub[],
		int swath_width_chroma_ub[])
{
	enum odm_combine_mode MainPlaneODMCombine;
	int j, k;

#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: NumberOfActivePlanes = %d\n", __func__, NumberOfActivePlanes);
#endif

	for (k = 0; k < NumberOfActivePlanes; ++k) {
		/* The swath runs along the viewport width, or along its height for dm_vert scan. */
		if (SourceScan[k] != dm_vert) {
			SwathWidthSingleDPPY[k] = ViewportWidth[k];
		} else {
			SwathWidthSingleDPPY[k] = ViewportHeight[k];
		}

#ifdef __DML_VBA_DEBUG__
		dml_print("DML::%s: k=%d ViewportWidth=%d\n", __func__, k, ViewportWidth[k]);
		dml_print("DML::%s: k=%d ViewportHeight=%d\n", __func__, k, ViewportHeight[k]);
#endif

		/* Use the ODM combine mode of the plane that BlendingAndTiming[k] points at. */
		MainPlaneODMCombine = ODMCombineEnabled[k];
		for (j = 0; j < NumberOfActivePlanes; ++j) {
			if (BlendingAndTiming[k] == j) {
				MainPlaneODMCombine = ODMCombineEnabled[j];
			}
		}

		/* ODM 4:1 / 2:1 caps the width at HActive/4 or /2 scaled by HRatio; two DPPs halve it. */
		if (MainPlaneODMCombine == dm_odm_combine_mode_4to1) {
			SwathWidthY[k] = dml_min(SwathWidthSingleDPPY[k], dml_round(HActive[k] / 4.0 * HRatio[k]));
		} else if (MainPlaneODMCombine == dm_odm_combine_mode_2to1) {
			SwathWidthY[k] = dml_min(SwathWidthSingleDPPY[k], dml_round(HActive[k] / 2.0 * HRatio[k]));
		} else if (DPPPerPlane[k] == 2) {
			SwathWidthY[k] = SwathWidthSingleDPPY[k] / 2;
		} else {
			SwathWidthY[k] = SwathWidthSingleDPPY[k];
		}

#ifdef __DML_VBA_DEBUG__
		dml_print("DML::%s: k=%d SwathWidthSingleDPPY=%f\n", __func__, k, SwathWidthSingleDPPY[k]);
		dml_print("DML::%s: k=%d SwathWidthY=%f\n", __func__, k, SwathWidthY[k]);
#endif

		/* The dm_420_* formats use half the luma width for chroma. */
		if (SourcePixelFormat[k] == dm_420_8 || SourcePixelFormat[k] == dm_420_10 || SourcePixelFormat[k] == dm_420_12) {
			SwathWidthC[k] = SwathWidthY[k] / 2;
			SwathWidthSingleDPPC[k] = SwathWidthSingleDPPY[k] / 2;
		} else {
			SwathWidthC[k] = SwathWidthY[k];
			SwathWidthSingleDPPC[k] = SwathWidthSingleDPPY[k];
		}

		/* ForceSingleDPP overrides any ODM / multi-DPP split computed above. */
		if (ForceSingleDPP == true) {
			SwathWidthY[k] = SwathWidthSingleDPPY[k];
			SwathWidthC[k] = SwathWidthSingleDPPC[k];
		}
		{
		/* Surface dimensions passed through dml_ceil() with the 256-byte block dimensions. */
		int surface_width_ub_l = dml_ceil(SurfaceWidthY[k], Read256BytesBlockWidthY[k]);
		int surface_height_ub_l = dml_ceil(SurfaceHeightY[k], Read256BytesBlockHeightY[k]);
		int surface_width_ub_c = dml_ceil(SurfaceWidthC[k], Read256BytesBlockWidthC[k]);
		int surface_height_ub_c = dml_ceil(SurfaceHeightC[k], Read256BytesBlockHeightC[k]);

#ifdef __DML_VBA_DEBUG__
		dml_print("DML::%s: k=%d surface_width_ub_l=%0d\n", __func__, k, surface_width_ub_l);
#endif

		/*
		 * Non-vertical scan: heights come from the block height and the
		 * widths are bounded by the surface width. Vertical scan swaps
		 * the roles of block width/height and uses the surface height.
		 * Chroma _ub is 0 when BytePerPixC[k] is not positive.
		 */
		if (SourceScan[k] != dm_vert) {
			MaximumSwathHeightY[k] = Read256BytesBlockHeightY[k];
			MaximumSwathHeightC[k] = Read256BytesBlockHeightC[k];
			swath_width_luma_ub[k] = dml_min(surface_width_ub_l, (int) dml_ceil(SwathWidthY[k] - 1, Read256BytesBlockWidthY[k]) + Read256BytesBlockWidthY[k]);
			if (BytePerPixC[k] > 0) {
				swath_width_chroma_ub[k] = dml_min(
						surface_width_ub_c,
						(int) dml_ceil(SwathWidthC[k] - 1, Read256BytesBlockWidthC[k]) + Read256BytesBlockWidthC[k]);
			} else {
				swath_width_chroma_ub[k] = 0;
			}
		} else {
			MaximumSwathHeightY[k] = Read256BytesBlockWidthY[k];
			MaximumSwathHeightC[k] = Read256BytesBlockWidthC[k];
			swath_width_luma_ub[k] = dml_min(surface_height_ub_l, (int) dml_ceil(SwathWidthY[k] - 1, Read256BytesBlockHeightY[k]) + Read256BytesBlockHeightY[k]);
			if (BytePerPixC[k] > 0) {
				swath_width_chroma_ub[k] = dml_min(
						surface_height_ub_c,
						(int) dml_ceil(SwathWidthC[k] - 1, Read256BytesBlockHeightC[k]) + Read256BytesBlockHeightC[k]);
			} else {
				swath_width_chroma_ub[k] = 0;
			}
		}
		}
	}
}

/*
 * Return the extra latency: a cycle term
 * (RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__) / DCFCLK plus a
 * byte term CalculateExtraLatencyBytes(...) / ReturnBW.
 *
 * NOTE(review): DCFCLK and ReturnBW are used as divisors without a zero check.
 */
static double CalculateExtraLatency(
		int RoundTripPingLatencyCycles,
		int ReorderingBytes,
		double DCFCLK,
		int TotalNumberOfActiveDPP,
		int PixelChunkSizeInKByte,
		int TotalNumberOfDCCActiveDPP,
		int MetaChunkSize,
		double ReturnBW,
		bool GPUVMEnable,
		bool HostVMEnable,
		int NumberOfActivePlanes,
		int NumberOfDPP[],
		int dpte_group_bytes[],
		double HostVMInefficiencyFactor,
		double HostVMMinPageSize,
		int HostVMMaxNonCachedPageTableLevels)
{
	double ExtraLatencyBytes;
	double ExtraLatency;

	ExtraLatencyBytes = CalculateExtraLatencyBytes(
			ReorderingBytes,
			TotalNumberOfActiveDPP,
			PixelChunkSizeInKByte,
			TotalNumberOfDCCActiveDPP,
			MetaChunkSize,
			GPUVMEnable,
			HostVMEnable,
			NumberOfActivePlanes,
			NumberOfDPP,
			dpte_group_bytes,
			HostVMInefficiencyFactor,
			HostVMMinPageSize,
			HostVMMaxNonCachedPageTableLevels);

	ExtraLatency = (RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__) / DCFCLK + ExtraLatencyBytes / ReturnBW;

#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: RoundTripPingLatencyCycles=%d\n", __func__, RoundTripPingLatencyCycles);
	dml_print("DML::%s: DCFCLK=%f\n", __func__, DCFCLK);
	dml_print("DML::%s: ExtraLatencyBytes=%f\n", __func__, ExtraLatencyBytes);
	dml_print("DML::%s: ReturnBW=%f\n", __func__, ReturnBW);
	dml_print("DML::%s: ExtraLatency=%f\n", __func__, ExtraLatency);
#endif

	return ExtraLatency;
}

/*
 * Return the number of bytes counted as extra latency:
 *   ReorderingBytes
 *   + (TotalNumberOfActiveDPP * PixelChunkSizeInKByte
 *      + TotalNumberOfDCCActiveDPP * MetaChunkSize) * 1024
 *   + when GPUVMEnable, for each plane:
 *     NumberOfDPP[k] * dpte_group_bytes[k] * (1 + 8 * HostVMDynamicLevels)
 *     * HostVMInefficiencyFactor
 */
static double CalculateExtraLatencyBytes(
		int ReorderingBytes,
		int TotalNumberOfActiveDPP,
		int PixelChunkSizeInKByte,
		int TotalNumberOfDCCActiveDPP,
		int MetaChunkSize,
		bool GPUVMEnable,
		bool HostVMEnable,
		int NumberOfActivePlanes,
		int NumberOfDPP[],
		int dpte_group_bytes[],
		double HostVMInefficiencyFactor,
		double HostVMMinPageSize,
		int HostVMMaxNonCachedPageTableLevels)
{
	double ret;
	int HostVMDynamicLevels = 0, k;

	/*
	 * Dynamic levels are non-zero only when both GPUVM and HostVM are on:
	 * the full non-cached level count below 2048, one fewer below 1048576,
	 * two fewer otherwise, clamped at 0 via dml_max().
	 */
	if (GPUVMEnable == true && HostVMEnable == true) {
		if (HostVMMinPageSize < 2048) {
			HostVMDynamicLevels = HostVMMaxNonCachedPageTableLevels;
		} else if (HostVMMinPageSize >= 2048 && HostVMMinPageSize < 1048576) {
			HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 1);
		} else {
			HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 2);
		}
	} else {
		HostVMDynamicLevels = 0;
	}

	ret = ReorderingBytes + (TotalNumberOfActiveDPP * PixelChunkSizeInKByte + TotalNumberOfDCCActiveDPP * MetaChunkSize) * 1024.0;

	if (GPUVMEnable == true) {
		for (k = 0; k < NumberOfActivePlanes; ++k) {
			ret = ret + NumberOfDPP[k] * dpte_group_bytes[k] * (1 + 8 * HostVMDynamicLevels) * HostVMInefficiencyFactor;
		}
	}
	return ret;
}

/*
 * Return the largest of the three urgent latency inputs. When
 * DoUrgentLatencyAdjustment is set, add
 * UrgentLatencyAdjustmentFabricClockComponent
 *   * (UrgentLatencyAdjustmentFabricClockReference / FabricClock - 1).
 */
static double CalculateUrgentLatency(
		double UrgentLatencyPixelDataOnly,
		double UrgentLatencyPixelMixedWithVMData,
		double UrgentLatencyVMDataOnly,
		bool DoUrgentLatencyAdjustment,
		double UrgentLatencyAdjustmentFabricClockComponent,
		double UrgentLatencyAdjustmentFabricClockReference,
		double FabricClock)
{
	double ret;

	ret = dml_max3(UrgentLatencyPixelDataOnly, UrgentLatencyPixelMixedWithVMData, UrgentLatencyVMDataOnly);
	if (DoUrgentLatencyAdjustment == true) {
		ret = ret + UrgentLatencyAdjustmentFabricClockComponent * (UrgentLatencyAdjustmentFabricClockReference / FabricClock - 1);
	}
	return ret;
}

/*
 * Fill DCFCLKState[i][j] for every state i below mode_lib->vba.soc.num_states
 * and for j = 0 and 1.
 *
 * The stored value is
 *   min(DCFCLKPerState[i], 1.05 * max(average requirement, peak requirement))
 * where the average requirement comes from the active/row bandwidth totals
 * and the peak requirement from per-plane prefetch, dynamic metadata and
 * Tvm/Tr0 timing constraints. Any constraint that cannot be met forces the
 * corresponding requirement to DCFCLKPerState[i].
 *
 * Arrays declared [][2] or [][2][DC__NUM_DPP__MAX] are indexed
 * [state][j] or [state][j][plane]; one-dimensional arrays are indexed by
 * plane, except UrgLatency[] and DCFCLKPerState[] which are indexed by state.
 */
static void UseMinimumDCFCLK(
		struct display_mode_lib *mode_lib,
		int MaxInterDCNTileRepeaters,
		int MaxPrefetchMode,
		double FinalDRAMClockChangeLatency,
		double SREnterPlusExitTime,
		int ReturnBusWidth,
		int RoundTripPingLatencyCycles,
		int ReorderingBytes,
		int PixelChunkSizeInKByte,
		int MetaChunkSize,
		bool GPUVMEnable,
		int GPUVMMaxPageTableLevels,
		bool HostVMEnable,
		int NumberOfActivePlanes,
		double HostVMMinPageSize,
		int HostVMMaxNonCachedPageTableLevels,
		bool DynamicMetadataVMEnabled,
		enum immediate_flip_requirement ImmediateFlipRequirement,
		bool ProgressiveToInterlaceUnitInOPP,
		double MaxAveragePercentOfIdealFabricAndSDPPortBWDisplayCanUseInNormalSystemOperation,
		double PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency,
		int VTotal[],
		int VActive[],
		int DynamicMetadataTransmittedBytes[],
		int DynamicMetadataLinesBeforeActiveRequired[],
		bool Interlace[],
		double RequiredDPPCLK[][2][DC__NUM_DPP__MAX],
		double RequiredDISPCLK[][2],
		double UrgLatency[],
		unsigned int NoOfDPP[][2][DC__NUM_DPP__MAX],
		double ProjectedDCFCLKDeepSleep[][2],
		double MaximumVStartup[][2][DC__NUM_DPP__MAX],
		double TotalVActivePixelBandwidth[][2],
		double TotalVActiveCursorBandwidth[][2],
		double TotalMetaRowBandwidth[][2],
		double TotalDPTERowBandwidth[][2],
		unsigned int TotalNumberOfActiveDPP[][2],
		unsigned int TotalNumberOfDCCActiveDPP[][2],
		int dpte_group_bytes[],
		double PrefetchLinesY[][2][DC__NUM_DPP__MAX],
		double PrefetchLinesC[][2][DC__NUM_DPP__MAX],
		int swath_width_luma_ub_all_states[][2][DC__NUM_DPP__MAX],
		int swath_width_chroma_ub_all_states[][2][DC__NUM_DPP__MAX],
		int BytePerPixelY[],
		int BytePerPixelC[],
		int HTotal[],
		double PixelClock[],
		double PDEAndMetaPTEBytesPerFrame[][2][DC__NUM_DPP__MAX],
		double DPTEBytesPerRow[][2][DC__NUM_DPP__MAX],
		double MetaRowBytes[][2][DC__NUM_DPP__MAX],
		bool DynamicMetadataEnable[],
		double VActivePixelBandwidth[][2][DC__NUM_DPP__MAX],
		double VActiveCursorBandwidth[][2][DC__NUM_DPP__MAX],
		double ReadBandwidthLuma[],
		double ReadBandwidthChroma[],
		double DCFCLKPerState[],
		double DCFCLKState[][2])
{
	struct vba_vars_st *v = &mode_lib->vba;
	/* dummy1..3 only receive outputs of CalculateVupdateAndDynamicMetadataParameters(); they are never read. */
	int dummy1, i, j, k;
	double NormalEfficiency, dummy2, dummy3;
	double TotalMaxPrefetchFlipDPTERowBandwidth[DC__VOLTAGE_STATES][2];

	/* Percentage converted to a fraction; used as a divisor throughout. */
	NormalEfficiency = PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0;
	for (i = 0; i < v->soc.num_states; ++i) {
		for (j = 0; j <= 1; ++j) {
			double PixelDCFCLKCyclesRequiredInPrefetch[DC__NUM_DPP__MAX];
			double PrefetchPixelLinesTime[DC__NUM_DPP__MAX];
			double DCFCLKRequiredForPeakBandwidthPerPlane[DC__NUM_DPP__MAX];
			double DynamicMetadataVMExtraLatency[DC__NUM_DPP__MAX];
			double MinimumTWait;
			double NonDPTEBandwidth;
			double DPTEBandwidth;
			double DCFCLKRequiredForAverageBandwidth;
			double ExtraLatencyBytes;
			double ExtraLatencyCycles;
			double DCFCLKRequiredForPeakBandwidth;
			int NoOfDPPState[DC__NUM_DPP__MAX];
			double MinimumTvmPlus2Tr0;

			/* DPTE row bandwidth assuming each row is fetched within 15.75 line times. */
			TotalMaxPrefetchFlipDPTERowBandwidth[i][j] = 0;
			for (k = 0; k < NumberOfActivePlanes; ++k) {
				TotalMaxPrefetchFlipDPTERowBandwidth[i][j] = TotalMaxPrefetchFlipDPTERowBandwidth[i][j]
						+ NoOfDPP[i][j][k] * DPTEBytesPerRow[i][j][k] / (15.75 * HTotal[k] / PixelClock[k]);
			}

			/* Flat int copy of this state's DPP counts, in the form CalculateExtraLatencyBytes() takes. */
			for (k = 0; k <= NumberOfActivePlanes - 1; ++k) {
				NoOfDPPState[k] = NoOfDPP[i][j][k];
			}

			MinimumTWait = CalculateTWait(MaxPrefetchMode, FinalDRAMClockChangeLatency, UrgLatency[i], SREnterPlusExitTime);
			NonDPTEBandwidth = TotalVActivePixelBandwidth[i][j] + TotalVActiveCursorBandwidth[i][j] + TotalMetaRowBandwidth[i][j];
			/* HostVM or a required immediate flip selects the 15.75-line DPTE bandwidth computed above. */
			DPTEBandwidth = (HostVMEnable == true || ImmediateFlipRequirement == dm_immediate_flip_required) ?
					TotalMaxPrefetchFlipDPTERowBandwidth[i][j] : TotalDPTERowBandwidth[i][j];
			/* Average requirement: largest of deep-sleep DCFCLK and two bandwidth-derived clocks. */
			DCFCLKRequiredForAverageBandwidth = dml_max3(
					ProjectedDCFCLKDeepSleep[i][j],
					(NonDPTEBandwidth + TotalDPTERowBandwidth[i][j]) / ReturnBusWidth
							/ (MaxAveragePercentOfIdealFabricAndSDPPortBWDisplayCanUseInNormalSystemOperation / 100),
					(NonDPTEBandwidth + DPTEBandwidth / NormalEfficiency) / NormalEfficiency / ReturnBusWidth);

			/* HostVMInefficiencyFactor is passed as the literal 1 here. */
			ExtraLatencyBytes = CalculateExtraLatencyBytes(
					ReorderingBytes,
					TotalNumberOfActiveDPP[i][j],
					PixelChunkSizeInKByte,
					TotalNumberOfDCCActiveDPP[i][j],
					MetaChunkSize,
					GPUVMEnable,
					HostVMEnable,
					NumberOfActivePlanes,
					NoOfDPPState,
					dpte_group_bytes,
					1,
					HostVMMinPageSize,
					HostVMMaxNonCachedPageTableLevels);
			ExtraLatencyCycles = RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__ + ExtraLatencyBytes / NormalEfficiency / ReturnBusWidth;
			for (k = 0; k < NumberOfActivePlanes; ++k) {
				double DCFCLKCyclesRequiredInPrefetch;
				double ExpectedPrefetchBWAcceleration;
				double PrefetchTime;

				/* DCFCLK cycles needed to return the prefetched pixel data, then all prefetch traffic. */
				PixelDCFCLKCyclesRequiredInPrefetch[k] = (PrefetchLinesY[i][j][k] * swath_width_luma_ub_all_states[i][j][k] * BytePerPixelY[k]
						+ PrefetchLinesC[i][j][k] * swath_width_chroma_ub_all_states[i][j][k] * BytePerPixelC[k]) / NormalEfficiency / ReturnBusWidth;
				DCFCLKCyclesRequiredInPrefetch = 2 * ExtraLatencyCycles / NoOfDPPState[k]
						+ PDEAndMetaPTEBytesPerFrame[i][j][k] / NormalEfficiency / NormalEfficiency / ReturnBusWidth * (GPUVMMaxPageTableLevels > 2 ? 1 : 0)
						+ 2 * DPTEBytesPerRow[i][j][k] / NormalEfficiency / NormalEfficiency / ReturnBusWidth
						+ 2 * MetaRowBytes[i][j][k] / NormalEfficiency / ReturnBusWidth + PixelDCFCLKCyclesRequiredInPrefetch[k];
				PrefetchPixelLinesTime[k] = dml_max(PrefetchLinesY[i][j][k], PrefetchLinesC[i][j][k]) * HTotal[k] / PixelClock[k];
				/* NOTE(review): divides by ReadBandwidthLuma + ReadBandwidthChroma with no zero check. */
				ExpectedPrefetchBWAcceleration = (VActivePixelBandwidth[i][j][k] + VActiveCursorBandwidth[i][j][k])
						/ (ReadBandwidthLuma[k] + ReadBandwidthChroma[k]);
				DynamicMetadataVMExtraLatency[k] =
						(GPUVMEnable == true && DynamicMetadataEnable[k] == true && DynamicMetadataVMEnabled == true) ?
								UrgLatency[i] * GPUVMMaxPageTableLevels * (HostVMEnable == true ? HostVMMaxNonCachedPageTableLevels + 1 : 1) : 0;
				/* Time left for prefetch after TWait, page-table walk latency and dynamic metadata VM latency. */
				PrefetchTime = (MaximumVStartup[i][j][k] - 1) * HTotal[k] / PixelClock[k] - MinimumTWait
						- UrgLatency[i]
								* ((GPUVMMaxPageTableLevels <= 2 ? GPUVMMaxPageTableLevels : GPUVMMaxPageTableLevels - 2)
										* (HostVMEnable == true ? HostVMMaxNonCachedPageTableLevels + 1 : 1) - 1)
						- DynamicMetadataVMExtraLatency[k];

				if (PrefetchTime > 0) {
					double ExpectedVRatioPrefetch;
					ExpectedVRatioPrefetch = PrefetchPixelLinesTime[k]
							/ (PrefetchTime * PixelDCFCLKCyclesRequiredInPrefetch[k] / DCFCLKCyclesRequiredInPrefetch);
					DCFCLKRequiredForPeakBandwidthPerPlane[k] = NoOfDPPState[k] * PixelDCFCLKCyclesRequiredInPrefetch[k] / PrefetchPixelLinesTime[k]
							* dml_max(1.0, ExpectedVRatioPrefetch) * dml_max(1.0, ExpectedVRatioPrefetch / 4) * ExpectedPrefetchBWAcceleration;
					if (HostVMEnable == true || ImmediateFlipRequirement == dm_immediate_flip_required) {
						DCFCLKRequiredForPeakBandwidthPerPlane[k] = DCFCLKRequiredForPeakBandwidthPerPlane[k]
								+ NoOfDPPState[k] * DPTEBandwidth / NormalEfficiency / NormalEfficiency / ReturnBusWidth;
					}
				} else {
					/* No prefetch time available: require the state's full DCFCLK. */
					DCFCLKRequiredForPeakBandwidthPerPlane[k] = DCFCLKPerState[i];
				}
				if (DynamicMetadataEnable[k] == true) {
					double TSetupPipe;
					double TdmbfPipe;
					double TdmsksPipe;
					double TdmecPipe;
					double AllowedTimeForUrgentExtraLatency;

					CalculateVupdateAndDynamicMetadataParameters(
							MaxInterDCNTileRepeaters,
							RequiredDPPCLK[i][j][k],
							RequiredDISPCLK[i][j],
							ProjectedDCFCLKDeepSleep[i][j],
							PixelClock[k],
							HTotal[k],
							VTotal[k] - VActive[k],
							DynamicMetadataTransmittedBytes[k],
							DynamicMetadataLinesBeforeActiveRequired[k],
							Interlace[k],
							ProgressiveToInterlaceUnitInOPP,
							&TSetupPipe,
							&TdmbfPipe,
							&TdmecPipe,
							&TdmsksPipe,
							&dummy1,
							&dummy2,
							&dummy3);
					AllowedTimeForUrgentExtraLatency = MaximumVStartup[i][j][k] * HTotal[k] / PixelClock[k] - MinimumTWait - TSetupPipe - TdmbfPipe - TdmecPipe
							- TdmsksPipe - DynamicMetadataVMExtraLatency[k];
					if (AllowedTimeForUrgentExtraLatency > 0) {
						DCFCLKRequiredForPeakBandwidthPerPlane[k] = dml_max(
								DCFCLKRequiredForPeakBandwidthPerPlane[k],
								ExtraLatencyCycles / AllowedTimeForUrgentExtraLatency);
					} else {
						DCFCLKRequiredForPeakBandwidthPerPlane[k] = DCFCLKPerState[i];
					}
				}
			}
			/* Peak requirement starts as the sum over all planes. */
			DCFCLKRequiredForPeakBandwidth = 0;
			for (k = 0; k <= NumberOfActivePlanes - 1; ++k) {
				DCFCLKRequiredForPeakBandwidth = DCFCLKRequiredForPeakBandwidth + DCFCLKRequiredForPeakBandwidthPerPlane[k];
			}
			MinimumTvmPlus2Tr0 = UrgLatency[i]
					* (GPUVMEnable == true ?
							(HostVMEnable == true ?
									(GPUVMMaxPageTableLevels + 2) * (HostVMMaxNonCachedPageTableLevels + 1) - 1 : GPUVMMaxPageTableLevels + 1) :
							0);
			/*
			 * Per plane: if the available window is too short, fall
			 * back to DCFCLKPerState[i]; otherwise raise the peak
			 * requirement to cover the extra latency in that window.
			 * A later plane can raise, but also overwrite, the value
			 * left by an earlier plane.
			 */
			for (k = 0; k < NumberOfActivePlanes; ++k) {
				double MaximumTvmPlus2Tr0PlusTsw;
				MaximumTvmPlus2Tr0PlusTsw = (MaximumVStartup[i][j][k] - 2) * HTotal[k] / PixelClock[k] - MinimumTWait - DynamicMetadataVMExtraLatency[k];
				if (MaximumTvmPlus2Tr0PlusTsw <= MinimumTvmPlus2Tr0 + PrefetchPixelLinesTime[k] / 4) {
					DCFCLKRequiredForPeakBandwidth = DCFCLKPerState[i];
				} else {
					DCFCLKRequiredForPeakBandwidth = dml_max3(
							DCFCLKRequiredForPeakBandwidth,
							2 * ExtraLatencyCycles / (MaximumTvmPlus2Tr0PlusTsw - MinimumTvmPlus2Tr0 - PrefetchPixelLinesTime[k] / 4),
							(2 * ExtraLatencyCycles + PixelDCFCLKCyclesRequiredInPrefetch[k]) / (MaximumTvmPlus2Tr0PlusTsw - MinimumTvmPlus2Tr0));
				}
			}
			/* 5% margin on the larger requirement, capped at the state's DCFCLK. */
			DCFCLKState[i][j] = dml_min(DCFCLKPerState[i], 1.05 * dml_max(DCFCLKRequiredForAverageBandwidth, DCFCLKRequiredForPeakBandwidth));
		}
	}
}

/*
 * Decide whether unbounded requesting is enabled and compute the compressed
 * buffer size.
 *
 * *UnboundedRequestEnabled comes from UnboundedRequest(), evaluated with
 * Output[0] only. *CompressedBufferSizeInkByte is the config return buffer
 * minus the DET buffers of TotalActiveDPP pipes (unbounded request enabled)
 * or MaxNumDPP pipes (otherwise), then scaled by
 * CompressedBufferSegmentSizeInkByteFinal / 64 in integer arithmetic.
 */
static void CalculateUnboundedRequestAndCompressedBufferSize(
		unsigned int DETBufferSizeInKByte,
		int ConfigReturnBufferSizeInKByte,
		enum unbounded_requesting_policy UseUnboundedRequestingFinal,
		int TotalActiveDPP,
		bool NoChromaPlanes,
		int MaxNumDPP,
		int CompressedBufferSegmentSizeInkByteFinal,
		enum output_encoder_class *Output,
		bool *UnboundedRequestEnabled,
		int *CompressedBufferSizeInkByte)
{
	double actDETBufferSizeInKByte = dml_ceil(DETBufferSizeInKByte, 64);

	*UnboundedRequestEnabled = UnboundedRequest(UseUnboundedRequestingFinal, TotalActiveDPP, NoChromaPlanes, Output[0]);
	/* The double result is truncated on assignment to the int output. */
	*CompressedBufferSizeInkByte = (
			*UnboundedRequestEnabled == true ?
					ConfigReturnBufferSizeInKByte - TotalActiveDPP * actDETBufferSizeInKByte :
					ConfigReturnBufferSizeInKByte - MaxNumDPP * actDETBufferSizeInKByte);
	*CompressedBufferSizeInkByte = *CompressedBufferSizeInkByte * CompressedBufferSegmentSizeInkByteFinal / 64;

#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: TotalActiveDPP = %d\n", __func__, TotalActiveDPP);
	dml_print("DML::%s: DETBufferSizeInKByte = %d\n", __func__, DETBufferSizeInKByte);
	dml_print("DML::%s: ConfigReturnBufferSizeInKByte = %d\n", __func__, ConfigReturnBufferSizeInKByte);
	dml_print("DML::%s: UseUnboundedRequestingFinal = %d\n", __func__, UseUnboundedRequestingFinal);
	dml_print("DML::%s: actDETBufferSizeInKByte = %f\n", __func__, actDETBufferSizeInKByte);
	dml_print("DML::%s: UnboundedRequestEnabled = %d\n", __func__, *UnboundedRequestEnabled);
	dml_print("DML::%s: CompressedBufferSizeInkByte = %d\n", __func__, *CompressedBufferSizeInkByte);
#endif
}

/*
 * Return true when unbounded requesting applies: the policy is not
 * dm_unbounded_requesting_disable, exactly one DPP is active and NoChroma is
 * set. Under dm_unbounded_requesting_edp_only the output must also be dm_edp.
 */
static bool UnboundedRequest(enum unbounded_requesting_policy UseUnboundedRequestingFinal, int TotalNumberOfActiveDPP, bool NoChroma, enum output_encoder_class Output)
{
	bool ret_val = false;

	ret_val = (UseUnboundedRequestingFinal != dm_unbounded_requesting_disable && TotalNumberOfActiveDPP == 1 && NoChroma);
	if (UseUnboundedRequestingFinal == dm_unbounded_requesting_edp_only && Output != dm_edp) {
		ret_val = false;
	}
	return (ret_val);
}
