1 /* 2 * Copyright 2017 Advanced Micro Devices, Inc. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice shall be included in 12 * all copies or substantial portions of the Software. 13 * 14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 20 * OTHER DEALINGS IN THE SOFTWARE. 21 * 22 * Authors: AMD 23 * 24 */ 25 26 #include "dc.h" 27 #include "dc_link.h" 28 #include "../display_mode_lib.h" 29 #include "display_mode_vba_31.h" 30 #include "../dml_inline_defs.h" 31 32 /* 33 * NOTE: 34 * This file is gcc-parsable HW gospel, coming straight from HW engineers. 35 * 36 * It doesn't adhere to Linux kernel style and sometimes will do things in odd 37 * ways. Unless there is something clearly wrong with it the code should 38 * remain as-is as it provides us with a guarantee from HW that it is correct. 39 */ 40 41 #define BPP_INVALID 0 42 #define BPP_BLENDED_PIPE 0xffffffff 43 #define DCN31_MAX_DSC_IMAGE_WIDTH 5184 44 #define DCN31_MAX_FMT_420_BUFFER_WIDTH 4096 45 46 // For DML-C changes that hasn't been propagated to VBA yet 47 //#define __DML_VBA_ALLOW_DELTA__ 48 49 // Move these to ip paramaters/constant 50 51 // At which vstartup the DML start to try if the mode can be supported 52 #define __DML_VBA_MIN_VSTARTUP__ 9 53 54 // Delay in DCFCLK from ARB to DET (1st num is ARB to SDPIF, 2nd number is SDPIF to DET) 55 #define __DML_ARB_TO_RET_DELAY__ (7 + 95) 56 57 // fudge factor for min dcfclk calclation 58 #define __DML_MIN_DCFCLK_FACTOR__ 1.15 59 60 typedef struct { 61 double DPPCLK; 62 double DISPCLK; 63 double PixelClock; 64 double DCFCLKDeepSleep; 65 unsigned int DPPPerPlane; 66 bool ScalerEnabled; 67 double VRatio; 68 double VRatioChroma; 69 enum scan_direction_class SourceScan; 70 unsigned int BlockWidth256BytesY; 71 unsigned int BlockHeight256BytesY; 72 unsigned int BlockWidth256BytesC; 73 unsigned int BlockHeight256BytesC; 74 unsigned int InterlaceEnable; 75 unsigned int NumberOfCursors; 76 unsigned int VBlank; 77 unsigned int HTotal; 78 unsigned int DCCEnable; 79 bool ODMCombineIsEnabled; 80 enum source_format_class SourcePixelFormat; 81 int BytePerPixelY; 82 int BytePerPixelC; 83 bool ProgressiveToInterlaceUnitInOPP; 84 } Pipe; 85 86 #define BPP_INVALID 0 87 #define BPP_BLENDED_PIPE 0xffffffff 88 89 static bool CalculateBytePerPixelAnd256BBlockSizes( 90 enum source_format_class SourcePixelFormat, 91 enum dm_swizzle_mode SurfaceTiling, 92 unsigned int *BytePerPixelY, 93 unsigned int *BytePerPixelC, 94 double *BytePerPixelDETY, 95 double *BytePerPixelDETC, 96 unsigned int *BlockHeight256BytesY, 97 unsigned int *BlockHeight256BytesC, 98 unsigned int *BlockWidth256BytesY, 99 unsigned int *BlockWidth256BytesC); 100 static void DisplayPipeConfiguration(struct display_mode_lib *mode_lib); 101 static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation(struct display_mode_lib *mode_lib); 102 static unsigned int dscceComputeDelay( 103 unsigned int bpc, 104 double BPP, 105 unsigned int sliceWidth, 106 unsigned int numSlices, 107 enum output_format_class pixelFormat, 108 enum output_encoder_class Output); 109 static unsigned int dscComputeDelay(enum output_format_class pixelFormat, enum output_encoder_class Output); 110 static bool CalculatePrefetchSchedule( 111 struct display_mode_lib *mode_lib, 112 double HostVMInefficiencyFactor, 113 Pipe *myPipe, 114 unsigned int DSCDelay, 115 double DPPCLKDelaySubtotalPlusCNVCFormater, 116 double DPPCLKDelaySCL, 117 double DPPCLKDelaySCLLBOnly, 118 double DPPCLKDelayCNVCCursor, 119 double DISPCLKDelaySubtotal, 120 unsigned int DPP_RECOUT_WIDTH, 121 enum output_format_class OutputFormat, 122 unsigned int MaxInterDCNTileRepeaters, 123 unsigned int VStartup, 124 unsigned int MaxVStartup, 125 unsigned int GPUVMPageTableLevels, 126 bool GPUVMEnable, 127 bool HostVMEnable, 128 unsigned int HostVMMaxNonCachedPageTableLevels, 129 double HostVMMinPageSize, 130 bool DynamicMetadataEnable, 131 bool DynamicMetadataVMEnabled, 132 int DynamicMetadataLinesBeforeActiveRequired, 133 unsigned int DynamicMetadataTransmittedBytes, 134 double UrgentLatency, 135 double UrgentExtraLatency, 136 double TCalc, 137 unsigned int PDEAndMetaPTEBytesFrame, 138 unsigned int MetaRowByte, 139 unsigned int PixelPTEBytesPerRow, 140 double PrefetchSourceLinesY, 141 unsigned int SwathWidthY, 142 double VInitPreFillY, 143 unsigned int MaxNumSwathY, 144 double PrefetchSourceLinesC, 145 unsigned int SwathWidthC, 146 double VInitPreFillC, 147 unsigned int MaxNumSwathC, 148 int swath_width_luma_ub, 149 int swath_width_chroma_ub, 150 unsigned int SwathHeightY, 151 unsigned int SwathHeightC, 152 double TWait, 153 double *DSTXAfterScaler, 154 double *DSTYAfterScaler, 155 double *DestinationLinesForPrefetch, 156 double *PrefetchBandwidth, 157 double *DestinationLinesToRequestVMInVBlank, 158 double *DestinationLinesToRequestRowInVBlank, 159 double *VRatioPrefetchY, 160 double *VRatioPrefetchC, 161 double *RequiredPrefetchPixDataBWLuma, 162 double *RequiredPrefetchPixDataBWChroma, 163 bool *NotEnoughTimeForDynamicMetadata, 164 double *Tno_bw, 165 double *prefetch_vmrow_bw, 166 double *Tdmdl_vm, 167 double *Tdmdl, 168 double *TSetup, 169 int *VUpdateOffsetPix, 170 double *VUpdateWidthPix, 171 double *VReadyOffsetPix); 172 static double RoundToDFSGranularityUp(double Clock, double VCOSpeed); 173 static double RoundToDFSGranularityDown(double Clock, double VCOSpeed); 174 static void CalculateDCCConfiguration( 175 bool DCCEnabled, 176 bool DCCProgrammingAssumesScanDirectionUnknown, 177 enum source_format_class SourcePixelFormat, 178 unsigned int SurfaceWidthLuma, 179 unsigned int SurfaceWidthChroma, 180 unsigned int SurfaceHeightLuma, 181 unsigned int SurfaceHeightChroma, 182 double DETBufferSize, 183 unsigned int RequestHeight256ByteLuma, 184 unsigned int RequestHeight256ByteChroma, 185 enum dm_swizzle_mode TilingFormat, 186 unsigned int BytePerPixelY, 187 unsigned int BytePerPixelC, 188 double BytePerPixelDETY, 189 double BytePerPixelDETC, 190 enum scan_direction_class ScanOrientation, 191 unsigned int *MaxUncompressedBlockLuma, 192 unsigned int *MaxUncompressedBlockChroma, 193 unsigned int *MaxCompressedBlockLuma, 194 unsigned int *MaxCompressedBlockChroma, 195 unsigned int *IndependentBlockLuma, 196 unsigned int *IndependentBlockChroma); 197 static double CalculatePrefetchSourceLines( 198 struct display_mode_lib *mode_lib, 199 double VRatio, 200 double vtaps, 201 bool Interlace, 202 bool ProgressiveToInterlaceUnitInOPP, 203 unsigned int SwathHeight, 204 unsigned int ViewportYStart, 205 double *VInitPreFill, 206 unsigned int *MaxNumSwath); 207 static unsigned int CalculateVMAndRowBytes( 208 struct display_mode_lib *mode_lib, 209 bool DCCEnable, 210 unsigned int BlockHeight256Bytes, 211 unsigned int BlockWidth256Bytes, 212 enum source_format_class SourcePixelFormat, 213 unsigned int SurfaceTiling, 214 unsigned int BytePerPixel, 215 enum scan_direction_class ScanDirection, 216 unsigned int SwathWidth, 217 unsigned int ViewportHeight, 218 bool GPUVMEnable, 219 bool HostVMEnable, 220 unsigned int HostVMMaxNonCachedPageTableLevels, 221 unsigned int GPUVMMinPageSize, 222 unsigned int HostVMMinPageSize, 223 unsigned int PTEBufferSizeInRequests, 224 unsigned int Pitch, 225 unsigned int DCCMetaPitch, 226 unsigned int *MacroTileWidth, 227 unsigned int *MetaRowByte, 228 unsigned int *PixelPTEBytesPerRow, 229 bool *PTEBufferSizeNotExceeded, 230 int *dpte_row_width_ub, 231 unsigned int *dpte_row_height, 232 unsigned int *MetaRequestWidth, 233 unsigned int *MetaRequestHeight, 234 unsigned int *meta_row_width, 235 unsigned int *meta_row_height, 236 int *vm_group_bytes, 237 unsigned int *dpte_group_bytes, 238 unsigned int *PixelPTEReqWidth, 239 unsigned int *PixelPTEReqHeight, 240 unsigned int *PTERequestSize, 241 int *DPDE0BytesFrame, 242 int *MetaPTEBytesFrame); 243 static double CalculateTWait(unsigned int PrefetchMode, double DRAMClockChangeLatency, double UrgentLatency, double SREnterPlusExitTime); 244 static void CalculateRowBandwidth( 245 bool GPUVMEnable, 246 enum source_format_class SourcePixelFormat, 247 double VRatio, 248 double VRatioChroma, 249 bool DCCEnable, 250 double LineTime, 251 unsigned int MetaRowByteLuma, 252 unsigned int MetaRowByteChroma, 253 unsigned int meta_row_height_luma, 254 unsigned int meta_row_height_chroma, 255 unsigned int PixelPTEBytesPerRowLuma, 256 unsigned int PixelPTEBytesPerRowChroma, 257 unsigned int dpte_row_height_luma, 258 unsigned int dpte_row_height_chroma, 259 double *meta_row_bw, 260 double *dpte_row_bw); 261 262 static void CalculateFlipSchedule( 263 struct display_mode_lib *mode_lib, 264 double HostVMInefficiencyFactor, 265 double UrgentExtraLatency, 266 double UrgentLatency, 267 unsigned int GPUVMMaxPageTableLevels, 268 bool HostVMEnable, 269 unsigned int HostVMMaxNonCachedPageTableLevels, 270 bool GPUVMEnable, 271 double HostVMMinPageSize, 272 double PDEAndMetaPTEBytesPerFrame, 273 double MetaRowBytes, 274 double DPTEBytesPerRow, 275 double BandwidthAvailableForImmediateFlip, 276 unsigned int TotImmediateFlipBytes, 277 enum source_format_class SourcePixelFormat, 278 double LineTime, 279 double VRatio, 280 double VRatioChroma, 281 double Tno_bw, 282 bool DCCEnable, 283 unsigned int dpte_row_height, 284 unsigned int meta_row_height, 285 unsigned int dpte_row_height_chroma, 286 unsigned int meta_row_height_chroma, 287 double *DestinationLinesToRequestVMInImmediateFlip, 288 double *DestinationLinesToRequestRowInImmediateFlip, 289 double *final_flip_bw, 290 bool *ImmediateFlipSupportedForPipe); 291 static double CalculateWriteBackDelay( 292 enum source_format_class WritebackPixelFormat, 293 double WritebackHRatio, 294 double WritebackVRatio, 295 unsigned int WritebackVTaps, 296 int WritebackDestinationWidth, 297 int WritebackDestinationHeight, 298 int WritebackSourceHeight, 299 unsigned int HTotal); 300 301 static void CalculateVupdateAndDynamicMetadataParameters( 302 int MaxInterDCNTileRepeaters, 303 double DPPCLK, 304 double DISPCLK, 305 double DCFClkDeepSleep, 306 double PixelClock, 307 int HTotal, 308 int VBlank, 309 int DynamicMetadataTransmittedBytes, 310 int DynamicMetadataLinesBeforeActiveRequired, 311 int InterlaceEnable, 312 bool ProgressiveToInterlaceUnitInOPP, 313 double *TSetup, 314 double *Tdmbf, 315 double *Tdmec, 316 double *Tdmsks, 317 int *VUpdateOffsetPix, 318 double *VUpdateWidthPix, 319 double *VReadyOffsetPix); 320 321 static void CalculateWatermarksAndDRAMSpeedChangeSupport( 322 struct display_mode_lib *mode_lib, 323 unsigned int PrefetchMode, 324 unsigned int NumberOfActivePlanes, 325 unsigned int MaxLineBufferLines, 326 unsigned int LineBufferSize, 327 unsigned int WritebackInterfaceBufferSize, 328 double DCFCLK, 329 double ReturnBW, 330 bool SynchronizedVBlank, 331 unsigned int dpte_group_bytes[], 332 unsigned int MetaChunkSize, 333 double UrgentLatency, 334 double ExtraLatency, 335 double WritebackLatency, 336 double WritebackChunkSize, 337 double SOCCLK, 338 double DRAMClockChangeLatency, 339 double SRExitTime, 340 double SREnterPlusExitTime, 341 double SRExitZ8Time, 342 double SREnterPlusExitZ8Time, 343 double DCFCLKDeepSleep, 344 unsigned int DETBufferSizeY[], 345 unsigned int DETBufferSizeC[], 346 unsigned int SwathHeightY[], 347 unsigned int SwathHeightC[], 348 unsigned int LBBitPerPixel[], 349 double SwathWidthY[], 350 double SwathWidthC[], 351 double HRatio[], 352 double HRatioChroma[], 353 unsigned int vtaps[], 354 unsigned int VTAPsChroma[], 355 double VRatio[], 356 double VRatioChroma[], 357 unsigned int HTotal[], 358 double PixelClock[], 359 unsigned int BlendingAndTiming[], 360 unsigned int DPPPerPlane[], 361 double BytePerPixelDETY[], 362 double BytePerPixelDETC[], 363 double DSTXAfterScaler[], 364 double DSTYAfterScaler[], 365 bool WritebackEnable[], 366 enum source_format_class WritebackPixelFormat[], 367 double WritebackDestinationWidth[], 368 double WritebackDestinationHeight[], 369 double WritebackSourceHeight[], 370 bool UnboundedRequestEnabled, 371 int unsigned CompressedBufferSizeInkByte, 372 enum clock_change_support *DRAMClockChangeSupport, 373 double *UrgentWatermark, 374 double *WritebackUrgentWatermark, 375 double *DRAMClockChangeWatermark, 376 double *WritebackDRAMClockChangeWatermark, 377 double *StutterExitWatermark, 378 double *StutterEnterPlusExitWatermark, 379 double *Z8StutterExitWatermark, 380 double *Z8StutterEnterPlusExitWatermark, 381 double *MinActiveDRAMClockChangeLatencySupported); 382 383 static void CalculateDCFCLKDeepSleep( 384 struct display_mode_lib *mode_lib, 385 unsigned int NumberOfActivePlanes, 386 int BytePerPixelY[], 387 int BytePerPixelC[], 388 double VRatio[], 389 double VRatioChroma[], 390 double SwathWidthY[], 391 double SwathWidthC[], 392 unsigned int DPPPerPlane[], 393 double HRatio[], 394 double HRatioChroma[], 395 double PixelClock[], 396 double PSCL_THROUGHPUT[], 397 double PSCL_THROUGHPUT_CHROMA[], 398 double DPPCLK[], 399 double ReadBandwidthLuma[], 400 double ReadBandwidthChroma[], 401 int ReturnBusWidth, 402 double *DCFCLKDeepSleep); 403 404 static void CalculateUrgentBurstFactor( 405 int swath_width_luma_ub, 406 int swath_width_chroma_ub, 407 unsigned int SwathHeightY, 408 unsigned int SwathHeightC, 409 double LineTime, 410 double UrgentLatency, 411 double CursorBufferSize, 412 unsigned int CursorWidth, 413 unsigned int CursorBPP, 414 double VRatio, 415 double VRatioC, 416 double BytePerPixelInDETY, 417 double BytePerPixelInDETC, 418 double DETBufferSizeY, 419 double DETBufferSizeC, 420 double *UrgentBurstFactorCursor, 421 double *UrgentBurstFactorLuma, 422 double *UrgentBurstFactorChroma, 423 bool *NotEnoughUrgentLatencyHiding); 424 425 static void UseMinimumDCFCLK( 426 struct display_mode_lib *mode_lib, 427 int MaxPrefetchMode, 428 int ReorderingBytes); 429 430 static void CalculatePixelDeliveryTimes( 431 unsigned int NumberOfActivePlanes, 432 double VRatio[], 433 double VRatioChroma[], 434 double VRatioPrefetchY[], 435 double VRatioPrefetchC[], 436 unsigned int swath_width_luma_ub[], 437 unsigned int swath_width_chroma_ub[], 438 unsigned int DPPPerPlane[], 439 double HRatio[], 440 double HRatioChroma[], 441 double PixelClock[], 442 double PSCL_THROUGHPUT[], 443 double PSCL_THROUGHPUT_CHROMA[], 444 double DPPCLK[], 445 int BytePerPixelC[], 446 enum scan_direction_class SourceScan[], 447 unsigned int NumberOfCursors[], 448 unsigned int CursorWidth[][DC__NUM_CURSOR__MAX], 449 unsigned int CursorBPP[][DC__NUM_CURSOR__MAX], 450 unsigned int BlockWidth256BytesY[], 451 unsigned int BlockHeight256BytesY[], 452 unsigned int BlockWidth256BytesC[], 453 unsigned int BlockHeight256BytesC[], 454 double DisplayPipeLineDeliveryTimeLuma[], 455 double DisplayPipeLineDeliveryTimeChroma[], 456 double DisplayPipeLineDeliveryTimeLumaPrefetch[], 457 double DisplayPipeLineDeliveryTimeChromaPrefetch[], 458 double DisplayPipeRequestDeliveryTimeLuma[], 459 double DisplayPipeRequestDeliveryTimeChroma[], 460 double DisplayPipeRequestDeliveryTimeLumaPrefetch[], 461 double DisplayPipeRequestDeliveryTimeChromaPrefetch[], 462 double CursorRequestDeliveryTime[], 463 double CursorRequestDeliveryTimePrefetch[]); 464 465 static void CalculateMetaAndPTETimes( 466 int NumberOfActivePlanes, 467 bool GPUVMEnable, 468 int MetaChunkSize, 469 int MinMetaChunkSizeBytes, 470 int HTotal[], 471 double VRatio[], 472 double VRatioChroma[], 473 double DestinationLinesToRequestRowInVBlank[], 474 double DestinationLinesToRequestRowInImmediateFlip[], 475 bool DCCEnable[], 476 double PixelClock[], 477 int BytePerPixelY[], 478 int BytePerPixelC[], 479 enum scan_direction_class SourceScan[], 480 int dpte_row_height[], 481 int dpte_row_height_chroma[], 482 int meta_row_width[], 483 int meta_row_width_chroma[], 484 int meta_row_height[], 485 int meta_row_height_chroma[], 486 int meta_req_width[], 487 int meta_req_width_chroma[], 488 int meta_req_height[], 489 int meta_req_height_chroma[], 490 int dpte_group_bytes[], 491 int PTERequestSizeY[], 492 int PTERequestSizeC[], 493 int PixelPTEReqWidthY[], 494 int PixelPTEReqHeightY[], 495 int PixelPTEReqWidthC[], 496 int PixelPTEReqHeightC[], 497 int dpte_row_width_luma_ub[], 498 int dpte_row_width_chroma_ub[], 499 double DST_Y_PER_PTE_ROW_NOM_L[], 500 double DST_Y_PER_PTE_ROW_NOM_C[], 501 double DST_Y_PER_META_ROW_NOM_L[], 502 double DST_Y_PER_META_ROW_NOM_C[], 503 double TimePerMetaChunkNominal[], 504 double TimePerChromaMetaChunkNominal[], 505 double TimePerMetaChunkVBlank[], 506 double TimePerChromaMetaChunkVBlank[], 507 double TimePerMetaChunkFlip[], 508 double TimePerChromaMetaChunkFlip[], 509 double time_per_pte_group_nom_luma[], 510 double time_per_pte_group_vblank_luma[], 511 double time_per_pte_group_flip_luma[], 512 double time_per_pte_group_nom_chroma[], 513 double time_per_pte_group_vblank_chroma[], 514 double time_per_pte_group_flip_chroma[]); 515 516 static void CalculateVMGroupAndRequestTimes( 517 unsigned int NumberOfActivePlanes, 518 bool GPUVMEnable, 519 unsigned int GPUVMMaxPageTableLevels, 520 unsigned int HTotal[], 521 int BytePerPixelC[], 522 double DestinationLinesToRequestVMInVBlank[], 523 double DestinationLinesToRequestVMInImmediateFlip[], 524 bool DCCEnable[], 525 double PixelClock[], 526 int dpte_row_width_luma_ub[], 527 int dpte_row_width_chroma_ub[], 528 int vm_group_bytes[], 529 unsigned int dpde0_bytes_per_frame_ub_l[], 530 unsigned int dpde0_bytes_per_frame_ub_c[], 531 int meta_pte_bytes_per_frame_ub_l[], 532 int meta_pte_bytes_per_frame_ub_c[], 533 double TimePerVMGroupVBlank[], 534 double TimePerVMGroupFlip[], 535 double TimePerVMRequestVBlank[], 536 double TimePerVMRequestFlip[]); 537 538 static void CalculateStutterEfficiency( 539 struct display_mode_lib *mode_lib, 540 int CompressedBufferSizeInkByte, 541 bool UnboundedRequestEnabled, 542 int ConfigReturnBufferSizeInKByte, 543 int MetaFIFOSizeInKEntries, 544 int ZeroSizeBufferEntries, 545 int NumberOfActivePlanes, 546 int ROBBufferSizeInKByte, 547 double TotalDataReadBandwidth, 548 double DCFCLK, 549 double ReturnBW, 550 double COMPBUF_RESERVED_SPACE_64B, 551 double COMPBUF_RESERVED_SPACE_ZS, 552 double SRExitTime, 553 double SRExitZ8Time, 554 bool SynchronizedVBlank, 555 double Z8StutterEnterPlusExitWatermark, 556 double StutterEnterPlusExitWatermark, 557 bool ProgressiveToInterlaceUnitInOPP, 558 bool Interlace[], 559 double MinTTUVBlank[], 560 int DPPPerPlane[], 561 unsigned int DETBufferSizeY[], 562 int BytePerPixelY[], 563 double BytePerPixelDETY[], 564 double SwathWidthY[], 565 int SwathHeightY[], 566 int SwathHeightC[], 567 double NetDCCRateLuma[], 568 double NetDCCRateChroma[], 569 double DCCFractionOfZeroSizeRequestsLuma[], 570 double DCCFractionOfZeroSizeRequestsChroma[], 571 int HTotal[], 572 int VTotal[], 573 double PixelClock[], 574 double VRatio[], 575 enum scan_direction_class SourceScan[], 576 int BlockHeight256BytesY[], 577 int BlockWidth256BytesY[], 578 int BlockHeight256BytesC[], 579 int BlockWidth256BytesC[], 580 int DCCYMaxUncompressedBlock[], 581 int DCCCMaxUncompressedBlock[], 582 int VActive[], 583 bool DCCEnable[], 584 bool WritebackEnable[], 585 double ReadBandwidthPlaneLuma[], 586 double ReadBandwidthPlaneChroma[], 587 double meta_row_bw[], 588 double dpte_row_bw[], 589 double *StutterEfficiencyNotIncludingVBlank, 590 double *StutterEfficiency, 591 int *NumberOfStutterBurstsPerFrame, 592 double *Z8StutterEfficiencyNotIncludingVBlank, 593 double *Z8StutterEfficiency, 594 int *Z8NumberOfStutterBurstsPerFrame, 595 double *StutterPeriod); 596 597 static void CalculateSwathAndDETConfiguration( 598 bool ForceSingleDPP, 599 int NumberOfActivePlanes, 600 unsigned int DETBufferSizeInKByte, 601 double MaximumSwathWidthLuma[], 602 double MaximumSwathWidthChroma[], 603 enum scan_direction_class SourceScan[], 604 enum source_format_class SourcePixelFormat[], 605 enum dm_swizzle_mode SurfaceTiling[], 606 int ViewportWidth[], 607 int ViewportHeight[], 608 int SurfaceWidthY[], 609 int SurfaceWidthC[], 610 int SurfaceHeightY[], 611 int SurfaceHeightC[], 612 int Read256BytesBlockHeightY[], 613 int Read256BytesBlockHeightC[], 614 int Read256BytesBlockWidthY[], 615 int Read256BytesBlockWidthC[], 616 enum odm_combine_mode ODMCombineEnabled[], 617 int BlendingAndTiming[], 618 int BytePerPixY[], 619 int BytePerPixC[], 620 double BytePerPixDETY[], 621 double BytePerPixDETC[], 622 int HActive[], 623 double HRatio[], 624 double HRatioChroma[], 625 int DPPPerPlane[], 626 int swath_width_luma_ub[], 627 int swath_width_chroma_ub[], 628 double SwathWidth[], 629 double SwathWidthChroma[], 630 int SwathHeightY[], 631 int SwathHeightC[], 632 unsigned int DETBufferSizeY[], 633 unsigned int DETBufferSizeC[], 634 bool ViewportSizeSupportPerPlane[], 635 bool *ViewportSizeSupport); 636 static void CalculateSwathWidth( 637 bool ForceSingleDPP, 638 int NumberOfActivePlanes, 639 enum source_format_class SourcePixelFormat[], 640 enum scan_direction_class SourceScan[], 641 int ViewportWidth[], 642 int ViewportHeight[], 643 int SurfaceWidthY[], 644 int SurfaceWidthC[], 645 int SurfaceHeightY[], 646 int SurfaceHeightC[], 647 enum odm_combine_mode ODMCombineEnabled[], 648 int BytePerPixY[], 649 int BytePerPixC[], 650 int Read256BytesBlockHeightY[], 651 int Read256BytesBlockHeightC[], 652 int Read256BytesBlockWidthY[], 653 int Read256BytesBlockWidthC[], 654 int BlendingAndTiming[], 655 int HActive[], 656 double HRatio[], 657 int DPPPerPlane[], 658 double SwathWidthSingleDPPY[], 659 double SwathWidthSingleDPPC[], 660 double SwathWidthY[], 661 double SwathWidthC[], 662 int MaximumSwathHeightY[], 663 int MaximumSwathHeightC[], 664 int swath_width_luma_ub[], 665 int swath_width_chroma_ub[]); 666 667 static double CalculateExtraLatency( 668 int RoundTripPingLatencyCycles, 669 int ReorderingBytes, 670 double DCFCLK, 671 int TotalNumberOfActiveDPP, 672 int PixelChunkSizeInKByte, 673 int TotalNumberOfDCCActiveDPP, 674 int MetaChunkSize, 675 double ReturnBW, 676 bool GPUVMEnable, 677 bool HostVMEnable, 678 int NumberOfActivePlanes, 679 int NumberOfDPP[], 680 int dpte_group_bytes[], 681 double HostVMInefficiencyFactor, 682 double HostVMMinPageSize, 683 int HostVMMaxNonCachedPageTableLevels); 684 685 static double CalculateExtraLatencyBytes( 686 int ReorderingBytes, 687 int TotalNumberOfActiveDPP, 688 int PixelChunkSizeInKByte, 689 int TotalNumberOfDCCActiveDPP, 690 int MetaChunkSize, 691 bool GPUVMEnable, 692 bool HostVMEnable, 693 int NumberOfActivePlanes, 694 int NumberOfDPP[], 695 int dpte_group_bytes[], 696 double HostVMInefficiencyFactor, 697 double HostVMMinPageSize, 698 int HostVMMaxNonCachedPageTableLevels); 699 700 static double CalculateUrgentLatency( 701 double UrgentLatencyPixelDataOnly, 702 double UrgentLatencyPixelMixedWithVMData, 703 double UrgentLatencyVMDataOnly, 704 bool DoUrgentLatencyAdjustment, 705 double UrgentLatencyAdjustmentFabricClockComponent, 706 double UrgentLatencyAdjustmentFabricClockReference, 707 double FabricClockSingle); 708 709 static void CalculateUnboundedRequestAndCompressedBufferSize( 710 unsigned int DETBufferSizeInKByte, 711 int ConfigReturnBufferSizeInKByte, 712 enum unbounded_requesting_policy UseUnboundedRequestingFinal, 713 int TotalActiveDPP, 714 bool NoChromaPlanes, 715 int MaxNumDPP, 716 int CompressedBufferSegmentSizeInkByteFinal, 717 enum output_encoder_class *Output, 718 bool *UnboundedRequestEnabled, 719 int *CompressedBufferSizeInkByte); 720 721 static bool UnboundedRequest(enum unbounded_requesting_policy UseUnboundedRequestingFinal, int TotalNumberOfActiveDPP, bool NoChroma, enum output_encoder_class Output); 722 723 void dml31_recalculate(struct display_mode_lib *mode_lib) 724 { 725 ModeSupportAndSystemConfiguration(mode_lib); 726 PixelClockAdjustmentForProgressiveToInterlaceUnit(mode_lib); 727 DisplayPipeConfiguration(mode_lib); 728 #ifdef __DML_VBA_DEBUG__ 729 dml_print("DML::%s: Calling DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation\n", __func__); 730 #endif 731 DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation(mode_lib); 732 } 733 734 static unsigned int dscceComputeDelay( 735 unsigned int bpc, 736 double BPP, 737 unsigned int sliceWidth, 738 unsigned int numSlices, 739 enum output_format_class pixelFormat, 740 enum output_encoder_class Output) 741 { 742 // valid bpc = source bits per component in the set of {8, 10, 12} 743 // valid bpp = increments of 1/16 of a bit 744 // min = 6/7/8 in N420/N422/444, respectively 745 // max = such that compression is 1:1 746 //valid sliceWidth = number of pixels per slice line, must be less than or equal to 5184/numSlices (or 4096/numSlices in 420 mode) 747 //valid numSlices = number of slices in the horiziontal direction per DSC engine in the set of {1, 2, 3, 4} 748 //valid pixelFormat = pixel/color format in the set of {:N444_RGB, :S422, :N422, :N420} 749 750 // fixed value 751 unsigned int rcModelSize = 8192; 752 753 // N422/N420 operate at 2 pixels per clock 754 unsigned int pixelsPerClock = 0, lstall, D, initalXmitDelay, w, s, ix, wx, P, l0, a, ax, L, Delay, pixels; 755 756 if (pixelFormat == dm_420) 757 pixelsPerClock = 2; 758 else if (pixelFormat == dm_444) 759 pixelsPerClock = 1; 760 else if (pixelFormat == dm_n422) 761 pixelsPerClock = 2; 762 // #all other modes operate at 1 pixel per clock 763 else 764 pixelsPerClock = 1; 765 766 //initial transmit delay as per PPS 767 initalXmitDelay = dml_round(rcModelSize / 2.0 / BPP / pixelsPerClock); 768 769 //compute ssm delay 770 if (bpc == 8) 771 D = 81; 772 else if (bpc == 10) 773 D = 89; 774 else 775 D = 113; 776 777 //divide by pixel per cycle to compute slice width as seen by DSC 778 w = sliceWidth / pixelsPerClock; 779 780 //422 mode has an additional cycle of delay 781 if (pixelFormat == dm_420 || pixelFormat == dm_444 || pixelFormat == dm_n422) 782 s = 0; 783 else 784 s = 1; 785 786 //main calculation for the dscce 787 ix = initalXmitDelay + 45; 788 wx = (w + 2) / 3; 789 P = 3 * wx - w; 790 l0 = ix / w; 791 a = ix + P * l0; 792 ax = (a + 2) / 3 + D + 6 + 1; 793 L = (ax + wx - 1) / wx; 794 if ((ix % w) == 0 && P != 0) 795 lstall = 1; 796 else 797 lstall = 0; 798 Delay = L * wx * (numSlices - 1) + ax + s + lstall + 22; 799 800 //dsc processes 3 pixel containers per cycle and a container can contain 1 or 2 pixels 801 pixels = Delay * 3 * pixelsPerClock; 802 return pixels; 803 } 804 805 static unsigned int dscComputeDelay(enum output_format_class pixelFormat, enum output_encoder_class Output) 806 { 807 unsigned int Delay = 0; 808 809 if (pixelFormat == dm_420) { 810 // sfr 811 Delay = Delay + 2; 812 // dsccif 813 Delay = Delay + 0; 814 // dscc - input deserializer 815 Delay = Delay + 3; 816 // dscc gets pixels every other cycle 817 Delay = Delay + 2; 818 // dscc - input cdc fifo 819 Delay = Delay + 12; 820 // dscc gets pixels every other cycle 821 Delay = Delay + 13; 822 // dscc - cdc uncertainty 823 Delay = Delay + 2; 824 // dscc - output cdc fifo 825 Delay = Delay + 7; 826 // dscc gets pixels every other cycle 827 Delay = Delay + 3; 828 // dscc - cdc uncertainty 829 Delay = Delay + 2; 830 // dscc - output serializer 831 Delay = Delay + 1; 832 // sft 833 Delay = Delay + 1; 834 } else if (pixelFormat == dm_n422) { 835 // sfr 836 Delay = Delay + 2; 837 // dsccif 838 Delay = Delay + 1; 839 // dscc - input deserializer 840 Delay = Delay + 5; 841 // dscc - input cdc fifo 842 Delay = Delay + 25; 843 // dscc - cdc uncertainty 844 Delay = Delay + 2; 845 // dscc - output cdc fifo 846 Delay = Delay + 10; 847 // dscc - cdc uncertainty 848 Delay = Delay + 2; 849 // dscc - output serializer 850 Delay = Delay + 1; 851 // sft 852 Delay = Delay + 1; 853 } else { 854 // sfr 855 Delay = Delay + 2; 856 // dsccif 857 Delay = Delay + 0; 858 // dscc - input deserializer 859 Delay = Delay + 3; 860 // dscc - input cdc fifo 861 Delay = Delay + 12; 862 // dscc - cdc uncertainty 863 Delay = Delay + 2; 864 // dscc - output cdc fifo 865 Delay = Delay + 7; 866 // dscc - output serializer 867 Delay = Delay + 1; 868 // dscc - cdc uncertainty 869 Delay = Delay + 2; 870 // sft 871 Delay = Delay + 1; 872 } 873 874 return Delay; 875 } 876 877 static bool CalculatePrefetchSchedule( 878 struct display_mode_lib *mode_lib, 879 double HostVMInefficiencyFactor, 880 Pipe *myPipe, 881 unsigned int DSCDelay, 882 double DPPCLKDelaySubtotalPlusCNVCFormater, 883 double DPPCLKDelaySCL, 884 double DPPCLKDelaySCLLBOnly, 885 double DPPCLKDelayCNVCCursor, 886 double DISPCLKDelaySubtotal, 887 unsigned int DPP_RECOUT_WIDTH, 888 enum output_format_class OutputFormat, 889 unsigned int MaxInterDCNTileRepeaters, 890 unsigned int VStartup, 891 unsigned int MaxVStartup, 892 unsigned int GPUVMPageTableLevels, 893 bool GPUVMEnable, 894 bool HostVMEnable, 895 unsigned int HostVMMaxNonCachedPageTableLevels, 896 double HostVMMinPageSize, 897 bool DynamicMetadataEnable, 898 bool DynamicMetadataVMEnabled, 899 int DynamicMetadataLinesBeforeActiveRequired, 900 unsigned int DynamicMetadataTransmittedBytes, 901 double UrgentLatency, 902 double UrgentExtraLatency, 903 double TCalc, 904 unsigned int PDEAndMetaPTEBytesFrame, 905 unsigned int MetaRowByte, 906 unsigned int PixelPTEBytesPerRow, 907 double PrefetchSourceLinesY, 908 unsigned int SwathWidthY, 909 double VInitPreFillY, 910 unsigned int MaxNumSwathY, 911 double PrefetchSourceLinesC, 912 unsigned int SwathWidthC, 913 double VInitPreFillC, 914 unsigned int MaxNumSwathC, 915 int swath_width_luma_ub, 916 int swath_width_chroma_ub, 917 unsigned int SwathHeightY, 918 unsigned int SwathHeightC, 919 double TWait, 920 double *DSTXAfterScaler, 921 double *DSTYAfterScaler, 922 double *DestinationLinesForPrefetch, 923 double *PrefetchBandwidth, 924 double *DestinationLinesToRequestVMInVBlank, 925 double *DestinationLinesToRequestRowInVBlank, 926 double *VRatioPrefetchY, 927 double *VRatioPrefetchC, 928 double *RequiredPrefetchPixDataBWLuma, 929 double *RequiredPrefetchPixDataBWChroma, 930 bool *NotEnoughTimeForDynamicMetadata, 931 double *Tno_bw, 932 double *prefetch_vmrow_bw, 933 double *Tdmdl_vm, 934 double *Tdmdl, 935 double *TSetup, 936 int *VUpdateOffsetPix, 937 double *VUpdateWidthPix, 938 double *VReadyOffsetPix) 939 { 940 bool MyError = false; 941 unsigned int DPPCycles, DISPCLKCycles; 942 double DSTTotalPixelsAfterScaler; 943 double LineTime; 944 double dst_y_prefetch_equ; 945 double Tsw_oto; 946 double prefetch_bw_oto; 947 double prefetch_bw_pr; 948 double Tvm_oto; 949 double Tr0_oto; 950 double Tvm_oto_lines; 951 double Tr0_oto_lines; 952 double dst_y_prefetch_oto; 953 double TimeForFetchingMetaPTE = 0; 954 double TimeForFetchingRowInVBlank = 0; 955 double LinesToRequestPrefetchPixelData = 0; 956 unsigned int HostVMDynamicLevelsTrips; 957 double trip_to_mem; 958 double Tvm_trips; 959 double Tr0_trips; 960 double Tvm_trips_rounded; 961 double Tr0_trips_rounded; 962 double Lsw_oto; 963 double Tpre_rounded; 964 double prefetch_bw_equ; 965 double Tvm_equ; 966 double Tr0_equ; 967 double Tdmbf; 968 double Tdmec; 969 double Tdmsks; 970 double prefetch_sw_bytes; 971 double bytes_pp; 972 double dep_bytes; 973 int max_vratio_pre = 4; 974 double min_Lsw; 975 double Tsw_est1 = 0; 976 double Tsw_est3 = 0; 977 double max_Tsw = 0; 978 979 if (GPUVMEnable == true && HostVMEnable == true) { 980 HostVMDynamicLevelsTrips = HostVMMaxNonCachedPageTableLevels; 981 } else { 982 HostVMDynamicLevelsTrips = 0; 983 } 984 #ifdef __DML_VBA_DEBUG__ 985 dml_print("DML::%s: GPUVMEnable=%d HostVMEnable=%d HostVMInefficiencyFactor=%f\n", __func__, GPUVMEnable, HostVMEnable, HostVMInefficiencyFactor); 986 #endif 987 CalculateVupdateAndDynamicMetadataParameters( 988 MaxInterDCNTileRepeaters, 989 myPipe->DPPCLK, 990 myPipe->DISPCLK, 991 myPipe->DCFCLKDeepSleep, 992 myPipe->PixelClock, 993 myPipe->HTotal, 994 myPipe->VBlank, 995 DynamicMetadataTransmittedBytes, 996 DynamicMetadataLinesBeforeActiveRequired, 997 myPipe->InterlaceEnable, 998 myPipe->ProgressiveToInterlaceUnitInOPP, 999 TSetup, 1000 &Tdmbf, 1001 &Tdmec, 1002 &Tdmsks, 1003 VUpdateOffsetPix, 1004 VUpdateWidthPix, 1005 VReadyOffsetPix); 1006 1007 LineTime = myPipe->HTotal / myPipe->PixelClock; 1008 trip_to_mem = UrgentLatency; 1009 Tvm_trips = UrgentExtraLatency + trip_to_mem * (GPUVMPageTableLevels * (HostVMDynamicLevelsTrips + 1) - 1); 1010 1011 #ifdef __DML_VBA_ALLOW_DELTA__ 1012 if (DynamicMetadataVMEnabled == true && GPUVMEnable == true) { 1013 #else 1014 if (DynamicMetadataVMEnabled == true) { 1015 #endif 1016 *Tdmdl = TWait + Tvm_trips + trip_to_mem; 1017 } else { 1018 *Tdmdl = TWait + UrgentExtraLatency; 1019 } 1020 1021 #ifdef __DML_VBA_ALLOW_DELTA__ 1022 if (DynamicMetadataEnable == false) { 1023 *Tdmdl = 0.0; 1024 } 1025 #endif 1026 1027 if (DynamicMetadataEnable == true) { 1028 if (VStartup * LineTime < *TSetup + *Tdmdl + Tdmbf + Tdmec + Tdmsks) { 1029 *NotEnoughTimeForDynamicMetadata = true; 1030 dml_print("DML::%s: Not Enough Time for Dynamic Meta!\n", __func__); 1031 dml_print("DML::%s: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", __func__, Tdmbf); 1032 dml_print("DML::%s: Tdmec: %fus - time dio takes to transfer dmd\n", __func__, Tdmec); 1033 dml_print("DML::%s: Tdmsks: %fus - time before active dmd must complete transmission at dio\n", __func__, Tdmsks); 1034 dml_print("DML::%s: Tdmdl: %fus - time for fabric to become ready and fetch dmd \n", __func__, *Tdmdl); 1035 } else { 1036 *NotEnoughTimeForDynamicMetadata = false; 1037 } 1038 } else { 1039 *NotEnoughTimeForDynamicMetadata = false; 1040 } 1041 1042 *Tdmdl_vm = (DynamicMetadataEnable == true && DynamicMetadataVMEnabled == true && GPUVMEnable == true ? TWait + Tvm_trips : 0); 1043 1044 if (myPipe->ScalerEnabled) 1045 DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + DPPCLKDelaySCL; 1046 else 1047 DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + DPPCLKDelaySCLLBOnly; 1048 1049 DPPCycles = DPPCycles + myPipe->NumberOfCursors * DPPCLKDelayCNVCCursor; 1050 1051 DISPCLKCycles = DISPCLKDelaySubtotal; 1052 1053 if (myPipe->DPPCLK == 0.0 || myPipe->DISPCLK == 0.0) 1054 return true; 1055 1056 *DSTXAfterScaler = DPPCycles * myPipe->PixelClock / myPipe->DPPCLK + DISPCLKCycles * myPipe->PixelClock / myPipe->DISPCLK + DSCDelay; 1057 1058 #ifdef __DML_VBA_DEBUG__ 1059 dml_print("DML::%s: DPPCycles: %d\n", __func__, DPPCycles); 1060 dml_print("DML::%s: PixelClock: %f\n", __func__, myPipe->PixelClock); 1061 dml_print("DML::%s: DPPCLK: %f\n", __func__, myPipe->DPPCLK); 1062 dml_print("DML::%s: DISPCLKCycles: %d\n", __func__, DISPCLKCycles); 1063 dml_print("DML::%s: DISPCLK: %f\n", __func__, myPipe->DISPCLK); 1064 dml_print("DML::%s: DSCDelay: %d\n", __func__, DSCDelay); 1065 dml_print("DML::%s: DSTXAfterScaler: %d\n", __func__, *DSTXAfterScaler); 1066 dml_print("DML::%s: ODMCombineIsEnabled: %d\n", __func__, myPipe->ODMCombineIsEnabled); 1067 #endif 1068 1069 *DSTXAfterScaler = *DSTXAfterScaler + ((myPipe->ODMCombineIsEnabled) ? 18 : 0) + (myPipe->DPPPerPlane - 1) * DPP_RECOUT_WIDTH; 1070 1071 if (OutputFormat == dm_420 || (myPipe->InterlaceEnable && myPipe->ProgressiveToInterlaceUnitInOPP)) 1072 *DSTYAfterScaler = 1; 1073 else 1074 *DSTYAfterScaler = 0; 1075 1076 DSTTotalPixelsAfterScaler = *DSTYAfterScaler * myPipe->HTotal + *DSTXAfterScaler; 1077 *DSTYAfterScaler = dml_floor(DSTTotalPixelsAfterScaler / myPipe->HTotal, 1); 1078 *DSTXAfterScaler = DSTTotalPixelsAfterScaler - ((double) (*DSTYAfterScaler * myPipe->HTotal)); 1079 1080 #ifdef __DML_VBA_DEBUG__ 1081 dml_print("DML::%s: DSTXAfterScaler: %d (final)\n", __func__, *DSTXAfterScaler); 1082 #endif 1083 1084 MyError = false; 1085 1086 Tr0_trips = trip_to_mem * (HostVMDynamicLevelsTrips + 1); 1087 Tvm_trips_rounded = dml_ceil(4.0 * Tvm_trips / LineTime, 1) / 4 * LineTime; 1088 Tr0_trips_rounded = dml_ceil(4.0 * Tr0_trips / LineTime, 1) / 4 * LineTime; 1089 1090 #ifdef __DML_VBA_ALLOW_DELTA__ 1091 if (!myPipe->DCCEnable) { 1092 Tr0_trips = 0.0; 1093 Tr0_trips_rounded = 0.0; 1094 } 1095 #endif 1096 1097 if (!GPUVMEnable) { 1098 Tvm_trips = 0.0; 1099 Tvm_trips_rounded = 0.0; 1100 } 1101 1102 if (GPUVMEnable) { 1103 if (GPUVMPageTableLevels >= 3) { 1104 *Tno_bw = UrgentExtraLatency + trip_to_mem * ((GPUVMPageTableLevels - 2) - 1); 1105 } else { 1106 *Tno_bw = 0; 1107 } 1108 } else if (!myPipe->DCCEnable) { 1109 *Tno_bw = LineTime; 1110 } else { 1111 *Tno_bw = LineTime / 4; 1112 } 1113 1114 if (myPipe->SourcePixelFormat == dm_420_8 || myPipe->SourcePixelFormat == dm_420_10 || myPipe->SourcePixelFormat == dm_420_12) 1115 bytes_pp = myPipe->BytePerPixelY + myPipe->BytePerPixelC / 4; 1116 else 1117 bytes_pp = myPipe->BytePerPixelY + myPipe->BytePerPixelC; 1118 /*rev 99*/ 1119 prefetch_bw_pr = dml_min(1, bytes_pp * myPipe->PixelClock / (double) myPipe->DPPPerPlane); 1120 max_Tsw = dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) * LineTime; 1121 prefetch_sw_bytes = PrefetchSourceLinesY * swath_width_luma_ub * myPipe->BytePerPixelY + PrefetchSourceLinesC * swath_width_chroma_ub * myPipe->BytePerPixelC; 1122 prefetch_bw_oto = dml_max(bytes_pp * myPipe->PixelClock / myPipe->DPPPerPlane, prefetch_sw_bytes / (dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) * LineTime)); 1123 prefetch_bw_oto = dml_max(prefetch_bw_pr, prefetch_sw_bytes / max_Tsw); 1124 1125 min_Lsw = dml_max(1, dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) / max_vratio_pre); 1126 Lsw_oto = dml_ceil(4 * dml_max(prefetch_sw_bytes / prefetch_bw_oto / LineTime, min_Lsw), 1) / 4; 1127 Tsw_oto = Lsw_oto * LineTime; 1128 1129 prefetch_bw_oto = (PrefetchSourceLinesY * swath_width_luma_ub * myPipe->BytePerPixelY + PrefetchSourceLinesC * swath_width_chroma_ub * myPipe->BytePerPixelC) / Tsw_oto; 1130 1131 #ifdef __DML_VBA_DEBUG__ 1132 dml_print("DML: HTotal: %d\n", myPipe->HTotal); 1133 dml_print("DML: prefetch_bw_oto: %f\n", prefetch_bw_oto); 1134 dml_print("DML: PrefetchSourceLinesY: %f\n", PrefetchSourceLinesY); 1135 dml_print("DML: swath_width_luma_ub: %d\n", swath_width_luma_ub); 1136 dml_print("DML: BytePerPixelY: %d\n", myPipe->BytePerPixelY); 1137 dml_print("DML: Tsw_oto: %f\n", Tsw_oto); 1138 #endif 1139 1140 if (GPUVMEnable == true) 1141 Tvm_oto = dml_max3(*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / prefetch_bw_oto, Tvm_trips, LineTime / 4.0); 1142 else 1143 Tvm_oto = LineTime / 4.0; 1144 1145 if ((GPUVMEnable == true || myPipe->DCCEnable == true)) { 1146 Tr0_oto = dml_max4((MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / prefetch_bw_oto, Tr0_trips, // PREVIOUS_ERROR (missing this term) 1147 LineTime - Tvm_oto, 1148 LineTime / 4); 1149 } else { 1150 Tr0_oto = (LineTime - Tvm_oto) / 2.0; 1151 } 1152 1153 #ifdef __DML_VBA_DEBUG__ 1154 dml_print("DML::%s: Tvm_trips = %f\n", __func__, Tvm_trips); 1155 dml_print("DML::%s: Tr0_trips = %f\n", __func__, Tr0_trips); 1156 dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, MetaRowByte); 1157 dml_print("DML::%s: MetaRowByte = %d\n", __func__, MetaRowByte); 1158 dml_print("DML::%s: PixelPTEBytesPerRow = %d\n", __func__, PixelPTEBytesPerRow); 1159 dml_print("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, HostVMInefficiencyFactor); 1160 dml_print("DML::%s: prefetch_bw_oto = %f\n", __func__, prefetch_bw_oto); 1161 dml_print("DML::%s: Tr0_oto = %f\n", __func__, Tr0_oto); 1162 dml_print("DML::%s: Tvm_oto = %f\n", __func__, Tvm_oto); 1163 #endif 1164 1165 Tvm_oto_lines = dml_ceil(4.0 * Tvm_oto / LineTime, 1) / 4.0; 1166 Tr0_oto_lines = dml_ceil(4.0 * Tr0_oto / LineTime, 1) / 4.0; 1167 dst_y_prefetch_oto = Tvm_oto_lines + 2 * Tr0_oto_lines + Lsw_oto; 1168 dst_y_prefetch_equ = VStartup - (*TSetup + dml_max(TWait + TCalc, *Tdmdl)) / LineTime - (*DSTYAfterScaler + *DSTXAfterScaler / myPipe->HTotal); 1169 dst_y_prefetch_equ = dml_floor(4.0 * (dst_y_prefetch_equ + 0.125), 1) / 4.0; 1170 Tpre_rounded = dst_y_prefetch_equ * LineTime; 1171 1172 dep_bytes = dml_max(PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor, MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor); 1173 1174 if (prefetch_sw_bytes < dep_bytes) 1175 prefetch_sw_bytes = 2 * dep_bytes; 1176 1177 dml_print("DML: dst_y_prefetch_oto: %f\n", dst_y_prefetch_oto); 1178 dml_print("DML: Tvm_oto_lines: %f\n", Tvm_oto_lines); 1179 dml_print("DML: Tr0_oto_lines: %f\n", Tr0_oto_lines); 1180 dml_print("DML: Lsw_oto: %f\n", Lsw_oto); 1181 dml_print("DML: LineTime: %f\n", LineTime); 1182 dml_print("DML: dst_y_prefetch_equ: %f (after round)\n", dst_y_prefetch_equ); 1183 1184 dml_print("DML: LineTime: %f\n", LineTime); 1185 dml_print("DML: VStartup: %d\n", VStartup); 1186 dml_print("DML: Tvstartup: %fus - time between vstartup and first pixel of active\n", VStartup * LineTime); 1187 dml_print("DML: TSetup: %fus - time from vstartup to vready\n", *TSetup); 1188 dml_print("DML: TCalc: %fus - time for calculations in dchub starting at vready\n", TCalc); 1189 dml_print("DML: TWait: %fus - time for fabric to become ready max(pstate exit,cstate enter/exit, urgent latency) after TCalc\n", TWait); 1190 dml_print("DML: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", Tdmbf); 1191 dml_print("DML: Tdmec: %fus - time dio takes to transfer dmd\n", Tdmec); 1192 dml_print("DML: Tdmsks: %fus - time before active dmd must complete transmission at dio\n", Tdmsks); 1193 dml_print("DML: Tdmdl_vm: %fus - time for vm stages of dmd \n", *Tdmdl_vm); 1194 dml_print("DML: Tdmdl: %fus - time for fabric to become ready and fetch dmd \n", *Tdmdl); 1195 dml_print("DML: DSTXAfterScaler: %f pixels - number of pixel clocks pipeline and buffer delay after scaler \n", *DSTXAfterScaler); 1196 dml_print("DML: DSTYAfterScaler: %f lines - number of lines of pipeline and buffer delay after scaler \n", *DSTYAfterScaler); 1197 1198 *PrefetchBandwidth = 0; 1199 *DestinationLinesToRequestVMInVBlank = 0; 1200 *DestinationLinesToRequestRowInVBlank = 0; 1201 *VRatioPrefetchY = 0; 1202 *VRatioPrefetchC = 0; 1203 *RequiredPrefetchPixDataBWLuma = 0; 1204 if (dst_y_prefetch_equ > 1) { 1205 double PrefetchBandwidth1; 1206 double PrefetchBandwidth2; 1207 double PrefetchBandwidth3; 1208 double PrefetchBandwidth4; 1209 1210 if (Tpre_rounded - *Tno_bw > 0) { 1211 PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor 1212 + prefetch_sw_bytes) / (Tpre_rounded - *Tno_bw); 1213 Tsw_est1 = prefetch_sw_bytes / PrefetchBandwidth1; 1214 } else { 1215 PrefetchBandwidth1 = 0; 1216 } 1217 1218 if (VStartup == MaxVStartup && Tsw_est1 / LineTime < min_Lsw && Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - *Tno_bw > 0) { 1219 PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor) 1220 / (Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - *Tno_bw); 1221 } 1222 1223 if (Tpre_rounded - *Tno_bw - 2 * Tr0_trips_rounded > 0) 1224 PrefetchBandwidth2 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + prefetch_sw_bytes) / (Tpre_rounded - *Tno_bw - 2 * Tr0_trips_rounded); 1225 else 1226 PrefetchBandwidth2 = 0; 1227 1228 if (Tpre_rounded - Tvm_trips_rounded > 0) { 1229 PrefetchBandwidth3 = (2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor 1230 + prefetch_sw_bytes) / (Tpre_rounded - Tvm_trips_rounded); 1231 Tsw_est3 = prefetch_sw_bytes / PrefetchBandwidth3; 1232 } else { 1233 PrefetchBandwidth3 = 0; 1234 } 1235 1236 #ifdef __DML_VBA_DEBUG__ 1237 dml_print("DML::%s: Tpre_rounded: %f\n", __func__, Tpre_rounded); 1238 dml_print("DML::%s: Tvm_trips_rounded: %f\n", __func__, Tvm_trips_rounded); 1239 dml_print("DML::%s: PrefetchBandwidth3: %f\n", __func__, PrefetchBandwidth3); 1240 #endif 1241 if (VStartup == MaxVStartup && Tsw_est3 / LineTime < min_Lsw && Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - Tvm_trips_rounded > 0) { 1242 PrefetchBandwidth3 = (2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor) 1243 / (Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - Tvm_trips_rounded); 1244 } 1245 1246 if (Tpre_rounded - Tvm_trips_rounded - 2 * Tr0_trips_rounded > 0) 1247 PrefetchBandwidth4 = prefetch_sw_bytes / (Tpre_rounded - Tvm_trips_rounded - 2 * Tr0_trips_rounded); 1248 else 1249 PrefetchBandwidth4 = 0; 1250 1251 { 1252 bool Case1OK; 1253 bool Case2OK; 1254 bool Case3OK; 1255 1256 if (PrefetchBandwidth1 > 0) { 1257 if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth1 >= Tvm_trips_rounded 1258 && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / PrefetchBandwidth1 >= Tr0_trips_rounded) { 1259 Case1OK = true; 1260 } else { 1261 Case1OK = false; 1262 } 1263 } else { 1264 Case1OK = false; 1265 } 1266 1267 if (PrefetchBandwidth2 > 0) { 1268 if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth2 >= Tvm_trips_rounded 1269 && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / PrefetchBandwidth2 < Tr0_trips_rounded) { 1270 Case2OK = true; 1271 } else { 1272 Case2OK = false; 1273 } 1274 } else { 1275 Case2OK = false; 1276 } 1277 1278 if (PrefetchBandwidth3 > 0) { 1279 if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth3 < Tvm_trips_rounded 1280 && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / PrefetchBandwidth3 >= Tr0_trips_rounded) { 1281 Case3OK = true; 1282 } else { 1283 Case3OK = false; 1284 } 1285 } else { 1286 Case3OK = false; 1287 } 1288 1289 if (Case1OK) { 1290 prefetch_bw_equ = PrefetchBandwidth1; 1291 } else if (Case2OK) { 1292 prefetch_bw_equ = PrefetchBandwidth2; 1293 } else if (Case3OK) { 1294 prefetch_bw_equ = PrefetchBandwidth3; 1295 } else { 1296 prefetch_bw_equ = PrefetchBandwidth4; 1297 } 1298 1299 #ifdef __DML_VBA_DEBUG__ 1300 dml_print("DML::%s: Case1OK: %d\n", __func__, Case1OK); 1301 dml_print("DML::%s: Case2OK: %d\n", __func__, Case2OK); 1302 dml_print("DML::%s: Case3OK: %d\n", __func__, Case3OK); 1303 dml_print("DML::%s: prefetch_bw_equ: %f\n", __func__, prefetch_bw_equ); 1304 #endif 1305 1306 if (prefetch_bw_equ > 0) { 1307 if (GPUVMEnable == true) { 1308 Tvm_equ = dml_max3(*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / prefetch_bw_equ, Tvm_trips, LineTime / 4); 1309 } else { 1310 Tvm_equ = LineTime / 4; 1311 } 1312 1313 if ((GPUVMEnable == true || myPipe->DCCEnable == true)) { 1314 Tr0_equ = dml_max4( 1315 (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / prefetch_bw_equ, 1316 Tr0_trips, 1317 (LineTime - Tvm_equ) / 2, 1318 LineTime / 4); 1319 } else { 1320 Tr0_equ = (LineTime - Tvm_equ) / 2; 1321 } 1322 } else { 1323 Tvm_equ = 0; 1324 Tr0_equ = 0; 1325 dml_print("DML: prefetch_bw_equ equals 0! %s:%d\n", __FILE__, __LINE__); 1326 } 1327 } 1328 1329 if (dst_y_prefetch_oto < dst_y_prefetch_equ) { 1330 *DestinationLinesForPrefetch = dst_y_prefetch_oto; 1331 TimeForFetchingMetaPTE = Tvm_oto; 1332 TimeForFetchingRowInVBlank = Tr0_oto; 1333 *PrefetchBandwidth = prefetch_bw_oto; 1334 } else { 1335 *DestinationLinesForPrefetch = dst_y_prefetch_equ; 1336 TimeForFetchingMetaPTE = Tvm_equ; 1337 TimeForFetchingRowInVBlank = Tr0_equ; 1338 *PrefetchBandwidth = prefetch_bw_equ; 1339 } 1340 1341 *DestinationLinesToRequestVMInVBlank = dml_ceil(4.0 * TimeForFetchingMetaPTE / LineTime, 1.0) / 4.0; 1342 1343 *DestinationLinesToRequestRowInVBlank = dml_ceil(4.0 * TimeForFetchingRowInVBlank / LineTime, 1.0) / 4.0; 1344 1345 #ifdef __DML_VBA_ALLOW_DELTA__ 1346 LinesToRequestPrefetchPixelData = *DestinationLinesForPrefetch 1347 // See note above dated 5/30/2018 1348 // - ((NumberOfCursors > 0 || GPUVMEnable || DCCEnable) ? 1349 - ((GPUVMEnable || myPipe->DCCEnable) ? (*DestinationLinesToRequestVMInVBlank + 2 * *DestinationLinesToRequestRowInVBlank) : 0.0); // TODO: Did someone else add this?? 1350 #else 1351 LinesToRequestPrefetchPixelData = *DestinationLinesForPrefetch - *DestinationLinesToRequestVMInVBlank - 2 * *DestinationLinesToRequestRowInVBlank; 1352 #endif 1353 1354 #ifdef __DML_VBA_DEBUG__ 1355 dml_print("DML::%s: DestinationLinesForPrefetch = %f\n", __func__, *DestinationLinesForPrefetch); 1356 dml_print("DML::%s: DestinationLinesToRequestVMInVBlank = %f\n", __func__, *DestinationLinesToRequestVMInVBlank); 1357 dml_print("DML::%s: TimeForFetchingRowInVBlank = %f\n", __func__, TimeForFetchingRowInVBlank); 1358 dml_print("DML::%s: LineTime = %f\n", __func__, LineTime); 1359 dml_print("DML::%s: DestinationLinesToRequestRowInVBlank = %f\n", __func__, *DestinationLinesToRequestRowInVBlank); 1360 dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, PrefetchSourceLinesY); 1361 dml_print("DML::%s: LinesToRequestPrefetchPixelData = %f\n", __func__, LinesToRequestPrefetchPixelData); 1362 #endif 1363 1364 if (LinesToRequestPrefetchPixelData > 0 && prefetch_bw_equ > 0) { 1365 1366 *VRatioPrefetchY = (double) PrefetchSourceLinesY / LinesToRequestPrefetchPixelData; 1367 *VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0); 1368 #ifdef __DML_VBA_DEBUG__ 1369 dml_print("DML::%s: VRatioPrefetchY = %f\n", __func__, *VRatioPrefetchY); 1370 dml_print("DML::%s: SwathHeightY = %d\n", __func__, SwathHeightY); 1371 dml_print("DML::%s: VInitPreFillY = %f\n", __func__, VInitPreFillY); 1372 #endif 1373 if ((SwathHeightY > 4) && (VInitPreFillY > 3)) { 1374 if (LinesToRequestPrefetchPixelData > (VInitPreFillY - 3.0) / 2.0) { 1375 *VRatioPrefetchY = dml_max( 1376 (double) PrefetchSourceLinesY / LinesToRequestPrefetchPixelData, 1377 (double) MaxNumSwathY * SwathHeightY / (LinesToRequestPrefetchPixelData - (VInitPreFillY - 3.0) / 2.0)); 1378 *VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0); 1379 } else { 1380 MyError = true; 1381 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__); 1382 *VRatioPrefetchY = 0; 1383 } 1384 #ifdef __DML_VBA_DEBUG__ 1385 dml_print("DML::%s: VRatioPrefetchY = %f\n", __func__, *VRatioPrefetchY); 1386 dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, PrefetchSourceLinesY); 1387 dml_print("DML::%s: MaxNumSwathY = %d\n", __func__, MaxNumSwathY); 1388 #endif 1389 } 1390 1391 *VRatioPrefetchC = (double) PrefetchSourceLinesC / LinesToRequestPrefetchPixelData; 1392 *VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0); 1393 1394 #ifdef __DML_VBA_DEBUG__ 1395 dml_print("DML::%s: VRatioPrefetchC = %f\n", __func__, *VRatioPrefetchC); 1396 dml_print("DML::%s: SwathHeightC = %d\n", __func__, SwathHeightC); 1397 dml_print("DML::%s: VInitPreFillC = %f\n", __func__, VInitPreFillC); 1398 #endif 1399 if ((SwathHeightC > 4) || VInitPreFillC > 3) { 1400 if (LinesToRequestPrefetchPixelData > (VInitPreFillC - 3.0) / 2.0) { 1401 *VRatioPrefetchC = dml_max( 1402 *VRatioPrefetchC, 1403 (double) MaxNumSwathC * SwathHeightC / (LinesToRequestPrefetchPixelData - (VInitPreFillC - 3.0) / 2.0)); 1404 *VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0); 1405 } else { 1406 MyError = true; 1407 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__); 1408 *VRatioPrefetchC = 0; 1409 } 1410 #ifdef __DML_VBA_DEBUG__ 1411 dml_print("DML::%s: VRatioPrefetchC = %f\n", __func__, *VRatioPrefetchC); 1412 dml_print("DML::%s: PrefetchSourceLinesC = %f\n", __func__, PrefetchSourceLinesC); 1413 dml_print("DML::%s: MaxNumSwathC = %d\n", __func__, MaxNumSwathC); 1414 #endif 1415 } 1416 1417 #ifdef __DML_VBA_DEBUG__ 1418 dml_print("DML::%s: BytePerPixelY = %d\n", __func__, myPipe->BytePerPixelY); 1419 dml_print("DML::%s: swath_width_luma_ub = %d\n", __func__, swath_width_luma_ub); 1420 dml_print("DML::%s: LineTime = %f\n", __func__, LineTime); 1421 #endif 1422 1423 *RequiredPrefetchPixDataBWLuma = (double) PrefetchSourceLinesY / LinesToRequestPrefetchPixelData * myPipe->BytePerPixelY * swath_width_luma_ub / LineTime; 1424 1425 #ifdef __DML_VBA_DEBUG__ 1426 dml_print("DML::%s: RequiredPrefetchPixDataBWLuma = %f\n", __func__, *RequiredPrefetchPixDataBWLuma); 1427 #endif 1428 1429 *RequiredPrefetchPixDataBWChroma = (double) PrefetchSourceLinesC / LinesToRequestPrefetchPixelData * myPipe->BytePerPixelC * swath_width_chroma_ub 1430 / LineTime; 1431 } else { 1432 MyError = true; 1433 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__); 1434 dml_print("DML: LinesToRequestPrefetchPixelData: %f, should be > 0\n", LinesToRequestPrefetchPixelData); 1435 *VRatioPrefetchY = 0; 1436 *VRatioPrefetchC = 0; 1437 *RequiredPrefetchPixDataBWLuma = 0; 1438 *RequiredPrefetchPixDataBWChroma = 0; 1439 } 1440 1441 dml_print( 1442 "DML: Tpre: %fus - sum of time to request meta pte, 2 x data pte + meta data, swaths\n", 1443 (double) LinesToRequestPrefetchPixelData * LineTime + 2.0 * TimeForFetchingRowInVBlank + TimeForFetchingMetaPTE); 1444 dml_print("DML: Tvm: %fus - time to fetch page tables for meta surface\n", TimeForFetchingMetaPTE); 1445 dml_print("DML: Tr0: %fus - time to fetch first row of data pagetables and first row of meta data (done in parallel)\n", TimeForFetchingRowInVBlank); 1446 dml_print( 1447 "DML: Tsw: %fus = time to fetch enough pixel data and cursor data to feed the scalers init position and detile\n", 1448 (double) LinesToRequestPrefetchPixelData * LineTime); 1449 dml_print("DML: To: %fus - time for propagation from scaler to optc\n", 1450 (*DSTYAfterScaler + ((double) (*DSTXAfterScaler) / 1451 (double) myPipe->HTotal)) * LineTime); 1452 dml_print("DML: Tvstartup - TSetup - Tcalc - Twait - Tpre - To > 0\n"); 1453 dml_print("DML: Tslack(pre): %fus - time left over in schedule\n", 1454 VStartup * LineTime - TimeForFetchingMetaPTE - 2 * TimeForFetchingRowInVBlank 1455 - (*DSTYAfterScaler + ((double) (*DSTXAfterScaler) / (double) myPipe->HTotal)) * LineTime - TWait - TCalc - *TSetup); 1456 dml_print("DML: row_bytes = dpte_row_bytes (per_pipe) = PixelPTEBytesPerRow = : %d\n", PixelPTEBytesPerRow); 1457 1458 } else { 1459 MyError = true; 1460 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__); 1461 } 1462 1463 { 1464 double prefetch_vm_bw; 1465 double prefetch_row_bw; 1466 1467 if (PDEAndMetaPTEBytesFrame == 0) { 1468 prefetch_vm_bw = 0; 1469 } else if (*DestinationLinesToRequestVMInVBlank > 0) { 1470 #ifdef __DML_VBA_DEBUG__ 1471 dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame); 1472 dml_print("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, HostVMInefficiencyFactor); 1473 dml_print("DML::%s: DestinationLinesToRequestVMInVBlank = %f\n", __func__, *DestinationLinesToRequestVMInVBlank); 1474 dml_print("DML::%s: LineTime = %f\n", __func__, LineTime); 1475 #endif 1476 prefetch_vm_bw = PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / (*DestinationLinesToRequestVMInVBlank * LineTime); 1477 #ifdef __DML_VBA_DEBUG__ 1478 dml_print("DML::%s: prefetch_vm_bw = %f\n", __func__, prefetch_vm_bw); 1479 #endif 1480 } else { 1481 prefetch_vm_bw = 0; 1482 MyError = true; 1483 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__); 1484 } 1485 1486 if (MetaRowByte + PixelPTEBytesPerRow == 0) { 1487 prefetch_row_bw = 0; 1488 } else if (*DestinationLinesToRequestRowInVBlank > 0) { 1489 prefetch_row_bw = (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / (*DestinationLinesToRequestRowInVBlank * LineTime); 1490 1491 #ifdef __DML_VBA_DEBUG__ 1492 dml_print("DML::%s: MetaRowByte = %d\n", __func__, MetaRowByte); 1493 dml_print("DML::%s: PixelPTEBytesPerRow = %d\n", __func__, PixelPTEBytesPerRow); 1494 dml_print("DML::%s: DestinationLinesToRequestRowInVBlank = %f\n", __func__, *DestinationLinesToRequestRowInVBlank); 1495 dml_print("DML::%s: prefetch_row_bw = %f\n", __func__, prefetch_row_bw); 1496 #endif 1497 } else { 1498 prefetch_row_bw = 0; 1499 MyError = true; 1500 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__); 1501 } 1502 1503 *prefetch_vmrow_bw = dml_max(prefetch_vm_bw, prefetch_row_bw); 1504 } 1505 1506 if (MyError) { 1507 *PrefetchBandwidth = 0; 1508 TimeForFetchingMetaPTE = 0; 1509 TimeForFetchingRowInVBlank = 0; 1510 *DestinationLinesToRequestVMInVBlank = 0; 1511 *DestinationLinesToRequestRowInVBlank = 0; 1512 *DestinationLinesForPrefetch = 0; 1513 LinesToRequestPrefetchPixelData = 0; 1514 *VRatioPrefetchY = 0; 1515 *VRatioPrefetchC = 0; 1516 *RequiredPrefetchPixDataBWLuma = 0; 1517 *RequiredPrefetchPixDataBWChroma = 0; 1518 } 1519 1520 return MyError; 1521 } 1522 1523 static double RoundToDFSGranularityUp(double Clock, double VCOSpeed) 1524 { 1525 return VCOSpeed * 4 / dml_floor(VCOSpeed * 4 / Clock, 1); 1526 } 1527 1528 static double RoundToDFSGranularityDown(double Clock, double VCOSpeed) 1529 { 1530 return VCOSpeed * 4 / dml_ceil(VCOSpeed * 4.0 / Clock, 1); 1531 } 1532 1533 static void CalculateDCCConfiguration( 1534 bool DCCEnabled, 1535 bool DCCProgrammingAssumesScanDirectionUnknown, 1536 enum source_format_class SourcePixelFormat, 1537 unsigned int SurfaceWidthLuma, 1538 unsigned int SurfaceWidthChroma, 1539 unsigned int SurfaceHeightLuma, 1540 unsigned int SurfaceHeightChroma, 1541 double DETBufferSize, 1542 unsigned int RequestHeight256ByteLuma, 1543 unsigned int RequestHeight256ByteChroma, 1544 enum dm_swizzle_mode TilingFormat, 1545 unsigned int BytePerPixelY, 1546 unsigned int BytePerPixelC, 1547 double BytePerPixelDETY, 1548 double BytePerPixelDETC, 1549 enum scan_direction_class ScanOrientation, 1550 unsigned int *MaxUncompressedBlockLuma, 1551 unsigned int *MaxUncompressedBlockChroma, 1552 unsigned int *MaxCompressedBlockLuma, 1553 unsigned int *MaxCompressedBlockChroma, 1554 unsigned int *IndependentBlockLuma, 1555 unsigned int *IndependentBlockChroma) 1556 { 1557 int yuv420; 1558 int horz_div_l; 1559 int horz_div_c; 1560 int vert_div_l; 1561 int vert_div_c; 1562 1563 int swath_buf_size; 1564 double detile_buf_vp_horz_limit; 1565 double detile_buf_vp_vert_limit; 1566 1567 int MAS_vp_horz_limit; 1568 int MAS_vp_vert_limit; 1569 int max_vp_horz_width; 1570 int max_vp_vert_height; 1571 int eff_surf_width_l; 1572 int eff_surf_width_c; 1573 int eff_surf_height_l; 1574 int eff_surf_height_c; 1575 1576 int full_swath_bytes_horz_wc_l; 1577 int full_swath_bytes_horz_wc_c; 1578 int full_swath_bytes_vert_wc_l; 1579 int full_swath_bytes_vert_wc_c; 1580 int req128_horz_wc_l; 1581 int req128_horz_wc_c; 1582 int req128_vert_wc_l; 1583 int req128_vert_wc_c; 1584 int segment_order_horz_contiguous_luma; 1585 int segment_order_horz_contiguous_chroma; 1586 int segment_order_vert_contiguous_luma; 1587 int segment_order_vert_contiguous_chroma; 1588 1589 typedef enum { 1590 REQ_256Bytes, REQ_128BytesNonContiguous, REQ_128BytesContiguous, REQ_NA 1591 } RequestType; 1592 RequestType RequestLuma; 1593 RequestType RequestChroma; 1594 1595 yuv420 = ((SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12) ? 1 : 0); 1596 horz_div_l = 1; 1597 horz_div_c = 1; 1598 vert_div_l = 1; 1599 vert_div_c = 1; 1600 1601 if (BytePerPixelY == 1) 1602 vert_div_l = 0; 1603 if (BytePerPixelC == 1) 1604 vert_div_c = 0; 1605 if (BytePerPixelY == 8 && (TilingFormat == dm_sw_64kb_s || TilingFormat == dm_sw_64kb_s_t || TilingFormat == dm_sw_64kb_s_x)) 1606 horz_div_l = 0; 1607 if (BytePerPixelC == 8 && (TilingFormat == dm_sw_64kb_s || TilingFormat == dm_sw_64kb_s_t || TilingFormat == dm_sw_64kb_s_x)) 1608 horz_div_c = 0; 1609 1610 if (BytePerPixelC == 0) { 1611 swath_buf_size = DETBufferSize / 2 - 2 * 256; 1612 detile_buf_vp_horz_limit = (double) swath_buf_size / ((double) RequestHeight256ByteLuma * BytePerPixelY / (1 + horz_div_l)); 1613 detile_buf_vp_vert_limit = (double) swath_buf_size / (256.0 / RequestHeight256ByteLuma / (1 + vert_div_l)); 1614 } else { 1615 swath_buf_size = DETBufferSize / 2 - 2 * 2 * 256; 1616 detile_buf_vp_horz_limit = (double) swath_buf_size 1617 / ((double) RequestHeight256ByteLuma * BytePerPixelY / (1 + horz_div_l) 1618 + (double) RequestHeight256ByteChroma * BytePerPixelC / (1 + horz_div_c) / (1 + yuv420)); 1619 detile_buf_vp_vert_limit = (double) swath_buf_size 1620 / (256.0 / RequestHeight256ByteLuma / (1 + vert_div_l) + 256.0 / RequestHeight256ByteChroma / (1 + vert_div_c) / (1 + yuv420)); 1621 } 1622 1623 if (SourcePixelFormat == dm_420_10) { 1624 detile_buf_vp_horz_limit = 1.5 * detile_buf_vp_horz_limit; 1625 detile_buf_vp_vert_limit = 1.5 * detile_buf_vp_vert_limit; 1626 } 1627 1628 detile_buf_vp_horz_limit = dml_floor(detile_buf_vp_horz_limit - 1, 16); 1629 detile_buf_vp_vert_limit = dml_floor(detile_buf_vp_vert_limit - 1, 16); 1630 1631 MAS_vp_horz_limit = SourcePixelFormat == dm_rgbe_alpha ? 3840 : 5760; 1632 MAS_vp_vert_limit = (BytePerPixelC > 0 ? 2880 : 5760); 1633 max_vp_horz_width = dml_min((double) MAS_vp_horz_limit, detile_buf_vp_horz_limit); 1634 max_vp_vert_height = dml_min((double) MAS_vp_vert_limit, detile_buf_vp_vert_limit); 1635 eff_surf_width_l = (SurfaceWidthLuma > max_vp_horz_width ? max_vp_horz_width : SurfaceWidthLuma); 1636 eff_surf_width_c = eff_surf_width_l / (1 + yuv420); 1637 eff_surf_height_l = (SurfaceHeightLuma > max_vp_vert_height ? max_vp_vert_height : SurfaceHeightLuma); 1638 eff_surf_height_c = eff_surf_height_l / (1 + yuv420); 1639 1640 full_swath_bytes_horz_wc_l = eff_surf_width_l * RequestHeight256ByteLuma * BytePerPixelY; 1641 full_swath_bytes_vert_wc_l = eff_surf_height_l * 256 / RequestHeight256ByteLuma; 1642 if (BytePerPixelC > 0) { 1643 full_swath_bytes_horz_wc_c = eff_surf_width_c * RequestHeight256ByteChroma * BytePerPixelC; 1644 full_swath_bytes_vert_wc_c = eff_surf_height_c * 256 / RequestHeight256ByteChroma; 1645 } else { 1646 full_swath_bytes_horz_wc_c = 0; 1647 full_swath_bytes_vert_wc_c = 0; 1648 } 1649 1650 if (SourcePixelFormat == dm_420_10) { 1651 full_swath_bytes_horz_wc_l = dml_ceil(full_swath_bytes_horz_wc_l * 2 / 3, 256); 1652 full_swath_bytes_horz_wc_c = dml_ceil(full_swath_bytes_horz_wc_c * 2 / 3, 256); 1653 full_swath_bytes_vert_wc_l = dml_ceil(full_swath_bytes_vert_wc_l * 2 / 3, 256); 1654 full_swath_bytes_vert_wc_c = dml_ceil(full_swath_bytes_vert_wc_c * 2 / 3, 256); 1655 } 1656 1657 if (2 * full_swath_bytes_horz_wc_l + 2 * full_swath_bytes_horz_wc_c <= DETBufferSize) { 1658 req128_horz_wc_l = 0; 1659 req128_horz_wc_c = 0; 1660 } else if (full_swath_bytes_horz_wc_l < 1.5 * full_swath_bytes_horz_wc_c && 2 * full_swath_bytes_horz_wc_l + full_swath_bytes_horz_wc_c <= DETBufferSize) { 1661 req128_horz_wc_l = 0; 1662 req128_horz_wc_c = 1; 1663 } else if (full_swath_bytes_horz_wc_l >= 1.5 * full_swath_bytes_horz_wc_c && full_swath_bytes_horz_wc_l + 2 * full_swath_bytes_horz_wc_c <= DETBufferSize) { 1664 req128_horz_wc_l = 1; 1665 req128_horz_wc_c = 0; 1666 } else { 1667 req128_horz_wc_l = 1; 1668 req128_horz_wc_c = 1; 1669 } 1670 1671 if (2 * full_swath_bytes_vert_wc_l + 2 * full_swath_bytes_vert_wc_c <= DETBufferSize) { 1672 req128_vert_wc_l = 0; 1673 req128_vert_wc_c = 0; 1674 } else if (full_swath_bytes_vert_wc_l < 1.5 * full_swath_bytes_vert_wc_c && 2 * full_swath_bytes_vert_wc_l + full_swath_bytes_vert_wc_c <= DETBufferSize) { 1675 req128_vert_wc_l = 0; 1676 req128_vert_wc_c = 1; 1677 } else if (full_swath_bytes_vert_wc_l >= 1.5 * full_swath_bytes_vert_wc_c && full_swath_bytes_vert_wc_l + 2 * full_swath_bytes_vert_wc_c <= DETBufferSize) { 1678 req128_vert_wc_l = 1; 1679 req128_vert_wc_c = 0; 1680 } else { 1681 req128_vert_wc_l = 1; 1682 req128_vert_wc_c = 1; 1683 } 1684 1685 if (BytePerPixelY == 2 || (BytePerPixelY == 4 && TilingFormat != dm_sw_64kb_r_x)) { 1686 segment_order_horz_contiguous_luma = 0; 1687 } else { 1688 segment_order_horz_contiguous_luma = 1; 1689 } 1690 if ((BytePerPixelY == 8 && (TilingFormat == dm_sw_64kb_d || TilingFormat == dm_sw_64kb_d_x || TilingFormat == dm_sw_64kb_d_t || TilingFormat == dm_sw_64kb_r_x)) 1691 || (BytePerPixelY == 4 && TilingFormat == dm_sw_64kb_r_x)) { 1692 segment_order_vert_contiguous_luma = 0; 1693 } else { 1694 segment_order_vert_contiguous_luma = 1; 1695 } 1696 if (BytePerPixelC == 2 || (BytePerPixelC == 4 && TilingFormat != dm_sw_64kb_r_x)) { 1697 segment_order_horz_contiguous_chroma = 0; 1698 } else { 1699 segment_order_horz_contiguous_chroma = 1; 1700 } 1701 if ((BytePerPixelC == 8 && (TilingFormat == dm_sw_64kb_d || TilingFormat == dm_sw_64kb_d_x || TilingFormat == dm_sw_64kb_d_t || TilingFormat == dm_sw_64kb_r_x)) 1702 || (BytePerPixelC == 4 && TilingFormat == dm_sw_64kb_r_x)) { 1703 segment_order_vert_contiguous_chroma = 0; 1704 } else { 1705 segment_order_vert_contiguous_chroma = 1; 1706 } 1707 1708 if (DCCProgrammingAssumesScanDirectionUnknown == true) { 1709 if (req128_horz_wc_l == 0 && req128_vert_wc_l == 0) { 1710 RequestLuma = REQ_256Bytes; 1711 } else if ((req128_horz_wc_l == 1 && segment_order_horz_contiguous_luma == 0) || (req128_vert_wc_l == 1 && segment_order_vert_contiguous_luma == 0)) { 1712 RequestLuma = REQ_128BytesNonContiguous; 1713 } else { 1714 RequestLuma = REQ_128BytesContiguous; 1715 } 1716 if (req128_horz_wc_c == 0 && req128_vert_wc_c == 0) { 1717 RequestChroma = REQ_256Bytes; 1718 } else if ((req128_horz_wc_c == 1 && segment_order_horz_contiguous_chroma == 0) || (req128_vert_wc_c == 1 && segment_order_vert_contiguous_chroma == 0)) { 1719 RequestChroma = REQ_128BytesNonContiguous; 1720 } else { 1721 RequestChroma = REQ_128BytesContiguous; 1722 } 1723 } else if (ScanOrientation != dm_vert) { 1724 if (req128_horz_wc_l == 0) { 1725 RequestLuma = REQ_256Bytes; 1726 } else if (segment_order_horz_contiguous_luma == 0) { 1727 RequestLuma = REQ_128BytesNonContiguous; 1728 } else { 1729 RequestLuma = REQ_128BytesContiguous; 1730 } 1731 if (req128_horz_wc_c == 0) { 1732 RequestChroma = REQ_256Bytes; 1733 } else if (segment_order_horz_contiguous_chroma == 0) { 1734 RequestChroma = REQ_128BytesNonContiguous; 1735 } else { 1736 RequestChroma = REQ_128BytesContiguous; 1737 } 1738 } else { 1739 if (req128_vert_wc_l == 0) { 1740 RequestLuma = REQ_256Bytes; 1741 } else if (segment_order_vert_contiguous_luma == 0) { 1742 RequestLuma = REQ_128BytesNonContiguous; 1743 } else { 1744 RequestLuma = REQ_128BytesContiguous; 1745 } 1746 if (req128_vert_wc_c == 0) { 1747 RequestChroma = REQ_256Bytes; 1748 } else if (segment_order_vert_contiguous_chroma == 0) { 1749 RequestChroma = REQ_128BytesNonContiguous; 1750 } else { 1751 RequestChroma = REQ_128BytesContiguous; 1752 } 1753 } 1754 1755 if (RequestLuma == REQ_256Bytes) { 1756 *MaxUncompressedBlockLuma = 256; 1757 *MaxCompressedBlockLuma = 256; 1758 *IndependentBlockLuma = 0; 1759 } else if (RequestLuma == REQ_128BytesContiguous) { 1760 *MaxUncompressedBlockLuma = 256; 1761 *MaxCompressedBlockLuma = 128; 1762 *IndependentBlockLuma = 128; 1763 } else { 1764 *MaxUncompressedBlockLuma = 256; 1765 *MaxCompressedBlockLuma = 64; 1766 *IndependentBlockLuma = 64; 1767 } 1768 1769 if (RequestChroma == REQ_256Bytes) { 1770 *MaxUncompressedBlockChroma = 256; 1771 *MaxCompressedBlockChroma = 256; 1772 *IndependentBlockChroma = 0; 1773 } else if (RequestChroma == REQ_128BytesContiguous) { 1774 *MaxUncompressedBlockChroma = 256; 1775 *MaxCompressedBlockChroma = 128; 1776 *IndependentBlockChroma = 128; 1777 } else { 1778 *MaxUncompressedBlockChroma = 256; 1779 *MaxCompressedBlockChroma = 64; 1780 *IndependentBlockChroma = 64; 1781 } 1782 1783 if (DCCEnabled != true || BytePerPixelC == 0) { 1784 *MaxUncompressedBlockChroma = 0; 1785 *MaxCompressedBlockChroma = 0; 1786 *IndependentBlockChroma = 0; 1787 } 1788 1789 if (DCCEnabled != true) { 1790 *MaxUncompressedBlockLuma = 0; 1791 *MaxCompressedBlockLuma = 0; 1792 *IndependentBlockLuma = 0; 1793 } 1794 } 1795 1796 static double CalculatePrefetchSourceLines( 1797 struct display_mode_lib *mode_lib, 1798 double VRatio, 1799 double vtaps, 1800 bool Interlace, 1801 bool ProgressiveToInterlaceUnitInOPP, 1802 unsigned int SwathHeight, 1803 unsigned int ViewportYStart, 1804 double *VInitPreFill, 1805 unsigned int *MaxNumSwath) 1806 { 1807 struct vba_vars_st *v = &mode_lib->vba; 1808 unsigned int MaxPartialSwath; 1809 1810 if (ProgressiveToInterlaceUnitInOPP) 1811 *VInitPreFill = dml_floor((VRatio + vtaps + 1) / 2.0, 1); 1812 else 1813 *VInitPreFill = dml_floor((VRatio + vtaps + 1 + Interlace * 0.5 * VRatio) / 2.0, 1); 1814 1815 if (!v->IgnoreViewportPositioning) { 1816 1817 *MaxNumSwath = dml_ceil((*VInitPreFill - 1.0) / SwathHeight, 1) + 1.0; 1818 1819 if (*VInitPreFill > 1.0) 1820 MaxPartialSwath = (unsigned int) (*VInitPreFill - 2) % SwathHeight; 1821 else 1822 MaxPartialSwath = (unsigned int) (*VInitPreFill + SwathHeight - 2) % SwathHeight; 1823 MaxPartialSwath = dml_max(1U, MaxPartialSwath); 1824 1825 } else { 1826 1827 if (ViewportYStart != 0) 1828 dml_print("WARNING DML: using viewport y position of 0 even though actual viewport y position is non-zero in prefetch source lines calculation\n"); 1829 1830 *MaxNumSwath = dml_ceil(*VInitPreFill / SwathHeight, 1); 1831 1832 if (*VInitPreFill > 1.0) 1833 MaxPartialSwath = (unsigned int) (*VInitPreFill - 1) % SwathHeight; 1834 else 1835 MaxPartialSwath = (unsigned int) (*VInitPreFill + SwathHeight - 1) % SwathHeight; 1836 } 1837 1838 #ifdef __DML_VBA_DEBUG__ 1839 dml_print("DML::%s: VRatio = %f\n", __func__, VRatio); 1840 dml_print("DML::%s: vtaps = %f\n", __func__, vtaps); 1841 dml_print("DML::%s: VInitPreFill = %f\n", __func__, *VInitPreFill); 1842 dml_print("DML::%s: ProgressiveToInterlaceUnitInOPP = %d\n", __func__, ProgressiveToInterlaceUnitInOPP); 1843 dml_print("DML::%s: IgnoreViewportPositioning = %d\n", __func__, v->IgnoreViewportPositioning); 1844 dml_print("DML::%s: SwathHeight = %d\n", __func__, SwathHeight); 1845 dml_print("DML::%s: MaxPartialSwath = %d\n", __func__, MaxPartialSwath); 1846 dml_print("DML::%s: MaxNumSwath = %d\n", __func__, *MaxNumSwath); 1847 dml_print("DML::%s: Prefetch source lines = %d\n", __func__, *MaxNumSwath * SwathHeight + MaxPartialSwath); 1848 #endif 1849 return *MaxNumSwath * SwathHeight + MaxPartialSwath; 1850 } 1851 1852 static unsigned int CalculateVMAndRowBytes( 1853 struct display_mode_lib *mode_lib, 1854 bool DCCEnable, 1855 unsigned int BlockHeight256Bytes, 1856 unsigned int BlockWidth256Bytes, 1857 enum source_format_class SourcePixelFormat, 1858 unsigned int SurfaceTiling, 1859 unsigned int BytePerPixel, 1860 enum scan_direction_class ScanDirection, 1861 unsigned int SwathWidth, 1862 unsigned int ViewportHeight, 1863 bool GPUVMEnable, 1864 bool HostVMEnable, 1865 unsigned int HostVMMaxNonCachedPageTableLevels, 1866 unsigned int GPUVMMinPageSize, 1867 unsigned int HostVMMinPageSize, 1868 unsigned int PTEBufferSizeInRequests, 1869 unsigned int Pitch, 1870 unsigned int DCCMetaPitch, 1871 unsigned int *MacroTileWidth, 1872 unsigned int *MetaRowByte, 1873 unsigned int *PixelPTEBytesPerRow, 1874 bool *PTEBufferSizeNotExceeded, 1875 int *dpte_row_width_ub, 1876 unsigned int *dpte_row_height, 1877 unsigned int *MetaRequestWidth, 1878 unsigned int *MetaRequestHeight, 1879 unsigned int *meta_row_width, 1880 unsigned int *meta_row_height, 1881 int *vm_group_bytes, 1882 unsigned int *dpte_group_bytes, 1883 unsigned int *PixelPTEReqWidth, 1884 unsigned int *PixelPTEReqHeight, 1885 unsigned int *PTERequestSize, 1886 int *DPDE0BytesFrame, 1887 int *MetaPTEBytesFrame) 1888 { 1889 struct vba_vars_st *v = &mode_lib->vba; 1890 unsigned int MPDEBytesFrame; 1891 unsigned int DCCMetaSurfaceBytes; 1892 unsigned int MacroTileSizeBytes; 1893 unsigned int MacroTileHeight; 1894 unsigned int ExtraDPDEBytesFrame; 1895 unsigned int PDEAndMetaPTEBytesFrame; 1896 unsigned int PixelPTEReqHeightPTEs = 0; 1897 unsigned int HostVMDynamicLevels = 0; 1898 double FractionOfPTEReturnDrop; 1899 1900 if (GPUVMEnable == true && HostVMEnable == true) { 1901 if (HostVMMinPageSize < 2048) { 1902 HostVMDynamicLevels = HostVMMaxNonCachedPageTableLevels; 1903 } else if (HostVMMinPageSize >= 2048 && HostVMMinPageSize < 1048576) { 1904 HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 1); 1905 } else { 1906 HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 2); 1907 } 1908 } 1909 1910 *MetaRequestHeight = 8 * BlockHeight256Bytes; 1911 *MetaRequestWidth = 8 * BlockWidth256Bytes; 1912 if (ScanDirection != dm_vert) { 1913 *meta_row_height = *MetaRequestHeight; 1914 *meta_row_width = dml_ceil((double) SwathWidth - 1, *MetaRequestWidth) + *MetaRequestWidth; 1915 *MetaRowByte = *meta_row_width * *MetaRequestHeight * BytePerPixel / 256.0; 1916 } else { 1917 *meta_row_height = *MetaRequestWidth; 1918 *meta_row_width = dml_ceil((double) SwathWidth - 1, *MetaRequestHeight) + *MetaRequestHeight; 1919 *MetaRowByte = *meta_row_width * *MetaRequestWidth * BytePerPixel / 256.0; 1920 } 1921 DCCMetaSurfaceBytes = DCCMetaPitch * (dml_ceil(ViewportHeight - 1, 64 * BlockHeight256Bytes) + 64 * BlockHeight256Bytes) * BytePerPixel / 256; 1922 if (GPUVMEnable == true) { 1923 *MetaPTEBytesFrame = (dml_ceil((double) (DCCMetaSurfaceBytes - 4.0 * 1024.0) / (8 * 4.0 * 1024), 1) + 1) * 64; 1924 MPDEBytesFrame = 128 * (v->GPUVMMaxPageTableLevels - 1); 1925 } else { 1926 *MetaPTEBytesFrame = 0; 1927 MPDEBytesFrame = 0; 1928 } 1929 1930 if (DCCEnable != true) { 1931 *MetaPTEBytesFrame = 0; 1932 MPDEBytesFrame = 0; 1933 *MetaRowByte = 0; 1934 } 1935 1936 if (SurfaceTiling == dm_sw_linear) { 1937 MacroTileSizeBytes = 256; 1938 MacroTileHeight = BlockHeight256Bytes; 1939 } else { 1940 MacroTileSizeBytes = 65536; 1941 MacroTileHeight = 16 * BlockHeight256Bytes; 1942 } 1943 *MacroTileWidth = MacroTileSizeBytes / BytePerPixel / MacroTileHeight; 1944 1945 if (GPUVMEnable == true && v->GPUVMMaxPageTableLevels > 1) { 1946 if (ScanDirection != dm_vert) { 1947 *DPDE0BytesFrame = 64 1948 * (dml_ceil( 1949 ((Pitch * (dml_ceil(ViewportHeight - 1, MacroTileHeight) + MacroTileHeight) * BytePerPixel) - MacroTileSizeBytes) 1950 / (8 * 2097152), 1951 1) + 1); 1952 } else { 1953 *DPDE0BytesFrame = 64 1954 * (dml_ceil( 1955 ((Pitch * (dml_ceil((double) SwathWidth - 1, MacroTileHeight) + MacroTileHeight) * BytePerPixel) - MacroTileSizeBytes) 1956 / (8 * 2097152), 1957 1) + 1); 1958 } 1959 ExtraDPDEBytesFrame = 128 * (v->GPUVMMaxPageTableLevels - 2); 1960 } else { 1961 *DPDE0BytesFrame = 0; 1962 ExtraDPDEBytesFrame = 0; 1963 } 1964 1965 PDEAndMetaPTEBytesFrame = *MetaPTEBytesFrame + MPDEBytesFrame + *DPDE0BytesFrame + ExtraDPDEBytesFrame; 1966 1967 #ifdef __DML_VBA_DEBUG__ 1968 dml_print("DML::%s: MetaPTEBytesFrame = %d\n", __func__, *MetaPTEBytesFrame); 1969 dml_print("DML::%s: MPDEBytesFrame = %d\n", __func__, MPDEBytesFrame); 1970 dml_print("DML::%s: DPDE0BytesFrame = %d\n", __func__, *DPDE0BytesFrame); 1971 dml_print("DML::%s: ExtraDPDEBytesFrame= %d\n", __func__, ExtraDPDEBytesFrame); 1972 dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame); 1973 #endif 1974 1975 if (HostVMEnable == true) { 1976 PDEAndMetaPTEBytesFrame = PDEAndMetaPTEBytesFrame * (1 + 8 * HostVMDynamicLevels); 1977 } 1978 #ifdef __DML_VBA_DEBUG__ 1979 dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame); 1980 #endif 1981 1982 if (SurfaceTiling == dm_sw_linear) { 1983 PixelPTEReqHeightPTEs = 1; 1984 *PixelPTEReqHeight = 1; 1985 *PixelPTEReqWidth = 32768.0 / BytePerPixel; 1986 *PTERequestSize = 64; 1987 FractionOfPTEReturnDrop = 0; 1988 } else if (MacroTileSizeBytes == 4096) { 1989 PixelPTEReqHeightPTEs = 1; 1990 *PixelPTEReqHeight = MacroTileHeight; 1991 *PixelPTEReqWidth = 8 * *MacroTileWidth; 1992 *PTERequestSize = 64; 1993 if (ScanDirection != dm_vert) 1994 FractionOfPTEReturnDrop = 0; 1995 else 1996 FractionOfPTEReturnDrop = 7 / 8; 1997 } else if (GPUVMMinPageSize == 4 && MacroTileSizeBytes > 4096) { 1998 PixelPTEReqHeightPTEs = 16; 1999 *PixelPTEReqHeight = 16 * BlockHeight256Bytes; 2000 *PixelPTEReqWidth = 16 * BlockWidth256Bytes; 2001 *PTERequestSize = 128; 2002 FractionOfPTEReturnDrop = 0; 2003 } else { 2004 PixelPTEReqHeightPTEs = 1; 2005 *PixelPTEReqHeight = MacroTileHeight; 2006 *PixelPTEReqWidth = 8 * *MacroTileWidth; 2007 *PTERequestSize = 64; 2008 FractionOfPTEReturnDrop = 0; 2009 } 2010 2011 if (SurfaceTiling == dm_sw_linear) { 2012 *dpte_row_height = dml_min(128, 1 << (unsigned int) dml_floor(dml_log2(PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch), 1)); 2013 *dpte_row_width_ub = (dml_ceil((double)(Pitch * *dpte_row_height - 1) / *PixelPTEReqWidth, 1) + 1) * *PixelPTEReqWidth; 2014 *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqWidth * *PTERequestSize; 2015 } else if (ScanDirection != dm_vert) { 2016 *dpte_row_height = *PixelPTEReqHeight; 2017 *dpte_row_width_ub = (dml_ceil((double) (SwathWidth - 1) / *PixelPTEReqWidth, 1) + 1) * *PixelPTEReqWidth; 2018 *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqWidth * *PTERequestSize; 2019 } else { 2020 *dpte_row_height = dml_min(*PixelPTEReqWidth, *MacroTileWidth); 2021 *dpte_row_width_ub = (dml_ceil((double) (SwathWidth - 1) / *PixelPTEReqHeight, 1) + 1) * *PixelPTEReqHeight; 2022 *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqHeight * *PTERequestSize; 2023 } 2024 2025 if (*PixelPTEBytesPerRow * (1 - FractionOfPTEReturnDrop) <= 64 * PTEBufferSizeInRequests) { 2026 *PTEBufferSizeNotExceeded = true; 2027 } else { 2028 *PTEBufferSizeNotExceeded = false; 2029 } 2030 2031 if (GPUVMEnable != true) { 2032 *PixelPTEBytesPerRow = 0; 2033 *PTEBufferSizeNotExceeded = true; 2034 } 2035 2036 dml_print("DML: vm_bytes = meta_pte_bytes_per_frame (per_pipe) = MetaPTEBytesFrame = : %i\n", *MetaPTEBytesFrame); 2037 2038 if (HostVMEnable == true) { 2039 *PixelPTEBytesPerRow = *PixelPTEBytesPerRow * (1 + 8 * HostVMDynamicLevels); 2040 } 2041 2042 if (HostVMEnable == true) { 2043 *vm_group_bytes = 512; 2044 *dpte_group_bytes = 512; 2045 } else if (GPUVMEnable == true) { 2046 *vm_group_bytes = 2048; 2047 if (SurfaceTiling != dm_sw_linear && PixelPTEReqHeightPTEs == 1 && ScanDirection == dm_vert) { 2048 *dpte_group_bytes = 512; 2049 } else { 2050 *dpte_group_bytes = 2048; 2051 } 2052 } else { 2053 *vm_group_bytes = 0; 2054 *dpte_group_bytes = 0; 2055 } 2056 return PDEAndMetaPTEBytesFrame; 2057 } 2058 2059 static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation(struct display_mode_lib *mode_lib) 2060 { 2061 struct vba_vars_st *v = &mode_lib->vba; 2062 unsigned int j, k; 2063 double HostVMInefficiencyFactor = 1.0; 2064 bool NoChromaPlanes = true; 2065 int ReorderBytes; 2066 double VMDataOnlyReturnBW; 2067 double MaxTotalRDBandwidth = 0; 2068 int PrefetchMode = v->PrefetchModePerState[v->VoltageLevel][v->maxMpcComb]; 2069 2070 v->WritebackDISPCLK = 0.0; 2071 v->DISPCLKWithRamping = 0; 2072 v->DISPCLKWithoutRamping = 0; 2073 v->GlobalDPPCLK = 0.0; 2074 /* DAL custom code: need to update ReturnBW in case min dcfclk is overriden */ 2075 { 2076 double IdealFabricAndSDPPortBandwidthPerState = dml_min( 2077 v->ReturnBusWidth * v->DCFCLKState[v->VoltageLevel][v->maxMpcComb], 2078 v->FabricClockPerState[v->VoltageLevel] * v->FabricDatapathToDCNDataReturn); 2079 double IdealDRAMBandwidthPerState = v->DRAMSpeedPerState[v->VoltageLevel] * v->NumberOfChannels * v->DRAMChannelWidth; 2080 if (v->HostVMEnable != true) { 2081 v->ReturnBW = dml_min( 2082 IdealFabricAndSDPPortBandwidthPerState * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0, 2083 IdealDRAMBandwidthPerState * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyPixelDataOnly / 100.0); 2084 } else { 2085 v->ReturnBW = dml_min( 2086 IdealFabricAndSDPPortBandwidthPerState * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0, 2087 IdealDRAMBandwidthPerState * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyPixelMixedWithVMData / 100.0); 2088 } 2089 } 2090 /* End DAL custom code */ 2091 2092 // DISPCLK and DPPCLK Calculation 2093 // 2094 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 2095 if (v->WritebackEnable[k]) { 2096 v->WritebackDISPCLK = dml_max( 2097 v->WritebackDISPCLK, 2098 dml31_CalculateWriteBackDISPCLK( 2099 v->WritebackPixelFormat[k], 2100 v->PixelClock[k], 2101 v->WritebackHRatio[k], 2102 v->WritebackVRatio[k], 2103 v->WritebackHTaps[k], 2104 v->WritebackVTaps[k], 2105 v->WritebackSourceWidth[k], 2106 v->WritebackDestinationWidth[k], 2107 v->HTotal[k], 2108 v->WritebackLineBufferSize)); 2109 } 2110 } 2111 2112 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 2113 if (v->HRatio[k] > 1) { 2114 v->PSCL_THROUGHPUT_LUMA[k] = dml_min( 2115 v->MaxDCHUBToPSCLThroughput, 2116 v->MaxPSCLToLBThroughput * v->HRatio[k] / dml_ceil(v->htaps[k] / 6.0, 1)); 2117 } else { 2118 v->PSCL_THROUGHPUT_LUMA[k] = dml_min(v->MaxDCHUBToPSCLThroughput, v->MaxPSCLToLBThroughput); 2119 } 2120 2121 v->DPPCLKUsingSingleDPPLuma = v->PixelClock[k] 2122 * dml_max( 2123 v->vtaps[k] / 6.0 * dml_min(1.0, v->HRatio[k]), 2124 dml_max(v->HRatio[k] * v->VRatio[k] / v->PSCL_THROUGHPUT_LUMA[k], 1.0)); 2125 2126 if ((v->htaps[k] > 6 || v->vtaps[k] > 6) && v->DPPCLKUsingSingleDPPLuma < 2 * v->PixelClock[k]) { 2127 v->DPPCLKUsingSingleDPPLuma = 2 * v->PixelClock[k]; 2128 } 2129 2130 if ((v->SourcePixelFormat[k] != dm_420_8 && v->SourcePixelFormat[k] != dm_420_10 && v->SourcePixelFormat[k] != dm_420_12 2131 && v->SourcePixelFormat[k] != dm_rgbe_alpha)) { 2132 v->PSCL_THROUGHPUT_CHROMA[k] = 0.0; 2133 v->DPPCLKUsingSingleDPP[k] = v->DPPCLKUsingSingleDPPLuma; 2134 } else { 2135 if (v->HRatioChroma[k] > 1) { 2136 v->PSCL_THROUGHPUT_CHROMA[k] = dml_min( 2137 v->MaxDCHUBToPSCLThroughput, 2138 v->MaxPSCLToLBThroughput * v->HRatioChroma[k] / dml_ceil(v->HTAPsChroma[k] / 6.0, 1.0)); 2139 } else { 2140 v->PSCL_THROUGHPUT_CHROMA[k] = dml_min(v->MaxDCHUBToPSCLThroughput, v->MaxPSCLToLBThroughput); 2141 } 2142 v->DPPCLKUsingSingleDPPChroma = v->PixelClock[k] 2143 * dml_max3( 2144 v->VTAPsChroma[k] / 6.0 * dml_min(1.0, v->HRatioChroma[k]), 2145 v->HRatioChroma[k] * v->VRatioChroma[k] / v->PSCL_THROUGHPUT_CHROMA[k], 2146 1.0); 2147 2148 if ((v->HTAPsChroma[k] > 6 || v->VTAPsChroma[k] > 6) && v->DPPCLKUsingSingleDPPChroma < 2 * v->PixelClock[k]) { 2149 v->DPPCLKUsingSingleDPPChroma = 2 * v->PixelClock[k]; 2150 } 2151 2152 v->DPPCLKUsingSingleDPP[k] = dml_max(v->DPPCLKUsingSingleDPPLuma, v->DPPCLKUsingSingleDPPChroma); 2153 } 2154 } 2155 2156 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 2157 if (v->BlendingAndTiming[k] != k) 2158 continue; 2159 if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_4to1) { 2160 v->DISPCLKWithRamping = dml_max( 2161 v->DISPCLKWithRamping, 2162 v->PixelClock[k] / 4 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100) 2163 * (1 + v->DISPCLKRampingMargin / 100)); 2164 v->DISPCLKWithoutRamping = dml_max( 2165 v->DISPCLKWithoutRamping, 2166 v->PixelClock[k] / 4 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100)); 2167 } else if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_2to1) { 2168 v->DISPCLKWithRamping = dml_max( 2169 v->DISPCLKWithRamping, 2170 v->PixelClock[k] / 2 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100) 2171 * (1 + v->DISPCLKRampingMargin / 100)); 2172 v->DISPCLKWithoutRamping = dml_max( 2173 v->DISPCLKWithoutRamping, 2174 v->PixelClock[k] / 2 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100)); 2175 } else { 2176 v->DISPCLKWithRamping = dml_max( 2177 v->DISPCLKWithRamping, 2178 v->PixelClock[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100) * (1 + v->DISPCLKRampingMargin / 100)); 2179 v->DISPCLKWithoutRamping = dml_max( 2180 v->DISPCLKWithoutRamping, 2181 v->PixelClock[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100)); 2182 } 2183 } 2184 2185 v->DISPCLKWithRamping = dml_max(v->DISPCLKWithRamping, v->WritebackDISPCLK); 2186 v->DISPCLKWithoutRamping = dml_max(v->DISPCLKWithoutRamping, v->WritebackDISPCLK); 2187 2188 ASSERT(v->DISPCLKDPPCLKVCOSpeed != 0); 2189 v->DISPCLKWithRampingRoundedToDFSGranularity = RoundToDFSGranularityUp(v->DISPCLKWithRamping, v->DISPCLKDPPCLKVCOSpeed); 2190 v->DISPCLKWithoutRampingRoundedToDFSGranularity = RoundToDFSGranularityUp(v->DISPCLKWithoutRamping, v->DISPCLKDPPCLKVCOSpeed); 2191 v->MaxDispclkRoundedToDFSGranularity = RoundToDFSGranularityDown( 2192 v->soc.clock_limits[v->soc.num_states - 1].dispclk_mhz, 2193 v->DISPCLKDPPCLKVCOSpeed); 2194 if (v->DISPCLKWithoutRampingRoundedToDFSGranularity > v->MaxDispclkRoundedToDFSGranularity) { 2195 v->DISPCLK_calculated = v->DISPCLKWithoutRampingRoundedToDFSGranularity; 2196 } else if (v->DISPCLKWithRampingRoundedToDFSGranularity > v->MaxDispclkRoundedToDFSGranularity) { 2197 v->DISPCLK_calculated = v->MaxDispclkRoundedToDFSGranularity; 2198 } else { 2199 v->DISPCLK_calculated = v->DISPCLKWithRampingRoundedToDFSGranularity; 2200 } 2201 v->DISPCLK = v->DISPCLK_calculated; 2202 DTRACE(" dispclk_mhz (calculated) = %f", v->DISPCLK_calculated); 2203 2204 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 2205 v->DPPCLK_calculated[k] = v->DPPCLKUsingSingleDPP[k] / v->DPPPerPlane[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100); 2206 v->GlobalDPPCLK = dml_max(v->GlobalDPPCLK, v->DPPCLK_calculated[k]); 2207 } 2208 v->GlobalDPPCLK = RoundToDFSGranularityUp(v->GlobalDPPCLK, v->DISPCLKDPPCLKVCOSpeed); 2209 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 2210 v->DPPCLK_calculated[k] = v->GlobalDPPCLK / 255 * dml_ceil(v->DPPCLK_calculated[k] * 255.0 / v->GlobalDPPCLK, 1); 2211 DTRACE(" dppclk_mhz[%i] (calculated) = %f", k, v->DPPCLK_calculated[k]); 2212 } 2213 2214 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 2215 v->DPPCLK[k] = v->DPPCLK_calculated[k]; 2216 } 2217 2218 // Urgent and B P-State/DRAM Clock Change Watermark 2219 DTRACE(" dcfclk_mhz = %f", v->DCFCLK); 2220 DTRACE(" return_bus_bw = %f", v->ReturnBW); 2221 2222 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 2223 CalculateBytePerPixelAnd256BBlockSizes( 2224 v->SourcePixelFormat[k], 2225 v->SurfaceTiling[k], 2226 &v->BytePerPixelY[k], 2227 &v->BytePerPixelC[k], 2228 &v->BytePerPixelDETY[k], 2229 &v->BytePerPixelDETC[k], 2230 &v->BlockHeight256BytesY[k], 2231 &v->BlockHeight256BytesC[k], 2232 &v->BlockWidth256BytesY[k], 2233 &v->BlockWidth256BytesC[k]); 2234 } 2235 2236 CalculateSwathWidth( 2237 false, 2238 v->NumberOfActivePlanes, 2239 v->SourcePixelFormat, 2240 v->SourceScan, 2241 v->ViewportWidth, 2242 v->ViewportHeight, 2243 v->SurfaceWidthY, 2244 v->SurfaceWidthC, 2245 v->SurfaceHeightY, 2246 v->SurfaceHeightC, 2247 v->ODMCombineEnabled, 2248 v->BytePerPixelY, 2249 v->BytePerPixelC, 2250 v->BlockHeight256BytesY, 2251 v->BlockHeight256BytesC, 2252 v->BlockWidth256BytesY, 2253 v->BlockWidth256BytesC, 2254 v->BlendingAndTiming, 2255 v->HActive, 2256 v->HRatio, 2257 v->DPPPerPlane, 2258 v->SwathWidthSingleDPPY, 2259 v->SwathWidthSingleDPPC, 2260 v->SwathWidthY, 2261 v->SwathWidthC, 2262 v->dummyinteger3, 2263 v->dummyinteger4, 2264 v->swath_width_luma_ub, 2265 v->swath_width_chroma_ub); 2266 2267 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 2268 v->ReadBandwidthPlaneLuma[k] = v->SwathWidthSingleDPPY[k] * v->BytePerPixelY[k] / (v->HTotal[k] / v->PixelClock[k]) 2269 * v->VRatio[k]; 2270 v->ReadBandwidthPlaneChroma[k] = v->SwathWidthSingleDPPC[k] * v->BytePerPixelC[k] / (v->HTotal[k] / v->PixelClock[k]) 2271 * v->VRatioChroma[k]; 2272 DTRACE(" read_bw[%i] = %fBps", k, v->ReadBandwidthPlaneLuma[k] + v->ReadBandwidthPlaneChroma[k]); 2273 } 2274 2275 // DCFCLK Deep Sleep 2276 CalculateDCFCLKDeepSleep( 2277 mode_lib, 2278 v->NumberOfActivePlanes, 2279 v->BytePerPixelY, 2280 v->BytePerPixelC, 2281 v->VRatio, 2282 v->VRatioChroma, 2283 v->SwathWidthY, 2284 v->SwathWidthC, 2285 v->DPPPerPlane, 2286 v->HRatio, 2287 v->HRatioChroma, 2288 v->PixelClock, 2289 v->PSCL_THROUGHPUT_LUMA, 2290 v->PSCL_THROUGHPUT_CHROMA, 2291 v->DPPCLK, 2292 v->ReadBandwidthPlaneLuma, 2293 v->ReadBandwidthPlaneChroma, 2294 v->ReturnBusWidth, 2295 &v->DCFCLKDeepSleep); 2296 2297 // DSCCLK 2298 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 2299 if ((v->BlendingAndTiming[k] != k) || !v->DSCEnabled[k]) { 2300 v->DSCCLK_calculated[k] = 0.0; 2301 } else { 2302 if (v->OutputFormat[k] == dm_420) 2303 v->DSCFormatFactor = 2; 2304 else if (v->OutputFormat[k] == dm_444) 2305 v->DSCFormatFactor = 1; 2306 else if (v->OutputFormat[k] == dm_n422) 2307 v->DSCFormatFactor = 2; 2308 else 2309 v->DSCFormatFactor = 1; 2310 if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_4to1) 2311 v->DSCCLK_calculated[k] = v->PixelClockBackEnd[k] / 12 / v->DSCFormatFactor 2312 / (1 - v->DISPCLKDPPCLKDSCCLKDownSpreading / 100); 2313 else if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_2to1) 2314 v->DSCCLK_calculated[k] = v->PixelClockBackEnd[k] / 6 / v->DSCFormatFactor 2315 / (1 - v->DISPCLKDPPCLKDSCCLKDownSpreading / 100); 2316 else 2317 v->DSCCLK_calculated[k] = v->PixelClockBackEnd[k] / 3 / v->DSCFormatFactor 2318 / (1 - v->DISPCLKDPPCLKDSCCLKDownSpreading / 100); 2319 } 2320 } 2321 2322 // DSC Delay 2323 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 2324 double BPP = v->OutputBpp[k]; 2325 2326 if (v->DSCEnabled[k] && BPP != 0) { 2327 if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_disabled) { 2328 v->DSCDelay[k] = dscceComputeDelay( 2329 v->DSCInputBitPerComponent[k], 2330 BPP, 2331 dml_ceil((double) v->HActive[k] / v->NumberOfDSCSlices[k], 1), 2332 v->NumberOfDSCSlices[k], 2333 v->OutputFormat[k], 2334 v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k]); 2335 } else if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_2to1) { 2336 v->DSCDelay[k] = 2 2337 * (dscceComputeDelay( 2338 v->DSCInputBitPerComponent[k], 2339 BPP, 2340 dml_ceil((double) v->HActive[k] / v->NumberOfDSCSlices[k], 1), 2341 v->NumberOfDSCSlices[k] / 2.0, 2342 v->OutputFormat[k], 2343 v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k])); 2344 } else { 2345 v->DSCDelay[k] = 4 2346 * (dscceComputeDelay( 2347 v->DSCInputBitPerComponent[k], 2348 BPP, 2349 dml_ceil((double) v->HActive[k] / v->NumberOfDSCSlices[k], 1), 2350 v->NumberOfDSCSlices[k] / 4.0, 2351 v->OutputFormat[k], 2352 v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k])); 2353 } 2354 v->DSCDelay[k] = v->DSCDelay[k] * v->PixelClock[k] / v->PixelClockBackEnd[k]; 2355 } else { 2356 v->DSCDelay[k] = 0; 2357 } 2358 } 2359 2360 for (k = 0; k < v->NumberOfActivePlanes; ++k) 2361 for (j = 0; j < v->NumberOfActivePlanes; ++j) // NumberOfPlanes 2362 if (j != k && v->BlendingAndTiming[k] == j && v->DSCEnabled[j]) 2363 v->DSCDelay[k] = v->DSCDelay[j]; 2364 2365 // Prefetch 2366 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 2367 unsigned int PDEAndMetaPTEBytesFrameY; 2368 unsigned int PixelPTEBytesPerRowY; 2369 unsigned int MetaRowByteY; 2370 unsigned int MetaRowByteC; 2371 unsigned int PDEAndMetaPTEBytesFrameC; 2372 unsigned int PixelPTEBytesPerRowC; 2373 bool PTEBufferSizeNotExceededY; 2374 bool PTEBufferSizeNotExceededC; 2375 2376 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12 2377 || v->SourcePixelFormat[k] == dm_rgbe_alpha) { 2378 if ((v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12) && v->SourceScan[k] != dm_vert) { 2379 v->PTEBufferSizeInRequestsForLuma = (v->PTEBufferSizeInRequestsLuma + v->PTEBufferSizeInRequestsChroma) / 2; 2380 v->PTEBufferSizeInRequestsForChroma = v->PTEBufferSizeInRequestsForLuma; 2381 } else { 2382 v->PTEBufferSizeInRequestsForLuma = v->PTEBufferSizeInRequestsLuma; 2383 v->PTEBufferSizeInRequestsForChroma = v->PTEBufferSizeInRequestsChroma; 2384 } 2385 2386 PDEAndMetaPTEBytesFrameC = CalculateVMAndRowBytes( 2387 mode_lib, 2388 v->DCCEnable[k], 2389 v->BlockHeight256BytesC[k], 2390 v->BlockWidth256BytesC[k], 2391 v->SourcePixelFormat[k], 2392 v->SurfaceTiling[k], 2393 v->BytePerPixelC[k], 2394 v->SourceScan[k], 2395 v->SwathWidthC[k], 2396 v->ViewportHeightChroma[k], 2397 v->GPUVMEnable, 2398 v->HostVMEnable, 2399 v->HostVMMaxNonCachedPageTableLevels, 2400 v->GPUVMMinPageSize, 2401 v->HostVMMinPageSize, 2402 v->PTEBufferSizeInRequestsForChroma, 2403 v->PitchC[k], 2404 v->DCCMetaPitchC[k], 2405 &v->MacroTileWidthC[k], 2406 &MetaRowByteC, 2407 &PixelPTEBytesPerRowC, 2408 &PTEBufferSizeNotExceededC, 2409 &v->dpte_row_width_chroma_ub[k], 2410 &v->dpte_row_height_chroma[k], 2411 &v->meta_req_width_chroma[k], 2412 &v->meta_req_height_chroma[k], 2413 &v->meta_row_width_chroma[k], 2414 &v->meta_row_height_chroma[k], 2415 &v->dummyinteger1, 2416 &v->dummyinteger2, 2417 &v->PixelPTEReqWidthC[k], 2418 &v->PixelPTEReqHeightC[k], 2419 &v->PTERequestSizeC[k], 2420 &v->dpde0_bytes_per_frame_ub_c[k], 2421 &v->meta_pte_bytes_per_frame_ub_c[k]); 2422 2423 v->PrefetchSourceLinesC[k] = CalculatePrefetchSourceLines( 2424 mode_lib, 2425 v->VRatioChroma[k], 2426 v->VTAPsChroma[k], 2427 v->Interlace[k], 2428 v->ProgressiveToInterlaceUnitInOPP, 2429 v->SwathHeightC[k], 2430 v->ViewportYStartC[k], 2431 &v->VInitPreFillC[k], 2432 &v->MaxNumSwathC[k]); 2433 } else { 2434 v->PTEBufferSizeInRequestsForLuma = v->PTEBufferSizeInRequestsLuma + v->PTEBufferSizeInRequestsChroma; 2435 v->PTEBufferSizeInRequestsForChroma = 0; 2436 PixelPTEBytesPerRowC = 0; 2437 PDEAndMetaPTEBytesFrameC = 0; 2438 MetaRowByteC = 0; 2439 v->MaxNumSwathC[k] = 0; 2440 v->PrefetchSourceLinesC[k] = 0; 2441 } 2442 2443 PDEAndMetaPTEBytesFrameY = CalculateVMAndRowBytes( 2444 mode_lib, 2445 v->DCCEnable[k], 2446 v->BlockHeight256BytesY[k], 2447 v->BlockWidth256BytesY[k], 2448 v->SourcePixelFormat[k], 2449 v->SurfaceTiling[k], 2450 v->BytePerPixelY[k], 2451 v->SourceScan[k], 2452 v->SwathWidthY[k], 2453 v->ViewportHeight[k], 2454 v->GPUVMEnable, 2455 v->HostVMEnable, 2456 v->HostVMMaxNonCachedPageTableLevels, 2457 v->GPUVMMinPageSize, 2458 v->HostVMMinPageSize, 2459 v->PTEBufferSizeInRequestsForLuma, 2460 v->PitchY[k], 2461 v->DCCMetaPitchY[k], 2462 &v->MacroTileWidthY[k], 2463 &MetaRowByteY, 2464 &PixelPTEBytesPerRowY, 2465 &PTEBufferSizeNotExceededY, 2466 &v->dpte_row_width_luma_ub[k], 2467 &v->dpte_row_height[k], 2468 &v->meta_req_width[k], 2469 &v->meta_req_height[k], 2470 &v->meta_row_width[k], 2471 &v->meta_row_height[k], 2472 &v->vm_group_bytes[k], 2473 &v->dpte_group_bytes[k], 2474 &v->PixelPTEReqWidthY[k], 2475 &v->PixelPTEReqHeightY[k], 2476 &v->PTERequestSizeY[k], 2477 &v->dpde0_bytes_per_frame_ub_l[k], 2478 &v->meta_pte_bytes_per_frame_ub_l[k]); 2479 2480 v->PrefetchSourceLinesY[k] = CalculatePrefetchSourceLines( 2481 mode_lib, 2482 v->VRatio[k], 2483 v->vtaps[k], 2484 v->Interlace[k], 2485 v->ProgressiveToInterlaceUnitInOPP, 2486 v->SwathHeightY[k], 2487 v->ViewportYStartY[k], 2488 &v->VInitPreFillY[k], 2489 &v->MaxNumSwathY[k]); 2490 v->PixelPTEBytesPerRow[k] = PixelPTEBytesPerRowY + PixelPTEBytesPerRowC; 2491 v->PDEAndMetaPTEBytesFrame[k] = PDEAndMetaPTEBytesFrameY + PDEAndMetaPTEBytesFrameC; 2492 v->MetaRowByte[k] = MetaRowByteY + MetaRowByteC; 2493 2494 CalculateRowBandwidth( 2495 v->GPUVMEnable, 2496 v->SourcePixelFormat[k], 2497 v->VRatio[k], 2498 v->VRatioChroma[k], 2499 v->DCCEnable[k], 2500 v->HTotal[k] / v->PixelClock[k], 2501 MetaRowByteY, 2502 MetaRowByteC, 2503 v->meta_row_height[k], 2504 v->meta_row_height_chroma[k], 2505 PixelPTEBytesPerRowY, 2506 PixelPTEBytesPerRowC, 2507 v->dpte_row_height[k], 2508 v->dpte_row_height_chroma[k], 2509 &v->meta_row_bw[k], 2510 &v->dpte_row_bw[k]); 2511 } 2512 2513 v->TotalDCCActiveDPP = 0; 2514 v->TotalActiveDPP = 0; 2515 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 2516 v->TotalActiveDPP = v->TotalActiveDPP + v->DPPPerPlane[k]; 2517 if (v->DCCEnable[k]) 2518 v->TotalDCCActiveDPP = v->TotalDCCActiveDPP + v->DPPPerPlane[k]; 2519 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12 2520 || v->SourcePixelFormat[k] == dm_rgbe_alpha) 2521 NoChromaPlanes = false; 2522 } 2523 2524 ReorderBytes = v->NumberOfChannels 2525 * dml_max3( 2526 v->UrgentOutOfOrderReturnPerChannelPixelDataOnly, 2527 v->UrgentOutOfOrderReturnPerChannelPixelMixedWithVMData, 2528 v->UrgentOutOfOrderReturnPerChannelVMDataOnly); 2529 2530 VMDataOnlyReturnBW = dml_min( 2531 dml_min(v->ReturnBusWidth * v->DCFCLK, v->FabricClock * v->FabricDatapathToDCNDataReturn) 2532 * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0, 2533 v->DRAMSpeed * v->NumberOfChannels * v->DRAMChannelWidth 2534 * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyVMDataOnly / 100.0); 2535 2536 #ifdef __DML_VBA_DEBUG__ 2537 dml_print("DML::%s: v->ReturnBusWidth = %f\n", __func__, v->ReturnBusWidth); 2538 dml_print("DML::%s: v->DCFCLK = %f\n", __func__, v->DCFCLK); 2539 dml_print("DML::%s: v->FabricClock = %f\n", __func__, v->FabricClock); 2540 dml_print("DML::%s: v->FabricDatapathToDCNDataReturn = %f\n", __func__, v->FabricDatapathToDCNDataReturn); 2541 dml_print("DML::%s: v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency = %f\n", __func__, v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency); 2542 dml_print("DML::%s: v->DRAMSpeed = %f\n", __func__, v->DRAMSpeed); 2543 dml_print("DML::%s: v->NumberOfChannels = %f\n", __func__, v->NumberOfChannels); 2544 dml_print("DML::%s: v->DRAMChannelWidth = %f\n", __func__, v->DRAMChannelWidth); 2545 dml_print("DML::%s: v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyVMDataOnly = %f\n", __func__, v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyVMDataOnly); 2546 dml_print("DML::%s: VMDataOnlyReturnBW = %f\n", __func__, VMDataOnlyReturnBW); 2547 dml_print("DML::%s: ReturnBW = %f\n", __func__, v->ReturnBW); 2548 #endif 2549 2550 if (v->GPUVMEnable && v->HostVMEnable) 2551 HostVMInefficiencyFactor = v->ReturnBW / VMDataOnlyReturnBW; 2552 2553 v->UrgentExtraLatency = CalculateExtraLatency( 2554 v->RoundTripPingLatencyCycles, 2555 ReorderBytes, 2556 v->DCFCLK, 2557 v->TotalActiveDPP, 2558 v->PixelChunkSizeInKByte, 2559 v->TotalDCCActiveDPP, 2560 v->MetaChunkSize, 2561 v->ReturnBW, 2562 v->GPUVMEnable, 2563 v->HostVMEnable, 2564 v->NumberOfActivePlanes, 2565 v->DPPPerPlane, 2566 v->dpte_group_bytes, 2567 HostVMInefficiencyFactor, 2568 v->HostVMMinPageSize, 2569 v->HostVMMaxNonCachedPageTableLevels); 2570 2571 v->TCalc = 24.0 / v->DCFCLKDeepSleep; 2572 2573 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 2574 if (v->BlendingAndTiming[k] == k) { 2575 if (v->WritebackEnable[k] == true) { 2576 v->WritebackDelay[v->VoltageLevel][k] = v->WritebackLatency 2577 + CalculateWriteBackDelay( 2578 v->WritebackPixelFormat[k], 2579 v->WritebackHRatio[k], 2580 v->WritebackVRatio[k], 2581 v->WritebackVTaps[k], 2582 v->WritebackDestinationWidth[k], 2583 v->WritebackDestinationHeight[k], 2584 v->WritebackSourceHeight[k], 2585 v->HTotal[k]) / v->DISPCLK; 2586 } else 2587 v->WritebackDelay[v->VoltageLevel][k] = 0; 2588 for (j = 0; j < v->NumberOfActivePlanes; ++j) { 2589 if (v->BlendingAndTiming[j] == k && v->WritebackEnable[j] == true) { 2590 v->WritebackDelay[v->VoltageLevel][k] = dml_max( 2591 v->WritebackDelay[v->VoltageLevel][k], 2592 v->WritebackLatency 2593 + CalculateWriteBackDelay( 2594 v->WritebackPixelFormat[j], 2595 v->WritebackHRatio[j], 2596 v->WritebackVRatio[j], 2597 v->WritebackVTaps[j], 2598 v->WritebackDestinationWidth[j], 2599 v->WritebackDestinationHeight[j], 2600 v->WritebackSourceHeight[j], 2601 v->HTotal[k]) / v->DISPCLK); 2602 } 2603 } 2604 } 2605 } 2606 2607 for (k = 0; k < v->NumberOfActivePlanes; ++k) 2608 for (j = 0; j < v->NumberOfActivePlanes; ++j) 2609 if (v->BlendingAndTiming[k] == j) 2610 v->WritebackDelay[v->VoltageLevel][k] = v->WritebackDelay[v->VoltageLevel][j]; 2611 2612 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 2613 v->MaxVStartupLines[k] = 2614 (v->Interlace[k] && !v->ProgressiveToInterlaceUnitInOPP) ? 2615 dml_floor((v->VTotal[k] - v->VActive[k]) / 2.0, 1.0) : 2616 v->VTotal[k] - v->VActive[k] 2617 - dml_max( 2618 1.0, 2619 dml_ceil( 2620 (double) v->WritebackDelay[v->VoltageLevel][k] 2621 / (v->HTotal[k] / v->PixelClock[k]), 2622 1)); 2623 if (v->MaxVStartupLines[k] > 1023) 2624 v->MaxVStartupLines[k] = 1023; 2625 2626 #ifdef __DML_VBA_DEBUG__ 2627 dml_print("DML::%s: k=%d MaxVStartupLines = %d\n", __func__, k, v->MaxVStartupLines[k]); 2628 dml_print("DML::%s: k=%d VoltageLevel = %d\n", __func__, k, v->VoltageLevel); 2629 dml_print("DML::%s: k=%d WritebackDelay = %f\n", __func__, k, v->WritebackDelay[v->VoltageLevel][k]); 2630 #endif 2631 } 2632 2633 v->MaximumMaxVStartupLines = 0; 2634 for (k = 0; k < v->NumberOfActivePlanes; ++k) 2635 v->MaximumMaxVStartupLines = dml_max(v->MaximumMaxVStartupLines, v->MaxVStartupLines[k]); 2636 2637 // VBA_DELTA 2638 // We don't really care to iterate between the various prefetch modes 2639 //v->PrefetchERROR = CalculateMinAndMaxPrefetchMode(v->AllowDRAMSelfRefreshOrDRAMClockChangeInVblank, &v->MinPrefetchMode, &v->MaxPrefetchMode); 2640 2641 v->UrgentLatency = CalculateUrgentLatency( 2642 v->UrgentLatencyPixelDataOnly, 2643 v->UrgentLatencyPixelMixedWithVMData, 2644 v->UrgentLatencyVMDataOnly, 2645 v->DoUrgentLatencyAdjustment, 2646 v->UrgentLatencyAdjustmentFabricClockComponent, 2647 v->UrgentLatencyAdjustmentFabricClockReference, 2648 v->FabricClock); 2649 2650 v->FractionOfUrgentBandwidth = 0.0; 2651 v->FractionOfUrgentBandwidthImmediateFlip = 0.0; 2652 2653 v->VStartupLines = __DML_VBA_MIN_VSTARTUP__; 2654 2655 do { 2656 double MaxTotalRDBandwidthNoUrgentBurst = 0.0; 2657 bool DestinationLineTimesForPrefetchLessThan2 = false; 2658 bool VRatioPrefetchMoreThan4 = false; 2659 double TWait = CalculateTWait(PrefetchMode, v->DRAMClockChangeLatency, v->UrgentLatency, v->SREnterPlusExitTime); 2660 MaxTotalRDBandwidth = 0; 2661 2662 dml_print("DML::%s: Start loop: VStartup = %d\n", __func__, v->VStartupLines); 2663 2664 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 2665 Pipe myPipe; 2666 2667 myPipe.DPPCLK = v->DPPCLK[k]; 2668 myPipe.DISPCLK = v->DISPCLK; 2669 myPipe.PixelClock = v->PixelClock[k]; 2670 myPipe.DCFCLKDeepSleep = v->DCFCLKDeepSleep; 2671 myPipe.DPPPerPlane = v->DPPPerPlane[k]; 2672 myPipe.ScalerEnabled = v->ScalerEnabled[k]; 2673 myPipe.VRatio = v->VRatio[k]; 2674 myPipe.VRatioChroma = v->VRatioChroma[k]; 2675 myPipe.SourceScan = v->SourceScan[k]; 2676 myPipe.BlockWidth256BytesY = v->BlockWidth256BytesY[k]; 2677 myPipe.BlockHeight256BytesY = v->BlockHeight256BytesY[k]; 2678 myPipe.BlockWidth256BytesC = v->BlockWidth256BytesC[k]; 2679 myPipe.BlockHeight256BytesC = v->BlockHeight256BytesC[k]; 2680 myPipe.InterlaceEnable = v->Interlace[k]; 2681 myPipe.NumberOfCursors = v->NumberOfCursors[k]; 2682 myPipe.VBlank = v->VTotal[k] - v->VActive[k]; 2683 myPipe.HTotal = v->HTotal[k]; 2684 myPipe.DCCEnable = v->DCCEnable[k]; 2685 myPipe.ODMCombineIsEnabled = v->ODMCombineEnabled[k] == dm_odm_combine_mode_4to1 2686 || v->ODMCombineEnabled[k] == dm_odm_combine_mode_2to1; 2687 myPipe.SourcePixelFormat = v->SourcePixelFormat[k]; 2688 myPipe.BytePerPixelY = v->BytePerPixelY[k]; 2689 myPipe.BytePerPixelC = v->BytePerPixelC[k]; 2690 myPipe.ProgressiveToInterlaceUnitInOPP = v->ProgressiveToInterlaceUnitInOPP; 2691 v->ErrorResult[k] = CalculatePrefetchSchedule( 2692 mode_lib, 2693 HostVMInefficiencyFactor, 2694 &myPipe, 2695 v->DSCDelay[k], 2696 v->DPPCLKDelaySubtotal + v->DPPCLKDelayCNVCFormater, 2697 v->DPPCLKDelaySCL, 2698 v->DPPCLKDelaySCLLBOnly, 2699 v->DPPCLKDelayCNVCCursor, 2700 v->DISPCLKDelaySubtotal, 2701 (unsigned int) (v->SwathWidthY[k] / v->HRatio[k]), 2702 v->OutputFormat[k], 2703 v->MaxInterDCNTileRepeaters, 2704 dml_min(v->VStartupLines, v->MaxVStartupLines[k]), 2705 v->MaxVStartupLines[k], 2706 v->GPUVMMaxPageTableLevels, 2707 v->GPUVMEnable, 2708 v->HostVMEnable, 2709 v->HostVMMaxNonCachedPageTableLevels, 2710 v->HostVMMinPageSize, 2711 v->DynamicMetadataEnable[k], 2712 v->DynamicMetadataVMEnabled, 2713 v->DynamicMetadataLinesBeforeActiveRequired[k], 2714 v->DynamicMetadataTransmittedBytes[k], 2715 v->UrgentLatency, 2716 v->UrgentExtraLatency, 2717 v->TCalc, 2718 v->PDEAndMetaPTEBytesFrame[k], 2719 v->MetaRowByte[k], 2720 v->PixelPTEBytesPerRow[k], 2721 v->PrefetchSourceLinesY[k], 2722 v->SwathWidthY[k], 2723 v->VInitPreFillY[k], 2724 v->MaxNumSwathY[k], 2725 v->PrefetchSourceLinesC[k], 2726 v->SwathWidthC[k], 2727 v->VInitPreFillC[k], 2728 v->MaxNumSwathC[k], 2729 v->swath_width_luma_ub[k], 2730 v->swath_width_chroma_ub[k], 2731 v->SwathHeightY[k], 2732 v->SwathHeightC[k], 2733 TWait, 2734 &v->DSTXAfterScaler[k], 2735 &v->DSTYAfterScaler[k], 2736 &v->DestinationLinesForPrefetch[k], 2737 &v->PrefetchBandwidth[k], 2738 &v->DestinationLinesToRequestVMInVBlank[k], 2739 &v->DestinationLinesToRequestRowInVBlank[k], 2740 &v->VRatioPrefetchY[k], 2741 &v->VRatioPrefetchC[k], 2742 &v->RequiredPrefetchPixDataBWLuma[k], 2743 &v->RequiredPrefetchPixDataBWChroma[k], 2744 &v->NotEnoughTimeForDynamicMetadata[k], 2745 &v->Tno_bw[k], 2746 &v->prefetch_vmrow_bw[k], 2747 &v->Tdmdl_vm[k], 2748 &v->Tdmdl[k], 2749 &v->TSetup[k], 2750 &v->VUpdateOffsetPix[k], 2751 &v->VUpdateWidthPix[k], 2752 &v->VReadyOffsetPix[k]); 2753 2754 #ifdef __DML_VBA_DEBUG__ 2755 dml_print("DML::%s: k=%0d Prefetch cal result=%0d\n", __func__, k, v->ErrorResult[k]); 2756 #endif 2757 v->VStartup[k] = dml_min(v->VStartupLines, v->MaxVStartupLines[k]); 2758 } 2759 2760 v->NoEnoughUrgentLatencyHiding = false; 2761 v->NoEnoughUrgentLatencyHidingPre = false; 2762 2763 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 2764 v->cursor_bw[k] = v->NumberOfCursors[k] * v->CursorWidth[k][0] * v->CursorBPP[k][0] / 8.0 2765 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatio[k]; 2766 v->cursor_bw_pre[k] = v->NumberOfCursors[k] * v->CursorWidth[k][0] * v->CursorBPP[k][0] / 8.0 2767 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatioPrefetchY[k]; 2768 2769 CalculateUrgentBurstFactor( 2770 v->swath_width_luma_ub[k], 2771 v->swath_width_chroma_ub[k], 2772 v->SwathHeightY[k], 2773 v->SwathHeightC[k], 2774 v->HTotal[k] / v->PixelClock[k], 2775 v->UrgentLatency, 2776 v->CursorBufferSize, 2777 v->CursorWidth[k][0], 2778 v->CursorBPP[k][0], 2779 v->VRatio[k], 2780 v->VRatioChroma[k], 2781 v->BytePerPixelDETY[k], 2782 v->BytePerPixelDETC[k], 2783 v->DETBufferSizeY[k], 2784 v->DETBufferSizeC[k], 2785 &v->UrgBurstFactorCursor[k], 2786 &v->UrgBurstFactorLuma[k], 2787 &v->UrgBurstFactorChroma[k], 2788 &v->NoUrgentLatencyHiding[k]); 2789 2790 CalculateUrgentBurstFactor( 2791 v->swath_width_luma_ub[k], 2792 v->swath_width_chroma_ub[k], 2793 v->SwathHeightY[k], 2794 v->SwathHeightC[k], 2795 v->HTotal[k] / v->PixelClock[k], 2796 v->UrgentLatency, 2797 v->CursorBufferSize, 2798 v->CursorWidth[k][0], 2799 v->CursorBPP[k][0], 2800 v->VRatioPrefetchY[k], 2801 v->VRatioPrefetchC[k], 2802 v->BytePerPixelDETY[k], 2803 v->BytePerPixelDETC[k], 2804 v->DETBufferSizeY[k], 2805 v->DETBufferSizeC[k], 2806 &v->UrgBurstFactorCursorPre[k], 2807 &v->UrgBurstFactorLumaPre[k], 2808 &v->UrgBurstFactorChromaPre[k], 2809 &v->NoUrgentLatencyHidingPre[k]); 2810 2811 MaxTotalRDBandwidth = MaxTotalRDBandwidth 2812 + dml_max3( 2813 v->DPPPerPlane[k] * v->prefetch_vmrow_bw[k], 2814 v->ReadBandwidthPlaneLuma[k] * v->UrgBurstFactorLuma[k] 2815 + v->ReadBandwidthPlaneChroma[k] * v->UrgBurstFactorChroma[k] 2816 + v->cursor_bw[k] * v->UrgBurstFactorCursor[k] 2817 + v->DPPPerPlane[k] * (v->meta_row_bw[k] + v->dpte_row_bw[k]), 2818 v->DPPPerPlane[k] 2819 * (v->RequiredPrefetchPixDataBWLuma[k] * v->UrgBurstFactorLumaPre[k] 2820 + v->RequiredPrefetchPixDataBWChroma[k] * v->UrgBurstFactorChromaPre[k]) 2821 + v->cursor_bw_pre[k] * v->UrgBurstFactorCursorPre[k]); 2822 2823 MaxTotalRDBandwidthNoUrgentBurst = MaxTotalRDBandwidthNoUrgentBurst 2824 + dml_max3( 2825 v->DPPPerPlane[k] * v->prefetch_vmrow_bw[k], 2826 v->ReadBandwidthPlaneLuma[k] + v->ReadBandwidthPlaneChroma[k] + v->cursor_bw[k] 2827 + v->DPPPerPlane[k] * (v->meta_row_bw[k] + v->dpte_row_bw[k]), 2828 v->DPPPerPlane[k] * (v->RequiredPrefetchPixDataBWLuma[k] + v->RequiredPrefetchPixDataBWChroma[k]) 2829 + v->cursor_bw_pre[k]); 2830 2831 #ifdef __DML_VBA_DEBUG__ 2832 dml_print("DML::%s: k=%0d DPPPerPlane=%d\n", __func__, k, v->DPPPerPlane[k]); 2833 dml_print("DML::%s: k=%0d UrgBurstFactorLuma=%f\n", __func__, k, v->UrgBurstFactorLuma[k]); 2834 dml_print("DML::%s: k=%0d UrgBurstFactorChroma=%f\n", __func__, k, v->UrgBurstFactorChroma[k]); 2835 dml_print("DML::%s: k=%0d UrgBurstFactorLumaPre=%f\n", __func__, k, v->UrgBurstFactorLumaPre[k]); 2836 dml_print("DML::%s: k=%0d UrgBurstFactorChromaPre=%f\n", __func__, k, v->UrgBurstFactorChromaPre[k]); 2837 2838 dml_print("DML::%s: k=%0d VRatioPrefetchY=%f\n", __func__, k, v->VRatioPrefetchY[k]); 2839 dml_print("DML::%s: k=%0d VRatioY=%f\n", __func__, k, v->VRatio[k]); 2840 2841 dml_print("DML::%s: k=%0d prefetch_vmrow_bw=%f\n", __func__, k, v->prefetch_vmrow_bw[k]); 2842 dml_print("DML::%s: k=%0d ReadBandwidthPlaneLuma=%f\n", __func__, k, v->ReadBandwidthPlaneLuma[k]); 2843 dml_print("DML::%s: k=%0d ReadBandwidthPlaneChroma=%f\n", __func__, k, v->ReadBandwidthPlaneChroma[k]); 2844 dml_print("DML::%s: k=%0d cursor_bw=%f\n", __func__, k, v->cursor_bw[k]); 2845 dml_print("DML::%s: k=%0d meta_row_bw=%f\n", __func__, k, v->meta_row_bw[k]); 2846 dml_print("DML::%s: k=%0d dpte_row_bw=%f\n", __func__, k, v->dpte_row_bw[k]); 2847 dml_print("DML::%s: k=%0d RequiredPrefetchPixDataBWLuma=%f\n", __func__, k, v->RequiredPrefetchPixDataBWLuma[k]); 2848 dml_print("DML::%s: k=%0d RequiredPrefetchPixDataBWChroma=%f\n", __func__, k, v->RequiredPrefetchPixDataBWChroma[k]); 2849 dml_print("DML::%s: k=%0d cursor_bw_pre=%f\n", __func__, k, v->cursor_bw_pre[k]); 2850 dml_print("DML::%s: k=%0d MaxTotalRDBandwidthNoUrgentBurst=%f\n", __func__, k, MaxTotalRDBandwidthNoUrgentBurst); 2851 #endif 2852 2853 if (v->DestinationLinesForPrefetch[k] < 2) 2854 DestinationLineTimesForPrefetchLessThan2 = true; 2855 2856 if (v->VRatioPrefetchY[k] > 4 || v->VRatioPrefetchC[k] > 4) 2857 VRatioPrefetchMoreThan4 = true; 2858 2859 if (v->NoUrgentLatencyHiding[k] == true) 2860 v->NoEnoughUrgentLatencyHiding = true; 2861 2862 if (v->NoUrgentLatencyHidingPre[k] == true) 2863 v->NoEnoughUrgentLatencyHidingPre = true; 2864 } 2865 2866 v->FractionOfUrgentBandwidth = MaxTotalRDBandwidthNoUrgentBurst / v->ReturnBW; 2867 2868 #ifdef __DML_VBA_DEBUG__ 2869 dml_print("DML::%s: MaxTotalRDBandwidthNoUrgentBurst=%f \n", __func__, MaxTotalRDBandwidthNoUrgentBurst); 2870 dml_print("DML::%s: ReturnBW=%f \n", __func__, v->ReturnBW); 2871 dml_print("DML::%s: FractionOfUrgentBandwidth=%f \n", __func__, v->FractionOfUrgentBandwidth); 2872 #endif 2873 2874 if (MaxTotalRDBandwidth <= v->ReturnBW && v->NoEnoughUrgentLatencyHiding == 0 && v->NoEnoughUrgentLatencyHidingPre == 0 2875 && !VRatioPrefetchMoreThan4 && !DestinationLineTimesForPrefetchLessThan2) 2876 v->PrefetchModeSupported = true; 2877 else { 2878 v->PrefetchModeSupported = false; 2879 dml_print("DML::%s: ***failed***. Bandwidth violation. Results are NOT valid\n", __func__); 2880 dml_print("DML::%s: MaxTotalRDBandwidth:%f AvailReturnBandwidth:%f\n", __func__, MaxTotalRDBandwidth, v->ReturnBW); 2881 dml_print("DML::%s: VRatioPrefetch %s more than 4\n", __func__, (VRatioPrefetchMoreThan4) ? "is" : "is not"); 2882 dml_print("DML::%s: DestinationLines for Prefetch %s less than 2\n", __func__, (DestinationLineTimesForPrefetchLessThan2) ? "is" : "is not"); 2883 } 2884 2885 // PREVIOUS_ERROR 2886 // This error result check was done after the PrefetchModeSupported. So we will 2887 // still try to calculate flip schedule even prefetch mode not supported 2888 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 2889 if (v->ErrorResult[k] == true || v->NotEnoughTimeForDynamicMetadata[k] == true) { 2890 v->PrefetchModeSupported = false; 2891 dml_print("DML::%s: ***failed***. Prefetch schedule violation. Results are NOT valid\n", __func__); 2892 } 2893 } 2894 2895 if (v->PrefetchModeSupported == true && v->ImmediateFlipSupport == true) { 2896 v->BandwidthAvailableForImmediateFlip = v->ReturnBW; 2897 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 2898 v->BandwidthAvailableForImmediateFlip = v->BandwidthAvailableForImmediateFlip 2899 - dml_max( 2900 v->ReadBandwidthPlaneLuma[k] * v->UrgBurstFactorLuma[k] 2901 + v->ReadBandwidthPlaneChroma[k] * v->UrgBurstFactorChroma[k] 2902 + v->cursor_bw[k] * v->UrgBurstFactorCursor[k], 2903 v->DPPPerPlane[k] 2904 * (v->RequiredPrefetchPixDataBWLuma[k] * v->UrgBurstFactorLumaPre[k] 2905 + v->RequiredPrefetchPixDataBWChroma[k] * v->UrgBurstFactorChromaPre[k]) 2906 + v->cursor_bw_pre[k] * v->UrgBurstFactorCursorPre[k]); 2907 } 2908 2909 v->TotImmediateFlipBytes = 0; 2910 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 2911 v->TotImmediateFlipBytes = v->TotImmediateFlipBytes 2912 + v->DPPPerPlane[k] * (v->PDEAndMetaPTEBytesFrame[k] + v->MetaRowByte[k] + v->PixelPTEBytesPerRow[k]); 2913 } 2914 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 2915 CalculateFlipSchedule( 2916 mode_lib, 2917 HostVMInefficiencyFactor, 2918 v->UrgentExtraLatency, 2919 v->UrgentLatency, 2920 v->GPUVMMaxPageTableLevels, 2921 v->HostVMEnable, 2922 v->HostVMMaxNonCachedPageTableLevels, 2923 v->GPUVMEnable, 2924 v->HostVMMinPageSize, 2925 v->PDEAndMetaPTEBytesFrame[k], 2926 v->MetaRowByte[k], 2927 v->PixelPTEBytesPerRow[k], 2928 v->BandwidthAvailableForImmediateFlip, 2929 v->TotImmediateFlipBytes, 2930 v->SourcePixelFormat[k], 2931 v->HTotal[k] / v->PixelClock[k], 2932 v->VRatio[k], 2933 v->VRatioChroma[k], 2934 v->Tno_bw[k], 2935 v->DCCEnable[k], 2936 v->dpte_row_height[k], 2937 v->meta_row_height[k], 2938 v->dpte_row_height_chroma[k], 2939 v->meta_row_height_chroma[k], 2940 &v->DestinationLinesToRequestVMInImmediateFlip[k], 2941 &v->DestinationLinesToRequestRowInImmediateFlip[k], 2942 &v->final_flip_bw[k], 2943 &v->ImmediateFlipSupportedForPipe[k]); 2944 } 2945 2946 v->total_dcn_read_bw_with_flip = 0.0; 2947 v->total_dcn_read_bw_with_flip_no_urgent_burst = 0.0; 2948 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 2949 v->total_dcn_read_bw_with_flip = v->total_dcn_read_bw_with_flip 2950 + dml_max3( 2951 v->DPPPerPlane[k] * v->prefetch_vmrow_bw[k], 2952 v->DPPPerPlane[k] * v->final_flip_bw[k] 2953 + v->ReadBandwidthLuma[k] * v->UrgBurstFactorLuma[k] 2954 + v->ReadBandwidthChroma[k] * v->UrgBurstFactorChroma[k] 2955 + v->cursor_bw[k] * v->UrgBurstFactorCursor[k], 2956 v->DPPPerPlane[k] 2957 * (v->final_flip_bw[k] 2958 + v->RequiredPrefetchPixDataBWLuma[k] * v->UrgBurstFactorLumaPre[k] 2959 + v->RequiredPrefetchPixDataBWChroma[k] * v->UrgBurstFactorChromaPre[k]) 2960 + v->cursor_bw_pre[k] * v->UrgBurstFactorCursorPre[k]); 2961 v->total_dcn_read_bw_with_flip_no_urgent_burst = v->total_dcn_read_bw_with_flip_no_urgent_burst 2962 + dml_max3( 2963 v->DPPPerPlane[k] * v->prefetch_vmrow_bw[k], 2964 v->DPPPerPlane[k] * v->final_flip_bw[k] + v->ReadBandwidthPlaneLuma[k] 2965 + v->ReadBandwidthPlaneChroma[k] + v->cursor_bw[k], 2966 v->DPPPerPlane[k] 2967 * (v->final_flip_bw[k] + v->RequiredPrefetchPixDataBWLuma[k] 2968 + v->RequiredPrefetchPixDataBWChroma[k]) + v->cursor_bw_pre[k]); 2969 } 2970 v->FractionOfUrgentBandwidthImmediateFlip = v->total_dcn_read_bw_with_flip_no_urgent_burst / v->ReturnBW; 2971 2972 v->ImmediateFlipSupported = true; 2973 if (v->total_dcn_read_bw_with_flip > v->ReturnBW) { 2974 #ifdef __DML_VBA_DEBUG__ 2975 dml_print("DML::%s: total_dcn_read_bw_with_flip %f (bw w/ flip too high!)\n", __func__, v->total_dcn_read_bw_with_flip); 2976 #endif 2977 v->ImmediateFlipSupported = false; 2978 v->total_dcn_read_bw_with_flip = MaxTotalRDBandwidth; 2979 } 2980 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 2981 if (v->ImmediateFlipSupportedForPipe[k] == false) { 2982 #ifdef __DML_VBA_DEBUG__ 2983 dml_print("DML::%s: Pipe %0d not supporting iflip\n", 2984 __func__, k); 2985 #endif 2986 v->ImmediateFlipSupported = false; 2987 } 2988 } 2989 } else { 2990 v->ImmediateFlipSupported = false; 2991 } 2992 2993 v->PrefetchAndImmediateFlipSupported = 2994 (v->PrefetchModeSupported == true && ((!v->ImmediateFlipSupport && !v->HostVMEnable 2995 && v->ImmediateFlipRequirement[0] != dm_immediate_flip_required) || 2996 v->ImmediateFlipSupported)) ? true : false; 2997 #ifdef __DML_VBA_DEBUG__ 2998 dml_print("DML::%s: PrefetchModeSupported %d\n", __func__, v->PrefetchModeSupported); 2999 dml_print("DML::%s: ImmediateFlipRequirement %d\n", __func__, v->ImmediateFlipRequirement == dm_immediate_flip_required); 3000 dml_print("DML::%s: ImmediateFlipSupported %d\n", __func__, v->ImmediateFlipSupported); 3001 dml_print("DML::%s: ImmediateFlipSupport %d\n", __func__, v->ImmediateFlipSupport); 3002 dml_print("DML::%s: HostVMEnable %d\n", __func__, v->HostVMEnable); 3003 dml_print("DML::%s: PrefetchAndImmediateFlipSupported %d\n", __func__, v->PrefetchAndImmediateFlipSupported); 3004 #endif 3005 dml_print("DML::%s: Done loop: Vstartup=%d, Max Vstartup is %d\n", __func__, v->VStartupLines, v->MaximumMaxVStartupLines); 3006 3007 v->VStartupLines = v->VStartupLines + 1; 3008 } while (!v->PrefetchAndImmediateFlipSupported && v->VStartupLines <= v->MaximumMaxVStartupLines); 3009 ASSERT(v->PrefetchAndImmediateFlipSupported); 3010 3011 // Unbounded Request Enabled 3012 CalculateUnboundedRequestAndCompressedBufferSize( 3013 v->DETBufferSizeInKByte[0], 3014 v->ConfigReturnBufferSizeInKByte, 3015 v->UseUnboundedRequesting, 3016 v->TotalActiveDPP, 3017 NoChromaPlanes, 3018 v->MaxNumDPP, 3019 v->CompressedBufferSegmentSizeInkByte, 3020 v->Output, 3021 &v->UnboundedRequestEnabled, 3022 &v->CompressedBufferSizeInkByte); 3023 3024 //Watermarks and NB P-State/DRAM Clock Change Support 3025 { 3026 enum clock_change_support DRAMClockChangeSupport; // dummy 3027 CalculateWatermarksAndDRAMSpeedChangeSupport( 3028 mode_lib, 3029 PrefetchMode, 3030 v->NumberOfActivePlanes, 3031 v->MaxLineBufferLines, 3032 v->LineBufferSize, 3033 v->WritebackInterfaceBufferSize, 3034 v->DCFCLK, 3035 v->ReturnBW, 3036 v->SynchronizedVBlank, 3037 v->dpte_group_bytes, 3038 v->MetaChunkSize, 3039 v->UrgentLatency, 3040 v->UrgentExtraLatency, 3041 v->WritebackLatency, 3042 v->WritebackChunkSize, 3043 v->SOCCLK, 3044 v->DRAMClockChangeLatency, 3045 v->SRExitTime, 3046 v->SREnterPlusExitTime, 3047 v->SRExitZ8Time, 3048 v->SREnterPlusExitZ8Time, 3049 v->DCFCLKDeepSleep, 3050 v->DETBufferSizeY, 3051 v->DETBufferSizeC, 3052 v->SwathHeightY, 3053 v->SwathHeightC, 3054 v->LBBitPerPixel, 3055 v->SwathWidthY, 3056 v->SwathWidthC, 3057 v->HRatio, 3058 v->HRatioChroma, 3059 v->vtaps, 3060 v->VTAPsChroma, 3061 v->VRatio, 3062 v->VRatioChroma, 3063 v->HTotal, 3064 v->PixelClock, 3065 v->BlendingAndTiming, 3066 v->DPPPerPlane, 3067 v->BytePerPixelDETY, 3068 v->BytePerPixelDETC, 3069 v->DSTXAfterScaler, 3070 v->DSTYAfterScaler, 3071 v->WritebackEnable, 3072 v->WritebackPixelFormat, 3073 v->WritebackDestinationWidth, 3074 v->WritebackDestinationHeight, 3075 v->WritebackSourceHeight, 3076 v->UnboundedRequestEnabled, 3077 v->CompressedBufferSizeInkByte, 3078 &DRAMClockChangeSupport, 3079 &v->UrgentWatermark, 3080 &v->WritebackUrgentWatermark, 3081 &v->DRAMClockChangeWatermark, 3082 &v->WritebackDRAMClockChangeWatermark, 3083 &v->StutterExitWatermark, 3084 &v->StutterEnterPlusExitWatermark, 3085 &v->Z8StutterExitWatermark, 3086 &v->Z8StutterEnterPlusExitWatermark, 3087 &v->MinActiveDRAMClockChangeLatencySupported); 3088 3089 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 3090 if (v->WritebackEnable[k] == true) { 3091 v->WritebackAllowDRAMClockChangeEndPosition[k] = dml_max( 3092 0, 3093 v->VStartup[k] * v->HTotal[k] / v->PixelClock[k] - v->WritebackDRAMClockChangeWatermark); 3094 } else { 3095 v->WritebackAllowDRAMClockChangeEndPosition[k] = 0; 3096 } 3097 } 3098 } 3099 3100 //Display Pipeline Delivery Time in Prefetch, Groups 3101 CalculatePixelDeliveryTimes( 3102 v->NumberOfActivePlanes, 3103 v->VRatio, 3104 v->VRatioChroma, 3105 v->VRatioPrefetchY, 3106 v->VRatioPrefetchC, 3107 v->swath_width_luma_ub, 3108 v->swath_width_chroma_ub, 3109 v->DPPPerPlane, 3110 v->HRatio, 3111 v->HRatioChroma, 3112 v->PixelClock, 3113 v->PSCL_THROUGHPUT_LUMA, 3114 v->PSCL_THROUGHPUT_CHROMA, 3115 v->DPPCLK, 3116 v->BytePerPixelC, 3117 v->SourceScan, 3118 v->NumberOfCursors, 3119 v->CursorWidth, 3120 v->CursorBPP, 3121 v->BlockWidth256BytesY, 3122 v->BlockHeight256BytesY, 3123 v->BlockWidth256BytesC, 3124 v->BlockHeight256BytesC, 3125 v->DisplayPipeLineDeliveryTimeLuma, 3126 v->DisplayPipeLineDeliveryTimeChroma, 3127 v->DisplayPipeLineDeliveryTimeLumaPrefetch, 3128 v->DisplayPipeLineDeliveryTimeChromaPrefetch, 3129 v->DisplayPipeRequestDeliveryTimeLuma, 3130 v->DisplayPipeRequestDeliveryTimeChroma, 3131 v->DisplayPipeRequestDeliveryTimeLumaPrefetch, 3132 v->DisplayPipeRequestDeliveryTimeChromaPrefetch, 3133 v->CursorRequestDeliveryTime, 3134 v->CursorRequestDeliveryTimePrefetch); 3135 3136 CalculateMetaAndPTETimes( 3137 v->NumberOfActivePlanes, 3138 v->GPUVMEnable, 3139 v->MetaChunkSize, 3140 v->MinMetaChunkSizeBytes, 3141 v->HTotal, 3142 v->VRatio, 3143 v->VRatioChroma, 3144 v->DestinationLinesToRequestRowInVBlank, 3145 v->DestinationLinesToRequestRowInImmediateFlip, 3146 v->DCCEnable, 3147 v->PixelClock, 3148 v->BytePerPixelY, 3149 v->BytePerPixelC, 3150 v->SourceScan, 3151 v->dpte_row_height, 3152 v->dpte_row_height_chroma, 3153 v->meta_row_width, 3154 v->meta_row_width_chroma, 3155 v->meta_row_height, 3156 v->meta_row_height_chroma, 3157 v->meta_req_width, 3158 v->meta_req_width_chroma, 3159 v->meta_req_height, 3160 v->meta_req_height_chroma, 3161 v->dpte_group_bytes, 3162 v->PTERequestSizeY, 3163 v->PTERequestSizeC, 3164 v->PixelPTEReqWidthY, 3165 v->PixelPTEReqHeightY, 3166 v->PixelPTEReqWidthC, 3167 v->PixelPTEReqHeightC, 3168 v->dpte_row_width_luma_ub, 3169 v->dpte_row_width_chroma_ub, 3170 v->DST_Y_PER_PTE_ROW_NOM_L, 3171 v->DST_Y_PER_PTE_ROW_NOM_C, 3172 v->DST_Y_PER_META_ROW_NOM_L, 3173 v->DST_Y_PER_META_ROW_NOM_C, 3174 v->TimePerMetaChunkNominal, 3175 v->TimePerChromaMetaChunkNominal, 3176 v->TimePerMetaChunkVBlank, 3177 v->TimePerChromaMetaChunkVBlank, 3178 v->TimePerMetaChunkFlip, 3179 v->TimePerChromaMetaChunkFlip, 3180 v->time_per_pte_group_nom_luma, 3181 v->time_per_pte_group_vblank_luma, 3182 v->time_per_pte_group_flip_luma, 3183 v->time_per_pte_group_nom_chroma, 3184 v->time_per_pte_group_vblank_chroma, 3185 v->time_per_pte_group_flip_chroma); 3186 3187 CalculateVMGroupAndRequestTimes( 3188 v->NumberOfActivePlanes, 3189 v->GPUVMEnable, 3190 v->GPUVMMaxPageTableLevels, 3191 v->HTotal, 3192 v->BytePerPixelC, 3193 v->DestinationLinesToRequestVMInVBlank, 3194 v->DestinationLinesToRequestVMInImmediateFlip, 3195 v->DCCEnable, 3196 v->PixelClock, 3197 v->dpte_row_width_luma_ub, 3198 v->dpte_row_width_chroma_ub, 3199 v->vm_group_bytes, 3200 v->dpde0_bytes_per_frame_ub_l, 3201 v->dpde0_bytes_per_frame_ub_c, 3202 v->meta_pte_bytes_per_frame_ub_l, 3203 v->meta_pte_bytes_per_frame_ub_c, 3204 v->TimePerVMGroupVBlank, 3205 v->TimePerVMGroupFlip, 3206 v->TimePerVMRequestVBlank, 3207 v->TimePerVMRequestFlip); 3208 3209 // Min TTUVBlank 3210 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 3211 if (PrefetchMode == 0) { 3212 v->AllowDRAMClockChangeDuringVBlank[k] = true; 3213 v->AllowDRAMSelfRefreshDuringVBlank[k] = true; 3214 v->MinTTUVBlank[k] = dml_max( 3215 v->DRAMClockChangeWatermark, 3216 dml_max(v->StutterEnterPlusExitWatermark, v->UrgentWatermark)); 3217 } else if (PrefetchMode == 1) { 3218 v->AllowDRAMClockChangeDuringVBlank[k] = false; 3219 v->AllowDRAMSelfRefreshDuringVBlank[k] = true; 3220 v->MinTTUVBlank[k] = dml_max(v->StutterEnterPlusExitWatermark, v->UrgentWatermark); 3221 } else { 3222 v->AllowDRAMClockChangeDuringVBlank[k] = false; 3223 v->AllowDRAMSelfRefreshDuringVBlank[k] = false; 3224 v->MinTTUVBlank[k] = v->UrgentWatermark; 3225 } 3226 if (!v->DynamicMetadataEnable[k]) 3227 v->MinTTUVBlank[k] = v->TCalc + v->MinTTUVBlank[k]; 3228 } 3229 3230 // DCC Configuration 3231 v->ActiveDPPs = 0; 3232 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 3233 CalculateDCCConfiguration(v->DCCEnable[k], false, // We should always know the direction DCCProgrammingAssumesScanDirectionUnknown, 3234 v->SourcePixelFormat[k], 3235 v->SurfaceWidthY[k], 3236 v->SurfaceWidthC[k], 3237 v->SurfaceHeightY[k], 3238 v->SurfaceHeightC[k], 3239 v->DETBufferSizeInKByte[0] * 1024, 3240 v->BlockHeight256BytesY[k], 3241 v->BlockHeight256BytesC[k], 3242 v->SurfaceTiling[k], 3243 v->BytePerPixelY[k], 3244 v->BytePerPixelC[k], 3245 v->BytePerPixelDETY[k], 3246 v->BytePerPixelDETC[k], 3247 v->SourceScan[k], 3248 &v->DCCYMaxUncompressedBlock[k], 3249 &v->DCCCMaxUncompressedBlock[k], 3250 &v->DCCYMaxCompressedBlock[k], 3251 &v->DCCCMaxCompressedBlock[k], 3252 &v->DCCYIndependentBlock[k], 3253 &v->DCCCIndependentBlock[k]); 3254 } 3255 3256 // VStartup Adjustment 3257 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 3258 bool isInterlaceTiming; 3259 double Tvstartup_margin = (v->MaxVStartupLines[k] - v->VStartup[k]) * v->HTotal[k] / v->PixelClock[k]; 3260 #ifdef __DML_VBA_DEBUG__ 3261 dml_print("DML::%s: k=%d, MinTTUVBlank = %f (before margin)\n", __func__, k, v->MinTTUVBlank[k]); 3262 #endif 3263 3264 v->MinTTUVBlank[k] = v->MinTTUVBlank[k] + Tvstartup_margin; 3265 3266 #ifdef __DML_VBA_DEBUG__ 3267 dml_print("DML::%s: k=%d, Tvstartup_margin = %f\n", __func__, k, Tvstartup_margin); 3268 dml_print("DML::%s: k=%d, MaxVStartupLines = %d\n", __func__, k, v->MaxVStartupLines[k]); 3269 dml_print("DML::%s: k=%d, VStartup = %d\n", __func__, k, v->VStartup[k]); 3270 dml_print("DML::%s: k=%d, MinTTUVBlank = %f\n", __func__, k, v->MinTTUVBlank[k]); 3271 #endif 3272 3273 v->Tdmdl[k] = v->Tdmdl[k] + Tvstartup_margin; 3274 if (v->DynamicMetadataEnable[k] && v->DynamicMetadataVMEnabled) { 3275 v->Tdmdl_vm[k] = v->Tdmdl_vm[k] + Tvstartup_margin; 3276 } 3277 3278 isInterlaceTiming = (v->Interlace[k] && !v->ProgressiveToInterlaceUnitInOPP); 3279 3280 v->MIN_DST_Y_NEXT_START[k] = ((isInterlaceTiming ? dml_floor((v->VTotal[k] - v->VFrontPorch[k]) / 2.0, 1.0) : v->VTotal[k]) 3281 - v->VFrontPorch[k]) 3282 + dml_max(1.0, dml_ceil(v->WritebackDelay[v->VoltageLevel][k] / (v->HTotal[k] / v->PixelClock[k]), 1.0)) 3283 + dml_floor(4.0 * v->TSetup[k] / (v->HTotal[k] / v->PixelClock[k]), 1.0) / 4.0; 3284 3285 v->VStartup[k] = (isInterlaceTiming ? (2 * v->MaxVStartupLines[k]) : v->MaxVStartupLines[k]); 3286 3287 if (((v->VUpdateOffsetPix[k] + v->VUpdateWidthPix[k] + v->VReadyOffsetPix[k]) / v->HTotal[k]) 3288 <= (isInterlaceTiming ? 3289 dml_floor((v->VTotal[k] - v->VActive[k] - v->VFrontPorch[k] - v->VStartup[k]) / 2.0, 1.0) : 3290 (int) (v->VTotal[k] - v->VActive[k] - v->VFrontPorch[k] - v->VStartup[k]))) { 3291 v->VREADY_AT_OR_AFTER_VSYNC[k] = true; 3292 } else { 3293 v->VREADY_AT_OR_AFTER_VSYNC[k] = false; 3294 } 3295 #ifdef __DML_VBA_DEBUG__ 3296 dml_print("DML::%s: k=%d, VStartup = %d (max)\n", __func__, k, v->VStartup[k]); 3297 dml_print("DML::%s: k=%d, VUpdateOffsetPix = %d\n", __func__, k, v->VUpdateOffsetPix[k]); 3298 dml_print("DML::%s: k=%d, VUpdateWidthPix = %d\n", __func__, k, v->VUpdateWidthPix[k]); 3299 dml_print("DML::%s: k=%d, VReadyOffsetPix = %d\n", __func__, k, v->VReadyOffsetPix[k]); 3300 dml_print("DML::%s: k=%d, HTotal = %d\n", __func__, k, v->HTotal[k]); 3301 dml_print("DML::%s: k=%d, VTotal = %d\n", __func__, k, v->VTotal[k]); 3302 dml_print("DML::%s: k=%d, VActive = %d\n", __func__, k, v->VActive[k]); 3303 dml_print("DML::%s: k=%d, VFrontPorch = %d\n", __func__, k, v->VFrontPorch[k]); 3304 dml_print("DML::%s: k=%d, VStartup = %d\n", __func__, k, v->VStartup[k]); 3305 dml_print("DML::%s: k=%d, MIN_DST_Y_NEXT_START = %f\n", __func__, k, v->MIN_DST_Y_NEXT_START[k]); 3306 dml_print("DML::%s: k=%d, VREADY_AT_OR_AFTER_VSYNC = %d\n", __func__, k, v->VREADY_AT_OR_AFTER_VSYNC[k]); 3307 #endif 3308 } 3309 3310 { 3311 //Maximum Bandwidth Used 3312 double TotalWRBandwidth = 0; 3313 double MaxPerPlaneVActiveWRBandwidth = 0; 3314 double WRBandwidth = 0; 3315 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 3316 if (v->WritebackEnable[k] == true && v->WritebackPixelFormat[k] == dm_444_32) { 3317 WRBandwidth = v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k] 3318 / (v->HTotal[k] * v->WritebackSourceHeight[k] / v->PixelClock[k]) * 4; 3319 } else if (v->WritebackEnable[k] == true) { 3320 WRBandwidth = v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k] 3321 / (v->HTotal[k] * v->WritebackSourceHeight[k] / v->PixelClock[k]) * 8; 3322 } 3323 TotalWRBandwidth = TotalWRBandwidth + WRBandwidth; 3324 MaxPerPlaneVActiveWRBandwidth = dml_max(MaxPerPlaneVActiveWRBandwidth, WRBandwidth); 3325 } 3326 3327 v->TotalDataReadBandwidth = 0; 3328 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 3329 v->TotalDataReadBandwidth = v->TotalDataReadBandwidth + v->ReadBandwidthPlaneLuma[k] + v->ReadBandwidthPlaneChroma[k]; 3330 } 3331 } 3332 // Stutter Efficiency 3333 CalculateStutterEfficiency( 3334 mode_lib, 3335 v->CompressedBufferSizeInkByte, 3336 v->UnboundedRequestEnabled, 3337 v->ConfigReturnBufferSizeInKByte, 3338 v->MetaFIFOSizeInKEntries, 3339 v->ZeroSizeBufferEntries, 3340 v->NumberOfActivePlanes, 3341 v->ROBBufferSizeInKByte, 3342 v->TotalDataReadBandwidth, 3343 v->DCFCLK, 3344 v->ReturnBW, 3345 v->COMPBUF_RESERVED_SPACE_64B, 3346 v->COMPBUF_RESERVED_SPACE_ZS, 3347 v->SRExitTime, 3348 v->SRExitZ8Time, 3349 v->SynchronizedVBlank, 3350 v->StutterEnterPlusExitWatermark, 3351 v->Z8StutterEnterPlusExitWatermark, 3352 v->ProgressiveToInterlaceUnitInOPP, 3353 v->Interlace, 3354 v->MinTTUVBlank, 3355 v->DPPPerPlane, 3356 v->DETBufferSizeY, 3357 v->BytePerPixelY, 3358 v->BytePerPixelDETY, 3359 v->SwathWidthY, 3360 v->SwathHeightY, 3361 v->SwathHeightC, 3362 v->DCCRateLuma, 3363 v->DCCRateChroma, 3364 v->DCCFractionOfZeroSizeRequestsLuma, 3365 v->DCCFractionOfZeroSizeRequestsChroma, 3366 v->HTotal, 3367 v->VTotal, 3368 v->PixelClock, 3369 v->VRatio, 3370 v->SourceScan, 3371 v->BlockHeight256BytesY, 3372 v->BlockWidth256BytesY, 3373 v->BlockHeight256BytesC, 3374 v->BlockWidth256BytesC, 3375 v->DCCYMaxUncompressedBlock, 3376 v->DCCCMaxUncompressedBlock, 3377 v->VActive, 3378 v->DCCEnable, 3379 v->WritebackEnable, 3380 v->ReadBandwidthPlaneLuma, 3381 v->ReadBandwidthPlaneChroma, 3382 v->meta_row_bw, 3383 v->dpte_row_bw, 3384 &v->StutterEfficiencyNotIncludingVBlank, 3385 &v->StutterEfficiency, 3386 &v->NumberOfStutterBurstsPerFrame, 3387 &v->Z8StutterEfficiencyNotIncludingVBlank, 3388 &v->Z8StutterEfficiency, 3389 &v->Z8NumberOfStutterBurstsPerFrame, 3390 &v->StutterPeriod); 3391 } 3392 3393 static void DisplayPipeConfiguration(struct display_mode_lib *mode_lib) 3394 { 3395 struct vba_vars_st *v = &mode_lib->vba; 3396 // Display Pipe Configuration 3397 double BytePerPixDETY[DC__NUM_DPP__MAX]; 3398 double BytePerPixDETC[DC__NUM_DPP__MAX]; 3399 int BytePerPixY[DC__NUM_DPP__MAX]; 3400 int BytePerPixC[DC__NUM_DPP__MAX]; 3401 int Read256BytesBlockHeightY[DC__NUM_DPP__MAX]; 3402 int Read256BytesBlockHeightC[DC__NUM_DPP__MAX]; 3403 int Read256BytesBlockWidthY[DC__NUM_DPP__MAX]; 3404 int Read256BytesBlockWidthC[DC__NUM_DPP__MAX]; 3405 double dummy1[DC__NUM_DPP__MAX]; 3406 double dummy2[DC__NUM_DPP__MAX]; 3407 double dummy3[DC__NUM_DPP__MAX]; 3408 double dummy4[DC__NUM_DPP__MAX]; 3409 int dummy5[DC__NUM_DPP__MAX]; 3410 int dummy6[DC__NUM_DPP__MAX]; 3411 bool dummy7[DC__NUM_DPP__MAX]; 3412 bool dummysinglestring; 3413 3414 unsigned int k; 3415 3416 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 3417 3418 CalculateBytePerPixelAnd256BBlockSizes( 3419 v->SourcePixelFormat[k], 3420 v->SurfaceTiling[k], 3421 &BytePerPixY[k], 3422 &BytePerPixC[k], 3423 &BytePerPixDETY[k], 3424 &BytePerPixDETC[k], 3425 &Read256BytesBlockHeightY[k], 3426 &Read256BytesBlockHeightC[k], 3427 &Read256BytesBlockWidthY[k], 3428 &Read256BytesBlockWidthC[k]); 3429 } 3430 3431 CalculateSwathAndDETConfiguration( 3432 false, 3433 v->NumberOfActivePlanes, 3434 v->DETBufferSizeInKByte[0], 3435 dummy1, 3436 dummy2, 3437 v->SourceScan, 3438 v->SourcePixelFormat, 3439 v->SurfaceTiling, 3440 v->ViewportWidth, 3441 v->ViewportHeight, 3442 v->SurfaceWidthY, 3443 v->SurfaceWidthC, 3444 v->SurfaceHeightY, 3445 v->SurfaceHeightC, 3446 Read256BytesBlockHeightY, 3447 Read256BytesBlockHeightC, 3448 Read256BytesBlockWidthY, 3449 Read256BytesBlockWidthC, 3450 v->ODMCombineEnabled, 3451 v->BlendingAndTiming, 3452 BytePerPixY, 3453 BytePerPixC, 3454 BytePerPixDETY, 3455 BytePerPixDETC, 3456 v->HActive, 3457 v->HRatio, 3458 v->HRatioChroma, 3459 v->DPPPerPlane, 3460 dummy5, 3461 dummy6, 3462 dummy3, 3463 dummy4, 3464 v->SwathHeightY, 3465 v->SwathHeightC, 3466 v->DETBufferSizeY, 3467 v->DETBufferSizeC, 3468 dummy7, 3469 &dummysinglestring); 3470 } 3471 3472 static bool CalculateBytePerPixelAnd256BBlockSizes( 3473 enum source_format_class SourcePixelFormat, 3474 enum dm_swizzle_mode SurfaceTiling, 3475 unsigned int *BytePerPixelY, 3476 unsigned int *BytePerPixelC, 3477 double *BytePerPixelDETY, 3478 double *BytePerPixelDETC, 3479 unsigned int *BlockHeight256BytesY, 3480 unsigned int *BlockHeight256BytesC, 3481 unsigned int *BlockWidth256BytesY, 3482 unsigned int *BlockWidth256BytesC) 3483 { 3484 if (SourcePixelFormat == dm_444_64) { 3485 *BytePerPixelDETY = 8; 3486 *BytePerPixelDETC = 0; 3487 *BytePerPixelY = 8; 3488 *BytePerPixelC = 0; 3489 } else if (SourcePixelFormat == dm_444_32 || SourcePixelFormat == dm_rgbe) { 3490 *BytePerPixelDETY = 4; 3491 *BytePerPixelDETC = 0; 3492 *BytePerPixelY = 4; 3493 *BytePerPixelC = 0; 3494 } else if (SourcePixelFormat == dm_444_16) { 3495 *BytePerPixelDETY = 2; 3496 *BytePerPixelDETC = 0; 3497 *BytePerPixelY = 2; 3498 *BytePerPixelC = 0; 3499 } else if (SourcePixelFormat == dm_444_8) { 3500 *BytePerPixelDETY = 1; 3501 *BytePerPixelDETC = 0; 3502 *BytePerPixelY = 1; 3503 *BytePerPixelC = 0; 3504 } else if (SourcePixelFormat == dm_rgbe_alpha) { 3505 *BytePerPixelDETY = 4; 3506 *BytePerPixelDETC = 1; 3507 *BytePerPixelY = 4; 3508 *BytePerPixelC = 1; 3509 } else if (SourcePixelFormat == dm_420_8) { 3510 *BytePerPixelDETY = 1; 3511 *BytePerPixelDETC = 2; 3512 *BytePerPixelY = 1; 3513 *BytePerPixelC = 2; 3514 } else if (SourcePixelFormat == dm_420_12) { 3515 *BytePerPixelDETY = 2; 3516 *BytePerPixelDETC = 4; 3517 *BytePerPixelY = 2; 3518 *BytePerPixelC = 4; 3519 } else { 3520 *BytePerPixelDETY = 4.0 / 3; 3521 *BytePerPixelDETC = 8.0 / 3; 3522 *BytePerPixelY = 2; 3523 *BytePerPixelC = 4; 3524 } 3525 3526 if ((SourcePixelFormat == dm_444_64 || SourcePixelFormat == dm_444_32 || SourcePixelFormat == dm_444_16 || SourcePixelFormat == dm_444_8 || SourcePixelFormat == dm_mono_16 3527 || SourcePixelFormat == dm_mono_8 || SourcePixelFormat == dm_rgbe)) { 3528 if (SurfaceTiling == dm_sw_linear) { 3529 *BlockHeight256BytesY = 1; 3530 } else if (SourcePixelFormat == dm_444_64) { 3531 *BlockHeight256BytesY = 4; 3532 } else if (SourcePixelFormat == dm_444_8) { 3533 *BlockHeight256BytesY = 16; 3534 } else { 3535 *BlockHeight256BytesY = 8; 3536 } 3537 *BlockWidth256BytesY = 256U / *BytePerPixelY / *BlockHeight256BytesY; 3538 *BlockHeight256BytesC = 0; 3539 *BlockWidth256BytesC = 0; 3540 } else { 3541 if (SurfaceTiling == dm_sw_linear) { 3542 *BlockHeight256BytesY = 1; 3543 *BlockHeight256BytesC = 1; 3544 } else if (SourcePixelFormat == dm_rgbe_alpha) { 3545 *BlockHeight256BytesY = 8; 3546 *BlockHeight256BytesC = 16; 3547 } else if (SourcePixelFormat == dm_420_8) { 3548 *BlockHeight256BytesY = 16; 3549 *BlockHeight256BytesC = 8; 3550 } else { 3551 *BlockHeight256BytesY = 8; 3552 *BlockHeight256BytesC = 8; 3553 } 3554 *BlockWidth256BytesY = 256U / *BytePerPixelY / *BlockHeight256BytesY; 3555 *BlockWidth256BytesC = 256U / *BytePerPixelC / *BlockHeight256BytesC; 3556 } 3557 return true; 3558 } 3559 3560 static double CalculateTWait(unsigned int PrefetchMode, double DRAMClockChangeLatency, double UrgentLatency, double SREnterPlusExitTime) 3561 { 3562 if (PrefetchMode == 0) { 3563 return dml_max(DRAMClockChangeLatency + UrgentLatency, dml_max(SREnterPlusExitTime, UrgentLatency)); 3564 } else if (PrefetchMode == 1) { 3565 return dml_max(SREnterPlusExitTime, UrgentLatency); 3566 } else { 3567 return UrgentLatency; 3568 } 3569 } 3570 3571 double dml31_CalculateWriteBackDISPCLK( 3572 enum source_format_class WritebackPixelFormat, 3573 double PixelClock, 3574 double WritebackHRatio, 3575 double WritebackVRatio, 3576 unsigned int WritebackHTaps, 3577 unsigned int WritebackVTaps, 3578 long WritebackSourceWidth, 3579 long WritebackDestinationWidth, 3580 unsigned int HTotal, 3581 unsigned int WritebackLineBufferSize) 3582 { 3583 double DISPCLK_H, DISPCLK_V, DISPCLK_HB; 3584 3585 DISPCLK_H = PixelClock * dml_ceil(WritebackHTaps / 8.0, 1) / WritebackHRatio; 3586 DISPCLK_V = PixelClock * (WritebackVTaps * dml_ceil(WritebackDestinationWidth / 6.0, 1) + 8.0) / HTotal; 3587 DISPCLK_HB = PixelClock * WritebackVTaps * (WritebackDestinationWidth * WritebackVTaps - WritebackLineBufferSize / 57.0) / 6.0 / WritebackSourceWidth; 3588 return dml_max3(DISPCLK_H, DISPCLK_V, DISPCLK_HB); 3589 } 3590 3591 static double CalculateWriteBackDelay( 3592 enum source_format_class WritebackPixelFormat, 3593 double WritebackHRatio, 3594 double WritebackVRatio, 3595 unsigned int WritebackVTaps, 3596 int WritebackDestinationWidth, 3597 int WritebackDestinationHeight, 3598 int WritebackSourceHeight, 3599 unsigned int HTotal) 3600 { 3601 double CalculateWriteBackDelay; 3602 double Line_length; 3603 double Output_lines_last_notclamped; 3604 double WritebackVInit; 3605 3606 WritebackVInit = (WritebackVRatio + WritebackVTaps + 1) / 2; 3607 Line_length = dml_max((double) WritebackDestinationWidth, dml_ceil(WritebackDestinationWidth / 6.0, 1) * WritebackVTaps); 3608 Output_lines_last_notclamped = WritebackDestinationHeight - 1 - dml_ceil((WritebackSourceHeight - WritebackVInit) / WritebackVRatio, 1); 3609 if (Output_lines_last_notclamped < 0) { 3610 CalculateWriteBackDelay = 0; 3611 } else { 3612 CalculateWriteBackDelay = Output_lines_last_notclamped * Line_length + (HTotal - WritebackDestinationWidth) + 80; 3613 } 3614 return CalculateWriteBackDelay; 3615 } 3616 3617 static void CalculateVupdateAndDynamicMetadataParameters( 3618 int MaxInterDCNTileRepeaters, 3619 double DPPCLK, 3620 double DISPCLK, 3621 double DCFClkDeepSleep, 3622 double PixelClock, 3623 int HTotal, 3624 int VBlank, 3625 int DynamicMetadataTransmittedBytes, 3626 int DynamicMetadataLinesBeforeActiveRequired, 3627 int InterlaceEnable, 3628 bool ProgressiveToInterlaceUnitInOPP, 3629 double *TSetup, 3630 double *Tdmbf, 3631 double *Tdmec, 3632 double *Tdmsks, 3633 int *VUpdateOffsetPix, 3634 double *VUpdateWidthPix, 3635 double *VReadyOffsetPix) 3636 { 3637 double TotalRepeaterDelayTime; 3638 3639 TotalRepeaterDelayTime = MaxInterDCNTileRepeaters * (2 / DPPCLK + 3 / DISPCLK); 3640 *VUpdateWidthPix = dml_ceil((14.0 / DCFClkDeepSleep + 12.0 / DPPCLK + TotalRepeaterDelayTime) * PixelClock, 1.0); 3641 *VReadyOffsetPix = dml_ceil(dml_max(150.0 / DPPCLK, TotalRepeaterDelayTime + 20.0 / DCFClkDeepSleep + 10.0 / DPPCLK) * PixelClock, 1.0); 3642 *VUpdateOffsetPix = dml_ceil(HTotal / 4.0, 1); 3643 *TSetup = (*VUpdateOffsetPix + *VUpdateWidthPix + *VReadyOffsetPix) / PixelClock; 3644 *Tdmbf = DynamicMetadataTransmittedBytes / 4.0 / DISPCLK; 3645 *Tdmec = HTotal / PixelClock; 3646 if (DynamicMetadataLinesBeforeActiveRequired == 0) { 3647 *Tdmsks = VBlank * HTotal / PixelClock / 2.0; 3648 } else { 3649 *Tdmsks = DynamicMetadataLinesBeforeActiveRequired * HTotal / PixelClock; 3650 } 3651 if (InterlaceEnable == 1 && ProgressiveToInterlaceUnitInOPP == false) { 3652 *Tdmsks = *Tdmsks / 2; 3653 } 3654 #ifdef __DML_VBA_DEBUG__ 3655 dml_print("DML::%s: VUpdateWidthPix = %d\n", __func__, *VUpdateWidthPix); 3656 dml_print("DML::%s: VReadyOffsetPix = %d\n", __func__, *VReadyOffsetPix); 3657 dml_print("DML::%s: VUpdateOffsetPix = %d\n", __func__, *VUpdateOffsetPix); 3658 #endif 3659 } 3660 3661 static void CalculateRowBandwidth( 3662 bool GPUVMEnable, 3663 enum source_format_class SourcePixelFormat, 3664 double VRatio, 3665 double VRatioChroma, 3666 bool DCCEnable, 3667 double LineTime, 3668 unsigned int MetaRowByteLuma, 3669 unsigned int MetaRowByteChroma, 3670 unsigned int meta_row_height_luma, 3671 unsigned int meta_row_height_chroma, 3672 unsigned int PixelPTEBytesPerRowLuma, 3673 unsigned int PixelPTEBytesPerRowChroma, 3674 unsigned int dpte_row_height_luma, 3675 unsigned int dpte_row_height_chroma, 3676 double *meta_row_bw, 3677 double *dpte_row_bw) 3678 { 3679 if (DCCEnable != true) { 3680 *meta_row_bw = 0; 3681 } else if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12 || SourcePixelFormat == dm_rgbe_alpha) { 3682 *meta_row_bw = VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime) + VRatioChroma * MetaRowByteChroma / (meta_row_height_chroma * LineTime); 3683 } else { 3684 *meta_row_bw = VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime); 3685 } 3686 3687 if (GPUVMEnable != true) { 3688 *dpte_row_bw = 0; 3689 } else if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12 || SourcePixelFormat == dm_rgbe_alpha) { 3690 *dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime) 3691 + VRatioChroma * PixelPTEBytesPerRowChroma / (dpte_row_height_chroma * LineTime); 3692 } else { 3693 *dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime); 3694 } 3695 } 3696 3697 static void CalculateFlipSchedule( 3698 struct display_mode_lib *mode_lib, 3699 double HostVMInefficiencyFactor, 3700 double UrgentExtraLatency, 3701 double UrgentLatency, 3702 unsigned int GPUVMMaxPageTableLevels, 3703 bool HostVMEnable, 3704 unsigned int HostVMMaxNonCachedPageTableLevels, 3705 bool GPUVMEnable, 3706 double HostVMMinPageSize, 3707 double PDEAndMetaPTEBytesPerFrame, 3708 double MetaRowBytes, 3709 double DPTEBytesPerRow, 3710 double BandwidthAvailableForImmediateFlip, 3711 unsigned int TotImmediateFlipBytes, 3712 enum source_format_class SourcePixelFormat, 3713 double LineTime, 3714 double VRatio, 3715 double VRatioChroma, 3716 double Tno_bw, 3717 bool DCCEnable, 3718 unsigned int dpte_row_height, 3719 unsigned int meta_row_height, 3720 unsigned int dpte_row_height_chroma, 3721 unsigned int meta_row_height_chroma, 3722 double *DestinationLinesToRequestVMInImmediateFlip, 3723 double *DestinationLinesToRequestRowInImmediateFlip, 3724 double *final_flip_bw, 3725 bool *ImmediateFlipSupportedForPipe) 3726 { 3727 double min_row_time = 0.0; 3728 unsigned int HostVMDynamicLevelsTrips; 3729 double TimeForFetchingMetaPTEImmediateFlip; 3730 double TimeForFetchingRowInVBlankImmediateFlip; 3731 double ImmediateFlipBW; 3732 3733 if (GPUVMEnable == true && HostVMEnable == true) { 3734 HostVMDynamicLevelsTrips = HostVMMaxNonCachedPageTableLevels; 3735 } else { 3736 HostVMDynamicLevelsTrips = 0; 3737 } 3738 3739 if (GPUVMEnable == true || DCCEnable == true) { 3740 ImmediateFlipBW = (PDEAndMetaPTEBytesPerFrame + MetaRowBytes + DPTEBytesPerRow) * BandwidthAvailableForImmediateFlip / TotImmediateFlipBytes; 3741 } 3742 3743 if (GPUVMEnable == true) { 3744 TimeForFetchingMetaPTEImmediateFlip = dml_max3( 3745 Tno_bw + PDEAndMetaPTEBytesPerFrame * HostVMInefficiencyFactor / ImmediateFlipBW, 3746 UrgentExtraLatency + UrgentLatency * (GPUVMMaxPageTableLevels * (HostVMDynamicLevelsTrips + 1) - 1), 3747 LineTime / 4.0); 3748 } else { 3749 TimeForFetchingMetaPTEImmediateFlip = 0; 3750 } 3751 3752 *DestinationLinesToRequestVMInImmediateFlip = dml_ceil(4.0 * (TimeForFetchingMetaPTEImmediateFlip / LineTime), 1) / 4.0; 3753 if ((GPUVMEnable == true || DCCEnable == true)) { 3754 TimeForFetchingRowInVBlankImmediateFlip = dml_max3( 3755 (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / ImmediateFlipBW, 3756 UrgentLatency * (HostVMDynamicLevelsTrips + 1), 3757 LineTime / 4); 3758 } else { 3759 TimeForFetchingRowInVBlankImmediateFlip = 0; 3760 } 3761 3762 *DestinationLinesToRequestRowInImmediateFlip = dml_ceil(4.0 * (TimeForFetchingRowInVBlankImmediateFlip / LineTime), 1) / 4.0; 3763 3764 if (GPUVMEnable == true) { 3765 *final_flip_bw = dml_max( 3766 PDEAndMetaPTEBytesPerFrame * HostVMInefficiencyFactor / (*DestinationLinesToRequestVMInImmediateFlip * LineTime), 3767 (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / (*DestinationLinesToRequestRowInImmediateFlip * LineTime)); 3768 } else if ((GPUVMEnable == true || DCCEnable == true)) { 3769 *final_flip_bw = (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / (*DestinationLinesToRequestRowInImmediateFlip * LineTime); 3770 } else { 3771 *final_flip_bw = 0; 3772 } 3773 3774 if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_rgbe_alpha) { 3775 if (GPUVMEnable == true && DCCEnable != true) { 3776 min_row_time = dml_min(dpte_row_height * LineTime / VRatio, dpte_row_height_chroma * LineTime / VRatioChroma); 3777 } else if (GPUVMEnable != true && DCCEnable == true) { 3778 min_row_time = dml_min(meta_row_height * LineTime / VRatio, meta_row_height_chroma * LineTime / VRatioChroma); 3779 } else { 3780 min_row_time = dml_min4( 3781 dpte_row_height * LineTime / VRatio, 3782 meta_row_height * LineTime / VRatio, 3783 dpte_row_height_chroma * LineTime / VRatioChroma, 3784 meta_row_height_chroma * LineTime / VRatioChroma); 3785 } 3786 } else { 3787 if (GPUVMEnable == true && DCCEnable != true) { 3788 min_row_time = dpte_row_height * LineTime / VRatio; 3789 } else if (GPUVMEnable != true && DCCEnable == true) { 3790 min_row_time = meta_row_height * LineTime / VRatio; 3791 } else { 3792 min_row_time = dml_min(dpte_row_height * LineTime / VRatio, meta_row_height * LineTime / VRatio); 3793 } 3794 } 3795 3796 if (*DestinationLinesToRequestVMInImmediateFlip >= 32 || *DestinationLinesToRequestRowInImmediateFlip >= 16 3797 || TimeForFetchingMetaPTEImmediateFlip + 2 * TimeForFetchingRowInVBlankImmediateFlip > min_row_time) { 3798 *ImmediateFlipSupportedForPipe = false; 3799 } else { 3800 *ImmediateFlipSupportedForPipe = true; 3801 } 3802 3803 #ifdef __DML_VBA_DEBUG__ 3804 dml_print("DML::%s: DestinationLinesToRequestVMInImmediateFlip = %f\n", __func__, *DestinationLinesToRequestVMInImmediateFlip); 3805 dml_print("DML::%s: DestinationLinesToRequestRowInImmediateFlip = %f\n", __func__, *DestinationLinesToRequestRowInImmediateFlip); 3806 dml_print("DML::%s: TimeForFetchingMetaPTEImmediateFlip = %f\n", __func__, TimeForFetchingMetaPTEImmediateFlip); 3807 dml_print("DML::%s: TimeForFetchingRowInVBlankImmediateFlip = %f\n", __func__, TimeForFetchingRowInVBlankImmediateFlip); 3808 dml_print("DML::%s: min_row_time = %f\n", __func__, min_row_time); 3809 dml_print("DML::%s: ImmediateFlipSupportedForPipe = %d\n", __func__, *ImmediateFlipSupportedForPipe); 3810 #endif 3811 3812 } 3813 3814 static double TruncToValidBPP( 3815 double LinkBitRate, 3816 int Lanes, 3817 int HTotal, 3818 int HActive, 3819 double PixelClock, 3820 double DesiredBPP, 3821 bool DSCEnable, 3822 enum output_encoder_class Output, 3823 enum output_format_class Format, 3824 unsigned int DSCInputBitPerComponent, 3825 int DSCSlices, 3826 int AudioRate, 3827 int AudioLayout, 3828 enum odm_combine_mode ODMCombine) 3829 { 3830 double MaxLinkBPP; 3831 int MinDSCBPP; 3832 double MaxDSCBPP; 3833 int NonDSCBPP0; 3834 int NonDSCBPP1; 3835 int NonDSCBPP2; 3836 3837 if (Format == dm_420) { 3838 NonDSCBPP0 = 12; 3839 NonDSCBPP1 = 15; 3840 NonDSCBPP2 = 18; 3841 MinDSCBPP = 6; 3842 MaxDSCBPP = 1.5 * DSCInputBitPerComponent - 1 / 16; 3843 } else if (Format == dm_444) { 3844 NonDSCBPP0 = 24; 3845 NonDSCBPP1 = 30; 3846 NonDSCBPP2 = 36; 3847 MinDSCBPP = 8; 3848 MaxDSCBPP = 3 * DSCInputBitPerComponent - 1.0 / 16; 3849 } else { 3850 3851 NonDSCBPP0 = 16; 3852 NonDSCBPP1 = 20; 3853 NonDSCBPP2 = 24; 3854 3855 if (Format == dm_n422) { 3856 MinDSCBPP = 7; 3857 MaxDSCBPP = 2 * DSCInputBitPerComponent - 1.0 / 16.0; 3858 } else { 3859 MinDSCBPP = 8; 3860 MaxDSCBPP = 3 * DSCInputBitPerComponent - 1.0 / 16.0; 3861 } 3862 } 3863 3864 if (DSCEnable && Output == dm_dp) { 3865 MaxLinkBPP = LinkBitRate / 10 * 8 * Lanes / PixelClock * (1 - 2.4 / 100); 3866 } else { 3867 MaxLinkBPP = LinkBitRate / 10 * 8 * Lanes / PixelClock; 3868 } 3869 3870 if (ODMCombine == dm_odm_combine_mode_4to1 && MaxLinkBPP > 16) { 3871 MaxLinkBPP = 16; 3872 } else if (ODMCombine == dm_odm_combine_mode_2to1 && MaxLinkBPP > 32) { 3873 MaxLinkBPP = 32; 3874 } 3875 3876 if (DesiredBPP == 0) { 3877 if (DSCEnable) { 3878 if (MaxLinkBPP < MinDSCBPP) { 3879 return BPP_INVALID; 3880 } else if (MaxLinkBPP >= MaxDSCBPP) { 3881 return MaxDSCBPP; 3882 } else { 3883 return dml_floor(16.0 * MaxLinkBPP, 1.0) / 16.0; 3884 } 3885 } else { 3886 if (MaxLinkBPP >= NonDSCBPP2) { 3887 return NonDSCBPP2; 3888 } else if (MaxLinkBPP >= NonDSCBPP1) { 3889 return NonDSCBPP1; 3890 } else if (MaxLinkBPP >= NonDSCBPP0) { 3891 return 16.0; 3892 } else { 3893 return BPP_INVALID; 3894 } 3895 } 3896 } else { 3897 if (!((DSCEnable == false && (DesiredBPP == NonDSCBPP2 || DesiredBPP == NonDSCBPP1 || DesiredBPP <= NonDSCBPP0)) 3898 || (DSCEnable && DesiredBPP >= MinDSCBPP && DesiredBPP <= MaxDSCBPP))) { 3899 return BPP_INVALID; 3900 } else { 3901 return DesiredBPP; 3902 } 3903 } 3904 return BPP_INVALID; 3905 } 3906 3907 static noinline void CalculatePrefetchSchedulePerPlane( 3908 struct display_mode_lib *mode_lib, 3909 double HostVMInefficiencyFactor, 3910 int i, 3911 unsigned j, 3912 unsigned k) 3913 { 3914 struct vba_vars_st *v = &mode_lib->vba; 3915 Pipe myPipe; 3916 3917 myPipe.DPPCLK = v->RequiredDPPCLK[i][j][k]; 3918 myPipe.DISPCLK = v->RequiredDISPCLK[i][j]; 3919 myPipe.PixelClock = v->PixelClock[k]; 3920 myPipe.DCFCLKDeepSleep = v->ProjectedDCFCLKDeepSleep[i][j]; 3921 myPipe.DPPPerPlane = v->NoOfDPP[i][j][k]; 3922 myPipe.ScalerEnabled = v->ScalerEnabled[k]; 3923 myPipe.VRatio = mode_lib->vba.VRatio[k]; 3924 myPipe.VRatioChroma = mode_lib->vba.VRatioChroma[k]; 3925 3926 myPipe.SourceScan = v->SourceScan[k]; 3927 myPipe.BlockWidth256BytesY = v->Read256BlockWidthY[k]; 3928 myPipe.BlockHeight256BytesY = v->Read256BlockHeightY[k]; 3929 myPipe.BlockWidth256BytesC = v->Read256BlockWidthC[k]; 3930 myPipe.BlockHeight256BytesC = v->Read256BlockHeightC[k]; 3931 myPipe.InterlaceEnable = v->Interlace[k]; 3932 myPipe.NumberOfCursors = v->NumberOfCursors[k]; 3933 myPipe.VBlank = v->VTotal[k] - v->VActive[k]; 3934 myPipe.HTotal = v->HTotal[k]; 3935 myPipe.DCCEnable = v->DCCEnable[k]; 3936 myPipe.ODMCombineIsEnabled = v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1 3937 || v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1; 3938 myPipe.SourcePixelFormat = v->SourcePixelFormat[k]; 3939 myPipe.BytePerPixelY = v->BytePerPixelY[k]; 3940 myPipe.BytePerPixelC = v->BytePerPixelC[k]; 3941 myPipe.ProgressiveToInterlaceUnitInOPP = v->ProgressiveToInterlaceUnitInOPP; 3942 v->NoTimeForPrefetch[i][j][k] = CalculatePrefetchSchedule( 3943 mode_lib, 3944 HostVMInefficiencyFactor, 3945 &myPipe, 3946 v->DSCDelayPerState[i][k], 3947 v->DPPCLKDelaySubtotal + v->DPPCLKDelayCNVCFormater, 3948 v->DPPCLKDelaySCL, 3949 v->DPPCLKDelaySCLLBOnly, 3950 v->DPPCLKDelayCNVCCursor, 3951 v->DISPCLKDelaySubtotal, 3952 v->SwathWidthYThisState[k] / v->HRatio[k], 3953 v->OutputFormat[k], 3954 v->MaxInterDCNTileRepeaters, 3955 dml_min(v->MaxVStartup, v->MaximumVStartup[i][j][k]), 3956 v->MaximumVStartup[i][j][k], 3957 v->GPUVMMaxPageTableLevels, 3958 v->GPUVMEnable, 3959 v->HostVMEnable, 3960 v->HostVMMaxNonCachedPageTableLevels, 3961 v->HostVMMinPageSize, 3962 v->DynamicMetadataEnable[k], 3963 v->DynamicMetadataVMEnabled, 3964 v->DynamicMetadataLinesBeforeActiveRequired[k], 3965 v->DynamicMetadataTransmittedBytes[k], 3966 v->UrgLatency[i], 3967 v->ExtraLatency, 3968 v->TimeCalc, 3969 v->PDEAndMetaPTEBytesPerFrame[i][j][k], 3970 v->MetaRowBytes[i][j][k], 3971 v->DPTEBytesPerRow[i][j][k], 3972 v->PrefetchLinesY[i][j][k], 3973 v->SwathWidthYThisState[k], 3974 v->PrefillY[k], 3975 v->MaxNumSwY[k], 3976 v->PrefetchLinesC[i][j][k], 3977 v->SwathWidthCThisState[k], 3978 v->PrefillC[k], 3979 v->MaxNumSwC[k], 3980 v->swath_width_luma_ub_this_state[k], 3981 v->swath_width_chroma_ub_this_state[k], 3982 v->SwathHeightYThisState[k], 3983 v->SwathHeightCThisState[k], 3984 v->TWait, 3985 &v->DSTXAfterScaler[k], 3986 &v->DSTYAfterScaler[k], 3987 &v->LineTimesForPrefetch[k], 3988 &v->PrefetchBW[k], 3989 &v->LinesForMetaPTE[k], 3990 &v->LinesForMetaAndDPTERow[k], 3991 &v->VRatioPreY[i][j][k], 3992 &v->VRatioPreC[i][j][k], 3993 &v->RequiredPrefetchPixelDataBWLuma[i][j][k], 3994 &v->RequiredPrefetchPixelDataBWChroma[i][j][k], 3995 &v->NoTimeForDynamicMetadata[i][j][k], 3996 &v->Tno_bw[k], 3997 &v->prefetch_vmrow_bw[k], 3998 &v->dummy7[k], 3999 &v->dummy8[k], 4000 &v->dummy13[k], 4001 &v->VUpdateOffsetPix[k], 4002 &v->VUpdateWidthPix[k], 4003 &v->VReadyOffsetPix[k]); 4004 } 4005 4006 void dml31_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_lib) 4007 { 4008 struct vba_vars_st *v = &mode_lib->vba; 4009 4010 int i, j; 4011 unsigned int k, m; 4012 int ReorderingBytes; 4013 int MinPrefetchMode = 0, MaxPrefetchMode = 2; 4014 bool NoChroma = true; 4015 bool EnoughWritebackUnits = true; 4016 bool P2IWith420 = false; 4017 bool DSCOnlyIfNecessaryWithBPP = false; 4018 bool DSC422NativeNotSupported = false; 4019 double MaxTotalVActiveRDBandwidth; 4020 bool ViewportExceedsSurface = false; 4021 bool FMTBufferExceeded = false; 4022 4023 /*MODE SUPPORT, VOLTAGE STATE AND SOC CONFIGURATION*/ 4024 4025 CalculateMinAndMaxPrefetchMode( 4026 mode_lib->vba.AllowDRAMSelfRefreshOrDRAMClockChangeInVblank, 4027 &MinPrefetchMode, &MaxPrefetchMode); 4028 4029 /*Scale Ratio, taps Support Check*/ 4030 4031 v->ScaleRatioAndTapsSupport = true; 4032 for (k = 0; k < v->NumberOfActivePlanes; k++) { 4033 if (v->ScalerEnabled[k] == false 4034 && ((v->SourcePixelFormat[k] != dm_444_64 && v->SourcePixelFormat[k] != dm_444_32 4035 && v->SourcePixelFormat[k] != dm_444_16 && v->SourcePixelFormat[k] != dm_mono_16 4036 && v->SourcePixelFormat[k] != dm_mono_8 && v->SourcePixelFormat[k] != dm_rgbe 4037 && v->SourcePixelFormat[k] != dm_rgbe_alpha) || v->HRatio[k] != 1.0 || v->htaps[k] != 1.0 4038 || v->VRatio[k] != 1.0 || v->vtaps[k] != 1.0)) { 4039 v->ScaleRatioAndTapsSupport = false; 4040 } else if (v->vtaps[k] < 1.0 || v->vtaps[k] > 8.0 || v->htaps[k] < 1.0 || v->htaps[k] > 8.0 4041 || (v->htaps[k] > 1.0 && (v->htaps[k] % 2) == 1) || v->HRatio[k] > v->MaxHSCLRatio 4042 || v->VRatio[k] > v->MaxVSCLRatio || v->HRatio[k] > v->htaps[k] 4043 || v->VRatio[k] > v->vtaps[k] 4044 || (v->SourcePixelFormat[k] != dm_444_64 && v->SourcePixelFormat[k] != dm_444_32 4045 && v->SourcePixelFormat[k] != dm_444_16 && v->SourcePixelFormat[k] != dm_mono_16 4046 && v->SourcePixelFormat[k] != dm_mono_8 && v->SourcePixelFormat[k] != dm_rgbe 4047 && (v->VTAPsChroma[k] < 1 || v->VTAPsChroma[k] > 8 || v->HTAPsChroma[k] < 1 4048 || v->HTAPsChroma[k] > 8 || (v->HTAPsChroma[k] > 1 && v->HTAPsChroma[k] % 2 == 1) 4049 || v->HRatioChroma[k] > v->MaxHSCLRatio 4050 || v->VRatioChroma[k] > v->MaxVSCLRatio 4051 || v->HRatioChroma[k] > v->HTAPsChroma[k] 4052 || v->VRatioChroma[k] > v->VTAPsChroma[k]))) { 4053 v->ScaleRatioAndTapsSupport = false; 4054 } 4055 } 4056 /*Source Format, Pixel Format and Scan Support Check*/ 4057 4058 v->SourceFormatPixelAndScanSupport = true; 4059 for (k = 0; k < v->NumberOfActivePlanes; k++) { 4060 if ((v->SurfaceTiling[k] == dm_sw_linear && (!(v->SourceScan[k] != dm_vert) || v->DCCEnable[k] == true)) 4061 || ((v->SurfaceTiling[k] == dm_sw_64kb_d || v->SurfaceTiling[k] == dm_sw_64kb_d_t 4062 || v->SurfaceTiling[k] == dm_sw_64kb_d_x) && !(v->SourcePixelFormat[k] == dm_444_64))) { 4063 v->SourceFormatPixelAndScanSupport = false; 4064 } 4065 } 4066 /*Bandwidth Support Check*/ 4067 4068 for (k = 0; k < v->NumberOfActivePlanes; k++) { 4069 CalculateBytePerPixelAnd256BBlockSizes( 4070 v->SourcePixelFormat[k], 4071 v->SurfaceTiling[k], 4072 &v->BytePerPixelY[k], 4073 &v->BytePerPixelC[k], 4074 &v->BytePerPixelInDETY[k], 4075 &v->BytePerPixelInDETC[k], 4076 &v->Read256BlockHeightY[k], 4077 &v->Read256BlockHeightC[k], 4078 &v->Read256BlockWidthY[k], 4079 &v->Read256BlockWidthC[k]); 4080 } 4081 for (k = 0; k < v->NumberOfActivePlanes; k++) { 4082 if (v->SourceScan[k] != dm_vert) { 4083 v->SwathWidthYSingleDPP[k] = v->ViewportWidth[k]; 4084 v->SwathWidthCSingleDPP[k] = v->ViewportWidthChroma[k]; 4085 } else { 4086 v->SwathWidthYSingleDPP[k] = v->ViewportHeight[k]; 4087 v->SwathWidthCSingleDPP[k] = v->ViewportHeightChroma[k]; 4088 } 4089 } 4090 for (k = 0; k < v->NumberOfActivePlanes; k++) { 4091 v->ReadBandwidthLuma[k] = v->SwathWidthYSingleDPP[k] * dml_ceil(v->BytePerPixelInDETY[k], 1.0) 4092 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatio[k]; 4093 v->ReadBandwidthChroma[k] = v->SwathWidthYSingleDPP[k] / 2 * dml_ceil(v->BytePerPixelInDETC[k], 2.0) 4094 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatio[k] / 2.0; 4095 } 4096 for (k = 0; k < v->NumberOfActivePlanes; k++) { 4097 if (v->WritebackEnable[k] == true && v->WritebackPixelFormat[k] == dm_444_64) { 4098 v->WriteBandwidth[k] = v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k] 4099 / (v->WritebackSourceHeight[k] * v->HTotal[k] / v->PixelClock[k]) * 8.0; 4100 } else if (v->WritebackEnable[k] == true) { 4101 v->WriteBandwidth[k] = v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k] 4102 / (v->WritebackSourceHeight[k] * v->HTotal[k] / v->PixelClock[k]) * 4.0; 4103 } else { 4104 v->WriteBandwidth[k] = 0.0; 4105 } 4106 } 4107 4108 /*Writeback Latency support check*/ 4109 4110 v->WritebackLatencySupport = true; 4111 for (k = 0; k < v->NumberOfActivePlanes; k++) { 4112 if (v->WritebackEnable[k] == true && (v->WriteBandwidth[k] > v->WritebackInterfaceBufferSize * 1024 / v->WritebackLatency)) { 4113 v->WritebackLatencySupport = false; 4114 } 4115 } 4116 4117 /*Writeback Mode Support Check*/ 4118 4119 v->TotalNumberOfActiveWriteback = 0; 4120 for (k = 0; k < v->NumberOfActivePlanes; k++) { 4121 if (v->WritebackEnable[k] == true) { 4122 v->TotalNumberOfActiveWriteback = v->TotalNumberOfActiveWriteback + 1; 4123 } 4124 } 4125 4126 if (v->TotalNumberOfActiveWriteback > v->MaxNumWriteback) { 4127 EnoughWritebackUnits = false; 4128 } 4129 4130 /*Writeback Scale Ratio and Taps Support Check*/ 4131 4132 v->WritebackScaleRatioAndTapsSupport = true; 4133 for (k = 0; k < v->NumberOfActivePlanes; k++) { 4134 if (v->WritebackEnable[k] == true) { 4135 if (v->WritebackHRatio[k] > v->WritebackMaxHSCLRatio || v->WritebackVRatio[k] > v->WritebackMaxVSCLRatio 4136 || v->WritebackHRatio[k] < v->WritebackMinHSCLRatio 4137 || v->WritebackVRatio[k] < v->WritebackMinVSCLRatio 4138 || v->WritebackHTaps[k] > v->WritebackMaxHSCLTaps 4139 || v->WritebackVTaps[k] > v->WritebackMaxVSCLTaps 4140 || v->WritebackHRatio[k] > v->WritebackHTaps[k] || v->WritebackVRatio[k] > v->WritebackVTaps[k] 4141 || (v->WritebackHTaps[k] > 2.0 && ((v->WritebackHTaps[k] % 2) == 1))) { 4142 v->WritebackScaleRatioAndTapsSupport = false; 4143 } 4144 if (2.0 * v->WritebackDestinationWidth[k] * (v->WritebackVTaps[k] - 1) * 57 > v->WritebackLineBufferSize) { 4145 v->WritebackScaleRatioAndTapsSupport = false; 4146 } 4147 } 4148 } 4149 /*Maximum DISPCLK/DPPCLK Support check*/ 4150 4151 v->WritebackRequiredDISPCLK = 0.0; 4152 for (k = 0; k < v->NumberOfActivePlanes; k++) { 4153 if (v->WritebackEnable[k] == true) { 4154 v->WritebackRequiredDISPCLK = dml_max( 4155 v->WritebackRequiredDISPCLK, 4156 dml31_CalculateWriteBackDISPCLK( 4157 v->WritebackPixelFormat[k], 4158 v->PixelClock[k], 4159 v->WritebackHRatio[k], 4160 v->WritebackVRatio[k], 4161 v->WritebackHTaps[k], 4162 v->WritebackVTaps[k], 4163 v->WritebackSourceWidth[k], 4164 v->WritebackDestinationWidth[k], 4165 v->HTotal[k], 4166 v->WritebackLineBufferSize)); 4167 } 4168 } 4169 for (k = 0; k < v->NumberOfActivePlanes; k++) { 4170 if (v->HRatio[k] > 1.0) { 4171 v->PSCL_FACTOR[k] = dml_min( 4172 v->MaxDCHUBToPSCLThroughput, 4173 v->MaxPSCLToLBThroughput * v->HRatio[k] / dml_ceil(v->htaps[k] / 6.0, 1.0)); 4174 } else { 4175 v->PSCL_FACTOR[k] = dml_min(v->MaxDCHUBToPSCLThroughput, v->MaxPSCLToLBThroughput); 4176 } 4177 if (v->BytePerPixelC[k] == 0.0) { 4178 v->PSCL_FACTOR_CHROMA[k] = 0.0; 4179 v->MinDPPCLKUsingSingleDPP[k] = v->PixelClock[k] 4180 * dml_max3( 4181 v->vtaps[k] / 6.0 * dml_min(1.0, v->HRatio[k]), 4182 v->HRatio[k] * v->VRatio[k] / v->PSCL_FACTOR[k], 4183 1.0); 4184 if ((v->htaps[k] > 6.0 || v->vtaps[k] > 6.0) && v->MinDPPCLKUsingSingleDPP[k] < 2.0 * v->PixelClock[k]) { 4185 v->MinDPPCLKUsingSingleDPP[k] = 2.0 * v->PixelClock[k]; 4186 } 4187 } else { 4188 if (v->HRatioChroma[k] > 1.0) { 4189 v->PSCL_FACTOR_CHROMA[k] = dml_min( 4190 v->MaxDCHUBToPSCLThroughput, 4191 v->MaxPSCLToLBThroughput * v->HRatioChroma[k] / dml_ceil(v->HTAPsChroma[k] / 6.0, 1.0)); 4192 } else { 4193 v->PSCL_FACTOR_CHROMA[k] = dml_min(v->MaxDCHUBToPSCLThroughput, v->MaxPSCLToLBThroughput); 4194 } 4195 v->MinDPPCLKUsingSingleDPP[k] = v->PixelClock[k] 4196 * dml_max5( 4197 v->vtaps[k] / 6.0 * dml_min(1.0, v->HRatio[k]), 4198 v->HRatio[k] * v->VRatio[k] / v->PSCL_FACTOR[k], 4199 v->VTAPsChroma[k] / 6.0 * dml_min(1.0, v->HRatioChroma[k]), 4200 v->HRatioChroma[k] * v->VRatioChroma[k] / v->PSCL_FACTOR_CHROMA[k], 4201 1.0); 4202 if ((v->htaps[k] > 6.0 || v->vtaps[k] > 6.0 || v->HTAPsChroma[k] > 6.0 || v->VTAPsChroma[k] > 6.0) 4203 && v->MinDPPCLKUsingSingleDPP[k] < 2.0 * v->PixelClock[k]) { 4204 v->MinDPPCLKUsingSingleDPP[k] = 2.0 * v->PixelClock[k]; 4205 } 4206 } 4207 } 4208 for (k = 0; k < v->NumberOfActivePlanes; k++) { 4209 int MaximumSwathWidthSupportLuma; 4210 int MaximumSwathWidthSupportChroma; 4211 4212 if (v->SurfaceTiling[k] == dm_sw_linear) { 4213 MaximumSwathWidthSupportLuma = 8192.0; 4214 } else if (v->SourceScan[k] == dm_vert && v->BytePerPixelC[k] > 0) { 4215 MaximumSwathWidthSupportLuma = 2880.0; 4216 } else if (v->SourcePixelFormat[k] == dm_rgbe_alpha) { 4217 MaximumSwathWidthSupportLuma = 3840.0; 4218 } else { 4219 MaximumSwathWidthSupportLuma = 5760.0; 4220 } 4221 4222 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12) { 4223 MaximumSwathWidthSupportChroma = MaximumSwathWidthSupportLuma / 2.0; 4224 } else { 4225 MaximumSwathWidthSupportChroma = MaximumSwathWidthSupportLuma; 4226 } 4227 v->MaximumSwathWidthInLineBufferLuma = v->LineBufferSize * dml_max(v->HRatio[k], 1.0) / v->LBBitPerPixel[k] 4228 / (v->vtaps[k] + dml_max(dml_ceil(v->VRatio[k], 1.0) - 2, 0.0)); 4229 if (v->BytePerPixelC[k] == 0.0) { 4230 v->MaximumSwathWidthInLineBufferChroma = 0; 4231 } else { 4232 v->MaximumSwathWidthInLineBufferChroma = v->LineBufferSize * dml_max(v->HRatioChroma[k], 1.0) / v->LBBitPerPixel[k] 4233 / (v->VTAPsChroma[k] + dml_max(dml_ceil(v->VRatioChroma[k], 1.0) - 2, 0.0)); 4234 } 4235 v->MaximumSwathWidthLuma[k] = dml_min(MaximumSwathWidthSupportLuma, v->MaximumSwathWidthInLineBufferLuma); 4236 v->MaximumSwathWidthChroma[k] = dml_min(MaximumSwathWidthSupportChroma, v->MaximumSwathWidthInLineBufferChroma); 4237 } 4238 4239 CalculateSwathAndDETConfiguration( 4240 true, 4241 v->NumberOfActivePlanes, 4242 v->DETBufferSizeInKByte[0], 4243 v->MaximumSwathWidthLuma, 4244 v->MaximumSwathWidthChroma, 4245 v->SourceScan, 4246 v->SourcePixelFormat, 4247 v->SurfaceTiling, 4248 v->ViewportWidth, 4249 v->ViewportHeight, 4250 v->SurfaceWidthY, 4251 v->SurfaceWidthC, 4252 v->SurfaceHeightY, 4253 v->SurfaceHeightC, 4254 v->Read256BlockHeightY, 4255 v->Read256BlockHeightC, 4256 v->Read256BlockWidthY, 4257 v->Read256BlockWidthC, 4258 v->odm_combine_dummy, 4259 v->BlendingAndTiming, 4260 v->BytePerPixelY, 4261 v->BytePerPixelC, 4262 v->BytePerPixelInDETY, 4263 v->BytePerPixelInDETC, 4264 v->HActive, 4265 v->HRatio, 4266 v->HRatioChroma, 4267 v->NoOfDPPThisState, 4268 v->swath_width_luma_ub_this_state, 4269 v->swath_width_chroma_ub_this_state, 4270 v->SwathWidthYThisState, 4271 v->SwathWidthCThisState, 4272 v->SwathHeightYThisState, 4273 v->SwathHeightCThisState, 4274 v->DETBufferSizeYThisState, 4275 v->DETBufferSizeCThisState, 4276 v->SingleDPPViewportSizeSupportPerPlane, 4277 &v->ViewportSizeSupport[0][0]); 4278 4279 for (i = 0; i < v->soc.num_states; i++) { 4280 for (j = 0; j < 2; j++) { 4281 v->MaxDispclkRoundedDownToDFSGranularity = RoundToDFSGranularityDown(v->MaxDispclk[i], v->DISPCLKDPPCLKVCOSpeed); 4282 v->MaxDppclkRoundedDownToDFSGranularity = RoundToDFSGranularityDown(v->MaxDppclk[i], v->DISPCLKDPPCLKVCOSpeed); 4283 v->RequiredDISPCLK[i][j] = 0.0; 4284 v->DISPCLK_DPPCLK_Support[i][j] = true; 4285 for (k = 0; k < v->NumberOfActivePlanes; k++) { 4286 v->PlaneRequiredDISPCLKWithoutODMCombine = v->PixelClock[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) 4287 * (1.0 + v->DISPCLKRampingMargin / 100.0); 4288 if ((v->PlaneRequiredDISPCLKWithoutODMCombine >= v->MaxDispclk[i] 4289 && v->MaxDispclk[i] == v->MaxDispclk[v->soc.num_states - 1] 4290 && v->MaxDppclk[i] == v->MaxDppclk[v->soc.num_states - 1])) { 4291 v->PlaneRequiredDISPCLKWithoutODMCombine = v->PixelClock[k] 4292 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0); 4293 } 4294 v->PlaneRequiredDISPCLKWithODMCombine2To1 = v->PixelClock[k] / 2 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) 4295 * (1 + v->DISPCLKRampingMargin / 100.0); 4296 if ((v->PlaneRequiredDISPCLKWithODMCombine2To1 >= v->MaxDispclk[i] 4297 && v->MaxDispclk[i] == v->MaxDispclk[v->soc.num_states - 1] 4298 && v->MaxDppclk[i] == v->MaxDppclk[v->soc.num_states - 1])) { 4299 v->PlaneRequiredDISPCLKWithODMCombine2To1 = v->PixelClock[k] / 2 4300 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0); 4301 } 4302 v->PlaneRequiredDISPCLKWithODMCombine4To1 = v->PixelClock[k] / 4 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) 4303 * (1 + v->DISPCLKRampingMargin / 100.0); 4304 if ((v->PlaneRequiredDISPCLKWithODMCombine4To1 >= v->MaxDispclk[i] 4305 && v->MaxDispclk[i] == v->MaxDispclk[v->soc.num_states - 1] 4306 && v->MaxDppclk[i] == v->MaxDppclk[v->soc.num_states - 1])) { 4307 v->PlaneRequiredDISPCLKWithODMCombine4To1 = v->PixelClock[k] / 4 4308 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0); 4309 } 4310 4311 if (v->ODMCombinePolicy == dm_odm_combine_policy_none 4312 || !(v->Output[k] == dm_dp || 4313 v->Output[k] == dm_edp)) { 4314 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_disabled; 4315 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithoutODMCombine; 4316 4317 if (v->HActive[k] / 2 > DCN31_MAX_FMT_420_BUFFER_WIDTH) 4318 FMTBufferExceeded = true; 4319 } else if (v->ODMCombinePolicy == dm_odm_combine_policy_2to1) { 4320 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1; 4321 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine2To1; 4322 } else if (v->ODMCombinePolicy == dm_odm_combine_policy_4to1 4323 || v->PlaneRequiredDISPCLKWithODMCombine2To1 > v->MaxDispclkRoundedDownToDFSGranularity) { 4324 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_4to1; 4325 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine4To1; 4326 } else if (v->PlaneRequiredDISPCLKWithoutODMCombine > v->MaxDispclkRoundedDownToDFSGranularity) { 4327 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1; 4328 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine2To1; 4329 } else { 4330 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_disabled; 4331 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithoutODMCombine; 4332 } 4333 if (v->DSCEnabled[k] && v->HActive[k] > DCN31_MAX_DSC_IMAGE_WIDTH 4334 && v->ODMCombineEnablePerState[i][k] != dm_odm_combine_mode_4to1) { 4335 if (v->HActive[k] / 2 > DCN31_MAX_DSC_IMAGE_WIDTH) { 4336 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_4to1; 4337 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine4To1; 4338 } else { 4339 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1; 4340 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine2To1; 4341 } 4342 } 4343 if (v->OutputFormat[k] == dm_420 && v->HActive[k] > DCN31_MAX_FMT_420_BUFFER_WIDTH 4344 && v->ODMCombineEnablePerState[i][k] != dm_odm_combine_mode_4to1) { 4345 if (v->HActive[k] / 2 > DCN31_MAX_FMT_420_BUFFER_WIDTH) { 4346 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_4to1; 4347 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine4To1; 4348 4349 if (v->HActive[k] / 4 > DCN31_MAX_FMT_420_BUFFER_WIDTH) 4350 FMTBufferExceeded = true; 4351 } else { 4352 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1; 4353 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine2To1; 4354 } 4355 } 4356 if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1) { 4357 v->MPCCombine[i][j][k] = false; 4358 v->NoOfDPP[i][j][k] = 4; 4359 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 4; 4360 } else if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) { 4361 v->MPCCombine[i][j][k] = false; 4362 v->NoOfDPP[i][j][k] = 2; 4363 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 2; 4364 } else if ((v->WhenToDoMPCCombine == dm_mpc_never 4365 || (v->MinDPPCLKUsingSingleDPP[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) 4366 <= v->MaxDppclkRoundedDownToDFSGranularity && v->SingleDPPViewportSizeSupportPerPlane[k] == true))) { 4367 v->MPCCombine[i][j][k] = false; 4368 v->NoOfDPP[i][j][k] = 1; 4369 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0); 4370 } else { 4371 v->MPCCombine[i][j][k] = true; 4372 v->NoOfDPP[i][j][k] = 2; 4373 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 2.0; 4374 } 4375 v->RequiredDISPCLK[i][j] = dml_max(v->RequiredDISPCLK[i][j], v->PlaneRequiredDISPCLK); 4376 if ((v->MinDPPCLKUsingSingleDPP[k] / v->NoOfDPP[i][j][k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) 4377 > v->MaxDppclkRoundedDownToDFSGranularity) 4378 || (v->PlaneRequiredDISPCLK > v->MaxDispclkRoundedDownToDFSGranularity)) { 4379 v->DISPCLK_DPPCLK_Support[i][j] = false; 4380 } 4381 } 4382 v->TotalNumberOfActiveDPP[i][j] = 0; 4383 v->TotalNumberOfSingleDPPPlanes[i][j] = 0; 4384 for (k = 0; k < v->NumberOfActivePlanes; k++) { 4385 v->TotalNumberOfActiveDPP[i][j] = v->TotalNumberOfActiveDPP[i][j] + v->NoOfDPP[i][j][k]; 4386 if (v->NoOfDPP[i][j][k] == 1) 4387 v->TotalNumberOfSingleDPPPlanes[i][j] = v->TotalNumberOfSingleDPPPlanes[i][j] + 1; 4388 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10 4389 || v->SourcePixelFormat[k] == dm_420_12 || v->SourcePixelFormat[k] == dm_rgbe_alpha) 4390 NoChroma = false; 4391 } 4392 4393 // UPTO 4394 if (j == 1 && v->WhenToDoMPCCombine != dm_mpc_never 4395 && !UnboundedRequest(v->UseUnboundedRequesting, v->TotalNumberOfActiveDPP[i][j], NoChroma, v->Output[0])) { 4396 while (!(v->TotalNumberOfActiveDPP[i][j] >= v->MaxNumDPP || v->TotalNumberOfSingleDPPPlanes[i][j] == 0)) { 4397 double BWOfNonSplitPlaneOfMaximumBandwidth; 4398 unsigned int NumberOfNonSplitPlaneOfMaximumBandwidth; 4399 BWOfNonSplitPlaneOfMaximumBandwidth = 0; 4400 NumberOfNonSplitPlaneOfMaximumBandwidth = 0; 4401 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 4402 if (v->ReadBandwidthLuma[k] + v->ReadBandwidthChroma[k] > BWOfNonSplitPlaneOfMaximumBandwidth 4403 && v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_disabled && v->MPCCombine[i][j][k] == false) { 4404 BWOfNonSplitPlaneOfMaximumBandwidth = v->ReadBandwidthLuma[k] + v->ReadBandwidthChroma[k]; 4405 NumberOfNonSplitPlaneOfMaximumBandwidth = k; 4406 } 4407 } 4408 v->MPCCombine[i][j][NumberOfNonSplitPlaneOfMaximumBandwidth] = true; 4409 v->NoOfDPP[i][j][NumberOfNonSplitPlaneOfMaximumBandwidth] = 2; 4410 v->RequiredDPPCLK[i][j][NumberOfNonSplitPlaneOfMaximumBandwidth] = 4411 v->MinDPPCLKUsingSingleDPP[NumberOfNonSplitPlaneOfMaximumBandwidth] 4412 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100) / 2; 4413 v->TotalNumberOfActiveDPP[i][j] = v->TotalNumberOfActiveDPP[i][j] + 1; 4414 v->TotalNumberOfSingleDPPPlanes[i][j] = v->TotalNumberOfSingleDPPPlanes[i][j] - 1; 4415 } 4416 } 4417 if (v->TotalNumberOfActiveDPP[i][j] > v->MaxNumDPP) { 4418 v->RequiredDISPCLK[i][j] = 0.0; 4419 v->DISPCLK_DPPCLK_Support[i][j] = true; 4420 for (k = 0; k < v->NumberOfActivePlanes; k++) { 4421 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_disabled; 4422 if (v->SingleDPPViewportSizeSupportPerPlane[k] == false && v->WhenToDoMPCCombine != dm_mpc_never) { 4423 v->MPCCombine[i][j][k] = true; 4424 v->NoOfDPP[i][j][k] = 2; 4425 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] 4426 * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 2.0; 4427 } else { 4428 v->MPCCombine[i][j][k] = false; 4429 v->NoOfDPP[i][j][k] = 1; 4430 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] 4431 * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0); 4432 } 4433 if (!(v->MaxDispclk[i] == v->MaxDispclk[v->soc.num_states - 1] 4434 && v->MaxDppclk[i] == v->MaxDppclk[v->soc.num_states - 1])) { 4435 v->PlaneRequiredDISPCLK = v->PixelClock[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) 4436 * (1.0 + v->DISPCLKRampingMargin / 100.0); 4437 } else { 4438 v->PlaneRequiredDISPCLK = v->PixelClock[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0); 4439 } 4440 v->RequiredDISPCLK[i][j] = dml_max(v->RequiredDISPCLK[i][j], v->PlaneRequiredDISPCLK); 4441 if ((v->MinDPPCLKUsingSingleDPP[k] / v->NoOfDPP[i][j][k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) 4442 > v->MaxDppclkRoundedDownToDFSGranularity) 4443 || (v->PlaneRequiredDISPCLK > v->MaxDispclkRoundedDownToDFSGranularity)) { 4444 v->DISPCLK_DPPCLK_Support[i][j] = false; 4445 } 4446 } 4447 v->TotalNumberOfActiveDPP[i][j] = 0.0; 4448 for (k = 0; k < v->NumberOfActivePlanes; k++) { 4449 v->TotalNumberOfActiveDPP[i][j] = v->TotalNumberOfActiveDPP[i][j] + v->NoOfDPP[i][j][k]; 4450 } 4451 } 4452 v->RequiredDISPCLK[i][j] = dml_max(v->RequiredDISPCLK[i][j], v->WritebackRequiredDISPCLK); 4453 if (v->MaxDispclkRoundedDownToDFSGranularity < v->WritebackRequiredDISPCLK) { 4454 v->DISPCLK_DPPCLK_Support[i][j] = false; 4455 } 4456 } 4457 } 4458 4459 /*Total Available Pipes Support Check*/ 4460 4461 for (i = 0; i < v->soc.num_states; i++) { 4462 for (j = 0; j < 2; j++) { 4463 if (v->TotalNumberOfActiveDPP[i][j] <= v->MaxNumDPP) { 4464 v->TotalAvailablePipesSupport[i][j] = true; 4465 } else { 4466 v->TotalAvailablePipesSupport[i][j] = false; 4467 } 4468 } 4469 } 4470 /*Display IO and DSC Support Check*/ 4471 4472 v->NonsupportedDSCInputBPC = false; 4473 for (k = 0; k < v->NumberOfActivePlanes; k++) { 4474 if (!(v->DSCInputBitPerComponent[k] == 12.0 || v->DSCInputBitPerComponent[k] == 10.0 || v->DSCInputBitPerComponent[k] == 8.0) 4475 || v->DSCInputBitPerComponent[k] > v->MaximumDSCBitsPerComponent) { 4476 v->NonsupportedDSCInputBPC = true; 4477 } 4478 } 4479 4480 /*Number Of DSC Slices*/ 4481 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 4482 if (v->BlendingAndTiming[k] == k) { 4483 if (v->PixelClockBackEnd[k] > 3200) { 4484 v->NumberOfDSCSlices[k] = dml_ceil(v->PixelClockBackEnd[k] / 400.0, 4.0); 4485 } else if (v->PixelClockBackEnd[k] > 1360) { 4486 v->NumberOfDSCSlices[k] = 8; 4487 } else if (v->PixelClockBackEnd[k] > 680) { 4488 v->NumberOfDSCSlices[k] = 4; 4489 } else if (v->PixelClockBackEnd[k] > 340) { 4490 v->NumberOfDSCSlices[k] = 2; 4491 } else { 4492 v->NumberOfDSCSlices[k] = 1; 4493 } 4494 } else { 4495 v->NumberOfDSCSlices[k] = 0; 4496 } 4497 } 4498 4499 for (i = 0; i < v->soc.num_states; i++) { 4500 for (k = 0; k < v->NumberOfActivePlanes; k++) { 4501 v->RequiresDSC[i][k] = false; 4502 v->RequiresFEC[i][k] = false; 4503 if (v->BlendingAndTiming[k] == k) { 4504 if (v->Output[k] == dm_hdmi) { 4505 v->RequiresDSC[i][k] = false; 4506 v->RequiresFEC[i][k] = false; 4507 v->OutputBppPerState[i][k] = TruncToValidBPP( 4508 dml_min(600.0, v->PHYCLKPerState[i]) * 10, 4509 3, 4510 v->HTotal[k], 4511 v->HActive[k], 4512 v->PixelClockBackEnd[k], 4513 v->ForcedOutputLinkBPP[k], 4514 false, 4515 v->Output[k], 4516 v->OutputFormat[k], 4517 v->DSCInputBitPerComponent[k], 4518 v->NumberOfDSCSlices[k], 4519 v->AudioSampleRate[k], 4520 v->AudioSampleLayout[k], 4521 v->ODMCombineEnablePerState[i][k]); 4522 } else if (v->Output[k] == dm_dp || v->Output[k] == dm_edp) { 4523 if (v->DSCEnable[k] == true) { 4524 v->RequiresDSC[i][k] = true; 4525 v->LinkDSCEnable = true; 4526 if (v->Output[k] == dm_dp) { 4527 v->RequiresFEC[i][k] = true; 4528 } else { 4529 v->RequiresFEC[i][k] = false; 4530 } 4531 } else { 4532 v->RequiresDSC[i][k] = false; 4533 v->LinkDSCEnable = false; 4534 v->RequiresFEC[i][k] = false; 4535 } 4536 4537 v->Outbpp = BPP_INVALID; 4538 if (v->PHYCLKPerState[i] >= 270.0) { 4539 v->Outbpp = TruncToValidBPP( 4540 (1.0 - v->Downspreading / 100.0) * 2700, 4541 v->OutputLinkDPLanes[k], 4542 v->HTotal[k], 4543 v->HActive[k], 4544 v->PixelClockBackEnd[k], 4545 v->ForcedOutputLinkBPP[k], 4546 v->LinkDSCEnable, 4547 v->Output[k], 4548 v->OutputFormat[k], 4549 v->DSCInputBitPerComponent[k], 4550 v->NumberOfDSCSlices[k], 4551 v->AudioSampleRate[k], 4552 v->AudioSampleLayout[k], 4553 v->ODMCombineEnablePerState[i][k]); 4554 v->OutputBppPerState[i][k] = v->Outbpp; 4555 // TODO: Need some other way to handle this nonsense 4556 // v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " HBR" 4557 } 4558 if (v->Outbpp == BPP_INVALID && v->PHYCLKPerState[i] >= 540.0) { 4559 v->Outbpp = TruncToValidBPP( 4560 (1.0 - v->Downspreading / 100.0) * 5400, 4561 v->OutputLinkDPLanes[k], 4562 v->HTotal[k], 4563 v->HActive[k], 4564 v->PixelClockBackEnd[k], 4565 v->ForcedOutputLinkBPP[k], 4566 v->LinkDSCEnable, 4567 v->Output[k], 4568 v->OutputFormat[k], 4569 v->DSCInputBitPerComponent[k], 4570 v->NumberOfDSCSlices[k], 4571 v->AudioSampleRate[k], 4572 v->AudioSampleLayout[k], 4573 v->ODMCombineEnablePerState[i][k]); 4574 v->OutputBppPerState[i][k] = v->Outbpp; 4575 // TODO: Need some other way to handle this nonsense 4576 // v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " HBR2" 4577 } 4578 if (v->Outbpp == BPP_INVALID && v->PHYCLKPerState[i] >= 810.0) { 4579 v->Outbpp = TruncToValidBPP( 4580 (1.0 - v->Downspreading / 100.0) * 8100, 4581 v->OutputLinkDPLanes[k], 4582 v->HTotal[k], 4583 v->HActive[k], 4584 v->PixelClockBackEnd[k], 4585 v->ForcedOutputLinkBPP[k], 4586 v->LinkDSCEnable, 4587 v->Output[k], 4588 v->OutputFormat[k], 4589 v->DSCInputBitPerComponent[k], 4590 v->NumberOfDSCSlices[k], 4591 v->AudioSampleRate[k], 4592 v->AudioSampleLayout[k], 4593 v->ODMCombineEnablePerState[i][k]); 4594 v->OutputBppPerState[i][k] = v->Outbpp; 4595 // TODO: Need some other way to handle this nonsense 4596 // v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " HBR3" 4597 } 4598 if (v->Outbpp == BPP_INVALID && v->PHYCLKD18PerState[i] >= 10000.0 / 18) { 4599 v->Outbpp = TruncToValidBPP( 4600 (1.0 - v->Downspreading / 100.0) * 10000, 4601 4, 4602 v->HTotal[k], 4603 v->HActive[k], 4604 v->PixelClockBackEnd[k], 4605 v->ForcedOutputLinkBPP[k], 4606 v->LinkDSCEnable, 4607 v->Output[k], 4608 v->OutputFormat[k], 4609 v->DSCInputBitPerComponent[k], 4610 v->NumberOfDSCSlices[k], 4611 v->AudioSampleRate[k], 4612 v->AudioSampleLayout[k], 4613 v->ODMCombineEnablePerState[i][k]); 4614 v->OutputBppPerState[i][k] = v->Outbpp; 4615 //v->OutputTypeAndRatePerState[i][k] = v->Output[k] & "10x4"; 4616 } 4617 if (v->Outbpp == BPP_INVALID && v->PHYCLKD18PerState[i] >= 12000.0 / 18) { 4618 v->Outbpp = TruncToValidBPP( 4619 12000, 4620 4, 4621 v->HTotal[k], 4622 v->HActive[k], 4623 v->PixelClockBackEnd[k], 4624 v->ForcedOutputLinkBPP[k], 4625 v->LinkDSCEnable, 4626 v->Output[k], 4627 v->OutputFormat[k], 4628 v->DSCInputBitPerComponent[k], 4629 v->NumberOfDSCSlices[k], 4630 v->AudioSampleRate[k], 4631 v->AudioSampleLayout[k], 4632 v->ODMCombineEnablePerState[i][k]); 4633 v->OutputBppPerState[i][k] = v->Outbpp; 4634 //v->OutputTypeAndRatePerState[i][k] = v->Output[k] & "12x4"; 4635 } 4636 } 4637 } else { 4638 v->OutputBppPerState[i][k] = 0; 4639 } 4640 } 4641 } 4642 4643 for (i = 0; i < v->soc.num_states; i++) { 4644 v->LinkCapacitySupport[i] = true; 4645 for (k = 0; k < v->NumberOfActivePlanes; k++) { 4646 if (v->BlendingAndTiming[k] == k 4647 && (v->Output[k] == dm_dp || 4648 v->Output[k] == dm_edp || 4649 v->Output[k] == dm_hdmi) && v->OutputBppPerState[i][k] == 0) { 4650 v->LinkCapacitySupport[i] = false; 4651 } 4652 } 4653 } 4654 4655 // UPTO 2172 4656 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 4657 if (v->BlendingAndTiming[k] == k 4658 && (v->Output[k] == dm_dp || 4659 v->Output[k] == dm_edp || 4660 v->Output[k] == dm_hdmi)) { 4661 if (v->OutputFormat[k] == dm_420 && v->Interlace[k] == 1 && v->ProgressiveToInterlaceUnitInOPP == true) { 4662 P2IWith420 = true; 4663 } 4664 if (v->DSCEnable[k] == true && v->OutputFormat[k] == dm_n422 4665 && !v->DSC422NativeSupport) { 4666 DSC422NativeNotSupported = true; 4667 } 4668 } 4669 } 4670 4671 for (i = 0; i < v->soc.num_states; ++i) { 4672 v->ODMCombine4To1SupportCheckOK[i] = true; 4673 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 4674 if (v->BlendingAndTiming[k] == k && v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1 4675 && (v->ODMCombine4To1Supported == false || v->Output[k] == dm_dp || v->Output[k] == dm_edp 4676 || v->Output[k] == dm_hdmi)) { 4677 v->ODMCombine4To1SupportCheckOK[i] = false; 4678 } 4679 } 4680 } 4681 4682 /* Skip dscclk validation: as long as dispclk is supported, dscclk is also implicitly supported */ 4683 4684 for (i = 0; i < v->soc.num_states; i++) { 4685 v->NotEnoughDSCUnits[i] = false; 4686 v->TotalDSCUnitsRequired = 0.0; 4687 for (k = 0; k < v->NumberOfActivePlanes; k++) { 4688 if (v->RequiresDSC[i][k] == true) { 4689 if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1) { 4690 v->TotalDSCUnitsRequired = v->TotalDSCUnitsRequired + 4.0; 4691 } else if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) { 4692 v->TotalDSCUnitsRequired = v->TotalDSCUnitsRequired + 2.0; 4693 } else { 4694 v->TotalDSCUnitsRequired = v->TotalDSCUnitsRequired + 1.0; 4695 } 4696 } 4697 } 4698 if (v->TotalDSCUnitsRequired > v->NumberOfDSC) { 4699 v->NotEnoughDSCUnits[i] = true; 4700 } 4701 } 4702 /*DSC Delay per state*/ 4703 4704 for (i = 0; i < v->soc.num_states; i++) { 4705 for (k = 0; k < v->NumberOfActivePlanes; k++) { 4706 if (v->OutputBppPerState[i][k] == BPP_INVALID) { 4707 v->BPP = 0.0; 4708 } else { 4709 v->BPP = v->OutputBppPerState[i][k]; 4710 } 4711 if (v->RequiresDSC[i][k] == true && v->BPP != 0.0) { 4712 if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_disabled) { 4713 v->DSCDelayPerState[i][k] = dscceComputeDelay( 4714 v->DSCInputBitPerComponent[k], 4715 v->BPP, 4716 dml_ceil(1.0 * v->HActive[k] / v->NumberOfDSCSlices[k], 1.0), 4717 v->NumberOfDSCSlices[k], 4718 v->OutputFormat[k], 4719 v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k]); 4720 } else if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) { 4721 v->DSCDelayPerState[i][k] = 2.0 4722 * (dscceComputeDelay( 4723 v->DSCInputBitPerComponent[k], 4724 v->BPP, 4725 dml_ceil(1.0 * v->HActive[k] / v->NumberOfDSCSlices[k], 1.0), 4726 v->NumberOfDSCSlices[k] / 2, 4727 v->OutputFormat[k], 4728 v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k])); 4729 } else { 4730 v->DSCDelayPerState[i][k] = 4.0 4731 * (dscceComputeDelay( 4732 v->DSCInputBitPerComponent[k], 4733 v->BPP, 4734 dml_ceil(1.0 * v->HActive[k] / v->NumberOfDSCSlices[k], 1.0), 4735 v->NumberOfDSCSlices[k] / 4, 4736 v->OutputFormat[k], 4737 v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k])); 4738 } 4739 v->DSCDelayPerState[i][k] = v->DSCDelayPerState[i][k] * v->PixelClock[k] / v->PixelClockBackEnd[k]; 4740 } else { 4741 v->DSCDelayPerState[i][k] = 0.0; 4742 } 4743 } 4744 for (k = 0; k < v->NumberOfActivePlanes; k++) { 4745 for (m = 0; m < v->NumberOfActivePlanes; m++) { 4746 if (v->BlendingAndTiming[k] == m && v->RequiresDSC[i][m] == true) { 4747 v->DSCDelayPerState[i][k] = v->DSCDelayPerState[i][m]; 4748 } 4749 } 4750 } 4751 } 4752 4753 //Calculate Swath, DET Configuration, DCFCLKDeepSleep 4754 // 4755 for (i = 0; i < v->soc.num_states; ++i) { 4756 for (j = 0; j <= 1; ++j) { 4757 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 4758 v->RequiredDPPCLKThisState[k] = v->RequiredDPPCLK[i][j][k]; 4759 v->NoOfDPPThisState[k] = v->NoOfDPP[i][j][k]; 4760 v->ODMCombineEnableThisState[k] = v->ODMCombineEnablePerState[i][k]; 4761 } 4762 4763 CalculateSwathAndDETConfiguration( 4764 false, 4765 v->NumberOfActivePlanes, 4766 v->DETBufferSizeInKByte[0], 4767 v->MaximumSwathWidthLuma, 4768 v->MaximumSwathWidthChroma, 4769 v->SourceScan, 4770 v->SourcePixelFormat, 4771 v->SurfaceTiling, 4772 v->ViewportWidth, 4773 v->ViewportHeight, 4774 v->SurfaceWidthY, 4775 v->SurfaceWidthC, 4776 v->SurfaceHeightY, 4777 v->SurfaceHeightC, 4778 v->Read256BlockHeightY, 4779 v->Read256BlockHeightC, 4780 v->Read256BlockWidthY, 4781 v->Read256BlockWidthC, 4782 v->ODMCombineEnableThisState, 4783 v->BlendingAndTiming, 4784 v->BytePerPixelY, 4785 v->BytePerPixelC, 4786 v->BytePerPixelInDETY, 4787 v->BytePerPixelInDETC, 4788 v->HActive, 4789 v->HRatio, 4790 v->HRatioChroma, 4791 v->NoOfDPPThisState, 4792 v->swath_width_luma_ub_this_state, 4793 v->swath_width_chroma_ub_this_state, 4794 v->SwathWidthYThisState, 4795 v->SwathWidthCThisState, 4796 v->SwathHeightYThisState, 4797 v->SwathHeightCThisState, 4798 v->DETBufferSizeYThisState, 4799 v->DETBufferSizeCThisState, 4800 v->dummystring, 4801 &v->ViewportSizeSupport[i][j]); 4802 4803 CalculateDCFCLKDeepSleep( 4804 mode_lib, 4805 v->NumberOfActivePlanes, 4806 v->BytePerPixelY, 4807 v->BytePerPixelC, 4808 v->VRatio, 4809 v->VRatioChroma, 4810 v->SwathWidthYThisState, 4811 v->SwathWidthCThisState, 4812 v->NoOfDPPThisState, 4813 v->HRatio, 4814 v->HRatioChroma, 4815 v->PixelClock, 4816 v->PSCL_FACTOR, 4817 v->PSCL_FACTOR_CHROMA, 4818 v->RequiredDPPCLKThisState, 4819 v->ReadBandwidthLuma, 4820 v->ReadBandwidthChroma, 4821 v->ReturnBusWidth, 4822 &v->ProjectedDCFCLKDeepSleep[i][j]); 4823 4824 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 4825 v->swath_width_luma_ub_all_states[i][j][k] = v->swath_width_luma_ub_this_state[k]; 4826 v->swath_width_chroma_ub_all_states[i][j][k] = v->swath_width_chroma_ub_this_state[k]; 4827 v->SwathWidthYAllStates[i][j][k] = v->SwathWidthYThisState[k]; 4828 v->SwathWidthCAllStates[i][j][k] = v->SwathWidthCThisState[k]; 4829 v->SwathHeightYAllStates[i][j][k] = v->SwathHeightYThisState[k]; 4830 v->SwathHeightCAllStates[i][j][k] = v->SwathHeightCThisState[k]; 4831 v->DETBufferSizeYAllStates[i][j][k] = v->DETBufferSizeYThisState[k]; 4832 v->DETBufferSizeCAllStates[i][j][k] = v->DETBufferSizeCThisState[k]; 4833 } 4834 } 4835 } 4836 4837 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 4838 v->cursor_bw[k] = v->NumberOfCursors[k] * v->CursorWidth[k][0] * v->CursorBPP[k][0] / 8.0 4839 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatio[k]; 4840 } 4841 4842 for (i = 0; i < v->soc.num_states; i++) { 4843 for (j = 0; j < 2; j++) { 4844 bool NotUrgentLatencyHiding[DC__NUM_DPP__MAX]; 4845 4846 for (k = 0; k < v->NumberOfActivePlanes; k++) { 4847 v->swath_width_luma_ub_this_state[k] = v->swath_width_luma_ub_all_states[i][j][k]; 4848 v->swath_width_chroma_ub_this_state[k] = v->swath_width_chroma_ub_all_states[i][j][k]; 4849 v->SwathWidthYThisState[k] = v->SwathWidthYAllStates[i][j][k]; 4850 v->SwathWidthCThisState[k] = v->SwathWidthCAllStates[i][j][k]; 4851 v->SwathHeightYThisState[k] = v->SwathHeightYAllStates[i][j][k]; 4852 v->SwathHeightCThisState[k] = v->SwathHeightCAllStates[i][j][k]; 4853 v->DETBufferSizeYThisState[k] = v->DETBufferSizeYAllStates[i][j][k]; 4854 v->DETBufferSizeCThisState[k] = v->DETBufferSizeCAllStates[i][j][k]; 4855 } 4856 4857 v->TotalNumberOfDCCActiveDPP[i][j] = 0; 4858 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 4859 if (v->DCCEnable[k] == true) { 4860 v->TotalNumberOfDCCActiveDPP[i][j] = v->TotalNumberOfDCCActiveDPP[i][j] + v->NoOfDPP[i][j][k]; 4861 } 4862 } 4863 4864 for (k = 0; k < v->NumberOfActivePlanes; k++) { 4865 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10 4866 || v->SourcePixelFormat[k] == dm_420_12 || v->SourcePixelFormat[k] == dm_rgbe_alpha) { 4867 4868 if ((v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12) 4869 && v->SourceScan[k] != dm_vert) { 4870 v->PTEBufferSizeInRequestsForLuma = (v->PTEBufferSizeInRequestsLuma + v->PTEBufferSizeInRequestsChroma) 4871 / 2; 4872 v->PTEBufferSizeInRequestsForChroma = v->PTEBufferSizeInRequestsForLuma; 4873 } else { 4874 v->PTEBufferSizeInRequestsForLuma = v->PTEBufferSizeInRequestsLuma; 4875 v->PTEBufferSizeInRequestsForChroma = v->PTEBufferSizeInRequestsChroma; 4876 } 4877 4878 v->PDEAndMetaPTEBytesPerFrameC = CalculateVMAndRowBytes( 4879 mode_lib, 4880 v->DCCEnable[k], 4881 v->Read256BlockHeightC[k], 4882 v->Read256BlockWidthC[k], 4883 v->SourcePixelFormat[k], 4884 v->SurfaceTiling[k], 4885 v->BytePerPixelC[k], 4886 v->SourceScan[k], 4887 v->SwathWidthCThisState[k], 4888 v->ViewportHeightChroma[k], 4889 v->GPUVMEnable, 4890 v->HostVMEnable, 4891 v->HostVMMaxNonCachedPageTableLevels, 4892 v->GPUVMMinPageSize, 4893 v->HostVMMinPageSize, 4894 v->PTEBufferSizeInRequestsForChroma, 4895 v->PitchC[k], 4896 0.0, 4897 &v->MacroTileWidthC[k], 4898 &v->MetaRowBytesC, 4899 &v->DPTEBytesPerRowC, 4900 &v->PTEBufferSizeNotExceededC[i][j][k], 4901 &v->dummyinteger7, 4902 &v->dpte_row_height_chroma[k], 4903 &v->dummyinteger28, 4904 &v->dummyinteger26, 4905 &v->dummyinteger23, 4906 &v->meta_row_height_chroma[k], 4907 &v->dummyinteger8, 4908 &v->dummyinteger9, 4909 &v->dummyinteger19, 4910 &v->dummyinteger20, 4911 &v->dummyinteger17, 4912 &v->dummyinteger10, 4913 &v->dummyinteger11); 4914 4915 v->PrefetchLinesC[i][j][k] = CalculatePrefetchSourceLines( 4916 mode_lib, 4917 v->VRatioChroma[k], 4918 v->VTAPsChroma[k], 4919 v->Interlace[k], 4920 v->ProgressiveToInterlaceUnitInOPP, 4921 v->SwathHeightCThisState[k], 4922 v->ViewportYStartC[k], 4923 &v->PrefillC[k], 4924 &v->MaxNumSwC[k]); 4925 } else { 4926 v->PTEBufferSizeInRequestsForLuma = v->PTEBufferSizeInRequestsLuma + v->PTEBufferSizeInRequestsChroma; 4927 v->PTEBufferSizeInRequestsForChroma = 0; 4928 v->PDEAndMetaPTEBytesPerFrameC = 0.0; 4929 v->MetaRowBytesC = 0.0; 4930 v->DPTEBytesPerRowC = 0.0; 4931 v->PrefetchLinesC[i][j][k] = 0.0; 4932 v->PTEBufferSizeNotExceededC[i][j][k] = true; 4933 } 4934 v->PDEAndMetaPTEBytesPerFrameY = CalculateVMAndRowBytes( 4935 mode_lib, 4936 v->DCCEnable[k], 4937 v->Read256BlockHeightY[k], 4938 v->Read256BlockWidthY[k], 4939 v->SourcePixelFormat[k], 4940 v->SurfaceTiling[k], 4941 v->BytePerPixelY[k], 4942 v->SourceScan[k], 4943 v->SwathWidthYThisState[k], 4944 v->ViewportHeight[k], 4945 v->GPUVMEnable, 4946 v->HostVMEnable, 4947 v->HostVMMaxNonCachedPageTableLevels, 4948 v->GPUVMMinPageSize, 4949 v->HostVMMinPageSize, 4950 v->PTEBufferSizeInRequestsForLuma, 4951 v->PitchY[k], 4952 v->DCCMetaPitchY[k], 4953 &v->MacroTileWidthY[k], 4954 &v->MetaRowBytesY, 4955 &v->DPTEBytesPerRowY, 4956 &v->PTEBufferSizeNotExceededY[i][j][k], 4957 &v->dummyinteger7, 4958 &v->dpte_row_height[k], 4959 &v->dummyinteger29, 4960 &v->dummyinteger27, 4961 &v->dummyinteger24, 4962 &v->meta_row_height[k], 4963 &v->dummyinteger25, 4964 &v->dpte_group_bytes[k], 4965 &v->dummyinteger21, 4966 &v->dummyinteger22, 4967 &v->dummyinteger18, 4968 &v->dummyinteger5, 4969 &v->dummyinteger6); 4970 v->PrefetchLinesY[i][j][k] = CalculatePrefetchSourceLines( 4971 mode_lib, 4972 v->VRatio[k], 4973 v->vtaps[k], 4974 v->Interlace[k], 4975 v->ProgressiveToInterlaceUnitInOPP, 4976 v->SwathHeightYThisState[k], 4977 v->ViewportYStartY[k], 4978 &v->PrefillY[k], 4979 &v->MaxNumSwY[k]); 4980 v->PDEAndMetaPTEBytesPerFrame[i][j][k] = v->PDEAndMetaPTEBytesPerFrameY + v->PDEAndMetaPTEBytesPerFrameC; 4981 v->MetaRowBytes[i][j][k] = v->MetaRowBytesY + v->MetaRowBytesC; 4982 v->DPTEBytesPerRow[i][j][k] = v->DPTEBytesPerRowY + v->DPTEBytesPerRowC; 4983 4984 CalculateRowBandwidth( 4985 v->GPUVMEnable, 4986 v->SourcePixelFormat[k], 4987 v->VRatio[k], 4988 v->VRatioChroma[k], 4989 v->DCCEnable[k], 4990 v->HTotal[k] / v->PixelClock[k], 4991 v->MetaRowBytesY, 4992 v->MetaRowBytesC, 4993 v->meta_row_height[k], 4994 v->meta_row_height_chroma[k], 4995 v->DPTEBytesPerRowY, 4996 v->DPTEBytesPerRowC, 4997 v->dpte_row_height[k], 4998 v->dpte_row_height_chroma[k], 4999 &v->meta_row_bandwidth[i][j][k], 5000 &v->dpte_row_bandwidth[i][j][k]); 5001 } 5002 /*DCCMetaBufferSizeSupport(i, j) = True 5003 For k = 0 To NumberOfActivePlanes - 1 5004 If MetaRowBytes(i, j, k) > 24064 Then 5005 DCCMetaBufferSizeSupport(i, j) = False 5006 End If 5007 Next k*/ 5008 v->DCCMetaBufferSizeSupport[i][j] = true; 5009 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 5010 if (v->MetaRowBytes[i][j][k] > 24064) 5011 v->DCCMetaBufferSizeSupport[i][j] = false; 5012 } 5013 v->UrgLatency[i] = CalculateUrgentLatency( 5014 v->UrgentLatencyPixelDataOnly, 5015 v->UrgentLatencyPixelMixedWithVMData, 5016 v->UrgentLatencyVMDataOnly, 5017 v->DoUrgentLatencyAdjustment, 5018 v->UrgentLatencyAdjustmentFabricClockComponent, 5019 v->UrgentLatencyAdjustmentFabricClockReference, 5020 v->FabricClockPerState[i]); 5021 5022 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 5023 CalculateUrgentBurstFactor( 5024 v->swath_width_luma_ub_this_state[k], 5025 v->swath_width_chroma_ub_this_state[k], 5026 v->SwathHeightYThisState[k], 5027 v->SwathHeightCThisState[k], 5028 v->HTotal[k] / v->PixelClock[k], 5029 v->UrgLatency[i], 5030 v->CursorBufferSize, 5031 v->CursorWidth[k][0], 5032 v->CursorBPP[k][0], 5033 v->VRatio[k], 5034 v->VRatioChroma[k], 5035 v->BytePerPixelInDETY[k], 5036 v->BytePerPixelInDETC[k], 5037 v->DETBufferSizeYThisState[k], 5038 v->DETBufferSizeCThisState[k], 5039 &v->UrgentBurstFactorCursor[k], 5040 &v->UrgentBurstFactorLuma[k], 5041 &v->UrgentBurstFactorChroma[k], 5042 &NotUrgentLatencyHiding[k]); 5043 } 5044 5045 v->NotEnoughUrgentLatencyHidingA[i][j] = false; 5046 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 5047 if (NotUrgentLatencyHiding[k]) { 5048 v->NotEnoughUrgentLatencyHidingA[i][j] = true; 5049 } 5050 } 5051 5052 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 5053 v->VActivePixelBandwidth[i][j][k] = v->ReadBandwidthLuma[k] * v->UrgentBurstFactorLuma[k] 5054 + v->ReadBandwidthChroma[k] * v->UrgentBurstFactorChroma[k]; 5055 v->VActiveCursorBandwidth[i][j][k] = v->cursor_bw[k] * v->UrgentBurstFactorCursor[k]; 5056 } 5057 5058 v->TotalVActivePixelBandwidth[i][j] = 0; 5059 v->TotalVActiveCursorBandwidth[i][j] = 0; 5060 v->TotalMetaRowBandwidth[i][j] = 0; 5061 v->TotalDPTERowBandwidth[i][j] = 0; 5062 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 5063 v->TotalVActivePixelBandwidth[i][j] = v->TotalVActivePixelBandwidth[i][j] + v->VActivePixelBandwidth[i][j][k]; 5064 v->TotalVActiveCursorBandwidth[i][j] = v->TotalVActiveCursorBandwidth[i][j] + v->VActiveCursorBandwidth[i][j][k]; 5065 v->TotalMetaRowBandwidth[i][j] = v->TotalMetaRowBandwidth[i][j] + v->NoOfDPP[i][j][k] * v->meta_row_bandwidth[i][j][k]; 5066 v->TotalDPTERowBandwidth[i][j] = v->TotalDPTERowBandwidth[i][j] + v->NoOfDPP[i][j][k] * v->dpte_row_bandwidth[i][j][k]; 5067 } 5068 } 5069 } 5070 5071 //Calculate Return BW 5072 for (i = 0; i < v->soc.num_states; ++i) { 5073 for (j = 0; j <= 1; ++j) { 5074 for (k = 0; k < v->NumberOfActivePlanes; k++) { 5075 if (v->BlendingAndTiming[k] == k) { 5076 if (v->WritebackEnable[k] == true) { 5077 v->WritebackDelayTime[k] = v->WritebackLatency 5078 + CalculateWriteBackDelay( 5079 v->WritebackPixelFormat[k], 5080 v->WritebackHRatio[k], 5081 v->WritebackVRatio[k], 5082 v->WritebackVTaps[k], 5083 v->WritebackDestinationWidth[k], 5084 v->WritebackDestinationHeight[k], 5085 v->WritebackSourceHeight[k], 5086 v->HTotal[k]) / v->RequiredDISPCLK[i][j]; 5087 } else { 5088 v->WritebackDelayTime[k] = 0.0; 5089 } 5090 for (m = 0; m < v->NumberOfActivePlanes; m++) { 5091 if (v->BlendingAndTiming[m] == k && v->WritebackEnable[m] == true) { 5092 v->WritebackDelayTime[k] = dml_max( 5093 v->WritebackDelayTime[k], 5094 v->WritebackLatency 5095 + CalculateWriteBackDelay( 5096 v->WritebackPixelFormat[m], 5097 v->WritebackHRatio[m], 5098 v->WritebackVRatio[m], 5099 v->WritebackVTaps[m], 5100 v->WritebackDestinationWidth[m], 5101 v->WritebackDestinationHeight[m], 5102 v->WritebackSourceHeight[m], 5103 v->HTotal[m]) / v->RequiredDISPCLK[i][j]); 5104 } 5105 } 5106 } 5107 } 5108 for (k = 0; k < v->NumberOfActivePlanes; k++) { 5109 for (m = 0; m < v->NumberOfActivePlanes; m++) { 5110 if (v->BlendingAndTiming[k] == m) { 5111 v->WritebackDelayTime[k] = v->WritebackDelayTime[m]; 5112 } 5113 } 5114 } 5115 v->MaxMaxVStartup[i][j] = 0; 5116 for (k = 0; k < v->NumberOfActivePlanes; k++) { 5117 v->MaximumVStartup[i][j][k] = 5118 (v->Interlace[k] && !v->ProgressiveToInterlaceUnitInOPP) ? 5119 dml_floor((v->VTotal[k] - v->VActive[k]) / 2.0, 1.0) : 5120 v->VTotal[k] - v->VActive[k] 5121 - dml_max( 5122 1.0, 5123 dml_ceil( 5124 1.0 * v->WritebackDelayTime[k] 5125 / (v->HTotal[k] 5126 / v->PixelClock[k]), 5127 1.0)); 5128 if (v->MaximumVStartup[i][j][k] > 1023) 5129 v->MaximumVStartup[i][j][k] = 1023; 5130 v->MaxMaxVStartup[i][j] = dml_max(v->MaxMaxVStartup[i][j], v->MaximumVStartup[i][j][k]); 5131 } 5132 } 5133 } 5134 5135 ReorderingBytes = v->NumberOfChannels 5136 * dml_max3( 5137 v->UrgentOutOfOrderReturnPerChannelPixelDataOnly, 5138 v->UrgentOutOfOrderReturnPerChannelPixelMixedWithVMData, 5139 v->UrgentOutOfOrderReturnPerChannelVMDataOnly); 5140 5141 for (i = 0; i < v->soc.num_states; ++i) { 5142 for (j = 0; j <= 1; ++j) { 5143 v->DCFCLKState[i][j] = v->DCFCLKPerState[i]; 5144 } 5145 } 5146 5147 if (v->UseMinimumRequiredDCFCLK == true) 5148 UseMinimumDCFCLK(mode_lib, MaxPrefetchMode, ReorderingBytes); 5149 5150 for (i = 0; i < v->soc.num_states; ++i) { 5151 for (j = 0; j <= 1; ++j) { 5152 double IdealFabricAndSDPPortBandwidthPerState = dml_min( 5153 v->ReturnBusWidth * v->DCFCLKState[i][j], 5154 v->FabricClockPerState[i] * v->FabricDatapathToDCNDataReturn); 5155 double IdealDRAMBandwidthPerState = v->DRAMSpeedPerState[i] * v->NumberOfChannels * v->DRAMChannelWidth; 5156 double PixelDataOnlyReturnBWPerState = dml_min( 5157 IdealFabricAndSDPPortBandwidthPerState * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0, 5158 IdealDRAMBandwidthPerState * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyPixelDataOnly / 100.0); 5159 double PixelMixedWithVMDataReturnBWPerState = dml_min( 5160 IdealFabricAndSDPPortBandwidthPerState * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0, 5161 IdealDRAMBandwidthPerState * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyPixelMixedWithVMData / 100.0); 5162 5163 if (v->HostVMEnable != true) { 5164 v->ReturnBWPerState[i][j] = PixelDataOnlyReturnBWPerState; 5165 } else { 5166 v->ReturnBWPerState[i][j] = PixelMixedWithVMDataReturnBWPerState; 5167 } 5168 } 5169 } 5170 5171 //Re-ordering Buffer Support Check 5172 for (i = 0; i < v->soc.num_states; ++i) { 5173 for (j = 0; j <= 1; ++j) { 5174 if ((v->ROBBufferSizeInKByte - v->PixelChunkSizeInKByte) * 1024 / v->ReturnBWPerState[i][j] 5175 > (v->RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__) / v->DCFCLKState[i][j] + ReorderingBytes / v->ReturnBWPerState[i][j]) { 5176 v->ROBSupport[i][j] = true; 5177 } else { 5178 v->ROBSupport[i][j] = false; 5179 } 5180 } 5181 } 5182 5183 //Vertical Active BW support check 5184 5185 MaxTotalVActiveRDBandwidth = 0; 5186 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 5187 MaxTotalVActiveRDBandwidth = MaxTotalVActiveRDBandwidth + v->ReadBandwidthLuma[k] + v->ReadBandwidthChroma[k]; 5188 } 5189 5190 for (i = 0; i < v->soc.num_states; ++i) { 5191 for (j = 0; j <= 1; ++j) { 5192 v->MaxTotalVerticalActiveAvailableBandwidth[i][j] = dml_min( 5193 dml_min( 5194 v->ReturnBusWidth * v->DCFCLKState[i][j], 5195 v->FabricClockPerState[i] * v->FabricDatapathToDCNDataReturn) 5196 * v->MaxAveragePercentOfIdealFabricAndSDPPortBWDisplayCanUseInNormalSystemOperation / 100, 5197 v->DRAMSpeedPerState[i] * v->NumberOfChannels * v->DRAMChannelWidth 5198 * v->MaxAveragePercentOfIdealDRAMBWDisplayCanUseInNormalSystemOperation / 100); 5199 5200 if (MaxTotalVActiveRDBandwidth <= v->MaxTotalVerticalActiveAvailableBandwidth[i][j]) { 5201 v->TotalVerticalActiveBandwidthSupport[i][j] = true; 5202 } else { 5203 v->TotalVerticalActiveBandwidthSupport[i][j] = false; 5204 } 5205 } 5206 } 5207 5208 v->UrgentLatency = CalculateUrgentLatency( 5209 v->UrgentLatencyPixelDataOnly, 5210 v->UrgentLatencyPixelMixedWithVMData, 5211 v->UrgentLatencyVMDataOnly, 5212 v->DoUrgentLatencyAdjustment, 5213 v->UrgentLatencyAdjustmentFabricClockComponent, 5214 v->UrgentLatencyAdjustmentFabricClockReference, 5215 v->FabricClock); 5216 //Prefetch Check 5217 for (i = 0; i < v->soc.num_states; ++i) { 5218 for (j = 0; j <= 1; ++j) { 5219 double VMDataOnlyReturnBWPerState; 5220 double HostVMInefficiencyFactor = 1; 5221 int NextPrefetchModeState = MinPrefetchMode; 5222 bool UnboundedRequestEnabledThisState = false; 5223 int CompressedBufferSizeInkByteThisState = 0; 5224 double dummy; 5225 5226 v->TimeCalc = 24 / v->ProjectedDCFCLKDeepSleep[i][j]; 5227 5228 v->BandwidthWithoutPrefetchSupported[i][j] = true; 5229 if (v->TotalVActivePixelBandwidth[i][j] + v->TotalVActiveCursorBandwidth[i][j] + v->TotalMetaRowBandwidth[i][j] 5230 + v->TotalDPTERowBandwidth[i][j] > v->ReturnBWPerState[i][j] || v->NotEnoughUrgentLatencyHidingA[i][j]) { 5231 v->BandwidthWithoutPrefetchSupported[i][j] = false; 5232 } 5233 5234 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 5235 v->NoOfDPPThisState[k] = v->NoOfDPP[i][j][k]; 5236 v->swath_width_luma_ub_this_state[k] = v->swath_width_luma_ub_all_states[i][j][k]; 5237 v->swath_width_chroma_ub_this_state[k] = v->swath_width_chroma_ub_all_states[i][j][k]; 5238 v->SwathWidthYThisState[k] = v->SwathWidthYAllStates[i][j][k]; 5239 v->SwathWidthCThisState[k] = v->SwathWidthCAllStates[i][j][k]; 5240 v->SwathHeightYThisState[k] = v->SwathHeightYAllStates[i][j][k]; 5241 v->SwathHeightCThisState[k] = v->SwathHeightCAllStates[i][j][k]; 5242 v->DETBufferSizeYThisState[k] = v->DETBufferSizeYAllStates[i][j][k]; 5243 v->DETBufferSizeCThisState[k] = v->DETBufferSizeCAllStates[i][j][k]; 5244 } 5245 5246 VMDataOnlyReturnBWPerState = dml_min( 5247 dml_min( 5248 v->ReturnBusWidth * v->DCFCLKState[i][j], 5249 v->FabricClockPerState[i] * v->FabricDatapathToDCNDataReturn) 5250 * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0, 5251 v->DRAMSpeedPerState[i] * v->NumberOfChannels * v->DRAMChannelWidth 5252 * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyVMDataOnly / 100.0); 5253 if (v->GPUVMEnable && v->HostVMEnable) 5254 HostVMInefficiencyFactor = v->ReturnBWPerState[i][j] / VMDataOnlyReturnBWPerState; 5255 5256 v->ExtraLatency = CalculateExtraLatency( 5257 v->RoundTripPingLatencyCycles, 5258 ReorderingBytes, 5259 v->DCFCLKState[i][j], 5260 v->TotalNumberOfActiveDPP[i][j], 5261 v->PixelChunkSizeInKByte, 5262 v->TotalNumberOfDCCActiveDPP[i][j], 5263 v->MetaChunkSize, 5264 v->ReturnBWPerState[i][j], 5265 v->GPUVMEnable, 5266 v->HostVMEnable, 5267 v->NumberOfActivePlanes, 5268 v->NoOfDPPThisState, 5269 v->dpte_group_bytes, 5270 HostVMInefficiencyFactor, 5271 v->HostVMMinPageSize, 5272 v->HostVMMaxNonCachedPageTableLevels); 5273 5274 v->NextMaxVStartup = v->MaxMaxVStartup[i][j]; 5275 do { 5276 v->PrefetchModePerState[i][j] = NextPrefetchModeState; 5277 v->MaxVStartup = v->NextMaxVStartup; 5278 5279 v->TWait = CalculateTWait( 5280 v->PrefetchModePerState[i][j], 5281 v->DRAMClockChangeLatency, 5282 v->UrgLatency[i], 5283 v->SREnterPlusExitTime); 5284 5285 for (k = 0; k < v->NumberOfActivePlanes; k++) { 5286 CalculatePrefetchSchedulePerPlane(mode_lib, 5287 HostVMInefficiencyFactor, 5288 i, j, k); 5289 } 5290 5291 for (k = 0; k < v->NumberOfActivePlanes; k++) { 5292 CalculateUrgentBurstFactor( 5293 v->swath_width_luma_ub_this_state[k], 5294 v->swath_width_chroma_ub_this_state[k], 5295 v->SwathHeightYThisState[k], 5296 v->SwathHeightCThisState[k], 5297 v->HTotal[k] / v->PixelClock[k], 5298 v->UrgentLatency, 5299 v->CursorBufferSize, 5300 v->CursorWidth[k][0], 5301 v->CursorBPP[k][0], 5302 v->VRatioPreY[i][j][k], 5303 v->VRatioPreC[i][j][k], 5304 v->BytePerPixelInDETY[k], 5305 v->BytePerPixelInDETC[k], 5306 v->DETBufferSizeYThisState[k], 5307 v->DETBufferSizeCThisState[k], 5308 &v->UrgentBurstFactorCursorPre[k], 5309 &v->UrgentBurstFactorLumaPre[k], 5310 &v->UrgentBurstFactorChroma[k], 5311 &v->NotUrgentLatencyHidingPre[k]); 5312 } 5313 5314 v->MaximumReadBandwidthWithPrefetch = 0.0; 5315 for (k = 0; k < v->NumberOfActivePlanes; k++) { 5316 v->cursor_bw_pre[k] = v->NumberOfCursors[k] * v->CursorWidth[k][0] * v->CursorBPP[k][0] / 8.0 5317 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatioPreY[i][j][k]; 5318 5319 v->MaximumReadBandwidthWithPrefetch = 5320 v->MaximumReadBandwidthWithPrefetch 5321 + dml_max3( 5322 v->VActivePixelBandwidth[i][j][k] 5323 + v->VActiveCursorBandwidth[i][j][k] 5324 + v->NoOfDPP[i][j][k] 5325 * (v->meta_row_bandwidth[i][j][k] 5326 + v->dpte_row_bandwidth[i][j][k]), 5327 v->NoOfDPP[i][j][k] * v->prefetch_vmrow_bw[k], 5328 v->NoOfDPP[i][j][k] 5329 * (v->RequiredPrefetchPixelDataBWLuma[i][j][k] 5330 * v->UrgentBurstFactorLumaPre[k] 5331 + v->RequiredPrefetchPixelDataBWChroma[i][j][k] 5332 * v->UrgentBurstFactorChromaPre[k]) 5333 + v->cursor_bw_pre[k] * v->UrgentBurstFactorCursorPre[k]); 5334 } 5335 5336 v->NotEnoughUrgentLatencyHidingPre = false; 5337 for (k = 0; k < v->NumberOfActivePlanes; k++) { 5338 if (v->NotUrgentLatencyHidingPre[k] == true) { 5339 v->NotEnoughUrgentLatencyHidingPre = true; 5340 } 5341 } 5342 5343 v->PrefetchSupported[i][j] = true; 5344 if (v->BandwidthWithoutPrefetchSupported[i][j] == false || v->MaximumReadBandwidthWithPrefetch > v->ReturnBWPerState[i][j] 5345 || v->NotEnoughUrgentLatencyHidingPre == 1) { 5346 v->PrefetchSupported[i][j] = false; 5347 } 5348 for (k = 0; k < v->NumberOfActivePlanes; k++) { 5349 if (v->LineTimesForPrefetch[k] < 2.0 || v->LinesForMetaPTE[k] >= 32.0 || v->LinesForMetaAndDPTERow[k] >= 16.0 5350 || v->NoTimeForPrefetch[i][j][k] == true) { 5351 v->PrefetchSupported[i][j] = false; 5352 } 5353 } 5354 5355 v->DynamicMetadataSupported[i][j] = true; 5356 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 5357 if (v->NoTimeForDynamicMetadata[i][j][k] == true) { 5358 v->DynamicMetadataSupported[i][j] = false; 5359 } 5360 } 5361 5362 v->VRatioInPrefetchSupported[i][j] = true; 5363 for (k = 0; k < v->NumberOfActivePlanes; k++) { 5364 if (v->VRatioPreY[i][j][k] > 4.0 || v->VRatioPreC[i][j][k] > 4.0 || v->NoTimeForPrefetch[i][j][k] == true) { 5365 v->VRatioInPrefetchSupported[i][j] = false; 5366 } 5367 } 5368 v->AnyLinesForVMOrRowTooLarge = false; 5369 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 5370 if (v->LinesForMetaAndDPTERow[k] >= 16 || v->LinesForMetaPTE[k] >= 32) { 5371 v->AnyLinesForVMOrRowTooLarge = true; 5372 } 5373 } 5374 5375 v->NextPrefetchMode = v->NextPrefetchMode + 1; 5376 5377 if (v->PrefetchSupported[i][j] == true && v->VRatioInPrefetchSupported[i][j] == true) { 5378 v->BandwidthAvailableForImmediateFlip = v->ReturnBWPerState[i][j]; 5379 for (k = 0; k < v->NumberOfActivePlanes; k++) { 5380 v->BandwidthAvailableForImmediateFlip = v->BandwidthAvailableForImmediateFlip 5381 - dml_max( 5382 v->VActivePixelBandwidth[i][j][k] + v->VActiveCursorBandwidth[i][j][k], 5383 v->NoOfDPP[i][j][k] 5384 * (v->RequiredPrefetchPixelDataBWLuma[i][j][k] 5385 * v->UrgentBurstFactorLumaPre[k] 5386 + v->RequiredPrefetchPixelDataBWChroma[i][j][k] 5387 * v->UrgentBurstFactorChromaPre[k]) 5388 + v->cursor_bw_pre[k] * v->UrgentBurstFactorCursorPre[k]); 5389 } 5390 v->TotImmediateFlipBytes = 0.0; 5391 for (k = 0; k < v->NumberOfActivePlanes; k++) { 5392 v->TotImmediateFlipBytes = v->TotImmediateFlipBytes 5393 + v->NoOfDPP[i][j][k] * v->PDEAndMetaPTEBytesPerFrame[i][j][k] + v->MetaRowBytes[i][j][k] 5394 + v->DPTEBytesPerRow[i][j][k]; 5395 } 5396 5397 for (k = 0; k < v->NumberOfActivePlanes; k++) { 5398 CalculateFlipSchedule( 5399 mode_lib, 5400 HostVMInefficiencyFactor, 5401 v->ExtraLatency, 5402 v->UrgLatency[i], 5403 v->GPUVMMaxPageTableLevels, 5404 v->HostVMEnable, 5405 v->HostVMMaxNonCachedPageTableLevels, 5406 v->GPUVMEnable, 5407 v->HostVMMinPageSize, 5408 v->PDEAndMetaPTEBytesPerFrame[i][j][k], 5409 v->MetaRowBytes[i][j][k], 5410 v->DPTEBytesPerRow[i][j][k], 5411 v->BandwidthAvailableForImmediateFlip, 5412 v->TotImmediateFlipBytes, 5413 v->SourcePixelFormat[k], 5414 v->HTotal[k] / v->PixelClock[k], 5415 v->VRatio[k], 5416 v->VRatioChroma[k], 5417 v->Tno_bw[k], 5418 v->DCCEnable[k], 5419 v->dpte_row_height[k], 5420 v->meta_row_height[k], 5421 v->dpte_row_height_chroma[k], 5422 v->meta_row_height_chroma[k], 5423 &v->DestinationLinesToRequestVMInImmediateFlip[k], 5424 &v->DestinationLinesToRequestRowInImmediateFlip[k], 5425 &v->final_flip_bw[k], 5426 &v->ImmediateFlipSupportedForPipe[k]); 5427 } 5428 v->total_dcn_read_bw_with_flip = 0.0; 5429 for (k = 0; k < v->NumberOfActivePlanes; k++) { 5430 v->total_dcn_read_bw_with_flip = v->total_dcn_read_bw_with_flip 5431 + dml_max3( 5432 v->NoOfDPP[i][j][k] * v->prefetch_vmrow_bw[k], 5433 v->NoOfDPP[i][j][k] * v->final_flip_bw[k] + v->VActivePixelBandwidth[i][j][k] 5434 + v->VActiveCursorBandwidth[i][j][k], 5435 v->NoOfDPP[i][j][k] 5436 * (v->final_flip_bw[k] 5437 + v->RequiredPrefetchPixelDataBWLuma[i][j][k] 5438 * v->UrgentBurstFactorLumaPre[k] 5439 + v->RequiredPrefetchPixelDataBWChroma[i][j][k] 5440 * v->UrgentBurstFactorChromaPre[k]) 5441 + v->cursor_bw_pre[k] * v->UrgentBurstFactorCursorPre[k]); 5442 } 5443 v->ImmediateFlipSupportedForState[i][j] = true; 5444 if (v->total_dcn_read_bw_with_flip > v->ReturnBWPerState[i][j]) { 5445 v->ImmediateFlipSupportedForState[i][j] = false; 5446 } 5447 for (k = 0; k < v->NumberOfActivePlanes; k++) { 5448 if (v->ImmediateFlipSupportedForPipe[k] == false) { 5449 v->ImmediateFlipSupportedForState[i][j] = false; 5450 } 5451 } 5452 } else { 5453 v->ImmediateFlipSupportedForState[i][j] = false; 5454 } 5455 5456 if (v->MaxVStartup <= __DML_VBA_MIN_VSTARTUP__ || v->AnyLinesForVMOrRowTooLarge == false) { 5457 v->NextMaxVStartup = v->MaxMaxVStartup[i][j]; 5458 NextPrefetchModeState = NextPrefetchModeState + 1; 5459 } else { 5460 v->NextMaxVStartup = v->NextMaxVStartup - 1; 5461 } 5462 v->NextPrefetchMode = v->NextPrefetchMode + 1; 5463 } while (!((v->PrefetchSupported[i][j] == true && v->DynamicMetadataSupported[i][j] == true && v->VRatioInPrefetchSupported[i][j] == true 5464 && ((v->HostVMEnable == false && 5465 v->ImmediateFlipRequirement[0] != dm_immediate_flip_required) 5466 || v->ImmediateFlipSupportedForState[i][j] == true)) 5467 || (v->NextMaxVStartup == v->MaxMaxVStartup[i][j] && NextPrefetchModeState > MaxPrefetchMode))); 5468 5469 CalculateUnboundedRequestAndCompressedBufferSize( 5470 v->DETBufferSizeInKByte[0], 5471 v->ConfigReturnBufferSizeInKByte, 5472 v->UseUnboundedRequesting, 5473 v->TotalNumberOfActiveDPP[i][j], 5474 NoChroma, 5475 v->MaxNumDPP, 5476 v->CompressedBufferSegmentSizeInkByte, 5477 v->Output, 5478 &UnboundedRequestEnabledThisState, 5479 &CompressedBufferSizeInkByteThisState); 5480 5481 CalculateWatermarksAndDRAMSpeedChangeSupport( 5482 mode_lib, 5483 v->PrefetchModePerState[i][j], 5484 v->NumberOfActivePlanes, 5485 v->MaxLineBufferLines, 5486 v->LineBufferSize, 5487 v->WritebackInterfaceBufferSize, 5488 v->DCFCLKState[i][j], 5489 v->ReturnBWPerState[i][j], 5490 v->SynchronizedVBlank, 5491 v->dpte_group_bytes, 5492 v->MetaChunkSize, 5493 v->UrgLatency[i], 5494 v->ExtraLatency, 5495 v->WritebackLatency, 5496 v->WritebackChunkSize, 5497 v->SOCCLKPerState[i], 5498 v->DRAMClockChangeLatency, 5499 v->SRExitTime, 5500 v->SREnterPlusExitTime, 5501 v->SRExitZ8Time, 5502 v->SREnterPlusExitZ8Time, 5503 v->ProjectedDCFCLKDeepSleep[i][j], 5504 v->DETBufferSizeYThisState, 5505 v->DETBufferSizeCThisState, 5506 v->SwathHeightYThisState, 5507 v->SwathHeightCThisState, 5508 v->LBBitPerPixel, 5509 v->SwathWidthYThisState, 5510 v->SwathWidthCThisState, 5511 v->HRatio, 5512 v->HRatioChroma, 5513 v->vtaps, 5514 v->VTAPsChroma, 5515 v->VRatio, 5516 v->VRatioChroma, 5517 v->HTotal, 5518 v->PixelClock, 5519 v->BlendingAndTiming, 5520 v->NoOfDPPThisState, 5521 v->BytePerPixelInDETY, 5522 v->BytePerPixelInDETC, 5523 v->DSTXAfterScaler, 5524 v->DSTYAfterScaler, 5525 v->WritebackEnable, 5526 v->WritebackPixelFormat, 5527 v->WritebackDestinationWidth, 5528 v->WritebackDestinationHeight, 5529 v->WritebackSourceHeight, 5530 UnboundedRequestEnabledThisState, 5531 CompressedBufferSizeInkByteThisState, 5532 &v->DRAMClockChangeSupport[i][j], 5533 &v->UrgentWatermark, 5534 &v->WritebackUrgentWatermark, 5535 &v->DRAMClockChangeWatermark, 5536 &v->WritebackDRAMClockChangeWatermark, 5537 &dummy, 5538 &dummy, 5539 &dummy, 5540 &dummy, 5541 &v->MinActiveDRAMClockChangeLatencySupported); 5542 } 5543 } 5544 5545 /*PTE Buffer Size Check*/ 5546 for (i = 0; i < v->soc.num_states; i++) { 5547 for (j = 0; j < 2; j++) { 5548 v->PTEBufferSizeNotExceeded[i][j] = true; 5549 for (k = 0; k < v->NumberOfActivePlanes; k++) { 5550 if (v->PTEBufferSizeNotExceededY[i][j][k] == false || v->PTEBufferSizeNotExceededC[i][j][k] == false) { 5551 v->PTEBufferSizeNotExceeded[i][j] = false; 5552 } 5553 } 5554 } 5555 } 5556 5557 /*Cursor Support Check*/ 5558 v->CursorSupport = true; 5559 for (k = 0; k < v->NumberOfActivePlanes; k++) { 5560 if (v->CursorWidth[k][0] > 0.0) { 5561 if (v->CursorBPP[k][0] == 64 && v->Cursor64BppSupport == false) { 5562 v->CursorSupport = false; 5563 } 5564 } 5565 } 5566 5567 /*Valid Pitch Check*/ 5568 v->PitchSupport = true; 5569 for (k = 0; k < v->NumberOfActivePlanes; k++) { 5570 v->AlignedYPitch[k] = dml_ceil(dml_max(v->PitchY[k], v->SurfaceWidthY[k]), v->MacroTileWidthY[k]); 5571 if (v->DCCEnable[k] == true) { 5572 v->AlignedDCCMetaPitchY[k] = dml_ceil(dml_max(v->DCCMetaPitchY[k], v->SurfaceWidthY[k]), 64.0 * v->Read256BlockWidthY[k]); 5573 } else { 5574 v->AlignedDCCMetaPitchY[k] = v->DCCMetaPitchY[k]; 5575 } 5576 if (v->SourcePixelFormat[k] != dm_444_64 && v->SourcePixelFormat[k] != dm_444_32 && v->SourcePixelFormat[k] != dm_444_16 5577 && v->SourcePixelFormat[k] != dm_mono_16 && v->SourcePixelFormat[k] != dm_rgbe 5578 && v->SourcePixelFormat[k] != dm_mono_8) { 5579 v->AlignedCPitch[k] = dml_ceil(dml_max(v->PitchC[k], v->SurfaceWidthC[k]), v->MacroTileWidthC[k]); 5580 if (v->DCCEnable[k] == true) { 5581 v->AlignedDCCMetaPitchC[k] = dml_ceil( 5582 dml_max(v->DCCMetaPitchC[k], v->SurfaceWidthC[k]), 5583 64.0 * v->Read256BlockWidthC[k]); 5584 } else { 5585 v->AlignedDCCMetaPitchC[k] = v->DCCMetaPitchC[k]; 5586 } 5587 } else { 5588 v->AlignedCPitch[k] = v->PitchC[k]; 5589 v->AlignedDCCMetaPitchC[k] = v->DCCMetaPitchC[k]; 5590 } 5591 if (v->AlignedYPitch[k] > v->PitchY[k] || v->AlignedCPitch[k] > v->PitchC[k] 5592 || v->AlignedDCCMetaPitchY[k] > v->DCCMetaPitchY[k] || v->AlignedDCCMetaPitchC[k] > v->DCCMetaPitchC[k]) { 5593 v->PitchSupport = false; 5594 } 5595 } 5596 5597 for (k = 0; k < v->NumberOfActivePlanes; k++) { 5598 if (v->ViewportWidth[k] > v->SurfaceWidthY[k] || v->ViewportHeight[k] > v->SurfaceHeightY[k]) { 5599 ViewportExceedsSurface = true; 5600 if (v->SourcePixelFormat[k] != dm_444_64 && v->SourcePixelFormat[k] != dm_444_32 5601 && v->SourcePixelFormat[k] != dm_444_16 && v->SourcePixelFormat[k] != dm_444_8 5602 && v->SourcePixelFormat[k] != dm_rgbe) { 5603 if (v->ViewportWidthChroma[k] > v->SurfaceWidthC[k] 5604 || v->ViewportHeightChroma[k] > v->SurfaceHeightC[k]) { 5605 ViewportExceedsSurface = true; 5606 } 5607 } 5608 } 5609 } 5610 5611 /*Mode Support, Voltage State and SOC Configuration*/ 5612 for (i = v->soc.num_states - 1; i >= 0; i--) { 5613 for (j = 0; j < 2; j++) { 5614 if (v->ScaleRatioAndTapsSupport == true && v->SourceFormatPixelAndScanSupport == true && v->ViewportSizeSupport[i][j] == true 5615 && v->LinkCapacitySupport[i] == true && !P2IWith420 && !DSCOnlyIfNecessaryWithBPP 5616 && !DSC422NativeNotSupported && v->ODMCombine4To1SupportCheckOK[i] == true && v->NotEnoughDSCUnits[i] == false 5617 && v->DTBCLKRequiredMoreThanSupported[i] == false 5618 && v->ROBSupport[i][j] == true && v->DISPCLK_DPPCLK_Support[i][j] == true 5619 && v->TotalAvailablePipesSupport[i][j] == true && EnoughWritebackUnits == true 5620 && v->WritebackLatencySupport == true && v->WritebackScaleRatioAndTapsSupport == true 5621 && v->CursorSupport == true && v->PitchSupport == true && ViewportExceedsSurface == false 5622 && v->PrefetchSupported[i][j] == true && v->DynamicMetadataSupported[i][j] == true 5623 && v->TotalVerticalActiveBandwidthSupport[i][j] == true && v->VRatioInPrefetchSupported[i][j] == true 5624 && v->PTEBufferSizeNotExceeded[i][j] == true && v->NonsupportedDSCInputBPC == false 5625 && ((v->HostVMEnable == false 5626 && v->ImmediateFlipRequirement[0] != dm_immediate_flip_required) 5627 || v->ImmediateFlipSupportedForState[i][j] == true) 5628 && FMTBufferExceeded == false) { 5629 v->ModeSupport[i][j] = true; 5630 } else { 5631 v->ModeSupport[i][j] = false; 5632 } 5633 } 5634 } 5635 5636 { 5637 unsigned int MaximumMPCCombine = 0; 5638 for (i = v->soc.num_states; i >= 0; i--) { 5639 if (i == v->soc.num_states || v->ModeSupport[i][0] == true || v->ModeSupport[i][1] == true) { 5640 v->VoltageLevel = i; 5641 v->ModeIsSupported = v->ModeSupport[i][0] == true || v->ModeSupport[i][1] == true; 5642 if (v->ModeSupport[i][0] == true) { 5643 MaximumMPCCombine = 0; 5644 } else { 5645 MaximumMPCCombine = 1; 5646 } 5647 } 5648 } 5649 v->ImmediateFlipSupport = v->ImmediateFlipSupportedForState[v->VoltageLevel][MaximumMPCCombine]; 5650 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) { 5651 v->MPCCombineEnable[k] = v->MPCCombine[v->VoltageLevel][MaximumMPCCombine][k]; 5652 v->DPPPerPlane[k] = v->NoOfDPP[v->VoltageLevel][MaximumMPCCombine][k]; 5653 } 5654 v->DCFCLK = v->DCFCLKState[v->VoltageLevel][MaximumMPCCombine]; 5655 v->DRAMSpeed = v->DRAMSpeedPerState[v->VoltageLevel]; 5656 v->FabricClock = v->FabricClockPerState[v->VoltageLevel]; 5657 v->SOCCLK = v->SOCCLKPerState[v->VoltageLevel]; 5658 v->ReturnBW = v->ReturnBWPerState[v->VoltageLevel][MaximumMPCCombine]; 5659 v->maxMpcComb = MaximumMPCCombine; 5660 } 5661 } 5662 5663 static void CalculateWatermarksAndDRAMSpeedChangeSupport( 5664 struct display_mode_lib *mode_lib, 5665 unsigned int PrefetchMode, 5666 unsigned int NumberOfActivePlanes, 5667 unsigned int MaxLineBufferLines, 5668 unsigned int LineBufferSize, 5669 unsigned int WritebackInterfaceBufferSize, 5670 double DCFCLK, 5671 double ReturnBW, 5672 bool SynchronizedVBlank, 5673 unsigned int dpte_group_bytes[], 5674 unsigned int MetaChunkSize, 5675 double UrgentLatency, 5676 double ExtraLatency, 5677 double WritebackLatency, 5678 double WritebackChunkSize, 5679 double SOCCLK, 5680 double DRAMClockChangeLatency, 5681 double SRExitTime, 5682 double SREnterPlusExitTime, 5683 double SRExitZ8Time, 5684 double SREnterPlusExitZ8Time, 5685 double DCFCLKDeepSleep, 5686 unsigned int DETBufferSizeY[], 5687 unsigned int DETBufferSizeC[], 5688 unsigned int SwathHeightY[], 5689 unsigned int SwathHeightC[], 5690 unsigned int LBBitPerPixel[], 5691 double SwathWidthY[], 5692 double SwathWidthC[], 5693 double HRatio[], 5694 double HRatioChroma[], 5695 unsigned int vtaps[], 5696 unsigned int VTAPsChroma[], 5697 double VRatio[], 5698 double VRatioChroma[], 5699 unsigned int HTotal[], 5700 double PixelClock[], 5701 unsigned int BlendingAndTiming[], 5702 unsigned int DPPPerPlane[], 5703 double BytePerPixelDETY[], 5704 double BytePerPixelDETC[], 5705 double DSTXAfterScaler[], 5706 double DSTYAfterScaler[], 5707 bool WritebackEnable[], 5708 enum source_format_class WritebackPixelFormat[], 5709 double WritebackDestinationWidth[], 5710 double WritebackDestinationHeight[], 5711 double WritebackSourceHeight[], 5712 bool UnboundedRequestEnabled, 5713 int unsigned CompressedBufferSizeInkByte, 5714 enum clock_change_support *DRAMClockChangeSupport, 5715 double *UrgentWatermark, 5716 double *WritebackUrgentWatermark, 5717 double *DRAMClockChangeWatermark, 5718 double *WritebackDRAMClockChangeWatermark, 5719 double *StutterExitWatermark, 5720 double *StutterEnterPlusExitWatermark, 5721 double *Z8StutterExitWatermark, 5722 double *Z8StutterEnterPlusExitWatermark, 5723 double *MinActiveDRAMClockChangeLatencySupported) 5724 { 5725 struct vba_vars_st *v = &mode_lib->vba; 5726 double EffectiveLBLatencyHidingY; 5727 double EffectiveLBLatencyHidingC; 5728 double LinesInDETY[DC__NUM_DPP__MAX]; 5729 double LinesInDETC; 5730 unsigned int LinesInDETYRoundedDownToSwath[DC__NUM_DPP__MAX]; 5731 unsigned int LinesInDETCRoundedDownToSwath; 5732 double FullDETBufferingTimeY; 5733 double FullDETBufferingTimeC; 5734 double ActiveDRAMClockChangeLatencyMarginY; 5735 double ActiveDRAMClockChangeLatencyMarginC; 5736 double WritebackDRAMClockChangeLatencyMargin; 5737 double PlaneWithMinActiveDRAMClockChangeMargin; 5738 double SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank; 5739 double WritebackDRAMClockChangeLatencyHiding; 5740 double TotalPixelBW = 0.0; 5741 int k, j; 5742 5743 *UrgentWatermark = UrgentLatency + ExtraLatency; 5744 5745 #ifdef __DML_VBA_DEBUG__ 5746 dml_print("DML::%s: UrgentLatency = %f\n", __func__, UrgentLatency); 5747 dml_print("DML::%s: ExtraLatency = %f\n", __func__, ExtraLatency); 5748 dml_print("DML::%s: UrgentWatermark = %f\n", __func__, *UrgentWatermark); 5749 #endif 5750 5751 *DRAMClockChangeWatermark = DRAMClockChangeLatency + *UrgentWatermark; 5752 5753 #ifdef __DML_VBA_DEBUG__ 5754 dml_print("DML::%s: DRAMClockChangeLatency = %f\n", __func__, DRAMClockChangeLatency); 5755 dml_print("DML::%s: DRAMClockChangeWatermark = %f\n", __func__, *DRAMClockChangeWatermark); 5756 #endif 5757 5758 v->TotalActiveWriteback = 0; 5759 for (k = 0; k < NumberOfActivePlanes; ++k) { 5760 if (WritebackEnable[k] == true) { 5761 v->TotalActiveWriteback = v->TotalActiveWriteback + 1; 5762 } 5763 } 5764 5765 if (v->TotalActiveWriteback <= 1) { 5766 *WritebackUrgentWatermark = WritebackLatency; 5767 } else { 5768 *WritebackUrgentWatermark = WritebackLatency + WritebackChunkSize * 1024.0 / 32.0 / SOCCLK; 5769 } 5770 5771 if (v->TotalActiveWriteback <= 1) { 5772 *WritebackDRAMClockChangeWatermark = DRAMClockChangeLatency + WritebackLatency; 5773 } else { 5774 *WritebackDRAMClockChangeWatermark = DRAMClockChangeLatency + WritebackLatency + WritebackChunkSize * 1024.0 / 32.0 / SOCCLK; 5775 } 5776 5777 for (k = 0; k < NumberOfActivePlanes; ++k) { 5778 TotalPixelBW = TotalPixelBW 5779 + DPPPerPlane[k] * (SwathWidthY[k] * BytePerPixelDETY[k] * VRatio[k] + SwathWidthC[k] * BytePerPixelDETC[k] * VRatioChroma[k]) 5780 / (HTotal[k] / PixelClock[k]); 5781 } 5782 5783 for (k = 0; k < NumberOfActivePlanes; ++k) { 5784 double EffectiveDETBufferSizeY = DETBufferSizeY[k]; 5785 5786 v->LBLatencyHidingSourceLinesY = dml_min( 5787 (double) MaxLineBufferLines, 5788 dml_floor(LineBufferSize / LBBitPerPixel[k] / (SwathWidthY[k] / dml_max(HRatio[k], 1.0)), 1)) - (vtaps[k] - 1); 5789 5790 v->LBLatencyHidingSourceLinesC = dml_min( 5791 (double) MaxLineBufferLines, 5792 dml_floor(LineBufferSize / LBBitPerPixel[k] / (SwathWidthC[k] / dml_max(HRatioChroma[k], 1.0)), 1)) - (VTAPsChroma[k] - 1); 5793 5794 EffectiveLBLatencyHidingY = v->LBLatencyHidingSourceLinesY / VRatio[k] * (HTotal[k] / PixelClock[k]); 5795 5796 EffectiveLBLatencyHidingC = v->LBLatencyHidingSourceLinesC / VRatioChroma[k] * (HTotal[k] / PixelClock[k]); 5797 5798 if (UnboundedRequestEnabled) { 5799 EffectiveDETBufferSizeY = EffectiveDETBufferSizeY 5800 + CompressedBufferSizeInkByte * 1024 * SwathWidthY[k] * BytePerPixelDETY[k] * VRatio[k] / (HTotal[k] / PixelClock[k]) / TotalPixelBW; 5801 } 5802 5803 LinesInDETY[k] = (double) EffectiveDETBufferSizeY / BytePerPixelDETY[k] / SwathWidthY[k]; 5804 LinesInDETYRoundedDownToSwath[k] = dml_floor(LinesInDETY[k], SwathHeightY[k]); 5805 FullDETBufferingTimeY = LinesInDETYRoundedDownToSwath[k] * (HTotal[k] / PixelClock[k]) / VRatio[k]; 5806 if (BytePerPixelDETC[k] > 0) { 5807 LinesInDETC = v->DETBufferSizeC[k] / BytePerPixelDETC[k] / SwathWidthC[k]; 5808 LinesInDETCRoundedDownToSwath = dml_floor(LinesInDETC, SwathHeightC[k]); 5809 FullDETBufferingTimeC = LinesInDETCRoundedDownToSwath * (HTotal[k] / PixelClock[k]) / VRatioChroma[k]; 5810 } else { 5811 LinesInDETC = 0; 5812 FullDETBufferingTimeC = 999999; 5813 } 5814 5815 ActiveDRAMClockChangeLatencyMarginY = EffectiveLBLatencyHidingY + FullDETBufferingTimeY 5816 - ((double) DSTXAfterScaler[k] / HTotal[k] + DSTYAfterScaler[k]) * HTotal[k] / PixelClock[k] - *UrgentWatermark - *DRAMClockChangeWatermark; 5817 5818 if (NumberOfActivePlanes > 1) { 5819 ActiveDRAMClockChangeLatencyMarginY = ActiveDRAMClockChangeLatencyMarginY 5820 - (1 - 1.0 / NumberOfActivePlanes) * SwathHeightY[k] * HTotal[k] / PixelClock[k] / VRatio[k]; 5821 } 5822 5823 if (BytePerPixelDETC[k] > 0) { 5824 ActiveDRAMClockChangeLatencyMarginC = EffectiveLBLatencyHidingC + FullDETBufferingTimeC 5825 - ((double) DSTXAfterScaler[k] / HTotal[k] + DSTYAfterScaler[k]) * HTotal[k] / PixelClock[k] - *UrgentWatermark - *DRAMClockChangeWatermark; 5826 5827 if (NumberOfActivePlanes > 1) { 5828 ActiveDRAMClockChangeLatencyMarginC = ActiveDRAMClockChangeLatencyMarginC 5829 - (1 - 1.0 / NumberOfActivePlanes) * SwathHeightC[k] * HTotal[k] / PixelClock[k] / VRatioChroma[k]; 5830 } 5831 v->ActiveDRAMClockChangeLatencyMargin[k] = dml_min(ActiveDRAMClockChangeLatencyMarginY, ActiveDRAMClockChangeLatencyMarginC); 5832 } else { 5833 v->ActiveDRAMClockChangeLatencyMargin[k] = ActiveDRAMClockChangeLatencyMarginY; 5834 } 5835 5836 if (WritebackEnable[k] == true) { 5837 WritebackDRAMClockChangeLatencyHiding = WritebackInterfaceBufferSize * 1024 5838 / (WritebackDestinationWidth[k] * WritebackDestinationHeight[k] / (WritebackSourceHeight[k] * HTotal[k] / PixelClock[k]) * 4); 5839 if (WritebackPixelFormat[k] == dm_444_64) { 5840 WritebackDRAMClockChangeLatencyHiding = WritebackDRAMClockChangeLatencyHiding / 2; 5841 } 5842 WritebackDRAMClockChangeLatencyMargin = WritebackDRAMClockChangeLatencyHiding - v->WritebackDRAMClockChangeWatermark; 5843 v->ActiveDRAMClockChangeLatencyMargin[k] = dml_min(v->ActiveDRAMClockChangeLatencyMargin[k], WritebackDRAMClockChangeLatencyMargin); 5844 } 5845 } 5846 5847 v->MinActiveDRAMClockChangeMargin = 999999; 5848 PlaneWithMinActiveDRAMClockChangeMargin = 0; 5849 for (k = 0; k < NumberOfActivePlanes; ++k) { 5850 if (v->ActiveDRAMClockChangeLatencyMargin[k] < v->MinActiveDRAMClockChangeMargin) { 5851 v->MinActiveDRAMClockChangeMargin = v->ActiveDRAMClockChangeLatencyMargin[k]; 5852 if (BlendingAndTiming[k] == k) { 5853 PlaneWithMinActiveDRAMClockChangeMargin = k; 5854 } else { 5855 for (j = 0; j < NumberOfActivePlanes; ++j) { 5856 if (BlendingAndTiming[k] == j) { 5857 PlaneWithMinActiveDRAMClockChangeMargin = j; 5858 } 5859 } 5860 } 5861 } 5862 } 5863 5864 *MinActiveDRAMClockChangeLatencySupported = v->MinActiveDRAMClockChangeMargin + DRAMClockChangeLatency; 5865 5866 SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank = 999999; 5867 for (k = 0; k < NumberOfActivePlanes; ++k) { 5868 if (!((k == PlaneWithMinActiveDRAMClockChangeMargin) && (BlendingAndTiming[k] == k)) && !(BlendingAndTiming[k] == PlaneWithMinActiveDRAMClockChangeMargin) 5869 && v->ActiveDRAMClockChangeLatencyMargin[k] < SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank) { 5870 SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank = v->ActiveDRAMClockChangeLatencyMargin[k]; 5871 } 5872 } 5873 5874 v->TotalNumberOfActiveOTG = 0; 5875 5876 for (k = 0; k < NumberOfActivePlanes; ++k) { 5877 if (BlendingAndTiming[k] == k) { 5878 v->TotalNumberOfActiveOTG = v->TotalNumberOfActiveOTG + 1; 5879 } 5880 } 5881 5882 if (v->MinActiveDRAMClockChangeMargin > 0 && PrefetchMode == 0) { 5883 *DRAMClockChangeSupport = dm_dram_clock_change_vactive; 5884 } else if ((SynchronizedVBlank == true || v->TotalNumberOfActiveOTG == 1 5885 || SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank > 0) && PrefetchMode == 0) { 5886 *DRAMClockChangeSupport = dm_dram_clock_change_vblank; 5887 } else { 5888 *DRAMClockChangeSupport = dm_dram_clock_change_unsupported; 5889 } 5890 5891 *StutterExitWatermark = SRExitTime + ExtraLatency + 10 / DCFCLKDeepSleep; 5892 *StutterEnterPlusExitWatermark = (SREnterPlusExitTime + ExtraLatency + 10 / DCFCLKDeepSleep); 5893 *Z8StutterExitWatermark = SRExitZ8Time + ExtraLatency + 10 / DCFCLKDeepSleep; 5894 *Z8StutterEnterPlusExitWatermark = SREnterPlusExitZ8Time + ExtraLatency + 10 / DCFCLKDeepSleep; 5895 5896 #ifdef __DML_VBA_DEBUG__ 5897 dml_print("DML::%s: StutterExitWatermark = %f\n", __func__, *StutterExitWatermark); 5898 dml_print("DML::%s: StutterEnterPlusExitWatermark = %f\n", __func__, *StutterEnterPlusExitWatermark); 5899 dml_print("DML::%s: Z8StutterExitWatermark = %f\n", __func__, *Z8StutterExitWatermark); 5900 dml_print("DML::%s: Z8StutterEnterPlusExitWatermark = %f\n", __func__, *Z8StutterEnterPlusExitWatermark); 5901 #endif 5902 } 5903 5904 static void CalculateDCFCLKDeepSleep( 5905 struct display_mode_lib *mode_lib, 5906 unsigned int NumberOfActivePlanes, 5907 int BytePerPixelY[], 5908 int BytePerPixelC[], 5909 double VRatio[], 5910 double VRatioChroma[], 5911 double SwathWidthY[], 5912 double SwathWidthC[], 5913 unsigned int DPPPerPlane[], 5914 double HRatio[], 5915 double HRatioChroma[], 5916 double PixelClock[], 5917 double PSCL_THROUGHPUT[], 5918 double PSCL_THROUGHPUT_CHROMA[], 5919 double DPPCLK[], 5920 double ReadBandwidthLuma[], 5921 double ReadBandwidthChroma[], 5922 int ReturnBusWidth, 5923 double *DCFCLKDeepSleep) 5924 { 5925 struct vba_vars_st *v = &mode_lib->vba; 5926 double DisplayPipeLineDeliveryTimeLuma; 5927 double DisplayPipeLineDeliveryTimeChroma; 5928 double ReadBandwidth = 0.0; 5929 int k; 5930 5931 for (k = 0; k < NumberOfActivePlanes; ++k) { 5932 5933 if (VRatio[k] <= 1) { 5934 DisplayPipeLineDeliveryTimeLuma = SwathWidthY[k] * DPPPerPlane[k] / HRatio[k] / PixelClock[k]; 5935 } else { 5936 DisplayPipeLineDeliveryTimeLuma = SwathWidthY[k] / PSCL_THROUGHPUT[k] / DPPCLK[k]; 5937 } 5938 if (BytePerPixelC[k] == 0) { 5939 DisplayPipeLineDeliveryTimeChroma = 0; 5940 } else { 5941 if (VRatioChroma[k] <= 1) { 5942 DisplayPipeLineDeliveryTimeChroma = SwathWidthC[k] * DPPPerPlane[k] / HRatioChroma[k] / PixelClock[k]; 5943 } else { 5944 DisplayPipeLineDeliveryTimeChroma = SwathWidthC[k] / PSCL_THROUGHPUT_CHROMA[k] / DPPCLK[k]; 5945 } 5946 } 5947 5948 if (BytePerPixelC[k] > 0) { 5949 v->DCFCLKDeepSleepPerPlane[k] = dml_max(__DML_MIN_DCFCLK_FACTOR__ * SwathWidthY[k] * BytePerPixelY[k] / 32.0 / DisplayPipeLineDeliveryTimeLuma, 5950 __DML_MIN_DCFCLK_FACTOR__ * SwathWidthC[k] * BytePerPixelC[k] / 32.0 / DisplayPipeLineDeliveryTimeChroma); 5951 } else { 5952 v->DCFCLKDeepSleepPerPlane[k] = __DML_MIN_DCFCLK_FACTOR__ * SwathWidthY[k] * BytePerPixelY[k] / 64.0 / DisplayPipeLineDeliveryTimeLuma; 5953 } 5954 v->DCFCLKDeepSleepPerPlane[k] = dml_max(v->DCFCLKDeepSleepPerPlane[k], PixelClock[k] / 16); 5955 5956 } 5957 5958 for (k = 0; k < NumberOfActivePlanes; ++k) { 5959 ReadBandwidth = ReadBandwidth + ReadBandwidthLuma[k] + ReadBandwidthChroma[k]; 5960 } 5961 5962 *DCFCLKDeepSleep = dml_max(8.0, __DML_MIN_DCFCLK_FACTOR__ * ReadBandwidth / ReturnBusWidth); 5963 5964 for (k = 0; k < NumberOfActivePlanes; ++k) { 5965 *DCFCLKDeepSleep = dml_max(*DCFCLKDeepSleep, v->DCFCLKDeepSleepPerPlane[k]); 5966 } 5967 } 5968 5969 static void CalculateUrgentBurstFactor( 5970 int swath_width_luma_ub, 5971 int swath_width_chroma_ub, 5972 unsigned int SwathHeightY, 5973 unsigned int SwathHeightC, 5974 double LineTime, 5975 double UrgentLatency, 5976 double CursorBufferSize, 5977 unsigned int CursorWidth, 5978 unsigned int CursorBPP, 5979 double VRatio, 5980 double VRatioC, 5981 double BytePerPixelInDETY, 5982 double BytePerPixelInDETC, 5983 double DETBufferSizeY, 5984 double DETBufferSizeC, 5985 double *UrgentBurstFactorCursor, 5986 double *UrgentBurstFactorLuma, 5987 double *UrgentBurstFactorChroma, 5988 bool *NotEnoughUrgentLatencyHiding) 5989 { 5990 double LinesInDETLuma; 5991 double LinesInDETChroma; 5992 unsigned int LinesInCursorBuffer; 5993 double CursorBufferSizeInTime; 5994 double DETBufferSizeInTimeLuma; 5995 double DETBufferSizeInTimeChroma; 5996 5997 *NotEnoughUrgentLatencyHiding = 0; 5998 5999 if (CursorWidth > 0) { 6000 LinesInCursorBuffer = 1 << (unsigned int) dml_floor(dml_log2(CursorBufferSize * 1024.0 / (CursorWidth * CursorBPP / 8.0)), 1.0); 6001 if (VRatio > 0) { 6002 CursorBufferSizeInTime = LinesInCursorBuffer * LineTime / VRatio; 6003 if (CursorBufferSizeInTime - UrgentLatency <= 0) { 6004 *NotEnoughUrgentLatencyHiding = 1; 6005 *UrgentBurstFactorCursor = 0; 6006 } else { 6007 *UrgentBurstFactorCursor = CursorBufferSizeInTime / (CursorBufferSizeInTime - UrgentLatency); 6008 } 6009 } else { 6010 *UrgentBurstFactorCursor = 1; 6011 } 6012 } 6013 6014 LinesInDETLuma = DETBufferSizeY / BytePerPixelInDETY / swath_width_luma_ub; 6015 if (VRatio > 0) { 6016 DETBufferSizeInTimeLuma = dml_floor(LinesInDETLuma, SwathHeightY) * LineTime / VRatio; 6017 if (DETBufferSizeInTimeLuma - UrgentLatency <= 0) { 6018 *NotEnoughUrgentLatencyHiding = 1; 6019 *UrgentBurstFactorLuma = 0; 6020 } else { 6021 *UrgentBurstFactorLuma = DETBufferSizeInTimeLuma / (DETBufferSizeInTimeLuma - UrgentLatency); 6022 } 6023 } else { 6024 *UrgentBurstFactorLuma = 1; 6025 } 6026 6027 if (BytePerPixelInDETC > 0) { 6028 LinesInDETChroma = DETBufferSizeC / BytePerPixelInDETC / swath_width_chroma_ub; 6029 if (VRatio > 0) { 6030 DETBufferSizeInTimeChroma = dml_floor(LinesInDETChroma, SwathHeightC) * LineTime / VRatio; 6031 if (DETBufferSizeInTimeChroma - UrgentLatency <= 0) { 6032 *NotEnoughUrgentLatencyHiding = 1; 6033 *UrgentBurstFactorChroma = 0; 6034 } else { 6035 *UrgentBurstFactorChroma = DETBufferSizeInTimeChroma / (DETBufferSizeInTimeChroma - UrgentLatency); 6036 } 6037 } else { 6038 *UrgentBurstFactorChroma = 1; 6039 } 6040 } 6041 } 6042 6043 static void CalculatePixelDeliveryTimes( 6044 unsigned int NumberOfActivePlanes, 6045 double VRatio[], 6046 double VRatioChroma[], 6047 double VRatioPrefetchY[], 6048 double VRatioPrefetchC[], 6049 unsigned int swath_width_luma_ub[], 6050 unsigned int swath_width_chroma_ub[], 6051 unsigned int DPPPerPlane[], 6052 double HRatio[], 6053 double HRatioChroma[], 6054 double PixelClock[], 6055 double PSCL_THROUGHPUT[], 6056 double PSCL_THROUGHPUT_CHROMA[], 6057 double DPPCLK[], 6058 int BytePerPixelC[], 6059 enum scan_direction_class SourceScan[], 6060 unsigned int NumberOfCursors[], 6061 unsigned int CursorWidth[][DC__NUM_CURSOR__MAX], 6062 unsigned int CursorBPP[][DC__NUM_CURSOR__MAX], 6063 unsigned int BlockWidth256BytesY[], 6064 unsigned int BlockHeight256BytesY[], 6065 unsigned int BlockWidth256BytesC[], 6066 unsigned int BlockHeight256BytesC[], 6067 double DisplayPipeLineDeliveryTimeLuma[], 6068 double DisplayPipeLineDeliveryTimeChroma[], 6069 double DisplayPipeLineDeliveryTimeLumaPrefetch[], 6070 double DisplayPipeLineDeliveryTimeChromaPrefetch[], 6071 double DisplayPipeRequestDeliveryTimeLuma[], 6072 double DisplayPipeRequestDeliveryTimeChroma[], 6073 double DisplayPipeRequestDeliveryTimeLumaPrefetch[], 6074 double DisplayPipeRequestDeliveryTimeChromaPrefetch[], 6075 double CursorRequestDeliveryTime[], 6076 double CursorRequestDeliveryTimePrefetch[]) 6077 { 6078 double req_per_swath_ub; 6079 int k; 6080 6081 for (k = 0; k < NumberOfActivePlanes; ++k) { 6082 if (VRatio[k] <= 1) { 6083 DisplayPipeLineDeliveryTimeLuma[k] = swath_width_luma_ub[k] * DPPPerPlane[k] / HRatio[k] / PixelClock[k]; 6084 } else { 6085 DisplayPipeLineDeliveryTimeLuma[k] = swath_width_luma_ub[k] / PSCL_THROUGHPUT[k] / DPPCLK[k]; 6086 } 6087 6088 if (BytePerPixelC[k] == 0) { 6089 DisplayPipeLineDeliveryTimeChroma[k] = 0; 6090 } else { 6091 if (VRatioChroma[k] <= 1) { 6092 DisplayPipeLineDeliveryTimeChroma[k] = swath_width_chroma_ub[k] * DPPPerPlane[k] / HRatioChroma[k] / PixelClock[k]; 6093 } else { 6094 DisplayPipeLineDeliveryTimeChroma[k] = swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / DPPCLK[k]; 6095 } 6096 } 6097 6098 if (VRatioPrefetchY[k] <= 1) { 6099 DisplayPipeLineDeliveryTimeLumaPrefetch[k] = swath_width_luma_ub[k] * DPPPerPlane[k] / HRatio[k] / PixelClock[k]; 6100 } else { 6101 DisplayPipeLineDeliveryTimeLumaPrefetch[k] = swath_width_luma_ub[k] / PSCL_THROUGHPUT[k] / DPPCLK[k]; 6102 } 6103 6104 if (BytePerPixelC[k] == 0) { 6105 DisplayPipeLineDeliveryTimeChromaPrefetch[k] = 0; 6106 } else { 6107 if (VRatioPrefetchC[k] <= 1) { 6108 DisplayPipeLineDeliveryTimeChromaPrefetch[k] = swath_width_chroma_ub[k] * DPPPerPlane[k] / HRatioChroma[k] / PixelClock[k]; 6109 } else { 6110 DisplayPipeLineDeliveryTimeChromaPrefetch[k] = swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / DPPCLK[k]; 6111 } 6112 } 6113 } 6114 6115 for (k = 0; k < NumberOfActivePlanes; ++k) { 6116 if (SourceScan[k] != dm_vert) { 6117 req_per_swath_ub = swath_width_luma_ub[k] / BlockWidth256BytesY[k]; 6118 } else { 6119 req_per_swath_ub = swath_width_luma_ub[k] / BlockHeight256BytesY[k]; 6120 } 6121 DisplayPipeRequestDeliveryTimeLuma[k] = DisplayPipeLineDeliveryTimeLuma[k] / req_per_swath_ub; 6122 DisplayPipeRequestDeliveryTimeLumaPrefetch[k] = DisplayPipeLineDeliveryTimeLumaPrefetch[k] / req_per_swath_ub; 6123 if (BytePerPixelC[k] == 0) { 6124 DisplayPipeRequestDeliveryTimeChroma[k] = 0; 6125 DisplayPipeRequestDeliveryTimeChromaPrefetch[k] = 0; 6126 } else { 6127 if (SourceScan[k] != dm_vert) { 6128 req_per_swath_ub = swath_width_chroma_ub[k] / BlockWidth256BytesC[k]; 6129 } else { 6130 req_per_swath_ub = swath_width_chroma_ub[k] / BlockHeight256BytesC[k]; 6131 } 6132 DisplayPipeRequestDeliveryTimeChroma[k] = DisplayPipeLineDeliveryTimeChroma[k] / req_per_swath_ub; 6133 DisplayPipeRequestDeliveryTimeChromaPrefetch[k] = DisplayPipeLineDeliveryTimeChromaPrefetch[k] / req_per_swath_ub; 6134 } 6135 #ifdef __DML_VBA_DEBUG__ 6136 dml_print("DML::%s: k=%d : HRatio = %f\n", __func__, k, HRatio[k]); 6137 dml_print("DML::%s: k=%d : VRatio = %f\n", __func__, k, VRatio[k]); 6138 dml_print("DML::%s: k=%d : HRatioChroma = %f\n", __func__, k, HRatioChroma[k]); 6139 dml_print("DML::%s: k=%d : VRatioChroma = %f\n", __func__, k, VRatioChroma[k]); 6140 dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeLuma = %f\n", __func__, k, DisplayPipeLineDeliveryTimeLuma[k]); 6141 dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeLumaPrefetch = %f\n", __func__, k, DisplayPipeLineDeliveryTimeLumaPrefetch[k]); 6142 dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeChroma = %f\n", __func__, k, DisplayPipeLineDeliveryTimeChroma[k]); 6143 dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeChromaPrefetch = %f\n", __func__, k, DisplayPipeLineDeliveryTimeChromaPrefetch[k]); 6144 dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeLuma = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeLuma[k]); 6145 dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeLumaPrefetch = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeLumaPrefetch[k]); 6146 dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeChroma = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeChroma[k]); 6147 dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeChromaPrefetch = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeChromaPrefetch[k]); 6148 #endif 6149 } 6150 6151 for (k = 0; k < NumberOfActivePlanes; ++k) { 6152 int cursor_req_per_width; 6153 cursor_req_per_width = dml_ceil(CursorWidth[k][0] * CursorBPP[k][0] / 256 / 8, 1); 6154 if (NumberOfCursors[k] > 0) { 6155 if (VRatio[k] <= 1) { 6156 CursorRequestDeliveryTime[k] = CursorWidth[k][0] / HRatio[k] / PixelClock[k] / cursor_req_per_width; 6157 } else { 6158 CursorRequestDeliveryTime[k] = CursorWidth[k][0] / PSCL_THROUGHPUT[k] / DPPCLK[k] / cursor_req_per_width; 6159 } 6160 if (VRatioPrefetchY[k] <= 1) { 6161 CursorRequestDeliveryTimePrefetch[k] = CursorWidth[k][0] / HRatio[k] / PixelClock[k] / cursor_req_per_width; 6162 } else { 6163 CursorRequestDeliveryTimePrefetch[k] = CursorWidth[k][0] / PSCL_THROUGHPUT[k] / DPPCLK[k] / cursor_req_per_width; 6164 } 6165 } else { 6166 CursorRequestDeliveryTime[k] = 0; 6167 CursorRequestDeliveryTimePrefetch[k] = 0; 6168 } 6169 #ifdef __DML_VBA_DEBUG__ 6170 dml_print("DML::%s: k=%d : NumberOfCursors = %d\n", __func__, k, NumberOfCursors[k]); 6171 dml_print("DML::%s: k=%d : CursorRequestDeliveryTime = %f\n", __func__, k, CursorRequestDeliveryTime[k]); 6172 dml_print("DML::%s: k=%d : CursorRequestDeliveryTimePrefetch = %f\n", __func__, k, CursorRequestDeliveryTimePrefetch[k]); 6173 #endif 6174 } 6175 } 6176 6177 static void CalculateMetaAndPTETimes( 6178 int NumberOfActivePlanes, 6179 bool GPUVMEnable, 6180 int MetaChunkSize, 6181 int MinMetaChunkSizeBytes, 6182 int HTotal[], 6183 double VRatio[], 6184 double VRatioChroma[], 6185 double DestinationLinesToRequestRowInVBlank[], 6186 double DestinationLinesToRequestRowInImmediateFlip[], 6187 bool DCCEnable[], 6188 double PixelClock[], 6189 int BytePerPixelY[], 6190 int BytePerPixelC[], 6191 enum scan_direction_class SourceScan[], 6192 int dpte_row_height[], 6193 int dpte_row_height_chroma[], 6194 int meta_row_width[], 6195 int meta_row_width_chroma[], 6196 int meta_row_height[], 6197 int meta_row_height_chroma[], 6198 int meta_req_width[], 6199 int meta_req_width_chroma[], 6200 int meta_req_height[], 6201 int meta_req_height_chroma[], 6202 int dpte_group_bytes[], 6203 int PTERequestSizeY[], 6204 int PTERequestSizeC[], 6205 int PixelPTEReqWidthY[], 6206 int PixelPTEReqHeightY[], 6207 int PixelPTEReqWidthC[], 6208 int PixelPTEReqHeightC[], 6209 int dpte_row_width_luma_ub[], 6210 int dpte_row_width_chroma_ub[], 6211 double DST_Y_PER_PTE_ROW_NOM_L[], 6212 double DST_Y_PER_PTE_ROW_NOM_C[], 6213 double DST_Y_PER_META_ROW_NOM_L[], 6214 double DST_Y_PER_META_ROW_NOM_C[], 6215 double TimePerMetaChunkNominal[], 6216 double TimePerChromaMetaChunkNominal[], 6217 double TimePerMetaChunkVBlank[], 6218 double TimePerChromaMetaChunkVBlank[], 6219 double TimePerMetaChunkFlip[], 6220 double TimePerChromaMetaChunkFlip[], 6221 double time_per_pte_group_nom_luma[], 6222 double time_per_pte_group_vblank_luma[], 6223 double time_per_pte_group_flip_luma[], 6224 double time_per_pte_group_nom_chroma[], 6225 double time_per_pte_group_vblank_chroma[], 6226 double time_per_pte_group_flip_chroma[]) 6227 { 6228 unsigned int meta_chunk_width; 6229 unsigned int min_meta_chunk_width; 6230 unsigned int meta_chunk_per_row_int; 6231 unsigned int meta_row_remainder; 6232 unsigned int meta_chunk_threshold; 6233 unsigned int meta_chunks_per_row_ub; 6234 unsigned int meta_chunk_width_chroma; 6235 unsigned int min_meta_chunk_width_chroma; 6236 unsigned int meta_chunk_per_row_int_chroma; 6237 unsigned int meta_row_remainder_chroma; 6238 unsigned int meta_chunk_threshold_chroma; 6239 unsigned int meta_chunks_per_row_ub_chroma; 6240 unsigned int dpte_group_width_luma; 6241 unsigned int dpte_groups_per_row_luma_ub; 6242 unsigned int dpte_group_width_chroma; 6243 unsigned int dpte_groups_per_row_chroma_ub; 6244 int k; 6245 6246 for (k = 0; k < NumberOfActivePlanes; ++k) { 6247 DST_Y_PER_PTE_ROW_NOM_L[k] = dpte_row_height[k] / VRatio[k]; 6248 if (BytePerPixelC[k] == 0) { 6249 DST_Y_PER_PTE_ROW_NOM_C[k] = 0; 6250 } else { 6251 DST_Y_PER_PTE_ROW_NOM_C[k] = dpte_row_height_chroma[k] / VRatioChroma[k]; 6252 } 6253 DST_Y_PER_META_ROW_NOM_L[k] = meta_row_height[k] / VRatio[k]; 6254 if (BytePerPixelC[k] == 0) { 6255 DST_Y_PER_META_ROW_NOM_C[k] = 0; 6256 } else { 6257 DST_Y_PER_META_ROW_NOM_C[k] = meta_row_height_chroma[k] / VRatioChroma[k]; 6258 } 6259 } 6260 6261 for (k = 0; k < NumberOfActivePlanes; ++k) { 6262 if (DCCEnable[k] == true) { 6263 meta_chunk_width = MetaChunkSize * 1024 * 256 / BytePerPixelY[k] / meta_row_height[k]; 6264 min_meta_chunk_width = MinMetaChunkSizeBytes * 256 / BytePerPixelY[k] / meta_row_height[k]; 6265 meta_chunk_per_row_int = meta_row_width[k] / meta_chunk_width; 6266 meta_row_remainder = meta_row_width[k] % meta_chunk_width; 6267 if (SourceScan[k] != dm_vert) { 6268 meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_width[k]; 6269 } else { 6270 meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_height[k]; 6271 } 6272 if (meta_row_remainder <= meta_chunk_threshold) { 6273 meta_chunks_per_row_ub = meta_chunk_per_row_int + 1; 6274 } else { 6275 meta_chunks_per_row_ub = meta_chunk_per_row_int + 2; 6276 } 6277 TimePerMetaChunkNominal[k] = meta_row_height[k] / VRatio[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub; 6278 TimePerMetaChunkVBlank[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub; 6279 TimePerMetaChunkFlip[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub; 6280 if (BytePerPixelC[k] == 0) { 6281 TimePerChromaMetaChunkNominal[k] = 0; 6282 TimePerChromaMetaChunkVBlank[k] = 0; 6283 TimePerChromaMetaChunkFlip[k] = 0; 6284 } else { 6285 meta_chunk_width_chroma = MetaChunkSize * 1024 * 256 / BytePerPixelC[k] / meta_row_height_chroma[k]; 6286 min_meta_chunk_width_chroma = MinMetaChunkSizeBytes * 256 / BytePerPixelC[k] / meta_row_height_chroma[k]; 6287 meta_chunk_per_row_int_chroma = (double) meta_row_width_chroma[k] / meta_chunk_width_chroma; 6288 meta_row_remainder_chroma = meta_row_width_chroma[k] % meta_chunk_width_chroma; 6289 if (SourceScan[k] != dm_vert) { 6290 meta_chunk_threshold_chroma = 2 * min_meta_chunk_width_chroma - meta_req_width_chroma[k]; 6291 } else { 6292 meta_chunk_threshold_chroma = 2 * min_meta_chunk_width_chroma - meta_req_height_chroma[k]; 6293 } 6294 if (meta_row_remainder_chroma <= meta_chunk_threshold_chroma) { 6295 meta_chunks_per_row_ub_chroma = meta_chunk_per_row_int_chroma + 1; 6296 } else { 6297 meta_chunks_per_row_ub_chroma = meta_chunk_per_row_int_chroma + 2; 6298 } 6299 TimePerChromaMetaChunkNominal[k] = meta_row_height_chroma[k] / VRatioChroma[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma; 6300 TimePerChromaMetaChunkVBlank[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma; 6301 TimePerChromaMetaChunkFlip[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma; 6302 } 6303 } else { 6304 TimePerMetaChunkNominal[k] = 0; 6305 TimePerMetaChunkVBlank[k] = 0; 6306 TimePerMetaChunkFlip[k] = 0; 6307 TimePerChromaMetaChunkNominal[k] = 0; 6308 TimePerChromaMetaChunkVBlank[k] = 0; 6309 TimePerChromaMetaChunkFlip[k] = 0; 6310 } 6311 } 6312 6313 for (k = 0; k < NumberOfActivePlanes; ++k) { 6314 if (GPUVMEnable == true) { 6315 if (SourceScan[k] != dm_vert) { 6316 dpte_group_width_luma = dpte_group_bytes[k] / PTERequestSizeY[k] * PixelPTEReqWidthY[k]; 6317 } else { 6318 dpte_group_width_luma = dpte_group_bytes[k] / PTERequestSizeY[k] * PixelPTEReqHeightY[k]; 6319 } 6320 dpte_groups_per_row_luma_ub = dml_ceil(1.0 * dpte_row_width_luma_ub[k] / dpte_group_width_luma, 1); 6321 time_per_pte_group_nom_luma[k] = DST_Y_PER_PTE_ROW_NOM_L[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub; 6322 time_per_pte_group_vblank_luma[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub; 6323 time_per_pte_group_flip_luma[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub; 6324 if (BytePerPixelC[k] == 0) { 6325 time_per_pte_group_nom_chroma[k] = 0; 6326 time_per_pte_group_vblank_chroma[k] = 0; 6327 time_per_pte_group_flip_chroma[k] = 0; 6328 } else { 6329 if (SourceScan[k] != dm_vert) { 6330 dpte_group_width_chroma = dpte_group_bytes[k] / PTERequestSizeC[k] * PixelPTEReqWidthC[k]; 6331 } else { 6332 dpte_group_width_chroma = dpte_group_bytes[k] / PTERequestSizeC[k] * PixelPTEReqHeightC[k]; 6333 } 6334 dpte_groups_per_row_chroma_ub = dml_ceil(1.0 * dpte_row_width_chroma_ub[k] / dpte_group_width_chroma, 1); 6335 time_per_pte_group_nom_chroma[k] = DST_Y_PER_PTE_ROW_NOM_C[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub; 6336 time_per_pte_group_vblank_chroma[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub; 6337 time_per_pte_group_flip_chroma[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub; 6338 } 6339 } else { 6340 time_per_pte_group_nom_luma[k] = 0; 6341 time_per_pte_group_vblank_luma[k] = 0; 6342 time_per_pte_group_flip_luma[k] = 0; 6343 time_per_pte_group_nom_chroma[k] = 0; 6344 time_per_pte_group_vblank_chroma[k] = 0; 6345 time_per_pte_group_flip_chroma[k] = 0; 6346 } 6347 } 6348 } 6349 6350 static void CalculateVMGroupAndRequestTimes( 6351 unsigned int NumberOfActivePlanes, 6352 bool GPUVMEnable, 6353 unsigned int GPUVMMaxPageTableLevels, 6354 unsigned int HTotal[], 6355 int BytePerPixelC[], 6356 double DestinationLinesToRequestVMInVBlank[], 6357 double DestinationLinesToRequestVMInImmediateFlip[], 6358 bool DCCEnable[], 6359 double PixelClock[], 6360 int dpte_row_width_luma_ub[], 6361 int dpte_row_width_chroma_ub[], 6362 int vm_group_bytes[], 6363 unsigned int dpde0_bytes_per_frame_ub_l[], 6364 unsigned int dpde0_bytes_per_frame_ub_c[], 6365 int meta_pte_bytes_per_frame_ub_l[], 6366 int meta_pte_bytes_per_frame_ub_c[], 6367 double TimePerVMGroupVBlank[], 6368 double TimePerVMGroupFlip[], 6369 double TimePerVMRequestVBlank[], 6370 double TimePerVMRequestFlip[]) 6371 { 6372 int num_group_per_lower_vm_stage; 6373 int num_req_per_lower_vm_stage; 6374 int k; 6375 6376 for (k = 0; k < NumberOfActivePlanes; ++k) { 6377 if (GPUVMEnable == true && (DCCEnable[k] == true || GPUVMMaxPageTableLevels > 1)) { 6378 if (DCCEnable[k] == false) { 6379 if (BytePerPixelC[k] > 0) { 6380 num_group_per_lower_vm_stage = dml_ceil((double) (dpde0_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1) 6381 + dml_ceil((double) (dpde0_bytes_per_frame_ub_c[k]) / (double) (vm_group_bytes[k]), 1); 6382 } else { 6383 num_group_per_lower_vm_stage = dml_ceil((double) (dpde0_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1); 6384 } 6385 } else { 6386 if (GPUVMMaxPageTableLevels == 1) { 6387 if (BytePerPixelC[k] > 0) { 6388 num_group_per_lower_vm_stage = dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1) 6389 + dml_ceil((double) (meta_pte_bytes_per_frame_ub_c[k]) / (double) (vm_group_bytes[k]), 1); 6390 } else { 6391 num_group_per_lower_vm_stage = dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1); 6392 } 6393 } else { 6394 if (BytePerPixelC[k] > 0) { 6395 num_group_per_lower_vm_stage = 2 + dml_ceil((double) (dpde0_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1) 6396 + dml_ceil((double) (dpde0_bytes_per_frame_ub_c[k]) / (double) (vm_group_bytes[k]), 1) 6397 + dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1) 6398 + dml_ceil((double) (meta_pte_bytes_per_frame_ub_c[k]) / (double) (vm_group_bytes[k]), 1); 6399 } else { 6400 num_group_per_lower_vm_stage = 1 + dml_ceil((double) (dpde0_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1) 6401 + dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1); 6402 } 6403 } 6404 } 6405 6406 if (DCCEnable[k] == false) { 6407 if (BytePerPixelC[k] > 0) { 6408 num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k] / 64 + dpde0_bytes_per_frame_ub_c[k] / 64; 6409 } else { 6410 num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k] / 64; 6411 } 6412 } else { 6413 if (GPUVMMaxPageTableLevels == 1) { 6414 if (BytePerPixelC[k] > 0) { 6415 num_req_per_lower_vm_stage = meta_pte_bytes_per_frame_ub_l[k] / 64 + meta_pte_bytes_per_frame_ub_c[k] / 64; 6416 } else { 6417 num_req_per_lower_vm_stage = meta_pte_bytes_per_frame_ub_l[k] / 64; 6418 } 6419 } else { 6420 if (BytePerPixelC[k] > 0) { 6421 num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k] / 64 + dpde0_bytes_per_frame_ub_c[k] / 64 6422 + meta_pte_bytes_per_frame_ub_l[k] / 64 + meta_pte_bytes_per_frame_ub_c[k] / 64; 6423 } else { 6424 num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k] / 64 + meta_pte_bytes_per_frame_ub_l[k] / 64; 6425 } 6426 } 6427 } 6428 6429 TimePerVMGroupVBlank[k] = DestinationLinesToRequestVMInVBlank[k] * HTotal[k] / PixelClock[k] / num_group_per_lower_vm_stage; 6430 TimePerVMGroupFlip[k] = DestinationLinesToRequestVMInImmediateFlip[k] * HTotal[k] / PixelClock[k] / num_group_per_lower_vm_stage; 6431 TimePerVMRequestVBlank[k] = DestinationLinesToRequestVMInVBlank[k] * HTotal[k] / PixelClock[k] / num_req_per_lower_vm_stage; 6432 TimePerVMRequestFlip[k] = DestinationLinesToRequestVMInImmediateFlip[k] * HTotal[k] / PixelClock[k] / num_req_per_lower_vm_stage; 6433 6434 if (GPUVMMaxPageTableLevels > 2) { 6435 TimePerVMGroupVBlank[k] = TimePerVMGroupVBlank[k] / 2; 6436 TimePerVMGroupFlip[k] = TimePerVMGroupFlip[k] / 2; 6437 TimePerVMRequestVBlank[k] = TimePerVMRequestVBlank[k] / 2; 6438 TimePerVMRequestFlip[k] = TimePerVMRequestFlip[k] / 2; 6439 } 6440 6441 } else { 6442 TimePerVMGroupVBlank[k] = 0; 6443 TimePerVMGroupFlip[k] = 0; 6444 TimePerVMRequestVBlank[k] = 0; 6445 TimePerVMRequestFlip[k] = 0; 6446 } 6447 } 6448 } 6449 6450 static void CalculateStutterEfficiency( 6451 struct display_mode_lib *mode_lib, 6452 int CompressedBufferSizeInkByte, 6453 bool UnboundedRequestEnabled, 6454 int ConfigReturnBufferSizeInKByte, 6455 int MetaFIFOSizeInKEntries, 6456 int ZeroSizeBufferEntries, 6457 int NumberOfActivePlanes, 6458 int ROBBufferSizeInKByte, 6459 double TotalDataReadBandwidth, 6460 double DCFCLK, 6461 double ReturnBW, 6462 double COMPBUF_RESERVED_SPACE_64B, 6463 double COMPBUF_RESERVED_SPACE_ZS, 6464 double SRExitTime, 6465 double SRExitZ8Time, 6466 bool SynchronizedVBlank, 6467 double Z8StutterEnterPlusExitWatermark, 6468 double StutterEnterPlusExitWatermark, 6469 bool ProgressiveToInterlaceUnitInOPP, 6470 bool Interlace[], 6471 double MinTTUVBlank[], 6472 int DPPPerPlane[], 6473 unsigned int DETBufferSizeY[], 6474 int BytePerPixelY[], 6475 double BytePerPixelDETY[], 6476 double SwathWidthY[], 6477 int SwathHeightY[], 6478 int SwathHeightC[], 6479 double NetDCCRateLuma[], 6480 double NetDCCRateChroma[], 6481 double DCCFractionOfZeroSizeRequestsLuma[], 6482 double DCCFractionOfZeroSizeRequestsChroma[], 6483 int HTotal[], 6484 int VTotal[], 6485 double PixelClock[], 6486 double VRatio[], 6487 enum scan_direction_class SourceScan[], 6488 int BlockHeight256BytesY[], 6489 int BlockWidth256BytesY[], 6490 int BlockHeight256BytesC[], 6491 int BlockWidth256BytesC[], 6492 int DCCYMaxUncompressedBlock[], 6493 int DCCCMaxUncompressedBlock[], 6494 int VActive[], 6495 bool DCCEnable[], 6496 bool WritebackEnable[], 6497 double ReadBandwidthPlaneLuma[], 6498 double ReadBandwidthPlaneChroma[], 6499 double meta_row_bw[], 6500 double dpte_row_bw[], 6501 double *StutterEfficiencyNotIncludingVBlank, 6502 double *StutterEfficiency, 6503 int *NumberOfStutterBurstsPerFrame, 6504 double *Z8StutterEfficiencyNotIncludingVBlank, 6505 double *Z8StutterEfficiency, 6506 int *Z8NumberOfStutterBurstsPerFrame, 6507 double *StutterPeriod) 6508 { 6509 struct vba_vars_st *v = &mode_lib->vba; 6510 6511 double DETBufferingTimeY; 6512 double SwathWidthYCriticalPlane = 0; 6513 double VActiveTimeCriticalPlane = 0; 6514 double FrameTimeCriticalPlane = 0; 6515 int BytePerPixelYCriticalPlane = 0; 6516 double LinesToFinishSwathTransferStutterCriticalPlane = 0; 6517 double MinTTUVBlankCriticalPlane = 0; 6518 double TotalCompressedReadBandwidth; 6519 double TotalRowReadBandwidth; 6520 double AverageDCCCompressionRate; 6521 double EffectiveCompressedBufferSize; 6522 double PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer; 6523 double StutterBurstTime; 6524 int TotalActiveWriteback; 6525 double LinesInDETY; 6526 double LinesInDETYRoundedDownToSwath; 6527 double MaximumEffectiveCompressionLuma; 6528 double MaximumEffectiveCompressionChroma; 6529 double TotalZeroSizeRequestReadBandwidth; 6530 double TotalZeroSizeCompressedReadBandwidth; 6531 double AverageDCCZeroSizeFraction; 6532 double AverageZeroSizeCompressionRate; 6533 int TotalNumberOfActiveOTG = 0; 6534 double LastStutterPeriod = 0.0; 6535 double LastZ8StutterPeriod = 0.0; 6536 int k; 6537 6538 TotalZeroSizeRequestReadBandwidth = 0; 6539 TotalZeroSizeCompressedReadBandwidth = 0; 6540 TotalRowReadBandwidth = 0; 6541 TotalCompressedReadBandwidth = 0; 6542 6543 for (k = 0; k < NumberOfActivePlanes; ++k) { 6544 if (DCCEnable[k] == true) { 6545 if ((SourceScan[k] == dm_vert && BlockWidth256BytesY[k] > SwathHeightY[k]) || (SourceScan[k] != dm_vert && BlockHeight256BytesY[k] > SwathHeightY[k]) 6546 || DCCYMaxUncompressedBlock[k] < 256) { 6547 MaximumEffectiveCompressionLuma = 2; 6548 } else { 6549 MaximumEffectiveCompressionLuma = 4; 6550 } 6551 TotalCompressedReadBandwidth = TotalCompressedReadBandwidth + ReadBandwidthPlaneLuma[k] / dml_min(NetDCCRateLuma[k], MaximumEffectiveCompressionLuma); 6552 TotalZeroSizeRequestReadBandwidth = TotalZeroSizeRequestReadBandwidth + ReadBandwidthPlaneLuma[k] * DCCFractionOfZeroSizeRequestsLuma[k]; 6553 TotalZeroSizeCompressedReadBandwidth = TotalZeroSizeCompressedReadBandwidth 6554 + ReadBandwidthPlaneLuma[k] * DCCFractionOfZeroSizeRequestsLuma[k] / MaximumEffectiveCompressionLuma; 6555 if (ReadBandwidthPlaneChroma[k] > 0) { 6556 if ((SourceScan[k] == dm_vert && BlockWidth256BytesC[k] > SwathHeightC[k]) 6557 || (SourceScan[k] != dm_vert && BlockHeight256BytesC[k] > SwathHeightC[k]) || DCCCMaxUncompressedBlock[k] < 256) { 6558 MaximumEffectiveCompressionChroma = 2; 6559 } else { 6560 MaximumEffectiveCompressionChroma = 4; 6561 } 6562 TotalCompressedReadBandwidth = TotalCompressedReadBandwidth 6563 + ReadBandwidthPlaneChroma[k] / dml_min(NetDCCRateChroma[k], MaximumEffectiveCompressionChroma); 6564 TotalZeroSizeRequestReadBandwidth = TotalZeroSizeRequestReadBandwidth + ReadBandwidthPlaneChroma[k] * DCCFractionOfZeroSizeRequestsChroma[k]; 6565 TotalZeroSizeCompressedReadBandwidth = TotalZeroSizeCompressedReadBandwidth 6566 + ReadBandwidthPlaneChroma[k] * DCCFractionOfZeroSizeRequestsChroma[k] / MaximumEffectiveCompressionChroma; 6567 } 6568 } else { 6569 TotalCompressedReadBandwidth = TotalCompressedReadBandwidth + ReadBandwidthPlaneLuma[k] + ReadBandwidthPlaneChroma[k]; 6570 } 6571 TotalRowReadBandwidth = TotalRowReadBandwidth + DPPPerPlane[k] * (meta_row_bw[k] + dpte_row_bw[k]); 6572 } 6573 6574 AverageDCCCompressionRate = TotalDataReadBandwidth / TotalCompressedReadBandwidth; 6575 AverageDCCZeroSizeFraction = TotalZeroSizeRequestReadBandwidth / TotalDataReadBandwidth; 6576 6577 #ifdef __DML_VBA_DEBUG__ 6578 dml_print("DML::%s: TotalCompressedReadBandwidth = %f\n", __func__, TotalCompressedReadBandwidth); 6579 dml_print("DML::%s: TotalZeroSizeRequestReadBandwidth = %f\n", __func__, TotalZeroSizeRequestReadBandwidth); 6580 dml_print("DML::%s: TotalZeroSizeCompressedReadBandwidth = %f\n", __func__, TotalZeroSizeCompressedReadBandwidth); 6581 dml_print("DML::%s: MaximumEffectiveCompressionLuma = %f\n", __func__, MaximumEffectiveCompressionLuma); 6582 dml_print("DML::%s: MaximumEffectiveCompressionChroma = %f\n", __func__, MaximumEffectiveCompressionChroma); 6583 dml_print("DML::%s: AverageDCCCompressionRate = %f\n", __func__, AverageDCCCompressionRate); 6584 dml_print("DML::%s: AverageDCCZeroSizeFraction = %f\n", __func__, AverageDCCZeroSizeFraction); 6585 dml_print("DML::%s: CompressedBufferSizeInkByte = %d\n", __func__, CompressedBufferSizeInkByte); 6586 #endif 6587 6588 if (AverageDCCZeroSizeFraction == 1) { 6589 AverageZeroSizeCompressionRate = TotalZeroSizeRequestReadBandwidth / TotalZeroSizeCompressedReadBandwidth; 6590 EffectiveCompressedBufferSize = MetaFIFOSizeInKEntries * 1024 * 64 * AverageZeroSizeCompressionRate + (ZeroSizeBufferEntries - COMPBUF_RESERVED_SPACE_ZS) * 64 * AverageZeroSizeCompressionRate; 6591 } else if (AverageDCCZeroSizeFraction > 0) { 6592 AverageZeroSizeCompressionRate = TotalZeroSizeRequestReadBandwidth / TotalZeroSizeCompressedReadBandwidth; 6593 EffectiveCompressedBufferSize = dml_min( 6594 CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate, 6595 MetaFIFOSizeInKEntries * 1024 * 64 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate + 1 / AverageDCCCompressionRate)) 6596 + dml_min((ROBBufferSizeInKByte * 1024 - COMPBUF_RESERVED_SPACE_64B * 64) * AverageDCCCompressionRate, 6597 (ZeroSizeBufferEntries - COMPBUF_RESERVED_SPACE_ZS) * 64 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate)); 6598 dml_print("DML::%s: min 1 = %f\n", __func__, CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate); 6599 dml_print( 6600 "DML::%s: min 2 = %f\n", 6601 __func__, 6602 MetaFIFOSizeInKEntries * 1024 * 64 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate + 1 / AverageDCCCompressionRate)); 6603 dml_print("DML::%s: min 3 = %f\n", __func__, ROBBufferSizeInKByte * 1024 * AverageDCCCompressionRate); 6604 dml_print("DML::%s: min 4 = %f\n", __func__, ZeroSizeBufferEntries * 64 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate)); 6605 } else { 6606 EffectiveCompressedBufferSize = dml_min( 6607 CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate, 6608 MetaFIFOSizeInKEntries * 1024 * 64 * AverageDCCCompressionRate) + (ROBBufferSizeInKByte * 1024 - COMPBUF_RESERVED_SPACE_64B * 64) * AverageDCCCompressionRate; 6609 dml_print("DML::%s: min 1 = %f\n", __func__, CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate); 6610 dml_print("DML::%s: min 2 = %f\n", __func__, MetaFIFOSizeInKEntries * 1024 * 64 * AverageDCCCompressionRate); 6611 } 6612 6613 #ifdef __DML_VBA_DEBUG__ 6614 dml_print("DML::%s: MetaFIFOSizeInKEntries = %d\n", __func__, MetaFIFOSizeInKEntries); 6615 dml_print("DML::%s: AverageZeroSizeCompressionRate = %f\n", __func__, AverageZeroSizeCompressionRate); 6616 dml_print("DML::%s: EffectiveCompressedBufferSize = %f\n", __func__, EffectiveCompressedBufferSize); 6617 #endif 6618 6619 *StutterPeriod = 0; 6620 for (k = 0; k < NumberOfActivePlanes; ++k) { 6621 LinesInDETY = (DETBufferSizeY[k] + (UnboundedRequestEnabled == true ? EffectiveCompressedBufferSize : 0) * ReadBandwidthPlaneLuma[k] / TotalDataReadBandwidth) 6622 / BytePerPixelDETY[k] / SwathWidthY[k]; 6623 LinesInDETYRoundedDownToSwath = dml_floor(LinesInDETY, SwathHeightY[k]); 6624 DETBufferingTimeY = LinesInDETYRoundedDownToSwath * (HTotal[k] / PixelClock[k]) / VRatio[k]; 6625 #ifdef __DML_VBA_DEBUG__ 6626 dml_print("DML::%s: k=%0d DETBufferSizeY = %f\n", __func__, k, DETBufferSizeY[k]); 6627 dml_print("DML::%s: k=%0d BytePerPixelDETY = %f\n", __func__, k, BytePerPixelDETY[k]); 6628 dml_print("DML::%s: k=%0d SwathWidthY = %f\n", __func__, k, SwathWidthY[k]); 6629 dml_print("DML::%s: k=%0d ReadBandwidthPlaneLuma = %f\n", __func__, k, ReadBandwidthPlaneLuma[k]); 6630 dml_print("DML::%s: k=%0d TotalDataReadBandwidth = %f\n", __func__, k, TotalDataReadBandwidth); 6631 dml_print("DML::%s: k=%0d LinesInDETY = %f\n", __func__, k, LinesInDETY); 6632 dml_print("DML::%s: k=%0d LinesInDETYRoundedDownToSwath = %f\n", __func__, k, LinesInDETYRoundedDownToSwath); 6633 dml_print("DML::%s: k=%0d HTotal = %d\n", __func__, k, HTotal[k]); 6634 dml_print("DML::%s: k=%0d PixelClock = %f\n", __func__, k, PixelClock[k]); 6635 dml_print("DML::%s: k=%0d VRatio = %f\n", __func__, k, VRatio[k]); 6636 dml_print("DML::%s: k=%0d DETBufferingTimeY = %f\n", __func__, k, DETBufferingTimeY); 6637 dml_print("DML::%s: k=%0d PixelClock = %f\n", __func__, k, PixelClock[k]); 6638 #endif 6639 6640 if (k == 0 || DETBufferingTimeY < *StutterPeriod) { 6641 bool isInterlaceTiming = Interlace[k] && !ProgressiveToInterlaceUnitInOPP; 6642 6643 *StutterPeriod = DETBufferingTimeY; 6644 FrameTimeCriticalPlane = (isInterlaceTiming ? dml_floor(VTotal[k] / 2.0, 1.0) : VTotal[k]) * HTotal[k] / PixelClock[k]; 6645 VActiveTimeCriticalPlane = (isInterlaceTiming ? dml_floor(VActive[k] / 2.0, 1.0) : VActive[k]) * HTotal[k] / PixelClock[k]; 6646 BytePerPixelYCriticalPlane = BytePerPixelY[k]; 6647 SwathWidthYCriticalPlane = SwathWidthY[k]; 6648 LinesToFinishSwathTransferStutterCriticalPlane = SwathHeightY[k] - (LinesInDETY - LinesInDETYRoundedDownToSwath); 6649 MinTTUVBlankCriticalPlane = MinTTUVBlank[k]; 6650 6651 #ifdef __DML_VBA_DEBUG__ 6652 dml_print("DML::%s: StutterPeriod = %f\n", __func__, *StutterPeriod); 6653 dml_print("DML::%s: MinTTUVBlankCriticalPlane = %f\n", __func__, MinTTUVBlankCriticalPlane); 6654 dml_print("DML::%s: FrameTimeCriticalPlane = %f\n", __func__, FrameTimeCriticalPlane); 6655 dml_print("DML::%s: VActiveTimeCriticalPlane = %f\n", __func__, VActiveTimeCriticalPlane); 6656 dml_print("DML::%s: BytePerPixelYCriticalPlane = %d\n", __func__, BytePerPixelYCriticalPlane); 6657 dml_print("DML::%s: SwathWidthYCriticalPlane = %f\n", __func__, SwathWidthYCriticalPlane); 6658 dml_print("DML::%s: LinesToFinishSwathTransferStutterCriticalPlane = %f\n", __func__, LinesToFinishSwathTransferStutterCriticalPlane); 6659 #endif 6660 } 6661 } 6662 6663 PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer = dml_min(*StutterPeriod * TotalDataReadBandwidth, EffectiveCompressedBufferSize); 6664 #ifdef __DML_VBA_DEBUG__ 6665 dml_print("DML::%s: ROBBufferSizeInKByte = %d\n", __func__, ROBBufferSizeInKByte); 6666 dml_print("DML::%s: AverageDCCCompressionRate = %f\n", __func__, AverageDCCCompressionRate); 6667 dml_print("DML::%s: StutterPeriod * TotalDataReadBandwidth = %f\n", __func__, *StutterPeriod * TotalDataReadBandwidth); 6668 dml_print("DML::%s: ROBBufferSizeInKByte * 1024 * AverageDCCCompressionRate + EffectiveCompressedBufferSize = %f\n", __func__, ROBBufferSizeInKByte * 1024 * AverageDCCCompressionRate + EffectiveCompressedBufferSize); 6669 dml_print("DML::%s: EffectiveCompressedBufferSize = %f\n", __func__, EffectiveCompressedBufferSize); 6670 dml_print("DML::%s: PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer = %f\n", __func__, PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer); 6671 dml_print("DML::%s: ReturnBW = %f\n", __func__, ReturnBW); 6672 dml_print("DML::%s: TotalDataReadBandwidth = %f\n", __func__, TotalDataReadBandwidth); 6673 dml_print("DML::%s: TotalRowReadBandwidth = %f\n", __func__, TotalRowReadBandwidth); 6674 dml_print("DML::%s: DCFCLK = %f\n", __func__, DCFCLK); 6675 #endif 6676 6677 StutterBurstTime = PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer / AverageDCCCompressionRate / ReturnBW 6678 + (*StutterPeriod * TotalDataReadBandwidth - PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer) / (DCFCLK * 64) 6679 + *StutterPeriod * TotalRowReadBandwidth / ReturnBW; 6680 #ifdef __DML_VBA_DEBUG__ 6681 dml_print("DML::%s: Part 1 = %f\n", __func__, PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer / AverageDCCCompressionRate / ReturnBW); 6682 dml_print("DML::%s: StutterPeriod * TotalDataReadBandwidth = %f\n", __func__, (*StutterPeriod * TotalDataReadBandwidth)); 6683 dml_print("DML::%s: Part 2 = %f\n", __func__, (*StutterPeriod * TotalDataReadBandwidth - PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer) / (DCFCLK * 64)); 6684 dml_print("DML::%s: Part 3 = %f\n", __func__, *StutterPeriod * TotalRowReadBandwidth / ReturnBW); 6685 dml_print("DML::%s: StutterBurstTime = %f\n", __func__, StutterBurstTime); 6686 #endif 6687 StutterBurstTime = dml_max(StutterBurstTime, LinesToFinishSwathTransferStutterCriticalPlane * BytePerPixelYCriticalPlane * SwathWidthYCriticalPlane / ReturnBW); 6688 6689 dml_print( 6690 "DML::%s: Time to finish residue swath=%f\n", 6691 __func__, 6692 LinesToFinishSwathTransferStutterCriticalPlane * BytePerPixelYCriticalPlane * SwathWidthYCriticalPlane / ReturnBW); 6693 6694 TotalActiveWriteback = 0; 6695 for (k = 0; k < NumberOfActivePlanes; ++k) { 6696 if (WritebackEnable[k]) { 6697 TotalActiveWriteback = TotalActiveWriteback + 1; 6698 } 6699 } 6700 6701 if (TotalActiveWriteback == 0) { 6702 #ifdef __DML_VBA_DEBUG__ 6703 dml_print("DML::%s: SRExitTime = %f\n", __func__, SRExitTime); 6704 dml_print("DML::%s: SRExitZ8Time = %f\n", __func__, SRExitZ8Time); 6705 dml_print("DML::%s: StutterBurstTime = %f (final)\n", __func__, StutterBurstTime); 6706 dml_print("DML::%s: StutterPeriod = %f\n", __func__, *StutterPeriod); 6707 #endif 6708 *StutterEfficiencyNotIncludingVBlank = dml_max(0., 1 - (SRExitTime + StutterBurstTime) / *StutterPeriod) * 100; 6709 *Z8StutterEfficiencyNotIncludingVBlank = dml_max(0., 1 - (SRExitZ8Time + StutterBurstTime) / *StutterPeriod) * 100; 6710 *NumberOfStutterBurstsPerFrame = (*StutterEfficiencyNotIncludingVBlank > 0 ? dml_ceil(VActiveTimeCriticalPlane / *StutterPeriod, 1) : 0); 6711 *Z8NumberOfStutterBurstsPerFrame = (*Z8StutterEfficiencyNotIncludingVBlank > 0 ? dml_ceil(VActiveTimeCriticalPlane / *StutterPeriod, 1) : 0); 6712 } else { 6713 *StutterEfficiencyNotIncludingVBlank = 0.; 6714 *Z8StutterEfficiencyNotIncludingVBlank = 0.; 6715 *NumberOfStutterBurstsPerFrame = 0; 6716 *Z8NumberOfStutterBurstsPerFrame = 0; 6717 } 6718 #ifdef __DML_VBA_DEBUG__ 6719 dml_print("DML::%s: VActiveTimeCriticalPlane = %f\n", __func__, VActiveTimeCriticalPlane); 6720 dml_print("DML::%s: StutterEfficiencyNotIncludingVBlank = %f\n", __func__, *StutterEfficiencyNotIncludingVBlank); 6721 dml_print("DML::%s: Z8StutterEfficiencyNotIncludingVBlank = %f\n", __func__, *Z8StutterEfficiencyNotIncludingVBlank); 6722 dml_print("DML::%s: NumberOfStutterBurstsPerFrame = %d\n", __func__, *NumberOfStutterBurstsPerFrame); 6723 dml_print("DML::%s: Z8NumberOfStutterBurstsPerFrame = %d\n", __func__, *Z8NumberOfStutterBurstsPerFrame); 6724 #endif 6725 6726 for (k = 0; k < NumberOfActivePlanes; ++k) { 6727 if (v->BlendingAndTiming[k] == k) { 6728 TotalNumberOfActiveOTG = TotalNumberOfActiveOTG + 1; 6729 } 6730 } 6731 6732 if (*StutterEfficiencyNotIncludingVBlank > 0) { 6733 LastStutterPeriod = VActiveTimeCriticalPlane - (*NumberOfStutterBurstsPerFrame - 1) * *StutterPeriod; 6734 6735 if ((SynchronizedVBlank || TotalNumberOfActiveOTG == 1) && LastStutterPeriod + MinTTUVBlankCriticalPlane > StutterEnterPlusExitWatermark) { 6736 *StutterEfficiency = (1 - (*NumberOfStutterBurstsPerFrame * SRExitTime + StutterBurstTime * VActiveTimeCriticalPlane 6737 / *StutterPeriod) / FrameTimeCriticalPlane) * 100; 6738 } else { 6739 *StutterEfficiency = *StutterEfficiencyNotIncludingVBlank; 6740 } 6741 } else { 6742 *StutterEfficiency = 0; 6743 } 6744 6745 if (*Z8StutterEfficiencyNotIncludingVBlank > 0) { 6746 LastZ8StutterPeriod = VActiveTimeCriticalPlane - (*NumberOfStutterBurstsPerFrame - 1) * *StutterPeriod; 6747 if ((SynchronizedVBlank || TotalNumberOfActiveOTG == 1) && LastZ8StutterPeriod + MinTTUVBlankCriticalPlane > Z8StutterEnterPlusExitWatermark) { 6748 *Z8StutterEfficiency = (1 - (*NumberOfStutterBurstsPerFrame * SRExitZ8Time + StutterBurstTime * VActiveTimeCriticalPlane 6749 / *StutterPeriod) / FrameTimeCriticalPlane) * 100; 6750 } else { 6751 *Z8StutterEfficiency = *Z8StutterEfficiencyNotIncludingVBlank; 6752 } 6753 } else { 6754 *Z8StutterEfficiency = 0.; 6755 } 6756 6757 dml_print("DML::%s: LastZ8StutterPeriod = %f\n", __func__, LastZ8StutterPeriod); 6758 dml_print("DML::%s: Z8StutterEnterPlusExitWatermark = %f\n", __func__, Z8StutterEnterPlusExitWatermark); 6759 dml_print("DML::%s: StutterBurstTime = %f\n", __func__, StutterBurstTime); 6760 dml_print("DML::%s: StutterPeriod = %f\n", __func__, *StutterPeriod); 6761 dml_print("DML::%s: StutterEfficiency = %f\n", __func__, *StutterEfficiency); 6762 dml_print("DML::%s: Z8StutterEfficiency = %f\n", __func__, *Z8StutterEfficiency); 6763 dml_print("DML::%s: StutterEfficiencyNotIncludingVBlank = %f\n", __func__, *StutterEfficiencyNotIncludingVBlank); 6764 dml_print("DML::%s: Z8NumberOfStutterBurstsPerFrame = %d\n", __func__, *Z8NumberOfStutterBurstsPerFrame); 6765 } 6766 6767 static void CalculateSwathAndDETConfiguration( 6768 bool ForceSingleDPP, 6769 int NumberOfActivePlanes, 6770 unsigned int DETBufferSizeInKByte, 6771 double MaximumSwathWidthLuma[], 6772 double MaximumSwathWidthChroma[], 6773 enum scan_direction_class SourceScan[], 6774 enum source_format_class SourcePixelFormat[], 6775 enum dm_swizzle_mode SurfaceTiling[], 6776 int ViewportWidth[], 6777 int ViewportHeight[], 6778 int SurfaceWidthY[], 6779 int SurfaceWidthC[], 6780 int SurfaceHeightY[], 6781 int SurfaceHeightC[], 6782 int Read256BytesBlockHeightY[], 6783 int Read256BytesBlockHeightC[], 6784 int Read256BytesBlockWidthY[], 6785 int Read256BytesBlockWidthC[], 6786 enum odm_combine_mode ODMCombineEnabled[], 6787 int BlendingAndTiming[], 6788 int BytePerPixY[], 6789 int BytePerPixC[], 6790 double BytePerPixDETY[], 6791 double BytePerPixDETC[], 6792 int HActive[], 6793 double HRatio[], 6794 double HRatioChroma[], 6795 int DPPPerPlane[], 6796 int swath_width_luma_ub[], 6797 int swath_width_chroma_ub[], 6798 double SwathWidth[], 6799 double SwathWidthChroma[], 6800 int SwathHeightY[], 6801 int SwathHeightC[], 6802 unsigned int DETBufferSizeY[], 6803 unsigned int DETBufferSizeC[], 6804 bool ViewportSizeSupportPerPlane[], 6805 bool *ViewportSizeSupport) 6806 { 6807 int MaximumSwathHeightY[DC__NUM_DPP__MAX]; 6808 int MaximumSwathHeightC[DC__NUM_DPP__MAX]; 6809 int MinimumSwathHeightY; 6810 int MinimumSwathHeightC; 6811 int RoundedUpMaxSwathSizeBytesY; 6812 int RoundedUpMaxSwathSizeBytesC; 6813 int RoundedUpMinSwathSizeBytesY; 6814 int RoundedUpMinSwathSizeBytesC; 6815 int RoundedUpSwathSizeBytesY; 6816 int RoundedUpSwathSizeBytesC; 6817 double SwathWidthSingleDPP[DC__NUM_DPP__MAX]; 6818 double SwathWidthSingleDPPChroma[DC__NUM_DPP__MAX]; 6819 int k; 6820 6821 CalculateSwathWidth( 6822 ForceSingleDPP, 6823 NumberOfActivePlanes, 6824 SourcePixelFormat, 6825 SourceScan, 6826 ViewportWidth, 6827 ViewportHeight, 6828 SurfaceWidthY, 6829 SurfaceWidthC, 6830 SurfaceHeightY, 6831 SurfaceHeightC, 6832 ODMCombineEnabled, 6833 BytePerPixY, 6834 BytePerPixC, 6835 Read256BytesBlockHeightY, 6836 Read256BytesBlockHeightC, 6837 Read256BytesBlockWidthY, 6838 Read256BytesBlockWidthC, 6839 BlendingAndTiming, 6840 HActive, 6841 HRatio, 6842 DPPPerPlane, 6843 SwathWidthSingleDPP, 6844 SwathWidthSingleDPPChroma, 6845 SwathWidth, 6846 SwathWidthChroma, 6847 MaximumSwathHeightY, 6848 MaximumSwathHeightC, 6849 swath_width_luma_ub, 6850 swath_width_chroma_ub); 6851 6852 *ViewportSizeSupport = true; 6853 for (k = 0; k < NumberOfActivePlanes; ++k) { 6854 if ((SourcePixelFormat[k] == dm_444_64 || SourcePixelFormat[k] == dm_444_32 || SourcePixelFormat[k] == dm_444_16 || SourcePixelFormat[k] == dm_mono_16 6855 || SourcePixelFormat[k] == dm_mono_8 || SourcePixelFormat[k] == dm_rgbe)) { 6856 if (SurfaceTiling[k] == dm_sw_linear 6857 || (SourcePixelFormat[k] == dm_444_64 6858 && (SurfaceTiling[k] == dm_sw_64kb_s || SurfaceTiling[k] == dm_sw_64kb_s_t || SurfaceTiling[k] == dm_sw_64kb_s_x) 6859 && SourceScan[k] != dm_vert)) { 6860 MinimumSwathHeightY = MaximumSwathHeightY[k]; 6861 } else if (SourcePixelFormat[k] == dm_444_8 && SourceScan[k] == dm_vert) { 6862 MinimumSwathHeightY = MaximumSwathHeightY[k]; 6863 } else { 6864 MinimumSwathHeightY = MaximumSwathHeightY[k] / 2; 6865 } 6866 MinimumSwathHeightC = MaximumSwathHeightC[k]; 6867 } else { 6868 if (SurfaceTiling[k] == dm_sw_linear) { 6869 MinimumSwathHeightY = MaximumSwathHeightY[k]; 6870 MinimumSwathHeightC = MaximumSwathHeightC[k]; 6871 } else if (SourcePixelFormat[k] == dm_rgbe_alpha && SourceScan[k] == dm_vert) { 6872 MinimumSwathHeightY = MaximumSwathHeightY[k] / 2; 6873 MinimumSwathHeightC = MaximumSwathHeightC[k]; 6874 } else if (SourcePixelFormat[k] == dm_rgbe_alpha) { 6875 MinimumSwathHeightY = MaximumSwathHeightY[k] / 2; 6876 MinimumSwathHeightC = MaximumSwathHeightC[k] / 2; 6877 } else if (SourcePixelFormat[k] == dm_420_8 && SourceScan[k] == dm_vert) { 6878 MinimumSwathHeightY = MaximumSwathHeightY[k]; 6879 MinimumSwathHeightC = MaximumSwathHeightC[k] / 2; 6880 } else { 6881 MinimumSwathHeightC = MaximumSwathHeightC[k] / 2; 6882 MinimumSwathHeightY = MaximumSwathHeightY[k] / 2; 6883 } 6884 } 6885 6886 RoundedUpMaxSwathSizeBytesY = swath_width_luma_ub[k] * BytePerPixDETY[k] * MaximumSwathHeightY[k]; 6887 RoundedUpMinSwathSizeBytesY = swath_width_luma_ub[k] * BytePerPixDETY[k] * MinimumSwathHeightY; 6888 if (SourcePixelFormat[k] == dm_420_10) { 6889 RoundedUpMaxSwathSizeBytesY = dml_ceil((double) RoundedUpMaxSwathSizeBytesY, 256); 6890 RoundedUpMinSwathSizeBytesY = dml_ceil((double) RoundedUpMinSwathSizeBytesY, 256); 6891 } 6892 RoundedUpMaxSwathSizeBytesC = swath_width_chroma_ub[k] * BytePerPixDETC[k] * MaximumSwathHeightC[k]; 6893 RoundedUpMinSwathSizeBytesC = swath_width_chroma_ub[k] * BytePerPixDETC[k] * MinimumSwathHeightC; 6894 if (SourcePixelFormat[k] == dm_420_10) { 6895 RoundedUpMaxSwathSizeBytesC = dml_ceil(RoundedUpMaxSwathSizeBytesC, 256); 6896 RoundedUpMinSwathSizeBytesC = dml_ceil(RoundedUpMinSwathSizeBytesC, 256); 6897 } 6898 6899 if (RoundedUpMaxSwathSizeBytesY + RoundedUpMaxSwathSizeBytesC <= DETBufferSizeInKByte * 1024 / 2) { 6900 SwathHeightY[k] = MaximumSwathHeightY[k]; 6901 SwathHeightC[k] = MaximumSwathHeightC[k]; 6902 RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY; 6903 RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC; 6904 } else if (RoundedUpMaxSwathSizeBytesY >= 1.5 * RoundedUpMaxSwathSizeBytesC 6905 && RoundedUpMinSwathSizeBytesY + RoundedUpMaxSwathSizeBytesC <= DETBufferSizeInKByte * 1024 / 2) { 6906 SwathHeightY[k] = MinimumSwathHeightY; 6907 SwathHeightC[k] = MaximumSwathHeightC[k]; 6908 RoundedUpSwathSizeBytesY = RoundedUpMinSwathSizeBytesY; 6909 RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC; 6910 } else if (RoundedUpMaxSwathSizeBytesY < 1.5 * RoundedUpMaxSwathSizeBytesC 6911 && RoundedUpMaxSwathSizeBytesY + RoundedUpMinSwathSizeBytesC <= DETBufferSizeInKByte * 1024 / 2) { 6912 SwathHeightY[k] = MaximumSwathHeightY[k]; 6913 SwathHeightC[k] = MinimumSwathHeightC; 6914 RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY; 6915 RoundedUpSwathSizeBytesC = RoundedUpMinSwathSizeBytesC; 6916 } else { 6917 SwathHeightY[k] = MinimumSwathHeightY; 6918 SwathHeightC[k] = MinimumSwathHeightC; 6919 RoundedUpSwathSizeBytesY = RoundedUpMinSwathSizeBytesY; 6920 RoundedUpSwathSizeBytesC = RoundedUpMinSwathSizeBytesC; 6921 } 6922 { 6923 double actDETBufferSizeInKByte = dml_ceil(DETBufferSizeInKByte, 64); 6924 if (SwathHeightC[k] == 0) { 6925 DETBufferSizeY[k] = actDETBufferSizeInKByte * 1024; 6926 DETBufferSizeC[k] = 0; 6927 } else if (RoundedUpSwathSizeBytesY <= 1.5 * RoundedUpSwathSizeBytesC) { 6928 DETBufferSizeY[k] = actDETBufferSizeInKByte * 1024 / 2; 6929 DETBufferSizeC[k] = actDETBufferSizeInKByte * 1024 / 2; 6930 } else { 6931 DETBufferSizeY[k] = dml_floor(actDETBufferSizeInKByte * 1024 * 2 / 3, 1024); 6932 DETBufferSizeC[k] = actDETBufferSizeInKByte * 1024 / 3; 6933 } 6934 6935 if (RoundedUpMinSwathSizeBytesY + RoundedUpMinSwathSizeBytesC > actDETBufferSizeInKByte * 1024 / 2 || SwathWidth[k] > MaximumSwathWidthLuma[k] 6936 || (SwathHeightC[k] > 0 && SwathWidthChroma[k] > MaximumSwathWidthChroma[k])) { 6937 *ViewportSizeSupport = false; 6938 ViewportSizeSupportPerPlane[k] = false; 6939 } else { 6940 ViewportSizeSupportPerPlane[k] = true; 6941 } 6942 } 6943 } 6944 } 6945 6946 static void CalculateSwathWidth( 6947 bool ForceSingleDPP, 6948 int NumberOfActivePlanes, 6949 enum source_format_class SourcePixelFormat[], 6950 enum scan_direction_class SourceScan[], 6951 int ViewportWidth[], 6952 int ViewportHeight[], 6953 int SurfaceWidthY[], 6954 int SurfaceWidthC[], 6955 int SurfaceHeightY[], 6956 int SurfaceHeightC[], 6957 enum odm_combine_mode ODMCombineEnabled[], 6958 int BytePerPixY[], 6959 int BytePerPixC[], 6960 int Read256BytesBlockHeightY[], 6961 int Read256BytesBlockHeightC[], 6962 int Read256BytesBlockWidthY[], 6963 int Read256BytesBlockWidthC[], 6964 int BlendingAndTiming[], 6965 int HActive[], 6966 double HRatio[], 6967 int DPPPerPlane[], 6968 double SwathWidthSingleDPPY[], 6969 double SwathWidthSingleDPPC[], 6970 double SwathWidthY[], 6971 double SwathWidthC[], 6972 int MaximumSwathHeightY[], 6973 int MaximumSwathHeightC[], 6974 int swath_width_luma_ub[], 6975 int swath_width_chroma_ub[]) 6976 { 6977 enum odm_combine_mode MainPlaneODMCombine; 6978 int j, k; 6979 6980 #ifdef __DML_VBA_DEBUG__ 6981 dml_print("DML::%s: NumberOfActivePlanes = %d\n", __func__, NumberOfActivePlanes); 6982 #endif 6983 6984 for (k = 0; k < NumberOfActivePlanes; ++k) { 6985 if (SourceScan[k] != dm_vert) { 6986 SwathWidthSingleDPPY[k] = ViewportWidth[k]; 6987 } else { 6988 SwathWidthSingleDPPY[k] = ViewportHeight[k]; 6989 } 6990 6991 #ifdef __DML_VBA_DEBUG__ 6992 dml_print("DML::%s: k=%d ViewportWidth=%d\n", __func__, k, ViewportWidth[k]); 6993 dml_print("DML::%s: k=%d ViewportHeight=%d\n", __func__, k, ViewportHeight[k]); 6994 #endif 6995 6996 MainPlaneODMCombine = ODMCombineEnabled[k]; 6997 for (j = 0; j < NumberOfActivePlanes; ++j) { 6998 if (BlendingAndTiming[k] == j) { 6999 MainPlaneODMCombine = ODMCombineEnabled[j]; 7000 } 7001 } 7002 7003 if (MainPlaneODMCombine == dm_odm_combine_mode_4to1) { 7004 SwathWidthY[k] = dml_min(SwathWidthSingleDPPY[k], dml_round(HActive[k] / 4.0 * HRatio[k])); 7005 } else if (MainPlaneODMCombine == dm_odm_combine_mode_2to1) { 7006 SwathWidthY[k] = dml_min(SwathWidthSingleDPPY[k], dml_round(HActive[k] / 2.0 * HRatio[k])); 7007 } else if (DPPPerPlane[k] == 2) { 7008 SwathWidthY[k] = SwathWidthSingleDPPY[k] / 2; 7009 } else { 7010 SwathWidthY[k] = SwathWidthSingleDPPY[k]; 7011 } 7012 7013 #ifdef __DML_VBA_DEBUG__ 7014 dml_print("DML::%s: k=%d SwathWidthSingleDPPY=%f\n", __func__, k, SwathWidthSingleDPPY[k]); 7015 dml_print("DML::%s: k=%d SwathWidthY=%f\n", __func__, k, SwathWidthY[k]); 7016 #endif 7017 7018 if (SourcePixelFormat[k] == dm_420_8 || SourcePixelFormat[k] == dm_420_10 || SourcePixelFormat[k] == dm_420_12) { 7019 SwathWidthC[k] = SwathWidthY[k] / 2; 7020 SwathWidthSingleDPPC[k] = SwathWidthSingleDPPY[k] / 2; 7021 } else { 7022 SwathWidthC[k] = SwathWidthY[k]; 7023 SwathWidthSingleDPPC[k] = SwathWidthSingleDPPY[k]; 7024 } 7025 7026 if (ForceSingleDPP == true) { 7027 SwathWidthY[k] = SwathWidthSingleDPPY[k]; 7028 SwathWidthC[k] = SwathWidthSingleDPPC[k]; 7029 } 7030 { 7031 int surface_width_ub_l = dml_ceil(SurfaceWidthY[k], Read256BytesBlockWidthY[k]); 7032 int surface_height_ub_l = dml_ceil(SurfaceHeightY[k], Read256BytesBlockHeightY[k]); 7033 int surface_width_ub_c = dml_ceil(SurfaceWidthC[k], Read256BytesBlockWidthC[k]); 7034 int surface_height_ub_c = dml_ceil(SurfaceHeightC[k], Read256BytesBlockHeightC[k]); 7035 7036 #ifdef __DML_VBA_DEBUG__ 7037 dml_print("DML::%s: k=%d surface_width_ub_l=%0d\n", __func__, k, surface_width_ub_l); 7038 #endif 7039 7040 if (SourceScan[k] != dm_vert) { 7041 MaximumSwathHeightY[k] = Read256BytesBlockHeightY[k]; 7042 MaximumSwathHeightC[k] = Read256BytesBlockHeightC[k]; 7043 swath_width_luma_ub[k] = dml_min(surface_width_ub_l, (int) dml_ceil(SwathWidthY[k] - 1, Read256BytesBlockWidthY[k]) + Read256BytesBlockWidthY[k]); 7044 if (BytePerPixC[k] > 0) { 7045 swath_width_chroma_ub[k] = dml_min( 7046 surface_width_ub_c, 7047 (int) dml_ceil(SwathWidthC[k] - 1, Read256BytesBlockWidthC[k]) + Read256BytesBlockWidthC[k]); 7048 } else { 7049 swath_width_chroma_ub[k] = 0; 7050 } 7051 } else { 7052 MaximumSwathHeightY[k] = Read256BytesBlockWidthY[k]; 7053 MaximumSwathHeightC[k] = Read256BytesBlockWidthC[k]; 7054 swath_width_luma_ub[k] = dml_min(surface_height_ub_l, (int) dml_ceil(SwathWidthY[k] - 1, Read256BytesBlockHeightY[k]) + Read256BytesBlockHeightY[k]); 7055 if (BytePerPixC[k] > 0) { 7056 swath_width_chroma_ub[k] = dml_min( 7057 surface_height_ub_c, 7058 (int) dml_ceil(SwathWidthC[k] - 1, Read256BytesBlockHeightC[k]) + Read256BytesBlockHeightC[k]); 7059 } else { 7060 swath_width_chroma_ub[k] = 0; 7061 } 7062 } 7063 } 7064 } 7065 } 7066 7067 static double CalculateExtraLatency( 7068 int RoundTripPingLatencyCycles, 7069 int ReorderingBytes, 7070 double DCFCLK, 7071 int TotalNumberOfActiveDPP, 7072 int PixelChunkSizeInKByte, 7073 int TotalNumberOfDCCActiveDPP, 7074 int MetaChunkSize, 7075 double ReturnBW, 7076 bool GPUVMEnable, 7077 bool HostVMEnable, 7078 int NumberOfActivePlanes, 7079 int NumberOfDPP[], 7080 int dpte_group_bytes[], 7081 double HostVMInefficiencyFactor, 7082 double HostVMMinPageSize, 7083 int HostVMMaxNonCachedPageTableLevels) 7084 { 7085 double ExtraLatencyBytes; 7086 double ExtraLatency; 7087 7088 ExtraLatencyBytes = CalculateExtraLatencyBytes( 7089 ReorderingBytes, 7090 TotalNumberOfActiveDPP, 7091 PixelChunkSizeInKByte, 7092 TotalNumberOfDCCActiveDPP, 7093 MetaChunkSize, 7094 GPUVMEnable, 7095 HostVMEnable, 7096 NumberOfActivePlanes, 7097 NumberOfDPP, 7098 dpte_group_bytes, 7099 HostVMInefficiencyFactor, 7100 HostVMMinPageSize, 7101 HostVMMaxNonCachedPageTableLevels); 7102 7103 ExtraLatency = (RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__) / DCFCLK + ExtraLatencyBytes / ReturnBW; 7104 7105 #ifdef __DML_VBA_DEBUG__ 7106 dml_print("DML::%s: RoundTripPingLatencyCycles=%d\n", __func__, RoundTripPingLatencyCycles); 7107 dml_print("DML::%s: DCFCLK=%f\n", __func__, DCFCLK); 7108 dml_print("DML::%s: ExtraLatencyBytes=%f\n", __func__, ExtraLatencyBytes); 7109 dml_print("DML::%s: ReturnBW=%f\n", __func__, ReturnBW); 7110 dml_print("DML::%s: ExtraLatency=%f\n", __func__, ExtraLatency); 7111 #endif 7112 7113 return ExtraLatency; 7114 } 7115 7116 static double CalculateExtraLatencyBytes( 7117 int ReorderingBytes, 7118 int TotalNumberOfActiveDPP, 7119 int PixelChunkSizeInKByte, 7120 int TotalNumberOfDCCActiveDPP, 7121 int MetaChunkSize, 7122 bool GPUVMEnable, 7123 bool HostVMEnable, 7124 int NumberOfActivePlanes, 7125 int NumberOfDPP[], 7126 int dpte_group_bytes[], 7127 double HostVMInefficiencyFactor, 7128 double HostVMMinPageSize, 7129 int HostVMMaxNonCachedPageTableLevels) 7130 { 7131 double ret; 7132 int HostVMDynamicLevels = 0, k; 7133 7134 if (GPUVMEnable == true && HostVMEnable == true) { 7135 if (HostVMMinPageSize < 2048) { 7136 HostVMDynamicLevels = HostVMMaxNonCachedPageTableLevels; 7137 } else if (HostVMMinPageSize >= 2048 && HostVMMinPageSize < 1048576) { 7138 HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 1); 7139 } else { 7140 HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 2); 7141 } 7142 } else { 7143 HostVMDynamicLevels = 0; 7144 } 7145 7146 ret = ReorderingBytes + (TotalNumberOfActiveDPP * PixelChunkSizeInKByte + TotalNumberOfDCCActiveDPP * MetaChunkSize) * 1024.0; 7147 7148 if (GPUVMEnable == true) { 7149 for (k = 0; k < NumberOfActivePlanes; ++k) { 7150 ret = ret + NumberOfDPP[k] * dpte_group_bytes[k] * (1 + 8 * HostVMDynamicLevels) * HostVMInefficiencyFactor; 7151 } 7152 } 7153 return ret; 7154 } 7155 7156 static double CalculateUrgentLatency( 7157 double UrgentLatencyPixelDataOnly, 7158 double UrgentLatencyPixelMixedWithVMData, 7159 double UrgentLatencyVMDataOnly, 7160 bool DoUrgentLatencyAdjustment, 7161 double UrgentLatencyAdjustmentFabricClockComponent, 7162 double UrgentLatencyAdjustmentFabricClockReference, 7163 double FabricClock) 7164 { 7165 double ret; 7166 7167 ret = dml_max3(UrgentLatencyPixelDataOnly, UrgentLatencyPixelMixedWithVMData, UrgentLatencyVMDataOnly); 7168 if (DoUrgentLatencyAdjustment == true) { 7169 ret = ret + UrgentLatencyAdjustmentFabricClockComponent * (UrgentLatencyAdjustmentFabricClockReference / FabricClock - 1); 7170 } 7171 return ret; 7172 } 7173 7174 static void UseMinimumDCFCLK( 7175 struct display_mode_lib *mode_lib, 7176 int MaxPrefetchMode, 7177 int ReorderingBytes) 7178 { 7179 struct vba_vars_st *v = &mode_lib->vba; 7180 int dummy1, i, j, k; 7181 double NormalEfficiency, dummy2, dummy3; 7182 double TotalMaxPrefetchFlipDPTERowBandwidth[DC__VOLTAGE_STATES][2]; 7183 7184 NormalEfficiency = v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0; 7185 for (i = 0; i < v->soc.num_states; ++i) { 7186 for (j = 0; j <= 1; ++j) { 7187 double PixelDCFCLKCyclesRequiredInPrefetch[DC__NUM_DPP__MAX]; 7188 double PrefetchPixelLinesTime[DC__NUM_DPP__MAX]; 7189 double DCFCLKRequiredForPeakBandwidthPerPlane[DC__NUM_DPP__MAX]; 7190 double DynamicMetadataVMExtraLatency[DC__NUM_DPP__MAX]; 7191 double MinimumTWait; 7192 double NonDPTEBandwidth; 7193 double DPTEBandwidth; 7194 double DCFCLKRequiredForAverageBandwidth; 7195 double ExtraLatencyBytes; 7196 double ExtraLatencyCycles; 7197 double DCFCLKRequiredForPeakBandwidth; 7198 int NoOfDPPState[DC__NUM_DPP__MAX]; 7199 double MinimumTvmPlus2Tr0; 7200 7201 TotalMaxPrefetchFlipDPTERowBandwidth[i][j] = 0; 7202 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 7203 TotalMaxPrefetchFlipDPTERowBandwidth[i][j] = TotalMaxPrefetchFlipDPTERowBandwidth[i][j] 7204 + v->NoOfDPP[i][j][k] * v->DPTEBytesPerRow[i][j][k] / (15.75 * v->HTotal[k] / v->PixelClock[k]); 7205 } 7206 7207 for (k = 0; k <= v->NumberOfActivePlanes - 1; ++k) { 7208 NoOfDPPState[k] = v->NoOfDPP[i][j][k]; 7209 } 7210 7211 MinimumTWait = CalculateTWait(MaxPrefetchMode, v->FinalDRAMClockChangeLatency, v->UrgLatency[i], v->SREnterPlusExitTime); 7212 NonDPTEBandwidth = v->TotalVActivePixelBandwidth[i][j] + v->TotalVActiveCursorBandwidth[i][j] + v->TotalMetaRowBandwidth[i][j]; 7213 DPTEBandwidth = (v->HostVMEnable == true || v->ImmediateFlipRequirement[0] == dm_immediate_flip_required) ? 7214 TotalMaxPrefetchFlipDPTERowBandwidth[i][j] : v->TotalDPTERowBandwidth[i][j]; 7215 DCFCLKRequiredForAverageBandwidth = dml_max3( 7216 v->ProjectedDCFCLKDeepSleep[i][j], 7217 (NonDPTEBandwidth + v->TotalDPTERowBandwidth[i][j]) / v->ReturnBusWidth 7218 / (v->MaxAveragePercentOfIdealFabricAndSDPPortBWDisplayCanUseInNormalSystemOperation / 100), 7219 (NonDPTEBandwidth + DPTEBandwidth / NormalEfficiency) / NormalEfficiency / v->ReturnBusWidth); 7220 7221 ExtraLatencyBytes = CalculateExtraLatencyBytes( 7222 ReorderingBytes, 7223 v->TotalNumberOfActiveDPP[i][j], 7224 v->PixelChunkSizeInKByte, 7225 v->TotalNumberOfDCCActiveDPP[i][j], 7226 v->MetaChunkSize, 7227 v->GPUVMEnable, 7228 v->HostVMEnable, 7229 v->NumberOfActivePlanes, 7230 NoOfDPPState, 7231 v->dpte_group_bytes, 7232 1, 7233 v->HostVMMinPageSize, 7234 v->HostVMMaxNonCachedPageTableLevels); 7235 ExtraLatencyCycles = v->RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__ + ExtraLatencyBytes / NormalEfficiency / v->ReturnBusWidth; 7236 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 7237 double DCFCLKCyclesRequiredInPrefetch; 7238 double ExpectedPrefetchBWAcceleration; 7239 double PrefetchTime; 7240 7241 PixelDCFCLKCyclesRequiredInPrefetch[k] = (v->PrefetchLinesY[i][j][k] * v->swath_width_luma_ub_all_states[i][j][k] * v->BytePerPixelY[k] 7242 + v->PrefetchLinesC[i][j][k] * v->swath_width_chroma_ub_all_states[i][j][k] * v->BytePerPixelC[k]) / NormalEfficiency / v->ReturnBusWidth; 7243 DCFCLKCyclesRequiredInPrefetch = 2 * ExtraLatencyCycles / NoOfDPPState[k] 7244 + v->PDEAndMetaPTEBytesPerFrame[i][j][k] / NormalEfficiency / NormalEfficiency / v->ReturnBusWidth * (v->GPUVMMaxPageTableLevels > 2 ? 1 : 0) 7245 + 2 * v->DPTEBytesPerRow[i][j][k] / NormalEfficiency / NormalEfficiency / v->ReturnBusWidth 7246 + 2 * v->MetaRowBytes[i][j][k] / NormalEfficiency / v->ReturnBusWidth + PixelDCFCLKCyclesRequiredInPrefetch[k]; 7247 PrefetchPixelLinesTime[k] = dml_max(v->PrefetchLinesY[i][j][k], v->PrefetchLinesC[i][j][k]) * v->HTotal[k] / v->PixelClock[k]; 7248 ExpectedPrefetchBWAcceleration = (v->VActivePixelBandwidth[i][j][k] + v->VActiveCursorBandwidth[i][j][k]) 7249 / (v->ReadBandwidthLuma[k] + v->ReadBandwidthChroma[k]); 7250 DynamicMetadataVMExtraLatency[k] = 7251 (v->GPUVMEnable == true && v->DynamicMetadataEnable[k] == true && v->DynamicMetadataVMEnabled == true) ? 7252 v->UrgLatency[i] * v->GPUVMMaxPageTableLevels * (v->HostVMEnable == true ? v->HostVMMaxNonCachedPageTableLevels + 1 : 1) : 0; 7253 PrefetchTime = (v->MaximumVStartup[i][j][k] - 1) * v->HTotal[k] / v->PixelClock[k] - MinimumTWait 7254 - v->UrgLatency[i] 7255 * ((v->GPUVMMaxPageTableLevels <= 2 ? v->GPUVMMaxPageTableLevels : v->GPUVMMaxPageTableLevels - 2) 7256 * (v->HostVMEnable == true ? v->HostVMMaxNonCachedPageTableLevels + 1 : 1) - 1) 7257 - DynamicMetadataVMExtraLatency[k]; 7258 7259 if (PrefetchTime > 0) { 7260 double ExpectedVRatioPrefetch; 7261 ExpectedVRatioPrefetch = PrefetchPixelLinesTime[k] 7262 / (PrefetchTime * PixelDCFCLKCyclesRequiredInPrefetch[k] / DCFCLKCyclesRequiredInPrefetch); 7263 DCFCLKRequiredForPeakBandwidthPerPlane[k] = NoOfDPPState[k] * PixelDCFCLKCyclesRequiredInPrefetch[k] / PrefetchPixelLinesTime[k] 7264 * dml_max(1.0, ExpectedVRatioPrefetch) * dml_max(1.0, ExpectedVRatioPrefetch / 4) * ExpectedPrefetchBWAcceleration; 7265 if (v->HostVMEnable == true || v->ImmediateFlipRequirement[0] == dm_immediate_flip_required) { 7266 DCFCLKRequiredForPeakBandwidthPerPlane[k] = DCFCLKRequiredForPeakBandwidthPerPlane[k] 7267 + NoOfDPPState[k] * DPTEBandwidth / NormalEfficiency / NormalEfficiency / v->ReturnBusWidth; 7268 } 7269 } else { 7270 DCFCLKRequiredForPeakBandwidthPerPlane[k] = v->DCFCLKPerState[i]; 7271 } 7272 if (v->DynamicMetadataEnable[k] == true) { 7273 double TSetupPipe; 7274 double TdmbfPipe; 7275 double TdmsksPipe; 7276 double TdmecPipe; 7277 double AllowedTimeForUrgentExtraLatency; 7278 7279 CalculateVupdateAndDynamicMetadataParameters( 7280 v->MaxInterDCNTileRepeaters, 7281 v->RequiredDPPCLK[i][j][k], 7282 v->RequiredDISPCLK[i][j], 7283 v->ProjectedDCFCLKDeepSleep[i][j], 7284 v->PixelClock[k], 7285 v->HTotal[k], 7286 v->VTotal[k] - v->VActive[k], 7287 v->DynamicMetadataTransmittedBytes[k], 7288 v->DynamicMetadataLinesBeforeActiveRequired[k], 7289 v->Interlace[k], 7290 v->ProgressiveToInterlaceUnitInOPP, 7291 &TSetupPipe, 7292 &TdmbfPipe, 7293 &TdmecPipe, 7294 &TdmsksPipe, 7295 &dummy1, 7296 &dummy2, 7297 &dummy3); 7298 AllowedTimeForUrgentExtraLatency = v->MaximumVStartup[i][j][k] * v->HTotal[k] / v->PixelClock[k] - MinimumTWait - TSetupPipe - TdmbfPipe - TdmecPipe 7299 - TdmsksPipe - DynamicMetadataVMExtraLatency[k]; 7300 if (AllowedTimeForUrgentExtraLatency > 0) { 7301 DCFCLKRequiredForPeakBandwidthPerPlane[k] = dml_max( 7302 DCFCLKRequiredForPeakBandwidthPerPlane[k], 7303 ExtraLatencyCycles / AllowedTimeForUrgentExtraLatency); 7304 } else { 7305 DCFCLKRequiredForPeakBandwidthPerPlane[k] = v->DCFCLKPerState[i]; 7306 } 7307 } 7308 } 7309 DCFCLKRequiredForPeakBandwidth = 0; 7310 for (k = 0; k <= v->NumberOfActivePlanes - 1; ++k) { 7311 DCFCLKRequiredForPeakBandwidth = DCFCLKRequiredForPeakBandwidth + DCFCLKRequiredForPeakBandwidthPerPlane[k]; 7312 } 7313 MinimumTvmPlus2Tr0 = v->UrgLatency[i] 7314 * (v->GPUVMEnable == true ? 7315 (v->HostVMEnable == true ? 7316 (v->GPUVMMaxPageTableLevels + 2) * (v->HostVMMaxNonCachedPageTableLevels + 1) - 1 : v->GPUVMMaxPageTableLevels + 1) : 7317 0); 7318 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 7319 double MaximumTvmPlus2Tr0PlusTsw; 7320 MaximumTvmPlus2Tr0PlusTsw = (v->MaximumVStartup[i][j][k] - 2) * v->HTotal[k] / v->PixelClock[k] - MinimumTWait - DynamicMetadataVMExtraLatency[k]; 7321 if (MaximumTvmPlus2Tr0PlusTsw <= MinimumTvmPlus2Tr0 + PrefetchPixelLinesTime[k] / 4) { 7322 DCFCLKRequiredForPeakBandwidth = v->DCFCLKPerState[i]; 7323 } else { 7324 DCFCLKRequiredForPeakBandwidth = dml_max3( 7325 DCFCLKRequiredForPeakBandwidth, 7326 2 * ExtraLatencyCycles / (MaximumTvmPlus2Tr0PlusTsw - MinimumTvmPlus2Tr0 - PrefetchPixelLinesTime[k] / 4), 7327 (2 * ExtraLatencyCycles + PixelDCFCLKCyclesRequiredInPrefetch[k]) / (MaximumTvmPlus2Tr0PlusTsw - MinimumTvmPlus2Tr0)); 7328 } 7329 } 7330 v->DCFCLKState[i][j] = dml_min(v->DCFCLKPerState[i], 1.05 * dml_max(DCFCLKRequiredForAverageBandwidth, DCFCLKRequiredForPeakBandwidth)); 7331 } 7332 } 7333 } 7334 7335 static void CalculateUnboundedRequestAndCompressedBufferSize( 7336 unsigned int DETBufferSizeInKByte, 7337 int ConfigReturnBufferSizeInKByte, 7338 enum unbounded_requesting_policy UseUnboundedRequestingFinal, 7339 int TotalActiveDPP, 7340 bool NoChromaPlanes, 7341 int MaxNumDPP, 7342 int CompressedBufferSegmentSizeInkByteFinal, 7343 enum output_encoder_class *Output, 7344 bool *UnboundedRequestEnabled, 7345 int *CompressedBufferSizeInkByte) 7346 { 7347 double actDETBufferSizeInKByte = dml_ceil(DETBufferSizeInKByte, 64); 7348 7349 *UnboundedRequestEnabled = UnboundedRequest(UseUnboundedRequestingFinal, TotalActiveDPP, NoChromaPlanes, Output[0]); 7350 *CompressedBufferSizeInkByte = ( 7351 *UnboundedRequestEnabled == true ? 7352 ConfigReturnBufferSizeInKByte - TotalActiveDPP * actDETBufferSizeInKByte : 7353 ConfigReturnBufferSizeInKByte - MaxNumDPP * actDETBufferSizeInKByte); 7354 *CompressedBufferSizeInkByte = *CompressedBufferSizeInkByte * CompressedBufferSegmentSizeInkByteFinal / 64; 7355 7356 #ifdef __DML_VBA_DEBUG__ 7357 dml_print("DML::%s: TotalActiveDPP = %d\n", __func__, TotalActiveDPP); 7358 dml_print("DML::%s: DETBufferSizeInKByte = %d\n", __func__, DETBufferSizeInKByte); 7359 dml_print("DML::%s: ConfigReturnBufferSizeInKByte = %d\n", __func__, ConfigReturnBufferSizeInKByte); 7360 dml_print("DML::%s: UseUnboundedRequestingFinal = %d\n", __func__, UseUnboundedRequestingFinal); 7361 dml_print("DML::%s: actDETBufferSizeInKByte = %f\n", __func__, actDETBufferSizeInKByte); 7362 dml_print("DML::%s: UnboundedRequestEnabled = %d\n", __func__, *UnboundedRequestEnabled); 7363 dml_print("DML::%s: CompressedBufferSizeInkByte = %d\n", __func__, *CompressedBufferSizeInkByte); 7364 #endif 7365 } 7366 7367 static bool UnboundedRequest(enum unbounded_requesting_policy UseUnboundedRequestingFinal, int TotalNumberOfActiveDPP, bool NoChroma, enum output_encoder_class Output) 7368 { 7369 bool ret_val = false; 7370 7371 ret_val = (UseUnboundedRequestingFinal != dm_unbounded_requesting_disable && TotalNumberOfActiveDPP == 1 && NoChroma); 7372 if (UseUnboundedRequestingFinal == dm_unbounded_requesting_edp_only && Output != dm_edp) { 7373 ret_val = false; 7374 } 7375 return (ret_val); 7376 } 7377 7378