1 // SPDX-License-Identifier: MIT 2 /* 3 * Copyright 2022 Advanced Micro Devices, Inc. 4 * 5 * Permission is hereby granted, free of charge, to any person obtaining a 6 * copy of this software and associated documentation files (the "Software"), 7 * to deal in the Software without restriction, including without limitation 8 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 9 * and/or sell copies of the Software, and to permit persons to whom the 10 * Software is furnished to do so, subject to the following conditions: 11 * 12 * The above copyright notice and this permission notice shall be included in 13 * all copies or substantial portions of the Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 19 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 20 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 21 * OTHER DEALINGS IN THE SOFTWARE. 22 * 23 * Authors: AMD 24 * 25 */ 26 27 #define UNIT_TEST 0 28 #if !UNIT_TEST 29 #include "dc.h" 30 #include "dc_link.h" 31 #endif 32 #include "../display_mode_lib.h" 33 #include "display_mode_vba_314.h" 34 #include "../dml_inline_defs.h" 35 36 /* 37 * NOTE: 38 * This file is gcc-parsable HW gospel, coming straight from HW engineers. 39 * 40 * It doesn't adhere to Linux kernel style and sometimes will do things in odd 41 * ways. Unless there is something clearly wrong with it the code should 42 * remain as-is as it provides us with a guarantee from HW that it is correct. 
43 */ 44 45 #define BPP_INVALID 0 46 #define BPP_BLENDED_PIPE 0xffffffff 47 #define DCN314_MAX_DSC_IMAGE_WIDTH 5184 48 #define DCN314_MAX_FMT_420_BUFFER_WIDTH 4096 49 50 // For DML-C changes that hasn't been propagated to VBA yet 51 //#define __DML_VBA_ALLOW_DELTA__ 52 53 // Move these to ip parameters/constant 54 55 // At which vstartup the DML start to try if the mode can be supported 56 #define __DML_VBA_MIN_VSTARTUP__ 9 57 58 // Delay in DCFCLK from ARB to DET (1st num is ARB to SDPIF, 2nd number is SDPIF to DET) 59 #define __DML_ARB_TO_RET_DELAY__ (7 + 95) 60 61 // fudge factor for min dcfclk calclation 62 #define __DML_MIN_DCFCLK_FACTOR__ 1.15 63 64 struct { 65 double DPPCLK; 66 double DISPCLK; 67 double PixelClock; 68 double DCFCLKDeepSleep; 69 unsigned int DPPPerPlane; 70 bool ScalerEnabled; 71 double VRatio; 72 double VRatioChroma; 73 enum scan_direction_class SourceScan; 74 unsigned int BlockWidth256BytesY; 75 unsigned int BlockHeight256BytesY; 76 unsigned int BlockWidth256BytesC; 77 unsigned int BlockHeight256BytesC; 78 unsigned int InterlaceEnable; 79 unsigned int NumberOfCursors; 80 unsigned int VBlank; 81 unsigned int HTotal; 82 unsigned int DCCEnable; 83 bool ODMCombineIsEnabled; 84 enum source_format_class SourcePixelFormat; 85 int BytePerPixelY; 86 int BytePerPixelC; 87 bool ProgressiveToInterlaceUnitInOPP; 88 } Pipe; 89 90 #define BPP_INVALID 0 91 #define BPP_BLENDED_PIPE 0xffffffff 92 93 static bool CalculateBytePerPixelAnd256BBlockSizes( 94 enum source_format_class SourcePixelFormat, 95 enum dm_swizzle_mode SurfaceTiling, 96 unsigned int *BytePerPixelY, 97 unsigned int *BytePerPixelC, 98 double *BytePerPixelDETY, 99 double *BytePerPixelDETC, 100 unsigned int *BlockHeight256BytesY, 101 unsigned int *BlockHeight256BytesC, 102 unsigned int *BlockWidth256BytesY, 103 unsigned int *BlockWidth256BytesC); 104 static void DisplayPipeConfiguration(struct display_mode_lib *mode_lib); 105 static void 
DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation(struct display_mode_lib *mode_lib); 106 static unsigned int dscceComputeDelay( 107 unsigned int bpc, 108 double BPP, 109 unsigned int sliceWidth, 110 unsigned int numSlices, 111 enum output_format_class pixelFormat, 112 enum output_encoder_class Output); 113 static unsigned int dscComputeDelay(enum output_format_class pixelFormat, enum output_encoder_class Output); 114 static bool CalculatePrefetchSchedule( 115 struct display_mode_lib *mode_lib, 116 double HostVMInefficiencyFactor, 117 Pipe *myPipe, 118 unsigned int DSCDelay, 119 double DPPCLKDelaySubtotalPlusCNVCFormater, 120 double DPPCLKDelaySCL, 121 double DPPCLKDelaySCLLBOnly, 122 double DPPCLKDelayCNVCCursor, 123 double DISPCLKDelaySubtotal, 124 unsigned int DPP_RECOUT_WIDTH, 125 enum output_format_class OutputFormat, 126 unsigned int MaxInterDCNTileRepeaters, 127 unsigned int VStartup, 128 unsigned int MaxVStartup, 129 unsigned int GPUVMPageTableLevels, 130 bool GPUVMEnable, 131 bool HostVMEnable, 132 unsigned int HostVMMaxNonCachedPageTableLevels, 133 double HostVMMinPageSize, 134 bool DynamicMetadataEnable, 135 bool DynamicMetadataVMEnabled, 136 int DynamicMetadataLinesBeforeActiveRequired, 137 unsigned int DynamicMetadataTransmittedBytes, 138 double UrgentLatency, 139 double UrgentExtraLatency, 140 double TCalc, 141 unsigned int PDEAndMetaPTEBytesFrame, 142 unsigned int MetaRowByte, 143 unsigned int PixelPTEBytesPerRow, 144 double PrefetchSourceLinesY, 145 unsigned int SwathWidthY, 146 double VInitPreFillY, 147 unsigned int MaxNumSwathY, 148 double PrefetchSourceLinesC, 149 unsigned int SwathWidthC, 150 double VInitPreFillC, 151 unsigned int MaxNumSwathC, 152 int swath_width_luma_ub, 153 int swath_width_chroma_ub, 154 unsigned int SwathHeightY, 155 unsigned int SwathHeightC, 156 double TWait, 157 double *DSTXAfterScaler, 158 double *DSTYAfterScaler, 159 double *DestinationLinesForPrefetch, 160 double *PrefetchBandwidth, 161 
double *DestinationLinesToRequestVMInVBlank, 162 double *DestinationLinesToRequestRowInVBlank, 163 double *VRatioPrefetchY, 164 double *VRatioPrefetchC, 165 double *RequiredPrefetchPixDataBWLuma, 166 double *RequiredPrefetchPixDataBWChroma, 167 bool *NotEnoughTimeForDynamicMetadata, 168 double *Tno_bw, 169 double *prefetch_vmrow_bw, 170 double *Tdmdl_vm, 171 double *Tdmdl, 172 double *TSetup, 173 int *VUpdateOffsetPix, 174 double *VUpdateWidthPix, 175 double *VReadyOffsetPix); 176 static double RoundToDFSGranularityUp(double Clock, double VCOSpeed); 177 static double RoundToDFSGranularityDown(double Clock, double VCOSpeed); 178 static void CalculateDCCConfiguration( 179 bool DCCEnabled, 180 bool DCCProgrammingAssumesScanDirectionUnknown, 181 enum source_format_class SourcePixelFormat, 182 unsigned int SurfaceWidthLuma, 183 unsigned int SurfaceWidthChroma, 184 unsigned int SurfaceHeightLuma, 185 unsigned int SurfaceHeightChroma, 186 double DETBufferSize, 187 unsigned int RequestHeight256ByteLuma, 188 unsigned int RequestHeight256ByteChroma, 189 enum dm_swizzle_mode TilingFormat, 190 unsigned int BytePerPixelY, 191 unsigned int BytePerPixelC, 192 double BytePerPixelDETY, 193 double BytePerPixelDETC, 194 enum scan_direction_class ScanOrientation, 195 unsigned int *MaxUncompressedBlockLuma, 196 unsigned int *MaxUncompressedBlockChroma, 197 unsigned int *MaxCompressedBlockLuma, 198 unsigned int *MaxCompressedBlockChroma, 199 unsigned int *IndependentBlockLuma, 200 unsigned int *IndependentBlockChroma); 201 static double CalculatePrefetchSourceLines( 202 struct display_mode_lib *mode_lib, 203 double VRatio, 204 double vtaps, 205 bool Interlace, 206 bool ProgressiveToInterlaceUnitInOPP, 207 unsigned int SwathHeight, 208 unsigned int ViewportYStart, 209 double *VInitPreFill, 210 unsigned int *MaxNumSwath); 211 static unsigned int CalculateVMAndRowBytes( 212 struct display_mode_lib *mode_lib, 213 bool DCCEnable, 214 unsigned int BlockHeight256Bytes, 215 unsigned int 
BlockWidth256Bytes, 216 enum source_format_class SourcePixelFormat, 217 unsigned int SurfaceTiling, 218 unsigned int BytePerPixel, 219 enum scan_direction_class ScanDirection, 220 unsigned int SwathWidth, 221 unsigned int ViewportHeight, 222 bool GPUVMEnable, 223 bool HostVMEnable, 224 unsigned int HostVMMaxNonCachedPageTableLevels, 225 unsigned int GPUVMMinPageSize, 226 unsigned int HostVMMinPageSize, 227 unsigned int PTEBufferSizeInRequests, 228 unsigned int Pitch, 229 unsigned int DCCMetaPitch, 230 unsigned int *MacroTileWidth, 231 unsigned int *MetaRowByte, 232 unsigned int *PixelPTEBytesPerRow, 233 bool *PTEBufferSizeNotExceeded, 234 int *dpte_row_width_ub, 235 unsigned int *dpte_row_height, 236 unsigned int *MetaRequestWidth, 237 unsigned int *MetaRequestHeight, 238 unsigned int *meta_row_width, 239 unsigned int *meta_row_height, 240 int *vm_group_bytes, 241 unsigned int *dpte_group_bytes, 242 unsigned int *PixelPTEReqWidth, 243 unsigned int *PixelPTEReqHeight, 244 unsigned int *PTERequestSize, 245 int *DPDE0BytesFrame, 246 int *MetaPTEBytesFrame); 247 static double CalculateTWait(unsigned int PrefetchMode, double DRAMClockChangeLatency, double UrgentLatency, double SREnterPlusExitTime); 248 static void CalculateRowBandwidth( 249 bool GPUVMEnable, 250 enum source_format_class SourcePixelFormat, 251 double VRatio, 252 double VRatioChroma, 253 bool DCCEnable, 254 double LineTime, 255 unsigned int MetaRowByteLuma, 256 unsigned int MetaRowByteChroma, 257 unsigned int meta_row_height_luma, 258 unsigned int meta_row_height_chroma, 259 unsigned int PixelPTEBytesPerRowLuma, 260 unsigned int PixelPTEBytesPerRowChroma, 261 unsigned int dpte_row_height_luma, 262 unsigned int dpte_row_height_chroma, 263 double *meta_row_bw, 264 double *dpte_row_bw); 265 266 static void CalculateFlipSchedule( 267 struct display_mode_lib *mode_lib, 268 double HostVMInefficiencyFactor, 269 double UrgentExtraLatency, 270 double UrgentLatency, 271 unsigned int GPUVMMaxPageTableLevels, 272 
bool HostVMEnable, 273 unsigned int HostVMMaxNonCachedPageTableLevels, 274 bool GPUVMEnable, 275 double HostVMMinPageSize, 276 double PDEAndMetaPTEBytesPerFrame, 277 double MetaRowBytes, 278 double DPTEBytesPerRow, 279 double BandwidthAvailableForImmediateFlip, 280 unsigned int TotImmediateFlipBytes, 281 enum source_format_class SourcePixelFormat, 282 double LineTime, 283 double VRatio, 284 double VRatioChroma, 285 double Tno_bw, 286 bool DCCEnable, 287 unsigned int dpte_row_height, 288 unsigned int meta_row_height, 289 unsigned int dpte_row_height_chroma, 290 unsigned int meta_row_height_chroma, 291 double *DestinationLinesToRequestVMInImmediateFlip, 292 double *DestinationLinesToRequestRowInImmediateFlip, 293 double *final_flip_bw, 294 bool *ImmediateFlipSupportedForPipe); 295 static double CalculateWriteBackDelay( 296 enum source_format_class WritebackPixelFormat, 297 double WritebackHRatio, 298 double WritebackVRatio, 299 unsigned int WritebackVTaps, 300 int WritebackDestinationWidth, 301 int WritebackDestinationHeight, 302 int WritebackSourceHeight, 303 unsigned int HTotal); 304 305 static void CalculateVupdateAndDynamicMetadataParameters( 306 int MaxInterDCNTileRepeaters, 307 double DPPCLK, 308 double DISPCLK, 309 double DCFClkDeepSleep, 310 double PixelClock, 311 int HTotal, 312 int VBlank, 313 int DynamicMetadataTransmittedBytes, 314 int DynamicMetadataLinesBeforeActiveRequired, 315 int InterlaceEnable, 316 bool ProgressiveToInterlaceUnitInOPP, 317 double *TSetup, 318 double *Tdmbf, 319 double *Tdmec, 320 double *Tdmsks, 321 int *VUpdateOffsetPix, 322 double *VUpdateWidthPix, 323 double *VReadyOffsetPix); 324 325 static void CalculateWatermarksAndDRAMSpeedChangeSupport( 326 struct display_mode_lib *mode_lib, 327 unsigned int PrefetchMode, 328 unsigned int NumberOfActivePlanes, 329 unsigned int MaxLineBufferLines, 330 unsigned int LineBufferSize, 331 unsigned int WritebackInterfaceBufferSize, 332 double DCFCLK, 333 double ReturnBW, 334 bool 
SynchronizedVBlank, 335 unsigned int dpte_group_bytes[], 336 unsigned int MetaChunkSize, 337 double UrgentLatency, 338 double ExtraLatency, 339 double WritebackLatency, 340 double WritebackChunkSize, 341 double SOCCLK, 342 double DRAMClockChangeLatency, 343 double SRExitTime, 344 double SREnterPlusExitTime, 345 double SRExitZ8Time, 346 double SREnterPlusExitZ8Time, 347 double DCFCLKDeepSleep, 348 unsigned int DETBufferSizeY[], 349 unsigned int DETBufferSizeC[], 350 unsigned int SwathHeightY[], 351 unsigned int SwathHeightC[], 352 unsigned int LBBitPerPixel[], 353 double SwathWidthY[], 354 double SwathWidthC[], 355 double HRatio[], 356 double HRatioChroma[], 357 unsigned int vtaps[], 358 unsigned int VTAPsChroma[], 359 double VRatio[], 360 double VRatioChroma[], 361 unsigned int HTotal[], 362 double PixelClock[], 363 unsigned int BlendingAndTiming[], 364 unsigned int DPPPerPlane[], 365 double BytePerPixelDETY[], 366 double BytePerPixelDETC[], 367 double DSTXAfterScaler[], 368 double DSTYAfterScaler[], 369 bool WritebackEnable[], 370 enum source_format_class WritebackPixelFormat[], 371 double WritebackDestinationWidth[], 372 double WritebackDestinationHeight[], 373 double WritebackSourceHeight[], 374 bool UnboundedRequestEnabled, 375 unsigned int CompressedBufferSizeInkByte, 376 enum clock_change_support *DRAMClockChangeSupport, 377 double *UrgentWatermark, 378 double *WritebackUrgentWatermark, 379 double *DRAMClockChangeWatermark, 380 double *WritebackDRAMClockChangeWatermark, 381 double *StutterExitWatermark, 382 double *StutterEnterPlusExitWatermark, 383 double *Z8StutterExitWatermark, 384 double *Z8StutterEnterPlusExitWatermark, 385 double *MinActiveDRAMClockChangeLatencySupported); 386 387 static void CalculateDCFCLKDeepSleep( 388 struct display_mode_lib *mode_lib, 389 unsigned int NumberOfActivePlanes, 390 int BytePerPixelY[], 391 int BytePerPixelC[], 392 double VRatio[], 393 double VRatioChroma[], 394 double SwathWidthY[], 395 double SwathWidthC[], 396 
unsigned int DPPPerPlane[], 397 double HRatio[], 398 double HRatioChroma[], 399 double PixelClock[], 400 double PSCL_THROUGHPUT[], 401 double PSCL_THROUGHPUT_CHROMA[], 402 double DPPCLK[], 403 double ReadBandwidthLuma[], 404 double ReadBandwidthChroma[], 405 int ReturnBusWidth, 406 double *DCFCLKDeepSleep); 407 408 static void CalculateUrgentBurstFactor( 409 int swath_width_luma_ub, 410 int swath_width_chroma_ub, 411 unsigned int SwathHeightY, 412 unsigned int SwathHeightC, 413 double LineTime, 414 double UrgentLatency, 415 double CursorBufferSize, 416 unsigned int CursorWidth, 417 unsigned int CursorBPP, 418 double VRatio, 419 double VRatioC, 420 double BytePerPixelInDETY, 421 double BytePerPixelInDETC, 422 double DETBufferSizeY, 423 double DETBufferSizeC, 424 double *UrgentBurstFactorCursor, 425 double *UrgentBurstFactorLuma, 426 double *UrgentBurstFactorChroma, 427 bool *NotEnoughUrgentLatencyHiding); 428 429 static void UseMinimumDCFCLK( 430 struct display_mode_lib *mode_lib, 431 int MaxPrefetchMode, 432 int ReorderingBytes); 433 434 static void CalculatePixelDeliveryTimes( 435 unsigned int NumberOfActivePlanes, 436 double VRatio[], 437 double VRatioChroma[], 438 double VRatioPrefetchY[], 439 double VRatioPrefetchC[], 440 unsigned int swath_width_luma_ub[], 441 unsigned int swath_width_chroma_ub[], 442 unsigned int DPPPerPlane[], 443 double HRatio[], 444 double HRatioChroma[], 445 double PixelClock[], 446 double PSCL_THROUGHPUT[], 447 double PSCL_THROUGHPUT_CHROMA[], 448 double DPPCLK[], 449 int BytePerPixelC[], 450 enum scan_direction_class SourceScan[], 451 unsigned int NumberOfCursors[], 452 unsigned int CursorWidth[][DC__NUM_CURSOR__MAX], 453 unsigned int CursorBPP[][DC__NUM_CURSOR__MAX], 454 unsigned int BlockWidth256BytesY[], 455 unsigned int BlockHeight256BytesY[], 456 unsigned int BlockWidth256BytesC[], 457 unsigned int BlockHeight256BytesC[], 458 double DisplayPipeLineDeliveryTimeLuma[], 459 double DisplayPipeLineDeliveryTimeChroma[], 460 double 
DisplayPipeLineDeliveryTimeLumaPrefetch[], 461 double DisplayPipeLineDeliveryTimeChromaPrefetch[], 462 double DisplayPipeRequestDeliveryTimeLuma[], 463 double DisplayPipeRequestDeliveryTimeChroma[], 464 double DisplayPipeRequestDeliveryTimeLumaPrefetch[], 465 double DisplayPipeRequestDeliveryTimeChromaPrefetch[], 466 double CursorRequestDeliveryTime[], 467 double CursorRequestDeliveryTimePrefetch[]); 468 469 static void CalculateMetaAndPTETimes( 470 int NumberOfActivePlanes, 471 bool GPUVMEnable, 472 int MetaChunkSize, 473 int MinMetaChunkSizeBytes, 474 int HTotal[], 475 double VRatio[], 476 double VRatioChroma[], 477 double DestinationLinesToRequestRowInVBlank[], 478 double DestinationLinesToRequestRowInImmediateFlip[], 479 bool DCCEnable[], 480 double PixelClock[], 481 int BytePerPixelY[], 482 int BytePerPixelC[], 483 enum scan_direction_class SourceScan[], 484 int dpte_row_height[], 485 int dpte_row_height_chroma[], 486 int meta_row_width[], 487 int meta_row_width_chroma[], 488 int meta_row_height[], 489 int meta_row_height_chroma[], 490 int meta_req_width[], 491 int meta_req_width_chroma[], 492 int meta_req_height[], 493 int meta_req_height_chroma[], 494 int dpte_group_bytes[], 495 int PTERequestSizeY[], 496 int PTERequestSizeC[], 497 int PixelPTEReqWidthY[], 498 int PixelPTEReqHeightY[], 499 int PixelPTEReqWidthC[], 500 int PixelPTEReqHeightC[], 501 int dpte_row_width_luma_ub[], 502 int dpte_row_width_chroma_ub[], 503 double DST_Y_PER_PTE_ROW_NOM_L[], 504 double DST_Y_PER_PTE_ROW_NOM_C[], 505 double DST_Y_PER_META_ROW_NOM_L[], 506 double DST_Y_PER_META_ROW_NOM_C[], 507 double TimePerMetaChunkNominal[], 508 double TimePerChromaMetaChunkNominal[], 509 double TimePerMetaChunkVBlank[], 510 double TimePerChromaMetaChunkVBlank[], 511 double TimePerMetaChunkFlip[], 512 double TimePerChromaMetaChunkFlip[], 513 double time_per_pte_group_nom_luma[], 514 double time_per_pte_group_vblank_luma[], 515 double time_per_pte_group_flip_luma[], 516 double 
time_per_pte_group_nom_chroma[], 517 double time_per_pte_group_vblank_chroma[], 518 double time_per_pte_group_flip_chroma[]); 519 520 static void CalculateVMGroupAndRequestTimes( 521 unsigned int NumberOfActivePlanes, 522 bool GPUVMEnable, 523 unsigned int GPUVMMaxPageTableLevels, 524 unsigned int HTotal[], 525 int BytePerPixelC[], 526 double DestinationLinesToRequestVMInVBlank[], 527 double DestinationLinesToRequestVMInImmediateFlip[], 528 bool DCCEnable[], 529 double PixelClock[], 530 int dpte_row_width_luma_ub[], 531 int dpte_row_width_chroma_ub[], 532 int vm_group_bytes[], 533 unsigned int dpde0_bytes_per_frame_ub_l[], 534 unsigned int dpde0_bytes_per_frame_ub_c[], 535 int meta_pte_bytes_per_frame_ub_l[], 536 int meta_pte_bytes_per_frame_ub_c[], 537 double TimePerVMGroupVBlank[], 538 double TimePerVMGroupFlip[], 539 double TimePerVMRequestVBlank[], 540 double TimePerVMRequestFlip[]); 541 542 static void CalculateStutterEfficiency( 543 struct display_mode_lib *mode_lib, 544 int CompressedBufferSizeInkByte, 545 bool UnboundedRequestEnabled, 546 int ConfigReturnBufferSizeInKByte, 547 int MetaFIFOSizeInKEntries, 548 int ZeroSizeBufferEntries, 549 int NumberOfActivePlanes, 550 int ROBBufferSizeInKByte, 551 double TotalDataReadBandwidth, 552 double DCFCLK, 553 double ReturnBW, 554 double COMPBUF_RESERVED_SPACE_64B, 555 double COMPBUF_RESERVED_SPACE_ZS, 556 double SRExitTime, 557 double SRExitZ8Time, 558 bool SynchronizedVBlank, 559 double Z8StutterEnterPlusExitWatermark, 560 double StutterEnterPlusExitWatermark, 561 bool ProgressiveToInterlaceUnitInOPP, 562 bool Interlace[], 563 double MinTTUVBlank[], 564 int DPPPerPlane[], 565 unsigned int DETBufferSizeY[], 566 int BytePerPixelY[], 567 double BytePerPixelDETY[], 568 double SwathWidthY[], 569 int SwathHeightY[], 570 int SwathHeightC[], 571 double NetDCCRateLuma[], 572 double NetDCCRateChroma[], 573 double DCCFractionOfZeroSizeRequestsLuma[], 574 double DCCFractionOfZeroSizeRequestsChroma[], 575 int HTotal[], 576 int 
VTotal[], 577 double PixelClock[], 578 double VRatio[], 579 enum scan_direction_class SourceScan[], 580 int BlockHeight256BytesY[], 581 int BlockWidth256BytesY[], 582 int BlockHeight256BytesC[], 583 int BlockWidth256BytesC[], 584 int DCCYMaxUncompressedBlock[], 585 int DCCCMaxUncompressedBlock[], 586 int VActive[], 587 bool DCCEnable[], 588 bool WritebackEnable[], 589 double ReadBandwidthPlaneLuma[], 590 double ReadBandwidthPlaneChroma[], 591 double meta_row_bw[], 592 double dpte_row_bw[], 593 double *StutterEfficiencyNotIncludingVBlank, 594 double *StutterEfficiency, 595 int *NumberOfStutterBurstsPerFrame, 596 double *Z8StutterEfficiencyNotIncludingVBlank, 597 double *Z8StutterEfficiency, 598 int *Z8NumberOfStutterBurstsPerFrame, 599 double *StutterPeriod); 600 601 static void CalculateSwathAndDETConfiguration( 602 bool ForceSingleDPP, 603 int NumberOfActivePlanes, 604 unsigned int DETBufferSizeInKByte, 605 double MaximumSwathWidthLuma[], 606 double MaximumSwathWidthChroma[], 607 enum scan_direction_class SourceScan[], 608 enum source_format_class SourcePixelFormat[], 609 enum dm_swizzle_mode SurfaceTiling[], 610 int ViewportWidth[], 611 int ViewportHeight[], 612 int SurfaceWidthY[], 613 int SurfaceWidthC[], 614 int SurfaceHeightY[], 615 int SurfaceHeightC[], 616 int Read256BytesBlockHeightY[], 617 int Read256BytesBlockHeightC[], 618 int Read256BytesBlockWidthY[], 619 int Read256BytesBlockWidthC[], 620 enum odm_combine_mode ODMCombineEnabled[], 621 int BlendingAndTiming[], 622 int BytePerPixY[], 623 int BytePerPixC[], 624 double BytePerPixDETY[], 625 double BytePerPixDETC[], 626 int HActive[], 627 double HRatio[], 628 double HRatioChroma[], 629 int DPPPerPlane[], 630 int swath_width_luma_ub[], 631 int swath_width_chroma_ub[], 632 double SwathWidth[], 633 double SwathWidthChroma[], 634 int SwathHeightY[], 635 int SwathHeightC[], 636 unsigned int DETBufferSizeY[], 637 unsigned int DETBufferSizeC[], 638 bool ViewportSizeSupportPerPlane[], 639 bool 
*ViewportSizeSupport); 640 static void CalculateSwathWidth( 641 bool ForceSingleDPP, 642 int NumberOfActivePlanes, 643 enum source_format_class SourcePixelFormat[], 644 enum scan_direction_class SourceScan[], 645 int ViewportWidth[], 646 int ViewportHeight[], 647 int SurfaceWidthY[], 648 int SurfaceWidthC[], 649 int SurfaceHeightY[], 650 int SurfaceHeightC[], 651 enum odm_combine_mode ODMCombineEnabled[], 652 int BytePerPixY[], 653 int BytePerPixC[], 654 int Read256BytesBlockHeightY[], 655 int Read256BytesBlockHeightC[], 656 int Read256BytesBlockWidthY[], 657 int Read256BytesBlockWidthC[], 658 int BlendingAndTiming[], 659 int HActive[], 660 double HRatio[], 661 int DPPPerPlane[], 662 double SwathWidthSingleDPPY[], 663 double SwathWidthSingleDPPC[], 664 double SwathWidthY[], 665 double SwathWidthC[], 666 int MaximumSwathHeightY[], 667 int MaximumSwathHeightC[], 668 int swath_width_luma_ub[], 669 int swath_width_chroma_ub[]); 670 671 static double CalculateExtraLatency( 672 int RoundTripPingLatencyCycles, 673 int ReorderingBytes, 674 double DCFCLK, 675 int TotalNumberOfActiveDPP, 676 int PixelChunkSizeInKByte, 677 int TotalNumberOfDCCActiveDPP, 678 int MetaChunkSize, 679 double ReturnBW, 680 bool GPUVMEnable, 681 bool HostVMEnable, 682 int NumberOfActivePlanes, 683 int NumberOfDPP[], 684 int dpte_group_bytes[], 685 double HostVMInefficiencyFactor, 686 double HostVMMinPageSize, 687 int HostVMMaxNonCachedPageTableLevels); 688 689 static double CalculateExtraLatencyBytes( 690 int ReorderingBytes, 691 int TotalNumberOfActiveDPP, 692 int PixelChunkSizeInKByte, 693 int TotalNumberOfDCCActiveDPP, 694 int MetaChunkSize, 695 bool GPUVMEnable, 696 bool HostVMEnable, 697 int NumberOfActivePlanes, 698 int NumberOfDPP[], 699 int dpte_group_bytes[], 700 double HostVMInefficiencyFactor, 701 double HostVMMinPageSize, 702 int HostVMMaxNonCachedPageTableLevels); 703 704 static double CalculateUrgentLatency( 705 double UrgentLatencyPixelDataOnly, 706 double 
UrgentLatencyPixelMixedWithVMData, 707 double UrgentLatencyVMDataOnly, 708 bool DoUrgentLatencyAdjustment, 709 double UrgentLatencyAdjustmentFabricClockComponent, 710 double UrgentLatencyAdjustmentFabricClockReference, 711 double FabricClockSingle); 712 713 static void CalculateUnboundedRequestAndCompressedBufferSize( 714 unsigned int DETBufferSizeInKByte, 715 int ConfigReturnBufferSizeInKByte, 716 enum unbounded_requesting_policy UseUnboundedRequestingFinal, 717 int TotalActiveDPP, 718 bool NoChromaPlanes, 719 int MaxNumDPP, 720 int CompressedBufferSegmentSizeInkByteFinal, 721 enum output_encoder_class *Output, 722 bool *UnboundedRequestEnabled, 723 int *CompressedBufferSizeInkByte); 724 725 static bool UnboundedRequest(enum unbounded_requesting_policy UseUnboundedRequestingFinal, int TotalNumberOfActiveDPP, bool NoChroma, enum output_encoder_class Output); 726 static unsigned int CalculateMaxVStartup( 727 unsigned int VTotal, 728 unsigned int VActive, 729 unsigned int VBlankNom, 730 unsigned int HTotal, 731 double PixelClock, 732 bool ProgressiveTointerlaceUnitinOPP, 733 bool Interlace, 734 unsigned int VBlankNomDefaultUS, 735 double WritebackDelayTime); 736 737 void dml314_recalculate(struct display_mode_lib *mode_lib) 738 { 739 ModeSupportAndSystemConfiguration(mode_lib); 740 PixelClockAdjustmentForProgressiveToInterlaceUnit(mode_lib); 741 DisplayPipeConfiguration(mode_lib); 742 #ifdef __DML_VBA_DEBUG__ 743 dml_print("DML::%s: Calling DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation\n", __func__); 744 #endif 745 DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation(mode_lib); 746 } 747 748 static unsigned int dscceComputeDelay( 749 unsigned int bpc, 750 double BPP, 751 unsigned int sliceWidth, 752 unsigned int numSlices, 753 enum output_format_class pixelFormat, 754 enum output_encoder_class Output) 755 { 756 // valid bpc = source bits per component in the set of {8, 10, 12} 757 // valid bpp = 
increments of 1/16 of a bit 758 // min = 6/7/8 in N420/N422/444, respectively 759 // max = such that compression is 1:1 760 //valid sliceWidth = number of pixels per slice line, must be less than or equal to 5184/numSlices (or 4096/numSlices in 420 mode) 761 //valid numSlices = number of slices in the horiziontal direction per DSC engine in the set of {1, 2, 3, 4} 762 //valid pixelFormat = pixel/color format in the set of {:N444_RGB, :S422, :N422, :N420} 763 764 // fixed value 765 unsigned int rcModelSize = 8192; 766 767 // N422/N420 operate at 2 pixels per clock 768 unsigned int pixelsPerClock = 0, lstall, D, initalXmitDelay, w, s, ix, wx, P, l0, a, ax, L, Delay, pixels; 769 770 if (pixelFormat == dm_420) 771 pixelsPerClock = 2; 772 else if (pixelFormat == dm_444) 773 pixelsPerClock = 1; 774 else if (pixelFormat == dm_n422) 775 pixelsPerClock = 2; 776 // #all other modes operate at 1 pixel per clock 777 else 778 pixelsPerClock = 1; 779 780 //initial transmit delay as per PPS 781 initalXmitDelay = dml_round(rcModelSize / 2.0 / BPP / pixelsPerClock); 782 783 //compute ssm delay 784 if (bpc == 8) 785 D = 81; 786 else if (bpc == 10) 787 D = 89; 788 else 789 D = 113; 790 791 //divide by pixel per cycle to compute slice width as seen by DSC 792 w = sliceWidth / pixelsPerClock; 793 794 //422 mode has an additional cycle of delay 795 if (pixelFormat == dm_420 || pixelFormat == dm_444 || pixelFormat == dm_n422) 796 s = 0; 797 else 798 s = 1; 799 800 //main calculation for the dscce 801 ix = initalXmitDelay + 45; 802 wx = (w + 2) / 3; 803 P = 3 * wx - w; 804 l0 = ix / w; 805 a = ix + P * l0; 806 ax = (a + 2) / 3 + D + 6 + 1; 807 L = (ax + wx - 1) / wx; 808 if ((ix % w) == 0 && P != 0) 809 lstall = 1; 810 else 811 lstall = 0; 812 Delay = L * wx * (numSlices - 1) + ax + s + lstall + 22; 813 814 //dsc processes 3 pixel containers per cycle and a container can contain 1 or 2 pixels 815 pixels = Delay * 3 * pixelsPerClock; 816 return pixels; 817 } 818 819 static unsigned int 
dscComputeDelay(enum output_format_class pixelFormat, enum output_encoder_class Output) 820 { 821 unsigned int Delay = 0; 822 823 if (pixelFormat == dm_420) { 824 // sfr 825 Delay = Delay + 2; 826 // dsccif 827 Delay = Delay + 0; 828 // dscc - input deserializer 829 Delay = Delay + 3; 830 // dscc gets pixels every other cycle 831 Delay = Delay + 2; 832 // dscc - input cdc fifo 833 Delay = Delay + 12; 834 // dscc gets pixels every other cycle 835 Delay = Delay + 13; 836 // dscc - cdc uncertainty 837 Delay = Delay + 2; 838 // dscc - output cdc fifo 839 Delay = Delay + 7; 840 // dscc gets pixels every other cycle 841 Delay = Delay + 3; 842 // dscc - cdc uncertainty 843 Delay = Delay + 2; 844 // dscc - output serializer 845 Delay = Delay + 1; 846 // sft 847 Delay = Delay + 1; 848 } else if (pixelFormat == dm_n422) { 849 // sfr 850 Delay = Delay + 2; 851 // dsccif 852 Delay = Delay + 1; 853 // dscc - input deserializer 854 Delay = Delay + 5; 855 // dscc - input cdc fifo 856 Delay = Delay + 25; 857 // dscc - cdc uncertainty 858 Delay = Delay + 2; 859 // dscc - output cdc fifo 860 Delay = Delay + 10; 861 // dscc - cdc uncertainty 862 Delay = Delay + 2; 863 // dscc - output serializer 864 Delay = Delay + 1; 865 // sft 866 Delay = Delay + 1; 867 } else { 868 // sfr 869 Delay = Delay + 2; 870 // dsccif 871 Delay = Delay + 0; 872 // dscc - input deserializer 873 Delay = Delay + 3; 874 // dscc - input cdc fifo 875 Delay = Delay + 12; 876 // dscc - cdc uncertainty 877 Delay = Delay + 2; 878 // dscc - output cdc fifo 879 Delay = Delay + 7; 880 // dscc - output serializer 881 Delay = Delay + 1; 882 // dscc - cdc uncertainty 883 Delay = Delay + 2; 884 // sft 885 Delay = Delay + 1; 886 } 887 888 return Delay; 889 } 890 891 static bool CalculatePrefetchSchedule( 892 struct display_mode_lib *mode_lib, 893 double HostVMInefficiencyFactor, 894 Pipe *myPipe, 895 unsigned int DSCDelay, 896 double DPPCLKDelaySubtotalPlusCNVCFormater, 897 double DPPCLKDelaySCL, 898 double 
DPPCLKDelaySCLLBOnly, 899 double DPPCLKDelayCNVCCursor, 900 double DISPCLKDelaySubtotal, 901 unsigned int DPP_RECOUT_WIDTH, 902 enum output_format_class OutputFormat, 903 unsigned int MaxInterDCNTileRepeaters, 904 unsigned int VStartup, 905 unsigned int MaxVStartup, 906 unsigned int GPUVMPageTableLevels, 907 bool GPUVMEnable, 908 bool HostVMEnable, 909 unsigned int HostVMMaxNonCachedPageTableLevels, 910 double HostVMMinPageSize, 911 bool DynamicMetadataEnable, 912 bool DynamicMetadataVMEnabled, 913 int DynamicMetadataLinesBeforeActiveRequired, 914 unsigned int DynamicMetadataTransmittedBytes, 915 double UrgentLatency, 916 double UrgentExtraLatency, 917 double TCalc, 918 unsigned int PDEAndMetaPTEBytesFrame, 919 unsigned int MetaRowByte, 920 unsigned int PixelPTEBytesPerRow, 921 double PrefetchSourceLinesY, 922 unsigned int SwathWidthY, 923 double VInitPreFillY, 924 unsigned int MaxNumSwathY, 925 double PrefetchSourceLinesC, 926 unsigned int SwathWidthC, 927 double VInitPreFillC, 928 unsigned int MaxNumSwathC, 929 int swath_width_luma_ub, 930 int swath_width_chroma_ub, 931 unsigned int SwathHeightY, 932 unsigned int SwathHeightC, 933 double TWait, 934 double *DSTXAfterScaler, 935 double *DSTYAfterScaler, 936 double *DestinationLinesForPrefetch, 937 double *PrefetchBandwidth, 938 double *DestinationLinesToRequestVMInVBlank, 939 double *DestinationLinesToRequestRowInVBlank, 940 double *VRatioPrefetchY, 941 double *VRatioPrefetchC, 942 double *RequiredPrefetchPixDataBWLuma, 943 double *RequiredPrefetchPixDataBWChroma, 944 bool *NotEnoughTimeForDynamicMetadata, 945 double *Tno_bw, 946 double *prefetch_vmrow_bw, 947 double *Tdmdl_vm, 948 double *Tdmdl, 949 double *TSetup, 950 int *VUpdateOffsetPix, 951 double *VUpdateWidthPix, 952 double *VReadyOffsetPix) 953 { 954 bool MyError = false; 955 unsigned int DPPCycles, DISPCLKCycles; 956 double DSTTotalPixelsAfterScaler; 957 double LineTime; 958 double dst_y_prefetch_equ; 959 double Tsw_oto; 960 double prefetch_bw_oto; 961 
double prefetch_bw_pr; 962 double Tvm_oto; 963 double Tr0_oto; 964 double Tvm_oto_lines; 965 double Tr0_oto_lines; 966 double dst_y_prefetch_oto; 967 double TimeForFetchingMetaPTE = 0; 968 double TimeForFetchingRowInVBlank = 0; 969 double LinesToRequestPrefetchPixelData = 0; 970 unsigned int HostVMDynamicLevelsTrips; 971 double trip_to_mem; 972 double Tvm_trips; 973 double Tr0_trips; 974 double Tvm_trips_rounded; 975 double Tr0_trips_rounded; 976 double Lsw_oto; 977 double Tpre_rounded; 978 double prefetch_bw_equ; 979 double Tvm_equ; 980 double Tr0_equ; 981 double Tdmbf; 982 double Tdmec; 983 double Tdmsks; 984 double prefetch_sw_bytes; 985 double bytes_pp; 986 double dep_bytes; 987 int max_vratio_pre = 4; 988 double min_Lsw; 989 double Tsw_est1 = 0; 990 double Tsw_est3 = 0; 991 double max_Tsw = 0; 992 993 if (GPUVMEnable == true && HostVMEnable == true) { 994 HostVMDynamicLevelsTrips = HostVMMaxNonCachedPageTableLevels; 995 } else { 996 HostVMDynamicLevelsTrips = 0; 997 } 998 #ifdef __DML_VBA_DEBUG__ 999 dml_print("DML::%s: GPUVMEnable=%d HostVMEnable=%d HostVMInefficiencyFactor=%f\n", __func__, GPUVMEnable, HostVMEnable, HostVMInefficiencyFactor); 1000 #endif 1001 CalculateVupdateAndDynamicMetadataParameters( 1002 MaxInterDCNTileRepeaters, 1003 myPipe->DPPCLK, 1004 myPipe->DISPCLK, 1005 myPipe->DCFCLKDeepSleep, 1006 myPipe->PixelClock, 1007 myPipe->HTotal, 1008 myPipe->VBlank, 1009 DynamicMetadataTransmittedBytes, 1010 DynamicMetadataLinesBeforeActiveRequired, 1011 myPipe->InterlaceEnable, 1012 myPipe->ProgressiveToInterlaceUnitInOPP, 1013 TSetup, 1014 &Tdmbf, 1015 &Tdmec, 1016 &Tdmsks, 1017 VUpdateOffsetPix, 1018 VUpdateWidthPix, 1019 VReadyOffsetPix); 1020 1021 LineTime = myPipe->HTotal / myPipe->PixelClock; 1022 trip_to_mem = UrgentLatency; 1023 Tvm_trips = UrgentExtraLatency + trip_to_mem * (GPUVMPageTableLevels * (HostVMDynamicLevelsTrips + 1) - 1); 1024 1025 #ifdef __DML_VBA_ALLOW_DELTA__ 1026 if (DynamicMetadataVMEnabled == true && GPUVMEnable == true) { 
1027 #else 1028 if (DynamicMetadataVMEnabled == true) { 1029 #endif 1030 *Tdmdl = TWait + Tvm_trips + trip_to_mem; 1031 } else { 1032 *Tdmdl = TWait + UrgentExtraLatency; 1033 } 1034 1035 #ifdef __DML_VBA_ALLOW_DELTA__ 1036 if (DynamicMetadataEnable == false) { 1037 *Tdmdl = 0.0; 1038 } 1039 #endif 1040 1041 if (DynamicMetadataEnable == true) { 1042 if (VStartup * LineTime < *TSetup + *Tdmdl + Tdmbf + Tdmec + Tdmsks) { 1043 *NotEnoughTimeForDynamicMetadata = true; 1044 dml_print("DML::%s: Not Enough Time for Dynamic Meta!\n", __func__); 1045 dml_print("DML::%s: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", __func__, Tdmbf); 1046 dml_print("DML::%s: Tdmec: %fus - time dio takes to transfer dmd\n", __func__, Tdmec); 1047 dml_print("DML::%s: Tdmsks: %fus - time before active dmd must complete transmission at dio\n", __func__, Tdmsks); 1048 dml_print("DML::%s: Tdmdl: %fus - time for fabric to become ready and fetch dmd\n", __func__, *Tdmdl); 1049 } else { 1050 *NotEnoughTimeForDynamicMetadata = false; 1051 } 1052 } else { 1053 *NotEnoughTimeForDynamicMetadata = false; 1054 } 1055 1056 *Tdmdl_vm = (DynamicMetadataEnable == true && DynamicMetadataVMEnabled == true && GPUVMEnable == true ? 
TWait + Tvm_trips : 0); 1057 1058 if (myPipe->ScalerEnabled) 1059 DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + DPPCLKDelaySCL; 1060 else 1061 DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + DPPCLKDelaySCLLBOnly; 1062 1063 DPPCycles = DPPCycles + myPipe->NumberOfCursors * DPPCLKDelayCNVCCursor; 1064 1065 DISPCLKCycles = DISPCLKDelaySubtotal; 1066 1067 if (myPipe->DPPCLK == 0.0 || myPipe->DISPCLK == 0.0) 1068 return true; 1069 1070 *DSTXAfterScaler = DPPCycles * myPipe->PixelClock / myPipe->DPPCLK + DISPCLKCycles * myPipe->PixelClock / myPipe->DISPCLK + DSCDelay; 1071 1072 #ifdef __DML_VBA_DEBUG__ 1073 dml_print("DML::%s: DPPCycles: %d\n", __func__, DPPCycles); 1074 dml_print("DML::%s: PixelClock: %f\n", __func__, myPipe->PixelClock); 1075 dml_print("DML::%s: DPPCLK: %f\n", __func__, myPipe->DPPCLK); 1076 dml_print("DML::%s: DISPCLKCycles: %d\n", __func__, DISPCLKCycles); 1077 dml_print("DML::%s: DISPCLK: %f\n", __func__, myPipe->DISPCLK); 1078 dml_print("DML::%s: DSCDelay: %d\n", __func__, DSCDelay); 1079 dml_print("DML::%s: DSTXAfterScaler: %d\n", __func__, *DSTXAfterScaler); 1080 dml_print("DML::%s: ODMCombineIsEnabled: %d\n", __func__, myPipe->ODMCombineIsEnabled); 1081 #endif 1082 1083 *DSTXAfterScaler = *DSTXAfterScaler + ((myPipe->ODMCombineIsEnabled) ? 
18 : 0) + (myPipe->DPPPerPlane - 1) * DPP_RECOUT_WIDTH; 1084 1085 if (OutputFormat == dm_420 || (myPipe->InterlaceEnable && myPipe->ProgressiveToInterlaceUnitInOPP)) 1086 *DSTYAfterScaler = 1; 1087 else 1088 *DSTYAfterScaler = 0; 1089 1090 DSTTotalPixelsAfterScaler = *DSTYAfterScaler * myPipe->HTotal + *DSTXAfterScaler; 1091 *DSTYAfterScaler = dml_floor(DSTTotalPixelsAfterScaler / myPipe->HTotal, 1); 1092 *DSTXAfterScaler = DSTTotalPixelsAfterScaler - ((double) (*DSTYAfterScaler * myPipe->HTotal)); 1093 1094 #ifdef __DML_VBA_DEBUG__ 1095 dml_print("DML::%s: DSTXAfterScaler: %d (final)\n", __func__, *DSTXAfterScaler); 1096 #endif 1097 1098 MyError = false; 1099 1100 Tr0_trips = trip_to_mem * (HostVMDynamicLevelsTrips + 1); 1101 Tvm_trips_rounded = dml_ceil(4.0 * Tvm_trips / LineTime, 1) / 4 * LineTime; 1102 Tr0_trips_rounded = dml_ceil(4.0 * Tr0_trips / LineTime, 1) / 4 * LineTime; 1103 1104 #ifdef __DML_VBA_ALLOW_DELTA__ 1105 if (!myPipe->DCCEnable) { 1106 Tr0_trips = 0.0; 1107 Tr0_trips_rounded = 0.0; 1108 } 1109 #endif 1110 1111 if (!GPUVMEnable) { 1112 Tvm_trips = 0.0; 1113 Tvm_trips_rounded = 0.0; 1114 } 1115 1116 if (GPUVMEnable) { 1117 if (GPUVMPageTableLevels >= 3) { 1118 *Tno_bw = UrgentExtraLatency + trip_to_mem * ((GPUVMPageTableLevels - 2) - 1); 1119 } else { 1120 *Tno_bw = 0; 1121 } 1122 } else if (!myPipe->DCCEnable) { 1123 *Tno_bw = LineTime; 1124 } else { 1125 *Tno_bw = LineTime / 4; 1126 } 1127 1128 if (myPipe->SourcePixelFormat == dm_420_8 || myPipe->SourcePixelFormat == dm_420_10 || myPipe->SourcePixelFormat == dm_420_12) 1129 bytes_pp = myPipe->BytePerPixelY + myPipe->BytePerPixelC / 4; 1130 else 1131 bytes_pp = myPipe->BytePerPixelY + myPipe->BytePerPixelC; 1132 /*rev 99*/ 1133 prefetch_bw_pr = dml_min(1, bytes_pp * myPipe->PixelClock / (double) myPipe->DPPPerPlane); 1134 max_Tsw = dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) * LineTime; 1135 prefetch_sw_bytes = PrefetchSourceLinesY * swath_width_luma_ub * myPipe->BytePerPixelY + 
PrefetchSourceLinesC * swath_width_chroma_ub * myPipe->BytePerPixelC; 1136 prefetch_bw_oto = dml_max(bytes_pp * myPipe->PixelClock / myPipe->DPPPerPlane, prefetch_sw_bytes / (dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) * LineTime)); 1137 prefetch_bw_oto = dml_max(prefetch_bw_pr, prefetch_sw_bytes / max_Tsw); 1138 1139 min_Lsw = dml_max(1, dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) / max_vratio_pre); 1140 Lsw_oto = dml_ceil(4 * dml_max(prefetch_sw_bytes / prefetch_bw_oto / LineTime, min_Lsw), 1) / 4; 1141 Tsw_oto = Lsw_oto * LineTime; 1142 1143 prefetch_bw_oto = (PrefetchSourceLinesY * swath_width_luma_ub * myPipe->BytePerPixelY + PrefetchSourceLinesC * swath_width_chroma_ub * myPipe->BytePerPixelC) / Tsw_oto; 1144 1145 #ifdef __DML_VBA_DEBUG__ 1146 dml_print("DML: HTotal: %d\n", myPipe->HTotal); 1147 dml_print("DML: prefetch_bw_oto: %f\n", prefetch_bw_oto); 1148 dml_print("DML: PrefetchSourceLinesY: %f\n", PrefetchSourceLinesY); 1149 dml_print("DML: swath_width_luma_ub: %d\n", swath_width_luma_ub); 1150 dml_print("DML: BytePerPixelY: %d\n", myPipe->BytePerPixelY); 1151 dml_print("DML: Tsw_oto: %f\n", Tsw_oto); 1152 #endif 1153 1154 if (GPUVMEnable == true) 1155 Tvm_oto = dml_max3(*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / prefetch_bw_oto, Tvm_trips, LineTime / 4.0); 1156 else 1157 Tvm_oto = LineTime / 4.0; 1158 1159 if ((GPUVMEnable == true || myPipe->DCCEnable == true)) { 1160 Tr0_oto = dml_max4((MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / prefetch_bw_oto, Tr0_trips, // PREVIOUS_ERROR (missing this term) 1161 LineTime - Tvm_oto, 1162 LineTime / 4); 1163 } else { 1164 Tr0_oto = (LineTime - Tvm_oto) / 2.0; 1165 } 1166 1167 #ifdef __DML_VBA_DEBUG__ 1168 dml_print("DML::%s: Tvm_trips = %f\n", __func__, Tvm_trips); 1169 dml_print("DML::%s: Tr0_trips = %f\n", __func__, Tr0_trips); 1170 dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, MetaRowByte); 1171 dml_print("DML::%s: MetaRowByte = %d\n", 
__func__, MetaRowByte); 1172 dml_print("DML::%s: PixelPTEBytesPerRow = %d\n", __func__, PixelPTEBytesPerRow); 1173 dml_print("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, HostVMInefficiencyFactor); 1174 dml_print("DML::%s: prefetch_bw_oto = %f\n", __func__, prefetch_bw_oto); 1175 dml_print("DML::%s: Tr0_oto = %f\n", __func__, Tr0_oto); 1176 dml_print("DML::%s: Tvm_oto = %f\n", __func__, Tvm_oto); 1177 #endif 1178 1179 Tvm_oto_lines = dml_ceil(4.0 * Tvm_oto / LineTime, 1) / 4.0; 1180 Tr0_oto_lines = dml_ceil(4.0 * Tr0_oto / LineTime, 1) / 4.0; 1181 dst_y_prefetch_oto = Tvm_oto_lines + 2 * Tr0_oto_lines + Lsw_oto; 1182 dst_y_prefetch_equ = VStartup - (*TSetup + dml_max(TWait + TCalc, *Tdmdl)) / LineTime - (*DSTYAfterScaler + *DSTXAfterScaler / myPipe->HTotal); 1183 dst_y_prefetch_equ = dml_floor(4.0 * (dst_y_prefetch_equ + 0.125), 1) / 4.0; 1184 Tpre_rounded = dst_y_prefetch_equ * LineTime; 1185 1186 dep_bytes = dml_max(PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor, MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor); 1187 1188 if (prefetch_sw_bytes < dep_bytes) 1189 prefetch_sw_bytes = 2 * dep_bytes; 1190 1191 dml_print("DML: dst_y_prefetch_oto: %f\n", dst_y_prefetch_oto); 1192 dml_print("DML: Tvm_oto_lines: %f\n", Tvm_oto_lines); 1193 dml_print("DML: Tr0_oto_lines: %f\n", Tr0_oto_lines); 1194 dml_print("DML: Lsw_oto: %f\n", Lsw_oto); 1195 dml_print("DML: LineTime: %f\n", LineTime); 1196 dml_print("DML: dst_y_prefetch_equ: %f (after round)\n", dst_y_prefetch_equ); 1197 1198 dml_print("DML: LineTime: %f\n", LineTime); 1199 dml_print("DML: VStartup: %d\n", VStartup); 1200 dml_print("DML: Tvstartup: %fus - time between vstartup and first pixel of active\n", VStartup * LineTime); 1201 dml_print("DML: TSetup: %fus - time from vstartup to vready\n", *TSetup); 1202 dml_print("DML: TCalc: %fus - time for calculations in dchub starting at vready\n", TCalc); 1203 dml_print("DML: TWait: %fus - time for fabric to become ready max(pstate exit,cstate 
enter/exit, urgent latency) after TCalc\n", TWait); 1204 dml_print("DML: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", Tdmbf); 1205 dml_print("DML: Tdmec: %fus - time dio takes to transfer dmd\n", Tdmec); 1206 dml_print("DML: Tdmsks: %fus - time before active dmd must complete transmission at dio\n", Tdmsks); 1207 dml_print("DML: Tdmdl_vm: %fus - time for vm stages of dmd\n", *Tdmdl_vm); 1208 dml_print("DML: Tdmdl: %fus - time for fabric to become ready and fetch dmd\n", *Tdmdl); 1209 dml_print("DML: DSTXAfterScaler: %f pixels - number of pixel clocks pipeline and buffer delay after scaler\n", *DSTXAfterScaler); 1210 dml_print("DML: DSTYAfterScaler: %f lines - number of lines of pipeline and buffer delay after scaler\n", *DSTYAfterScaler); 1211 1212 *PrefetchBandwidth = 0; 1213 *DestinationLinesToRequestVMInVBlank = 0; 1214 *DestinationLinesToRequestRowInVBlank = 0; 1215 *VRatioPrefetchY = 0; 1216 *VRatioPrefetchC = 0; 1217 *RequiredPrefetchPixDataBWLuma = 0; 1218 if (dst_y_prefetch_equ > 1) { 1219 double PrefetchBandwidth1; 1220 double PrefetchBandwidth2; 1221 double PrefetchBandwidth3; 1222 double PrefetchBandwidth4; 1223 1224 if (Tpre_rounded - *Tno_bw > 0) { 1225 PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor 1226 + prefetch_sw_bytes) / (Tpre_rounded - *Tno_bw); 1227 Tsw_est1 = prefetch_sw_bytes / PrefetchBandwidth1; 1228 } else { 1229 PrefetchBandwidth1 = 0; 1230 } 1231 1232 if (VStartup == MaxVStartup && Tsw_est1 / LineTime < min_Lsw && Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - *Tno_bw > 0) { 1233 PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor) 1234 / (Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - *Tno_bw); 1235 } 1236 1237 if (Tpre_rounded - *Tno_bw - 2 * Tr0_trips_rounded > 0) 1238 PrefetchBandwidth2 = 
(PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + prefetch_sw_bytes) / (Tpre_rounded - *Tno_bw - 2 * Tr0_trips_rounded); 1239 else 1240 PrefetchBandwidth2 = 0; 1241 1242 if (Tpre_rounded - Tvm_trips_rounded > 0) { 1243 PrefetchBandwidth3 = (2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor 1244 + prefetch_sw_bytes) / (Tpre_rounded - Tvm_trips_rounded); 1245 Tsw_est3 = prefetch_sw_bytes / PrefetchBandwidth3; 1246 } else { 1247 PrefetchBandwidth3 = 0; 1248 } 1249 1250 #ifdef __DML_VBA_DEBUG__ 1251 dml_print("DML::%s: Tpre_rounded: %f\n", __func__, Tpre_rounded); 1252 dml_print("DML::%s: Tvm_trips_rounded: %f\n", __func__, Tvm_trips_rounded); 1253 dml_print("DML::%s: PrefetchBandwidth3: %f\n", __func__, PrefetchBandwidth3); 1254 #endif 1255 if (VStartup == MaxVStartup && Tsw_est3 / LineTime < min_Lsw && Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - Tvm_trips_rounded > 0) { 1256 PrefetchBandwidth3 = (2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor) 1257 / (Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - Tvm_trips_rounded); 1258 } 1259 1260 if (Tpre_rounded - Tvm_trips_rounded - 2 * Tr0_trips_rounded > 0) 1261 PrefetchBandwidth4 = prefetch_sw_bytes / (Tpre_rounded - Tvm_trips_rounded - 2 * Tr0_trips_rounded); 1262 else 1263 PrefetchBandwidth4 = 0; 1264 1265 { 1266 bool Case1OK; 1267 bool Case2OK; 1268 bool Case3OK; 1269 1270 if (PrefetchBandwidth1 > 0) { 1271 if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth1 >= Tvm_trips_rounded 1272 && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / PrefetchBandwidth1 >= Tr0_trips_rounded) { 1273 Case1OK = true; 1274 } else { 1275 Case1OK = false; 1276 } 1277 } else { 1278 Case1OK = false; 1279 } 1280 1281 if (PrefetchBandwidth2 > 0) { 1282 if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth2 >= Tvm_trips_rounded 1283 && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / 
PrefetchBandwidth2 < Tr0_trips_rounded) { 1284 Case2OK = true; 1285 } else { 1286 Case2OK = false; 1287 } 1288 } else { 1289 Case2OK = false; 1290 } 1291 1292 if (PrefetchBandwidth3 > 0) { 1293 if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth3 < Tvm_trips_rounded 1294 && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / PrefetchBandwidth3 >= Tr0_trips_rounded) { 1295 Case3OK = true; 1296 } else { 1297 Case3OK = false; 1298 } 1299 } else { 1300 Case3OK = false; 1301 } 1302 1303 if (Case1OK) { 1304 prefetch_bw_equ = PrefetchBandwidth1; 1305 } else if (Case2OK) { 1306 prefetch_bw_equ = PrefetchBandwidth2; 1307 } else if (Case3OK) { 1308 prefetch_bw_equ = PrefetchBandwidth3; 1309 } else { 1310 prefetch_bw_equ = PrefetchBandwidth4; 1311 } 1312 1313 #ifdef __DML_VBA_DEBUG__ 1314 dml_print("DML::%s: Case1OK: %d\n", __func__, Case1OK); 1315 dml_print("DML::%s: Case2OK: %d\n", __func__, Case2OK); 1316 dml_print("DML::%s: Case3OK: %d\n", __func__, Case3OK); 1317 dml_print("DML::%s: prefetch_bw_equ: %f\n", __func__, prefetch_bw_equ); 1318 #endif 1319 1320 if (prefetch_bw_equ > 0) { 1321 if (GPUVMEnable == true) { 1322 Tvm_equ = dml_max3(*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / prefetch_bw_equ, Tvm_trips, LineTime / 4); 1323 } else { 1324 Tvm_equ = LineTime / 4; 1325 } 1326 1327 if ((GPUVMEnable == true || myPipe->DCCEnable == true)) { 1328 Tr0_equ = dml_max4( 1329 (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / prefetch_bw_equ, 1330 Tr0_trips, 1331 (LineTime - Tvm_equ) / 2, 1332 LineTime / 4); 1333 } else { 1334 Tr0_equ = (LineTime - Tvm_equ) / 2; 1335 } 1336 } else { 1337 Tvm_equ = 0; 1338 Tr0_equ = 0; 1339 dml_print("DML: prefetch_bw_equ equals 0! 
%s:%d\n", __FILE__, __LINE__); 1340 } 1341 } 1342 1343 if (dst_y_prefetch_oto < dst_y_prefetch_equ) { 1344 *DestinationLinesForPrefetch = dst_y_prefetch_oto; 1345 TimeForFetchingMetaPTE = Tvm_oto; 1346 TimeForFetchingRowInVBlank = Tr0_oto; 1347 *PrefetchBandwidth = prefetch_bw_oto; 1348 } else { 1349 *DestinationLinesForPrefetch = dst_y_prefetch_equ; 1350 TimeForFetchingMetaPTE = Tvm_equ; 1351 TimeForFetchingRowInVBlank = Tr0_equ; 1352 *PrefetchBandwidth = prefetch_bw_equ; 1353 } 1354 1355 *DestinationLinesToRequestVMInVBlank = dml_ceil(4.0 * TimeForFetchingMetaPTE / LineTime, 1.0) / 4.0; 1356 1357 *DestinationLinesToRequestRowInVBlank = dml_ceil(4.0 * TimeForFetchingRowInVBlank / LineTime, 1.0) / 4.0; 1358 1359 #ifdef __DML_VBA_ALLOW_DELTA__ 1360 LinesToRequestPrefetchPixelData = *DestinationLinesForPrefetch 1361 // See note above dated 5/30/2018 1362 // - ((NumberOfCursors > 0 || GPUVMEnable || DCCEnable) ? 1363 - ((GPUVMEnable || myPipe->DCCEnable) ? (*DestinationLinesToRequestVMInVBlank + 2 * *DestinationLinesToRequestRowInVBlank) : 0.0); // TODO: Did someone else add this?? 
1364 #else 1365 LinesToRequestPrefetchPixelData = *DestinationLinesForPrefetch - *DestinationLinesToRequestVMInVBlank - 2 * *DestinationLinesToRequestRowInVBlank; 1366 #endif 1367 1368 #ifdef __DML_VBA_DEBUG__ 1369 dml_print("DML::%s: DestinationLinesForPrefetch = %f\n", __func__, *DestinationLinesForPrefetch); 1370 dml_print("DML::%s: DestinationLinesToRequestVMInVBlank = %f\n", __func__, *DestinationLinesToRequestVMInVBlank); 1371 dml_print("DML::%s: TimeForFetchingRowInVBlank = %f\n", __func__, TimeForFetchingRowInVBlank); 1372 dml_print("DML::%s: LineTime = %f\n", __func__, LineTime); 1373 dml_print("DML::%s: DestinationLinesToRequestRowInVBlank = %f\n", __func__, *DestinationLinesToRequestRowInVBlank); 1374 dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, PrefetchSourceLinesY); 1375 dml_print("DML::%s: LinesToRequestPrefetchPixelData = %f\n", __func__, LinesToRequestPrefetchPixelData); 1376 #endif 1377 1378 if (LinesToRequestPrefetchPixelData > 0 && prefetch_bw_equ > 0) { 1379 1380 *VRatioPrefetchY = (double) PrefetchSourceLinesY / LinesToRequestPrefetchPixelData; 1381 *VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0); 1382 #ifdef __DML_VBA_DEBUG__ 1383 dml_print("DML::%s: VRatioPrefetchY = %f\n", __func__, *VRatioPrefetchY); 1384 dml_print("DML::%s: SwathHeightY = %d\n", __func__, SwathHeightY); 1385 dml_print("DML::%s: VInitPreFillY = %f\n", __func__, VInitPreFillY); 1386 #endif 1387 if ((SwathHeightY > 4) && (VInitPreFillY > 3)) { 1388 if (LinesToRequestPrefetchPixelData > (VInitPreFillY - 3.0) / 2.0) { 1389 *VRatioPrefetchY = dml_max( 1390 (double) PrefetchSourceLinesY / LinesToRequestPrefetchPixelData, 1391 (double) MaxNumSwathY * SwathHeightY / (LinesToRequestPrefetchPixelData - (VInitPreFillY - 3.0) / 2.0)); 1392 *VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0); 1393 } else { 1394 MyError = true; 1395 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__); 1396 *VRatioPrefetchY = 0; 1397 } 1398 #ifdef __DML_VBA_DEBUG__ 1399 
dml_print("DML::%s: VRatioPrefetchY = %f\n", __func__, *VRatioPrefetchY); 1400 dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, PrefetchSourceLinesY); 1401 dml_print("DML::%s: MaxNumSwathY = %d\n", __func__, MaxNumSwathY); 1402 #endif 1403 } 1404 1405 *VRatioPrefetchC = (double) PrefetchSourceLinesC / LinesToRequestPrefetchPixelData; 1406 *VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0); 1407 1408 #ifdef __DML_VBA_DEBUG__ 1409 dml_print("DML::%s: VRatioPrefetchC = %f\n", __func__, *VRatioPrefetchC); 1410 dml_print("DML::%s: SwathHeightC = %d\n", __func__, SwathHeightC); 1411 dml_print("DML::%s: VInitPreFillC = %f\n", __func__, VInitPreFillC); 1412 #endif 1413 if ((SwathHeightC > 4) || VInitPreFillC > 3) { 1414 if (LinesToRequestPrefetchPixelData > (VInitPreFillC - 3.0) / 2.0) { 1415 *VRatioPrefetchC = dml_max( 1416 *VRatioPrefetchC, 1417 (double) MaxNumSwathC * SwathHeightC / (LinesToRequestPrefetchPixelData - (VInitPreFillC - 3.0) / 2.0)); 1418 *VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0); 1419 } else { 1420 MyError = true; 1421 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__); 1422 *VRatioPrefetchC = 0; 1423 } 1424 #ifdef __DML_VBA_DEBUG__ 1425 dml_print("DML::%s: VRatioPrefetchC = %f\n", __func__, *VRatioPrefetchC); 1426 dml_print("DML::%s: PrefetchSourceLinesC = %f\n", __func__, PrefetchSourceLinesC); 1427 dml_print("DML::%s: MaxNumSwathC = %d\n", __func__, MaxNumSwathC); 1428 #endif 1429 } 1430 1431 #ifdef __DML_VBA_DEBUG__ 1432 dml_print("DML::%s: BytePerPixelY = %d\n", __func__, myPipe->BytePerPixelY); 1433 dml_print("DML::%s: swath_width_luma_ub = %d\n", __func__, swath_width_luma_ub); 1434 dml_print("DML::%s: LineTime = %f\n", __func__, LineTime); 1435 #endif 1436 1437 *RequiredPrefetchPixDataBWLuma = (double) PrefetchSourceLinesY / LinesToRequestPrefetchPixelData * myPipe->BytePerPixelY * swath_width_luma_ub / LineTime; 1438 1439 #ifdef __DML_VBA_DEBUG__ 1440 dml_print("DML::%s: RequiredPrefetchPixDataBWLuma = %f\n", __func__, 
*RequiredPrefetchPixDataBWLuma); 1441 #endif 1442 1443 *RequiredPrefetchPixDataBWChroma = (double) PrefetchSourceLinesC / LinesToRequestPrefetchPixelData * myPipe->BytePerPixelC * swath_width_chroma_ub 1444 / LineTime; 1445 } else { 1446 MyError = true; 1447 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__); 1448 dml_print("DML: LinesToRequestPrefetchPixelData: %f, should be > 0\n", LinesToRequestPrefetchPixelData); 1449 *VRatioPrefetchY = 0; 1450 *VRatioPrefetchC = 0; 1451 *RequiredPrefetchPixDataBWLuma = 0; 1452 *RequiredPrefetchPixDataBWChroma = 0; 1453 } 1454 1455 dml_print( 1456 "DML: Tpre: %fus - sum of time to request meta pte, 2 x data pte + meta data, swaths\n", 1457 (double) LinesToRequestPrefetchPixelData * LineTime + 2.0 * TimeForFetchingRowInVBlank + TimeForFetchingMetaPTE); 1458 dml_print("DML: Tvm: %fus - time to fetch page tables for meta surface\n", TimeForFetchingMetaPTE); 1459 dml_print("DML: Tr0: %fus - time to fetch first row of data pagetables and first row of meta data (done in parallel)\n", TimeForFetchingRowInVBlank); 1460 dml_print( 1461 "DML: Tsw: %fus = time to fetch enough pixel data and cursor data to feed the scalers init position and detile\n", 1462 (double) LinesToRequestPrefetchPixelData * LineTime); 1463 dml_print("DML: To: %fus - time for propagation from scaler to optc\n", (*DSTYAfterScaler + ((double) (*DSTXAfterScaler) / (double) myPipe->HTotal)) * LineTime); 1464 dml_print("DML: Tvstartup - TSetup - Tcalc - Twait - Tpre - To > 0\n"); 1465 dml_print( 1466 "DML: Tslack(pre): %fus - time left over in schedule\n", 1467 VStartup * LineTime - TimeForFetchingMetaPTE - 2 * TimeForFetchingRowInVBlank 1468 - (*DSTYAfterScaler + ((double) (*DSTXAfterScaler) / (double) myPipe->HTotal)) * LineTime - TWait - TCalc - *TSetup); 1469 dml_print("DML: row_bytes = dpte_row_bytes (per_pipe) = PixelPTEBytesPerRow = : %d\n", PixelPTEBytesPerRow); 1470 1471 } else { 1472 MyError = true; 1473 dml_print("DML: MyErr set %s:%d\n", __FILE__, 
__LINE__); 1474 } 1475 1476 { 1477 double prefetch_vm_bw; 1478 double prefetch_row_bw; 1479 1480 if (PDEAndMetaPTEBytesFrame == 0) { 1481 prefetch_vm_bw = 0; 1482 } else if (*DestinationLinesToRequestVMInVBlank > 0) { 1483 #ifdef __DML_VBA_DEBUG__ 1484 dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame); 1485 dml_print("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, HostVMInefficiencyFactor); 1486 dml_print("DML::%s: DestinationLinesToRequestVMInVBlank = %f\n", __func__, *DestinationLinesToRequestVMInVBlank); 1487 dml_print("DML::%s: LineTime = %f\n", __func__, LineTime); 1488 #endif 1489 prefetch_vm_bw = PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / (*DestinationLinesToRequestVMInVBlank * LineTime); 1490 #ifdef __DML_VBA_DEBUG__ 1491 dml_print("DML::%s: prefetch_vm_bw = %f\n", __func__, prefetch_vm_bw); 1492 #endif 1493 } else { 1494 prefetch_vm_bw = 0; 1495 MyError = true; 1496 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__); 1497 } 1498 1499 if (MetaRowByte + PixelPTEBytesPerRow == 0) { 1500 prefetch_row_bw = 0; 1501 } else if (*DestinationLinesToRequestRowInVBlank > 0) { 1502 prefetch_row_bw = (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / (*DestinationLinesToRequestRowInVBlank * LineTime); 1503 1504 #ifdef __DML_VBA_DEBUG__ 1505 dml_print("DML::%s: MetaRowByte = %d\n", __func__, MetaRowByte); 1506 dml_print("DML::%s: PixelPTEBytesPerRow = %d\n", __func__, PixelPTEBytesPerRow); 1507 dml_print("DML::%s: DestinationLinesToRequestRowInVBlank = %f\n", __func__, *DestinationLinesToRequestRowInVBlank); 1508 dml_print("DML::%s: prefetch_row_bw = %f\n", __func__, prefetch_row_bw); 1509 #endif 1510 } else { 1511 prefetch_row_bw = 0; 1512 MyError = true; 1513 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__); 1514 } 1515 1516 *prefetch_vmrow_bw = dml_max(prefetch_vm_bw, prefetch_row_bw); 1517 } 1518 1519 if (MyError) { 1520 *PrefetchBandwidth = 0; 1521 TimeForFetchingMetaPTE = 0; 1522 
TimeForFetchingRowInVBlank = 0; 1523 *DestinationLinesToRequestVMInVBlank = 0; 1524 *DestinationLinesToRequestRowInVBlank = 0; 1525 *DestinationLinesForPrefetch = 0; 1526 LinesToRequestPrefetchPixelData = 0; 1527 *VRatioPrefetchY = 0; 1528 *VRatioPrefetchC = 0; 1529 *RequiredPrefetchPixDataBWLuma = 0; 1530 *RequiredPrefetchPixDataBWChroma = 0; 1531 } 1532 1533 return MyError; 1534 } 1535 1536 static double RoundToDFSGranularityUp(double Clock, double VCOSpeed) 1537 { 1538 return VCOSpeed * 4 / dml_floor(VCOSpeed * 4 / Clock, 1); 1539 } 1540 1541 static double RoundToDFSGranularityDown(double Clock, double VCOSpeed) 1542 { 1543 return VCOSpeed * 4 / dml_ceil(VCOSpeed * 4.0 / Clock, 1); 1544 } 1545 1546 static void CalculateDCCConfiguration( 1547 bool DCCEnabled, 1548 bool DCCProgrammingAssumesScanDirectionUnknown, 1549 enum source_format_class SourcePixelFormat, 1550 unsigned int SurfaceWidthLuma, 1551 unsigned int SurfaceWidthChroma, 1552 unsigned int SurfaceHeightLuma, 1553 unsigned int SurfaceHeightChroma, 1554 double DETBufferSize, 1555 unsigned int RequestHeight256ByteLuma, 1556 unsigned int RequestHeight256ByteChroma, 1557 enum dm_swizzle_mode TilingFormat, 1558 unsigned int BytePerPixelY, 1559 unsigned int BytePerPixelC, 1560 double BytePerPixelDETY, 1561 double BytePerPixelDETC, 1562 enum scan_direction_class ScanOrientation, 1563 unsigned int *MaxUncompressedBlockLuma, 1564 unsigned int *MaxUncompressedBlockChroma, 1565 unsigned int *MaxCompressedBlockLuma, 1566 unsigned int *MaxCompressedBlockChroma, 1567 unsigned int *IndependentBlockLuma, 1568 unsigned int *IndependentBlockChroma) 1569 { 1570 int yuv420; 1571 int horz_div_l; 1572 int horz_div_c; 1573 int vert_div_l; 1574 int vert_div_c; 1575 1576 int swath_buf_size; 1577 double detile_buf_vp_horz_limit; 1578 double detile_buf_vp_vert_limit; 1579 1580 int MAS_vp_horz_limit; 1581 int MAS_vp_vert_limit; 1582 int max_vp_horz_width; 1583 int max_vp_vert_height; 1584 int eff_surf_width_l; 1585 int 
eff_surf_width_c; 1586 int eff_surf_height_l; 1587 int eff_surf_height_c; 1588 1589 int full_swath_bytes_horz_wc_l; 1590 int full_swath_bytes_horz_wc_c; 1591 int full_swath_bytes_vert_wc_l; 1592 int full_swath_bytes_vert_wc_c; 1593 int req128_horz_wc_l; 1594 int req128_horz_wc_c; 1595 int req128_vert_wc_l; 1596 int req128_vert_wc_c; 1597 int segment_order_horz_contiguous_luma; 1598 int segment_order_horz_contiguous_chroma; 1599 int segment_order_vert_contiguous_luma; 1600 int segment_order_vert_contiguous_chroma; 1601 1602 enum { 1603 REQ_256Bytes, REQ_128BytesNonContiguous, REQ_128BytesContiguous, REQ_NA 1604 } RequestType; 1605 RequestType RequestLuma; 1606 RequestType RequestChroma; 1607 1608 yuv420 = ((SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12) ? 1 : 0); 1609 horz_div_l = 1; 1610 horz_div_c = 1; 1611 vert_div_l = 1; 1612 vert_div_c = 1; 1613 1614 if (BytePerPixelY == 1) 1615 vert_div_l = 0; 1616 if (BytePerPixelC == 1) 1617 vert_div_c = 0; 1618 if (BytePerPixelY == 8 && (TilingFormat == dm_sw_64kb_s || TilingFormat == dm_sw_64kb_s_t || TilingFormat == dm_sw_64kb_s_x)) 1619 horz_div_l = 0; 1620 if (BytePerPixelC == 8 && (TilingFormat == dm_sw_64kb_s || TilingFormat == dm_sw_64kb_s_t || TilingFormat == dm_sw_64kb_s_x)) 1621 horz_div_c = 0; 1622 1623 if (BytePerPixelC == 0) { 1624 swath_buf_size = DETBufferSize / 2 - 2 * 256; 1625 detile_buf_vp_horz_limit = (double) swath_buf_size / ((double) RequestHeight256ByteLuma * BytePerPixelY / (1 + horz_div_l)); 1626 detile_buf_vp_vert_limit = (double) swath_buf_size / (256.0 / RequestHeight256ByteLuma / (1 + vert_div_l)); 1627 } else { 1628 swath_buf_size = DETBufferSize / 2 - 2 * 2 * 256; 1629 detile_buf_vp_horz_limit = (double) swath_buf_size 1630 / ((double) RequestHeight256ByteLuma * BytePerPixelY / (1 + horz_div_l) 1631 + (double) RequestHeight256ByteChroma * BytePerPixelC / (1 + horz_div_c) / (1 + yuv420)); 1632 detile_buf_vp_vert_limit = (double) 
swath_buf_size 1633 / (256.0 / RequestHeight256ByteLuma / (1 + vert_div_l) + 256.0 / RequestHeight256ByteChroma / (1 + vert_div_c) / (1 + yuv420)); 1634 } 1635 1636 if (SourcePixelFormat == dm_420_10) { 1637 detile_buf_vp_horz_limit = 1.5 * detile_buf_vp_horz_limit; 1638 detile_buf_vp_vert_limit = 1.5 * detile_buf_vp_vert_limit; 1639 } 1640 1641 detile_buf_vp_horz_limit = dml_floor(detile_buf_vp_horz_limit - 1, 16); 1642 detile_buf_vp_vert_limit = dml_floor(detile_buf_vp_vert_limit - 1, 16); 1643 1644 MAS_vp_horz_limit = SourcePixelFormat == dm_rgbe_alpha ? 3840 : 5760; 1645 MAS_vp_vert_limit = (BytePerPixelC > 0 ? 2880 : 5760); 1646 max_vp_horz_width = dml_min((double) MAS_vp_horz_limit, detile_buf_vp_horz_limit); 1647 max_vp_vert_height = dml_min((double) MAS_vp_vert_limit, detile_buf_vp_vert_limit); 1648 eff_surf_width_l = (SurfaceWidthLuma > max_vp_horz_width ? max_vp_horz_width : SurfaceWidthLuma); 1649 eff_surf_width_c = eff_surf_width_l / (1 + yuv420); 1650 eff_surf_height_l = (SurfaceHeightLuma > max_vp_vert_height ? 
max_vp_vert_height : SurfaceHeightLuma); 1651 eff_surf_height_c = eff_surf_height_l / (1 + yuv420); 1652 1653 full_swath_bytes_horz_wc_l = eff_surf_width_l * RequestHeight256ByteLuma * BytePerPixelY; 1654 full_swath_bytes_vert_wc_l = eff_surf_height_l * 256 / RequestHeight256ByteLuma; 1655 if (BytePerPixelC > 0) { 1656 full_swath_bytes_horz_wc_c = eff_surf_width_c * RequestHeight256ByteChroma * BytePerPixelC; 1657 full_swath_bytes_vert_wc_c = eff_surf_height_c * 256 / RequestHeight256ByteChroma; 1658 } else { 1659 full_swath_bytes_horz_wc_c = 0; 1660 full_swath_bytes_vert_wc_c = 0; 1661 } 1662 1663 if (SourcePixelFormat == dm_420_10) { 1664 full_swath_bytes_horz_wc_l = dml_ceil(full_swath_bytes_horz_wc_l * 2 / 3, 256); 1665 full_swath_bytes_horz_wc_c = dml_ceil(full_swath_bytes_horz_wc_c * 2 / 3, 256); 1666 full_swath_bytes_vert_wc_l = dml_ceil(full_swath_bytes_vert_wc_l * 2 / 3, 256); 1667 full_swath_bytes_vert_wc_c = dml_ceil(full_swath_bytes_vert_wc_c * 2 / 3, 256); 1668 } 1669 1670 if (2 * full_swath_bytes_horz_wc_l + 2 * full_swath_bytes_horz_wc_c <= DETBufferSize) { 1671 req128_horz_wc_l = 0; 1672 req128_horz_wc_c = 0; 1673 } else if (full_swath_bytes_horz_wc_l < 1.5 * full_swath_bytes_horz_wc_c && 2 * full_swath_bytes_horz_wc_l + full_swath_bytes_horz_wc_c <= DETBufferSize) { 1674 req128_horz_wc_l = 0; 1675 req128_horz_wc_c = 1; 1676 } else if (full_swath_bytes_horz_wc_l >= 1.5 * full_swath_bytes_horz_wc_c && full_swath_bytes_horz_wc_l + 2 * full_swath_bytes_horz_wc_c <= DETBufferSize) { 1677 req128_horz_wc_l = 1; 1678 req128_horz_wc_c = 0; 1679 } else { 1680 req128_horz_wc_l = 1; 1681 req128_horz_wc_c = 1; 1682 } 1683 1684 if (2 * full_swath_bytes_vert_wc_l + 2 * full_swath_bytes_vert_wc_c <= DETBufferSize) { 1685 req128_vert_wc_l = 0; 1686 req128_vert_wc_c = 0; 1687 } else if (full_swath_bytes_vert_wc_l < 1.5 * full_swath_bytes_vert_wc_c && 2 * full_swath_bytes_vert_wc_l + full_swath_bytes_vert_wc_c <= DETBufferSize) { 1688 req128_vert_wc_l = 0; 1689 
req128_vert_wc_c = 1; 1690 } else if (full_swath_bytes_vert_wc_l >= 1.5 * full_swath_bytes_vert_wc_c && full_swath_bytes_vert_wc_l + 2 * full_swath_bytes_vert_wc_c <= DETBufferSize) { 1691 req128_vert_wc_l = 1; 1692 req128_vert_wc_c = 0; 1693 } else { 1694 req128_vert_wc_l = 1; 1695 req128_vert_wc_c = 1; 1696 } 1697 1698 if (BytePerPixelY == 2 || (BytePerPixelY == 4 && TilingFormat != dm_sw_64kb_r_x)) { 1699 segment_order_horz_contiguous_luma = 0; 1700 } else { 1701 segment_order_horz_contiguous_luma = 1; 1702 } 1703 if ((BytePerPixelY == 8 && (TilingFormat == dm_sw_64kb_d || TilingFormat == dm_sw_64kb_d_x || TilingFormat == dm_sw_64kb_d_t || TilingFormat == dm_sw_64kb_r_x)) 1704 || (BytePerPixelY == 4 && TilingFormat == dm_sw_64kb_r_x)) { 1705 segment_order_vert_contiguous_luma = 0; 1706 } else { 1707 segment_order_vert_contiguous_luma = 1; 1708 } 1709 if (BytePerPixelC == 2 || (BytePerPixelC == 4 && TilingFormat != dm_sw_64kb_r_x)) { 1710 segment_order_horz_contiguous_chroma = 0; 1711 } else { 1712 segment_order_horz_contiguous_chroma = 1; 1713 } 1714 if ((BytePerPixelC == 8 && (TilingFormat == dm_sw_64kb_d || TilingFormat == dm_sw_64kb_d_x || TilingFormat == dm_sw_64kb_d_t || TilingFormat == dm_sw_64kb_r_x)) 1715 || (BytePerPixelC == 4 && TilingFormat == dm_sw_64kb_r_x)) { 1716 segment_order_vert_contiguous_chroma = 0; 1717 } else { 1718 segment_order_vert_contiguous_chroma = 1; 1719 } 1720 1721 if (DCCProgrammingAssumesScanDirectionUnknown == true) { 1722 if (req128_horz_wc_l == 0 && req128_vert_wc_l == 0) { 1723 RequestLuma = REQ_256Bytes; 1724 } else if ((req128_horz_wc_l == 1 && segment_order_horz_contiguous_luma == 0) || (req128_vert_wc_l == 1 && segment_order_vert_contiguous_luma == 0)) { 1725 RequestLuma = REQ_128BytesNonContiguous; 1726 } else { 1727 RequestLuma = REQ_128BytesContiguous; 1728 } 1729 if (req128_horz_wc_c == 0 && req128_vert_wc_c == 0) { 1730 RequestChroma = REQ_256Bytes; 1731 } else if ((req128_horz_wc_c == 1 && 
segment_order_horz_contiguous_chroma == 0) || (req128_vert_wc_c == 1 && segment_order_vert_contiguous_chroma == 0)) { 1732 RequestChroma = REQ_128BytesNonContiguous; 1733 } else { 1734 RequestChroma = REQ_128BytesContiguous; 1735 } 1736 } else if (ScanOrientation != dm_vert) { 1737 if (req128_horz_wc_l == 0) { 1738 RequestLuma = REQ_256Bytes; 1739 } else if (segment_order_horz_contiguous_luma == 0) { 1740 RequestLuma = REQ_128BytesNonContiguous; 1741 } else { 1742 RequestLuma = REQ_128BytesContiguous; 1743 } 1744 if (req128_horz_wc_c == 0) { 1745 RequestChroma = REQ_256Bytes; 1746 } else if (segment_order_horz_contiguous_chroma == 0) { 1747 RequestChroma = REQ_128BytesNonContiguous; 1748 } else { 1749 RequestChroma = REQ_128BytesContiguous; 1750 } 1751 } else { 1752 if (req128_vert_wc_l == 0) { 1753 RequestLuma = REQ_256Bytes; 1754 } else if (segment_order_vert_contiguous_luma == 0) { 1755 RequestLuma = REQ_128BytesNonContiguous; 1756 } else { 1757 RequestLuma = REQ_128BytesContiguous; 1758 } 1759 if (req128_vert_wc_c == 0) { 1760 RequestChroma = REQ_256Bytes; 1761 } else if (segment_order_vert_contiguous_chroma == 0) { 1762 RequestChroma = REQ_128BytesNonContiguous; 1763 } else { 1764 RequestChroma = REQ_128BytesContiguous; 1765 } 1766 } 1767 1768 if (RequestLuma == REQ_256Bytes) { 1769 *MaxUncompressedBlockLuma = 256; 1770 *MaxCompressedBlockLuma = 256; 1771 *IndependentBlockLuma = 0; 1772 } else if (RequestLuma == REQ_128BytesContiguous) { 1773 *MaxUncompressedBlockLuma = 256; 1774 *MaxCompressedBlockLuma = 128; 1775 *IndependentBlockLuma = 128; 1776 } else { 1777 *MaxUncompressedBlockLuma = 256; 1778 *MaxCompressedBlockLuma = 64; 1779 *IndependentBlockLuma = 64; 1780 } 1781 1782 if (RequestChroma == REQ_256Bytes) { 1783 *MaxUncompressedBlockChroma = 256; 1784 *MaxCompressedBlockChroma = 256; 1785 *IndependentBlockChroma = 0; 1786 } else if (RequestChroma == REQ_128BytesContiguous) { 1787 *MaxUncompressedBlockChroma = 256; 1788 *MaxCompressedBlockChroma = 128; 
1789 *IndependentBlockChroma = 128; 1790 } else { 1791 *MaxUncompressedBlockChroma = 256; 1792 *MaxCompressedBlockChroma = 64; 1793 *IndependentBlockChroma = 64; 1794 } 1795 1796 if (DCCEnabled != true || BytePerPixelC == 0) { 1797 *MaxUncompressedBlockChroma = 0; 1798 *MaxCompressedBlockChroma = 0; 1799 *IndependentBlockChroma = 0; 1800 } 1801 1802 if (DCCEnabled != true) { 1803 *MaxUncompressedBlockLuma = 0; 1804 *MaxCompressedBlockLuma = 0; 1805 *IndependentBlockLuma = 0; 1806 } 1807 } 1808 1809 static double CalculatePrefetchSourceLines( 1810 struct display_mode_lib *mode_lib, 1811 double VRatio, 1812 double vtaps, 1813 bool Interlace, 1814 bool ProgressiveToInterlaceUnitInOPP, 1815 unsigned int SwathHeight, 1816 unsigned int ViewportYStart, 1817 double *VInitPreFill, 1818 unsigned int *MaxNumSwath) 1819 { 1820 struct vba_vars_st *v = &mode_lib->vba; 1821 unsigned int MaxPartialSwath; 1822 1823 if (ProgressiveToInterlaceUnitInOPP) 1824 *VInitPreFill = dml_floor((VRatio + vtaps + 1) / 2.0, 1); 1825 else 1826 *VInitPreFill = dml_floor((VRatio + vtaps + 1 + Interlace * 0.5 * VRatio) / 2.0, 1); 1827 1828 if (!v->IgnoreViewportPositioning) { 1829 1830 *MaxNumSwath = dml_ceil((*VInitPreFill - 1.0) / SwathHeight, 1) + 1.0; 1831 1832 if (*VInitPreFill > 1.0) 1833 MaxPartialSwath = (unsigned int) (*VInitPreFill - 2) % SwathHeight; 1834 else 1835 MaxPartialSwath = (unsigned int) (*VInitPreFill + SwathHeight - 2) % SwathHeight; 1836 MaxPartialSwath = dml_max(1U, MaxPartialSwath); 1837 1838 } else { 1839 1840 if (ViewportYStart != 0) 1841 dml_print("WARNING DML: using viewport y position of 0 even though actual viewport y position is non-zero in prefetch source lines calculation\n"); 1842 1843 *MaxNumSwath = dml_ceil(*VInitPreFill / SwathHeight, 1); 1844 1845 if (*VInitPreFill > 1.0) 1846 MaxPartialSwath = (unsigned int) (*VInitPreFill - 1) % SwathHeight; 1847 else 1848 MaxPartialSwath = (unsigned int) (*VInitPreFill + SwathHeight - 1) % SwathHeight; 1849 } 1850 1851 
#ifdef __DML_VBA_DEBUG__ 1852 dml_print("DML::%s: VRatio = %f\n", __func__, VRatio); 1853 dml_print("DML::%s: vtaps = %f\n", __func__, vtaps); 1854 dml_print("DML::%s: VInitPreFill = %f\n", __func__, *VInitPreFill); 1855 dml_print("DML::%s: ProgressiveToInterlaceUnitInOPP = %d\n", __func__, ProgressiveToInterlaceUnitInOPP); 1856 dml_print("DML::%s: IgnoreViewportPositioning = %d\n", __func__, v->IgnoreViewportPositioning); 1857 dml_print("DML::%s: SwathHeight = %d\n", __func__, SwathHeight); 1858 dml_print("DML::%s: MaxPartialSwath = %d\n", __func__, MaxPartialSwath); 1859 dml_print("DML::%s: MaxNumSwath = %d\n", __func__, *MaxNumSwath); 1860 dml_print("DML::%s: Prefetch source lines = %d\n", __func__, *MaxNumSwath * SwathHeight + MaxPartialSwath); 1861 #endif 1862 return *MaxNumSwath * SwathHeight + MaxPartialSwath; 1863 } 1864 1865 static unsigned int CalculateVMAndRowBytes( 1866 struct display_mode_lib *mode_lib, 1867 bool DCCEnable, 1868 unsigned int BlockHeight256Bytes, 1869 unsigned int BlockWidth256Bytes, 1870 enum source_format_class SourcePixelFormat, 1871 unsigned int SurfaceTiling, 1872 unsigned int BytePerPixel, 1873 enum scan_direction_class ScanDirection, 1874 unsigned int SwathWidth, 1875 unsigned int ViewportHeight, 1876 bool GPUVMEnable, 1877 bool HostVMEnable, 1878 unsigned int HostVMMaxNonCachedPageTableLevels, 1879 unsigned int GPUVMMinPageSize, 1880 unsigned int HostVMMinPageSize, 1881 unsigned int PTEBufferSizeInRequests, 1882 unsigned int Pitch, 1883 unsigned int DCCMetaPitch, 1884 unsigned int *MacroTileWidth, 1885 unsigned int *MetaRowByte, 1886 unsigned int *PixelPTEBytesPerRow, 1887 bool *PTEBufferSizeNotExceeded, 1888 int *dpte_row_width_ub, 1889 unsigned int *dpte_row_height, 1890 unsigned int *MetaRequestWidth, 1891 unsigned int *MetaRequestHeight, 1892 unsigned int *meta_row_width, 1893 unsigned int *meta_row_height, 1894 int *vm_group_bytes, 1895 unsigned int *dpte_group_bytes, 1896 unsigned int *PixelPTEReqWidth, 1897 unsigned int 
*PixelPTEReqHeight, 1898 unsigned int *PTERequestSize, 1899 int *DPDE0BytesFrame, 1900 int *MetaPTEBytesFrame) 1901 { 1902 struct vba_vars_st *v = &mode_lib->vba; 1903 unsigned int MPDEBytesFrame; 1904 unsigned int DCCMetaSurfaceBytes; 1905 unsigned int MacroTileSizeBytes; 1906 unsigned int MacroTileHeight; 1907 unsigned int ExtraDPDEBytesFrame; 1908 unsigned int PDEAndMetaPTEBytesFrame; 1909 unsigned int PixelPTEReqHeightPTEs = 0; 1910 unsigned int HostVMDynamicLevels = 0; 1911 double FractionOfPTEReturnDrop; 1912 1913 if (GPUVMEnable == true && HostVMEnable == true) { 1914 if (HostVMMinPageSize < 2048) { 1915 HostVMDynamicLevels = HostVMMaxNonCachedPageTableLevels; 1916 } else if (HostVMMinPageSize >= 2048 && HostVMMinPageSize < 1048576) { 1917 HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 1); 1918 } else { 1919 HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 2); 1920 } 1921 } 1922 1923 *MetaRequestHeight = 8 * BlockHeight256Bytes; 1924 *MetaRequestWidth = 8 * BlockWidth256Bytes; 1925 if (ScanDirection != dm_vert) { 1926 *meta_row_height = *MetaRequestHeight; 1927 *meta_row_width = dml_ceil((double) SwathWidth - 1, *MetaRequestWidth) + *MetaRequestWidth; 1928 *MetaRowByte = *meta_row_width * *MetaRequestHeight * BytePerPixel / 256.0; 1929 } else { 1930 *meta_row_height = *MetaRequestWidth; 1931 *meta_row_width = dml_ceil((double) SwathWidth - 1, *MetaRequestHeight) + *MetaRequestHeight; 1932 *MetaRowByte = *meta_row_width * *MetaRequestWidth * BytePerPixel / 256.0; 1933 } 1934 DCCMetaSurfaceBytes = DCCMetaPitch * (dml_ceil(ViewportHeight - 1, 64 * BlockHeight256Bytes) + 64 * BlockHeight256Bytes) * BytePerPixel / 256; 1935 if (GPUVMEnable == true) { 1936 *MetaPTEBytesFrame = (dml_ceil((double) (DCCMetaSurfaceBytes - 4.0 * 1024.0) / (8 * 4.0 * 1024), 1) + 1) * 64; 1937 MPDEBytesFrame = 128 * (v->GPUVMMaxPageTableLevels - 1); 1938 } else { 1939 *MetaPTEBytesFrame = 0; 1940 MPDEBytesFrame = 0; 1941 } 1942 1943 
if (DCCEnable != true) { 1944 *MetaPTEBytesFrame = 0; 1945 MPDEBytesFrame = 0; 1946 *MetaRowByte = 0; 1947 } 1948 1949 if (SurfaceTiling == dm_sw_linear) { 1950 MacroTileSizeBytes = 256; 1951 MacroTileHeight = BlockHeight256Bytes; 1952 } else { 1953 MacroTileSizeBytes = 65536; 1954 MacroTileHeight = 16 * BlockHeight256Bytes; 1955 } 1956 *MacroTileWidth = MacroTileSizeBytes / BytePerPixel / MacroTileHeight; 1957 1958 if (GPUVMEnable == true && v->GPUVMMaxPageTableLevels > 1) { 1959 if (ScanDirection != dm_vert) { 1960 *DPDE0BytesFrame = 64 1961 * (dml_ceil( 1962 ((Pitch * (dml_ceil(ViewportHeight - 1, MacroTileHeight) + MacroTileHeight) * BytePerPixel) - MacroTileSizeBytes) 1963 / (8 * 2097152), 1964 1) + 1); 1965 } else { 1966 *DPDE0BytesFrame = 64 1967 * (dml_ceil( 1968 ((Pitch * (dml_ceil((double) SwathWidth - 1, MacroTileHeight) + MacroTileHeight) * BytePerPixel) - MacroTileSizeBytes) 1969 / (8 * 2097152), 1970 1) + 1); 1971 } 1972 ExtraDPDEBytesFrame = 128 * (v->GPUVMMaxPageTableLevels - 2); 1973 } else { 1974 *DPDE0BytesFrame = 0; 1975 ExtraDPDEBytesFrame = 0; 1976 } 1977 1978 PDEAndMetaPTEBytesFrame = *MetaPTEBytesFrame + MPDEBytesFrame + *DPDE0BytesFrame + ExtraDPDEBytesFrame; 1979 1980 #ifdef __DML_VBA_DEBUG__ 1981 dml_print("DML::%s: MetaPTEBytesFrame = %d\n", __func__, *MetaPTEBytesFrame); 1982 dml_print("DML::%s: MPDEBytesFrame = %d\n", __func__, MPDEBytesFrame); 1983 dml_print("DML::%s: DPDE0BytesFrame = %d\n", __func__, *DPDE0BytesFrame); 1984 dml_print("DML::%s: ExtraDPDEBytesFrame= %d\n", __func__, ExtraDPDEBytesFrame); 1985 dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame); 1986 #endif 1987 1988 if (HostVMEnable == true) { 1989 PDEAndMetaPTEBytesFrame = PDEAndMetaPTEBytesFrame * (1 + 8 * HostVMDynamicLevels); 1990 } 1991 #ifdef __DML_VBA_DEBUG__ 1992 dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame); 1993 #endif 1994 1995 if (SurfaceTiling == dm_sw_linear) { 1996 
PixelPTEReqHeightPTEs = 1; 1997 *PixelPTEReqHeight = 1; 1998 *PixelPTEReqWidth = 32768.0 / BytePerPixel; 1999 *PTERequestSize = 64; 2000 FractionOfPTEReturnDrop = 0; 2001 } else if (MacroTileSizeBytes == 4096) { 2002 PixelPTEReqHeightPTEs = 1; 2003 *PixelPTEReqHeight = MacroTileHeight; 2004 *PixelPTEReqWidth = 8 * *MacroTileWidth; 2005 *PTERequestSize = 64; 2006 if (ScanDirection != dm_vert) 2007 FractionOfPTEReturnDrop = 0; 2008 else 2009 FractionOfPTEReturnDrop = 7 / 8; 2010 } else if (GPUVMMinPageSize == 4 && MacroTileSizeBytes > 4096) { 2011 PixelPTEReqHeightPTEs = 16; 2012 *PixelPTEReqHeight = 16 * BlockHeight256Bytes; 2013 *PixelPTEReqWidth = 16 * BlockWidth256Bytes; 2014 *PTERequestSize = 128; 2015 FractionOfPTEReturnDrop = 0; 2016 } else { 2017 PixelPTEReqHeightPTEs = 1; 2018 *PixelPTEReqHeight = MacroTileHeight; 2019 *PixelPTEReqWidth = 8 * *MacroTileWidth; 2020 *PTERequestSize = 64; 2021 FractionOfPTEReturnDrop = 0; 2022 } 2023 2024 if (SurfaceTiling == dm_sw_linear) { 2025 *dpte_row_height = dml_min(128, 1 << (unsigned int) dml_floor(dml_log2(PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch), 1)); 2026 *dpte_row_width_ub = (dml_ceil((double)(Pitch * *dpte_row_height - 1) / *PixelPTEReqWidth, 1) + 1) * *PixelPTEReqWidth; 2027 *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqWidth * *PTERequestSize; 2028 } else if (ScanDirection != dm_vert) { 2029 *dpte_row_height = *PixelPTEReqHeight; 2030 *dpte_row_width_ub = (dml_ceil((double) (SwathWidth - 1) / *PixelPTEReqWidth, 1) + 1) * *PixelPTEReqWidth; 2031 *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqWidth * *PTERequestSize; 2032 } else { 2033 *dpte_row_height = dml_min(*PixelPTEReqWidth, *MacroTileWidth); 2034 *dpte_row_width_ub = (dml_ceil((double) (SwathWidth - 1) / *PixelPTEReqHeight, 1) + 1) * *PixelPTEReqHeight; 2035 *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqHeight * *PTERequestSize; 2036 } 2037 2038 if (*PixelPTEBytesPerRow * (1 - FractionOfPTEReturnDrop) <= 64 * 
PTEBufferSizeInRequests) { 2039 *PTEBufferSizeNotExceeded = true; 2040 } else { 2041 *PTEBufferSizeNotExceeded = false; 2042 } 2043 2044 if (GPUVMEnable != true) { 2045 *PixelPTEBytesPerRow = 0; 2046 *PTEBufferSizeNotExceeded = true; 2047 } 2048 2049 dml_print("DML: vm_bytes = meta_pte_bytes_per_frame (per_pipe) = MetaPTEBytesFrame = : %i\n", *MetaPTEBytesFrame); 2050 2051 if (HostVMEnable == true) { 2052 *PixelPTEBytesPerRow = *PixelPTEBytesPerRow * (1 + 8 * HostVMDynamicLevels); 2053 } 2054 2055 if (HostVMEnable == true) { 2056 *vm_group_bytes = 512; 2057 *dpte_group_bytes = 512; 2058 } else if (GPUVMEnable == true) { 2059 *vm_group_bytes = 2048; 2060 if (SurfaceTiling != dm_sw_linear && PixelPTEReqHeightPTEs == 1 && ScanDirection == dm_vert) { 2061 *dpte_group_bytes = 512; 2062 } else { 2063 *dpte_group_bytes = 2048; 2064 } 2065 } else { 2066 *vm_group_bytes = 0; 2067 *dpte_group_bytes = 0; 2068 } 2069 return PDEAndMetaPTEBytesFrame; 2070 } 2071 2072 static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation(struct display_mode_lib *mode_lib) 2073 { 2074 struct vba_vars_st *v = &mode_lib->vba; 2075 unsigned int j, k; 2076 double HostVMInefficiencyFactor = 1.0; 2077 bool NoChromaPlanes = true; 2078 int ReorderBytes; 2079 double VMDataOnlyReturnBW; 2080 double MaxTotalRDBandwidth = 0; 2081 int PrefetchMode = v->PrefetchModePerState[v->VoltageLevel][v->maxMpcComb]; 2082 2083 v->WritebackDISPCLK = 0.0; 2084 v->DISPCLKWithRamping = 0; 2085 v->DISPCLKWithoutRamping = 0; 2086 v->GlobalDPPCLK = 0.0; 2087 /* DAL custom code: need to update ReturnBW in case min dcfclk is overridden */ 2088 { 2089 double IdealFabricAndSDPPortBandwidthPerState = dml_min( 2090 v->ReturnBusWidth * v->DCFCLKState[v->VoltageLevel][v->maxMpcComb], 2091 v->FabricClockPerState[v->VoltageLevel] * v->FabricDatapathToDCNDataReturn); 2092 double IdealDRAMBandwidthPerState = v->DRAMSpeedPerState[v->VoltageLevel] * v->NumberOfChannels * v->DRAMChannelWidth; 2093 2094 
if (v->HostVMEnable != true) { 2095 v->ReturnBW = dml_min( 2096 IdealFabricAndSDPPortBandwidthPerState * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0, 2097 IdealDRAMBandwidthPerState * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyPixelDataOnly / 100.0); 2098 } else { 2099 v->ReturnBW = dml_min( 2100 IdealFabricAndSDPPortBandwidthPerState * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0, 2101 IdealDRAMBandwidthPerState * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyPixelMixedWithVMData / 100.0); 2102 } 2103 } 2104 /* End DAL custom code */ 2105 2106 // DISPCLK and DPPCLK Calculation 2107 // 2108 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 2109 if (v->WritebackEnable[k]) { 2110 v->WritebackDISPCLK = dml_max( 2111 v->WritebackDISPCLK, 2112 dml314_CalculateWriteBackDISPCLK( 2113 v->WritebackPixelFormat[k], 2114 v->PixelClock[k], 2115 v->WritebackHRatio[k], 2116 v->WritebackVRatio[k], 2117 v->WritebackHTaps[k], 2118 v->WritebackVTaps[k], 2119 v->WritebackSourceWidth[k], 2120 v->WritebackDestinationWidth[k], 2121 v->HTotal[k], 2122 v->WritebackLineBufferSize)); 2123 } 2124 } 2125 2126 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 2127 if (v->HRatio[k] > 1) { 2128 v->PSCL_THROUGHPUT_LUMA[k] = dml_min( 2129 v->MaxDCHUBToPSCLThroughput, 2130 v->MaxPSCLToLBThroughput * v->HRatio[k] / dml_ceil(v->htaps[k] / 6.0, 1)); 2131 } else { 2132 v->PSCL_THROUGHPUT_LUMA[k] = dml_min(v->MaxDCHUBToPSCLThroughput, v->MaxPSCLToLBThroughput); 2133 } 2134 2135 v->DPPCLKUsingSingleDPPLuma = v->PixelClock[k] 2136 * dml_max( 2137 v->vtaps[k] / 6.0 * dml_min(1.0, v->HRatio[k]), 2138 dml_max(v->HRatio[k] * v->VRatio[k] / v->PSCL_THROUGHPUT_LUMA[k], 1.0)); 2139 2140 if ((v->htaps[k] > 6 || v->vtaps[k] > 6) && v->DPPCLKUsingSingleDPPLuma < 2 * v->PixelClock[k]) { 2141 v->DPPCLKUsingSingleDPPLuma = 2 * v->PixelClock[k]; 2142 } 2143 2144 if ((v->SourcePixelFormat[k] != dm_420_8 && v->SourcePixelFormat[k] != dm_420_10 && v->SourcePixelFormat[k] != 
dm_420_12 2145 && v->SourcePixelFormat[k] != dm_rgbe_alpha)) { 2146 v->PSCL_THROUGHPUT_CHROMA[k] = 0.0; 2147 v->DPPCLKUsingSingleDPP[k] = v->DPPCLKUsingSingleDPPLuma; 2148 } else { 2149 if (v->HRatioChroma[k] > 1) { 2150 v->PSCL_THROUGHPUT_CHROMA[k] = dml_min( 2151 v->MaxDCHUBToPSCLThroughput, 2152 v->MaxPSCLToLBThroughput * v->HRatioChroma[k] / dml_ceil(v->HTAPsChroma[k] / 6.0, 1.0)); 2153 } else { 2154 v->PSCL_THROUGHPUT_CHROMA[k] = dml_min(v->MaxDCHUBToPSCLThroughput, v->MaxPSCLToLBThroughput); 2155 } 2156 v->DPPCLKUsingSingleDPPChroma = v->PixelClock[k] 2157 * dml_max3( 2158 v->VTAPsChroma[k] / 6.0 * dml_min(1.0, v->HRatioChroma[k]), 2159 v->HRatioChroma[k] * v->VRatioChroma[k] / v->PSCL_THROUGHPUT_CHROMA[k], 2160 1.0); 2161 2162 if ((v->HTAPsChroma[k] > 6 || v->VTAPsChroma[k] > 6) && v->DPPCLKUsingSingleDPPChroma < 2 * v->PixelClock[k]) { 2163 v->DPPCLKUsingSingleDPPChroma = 2 * v->PixelClock[k]; 2164 } 2165 2166 v->DPPCLKUsingSingleDPP[k] = dml_max(v->DPPCLKUsingSingleDPPLuma, v->DPPCLKUsingSingleDPPChroma); 2167 } 2168 } 2169 2170 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 2171 if (v->BlendingAndTiming[k] != k) 2172 continue; 2173 if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_4to1) { 2174 v->DISPCLKWithRamping = dml_max( 2175 v->DISPCLKWithRamping, 2176 v->PixelClock[k] / 4 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100) 2177 * (1 + v->DISPCLKRampingMargin / 100)); 2178 v->DISPCLKWithoutRamping = dml_max( 2179 v->DISPCLKWithoutRamping, 2180 v->PixelClock[k] / 4 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100)); 2181 } else if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_2to1) { 2182 v->DISPCLKWithRamping = dml_max( 2183 v->DISPCLKWithRamping, 2184 v->PixelClock[k] / 2 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100) 2185 * (1 + v->DISPCLKRampingMargin / 100)); 2186 v->DISPCLKWithoutRamping = dml_max( 2187 v->DISPCLKWithoutRamping, 2188 v->PixelClock[k] / 2 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100)); 2189 } else { 2190 
v->DISPCLKWithRamping = dml_max( 2191 v->DISPCLKWithRamping, 2192 v->PixelClock[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100) * (1 + v->DISPCLKRampingMargin / 100)); 2193 v->DISPCLKWithoutRamping = dml_max( 2194 v->DISPCLKWithoutRamping, 2195 v->PixelClock[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100)); 2196 } 2197 } 2198 2199 v->DISPCLKWithRamping = dml_max(v->DISPCLKWithRamping, v->WritebackDISPCLK); 2200 v->DISPCLKWithoutRamping = dml_max(v->DISPCLKWithoutRamping, v->WritebackDISPCLK); 2201 2202 ASSERT(v->DISPCLKDPPCLKVCOSpeed != 0); 2203 v->DISPCLKWithRampingRoundedToDFSGranularity = RoundToDFSGranularityUp(v->DISPCLKWithRamping, v->DISPCLKDPPCLKVCOSpeed); 2204 v->DISPCLKWithoutRampingRoundedToDFSGranularity = RoundToDFSGranularityUp(v->DISPCLKWithoutRamping, v->DISPCLKDPPCLKVCOSpeed); 2205 v->MaxDispclkRoundedToDFSGranularity = RoundToDFSGranularityDown( 2206 v->soc.clock_limits[v->soc.num_states - 1].dispclk_mhz, 2207 v->DISPCLKDPPCLKVCOSpeed); 2208 if (v->DISPCLKWithoutRampingRoundedToDFSGranularity > v->MaxDispclkRoundedToDFSGranularity) { 2209 v->DISPCLK_calculated = v->DISPCLKWithoutRampingRoundedToDFSGranularity; 2210 } else if (v->DISPCLKWithRampingRoundedToDFSGranularity > v->MaxDispclkRoundedToDFSGranularity) { 2211 v->DISPCLK_calculated = v->MaxDispclkRoundedToDFSGranularity; 2212 } else { 2213 v->DISPCLK_calculated = v->DISPCLKWithRampingRoundedToDFSGranularity; 2214 } 2215 v->DISPCLK = v->DISPCLK_calculated; 2216 DTRACE(" dispclk_mhz (calculated) = %f", v->DISPCLK_calculated); 2217 2218 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 2219 v->DPPCLK_calculated[k] = v->DPPCLKUsingSingleDPP[k] / v->DPPPerPlane[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100); 2220 v->GlobalDPPCLK = dml_max(v->GlobalDPPCLK, v->DPPCLK_calculated[k]); 2221 } 2222 v->GlobalDPPCLK = RoundToDFSGranularityUp(v->GlobalDPPCLK, v->DISPCLKDPPCLKVCOSpeed); 2223 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 2224 v->DPPCLK_calculated[k] = v->GlobalDPPCLK / 255 * 
dml_ceil(v->DPPCLK_calculated[k] * 255.0 / v->GlobalDPPCLK, 1); 2225 DTRACE(" dppclk_mhz[%i] (calculated) = %f", k, v->DPPCLK_calculated[k]); 2226 } 2227 2228 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 2229 v->DPPCLK[k] = v->DPPCLK_calculated[k]; 2230 } 2231 2232 // Urgent and B P-State/DRAM Clock Change Watermark 2233 DTRACE(" dcfclk_mhz = %f", v->DCFCLK); 2234 DTRACE(" return_bus_bw = %f", v->ReturnBW); 2235 2236 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 2237 CalculateBytePerPixelAnd256BBlockSizes( 2238 v->SourcePixelFormat[k], 2239 v->SurfaceTiling[k], 2240 &v->BytePerPixelY[k], 2241 &v->BytePerPixelC[k], 2242 &v->BytePerPixelDETY[k], 2243 &v->BytePerPixelDETC[k], 2244 &v->BlockHeight256BytesY[k], 2245 &v->BlockHeight256BytesC[k], 2246 &v->BlockWidth256BytesY[k], 2247 &v->BlockWidth256BytesC[k]); 2248 } 2249 2250 CalculateSwathWidth( 2251 false, 2252 v->NumberOfActivePlanes, 2253 v->SourcePixelFormat, 2254 v->SourceScan, 2255 v->ViewportWidth, 2256 v->ViewportHeight, 2257 v->SurfaceWidthY, 2258 v->SurfaceWidthC, 2259 v->SurfaceHeightY, 2260 v->SurfaceHeightC, 2261 v->ODMCombineEnabled, 2262 v->BytePerPixelY, 2263 v->BytePerPixelC, 2264 v->BlockHeight256BytesY, 2265 v->BlockHeight256BytesC, 2266 v->BlockWidth256BytesY, 2267 v->BlockWidth256BytesC, 2268 v->BlendingAndTiming, 2269 v->HActive, 2270 v->HRatio, 2271 v->DPPPerPlane, 2272 v->SwathWidthSingleDPPY, 2273 v->SwathWidthSingleDPPC, 2274 v->SwathWidthY, 2275 v->SwathWidthC, 2276 v->dummyinteger3, 2277 v->dummyinteger4, 2278 v->swath_width_luma_ub, 2279 v->swath_width_chroma_ub); 2280 2281 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 2282 v->ReadBandwidthPlaneLuma[k] = v->SwathWidthSingleDPPY[k] * v->BytePerPixelY[k] / (v->HTotal[k] / v->PixelClock[k]) 2283 * v->VRatio[k]; 2284 v->ReadBandwidthPlaneChroma[k] = v->SwathWidthSingleDPPC[k] * v->BytePerPixelC[k] / (v->HTotal[k] / v->PixelClock[k]) 2285 * v->VRatioChroma[k]; 2286 DTRACE(" read_bw[%i] = %fBps", k, v->ReadBandwidthPlaneLuma[k] + 
v->ReadBandwidthPlaneChroma[k]); 2287 } 2288 2289 // DCFCLK Deep Sleep 2290 CalculateDCFCLKDeepSleep( 2291 mode_lib, 2292 v->NumberOfActivePlanes, 2293 v->BytePerPixelY, 2294 v->BytePerPixelC, 2295 v->VRatio, 2296 v->VRatioChroma, 2297 v->SwathWidthY, 2298 v->SwathWidthC, 2299 v->DPPPerPlane, 2300 v->HRatio, 2301 v->HRatioChroma, 2302 v->PixelClock, 2303 v->PSCL_THROUGHPUT_LUMA, 2304 v->PSCL_THROUGHPUT_CHROMA, 2305 v->DPPCLK, 2306 v->ReadBandwidthPlaneLuma, 2307 v->ReadBandwidthPlaneChroma, 2308 v->ReturnBusWidth, 2309 &v->DCFCLKDeepSleep); 2310 2311 // DSCCLK 2312 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 2313 if ((v->BlendingAndTiming[k] != k) || !v->DSCEnabled[k]) { 2314 v->DSCCLK_calculated[k] = 0.0; 2315 } else { 2316 if (v->OutputFormat[k] == dm_420) 2317 v->DSCFormatFactor = 2; 2318 else if (v->OutputFormat[k] == dm_444) 2319 v->DSCFormatFactor = 1; 2320 else if (v->OutputFormat[k] == dm_n422) 2321 v->DSCFormatFactor = 2; 2322 else 2323 v->DSCFormatFactor = 1; 2324 if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_4to1) 2325 v->DSCCLK_calculated[k] = v->PixelClockBackEnd[k] / 12 / v->DSCFormatFactor 2326 / (1 - v->DISPCLKDPPCLKDSCCLKDownSpreading / 100); 2327 else if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_2to1) 2328 v->DSCCLK_calculated[k] = v->PixelClockBackEnd[k] / 6 / v->DSCFormatFactor 2329 / (1 - v->DISPCLKDPPCLKDSCCLKDownSpreading / 100); 2330 else 2331 v->DSCCLK_calculated[k] = v->PixelClockBackEnd[k] / 3 / v->DSCFormatFactor 2332 / (1 - v->DISPCLKDPPCLKDSCCLKDownSpreading / 100); 2333 } 2334 } 2335 2336 // DSC Delay 2337 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 2338 double BPP = v->OutputBpp[k]; 2339 2340 if (v->DSCEnabled[k] && BPP != 0) { 2341 if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_disabled) { 2342 v->DSCDelay[k] = dscceComputeDelay( 2343 v->DSCInputBitPerComponent[k], 2344 BPP, 2345 dml_ceil((double) v->HActive[k] / v->NumberOfDSCSlices[k], 1), 2346 v->NumberOfDSCSlices[k], 2347 v->OutputFormat[k], 2348 
v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k]); 2349 } else if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_2to1) { 2350 v->DSCDelay[k] = 2 2351 * (dscceComputeDelay( 2352 v->DSCInputBitPerComponent[k], 2353 BPP, 2354 dml_ceil((double) v->HActive[k] / v->NumberOfDSCSlices[k], 1), 2355 v->NumberOfDSCSlices[k] / 2.0, 2356 v->OutputFormat[k], 2357 v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k])); 2358 } else { 2359 v->DSCDelay[k] = 4 2360 * (dscceComputeDelay( 2361 v->DSCInputBitPerComponent[k], 2362 BPP, 2363 dml_ceil((double) v->HActive[k] / v->NumberOfDSCSlices[k], 1), 2364 v->NumberOfDSCSlices[k] / 4.0, 2365 v->OutputFormat[k], 2366 v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k])); 2367 } 2368 v->DSCDelay[k] = v->DSCDelay[k] * v->PixelClock[k] / v->PixelClockBackEnd[k]; 2369 } else { 2370 v->DSCDelay[k] = 0; 2371 } 2372 } 2373 2374 for (k = 0; k < v->NumberOfActivePlanes; ++k) 2375 for (j = 0; j < v->NumberOfActivePlanes; ++j) // NumberOfPlanes 2376 if (j != k && v->BlendingAndTiming[k] == j && v->DSCEnabled[j]) 2377 v->DSCDelay[k] = v->DSCDelay[j]; 2378 2379 // Prefetch 2380 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 2381 unsigned int PDEAndMetaPTEBytesFrameY; 2382 unsigned int PixelPTEBytesPerRowY; 2383 unsigned int MetaRowByteY; 2384 unsigned int MetaRowByteC; 2385 unsigned int PDEAndMetaPTEBytesFrameC; 2386 unsigned int PixelPTEBytesPerRowC; 2387 bool PTEBufferSizeNotExceededY; 2388 bool PTEBufferSizeNotExceededC; 2389 2390 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12 2391 || v->SourcePixelFormat[k] == dm_rgbe_alpha) { 2392 if ((v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12) && v->SourceScan[k] != dm_vert) { 2393 v->PTEBufferSizeInRequestsForLuma = (v->PTEBufferSizeInRequestsLuma + v->PTEBufferSizeInRequestsChroma) / 2; 2394 v->PTEBufferSizeInRequestsForChroma = 
v->PTEBufferSizeInRequestsForLuma; 2395 } else { 2396 v->PTEBufferSizeInRequestsForLuma = v->PTEBufferSizeInRequestsLuma; 2397 v->PTEBufferSizeInRequestsForChroma = v->PTEBufferSizeInRequestsChroma; 2398 } 2399 2400 PDEAndMetaPTEBytesFrameC = CalculateVMAndRowBytes( 2401 mode_lib, 2402 v->DCCEnable[k], 2403 v->BlockHeight256BytesC[k], 2404 v->BlockWidth256BytesC[k], 2405 v->SourcePixelFormat[k], 2406 v->SurfaceTiling[k], 2407 v->BytePerPixelC[k], 2408 v->SourceScan[k], 2409 v->SwathWidthC[k], 2410 v->ViewportHeightChroma[k], 2411 v->GPUVMEnable, 2412 v->HostVMEnable, 2413 v->HostVMMaxNonCachedPageTableLevels, 2414 v->GPUVMMinPageSize, 2415 v->HostVMMinPageSize, 2416 v->PTEBufferSizeInRequestsForChroma, 2417 v->PitchC[k], 2418 v->DCCMetaPitchC[k], 2419 &v->MacroTileWidthC[k], 2420 &MetaRowByteC, 2421 &PixelPTEBytesPerRowC, 2422 &PTEBufferSizeNotExceededC, 2423 &v->dpte_row_width_chroma_ub[k], 2424 &v->dpte_row_height_chroma[k], 2425 &v->meta_req_width_chroma[k], 2426 &v->meta_req_height_chroma[k], 2427 &v->meta_row_width_chroma[k], 2428 &v->meta_row_height_chroma[k], 2429 &v->dummyinteger1, 2430 &v->dummyinteger2, 2431 &v->PixelPTEReqWidthC[k], 2432 &v->PixelPTEReqHeightC[k], 2433 &v->PTERequestSizeC[k], 2434 &v->dpde0_bytes_per_frame_ub_c[k], 2435 &v->meta_pte_bytes_per_frame_ub_c[k]); 2436 2437 v->PrefetchSourceLinesC[k] = CalculatePrefetchSourceLines( 2438 mode_lib, 2439 v->VRatioChroma[k], 2440 v->VTAPsChroma[k], 2441 v->Interlace[k], 2442 v->ProgressiveToInterlaceUnitInOPP, 2443 v->SwathHeightC[k], 2444 v->ViewportYStartC[k], 2445 &v->VInitPreFillC[k], 2446 &v->MaxNumSwathC[k]); 2447 } else { 2448 v->PTEBufferSizeInRequestsForLuma = v->PTEBufferSizeInRequestsLuma + v->PTEBufferSizeInRequestsChroma; 2449 v->PTEBufferSizeInRequestsForChroma = 0; 2450 PixelPTEBytesPerRowC = 0; 2451 PDEAndMetaPTEBytesFrameC = 0; 2452 MetaRowByteC = 0; 2453 v->MaxNumSwathC[k] = 0; 2454 v->PrefetchSourceLinesC[k] = 0; 2455 } 2456 2457 PDEAndMetaPTEBytesFrameY = 
CalculateVMAndRowBytes( 2458 mode_lib, 2459 v->DCCEnable[k], 2460 v->BlockHeight256BytesY[k], 2461 v->BlockWidth256BytesY[k], 2462 v->SourcePixelFormat[k], 2463 v->SurfaceTiling[k], 2464 v->BytePerPixelY[k], 2465 v->SourceScan[k], 2466 v->SwathWidthY[k], 2467 v->ViewportHeight[k], 2468 v->GPUVMEnable, 2469 v->HostVMEnable, 2470 v->HostVMMaxNonCachedPageTableLevels, 2471 v->GPUVMMinPageSize, 2472 v->HostVMMinPageSize, 2473 v->PTEBufferSizeInRequestsForLuma, 2474 v->PitchY[k], 2475 v->DCCMetaPitchY[k], 2476 &v->MacroTileWidthY[k], 2477 &MetaRowByteY, 2478 &PixelPTEBytesPerRowY, 2479 &PTEBufferSizeNotExceededY, 2480 &v->dpte_row_width_luma_ub[k], 2481 &v->dpte_row_height[k], 2482 &v->meta_req_width[k], 2483 &v->meta_req_height[k], 2484 &v->meta_row_width[k], 2485 &v->meta_row_height[k], 2486 &v->vm_group_bytes[k], 2487 &v->dpte_group_bytes[k], 2488 &v->PixelPTEReqWidthY[k], 2489 &v->PixelPTEReqHeightY[k], 2490 &v->PTERequestSizeY[k], 2491 &v->dpde0_bytes_per_frame_ub_l[k], 2492 &v->meta_pte_bytes_per_frame_ub_l[k]); 2493 2494 v->PrefetchSourceLinesY[k] = CalculatePrefetchSourceLines( 2495 mode_lib, 2496 v->VRatio[k], 2497 v->vtaps[k], 2498 v->Interlace[k], 2499 v->ProgressiveToInterlaceUnitInOPP, 2500 v->SwathHeightY[k], 2501 v->ViewportYStartY[k], 2502 &v->VInitPreFillY[k], 2503 &v->MaxNumSwathY[k]); 2504 v->PixelPTEBytesPerRow[k] = PixelPTEBytesPerRowY + PixelPTEBytesPerRowC; 2505 v->PDEAndMetaPTEBytesFrame[k] = PDEAndMetaPTEBytesFrameY + PDEAndMetaPTEBytesFrameC; 2506 v->MetaRowByte[k] = MetaRowByteY + MetaRowByteC; 2507 2508 CalculateRowBandwidth( 2509 v->GPUVMEnable, 2510 v->SourcePixelFormat[k], 2511 v->VRatio[k], 2512 v->VRatioChroma[k], 2513 v->DCCEnable[k], 2514 v->HTotal[k] / v->PixelClock[k], 2515 MetaRowByteY, 2516 MetaRowByteC, 2517 v->meta_row_height[k], 2518 v->meta_row_height_chroma[k], 2519 PixelPTEBytesPerRowY, 2520 PixelPTEBytesPerRowC, 2521 v->dpte_row_height[k], 2522 v->dpte_row_height_chroma[k], 2523 &v->meta_row_bw[k], 2524 &v->dpte_row_bw[k]); 
2525 } 2526 2527 v->TotalDCCActiveDPP = 0; 2528 v->TotalActiveDPP = 0; 2529 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 2530 v->TotalActiveDPP = v->TotalActiveDPP + v->DPPPerPlane[k]; 2531 if (v->DCCEnable[k]) 2532 v->TotalDCCActiveDPP = v->TotalDCCActiveDPP + v->DPPPerPlane[k]; 2533 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12 2534 || v->SourcePixelFormat[k] == dm_rgbe_alpha) 2535 NoChromaPlanes = false; 2536 } 2537 2538 ReorderBytes = v->NumberOfChannels 2539 * dml_max3( 2540 v->UrgentOutOfOrderReturnPerChannelPixelDataOnly, 2541 v->UrgentOutOfOrderReturnPerChannelPixelMixedWithVMData, 2542 v->UrgentOutOfOrderReturnPerChannelVMDataOnly); 2543 2544 VMDataOnlyReturnBW = dml_min( 2545 dml_min(v->ReturnBusWidth * v->DCFCLK, v->FabricClock * v->FabricDatapathToDCNDataReturn) 2546 * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0, 2547 v->DRAMSpeed * v->NumberOfChannels * v->DRAMChannelWidth 2548 * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyVMDataOnly / 100.0); 2549 2550 #ifdef __DML_VBA_DEBUG__ 2551 dml_print("DML::%s: v->ReturnBusWidth = %f\n", __func__, v->ReturnBusWidth); 2552 dml_print("DML::%s: v->DCFCLK = %f\n", __func__, v->DCFCLK); 2553 dml_print("DML::%s: v->FabricClock = %f\n", __func__, v->FabricClock); 2554 dml_print("DML::%s: v->FabricDatapathToDCNDataReturn = %f\n", __func__, v->FabricDatapathToDCNDataReturn); 2555 dml_print("DML::%s: v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency = %f\n", __func__, v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency); 2556 dml_print("DML::%s: v->DRAMSpeed = %f\n", __func__, v->DRAMSpeed); 2557 dml_print("DML::%s: v->NumberOfChannels = %f\n", __func__, v->NumberOfChannels); 2558 dml_print("DML::%s: v->DRAMChannelWidth = %f\n", __func__, v->DRAMChannelWidth); 2559 dml_print("DML::%s: v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyVMDataOnly = %f\n", __func__, 
v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyVMDataOnly); 2560 dml_print("DML::%s: VMDataOnlyReturnBW = %f\n", __func__, VMDataOnlyReturnBW); 2561 dml_print("DML::%s: ReturnBW = %f\n", __func__, v->ReturnBW); 2562 #endif 2563 2564 if (v->GPUVMEnable && v->HostVMEnable) 2565 HostVMInefficiencyFactor = v->ReturnBW / VMDataOnlyReturnBW; 2566 2567 v->UrgentExtraLatency = CalculateExtraLatency( 2568 v->RoundTripPingLatencyCycles, 2569 ReorderBytes, 2570 v->DCFCLK, 2571 v->TotalActiveDPP, 2572 v->PixelChunkSizeInKByte, 2573 v->TotalDCCActiveDPP, 2574 v->MetaChunkSize, 2575 v->ReturnBW, 2576 v->GPUVMEnable, 2577 v->HostVMEnable, 2578 v->NumberOfActivePlanes, 2579 v->DPPPerPlane, 2580 v->dpte_group_bytes, 2581 HostVMInefficiencyFactor, 2582 v->HostVMMinPageSize, 2583 v->HostVMMaxNonCachedPageTableLevels); 2584 2585 v->TCalc = 24.0 / v->DCFCLKDeepSleep; 2586 2587 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 2588 if (v->BlendingAndTiming[k] == k) { 2589 if (v->WritebackEnable[k] == true) { 2590 v->WritebackDelay[v->VoltageLevel][k] = v->WritebackLatency 2591 + CalculateWriteBackDelay( 2592 v->WritebackPixelFormat[k], 2593 v->WritebackHRatio[k], 2594 v->WritebackVRatio[k], 2595 v->WritebackVTaps[k], 2596 v->WritebackDestinationWidth[k], 2597 v->WritebackDestinationHeight[k], 2598 v->WritebackSourceHeight[k], 2599 v->HTotal[k]) / v->DISPCLK; 2600 } else 2601 v->WritebackDelay[v->VoltageLevel][k] = 0; 2602 for (j = 0; j < v->NumberOfActivePlanes; ++j) { 2603 if (v->BlendingAndTiming[j] == k && v->WritebackEnable[j] == true) { 2604 v->WritebackDelay[v->VoltageLevel][k] = dml_max( 2605 v->WritebackDelay[v->VoltageLevel][k], 2606 v->WritebackLatency 2607 + CalculateWriteBackDelay( 2608 v->WritebackPixelFormat[j], 2609 v->WritebackHRatio[j], 2610 v->WritebackVRatio[j], 2611 v->WritebackVTaps[j], 2612 v->WritebackDestinationWidth[j], 2613 v->WritebackDestinationHeight[j], 2614 v->WritebackSourceHeight[j], 2615 v->HTotal[k]) / v->DISPCLK); 2616 } 2617 } 2618 } 2619 } 2620 2621 
for (k = 0; k < v->NumberOfActivePlanes; ++k) 2622 for (j = 0; j < v->NumberOfActivePlanes; ++j) 2623 if (v->BlendingAndTiming[k] == j) 2624 v->WritebackDelay[v->VoltageLevel][k] = v->WritebackDelay[v->VoltageLevel][j]; 2625 2626 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 2627 v->MaxVStartupLines[k] = 2628 CalculateMaxVStartup( 2629 v->VTotal[k], 2630 v->VActive[k], 2631 v->VBlankNom[k], 2632 v->HTotal[k], 2633 v->PixelClock[k], 2634 v->ProgressiveToInterlaceUnitInOPP, 2635 v->Interlace[k], 2636 v->ip.VBlankNomDefaultUS, 2637 v->WritebackDelay[v->VoltageLevel][k]); 2638 2639 #ifdef __DML_VBA_DEBUG__ 2640 dml_print("DML::%s: k=%d MaxVStartupLines = %d\n", __func__, k, v->MaxVStartupLines[k]); 2641 dml_print("DML::%s: k=%d VoltageLevel = %d\n", __func__, k, v->VoltageLevel); 2642 dml_print("DML::%s: k=%d WritebackDelay = %f\n", __func__, k, v->WritebackDelay[v->VoltageLevel][k]); 2643 #endif 2644 } 2645 2646 v->MaximumMaxVStartupLines = 0; 2647 for (k = 0; k < v->NumberOfActivePlanes; ++k) 2648 v->MaximumMaxVStartupLines = dml_max(v->MaximumMaxVStartupLines, v->MaxVStartupLines[k]); 2649 2650 // VBA_DELTA 2651 // We don't really care to iterate between the various prefetch modes 2652 //v->PrefetchERROR = CalculateMinAndMaxPrefetchMode(v->AllowDRAMSelfRefreshOrDRAMClockChangeInVblank, &v->MinPrefetchMode, &v->MaxPrefetchMode); 2653 2654 v->UrgentLatency = CalculateUrgentLatency( 2655 v->UrgentLatencyPixelDataOnly, 2656 v->UrgentLatencyPixelMixedWithVMData, 2657 v->UrgentLatencyVMDataOnly, 2658 v->DoUrgentLatencyAdjustment, 2659 v->UrgentLatencyAdjustmentFabricClockComponent, 2660 v->UrgentLatencyAdjustmentFabricClockReference, 2661 v->FabricClock); 2662 2663 v->FractionOfUrgentBandwidth = 0.0; 2664 v->FractionOfUrgentBandwidthImmediateFlip = 0.0; 2665 2666 v->VStartupLines = __DML_VBA_MIN_VSTARTUP__; 2667 2668 do { 2669 double MaxTotalRDBandwidthNoUrgentBurst = 0.0; 2670 bool DestinationLineTimesForPrefetchLessThan2 = false; 2671 bool VRatioPrefetchMoreThan4 = 
false; 2672 double TWait = CalculateTWait(PrefetchMode, v->DRAMClockChangeLatency, v->UrgentLatency, v->SREnterPlusExitTime); 2673 2674 MaxTotalRDBandwidth = 0; 2675 2676 dml_print("DML::%s: Start loop: VStartup = %d\n", __func__, v->VStartupLines); 2677 2678 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 2679 Pipe myPipe; 2680 2681 myPipe.DPPCLK = v->DPPCLK[k]; 2682 myPipe.DISPCLK = v->DISPCLK; 2683 myPipe.PixelClock = v->PixelClock[k]; 2684 myPipe.DCFCLKDeepSleep = v->DCFCLKDeepSleep; 2685 myPipe.DPPPerPlane = v->DPPPerPlane[k]; 2686 myPipe.ScalerEnabled = v->ScalerEnabled[k]; 2687 myPipe.VRatio = v->VRatio[k]; 2688 myPipe.VRatioChroma = v->VRatioChroma[k]; 2689 myPipe.SourceScan = v->SourceScan[k]; 2690 myPipe.BlockWidth256BytesY = v->BlockWidth256BytesY[k]; 2691 myPipe.BlockHeight256BytesY = v->BlockHeight256BytesY[k]; 2692 myPipe.BlockWidth256BytesC = v->BlockWidth256BytesC[k]; 2693 myPipe.BlockHeight256BytesC = v->BlockHeight256BytesC[k]; 2694 myPipe.InterlaceEnable = v->Interlace[k]; 2695 myPipe.NumberOfCursors = v->NumberOfCursors[k]; 2696 myPipe.VBlank = v->VTotal[k] - v->VActive[k]; 2697 myPipe.HTotal = v->HTotal[k]; 2698 myPipe.DCCEnable = v->DCCEnable[k]; 2699 myPipe.ODMCombineIsEnabled = v->ODMCombineEnabled[k] == dm_odm_combine_mode_4to1 2700 || v->ODMCombineEnabled[k] == dm_odm_combine_mode_2to1; 2701 myPipe.SourcePixelFormat = v->SourcePixelFormat[k]; 2702 myPipe.BytePerPixelY = v->BytePerPixelY[k]; 2703 myPipe.BytePerPixelC = v->BytePerPixelC[k]; 2704 myPipe.ProgressiveToInterlaceUnitInOPP = v->ProgressiveToInterlaceUnitInOPP; 2705 v->ErrorResult[k] = CalculatePrefetchSchedule( 2706 mode_lib, 2707 HostVMInefficiencyFactor, 2708 &myPipe, 2709 v->DSCDelay[k], 2710 v->DPPCLKDelaySubtotal + v->DPPCLKDelayCNVCFormater, 2711 v->DPPCLKDelaySCL, 2712 v->DPPCLKDelaySCLLBOnly, 2713 v->DPPCLKDelayCNVCCursor, 2714 v->DISPCLKDelaySubtotal, 2715 (unsigned int) (v->SwathWidthY[k] / v->HRatio[k]), 2716 v->OutputFormat[k], 2717 v->MaxInterDCNTileRepeaters, 2718 
dml_min(v->VStartupLines, v->MaxVStartupLines[k]), 2719 v->MaxVStartupLines[k], 2720 v->GPUVMMaxPageTableLevels, 2721 v->GPUVMEnable, 2722 v->HostVMEnable, 2723 v->HostVMMaxNonCachedPageTableLevels, 2724 v->HostVMMinPageSize, 2725 v->DynamicMetadataEnable[k], 2726 v->DynamicMetadataVMEnabled, 2727 v->DynamicMetadataLinesBeforeActiveRequired[k], 2728 v->DynamicMetadataTransmittedBytes[k], 2729 v->UrgentLatency, 2730 v->UrgentExtraLatency, 2731 v->TCalc, 2732 v->PDEAndMetaPTEBytesFrame[k], 2733 v->MetaRowByte[k], 2734 v->PixelPTEBytesPerRow[k], 2735 v->PrefetchSourceLinesY[k], 2736 v->SwathWidthY[k], 2737 v->VInitPreFillY[k], 2738 v->MaxNumSwathY[k], 2739 v->PrefetchSourceLinesC[k], 2740 v->SwathWidthC[k], 2741 v->VInitPreFillC[k], 2742 v->MaxNumSwathC[k], 2743 v->swath_width_luma_ub[k], 2744 v->swath_width_chroma_ub[k], 2745 v->SwathHeightY[k], 2746 v->SwathHeightC[k], 2747 TWait, 2748 &v->DSTXAfterScaler[k], 2749 &v->DSTYAfterScaler[k], 2750 &v->DestinationLinesForPrefetch[k], 2751 &v->PrefetchBandwidth[k], 2752 &v->DestinationLinesToRequestVMInVBlank[k], 2753 &v->DestinationLinesToRequestRowInVBlank[k], 2754 &v->VRatioPrefetchY[k], 2755 &v->VRatioPrefetchC[k], 2756 &v->RequiredPrefetchPixDataBWLuma[k], 2757 &v->RequiredPrefetchPixDataBWChroma[k], 2758 &v->NotEnoughTimeForDynamicMetadata[k], 2759 &v->Tno_bw[k], 2760 &v->prefetch_vmrow_bw[k], 2761 &v->Tdmdl_vm[k], 2762 &v->Tdmdl[k], 2763 &v->TSetup[k], 2764 &v->VUpdateOffsetPix[k], 2765 &v->VUpdateWidthPix[k], 2766 &v->VReadyOffsetPix[k]); 2767 2768 #ifdef __DML_VBA_DEBUG__ 2769 dml_print("DML::%s: k=%0d Prefetch cal result=%0d\n", __func__, k, v->ErrorResult[k]); 2770 #endif 2771 v->VStartup[k] = dml_min(v->VStartupLines, v->MaxVStartupLines[k]); 2772 } 2773 2774 v->NoEnoughUrgentLatencyHiding = false; 2775 v->NoEnoughUrgentLatencyHidingPre = false; 2776 2777 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 2778 v->cursor_bw[k] = v->NumberOfCursors[k] * v->CursorWidth[k][0] * v->CursorBPP[k][0] / 8.0 2779 / 
(v->HTotal[k] / v->PixelClock[k]) * v->VRatio[k]; 2780 v->cursor_bw_pre[k] = v->NumberOfCursors[k] * v->CursorWidth[k][0] * v->CursorBPP[k][0] / 8.0 2781 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatioPrefetchY[k]; 2782 2783 CalculateUrgentBurstFactor( 2784 v->swath_width_luma_ub[k], 2785 v->swath_width_chroma_ub[k], 2786 v->SwathHeightY[k], 2787 v->SwathHeightC[k], 2788 v->HTotal[k] / v->PixelClock[k], 2789 v->UrgentLatency, 2790 v->CursorBufferSize, 2791 v->CursorWidth[k][0], 2792 v->CursorBPP[k][0], 2793 v->VRatio[k], 2794 v->VRatioChroma[k], 2795 v->BytePerPixelDETY[k], 2796 v->BytePerPixelDETC[k], 2797 v->DETBufferSizeY[k], 2798 v->DETBufferSizeC[k], 2799 &v->UrgBurstFactorCursor[k], 2800 &v->UrgBurstFactorLuma[k], 2801 &v->UrgBurstFactorChroma[k], 2802 &v->NoUrgentLatencyHiding[k]); 2803 2804 CalculateUrgentBurstFactor( 2805 v->swath_width_luma_ub[k], 2806 v->swath_width_chroma_ub[k], 2807 v->SwathHeightY[k], 2808 v->SwathHeightC[k], 2809 v->HTotal[k] / v->PixelClock[k], 2810 v->UrgentLatency, 2811 v->CursorBufferSize, 2812 v->CursorWidth[k][0], 2813 v->CursorBPP[k][0], 2814 v->VRatioPrefetchY[k], 2815 v->VRatioPrefetchC[k], 2816 v->BytePerPixelDETY[k], 2817 v->BytePerPixelDETC[k], 2818 v->DETBufferSizeY[k], 2819 v->DETBufferSizeC[k], 2820 &v->UrgBurstFactorCursorPre[k], 2821 &v->UrgBurstFactorLumaPre[k], 2822 &v->UrgBurstFactorChromaPre[k], 2823 &v->NoUrgentLatencyHidingPre[k]); 2824 2825 MaxTotalRDBandwidth = MaxTotalRDBandwidth 2826 + dml_max3( 2827 v->DPPPerPlane[k] * v->prefetch_vmrow_bw[k], 2828 v->ReadBandwidthPlaneLuma[k] * v->UrgBurstFactorLuma[k] 2829 + v->ReadBandwidthPlaneChroma[k] * v->UrgBurstFactorChroma[k] 2830 + v->cursor_bw[k] * v->UrgBurstFactorCursor[k] 2831 + v->DPPPerPlane[k] * (v->meta_row_bw[k] + v->dpte_row_bw[k]), 2832 v->DPPPerPlane[k] 2833 * (v->RequiredPrefetchPixDataBWLuma[k] * v->UrgBurstFactorLumaPre[k] 2834 + v->RequiredPrefetchPixDataBWChroma[k] * v->UrgBurstFactorChromaPre[k]) 2835 + v->cursor_bw_pre[k] * 
v->UrgBurstFactorCursorPre[k]); 2836 2837 MaxTotalRDBandwidthNoUrgentBurst = MaxTotalRDBandwidthNoUrgentBurst 2838 + dml_max3( 2839 v->DPPPerPlane[k] * v->prefetch_vmrow_bw[k], 2840 v->ReadBandwidthPlaneLuma[k] + v->ReadBandwidthPlaneChroma[k] + v->cursor_bw[k] 2841 + v->DPPPerPlane[k] * (v->meta_row_bw[k] + v->dpte_row_bw[k]), 2842 v->DPPPerPlane[k] * (v->RequiredPrefetchPixDataBWLuma[k] + v->RequiredPrefetchPixDataBWChroma[k]) 2843 + v->cursor_bw_pre[k]); 2844 2845 #ifdef __DML_VBA_DEBUG__ 2846 dml_print("DML::%s: k=%0d DPPPerPlane=%d\n", __func__, k, v->DPPPerPlane[k]); 2847 dml_print("DML::%s: k=%0d UrgBurstFactorLuma=%f\n", __func__, k, v->UrgBurstFactorLuma[k]); 2848 dml_print("DML::%s: k=%0d UrgBurstFactorChroma=%f\n", __func__, k, v->UrgBurstFactorChroma[k]); 2849 dml_print("DML::%s: k=%0d UrgBurstFactorLumaPre=%f\n", __func__, k, v->UrgBurstFactorLumaPre[k]); 2850 dml_print("DML::%s: k=%0d UrgBurstFactorChromaPre=%f\n", __func__, k, v->UrgBurstFactorChromaPre[k]); 2851 2852 dml_print("DML::%s: k=%0d VRatioPrefetchY=%f\n", __func__, k, v->VRatioPrefetchY[k]); 2853 dml_print("DML::%s: k=%0d VRatioY=%f\n", __func__, k, v->VRatio[k]); 2854 2855 dml_print("DML::%s: k=%0d prefetch_vmrow_bw=%f\n", __func__, k, v->prefetch_vmrow_bw[k]); 2856 dml_print("DML::%s: k=%0d ReadBandwidthPlaneLuma=%f\n", __func__, k, v->ReadBandwidthPlaneLuma[k]); 2857 dml_print("DML::%s: k=%0d ReadBandwidthPlaneChroma=%f\n", __func__, k, v->ReadBandwidthPlaneChroma[k]); 2858 dml_print("DML::%s: k=%0d cursor_bw=%f\n", __func__, k, v->cursor_bw[k]); 2859 dml_print("DML::%s: k=%0d meta_row_bw=%f\n", __func__, k, v->meta_row_bw[k]); 2860 dml_print("DML::%s: k=%0d dpte_row_bw=%f\n", __func__, k, v->dpte_row_bw[k]); 2861 dml_print("DML::%s: k=%0d RequiredPrefetchPixDataBWLuma=%f\n", __func__, k, v->RequiredPrefetchPixDataBWLuma[k]); 2862 dml_print("DML::%s: k=%0d RequiredPrefetchPixDataBWChroma=%f\n", __func__, k, v->RequiredPrefetchPixDataBWChroma[k]); 2863 dml_print("DML::%s: k=%0d 
cursor_bw_pre=%f\n", __func__, k, v->cursor_bw_pre[k]); 2864 dml_print("DML::%s: k=%0d MaxTotalRDBandwidthNoUrgentBurst=%f\n", __func__, k, MaxTotalRDBandwidthNoUrgentBurst); 2865 #endif 2866 2867 if (v->DestinationLinesForPrefetch[k] < 2) 2868 DestinationLineTimesForPrefetchLessThan2 = true; 2869 2870 if (v->VRatioPrefetchY[k] > 4 || v->VRatioPrefetchC[k] > 4) 2871 VRatioPrefetchMoreThan4 = true; 2872 2873 if (v->NoUrgentLatencyHiding[k] == true) 2874 v->NoEnoughUrgentLatencyHiding = true; 2875 2876 if (v->NoUrgentLatencyHidingPre[k] == true) 2877 v->NoEnoughUrgentLatencyHidingPre = true; 2878 } 2879 2880 v->FractionOfUrgentBandwidth = MaxTotalRDBandwidthNoUrgentBurst / v->ReturnBW; 2881 2882 #ifdef __DML_VBA_DEBUG__ 2883 dml_print("DML::%s: MaxTotalRDBandwidthNoUrgentBurst=%f\n", __func__, MaxTotalRDBandwidthNoUrgentBurst); 2884 dml_print("DML::%s: ReturnBW=%f\n", __func__, v->ReturnBW); 2885 dml_print("DML::%s: FractionOfUrgentBandwidth=%f\n", __func__, v->FractionOfUrgentBandwidth); 2886 #endif 2887 2888 if (MaxTotalRDBandwidth <= v->ReturnBW && v->NoEnoughUrgentLatencyHiding == 0 && v->NoEnoughUrgentLatencyHidingPre == 0 2889 && !VRatioPrefetchMoreThan4 && !DestinationLineTimesForPrefetchLessThan2) 2890 v->PrefetchModeSupported = true; 2891 else { 2892 v->PrefetchModeSupported = false; 2893 dml_print("DML::%s: ***failed***. Bandwidth violation. Results are NOT valid\n", __func__); 2894 dml_print("DML::%s: MaxTotalRDBandwidth:%f AvailReturnBandwidth:%f\n", __func__, MaxTotalRDBandwidth, v->ReturnBW); 2895 dml_print("DML::%s: VRatioPrefetch %s more than 4\n", __func__, (VRatioPrefetchMoreThan4) ? "is" : "is not"); 2896 dml_print("DML::%s: DestinationLines for Prefetch %s less than 2\n", __func__, (DestinationLineTimesForPrefetchLessThan2) ? "is" : "is not"); 2897 } 2898 2899 // PREVIOUS_ERROR 2900 // This error result check was done after the PrefetchModeSupported. 
So we will 2901 // still try to calculate flip schedule even prefetch mode not supported 2902 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 2903 if (v->ErrorResult[k] == true || v->NotEnoughTimeForDynamicMetadata[k] == true) { 2904 v->PrefetchModeSupported = false; 2905 dml_print("DML::%s: ***failed***. Prefetch schedule violation. Results are NOT valid\n", __func__); 2906 } 2907 } 2908 2909 if (v->PrefetchModeSupported == true && v->ImmediateFlipSupport == true) { 2910 v->BandwidthAvailableForImmediateFlip = v->ReturnBW; 2911 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 2912 v->BandwidthAvailableForImmediateFlip = v->BandwidthAvailableForImmediateFlip 2913 - dml_max( 2914 v->ReadBandwidthPlaneLuma[k] * v->UrgBurstFactorLuma[k] 2915 + v->ReadBandwidthPlaneChroma[k] * v->UrgBurstFactorChroma[k] 2916 + v->cursor_bw[k] * v->UrgBurstFactorCursor[k], 2917 v->DPPPerPlane[k] 2918 * (v->RequiredPrefetchPixDataBWLuma[k] * v->UrgBurstFactorLumaPre[k] 2919 + v->RequiredPrefetchPixDataBWChroma[k] * v->UrgBurstFactorChromaPre[k]) 2920 + v->cursor_bw_pre[k] * v->UrgBurstFactorCursorPre[k]); 2921 } 2922 2923 v->TotImmediateFlipBytes = 0; 2924 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 2925 v->TotImmediateFlipBytes = v->TotImmediateFlipBytes 2926 + v->DPPPerPlane[k] * (v->PDEAndMetaPTEBytesFrame[k] + v->MetaRowByte[k] + v->PixelPTEBytesPerRow[k]); 2927 } 2928 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 2929 CalculateFlipSchedule( 2930 mode_lib, 2931 HostVMInefficiencyFactor, 2932 v->UrgentExtraLatency, 2933 v->UrgentLatency, 2934 v->GPUVMMaxPageTableLevels, 2935 v->HostVMEnable, 2936 v->HostVMMaxNonCachedPageTableLevels, 2937 v->GPUVMEnable, 2938 v->HostVMMinPageSize, 2939 v->PDEAndMetaPTEBytesFrame[k], 2940 v->MetaRowByte[k], 2941 v->PixelPTEBytesPerRow[k], 2942 v->BandwidthAvailableForImmediateFlip, 2943 v->TotImmediateFlipBytes, 2944 v->SourcePixelFormat[k], 2945 v->HTotal[k] / v->PixelClock[k], 2946 v->VRatio[k], 2947 v->VRatioChroma[k], 2948 v->Tno_bw[k], 2949 
v->DCCEnable[k], 2950 v->dpte_row_height[k], 2951 v->meta_row_height[k], 2952 v->dpte_row_height_chroma[k], 2953 v->meta_row_height_chroma[k], 2954 &v->DestinationLinesToRequestVMInImmediateFlip[k], 2955 &v->DestinationLinesToRequestRowInImmediateFlip[k], 2956 &v->final_flip_bw[k], 2957 &v->ImmediateFlipSupportedForPipe[k]); 2958 } 2959 2960 v->total_dcn_read_bw_with_flip = 0.0; 2961 v->total_dcn_read_bw_with_flip_no_urgent_burst = 0.0; 2962 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 2963 v->total_dcn_read_bw_with_flip = v->total_dcn_read_bw_with_flip 2964 + dml_max3( 2965 v->DPPPerPlane[k] * v->prefetch_vmrow_bw[k], 2966 v->DPPPerPlane[k] * v->final_flip_bw[k] 2967 + v->ReadBandwidthLuma[k] * v->UrgBurstFactorLuma[k] 2968 + v->ReadBandwidthChroma[k] * v->UrgBurstFactorChroma[k] 2969 + v->cursor_bw[k] * v->UrgBurstFactorCursor[k], 2970 v->DPPPerPlane[k] 2971 * (v->final_flip_bw[k] 2972 + v->RequiredPrefetchPixDataBWLuma[k] * v->UrgBurstFactorLumaPre[k] 2973 + v->RequiredPrefetchPixDataBWChroma[k] * v->UrgBurstFactorChromaPre[k]) 2974 + v->cursor_bw_pre[k] * v->UrgBurstFactorCursorPre[k]); 2975 v->total_dcn_read_bw_with_flip_no_urgent_burst = v->total_dcn_read_bw_with_flip_no_urgent_burst 2976 + dml_max3( 2977 v->DPPPerPlane[k] * v->prefetch_vmrow_bw[k], 2978 v->DPPPerPlane[k] * v->final_flip_bw[k] + v->ReadBandwidthPlaneLuma[k] 2979 + v->ReadBandwidthPlaneChroma[k] + v->cursor_bw[k], 2980 v->DPPPerPlane[k] 2981 * (v->final_flip_bw[k] + v->RequiredPrefetchPixDataBWLuma[k] 2982 + v->RequiredPrefetchPixDataBWChroma[k]) + v->cursor_bw_pre[k]); 2983 } 2984 v->FractionOfUrgentBandwidthImmediateFlip = v->total_dcn_read_bw_with_flip_no_urgent_burst / v->ReturnBW; 2985 2986 v->ImmediateFlipSupported = true; 2987 if (v->total_dcn_read_bw_with_flip > v->ReturnBW) { 2988 #ifdef __DML_VBA_DEBUG__ 2989 dml_print("DML::%s: total_dcn_read_bw_with_flip %f (bw w/ flip too high!)\n", __func__, v->total_dcn_read_bw_with_flip); 2990 #endif 2991 v->ImmediateFlipSupported = false; 
2992 v->total_dcn_read_bw_with_flip = MaxTotalRDBandwidth; 2993 } 2994 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 2995 if (v->ImmediateFlipSupportedForPipe[k] == false) { 2996 #ifdef __DML_VBA_DEBUG__ 2997 dml_print("DML::%s: Pipe %0d not supporting iflip\n", __func__, k); 2998 #endif 2999 v->ImmediateFlipSupported = false; 3000 } 3001 } 3002 } else { 3003 v->ImmediateFlipSupported = false; 3004 } 3005 3006 v->PrefetchAndImmediateFlipSupported = 3007 (v->PrefetchModeSupported == true && ((!v->ImmediateFlipSupport && !v->HostVMEnable 3008 && v->ImmediateFlipRequirement[0] != dm_immediate_flip_required) || 3009 v->ImmediateFlipSupported)) ? true : false; 3010 #ifdef __DML_VBA_DEBUG__ 3011 dml_print("DML::%s: PrefetchModeSupported %d\n", __func__, v->PrefetchModeSupported); 3012 dml_print("DML::%s: ImmediateFlipRequirement %d\n", __func__, v->ImmediateFlipRequirement == dm_immediate_flip_required); 3013 dml_print("DML::%s: ImmediateFlipSupported %d\n", __func__, v->ImmediateFlipSupported); 3014 dml_print("DML::%s: ImmediateFlipSupport %d\n", __func__, v->ImmediateFlipSupport); 3015 dml_print("DML::%s: HostVMEnable %d\n", __func__, v->HostVMEnable); 3016 dml_print("DML::%s: PrefetchAndImmediateFlipSupported %d\n", __func__, v->PrefetchAndImmediateFlipSupported); 3017 #endif 3018 dml_print("DML::%s: Done loop: Vstartup=%d, Max Vstartup is %d\n", __func__, v->VStartupLines, v->MaximumMaxVStartupLines); 3019 3020 v->VStartupLines = v->VStartupLines + 1; 3021 } while (!v->PrefetchAndImmediateFlipSupported && v->VStartupLines <= v->MaximumMaxVStartupLines); 3022 ASSERT(v->PrefetchAndImmediateFlipSupported); 3023 3024 // Unbounded Request Enabled 3025 CalculateUnboundedRequestAndCompressedBufferSize( 3026 v->DETBufferSizeInKByte[0], 3027 v->ConfigReturnBufferSizeInKByte, 3028 v->UseUnboundedRequesting, 3029 v->TotalActiveDPP, 3030 NoChromaPlanes, 3031 v->MaxNumDPP, 3032 v->CompressedBufferSegmentSizeInkByte, 3033 v->Output, 3034 &v->UnboundedRequestEnabled, 3035 
&v->CompressedBufferSizeInkByte); 3036 3037 //Watermarks and NB P-State/DRAM Clock Change Support 3038 { 3039 enum clock_change_support DRAMClockChangeSupport; // dummy 3040 3041 CalculateWatermarksAndDRAMSpeedChangeSupport( 3042 mode_lib, 3043 PrefetchMode, 3044 v->NumberOfActivePlanes, 3045 v->MaxLineBufferLines, 3046 v->LineBufferSize, 3047 v->WritebackInterfaceBufferSize, 3048 v->DCFCLK, 3049 v->ReturnBW, 3050 v->SynchronizedVBlank, 3051 v->dpte_group_bytes, 3052 v->MetaChunkSize, 3053 v->UrgentLatency, 3054 v->UrgentExtraLatency, 3055 v->WritebackLatency, 3056 v->WritebackChunkSize, 3057 v->SOCCLK, 3058 v->DRAMClockChangeLatency, 3059 v->SRExitTime, 3060 v->SREnterPlusExitTime, 3061 v->SRExitZ8Time, 3062 v->SREnterPlusExitZ8Time, 3063 v->DCFCLKDeepSleep, 3064 v->DETBufferSizeY, 3065 v->DETBufferSizeC, 3066 v->SwathHeightY, 3067 v->SwathHeightC, 3068 v->LBBitPerPixel, 3069 v->SwathWidthY, 3070 v->SwathWidthC, 3071 v->HRatio, 3072 v->HRatioChroma, 3073 v->vtaps, 3074 v->VTAPsChroma, 3075 v->VRatio, 3076 v->VRatioChroma, 3077 v->HTotal, 3078 v->PixelClock, 3079 v->BlendingAndTiming, 3080 v->DPPPerPlane, 3081 v->BytePerPixelDETY, 3082 v->BytePerPixelDETC, 3083 v->DSTXAfterScaler, 3084 v->DSTYAfterScaler, 3085 v->WritebackEnable, 3086 v->WritebackPixelFormat, 3087 v->WritebackDestinationWidth, 3088 v->WritebackDestinationHeight, 3089 v->WritebackSourceHeight, 3090 v->UnboundedRequestEnabled, 3091 v->CompressedBufferSizeInkByte, 3092 &DRAMClockChangeSupport, 3093 &v->UrgentWatermark, 3094 &v->WritebackUrgentWatermark, 3095 &v->DRAMClockChangeWatermark, 3096 &v->WritebackDRAMClockChangeWatermark, 3097 &v->StutterExitWatermark, 3098 &v->StutterEnterPlusExitWatermark, 3099 &v->Z8StutterExitWatermark, 3100 &v->Z8StutterEnterPlusExitWatermark, 3101 &v->MinActiveDRAMClockChangeLatencySupported); 3102 3103 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 3104 if (v->WritebackEnable[k] == true) { 3105 v->WritebackAllowDRAMClockChangeEndPosition[k] = dml_max( 3106 0, 3107 
v->VStartup[k] * v->HTotal[k] / v->PixelClock[k] - v->WritebackDRAMClockChangeWatermark); 3108 } else { 3109 v->WritebackAllowDRAMClockChangeEndPosition[k] = 0; 3110 } 3111 } 3112 } 3113 3114 //Display Pipeline Delivery Time in Prefetch, Groups 3115 CalculatePixelDeliveryTimes( 3116 v->NumberOfActivePlanes, 3117 v->VRatio, 3118 v->VRatioChroma, 3119 v->VRatioPrefetchY, 3120 v->VRatioPrefetchC, 3121 v->swath_width_luma_ub, 3122 v->swath_width_chroma_ub, 3123 v->DPPPerPlane, 3124 v->HRatio, 3125 v->HRatioChroma, 3126 v->PixelClock, 3127 v->PSCL_THROUGHPUT_LUMA, 3128 v->PSCL_THROUGHPUT_CHROMA, 3129 v->DPPCLK, 3130 v->BytePerPixelC, 3131 v->SourceScan, 3132 v->NumberOfCursors, 3133 v->CursorWidth, 3134 v->CursorBPP, 3135 v->BlockWidth256BytesY, 3136 v->BlockHeight256BytesY, 3137 v->BlockWidth256BytesC, 3138 v->BlockHeight256BytesC, 3139 v->DisplayPipeLineDeliveryTimeLuma, 3140 v->DisplayPipeLineDeliveryTimeChroma, 3141 v->DisplayPipeLineDeliveryTimeLumaPrefetch, 3142 v->DisplayPipeLineDeliveryTimeChromaPrefetch, 3143 v->DisplayPipeRequestDeliveryTimeLuma, 3144 v->DisplayPipeRequestDeliveryTimeChroma, 3145 v->DisplayPipeRequestDeliveryTimeLumaPrefetch, 3146 v->DisplayPipeRequestDeliveryTimeChromaPrefetch, 3147 v->CursorRequestDeliveryTime, 3148 v->CursorRequestDeliveryTimePrefetch); 3149 3150 CalculateMetaAndPTETimes( 3151 v->NumberOfActivePlanes, 3152 v->GPUVMEnable, 3153 v->MetaChunkSize, 3154 v->MinMetaChunkSizeBytes, 3155 v->HTotal, 3156 v->VRatio, 3157 v->VRatioChroma, 3158 v->DestinationLinesToRequestRowInVBlank, 3159 v->DestinationLinesToRequestRowInImmediateFlip, 3160 v->DCCEnable, 3161 v->PixelClock, 3162 v->BytePerPixelY, 3163 v->BytePerPixelC, 3164 v->SourceScan, 3165 v->dpte_row_height, 3166 v->dpte_row_height_chroma, 3167 v->meta_row_width, 3168 v->meta_row_width_chroma, 3169 v->meta_row_height, 3170 v->meta_row_height_chroma, 3171 v->meta_req_width, 3172 v->meta_req_width_chroma, 3173 v->meta_req_height, 3174 v->meta_req_height_chroma, 3175 
v->dpte_group_bytes, 3176 v->PTERequestSizeY, 3177 v->PTERequestSizeC, 3178 v->PixelPTEReqWidthY, 3179 v->PixelPTEReqHeightY, 3180 v->PixelPTEReqWidthC, 3181 v->PixelPTEReqHeightC, 3182 v->dpte_row_width_luma_ub, 3183 v->dpte_row_width_chroma_ub, 3184 v->DST_Y_PER_PTE_ROW_NOM_L, 3185 v->DST_Y_PER_PTE_ROW_NOM_C, 3186 v->DST_Y_PER_META_ROW_NOM_L, 3187 v->DST_Y_PER_META_ROW_NOM_C, 3188 v->TimePerMetaChunkNominal, 3189 v->TimePerChromaMetaChunkNominal, 3190 v->TimePerMetaChunkVBlank, 3191 v->TimePerChromaMetaChunkVBlank, 3192 v->TimePerMetaChunkFlip, 3193 v->TimePerChromaMetaChunkFlip, 3194 v->time_per_pte_group_nom_luma, 3195 v->time_per_pte_group_vblank_luma, 3196 v->time_per_pte_group_flip_luma, 3197 v->time_per_pte_group_nom_chroma, 3198 v->time_per_pte_group_vblank_chroma, 3199 v->time_per_pte_group_flip_chroma); 3200 3201 CalculateVMGroupAndRequestTimes( 3202 v->NumberOfActivePlanes, 3203 v->GPUVMEnable, 3204 v->GPUVMMaxPageTableLevels, 3205 v->HTotal, 3206 v->BytePerPixelC, 3207 v->DestinationLinesToRequestVMInVBlank, 3208 v->DestinationLinesToRequestVMInImmediateFlip, 3209 v->DCCEnable, 3210 v->PixelClock, 3211 v->dpte_row_width_luma_ub, 3212 v->dpte_row_width_chroma_ub, 3213 v->vm_group_bytes, 3214 v->dpde0_bytes_per_frame_ub_l, 3215 v->dpde0_bytes_per_frame_ub_c, 3216 v->meta_pte_bytes_per_frame_ub_l, 3217 v->meta_pte_bytes_per_frame_ub_c, 3218 v->TimePerVMGroupVBlank, 3219 v->TimePerVMGroupFlip, 3220 v->TimePerVMRequestVBlank, 3221 v->TimePerVMRequestFlip); 3222 3223 // Min TTUVBlank 3224 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 3225 if (PrefetchMode == 0) { 3226 v->AllowDRAMClockChangeDuringVBlank[k] = true; 3227 v->AllowDRAMSelfRefreshDuringVBlank[k] = true; 3228 v->MinTTUVBlank[k] = dml_max( 3229 v->DRAMClockChangeWatermark, 3230 dml_max(v->StutterEnterPlusExitWatermark, v->UrgentWatermark)); 3231 } else if (PrefetchMode == 1) { 3232 v->AllowDRAMClockChangeDuringVBlank[k] = false; 3233 v->AllowDRAMSelfRefreshDuringVBlank[k] = true; 3234 
v->MinTTUVBlank[k] = dml_max(v->StutterEnterPlusExitWatermark, v->UrgentWatermark); 3235 } else { 3236 v->AllowDRAMClockChangeDuringVBlank[k] = false; 3237 v->AllowDRAMSelfRefreshDuringVBlank[k] = false; 3238 v->MinTTUVBlank[k] = v->UrgentWatermark; 3239 } 3240 if (!v->DynamicMetadataEnable[k]) 3241 v->MinTTUVBlank[k] = v->TCalc + v->MinTTUVBlank[k]; 3242 } 3243 3244 // DCC Configuration 3245 v->ActiveDPPs = 0; 3246 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 3247 CalculateDCCConfiguration(v->DCCEnable[k], false, // We should always know the direction DCCProgrammingAssumesScanDirectionUnknown, 3248 v->SourcePixelFormat[k], 3249 v->SurfaceWidthY[k], 3250 v->SurfaceWidthC[k], 3251 v->SurfaceHeightY[k], 3252 v->SurfaceHeightC[k], 3253 v->DETBufferSizeInKByte[0] * 1024, 3254 v->BlockHeight256BytesY[k], 3255 v->BlockHeight256BytesC[k], 3256 v->SurfaceTiling[k], 3257 v->BytePerPixelY[k], 3258 v->BytePerPixelC[k], 3259 v->BytePerPixelDETY[k], 3260 v->BytePerPixelDETC[k], 3261 v->SourceScan[k], 3262 &v->DCCYMaxUncompressedBlock[k], 3263 &v->DCCCMaxUncompressedBlock[k], 3264 &v->DCCYMaxCompressedBlock[k], 3265 &v->DCCCMaxCompressedBlock[k], 3266 &v->DCCYIndependentBlock[k], 3267 &v->DCCCIndependentBlock[k]); 3268 } 3269 3270 // VStartup Adjustment 3271 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 3272 bool isInterlaceTiming; 3273 double Tvstartup_margin = (v->MaxVStartupLines[k] - v->VStartup[k]) * v->HTotal[k] / v->PixelClock[k]; 3274 #ifdef __DML_VBA_DEBUG__ 3275 dml_print("DML::%s: k=%d, MinTTUVBlank = %f (before margin)\n", __func__, k, v->MinTTUVBlank[k]); 3276 #endif 3277 3278 v->MinTTUVBlank[k] = v->MinTTUVBlank[k] + Tvstartup_margin; 3279 3280 #ifdef __DML_VBA_DEBUG__ 3281 dml_print("DML::%s: k=%d, Tvstartup_margin = %f\n", __func__, k, Tvstartup_margin); 3282 dml_print("DML::%s: k=%d, MaxVStartupLines = %d\n", __func__, k, v->MaxVStartupLines[k]); 3283 dml_print("DML::%s: k=%d, VStartup = %d\n", __func__, k, v->VStartup[k]); 3284 dml_print("DML::%s: k=%d, 
MinTTUVBlank = %f\n", __func__, k, v->MinTTUVBlank[k]); 3285 #endif 3286 3287 v->Tdmdl[k] = v->Tdmdl[k] + Tvstartup_margin; 3288 if (v->DynamicMetadataEnable[k] && v->DynamicMetadataVMEnabled) { 3289 v->Tdmdl_vm[k] = v->Tdmdl_vm[k] + Tvstartup_margin; 3290 } 3291 3292 isInterlaceTiming = (v->Interlace[k] && !v->ProgressiveToInterlaceUnitInOPP); 3293 v->VStartup[k] = (isInterlaceTiming ? (2 * v->MaxVStartupLines[k]) : v->MaxVStartupLines[k]); 3294 if (v->Interlace[k] && !v->ProgressiveToInterlaceUnitInOPP) { 3295 v->MIN_DST_Y_NEXT_START[k] = dml_floor((v->VTotal[k] - v->VFrontPorch[k] + v->VTotal[k] - v->VActive[k] - v->VStartup[k]) / 2.0, 1.0); 3296 } else { 3297 v->MIN_DST_Y_NEXT_START[k] = v->VTotal[k] - v->VFrontPorch[k] + v->VTotal[k] - v->VActive[k] - v->VStartup[k]; 3298 } 3299 v->MIN_DST_Y_NEXT_START[k] += dml_floor(4.0 * v->TSetup[k] / (double)v->HTotal[k] / v->PixelClock[k], 1.0) / 4.0; 3300 if (((v->VUpdateOffsetPix[k] + v->VUpdateWidthPix[k] + v->VReadyOffsetPix[k]) / v->HTotal[k]) 3301 <= (isInterlaceTiming ? 
3302 dml_floor((v->VTotal[k] - v->VActive[k] - v->VFrontPorch[k] - v->VStartup[k]) / 2.0, 1.0) : 3303 (int) (v->VTotal[k] - v->VActive[k] - v->VFrontPorch[k] - v->VStartup[k]))) { 3304 v->VREADY_AT_OR_AFTER_VSYNC[k] = true; 3305 } else { 3306 v->VREADY_AT_OR_AFTER_VSYNC[k] = false; 3307 } 3308 #ifdef __DML_VBA_DEBUG__ 3309 dml_print("DML::%s: k=%d, VStartup = %d (max)\n", __func__, k, v->VStartup[k]); 3310 dml_print("DML::%s: k=%d, VUpdateOffsetPix = %d\n", __func__, k, v->VUpdateOffsetPix[k]); 3311 dml_print("DML::%s: k=%d, VUpdateWidthPix = %d\n", __func__, k, v->VUpdateWidthPix[k]); 3312 dml_print("DML::%s: k=%d, VReadyOffsetPix = %d\n", __func__, k, v->VReadyOffsetPix[k]); 3313 dml_print("DML::%s: k=%d, HTotal = %d\n", __func__, k, v->HTotal[k]); 3314 dml_print("DML::%s: k=%d, VTotal = %d\n", __func__, k, v->VTotal[k]); 3315 dml_print("DML::%s: k=%d, VActive = %d\n", __func__, k, v->VActive[k]); 3316 dml_print("DML::%s: k=%d, VFrontPorch = %d\n", __func__, k, v->VFrontPorch[k]); 3317 dml_print("DML::%s: k=%d, VStartup = %d\n", __func__, k, v->VStartup[k]); 3318 dml_print("DML::%s: k=%d, MIN_DST_Y_NEXT_START = %f\n", __func__, k, v->MIN_DST_Y_NEXT_START[k]); 3319 dml_print("DML::%s: k=%d, VREADY_AT_OR_AFTER_VSYNC = %d\n", __func__, k, v->VREADY_AT_OR_AFTER_VSYNC[k]); 3320 #endif 3321 } 3322 3323 { 3324 //Maximum Bandwidth Used 3325 double TotalWRBandwidth = 0; 3326 double MaxPerPlaneVActiveWRBandwidth = 0; 3327 double WRBandwidth = 0; 3328 3329 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 3330 if (v->WritebackEnable[k] == true && v->WritebackPixelFormat[k] == dm_444_32) { 3331 WRBandwidth = v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k] 3332 / (v->HTotal[k] * v->WritebackSourceHeight[k] / v->PixelClock[k]) * 4; 3333 } else if (v->WritebackEnable[k] == true) { 3334 WRBandwidth = v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k] 3335 / (v->HTotal[k] * v->WritebackSourceHeight[k] / v->PixelClock[k]) * 8; 3336 } 3337 
TotalWRBandwidth = TotalWRBandwidth + WRBandwidth; 3338 MaxPerPlaneVActiveWRBandwidth = dml_max(MaxPerPlaneVActiveWRBandwidth, WRBandwidth); 3339 } 3340 3341 v->TotalDataReadBandwidth = 0; 3342 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 3343 v->TotalDataReadBandwidth = v->TotalDataReadBandwidth + v->ReadBandwidthPlaneLuma[k] + v->ReadBandwidthPlaneChroma[k]; 3344 } 3345 } 3346 // Stutter Efficiency 3347 CalculateStutterEfficiency( 3348 mode_lib, 3349 v->CompressedBufferSizeInkByte, 3350 v->UnboundedRequestEnabled, 3351 v->ConfigReturnBufferSizeInKByte, 3352 v->MetaFIFOSizeInKEntries, 3353 v->ZeroSizeBufferEntries, 3354 v->NumberOfActivePlanes, 3355 v->ROBBufferSizeInKByte, 3356 v->TotalDataReadBandwidth, 3357 v->DCFCLK, 3358 v->ReturnBW, 3359 v->COMPBUF_RESERVED_SPACE_64B, 3360 v->COMPBUF_RESERVED_SPACE_ZS, 3361 v->SRExitTime, 3362 v->SRExitZ8Time, 3363 v->SynchronizedVBlank, 3364 v->StutterEnterPlusExitWatermark, 3365 v->Z8StutterEnterPlusExitWatermark, 3366 v->ProgressiveToInterlaceUnitInOPP, 3367 v->Interlace, 3368 v->MinTTUVBlank, 3369 v->DPPPerPlane, 3370 v->DETBufferSizeY, 3371 v->BytePerPixelY, 3372 v->BytePerPixelDETY, 3373 v->SwathWidthY, 3374 v->SwathHeightY, 3375 v->SwathHeightC, 3376 v->DCCRateLuma, 3377 v->DCCRateChroma, 3378 v->DCCFractionOfZeroSizeRequestsLuma, 3379 v->DCCFractionOfZeroSizeRequestsChroma, 3380 v->HTotal, 3381 v->VTotal, 3382 v->PixelClock, 3383 v->VRatio, 3384 v->SourceScan, 3385 v->BlockHeight256BytesY, 3386 v->BlockWidth256BytesY, 3387 v->BlockHeight256BytesC, 3388 v->BlockWidth256BytesC, 3389 v->DCCYMaxUncompressedBlock, 3390 v->DCCCMaxUncompressedBlock, 3391 v->VActive, 3392 v->DCCEnable, 3393 v->WritebackEnable, 3394 v->ReadBandwidthPlaneLuma, 3395 v->ReadBandwidthPlaneChroma, 3396 v->meta_row_bw, 3397 v->dpte_row_bw, 3398 &v->StutterEfficiencyNotIncludingVBlank, 3399 &v->StutterEfficiency, 3400 &v->NumberOfStutterBurstsPerFrame, 3401 &v->Z8StutterEfficiencyNotIncludingVBlank, 3402 &v->Z8StutterEfficiency, 3403 
&v->Z8NumberOfStutterBurstsPerFrame, 3404 &v->StutterPeriod); 3405 } 3406 3407 static void DisplayPipeConfiguration(struct display_mode_lib *mode_lib) 3408 { 3409 struct vba_vars_st *v = &mode_lib->vba; 3410 // Display Pipe Configuration 3411 double BytePerPixDETY[DC__NUM_DPP__MAX]; 3412 double BytePerPixDETC[DC__NUM_DPP__MAX]; 3413 int BytePerPixY[DC__NUM_DPP__MAX]; 3414 int BytePerPixC[DC__NUM_DPP__MAX]; 3415 int Read256BytesBlockHeightY[DC__NUM_DPP__MAX]; 3416 int Read256BytesBlockHeightC[DC__NUM_DPP__MAX]; 3417 int Read256BytesBlockWidthY[DC__NUM_DPP__MAX]; 3418 int Read256BytesBlockWidthC[DC__NUM_DPP__MAX]; 3419 double dummy1[DC__NUM_DPP__MAX]; 3420 double dummy2[DC__NUM_DPP__MAX]; 3421 double dummy3[DC__NUM_DPP__MAX]; 3422 double dummy4[DC__NUM_DPP__MAX]; 3423 int dummy5[DC__NUM_DPP__MAX]; 3424 int dummy6[DC__NUM_DPP__MAX]; 3425 bool dummy7[DC__NUM_DPP__MAX]; 3426 bool dummysinglestring; 3427 3428 unsigned int k; 3429 3430 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 3431 3432 CalculateBytePerPixelAnd256BBlockSizes( 3433 v->SourcePixelFormat[k], 3434 v->SurfaceTiling[k], 3435 &BytePerPixY[k], 3436 &BytePerPixC[k], 3437 &BytePerPixDETY[k], 3438 &BytePerPixDETC[k], 3439 &Read256BytesBlockHeightY[k], 3440 &Read256BytesBlockHeightC[k], 3441 &Read256BytesBlockWidthY[k], 3442 &Read256BytesBlockWidthC[k]); 3443 } 3444 3445 CalculateSwathAndDETConfiguration( 3446 false, 3447 v->NumberOfActivePlanes, 3448 v->DETBufferSizeInKByte[0], 3449 dummy1, 3450 dummy2, 3451 v->SourceScan, 3452 v->SourcePixelFormat, 3453 v->SurfaceTiling, 3454 v->ViewportWidth, 3455 v->ViewportHeight, 3456 v->SurfaceWidthY, 3457 v->SurfaceWidthC, 3458 v->SurfaceHeightY, 3459 v->SurfaceHeightC, 3460 Read256BytesBlockHeightY, 3461 Read256BytesBlockHeightC, 3462 Read256BytesBlockWidthY, 3463 Read256BytesBlockWidthC, 3464 v->ODMCombineEnabled, 3465 v->BlendingAndTiming, 3466 BytePerPixY, 3467 BytePerPixC, 3468 BytePerPixDETY, 3469 BytePerPixDETC, 3470 v->HActive, 3471 v->HRatio, 3472 
v->HRatioChroma, 3473 v->DPPPerPlane, 3474 dummy5, 3475 dummy6, 3476 dummy3, 3477 dummy4, 3478 v->SwathHeightY, 3479 v->SwathHeightC, 3480 v->DETBufferSizeY, 3481 v->DETBufferSizeC, 3482 dummy7, 3483 &dummysinglestring); 3484 } 3485 3486 static bool CalculateBytePerPixelAnd256BBlockSizes( 3487 enum source_format_class SourcePixelFormat, 3488 enum dm_swizzle_mode SurfaceTiling, 3489 unsigned int *BytePerPixelY, 3490 unsigned int *BytePerPixelC, 3491 double *BytePerPixelDETY, 3492 double *BytePerPixelDETC, 3493 unsigned int *BlockHeight256BytesY, 3494 unsigned int *BlockHeight256BytesC, 3495 unsigned int *BlockWidth256BytesY, 3496 unsigned int *BlockWidth256BytesC) 3497 { 3498 if (SourcePixelFormat == dm_444_64) { 3499 *BytePerPixelDETY = 8; 3500 *BytePerPixelDETC = 0; 3501 *BytePerPixelY = 8; 3502 *BytePerPixelC = 0; 3503 } else if (SourcePixelFormat == dm_444_32 || SourcePixelFormat == dm_rgbe) { 3504 *BytePerPixelDETY = 4; 3505 *BytePerPixelDETC = 0; 3506 *BytePerPixelY = 4; 3507 *BytePerPixelC = 0; 3508 } else if (SourcePixelFormat == dm_444_16) { 3509 *BytePerPixelDETY = 2; 3510 *BytePerPixelDETC = 0; 3511 *BytePerPixelY = 2; 3512 *BytePerPixelC = 0; 3513 } else if (SourcePixelFormat == dm_444_8) { 3514 *BytePerPixelDETY = 1; 3515 *BytePerPixelDETC = 0; 3516 *BytePerPixelY = 1; 3517 *BytePerPixelC = 0; 3518 } else if (SourcePixelFormat == dm_rgbe_alpha) { 3519 *BytePerPixelDETY = 4; 3520 *BytePerPixelDETC = 1; 3521 *BytePerPixelY = 4; 3522 *BytePerPixelC = 1; 3523 } else if (SourcePixelFormat == dm_420_8) { 3524 *BytePerPixelDETY = 1; 3525 *BytePerPixelDETC = 2; 3526 *BytePerPixelY = 1; 3527 *BytePerPixelC = 2; 3528 } else if (SourcePixelFormat == dm_420_12) { 3529 *BytePerPixelDETY = 2; 3530 *BytePerPixelDETC = 4; 3531 *BytePerPixelY = 2; 3532 *BytePerPixelC = 4; 3533 } else { 3534 *BytePerPixelDETY = 4.0 / 3; 3535 *BytePerPixelDETC = 8.0 / 3; 3536 *BytePerPixelY = 2; 3537 *BytePerPixelC = 4; 3538 } 3539 3540 if ((SourcePixelFormat == dm_444_64 || 
/*
 * CalculateTWait - select the wait time that applies to a prefetch mode.
 *
 *   PrefetchMode 0: max(DRAMClockChangeLatency + UrgentLatency,
 *                       SREnterPlusExitTime, UrgentLatency)
 *   PrefetchMode 1: max(SREnterPlusExitTime, UrgentLatency)
 *   any other mode: UrgentLatency
 *
 * Returns the selected value; no state is modified.
 */
static double CalculateTWait(unsigned int PrefetchMode, double DRAMClockChangeLatency, double UrgentLatency, double SREnterPlusExitTime)
{
	double SelfRefreshOrUrgent;

	switch (PrefetchMode) {
	case 0:
		/* DRAM clock change must also be covered in this mode. */
		SelfRefreshOrUrgent = dml_max(SREnterPlusExitTime, UrgentLatency);
		return dml_max(DRAMClockChangeLatency + UrgentLatency, SelfRefreshOrUrgent);
	case 1:
		return dml_max(SREnterPlusExitTime, UrgentLatency);
	default:
		return UrgentLatency;
	}
}
/*
 * CalculateVupdateAndDynamicMetadataParameters - derive the VUPDATE / VREADY
 * pixel quantities and the dynamic-metadata timing terms for one pipe.
 *
 * Every output pointer is written unconditionally:
 *   *VUpdateWidthPix  = ceil((14 / DCFClkDeepSleep + 12 / DPPCLK
 *                             + repeater delay) * PixelClock)
 *   *VReadyOffsetPix  = ceil(max(150 / DPPCLK, repeater delay
 *                             + 20 / DCFClkDeepSleep + 10 / DPPCLK) * PixelClock)
 *   *VUpdateOffsetPix = ceil(HTotal / 4)
 *   *TSetup           = (VUpdateOffsetPix + VUpdateWidthPix + VReadyOffsetPix)
 *                       / PixelClock
 *   *Tdmbf            = DynamicMetadataTransmittedBytes / 4 / DISPCLK
 *   *Tdmec            = HTotal / PixelClock
 *   *Tdmsks           = half of the vertical blank (VBlank * HTotal / PixelClock / 2)
 *                       when DynamicMetadataLinesBeforeActiveRequired is 0,
 *                       otherwise that many line times; halved again when
 *                       InterlaceEnable == 1 and ProgressiveToInterlaceUnitInOPP
 *                       is false.
 *
 * where repeater delay = MaxInterDCNTileRepeaters * (2 / DPPCLK + 3 / DISPCLK).
 */
static void CalculateVupdateAndDynamicMetadataParameters(
		int MaxInterDCNTileRepeaters,
		double DPPCLK,
		double DISPCLK,
		double DCFClkDeepSleep,
		double PixelClock,
		int HTotal,
		int VBlank,
		int DynamicMetadataTransmittedBytes,
		int DynamicMetadataLinesBeforeActiveRequired,
		int InterlaceEnable,
		bool ProgressiveToInterlaceUnitInOPP,
		double *TSetup,
		double *Tdmbf,
		double *Tdmec,
		double *Tdmsks,
		int *VUpdateOffsetPix,
		double *VUpdateWidthPix,
		double *VReadyOffsetPix)
{
	double TotalRepeaterDelayTime;

	TotalRepeaterDelayTime = MaxInterDCNTileRepeaters * (2 / DPPCLK + 3 / DISPCLK);
	*VUpdateWidthPix = dml_ceil((14.0 / DCFClkDeepSleep + 12.0 / DPPCLK + TotalRepeaterDelayTime) * PixelClock, 1.0);
	*VReadyOffsetPix = dml_ceil(dml_max(150.0 / DPPCLK, TotalRepeaterDelayTime + 20.0 / DCFClkDeepSleep + 10.0 / DPPCLK) * PixelClock, 1.0);
	*VUpdateOffsetPix = dml_ceil(HTotal / 4.0, 1);
	*TSetup = (*VUpdateOffsetPix + *VUpdateWidthPix + *VReadyOffsetPix) / PixelClock;
	*Tdmbf = DynamicMetadataTransmittedBytes / 4.0 / DISPCLK;
	*Tdmec = HTotal / PixelClock;
	if (DynamicMetadataLinesBeforeActiveRequired == 0) {
		*Tdmsks = VBlank * HTotal / PixelClock / 2.0;
	} else {
		*Tdmsks = DynamicMetadataLinesBeforeActiveRequired * HTotal / PixelClock;
	}
	if (InterlaceEnable == 1 && ProgressiveToInterlaceUnitInOPP == false) {
		*Tdmsks = *Tdmsks / 2;
	}
#ifdef __DML_VBA_DEBUG__
	/* The width and ready offset are doubles, so they need %f; only the update offset is an int. */
	dml_print("DML::%s: VUpdateWidthPix = %f\n", __func__, *VUpdateWidthPix);
	dml_print("DML::%s: VReadyOffsetPix = %f\n", __func__, *VReadyOffsetPix);
	dml_print("DML::%s: VUpdateOffsetPix = %d\n", __func__, *VUpdateOffsetPix);
#endif
}
3691 double *dpte_row_bw) 3692 { 3693 if (DCCEnable != true) { 3694 *meta_row_bw = 0; 3695 } else if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12 || SourcePixelFormat == dm_rgbe_alpha) { 3696 *meta_row_bw = VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime) + VRatioChroma * MetaRowByteChroma / (meta_row_height_chroma * LineTime); 3697 } else { 3698 *meta_row_bw = VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime); 3699 } 3700 3701 if (GPUVMEnable != true) { 3702 *dpte_row_bw = 0; 3703 } else if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12 || SourcePixelFormat == dm_rgbe_alpha) { 3704 *dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime) 3705 + VRatioChroma * PixelPTEBytesPerRowChroma / (dpte_row_height_chroma * LineTime); 3706 } else { 3707 *dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime); 3708 } 3709 } 3710 3711 static void CalculateFlipSchedule( 3712 struct display_mode_lib *mode_lib, 3713 double HostVMInefficiencyFactor, 3714 double UrgentExtraLatency, 3715 double UrgentLatency, 3716 unsigned int GPUVMMaxPageTableLevels, 3717 bool HostVMEnable, 3718 unsigned int HostVMMaxNonCachedPageTableLevels, 3719 bool GPUVMEnable, 3720 double HostVMMinPageSize, 3721 double PDEAndMetaPTEBytesPerFrame, 3722 double MetaRowBytes, 3723 double DPTEBytesPerRow, 3724 double BandwidthAvailableForImmediateFlip, 3725 unsigned int TotImmediateFlipBytes, 3726 enum source_format_class SourcePixelFormat, 3727 double LineTime, 3728 double VRatio, 3729 double VRatioChroma, 3730 double Tno_bw, 3731 bool DCCEnable, 3732 unsigned int dpte_row_height, 3733 unsigned int meta_row_height, 3734 unsigned int dpte_row_height_chroma, 3735 unsigned int meta_row_height_chroma, 3736 double *DestinationLinesToRequestVMInImmediateFlip, 3737 double *DestinationLinesToRequestRowInImmediateFlip, 3738 double 
*final_flip_bw, 3739 bool *ImmediateFlipSupportedForPipe) 3740 { 3741 double min_row_time = 0.0; 3742 unsigned int HostVMDynamicLevelsTrips; 3743 double TimeForFetchingMetaPTEImmediateFlip; 3744 double TimeForFetchingRowInVBlankImmediateFlip; 3745 double ImmediateFlipBW; 3746 3747 if (GPUVMEnable == true && HostVMEnable == true) { 3748 HostVMDynamicLevelsTrips = HostVMMaxNonCachedPageTableLevels; 3749 } else { 3750 HostVMDynamicLevelsTrips = 0; 3751 } 3752 3753 if (GPUVMEnable == true || DCCEnable == true) { 3754 ImmediateFlipBW = (PDEAndMetaPTEBytesPerFrame + MetaRowBytes + DPTEBytesPerRow) * BandwidthAvailableForImmediateFlip / TotImmediateFlipBytes; 3755 } 3756 3757 if (GPUVMEnable == true) { 3758 TimeForFetchingMetaPTEImmediateFlip = dml_max3( 3759 Tno_bw + PDEAndMetaPTEBytesPerFrame * HostVMInefficiencyFactor / ImmediateFlipBW, 3760 UrgentExtraLatency + UrgentLatency * (GPUVMMaxPageTableLevels * (HostVMDynamicLevelsTrips + 1) - 1), 3761 LineTime / 4.0); 3762 } else { 3763 TimeForFetchingMetaPTEImmediateFlip = 0; 3764 } 3765 3766 *DestinationLinesToRequestVMInImmediateFlip = dml_ceil(4.0 * (TimeForFetchingMetaPTEImmediateFlip / LineTime), 1) / 4.0; 3767 if ((GPUVMEnable == true || DCCEnable == true)) { 3768 TimeForFetchingRowInVBlankImmediateFlip = dml_max3( 3769 (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / ImmediateFlipBW, 3770 UrgentLatency * (HostVMDynamicLevelsTrips + 1), 3771 LineTime / 4); 3772 } else { 3773 TimeForFetchingRowInVBlankImmediateFlip = 0; 3774 } 3775 3776 *DestinationLinesToRequestRowInImmediateFlip = dml_ceil(4.0 * (TimeForFetchingRowInVBlankImmediateFlip / LineTime), 1) / 4.0; 3777 3778 if (GPUVMEnable == true) { 3779 *final_flip_bw = dml_max( 3780 PDEAndMetaPTEBytesPerFrame * HostVMInefficiencyFactor / (*DestinationLinesToRequestVMInImmediateFlip * LineTime), 3781 (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / (*DestinationLinesToRequestRowInImmediateFlip * LineTime)); 3782 } else if ((GPUVMEnable == true 
|| DCCEnable == true)) { 3783 *final_flip_bw = (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / (*DestinationLinesToRequestRowInImmediateFlip * LineTime); 3784 } else { 3785 *final_flip_bw = 0; 3786 } 3787 3788 if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_rgbe_alpha) { 3789 if (GPUVMEnable == true && DCCEnable != true) { 3790 min_row_time = dml_min(dpte_row_height * LineTime / VRatio, dpte_row_height_chroma * LineTime / VRatioChroma); 3791 } else if (GPUVMEnable != true && DCCEnable == true) { 3792 min_row_time = dml_min(meta_row_height * LineTime / VRatio, meta_row_height_chroma * LineTime / VRatioChroma); 3793 } else { 3794 min_row_time = dml_min4( 3795 dpte_row_height * LineTime / VRatio, 3796 meta_row_height * LineTime / VRatio, 3797 dpte_row_height_chroma * LineTime / VRatioChroma, 3798 meta_row_height_chroma * LineTime / VRatioChroma); 3799 } 3800 } else { 3801 if (GPUVMEnable == true && DCCEnable != true) { 3802 min_row_time = dpte_row_height * LineTime / VRatio; 3803 } else if (GPUVMEnable != true && DCCEnable == true) { 3804 min_row_time = meta_row_height * LineTime / VRatio; 3805 } else { 3806 min_row_time = dml_min(dpte_row_height * LineTime / VRatio, meta_row_height * LineTime / VRatio); 3807 } 3808 } 3809 3810 if (*DestinationLinesToRequestVMInImmediateFlip >= 32 || *DestinationLinesToRequestRowInImmediateFlip >= 16 3811 || TimeForFetchingMetaPTEImmediateFlip + 2 * TimeForFetchingRowInVBlankImmediateFlip > min_row_time) { 3812 *ImmediateFlipSupportedForPipe = false; 3813 } else { 3814 *ImmediateFlipSupportedForPipe = true; 3815 } 3816 3817 #ifdef __DML_VBA_DEBUG__ 3818 dml_print("DML::%s: DestinationLinesToRequestVMInImmediateFlip = %f\n", __func__, *DestinationLinesToRequestVMInImmediateFlip); 3819 dml_print("DML::%s: DestinationLinesToRequestRowInImmediateFlip = %f\n", __func__, *DestinationLinesToRequestRowInImmediateFlip); 3820 dml_print("DML::%s: TimeForFetchingMetaPTEImmediateFlip = 
%f\n", __func__, TimeForFetchingMetaPTEImmediateFlip); 3821 dml_print("DML::%s: TimeForFetchingRowInVBlankImmediateFlip = %f\n", __func__, TimeForFetchingRowInVBlankImmediateFlip); 3822 dml_print("DML::%s: min_row_time = %f\n", __func__, min_row_time); 3823 dml_print("DML::%s: ImmediateFlipSupportedForPipe = %d\n", __func__, *ImmediateFlipSupportedForPipe); 3824 #endif 3825 3826 } 3827 3828 static double TruncToValidBPP( 3829 double LinkBitRate, 3830 int Lanes, 3831 int HTotal, 3832 int HActive, 3833 double PixelClock, 3834 double DesiredBPP, 3835 bool DSCEnable, 3836 enum output_encoder_class Output, 3837 enum output_format_class Format, 3838 unsigned int DSCInputBitPerComponent, 3839 int DSCSlices, 3840 int AudioRate, 3841 int AudioLayout, 3842 enum odm_combine_mode ODMCombine) 3843 { 3844 double MaxLinkBPP; 3845 int MinDSCBPP; 3846 double MaxDSCBPP; 3847 int NonDSCBPP0; 3848 int NonDSCBPP1; 3849 int NonDSCBPP2; 3850 3851 if (Format == dm_420) { 3852 NonDSCBPP0 = 12; 3853 NonDSCBPP1 = 15; 3854 NonDSCBPP2 = 18; 3855 MinDSCBPP = 6; 3856 MaxDSCBPP = 1.5 * DSCInputBitPerComponent - 1 / 16; 3857 } else if (Format == dm_444) { 3858 NonDSCBPP0 = 24; 3859 NonDSCBPP1 = 30; 3860 NonDSCBPP2 = 36; 3861 MinDSCBPP = 8; 3862 MaxDSCBPP = 3 * DSCInputBitPerComponent - 1.0 / 16; 3863 } else { 3864 3865 NonDSCBPP0 = 16; 3866 NonDSCBPP1 = 20; 3867 NonDSCBPP2 = 24; 3868 3869 if (Format == dm_n422) { 3870 MinDSCBPP = 7; 3871 MaxDSCBPP = 2 * DSCInputBitPerComponent - 1.0 / 16.0; 3872 } else { 3873 MinDSCBPP = 8; 3874 MaxDSCBPP = 3 * DSCInputBitPerComponent - 1.0 / 16.0; 3875 } 3876 } 3877 3878 if (DSCEnable && Output == dm_dp) { 3879 MaxLinkBPP = LinkBitRate / 10 * 8 * Lanes / PixelClock * (1 - 2.4 / 100); 3880 } else { 3881 MaxLinkBPP = LinkBitRate / 10 * 8 * Lanes / PixelClock; 3882 } 3883 3884 if (ODMCombine == dm_odm_combine_mode_4to1 && MaxLinkBPP > 16) { 3885 MaxLinkBPP = 16; 3886 } else if (ODMCombine == dm_odm_combine_mode_2to1 && MaxLinkBPP > 32) { 3887 MaxLinkBPP = 32; 3888 } 
3889 3890 if (DesiredBPP == 0) { 3891 if (DSCEnable) { 3892 if (MaxLinkBPP < MinDSCBPP) { 3893 return BPP_INVALID; 3894 } else if (MaxLinkBPP >= MaxDSCBPP) { 3895 return MaxDSCBPP; 3896 } else { 3897 return dml_floor(16.0 * MaxLinkBPP, 1.0) / 16.0; 3898 } 3899 } else { 3900 if (MaxLinkBPP >= NonDSCBPP2) { 3901 return NonDSCBPP2; 3902 } else if (MaxLinkBPP >= NonDSCBPP1) { 3903 return NonDSCBPP1; 3904 } else if (MaxLinkBPP >= NonDSCBPP0) { 3905 return 16.0; 3906 } else { 3907 return BPP_INVALID; 3908 } 3909 } 3910 } else { 3911 if (!((DSCEnable == false && (DesiredBPP == NonDSCBPP2 || DesiredBPP == NonDSCBPP1 || DesiredBPP <= NonDSCBPP0)) 3912 || (DSCEnable && DesiredBPP >= MinDSCBPP && DesiredBPP <= MaxDSCBPP))) { 3913 return BPP_INVALID; 3914 } else { 3915 return DesiredBPP; 3916 } 3917 } 3918 return BPP_INVALID; 3919 } 3920 3921 static noinline void CalculatePrefetchSchedulePerPlane( 3922 struct display_mode_lib *mode_lib, 3923 double HostVMInefficiencyFactor, 3924 int i, 3925 unsigned int j, 3926 unsigned int k) 3927 { 3928 struct vba_vars_st *v = &mode_lib->vba; 3929 Pipe myPipe; 3930 3931 myPipe.DPPCLK = v->RequiredDPPCLK[i][j][k]; 3932 myPipe.DISPCLK = v->RequiredDISPCLK[i][j]; 3933 myPipe.PixelClock = v->PixelClock[k]; 3934 myPipe.DCFCLKDeepSleep = v->ProjectedDCFCLKDeepSleep[i][j]; 3935 myPipe.DPPPerPlane = v->NoOfDPP[i][j][k]; 3936 myPipe.ScalerEnabled = v->ScalerEnabled[k]; 3937 myPipe.VRatio = mode_lib->vba.VRatio[k]; 3938 myPipe.VRatioChroma = mode_lib->vba.VRatioChroma[k]; 3939 3940 myPipe.SourceScan = v->SourceScan[k]; 3941 myPipe.BlockWidth256BytesY = v->Read256BlockWidthY[k]; 3942 myPipe.BlockHeight256BytesY = v->Read256BlockHeightY[k]; 3943 myPipe.BlockWidth256BytesC = v->Read256BlockWidthC[k]; 3944 myPipe.BlockHeight256BytesC = v->Read256BlockHeightC[k]; 3945 myPipe.InterlaceEnable = v->Interlace[k]; 3946 myPipe.NumberOfCursors = v->NumberOfCursors[k]; 3947 myPipe.VBlank = v->VTotal[k] - v->VActive[k]; 3948 myPipe.HTotal = v->HTotal[k]; 3949 
myPipe.DCCEnable = v->DCCEnable[k]; 3950 myPipe.ODMCombineIsEnabled = v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1 3951 || v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1; 3952 myPipe.SourcePixelFormat = v->SourcePixelFormat[k]; 3953 myPipe.BytePerPixelY = v->BytePerPixelY[k]; 3954 myPipe.BytePerPixelC = v->BytePerPixelC[k]; 3955 myPipe.ProgressiveToInterlaceUnitInOPP = v->ProgressiveToInterlaceUnitInOPP; 3956 v->NoTimeForPrefetch[i][j][k] = CalculatePrefetchSchedule( 3957 mode_lib, 3958 HostVMInefficiencyFactor, 3959 &myPipe, 3960 v->DSCDelayPerState[i][k], 3961 v->DPPCLKDelaySubtotal + v->DPPCLKDelayCNVCFormater, 3962 v->DPPCLKDelaySCL, 3963 v->DPPCLKDelaySCLLBOnly, 3964 v->DPPCLKDelayCNVCCursor, 3965 v->DISPCLKDelaySubtotal, 3966 v->SwathWidthYThisState[k] / v->HRatio[k], 3967 v->OutputFormat[k], 3968 v->MaxInterDCNTileRepeaters, 3969 dml_min(v->MaxVStartup, v->MaximumVStartup[i][j][k]), 3970 v->MaximumVStartup[i][j][k], 3971 v->GPUVMMaxPageTableLevels, 3972 v->GPUVMEnable, 3973 v->HostVMEnable, 3974 v->HostVMMaxNonCachedPageTableLevels, 3975 v->HostVMMinPageSize, 3976 v->DynamicMetadataEnable[k], 3977 v->DynamicMetadataVMEnabled, 3978 v->DynamicMetadataLinesBeforeActiveRequired[k], 3979 v->DynamicMetadataTransmittedBytes[k], 3980 v->UrgLatency[i], 3981 v->ExtraLatency, 3982 v->TimeCalc, 3983 v->PDEAndMetaPTEBytesPerFrame[i][j][k], 3984 v->MetaRowBytes[i][j][k], 3985 v->DPTEBytesPerRow[i][j][k], 3986 v->PrefetchLinesY[i][j][k], 3987 v->SwathWidthYThisState[k], 3988 v->PrefillY[k], 3989 v->MaxNumSwY[k], 3990 v->PrefetchLinesC[i][j][k], 3991 v->SwathWidthCThisState[k], 3992 v->PrefillC[k], 3993 v->MaxNumSwC[k], 3994 v->swath_width_luma_ub_this_state[k], 3995 v->swath_width_chroma_ub_this_state[k], 3996 v->SwathHeightYThisState[k], 3997 v->SwathHeightCThisState[k], 3998 v->TWait, 3999 &v->DSTXAfterScaler[k], 4000 &v->DSTYAfterScaler[k], 4001 &v->LineTimesForPrefetch[k], 4002 &v->PrefetchBW[k], 4003 &v->LinesForMetaPTE[k], 4004 
&v->LinesForMetaAndDPTERow[k], 4005 &v->VRatioPreY[i][j][k], 4006 &v->VRatioPreC[i][j][k], 4007 &v->RequiredPrefetchPixelDataBWLuma[i][j][k], 4008 &v->RequiredPrefetchPixelDataBWChroma[i][j][k], 4009 &v->NoTimeForDynamicMetadata[i][j][k], 4010 &v->Tno_bw[k], 4011 &v->prefetch_vmrow_bw[k], 4012 &v->dummy7[k], 4013 &v->dummy8[k], 4014 &v->dummy13[k], 4015 &v->VUpdateOffsetPix[k], 4016 &v->VUpdateWidthPix[k], 4017 &v->VReadyOffsetPix[k]); 4018 } 4019 4020 void dml314_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_lib) 4021 { 4022 struct vba_vars_st *v = &mode_lib->vba; 4023 4024 int i, j; 4025 unsigned int k, m; 4026 int ReorderingBytes; 4027 int MinPrefetchMode = 0, MaxPrefetchMode = 2; 4028 bool NoChroma = true; 4029 bool EnoughWritebackUnits = true; 4030 bool P2IWith420 = false; 4031 bool DSCOnlyIfNecessaryWithBPP = false; 4032 bool DSC422NativeNotSupported = false; 4033 double MaxTotalVActiveRDBandwidth; 4034 bool ViewportExceedsSurface = false; 4035 bool FMTBufferExceeded = false; 4036 4037 /*MODE SUPPORT, VOLTAGE STATE AND SOC CONFIGURATION*/ 4038 4039 CalculateMinAndMaxPrefetchMode( 4040 mode_lib->vba.AllowDRAMSelfRefreshOrDRAMClockChangeInVblank, 4041 &MinPrefetchMode, &MaxPrefetchMode); 4042 4043 /*Scale Ratio, taps Support Check*/ 4044 4045 v->ScaleRatioAndTapsSupport = true; 4046 for (k = 0; k < v->NumberOfActivePlanes; k++) { 4047 if (v->ScalerEnabled[k] == false 4048 && ((v->SourcePixelFormat[k] != dm_444_64 && v->SourcePixelFormat[k] != dm_444_32 4049 && v->SourcePixelFormat[k] != dm_444_16 && v->SourcePixelFormat[k] != dm_mono_16 4050 && v->SourcePixelFormat[k] != dm_mono_8 && v->SourcePixelFormat[k] != dm_rgbe 4051 && v->SourcePixelFormat[k] != dm_rgbe_alpha) || v->HRatio[k] != 1.0 || v->htaps[k] != 1.0 4052 || v->VRatio[k] != 1.0 || v->vtaps[k] != 1.0)) { 4053 v->ScaleRatioAndTapsSupport = false; 4054 } else if (v->vtaps[k] < 1.0 || v->vtaps[k] > 8.0 || v->htaps[k] < 1.0 || v->htaps[k] > 8.0 4055 || (v->htaps[k] > 1.0 && 
(v->htaps[k] % 2) == 1) || v->HRatio[k] > v->MaxHSCLRatio 4056 || v->VRatio[k] > v->MaxVSCLRatio || v->HRatio[k] > v->htaps[k] 4057 || v->VRatio[k] > v->vtaps[k] 4058 || (v->SourcePixelFormat[k] != dm_444_64 && v->SourcePixelFormat[k] != dm_444_32 4059 && v->SourcePixelFormat[k] != dm_444_16 && v->SourcePixelFormat[k] != dm_mono_16 4060 && v->SourcePixelFormat[k] != dm_mono_8 && v->SourcePixelFormat[k] != dm_rgbe 4061 && (v->VTAPsChroma[k] < 1 || v->VTAPsChroma[k] > 8 || v->HTAPsChroma[k] < 1 4062 || v->HTAPsChroma[k] > 8 || (v->HTAPsChroma[k] > 1 && v->HTAPsChroma[k] % 2 == 1) 4063 || v->HRatioChroma[k] > v->MaxHSCLRatio 4064 || v->VRatioChroma[k] > v->MaxVSCLRatio 4065 || v->HRatioChroma[k] > v->HTAPsChroma[k] 4066 || v->VRatioChroma[k] > v->VTAPsChroma[k]))) { 4067 v->ScaleRatioAndTapsSupport = false; 4068 } 4069 } 4070 /*Source Format, Pixel Format and Scan Support Check*/ 4071 4072 v->SourceFormatPixelAndScanSupport = true; 4073 for (k = 0; k < v->NumberOfActivePlanes; k++) { 4074 if ((v->SurfaceTiling[k] == dm_sw_linear && (!(v->SourceScan[k] != dm_vert) || v->DCCEnable[k] == true)) 4075 || ((v->SurfaceTiling[k] == dm_sw_64kb_d || v->SurfaceTiling[k] == dm_sw_64kb_d_t 4076 || v->SurfaceTiling[k] == dm_sw_64kb_d_x) && !(v->SourcePixelFormat[k] == dm_444_64))) { 4077 v->SourceFormatPixelAndScanSupport = false; 4078 } 4079 } 4080 /*Bandwidth Support Check*/ 4081 4082 for (k = 0; k < v->NumberOfActivePlanes; k++) { 4083 CalculateBytePerPixelAnd256BBlockSizes( 4084 v->SourcePixelFormat[k], 4085 v->SurfaceTiling[k], 4086 &v->BytePerPixelY[k], 4087 &v->BytePerPixelC[k], 4088 &v->BytePerPixelInDETY[k], 4089 &v->BytePerPixelInDETC[k], 4090 &v->Read256BlockHeightY[k], 4091 &v->Read256BlockHeightC[k], 4092 &v->Read256BlockWidthY[k], 4093 &v->Read256BlockWidthC[k]); 4094 } 4095 for (k = 0; k < v->NumberOfActivePlanes; k++) { 4096 if (v->SourceScan[k] != dm_vert) { 4097 v->SwathWidthYSingleDPP[k] = v->ViewportWidth[k]; 4098 v->SwathWidthCSingleDPP[k] = 
v->ViewportWidthChroma[k]; 4099 } else { 4100 v->SwathWidthYSingleDPP[k] = v->ViewportHeight[k]; 4101 v->SwathWidthCSingleDPP[k] = v->ViewportHeightChroma[k]; 4102 } 4103 } 4104 for (k = 0; k < v->NumberOfActivePlanes; k++) { 4105 v->ReadBandwidthLuma[k] = v->SwathWidthYSingleDPP[k] * dml_ceil(v->BytePerPixelInDETY[k], 1.0) 4106 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatio[k]; 4107 v->ReadBandwidthChroma[k] = v->SwathWidthYSingleDPP[k] / 2 * dml_ceil(v->BytePerPixelInDETC[k], 2.0) 4108 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatio[k] / 2.0; 4109 } 4110 for (k = 0; k < v->NumberOfActivePlanes; k++) { 4111 if (v->WritebackEnable[k] == true && v->WritebackPixelFormat[k] == dm_444_64) { 4112 v->WriteBandwidth[k] = v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k] 4113 / (v->WritebackSourceHeight[k] * v->HTotal[k] / v->PixelClock[k]) * 8.0; 4114 } else if (v->WritebackEnable[k] == true) { 4115 v->WriteBandwidth[k] = v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k] 4116 / (v->WritebackSourceHeight[k] * v->HTotal[k] / v->PixelClock[k]) * 4.0; 4117 } else { 4118 v->WriteBandwidth[k] = 0.0; 4119 } 4120 } 4121 4122 /*Writeback Latency support check*/ 4123 4124 v->WritebackLatencySupport = true; 4125 for (k = 0; k < v->NumberOfActivePlanes; k++) { 4126 if (v->WritebackEnable[k] == true && (v->WriteBandwidth[k] > v->WritebackInterfaceBufferSize * 1024 / v->WritebackLatency)) { 4127 v->WritebackLatencySupport = false; 4128 } 4129 } 4130 4131 /*Writeback Mode Support Check*/ 4132 4133 v->TotalNumberOfActiveWriteback = 0; 4134 for (k = 0; k < v->NumberOfActivePlanes; k++) { 4135 if (v->WritebackEnable[k] == true) { 4136 v->TotalNumberOfActiveWriteback = v->TotalNumberOfActiveWriteback + 1; 4137 } 4138 } 4139 4140 if (v->TotalNumberOfActiveWriteback > v->MaxNumWriteback) { 4141 EnoughWritebackUnits = false; 4142 } 4143 4144 /*Writeback Scale Ratio and Taps Support Check*/ 4145 4146 v->WritebackScaleRatioAndTapsSupport = true; 4147 for (k = 
0; k < v->NumberOfActivePlanes; k++) { 4148 if (v->WritebackEnable[k] == true) { 4149 if (v->WritebackHRatio[k] > v->WritebackMaxHSCLRatio || v->WritebackVRatio[k] > v->WritebackMaxVSCLRatio 4150 || v->WritebackHRatio[k] < v->WritebackMinHSCLRatio 4151 || v->WritebackVRatio[k] < v->WritebackMinVSCLRatio 4152 || v->WritebackHTaps[k] > v->WritebackMaxHSCLTaps 4153 || v->WritebackVTaps[k] > v->WritebackMaxVSCLTaps 4154 || v->WritebackHRatio[k] > v->WritebackHTaps[k] || v->WritebackVRatio[k] > v->WritebackVTaps[k] 4155 || (v->WritebackHTaps[k] > 2.0 && ((v->WritebackHTaps[k] % 2) == 1))) { 4156 v->WritebackScaleRatioAndTapsSupport = false; 4157 } 4158 if (2.0 * v->WritebackDestinationWidth[k] * (v->WritebackVTaps[k] - 1) * 57 > v->WritebackLineBufferSize) { 4159 v->WritebackScaleRatioAndTapsSupport = false; 4160 } 4161 } 4162 } 4163 /*Maximum DISPCLK/DPPCLK Support check*/ 4164 4165 v->WritebackRequiredDISPCLK = 0.0; 4166 for (k = 0; k < v->NumberOfActivePlanes; k++) { 4167 if (v->WritebackEnable[k] == true) { 4168 v->WritebackRequiredDISPCLK = dml_max( 4169 v->WritebackRequiredDISPCLK, 4170 dml314_CalculateWriteBackDISPCLK( 4171 v->WritebackPixelFormat[k], 4172 v->PixelClock[k], 4173 v->WritebackHRatio[k], 4174 v->WritebackVRatio[k], 4175 v->WritebackHTaps[k], 4176 v->WritebackVTaps[k], 4177 v->WritebackSourceWidth[k], 4178 v->WritebackDestinationWidth[k], 4179 v->HTotal[k], 4180 v->WritebackLineBufferSize)); 4181 } 4182 } 4183 for (k = 0; k < v->NumberOfActivePlanes; k++) { 4184 if (v->HRatio[k] > 1.0) { 4185 v->PSCL_FACTOR[k] = dml_min( 4186 v->MaxDCHUBToPSCLThroughput, 4187 v->MaxPSCLToLBThroughput * v->HRatio[k] / dml_ceil(v->htaps[k] / 6.0, 1.0)); 4188 } else { 4189 v->PSCL_FACTOR[k] = dml_min(v->MaxDCHUBToPSCLThroughput, v->MaxPSCLToLBThroughput); 4190 } 4191 if (v->BytePerPixelC[k] == 0.0) { 4192 v->PSCL_FACTOR_CHROMA[k] = 0.0; 4193 v->MinDPPCLKUsingSingleDPP[k] = v->PixelClock[k] 4194 * dml_max3( 4195 v->vtaps[k] / 6.0 * dml_min(1.0, v->HRatio[k]), 4196 
v->HRatio[k] * v->VRatio[k] / v->PSCL_FACTOR[k], 4197 1.0); 4198 if ((v->htaps[k] > 6.0 || v->vtaps[k] > 6.0) && v->MinDPPCLKUsingSingleDPP[k] < 2.0 * v->PixelClock[k]) { 4199 v->MinDPPCLKUsingSingleDPP[k] = 2.0 * v->PixelClock[k]; 4200 } 4201 } else { 4202 if (v->HRatioChroma[k] > 1.0) { 4203 v->PSCL_FACTOR_CHROMA[k] = dml_min( 4204 v->MaxDCHUBToPSCLThroughput, 4205 v->MaxPSCLToLBThroughput * v->HRatioChroma[k] / dml_ceil(v->HTAPsChroma[k] / 6.0, 1.0)); 4206 } else { 4207 v->PSCL_FACTOR_CHROMA[k] = dml_min(v->MaxDCHUBToPSCLThroughput, v->MaxPSCLToLBThroughput); 4208 } 4209 v->MinDPPCLKUsingSingleDPP[k] = v->PixelClock[k] 4210 * dml_max5( 4211 v->vtaps[k] / 6.0 * dml_min(1.0, v->HRatio[k]), 4212 v->HRatio[k] * v->VRatio[k] / v->PSCL_FACTOR[k], 4213 v->VTAPsChroma[k] / 6.0 * dml_min(1.0, v->HRatioChroma[k]), 4214 v->HRatioChroma[k] * v->VRatioChroma[k] / v->PSCL_FACTOR_CHROMA[k], 4215 1.0); 4216 if ((v->htaps[k] > 6.0 || v->vtaps[k] > 6.0 || v->HTAPsChroma[k] > 6.0 || v->VTAPsChroma[k] > 6.0) 4217 && v->MinDPPCLKUsingSingleDPP[k] < 2.0 * v->PixelClock[k]) { 4218 v->MinDPPCLKUsingSingleDPP[k] = 2.0 * v->PixelClock[k]; 4219 } 4220 } 4221 } 4222 for (k = 0; k < v->NumberOfActivePlanes; k++) { 4223 int MaximumSwathWidthSupportLuma; 4224 int MaximumSwathWidthSupportChroma; 4225 4226 if (v->SurfaceTiling[k] == dm_sw_linear) { 4227 MaximumSwathWidthSupportLuma = 8192.0; 4228 } else if (v->SourceScan[k] == dm_vert && v->BytePerPixelC[k] > 0) { 4229 MaximumSwathWidthSupportLuma = 2880.0; 4230 } else if (v->SourcePixelFormat[k] == dm_rgbe_alpha) { 4231 MaximumSwathWidthSupportLuma = 3840.0; 4232 } else { 4233 MaximumSwathWidthSupportLuma = 5760.0; 4234 } 4235 4236 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12) { 4237 MaximumSwathWidthSupportChroma = MaximumSwathWidthSupportLuma / 2.0; 4238 } else { 4239 MaximumSwathWidthSupportChroma = MaximumSwathWidthSupportLuma; 4240 } 4241 
v->MaximumSwathWidthInLineBufferLuma = v->LineBufferSize * dml_max(v->HRatio[k], 1.0) / v->LBBitPerPixel[k] 4242 / (v->vtaps[k] + dml_max(dml_ceil(v->VRatio[k], 1.0) - 2, 0.0)); 4243 if (v->BytePerPixelC[k] == 0.0) { 4244 v->MaximumSwathWidthInLineBufferChroma = 0; 4245 } else { 4246 v->MaximumSwathWidthInLineBufferChroma = v->LineBufferSize * dml_max(v->HRatioChroma[k], 1.0) / v->LBBitPerPixel[k] 4247 / (v->VTAPsChroma[k] + dml_max(dml_ceil(v->VRatioChroma[k], 1.0) - 2, 0.0)); 4248 } 4249 v->MaximumSwathWidthLuma[k] = dml_min(MaximumSwathWidthSupportLuma, v->MaximumSwathWidthInLineBufferLuma); 4250 v->MaximumSwathWidthChroma[k] = dml_min(MaximumSwathWidthSupportChroma, v->MaximumSwathWidthInLineBufferChroma); 4251 } 4252 4253 CalculateSwathAndDETConfiguration( 4254 true, 4255 v->NumberOfActivePlanes, 4256 v->DETBufferSizeInKByte[0], 4257 v->MaximumSwathWidthLuma, 4258 v->MaximumSwathWidthChroma, 4259 v->SourceScan, 4260 v->SourcePixelFormat, 4261 v->SurfaceTiling, 4262 v->ViewportWidth, 4263 v->ViewportHeight, 4264 v->SurfaceWidthY, 4265 v->SurfaceWidthC, 4266 v->SurfaceHeightY, 4267 v->SurfaceHeightC, 4268 v->Read256BlockHeightY, 4269 v->Read256BlockHeightC, 4270 v->Read256BlockWidthY, 4271 v->Read256BlockWidthC, 4272 v->odm_combine_dummy, 4273 v->BlendingAndTiming, 4274 v->BytePerPixelY, 4275 v->BytePerPixelC, 4276 v->BytePerPixelInDETY, 4277 v->BytePerPixelInDETC, 4278 v->HActive, 4279 v->HRatio, 4280 v->HRatioChroma, 4281 v->NoOfDPPThisState, 4282 v->swath_width_luma_ub_this_state, 4283 v->swath_width_chroma_ub_this_state, 4284 v->SwathWidthYThisState, 4285 v->SwathWidthCThisState, 4286 v->SwathHeightYThisState, 4287 v->SwathHeightCThisState, 4288 v->DETBufferSizeYThisState, 4289 v->DETBufferSizeCThisState, 4290 v->SingleDPPViewportSizeSupportPerPlane, 4291 &v->ViewportSizeSupport[0][0]); 4292 4293 for (i = 0; i < v->soc.num_states; i++) { 4294 for (j = 0; j < 2; j++) { 4295 v->MaxDispclkRoundedDownToDFSGranularity = RoundToDFSGranularityDown(v->MaxDispclk[i], 
v->DISPCLKDPPCLKVCOSpeed); 4296 v->MaxDppclkRoundedDownToDFSGranularity = RoundToDFSGranularityDown(v->MaxDppclk[i], v->DISPCLKDPPCLKVCOSpeed); 4297 v->RequiredDISPCLK[i][j] = 0.0; 4298 v->DISPCLK_DPPCLK_Support[i][j] = true; 4299 for (k = 0; k < v->NumberOfActivePlanes; k++) { 4300 v->PlaneRequiredDISPCLKWithoutODMCombine = v->PixelClock[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) 4301 * (1.0 + v->DISPCLKRampingMargin / 100.0); 4302 if ((v->PlaneRequiredDISPCLKWithoutODMCombine >= v->MaxDispclk[i] 4303 && v->MaxDispclk[i] == v->MaxDispclk[v->soc.num_states - 1] 4304 && v->MaxDppclk[i] == v->MaxDppclk[v->soc.num_states - 1])) { 4305 v->PlaneRequiredDISPCLKWithoutODMCombine = v->PixelClock[k] 4306 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0); 4307 } 4308 v->PlaneRequiredDISPCLKWithODMCombine2To1 = v->PixelClock[k] / 2 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) 4309 * (1 + v->DISPCLKRampingMargin / 100.0); 4310 if ((v->PlaneRequiredDISPCLKWithODMCombine2To1 >= v->MaxDispclk[i] 4311 && v->MaxDispclk[i] == v->MaxDispclk[v->soc.num_states - 1] 4312 && v->MaxDppclk[i] == v->MaxDppclk[v->soc.num_states - 1])) { 4313 v->PlaneRequiredDISPCLKWithODMCombine2To1 = v->PixelClock[k] / 2 4314 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0); 4315 } 4316 v->PlaneRequiredDISPCLKWithODMCombine4To1 = v->PixelClock[k] / 4 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) 4317 * (1 + v->DISPCLKRampingMargin / 100.0); 4318 if ((v->PlaneRequiredDISPCLKWithODMCombine4To1 >= v->MaxDispclk[i] 4319 && v->MaxDispclk[i] == v->MaxDispclk[v->soc.num_states - 1] 4320 && v->MaxDppclk[i] == v->MaxDppclk[v->soc.num_states - 1])) { 4321 v->PlaneRequiredDISPCLKWithODMCombine4To1 = v->PixelClock[k] / 4 4322 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0); 4323 } 4324 4325 if (v->ODMCombinePolicy == dm_odm_combine_policy_none 4326 || !(v->Output[k] == dm_dp || 4327 v->Output[k] == dm_dp2p0 || 4328 v->Output[k] == dm_edp)) { 4329 v->ODMCombineEnablePerState[i][k] = 
dm_odm_combine_mode_disabled; 4330 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithoutODMCombine; 4331 4332 if (v->HActive[k] / 2 > DCN314_MAX_FMT_420_BUFFER_WIDTH) 4333 FMTBufferExceeded = true; 4334 } else if (v->ODMCombinePolicy == dm_odm_combine_policy_2to1) { 4335 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1; 4336 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine2To1; 4337 } else if (v->ODMCombinePolicy == dm_odm_combine_policy_4to1 4338 || v->PlaneRequiredDISPCLKWithODMCombine2To1 > v->MaxDispclkRoundedDownToDFSGranularity) { 4339 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_4to1; 4340 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine4To1; 4341 } else if (v->PlaneRequiredDISPCLKWithoutODMCombine > v->MaxDispclkRoundedDownToDFSGranularity) { 4342 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1; 4343 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine2To1; 4344 } else { 4345 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_disabled; 4346 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithoutODMCombine; 4347 } 4348 if (v->DSCEnabled[k] && v->HActive[k] > DCN314_MAX_DSC_IMAGE_WIDTH 4349 && v->ODMCombineEnablePerState[i][k] != dm_odm_combine_mode_4to1) { 4350 if (v->HActive[k] / 2 > DCN314_MAX_DSC_IMAGE_WIDTH) { 4351 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_4to1; 4352 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine4To1; 4353 } else { 4354 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1; 4355 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine2To1; 4356 } 4357 } 4358 if (v->OutputFormat[k] == dm_420 && v->HActive[k] > DCN314_MAX_FMT_420_BUFFER_WIDTH 4359 && v->ODMCombineEnablePerState[i][k] != dm_odm_combine_mode_4to1) { 4360 if (v->HActive[k] / 2 > DCN314_MAX_FMT_420_BUFFER_WIDTH) { 4361 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_4to1; 4362 v->PlaneRequiredDISPCLK = 
v->PlaneRequiredDISPCLKWithODMCombine4To1; 4363 4364 if (v->HActive[k] / 4 > DCN314_MAX_FMT_420_BUFFER_WIDTH) 4365 FMTBufferExceeded = true; 4366 } else { 4367 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1; 4368 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine2To1; 4369 } 4370 } 4371 if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1) { 4372 v->MPCCombine[i][j][k] = false; 4373 v->NoOfDPP[i][j][k] = 4; 4374 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 4; 4375 } else if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) { 4376 v->MPCCombine[i][j][k] = false; 4377 v->NoOfDPP[i][j][k] = 2; 4378 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 2; 4379 } else if ((v->WhenToDoMPCCombine == dm_mpc_never 4380 || (v->MinDPPCLKUsingSingleDPP[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) 4381 <= v->MaxDppclkRoundedDownToDFSGranularity && v->SingleDPPViewportSizeSupportPerPlane[k] == true))) { 4382 v->MPCCombine[i][j][k] = false; 4383 v->NoOfDPP[i][j][k] = 1; 4384 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0); 4385 } else { 4386 v->MPCCombine[i][j][k] = true; 4387 v->NoOfDPP[i][j][k] = 2; 4388 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 2.0; 4389 } 4390 v->RequiredDISPCLK[i][j] = dml_max(v->RequiredDISPCLK[i][j], v->PlaneRequiredDISPCLK); 4391 if ((v->MinDPPCLKUsingSingleDPP[k] / v->NoOfDPP[i][j][k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) 4392 > v->MaxDppclkRoundedDownToDFSGranularity) 4393 || (v->PlaneRequiredDISPCLK > v->MaxDispclkRoundedDownToDFSGranularity)) { 4394 v->DISPCLK_DPPCLK_Support[i][j] = false; 4395 } 4396 } 4397 v->TotalNumberOfActiveDPP[i][j] = 0; 4398 v->TotalNumberOfSingleDPPPlanes[i][j] = 0; 4399 for (k = 0; k < 
v->NumberOfActivePlanes; k++) { 4400 v->TotalNumberOfActiveDPP[i][j] = v->TotalNumberOfActiveDPP[i][j] + v->NoOfDPP[i][j][k]; 4401 if (v->NoOfDPP[i][j][k] == 1) 4402 v->TotalNumberOfSingleDPPPlanes[i][j] = v->TotalNumberOfSingleDPPPlanes[i][j] + 1; 4403 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10 4404 || v->SourcePixelFormat[k] == dm_420_12 || v->SourcePixelFormat[k] == dm_rgbe_alpha) 4405 NoChroma = false; 4406 } 4407 4408 // UPTO 4409 if (j == 1 && v->WhenToDoMPCCombine != dm_mpc_never 4410 && !UnboundedRequest(v->UseUnboundedRequesting, v->TotalNumberOfActiveDPP[i][j], NoChroma, v->Output[0])) { 4411 while (!(v->TotalNumberOfActiveDPP[i][j] >= v->MaxNumDPP || v->TotalNumberOfSingleDPPPlanes[i][j] == 0)) { 4412 double BWOfNonSplitPlaneOfMaximumBandwidth; 4413 unsigned int NumberOfNonSplitPlaneOfMaximumBandwidth; 4414 4415 BWOfNonSplitPlaneOfMaximumBandwidth = 0; 4416 NumberOfNonSplitPlaneOfMaximumBandwidth = 0; 4417 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 4418 if (v->ReadBandwidthLuma[k] + v->ReadBandwidthChroma[k] > BWOfNonSplitPlaneOfMaximumBandwidth 4419 && v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_disabled && v->MPCCombine[i][j][k] == false) { 4420 BWOfNonSplitPlaneOfMaximumBandwidth = v->ReadBandwidthLuma[k] + v->ReadBandwidthChroma[k]; 4421 NumberOfNonSplitPlaneOfMaximumBandwidth = k; 4422 } 4423 } 4424 v->MPCCombine[i][j][NumberOfNonSplitPlaneOfMaximumBandwidth] = true; 4425 v->NoOfDPP[i][j][NumberOfNonSplitPlaneOfMaximumBandwidth] = 2; 4426 v->RequiredDPPCLK[i][j][NumberOfNonSplitPlaneOfMaximumBandwidth] = 4427 v->MinDPPCLKUsingSingleDPP[NumberOfNonSplitPlaneOfMaximumBandwidth] 4428 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100) / 2; 4429 v->TotalNumberOfActiveDPP[i][j] = v->TotalNumberOfActiveDPP[i][j] + 1; 4430 v->TotalNumberOfSingleDPPPlanes[i][j] = v->TotalNumberOfSingleDPPPlanes[i][j] - 1; 4431 } 4432 } 4433 if (v->TotalNumberOfActiveDPP[i][j] > v->MaxNumDPP) { 4434 
v->RequiredDISPCLK[i][j] = 0.0; 4435 v->DISPCLK_DPPCLK_Support[i][j] = true; 4436 for (k = 0; k < v->NumberOfActivePlanes; k++) { 4437 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_disabled; 4438 if (v->SingleDPPViewportSizeSupportPerPlane[k] == false && v->WhenToDoMPCCombine != dm_mpc_never) { 4439 v->MPCCombine[i][j][k] = true; 4440 v->NoOfDPP[i][j][k] = 2; 4441 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] 4442 * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 2.0; 4443 } else { 4444 v->MPCCombine[i][j][k] = false; 4445 v->NoOfDPP[i][j][k] = 1; 4446 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] 4447 * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0); 4448 } 4449 if (!(v->MaxDispclk[i] == v->MaxDispclk[v->soc.num_states - 1] 4450 && v->MaxDppclk[i] == v->MaxDppclk[v->soc.num_states - 1])) { 4451 v->PlaneRequiredDISPCLK = v->PixelClock[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) 4452 * (1.0 + v->DISPCLKRampingMargin / 100.0); 4453 } else { 4454 v->PlaneRequiredDISPCLK = v->PixelClock[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0); 4455 } 4456 v->RequiredDISPCLK[i][j] = dml_max(v->RequiredDISPCLK[i][j], v->PlaneRequiredDISPCLK); 4457 if ((v->MinDPPCLKUsingSingleDPP[k] / v->NoOfDPP[i][j][k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) 4458 > v->MaxDppclkRoundedDownToDFSGranularity) 4459 || (v->PlaneRequiredDISPCLK > v->MaxDispclkRoundedDownToDFSGranularity)) { 4460 v->DISPCLK_DPPCLK_Support[i][j] = false; 4461 } 4462 } 4463 v->TotalNumberOfActiveDPP[i][j] = 0.0; 4464 for (k = 0; k < v->NumberOfActivePlanes; k++) { 4465 v->TotalNumberOfActiveDPP[i][j] = v->TotalNumberOfActiveDPP[i][j] + v->NoOfDPP[i][j][k]; 4466 } 4467 } 4468 v->RequiredDISPCLK[i][j] = dml_max(v->RequiredDISPCLK[i][j], v->WritebackRequiredDISPCLK); 4469 if (v->MaxDispclkRoundedDownToDFSGranularity < v->WritebackRequiredDISPCLK) { 4470 v->DISPCLK_DPPCLK_Support[i][j] = false; 4471 } 4472 } 4473 } 4474 4475 /*Total Available 
Pipes Support Check*/ 4476 4477 for (i = 0; i < v->soc.num_states; i++) { 4478 for (j = 0; j < 2; j++) { 4479 if (v->TotalNumberOfActiveDPP[i][j] <= v->MaxNumDPP) { 4480 v->TotalAvailablePipesSupport[i][j] = true; 4481 } else { 4482 v->TotalAvailablePipesSupport[i][j] = false; 4483 } 4484 } 4485 } 4486 /*Display IO and DSC Support Check*/ 4487 4488 v->NonsupportedDSCInputBPC = false; 4489 for (k = 0; k < v->NumberOfActivePlanes; k++) { 4490 if (!(v->DSCInputBitPerComponent[k] == 12.0 || v->DSCInputBitPerComponent[k] == 10.0 || v->DSCInputBitPerComponent[k] == 8.0) 4491 || v->DSCInputBitPerComponent[k] > v->MaximumDSCBitsPerComponent) { 4492 v->NonsupportedDSCInputBPC = true; 4493 } 4494 } 4495 4496 /*Number Of DSC Slices*/ 4497 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 4498 if (v->BlendingAndTiming[k] == k) { 4499 if (v->PixelClockBackEnd[k] > 3200) { 4500 v->NumberOfDSCSlices[k] = dml_ceil(v->PixelClockBackEnd[k] / 400.0, 4.0); 4501 } else if (v->PixelClockBackEnd[k] > 1360) { 4502 v->NumberOfDSCSlices[k] = 8; 4503 } else if (v->PixelClockBackEnd[k] > 680) { 4504 v->NumberOfDSCSlices[k] = 4; 4505 } else if (v->PixelClockBackEnd[k] > 340) { 4506 v->NumberOfDSCSlices[k] = 2; 4507 } else { 4508 v->NumberOfDSCSlices[k] = 1; 4509 } 4510 } else { 4511 v->NumberOfDSCSlices[k] = 0; 4512 } 4513 } 4514 4515 for (i = 0; i < v->soc.num_states; i++) { 4516 for (k = 0; k < v->NumberOfActivePlanes; k++) { 4517 v->RequiresDSC[i][k] = false; 4518 v->RequiresFEC[i][k] = false; 4519 if (v->BlendingAndTiming[k] == k) { 4520 if (v->Output[k] == dm_hdmi) { 4521 v->RequiresDSC[i][k] = false; 4522 v->RequiresFEC[i][k] = false; 4523 v->OutputBppPerState[i][k] = TruncToValidBPP( 4524 dml_min(600.0, v->PHYCLKPerState[i]) * 10, 4525 3, 4526 v->HTotal[k], 4527 v->HActive[k], 4528 v->PixelClockBackEnd[k], 4529 v->ForcedOutputLinkBPP[k], 4530 false, 4531 v->Output[k], 4532 v->OutputFormat[k], 4533 v->DSCInputBitPerComponent[k], 4534 v->NumberOfDSCSlices[k], 4535 v->AudioSampleRate[k], 
4536 v->AudioSampleLayout[k], 4537 v->ODMCombineEnablePerState[i][k]); 4538 } else if (v->Output[k] == dm_dp || v->Output[k] == dm_edp) { 4539 if (v->DSCEnable[k] == true) { 4540 v->RequiresDSC[i][k] = true; 4541 v->LinkDSCEnable = true; 4542 if (v->Output[k] == dm_dp) { 4543 v->RequiresFEC[i][k] = true; 4544 } else { 4545 v->RequiresFEC[i][k] = false; 4546 } 4547 } else { 4548 v->RequiresDSC[i][k] = false; 4549 v->LinkDSCEnable = false; 4550 v->RequiresFEC[i][k] = false; 4551 } 4552 4553 v->Outbpp = BPP_INVALID; 4554 if (v->PHYCLKPerState[i] >= 270.0) { 4555 v->Outbpp = TruncToValidBPP( 4556 (1.0 - v->Downspreading / 100.0) * 2700, 4557 v->OutputLinkDPLanes[k], 4558 v->HTotal[k], 4559 v->HActive[k], 4560 v->PixelClockBackEnd[k], 4561 v->ForcedOutputLinkBPP[k], 4562 v->LinkDSCEnable, 4563 v->Output[k], 4564 v->OutputFormat[k], 4565 v->DSCInputBitPerComponent[k], 4566 v->NumberOfDSCSlices[k], 4567 v->AudioSampleRate[k], 4568 v->AudioSampleLayout[k], 4569 v->ODMCombineEnablePerState[i][k]); 4570 v->OutputBppPerState[i][k] = v->Outbpp; 4571 // TODO: Need some other way to handle this nonsense 4572 // v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " HBR" 4573 } 4574 if (v->Outbpp == BPP_INVALID && v->PHYCLKPerState[i] >= 540.0) { 4575 v->Outbpp = TruncToValidBPP( 4576 (1.0 - v->Downspreading / 100.0) * 5400, 4577 v->OutputLinkDPLanes[k], 4578 v->HTotal[k], 4579 v->HActive[k], 4580 v->PixelClockBackEnd[k], 4581 v->ForcedOutputLinkBPP[k], 4582 v->LinkDSCEnable, 4583 v->Output[k], 4584 v->OutputFormat[k], 4585 v->DSCInputBitPerComponent[k], 4586 v->NumberOfDSCSlices[k], 4587 v->AudioSampleRate[k], 4588 v->AudioSampleLayout[k], 4589 v->ODMCombineEnablePerState[i][k]); 4590 v->OutputBppPerState[i][k] = v->Outbpp; 4591 // TODO: Need some other way to handle this nonsense 4592 // v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " HBR2" 4593 } 4594 if (v->Outbpp == BPP_INVALID && v->PHYCLKPerState[i] >= 810.0) { 4595 v->Outbpp = TruncToValidBPP( 4596 (1.0 - 
v->Downspreading / 100.0) * 8100, 4597 v->OutputLinkDPLanes[k], 4598 v->HTotal[k], 4599 v->HActive[k], 4600 v->PixelClockBackEnd[k], 4601 v->ForcedOutputLinkBPP[k], 4602 v->LinkDSCEnable, 4603 v->Output[k], 4604 v->OutputFormat[k], 4605 v->DSCInputBitPerComponent[k], 4606 v->NumberOfDSCSlices[k], 4607 v->AudioSampleRate[k], 4608 v->AudioSampleLayout[k], 4609 v->ODMCombineEnablePerState[i][k]); 4610 v->OutputBppPerState[i][k] = v->Outbpp; 4611 // TODO: Need some other way to handle this nonsense 4612 // v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " HBR3" 4613 } 4614 if (v->Outbpp == BPP_INVALID && v->PHYCLKD18PerState[i] >= 10000.0 / 18) { 4615 v->Outbpp = TruncToValidBPP( 4616 (1.0 - v->Downspreading / 100.0) * 10000, 4617 4, 4618 v->HTotal[k], 4619 v->HActive[k], 4620 v->PixelClockBackEnd[k], 4621 v->ForcedOutputLinkBPP[k], 4622 v->LinkDSCEnable, 4623 v->Output[k], 4624 v->OutputFormat[k], 4625 v->DSCInputBitPerComponent[k], 4626 v->NumberOfDSCSlices[k], 4627 v->AudioSampleRate[k], 4628 v->AudioSampleLayout[k], 4629 v->ODMCombineEnablePerState[i][k]); 4630 v->OutputBppPerState[i][k] = v->Outbpp; 4631 //v->OutputTypeAndRatePerState[i][k] = v->Output[k] & "10x4"; 4632 } 4633 if (v->Outbpp == BPP_INVALID && v->PHYCLKD18PerState[i] >= 12000.0 / 18) { 4634 v->Outbpp = TruncToValidBPP( 4635 12000, 4636 4, 4637 v->HTotal[k], 4638 v->HActive[k], 4639 v->PixelClockBackEnd[k], 4640 v->ForcedOutputLinkBPP[k], 4641 v->LinkDSCEnable, 4642 v->Output[k], 4643 v->OutputFormat[k], 4644 v->DSCInputBitPerComponent[k], 4645 v->NumberOfDSCSlices[k], 4646 v->AudioSampleRate[k], 4647 v->AudioSampleLayout[k], 4648 v->ODMCombineEnablePerState[i][k]); 4649 v->OutputBppPerState[i][k] = v->Outbpp; 4650 //v->OutputTypeAndRatePerState[i][k] = v->Output[k] & "12x4"; 4651 } 4652 } 4653 } else { 4654 v->OutputBppPerState[i][k] = 0; 4655 } 4656 } 4657 } 4658 4659 for (i = 0; i < v->soc.num_states; i++) { 4660 v->LinkCapacitySupport[i] = true; 4661 for (k = 0; k < v->NumberOfActivePlanes; 
k++) { 4662 if (v->BlendingAndTiming[k] == k 4663 && (v->Output[k] == dm_dp || 4664 v->Output[k] == dm_edp || 4665 v->Output[k] == dm_hdmi) && v->OutputBppPerState[i][k] == 0) { 4666 v->LinkCapacitySupport[i] = false; 4667 } 4668 } 4669 } 4670 4671 // UPTO 2172 4672 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 4673 if (v->BlendingAndTiming[k] == k 4674 && (v->Output[k] == dm_dp || 4675 v->Output[k] == dm_edp || 4676 v->Output[k] == dm_hdmi)) { 4677 if (v->OutputFormat[k] == dm_420 && v->Interlace[k] == 1 && v->ProgressiveToInterlaceUnitInOPP == true) { 4678 P2IWith420 = true; 4679 } 4680 if (v->DSCEnable[k] == true && v->OutputFormat[k] == dm_n422 4681 && !v->DSC422NativeSupport) { 4682 DSC422NativeNotSupported = true; 4683 } 4684 } 4685 } 4686 4687 4688 for (i = 0; i < v->soc.num_states; ++i) { 4689 v->ODMCombine4To1SupportCheckOK[i] = true; 4690 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 4691 if (v->BlendingAndTiming[k] == k && v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1 4692 && (v->ODMCombine4To1Supported == false || v->Output[k] == dm_dp || v->Output[k] == dm_edp 4693 || v->Output[k] == dm_hdmi)) { 4694 v->ODMCombine4To1SupportCheckOK[i] = false; 4695 } 4696 } 4697 } 4698 4699 /* Skip dscclk validation: as long as dispclk is supported, dscclk is also implicitly supported */ 4700 4701 for (i = 0; i < v->soc.num_states; i++) { 4702 v->NotEnoughDSCUnits[i] = false; 4703 v->TotalDSCUnitsRequired = 0.0; 4704 for (k = 0; k < v->NumberOfActivePlanes; k++) { 4705 if (v->RequiresDSC[i][k] == true) { 4706 if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1) { 4707 v->TotalDSCUnitsRequired = v->TotalDSCUnitsRequired + 4.0; 4708 } else if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) { 4709 v->TotalDSCUnitsRequired = v->TotalDSCUnitsRequired + 2.0; 4710 } else { 4711 v->TotalDSCUnitsRequired = v->TotalDSCUnitsRequired + 1.0; 4712 } 4713 } 4714 } 4715 if (v->TotalDSCUnitsRequired > v->NumberOfDSC) { 4716 
v->NotEnoughDSCUnits[i] = true; 4717 } 4718 } 4719 /*DSC Delay per state*/ 4720 4721 for (i = 0; i < v->soc.num_states; i++) { 4722 for (k = 0; k < v->NumberOfActivePlanes; k++) { 4723 if (v->OutputBppPerState[i][k] == BPP_INVALID) { 4724 v->BPP = 0.0; 4725 } else { 4726 v->BPP = v->OutputBppPerState[i][k]; 4727 } 4728 if (v->RequiresDSC[i][k] == true && v->BPP != 0.0) { 4729 if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_disabled) { 4730 v->DSCDelayPerState[i][k] = dscceComputeDelay( 4731 v->DSCInputBitPerComponent[k], 4732 v->BPP, 4733 dml_ceil(1.0 * v->HActive[k] / v->NumberOfDSCSlices[k], 1.0), 4734 v->NumberOfDSCSlices[k], 4735 v->OutputFormat[k], 4736 v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k]); 4737 } else if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) { 4738 v->DSCDelayPerState[i][k] = 2.0 4739 * (dscceComputeDelay( 4740 v->DSCInputBitPerComponent[k], 4741 v->BPP, 4742 dml_ceil(1.0 * v->HActive[k] / v->NumberOfDSCSlices[k], 1.0), 4743 v->NumberOfDSCSlices[k] / 2, 4744 v->OutputFormat[k], 4745 v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k])); 4746 } else { 4747 v->DSCDelayPerState[i][k] = 4.0 4748 * (dscceComputeDelay( 4749 v->DSCInputBitPerComponent[k], 4750 v->BPP, 4751 dml_ceil(1.0 * v->HActive[k] / v->NumberOfDSCSlices[k], 1.0), 4752 v->NumberOfDSCSlices[k] / 4, 4753 v->OutputFormat[k], 4754 v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k])); 4755 } 4756 v->DSCDelayPerState[i][k] = v->DSCDelayPerState[i][k] * v->PixelClock[k] / v->PixelClockBackEnd[k]; 4757 } else { 4758 v->DSCDelayPerState[i][k] = 0.0; 4759 } 4760 } 4761 for (k = 0; k < v->NumberOfActivePlanes; k++) { 4762 for (m = 0; m < v->NumberOfActivePlanes; m++) { 4763 if (v->BlendingAndTiming[k] == m && v->RequiresDSC[i][m] == true) { 4764 v->DSCDelayPerState[i][k] = v->DSCDelayPerState[i][m]; 4765 } 4766 } 4767 } 4768 } 4769 4770 //Calculate Swath, DET Configuration, DCFCLKDeepSleep 4771 // 4772 for (i = 0; 
i < v->soc.num_states; ++i) { 4773 for (j = 0; j <= 1; ++j) { 4774 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 4775 v->RequiredDPPCLKThisState[k] = v->RequiredDPPCLK[i][j][k]; 4776 v->NoOfDPPThisState[k] = v->NoOfDPP[i][j][k]; 4777 v->ODMCombineEnableThisState[k] = v->ODMCombineEnablePerState[i][k]; 4778 } 4779 4780 CalculateSwathAndDETConfiguration( 4781 false, 4782 v->NumberOfActivePlanes, 4783 v->DETBufferSizeInKByte[0], 4784 v->MaximumSwathWidthLuma, 4785 v->MaximumSwathWidthChroma, 4786 v->SourceScan, 4787 v->SourcePixelFormat, 4788 v->SurfaceTiling, 4789 v->ViewportWidth, 4790 v->ViewportHeight, 4791 v->SurfaceWidthY, 4792 v->SurfaceWidthC, 4793 v->SurfaceHeightY, 4794 v->SurfaceHeightC, 4795 v->Read256BlockHeightY, 4796 v->Read256BlockHeightC, 4797 v->Read256BlockWidthY, 4798 v->Read256BlockWidthC, 4799 v->ODMCombineEnableThisState, 4800 v->BlendingAndTiming, 4801 v->BytePerPixelY, 4802 v->BytePerPixelC, 4803 v->BytePerPixelInDETY, 4804 v->BytePerPixelInDETC, 4805 v->HActive, 4806 v->HRatio, 4807 v->HRatioChroma, 4808 v->NoOfDPPThisState, 4809 v->swath_width_luma_ub_this_state, 4810 v->swath_width_chroma_ub_this_state, 4811 v->SwathWidthYThisState, 4812 v->SwathWidthCThisState, 4813 v->SwathHeightYThisState, 4814 v->SwathHeightCThisState, 4815 v->DETBufferSizeYThisState, 4816 v->DETBufferSizeCThisState, 4817 v->dummystring, 4818 &v->ViewportSizeSupport[i][j]); 4819 4820 CalculateDCFCLKDeepSleep( 4821 mode_lib, 4822 v->NumberOfActivePlanes, 4823 v->BytePerPixelY, 4824 v->BytePerPixelC, 4825 v->VRatio, 4826 v->VRatioChroma, 4827 v->SwathWidthYThisState, 4828 v->SwathWidthCThisState, 4829 v->NoOfDPPThisState, 4830 v->HRatio, 4831 v->HRatioChroma, 4832 v->PixelClock, 4833 v->PSCL_FACTOR, 4834 v->PSCL_FACTOR_CHROMA, 4835 v->RequiredDPPCLKThisState, 4836 v->ReadBandwidthLuma, 4837 v->ReadBandwidthChroma, 4838 v->ReturnBusWidth, 4839 &v->ProjectedDCFCLKDeepSleep[i][j]); 4840 4841 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 4842 
v->swath_width_luma_ub_all_states[i][j][k] = v->swath_width_luma_ub_this_state[k]; 4843 v->swath_width_chroma_ub_all_states[i][j][k] = v->swath_width_chroma_ub_this_state[k]; 4844 v->SwathWidthYAllStates[i][j][k] = v->SwathWidthYThisState[k]; 4845 v->SwathWidthCAllStates[i][j][k] = v->SwathWidthCThisState[k]; 4846 v->SwathHeightYAllStates[i][j][k] = v->SwathHeightYThisState[k]; 4847 v->SwathHeightCAllStates[i][j][k] = v->SwathHeightCThisState[k]; 4848 v->DETBufferSizeYAllStates[i][j][k] = v->DETBufferSizeYThisState[k]; 4849 v->DETBufferSizeCAllStates[i][j][k] = v->DETBufferSizeCThisState[k]; 4850 } 4851 } 4852 } 4853 4854 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 4855 v->cursor_bw[k] = v->NumberOfCursors[k] * v->CursorWidth[k][0] * v->CursorBPP[k][0] / 8.0 4856 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatio[k]; 4857 } 4858 4859 for (i = 0; i < v->soc.num_states; i++) { 4860 for (j = 0; j < 2; j++) { 4861 bool NotUrgentLatencyHiding[DC__NUM_DPP__MAX]; 4862 4863 for (k = 0; k < v->NumberOfActivePlanes; k++) { 4864 v->swath_width_luma_ub_this_state[k] = v->swath_width_luma_ub_all_states[i][j][k]; 4865 v->swath_width_chroma_ub_this_state[k] = v->swath_width_chroma_ub_all_states[i][j][k]; 4866 v->SwathWidthYThisState[k] = v->SwathWidthYAllStates[i][j][k]; 4867 v->SwathWidthCThisState[k] = v->SwathWidthCAllStates[i][j][k]; 4868 v->SwathHeightYThisState[k] = v->SwathHeightYAllStates[i][j][k]; 4869 v->SwathHeightCThisState[k] = v->SwathHeightCAllStates[i][j][k]; 4870 v->DETBufferSizeYThisState[k] = v->DETBufferSizeYAllStates[i][j][k]; 4871 v->DETBufferSizeCThisState[k] = v->DETBufferSizeCAllStates[i][j][k]; 4872 } 4873 4874 v->TotalNumberOfDCCActiveDPP[i][j] = 0; 4875 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 4876 if (v->DCCEnable[k] == true) { 4877 v->TotalNumberOfDCCActiveDPP[i][j] = v->TotalNumberOfDCCActiveDPP[i][j] + v->NoOfDPP[i][j][k]; 4878 } 4879 } 4880 4881 for (k = 0; k < v->NumberOfActivePlanes; k++) { 4882 if (v->SourcePixelFormat[k] == dm_420_8 || 
v->SourcePixelFormat[k] == dm_420_10 4883 || v->SourcePixelFormat[k] == dm_420_12 || v->SourcePixelFormat[k] == dm_rgbe_alpha) { 4884 4885 if ((v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12) 4886 && v->SourceScan[k] != dm_vert) { 4887 v->PTEBufferSizeInRequestsForLuma = (v->PTEBufferSizeInRequestsLuma + v->PTEBufferSizeInRequestsChroma) 4888 / 2; 4889 v->PTEBufferSizeInRequestsForChroma = v->PTEBufferSizeInRequestsForLuma; 4890 } else { 4891 v->PTEBufferSizeInRequestsForLuma = v->PTEBufferSizeInRequestsLuma; 4892 v->PTEBufferSizeInRequestsForChroma = v->PTEBufferSizeInRequestsChroma; 4893 } 4894 4895 v->PDEAndMetaPTEBytesPerFrameC = CalculateVMAndRowBytes( 4896 mode_lib, 4897 v->DCCEnable[k], 4898 v->Read256BlockHeightC[k], 4899 v->Read256BlockWidthC[k], 4900 v->SourcePixelFormat[k], 4901 v->SurfaceTiling[k], 4902 v->BytePerPixelC[k], 4903 v->SourceScan[k], 4904 v->SwathWidthCThisState[k], 4905 v->ViewportHeightChroma[k], 4906 v->GPUVMEnable, 4907 v->HostVMEnable, 4908 v->HostVMMaxNonCachedPageTableLevels, 4909 v->GPUVMMinPageSize, 4910 v->HostVMMinPageSize, 4911 v->PTEBufferSizeInRequestsForChroma, 4912 v->PitchC[k], 4913 0.0, 4914 &v->MacroTileWidthC[k], 4915 &v->MetaRowBytesC, 4916 &v->DPTEBytesPerRowC, 4917 &v->PTEBufferSizeNotExceededC[i][j][k], 4918 &v->dummyinteger7, 4919 &v->dpte_row_height_chroma[k], 4920 &v->dummyinteger28, 4921 &v->dummyinteger26, 4922 &v->dummyinteger23, 4923 &v->meta_row_height_chroma[k], 4924 &v->dummyinteger8, 4925 &v->dummyinteger9, 4926 &v->dummyinteger19, 4927 &v->dummyinteger20, 4928 &v->dummyinteger17, 4929 &v->dummyinteger10, 4930 &v->dummyinteger11); 4931 4932 v->PrefetchLinesC[i][j][k] = CalculatePrefetchSourceLines( 4933 mode_lib, 4934 v->VRatioChroma[k], 4935 v->VTAPsChroma[k], 4936 v->Interlace[k], 4937 v->ProgressiveToInterlaceUnitInOPP, 4938 v->SwathHeightCThisState[k], 4939 v->ViewportYStartC[k], 4940 &v->PrefillC[k], 4941 &v->MaxNumSwC[k]); 4942 } else { 4943 
v->PTEBufferSizeInRequestsForLuma = v->PTEBufferSizeInRequestsLuma + v->PTEBufferSizeInRequestsChroma; 4944 v->PTEBufferSizeInRequestsForChroma = 0; 4945 v->PDEAndMetaPTEBytesPerFrameC = 0.0; 4946 v->MetaRowBytesC = 0.0; 4947 v->DPTEBytesPerRowC = 0.0; 4948 v->PrefetchLinesC[i][j][k] = 0.0; 4949 v->PTEBufferSizeNotExceededC[i][j][k] = true; 4950 } 4951 v->PDEAndMetaPTEBytesPerFrameY = CalculateVMAndRowBytes( 4952 mode_lib, 4953 v->DCCEnable[k], 4954 v->Read256BlockHeightY[k], 4955 v->Read256BlockWidthY[k], 4956 v->SourcePixelFormat[k], 4957 v->SurfaceTiling[k], 4958 v->BytePerPixelY[k], 4959 v->SourceScan[k], 4960 v->SwathWidthYThisState[k], 4961 v->ViewportHeight[k], 4962 v->GPUVMEnable, 4963 v->HostVMEnable, 4964 v->HostVMMaxNonCachedPageTableLevels, 4965 v->GPUVMMinPageSize, 4966 v->HostVMMinPageSize, 4967 v->PTEBufferSizeInRequestsForLuma, 4968 v->PitchY[k], 4969 v->DCCMetaPitchY[k], 4970 &v->MacroTileWidthY[k], 4971 &v->MetaRowBytesY, 4972 &v->DPTEBytesPerRowY, 4973 &v->PTEBufferSizeNotExceededY[i][j][k], 4974 &v->dummyinteger7, 4975 &v->dpte_row_height[k], 4976 &v->dummyinteger29, 4977 &v->dummyinteger27, 4978 &v->dummyinteger24, 4979 &v->meta_row_height[k], 4980 &v->dummyinteger25, 4981 &v->dpte_group_bytes[k], 4982 &v->dummyinteger21, 4983 &v->dummyinteger22, 4984 &v->dummyinteger18, 4985 &v->dummyinteger5, 4986 &v->dummyinteger6); 4987 v->PrefetchLinesY[i][j][k] = CalculatePrefetchSourceLines( 4988 mode_lib, 4989 v->VRatio[k], 4990 v->vtaps[k], 4991 v->Interlace[k], 4992 v->ProgressiveToInterlaceUnitInOPP, 4993 v->SwathHeightYThisState[k], 4994 v->ViewportYStartY[k], 4995 &v->PrefillY[k], 4996 &v->MaxNumSwY[k]); 4997 v->PDEAndMetaPTEBytesPerFrame[i][j][k] = v->PDEAndMetaPTEBytesPerFrameY + v->PDEAndMetaPTEBytesPerFrameC; 4998 v->MetaRowBytes[i][j][k] = v->MetaRowBytesY + v->MetaRowBytesC; 4999 v->DPTEBytesPerRow[i][j][k] = v->DPTEBytesPerRowY + v->DPTEBytesPerRowC; 5000 5001 CalculateRowBandwidth( 5002 v->GPUVMEnable, 5003 v->SourcePixelFormat[k], 5004 
v->VRatio[k], 5005 v->VRatioChroma[k], 5006 v->DCCEnable[k], 5007 v->HTotal[k] / v->PixelClock[k], 5008 v->MetaRowBytesY, 5009 v->MetaRowBytesC, 5010 v->meta_row_height[k], 5011 v->meta_row_height_chroma[k], 5012 v->DPTEBytesPerRowY, 5013 v->DPTEBytesPerRowC, 5014 v->dpte_row_height[k], 5015 v->dpte_row_height_chroma[k], 5016 &v->meta_row_bandwidth[i][j][k], 5017 &v->dpte_row_bandwidth[i][j][k]); 5018 } 5019 /* 5020 * DCCMetaBufferSizeSupport(i, j) = True 5021 * For k = 0 To NumberOfActivePlanes - 1 5022 * If MetaRowBytes(i, j, k) > 24064 Then 5023 * DCCMetaBufferSizeSupport(i, j) = False 5024 * End If 5025 * Next k 5026 */ 5027 v->DCCMetaBufferSizeSupport[i][j] = true; 5028 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 5029 if (v->MetaRowBytes[i][j][k] > 24064) 5030 v->DCCMetaBufferSizeSupport[i][j] = false; 5031 } 5032 v->UrgLatency[i] = CalculateUrgentLatency( 5033 v->UrgentLatencyPixelDataOnly, 5034 v->UrgentLatencyPixelMixedWithVMData, 5035 v->UrgentLatencyVMDataOnly, 5036 v->DoUrgentLatencyAdjustment, 5037 v->UrgentLatencyAdjustmentFabricClockComponent, 5038 v->UrgentLatencyAdjustmentFabricClockReference, 5039 v->FabricClockPerState[i]); 5040 5041 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 5042 CalculateUrgentBurstFactor( 5043 v->swath_width_luma_ub_this_state[k], 5044 v->swath_width_chroma_ub_this_state[k], 5045 v->SwathHeightYThisState[k], 5046 v->SwathHeightCThisState[k], 5047 v->HTotal[k] / v->PixelClock[k], 5048 v->UrgLatency[i], 5049 v->CursorBufferSize, 5050 v->CursorWidth[k][0], 5051 v->CursorBPP[k][0], 5052 v->VRatio[k], 5053 v->VRatioChroma[k], 5054 v->BytePerPixelInDETY[k], 5055 v->BytePerPixelInDETC[k], 5056 v->DETBufferSizeYThisState[k], 5057 v->DETBufferSizeCThisState[k], 5058 &v->UrgentBurstFactorCursor[k], 5059 &v->UrgentBurstFactorLuma[k], 5060 &v->UrgentBurstFactorChroma[k], 5061 &NotUrgentLatencyHiding[k]); 5062 } 5063 5064 v->NotEnoughUrgentLatencyHidingA[i][j] = false; 5065 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 5066 if 
(NotUrgentLatencyHiding[k]) { 5067 v->NotEnoughUrgentLatencyHidingA[i][j] = true; 5068 } 5069 } 5070 5071 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 5072 v->VActivePixelBandwidth[i][j][k] = v->ReadBandwidthLuma[k] * v->UrgentBurstFactorLuma[k] 5073 + v->ReadBandwidthChroma[k] * v->UrgentBurstFactorChroma[k]; 5074 v->VActiveCursorBandwidth[i][j][k] = v->cursor_bw[k] * v->UrgentBurstFactorCursor[k]; 5075 } 5076 5077 v->TotalVActivePixelBandwidth[i][j] = 0; 5078 v->TotalVActiveCursorBandwidth[i][j] = 0; 5079 v->TotalMetaRowBandwidth[i][j] = 0; 5080 v->TotalDPTERowBandwidth[i][j] = 0; 5081 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 5082 v->TotalVActivePixelBandwidth[i][j] = v->TotalVActivePixelBandwidth[i][j] + v->VActivePixelBandwidth[i][j][k]; 5083 v->TotalVActiveCursorBandwidth[i][j] = v->TotalVActiveCursorBandwidth[i][j] + v->VActiveCursorBandwidth[i][j][k]; 5084 v->TotalMetaRowBandwidth[i][j] = v->TotalMetaRowBandwidth[i][j] + v->NoOfDPP[i][j][k] * v->meta_row_bandwidth[i][j][k]; 5085 v->TotalDPTERowBandwidth[i][j] = v->TotalDPTERowBandwidth[i][j] + v->NoOfDPP[i][j][k] * v->dpte_row_bandwidth[i][j][k]; 5086 } 5087 } 5088 } 5089 5090 //Calculate Return BW 5091 for (i = 0; i < v->soc.num_states; ++i) { 5092 for (j = 0; j <= 1; ++j) { 5093 for (k = 0; k < v->NumberOfActivePlanes; k++) { 5094 if (v->BlendingAndTiming[k] == k) { 5095 if (v->WritebackEnable[k] == true) { 5096 v->WritebackDelayTime[k] = v->WritebackLatency 5097 + CalculateWriteBackDelay( 5098 v->WritebackPixelFormat[k], 5099 v->WritebackHRatio[k], 5100 v->WritebackVRatio[k], 5101 v->WritebackVTaps[k], 5102 v->WritebackDestinationWidth[k], 5103 v->WritebackDestinationHeight[k], 5104 v->WritebackSourceHeight[k], 5105 v->HTotal[k]) / v->RequiredDISPCLK[i][j]; 5106 } else { 5107 v->WritebackDelayTime[k] = 0.0; 5108 } 5109 for (m = 0; m < v->NumberOfActivePlanes; m++) { 5110 if (v->BlendingAndTiming[m] == k && v->WritebackEnable[m] == true) { 5111 v->WritebackDelayTime[k] = dml_max( 5112 
v->WritebackDelayTime[k], 5113 v->WritebackLatency 5114 + CalculateWriteBackDelay( 5115 v->WritebackPixelFormat[m], 5116 v->WritebackHRatio[m], 5117 v->WritebackVRatio[m], 5118 v->WritebackVTaps[m], 5119 v->WritebackDestinationWidth[m], 5120 v->WritebackDestinationHeight[m], 5121 v->WritebackSourceHeight[m], 5122 v->HTotal[m]) / v->RequiredDISPCLK[i][j]); 5123 } 5124 } 5125 } 5126 } 5127 for (k = 0; k < v->NumberOfActivePlanes; k++) { 5128 for (m = 0; m < v->NumberOfActivePlanes; m++) { 5129 if (v->BlendingAndTiming[k] == m) { 5130 v->WritebackDelayTime[k] = v->WritebackDelayTime[m]; 5131 } 5132 } 5133 } 5134 v->MaxMaxVStartup[i][j] = 0; 5135 for (k = 0; k < v->NumberOfActivePlanes; k++) { 5136 v->MaximumVStartup[i][j][k] = 5137 CalculateMaxVStartup( 5138 v->VTotal[k], 5139 v->VActive[k], 5140 v->VBlankNom[k], 5141 v->HTotal[k], 5142 v->PixelClock[k], 5143 v->ProgressiveToInterlaceUnitInOPP, 5144 v->Interlace[k], 5145 v->ip.VBlankNomDefaultUS, 5146 v->WritebackDelayTime[k]); 5147 v->MaxMaxVStartup[i][j] = dml_max(v->MaxMaxVStartup[i][j], v->MaximumVStartup[i][j][k]); 5148 } 5149 } 5150 } 5151 5152 ReorderingBytes = v->NumberOfChannels 5153 * dml_max3( 5154 v->UrgentOutOfOrderReturnPerChannelPixelDataOnly, 5155 v->UrgentOutOfOrderReturnPerChannelPixelMixedWithVMData, 5156 v->UrgentOutOfOrderReturnPerChannelVMDataOnly); 5157 5158 for (i = 0; i < v->soc.num_states; ++i) { 5159 for (j = 0; j <= 1; ++j) { 5160 v->DCFCLKState[i][j] = v->DCFCLKPerState[i]; 5161 } 5162 } 5163 5164 if (v->UseMinimumRequiredDCFCLK == true) 5165 UseMinimumDCFCLK(mode_lib, MaxPrefetchMode, ReorderingBytes); 5166 5167 for (i = 0; i < v->soc.num_states; ++i) { 5168 for (j = 0; j <= 1; ++j) { 5169 double IdealFabricAndSDPPortBandwidthPerState = dml_min( 5170 v->ReturnBusWidth * v->DCFCLKState[i][j], 5171 v->FabricClockPerState[i] * v->FabricDatapathToDCNDataReturn); 5172 double IdealDRAMBandwidthPerState = v->DRAMSpeedPerState[i] * v->NumberOfChannels * v->DRAMChannelWidth; 5173 double 
PixelDataOnlyReturnBWPerState = dml_min( 5174 IdealFabricAndSDPPortBandwidthPerState * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0, 5175 IdealDRAMBandwidthPerState * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyPixelDataOnly / 100.0); 5176 double PixelMixedWithVMDataReturnBWPerState = dml_min( 5177 IdealFabricAndSDPPortBandwidthPerState * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0, 5178 IdealDRAMBandwidthPerState * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyPixelMixedWithVMData / 100.0); 5179 5180 if (v->HostVMEnable != true) { 5181 v->ReturnBWPerState[i][j] = PixelDataOnlyReturnBWPerState; 5182 } else { 5183 v->ReturnBWPerState[i][j] = PixelMixedWithVMDataReturnBWPerState; 5184 } 5185 } 5186 } 5187 5188 //Re-ordering Buffer Support Check 5189 for (i = 0; i < v->soc.num_states; ++i) { 5190 for (j = 0; j <= 1; ++j) { 5191 if ((v->ROBBufferSizeInKByte - v->PixelChunkSizeInKByte) * 1024 / v->ReturnBWPerState[i][j] 5192 > (v->RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__) / v->DCFCLKState[i][j] + ReorderingBytes / v->ReturnBWPerState[i][j]) { 5193 v->ROBSupport[i][j] = true; 5194 } else { 5195 v->ROBSupport[i][j] = false; 5196 } 5197 } 5198 } 5199 5200 //Vertical Active BW support check 5201 5202 MaxTotalVActiveRDBandwidth = 0; 5203 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 5204 MaxTotalVActiveRDBandwidth = MaxTotalVActiveRDBandwidth + v->ReadBandwidthLuma[k] + v->ReadBandwidthChroma[k]; 5205 } 5206 5207 for (i = 0; i < v->soc.num_states; ++i) { 5208 for (j = 0; j <= 1; ++j) { 5209 v->MaxTotalVerticalActiveAvailableBandwidth[i][j] = dml_min( 5210 dml_min( 5211 v->ReturnBusWidth * v->DCFCLKState[i][j], 5212 v->FabricClockPerState[i] * v->FabricDatapathToDCNDataReturn) 5213 * v->MaxAveragePercentOfIdealFabricAndSDPPortBWDisplayCanUseInNormalSystemOperation / 100, 5214 v->DRAMSpeedPerState[i] * v->NumberOfChannels * v->DRAMChannelWidth 5215 * 
v->MaxAveragePercentOfIdealDRAMBWDisplayCanUseInNormalSystemOperation / 100); 5216 5217 if (MaxTotalVActiveRDBandwidth <= v->MaxTotalVerticalActiveAvailableBandwidth[i][j]) { 5218 v->TotalVerticalActiveBandwidthSupport[i][j] = true; 5219 } else { 5220 v->TotalVerticalActiveBandwidthSupport[i][j] = false; 5221 } 5222 } 5223 } 5224 5225 v->UrgentLatency = CalculateUrgentLatency( 5226 v->UrgentLatencyPixelDataOnly, 5227 v->UrgentLatencyPixelMixedWithVMData, 5228 v->UrgentLatencyVMDataOnly, 5229 v->DoUrgentLatencyAdjustment, 5230 v->UrgentLatencyAdjustmentFabricClockComponent, 5231 v->UrgentLatencyAdjustmentFabricClockReference, 5232 v->FabricClock); 5233 //Prefetch Check 5234 for (i = 0; i < v->soc.num_states; ++i) { 5235 for (j = 0; j <= 1; ++j) { 5236 double VMDataOnlyReturnBWPerState; 5237 double HostVMInefficiencyFactor = 1; 5238 int NextPrefetchModeState = MinPrefetchMode; 5239 bool UnboundedRequestEnabledThisState = false; 5240 int CompressedBufferSizeInkByteThisState = 0; 5241 double dummy; 5242 5243 v->TimeCalc = 24 / v->ProjectedDCFCLKDeepSleep[i][j]; 5244 5245 v->BandwidthWithoutPrefetchSupported[i][j] = true; 5246 if (v->TotalVActivePixelBandwidth[i][j] + v->TotalVActiveCursorBandwidth[i][j] + v->TotalMetaRowBandwidth[i][j] 5247 + v->TotalDPTERowBandwidth[i][j] > v->ReturnBWPerState[i][j] || v->NotEnoughUrgentLatencyHidingA[i][j]) { 5248 v->BandwidthWithoutPrefetchSupported[i][j] = false; 5249 } 5250 5251 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 5252 v->NoOfDPPThisState[k] = v->NoOfDPP[i][j][k]; 5253 v->swath_width_luma_ub_this_state[k] = v->swath_width_luma_ub_all_states[i][j][k]; 5254 v->swath_width_chroma_ub_this_state[k] = v->swath_width_chroma_ub_all_states[i][j][k]; 5255 v->SwathWidthYThisState[k] = v->SwathWidthYAllStates[i][j][k]; 5256 v->SwathWidthCThisState[k] = v->SwathWidthCAllStates[i][j][k]; 5257 v->SwathHeightYThisState[k] = v->SwathHeightYAllStates[i][j][k]; 5258 v->SwathHeightCThisState[k] = v->SwathHeightCAllStates[i][j][k]; 5259 
v->DETBufferSizeYThisState[k] = v->DETBufferSizeYAllStates[i][j][k]; 5260 v->DETBufferSizeCThisState[k] = v->DETBufferSizeCAllStates[i][j][k]; 5261 } 5262 5263 VMDataOnlyReturnBWPerState = dml_min( 5264 dml_min( 5265 v->ReturnBusWidth * v->DCFCLKState[i][j], 5266 v->FabricClockPerState[i] * v->FabricDatapathToDCNDataReturn) 5267 * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0, 5268 v->DRAMSpeedPerState[i] * v->NumberOfChannels * v->DRAMChannelWidth 5269 * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyVMDataOnly / 100.0); 5270 if (v->GPUVMEnable && v->HostVMEnable) 5271 HostVMInefficiencyFactor = v->ReturnBWPerState[i][j] / VMDataOnlyReturnBWPerState; 5272 5273 v->ExtraLatency = CalculateExtraLatency( 5274 v->RoundTripPingLatencyCycles, 5275 ReorderingBytes, 5276 v->DCFCLKState[i][j], 5277 v->TotalNumberOfActiveDPP[i][j], 5278 v->PixelChunkSizeInKByte, 5279 v->TotalNumberOfDCCActiveDPP[i][j], 5280 v->MetaChunkSize, 5281 v->ReturnBWPerState[i][j], 5282 v->GPUVMEnable, 5283 v->HostVMEnable, 5284 v->NumberOfActivePlanes, 5285 v->NoOfDPPThisState, 5286 v->dpte_group_bytes, 5287 HostVMInefficiencyFactor, 5288 v->HostVMMinPageSize, 5289 v->HostVMMaxNonCachedPageTableLevels); 5290 5291 v->NextMaxVStartup = v->MaxMaxVStartup[i][j]; 5292 do { 5293 v->PrefetchModePerState[i][j] = NextPrefetchModeState; 5294 v->MaxVStartup = v->NextMaxVStartup; 5295 5296 v->TWait = CalculateTWait( 5297 v->PrefetchModePerState[i][j], 5298 v->DRAMClockChangeLatency, 5299 v->UrgLatency[i], 5300 v->SREnterPlusExitTime); 5301 5302 for (k = 0; k < v->NumberOfActivePlanes; k++) { 5303 CalculatePrefetchSchedulePerPlane(mode_lib, 5304 HostVMInefficiencyFactor, 5305 i, j, k); 5306 } 5307 5308 for (k = 0; k < v->NumberOfActivePlanes; k++) { 5309 CalculateUrgentBurstFactor( 5310 v->swath_width_luma_ub_this_state[k], 5311 v->swath_width_chroma_ub_this_state[k], 5312 v->SwathHeightYThisState[k], 5313 v->SwathHeightCThisState[k], 5314 v->HTotal[k] / v->PixelClock[k], 5315 
v->UrgentLatency, 5316 v->CursorBufferSize, 5317 v->CursorWidth[k][0], 5318 v->CursorBPP[k][0], 5319 v->VRatioPreY[i][j][k], 5320 v->VRatioPreC[i][j][k], 5321 v->BytePerPixelInDETY[k], 5322 v->BytePerPixelInDETC[k], 5323 v->DETBufferSizeYThisState[k], 5324 v->DETBufferSizeCThisState[k], 5325 &v->UrgentBurstFactorCursorPre[k], 5326 &v->UrgentBurstFactorLumaPre[k], 5327 &v->UrgentBurstFactorChroma[k], 5328 &v->NotUrgentLatencyHidingPre[k]); 5329 } 5330 5331 v->MaximumReadBandwidthWithPrefetch = 0.0; 5332 for (k = 0; k < v->NumberOfActivePlanes; k++) { 5333 v->cursor_bw_pre[k] = v->NumberOfCursors[k] * v->CursorWidth[k][0] * v->CursorBPP[k][0] / 8.0 5334 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatioPreY[i][j][k]; 5335 5336 v->MaximumReadBandwidthWithPrefetch = 5337 v->MaximumReadBandwidthWithPrefetch 5338 + dml_max3( 5339 v->VActivePixelBandwidth[i][j][k] 5340 + v->VActiveCursorBandwidth[i][j][k] 5341 + v->NoOfDPP[i][j][k] 5342 * (v->meta_row_bandwidth[i][j][k] 5343 + v->dpte_row_bandwidth[i][j][k]), 5344 v->NoOfDPP[i][j][k] * v->prefetch_vmrow_bw[k], 5345 v->NoOfDPP[i][j][k] 5346 * (v->RequiredPrefetchPixelDataBWLuma[i][j][k] 5347 * v->UrgentBurstFactorLumaPre[k] 5348 + v->RequiredPrefetchPixelDataBWChroma[i][j][k] 5349 * v->UrgentBurstFactorChromaPre[k]) 5350 + v->cursor_bw_pre[k] * v->UrgentBurstFactorCursorPre[k]); 5351 } 5352 5353 v->NotEnoughUrgentLatencyHidingPre = false; 5354 for (k = 0; k < v->NumberOfActivePlanes; k++) { 5355 if (v->NotUrgentLatencyHidingPre[k] == true) { 5356 v->NotEnoughUrgentLatencyHidingPre = true; 5357 } 5358 } 5359 5360 v->PrefetchSupported[i][j] = true; 5361 if (v->BandwidthWithoutPrefetchSupported[i][j] == false || v->MaximumReadBandwidthWithPrefetch > v->ReturnBWPerState[i][j] 5362 || v->NotEnoughUrgentLatencyHidingPre == 1) { 5363 v->PrefetchSupported[i][j] = false; 5364 } 5365 for (k = 0; k < v->NumberOfActivePlanes; k++) { 5366 if (v->LineTimesForPrefetch[k] < 2.0 || v->LinesForMetaPTE[k] >= 32.0 || v->LinesForMetaAndDPTERow[k] 
>= 16.0 5367 || v->NoTimeForPrefetch[i][j][k] == true) { 5368 v->PrefetchSupported[i][j] = false; 5369 } 5370 } 5371 5372 v->DynamicMetadataSupported[i][j] = true; 5373 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 5374 if (v->NoTimeForDynamicMetadata[i][j][k] == true) { 5375 v->DynamicMetadataSupported[i][j] = false; 5376 } 5377 } 5378 5379 v->VRatioInPrefetchSupported[i][j] = true; 5380 for (k = 0; k < v->NumberOfActivePlanes; k++) { 5381 if (v->VRatioPreY[i][j][k] > 4.0 || v->VRatioPreC[i][j][k] > 4.0 || v->NoTimeForPrefetch[i][j][k] == true) { 5382 v->VRatioInPrefetchSupported[i][j] = false; 5383 } 5384 } 5385 v->AnyLinesForVMOrRowTooLarge = false; 5386 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 5387 if (v->LinesForMetaAndDPTERow[k] >= 16 || v->LinesForMetaPTE[k] >= 32) { 5388 v->AnyLinesForVMOrRowTooLarge = true; 5389 } 5390 } 5391 5392 v->NextPrefetchMode = v->NextPrefetchMode + 1; 5393 5394 if (v->PrefetchSupported[i][j] == true && v->VRatioInPrefetchSupported[i][j] == true) { 5395 v->BandwidthAvailableForImmediateFlip = v->ReturnBWPerState[i][j]; 5396 for (k = 0; k < v->NumberOfActivePlanes; k++) { 5397 v->BandwidthAvailableForImmediateFlip = v->BandwidthAvailableForImmediateFlip 5398 - dml_max( 5399 v->VActivePixelBandwidth[i][j][k] + v->VActiveCursorBandwidth[i][j][k], 5400 v->NoOfDPP[i][j][k] 5401 * (v->RequiredPrefetchPixelDataBWLuma[i][j][k] 5402 * v->UrgentBurstFactorLumaPre[k] 5403 + v->RequiredPrefetchPixelDataBWChroma[i][j][k] 5404 * v->UrgentBurstFactorChromaPre[k]) 5405 + v->cursor_bw_pre[k] * v->UrgentBurstFactorCursorPre[k]); 5406 } 5407 v->TotImmediateFlipBytes = 0.0; 5408 for (k = 0; k < v->NumberOfActivePlanes; k++) { 5409 v->TotImmediateFlipBytes = v->TotImmediateFlipBytes 5410 + v->NoOfDPP[i][j][k] * v->PDEAndMetaPTEBytesPerFrame[i][j][k] + v->MetaRowBytes[i][j][k] 5411 + v->DPTEBytesPerRow[i][j][k]; 5412 } 5413 5414 for (k = 0; k < v->NumberOfActivePlanes; k++) { 5415 CalculateFlipSchedule( 5416 mode_lib, 5417 
HostVMInefficiencyFactor, 5418 v->ExtraLatency, 5419 v->UrgLatency[i], 5420 v->GPUVMMaxPageTableLevels, 5421 v->HostVMEnable, 5422 v->HostVMMaxNonCachedPageTableLevels, 5423 v->GPUVMEnable, 5424 v->HostVMMinPageSize, 5425 v->PDEAndMetaPTEBytesPerFrame[i][j][k], 5426 v->MetaRowBytes[i][j][k], 5427 v->DPTEBytesPerRow[i][j][k], 5428 v->BandwidthAvailableForImmediateFlip, 5429 v->TotImmediateFlipBytes, 5430 v->SourcePixelFormat[k], 5431 v->HTotal[k] / v->PixelClock[k], 5432 v->VRatio[k], 5433 v->VRatioChroma[k], 5434 v->Tno_bw[k], 5435 v->DCCEnable[k], 5436 v->dpte_row_height[k], 5437 v->meta_row_height[k], 5438 v->dpte_row_height_chroma[k], 5439 v->meta_row_height_chroma[k], 5440 &v->DestinationLinesToRequestVMInImmediateFlip[k], 5441 &v->DestinationLinesToRequestRowInImmediateFlip[k], 5442 &v->final_flip_bw[k], 5443 &v->ImmediateFlipSupportedForPipe[k]); 5444 } 5445 v->total_dcn_read_bw_with_flip = 0.0; 5446 for (k = 0; k < v->NumberOfActivePlanes; k++) { 5447 v->total_dcn_read_bw_with_flip = v->total_dcn_read_bw_with_flip 5448 + dml_max3( 5449 v->NoOfDPP[i][j][k] * v->prefetch_vmrow_bw[k], 5450 v->NoOfDPP[i][j][k] * v->final_flip_bw[k] + v->VActivePixelBandwidth[i][j][k] 5451 + v->VActiveCursorBandwidth[i][j][k], 5452 v->NoOfDPP[i][j][k] 5453 * (v->final_flip_bw[k] 5454 + v->RequiredPrefetchPixelDataBWLuma[i][j][k] 5455 * v->UrgentBurstFactorLumaPre[k] 5456 + v->RequiredPrefetchPixelDataBWChroma[i][j][k] 5457 * v->UrgentBurstFactorChromaPre[k]) 5458 + v->cursor_bw_pre[k] * v->UrgentBurstFactorCursorPre[k]); 5459 } 5460 v->ImmediateFlipSupportedForState[i][j] = true; 5461 if (v->total_dcn_read_bw_with_flip > v->ReturnBWPerState[i][j]) { 5462 v->ImmediateFlipSupportedForState[i][j] = false; 5463 } 5464 for (k = 0; k < v->NumberOfActivePlanes; k++) { 5465 if (v->ImmediateFlipSupportedForPipe[k] == false) { 5466 v->ImmediateFlipSupportedForState[i][j] = false; 5467 } 5468 } 5469 } else { 5470 v->ImmediateFlipSupportedForState[i][j] = false; 5471 } 5472 5473 if 
(v->MaxVStartup <= __DML_VBA_MIN_VSTARTUP__ || v->AnyLinesForVMOrRowTooLarge == false) { 5474 v->NextMaxVStartup = v->MaxMaxVStartup[i][j]; 5475 NextPrefetchModeState = NextPrefetchModeState + 1; 5476 } else { 5477 v->NextMaxVStartup = v->NextMaxVStartup - 1; 5478 } 5479 v->NextPrefetchMode = v->NextPrefetchMode + 1; 5480 } while (!((v->PrefetchSupported[i][j] == true && v->DynamicMetadataSupported[i][j] == true && v->VRatioInPrefetchSupported[i][j] == true 5481 && ((v->HostVMEnable == false && 5482 v->ImmediateFlipRequirement[0] != dm_immediate_flip_required) 5483 || v->ImmediateFlipSupportedForState[i][j] == true)) 5484 || (v->NextMaxVStartup == v->MaxMaxVStartup[i][j] && NextPrefetchModeState > MaxPrefetchMode))); 5485 5486 CalculateUnboundedRequestAndCompressedBufferSize( 5487 v->DETBufferSizeInKByte[0], 5488 v->ConfigReturnBufferSizeInKByte, 5489 v->UseUnboundedRequesting, 5490 v->TotalNumberOfActiveDPP[i][j], 5491 NoChroma, 5492 v->MaxNumDPP, 5493 v->CompressedBufferSegmentSizeInkByte, 5494 v->Output, 5495 &UnboundedRequestEnabledThisState, 5496 &CompressedBufferSizeInkByteThisState); 5497 5498 CalculateWatermarksAndDRAMSpeedChangeSupport( 5499 mode_lib, 5500 v->PrefetchModePerState[i][j], 5501 v->NumberOfActivePlanes, 5502 v->MaxLineBufferLines, 5503 v->LineBufferSize, 5504 v->WritebackInterfaceBufferSize, 5505 v->DCFCLKState[i][j], 5506 v->ReturnBWPerState[i][j], 5507 v->SynchronizedVBlank, 5508 v->dpte_group_bytes, 5509 v->MetaChunkSize, 5510 v->UrgLatency[i], 5511 v->ExtraLatency, 5512 v->WritebackLatency, 5513 v->WritebackChunkSize, 5514 v->SOCCLKPerState[i], 5515 v->DRAMClockChangeLatency, 5516 v->SRExitTime, 5517 v->SREnterPlusExitTime, 5518 v->SRExitZ8Time, 5519 v->SREnterPlusExitZ8Time, 5520 v->ProjectedDCFCLKDeepSleep[i][j], 5521 v->DETBufferSizeYThisState, 5522 v->DETBufferSizeCThisState, 5523 v->SwathHeightYThisState, 5524 v->SwathHeightCThisState, 5525 v->LBBitPerPixel, 5526 v->SwathWidthYThisState, 5527 v->SwathWidthCThisState, 5528 v->HRatio, 
5529 v->HRatioChroma, 5530 v->vtaps, 5531 v->VTAPsChroma, 5532 v->VRatio, 5533 v->VRatioChroma, 5534 v->HTotal, 5535 v->PixelClock, 5536 v->BlendingAndTiming, 5537 v->NoOfDPPThisState, 5538 v->BytePerPixelInDETY, 5539 v->BytePerPixelInDETC, 5540 v->DSTXAfterScaler, 5541 v->DSTYAfterScaler, 5542 v->WritebackEnable, 5543 v->WritebackPixelFormat, 5544 v->WritebackDestinationWidth, 5545 v->WritebackDestinationHeight, 5546 v->WritebackSourceHeight, 5547 UnboundedRequestEnabledThisState, 5548 CompressedBufferSizeInkByteThisState, 5549 &v->DRAMClockChangeSupport[i][j], 5550 &v->UrgentWatermark, 5551 &v->WritebackUrgentWatermark, 5552 &v->DRAMClockChangeWatermark, 5553 &v->WritebackDRAMClockChangeWatermark, 5554 &dummy, 5555 &dummy, 5556 &dummy, 5557 &dummy, 5558 &v->MinActiveDRAMClockChangeLatencySupported); 5559 } 5560 } 5561 5562 /*PTE Buffer Size Check*/ 5563 for (i = 0; i < v->soc.num_states; i++) { 5564 for (j = 0; j < 2; j++) { 5565 v->PTEBufferSizeNotExceeded[i][j] = true; 5566 for (k = 0; k < v->NumberOfActivePlanes; k++) { 5567 if (v->PTEBufferSizeNotExceededY[i][j][k] == false || v->PTEBufferSizeNotExceededC[i][j][k] == false) { 5568 v->PTEBufferSizeNotExceeded[i][j] = false; 5569 } 5570 } 5571 } 5572 } 5573 5574 /*Cursor Support Check*/ 5575 v->CursorSupport = true; 5576 for (k = 0; k < v->NumberOfActivePlanes; k++) { 5577 if (v->CursorWidth[k][0] > 0.0) { 5578 if (v->CursorBPP[k][0] == 64 && v->Cursor64BppSupport == false) { 5579 v->CursorSupport = false; 5580 } 5581 } 5582 } 5583 5584 /*Valid Pitch Check*/ 5585 v->PitchSupport = true; 5586 for (k = 0; k < v->NumberOfActivePlanes; k++) { 5587 v->AlignedYPitch[k] = dml_ceil(dml_max(v->PitchY[k], v->SurfaceWidthY[k]), v->MacroTileWidthY[k]); 5588 if (v->DCCEnable[k] == true) { 5589 v->AlignedDCCMetaPitchY[k] = dml_ceil(dml_max(v->DCCMetaPitchY[k], v->SurfaceWidthY[k]), 64.0 * v->Read256BlockWidthY[k]); 5590 } else { 5591 v->AlignedDCCMetaPitchY[k] = v->DCCMetaPitchY[k]; 5592 } 5593 if (v->SourcePixelFormat[k] != 
dm_444_64 && v->SourcePixelFormat[k] != dm_444_32 && v->SourcePixelFormat[k] != dm_444_16 5594 && v->SourcePixelFormat[k] != dm_mono_16 && v->SourcePixelFormat[k] != dm_rgbe 5595 && v->SourcePixelFormat[k] != dm_mono_8) { 5596 v->AlignedCPitch[k] = dml_ceil(dml_max(v->PitchC[k], v->SurfaceWidthC[k]), v->MacroTileWidthC[k]); 5597 if (v->DCCEnable[k] == true) { 5598 v->AlignedDCCMetaPitchC[k] = dml_ceil( 5599 dml_max(v->DCCMetaPitchC[k], v->SurfaceWidthC[k]), 5600 64.0 * v->Read256BlockWidthC[k]); 5601 } else { 5602 v->AlignedDCCMetaPitchC[k] = v->DCCMetaPitchC[k]; 5603 } 5604 } else { 5605 v->AlignedCPitch[k] = v->PitchC[k]; 5606 v->AlignedDCCMetaPitchC[k] = v->DCCMetaPitchC[k]; 5607 } 5608 if (v->AlignedYPitch[k] > v->PitchY[k] || v->AlignedCPitch[k] > v->PitchC[k] 5609 || v->AlignedDCCMetaPitchY[k] > v->DCCMetaPitchY[k] || v->AlignedDCCMetaPitchC[k] > v->DCCMetaPitchC[k]) { 5610 v->PitchSupport = false; 5611 } 5612 } 5613 5614 for (k = 0; k < v->NumberOfActivePlanes; k++) { 5615 if (v->ViewportWidth[k] > v->SurfaceWidthY[k] || v->ViewportHeight[k] > v->SurfaceHeightY[k]) { 5616 ViewportExceedsSurface = true; 5617 if (v->SourcePixelFormat[k] != dm_444_64 && v->SourcePixelFormat[k] != dm_444_32 5618 && v->SourcePixelFormat[k] != dm_444_16 && v->SourcePixelFormat[k] != dm_444_8 5619 && v->SourcePixelFormat[k] != dm_rgbe) { 5620 if (v->ViewportWidthChroma[k] > v->SurfaceWidthC[k] 5621 || v->ViewportHeightChroma[k] > v->SurfaceHeightC[k]) { 5622 ViewportExceedsSurface = true; 5623 } 5624 } 5625 } 5626 } 5627 5628 /*Mode Support, Voltage State and SOC Configuration*/ 5629 for (i = v->soc.num_states - 1; i >= 0; i--) { 5630 for (j = 0; j < 2; j++) { 5631 if (v->ScaleRatioAndTapsSupport == true && v->SourceFormatPixelAndScanSupport == true && v->ViewportSizeSupport[i][j] == true 5632 && v->LinkCapacitySupport[i] == true && !P2IWith420 && !DSCOnlyIfNecessaryWithBPP 5633 && !DSC422NativeNotSupported && v->ODMCombine4To1SupportCheckOK[i] == true && v->NotEnoughDSCUnits[i] == 
false 5634 && v->DTBCLKRequiredMoreThanSupported[i] == false 5635 && v->ROBSupport[i][j] == true && v->DISPCLK_DPPCLK_Support[i][j] == true 5636 && v->TotalAvailablePipesSupport[i][j] == true && EnoughWritebackUnits == true 5637 && v->WritebackLatencySupport == true && v->WritebackScaleRatioAndTapsSupport == true 5638 && v->CursorSupport == true && v->PitchSupport == true && ViewportExceedsSurface == false 5639 && v->PrefetchSupported[i][j] == true && v->DynamicMetadataSupported[i][j] == true 5640 && v->TotalVerticalActiveBandwidthSupport[i][j] == true && v->VRatioInPrefetchSupported[i][j] == true 5641 && v->PTEBufferSizeNotExceeded[i][j] == true && v->NonsupportedDSCInputBPC == false 5642 && ((v->HostVMEnable == false 5643 && v->ImmediateFlipRequirement[0] != dm_immediate_flip_required) 5644 || v->ImmediateFlipSupportedForState[i][j] == true) 5645 && FMTBufferExceeded == false) { 5646 v->ModeSupport[i][j] = true; 5647 } else { 5648 v->ModeSupport[i][j] = false; 5649 } 5650 } 5651 } 5652 5653 { 5654 unsigned int MaximumMPCCombine = 0; 5655 5656 for (i = v->soc.num_states; i >= 0; i--) { 5657 if (i == v->soc.num_states || v->ModeSupport[i][0] == true || v->ModeSupport[i][1] == true) { 5658 v->VoltageLevel = i; 5659 v->ModeIsSupported = v->ModeSupport[i][0] == true || v->ModeSupport[i][1] == true; 5660 if (v->ModeSupport[i][0] == true) { 5661 MaximumMPCCombine = 0; 5662 } else { 5663 MaximumMPCCombine = 1; 5664 } 5665 } 5666 } 5667 v->ImmediateFlipSupport = v->ImmediateFlipSupportedForState[v->VoltageLevel][MaximumMPCCombine]; 5668 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) { 5669 v->MPCCombineEnable[k] = v->MPCCombine[v->VoltageLevel][MaximumMPCCombine][k]; 5670 v->DPPPerPlane[k] = v->NoOfDPP[v->VoltageLevel][MaximumMPCCombine][k]; 5671 } 5672 v->DCFCLK = v->DCFCLKState[v->VoltageLevel][MaximumMPCCombine]; 5673 v->DRAMSpeed = v->DRAMSpeedPerState[v->VoltageLevel]; 5674 v->FabricClock = v->FabricClockPerState[v->VoltageLevel]; 5675 v->SOCCLK = 
v->SOCCLKPerState[v->VoltageLevel]; 5676 v->ReturnBW = v->ReturnBWPerState[v->VoltageLevel][MaximumMPCCombine]; 5677 v->maxMpcComb = MaximumMPCCombine; 5678 } 5679 } 5680 5681 static void CalculateWatermarksAndDRAMSpeedChangeSupport( 5682 struct display_mode_lib *mode_lib, 5683 unsigned int PrefetchMode, 5684 unsigned int NumberOfActivePlanes, 5685 unsigned int MaxLineBufferLines, 5686 unsigned int LineBufferSize, 5687 unsigned int WritebackInterfaceBufferSize, 5688 double DCFCLK, 5689 double ReturnBW, 5690 bool SynchronizedVBlank, 5691 unsigned int dpte_group_bytes[], 5692 unsigned int MetaChunkSize, 5693 double UrgentLatency, 5694 double ExtraLatency, 5695 double WritebackLatency, 5696 double WritebackChunkSize, 5697 double SOCCLK, 5698 double DRAMClockChangeLatency, 5699 double SRExitTime, 5700 double SREnterPlusExitTime, 5701 double SRExitZ8Time, 5702 double SREnterPlusExitZ8Time, 5703 double DCFCLKDeepSleep, 5704 unsigned int DETBufferSizeY[], 5705 unsigned int DETBufferSizeC[], 5706 unsigned int SwathHeightY[], 5707 unsigned int SwathHeightC[], 5708 unsigned int LBBitPerPixel[], 5709 double SwathWidthY[], 5710 double SwathWidthC[], 5711 double HRatio[], 5712 double HRatioChroma[], 5713 unsigned int vtaps[], 5714 unsigned int VTAPsChroma[], 5715 double VRatio[], 5716 double VRatioChroma[], 5717 unsigned int HTotal[], 5718 double PixelClock[], 5719 unsigned int BlendingAndTiming[], 5720 unsigned int DPPPerPlane[], 5721 double BytePerPixelDETY[], 5722 double BytePerPixelDETC[], 5723 double DSTXAfterScaler[], 5724 double DSTYAfterScaler[], 5725 bool WritebackEnable[], 5726 enum source_format_class WritebackPixelFormat[], 5727 double WritebackDestinationWidth[], 5728 double WritebackDestinationHeight[], 5729 double WritebackSourceHeight[], 5730 bool UnboundedRequestEnabled, 5731 unsigned int CompressedBufferSizeInkByte, 5732 enum clock_change_support *DRAMClockChangeSupport, 5733 double *UrgentWatermark, 5734 double *WritebackUrgentWatermark, 5735 double 
*DRAMClockChangeWatermark, 5736 double *WritebackDRAMClockChangeWatermark, 5737 double *StutterExitWatermark, 5738 double *StutterEnterPlusExitWatermark, 5739 double *Z8StutterExitWatermark, 5740 double *Z8StutterEnterPlusExitWatermark, 5741 double *MinActiveDRAMClockChangeLatencySupported) 5742 { 5743 struct vba_vars_st *v = &mode_lib->vba; 5744 double EffectiveLBLatencyHidingY; 5745 double EffectiveLBLatencyHidingC; 5746 double LinesInDETY[DC__NUM_DPP__MAX]; 5747 double LinesInDETC; 5748 unsigned int LinesInDETYRoundedDownToSwath[DC__NUM_DPP__MAX]; 5749 unsigned int LinesInDETCRoundedDownToSwath; 5750 double FullDETBufferingTimeY; 5751 double FullDETBufferingTimeC; 5752 double ActiveDRAMClockChangeLatencyMarginY; 5753 double ActiveDRAMClockChangeLatencyMarginC; 5754 double WritebackDRAMClockChangeLatencyMargin; 5755 double PlaneWithMinActiveDRAMClockChangeMargin; 5756 double SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank; 5757 double WritebackDRAMClockChangeLatencyHiding; 5758 double TotalPixelBW = 0.0; 5759 int k, j; 5760 5761 *UrgentWatermark = UrgentLatency + ExtraLatency; 5762 5763 #ifdef __DML_VBA_DEBUG__ 5764 dml_print("DML::%s: UrgentLatency = %f\n", __func__, UrgentLatency); 5765 dml_print("DML::%s: ExtraLatency = %f\n", __func__, ExtraLatency); 5766 dml_print("DML::%s: UrgentWatermark = %f\n", __func__, *UrgentWatermark); 5767 #endif 5768 5769 *DRAMClockChangeWatermark = DRAMClockChangeLatency + *UrgentWatermark; 5770 5771 #ifdef __DML_VBA_DEBUG__ 5772 dml_print("DML::%s: DRAMClockChangeLatency = %f\n", __func__, DRAMClockChangeLatency); 5773 dml_print("DML::%s: DRAMClockChangeWatermark = %f\n", __func__, *DRAMClockChangeWatermark); 5774 #endif 5775 5776 v->TotalActiveWriteback = 0; 5777 for (k = 0; k < NumberOfActivePlanes; ++k) { 5778 if (WritebackEnable[k] == true) { 5779 v->TotalActiveWriteback = v->TotalActiveWriteback + 1; 5780 } 5781 } 5782 5783 if (v->TotalActiveWriteback <= 1) { 5784 *WritebackUrgentWatermark = WritebackLatency; 5785 } 
else { 5786 *WritebackUrgentWatermark = WritebackLatency + WritebackChunkSize * 1024.0 / 32.0 / SOCCLK; 5787 } 5788 5789 if (v->TotalActiveWriteback <= 1) { 5790 *WritebackDRAMClockChangeWatermark = DRAMClockChangeLatency + WritebackLatency; 5791 } else { 5792 *WritebackDRAMClockChangeWatermark = DRAMClockChangeLatency + WritebackLatency + WritebackChunkSize * 1024.0 / 32.0 / SOCCLK; 5793 } 5794 5795 for (k = 0; k < NumberOfActivePlanes; ++k) { 5796 TotalPixelBW = TotalPixelBW 5797 + DPPPerPlane[k] * (SwathWidthY[k] * BytePerPixelDETY[k] * VRatio[k] + SwathWidthC[k] * BytePerPixelDETC[k] * VRatioChroma[k]) 5798 / (HTotal[k] / PixelClock[k]); 5799 } 5800 5801 for (k = 0; k < NumberOfActivePlanes; ++k) { 5802 double EffectiveDETBufferSizeY = DETBufferSizeY[k]; 5803 5804 v->LBLatencyHidingSourceLinesY = dml_min( 5805 (double) MaxLineBufferLines, 5806 dml_floor(LineBufferSize / LBBitPerPixel[k] / (SwathWidthY[k] / dml_max(HRatio[k], 1.0)), 1)) - (vtaps[k] - 1); 5807 5808 v->LBLatencyHidingSourceLinesC = dml_min( 5809 (double) MaxLineBufferLines, 5810 dml_floor(LineBufferSize / LBBitPerPixel[k] / (SwathWidthC[k] / dml_max(HRatioChroma[k], 1.0)), 1)) - (VTAPsChroma[k] - 1); 5811 5812 EffectiveLBLatencyHidingY = v->LBLatencyHidingSourceLinesY / VRatio[k] * (HTotal[k] / PixelClock[k]); 5813 5814 EffectiveLBLatencyHidingC = v->LBLatencyHidingSourceLinesC / VRatioChroma[k] * (HTotal[k] / PixelClock[k]); 5815 5816 if (UnboundedRequestEnabled) { 5817 EffectiveDETBufferSizeY = EffectiveDETBufferSizeY 5818 + CompressedBufferSizeInkByte * 1024 * SwathWidthY[k] * BytePerPixelDETY[k] * VRatio[k] / (HTotal[k] / PixelClock[k]) / TotalPixelBW; 5819 } 5820 5821 LinesInDETY[k] = (double) EffectiveDETBufferSizeY / BytePerPixelDETY[k] / SwathWidthY[k]; 5822 LinesInDETYRoundedDownToSwath[k] = dml_floor(LinesInDETY[k], SwathHeightY[k]); 5823 FullDETBufferingTimeY = LinesInDETYRoundedDownToSwath[k] * (HTotal[k] / PixelClock[k]) / VRatio[k]; 5824 if (BytePerPixelDETC[k] > 0) { 5825 
LinesInDETC = v->DETBufferSizeC[k] / BytePerPixelDETC[k] / SwathWidthC[k]; 5826 LinesInDETCRoundedDownToSwath = dml_floor(LinesInDETC, SwathHeightC[k]); 5827 FullDETBufferingTimeC = LinesInDETCRoundedDownToSwath * (HTotal[k] / PixelClock[k]) / VRatioChroma[k]; 5828 } else { 5829 LinesInDETC = 0; 5830 FullDETBufferingTimeC = 999999; 5831 } 5832 5833 ActiveDRAMClockChangeLatencyMarginY = EffectiveLBLatencyHidingY + FullDETBufferingTimeY 5834 - ((double) DSTXAfterScaler[k] / HTotal[k] + DSTYAfterScaler[k]) * HTotal[k] / PixelClock[k] - *UrgentWatermark - *DRAMClockChangeWatermark; 5835 5836 if (NumberOfActivePlanes > 1) { 5837 ActiveDRAMClockChangeLatencyMarginY = ActiveDRAMClockChangeLatencyMarginY 5838 - (1 - 1.0 / NumberOfActivePlanes) * SwathHeightY[k] * HTotal[k] / PixelClock[k] / VRatio[k]; 5839 } 5840 5841 if (BytePerPixelDETC[k] > 0) { 5842 ActiveDRAMClockChangeLatencyMarginC = EffectiveLBLatencyHidingC + FullDETBufferingTimeC 5843 - ((double) DSTXAfterScaler[k] / HTotal[k] + DSTYAfterScaler[k]) * HTotal[k] / PixelClock[k] - *UrgentWatermark - *DRAMClockChangeWatermark; 5844 5845 if (NumberOfActivePlanes > 1) { 5846 ActiveDRAMClockChangeLatencyMarginC = ActiveDRAMClockChangeLatencyMarginC 5847 - (1 - 1.0 / NumberOfActivePlanes) * SwathHeightC[k] * HTotal[k] / PixelClock[k] / VRatioChroma[k]; 5848 } 5849 v->ActiveDRAMClockChangeLatencyMargin[k] = dml_min(ActiveDRAMClockChangeLatencyMarginY, ActiveDRAMClockChangeLatencyMarginC); 5850 } else { 5851 v->ActiveDRAMClockChangeLatencyMargin[k] = ActiveDRAMClockChangeLatencyMarginY; 5852 } 5853 5854 if (WritebackEnable[k] == true) { 5855 WritebackDRAMClockChangeLatencyHiding = WritebackInterfaceBufferSize * 1024 5856 / (WritebackDestinationWidth[k] * WritebackDestinationHeight[k] / (WritebackSourceHeight[k] * HTotal[k] / PixelClock[k]) * 4); 5857 if (WritebackPixelFormat[k] == dm_444_64) { 5858 WritebackDRAMClockChangeLatencyHiding = WritebackDRAMClockChangeLatencyHiding / 2; 5859 } 5860 
WritebackDRAMClockChangeLatencyMargin = WritebackDRAMClockChangeLatencyHiding - v->WritebackDRAMClockChangeWatermark; 5861 v->ActiveDRAMClockChangeLatencyMargin[k] = dml_min(v->ActiveDRAMClockChangeLatencyMargin[k], WritebackDRAMClockChangeLatencyMargin); 5862 } 5863 } 5864 5865 v->MinActiveDRAMClockChangeMargin = 999999; 5866 PlaneWithMinActiveDRAMClockChangeMargin = 0; 5867 for (k = 0; k < NumberOfActivePlanes; ++k) { 5868 if (v->ActiveDRAMClockChangeLatencyMargin[k] < v->MinActiveDRAMClockChangeMargin) { 5869 v->MinActiveDRAMClockChangeMargin = v->ActiveDRAMClockChangeLatencyMargin[k]; 5870 if (BlendingAndTiming[k] == k) { 5871 PlaneWithMinActiveDRAMClockChangeMargin = k; 5872 } else { 5873 for (j = 0; j < NumberOfActivePlanes; ++j) { 5874 if (BlendingAndTiming[k] == j) { 5875 PlaneWithMinActiveDRAMClockChangeMargin = j; 5876 } 5877 } 5878 } 5879 } 5880 } 5881 5882 *MinActiveDRAMClockChangeLatencySupported = v->MinActiveDRAMClockChangeMargin + DRAMClockChangeLatency; 5883 5884 SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank = 999999; 5885 for (k = 0; k < NumberOfActivePlanes; ++k) { 5886 if (!((k == PlaneWithMinActiveDRAMClockChangeMargin) && (BlendingAndTiming[k] == k)) && !(BlendingAndTiming[k] == PlaneWithMinActiveDRAMClockChangeMargin) 5887 && v->ActiveDRAMClockChangeLatencyMargin[k] < SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank) { 5888 SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank = v->ActiveDRAMClockChangeLatencyMargin[k]; 5889 } 5890 } 5891 5892 v->TotalNumberOfActiveOTG = 0; 5893 5894 for (k = 0; k < NumberOfActivePlanes; ++k) { 5895 if (BlendingAndTiming[k] == k) { 5896 v->TotalNumberOfActiveOTG = v->TotalNumberOfActiveOTG + 1; 5897 } 5898 } 5899 5900 if (v->MinActiveDRAMClockChangeMargin > 0 && PrefetchMode == 0) { 5901 *DRAMClockChangeSupport = dm_dram_clock_change_vactive; 5902 } else if ((SynchronizedVBlank == true || v->TotalNumberOfActiveOTG == 1 5903 || SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank > 0) && 
PrefetchMode == 0) { 5904 *DRAMClockChangeSupport = dm_dram_clock_change_vblank; 5905 } else { 5906 *DRAMClockChangeSupport = dm_dram_clock_change_unsupported; 5907 } 5908 5909 *StutterExitWatermark = SRExitTime + ExtraLatency + 10 / DCFCLKDeepSleep; 5910 *StutterEnterPlusExitWatermark = (SREnterPlusExitTime + ExtraLatency + 10 / DCFCLKDeepSleep); 5911 *Z8StutterExitWatermark = SRExitZ8Time + ExtraLatency + 10 / DCFCLKDeepSleep; 5912 *Z8StutterEnterPlusExitWatermark = SREnterPlusExitZ8Time + ExtraLatency + 10 / DCFCLKDeepSleep; 5913 5914 #ifdef __DML_VBA_DEBUG__ 5915 dml_print("DML::%s: StutterExitWatermark = %f\n", __func__, *StutterExitWatermark); 5916 dml_print("DML::%s: StutterEnterPlusExitWatermark = %f\n", __func__, *StutterEnterPlusExitWatermark); 5917 dml_print("DML::%s: Z8StutterExitWatermark = %f\n", __func__, *Z8StutterExitWatermark); 5918 dml_print("DML::%s: Z8StutterEnterPlusExitWatermark = %f\n", __func__, *Z8StutterEnterPlusExitWatermark); 5919 #endif 5920 } 5921 5922 static void CalculateDCFCLKDeepSleep( 5923 struct display_mode_lib *mode_lib, 5924 unsigned int NumberOfActivePlanes, 5925 int BytePerPixelY[], 5926 int BytePerPixelC[], 5927 double VRatio[], 5928 double VRatioChroma[], 5929 double SwathWidthY[], 5930 double SwathWidthC[], 5931 unsigned int DPPPerPlane[], 5932 double HRatio[], 5933 double HRatioChroma[], 5934 double PixelClock[], 5935 double PSCL_THROUGHPUT[], 5936 double PSCL_THROUGHPUT_CHROMA[], 5937 double DPPCLK[], 5938 double ReadBandwidthLuma[], 5939 double ReadBandwidthChroma[], 5940 int ReturnBusWidth, 5941 double *DCFCLKDeepSleep) 5942 { 5943 struct vba_vars_st *v = &mode_lib->vba; 5944 double DisplayPipeLineDeliveryTimeLuma; 5945 double DisplayPipeLineDeliveryTimeChroma; 5946 double ReadBandwidth = 0.0; 5947 int k; 5948 5949 for (k = 0; k < NumberOfActivePlanes; ++k) { 5950 5951 if (VRatio[k] <= 1) { 5952 DisplayPipeLineDeliveryTimeLuma = SwathWidthY[k] * DPPPerPlane[k] / HRatio[k] / PixelClock[k]; 5953 } else { 5954 
DisplayPipeLineDeliveryTimeLuma = SwathWidthY[k] / PSCL_THROUGHPUT[k] / DPPCLK[k]; 5955 } 5956 if (BytePerPixelC[k] == 0) { 5957 DisplayPipeLineDeliveryTimeChroma = 0; 5958 } else { 5959 if (VRatioChroma[k] <= 1) { 5960 DisplayPipeLineDeliveryTimeChroma = SwathWidthC[k] * DPPPerPlane[k] / HRatioChroma[k] / PixelClock[k]; 5961 } else { 5962 DisplayPipeLineDeliveryTimeChroma = SwathWidthC[k] / PSCL_THROUGHPUT_CHROMA[k] / DPPCLK[k]; 5963 } 5964 } 5965 5966 if (BytePerPixelC[k] > 0) { 5967 v->DCFCLKDeepSleepPerPlane[k] = dml_max(__DML_MIN_DCFCLK_FACTOR__ * SwathWidthY[k] * BytePerPixelY[k] / 32.0 / DisplayPipeLineDeliveryTimeLuma, 5968 __DML_MIN_DCFCLK_FACTOR__ * SwathWidthC[k] * BytePerPixelC[k] / 32.0 / DisplayPipeLineDeliveryTimeChroma); 5969 } else { 5970 v->DCFCLKDeepSleepPerPlane[k] = __DML_MIN_DCFCLK_FACTOR__ * SwathWidthY[k] * BytePerPixelY[k] / 64.0 / DisplayPipeLineDeliveryTimeLuma; 5971 } 5972 v->DCFCLKDeepSleepPerPlane[k] = dml_max(v->DCFCLKDeepSleepPerPlane[k], PixelClock[k] / 16); 5973 5974 } 5975 5976 for (k = 0; k < NumberOfActivePlanes; ++k) { 5977 ReadBandwidth = ReadBandwidth + ReadBandwidthLuma[k] + ReadBandwidthChroma[k]; 5978 } 5979 5980 *DCFCLKDeepSleep = dml_max(8.0, __DML_MIN_DCFCLK_FACTOR__ * ReadBandwidth / ReturnBusWidth); 5981 5982 for (k = 0; k < NumberOfActivePlanes; ++k) { 5983 *DCFCLKDeepSleep = dml_max(*DCFCLKDeepSleep, v->DCFCLKDeepSleepPerPlane[k]); 5984 } 5985 } 5986 5987 static void CalculateUrgentBurstFactor( 5988 int swath_width_luma_ub, 5989 int swath_width_chroma_ub, 5990 unsigned int SwathHeightY, 5991 unsigned int SwathHeightC, 5992 double LineTime, 5993 double UrgentLatency, 5994 double CursorBufferSize, 5995 unsigned int CursorWidth, 5996 unsigned int CursorBPP, 5997 double VRatio, 5998 double VRatioC, 5999 double BytePerPixelInDETY, 6000 double BytePerPixelInDETC, 6001 double DETBufferSizeY, 6002 double DETBufferSizeC, 6003 double *UrgentBurstFactorCursor, 6004 double *UrgentBurstFactorLuma, 6005 double 
*UrgentBurstFactorChroma, 6006 bool *NotEnoughUrgentLatencyHiding) 6007 { 6008 double LinesInDETLuma; 6009 double LinesInDETChroma; 6010 unsigned int LinesInCursorBuffer; 6011 double CursorBufferSizeInTime; 6012 double DETBufferSizeInTimeLuma; 6013 double DETBufferSizeInTimeChroma; 6014 6015 *NotEnoughUrgentLatencyHiding = 0; 6016 6017 if (CursorWidth > 0) { 6018 LinesInCursorBuffer = 1 << (unsigned int) dml_floor(dml_log2(CursorBufferSize * 1024.0 / (CursorWidth * CursorBPP / 8.0)), 1.0); 6019 if (VRatio > 0) { 6020 CursorBufferSizeInTime = LinesInCursorBuffer * LineTime / VRatio; 6021 if (CursorBufferSizeInTime - UrgentLatency <= 0) { 6022 *NotEnoughUrgentLatencyHiding = 1; 6023 *UrgentBurstFactorCursor = 0; 6024 } else { 6025 *UrgentBurstFactorCursor = CursorBufferSizeInTime / (CursorBufferSizeInTime - UrgentLatency); 6026 } 6027 } else { 6028 *UrgentBurstFactorCursor = 1; 6029 } 6030 } 6031 6032 LinesInDETLuma = DETBufferSizeY / BytePerPixelInDETY / swath_width_luma_ub; 6033 if (VRatio > 0) { 6034 DETBufferSizeInTimeLuma = dml_floor(LinesInDETLuma, SwathHeightY) * LineTime / VRatio; 6035 if (DETBufferSizeInTimeLuma - UrgentLatency <= 0) { 6036 *NotEnoughUrgentLatencyHiding = 1; 6037 *UrgentBurstFactorLuma = 0; 6038 } else { 6039 *UrgentBurstFactorLuma = DETBufferSizeInTimeLuma / (DETBufferSizeInTimeLuma - UrgentLatency); 6040 } 6041 } else { 6042 *UrgentBurstFactorLuma = 1; 6043 } 6044 6045 if (BytePerPixelInDETC > 0) { 6046 LinesInDETChroma = DETBufferSizeC / BytePerPixelInDETC / swath_width_chroma_ub; 6047 if (VRatio > 0) { 6048 DETBufferSizeInTimeChroma = dml_floor(LinesInDETChroma, SwathHeightC) * LineTime / VRatio; 6049 if (DETBufferSizeInTimeChroma - UrgentLatency <= 0) { 6050 *NotEnoughUrgentLatencyHiding = 1; 6051 *UrgentBurstFactorChroma = 0; 6052 } else { 6053 *UrgentBurstFactorChroma = DETBufferSizeInTimeChroma / (DETBufferSizeInTimeChroma - UrgentLatency); 6054 } 6055 } else { 6056 *UrgentBurstFactorChroma = 1; 6057 } 6058 } 6059 } 6060 6061 static 
void CalculatePixelDeliveryTimes( 6062 unsigned int NumberOfActivePlanes, 6063 double VRatio[], 6064 double VRatioChroma[], 6065 double VRatioPrefetchY[], 6066 double VRatioPrefetchC[], 6067 unsigned int swath_width_luma_ub[], 6068 unsigned int swath_width_chroma_ub[], 6069 unsigned int DPPPerPlane[], 6070 double HRatio[], 6071 double HRatioChroma[], 6072 double PixelClock[], 6073 double PSCL_THROUGHPUT[], 6074 double PSCL_THROUGHPUT_CHROMA[], 6075 double DPPCLK[], 6076 int BytePerPixelC[], 6077 enum scan_direction_class SourceScan[], 6078 unsigned int NumberOfCursors[], 6079 unsigned int CursorWidth[][DC__NUM_CURSOR__MAX], 6080 unsigned int CursorBPP[][DC__NUM_CURSOR__MAX], 6081 unsigned int BlockWidth256BytesY[], 6082 unsigned int BlockHeight256BytesY[], 6083 unsigned int BlockWidth256BytesC[], 6084 unsigned int BlockHeight256BytesC[], 6085 double DisplayPipeLineDeliveryTimeLuma[], 6086 double DisplayPipeLineDeliveryTimeChroma[], 6087 double DisplayPipeLineDeliveryTimeLumaPrefetch[], 6088 double DisplayPipeLineDeliveryTimeChromaPrefetch[], 6089 double DisplayPipeRequestDeliveryTimeLuma[], 6090 double DisplayPipeRequestDeliveryTimeChroma[], 6091 double DisplayPipeRequestDeliveryTimeLumaPrefetch[], 6092 double DisplayPipeRequestDeliveryTimeChromaPrefetch[], 6093 double CursorRequestDeliveryTime[], 6094 double CursorRequestDeliveryTimePrefetch[]) 6095 { 6096 double req_per_swath_ub; 6097 int k; 6098 6099 for (k = 0; k < NumberOfActivePlanes; ++k) { 6100 if (VRatio[k] <= 1) { 6101 DisplayPipeLineDeliveryTimeLuma[k] = swath_width_luma_ub[k] * DPPPerPlane[k] / HRatio[k] / PixelClock[k]; 6102 } else { 6103 DisplayPipeLineDeliveryTimeLuma[k] = swath_width_luma_ub[k] / PSCL_THROUGHPUT[k] / DPPCLK[k]; 6104 } 6105 6106 if (BytePerPixelC[k] == 0) { 6107 DisplayPipeLineDeliveryTimeChroma[k] = 0; 6108 } else { 6109 if (VRatioChroma[k] <= 1) { 6110 DisplayPipeLineDeliveryTimeChroma[k] = swath_width_chroma_ub[k] * DPPPerPlane[k] / HRatioChroma[k] / PixelClock[k]; 6111 } else { 
6112 DisplayPipeLineDeliveryTimeChroma[k] = swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / DPPCLK[k]; 6113 } 6114 } 6115 6116 if (VRatioPrefetchY[k] <= 1) { 6117 DisplayPipeLineDeliveryTimeLumaPrefetch[k] = swath_width_luma_ub[k] * DPPPerPlane[k] / HRatio[k] / PixelClock[k]; 6118 } else { 6119 DisplayPipeLineDeliveryTimeLumaPrefetch[k] = swath_width_luma_ub[k] / PSCL_THROUGHPUT[k] / DPPCLK[k]; 6120 } 6121 6122 if (BytePerPixelC[k] == 0) { 6123 DisplayPipeLineDeliveryTimeChromaPrefetch[k] = 0; 6124 } else { 6125 if (VRatioPrefetchC[k] <= 1) { 6126 DisplayPipeLineDeliveryTimeChromaPrefetch[k] = swath_width_chroma_ub[k] * DPPPerPlane[k] / HRatioChroma[k] / PixelClock[k]; 6127 } else { 6128 DisplayPipeLineDeliveryTimeChromaPrefetch[k] = swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / DPPCLK[k]; 6129 } 6130 } 6131 } 6132 6133 for (k = 0; k < NumberOfActivePlanes; ++k) { 6134 if (SourceScan[k] != dm_vert) { 6135 req_per_swath_ub = swath_width_luma_ub[k] / BlockWidth256BytesY[k]; 6136 } else { 6137 req_per_swath_ub = swath_width_luma_ub[k] / BlockHeight256BytesY[k]; 6138 } 6139 DisplayPipeRequestDeliveryTimeLuma[k] = DisplayPipeLineDeliveryTimeLuma[k] / req_per_swath_ub; 6140 DisplayPipeRequestDeliveryTimeLumaPrefetch[k] = DisplayPipeLineDeliveryTimeLumaPrefetch[k] / req_per_swath_ub; 6141 if (BytePerPixelC[k] == 0) { 6142 DisplayPipeRequestDeliveryTimeChroma[k] = 0; 6143 DisplayPipeRequestDeliveryTimeChromaPrefetch[k] = 0; 6144 } else { 6145 if (SourceScan[k] != dm_vert) { 6146 req_per_swath_ub = swath_width_chroma_ub[k] / BlockWidth256BytesC[k]; 6147 } else { 6148 req_per_swath_ub = swath_width_chroma_ub[k] / BlockHeight256BytesC[k]; 6149 } 6150 DisplayPipeRequestDeliveryTimeChroma[k] = DisplayPipeLineDeliveryTimeChroma[k] / req_per_swath_ub; 6151 DisplayPipeRequestDeliveryTimeChromaPrefetch[k] = DisplayPipeLineDeliveryTimeChromaPrefetch[k] / req_per_swath_ub; 6152 } 6153 #ifdef __DML_VBA_DEBUG__ 6154 dml_print("DML::%s: k=%d : HRatio = %f\n", __func__, 
k, HRatio[k]); 6155 dml_print("DML::%s: k=%d : VRatio = %f\n", __func__, k, VRatio[k]); 6156 dml_print("DML::%s: k=%d : HRatioChroma = %f\n", __func__, k, HRatioChroma[k]); 6157 dml_print("DML::%s: k=%d : VRatioChroma = %f\n", __func__, k, VRatioChroma[k]); 6158 dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeLuma = %f\n", __func__, k, DisplayPipeLineDeliveryTimeLuma[k]); 6159 dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeLumaPrefetch = %f\n", __func__, k, DisplayPipeLineDeliveryTimeLumaPrefetch[k]); 6160 dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeChroma = %f\n", __func__, k, DisplayPipeLineDeliveryTimeChroma[k]); 6161 dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeChromaPrefetch = %f\n", __func__, k, DisplayPipeLineDeliveryTimeChromaPrefetch[k]); 6162 dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeLuma = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeLuma[k]); 6163 dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeLumaPrefetch = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeLumaPrefetch[k]); 6164 dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeChroma = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeChroma[k]); 6165 dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeChromaPrefetch = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeChromaPrefetch[k]); 6166 #endif 6167 } 6168 6169 for (k = 0; k < NumberOfActivePlanes; ++k) { 6170 int cursor_req_per_width; 6171 6172 cursor_req_per_width = dml_ceil(CursorWidth[k][0] * CursorBPP[k][0] / 256 / 8, 1); 6173 if (NumberOfCursors[k] > 0) { 6174 if (VRatio[k] <= 1) { 6175 CursorRequestDeliveryTime[k] = CursorWidth[k][0] / HRatio[k] / PixelClock[k] / cursor_req_per_width; 6176 } else { 6177 CursorRequestDeliveryTime[k] = CursorWidth[k][0] / PSCL_THROUGHPUT[k] / DPPCLK[k] / cursor_req_per_width; 6178 } 6179 if (VRatioPrefetchY[k] <= 1) { 6180 CursorRequestDeliveryTimePrefetch[k] = CursorWidth[k][0] / HRatio[k] / PixelClock[k] / cursor_req_per_width; 6181 
} else { 6182 CursorRequestDeliveryTimePrefetch[k] = CursorWidth[k][0] / PSCL_THROUGHPUT[k] / DPPCLK[k] / cursor_req_per_width; 6183 } 6184 } else { 6185 CursorRequestDeliveryTime[k] = 0; 6186 CursorRequestDeliveryTimePrefetch[k] = 0; 6187 } 6188 #ifdef __DML_VBA_DEBUG__ 6189 dml_print("DML::%s: k=%d : NumberOfCursors = %d\n", __func__, k, NumberOfCursors[k]); 6190 dml_print("DML::%s: k=%d : CursorRequestDeliveryTime = %f\n", __func__, k, CursorRequestDeliveryTime[k]); 6191 dml_print("DML::%s: k=%d : CursorRequestDeliveryTimePrefetch = %f\n", __func__, k, CursorRequestDeliveryTimePrefetch[k]); 6192 #endif 6193 } 6194 } 6195 6196 static void CalculateMetaAndPTETimes( 6197 int NumberOfActivePlanes, 6198 bool GPUVMEnable, 6199 int MetaChunkSize, 6200 int MinMetaChunkSizeBytes, 6201 int HTotal[], 6202 double VRatio[], 6203 double VRatioChroma[], 6204 double DestinationLinesToRequestRowInVBlank[], 6205 double DestinationLinesToRequestRowInImmediateFlip[], 6206 bool DCCEnable[], 6207 double PixelClock[], 6208 int BytePerPixelY[], 6209 int BytePerPixelC[], 6210 enum scan_direction_class SourceScan[], 6211 int dpte_row_height[], 6212 int dpte_row_height_chroma[], 6213 int meta_row_width[], 6214 int meta_row_width_chroma[], 6215 int meta_row_height[], 6216 int meta_row_height_chroma[], 6217 int meta_req_width[], 6218 int meta_req_width_chroma[], 6219 int meta_req_height[], 6220 int meta_req_height_chroma[], 6221 int dpte_group_bytes[], 6222 int PTERequestSizeY[], 6223 int PTERequestSizeC[], 6224 int PixelPTEReqWidthY[], 6225 int PixelPTEReqHeightY[], 6226 int PixelPTEReqWidthC[], 6227 int PixelPTEReqHeightC[], 6228 int dpte_row_width_luma_ub[], 6229 int dpte_row_width_chroma_ub[], 6230 double DST_Y_PER_PTE_ROW_NOM_L[], 6231 double DST_Y_PER_PTE_ROW_NOM_C[], 6232 double DST_Y_PER_META_ROW_NOM_L[], 6233 double DST_Y_PER_META_ROW_NOM_C[], 6234 double TimePerMetaChunkNominal[], 6235 double TimePerChromaMetaChunkNominal[], 6236 double TimePerMetaChunkVBlank[], 6237 double 
TimePerChromaMetaChunkVBlank[], 6238 double TimePerMetaChunkFlip[], 6239 double TimePerChromaMetaChunkFlip[], 6240 double time_per_pte_group_nom_luma[], 6241 double time_per_pte_group_vblank_luma[], 6242 double time_per_pte_group_flip_luma[], 6243 double time_per_pte_group_nom_chroma[], 6244 double time_per_pte_group_vblank_chroma[], 6245 double time_per_pte_group_flip_chroma[]) 6246 { 6247 unsigned int meta_chunk_width; 6248 unsigned int min_meta_chunk_width; 6249 unsigned int meta_chunk_per_row_int; 6250 unsigned int meta_row_remainder; 6251 unsigned int meta_chunk_threshold; 6252 unsigned int meta_chunks_per_row_ub; 6253 unsigned int meta_chunk_width_chroma; 6254 unsigned int min_meta_chunk_width_chroma; 6255 unsigned int meta_chunk_per_row_int_chroma; 6256 unsigned int meta_row_remainder_chroma; 6257 unsigned int meta_chunk_threshold_chroma; 6258 unsigned int meta_chunks_per_row_ub_chroma; 6259 unsigned int dpte_group_width_luma; 6260 unsigned int dpte_groups_per_row_luma_ub; 6261 unsigned int dpte_group_width_chroma; 6262 unsigned int dpte_groups_per_row_chroma_ub; 6263 int k; 6264 6265 for (k = 0; k < NumberOfActivePlanes; ++k) { 6266 DST_Y_PER_PTE_ROW_NOM_L[k] = dpte_row_height[k] / VRatio[k]; 6267 if (BytePerPixelC[k] == 0) { 6268 DST_Y_PER_PTE_ROW_NOM_C[k] = 0; 6269 } else { 6270 DST_Y_PER_PTE_ROW_NOM_C[k] = dpte_row_height_chroma[k] / VRatioChroma[k]; 6271 } 6272 DST_Y_PER_META_ROW_NOM_L[k] = meta_row_height[k] / VRatio[k]; 6273 if (BytePerPixelC[k] == 0) { 6274 DST_Y_PER_META_ROW_NOM_C[k] = 0; 6275 } else { 6276 DST_Y_PER_META_ROW_NOM_C[k] = meta_row_height_chroma[k] / VRatioChroma[k]; 6277 } 6278 } 6279 6280 for (k = 0; k < NumberOfActivePlanes; ++k) { 6281 if (DCCEnable[k] == true) { 6282 meta_chunk_width = MetaChunkSize * 1024 * 256 / BytePerPixelY[k] / meta_row_height[k]; 6283 min_meta_chunk_width = MinMetaChunkSizeBytes * 256 / BytePerPixelY[k] / meta_row_height[k]; 6284 meta_chunk_per_row_int = meta_row_width[k] / meta_chunk_width; 6285 
meta_row_remainder = meta_row_width[k] % meta_chunk_width; 6286 if (SourceScan[k] != dm_vert) { 6287 meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_width[k]; 6288 } else { 6289 meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_height[k]; 6290 } 6291 if (meta_row_remainder <= meta_chunk_threshold) { 6292 meta_chunks_per_row_ub = meta_chunk_per_row_int + 1; 6293 } else { 6294 meta_chunks_per_row_ub = meta_chunk_per_row_int + 2; 6295 } 6296 TimePerMetaChunkNominal[k] = meta_row_height[k] / VRatio[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub; 6297 TimePerMetaChunkVBlank[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub; 6298 TimePerMetaChunkFlip[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub; 6299 if (BytePerPixelC[k] == 0) { 6300 TimePerChromaMetaChunkNominal[k] = 0; 6301 TimePerChromaMetaChunkVBlank[k] = 0; 6302 TimePerChromaMetaChunkFlip[k] = 0; 6303 } else { 6304 meta_chunk_width_chroma = MetaChunkSize * 1024 * 256 / BytePerPixelC[k] / meta_row_height_chroma[k]; 6305 min_meta_chunk_width_chroma = MinMetaChunkSizeBytes * 256 / BytePerPixelC[k] / meta_row_height_chroma[k]; 6306 meta_chunk_per_row_int_chroma = (double) meta_row_width_chroma[k] / meta_chunk_width_chroma; 6307 meta_row_remainder_chroma = meta_row_width_chroma[k] % meta_chunk_width_chroma; 6308 if (SourceScan[k] != dm_vert) { 6309 meta_chunk_threshold_chroma = 2 * min_meta_chunk_width_chroma - meta_req_width_chroma[k]; 6310 } else { 6311 meta_chunk_threshold_chroma = 2 * min_meta_chunk_width_chroma - meta_req_height_chroma[k]; 6312 } 6313 if (meta_row_remainder_chroma <= meta_chunk_threshold_chroma) { 6314 meta_chunks_per_row_ub_chroma = meta_chunk_per_row_int_chroma + 1; 6315 } else { 6316 meta_chunks_per_row_ub_chroma = meta_chunk_per_row_int_chroma + 2; 6317 } 6318 TimePerChromaMetaChunkNominal[k] = meta_row_height_chroma[k] / VRatioChroma[k] * HTotal[k] / 
PixelClock[k] / meta_chunks_per_row_ub_chroma; 6319 TimePerChromaMetaChunkVBlank[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma; 6320 TimePerChromaMetaChunkFlip[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma; 6321 } 6322 } else { 6323 TimePerMetaChunkNominal[k] = 0; 6324 TimePerMetaChunkVBlank[k] = 0; 6325 TimePerMetaChunkFlip[k] = 0; 6326 TimePerChromaMetaChunkNominal[k] = 0; 6327 TimePerChromaMetaChunkVBlank[k] = 0; 6328 TimePerChromaMetaChunkFlip[k] = 0; 6329 } 6330 } 6331 6332 for (k = 0; k < NumberOfActivePlanes; ++k) { 6333 if (GPUVMEnable == true) { 6334 if (SourceScan[k] != dm_vert) { 6335 dpte_group_width_luma = dpte_group_bytes[k] / PTERequestSizeY[k] * PixelPTEReqWidthY[k]; 6336 } else { 6337 dpte_group_width_luma = dpte_group_bytes[k] / PTERequestSizeY[k] * PixelPTEReqHeightY[k]; 6338 } 6339 dpte_groups_per_row_luma_ub = dml_ceil(1.0 * dpte_row_width_luma_ub[k] / dpte_group_width_luma, 1); 6340 time_per_pte_group_nom_luma[k] = DST_Y_PER_PTE_ROW_NOM_L[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub; 6341 time_per_pte_group_vblank_luma[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub; 6342 time_per_pte_group_flip_luma[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub; 6343 if (BytePerPixelC[k] == 0) { 6344 time_per_pte_group_nom_chroma[k] = 0; 6345 time_per_pte_group_vblank_chroma[k] = 0; 6346 time_per_pte_group_flip_chroma[k] = 0; 6347 } else { 6348 if (SourceScan[k] != dm_vert) { 6349 dpte_group_width_chroma = dpte_group_bytes[k] / PTERequestSizeC[k] * PixelPTEReqWidthC[k]; 6350 } else { 6351 dpte_group_width_chroma = dpte_group_bytes[k] / PTERequestSizeC[k] * PixelPTEReqHeightC[k]; 6352 } 6353 dpte_groups_per_row_chroma_ub = dml_ceil(1.0 * dpte_row_width_chroma_ub[k] / dpte_group_width_chroma, 1); 6354 
time_per_pte_group_nom_chroma[k] = DST_Y_PER_PTE_ROW_NOM_C[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub; 6355 time_per_pte_group_vblank_chroma[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub; 6356 time_per_pte_group_flip_chroma[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub; 6357 } 6358 } else { 6359 time_per_pte_group_nom_luma[k] = 0; 6360 time_per_pte_group_vblank_luma[k] = 0; 6361 time_per_pte_group_flip_luma[k] = 0; 6362 time_per_pte_group_nom_chroma[k] = 0; 6363 time_per_pte_group_vblank_chroma[k] = 0; 6364 time_per_pte_group_flip_chroma[k] = 0; 6365 } 6366 } 6367 } 6368 6369 static void CalculateVMGroupAndRequestTimes( 6370 unsigned int NumberOfActivePlanes, 6371 bool GPUVMEnable, 6372 unsigned int GPUVMMaxPageTableLevels, 6373 unsigned int HTotal[], 6374 int BytePerPixelC[], 6375 double DestinationLinesToRequestVMInVBlank[], 6376 double DestinationLinesToRequestVMInImmediateFlip[], 6377 bool DCCEnable[], 6378 double PixelClock[], 6379 int dpte_row_width_luma_ub[], 6380 int dpte_row_width_chroma_ub[], 6381 int vm_group_bytes[], 6382 unsigned int dpde0_bytes_per_frame_ub_l[], 6383 unsigned int dpde0_bytes_per_frame_ub_c[], 6384 int meta_pte_bytes_per_frame_ub_l[], 6385 int meta_pte_bytes_per_frame_ub_c[], 6386 double TimePerVMGroupVBlank[], 6387 double TimePerVMGroupFlip[], 6388 double TimePerVMRequestVBlank[], 6389 double TimePerVMRequestFlip[]) 6390 { 6391 int num_group_per_lower_vm_stage; 6392 int num_req_per_lower_vm_stage; 6393 int k; 6394 6395 for (k = 0; k < NumberOfActivePlanes; ++k) { 6396 if (GPUVMEnable == true && (DCCEnable[k] == true || GPUVMMaxPageTableLevels > 1)) { 6397 if (DCCEnable[k] == false) { 6398 if (BytePerPixelC[k] > 0) { 6399 num_group_per_lower_vm_stage = dml_ceil((double) (dpde0_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1) 6400 + dml_ceil((double) (dpde0_bytes_per_frame_ub_c[k]) / 
(double) (vm_group_bytes[k]), 1); 6401 } else { 6402 num_group_per_lower_vm_stage = dml_ceil((double) (dpde0_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1); 6403 } 6404 } else { 6405 if (GPUVMMaxPageTableLevels == 1) { 6406 if (BytePerPixelC[k] > 0) { 6407 num_group_per_lower_vm_stage = dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1) 6408 + dml_ceil((double) (meta_pte_bytes_per_frame_ub_c[k]) / (double) (vm_group_bytes[k]), 1); 6409 } else { 6410 num_group_per_lower_vm_stage = dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1); 6411 } 6412 } else { 6413 if (BytePerPixelC[k] > 0) { 6414 num_group_per_lower_vm_stage = 2 + dml_ceil((double) (dpde0_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1) 6415 + dml_ceil((double) (dpde0_bytes_per_frame_ub_c[k]) / (double) (vm_group_bytes[k]), 1) 6416 + dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1) 6417 + dml_ceil((double) (meta_pte_bytes_per_frame_ub_c[k]) / (double) (vm_group_bytes[k]), 1); 6418 } else { 6419 num_group_per_lower_vm_stage = 1 + dml_ceil((double) (dpde0_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1) 6420 + dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1); 6421 } 6422 } 6423 } 6424 6425 if (DCCEnable[k] == false) { 6426 if (BytePerPixelC[k] > 0) { 6427 num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k] / 64 + dpde0_bytes_per_frame_ub_c[k] / 64; 6428 } else { 6429 num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k] / 64; 6430 } 6431 } else { 6432 if (GPUVMMaxPageTableLevels == 1) { 6433 if (BytePerPixelC[k] > 0) { 6434 num_req_per_lower_vm_stage = meta_pte_bytes_per_frame_ub_l[k] / 64 + meta_pte_bytes_per_frame_ub_c[k] / 64; 6435 } else { 6436 num_req_per_lower_vm_stage = meta_pte_bytes_per_frame_ub_l[k] / 64; 6437 } 6438 } else { 6439 if (BytePerPixelC[k] > 0) { 6440 num_req_per_lower_vm_stage = 
dpde0_bytes_per_frame_ub_l[k] / 64 + dpde0_bytes_per_frame_ub_c[k] / 64 6441 + meta_pte_bytes_per_frame_ub_l[k] / 64 + meta_pte_bytes_per_frame_ub_c[k] / 64; 6442 } else { 6443 num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k] / 64 + meta_pte_bytes_per_frame_ub_l[k] / 64; 6444 } 6445 } 6446 } 6447 6448 TimePerVMGroupVBlank[k] = DestinationLinesToRequestVMInVBlank[k] * HTotal[k] / PixelClock[k] / num_group_per_lower_vm_stage; 6449 TimePerVMGroupFlip[k] = DestinationLinesToRequestVMInImmediateFlip[k] * HTotal[k] / PixelClock[k] / num_group_per_lower_vm_stage; 6450 TimePerVMRequestVBlank[k] = DestinationLinesToRequestVMInVBlank[k] * HTotal[k] / PixelClock[k] / num_req_per_lower_vm_stage; 6451 TimePerVMRequestFlip[k] = DestinationLinesToRequestVMInImmediateFlip[k] * HTotal[k] / PixelClock[k] / num_req_per_lower_vm_stage; 6452 6453 if (GPUVMMaxPageTableLevels > 2) { 6454 TimePerVMGroupVBlank[k] = TimePerVMGroupVBlank[k] / 2; 6455 TimePerVMGroupFlip[k] = TimePerVMGroupFlip[k] / 2; 6456 TimePerVMRequestVBlank[k] = TimePerVMRequestVBlank[k] / 2; 6457 TimePerVMRequestFlip[k] = TimePerVMRequestFlip[k] / 2; 6458 } 6459 6460 } else { 6461 TimePerVMGroupVBlank[k] = 0; 6462 TimePerVMGroupFlip[k] = 0; 6463 TimePerVMRequestVBlank[k] = 0; 6464 TimePerVMRequestFlip[k] = 0; 6465 } 6466 } 6467 } 6468 6469 static void CalculateStutterEfficiency( 6470 struct display_mode_lib *mode_lib, 6471 int CompressedBufferSizeInkByte, 6472 bool UnboundedRequestEnabled, 6473 int ConfigReturnBufferSizeInKByte, 6474 int MetaFIFOSizeInKEntries, 6475 int ZeroSizeBufferEntries, 6476 int NumberOfActivePlanes, 6477 int ROBBufferSizeInKByte, 6478 double TotalDataReadBandwidth, 6479 double DCFCLK, 6480 double ReturnBW, 6481 double COMPBUF_RESERVED_SPACE_64B, 6482 double COMPBUF_RESERVED_SPACE_ZS, 6483 double SRExitTime, 6484 double SRExitZ8Time, 6485 bool SynchronizedVBlank, 6486 double Z8StutterEnterPlusExitWatermark, 6487 double StutterEnterPlusExitWatermark, 6488 bool 
ProgressiveToInterlaceUnitInOPP, 6489 bool Interlace[], 6490 double MinTTUVBlank[], 6491 int DPPPerPlane[], 6492 unsigned int DETBufferSizeY[], 6493 int BytePerPixelY[], 6494 double BytePerPixelDETY[], 6495 double SwathWidthY[], 6496 int SwathHeightY[], 6497 int SwathHeightC[], 6498 double NetDCCRateLuma[], 6499 double NetDCCRateChroma[], 6500 double DCCFractionOfZeroSizeRequestsLuma[], 6501 double DCCFractionOfZeroSizeRequestsChroma[], 6502 int HTotal[], 6503 int VTotal[], 6504 double PixelClock[], 6505 double VRatio[], 6506 enum scan_direction_class SourceScan[], 6507 int BlockHeight256BytesY[], 6508 int BlockWidth256BytesY[], 6509 int BlockHeight256BytesC[], 6510 int BlockWidth256BytesC[], 6511 int DCCYMaxUncompressedBlock[], 6512 int DCCCMaxUncompressedBlock[], 6513 int VActive[], 6514 bool DCCEnable[], 6515 bool WritebackEnable[], 6516 double ReadBandwidthPlaneLuma[], 6517 double ReadBandwidthPlaneChroma[], 6518 double meta_row_bw[], 6519 double dpte_row_bw[], 6520 double *StutterEfficiencyNotIncludingVBlank, 6521 double *StutterEfficiency, 6522 int *NumberOfStutterBurstsPerFrame, 6523 double *Z8StutterEfficiencyNotIncludingVBlank, 6524 double *Z8StutterEfficiency, 6525 int *Z8NumberOfStutterBurstsPerFrame, 6526 double *StutterPeriod) 6527 { 6528 struct vba_vars_st *v = &mode_lib->vba; 6529 6530 double DETBufferingTimeY; 6531 double SwathWidthYCriticalPlane = 0; 6532 double VActiveTimeCriticalPlane = 0; 6533 double FrameTimeCriticalPlane = 0; 6534 int BytePerPixelYCriticalPlane = 0; 6535 double LinesToFinishSwathTransferStutterCriticalPlane = 0; 6536 double MinTTUVBlankCriticalPlane = 0; 6537 double TotalCompressedReadBandwidth; 6538 double TotalRowReadBandwidth; 6539 double AverageDCCCompressionRate; 6540 double EffectiveCompressedBufferSize; 6541 double PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer; 6542 double StutterBurstTime; 6543 int TotalActiveWriteback; 6544 double LinesInDETY; 6545 double LinesInDETYRoundedDownToSwath; 6546 double 
MaximumEffectiveCompressionLuma; 6547 double MaximumEffectiveCompressionChroma; 6548 double TotalZeroSizeRequestReadBandwidth; 6549 double TotalZeroSizeCompressedReadBandwidth; 6550 double AverageDCCZeroSizeFraction; 6551 double AverageZeroSizeCompressionRate; 6552 int TotalNumberOfActiveOTG = 0; 6553 double LastStutterPeriod = 0.0; 6554 double LastZ8StutterPeriod = 0.0; 6555 int k; 6556 6557 TotalZeroSizeRequestReadBandwidth = 0; 6558 TotalZeroSizeCompressedReadBandwidth = 0; 6559 TotalRowReadBandwidth = 0; 6560 TotalCompressedReadBandwidth = 0; 6561 6562 for (k = 0; k < NumberOfActivePlanes; ++k) { 6563 if (DCCEnable[k] == true) { 6564 if ((SourceScan[k] == dm_vert && BlockWidth256BytesY[k] > SwathHeightY[k]) || (SourceScan[k] != dm_vert && BlockHeight256BytesY[k] > SwathHeightY[k]) 6565 || DCCYMaxUncompressedBlock[k] < 256) { 6566 MaximumEffectiveCompressionLuma = 2; 6567 } else { 6568 MaximumEffectiveCompressionLuma = 4; 6569 } 6570 TotalCompressedReadBandwidth = TotalCompressedReadBandwidth + ReadBandwidthPlaneLuma[k] / dml_min(NetDCCRateLuma[k], MaximumEffectiveCompressionLuma); 6571 TotalZeroSizeRequestReadBandwidth = TotalZeroSizeRequestReadBandwidth + ReadBandwidthPlaneLuma[k] * DCCFractionOfZeroSizeRequestsLuma[k]; 6572 TotalZeroSizeCompressedReadBandwidth = TotalZeroSizeCompressedReadBandwidth 6573 + ReadBandwidthPlaneLuma[k] * DCCFractionOfZeroSizeRequestsLuma[k] / MaximumEffectiveCompressionLuma; 6574 if (ReadBandwidthPlaneChroma[k] > 0) { 6575 if ((SourceScan[k] == dm_vert && BlockWidth256BytesC[k] > SwathHeightC[k]) 6576 || (SourceScan[k] != dm_vert && BlockHeight256BytesC[k] > SwathHeightC[k]) || DCCCMaxUncompressedBlock[k] < 256) { 6577 MaximumEffectiveCompressionChroma = 2; 6578 } else { 6579 MaximumEffectiveCompressionChroma = 4; 6580 } 6581 TotalCompressedReadBandwidth = TotalCompressedReadBandwidth 6582 + ReadBandwidthPlaneChroma[k] / dml_min(NetDCCRateChroma[k], MaximumEffectiveCompressionChroma); 6583 TotalZeroSizeRequestReadBandwidth = 
TotalZeroSizeRequestReadBandwidth + ReadBandwidthPlaneChroma[k] * DCCFractionOfZeroSizeRequestsChroma[k]; 6584 TotalZeroSizeCompressedReadBandwidth = TotalZeroSizeCompressedReadBandwidth 6585 + ReadBandwidthPlaneChroma[k] * DCCFractionOfZeroSizeRequestsChroma[k] / MaximumEffectiveCompressionChroma; 6586 } 6587 } else { 6588 TotalCompressedReadBandwidth = TotalCompressedReadBandwidth + ReadBandwidthPlaneLuma[k] + ReadBandwidthPlaneChroma[k]; 6589 } 6590 TotalRowReadBandwidth = TotalRowReadBandwidth + DPPPerPlane[k] * (meta_row_bw[k] + dpte_row_bw[k]); 6591 } 6592 6593 AverageDCCCompressionRate = TotalDataReadBandwidth / TotalCompressedReadBandwidth; 6594 AverageDCCZeroSizeFraction = TotalZeroSizeRequestReadBandwidth / TotalDataReadBandwidth; 6595 6596 #ifdef __DML_VBA_DEBUG__ 6597 dml_print("DML::%s: TotalCompressedReadBandwidth = %f\n", __func__, TotalCompressedReadBandwidth); 6598 dml_print("DML::%s: TotalZeroSizeRequestReadBandwidth = %f\n", __func__, TotalZeroSizeRequestReadBandwidth); 6599 dml_print("DML::%s: TotalZeroSizeCompressedReadBandwidth = %f\n", __func__, TotalZeroSizeCompressedReadBandwidth); 6600 dml_print("DML::%s: MaximumEffectiveCompressionLuma = %f\n", __func__, MaximumEffectiveCompressionLuma); 6601 dml_print("DML::%s: MaximumEffectiveCompressionChroma = %f\n", __func__, MaximumEffectiveCompressionChroma); 6602 dml_print("DML::%s: AverageDCCCompressionRate = %f\n", __func__, AverageDCCCompressionRate); 6603 dml_print("DML::%s: AverageDCCZeroSizeFraction = %f\n", __func__, AverageDCCZeroSizeFraction); 6604 dml_print("DML::%s: CompressedBufferSizeInkByte = %d\n", __func__, CompressedBufferSizeInkByte); 6605 #endif 6606 6607 if (AverageDCCZeroSizeFraction == 1) { 6608 AverageZeroSizeCompressionRate = TotalZeroSizeRequestReadBandwidth / TotalZeroSizeCompressedReadBandwidth; 6609 EffectiveCompressedBufferSize = MetaFIFOSizeInKEntries * 1024 * 64 * AverageZeroSizeCompressionRate + (ZeroSizeBufferEntries - COMPBUF_RESERVED_SPACE_ZS) * 64 * 
AverageZeroSizeCompressionRate; 6610 } else if (AverageDCCZeroSizeFraction > 0) { 6611 AverageZeroSizeCompressionRate = TotalZeroSizeRequestReadBandwidth / TotalZeroSizeCompressedReadBandwidth; 6612 EffectiveCompressedBufferSize = dml_min( 6613 CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate, 6614 MetaFIFOSizeInKEntries * 1024 * 64 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate + 1 / AverageDCCCompressionRate)) 6615 + dml_min((ROBBufferSizeInKByte * 1024 - COMPBUF_RESERVED_SPACE_64B * 64) * AverageDCCCompressionRate, 6616 (ZeroSizeBufferEntries - COMPBUF_RESERVED_SPACE_ZS) * 64 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate)); 6617 dml_print("DML::%s: min 1 = %f\n", __func__, CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate); 6618 dml_print( 6619 "DML::%s: min 2 = %f\n", 6620 __func__, 6621 MetaFIFOSizeInKEntries * 1024 * 64 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate + 1 / AverageDCCCompressionRate)); 6622 dml_print("DML::%s: min 3 = %f\n", __func__, ROBBufferSizeInKByte * 1024 * AverageDCCCompressionRate); 6623 dml_print("DML::%s: min 4 = %f\n", __func__, ZeroSizeBufferEntries * 64 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate)); 6624 } else { 6625 EffectiveCompressedBufferSize = dml_min( 6626 CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate, 6627 MetaFIFOSizeInKEntries * 1024 * 64 * AverageDCCCompressionRate) + (ROBBufferSizeInKByte * 1024 - COMPBUF_RESERVED_SPACE_64B * 64) * AverageDCCCompressionRate; 6628 dml_print("DML::%s: min 1 = %f\n", __func__, CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate); 6629 dml_print("DML::%s: min 2 = %f\n", __func__, MetaFIFOSizeInKEntries * 1024 * 64 * AverageDCCCompressionRate); 6630 } 6631 6632 #ifdef __DML_VBA_DEBUG__ 6633 dml_print("DML::%s: MetaFIFOSizeInKEntries = %d\n", __func__, MetaFIFOSizeInKEntries); 6634 dml_print("DML::%s: AverageZeroSizeCompressionRate = %f\n", __func__, 
AverageZeroSizeCompressionRate); 6635 dml_print("DML::%s: EffectiveCompressedBufferSize = %f\n", __func__, EffectiveCompressedBufferSize); 6636 #endif 6637 6638 *StutterPeriod = 0; 6639 for (k = 0; k < NumberOfActivePlanes; ++k) { 6640 LinesInDETY = (DETBufferSizeY[k] + (UnboundedRequestEnabled == true ? EffectiveCompressedBufferSize : 0) * ReadBandwidthPlaneLuma[k] / TotalDataReadBandwidth) 6641 / BytePerPixelDETY[k] / SwathWidthY[k]; 6642 LinesInDETYRoundedDownToSwath = dml_floor(LinesInDETY, SwathHeightY[k]); 6643 DETBufferingTimeY = LinesInDETYRoundedDownToSwath * (HTotal[k] / PixelClock[k]) / VRatio[k]; 6644 #ifdef __DML_VBA_DEBUG__ 6645 dml_print("DML::%s: k=%0d DETBufferSizeY = %f\n", __func__, k, DETBufferSizeY[k]); 6646 dml_print("DML::%s: k=%0d BytePerPixelDETY = %f\n", __func__, k, BytePerPixelDETY[k]); 6647 dml_print("DML::%s: k=%0d SwathWidthY = %f\n", __func__, k, SwathWidthY[k]); 6648 dml_print("DML::%s: k=%0d ReadBandwidthPlaneLuma = %f\n", __func__, k, ReadBandwidthPlaneLuma[k]); 6649 dml_print("DML::%s: k=%0d TotalDataReadBandwidth = %f\n", __func__, k, TotalDataReadBandwidth); 6650 dml_print("DML::%s: k=%0d LinesInDETY = %f\n", __func__, k, LinesInDETY); 6651 dml_print("DML::%s: k=%0d LinesInDETYRoundedDownToSwath = %f\n", __func__, k, LinesInDETYRoundedDownToSwath); 6652 dml_print("DML::%s: k=%0d HTotal = %d\n", __func__, k, HTotal[k]); 6653 dml_print("DML::%s: k=%0d PixelClock = %f\n", __func__, k, PixelClock[k]); 6654 dml_print("DML::%s: k=%0d VRatio = %f\n", __func__, k, VRatio[k]); 6655 dml_print("DML::%s: k=%0d DETBufferingTimeY = %f\n", __func__, k, DETBufferingTimeY); 6656 dml_print("DML::%s: k=%0d PixelClock = %f\n", __func__, k, PixelClock[k]); 6657 #endif 6658 6659 if (k == 0 || DETBufferingTimeY < *StutterPeriod) { 6660 bool isInterlaceTiming = Interlace[k] && !ProgressiveToInterlaceUnitInOPP; 6661 6662 *StutterPeriod = DETBufferingTimeY; 6663 FrameTimeCriticalPlane = (isInterlaceTiming ? 
dml_floor(VTotal[k] / 2.0, 1.0) : VTotal[k]) * HTotal[k] / PixelClock[k]; 6664 VActiveTimeCriticalPlane = (isInterlaceTiming ? dml_floor(VActive[k] / 2.0, 1.0) : VActive[k]) * HTotal[k] / PixelClock[k]; 6665 BytePerPixelYCriticalPlane = BytePerPixelY[k]; 6666 SwathWidthYCriticalPlane = SwathWidthY[k]; 6667 LinesToFinishSwathTransferStutterCriticalPlane = SwathHeightY[k] - (LinesInDETY - LinesInDETYRoundedDownToSwath); 6668 MinTTUVBlankCriticalPlane = MinTTUVBlank[k]; 6669 6670 #ifdef __DML_VBA_DEBUG__ 6671 dml_print("DML::%s: StutterPeriod = %f\n", __func__, *StutterPeriod); 6672 dml_print("DML::%s: MinTTUVBlankCriticalPlane = %f\n", __func__, MinTTUVBlankCriticalPlane); 6673 dml_print("DML::%s: FrameTimeCriticalPlane = %f\n", __func__, FrameTimeCriticalPlane); 6674 dml_print("DML::%s: VActiveTimeCriticalPlane = %f\n", __func__, VActiveTimeCriticalPlane); 6675 dml_print("DML::%s: BytePerPixelYCriticalPlane = %d\n", __func__, BytePerPixelYCriticalPlane); 6676 dml_print("DML::%s: SwathWidthYCriticalPlane = %f\n", __func__, SwathWidthYCriticalPlane); 6677 dml_print("DML::%s: LinesToFinishSwathTransferStutterCriticalPlane = %f\n", __func__, LinesToFinishSwathTransferStutterCriticalPlane); 6678 #endif 6679 } 6680 } 6681 6682 PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer = dml_min(*StutterPeriod * TotalDataReadBandwidth, EffectiveCompressedBufferSize); 6683 #ifdef __DML_VBA_DEBUG__ 6684 dml_print("DML::%s: ROBBufferSizeInKByte = %d\n", __func__, ROBBufferSizeInKByte); 6685 dml_print("DML::%s: AverageDCCCompressionRate = %f\n", __func__, AverageDCCCompressionRate); 6686 dml_print("DML::%s: StutterPeriod * TotalDataReadBandwidth = %f\n", __func__, *StutterPeriod * TotalDataReadBandwidth); 6687 dml_print("DML::%s: ROBBufferSizeInKByte * 1024 * AverageDCCCompressionRate + EffectiveCompressedBufferSize = %f\n", __func__, ROBBufferSizeInKByte * 1024 * AverageDCCCompressionRate + EffectiveCompressedBufferSize); 6688 dml_print("DML::%s: 
EffectiveCompressedBufferSize = %f\n", __func__, EffectiveCompressedBufferSize); 6689 dml_print("DML::%s: PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer = %f\n", __func__, PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer); 6690 dml_print("DML::%s: ReturnBW = %f\n", __func__, ReturnBW); 6691 dml_print("DML::%s: TotalDataReadBandwidth = %f\n", __func__, TotalDataReadBandwidth); 6692 dml_print("DML::%s: TotalRowReadBandwidth = %f\n", __func__, TotalRowReadBandwidth); 6693 dml_print("DML::%s: DCFCLK = %f\n", __func__, DCFCLK); 6694 #endif 6695 6696 StutterBurstTime = PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer / AverageDCCCompressionRate / ReturnBW 6697 + (*StutterPeriod * TotalDataReadBandwidth - PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer) / (DCFCLK * 64) 6698 + *StutterPeriod * TotalRowReadBandwidth / ReturnBW; 6699 #ifdef __DML_VBA_DEBUG__ 6700 dml_print("DML::%s: Part 1 = %f\n", __func__, PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer / AverageDCCCompressionRate / ReturnBW); 6701 dml_print("DML::%s: StutterPeriod * TotalDataReadBandwidth = %f\n", __func__, (*StutterPeriod * TotalDataReadBandwidth)); 6702 dml_print("DML::%s: Part 2 = %f\n", __func__, (*StutterPeriod * TotalDataReadBandwidth - PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer) / (DCFCLK * 64)); 6703 dml_print("DML::%s: Part 3 = %f\n", __func__, *StutterPeriod * TotalRowReadBandwidth / ReturnBW); 6704 dml_print("DML::%s: StutterBurstTime = %f\n", __func__, StutterBurstTime); 6705 #endif 6706 StutterBurstTime = dml_max(StutterBurstTime, LinesToFinishSwathTransferStutterCriticalPlane * BytePerPixelYCriticalPlane * SwathWidthYCriticalPlane / ReturnBW); 6707 6708 dml_print( 6709 "DML::%s: Time to finish residue swath=%f\n", 6710 __func__, 6711 LinesToFinishSwathTransferStutterCriticalPlane * BytePerPixelYCriticalPlane * SwathWidthYCriticalPlane / ReturnBW); 6712 6713 TotalActiveWriteback = 0; 6714 for (k = 0; k < 
NumberOfActivePlanes; ++k) { 6715 if (WritebackEnable[k]) { 6716 TotalActiveWriteback = TotalActiveWriteback + 1; 6717 } 6718 } 6719 6720 if (TotalActiveWriteback == 0) { 6721 #ifdef __DML_VBA_DEBUG__ 6722 dml_print("DML::%s: SRExitTime = %f\n", __func__, SRExitTime); 6723 dml_print("DML::%s: SRExitZ8Time = %f\n", __func__, SRExitZ8Time); 6724 dml_print("DML::%s: StutterBurstTime = %f (final)\n", __func__, StutterBurstTime); 6725 dml_print("DML::%s: StutterPeriod = %f\n", __func__, *StutterPeriod); 6726 #endif 6727 *StutterEfficiencyNotIncludingVBlank = dml_max(0., 1 - (SRExitTime + StutterBurstTime) / *StutterPeriod) * 100; 6728 *Z8StutterEfficiencyNotIncludingVBlank = dml_max(0., 1 - (SRExitZ8Time + StutterBurstTime) / *StutterPeriod) * 100; 6729 *NumberOfStutterBurstsPerFrame = (*StutterEfficiencyNotIncludingVBlank > 0 ? dml_ceil(VActiveTimeCriticalPlane / *StutterPeriod, 1) : 0); 6730 *Z8NumberOfStutterBurstsPerFrame = (*Z8StutterEfficiencyNotIncludingVBlank > 0 ? dml_ceil(VActiveTimeCriticalPlane / *StutterPeriod, 1) : 0); 6731 } else { 6732 *StutterEfficiencyNotIncludingVBlank = 0.; 6733 *Z8StutterEfficiencyNotIncludingVBlank = 0.; 6734 *NumberOfStutterBurstsPerFrame = 0; 6735 *Z8NumberOfStutterBurstsPerFrame = 0; 6736 } 6737 #ifdef __DML_VBA_DEBUG__ 6738 dml_print("DML::%s: VActiveTimeCriticalPlane = %f\n", __func__, VActiveTimeCriticalPlane); 6739 dml_print("DML::%s: StutterEfficiencyNotIncludingVBlank = %f\n", __func__, *StutterEfficiencyNotIncludingVBlank); 6740 dml_print("DML::%s: Z8StutterEfficiencyNotIncludingVBlank = %f\n", __func__, *Z8StutterEfficiencyNotIncludingVBlank); 6741 dml_print("DML::%s: NumberOfStutterBurstsPerFrame = %d\n", __func__, *NumberOfStutterBurstsPerFrame); 6742 dml_print("DML::%s: Z8NumberOfStutterBurstsPerFrame = %d\n", __func__, *Z8NumberOfStutterBurstsPerFrame); 6743 #endif 6744 6745 for (k = 0; k < NumberOfActivePlanes; ++k) { 6746 if (v->BlendingAndTiming[k] == k) { 6747 TotalNumberOfActiveOTG = TotalNumberOfActiveOTG + 1; 
6748 } 6749 } 6750 6751 if (*StutterEfficiencyNotIncludingVBlank > 0) { 6752 LastStutterPeriod = VActiveTimeCriticalPlane - (*NumberOfStutterBurstsPerFrame - 1) * *StutterPeriod; 6753 6754 if ((SynchronizedVBlank || TotalNumberOfActiveOTG == 1) && LastStutterPeriod + MinTTUVBlankCriticalPlane > StutterEnterPlusExitWatermark) { 6755 *StutterEfficiency = (1 - (*NumberOfStutterBurstsPerFrame * SRExitTime + StutterBurstTime * VActiveTimeCriticalPlane 6756 / *StutterPeriod) / FrameTimeCriticalPlane) * 100; 6757 } else { 6758 *StutterEfficiency = *StutterEfficiencyNotIncludingVBlank; 6759 } 6760 } else { 6761 *StutterEfficiency = 0; 6762 } 6763 6764 if (*Z8StutterEfficiencyNotIncludingVBlank > 0) { 6765 LastZ8StutterPeriod = VActiveTimeCriticalPlane - (*NumberOfStutterBurstsPerFrame - 1) * *StutterPeriod; 6766 if ((SynchronizedVBlank || TotalNumberOfActiveOTG == 1) && LastZ8StutterPeriod + MinTTUVBlankCriticalPlane > Z8StutterEnterPlusExitWatermark) { 6767 *Z8StutterEfficiency = (1 - (*NumberOfStutterBurstsPerFrame * SRExitZ8Time + StutterBurstTime * VActiveTimeCriticalPlane 6768 / *StutterPeriod) / FrameTimeCriticalPlane) * 100; 6769 } else { 6770 *Z8StutterEfficiency = *Z8StutterEfficiencyNotIncludingVBlank; 6771 } 6772 } else { 6773 *Z8StutterEfficiency = 0.; 6774 } 6775 6776 dml_print("DML::%s: LastZ8StutterPeriod = %f\n", __func__, LastZ8StutterPeriod); 6777 dml_print("DML::%s: Z8StutterEnterPlusExitWatermark = %f\n", __func__, Z8StutterEnterPlusExitWatermark); 6778 dml_print("DML::%s: StutterBurstTime = %f\n", __func__, StutterBurstTime); 6779 dml_print("DML::%s: StutterPeriod = %f\n", __func__, *StutterPeriod); 6780 dml_print("DML::%s: StutterEfficiency = %f\n", __func__, *StutterEfficiency); 6781 dml_print("DML::%s: Z8StutterEfficiency = %f\n", __func__, *Z8StutterEfficiency); 6782 dml_print("DML::%s: StutterEfficiencyNotIncludingVBlank = %f\n", __func__, *StutterEfficiencyNotIncludingVBlank); 6783 dml_print("DML::%s: Z8NumberOfStutterBurstsPerFrame = %d\n", 
__func__, *Z8NumberOfStutterBurstsPerFrame); 6784 } 6785 6786 static void CalculateSwathAndDETConfiguration( 6787 bool ForceSingleDPP, 6788 int NumberOfActivePlanes, 6789 unsigned int DETBufferSizeInKByte, 6790 double MaximumSwathWidthLuma[], 6791 double MaximumSwathWidthChroma[], 6792 enum scan_direction_class SourceScan[], 6793 enum source_format_class SourcePixelFormat[], 6794 enum dm_swizzle_mode SurfaceTiling[], 6795 int ViewportWidth[], 6796 int ViewportHeight[], 6797 int SurfaceWidthY[], 6798 int SurfaceWidthC[], 6799 int SurfaceHeightY[], 6800 int SurfaceHeightC[], 6801 int Read256BytesBlockHeightY[], 6802 int Read256BytesBlockHeightC[], 6803 int Read256BytesBlockWidthY[], 6804 int Read256BytesBlockWidthC[], 6805 enum odm_combine_mode ODMCombineEnabled[], 6806 int BlendingAndTiming[], 6807 int BytePerPixY[], 6808 int BytePerPixC[], 6809 double BytePerPixDETY[], 6810 double BytePerPixDETC[], 6811 int HActive[], 6812 double HRatio[], 6813 double HRatioChroma[], 6814 int DPPPerPlane[], 6815 int swath_width_luma_ub[], 6816 int swath_width_chroma_ub[], 6817 double SwathWidth[], 6818 double SwathWidthChroma[], 6819 int SwathHeightY[], 6820 int SwathHeightC[], 6821 unsigned int DETBufferSizeY[], 6822 unsigned int DETBufferSizeC[], 6823 bool ViewportSizeSupportPerPlane[], 6824 bool *ViewportSizeSupport) 6825 { 6826 int MaximumSwathHeightY[DC__NUM_DPP__MAX]; 6827 int MaximumSwathHeightC[DC__NUM_DPP__MAX]; 6828 int MinimumSwathHeightY; 6829 int MinimumSwathHeightC; 6830 int RoundedUpMaxSwathSizeBytesY; 6831 int RoundedUpMaxSwathSizeBytesC; 6832 int RoundedUpMinSwathSizeBytesY; 6833 int RoundedUpMinSwathSizeBytesC; 6834 int RoundedUpSwathSizeBytesY; 6835 int RoundedUpSwathSizeBytesC; 6836 double SwathWidthSingleDPP[DC__NUM_DPP__MAX]; 6837 double SwathWidthSingleDPPChroma[DC__NUM_DPP__MAX]; 6838 int k; 6839 6840 CalculateSwathWidth( 6841 ForceSingleDPP, 6842 NumberOfActivePlanes, 6843 SourcePixelFormat, 6844 SourceScan, 6845 ViewportWidth, 6846 ViewportHeight, 6847 
SurfaceWidthY, 6848 SurfaceWidthC, 6849 SurfaceHeightY, 6850 SurfaceHeightC, 6851 ODMCombineEnabled, 6852 BytePerPixY, 6853 BytePerPixC, 6854 Read256BytesBlockHeightY, 6855 Read256BytesBlockHeightC, 6856 Read256BytesBlockWidthY, 6857 Read256BytesBlockWidthC, 6858 BlendingAndTiming, 6859 HActive, 6860 HRatio, 6861 DPPPerPlane, 6862 SwathWidthSingleDPP, 6863 SwathWidthSingleDPPChroma, 6864 SwathWidth, 6865 SwathWidthChroma, 6866 MaximumSwathHeightY, 6867 MaximumSwathHeightC, 6868 swath_width_luma_ub, 6869 swath_width_chroma_ub); 6870 6871 *ViewportSizeSupport = true; 6872 for (k = 0; k < NumberOfActivePlanes; ++k) { 6873 if ((SourcePixelFormat[k] == dm_444_64 || SourcePixelFormat[k] == dm_444_32 || SourcePixelFormat[k] == dm_444_16 || SourcePixelFormat[k] == dm_mono_16 6874 || SourcePixelFormat[k] == dm_mono_8 || SourcePixelFormat[k] == dm_rgbe)) { 6875 if (SurfaceTiling[k] == dm_sw_linear 6876 || (SourcePixelFormat[k] == dm_444_64 6877 && (SurfaceTiling[k] == dm_sw_64kb_s || SurfaceTiling[k] == dm_sw_64kb_s_t || SurfaceTiling[k] == dm_sw_64kb_s_x) 6878 && SourceScan[k] != dm_vert)) { 6879 MinimumSwathHeightY = MaximumSwathHeightY[k]; 6880 } else if (SourcePixelFormat[k] == dm_444_8 && SourceScan[k] == dm_vert) { 6881 MinimumSwathHeightY = MaximumSwathHeightY[k]; 6882 } else { 6883 MinimumSwathHeightY = MaximumSwathHeightY[k] / 2; 6884 } 6885 MinimumSwathHeightC = MaximumSwathHeightC[k]; 6886 } else { 6887 if (SurfaceTiling[k] == dm_sw_linear) { 6888 MinimumSwathHeightY = MaximumSwathHeightY[k]; 6889 MinimumSwathHeightC = MaximumSwathHeightC[k]; 6890 } else if (SourcePixelFormat[k] == dm_rgbe_alpha && SourceScan[k] == dm_vert) { 6891 MinimumSwathHeightY = MaximumSwathHeightY[k] / 2; 6892 MinimumSwathHeightC = MaximumSwathHeightC[k]; 6893 } else if (SourcePixelFormat[k] == dm_rgbe_alpha) { 6894 MinimumSwathHeightY = MaximumSwathHeightY[k] / 2; 6895 MinimumSwathHeightC = MaximumSwathHeightC[k] / 2; 6896 } else if (SourcePixelFormat[k] == dm_420_8 && SourceScan[k] == 
dm_vert) { 6897 MinimumSwathHeightY = MaximumSwathHeightY[k]; 6898 MinimumSwathHeightC = MaximumSwathHeightC[k] / 2; 6899 } else { 6900 MinimumSwathHeightC = MaximumSwathHeightC[k] / 2; 6901 MinimumSwathHeightY = MaximumSwathHeightY[k] / 2; 6902 } 6903 } 6904 6905 RoundedUpMaxSwathSizeBytesY = swath_width_luma_ub[k] * BytePerPixDETY[k] * MaximumSwathHeightY[k]; 6906 RoundedUpMinSwathSizeBytesY = swath_width_luma_ub[k] * BytePerPixDETY[k] * MinimumSwathHeightY; 6907 if (SourcePixelFormat[k] == dm_420_10) { 6908 RoundedUpMaxSwathSizeBytesY = dml_ceil((double) RoundedUpMaxSwathSizeBytesY, 256); 6909 RoundedUpMinSwathSizeBytesY = dml_ceil((double) RoundedUpMinSwathSizeBytesY, 256); 6910 } 6911 RoundedUpMaxSwathSizeBytesC = swath_width_chroma_ub[k] * BytePerPixDETC[k] * MaximumSwathHeightC[k]; 6912 RoundedUpMinSwathSizeBytesC = swath_width_chroma_ub[k] * BytePerPixDETC[k] * MinimumSwathHeightC; 6913 if (SourcePixelFormat[k] == dm_420_10) { 6914 RoundedUpMaxSwathSizeBytesC = dml_ceil(RoundedUpMaxSwathSizeBytesC, 256); 6915 RoundedUpMinSwathSizeBytesC = dml_ceil(RoundedUpMinSwathSizeBytesC, 256); 6916 } 6917 6918 if (RoundedUpMaxSwathSizeBytesY + RoundedUpMaxSwathSizeBytesC <= DETBufferSizeInKByte * 1024 / 2) { 6919 SwathHeightY[k] = MaximumSwathHeightY[k]; 6920 SwathHeightC[k] = MaximumSwathHeightC[k]; 6921 RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY; 6922 RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC; 6923 } else if (RoundedUpMaxSwathSizeBytesY >= 1.5 * RoundedUpMaxSwathSizeBytesC 6924 && RoundedUpMinSwathSizeBytesY + RoundedUpMaxSwathSizeBytesC <= DETBufferSizeInKByte * 1024 / 2) { 6925 SwathHeightY[k] = MinimumSwathHeightY; 6926 SwathHeightC[k] = MaximumSwathHeightC[k]; 6927 RoundedUpSwathSizeBytesY = RoundedUpMinSwathSizeBytesY; 6928 RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC; 6929 } else if (RoundedUpMaxSwathSizeBytesY < 1.5 * RoundedUpMaxSwathSizeBytesC 6930 && RoundedUpMaxSwathSizeBytesY + RoundedUpMinSwathSizeBytesC <= 
DETBufferSizeInKByte * 1024 / 2) { 6931 SwathHeightY[k] = MaximumSwathHeightY[k]; 6932 SwathHeightC[k] = MinimumSwathHeightC; 6933 RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY; 6934 RoundedUpSwathSizeBytesC = RoundedUpMinSwathSizeBytesC; 6935 } else { 6936 SwathHeightY[k] = MinimumSwathHeightY; 6937 SwathHeightC[k] = MinimumSwathHeightC; 6938 RoundedUpSwathSizeBytesY = RoundedUpMinSwathSizeBytesY; 6939 RoundedUpSwathSizeBytesC = RoundedUpMinSwathSizeBytesC; 6940 } 6941 { 6942 double actDETBufferSizeInKByte = dml_ceil(DETBufferSizeInKByte, 64); 6943 6944 if (SwathHeightC[k] == 0) { 6945 DETBufferSizeY[k] = actDETBufferSizeInKByte * 1024; 6946 DETBufferSizeC[k] = 0; 6947 } else if (RoundedUpSwathSizeBytesY <= 1.5 * RoundedUpSwathSizeBytesC) { 6948 DETBufferSizeY[k] = actDETBufferSizeInKByte * 1024 / 2; 6949 DETBufferSizeC[k] = actDETBufferSizeInKByte * 1024 / 2; 6950 } else { 6951 DETBufferSizeY[k] = dml_floor(actDETBufferSizeInKByte * 1024 * 2 / 3, 1024); 6952 DETBufferSizeC[k] = actDETBufferSizeInKByte * 1024 / 3; 6953 } 6954 6955 if (RoundedUpMinSwathSizeBytesY + RoundedUpMinSwathSizeBytesC > actDETBufferSizeInKByte * 1024 / 2 || SwathWidth[k] > MaximumSwathWidthLuma[k] 6956 || (SwathHeightC[k] > 0 && SwathWidthChroma[k] > MaximumSwathWidthChroma[k])) { 6957 *ViewportSizeSupport = false; 6958 ViewportSizeSupportPerPlane[k] = false; 6959 } else { 6960 ViewportSizeSupportPerPlane[k] = true; 6961 } 6962 } 6963 } 6964 } 6965 6966 static void CalculateSwathWidth( 6967 bool ForceSingleDPP, 6968 int NumberOfActivePlanes, 6969 enum source_format_class SourcePixelFormat[], 6970 enum scan_direction_class SourceScan[], 6971 int ViewportWidth[], 6972 int ViewportHeight[], 6973 int SurfaceWidthY[], 6974 int SurfaceWidthC[], 6975 int SurfaceHeightY[], 6976 int SurfaceHeightC[], 6977 enum odm_combine_mode ODMCombineEnabled[], 6978 int BytePerPixY[], 6979 int BytePerPixC[], 6980 int Read256BytesBlockHeightY[], 6981 int Read256BytesBlockHeightC[], 6982 int 
Read256BytesBlockWidthY[], 6983 int Read256BytesBlockWidthC[], 6984 int BlendingAndTiming[], 6985 int HActive[], 6986 double HRatio[], 6987 int DPPPerPlane[], 6988 double SwathWidthSingleDPPY[], 6989 double SwathWidthSingleDPPC[], 6990 double SwathWidthY[], 6991 double SwathWidthC[], 6992 int MaximumSwathHeightY[], 6993 int MaximumSwathHeightC[], 6994 int swath_width_luma_ub[], 6995 int swath_width_chroma_ub[]) 6996 { 6997 enum odm_combine_mode MainPlaneODMCombine; 6998 int j, k; 6999 7000 #ifdef __DML_VBA_DEBUG__ 7001 dml_print("DML::%s: NumberOfActivePlanes = %d\n", __func__, NumberOfActivePlanes); 7002 #endif 7003 7004 for (k = 0; k < NumberOfActivePlanes; ++k) { 7005 if (SourceScan[k] != dm_vert) { 7006 SwathWidthSingleDPPY[k] = ViewportWidth[k]; 7007 } else { 7008 SwathWidthSingleDPPY[k] = ViewportHeight[k]; 7009 } 7010 7011 #ifdef __DML_VBA_DEBUG__ 7012 dml_print("DML::%s: k=%d ViewportWidth=%d\n", __func__, k, ViewportWidth[k]); 7013 dml_print("DML::%s: k=%d ViewportHeight=%d\n", __func__, k, ViewportHeight[k]); 7014 #endif 7015 7016 MainPlaneODMCombine = ODMCombineEnabled[k]; 7017 for (j = 0; j < NumberOfActivePlanes; ++j) { 7018 if (BlendingAndTiming[k] == j) { 7019 MainPlaneODMCombine = ODMCombineEnabled[j]; 7020 } 7021 } 7022 7023 if (MainPlaneODMCombine == dm_odm_combine_mode_4to1) 7024 SwathWidthY[k] = dml_min(SwathWidthSingleDPPY[k], dml_round(HActive[k] / 4.0 * HRatio[k])); 7025 else if (MainPlaneODMCombine == dm_odm_combine_mode_2to1) 7026 SwathWidthY[k] = dml_min(SwathWidthSingleDPPY[k], dml_round(HActive[k] / 2.0 * HRatio[k])); 7027 else if (DPPPerPlane[k] == 2) 7028 SwathWidthY[k] = SwathWidthSingleDPPY[k] / 2; 7029 else 7030 SwathWidthY[k] = SwathWidthSingleDPPY[k]; 7031 7032 #ifdef __DML_VBA_DEBUG__ 7033 dml_print("DML::%s: k=%d SwathWidthSingleDPPY=%f\n", __func__, k, SwathWidthSingleDPPY[k]); 7034 dml_print("DML::%s: k=%d SwathWidthY=%f\n", __func__, k, SwathWidthY[k]); 7035 #endif 7036 7037 if (SourcePixelFormat[k] == dm_420_8 || 
SourcePixelFormat[k] == dm_420_10 || SourcePixelFormat[k] == dm_420_12) { 7038 SwathWidthC[k] = SwathWidthY[k] / 2; 7039 SwathWidthSingleDPPC[k] = SwathWidthSingleDPPY[k] / 2; 7040 } else { 7041 SwathWidthC[k] = SwathWidthY[k]; 7042 SwathWidthSingleDPPC[k] = SwathWidthSingleDPPY[k]; 7043 } 7044 7045 if (ForceSingleDPP == true) { 7046 SwathWidthY[k] = SwathWidthSingleDPPY[k]; 7047 SwathWidthC[k] = SwathWidthSingleDPPC[k]; 7048 } 7049 { 7050 int surface_width_ub_l = dml_ceil(SurfaceWidthY[k], Read256BytesBlockWidthY[k]); 7051 int surface_height_ub_l = dml_ceil(SurfaceHeightY[k], Read256BytesBlockHeightY[k]); 7052 int surface_width_ub_c = dml_ceil(SurfaceWidthC[k], Read256BytesBlockWidthC[k]); 7053 int surface_height_ub_c = dml_ceil(SurfaceHeightC[k], Read256BytesBlockHeightC[k]); 7054 7055 #ifdef __DML_VBA_DEBUG__ 7056 dml_print("DML::%s: k=%d surface_width_ub_l=%0d\n", __func__, k, surface_width_ub_l); 7057 #endif 7058 7059 if (SourceScan[k] != dm_vert) { 7060 MaximumSwathHeightY[k] = Read256BytesBlockHeightY[k]; 7061 MaximumSwathHeightC[k] = Read256BytesBlockHeightC[k]; 7062 swath_width_luma_ub[k] = dml_min(surface_width_ub_l, (int) dml_ceil(SwathWidthY[k] - 1, Read256BytesBlockWidthY[k]) + Read256BytesBlockWidthY[k]); 7063 if (BytePerPixC[k] > 0) { 7064 swath_width_chroma_ub[k] = dml_min( 7065 surface_width_ub_c, 7066 (int) dml_ceil(SwathWidthC[k] - 1, Read256BytesBlockWidthC[k]) + Read256BytesBlockWidthC[k]); 7067 } else { 7068 swath_width_chroma_ub[k] = 0; 7069 } 7070 } else { 7071 MaximumSwathHeightY[k] = Read256BytesBlockWidthY[k]; 7072 MaximumSwathHeightC[k] = Read256BytesBlockWidthC[k]; 7073 swath_width_luma_ub[k] = dml_min(surface_height_ub_l, (int) dml_ceil(SwathWidthY[k] - 1, Read256BytesBlockHeightY[k]) + Read256BytesBlockHeightY[k]); 7074 if (BytePerPixC[k] > 0) { 7075 swath_width_chroma_ub[k] = dml_min( 7076 surface_height_ub_c, 7077 (int) dml_ceil(SwathWidthC[k] - 1, Read256BytesBlockHeightC[k]) + Read256BytesBlockHeightC[k]); 7078 } else { 7079 
swath_width_chroma_ub[k] = 0; 7080 } 7081 } 7082 } 7083 } 7084 } 7085 7086 static double CalculateExtraLatency( 7087 int RoundTripPingLatencyCycles, 7088 int ReorderingBytes, 7089 double DCFCLK, 7090 int TotalNumberOfActiveDPP, 7091 int PixelChunkSizeInKByte, 7092 int TotalNumberOfDCCActiveDPP, 7093 int MetaChunkSize, 7094 double ReturnBW, 7095 bool GPUVMEnable, 7096 bool HostVMEnable, 7097 int NumberOfActivePlanes, 7098 int NumberOfDPP[], 7099 int dpte_group_bytes[], 7100 double HostVMInefficiencyFactor, 7101 double HostVMMinPageSize, 7102 int HostVMMaxNonCachedPageTableLevels) 7103 { 7104 double ExtraLatencyBytes; 7105 double ExtraLatency; 7106 7107 ExtraLatencyBytes = CalculateExtraLatencyBytes( 7108 ReorderingBytes, 7109 TotalNumberOfActiveDPP, 7110 PixelChunkSizeInKByte, 7111 TotalNumberOfDCCActiveDPP, 7112 MetaChunkSize, 7113 GPUVMEnable, 7114 HostVMEnable, 7115 NumberOfActivePlanes, 7116 NumberOfDPP, 7117 dpte_group_bytes, 7118 HostVMInefficiencyFactor, 7119 HostVMMinPageSize, 7120 HostVMMaxNonCachedPageTableLevels); 7121 7122 ExtraLatency = (RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__) / DCFCLK + ExtraLatencyBytes / ReturnBW; 7123 7124 #ifdef __DML_VBA_DEBUG__ 7125 dml_print("DML::%s: RoundTripPingLatencyCycles=%d\n", __func__, RoundTripPingLatencyCycles); 7126 dml_print("DML::%s: DCFCLK=%f\n", __func__, DCFCLK); 7127 dml_print("DML::%s: ExtraLatencyBytes=%f\n", __func__, ExtraLatencyBytes); 7128 dml_print("DML::%s: ReturnBW=%f\n", __func__, ReturnBW); 7129 dml_print("DML::%s: ExtraLatency=%f\n", __func__, ExtraLatency); 7130 #endif 7131 7132 return ExtraLatency; 7133 } 7134 7135 static double CalculateExtraLatencyBytes( 7136 int ReorderingBytes, 7137 int TotalNumberOfActiveDPP, 7138 int PixelChunkSizeInKByte, 7139 int TotalNumberOfDCCActiveDPP, 7140 int MetaChunkSize, 7141 bool GPUVMEnable, 7142 bool HostVMEnable, 7143 int NumberOfActivePlanes, 7144 int NumberOfDPP[], 7145 int dpte_group_bytes[], 7146 double HostVMInefficiencyFactor, 7147 double 
/*
 * CalculateExtraLatencyBytes() - number of extra bytes that contribute to
 * latency.
 *
 * The result is
 *   ReorderingBytes
 *   + (TotalNumberOfActiveDPP * PixelChunkSizeInKByte
 *      + TotalNumberOfDCCActiveDPP * MetaChunkSize) * 1024
 * and, when GPUVMEnable is set, for every active plane k additionally
 *   NumberOfDPP[k] * dpte_group_bytes[k] * (1 + 8 * HostVMDynamicLevels)
 *   * HostVMInefficiencyFactor.
 *
 * HostVMDynamicLevels is 0 unless both GPUVMEnable and HostVMEnable are set.
 * In that case it is HostVMMaxNonCachedPageTableLevels for
 * HostVMMinPageSize < 2048, that value minus 1 for
 * HostVMMinPageSize < 1048576 and minus 2 otherwise, never below 0.
 */
static double CalculateExtraLatencyBytes(
		int ReorderingBytes,
		int TotalNumberOfActiveDPP,
		int PixelChunkSizeInKByte,
		int TotalNumberOfDCCActiveDPP,
		int MetaChunkSize,
		bool GPUVMEnable,
		bool HostVMEnable,
		int NumberOfActivePlanes,
		int NumberOfDPP[],
		int dpte_group_bytes[],
		double HostVMInefficiencyFactor,
		double HostVMMinPageSize,
		int HostVMMaxNonCachedPageTableLevels)
{
	double ret;
	int HostVMDynamicLevels = 0, k;

	/*
	 * The outer if/else must be braced: the inner chain already ends in
	 * its own else, so an unbraced outer else has no if to bind to.
	 */
	if (GPUVMEnable == true && HostVMEnable == true) {
		if (HostVMMinPageSize < 2048)
			HostVMDynamicLevels = HostVMMaxNonCachedPageTableLevels;
		else if (HostVMMinPageSize >= 2048 && HostVMMinPageSize < 1048576)
			HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 1);
		else
			HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 2);
	} else {
		HostVMDynamicLevels = 0;
	}

	/* Base term; computed regardless of the VM configuration. */
	ret = ReorderingBytes + (TotalNumberOfActiveDPP * PixelChunkSizeInKByte + TotalNumberOfDCCActiveDPP * MetaChunkSize) * 1024.0;

	/* Page-table term; depends on GPUVM only, not on HostVM. */
	if (GPUVMEnable == true) {
		for (k = 0; k < NumberOfActivePlanes; ++k)
			ret = ret + NumberOfDPP[k] * dpte_group_bytes[k] * (1 + 8 * HostVMDynamicLevels) * HostVMInefficiencyFactor;
	}
	return ret;
}

/*
 * CalculateUrgentLatency() - worst-case urgent latency.
 *
 * Returns the largest of the three urgent latencies passed in.  When
 * DoUrgentLatencyAdjustment is set, the result is increased by
 *   UrgentLatencyAdjustmentFabricClockComponent
 *   * (UrgentLatencyAdjustmentFabricClockReference / FabricClock - 1).
 */
static double CalculateUrgentLatency(
		double UrgentLatencyPixelDataOnly,
		double UrgentLatencyPixelMixedWithVMData,
		double UrgentLatencyVMDataOnly,
		bool DoUrgentLatencyAdjustment,
		double UrgentLatencyAdjustmentFabricClockComponent,
		double UrgentLatencyAdjustmentFabricClockReference,
		double FabricClock)
{
	double ret;

	ret = dml_max3(UrgentLatencyPixelDataOnly, UrgentLatencyPixelMixedWithVMData, UrgentLatencyVMDataOnly);
	if (DoUrgentLatencyAdjustment == true)
		ret = ret + UrgentLatencyAdjustmentFabricClockComponent * (UrgentLatencyAdjustmentFabricClockReference / FabricClock - 1);
	return ret;
}
/*
 * UseMinimumDCFCLK() - fill v->DCFCLKState[i][j] with the lowest DCFCLK
 * estimated to be sufficient for each state.
 *
 * For every state i below v->soc.num_states and every j in {0, 1} (the
 * second index of the per-state arrays; its meaning is not visible in this
 * function) the result is
 *   min(DCFCLKPerState[i],
 *       1.05 * max(DCFCLKRequiredForAverageBandwidth,
 *                  DCFCLKRequiredForPeakBandwidth)).
 *
 * MaxPrefetchMode is forwarded to CalculateTWait() and ReorderingBytes to
 * CalculateExtraLatencyBytes().
 */
static void UseMinimumDCFCLK(
		struct display_mode_lib *mode_lib,
		int MaxPrefetchMode,
		int ReorderingBytes)
{
	struct vba_vars_st *v = &mode_lib->vba;
	/* dummy1..dummy3 only receive outputs that are not used here. */
	int dummy1, i, j, k;
	double NormalEfficiency, dummy2, dummy3;
	double TotalMaxPrefetchFlipDPTERowBandwidth[DC__VOLTAGE_STATES][2];

	/* Percentage converted to a fraction. */
	NormalEfficiency = v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0;
	for (i = 0; i < v->soc.num_states; ++i) {
		for (j = 0; j <= 1; ++j) {
			double PixelDCFCLKCyclesRequiredInPrefetch[DC__NUM_DPP__MAX];
			double PrefetchPixelLinesTime[DC__NUM_DPP__MAX];
			double DCFCLKRequiredForPeakBandwidthPerPlane[DC__NUM_DPP__MAX];
			double DynamicMetadataVMExtraLatency[DC__NUM_DPP__MAX];
			double MinimumTWait;
			double NonDPTEBandwidth;
			double DPTEBandwidth;
			double DCFCLKRequiredForAverageBandwidth;
			double ExtraLatencyBytes;
			double ExtraLatencyCycles;
			double DCFCLKRequiredForPeakBandwidth;
			int NoOfDPPState[DC__NUM_DPP__MAX];
			double MinimumTvmPlus2Tr0;

			/* Sum over planes of DPTE row bytes divided by 15.75 line times. */
			TotalMaxPrefetchFlipDPTERowBandwidth[i][j] = 0;
			for (k = 0; k < v->NumberOfActivePlanes; ++k) {
				TotalMaxPrefetchFlipDPTERowBandwidth[i][j] = TotalMaxPrefetchFlipDPTERowBandwidth[i][j]
						+ v->NoOfDPP[i][j][k] * v->DPTEBytesPerRow[i][j][k] / (15.75 * v->HTotal[k] / v->PixelClock[k]);
			}

			/* Flat per-plane copy for the array parameter of CalculateExtraLatencyBytes(). */
			for (k = 0; k <= v->NumberOfActivePlanes - 1; ++k)
				NoOfDPPState[k] = v->NoOfDPP[i][j][k];

			MinimumTWait = CalculateTWait(MaxPrefetchMode, v->FinalDRAMClockChangeLatency, v->UrgLatency[i], v->SREnterPlusExitTime);
			NonDPTEBandwidth = v->TotalVActivePixelBandwidth[i][j] + v->TotalVActiveCursorBandwidth[i][j] + v->TotalMetaRowBandwidth[i][j];
			/* The prefetch/flip DPTE bandwidth is used with HostVM or when immediate flip is required. */
			DPTEBandwidth = (v->HostVMEnable == true || v->ImmediateFlipRequirement[0] == dm_immediate_flip_required) ?
					TotalMaxPrefetchFlipDPTERowBandwidth[i][j] : v->TotalDPTERowBandwidth[i][j];
			/* Largest of: deep-sleep clock, and two bandwidth-derived clock estimates. */
			DCFCLKRequiredForAverageBandwidth = dml_max3(
					v->ProjectedDCFCLKDeepSleep[i][j],
					(NonDPTEBandwidth + v->TotalDPTERowBandwidth[i][j]) / v->ReturnBusWidth
							/ (v->MaxAveragePercentOfIdealFabricAndSDPPortBWDisplayCanUseInNormalSystemOperation / 100),
					(NonDPTEBandwidth + DPTEBandwidth / NormalEfficiency) / NormalEfficiency / v->ReturnBusWidth);

			/* HostVMInefficiencyFactor is passed as 1 here. */
			ExtraLatencyBytes = CalculateExtraLatencyBytes(
					ReorderingBytes,
					v->TotalNumberOfActiveDPP[i][j],
					v->PixelChunkSizeInKByte,
					v->TotalNumberOfDCCActiveDPP[i][j],
					v->MetaChunkSize,
					v->GPUVMEnable,
					v->HostVMEnable,
					v->NumberOfActivePlanes,
					NoOfDPPState,
					v->dpte_group_bytes,
					1,
					v->HostVMMinPageSize,
					v->HostVMMaxNonCachedPageTableLevels);
			ExtraLatencyCycles = v->RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__ + ExtraLatencyBytes / NormalEfficiency / v->ReturnBusWidth;
			/* Per-plane peak requirement. */
			for (k = 0; k < v->NumberOfActivePlanes; ++k) {
				double DCFCLKCyclesRequiredInPrefetch;
				double ExpectedPrefetchBWAcceleration;
				double PrefetchTime;

				PixelDCFCLKCyclesRequiredInPrefetch[k] = (v->PrefetchLinesY[i][j][k] * v->swath_width_luma_ub_all_states[i][j][k] * v->BytePerPixelY[k]
						+ v->PrefetchLinesC[i][j][k] * v->swath_width_chroma_ub_all_states[i][j][k] * v->BytePerPixelC[k]) / NormalEfficiency / v->ReturnBusWidth;
				/* The PDE/meta-PTE term only counts when GPUVMMaxPageTableLevels > 2. */
				DCFCLKCyclesRequiredInPrefetch = 2 * ExtraLatencyCycles / NoOfDPPState[k]
						+ v->PDEAndMetaPTEBytesPerFrame[i][j][k] / NormalEfficiency / NormalEfficiency / v->ReturnBusWidth * (v->GPUVMMaxPageTableLevels > 2 ? 1 : 0)
						+ 2 * v->DPTEBytesPerRow[i][j][k] / NormalEfficiency / NormalEfficiency / v->ReturnBusWidth
						+ 2 * v->MetaRowBytes[i][j][k] / NormalEfficiency / v->ReturnBusWidth + PixelDCFCLKCyclesRequiredInPrefetch[k];
				PrefetchPixelLinesTime[k] = dml_max(v->PrefetchLinesY[i][j][k], v->PrefetchLinesC[i][j][k]) * v->HTotal[k] / v->PixelClock[k];
				ExpectedPrefetchBWAcceleration = (v->VActivePixelBandwidth[i][j][k] + v->VActiveCursorBandwidth[i][j][k])
						/ (v->ReadBandwidthLuma[k] + v->ReadBandwidthChroma[k]);
				/* Non-zero only with GPUVM, per-plane dynamic metadata and DynamicMetadataVMEnabled all set. */
				DynamicMetadataVMExtraLatency[k] =
						(v->GPUVMEnable == true && v->DynamicMetadataEnable[k] == true && v->DynamicMetadataVMEnabled == true) ?
								v->UrgLatency[i] * v->GPUVMMaxPageTableLevels * (v->HostVMEnable == true ? v->HostVMMaxNonCachedPageTableLevels + 1 : 1) : 0;
				PrefetchTime = (v->MaximumVStartup[i][j][k] - 1) * v->HTotal[k] / v->PixelClock[k] - MinimumTWait
						- v->UrgLatency[i]
								* ((v->GPUVMMaxPageTableLevels <= 2 ? v->GPUVMMaxPageTableLevels : v->GPUVMMaxPageTableLevels - 2)
										* (v->HostVMEnable == true ? v->HostVMMaxNonCachedPageTableLevels + 1 : 1) - 1)
						- DynamicMetadataVMExtraLatency[k];

				if (PrefetchTime > 0) {
					double ExpectedVRatioPrefetch;

					ExpectedVRatioPrefetch = PrefetchPixelLinesTime[k]
							/ (PrefetchTime * PixelDCFCLKCyclesRequiredInPrefetch[k] / DCFCLKCyclesRequiredInPrefetch);
					DCFCLKRequiredForPeakBandwidthPerPlane[k] = NoOfDPPState[k] * PixelDCFCLKCyclesRequiredInPrefetch[k] / PrefetchPixelLinesTime[k]
							* dml_max(1.0, ExpectedVRatioPrefetch) * dml_max(1.0, ExpectedVRatioPrefetch / 4) * ExpectedPrefetchBWAcceleration;
					if (v->HostVMEnable == true || v->ImmediateFlipRequirement[0] == dm_immediate_flip_required) {
						DCFCLKRequiredForPeakBandwidthPerPlane[k] = DCFCLKRequiredForPeakBandwidthPerPlane[k]
								+ NoOfDPPState[k] * DPTEBandwidth / NormalEfficiency / NormalEfficiency / v->ReturnBusWidth;
					}
				} else {
					/* No positive prefetch time: fall back to the state's full DCFCLK. */
					DCFCLKRequiredForPeakBandwidthPerPlane[k] = v->DCFCLKPerState[i];
				}
				if (v->DynamicMetadataEnable[k] == true) {
					double TSetupPipe;
					double TdmbfPipe;
					double TdmsksPipe;
					double TdmecPipe;
					double AllowedTimeForUrgentExtraLatency;

					CalculateVupdateAndDynamicMetadataParameters(
							v->MaxInterDCNTileRepeaters,
							v->RequiredDPPCLK[i][j][k],
							v->RequiredDISPCLK[i][j],
							v->ProjectedDCFCLKDeepSleep[i][j],
							v->PixelClock[k],
							v->HTotal[k],
							v->VTotal[k] - v->VActive[k],
							v->DynamicMetadataTransmittedBytes[k],
							v->DynamicMetadataLinesBeforeActiveRequired[k],
							v->Interlace[k],
							v->ProgressiveToInterlaceUnitInOPP,
							&TSetupPipe,
							&TdmbfPipe,
							&TdmecPipe,
							&TdmsksPipe,
							&dummy1,
							&dummy2,
							&dummy3);
					AllowedTimeForUrgentExtraLatency = v->MaximumVStartup[i][j][k] * v->HTotal[k] / v->PixelClock[k] - MinimumTWait - TSetupPipe - TdmbfPipe - TdmecPipe
							- TdmsksPipe - DynamicMetadataVMExtraLatency[k];
					if (AllowedTimeForUrgentExtraLatency > 0) {
						DCFCLKRequiredForPeakBandwidthPerPlane[k] = dml_max(
								DCFCLKRequiredForPeakBandwidthPerPlane[k],
								ExtraLatencyCycles / AllowedTimeForUrgentExtraLatency);
					} else {
						/* No time budget left: fall back to the state's full DCFCLK. */
						DCFCLKRequiredForPeakBandwidthPerPlane[k] = v->DCFCLKPerState[i];
					}
				}
			}
			/* The peak requirement starts as the sum of the per-plane requirements. */
			DCFCLKRequiredForPeakBandwidth = 0;
			for (k = 0; k <= v->NumberOfActivePlanes - 1; ++k)
				DCFCLKRequiredForPeakBandwidth = DCFCLKRequiredForPeakBandwidth + DCFCLKRequiredForPeakBandwidthPerPlane[k];

			MinimumTvmPlus2Tr0 = v->UrgLatency[i]
					* (v->GPUVMEnable == true ?
							(v->HostVMEnable == true ?
									(v->GPUVMMaxPageTableLevels + 2) * (v->HostVMMaxNonCachedPageTableLevels + 1) - 1 : v->GPUVMMaxPageTableLevels + 1) :
							0);
			/*
			 * Each plane either replaces the running value with
			 * DCFCLKPerState[i] or raises it through dml_max3(); planes
			 * are visited in index order, so later planes see the value
			 * left by earlier ones.
			 */
			for (k = 0; k < v->NumberOfActivePlanes; ++k) {
				double MaximumTvmPlus2Tr0PlusTsw;

				MaximumTvmPlus2Tr0PlusTsw = (v->MaximumVStartup[i][j][k] - 2) * v->HTotal[k] / v->PixelClock[k] - MinimumTWait - DynamicMetadataVMExtraLatency[k];
				if (MaximumTvmPlus2Tr0PlusTsw <= MinimumTvmPlus2Tr0 + PrefetchPixelLinesTime[k] / 4) {
					DCFCLKRequiredForPeakBandwidth = v->DCFCLKPerState[i];
				} else {
					DCFCLKRequiredForPeakBandwidth = dml_max3(
							DCFCLKRequiredForPeakBandwidth,
							2 * ExtraLatencyCycles / (MaximumTvmPlus2Tr0PlusTsw - MinimumTvmPlus2Tr0 - PrefetchPixelLinesTime[k] / 4),
							(2 * ExtraLatencyCycles + PixelDCFCLKCyclesRequiredInPrefetch[k]) / (MaximumTvmPlus2Tr0PlusTsw - MinimumTvmPlus2Tr0));
				}
			}
			/* 5% margin on the larger requirement, capped at the state's DCFCLK. */
			v->DCFCLKState[i][j] = dml_min(v->DCFCLKPerState[i], 1.05 * dml_max(DCFCLKRequiredForAverageBandwidth, DCFCLKRequiredForPeakBandwidth));
		}
	}
}
*UnboundedRequestEnabled, 7361 int *CompressedBufferSizeInkByte) 7362 { 7363 double actDETBufferSizeInKByte = dml_ceil(DETBufferSizeInKByte, 64); 7364 7365 *UnboundedRequestEnabled = UnboundedRequest(UseUnboundedRequestingFinal, TotalActiveDPP, NoChromaPlanes, Output[0]); 7366 *CompressedBufferSizeInkByte = ( 7367 *UnboundedRequestEnabled == true ? 7368 ConfigReturnBufferSizeInKByte - TotalActiveDPP * actDETBufferSizeInKByte : 7369 ConfigReturnBufferSizeInKByte - MaxNumDPP * actDETBufferSizeInKByte); 7370 *CompressedBufferSizeInkByte = *CompressedBufferSizeInkByte * CompressedBufferSegmentSizeInkByteFinal / 64; 7371 7372 #ifdef __DML_VBA_DEBUG__ 7373 dml_print("DML::%s: TotalActiveDPP = %d\n", __func__, TotalActiveDPP); 7374 dml_print("DML::%s: DETBufferSizeInKByte = %d\n", __func__, DETBufferSizeInKByte); 7375 dml_print("DML::%s: ConfigReturnBufferSizeInKByte = %d\n", __func__, ConfigReturnBufferSizeInKByte); 7376 dml_print("DML::%s: UseUnboundedRequestingFinal = %d\n", __func__, UseUnboundedRequestingFinal); 7377 dml_print("DML::%s: actDETBufferSizeInKByte = %f\n", __func__, actDETBufferSizeInKByte); 7378 dml_print("DML::%s: UnboundedRequestEnabled = %d\n", __func__, *UnboundedRequestEnabled); 7379 dml_print("DML::%s: CompressedBufferSizeInkByte = %d\n", __func__, *CompressedBufferSizeInkByte); 7380 #endif 7381 } 7382 7383 static bool UnboundedRequest(enum unbounded_requesting_policy UseUnboundedRequestingFinal, int TotalNumberOfActiveDPP, bool NoChroma, enum output_encoder_class Output) 7384 { 7385 bool ret_val = false; 7386 7387 ret_val = (UseUnboundedRequestingFinal != dm_unbounded_requesting_disable && TotalNumberOfActiveDPP == 1 && NoChroma); 7388 if (UseUnboundedRequestingFinal == dm_unbounded_requesting_edp_only && Output != dm_edp) 7389 ret_val = false; 7390 return ret_val; 7391 } 7392 7393 static unsigned int CalculateMaxVStartup( 7394 unsigned int VTotal, 7395 unsigned int VActive, 7396 unsigned int VBlankNom, 7397 unsigned int HTotal, 7398 double 
/*
 * CalculateMaxVStartup() - largest VStartup value for a timing.
 *
 * The usable vblank is the smaller of the actual vblank (VTotal - VActive)
 * and the nominal vblank.  The nominal vblank is VBlankNom limited to
 * VBlankNomDefaultUS expressed in lines (floored); when that limit yields 0
 * the default line count is used instead.
 *
 * Interlaced timings without ProgressiveTointerlaceUnitinOPP return half of
 * the usable vblank (floored).  All other timings return the usable vblank
 * minus the writeback delay in lines (rounded up, at least 1).  The result
 * is capped at 1023.
 */
static unsigned int CalculateMaxVStartup(
		unsigned int VTotal,
		unsigned int VActive,
		unsigned int VBlankNom,
		unsigned int HTotal,
		double PixelClock,
		bool ProgressiveTointerlaceUnitinOPP,
		bool Interlace,
		unsigned int VBlankNomDefaultUS,
		double WritebackDelayTime)
{
	const double line_time_us = HTotal / PixelClock;
	const unsigned int vblank_actual = VTotal - VActive;
	const unsigned int vblank_nom_default_in_line = dml_floor(VBlankNomDefaultUS / line_time_us, 1.0);
	unsigned int vblank_avail = dml_min(VBlankNom, vblank_nom_default_in_line);
	unsigned int vblank_size;
	unsigned int MaxVStartup;

	/* A nominal vblank of 0 selects the default. */
	if (vblank_avail == 0)
		vblank_avail = vblank_nom_default_in_line;

	vblank_size = (unsigned int) dml_min(vblank_actual, vblank_avail);

	if (Interlace && !ProgressiveTointerlaceUnitinOPP)
		MaxVStartup = dml_floor(vblank_size / 2.0, 1.0);
	else
		MaxVStartup = vblank_size - dml_max(1.0, dml_ceil(WritebackDelayTime / line_time_us, 1.0));

	return MaxVStartup > 1023 ? 1023 : MaxVStartup;
}