1 // SPDX-License-Identifier: MIT 2 /* 3 * Copyright 2022 Advanced Micro Devices, Inc. 4 * 5 * Permission is hereby granted, free of charge, to any person obtaining a 6 * copy of this software and associated documentation files (the "Software"), 7 * to deal in the Software without restriction, including without limitation 8 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 9 * and/or sell copies of the Software, and to permit persons to whom the 10 * Software is furnished to do so, subject to the following conditions: 11 * 12 * The above copyright notice and this permission notice shall be included in 13 * all copies or substantial portions of the Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR 19 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 20 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 21 * OTHER DEALINGS IN THE SOFTWARE. 22 * 23 * Authors: AMD 24 * 25 */ 26 27 #define UNIT_TEST 0 28 #if !UNIT_TEST 29 #include "dc.h" 30 #endif 31 #include "../display_mode_lib.h" 32 #include "display_mode_vba_314.h" 33 #include "../dml_inline_defs.h" 34 35 /* 36 * NOTE: 37 * This file is gcc-parsable HW gospel, coming straight from HW engineers. 38 * 39 * It doesn't adhere to Linux kernel style and sometimes will do things in odd 40 * ways. Unless there is something clearly wrong with it the code should 41 * remain as-is as it provides us with a guarantee from HW that it is correct. 
/*
 * Per-pipe inputs bundled for CalculatePrefetchSchedule(), which receives a
 * pointer to this struct as its `myPipe` argument.
 *
 * The field notes below describe only how the visible part of
 * CalculatePrefetchSchedule() consumes each member; fields without a note
 * are not referenced in that visible part.
 * NOTE(review): units of the clock fields are not established here - confirm
 * against the caller that fills this struct.
 */
typedef struct {
	/* DPP cycle delays are scaled by PixelClock / DPPCLK; 0.0 triggers an early "return true" */
	double DPPCLK;
	/* DISPCLK cycle delays are scaled by PixelClock / DISPCLK; 0.0 triggers an early "return true" */
	double DISPCLK;
	/* LineTime is computed as HTotal / PixelClock */
	double PixelClock;
	/* forwarded to CalculateVupdateAndDynamicMetadataParameters() */
	double DCFCLKDeepSleep;
	/* adds (DPPPerPlane - 1) * DPP_RECOUT_WIDTH to DSTXAfterScaler; also divides prefetch_bw_pr */
	unsigned int DPPPerPlane;
	/* selects DPPCLKDelaySCL (true) vs DPPCLKDelaySCLLBOnly (false) for DPPCycles */
	bool ScalerEnabled;
	/* prefetch_bw_pr is scaled by min(1, VRatio) */
	double VRatio;
	double VRatioChroma;
	enum scan_direction_class SourceScan;
	unsigned int BlockWidth256BytesY;
	unsigned int BlockHeight256BytesY;
	unsigned int BlockWidth256BytesC;
	unsigned int BlockHeight256BytesC;
	/* tested as a boolean; together with ProgressiveToInterlaceUnitInOPP it forces DSTYAfterScaler to start at 1 */
	unsigned int InterlaceEnable;
	/* each cursor adds DPPCLKDelayCNVCCursor to DPPCycles */
	unsigned int NumberOfCursors;
	/* forwarded to CalculateVupdateAndDynamicMetadataParameters() */
	unsigned int VBlank;
	/* numerator of LineTime; also used to split the post-scaler pixel delay into lines + pixels */
	unsigned int HTotal;
	/* tested as a boolean; with GPUVM disabled, Tno_bw is LineTime when clear and LineTime / 4 when set */
	unsigned int DCCEnable;
	/* adds 18 to DSTXAfterScaler when set */
	bool ODMCombineIsEnabled;
	/* dm_420_8 / dm_420_10 / dm_420_12 use BytePerPixelY + BytePerPixelC / 4 for bytes_pp; other formats use the plain sum */
	enum source_format_class SourcePixelFormat;
	int BytePerPixelY;
	/* NOTE(review): this is an int, so the "BytePerPixelC / 4" above is an integer division */
	int BytePerPixelC;
	bool ProgressiveToInterlaceUnitInOPP;
} Pipe;
DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation(struct display_mode_lib *mode_lib); 105 static unsigned int dscceComputeDelay( 106 unsigned int bpc, 107 double BPP, 108 unsigned int sliceWidth, 109 unsigned int numSlices, 110 enum output_format_class pixelFormat, 111 enum output_encoder_class Output); 112 static unsigned int dscComputeDelay(enum output_format_class pixelFormat, enum output_encoder_class Output); 113 static bool CalculatePrefetchSchedule( 114 struct display_mode_lib *mode_lib, 115 double HostVMInefficiencyFactor, 116 Pipe *myPipe, 117 unsigned int DSCDelay, 118 double DPPCLKDelaySubtotalPlusCNVCFormater, 119 double DPPCLKDelaySCL, 120 double DPPCLKDelaySCLLBOnly, 121 double DPPCLKDelayCNVCCursor, 122 double DISPCLKDelaySubtotal, 123 unsigned int DPP_RECOUT_WIDTH, 124 enum output_format_class OutputFormat, 125 unsigned int MaxInterDCNTileRepeaters, 126 unsigned int VStartup, 127 unsigned int MaxVStartup, 128 unsigned int GPUVMPageTableLevels, 129 bool GPUVMEnable, 130 bool HostVMEnable, 131 unsigned int HostVMMaxNonCachedPageTableLevels, 132 double HostVMMinPageSize, 133 bool DynamicMetadataEnable, 134 bool DynamicMetadataVMEnabled, 135 int DynamicMetadataLinesBeforeActiveRequired, 136 unsigned int DynamicMetadataTransmittedBytes, 137 double UrgentLatency, 138 double UrgentExtraLatency, 139 double TCalc, 140 unsigned int PDEAndMetaPTEBytesFrame, 141 unsigned int MetaRowByte, 142 unsigned int PixelPTEBytesPerRow, 143 double PrefetchSourceLinesY, 144 unsigned int SwathWidthY, 145 double VInitPreFillY, 146 unsigned int MaxNumSwathY, 147 double PrefetchSourceLinesC, 148 unsigned int SwathWidthC, 149 double VInitPreFillC, 150 unsigned int MaxNumSwathC, 151 int swath_width_luma_ub, 152 int swath_width_chroma_ub, 153 unsigned int SwathHeightY, 154 unsigned int SwathHeightC, 155 double TWait, 156 double *DSTXAfterScaler, 157 double *DSTYAfterScaler, 158 double *DestinationLinesForPrefetch, 159 double *PrefetchBandwidth, 160 
double *DestinationLinesToRequestVMInVBlank, 161 double *DestinationLinesToRequestRowInVBlank, 162 double *VRatioPrefetchY, 163 double *VRatioPrefetchC, 164 double *RequiredPrefetchPixDataBWLuma, 165 double *RequiredPrefetchPixDataBWChroma, 166 bool *NotEnoughTimeForDynamicMetadata, 167 double *Tno_bw, 168 double *prefetch_vmrow_bw, 169 double *Tdmdl_vm, 170 double *Tdmdl, 171 double *TSetup, 172 int *VUpdateOffsetPix, 173 double *VUpdateWidthPix, 174 double *VReadyOffsetPix); 175 static double RoundToDFSGranularityUp(double Clock, double VCOSpeed); 176 static double RoundToDFSGranularityDown(double Clock, double VCOSpeed); 177 static void CalculateDCCConfiguration( 178 bool DCCEnabled, 179 bool DCCProgrammingAssumesScanDirectionUnknown, 180 enum source_format_class SourcePixelFormat, 181 unsigned int SurfaceWidthLuma, 182 unsigned int SurfaceWidthChroma, 183 unsigned int SurfaceHeightLuma, 184 unsigned int SurfaceHeightChroma, 185 double DETBufferSize, 186 unsigned int RequestHeight256ByteLuma, 187 unsigned int RequestHeight256ByteChroma, 188 enum dm_swizzle_mode TilingFormat, 189 unsigned int BytePerPixelY, 190 unsigned int BytePerPixelC, 191 double BytePerPixelDETY, 192 double BytePerPixelDETC, 193 enum scan_direction_class ScanOrientation, 194 unsigned int *MaxUncompressedBlockLuma, 195 unsigned int *MaxUncompressedBlockChroma, 196 unsigned int *MaxCompressedBlockLuma, 197 unsigned int *MaxCompressedBlockChroma, 198 unsigned int *IndependentBlockLuma, 199 unsigned int *IndependentBlockChroma); 200 static double CalculatePrefetchSourceLines( 201 struct display_mode_lib *mode_lib, 202 double VRatio, 203 double vtaps, 204 bool Interlace, 205 bool ProgressiveToInterlaceUnitInOPP, 206 unsigned int SwathHeight, 207 unsigned int ViewportYStart, 208 double *VInitPreFill, 209 unsigned int *MaxNumSwath); 210 static unsigned int CalculateVMAndRowBytes( 211 struct display_mode_lib *mode_lib, 212 bool DCCEnable, 213 unsigned int BlockHeight256Bytes, 214 unsigned int 
BlockWidth256Bytes, 215 enum source_format_class SourcePixelFormat, 216 unsigned int SurfaceTiling, 217 unsigned int BytePerPixel, 218 enum scan_direction_class ScanDirection, 219 unsigned int SwathWidth, 220 unsigned int ViewportHeight, 221 bool GPUVMEnable, 222 bool HostVMEnable, 223 unsigned int HostVMMaxNonCachedPageTableLevels, 224 unsigned int GPUVMMinPageSize, 225 unsigned int HostVMMinPageSize, 226 unsigned int PTEBufferSizeInRequests, 227 unsigned int Pitch, 228 unsigned int DCCMetaPitch, 229 unsigned int *MacroTileWidth, 230 unsigned int *MetaRowByte, 231 unsigned int *PixelPTEBytesPerRow, 232 bool *PTEBufferSizeNotExceeded, 233 int *dpte_row_width_ub, 234 unsigned int *dpte_row_height, 235 unsigned int *MetaRequestWidth, 236 unsigned int *MetaRequestHeight, 237 unsigned int *meta_row_width, 238 unsigned int *meta_row_height, 239 int *vm_group_bytes, 240 unsigned int *dpte_group_bytes, 241 unsigned int *PixelPTEReqWidth, 242 unsigned int *PixelPTEReqHeight, 243 unsigned int *PTERequestSize, 244 int *DPDE0BytesFrame, 245 int *MetaPTEBytesFrame); 246 static double CalculateTWait(unsigned int PrefetchMode, double DRAMClockChangeLatency, double UrgentLatency, double SREnterPlusExitTime); 247 static void CalculateRowBandwidth( 248 bool GPUVMEnable, 249 enum source_format_class SourcePixelFormat, 250 double VRatio, 251 double VRatioChroma, 252 bool DCCEnable, 253 double LineTime, 254 unsigned int MetaRowByteLuma, 255 unsigned int MetaRowByteChroma, 256 unsigned int meta_row_height_luma, 257 unsigned int meta_row_height_chroma, 258 unsigned int PixelPTEBytesPerRowLuma, 259 unsigned int PixelPTEBytesPerRowChroma, 260 unsigned int dpte_row_height_luma, 261 unsigned int dpte_row_height_chroma, 262 double *meta_row_bw, 263 double *dpte_row_bw); 264 265 static void CalculateFlipSchedule( 266 struct display_mode_lib *mode_lib, 267 unsigned int k, 268 double HostVMInefficiencyFactor, 269 double UrgentExtraLatency, 270 double UrgentLatency, 271 double 
PDEAndMetaPTEBytesPerFrame, 272 double MetaRowBytes, 273 double DPTEBytesPerRow); 274 static double CalculateWriteBackDelay( 275 enum source_format_class WritebackPixelFormat, 276 double WritebackHRatio, 277 double WritebackVRatio, 278 unsigned int WritebackVTaps, 279 int WritebackDestinationWidth, 280 int WritebackDestinationHeight, 281 int WritebackSourceHeight, 282 unsigned int HTotal); 283 284 static void CalculateVupdateAndDynamicMetadataParameters( 285 int MaxInterDCNTileRepeaters, 286 double DPPCLK, 287 double DISPCLK, 288 double DCFClkDeepSleep, 289 double PixelClock, 290 int HTotal, 291 int VBlank, 292 int DynamicMetadataTransmittedBytes, 293 int DynamicMetadataLinesBeforeActiveRequired, 294 int InterlaceEnable, 295 bool ProgressiveToInterlaceUnitInOPP, 296 double *TSetup, 297 double *Tdmbf, 298 double *Tdmec, 299 double *Tdmsks, 300 int *VUpdateOffsetPix, 301 double *VUpdateWidthPix, 302 double *VReadyOffsetPix); 303 304 static void CalculateWatermarksAndDRAMSpeedChangeSupport( 305 struct display_mode_lib *mode_lib, 306 unsigned int PrefetchMode, 307 double DCFCLK, 308 double ReturnBW, 309 double UrgentLatency, 310 double ExtraLatency, 311 double SOCCLK, 312 double DCFCLKDeepSleep, 313 unsigned int DETBufferSizeY[], 314 unsigned int DETBufferSizeC[], 315 unsigned int SwathHeightY[], 316 unsigned int SwathHeightC[], 317 double SwathWidthY[], 318 double SwathWidthC[], 319 unsigned int DPPPerPlane[], 320 double BytePerPixelDETY[], 321 double BytePerPixelDETC[], 322 bool UnboundedRequestEnabled, 323 unsigned int CompressedBufferSizeInkByte, 324 enum clock_change_support *DRAMClockChangeSupport, 325 double *StutterExitWatermark, 326 double *StutterEnterPlusExitWatermark, 327 double *Z8StutterExitWatermark, 328 double *Z8StutterEnterPlusExitWatermark); 329 330 static void CalculateDCFCLKDeepSleep( 331 struct display_mode_lib *mode_lib, 332 unsigned int NumberOfActivePlanes, 333 int BytePerPixelY[], 334 int BytePerPixelC[], 335 double VRatio[], 336 double 
VRatioChroma[], 337 double SwathWidthY[], 338 double SwathWidthC[], 339 unsigned int DPPPerPlane[], 340 double HRatio[], 341 double HRatioChroma[], 342 double PixelClock[], 343 double PSCL_THROUGHPUT[], 344 double PSCL_THROUGHPUT_CHROMA[], 345 double DPPCLK[], 346 double ReadBandwidthLuma[], 347 double ReadBandwidthChroma[], 348 int ReturnBusWidth, 349 double *DCFCLKDeepSleep); 350 351 static void CalculateUrgentBurstFactor( 352 int swath_width_luma_ub, 353 int swath_width_chroma_ub, 354 unsigned int SwathHeightY, 355 unsigned int SwathHeightC, 356 double LineTime, 357 double UrgentLatency, 358 double CursorBufferSize, 359 unsigned int CursorWidth, 360 unsigned int CursorBPP, 361 double VRatio, 362 double VRatioC, 363 double BytePerPixelInDETY, 364 double BytePerPixelInDETC, 365 double DETBufferSizeY, 366 double DETBufferSizeC, 367 double *UrgentBurstFactorCursor, 368 double *UrgentBurstFactorLuma, 369 double *UrgentBurstFactorChroma, 370 bool *NotEnoughUrgentLatencyHiding); 371 372 static void UseMinimumDCFCLK( 373 struct display_mode_lib *mode_lib, 374 int MaxPrefetchMode, 375 int ReorderingBytes); 376 377 static void CalculatePixelDeliveryTimes( 378 unsigned int NumberOfActivePlanes, 379 double VRatio[], 380 double VRatioChroma[], 381 double VRatioPrefetchY[], 382 double VRatioPrefetchC[], 383 unsigned int swath_width_luma_ub[], 384 unsigned int swath_width_chroma_ub[], 385 unsigned int DPPPerPlane[], 386 double HRatio[], 387 double HRatioChroma[], 388 double PixelClock[], 389 double PSCL_THROUGHPUT[], 390 double PSCL_THROUGHPUT_CHROMA[], 391 double DPPCLK[], 392 int BytePerPixelC[], 393 enum scan_direction_class SourceScan[], 394 unsigned int NumberOfCursors[], 395 unsigned int CursorWidth[][DC__NUM_CURSOR__MAX], 396 unsigned int CursorBPP[][DC__NUM_CURSOR__MAX], 397 unsigned int BlockWidth256BytesY[], 398 unsigned int BlockHeight256BytesY[], 399 unsigned int BlockWidth256BytesC[], 400 unsigned int BlockHeight256BytesC[], 401 double 
DisplayPipeLineDeliveryTimeLuma[], 402 double DisplayPipeLineDeliveryTimeChroma[], 403 double DisplayPipeLineDeliveryTimeLumaPrefetch[], 404 double DisplayPipeLineDeliveryTimeChromaPrefetch[], 405 double DisplayPipeRequestDeliveryTimeLuma[], 406 double DisplayPipeRequestDeliveryTimeChroma[], 407 double DisplayPipeRequestDeliveryTimeLumaPrefetch[], 408 double DisplayPipeRequestDeliveryTimeChromaPrefetch[], 409 double CursorRequestDeliveryTime[], 410 double CursorRequestDeliveryTimePrefetch[]); 411 412 static void CalculateMetaAndPTETimes( 413 int NumberOfActivePlanes, 414 bool GPUVMEnable, 415 int MetaChunkSize, 416 int MinMetaChunkSizeBytes, 417 int HTotal[], 418 double VRatio[], 419 double VRatioChroma[], 420 double DestinationLinesToRequestRowInVBlank[], 421 double DestinationLinesToRequestRowInImmediateFlip[], 422 bool DCCEnable[], 423 double PixelClock[], 424 int BytePerPixelY[], 425 int BytePerPixelC[], 426 enum scan_direction_class SourceScan[], 427 int dpte_row_height[], 428 int dpte_row_height_chroma[], 429 int meta_row_width[], 430 int meta_row_width_chroma[], 431 int meta_row_height[], 432 int meta_row_height_chroma[], 433 int meta_req_width[], 434 int meta_req_width_chroma[], 435 int meta_req_height[], 436 int meta_req_height_chroma[], 437 int dpte_group_bytes[], 438 int PTERequestSizeY[], 439 int PTERequestSizeC[], 440 int PixelPTEReqWidthY[], 441 int PixelPTEReqHeightY[], 442 int PixelPTEReqWidthC[], 443 int PixelPTEReqHeightC[], 444 int dpte_row_width_luma_ub[], 445 int dpte_row_width_chroma_ub[], 446 double DST_Y_PER_PTE_ROW_NOM_L[], 447 double DST_Y_PER_PTE_ROW_NOM_C[], 448 double DST_Y_PER_META_ROW_NOM_L[], 449 double DST_Y_PER_META_ROW_NOM_C[], 450 double TimePerMetaChunkNominal[], 451 double TimePerChromaMetaChunkNominal[], 452 double TimePerMetaChunkVBlank[], 453 double TimePerChromaMetaChunkVBlank[], 454 double TimePerMetaChunkFlip[], 455 double TimePerChromaMetaChunkFlip[], 456 double time_per_pte_group_nom_luma[], 457 double 
time_per_pte_group_vblank_luma[], 458 double time_per_pte_group_flip_luma[], 459 double time_per_pte_group_nom_chroma[], 460 double time_per_pte_group_vblank_chroma[], 461 double time_per_pte_group_flip_chroma[]); 462 463 static void CalculateVMGroupAndRequestTimes( 464 unsigned int NumberOfActivePlanes, 465 bool GPUVMEnable, 466 unsigned int GPUVMMaxPageTableLevels, 467 unsigned int HTotal[], 468 int BytePerPixelC[], 469 double DestinationLinesToRequestVMInVBlank[], 470 double DestinationLinesToRequestVMInImmediateFlip[], 471 bool DCCEnable[], 472 double PixelClock[], 473 int dpte_row_width_luma_ub[], 474 int dpte_row_width_chroma_ub[], 475 int vm_group_bytes[], 476 unsigned int dpde0_bytes_per_frame_ub_l[], 477 unsigned int dpde0_bytes_per_frame_ub_c[], 478 int meta_pte_bytes_per_frame_ub_l[], 479 int meta_pte_bytes_per_frame_ub_c[], 480 double TimePerVMGroupVBlank[], 481 double TimePerVMGroupFlip[], 482 double TimePerVMRequestVBlank[], 483 double TimePerVMRequestFlip[]); 484 485 static void CalculateStutterEfficiency( 486 struct display_mode_lib *mode_lib, 487 int CompressedBufferSizeInkByte, 488 bool UnboundedRequestEnabled, 489 int ConfigReturnBufferSizeInKByte, 490 int MetaFIFOSizeInKEntries, 491 int ZeroSizeBufferEntries, 492 int NumberOfActivePlanes, 493 int ROBBufferSizeInKByte, 494 double TotalDataReadBandwidth, 495 double DCFCLK, 496 double ReturnBW, 497 double COMPBUF_RESERVED_SPACE_64B, 498 double COMPBUF_RESERVED_SPACE_ZS, 499 double SRExitTime, 500 double SRExitZ8Time, 501 bool SynchronizedVBlank, 502 double Z8StutterEnterPlusExitWatermark, 503 double StutterEnterPlusExitWatermark, 504 bool ProgressiveToInterlaceUnitInOPP, 505 bool Interlace[], 506 double MinTTUVBlank[], 507 int DPPPerPlane[], 508 unsigned int DETBufferSizeY[], 509 int BytePerPixelY[], 510 double BytePerPixelDETY[], 511 double SwathWidthY[], 512 int SwathHeightY[], 513 int SwathHeightC[], 514 double NetDCCRateLuma[], 515 double NetDCCRateChroma[], 516 double 
DCCFractionOfZeroSizeRequestsLuma[], 517 double DCCFractionOfZeroSizeRequestsChroma[], 518 int HTotal[], 519 int VTotal[], 520 double PixelClock[], 521 double VRatio[], 522 enum scan_direction_class SourceScan[], 523 int BlockHeight256BytesY[], 524 int BlockWidth256BytesY[], 525 int BlockHeight256BytesC[], 526 int BlockWidth256BytesC[], 527 int DCCYMaxUncompressedBlock[], 528 int DCCCMaxUncompressedBlock[], 529 int VActive[], 530 bool DCCEnable[], 531 bool WritebackEnable[], 532 double ReadBandwidthPlaneLuma[], 533 double ReadBandwidthPlaneChroma[], 534 double meta_row_bw[], 535 double dpte_row_bw[], 536 double *StutterEfficiencyNotIncludingVBlank, 537 double *StutterEfficiency, 538 int *NumberOfStutterBurstsPerFrame, 539 double *Z8StutterEfficiencyNotIncludingVBlank, 540 double *Z8StutterEfficiency, 541 int *Z8NumberOfStutterBurstsPerFrame, 542 double *StutterPeriod); 543 544 static void CalculateSwathAndDETConfiguration( 545 bool ForceSingleDPP, 546 int NumberOfActivePlanes, 547 unsigned int DETBufferSizeInKByte, 548 double MaximumSwathWidthLuma[], 549 double MaximumSwathWidthChroma[], 550 enum scan_direction_class SourceScan[], 551 enum source_format_class SourcePixelFormat[], 552 enum dm_swizzle_mode SurfaceTiling[], 553 int ViewportWidth[], 554 int ViewportHeight[], 555 int SurfaceWidthY[], 556 int SurfaceWidthC[], 557 int SurfaceHeightY[], 558 int SurfaceHeightC[], 559 int Read256BytesBlockHeightY[], 560 int Read256BytesBlockHeightC[], 561 int Read256BytesBlockWidthY[], 562 int Read256BytesBlockWidthC[], 563 enum odm_combine_mode ODMCombineEnabled[], 564 int BlendingAndTiming[], 565 int BytePerPixY[], 566 int BytePerPixC[], 567 double BytePerPixDETY[], 568 double BytePerPixDETC[], 569 int HActive[], 570 double HRatio[], 571 double HRatioChroma[], 572 int DPPPerPlane[], 573 int swath_width_luma_ub[], 574 int swath_width_chroma_ub[], 575 double SwathWidth[], 576 double SwathWidthChroma[], 577 int SwathHeightY[], 578 int SwathHeightC[], 579 unsigned int 
DETBufferSizeY[], 580 unsigned int DETBufferSizeC[], 581 bool ViewportSizeSupportPerPlane[], 582 bool *ViewportSizeSupport); 583 static void CalculateSwathWidth( 584 bool ForceSingleDPP, 585 int NumberOfActivePlanes, 586 enum source_format_class SourcePixelFormat[], 587 enum scan_direction_class SourceScan[], 588 int ViewportWidth[], 589 int ViewportHeight[], 590 int SurfaceWidthY[], 591 int SurfaceWidthC[], 592 int SurfaceHeightY[], 593 int SurfaceHeightC[], 594 enum odm_combine_mode ODMCombineEnabled[], 595 int BytePerPixY[], 596 int BytePerPixC[], 597 int Read256BytesBlockHeightY[], 598 int Read256BytesBlockHeightC[], 599 int Read256BytesBlockWidthY[], 600 int Read256BytesBlockWidthC[], 601 int BlendingAndTiming[], 602 int HActive[], 603 double HRatio[], 604 int DPPPerPlane[], 605 double SwathWidthSingleDPPY[], 606 double SwathWidthSingleDPPC[], 607 double SwathWidthY[], 608 double SwathWidthC[], 609 int MaximumSwathHeightY[], 610 int MaximumSwathHeightC[], 611 int swath_width_luma_ub[], 612 int swath_width_chroma_ub[]); 613 614 static double CalculateExtraLatency( 615 int RoundTripPingLatencyCycles, 616 int ReorderingBytes, 617 double DCFCLK, 618 int TotalNumberOfActiveDPP, 619 int PixelChunkSizeInKByte, 620 int TotalNumberOfDCCActiveDPP, 621 int MetaChunkSize, 622 double ReturnBW, 623 bool GPUVMEnable, 624 bool HostVMEnable, 625 int NumberOfActivePlanes, 626 int NumberOfDPP[], 627 int dpte_group_bytes[], 628 double HostVMInefficiencyFactor, 629 double HostVMMinPageSize, 630 int HostVMMaxNonCachedPageTableLevels); 631 632 static double CalculateExtraLatencyBytes( 633 int ReorderingBytes, 634 int TotalNumberOfActiveDPP, 635 int PixelChunkSizeInKByte, 636 int TotalNumberOfDCCActiveDPP, 637 int MetaChunkSize, 638 bool GPUVMEnable, 639 bool HostVMEnable, 640 int NumberOfActivePlanes, 641 int NumberOfDPP[], 642 int dpte_group_bytes[], 643 double HostVMInefficiencyFactor, 644 double HostVMMinPageSize, 645 int HostVMMaxNonCachedPageTableLevels); 646 647 static double 
CalculateUrgentLatency( 648 double UrgentLatencyPixelDataOnly, 649 double UrgentLatencyPixelMixedWithVMData, 650 double UrgentLatencyVMDataOnly, 651 bool DoUrgentLatencyAdjustment, 652 double UrgentLatencyAdjustmentFabricClockComponent, 653 double UrgentLatencyAdjustmentFabricClockReference, 654 double FabricClockSingle); 655 656 static void CalculateUnboundedRequestAndCompressedBufferSize( 657 unsigned int DETBufferSizeInKByte, 658 int ConfigReturnBufferSizeInKByte, 659 enum unbounded_requesting_policy UseUnboundedRequestingFinal, 660 int TotalActiveDPP, 661 bool NoChromaPlanes, 662 int MaxNumDPP, 663 int CompressedBufferSegmentSizeInkByteFinal, 664 enum output_encoder_class *Output, 665 bool *UnboundedRequestEnabled, 666 int *CompressedBufferSizeInkByte); 667 668 static bool UnboundedRequest(enum unbounded_requesting_policy UseUnboundedRequestingFinal, int TotalNumberOfActiveDPP, bool NoChroma, enum output_encoder_class Output); 669 static unsigned int CalculateMaxVStartup( 670 unsigned int VTotal, 671 unsigned int VActive, 672 unsigned int VBlankNom, 673 unsigned int HTotal, 674 double PixelClock, 675 bool ProgressiveTointerlaceUnitinOPP, 676 bool Interlace, 677 unsigned int VBlankNomDefaultUS, 678 double WritebackDelayTime); 679 680 void dml314_recalculate(struct display_mode_lib *mode_lib) 681 { 682 ModeSupportAndSystemConfiguration(mode_lib); 683 PixelClockAdjustmentForProgressiveToInterlaceUnit(mode_lib); 684 DisplayPipeConfiguration(mode_lib); 685 #ifdef __DML_VBA_DEBUG__ 686 dml_print("DML::%s: Calling DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation\n", __func__); 687 #endif 688 DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation(mode_lib); 689 } 690 691 static unsigned int dscceComputeDelay( 692 unsigned int bpc, 693 double BPP, 694 unsigned int sliceWidth, 695 unsigned int numSlices, 696 enum output_format_class pixelFormat, 697 enum output_encoder_class Output) 698 { 699 // valid bpc = 
source bits per component in the set of {8, 10, 12} 700 // valid bpp = increments of 1/16 of a bit 701 // min = 6/7/8 in N420/N422/444, respectively 702 // max = such that compression is 1:1 703 //valid sliceWidth = number of pixels per slice line, must be less than or equal to 5184/numSlices (or 4096/numSlices in 420 mode) 704 //valid numSlices = number of slices in the horiziontal direction per DSC engine in the set of {1, 2, 3, 4} 705 //valid pixelFormat = pixel/color format in the set of {:N444_RGB, :S422, :N422, :N420} 706 707 // fixed value 708 unsigned int rcModelSize = 8192; 709 710 // N422/N420 operate at 2 pixels per clock 711 unsigned int pixelsPerClock = 0, lstall, D, initalXmitDelay, w, s, ix, wx, P, l0, a, ax, L, Delay, pixels; 712 713 if (pixelFormat == dm_420) 714 pixelsPerClock = 2; 715 else if (pixelFormat == dm_444) 716 pixelsPerClock = 1; 717 else if (pixelFormat == dm_n422) 718 pixelsPerClock = 2; 719 // #all other modes operate at 1 pixel per clock 720 else 721 pixelsPerClock = 1; 722 723 //initial transmit delay as per PPS 724 initalXmitDelay = dml_round(rcModelSize / 2.0 / BPP / pixelsPerClock); 725 726 //compute ssm delay 727 if (bpc == 8) 728 D = 81; 729 else if (bpc == 10) 730 D = 89; 731 else 732 D = 113; 733 734 //divide by pixel per cycle to compute slice width as seen by DSC 735 w = sliceWidth / pixelsPerClock; 736 737 //422 mode has an additional cycle of delay 738 if (pixelFormat == dm_420 || pixelFormat == dm_444 || pixelFormat == dm_n422) 739 s = 0; 740 else 741 s = 1; 742 743 //main calculation for the dscce 744 ix = initalXmitDelay + 45; 745 wx = (w + 2) / 3; 746 P = 3 * wx - w; 747 l0 = ix / w; 748 a = ix + P * l0; 749 ax = (a + 2) / 3 + D + 6 + 1; 750 L = (ax + wx - 1) / wx; 751 if ((ix % w) == 0 && P != 0) 752 lstall = 1; 753 else 754 lstall = 0; 755 Delay = L * wx * (numSlices - 1) + ax + s + lstall + 22; 756 757 //dsc processes 3 pixel containers per cycle and a container can contain 1 or 2 pixels 758 pixels = Delay * 3 * 
pixelsPerClock; 759 return pixels; 760 } 761 762 static unsigned int dscComputeDelay(enum output_format_class pixelFormat, enum output_encoder_class Output) 763 { 764 unsigned int Delay = 0; 765 766 if (pixelFormat == dm_420) { 767 // sfr 768 Delay = Delay + 2; 769 // dsccif 770 Delay = Delay + 0; 771 // dscc - input deserializer 772 Delay = Delay + 3; 773 // dscc gets pixels every other cycle 774 Delay = Delay + 2; 775 // dscc - input cdc fifo 776 Delay = Delay + 12; 777 // dscc gets pixels every other cycle 778 Delay = Delay + 13; 779 // dscc - cdc uncertainty 780 Delay = Delay + 2; 781 // dscc - output cdc fifo 782 Delay = Delay + 7; 783 // dscc gets pixels every other cycle 784 Delay = Delay + 3; 785 // dscc - cdc uncertainty 786 Delay = Delay + 2; 787 // dscc - output serializer 788 Delay = Delay + 1; 789 // sft 790 Delay = Delay + 1; 791 } else if (pixelFormat == dm_n422) { 792 // sfr 793 Delay = Delay + 2; 794 // dsccif 795 Delay = Delay + 1; 796 // dscc - input deserializer 797 Delay = Delay + 5; 798 // dscc - input cdc fifo 799 Delay = Delay + 25; 800 // dscc - cdc uncertainty 801 Delay = Delay + 2; 802 // dscc - output cdc fifo 803 Delay = Delay + 10; 804 // dscc - cdc uncertainty 805 Delay = Delay + 2; 806 // dscc - output serializer 807 Delay = Delay + 1; 808 // sft 809 Delay = Delay + 1; 810 } else { 811 // sfr 812 Delay = Delay + 2; 813 // dsccif 814 Delay = Delay + 0; 815 // dscc - input deserializer 816 Delay = Delay + 3; 817 // dscc - input cdc fifo 818 Delay = Delay + 12; 819 // dscc - cdc uncertainty 820 Delay = Delay + 2; 821 // dscc - output cdc fifo 822 Delay = Delay + 7; 823 // dscc - output serializer 824 Delay = Delay + 1; 825 // dscc - cdc uncertainty 826 Delay = Delay + 2; 827 // sft 828 Delay = Delay + 1; 829 } 830 831 return Delay; 832 } 833 834 static bool CalculatePrefetchSchedule( 835 struct display_mode_lib *mode_lib, 836 double HostVMInefficiencyFactor, 837 Pipe *myPipe, 838 unsigned int DSCDelay, 839 double 
DPPCLKDelaySubtotalPlusCNVCFormater, 840 double DPPCLKDelaySCL, 841 double DPPCLKDelaySCLLBOnly, 842 double DPPCLKDelayCNVCCursor, 843 double DISPCLKDelaySubtotal, 844 unsigned int DPP_RECOUT_WIDTH, 845 enum output_format_class OutputFormat, 846 unsigned int MaxInterDCNTileRepeaters, 847 unsigned int VStartup, 848 unsigned int MaxVStartup, 849 unsigned int GPUVMPageTableLevels, 850 bool GPUVMEnable, 851 bool HostVMEnable, 852 unsigned int HostVMMaxNonCachedPageTableLevels, 853 double HostVMMinPageSize, 854 bool DynamicMetadataEnable, 855 bool DynamicMetadataVMEnabled, 856 int DynamicMetadataLinesBeforeActiveRequired, 857 unsigned int DynamicMetadataTransmittedBytes, 858 double UrgentLatency, 859 double UrgentExtraLatency, 860 double TCalc, 861 unsigned int PDEAndMetaPTEBytesFrame, 862 unsigned int MetaRowByte, 863 unsigned int PixelPTEBytesPerRow, 864 double PrefetchSourceLinesY, 865 unsigned int SwathWidthY, 866 double VInitPreFillY, 867 unsigned int MaxNumSwathY, 868 double PrefetchSourceLinesC, 869 unsigned int SwathWidthC, 870 double VInitPreFillC, 871 unsigned int MaxNumSwathC, 872 int swath_width_luma_ub, 873 int swath_width_chroma_ub, 874 unsigned int SwathHeightY, 875 unsigned int SwathHeightC, 876 double TWait, 877 double *DSTXAfterScaler, 878 double *DSTYAfterScaler, 879 double *DestinationLinesForPrefetch, 880 double *PrefetchBandwidth, 881 double *DestinationLinesToRequestVMInVBlank, 882 double *DestinationLinesToRequestRowInVBlank, 883 double *VRatioPrefetchY, 884 double *VRatioPrefetchC, 885 double *RequiredPrefetchPixDataBWLuma, 886 double *RequiredPrefetchPixDataBWChroma, 887 bool *NotEnoughTimeForDynamicMetadata, 888 double *Tno_bw, 889 double *prefetch_vmrow_bw, 890 double *Tdmdl_vm, 891 double *Tdmdl, 892 double *TSetup, 893 int *VUpdateOffsetPix, 894 double *VUpdateWidthPix, 895 double *VReadyOffsetPix) 896 { 897 bool MyError = false; 898 unsigned int DPPCycles, DISPCLKCycles; 899 double DSTTotalPixelsAfterScaler; 900 double LineTime; 901 double 
dst_y_prefetch_equ; 902 #ifdef __DML_VBA_DEBUG__ 903 double Tsw_oto; 904 #endif 905 double prefetch_bw_oto; 906 double prefetch_bw_pr; 907 double Tvm_oto; 908 double Tr0_oto; 909 double Tvm_oto_lines; 910 double Tr0_oto_lines; 911 double dst_y_prefetch_oto; 912 double TimeForFetchingMetaPTE = 0; 913 double TimeForFetchingRowInVBlank = 0; 914 double LinesToRequestPrefetchPixelData = 0; 915 unsigned int HostVMDynamicLevelsTrips; 916 double trip_to_mem; 917 double Tvm_trips; 918 double Tr0_trips; 919 double Tvm_trips_rounded; 920 double Tr0_trips_rounded; 921 double Lsw_oto; 922 double Tpre_rounded; 923 double prefetch_bw_equ; 924 double Tvm_equ; 925 double Tr0_equ; 926 double Tdmbf; 927 double Tdmec; 928 double Tdmsks; 929 double prefetch_sw_bytes; 930 double bytes_pp; 931 double dep_bytes; 932 int max_vratio_pre = 4; 933 double min_Lsw; 934 double Tsw_est1 = 0; 935 double Tsw_est3 = 0; 936 double max_Tsw = 0; 937 938 if (GPUVMEnable == true && HostVMEnable == true) { 939 HostVMDynamicLevelsTrips = HostVMMaxNonCachedPageTableLevels; 940 } else { 941 HostVMDynamicLevelsTrips = 0; 942 } 943 #ifdef __DML_VBA_DEBUG__ 944 dml_print("DML::%s: GPUVMEnable=%d HostVMEnable=%d HostVMInefficiencyFactor=%f\n", __func__, GPUVMEnable, HostVMEnable, HostVMInefficiencyFactor); 945 #endif 946 CalculateVupdateAndDynamicMetadataParameters( 947 MaxInterDCNTileRepeaters, 948 myPipe->DPPCLK, 949 myPipe->DISPCLK, 950 myPipe->DCFCLKDeepSleep, 951 myPipe->PixelClock, 952 myPipe->HTotal, 953 myPipe->VBlank, 954 DynamicMetadataTransmittedBytes, 955 DynamicMetadataLinesBeforeActiveRequired, 956 myPipe->InterlaceEnable, 957 myPipe->ProgressiveToInterlaceUnitInOPP, 958 TSetup, 959 &Tdmbf, 960 &Tdmec, 961 &Tdmsks, 962 VUpdateOffsetPix, 963 VUpdateWidthPix, 964 VReadyOffsetPix); 965 966 LineTime = myPipe->HTotal / myPipe->PixelClock; 967 trip_to_mem = UrgentLatency; 968 Tvm_trips = UrgentExtraLatency + trip_to_mem * (GPUVMPageTableLevels * (HostVMDynamicLevelsTrips + 1) - 1); 969 970 #ifdef 
__DML_VBA_ALLOW_DELTA__ 971 if (DynamicMetadataVMEnabled == true && GPUVMEnable == true) { 972 #else 973 if (DynamicMetadataVMEnabled == true) { 974 #endif 975 *Tdmdl = TWait + Tvm_trips + trip_to_mem; 976 } else { 977 *Tdmdl = TWait + UrgentExtraLatency; 978 } 979 980 #ifdef __DML_VBA_ALLOW_DELTA__ 981 if (DynamicMetadataEnable == false) { 982 *Tdmdl = 0.0; 983 } 984 #endif 985 986 if (DynamicMetadataEnable == true) { 987 if (VStartup * LineTime < *TSetup + *Tdmdl + Tdmbf + Tdmec + Tdmsks) { 988 *NotEnoughTimeForDynamicMetadata = true; 989 dml_print("DML::%s: Not Enough Time for Dynamic Meta!\n", __func__); 990 dml_print("DML::%s: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", __func__, Tdmbf); 991 dml_print("DML::%s: Tdmec: %fus - time dio takes to transfer dmd\n", __func__, Tdmec); 992 dml_print("DML::%s: Tdmsks: %fus - time before active dmd must complete transmission at dio\n", __func__, Tdmsks); 993 dml_print("DML::%s: Tdmdl: %fus - time for fabric to become ready and fetch dmd\n", __func__, *Tdmdl); 994 } else { 995 *NotEnoughTimeForDynamicMetadata = false; 996 } 997 } else { 998 *NotEnoughTimeForDynamicMetadata = false; 999 } 1000 1001 *Tdmdl_vm = (DynamicMetadataEnable == true && DynamicMetadataVMEnabled == true && GPUVMEnable == true ? 
TWait + Tvm_trips : 0); 1002 1003 if (myPipe->ScalerEnabled) 1004 DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + DPPCLKDelaySCL; 1005 else 1006 DPPCycles = DPPCLKDelaySubtotalPlusCNVCFormater + DPPCLKDelaySCLLBOnly; 1007 1008 DPPCycles = DPPCycles + myPipe->NumberOfCursors * DPPCLKDelayCNVCCursor; 1009 1010 DISPCLKCycles = DISPCLKDelaySubtotal; 1011 1012 if (myPipe->DPPCLK == 0.0 || myPipe->DISPCLK == 0.0) 1013 return true; 1014 1015 *DSTXAfterScaler = DPPCycles * myPipe->PixelClock / myPipe->DPPCLK + DISPCLKCycles * myPipe->PixelClock / myPipe->DISPCLK + DSCDelay; 1016 1017 #ifdef __DML_VBA_DEBUG__ 1018 dml_print("DML::%s: DPPCycles: %d\n", __func__, DPPCycles); 1019 dml_print("DML::%s: PixelClock: %f\n", __func__, myPipe->PixelClock); 1020 dml_print("DML::%s: DPPCLK: %f\n", __func__, myPipe->DPPCLK); 1021 dml_print("DML::%s: DISPCLKCycles: %d\n", __func__, DISPCLKCycles); 1022 dml_print("DML::%s: DISPCLK: %f\n", __func__, myPipe->DISPCLK); 1023 dml_print("DML::%s: DSCDelay: %d\n", __func__, DSCDelay); 1024 dml_print("DML::%s: DSTXAfterScaler: %d\n", __func__, *DSTXAfterScaler); 1025 dml_print("DML::%s: ODMCombineIsEnabled: %d\n", __func__, myPipe->ODMCombineIsEnabled); 1026 #endif 1027 1028 *DSTXAfterScaler = *DSTXAfterScaler + ((myPipe->ODMCombineIsEnabled) ? 
18 : 0) + (myPipe->DPPPerPlane - 1) * DPP_RECOUT_WIDTH; 1029 1030 if (OutputFormat == dm_420 || (myPipe->InterlaceEnable && myPipe->ProgressiveToInterlaceUnitInOPP)) 1031 *DSTYAfterScaler = 1; 1032 else 1033 *DSTYAfterScaler = 0; 1034 1035 DSTTotalPixelsAfterScaler = *DSTYAfterScaler * myPipe->HTotal + *DSTXAfterScaler; 1036 *DSTYAfterScaler = dml_floor(DSTTotalPixelsAfterScaler / myPipe->HTotal, 1); 1037 *DSTXAfterScaler = DSTTotalPixelsAfterScaler - ((double) (*DSTYAfterScaler * myPipe->HTotal)); 1038 1039 #ifdef __DML_VBA_DEBUG__ 1040 dml_print("DML::%s: DSTXAfterScaler: %d (final)\n", __func__, *DSTXAfterScaler); 1041 #endif 1042 1043 MyError = false; 1044 1045 Tr0_trips = trip_to_mem * (HostVMDynamicLevelsTrips + 1); 1046 Tvm_trips_rounded = dml_ceil(4.0 * Tvm_trips / LineTime, 1) / 4 * LineTime; 1047 Tr0_trips_rounded = dml_ceil(4.0 * Tr0_trips / LineTime, 1) / 4 * LineTime; 1048 1049 #ifdef __DML_VBA_ALLOW_DELTA__ 1050 if (!myPipe->DCCEnable) { 1051 Tr0_trips = 0.0; 1052 Tr0_trips_rounded = 0.0; 1053 } 1054 #endif 1055 1056 if (!GPUVMEnable) { 1057 Tvm_trips = 0.0; 1058 Tvm_trips_rounded = 0.0; 1059 } 1060 1061 if (GPUVMEnable) { 1062 if (GPUVMPageTableLevels >= 3) { 1063 *Tno_bw = UrgentExtraLatency + trip_to_mem * ((GPUVMPageTableLevels - 2) - 1); 1064 } else { 1065 *Tno_bw = 0; 1066 } 1067 } else if (!myPipe->DCCEnable) { 1068 *Tno_bw = LineTime; 1069 } else { 1070 *Tno_bw = LineTime / 4; 1071 } 1072 1073 if (myPipe->SourcePixelFormat == dm_420_8 || myPipe->SourcePixelFormat == dm_420_10 || myPipe->SourcePixelFormat == dm_420_12) 1074 bytes_pp = myPipe->BytePerPixelY + myPipe->BytePerPixelC / 4; 1075 else 1076 bytes_pp = myPipe->BytePerPixelY + myPipe->BytePerPixelC; 1077 /*rev 99*/ 1078 prefetch_bw_pr = bytes_pp * myPipe->PixelClock / (double) myPipe->DPPPerPlane; 1079 prefetch_bw_pr = dml_min(1, myPipe->VRatio) * prefetch_bw_pr; 1080 max_Tsw = dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) * LineTime; 1081 prefetch_sw_bytes = PrefetchSourceLinesY * 
swath_width_luma_ub * myPipe->BytePerPixelY + PrefetchSourceLinesC * swath_width_chroma_ub * myPipe->BytePerPixelC; 1082 prefetch_bw_oto = dml_max(prefetch_bw_pr, prefetch_sw_bytes / max_Tsw); 1083 1084 min_Lsw = dml_max(1, dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) / max_vratio_pre); 1085 Lsw_oto = dml_ceil(4 * dml_max(prefetch_sw_bytes / prefetch_bw_oto / LineTime, min_Lsw), 1) / 4; 1086 #ifdef __DML_VBA_DEBUG__ 1087 Tsw_oto = Lsw_oto * LineTime; 1088 #endif 1089 1090 1091 #ifdef __DML_VBA_DEBUG__ 1092 dml_print("DML: HTotal: %d\n", myPipe->HTotal); 1093 dml_print("DML: prefetch_bw_oto: %f\n", prefetch_bw_oto); 1094 dml_print("DML: PrefetchSourceLinesY: %f\n", PrefetchSourceLinesY); 1095 dml_print("DML: swath_width_luma_ub: %d\n", swath_width_luma_ub); 1096 dml_print("DML: BytePerPixelY: %d\n", myPipe->BytePerPixelY); 1097 dml_print("DML: Tsw_oto: %f\n", Tsw_oto); 1098 #endif 1099 1100 if (GPUVMEnable == true) 1101 Tvm_oto = dml_max3(*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / prefetch_bw_oto, Tvm_trips, LineTime / 4.0); 1102 else 1103 Tvm_oto = LineTime / 4.0; 1104 1105 if ((GPUVMEnable == true || myPipe->DCCEnable == true)) { 1106 Tr0_oto = dml_max4((MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / prefetch_bw_oto, Tr0_trips, // PREVIOUS_ERROR (missing this term) 1107 LineTime - Tvm_oto, 1108 LineTime / 4); 1109 } else { 1110 Tr0_oto = (LineTime - Tvm_oto) / 2.0; 1111 } 1112 1113 #ifdef __DML_VBA_DEBUG__ 1114 dml_print("DML::%s: Tvm_trips = %f\n", __func__, Tvm_trips); 1115 dml_print("DML::%s: Tr0_trips = %f\n", __func__, Tr0_trips); 1116 dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, MetaRowByte); 1117 dml_print("DML::%s: MetaRowByte = %d\n", __func__, MetaRowByte); 1118 dml_print("DML::%s: PixelPTEBytesPerRow = %d\n", __func__, PixelPTEBytesPerRow); 1119 dml_print("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, HostVMInefficiencyFactor); 1120 dml_print("DML::%s: prefetch_bw_oto = %f\n", 
__func__, prefetch_bw_oto); 1121 dml_print("DML::%s: Tr0_oto = %f\n", __func__, Tr0_oto); 1122 dml_print("DML::%s: Tvm_oto = %f\n", __func__, Tvm_oto); 1123 #endif 1124 1125 Tvm_oto_lines = dml_ceil(4.0 * Tvm_oto / LineTime, 1) / 4.0; 1126 Tr0_oto_lines = dml_ceil(4.0 * Tr0_oto / LineTime, 1) / 4.0; 1127 dst_y_prefetch_oto = Tvm_oto_lines + 2 * Tr0_oto_lines + Lsw_oto; 1128 dst_y_prefetch_equ = VStartup - (*TSetup + dml_max(TWait + TCalc, *Tdmdl)) / LineTime - (*DSTYAfterScaler + *DSTXAfterScaler / myPipe->HTotal); 1129 dst_y_prefetch_equ = dml_floor(4.0 * (dst_y_prefetch_equ + 0.125), 1) / 4.0; 1130 Tpre_rounded = dst_y_prefetch_equ * LineTime; 1131 1132 dep_bytes = dml_max(PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor, MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor); 1133 1134 if (prefetch_sw_bytes < dep_bytes) 1135 prefetch_sw_bytes = 2 * dep_bytes; 1136 1137 dml_print("DML: dst_y_prefetch_oto: %f\n", dst_y_prefetch_oto); 1138 dml_print("DML: Tvm_oto_lines: %f\n", Tvm_oto_lines); 1139 dml_print("DML: Tr0_oto_lines: %f\n", Tr0_oto_lines); 1140 dml_print("DML: Lsw_oto: %f\n", Lsw_oto); 1141 dml_print("DML: LineTime: %f\n", LineTime); 1142 dml_print("DML: dst_y_prefetch_equ: %f (after round)\n", dst_y_prefetch_equ); 1143 1144 dml_print("DML: LineTime: %f\n", LineTime); 1145 dml_print("DML: VStartup: %d\n", VStartup); 1146 dml_print("DML: Tvstartup: %fus - time between vstartup and first pixel of active\n", VStartup * LineTime); 1147 dml_print("DML: TSetup: %fus - time from vstartup to vready\n", *TSetup); 1148 dml_print("DML: TCalc: %fus - time for calculations in dchub starting at vready\n", TCalc); 1149 dml_print("DML: TWait: %fus - time for fabric to become ready max(pstate exit,cstate enter/exit, urgent latency) after TCalc\n", TWait); 1150 dml_print("DML: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", Tdmbf); 1151 dml_print("DML: Tdmec: %fus - time dio takes to transfer dmd\n", Tdmec); 1152 dml_print("DML: Tdmsks: 
%fus - time before active dmd must complete transmission at dio\n", Tdmsks); 1153 dml_print("DML: Tdmdl_vm: %fus - time for vm stages of dmd\n", *Tdmdl_vm); 1154 dml_print("DML: Tdmdl: %fus - time for fabric to become ready and fetch dmd\n", *Tdmdl); 1155 dml_print("DML: DSTXAfterScaler: %f pixels - number of pixel clocks pipeline and buffer delay after scaler\n", *DSTXAfterScaler); 1156 dml_print("DML: DSTYAfterScaler: %f lines - number of lines of pipeline and buffer delay after scaler\n", *DSTYAfterScaler); 1157 1158 *PrefetchBandwidth = 0; 1159 *DestinationLinesToRequestVMInVBlank = 0; 1160 *DestinationLinesToRequestRowInVBlank = 0; 1161 *VRatioPrefetchY = 0; 1162 *VRatioPrefetchC = 0; 1163 *RequiredPrefetchPixDataBWLuma = 0; 1164 if (dst_y_prefetch_equ > 1) { 1165 double PrefetchBandwidth1; 1166 double PrefetchBandwidth2; 1167 double PrefetchBandwidth3; 1168 double PrefetchBandwidth4; 1169 1170 if (Tpre_rounded - *Tno_bw > 0) { 1171 PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor 1172 + prefetch_sw_bytes) / (Tpre_rounded - *Tno_bw); 1173 Tsw_est1 = prefetch_sw_bytes / PrefetchBandwidth1; 1174 } else { 1175 PrefetchBandwidth1 = 0; 1176 } 1177 1178 if (VStartup == MaxVStartup && Tsw_est1 / LineTime < min_Lsw && Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - *Tno_bw > 0) { 1179 PrefetchBandwidth1 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + 2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor) 1180 / (Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - *Tno_bw); 1181 } 1182 1183 if (Tpre_rounded - *Tno_bw - 2 * Tr0_trips_rounded > 0) 1184 PrefetchBandwidth2 = (PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor + prefetch_sw_bytes) / (Tpre_rounded - *Tno_bw - 2 * Tr0_trips_rounded); 1185 else 1186 PrefetchBandwidth2 = 0; 1187 1188 if (Tpre_rounded - Tvm_trips_rounded > 0) { 1189 PrefetchBandwidth3 = (2 * MetaRowByte + 2 * 
PixelPTEBytesPerRow * HostVMInefficiencyFactor 1190 + prefetch_sw_bytes) / (Tpre_rounded - Tvm_trips_rounded); 1191 Tsw_est3 = prefetch_sw_bytes / PrefetchBandwidth3; 1192 } else { 1193 PrefetchBandwidth3 = 0; 1194 } 1195 1196 #ifdef __DML_VBA_DEBUG__ 1197 dml_print("DML::%s: Tpre_rounded: %f\n", __func__, Tpre_rounded); 1198 dml_print("DML::%s: Tvm_trips_rounded: %f\n", __func__, Tvm_trips_rounded); 1199 dml_print("DML::%s: PrefetchBandwidth3: %f\n", __func__, PrefetchBandwidth3); 1200 #endif 1201 if (VStartup == MaxVStartup && Tsw_est3 / LineTime < min_Lsw && Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - Tvm_trips_rounded > 0) { 1202 PrefetchBandwidth3 = (2 * MetaRowByte + 2 * PixelPTEBytesPerRow * HostVMInefficiencyFactor) 1203 / (Tpre_rounded - min_Lsw * LineTime - 0.75 * LineTime - Tvm_trips_rounded); 1204 } 1205 1206 if (Tpre_rounded - Tvm_trips_rounded - 2 * Tr0_trips_rounded > 0) 1207 PrefetchBandwidth4 = prefetch_sw_bytes / (Tpre_rounded - Tvm_trips_rounded - 2 * Tr0_trips_rounded); 1208 else 1209 PrefetchBandwidth4 = 0; 1210 1211 { 1212 bool Case1OK; 1213 bool Case2OK; 1214 bool Case3OK; 1215 1216 if (PrefetchBandwidth1 > 0) { 1217 if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth1 >= Tvm_trips_rounded 1218 && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / PrefetchBandwidth1 >= Tr0_trips_rounded) { 1219 Case1OK = true; 1220 } else { 1221 Case1OK = false; 1222 } 1223 } else { 1224 Case1OK = false; 1225 } 1226 1227 if (PrefetchBandwidth2 > 0) { 1228 if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / PrefetchBandwidth2 >= Tvm_trips_rounded 1229 && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / PrefetchBandwidth2 < Tr0_trips_rounded) { 1230 Case2OK = true; 1231 } else { 1232 Case2OK = false; 1233 } 1234 } else { 1235 Case2OK = false; 1236 } 1237 1238 if (PrefetchBandwidth3 > 0) { 1239 if (*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / 
PrefetchBandwidth3 < Tvm_trips_rounded 1240 && (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / PrefetchBandwidth3 >= Tr0_trips_rounded) { 1241 Case3OK = true; 1242 } else { 1243 Case3OK = false; 1244 } 1245 } else { 1246 Case3OK = false; 1247 } 1248 1249 if (Case1OK) { 1250 prefetch_bw_equ = PrefetchBandwidth1; 1251 } else if (Case2OK) { 1252 prefetch_bw_equ = PrefetchBandwidth2; 1253 } else if (Case3OK) { 1254 prefetch_bw_equ = PrefetchBandwidth3; 1255 } else { 1256 prefetch_bw_equ = PrefetchBandwidth4; 1257 } 1258 1259 #ifdef __DML_VBA_DEBUG__ 1260 dml_print("DML::%s: Case1OK: %d\n", __func__, Case1OK); 1261 dml_print("DML::%s: Case2OK: %d\n", __func__, Case2OK); 1262 dml_print("DML::%s: Case3OK: %d\n", __func__, Case3OK); 1263 dml_print("DML::%s: prefetch_bw_equ: %f\n", __func__, prefetch_bw_equ); 1264 #endif 1265 1266 if (prefetch_bw_equ > 0) { 1267 if (GPUVMEnable == true) { 1268 Tvm_equ = dml_max3(*Tno_bw + PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / prefetch_bw_equ, Tvm_trips, LineTime / 4); 1269 } else { 1270 Tvm_equ = LineTime / 4; 1271 } 1272 1273 if ((GPUVMEnable == true || myPipe->DCCEnable == true)) { 1274 Tr0_equ = dml_max4( 1275 (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / prefetch_bw_equ, 1276 Tr0_trips, 1277 (LineTime - Tvm_equ) / 2, 1278 LineTime / 4); 1279 } else { 1280 Tr0_equ = (LineTime - Tvm_equ) / 2; 1281 } 1282 } else { 1283 Tvm_equ = 0; 1284 Tr0_equ = 0; 1285 dml_print("DML: prefetch_bw_equ equals 0! 
%s:%d\n", __FILE__, __LINE__); 1286 } 1287 } 1288 1289 if (dst_y_prefetch_oto < dst_y_prefetch_equ) { 1290 *DestinationLinesForPrefetch = dst_y_prefetch_oto; 1291 TimeForFetchingMetaPTE = Tvm_oto; 1292 TimeForFetchingRowInVBlank = Tr0_oto; 1293 *PrefetchBandwidth = prefetch_bw_oto; 1294 } else { 1295 *DestinationLinesForPrefetch = dst_y_prefetch_equ; 1296 TimeForFetchingMetaPTE = Tvm_equ; 1297 TimeForFetchingRowInVBlank = Tr0_equ; 1298 *PrefetchBandwidth = prefetch_bw_equ; 1299 } 1300 1301 *DestinationLinesToRequestVMInVBlank = dml_ceil(4.0 * TimeForFetchingMetaPTE / LineTime, 1.0) / 4.0; 1302 1303 *DestinationLinesToRequestRowInVBlank = dml_ceil(4.0 * TimeForFetchingRowInVBlank / LineTime, 1.0) / 4.0; 1304 1305 #ifdef __DML_VBA_ALLOW_DELTA__ 1306 LinesToRequestPrefetchPixelData = *DestinationLinesForPrefetch 1307 // See note above dated 5/30/2018 1308 // - ((NumberOfCursors > 0 || GPUVMEnable || DCCEnable) ? 1309 - ((GPUVMEnable || myPipe->DCCEnable) ? (*DestinationLinesToRequestVMInVBlank + 2 * *DestinationLinesToRequestRowInVBlank) : 0.0); // TODO: Did someone else add this?? 
1310 #else 1311 LinesToRequestPrefetchPixelData = *DestinationLinesForPrefetch - *DestinationLinesToRequestVMInVBlank - 2 * *DestinationLinesToRequestRowInVBlank; 1312 #endif 1313 1314 #ifdef __DML_VBA_DEBUG__ 1315 dml_print("DML::%s: DestinationLinesForPrefetch = %f\n", __func__, *DestinationLinesForPrefetch); 1316 dml_print("DML::%s: DestinationLinesToRequestVMInVBlank = %f\n", __func__, *DestinationLinesToRequestVMInVBlank); 1317 dml_print("DML::%s: TimeForFetchingRowInVBlank = %f\n", __func__, TimeForFetchingRowInVBlank); 1318 dml_print("DML::%s: LineTime = %f\n", __func__, LineTime); 1319 dml_print("DML::%s: DestinationLinesToRequestRowInVBlank = %f\n", __func__, *DestinationLinesToRequestRowInVBlank); 1320 dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, PrefetchSourceLinesY); 1321 dml_print("DML::%s: LinesToRequestPrefetchPixelData = %f\n", __func__, LinesToRequestPrefetchPixelData); 1322 #endif 1323 1324 if (LinesToRequestPrefetchPixelData > 0 && prefetch_bw_equ > 0) { 1325 1326 *VRatioPrefetchY = (double) PrefetchSourceLinesY / LinesToRequestPrefetchPixelData; 1327 *VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0); 1328 #ifdef __DML_VBA_DEBUG__ 1329 dml_print("DML::%s: VRatioPrefetchY = %f\n", __func__, *VRatioPrefetchY); 1330 dml_print("DML::%s: SwathHeightY = %d\n", __func__, SwathHeightY); 1331 dml_print("DML::%s: VInitPreFillY = %f\n", __func__, VInitPreFillY); 1332 #endif 1333 if ((SwathHeightY > 4) && (VInitPreFillY > 3)) { 1334 if (LinesToRequestPrefetchPixelData > (VInitPreFillY - 3.0) / 2.0) { 1335 *VRatioPrefetchY = dml_max( 1336 (double) PrefetchSourceLinesY / LinesToRequestPrefetchPixelData, 1337 (double) MaxNumSwathY * SwathHeightY / (LinesToRequestPrefetchPixelData - (VInitPreFillY - 3.0) / 2.0)); 1338 *VRatioPrefetchY = dml_max(*VRatioPrefetchY, 1.0); 1339 } else { 1340 MyError = true; 1341 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__); 1342 *VRatioPrefetchY = 0; 1343 } 1344 #ifdef __DML_VBA_DEBUG__ 1345 
dml_print("DML::%s: VRatioPrefetchY = %f\n", __func__, *VRatioPrefetchY); 1346 dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, PrefetchSourceLinesY); 1347 dml_print("DML::%s: MaxNumSwathY = %d\n", __func__, MaxNumSwathY); 1348 #endif 1349 } 1350 1351 *VRatioPrefetchC = (double) PrefetchSourceLinesC / LinesToRequestPrefetchPixelData; 1352 *VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0); 1353 1354 #ifdef __DML_VBA_DEBUG__ 1355 dml_print("DML::%s: VRatioPrefetchC = %f\n", __func__, *VRatioPrefetchC); 1356 dml_print("DML::%s: SwathHeightC = %d\n", __func__, SwathHeightC); 1357 dml_print("DML::%s: VInitPreFillC = %f\n", __func__, VInitPreFillC); 1358 #endif 1359 if ((SwathHeightC > 4) || VInitPreFillC > 3) { 1360 if (LinesToRequestPrefetchPixelData > (VInitPreFillC - 3.0) / 2.0) { 1361 *VRatioPrefetchC = dml_max( 1362 *VRatioPrefetchC, 1363 (double) MaxNumSwathC * SwathHeightC / (LinesToRequestPrefetchPixelData - (VInitPreFillC - 3.0) / 2.0)); 1364 *VRatioPrefetchC = dml_max(*VRatioPrefetchC, 1.0); 1365 } else { 1366 MyError = true; 1367 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__); 1368 *VRatioPrefetchC = 0; 1369 } 1370 #ifdef __DML_VBA_DEBUG__ 1371 dml_print("DML::%s: VRatioPrefetchC = %f\n", __func__, *VRatioPrefetchC); 1372 dml_print("DML::%s: PrefetchSourceLinesC = %f\n", __func__, PrefetchSourceLinesC); 1373 dml_print("DML::%s: MaxNumSwathC = %d\n", __func__, MaxNumSwathC); 1374 #endif 1375 } 1376 1377 #ifdef __DML_VBA_DEBUG__ 1378 dml_print("DML::%s: BytePerPixelY = %d\n", __func__, myPipe->BytePerPixelY); 1379 dml_print("DML::%s: swath_width_luma_ub = %d\n", __func__, swath_width_luma_ub); 1380 dml_print("DML::%s: LineTime = %f\n", __func__, LineTime); 1381 #endif 1382 1383 *RequiredPrefetchPixDataBWLuma = (double) PrefetchSourceLinesY / LinesToRequestPrefetchPixelData * myPipe->BytePerPixelY * swath_width_luma_ub / LineTime; 1384 1385 #ifdef __DML_VBA_DEBUG__ 1386 dml_print("DML::%s: RequiredPrefetchPixDataBWLuma = %f\n", __func__, 
*RequiredPrefetchPixDataBWLuma); 1387 #endif 1388 1389 *RequiredPrefetchPixDataBWChroma = (double) PrefetchSourceLinesC / LinesToRequestPrefetchPixelData * myPipe->BytePerPixelC * swath_width_chroma_ub 1390 / LineTime; 1391 } else { 1392 MyError = true; 1393 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__); 1394 dml_print("DML: LinesToRequestPrefetchPixelData: %f, should be > 0\n", LinesToRequestPrefetchPixelData); 1395 *VRatioPrefetchY = 0; 1396 *VRatioPrefetchC = 0; 1397 *RequiredPrefetchPixDataBWLuma = 0; 1398 *RequiredPrefetchPixDataBWChroma = 0; 1399 } 1400 1401 dml_print( 1402 "DML: Tpre: %fus - sum of time to request meta pte, 2 x data pte + meta data, swaths\n", 1403 (double) LinesToRequestPrefetchPixelData * LineTime + 2.0 * TimeForFetchingRowInVBlank + TimeForFetchingMetaPTE); 1404 dml_print("DML: Tvm: %fus - time to fetch page tables for meta surface\n", TimeForFetchingMetaPTE); 1405 dml_print("DML: Tr0: %fus - time to fetch first row of data pagetables and first row of meta data (done in parallel)\n", TimeForFetchingRowInVBlank); 1406 dml_print( 1407 "DML: Tsw: %fus = time to fetch enough pixel data and cursor data to feed the scalers init position and detile\n", 1408 (double) LinesToRequestPrefetchPixelData * LineTime); 1409 dml_print("DML: To: %fus - time for propagation from scaler to optc\n", (*DSTYAfterScaler + ((double) (*DSTXAfterScaler) / (double) myPipe->HTotal)) * LineTime); 1410 dml_print("DML: Tvstartup - TSetup - Tcalc - Twait - Tpre - To > 0\n"); 1411 dml_print( 1412 "DML: Tslack(pre): %fus - time left over in schedule\n", 1413 VStartup * LineTime - TimeForFetchingMetaPTE - 2 * TimeForFetchingRowInVBlank 1414 - (*DSTYAfterScaler + ((double) (*DSTXAfterScaler) / (double) myPipe->HTotal)) * LineTime - TWait - TCalc - *TSetup); 1415 dml_print("DML: row_bytes = dpte_row_bytes (per_pipe) = PixelPTEBytesPerRow = : %d\n", PixelPTEBytesPerRow); 1416 1417 } else { 1418 MyError = true; 1419 dml_print("DML: MyErr set %s:%d\n", __FILE__, 
__LINE__); 1420 } 1421 1422 { 1423 double prefetch_vm_bw; 1424 double prefetch_row_bw; 1425 1426 if (PDEAndMetaPTEBytesFrame == 0) { 1427 prefetch_vm_bw = 0; 1428 } else if (*DestinationLinesToRequestVMInVBlank > 0) { 1429 #ifdef __DML_VBA_DEBUG__ 1430 dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame); 1431 dml_print("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, HostVMInefficiencyFactor); 1432 dml_print("DML::%s: DestinationLinesToRequestVMInVBlank = %f\n", __func__, *DestinationLinesToRequestVMInVBlank); 1433 dml_print("DML::%s: LineTime = %f\n", __func__, LineTime); 1434 #endif 1435 prefetch_vm_bw = PDEAndMetaPTEBytesFrame * HostVMInefficiencyFactor / (*DestinationLinesToRequestVMInVBlank * LineTime); 1436 #ifdef __DML_VBA_DEBUG__ 1437 dml_print("DML::%s: prefetch_vm_bw = %f\n", __func__, prefetch_vm_bw); 1438 #endif 1439 } else { 1440 prefetch_vm_bw = 0; 1441 MyError = true; 1442 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__); 1443 } 1444 1445 if (MetaRowByte + PixelPTEBytesPerRow == 0) { 1446 prefetch_row_bw = 0; 1447 } else if (*DestinationLinesToRequestRowInVBlank > 0) { 1448 prefetch_row_bw = (MetaRowByte + PixelPTEBytesPerRow * HostVMInefficiencyFactor) / (*DestinationLinesToRequestRowInVBlank * LineTime); 1449 1450 #ifdef __DML_VBA_DEBUG__ 1451 dml_print("DML::%s: MetaRowByte = %d\n", __func__, MetaRowByte); 1452 dml_print("DML::%s: PixelPTEBytesPerRow = %d\n", __func__, PixelPTEBytesPerRow); 1453 dml_print("DML::%s: DestinationLinesToRequestRowInVBlank = %f\n", __func__, *DestinationLinesToRequestRowInVBlank); 1454 dml_print("DML::%s: prefetch_row_bw = %f\n", __func__, prefetch_row_bw); 1455 #endif 1456 } else { 1457 prefetch_row_bw = 0; 1458 MyError = true; 1459 dml_print("DML: MyErr set %s:%d\n", __FILE__, __LINE__); 1460 } 1461 1462 *prefetch_vmrow_bw = dml_max(prefetch_vm_bw, prefetch_row_bw); 1463 } 1464 1465 if (MyError) { 1466 *PrefetchBandwidth = 0; 1467 TimeForFetchingMetaPTE = 0; 1468 
TimeForFetchingRowInVBlank = 0; 1469 *DestinationLinesToRequestVMInVBlank = 0; 1470 *DestinationLinesToRequestRowInVBlank = 0; 1471 *DestinationLinesForPrefetch = 0; 1472 LinesToRequestPrefetchPixelData = 0; 1473 *VRatioPrefetchY = 0; 1474 *VRatioPrefetchC = 0; 1475 *RequiredPrefetchPixDataBWLuma = 0; 1476 *RequiredPrefetchPixDataBWChroma = 0; 1477 } 1478 1479 return MyError; 1480 }

/*
 * RoundToDFSGranularityUp - snap a clock to VCOSpeed * 4 / N, rounding up.
 *
 * @Clock:    requested clock (same unit as @VCOSpeed).
 * @VCOSpeed: VCO frequency.
 *
 * N is VCOSpeed * 4 / Clock passed through dml_floor(.., 1), i.e. (assuming
 * dml_floor() with granularity 1 rounds down to a whole number) the largest
 * whole divider that does not exceed the exact ratio, so the result is not
 * below @Clock.
 *
 * NOTE(review): nothing here guards against @Clock == 0 or against the
 * floored divider being 0 (@Clock > 4 * @VCOSpeed); callers are presumably
 * expected to pass sane clocks - confirm.
 */
static double RoundToDFSGranularityUp(double Clock, double VCOSpeed)
{
	return VCOSpeed * 4 / dml_floor(VCOSpeed * 4 / Clock, 1);
}

/*
 * RoundToDFSGranularityDown - snap a clock to VCOSpeed * 4 / N, rounding down.
 *
 * @Clock:    requested clock (same unit as @VCOSpeed).
 * @VCOSpeed: VCO frequency.
 *
 * Mirror image of RoundToDFSGranularityUp(): the divider goes through
 * dml_ceil(.., 1), so (assuming it rounds up to a whole number) the result
 * is not above @Clock.
 */
static double RoundToDFSGranularityDown(double Clock, double VCOSpeed)
{
	return VCOSpeed * 4 / dml_ceil(VCOSpeed * 4.0 / Clock, 1);
}

/*
 * CalculateDCCConfiguration - pick the DCC request size for luma and chroma.
 *
 * For each plane the function decides between three request types
 * (256 bytes, 128 bytes contiguous, 128 bytes non-contiguous) and reports
 * the matching block sizes through the output pointers:
 *
 *   request type              MaxUncompressed  MaxCompressed  Independent
 *   REQ_256Bytes                    256             256            0
 *   REQ_128BytesContiguous          256             128          128
 *   REQ_128BytesNonContiguous       256              64           64
 *
 * The decision is driven by whether full swaths of both planes fit into
 * @DETBufferSize (horizontal and vertical scan are evaluated separately)
 * and by per-format "segment order contiguous" flags derived from the bytes
 * per pixel and @TilingFormat.
 *
 * When @DCCEnabled is not true all six outputs are written as 0; when
 * @BytePerPixelC is 0 the three chroma outputs are written as 0.
 *
 * @DCCProgrammingAssumesScanDirectionUnknown: if true, both the horizontal
 *	and the vertical results are taken into account; otherwise only the
 *	one selected by @ScanOrientation (vertical iff == dm_vert).
 * @RequestHeight256ByteLuma, @RequestHeight256ByteChroma: used as divisors;
 *	the chroma one is only divided by when @BytePerPixelC != 0.
 *
 * @SurfaceWidthChroma, @SurfaceHeightChroma, @BytePerPixelDETY and
 * @BytePerPixelDETC are accepted but not read by this function.
 *
 * The arithmetic is intentionally kept exactly as provided (see the NOTE at
 * the top of this file); only layout and comments differ.
 */
static void CalculateDCCConfiguration(
		bool DCCEnabled,
		bool DCCProgrammingAssumesScanDirectionUnknown,
		enum source_format_class SourcePixelFormat,
		unsigned int SurfaceWidthLuma,
		unsigned int SurfaceWidthChroma,
		unsigned int SurfaceHeightLuma,
		unsigned int SurfaceHeightChroma,
		double DETBufferSize,
		unsigned int RequestHeight256ByteLuma,
		unsigned int RequestHeight256ByteChroma,
		enum dm_swizzle_mode TilingFormat,
		unsigned int BytePerPixelY,
		unsigned int BytePerPixelC,
		double BytePerPixelDETY,
		double BytePerPixelDETC,
		enum scan_direction_class ScanOrientation,
		unsigned int *MaxUncompressedBlockLuma,
		unsigned int *MaxUncompressedBlockChroma,
		unsigned int *MaxCompressedBlockLuma,
		unsigned int *MaxCompressedBlockChroma,
		unsigned int *IndependentBlockLuma,
		unsigned int *IndependentBlockChroma)
{
	int yuv420;
	int horz_div_l;
	int horz_div_c;
	int vert_div_l;
	int vert_div_c;

	int swath_buf_size;
	double detile_buf_vp_horz_limit;
	double detile_buf_vp_vert_limit;

	int MAS_vp_horz_limit;
	int MAS_vp_vert_limit;
	int max_vp_horz_width;
	int max_vp_vert_height;
	int eff_surf_width_l;
	int eff_surf_width_c;
	int eff_surf_height_l;
	int eff_surf_height_c;

	int full_swath_bytes_horz_wc_l;
	int full_swath_bytes_horz_wc_c;
	int full_swath_bytes_vert_wc_l;
	int full_swath_bytes_vert_wc_c;
	int req128_horz_wc_l;
	int req128_horz_wc_c;
	int req128_vert_wc_l;
	int req128_vert_wc_c;
	int segment_order_horz_contiguous_luma;
	int segment_order_horz_contiguous_chroma;
	int segment_order_vert_contiguous_luma;
	int segment_order_vert_contiguous_chroma;

	/* REQ_NA is declared but never assigned in this function. */
	typedef enum {
		REQ_256Bytes, REQ_128BytesNonContiguous, REQ_128BytesContiguous, REQ_NA
	} RequestType;
	RequestType RequestLuma;
	RequestType RequestChroma;

	/* 1 for the three 4:2:0 source formats, 0 otherwise. */
	yuv420 = ((SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12) ? 1 : 0);
	horz_div_l = 1;
	horz_div_c = 1;
	vert_div_l = 1;
	vert_div_c = 1;

	/*
	 * The *_div_* flags end up as "1 + flag" divisors in the detile
	 * buffer limits below; clearing a flag removes the halving.
	 */
	if (BytePerPixelY == 1)
		vert_div_l = 0;
	if (BytePerPixelC == 1)
		vert_div_c = 0;
	if (BytePerPixelY == 8 && (TilingFormat == dm_sw_64kb_s || TilingFormat == dm_sw_64kb_s_t || TilingFormat == dm_sw_64kb_s_x))
		horz_div_l = 0;
	if (BytePerPixelC == 8 && (TilingFormat == dm_sw_64kb_s || TilingFormat == dm_sw_64kb_s_t || TilingFormat == dm_sw_64kb_s_x))
		horz_div_c = 0;

	/*
	 * Viewport limits imposed by the detile buffer: half of the DET
	 * buffer minus a fixed reserve (2 * 256 for a single plane,
	 * 2 * 2 * 256 when a chroma plane exists), divided by the bytes one
	 * viewport pixel column/row costs.
	 */
	if (BytePerPixelC == 0) {
		swath_buf_size = DETBufferSize / 2 - 2 * 256;
		detile_buf_vp_horz_limit = (double) swath_buf_size / ((double) RequestHeight256ByteLuma * BytePerPixelY / (1 + horz_div_l));
		detile_buf_vp_vert_limit = (double) swath_buf_size / (256.0 / RequestHeight256ByteLuma / (1 + vert_div_l));
	} else {
		swath_buf_size = DETBufferSize / 2 - 2 * 2 * 256;
		detile_buf_vp_horz_limit = (double) swath_buf_size
				/ ((double) RequestHeight256ByteLuma * BytePerPixelY / (1 + horz_div_l)
						+ (double) RequestHeight256ByteChroma * BytePerPixelC / (1 + horz_div_c) / (1 + yuv420));
		detile_buf_vp_vert_limit = (double) swath_buf_size
				/ (256.0 / RequestHeight256ByteLuma / (1 + vert_div_l) + 256.0 / RequestHeight256ByteChroma / (1 + vert_div_c) / (1 + yuv420));
	}

	/* dm_420_10 gets both limits scaled by 1.5. */
	if (SourcePixelFormat == dm_420_10) {
		detile_buf_vp_horz_limit = 1.5 * detile_buf_vp_horz_limit;
		detile_buf_vp_vert_limit = 1.5 * detile_buf_vp_vert_limit;
	}

	detile_buf_vp_horz_limit = dml_floor(detile_buf_vp_horz_limit - 1, 16);
	detile_buf_vp_vert_limit = dml_floor(detile_buf_vp_vert_limit - 1, 16);

	/*
	 * Effective surface size: the surface dimensions clamped to the
	 * smaller of the fixed MAS_* limits and the detile buffer limits.
	 * Chroma is luma halved for 4:2:0 formats.
	 */
	MAS_vp_horz_limit = SourcePixelFormat == dm_rgbe_alpha ? 3840 : 5760;
	MAS_vp_vert_limit = (BytePerPixelC > 0 ? 2880 : 5760);
	max_vp_horz_width = dml_min((double) MAS_vp_horz_limit, detile_buf_vp_horz_limit);
	max_vp_vert_height = dml_min((double) MAS_vp_vert_limit, detile_buf_vp_vert_limit);
	eff_surf_width_l = (SurfaceWidthLuma > max_vp_horz_width ? max_vp_horz_width : SurfaceWidthLuma);
	eff_surf_width_c = eff_surf_width_l / (1 + yuv420);
	eff_surf_height_l = (SurfaceHeightLuma > max_vp_vert_height ? max_vp_vert_height : SurfaceHeightLuma);
	eff_surf_height_c = eff_surf_height_l / (1 + yuv420);

	/* Bytes in one full swath, for horizontal and vertical scan. */
	full_swath_bytes_horz_wc_l = eff_surf_width_l * RequestHeight256ByteLuma * BytePerPixelY;
	full_swath_bytes_vert_wc_l = eff_surf_height_l * 256 / RequestHeight256ByteLuma;
	if (BytePerPixelC > 0) {
		full_swath_bytes_horz_wc_c = eff_surf_width_c * RequestHeight256ByteChroma * BytePerPixelC;
		full_swath_bytes_vert_wc_c = eff_surf_height_c * 256 / RequestHeight256ByteChroma;
	} else {
		full_swath_bytes_horz_wc_c = 0;
		full_swath_bytes_vert_wc_c = 0;
	}

	/*
	 * dm_420_10: swath sizes are scaled by 2/3 and rounded with
	 * dml_ceil(.., 256). The "* 2 / 3" is evaluated in integer
	 * arithmetic (operands are int) before dml_ceil() sees the value.
	 */
	if (SourcePixelFormat == dm_420_10) {
		full_swath_bytes_horz_wc_l = dml_ceil(full_swath_bytes_horz_wc_l * 2 / 3, 256);
		full_swath_bytes_horz_wc_c = dml_ceil(full_swath_bytes_horz_wc_c * 2 / 3, 256);
		full_swath_bytes_vert_wc_l = dml_ceil(full_swath_bytes_vert_wc_l * 2 / 3, 256);
		full_swath_bytes_vert_wc_c = dml_ceil(full_swath_bytes_vert_wc_c * 2 / 3, 256);
	}

	/*
	 * Horizontal scan: if two swaths of each plane fit in the DET buffer
	 * neither plane needs 128-byte requests. Otherwise try dropping one
	 * plane to a single swath (chroma when luma < 1.5 * chroma, luma
	 * otherwise); if that still does not fit, both planes use 128-byte
	 * requests.
	 */
	if (2 * full_swath_bytes_horz_wc_l + 2 * full_swath_bytes_horz_wc_c <= DETBufferSize) {
		req128_horz_wc_l = 0;
		req128_horz_wc_c = 0;
	} else if (full_swath_bytes_horz_wc_l < 1.5 * full_swath_bytes_horz_wc_c
			&& 2 * full_swath_bytes_horz_wc_l + full_swath_bytes_horz_wc_c <= DETBufferSize) {
		req128_horz_wc_l = 0;
		req128_horz_wc_c = 1;
	} else if (full_swath_bytes_horz_wc_l >= 1.5 * full_swath_bytes_horz_wc_c
			&& full_swath_bytes_horz_wc_l + 2 * full_swath_bytes_horz_wc_c <= DETBufferSize) {
		req128_horz_wc_l = 1;
		req128_horz_wc_c = 0;
	} else {
		req128_horz_wc_l = 1;
		req128_horz_wc_c = 1;
	}

	/* Vertical scan: same decision on the vertical swath sizes. */
	if (2 * full_swath_bytes_vert_wc_l + 2 * full_swath_bytes_vert_wc_c <= DETBufferSize) {
		req128_vert_wc_l = 0;
		req128_vert_wc_c = 0;
	} else if (full_swath_bytes_vert_wc_l < 1.5 * full_swath_bytes_vert_wc_c
			&& 2 * full_swath_bytes_vert_wc_l + full_swath_bytes_vert_wc_c <= DETBufferSize) {
		req128_vert_wc_l = 0;
		req128_vert_wc_c = 1;
	} else if (full_swath_bytes_vert_wc_l >= 1.5 * full_swath_bytes_vert_wc_c
			&& full_swath_bytes_vert_wc_l + 2 * full_swath_bytes_vert_wc_c <= DETBufferSize) {
		req128_vert_wc_l = 1;
		req128_vert_wc_c = 0;
	} else {
		req128_vert_wc_l = 1;
		req128_vert_wc_c = 1;
	}

	/*
	 * Per-plane, per-direction "segment order is contiguous" flags,
	 * keyed on bytes per pixel and tiling format. A 0 here turns a
	 * 128-byte request into the non-contiguous variant below.
	 */
	if (BytePerPixelY == 2 || (BytePerPixelY == 4 && TilingFormat != dm_sw_64kb_r_x)) {
		segment_order_horz_contiguous_luma = 0;
	} else {
		segment_order_horz_contiguous_luma = 1;
	}
	if ((BytePerPixelY == 8 && (TilingFormat == dm_sw_64kb_d || TilingFormat == dm_sw_64kb_d_x || TilingFormat == dm_sw_64kb_d_t || TilingFormat == dm_sw_64kb_r_x))
			|| (BytePerPixelY == 4 && TilingFormat == dm_sw_64kb_r_x)) {
		segment_order_vert_contiguous_luma = 0;
	} else {
		segment_order_vert_contiguous_luma = 1;
	}
	if (BytePerPixelC == 2 || (BytePerPixelC == 4 && TilingFormat != dm_sw_64kb_r_x)) {
		segment_order_horz_contiguous_chroma = 0;
	} else {
		segment_order_horz_contiguous_chroma = 1;
	}
	if ((BytePerPixelC == 8 && (TilingFormat == dm_sw_64kb_d || TilingFormat == dm_sw_64kb_d_x || TilingFormat == dm_sw_64kb_d_t || TilingFormat == dm_sw_64kb_r_x))
			|| (BytePerPixelC == 4 && TilingFormat == dm_sw_64kb_r_x)) {
		segment_order_vert_contiguous_chroma = 0;
	} else {
		segment_order_vert_contiguous_chroma = 1;
	}

	/*
	 * Select the request type. With an unknown scan direction both
	 * directions must agree on 256 bytes, and a non-contiguous segment
	 * order in either 128-byte direction forces the non-contiguous type.
	 * Otherwise only the direction given by ScanOrientation is used.
	 */
	if (DCCProgrammingAssumesScanDirectionUnknown == true) {
		if (req128_horz_wc_l == 0 && req128_vert_wc_l == 0) {
			RequestLuma = REQ_256Bytes;
		} else if ((req128_horz_wc_l == 1 && segment_order_horz_contiguous_luma == 0) || (req128_vert_wc_l == 1 && segment_order_vert_contiguous_luma == 0)) {
			RequestLuma = REQ_128BytesNonContiguous;
		} else {
			RequestLuma = REQ_128BytesContiguous;
		}
		if (req128_horz_wc_c == 0 && req128_vert_wc_c == 0) {
			RequestChroma = REQ_256Bytes;
		} else if ((req128_horz_wc_c == 1 && segment_order_horz_contiguous_chroma == 0) || (req128_vert_wc_c == 1 && segment_order_vert_contiguous_chroma == 0)) {
			RequestChroma = REQ_128BytesNonContiguous;
		} else {
			RequestChroma = REQ_128BytesContiguous;
		}
	} else if (ScanOrientation != dm_vert) {
		if (req128_horz_wc_l == 0) {
			RequestLuma = REQ_256Bytes;
		} else if (segment_order_horz_contiguous_luma == 0) {
			RequestLuma = REQ_128BytesNonContiguous;
		} else {
			RequestLuma = REQ_128BytesContiguous;
		}
		if (req128_horz_wc_c == 0) {
			RequestChroma = REQ_256Bytes;
		} else if (segment_order_horz_contiguous_chroma == 0) {
			RequestChroma = REQ_128BytesNonContiguous;
		} else {
			RequestChroma = REQ_128BytesContiguous;
		}
	} else {
		if (req128_vert_wc_l == 0) {
			RequestLuma = REQ_256Bytes;
		} else if (segment_order_vert_contiguous_luma == 0) {
			RequestLuma = REQ_128BytesNonContiguous;
		} else {
			RequestLuma = REQ_128BytesContiguous;
		}
		if (req128_vert_wc_c == 0) {
			RequestChroma = REQ_256Bytes;
		} else if (segment_order_vert_contiguous_chroma == 0) {
			RequestChroma = REQ_128BytesNonContiguous;
		} else {
			RequestChroma = REQ_128BytesContiguous;
		}
	}

	/* Translate the request type into the reported block sizes. */
	if (RequestLuma == REQ_256Bytes) {
		*MaxUncompressedBlockLuma = 256;
		*MaxCompressedBlockLuma = 256;
		*IndependentBlockLuma = 0;
	} else if (RequestLuma == REQ_128BytesContiguous) {
		*MaxUncompressedBlockLuma = 256;
		*MaxCompressedBlockLuma = 128;
		*IndependentBlockLuma = 128;
	} else {
		*MaxUncompressedBlockLuma = 256;
		*MaxCompressedBlockLuma = 64;
		*IndependentBlockLuma = 64;
	}

	if (RequestChroma == REQ_256Bytes) {
		*MaxUncompressedBlockChroma = 256;
		*MaxCompressedBlockChroma = 256;
		*IndependentBlockChroma = 0;
	} else if (RequestChroma == REQ_128BytesContiguous) {
		*MaxUncompressedBlockChroma = 256;
		*MaxCompressedBlockChroma = 128;
		*IndependentBlockChroma = 128;
	} else {
		*MaxUncompressedBlockChroma = 256;
		*MaxCompressedBlockChroma = 64;
		*IndependentBlockChroma = 64;
	}

	/* No DCC, or no chroma plane: chroma outputs are zeroed. */
	if (DCCEnabled != true || BytePerPixelC == 0) {
		*MaxUncompressedBlockChroma = 0;
		*MaxCompressedBlockChroma = 0;
		*IndependentBlockChroma = 0;
	}

	/* No DCC: luma outputs are zeroed as well. */
	if (DCCEnabled != true) {
		*MaxUncompressedBlockLuma = 0;
		*MaxCompressedBlockLuma = 0;
		*IndependentBlockLuma = 0;
	}
}

/*
 * CalculatePrefetchSourceLines - number of source lines to prefetch.
 *
 * @mode_lib:      only mode_lib->vba.IgnoreViewportPositioning is read.
 * @VRatio:        vertical scaling ratio.
 * @vtaps:         vertical tap count.
 * @Interlace:     adds 0.5 * @VRatio to the prefill term when set and
 *                 @ProgressiveToInterlaceUnitInOPP is false.
 * @ProgressiveToInterlaceUnitInOPP: selects the prefill formula without the
 *                 interlace term.
 * @SwathHeight:   swath height in lines; used as divisor and modulus, so it
 *                 must be non-zero.
 * @ViewportYStart: only checked to emit a warning when viewport positioning
 *                 is ignored but the start is non-zero.
 * @VInitPreFill:  out, floor((VRatio + vtaps + 1 [+ interlace term]) / 2).
 * @MaxNumSwath:   out, number of full swaths covering the prefill.
 *
 * Returns *MaxNumSwath * SwathHeight + MaxPartialSwath. The sum is formed
 * in unsigned integer arithmetic and then converted to double.
 *
 * Note that the partial swath is clamped to at least 1 only on the path
 * where viewport positioning is honoured.
 */
static double CalculatePrefetchSourceLines(
		struct display_mode_lib *mode_lib,
		double VRatio,
		double vtaps,
		bool Interlace,
		bool ProgressiveToInterlaceUnitInOPP,
		unsigned int SwathHeight,
		unsigned int ViewportYStart,
		double *VInitPreFill,
		unsigned int *MaxNumSwath)
{
	struct vba_vars_st *v = &mode_lib->vba;
	unsigned int MaxPartialSwath;

	if (ProgressiveToInterlaceUnitInOPP)
		*VInitPreFill = dml_floor((VRatio + vtaps + 1) / 2.0, 1);
	else
		*VInitPreFill = dml_floor((VRatio + vtaps + 1 + Interlace * 0.5 * VRatio) / 2.0, 1);

	if (!v->IgnoreViewportPositioning) {

		*MaxNumSwath = dml_ceil((*VInitPreFill - 1.0) / SwathHeight, 1) + 1.0;

		if (*VInitPreFill > 1.0)
			MaxPartialSwath = (unsigned int) (*VInitPreFill - 2) % SwathHeight;
		else
			MaxPartialSwath = (unsigned int) (*VInitPreFill + SwathHeight - 2) % SwathHeight;
		MaxPartialSwath = dml_max(1U, MaxPartialSwath);

	} else {

		if (ViewportYStart != 0)
			dml_print("WARNING DML: using viewport y position of 0 even though actual viewport y position is non-zero in prefetch source lines calculation\n");

		*MaxNumSwath = dml_ceil(*VInitPreFill / SwathHeight, 1);

		if (*VInitPreFill > 1.0)
			MaxPartialSwath = (unsigned int) (*VInitPreFill - 1) % SwathHeight;
		else
			MaxPartialSwath = (unsigned int) (*VInitPreFill + SwathHeight - 1) % SwathHeight;
	}

#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: VRatio = %f\n", __func__, VRatio);
	dml_print("DML::%s: vtaps = %f\n", __func__, vtaps);
	dml_print("DML::%s: VInitPreFill = %f\n", __func__, *VInitPreFill);
	dml_print("DML::%s: ProgressiveToInterlaceUnitInOPP = %d\n", __func__, ProgressiveToInterlaceUnitInOPP);
	dml_print("DML::%s: IgnoreViewportPositioning = %d\n", __func__, v->IgnoreViewportPositioning);
	dml_print("DML::%s: SwathHeight = %d\n", __func__, SwathHeight);
	dml_print("DML::%s: MaxPartialSwath = %d\n", __func__, MaxPartialSwath);
	dml_print("DML::%s: MaxNumSwath = %d\n", __func__, *MaxNumSwath);
	dml_print("DML::%s: Prefetch source lines = %d\n", __func__, *MaxNumSwath * SwathHeight + MaxPartialSwath);
#endif
	return *MaxNumSwath * SwathHeight + MaxPartialSwath;
}

1811 static unsigned int CalculateVMAndRowBytes( 1812 struct display_mode_lib *mode_lib, 1813 bool DCCEnable, 1814 unsigned int BlockHeight256Bytes, 1815 unsigned int BlockWidth256Bytes, 1816 enum source_format_class SourcePixelFormat, 1817 unsigned int SurfaceTiling, 1818 unsigned int BytePerPixel, 1819 enum scan_direction_class ScanDirection, 1820 unsigned int SwathWidth, 1821 unsigned int ViewportHeight, 1822 bool GPUVMEnable, 1823 bool HostVMEnable, 1824 unsigned int HostVMMaxNonCachedPageTableLevels, 1825 unsigned int GPUVMMinPageSize, 1826 unsigned int HostVMMinPageSize, 1827 unsigned int PTEBufferSizeInRequests, 1828 unsigned int Pitch, 1829 unsigned int DCCMetaPitch, 1830 unsigned int *MacroTileWidth, 1831 unsigned int *MetaRowByte, 1832 unsigned int *PixelPTEBytesPerRow, 1833 bool *PTEBufferSizeNotExceeded, 1834 int *dpte_row_width_ub, 1835 unsigned int *dpte_row_height, 1836 unsigned int *MetaRequestWidth, 1837 unsigned int *MetaRequestHeight, 1838 unsigned int *meta_row_width, 1839 unsigned int *meta_row_height, 1840 int *vm_group_bytes, 1841 unsigned int *dpte_group_bytes, 1842 unsigned int *PixelPTEReqWidth, 1843 unsigned int
*PixelPTEReqHeight, 1844 unsigned int *PTERequestSize, 1845 int *DPDE0BytesFrame, 1846 int *MetaPTEBytesFrame) 1847 { 1848 struct vba_vars_st *v = &mode_lib->vba; 1849 unsigned int MPDEBytesFrame; 1850 unsigned int DCCMetaSurfaceBytes; 1851 unsigned int MacroTileSizeBytes; 1852 unsigned int MacroTileHeight; 1853 unsigned int ExtraDPDEBytesFrame; 1854 unsigned int PDEAndMetaPTEBytesFrame; 1855 unsigned int PixelPTEReqHeightPTEs = 0; 1856 unsigned int HostVMDynamicLevels = 0; 1857 double FractionOfPTEReturnDrop; 1858 1859 if (GPUVMEnable == true && HostVMEnable == true) { 1860 if (HostVMMinPageSize < 2048) { 1861 HostVMDynamicLevels = HostVMMaxNonCachedPageTableLevels; 1862 } else if (HostVMMinPageSize >= 2048 && HostVMMinPageSize < 1048576) { 1863 HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 1); 1864 } else { 1865 HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 2); 1866 } 1867 } 1868 1869 *MetaRequestHeight = 8 * BlockHeight256Bytes; 1870 *MetaRequestWidth = 8 * BlockWidth256Bytes; 1871 if (ScanDirection != dm_vert) { 1872 *meta_row_height = *MetaRequestHeight; 1873 *meta_row_width = dml_ceil((double) SwathWidth - 1, *MetaRequestWidth) + *MetaRequestWidth; 1874 *MetaRowByte = *meta_row_width * *MetaRequestHeight * BytePerPixel / 256.0; 1875 } else { 1876 *meta_row_height = *MetaRequestWidth; 1877 *meta_row_width = dml_ceil((double) SwathWidth - 1, *MetaRequestHeight) + *MetaRequestHeight; 1878 *MetaRowByte = *meta_row_width * *MetaRequestWidth * BytePerPixel / 256.0; 1879 } 1880 DCCMetaSurfaceBytes = DCCMetaPitch * (dml_ceil(ViewportHeight - 1, 64 * BlockHeight256Bytes) + 64 * BlockHeight256Bytes) * BytePerPixel / 256; 1881 if (GPUVMEnable == true) { 1882 *MetaPTEBytesFrame = (dml_ceil((double) (DCCMetaSurfaceBytes - 4.0 * 1024.0) / (8 * 4.0 * 1024), 1) + 1) * 64; 1883 MPDEBytesFrame = 128 * (v->GPUVMMaxPageTableLevels - 1); 1884 } else { 1885 *MetaPTEBytesFrame = 0; 1886 MPDEBytesFrame = 0; 1887 } 1888 1889 
if (DCCEnable != true) { 1890 *MetaPTEBytesFrame = 0; 1891 MPDEBytesFrame = 0; 1892 *MetaRowByte = 0; 1893 } 1894 1895 if (SurfaceTiling == dm_sw_linear) { 1896 MacroTileSizeBytes = 256; 1897 MacroTileHeight = BlockHeight256Bytes; 1898 } else { 1899 MacroTileSizeBytes = 65536; 1900 MacroTileHeight = 16 * BlockHeight256Bytes; 1901 } 1902 *MacroTileWidth = MacroTileSizeBytes / BytePerPixel / MacroTileHeight; 1903 1904 if (GPUVMEnable == true && v->GPUVMMaxPageTableLevels > 1) { 1905 if (ScanDirection != dm_vert) { 1906 *DPDE0BytesFrame = 64 1907 * (dml_ceil( 1908 ((Pitch * (dml_ceil(ViewportHeight - 1, MacroTileHeight) + MacroTileHeight) * BytePerPixel) - MacroTileSizeBytes) 1909 / (8 * 2097152), 1910 1) + 1); 1911 } else { 1912 *DPDE0BytesFrame = 64 1913 * (dml_ceil( 1914 ((Pitch * (dml_ceil((double) SwathWidth - 1, MacroTileHeight) + MacroTileHeight) * BytePerPixel) - MacroTileSizeBytes) 1915 / (8 * 2097152), 1916 1) + 1); 1917 } 1918 ExtraDPDEBytesFrame = 128 * (v->GPUVMMaxPageTableLevels - 2); 1919 } else { 1920 *DPDE0BytesFrame = 0; 1921 ExtraDPDEBytesFrame = 0; 1922 } 1923 1924 PDEAndMetaPTEBytesFrame = *MetaPTEBytesFrame + MPDEBytesFrame + *DPDE0BytesFrame + ExtraDPDEBytesFrame; 1925 1926 #ifdef __DML_VBA_DEBUG__ 1927 dml_print("DML::%s: MetaPTEBytesFrame = %d\n", __func__, *MetaPTEBytesFrame); 1928 dml_print("DML::%s: MPDEBytesFrame = %d\n", __func__, MPDEBytesFrame); 1929 dml_print("DML::%s: DPDE0BytesFrame = %d\n", __func__, *DPDE0BytesFrame); 1930 dml_print("DML::%s: ExtraDPDEBytesFrame= %d\n", __func__, ExtraDPDEBytesFrame); 1931 dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame); 1932 #endif 1933 1934 if (HostVMEnable == true) { 1935 PDEAndMetaPTEBytesFrame = PDEAndMetaPTEBytesFrame * (1 + 8 * HostVMDynamicLevels); 1936 } 1937 #ifdef __DML_VBA_DEBUG__ 1938 dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %d\n", __func__, PDEAndMetaPTEBytesFrame); 1939 #endif 1940 1941 if (SurfaceTiling == dm_sw_linear) { 1942 
PixelPTEReqHeightPTEs = 1; 1943 *PixelPTEReqHeight = 1; 1944 *PixelPTEReqWidth = 32768.0 / BytePerPixel; 1945 *PTERequestSize = 64; 1946 FractionOfPTEReturnDrop = 0; 1947 } else if (MacroTileSizeBytes == 4096) { 1948 PixelPTEReqHeightPTEs = 1; 1949 *PixelPTEReqHeight = MacroTileHeight; 1950 *PixelPTEReqWidth = 8 * *MacroTileWidth; 1951 *PTERequestSize = 64; 1952 if (ScanDirection != dm_vert) 1953 FractionOfPTEReturnDrop = 0; 1954 else 1955 FractionOfPTEReturnDrop = 7 / 8; 1956 } else if (GPUVMMinPageSize == 4 && MacroTileSizeBytes > 4096) { 1957 PixelPTEReqHeightPTEs = 16; 1958 *PixelPTEReqHeight = 16 * BlockHeight256Bytes; 1959 *PixelPTEReqWidth = 16 * BlockWidth256Bytes; 1960 *PTERequestSize = 128; 1961 FractionOfPTEReturnDrop = 0; 1962 } else { 1963 PixelPTEReqHeightPTEs = 1; 1964 *PixelPTEReqHeight = MacroTileHeight; 1965 *PixelPTEReqWidth = 8 * *MacroTileWidth; 1966 *PTERequestSize = 64; 1967 FractionOfPTEReturnDrop = 0; 1968 } 1969 1970 if (SurfaceTiling == dm_sw_linear) { 1971 *dpte_row_height = dml_min(128, 1 << (unsigned int) dml_floor(dml_log2(PTEBufferSizeInRequests * *PixelPTEReqWidth / Pitch), 1)); 1972 *dpte_row_width_ub = (dml_ceil((double)(Pitch * *dpte_row_height - 1) / *PixelPTEReqWidth, 1) + 1) * *PixelPTEReqWidth; 1973 *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqWidth * *PTERequestSize; 1974 } else if (ScanDirection != dm_vert) { 1975 *dpte_row_height = *PixelPTEReqHeight; 1976 *dpte_row_width_ub = (dml_ceil((double) (SwathWidth - 1) / *PixelPTEReqWidth, 1) + 1) * *PixelPTEReqWidth; 1977 *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqWidth * *PTERequestSize; 1978 } else { 1979 *dpte_row_height = dml_min(*PixelPTEReqWidth, *MacroTileWidth); 1980 *dpte_row_width_ub = (dml_ceil((double) (SwathWidth - 1) / *PixelPTEReqHeight, 1) + 1) * *PixelPTEReqHeight; 1981 *PixelPTEBytesPerRow = *dpte_row_width_ub / *PixelPTEReqHeight * *PTERequestSize; 1982 } 1983 1984 if (*PixelPTEBytesPerRow * (1 - FractionOfPTEReturnDrop) <= 64 * 
PTEBufferSizeInRequests) { 1985 *PTEBufferSizeNotExceeded = true; 1986 } else { 1987 *PTEBufferSizeNotExceeded = false; 1988 } 1989 1990 if (GPUVMEnable != true) { 1991 *PixelPTEBytesPerRow = 0; 1992 *PTEBufferSizeNotExceeded = true; 1993 } 1994 1995 dml_print("DML: vm_bytes = meta_pte_bytes_per_frame (per_pipe) = MetaPTEBytesFrame = : %i\n", *MetaPTEBytesFrame); 1996 1997 if (HostVMEnable == true) { 1998 *PixelPTEBytesPerRow = *PixelPTEBytesPerRow * (1 + 8 * HostVMDynamicLevels); 1999 } 2000 2001 if (HostVMEnable == true) { 2002 *vm_group_bytes = 512; 2003 *dpte_group_bytes = 512; 2004 } else if (GPUVMEnable == true) { 2005 *vm_group_bytes = 2048; 2006 if (SurfaceTiling != dm_sw_linear && PixelPTEReqHeightPTEs == 1 && ScanDirection == dm_vert) { 2007 *dpte_group_bytes = 512; 2008 } else { 2009 *dpte_group_bytes = 2048; 2010 } 2011 } else { 2012 *vm_group_bytes = 0; 2013 *dpte_group_bytes = 0; 2014 } 2015 return PDEAndMetaPTEBytesFrame; 2016 } 2017 2018 static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerformanceCalculation(struct display_mode_lib *mode_lib) 2019 { 2020 struct vba_vars_st *v = &mode_lib->vba; 2021 unsigned int j, k; 2022 double HostVMInefficiencyFactor = 1.0; 2023 bool NoChromaPlanes = true; 2024 int ReorderBytes; 2025 double VMDataOnlyReturnBW; 2026 double MaxTotalRDBandwidth = 0; 2027 int PrefetchMode = v->PrefetchModePerState[v->VoltageLevel][v->maxMpcComb]; 2028 2029 v->WritebackDISPCLK = 0.0; 2030 v->DISPCLKWithRamping = 0; 2031 v->DISPCLKWithoutRamping = 0; 2032 v->GlobalDPPCLK = 0.0; 2033 /* DAL custom code: need to update ReturnBW in case min dcfclk is overridden */ 2034 { 2035 double IdealFabricAndSDPPortBandwidthPerState = dml_min( 2036 v->ReturnBusWidth * v->DCFCLKState[v->VoltageLevel][v->maxMpcComb], 2037 v->FabricClockPerState[v->VoltageLevel] * v->FabricDatapathToDCNDataReturn); 2038 double IdealDRAMBandwidthPerState = v->DRAMSpeedPerState[v->VoltageLevel] * v->NumberOfChannels * v->DRAMChannelWidth; 2039 2040 
if (v->HostVMEnable != true) { 2041 v->ReturnBW = dml_min( 2042 IdealFabricAndSDPPortBandwidthPerState * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0, 2043 IdealDRAMBandwidthPerState * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyPixelDataOnly / 100.0); 2044 } else { 2045 v->ReturnBW = dml_min( 2046 IdealFabricAndSDPPortBandwidthPerState * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0, 2047 IdealDRAMBandwidthPerState * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyPixelMixedWithVMData / 100.0); 2048 } 2049 } 2050 /* End DAL custom code */ 2051 2052 // DISPCLK and DPPCLK Calculation 2053 // 2054 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 2055 if (v->WritebackEnable[k]) { 2056 v->WritebackDISPCLK = dml_max( 2057 v->WritebackDISPCLK, 2058 dml314_CalculateWriteBackDISPCLK( 2059 v->WritebackPixelFormat[k], 2060 v->PixelClock[k], 2061 v->WritebackHRatio[k], 2062 v->WritebackVRatio[k], 2063 v->WritebackHTaps[k], 2064 v->WritebackVTaps[k], 2065 v->WritebackSourceWidth[k], 2066 v->WritebackDestinationWidth[k], 2067 v->HTotal[k], 2068 v->WritebackLineBufferSize)); 2069 } 2070 } 2071 2072 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 2073 if (v->HRatio[k] > 1) { 2074 v->PSCL_THROUGHPUT_LUMA[k] = dml_min( 2075 v->MaxDCHUBToPSCLThroughput, 2076 v->MaxPSCLToLBThroughput * v->HRatio[k] / dml_ceil(v->htaps[k] / 6.0, 1)); 2077 } else { 2078 v->PSCL_THROUGHPUT_LUMA[k] = dml_min(v->MaxDCHUBToPSCLThroughput, v->MaxPSCLToLBThroughput); 2079 } 2080 2081 v->DPPCLKUsingSingleDPPLuma = v->PixelClock[k] 2082 * dml_max( 2083 v->vtaps[k] / 6.0 * dml_min(1.0, v->HRatio[k]), 2084 dml_max(v->HRatio[k] * v->VRatio[k] / v->PSCL_THROUGHPUT_LUMA[k], 1.0)); 2085 2086 if ((v->htaps[k] > 6 || v->vtaps[k] > 6) && v->DPPCLKUsingSingleDPPLuma < 2 * v->PixelClock[k]) { 2087 v->DPPCLKUsingSingleDPPLuma = 2 * v->PixelClock[k]; 2088 } 2089 2090 if ((v->SourcePixelFormat[k] != dm_420_8 && v->SourcePixelFormat[k] != dm_420_10 && v->SourcePixelFormat[k] != 
dm_420_12 2091 && v->SourcePixelFormat[k] != dm_rgbe_alpha)) { 2092 v->PSCL_THROUGHPUT_CHROMA[k] = 0.0; 2093 v->DPPCLKUsingSingleDPP[k] = v->DPPCLKUsingSingleDPPLuma; 2094 } else { 2095 if (v->HRatioChroma[k] > 1) { 2096 v->PSCL_THROUGHPUT_CHROMA[k] = dml_min( 2097 v->MaxDCHUBToPSCLThroughput, 2098 v->MaxPSCLToLBThroughput * v->HRatioChroma[k] / dml_ceil(v->HTAPsChroma[k] / 6.0, 1.0)); 2099 } else { 2100 v->PSCL_THROUGHPUT_CHROMA[k] = dml_min(v->MaxDCHUBToPSCLThroughput, v->MaxPSCLToLBThroughput); 2101 } 2102 v->DPPCLKUsingSingleDPPChroma = v->PixelClock[k] 2103 * dml_max3( 2104 v->VTAPsChroma[k] / 6.0 * dml_min(1.0, v->HRatioChroma[k]), 2105 v->HRatioChroma[k] * v->VRatioChroma[k] / v->PSCL_THROUGHPUT_CHROMA[k], 2106 1.0); 2107 2108 if ((v->HTAPsChroma[k] > 6 || v->VTAPsChroma[k] > 6) && v->DPPCLKUsingSingleDPPChroma < 2 * v->PixelClock[k]) { 2109 v->DPPCLKUsingSingleDPPChroma = 2 * v->PixelClock[k]; 2110 } 2111 2112 v->DPPCLKUsingSingleDPP[k] = dml_max(v->DPPCLKUsingSingleDPPLuma, v->DPPCLKUsingSingleDPPChroma); 2113 } 2114 } 2115 2116 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 2117 if (v->BlendingAndTiming[k] != k) 2118 continue; 2119 if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_4to1) { 2120 v->DISPCLKWithRamping = dml_max( 2121 v->DISPCLKWithRamping, 2122 v->PixelClock[k] / 4 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100) 2123 * (1 + v->DISPCLKRampingMargin / 100)); 2124 v->DISPCLKWithoutRamping = dml_max( 2125 v->DISPCLKWithoutRamping, 2126 v->PixelClock[k] / 4 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100)); 2127 } else if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_2to1) { 2128 v->DISPCLKWithRamping = dml_max( 2129 v->DISPCLKWithRamping, 2130 v->PixelClock[k] / 2 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100) 2131 * (1 + v->DISPCLKRampingMargin / 100)); 2132 v->DISPCLKWithoutRamping = dml_max( 2133 v->DISPCLKWithoutRamping, 2134 v->PixelClock[k] / 2 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100)); 2135 } else { 2136 
v->DISPCLKWithRamping = dml_max( 2137 v->DISPCLKWithRamping, 2138 v->PixelClock[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100) * (1 + v->DISPCLKRampingMargin / 100)); 2139 v->DISPCLKWithoutRamping = dml_max( 2140 v->DISPCLKWithoutRamping, 2141 v->PixelClock[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100)); 2142 } 2143 } 2144 2145 v->DISPCLKWithRamping = dml_max(v->DISPCLKWithRamping, v->WritebackDISPCLK); 2146 v->DISPCLKWithoutRamping = dml_max(v->DISPCLKWithoutRamping, v->WritebackDISPCLK); 2147 2148 ASSERT(v->DISPCLKDPPCLKVCOSpeed != 0); 2149 v->DISPCLKWithRampingRoundedToDFSGranularity = RoundToDFSGranularityUp(v->DISPCLKWithRamping, v->DISPCLKDPPCLKVCOSpeed); 2150 v->DISPCLKWithoutRampingRoundedToDFSGranularity = RoundToDFSGranularityUp(v->DISPCLKWithoutRamping, v->DISPCLKDPPCLKVCOSpeed); 2151 v->MaxDispclkRoundedToDFSGranularity = RoundToDFSGranularityDown( 2152 v->soc.clock_limits[v->soc.num_states - 1].dispclk_mhz, 2153 v->DISPCLKDPPCLKVCOSpeed); 2154 if (v->DISPCLKWithoutRampingRoundedToDFSGranularity > v->MaxDispclkRoundedToDFSGranularity) { 2155 v->DISPCLK_calculated = v->DISPCLKWithoutRampingRoundedToDFSGranularity; 2156 } else if (v->DISPCLKWithRampingRoundedToDFSGranularity > v->MaxDispclkRoundedToDFSGranularity) { 2157 v->DISPCLK_calculated = v->MaxDispclkRoundedToDFSGranularity; 2158 } else { 2159 v->DISPCLK_calculated = v->DISPCLKWithRampingRoundedToDFSGranularity; 2160 } 2161 v->DISPCLK = v->DISPCLK_calculated; 2162 DTRACE(" dispclk_mhz (calculated) = %f", v->DISPCLK_calculated); 2163 2164 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 2165 v->DPPCLK_calculated[k] = v->DPPCLKUsingSingleDPP[k] / v->DPPPerPlane[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100); 2166 v->GlobalDPPCLK = dml_max(v->GlobalDPPCLK, v->DPPCLK_calculated[k]); 2167 } 2168 v->GlobalDPPCLK = RoundToDFSGranularityUp(v->GlobalDPPCLK, v->DISPCLKDPPCLKVCOSpeed); 2169 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 2170 v->DPPCLK_calculated[k] = v->GlobalDPPCLK / 255 * 
dml_ceil(v->DPPCLK_calculated[k] * 255.0 / v->GlobalDPPCLK, 1); 2171 DTRACE(" dppclk_mhz[%i] (calculated) = %f", k, v->DPPCLK_calculated[k]); 2172 } 2173 2174 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 2175 v->DPPCLK[k] = v->DPPCLK_calculated[k]; 2176 } 2177 2178 // Urgent and B P-State/DRAM Clock Change Watermark 2179 DTRACE(" dcfclk_mhz = %f", v->DCFCLK); 2180 DTRACE(" return_bus_bw = %f", v->ReturnBW); 2181 2182 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 2183 CalculateBytePerPixelAnd256BBlockSizes( 2184 v->SourcePixelFormat[k], 2185 v->SurfaceTiling[k], 2186 &v->BytePerPixelY[k], 2187 &v->BytePerPixelC[k], 2188 &v->BytePerPixelDETY[k], 2189 &v->BytePerPixelDETC[k], 2190 &v->BlockHeight256BytesY[k], 2191 &v->BlockHeight256BytesC[k], 2192 &v->BlockWidth256BytesY[k], 2193 &v->BlockWidth256BytesC[k]); 2194 } 2195 2196 CalculateSwathWidth( 2197 false, 2198 v->NumberOfActivePlanes, 2199 v->SourcePixelFormat, 2200 v->SourceScan, 2201 v->ViewportWidth, 2202 v->ViewportHeight, 2203 v->SurfaceWidthY, 2204 v->SurfaceWidthC, 2205 v->SurfaceHeightY, 2206 v->SurfaceHeightC, 2207 v->ODMCombineEnabled, 2208 v->BytePerPixelY, 2209 v->BytePerPixelC, 2210 v->BlockHeight256BytesY, 2211 v->BlockHeight256BytesC, 2212 v->BlockWidth256BytesY, 2213 v->BlockWidth256BytesC, 2214 v->BlendingAndTiming, 2215 v->HActive, 2216 v->HRatio, 2217 v->DPPPerPlane, 2218 v->SwathWidthSingleDPPY, 2219 v->SwathWidthSingleDPPC, 2220 v->SwathWidthY, 2221 v->SwathWidthC, 2222 v->dummyinteger3, 2223 v->dummyinteger4, 2224 v->swath_width_luma_ub, 2225 v->swath_width_chroma_ub); 2226 2227 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 2228 v->ReadBandwidthPlaneLuma[k] = v->SwathWidthSingleDPPY[k] * v->BytePerPixelY[k] / (v->HTotal[k] / v->PixelClock[k]) 2229 * v->VRatio[k]; 2230 v->ReadBandwidthPlaneChroma[k] = v->SwathWidthSingleDPPC[k] * v->BytePerPixelC[k] / (v->HTotal[k] / v->PixelClock[k]) 2231 * v->VRatioChroma[k]; 2232 DTRACE(" read_bw[%i] = %fBps", k, v->ReadBandwidthPlaneLuma[k] + 
v->ReadBandwidthPlaneChroma[k]); 2233 } 2234 2235 // DCFCLK Deep Sleep 2236 CalculateDCFCLKDeepSleep( 2237 mode_lib, 2238 v->NumberOfActivePlanes, 2239 v->BytePerPixelY, 2240 v->BytePerPixelC, 2241 v->VRatio, 2242 v->VRatioChroma, 2243 v->SwathWidthY, 2244 v->SwathWidthC, 2245 v->DPPPerPlane, 2246 v->HRatio, 2247 v->HRatioChroma, 2248 v->PixelClock, 2249 v->PSCL_THROUGHPUT_LUMA, 2250 v->PSCL_THROUGHPUT_CHROMA, 2251 v->DPPCLK, 2252 v->ReadBandwidthPlaneLuma, 2253 v->ReadBandwidthPlaneChroma, 2254 v->ReturnBusWidth, 2255 &v->DCFCLKDeepSleep); 2256 2257 // DSCCLK 2258 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 2259 if ((v->BlendingAndTiming[k] != k) || !v->DSCEnabled[k]) { 2260 v->DSCCLK_calculated[k] = 0.0; 2261 } else { 2262 if (v->OutputFormat[k] == dm_420) 2263 v->DSCFormatFactor = 2; 2264 else if (v->OutputFormat[k] == dm_444) 2265 v->DSCFormatFactor = 1; 2266 else if (v->OutputFormat[k] == dm_n422) 2267 v->DSCFormatFactor = 2; 2268 else 2269 v->DSCFormatFactor = 1; 2270 if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_4to1) 2271 v->DSCCLK_calculated[k] = v->PixelClockBackEnd[k] / 12 / v->DSCFormatFactor 2272 / (1 - v->DISPCLKDPPCLKDSCCLKDownSpreading / 100); 2273 else if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_2to1) 2274 v->DSCCLK_calculated[k] = v->PixelClockBackEnd[k] / 6 / v->DSCFormatFactor 2275 / (1 - v->DISPCLKDPPCLKDSCCLKDownSpreading / 100); 2276 else 2277 v->DSCCLK_calculated[k] = v->PixelClockBackEnd[k] / 3 / v->DSCFormatFactor 2278 / (1 - v->DISPCLKDPPCLKDSCCLKDownSpreading / 100); 2279 } 2280 } 2281 2282 // DSC Delay 2283 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 2284 double BPP = v->OutputBpp[k]; 2285 2286 if (v->DSCEnabled[k] && BPP != 0) { 2287 if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_disabled) { 2288 v->DSCDelay[k] = dscceComputeDelay( 2289 v->DSCInputBitPerComponent[k], 2290 BPP, 2291 dml_ceil((double) v->HActive[k] / v->NumberOfDSCSlices[k], 1), 2292 v->NumberOfDSCSlices[k], 2293 v->OutputFormat[k], 2294 
v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k]); 2295 } else if (v->ODMCombineEnabled[k] == dm_odm_combine_mode_2to1) { 2296 v->DSCDelay[k] = 2 2297 * (dscceComputeDelay( 2298 v->DSCInputBitPerComponent[k], 2299 BPP, 2300 dml_ceil((double) v->HActive[k] / v->NumberOfDSCSlices[k], 1), 2301 v->NumberOfDSCSlices[k] / 2.0, 2302 v->OutputFormat[k], 2303 v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k])); 2304 } else { 2305 v->DSCDelay[k] = 4 2306 * (dscceComputeDelay( 2307 v->DSCInputBitPerComponent[k], 2308 BPP, 2309 dml_ceil((double) v->HActive[k] / v->NumberOfDSCSlices[k], 1), 2310 v->NumberOfDSCSlices[k] / 4.0, 2311 v->OutputFormat[k], 2312 v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k])); 2313 } 2314 v->DSCDelay[k] = v->DSCDelay[k] * v->PixelClock[k] / v->PixelClockBackEnd[k]; 2315 } else { 2316 v->DSCDelay[k] = 0; 2317 } 2318 } 2319 2320 for (k = 0; k < v->NumberOfActivePlanes; ++k) 2321 for (j = 0; j < v->NumberOfActivePlanes; ++j) // NumberOfPlanes 2322 if (j != k && v->BlendingAndTiming[k] == j && v->DSCEnabled[j]) 2323 v->DSCDelay[k] = v->DSCDelay[j]; 2324 2325 // Prefetch 2326 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 2327 unsigned int PDEAndMetaPTEBytesFrameY; 2328 unsigned int PixelPTEBytesPerRowY; 2329 unsigned int MetaRowByteY; 2330 unsigned int MetaRowByteC; 2331 unsigned int PDEAndMetaPTEBytesFrameC; 2332 unsigned int PixelPTEBytesPerRowC; 2333 bool PTEBufferSizeNotExceededY; 2334 bool PTEBufferSizeNotExceededC; 2335 2336 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12 2337 || v->SourcePixelFormat[k] == dm_rgbe_alpha) { 2338 if ((v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12) && v->SourceScan[k] != dm_vert) { 2339 v->PTEBufferSizeInRequestsForLuma = (v->PTEBufferSizeInRequestsLuma + v->PTEBufferSizeInRequestsChroma) / 2; 2340 v->PTEBufferSizeInRequestsForChroma = 
v->PTEBufferSizeInRequestsForLuma; 2341 } else { 2342 v->PTEBufferSizeInRequestsForLuma = v->PTEBufferSizeInRequestsLuma; 2343 v->PTEBufferSizeInRequestsForChroma = v->PTEBufferSizeInRequestsChroma; 2344 } 2345 2346 PDEAndMetaPTEBytesFrameC = CalculateVMAndRowBytes( 2347 mode_lib, 2348 v->DCCEnable[k], 2349 v->BlockHeight256BytesC[k], 2350 v->BlockWidth256BytesC[k], 2351 v->SourcePixelFormat[k], 2352 v->SurfaceTiling[k], 2353 v->BytePerPixelC[k], 2354 v->SourceScan[k], 2355 v->SwathWidthC[k], 2356 v->ViewportHeightChroma[k], 2357 v->GPUVMEnable, 2358 v->HostVMEnable, 2359 v->HostVMMaxNonCachedPageTableLevels, 2360 v->GPUVMMinPageSize, 2361 v->HostVMMinPageSize, 2362 v->PTEBufferSizeInRequestsForChroma, 2363 v->PitchC[k], 2364 v->DCCMetaPitchC[k], 2365 &v->MacroTileWidthC[k], 2366 &MetaRowByteC, 2367 &PixelPTEBytesPerRowC, 2368 &PTEBufferSizeNotExceededC, 2369 &v->dpte_row_width_chroma_ub[k], 2370 &v->dpte_row_height_chroma[k], 2371 &v->meta_req_width_chroma[k], 2372 &v->meta_req_height_chroma[k], 2373 &v->meta_row_width_chroma[k], 2374 &v->meta_row_height_chroma[k], 2375 &v->dummyinteger1, 2376 &v->dummyinteger2, 2377 &v->PixelPTEReqWidthC[k], 2378 &v->PixelPTEReqHeightC[k], 2379 &v->PTERequestSizeC[k], 2380 &v->dpde0_bytes_per_frame_ub_c[k], 2381 &v->meta_pte_bytes_per_frame_ub_c[k]); 2382 2383 v->PrefetchSourceLinesC[k] = CalculatePrefetchSourceLines( 2384 mode_lib, 2385 v->VRatioChroma[k], 2386 v->VTAPsChroma[k], 2387 v->Interlace[k], 2388 v->ProgressiveToInterlaceUnitInOPP, 2389 v->SwathHeightC[k], 2390 v->ViewportYStartC[k], 2391 &v->VInitPreFillC[k], 2392 &v->MaxNumSwathC[k]); 2393 } else { 2394 v->PTEBufferSizeInRequestsForLuma = v->PTEBufferSizeInRequestsLuma + v->PTEBufferSizeInRequestsChroma; 2395 v->PTEBufferSizeInRequestsForChroma = 0; 2396 PixelPTEBytesPerRowC = 0; 2397 PDEAndMetaPTEBytesFrameC = 0; 2398 MetaRowByteC = 0; 2399 v->MaxNumSwathC[k] = 0; 2400 v->PrefetchSourceLinesC[k] = 0; 2401 } 2402 2403 PDEAndMetaPTEBytesFrameY = 
CalculateVMAndRowBytes( 2404 mode_lib, 2405 v->DCCEnable[k], 2406 v->BlockHeight256BytesY[k], 2407 v->BlockWidth256BytesY[k], 2408 v->SourcePixelFormat[k], 2409 v->SurfaceTiling[k], 2410 v->BytePerPixelY[k], 2411 v->SourceScan[k], 2412 v->SwathWidthY[k], 2413 v->ViewportHeight[k], 2414 v->GPUVMEnable, 2415 v->HostVMEnable, 2416 v->HostVMMaxNonCachedPageTableLevels, 2417 v->GPUVMMinPageSize, 2418 v->HostVMMinPageSize, 2419 v->PTEBufferSizeInRequestsForLuma, 2420 v->PitchY[k], 2421 v->DCCMetaPitchY[k], 2422 &v->MacroTileWidthY[k], 2423 &MetaRowByteY, 2424 &PixelPTEBytesPerRowY, 2425 &PTEBufferSizeNotExceededY, 2426 &v->dpte_row_width_luma_ub[k], 2427 &v->dpte_row_height[k], 2428 &v->meta_req_width[k], 2429 &v->meta_req_height[k], 2430 &v->meta_row_width[k], 2431 &v->meta_row_height[k], 2432 &v->vm_group_bytes[k], 2433 &v->dpte_group_bytes[k], 2434 &v->PixelPTEReqWidthY[k], 2435 &v->PixelPTEReqHeightY[k], 2436 &v->PTERequestSizeY[k], 2437 &v->dpde0_bytes_per_frame_ub_l[k], 2438 &v->meta_pte_bytes_per_frame_ub_l[k]); 2439 2440 v->PrefetchSourceLinesY[k] = CalculatePrefetchSourceLines( 2441 mode_lib, 2442 v->VRatio[k], 2443 v->vtaps[k], 2444 v->Interlace[k], 2445 v->ProgressiveToInterlaceUnitInOPP, 2446 v->SwathHeightY[k], 2447 v->ViewportYStartY[k], 2448 &v->VInitPreFillY[k], 2449 &v->MaxNumSwathY[k]); 2450 v->PixelPTEBytesPerRow[k] = PixelPTEBytesPerRowY + PixelPTEBytesPerRowC; 2451 v->PDEAndMetaPTEBytesFrame[k] = PDEAndMetaPTEBytesFrameY + PDEAndMetaPTEBytesFrameC; 2452 v->MetaRowByte[k] = MetaRowByteY + MetaRowByteC; 2453 2454 CalculateRowBandwidth( 2455 v->GPUVMEnable, 2456 v->SourcePixelFormat[k], 2457 v->VRatio[k], 2458 v->VRatioChroma[k], 2459 v->DCCEnable[k], 2460 v->HTotal[k] / v->PixelClock[k], 2461 MetaRowByteY, 2462 MetaRowByteC, 2463 v->meta_row_height[k], 2464 v->meta_row_height_chroma[k], 2465 PixelPTEBytesPerRowY, 2466 PixelPTEBytesPerRowC, 2467 v->dpte_row_height[k], 2468 v->dpte_row_height_chroma[k], 2469 &v->meta_row_bw[k], 2470 &v->dpte_row_bw[k]); 
2471 } 2472 2473 v->TotalDCCActiveDPP = 0; 2474 v->TotalActiveDPP = 0; 2475 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 2476 v->TotalActiveDPP = v->TotalActiveDPP + v->DPPPerPlane[k]; 2477 if (v->DCCEnable[k]) 2478 v->TotalDCCActiveDPP = v->TotalDCCActiveDPP + v->DPPPerPlane[k]; 2479 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12 2480 || v->SourcePixelFormat[k] == dm_rgbe_alpha) 2481 NoChromaPlanes = false; 2482 } 2483 2484 ReorderBytes = v->NumberOfChannels 2485 * dml_max3( 2486 v->UrgentOutOfOrderReturnPerChannelPixelDataOnly, 2487 v->UrgentOutOfOrderReturnPerChannelPixelMixedWithVMData, 2488 v->UrgentOutOfOrderReturnPerChannelVMDataOnly); 2489 2490 VMDataOnlyReturnBW = dml_min( 2491 dml_min(v->ReturnBusWidth * v->DCFCLK, v->FabricClock * v->FabricDatapathToDCNDataReturn) 2492 * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0, 2493 v->DRAMSpeed * v->NumberOfChannels * v->DRAMChannelWidth 2494 * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyVMDataOnly / 100.0); 2495 2496 #ifdef __DML_VBA_DEBUG__ 2497 dml_print("DML::%s: v->ReturnBusWidth = %f\n", __func__, v->ReturnBusWidth); 2498 dml_print("DML::%s: v->DCFCLK = %f\n", __func__, v->DCFCLK); 2499 dml_print("DML::%s: v->FabricClock = %f\n", __func__, v->FabricClock); 2500 dml_print("DML::%s: v->FabricDatapathToDCNDataReturn = %f\n", __func__, v->FabricDatapathToDCNDataReturn); 2501 dml_print("DML::%s: v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency = %f\n", __func__, v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency); 2502 dml_print("DML::%s: v->DRAMSpeed = %f\n", __func__, v->DRAMSpeed); 2503 dml_print("DML::%s: v->NumberOfChannels = %f\n", __func__, v->NumberOfChannels); 2504 dml_print("DML::%s: v->DRAMChannelWidth = %f\n", __func__, v->DRAMChannelWidth); 2505 dml_print("DML::%s: v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyVMDataOnly = %f\n", __func__, 
v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyVMDataOnly); 2506 dml_print("DML::%s: VMDataOnlyReturnBW = %f\n", __func__, VMDataOnlyReturnBW); 2507 dml_print("DML::%s: ReturnBW = %f\n", __func__, v->ReturnBW); 2508 #endif 2509 2510 if (v->GPUVMEnable && v->HostVMEnable) 2511 HostVMInefficiencyFactor = v->ReturnBW / VMDataOnlyReturnBW; 2512 2513 v->UrgentExtraLatency = CalculateExtraLatency( 2514 v->RoundTripPingLatencyCycles, 2515 ReorderBytes, 2516 v->DCFCLK, 2517 v->TotalActiveDPP, 2518 v->PixelChunkSizeInKByte, 2519 v->TotalDCCActiveDPP, 2520 v->MetaChunkSize, 2521 v->ReturnBW, 2522 v->GPUVMEnable, 2523 v->HostVMEnable, 2524 v->NumberOfActivePlanes, 2525 v->DPPPerPlane, 2526 v->dpte_group_bytes, 2527 HostVMInefficiencyFactor, 2528 v->HostVMMinPageSize, 2529 v->HostVMMaxNonCachedPageTableLevels); 2530 2531 v->TCalc = 24.0 / v->DCFCLKDeepSleep; 2532 2533 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 2534 if (v->BlendingAndTiming[k] == k) { 2535 if (v->WritebackEnable[k] == true) { 2536 v->WritebackDelay[v->VoltageLevel][k] = v->WritebackLatency 2537 + CalculateWriteBackDelay( 2538 v->WritebackPixelFormat[k], 2539 v->WritebackHRatio[k], 2540 v->WritebackVRatio[k], 2541 v->WritebackVTaps[k], 2542 v->WritebackDestinationWidth[k], 2543 v->WritebackDestinationHeight[k], 2544 v->WritebackSourceHeight[k], 2545 v->HTotal[k]) / v->DISPCLK; 2546 } else 2547 v->WritebackDelay[v->VoltageLevel][k] = 0; 2548 for (j = 0; j < v->NumberOfActivePlanes; ++j) { 2549 if (v->BlendingAndTiming[j] == k && v->WritebackEnable[j] == true) { 2550 v->WritebackDelay[v->VoltageLevel][k] = dml_max( 2551 v->WritebackDelay[v->VoltageLevel][k], 2552 v->WritebackLatency 2553 + CalculateWriteBackDelay( 2554 v->WritebackPixelFormat[j], 2555 v->WritebackHRatio[j], 2556 v->WritebackVRatio[j], 2557 v->WritebackVTaps[j], 2558 v->WritebackDestinationWidth[j], 2559 v->WritebackDestinationHeight[j], 2560 v->WritebackSourceHeight[j], 2561 v->HTotal[k]) / v->DISPCLK); 2562 } 2563 } 2564 } 2565 } 2566 2567 
for (k = 0; k < v->NumberOfActivePlanes; ++k) 2568 for (j = 0; j < v->NumberOfActivePlanes; ++j) 2569 if (v->BlendingAndTiming[k] == j) 2570 v->WritebackDelay[v->VoltageLevel][k] = v->WritebackDelay[v->VoltageLevel][j]; 2571 2572 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 2573 v->MaxVStartupLines[k] = 2574 CalculateMaxVStartup( 2575 v->VTotal[k], 2576 v->VActive[k], 2577 v->VBlankNom[k], 2578 v->HTotal[k], 2579 v->PixelClock[k], 2580 v->ProgressiveToInterlaceUnitInOPP, 2581 v->Interlace[k], 2582 v->ip.VBlankNomDefaultUS, 2583 v->WritebackDelay[v->VoltageLevel][k]); 2584 2585 #ifdef __DML_VBA_DEBUG__ 2586 dml_print("DML::%s: k=%d MaxVStartupLines = %d\n", __func__, k, v->MaxVStartupLines[k]); 2587 dml_print("DML::%s: k=%d VoltageLevel = %d\n", __func__, k, v->VoltageLevel); 2588 dml_print("DML::%s: k=%d WritebackDelay = %f\n", __func__, k, v->WritebackDelay[v->VoltageLevel][k]); 2589 #endif 2590 } 2591 2592 v->MaximumMaxVStartupLines = 0; 2593 for (k = 0; k < v->NumberOfActivePlanes; ++k) 2594 v->MaximumMaxVStartupLines = dml_max(v->MaximumMaxVStartupLines, v->MaxVStartupLines[k]); 2595 2596 // VBA_DELTA 2597 // We don't really care to iterate between the various prefetch modes 2598 //v->PrefetchERROR = CalculateMinAndMaxPrefetchMode(v->AllowDRAMSelfRefreshOrDRAMClockChangeInVblank, &v->MinPrefetchMode, &v->MaxPrefetchMode); 2599 2600 v->UrgentLatency = CalculateUrgentLatency( 2601 v->UrgentLatencyPixelDataOnly, 2602 v->UrgentLatencyPixelMixedWithVMData, 2603 v->UrgentLatencyVMDataOnly, 2604 v->DoUrgentLatencyAdjustment, 2605 v->UrgentLatencyAdjustmentFabricClockComponent, 2606 v->UrgentLatencyAdjustmentFabricClockReference, 2607 v->FabricClock); 2608 2609 v->FractionOfUrgentBandwidth = 0.0; 2610 v->FractionOfUrgentBandwidthImmediateFlip = 0.0; 2611 2612 v->VStartupLines = __DML_VBA_MIN_VSTARTUP__; 2613 2614 do { 2615 double MaxTotalRDBandwidthNoUrgentBurst = 0.0; 2616 bool DestinationLineTimesForPrefetchLessThan2 = false; 2617 bool VRatioPrefetchMoreThan4 = 
false; 2618 double TWait = CalculateTWait(PrefetchMode, v->DRAMClockChangeLatency, v->UrgentLatency, v->SREnterPlusExitTime); 2619 2620 MaxTotalRDBandwidth = 0; 2621 2622 dml_print("DML::%s: Start loop: VStartup = %d\n", __func__, v->VStartupLines); 2623 2624 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 2625 Pipe myPipe; 2626 2627 myPipe.DPPCLK = v->DPPCLK[k]; 2628 myPipe.DISPCLK = v->DISPCLK; 2629 myPipe.PixelClock = v->PixelClock[k]; 2630 myPipe.DCFCLKDeepSleep = v->DCFCLKDeepSleep; 2631 myPipe.DPPPerPlane = v->DPPPerPlane[k]; 2632 myPipe.ScalerEnabled = v->ScalerEnabled[k]; 2633 myPipe.VRatio = v->VRatio[k]; 2634 myPipe.VRatioChroma = v->VRatioChroma[k]; 2635 myPipe.SourceScan = v->SourceScan[k]; 2636 myPipe.BlockWidth256BytesY = v->BlockWidth256BytesY[k]; 2637 myPipe.BlockHeight256BytesY = v->BlockHeight256BytesY[k]; 2638 myPipe.BlockWidth256BytesC = v->BlockWidth256BytesC[k]; 2639 myPipe.BlockHeight256BytesC = v->BlockHeight256BytesC[k]; 2640 myPipe.InterlaceEnable = v->Interlace[k]; 2641 myPipe.NumberOfCursors = v->NumberOfCursors[k]; 2642 myPipe.VBlank = v->VTotal[k] - v->VActive[k]; 2643 myPipe.HTotal = v->HTotal[k]; 2644 myPipe.DCCEnable = v->DCCEnable[k]; 2645 myPipe.ODMCombineIsEnabled = v->ODMCombineEnabled[k] == dm_odm_combine_mode_4to1 2646 || v->ODMCombineEnabled[k] == dm_odm_combine_mode_2to1; 2647 myPipe.SourcePixelFormat = v->SourcePixelFormat[k]; 2648 myPipe.BytePerPixelY = v->BytePerPixelY[k]; 2649 myPipe.BytePerPixelC = v->BytePerPixelC[k]; 2650 myPipe.ProgressiveToInterlaceUnitInOPP = v->ProgressiveToInterlaceUnitInOPP; 2651 v->ErrorResult[k] = CalculatePrefetchSchedule( 2652 mode_lib, 2653 HostVMInefficiencyFactor, 2654 &myPipe, 2655 v->DSCDelay[k], 2656 v->DPPCLKDelaySubtotal + v->DPPCLKDelayCNVCFormater, 2657 v->DPPCLKDelaySCL, 2658 v->DPPCLKDelaySCLLBOnly, 2659 v->DPPCLKDelayCNVCCursor, 2660 v->DISPCLKDelaySubtotal, 2661 (unsigned int) (v->SwathWidthY[k] / v->HRatio[k]), 2662 v->OutputFormat[k], 2663 v->MaxInterDCNTileRepeaters, 2664 
dml_min(v->VStartupLines, v->MaxVStartupLines[k]), 2665 v->MaxVStartupLines[k], 2666 v->GPUVMMaxPageTableLevels, 2667 v->GPUVMEnable, 2668 v->HostVMEnable, 2669 v->HostVMMaxNonCachedPageTableLevels, 2670 v->HostVMMinPageSize, 2671 v->DynamicMetadataEnable[k], 2672 v->DynamicMetadataVMEnabled, 2673 v->DynamicMetadataLinesBeforeActiveRequired[k], 2674 v->DynamicMetadataTransmittedBytes[k], 2675 v->UrgentLatency, 2676 v->UrgentExtraLatency, 2677 v->TCalc, 2678 v->PDEAndMetaPTEBytesFrame[k], 2679 v->MetaRowByte[k], 2680 v->PixelPTEBytesPerRow[k], 2681 v->PrefetchSourceLinesY[k], 2682 v->SwathWidthY[k], 2683 v->VInitPreFillY[k], 2684 v->MaxNumSwathY[k], 2685 v->PrefetchSourceLinesC[k], 2686 v->SwathWidthC[k], 2687 v->VInitPreFillC[k], 2688 v->MaxNumSwathC[k], 2689 v->swath_width_luma_ub[k], 2690 v->swath_width_chroma_ub[k], 2691 v->SwathHeightY[k], 2692 v->SwathHeightC[k], 2693 TWait, 2694 &v->DSTXAfterScaler[k], 2695 &v->DSTYAfterScaler[k], 2696 &v->DestinationLinesForPrefetch[k], 2697 &v->PrefetchBandwidth[k], 2698 &v->DestinationLinesToRequestVMInVBlank[k], 2699 &v->DestinationLinesToRequestRowInVBlank[k], 2700 &v->VRatioPrefetchY[k], 2701 &v->VRatioPrefetchC[k], 2702 &v->RequiredPrefetchPixDataBWLuma[k], 2703 &v->RequiredPrefetchPixDataBWChroma[k], 2704 &v->NotEnoughTimeForDynamicMetadata[k], 2705 &v->Tno_bw[k], 2706 &v->prefetch_vmrow_bw[k], 2707 &v->Tdmdl_vm[k], 2708 &v->Tdmdl[k], 2709 &v->TSetup[k], 2710 &v->VUpdateOffsetPix[k], 2711 &v->VUpdateWidthPix[k], 2712 &v->VReadyOffsetPix[k]); 2713 2714 #ifdef __DML_VBA_DEBUG__ 2715 dml_print("DML::%s: k=%0d Prefetch cal result=%0d\n", __func__, k, v->ErrorResult[k]); 2716 #endif 2717 v->VStartup[k] = dml_min(v->VStartupLines, v->MaxVStartupLines[k]); 2718 } 2719 2720 v->NoEnoughUrgentLatencyHiding = false; 2721 v->NoEnoughUrgentLatencyHidingPre = false; 2722 2723 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 2724 v->cursor_bw[k] = v->NumberOfCursors[k] * v->CursorWidth[k][0] * v->CursorBPP[k][0] / 8.0 2725 / 
(v->HTotal[k] / v->PixelClock[k]) * v->VRatio[k]; 2726 v->cursor_bw_pre[k] = v->NumberOfCursors[k] * v->CursorWidth[k][0] * v->CursorBPP[k][0] / 8.0 2727 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatioPrefetchY[k]; 2728 2729 CalculateUrgentBurstFactor( 2730 v->swath_width_luma_ub[k], 2731 v->swath_width_chroma_ub[k], 2732 v->SwathHeightY[k], 2733 v->SwathHeightC[k], 2734 v->HTotal[k] / v->PixelClock[k], 2735 v->UrgentLatency, 2736 v->CursorBufferSize, 2737 v->CursorWidth[k][0], 2738 v->CursorBPP[k][0], 2739 v->VRatio[k], 2740 v->VRatioChroma[k], 2741 v->BytePerPixelDETY[k], 2742 v->BytePerPixelDETC[k], 2743 v->DETBufferSizeY[k], 2744 v->DETBufferSizeC[k], 2745 &v->UrgBurstFactorCursor[k], 2746 &v->UrgBurstFactorLuma[k], 2747 &v->UrgBurstFactorChroma[k], 2748 &v->NoUrgentLatencyHiding[k]); 2749 2750 CalculateUrgentBurstFactor( 2751 v->swath_width_luma_ub[k], 2752 v->swath_width_chroma_ub[k], 2753 v->SwathHeightY[k], 2754 v->SwathHeightC[k], 2755 v->HTotal[k] / v->PixelClock[k], 2756 v->UrgentLatency, 2757 v->CursorBufferSize, 2758 v->CursorWidth[k][0], 2759 v->CursorBPP[k][0], 2760 v->VRatioPrefetchY[k], 2761 v->VRatioPrefetchC[k], 2762 v->BytePerPixelDETY[k], 2763 v->BytePerPixelDETC[k], 2764 v->DETBufferSizeY[k], 2765 v->DETBufferSizeC[k], 2766 &v->UrgBurstFactorCursorPre[k], 2767 &v->UrgBurstFactorLumaPre[k], 2768 &v->UrgBurstFactorChromaPre[k], 2769 &v->NoUrgentLatencyHidingPre[k]); 2770 2771 MaxTotalRDBandwidth = MaxTotalRDBandwidth 2772 + dml_max3( 2773 v->DPPPerPlane[k] * v->prefetch_vmrow_bw[k], 2774 v->ReadBandwidthPlaneLuma[k] * v->UrgBurstFactorLuma[k] 2775 + v->ReadBandwidthPlaneChroma[k] * v->UrgBurstFactorChroma[k] 2776 + v->cursor_bw[k] * v->UrgBurstFactorCursor[k] 2777 + v->DPPPerPlane[k] * (v->meta_row_bw[k] + v->dpte_row_bw[k]), 2778 v->DPPPerPlane[k] 2779 * (v->RequiredPrefetchPixDataBWLuma[k] * v->UrgBurstFactorLumaPre[k] 2780 + v->RequiredPrefetchPixDataBWChroma[k] * v->UrgBurstFactorChromaPre[k]) 2781 + v->cursor_bw_pre[k] * 
v->UrgBurstFactorCursorPre[k]); 2782 2783 MaxTotalRDBandwidthNoUrgentBurst = MaxTotalRDBandwidthNoUrgentBurst 2784 + dml_max3( 2785 v->DPPPerPlane[k] * v->prefetch_vmrow_bw[k], 2786 v->ReadBandwidthPlaneLuma[k] + v->ReadBandwidthPlaneChroma[k] + v->cursor_bw[k] 2787 + v->DPPPerPlane[k] * (v->meta_row_bw[k] + v->dpte_row_bw[k]), 2788 v->DPPPerPlane[k] * (v->RequiredPrefetchPixDataBWLuma[k] + v->RequiredPrefetchPixDataBWChroma[k]) 2789 + v->cursor_bw_pre[k]); 2790 2791 #ifdef __DML_VBA_DEBUG__ 2792 dml_print("DML::%s: k=%0d DPPPerPlane=%d\n", __func__, k, v->DPPPerPlane[k]); 2793 dml_print("DML::%s: k=%0d UrgBurstFactorLuma=%f\n", __func__, k, v->UrgBurstFactorLuma[k]); 2794 dml_print("DML::%s: k=%0d UrgBurstFactorChroma=%f\n", __func__, k, v->UrgBurstFactorChroma[k]); 2795 dml_print("DML::%s: k=%0d UrgBurstFactorLumaPre=%f\n", __func__, k, v->UrgBurstFactorLumaPre[k]); 2796 dml_print("DML::%s: k=%0d UrgBurstFactorChromaPre=%f\n", __func__, k, v->UrgBurstFactorChromaPre[k]); 2797 2798 dml_print("DML::%s: k=%0d VRatioPrefetchY=%f\n", __func__, k, v->VRatioPrefetchY[k]); 2799 dml_print("DML::%s: k=%0d VRatioY=%f\n", __func__, k, v->VRatio[k]); 2800 2801 dml_print("DML::%s: k=%0d prefetch_vmrow_bw=%f\n", __func__, k, v->prefetch_vmrow_bw[k]); 2802 dml_print("DML::%s: k=%0d ReadBandwidthPlaneLuma=%f\n", __func__, k, v->ReadBandwidthPlaneLuma[k]); 2803 dml_print("DML::%s: k=%0d ReadBandwidthPlaneChroma=%f\n", __func__, k, v->ReadBandwidthPlaneChroma[k]); 2804 dml_print("DML::%s: k=%0d cursor_bw=%f\n", __func__, k, v->cursor_bw[k]); 2805 dml_print("DML::%s: k=%0d meta_row_bw=%f\n", __func__, k, v->meta_row_bw[k]); 2806 dml_print("DML::%s: k=%0d dpte_row_bw=%f\n", __func__, k, v->dpte_row_bw[k]); 2807 dml_print("DML::%s: k=%0d RequiredPrefetchPixDataBWLuma=%f\n", __func__, k, v->RequiredPrefetchPixDataBWLuma[k]); 2808 dml_print("DML::%s: k=%0d RequiredPrefetchPixDataBWChroma=%f\n", __func__, k, v->RequiredPrefetchPixDataBWChroma[k]); 2809 dml_print("DML::%s: k=%0d 
cursor_bw_pre=%f\n", __func__, k, v->cursor_bw_pre[k]); 2810 dml_print("DML::%s: k=%0d MaxTotalRDBandwidthNoUrgentBurst=%f\n", __func__, k, MaxTotalRDBandwidthNoUrgentBurst); 2811 #endif 2812 2813 if (v->DestinationLinesForPrefetch[k] < 2) 2814 DestinationLineTimesForPrefetchLessThan2 = true; 2815 2816 if (v->VRatioPrefetchY[k] > 4 || v->VRatioPrefetchC[k] > 4) 2817 VRatioPrefetchMoreThan4 = true; 2818 2819 if (v->NoUrgentLatencyHiding[k] == true) 2820 v->NoEnoughUrgentLatencyHiding = true; 2821 2822 if (v->NoUrgentLatencyHidingPre[k] == true) 2823 v->NoEnoughUrgentLatencyHidingPre = true; 2824 } 2825 2826 v->FractionOfUrgentBandwidth = MaxTotalRDBandwidthNoUrgentBurst / v->ReturnBW; 2827 2828 #ifdef __DML_VBA_DEBUG__ 2829 dml_print("DML::%s: MaxTotalRDBandwidthNoUrgentBurst=%f\n", __func__, MaxTotalRDBandwidthNoUrgentBurst); 2830 dml_print("DML::%s: ReturnBW=%f\n", __func__, v->ReturnBW); 2831 dml_print("DML::%s: FractionOfUrgentBandwidth=%f\n", __func__, v->FractionOfUrgentBandwidth); 2832 #endif 2833 2834 if (MaxTotalRDBandwidth <= v->ReturnBW && v->NoEnoughUrgentLatencyHiding == 0 && v->NoEnoughUrgentLatencyHidingPre == 0 2835 && !VRatioPrefetchMoreThan4 && !DestinationLineTimesForPrefetchLessThan2) 2836 v->PrefetchModeSupported = true; 2837 else { 2838 v->PrefetchModeSupported = false; 2839 dml_print("DML::%s: ***failed***. Bandwidth violation. Results are NOT valid\n", __func__); 2840 dml_print("DML::%s: MaxTotalRDBandwidth:%f AvailReturnBandwidth:%f\n", __func__, MaxTotalRDBandwidth, v->ReturnBW); 2841 dml_print("DML::%s: VRatioPrefetch %s more than 4\n", __func__, (VRatioPrefetchMoreThan4) ? "is" : "is not"); 2842 dml_print("DML::%s: DestinationLines for Prefetch %s less than 2\n", __func__, (DestinationLineTimesForPrefetchLessThan2) ? "is" : "is not"); 2843 } 2844 2845 // PREVIOUS_ERROR 2846 // This error result check was done after the PrefetchModeSupported. 
So we will 2847 // still try to calculate flip schedule even prefetch mode not supported 2848 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 2849 if (v->ErrorResult[k] == true || v->NotEnoughTimeForDynamicMetadata[k] == true) { 2850 v->PrefetchModeSupported = false; 2851 dml_print("DML::%s: ***failed***. Prefetch schedule violation. Results are NOT valid\n", __func__); 2852 } 2853 } 2854 2855 if (v->PrefetchModeSupported == true && v->ImmediateFlipSupport == true) { 2856 v->BandwidthAvailableForImmediateFlip = v->ReturnBW; 2857 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 2858 v->BandwidthAvailableForImmediateFlip = v->BandwidthAvailableForImmediateFlip 2859 - dml_max( 2860 v->ReadBandwidthPlaneLuma[k] * v->UrgBurstFactorLuma[k] 2861 + v->ReadBandwidthPlaneChroma[k] * v->UrgBurstFactorChroma[k] 2862 + v->cursor_bw[k] * v->UrgBurstFactorCursor[k], 2863 v->DPPPerPlane[k] 2864 * (v->RequiredPrefetchPixDataBWLuma[k] * v->UrgBurstFactorLumaPre[k] 2865 + v->RequiredPrefetchPixDataBWChroma[k] * v->UrgBurstFactorChromaPre[k]) 2866 + v->cursor_bw_pre[k] * v->UrgBurstFactorCursorPre[k]); 2867 } 2868 2869 v->TotImmediateFlipBytes = 0; 2870 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 2871 v->TotImmediateFlipBytes = v->TotImmediateFlipBytes 2872 + v->DPPPerPlane[k] * (v->PDEAndMetaPTEBytesFrame[k] + v->MetaRowByte[k] + v->PixelPTEBytesPerRow[k]); 2873 } 2874 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 2875 CalculateFlipSchedule( 2876 mode_lib, 2877 k, 2878 HostVMInefficiencyFactor, 2879 v->UrgentExtraLatency, 2880 v->UrgentLatency, 2881 v->PDEAndMetaPTEBytesFrame[k], 2882 v->MetaRowByte[k], 2883 v->PixelPTEBytesPerRow[k]); 2884 } 2885 2886 v->total_dcn_read_bw_with_flip = 0.0; 2887 v->total_dcn_read_bw_with_flip_no_urgent_burst = 0.0; 2888 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 2889 v->total_dcn_read_bw_with_flip = v->total_dcn_read_bw_with_flip 2890 + dml_max3( 2891 v->DPPPerPlane[k] * v->prefetch_vmrow_bw[k], 2892 v->DPPPerPlane[k] * v->final_flip_bw[k] 
2893 + v->ReadBandwidthLuma[k] * v->UrgBurstFactorLuma[k] 2894 + v->ReadBandwidthChroma[k] * v->UrgBurstFactorChroma[k] 2895 + v->cursor_bw[k] * v->UrgBurstFactorCursor[k], 2896 v->DPPPerPlane[k] 2897 * (v->final_flip_bw[k] 2898 + v->RequiredPrefetchPixDataBWLuma[k] * v->UrgBurstFactorLumaPre[k] 2899 + v->RequiredPrefetchPixDataBWChroma[k] * v->UrgBurstFactorChromaPre[k]) 2900 + v->cursor_bw_pre[k] * v->UrgBurstFactorCursorPre[k]); 2901 v->total_dcn_read_bw_with_flip_no_urgent_burst = v->total_dcn_read_bw_with_flip_no_urgent_burst 2902 + dml_max3( 2903 v->DPPPerPlane[k] * v->prefetch_vmrow_bw[k], 2904 v->DPPPerPlane[k] * v->final_flip_bw[k] + v->ReadBandwidthPlaneLuma[k] 2905 + v->ReadBandwidthPlaneChroma[k] + v->cursor_bw[k], 2906 v->DPPPerPlane[k] 2907 * (v->final_flip_bw[k] + v->RequiredPrefetchPixDataBWLuma[k] 2908 + v->RequiredPrefetchPixDataBWChroma[k]) + v->cursor_bw_pre[k]); 2909 } 2910 v->FractionOfUrgentBandwidthImmediateFlip = v->total_dcn_read_bw_with_flip_no_urgent_burst / v->ReturnBW; 2911 2912 v->ImmediateFlipSupported = true; 2913 if (v->total_dcn_read_bw_with_flip > v->ReturnBW) { 2914 #ifdef __DML_VBA_DEBUG__ 2915 dml_print("DML::%s: total_dcn_read_bw_with_flip %f (bw w/ flip too high!)\n", __func__, v->total_dcn_read_bw_with_flip); 2916 #endif 2917 v->ImmediateFlipSupported = false; 2918 v->total_dcn_read_bw_with_flip = MaxTotalRDBandwidth; 2919 } 2920 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 2921 if (v->ImmediateFlipSupportedForPipe[k] == false) { 2922 #ifdef __DML_VBA_DEBUG__ 2923 dml_print("DML::%s: Pipe %0d not supporting iflip\n", __func__, k); 2924 #endif 2925 v->ImmediateFlipSupported = false; 2926 } 2927 } 2928 } else { 2929 v->ImmediateFlipSupported = false; 2930 } 2931 2932 v->PrefetchAndImmediateFlipSupported = 2933 (v->PrefetchModeSupported == true && ((!v->ImmediateFlipSupport && !v->HostVMEnable 2934 && v->ImmediateFlipRequirement[0] != dm_immediate_flip_required) || 2935 v->ImmediateFlipSupported)) ? 
true : false; 2936 #ifdef __DML_VBA_DEBUG__ 2937 dml_print("DML::%s: PrefetchModeSupported %d\n", __func__, v->PrefetchModeSupported); 2938 dml_print("DML::%s: ImmediateFlipRequirement %d\n", __func__, v->ImmediateFlipRequirement == dm_immediate_flip_required); 2939 dml_print("DML::%s: ImmediateFlipSupported %d\n", __func__, v->ImmediateFlipSupported); 2940 dml_print("DML::%s: ImmediateFlipSupport %d\n", __func__, v->ImmediateFlipSupport); 2941 dml_print("DML::%s: HostVMEnable %d\n", __func__, v->HostVMEnable); 2942 dml_print("DML::%s: PrefetchAndImmediateFlipSupported %d\n", __func__, v->PrefetchAndImmediateFlipSupported); 2943 #endif 2944 dml_print("DML::%s: Done loop: Vstartup=%d, Max Vstartup is %d\n", __func__, v->VStartupLines, v->MaximumMaxVStartupLines); 2945 2946 v->VStartupLines = v->VStartupLines + 1; 2947 } while (!v->PrefetchAndImmediateFlipSupported && v->VStartupLines <= v->MaximumMaxVStartupLines); 2948 ASSERT(v->PrefetchAndImmediateFlipSupported); 2949 2950 // Unbounded Request Enabled 2951 CalculateUnboundedRequestAndCompressedBufferSize( 2952 v->DETBufferSizeInKByte[0], 2953 v->ConfigReturnBufferSizeInKByte, 2954 v->UseUnboundedRequesting, 2955 v->TotalActiveDPP, 2956 NoChromaPlanes, 2957 v->MaxNumDPP, 2958 v->CompressedBufferSegmentSizeInkByte, 2959 v->Output, 2960 &v->UnboundedRequestEnabled, 2961 &v->CompressedBufferSizeInkByte); 2962 2963 //Watermarks and NB P-State/DRAM Clock Change Support 2964 { 2965 enum clock_change_support DRAMClockChangeSupport; // dummy 2966 2967 CalculateWatermarksAndDRAMSpeedChangeSupport( 2968 mode_lib, 2969 PrefetchMode, 2970 v->DCFCLK, 2971 v->ReturnBW, 2972 v->UrgentLatency, 2973 v->UrgentExtraLatency, 2974 v->SOCCLK, 2975 v->DCFCLKDeepSleep, 2976 v->DETBufferSizeY, 2977 v->DETBufferSizeC, 2978 v->SwathHeightY, 2979 v->SwathHeightC, 2980 v->SwathWidthY, 2981 v->SwathWidthC, 2982 v->DPPPerPlane, 2983 v->BytePerPixelDETY, 2984 v->BytePerPixelDETC, 2985 v->UnboundedRequestEnabled, 2986 
v->CompressedBufferSizeInkByte, 2987 &DRAMClockChangeSupport, 2988 &v->StutterExitWatermark, 2989 &v->StutterEnterPlusExitWatermark, 2990 &v->Z8StutterExitWatermark, 2991 &v->Z8StutterEnterPlusExitWatermark); 2992 2993 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 2994 if (v->WritebackEnable[k] == true) { 2995 v->WritebackAllowDRAMClockChangeEndPosition[k] = dml_max( 2996 0, 2997 v->VStartup[k] * v->HTotal[k] / v->PixelClock[k] - v->WritebackDRAMClockChangeWatermark); 2998 } else { 2999 v->WritebackAllowDRAMClockChangeEndPosition[k] = 0; 3000 } 3001 } 3002 } 3003 3004 //Display Pipeline Delivery Time in Prefetch, Groups 3005 CalculatePixelDeliveryTimes( 3006 v->NumberOfActivePlanes, 3007 v->VRatio, 3008 v->VRatioChroma, 3009 v->VRatioPrefetchY, 3010 v->VRatioPrefetchC, 3011 v->swath_width_luma_ub, 3012 v->swath_width_chroma_ub, 3013 v->DPPPerPlane, 3014 v->HRatio, 3015 v->HRatioChroma, 3016 v->PixelClock, 3017 v->PSCL_THROUGHPUT_LUMA, 3018 v->PSCL_THROUGHPUT_CHROMA, 3019 v->DPPCLK, 3020 v->BytePerPixelC, 3021 v->SourceScan, 3022 v->NumberOfCursors, 3023 v->CursorWidth, 3024 v->CursorBPP, 3025 v->BlockWidth256BytesY, 3026 v->BlockHeight256BytesY, 3027 v->BlockWidth256BytesC, 3028 v->BlockHeight256BytesC, 3029 v->DisplayPipeLineDeliveryTimeLuma, 3030 v->DisplayPipeLineDeliveryTimeChroma, 3031 v->DisplayPipeLineDeliveryTimeLumaPrefetch, 3032 v->DisplayPipeLineDeliveryTimeChromaPrefetch, 3033 v->DisplayPipeRequestDeliveryTimeLuma, 3034 v->DisplayPipeRequestDeliveryTimeChroma, 3035 v->DisplayPipeRequestDeliveryTimeLumaPrefetch, 3036 v->DisplayPipeRequestDeliveryTimeChromaPrefetch, 3037 v->CursorRequestDeliveryTime, 3038 v->CursorRequestDeliveryTimePrefetch); 3039 3040 CalculateMetaAndPTETimes( 3041 v->NumberOfActivePlanes, 3042 v->GPUVMEnable, 3043 v->MetaChunkSize, 3044 v->MinMetaChunkSizeBytes, 3045 v->HTotal, 3046 v->VRatio, 3047 v->VRatioChroma, 3048 v->DestinationLinesToRequestRowInVBlank, 3049 v->DestinationLinesToRequestRowInImmediateFlip, 3050 v->DCCEnable, 
3051 v->PixelClock, 3052 v->BytePerPixelY, 3053 v->BytePerPixelC, 3054 v->SourceScan, 3055 v->dpte_row_height, 3056 v->dpte_row_height_chroma, 3057 v->meta_row_width, 3058 v->meta_row_width_chroma, 3059 v->meta_row_height, 3060 v->meta_row_height_chroma, 3061 v->meta_req_width, 3062 v->meta_req_width_chroma, 3063 v->meta_req_height, 3064 v->meta_req_height_chroma, 3065 v->dpte_group_bytes, 3066 v->PTERequestSizeY, 3067 v->PTERequestSizeC, 3068 v->PixelPTEReqWidthY, 3069 v->PixelPTEReqHeightY, 3070 v->PixelPTEReqWidthC, 3071 v->PixelPTEReqHeightC, 3072 v->dpte_row_width_luma_ub, 3073 v->dpte_row_width_chroma_ub, 3074 v->DST_Y_PER_PTE_ROW_NOM_L, 3075 v->DST_Y_PER_PTE_ROW_NOM_C, 3076 v->DST_Y_PER_META_ROW_NOM_L, 3077 v->DST_Y_PER_META_ROW_NOM_C, 3078 v->TimePerMetaChunkNominal, 3079 v->TimePerChromaMetaChunkNominal, 3080 v->TimePerMetaChunkVBlank, 3081 v->TimePerChromaMetaChunkVBlank, 3082 v->TimePerMetaChunkFlip, 3083 v->TimePerChromaMetaChunkFlip, 3084 v->time_per_pte_group_nom_luma, 3085 v->time_per_pte_group_vblank_luma, 3086 v->time_per_pte_group_flip_luma, 3087 v->time_per_pte_group_nom_chroma, 3088 v->time_per_pte_group_vblank_chroma, 3089 v->time_per_pte_group_flip_chroma); 3090 3091 CalculateVMGroupAndRequestTimes( 3092 v->NumberOfActivePlanes, 3093 v->GPUVMEnable, 3094 v->GPUVMMaxPageTableLevels, 3095 v->HTotal, 3096 v->BytePerPixelC, 3097 v->DestinationLinesToRequestVMInVBlank, 3098 v->DestinationLinesToRequestVMInImmediateFlip, 3099 v->DCCEnable, 3100 v->PixelClock, 3101 v->dpte_row_width_luma_ub, 3102 v->dpte_row_width_chroma_ub, 3103 v->vm_group_bytes, 3104 v->dpde0_bytes_per_frame_ub_l, 3105 v->dpde0_bytes_per_frame_ub_c, 3106 v->meta_pte_bytes_per_frame_ub_l, 3107 v->meta_pte_bytes_per_frame_ub_c, 3108 v->TimePerVMGroupVBlank, 3109 v->TimePerVMGroupFlip, 3110 v->TimePerVMRequestVBlank, 3111 v->TimePerVMRequestFlip); 3112 3113 // Min TTUVBlank 3114 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 3115 if (PrefetchMode == 0) { 3116 
v->AllowDRAMClockChangeDuringVBlank[k] = true; 3117 v->AllowDRAMSelfRefreshDuringVBlank[k] = true; 3118 v->MinTTUVBlank[k] = dml_max( 3119 v->DRAMClockChangeWatermark, 3120 dml_max(v->StutterEnterPlusExitWatermark, v->UrgentWatermark)); 3121 } else if (PrefetchMode == 1) { 3122 v->AllowDRAMClockChangeDuringVBlank[k] = false; 3123 v->AllowDRAMSelfRefreshDuringVBlank[k] = true; 3124 v->MinTTUVBlank[k] = dml_max(v->StutterEnterPlusExitWatermark, v->UrgentWatermark); 3125 } else { 3126 v->AllowDRAMClockChangeDuringVBlank[k] = false; 3127 v->AllowDRAMSelfRefreshDuringVBlank[k] = false; 3128 v->MinTTUVBlank[k] = v->UrgentWatermark; 3129 } 3130 if (!v->DynamicMetadataEnable[k]) 3131 v->MinTTUVBlank[k] = v->TCalc + v->MinTTUVBlank[k]; 3132 } 3133 3134 // DCC Configuration 3135 v->ActiveDPPs = 0; 3136 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 3137 CalculateDCCConfiguration(v->DCCEnable[k], false, // We should always know the direction DCCProgrammingAssumesScanDirectionUnknown, 3138 v->SourcePixelFormat[k], 3139 v->SurfaceWidthY[k], 3140 v->SurfaceWidthC[k], 3141 v->SurfaceHeightY[k], 3142 v->SurfaceHeightC[k], 3143 v->DETBufferSizeInKByte[0] * 1024, 3144 v->BlockHeight256BytesY[k], 3145 v->BlockHeight256BytesC[k], 3146 v->SurfaceTiling[k], 3147 v->BytePerPixelY[k], 3148 v->BytePerPixelC[k], 3149 v->BytePerPixelDETY[k], 3150 v->BytePerPixelDETC[k], 3151 v->SourceScan[k], 3152 &v->DCCYMaxUncompressedBlock[k], 3153 &v->DCCCMaxUncompressedBlock[k], 3154 &v->DCCYMaxCompressedBlock[k], 3155 &v->DCCCMaxCompressedBlock[k], 3156 &v->DCCYIndependentBlock[k], 3157 &v->DCCCIndependentBlock[k]); 3158 } 3159 3160 // VStartup Adjustment 3161 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 3162 bool isInterlaceTiming; 3163 double Tvstartup_margin = (v->MaxVStartupLines[k] - v->VStartup[k]) * v->HTotal[k] / v->PixelClock[k]; 3164 #ifdef __DML_VBA_DEBUG__ 3165 dml_print("DML::%s: k=%d, MinTTUVBlank = %f (before margin)\n", __func__, k, v->MinTTUVBlank[k]); 3166 #endif 3167 3168 
v->MinTTUVBlank[k] = v->MinTTUVBlank[k] + Tvstartup_margin; 3169 3170 #ifdef __DML_VBA_DEBUG__ 3171 dml_print("DML::%s: k=%d, Tvstartup_margin = %f\n", __func__, k, Tvstartup_margin); 3172 dml_print("DML::%s: k=%d, MaxVStartupLines = %d\n", __func__, k, v->MaxVStartupLines[k]); 3173 dml_print("DML::%s: k=%d, VStartup = %d\n", __func__, k, v->VStartup[k]); 3174 dml_print("DML::%s: k=%d, MinTTUVBlank = %f\n", __func__, k, v->MinTTUVBlank[k]); 3175 #endif 3176 3177 v->Tdmdl[k] = v->Tdmdl[k] + Tvstartup_margin; 3178 if (v->DynamicMetadataEnable[k] && v->DynamicMetadataVMEnabled) { 3179 v->Tdmdl_vm[k] = v->Tdmdl_vm[k] + Tvstartup_margin; 3180 } 3181 3182 isInterlaceTiming = (v->Interlace[k] && !v->ProgressiveToInterlaceUnitInOPP); 3183 v->VStartup[k] = (isInterlaceTiming ? (2 * v->MaxVStartupLines[k]) : v->MaxVStartupLines[k]); 3184 if (v->Interlace[k] && !v->ProgressiveToInterlaceUnitInOPP) { 3185 v->MIN_DST_Y_NEXT_START[k] = dml_floor((v->VTotal[k] - v->VFrontPorch[k] + v->VTotal[k] - v->VActive[k] - v->VStartup[k]) / 2.0, 1.0); 3186 } else { 3187 v->MIN_DST_Y_NEXT_START[k] = v->VTotal[k] - v->VFrontPorch[k] + v->VTotal[k] - v->VActive[k] - v->VStartup[k]; 3188 } 3189 v->MIN_DST_Y_NEXT_START[k] += dml_floor(4.0 * v->TSetup[k] / ((double)v->HTotal[k] / v->PixelClock[k]), 1.0) / 4.0; 3190 if (((v->VUpdateOffsetPix[k] + v->VUpdateWidthPix[k] + v->VReadyOffsetPix[k]) / v->HTotal[k]) 3191 <= (isInterlaceTiming ? 
3192 dml_floor((v->VTotal[k] - v->VActive[k] - v->VFrontPorch[k] - v->VStartup[k]) / 2.0, 1.0) : 3193 (int) (v->VTotal[k] - v->VActive[k] - v->VFrontPorch[k] - v->VStartup[k]))) { 3194 v->VREADY_AT_OR_AFTER_VSYNC[k] = true; 3195 } else { 3196 v->VREADY_AT_OR_AFTER_VSYNC[k] = false; 3197 } 3198 #ifdef __DML_VBA_DEBUG__ 3199 dml_print("DML::%s: k=%d, VStartup = %d (max)\n", __func__, k, v->VStartup[k]); 3200 dml_print("DML::%s: k=%d, VUpdateOffsetPix = %d\n", __func__, k, v->VUpdateOffsetPix[k]); 3201 dml_print("DML::%s: k=%d, VUpdateWidthPix = %d\n", __func__, k, v->VUpdateWidthPix[k]); 3202 dml_print("DML::%s: k=%d, VReadyOffsetPix = %d\n", __func__, k, v->VReadyOffsetPix[k]); 3203 dml_print("DML::%s: k=%d, HTotal = %d\n", __func__, k, v->HTotal[k]); 3204 dml_print("DML::%s: k=%d, VTotal = %d\n", __func__, k, v->VTotal[k]); 3205 dml_print("DML::%s: k=%d, VActive = %d\n", __func__, k, v->VActive[k]); 3206 dml_print("DML::%s: k=%d, VFrontPorch = %d\n", __func__, k, v->VFrontPorch[k]); 3207 dml_print("DML::%s: k=%d, VStartup = %d\n", __func__, k, v->VStartup[k]); 3208 dml_print("DML::%s: k=%d, MIN_DST_Y_NEXT_START = %f\n", __func__, k, v->MIN_DST_Y_NEXT_START[k]); 3209 dml_print("DML::%s: k=%d, VREADY_AT_OR_AFTER_VSYNC = %d\n", __func__, k, v->VREADY_AT_OR_AFTER_VSYNC[k]); 3210 #endif 3211 } 3212 3213 { 3214 //Maximum Bandwidth Used 3215 double TotalWRBandwidth = 0; 3216 double MaxPerPlaneVActiveWRBandwidth = 0; 3217 double WRBandwidth = 0; 3218 3219 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 3220 if (v->WritebackEnable[k] == true && v->WritebackPixelFormat[k] == dm_444_32) { 3221 WRBandwidth = v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k] 3222 / (v->HTotal[k] * v->WritebackSourceHeight[k] / v->PixelClock[k]) * 4; 3223 } else if (v->WritebackEnable[k] == true) { 3224 WRBandwidth = v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k] 3225 / (v->HTotal[k] * v->WritebackSourceHeight[k] / v->PixelClock[k]) * 8; 3226 } 3227 
TotalWRBandwidth = TotalWRBandwidth + WRBandwidth; 3228 MaxPerPlaneVActiveWRBandwidth = dml_max(MaxPerPlaneVActiveWRBandwidth, WRBandwidth); 3229 } 3230 3231 v->TotalDataReadBandwidth = 0; 3232 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 3233 v->TotalDataReadBandwidth = v->TotalDataReadBandwidth + v->ReadBandwidthPlaneLuma[k] + v->ReadBandwidthPlaneChroma[k]; 3234 } 3235 } 3236 // Stutter Efficiency 3237 CalculateStutterEfficiency( 3238 mode_lib, 3239 v->CompressedBufferSizeInkByte, 3240 v->UnboundedRequestEnabled, 3241 v->ConfigReturnBufferSizeInKByte, 3242 v->MetaFIFOSizeInKEntries, 3243 v->ZeroSizeBufferEntries, 3244 v->NumberOfActivePlanes, 3245 v->ROBBufferSizeInKByte, 3246 v->TotalDataReadBandwidth, 3247 v->DCFCLK, 3248 v->ReturnBW, 3249 v->COMPBUF_RESERVED_SPACE_64B, 3250 v->COMPBUF_RESERVED_SPACE_ZS, 3251 v->SRExitTime, 3252 v->SRExitZ8Time, 3253 v->SynchronizedVBlank, 3254 v->StutterEnterPlusExitWatermark, 3255 v->Z8StutterEnterPlusExitWatermark, 3256 v->ProgressiveToInterlaceUnitInOPP, 3257 v->Interlace, 3258 v->MinTTUVBlank, 3259 v->DPPPerPlane, 3260 v->DETBufferSizeY, 3261 v->BytePerPixelY, 3262 v->BytePerPixelDETY, 3263 v->SwathWidthY, 3264 v->SwathHeightY, 3265 v->SwathHeightC, 3266 v->DCCRateLuma, 3267 v->DCCRateChroma, 3268 v->DCCFractionOfZeroSizeRequestsLuma, 3269 v->DCCFractionOfZeroSizeRequestsChroma, 3270 v->HTotal, 3271 v->VTotal, 3272 v->PixelClock, 3273 v->VRatio, 3274 v->SourceScan, 3275 v->BlockHeight256BytesY, 3276 v->BlockWidth256BytesY, 3277 v->BlockHeight256BytesC, 3278 v->BlockWidth256BytesC, 3279 v->DCCYMaxUncompressedBlock, 3280 v->DCCCMaxUncompressedBlock, 3281 v->VActive, 3282 v->DCCEnable, 3283 v->WritebackEnable, 3284 v->ReadBandwidthPlaneLuma, 3285 v->ReadBandwidthPlaneChroma, 3286 v->meta_row_bw, 3287 v->dpte_row_bw, 3288 &v->StutterEfficiencyNotIncludingVBlank, 3289 &v->StutterEfficiency, 3290 &v->NumberOfStutterBurstsPerFrame, 3291 &v->Z8StutterEfficiencyNotIncludingVBlank, 3292 &v->Z8StutterEfficiency, 3293 
&v->Z8NumberOfStutterBurstsPerFrame, 3294 &v->StutterPeriod); 3295 } 3296 3297 static void DisplayPipeConfiguration(struct display_mode_lib *mode_lib) 3298 { 3299 struct vba_vars_st *v = &mode_lib->vba; 3300 // Display Pipe Configuration 3301 double BytePerPixDETY[DC__NUM_DPP__MAX]; 3302 double BytePerPixDETC[DC__NUM_DPP__MAX]; 3303 int BytePerPixY[DC__NUM_DPP__MAX]; 3304 int BytePerPixC[DC__NUM_DPP__MAX]; 3305 int Read256BytesBlockHeightY[DC__NUM_DPP__MAX]; 3306 int Read256BytesBlockHeightC[DC__NUM_DPP__MAX]; 3307 int Read256BytesBlockWidthY[DC__NUM_DPP__MAX]; 3308 int Read256BytesBlockWidthC[DC__NUM_DPP__MAX]; 3309 double dummy1[DC__NUM_DPP__MAX]; 3310 double dummy2[DC__NUM_DPP__MAX]; 3311 double dummy3[DC__NUM_DPP__MAX]; 3312 double dummy4[DC__NUM_DPP__MAX]; 3313 int dummy5[DC__NUM_DPP__MAX]; 3314 int dummy6[DC__NUM_DPP__MAX]; 3315 bool dummy7[DC__NUM_DPP__MAX]; 3316 bool dummysinglestring; 3317 3318 unsigned int k; 3319 3320 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 3321 3322 CalculateBytePerPixelAnd256BBlockSizes( 3323 v->SourcePixelFormat[k], 3324 v->SurfaceTiling[k], 3325 &BytePerPixY[k], 3326 &BytePerPixC[k], 3327 &BytePerPixDETY[k], 3328 &BytePerPixDETC[k], 3329 &Read256BytesBlockHeightY[k], 3330 &Read256BytesBlockHeightC[k], 3331 &Read256BytesBlockWidthY[k], 3332 &Read256BytesBlockWidthC[k]); 3333 } 3334 3335 CalculateSwathAndDETConfiguration( 3336 false, 3337 v->NumberOfActivePlanes, 3338 v->DETBufferSizeInKByte[0], 3339 dummy1, 3340 dummy2, 3341 v->SourceScan, 3342 v->SourcePixelFormat, 3343 v->SurfaceTiling, 3344 v->ViewportWidth, 3345 v->ViewportHeight, 3346 v->SurfaceWidthY, 3347 v->SurfaceWidthC, 3348 v->SurfaceHeightY, 3349 v->SurfaceHeightC, 3350 Read256BytesBlockHeightY, 3351 Read256BytesBlockHeightC, 3352 Read256BytesBlockWidthY, 3353 Read256BytesBlockWidthC, 3354 v->ODMCombineEnabled, 3355 v->BlendingAndTiming, 3356 BytePerPixY, 3357 BytePerPixC, 3358 BytePerPixDETY, 3359 BytePerPixDETC, 3360 v->HActive, 3361 v->HRatio, 3362 
v->HRatioChroma, 3363 v->DPPPerPlane, 3364 dummy5, 3365 dummy6, 3366 dummy3, 3367 dummy4, 3368 v->SwathHeightY, 3369 v->SwathHeightC, 3370 v->DETBufferSizeY, 3371 v->DETBufferSizeC, 3372 dummy7, 3373 &dummysinglestring); 3374 } 3375 3376 static bool CalculateBytePerPixelAnd256BBlockSizes( 3377 enum source_format_class SourcePixelFormat, 3378 enum dm_swizzle_mode SurfaceTiling, 3379 unsigned int *BytePerPixelY, 3380 unsigned int *BytePerPixelC, 3381 double *BytePerPixelDETY, 3382 double *BytePerPixelDETC, 3383 unsigned int *BlockHeight256BytesY, 3384 unsigned int *BlockHeight256BytesC, 3385 unsigned int *BlockWidth256BytesY, 3386 unsigned int *BlockWidth256BytesC) 3387 { 3388 if (SourcePixelFormat == dm_444_64) { 3389 *BytePerPixelDETY = 8; 3390 *BytePerPixelDETC = 0; 3391 *BytePerPixelY = 8; 3392 *BytePerPixelC = 0; 3393 } else if (SourcePixelFormat == dm_444_32 || SourcePixelFormat == dm_rgbe) { 3394 *BytePerPixelDETY = 4; 3395 *BytePerPixelDETC = 0; 3396 *BytePerPixelY = 4; 3397 *BytePerPixelC = 0; 3398 } else if (SourcePixelFormat == dm_444_16) { 3399 *BytePerPixelDETY = 2; 3400 *BytePerPixelDETC = 0; 3401 *BytePerPixelY = 2; 3402 *BytePerPixelC = 0; 3403 } else if (SourcePixelFormat == dm_444_8) { 3404 *BytePerPixelDETY = 1; 3405 *BytePerPixelDETC = 0; 3406 *BytePerPixelY = 1; 3407 *BytePerPixelC = 0; 3408 } else if (SourcePixelFormat == dm_rgbe_alpha) { 3409 *BytePerPixelDETY = 4; 3410 *BytePerPixelDETC = 1; 3411 *BytePerPixelY = 4; 3412 *BytePerPixelC = 1; 3413 } else if (SourcePixelFormat == dm_420_8) { 3414 *BytePerPixelDETY = 1; 3415 *BytePerPixelDETC = 2; 3416 *BytePerPixelY = 1; 3417 *BytePerPixelC = 2; 3418 } else if (SourcePixelFormat == dm_420_12) { 3419 *BytePerPixelDETY = 2; 3420 *BytePerPixelDETC = 4; 3421 *BytePerPixelY = 2; 3422 *BytePerPixelC = 4; 3423 } else { 3424 *BytePerPixelDETY = 4.0 / 3; 3425 *BytePerPixelDETC = 8.0 / 3; 3426 *BytePerPixelY = 2; 3427 *BytePerPixelC = 4; 3428 } 3429 3430 if ((SourcePixelFormat == dm_444_64 || 
SourcePixelFormat == dm_444_32 || SourcePixelFormat == dm_444_16 || SourcePixelFormat == dm_444_8 || SourcePixelFormat == dm_mono_16 3431 || SourcePixelFormat == dm_mono_8 || SourcePixelFormat == dm_rgbe)) { 3432 if (SurfaceTiling == dm_sw_linear) { 3433 *BlockHeight256BytesY = 1; 3434 } else if (SourcePixelFormat == dm_444_64) { 3435 *BlockHeight256BytesY = 4; 3436 } else if (SourcePixelFormat == dm_444_8) { 3437 *BlockHeight256BytesY = 16; 3438 } else { 3439 *BlockHeight256BytesY = 8; 3440 } 3441 *BlockWidth256BytesY = 256U / *BytePerPixelY / *BlockHeight256BytesY; 3442 *BlockHeight256BytesC = 0; 3443 *BlockWidth256BytesC = 0; 3444 } else { 3445 if (SurfaceTiling == dm_sw_linear) { 3446 *BlockHeight256BytesY = 1; 3447 *BlockHeight256BytesC = 1; 3448 } else if (SourcePixelFormat == dm_rgbe_alpha) { 3449 *BlockHeight256BytesY = 8; 3450 *BlockHeight256BytesC = 16; 3451 } else if (SourcePixelFormat == dm_420_8) { 3452 *BlockHeight256BytesY = 16; 3453 *BlockHeight256BytesC = 8; 3454 } else { 3455 *BlockHeight256BytesY = 8; 3456 *BlockHeight256BytesC = 8; 3457 } 3458 *BlockWidth256BytesY = 256U / *BytePerPixelY / *BlockHeight256BytesY; 3459 *BlockWidth256BytesC = 256U / *BytePerPixelC / *BlockHeight256BytesC; 3460 } 3461 return true; 3462 } 3463 3464 static double CalculateTWait(unsigned int PrefetchMode, double DRAMClockChangeLatency, double UrgentLatency, double SREnterPlusExitTime) 3465 { 3466 if (PrefetchMode == 0) { 3467 return dml_max(DRAMClockChangeLatency + UrgentLatency, dml_max(SREnterPlusExitTime, UrgentLatency)); 3468 } else if (PrefetchMode == 1) { 3469 return dml_max(SREnterPlusExitTime, UrgentLatency); 3470 } else { 3471 return UrgentLatency; 3472 } 3473 } 3474 3475 double dml314_CalculateWriteBackDISPCLK( 3476 enum source_format_class WritebackPixelFormat, 3477 double PixelClock, 3478 double WritebackHRatio, 3479 double WritebackVRatio, 3480 unsigned int WritebackHTaps, 3481 unsigned int WritebackVTaps, 3482 long WritebackSourceWidth, 3483 long 
WritebackDestinationWidth, 3484 unsigned int HTotal, 3485 unsigned int WritebackLineBufferSize) 3486 { 3487 double DISPCLK_H, DISPCLK_V, DISPCLK_HB; 3488 3489 DISPCLK_H = PixelClock * dml_ceil(WritebackHTaps / 8.0, 1) / WritebackHRatio; 3490 DISPCLK_V = PixelClock * (WritebackVTaps * dml_ceil(WritebackDestinationWidth / 6.0, 1) + 8.0) / HTotal; 3491 DISPCLK_HB = PixelClock * WritebackVTaps * (WritebackDestinationWidth * WritebackVTaps - WritebackLineBufferSize / 57.0) / 6.0 / WritebackSourceWidth; 3492 return dml_max3(DISPCLK_H, DISPCLK_V, DISPCLK_HB); 3493 } 3494 3495 static double CalculateWriteBackDelay( 3496 enum source_format_class WritebackPixelFormat, 3497 double WritebackHRatio, 3498 double WritebackVRatio, 3499 unsigned int WritebackVTaps, 3500 int WritebackDestinationWidth, 3501 int WritebackDestinationHeight, 3502 int WritebackSourceHeight, 3503 unsigned int HTotal) 3504 { 3505 double CalculateWriteBackDelay; 3506 double Line_length; 3507 double Output_lines_last_notclamped; 3508 double WritebackVInit; 3509 3510 WritebackVInit = (WritebackVRatio + WritebackVTaps + 1) / 2; 3511 Line_length = dml_max((double) WritebackDestinationWidth, dml_ceil(WritebackDestinationWidth / 6.0, 1) * WritebackVTaps); 3512 Output_lines_last_notclamped = WritebackDestinationHeight - 1 - dml_ceil((WritebackSourceHeight - WritebackVInit) / WritebackVRatio, 1); 3513 if (Output_lines_last_notclamped < 0) { 3514 CalculateWriteBackDelay = 0; 3515 } else { 3516 CalculateWriteBackDelay = Output_lines_last_notclamped * Line_length + (HTotal - WritebackDestinationWidth) + 80; 3517 } 3518 return CalculateWriteBackDelay; 3519 } 3520 3521 static void CalculateVupdateAndDynamicMetadataParameters( 3522 int MaxInterDCNTileRepeaters, 3523 double DPPCLK, 3524 double DISPCLK, 3525 double DCFClkDeepSleep, 3526 double PixelClock, 3527 int HTotal, 3528 int VBlank, 3529 int DynamicMetadataTransmittedBytes, 3530 int DynamicMetadataLinesBeforeActiveRequired, 3531 int InterlaceEnable, 3532 bool 
ProgressiveToInterlaceUnitInOPP, 3533 double *TSetup, 3534 double *Tdmbf, 3535 double *Tdmec, 3536 double *Tdmsks, 3537 int *VUpdateOffsetPix, 3538 double *VUpdateWidthPix, 3539 double *VReadyOffsetPix) 3540 { 3541 double TotalRepeaterDelayTime; 3542 3543 TotalRepeaterDelayTime = MaxInterDCNTileRepeaters * (2 / DPPCLK + 3 / DISPCLK); 3544 *VUpdateWidthPix = dml_ceil((14.0 / DCFClkDeepSleep + 12.0 / DPPCLK + TotalRepeaterDelayTime) * PixelClock, 1.0); 3545 *VReadyOffsetPix = dml_ceil(dml_max(150.0 / DPPCLK, TotalRepeaterDelayTime + 20.0 / DCFClkDeepSleep + 10.0 / DPPCLK) * PixelClock, 1.0); 3546 *VUpdateOffsetPix = dml_ceil(HTotal / 4.0, 1); 3547 *TSetup = (*VUpdateOffsetPix + *VUpdateWidthPix + *VReadyOffsetPix) / PixelClock; 3548 *Tdmbf = DynamicMetadataTransmittedBytes / 4.0 / DISPCLK; 3549 *Tdmec = HTotal / PixelClock; 3550 if (DynamicMetadataLinesBeforeActiveRequired == 0) { 3551 *Tdmsks = VBlank * HTotal / PixelClock / 2.0; 3552 } else { 3553 *Tdmsks = DynamicMetadataLinesBeforeActiveRequired * HTotal / PixelClock; 3554 } 3555 if (InterlaceEnable == 1 && ProgressiveToInterlaceUnitInOPP == false) { 3556 *Tdmsks = *Tdmsks / 2; 3557 } 3558 #ifdef __DML_VBA_DEBUG__ 3559 dml_print("DML::%s: VUpdateWidthPix = %d\n", __func__, *VUpdateWidthPix); 3560 dml_print("DML::%s: VReadyOffsetPix = %d\n", __func__, *VReadyOffsetPix); 3561 dml_print("DML::%s: VUpdateOffsetPix = %d\n", __func__, *VUpdateOffsetPix); 3562 #endif 3563 } 3564 3565 static void CalculateRowBandwidth( 3566 bool GPUVMEnable, 3567 enum source_format_class SourcePixelFormat, 3568 double VRatio, 3569 double VRatioChroma, 3570 bool DCCEnable, 3571 double LineTime, 3572 unsigned int MetaRowByteLuma, 3573 unsigned int MetaRowByteChroma, 3574 unsigned int meta_row_height_luma, 3575 unsigned int meta_row_height_chroma, 3576 unsigned int PixelPTEBytesPerRowLuma, 3577 unsigned int PixelPTEBytesPerRowChroma, 3578 unsigned int dpte_row_height_luma, 3579 unsigned int dpte_row_height_chroma, 3580 double *meta_row_bw, 
3581 double *dpte_row_bw) 3582 { 3583 if (DCCEnable != true) { 3584 *meta_row_bw = 0; 3585 } else if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12 || SourcePixelFormat == dm_rgbe_alpha) { 3586 *meta_row_bw = VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime) + VRatioChroma * MetaRowByteChroma / (meta_row_height_chroma * LineTime); 3587 } else { 3588 *meta_row_bw = VRatio * MetaRowByteLuma / (meta_row_height_luma * LineTime); 3589 } 3590 3591 if (GPUVMEnable != true) { 3592 *dpte_row_bw = 0; 3593 } else if (SourcePixelFormat == dm_420_8 || SourcePixelFormat == dm_420_10 || SourcePixelFormat == dm_420_12 || SourcePixelFormat == dm_rgbe_alpha) { 3594 *dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime) 3595 + VRatioChroma * PixelPTEBytesPerRowChroma / (dpte_row_height_chroma * LineTime); 3596 } else { 3597 *dpte_row_bw = VRatio * PixelPTEBytesPerRowLuma / (dpte_row_height_luma * LineTime); 3598 } 3599 } 3600 3601 static void CalculateFlipSchedule( 3602 struct display_mode_lib *mode_lib, 3603 unsigned int k, 3604 double HostVMInefficiencyFactor, 3605 double UrgentExtraLatency, 3606 double UrgentLatency, 3607 double PDEAndMetaPTEBytesPerFrame, 3608 double MetaRowBytes, 3609 double DPTEBytesPerRow) 3610 { 3611 struct vba_vars_st *v = &mode_lib->vba; 3612 double min_row_time = 0.0; 3613 unsigned int HostVMDynamicLevelsTrips; 3614 double TimeForFetchingMetaPTEImmediateFlip; 3615 double TimeForFetchingRowInVBlankImmediateFlip; 3616 double ImmediateFlipBW; 3617 double LineTime = v->HTotal[k] / v->PixelClock[k]; 3618 3619 if (v->GPUVMEnable == true && v->HostVMEnable == true) { 3620 HostVMDynamicLevelsTrips = v->HostVMMaxNonCachedPageTableLevels; 3621 } else { 3622 HostVMDynamicLevelsTrips = 0; 3623 } 3624 3625 if (v->GPUVMEnable == true || v->DCCEnable[k] == true) { 3626 ImmediateFlipBW = (PDEAndMetaPTEBytesPerFrame + MetaRowBytes + DPTEBytesPerRow) * 
v->BandwidthAvailableForImmediateFlip / v->TotImmediateFlipBytes; 3627 } 3628 3629 if (v->GPUVMEnable == true) { 3630 TimeForFetchingMetaPTEImmediateFlip = dml_max3( 3631 v->Tno_bw[k] + PDEAndMetaPTEBytesPerFrame * HostVMInefficiencyFactor / ImmediateFlipBW, 3632 UrgentExtraLatency + UrgentLatency * (v->GPUVMMaxPageTableLevels * (HostVMDynamicLevelsTrips + 1) - 1), 3633 LineTime / 4.0); 3634 } else { 3635 TimeForFetchingMetaPTEImmediateFlip = 0; 3636 } 3637 3638 v->DestinationLinesToRequestVMInImmediateFlip[k] = dml_ceil(4.0 * (TimeForFetchingMetaPTEImmediateFlip / LineTime), 1) / 4.0; 3639 if ((v->GPUVMEnable == true || v->DCCEnable[k] == true)) { 3640 TimeForFetchingRowInVBlankImmediateFlip = dml_max3( 3641 (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / ImmediateFlipBW, 3642 UrgentLatency * (HostVMDynamicLevelsTrips + 1), 3643 LineTime / 4); 3644 } else { 3645 TimeForFetchingRowInVBlankImmediateFlip = 0; 3646 } 3647 3648 v->DestinationLinesToRequestRowInImmediateFlip[k] = dml_ceil(4.0 * (TimeForFetchingRowInVBlankImmediateFlip / LineTime), 1) / 4.0; 3649 3650 if (v->GPUVMEnable == true) { 3651 v->final_flip_bw[k] = dml_max( 3652 PDEAndMetaPTEBytesPerFrame * HostVMInefficiencyFactor / (v->DestinationLinesToRequestVMInImmediateFlip[k] * LineTime), 3653 (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / (v->DestinationLinesToRequestRowInImmediateFlip[k] * LineTime)); 3654 } else if ((v->GPUVMEnable == true || v->DCCEnable[k] == true)) { 3655 v->final_flip_bw[k] = (MetaRowBytes + DPTEBytesPerRow * HostVMInefficiencyFactor) / (v->DestinationLinesToRequestRowInImmediateFlip[k] * LineTime); 3656 } else { 3657 v->final_flip_bw[k] = 0; 3658 } 3659 3660 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_rgbe_alpha) { 3661 if (v->GPUVMEnable == true && v->DCCEnable[k] != true) { 3662 min_row_time = dml_min(v->dpte_row_height[k] * LineTime / v->VRatio[k], v->dpte_row_height_chroma[k] * 
LineTime / v->VRatioChroma[k]); 3663 } else if (v->GPUVMEnable != true && v->DCCEnable[k] == true) { 3664 min_row_time = dml_min(v->meta_row_height[k] * LineTime / v->VRatio[k], v->meta_row_height_chroma[k] * LineTime / v->VRatioChroma[k]); 3665 } else { 3666 min_row_time = dml_min4( 3667 v->dpte_row_height[k] * LineTime / v->VRatio[k], 3668 v->meta_row_height[k] * LineTime / v->VRatio[k], 3669 v->dpte_row_height_chroma[k] * LineTime / v->VRatioChroma[k], 3670 v->meta_row_height_chroma[k] * LineTime / v->VRatioChroma[k]); 3671 } 3672 } else { 3673 if (v->GPUVMEnable == true && v->DCCEnable[k] != true) { 3674 min_row_time = v->dpte_row_height[k] * LineTime / v->VRatio[k]; 3675 } else if (v->GPUVMEnable != true && v->DCCEnable[k] == true) { 3676 min_row_time = v->meta_row_height[k] * LineTime / v->VRatio[k]; 3677 } else { 3678 min_row_time = dml_min(v->dpte_row_height[k] * LineTime / v->VRatio[k], v->meta_row_height[k] * LineTime / v->VRatio[k]); 3679 } 3680 } 3681 3682 if (v->DestinationLinesToRequestVMInImmediateFlip[k] >= 32 || v->DestinationLinesToRequestRowInImmediateFlip[k] >= 16 3683 || TimeForFetchingMetaPTEImmediateFlip + 2 * TimeForFetchingRowInVBlankImmediateFlip > min_row_time) { 3684 v->ImmediateFlipSupportedForPipe[k] = false; 3685 } else { 3686 v->ImmediateFlipSupportedForPipe[k] = true; 3687 } 3688 3689 #ifdef __DML_VBA_DEBUG__ 3690 dml_print("DML::%s: DestinationLinesToRequestVMInImmediateFlip = %f\n", __func__, v->DestinationLinesToRequestVMInImmediateFlip[k]); 3691 dml_print("DML::%s: DestinationLinesToRequestRowInImmediateFlip = %f\n", __func__, v->DestinationLinesToRequestRowInImmediateFlip[k]); 3692 dml_print("DML::%s: TimeForFetchingMetaPTEImmediateFlip = %f\n", __func__, TimeForFetchingMetaPTEImmediateFlip); 3693 dml_print("DML::%s: TimeForFetchingRowInVBlankImmediateFlip = %f\n", __func__, TimeForFetchingRowInVBlankImmediateFlip); 3694 dml_print("DML::%s: min_row_time = %f\n", __func__, min_row_time); 3695 dml_print("DML::%s: 
ImmediateFlipSupportedForPipe = %d\n", __func__, v->ImmediateFlipSupportedForPipe[k]); 3696 #endif 3697 3698 } 3699 3700 static double TruncToValidBPP( 3701 double LinkBitRate, 3702 int Lanes, 3703 int HTotal, 3704 int HActive, 3705 double PixelClock, 3706 double DesiredBPP, 3707 bool DSCEnable, 3708 enum output_encoder_class Output, 3709 enum output_format_class Format, 3710 unsigned int DSCInputBitPerComponent, 3711 int DSCSlices, 3712 int AudioRate, 3713 int AudioLayout, 3714 enum odm_combine_mode ODMCombine) 3715 { 3716 double MaxLinkBPP; 3717 int MinDSCBPP; 3718 double MaxDSCBPP; 3719 int NonDSCBPP0; 3720 int NonDSCBPP1; 3721 int NonDSCBPP2; 3722 3723 if (Format == dm_420) { 3724 NonDSCBPP0 = 12; 3725 NonDSCBPP1 = 15; 3726 NonDSCBPP2 = 18; 3727 MinDSCBPP = 6; 3728 MaxDSCBPP = 1.5 * DSCInputBitPerComponent - 1 / 16; 3729 } else if (Format == dm_444) { 3730 NonDSCBPP0 = 24; 3731 NonDSCBPP1 = 30; 3732 NonDSCBPP2 = 36; 3733 MinDSCBPP = 8; 3734 MaxDSCBPP = 3 * DSCInputBitPerComponent - 1.0 / 16; 3735 } else { 3736 3737 NonDSCBPP0 = 16; 3738 NonDSCBPP1 = 20; 3739 NonDSCBPP2 = 24; 3740 3741 if (Format == dm_n422) { 3742 MinDSCBPP = 7; 3743 MaxDSCBPP = 2 * DSCInputBitPerComponent - 1.0 / 16.0; 3744 } else { 3745 MinDSCBPP = 8; 3746 MaxDSCBPP = 3 * DSCInputBitPerComponent - 1.0 / 16.0; 3747 } 3748 } 3749 3750 if (DSCEnable && Output == dm_dp) { 3751 MaxLinkBPP = LinkBitRate / 10 * 8 * Lanes / PixelClock * (1 - 2.4 / 100); 3752 } else { 3753 MaxLinkBPP = LinkBitRate / 10 * 8 * Lanes / PixelClock; 3754 } 3755 3756 if (ODMCombine == dm_odm_combine_mode_4to1 && MaxLinkBPP > 16) { 3757 MaxLinkBPP = 16; 3758 } else if (ODMCombine == dm_odm_combine_mode_2to1 && MaxLinkBPP > 32) { 3759 MaxLinkBPP = 32; 3760 } 3761 3762 if (DesiredBPP == 0) { 3763 if (DSCEnable) { 3764 if (MaxLinkBPP < MinDSCBPP) { 3765 return BPP_INVALID; 3766 } else if (MaxLinkBPP >= MaxDSCBPP) { 3767 return MaxDSCBPP; 3768 } else { 3769 return dml_floor(16.0 * MaxLinkBPP, 1.0) / 16.0; 3770 } 3771 } else { 
3772 if (MaxLinkBPP >= NonDSCBPP2) { 3773 return NonDSCBPP2; 3774 } else if (MaxLinkBPP >= NonDSCBPP1) { 3775 return NonDSCBPP1; 3776 } else if (MaxLinkBPP >= NonDSCBPP0) { 3777 return 16.0; 3778 } else { 3779 return BPP_INVALID; 3780 } 3781 } 3782 } else { 3783 if (!((DSCEnable == false && (DesiredBPP == NonDSCBPP2 || DesiredBPP == NonDSCBPP1 || DesiredBPP <= NonDSCBPP0)) 3784 || (DSCEnable && DesiredBPP >= MinDSCBPP && DesiredBPP <= MaxDSCBPP))) { 3785 return BPP_INVALID; 3786 } else { 3787 return DesiredBPP; 3788 } 3789 } 3790 return BPP_INVALID; 3791 } 3792 3793 static noinline void CalculatePrefetchSchedulePerPlane( 3794 struct display_mode_lib *mode_lib, 3795 double HostVMInefficiencyFactor, 3796 int i, 3797 unsigned int j, 3798 unsigned int k) 3799 { 3800 struct vba_vars_st *v = &mode_lib->vba; 3801 Pipe myPipe; 3802 3803 myPipe.DPPCLK = v->RequiredDPPCLK[i][j][k]; 3804 myPipe.DISPCLK = v->RequiredDISPCLK[i][j]; 3805 myPipe.PixelClock = v->PixelClock[k]; 3806 myPipe.DCFCLKDeepSleep = v->ProjectedDCFCLKDeepSleep[i][j]; 3807 myPipe.DPPPerPlane = v->NoOfDPP[i][j][k]; 3808 myPipe.ScalerEnabled = v->ScalerEnabled[k]; 3809 myPipe.VRatio = mode_lib->vba.VRatio[k]; 3810 myPipe.VRatioChroma = mode_lib->vba.VRatioChroma[k]; 3811 3812 myPipe.SourceScan = v->SourceScan[k]; 3813 myPipe.BlockWidth256BytesY = v->Read256BlockWidthY[k]; 3814 myPipe.BlockHeight256BytesY = v->Read256BlockHeightY[k]; 3815 myPipe.BlockWidth256BytesC = v->Read256BlockWidthC[k]; 3816 myPipe.BlockHeight256BytesC = v->Read256BlockHeightC[k]; 3817 myPipe.InterlaceEnable = v->Interlace[k]; 3818 myPipe.NumberOfCursors = v->NumberOfCursors[k]; 3819 myPipe.VBlank = v->VTotal[k] - v->VActive[k]; 3820 myPipe.HTotal = v->HTotal[k]; 3821 myPipe.DCCEnable = v->DCCEnable[k]; 3822 myPipe.ODMCombineIsEnabled = v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1 3823 || v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1; 3824 myPipe.SourcePixelFormat = v->SourcePixelFormat[k]; 3825 
myPipe.BytePerPixelY = v->BytePerPixelY[k]; 3826 myPipe.BytePerPixelC = v->BytePerPixelC[k]; 3827 myPipe.ProgressiveToInterlaceUnitInOPP = v->ProgressiveToInterlaceUnitInOPP; 3828 v->NoTimeForPrefetch[i][j][k] = CalculatePrefetchSchedule( 3829 mode_lib, 3830 HostVMInefficiencyFactor, 3831 &myPipe, 3832 v->DSCDelayPerState[i][k], 3833 v->DPPCLKDelaySubtotal + v->DPPCLKDelayCNVCFormater, 3834 v->DPPCLKDelaySCL, 3835 v->DPPCLKDelaySCLLBOnly, 3836 v->DPPCLKDelayCNVCCursor, 3837 v->DISPCLKDelaySubtotal, 3838 v->SwathWidthYThisState[k] / v->HRatio[k], 3839 v->OutputFormat[k], 3840 v->MaxInterDCNTileRepeaters, 3841 dml_min(v->MaxVStartup, v->MaximumVStartup[i][j][k]), 3842 v->MaximumVStartup[i][j][k], 3843 v->GPUVMMaxPageTableLevels, 3844 v->GPUVMEnable, 3845 v->HostVMEnable, 3846 v->HostVMMaxNonCachedPageTableLevels, 3847 v->HostVMMinPageSize, 3848 v->DynamicMetadataEnable[k], 3849 v->DynamicMetadataVMEnabled, 3850 v->DynamicMetadataLinesBeforeActiveRequired[k], 3851 v->DynamicMetadataTransmittedBytes[k], 3852 v->UrgLatency[i], 3853 v->ExtraLatency, 3854 v->TimeCalc, 3855 v->PDEAndMetaPTEBytesPerFrame[i][j][k], 3856 v->MetaRowBytes[i][j][k], 3857 v->DPTEBytesPerRow[i][j][k], 3858 v->PrefetchLinesY[i][j][k], 3859 v->SwathWidthYThisState[k], 3860 v->PrefillY[k], 3861 v->MaxNumSwY[k], 3862 v->PrefetchLinesC[i][j][k], 3863 v->SwathWidthCThisState[k], 3864 v->PrefillC[k], 3865 v->MaxNumSwC[k], 3866 v->swath_width_luma_ub_this_state[k], 3867 v->swath_width_chroma_ub_this_state[k], 3868 v->SwathHeightYThisState[k], 3869 v->SwathHeightCThisState[k], 3870 v->TWait, 3871 &v->DSTXAfterScaler[k], 3872 &v->DSTYAfterScaler[k], 3873 &v->LineTimesForPrefetch[k], 3874 &v->PrefetchBW[k], 3875 &v->LinesForMetaPTE[k], 3876 &v->LinesForMetaAndDPTERow[k], 3877 &v->VRatioPreY[i][j][k], 3878 &v->VRatioPreC[i][j][k], 3879 &v->RequiredPrefetchPixelDataBWLuma[i][j][k], 3880 &v->RequiredPrefetchPixelDataBWChroma[i][j][k], 3881 &v->NoTimeForDynamicMetadata[i][j][k], 3882 &v->Tno_bw[k], 3883 
&v->prefetch_vmrow_bw[k], 3884 &v->dummy7[k], 3885 &v->dummy8[k], 3886 &v->dummy13[k], 3887 &v->VUpdateOffsetPix[k], 3888 &v->VUpdateWidthPix[k], 3889 &v->VReadyOffsetPix[k]); 3890 } 3891 3892 void dml314_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_lib) 3893 { 3894 struct vba_vars_st *v = &mode_lib->vba; 3895 3896 int i, j; 3897 unsigned int k, m; 3898 int ReorderingBytes; 3899 int MinPrefetchMode = 0, MaxPrefetchMode = 2; 3900 bool NoChroma = true; 3901 bool EnoughWritebackUnits = true; 3902 bool P2IWith420 = false; 3903 bool DSCOnlyIfNecessaryWithBPP = false; 3904 bool DSC422NativeNotSupported = false; 3905 double MaxTotalVActiveRDBandwidth; 3906 bool ViewportExceedsSurface = false; 3907 bool FMTBufferExceeded = false; 3908 3909 /*MODE SUPPORT, VOLTAGE STATE AND SOC CONFIGURATION*/ 3910 3911 CalculateMinAndMaxPrefetchMode( 3912 mode_lib->vba.AllowDRAMSelfRefreshOrDRAMClockChangeInVblank, 3913 &MinPrefetchMode, &MaxPrefetchMode); 3914 3915 /*Scale Ratio, taps Support Check*/ 3916 3917 v->ScaleRatioAndTapsSupport = true; 3918 for (k = 0; k < v->NumberOfActivePlanes; k++) { 3919 if (v->ScalerEnabled[k] == false 3920 && ((v->SourcePixelFormat[k] != dm_444_64 && v->SourcePixelFormat[k] != dm_444_32 3921 && v->SourcePixelFormat[k] != dm_444_16 && v->SourcePixelFormat[k] != dm_mono_16 3922 && v->SourcePixelFormat[k] != dm_mono_8 && v->SourcePixelFormat[k] != dm_rgbe 3923 && v->SourcePixelFormat[k] != dm_rgbe_alpha) || v->HRatio[k] != 1.0 || v->htaps[k] != 1.0 3924 || v->VRatio[k] != 1.0 || v->vtaps[k] != 1.0)) { 3925 v->ScaleRatioAndTapsSupport = false; 3926 } else if (v->vtaps[k] < 1.0 || v->vtaps[k] > 8.0 || v->htaps[k] < 1.0 || v->htaps[k] > 8.0 3927 || (v->htaps[k] > 1.0 && (v->htaps[k] % 2) == 1) || v->HRatio[k] > v->MaxHSCLRatio 3928 || v->VRatio[k] > v->MaxVSCLRatio || v->HRatio[k] > v->htaps[k] 3929 || v->VRatio[k] > v->vtaps[k] 3930 || (v->SourcePixelFormat[k] != dm_444_64 && v->SourcePixelFormat[k] != dm_444_32 3931 && 
v->SourcePixelFormat[k] != dm_444_16 && v->SourcePixelFormat[k] != dm_mono_16 3932 && v->SourcePixelFormat[k] != dm_mono_8 && v->SourcePixelFormat[k] != dm_rgbe 3933 && (v->VTAPsChroma[k] < 1 || v->VTAPsChroma[k] > 8 || v->HTAPsChroma[k] < 1 3934 || v->HTAPsChroma[k] > 8 || (v->HTAPsChroma[k] > 1 && v->HTAPsChroma[k] % 2 == 1) 3935 || v->HRatioChroma[k] > v->MaxHSCLRatio 3936 || v->VRatioChroma[k] > v->MaxVSCLRatio 3937 || v->HRatioChroma[k] > v->HTAPsChroma[k] 3938 || v->VRatioChroma[k] > v->VTAPsChroma[k]))) { 3939 v->ScaleRatioAndTapsSupport = false; 3940 } 3941 } 3942 /*Source Format, Pixel Format and Scan Support Check*/ 3943 3944 v->SourceFormatPixelAndScanSupport = true; 3945 for (k = 0; k < v->NumberOfActivePlanes; k++) { 3946 if (v->SurfaceTiling[k] == dm_sw_linear && (!(v->SourceScan[k] != dm_vert) || v->DCCEnable[k] == true)) { 3947 v->SourceFormatPixelAndScanSupport = false; 3948 } 3949 } 3950 /*Bandwidth Support Check*/ 3951 3952 for (k = 0; k < v->NumberOfActivePlanes; k++) { 3953 CalculateBytePerPixelAnd256BBlockSizes( 3954 v->SourcePixelFormat[k], 3955 v->SurfaceTiling[k], 3956 &v->BytePerPixelY[k], 3957 &v->BytePerPixelC[k], 3958 &v->BytePerPixelInDETY[k], 3959 &v->BytePerPixelInDETC[k], 3960 &v->Read256BlockHeightY[k], 3961 &v->Read256BlockHeightC[k], 3962 &v->Read256BlockWidthY[k], 3963 &v->Read256BlockWidthC[k]); 3964 } 3965 for (k = 0; k < v->NumberOfActivePlanes; k++) { 3966 if (v->SourceScan[k] != dm_vert) { 3967 v->SwathWidthYSingleDPP[k] = v->ViewportWidth[k]; 3968 v->SwathWidthCSingleDPP[k] = v->ViewportWidthChroma[k]; 3969 } else { 3970 v->SwathWidthYSingleDPP[k] = v->ViewportHeight[k]; 3971 v->SwathWidthCSingleDPP[k] = v->ViewportHeightChroma[k]; 3972 } 3973 } 3974 for (k = 0; k < v->NumberOfActivePlanes; k++) { 3975 v->ReadBandwidthLuma[k] = v->SwathWidthYSingleDPP[k] * dml_ceil(v->BytePerPixelInDETY[k], 1.0) 3976 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatio[k]; 3977 v->ReadBandwidthChroma[k] = v->SwathWidthYSingleDPP[k] / 2 * 
dml_ceil(v->BytePerPixelInDETC[k], 2.0) 3978 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatio[k] / 2.0; 3979 } 3980 for (k = 0; k < v->NumberOfActivePlanes; k++) { 3981 if (v->WritebackEnable[k] == true && v->WritebackPixelFormat[k] == dm_444_64) { 3982 v->WriteBandwidth[k] = v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k] 3983 / (v->WritebackSourceHeight[k] * v->HTotal[k] / v->PixelClock[k]) * 8.0; 3984 } else if (v->WritebackEnable[k] == true) { 3985 v->WriteBandwidth[k] = v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k] 3986 / (v->WritebackSourceHeight[k] * v->HTotal[k] / v->PixelClock[k]) * 4.0; 3987 } else { 3988 v->WriteBandwidth[k] = 0.0; 3989 } 3990 } 3991 3992 /*Writeback Latency support check*/ 3993 3994 v->WritebackLatencySupport = true; 3995 for (k = 0; k < v->NumberOfActivePlanes; k++) { 3996 if (v->WritebackEnable[k] == true && (v->WriteBandwidth[k] > v->WritebackInterfaceBufferSize * 1024 / v->WritebackLatency)) { 3997 v->WritebackLatencySupport = false; 3998 } 3999 } 4000 4001 /*Writeback Mode Support Check*/ 4002 4003 v->TotalNumberOfActiveWriteback = 0; 4004 for (k = 0; k < v->NumberOfActivePlanes; k++) { 4005 if (v->WritebackEnable[k] == true) { 4006 v->TotalNumberOfActiveWriteback = v->TotalNumberOfActiveWriteback + 1; 4007 } 4008 } 4009 4010 if (v->TotalNumberOfActiveWriteback > v->MaxNumWriteback) { 4011 EnoughWritebackUnits = false; 4012 } 4013 4014 /*Writeback Scale Ratio and Taps Support Check*/ 4015 4016 v->WritebackScaleRatioAndTapsSupport = true; 4017 for (k = 0; k < v->NumberOfActivePlanes; k++) { 4018 if (v->WritebackEnable[k] == true) { 4019 if (v->WritebackHRatio[k] > v->WritebackMaxHSCLRatio || v->WritebackVRatio[k] > v->WritebackMaxVSCLRatio 4020 || v->WritebackHRatio[k] < v->WritebackMinHSCLRatio 4021 || v->WritebackVRatio[k] < v->WritebackMinVSCLRatio 4022 || v->WritebackHTaps[k] > v->WritebackMaxHSCLTaps 4023 || v->WritebackVTaps[k] > v->WritebackMaxVSCLTaps 4024 || v->WritebackHRatio[k] > 
v->WritebackHTaps[k] || v->WritebackVRatio[k] > v->WritebackVTaps[k] 4025 || (v->WritebackHTaps[k] > 2.0 && ((v->WritebackHTaps[k] % 2) == 1))) { 4026 v->WritebackScaleRatioAndTapsSupport = false; 4027 } 4028 if (2.0 * v->WritebackDestinationWidth[k] * (v->WritebackVTaps[k] - 1) * 57 > v->WritebackLineBufferSize) { 4029 v->WritebackScaleRatioAndTapsSupport = false; 4030 } 4031 } 4032 } 4033 /*Maximum DISPCLK/DPPCLK Support check*/ 4034 4035 v->WritebackRequiredDISPCLK = 0.0; 4036 for (k = 0; k < v->NumberOfActivePlanes; k++) { 4037 if (v->WritebackEnable[k] == true) { 4038 v->WritebackRequiredDISPCLK = dml_max( 4039 v->WritebackRequiredDISPCLK, 4040 dml314_CalculateWriteBackDISPCLK( 4041 v->WritebackPixelFormat[k], 4042 v->PixelClock[k], 4043 v->WritebackHRatio[k], 4044 v->WritebackVRatio[k], 4045 v->WritebackHTaps[k], 4046 v->WritebackVTaps[k], 4047 v->WritebackSourceWidth[k], 4048 v->WritebackDestinationWidth[k], 4049 v->HTotal[k], 4050 v->WritebackLineBufferSize)); 4051 } 4052 } 4053 for (k = 0; k < v->NumberOfActivePlanes; k++) { 4054 if (v->HRatio[k] > 1.0) { 4055 v->PSCL_FACTOR[k] = dml_min( 4056 v->MaxDCHUBToPSCLThroughput, 4057 v->MaxPSCLToLBThroughput * v->HRatio[k] / dml_ceil(v->htaps[k] / 6.0, 1.0)); 4058 } else { 4059 v->PSCL_FACTOR[k] = dml_min(v->MaxDCHUBToPSCLThroughput, v->MaxPSCLToLBThroughput); 4060 } 4061 if (v->BytePerPixelC[k] == 0.0) { 4062 v->PSCL_FACTOR_CHROMA[k] = 0.0; 4063 v->MinDPPCLKUsingSingleDPP[k] = v->PixelClock[k] 4064 * dml_max3( 4065 v->vtaps[k] / 6.0 * dml_min(1.0, v->HRatio[k]), 4066 v->HRatio[k] * v->VRatio[k] / v->PSCL_FACTOR[k], 4067 1.0); 4068 if ((v->htaps[k] > 6.0 || v->vtaps[k] > 6.0) && v->MinDPPCLKUsingSingleDPP[k] < 2.0 * v->PixelClock[k]) { 4069 v->MinDPPCLKUsingSingleDPP[k] = 2.0 * v->PixelClock[k]; 4070 } 4071 } else { 4072 if (v->HRatioChroma[k] > 1.0) { 4073 v->PSCL_FACTOR_CHROMA[k] = dml_min( 4074 v->MaxDCHUBToPSCLThroughput, 4075 v->MaxPSCLToLBThroughput * v->HRatioChroma[k] / dml_ceil(v->HTAPsChroma[k] / 6.0, 
1.0)); 4076 } else { 4077 v->PSCL_FACTOR_CHROMA[k] = dml_min(v->MaxDCHUBToPSCLThroughput, v->MaxPSCLToLBThroughput); 4078 } 4079 v->MinDPPCLKUsingSingleDPP[k] = v->PixelClock[k] 4080 * dml_max5( 4081 v->vtaps[k] / 6.0 * dml_min(1.0, v->HRatio[k]), 4082 v->HRatio[k] * v->VRatio[k] / v->PSCL_FACTOR[k], 4083 v->VTAPsChroma[k] / 6.0 * dml_min(1.0, v->HRatioChroma[k]), 4084 v->HRatioChroma[k] * v->VRatioChroma[k] / v->PSCL_FACTOR_CHROMA[k], 4085 1.0); 4086 if ((v->htaps[k] > 6.0 || v->vtaps[k] > 6.0 || v->HTAPsChroma[k] > 6.0 || v->VTAPsChroma[k] > 6.0) 4087 && v->MinDPPCLKUsingSingleDPP[k] < 2.0 * v->PixelClock[k]) { 4088 v->MinDPPCLKUsingSingleDPP[k] = 2.0 * v->PixelClock[k]; 4089 } 4090 } 4091 } 4092 for (k = 0; k < v->NumberOfActivePlanes; k++) { 4093 int MaximumSwathWidthSupportLuma; 4094 int MaximumSwathWidthSupportChroma; 4095 4096 if (v->SurfaceTiling[k] == dm_sw_linear) { 4097 MaximumSwathWidthSupportLuma = 8192.0; 4098 } else if (v->SourceScan[k] == dm_vert && v->BytePerPixelC[k] > 0) { 4099 MaximumSwathWidthSupportLuma = 2880.0; 4100 } else if (v->SourcePixelFormat[k] == dm_rgbe_alpha) { 4101 MaximumSwathWidthSupportLuma = 3840.0; 4102 } else { 4103 MaximumSwathWidthSupportLuma = 5760.0; 4104 } 4105 4106 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12) { 4107 MaximumSwathWidthSupportChroma = MaximumSwathWidthSupportLuma / 2.0; 4108 } else { 4109 MaximumSwathWidthSupportChroma = MaximumSwathWidthSupportLuma; 4110 } 4111 v->MaximumSwathWidthInLineBufferLuma = v->LineBufferSize * dml_max(v->HRatio[k], 1.0) / v->LBBitPerPixel[k] 4112 / (v->vtaps[k] + dml_max(dml_ceil(v->VRatio[k], 1.0) - 2, 0.0)); 4113 if (v->BytePerPixelC[k] == 0.0) { 4114 v->MaximumSwathWidthInLineBufferChroma = 0; 4115 } else { 4116 v->MaximumSwathWidthInLineBufferChroma = v->LineBufferSize * dml_max(v->HRatioChroma[k], 1.0) / v->LBBitPerPixel[k] 4117 / (v->VTAPsChroma[k] + dml_max(dml_ceil(v->VRatioChroma[k], 1.0) - 2, 
0.0)); 4118 } 4119 v->MaximumSwathWidthLuma[k] = dml_min(MaximumSwathWidthSupportLuma, v->MaximumSwathWidthInLineBufferLuma); 4120 v->MaximumSwathWidthChroma[k] = dml_min(MaximumSwathWidthSupportChroma, v->MaximumSwathWidthInLineBufferChroma); 4121 } 4122 4123 CalculateSwathAndDETConfiguration( 4124 true, 4125 v->NumberOfActivePlanes, 4126 v->DETBufferSizeInKByte[0], 4127 v->MaximumSwathWidthLuma, 4128 v->MaximumSwathWidthChroma, 4129 v->SourceScan, 4130 v->SourcePixelFormat, 4131 v->SurfaceTiling, 4132 v->ViewportWidth, 4133 v->ViewportHeight, 4134 v->SurfaceWidthY, 4135 v->SurfaceWidthC, 4136 v->SurfaceHeightY, 4137 v->SurfaceHeightC, 4138 v->Read256BlockHeightY, 4139 v->Read256BlockHeightC, 4140 v->Read256BlockWidthY, 4141 v->Read256BlockWidthC, 4142 v->odm_combine_dummy, 4143 v->BlendingAndTiming, 4144 v->BytePerPixelY, 4145 v->BytePerPixelC, 4146 v->BytePerPixelInDETY, 4147 v->BytePerPixelInDETC, 4148 v->HActive, 4149 v->HRatio, 4150 v->HRatioChroma, 4151 v->NoOfDPPThisState, 4152 v->swath_width_luma_ub_this_state, 4153 v->swath_width_chroma_ub_this_state, 4154 v->SwathWidthYThisState, 4155 v->SwathWidthCThisState, 4156 v->SwathHeightYThisState, 4157 v->SwathHeightCThisState, 4158 v->DETBufferSizeYThisState, 4159 v->DETBufferSizeCThisState, 4160 v->SingleDPPViewportSizeSupportPerPlane, 4161 &v->ViewportSizeSupport[0][0]); 4162 4163 for (i = 0; i < v->soc.num_states; i++) { 4164 for (j = 0; j < 2; j++) { 4165 v->MaxDispclkRoundedDownToDFSGranularity = RoundToDFSGranularityDown(v->MaxDispclk[i], v->DISPCLKDPPCLKVCOSpeed); 4166 v->MaxDppclkRoundedDownToDFSGranularity = RoundToDFSGranularityDown(v->MaxDppclk[i], v->DISPCLKDPPCLKVCOSpeed); 4167 v->RequiredDISPCLK[i][j] = 0.0; 4168 v->DISPCLK_DPPCLK_Support[i][j] = true; 4169 for (k = 0; k < v->NumberOfActivePlanes; k++) { 4170 v->PlaneRequiredDISPCLKWithoutODMCombine = v->PixelClock[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) 4171 * (1.0 + v->DISPCLKRampingMargin / 100.0); 4172 if 
((v->PlaneRequiredDISPCLKWithoutODMCombine >= v->MaxDispclk[i] 4173 && v->MaxDispclk[i] == v->MaxDispclk[v->soc.num_states - 1] 4174 && v->MaxDppclk[i] == v->MaxDppclk[v->soc.num_states - 1])) { 4175 v->PlaneRequiredDISPCLKWithoutODMCombine = v->PixelClock[k] 4176 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0); 4177 } 4178 v->PlaneRequiredDISPCLKWithODMCombine2To1 = v->PixelClock[k] / 2 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) 4179 * (1 + v->DISPCLKRampingMargin / 100.0); 4180 if ((v->PlaneRequiredDISPCLKWithODMCombine2To1 >= v->MaxDispclk[i] 4181 && v->MaxDispclk[i] == v->MaxDispclk[v->soc.num_states - 1] 4182 && v->MaxDppclk[i] == v->MaxDppclk[v->soc.num_states - 1])) { 4183 v->PlaneRequiredDISPCLKWithODMCombine2To1 = v->PixelClock[k] / 2 4184 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0); 4185 } 4186 v->PlaneRequiredDISPCLKWithODMCombine4To1 = v->PixelClock[k] / 4 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) 4187 * (1 + v->DISPCLKRampingMargin / 100.0); 4188 if ((v->PlaneRequiredDISPCLKWithODMCombine4To1 >= v->MaxDispclk[i] 4189 && v->MaxDispclk[i] == v->MaxDispclk[v->soc.num_states - 1] 4190 && v->MaxDppclk[i] == v->MaxDppclk[v->soc.num_states - 1])) { 4191 v->PlaneRequiredDISPCLKWithODMCombine4To1 = v->PixelClock[k] / 4 4192 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0); 4193 } 4194 4195 if (v->ODMCombinePolicy == dm_odm_combine_policy_none 4196 || !(v->Output[k] == dm_dp || 4197 v->Output[k] == dm_dp2p0 || 4198 v->Output[k] == dm_edp)) { 4199 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_disabled; 4200 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithoutODMCombine; 4201 4202 if (v->HActive[k] / 2 > DCN314_MAX_FMT_420_BUFFER_WIDTH) 4203 FMTBufferExceeded = true; 4204 } else if (v->ODMCombinePolicy == dm_odm_combine_policy_2to1) { 4205 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1; 4206 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine2To1; 4207 } else if (v->ODMCombinePolicy == 
dm_odm_combine_policy_4to1 4208 || v->PlaneRequiredDISPCLKWithODMCombine2To1 > v->MaxDispclkRoundedDownToDFSGranularity) { 4209 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_4to1; 4210 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine4To1; 4211 } else if (v->PlaneRequiredDISPCLKWithoutODMCombine > v->MaxDispclkRoundedDownToDFSGranularity) { 4212 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1; 4213 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine2To1; 4214 } else { 4215 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_disabled; 4216 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithoutODMCombine; 4217 } 4218 if (v->DSCEnabled[k] && v->HActive[k] > DCN314_MAX_DSC_IMAGE_WIDTH 4219 && v->ODMCombineEnablePerState[i][k] != dm_odm_combine_mode_4to1) { 4220 if (v->HActive[k] / 2 > DCN314_MAX_DSC_IMAGE_WIDTH) { 4221 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_4to1; 4222 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine4To1; 4223 } else { 4224 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1; 4225 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine2To1; 4226 } 4227 } 4228 if (v->OutputFormat[k] == dm_420 && v->HActive[k] > DCN314_MAX_FMT_420_BUFFER_WIDTH 4229 && v->ODMCombineEnablePerState[i][k] != dm_odm_combine_mode_4to1) { 4230 if (v->HActive[k] / 2 > DCN314_MAX_FMT_420_BUFFER_WIDTH) { 4231 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_4to1; 4232 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine4To1; 4233 4234 if (v->HActive[k] / 4 > DCN314_MAX_FMT_420_BUFFER_WIDTH) 4235 FMTBufferExceeded = true; 4236 } else { 4237 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_2to1; 4238 v->PlaneRequiredDISPCLK = v->PlaneRequiredDISPCLKWithODMCombine2To1; 4239 } 4240 } 4241 if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1) { 4242 v->MPCCombine[i][j][k] = false; 4243 v->NoOfDPP[i][j][k] = 4; 4244 v->RequiredDPPCLK[i][j][k] = 
v->MinDPPCLKUsingSingleDPP[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 4; 4245 } else if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) { 4246 v->MPCCombine[i][j][k] = false; 4247 v->NoOfDPP[i][j][k] = 2; 4248 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 2; 4249 } else if ((v->WhenToDoMPCCombine == dm_mpc_never 4250 || (v->MinDPPCLKUsingSingleDPP[k] * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) 4251 <= v->MaxDppclkRoundedDownToDFSGranularity && v->SingleDPPViewportSizeSupportPerPlane[k] == true))) { 4252 v->MPCCombine[i][j][k] = false; 4253 v->NoOfDPP[i][j][k] = 1; 4254 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0); 4255 } else { 4256 v->MPCCombine[i][j][k] = true; 4257 v->NoOfDPP[i][j][k] = 2; 4258 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 2.0; 4259 } 4260 v->RequiredDISPCLK[i][j] = dml_max(v->RequiredDISPCLK[i][j], v->PlaneRequiredDISPCLK); 4261 if ((v->MinDPPCLKUsingSingleDPP[k] / v->NoOfDPP[i][j][k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) 4262 > v->MaxDppclkRoundedDownToDFSGranularity) 4263 || (v->PlaneRequiredDISPCLK > v->MaxDispclkRoundedDownToDFSGranularity)) { 4264 v->DISPCLK_DPPCLK_Support[i][j] = false; 4265 } 4266 } 4267 v->TotalNumberOfActiveDPP[i][j] = 0; 4268 v->TotalNumberOfSingleDPPPlanes[i][j] = 0; 4269 for (k = 0; k < v->NumberOfActivePlanes; k++) { 4270 v->TotalNumberOfActiveDPP[i][j] = v->TotalNumberOfActiveDPP[i][j] + v->NoOfDPP[i][j][k]; 4271 if (v->NoOfDPP[i][j][k] == 1) 4272 v->TotalNumberOfSingleDPPPlanes[i][j] = v->TotalNumberOfSingleDPPPlanes[i][j] + 1; 4273 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10 4274 || v->SourcePixelFormat[k] == dm_420_12 || v->SourcePixelFormat[k] == dm_rgbe_alpha) 4275 NoChroma = false; 4276 } 4277 4278 // UPTO 4279 if (j == 
1 && v->WhenToDoMPCCombine != dm_mpc_never 4280 && !UnboundedRequest(v->UseUnboundedRequesting, v->TotalNumberOfActiveDPP[i][j], NoChroma, v->Output[0])) { 4281 while (!(v->TotalNumberOfActiveDPP[i][j] >= v->MaxNumDPP || v->TotalNumberOfSingleDPPPlanes[i][j] == 0)) { 4282 double BWOfNonSplitPlaneOfMaximumBandwidth; 4283 unsigned int NumberOfNonSplitPlaneOfMaximumBandwidth; 4284 4285 BWOfNonSplitPlaneOfMaximumBandwidth = 0; 4286 NumberOfNonSplitPlaneOfMaximumBandwidth = 0; 4287 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 4288 if (v->ReadBandwidthLuma[k] + v->ReadBandwidthChroma[k] > BWOfNonSplitPlaneOfMaximumBandwidth 4289 && v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_disabled && v->MPCCombine[i][j][k] == false) { 4290 BWOfNonSplitPlaneOfMaximumBandwidth = v->ReadBandwidthLuma[k] + v->ReadBandwidthChroma[k]; 4291 NumberOfNonSplitPlaneOfMaximumBandwidth = k; 4292 } 4293 } 4294 v->MPCCombine[i][j][NumberOfNonSplitPlaneOfMaximumBandwidth] = true; 4295 v->NoOfDPP[i][j][NumberOfNonSplitPlaneOfMaximumBandwidth] = 2; 4296 v->RequiredDPPCLK[i][j][NumberOfNonSplitPlaneOfMaximumBandwidth] = 4297 v->MinDPPCLKUsingSingleDPP[NumberOfNonSplitPlaneOfMaximumBandwidth] 4298 * (1 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100) / 2; 4299 v->TotalNumberOfActiveDPP[i][j] = v->TotalNumberOfActiveDPP[i][j] + 1; 4300 v->TotalNumberOfSingleDPPPlanes[i][j] = v->TotalNumberOfSingleDPPPlanes[i][j] - 1; 4301 } 4302 } 4303 if (v->TotalNumberOfActiveDPP[i][j] > v->MaxNumDPP) { 4304 v->RequiredDISPCLK[i][j] = 0.0; 4305 v->DISPCLK_DPPCLK_Support[i][j] = true; 4306 for (k = 0; k < v->NumberOfActivePlanes; k++) { 4307 v->ODMCombineEnablePerState[i][k] = dm_odm_combine_mode_disabled; 4308 if (v->SingleDPPViewportSizeSupportPerPlane[k] == false && v->WhenToDoMPCCombine != dm_mpc_never) { 4309 v->MPCCombine[i][j][k] = true; 4310 v->NoOfDPP[i][j][k] = 2; 4311 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] 4312 * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) / 2.0; 4313 } 
else { 4314 v->MPCCombine[i][j][k] = false; 4315 v->NoOfDPP[i][j][k] = 1; 4316 v->RequiredDPPCLK[i][j][k] = v->MinDPPCLKUsingSingleDPP[k] 4317 * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0); 4318 } 4319 if (!(v->MaxDispclk[i] == v->MaxDispclk[v->soc.num_states - 1] 4320 && v->MaxDppclk[i] == v->MaxDppclk[v->soc.num_states - 1])) { 4321 v->PlaneRequiredDISPCLK = v->PixelClock[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) 4322 * (1.0 + v->DISPCLKRampingMargin / 100.0); 4323 } else { 4324 v->PlaneRequiredDISPCLK = v->PixelClock[k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0); 4325 } 4326 v->RequiredDISPCLK[i][j] = dml_max(v->RequiredDISPCLK[i][j], v->PlaneRequiredDISPCLK); 4327 if ((v->MinDPPCLKUsingSingleDPP[k] / v->NoOfDPP[i][j][k] * (1.0 + v->DISPCLKDPPCLKDSCCLKDownSpreading / 100.0) 4328 > v->MaxDppclkRoundedDownToDFSGranularity) 4329 || (v->PlaneRequiredDISPCLK > v->MaxDispclkRoundedDownToDFSGranularity)) { 4330 v->DISPCLK_DPPCLK_Support[i][j] = false; 4331 } 4332 } 4333 v->TotalNumberOfActiveDPP[i][j] = 0.0; 4334 for (k = 0; k < v->NumberOfActivePlanes; k++) { 4335 v->TotalNumberOfActiveDPP[i][j] = v->TotalNumberOfActiveDPP[i][j] + v->NoOfDPP[i][j][k]; 4336 } 4337 } 4338 v->RequiredDISPCLK[i][j] = dml_max(v->RequiredDISPCLK[i][j], v->WritebackRequiredDISPCLK); 4339 if (v->MaxDispclkRoundedDownToDFSGranularity < v->WritebackRequiredDISPCLK) { 4340 v->DISPCLK_DPPCLK_Support[i][j] = false; 4341 } 4342 } 4343 } 4344 4345 /*Total Available Pipes Support Check*/ 4346 4347 for (i = 0; i < v->soc.num_states; i++) { 4348 for (j = 0; j < 2; j++) { 4349 if (v->TotalNumberOfActiveDPP[i][j] <= v->MaxNumDPP) { 4350 v->TotalAvailablePipesSupport[i][j] = true; 4351 } else { 4352 v->TotalAvailablePipesSupport[i][j] = false; 4353 } 4354 } 4355 } 4356 /*Display IO and DSC Support Check*/ 4357 4358 v->NonsupportedDSCInputBPC = false; 4359 for (k = 0; k < v->NumberOfActivePlanes; k++) { 4360 if (!(v->DSCInputBitPerComponent[k] == 12.0 || 
v->DSCInputBitPerComponent[k] == 10.0 || v->DSCInputBitPerComponent[k] == 8.0) 4361 || v->DSCInputBitPerComponent[k] > v->MaximumDSCBitsPerComponent) { 4362 v->NonsupportedDSCInputBPC = true; 4363 } 4364 } 4365 4366 /*Number Of DSC Slices*/ 4367 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 4368 if (v->BlendingAndTiming[k] == k) { 4369 if (v->PixelClockBackEnd[k] > 3200) { 4370 v->NumberOfDSCSlices[k] = dml_ceil(v->PixelClockBackEnd[k] / 400.0, 4.0); 4371 } else if (v->PixelClockBackEnd[k] > 1360) { 4372 v->NumberOfDSCSlices[k] = 8; 4373 } else if (v->PixelClockBackEnd[k] > 680) { 4374 v->NumberOfDSCSlices[k] = 4; 4375 } else if (v->PixelClockBackEnd[k] > 340) { 4376 v->NumberOfDSCSlices[k] = 2; 4377 } else { 4378 v->NumberOfDSCSlices[k] = 1; 4379 } 4380 } else { 4381 v->NumberOfDSCSlices[k] = 0; 4382 } 4383 } 4384 4385 for (i = 0; i < v->soc.num_states; i++) { 4386 for (k = 0; k < v->NumberOfActivePlanes; k++) { 4387 v->RequiresDSC[i][k] = false; 4388 v->RequiresFEC[i][k] = false; 4389 if (v->BlendingAndTiming[k] == k) { 4390 if (v->Output[k] == dm_hdmi) { 4391 v->RequiresDSC[i][k] = false; 4392 v->RequiresFEC[i][k] = false; 4393 v->OutputBppPerState[i][k] = TruncToValidBPP( 4394 dml_min(600.0, v->PHYCLKPerState[i]) * 10, 4395 3, 4396 v->HTotal[k], 4397 v->HActive[k], 4398 v->PixelClockBackEnd[k], 4399 v->ForcedOutputLinkBPP[k], 4400 false, 4401 v->Output[k], 4402 v->OutputFormat[k], 4403 v->DSCInputBitPerComponent[k], 4404 v->NumberOfDSCSlices[k], 4405 v->AudioSampleRate[k], 4406 v->AudioSampleLayout[k], 4407 v->ODMCombineEnablePerState[i][k]); 4408 } else if (v->Output[k] == dm_dp || v->Output[k] == dm_edp || v->Output[k] == dm_dp2p0) { 4409 if (v->DSCEnable[k] == true) { 4410 v->RequiresDSC[i][k] = true; 4411 v->LinkDSCEnable = true; 4412 if (v->Output[k] == dm_dp || v->Output[k] == dm_dp2p0) { 4413 v->RequiresFEC[i][k] = true; 4414 } else { 4415 v->RequiresFEC[i][k] = false; 4416 } 4417 } else { 4418 v->RequiresDSC[i][k] = false; 4419 v->LinkDSCEnable = 
false; 4420 if (v->Output[k] == dm_dp2p0) { 4421 v->RequiresFEC[i][k] = true; 4422 } else { 4423 v->RequiresFEC[i][k] = false; 4424 } 4425 } 4426 if (v->Output[k] == dm_dp2p0) { 4427 v->Outbpp = BPP_INVALID; 4428 if ((v->OutputLinkDPRate[k] == dm_dp_rate_na || v->OutputLinkDPRate[k] == dm_dp_rate_uhbr10) && 4429 v->PHYCLKD18PerState[k] >= 10000.0 / 18.0) { 4430 v->Outbpp = TruncToValidBPP( 4431 (1.0 - v->Downspreading / 100.0) * 10000, 4432 v->OutputLinkDPLanes[k], 4433 v->HTotal[k], 4434 v->HActive[k], 4435 v->PixelClockBackEnd[k], 4436 v->ForcedOutputLinkBPP[k], 4437 v->LinkDSCEnable, 4438 v->Output[k], 4439 v->OutputFormat[k], 4440 v->DSCInputBitPerComponent[k], 4441 v->NumberOfDSCSlices[k], 4442 v->AudioSampleRate[k], 4443 v->AudioSampleLayout[k], 4444 v->ODMCombineEnablePerState[i][k]); 4445 if (v->Outbpp == BPP_INVALID && v->PHYCLKD18PerState[k] < 13500.0 / 18.0 && 4446 v->DSCEnable[k] == true && v->ForcedOutputLinkBPP[k] == 0) { 4447 v->RequiresDSC[i][k] = true; 4448 v->LinkDSCEnable = true; 4449 v->Outbpp = TruncToValidBPP( 4450 (1.0 - v->Downspreading / 100.0) * 10000, 4451 v->OutputLinkDPLanes[k], 4452 v->HTotal[k], 4453 v->HActive[k], 4454 v->PixelClockBackEnd[k], 4455 v->ForcedOutputLinkBPP[k], 4456 v->LinkDSCEnable, 4457 v->Output[k], 4458 v->OutputFormat[k], 4459 v->DSCInputBitPerComponent[k], 4460 v->NumberOfDSCSlices[k], 4461 v->AudioSampleRate[k], 4462 v->AudioSampleLayout[k], 4463 v->ODMCombineEnablePerState[i][k]); 4464 } 4465 v->OutputBppPerState[i][k] = v->Outbpp; 4466 // TODO: Need some other way to handle this nonsense 4467 // v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " UHBR10" 4468 } 4469 if (v->Outbpp == BPP_INVALID && 4470 (v->OutputLinkDPRate[k] == dm_dp_rate_na || v->OutputLinkDPRate[k] == dm_dp_rate_uhbr13p5) && 4471 v->PHYCLKD18PerState[k] >= 13500.0 / 18.0) { 4472 v->Outbpp = TruncToValidBPP( 4473 (1.0 - v->Downspreading / 100.0) * 13500, 4474 v->OutputLinkDPLanes[k], 4475 v->HTotal[k], 4476 v->HActive[k], 4477 
v->PixelClockBackEnd[k], 4478 v->ForcedOutputLinkBPP[k], 4479 v->LinkDSCEnable, 4480 v->Output[k], 4481 v->OutputFormat[k], 4482 v->DSCInputBitPerComponent[k], 4483 v->NumberOfDSCSlices[k], 4484 v->AudioSampleRate[k], 4485 v->AudioSampleLayout[k], 4486 v->ODMCombineEnablePerState[i][k]); 4487 if (v->Outbpp == BPP_INVALID && v->PHYCLKD18PerState[k] < 20000.0 / 18.0 && 4488 v->DSCEnable[k] == true && v->ForcedOutputLinkBPP[k] == 0) { 4489 v->RequiresDSC[i][k] = true; 4490 v->LinkDSCEnable = true; 4491 v->Outbpp = TruncToValidBPP( 4492 (1.0 - v->Downspreading / 100.0) * 13500, 4493 v->OutputLinkDPLanes[k], 4494 v->HTotal[k], 4495 v->HActive[k], 4496 v->PixelClockBackEnd[k], 4497 v->ForcedOutputLinkBPP[k], 4498 v->LinkDSCEnable, 4499 v->Output[k], 4500 v->OutputFormat[k], 4501 v->DSCInputBitPerComponent[k], 4502 v->NumberOfDSCSlices[k], 4503 v->AudioSampleRate[k], 4504 v->AudioSampleLayout[k], 4505 v->ODMCombineEnablePerState[i][k]); 4506 } 4507 v->OutputBppPerState[i][k] = v->Outbpp; 4508 // TODO: Need some other way to handle this nonsense 4509 // v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " UHBR13p5" 4510 } 4511 if (v->Outbpp == BPP_INVALID && 4512 (v->OutputLinkDPRate[k] == dm_dp_rate_na || v->OutputLinkDPRate[k] == dm_dp_rate_uhbr20) && 4513 v->PHYCLKD18PerState[k] >= 20000.0 / 18.0) { 4514 v->Outbpp = TruncToValidBPP( 4515 (1.0 - v->Downspreading / 100.0) * 20000, 4516 v->OutputLinkDPLanes[k], 4517 v->HTotal[k], 4518 v->HActive[k], 4519 v->PixelClockBackEnd[k], 4520 v->ForcedOutputLinkBPP[k], 4521 v->LinkDSCEnable, 4522 v->Output[k], 4523 v->OutputFormat[k], 4524 v->DSCInputBitPerComponent[k], 4525 v->NumberOfDSCSlices[k], 4526 v->AudioSampleRate[k], 4527 v->AudioSampleLayout[k], 4528 v->ODMCombineEnablePerState[i][k]); 4529 if (v->Outbpp == BPP_INVALID && v->DSCEnable[k] == true && 4530 v->ForcedOutputLinkBPP[k] == 0) { 4531 v->RequiresDSC[i][k] = true; 4532 v->LinkDSCEnable = true; 4533 v->Outbpp = TruncToValidBPP( 4534 (1.0 - v->Downspreading / 100.0) 
* 20000, 4535 v->OutputLinkDPLanes[k], 4536 v->HTotal[k], 4537 v->HActive[k], 4538 v->PixelClockBackEnd[k], 4539 v->ForcedOutputLinkBPP[k], 4540 v->LinkDSCEnable, 4541 v->Output[k], 4542 v->OutputFormat[k], 4543 v->DSCInputBitPerComponent[k], 4544 v->NumberOfDSCSlices[k], 4545 v->AudioSampleRate[k], 4546 v->AudioSampleLayout[k], 4547 v->ODMCombineEnablePerState[i][k]); 4548 } 4549 v->OutputBppPerState[i][k] = v->Outbpp; 4550 // TODO: Need some other way to handle this nonsense 4551 // v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " UHBR20" 4552 } 4553 } else { 4554 v->Outbpp = BPP_INVALID; 4555 if (v->PHYCLKPerState[i] >= 270.0) { 4556 v->Outbpp = TruncToValidBPP( 4557 (1.0 - v->Downspreading / 100.0) * 2700, 4558 v->OutputLinkDPLanes[k], 4559 v->HTotal[k], 4560 v->HActive[k], 4561 v->PixelClockBackEnd[k], 4562 v->ForcedOutputLinkBPP[k], 4563 v->LinkDSCEnable, 4564 v->Output[k], 4565 v->OutputFormat[k], 4566 v->DSCInputBitPerComponent[k], 4567 v->NumberOfDSCSlices[k], 4568 v->AudioSampleRate[k], 4569 v->AudioSampleLayout[k], 4570 v->ODMCombineEnablePerState[i][k]); 4571 v->OutputBppPerState[i][k] = v->Outbpp; 4572 // TODO: Need some other way to handle this nonsense 4573 // v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " HBR" 4574 } 4575 if (v->Outbpp == BPP_INVALID && v->PHYCLKPerState[i] >= 540.0) { 4576 v->Outbpp = TruncToValidBPP( 4577 (1.0 - v->Downspreading / 100.0) * 5400, 4578 v->OutputLinkDPLanes[k], 4579 v->HTotal[k], 4580 v->HActive[k], 4581 v->PixelClockBackEnd[k], 4582 v->ForcedOutputLinkBPP[k], 4583 v->LinkDSCEnable, 4584 v->Output[k], 4585 v->OutputFormat[k], 4586 v->DSCInputBitPerComponent[k], 4587 v->NumberOfDSCSlices[k], 4588 v->AudioSampleRate[k], 4589 v->AudioSampleLayout[k], 4590 v->ODMCombineEnablePerState[i][k]); 4591 v->OutputBppPerState[i][k] = v->Outbpp; 4592 // TODO: Need some other way to handle this nonsense 4593 // v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " HBR2" 4594 } 4595 if (v->Outbpp == BPP_INVALID && 
v->PHYCLKPerState[i] >= 810.0) { 4596 v->Outbpp = TruncToValidBPP( 4597 (1.0 - v->Downspreading / 100.0) * 8100, 4598 v->OutputLinkDPLanes[k], 4599 v->HTotal[k], 4600 v->HActive[k], 4601 v->PixelClockBackEnd[k], 4602 v->ForcedOutputLinkBPP[k], 4603 v->LinkDSCEnable, 4604 v->Output[k], 4605 v->OutputFormat[k], 4606 v->DSCInputBitPerComponent[k], 4607 v->NumberOfDSCSlices[k], 4608 v->AudioSampleRate[k], 4609 v->AudioSampleLayout[k], 4610 v->ODMCombineEnablePerState[i][k]); 4611 v->OutputBppPerState[i][k] = v->Outbpp; 4612 // TODO: Need some other way to handle this nonsense 4613 // v->OutputTypeAndRatePerState[i][k] = v->Output[k] & " HBR3" 4614 } 4615 } 4616 } 4617 } else { 4618 v->OutputBppPerState[i][k] = 0; 4619 } 4620 } 4621 } 4622 4623 for (i = 0; i < v->soc.num_states; i++) { 4624 v->LinkCapacitySupport[i] = true; 4625 for (k = 0; k < v->NumberOfActivePlanes; k++) { 4626 if (v->BlendingAndTiming[k] == k 4627 && (v->Output[k] == dm_dp || 4628 v->Output[k] == dm_edp || 4629 v->Output[k] == dm_hdmi) && v->OutputBppPerState[i][k] == 0) { 4630 v->LinkCapacitySupport[i] = false; 4631 } 4632 } 4633 } 4634 4635 // UPTO 2172 4636 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 4637 if (v->BlendingAndTiming[k] == k 4638 && (v->Output[k] == dm_dp || 4639 v->Output[k] == dm_edp || 4640 v->Output[k] == dm_hdmi)) { 4641 if (v->OutputFormat[k] == dm_420 && v->Interlace[k] == 1 && v->ProgressiveToInterlaceUnitInOPP == true) { 4642 P2IWith420 = true; 4643 } 4644 if (v->DSCEnable[k] == true && v->OutputFormat[k] == dm_n422 4645 && !v->DSC422NativeSupport) { 4646 DSC422NativeNotSupported = true; 4647 } 4648 } 4649 } 4650 4651 4652 for (i = 0; i < v->soc.num_states; ++i) { 4653 v->ODMCombine4To1SupportCheckOK[i] = true; 4654 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 4655 if (v->BlendingAndTiming[k] == k && v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1 4656 && (v->ODMCombine4To1Supported == false || v->Output[k] == dm_dp || v->Output[k] == dm_edp 4657 || 
v->Output[k] == dm_hdmi)) { 4658 v->ODMCombine4To1SupportCheckOK[i] = false; 4659 } 4660 } 4661 } 4662 4663 /* Skip dscclk validation: as long as dispclk is supported, dscclk is also implicitly supported */ 4664 4665 for (i = 0; i < v->soc.num_states; i++) { 4666 v->NotEnoughDSCUnits[i] = false; 4667 v->TotalDSCUnitsRequired = 0.0; 4668 for (k = 0; k < v->NumberOfActivePlanes; k++) { 4669 if (v->RequiresDSC[i][k] == true) { 4670 if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_4to1) { 4671 v->TotalDSCUnitsRequired = v->TotalDSCUnitsRequired + 4.0; 4672 } else if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) { 4673 v->TotalDSCUnitsRequired = v->TotalDSCUnitsRequired + 2.0; 4674 } else { 4675 v->TotalDSCUnitsRequired = v->TotalDSCUnitsRequired + 1.0; 4676 } 4677 } 4678 } 4679 if (v->TotalDSCUnitsRequired > v->NumberOfDSC) { 4680 v->NotEnoughDSCUnits[i] = true; 4681 } 4682 } 4683 /*DSC Delay per state*/ 4684 4685 for (i = 0; i < v->soc.num_states; i++) { 4686 for (k = 0; k < v->NumberOfActivePlanes; k++) { 4687 if (v->OutputBppPerState[i][k] == BPP_INVALID) { 4688 v->BPP = 0.0; 4689 } else { 4690 v->BPP = v->OutputBppPerState[i][k]; 4691 } 4692 if (v->RequiresDSC[i][k] == true && v->BPP != 0.0) { 4693 if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_disabled) { 4694 v->DSCDelayPerState[i][k] = dscceComputeDelay( 4695 v->DSCInputBitPerComponent[k], 4696 v->BPP, 4697 dml_ceil(1.0 * v->HActive[k] / v->NumberOfDSCSlices[k], 1.0), 4698 v->NumberOfDSCSlices[k], 4699 v->OutputFormat[k], 4700 v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k]); 4701 } else if (v->ODMCombineEnablePerState[i][k] == dm_odm_combine_mode_2to1) { 4702 v->DSCDelayPerState[i][k] = 2.0 4703 * (dscceComputeDelay( 4704 v->DSCInputBitPerComponent[k], 4705 v->BPP, 4706 dml_ceil(1.0 * v->HActive[k] / v->NumberOfDSCSlices[k], 1.0), 4707 v->NumberOfDSCSlices[k] / 2, 4708 v->OutputFormat[k], 4709 v->Output[k]) + dscComputeDelay(v->OutputFormat[k], 
v->Output[k])); 4710 } else { 4711 v->DSCDelayPerState[i][k] = 4.0 4712 * (dscceComputeDelay( 4713 v->DSCInputBitPerComponent[k], 4714 v->BPP, 4715 dml_ceil(1.0 * v->HActive[k] / v->NumberOfDSCSlices[k], 1.0), 4716 v->NumberOfDSCSlices[k] / 4, 4717 v->OutputFormat[k], 4718 v->Output[k]) + dscComputeDelay(v->OutputFormat[k], v->Output[k])); 4719 } 4720 v->DSCDelayPerState[i][k] = v->DSCDelayPerState[i][k] * v->PixelClock[k] / v->PixelClockBackEnd[k]; 4721 } else { 4722 v->DSCDelayPerState[i][k] = 0.0; 4723 } 4724 } 4725 for (k = 0; k < v->NumberOfActivePlanes; k++) { 4726 for (m = 0; m < v->NumberOfActivePlanes; m++) { 4727 if (v->BlendingAndTiming[k] == m && v->RequiresDSC[i][m] == true) { 4728 v->DSCDelayPerState[i][k] = v->DSCDelayPerState[i][m]; 4729 } 4730 } 4731 } 4732 } 4733 4734 //Calculate Swath, DET Configuration, DCFCLKDeepSleep 4735 // 4736 for (i = 0; i < v->soc.num_states; ++i) { 4737 for (j = 0; j <= 1; ++j) { 4738 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 4739 v->RequiredDPPCLKThisState[k] = v->RequiredDPPCLK[i][j][k]; 4740 v->NoOfDPPThisState[k] = v->NoOfDPP[i][j][k]; 4741 v->ODMCombineEnableThisState[k] = v->ODMCombineEnablePerState[i][k]; 4742 } 4743 4744 CalculateSwathAndDETConfiguration( 4745 false, 4746 v->NumberOfActivePlanes, 4747 v->DETBufferSizeInKByte[0], 4748 v->MaximumSwathWidthLuma, 4749 v->MaximumSwathWidthChroma, 4750 v->SourceScan, 4751 v->SourcePixelFormat, 4752 v->SurfaceTiling, 4753 v->ViewportWidth, 4754 v->ViewportHeight, 4755 v->SurfaceWidthY, 4756 v->SurfaceWidthC, 4757 v->SurfaceHeightY, 4758 v->SurfaceHeightC, 4759 v->Read256BlockHeightY, 4760 v->Read256BlockHeightC, 4761 v->Read256BlockWidthY, 4762 v->Read256BlockWidthC, 4763 v->ODMCombineEnableThisState, 4764 v->BlendingAndTiming, 4765 v->BytePerPixelY, 4766 v->BytePerPixelC, 4767 v->BytePerPixelInDETY, 4768 v->BytePerPixelInDETC, 4769 v->HActive, 4770 v->HRatio, 4771 v->HRatioChroma, 4772 v->NoOfDPPThisState, 4773 v->swath_width_luma_ub_this_state, 4774 
v->swath_width_chroma_ub_this_state, 4775 v->SwathWidthYThisState, 4776 v->SwathWidthCThisState, 4777 v->SwathHeightYThisState, 4778 v->SwathHeightCThisState, 4779 v->DETBufferSizeYThisState, 4780 v->DETBufferSizeCThisState, 4781 v->dummystring, 4782 &v->ViewportSizeSupport[i][j]); 4783 4784 CalculateDCFCLKDeepSleep( 4785 mode_lib, 4786 v->NumberOfActivePlanes, 4787 v->BytePerPixelY, 4788 v->BytePerPixelC, 4789 v->VRatio, 4790 v->VRatioChroma, 4791 v->SwathWidthYThisState, 4792 v->SwathWidthCThisState, 4793 v->NoOfDPPThisState, 4794 v->HRatio, 4795 v->HRatioChroma, 4796 v->PixelClock, 4797 v->PSCL_FACTOR, 4798 v->PSCL_FACTOR_CHROMA, 4799 v->RequiredDPPCLKThisState, 4800 v->ReadBandwidthLuma, 4801 v->ReadBandwidthChroma, 4802 v->ReturnBusWidth, 4803 &v->ProjectedDCFCLKDeepSleep[i][j]); 4804 4805 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 4806 v->swath_width_luma_ub_all_states[i][j][k] = v->swath_width_luma_ub_this_state[k]; 4807 v->swath_width_chroma_ub_all_states[i][j][k] = v->swath_width_chroma_ub_this_state[k]; 4808 v->SwathWidthYAllStates[i][j][k] = v->SwathWidthYThisState[k]; 4809 v->SwathWidthCAllStates[i][j][k] = v->SwathWidthCThisState[k]; 4810 v->SwathHeightYAllStates[i][j][k] = v->SwathHeightYThisState[k]; 4811 v->SwathHeightCAllStates[i][j][k] = v->SwathHeightCThisState[k]; 4812 v->DETBufferSizeYAllStates[i][j][k] = v->DETBufferSizeYThisState[k]; 4813 v->DETBufferSizeCAllStates[i][j][k] = v->DETBufferSizeCThisState[k]; 4814 } 4815 } 4816 } 4817 4818 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 4819 v->cursor_bw[k] = v->NumberOfCursors[k] * v->CursorWidth[k][0] * v->CursorBPP[k][0] / 8.0 4820 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatio[k]; 4821 } 4822 4823 for (i = 0; i < v->soc.num_states; i++) { 4824 for (j = 0; j < 2; j++) { 4825 bool NotUrgentLatencyHiding[DC__NUM_DPP__MAX]; 4826 4827 for (k = 0; k < v->NumberOfActivePlanes; k++) { 4828 v->swath_width_luma_ub_this_state[k] = v->swath_width_luma_ub_all_states[i][j][k]; 4829 
v->swath_width_chroma_ub_this_state[k] = v->swath_width_chroma_ub_all_states[i][j][k]; 4830 v->SwathWidthYThisState[k] = v->SwathWidthYAllStates[i][j][k]; 4831 v->SwathWidthCThisState[k] = v->SwathWidthCAllStates[i][j][k]; 4832 v->SwathHeightYThisState[k] = v->SwathHeightYAllStates[i][j][k]; 4833 v->SwathHeightCThisState[k] = v->SwathHeightCAllStates[i][j][k]; 4834 v->DETBufferSizeYThisState[k] = v->DETBufferSizeYAllStates[i][j][k]; 4835 v->DETBufferSizeCThisState[k] = v->DETBufferSizeCAllStates[i][j][k]; 4836 } 4837 4838 v->TotalNumberOfDCCActiveDPP[i][j] = 0; 4839 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 4840 if (v->DCCEnable[k] == true) { 4841 v->TotalNumberOfDCCActiveDPP[i][j] = v->TotalNumberOfDCCActiveDPP[i][j] + v->NoOfDPP[i][j][k]; 4842 } 4843 } 4844 4845 for (k = 0; k < v->NumberOfActivePlanes; k++) { 4846 if (v->SourcePixelFormat[k] == dm_420_8 || v->SourcePixelFormat[k] == dm_420_10 4847 || v->SourcePixelFormat[k] == dm_420_12 || v->SourcePixelFormat[k] == dm_rgbe_alpha) { 4848 4849 if ((v->SourcePixelFormat[k] == dm_420_10 || v->SourcePixelFormat[k] == dm_420_12) 4850 && v->SourceScan[k] != dm_vert) { 4851 v->PTEBufferSizeInRequestsForLuma = (v->PTEBufferSizeInRequestsLuma + v->PTEBufferSizeInRequestsChroma) 4852 / 2; 4853 v->PTEBufferSizeInRequestsForChroma = v->PTEBufferSizeInRequestsForLuma; 4854 } else { 4855 v->PTEBufferSizeInRequestsForLuma = v->PTEBufferSizeInRequestsLuma; 4856 v->PTEBufferSizeInRequestsForChroma = v->PTEBufferSizeInRequestsChroma; 4857 } 4858 4859 v->PDEAndMetaPTEBytesPerFrameC = CalculateVMAndRowBytes( 4860 mode_lib, 4861 v->DCCEnable[k], 4862 v->Read256BlockHeightC[k], 4863 v->Read256BlockWidthC[k], 4864 v->SourcePixelFormat[k], 4865 v->SurfaceTiling[k], 4866 v->BytePerPixelC[k], 4867 v->SourceScan[k], 4868 v->SwathWidthCThisState[k], 4869 v->ViewportHeightChroma[k], 4870 v->GPUVMEnable, 4871 v->HostVMEnable, 4872 v->HostVMMaxNonCachedPageTableLevels, 4873 v->GPUVMMinPageSize, 4874 v->HostVMMinPageSize, 4875 
v->PTEBufferSizeInRequestsForChroma, 4876 v->PitchC[k], 4877 0.0, 4878 &v->MacroTileWidthC[k], 4879 &v->MetaRowBytesC, 4880 &v->DPTEBytesPerRowC, 4881 &v->PTEBufferSizeNotExceededC[i][j][k], 4882 &v->dummyinteger7, 4883 &v->dpte_row_height_chroma[k], 4884 &v->dummyinteger28, 4885 &v->dummyinteger26, 4886 &v->dummyinteger23, 4887 &v->meta_row_height_chroma[k], 4888 &v->dummyinteger8, 4889 &v->dummyinteger9, 4890 &v->dummyinteger19, 4891 &v->dummyinteger20, 4892 &v->dummyinteger17, 4893 &v->dummyinteger10, 4894 &v->dummyinteger11); 4895 4896 v->PrefetchLinesC[i][j][k] = CalculatePrefetchSourceLines( 4897 mode_lib, 4898 v->VRatioChroma[k], 4899 v->VTAPsChroma[k], 4900 v->Interlace[k], 4901 v->ProgressiveToInterlaceUnitInOPP, 4902 v->SwathHeightCThisState[k], 4903 v->ViewportYStartC[k], 4904 &v->PrefillC[k], 4905 &v->MaxNumSwC[k]); 4906 } else { 4907 v->PTEBufferSizeInRequestsForLuma = v->PTEBufferSizeInRequestsLuma + v->PTEBufferSizeInRequestsChroma; 4908 v->PTEBufferSizeInRequestsForChroma = 0; 4909 v->PDEAndMetaPTEBytesPerFrameC = 0.0; 4910 v->MetaRowBytesC = 0.0; 4911 v->DPTEBytesPerRowC = 0.0; 4912 v->PrefetchLinesC[i][j][k] = 0.0; 4913 v->PTEBufferSizeNotExceededC[i][j][k] = true; 4914 } 4915 v->PDEAndMetaPTEBytesPerFrameY = CalculateVMAndRowBytes( 4916 mode_lib, 4917 v->DCCEnable[k], 4918 v->Read256BlockHeightY[k], 4919 v->Read256BlockWidthY[k], 4920 v->SourcePixelFormat[k], 4921 v->SurfaceTiling[k], 4922 v->BytePerPixelY[k], 4923 v->SourceScan[k], 4924 v->SwathWidthYThisState[k], 4925 v->ViewportHeight[k], 4926 v->GPUVMEnable, 4927 v->HostVMEnable, 4928 v->HostVMMaxNonCachedPageTableLevels, 4929 v->GPUVMMinPageSize, 4930 v->HostVMMinPageSize, 4931 v->PTEBufferSizeInRequestsForLuma, 4932 v->PitchY[k], 4933 v->DCCMetaPitchY[k], 4934 &v->MacroTileWidthY[k], 4935 &v->MetaRowBytesY, 4936 &v->DPTEBytesPerRowY, 4937 &v->PTEBufferSizeNotExceededY[i][j][k], 4938 &v->dummyinteger7, 4939 &v->dpte_row_height[k], 4940 &v->dummyinteger29, 4941 &v->dummyinteger27, 4942 
&v->dummyinteger24, 4943 &v->meta_row_height[k], 4944 &v->dummyinteger25, 4945 &v->dpte_group_bytes[k], 4946 &v->dummyinteger21, 4947 &v->dummyinteger22, 4948 &v->dummyinteger18, 4949 &v->dummyinteger5, 4950 &v->dummyinteger6); 4951 v->PrefetchLinesY[i][j][k] = CalculatePrefetchSourceLines( 4952 mode_lib, 4953 v->VRatio[k], 4954 v->vtaps[k], 4955 v->Interlace[k], 4956 v->ProgressiveToInterlaceUnitInOPP, 4957 v->SwathHeightYThisState[k], 4958 v->ViewportYStartY[k], 4959 &v->PrefillY[k], 4960 &v->MaxNumSwY[k]); 4961 v->PDEAndMetaPTEBytesPerFrame[i][j][k] = v->PDEAndMetaPTEBytesPerFrameY + v->PDEAndMetaPTEBytesPerFrameC; 4962 v->MetaRowBytes[i][j][k] = v->MetaRowBytesY + v->MetaRowBytesC; 4963 v->DPTEBytesPerRow[i][j][k] = v->DPTEBytesPerRowY + v->DPTEBytesPerRowC; 4964 4965 CalculateRowBandwidth( 4966 v->GPUVMEnable, 4967 v->SourcePixelFormat[k], 4968 v->VRatio[k], 4969 v->VRatioChroma[k], 4970 v->DCCEnable[k], 4971 v->HTotal[k] / v->PixelClock[k], 4972 v->MetaRowBytesY, 4973 v->MetaRowBytesC, 4974 v->meta_row_height[k], 4975 v->meta_row_height_chroma[k], 4976 v->DPTEBytesPerRowY, 4977 v->DPTEBytesPerRowC, 4978 v->dpte_row_height[k], 4979 v->dpte_row_height_chroma[k], 4980 &v->meta_row_bandwidth[i][j][k], 4981 &v->dpte_row_bandwidth[i][j][k]); 4982 } 4983 /* 4984 * DCCMetaBufferSizeSupport(i, j) = True 4985 * For k = 0 To NumberOfActivePlanes - 1 4986 * If MetaRowBytes(i, j, k) > 24064 Then 4987 * DCCMetaBufferSizeSupport(i, j) = False 4988 * End If 4989 * Next k 4990 */ 4991 v->DCCMetaBufferSizeSupport[i][j] = true; 4992 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 4993 if (v->MetaRowBytes[i][j][k] > 24064) 4994 v->DCCMetaBufferSizeSupport[i][j] = false; 4995 } 4996 v->UrgLatency[i] = CalculateUrgentLatency( 4997 v->UrgentLatencyPixelDataOnly, 4998 v->UrgentLatencyPixelMixedWithVMData, 4999 v->UrgentLatencyVMDataOnly, 5000 v->DoUrgentLatencyAdjustment, 5001 v->UrgentLatencyAdjustmentFabricClockComponent, 5002 v->UrgentLatencyAdjustmentFabricClockReference, 5003 
v->FabricClockPerState[i]); 5004 5005 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 5006 CalculateUrgentBurstFactor( 5007 v->swath_width_luma_ub_this_state[k], 5008 v->swath_width_chroma_ub_this_state[k], 5009 v->SwathHeightYThisState[k], 5010 v->SwathHeightCThisState[k], 5011 v->HTotal[k] / v->PixelClock[k], 5012 v->UrgLatency[i], 5013 v->CursorBufferSize, 5014 v->CursorWidth[k][0], 5015 v->CursorBPP[k][0], 5016 v->VRatio[k], 5017 v->VRatioChroma[k], 5018 v->BytePerPixelInDETY[k], 5019 v->BytePerPixelInDETC[k], 5020 v->DETBufferSizeYThisState[k], 5021 v->DETBufferSizeCThisState[k], 5022 &v->UrgentBurstFactorCursor[k], 5023 &v->UrgentBurstFactorLuma[k], 5024 &v->UrgentBurstFactorChroma[k], 5025 &NotUrgentLatencyHiding[k]); 5026 } 5027 5028 v->NotEnoughUrgentLatencyHidingA[i][j] = false; 5029 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 5030 if (NotUrgentLatencyHiding[k]) { 5031 v->NotEnoughUrgentLatencyHidingA[i][j] = true; 5032 } 5033 } 5034 5035 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 5036 v->VActivePixelBandwidth[i][j][k] = v->ReadBandwidthLuma[k] * v->UrgentBurstFactorLuma[k] 5037 + v->ReadBandwidthChroma[k] * v->UrgentBurstFactorChroma[k]; 5038 v->VActiveCursorBandwidth[i][j][k] = v->cursor_bw[k] * v->UrgentBurstFactorCursor[k]; 5039 } 5040 5041 v->TotalVActivePixelBandwidth[i][j] = 0; 5042 v->TotalVActiveCursorBandwidth[i][j] = 0; 5043 v->TotalMetaRowBandwidth[i][j] = 0; 5044 v->TotalDPTERowBandwidth[i][j] = 0; 5045 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 5046 v->TotalVActivePixelBandwidth[i][j] = v->TotalVActivePixelBandwidth[i][j] + v->VActivePixelBandwidth[i][j][k]; 5047 v->TotalVActiveCursorBandwidth[i][j] = v->TotalVActiveCursorBandwidth[i][j] + v->VActiveCursorBandwidth[i][j][k]; 5048 v->TotalMetaRowBandwidth[i][j] = v->TotalMetaRowBandwidth[i][j] + v->NoOfDPP[i][j][k] * v->meta_row_bandwidth[i][j][k]; 5049 v->TotalDPTERowBandwidth[i][j] = v->TotalDPTERowBandwidth[i][j] + v->NoOfDPP[i][j][k] * v->dpte_row_bandwidth[i][j][k]; 5050 } 
5051 } 5052 } 5053 5054 //Calculate Return BW 5055 for (i = 0; i < v->soc.num_states; ++i) { 5056 for (j = 0; j <= 1; ++j) { 5057 for (k = 0; k < v->NumberOfActivePlanes; k++) { 5058 if (v->BlendingAndTiming[k] == k) { 5059 if (v->WritebackEnable[k] == true) { 5060 v->WritebackDelayTime[k] = v->WritebackLatency 5061 + CalculateWriteBackDelay( 5062 v->WritebackPixelFormat[k], 5063 v->WritebackHRatio[k], 5064 v->WritebackVRatio[k], 5065 v->WritebackVTaps[k], 5066 v->WritebackDestinationWidth[k], 5067 v->WritebackDestinationHeight[k], 5068 v->WritebackSourceHeight[k], 5069 v->HTotal[k]) / v->RequiredDISPCLK[i][j]; 5070 } else { 5071 v->WritebackDelayTime[k] = 0.0; 5072 } 5073 for (m = 0; m < v->NumberOfActivePlanes; m++) { 5074 if (v->BlendingAndTiming[m] == k && v->WritebackEnable[m] == true) { 5075 v->WritebackDelayTime[k] = dml_max( 5076 v->WritebackDelayTime[k], 5077 v->WritebackLatency 5078 + CalculateWriteBackDelay( 5079 v->WritebackPixelFormat[m], 5080 v->WritebackHRatio[m], 5081 v->WritebackVRatio[m], 5082 v->WritebackVTaps[m], 5083 v->WritebackDestinationWidth[m], 5084 v->WritebackDestinationHeight[m], 5085 v->WritebackSourceHeight[m], 5086 v->HTotal[m]) / v->RequiredDISPCLK[i][j]); 5087 } 5088 } 5089 } 5090 } 5091 for (k = 0; k < v->NumberOfActivePlanes; k++) { 5092 for (m = 0; m < v->NumberOfActivePlanes; m++) { 5093 if (v->BlendingAndTiming[k] == m) { 5094 v->WritebackDelayTime[k] = v->WritebackDelayTime[m]; 5095 } 5096 } 5097 } 5098 v->MaxMaxVStartup[i][j] = 0; 5099 for (k = 0; k < v->NumberOfActivePlanes; k++) { 5100 v->MaximumVStartup[i][j][k] = 5101 CalculateMaxVStartup( 5102 v->VTotal[k], 5103 v->VActive[k], 5104 v->VBlankNom[k], 5105 v->HTotal[k], 5106 v->PixelClock[k], 5107 v->ProgressiveToInterlaceUnitInOPP, 5108 v->Interlace[k], 5109 v->ip.VBlankNomDefaultUS, 5110 v->WritebackDelayTime[k]); 5111 v->MaxMaxVStartup[i][j] = dml_max(v->MaxMaxVStartup[i][j], v->MaximumVStartup[i][j][k]); 5112 } 5113 } 5114 } 5115 5116 ReorderingBytes = 
v->NumberOfChannels 5117 * dml_max3( 5118 v->UrgentOutOfOrderReturnPerChannelPixelDataOnly, 5119 v->UrgentOutOfOrderReturnPerChannelPixelMixedWithVMData, 5120 v->UrgentOutOfOrderReturnPerChannelVMDataOnly); 5121 5122 for (i = 0; i < v->soc.num_states; ++i) { 5123 for (j = 0; j <= 1; ++j) { 5124 v->DCFCLKState[i][j] = v->DCFCLKPerState[i]; 5125 } 5126 } 5127 5128 if (v->UseMinimumRequiredDCFCLK == true) 5129 UseMinimumDCFCLK(mode_lib, MaxPrefetchMode, ReorderingBytes); 5130 5131 for (i = 0; i < v->soc.num_states; ++i) { 5132 for (j = 0; j <= 1; ++j) { 5133 double IdealFabricAndSDPPortBandwidthPerState = dml_min( 5134 v->ReturnBusWidth * v->DCFCLKState[i][j], 5135 v->FabricClockPerState[i] * v->FabricDatapathToDCNDataReturn); 5136 double IdealDRAMBandwidthPerState = v->DRAMSpeedPerState[i] * v->NumberOfChannels * v->DRAMChannelWidth; 5137 double PixelDataOnlyReturnBWPerState = dml_min( 5138 IdealFabricAndSDPPortBandwidthPerState * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0, 5139 IdealDRAMBandwidthPerState * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyPixelDataOnly / 100.0); 5140 double PixelMixedWithVMDataReturnBWPerState = dml_min( 5141 IdealFabricAndSDPPortBandwidthPerState * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0, 5142 IdealDRAMBandwidthPerState * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyPixelMixedWithVMData / 100.0); 5143 5144 if (v->HostVMEnable != true) { 5145 v->ReturnBWPerState[i][j] = PixelDataOnlyReturnBWPerState; 5146 } else { 5147 v->ReturnBWPerState[i][j] = PixelMixedWithVMDataReturnBWPerState; 5148 } 5149 } 5150 } 5151 5152 //Re-ordering Buffer Support Check 5153 for (i = 0; i < v->soc.num_states; ++i) { 5154 for (j = 0; j <= 1; ++j) { 5155 if ((v->ROBBufferSizeInKByte - v->PixelChunkSizeInKByte) * 1024 / v->ReturnBWPerState[i][j] 5156 > (v->RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__) / v->DCFCLKState[i][j] + ReorderingBytes / v->ReturnBWPerState[i][j]) { 5157 v->ROBSupport[i][j] = 
true; 5158 } else { 5159 v->ROBSupport[i][j] = false; 5160 } 5161 } 5162 } 5163 5164 //Vertical Active BW support check 5165 5166 MaxTotalVActiveRDBandwidth = 0; 5167 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 5168 MaxTotalVActiveRDBandwidth = MaxTotalVActiveRDBandwidth + v->ReadBandwidthLuma[k] + v->ReadBandwidthChroma[k]; 5169 } 5170 5171 for (i = 0; i < v->soc.num_states; ++i) { 5172 for (j = 0; j <= 1; ++j) { 5173 v->MaxTotalVerticalActiveAvailableBandwidth[i][j] = dml_min( 5174 dml_min( 5175 v->ReturnBusWidth * v->DCFCLKState[i][j], 5176 v->FabricClockPerState[i] * v->FabricDatapathToDCNDataReturn) 5177 * v->MaxAveragePercentOfIdealFabricAndSDPPortBWDisplayCanUseInNormalSystemOperation / 100, 5178 v->DRAMSpeedPerState[i] * v->NumberOfChannels * v->DRAMChannelWidth 5179 * v->MaxAveragePercentOfIdealDRAMBWDisplayCanUseInNormalSystemOperation / 100); 5180 5181 if (MaxTotalVActiveRDBandwidth <= v->MaxTotalVerticalActiveAvailableBandwidth[i][j]) { 5182 v->TotalVerticalActiveBandwidthSupport[i][j] = true; 5183 } else { 5184 v->TotalVerticalActiveBandwidthSupport[i][j] = false; 5185 } 5186 } 5187 } 5188 5189 v->UrgentLatency = CalculateUrgentLatency( 5190 v->UrgentLatencyPixelDataOnly, 5191 v->UrgentLatencyPixelMixedWithVMData, 5192 v->UrgentLatencyVMDataOnly, 5193 v->DoUrgentLatencyAdjustment, 5194 v->UrgentLatencyAdjustmentFabricClockComponent, 5195 v->UrgentLatencyAdjustmentFabricClockReference, 5196 v->FabricClock); 5197 //Prefetch Check 5198 for (i = 0; i < v->soc.num_states; ++i) { 5199 for (j = 0; j <= 1; ++j) { 5200 double VMDataOnlyReturnBWPerState; 5201 double HostVMInefficiencyFactor = 1; 5202 int NextPrefetchModeState = MinPrefetchMode; 5203 bool UnboundedRequestEnabledThisState = false; 5204 int CompressedBufferSizeInkByteThisState = 0; 5205 double dummy; 5206 5207 v->TimeCalc = 24 / v->ProjectedDCFCLKDeepSleep[i][j]; 5208 5209 v->BandwidthWithoutPrefetchSupported[i][j] = true; 5210 if (v->TotalVActivePixelBandwidth[i][j] + 
v->TotalVActiveCursorBandwidth[i][j] + v->TotalMetaRowBandwidth[i][j] 5211 + v->TotalDPTERowBandwidth[i][j] > v->ReturnBWPerState[i][j] || v->NotEnoughUrgentLatencyHidingA[i][j]) { 5212 v->BandwidthWithoutPrefetchSupported[i][j] = false; 5213 } 5214 5215 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 5216 v->NoOfDPPThisState[k] = v->NoOfDPP[i][j][k]; 5217 v->swath_width_luma_ub_this_state[k] = v->swath_width_luma_ub_all_states[i][j][k]; 5218 v->swath_width_chroma_ub_this_state[k] = v->swath_width_chroma_ub_all_states[i][j][k]; 5219 v->SwathWidthYThisState[k] = v->SwathWidthYAllStates[i][j][k]; 5220 v->SwathWidthCThisState[k] = v->SwathWidthCAllStates[i][j][k]; 5221 v->SwathHeightYThisState[k] = v->SwathHeightYAllStates[i][j][k]; 5222 v->SwathHeightCThisState[k] = v->SwathHeightCAllStates[i][j][k]; 5223 v->DETBufferSizeYThisState[k] = v->DETBufferSizeYAllStates[i][j][k]; 5224 v->DETBufferSizeCThisState[k] = v->DETBufferSizeCAllStates[i][j][k]; 5225 } 5226 5227 VMDataOnlyReturnBWPerState = dml_min( 5228 dml_min( 5229 v->ReturnBusWidth * v->DCFCLKState[i][j], 5230 v->FabricClockPerState[i] * v->FabricDatapathToDCNDataReturn) 5231 * v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0, 5232 v->DRAMSpeedPerState[i] * v->NumberOfChannels * v->DRAMChannelWidth 5233 * v->PercentOfIdealDRAMBWReceivedAfterUrgLatencyVMDataOnly / 100.0); 5234 if (v->GPUVMEnable && v->HostVMEnable) 5235 HostVMInefficiencyFactor = v->ReturnBWPerState[i][j] / VMDataOnlyReturnBWPerState; 5236 5237 v->ExtraLatency = CalculateExtraLatency( 5238 v->RoundTripPingLatencyCycles, 5239 ReorderingBytes, 5240 v->DCFCLKState[i][j], 5241 v->TotalNumberOfActiveDPP[i][j], 5242 v->PixelChunkSizeInKByte, 5243 v->TotalNumberOfDCCActiveDPP[i][j], 5244 v->MetaChunkSize, 5245 v->ReturnBWPerState[i][j], 5246 v->GPUVMEnable, 5247 v->HostVMEnable, 5248 v->NumberOfActivePlanes, 5249 v->NoOfDPPThisState, 5250 v->dpte_group_bytes, 5251 HostVMInefficiencyFactor, 5252 v->HostVMMinPageSize, 5253 
v->HostVMMaxNonCachedPageTableLevels); 5254 5255 v->NextMaxVStartup = v->MaxMaxVStartup[i][j]; 5256 do { 5257 v->PrefetchModePerState[i][j] = NextPrefetchModeState; 5258 v->MaxVStartup = v->NextMaxVStartup; 5259 5260 v->TWait = CalculateTWait( 5261 v->PrefetchModePerState[i][j], 5262 v->DRAMClockChangeLatency, 5263 v->UrgLatency[i], 5264 v->SREnterPlusExitTime); 5265 5266 for (k = 0; k < v->NumberOfActivePlanes; k++) { 5267 CalculatePrefetchSchedulePerPlane(mode_lib, 5268 HostVMInefficiencyFactor, 5269 i, j, k); 5270 } 5271 5272 for (k = 0; k < v->NumberOfActivePlanes; k++) { 5273 CalculateUrgentBurstFactor( 5274 v->swath_width_luma_ub_this_state[k], 5275 v->swath_width_chroma_ub_this_state[k], 5276 v->SwathHeightYThisState[k], 5277 v->SwathHeightCThisState[k], 5278 v->HTotal[k] / v->PixelClock[k], 5279 v->UrgLatency[i], 5280 v->CursorBufferSize, 5281 v->CursorWidth[k][0], 5282 v->CursorBPP[k][0], 5283 v->VRatioPreY[i][j][k], 5284 v->VRatioPreC[i][j][k], 5285 v->BytePerPixelInDETY[k], 5286 v->BytePerPixelInDETC[k], 5287 v->DETBufferSizeYThisState[k], 5288 v->DETBufferSizeCThisState[k], 5289 &v->UrgentBurstFactorCursorPre[k], 5290 &v->UrgentBurstFactorLumaPre[k], 5291 &v->UrgentBurstFactorChromaPre[k], 5292 &v->NotUrgentLatencyHidingPre[k]); 5293 } 5294 5295 v->MaximumReadBandwidthWithPrefetch = 0.0; 5296 for (k = 0; k < v->NumberOfActivePlanes; k++) { 5297 v->cursor_bw_pre[k] = v->NumberOfCursors[k] * v->CursorWidth[k][0] * v->CursorBPP[k][0] / 8.0 5298 / (v->HTotal[k] / v->PixelClock[k]) * v->VRatioPreY[i][j][k]; 5299 5300 v->MaximumReadBandwidthWithPrefetch = 5301 v->MaximumReadBandwidthWithPrefetch 5302 + dml_max3( 5303 v->VActivePixelBandwidth[i][j][k] 5304 + v->VActiveCursorBandwidth[i][j][k] 5305 + v->NoOfDPP[i][j][k] 5306 * (v->meta_row_bandwidth[i][j][k] 5307 + v->dpte_row_bandwidth[i][j][k]), 5308 v->NoOfDPP[i][j][k] * v->prefetch_vmrow_bw[k], 5309 v->NoOfDPP[i][j][k] 5310 * (v->RequiredPrefetchPixelDataBWLuma[i][j][k] 5311 * v->UrgentBurstFactorLumaPre[k] 
5312 + v->RequiredPrefetchPixelDataBWChroma[i][j][k] 5313 * v->UrgentBurstFactorChromaPre[k]) 5314 + v->cursor_bw_pre[k] * v->UrgentBurstFactorCursorPre[k]); 5315 } 5316 5317 v->NotEnoughUrgentLatencyHidingPre = false; 5318 for (k = 0; k < v->NumberOfActivePlanes; k++) { 5319 if (v->NotUrgentLatencyHidingPre[k] == true) { 5320 v->NotEnoughUrgentLatencyHidingPre = true; 5321 } 5322 } 5323 5324 v->PrefetchSupported[i][j] = true; 5325 if (v->BandwidthWithoutPrefetchSupported[i][j] == false || v->MaximumReadBandwidthWithPrefetch > v->ReturnBWPerState[i][j] 5326 || v->NotEnoughUrgentLatencyHidingPre == 1) { 5327 v->PrefetchSupported[i][j] = false; 5328 } 5329 for (k = 0; k < v->NumberOfActivePlanes; k++) { 5330 if (v->LineTimesForPrefetch[k] < 2.0 || v->LinesForMetaPTE[k] >= 32.0 || v->LinesForMetaAndDPTERow[k] >= 16.0 5331 || v->NoTimeForPrefetch[i][j][k] == true) { 5332 v->PrefetchSupported[i][j] = false; 5333 } 5334 } 5335 5336 v->DynamicMetadataSupported[i][j] = true; 5337 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 5338 if (v->NoTimeForDynamicMetadata[i][j][k] == true) { 5339 v->DynamicMetadataSupported[i][j] = false; 5340 } 5341 } 5342 5343 v->VRatioInPrefetchSupported[i][j] = true; 5344 for (k = 0; k < v->NumberOfActivePlanes; k++) { 5345 if (v->VRatioPreY[i][j][k] > 4.0 || v->VRatioPreC[i][j][k] > 4.0 || v->NoTimeForPrefetch[i][j][k] == true) { 5346 v->VRatioInPrefetchSupported[i][j] = false; 5347 } 5348 } 5349 v->AnyLinesForVMOrRowTooLarge = false; 5350 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 5351 if (v->LinesForMetaAndDPTERow[k] >= 16 || v->LinesForMetaPTE[k] >= 32) { 5352 v->AnyLinesForVMOrRowTooLarge = true; 5353 } 5354 } 5355 5356 v->NextPrefetchMode = v->NextPrefetchMode + 1; 5357 5358 if (v->PrefetchSupported[i][j] == true && v->VRatioInPrefetchSupported[i][j] == true) { 5359 v->BandwidthAvailableForImmediateFlip = v->ReturnBWPerState[i][j]; 5360 for (k = 0; k < v->NumberOfActivePlanes; k++) { 5361 v->BandwidthAvailableForImmediateFlip = 
v->BandwidthAvailableForImmediateFlip 5362 - dml_max( 5363 v->VActivePixelBandwidth[i][j][k] + v->VActiveCursorBandwidth[i][j][k], 5364 v->NoOfDPP[i][j][k] 5365 * (v->RequiredPrefetchPixelDataBWLuma[i][j][k] 5366 * v->UrgentBurstFactorLumaPre[k] 5367 + v->RequiredPrefetchPixelDataBWChroma[i][j][k] 5368 * v->UrgentBurstFactorChromaPre[k]) 5369 + v->cursor_bw_pre[k] * v->UrgentBurstFactorCursorPre[k]); 5370 } 5371 v->TotImmediateFlipBytes = 0.0; 5372 for (k = 0; k < v->NumberOfActivePlanes; k++) { 5373 v->TotImmediateFlipBytes = v->TotImmediateFlipBytes 5374 + v->NoOfDPP[i][j][k] * v->PDEAndMetaPTEBytesPerFrame[i][j][k] + v->MetaRowBytes[i][j][k] 5375 + v->DPTEBytesPerRow[i][j][k]; 5376 } 5377 5378 for (k = 0; k < v->NumberOfActivePlanes; k++) { 5379 CalculateFlipSchedule( 5380 mode_lib, 5381 k, 5382 HostVMInefficiencyFactor, 5383 v->ExtraLatency, 5384 v->UrgLatency[i], 5385 v->PDEAndMetaPTEBytesPerFrame[i][j][k], 5386 v->MetaRowBytes[i][j][k], 5387 v->DPTEBytesPerRow[i][j][k]); 5388 } 5389 v->total_dcn_read_bw_with_flip = 0.0; 5390 for (k = 0; k < v->NumberOfActivePlanes; k++) { 5391 v->total_dcn_read_bw_with_flip = v->total_dcn_read_bw_with_flip 5392 + dml_max3( 5393 v->NoOfDPP[i][j][k] * v->prefetch_vmrow_bw[k], 5394 v->NoOfDPP[i][j][k] * v->final_flip_bw[k] + v->VActivePixelBandwidth[i][j][k] 5395 + v->VActiveCursorBandwidth[i][j][k], 5396 v->NoOfDPP[i][j][k] 5397 * (v->final_flip_bw[k] 5398 + v->RequiredPrefetchPixelDataBWLuma[i][j][k] 5399 * v->UrgentBurstFactorLumaPre[k] 5400 + v->RequiredPrefetchPixelDataBWChroma[i][j][k] 5401 * v->UrgentBurstFactorChromaPre[k]) 5402 + v->cursor_bw_pre[k] * v->UrgentBurstFactorCursorPre[k]); 5403 } 5404 v->ImmediateFlipSupportedForState[i][j] = true; 5405 if (v->total_dcn_read_bw_with_flip > v->ReturnBWPerState[i][j]) { 5406 v->ImmediateFlipSupportedForState[i][j] = false; 5407 } 5408 for (k = 0; k < v->NumberOfActivePlanes; k++) { 5409 if (v->ImmediateFlipSupportedForPipe[k] == false) { 5410 
v->ImmediateFlipSupportedForState[i][j] = false; 5411 } 5412 } 5413 } else { 5414 v->ImmediateFlipSupportedForState[i][j] = false; 5415 } 5416 5417 if (v->MaxVStartup <= __DML_VBA_MIN_VSTARTUP__ || v->AnyLinesForVMOrRowTooLarge == false) { 5418 v->NextMaxVStartup = v->MaxMaxVStartup[i][j]; 5419 NextPrefetchModeState = NextPrefetchModeState + 1; 5420 } else { 5421 v->NextMaxVStartup = v->NextMaxVStartup - 1; 5422 } 5423 v->NextPrefetchMode = v->NextPrefetchMode + 1; 5424 } while (!((v->PrefetchSupported[i][j] == true && v->DynamicMetadataSupported[i][j] == true && v->VRatioInPrefetchSupported[i][j] == true 5425 && ((v->HostVMEnable == false && 5426 v->ImmediateFlipRequirement[0] != dm_immediate_flip_required) 5427 || v->ImmediateFlipSupportedForState[i][j] == true)) 5428 || (v->NextMaxVStartup == v->MaxMaxVStartup[i][j] && NextPrefetchModeState > MaxPrefetchMode))); 5429 5430 CalculateUnboundedRequestAndCompressedBufferSize( 5431 v->DETBufferSizeInKByte[0], 5432 v->ConfigReturnBufferSizeInKByte, 5433 v->UseUnboundedRequesting, 5434 v->TotalNumberOfActiveDPP[i][j], 5435 NoChroma, 5436 v->MaxNumDPP, 5437 v->CompressedBufferSegmentSizeInkByte, 5438 v->Output, 5439 &UnboundedRequestEnabledThisState, 5440 &CompressedBufferSizeInkByteThisState); 5441 5442 CalculateWatermarksAndDRAMSpeedChangeSupport( 5443 mode_lib, 5444 v->PrefetchModePerState[i][j], 5445 v->DCFCLKState[i][j], 5446 v->ReturnBWPerState[i][j], 5447 v->UrgLatency[i], 5448 v->ExtraLatency, 5449 v->SOCCLKPerState[i], 5450 v->ProjectedDCFCLKDeepSleep[i][j], 5451 v->DETBufferSizeYThisState, 5452 v->DETBufferSizeCThisState, 5453 v->SwathHeightYThisState, 5454 v->SwathHeightCThisState, 5455 v->SwathWidthYThisState, 5456 v->SwathWidthCThisState, 5457 v->NoOfDPPThisState, 5458 v->BytePerPixelInDETY, 5459 v->BytePerPixelInDETC, 5460 UnboundedRequestEnabledThisState, 5461 CompressedBufferSizeInkByteThisState, 5462 &v->DRAMClockChangeSupport[i][j], 5463 &dummy, 5464 &dummy, 5465 &dummy, 5466 &dummy); 5467 } 5468 } 5469 
5470 /*PTE Buffer Size Check*/ 5471 for (i = 0; i < v->soc.num_states; i++) { 5472 for (j = 0; j < 2; j++) { 5473 v->PTEBufferSizeNotExceeded[i][j] = true; 5474 for (k = 0; k < v->NumberOfActivePlanes; k++) { 5475 if (v->PTEBufferSizeNotExceededY[i][j][k] == false || v->PTEBufferSizeNotExceededC[i][j][k] == false) { 5476 v->PTEBufferSizeNotExceeded[i][j] = false; 5477 } 5478 } 5479 } 5480 } 5481 5482 /*Cursor Support Check*/ 5483 v->CursorSupport = true; 5484 for (k = 0; k < v->NumberOfActivePlanes; k++) { 5485 if (v->CursorWidth[k][0] > 0.0) { 5486 if (v->CursorBPP[k][0] == 64 && v->Cursor64BppSupport == false) { 5487 v->CursorSupport = false; 5488 } 5489 } 5490 } 5491 5492 /*Valid Pitch Check*/ 5493 v->PitchSupport = true; 5494 for (k = 0; k < v->NumberOfActivePlanes; k++) { 5495 v->AlignedYPitch[k] = dml_ceil(dml_max(v->PitchY[k], v->SurfaceWidthY[k]), v->MacroTileWidthY[k]); 5496 if (v->DCCEnable[k] == true) { 5497 v->AlignedDCCMetaPitchY[k] = dml_ceil(dml_max(v->DCCMetaPitchY[k], v->SurfaceWidthY[k]), 64.0 * v->Read256BlockWidthY[k]); 5498 } else { 5499 v->AlignedDCCMetaPitchY[k] = v->DCCMetaPitchY[k]; 5500 } 5501 if (v->SourcePixelFormat[k] != dm_444_64 && v->SourcePixelFormat[k] != dm_444_32 && v->SourcePixelFormat[k] != dm_444_16 5502 && v->SourcePixelFormat[k] != dm_mono_16 && v->SourcePixelFormat[k] != dm_rgbe 5503 && v->SourcePixelFormat[k] != dm_mono_8) { 5504 v->AlignedCPitch[k] = dml_ceil(dml_max(v->PitchC[k], v->SurfaceWidthC[k]), v->MacroTileWidthC[k]); 5505 if (v->DCCEnable[k] == true) { 5506 v->AlignedDCCMetaPitchC[k] = dml_ceil( 5507 dml_max(v->DCCMetaPitchC[k], v->SurfaceWidthC[k]), 5508 64.0 * v->Read256BlockWidthC[k]); 5509 } else { 5510 v->AlignedDCCMetaPitchC[k] = v->DCCMetaPitchC[k]; 5511 } 5512 } else { 5513 v->AlignedCPitch[k] = v->PitchC[k]; 5514 v->AlignedDCCMetaPitchC[k] = v->DCCMetaPitchC[k]; 5515 } 5516 if (v->AlignedYPitch[k] > v->PitchY[k] || v->AlignedCPitch[k] > v->PitchC[k] 5517 || v->AlignedDCCMetaPitchY[k] > 
v->DCCMetaPitchY[k] || v->AlignedDCCMetaPitchC[k] > v->DCCMetaPitchC[k]) { 5518 v->PitchSupport = false; 5519 } 5520 } 5521 5522 for (k = 0; k < v->NumberOfActivePlanes; k++) { 5523 if (v->ViewportWidth[k] > v->SurfaceWidthY[k] || v->ViewportHeight[k] > v->SurfaceHeightY[k]) { 5524 ViewportExceedsSurface = true; 5525 if (v->SourcePixelFormat[k] != dm_444_64 && v->SourcePixelFormat[k] != dm_444_32 5526 && v->SourcePixelFormat[k] != dm_444_16 && v->SourcePixelFormat[k] != dm_444_8 5527 && v->SourcePixelFormat[k] != dm_rgbe) { 5528 if (v->ViewportWidthChroma[k] > v->SurfaceWidthC[k] 5529 || v->ViewportHeightChroma[k] > v->SurfaceHeightC[k]) { 5530 ViewportExceedsSurface = true; 5531 } 5532 } 5533 } 5534 } 5535 5536 /*Mode Support, Voltage State and SOC Configuration*/ 5537 for (i = v->soc.num_states - 1; i >= 0; i--) { 5538 for (j = 0; j < 2; j++) { 5539 if (v->ScaleRatioAndTapsSupport == true && v->SourceFormatPixelAndScanSupport == true && v->ViewportSizeSupport[i][j] == true 5540 && v->LinkCapacitySupport[i] == true && !P2IWith420 && !DSCOnlyIfNecessaryWithBPP 5541 && !DSC422NativeNotSupported && v->ODMCombine4To1SupportCheckOK[i] == true && v->NotEnoughDSCUnits[i] == false 5542 && v->DTBCLKRequiredMoreThanSupported[i] == false 5543 && v->ROBSupport[i][j] == true && v->DISPCLK_DPPCLK_Support[i][j] == true 5544 && v->TotalAvailablePipesSupport[i][j] == true && EnoughWritebackUnits == true 5545 && v->WritebackLatencySupport == true && v->WritebackScaleRatioAndTapsSupport == true 5546 && v->CursorSupport == true && v->PitchSupport == true && ViewportExceedsSurface == false 5547 && v->PrefetchSupported[i][j] == true && v->DynamicMetadataSupported[i][j] == true 5548 && v->TotalVerticalActiveBandwidthSupport[i][j] == true && v->VRatioInPrefetchSupported[i][j] == true 5549 && v->PTEBufferSizeNotExceeded[i][j] == true && v->NonsupportedDSCInputBPC == false 5550 && ((v->HostVMEnable == false 5551 && v->ImmediateFlipRequirement[0] != dm_immediate_flip_required) 5552 || 
v->ImmediateFlipSupportedForState[i][j] == true) 5553 && FMTBufferExceeded == false) { 5554 v->ModeSupport[i][j] = true; 5555 } else { 5556 v->ModeSupport[i][j] = false; 5557 } 5558 } 5559 } 5560 5561 { 5562 unsigned int MaximumMPCCombine = 0; 5563 5564 for (i = v->soc.num_states; i >= 0; i--) { 5565 if (i == v->soc.num_states || v->ModeSupport[i][0] == true || v->ModeSupport[i][1] == true) { 5566 v->VoltageLevel = i; 5567 v->ModeIsSupported = v->ModeSupport[i][0] == true || v->ModeSupport[i][1] == true; 5568 if (v->ModeSupport[i][0] == true) { 5569 MaximumMPCCombine = 0; 5570 } else { 5571 MaximumMPCCombine = 1; 5572 } 5573 } 5574 } 5575 v->ImmediateFlipSupport = v->ImmediateFlipSupportedForState[v->VoltageLevel][MaximumMPCCombine]; 5576 for (k = 0; k <= v->NumberOfActivePlanes - 1; k++) { 5577 v->MPCCombineEnable[k] = v->MPCCombine[v->VoltageLevel][MaximumMPCCombine][k]; 5578 v->DPPPerPlane[k] = v->NoOfDPP[v->VoltageLevel][MaximumMPCCombine][k]; 5579 } 5580 v->DCFCLK = v->DCFCLKState[v->VoltageLevel][MaximumMPCCombine]; 5581 v->DRAMSpeed = v->DRAMSpeedPerState[v->VoltageLevel]; 5582 v->FabricClock = v->FabricClockPerState[v->VoltageLevel]; 5583 v->SOCCLK = v->SOCCLKPerState[v->VoltageLevel]; 5584 v->ReturnBW = v->ReturnBWPerState[v->VoltageLevel][MaximumMPCCombine]; 5585 v->maxMpcComb = MaximumMPCCombine; 5586 } 5587 } 5588 5589 static void CalculateWatermarksAndDRAMSpeedChangeSupport( 5590 struct display_mode_lib *mode_lib, 5591 unsigned int PrefetchMode, 5592 double DCFCLK, 5593 double ReturnBW, 5594 double UrgentLatency, 5595 double ExtraLatency, 5596 double SOCCLK, 5597 double DCFCLKDeepSleep, 5598 unsigned int DETBufferSizeY[], 5599 unsigned int DETBufferSizeC[], 5600 unsigned int SwathHeightY[], 5601 unsigned int SwathHeightC[], 5602 double SwathWidthY[], 5603 double SwathWidthC[], 5604 unsigned int DPPPerPlane[], 5605 double BytePerPixelDETY[], 5606 double BytePerPixelDETC[], 5607 bool UnboundedRequestEnabled, 5608 unsigned int CompressedBufferSizeInkByte, 
5609 enum clock_change_support *DRAMClockChangeSupport, 5610 double *StutterExitWatermark, 5611 double *StutterEnterPlusExitWatermark, 5612 double *Z8StutterExitWatermark, 5613 double *Z8StutterEnterPlusExitWatermark) 5614 { 5615 struct vba_vars_st *v = &mode_lib->vba; 5616 double EffectiveLBLatencyHidingY; 5617 double EffectiveLBLatencyHidingC; 5618 double LinesInDETY[DC__NUM_DPP__MAX]; 5619 double LinesInDETC; 5620 unsigned int LinesInDETYRoundedDownToSwath[DC__NUM_DPP__MAX]; 5621 unsigned int LinesInDETCRoundedDownToSwath; 5622 double FullDETBufferingTimeY; 5623 double FullDETBufferingTimeC; 5624 double ActiveDRAMClockChangeLatencyMarginY; 5625 double ActiveDRAMClockChangeLatencyMarginC; 5626 double WritebackDRAMClockChangeLatencyMargin; 5627 double PlaneWithMinActiveDRAMClockChangeMargin; 5628 double SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank; 5629 double WritebackDRAMClockChangeLatencyHiding; 5630 double TotalPixelBW = 0.0; 5631 int k, j; 5632 5633 v->UrgentWatermark = UrgentLatency + ExtraLatency; 5634 5635 #ifdef __DML_VBA_DEBUG__ 5636 dml_print("DML::%s: UrgentLatency = %f\n", __func__, UrgentLatency); 5637 dml_print("DML::%s: ExtraLatency = %f\n", __func__, ExtraLatency); 5638 dml_print("DML::%s: UrgentWatermark = %f\n", __func__, v->UrgentWatermark); 5639 #endif 5640 5641 v->DRAMClockChangeWatermark = v->DRAMClockChangeLatency + v->UrgentWatermark; 5642 5643 #ifdef __DML_VBA_DEBUG__ 5644 dml_print("DML::%s: v->DRAMClockChangeLatency = %f\n", __func__, v->DRAMClockChangeLatency); 5645 dml_print("DML::%s: DRAMClockChangeWatermark = %f\n", __func__, v->DRAMClockChangeWatermark); 5646 #endif 5647 5648 v->TotalActiveWriteback = 0; 5649 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 5650 if (v->WritebackEnable[k] == true) { 5651 v->TotalActiveWriteback = v->TotalActiveWriteback + 1; 5652 } 5653 } 5654 5655 if (v->TotalActiveWriteback <= 1) { 5656 v->WritebackUrgentWatermark = v->WritebackLatency; 5657 } else { 5658 v->WritebackUrgentWatermark = 
v->WritebackLatency + v->WritebackChunkSize * 1024.0 / 32.0 / SOCCLK; 5659 } 5660 5661 if (v->TotalActiveWriteback <= 1) { 5662 v->WritebackDRAMClockChangeWatermark = v->DRAMClockChangeLatency + v->WritebackLatency; 5663 } else { 5664 v->WritebackDRAMClockChangeWatermark = v->DRAMClockChangeLatency + v->WritebackLatency + v->WritebackChunkSize * 1024.0 / 32.0 / SOCCLK; 5665 } 5666 5667 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 5668 TotalPixelBW = TotalPixelBW 5669 + DPPPerPlane[k] * (SwathWidthY[k] * BytePerPixelDETY[k] * v->VRatio[k] + SwathWidthC[k] * BytePerPixelDETC[k] * v->VRatioChroma[k]) 5670 / (v->HTotal[k] / v->PixelClock[k]); 5671 } 5672 5673 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 5674 double EffectiveDETBufferSizeY = DETBufferSizeY[k]; 5675 5676 v->LBLatencyHidingSourceLinesY = dml_min( 5677 (double) v->MaxLineBufferLines, 5678 dml_floor(v->LineBufferSize / v->LBBitPerPixel[k] / (SwathWidthY[k] / dml_max(v->HRatio[k], 1.0)), 1)) - (v->vtaps[k] - 1); 5679 5680 v->LBLatencyHidingSourceLinesC = dml_min( 5681 (double) v->MaxLineBufferLines, 5682 dml_floor(v->LineBufferSize / v->LBBitPerPixel[k] / (SwathWidthC[k] / dml_max(v->HRatioChroma[k], 1.0)), 1)) - (v->VTAPsChroma[k] - 1); 5683 5684 EffectiveLBLatencyHidingY = v->LBLatencyHidingSourceLinesY / v->VRatio[k] * (v->HTotal[k] / v->PixelClock[k]); 5685 5686 EffectiveLBLatencyHidingC = v->LBLatencyHidingSourceLinesC / v->VRatioChroma[k] * (v->HTotal[k] / v->PixelClock[k]); 5687 5688 if (UnboundedRequestEnabled) { 5689 EffectiveDETBufferSizeY = EffectiveDETBufferSizeY 5690 + CompressedBufferSizeInkByte * 1024 * SwathWidthY[k] * BytePerPixelDETY[k] * v->VRatio[k] / (v->HTotal[k] / v->PixelClock[k]) / TotalPixelBW; 5691 } 5692 5693 LinesInDETY[k] = (double) EffectiveDETBufferSizeY / BytePerPixelDETY[k] / SwathWidthY[k]; 5694 LinesInDETYRoundedDownToSwath[k] = dml_floor(LinesInDETY[k], SwathHeightY[k]); 5695 FullDETBufferingTimeY = LinesInDETYRoundedDownToSwath[k] * (v->HTotal[k] / 
v->PixelClock[k]) / v->VRatio[k]; 5696 if (BytePerPixelDETC[k] > 0) { 5697 LinesInDETC = v->DETBufferSizeC[k] / BytePerPixelDETC[k] / SwathWidthC[k]; 5698 LinesInDETCRoundedDownToSwath = dml_floor(LinesInDETC, SwathHeightC[k]); 5699 FullDETBufferingTimeC = LinesInDETCRoundedDownToSwath * (v->HTotal[k] / v->PixelClock[k]) / v->VRatioChroma[k]; 5700 } else { 5701 LinesInDETC = 0; 5702 FullDETBufferingTimeC = 999999; 5703 } 5704 5705 ActiveDRAMClockChangeLatencyMarginY = EffectiveLBLatencyHidingY + FullDETBufferingTimeY 5706 - ((double) v->DSTXAfterScaler[k] / v->HTotal[k] + v->DSTYAfterScaler[k]) * v->HTotal[k] / v->PixelClock[k] - v->UrgentWatermark - v->DRAMClockChangeWatermark; 5707 5708 if (v->NumberOfActivePlanes > 1) { 5709 ActiveDRAMClockChangeLatencyMarginY = ActiveDRAMClockChangeLatencyMarginY 5710 - (1 - 1.0 / v->NumberOfActivePlanes) * SwathHeightY[k] * v->HTotal[k] / v->PixelClock[k] / v->VRatio[k]; 5711 } 5712 5713 if (BytePerPixelDETC[k] > 0) { 5714 ActiveDRAMClockChangeLatencyMarginC = EffectiveLBLatencyHidingC + FullDETBufferingTimeC 5715 - ((double) v->DSTXAfterScaler[k] / v->HTotal[k] + v->DSTYAfterScaler[k]) * v->HTotal[k] / v->PixelClock[k] - v->UrgentWatermark - v->DRAMClockChangeWatermark; 5716 5717 if (v->NumberOfActivePlanes > 1) { 5718 ActiveDRAMClockChangeLatencyMarginC = ActiveDRAMClockChangeLatencyMarginC 5719 - (1 - 1.0 / v->NumberOfActivePlanes) * SwathHeightC[k] * v->HTotal[k] / v->PixelClock[k] / v->VRatioChroma[k]; 5720 } 5721 v->ActiveDRAMClockChangeLatencyMargin[k] = dml_min(ActiveDRAMClockChangeLatencyMarginY, ActiveDRAMClockChangeLatencyMarginC); 5722 } else { 5723 v->ActiveDRAMClockChangeLatencyMargin[k] = ActiveDRAMClockChangeLatencyMarginY; 5724 } 5725 5726 if (v->WritebackEnable[k] == true) { 5727 WritebackDRAMClockChangeLatencyHiding = v->WritebackInterfaceBufferSize * 1024 5728 / (v->WritebackDestinationWidth[k] * v->WritebackDestinationHeight[k] / (v->WritebackSourceHeight[k] * v->HTotal[k] / v->PixelClock[k]) * 4); 5729 if 
(v->WritebackPixelFormat[k] == dm_444_64) { 5730 WritebackDRAMClockChangeLatencyHiding = WritebackDRAMClockChangeLatencyHiding / 2; 5731 } 5732 WritebackDRAMClockChangeLatencyMargin = WritebackDRAMClockChangeLatencyHiding - v->WritebackDRAMClockChangeWatermark; 5733 v->ActiveDRAMClockChangeLatencyMargin[k] = dml_min(v->ActiveDRAMClockChangeLatencyMargin[k], WritebackDRAMClockChangeLatencyMargin); 5734 } 5735 } 5736 5737 v->MinActiveDRAMClockChangeMargin = 999999; 5738 PlaneWithMinActiveDRAMClockChangeMargin = 0; 5739 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 5740 if (v->ActiveDRAMClockChangeLatencyMargin[k] < v->MinActiveDRAMClockChangeMargin) { 5741 v->MinActiveDRAMClockChangeMargin = v->ActiveDRAMClockChangeLatencyMargin[k]; 5742 if (v->BlendingAndTiming[k] == k) { 5743 PlaneWithMinActiveDRAMClockChangeMargin = k; 5744 } else { 5745 for (j = 0; j < v->NumberOfActivePlanes; ++j) { 5746 if (v->BlendingAndTiming[k] == j) { 5747 PlaneWithMinActiveDRAMClockChangeMargin = j; 5748 } 5749 } 5750 } 5751 } 5752 } 5753 5754 v->MinActiveDRAMClockChangeLatencySupported = v->MinActiveDRAMClockChangeMargin + v->DRAMClockChangeLatency ; 5755 5756 SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank = 999999; 5757 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 5758 if (!((k == PlaneWithMinActiveDRAMClockChangeMargin) && (v->BlendingAndTiming[k] == k)) && !(v->BlendingAndTiming[k] == PlaneWithMinActiveDRAMClockChangeMargin) 5759 && v->ActiveDRAMClockChangeLatencyMargin[k] < SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank) { 5760 SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank = v->ActiveDRAMClockChangeLatencyMargin[k]; 5761 } 5762 } 5763 5764 v->TotalNumberOfActiveOTG = 0; 5765 5766 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 5767 if (v->BlendingAndTiming[k] == k) { 5768 v->TotalNumberOfActiveOTG = v->TotalNumberOfActiveOTG + 1; 5769 } 5770 } 5771 5772 if (v->MinActiveDRAMClockChangeMargin > 0 && PrefetchMode == 0) { 5773 *DRAMClockChangeSupport = 
dm_dram_clock_change_vactive; 5774 } else if ((v->SynchronizedVBlank == true || v->TotalNumberOfActiveOTG == 1 5775 || SecondMinActiveDRAMClockChangeMarginOneDisplayInVBLank > 0) && PrefetchMode == 0) { 5776 *DRAMClockChangeSupport = dm_dram_clock_change_vblank; 5777 } else { 5778 *DRAMClockChangeSupport = dm_dram_clock_change_unsupported; 5779 } 5780 5781 *StutterExitWatermark = v->SRExitTime + ExtraLatency + 10 / DCFCLKDeepSleep; 5782 *StutterEnterPlusExitWatermark = (v->SREnterPlusExitTime + ExtraLatency + 10 / DCFCLKDeepSleep); 5783 *Z8StutterExitWatermark = v->SRExitZ8Time + ExtraLatency + 10 / DCFCLKDeepSleep; 5784 *Z8StutterEnterPlusExitWatermark = v->SREnterPlusExitZ8Time + ExtraLatency + 10 / DCFCLKDeepSleep; 5785 5786 #ifdef __DML_VBA_DEBUG__ 5787 dml_print("DML::%s: StutterExitWatermark = %f\n", __func__, *StutterExitWatermark); 5788 dml_print("DML::%s: StutterEnterPlusExitWatermark = %f\n", __func__, *StutterEnterPlusExitWatermark); 5789 dml_print("DML::%s: Z8StutterExitWatermark = %f\n", __func__, *Z8StutterExitWatermark); 5790 dml_print("DML::%s: Z8StutterEnterPlusExitWatermark = %f\n", __func__, *Z8StutterEnterPlusExitWatermark); 5791 #endif 5792 } 5793 5794 static void CalculateDCFCLKDeepSleep( 5795 struct display_mode_lib *mode_lib, 5796 unsigned int NumberOfActivePlanes, 5797 int BytePerPixelY[], 5798 int BytePerPixelC[], 5799 double VRatio[], 5800 double VRatioChroma[], 5801 double SwathWidthY[], 5802 double SwathWidthC[], 5803 unsigned int DPPPerPlane[], 5804 double HRatio[], 5805 double HRatioChroma[], 5806 double PixelClock[], 5807 double PSCL_THROUGHPUT[], 5808 double PSCL_THROUGHPUT_CHROMA[], 5809 double DPPCLK[], 5810 double ReadBandwidthLuma[], 5811 double ReadBandwidthChroma[], 5812 int ReturnBusWidth, 5813 double *DCFCLKDeepSleep) 5814 { 5815 struct vba_vars_st *v = &mode_lib->vba; 5816 double DisplayPipeLineDeliveryTimeLuma; 5817 double DisplayPipeLineDeliveryTimeChroma; 5818 double ReadBandwidth = 0.0; 5819 int k; 5820 5821 for (k = 0; 
/*
 * Burst factor for a buffer holding BufferTime worth of data:
 * BufferTime / (BufferTime - UrgentLatency). If the buffer does not outlast
 * the urgent latency, the factor is 0 and *NotEnoughUrgentLatencyHiding is
 * set.
 */
static double UrgentBurstFactorForBufferTime(
		double BufferTime,
		double UrgentLatency,
		bool *NotEnoughUrgentLatencyHiding)
{
	const double Slack = BufferTime - UrgentLatency;

	if (Slack <= 0) {
		*NotEnoughUrgentLatencyHiding = 1;
		return 0;
	}
	return BufferTime / Slack;
}

/*
 * Compute the urgent burst factors for the cursor, luma and chroma buffers of
 * one plane.
 *
 * Each buffer size is converted to a time (lines held * LineTime / VRatio) and
 * turned into a factor by the helper above; when VRatio is not positive the
 * factor is 1. *NotEnoughUrgentLatencyHiding is cleared on entry and set if
 * any evaluated buffer cannot cover UrgentLatency.
 *
 * *UrgentBurstFactorCursor is written only when CursorWidth > 0 and
 * *UrgentBurstFactorChroma only when BytePerPixelInDETC > 0; otherwise the
 * caller's previous value is left in place.
 *
 * NOTE(review): VRatioC is never read - the chroma path scales by VRatio.
 * Kept exactly as received; confirm the intent with HW before changing it.
 */
static void CalculateUrgentBurstFactor(
		int swath_width_luma_ub,
		int swath_width_chroma_ub,
		unsigned int SwathHeightY,
		unsigned int SwathHeightC,
		double LineTime,
		double UrgentLatency,
		double CursorBufferSize,
		unsigned int CursorWidth,
		unsigned int CursorBPP,
		double VRatio,
		double VRatioC,
		double BytePerPixelInDETY,
		double BytePerPixelInDETC,
		double DETBufferSizeY,
		double DETBufferSizeC,
		double *UrgentBurstFactorCursor,
		double *UrgentBurstFactorLuma,
		double *UrgentBurstFactorChroma,
		bool *NotEnoughUrgentLatencyHiding)
{
	double LumaLines;

	*NotEnoughUrgentLatencyHiding = 0;

	if (CursorWidth > 0) {
		/* Cursor lines held by the buffer, rounded down to a power of two. */
		const unsigned int CursorLines = 1 << (unsigned int) dml_floor(dml_log2(CursorBufferSize * 1024.0 / (CursorWidth * CursorBPP / 8.0)), 1.0);

		if (VRatio > 0) {
			*UrgentBurstFactorCursor = UrgentBurstFactorForBufferTime(
					CursorLines * LineTime / VRatio,
					UrgentLatency,
					NotEnoughUrgentLatencyHiding);
		} else {
			*UrgentBurstFactorCursor = 1;
		}
	}

	LumaLines = DETBufferSizeY / BytePerPixelInDETY / swath_width_luma_ub;
	if (VRatio > 0) {
		/* Only whole swaths held in the DET count towards the buffered time. */
		*UrgentBurstFactorLuma = UrgentBurstFactorForBufferTime(
				dml_floor(LumaLines, SwathHeightY) * LineTime / VRatio,
				UrgentLatency,
				NotEnoughUrgentLatencyHiding);
	} else {
		*UrgentBurstFactorLuma = 1;
	}

	if (BytePerPixelInDETC > 0) {
		const double ChromaLines = DETBufferSizeC / BytePerPixelInDETC / swath_width_chroma_ub;

		if (VRatio > 0) {
			*UrgentBurstFactorChroma = UrgentBurstFactorForBufferTime(
					dml_floor(ChromaLines, SwathHeightC) * LineTime / VRatio,
					UrgentLatency,
					NotEnoughUrgentLatencyHiding);
		} else {
			*UrgentBurstFactorChroma = 1;
		}
	}
}
= DETBufferSizeInTimeChroma / (DETBufferSizeInTimeChroma - UrgentLatency); 5926 } 5927 } else { 5928 *UrgentBurstFactorChroma = 1; 5929 } 5930 } 5931 } 5932 5933 static void CalculatePixelDeliveryTimes( 5934 unsigned int NumberOfActivePlanes, 5935 double VRatio[], 5936 double VRatioChroma[], 5937 double VRatioPrefetchY[], 5938 double VRatioPrefetchC[], 5939 unsigned int swath_width_luma_ub[], 5940 unsigned int swath_width_chroma_ub[], 5941 unsigned int DPPPerPlane[], 5942 double HRatio[], 5943 double HRatioChroma[], 5944 double PixelClock[], 5945 double PSCL_THROUGHPUT[], 5946 double PSCL_THROUGHPUT_CHROMA[], 5947 double DPPCLK[], 5948 int BytePerPixelC[], 5949 enum scan_direction_class SourceScan[], 5950 unsigned int NumberOfCursors[], 5951 unsigned int CursorWidth[][DC__NUM_CURSOR__MAX], 5952 unsigned int CursorBPP[][DC__NUM_CURSOR__MAX], 5953 unsigned int BlockWidth256BytesY[], 5954 unsigned int BlockHeight256BytesY[], 5955 unsigned int BlockWidth256BytesC[], 5956 unsigned int BlockHeight256BytesC[], 5957 double DisplayPipeLineDeliveryTimeLuma[], 5958 double DisplayPipeLineDeliveryTimeChroma[], 5959 double DisplayPipeLineDeliveryTimeLumaPrefetch[], 5960 double DisplayPipeLineDeliveryTimeChromaPrefetch[], 5961 double DisplayPipeRequestDeliveryTimeLuma[], 5962 double DisplayPipeRequestDeliveryTimeChroma[], 5963 double DisplayPipeRequestDeliveryTimeLumaPrefetch[], 5964 double DisplayPipeRequestDeliveryTimeChromaPrefetch[], 5965 double CursorRequestDeliveryTime[], 5966 double CursorRequestDeliveryTimePrefetch[]) 5967 { 5968 double req_per_swath_ub; 5969 int k; 5970 5971 for (k = 0; k < NumberOfActivePlanes; ++k) { 5972 if (VRatio[k] <= 1) { 5973 DisplayPipeLineDeliveryTimeLuma[k] = swath_width_luma_ub[k] * DPPPerPlane[k] / HRatio[k] / PixelClock[k]; 5974 } else { 5975 DisplayPipeLineDeliveryTimeLuma[k] = swath_width_luma_ub[k] / PSCL_THROUGHPUT[k] / DPPCLK[k]; 5976 } 5977 5978 if (BytePerPixelC[k] == 0) { 5979 DisplayPipeLineDeliveryTimeChroma[k] = 0; 5980 } else { 
5981 if (VRatioChroma[k] <= 1) { 5982 DisplayPipeLineDeliveryTimeChroma[k] = swath_width_chroma_ub[k] * DPPPerPlane[k] / HRatioChroma[k] / PixelClock[k]; 5983 } else { 5984 DisplayPipeLineDeliveryTimeChroma[k] = swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / DPPCLK[k]; 5985 } 5986 } 5987 5988 if (VRatioPrefetchY[k] <= 1) { 5989 DisplayPipeLineDeliveryTimeLumaPrefetch[k] = swath_width_luma_ub[k] * DPPPerPlane[k] / HRatio[k] / PixelClock[k]; 5990 } else { 5991 DisplayPipeLineDeliveryTimeLumaPrefetch[k] = swath_width_luma_ub[k] / PSCL_THROUGHPUT[k] / DPPCLK[k]; 5992 } 5993 5994 if (BytePerPixelC[k] == 0) { 5995 DisplayPipeLineDeliveryTimeChromaPrefetch[k] = 0; 5996 } else { 5997 if (VRatioPrefetchC[k] <= 1) { 5998 DisplayPipeLineDeliveryTimeChromaPrefetch[k] = swath_width_chroma_ub[k] * DPPPerPlane[k] / HRatioChroma[k] / PixelClock[k]; 5999 } else { 6000 DisplayPipeLineDeliveryTimeChromaPrefetch[k] = swath_width_chroma_ub[k] / PSCL_THROUGHPUT_CHROMA[k] / DPPCLK[k]; 6001 } 6002 } 6003 } 6004 6005 for (k = 0; k < NumberOfActivePlanes; ++k) { 6006 if (SourceScan[k] != dm_vert) { 6007 req_per_swath_ub = swath_width_luma_ub[k] / BlockWidth256BytesY[k]; 6008 } else { 6009 req_per_swath_ub = swath_width_luma_ub[k] / BlockHeight256BytesY[k]; 6010 } 6011 DisplayPipeRequestDeliveryTimeLuma[k] = DisplayPipeLineDeliveryTimeLuma[k] / req_per_swath_ub; 6012 DisplayPipeRequestDeliveryTimeLumaPrefetch[k] = DisplayPipeLineDeliveryTimeLumaPrefetch[k] / req_per_swath_ub; 6013 if (BytePerPixelC[k] == 0) { 6014 DisplayPipeRequestDeliveryTimeChroma[k] = 0; 6015 DisplayPipeRequestDeliveryTimeChromaPrefetch[k] = 0; 6016 } else { 6017 if (SourceScan[k] != dm_vert) { 6018 req_per_swath_ub = swath_width_chroma_ub[k] / BlockWidth256BytesC[k]; 6019 } else { 6020 req_per_swath_ub = swath_width_chroma_ub[k] / BlockHeight256BytesC[k]; 6021 } 6022 DisplayPipeRequestDeliveryTimeChroma[k] = DisplayPipeLineDeliveryTimeChroma[k] / req_per_swath_ub; 6023 
DisplayPipeRequestDeliveryTimeChromaPrefetch[k] = DisplayPipeLineDeliveryTimeChromaPrefetch[k] / req_per_swath_ub; 6024 } 6025 #ifdef __DML_VBA_DEBUG__ 6026 dml_print("DML::%s: k=%d : HRatio = %f\n", __func__, k, HRatio[k]); 6027 dml_print("DML::%s: k=%d : VRatio = %f\n", __func__, k, VRatio[k]); 6028 dml_print("DML::%s: k=%d : HRatioChroma = %f\n", __func__, k, HRatioChroma[k]); 6029 dml_print("DML::%s: k=%d : VRatioChroma = %f\n", __func__, k, VRatioChroma[k]); 6030 dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeLuma = %f\n", __func__, k, DisplayPipeLineDeliveryTimeLuma[k]); 6031 dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeLumaPrefetch = %f\n", __func__, k, DisplayPipeLineDeliveryTimeLumaPrefetch[k]); 6032 dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeChroma = %f\n", __func__, k, DisplayPipeLineDeliveryTimeChroma[k]); 6033 dml_print("DML::%s: k=%d : DisplayPipeLineDeliveryTimeChromaPrefetch = %f\n", __func__, k, DisplayPipeLineDeliveryTimeChromaPrefetch[k]); 6034 dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeLuma = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeLuma[k]); 6035 dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeLumaPrefetch = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeLumaPrefetch[k]); 6036 dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeChroma = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeChroma[k]); 6037 dml_print("DML::%s: k=%d : DisplayPipeRequestDeliveryTimeChromaPrefetch = %f\n", __func__, k, DisplayPipeRequestDeliveryTimeChromaPrefetch[k]); 6038 #endif 6039 } 6040 6041 for (k = 0; k < NumberOfActivePlanes; ++k) { 6042 int cursor_req_per_width; 6043 6044 cursor_req_per_width = dml_ceil(CursorWidth[k][0] * CursorBPP[k][0] / 256 / 8, 1); 6045 if (NumberOfCursors[k] > 0) { 6046 if (VRatio[k] <= 1) { 6047 CursorRequestDeliveryTime[k] = CursorWidth[k][0] / HRatio[k] / PixelClock[k] / cursor_req_per_width; 6048 } else { 6049 CursorRequestDeliveryTime[k] = CursorWidth[k][0] / 
PSCL_THROUGHPUT[k] / DPPCLK[k] / cursor_req_per_width; 6050 } 6051 if (VRatioPrefetchY[k] <= 1) { 6052 CursorRequestDeliveryTimePrefetch[k] = CursorWidth[k][0] / HRatio[k] / PixelClock[k] / cursor_req_per_width; 6053 } else { 6054 CursorRequestDeliveryTimePrefetch[k] = CursorWidth[k][0] / PSCL_THROUGHPUT[k] / DPPCLK[k] / cursor_req_per_width; 6055 } 6056 } else { 6057 CursorRequestDeliveryTime[k] = 0; 6058 CursorRequestDeliveryTimePrefetch[k] = 0; 6059 } 6060 #ifdef __DML_VBA_DEBUG__ 6061 dml_print("DML::%s: k=%d : NumberOfCursors = %d\n", __func__, k, NumberOfCursors[k]); 6062 dml_print("DML::%s: k=%d : CursorRequestDeliveryTime = %f\n", __func__, k, CursorRequestDeliveryTime[k]); 6063 dml_print("DML::%s: k=%d : CursorRequestDeliveryTimePrefetch = %f\n", __func__, k, CursorRequestDeliveryTimePrefetch[k]); 6064 #endif 6065 } 6066 } 6067 6068 static void CalculateMetaAndPTETimes( 6069 int NumberOfActivePlanes, 6070 bool GPUVMEnable, 6071 int MetaChunkSize, 6072 int MinMetaChunkSizeBytes, 6073 int HTotal[], 6074 double VRatio[], 6075 double VRatioChroma[], 6076 double DestinationLinesToRequestRowInVBlank[], 6077 double DestinationLinesToRequestRowInImmediateFlip[], 6078 bool DCCEnable[], 6079 double PixelClock[], 6080 int BytePerPixelY[], 6081 int BytePerPixelC[], 6082 enum scan_direction_class SourceScan[], 6083 int dpte_row_height[], 6084 int dpte_row_height_chroma[], 6085 int meta_row_width[], 6086 int meta_row_width_chroma[], 6087 int meta_row_height[], 6088 int meta_row_height_chroma[], 6089 int meta_req_width[], 6090 int meta_req_width_chroma[], 6091 int meta_req_height[], 6092 int meta_req_height_chroma[], 6093 int dpte_group_bytes[], 6094 int PTERequestSizeY[], 6095 int PTERequestSizeC[], 6096 int PixelPTEReqWidthY[], 6097 int PixelPTEReqHeightY[], 6098 int PixelPTEReqWidthC[], 6099 int PixelPTEReqHeightC[], 6100 int dpte_row_width_luma_ub[], 6101 int dpte_row_width_chroma_ub[], 6102 double DST_Y_PER_PTE_ROW_NOM_L[], 6103 double DST_Y_PER_PTE_ROW_NOM_C[], 
6104 double DST_Y_PER_META_ROW_NOM_L[], 6105 double DST_Y_PER_META_ROW_NOM_C[], 6106 double TimePerMetaChunkNominal[], 6107 double TimePerChromaMetaChunkNominal[], 6108 double TimePerMetaChunkVBlank[], 6109 double TimePerChromaMetaChunkVBlank[], 6110 double TimePerMetaChunkFlip[], 6111 double TimePerChromaMetaChunkFlip[], 6112 double time_per_pte_group_nom_luma[], 6113 double time_per_pte_group_vblank_luma[], 6114 double time_per_pte_group_flip_luma[], 6115 double time_per_pte_group_nom_chroma[], 6116 double time_per_pte_group_vblank_chroma[], 6117 double time_per_pte_group_flip_chroma[]) 6118 { 6119 unsigned int meta_chunk_width; 6120 unsigned int min_meta_chunk_width; 6121 unsigned int meta_chunk_per_row_int; 6122 unsigned int meta_row_remainder; 6123 unsigned int meta_chunk_threshold; 6124 unsigned int meta_chunks_per_row_ub; 6125 unsigned int meta_chunk_width_chroma; 6126 unsigned int min_meta_chunk_width_chroma; 6127 unsigned int meta_chunk_per_row_int_chroma; 6128 unsigned int meta_row_remainder_chroma; 6129 unsigned int meta_chunk_threshold_chroma; 6130 unsigned int meta_chunks_per_row_ub_chroma; 6131 unsigned int dpte_group_width_luma; 6132 unsigned int dpte_groups_per_row_luma_ub; 6133 unsigned int dpte_group_width_chroma; 6134 unsigned int dpte_groups_per_row_chroma_ub; 6135 int k; 6136 6137 for (k = 0; k < NumberOfActivePlanes; ++k) { 6138 DST_Y_PER_PTE_ROW_NOM_L[k] = dpte_row_height[k] / VRatio[k]; 6139 if (BytePerPixelC[k] == 0) { 6140 DST_Y_PER_PTE_ROW_NOM_C[k] = 0; 6141 } else { 6142 DST_Y_PER_PTE_ROW_NOM_C[k] = dpte_row_height_chroma[k] / VRatioChroma[k]; 6143 } 6144 DST_Y_PER_META_ROW_NOM_L[k] = meta_row_height[k] / VRatio[k]; 6145 if (BytePerPixelC[k] == 0) { 6146 DST_Y_PER_META_ROW_NOM_C[k] = 0; 6147 } else { 6148 DST_Y_PER_META_ROW_NOM_C[k] = meta_row_height_chroma[k] / VRatioChroma[k]; 6149 } 6150 } 6151 6152 for (k = 0; k < NumberOfActivePlanes; ++k) { 6153 if (DCCEnable[k] == true) { 6154 meta_chunk_width = MetaChunkSize * 1024 * 256 / 
BytePerPixelY[k] / meta_row_height[k]; 6155 min_meta_chunk_width = MinMetaChunkSizeBytes * 256 / BytePerPixelY[k] / meta_row_height[k]; 6156 meta_chunk_per_row_int = meta_row_width[k] / meta_chunk_width; 6157 meta_row_remainder = meta_row_width[k] % meta_chunk_width; 6158 if (SourceScan[k] != dm_vert) { 6159 meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_width[k]; 6160 } else { 6161 meta_chunk_threshold = 2 * min_meta_chunk_width - meta_req_height[k]; 6162 } 6163 if (meta_row_remainder <= meta_chunk_threshold) { 6164 meta_chunks_per_row_ub = meta_chunk_per_row_int + 1; 6165 } else { 6166 meta_chunks_per_row_ub = meta_chunk_per_row_int + 2; 6167 } 6168 TimePerMetaChunkNominal[k] = meta_row_height[k] / VRatio[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub; 6169 TimePerMetaChunkVBlank[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub; 6170 TimePerMetaChunkFlip[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub; 6171 if (BytePerPixelC[k] == 0) { 6172 TimePerChromaMetaChunkNominal[k] = 0; 6173 TimePerChromaMetaChunkVBlank[k] = 0; 6174 TimePerChromaMetaChunkFlip[k] = 0; 6175 } else { 6176 meta_chunk_width_chroma = MetaChunkSize * 1024 * 256 / BytePerPixelC[k] / meta_row_height_chroma[k]; 6177 min_meta_chunk_width_chroma = MinMetaChunkSizeBytes * 256 / BytePerPixelC[k] / meta_row_height_chroma[k]; 6178 meta_chunk_per_row_int_chroma = (double) meta_row_width_chroma[k] / meta_chunk_width_chroma; 6179 meta_row_remainder_chroma = meta_row_width_chroma[k] % meta_chunk_width_chroma; 6180 if (SourceScan[k] != dm_vert) { 6181 meta_chunk_threshold_chroma = 2 * min_meta_chunk_width_chroma - meta_req_width_chroma[k]; 6182 } else { 6183 meta_chunk_threshold_chroma = 2 * min_meta_chunk_width_chroma - meta_req_height_chroma[k]; 6184 } 6185 if (meta_row_remainder_chroma <= meta_chunk_threshold_chroma) { 6186 meta_chunks_per_row_ub_chroma = 
meta_chunk_per_row_int_chroma + 1; 6187 } else { 6188 meta_chunks_per_row_ub_chroma = meta_chunk_per_row_int_chroma + 2; 6189 } 6190 TimePerChromaMetaChunkNominal[k] = meta_row_height_chroma[k] / VRatioChroma[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma; 6191 TimePerChromaMetaChunkVBlank[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma; 6192 TimePerChromaMetaChunkFlip[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / meta_chunks_per_row_ub_chroma; 6193 } 6194 } else { 6195 TimePerMetaChunkNominal[k] = 0; 6196 TimePerMetaChunkVBlank[k] = 0; 6197 TimePerMetaChunkFlip[k] = 0; 6198 TimePerChromaMetaChunkNominal[k] = 0; 6199 TimePerChromaMetaChunkVBlank[k] = 0; 6200 TimePerChromaMetaChunkFlip[k] = 0; 6201 } 6202 } 6203 6204 for (k = 0; k < NumberOfActivePlanes; ++k) { 6205 if (GPUVMEnable == true) { 6206 if (SourceScan[k] != dm_vert) { 6207 dpte_group_width_luma = dpte_group_bytes[k] / PTERequestSizeY[k] * PixelPTEReqWidthY[k]; 6208 } else { 6209 dpte_group_width_luma = dpte_group_bytes[k] / PTERequestSizeY[k] * PixelPTEReqHeightY[k]; 6210 } 6211 dpte_groups_per_row_luma_ub = dml_ceil(1.0 * dpte_row_width_luma_ub[k] / dpte_group_width_luma, 1); 6212 time_per_pte_group_nom_luma[k] = DST_Y_PER_PTE_ROW_NOM_L[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub; 6213 time_per_pte_group_vblank_luma[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub; 6214 time_per_pte_group_flip_luma[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_luma_ub; 6215 if (BytePerPixelC[k] == 0) { 6216 time_per_pte_group_nom_chroma[k] = 0; 6217 time_per_pte_group_vblank_chroma[k] = 0; 6218 time_per_pte_group_flip_chroma[k] = 0; 6219 } else { 6220 if (SourceScan[k] != dm_vert) { 6221 dpte_group_width_chroma = dpte_group_bytes[k] / PTERequestSizeC[k] * PixelPTEReqWidthC[k]; 6222 } else 
{ 6223 dpte_group_width_chroma = dpte_group_bytes[k] / PTERequestSizeC[k] * PixelPTEReqHeightC[k]; 6224 } 6225 dpte_groups_per_row_chroma_ub = dml_ceil(1.0 * dpte_row_width_chroma_ub[k] / dpte_group_width_chroma, 1); 6226 time_per_pte_group_nom_chroma[k] = DST_Y_PER_PTE_ROW_NOM_C[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub; 6227 time_per_pte_group_vblank_chroma[k] = DestinationLinesToRequestRowInVBlank[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub; 6228 time_per_pte_group_flip_chroma[k] = DestinationLinesToRequestRowInImmediateFlip[k] * HTotal[k] / PixelClock[k] / dpte_groups_per_row_chroma_ub; 6229 } 6230 } else { 6231 time_per_pte_group_nom_luma[k] = 0; 6232 time_per_pte_group_vblank_luma[k] = 0; 6233 time_per_pte_group_flip_luma[k] = 0; 6234 time_per_pte_group_nom_chroma[k] = 0; 6235 time_per_pte_group_vblank_chroma[k] = 0; 6236 time_per_pte_group_flip_chroma[k] = 0; 6237 } 6238 } 6239 } 6240 6241 static void CalculateVMGroupAndRequestTimes( 6242 unsigned int NumberOfActivePlanes, 6243 bool GPUVMEnable, 6244 unsigned int GPUVMMaxPageTableLevels, 6245 unsigned int HTotal[], 6246 int BytePerPixelC[], 6247 double DestinationLinesToRequestVMInVBlank[], 6248 double DestinationLinesToRequestVMInImmediateFlip[], 6249 bool DCCEnable[], 6250 double PixelClock[], 6251 int dpte_row_width_luma_ub[], 6252 int dpte_row_width_chroma_ub[], 6253 int vm_group_bytes[], 6254 unsigned int dpde0_bytes_per_frame_ub_l[], 6255 unsigned int dpde0_bytes_per_frame_ub_c[], 6256 int meta_pte_bytes_per_frame_ub_l[], 6257 int meta_pte_bytes_per_frame_ub_c[], 6258 double TimePerVMGroupVBlank[], 6259 double TimePerVMGroupFlip[], 6260 double TimePerVMRequestVBlank[], 6261 double TimePerVMRequestFlip[]) 6262 { 6263 int num_group_per_lower_vm_stage; 6264 int num_req_per_lower_vm_stage; 6265 int k; 6266 6267 for (k = 0; k < NumberOfActivePlanes; ++k) { 6268 if (GPUVMEnable == true && (DCCEnable[k] == true || GPUVMMaxPageTableLevels > 1)) { 6269 if (DCCEnable[k] == 
false) { 6270 if (BytePerPixelC[k] > 0) { 6271 num_group_per_lower_vm_stage = dml_ceil((double) (dpde0_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1) 6272 + dml_ceil((double) (dpde0_bytes_per_frame_ub_c[k]) / (double) (vm_group_bytes[k]), 1); 6273 } else { 6274 num_group_per_lower_vm_stage = dml_ceil((double) (dpde0_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1); 6275 } 6276 } else { 6277 if (GPUVMMaxPageTableLevels == 1) { 6278 if (BytePerPixelC[k] > 0) { 6279 num_group_per_lower_vm_stage = dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1) 6280 + dml_ceil((double) (meta_pte_bytes_per_frame_ub_c[k]) / (double) (vm_group_bytes[k]), 1); 6281 } else { 6282 num_group_per_lower_vm_stage = dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1); 6283 } 6284 } else { 6285 if (BytePerPixelC[k] > 0) { 6286 num_group_per_lower_vm_stage = 2 + dml_ceil((double) (dpde0_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1) 6287 + dml_ceil((double) (dpde0_bytes_per_frame_ub_c[k]) / (double) (vm_group_bytes[k]), 1) 6288 + dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1) 6289 + dml_ceil((double) (meta_pte_bytes_per_frame_ub_c[k]) / (double) (vm_group_bytes[k]), 1); 6290 } else { 6291 num_group_per_lower_vm_stage = 1 + dml_ceil((double) (dpde0_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1) 6292 + dml_ceil((double) (meta_pte_bytes_per_frame_ub_l[k]) / (double) (vm_group_bytes[k]), 1); 6293 } 6294 } 6295 } 6296 6297 if (DCCEnable[k] == false) { 6298 if (BytePerPixelC[k] > 0) { 6299 num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k] / 64 + dpde0_bytes_per_frame_ub_c[k] / 64; 6300 } else { 6301 num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k] / 64; 6302 } 6303 } else { 6304 if (GPUVMMaxPageTableLevels == 1) { 6305 if (BytePerPixelC[k] > 0) { 6306 num_req_per_lower_vm_stage = meta_pte_bytes_per_frame_ub_l[k] / 64 + 
meta_pte_bytes_per_frame_ub_c[k] / 64; 6307 } else { 6308 num_req_per_lower_vm_stage = meta_pte_bytes_per_frame_ub_l[k] / 64; 6309 } 6310 } else { 6311 if (BytePerPixelC[k] > 0) { 6312 num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k] / 64 + dpde0_bytes_per_frame_ub_c[k] / 64 6313 + meta_pte_bytes_per_frame_ub_l[k] / 64 + meta_pte_bytes_per_frame_ub_c[k] / 64; 6314 } else { 6315 num_req_per_lower_vm_stage = dpde0_bytes_per_frame_ub_l[k] / 64 + meta_pte_bytes_per_frame_ub_l[k] / 64; 6316 } 6317 } 6318 } 6319 6320 TimePerVMGroupVBlank[k] = DestinationLinesToRequestVMInVBlank[k] * HTotal[k] / PixelClock[k] / num_group_per_lower_vm_stage; 6321 TimePerVMGroupFlip[k] = DestinationLinesToRequestVMInImmediateFlip[k] * HTotal[k] / PixelClock[k] / num_group_per_lower_vm_stage; 6322 TimePerVMRequestVBlank[k] = DestinationLinesToRequestVMInVBlank[k] * HTotal[k] / PixelClock[k] / num_req_per_lower_vm_stage; 6323 TimePerVMRequestFlip[k] = DestinationLinesToRequestVMInImmediateFlip[k] * HTotal[k] / PixelClock[k] / num_req_per_lower_vm_stage; 6324 6325 if (GPUVMMaxPageTableLevels > 2) { 6326 TimePerVMGroupVBlank[k] = TimePerVMGroupVBlank[k] / 2; 6327 TimePerVMGroupFlip[k] = TimePerVMGroupFlip[k] / 2; 6328 TimePerVMRequestVBlank[k] = TimePerVMRequestVBlank[k] / 2; 6329 TimePerVMRequestFlip[k] = TimePerVMRequestFlip[k] / 2; 6330 } 6331 6332 } else { 6333 TimePerVMGroupVBlank[k] = 0; 6334 TimePerVMGroupFlip[k] = 0; 6335 TimePerVMRequestVBlank[k] = 0; 6336 TimePerVMRequestFlip[k] = 0; 6337 } 6338 } 6339 } 6340 6341 static void CalculateStutterEfficiency( 6342 struct display_mode_lib *mode_lib, 6343 int CompressedBufferSizeInkByte, 6344 bool UnboundedRequestEnabled, 6345 int ConfigReturnBufferSizeInKByte, 6346 int MetaFIFOSizeInKEntries, 6347 int ZeroSizeBufferEntries, 6348 int NumberOfActivePlanes, 6349 int ROBBufferSizeInKByte, 6350 double TotalDataReadBandwidth, 6351 double DCFCLK, 6352 double ReturnBW, 6353 double COMPBUF_RESERVED_SPACE_64B, 6354 double 
COMPBUF_RESERVED_SPACE_ZS, 6355 double SRExitTime, 6356 double SRExitZ8Time, 6357 bool SynchronizedVBlank, 6358 double Z8StutterEnterPlusExitWatermark, 6359 double StutterEnterPlusExitWatermark, 6360 bool ProgressiveToInterlaceUnitInOPP, 6361 bool Interlace[], 6362 double MinTTUVBlank[], 6363 int DPPPerPlane[], 6364 unsigned int DETBufferSizeY[], 6365 int BytePerPixelY[], 6366 double BytePerPixelDETY[], 6367 double SwathWidthY[], 6368 int SwathHeightY[], 6369 int SwathHeightC[], 6370 double NetDCCRateLuma[], 6371 double NetDCCRateChroma[], 6372 double DCCFractionOfZeroSizeRequestsLuma[], 6373 double DCCFractionOfZeroSizeRequestsChroma[], 6374 int HTotal[], 6375 int VTotal[], 6376 double PixelClock[], 6377 double VRatio[], 6378 enum scan_direction_class SourceScan[], 6379 int BlockHeight256BytesY[], 6380 int BlockWidth256BytesY[], 6381 int BlockHeight256BytesC[], 6382 int BlockWidth256BytesC[], 6383 int DCCYMaxUncompressedBlock[], 6384 int DCCCMaxUncompressedBlock[], 6385 int VActive[], 6386 bool DCCEnable[], 6387 bool WritebackEnable[], 6388 double ReadBandwidthPlaneLuma[], 6389 double ReadBandwidthPlaneChroma[], 6390 double meta_row_bw[], 6391 double dpte_row_bw[], 6392 double *StutterEfficiencyNotIncludingVBlank, 6393 double *StutterEfficiency, 6394 int *NumberOfStutterBurstsPerFrame, 6395 double *Z8StutterEfficiencyNotIncludingVBlank, 6396 double *Z8StutterEfficiency, 6397 int *Z8NumberOfStutterBurstsPerFrame, 6398 double *StutterPeriod) 6399 { 6400 struct vba_vars_st *v = &mode_lib->vba; 6401 6402 double DETBufferingTimeY; 6403 double SwathWidthYCriticalPlane = 0; 6404 double VActiveTimeCriticalPlane = 0; 6405 double FrameTimeCriticalPlane = 0; 6406 int BytePerPixelYCriticalPlane = 0; 6407 double LinesToFinishSwathTransferStutterCriticalPlane = 0; 6408 double MinTTUVBlankCriticalPlane = 0; 6409 double TotalCompressedReadBandwidth; 6410 double TotalRowReadBandwidth; 6411 double AverageDCCCompressionRate; 6412 double EffectiveCompressedBufferSize; 6413 double 
PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer; 6414 double StutterBurstTime; 6415 int TotalActiveWriteback; 6416 double LinesInDETY; 6417 double LinesInDETYRoundedDownToSwath; 6418 double MaximumEffectiveCompressionLuma; 6419 double MaximumEffectiveCompressionChroma; 6420 double TotalZeroSizeRequestReadBandwidth; 6421 double TotalZeroSizeCompressedReadBandwidth; 6422 double AverageDCCZeroSizeFraction; 6423 double AverageZeroSizeCompressionRate; 6424 int TotalNumberOfActiveOTG = 0; 6425 double LastStutterPeriod = 0.0; 6426 double LastZ8StutterPeriod = 0.0; 6427 int k; 6428 6429 TotalZeroSizeRequestReadBandwidth = 0; 6430 TotalZeroSizeCompressedReadBandwidth = 0; 6431 TotalRowReadBandwidth = 0; 6432 TotalCompressedReadBandwidth = 0; 6433 6434 for (k = 0; k < NumberOfActivePlanes; ++k) { 6435 if (DCCEnable[k] == true) { 6436 if ((SourceScan[k] == dm_vert && BlockWidth256BytesY[k] > SwathHeightY[k]) || (SourceScan[k] != dm_vert && BlockHeight256BytesY[k] > SwathHeightY[k]) 6437 || DCCYMaxUncompressedBlock[k] < 256) { 6438 MaximumEffectiveCompressionLuma = 2; 6439 } else { 6440 MaximumEffectiveCompressionLuma = 4; 6441 } 6442 TotalCompressedReadBandwidth = TotalCompressedReadBandwidth + ReadBandwidthPlaneLuma[k] / dml_min(NetDCCRateLuma[k], MaximumEffectiveCompressionLuma); 6443 TotalZeroSizeRequestReadBandwidth = TotalZeroSizeRequestReadBandwidth + ReadBandwidthPlaneLuma[k] * DCCFractionOfZeroSizeRequestsLuma[k]; 6444 TotalZeroSizeCompressedReadBandwidth = TotalZeroSizeCompressedReadBandwidth 6445 + ReadBandwidthPlaneLuma[k] * DCCFractionOfZeroSizeRequestsLuma[k] / MaximumEffectiveCompressionLuma; 6446 if (ReadBandwidthPlaneChroma[k] > 0) { 6447 if ((SourceScan[k] == dm_vert && BlockWidth256BytesC[k] > SwathHeightC[k]) 6448 || (SourceScan[k] != dm_vert && BlockHeight256BytesC[k] > SwathHeightC[k]) || DCCCMaxUncompressedBlock[k] < 256) { 6449 MaximumEffectiveCompressionChroma = 2; 6450 } else { 6451 MaximumEffectiveCompressionChroma = 4; 6452 } 6453 
TotalCompressedReadBandwidth = TotalCompressedReadBandwidth 6454 + ReadBandwidthPlaneChroma[k] / dml_min(NetDCCRateChroma[k], MaximumEffectiveCompressionChroma); 6455 TotalZeroSizeRequestReadBandwidth = TotalZeroSizeRequestReadBandwidth + ReadBandwidthPlaneChroma[k] * DCCFractionOfZeroSizeRequestsChroma[k]; 6456 TotalZeroSizeCompressedReadBandwidth = TotalZeroSizeCompressedReadBandwidth 6457 + ReadBandwidthPlaneChroma[k] * DCCFractionOfZeroSizeRequestsChroma[k] / MaximumEffectiveCompressionChroma; 6458 } 6459 } else { 6460 TotalCompressedReadBandwidth = TotalCompressedReadBandwidth + ReadBandwidthPlaneLuma[k] + ReadBandwidthPlaneChroma[k]; 6461 } 6462 TotalRowReadBandwidth = TotalRowReadBandwidth + DPPPerPlane[k] * (meta_row_bw[k] + dpte_row_bw[k]); 6463 } 6464 6465 AverageDCCCompressionRate = TotalDataReadBandwidth / TotalCompressedReadBandwidth; 6466 AverageDCCZeroSizeFraction = TotalZeroSizeRequestReadBandwidth / TotalDataReadBandwidth; 6467 6468 #ifdef __DML_VBA_DEBUG__ 6469 dml_print("DML::%s: TotalCompressedReadBandwidth = %f\n", __func__, TotalCompressedReadBandwidth); 6470 dml_print("DML::%s: TotalZeroSizeRequestReadBandwidth = %f\n", __func__, TotalZeroSizeRequestReadBandwidth); 6471 dml_print("DML::%s: TotalZeroSizeCompressedReadBandwidth = %f\n", __func__, TotalZeroSizeCompressedReadBandwidth); 6472 dml_print("DML::%s: MaximumEffectiveCompressionLuma = %f\n", __func__, MaximumEffectiveCompressionLuma); 6473 dml_print("DML::%s: MaximumEffectiveCompressionChroma = %f\n", __func__, MaximumEffectiveCompressionChroma); 6474 dml_print("DML::%s: AverageDCCCompressionRate = %f\n", __func__, AverageDCCCompressionRate); 6475 dml_print("DML::%s: AverageDCCZeroSizeFraction = %f\n", __func__, AverageDCCZeroSizeFraction); 6476 dml_print("DML::%s: CompressedBufferSizeInkByte = %d\n", __func__, CompressedBufferSizeInkByte); 6477 #endif 6478 6479 if (AverageDCCZeroSizeFraction == 1) { 6480 AverageZeroSizeCompressionRate = TotalZeroSizeRequestReadBandwidth / 
TotalZeroSizeCompressedReadBandwidth; 6481 EffectiveCompressedBufferSize = MetaFIFOSizeInKEntries * 1024 * 64 * AverageZeroSizeCompressionRate + (ZeroSizeBufferEntries - COMPBUF_RESERVED_SPACE_ZS) * 64 * AverageZeroSizeCompressionRate; 6482 } else if (AverageDCCZeroSizeFraction > 0) { 6483 AverageZeroSizeCompressionRate = TotalZeroSizeRequestReadBandwidth / TotalZeroSizeCompressedReadBandwidth; 6484 EffectiveCompressedBufferSize = dml_min( 6485 CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate, 6486 MetaFIFOSizeInKEntries * 1024 * 64 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate + 1 / AverageDCCCompressionRate)) 6487 + dml_min((ROBBufferSizeInKByte * 1024 - COMPBUF_RESERVED_SPACE_64B * 64) * AverageDCCCompressionRate, 6488 (ZeroSizeBufferEntries - COMPBUF_RESERVED_SPACE_ZS) * 64 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate)); 6489 dml_print("DML::%s: min 1 = %f\n", __func__, CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate); 6490 dml_print( 6491 "DML::%s: min 2 = %f\n", 6492 __func__, 6493 MetaFIFOSizeInKEntries * 1024 * 64 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate + 1 / AverageDCCCompressionRate)); 6494 dml_print("DML::%s: min 3 = %f\n", __func__, ROBBufferSizeInKByte * 1024 * AverageDCCCompressionRate); 6495 dml_print("DML::%s: min 4 = %f\n", __func__, ZeroSizeBufferEntries * 64 / (AverageDCCZeroSizeFraction / AverageZeroSizeCompressionRate)); 6496 } else { 6497 EffectiveCompressedBufferSize = dml_min( 6498 CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate, 6499 MetaFIFOSizeInKEntries * 1024 * 64 * AverageDCCCompressionRate) + (ROBBufferSizeInKByte * 1024 - COMPBUF_RESERVED_SPACE_64B * 64) * AverageDCCCompressionRate; 6500 dml_print("DML::%s: min 1 = %f\n", __func__, CompressedBufferSizeInkByte * 1024 * AverageDCCCompressionRate); 6501 dml_print("DML::%s: min 2 = %f\n", __func__, MetaFIFOSizeInKEntries * 1024 * 64 * AverageDCCCompressionRate); 6502 } 6503 6504 #ifdef 
__DML_VBA_DEBUG__ 6505 dml_print("DML::%s: MetaFIFOSizeInKEntries = %d\n", __func__, MetaFIFOSizeInKEntries); 6506 dml_print("DML::%s: AverageZeroSizeCompressionRate = %f\n", __func__, AverageZeroSizeCompressionRate); 6507 dml_print("DML::%s: EffectiveCompressedBufferSize = %f\n", __func__, EffectiveCompressedBufferSize); 6508 #endif 6509 6510 *StutterPeriod = 0; 6511 for (k = 0; k < NumberOfActivePlanes; ++k) { 6512 LinesInDETY = (DETBufferSizeY[k] + (UnboundedRequestEnabled == true ? EffectiveCompressedBufferSize : 0) * ReadBandwidthPlaneLuma[k] / TotalDataReadBandwidth) 6513 / BytePerPixelDETY[k] / SwathWidthY[k]; 6514 LinesInDETYRoundedDownToSwath = dml_floor(LinesInDETY, SwathHeightY[k]); 6515 DETBufferingTimeY = LinesInDETYRoundedDownToSwath * (HTotal[k] / PixelClock[k]) / VRatio[k]; 6516 #ifdef __DML_VBA_DEBUG__ 6517 dml_print("DML::%s: k=%0d DETBufferSizeY = %f\n", __func__, k, DETBufferSizeY[k]); 6518 dml_print("DML::%s: k=%0d BytePerPixelDETY = %f\n", __func__, k, BytePerPixelDETY[k]); 6519 dml_print("DML::%s: k=%0d SwathWidthY = %f\n", __func__, k, SwathWidthY[k]); 6520 dml_print("DML::%s: k=%0d ReadBandwidthPlaneLuma = %f\n", __func__, k, ReadBandwidthPlaneLuma[k]); 6521 dml_print("DML::%s: k=%0d TotalDataReadBandwidth = %f\n", __func__, k, TotalDataReadBandwidth); 6522 dml_print("DML::%s: k=%0d LinesInDETY = %f\n", __func__, k, LinesInDETY); 6523 dml_print("DML::%s: k=%0d LinesInDETYRoundedDownToSwath = %f\n", __func__, k, LinesInDETYRoundedDownToSwath); 6524 dml_print("DML::%s: k=%0d HTotal = %d\n", __func__, k, HTotal[k]); 6525 dml_print("DML::%s: k=%0d PixelClock = %f\n", __func__, k, PixelClock[k]); 6526 dml_print("DML::%s: k=%0d VRatio = %f\n", __func__, k, VRatio[k]); 6527 dml_print("DML::%s: k=%0d DETBufferingTimeY = %f\n", __func__, k, DETBufferingTimeY); 6528 dml_print("DML::%s: k=%0d PixelClock = %f\n", __func__, k, PixelClock[k]); 6529 #endif 6530 6531 if (k == 0 || DETBufferingTimeY < *StutterPeriod) { 6532 bool isInterlaceTiming = 
Interlace[k] && !ProgressiveToInterlaceUnitInOPP; 6533 6534 *StutterPeriod = DETBufferingTimeY; 6535 FrameTimeCriticalPlane = (isInterlaceTiming ? dml_floor(VTotal[k] / 2.0, 1.0) : VTotal[k]) * HTotal[k] / PixelClock[k]; 6536 VActiveTimeCriticalPlane = (isInterlaceTiming ? dml_floor(VActive[k] / 2.0, 1.0) : VActive[k]) * HTotal[k] / PixelClock[k]; 6537 BytePerPixelYCriticalPlane = BytePerPixelY[k]; 6538 SwathWidthYCriticalPlane = SwathWidthY[k]; 6539 LinesToFinishSwathTransferStutterCriticalPlane = SwathHeightY[k] - (LinesInDETY - LinesInDETYRoundedDownToSwath); 6540 MinTTUVBlankCriticalPlane = MinTTUVBlank[k]; 6541 6542 #ifdef __DML_VBA_DEBUG__ 6543 dml_print("DML::%s: StutterPeriod = %f\n", __func__, *StutterPeriod); 6544 dml_print("DML::%s: MinTTUVBlankCriticalPlane = %f\n", __func__, MinTTUVBlankCriticalPlane); 6545 dml_print("DML::%s: FrameTimeCriticalPlane = %f\n", __func__, FrameTimeCriticalPlane); 6546 dml_print("DML::%s: VActiveTimeCriticalPlane = %f\n", __func__, VActiveTimeCriticalPlane); 6547 dml_print("DML::%s: BytePerPixelYCriticalPlane = %d\n", __func__, BytePerPixelYCriticalPlane); 6548 dml_print("DML::%s: SwathWidthYCriticalPlane = %f\n", __func__, SwathWidthYCriticalPlane); 6549 dml_print("DML::%s: LinesToFinishSwathTransferStutterCriticalPlane = %f\n", __func__, LinesToFinishSwathTransferStutterCriticalPlane); 6550 #endif 6551 } 6552 } 6553 6554 PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer = dml_min(*StutterPeriod * TotalDataReadBandwidth, EffectiveCompressedBufferSize); 6555 #ifdef __DML_VBA_DEBUG__ 6556 dml_print("DML::%s: ROBBufferSizeInKByte = %d\n", __func__, ROBBufferSizeInKByte); 6557 dml_print("DML::%s: AverageDCCCompressionRate = %f\n", __func__, AverageDCCCompressionRate); 6558 dml_print("DML::%s: StutterPeriod * TotalDataReadBandwidth = %f\n", __func__, *StutterPeriod * TotalDataReadBandwidth); 6559 dml_print("DML::%s: ROBBufferSizeInKByte * 1024 * AverageDCCCompressionRate + EffectiveCompressedBufferSize = %f\n", 
__func__, ROBBufferSizeInKByte * 1024 * AverageDCCCompressionRate + EffectiveCompressedBufferSize); 6560 dml_print("DML::%s: EffectiveCompressedBufferSize = %f\n", __func__, EffectiveCompressedBufferSize); 6561 dml_print("DML::%s: PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer = %f\n", __func__, PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer); 6562 dml_print("DML::%s: ReturnBW = %f\n", __func__, ReturnBW); 6563 dml_print("DML::%s: TotalDataReadBandwidth = %f\n", __func__, TotalDataReadBandwidth); 6564 dml_print("DML::%s: TotalRowReadBandwidth = %f\n", __func__, TotalRowReadBandwidth); 6565 dml_print("DML::%s: DCFCLK = %f\n", __func__, DCFCLK); 6566 #endif 6567 6568 StutterBurstTime = PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer / AverageDCCCompressionRate / ReturnBW 6569 + (*StutterPeriod * TotalDataReadBandwidth - PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer) / (DCFCLK * 64) 6570 + *StutterPeriod * TotalRowReadBandwidth / ReturnBW; 6571 #ifdef __DML_VBA_DEBUG__ 6572 dml_print("DML::%s: Part 1 = %f\n", __func__, PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer / AverageDCCCompressionRate / ReturnBW); 6573 dml_print("DML::%s: StutterPeriod * TotalDataReadBandwidth = %f\n", __func__, (*StutterPeriod * TotalDataReadBandwidth)); 6574 dml_print("DML::%s: Part 2 = %f\n", __func__, (*StutterPeriod * TotalDataReadBandwidth - PartOfUncompressedPixelBurstThatFitsInROBAndCompressedBuffer) / (DCFCLK * 64)); 6575 dml_print("DML::%s: Part 3 = %f\n", __func__, *StutterPeriod * TotalRowReadBandwidth / ReturnBW); 6576 dml_print("DML::%s: StutterBurstTime = %f\n", __func__, StutterBurstTime); 6577 #endif 6578 StutterBurstTime = dml_max(StutterBurstTime, LinesToFinishSwathTransferStutterCriticalPlane * BytePerPixelYCriticalPlane * SwathWidthYCriticalPlane / ReturnBW); 6579 6580 dml_print( 6581 "DML::%s: Time to finish residue swath=%f\n", 6582 __func__, 6583 LinesToFinishSwathTransferStutterCriticalPlane * 
BytePerPixelYCriticalPlane * SwathWidthYCriticalPlane / ReturnBW); 6584 6585 TotalActiveWriteback = 0; 6586 for (k = 0; k < NumberOfActivePlanes; ++k) { 6587 if (WritebackEnable[k]) { 6588 TotalActiveWriteback = TotalActiveWriteback + 1; 6589 } 6590 } 6591 6592 if (TotalActiveWriteback == 0) { 6593 #ifdef __DML_VBA_DEBUG__ 6594 dml_print("DML::%s: SRExitTime = %f\n", __func__, SRExitTime); 6595 dml_print("DML::%s: SRExitZ8Time = %f\n", __func__, SRExitZ8Time); 6596 dml_print("DML::%s: StutterBurstTime = %f (final)\n", __func__, StutterBurstTime); 6597 dml_print("DML::%s: StutterPeriod = %f\n", __func__, *StutterPeriod); 6598 #endif 6599 *StutterEfficiencyNotIncludingVBlank = dml_max(0., 1 - (SRExitTime + StutterBurstTime) / *StutterPeriod) * 100; 6600 *Z8StutterEfficiencyNotIncludingVBlank = dml_max(0., 1 - (SRExitZ8Time + StutterBurstTime) / *StutterPeriod) * 100; 6601 *NumberOfStutterBurstsPerFrame = (*StutterEfficiencyNotIncludingVBlank > 0 ? dml_ceil(VActiveTimeCriticalPlane / *StutterPeriod, 1) : 0); 6602 *Z8NumberOfStutterBurstsPerFrame = (*Z8StutterEfficiencyNotIncludingVBlank > 0 ? 
dml_ceil(VActiveTimeCriticalPlane / *StutterPeriod, 1) : 0); 6603 } else { 6604 *StutterEfficiencyNotIncludingVBlank = 0.; 6605 *Z8StutterEfficiencyNotIncludingVBlank = 0.; 6606 *NumberOfStutterBurstsPerFrame = 0; 6607 *Z8NumberOfStutterBurstsPerFrame = 0; 6608 } 6609 #ifdef __DML_VBA_DEBUG__ 6610 dml_print("DML::%s: VActiveTimeCriticalPlane = %f\n", __func__, VActiveTimeCriticalPlane); 6611 dml_print("DML::%s: StutterEfficiencyNotIncludingVBlank = %f\n", __func__, *StutterEfficiencyNotIncludingVBlank); 6612 dml_print("DML::%s: Z8StutterEfficiencyNotIncludingVBlank = %f\n", __func__, *Z8StutterEfficiencyNotIncludingVBlank); 6613 dml_print("DML::%s: NumberOfStutterBurstsPerFrame = %d\n", __func__, *NumberOfStutterBurstsPerFrame); 6614 dml_print("DML::%s: Z8NumberOfStutterBurstsPerFrame = %d\n", __func__, *Z8NumberOfStutterBurstsPerFrame); 6615 #endif 6616 6617 for (k = 0; k < NumberOfActivePlanes; ++k) { 6618 if (v->BlendingAndTiming[k] == k) { 6619 TotalNumberOfActiveOTG = TotalNumberOfActiveOTG + 1; 6620 } 6621 } 6622 6623 if (*StutterEfficiencyNotIncludingVBlank > 0) { 6624 LastStutterPeriod = VActiveTimeCriticalPlane - (*NumberOfStutterBurstsPerFrame - 1) * *StutterPeriod; 6625 6626 if ((SynchronizedVBlank || TotalNumberOfActiveOTG == 1) && LastStutterPeriod + MinTTUVBlankCriticalPlane > StutterEnterPlusExitWatermark) { 6627 *StutterEfficiency = (1 - (*NumberOfStutterBurstsPerFrame * SRExitTime + StutterBurstTime * VActiveTimeCriticalPlane 6628 / *StutterPeriod) / FrameTimeCriticalPlane) * 100; 6629 } else { 6630 *StutterEfficiency = *StutterEfficiencyNotIncludingVBlank; 6631 } 6632 } else { 6633 *StutterEfficiency = 0; 6634 } 6635 6636 if (*Z8StutterEfficiencyNotIncludingVBlank > 0) { 6637 LastZ8StutterPeriod = VActiveTimeCriticalPlane - (*NumberOfStutterBurstsPerFrame - 1) * *StutterPeriod; 6638 if ((SynchronizedVBlank || TotalNumberOfActiveOTG == 1) && LastZ8StutterPeriod + MinTTUVBlankCriticalPlane > Z8StutterEnterPlusExitWatermark) { 6639 
*Z8StutterEfficiency = (1 - (*NumberOfStutterBurstsPerFrame * SRExitZ8Time + StutterBurstTime * VActiveTimeCriticalPlane 6640 / *StutterPeriod) / FrameTimeCriticalPlane) * 100; 6641 } else { 6642 *Z8StutterEfficiency = *Z8StutterEfficiencyNotIncludingVBlank; 6643 } 6644 } else { 6645 *Z8StutterEfficiency = 0.; 6646 } 6647 6648 dml_print("DML::%s: LastZ8StutterPeriod = %f\n", __func__, LastZ8StutterPeriod); 6649 dml_print("DML::%s: Z8StutterEnterPlusExitWatermark = %f\n", __func__, Z8StutterEnterPlusExitWatermark); 6650 dml_print("DML::%s: StutterBurstTime = %f\n", __func__, StutterBurstTime); 6651 dml_print("DML::%s: StutterPeriod = %f\n", __func__, *StutterPeriod); 6652 dml_print("DML::%s: StutterEfficiency = %f\n", __func__, *StutterEfficiency); 6653 dml_print("DML::%s: Z8StutterEfficiency = %f\n", __func__, *Z8StutterEfficiency); 6654 dml_print("DML::%s: StutterEfficiencyNotIncludingVBlank = %f\n", __func__, *StutterEfficiencyNotIncludingVBlank); 6655 dml_print("DML::%s: Z8NumberOfStutterBurstsPerFrame = %d\n", __func__, *Z8NumberOfStutterBurstsPerFrame); 6656 } 6657 6658 static void CalculateSwathAndDETConfiguration( 6659 bool ForceSingleDPP, 6660 int NumberOfActivePlanes, 6661 unsigned int DETBufferSizeInKByte, 6662 double MaximumSwathWidthLuma[], 6663 double MaximumSwathWidthChroma[], 6664 enum scan_direction_class SourceScan[], 6665 enum source_format_class SourcePixelFormat[], 6666 enum dm_swizzle_mode SurfaceTiling[], 6667 int ViewportWidth[], 6668 int ViewportHeight[], 6669 int SurfaceWidthY[], 6670 int SurfaceWidthC[], 6671 int SurfaceHeightY[], 6672 int SurfaceHeightC[], 6673 int Read256BytesBlockHeightY[], 6674 int Read256BytesBlockHeightC[], 6675 int Read256BytesBlockWidthY[], 6676 int Read256BytesBlockWidthC[], 6677 enum odm_combine_mode ODMCombineEnabled[], 6678 int BlendingAndTiming[], 6679 int BytePerPixY[], 6680 int BytePerPixC[], 6681 double BytePerPixDETY[], 6682 double BytePerPixDETC[], 6683 int HActive[], 6684 double HRatio[], 6685 double 
HRatioChroma[], 6686 int DPPPerPlane[], 6687 int swath_width_luma_ub[], 6688 int swath_width_chroma_ub[], 6689 double SwathWidth[], 6690 double SwathWidthChroma[], 6691 int SwathHeightY[], 6692 int SwathHeightC[], 6693 unsigned int DETBufferSizeY[], 6694 unsigned int DETBufferSizeC[], 6695 bool ViewportSizeSupportPerPlane[], 6696 bool *ViewportSizeSupport) 6697 { 6698 int MaximumSwathHeightY[DC__NUM_DPP__MAX]; 6699 int MaximumSwathHeightC[DC__NUM_DPP__MAX]; 6700 int MinimumSwathHeightY; 6701 int MinimumSwathHeightC; 6702 int RoundedUpMaxSwathSizeBytesY; 6703 int RoundedUpMaxSwathSizeBytesC; 6704 int RoundedUpMinSwathSizeBytesY; 6705 int RoundedUpMinSwathSizeBytesC; 6706 int RoundedUpSwathSizeBytesY; 6707 int RoundedUpSwathSizeBytesC; 6708 double SwathWidthSingleDPP[DC__NUM_DPP__MAX]; 6709 double SwathWidthSingleDPPChroma[DC__NUM_DPP__MAX]; 6710 int k; 6711 6712 CalculateSwathWidth( 6713 ForceSingleDPP, 6714 NumberOfActivePlanes, 6715 SourcePixelFormat, 6716 SourceScan, 6717 ViewportWidth, 6718 ViewportHeight, 6719 SurfaceWidthY, 6720 SurfaceWidthC, 6721 SurfaceHeightY, 6722 SurfaceHeightC, 6723 ODMCombineEnabled, 6724 BytePerPixY, 6725 BytePerPixC, 6726 Read256BytesBlockHeightY, 6727 Read256BytesBlockHeightC, 6728 Read256BytesBlockWidthY, 6729 Read256BytesBlockWidthC, 6730 BlendingAndTiming, 6731 HActive, 6732 HRatio, 6733 DPPPerPlane, 6734 SwathWidthSingleDPP, 6735 SwathWidthSingleDPPChroma, 6736 SwathWidth, 6737 SwathWidthChroma, 6738 MaximumSwathHeightY, 6739 MaximumSwathHeightC, 6740 swath_width_luma_ub, 6741 swath_width_chroma_ub); 6742 6743 *ViewportSizeSupport = true; 6744 for (k = 0; k < NumberOfActivePlanes; ++k) { 6745 if ((SourcePixelFormat[k] == dm_444_64 || SourcePixelFormat[k] == dm_444_32 || SourcePixelFormat[k] == dm_444_16 || SourcePixelFormat[k] == dm_mono_16 6746 || SourcePixelFormat[k] == dm_mono_8 || SourcePixelFormat[k] == dm_rgbe)) { 6747 if (SurfaceTiling[k] == dm_sw_linear 6748 || (SourcePixelFormat[k] == dm_444_64 6749 && (SurfaceTiling[k] 
== dm_sw_64kb_s || SurfaceTiling[k] == dm_sw_64kb_s_t || SurfaceTiling[k] == dm_sw_64kb_s_x) 6750 && SourceScan[k] != dm_vert)) { 6751 MinimumSwathHeightY = MaximumSwathHeightY[k]; 6752 } else if (SourcePixelFormat[k] == dm_444_8 && SourceScan[k] == dm_vert) { 6753 MinimumSwathHeightY = MaximumSwathHeightY[k]; 6754 } else { 6755 MinimumSwathHeightY = MaximumSwathHeightY[k] / 2; 6756 } 6757 MinimumSwathHeightC = MaximumSwathHeightC[k]; 6758 } else { 6759 if (SurfaceTiling[k] == dm_sw_linear) { 6760 MinimumSwathHeightY = MaximumSwathHeightY[k]; 6761 MinimumSwathHeightC = MaximumSwathHeightC[k]; 6762 } else if (SourcePixelFormat[k] == dm_rgbe_alpha && SourceScan[k] == dm_vert) { 6763 MinimumSwathHeightY = MaximumSwathHeightY[k] / 2; 6764 MinimumSwathHeightC = MaximumSwathHeightC[k]; 6765 } else if (SourcePixelFormat[k] == dm_rgbe_alpha) { 6766 MinimumSwathHeightY = MaximumSwathHeightY[k] / 2; 6767 MinimumSwathHeightC = MaximumSwathHeightC[k] / 2; 6768 } else if (SourcePixelFormat[k] == dm_420_8 && SourceScan[k] == dm_vert) { 6769 MinimumSwathHeightY = MaximumSwathHeightY[k]; 6770 MinimumSwathHeightC = MaximumSwathHeightC[k] / 2; 6771 } else { 6772 MinimumSwathHeightC = MaximumSwathHeightC[k] / 2; 6773 MinimumSwathHeightY = MaximumSwathHeightY[k] / 2; 6774 } 6775 } 6776 6777 RoundedUpMaxSwathSizeBytesY = swath_width_luma_ub[k] * BytePerPixDETY[k] * MaximumSwathHeightY[k]; 6778 RoundedUpMinSwathSizeBytesY = swath_width_luma_ub[k] * BytePerPixDETY[k] * MinimumSwathHeightY; 6779 if (SourcePixelFormat[k] == dm_420_10) { 6780 RoundedUpMaxSwathSizeBytesY = dml_ceil((double) RoundedUpMaxSwathSizeBytesY, 256); 6781 RoundedUpMinSwathSizeBytesY = dml_ceil((double) RoundedUpMinSwathSizeBytesY, 256); 6782 } 6783 RoundedUpMaxSwathSizeBytesC = swath_width_chroma_ub[k] * BytePerPixDETC[k] * MaximumSwathHeightC[k]; 6784 RoundedUpMinSwathSizeBytesC = swath_width_chroma_ub[k] * BytePerPixDETC[k] * MinimumSwathHeightC; 6785 if (SourcePixelFormat[k] == dm_420_10) { 6786 
RoundedUpMaxSwathSizeBytesC = dml_ceil(RoundedUpMaxSwathSizeBytesC, 256); 6787 RoundedUpMinSwathSizeBytesC = dml_ceil(RoundedUpMinSwathSizeBytesC, 256); 6788 } 6789 6790 if (RoundedUpMaxSwathSizeBytesY + RoundedUpMaxSwathSizeBytesC <= DETBufferSizeInKByte * 1024 / 2) { 6791 SwathHeightY[k] = MaximumSwathHeightY[k]; 6792 SwathHeightC[k] = MaximumSwathHeightC[k]; 6793 RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY; 6794 RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC; 6795 } else if (RoundedUpMaxSwathSizeBytesY >= 1.5 * RoundedUpMaxSwathSizeBytesC 6796 && RoundedUpMinSwathSizeBytesY + RoundedUpMaxSwathSizeBytesC <= DETBufferSizeInKByte * 1024 / 2) { 6797 SwathHeightY[k] = MinimumSwathHeightY; 6798 SwathHeightC[k] = MaximumSwathHeightC[k]; 6799 RoundedUpSwathSizeBytesY = RoundedUpMinSwathSizeBytesY; 6800 RoundedUpSwathSizeBytesC = RoundedUpMaxSwathSizeBytesC; 6801 } else if (RoundedUpMaxSwathSizeBytesY < 1.5 * RoundedUpMaxSwathSizeBytesC 6802 && RoundedUpMaxSwathSizeBytesY + RoundedUpMinSwathSizeBytesC <= DETBufferSizeInKByte * 1024 / 2) { 6803 SwathHeightY[k] = MaximumSwathHeightY[k]; 6804 SwathHeightC[k] = MinimumSwathHeightC; 6805 RoundedUpSwathSizeBytesY = RoundedUpMaxSwathSizeBytesY; 6806 RoundedUpSwathSizeBytesC = RoundedUpMinSwathSizeBytesC; 6807 } else { 6808 SwathHeightY[k] = MinimumSwathHeightY; 6809 SwathHeightC[k] = MinimumSwathHeightC; 6810 RoundedUpSwathSizeBytesY = RoundedUpMinSwathSizeBytesY; 6811 RoundedUpSwathSizeBytesC = RoundedUpMinSwathSizeBytesC; 6812 } 6813 { 6814 double actDETBufferSizeInKByte = dml_ceil(DETBufferSizeInKByte, 64); 6815 6816 if (SwathHeightC[k] == 0) { 6817 DETBufferSizeY[k] = actDETBufferSizeInKByte * 1024; 6818 DETBufferSizeC[k] = 0; 6819 } else if (RoundedUpSwathSizeBytesY <= 1.5 * RoundedUpSwathSizeBytesC) { 6820 DETBufferSizeY[k] = actDETBufferSizeInKByte * 1024 / 2; 6821 DETBufferSizeC[k] = actDETBufferSizeInKByte * 1024 / 2; 6822 } else { 6823 DETBufferSizeY[k] = dml_floor(actDETBufferSizeInKByte * 1024 * 2 
/ 3, 1024); 6824 DETBufferSizeC[k] = actDETBufferSizeInKByte * 1024 / 3; 6825 } 6826 6827 if (RoundedUpMinSwathSizeBytesY + RoundedUpMinSwathSizeBytesC > actDETBufferSizeInKByte * 1024 / 2 || SwathWidth[k] > MaximumSwathWidthLuma[k] 6828 || (SwathHeightC[k] > 0 && SwathWidthChroma[k] > MaximumSwathWidthChroma[k])) { 6829 *ViewportSizeSupport = false; 6830 ViewportSizeSupportPerPlane[k] = false; 6831 } else { 6832 ViewportSizeSupportPerPlane[k] = true; 6833 } 6834 } 6835 } 6836 } 6837 6838 static void CalculateSwathWidth( 6839 bool ForceSingleDPP, 6840 int NumberOfActivePlanes, 6841 enum source_format_class SourcePixelFormat[], 6842 enum scan_direction_class SourceScan[], 6843 int ViewportWidth[], 6844 int ViewportHeight[], 6845 int SurfaceWidthY[], 6846 int SurfaceWidthC[], 6847 int SurfaceHeightY[], 6848 int SurfaceHeightC[], 6849 enum odm_combine_mode ODMCombineEnabled[], 6850 int BytePerPixY[], 6851 int BytePerPixC[], 6852 int Read256BytesBlockHeightY[], 6853 int Read256BytesBlockHeightC[], 6854 int Read256BytesBlockWidthY[], 6855 int Read256BytesBlockWidthC[], 6856 int BlendingAndTiming[], 6857 int HActive[], 6858 double HRatio[], 6859 int DPPPerPlane[], 6860 double SwathWidthSingleDPPY[], 6861 double SwathWidthSingleDPPC[], 6862 double SwathWidthY[], 6863 double SwathWidthC[], 6864 int MaximumSwathHeightY[], 6865 int MaximumSwathHeightC[], 6866 int swath_width_luma_ub[], 6867 int swath_width_chroma_ub[]) 6868 { 6869 enum odm_combine_mode MainPlaneODMCombine; 6870 int j, k; 6871 6872 #ifdef __DML_VBA_DEBUG__ 6873 dml_print("DML::%s: NumberOfActivePlanes = %d\n", __func__, NumberOfActivePlanes); 6874 #endif 6875 6876 for (k = 0; k < NumberOfActivePlanes; ++k) { 6877 if (SourceScan[k] != dm_vert) { 6878 SwathWidthSingleDPPY[k] = ViewportWidth[k]; 6879 } else { 6880 SwathWidthSingleDPPY[k] = ViewportHeight[k]; 6881 } 6882 6883 #ifdef __DML_VBA_DEBUG__ 6884 dml_print("DML::%s: k=%d ViewportWidth=%d\n", __func__, k, ViewportWidth[k]); 6885 dml_print("DML::%s: k=%d 
ViewportHeight=%d\n", __func__, k, ViewportHeight[k]); 6886 #endif 6887 6888 MainPlaneODMCombine = ODMCombineEnabled[k]; 6889 for (j = 0; j < NumberOfActivePlanes; ++j) { 6890 if (BlendingAndTiming[k] == j) { 6891 MainPlaneODMCombine = ODMCombineEnabled[j]; 6892 } 6893 } 6894 6895 if (MainPlaneODMCombine == dm_odm_combine_mode_4to1) 6896 SwathWidthY[k] = dml_min(SwathWidthSingleDPPY[k], dml_round(HActive[k] / 4.0 * HRatio[k])); 6897 else if (MainPlaneODMCombine == dm_odm_combine_mode_2to1) 6898 SwathWidthY[k] = dml_min(SwathWidthSingleDPPY[k], dml_round(HActive[k] / 2.0 * HRatio[k])); 6899 else if (DPPPerPlane[k] == 2) 6900 SwathWidthY[k] = SwathWidthSingleDPPY[k] / 2; 6901 else 6902 SwathWidthY[k] = SwathWidthSingleDPPY[k]; 6903 6904 #ifdef __DML_VBA_DEBUG__ 6905 dml_print("DML::%s: k=%d SwathWidthSingleDPPY=%f\n", __func__, k, SwathWidthSingleDPPY[k]); 6906 dml_print("DML::%s: k=%d SwathWidthY=%f\n", __func__, k, SwathWidthY[k]); 6907 #endif 6908 6909 if (SourcePixelFormat[k] == dm_420_8 || SourcePixelFormat[k] == dm_420_10 || SourcePixelFormat[k] == dm_420_12) { 6910 SwathWidthC[k] = SwathWidthY[k] / 2; 6911 SwathWidthSingleDPPC[k] = SwathWidthSingleDPPY[k] / 2; 6912 } else { 6913 SwathWidthC[k] = SwathWidthY[k]; 6914 SwathWidthSingleDPPC[k] = SwathWidthSingleDPPY[k]; 6915 } 6916 6917 if (ForceSingleDPP == true) { 6918 SwathWidthY[k] = SwathWidthSingleDPPY[k]; 6919 SwathWidthC[k] = SwathWidthSingleDPPC[k]; 6920 } 6921 { 6922 int surface_width_ub_l = dml_ceil(SurfaceWidthY[k], Read256BytesBlockWidthY[k]); 6923 int surface_height_ub_l = dml_ceil(SurfaceHeightY[k], Read256BytesBlockHeightY[k]); 6924 6925 #ifdef __DML_VBA_DEBUG__ 6926 dml_print("DML::%s: k=%d surface_width_ub_l=%0d\n", __func__, k, surface_width_ub_l); 6927 #endif 6928 6929 if (SourceScan[k] != dm_vert) { 6930 MaximumSwathHeightY[k] = Read256BytesBlockHeightY[k]; 6931 MaximumSwathHeightC[k] = Read256BytesBlockHeightC[k]; 6932 swath_width_luma_ub[k] = dml_min(surface_width_ub_l, (int) 
dml_ceil(SwathWidthY[k] - 1, Read256BytesBlockWidthY[k]) + Read256BytesBlockWidthY[k]); 6933 if (BytePerPixC[k] > 0) { 6934 int surface_width_ub_c = dml_ceil(SurfaceWidthC[k], Read256BytesBlockWidthC[k]); 6935 6936 swath_width_chroma_ub[k] = dml_min( 6937 surface_width_ub_c, 6938 (int) dml_ceil(SwathWidthC[k] - 1, Read256BytesBlockWidthC[k]) + Read256BytesBlockWidthC[k]); 6939 } else { 6940 swath_width_chroma_ub[k] = 0; 6941 } 6942 } else { 6943 MaximumSwathHeightY[k] = Read256BytesBlockWidthY[k]; 6944 MaximumSwathHeightC[k] = Read256BytesBlockWidthC[k]; 6945 swath_width_luma_ub[k] = dml_min(surface_height_ub_l, (int) dml_ceil(SwathWidthY[k] - 1, Read256BytesBlockHeightY[k]) + Read256BytesBlockHeightY[k]); 6946 if (BytePerPixC[k] > 0) { 6947 int surface_height_ub_c = dml_ceil(SurfaceHeightC[k], Read256BytesBlockHeightC[k]); 6948 6949 swath_width_chroma_ub[k] = dml_min( 6950 surface_height_ub_c, 6951 (int) dml_ceil(SwathWidthC[k] - 1, Read256BytesBlockHeightC[k]) + Read256BytesBlockHeightC[k]); 6952 } else { 6953 swath_width_chroma_ub[k] = 0; 6954 } 6955 } 6956 } 6957 } 6958 } 6959 6960 static double CalculateExtraLatency( 6961 int RoundTripPingLatencyCycles, 6962 int ReorderingBytes, 6963 double DCFCLK, 6964 int TotalNumberOfActiveDPP, 6965 int PixelChunkSizeInKByte, 6966 int TotalNumberOfDCCActiveDPP, 6967 int MetaChunkSize, 6968 double ReturnBW, 6969 bool GPUVMEnable, 6970 bool HostVMEnable, 6971 int NumberOfActivePlanes, 6972 int NumberOfDPP[], 6973 int dpte_group_bytes[], 6974 double HostVMInefficiencyFactor, 6975 double HostVMMinPageSize, 6976 int HostVMMaxNonCachedPageTableLevels) 6977 { 6978 double ExtraLatencyBytes; 6979 double ExtraLatency; 6980 6981 ExtraLatencyBytes = CalculateExtraLatencyBytes( 6982 ReorderingBytes, 6983 TotalNumberOfActiveDPP, 6984 PixelChunkSizeInKByte, 6985 TotalNumberOfDCCActiveDPP, 6986 MetaChunkSize, 6987 GPUVMEnable, 6988 HostVMEnable, 6989 NumberOfActivePlanes, 6990 NumberOfDPP, 6991 dpte_group_bytes, 6992 
HostVMInefficiencyFactor, 6993 HostVMMinPageSize, 6994 HostVMMaxNonCachedPageTableLevels); 6995 6996 ExtraLatency = (RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__) / DCFCLK + ExtraLatencyBytes / ReturnBW; 6997 6998 #ifdef __DML_VBA_DEBUG__ 6999 dml_print("DML::%s: RoundTripPingLatencyCycles=%d\n", __func__, RoundTripPingLatencyCycles); 7000 dml_print("DML::%s: DCFCLK=%f\n", __func__, DCFCLK); 7001 dml_print("DML::%s: ExtraLatencyBytes=%f\n", __func__, ExtraLatencyBytes); 7002 dml_print("DML::%s: ReturnBW=%f\n", __func__, ReturnBW); 7003 dml_print("DML::%s: ExtraLatency=%f\n", __func__, ExtraLatency); 7004 #endif 7005 7006 return ExtraLatency; 7007 } 7008 7009 static double CalculateExtraLatencyBytes( 7010 int ReorderingBytes, 7011 int TotalNumberOfActiveDPP, 7012 int PixelChunkSizeInKByte, 7013 int TotalNumberOfDCCActiveDPP, 7014 int MetaChunkSize, 7015 bool GPUVMEnable, 7016 bool HostVMEnable, 7017 int NumberOfActivePlanes, 7018 int NumberOfDPP[], 7019 int dpte_group_bytes[], 7020 double HostVMInefficiencyFactor, 7021 double HostVMMinPageSize, 7022 int HostVMMaxNonCachedPageTableLevels) 7023 { 7024 double ret; 7025 int HostVMDynamicLevels = 0, k; 7026 7027 if (GPUVMEnable == true && HostVMEnable == true) { 7028 if (HostVMMinPageSize < 2048) 7029 HostVMDynamicLevels = HostVMMaxNonCachedPageTableLevels; 7030 else if (HostVMMinPageSize >= 2048 && HostVMMinPageSize < 1048576) 7031 HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 1); 7032 else 7033 HostVMDynamicLevels = dml_max(0, (int) HostVMMaxNonCachedPageTableLevels - 2); 7034 } else { 7035 HostVMDynamicLevels = 0; 7036 } 7037 7038 ret = ReorderingBytes + (TotalNumberOfActiveDPP * PixelChunkSizeInKByte + TotalNumberOfDCCActiveDPP * MetaChunkSize) * 1024.0; 7039 7040 if (GPUVMEnable == true) { 7041 for (k = 0; k < NumberOfActivePlanes; ++k) 7042 ret = ret + NumberOfDPP[k] * dpte_group_bytes[k] * (1 + 8 * HostVMDynamicLevels) * HostVMInefficiencyFactor; 7043 } 7044 return ret; 7045 } 
7046 7047 static double CalculateUrgentLatency( 7048 double UrgentLatencyPixelDataOnly, 7049 double UrgentLatencyPixelMixedWithVMData, 7050 double UrgentLatencyVMDataOnly, 7051 bool DoUrgentLatencyAdjustment, 7052 double UrgentLatencyAdjustmentFabricClockComponent, 7053 double UrgentLatencyAdjustmentFabricClockReference, 7054 double FabricClock) 7055 { 7056 double ret; 7057 7058 ret = dml_max3(UrgentLatencyPixelDataOnly, UrgentLatencyPixelMixedWithVMData, UrgentLatencyVMDataOnly); 7059 if (DoUrgentLatencyAdjustment == true) 7060 ret = ret + UrgentLatencyAdjustmentFabricClockComponent * (UrgentLatencyAdjustmentFabricClockReference / FabricClock - 1); 7061 return ret; 7062 } 7063 7064 static void UseMinimumDCFCLK( 7065 struct display_mode_lib *mode_lib, 7066 int MaxPrefetchMode, 7067 int ReorderingBytes) 7068 { 7069 struct vba_vars_st *v = &mode_lib->vba; 7070 int dummy1, i, j, k; 7071 double NormalEfficiency, dummy2, dummy3; 7072 double TotalMaxPrefetchFlipDPTERowBandwidth[DC__VOLTAGE_STATES][2]; 7073 7074 NormalEfficiency = v->PercentOfIdealFabricAndSDPPortBWReceivedAfterUrgLatency / 100.0; 7075 for (i = 0; i < v->soc.num_states; ++i) { 7076 for (j = 0; j <= 1; ++j) { 7077 double PixelDCFCLKCyclesRequiredInPrefetch[DC__NUM_DPP__MAX]; 7078 double PrefetchPixelLinesTime[DC__NUM_DPP__MAX]; 7079 double DCFCLKRequiredForPeakBandwidthPerPlane[DC__NUM_DPP__MAX]; 7080 double DynamicMetadataVMExtraLatency[DC__NUM_DPP__MAX]; 7081 double MinimumTWait; 7082 double NonDPTEBandwidth; 7083 double DPTEBandwidth; 7084 double DCFCLKRequiredForAverageBandwidth; 7085 double ExtraLatencyBytes; 7086 double ExtraLatencyCycles; 7087 double DCFCLKRequiredForPeakBandwidth; 7088 int NoOfDPPState[DC__NUM_DPP__MAX]; 7089 double MinimumTvmPlus2Tr0; 7090 7091 TotalMaxPrefetchFlipDPTERowBandwidth[i][j] = 0; 7092 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 7093 TotalMaxPrefetchFlipDPTERowBandwidth[i][j] = TotalMaxPrefetchFlipDPTERowBandwidth[i][j] 7094 + v->NoOfDPP[i][j][k] * 
v->DPTEBytesPerRow[i][j][k] / (15.75 * v->HTotal[k] / v->PixelClock[k]); 7095 } 7096 7097 for (k = 0; k <= v->NumberOfActivePlanes - 1; ++k) 7098 NoOfDPPState[k] = v->NoOfDPP[i][j][k]; 7099 7100 MinimumTWait = CalculateTWait(MaxPrefetchMode, v->FinalDRAMClockChangeLatency, v->UrgLatency[i], v->SREnterPlusExitTime); 7101 NonDPTEBandwidth = v->TotalVActivePixelBandwidth[i][j] + v->TotalVActiveCursorBandwidth[i][j] + v->TotalMetaRowBandwidth[i][j]; 7102 DPTEBandwidth = (v->HostVMEnable == true || v->ImmediateFlipRequirement[0] == dm_immediate_flip_required) ? 7103 TotalMaxPrefetchFlipDPTERowBandwidth[i][j] : v->TotalDPTERowBandwidth[i][j]; 7104 DCFCLKRequiredForAverageBandwidth = dml_max3( 7105 v->ProjectedDCFCLKDeepSleep[i][j], 7106 (NonDPTEBandwidth + v->TotalDPTERowBandwidth[i][j]) / v->ReturnBusWidth 7107 / (v->MaxAveragePercentOfIdealFabricAndSDPPortBWDisplayCanUseInNormalSystemOperation / 100), 7108 (NonDPTEBandwidth + DPTEBandwidth / NormalEfficiency) / NormalEfficiency / v->ReturnBusWidth); 7109 7110 ExtraLatencyBytes = CalculateExtraLatencyBytes( 7111 ReorderingBytes, 7112 v->TotalNumberOfActiveDPP[i][j], 7113 v->PixelChunkSizeInKByte, 7114 v->TotalNumberOfDCCActiveDPP[i][j], 7115 v->MetaChunkSize, 7116 v->GPUVMEnable, 7117 v->HostVMEnable, 7118 v->NumberOfActivePlanes, 7119 NoOfDPPState, 7120 v->dpte_group_bytes, 7121 1, 7122 v->HostVMMinPageSize, 7123 v->HostVMMaxNonCachedPageTableLevels); 7124 ExtraLatencyCycles = v->RoundTripPingLatencyCycles + __DML_ARB_TO_RET_DELAY__ + ExtraLatencyBytes / NormalEfficiency / v->ReturnBusWidth; 7125 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 7126 double DCFCLKCyclesRequiredInPrefetch; 7127 double ExpectedPrefetchBWAcceleration; 7128 double PrefetchTime; 7129 7130 PixelDCFCLKCyclesRequiredInPrefetch[k] = (v->PrefetchLinesY[i][j][k] * v->swath_width_luma_ub_all_states[i][j][k] * v->BytePerPixelY[k] 7131 + v->PrefetchLinesC[i][j][k] * v->swath_width_chroma_ub_all_states[i][j][k] * v->BytePerPixelC[k]) / NormalEfficiency 
/ v->ReturnBusWidth; 7132 DCFCLKCyclesRequiredInPrefetch = 2 * ExtraLatencyCycles / NoOfDPPState[k] 7133 + v->PDEAndMetaPTEBytesPerFrame[i][j][k] / NormalEfficiency / NormalEfficiency / v->ReturnBusWidth * (v->GPUVMMaxPageTableLevels > 2 ? 1 : 0) 7134 + 2 * v->DPTEBytesPerRow[i][j][k] / NormalEfficiency / NormalEfficiency / v->ReturnBusWidth 7135 + 2 * v->MetaRowBytes[i][j][k] / NormalEfficiency / v->ReturnBusWidth + PixelDCFCLKCyclesRequiredInPrefetch[k]; 7136 PrefetchPixelLinesTime[k] = dml_max(v->PrefetchLinesY[i][j][k], v->PrefetchLinesC[i][j][k]) * v->HTotal[k] / v->PixelClock[k]; 7137 ExpectedPrefetchBWAcceleration = (v->VActivePixelBandwidth[i][j][k] + v->VActiveCursorBandwidth[i][j][k]) 7138 / (v->ReadBandwidthLuma[k] + v->ReadBandwidthChroma[k]); 7139 DynamicMetadataVMExtraLatency[k] = 7140 (v->GPUVMEnable == true && v->DynamicMetadataEnable[k] == true && v->DynamicMetadataVMEnabled == true) ? 7141 v->UrgLatency[i] * v->GPUVMMaxPageTableLevels * (v->HostVMEnable == true ? v->HostVMMaxNonCachedPageTableLevels + 1 : 1) : 0; 7142 PrefetchTime = (v->MaximumVStartup[i][j][k] - 1) * v->HTotal[k] / v->PixelClock[k] - MinimumTWait 7143 - v->UrgLatency[i] 7144 * ((v->GPUVMMaxPageTableLevels <= 2 ? v->GPUVMMaxPageTableLevels : v->GPUVMMaxPageTableLevels - 2) 7145 * (v->HostVMEnable == true ? 
v->HostVMMaxNonCachedPageTableLevels + 1 : 1) - 1) 7146 - DynamicMetadataVMExtraLatency[k]; 7147 7148 if (PrefetchTime > 0) { 7149 double ExpectedVRatioPrefetch; 7150 7151 ExpectedVRatioPrefetch = PrefetchPixelLinesTime[k] 7152 / (PrefetchTime * PixelDCFCLKCyclesRequiredInPrefetch[k] / DCFCLKCyclesRequiredInPrefetch); 7153 DCFCLKRequiredForPeakBandwidthPerPlane[k] = NoOfDPPState[k] * PixelDCFCLKCyclesRequiredInPrefetch[k] / PrefetchPixelLinesTime[k] 7154 * dml_max(1.0, ExpectedVRatioPrefetch) * dml_max(1.0, ExpectedVRatioPrefetch / 4) * ExpectedPrefetchBWAcceleration; 7155 if (v->HostVMEnable == true || v->ImmediateFlipRequirement[0] == dm_immediate_flip_required) { 7156 DCFCLKRequiredForPeakBandwidthPerPlane[k] = DCFCLKRequiredForPeakBandwidthPerPlane[k] 7157 + NoOfDPPState[k] * DPTEBandwidth / NormalEfficiency / NormalEfficiency / v->ReturnBusWidth; 7158 } 7159 } else { 7160 DCFCLKRequiredForPeakBandwidthPerPlane[k] = v->DCFCLKPerState[i]; 7161 } 7162 if (v->DynamicMetadataEnable[k] == true) { 7163 double TSetupPipe; 7164 double TdmbfPipe; 7165 double TdmsksPipe; 7166 double TdmecPipe; 7167 double AllowedTimeForUrgentExtraLatency; 7168 7169 CalculateVupdateAndDynamicMetadataParameters( 7170 v->MaxInterDCNTileRepeaters, 7171 v->RequiredDPPCLK[i][j][k], 7172 v->RequiredDISPCLK[i][j], 7173 v->ProjectedDCFCLKDeepSleep[i][j], 7174 v->PixelClock[k], 7175 v->HTotal[k], 7176 v->VTotal[k] - v->VActive[k], 7177 v->DynamicMetadataTransmittedBytes[k], 7178 v->DynamicMetadataLinesBeforeActiveRequired[k], 7179 v->Interlace[k], 7180 v->ProgressiveToInterlaceUnitInOPP, 7181 &TSetupPipe, 7182 &TdmbfPipe, 7183 &TdmecPipe, 7184 &TdmsksPipe, 7185 &dummy1, 7186 &dummy2, 7187 &dummy3); 7188 AllowedTimeForUrgentExtraLatency = v->MaximumVStartup[i][j][k] * v->HTotal[k] / v->PixelClock[k] - MinimumTWait - TSetupPipe - TdmbfPipe - TdmecPipe 7189 - TdmsksPipe - DynamicMetadataVMExtraLatency[k]; 7190 if (AllowedTimeForUrgentExtraLatency > 0) { 7191 DCFCLKRequiredForPeakBandwidthPerPlane[k] 
= dml_max( 7192 DCFCLKRequiredForPeakBandwidthPerPlane[k], 7193 ExtraLatencyCycles / AllowedTimeForUrgentExtraLatency); 7194 } else { 7195 DCFCLKRequiredForPeakBandwidthPerPlane[k] = v->DCFCLKPerState[i]; 7196 } 7197 } 7198 } 7199 DCFCLKRequiredForPeakBandwidth = 0; 7200 for (k = 0; k <= v->NumberOfActivePlanes - 1; ++k) 7201 DCFCLKRequiredForPeakBandwidth = DCFCLKRequiredForPeakBandwidth + DCFCLKRequiredForPeakBandwidthPerPlane[k]; 7202 7203 MinimumTvmPlus2Tr0 = v->UrgLatency[i] 7204 * (v->GPUVMEnable == true ? 7205 (v->HostVMEnable == true ? 7206 (v->GPUVMMaxPageTableLevels + 2) * (v->HostVMMaxNonCachedPageTableLevels + 1) - 1 : v->GPUVMMaxPageTableLevels + 1) : 7207 0); 7208 for (k = 0; k < v->NumberOfActivePlanes; ++k) { 7209 double MaximumTvmPlus2Tr0PlusTsw; 7210 7211 MaximumTvmPlus2Tr0PlusTsw = (v->MaximumVStartup[i][j][k] - 2) * v->HTotal[k] / v->PixelClock[k] - MinimumTWait - DynamicMetadataVMExtraLatency[k]; 7212 if (MaximumTvmPlus2Tr0PlusTsw <= MinimumTvmPlus2Tr0 + PrefetchPixelLinesTime[k] / 4) { 7213 DCFCLKRequiredForPeakBandwidth = v->DCFCLKPerState[i]; 7214 } else { 7215 DCFCLKRequiredForPeakBandwidth = dml_max3( 7216 DCFCLKRequiredForPeakBandwidth, 7217 2 * ExtraLatencyCycles / (MaximumTvmPlus2Tr0PlusTsw - MinimumTvmPlus2Tr0 - PrefetchPixelLinesTime[k] / 4), 7218 (2 * ExtraLatencyCycles + PixelDCFCLKCyclesRequiredInPrefetch[k]) / (MaximumTvmPlus2Tr0PlusTsw - MinimumTvmPlus2Tr0)); 7219 } 7220 } 7221 v->DCFCLKState[i][j] = dml_min(v->DCFCLKPerState[i], 1.05 * dml_max(DCFCLKRequiredForAverageBandwidth, DCFCLKRequiredForPeakBandwidth)); 7222 } 7223 } 7224 } 7225 7226 static void CalculateUnboundedRequestAndCompressedBufferSize( 7227 unsigned int DETBufferSizeInKByte, 7228 int ConfigReturnBufferSizeInKByte, 7229 enum unbounded_requesting_policy UseUnboundedRequestingFinal, 7230 int TotalActiveDPP, 7231 bool NoChromaPlanes, 7232 int MaxNumDPP, 7233 int CompressedBufferSegmentSizeInkByteFinal, 7234 enum output_encoder_class *Output, 7235 bool 
*UnboundedRequestEnabled, 7236 int *CompressedBufferSizeInkByte) 7237 { 7238 double actDETBufferSizeInKByte = dml_ceil(DETBufferSizeInKByte, 64); 7239 7240 *UnboundedRequestEnabled = UnboundedRequest(UseUnboundedRequestingFinal, TotalActiveDPP, NoChromaPlanes, Output[0]); 7241 *CompressedBufferSizeInkByte = ( 7242 *UnboundedRequestEnabled == true ? 7243 ConfigReturnBufferSizeInKByte - TotalActiveDPP * actDETBufferSizeInKByte : 7244 ConfigReturnBufferSizeInKByte - MaxNumDPP * actDETBufferSizeInKByte); 7245 *CompressedBufferSizeInkByte = *CompressedBufferSizeInkByte * CompressedBufferSegmentSizeInkByteFinal / 64; 7246 7247 #ifdef __DML_VBA_DEBUG__ 7248 dml_print("DML::%s: TotalActiveDPP = %d\n", __func__, TotalActiveDPP); 7249 dml_print("DML::%s: DETBufferSizeInKByte = %d\n", __func__, DETBufferSizeInKByte); 7250 dml_print("DML::%s: ConfigReturnBufferSizeInKByte = %d\n", __func__, ConfigReturnBufferSizeInKByte); 7251 dml_print("DML::%s: UseUnboundedRequestingFinal = %d\n", __func__, UseUnboundedRequestingFinal); 7252 dml_print("DML::%s: actDETBufferSizeInKByte = %f\n", __func__, actDETBufferSizeInKByte); 7253 dml_print("DML::%s: UnboundedRequestEnabled = %d\n", __func__, *UnboundedRequestEnabled); 7254 dml_print("DML::%s: CompressedBufferSizeInkByte = %d\n", __func__, *CompressedBufferSizeInkByte); 7255 #endif 7256 } 7257 7258 static bool UnboundedRequest(enum unbounded_requesting_policy UseUnboundedRequestingFinal, int TotalNumberOfActiveDPP, bool NoChroma, enum output_encoder_class Output) 7259 { 7260 bool ret_val = false; 7261 7262 ret_val = (UseUnboundedRequestingFinal != dm_unbounded_requesting_disable && TotalNumberOfActiveDPP == 1 && NoChroma); 7263 if (UseUnboundedRequestingFinal == dm_unbounded_requesting_edp_only && Output != dm_edp) 7264 ret_val = false; 7265 return ret_val; 7266 } 7267 7268 static unsigned int CalculateMaxVStartup( 7269 unsigned int VTotal, 7270 unsigned int VActive, 7271 unsigned int VBlankNom, 7272 unsigned int HTotal, 7273 double 
PixelClock, 7274 bool ProgressiveTointerlaceUnitinOPP, 7275 bool Interlace, 7276 unsigned int VBlankNomDefaultUS, 7277 double WritebackDelayTime) 7278 { 7279 unsigned int MaxVStartup = 0; 7280 unsigned int vblank_size = 0; 7281 double line_time_us = HTotal / PixelClock; 7282 unsigned int vblank_actual = VTotal - VActive; 7283 unsigned int vblank_nom_default_in_line = dml_floor(VBlankNomDefaultUS / line_time_us, 1.0); 7284 unsigned int vblank_nom_input = VBlankNom; //dml_min(VBlankNom, vblank_nom_default_in_line); 7285 unsigned int vblank_avail = vblank_nom_input == 0 ? vblank_nom_default_in_line : vblank_nom_input; 7286 7287 vblank_size = (unsigned int) dml_min(vblank_actual, vblank_avail); 7288 if (Interlace && !ProgressiveTointerlaceUnitinOPP) 7289 MaxVStartup = dml_floor(vblank_size / 2.0, 1.0); 7290 else 7291 MaxVStartup = vblank_size - dml_max(1.0, dml_ceil(WritebackDelayTime / line_time_us, 1.0)); 7292 if (MaxVStartup > 1023) 7293 MaxVStartup = 1023; 7294 return MaxVStartup; 7295 } 7296